From e2379c4956099294994e090b9bede94bbbbdcab1 Mon Sep 17 00:00:00 2001
From: auth12
Date: Thu, 16 Jul 2020 13:37:51 +0100
Subject: Added Windows support to the client.

---
 client/wolfssl/wolfcrypt/src/aes.c | 8704 ++
 client/wolfssl/wolfcrypt/src/aes_asm.S | 1338 +
 client/wolfssl/wolfcrypt/src/aes_asm.asm | 1529 +
 client/wolfssl/wolfcrypt/src/aes_gcm_asm.S | 8733 ++
 client/wolfssl/wolfcrypt/src/arc4.c | 149 +
 client/wolfssl/wolfcrypt/src/asm.c | 1783 +
 client/wolfssl/wolfcrypt/src/asn.c | 17525 ++++
 client/wolfssl/wolfcrypt/src/async.c | 0
 client/wolfssl/wolfcrypt/src/blake2b.c | 452 +
 client/wolfssl/wolfcrypt/src/blake2s.c | 446 +
 client/wolfssl/wolfcrypt/src/camellia.c | 1644 +
 client/wolfssl/wolfcrypt/src/chacha.c | 348 +
 client/wolfssl/wolfcrypt/src/chacha20_poly1305.c | 286 +
 client/wolfssl/wolfcrypt/src/chacha_asm.S | 1420 +
 client/wolfssl/wolfcrypt/src/cmac.c | 215 +
 client/wolfssl/wolfcrypt/src/coding.c | 511 +
 client/wolfssl/wolfcrypt/src/compress.c | 198 +
 client/wolfssl/wolfcrypt/src/cpuid.c | 110 +
 client/wolfssl/wolfcrypt/src/cryptocb.c | 648 +
 client/wolfssl/wolfcrypt/src/curve25519.c | 513 +
 client/wolfssl/wolfcrypt/src/curve448.c | 635 +
 client/wolfssl/wolfcrypt/src/des3.c | 1825 +
 client/wolfssl/wolfcrypt/src/dh.c | 2491 +
 client/wolfssl/wolfcrypt/src/dsa.c | 920 +
 client/wolfssl/wolfcrypt/src/ecc.c | 10761 +++
 client/wolfssl/wolfcrypt/src/ecc_fp.c | 1 +
 client/wolfssl/wolfcrypt/src/ed25519.c | 814 +
 client/wolfssl/wolfcrypt/src/ed448.c | 917 +
 client/wolfssl/wolfcrypt/src/error.c | 530 +
 client/wolfssl/wolfcrypt/src/evp.c | 6595 ++
 client/wolfssl/wolfcrypt/src/fe_448.c | 2458 +
 client/wolfssl/wolfcrypt/src/fe_low_mem.c | 611 +
 client/wolfssl/wolfcrypt/src/fe_operations.c | 1438 +
 client/wolfssl/wolfcrypt/src/fe_x25519_128.i | 625 +
 client/wolfssl/wolfcrypt/src/fe_x25519_asm.S | 16542 ++++
 client/wolfssl/wolfcrypt/src/fips.c | 0
 client/wolfssl/wolfcrypt/src/fips_test.c | 0
 client/wolfssl/wolfcrypt/src/fp_mont_small.i | 3874 +
 client/wolfssl/wolfcrypt/src/fp_mul_comba_12.i | 147 +
 client/wolfssl/wolfcrypt/src/fp_mul_comba_17.i | 187 +
 client/wolfssl/wolfcrypt/src/fp_mul_comba_20.i | 210 +
 client/wolfssl/wolfcrypt/src/fp_mul_comba_24.i | 243 +
 client/wolfssl/wolfcrypt/src/fp_mul_comba_28.i | 275 +
 client/wolfssl/wolfcrypt/src/fp_mul_comba_3.i | 61 +
 client/wolfssl/wolfcrypt/src/fp_mul_comba_32.i | 321 +
 client/wolfssl/wolfcrypt/src/fp_mul_comba_4.i | 83 +
 client/wolfssl/wolfcrypt/src/fp_mul_comba_48.i | 435 +
 client/wolfssl/wolfcrypt/src/fp_mul_comba_6.i | 99 +
 client/wolfssl/wolfcrypt/src/fp_mul_comba_64.i | 563 +
 client/wolfssl/wolfcrypt/src/fp_mul_comba_7.i | 107 +
 client/wolfssl/wolfcrypt/src/fp_mul_comba_8.i | 115 +
 client/wolfssl/wolfcrypt/src/fp_mul_comba_9.i | 123 +
 .../wolfssl/wolfcrypt/src/fp_mul_comba_small_set.i | 1268 +
 client/wolfssl/wolfcrypt/src/fp_sqr_comba_12.i | 177 +
 client/wolfssl/wolfcrypt/src/fp_sqr_comba_17.i | 227 +
 client/wolfssl/wolfcrypt/src/fp_sqr_comba_20.i | 257 +
 client/wolfssl/wolfcrypt/src/fp_sqr_comba_24.i | 297 +
 client/wolfssl/wolfcrypt/src/fp_sqr_comba_28.i | 337 +
 client/wolfssl/wolfcrypt/src/fp_sqr_comba_3.i | 73 +
 client/wolfssl/wolfcrypt/src/fp_sqr_comba_32.i | 377 +
 client/wolfssl/wolfcrypt/src/fp_sqr_comba_4.i | 97 +
 client/wolfssl/wolfcrypt/src/fp_sqr_comba_48.i | 537 +
 client/wolfssl/wolfcrypt/src/fp_sqr_comba_6.i | 117 +
 client/wolfssl/wolfcrypt/src/fp_sqr_comba_64.i | 697 +
 client/wolfssl/wolfcrypt/src/fp_sqr_comba_7.i | 127 +
 client/wolfssl/wolfcrypt/src/fp_sqr_comba_8.i | 137 +
 client/wolfssl/wolfcrypt/src/fp_sqr_comba_9.i | 147 +
 .../wolfssl/wolfcrypt/src/fp_sqr_comba_small_set.i | 1558 +
 client/wolfssl/wolfcrypt/src/ge_448.c | 10780 +++
 client/wolfssl/wolfcrypt/src/ge_low_mem.c | 563 +
 client/wolfssl/wolfcrypt/src/ge_operations.c | 9803 ++
 client/wolfssl/wolfcrypt/src/hash.c | 1677 +
 client/wolfssl/wolfcrypt/src/hc128.c | 430 +
 client/wolfssl/wolfcrypt/src/hmac.c | 1290 +
 client/wolfssl/wolfcrypt/src/idea.c | 303 +
 client/wolfssl/wolfcrypt/src/include.am | 127 +
 client/wolfssl/wolfcrypt/src/integer.c | 5320 ++
 client/wolfssl/wolfcrypt/src/logging.c | 843 +
 client/wolfssl/wolfcrypt/src/md2.c | 162 +
 client/wolfssl/wolfcrypt/src/md4.c | 211 +
 client/wolfssl/wolfcrypt/src/md5.c | 572 +
 client/wolfssl/wolfcrypt/src/memory.c | 1126 +
 client/wolfssl/wolfcrypt/src/misc.c | 405 +
 client/wolfssl/wolfcrypt/src/pkcs12.c | 2403 +
 client/wolfssl/wolfcrypt/src/pkcs7.c | 12523 +++
 client/wolfssl/wolfcrypt/src/poly1305.c | 868 +
 client/wolfssl/wolfcrypt/src/poly1305_asm.S | 1105 +
 .../wolfssl/wolfcrypt/src/port/Espressif/README.md | 109 +
 .../wolfcrypt/src/port/Espressif/esp32_aes.c | 299 +
 .../wolfcrypt/src/port/Espressif/esp32_mp.c | 514 +
 .../wolfcrypt/src/port/Espressif/esp32_sha.c | 434 +
 .../wolfcrypt/src/port/Espressif/esp32_util.c | 67 +
 .../wolfssl/wolfcrypt/src/port/Renesas/README.md | 176 +
 .../wolfcrypt/src/port/Renesas/renesas_tsip_aes.c | 156 +
 .../wolfcrypt/src/port/Renesas/renesas_tsip_sha.c | 274 +
 .../wolfcrypt/src/port/Renesas/renesas_tsip_util.c | 719 +
 .../wolfssl/wolfcrypt/src/port/af_alg/afalg_aes.c | 900 +
 .../wolfssl/wolfcrypt/src/port/af_alg/afalg_hash.c | 339 +
 .../wolfssl/wolfcrypt/src/port/af_alg/wc_afalg.c | 141 +
 .../wolfcrypt/src/port/arm/armv8-32-curve25519.S | 6012 ++
 .../wolfcrypt/src/port/arm/armv8-32-curve25519.c | 5581 ++
 .../wolfcrypt/src/port/arm/armv8-32-sha512-asm.S | 5335 ++
 .../wolfcrypt/src/port/arm/armv8-32-sha512-asm.c | 4783 +
 client/wolfssl/wolfcrypt/src/port/arm/armv8-aes.c | 4653 +
 .../wolfssl/wolfcrypt/src/port/arm/armv8-chacha.c | 2857 +
 .../wolfcrypt/src/port/arm/armv8-curve25519.S | 6715 ++
 .../wolfcrypt/src/port/arm/armv8-curve25519.c | 6725 ++
 .../wolfcrypt/src/port/arm/armv8-poly1305.c | 1166 +
 .../wolfssl/wolfcrypt/src/port/arm/armv8-sha256.c | 1508 +
 .../wolfcrypt/src/port/arm/armv8-sha512-asm.S | 1046 +
 .../wolfcrypt/src/port/arm/armv8-sha512-asm.c | 1041 +
 .../wolfssl/wolfcrypt/src/port/arm/armv8-sha512.c | 715 +
 client/wolfssl/wolfcrypt/src/port/arm/cryptoCell.c | 309 +
 .../wolfcrypt/src/port/arm/cryptoCellHash.c | 134 +
 client/wolfssl/wolfcrypt/src/port/atmel/README.md | 94 +
 client/wolfssl/wolfcrypt/src/port/atmel/atmel.c | 843 +
 client/wolfssl/wolfcrypt/src/port/caam/caam_aes.c | 649 +
 .../wolfssl/wolfcrypt/src/port/caam/caam_doc.pdf | Bin 0 -> 1107370 bytes
 .../wolfssl/wolfcrypt/src/port/caam/caam_driver.c | 1713 +
 client/wolfssl/wolfcrypt/src/port/caam/caam_init.c | 289 +
 client/wolfssl/wolfcrypt/src/port/caam/caam_sha.c | 397 +
 client/wolfssl/wolfcrypt/src/port/cavium/README.md | 3 +
 .../wolfcrypt/src/port/cavium/README_Octeon.md | 3 +
 .../wolfcrypt/src/port/cavium/cavium_nitrox.c | 0
 .../wolfcrypt/src/port/cavium/cavium_octeon_sync.c | 879 +
 .../wolfssl/wolfcrypt/src/port/devcrypto/README.md | 43 +
 .../wolfcrypt/src/port/devcrypto/devcrypto_aes.c | 384 +
 .../wolfcrypt/src/port/devcrypto/devcrypto_hash.c | 248 +
 .../wolfcrypt/src/port/devcrypto/wc_devcrypto.c | 167 +
 client/wolfssl/wolfcrypt/src/port/intel/README.md | 3 +
 .../wolfssl/wolfcrypt/src/port/intel/quickassist.c | 0
 .../wolfcrypt/src/port/intel/quickassist_mem.c | 0
 .../wolfcrypt/src/port/intel/quickassist_sync.c | 2004 +
 .../wolfcrypt/src/port/mynewt/mynewt_port.c | 146 +
 client/wolfssl/wolfcrypt/src/port/nrf51.c | 220 +
 client/wolfssl/wolfcrypt/src/port/nxp/ksdk_port.c | 1731 +
 .../wolfcrypt/src/port/pic32/pic32mz-crypt.c | 804 +
 client/wolfssl/wolfcrypt/src/port/st/README.md | 132 +
 client/wolfssl/wolfcrypt/src/port/st/stm32.c | 879 +
 client/wolfssl/wolfcrypt/src/port/st/stsafe.c | 566 +
 client/wolfssl/wolfcrypt/src/port/ti/ti-aes.c | 569 +
 client/wolfssl/wolfcrypt/src/port/ti/ti-ccm.c | 94 +
 client/wolfssl/wolfcrypt/src/port/ti/ti-des3.c | 204 +
 client/wolfssl/wolfcrypt/src/port/ti/ti-hash.c | 338 +
 .../wolfssl/wolfcrypt/src/port/xilinx/xil-aesgcm.c | 202 +
 .../wolfssl/wolfcrypt/src/port/xilinx/xil-sha3.c | 158 +
 client/wolfssl/wolfcrypt/src/pwdbased.c | 795 +
 client/wolfssl/wolfcrypt/src/rabbit.c | 342 +
 client/wolfssl/wolfcrypt/src/random.c | 2552 +
 client/wolfssl/wolfcrypt/src/ripemd.c | 366 +
 client/wolfssl/wolfcrypt/src/rsa.c | 4201 +
 client/wolfssl/wolfcrypt/src/selftest.c | 0
 client/wolfssl/wolfcrypt/src/sha.c | 882 +
 client/wolfssl/wolfcrypt/src/sha256.c | 1644 +
 client/wolfssl/wolfcrypt/src/sha256_asm.S | 22653 +++++
 client/wolfssl/wolfcrypt/src/sha3.c | 1216 +
 client/wolfssl/wolfcrypt/src/sha512.c | 1225 +
 client/wolfssl/wolfcrypt/src/sha512_asm.S | 10741 +++
 client/wolfssl/wolfcrypt/src/signature.c | 559 +
 client/wolfssl/wolfcrypt/src/sp_arm32.c | 89057 +++++++++++++++++++
 client/wolfssl/wolfcrypt/src/sp_arm64.c | 42082 +++++++++
 client/wolfssl/wolfcrypt/src/sp_armthumb.c | 27863 ++++++
 client/wolfssl/wolfcrypt/src/sp_c32.c | 23857 +++++
 client/wolfssl/wolfcrypt/src/sp_c64.c | 23220 +++++
 client/wolfssl/wolfcrypt/src/sp_cortexm.c | 25687 ++++++
 client/wolfssl/wolfcrypt/src/sp_dsp32.c | 4908 +
 client/wolfssl/wolfcrypt/src/sp_int.c | 2203 +
 client/wolfssl/wolfcrypt/src/sp_x86_64.c | 29555 ++++++
 client/wolfssl/wolfcrypt/src/sp_x86_64_asm.S | 41830 +++++++++
 client/wolfssl/wolfcrypt/src/srp.c | 756 +
 client/wolfssl/wolfcrypt/src/tfm.c | 5068 ++
 client/wolfssl/wolfcrypt/src/wc_dsp.c | 327 +
 client/wolfssl/wolfcrypt/src/wc_encrypt.c | 660 +
 client/wolfssl/wolfcrypt/src/wc_pkcs11.c | 2546 +
 client/wolfssl/wolfcrypt/src/wc_port.c | 2276 +
 client/wolfssl/wolfcrypt/src/wolfcrypt_first.c | 0
 client/wolfssl/wolfcrypt/src/wolfcrypt_last.c | 0
 client/wolfssl/wolfcrypt/src/wolfevent.c | 283 +
 client/wolfssl/wolfcrypt/src/wolfmath.c | 381 +
 179 files changed, 593704 insertions(+)

 create mode 100644 client/wolfssl/wolfcrypt/src/aes.c
 create mode 100644 client/wolfssl/wolfcrypt/src/aes_asm.S
 create mode 100644 client/wolfssl/wolfcrypt/src/aes_asm.asm
 create mode 100644 client/wolfssl/wolfcrypt/src/aes_gcm_asm.S
 create mode 100644 client/wolfssl/wolfcrypt/src/arc4.c
 create mode 100644 client/wolfssl/wolfcrypt/src/asm.c
 create mode 100644 client/wolfssl/wolfcrypt/src/asn.c
 create mode 100644 client/wolfssl/wolfcrypt/src/async.c
 create mode 100644 client/wolfssl/wolfcrypt/src/blake2b.c
 create mode 100644 client/wolfssl/wolfcrypt/src/blake2s.c
 create mode 100644 client/wolfssl/wolfcrypt/src/camellia.c
 create mode 100644 client/wolfssl/wolfcrypt/src/chacha.c
 create mode 100644 client/wolfssl/wolfcrypt/src/chacha20_poly1305.c
 create mode 100644 client/wolfssl/wolfcrypt/src/chacha_asm.S
 create mode 100644 client/wolfssl/wolfcrypt/src/cmac.c
 create mode 100644 client/wolfssl/wolfcrypt/src/coding.c
 create mode 100644 client/wolfssl/wolfcrypt/src/compress.c
 create mode 100644 client/wolfssl/wolfcrypt/src/cpuid.c
 create mode 100644 client/wolfssl/wolfcrypt/src/cryptocb.c
 create mode 100644 client/wolfssl/wolfcrypt/src/curve25519.c
 create mode 100644 client/wolfssl/wolfcrypt/src/curve448.c
 create mode 100644 client/wolfssl/wolfcrypt/src/des3.c
 create mode 100644 client/wolfssl/wolfcrypt/src/dh.c
 create mode 100644 client/wolfssl/wolfcrypt/src/dsa.c
 create mode 100644 client/wolfssl/wolfcrypt/src/ecc.c
 create mode 100644 client/wolfssl/wolfcrypt/src/ecc_fp.c
 create mode 100644 client/wolfssl/wolfcrypt/src/ed25519.c
 create mode 100644 client/wolfssl/wolfcrypt/src/ed448.c
 create mode 100644 client/wolfssl/wolfcrypt/src/error.c
 create mode 100644 client/wolfssl/wolfcrypt/src/evp.c
 create mode 100644 client/wolfssl/wolfcrypt/src/fe_448.c
 create mode 100644 client/wolfssl/wolfcrypt/src/fe_low_mem.c
 create mode 100644 client/wolfssl/wolfcrypt/src/fe_operations.c
 create mode 100644 client/wolfssl/wolfcrypt/src/fe_x25519_128.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fe_x25519_asm.S
 create mode 100644 client/wolfssl/wolfcrypt/src/fips.c
 create mode 100644 client/wolfssl/wolfcrypt/src/fips_test.c
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_mont_small.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_mul_comba_12.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_mul_comba_17.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_mul_comba_20.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_mul_comba_24.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_mul_comba_28.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_mul_comba_3.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_mul_comba_32.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_mul_comba_4.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_mul_comba_48.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_mul_comba_6.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_mul_comba_64.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_mul_comba_7.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_mul_comba_8.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_mul_comba_9.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_mul_comba_small_set.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_sqr_comba_12.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_sqr_comba_17.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_sqr_comba_20.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_sqr_comba_24.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_sqr_comba_28.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_sqr_comba_3.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_sqr_comba_32.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_sqr_comba_4.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_sqr_comba_48.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_sqr_comba_6.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_sqr_comba_64.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_sqr_comba_7.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_sqr_comba_8.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_sqr_comba_9.i
 create mode 100644 client/wolfssl/wolfcrypt/src/fp_sqr_comba_small_set.i
 create mode 100644 client/wolfssl/wolfcrypt/src/ge_448.c
 create mode 100644 client/wolfssl/wolfcrypt/src/ge_low_mem.c
 create mode 100644 client/wolfssl/wolfcrypt/src/ge_operations.c
 create mode 100644 client/wolfssl/wolfcrypt/src/hash.c
 create mode 100644 client/wolfssl/wolfcrypt/src/hc128.c
 create mode 100644 client/wolfssl/wolfcrypt/src/hmac.c
 create mode 100644 client/wolfssl/wolfcrypt/src/idea.c
 create mode 100644 client/wolfssl/wolfcrypt/src/include.am
 create mode 100644 client/wolfssl/wolfcrypt/src/integer.c
 create mode 100644 client/wolfssl/wolfcrypt/src/logging.c
 create mode 100644 client/wolfssl/wolfcrypt/src/md2.c
 create mode 100644 client/wolfssl/wolfcrypt/src/md4.c
 create mode 100644 client/wolfssl/wolfcrypt/src/md5.c
 create mode 100644 client/wolfssl/wolfcrypt/src/memory.c
 create mode 100644 client/wolfssl/wolfcrypt/src/misc.c
 create mode 100644 client/wolfssl/wolfcrypt/src/pkcs12.c
 create mode 100644 client/wolfssl/wolfcrypt/src/pkcs7.c
 create mode 100644 client/wolfssl/wolfcrypt/src/poly1305.c
 create mode 100644 client/wolfssl/wolfcrypt/src/poly1305_asm.S
 create mode 100644 client/wolfssl/wolfcrypt/src/port/Espressif/README.md
 create mode 100644 client/wolfssl/wolfcrypt/src/port/Espressif/esp32_aes.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/Espressif/esp32_mp.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/Espressif/esp32_sha.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/Espressif/esp32_util.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/Renesas/README.md
 create mode 100644 client/wolfssl/wolfcrypt/src/port/Renesas/renesas_tsip_aes.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/Renesas/renesas_tsip_sha.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/Renesas/renesas_tsip_util.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/af_alg/afalg_aes.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/af_alg/afalg_hash.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/af_alg/wc_afalg.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.S
 create mode 100644 client/wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S
 create mode 100644 client/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/arm/armv8-aes.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/arm/armv8-chacha.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/arm/armv8-curve25519.S
 create mode 100644 client/wolfssl/wolfcrypt/src/port/arm/armv8-curve25519.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/arm/armv8-poly1305.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/arm/armv8-sha256.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.S
 create mode 100644 client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/arm/cryptoCell.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/arm/cryptoCellHash.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/atmel/README.md
 create mode 100644 client/wolfssl/wolfcrypt/src/port/atmel/atmel.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/caam/caam_aes.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/caam/caam_doc.pdf
 create mode 100644 client/wolfssl/wolfcrypt/src/port/caam/caam_driver.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/caam/caam_init.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/caam/caam_sha.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/cavium/README.md
 create mode 100644 client/wolfssl/wolfcrypt/src/port/cavium/README_Octeon.md
 create mode 100644 client/wolfssl/wolfcrypt/src/port/cavium/cavium_nitrox.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/cavium/cavium_octeon_sync.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/devcrypto/README.md
 create mode 100644 client/wolfssl/wolfcrypt/src/port/devcrypto/devcrypto_aes.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/devcrypto/devcrypto_hash.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/devcrypto/wc_devcrypto.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/intel/README.md
 create mode 100644 client/wolfssl/wolfcrypt/src/port/intel/quickassist.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/intel/quickassist_mem.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/intel/quickassist_sync.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/mynewt/mynewt_port.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/nrf51.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/nxp/ksdk_port.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/pic32/pic32mz-crypt.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/st/README.md
 create mode 100644 client/wolfssl/wolfcrypt/src/port/st/stm32.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/st/stsafe.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/ti/ti-aes.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/ti/ti-ccm.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/ti/ti-des3.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/ti/ti-hash.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/xilinx/xil-aesgcm.c
 create mode 100644 client/wolfssl/wolfcrypt/src/port/xilinx/xil-sha3.c
 create mode 100644 client/wolfssl/wolfcrypt/src/pwdbased.c
 create mode 100644 client/wolfssl/wolfcrypt/src/rabbit.c
 create mode 100644 client/wolfssl/wolfcrypt/src/random.c
 create mode 100644 client/wolfssl/wolfcrypt/src/ripemd.c
 create mode 100644 client/wolfssl/wolfcrypt/src/rsa.c
 create mode 100644 client/wolfssl/wolfcrypt/src/selftest.c
 create mode 100644 client/wolfssl/wolfcrypt/src/sha.c
 create mode 100644 client/wolfssl/wolfcrypt/src/sha256.c
 create mode 100644 client/wolfssl/wolfcrypt/src/sha256_asm.S
 create mode 100644 client/wolfssl/wolfcrypt/src/sha3.c
 create mode 100644 client/wolfssl/wolfcrypt/src/sha512.c
 create mode 100644 client/wolfssl/wolfcrypt/src/sha512_asm.S
 create mode 100644 client/wolfssl/wolfcrypt/src/signature.c
 create mode 100644 client/wolfssl/wolfcrypt/src/sp_arm32.c
 create mode 100644 client/wolfssl/wolfcrypt/src/sp_arm64.c
 create mode 100644 client/wolfssl/wolfcrypt/src/sp_armthumb.c
 create mode 100644 client/wolfssl/wolfcrypt/src/sp_c32.c
 create mode 100644 client/wolfssl/wolfcrypt/src/sp_c64.c
 create mode 100644 client/wolfssl/wolfcrypt/src/sp_cortexm.c
 create mode 100644 client/wolfssl/wolfcrypt/src/sp_dsp32.c
 create mode 100644 client/wolfssl/wolfcrypt/src/sp_int.c
 create mode 100644 client/wolfssl/wolfcrypt/src/sp_x86_64.c
 create mode 100644 client/wolfssl/wolfcrypt/src/sp_x86_64_asm.S
 create mode 100644 client/wolfssl/wolfcrypt/src/srp.c
 create mode 100644 client/wolfssl/wolfcrypt/src/tfm.c
 create mode 100644 client/wolfssl/wolfcrypt/src/wc_dsp.c
 create mode 100644 client/wolfssl/wolfcrypt/src/wc_encrypt.c
 create mode 100644 client/wolfssl/wolfcrypt/src/wc_pkcs11.c
 create mode 100644 client/wolfssl/wolfcrypt/src/wc_port.c
 create mode 100644 client/wolfssl/wolfcrypt/src/wolfcrypt_first.c
 create mode 100644 client/wolfssl/wolfcrypt/src/wolfcrypt_last.c
 create mode 100644 client/wolfssl/wolfcrypt/src/wolfevent.c
 create mode 100644 client/wolfssl/wolfcrypt/src/wolfmath.c

diff --git a/client/wolfssl/wolfcrypt/src/aes.c b/client/wolfssl/wolfcrypt/src/aes.c
new file mode 100644
index 0000000..4b5b437
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/aes.c
@@ -0,0 +1,8704 @@
+/* aes.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+
+#if !defined(NO_AES)
+
+/* Tip: Locate the software cipher modes by searching for "Software AES" */
+
+#if defined(HAVE_FIPS) && \
+    defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+
+    /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+    #define FIPS_NO_WRAPPERS
+
+    #ifdef USE_WINDOWS_API
+        #pragma code_seg(".fipsA$g")
+        #pragma const_seg(".fipsB$g")
+    #endif
+#endif
+
+#include <wolfssl/wolfcrypt/aes.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
+
+/* fips wrapper calls, user can call direct */
+#if defined(HAVE_FIPS) && \
+    (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
+
+    int wc_AesSetKey(Aes* aes, const byte* key, word32 len, const byte* iv,
+                     int dir)
+    {
+        if (aes == NULL || !( (len == 16) || (len == 24) || (len == 32)) ) {
+            return BAD_FUNC_ARG;
+        }
+
+        return AesSetKey_fips(aes, key, len, iv, dir);
+    }
+    int wc_AesSetIV(Aes* aes, const byte* iv)
+    {
+        if (aes == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+        return AesSetIV_fips(aes, iv);
+    }
+    #ifdef HAVE_AES_CBC
+    int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+    {
+        if (aes == NULL || out == NULL || in == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+        return AesCbcEncrypt_fips(aes, out, in, sz);
+    }
+    #ifdef HAVE_AES_DECRYPT
+    int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+    {
+        if (aes == NULL || out == NULL || in == NULL
+                || sz % AES_BLOCK_SIZE != 0) {
+            return BAD_FUNC_ARG;
+        }
+
+        return AesCbcDecrypt_fips(aes, out, in, sz);
+    }
+    #endif /* HAVE_AES_DECRYPT */
+    #endif /* HAVE_AES_CBC */
+
+    /* AES-CTR */
+    #ifdef WOLFSSL_AES_COUNTER
+    int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+    {
+        if (aes == NULL || out == NULL || in == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+        return AesCtrEncrypt(aes, out, in, sz);
+    }
+    #endif
+
+    /* AES-DIRECT */
+    #if defined(WOLFSSL_AES_DIRECT)
+    void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
+    {
+        AesEncryptDirect(aes, out, in);
+    }
+
+    #ifdef HAVE_AES_DECRYPT
+    void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
+    {
+        AesDecryptDirect(aes, out, in);
+    }
+    #endif /* HAVE_AES_DECRYPT */
+
+    int wc_AesSetKeyDirect(Aes* aes, const byte* key, word32 len,
+                           const byte* iv, int dir)
+    {
+        return AesSetKeyDirect(aes, key, len, iv, dir);
+    }
+    #endif /* WOLFSSL_AES_DIRECT */
+
+    /* AES-GCM */
+    #ifdef HAVE_AESGCM
+    int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
+    {
+        if (aes == NULL || !( (len == 16) || (len == 24) || (len == 32)) ) {
+            return BAD_FUNC_ARG;
+        }
+
+        return AesGcmSetKey_fips(aes, key, len);
+    }
+    int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
+                         const byte* iv, word32 ivSz,
+                         byte* authTag, word32 authTagSz,
+                         const byte* authIn, word32 authInSz)
+    {
+        if (aes == NULL || authTagSz > AES_BLOCK_SIZE ||
+                authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ ||
+                ivSz == 0 || ivSz > AES_BLOCK_SIZE) {
+            return BAD_FUNC_ARG;
+        }
+
+        return AesGcmEncrypt_fips(aes, out, in, sz, iv, ivSz, authTag,
+            authTagSz, authIn, authInSz);
+    }
+
+    #ifdef HAVE_AES_DECRYPT
+    int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
+                         const byte* iv, word32 ivSz,
+                         const byte* authTag, word32 authTagSz,
+                         const byte* authIn, word32 authInSz)
+    {
+        if (aes == NULL || out == NULL || in == NULL || iv == NULL
+                || authTag == NULL || authTagSz > AES_BLOCK_SIZE ||
+                ivSz == 0 || ivSz > AES_BLOCK_SIZE) {
+            return BAD_FUNC_ARG;
+        }
+
+        return AesGcmDecrypt_fips(aes, out, in, sz, iv, ivSz, authTag,
+            authTagSz, authIn, authInSz);
+    }
+    #endif /* HAVE_AES_DECRYPT */
+
+    int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len)
+    {
+        if (gmac == NULL || key == NULL || !((len == 16) ||
+                (len == 24) || (len == 32)) ) {
+            return BAD_FUNC_ARG;
+        }
+
+        return GmacSetKey(gmac, key, len);
+    }
+    int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz,
+                      const byte* authIn, word32 authInSz,
+                      byte* authTag, word32 authTagSz)
+    {
+        if (gmac == NULL || authTagSz > AES_BLOCK_SIZE ||
+                authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) {
+            return BAD_FUNC_ARG;
+        }
+
+        return GmacUpdate(gmac, iv, ivSz, authIn, authInSz,
+            authTag, authTagSz);
+    }
+    #endif /* HAVE_AESGCM */
+
+    /* AES-CCM */
+    #if defined(HAVE_AESCCM) && \
+        defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+    int wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz)
+    {
+        return AesCcmSetKey(aes, key, keySz);
+    }
+    int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
+                         const byte* nonce, word32 nonceSz,
+                         byte* authTag, word32 authTagSz,
+                         const byte* authIn, word32 authInSz)
+    {
+        /* sanity check on arguments */
+        if (aes == NULL || out == NULL || in == NULL || nonce == NULL
+                || authTag == NULL || nonceSz < 7 || nonceSz > 13)
+            return BAD_FUNC_ARG;
+
+        AesCcmEncrypt(aes, out, in, inSz, nonce, nonceSz, authTag,
+            authTagSz, authIn, authInSz);
+        return 0;
+    }
+
+    #ifdef HAVE_AES_DECRYPT
+    int wc_AesCcmDecrypt(Aes* aes, byte* out,
+                         const byte* in, word32 inSz,
+                         const byte* nonce, word32 nonceSz,
+                         const byte* authTag, word32 authTagSz,
+                         const byte* authIn, word32 authInSz)
+    {
+
+        if (aes == NULL || out == NULL || in == NULL || nonce == NULL
+                || authTag == NULL || nonceSz < 7 || nonceSz > 13) {
+            return BAD_FUNC_ARG;
+        }
+
+        return AesCcmDecrypt(aes, out, in, inSz, nonce, nonceSz,
+            authTag, authTagSz, authIn, authInSz);
+    }
+    #endif /* HAVE_AES_DECRYPT */
+    #endif /* HAVE_AESCCM && HAVE_FIPS_VERSION 2 */
+
+    int wc_AesInit(Aes* aes, void* h, int i)
+    {
+        if (aes == NULL)
+            return BAD_FUNC_ARG;
+
+        (void)h;
+        (void)i;
+
+        /* FIPS doesn't support:
+            return AesInit(aes, h, i); */
+        return 0;
+    }
+    void wc_AesFree(Aes* aes)
+    {
+        (void)aes;
+        /* FIPS doesn't support:
+            AesFree(aes); */
+    }
+
+#else /* else build without fips, or for FIPS v2 */
+
+
+#if defined(WOLFSSL_TI_CRYPT)
+    #include <wolfcrypt/src/port/ti/ti-aes.c>
+#else
+
+#include <wolfssl/wolfcrypt/logging.h>
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#if !defined(WOLFSSL_ARMASM)
+
+#ifdef WOLFSSL_IMX6_CAAM_BLOB
+    /* case of possibly not using hardware acceleration for AES but using key
+       blobs */
+    #include <wolfssl/wolfcrypt/port/caam/wolfcaam.h>
+#endif
+
+#ifdef DEBUG_AESNI
+    #include <stdio.h>
+#endif
+
+#ifdef _MSC_VER
+    /* 4127 warning constant while(1) */
+    #pragma warning(disable: 4127)
+#endif
+
+
+/* Define AES implementation includes and functions */
+#if defined(STM32_CRYPTO)
+     /* STM32F2/F4/F7/L4 hardware AES support for ECB, CBC, CTR and GCM modes */
+
+#if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
+
+    static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+    {
+        int ret = 0;
+    #ifdef WOLFSSL_STM32_CUBEMX
+        CRYP_HandleTypeDef hcryp;
+    #else
+        CRYP_InitTypeDef cryptInit;
+        CRYP_KeyInitTypeDef keyInit;
+    #endif
+
+    #ifdef WOLFSSL_STM32_CUBEMX
+        ret = wc_Stm32_Aes_Init(aes, &hcryp);
+        if (ret != 0)
+            return ret;
+
+    #ifdef STM32_CRYPTO_AES_ONLY
+        hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT;
+        hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_ECB;
+        hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE;
+    #elif defined(STM32_HAL_V2)
+        hcryp.Init.Algorithm = CRYP_AES_ECB;
+    #endif
+        HAL_CRYP_Init(&hcryp);
+
+    #ifdef STM32_CRYPTO_AES_ONLY
+        ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE,
+            outBlock, STM32_HAL_TIMEOUT);
+    #elif defined(STM32_HAL_V2)
+        ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)inBlock, AES_BLOCK_SIZE,
+            (uint32_t*)outBlock, STM32_HAL_TIMEOUT);
+    #else
+        ret = HAL_CRYP_AESECB_Encrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE,
+            outBlock, STM32_HAL_TIMEOUT);
+    #endif
+        if (ret != HAL_OK) {
+            ret = WC_TIMEOUT_E;
+        }
+        HAL_CRYP_DeInit(&hcryp);
+
+    #else /* STD_PERI_LIB */
+        ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit);
+        if (ret != 0)
+            return ret;
+
+        /* reset registers to their default values */
+        CRYP_DeInit();
+
+        /* setup key */
+        CRYP_KeyInit(&keyInit);
+
+        /* set direction and mode */
+        cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt;
+        cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB;
+        CRYP_Init(&cryptInit);
+
+        /* enable crypto processor */
+        CRYP_Cmd(ENABLE);
+
+        /* flush IN/OUT FIFOs */
+        CRYP_FIFOFlush();
+
+        CRYP_DataIn(*(uint32_t*)&inBlock[0]);
+        CRYP_DataIn(*(uint32_t*)&inBlock[4]);
+        CRYP_DataIn(*(uint32_t*)&inBlock[8]);
+        CRYP_DataIn(*(uint32_t*)&inBlock[12]);
+
+        /* wait until the complete message has been processed */
+        while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
+
+        *(uint32_t*)&outBlock[0]  = CRYP_DataOut();
+        *(uint32_t*)&outBlock[4]  = CRYP_DataOut();
+        *(uint32_t*)&outBlock[8]  = CRYP_DataOut();
+        *(uint32_t*)&outBlock[12] = CRYP_DataOut();
+
+        /* disable crypto processor */
+        CRYP_Cmd(DISABLE);
+    #endif /* WOLFSSL_STM32_CUBEMX */
+
+        return ret;
+    }
+#endif /* WOLFSSL_AES_DIRECT || HAVE_AESGCM || HAVE_AESCCM */
+
+#ifdef HAVE_AES_DECRYPT
+    #if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESCCM)
+    static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+    {
+        int ret = 0;
+    #ifdef WOLFSSL_STM32_CUBEMX
+        CRYP_HandleTypeDef hcryp;
+    #else
+        CRYP_InitTypeDef cryptInit;
+        CRYP_KeyInitTypeDef keyInit;
+    #endif
+
+    #ifdef WOLFSSL_STM32_CUBEMX
+        ret = wc_Stm32_Aes_Init(aes, &hcryp);
+        if (ret != 0)
+            return ret;
+
+    #ifdef STM32_CRYPTO_AES_ONLY
+        hcryp.Init.OperatingMode = CRYP_ALGOMODE_DECRYPT;
+        hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_ECB;
+        hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE;
+    #elif defined(STM32_HAL_V2)
+        hcryp.Init.Algorithm = CRYP_AES_ECB;
+    #endif
+        HAL_CRYP_Init(&hcryp);
+
+    #ifdef STM32_CRYPTO_AES_ONLY
+        ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE,
+            outBlock, STM32_HAL_TIMEOUT);
+    #elif defined(STM32_HAL_V2)
+        ret = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)inBlock, AES_BLOCK_SIZE,
+            (uint32_t*)outBlock, STM32_HAL_TIMEOUT);
+    #else
+        ret = HAL_CRYP_AESECB_Decrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE,
+            outBlock, STM32_HAL_TIMEOUT);
+    #endif
+        if (ret != HAL_OK) {
+            ret = WC_TIMEOUT_E;
+        }
+        HAL_CRYP_DeInit(&hcryp);
+
+    #else /* STD_PERI_LIB */
+        ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit);
+        if (ret != 0)
+            return ret;
+
+        /* reset registers to their default values */
+        CRYP_DeInit();
+
+        /* set direction and key */
+        CRYP_KeyInit(&keyInit);
+        cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt;
+        cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key;
+        CRYP_Init(&cryptInit);
+
+        /* enable crypto processor */
+        CRYP_Cmd(ENABLE);
+
+        /* wait until decrypt key has been initialized */
+        while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
+
+        /* set direction and mode */
+        cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt;
+        cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB;
+        CRYP_Init(&cryptInit);
+
+        /* enable crypto processor */
+        CRYP_Cmd(ENABLE);
+
+        /* flush IN/OUT FIFOs */
+        CRYP_FIFOFlush();
+
+        CRYP_DataIn(*(uint32_t*)&inBlock[0]);
+        CRYP_DataIn(*(uint32_t*)&inBlock[4]);
+        CRYP_DataIn(*(uint32_t*)&inBlock[8]);
+        CRYP_DataIn(*(uint32_t*)&inBlock[12]);
+
+        /* wait until the complete message has been processed */
+        while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
+
+        *(uint32_t*)&outBlock[0]  = CRYP_DataOut();
+        *(uint32_t*)&outBlock[4]  = CRYP_DataOut();
+        *(uint32_t*)&outBlock[8]  = CRYP_DataOut();
+        *(uint32_t*)&outBlock[12] = CRYP_DataOut();
+
+        /* disable crypto processor */
+        CRYP_Cmd(DISABLE);
+    #endif /* WOLFSSL_STM32_CUBEMX */
+
+        return ret;
+    }
+    #endif /* WOLFSSL_AES_DIRECT || HAVE_AESCCM */
+#endif /* HAVE_AES_DECRYPT */
+
+#elif defined(HAVE_COLDFIRE_SEC)
+    /* Freescale Coldfire SEC support for CBC mode.
+     * NOTE: no support for AES-CTR/GCM/CCM/Direct */
+    #include <wolfssl/wolfcrypt/types.h>
+    #include "sec.h"
+    #include "mcf5475_sec.h"
+    #include "mcf5475_siu.h"
+#elif defined(FREESCALE_LTC)
+    #include "fsl_ltc.h"
+    #if defined(FREESCALE_LTC_AES_GCM)
+        #undef NEED_AES_TABLES
+        #undef GCM_TABLE
+    #else
+        /* if LTC doesn't have GCM, use software with LTC AES ECB mode */
+        static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+        {
+            wc_AesEncryptDirect(aes, outBlock, inBlock);
+            return 0;
+        }
+        static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+        {
+            wc_AesDecryptDirect(aes, outBlock, inBlock);
+            return 0;
+        }
+    #endif
+#elif defined(FREESCALE_MMCAU)
+    /* Freescale mmCAU hardware AES support for Direct, CBC, CCM, GCM modes
+     * through the CAU/mmCAU library. Documentation located in
+     * ColdFire/ColdFire+ CAU and Kinetis mmCAU Software Library User
+     * Guide (See note in README).
+     */
+    #ifdef FREESCALE_MMCAU_CLASSIC
+        /* MMCAU 1.4 library used with non-KSDK / classic MQX builds */
+        #include "cau_api.h"
+    #else
+        #include "fsl_mmcau.h"
+    #endif
+
+    static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+    {
+        int ret;
+
+    #ifdef FREESCALE_MMCAU_CLASSIC
+        if ((wolfssl_word)outBlock % WOLFSSL_MMCAU_ALIGNMENT) {
+            WOLFSSL_MSG("Bad cau_aes_encrypt alignment");
+            return BAD_ALIGN_E;
+        }
+    #endif
+
+        ret = wolfSSL_CryptHwMutexLock();
+        if(ret == 0) {
+        #ifdef FREESCALE_MMCAU_CLASSIC
+            cau_aes_encrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock);
+        #else
+            MMCAU_AES_EncryptEcb(inBlock, (byte*)aes->key, aes->rounds,
+                                 outBlock);
+        #endif
+            wolfSSL_CryptHwMutexUnLock();
+        }
+        return ret;
+    }
+    #ifdef HAVE_AES_DECRYPT
+    static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+    {
+        int ret;
+
+    #ifdef FREESCALE_MMCAU_CLASSIC
+        if ((wolfssl_word)outBlock % WOLFSSL_MMCAU_ALIGNMENT) {
+            WOLFSSL_MSG("Bad cau_aes_decrypt alignment");
+            return BAD_ALIGN_E;
+        }
+    #endif
+
+        ret = wolfSSL_CryptHwMutexLock();
+        if(ret == 0) {
+        #ifdef FREESCALE_MMCAU_CLASSIC
+            cau_aes_decrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock);
+        #else
+            MMCAU_AES_DecryptEcb(inBlock, (byte*)aes->key, aes->rounds,
+                                 outBlock);
+        #endif
+            wolfSSL_CryptHwMutexUnLock();
+        }
+        return ret;
+    }
+    #endif /* HAVE_AES_DECRYPT */
+
+#elif defined(WOLFSSL_PIC32MZ_CRYPT)
+
+    #include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
+
+    #if defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT)
+    static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+    {
+        return wc_Pic32AesCrypt(aes->key, aes->keylen, NULL, 0,
+            outBlock, inBlock, AES_BLOCK_SIZE,
+            PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RECB);
+    }
+    #endif
+
+    #if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT)
+    static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+    {
+        return wc_Pic32AesCrypt(aes->key, aes->keylen, NULL, 0,
+            outBlock, inBlock, AES_BLOCK_SIZE,
+            PIC32_DECRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RECB);
+    }
+    #endif
+
+#elif defined(WOLFSSL_NRF51_AES)
+    /* Use built-in AES hardware - AES 128 ECB Encrypt Only */
+    #include "wolfssl/wolfcrypt/port/nrf51.h"
+
+    static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+    {
+        return nrf51_aes_encrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock);
+    }
+
+    #ifdef HAVE_AES_DECRYPT
+        #error nRF51 AES Hardware does not support decrypt
+    #endif /* HAVE_AES_DECRYPT */
+
+#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES)
+
+    #include "wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h"
+
+    #if defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT)
+    static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+    {
+        return wc_esp32AesEncrypt(aes, inBlock, outBlock);
+    }
+    #endif
+
+    #if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT)
+    static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+    {
+        return wc_esp32AesDecrypt(aes, inBlock, outBlock);
+    }
+    #endif
+
+#elif defined(WOLFSSL_AESNI)
+
+    #define NEED_AES_TABLES
+
+    /* Each platform needs to query info type 1 from cpuid to see if aesni is
+     * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
+     */
+
+    #ifndef AESNI_ALIGN
+        #define AESNI_ALIGN 16
+    #endif
+
+    #ifdef _MSC_VER
+        #define XASM_LINK(f)
+    #elif defined(__APPLE__)
+        #define XASM_LINK(f) asm("_" f)
+    #else
+        #define XASM_LINK(f) asm(f)
+    #endif /* _MSC_VER */
+
+    static int checkAESNI = 0;
+    static int haveAESNI  = 0;
+    static word32 intel_flags = 0;
+
+    static int Check_CPU_support_AES(void)
+    {
+        intel_flags = cpuid_get_flags();
+
+        return IS_INTEL_AESNI(intel_flags) != 0;
+    }
+
+
+    /* tell C compiler these are asm functions in case any mix up of ABI underscore
+       prefix between clang/gcc/llvm etc */
+    #ifdef HAVE_AES_CBC
+    void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
+                         unsigned char* ivec, unsigned long length,
+                         const unsigned char* KS, int nr)
+                         XASM_LINK("AES_CBC_encrypt");
+
+    #ifdef HAVE_AES_DECRYPT
+        #if defined(WOLFSSL_AESNI_BY4)
+        void AES_CBC_decrypt_by4(const unsigned char* in, unsigned char* out,
+                                 unsigned char* ivec, unsigned long length,
+                                 const unsigned char* KS, int nr)
+                                 XASM_LINK("AES_CBC_decrypt_by4");
+        #elif defined(WOLFSSL_AESNI_BY6)
+        void AES_CBC_decrypt_by6(const unsigned char* in, unsigned char* out,
+                                 unsigned char* ivec, unsigned long length,
+                                 const unsigned char* KS, int nr)
+                                 XASM_LINK("AES_CBC_decrypt_by6");
+        #else /* WOLFSSL_AESNI_BYx */
+        void AES_CBC_decrypt_by8(const unsigned char* in, unsigned char* out,
+                                 unsigned char* ivec, unsigned long length,
+                                 const unsigned char* KS, int nr)
+                                 XASM_LINK("AES_CBC_decrypt_by8");
+        #endif /* WOLFSSL_AESNI_BYx */
+    #endif /* HAVE_AES_DECRYPT */
+    #endif /* HAVE_AES_CBC */
+
+    void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
+                         unsigned long length, const unsigned char* KS, int nr)
+                         XASM_LINK("AES_ECB_encrypt");
+
+    #ifdef HAVE_AES_DECRYPT
+    void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
+                         unsigned long length, const unsigned char* KS, int nr)
+                         XASM_LINK("AES_ECB_decrypt");
+    #endif
+
+    void AES_128_Key_Expansion(const unsigned char* userkey,
+                               unsigned char* key_schedule)
+                               XASM_LINK("AES_128_Key_Expansion");
+
+    void AES_192_Key_Expansion(const unsigned char* userkey,
+                               unsigned char* key_schedule)
+                               XASM_LINK("AES_192_Key_Expansion");
+
+    void AES_256_Key_Expansion(const unsigned char* userkey,
+                               unsigned char* key_schedule)
+                               XASM_LINK("AES_256_Key_Expansion");
+
+
+    static int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+                                   Aes* aes)
+    {
+        int ret;
+
+        if (!userKey || !aes)
+            return BAD_FUNC_ARG;
+
+        switch (bits) {
+            case 128:
+               AES_128_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 10;
+               return 0;
+            case 192:
+               AES_192_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 12;
+               return 0;
+            case 256:
+               AES_256_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 14;
+               return 0;
+            default:
+                ret = BAD_FUNC_ARG;
+        }
+
+        return ret;
+    }
+
+    #ifdef HAVE_AES_DECRYPT
+    static int AES_set_decrypt_key(const unsigned char* userKey,
+                                   const int bits, Aes* aes)
+    {
+        int nr;
+        Aes temp_key;
+        __m128i *Key_Schedule = (__m128i*)aes->key;
+        __m128i *Temp_Key_Schedule = (__m128i*)temp_key.key;
+
+        if (!userKey || !aes)
+            return BAD_FUNC_ARG;
+
+        if (AES_set_encrypt_key(userKey,bits,&temp_key) == BAD_FUNC_ARG)
+            return BAD_FUNC_ARG;
+
+        nr = temp_key.rounds;
+        aes->rounds = nr;
+
+        Key_Schedule[nr] = Temp_Key_Schedule[0];
+        Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]);
+        Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]);
+        Key_Schedule[nr-3] = _mm_aesimc_si128(Temp_Key_Schedule[3]);
+        Key_Schedule[nr-4] = _mm_aesimc_si128(Temp_Key_Schedule[4]);
+        Key_Schedule[nr-5] = _mm_aesimc_si128(Temp_Key_Schedule[5]);
+        Key_Schedule[nr-6] = _mm_aesimc_si128(Temp_Key_Schedule[6]);
+        Key_Schedule[nr-7] = _mm_aesimc_si128(Temp_Key_Schedule[7]);
+        Key_Schedule[nr-8] = _mm_aesimc_si128(Temp_Key_Schedule[8]);
+        Key_Schedule[nr-9] = _mm_aesimc_si128(Temp_Key_Schedule[9]);
+
+        if (nr>10) {
+            Key_Schedule[nr-10] = _mm_aesimc_si128(Temp_Key_Schedule[10]);
+            Key_Schedule[nr-11] = _mm_aesimc_si128(Temp_Key_Schedule[11]);
+        }
+
+        if (nr>12) {
+            Key_Schedule[nr-12] = _mm_aesimc_si128(Temp_Key_Schedule[12]);
+            Key_Schedule[nr-13] = _mm_aesimc_si128(Temp_Key_Schedule[13]);
+        }
+
+        Key_Schedule[0] = Temp_Key_Schedule[nr];
+
+        return 0;
+    }
+    #endif /* HAVE_AES_DECRYPT */
+
+#elif (defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)) || \
+      ((defined(WOLFSSL_AFALG) || defined(WOLFSSL_DEVCRYPTO_AES)) && \
+        defined(HAVE_AESCCM))
+    static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+    {
+        wc_AesEncryptDirect(aes, outBlock, inBlock);
+        return 0;
+    }
+
+#elif defined(WOLFSSL_AFALG)
+#elif defined(WOLFSSL_DEVCRYPTO_AES)
+
+#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES)
+    #include "hal_data.h"
+
+    #ifndef WOLFSSL_SCE_AES256_HANDLE
+        #define WOLFSSL_SCE_AES256_HANDLE g_sce_aes_256
+    #endif
+
+    #ifndef WOLFSSL_SCE_AES192_HANDLE
+        #define WOLFSSL_SCE_AES192_HANDLE g_sce_aes_192
+    #endif
+
+    #ifndef WOLFSSL_SCE_AES128_HANDLE
+        #define WOLFSSL_SCE_AES128_HANDLE g_sce_aes_128
+    #endif
+
+    static int AES_ECB_encrypt(Aes* aes, const byte* inBlock, byte* outBlock,
+            int sz)
+    {
+        uint32_t ret;
+
+        if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag ==
+                CRYPTO_WORD_ENDIAN_BIG) {
+            ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz);
+        }
+
+        switch (aes->keylen) {
+        #ifdef WOLFSSL_AES_128
+            case AES_128_KEY_SIZE:
+                ret = WOLFSSL_SCE_AES128_HANDLE.p_api->encrypt(
+                        WOLFSSL_SCE_AES128_HANDLE.p_ctrl, aes->key,
+                        NULL, (sz / sizeof(word32)), (word32*)inBlock,
+                        (word32*)outBlock);
+                break;
+        #endif
+        #ifdef WOLFSSL_AES_192
+            case AES_192_KEY_SIZE:
+                ret = WOLFSSL_SCE_AES192_HANDLE.p_api->encrypt(
+                        WOLFSSL_SCE_AES192_HANDLE.p_ctrl, aes->key,
+                        NULL, (sz / sizeof(word32)), (word32*)inBlock,
+                        (word32*)outBlock);
+                break;
+        #endif
+        #ifdef WOLFSSL_AES_256
+            case AES_256_KEY_SIZE:
+                ret = WOLFSSL_SCE_AES256_HANDLE.p_api->encrypt(
+                        WOLFSSL_SCE_AES256_HANDLE.p_ctrl, aes->key,
+                        NULL, (sz / sizeof(word32)), (word32*)inBlock,
+                        (word32*)outBlock);
+                break;
+        #endif
+            default:
+                WOLFSSL_MSG("Unknown key size");
+                return BAD_FUNC_ARG;
+        }
+
+        if (ret != SSP_SUCCESS) {
+            /* revert input */
+            ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz);
+            return WC_HW_E;
+        }
+
+        if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag ==
+                CRYPTO_WORD_ENDIAN_BIG) {
+            ByteReverseWords((word32*)outBlock, (word32*)outBlock, sz);
+            if (inBlock != outBlock) {
+                /* revert input */
+                ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz);
+            }
+        }
+        return 0;
+    }
+
+    #if defined(HAVE_AES_DECRYPT)
+    static int AES_ECB_decrypt(Aes* aes, const byte* inBlock, byte* outBlock,
+            int sz)
+    {
+        uint32_t ret;
+
+        if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag ==
+                CRYPTO_WORD_ENDIAN_BIG) {
+            ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz);
+        }
+
+        switch (aes->keylen) {
+        #ifdef WOLFSSL_AES_128
+            case AES_128_KEY_SIZE:
+                ret = WOLFSSL_SCE_AES128_HANDLE.p_api->decrypt(
+                        WOLFSSL_SCE_AES128_HANDLE.p_ctrl, aes->key, aes->reg,
+                        (sz / sizeof(word32)), (word32*)inBlock,
+                        (word32*)outBlock);
+                break;
+        #endif
+        #ifdef WOLFSSL_AES_192
+            case AES_192_KEY_SIZE:
+                ret = WOLFSSL_SCE_AES192_HANDLE.p_api->decrypt(
+                        WOLFSSL_SCE_AES192_HANDLE.p_ctrl, aes->key, aes->reg,
+                        (sz / sizeof(word32)), (word32*)inBlock,
+                        (word32*)outBlock);
+                break;
+        #endif
+        #ifdef WOLFSSL_AES_256
+            case AES_256_KEY_SIZE:
+                ret = WOLFSSL_SCE_AES256_HANDLE.p_api->decrypt(
+                        WOLFSSL_SCE_AES256_HANDLE.p_ctrl, aes->key, aes->reg,
+                        (sz / sizeof(word32)), (word32*)inBlock,
+                        (word32*)outBlock);
+                break;
+        #endif
+            default:
+                WOLFSSL_MSG("Unknown key size");
+                return BAD_FUNC_ARG;
+        }
+        if (ret != SSP_SUCCESS) {
+            return WC_HW_E;
+        }
+
+        if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag ==
+                CRYPTO_WORD_ENDIAN_BIG) {
+            ByteReverseWords((word32*)outBlock, (word32*)outBlock, sz);
+            if (inBlock != outBlock) {
+                /* revert input */
+                ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz);
+            }
+        }
+
+        return 0;
+    }
+
+    #endif
+
+    #if defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT)
+    static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+    {
+        return AES_ECB_encrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE);
+    }
+    #endif
+
+    #if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT)
+    static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+    {
+        return AES_ECB_decrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE);
+    }
+    #endif
+#else
+
+    /* using wolfCrypt software implementation */
+    #define NEED_AES_TABLES
+#endif
+
+
+
+#ifdef NEED_AES_TABLES
+
+static const word32 rcon[] = {
+    0x01000000, 0x02000000, 0x04000000, 0x08000000,
+    0x10000000, 0x20000000, 0x40000000, 0x80000000,
+    0x1B000000, 0x36000000,
+    /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
+
+#ifndef WOLFSSL_AES_SMALL_TABLES
+static const word32 Te[4][256] = {
+{
+    0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
+    0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
+    0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
+    0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
+    0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
+    0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
+    0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
+    0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
+    0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
+    0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
+    0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
+    0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
+    0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
+    0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
+    0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
+    0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
+    0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
+    0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
+    0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
+    0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
+    0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
+    0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
+    0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
+    0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
+    0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
+    0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
+    0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
+    0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
+    0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
+    0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
+    0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
+    0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
+    0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
+    0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
+    0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
+    0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
+    0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
+    0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
+    0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
+    0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
+    0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
+    0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
+    0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
+    0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
+    0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
+    0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
+    0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
+    0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
+    0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
+    0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
+    0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
+    0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
+    0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
+    0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
+    0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
+    0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
+    0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
+    0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
+    0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
+    0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
+    0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
+    0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
+    0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
+    0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
+},
+{
+    0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
+    0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
+    0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
+    0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
+    0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
+    0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
+    0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
+    0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
+    0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
+    0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
+    0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
+    0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
+    0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
+    0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
+    0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
+    0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
+    0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
+    0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
+    0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
+    0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
+    0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
+    0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
+    0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
+    0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
+    0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
+    0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
+    0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
+    0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
+    0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
+    0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
+    0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
+    0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
+    0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
+    0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
+    0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
+    0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
+    0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
+    0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
+    0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
+    0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
+    0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
+    0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
+    0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
+    0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
+    0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
+    0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
+    0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
+    0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
+    0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
+    0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
+    0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
+    0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
+    0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
+    0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
+    0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
+    0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
+    0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
+    0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
+    0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
+    0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
+    0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
+    0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
+    0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
+    0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
+},
+{
+    0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
+    0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
+    0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
+    0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
+    0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
+    0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
+    0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
+    0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
+    0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
+    0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
+    0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
+    0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
+    0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
+    0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
+    0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
+    0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
+    0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
+    0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
+    0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
+    0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
+    0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
+    0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
+    0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
+    0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
+    0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
+    0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
+    0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
+    0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
+    0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
+    0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
+    0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
+    0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
+    0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
+    0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
+    0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
+    0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
+    0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
+    0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
+    0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
+    0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
+    0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
+    0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
+    0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
+    0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
+    0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
+    0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
+    0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
+    0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
+    0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
+    0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
+    0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
+    0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
+    0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
+    0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
+    0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
+    0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
+    0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
+    0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
+    0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
+    0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
+    0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
+    0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
+    0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
+    0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
+},
+{
+    0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
+    0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
+    0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
+    0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
+    0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
+    0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
+    0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
+    0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
+    0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
+    0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
+    0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
+    0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
+    0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
+    0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
+    0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
+    0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
+    0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
+    0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
+    0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
+    0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
+    0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
+    0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
+    0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
+    0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
+    0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
+    0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
+    0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
+    0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
+    0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
+    0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
+    0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
+    0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
+    0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
+    0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
+    0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
+    0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
+    0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
0xdcdc7fa3U, + 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU, + 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U, + 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU, + 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U, + 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U, + 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U, + 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U, + 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU, + 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U, + 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU, + 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U, + 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU, + 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U, + 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU, + 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU, + 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU, + 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU, + 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U, + 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U, + 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U, + 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U, + 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U, + 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U, + 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU, + 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U, + 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU, + 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU, +} +}; + +#ifdef HAVE_AES_DECRYPT +static const word32 Td[4][256] = { +{ + 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, + 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, + 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, + 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU, + 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U, + 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U, + 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU, + 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U, + 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU, + 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U, + 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U, + 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U, + 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U, + 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU, + 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U, + 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU, + 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U, + 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU, + 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U, + 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U, + 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U, + 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU, + 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U, + 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU, + 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U, + 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU, + 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U, + 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU, + 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU, + 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U, + 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU, + 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U, + 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU, + 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U, + 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U, + 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U, + 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU, + 
0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U, + 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U, + 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU, + 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U, + 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U, + 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U, + 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U, + 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U, + 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU, + 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U, + 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U, + 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U, + 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U, + 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U, + 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU, + 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU, + 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU, + 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU, + 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U, + 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U, + 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU, + 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU, + 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U, + 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU, + 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U, + 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U, + 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U, +}, +{ + 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU, + 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U, + 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU, + 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U, + 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U, + 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U, + 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U, + 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U, + 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U, + 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU, + 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU, + 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU, + 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U, + 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU, + 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U, + 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U, + 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U, + 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU, + 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU, + 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U, + 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU, + 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U, + 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU, + 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU, + 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U, + 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U, + 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U, + 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU, + 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U, + 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU, + 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U, + 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U, + 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U, + 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU, + 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U, + 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U, + 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U, + 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U, + 0x6c2bb316U, 0x99a970b9U, 
0xfa119448U, 0x2247e964U, + 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U, + 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU, + 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU, + 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U, + 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU, + 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U, + 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU, + 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU, + 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U, + 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU, + 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U, + 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U, + 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U, + 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U, + 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U, + 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U, + 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U, + 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU, + 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U, + 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U, + 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU, + 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U, + 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U, + 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U, + 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U, +}, +{ + 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U, + 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U, + 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U, + 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U, + 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU, + 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U, + 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U, + 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U, + 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U, + 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU, + 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U, + 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U, + 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU, + 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U, + 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U, + 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U, + 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U, + 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U, + 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U, + 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU, + + 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U, + 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U, + 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U, + 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U, + 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U, + 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU, + 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU, + 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U, + 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU, + 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U, + 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU, + 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU, + 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU, + 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU, + 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U, + 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U, + 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U, + 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U, + 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U, + 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U, + 
0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U, + 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU, + 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU, + 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U, + 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U, + 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU, + 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU, + 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U, + 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U, + 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U, + 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U, + 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U, + 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U, + 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U, + 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU, + 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U, + 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U, + 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U, + 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U, + 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U, + 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U, + 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU, + 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U, + 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U, +}, +{ + 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU, + 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU, + 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U, + 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U, + 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU, + 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU, + 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U, + 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU, + 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U, + 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU, + 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U, + 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U, + 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U, + 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U, + 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U, + 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU, + 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU, + 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U, + 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U, + 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU, + 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU, + 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U, + 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U, + 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U, + 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U, + 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU, + 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U, + 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U, + 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU, + 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU, + 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U, + 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U, + 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U, + 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU, + 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U, + 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U, + 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U, + 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U, + 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U, + 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U, + 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U, + 0xf581cfa6U, 0x7ade28a5U, 
0xb78e26daU, 0xadbfa43fU, + 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U, + 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U, + 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU, + 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU, + 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U, + 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU, + 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U, + 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U, + 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U, + 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U, + 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U, + 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U, + 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU, + 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU, + 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU, + 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU, + 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U, + 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U, + 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U, + 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU, + 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U, + 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U, +} +}; +#endif /* HAVE_AES_DECRYPT */ +#endif + +#ifdef HAVE_AES_DECRYPT +#if (defined(HAVE_AES_CBC) && !defined(WOLFSSL_DEVCRYPTO_CBC)) \ + || defined(WOLFSSL_AES_DIRECT) +static const byte Td4[256] = +{ + 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U, + 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU, + 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U, + 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU, + 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU, + 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU, + 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U, + 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U, + 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U, + 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U, + 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU, + 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U, + 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU, + 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U, + 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U, + 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU, + 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU, + 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U, + 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U, + 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU, + 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U, + 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU, + 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U, + 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U, + 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U, + 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU, + 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU, + 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU, + 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U, + 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U, + 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U, + 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU, +}; +#endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT */ +#endif /* HAVE_AES_DECRYPT */ + +#define GETBYTE(x, y) (word32)((byte)((x) >> (8 * (y)))) + +#ifdef WOLFSSL_AES_SMALL_TABLES +static const byte Tsbox[256] = { + 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U, + 0x30U, 
0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U, + 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U, + 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U, + 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU, + 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U, + 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU, + 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U, + 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U, + 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U, + 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU, + 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU, + 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U, + 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U, + 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U, + 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U, + 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U, + 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U, + 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U, + 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU, + 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU, + 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U, + 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U, + 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U, + 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U, + 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU, + 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU, + 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU, + 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U, + 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU, + 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U, + 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U +}; + +#define AES_XTIME(x) ((byte)((byte)((x) << 1) ^ ((0 - ((x) >> 7)) & 0x1b))) + +static word32 col_mul(word32 t, int i2, int i3, int ia, int ib) +{ + byte t3 = GETBYTE(t, i3); + byte tm = AES_XTIME(GETBYTE(t, i2) ^ t3); + + return GETBYTE(t, ia) ^ GETBYTE(t, ib) ^ t3 ^ tm; +} + +static word32 inv_col_mul(word32 t, int i9, int ib, int id, int ie) +{ + byte t9 = GETBYTE(t, i9); + byte tb = GETBYTE(t, ib); + byte td = GETBYTE(t, id); + byte te = GETBYTE(t, ie); + byte t0 = t9 ^ tb ^ td; + return t0 ^ AES_XTIME(AES_XTIME(AES_XTIME(t0 ^ te) ^ td ^ te) ^ tb ^ te); +} +#endif + +#if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESGCM) + +#ifndef WC_CACHE_LINE_SZ + #if defined(__x86_64__) || defined(_M_X64) || \ + (defined(__ILP32__) && (__ILP32__ >= 1)) + #define WC_CACHE_LINE_SZ 64 + #else + /* default cache line size */ + #define WC_CACHE_LINE_SZ 32 + #endif +#endif + + +#ifndef WOLFSSL_AES_SMALL_TABLES +/* load 4 Te Tables into cache by cache line stride */ +static WC_INLINE word32 PreFetchTe(void) +{ + word32 x = 0; + int i,j; + + for (i = 0; i < 4; i++) { + /* 256 elements, each one is 4 bytes */ + for (j = 0; j < 256; j += WC_CACHE_LINE_SZ/4) { + x &= Te[i][j]; + } + } + return x; +} +#else +/* load sbox into cache by cache line stride */ +static WC_INLINE word32 PreFetchSBox(void) +{ + word32 x = 0; + int i; + + for (i = 0; i < 256; i += WC_CACHE_LINE_SZ/4) { + x &= Tsbox[i]; + } + return x; +} +#endif + +/* Software AES - ECB Encrypt */ +static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) +{ + word32 s0, s1, s2, s3; + word32 t0, t1, t2, t3; + word32 r = aes->rounds >> 1; + const word32* rk = aes->key; + + if (r > 7 || r == 0) { + WOLFSSL_MSG("AesEncrypt encountered improper key, set it 
up"); + return; /* stop instead of seg-faulting, set up your keys! */ + } + +#ifdef WOLFSSL_AESNI + if (haveAESNI && aes->use_aesni) { + #ifdef DEBUG_AESNI + printf("about to aes encrypt\n"); + printf("in = %p\n", inBlock); + printf("out = %p\n", outBlock); + printf("aes->key = %p\n", aes->key); + printf("aes->rounds = %d\n", aes->rounds); + printf("sz = %d\n", AES_BLOCK_SIZE); + #endif + + /* check alignment, decrypt doesn't need alignment */ + if ((wolfssl_word)inBlock % AESNI_ALIGN) { + #ifndef NO_WOLFSSL_ALLOC_ALIGN + byte* tmp = (byte*)XMALLOC(AES_BLOCK_SIZE + AESNI_ALIGN, aes->heap, + DYNAMIC_TYPE_TMP_BUFFER); + byte* tmp_align; + if (tmp == NULL) return; + + tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN)); + + XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE); + AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE, + (byte*)aes->key, aes->rounds); + XMEMCPY(outBlock, tmp_align, AES_BLOCK_SIZE); + XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + return; + #else + WOLFSSL_MSG("AES-ECB encrypt with bad alignment"); + return; + #endif + } + + AES_ECB_encrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key, + aes->rounds); + + return; + } + else { + #ifdef DEBUG_AESNI + printf("Skipping AES-NI\n"); + #endif + } +#endif +#if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES) + AES_ECB_encrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE); + return; +#endif + + /* + * map byte array block to cipher state + * and add initial round key: + */ + XMEMCPY(&s0, inBlock, sizeof(s0)); + XMEMCPY(&s1, inBlock + sizeof(s0), sizeof(s1)); + XMEMCPY(&s2, inBlock + 2 * sizeof(s0), sizeof(s2)); + XMEMCPY(&s3, inBlock + 3 * sizeof(s0), sizeof(s3)); + +#ifdef LITTLE_ENDIAN_ORDER + s0 = ByteReverseWord32(s0); + s1 = ByteReverseWord32(s1); + s2 = ByteReverseWord32(s2); + s3 = ByteReverseWord32(s3); +#endif + + /* AddRoundKey */ + s0 ^= rk[0]; + s1 ^= rk[1]; + s2 ^= rk[2]; + s3 ^= rk[3]; + +#ifndef WOLFSSL_AES_SMALL_TABLES + s0 |= PreFetchTe(); + + /* + * Nr - 1 full rounds: + */ + + for (;;) { + t0 = + Te[0][GETBYTE(s0, 3)] ^ + Te[1][GETBYTE(s1, 2)] ^ + Te[2][GETBYTE(s2, 1)] ^ + Te[3][GETBYTE(s3, 0)] ^ + rk[4]; + t1 = + Te[0][GETBYTE(s1, 3)] ^ + Te[1][GETBYTE(s2, 2)] ^ + Te[2][GETBYTE(s3, 1)] ^ + Te[3][GETBYTE(s0, 0)] ^ + rk[5]; + t2 = + Te[0][GETBYTE(s2, 3)] ^ + Te[1][GETBYTE(s3, 2)] ^ + Te[2][GETBYTE(s0, 1)] ^ + Te[3][GETBYTE(s1, 0)] ^ + rk[6]; + t3 = + Te[0][GETBYTE(s3, 3)] ^ + Te[1][GETBYTE(s0, 2)] ^ + Te[2][GETBYTE(s1, 1)] ^ + Te[3][GETBYTE(s2, 0)] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Te[0][GETBYTE(t0, 3)] ^ + Te[1][GETBYTE(t1, 2)] ^ + Te[2][GETBYTE(t2, 1)] ^ + Te[3][GETBYTE(t3, 0)] ^ + rk[0]; + s1 = + Te[0][GETBYTE(t1, 3)] ^ + Te[1][GETBYTE(t2, 2)] ^ + Te[2][GETBYTE(t3, 1)] ^ + Te[3][GETBYTE(t0, 0)] ^ + rk[1]; + s2 = + Te[0][GETBYTE(t2, 3)] ^ + Te[1][GETBYTE(t3, 2)] ^ + Te[2][GETBYTE(t0, 1)] ^ + Te[3][GETBYTE(t1, 0)] ^ + rk[2]; + s3 = + Te[0][GETBYTE(t3, 3)] ^ + Te[1][GETBYTE(t0, 2)] ^ + Te[2][GETBYTE(t1, 1)] ^ + Te[3][GETBYTE(t2, 0)] ^ + rk[3]; + } + + /* + * apply last round and + * map cipher state to byte array block: + */ + + s0 = + (Te[2][GETBYTE(t0, 3)] & 0xff000000) ^ + (Te[3][GETBYTE(t1, 2)] & 0x00ff0000) ^ + (Te[0][GETBYTE(t2, 1)] & 0x0000ff00) ^ + (Te[1][GETBYTE(t3, 0)] & 0x000000ff) ^ + rk[0]; + s1 = + (Te[2][GETBYTE(t1, 3)] & 0xff000000) ^ + (Te[3][GETBYTE(t2, 2)] & 0x00ff0000) ^ + (Te[0][GETBYTE(t3, 1)] & 0x0000ff00) ^ + (Te[1][GETBYTE(t0, 0)] & 0x000000ff) ^ + rk[1]; + s2 = + (Te[2][GETBYTE(t2, 3)] & 0xff000000) ^ + (Te[3][GETBYTE(t3, 2)] & 0x00ff0000) 
^ + (Te[0][GETBYTE(t0, 1)] & 0x0000ff00) ^ + (Te[1][GETBYTE(t1, 0)] & 0x000000ff) ^ + rk[2]; + s3 = + (Te[2][GETBYTE(t3, 3)] & 0xff000000) ^ + (Te[3][GETBYTE(t0, 2)] & 0x00ff0000) ^ + (Te[0][GETBYTE(t1, 1)] & 0x0000ff00) ^ + (Te[1][GETBYTE(t2, 0)] & 0x000000ff) ^ + rk[3]; +#else + s0 |= PreFetchSBox(); + + r *= 2; + /* Two rounds at a time */ + for (rk += 4; r > 1; r--, rk += 4) { + t0 = + ((word32)Tsbox[GETBYTE(s0, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s1, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s2, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s3, 0)]); + t1 = + ((word32)Tsbox[GETBYTE(s1, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s2, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s3, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s0, 0)]); + t2 = + ((word32)Tsbox[GETBYTE(s2, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s3, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s0, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s1, 0)]); + t3 = + ((word32)Tsbox[GETBYTE(s3, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s0, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s1, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s2, 0)]); + + s0 = + (col_mul(t0, 3, 2, 0, 1) << 24) ^ + (col_mul(t0, 2, 1, 0, 3) << 16) ^ + (col_mul(t0, 1, 0, 2, 3) << 8) ^ + (col_mul(t0, 0, 3, 2, 1) ) ^ + rk[0]; + s1 = + (col_mul(t1, 3, 2, 0, 1) << 24) ^ + (col_mul(t1, 2, 1, 0, 3) << 16) ^ + (col_mul(t1, 1, 0, 2, 3) << 8) ^ + (col_mul(t1, 0, 3, 2, 1) ) ^ + rk[1]; + s2 = + (col_mul(t2, 3, 2, 0, 1) << 24) ^ + (col_mul(t2, 2, 1, 0, 3) << 16) ^ + (col_mul(t2, 1, 0, 2, 3) << 8) ^ + (col_mul(t2, 0, 3, 2, 1) ) ^ + rk[2]; + s3 = + (col_mul(t3, 3, 2, 0, 1) << 24) ^ + (col_mul(t3, 2, 1, 0, 3) << 16) ^ + (col_mul(t3, 1, 0, 2, 3) << 8) ^ + (col_mul(t3, 0, 3, 2, 1) ) ^ + rk[3]; + } + + t0 = + ((word32)Tsbox[GETBYTE(s0, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s1, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s2, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s3, 0)]); + t1 = + ((word32)Tsbox[GETBYTE(s1, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s2, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s3, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s0, 0)]); + t2 = + ((word32)Tsbox[GETBYTE(s2, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s3, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s0, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s1, 0)]); + t3 = + ((word32)Tsbox[GETBYTE(s3, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s0, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s1, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s2, 0)]); + s0 = t0 ^ rk[0]; + s1 = t1 ^ rk[1]; + s2 = t2 ^ rk[2]; + s3 = t3 ^ rk[3]; +#endif + + /* write out */ +#ifdef LITTLE_ENDIAN_ORDER + s0 = ByteReverseWord32(s0); + s1 = ByteReverseWord32(s1); + s2 = ByteReverseWord32(s2); + s3 = ByteReverseWord32(s3); +#endif + + XMEMCPY(outBlock, &s0, sizeof(s0)); + XMEMCPY(outBlock + sizeof(s0), &s1, sizeof(s1)); + XMEMCPY(outBlock + 2 * sizeof(s0), &s2, sizeof(s2)); + XMEMCPY(outBlock + 3 * sizeof(s0), &s3, sizeof(s3)); + +} +#endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT || HAVE_AESGCM */ + +#if defined(HAVE_AES_DECRYPT) +#if (defined(HAVE_AES_CBC) && !defined(WOLFSSL_DEVCRYPTO_CBC)) || \ + defined(WOLFSSL_AES_DIRECT) + +#ifndef WOLFSSL_AES_SMALL_TABLES +/* load 4 Td Tables into cache by cache line stride */ +static WC_INLINE word32 PreFetchTd(void) +{ + word32 x = 0; + int i,j; + + for (i = 0; i < 4; i++) { + /* 256 elements, each one is 4 bytes */ + for (j = 0; j < 256; j += WC_CACHE_LINE_SZ/4) { + x &= Td[i][j]; + } + } + return x; +} +#endif + +/* load Td Table4 into cache by cache line stride */ +static WC_INLINE word32 PreFetchTd4(void) +{ + word32 x = 0; + int i; + + for (i = 0; i < 256; i += WC_CACHE_LINE_SZ) { + x &= (word32)Td4[i]; + } + return 
x; +} + +/* Software AES - ECB Decrypt */ +static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) +{ + word32 s0, s1, s2, s3; + word32 t0, t1, t2, t3; + word32 r = aes->rounds >> 1; + + const word32* rk = aes->key; + if (r > 7 || r == 0) { + WOLFSSL_MSG("AesDecrypt encountered improper key, set it up"); + return; /* stop instead of seg-faulting, set up your keys! */ + } +#ifdef WOLFSSL_AESNI + if (haveAESNI && aes->use_aesni) { + #ifdef DEBUG_AESNI + printf("about to aes decrypt\n"); + printf("in = %p\n", inBlock); + printf("out = %p\n", outBlock); + printf("aes->key = %p\n", aes->key); + printf("aes->rounds = %d\n", aes->rounds); + printf("sz = %d\n", AES_BLOCK_SIZE); + #endif + + /* if input and output same will overwrite input iv */ + if ((const byte*)aes->tmp != inBlock) + XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE); + AES_ECB_decrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key, + aes->rounds); + return; + } + else { + #ifdef DEBUG_AESNI + printf("Skipping AES-NI\n"); + #endif + } +#endif /* WOLFSSL_AESNI */ +#if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES) + return AES_ECB_decrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE); +#endif + + /* + * map byte array block to cipher state + * and add initial round key: + */ + XMEMCPY(&s0, inBlock, sizeof(s0)); + XMEMCPY(&s1, inBlock + sizeof(s0), sizeof(s1)); + XMEMCPY(&s2, inBlock + 2 * sizeof(s0), sizeof(s2)); + XMEMCPY(&s3, inBlock + 3 * sizeof(s0), sizeof(s3)); + +#ifdef LITTLE_ENDIAN_ORDER + s0 = ByteReverseWord32(s0); + s1 = ByteReverseWord32(s1); + s2 = ByteReverseWord32(s2); + s3 = ByteReverseWord32(s3); +#endif + + s0 ^= rk[0]; + s1 ^= rk[1]; + s2 ^= rk[2]; + s3 ^= rk[3]; + +#ifndef WOLFSSL_AES_SMALL_TABLES + s0 |= PreFetchTd(); + + /* + * Nr - 1 full rounds: + */ + + for (;;) { + t0 = + Td[0][GETBYTE(s0, 3)] ^ + Td[1][GETBYTE(s3, 2)] ^ + Td[2][GETBYTE(s2, 1)] ^ + Td[3][GETBYTE(s1, 0)] ^ + rk[4]; + t1 = + Td[0][GETBYTE(s1, 3)] ^ + Td[1][GETBYTE(s0, 2)] ^ + Td[2][GETBYTE(s3, 1)] ^ + Td[3][GETBYTE(s2, 0)] ^ + rk[5]; + t2 = + Td[0][GETBYTE(s2, 3)] ^ + Td[1][GETBYTE(s1, 2)] ^ + Td[2][GETBYTE(s0, 1)] ^ + Td[3][GETBYTE(s3, 0)] ^ + rk[6]; + t3 = + Td[0][GETBYTE(s3, 3)] ^ + Td[1][GETBYTE(s2, 2)] ^ + Td[2][GETBYTE(s1, 1)] ^ + Td[3][GETBYTE(s0, 0)] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Td[0][GETBYTE(t0, 3)] ^ + Td[1][GETBYTE(t3, 2)] ^ + Td[2][GETBYTE(t2, 1)] ^ + Td[3][GETBYTE(t1, 0)] ^ + rk[0]; + s1 = + Td[0][GETBYTE(t1, 3)] ^ + Td[1][GETBYTE(t0, 2)] ^ + Td[2][GETBYTE(t3, 1)] ^ + Td[3][GETBYTE(t2, 0)] ^ + rk[1]; + s2 = + Td[0][GETBYTE(t2, 3)] ^ + Td[1][GETBYTE(t1, 2)] ^ + Td[2][GETBYTE(t0, 1)] ^ + Td[3][GETBYTE(t3, 0)] ^ + rk[2]; + s3 = + Td[0][GETBYTE(t3, 3)] ^ + Td[1][GETBYTE(t2, 2)] ^ + Td[2][GETBYTE(t1, 1)] ^ + Td[3][GETBYTE(t0, 0)] ^ + rk[3]; + } + /* + * apply last round and + * map cipher state to byte array block: + */ + + t0 |= PreFetchTd4(); + + s0 = + ((word32)Td4[GETBYTE(t0, 3)] << 24) ^ + ((word32)Td4[GETBYTE(t3, 2)] << 16) ^ + ((word32)Td4[GETBYTE(t2, 1)] << 8) ^ + ((word32)Td4[GETBYTE(t1, 0)]) ^ + rk[0]; + s1 = + ((word32)Td4[GETBYTE(t1, 3)] << 24) ^ + ((word32)Td4[GETBYTE(t0, 2)] << 16) ^ + ((word32)Td4[GETBYTE(t3, 1)] << 8) ^ + ((word32)Td4[GETBYTE(t2, 0)]) ^ + rk[1]; + s2 = + ((word32)Td4[GETBYTE(t2, 3)] << 24) ^ + ((word32)Td4[GETBYTE(t1, 2)] << 16) ^ + ((word32)Td4[GETBYTE(t0, 1)] << 8) ^ + ((word32)Td4[GETBYTE(t3, 0)]) ^ + rk[2]; + s3 = + ((word32)Td4[GETBYTE(t3, 3)] << 24) ^ + ((word32)Td4[GETBYTE(t2, 2)] << 16) ^ + ((word32)Td4[GETBYTE(t1, 1)] << 8) ^ + 
((word32)Td4[GETBYTE(t0, 0)]) ^ + rk[3]; +#else + s0 |= PreFetchTd4(); + + r *= 2; + for (rk += 4; r > 1; r--, rk += 4) { + t0 = + ((word32)Td4[GETBYTE(s0, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s3, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s2, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s1, 0)]) ^ + rk[0]; + t1 = + ((word32)Td4[GETBYTE(s1, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s0, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s3, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s2, 0)]) ^ + rk[1]; + t2 = + ((word32)Td4[GETBYTE(s2, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s1, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s0, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s3, 0)]) ^ + rk[2]; + t3 = + ((word32)Td4[GETBYTE(s3, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s2, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s1, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s0, 0)]) ^ + rk[3]; + + s0 = + (inv_col_mul(t0, 0, 2, 1, 3) << 24) ^ + (inv_col_mul(t0, 3, 1, 0, 2) << 16) ^ + (inv_col_mul(t0, 2, 0, 3, 1) << 8) ^ + (inv_col_mul(t0, 1, 3, 2, 0) ); + s1 = + (inv_col_mul(t1, 0, 2, 1, 3) << 24) ^ + (inv_col_mul(t1, 3, 1, 0, 2) << 16) ^ + (inv_col_mul(t1, 2, 0, 3, 1) << 8) ^ + (inv_col_mul(t1, 1, 3, 2, 0) ); + s2 = + (inv_col_mul(t2, 0, 2, 1, 3) << 24) ^ + (inv_col_mul(t2, 3, 1, 0, 2) << 16) ^ + (inv_col_mul(t2, 2, 0, 3, 1) << 8) ^ + (inv_col_mul(t2, 1, 3, 2, 0) ); + s3 = + (inv_col_mul(t3, 0, 2, 1, 3) << 24) ^ + (inv_col_mul(t3, 3, 1, 0, 2) << 16) ^ + (inv_col_mul(t3, 2, 0, 3, 1) << 8) ^ + (inv_col_mul(t3, 1, 3, 2, 0) ); + } + + t0 = + ((word32)Td4[GETBYTE(s0, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s3, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s2, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s1, 0)]); + t1 = + ((word32)Td4[GETBYTE(s1, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s0, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s3, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s2, 0)]); + t2 = + ((word32)Td4[GETBYTE(s2, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s1, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s0, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s3, 0)]); + t3 = + ((word32)Td4[GETBYTE(s3, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s2, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s1, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s0, 0)]); + s0 = t0 ^ rk[0]; + s1 = t1 ^ rk[1]; + s2 = t2 ^ rk[2]; + s3 = t3 ^ rk[3]; +#endif + + /* write out */ +#ifdef LITTLE_ENDIAN_ORDER + s0 = ByteReverseWord32(s0); + s1 = ByteReverseWord32(s1); + s2 = ByteReverseWord32(s2); + s3 = ByteReverseWord32(s3); +#endif + + XMEMCPY(outBlock, &s0, sizeof(s0)); + XMEMCPY(outBlock + sizeof(s0), &s1, sizeof(s1)); + XMEMCPY(outBlock + 2 * sizeof(s0), &s2, sizeof(s2)); + XMEMCPY(outBlock + 3 * sizeof(s0), &s3, sizeof(s3)); +} +#endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT */ +#endif /* HAVE_AES_DECRYPT */ + +#endif /* NEED_AES_TABLES */ + + + +/* wc_AesSetKey */ +#if defined(STM32_CRYPTO) + + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + word32 *rk; + + (void)dir; + + if (aes == NULL || (keylen != 16 && + #ifdef WOLFSSL_AES_192 + keylen != 24 && + #endif + keylen != 32)) { + return BAD_FUNC_ARG; + } + + rk = aes->key; + aes->keylen = keylen; + aes->rounds = keylen/4 + 6; + XMEMCPY(rk, userKey, keylen); + #if !defined(WOLFSSL_STM32_CUBEMX) || defined(STM32_HAL_V2) + ByteReverseWords(rk, rk, keylen); + #endif + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) + aes->left = 0; + #endif + + return wc_AesSetIV(aes, iv); + } + #if defined(WOLFSSL_AES_DIRECT) + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + return wc_AesSetKey(aes, userKey, keylen, iv, dir); + } + #endif + 
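+/* Illustrative usage sketch for the wc_AesSetKey() variants in this block
+ * (not part of the ported code; `key` and its contents are hypothetical,
+ * the rest is the wolfCrypt API already used in this file). Every variant
+ * derives the round count as keylen/4 + 6, i.e. 16 -> 10 (AES-128),
+ * 24 -> 12 (AES-192), 32 -> 14 (AES-256):
+ *
+ *     Aes aes;
+ *     byte key[16];                      caller-owned 128-bit key
+ *     byte iv[AES_BLOCK_SIZE] = { 0 };   a NULL iv is also accepted and
+ *                                        zero-fills aes->reg
+ *     XMEMSET(key, 0, sizeof(key));      use real key material in practice
+ *     if (wc_AesSetKey(&aes, key, (word32)sizeof(key), iv,
+ *                      AES_ENCRYPTION) != 0) {
+ *         handle BAD_FUNC_ARG or a port-specific failure here
+ *     }
+ */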
+#elif defined(HAVE_COLDFIRE_SEC) + #if defined (HAVE_THREADX) + #include "memory_pools.h" + extern TX_BYTE_POOL mp_ncached; /* Non Cached memory pool */ + #endif + + #define AES_BUFFER_SIZE (AES_BLOCK_SIZE * 64) + static unsigned char *AESBuffIn = NULL; + static unsigned char *AESBuffOut = NULL; + static byte *secReg; + static byte *secKey; + static volatile SECdescriptorType *secDesc; + + static wolfSSL_Mutex Mutex_AesSEC; + + #define SEC_DESC_AES_CBC_ENCRYPT 0x60300010 + #define SEC_DESC_AES_CBC_DECRYPT 0x60200010 + + extern volatile unsigned char __MBAR[]; + + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + if (AESBuffIn == NULL) { + #if defined (HAVE_THREADX) + int s1, s2, s3, s4, s5; + s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc, + sizeof(SECdescriptorType), TX_NO_WAIT); + s1 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffIn, + AES_BUFFER_SIZE, TX_NO_WAIT); + s2 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffOut, + AES_BUFFER_SIZE, TX_NO_WAIT); + s3 = tx_byte_allocate(&mp_ncached, (void *)&secKey, + AES_BLOCK_SIZE*2, TX_NO_WAIT); + s4 = tx_byte_allocate(&mp_ncached, (void *)&secReg, + AES_BLOCK_SIZE, TX_NO_WAIT); + + if (s1 || s2 || s3 || s4 || s5) + return BAD_FUNC_ARG; + #else + #warning "Allocate non-Cache buffers" + #endif + + wc_InitMutex(&Mutex_AesSEC); + } + + if (!((keylen == 16) || (keylen == 24) || (keylen == 32))) + return BAD_FUNC_ARG; + + if (aes == NULL) + return BAD_FUNC_ARG; + + aes->keylen = keylen; + aes->rounds = keylen/4 + 6; + XMEMCPY(aes->key, userKey, keylen); + + if (iv) + XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE); + + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) + aes->left = 0; + #endif + + return 0; + } +#elif defined(FREESCALE_LTC) + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, + int dir) + { + if (aes == NULL || !((keylen == 16) || (keylen == 24) || (keylen == 32))) + return BAD_FUNC_ARG; + + aes->rounds = keylen/4 + 6; + XMEMCPY(aes->key, userKey, keylen); + + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) + aes->left = 0; + #endif + + return wc_AesSetIV(aes, iv); + } + + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + return wc_AesSetKey(aes, userKey, keylen, iv, dir); + } +#elif defined(FREESCALE_MMCAU) + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + int ret; + byte* rk; + byte* tmpKey = (byte*)userKey; + int tmpKeyDynamic = 0; + word32 alignOffset = 0; + + (void)dir; + + if (!((keylen == 16) || (keylen == 24) || (keylen == 32))) + return BAD_FUNC_ARG; + if (aes == NULL) + return BAD_FUNC_ARG; + + rk = (byte*)aes->key; + if (rk == NULL) + return BAD_FUNC_ARG; + + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) + aes->left = 0; + #endif + + aes->rounds = keylen/4 + 6; + + #ifdef FREESCALE_MMCAU_CLASSIC + if ((wolfssl_word)userKey % WOLFSSL_MMCAU_ALIGNMENT) { + #ifndef NO_WOLFSSL_ALLOC_ALIGN + byte* tmp = (byte*)XMALLOC(keylen + WOLFSSL_MMCAU_ALIGNMENT, + aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (tmp == NULL) { + return MEMORY_E; + } + alignOffset = WOLFSSL_MMCAU_ALIGNMENT - + ((wolfssl_word)tmp % WOLFSSL_MMCAU_ALIGNMENT); + tmpKey = tmp + alignOffset; + XMEMCPY(tmpKey, userKey, keylen); + tmpKeyDynamic = 1; + #else + WOLFSSL_MSG("Bad cau_aes_set_key alignment"); + return BAD_ALIGN_E; + #endif + } + #endif + + ret = 
wolfSSL_CryptHwMutexLock(); + if(ret == 0) { + #ifdef FREESCALE_MMCAU_CLASSIC + cau_aes_set_key(tmpKey, keylen*8, rk); + #else + MMCAU_AES_SetKey(tmpKey, keylen, rk); + #endif + wolfSSL_CryptHwMutexUnLock(); + + ret = wc_AesSetIV(aes, iv); + } + + if (tmpKeyDynamic == 1) { + XFREE(tmpKey - alignOffset, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + + return ret; + } + + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + return wc_AesSetKey(aes, userKey, keylen, iv, dir); + } + +#elif defined(WOLFSSL_NRF51_AES) + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + int ret; + + (void)dir; + (void)iv; + + if (aes == NULL || keylen != 16) + return BAD_FUNC_ARG; + + aes->keylen = keylen; + aes->rounds = keylen/4 + 6; + ret = nrf51_aes_set_key(userKey); + + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) + aes->left = 0; + #endif + + return ret; + } + + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + return wc_AesSetKey(aes, userKey, keylen, iv, dir); + } +#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES) + + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + (void)dir; + (void)iv; + + if (aes == NULL || (keylen != 16 && keylen != 24 && keylen != 32)) { + return BAD_FUNC_ARG; + } + + aes->keylen = keylen; + aes->rounds = keylen/4 + 6; + + XMEMCPY(aes->key, userKey, keylen); + #if defined(WOLFSSL_AES_COUNTER) + aes->left = 0; + #endif + return wc_AesSetIV(aes, iv); + } + + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + return wc_AesSetKey(aes, userKey, keylen, iv, dir); + } +#elif defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES) + + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, + int dir) + { + SaSiError_t ret = SASI_OK; + SaSiAesIv_t iv_aes; + + if (aes == NULL || + (keylen != AES_128_KEY_SIZE && + keylen != AES_192_KEY_SIZE && + keylen != AES_256_KEY_SIZE)) { + return BAD_FUNC_ARG; + } + #if defined(AES_MAX_KEY_SIZE) + if (keylen > (AES_MAX_KEY_SIZE/8)) { + return BAD_FUNC_ARG; + } + #endif + if (dir != AES_ENCRYPTION && + dir != AES_DECRYPTION) { + return BAD_FUNC_ARG; + } + + if (dir == AES_ENCRYPTION) { + aes->ctx.mode = SASI_AES_ENCRYPT; + SaSi_AesInit(&aes->ctx.user_ctx, + SASI_AES_ENCRYPT, + SASI_AES_MODE_CBC, + SASI_AES_PADDING_NONE); + } + else { + aes->ctx.mode = SASI_AES_DECRYPT; + SaSi_AesInit(&aes->ctx.user_ctx, + SASI_AES_DECRYPT, + SASI_AES_MODE_CBC, + SASI_AES_PADDING_NONE); + } + + aes->keylen = keylen; + aes->rounds = keylen/4 + 6; + XMEMCPY(aes->key, userKey, keylen); + + aes->ctx.key.pKey = (uint8_t*)aes->key; + aes->ctx.key.keySize= keylen; + + ret = SaSi_AesSetKey(&aes->ctx.user_ctx, + SASI_AES_USER_KEY, + &aes->ctx.key, + sizeof(aes->ctx.key)); + if (ret != SASI_OK) { + return BAD_FUNC_ARG; + } + + ret = wc_AesSetIV(aes, iv); + + if (iv) + XMEMCPY(iv_aes, iv, AES_BLOCK_SIZE); + else + XMEMSET(iv_aes, 0, AES_BLOCK_SIZE); + + + ret = SaSi_AesSetIv(&aes->ctx.user_ctx, iv_aes); + if (ret != SASI_OK) { + return ret; + } + return ret; + } + #if defined(WOLFSSL_AES_DIRECT) + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + return wc_AesSetKey(aes, userKey, keylen, iv, dir); + } + #endif + +#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) + 
/* implemented in wolfcrypt/src/port/caam/caam_aes.c */ + +#elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */ + +#elif defined(WOLFSSL_DEVCRYPTO_AES) + /* implemented in wolfcrypt/src/port/devcrypto/devcrypto_aes.c */ + +#else + + /* Software AES - SetKey */ + static int wc_AesSetKeyLocal(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + word32 *rk = aes->key; + #ifdef NEED_AES_TABLES + word32 temp; + unsigned int i = 0; + #endif + + #ifdef WOLFSSL_AESNI + aes->use_aesni = 0; + #endif /* WOLFSSL_AESNI */ + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) + aes->left = 0; + #endif + + aes->keylen = keylen; + aes->rounds = (keylen/4) + 6; + + XMEMCPY(rk, userKey, keylen); + #if defined(LITTLE_ENDIAN_ORDER) && !defined(WOLFSSL_PIC32MZ_CRYPT) && \ + (!defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ + defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES)) + ByteReverseWords(rk, rk, keylen); + #endif + +#ifdef NEED_AES_TABLES + switch (keylen) { + #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 128 && \ + defined(WOLFSSL_AES_128) + case 16: + while (1) + { + temp = rk[3]; + rk[4] = rk[0] ^ + #ifndef WOLFSSL_AES_SMALL_TABLES + (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^ + (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^ + (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^ + (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^ + #else + ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^ + ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^ + ((word32)Tsbox[GETBYTE(temp, 0)] << 8) ^ + ((word32)Tsbox[GETBYTE(temp, 3)]) ^ + #endif + rcon[i]; + rk[5] = rk[1] ^ rk[4]; + rk[6] = rk[2] ^ rk[5]; + rk[7] = rk[3] ^ rk[6]; + if (++i == 10) + break; + rk += 4; + } + break; + #endif /* 128 */ + + #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 192 && \ + defined(WOLFSSL_AES_192) + case 24: + /* for (;;) here triggers a bug in VC60 SP4 w/ Pro Pack */ + while (1) + { + temp = rk[ 5]; + rk[ 6] = rk[ 0] ^ + #ifndef WOLFSSL_AES_SMALL_TABLES + (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^ + (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^ + (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^ + (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^ + #else + ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^ + ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^ + ((word32)Tsbox[GETBYTE(temp, 0)] << 8) ^ + ((word32)Tsbox[GETBYTE(temp, 3)]) ^ + #endif + rcon[i]; + rk[ 7] = rk[ 1] ^ rk[ 6]; + rk[ 8] = rk[ 2] ^ rk[ 7]; + rk[ 9] = rk[ 3] ^ rk[ 8]; + if (++i == 8) + break; + rk[10] = rk[ 4] ^ rk[ 9]; + rk[11] = rk[ 5] ^ rk[10]; + rk += 6; + } + break; + #endif /* 192 */ + + #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 256 && \ + defined(WOLFSSL_AES_256) + case 32: + while (1) + { + temp = rk[ 7]; + rk[ 8] = rk[ 0] ^ + #ifndef WOLFSSL_AES_SMALL_TABLES + (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^ + (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^ + (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^ + (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^ + #else + ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^ + ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^ + ((word32)Tsbox[GETBYTE(temp, 0)] << 8) ^ + ((word32)Tsbox[GETBYTE(temp, 3)]) ^ + #endif + rcon[i]; + rk[ 9] = rk[ 1] ^ rk[ 8]; + rk[10] = rk[ 2] ^ rk[ 9]; + rk[11] = rk[ 3] ^ rk[10]; + if (++i == 7) + break; + temp = rk[11]; + rk[12] = rk[ 4] ^ + #ifndef WOLFSSL_AES_SMALL_TABLES + (Te[2][GETBYTE(temp, 3)] & 0xff000000) ^ + (Te[3][GETBYTE(temp, 2)] & 0x00ff0000) ^ + (Te[0][GETBYTE(temp, 1)] & 0x0000ff00) ^ + (Te[1][GETBYTE(temp, 0)] & 0x000000ff); + #else + ((word32)Tsbox[GETBYTE(temp, 3)] << 24) ^ + 
((word32)Tsbox[GETBYTE(temp, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(temp, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(temp, 0)]); + #endif + rk[13] = rk[ 5] ^ rk[12]; + rk[14] = rk[ 6] ^ rk[13]; + rk[15] = rk[ 7] ^ rk[14]; + + rk += 8; + } + break; + #endif /* 256 */ + + default: + return BAD_FUNC_ARG; + } /* switch */ + + #if defined(HAVE_AES_DECRYPT) + if (dir == AES_DECRYPTION) { + unsigned int j; + rk = aes->key; + + /* invert the order of the round keys: */ + for (i = 0, j = 4* aes->rounds; i < j; i += 4, j -= 4) { + temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; + temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; + temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; + temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; + } + #if !defined(WOLFSSL_AES_SMALL_TABLES) + /* apply the inverse MixColumn transform to all round keys but the + first and the last: */ + for (i = 1; i < aes->rounds; i++) { + rk += 4; + rk[0] = + Td[0][Te[1][GETBYTE(rk[0], 3)] & 0xff] ^ + Td[1][Te[1][GETBYTE(rk[0], 2)] & 0xff] ^ + Td[2][Te[1][GETBYTE(rk[0], 1)] & 0xff] ^ + Td[3][Te[1][GETBYTE(rk[0], 0)] & 0xff]; + rk[1] = + Td[0][Te[1][GETBYTE(rk[1], 3)] & 0xff] ^ + Td[1][Te[1][GETBYTE(rk[1], 2)] & 0xff] ^ + Td[2][Te[1][GETBYTE(rk[1], 1)] & 0xff] ^ + Td[3][Te[1][GETBYTE(rk[1], 0)] & 0xff]; + rk[2] = + Td[0][Te[1][GETBYTE(rk[2], 3)] & 0xff] ^ + Td[1][Te[1][GETBYTE(rk[2], 2)] & 0xff] ^ + Td[2][Te[1][GETBYTE(rk[2], 1)] & 0xff] ^ + Td[3][Te[1][GETBYTE(rk[2], 0)] & 0xff]; + rk[3] = + Td[0][Te[1][GETBYTE(rk[3], 3)] & 0xff] ^ + Td[1][Te[1][GETBYTE(rk[3], 2)] & 0xff] ^ + Td[2][Te[1][GETBYTE(rk[3], 1)] & 0xff] ^ + Td[3][Te[1][GETBYTE(rk[3], 0)] & 0xff]; + } + #endif + } + #else + (void)dir; + #endif /* HAVE_AES_DECRYPT */ + (void)temp; +#endif /* NEED_AES_TABLES */ + +#if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES) + XMEMCPY((byte*)aes->key, userKey, keylen); + if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag == CRYPTO_WORD_ENDIAN_BIG) { + ByteReverseWords(aes->key, aes->key, 32); + } +#endif + + return wc_AesSetIV(aes, iv); + } + + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + int ret; + #if defined(AES_MAX_KEY_SIZE) + const word32 max_key_len = (AES_MAX_KEY_SIZE / 8); + #endif + + #ifdef WOLFSSL_IMX6_CAAM_BLOB + byte local[32]; + word32 localSz = 32; + + if (keylen == (16 + WC_CAAM_BLOB_SZ) || + keylen == (24 + WC_CAAM_BLOB_SZ) || + keylen == (32 + WC_CAAM_BLOB_SZ)) { + if (wc_caamOpenBlob((byte*)userKey, keylen, local, &localSz) != 0) { + return BAD_FUNC_ARG; + } + + /* set local values */ + userKey = local; + keylen = localSz; + } + #endif + if (aes == NULL || + !((keylen == 16) || (keylen == 24) || (keylen == 32))) { + return BAD_FUNC_ARG; + } + + #if defined(AES_MAX_KEY_SIZE) + /* Check key length */ + if (keylen > max_key_len) { + return BAD_FUNC_ARG; + } + #endif + aes->keylen = keylen; + aes->rounds = keylen/4 + 6; + + #if defined(WOLF_CRYPTO_CB) || (defined(WOLFSSL_DEVCRYPTO) && \ + (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))) || \ + (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)) + #ifdef WOLF_CRYPTO_CB + if (aes->devId != INVALID_DEVID) + #endif + { + XMEMCPY(aes->devKey, userKey, keylen); + } + #endif + + #ifdef WOLFSSL_AESNI + if (checkAESNI == 0) { + haveAESNI = Check_CPU_support_AES(); + checkAESNI = 1; + } + if (haveAESNI) { + #if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB) || \ + defined(WOLFSSL_AES_OFB) + aes->left = 0; + #endif /* WOLFSSL_AES_COUNTER */ + aes->use_aesni = 1; + if 
(iv) + XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE); + else + XMEMSET(aes->reg, 0, AES_BLOCK_SIZE); + if (dir == AES_ENCRYPTION) + return AES_set_encrypt_key(userKey, keylen * 8, aes); + #ifdef HAVE_AES_DECRYPT + else + return AES_set_decrypt_key(userKey, keylen * 8, aes); + #endif + } + #endif /* WOLFSSL_AESNI */ + + ret = wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir); + + #if defined(WOLFSSL_DEVCRYPTO) && \ + (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC)) + aes->ctx.cfd = -1; + #endif + #ifdef WOLFSSL_IMX6_CAAM_BLOB + ForceZero(local, sizeof(local)); + #endif + return ret; + } + + #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + /* AES-CTR and AES-DIRECT need to use this for key setup, no aesni yet */ + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + int ret; + + #ifdef WOLFSSL_IMX6_CAAM_BLOB + byte local[32]; + word32 localSz = 32; + + if (keylen == (16 + WC_CAAM_BLOB_SZ) || + keylen == (24 + WC_CAAM_BLOB_SZ) || + keylen == (32 + WC_CAAM_BLOB_SZ)) { + if (wc_caamOpenBlob((byte*)userKey, keylen, local, &localSz) + != 0) { + return BAD_FUNC_ARG; + } + + /* set local values */ + userKey = local; + keylen = localSz; + } + #endif + ret = wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir); + + #ifdef WOLFSSL_IMX6_CAAM_BLOB + ForceZero(local, sizeof(local)); + #endif + + return ret; + } + #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#endif /* wc_AesSetKey block */ + + +/* wc_AesSetIV is shared between software and hardware */ +int wc_AesSetIV(Aes* aes, const byte* iv) +{ + if (aes == NULL) + return BAD_FUNC_ARG; + + if (iv) + XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE); + else + XMEMSET(aes->reg, 0, AES_BLOCK_SIZE); + + return 0; +} + +/* AES-DIRECT */ +#if defined(WOLFSSL_AES_DIRECT) + #if defined(HAVE_COLDFIRE_SEC) + #error "Coldfire SEC doesn't yet support AES direct" + + #elif defined(FREESCALE_LTC) + /* Allow direct access to one block encrypt */ + void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) + { + byte *key; + uint32_t keySize; + + key = (byte*)aes->key; + wc_AesGetKeySize(aes, &keySize); + + LTC_AES_EncryptEcb(LTC_BASE, in, out, AES_BLOCK_SIZE, + key, keySize); + } + + /* Allow direct access to one block decrypt */ + void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) + { + byte *key; + uint32_t keySize; + + key = (byte*)aes->key; + wc_AesGetKeySize(aes, &keySize); + + LTC_AES_DecryptEcb(LTC_BASE, in, out, AES_BLOCK_SIZE, + key, keySize, kLTC_EncryptKey); + } + + #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) + /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ + + #elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */ + + #elif defined(WOLFSSL_DEVCRYPTO_AES) + /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */ + + #elif defined(STM32_CRYPTO) + /* Allow direct access to one block encrypt */ + void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) + { + if (wolfSSL_CryptHwMutexLock() == 0) { + wc_AesEncrypt(aes, in, out); + wolfSSL_CryptHwMutexUnLock(); + } + } + #ifdef HAVE_AES_DECRYPT + /* Allow direct access to one block decrypt */ + void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) + { + if (wolfSSL_CryptHwMutexLock() == 0) { + wc_AesDecrypt(aes, in, out); + wolfSSL_CryptHwMutexUnLock(); + } + } + #endif /* HAVE_AES_DECRYPT */ + + #elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES) + + /* Allow direct access to one block encrypt */ + void 
wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) + { + wc_AesEncrypt(aes, in, out); + } + #ifdef HAVE_AES_DECRYPT + /* Allow direct access to one block decrypt */ + void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) + { + wc_AesDecrypt(aes, in, out); + } + #endif /* HAVE_AES_DECRYPT */ + #else + /* Allow direct access to one block encrypt */ + void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) + { + wc_AesEncrypt(aes, in, out); + } + #ifdef HAVE_AES_DECRYPT + /* Allow direct access to one block decrypt */ + void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) + { + wc_AesDecrypt(aes, in, out); + } + #endif /* HAVE_AES_DECRYPT */ + #endif /* AES direct block */ +#endif /* WOLFSSL_AES_DIRECT */ + + +/* AES-CBC */ +#ifdef HAVE_AES_CBC +#if defined(STM32_CRYPTO) + +#ifdef WOLFSSL_STM32_CUBEMX + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + int ret = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); + CRYP_HandleTypeDef hcryp; + + ret = wc_Stm32_Aes_Init(aes, &hcryp); + if (ret != 0) + return ret; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + #ifdef STM32_CRYPTO_AES_ONLY + hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT; + hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_CBC; + hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE; + #elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_CBC; + ByteReverseWords(aes->reg, aes->reg, AES_BLOCK_SIZE); + #endif + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg; + HAL_CRYP_Init(&hcryp); + + while (blocks--) { + #ifdef STM32_CRYPTO_AES_ONLY + ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #elif defined(STM32_HAL_V2) + ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in, AES_BLOCK_SIZE, + (uint32_t*)out, STM32_HAL_TIMEOUT); + #else + ret = HAL_CRYP_AESCBC_Encrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #endif + if (ret != HAL_OK) { + ret = WC_TIMEOUT_E; + break; + } + + /* store iv for next call */ + XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + + sz -= AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + + HAL_CRYP_DeInit(&hcryp); + + wolfSSL_CryptHwMutexUnLock(); + + return ret; + } + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + int ret = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); + CRYP_HandleTypeDef hcryp; + + ret = wc_Stm32_Aes_Init(aes, &hcryp); + if (ret != 0) + return ret; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + /* if input and output same will overwrite input iv */ + XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + + #ifdef STM32_CRYPTO_AES_ONLY + hcryp.Init.OperatingMode = CRYP_ALGOMODE_KEYDERIVATION_DECRYPT; + hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_CBC; + hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE; + #elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_CBC; + ByteReverseWords(aes->reg, aes->reg, AES_BLOCK_SIZE); + #endif + + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg; + HAL_CRYP_Init(&hcryp); + + while (blocks--) { + #ifdef STM32_CRYPTO_AES_ONLY + ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #elif defined(STM32_HAL_V2) + ret = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)in, AES_BLOCK_SIZE, + (uint32_t*)out, STM32_HAL_TIMEOUT); + #else + ret = HAL_CRYP_AESCBC_Decrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #endif + if (ret != HAL_OK) { + ret = WC_TIMEOUT_E; + 
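+                /* any HAL failure is reported as WC_TIMEOUT_E and the
+                 * loop stops; blocks already written to out are kept */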
break; + } + + /* store iv for next call */ + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + + HAL_CRYP_DeInit(&hcryp); + wolfSSL_CryptHwMutexUnLock(); + + return ret; + } + #endif /* HAVE_AES_DECRYPT */ + +#else /* STD_PERI_LIB */ + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + int ret; + word32 *iv; + word32 blocks = (sz / AES_BLOCK_SIZE); + CRYP_InitTypeDef cryptInit; + CRYP_KeyInitTypeDef keyInit; + CRYP_IVInitTypeDef ivInit; + + ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit); + if (ret != 0) + return ret; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + /* reset registers to their default values */ + CRYP_DeInit(); + + /* set key */ + CRYP_KeyInit(&keyInit); + + /* set iv */ + iv = aes->reg; + CRYP_IVStructInit(&ivInit); + ByteReverseWords(iv, iv, AES_BLOCK_SIZE); + ivInit.CRYP_IV0Left = iv[0]; + ivInit.CRYP_IV0Right = iv[1]; + ivInit.CRYP_IV1Left = iv[2]; + ivInit.CRYP_IV1Right = iv[3]; + CRYP_IVInit(&ivInit); + + /* set direction and mode */ + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC; + CRYP_Init(&cryptInit); + + /* enable crypto processor */ + CRYP_Cmd(ENABLE); + + while (blocks--) { + /* flush IN/OUT FIFOs */ + CRYP_FIFOFlush(); + + CRYP_DataIn(*(uint32_t*)&in[0]); + CRYP_DataIn(*(uint32_t*)&in[4]); + CRYP_DataIn(*(uint32_t*)&in[8]); + CRYP_DataIn(*(uint32_t*)&in[12]); + + /* wait until the complete message has been processed */ + while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} + + *(uint32_t*)&out[0] = CRYP_DataOut(); + *(uint32_t*)&out[4] = CRYP_DataOut(); + *(uint32_t*)&out[8] = CRYP_DataOut(); + *(uint32_t*)&out[12] = CRYP_DataOut(); + + /* store iv for next call */ + XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + + sz -= AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + + /* disable crypto processor */ + CRYP_Cmd(DISABLE); + wolfSSL_CryptHwMutexUnLock(); + + return ret; + } + + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + int ret; + word32 *iv; + word32 blocks = (sz / AES_BLOCK_SIZE); + CRYP_InitTypeDef cryptInit; + CRYP_KeyInitTypeDef keyInit; + CRYP_IVInitTypeDef ivInit; + + ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit); + if (ret != 0) + return ret; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + /* if input and output same will overwrite input iv */ + XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + + /* reset registers to their default values */ + CRYP_DeInit(); + + /* set direction and key */ + CRYP_KeyInit(&keyInit); + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key; + CRYP_Init(&cryptInit); + + /* enable crypto processor */ + CRYP_Cmd(ENABLE); + + /* wait until key has been prepared */ + while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} + + /* set direction and mode */ + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC; + CRYP_Init(&cryptInit); + + /* set iv */ + iv = aes->reg; + CRYP_IVStructInit(&ivInit); + ByteReverseWords(iv, iv, AES_BLOCK_SIZE); + ivInit.CRYP_IV0Left = iv[0]; + ivInit.CRYP_IV0Right = iv[1]; + ivInit.CRYP_IV1Left = iv[2]; + ivInit.CRYP_IV1Right = iv[3]; + CRYP_IVInit(&ivInit); + + /* enable crypto processor */ + CRYP_Cmd(ENABLE); + + while (blocks--) { + /* flush IN/OUT FIFOs */ + CRYP_FIFOFlush(); + + 
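+            /* one 16-byte AES block is fed to the IN FIFO as four 32-bit
+             * words; the four CRYP_DataOut() reads below drain the
+             * matching block from the OUT FIFO */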
CRYP_DataIn(*(uint32_t*)&in[0]); + CRYP_DataIn(*(uint32_t*)&in[4]); + CRYP_DataIn(*(uint32_t*)&in[8]); + CRYP_DataIn(*(uint32_t*)&in[12]); + + /* wait until the complete message has been processed */ + while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} + + *(uint32_t*)&out[0] = CRYP_DataOut(); + *(uint32_t*)&out[4] = CRYP_DataOut(); + *(uint32_t*)&out[8] = CRYP_DataOut(); + *(uint32_t*)&out[12] = CRYP_DataOut(); + + /* store iv for next call */ + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + + /* disable crypto processor */ + CRYP_Cmd(DISABLE); + wolfSSL_CryptHwMutexUnLock(); + + return ret; + } + #endif /* HAVE_AES_DECRYPT */ +#endif /* WOLFSSL_STM32_CUBEMX */ + +#elif defined(HAVE_COLDFIRE_SEC) + static int wc_AesCbcCrypt(Aes* aes, byte* po, const byte* pi, word32 sz, + word32 descHeader) + { + #ifdef DEBUG_WOLFSSL + int i; int stat1, stat2; int ret; + #endif + + int size; + volatile int v; + + if ((pi == NULL) || (po == NULL)) + return BAD_FUNC_ARG; /*wrong pointer*/ + + wc_LockMutex(&Mutex_AesSEC); + + /* Set descriptor for SEC */ + secDesc->length1 = 0x0; + secDesc->pointer1 = NULL; + + secDesc->length2 = AES_BLOCK_SIZE; + secDesc->pointer2 = (byte *)secReg; /* Initial Vector */ + + switch(aes->rounds) { + case 10: secDesc->length3 = 16; break; + case 12: secDesc->length3 = 24; break; + case 14: secDesc->length3 = 32; break; + } + XMEMCPY(secKey, aes->key, secDesc->length3); + + secDesc->pointer3 = (byte *)secKey; + secDesc->pointer4 = AESBuffIn; + secDesc->pointer5 = AESBuffOut; + secDesc->length6 = 0x0; + secDesc->pointer6 = NULL; + secDesc->length7 = 0x0; + secDesc->pointer7 = NULL; + secDesc->nextDescriptorPtr = NULL; + + while (sz) { + secDesc->header = descHeader; + XMEMCPY(secReg, aes->reg, AES_BLOCK_SIZE); + if ((sz % AES_BUFFER_SIZE) == sz) { + size = sz; + sz = 0; + } else { + size = AES_BUFFER_SIZE; + sz -= AES_BUFFER_SIZE; + } + secDesc->length4 = size; + secDesc->length5 = size; + + XMEMCPY(AESBuffIn, pi, size); + if(descHeader == SEC_DESC_AES_CBC_DECRYPT) { + XMEMCPY((void*)aes->tmp, (void*)&(pi[size-AES_BLOCK_SIZE]), + AES_BLOCK_SIZE); + } + + /* Point SEC to the location of the descriptor */ + MCF_SEC_FR0 = (uint32)secDesc; + /* Initialize SEC and wait for encryption to complete */ + MCF_SEC_CCCR0 = 0x0000001a; + /* poll SISR to determine when channel is complete */ + v=0; + + while ((secDesc->header>> 24) != 0xff) v++; + + #ifdef DEBUG_WOLFSSL + ret = MCF_SEC_SISRH; + stat1 = MCF_SEC_AESSR; + stat2 = MCF_SEC_AESISR; + if (ret & 0xe0000000) { + db_printf("Aes_Cbc(i=%d):ISRH=%08x, AESSR=%08x, " + "AESISR=%08x\n", i, ret, stat1, stat2); + } + #endif + + XMEMCPY(po, AESBuffOut, size); + + if (descHeader == SEC_DESC_AES_CBC_ENCRYPT) { + XMEMCPY((void*)aes->reg, (void*)&(po[size-AES_BLOCK_SIZE]), + AES_BLOCK_SIZE); + } else { + XMEMCPY((void*)aes->reg, (void*)aes->tmp, AES_BLOCK_SIZE); + } + + pi += size; + po += size; + } + + wc_UnLockMutex(&Mutex_AesSEC); + return 0; + } + + int wc_AesCbcEncrypt(Aes* aes, byte* po, const byte* pi, word32 sz) + { + return (wc_AesCbcCrypt(aes, po, pi, sz, SEC_DESC_AES_CBC_ENCRYPT)); + } + + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* po, const byte* pi, word32 sz) + { + return (wc_AesCbcCrypt(aes, po, pi, sz, SEC_DESC_AES_CBC_DECRYPT)); + } + #endif /* HAVE_AES_DECRYPT */ + +#elif defined(FREESCALE_LTC) + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + uint32_t keySize; + status_t status; + byte *iv, *enc_key; + word32 blocks = (sz / 
AES_BLOCK_SIZE); + + iv = (byte*)aes->reg; + enc_key = (byte*)aes->key; + + status = wc_AesGetKeySize(aes, &keySize); + if (status != 0) { + return status; + } + + status = LTC_AES_EncryptCbc(LTC_BASE, in, out, blocks * AES_BLOCK_SIZE, + iv, enc_key, keySize); + + /* store iv for next call */ + if (status == kStatus_Success) { + XMEMCPY(iv, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + } + + return (status == kStatus_Success) ? 0 : -1; + } + + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + uint32_t keySize; + status_t status; + byte* iv, *dec_key; + word32 blocks = (sz / AES_BLOCK_SIZE); + byte temp_block[AES_BLOCK_SIZE]; + + iv = (byte*)aes->reg; + dec_key = (byte*)aes->key; + + status = wc_AesGetKeySize(aes, &keySize); + if (status != 0) { + return status; + } + + /* get IV for next call */ + XMEMCPY(temp_block, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + + status = LTC_AES_DecryptCbc(LTC_BASE, in, out, blocks * AES_BLOCK_SIZE, + iv, dec_key, keySize, kLTC_EncryptKey); + + /* store IV for next call */ + if (status == kStatus_Success) { + XMEMCPY(iv, temp_block, AES_BLOCK_SIZE); + } + + return (status == kStatus_Success) ? 0 : -1; + } + #endif /* HAVE_AES_DECRYPT */ + +#elif defined(FREESCALE_MMCAU) + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + int i; + int offset = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); + byte *iv; + byte temp_block[AES_BLOCK_SIZE]; + + iv = (byte*)aes->reg; + + while (blocks--) { + XMEMCPY(temp_block, in + offset, AES_BLOCK_SIZE); + + /* XOR block with IV for CBC */ + for (i = 0; i < AES_BLOCK_SIZE; i++) + temp_block[i] ^= iv[i]; + + wc_AesEncrypt(aes, temp_block, out + offset); + + offset += AES_BLOCK_SIZE; + + /* store IV for next block */ + XMEMCPY(iv, out + offset - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + } + + return 0; + } + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + int i; + int offset = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); + byte* iv; + byte temp_block[AES_BLOCK_SIZE]; + + iv = (byte*)aes->reg; + + while (blocks--) { + XMEMCPY(temp_block, in + offset, AES_BLOCK_SIZE); + + wc_AesDecrypt(aes, in + offset, out + offset); + + /* XOR block with IV for CBC */ + for (i = 0; i < AES_BLOCK_SIZE; i++) + (out + offset)[i] ^= iv[i]; + + /* store IV for next block */ + XMEMCPY(iv, temp_block, AES_BLOCK_SIZE); + + offset += AES_BLOCK_SIZE; + } + + return 0; + } + #endif /* HAVE_AES_DECRYPT */ + +#elif defined(WOLFSSL_PIC32MZ_CRYPT) + + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + int ret; + + /* hardware fails on input that is not a multiple of AES block size */ + if (sz % AES_BLOCK_SIZE != 0) { + return BAD_FUNC_ARG; + } + + ret = wc_Pic32AesCrypt( + aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE, + out, in, sz, PIC32_ENCRYPTION, + PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCBC); + + /* store iv for next call */ + if (ret == 0) { + XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + } + + return ret; + } + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + int ret; + byte scratch[AES_BLOCK_SIZE]; + + /* hardware fails on input that is not a multiple of AES block size */ + if (sz % AES_BLOCK_SIZE != 0) { + return BAD_FUNC_ARG; + } + XMEMCPY(scratch, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + + ret = wc_Pic32AesCrypt( + aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE, + out, in, sz, PIC32_DECRYPTION, + PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCBC); + 
+ /* store iv for next call */ + if (ret == 0) { + XMEMCPY((byte*)aes->reg, scratch, AES_BLOCK_SIZE); + } + + return ret; + } + #endif /* HAVE_AES_DECRYPT */ +#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES) + + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + return wc_esp32AesCbcEncrypt(aes, out, in, sz); + } + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + return wc_esp32AesCbcDecrypt(aes, out, in, sz); + } +#elif defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES) + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + return SaSi_AesBlock(&aes->ctx.user_ctx, (uint8_t* )in, sz, out); + } + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + return SaSi_AesBlock(&aes->ctx.user_ctx, (uint8_t* )in, sz, out); + } +#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) + /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ + +#elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */ + +#elif defined(WOLFSSL_DEVCRYPTO_CBC) + /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */ + +#else + + /* Software AES - CBC Encrypt */ + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + word32 blocks = (sz / AES_BLOCK_SIZE); + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + #ifdef WOLF_CRYPTO_CB + if (aes->devId != INVALID_DEVID) { + int ret = wc_CryptoCb_AesCbcEncrypt(aes, out, in, sz); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + } + #endif + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) + /* if async and byte count above threshold */ + if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES && + sz >= WC_ASYNC_THRESH_AES_CBC) { + #if defined(HAVE_CAVIUM) + return NitroxAesCbcEncrypt(aes, out, in, sz); + #elif defined(HAVE_INTEL_QA) + return IntelQaSymAesCbcEncrypt(&aes->asyncDev, out, in, sz, + (const byte*)aes->devKey, aes->keylen, + (byte*)aes->reg, AES_BLOCK_SIZE); + #else /* WOLFSSL_ASYNC_CRYPT_TEST */ + if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_CBC_ENCRYPT)) { + WC_ASYNC_TEST* testDev = &aes->asyncDev.test; + testDev->aes.aes = aes; + testDev->aes.out = out; + testDev->aes.in = in; + testDev->aes.sz = sz; + return WC_PENDING_E; + } + #endif + } + #endif /* WOLFSSL_ASYNC_CRYPT */ + + #ifdef WOLFSSL_AESNI + if (haveAESNI) { + #ifdef DEBUG_AESNI + printf("about to aes cbc encrypt\n"); + printf("in = %p\n", in); + printf("out = %p\n", out); + printf("aes->key = %p\n", aes->key); + printf("aes->reg = %p\n", aes->reg); + printf("aes->rounds = %d\n", aes->rounds); + printf("sz = %d\n", sz); + #endif + + /* check alignment, decrypt doesn't need alignment */ + if ((wolfssl_word)in % AESNI_ALIGN) { + #ifndef NO_WOLFSSL_ALLOC_ALIGN + byte* tmp = (byte*)XMALLOC(sz + AES_BLOCK_SIZE + AESNI_ALIGN, + aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + byte* tmp_align; + if (tmp == NULL) return MEMORY_E; + + tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN)); + XMEMCPY(tmp_align, in, sz); + AES_CBC_encrypt(tmp_align, tmp_align, (byte*)aes->reg, sz, + (byte*)aes->key, aes->rounds); + /* store iv for next call */ + XMEMCPY(aes->reg, tmp_align + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + + XMEMCPY(out, tmp_align, sz); + XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + return 0; + #else + WOLFSSL_MSG("AES-CBC encrypt with bad alignment"); + return BAD_ALIGN_E; + #endif + } + + 
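+ /* in is AESNI_ALIGN-aligned at this point; encrypt directly from in to out */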
AES_CBC_encrypt(in, out, (byte*)aes->reg, sz, (byte*)aes->key, + aes->rounds); + /* store iv for next call */ + XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + + return 0; + } + #endif + + while (blocks--) { + xorbuf((byte*)aes->reg, in, AES_BLOCK_SIZE); + wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->reg); + XMEMCPY(out, aes->reg, AES_BLOCK_SIZE); + + out += AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + } + + return 0; + } + + #ifdef HAVE_AES_DECRYPT + /* Software AES - CBC Decrypt */ + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + word32 blocks; + + if (aes == NULL || out == NULL || in == NULL + || sz % AES_BLOCK_SIZE != 0) { + return BAD_FUNC_ARG; + } + + #ifdef WOLF_CRYPTO_CB + if (aes->devId != INVALID_DEVID) { + int ret = wc_CryptoCb_AesCbcDecrypt(aes, out, in, sz); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + } + #endif + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) + /* if async and byte count above threshold */ + if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES && + sz >= WC_ASYNC_THRESH_AES_CBC) { + #if defined(HAVE_CAVIUM) + return NitroxAesCbcDecrypt(aes, out, in, sz); + #elif defined(HAVE_INTEL_QA) + return IntelQaSymAesCbcDecrypt(&aes->asyncDev, out, in, sz, + (const byte*)aes->devKey, aes->keylen, + (byte*)aes->reg, AES_BLOCK_SIZE); + #else /* WOLFSSL_ASYNC_CRYPT_TEST */ + if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_CBC_DECRYPT)) { + WC_ASYNC_TEST* testDev = &aes->asyncDev.test; + testDev->aes.aes = aes; + testDev->aes.out = out; + testDev->aes.in = in; + testDev->aes.sz = sz; + return WC_PENDING_E; + } + #endif + } + #endif + + #ifdef WOLFSSL_AESNI + if (haveAESNI) { + #ifdef DEBUG_AESNI + printf("about to aes cbc decrypt\n"); + printf("in = %p\n", in); + printf("out = %p\n", out); + printf("aes->key = %p\n", aes->key); + printf("aes->reg = %p\n", aes->reg); + printf("aes->rounds = %d\n", aes->rounds); + printf("sz = %d\n", sz); + #endif + + /* if input and output same will overwrite input iv */ + XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + #if defined(WOLFSSL_AESNI_BY4) + AES_CBC_decrypt_by4(in, out, (byte*)aes->reg, sz, (byte*)aes->key, + aes->rounds); + #elif defined(WOLFSSL_AESNI_BY6) + AES_CBC_decrypt_by6(in, out, (byte*)aes->reg, sz, (byte*)aes->key, + aes->rounds); + #else /* WOLFSSL_AESNI_BYx */ + AES_CBC_decrypt_by8(in, out, (byte*)aes->reg, sz, (byte*)aes->key, + aes->rounds); + #endif /* WOLFSSL_AESNI_BYx */ + /* store iv for next call */ + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + return 0; + } + #endif + + blocks = sz / AES_BLOCK_SIZE; + while (blocks--) { + XMEMCPY(aes->tmp, in, AES_BLOCK_SIZE); + wc_AesDecrypt(aes, (byte*)aes->tmp, out); + xorbuf(out, (byte*)aes->reg, AES_BLOCK_SIZE); + /* store iv for next call */ + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + + out += AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + } + + return 0; + } + #endif + +#endif /* AES-CBC block */ +#endif /* HAVE_AES_CBC */ + +/* AES-CTR */ +#if defined(WOLFSSL_AES_COUNTER) + + #ifdef STM32_CRYPTO + #define NEED_AES_CTR_SOFT + #define XTRANSFORM_AESCTRBLOCK wc_AesCtrEncryptBlock + + int wc_AesCtrEncryptBlock(Aes* aes, byte* out, const byte* in) + { + int ret = 0; + #ifdef WOLFSSL_STM32_CUBEMX + CRYP_HandleTypeDef hcryp; + #ifdef STM32_HAL_V2 + word32 iv[AES_BLOCK_SIZE/sizeof(word32)]; + #endif + #else + word32 *iv; + CRYP_InitTypeDef cryptInit; + CRYP_KeyInitTypeDef keyInit; + CRYP_IVInitTypeDef ivInit; + #endif + + ret = 
wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + #ifdef WOLFSSL_STM32_CUBEMX + ret = wc_Stm32_Aes_Init(aes, &hcryp); + if (ret != 0) { + wolfSSL_CryptHwMutexUnLock(); + return ret; + } + + #ifdef STM32_CRYPTO_AES_ONLY + hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT; + hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_CTR; + hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE; + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg; + #elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_CTR; + ByteReverseWords(iv, aes->reg, AES_BLOCK_SIZE); + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)iv; + #else + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg; + #endif + HAL_CRYP_Init(&hcryp); + + #ifdef STM32_CRYPTO_AES_ONLY + ret = HAL_CRYPEx_AES(&hcryp, (byte*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #elif defined(STM32_HAL_V2) + ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in, AES_BLOCK_SIZE, + (uint32_t*)out, STM32_HAL_TIMEOUT); + #else + ret = HAL_CRYP_AESCTR_Encrypt(&hcryp, (byte*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #endif + if (ret != HAL_OK) { + ret = WC_TIMEOUT_E; + } + HAL_CRYP_DeInit(&hcryp); + + #else /* STD_PERI_LIB */ + ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit); + if (ret != 0) { + wolfSSL_CryptHwMutexUnLock(); + return ret; + } + + /* reset registers to their default values */ + CRYP_DeInit(); + + /* set key */ + CRYP_KeyInit(&keyInit); + + /* set iv */ + iv = aes->reg; + CRYP_IVStructInit(&ivInit); + ivInit.CRYP_IV0Left = ByteReverseWord32(iv[0]); + ivInit.CRYP_IV0Right = ByteReverseWord32(iv[1]); + ivInit.CRYP_IV1Left = ByteReverseWord32(iv[2]); + ivInit.CRYP_IV1Right = ByteReverseWord32(iv[3]); + CRYP_IVInit(&ivInit); + + /* set direction and mode */ + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_CTR; + CRYP_Init(&cryptInit); + + /* enable crypto processor */ + CRYP_Cmd(ENABLE); + + /* flush IN/OUT FIFOs */ + CRYP_FIFOFlush(); + + CRYP_DataIn(*(uint32_t*)&in[0]); + CRYP_DataIn(*(uint32_t*)&in[4]); + CRYP_DataIn(*(uint32_t*)&in[8]); + CRYP_DataIn(*(uint32_t*)&in[12]); + + /* wait until the complete message has been processed */ + while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} + + *(uint32_t*)&out[0] = CRYP_DataOut(); + *(uint32_t*)&out[4] = CRYP_DataOut(); + *(uint32_t*)&out[8] = CRYP_DataOut(); + *(uint32_t*)&out[12] = CRYP_DataOut(); + + /* disable crypto processor */ + CRYP_Cmd(DISABLE); + + #endif /* WOLFSSL_STM32_CUBEMX */ + + wolfSSL_CryptHwMutexUnLock(); + return ret; + } + + + #elif defined(WOLFSSL_PIC32MZ_CRYPT) + + #define NEED_AES_CTR_SOFT + #define XTRANSFORM_AESCTRBLOCK wc_AesCtrEncryptBlock + + int wc_AesCtrEncryptBlock(Aes* aes, byte* out, const byte* in) + { + word32 tmpIv[AES_BLOCK_SIZE / sizeof(word32)]; + XMEMCPY(tmpIv, aes->reg, AES_BLOCK_SIZE); + return wc_Pic32AesCrypt( + aes->key, aes->keylen, tmpIv, AES_BLOCK_SIZE, + out, in, AES_BLOCK_SIZE, + PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCTR); + } + + #elif defined(HAVE_COLDFIRE_SEC) + #error "Coldfire SEC doesn't currently support AES-CTR mode" + + #elif defined(FREESCALE_LTC) + int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + uint32_t keySize; + byte *iv, *enc_key; + byte* tmp; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + /* consume any unused bytes left in aes->tmp */ + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; + while (aes->left && sz) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + sz--; + } + + if (sz) { + iv = (byte*)aes->reg; 
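+ /* the LTC engine advances the counter itself and leaves any unused
+    keystream bytes in aes->tmp, with their count in aes->left */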
+ enc_key = (byte*)aes->key;
+
+ wc_AesGetKeySize(aes, &keySize);
+
+ LTC_AES_CryptCtr(LTC_BASE, in, out, sz,
+     iv, enc_key, keySize, (byte*)aes->tmp,
+     (uint32_t*)&aes->left);
+ }
+
+ return 0;
+ }
+
+ #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
+ /* implemented in wolfcrypt/src/port/caam/caam_aes.c */
+
+ #elif defined(WOLFSSL_AFALG)
+ /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */
+
+ #elif defined(WOLFSSL_DEVCRYPTO_AES)
+ /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */
+
+ #elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+     !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES)
+ /* esp32 doesn't support CTR mode in hw. */
+ /* use hw aes encryption plus a sw counter implementation */
+ #define NEED_AES_CTR_SOFT
+
+ #else
+
+ /* Use software-based AES counter */
+ #define NEED_AES_CTR_SOFT
+ #endif
+
+ #ifdef NEED_AES_CTR_SOFT
+ /* Increment AES counter */
+ static WC_INLINE void IncrementAesCounter(byte* inOutCtr)
+ {
+     /* in network byte order so start at end and work back */
+     int i;
+     for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
+         if (++inOutCtr[i]) /* we're done unless we overflow */
+             return;
+     }
+ }
+
+ /* Software AES - CTR Encrypt */
+ int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+ {
+     byte* tmp;
+     byte scratch[AES_BLOCK_SIZE];
+
+     if (aes == NULL || out == NULL || in == NULL) {
+         return BAD_FUNC_ARG;
+     }
+
+     /* consume any unused bytes left in aes->tmp */
+     tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;
+     while (aes->left && sz) {
+         *(out++) = *(in++) ^ *(tmp++);
+         aes->left--;
+         sz--;
+     }
+
+     /* do as many block-size ops as possible */
+     while (sz >= AES_BLOCK_SIZE) {
+     #ifdef XTRANSFORM_AESCTRBLOCK
+         XTRANSFORM_AESCTRBLOCK(aes, out, in);
+     #else
+         wc_AesEncrypt(aes, (byte*)aes->reg, scratch);
+         xorbuf(scratch, in, AES_BLOCK_SIZE);
+         XMEMCPY(out, scratch, AES_BLOCK_SIZE);
+     #endif
+         IncrementAesCounter((byte*)aes->reg);
+
+         out += AES_BLOCK_SIZE;
+         in  += AES_BLOCK_SIZE;
+         sz  -= AES_BLOCK_SIZE;
+         aes->left = 0;
+     }
+     ForceZero(scratch, AES_BLOCK_SIZE);
+
+     /* handle any non-block-sized remainder; store unused byte count in left */
+     if (sz) {
+         wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->tmp);
+         IncrementAesCounter((byte*)aes->reg);
+
+         aes->left = AES_BLOCK_SIZE;
+         tmp = (byte*)aes->tmp;
+
+         while (sz--) {
+             *(out++) = *(in++) ^ *(tmp++);
+             aes->left--;
+         }
+     }
+
+     return 0;
+ }
+
+ #endif /* NEED_AES_CTR_SOFT */
+
+#endif /* WOLFSSL_AES_COUNTER */
+#endif /* !WOLFSSL_ARMASM */
+
+
+/*
+ * The IV for AES GCM and CCM, stored in struct Aes's member reg, comprises
+ * two parts, in order:
+ *   1. The fixed field, which may be 0 or 4 bytes long. In TLS, this is set
+ *      to the implicit IV.
+ *   2. The explicit IV, which is generated by wolfCrypt. It needs to be
+ *      managed by wolfCrypt to ensure the IV is unique for each call to
+ *      encrypt.
+ * The IV may be a 96-bit random value, or the 32-bit fixed value and a
+ * 64-bit set of 0 or random data. The final 32 bits of reg are used as a
+ * block counter during the encryption.
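+ *
+ * For example (illustrative layout: 4-byte fixed field plus 8-byte
+ * explicit IV):
+ *   reg bytes  0..3  : fixed field (the TLS implicit IV)
+ *   reg bytes  4..11 : explicit IV, managed by wolfCrypt
+ *   reg bytes 12..15 : 32-bit block counter used during encryption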
+ */ + +#if (defined(HAVE_AESGCM) && !defined(WC_NO_RNG)) || defined(HAVE_AESCCM) +static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz) +{ + int i; + for (i = ctrSz-1; i >= 0; i--) { + if (++ctr[i]) + break; + } +} +#endif /* HAVE_AESGCM || HAVE_AESCCM */ + + +#ifdef HAVE_AESGCM + +#if defined(HAVE_COLDFIRE_SEC) + #error "Coldfire SEC doesn't currently support AES-GCM mode" + +#elif defined(WOLFSSL_NRF51_AES) + #error "nRF51 doesn't currently support AES-GCM mode" + +#endif + +#ifdef WOLFSSL_ARMASM + /* implementation is located in wolfcrypt/src/port/arm/armv8-aes.c */ + +#elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/afalg/afalg_aes.c */ + +#elif defined(WOLFSSL_DEVCRYPTO_AES) + /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */ + +#else /* software + AESNI implementation */ + +#if !defined(FREESCALE_LTC_AES_GCM) +static WC_INLINE void IncrementGcmCounter(byte* inOutCtr) +{ + int i; + + /* in network byte order so start at end and work back */ + for (i = AES_BLOCK_SIZE - 1; i >= AES_BLOCK_SIZE - CTR_SZ; i--) { + if (++inOutCtr[i]) /* we're done unless we overflow */ + return; + } +} +#ifdef STM32_CRYPTO_AES_GCM +static WC_INLINE void DecrementGcmCounter(byte* inOutCtr) +{ + int i; + + /* in network byte order so start at end and work back */ + for (i = AES_BLOCK_SIZE - 1; i >= AES_BLOCK_SIZE - CTR_SZ; i--) { + if (--inOutCtr[i] != 0xFF) /* we're done unless we underflow */ + return; + } +} +#endif /* STM32_CRYPTO_AES_GCM */ +#endif /* !FREESCALE_LTC_AES_GCM */ + +#if defined(GCM_SMALL) || defined(GCM_TABLE) + +static WC_INLINE void FlattenSzInBits(byte* buf, word32 sz) +{ + /* Multiply the sz by 8 */ + word32 szHi = (sz >> (8*sizeof(sz) - 3)); + sz <<= 3; + + /* copy over the words of the sz into the destination buffer */ + buf[0] = (szHi >> 24) & 0xff; + buf[1] = (szHi >> 16) & 0xff; + buf[2] = (szHi >> 8) & 0xff; + buf[3] = szHi & 0xff; + buf[4] = (sz >> 24) & 0xff; + buf[5] = (sz >> 16) & 0xff; + buf[6] = (sz >> 8) & 0xff; + buf[7] = sz & 0xff; +} + + +static WC_INLINE void RIGHTSHIFTX(byte* x) +{ + int i; + int carryOut = 0; + int carryIn = 0; + int borrow = x[15] & 0x01; + + for (i = 0; i < AES_BLOCK_SIZE; i++) { + carryOut = x[i] & 0x01; + x[i] = (x[i] >> 1) | (carryIn ? 
0x80 : 0); + carryIn = carryOut; + } + if (borrow) x[0] ^= 0xE1; +} + +#endif /* defined(GCM_SMALL) || defined(GCM_TABLE) */ + + +#ifdef GCM_TABLE + +static void GenerateM0(Aes* aes) +{ + int i, j; + byte (*m)[AES_BLOCK_SIZE] = aes->M0; + + XMEMCPY(m[128], aes->H, AES_BLOCK_SIZE); + + for (i = 64; i > 0; i /= 2) { + XMEMCPY(m[i], m[i*2], AES_BLOCK_SIZE); + RIGHTSHIFTX(m[i]); + } + + for (i = 2; i < 256; i *= 2) { + for (j = 1; j < i; j++) { + XMEMCPY(m[i+j], m[i], AES_BLOCK_SIZE); + xorbuf(m[i+j], m[j], AES_BLOCK_SIZE); + } + } + + XMEMSET(m[0], 0, AES_BLOCK_SIZE); +} + +#endif /* GCM_TABLE */ + +/* Software AES - GCM SetKey */ +int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) +{ + int ret; + byte iv[AES_BLOCK_SIZE]; + + #ifdef WOLFSSL_IMX6_CAAM_BLOB + byte local[32]; + word32 localSz = 32; + + if (len == (16 + WC_CAAM_BLOB_SZ) || + len == (24 + WC_CAAM_BLOB_SZ) || + len == (32 + WC_CAAM_BLOB_SZ)) { + if (wc_caamOpenBlob((byte*)key, len, local, &localSz) != 0) { + return BAD_FUNC_ARG; + } + + /* set local values */ + key = local; + len = localSz; + } + #endif + + if (!((len == 16) || (len == 24) || (len == 32))) + return BAD_FUNC_ARG; + +#ifdef OPENSSL_EXTRA + if (aes != NULL) { + XMEMSET(aes->aadH, 0, sizeof(aes->aadH)); + aes->aadLen = 0; + } +#endif + XMEMSET(iv, 0, AES_BLOCK_SIZE); + ret = wc_AesSetKey(aes, key, len, iv, AES_ENCRYPTION); + + #ifdef WOLFSSL_AESNI + /* AES-NI code generates its own H value. */ + if (haveAESNI) + return ret; + #endif /* WOLFSSL_AESNI */ + +#if !defined(FREESCALE_LTC_AES_GCM) + if (ret == 0) { + wc_AesEncrypt(aes, iv, aes->H); + #ifdef GCM_TABLE + GenerateM0(aes); + #endif /* GCM_TABLE */ + } +#endif /* FREESCALE_LTC_AES_GCM */ + +#if defined(WOLFSSL_XILINX_CRYPT) + wc_AesGcmSetKey_ex(aes, key, len, XSECURE_CSU_AES_KEY_SRC_KUP); +#elif defined(WOLFSSL_AFALG_XILINX_AES) + wc_AesGcmSetKey_ex(aes, key, len, 0); +#endif + +#ifdef WOLF_CRYPTO_CB + if (aes->devId != INVALID_DEVID) { + XMEMCPY(aes->devKey, key, len); + } +#endif + +#ifdef WOLFSSL_IMX6_CAAM_BLOB + ForceZero(local, sizeof(local)); +#endif + + return ret; +} + + +#ifdef WOLFSSL_AESNI + +#if defined(USE_INTEL_SPEEDUP) + #define HAVE_INTEL_AVX1 + #define HAVE_INTEL_AVX2 +#endif /* USE_INTEL_SPEEDUP */ + +#ifndef _MSC_VER + +void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + unsigned char *tag, unsigned int nbytes, + unsigned int abytes, unsigned int ibytes, + unsigned int tbytes, const unsigned char* key, int nr) + XASM_LINK("AES_GCM_encrypt"); +#ifdef HAVE_INTEL_AVX1 +void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + unsigned char *tag, unsigned int nbytes, + unsigned int abytes, unsigned int ibytes, + unsigned int tbytes, const unsigned char* key, + int nr) + XASM_LINK("AES_GCM_encrypt_avx1"); +#ifdef HAVE_INTEL_AVX2 +void AES_GCM_encrypt_avx2(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + unsigned char *tag, unsigned int nbytes, + unsigned int abytes, unsigned int ibytes, + unsigned int tbytes, const unsigned char* key, + int nr) + XASM_LINK("AES_GCM_encrypt_avx2"); +#endif /* HAVE_INTEL_AVX2 */ +#endif /* HAVE_INTEL_AVX1 */ + +#ifdef HAVE_AES_DECRYPT +void AES_GCM_decrypt(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + const unsigned char *tag, int nbytes, int abytes, + int ibytes, int tbytes, const unsigned char* key, int nr, + 
int* res) + XASM_LINK("AES_GCM_decrypt"); +#ifdef HAVE_INTEL_AVX1 +void AES_GCM_decrypt_avx1(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + const unsigned char *tag, int nbytes, int abytes, + int ibytes, int tbytes, const unsigned char* key, + int nr, int* res) + XASM_LINK("AES_GCM_decrypt_avx1"); +#ifdef HAVE_INTEL_AVX2 +void AES_GCM_decrypt_avx2(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + const unsigned char *tag, int nbytes, int abytes, + int ibytes, int tbytes, const unsigned char* key, + int nr, int* res) + XASM_LINK("AES_GCM_decrypt_avx2"); +#endif /* HAVE_INTEL_AVX2 */ +#endif /* HAVE_INTEL_AVX1 */ +#endif /* HAVE_AES_DECRYPT */ + +#else /* _MSC_VER */ + +#define S(w,z) ((char)((unsigned long long)(w) >> (8*(7-(z))) & 0xFF)) +#define M128_INIT(x,y) { S((x),7), S((x),6), S((x),5), S((x),4), \ + S((x),3), S((x),2), S((x),1), S((x),0), \ + S((y),7), S((y),6), S((y),5), S((y),4), \ + S((y),3), S((y),2), S((y),1), S((y),0) } + +static const __m128i MOD2_128 = + M128_INIT(0x1, (long long int)0xc200000000000000UL); + + +/* See Intel® Carry-Less Multiplication Instruction + * and its Usage for Computing the GCM Mode White Paper + * by Shay Gueron, Intel Mobility Group, Israel Development Center; + * and Michael E. Kounavis, Intel Labs, Circuits and Systems Research */ + + +/* Figure 9. AES-GCM – Encrypt With Single Block Ghash at a Time */ + +static const __m128i ONE = M128_INIT(0x0, 0x1); +#ifndef AES_GCM_AESNI_NO_UNROLL +static const __m128i TWO = M128_INIT(0x0, 0x2); +static const __m128i THREE = M128_INIT(0x0, 0x3); +static const __m128i FOUR = M128_INIT(0x0, 0x4); +static const __m128i FIVE = M128_INIT(0x0, 0x5); +static const __m128i SIX = M128_INIT(0x0, 0x6); +static const __m128i SEVEN = M128_INIT(0x0, 0x7); +static const __m128i EIGHT = M128_INIT(0x0, 0x8); +#endif +static const __m128i BSWAP_EPI64 = + M128_INIT(0x0001020304050607, 0x08090a0b0c0d0e0f); +static const __m128i BSWAP_MASK = + M128_INIT(0x08090a0b0c0d0e0f, 0x0001020304050607); + + +/* The following are for MSC based builds which do not allow + * inline assembly. Intrinsic functions are used instead. 
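+ *
+ * Note: the aes_gcm_calc_iv_12 macro below pipelines two AES streams
+ * through the same round keys, computing the hash key H = E(K, 0^128)
+ * and the first counter-block encryption E(K, Y0) in a single pass.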
*/ + +#define aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T) \ +do \ +{ \ + word32 iv12[4]; \ + iv12[0] = *(word32*)&ivec[0]; \ + iv12[1] = *(word32*)&ivec[4]; \ + iv12[2] = *(word32*)&ivec[8]; \ + iv12[3] = 0x01000000; \ + Y = _mm_loadu_si128((__m128i*)iv12); \ + \ + /* (Compute E[ZERO, KS] and E[Y0, KS] together */ \ + tmp1 = _mm_load_si128(&KEY[0]); \ + tmp2 = _mm_xor_si128(Y, KEY[0]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[1]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[2]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[3]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[4]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[5]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[6]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[7]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[8]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[9]); \ + lastKey = KEY[10]; \ + if (nr > 10) { \ + tmp1 = _mm_aesenc_si128(tmp1, lastKey); \ + tmp2 = _mm_aesenc_si128(tmp2, lastKey); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[11]); \ + lastKey = KEY[12]; \ + if (nr > 12) { \ + tmp1 = _mm_aesenc_si128(tmp1, lastKey); \ + tmp2 = _mm_aesenc_si128(tmp2, lastKey); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[13]); \ + lastKey = KEY[14]; \ + } \ + } \ + H = _mm_aesenclast_si128(tmp1, lastKey); \ + T = _mm_aesenclast_si128(tmp2, lastKey); \ + H = _mm_shuffle_epi8(H, BSWAP_MASK); \ +} \ +while (0) + +#define aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T) \ +do \ +{ \ + if (ibytes % 16) { \ + i = ibytes / 16; \ + for (j=0; j < (int)(ibytes%16); j++) \ + ((unsigned char*)&last_block)[j] = ivec[i*16+j]; \ + } \ + tmp1 = _mm_load_si128(&KEY[0]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); \ + lastKey = KEY[10]; \ + if (nr > 10) { \ + tmp1 = _mm_aesenc_si128(tmp1, lastKey); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); \ + lastKey = KEY[12]; \ + if (nr > 12) { \ + tmp1 = _mm_aesenc_si128(tmp1, lastKey); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); \ + lastKey = KEY[14]; \ + } \ + } \ + H = _mm_aesenclast_si128(tmp1, lastKey); \ + H = _mm_shuffle_epi8(H, BSWAP_MASK); \ + Y = _mm_setzero_si128(); \ + for (i=0; i < (int)(ibytes/16); i++) { \ + tmp1 = _mm_loadu_si128(&((__m128i*)ivec)[i]); \ + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); \ + Y = _mm_xor_si128(Y, tmp1); \ + Y = gfmul_sw(Y, H); \ + } \ + if (ibytes % 16) { \ + tmp1 = last_block; \ + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); \ + Y = _mm_xor_si128(Y, tmp1); \ + Y = gfmul_sw(Y, H); \ + } \ + tmp1 = _mm_insert_epi64(tmp1, ibytes*8, 0); \ + tmp1 = _mm_insert_epi64(tmp1, 0, 1); \ + Y = _mm_xor_si128(Y, tmp1); \ + Y = gfmul_sw(Y, H); \ + Y = _mm_shuffle_epi8(Y, BSWAP_MASK); /* Compute E(K, Y0) */ \ + tmp1 = _mm_xor_si128(Y, KEY[0]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); \ + tmp1 = _mm_aesenc_si128(tmp1, 
KEY[3]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); \ + lastKey = KEY[10]; \ + if (nr > 10) { \ + tmp1 = _mm_aesenc_si128(tmp1, lastKey); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); \ + lastKey = KEY[12]; \ + if (nr > 12) { \ + tmp1 = _mm_aesenc_si128(tmp1, lastKey); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); \ + lastKey = KEY[14]; \ + } \ + } \ + T = _mm_aesenclast_si128(tmp1, lastKey); \ +} \ +while (0) + +#define AES_ENC_8(j) \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); \ + tmp3 = _mm_aesenc_si128(tmp3, KEY[j]); \ + tmp4 = _mm_aesenc_si128(tmp4, KEY[j]); \ + tmp5 = _mm_aesenc_si128(tmp5, KEY[j]); \ + tmp6 = _mm_aesenc_si128(tmp6, KEY[j]); \ + tmp7 = _mm_aesenc_si128(tmp7, KEY[j]); \ + tmp8 = _mm_aesenc_si128(tmp8, KEY[j]); + +#define AES_ENC_LAST_8() \ + tmp1 =_mm_aesenclast_si128(tmp1, lastKey); \ + tmp2 =_mm_aesenclast_si128(tmp2, lastKey); \ + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[i*8+0])); \ + tmp2 = _mm_xor_si128(tmp2, _mm_loadu_si128(&((__m128i*)in)[i*8+1])); \ + _mm_storeu_si128(&((__m128i*)out)[i*8+0], tmp1); \ + _mm_storeu_si128(&((__m128i*)out)[i*8+1], tmp2); \ + tmp3 =_mm_aesenclast_si128(tmp3, lastKey); \ + tmp4 =_mm_aesenclast_si128(tmp4, lastKey); \ + tmp3 = _mm_xor_si128(tmp3, _mm_loadu_si128(&((__m128i*)in)[i*8+2])); \ + tmp4 = _mm_xor_si128(tmp4, _mm_loadu_si128(&((__m128i*)in)[i*8+3])); \ + _mm_storeu_si128(&((__m128i*)out)[i*8+2], tmp3); \ + _mm_storeu_si128(&((__m128i*)out)[i*8+3], tmp4); \ + tmp5 =_mm_aesenclast_si128(tmp5, lastKey); \ + tmp6 =_mm_aesenclast_si128(tmp6, lastKey); \ + tmp5 = _mm_xor_si128(tmp5, _mm_loadu_si128(&((__m128i*)in)[i*8+4])); \ + tmp6 = _mm_xor_si128(tmp6, _mm_loadu_si128(&((__m128i*)in)[i*8+5])); \ + _mm_storeu_si128(&((__m128i*)out)[i*8+4], tmp5); \ + _mm_storeu_si128(&((__m128i*)out)[i*8+5], tmp6); \ + tmp7 =_mm_aesenclast_si128(tmp7, lastKey); \ + tmp8 =_mm_aesenclast_si128(tmp8, lastKey); \ + tmp7 = _mm_xor_si128(tmp7, _mm_loadu_si128(&((__m128i*)in)[i*8+6])); \ + tmp8 = _mm_xor_si128(tmp8, _mm_loadu_si128(&((__m128i*)in)[i*8+7])); \ + _mm_storeu_si128(&((__m128i*)out)[i*8+6], tmp7); \ + _mm_storeu_si128(&((__m128i*)out)[i*8+7], tmp8); + + +static __m128i gfmul_sw(__m128i a, __m128i b) +{ + __m128i r, t1, t2, t3, t4, t5, t6, t7; + t2 = _mm_shuffle_epi32(b, 78); + t3 = _mm_shuffle_epi32(a, 78); + t2 = _mm_xor_si128(t2, b); + t3 = _mm_xor_si128(t3, a); + t4 = _mm_clmulepi64_si128(b, a, 0x11); + t1 = _mm_clmulepi64_si128(b, a, 0x00); + t2 = _mm_clmulepi64_si128(t2, t3, 0x00); + t2 = _mm_xor_si128(t2, t1); + t2 = _mm_xor_si128(t2, t4); + t3 = _mm_slli_si128(t2, 8); + t2 = _mm_srli_si128(t2, 8); + t1 = _mm_xor_si128(t1, t3); + t4 = _mm_xor_si128(t4, t2); + + t5 = _mm_srli_epi32(t1, 31); + t6 = _mm_srli_epi32(t4, 31); + t1 = _mm_slli_epi32(t1, 1); + t4 = _mm_slli_epi32(t4, 1); + t7 = _mm_srli_si128(t5, 12); + t5 = _mm_slli_si128(t5, 4); + t6 = _mm_slli_si128(t6, 4); + t4 = _mm_or_si128(t4, t7); + t1 = _mm_or_si128(t1, t5); + t4 = _mm_or_si128(t4, t6); + + t5 = _mm_slli_epi32(t1, 31); + t6 = _mm_slli_epi32(t1, 30); + t7 = _mm_slli_epi32(t1, 25); + t5 = _mm_xor_si128(t5, t6); + t5 = _mm_xor_si128(t5, t7); + + t6 = _mm_srli_si128(t5, 4); + t5 = _mm_slli_si128(t5, 12); + t1 = _mm_xor_si128(t1, t5); + t7 = _mm_srli_epi32(t1, 1); + t3 = _mm_srli_epi32(t1, 2); + t2 = 
_mm_srli_epi32(t1, 7); + + t7 = _mm_xor_si128(t7, t3); + t7 = _mm_xor_si128(t7, t2); + t7 = _mm_xor_si128(t7, t6); + t7 = _mm_xor_si128(t7, t1); + r = _mm_xor_si128(t4, t7); + + return r; +} + +static void gfmul_only(__m128i a, __m128i b, __m128i* r0, __m128i* r1) +{ + __m128i t1, t2, t3, t4; + + /* 128 x 128 Carryless Multiply */ + t2 = _mm_shuffle_epi32(b, 78); + t3 = _mm_shuffle_epi32(a, 78); + t2 = _mm_xor_si128(t2, b); + t3 = _mm_xor_si128(t3, a); + t4 = _mm_clmulepi64_si128(b, a, 0x11); + t1 = _mm_clmulepi64_si128(b, a, 0x00); + t2 = _mm_clmulepi64_si128(t2, t3, 0x00); + t2 = _mm_xor_si128(t2, t1); + t2 = _mm_xor_si128(t2, t4); + t3 = _mm_slli_si128(t2, 8); + t2 = _mm_srli_si128(t2, 8); + t1 = _mm_xor_si128(t1, t3); + t4 = _mm_xor_si128(t4, t2); + *r0 = _mm_xor_si128(t1, *r0); + *r1 = _mm_xor_si128(t4, *r1); +} + +static __m128i gfmul_shl1(__m128i a) +{ + __m128i t1 = a, t2; + t2 = _mm_srli_epi64(t1, 63); + t1 = _mm_slli_epi64(t1, 1); + t2 = _mm_slli_si128(t2, 8); + t1 = _mm_or_si128(t1, t2); + /* if (a[1] >> 63) t1 = _mm_xor_si128(t1, MOD2_128); */ + a = _mm_shuffle_epi32(a, 0xff); + a = _mm_srai_epi32(a, 31); + a = _mm_and_si128(a, MOD2_128); + t1 = _mm_xor_si128(t1, a); + return t1; +} + +static __m128i ghash_red(__m128i r0, __m128i r1) +{ + __m128i t2, t3; + __m128i t5, t6, t7; + + t5 = _mm_slli_epi32(r0, 31); + t6 = _mm_slli_epi32(r0, 30); + t7 = _mm_slli_epi32(r0, 25); + t5 = _mm_xor_si128(t5, t6); + t5 = _mm_xor_si128(t5, t7); + + t6 = _mm_srli_si128(t5, 4); + t5 = _mm_slli_si128(t5, 12); + r0 = _mm_xor_si128(r0, t5); + t7 = _mm_srli_epi32(r0, 1); + t3 = _mm_srli_epi32(r0, 2); + t2 = _mm_srli_epi32(r0, 7); + + t7 = _mm_xor_si128(t7, t3); + t7 = _mm_xor_si128(t7, t2); + t7 = _mm_xor_si128(t7, t6); + t7 = _mm_xor_si128(t7, r0); + return _mm_xor_si128(r1, t7); +} + +static __m128i gfmul_shifted(__m128i a, __m128i b) +{ + __m128i t0 = _mm_setzero_si128(), t1 = _mm_setzero_si128(); + gfmul_only(a, b, &t0, &t1); + return ghash_red(t0, t1); +} + +#ifndef AES_GCM_AESNI_NO_UNROLL +static __m128i gfmul8(__m128i a1, __m128i a2, __m128i a3, __m128i a4, + __m128i a5, __m128i a6, __m128i a7, __m128i a8, + __m128i b1, __m128i b2, __m128i b3, __m128i b4, + __m128i b5, __m128i b6, __m128i b7, __m128i b8) +{ + __m128i t0 = _mm_setzero_si128(), t1 = _mm_setzero_si128(); + gfmul_only(a1, b8, &t0, &t1); + gfmul_only(a2, b7, &t0, &t1); + gfmul_only(a3, b6, &t0, &t1); + gfmul_only(a4, b5, &t0, &t1); + gfmul_only(a5, b4, &t0, &t1); + gfmul_only(a6, b3, &t0, &t1); + gfmul_only(a7, b2, &t0, &t1); + gfmul_only(a8, b1, &t0, &t1); + return ghash_red(t0, t1); +} +#endif + + +static void AES_GCM_encrypt(const unsigned char *in, + unsigned char *out, + const unsigned char* addt, + const unsigned char* ivec, + unsigned char *tag, unsigned int nbytes, + unsigned int abytes, unsigned int ibytes, + unsigned int tbytes, + const unsigned char* key, int nr) +{ + int i, j ,k; + __m128i ctr1; + __m128i H, Y, T; + __m128i X = _mm_setzero_si128(); + __m128i *KEY = (__m128i*)key, lastKey; + __m128i last_block = _mm_setzero_si128(); + __m128i tmp1, tmp2; +#ifndef AES_GCM_AESNI_NO_UNROLL + __m128i HT[8]; + __m128i r0, r1; + __m128i XV; + __m128i tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; +#endif + + if (ibytes == GCM_NONCE_MID_SZ) + aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T); + else + aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T); + + for (i=0; i < (int)(abytes/16); i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X, tmp1); + X = gfmul_sw(X, H); + } + if 
(abytes%16) { + last_block = _mm_setzero_si128(); + for (j=0; j < (int)(abytes%16); j++) + ((unsigned char*)&last_block)[j] = addt[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X, tmp1); + X = gfmul_sw(X, H); + } + tmp1 = _mm_shuffle_epi8(Y, BSWAP_EPI64); + ctr1 = _mm_add_epi32(tmp1, ONE); + H = gfmul_shl1(H); + +#ifndef AES_GCM_AESNI_NO_UNROLL + i = 0; + if (nbytes >= 16*8) { + HT[0] = H; + HT[1] = gfmul_shifted(H, H); + HT[2] = gfmul_shifted(H, HT[1]); + HT[3] = gfmul_shifted(HT[1], HT[1]); + HT[4] = gfmul_shifted(HT[1], HT[2]); + HT[5] = gfmul_shifted(HT[2], HT[2]); + HT[6] = gfmul_shifted(HT[2], HT[3]); + HT[7] = gfmul_shifted(HT[3], HT[3]); + + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp2 = _mm_add_epi32(ctr1, ONE); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_EPI64); + tmp3 = _mm_add_epi32(ctr1, TWO); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_EPI64); + tmp4 = _mm_add_epi32(ctr1, THREE); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_EPI64); + tmp5 = _mm_add_epi32(ctr1, FOUR); + tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_EPI64); + tmp6 = _mm_add_epi32(ctr1, FIVE); + tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_EPI64); + tmp7 = _mm_add_epi32(ctr1, SIX); + tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_EPI64); + tmp8 = _mm_add_epi32(ctr1, SEVEN); + tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_EPI64); + ctr1 = _mm_add_epi32(ctr1, EIGHT); + tmp1 =_mm_xor_si128(tmp1, KEY[0]); + tmp2 =_mm_xor_si128(tmp2, KEY[0]); + tmp3 =_mm_xor_si128(tmp3, KEY[0]); + tmp4 =_mm_xor_si128(tmp4, KEY[0]); + tmp5 =_mm_xor_si128(tmp5, KEY[0]); + tmp6 =_mm_xor_si128(tmp6, KEY[0]); + tmp7 =_mm_xor_si128(tmp7, KEY[0]); + tmp8 =_mm_xor_si128(tmp8, KEY[0]); + AES_ENC_8(1); + AES_ENC_8(2); + AES_ENC_8(3); + AES_ENC_8(4); + AES_ENC_8(5); + AES_ENC_8(6); + AES_ENC_8(7); + AES_ENC_8(8); + AES_ENC_8(9); + lastKey = KEY[10]; + if (nr > 10) { + AES_ENC_8(10); + AES_ENC_8(11); + lastKey = KEY[12]; + if (nr > 12) { + AES_ENC_8(12); + AES_ENC_8(13); + lastKey = KEY[14]; + } + } + AES_ENC_LAST_8(); + + for (i=1; i < (int)(nbytes/16/8); i++) { + r0 = _mm_setzero_si128(); + r1 = _mm_setzero_si128(); + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp2 = _mm_add_epi32(ctr1, ONE); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_EPI64); + tmp3 = _mm_add_epi32(ctr1, TWO); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_EPI64); + tmp4 = _mm_add_epi32(ctr1, THREE); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_EPI64); + tmp5 = _mm_add_epi32(ctr1, FOUR); + tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_EPI64); + tmp6 = _mm_add_epi32(ctr1, FIVE); + tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_EPI64); + tmp7 = _mm_add_epi32(ctr1, SIX); + tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_EPI64); + tmp8 = _mm_add_epi32(ctr1, SEVEN); + tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_EPI64); + ctr1 = _mm_add_epi32(ctr1, EIGHT); + tmp1 =_mm_xor_si128(tmp1, KEY[0]); + tmp2 =_mm_xor_si128(tmp2, KEY[0]); + tmp3 =_mm_xor_si128(tmp3, KEY[0]); + tmp4 =_mm_xor_si128(tmp4, KEY[0]); + tmp5 =_mm_xor_si128(tmp5, KEY[0]); + tmp6 =_mm_xor_si128(tmp6, KEY[0]); + tmp7 =_mm_xor_si128(tmp7, KEY[0]); + tmp8 =_mm_xor_si128(tmp8, KEY[0]); + /* 128 x 128 Carryless Multiply */ + XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+0]); + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + XV = _mm_xor_si128(XV, X); + gfmul_only(XV, HT[7], &r0, &r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[1]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[1]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[1]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[1]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[1]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[1]); + tmp8 = 
_mm_aesenc_si128(tmp8, KEY[1]); + /* 128 x 128 Carryless Multiply */ + XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+1]); + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + gfmul_only(XV, HT[6], &r0, &r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[2]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[2]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[2]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[2]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[2]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[2]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[2]); + /* 128 x 128 Carryless Multiply */ + XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+2]); + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + gfmul_only(XV, HT[5], &r0, &r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[3]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[3]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[3]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[3]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[3]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[3]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[3]); + /* 128 x 128 Carryless Multiply */ + XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+3]); + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + gfmul_only(XV, HT[4], &r0, &r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[4]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[4]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[4]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[4]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[4]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[4]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[4]); + /* 128 x 128 Carryless Multiply */ + XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+4]); + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + gfmul_only(XV, HT[3], &r0, &r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[5]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[5]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[5]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[5]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[5]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[5]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[5]); + /* 128 x 128 Carryless Multiply */ + XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+5]); + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + gfmul_only(XV, HT[2], &r0, &r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[6]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[6]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[6]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[6]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[6]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[6]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[6]); + /* 128 x 128 Carryless Multiply */ + XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+6]); + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + gfmul_only(XV, HT[1], &r0, &r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[7]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[7]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[7]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[7]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[7]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[7]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[7]); + /* 128 x 128 Carryless Multiply */ + XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+7]); + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + gfmul_only(XV, HT[0], &r0, &r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[8]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[8]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[8]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[8]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[8]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[8]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[8]); + /* Reduction */ + X = ghash_red(r0, r1); + tmp1 = 
_mm_aesenc_si128(tmp1, KEY[9]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[9]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[9]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[9]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[9]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[9]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[9]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[9]); + lastKey = KEY[10]; + if (nr > 10) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[10]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[10]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[10]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[10]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[10]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[10]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[10]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[10]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[11]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[11]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[11]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[11]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[11]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[11]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[11]); + lastKey = KEY[12]; + if (nr > 12) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[12]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[12]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[12]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[12]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[12]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[12]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[12]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[12]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[13]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[13]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[13]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[13]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[13]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[13]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[13]); + lastKey = KEY[14]; + } + } + AES_ENC_LAST_8(); + } + + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_MASK); + tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_MASK); + tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_MASK); + tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_MASK); + tmp1 = _mm_xor_si128(X, tmp1); + X = gfmul8(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, + HT[0], HT[1], HT[2], HT[3], HT[4], HT[5], HT[6], HT[7]); + } + for (k = i*8; k < (int)(nbytes/16); k++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr1 = _mm_add_epi32(ctr1, ONE); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); + lastKey = KEY[10]; + if (nr > 10) { + tmp1 = _mm_aesenc_si128(tmp1, lastKey); + tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); + lastKey = KEY[12]; + if (nr > 12) { + tmp1 = _mm_aesenc_si128(tmp1, lastKey); + tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); + lastKey = KEY[14]; + } + } + tmp1 = _mm_aesenclast_si128(tmp1, lastKey); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + _mm_storeu_si128(&((__m128i*)out)[k], tmp1); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X =_mm_xor_si128(X, tmp1); + X = gfmul_shifted(X, H); + } +#else /* AES_GCM_AESNI_NO_UNROLL */ + for (k = 0; k < (int)(nbytes/16) && k < 1; k++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr1 = _mm_add_epi32(ctr1, ONE); + 
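+ /* first block: encrypt only; folding it into the GHASH is deferred
+    to the next iteration's X = gfmul_shifted(X, H) */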
tmp1 = _mm_xor_si128(tmp1, KEY[0]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); + lastKey = KEY[10]; + if (nr > 10) { + tmp1 = _mm_aesenc_si128(tmp1, lastKey); + tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); + lastKey = KEY[12]; + if (nr > 12) { + tmp1 = _mm_aesenc_si128(tmp1, lastKey); + tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); + lastKey = KEY[14]; + } + } + tmp1 = _mm_aesenclast_si128(tmp1, lastKey); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + _mm_storeu_si128(&((__m128i*)out)[k], tmp1); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X =_mm_xor_si128(X, tmp1); + } + for (; k < (int)(nbytes/16); k++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr1 = _mm_add_epi32(ctr1, ONE); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); + X = gfmul_shifted(X, H); + lastKey = KEY[10]; + if (nr > 10) { + tmp1 = _mm_aesenc_si128(tmp1, lastKey); + tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); + lastKey = KEY[12]; + if (nr > 12) { + tmp1 = _mm_aesenc_si128(tmp1, lastKey); + tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); + lastKey = KEY[14]; + } + } + tmp1 = _mm_aesenclast_si128(tmp1, lastKey); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + _mm_storeu_si128(&((__m128i*)out)[k], tmp1); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X =_mm_xor_si128(X, tmp1); + } + if (k > 0) { + X = gfmul_shifted(X, H); + } +#endif /* AES_GCM_AESNI_NO_UNROLL */ + + /* If one partial block remains */ + if (nbytes % 16) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); + lastKey = KEY[10]; + if (nr > 10) { + tmp1 = _mm_aesenc_si128(tmp1, lastKey); + tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); + lastKey = KEY[12]; + if (nr > 12) { + tmp1 = _mm_aesenc_si128(tmp1, lastKey); + tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); + lastKey = KEY[14]; + } + } + tmp1 = _mm_aesenclast_si128(tmp1, lastKey); + last_block = tmp1; + for (j=0; j < (int)(nbytes%16); j++) + ((unsigned char*)&last_block)[j] = in[k*16+j]; + tmp1 = _mm_xor_si128(tmp1, last_block); + last_block = tmp1; + for (j=0; j < (int)(nbytes%16); j++) + out[k*16+j] = ((unsigned char*)&last_block)[j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X =_mm_xor_si128(X, tmp1); + X = gfmul_shifted(X, H); + } + tmp1 = _mm_insert_epi64(tmp1, nbytes*8, 0); + tmp1 = _mm_insert_epi64(tmp1, abytes*8, 1); + X = _mm_xor_si128(X, tmp1); + X = gfmul_shifted(X, H); + X = _mm_shuffle_epi8(X, BSWAP_MASK); + T = _mm_xor_si128(X, T); + /*_mm_storeu_si128((__m128i*)tag, T);*/ + XMEMCPY(tag, 
&T, tbytes);
+}
+
+#ifdef HAVE_AES_DECRYPT
+
+static void AES_GCM_decrypt(const unsigned char *in,
+                            unsigned char *out,
+                            const unsigned char* addt,
+                            const unsigned char* ivec,
+                            const unsigned char *tag, int nbytes, int abytes,
+                            int ibytes, word32 tbytes, const unsigned char* key,
+                            int nr, int* res)
+{
+    int i, j, k;
+    __m128i H, Y, T;
+    __m128i *KEY = (__m128i*)key, lastKey;
+    __m128i ctr1;
+    __m128i last_block = _mm_setzero_si128();
+    __m128i X = _mm_setzero_si128();
+    __m128i tmp1, tmp2, XV;
+#ifndef AES_GCM_AESNI_NO_UNROLL
+    __m128i HT[8];
+    __m128i r0, r1;
+    __m128i tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+#endif /* AES_GCM_AESNI_NO_UNROLL */
+
+    if (ibytes == GCM_NONCE_MID_SZ)
+        aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T);
+    else
+        aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T);
+
+    for (i=0; i < abytes/16; i++) {
+        tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]);
+        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+        X = _mm_xor_si128(X, tmp1);
+        X = gfmul_sw(X, H);
+    }
+    if (abytes%16) {
+        last_block = _mm_setzero_si128();
+        for (j=0; j < abytes%16; j++)
+            ((unsigned char*)&last_block)[j] = addt[i*16+j];
+        tmp1 = last_block;
+        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+        X = _mm_xor_si128(X, tmp1);
+        X = gfmul_sw(X, H);
+    }
+
+    tmp1 = _mm_shuffle_epi8(Y, BSWAP_EPI64);
+    ctr1 = _mm_add_epi32(tmp1, ONE);
+    H = gfmul_shl1(H);
+    i = 0;
+
+#ifndef AES_GCM_AESNI_NO_UNROLL
+    if (0 < nbytes/16/8) {
+        HT[0] = H;
+        HT[1] = gfmul_shifted(H, H);
+        HT[2] = gfmul_shifted(H, HT[1]);
+        HT[3] = gfmul_shifted(HT[1], HT[1]);
+        HT[4] = gfmul_shifted(HT[1], HT[2]);
+        HT[5] = gfmul_shifted(HT[2], HT[2]);
+        HT[6] = gfmul_shifted(HT[2], HT[3]);
+        HT[7] = gfmul_shifted(HT[3], HT[3]);
+
+        for (; i < nbytes/16/8; i++) {
+            r0 = _mm_setzero_si128();
+            r1 = _mm_setzero_si128();
+            tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+            tmp2 = _mm_add_epi32(ctr1, ONE);
+            tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_EPI64);
+            tmp3 = _mm_add_epi32(ctr1, TWO);
+            tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_EPI64);
+            tmp4 = _mm_add_epi32(ctr1, THREE);
+            tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_EPI64);
+            tmp5 = _mm_add_epi32(ctr1, FOUR);
+            tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_EPI64);
+            tmp6 = _mm_add_epi32(ctr1, FIVE);
+            tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_EPI64);
+            tmp7 = _mm_add_epi32(ctr1, SIX);
+            tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_EPI64);
+            tmp8 = _mm_add_epi32(ctr1, SEVEN);
+            tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_EPI64);
+            ctr1 = _mm_add_epi32(ctr1, EIGHT);
+            tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+            tmp2 = _mm_xor_si128(tmp2, KEY[0]);
+            tmp3 = _mm_xor_si128(tmp3, KEY[0]);
+            tmp4 = _mm_xor_si128(tmp4, KEY[0]);
+            tmp5 = _mm_xor_si128(tmp5, KEY[0]);
+            tmp6 = _mm_xor_si128(tmp6, KEY[0]);
+            tmp7 = _mm_xor_si128(tmp7, KEY[0]);
+            tmp8 = _mm_xor_si128(tmp8, KEY[0]);
+            /* 128 x 128 Carryless Multiply */
+            XV = _mm_loadu_si128(&((__m128i*)in)[i*8+0]);
+            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+            XV = _mm_xor_si128(XV, X);
+            gfmul_only(XV, HT[7], &r0, &r1);
+            tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
+            tmp2 = _mm_aesenc_si128(tmp2, KEY[1]);
+            tmp3 = _mm_aesenc_si128(tmp3, KEY[1]);
+            tmp4 = _mm_aesenc_si128(tmp4, KEY[1]);
+            tmp5 = _mm_aesenc_si128(tmp5, KEY[1]);
+            tmp6 = _mm_aesenc_si128(tmp6, KEY[1]);
+            tmp7 = _mm_aesenc_si128(tmp7, KEY[1]);
+            tmp8 = _mm_aesenc_si128(tmp8, KEY[1]);
+            /* 128 x 128 Carryless Multiply */
+            XV = _mm_loadu_si128(&((__m128i*)in)[i*8+1]);
+            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+            gfmul_only(XV, HT[6], &r0, &r1);
+            tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
+            tmp2 = _mm_aesenc_si128(tmp2, KEY[2]);
+            tmp3 = _mm_aesenc_si128(tmp3, KEY[2]);
+            tmp4 = _mm_aesenc_si128(tmp4, KEY[2]);
+            tmp5 = _mm_aesenc_si128(tmp5, KEY[2]);
+            tmp6 = _mm_aesenc_si128(tmp6, KEY[2]);
+            tmp7 = _mm_aesenc_si128(tmp7, KEY[2]);
+            tmp8 = _mm_aesenc_si128(tmp8, KEY[2]);
+            /* 128 x 128 Carryless Multiply */
+            XV = _mm_loadu_si128(&((__m128i*)in)[i*8+2]);
+            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+            gfmul_only(XV, HT[5], &r0, &r1);
+            tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
+            tmp2 = _mm_aesenc_si128(tmp2, KEY[3]);
+            tmp3 = _mm_aesenc_si128(tmp3, KEY[3]);
+            tmp4 = _mm_aesenc_si128(tmp4, KEY[3]);
+            tmp5 = _mm_aesenc_si128(tmp5, KEY[3]);
+            tmp6 = _mm_aesenc_si128(tmp6, KEY[3]);
+            tmp7 = _mm_aesenc_si128(tmp7, KEY[3]);
+            tmp8 = _mm_aesenc_si128(tmp8, KEY[3]);
+            /* 128 x 128 Carryless Multiply */
+            XV = _mm_loadu_si128(&((__m128i*)in)[i*8+3]);
+            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+            gfmul_only(XV, HT[4], &r0, &r1);
+            tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
+            tmp2 = _mm_aesenc_si128(tmp2, KEY[4]);
+            tmp3 = _mm_aesenc_si128(tmp3, KEY[4]);
+            tmp4 = _mm_aesenc_si128(tmp4, KEY[4]);
+            tmp5 = _mm_aesenc_si128(tmp5, KEY[4]);
+            tmp6 = _mm_aesenc_si128(tmp6, KEY[4]);
+            tmp7 = _mm_aesenc_si128(tmp7, KEY[4]);
+            tmp8 = _mm_aesenc_si128(tmp8, KEY[4]);
+            /* 128 x 128 Carryless Multiply */
+            XV = _mm_loadu_si128(&((__m128i*)in)[i*8+4]);
+            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+            gfmul_only(XV, HT[3], &r0, &r1);
+            tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
+            tmp2 = _mm_aesenc_si128(tmp2, KEY[5]);
+            tmp3 = _mm_aesenc_si128(tmp3, KEY[5]);
+            tmp4 = _mm_aesenc_si128(tmp4, KEY[5]);
+            tmp5 = _mm_aesenc_si128(tmp5, KEY[5]);
+            tmp6 = _mm_aesenc_si128(tmp6, KEY[5]);
+            tmp7 = _mm_aesenc_si128(tmp7, KEY[5]);
+            tmp8 = _mm_aesenc_si128(tmp8, KEY[5]);
+            /* 128 x 128 Carryless Multiply */
+            XV = _mm_loadu_si128(&((__m128i*)in)[i*8+5]);
+            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+            gfmul_only(XV, HT[2], &r0, &r1);
+            tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
+            tmp2 = _mm_aesenc_si128(tmp2, KEY[6]);
+            tmp3 = _mm_aesenc_si128(tmp3, KEY[6]);
+            tmp4 = _mm_aesenc_si128(tmp4, KEY[6]);
+            tmp5 = _mm_aesenc_si128(tmp5, KEY[6]);
+            tmp6 = _mm_aesenc_si128(tmp6, KEY[6]);
+            tmp7 = _mm_aesenc_si128(tmp7, KEY[6]);
+            tmp8 = _mm_aesenc_si128(tmp8, KEY[6]);
+            /* 128 x 128 Carryless Multiply */
+            XV = _mm_loadu_si128(&((__m128i*)in)[i*8+6]);
+            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+            gfmul_only(XV, HT[1], &r0, &r1);
+            tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
+            tmp2 = _mm_aesenc_si128(tmp2, KEY[7]);
+            tmp3 = _mm_aesenc_si128(tmp3, KEY[7]);
+            tmp4 = _mm_aesenc_si128(tmp4, KEY[7]);
+            tmp5 = _mm_aesenc_si128(tmp5, KEY[7]);
+            tmp6 = _mm_aesenc_si128(tmp6, KEY[7]);
+            tmp7 = _mm_aesenc_si128(tmp7, KEY[7]);
+            tmp8 = _mm_aesenc_si128(tmp8, KEY[7]);
+            /* 128 x 128 Carryless Multiply */
+            XV = _mm_loadu_si128(&((__m128i*)in)[i*8+7]);
+            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+            gfmul_only(XV, HT[0], &r0, &r1);
+            tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
+            tmp2 = _mm_aesenc_si128(tmp2, KEY[8]);
+            tmp3 = _mm_aesenc_si128(tmp3, KEY[8]);
+            tmp4 = _mm_aesenc_si128(tmp4, KEY[8]);
+            tmp5 = _mm_aesenc_si128(tmp5, KEY[8]);
+            tmp6 = _mm_aesenc_si128(tmp6, KEY[8]);
+            tmp7 = _mm_aesenc_si128(tmp7, KEY[8]);
+            tmp8 = _mm_aesenc_si128(tmp8, KEY[8]);
+            /* Reduction */
+            X = ghash_red(r0, r1);
+            tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
+            tmp2 = _mm_aesenc_si128(tmp2, KEY[9]);
+            tmp3 = _mm_aesenc_si128(tmp3, KEY[9]);
+            tmp4 = _mm_aesenc_si128(tmp4, KEY[9]);
+            tmp5 = _mm_aesenc_si128(tmp5, KEY[9]);
+            tmp6 = _mm_aesenc_si128(tmp6, KEY[9]);
+            tmp7 = _mm_aesenc_si128(tmp7, KEY[9]);
+            tmp8 = _mm_aesenc_si128(tmp8, KEY[9]);
+            lastKey = KEY[10];
+            if (nr > 10) {
+                tmp1 = _mm_aesenc_si128(tmp1, KEY[10]);
+                tmp2 = _mm_aesenc_si128(tmp2, KEY[10]);
+                tmp3 = _mm_aesenc_si128(tmp3, KEY[10]);
+                tmp4 = _mm_aesenc_si128(tmp4, KEY[10]);
+                tmp5 = _mm_aesenc_si128(tmp5, KEY[10]);
+                tmp6 = _mm_aesenc_si128(tmp6, KEY[10]);
+                tmp7 = _mm_aesenc_si128(tmp7, KEY[10]);
+                tmp8 = _mm_aesenc_si128(tmp8, KEY[10]);
+                tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
+                tmp2 = _mm_aesenc_si128(tmp2, KEY[11]);
+                tmp3 = _mm_aesenc_si128(tmp3, KEY[11]);
+                tmp4 = _mm_aesenc_si128(tmp4, KEY[11]);
+                tmp5 = _mm_aesenc_si128(tmp5, KEY[11]);
+                tmp6 = _mm_aesenc_si128(tmp6, KEY[11]);
+                tmp7 = _mm_aesenc_si128(tmp7, KEY[11]);
+                tmp8 = _mm_aesenc_si128(tmp8, KEY[11]);
+                lastKey = KEY[12];
+                if (nr > 12) {
+                    tmp1 = _mm_aesenc_si128(tmp1, KEY[12]);
+                    tmp2 = _mm_aesenc_si128(tmp2, KEY[12]);
+                    tmp3 = _mm_aesenc_si128(tmp3, KEY[12]);
+                    tmp4 = _mm_aesenc_si128(tmp4, KEY[12]);
+                    tmp5 = _mm_aesenc_si128(tmp5, KEY[12]);
+                    tmp6 = _mm_aesenc_si128(tmp6, KEY[12]);
+                    tmp7 = _mm_aesenc_si128(tmp7, KEY[12]);
+                    tmp8 = _mm_aesenc_si128(tmp8, KEY[12]);
+                    tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
+                    tmp2 = _mm_aesenc_si128(tmp2, KEY[13]);
+                    tmp3 = _mm_aesenc_si128(tmp3, KEY[13]);
+                    tmp4 = _mm_aesenc_si128(tmp4, KEY[13]);
+                    tmp5 = _mm_aesenc_si128(tmp5, KEY[13]);
+                    tmp6 = _mm_aesenc_si128(tmp6, KEY[13]);
+                    tmp7 = _mm_aesenc_si128(tmp7, KEY[13]);
+                    tmp8 = _mm_aesenc_si128(tmp8, KEY[13]);
+                    lastKey = KEY[14];
+                }
+            }
+            AES_ENC_LAST_8();
+        }
+    }
+
+#endif /* AES_GCM_AESNI_NO_UNROLL */
+
+    for (k = i*8; k < nbytes/16; k++) {
+        tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+        ctr1 = _mm_add_epi32(ctr1, ONE);
+        tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+        tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
+        tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
+        tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
+        tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
+        tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
+        tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
+        tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
+        tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
+        tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
+        /* 128 x 128 Carryless Multiply */
+        XV = _mm_loadu_si128(&((__m128i*)in)[k]);
+        XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+        XV = _mm_xor_si128(XV, X);
+        X = gfmul_shifted(XV, H);
+        lastKey = KEY[10];
+        if (nr > 10) {
+            tmp1 = _mm_aesenc_si128(tmp1, lastKey);
+            tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
+            lastKey = KEY[12];
+            if (nr > 12) {
+                tmp1 = _mm_aesenc_si128(tmp1, lastKey);
+                tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
+                lastKey = KEY[14];
+            }
+        }
+        tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
+        tmp2 = _mm_loadu_si128(&((__m128i*)in)[k]);
+        tmp1 = _mm_xor_si128(tmp1, tmp2);
+        _mm_storeu_si128(&((__m128i*)out)[k], tmp1);
+    }
+
+    /* If one partial block remains */
+    if (nbytes % 16) {
+        tmp1 = _mm_shuffle_epi8(ctr1,
BSWAP_EPI64); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); + lastKey = KEY[10]; + if (nr > 10) { + tmp1 = _mm_aesenc_si128(tmp1, lastKey); + tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); + lastKey = KEY[12]; + if (nr > 12) { + tmp1 = _mm_aesenc_si128(tmp1, lastKey); + tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); + lastKey = KEY[14]; + } + } + tmp1 = _mm_aesenclast_si128(tmp1, lastKey); + last_block = _mm_setzero_si128(); + for (j=0; j < nbytes%16; j++) + ((unsigned char*)&last_block)[j] = in[k*16+j]; + XV = last_block; + tmp1 = _mm_xor_si128(tmp1, last_block); + last_block = tmp1; + for (j=0; j < nbytes%16; j++) + out[k*16+j] = ((unsigned char*)&last_block)[j]; + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + XV = _mm_xor_si128(XV, X); + X = gfmul_shifted(XV, H); + } + + tmp1 = _mm_insert_epi64(tmp1, nbytes*8, 0); + tmp1 = _mm_insert_epi64(tmp1, abytes*8, 1); + /* 128 x 128 Carryless Multiply */ + X = _mm_xor_si128(X, tmp1); + X = gfmul_shifted(X, H); + X = _mm_shuffle_epi8(X, BSWAP_MASK); + T = _mm_xor_si128(X, T); + +/* if (0xffff != + _mm_movemask_epi8(_mm_cmpeq_epi8(T, _mm_loadu_si128((__m128i*)tag)))) */ + if (XMEMCMP(tag, &T, tbytes) != 0) + *res = 0; /* in case the authentication failed */ + else + *res = 1; /* when successful returns 1 */ +} + +#endif /* HAVE_AES_DECRYPT */ +#endif /* _MSC_VER */ +#endif /* WOLFSSL_AESNI */ + + +#if defined(GCM_SMALL) +static void GMULT(byte* X, byte* Y) +{ + byte Z[AES_BLOCK_SIZE]; + byte V[AES_BLOCK_SIZE]; + int i, j; + + XMEMSET(Z, 0, AES_BLOCK_SIZE); + XMEMCPY(V, X, AES_BLOCK_SIZE); + for (i = 0; i < AES_BLOCK_SIZE; i++) + { + byte y = Y[i]; + for (j = 0; j < 8; j++) + { + if (y & 0x80) { + xorbuf(Z, V, AES_BLOCK_SIZE); + } + + RIGHTSHIFTX(V); + y = y << 1; + } + } + XMEMCPY(X, Z, AES_BLOCK_SIZE); +} + + +void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c, + word32 cSz, byte* s, word32 sSz) +{ + byte x[AES_BLOCK_SIZE]; + byte scratch[AES_BLOCK_SIZE]; + word32 blocks, partial; + byte* h = aes->H; + + XMEMSET(x, 0, AES_BLOCK_SIZE); + + /* Hash in A, the Additional Authentication Data */ + if (aSz != 0 && a != NULL) { + blocks = aSz / AES_BLOCK_SIZE; + partial = aSz % AES_BLOCK_SIZE; + while (blocks--) { + xorbuf(x, a, AES_BLOCK_SIZE); + GMULT(x, h); + a += AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, a, partial); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, h); + } + } + + /* Hash in C, the Ciphertext */ + if (cSz != 0 && c != NULL) { + blocks = cSz / AES_BLOCK_SIZE; + partial = cSz % AES_BLOCK_SIZE; + while (blocks--) { + xorbuf(x, c, AES_BLOCK_SIZE); + GMULT(x, h); + c += AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, c, partial); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, h); + } + } + + /* Hash in the lengths of A and C in bits */ + FlattenSzInBits(&scratch[0], aSz); + FlattenSzInBits(&scratch[8], cSz); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, h); + + /* Copy the result into s. 
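+     * Only the first sSz bytes of the 16-byte GHASH state x are copied
+     * out; this is how shorter (truncated) authentication tags are
+     * produced from the full block.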
*/ + XMEMCPY(s, x, sSz); +} + +/* end GCM_SMALL */ +#elif defined(GCM_TABLE) + +static const byte R[256][2] = { + {0x00, 0x00}, {0x01, 0xc2}, {0x03, 0x84}, {0x02, 0x46}, + {0x07, 0x08}, {0x06, 0xca}, {0x04, 0x8c}, {0x05, 0x4e}, + {0x0e, 0x10}, {0x0f, 0xd2}, {0x0d, 0x94}, {0x0c, 0x56}, + {0x09, 0x18}, {0x08, 0xda}, {0x0a, 0x9c}, {0x0b, 0x5e}, + {0x1c, 0x20}, {0x1d, 0xe2}, {0x1f, 0xa4}, {0x1e, 0x66}, + {0x1b, 0x28}, {0x1a, 0xea}, {0x18, 0xac}, {0x19, 0x6e}, + {0x12, 0x30}, {0x13, 0xf2}, {0x11, 0xb4}, {0x10, 0x76}, + {0x15, 0x38}, {0x14, 0xfa}, {0x16, 0xbc}, {0x17, 0x7e}, + {0x38, 0x40}, {0x39, 0x82}, {0x3b, 0xc4}, {0x3a, 0x06}, + {0x3f, 0x48}, {0x3e, 0x8a}, {0x3c, 0xcc}, {0x3d, 0x0e}, + {0x36, 0x50}, {0x37, 0x92}, {0x35, 0xd4}, {0x34, 0x16}, + {0x31, 0x58}, {0x30, 0x9a}, {0x32, 0xdc}, {0x33, 0x1e}, + {0x24, 0x60}, {0x25, 0xa2}, {0x27, 0xe4}, {0x26, 0x26}, + {0x23, 0x68}, {0x22, 0xaa}, {0x20, 0xec}, {0x21, 0x2e}, + {0x2a, 0x70}, {0x2b, 0xb2}, {0x29, 0xf4}, {0x28, 0x36}, + {0x2d, 0x78}, {0x2c, 0xba}, {0x2e, 0xfc}, {0x2f, 0x3e}, + {0x70, 0x80}, {0x71, 0x42}, {0x73, 0x04}, {0x72, 0xc6}, + {0x77, 0x88}, {0x76, 0x4a}, {0x74, 0x0c}, {0x75, 0xce}, + {0x7e, 0x90}, {0x7f, 0x52}, {0x7d, 0x14}, {0x7c, 0xd6}, + {0x79, 0x98}, {0x78, 0x5a}, {0x7a, 0x1c}, {0x7b, 0xde}, + {0x6c, 0xa0}, {0x6d, 0x62}, {0x6f, 0x24}, {0x6e, 0xe6}, + {0x6b, 0xa8}, {0x6a, 0x6a}, {0x68, 0x2c}, {0x69, 0xee}, + {0x62, 0xb0}, {0x63, 0x72}, {0x61, 0x34}, {0x60, 0xf6}, + {0x65, 0xb8}, {0x64, 0x7a}, {0x66, 0x3c}, {0x67, 0xfe}, + {0x48, 0xc0}, {0x49, 0x02}, {0x4b, 0x44}, {0x4a, 0x86}, + {0x4f, 0xc8}, {0x4e, 0x0a}, {0x4c, 0x4c}, {0x4d, 0x8e}, + {0x46, 0xd0}, {0x47, 0x12}, {0x45, 0x54}, {0x44, 0x96}, + {0x41, 0xd8}, {0x40, 0x1a}, {0x42, 0x5c}, {0x43, 0x9e}, + {0x54, 0xe0}, {0x55, 0x22}, {0x57, 0x64}, {0x56, 0xa6}, + {0x53, 0xe8}, {0x52, 0x2a}, {0x50, 0x6c}, {0x51, 0xae}, + {0x5a, 0xf0}, {0x5b, 0x32}, {0x59, 0x74}, {0x58, 0xb6}, + {0x5d, 0xf8}, {0x5c, 0x3a}, {0x5e, 0x7c}, {0x5f, 0xbe}, + {0xe1, 0x00}, {0xe0, 0xc2}, {0xe2, 0x84}, {0xe3, 0x46}, + {0xe6, 0x08}, {0xe7, 0xca}, {0xe5, 0x8c}, {0xe4, 0x4e}, + {0xef, 0x10}, {0xee, 0xd2}, {0xec, 0x94}, {0xed, 0x56}, + {0xe8, 0x18}, {0xe9, 0xda}, {0xeb, 0x9c}, {0xea, 0x5e}, + {0xfd, 0x20}, {0xfc, 0xe2}, {0xfe, 0xa4}, {0xff, 0x66}, + {0xfa, 0x28}, {0xfb, 0xea}, {0xf9, 0xac}, {0xf8, 0x6e}, + {0xf3, 0x30}, {0xf2, 0xf2}, {0xf0, 0xb4}, {0xf1, 0x76}, + {0xf4, 0x38}, {0xf5, 0xfa}, {0xf7, 0xbc}, {0xf6, 0x7e}, + {0xd9, 0x40}, {0xd8, 0x82}, {0xda, 0xc4}, {0xdb, 0x06}, + {0xde, 0x48}, {0xdf, 0x8a}, {0xdd, 0xcc}, {0xdc, 0x0e}, + {0xd7, 0x50}, {0xd6, 0x92}, {0xd4, 0xd4}, {0xd5, 0x16}, + {0xd0, 0x58}, {0xd1, 0x9a}, {0xd3, 0xdc}, {0xd2, 0x1e}, + {0xc5, 0x60}, {0xc4, 0xa2}, {0xc6, 0xe4}, {0xc7, 0x26}, + {0xc2, 0x68}, {0xc3, 0xaa}, {0xc1, 0xec}, {0xc0, 0x2e}, + {0xcb, 0x70}, {0xca, 0xb2}, {0xc8, 0xf4}, {0xc9, 0x36}, + {0xcc, 0x78}, {0xcd, 0xba}, {0xcf, 0xfc}, {0xce, 0x3e}, + {0x91, 0x80}, {0x90, 0x42}, {0x92, 0x04}, {0x93, 0xc6}, + {0x96, 0x88}, {0x97, 0x4a}, {0x95, 0x0c}, {0x94, 0xce}, + {0x9f, 0x90}, {0x9e, 0x52}, {0x9c, 0x14}, {0x9d, 0xd6}, + {0x98, 0x98}, {0x99, 0x5a}, {0x9b, 0x1c}, {0x9a, 0xde}, + {0x8d, 0xa0}, {0x8c, 0x62}, {0x8e, 0x24}, {0x8f, 0xe6}, + {0x8a, 0xa8}, {0x8b, 0x6a}, {0x89, 0x2c}, {0x88, 0xee}, + {0x83, 0xb0}, {0x82, 0x72}, {0x80, 0x34}, {0x81, 0xf6}, + {0x84, 0xb8}, {0x85, 0x7a}, {0x87, 0x3c}, {0x86, 0xfe}, + {0xa9, 0xc0}, {0xa8, 0x02}, {0xaa, 0x44}, {0xab, 0x86}, + {0xae, 0xc8}, {0xaf, 0x0a}, {0xad, 0x4c}, {0xac, 0x8e}, + {0xa7, 0xd0}, {0xa6, 0x12}, {0xa4, 0x54}, {0xa5, 0x96}, + {0xa0, 0xd8}, 
{0xa1, 0x1a}, {0xa3, 0x5c}, {0xa2, 0x9e}, + {0xb5, 0xe0}, {0xb4, 0x22}, {0xb6, 0x64}, {0xb7, 0xa6}, + {0xb2, 0xe8}, {0xb3, 0x2a}, {0xb1, 0x6c}, {0xb0, 0xae}, + {0xbb, 0xf0}, {0xba, 0x32}, {0xb8, 0x74}, {0xb9, 0xb6}, + {0xbc, 0xf8}, {0xbd, 0x3a}, {0xbf, 0x7c}, {0xbe, 0xbe} }; + + +static void GMULT(byte *x, byte m[256][AES_BLOCK_SIZE]) +{ + int i, j; + byte Z[AES_BLOCK_SIZE]; + byte a; + + XMEMSET(Z, 0, sizeof(Z)); + + for (i = 15; i > 0; i--) { + xorbuf(Z, m[x[i]], AES_BLOCK_SIZE); + a = Z[15]; + + for (j = 15; j > 0; j--) { + Z[j] = Z[j-1]; + } + + Z[0] = R[a][0]; + Z[1] ^= R[a][1]; + } + xorbuf(Z, m[x[0]], AES_BLOCK_SIZE); + + XMEMCPY(x, Z, AES_BLOCK_SIZE); +} + + +void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c, + word32 cSz, byte* s, word32 sSz) +{ + byte x[AES_BLOCK_SIZE]; + byte scratch[AES_BLOCK_SIZE]; + word32 blocks, partial; + + XMEMSET(x, 0, AES_BLOCK_SIZE); + + /* Hash in A, the Additional Authentication Data */ + if (aSz != 0 && a != NULL) { + blocks = aSz / AES_BLOCK_SIZE; + partial = aSz % AES_BLOCK_SIZE; + while (blocks--) { + xorbuf(x, a, AES_BLOCK_SIZE); + GMULT(x, aes->M0); + a += AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, a, partial); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, aes->M0); + } + } + + /* Hash in C, the Ciphertext */ + if (cSz != 0 && c != NULL) { + blocks = cSz / AES_BLOCK_SIZE; + partial = cSz % AES_BLOCK_SIZE; + while (blocks--) { + xorbuf(x, c, AES_BLOCK_SIZE); + GMULT(x, aes->M0); + c += AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, c, partial); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, aes->M0); + } + } + + /* Hash in the lengths of A and C in bits */ + FlattenSzInBits(&scratch[0], aSz); + FlattenSzInBits(&scratch[8], cSz); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, aes->M0); + + /* Copy the result into s. */ + XMEMCPY(s, x, sSz); +} + +/* end GCM_TABLE */ +#elif defined(WORD64_AVAILABLE) && !defined(GCM_WORD32) + +#if !defined(FREESCALE_LTC_AES_GCM) +static void GMULT(word64* X, word64* Y) +{ + word64 Z[2] = {0,0}; + word64 V[2]; + int i, j; + V[0] = X[0]; V[1] = X[1]; + + for (i = 0; i < 2; i++) + { + word64 y = Y[i]; + for (j = 0; j < 64; j++) + { + if (y & 0x8000000000000000ULL) { + Z[0] ^= V[0]; + Z[1] ^= V[1]; + } + + if (V[1] & 0x0000000000000001) { + V[1] >>= 1; + V[1] |= ((V[0] & 0x0000000000000001) ? + 0x8000000000000000ULL : 0); + V[0] >>= 1; + V[0] ^= 0xE100000000000000ULL; + } + else { + V[1] >>= 1; + V[1] |= ((V[0] & 0x0000000000000001) ? 
+ 0x8000000000000000ULL : 0); + V[0] >>= 1; + } + y <<= 1; + } + } + X[0] = Z[0]; + X[1] = Z[1]; +} + + +void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c, + word32 cSz, byte* s, word32 sSz) +{ + word64 x[2] = {0,0}; + word32 blocks, partial; + word64 bigH[2]; + + XMEMCPY(bigH, aes->H, AES_BLOCK_SIZE); + #ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords64(bigH, bigH, AES_BLOCK_SIZE); + #endif + + /* Hash in A, the Additional Authentication Data */ + if (aSz != 0 && a != NULL) { + word64 bigA[2]; + blocks = aSz / AES_BLOCK_SIZE; + partial = aSz % AES_BLOCK_SIZE; + while (blocks--) { + XMEMCPY(bigA, a, AES_BLOCK_SIZE); + #ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords64(bigA, bigA, AES_BLOCK_SIZE); + #endif + x[0] ^= bigA[0]; + x[1] ^= bigA[1]; + GMULT(x, bigH); + a += AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(bigA, 0, AES_BLOCK_SIZE); + XMEMCPY(bigA, a, partial); + #ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords64(bigA, bigA, AES_BLOCK_SIZE); + #endif + x[0] ^= bigA[0]; + x[1] ^= bigA[1]; + GMULT(x, bigH); + } +#ifdef OPENSSL_EXTRA + /* store AAD partial tag for next call */ + aes->aadH[0] = (word32)((x[0] & 0xFFFFFFFF00000000) >> 32); + aes->aadH[1] = (word32)(x[0] & 0xFFFFFFFF); + aes->aadH[2] = (word32)((x[1] & 0xFFFFFFFF00000000) >> 32); + aes->aadH[3] = (word32)(x[1] & 0xFFFFFFFF); +#endif + } + + /* Hash in C, the Ciphertext */ + if (cSz != 0 && c != NULL) { + word64 bigC[2]; + blocks = cSz / AES_BLOCK_SIZE; + partial = cSz % AES_BLOCK_SIZE; +#ifdef OPENSSL_EXTRA + /* Start from last AAD partial tag */ + if(aes->aadLen) { + x[0] = ((word64)aes->aadH[0]) << 32 | aes->aadH[1]; + x[1] = ((word64)aes->aadH[2]) << 32 | aes->aadH[3]; + } +#endif + while (blocks--) { + XMEMCPY(bigC, c, AES_BLOCK_SIZE); + #ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords64(bigC, bigC, AES_BLOCK_SIZE); + #endif + x[0] ^= bigC[0]; + x[1] ^= bigC[1]; + GMULT(x, bigH); + c += AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(bigC, 0, AES_BLOCK_SIZE); + XMEMCPY(bigC, c, partial); + #ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords64(bigC, bigC, AES_BLOCK_SIZE); + #endif + x[0] ^= bigC[0]; + x[1] ^= bigC[1]; + GMULT(x, bigH); + } + } + + /* Hash in the lengths in bits of A and C */ + { + word64 len[2]; + len[0] = aSz; len[1] = cSz; +#ifdef OPENSSL_EXTRA + if (aes->aadLen) + len[0] = (word64)aes->aadLen; +#endif + /* Lengths are in bytes. Convert to bits. */ + len[0] *= 8; + len[1] *= 8; + + x[0] ^= len[0]; + x[1] ^= len[1]; + GMULT(x, bigH); + } + #ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords64(x, x, AES_BLOCK_SIZE); + #endif + XMEMCPY(s, x, sSz); +} +#endif /* !FREESCALE_LTC_AES_GCM */ + +/* end defined(WORD64_AVAILABLE) && !defined(GCM_WORD32) */ +#else /* GCM_WORD32 */ + +static void GMULT(word32* X, word32* Y) +{ + word32 Z[4] = {0,0,0,0}; + word32 V[4]; + int i, j; + + V[0] = X[0]; V[1] = X[1]; V[2] = X[2]; V[3] = X[3]; + + for (i = 0; i < 4; i++) + { + word32 y = Y[i]; + for (j = 0; j < 32; j++) + { + if (y & 0x80000000) { + Z[0] ^= V[0]; + Z[1] ^= V[1]; + Z[2] ^= V[2]; + Z[3] ^= V[3]; + } + + if (V[3] & 0x00000001) { + V[3] >>= 1; + V[3] |= ((V[2] & 0x00000001) ? 0x80000000 : 0); + V[2] >>= 1; + V[2] |= ((V[1] & 0x00000001) ? 0x80000000 : 0); + V[1] >>= 1; + V[1] |= ((V[0] & 0x00000001) ? 0x80000000 : 0); + V[0] >>= 1; + V[0] ^= 0xE1000000; + } else { + V[3] >>= 1; + V[3] |= ((V[2] & 0x00000001) ? 0x80000000 : 0); + V[2] >>= 1; + V[2] |= ((V[1] & 0x00000001) ? 0x80000000 : 0); + V[1] >>= 1; + V[1] |= ((V[0] & 0x00000001) ? 
0x80000000 : 0); + V[0] >>= 1; + } + y <<= 1; + } + } + X[0] = Z[0]; + X[1] = Z[1]; + X[2] = Z[2]; + X[3] = Z[3]; +} + + +void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c, + word32 cSz, byte* s, word32 sSz) +{ + word32 x[4] = {0,0,0,0}; + word32 blocks, partial; + word32 bigH[4]; + + XMEMCPY(bigH, aes->H, AES_BLOCK_SIZE); + #ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords(bigH, bigH, AES_BLOCK_SIZE); + #endif + + /* Hash in A, the Additional Authentication Data */ + if (aSz != 0 && a != NULL) { + word32 bigA[4]; + blocks = aSz / AES_BLOCK_SIZE; + partial = aSz % AES_BLOCK_SIZE; + while (blocks--) { + XMEMCPY(bigA, a, AES_BLOCK_SIZE); + #ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords(bigA, bigA, AES_BLOCK_SIZE); + #endif + x[0] ^= bigA[0]; + x[1] ^= bigA[1]; + x[2] ^= bigA[2]; + x[3] ^= bigA[3]; + GMULT(x, bigH); + a += AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(bigA, 0, AES_BLOCK_SIZE); + XMEMCPY(bigA, a, partial); + #ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords(bigA, bigA, AES_BLOCK_SIZE); + #endif + x[0] ^= bigA[0]; + x[1] ^= bigA[1]; + x[2] ^= bigA[2]; + x[3] ^= bigA[3]; + GMULT(x, bigH); + } + } + + /* Hash in C, the Ciphertext */ + if (cSz != 0 && c != NULL) { + word32 bigC[4]; + blocks = cSz / AES_BLOCK_SIZE; + partial = cSz % AES_BLOCK_SIZE; + while (blocks--) { + XMEMCPY(bigC, c, AES_BLOCK_SIZE); + #ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords(bigC, bigC, AES_BLOCK_SIZE); + #endif + x[0] ^= bigC[0]; + x[1] ^= bigC[1]; + x[2] ^= bigC[2]; + x[3] ^= bigC[3]; + GMULT(x, bigH); + c += AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(bigC, 0, AES_BLOCK_SIZE); + XMEMCPY(bigC, c, partial); + #ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords(bigC, bigC, AES_BLOCK_SIZE); + #endif + x[0] ^= bigC[0]; + x[1] ^= bigC[1]; + x[2] ^= bigC[2]; + x[3] ^= bigC[3]; + GMULT(x, bigH); + } + } + + /* Hash in the lengths in bits of A and C */ + { + word32 len[4]; + + /* Lengths are in bytes. Convert to bits. */ + len[0] = (aSz >> (8*sizeof(aSz) - 3)); + len[1] = aSz << 3; + len[2] = (cSz >> (8*sizeof(cSz) - 3)); + len[3] = cSz << 3; + + x[0] ^= len[0]; + x[1] ^= len[1]; + x[2] ^= len[2]; + x[3] ^= len[3]; + GMULT(x, bigH); + } + #ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords(x, x, AES_BLOCK_SIZE); + #endif + XMEMCPY(s, x, sSz); +} + +#endif /* end GCM_WORD32 */ + + +#if !defined(WOLFSSL_XILINX_CRYPT) && !defined(WOLFSSL_AFALG_XILINX_AES) +#ifdef FREESCALE_LTC_AES_GCM +int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + status_t status; + word32 keySize; + + /* argument checks */ + if (aes == NULL || authTagSz > AES_BLOCK_SIZE || ivSz == 0) { + return BAD_FUNC_ARG; + } + + if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) { + WOLFSSL_MSG("GcmEncrypt authTagSz too small error"); + return BAD_FUNC_ARG; + } + + status = wc_AesGetKeySize(aes, &keySize); + if (status) + return status; + + status = LTC_AES_EncryptTagGcm(LTC_BASE, in, out, sz, iv, ivSz, + authIn, authInSz, (byte*)aes->key, keySize, authTag, authTagSz); + + return (status == kStatus_Success) ? 
0 : AES_GCM_AUTH_E; +} + +#else + +#ifdef STM32_CRYPTO_AES_GCM + +/* this function supports inline encrypt */ +static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + int ret; +#ifdef WOLFSSL_STM32_CUBEMX + CRYP_HandleTypeDef hcryp; +#else + word32 keyCopy[AES_256_KEY_SIZE/sizeof(word32)]; +#endif + word32 keySize; + int status = HAL_OK; + word32 blocks = sz / AES_BLOCK_SIZE; + word32 partial = sz % AES_BLOCK_SIZE; + byte tag[AES_BLOCK_SIZE]; + byte partialBlock[AES_BLOCK_SIZE]; + byte ctr[AES_BLOCK_SIZE]; + byte* authInPadded = NULL; + int authPadSz; + + ret = wc_AesGetKeySize(aes, &keySize); + if (ret != 0) + return ret; + +#ifdef WOLFSSL_STM32_CUBEMX + ret = wc_Stm32_Aes_Init(aes, &hcryp); + if (ret != 0) + return ret; +#endif + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + XMEMSET(ctr, 0, AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(ctr, iv, ivSz); + ctr[AES_BLOCK_SIZE - 1] = 1; + } + else { + GHASH(aes, NULL, 0, iv, ivSz, ctr, AES_BLOCK_SIZE); + } + /* Hardware requires counter + 1 */ + IncrementGcmCounter(ctr); + + if (authInSz == 0 || (authInSz % AES_BLOCK_SIZE) != 0) { + /* Need to pad the AAD to a full block with zeros. */ + authPadSz = ((authInSz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE; + authInPadded = (byte*)XMALLOC(authPadSz, aes->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (authInPadded == NULL) { + wolfSSL_CryptHwMutexUnLock(); + return MEMORY_E; + } + XMEMSET(authInPadded, 0, authPadSz); + XMEMCPY(authInPadded, authIn, authInSz); + } else { + authPadSz = authInSz; + authInPadded = (byte*)authIn; + } + +#ifdef WOLFSSL_STM32_CUBEMX + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)ctr; + hcryp.Init.Header = (STM_CRYPT_TYPE*)authInPadded; + hcryp.Init.HeaderSize = authInSz; + +#ifdef STM32_CRYPTO_AES_ONLY + /* Set the CRYP parameters */ + hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_GCM_GMAC; + hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT; + hcryp.Init.GCMCMACPhase = CRYP_INIT_PHASE; + HAL_CRYP_Init(&hcryp); + + /* GCM init phase */ + status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT); + if (status == HAL_OK) { + /* GCM header phase */ + hcryp.Init.GCMCMACPhase = CRYP_HEADER_PHASE; + status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT); + } + if (status == HAL_OK) { + /* GCM payload phase - blocks */ + hcryp.Init.GCMCMACPhase = CRYP_PAYLOAD_PHASE; + if (blocks) { + status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)in, + (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT); + } + } + if (status == HAL_OK && (partial != 0 || blocks == 0)) { + /* GCM payload phase - partial remainder */ + XMEMSET(partialBlock, 0, sizeof(partialBlock)); + XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial); + status = HAL_CRYPEx_AES_Auth(&hcryp, partialBlock, partial, + partialBlock, STM32_HAL_TIMEOUT); + XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial); + } + if (status == HAL_OK) { + /* GCM final phase */ + hcryp.Init.GCMCMACPhase = CRYP_FINAL_PHASE; + status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, STM32_HAL_TIMEOUT); + } +#elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_GCM; + ByteReverseWords((word32*)partialBlock, (word32*)ctr, AES_BLOCK_SIZE); + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)partialBlock; + HAL_CRYP_Init(&hcryp); + + /* GCM payload phase - can handle partial blocks */ + status = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in, + (blocks * 
AES_BLOCK_SIZE) + partial, (uint32_t*)out, STM32_HAL_TIMEOUT); + if (status == HAL_OK) { + /* Compute the authTag */ + status = HAL_CRYPEx_AESGCM_GenerateAuthTAG(&hcryp, (uint32_t*)tag, + STM32_HAL_TIMEOUT); + } +#else + HAL_CRYP_Init(&hcryp); + if (blocks) { + /* GCM payload phase - blocks */ + status = HAL_CRYPEx_AESGCM_Encrypt(&hcryp, (byte*)in, + (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT); + } + if (status == HAL_OK && (partial != 0 || blocks == 0)) { + /* GCM payload phase - partial remainder */ + XMEMSET(partialBlock, 0, sizeof(partialBlock)); + XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial); + status = HAL_CRYPEx_AESGCM_Encrypt(&hcryp, partialBlock, partial, + partialBlock, STM32_HAL_TIMEOUT); + XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial); + } + if (status == HAL_OK) { + /* Compute the authTag */ + status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, tag, STM32_HAL_TIMEOUT); + } +#endif + + if (status != HAL_OK) + ret = AES_GCM_AUTH_E; + HAL_CRYP_DeInit(&hcryp); + +#else /* STD_PERI_LIB */ + ByteReverseWords(keyCopy, (word32*)aes->key, keySize); + status = CRYP_AES_GCM(MODE_ENCRYPT, (uint8_t*)ctr, + (uint8_t*)keyCopy, keySize * 8, + (uint8_t*)in, sz, + (uint8_t*)authInPadded, authInSz, + (uint8_t*)out, tag); + if (status != SUCCESS) + ret = AES_GCM_AUTH_E; +#endif /* WOLFSSL_STM32_CUBEMX */ + + if (ret == 0) { + /* return authTag */ + if (authTag) { + /* STM32 GCM won't compute Auth correctly for partial or + when IV != 12, so use software here */ + if (sz == 0 || partial != 0 || ivSz != GCM_NONCE_MID_SZ) { + DecrementGcmCounter(ctr); /* hardware requires +1, so subtract it */ + GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz); + wc_AesEncrypt(aes, ctr, tag); + xorbuf(authTag, tag, authTagSz); + } + else { + XMEMCPY(authTag, tag, authTagSz); + } + } + } + + /* Free memory if not a multiple of AES_BLOCK_SZ */ + if (authInPadded != authIn) { + XFREE(authInPadded, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + + wolfSSL_CryptHwMutexUnLock(); + + return ret; +} + +#endif /* STM32_CRYPTO_AES_GCM */ + +#ifdef WOLFSSL_AESNI +int AES_GCM_encrypt_C(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz); +#else +static +#endif +int AES_GCM_encrypt_C(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + int ret = 0; + word32 blocks = sz / AES_BLOCK_SIZE; + word32 partial = sz % AES_BLOCK_SIZE; + const byte* p = in; + byte* c = out; + byte counter[AES_BLOCK_SIZE]; + byte initialCounter[AES_BLOCK_SIZE]; + byte *ctr; + byte scratch[AES_BLOCK_SIZE]; +#ifdef OPENSSL_EXTRA + word32 aadTemp; +#endif + ctr = counter; + XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(initialCounter, iv, ivSz); + initialCounter[AES_BLOCK_SIZE - 1] = 1; + } + else { +#ifdef OPENSSL_EXTRA + aadTemp = aes->aadLen; + aes->aadLen = 0; +#endif + GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); +#ifdef OPENSSL_EXTRA + aes->aadLen = aadTemp; +#endif + } + XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE); + +#ifdef WOLFSSL_PIC32MZ_CRYPT + if (blocks) { + /* use initial IV for HW, but don't use it below */ + XMEMCPY(aes->reg, ctr, AES_BLOCK_SIZE); + + ret = wc_Pic32AesCrypt( + aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE, + out, in, (blocks * AES_BLOCK_SIZE), + PIC32_ENCRYPTION, PIC32_ALGO_AES, 
PIC32_CRYPTOALGO_AES_GCM);
+        if (ret != 0)
+            return ret;
+    }
+    /* process remainder using partial handling */
+#endif
+
+#if defined(HAVE_AES_ECB) && !defined(WOLFSSL_PIC32MZ_CRYPT)
+    /* some hardware acceleration can gain performance from doing AES encryption
+     * of the whole buffer at once */
+    if (c != p && blocks > 0) { /* can not handle inline encryption */
+        while (blocks--) {
+            IncrementGcmCounter(ctr);
+            XMEMCPY(c, ctr, AES_BLOCK_SIZE);
+            c += AES_BLOCK_SIZE;
+        }
+
+        /* reset number of blocks and then do encryption */
+        blocks = sz / AES_BLOCK_SIZE;
+        wc_AesEcbEncrypt(aes, out, out, AES_BLOCK_SIZE * blocks);
+        xorbuf(out, p, AES_BLOCK_SIZE * blocks);
+        p += AES_BLOCK_SIZE * blocks;
+    }
+    else
+#endif /* HAVE_AES_ECB && !WOLFSSL_PIC32MZ_CRYPT */
+
+    while (blocks--) {
+        IncrementGcmCounter(ctr);
+    #if !defined(WOLFSSL_PIC32MZ_CRYPT)
+        wc_AesEncrypt(aes, ctr, scratch);
+        xorbuf(scratch, p, AES_BLOCK_SIZE);
+        XMEMCPY(c, scratch, AES_BLOCK_SIZE);
+    #endif
+        p += AES_BLOCK_SIZE;
+        c += AES_BLOCK_SIZE;
+    }
+
+    if (partial != 0) {
+        IncrementGcmCounter(ctr);
+        wc_AesEncrypt(aes, ctr, scratch);
+        xorbuf(scratch, p, partial);
+        XMEMCPY(c, scratch, partial);
+    }
+    if (authTag) {
+        GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz);
+        wc_AesEncrypt(aes, initialCounter, scratch);
+        xorbuf(authTag, scratch, authTagSz);
+#ifdef OPENSSL_EXTRA
+        if (!in && !sz)
+            /* store AAD size for next call */
+            aes->aadLen = authInSz;
+#endif
+    }
+
+    return ret;
+}
+
+/* Software AES - GCM Encrypt */
+int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
+                     const byte* iv, word32 ivSz,
+                     byte* authTag, word32 authTagSz,
+                     const byte* authIn, word32 authInSz)
+{
+    /* argument checks */
+    if (aes == NULL || authTagSz > AES_BLOCK_SIZE || ivSz == 0) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) {
+        WOLFSSL_MSG("GcmEncrypt authTagSz too small error");
+        return BAD_FUNC_ARG;
+    }
+
+#ifdef WOLF_CRYPTO_CB
+    if (aes->devId != INVALID_DEVID) {
+        int ret = wc_CryptoCb_AesGcmEncrypt(aes, out, in, sz, iv, ivSz,
+            authTag, authTagSz, authIn, authInSz);
+        if (ret != CRYPTOCB_UNAVAILABLE)
+            return ret;
+        /* fall-through when unavailable */
+    }
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+    /* if async and byte count above threshold */
+    /* only 12-byte IV is supported in HW */
+    if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
+            sz >= WC_ASYNC_THRESH_AES_GCM && ivSz == GCM_NONCE_MID_SZ) {
+    #if defined(HAVE_CAVIUM)
+        #ifdef HAVE_CAVIUM_V
+        if (authInSz == 20) { /* Nitrox V GCM only works with 20-byte AAD */
+            return NitroxAesGcmEncrypt(aes, out, in, sz,
+                (const byte*)aes->devKey, aes->keylen, iv, ivSz,
+                authTag, authTagSz, authIn, authInSz);
+        }
+        #endif
+    #elif defined(HAVE_INTEL_QA)
+        return IntelQaSymAesGcmEncrypt(&aes->asyncDev, out, in, sz,
+            (const byte*)aes->devKey, aes->keylen, iv, ivSz,
+            authTag, authTagSz, authIn, authInSz);
+    #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+        if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_GCM_ENCRYPT)) {
+            WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
+            testDev->aes.aes = aes;
+            testDev->aes.out = out;
+            testDev->aes.in = in;
+            testDev->aes.sz = sz;
+            testDev->aes.iv = iv;
+            testDev->aes.ivSz = ivSz;
+            testDev->aes.authTag = authTag;
+            testDev->aes.authTagSz = authTagSz;
+            testDev->aes.authIn = authIn;
+            testDev->aes.authInSz = authInSz;
+            return WC_PENDING_E;
+        }
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+#ifdef STM32_CRYPTO_AES_GCM
+    /* The STM standard peripheral library APIs don't support partial blocks */
+    #ifdef STD_PERI_LIB
+    if (partial == 0)
+    #endif
+    {
+        return wc_AesGcmEncrypt_STM32(
+            aes, out, in, sz, iv, ivSz,
+            authTag, authTagSz, authIn, authInSz);
+    }
+#endif /* STM32_CRYPTO_AES_GCM */
+
+#ifdef WOLFSSL_AESNI
+    #ifdef HAVE_INTEL_AVX2
+    if (IS_INTEL_AVX2(intel_flags)) {
+        AES_GCM_encrypt_avx2(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
+                             authTagSz, (const byte*)aes->key, aes->rounds);
+        return 0;
+    }
+    else
+    #endif
+    #ifdef HAVE_INTEL_AVX1
+    if (IS_INTEL_AVX1(intel_flags)) {
+        AES_GCM_encrypt_avx1(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
+                             authTagSz, (const byte*)aes->key, aes->rounds);
+        return 0;
+    }
+    else
+    #endif
+    if (haveAESNI) {
+        AES_GCM_encrypt(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
+                        authTagSz, (const byte*)aes->key, aes->rounds);
+        return 0;
+    }
+    else
+#endif
+    {
+        return AES_GCM_encrypt_C(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
+                                 authIn, authInSz);
+    }
+}
+#endif
+
+
+
+/* AES GCM Decrypt */
+#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AESGCM_DECRYPT)
+#ifdef FREESCALE_LTC_AES_GCM
+int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
+                     const byte* iv, word32 ivSz,
+                     const byte* authTag, word32 authTagSz,
+                     const byte* authIn, word32 authInSz)
+{
+    int ret;
+    word32 keySize;
+    status_t status;
+
+    /* argument checks */
+    /* If the sz is non-zero, both in and out must be set. If sz is 0,
+     * in and out are don't cares, as this is the GMAC case. */
+    if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
+        authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0 ||
+        ivSz == 0) {
+
+        return BAD_FUNC_ARG;
+    }
+
+    ret = wc_AesGetKeySize(aes, &keySize);
+    if (ret != 0) {
+        return ret;
+    }
+
+    status = LTC_AES_DecryptTagGcm(LTC_BASE, in, out, sz, iv, ivSz,
+        authIn, authInSz, (byte*)aes->key, keySize, authTag, authTagSz);
+
+    return (status == kStatus_Success) ? 0 : AES_GCM_AUTH_E;
+}
+
+#else
+
+#ifdef STM32_CRYPTO_AES_GCM
+/* this function supports inline decrypt */
+static int wc_AesGcmDecrypt_STM32(Aes* aes, byte* out,
+                                  const byte* in, word32 sz,
+                                  const byte* iv, word32 ivSz,
+                                  const byte* authTag, word32 authTagSz,
+                                  const byte* authIn, word32 authInSz)
+{
+    int ret;
+#ifdef WOLFSSL_STM32_CUBEMX
+    CRYP_HandleTypeDef hcryp;
+#else
+    word32 keyCopy[AES_256_KEY_SIZE/sizeof(word32)];
+#endif
+    word32 keySize;
+    int status = HAL_OK;
+    word32 blocks = sz / AES_BLOCK_SIZE;
+    word32 partial = sz % AES_BLOCK_SIZE;
+    byte tag[AES_BLOCK_SIZE];
+    byte partialBlock[AES_BLOCK_SIZE];
+    byte ctr[AES_BLOCK_SIZE];
+    byte* authInPadded = NULL;
+    int authPadSz;
+
+    ret = wc_AesGetKeySize(aes, &keySize);
+    if (ret != 0)
+        return ret;
+
+#ifdef WOLFSSL_STM32_CUBEMX
+    ret = wc_Stm32_Aes_Init(aes, &hcryp);
+    if (ret != 0)
+        return ret;
+#endif
+
+    ret = wolfSSL_CryptHwMutexLock();
+    if (ret != 0) {
+        return ret;
+    }
+
+    XMEMSET(ctr, 0, AES_BLOCK_SIZE);
+    if (ivSz == GCM_NONCE_MID_SZ) {
+        XMEMCPY(ctr, iv, ivSz);
+        ctr[AES_BLOCK_SIZE - 1] = 1;
+    }
+    else {
+        GHASH(aes, NULL, 0, iv, ivSz, ctr, AES_BLOCK_SIZE);
+    }
+    /* Hardware requires counter + 1 */
+    IncrementGcmCounter(ctr);
+
+    if (authInSz == 0 || (authInSz % AES_BLOCK_SIZE) != 0) {
+        /* Need to pad the AAD to a full block with zeros.
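+         * (authPadSz below rounds authInSz up to the next multiple of
+         * AES_BLOCK_SIZE; the hardware consumes the AAD in whole
+         * 16-byte blocks, so a short or unaligned header is
+         * zero-padded first.)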
*/ + authPadSz = ((authInSz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE; + authInPadded = (byte*)XMALLOC(authPadSz, aes->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (authInPadded == NULL) { + wolfSSL_CryptHwMutexUnLock(); + return MEMORY_E; + } + XMEMSET(authInPadded, 0, authPadSz); + XMEMCPY(authInPadded, authIn, authInSz); + } else { + authPadSz = authInSz; + authInPadded = (byte*)authIn; + } + +#ifdef WOLFSSL_STM32_CUBEMX + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)ctr; + hcryp.Init.Header = (STM_CRYPT_TYPE*)authInPadded; + hcryp.Init.HeaderSize = authInSz; + +#ifdef STM32_CRYPTO_AES_ONLY + /* Set the CRYP parameters */ + hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_GCM_GMAC; + hcryp.Init.OperatingMode = CRYP_ALGOMODE_DECRYPT; + hcryp.Init.GCMCMACPhase = CRYP_INIT_PHASE; + HAL_CRYP_Init(&hcryp); + + /* GCM init phase */ + status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT); + if (status == HAL_OK) { + /* GCM header phase */ + hcryp.Init.GCMCMACPhase = CRYP_HEADER_PHASE; + status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT); + } + if (status == HAL_OK) { + /* GCM payload phase - blocks */ + hcryp.Init.GCMCMACPhase = CRYP_PAYLOAD_PHASE; + if (blocks) { + status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)in, + (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT); + } + } + if (status == HAL_OK && (partial != 0 || blocks == 0)) { + /* GCM payload phase - partial remainder */ + XMEMSET(partialBlock, 0, sizeof(partialBlock)); + XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial); + status = HAL_CRYPEx_AES_Auth(&hcryp, partialBlock, partial, + partialBlock, STM32_HAL_TIMEOUT); + XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial); + } + if (status == HAL_OK) { + /* GCM final phase */ + hcryp.Init.GCMCMACPhase = CRYP_FINAL_PHASE; + status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, STM32_HAL_TIMEOUT); + } +#elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_GCM; + ByteReverseWords((word32*)partialBlock, (word32*)ctr, AES_BLOCK_SIZE); + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)partialBlock; + HAL_CRYP_Init(&hcryp); + + /* GCM payload phase - can handle partial blocks */ + status = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)in, + (blocks * AES_BLOCK_SIZE) + partial, (uint32_t*)out, STM32_HAL_TIMEOUT); + if (status == HAL_OK) { + /* Compute the authTag */ + status = HAL_CRYPEx_AESGCM_GenerateAuthTAG(&hcryp, (uint32_t*)tag, + STM32_HAL_TIMEOUT); + } +#else + HAL_CRYP_Init(&hcryp); + if (blocks) { + /* GCM payload phase - blocks */ + status = HAL_CRYPEx_AESGCM_Decrypt(&hcryp, (byte*)in, + (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT); + } + if (status == HAL_OK && (partial != 0 || blocks == 0)) { + /* GCM payload phase - partial remainder */ + XMEMSET(partialBlock, 0, sizeof(partialBlock)); + XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial); + status = HAL_CRYPEx_AESGCM_Decrypt(&hcryp, partialBlock, partial, + partialBlock, STM32_HAL_TIMEOUT); + XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial); + } + if (status == HAL_OK) { + /* Compute the authTag */ + status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, tag, STM32_HAL_TIMEOUT); + } +#endif + + if (status != HAL_OK) + ret = AES_GCM_AUTH_E; + + HAL_CRYP_DeInit(&hcryp); + +#else /* STD_PERI_LIB */ + ByteReverseWords(keyCopy, (word32*)aes->key, aes->keylen); + + /* Input size and auth size need to be the actual sizes, even though + * they are not block aligned, because this length (in bits) is used + * in the final GHASH. 
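+     * (GHASH folds len(A) || len(C), in bits, into its final block; see
+     * FlattenSzInBits in the software GHASH implementations above.
+     * Passing the padded sizes here would therefore produce a
+     * different tag.)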
*/ + status = CRYP_AES_GCM(MODE_DECRYPT, (uint8_t*)ctr, + (uint8_t*)keyCopy, keySize * 8, + (uint8_t*)in, sz, + (uint8_t*)authInPadded, authInSz, + (uint8_t*)out, tag); + if (status != SUCCESS) + ret = AES_GCM_AUTH_E; +#endif /* WOLFSSL_STM32_CUBEMX */ + + /* STM32 GCM hardware only supports IV of 12 bytes, so use software for auth */ + if (sz == 0 || ivSz != GCM_NONCE_MID_SZ) { + DecrementGcmCounter(ctr); /* hardware requires +1, so subtract it */ + GHASH(aes, authIn, authInSz, in, sz, tag, sizeof(tag)); + wc_AesEncrypt(aes, ctr, partialBlock); + xorbuf(tag, partialBlock, sizeof(tag)); + } + + if (ConstantCompare(authTag, tag, authTagSz) != 0) { + ret = AES_GCM_AUTH_E; + } + + /* Free memory if not a multiple of AES_BLOCK_SZ */ + if (authInPadded != authIn) { + XFREE(authInPadded, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + + wolfSSL_CryptHwMutexUnLock(); + + return ret; +} + +#endif /* STM32_CRYPTO_AES_GCM */ + +#ifdef WOLFSSL_AESNI +int AES_GCM_decrypt_C(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz); +#else +static +#endif +int AES_GCM_decrypt_C(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + int ret = 0; + word32 blocks = sz / AES_BLOCK_SIZE; + word32 partial = sz % AES_BLOCK_SIZE; + const byte* c = in; + byte* p = out; + byte counter[AES_BLOCK_SIZE]; + byte initialCounter[AES_BLOCK_SIZE]; + byte *ctr; + byte scratch[AES_BLOCK_SIZE]; + byte Tprime[AES_BLOCK_SIZE]; + byte EKY0[AES_BLOCK_SIZE]; +#ifdef OPENSSL_EXTRA + word32 aadTemp; +#endif + ctr = counter; + XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(initialCounter, iv, ivSz); + initialCounter[AES_BLOCK_SIZE - 1] = 1; + } + else { +#ifdef OPENSSL_EXTRA + aadTemp = aes->aadLen; + aes->aadLen = 0; +#endif + GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); +#ifdef OPENSSL_EXTRA + aes->aadLen = aadTemp; +#endif + } + XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE); + + /* Calc the authTag again using the received auth data and the cipher text */ + GHASH(aes, authIn, authInSz, in, sz, Tprime, sizeof(Tprime)); + wc_AesEncrypt(aes, ctr, EKY0); + xorbuf(Tprime, EKY0, sizeof(Tprime)); + +#ifdef OPENSSL_EXTRA + if (!out) { + /* authenticated, non-confidential data */ + /* store AAD size for next call */ + aes->aadLen = authInSz; + } +#endif + if (ConstantCompare(authTag, Tprime, authTagSz) != 0) { + return AES_GCM_AUTH_E; + } + +#if defined(WOLFSSL_PIC32MZ_CRYPT) + if (blocks) { + /* use initial IV for HW, but don't use it below */ + XMEMCPY(aes->reg, ctr, AES_BLOCK_SIZE); + + ret = wc_Pic32AesCrypt( + aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE, + out, in, (blocks * AES_BLOCK_SIZE), + PIC32_DECRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_AES_GCM); + if (ret != 0) + return ret; + } + /* process remainder using partial handling */ +#endif + +#if defined(HAVE_AES_ECB) && !defined(WOLFSSL_PIC32MZ_CRYPT) + /* some hardware acceleration can gain performance from doing AES encryption + * of the whole buffer at once */ + if (c != p && blocks > 0) { /* can not handle inline decryption */ + while (blocks--) { + IncrementGcmCounter(ctr); + XMEMCPY(p, ctr, AES_BLOCK_SIZE); + p += AES_BLOCK_SIZE; + } + + /* reset number of blocks and then do encryption */ + blocks = sz / AES_BLOCK_SIZE; + + wc_AesEcbEncrypt(aes, out, out, AES_BLOCK_SIZE * blocks); + xorbuf(out, c, 
AES_BLOCK_SIZE * blocks);
+        c += AES_BLOCK_SIZE * blocks;
+    }
+    else
+#endif /* HAVE_AES_ECB && !PIC32MZ */
+    while (blocks--) {
+        IncrementGcmCounter(ctr);
+    #if !defined(WOLFSSL_PIC32MZ_CRYPT)
+        wc_AesEncrypt(aes, ctr, scratch);
+        xorbuf(scratch, c, AES_BLOCK_SIZE);
+        XMEMCPY(p, scratch, AES_BLOCK_SIZE);
+    #endif
+        p += AES_BLOCK_SIZE;
+        c += AES_BLOCK_SIZE;
+    }
+
+    if (partial != 0) {
+        IncrementGcmCounter(ctr);
+        wc_AesEncrypt(aes, ctr, scratch);
+        xorbuf(scratch, c, partial);
+        XMEMCPY(p, scratch, partial);
+    }
+
+    return ret;
+}
+
+/* Software AES - GCM Decrypt */
+int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
+                     const byte* iv, word32 ivSz,
+                     const byte* authTag, word32 authTagSz,
+                     const byte* authIn, word32 authInSz)
+{
+#ifdef WOLFSSL_AESNI
+    int res = AES_GCM_AUTH_E;
+#endif
+
+    /* argument checks */
+    /* If the sz is non-zero, both in and out must be set. If sz is 0,
+     * in and out are don't cares, as this is the GMAC case. */
+    if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
+        authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0 ||
+        ivSz == 0) {
+
+        return BAD_FUNC_ARG;
+    }
+
+#ifdef WOLF_CRYPTO_CB
+    if (aes->devId != INVALID_DEVID) {
+        int ret = wc_CryptoCb_AesGcmDecrypt(aes, out, in, sz, iv, ivSz,
+            authTag, authTagSz, authIn, authInSz);
+        if (ret != CRYPTOCB_UNAVAILABLE)
+            return ret;
+        /* fall-through when unavailable */
+    }
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+    /* if async and byte count above threshold */
+    /* only 12-byte IV is supported in HW */
+    if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
+            sz >= WC_ASYNC_THRESH_AES_GCM && ivSz == GCM_NONCE_MID_SZ) {
+    #if defined(HAVE_CAVIUM)
+        #ifdef HAVE_CAVIUM_V
+        if (authInSz == 20) { /* Nitrox V GCM only works with 20-byte AAD */
+            return NitroxAesGcmDecrypt(aes, out, in, sz,
+                (const byte*)aes->devKey, aes->keylen, iv, ivSz,
+                authTag, authTagSz, authIn, authInSz);
+        }
+        #endif
+    #elif defined(HAVE_INTEL_QA)
+        return IntelQaSymAesGcmDecrypt(&aes->asyncDev, out, in, sz,
+            (const byte*)aes->devKey, aes->keylen, iv, ivSz,
+            authTag, authTagSz, authIn, authInSz);
+    #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+        if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_GCM_DECRYPT)) {
+            WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
+            testDev->aes.aes = aes;
+            testDev->aes.out = out;
+            testDev->aes.in = in;
+            testDev->aes.sz = sz;
+            testDev->aes.iv = iv;
+            testDev->aes.ivSz = ivSz;
+            testDev->aes.authTag = (byte*)authTag;
+            testDev->aes.authTagSz = authTagSz;
+            testDev->aes.authIn = authIn;
+            testDev->aes.authInSz = authInSz;
+            return WC_PENDING_E;
+        }
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+#ifdef STM32_CRYPTO_AES_GCM
+    /* The STM standard peripheral library APIs don't support partial blocks */
+    #ifdef STD_PERI_LIB
+    if (partial == 0)
+    #endif
+    {
+        return wc_AesGcmDecrypt_STM32(
+            aes, out, in, sz, iv, ivSz,
+            authTag, authTagSz, authIn, authInSz);
+    }
+#endif /* STM32_CRYPTO_AES_GCM */
+
+#ifdef WOLFSSL_AESNI
+    #ifdef HAVE_INTEL_AVX2
+    if (IS_INTEL_AVX2(intel_flags)) {
+        AES_GCM_decrypt_avx2(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
+                             authTagSz, (byte*)aes->key, aes->rounds, &res);
+        if (res == 0)
+            return AES_GCM_AUTH_E;
+        return 0;
+    }
+    else
+    #endif
+    #ifdef HAVE_INTEL_AVX1
+    if (IS_INTEL_AVX1(intel_flags)) {
+        AES_GCM_decrypt_avx1(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
+                             authTagSz, (byte*)aes->key, aes->rounds, &res);
+        if (res == 0)
+            return AES_GCM_AUTH_E;
+        return 0;
+    }
+    else
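+    /* otherwise fall through: try the plain AES-NI routine below, and
+     * failing that the portable AES_GCM_decrypt_C() implementation */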
#endif + if (haveAESNI) { + AES_GCM_decrypt(in, out, authIn, iv, authTag, sz, authInSz, ivSz, + authTagSz, (byte*)aes->key, aes->rounds, &res); + if (res == 0) + return AES_GCM_AUTH_E; + return 0; + } + else +#endif + { + return AES_GCM_decrypt_C(aes, out, in, sz, iv, ivSz, authTag, authTagSz, + authIn, authInSz); + } +} +#endif +#endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */ +#endif /* WOLFSSL_XILINX_CRYPT */ +#endif /* end of block for AESGCM implementation selection */ + + +/* Common to all, abstract functions that build off of lower level AESGCM + * functions */ +#ifndef WC_NO_RNG + +int wc_AesGcmSetExtIV(Aes* aes, const byte* iv, word32 ivSz) +{ + int ret = 0; + + if (aes == NULL || iv == NULL || + (ivSz != GCM_NONCE_MIN_SZ && ivSz != GCM_NONCE_MID_SZ && + ivSz != GCM_NONCE_MAX_SZ)) { + + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + XMEMCPY((byte*)aes->reg, iv, ivSz); + + /* If the IV is 96, allow for a 2^64 invocation counter. + * For any other size for the nonce, limit the invocation + * counter to 32-bits. (SP 800-38D 8.3) */ + aes->invokeCtr[0] = 0; + aes->invokeCtr[1] = (ivSz == GCM_NONCE_MID_SZ) ? 0 : 0xFFFFFFFF; + aes->nonceSz = ivSz; + } + + return ret; +} + + +int wc_AesGcmSetIV(Aes* aes, word32 ivSz, + const byte* ivFixed, word32 ivFixedSz, + WC_RNG* rng) +{ + int ret = 0; + + if (aes == NULL || rng == NULL || + (ivSz != GCM_NONCE_MIN_SZ && ivSz != GCM_NONCE_MID_SZ && + ivSz != GCM_NONCE_MAX_SZ) || + (ivFixed == NULL && ivFixedSz != 0) || + (ivFixed != NULL && ivFixedSz != AES_IV_FIXED_SZ)) { + + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + byte* iv = (byte*)aes->reg; + + if (ivFixedSz) + XMEMCPY(iv, ivFixed, ivFixedSz); + + ret = wc_RNG_GenerateBlock(rng, iv + ivFixedSz, ivSz - ivFixedSz); + } + + if (ret == 0) { + /* If the IV is 96, allow for a 2^64 invocation counter. + * For any other size for the nonce, limit the invocation + * counter to 32-bits. (SP 800-38D 8.3) */ + aes->invokeCtr[0] = 0; + aes->invokeCtr[1] = (ivSz == GCM_NONCE_MID_SZ) ? 
0 : 0xFFFFFFFF; + aes->nonceSz = ivSz; + } + + return ret; +} + + +int wc_AesGcmEncrypt_ex(Aes* aes, byte* out, const byte* in, word32 sz, + byte* ivOut, word32 ivOutSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + int ret = 0; + + if (aes == NULL || (sz != 0 && (in == NULL || out == NULL)) || + ivOut == NULL || ivOutSz != aes->nonceSz || + (authIn == NULL && authInSz != 0)) { + + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + aes->invokeCtr[0]++; + if (aes->invokeCtr[0] == 0) { + aes->invokeCtr[1]++; + if (aes->invokeCtr[1] == 0) + ret = AES_GCM_OVERFLOW_E; + } + } + + if (ret == 0) { + XMEMCPY(ivOut, aes->reg, ivOutSz); + ret = wc_AesGcmEncrypt(aes, out, in, sz, + (byte*)aes->reg, ivOutSz, + authTag, authTagSz, + authIn, authInSz); + if (ret == 0) + IncCtr((byte*)aes->reg, ivOutSz); + } + + return ret; +} + +int wc_Gmac(const byte* key, word32 keySz, byte* iv, word32 ivSz, + const byte* authIn, word32 authInSz, + byte* authTag, word32 authTagSz, WC_RNG* rng) +{ + Aes aes; + int ret; + + if (key == NULL || iv == NULL || (authIn == NULL && authInSz != 0) || + authTag == NULL || authTagSz == 0 || rng == NULL) { + + return BAD_FUNC_ARG; + } + + ret = wc_AesInit(&aes, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_AesGcmSetKey(&aes, key, keySz); + if (ret == 0) + ret = wc_AesGcmSetIV(&aes, ivSz, NULL, 0, rng); + if (ret == 0) + ret = wc_AesGcmEncrypt_ex(&aes, NULL, NULL, 0, iv, ivSz, + authTag, authTagSz, authIn, authInSz); + wc_AesFree(&aes); + } + ForceZero(&aes, sizeof(aes)); + + return ret; +} + +int wc_GmacVerify(const byte* key, word32 keySz, + const byte* iv, word32 ivSz, + const byte* authIn, word32 authInSz, + const byte* authTag, word32 authTagSz) +{ + int ret; +#ifndef NO_AES_DECRYPT + Aes aes; + + if (key == NULL || iv == NULL || (authIn == NULL && authInSz != 0) || + authTag == NULL || authTagSz == 0 || authTagSz > AES_BLOCK_SIZE) { + + return BAD_FUNC_ARG; + } + + ret = wc_AesInit(&aes, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_AesGcmSetKey(&aes, key, keySz); + if (ret == 0) + ret = wc_AesGcmDecrypt(&aes, NULL, NULL, 0, iv, ivSz, + authTag, authTagSz, authIn, authInSz); + wc_AesFree(&aes); + } + ForceZero(&aes, sizeof(aes)); +#else + (void)key; + (void)keySz; + (void)iv; + (void)ivSz; + (void)authIn; + (void)authInSz; + (void)authTag; + (void)authTagSz; + ret = NOT_COMPILED_IN; +#endif + return ret; +} + +#endif /* WC_NO_RNG */ + + +WOLFSSL_API int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len) +{ + if (gmac == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + return wc_AesGcmSetKey(&gmac->aes, key, len); +} + + +WOLFSSL_API int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz, + const byte* authIn, word32 authInSz, + byte* authTag, word32 authTagSz) +{ + return wc_AesGcmEncrypt(&gmac->aes, NULL, NULL, 0, iv, ivSz, + authTag, authTagSz, authIn, authInSz); +} + +#endif /* HAVE_AESGCM */ + + +#ifdef HAVE_AESCCM + +int wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz) +{ + if (!((keySz == 16) || (keySz == 24) || (keySz == 32))) + return BAD_FUNC_ARG; + + return wc_AesSetKey(aes, key, keySz, NULL, AES_ENCRYPTION); +} + +#ifdef WOLFSSL_ARMASM + /* implementation located in wolfcrypt/src/port/arm/armv8-aes.c */ + +#elif defined(HAVE_COLDFIRE_SEC) + #error "Coldfire SEC doesn't currently support AES-CCM mode" + +#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) + /* implemented in wolfcrypt/src/port/caam_aes.c */ + +#elif defined(FREESCALE_LTC) + +/* return 0 on success */ +int wc_AesCcmEncrypt(Aes* aes, 
byte* out, const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + byte *key; + uint32_t keySize; + status_t status; + + /* sanity check on arguments */ + if (aes == NULL || out == NULL || in == NULL || nonce == NULL + || authTag == NULL || nonceSz < 7 || nonceSz > 13) + return BAD_FUNC_ARG; + + key = (byte*)aes->key; + + status = wc_AesGetKeySize(aes, &keySize); + if (status != 0) { + return status; + } + + status = LTC_AES_EncryptTagCcm(LTC_BASE, in, out, inSz, + nonce, nonceSz, authIn, authInSz, key, keySize, authTag, authTagSz); + + return (kStatus_Success == status) ? 0 : BAD_FUNC_ARG; +} + +#ifdef HAVE_AES_DECRYPT +int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + byte *key; + uint32_t keySize; + status_t status; + + /* sanity check on arguments */ + if (aes == NULL || out == NULL || in == NULL || nonce == NULL + || authTag == NULL || nonceSz < 7 || nonceSz > 13) + return BAD_FUNC_ARG; + + key = (byte*)aes->key; + + status = wc_AesGetKeySize(aes, &keySize); + if (status != 0) { + return status; + } + + status = LTC_AES_DecryptTagCcm(LTC_BASE, in, out, inSz, + nonce, nonceSz, authIn, authInSz, key, keySize, authTag, authTagSz); + + if (status == kStatus_Success) { + return 0; + } + else { + XMEMSET(out, 0, inSz); + return AES_CCM_AUTH_E; + } +} +#endif /* HAVE_AES_DECRYPT */ + +#else + +/* Software CCM */ +static void roll_x(Aes* aes, const byte* in, word32 inSz, byte* out) +{ + /* process the bulk of the data */ + while (inSz >= AES_BLOCK_SIZE) { + xorbuf(out, in, AES_BLOCK_SIZE); + in += AES_BLOCK_SIZE; + inSz -= AES_BLOCK_SIZE; + + wc_AesEncrypt(aes, out, out); + } + + /* process remainder of the data */ + if (inSz > 0) { + xorbuf(out, in, inSz); + wc_AesEncrypt(aes, out, out); + } +} + +static void roll_auth(Aes* aes, const byte* in, word32 inSz, byte* out) +{ + word32 authLenSz; + word32 remainder; + + /* encode the length in */ + if (inSz <= 0xFEFF) { + authLenSz = 2; + out[0] ^= ((inSz & 0xFF00) >> 8); + out[1] ^= (inSz & 0x00FF); + } + else if (inSz <= 0xFFFFFFFF) { + authLenSz = 6; + out[0] ^= 0xFF; out[1] ^= 0xFE; + out[2] ^= ((inSz & 0xFF000000) >> 24); + out[3] ^= ((inSz & 0x00FF0000) >> 16); + out[4] ^= ((inSz & 0x0000FF00) >> 8); + out[5] ^= (inSz & 0x000000FF); + } + /* Note, the protocol handles auth data up to 2^64, but we are + * using 32-bit sizes right now, so the bigger data isn't handled + * else if (inSz <= 0xFFFFFFFFFFFFFFFF) {} */ + else + return; + + /* start fill out the rest of the first block */ + remainder = AES_BLOCK_SIZE - authLenSz; + if (inSz >= remainder) { + /* plenty of bulk data to fill the remainder of this block */ + xorbuf(out + authLenSz, in, remainder); + inSz -= remainder; + in += remainder; + } + else { + /* not enough bulk data, copy what is available, and pad zero */ + xorbuf(out + authLenSz, in, inSz); + inSz = 0; + } + wc_AesEncrypt(aes, out, out); + + if (inSz > 0) + roll_x(aes, in, inSz, out); +} + + +static WC_INLINE void AesCcmCtrInc(byte* B, word32 lenSz) +{ + word32 i; + + for (i = 0; i < lenSz; i++) { + if (++B[AES_BLOCK_SIZE - 1 - i] != 0) return; + } +} + +#ifdef WOLFSSL_AESNI +static WC_INLINE void AesCcmCtrIncSet4(byte* B, word32 lenSz) +{ + word32 i; + + /* B+1 = B */ + XMEMCPY(B + AES_BLOCK_SIZE * 1, B, AES_BLOCK_SIZE); + /* B+2,B+3 = B,B+1 */ + XMEMCPY(B + AES_BLOCK_SIZE * 2, B, AES_BLOCK_SIZE * 
2); + + for (i = 0; i < lenSz; i++) { + if (++B[AES_BLOCK_SIZE * 1 - 1 - i] != 0) break; + } + B[AES_BLOCK_SIZE * 2 - 1] += 2; + if (B[AES_BLOCK_SIZE * 2 - 1] < 2) { + for (i = 1; i < lenSz; i++) { + if (++B[AES_BLOCK_SIZE * 2 - 1 - i] != 0) break; + } + } + B[AES_BLOCK_SIZE * 3 - 1] += 3; + if (B[AES_BLOCK_SIZE * 3 - 1] < 3) { + for (i = 1; i < lenSz; i++) { + if (++B[AES_BLOCK_SIZE * 3 - 1 - i] != 0) break; + } + } +} + +static WC_INLINE void AesCcmCtrInc4(byte* B, word32 lenSz) +{ + word32 i; + + B[AES_BLOCK_SIZE - 1] += 4; + if (B[AES_BLOCK_SIZE - 1] < 4) { + for (i = 1; i < lenSz; i++) { + if (++B[AES_BLOCK_SIZE - 1 - i] != 0) break; + } + } +} +#endif + +/* Software AES - CCM Encrypt */ +/* return 0 on success */ +int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ +#ifndef WOLFSSL_AESNI + byte A[AES_BLOCK_SIZE]; + byte B[AES_BLOCK_SIZE]; +#else + ALIGN128 byte A[AES_BLOCK_SIZE * 4]; + ALIGN128 byte B[AES_BLOCK_SIZE * 4]; +#endif + byte lenSz; + word32 i; + byte mask = 0xFF; + const word32 wordSz = (word32)sizeof(word32); + + /* sanity check on arguments */ + if (aes == NULL || out == NULL || in == NULL || nonce == NULL + || authTag == NULL || nonceSz < 7 || nonceSz > 13 || + authTagSz > AES_BLOCK_SIZE) + return BAD_FUNC_ARG; + + XMEMSET(A, 0, sizeof(A)); + XMEMCPY(B+1, nonce, nonceSz); + lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz; + B[0] = (authInSz > 0 ? 64 : 0) + + (8 * (((byte)authTagSz - 2) / 2)) + + (lenSz - 1); + for (i = 0; i < lenSz; i++) { + if (mask && i >= wordSz) + mask = 0x00; + B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask; + } + + wc_AesEncrypt(aes, B, A); + + if (authInSz > 0) + roll_auth(aes, authIn, authInSz, A); + if (inSz > 0) + roll_x(aes, in, inSz, A); + XMEMCPY(authTag, A, authTagSz); + + B[0] = lenSz - 1; + for (i = 0; i < lenSz; i++) + B[AES_BLOCK_SIZE - 1 - i] = 0; + wc_AesEncrypt(aes, B, A); + xorbuf(authTag, A, authTagSz); + + B[15] = 1; +#ifdef WOLFSSL_AESNI + if (haveAESNI && aes->use_aesni) { + while (inSz >= AES_BLOCK_SIZE * 4) { + AesCcmCtrIncSet4(B, lenSz); + + AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key, + aes->rounds); + xorbuf(A, in, AES_BLOCK_SIZE * 4); + XMEMCPY(out, A, AES_BLOCK_SIZE * 4); + + inSz -= AES_BLOCK_SIZE * 4; + in += AES_BLOCK_SIZE * 4; + out += AES_BLOCK_SIZE * 4; + + if (inSz < AES_BLOCK_SIZE * 4) { + AesCcmCtrInc4(B, lenSz); + } + } + } +#endif + while (inSz >= AES_BLOCK_SIZE) { + wc_AesEncrypt(aes, B, A); + xorbuf(A, in, AES_BLOCK_SIZE); + XMEMCPY(out, A, AES_BLOCK_SIZE); + + AesCcmCtrInc(B, lenSz); + inSz -= AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + if (inSz > 0) { + wc_AesEncrypt(aes, B, A); + xorbuf(A, in, inSz); + XMEMCPY(out, A, inSz); + } + + ForceZero(A, AES_BLOCK_SIZE); + ForceZero(B, AES_BLOCK_SIZE); + + return 0; +} + +#ifdef HAVE_AES_DECRYPT +/* Software AES - CCM Decrypt */ +int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ +#ifndef WOLFSSL_AESNI + byte A[AES_BLOCK_SIZE]; + byte B[AES_BLOCK_SIZE]; +#else + ALIGN128 byte B[AES_BLOCK_SIZE * 4]; + ALIGN128 byte A[AES_BLOCK_SIZE * 4]; +#endif + byte* o; + byte lenSz; + word32 i, oSz; + int result = 0; + byte mask = 0xFF; + const word32 wordSz = (word32)sizeof(word32); + + /* sanity check on arguments */ + if (aes == NULL || out == NULL || in 
== NULL || nonce == NULL + || authTag == NULL || nonceSz < 7 || nonceSz > 13 || + authTagSz > AES_BLOCK_SIZE) + return BAD_FUNC_ARG; + + o = out; + oSz = inSz; + XMEMCPY(B+1, nonce, nonceSz); + lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz; + + B[0] = lenSz - 1; + for (i = 0; i < lenSz; i++) + B[AES_BLOCK_SIZE - 1 - i] = 0; + B[15] = 1; + +#ifdef WOLFSSL_AESNI + if (haveAESNI && aes->use_aesni) { + while (oSz >= AES_BLOCK_SIZE * 4) { + AesCcmCtrIncSet4(B, lenSz); + + AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key, + aes->rounds); + xorbuf(A, in, AES_BLOCK_SIZE * 4); + XMEMCPY(o, A, AES_BLOCK_SIZE * 4); + + oSz -= AES_BLOCK_SIZE * 4; + in += AES_BLOCK_SIZE * 4; + o += AES_BLOCK_SIZE * 4; + + if (oSz < AES_BLOCK_SIZE * 4) { + AesCcmCtrInc4(B, lenSz); + } + } + } +#endif + while (oSz >= AES_BLOCK_SIZE) { + wc_AesEncrypt(aes, B, A); + xorbuf(A, in, AES_BLOCK_SIZE); + XMEMCPY(o, A, AES_BLOCK_SIZE); + + AesCcmCtrInc(B, lenSz); + oSz -= AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + o += AES_BLOCK_SIZE; + } + if (inSz > 0) { + wc_AesEncrypt(aes, B, A); + xorbuf(A, in, oSz); + XMEMCPY(o, A, oSz); + } + + for (i = 0; i < lenSz; i++) + B[AES_BLOCK_SIZE - 1 - i] = 0; + wc_AesEncrypt(aes, B, A); + + o = out; + oSz = inSz; + + B[0] = (authInSz > 0 ? 64 : 0) + + (8 * (((byte)authTagSz - 2) / 2)) + + (lenSz - 1); + for (i = 0; i < lenSz; i++) { + if (mask && i >= wordSz) + mask = 0x00; + B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask; + } + + wc_AesEncrypt(aes, B, A); + + if (authInSz > 0) + roll_auth(aes, authIn, authInSz, A); + if (inSz > 0) + roll_x(aes, o, oSz, A); + + B[0] = lenSz - 1; + for (i = 0; i < lenSz; i++) + B[AES_BLOCK_SIZE - 1 - i] = 0; + wc_AesEncrypt(aes, B, B); + xorbuf(A, B, authTagSz); + + if (ConstantCompare(A, authTag, authTagSz) != 0) { + /* If the authTag check fails, don't keep the decrypted data. + * Unfortunately, you need the decrypted data to calculate the + * check value. 
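+     * The output buffer is zeroized below so that a caller can never
+     * observe plaintext that failed authentication.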
*/ + XMEMSET(out, 0, inSz); + result = AES_CCM_AUTH_E; + } + + ForceZero(A, AES_BLOCK_SIZE); + ForceZero(B, AES_BLOCK_SIZE); + o = NULL; + + return result; +} + +#endif /* HAVE_AES_DECRYPT */ +#endif /* software CCM */ + +/* abstract functions that call lower level AESCCM functions */ +#ifndef WC_NO_RNG + +int wc_AesCcmSetNonce(Aes* aes, const byte* nonce, word32 nonceSz) +{ + int ret = 0; + + if (aes == NULL || nonce == NULL || + nonceSz < CCM_NONCE_MIN_SZ || nonceSz > CCM_NONCE_MAX_SZ) { + + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + XMEMCPY(aes->reg, nonce, nonceSz); + aes->nonceSz = nonceSz; + + /* Invocation counter should be 2^61 */ + aes->invokeCtr[0] = 0; + aes->invokeCtr[1] = 0xE0000000; + } + + return ret; +} + + +int wc_AesCcmEncrypt_ex(Aes* aes, byte* out, const byte* in, word32 sz, + byte* ivOut, word32 ivOutSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + int ret = 0; + + if (aes == NULL || out == NULL || + (in == NULL && sz != 0) || + ivOut == NULL || + (authIn == NULL && authInSz != 0) || + (ivOutSz != aes->nonceSz)) { + + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + aes->invokeCtr[0]++; + if (aes->invokeCtr[0] == 0) { + aes->invokeCtr[1]++; + if (aes->invokeCtr[1] == 0) + ret = AES_CCM_OVERFLOW_E; + } + } + + if (ret == 0) { + ret = wc_AesCcmEncrypt(aes, out, in, sz, + (byte*)aes->reg, aes->nonceSz, + authTag, authTagSz, + authIn, authInSz); + if (ret == 0) { + XMEMCPY(ivOut, aes->reg, aes->nonceSz); + IncCtr((byte*)aes->reg, aes->nonceSz); + } + } + + return ret; +} + +#endif /* WC_NO_RNG */ + +#endif /* HAVE_AESCCM */ + + +/* Initialize Aes for use with async hardware */ +int wc_AesInit(Aes* aes, void* heap, int devId) +{ + int ret = 0; + + if (aes == NULL) + return BAD_FUNC_ARG; + + aes->heap = heap; + +#ifdef WOLF_CRYPTO_CB + aes->devId = devId; + aes->devCtx = NULL; +#else + (void)devId; +#endif +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) + ret = wolfAsync_DevCtxInit(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES, + aes->heap, devId); +#endif /* WOLFSSL_ASYNC_CRYPT */ + +#ifdef WOLFSSL_AFALG + aes->alFd = -1; + aes->rdFd = -1; +#endif +#if defined(WOLFSSL_DEVCRYPTO) && \ + (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC)) + aes->ctx.cfd = -1; +#endif +#if defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES) + XMEMSET(&aes->ctx, 0, sizeof(aes->ctx)); +#endif +#ifdef HAVE_AESGCM +#ifdef OPENSSL_EXTRA + XMEMSET(aes->aadH, 0, sizeof(aes->aadH)); + aes->aadLen = 0; +#endif +#endif + return ret; +} + +#ifdef HAVE_PKCS11 +int wc_AesInit_Id(Aes* aes, unsigned char* id, int len, void* heap, int devId) +{ + int ret = 0; + + if (aes == NULL) + ret = BAD_FUNC_ARG; + if (ret == 0 && (len < 0 || len > AES_MAX_ID_LEN)) + ret = BUFFER_E; + + if (ret == 0) + ret = wc_AesInit(aes, heap, devId); + if (ret == 0) { + XMEMCPY(aes->id, id, len); + aes->idLen = len; + } + + return ret; +} +#endif + +/* Free Aes from use with async hardware */ +void wc_AesFree(Aes* aes) +{ + if (aes == NULL) + return; + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) + wolfAsync_DevCtxFree(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES); +#endif /* WOLFSSL_ASYNC_CRYPT */ +#if defined(WOLFSSL_AFALG) || defined(WOLFSSL_AFALG_XILINX_AES) + if (aes->rdFd > 0) { /* negative is error case */ + close(aes->rdFd); + } + if (aes->alFd > 0) { + close(aes->alFd); + } +#endif /* WOLFSSL_AFALG */ +#if defined(WOLFSSL_DEVCRYPTO) && \ + (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC)) + wc_DevCryptoFree(&aes->ctx); 
+#endif +#if defined(WOLF_CRYPTO_CB) || (defined(WOLFSSL_DEVCRYPTO) && \ + (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))) || \ + (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)) + ForceZero((byte*)aes->devKey, AES_MAX_KEY_SIZE/WOLFSSL_BIT_SIZE); +#endif +} + + +int wc_AesGetKeySize(Aes* aes, word32* keySize) +{ + int ret = 0; + + if (aes == NULL || keySize == NULL) { + return BAD_FUNC_ARG; + } +#if defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES) + *keySize = aes->ctx.key.keySize; + return ret; +#endif + switch (aes->rounds) { +#ifdef WOLFSSL_AES_128 + case 10: + *keySize = 16; + break; +#endif +#ifdef WOLFSSL_AES_192 + case 12: + *keySize = 24; + break; +#endif +#ifdef WOLFSSL_AES_256 + case 14: + *keySize = 32; + break; +#endif + default: + *keySize = 0; + ret = BAD_FUNC_ARG; + } + + return ret; +} + +#endif /* !WOLFSSL_TI_CRYPT */ + +#ifdef HAVE_AES_ECB +#if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) + /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ + +#elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */ + +#elif defined(WOLFSSL_DEVCRYPTO_AES) + /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */ + +#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES) + +/* Software AES - ECB */ +int wc_AesEcbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + if ((in == NULL) || (out == NULL) || (aes == NULL)) + return BAD_FUNC_ARG; + + return AES_ECB_encrypt(aes, in, out, sz); +} + + +int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + if ((in == NULL) || (out == NULL) || (aes == NULL)) + return BAD_FUNC_ARG; + + return AES_ECB_decrypt(aes, in, out, sz); +} + +#else + +/* Software AES - ECB */ +int wc_AesEcbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + word32 blocks = sz / AES_BLOCK_SIZE; + + if ((in == NULL) || (out == NULL) || (aes == NULL)) + return BAD_FUNC_ARG; + while (blocks>0) { + wc_AesEncryptDirect(aes, out, in); + out += AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + blocks--; + } + return 0; +} + + +int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + word32 blocks = sz / AES_BLOCK_SIZE; + + if ((in == NULL) || (out == NULL) || (aes == NULL)) + return BAD_FUNC_ARG; + while (blocks>0) { + wc_AesDecryptDirect(aes, out, in); + out += AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + blocks--; + } + return 0; +} +#endif +#endif /* HAVE_AES_ECB */ + +#if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_OFB) +/* Feedback AES mode + * + * aes structure holding key to use for encryption + * out buffer to hold result of encryption (must be at least as large as input + * buffer) + * in buffer to encrypt + * sz size of input buffer + * mode flag to specify AES mode + * + * returns 0 on success and negative error values on failure + */ +/* Software AES - CFB Encrypt */ +static int wc_AesFeedbackEncrypt(Aes* aes, byte* out, const byte* in, + word32 sz, byte mode) +{ + byte* tmp = NULL; +#ifdef WOLFSSL_AES_CFB + byte* reg = NULL; +#endif + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_AES_CFB + if (aes->left && sz) { + reg = (byte*)aes->reg + AES_BLOCK_SIZE - aes->left; + } +#endif + + /* consume any unused bytes left in aes->tmp */ + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; + while (aes->left && sz) { + *(out) = *(in++) ^ *(tmp++); + #ifdef WOLFSSL_AES_CFB + if (mode == AES_CFB_MODE) { + *(reg++) = *out; + } + #endif + 
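+        /* One leftover keystream byte from aes->tmp is consumed per pass; in
+         * CFB mode the ciphertext byte just produced is also shifted into
+         * aes->reg so the next block encryption sees the updated feedback
+         * register. OFB feedback is the keystream itself, so aes->reg needs
+         * no per-byte update here. */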
out++; + aes->left--; + sz--; + } + + while (sz >= AES_BLOCK_SIZE) { + /* Using aes->tmp here for inline case i.e. in=out */ + wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); + #ifdef WOLFSSL_AES_OFB + if (mode == AES_OFB_MODE) { + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + } + #endif + xorbuf((byte*)aes->tmp, in, AES_BLOCK_SIZE); + #ifdef WOLFSSL_AES_CFB + if (mode == AES_CFB_MODE) { + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + } + #endif + XMEMCPY(out, aes->tmp, AES_BLOCK_SIZE); + out += AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + aes->left = 0; + } + + /* encrypt left over data */ + if (sz) { + wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); + aes->left = AES_BLOCK_SIZE; + tmp = (byte*)aes->tmp; + #ifdef WOLFSSL_AES_OFB + if (mode == AES_OFB_MODE) { + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + } + #endif + #ifdef WOLFSSL_AES_CFB + reg = (byte*)aes->reg; + #endif + + while (sz--) { + *(out) = *(in++) ^ *(tmp++); + #ifdef WOLFSSL_AES_CFB + if (mode == AES_CFB_MODE) { + *(reg++) = *out; + } + #endif + out++; + aes->left--; + } + } + + return 0; +} + + +#ifdef HAVE_AES_DECRYPT +/* CFB 128 + * + * aes structure holding key to use for decryption + * out buffer to hold result of decryption (must be at least as large as input + * buffer) + * in buffer to decrypt + * sz size of input buffer + * + * returns 0 on success and negative error values on failure + */ +/* Software AES - CFB Decrypt */ +static int wc_AesFeedbackDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, + byte mode) +{ + byte* tmp; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + #ifdef WOLFSSL_AES_CFB + /* check if more input needs copied over to aes->reg */ + if (aes->left && sz && mode == AES_CFB_MODE) { + int size = min(aes->left, sz); + XMEMCPY((byte*)aes->reg + AES_BLOCK_SIZE - aes->left, in, size); + } + #endif + + /* consume any unused bytes left in aes->tmp */ + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; + while (aes->left && sz) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + sz--; + } + + while (sz > AES_BLOCK_SIZE) { + /* Using aes->tmp here for inline case i.e. 
in=out */ + wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); + #ifdef WOLFSSL_AES_OFB + if (mode == AES_OFB_MODE) { + XMEMCPY((byte*)aes->reg, (byte*)aes->tmp, AES_BLOCK_SIZE); + } + #endif + xorbuf((byte*)aes->tmp, in, AES_BLOCK_SIZE); + #ifdef WOLFSSL_AES_CFB + if (mode == AES_CFB_MODE) { + XMEMCPY(aes->reg, in, AES_BLOCK_SIZE); + } + #endif + XMEMCPY(out, (byte*)aes->tmp, AES_BLOCK_SIZE); + out += AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + aes->left = 0; + } + + /* decrypt left over data */ + if (sz) { + wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); + #ifdef WOLFSSL_AES_CFB + if (mode == AES_CFB_MODE) { + XMEMCPY(aes->reg, in, sz); + } + #endif + #ifdef WOLFSSL_AES_OFB + if (mode == AES_OFB_MODE) { + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + } + #endif + + aes->left = AES_BLOCK_SIZE; + tmp = (byte*)aes->tmp; + + while (sz--) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + } + } + + return 0; +} +#endif /* HAVE_AES_DECRYPT */ +#endif /* WOLFSSL_AES_CFB */ + +#ifdef WOLFSSL_AES_CFB +/* CFB 128 + * + * aes structure holding key to use for encryption + * out buffer to hold result of encryption (must be at least as large as input + * buffer) + * in buffer to encrypt + * sz size of input buffer + * + * returns 0 on success and negative error values on failure + */ +/* Software AES - CFB Encrypt */ +int wc_AesCfbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackEncrypt(aes, out, in, sz, AES_CFB_MODE); +} + + +#ifdef HAVE_AES_DECRYPT +/* CFB 128 + * + * aes structure holding key to use for decryption + * out buffer to hold result of decryption (must be at least as large as input + * buffer) + * in buffer to decrypt + * sz size of input buffer + * + * returns 0 on success and negative error values on failure + */ +/* Software AES - CFB Decrypt */ +int wc_AesCfbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackDecrypt(aes, out, in, sz, AES_CFB_MODE); +} +#endif /* HAVE_AES_DECRYPT */ + + +/* shift the whole AES_BLOCK_SIZE array left by 8 or 1 bits */ +static void shiftLeftArray(byte* ary, byte shift) +{ + int i; + + if (shift == WOLFSSL_BIT_SIZE) { + /* shifting over by 8 bits */ + for (i = 0; i < AES_BLOCK_SIZE - 1; i++) { + ary[i] = ary[i+1]; + } + ary[i] = 0; + } + else { + byte carry = 0; + + /* shifting over by 7 or less bits */ + for (i = 0; i < AES_BLOCK_SIZE - 1; i++) { + carry = ary[i+1] & (0XFF << (WOLFSSL_BIT_SIZE - shift)); + carry >>= (WOLFSSL_BIT_SIZE - shift); + ary[i] = (ary[i] << shift) + carry; + } + ary[i] = ary[i] << shift; + } +} + + +/* returns 0 on success and negative values on failure */ +static int wc_AesFeedbackCFB8(Aes* aes, byte* out, const byte* in, + word32 sz, byte dir) +{ + byte *pt; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (sz == 0) { + return 0; + } + + while (sz > 0) { + wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); + if (dir == AES_DECRYPTION) { + pt = (byte*)aes->reg; + + /* LSB + CAT */ + shiftLeftArray(pt, WOLFSSL_BIT_SIZE); + pt[AES_BLOCK_SIZE - 1] = in[0]; + } + + /* MSB + XOR */ + out[0] = aes->tmp[0] ^ in[0]; + if (dir == AES_ENCRYPTION) { + pt = (byte*)aes->reg; + + /* LSB + CAT */ + shiftLeftArray(pt, WOLFSSL_BIT_SIZE); + pt[AES_BLOCK_SIZE - 1] = out[0]; + } + + out += 1; + in += 1; + sz -= 1; + } + + return 0; +} + + +/* returns 0 on success and negative values on failure */ +static int wc_AesFeedbackCFB1(Aes* aes, byte* out, const byte* in, + word32 sz, byte dir) +{ + byte 
tmp; + byte cur = 0; /* hold current work in order to handle inline in=out */ + byte* pt; + int bit = 7; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (sz == 0) { + return 0; + } + + while (sz > 0) { + wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); + if (dir == AES_DECRYPTION) { + pt = (byte*)aes->reg; + + /* LSB + CAT */ + tmp = (0X01 << bit) & in[0]; + tmp = tmp >> bit; + tmp &= 0x01; + shiftLeftArray((byte*)aes->reg, 1); + pt[AES_BLOCK_SIZE - 1] |= tmp; + } + + /* MSB + XOR */ + tmp = (0X01 << bit) & in[0]; + pt = (byte*)aes->tmp; + tmp = (pt[0] >> 7) ^ (tmp >> bit); + tmp &= 0x01; + cur |= (tmp << bit); + + + if (dir == AES_ENCRYPTION) { + pt = (byte*)aes->reg; + + /* LSB + CAT */ + shiftLeftArray((byte*)aes->reg, 1); + pt[AES_BLOCK_SIZE - 1] |= tmp; + } + + bit--; + if (bit < 0) { + out[0] = cur; + out += 1; + in += 1; + sz -= 1; + bit = 7; + cur = 0; + } + else { + sz -= 1; + } + } + + if (bit > 0 && bit < 7) { + out[0] = cur; + } + + return 0; +} + + +/* CFB 1 + * + * aes structure holding key to use for encryption + * out buffer to hold result of encryption (must be at least as large as input + * buffer) + * in buffer to encrypt (packed to left, i.e. 101 is 0x90) + * sz size of input buffer in bits (0x1 would be size of 1 and 0xFF size of 8) + * + * returns 0 on success and negative values on failure + */ +int wc_AesCfb1Encrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackCFB1(aes, out, in, sz, AES_ENCRYPTION); +} + + +/* CFB 8 + * + * aes structure holding key to use for encryption + * out buffer to hold result of encryption (must be at least as large as input + * buffer) + * in buffer to encrypt + * sz size of input buffer + * + * returns 0 on success and negative values on failure + */ +int wc_AesCfb8Encrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackCFB8(aes, out, in, sz, AES_ENCRYPTION); +} +#ifdef HAVE_AES_DECRYPT + +/* CFB 1 + * + * aes structure holding key to use for encryption + * out buffer to hold result of encryption (must be at least as large as input + * buffer) + * in buffer to encrypt + * sz size of input buffer in bits (0x1 would be size of 1 and 0xFF size of 8) + * + * returns 0 on success and negative values on failure + */ +int wc_AesCfb1Decrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackCFB1(aes, out, in, sz, AES_DECRYPTION); +} + + +/* CFB 8 + * + * aes structure holding key to use for encryption + * out buffer to hold result of encryption (must be at least as large as input + * buffer) + * in buffer to encrypt + * sz size of input buffer + * + * returns 0 on success and negative values on failure + */ +int wc_AesCfb8Decrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackCFB8(aes, out, in, sz, AES_DECRYPTION); +} +#endif /* HAVE_AES_DECRYPT */ +#endif /* WOLFSSL_AES_CFB */ + +#ifdef WOLFSSL_AES_OFB +/* OFB + * + * aes structure holding key to use for encryption + * out buffer to hold result of encryption (must be at least as large as input + * buffer) + * in buffer to encrypt + * sz size of input buffer + * + * returns 0 on success and negative error values on failure + */ +/* Software AES - CFB Encrypt */ +int wc_AesOfbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackEncrypt(aes, out, in, sz, AES_OFB_MODE); +} + + +#ifdef HAVE_AES_DECRYPT +/* OFB + * + * aes structure holding key to use for decryption + * out buffer to hold result of decryption (must be at 
least as large as input + * buffer) + * in buffer to decrypt + * sz size of input buffer + * + * returns 0 on success and negative error values on failure + */ +/* Software AES - OFB Decrypt */ +int wc_AesOfbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackDecrypt(aes, out, in, sz, AES_OFB_MODE); +} +#endif /* HAVE_AES_DECRYPT */ +#endif /* WOLFSSL_AES_OFB */ + + +#ifdef HAVE_AES_KEYWRAP + +/* Initialize key wrap counter with value */ +static WC_INLINE void InitKeyWrapCounter(byte* inOutCtr, word32 value) +{ + int i; + word32 bytes; + + bytes = sizeof(word32); + for (i = 0; i < (int)sizeof(word32); i++) { + inOutCtr[i+sizeof(word32)] = (value >> ((bytes - 1) * 8)) & 0xFF; + bytes--; + } +} + +/* Increment key wrap counter */ +static WC_INLINE void IncrementKeyWrapCounter(byte* inOutCtr) +{ + int i; + + /* in network byte order so start at end and work back */ + for (i = KEYWRAP_BLOCK_SIZE - 1; i >= 0; i--) { + if (++inOutCtr[i]) /* we're done unless we overflow */ + return; + } +} + +/* Decrement key wrap counter */ +static WC_INLINE void DecrementKeyWrapCounter(byte* inOutCtr) +{ + int i; + + for (i = KEYWRAP_BLOCK_SIZE - 1; i >= 0; i--) { + if (--inOutCtr[i] != 0xFF) /* we're done unless we underflow */ + return; + } +} + +/* perform AES key wrap (RFC3394), return out sz on success, negative on err */ +int wc_AesKeyWrap(const byte* key, word32 keySz, const byte* in, word32 inSz, + byte* out, word32 outSz, const byte* iv) +{ + Aes aes; + byte* r; + word32 i; + int ret, j; + + byte t[KEYWRAP_BLOCK_SIZE]; + byte tmp[AES_BLOCK_SIZE]; + + /* n must be at least 2, output size is n + 8 bytes */ + if (key == NULL || in == NULL || inSz < 2 || + out == NULL || outSz < (inSz + KEYWRAP_BLOCK_SIZE)) + return BAD_FUNC_ARG; + + /* input must be multiple of 64-bits */ + if (inSz % KEYWRAP_BLOCK_SIZE != 0) + return BAD_FUNC_ARG; + + /* user IV is optional */ + if (iv == NULL) { + XMEMSET(tmp, 0xA6, KEYWRAP_BLOCK_SIZE); + } else { + XMEMCPY(tmp, iv, KEYWRAP_BLOCK_SIZE); + } + + r = out + 8; + XMEMCPY(r, in, inSz); + XMEMSET(t, 0, sizeof(t)); + + ret = wc_AesInit(&aes, NULL, INVALID_DEVID); + if (ret != 0) + return ret; + + ret = wc_AesSetKey(&aes, key, keySz, NULL, AES_ENCRYPTION); + if (ret != 0) + return ret; + + for (j = 0; j <= 5; j++) { + for (i = 1; i <= inSz / KEYWRAP_BLOCK_SIZE; i++) { + + /* load R[i] */ + XMEMCPY(tmp + KEYWRAP_BLOCK_SIZE, r, KEYWRAP_BLOCK_SIZE); + + wc_AesEncryptDirect(&aes, tmp, tmp); + + /* calculate new A */ + IncrementKeyWrapCounter(t); + xorbuf(tmp, t, KEYWRAP_BLOCK_SIZE); + + /* save R[i] */ + XMEMCPY(r, tmp + KEYWRAP_BLOCK_SIZE, KEYWRAP_BLOCK_SIZE); + r += KEYWRAP_BLOCK_SIZE; + } + r = out + KEYWRAP_BLOCK_SIZE; + } + + /* C[0] = A */ + XMEMCPY(out, tmp, KEYWRAP_BLOCK_SIZE); + + wc_AesFree(&aes); + + return inSz + KEYWRAP_BLOCK_SIZE; +} + +int wc_AesKeyUnWrap(const byte* key, word32 keySz, const byte* in, word32 inSz, + byte* out, word32 outSz, const byte* iv) +{ + Aes aes; + byte* r; + word32 i, n; + int ret, j; + + byte t[KEYWRAP_BLOCK_SIZE]; + byte tmp[AES_BLOCK_SIZE]; + + const byte* expIv; + const byte defaultIV[] = { + 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6 + }; + + (void)iv; + + if (key == NULL || in == NULL || inSz < 3 || + out == NULL || outSz < (inSz - KEYWRAP_BLOCK_SIZE)) + return BAD_FUNC_ARG; + + /* input must be multiple of 64-bits */ + if (inSz % KEYWRAP_BLOCK_SIZE != 0) + return BAD_FUNC_ARG; + + /* user IV optional */ + if (iv != NULL) { + expIv = iv; + } else { + expIv = defaultIV; + } + + /* A = C[0], R[i] = C[i] 
*/ + XMEMCPY(tmp, in, KEYWRAP_BLOCK_SIZE); + XMEMCPY(out, in + KEYWRAP_BLOCK_SIZE, inSz - KEYWRAP_BLOCK_SIZE); + XMEMSET(t, 0, sizeof(t)); + + ret = wc_AesInit(&aes, NULL, INVALID_DEVID); + if (ret != 0) + return ret; + + ret = wc_AesSetKey(&aes, key, keySz, NULL, AES_DECRYPTION); + if (ret != 0) + return ret; + + /* initialize counter to 6n */ + n = (inSz - 1) / KEYWRAP_BLOCK_SIZE; + InitKeyWrapCounter(t, 6 * n); + + for (j = 5; j >= 0; j--) { + for (i = n; i >= 1; i--) { + + /* calculate A */ + xorbuf(tmp, t, KEYWRAP_BLOCK_SIZE); + DecrementKeyWrapCounter(t); + + /* load R[i], starting at end of R */ + r = out + ((i - 1) * KEYWRAP_BLOCK_SIZE); + XMEMCPY(tmp + KEYWRAP_BLOCK_SIZE, r, KEYWRAP_BLOCK_SIZE); + wc_AesDecryptDirect(&aes, tmp, tmp); + + /* save R[i] */ + XMEMCPY(r, tmp + KEYWRAP_BLOCK_SIZE, KEYWRAP_BLOCK_SIZE); + } + } + + wc_AesFree(&aes); + + /* verify IV */ + if (XMEMCMP(tmp, expIv, KEYWRAP_BLOCK_SIZE) != 0) + return BAD_KEYWRAP_IV_E; + + return inSz - KEYWRAP_BLOCK_SIZE; +} + +#endif /* HAVE_AES_KEYWRAP */ + +#ifdef WOLFSSL_AES_XTS + +/* Galios Field to use */ +#define GF_XTS 0x87 + +/* This is to help with setting keys to correct encrypt or decrypt type. + * + * tweak AES key for tweak in XTS + * aes AES key for encrypt/decrypt process + * key buffer holding aes key | tweak key + * len length of key buffer in bytes. Should be twice that of key size. i.e. + * 32 for a 16 byte key. + * dir direction, either AES_ENCRYPTION or AES_DECRYPTION + * heap heap hint to use for memory. Can be NULL + * devId id to use with async crypto. Can be 0 + * + * Note: is up to user to call wc_AesFree on tweak and aes key when done. + * + * return 0 on success + */ +int wc_AesXtsSetKey(XtsAes* aes, const byte* key, word32 len, int dir, + void* heap, int devId) +{ + word32 keySz; + int ret = 0; + + if (aes == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + + if ((ret = wc_AesInit(&aes->tweak, heap, devId)) != 0) { + return ret; + } + if ((ret = wc_AesInit(&aes->aes, heap, devId)) != 0) { + return ret; + } + + keySz = len/2; + if (keySz != 16 && keySz != 32) { + WOLFSSL_MSG("Unsupported key size"); + return WC_KEY_SIZE_E; + } + + if ((ret = wc_AesSetKey(&aes->aes, key, keySz, NULL, dir)) == 0) { + ret = wc_AesSetKey(&aes->tweak, key + keySz, keySz, NULL, + AES_ENCRYPTION); + if (ret != 0) { + wc_AesFree(&aes->aes); + } + } + + return ret; +} + + +/* This is used to free up resources used by Aes structs + * + * aes AES keys to free + * + * return 0 on success + */ +int wc_AesXtsFree(XtsAes* aes) +{ + if (aes != NULL) { + wc_AesFree(&aes->aes); + wc_AesFree(&aes->tweak); + } + + return 0; +} + + +/* Same process as wc_AesXtsEncrypt but uses a word64 type as the tweak value + * instead of a byte array. This just converts the word64 to a byte array and + * calls wc_AesXtsEncrypt. 
+ *
+ * aes AES keys to use for block encrypt/decrypt
+ * out output buffer to hold cipher text
+ * in input plain text buffer to encrypt
+ * sz size of both out and in buffers
+ * sector value to use for tweak
+ *
+ * returns 0 on success
+ */
+int wc_AesXtsEncryptSector(XtsAes* aes, byte* out, const byte* in,
+        word32 sz, word64 sector)
+{
+    byte* pt;
+    byte  i[AES_BLOCK_SIZE];
+
+    XMEMSET(i, 0, AES_BLOCK_SIZE);
+#ifdef BIG_ENDIAN_ORDER
+    sector = ByteReverseWord64(sector);
+#endif
+    pt = (byte*)&sector;
+    XMEMCPY(i, pt, sizeof(word64));
+
+    return wc_AesXtsEncrypt(aes, out, in, sz, (const byte*)i, AES_BLOCK_SIZE);
+}
+
+
+/* Same process as wc_AesXtsDecrypt but uses a word64 type as the tweak value
+ * instead of a byte array. This just converts the word64 to a byte array.
+ *
+ * aes AES keys to use for block encrypt/decrypt
+ * out output buffer to hold plain text
+ * in input cipher text buffer to decrypt
+ * sz size of both out and in buffers
+ * sector value to use for tweak
+ *
+ * returns 0 on success
+ */
+int wc_AesXtsDecryptSector(XtsAes* aes, byte* out, const byte* in, word32 sz,
+        word64 sector)
+{
+    byte* pt;
+    byte  i[AES_BLOCK_SIZE];
+
+    XMEMSET(i, 0, AES_BLOCK_SIZE);
+#ifdef BIG_ENDIAN_ORDER
+    sector = ByteReverseWord64(sector);
+#endif
+    pt = (byte*)&sector;
+    XMEMCPY(i, pt, sizeof(word64));
+
+    return wc_AesXtsDecrypt(aes, out, in, sz, (const byte*)i, AES_BLOCK_SIZE);
+}
+
+#ifdef HAVE_AES_ECB
+/* helper function for encrypting / decrypting full buffer at once */
+static int _AesXtsHelper(Aes* aes, byte* out, const byte* in, word32 sz, int dir)
+{
+    word32 outSz = sz;
+    word32 totalSz = (sz / AES_BLOCK_SIZE) * AES_BLOCK_SIZE; /* total bytes */
+    byte* pt = out;
+
+    outSz -= AES_BLOCK_SIZE;
+
+    while (outSz > 0) {
+        word32 j;
+        byte carry = 0;
+
+        /* multiply by shift left and propagate carry */
+        for (j = 0; j < AES_BLOCK_SIZE && outSz > 0; j++, outSz--) {
+            byte tmpC;
+
+            tmpC = (pt[j] >> 7) & 0x01;
+            pt[j+AES_BLOCK_SIZE] = ((pt[j] << 1) + carry) & 0xFF;
+            carry = tmpC;
+        }
+        if (carry) {
+            pt[AES_BLOCK_SIZE] ^= GF_XTS;
+        }
+
+        pt += AES_BLOCK_SIZE;
+    }
+
+    xorbuf(out, in, totalSz);
+    if (dir == AES_ENCRYPTION) {
+        return wc_AesEcbEncrypt(aes, out, out, totalSz);
+    }
+    else {
+        return wc_AesEcbDecrypt(aes, out, out, totalSz);
+    }
+}
+#endif /* HAVE_AES_ECB */
+
+
+/* AES with XTS mode. (XTS) XEX encryption with Tweak and cipher text Stealing.
+ *
+ * xaes AES keys to use for block encrypt/decrypt
+ * out output buffer to hold cipher text
+ * in input plain text buffer to encrypt
+ * sz size of both out and in buffers
+ * i value to use for tweak
+ * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input
+ * adds a sanity check on how the user calls the function.
+ * + * returns 0 on success + */ +/* Software AES - XTS Encrypt */ +int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, + const byte* i, word32 iSz) +{ + int ret = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); + Aes *aes, *tweak; + + if (xaes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + aes = &xaes->aes; + tweak = &xaes->tweak; + + if (iSz < AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + if (blocks > 0) { + byte tmp[AES_BLOCK_SIZE]; + + XMEMSET(tmp, 0, AES_BLOCK_SIZE); /* set to 0's in case of improper AES + * key setup passed to encrypt direct*/ + + wc_AesEncryptDirect(tweak, tmp, i); + + #ifdef HAVE_AES_ECB + /* encrypt all of buffer at once when possible */ + if (in != out) { /* can not handle inline */ + XMEMCPY(out, tmp, AES_BLOCK_SIZE); + if ((ret = _AesXtsHelper(aes, out, in, sz, AES_ENCRYPTION)) != 0) { + return ret; + } + } + #endif + + while (blocks > 0) { + word32 j; + byte carry = 0; + byte buf[AES_BLOCK_SIZE]; + + #ifdef HAVE_AES_ECB + if (in == out) { /* check for if inline */ + #endif + XMEMCPY(buf, in, AES_BLOCK_SIZE); + xorbuf(buf, tmp, AES_BLOCK_SIZE); + wc_AesEncryptDirect(aes, out, buf); + #ifdef HAVE_AES_ECB + } + #endif + xorbuf(out, tmp, AES_BLOCK_SIZE); + + /* multiply by shift left and propagate carry */ + for (j = 0; j < AES_BLOCK_SIZE; j++) { + byte tmpC; + + tmpC = (tmp[j] >> 7) & 0x01; + tmp[j] = ((tmp[j] << 1) + carry) & 0xFF; + carry = tmpC; + } + if (carry) { + tmp[0] ^= GF_XTS; + } + + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + blocks--; + } + + /* stealing operation of XTS to handle left overs */ + if (sz > 0) { + byte buf[AES_BLOCK_SIZE]; + + XMEMCPY(buf, out - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + if (sz >= AES_BLOCK_SIZE) { /* extra sanity check before copy */ + return BUFFER_E; + } + XMEMCPY(out, buf, sz); + XMEMCPY(buf, in, sz); + + xorbuf(buf, tmp, AES_BLOCK_SIZE); + wc_AesEncryptDirect(aes, out - AES_BLOCK_SIZE, buf); + xorbuf(out - AES_BLOCK_SIZE, tmp, AES_BLOCK_SIZE); + } + } + else { + WOLFSSL_MSG("Plain text input too small for encryption"); + return BAD_FUNC_ARG; + } + + return ret; +} + + +/* Same process as encryption but Aes key is AES_DECRYPTION type. + * + * xaes AES keys to use for block encrypt/decrypt + * out output buffer to hold plain text + * in input cipher text buffer to decrypt + * sz size of both out and in buffers + * i value to use for tweak + * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input + * adds a sanity check on how the user calls the function. 
+ * + * returns 0 on success + */ +/* Software AES - XTS Decrypt */ +int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, + const byte* i, word32 iSz) +{ + int ret = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); + Aes *aes, *tweak; + + if (xaes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + aes = &xaes->aes; + tweak = &xaes->tweak; + + if (iSz < AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + if (blocks > 0) { + word32 j; + byte carry = 0; + byte tmp[AES_BLOCK_SIZE]; + byte stl = (sz % AES_BLOCK_SIZE); + + XMEMSET(tmp, 0, AES_BLOCK_SIZE); /* set to 0's in case of improper AES + * key setup passed to decrypt direct*/ + + wc_AesEncryptDirect(tweak, tmp, i); + + /* if Stealing then break out of loop one block early to handle special + * case */ + if (stl > 0) { + blocks--; + } + + #ifdef HAVE_AES_ECB + /* decrypt all of buffer at once when possible */ + if (in != out) { /* can not handle inline */ + XMEMCPY(out, tmp, AES_BLOCK_SIZE); + if ((ret = _AesXtsHelper(aes, out, in, sz, AES_DECRYPTION)) != 0) { + return ret; + } + } + #endif + + while (blocks > 0) { + byte buf[AES_BLOCK_SIZE]; + + #ifdef HAVE_AES_ECB + if (in == out) { /* check for if inline */ + #endif + XMEMCPY(buf, in, AES_BLOCK_SIZE); + xorbuf(buf, tmp, AES_BLOCK_SIZE); + wc_AesDecryptDirect(aes, out, buf); + #ifdef HAVE_AES_ECB + } + #endif + xorbuf(out, tmp, AES_BLOCK_SIZE); + + /* multiply by shift left and propagate carry */ + for (j = 0; j < AES_BLOCK_SIZE; j++) { + byte tmpC; + + tmpC = (tmp[j] >> 7) & 0x01; + tmp[j] = ((tmp[j] << 1) + carry) & 0xFF; + carry = tmpC; + } + if (carry) { + tmp[0] ^= GF_XTS; + } + carry = 0; + + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + blocks--; + } + + /* stealing operation of XTS to handle left overs */ + if (sz > 0) { + byte buf[AES_BLOCK_SIZE]; + byte tmp2[AES_BLOCK_SIZE]; + + /* multiply by shift left and propagate carry */ + for (j = 0; j < AES_BLOCK_SIZE; j++) { + byte tmpC; + + tmpC = (tmp[j] >> 7) & 0x01; + tmp2[j] = ((tmp[j] << 1) + carry) & 0xFF; + carry = tmpC; + } + if (carry) { + tmp2[0] ^= GF_XTS; + } + + XMEMCPY(buf, in, AES_BLOCK_SIZE); + xorbuf(buf, tmp2, AES_BLOCK_SIZE); + wc_AesDecryptDirect(aes, out, buf); + xorbuf(out, tmp2, AES_BLOCK_SIZE); + + /* tmp2 holds partial | last */ + XMEMCPY(tmp2, out, AES_BLOCK_SIZE); + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + + /* Make buffer with end of cipher text | last */ + XMEMCPY(buf, tmp2, AES_BLOCK_SIZE); + if (sz >= AES_BLOCK_SIZE) { /* extra sanity check before copy */ + return BUFFER_E; + } + XMEMCPY(buf, in, sz); + XMEMCPY(out, tmp2, sz); + + xorbuf(buf, tmp, AES_BLOCK_SIZE); + wc_AesDecryptDirect(aes, tmp2, buf); + xorbuf(tmp2, tmp, AES_BLOCK_SIZE); + XMEMCPY(out - AES_BLOCK_SIZE, tmp2, AES_BLOCK_SIZE); + } + } + else { + WOLFSSL_MSG("Plain text input too small for encryption"); + return BAD_FUNC_ARG; + } + + return ret; +} + +#endif /* WOLFSSL_AES_XTS */ + +#endif /* HAVE_FIPS */ +#endif /* !NO_AES */ diff --git a/client/wolfssl/wolfcrypt/src/aes_asm.S b/client/wolfssl/wolfcrypt/src/aes_asm.S new file mode 100644 index 0000000..ae1c801 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/aes_asm.S @@ -0,0 +1,1338 @@ +/* aes_asm.S + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. 
+ * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +/* This file is in at&t asm syntax, see .asm for intel syntax */ + +/* See Intel® Advanced Encryption Standard (AES) Instructions Set White Paper + * by Intel Mobility Group, Israel Development Center, Israel Shay Gueron + */ + + +/* +AES_CBC_encrypt (const unsigned char *in, + unsigned char *out, + unsigned char ivec[16], + unsigned long length, + const unsigned char *KS, + int nr) +*/ +#ifndef __APPLE__ +.globl AES_CBC_encrypt +AES_CBC_encrypt: +#else +.globl _AES_CBC_encrypt +_AES_CBC_encrypt: +#endif +# parameter 1: %rdi +# parameter 2: %rsi +# parameter 3: %rdx +# parameter 4: %rcx +# parameter 5: %r8 +# parameter 6: %r9d +movq %rcx, %r10 +shrq $4, %rcx +shlq $60, %r10 +je NO_PARTS +addq $1, %rcx +NO_PARTS: +subq $16, %rsi +movdqa (%rdx), %xmm1 +LOOP: +pxor (%rdi), %xmm1 +pxor (%r8), %xmm1 +addq $16,%rsi +addq $16,%rdi +cmpl $12, %r9d +aesenc 16(%r8),%xmm1 +aesenc 32(%r8),%xmm1 +aesenc 48(%r8),%xmm1 +aesenc 64(%r8),%xmm1 +aesenc 80(%r8),%xmm1 +aesenc 96(%r8),%xmm1 +aesenc 112(%r8),%xmm1 +aesenc 128(%r8),%xmm1 +aesenc 144(%r8),%xmm1 +movdqa 160(%r8),%xmm2 +jb LAST +cmpl $14, %r9d + +aesenc 160(%r8),%xmm1 +aesenc 176(%r8),%xmm1 +movdqa 192(%r8),%xmm2 +jb LAST +aesenc 192(%r8),%xmm1 +aesenc 208(%r8),%xmm1 +movdqa 224(%r8),%xmm2 +LAST: +decq %rcx +aesenclast %xmm2,%xmm1 +movdqu %xmm1,(%rsi) +jne LOOP +ret + + +#if defined(WOLFSSL_AESNI_BY4) + +/* +AES_CBC_decrypt_by4 (const unsigned char *in, + unsigned char *out, + unsigned char ivec[16], + unsigned long length, + const unsigned char *KS, + int nr) +*/ +#ifndef __APPLE__ +.globl AES_CBC_decrypt_by4 +AES_CBC_decrypt_by4: +#else +.globl _AES_CBC_decrypt_by4 +_AES_CBC_decrypt_by4: +#endif +# parameter 1: %rdi +# parameter 2: %rsi +# parameter 3: %rdx +# parameter 4: %rcx +# parameter 5: %r8 +# parameter 6: %r9d + + movq %rcx, %r10 + shrq $4, %rcx + shlq $60, %r10 + je DNO_PARTS_4 + addq $1, %rcx +DNO_PARTS_4: + movq %rcx, %r10 + shlq $62, %r10 + shrq $62, %r10 + shrq $2, %rcx + movdqu (%rdx),%xmm5 + je DREMAINDER_4 + subq $64, %rsi +DLOOP_4: + movdqu (%rdi), %xmm1 + movdqu 16(%rdi), %xmm2 + movdqu 32(%rdi), %xmm3 + movdqu 48(%rdi), %xmm4 + movdqa %xmm1, %xmm6 + movdqa %xmm2, %xmm7 + movdqa %xmm3, %xmm8 + movdqa %xmm4, %xmm15 + movdqa (%r8), %xmm9 + movdqa 16(%r8), %xmm10 + movdqa 32(%r8), %xmm11 + movdqa 48(%r8), %xmm12 + pxor %xmm9, %xmm1 + pxor %xmm9, %xmm2 + pxor %xmm9, %xmm3 + pxor %xmm9, %xmm4 + aesdec %xmm10, %xmm1 + aesdec %xmm10, %xmm2 + aesdec %xmm10, %xmm3 + aesdec %xmm10, %xmm4 + aesdec %xmm11, %xmm1 + aesdec %xmm11, %xmm2 + aesdec %xmm11, %xmm3 + aesdec %xmm11, %xmm4 + aesdec %xmm12, %xmm1 + aesdec %xmm12, %xmm2 + aesdec %xmm12, %xmm3 + aesdec %xmm12, %xmm4 + movdqa 64(%r8), %xmm9 + movdqa 80(%r8), %xmm10 + movdqa 96(%r8), %xmm11 + movdqa 112(%r8), %xmm12 + aesdec %xmm9, %xmm1 + aesdec %xmm9, %xmm2 + aesdec 
%xmm9, %xmm3 + aesdec %xmm9, %xmm4 + aesdec %xmm10, %xmm1 + aesdec %xmm10, %xmm2 + aesdec %xmm10, %xmm3 + aesdec %xmm10, %xmm4 + aesdec %xmm11, %xmm1 + aesdec %xmm11, %xmm2 + aesdec %xmm11, %xmm3 + aesdec %xmm11, %xmm4 + aesdec %xmm12, %xmm1 + aesdec %xmm12, %xmm2 + aesdec %xmm12, %xmm3 + aesdec %xmm12, %xmm4 + movdqa 128(%r8), %xmm9 + movdqa 144(%r8), %xmm10 + movdqa 160(%r8), %xmm11 + cmpl $12, %r9d + aesdec %xmm9, %xmm1 + aesdec %xmm9, %xmm2 + aesdec %xmm9, %xmm3 + aesdec %xmm9, %xmm4 + aesdec %xmm10, %xmm1 + aesdec %xmm10, %xmm2 + aesdec %xmm10, %xmm3 + aesdec %xmm10, %xmm4 + jb DLAST_4 + movdqa 160(%r8), %xmm9 + movdqa 176(%r8), %xmm10 + movdqa 192(%r8), %xmm11 + cmpl $14, %r9d + aesdec %xmm9, %xmm1 + aesdec %xmm9, %xmm2 + aesdec %xmm9, %xmm3 + aesdec %xmm9, %xmm4 + aesdec %xmm10, %xmm1 + aesdec %xmm10, %xmm2 + aesdec %xmm10, %xmm3 + aesdec %xmm10, %xmm4 + jb DLAST_4 + movdqa 192(%r8), %xmm9 + movdqa 208(%r8), %xmm10 + movdqa 224(%r8), %xmm11 + aesdec %xmm9, %xmm1 + aesdec %xmm9, %xmm2 + aesdec %xmm9, %xmm3 + aesdec %xmm9, %xmm4 + aesdec %xmm10, %xmm1 + aesdec %xmm10, %xmm2 + aesdec %xmm10, %xmm3 + aesdec %xmm10, %xmm4 +DLAST_4: + addq $64, %rdi + addq $64, %rsi + decq %rcx + aesdeclast %xmm11, %xmm1 + aesdeclast %xmm11, %xmm2 + aesdeclast %xmm11, %xmm3 + aesdeclast %xmm11, %xmm4 + pxor %xmm5, %xmm1 + pxor %xmm6, %xmm2 + pxor %xmm7, %xmm3 + pxor %xmm8, %xmm4 + movdqu %xmm1, (%rsi) + movdqu %xmm2, 16(%rsi) + movdqu %xmm3, 32(%rsi) + movdqu %xmm4, 48(%rsi) + movdqa %xmm15,%xmm5 + jne DLOOP_4 + addq $64, %rsi +DREMAINDER_4: + cmpq $0, %r10 + je DEND_4 +DLOOP_4_2: + movdqu (%rdi), %xmm1 + movdqa %xmm1, %xmm15 + addq $16, %rdi + pxor (%r8), %xmm1 + movdqu 160(%r8), %xmm2 + cmpl $12, %r9d + aesdec 16(%r8), %xmm1 + aesdec 32(%r8), %xmm1 + aesdec 48(%r8), %xmm1 + aesdec 64(%r8), %xmm1 + aesdec 80(%r8), %xmm1 + aesdec 96(%r8), %xmm1 + aesdec 112(%r8), %xmm1 + aesdec 128(%r8), %xmm1 + aesdec 144(%r8), %xmm1 + jb DLAST_4_2 + movdqu 192(%r8), %xmm2 + cmpl $14, %r9d + aesdec 160(%r8), %xmm1 + aesdec 176(%r8), %xmm1 + jb DLAST_4_2 + movdqu 224(%r8), %xmm2 + aesdec 192(%r8), %xmm1 + aesdec 208(%r8), %xmm1 +DLAST_4_2: + aesdeclast %xmm2, %xmm1 + pxor %xmm5, %xmm1 + movdqa %xmm15, %xmm5 + movdqu %xmm1, (%rsi) + addq $16, %rsi + decq %r10 + jne DLOOP_4_2 +DEND_4: + ret + +#elif defined(WOLFSSL_AESNI_BY6) + +/* +AES_CBC_decrypt_by6 (const unsigned char *in, + unsigned char *out, + unsigned char ivec[16], + unsigned long length, + const unsigned char *KS, + int nr) +*/ +#ifndef __APPLE__ +.globl AES_CBC_decrypt_by6 +AES_CBC_decrypt_by6: +#else +.globl _AES_CBC_decrypt_by6 +_AES_CBC_decrypt_by6: +#endif +# parameter 1: %rdi - in +# parameter 2: %rsi - out +# parameter 3: %rdx - ivec +# parameter 4: %rcx - length +# parameter 5: %r8 - KS +# parameter 6: %r9d - nr + + movq %rcx, %r10 + shrq $4, %rcx + shlq $60, %r10 + je DNO_PARTS_6 + addq $1, %rcx +DNO_PARTS_6: + movq %rax, %r12 + movq %rdx, %r13 + movq %rbx, %r14 + movq $0, %rdx + movq %rcx, %rax + movq $6, %rbx + div %rbx + movq %rax, %rcx + movq %rdx, %r10 + movq %r12, %rax + movq %r13, %rdx + movq %r14, %rbx + cmpq $0, %rcx + movdqu (%rdx), %xmm7 + je DREMAINDER_6 + subq $96, %rsi +DLOOP_6: + movdqu (%rdi), %xmm1 + movdqu 16(%rdi), %xmm2 + movdqu 32(%rdi), %xmm3 + movdqu 48(%rdi), %xmm4 + movdqu 64(%rdi), %xmm5 + movdqu 80(%rdi), %xmm6 + movdqa (%r8), %xmm8 + movdqa 16(%r8), %xmm9 + movdqa 32(%r8), %xmm10 + movdqa 48(%r8), %xmm11 + pxor %xmm8, %xmm1 + pxor %xmm8, %xmm2 + pxor %xmm8, %xmm3 + pxor %xmm8, %xmm4 + pxor %xmm8, %xmm5 + pxor %xmm8, %xmm6 + 
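+    # Round 0 is the pxor key whitening above; each middle round then issues
+    # one aesdec per block in flight, and DLAST_6 finishes the six lanes
+    # with aesdeclast. The cmpl $12 / cmpl $14 tests against %r9d (nr)
+    # select 10, 12 or 14 total rounds for AES-128/192/256 key schedules.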
aesdec %xmm9, %xmm1 + aesdec %xmm9, %xmm2 + aesdec %xmm9, %xmm3 + aesdec %xmm9, %xmm4 + aesdec %xmm9, %xmm5 + aesdec %xmm9, %xmm6 + aesdec %xmm10, %xmm1 + aesdec %xmm10, %xmm2 + aesdec %xmm10, %xmm3 + aesdec %xmm10, %xmm4 + aesdec %xmm10, %xmm5 + aesdec %xmm10, %xmm6 + aesdec %xmm11, %xmm1 + aesdec %xmm11, %xmm2 + aesdec %xmm11, %xmm3 + aesdec %xmm11, %xmm4 + aesdec %xmm11, %xmm5 + aesdec %xmm11, %xmm6 + movdqa 64(%r8), %xmm8 + movdqa 80(%r8), %xmm9 + movdqa 96(%r8), %xmm10 + movdqa 112(%r8), %xmm11 + aesdec %xmm8, %xmm1 + aesdec %xmm8, %xmm2 + aesdec %xmm8, %xmm3 + aesdec %xmm8, %xmm4 + aesdec %xmm8, %xmm5 + aesdec %xmm8, %xmm6 + aesdec %xmm9, %xmm1 + aesdec %xmm9, %xmm2 + aesdec %xmm9, %xmm3 + aesdec %xmm9, %xmm4 + aesdec %xmm9, %xmm5 + aesdec %xmm9, %xmm6 + aesdec %xmm10, %xmm1 + aesdec %xmm10, %xmm2 + aesdec %xmm10, %xmm3 + aesdec %xmm10, %xmm4 + aesdec %xmm10, %xmm5 + aesdec %xmm10, %xmm6 + aesdec %xmm11, %xmm1 + aesdec %xmm11, %xmm2 + aesdec %xmm11, %xmm3 + aesdec %xmm11, %xmm4 + aesdec %xmm11, %xmm5 + aesdec %xmm11, %xmm6 + movdqa 128(%r8), %xmm8 + movdqa 144(%r8), %xmm9 + movdqa 160(%r8), %xmm10 + cmpl $12, %r9d + aesdec %xmm8, %xmm1 + aesdec %xmm8, %xmm2 + aesdec %xmm8, %xmm3 + aesdec %xmm8, %xmm4 + aesdec %xmm8, %xmm5 + aesdec %xmm8, %xmm6 + aesdec %xmm9, %xmm1 + aesdec %xmm9, %xmm2 + aesdec %xmm9, %xmm3 + aesdec %xmm9, %xmm4 + aesdec %xmm9, %xmm5 + aesdec %xmm9, %xmm6 + jb DLAST_6 + movdqa 160(%r8), %xmm8 + movdqa 176(%r8), %xmm9 + movdqa 192(%r8), %xmm10 + cmpl $14, %r9d + aesdec %xmm8, %xmm1 + aesdec %xmm8, %xmm2 + aesdec %xmm8, %xmm3 + aesdec %xmm8, %xmm4 + aesdec %xmm8, %xmm5 + aesdec %xmm8, %xmm6 + aesdec %xmm9, %xmm1 + aesdec %xmm9, %xmm2 + aesdec %xmm9, %xmm3 + aesdec %xmm9, %xmm4 + aesdec %xmm9, %xmm5 + aesdec %xmm9, %xmm6 + jb DLAST_6 + movdqa 192(%r8), %xmm8 + movdqa 208(%r8), %xmm9 + movdqa 224(%r8), %xmm10 + aesdec %xmm8, %xmm1 + aesdec %xmm8, %xmm2 + aesdec %xmm8, %xmm3 + aesdec %xmm8, %xmm4 + aesdec %xmm8, %xmm5 + aesdec %xmm8, %xmm6 + aesdec %xmm9, %xmm1 + aesdec %xmm9, %xmm2 + aesdec %xmm9, %xmm3 + aesdec %xmm9, %xmm4 + aesdec %xmm9, %xmm5 + aesdec %xmm9, %xmm6 +DLAST_6: + addq $96, %rsi + aesdeclast %xmm10, %xmm1 + aesdeclast %xmm10, %xmm2 + aesdeclast %xmm10, %xmm3 + aesdeclast %xmm10, %xmm4 + aesdeclast %xmm10, %xmm5 + aesdeclast %xmm10, %xmm6 + movdqu (%rdi), %xmm8 + movdqu 16(%rdi), %xmm9 + movdqu 32(%rdi), %xmm10 + movdqu 48(%rdi), %xmm11 + movdqu 64(%rdi), %xmm12 + movdqu 80(%rdi), %xmm13 + pxor %xmm7, %xmm1 + pxor %xmm8, %xmm2 + pxor %xmm9, %xmm3 + pxor %xmm10, %xmm4 + pxor %xmm11, %xmm5 + pxor %xmm12, %xmm6 + movdqu %xmm13, %xmm7 + movdqu %xmm1, (%rsi) + movdqu %xmm2, 16(%rsi) + movdqu %xmm3, 32(%rsi) + movdqu %xmm4, 48(%rsi) + movdqu %xmm5, 64(%rsi) + movdqu %xmm6, 80(%rsi) + addq $96, %rdi + decq %rcx + jne DLOOP_6 + addq $96, %rsi +DREMAINDER_6: + cmpq $0, %r10 + je DEND_6 +DLOOP_6_2: + movdqu (%rdi), %xmm1 + movdqa %xmm1, %xmm10 + addq $16, %rdi + pxor (%r8), %xmm1 + movdqu 160(%r8), %xmm2 + cmpl $12, %r9d + aesdec 16(%r8), %xmm1 + aesdec 32(%r8), %xmm1 + aesdec 48(%r8), %xmm1 + aesdec 64(%r8), %xmm1 + aesdec 80(%r8), %xmm1 + aesdec 96(%r8), %xmm1 + aesdec 112(%r8), %xmm1 + aesdec 128(%r8), %xmm1 + aesdec 144(%r8), %xmm1 + jb DLAST_6_2 + movdqu 192(%r8), %xmm2 + cmpl $14, %r9d + aesdec 160(%r8), %xmm1 + aesdec 176(%r8), %xmm1 + jb DLAST_6_2 + movdqu 224(%r8), %xmm2 + aesdec 192(%r8), %xmm1 + aesdec 208(%r8), %xmm1 +DLAST_6_2: + aesdeclast %xmm2, %xmm1 + pxor %xmm7, %xmm1 + movdqa %xmm10, %xmm7 + movdqu %xmm1, (%rsi) + addq $16, %rsi + decq %r10 + jne 
DLOOP_6_2 +DEND_6: + ret + +#else /* WOLFSSL_AESNI_BYx */ + +/* +AES_CBC_decrypt_by8 (const unsigned char *in, + unsigned char *out, + unsigned char ivec[16], + unsigned long length, + const unsigned char *KS, + int nr) +*/ +#ifndef __APPLE__ +.globl AES_CBC_decrypt_by8 +AES_CBC_decrypt_by8: +#else +.globl _AES_CBC_decrypt_by8 +_AES_CBC_decrypt_by8: +#endif +# parameter 1: %rdi - in +# parameter 2: %rsi - out +# parameter 3: %rdx - ivec +# parameter 4: %rcx - length +# parameter 5: %r8 - KS +# parameter 6: %r9d - nr + + movq %rcx, %r10 + shrq $4, %rcx + shlq $60, %r10 + je DNO_PARTS_8 + addq $1, %rcx +DNO_PARTS_8: + movq %rcx, %r10 + shlq $61, %r10 + shrq $61, %r10 + shrq $3, %rcx + movdqu (%rdx), %xmm9 + je DREMAINDER_8 + subq $128, %rsi +DLOOP_8: + movdqu (%rdi), %xmm1 + movdqu 16(%rdi), %xmm2 + movdqu 32(%rdi), %xmm3 + movdqu 48(%rdi), %xmm4 + movdqu 64(%rdi), %xmm5 + movdqu 80(%rdi), %xmm6 + movdqu 96(%rdi), %xmm7 + movdqu 112(%rdi), %xmm8 + movdqa (%r8), %xmm10 + movdqa 16(%r8), %xmm11 + movdqa 32(%r8), %xmm12 + movdqa 48(%r8), %xmm13 + pxor %xmm10, %xmm1 + pxor %xmm10, %xmm2 + pxor %xmm10, %xmm3 + pxor %xmm10, %xmm4 + pxor %xmm10, %xmm5 + pxor %xmm10, %xmm6 + pxor %xmm10, %xmm7 + pxor %xmm10, %xmm8 + aesdec %xmm11, %xmm1 + aesdec %xmm11, %xmm2 + aesdec %xmm11, %xmm3 + aesdec %xmm11, %xmm4 + aesdec %xmm11, %xmm5 + aesdec %xmm11, %xmm6 + aesdec %xmm11, %xmm7 + aesdec %xmm11, %xmm8 + aesdec %xmm12, %xmm1 + aesdec %xmm12, %xmm2 + aesdec %xmm12, %xmm3 + aesdec %xmm12, %xmm4 + aesdec %xmm12, %xmm5 + aesdec %xmm12, %xmm6 + aesdec %xmm12, %xmm7 + aesdec %xmm12, %xmm8 + aesdec %xmm13, %xmm1 + aesdec %xmm13, %xmm2 + aesdec %xmm13, %xmm3 + aesdec %xmm13, %xmm4 + aesdec %xmm13, %xmm5 + aesdec %xmm13, %xmm6 + aesdec %xmm13, %xmm7 + aesdec %xmm13, %xmm8 + movdqa 64(%r8), %xmm10 + movdqa 80(%r8), %xmm11 + movdqa 96(%r8), %xmm12 + movdqa 112(%r8), %xmm13 + aesdec %xmm10, %xmm1 + aesdec %xmm10, %xmm2 + aesdec %xmm10, %xmm3 + aesdec %xmm10, %xmm4 + aesdec %xmm10, %xmm5 + aesdec %xmm10, %xmm6 + aesdec %xmm10, %xmm7 + aesdec %xmm10, %xmm8 + aesdec %xmm11, %xmm1 + aesdec %xmm11, %xmm2 + aesdec %xmm11, %xmm3 + aesdec %xmm11, %xmm4 + aesdec %xmm11, %xmm5 + aesdec %xmm11, %xmm6 + aesdec %xmm11, %xmm7 + aesdec %xmm11, %xmm8 + aesdec %xmm12, %xmm1 + aesdec %xmm12, %xmm2 + aesdec %xmm12, %xmm3 + aesdec %xmm12, %xmm4 + aesdec %xmm12, %xmm5 + aesdec %xmm12, %xmm6 + aesdec %xmm12, %xmm7 + aesdec %xmm12, %xmm8 + aesdec %xmm13, %xmm1 + aesdec %xmm13, %xmm2 + aesdec %xmm13, %xmm3 + aesdec %xmm13, %xmm4 + aesdec %xmm13, %xmm5 + aesdec %xmm13, %xmm6 + aesdec %xmm13, %xmm7 + aesdec %xmm13, %xmm8 + movdqa 128(%r8), %xmm10 + movdqa 144(%r8), %xmm11 + movdqa 160(%r8), %xmm12 + cmpl $12, %r9d + aesdec %xmm10, %xmm1 + aesdec %xmm10, %xmm2 + aesdec %xmm10, %xmm3 + aesdec %xmm10, %xmm4 + aesdec %xmm10, %xmm5 + aesdec %xmm10, %xmm6 + aesdec %xmm10, %xmm7 + aesdec %xmm10, %xmm8 + aesdec %xmm11, %xmm1 + aesdec %xmm11, %xmm2 + aesdec %xmm11, %xmm3 + aesdec %xmm11, %xmm4 + aesdec %xmm11, %xmm5 + aesdec %xmm11, %xmm6 + aesdec %xmm11, %xmm7 + aesdec %xmm11, %xmm8 + jb DLAST_8 + movdqa 160(%r8), %xmm10 + movdqa 176(%r8), %xmm11 + movdqa 192(%r8), %xmm12 + cmpl $14, %r9d + aesdec %xmm10, %xmm1 + aesdec %xmm10, %xmm2 + aesdec %xmm10, %xmm3 + aesdec %xmm10, %xmm4 + aesdec %xmm10, %xmm5 + aesdec %xmm10, %xmm6 + aesdec %xmm10, %xmm7 + aesdec %xmm10, %xmm8 + aesdec %xmm11, %xmm1 + aesdec %xmm11, %xmm2 + aesdec %xmm11, %xmm3 + aesdec %xmm11, %xmm4 + aesdec %xmm11, %xmm5 + aesdec %xmm11, %xmm6 + aesdec %xmm11, %xmm7 + aesdec %xmm11, %xmm8 + 
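+    # CBC decryption parallelizes across lanes because each plaintext needs
+    # only P[i] = AES-Dec(C[i]) XOR C[i-1]; eight blocks ride in xmm1-xmm8
+    # with no serial dependency, unlike AES_CBC_encrypt above, which must
+    # chain one block at a time. The aesdec rounds do not modify EFLAGS, so
+    # the cmpl $14 result above is still valid for the jb below.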
jb DLAST_8 + movdqa 192(%r8), %xmm10 + movdqa 208(%r8), %xmm11 + movdqa 224(%r8), %xmm12 + aesdec %xmm10, %xmm1 + aesdec %xmm10, %xmm2 + aesdec %xmm10, %xmm3 + aesdec %xmm10, %xmm4 + aesdec %xmm10, %xmm5 + aesdec %xmm10, %xmm6 + aesdec %xmm10, %xmm7 + aesdec %xmm10, %xmm8 + aesdec %xmm11, %xmm1 + aesdec %xmm11, %xmm2 + aesdec %xmm11, %xmm3 + aesdec %xmm11, %xmm4 + aesdec %xmm11, %xmm5 + aesdec %xmm11, %xmm6 + aesdec %xmm11, %xmm7 + aesdec %xmm11, %xmm8 +DLAST_8: + addq $128, %rsi + aesdeclast %xmm12, %xmm1 + aesdeclast %xmm12, %xmm2 + aesdeclast %xmm12, %xmm3 + aesdeclast %xmm12, %xmm4 + aesdeclast %xmm12, %xmm5 + aesdeclast %xmm12, %xmm6 + aesdeclast %xmm12, %xmm7 + aesdeclast %xmm12, %xmm8 + movdqu (%rdi), %xmm10 + movdqu 16(%rdi), %xmm11 + movdqu 32(%rdi), %xmm12 + movdqu 48(%rdi), %xmm13 + pxor %xmm9, %xmm1 + pxor %xmm10, %xmm2 + pxor %xmm11, %xmm3 + pxor %xmm12, %xmm4 + pxor %xmm13, %xmm5 + movdqu 64(%rdi), %xmm10 + movdqu 80(%rdi), %xmm11 + movdqu 96(%rdi), %xmm12 + movdqu 112(%rdi), %xmm9 + pxor %xmm10, %xmm6 + pxor %xmm11, %xmm7 + pxor %xmm12, %xmm8 + movdqu %xmm1, (%rsi) + movdqu %xmm2, 16(%rsi) + movdqu %xmm3, 32(%rsi) + movdqu %xmm4, 48(%rsi) + movdqu %xmm5, 64(%rsi) + movdqu %xmm6, 80(%rsi) + movdqu %xmm7, 96(%rsi) + movdqu %xmm8, 112(%rsi) + addq $128, %rdi + decq %rcx + jne DLOOP_8 + addq $128, %rsi +DREMAINDER_8: + cmpq $0, %r10 + je DEND_8 +DLOOP_8_2: + movdqu (%rdi), %xmm1 + movdqa %xmm1, %xmm10 + addq $16, %rdi + pxor (%r8), %xmm1 + movdqu 160(%r8), %xmm2 + cmpl $12, %r9d + aesdec 16(%r8), %xmm1 + aesdec 32(%r8), %xmm1 + aesdec 48(%r8), %xmm1 + aesdec 64(%r8), %xmm1 + aesdec 80(%r8), %xmm1 + aesdec 96(%r8), %xmm1 + aesdec 112(%r8), %xmm1 + aesdec 128(%r8), %xmm1 + aesdec 144(%r8), %xmm1 + jb DLAST_8_2 + movdqu 192(%r8), %xmm2 + cmpl $14, %r9d + aesdec 160(%r8), %xmm1 + aesdec 176(%r8), %xmm1 + jb DLAST_8_2 + movdqu 224(%r8), %xmm2 + aesdec 192(%r8), %xmm1 + aesdec 208(%r8), %xmm1 +DLAST_8_2: + aesdeclast %xmm2, %xmm1 + pxor %xmm9, %xmm1 + movdqa %xmm10, %xmm9 + movdqu %xmm1, (%rsi) + addq $16, %rsi + decq %r10 + jne DLOOP_8_2 +DEND_8: + ret + +#endif /* WOLFSSL_AESNI_BYx */ + + +/* +AES_ECB_encrypt (const unsigned char *in, + unsigned char *out, + unsigned long length, + const unsigned char *KS, + int nr) +*/ +#ifndef __APPLE__ +.globl AES_ECB_encrypt +AES_ECB_encrypt: +#else +.globl _AES_ECB_encrypt +_AES_ECB_encrypt: +#endif +# parameter 1: %rdi +# parameter 2: %rsi +# parameter 3: %rdx +# parameter 4: %rcx +# parameter 5: %r8d + movq %rdx, %r10 + shrq $4, %rdx + shlq $60, %r10 + je EECB_NO_PARTS_4 + addq $1, %rdx +EECB_NO_PARTS_4: + movq %rdx, %r10 + shlq $62, %r10 + shrq $62, %r10 + shrq $2, %rdx + je EECB_REMAINDER_4 + subq $64, %rsi +EECB_LOOP_4: + movdqu (%rdi), %xmm1 + movdqu 16(%rdi), %xmm2 + movdqu 32(%rdi), %xmm3 + movdqu 48(%rdi), %xmm4 + movdqa (%rcx), %xmm9 + movdqa 16(%rcx), %xmm10 + movdqa 32(%rcx), %xmm11 + movdqa 48(%rcx), %xmm12 + pxor %xmm9, %xmm1 + pxor %xmm9, %xmm2 + pxor %xmm9, %xmm3 + pxor %xmm9, %xmm4 + aesenc %xmm10, %xmm1 + aesenc %xmm10, %xmm2 + aesenc %xmm10, %xmm3 + aesenc %xmm10, %xmm4 + aesenc %xmm11, %xmm1 + aesenc %xmm11, %xmm2 + aesenc %xmm11, %xmm3 + aesenc %xmm11, %xmm4 + aesenc %xmm12, %xmm1 + aesenc %xmm12, %xmm2 + aesenc %xmm12, %xmm3 + aesenc %xmm12, %xmm4 + movdqa 64(%rcx), %xmm9 + movdqa 80(%rcx), %xmm10 + movdqa 96(%rcx), %xmm11 + movdqa 112(%rcx), %xmm12 + aesenc %xmm9, %xmm1 + aesenc %xmm9, %xmm2 + aesenc %xmm9, %xmm3 + aesenc %xmm9, %xmm4 + aesenc %xmm10, %xmm1 + aesenc %xmm10, %xmm2 + aesenc %xmm10, %xmm3 + aesenc %xmm10, 
%xmm4 + aesenc %xmm11, %xmm1 + aesenc %xmm11, %xmm2 + aesenc %xmm11, %xmm3 + aesenc %xmm11, %xmm4 + aesenc %xmm12, %xmm1 + aesenc %xmm12, %xmm2 + aesenc %xmm12, %xmm3 + aesenc %xmm12, %xmm4 + movdqa 128(%rcx), %xmm9 + movdqa 144(%rcx), %xmm10 + movdqa 160(%rcx), %xmm11 + cmpl $12, %r8d + aesenc %xmm9, %xmm1 + aesenc %xmm9, %xmm2 + aesenc %xmm9, %xmm3 + aesenc %xmm9, %xmm4 + aesenc %xmm10, %xmm1 + aesenc %xmm10, %xmm2 + aesenc %xmm10, %xmm3 + aesenc %xmm10, %xmm4 + jb EECB_LAST_4 + movdqa 160(%rcx), %xmm9 + movdqa 176(%rcx), %xmm10 + movdqa 192(%rcx), %xmm11 + cmpl $14, %r8d + aesenc %xmm9, %xmm1 + aesenc %xmm9, %xmm2 + aesenc %xmm9, %xmm3 + aesenc %xmm9, %xmm4 + aesenc %xmm10, %xmm1 + aesenc %xmm10, %xmm2 + aesenc %xmm10, %xmm3 + aesenc %xmm10, %xmm4 + jb EECB_LAST_4 + movdqa 192(%rcx), %xmm9 + movdqa 208(%rcx), %xmm10 + movdqa 224(%rcx), %xmm11 + aesenc %xmm9, %xmm1 + aesenc %xmm9, %xmm2 + aesenc %xmm9, %xmm3 + aesenc %xmm9, %xmm4 + aesenc %xmm10, %xmm1 + aesenc %xmm10, %xmm2 + aesenc %xmm10, %xmm3 + aesenc %xmm10, %xmm4 +EECB_LAST_4: + addq $64, %rdi + addq $64, %rsi + decq %rdx + aesenclast %xmm11, %xmm1 + aesenclast %xmm11, %xmm2 + aesenclast %xmm11, %xmm3 + aesenclast %xmm11, %xmm4 + movdqu %xmm1, (%rsi) + movdqu %xmm2, 16(%rsi) + movdqu %xmm3, 32(%rsi) + movdqu %xmm4, 48(%rsi) + jne EECB_LOOP_4 + addq $64, %rsi +EECB_REMAINDER_4: + cmpq $0, %r10 + je EECB_END_4 +EECB_LOOP_4_2: + movdqu (%rdi), %xmm1 + addq $16, %rdi + pxor (%rcx), %xmm1 + movdqu 160(%rcx), %xmm2 + aesenc 16(%rcx), %xmm1 + aesenc 32(%rcx), %xmm1 + aesenc 48(%rcx), %xmm1 + aesenc 64(%rcx), %xmm1 + aesenc 80(%rcx), %xmm1 + aesenc 96(%rcx), %xmm1 + aesenc 112(%rcx), %xmm1 + aesenc 128(%rcx), %xmm1 + aesenc 144(%rcx), %xmm1 + cmpl $12, %r8d + jb EECB_LAST_4_2 + movdqu 192(%rcx), %xmm2 + aesenc 160(%rcx), %xmm1 + aesenc 176(%rcx), %xmm1 + cmpl $14, %r8d + jb EECB_LAST_4_2 + movdqu 224(%rcx), %xmm2 + aesenc 192(%rcx), %xmm1 + aesenc 208(%rcx), %xmm1 +EECB_LAST_4_2: + aesenclast %xmm2, %xmm1 + movdqu %xmm1, (%rsi) + addq $16, %rsi + decq %r10 + jne EECB_LOOP_4_2 +EECB_END_4: + ret + + +/* +AES_ECB_decrypt (const unsigned char *in, + unsigned char *out, + unsigned long length, + const unsigned char *KS, + int nr) +*/ +#ifndef __APPLE__ +.globl AES_ECB_decrypt +AES_ECB_decrypt: +#else +.globl _AES_ECB_decrypt +_AES_ECB_decrypt: +#endif +# parameter 1: %rdi +# parameter 2: %rsi +# parameter 3: %rdx +# parameter 4: %rcx +# parameter 5: %r8d + + movq %rdx, %r10 + shrq $4, %rdx + shlq $60, %r10 + je DECB_NO_PARTS_4 + addq $1, %rdx +DECB_NO_PARTS_4: + movq %rdx, %r10 + shlq $62, %r10 + shrq $62, %r10 + shrq $2, %rdx + je DECB_REMAINDER_4 + subq $64, %rsi +DECB_LOOP_4: + movdqu (%rdi), %xmm1 + movdqu 16(%rdi), %xmm2 + movdqu 32(%rdi), %xmm3 + movdqu 48(%rdi), %xmm4 + movdqa (%rcx), %xmm9 + movdqa 16(%rcx), %xmm10 + movdqa 32(%rcx), %xmm11 + movdqa 48(%rcx), %xmm12 + pxor %xmm9, %xmm1 + pxor %xmm9, %xmm2 + pxor %xmm9, %xmm3 + pxor %xmm9, %xmm4 + aesdec %xmm10, %xmm1 + aesdec %xmm10, %xmm2 + aesdec %xmm10, %xmm3 + aesdec %xmm10, %xmm4 + aesdec %xmm11, %xmm1 + aesdec %xmm11, %xmm2 + aesdec %xmm11, %xmm3 + aesdec %xmm11, %xmm4 + aesdec %xmm12, %xmm1 + aesdec %xmm12, %xmm2 + aesdec %xmm12, %xmm3 + aesdec %xmm12, %xmm4 + movdqa 64(%rcx), %xmm9 + movdqa 80(%rcx), %xmm10 + movdqa 96(%rcx), %xmm11 + movdqa 112(%rcx), %xmm12 + aesdec %xmm9, %xmm1 + aesdec %xmm9, %xmm2 + aesdec %xmm9, %xmm3 + aesdec %xmm9, %xmm4 + aesdec %xmm10, %xmm1 + aesdec %xmm10, %xmm2 + aesdec %xmm10, %xmm3 + aesdec %xmm10, %xmm4 + aesdec %xmm11, %xmm1 + aesdec %xmm11, 
%xmm2 + aesdec %xmm11, %xmm3 + aesdec %xmm11, %xmm4 + aesdec %xmm12, %xmm1 + aesdec %xmm12, %xmm2 + aesdec %xmm12, %xmm3 + aesdec %xmm12, %xmm4 + movdqa 128(%rcx), %xmm9 + movdqa 144(%rcx), %xmm10 + movdqa 160(%rcx), %xmm11 + cmpl $12, %r8d + aesdec %xmm9, %xmm1 + aesdec %xmm9, %xmm2 + aesdec %xmm9, %xmm3 + aesdec %xmm9, %xmm4 + aesdec %xmm10, %xmm1 + aesdec %xmm10, %xmm2 + aesdec %xmm10, %xmm3 + aesdec %xmm10, %xmm4 + jb DECB_LAST_4 + movdqa 160(%rcx), %xmm9 + movdqa 176(%rcx), %xmm10 + movdqa 192(%rcx), %xmm11 + cmpl $14, %r8d + aesdec %xmm9, %xmm1 + aesdec %xmm9, %xmm2 + aesdec %xmm9, %xmm3 + aesdec %xmm9, %xmm4 + aesdec %xmm10, %xmm1 + aesdec %xmm10, %xmm2 + aesdec %xmm10, %xmm3 + aesdec %xmm10, %xmm4 + jb DECB_LAST_4 + movdqa 192(%rcx), %xmm9 + movdqa 208(%rcx), %xmm10 + movdqa 224(%rcx), %xmm11 + aesdec %xmm9, %xmm1 + aesdec %xmm9, %xmm2 + aesdec %xmm9, %xmm3 + aesdec %xmm9, %xmm4 + aesdec %xmm10, %xmm1 + aesdec %xmm10, %xmm2 + aesdec %xmm10, %xmm3 + aesdec %xmm10, %xmm4 +DECB_LAST_4: + addq $64, %rdi + addq $64, %rsi + decq %rdx + aesdeclast %xmm11, %xmm1 + aesdeclast %xmm11, %xmm2 + aesdeclast %xmm11, %xmm3 + aesdeclast %xmm11, %xmm4 + movdqu %xmm1, (%rsi) + movdqu %xmm2, 16(%rsi) + movdqu %xmm3, 32(%rsi) + movdqu %xmm4, 48(%rsi) + jne DECB_LOOP_4 + addq $64, %rsi +DECB_REMAINDER_4: + cmpq $0, %r10 + je DECB_END_4 +DECB_LOOP_4_2: + movdqu (%rdi), %xmm1 + addq $16, %rdi + pxor (%rcx), %xmm1 + movdqu 160(%rcx), %xmm2 + cmpl $12, %r8d + aesdec 16(%rcx), %xmm1 + aesdec 32(%rcx), %xmm1 + aesdec 48(%rcx), %xmm1 + aesdec 64(%rcx), %xmm1 + aesdec 80(%rcx), %xmm1 + aesdec 96(%rcx), %xmm1 + aesdec 112(%rcx), %xmm1 + aesdec 128(%rcx), %xmm1 + aesdec 144(%rcx), %xmm1 + jb DECB_LAST_4_2 + cmpl $14, %r8d + movdqu 192(%rcx), %xmm2 + aesdec 160(%rcx), %xmm1 + aesdec 176(%rcx), %xmm1 + jb DECB_LAST_4_2 + movdqu 224(%rcx), %xmm2 + aesdec 192(%rcx), %xmm1 + aesdec 208(%rcx), %xmm1 +DECB_LAST_4_2: + aesdeclast %xmm2, %xmm1 + movdqu %xmm1, (%rsi) + addq $16, %rsi + decq %r10 + jne DECB_LOOP_4_2 +DECB_END_4: + ret + + + + +/* +void AES_128_Key_Expansion(const unsigned char* userkey, + unsigned char* key_schedule); +*/ +.align 16,0x90 +#ifndef __APPLE__ +.globl AES_128_Key_Expansion +AES_128_Key_Expansion: +#else +.globl _AES_128_Key_Expansion +_AES_128_Key_Expansion: +#endif +# parameter 1: %rdi +# parameter 2: %rsi +movl $10, 240(%rsi) + +movdqu (%rdi), %xmm1 +movdqa %xmm1, (%rsi) + + +ASSISTS: +aeskeygenassist $1, %xmm1, %xmm2 +call PREPARE_ROUNDKEY_128 +movdqa %xmm1, 16(%rsi) +aeskeygenassist $2, %xmm1, %xmm2 +call PREPARE_ROUNDKEY_128 +movdqa %xmm1, 32(%rsi) +aeskeygenassist $4, %xmm1, %xmm2 +call PREPARE_ROUNDKEY_128 +movdqa %xmm1, 48(%rsi) +aeskeygenassist $8, %xmm1, %xmm2 +call PREPARE_ROUNDKEY_128 +movdqa %xmm1, 64(%rsi) +aeskeygenassist $16, %xmm1, %xmm2 +call PREPARE_ROUNDKEY_128 +movdqa %xmm1, 80(%rsi) +aeskeygenassist $32, %xmm1, %xmm2 +call PREPARE_ROUNDKEY_128 +movdqa %xmm1, 96(%rsi) +aeskeygenassist $64, %xmm1, %xmm2 +call PREPARE_ROUNDKEY_128 +movdqa %xmm1, 112(%rsi) +aeskeygenassist $0x80, %xmm1, %xmm2 +call PREPARE_ROUNDKEY_128 +movdqa %xmm1, 128(%rsi) +aeskeygenassist $0x1b, %xmm1, %xmm2 +call PREPARE_ROUNDKEY_128 +movdqa %xmm1, 144(%rsi) +aeskeygenassist $0x36, %xmm1, %xmm2 +call PREPARE_ROUNDKEY_128 +movdqa %xmm1, 160(%rsi) +ret + +PREPARE_ROUNDKEY_128: +pshufd $255, %xmm2, %xmm2 +movdqa %xmm1, %xmm3 +pslldq $4, %xmm3 +pxor %xmm3, %xmm1 +pslldq $4, %xmm3 +pxor %xmm3, %xmm1 +pslldq $4, %xmm3 +pxor %xmm3, %xmm1 +pxor %xmm2, %xmm1 +ret + + +/* +void AES_192_Key_Expansion (const 
unsigned char *userkey, + unsigned char *key) +*/ +#ifndef __APPLE__ +.globl AES_192_Key_Expansion +AES_192_Key_Expansion: +#else +.globl _AES_192_Key_Expansion +_AES_192_Key_Expansion: +#endif +# parameter 1: %rdi +# parameter 2: %rsi + +movdqu (%rdi), %xmm1 +movq 16(%rdi), %xmm3 +movdqa %xmm1, (%rsi) +movdqa %xmm3, %xmm5 + +aeskeygenassist $0x1, %xmm3, %xmm2 +call PREPARE_ROUNDKEY_192 +shufpd $0, %xmm1, %xmm5 +movdqa %xmm5, 16(%rsi) +movdqa %xmm1, %xmm6 +shufpd $1, %xmm3, %xmm6 +movdqa %xmm6, 32(%rsi) + +aeskeygenassist $0x2, %xmm3, %xmm2 +call PREPARE_ROUNDKEY_192 +movdqa %xmm1, 48(%rsi) +movdqa %xmm3, %xmm5 + +aeskeygenassist $0x4, %xmm3, %xmm2 +call PREPARE_ROUNDKEY_192 +shufpd $0, %xmm1, %xmm5 +movdqa %xmm5, 64(%rsi) +movdqa %xmm1, %xmm6 +shufpd $1, %xmm3, %xmm6 +movdqa %xmm6, 80(%rsi) + +aeskeygenassist $0x8, %xmm3, %xmm2 +call PREPARE_ROUNDKEY_192 +movdqa %xmm1, 96(%rsi) +movdqa %xmm3, %xmm5 + +aeskeygenassist $0x10, %xmm3, %xmm2 +call PREPARE_ROUNDKEY_192 +shufpd $0, %xmm1, %xmm5 +movdqa %xmm5, 112(%rsi) +movdqa %xmm1, %xmm6 +shufpd $1, %xmm3, %xmm6 +movdqa %xmm6, 128(%rsi) + +aeskeygenassist $0x20, %xmm3, %xmm2 +call PREPARE_ROUNDKEY_192 +movdqa %xmm1, 144(%rsi) +movdqa %xmm3, %xmm5 + +aeskeygenassist $0x40, %xmm3, %xmm2 +call PREPARE_ROUNDKEY_192 +shufpd $0, %xmm1, %xmm5 +movdqa %xmm5, 160(%rsi) +movdqa %xmm1, %xmm6 +shufpd $1, %xmm3, %xmm6 +movdqa %xmm6, 176(%rsi) + +aeskeygenassist $0x80, %xmm3, %xmm2 +call PREPARE_ROUNDKEY_192 +movdqa %xmm1, 192(%rsi) +movdqa %xmm3, 208(%rsi) +ret + +PREPARE_ROUNDKEY_192: +pshufd $0x55, %xmm2, %xmm2 +movdqu %xmm1, %xmm4 +pslldq $4, %xmm4 +pxor %xmm4, %xmm1 + +pslldq $4, %xmm4 +pxor %xmm4, %xmm1 +pslldq $4, %xmm4 +pxor %xmm4, %xmm1 +pxor %xmm2, %xmm1 +pshufd $0xff, %xmm1, %xmm2 +movdqu %xmm3, %xmm4 +pslldq $4, %xmm4 +pxor %xmm4, %xmm3 +pxor %xmm2, %xmm3 +ret + + +/* +void AES_256_Key_Expansion (const unsigned char *userkey, + unsigned char *key) +*/ +#ifndef __APPLE__ +.globl AES_256_Key_Expansion +AES_256_Key_Expansion: +#else +.globl _AES_256_Key_Expansion +_AES_256_Key_Expansion: +#endif +# parameter 1: %rdi +# parameter 2: %rsi + +movdqu (%rdi), %xmm1 +movdqu 16(%rdi), %xmm3 +movdqa %xmm1, (%rsi) +movdqa %xmm3, 16(%rsi) + +aeskeygenassist $0x1, %xmm3, %xmm2 +call MAKE_RK256_a +movdqa %xmm1, 32(%rsi) +aeskeygenassist $0x0, %xmm1, %xmm2 +call MAKE_RK256_b +movdqa %xmm3, 48(%rsi) +aeskeygenassist $0x2, %xmm3, %xmm2 +call MAKE_RK256_a +movdqa %xmm1, 64(%rsi) +aeskeygenassist $0x0, %xmm1, %xmm2 +call MAKE_RK256_b +movdqa %xmm3, 80(%rsi) +aeskeygenassist $0x4, %xmm3, %xmm2 +call MAKE_RK256_a +movdqa %xmm1, 96(%rsi) +aeskeygenassist $0x0, %xmm1, %xmm2 +call MAKE_RK256_b +movdqa %xmm3, 112(%rsi) +aeskeygenassist $0x8, %xmm3, %xmm2 +call MAKE_RK256_a +movdqa %xmm1, 128(%rsi) +aeskeygenassist $0x0, %xmm1, %xmm2 +call MAKE_RK256_b +movdqa %xmm3, 144(%rsi) +aeskeygenassist $0x10, %xmm3, %xmm2 +call MAKE_RK256_a +movdqa %xmm1, 160(%rsi) +aeskeygenassist $0x0, %xmm1, %xmm2 +call MAKE_RK256_b +movdqa %xmm3, 176(%rsi) +aeskeygenassist $0x20, %xmm3, %xmm2 +call MAKE_RK256_a +movdqa %xmm1, 192(%rsi) + +aeskeygenassist $0x0, %xmm1, %xmm2 +call MAKE_RK256_b +movdqa %xmm3, 208(%rsi) +aeskeygenassist $0x40, %xmm3, %xmm2 +call MAKE_RK256_a +movdqa %xmm1, 224(%rsi) + +ret + +MAKE_RK256_a: +pshufd $0xff, %xmm2, %xmm2 +movdqa %xmm1, %xmm4 +pslldq $4, %xmm4 +pxor %xmm4, %xmm1 +pslldq $4, %xmm4 +pxor %xmm4, %xmm1 +pslldq $4, %xmm4 +pxor %xmm4, %xmm1 +pxor %xmm2, %xmm1 +ret + +MAKE_RK256_b: +pshufd $0xaa, %xmm2, %xmm2 +movdqa %xmm3, %xmm4 +pslldq $4, %xmm4 +pxor 
%xmm4, %xmm3 +pslldq $4, %xmm4 +pxor %xmm4, %xmm3 +pslldq $4, %xmm4 +pxor %xmm4, %xmm3 +pxor %xmm2, %xmm3 +ret + +#if defined(__linux__) && defined(__ELF__) + .section .note.GNU-stack,"",%progbits +#endif diff --git a/client/wolfssl/wolfcrypt/src/aes_asm.asm b/client/wolfssl/wolfcrypt/src/aes_asm.asm new file mode 100644 index 0000000..ab3c22a --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/aes_asm.asm @@ -0,0 +1,1529 @@ +; /* aes_asm.asm +; * +; * Copyright (C) 2006-2020 wolfSSL Inc. +; * +; * This file is part of wolfSSL. +; * +; * wolfSSL is free software; you can redistribute it and/or modify +; * it under the terms of the GNU General Public License as published by +; * the Free Software Foundation; either version 2 of the License, or +; * (at your option) any later version. +; * +; * wolfSSL is distributed in the hope that it will be useful, +; * but WITHOUT ANY WARRANTY; without even the implied warranty of +; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; * GNU General Public License for more details. +; * +; * You should have received a copy of the GNU General Public License +; * along with this program; if not, write to the Free Software +; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA +; */ + +; +; +; /* See Intel Advanced Encryption Standard (AES) Instructions Set White Paper +; * by Israel, Intel Mobility Group Development Center, Israel Shay Gueron +; */ +; +; /* This file is in intel asm syntax, see .s for at&t syntax */ +; + + +fips_version = 0 +IFDEF HAVE_FIPS + fips_version = 1 + IFDEF HAVE_FIPS_VERSION + fips_version = HAVE_FIPS_VERSION + ENDIF +ENDIF + +IF fips_version GE 2 + fipsAh SEGMENT ALIAS(".fipsA$h") 'CODE' +ELSE + _text SEGMENT +ENDIF + + +; /* +; AES_CBC_encrypt[const ,unsigned char*in +; unsigned ,char*out +; unsigned ,char ivec+16 +; unsigned ,long length +; const ,unsigned char*KS +; int nr] +; */ +AES_CBC_encrypt PROC +;# parameter 1: rdi +;# parameter 2: rsi +;# parameter 3: rdx +;# parameter 4: rcx +;# parameter 5: r8 +;# parameter 6: r9d + +; save rdi and rsi to rax and r11, restore before ret + mov rax,rdi + mov r11,rsi + +; convert to what we had for att&t convention + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,[rsp+40] + mov r9d,[rsp+48] + + mov r10,rcx + shr rcx,4 + shl r10,60 + je NO_PARTS + add rcx,1 +NO_PARTS: + sub rsi,16 + movdqa xmm1,[rdx] +LOOP_1: + pxor xmm1,[rdi] + pxor xmm1,[r8] + add rsi,16 + add rdi,16 + cmp r9d,12 + aesenc xmm1,16[r8] + aesenc xmm1,32[r8] + aesenc xmm1,48[r8] + aesenc xmm1,64[r8] + aesenc xmm1,80[r8] + aesenc xmm1,96[r8] + aesenc xmm1,112[r8] + aesenc xmm1,128[r8] + aesenc xmm1,144[r8] + movdqa xmm2,160[r8] + jb LAST + cmp r9d,14 + + aesenc xmm1,160[r8] + aesenc xmm1,176[r8] + movdqa xmm2,192[r8] + jb LAST + aesenc xmm1,192[r8] + aesenc xmm1,208[r8] + movdqa xmm2,224[r8] +LAST: + dec rcx + aesenclast xmm1,xmm2 + movdqu [rsi],xmm1 + jne LOOP_1 + ; restore non volatile rdi,rsi + mov rdi,rax + mov rsi,r11 + ret +AES_CBC_encrypt ENDP + + +; void AES_CBC_decrypt_by4(const unsigned char* in, +; unsigned char* out, +; unsigned char ivec[16], +; unsigned long length, +; const unsigned char* KS, +; int nr) +AES_CBC_decrypt_by4 PROC +; parameter 1: rdi +; parameter 2: rsi +; parameter 3: rdx +; parameter 4: rcx +; parameter 5: r8 +; parameter 6: r9d + + ; save rdi and rsi to rax and r11, restore before ret + mov rax, rdi + mov r11, rsi + ; convert to what we had for att&t convention + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx,r9 + mov r8, [rsp+40] + 
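+    ; Microsoft x64 passes arguments 1-4 in rcx/rdx/r8/r9; the 8-byte
+    ; return address plus the caller's 32-byte shadow space put argument 5
+    ; at [rsp+40] and argument 6 at [rsp+48]. These moves remap the Win64
+    ; arguments onto the System V registers (rdi/rsi/rdx/rcx/r8/r9d) that
+    ; the shared AT&T-style body expects.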
mov r9d, [rsp+48] + ; on microsoft xmm6-xmm15 are non volatile, + ; let's save on stack and restore at end + sub rsp, 8+8*16 ; 8 = align stack , 8 xmm6-12,15 16 bytes each + movdqa [rsp+0], xmm6 + movdqa [rsp+16], xmm7 + movdqa [rsp+32], xmm8 + movdqa [rsp+48], xmm9 + movdqa [rsp+64], xmm10 + movdqa [rsp+80], xmm11 + movdqa [rsp+96], xmm12 + movdqa [rsp+112], xmm15 + ; back to our original code, more or less + mov r10, rcx + shr rcx, 4 + shl r10, 60 + je DNO_PARTS_4 + add rcx, 1 +DNO_PARTS_4: + mov r10, rcx + shl r10, 62 + shr r10, 62 + shr rcx, 2 + movdqu xmm5, [rdx] + je DREMAINDER_4 + sub rsi, 64 +DLOOP_4: + movdqu xmm1, [rdi] + movdqu xmm2, 16[rdi] + movdqu xmm3, 32[rdi] + movdqu xmm4, 48[rdi] + movdqa xmm6, xmm1 + movdqa xmm7, xmm2 + movdqa xmm8, xmm3 + movdqa xmm15, xmm4 + movdqa xmm9, [r8] + movdqa xmm10, 16[r8] + movdqa xmm11, 32[r8] + movdqa xmm12, 48[r8] + pxor xmm1, xmm9 + pxor xmm2, xmm9 + pxor xmm3, xmm9 + pxor xmm4, xmm9 + aesdec xmm1, xmm10 + aesdec xmm2, xmm10 + aesdec xmm3, xmm10 + aesdec xmm4, xmm10 + aesdec xmm1, xmm11 + aesdec xmm2, xmm11 + aesdec xmm3, xmm11 + aesdec xmm4, xmm11 + aesdec xmm1, xmm12 + aesdec xmm2, xmm12 + aesdec xmm3, xmm12 + aesdec xmm4, xmm12 + movdqa xmm9, 64[r8] + movdqa xmm10, 80[r8] + movdqa xmm11, 96[r8] + movdqa xmm12, 112[r8] + aesdec xmm1, xmm9 + aesdec xmm2, xmm9 + aesdec xmm3, xmm9 + aesdec xmm4, xmm9 + aesdec xmm1, xmm10 + aesdec xmm2, xmm10 + aesdec xmm3, xmm10 + aesdec xmm4, xmm10 + aesdec xmm1, xmm11 + aesdec xmm2, xmm11 + aesdec xmm3, xmm11 + aesdec xmm4, xmm11 + aesdec xmm1, xmm12 + aesdec xmm2, xmm12 + aesdec xmm3, xmm12 + aesdec xmm4, xmm12 + movdqa xmm9, 128[r8] + movdqa xmm10, 144[r8] + movdqa xmm11, 160[r8] + cmp r9d, 12 + aesdec xmm1, xmm9 + aesdec xmm2, xmm9 + aesdec xmm3, xmm9 + aesdec xmm4, xmm9 + aesdec xmm1, xmm10 + aesdec xmm2, xmm10 + aesdec xmm3, xmm10 + aesdec xmm4, xmm10 + jb DLAST_4 + movdqa xmm9, 160[r8] + movdqa xmm10, 176[r8] + movdqa xmm11, 192[r8] + cmp r9d, 14 + aesdec xmm1, xmm9 + aesdec xmm2, xmm9 + aesdec xmm3, xmm9 + aesdec xmm4, xmm9 + aesdec xmm1, xmm10 + aesdec xmm2, xmm10 + aesdec xmm3, xmm10 + aesdec xmm4, xmm10 + jb DLAST_4 + movdqa xmm9, 192[r8] + movdqa xmm10, 208[r8] + movdqa xmm11, 224[r8] + aesdec xmm1, xmm9 + aesdec xmm2, xmm9 + aesdec xmm3, xmm9 + aesdec xmm4, xmm9 + aesdec xmm1, xmm10 + aesdec xmm2, xmm10 + aesdec xmm3, xmm10 + aesdec xmm4, xmm10 +DLAST_4: + add rdi, 64 + add rsi, 64 + dec rcx + aesdeclast xmm1, xmm11 + aesdeclast xmm2, xmm11 + aesdeclast xmm3, xmm11 + aesdeclast xmm4, xmm11 + pxor xmm1, xmm5 + pxor xmm2, xmm6 + pxor xmm3, xmm7 + pxor xmm4, xmm8 + movdqu [rsi], xmm1 + movdqu 16[rsi], xmm2 + movdqu 32[rsi], xmm3 + movdqu 48[rsi], xmm4 + movdqa xmm5, xmm15 + jne DLOOP_4 + add rsi, 64 +DREMAINDER_4: + cmp r10, 0 + je DEND_4 +DLOOP_4_2: + movdqu xmm1, [rdi] + movdqa xmm15, xmm1 + add rdi, 16 + pxor xmm1, [r8] + movdqu xmm2, 160[r8] + cmp r9d, 12 + aesdec xmm1, 16[r8] + aesdec xmm1, 32[r8] + aesdec xmm1, 48[r8] + aesdec xmm1, 64[r8] + aesdec xmm1, 80[r8] + aesdec xmm1, 96[r8] + aesdec xmm1, 112[r8] + aesdec xmm1, 128[r8] + aesdec xmm1, 144[r8] + jb DLAST_4_2 + movdqu xmm2, 192[r8] + cmp r9d, 14 + aesdec xmm1, 160[r8] + aesdec xmm1, 176[r8] + jb DLAST_4_2 + movdqu xmm2, 224[r8] + aesdec xmm1, 192[r8] + aesdec xmm1, 208[r8] +DLAST_4_2: + aesdeclast xmm1, xmm2 + pxor xmm1, xmm5 + movdqa xmm5, xmm15 + movdqu [rsi], xmm1 + add rsi, 16 + dec r10 + jne DLOOP_4_2 +DEND_4: + ; restore non volatile rdi,rsi + mov rdi, rax + mov rsi, r11 + ; restore non volatile xmms from stack + movdqa xmm6, 
[rsp+0] + movdqa xmm7, [rsp+16] + movdqa xmm8, [rsp+32] + movdqa xmm9, [rsp+48] + movdqa xmm10, [rsp+64] + movdqa xmm11, [rsp+80] + movdqa xmm12, [rsp+96] + movdqa xmm15, [rsp+112] + add rsp, 8+8*16 ; 8 = align stack , 8 xmm6-12,15 16 bytes each + ret +AES_CBC_decrypt_by4 ENDP + + +; void AES_CBC_decrypt_by6(const unsigned char *in, +; unsigned char *out, +; unsigned char ivec[16], +; unsigned long length, +; const unsigned char *KS, +; int nr) +AES_CBC_decrypt_by6 PROC +; parameter 1: rdi - in +; parameter 2: rsi - out +; parameter 3: rdx - ivec +; parameter 4: rcx - length +; parameter 5: r8 - KS +; parameter 6: r9d - nr + + ; save rdi and rsi to rax and r11, restore before ret + mov rax, rdi + mov r11, rsi + ; convert to what we had for att&t convention + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + mov r8, [rsp+40] + mov r9d, [rsp+48] + ; on microsoft xmm6-xmm15 are non volatile, + ; let's save on stack and restore at end + sub rsp, 8+9*16 ; 8 = align stack , 9 xmm6-14 16 bytes each + movdqa [rsp+0], xmm6 + movdqa [rsp+16], xmm7 + movdqa [rsp+32], xmm8 + movdqa [rsp+48], xmm9 + movdqa [rsp+64], xmm10 + movdqa [rsp+80], xmm11 + movdqa [rsp+96], xmm12 + movdqa [rsp+112], xmm13 + movdqa [rsp+128], xmm14 + ; back to our original code, more or less + mov r10, rcx + shr rcx, 4 + shl r10, 60 + je DNO_PARTS_6 + add rcx, 1 +DNO_PARTS_6: + mov r12, rax + mov r13, rdx + mov r14, rbx + mov rdx, 0 + mov rax, rcx + mov rbx, 6 + div rbx + mov rcx, rax + mov r10, rdx + mov rax, r12 + mov rdx, r13 + mov rbx, r14 + cmp rcx, 0 + movdqu xmm7, [rdx] + je DREMAINDER_6 + sub rsi, 96 +DLOOP_6: + movdqu xmm1, [rdi] + movdqu xmm2, 16[rdi] + movdqu xmm3, 32[rdi] + movdqu xmm4, 48[rdi] + movdqu xmm5, 64[rdi] + movdqu xmm6, 80[rdi] + movdqa xmm8, [r8] + movdqa xmm9, 16[r8] + movdqa xmm10, 32[r8] + movdqa xmm11, 48[r8] + pxor xmm1, xmm8 + pxor xmm2, xmm8 + pxor xmm3, xmm8 + pxor xmm4, xmm8 + pxor xmm5, xmm8 + pxor xmm6, xmm8 + aesdec xmm1, xmm9 + aesdec xmm2, xmm9 + aesdec xmm3, xmm9 + aesdec xmm4, xmm9 + aesdec xmm5, xmm9 + aesdec xmm6, xmm9 + aesdec xmm1, xmm10 + aesdec xmm2, xmm10 + aesdec xmm3, xmm10 + aesdec xmm4, xmm10 + aesdec xmm5, xmm10 + aesdec xmm6, xmm10 + aesdec xmm1, xmm11 + aesdec xmm2, xmm11 + aesdec xmm3, xmm11 + aesdec xmm4, xmm11 + aesdec xmm5, xmm11 + aesdec xmm6, xmm11 + movdqa xmm8, 64[r8] + movdqa xmm9, 80[r8] + movdqa xmm10, 96[r8] + movdqa xmm11, 112[r8] + aesdec xmm1, xmm8 + aesdec xmm2, xmm8 + aesdec xmm3, xmm8 + aesdec xmm4, xmm8 + aesdec xmm5, xmm8 + aesdec xmm6, xmm8 + aesdec xmm1, xmm9 + aesdec xmm2, xmm9 + aesdec xmm3, xmm9 + aesdec xmm4, xmm9 + aesdec xmm5, xmm9 + aesdec xmm6, xmm9 + aesdec xmm1, xmm10 + aesdec xmm2, xmm10 + aesdec xmm3, xmm10 + aesdec xmm4, xmm10 + aesdec xmm5, xmm10 + aesdec xmm6, xmm10 + aesdec xmm1, xmm11 + aesdec xmm2, xmm11 + aesdec xmm3, xmm11 + aesdec xmm4, xmm11 + aesdec xmm5, xmm11 + aesdec xmm6, xmm11 + movdqa xmm8, 128[r8] + movdqa xmm9, 144[r8] + movdqa xmm10, 160[r8] + cmp r9d, 12 + aesdec xmm1, xmm8 + aesdec xmm2, xmm8 + aesdec xmm3, xmm8 + aesdec xmm4, xmm8 + aesdec xmm5, xmm8 + aesdec xmm6, xmm8 + aesdec xmm1, xmm9 + aesdec xmm2, xmm9 + aesdec xmm3, xmm9 + aesdec xmm4, xmm9 + aesdec xmm5, xmm9 + aesdec xmm6, xmm9 + jb DLAST_6 + movdqa xmm8, 160[r8] + movdqa xmm9, 176[r8] + movdqa xmm10, 192[r8] + cmp r9d, 14 + aesdec xmm1, xmm8 + aesdec xmm2, xmm8 + aesdec xmm3, xmm8 + aesdec xmm4, xmm8 + aesdec xmm5, xmm8 + aesdec xmm6, xmm8 + aesdec xmm1, xmm9 + aesdec xmm2, xmm9 + aesdec xmm3, xmm9 + aesdec xmm4, xmm9 + aesdec xmm5, xmm9 + aesdec 
xmm6, xmm9 + jb DLAST_6 + movdqa xmm8, 192[r8] + movdqa xmm9, 208[r8] + movdqa xmm10, 224[r8] + aesdec xmm1, xmm8 + aesdec xmm2, xmm8 + aesdec xmm3, xmm8 + aesdec xmm4, xmm8 + aesdec xmm5, xmm8 + aesdec xmm6, xmm8 + aesdec xmm1, xmm9 + aesdec xmm2, xmm9 + aesdec xmm3, xmm9 + aesdec xmm4, xmm9 + aesdec xmm5, xmm9 + aesdec xmm6, xmm9 +DLAST_6: + add rsi, 96 + aesdeclast xmm1, xmm10 + aesdeclast xmm2, xmm10 + aesdeclast xmm3, xmm10 + aesdeclast xmm4, xmm10 + aesdeclast xmm5, xmm10 + aesdeclast xmm6, xmm10 + movdqu xmm8, [rdi] + movdqu xmm9, 16[rdi] + movdqu xmm10, 32[rdi] + movdqu xmm11, 48[rdi] + movdqu xmm12, 64[rdi] + movdqu xmm13, 80[rdi] + pxor xmm1, xmm7 + pxor xmm2, xmm8 + pxor xmm3, xmm9 + pxor xmm4, xmm10 + pxor xmm5, xmm11 + pxor xmm6, xmm12 + movdqu xmm7, xmm13 + movdqu [rsi], xmm1 + movdqu 16[rsi], xmm2 + movdqu 32[rsi], xmm3 + movdqu 48[rsi], xmm4 + movdqu 64[rsi], xmm5 + movdqu 80[rsi], xmm6 + add rdi, 96 + dec rcx + jne DLOOP_6 + add rsi, 96 +DREMAINDER_6: + cmp r10, 0 + je DEND_6 +DLOOP_6_2: + movdqu xmm1, [rdi] + movdqa xmm10, xmm1 + add rdi, 16 + pxor xmm1, [r8] + movdqu xmm2, 160[r8] + cmp r9d, 12 + aesdec xmm1, 16[r8] + aesdec xmm1, 32[r8] + aesdec xmm1, 48[r8] + aesdec xmm1, 64[r8] + aesdec xmm1, 80[r8] + aesdec xmm1, 96[r8] + aesdec xmm1, 112[r8] + aesdec xmm1, 128[r8] + aesdec xmm1, 144[r8] + jb DLAST_6_2 + movdqu xmm2, 192[r8] + cmp r9d, 14 + aesdec xmm1, 160[r8] + aesdec xmm1, 176[r8] + jb DLAST_6_2 + movdqu xmm2, 224[r8] + aesdec xmm1, 192[r8] + aesdec xmm1, 208[r8] +DLAST_6_2: + aesdeclast xmm1, xmm2 + pxor xmm1, xmm7 + movdqa xmm7, xmm10 + movdqu [rsi], xmm1 + add rsi, 16 + dec r10 + jne DLOOP_6_2 +DEND_6: + ; restore non volatile rdi,rsi + mov rdi, rax + mov rsi, r11 + ; restore non volatile xmms from stack + movdqa xmm6, [rsp+0] + movdqa xmm7, [rsp+16] + movdqa xmm8, [rsp+32] + movdqa xmm9, [rsp+48] + movdqa xmm10, [rsp+64] + movdqa xmm11, [rsp+80] + movdqa xmm12, [rsp+96] + movdqa xmm13, [rsp+112] + movdqa xmm14, [rsp+128] + add rsp, 8+9*16 ; 8 = align stack , 9 xmm6-14 16 bytes each + ret +AES_CBC_decrypt_by6 ENDP + + +; void AES_CBC_decrypt_by8(const unsigned char *in, +; unsigned char *out, +; unsigned char ivec[16], +; unsigned long length, +; const unsigned char *KS, +; int nr) +AES_CBC_decrypt_by8 PROC +; parameter 1: rdi - in +; parameter 2: rsi - out +; parameter 3: rdx - ivec +; parameter 4: rcx - length +; parameter 5: r8 - KS +; parameter 6: r9d - nr + + ; save rdi and rsi to rax and r11, restore before ret + mov rax, rdi + mov r11, rsi + ; convert to what we had for att&t convention + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx,r9 + mov r8, [rsp+40] + mov r9d, [rsp+48] + ; on microsoft xmm6-xmm15 are non volatile, + ; let's save on stack and restore at end + sub rsp, 8+8*16 ; 8 = align stack , 8 xmm6-13 16 bytes each + movdqa [rsp+0], xmm6 + movdqa [rsp+16], xmm7 + movdqa [rsp+32], xmm8 + movdqa [rsp+48], xmm9 + movdqa [rsp+64], xmm10 + movdqa [rsp+80], xmm11 + movdqa [rsp+96], xmm12 + movdqa [rsp+112], xmm13 + ; back to our original code, more or less + mov r10, rcx + shr rcx, 4 + shl r10, 60 + je DNO_PARTS_8 + add rcx, 1 +DNO_PARTS_8: + mov r10, rcx + shl r10, 61 + shr r10, 61 + shr rcx, 3 + movdqu xmm9, [rdx] + je DREMAINDER_8 + sub rsi, 128 +DLOOP_8: + movdqu xmm1, [rdi] + movdqu xmm2, 16[rdi] + movdqu xmm3, 32[rdi] + movdqu xmm4, 48[rdi] + movdqu xmm5, 64[rdi] + movdqu xmm6, 80[rdi] + movdqu xmm7, 96[rdi] + movdqu xmm8, 112[rdi] + movdqa xmm10, [r8] + movdqa xmm11, 16[r8] + movdqa xmm12, 32[r8] + movdqa xmm13, 48[r8] + pxor xmm1, xmm10 + 
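+        ; xmm10 holds round key 0, so the eight pxor ops here are the initial
+        ; AddRoundKey for all eight blocks at once. Decrypting eight blocks
+        ; per iteration keeps the aesdec pipeline full; CBC decryption can be
+        ; parallelized this way because each output needs only its own
+        ; ciphertext block and the previous one: P[i] = InvCipher(C[i]) XOR C[i-1].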
pxor xmm2, xmm10 + pxor xmm3, xmm10 + pxor xmm4, xmm10 + pxor xmm5, xmm10 + pxor xmm6, xmm10 + pxor xmm7, xmm10 + pxor xmm8, xmm10 + aesdec xmm1, xmm11 + aesdec xmm2, xmm11 + aesdec xmm3, xmm11 + aesdec xmm4, xmm11 + aesdec xmm5, xmm11 + aesdec xmm6, xmm11 + aesdec xmm7, xmm11 + aesdec xmm8, xmm11 + aesdec xmm1, xmm12 + aesdec xmm2, xmm12 + aesdec xmm3, xmm12 + aesdec xmm4, xmm12 + aesdec xmm5, xmm12 + aesdec xmm6, xmm12 + aesdec xmm7, xmm12 + aesdec xmm8, xmm12 + aesdec xmm1, xmm13 + aesdec xmm2, xmm13 + aesdec xmm3, xmm13 + aesdec xmm4, xmm13 + aesdec xmm5, xmm13 + aesdec xmm6, xmm13 + aesdec xmm7, xmm13 + aesdec xmm8, xmm13 + movdqa xmm10, 64[r8] + movdqa xmm11, 80[r8] + movdqa xmm12, 96[r8] + movdqa xmm13, 112[r8] + aesdec xmm1, xmm10 + aesdec xmm2, xmm10 + aesdec xmm3, xmm10 + aesdec xmm4, xmm10 + aesdec xmm5, xmm10 + aesdec xmm6, xmm10 + aesdec xmm7, xmm10 + aesdec xmm8, xmm10 + aesdec xmm1, xmm11 + aesdec xmm2, xmm11 + aesdec xmm3, xmm11 + aesdec xmm4, xmm11 + aesdec xmm5, xmm11 + aesdec xmm6, xmm11 + aesdec xmm7, xmm11 + aesdec xmm8, xmm11 + aesdec xmm1, xmm12 + aesdec xmm2, xmm12 + aesdec xmm3, xmm12 + aesdec xmm4, xmm12 + aesdec xmm5, xmm12 + aesdec xmm6, xmm12 + aesdec xmm7, xmm12 + aesdec xmm8, xmm12 + aesdec xmm1, xmm13 + aesdec xmm2, xmm13 + aesdec xmm3, xmm13 + aesdec xmm4, xmm13 + aesdec xmm5, xmm13 + aesdec xmm6, xmm13 + aesdec xmm7, xmm13 + aesdec xmm8, xmm13 + movdqa xmm10, 128[r8] + movdqa xmm11, 144[r8] + movdqa xmm12, 160[r8] + cmp r9d, 12 + aesdec xmm1, xmm10 + aesdec xmm2, xmm10 + aesdec xmm3, xmm10 + aesdec xmm4, xmm10 + aesdec xmm5, xmm10 + aesdec xmm6, xmm10 + aesdec xmm7, xmm10 + aesdec xmm8, xmm10 + aesdec xmm1, xmm11 + aesdec xmm2, xmm11 + aesdec xmm3, xmm11 + aesdec xmm4, xmm11 + aesdec xmm5, xmm11 + aesdec xmm6, xmm11 + aesdec xmm7, xmm11 + aesdec xmm8, xmm11 + jb DLAST_8 + movdqa xmm10, 160[r8] + movdqa xmm11, 176[r8] + movdqa xmm12, 192[r8] + cmp r9d, 14 + aesdec xmm1, xmm10 + aesdec xmm2, xmm10 + aesdec xmm3, xmm10 + aesdec xmm4, xmm10 + aesdec xmm5, xmm10 + aesdec xmm6, xmm10 + aesdec xmm7, xmm10 + aesdec xmm8, xmm10 + aesdec xmm1, xmm11 + aesdec xmm2, xmm11 + aesdec xmm3, xmm11 + aesdec xmm4, xmm11 + aesdec xmm5, xmm11 + aesdec xmm6, xmm11 + aesdec xmm7, xmm11 + aesdec xmm8, xmm11 + jb DLAST_8 + movdqa xmm10, 192[r8] + movdqa xmm11, 208[r8] + movdqa xmm12, 224[r8] + aesdec xmm1, xmm10 + aesdec xmm2, xmm10 + aesdec xmm3, xmm10 + aesdec xmm4, xmm10 + aesdec xmm5, xmm10 + aesdec xmm6, xmm10 + aesdec xmm7, xmm10 + aesdec xmm8, xmm10 + aesdec xmm1, xmm11 + aesdec xmm2, xmm11 + aesdec xmm3, xmm11 + aesdec xmm4, xmm11 + aesdec xmm5, xmm11 + aesdec xmm6, xmm11 + aesdec xmm7, xmm11 + aesdec xmm8, xmm11 +DLAST_8: + add rsi, 128 + aesdeclast xmm1, xmm12 + aesdeclast xmm2, xmm12 + aesdeclast xmm3, xmm12 + aesdeclast xmm4, xmm12 + aesdeclast xmm5, xmm12 + aesdeclast xmm6, xmm12 + aesdeclast xmm7, xmm12 + aesdeclast xmm8, xmm12 + movdqu xmm10, [rdi] + movdqu xmm11, 16[rdi] + movdqu xmm12, 32[rdi] + movdqu xmm13, 48[rdi] + pxor xmm1, xmm9 + pxor xmm2, xmm10 + pxor xmm3, xmm11 + pxor xmm4, xmm12 + pxor xmm5, xmm13 + movdqu xmm10, 64[rdi] + movdqu xmm11, 80[rdi] + movdqu xmm12, 96[rdi] + movdqu xmm9, 112[rdi] + pxor xmm6, xmm10 + pxor xmm7, xmm11 + pxor xmm8, xmm12 + movdqu [rsi], xmm1 + movdqu 16[rsi], xmm2 + movdqu 32[rsi], xmm3 + movdqu 48[rsi], xmm4 + movdqu 64[rsi], xmm5 + movdqu 80[rsi], xmm6 + movdqu 96[rsi], xmm7 + movdqu 112[rsi], xmm8 + add rdi, 128 + dec rcx + jne DLOOP_8 + add rsi, 128 +DREMAINDER_8: + cmp r10, 0 + je DEND_8 +DLOOP_8_2: + movdqu xmm1, [rdi] 
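+        ; Single-block tail: r10 holds the 16-byte blocks left over after the
+        ; 8-wide main loop. A raw copy of each ciphertext block is kept in
+        ; xmm10 (set just below) so it can become the CBC chaining value in
+        ; xmm9 for the next block once xmm1 has been decrypted.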
+ movdqa xmm10, xmm1 + add rdi, 16 + pxor xmm1, [r8] + movdqu xmm2, 160[r8] + cmp r9d, 12 + aesdec xmm1, 16[r8] + aesdec xmm1, 32[r8] + aesdec xmm1, 48[r8] + aesdec xmm1, 64[r8] + aesdec xmm1, 80[r8] + aesdec xmm1, 96[r8] + aesdec xmm1, 112[r8] + aesdec xmm1, 128[r8] + aesdec xmm1, 144[r8] + jb DLAST_8_2 + movdqu xmm2, 192[r8] + cmp r9d, 14 + aesdec xmm1, 160[r8] + aesdec xmm1, 176[r8] + jb DLAST_8_2 + movdqu xmm2, 224[r8] + aesdec xmm1, 192[r8] + aesdec xmm1, 208[r8] +DLAST_8_2: + aesdeclast xmm1, xmm2 + pxor xmm1, xmm9 + movdqa xmm9, xmm10 + movdqu [rsi], xmm1 + add rsi, 16 + dec r10 + jne DLOOP_8_2 +DEND_8: + ; restore non volatile rdi,rsi + mov rdi, rax + mov rsi, r11 + ; restore non volatile xmms from stack + movdqa xmm6, [rsp+0] + movdqa xmm7, [rsp+16] + movdqa xmm8, [rsp+32] + movdqa xmm9, [rsp+48] + movdqa xmm10, [rsp+64] + movdqa xmm11, [rsp+80] + movdqa xmm12, [rsp+96] + movdqa xmm13, [rsp+112] + add rsp, 8+8*16 ; 8 = align stack , 8 xmm6-13 16 bytes each + ret +AES_CBC_decrypt_by8 ENDP + + +; /* +; AES_ECB_encrypt[const ,unsigned char*in +; unsigned ,char*out +; unsigned ,long length +; const ,unsigned char*KS +; int nr] +; */ +; . globl AES_ECB_encrypt +AES_ECB_encrypt PROC +;# parameter 1: rdi +;# parameter 2: rsi +;# parameter 3: rdx +;# parameter 4: rcx +;# parameter 5: r8d + +; save rdi and rsi to rax and r11, restore before ret + mov rax,rdi + mov r11,rsi + +; convert to what we had for att&t convention + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8d,[rsp+40] + +; on microsoft xmm6-xmm15 are non volaitle, let's save on stack and restore at end + sub rsp,8+4*16 ; 8 = align stack , 4 xmm9-12, 16 bytes each + movdqa [rsp+0], xmm9 + movdqa [rsp+16], xmm10 + movdqa [rsp+32], xmm11 + movdqa [rsp+48], xmm12 + + + mov r10,rdx + shr rdx,4 + shl r10,60 + je EECB_NO_PARTS_4 + add rdx,1 +EECB_NO_PARTS_4: + mov r10,rdx + shl r10,62 + shr r10,62 + shr rdx,2 + je EECB_REMAINDER_4 + sub rsi,64 +EECB_LOOP_4: + movdqu xmm1,[rdi] + movdqu xmm2,16[rdi] + movdqu xmm3,32[rdi] + movdqu xmm4,48[rdi] + movdqa xmm9,[rcx] + movdqa xmm10,16[rcx] + movdqa xmm11,32[rcx] + movdqa xmm12,48[rcx] + pxor xmm1,xmm9 + pxor xmm2,xmm9 + pxor xmm3,xmm9 + pxor xmm4,xmm9 + aesenc xmm1,xmm10 + aesenc xmm2,xmm10 + aesenc xmm3,xmm10 + aesenc xmm4,xmm10 + aesenc xmm1,xmm11 + aesenc xmm2,xmm11 + aesenc xmm3,xmm11 + aesenc xmm4,xmm11 + aesenc xmm1,xmm12 + aesenc xmm2,xmm12 + aesenc xmm3,xmm12 + aesenc xmm4,xmm12 + movdqa xmm9,64[rcx] + movdqa xmm10,80[rcx] + movdqa xmm11,96[rcx] + movdqa xmm12,112[rcx] + aesenc xmm1,xmm9 + aesenc xmm2,xmm9 + aesenc xmm3,xmm9 + aesenc xmm4,xmm9 + aesenc xmm1,xmm10 + aesenc xmm2,xmm10 + aesenc xmm3,xmm10 + aesenc xmm4,xmm10 + aesenc xmm1,xmm11 + aesenc xmm2,xmm11 + aesenc xmm3,xmm11 + aesenc xmm4,xmm11 + aesenc xmm1,xmm12 + aesenc xmm2,xmm12 + aesenc xmm3,xmm12 + aesenc xmm4,xmm12 + movdqa xmm9,128[rcx] + movdqa xmm10,144[rcx] + movdqa xmm11,160[rcx] + cmp r8d,12 + aesenc xmm1,xmm9 + aesenc xmm2,xmm9 + aesenc xmm3,xmm9 + aesenc xmm4,xmm9 + aesenc xmm1,xmm10 + aesenc xmm2,xmm10 + aesenc xmm3,xmm10 + aesenc xmm4,xmm10 + jb EECB_LAST_4 + movdqa xmm9,160[rcx] + movdqa xmm10,176[rcx] + movdqa xmm11,192[rcx] + cmp r8d,14 + aesenc xmm1,xmm9 + aesenc xmm2,xmm9 + aesenc xmm3,xmm9 + aesenc xmm4,xmm9 + aesenc xmm1,xmm10 + aesenc xmm2,xmm10 + aesenc xmm3,xmm10 + aesenc xmm4,xmm10 + jb EECB_LAST_4 + movdqa xmm9,192[rcx] + movdqa xmm10,208[rcx] + movdqa xmm11,224[rcx] + aesenc xmm1,xmm9 + aesenc xmm2,xmm9 + aesenc xmm3,xmm9 + aesenc xmm4,xmm9 + aesenc xmm1,xmm10 + aesenc xmm2,xmm10 + 
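+        ; Note: the AES-NI round instructions do not modify EFLAGS, so the
+        ; cmp r8d,12 and cmp r8d,14 results survive the intervening aesenc
+        ; blocks and each jb EECB_LAST_4 above can branch straight to
+        ; aesenclast. One code path thus serves AES-128/192/256 with 10, 12
+        ; or 14 rounds.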
aesenc xmm3,xmm10 + aesenc xmm4,xmm10 +EECB_LAST_4: + add rdi,64 + add rsi,64 + dec rdx + aesenclast xmm1,xmm11 + aesenclast xmm2,xmm11 + aesenclast xmm3,xmm11 + aesenclast xmm4,xmm11 + movdqu [rsi],xmm1 + movdqu 16[rsi],xmm2 + movdqu 32[rsi],xmm3 + movdqu 48[rsi],xmm4 + jne EECB_LOOP_4 + add rsi,64 +EECB_REMAINDER_4: + cmp r10,0 + je EECB_END_4 +EECB_LOOP_4_2: + movdqu xmm1,[rdi] + add rdi,16 + pxor xmm1,[rcx] + movdqu xmm2,160[rcx] + aesenc xmm1,16[rcx] + aesenc xmm1,32[rcx] + aesenc xmm1,48[rcx] + aesenc xmm1,64[rcx] + aesenc xmm1,80[rcx] + aesenc xmm1,96[rcx] + aesenc xmm1,112[rcx] + aesenc xmm1,128[rcx] + aesenc xmm1,144[rcx] + cmp r8d,12 + jb EECB_LAST_4_2 + movdqu xmm2,192[rcx] + aesenc xmm1,160[rcx] + aesenc xmm1,176[rcx] + cmp r8d,14 + jb EECB_LAST_4_2 + movdqu xmm2,224[rcx] + aesenc xmm1,192[rcx] + aesenc xmm1,208[rcx] +EECB_LAST_4_2: + aesenclast xmm1,xmm2 + movdqu [rsi],xmm1 + add rsi,16 + dec r10 + jne EECB_LOOP_4_2 +EECB_END_4: + ; restore non volatile rdi,rsi + mov rdi,rax + mov rsi,r11 + ; restore non volatile xmms from stack + movdqa xmm9, [rsp+0] + movdqa xmm10, [rsp+16] + movdqa xmm11, [rsp+32] + movdqa xmm12, [rsp+48] + add rsp,8+4*16 ; 8 = align stack , 4 xmm9-12 16 bytes each + ret +AES_ECB_encrypt ENDP + +; /* +; AES_ECB_decrypt[const ,unsigned char*in +; unsigned ,char*out +; unsigned ,long length +; const ,unsigned char*KS +; int nr] +; */ +; . globl AES_ECB_decrypt +AES_ECB_decrypt PROC +;# parameter 1: rdi +;# parameter 2: rsi +;# parameter 3: rdx +;# parameter 4: rcx +;# parameter 5: r8d + +; save rdi and rsi to rax and r11, restore before ret + mov rax,rdi + mov r11,rsi + +; convert to what we had for att&t convention + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8d,[rsp+40] + +; on microsoft xmm6-xmm15 are non volaitle, let's save on stack and restore at end + sub rsp,8+4*16 ; 8 = align stack , 4 xmm9-12, 16 bytes each + movdqa [rsp+0], xmm9 + movdqa [rsp+16], xmm10 + movdqa [rsp+32], xmm11 + movdqa [rsp+48], xmm12 + + mov r10,rdx + shr rdx,4 + shl r10,60 + je DECB_NO_PARTS_4 + add rdx,1 +DECB_NO_PARTS_4: + mov r10,rdx + shl r10,62 + shr r10,62 + shr rdx,2 + je DECB_REMAINDER_4 + sub rsi,64 +DECB_LOOP_4: + movdqu xmm1,[rdi] + movdqu xmm2,16[rdi] + movdqu xmm3,32[rdi] + movdqu xmm4,48[rdi] + movdqa xmm9,[rcx] + movdqa xmm10,16[rcx] + movdqa xmm11,32[rcx] + movdqa xmm12,48[rcx] + pxor xmm1,xmm9 + pxor xmm2,xmm9 + pxor xmm3,xmm9 + pxor xmm4,xmm9 + aesdec xmm1,xmm10 + aesdec xmm2,xmm10 + aesdec xmm3,xmm10 + aesdec xmm4,xmm10 + aesdec xmm1,xmm11 + aesdec xmm2,xmm11 + aesdec xmm3,xmm11 + aesdec xmm4,xmm11 + aesdec xmm1,xmm12 + aesdec xmm2,xmm12 + aesdec xmm3,xmm12 + aesdec xmm4,xmm12 + movdqa xmm9,64[rcx] + movdqa xmm10,80[rcx] + movdqa xmm11,96[rcx] + movdqa xmm12,112[rcx] + aesdec xmm1,xmm9 + aesdec xmm2,xmm9 + aesdec xmm3,xmm9 + aesdec xmm4,xmm9 + aesdec xmm1,xmm10 + aesdec xmm2,xmm10 + aesdec xmm3,xmm10 + aesdec xmm4,xmm10 + aesdec xmm1,xmm11 + aesdec xmm2,xmm11 + aesdec xmm3,xmm11 + aesdec xmm4,xmm11 + aesdec xmm1,xmm12 + aesdec xmm2,xmm12 + aesdec xmm3,xmm12 + aesdec xmm4,xmm12 + movdqa xmm9,128[rcx] + movdqa xmm10,144[rcx] + movdqa xmm11,160[rcx] + cmp r8d,12 + aesdec xmm1,xmm9 + aesdec xmm2,xmm9 + aesdec xmm3,xmm9 + aesdec xmm4,xmm9 + aesdec xmm1,xmm10 + aesdec xmm2,xmm10 + aesdec xmm3,xmm10 + aesdec xmm4,xmm10 + jb DECB_LAST_4 + movdqa xmm9,160[rcx] + movdqa xmm10,176[rcx] + movdqa xmm11,192[rcx] + cmp r8d,14 + aesdec xmm1,xmm9 + aesdec xmm2,xmm9 + aesdec xmm3,xmm9 + aesdec xmm4,xmm9 + aesdec xmm1,xmm10 + aesdec xmm2,xmm10 + aesdec xmm3,xmm10 
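+        ; The ECB decrypt loop mirrors the encrypt loop with aesdec and
+        ; aesdeclast. aesdec implements the equivalent inverse cipher, so the
+        ; schedule at rcx is assumed to be decryption round keys, i.e. round
+        ; keys already passed through InvMixColumns (e.g. via aesimc) and
+        ; ordered for decryption.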
+ aesdec xmm4,xmm10 + jb DECB_LAST_4 + movdqa xmm9,192[rcx] + movdqa xmm10,208[rcx] + movdqa xmm11,224[rcx] + aesdec xmm1,xmm9 + aesdec xmm2,xmm9 + aesdec xmm3,xmm9 + aesdec xmm4,xmm9 + aesdec xmm1,xmm10 + aesdec xmm2,xmm10 + aesdec xmm3,xmm10 + aesdec xmm4,xmm10 +DECB_LAST_4: + add rdi,64 + add rsi,64 + dec rdx + aesdeclast xmm1,xmm11 + aesdeclast xmm2,xmm11 + aesdeclast xmm3,xmm11 + aesdeclast xmm4,xmm11 + movdqu [rsi],xmm1 + movdqu 16[rsi],xmm2 + movdqu 32[rsi],xmm3 + movdqu 48[rsi],xmm4 + jne DECB_LOOP_4 + add rsi,64 +DECB_REMAINDER_4: + cmp r10,0 + je DECB_END_4 +DECB_LOOP_4_2: + movdqu xmm1,[rdi] + add rdi,16 + pxor xmm1,[rcx] + movdqu xmm2,160[rcx] + cmp r8d,12 + aesdec xmm1,16[rcx] + aesdec xmm1,32[rcx] + aesdec xmm1,48[rcx] + aesdec xmm1,64[rcx] + aesdec xmm1,80[rcx] + aesdec xmm1,96[rcx] + aesdec xmm1,112[rcx] + aesdec xmm1,128[rcx] + aesdec xmm1,144[rcx] + jb DECB_LAST_4_2 + cmp r8d,14 + movdqu xmm2,192[rcx] + aesdec xmm1,160[rcx] + aesdec xmm1,176[rcx] + jb DECB_LAST_4_2 + movdqu xmm2,224[rcx] + aesdec xmm1,192[rcx] + aesdec xmm1,208[rcx] +DECB_LAST_4_2: + aesdeclast xmm1,xmm2 + movdqu [rsi],xmm1 + add rsi,16 + dec r10 + jne DECB_LOOP_4_2 +DECB_END_4: + ; restore non volatile rdi,rsi + mov rdi,rax + mov rsi,r11 + ; restore non volatile xmms from stack + movdqa xmm9, [rsp+0] + movdqa xmm10, [rsp+16] + movdqa xmm11, [rsp+32] + movdqa xmm12, [rsp+48] + add rsp,8+4*16 ; 8 = align stack , 4 xmm9-12 16 bytes each + ret +AES_ECB_decrypt ENDP + + + +; /* +; void ,AES_128_Key_Expansion[const unsigned char*userkey +; unsigned char*key_schedule]/ +; */ +; . align 16,0x90 +; . globl AES_128_Key_Expansion +AES_128_Key_Expansion PROC +;# parameter 1: rdi +;# parameter 2: rsi + +; save rdi and rsi to rax and r11, restore before ret + mov rax,rdi + mov r11,rsi + +; convert to what we had for att&t convention + mov rdi,rcx + mov rsi,rdx + + mov dword ptr 240[rsi],10 + + movdqu xmm1,[rdi] + movdqa [rsi],xmm1 + + +ASSISTS: + aeskeygenassist xmm2,xmm1,1 + call PREPARE_ROUNDKEY_128 + movdqa 16[rsi],xmm1 + + aeskeygenassist xmm2,xmm1,2 + call PREPARE_ROUNDKEY_128 + movdqa 32[rsi],xmm1 + + aeskeygenassist xmm2,xmm1,4 + call PREPARE_ROUNDKEY_128 + movdqa 48[rsi],xmm1 + + aeskeygenassist xmm2,xmm1,8 + call PREPARE_ROUNDKEY_128 + movdqa 64[rsi],xmm1 + + aeskeygenassist xmm2,xmm1,16 + call PREPARE_ROUNDKEY_128 + movdqa 80[rsi],xmm1 + + aeskeygenassist xmm2,xmm1,32 + call PREPARE_ROUNDKEY_128 + movdqa 96[rsi],xmm1 + + aeskeygenassist xmm2,xmm1,64 + call PREPARE_ROUNDKEY_128 + movdqa 112[rsi],xmm1 + aeskeygenassist xmm2,xmm1,80h + call PREPARE_ROUNDKEY_128 + movdqa 128[rsi],xmm1 + aeskeygenassist xmm2,xmm1,1bh + call PREPARE_ROUNDKEY_128 + movdqa 144[rsi],xmm1 + aeskeygenassist xmm2,xmm1,36h + call PREPARE_ROUNDKEY_128 + movdqa 160[rsi],xmm1 + ; restore non volatile rdi,rsi + mov rdi,rax + mov rsi,r11 + ret + +PREPARE_ROUNDKEY_128: + pshufd xmm2,xmm2,255 + movdqa xmm3,xmm1 + pslldq xmm3,4 + pxor xmm1,xmm3 + pslldq xmm3,4 + pxor xmm1,xmm3 + pslldq xmm3,4 + pxor xmm1,xmm3 + pxor xmm1,xmm2 + ret +AES_128_Key_Expansion ENDP + +; /* +; void ,AES_192_Key_Expansion[const unsigned char*userkey +; unsigned char*key] +; */ +; . 
globl AES_192_Key_Expansion +AES_192_Key_Expansion PROC +;# parameter 1: rdi +;# parameter 2: rsi + +; save rdi and rsi to rax and r11, restore before ret + mov rax,rdi + mov r11,rsi + +; convert to what we had for att&t convention + mov rdi,rcx + mov rsi,rdx + +; on microsoft xmm6-xmm15 are non volaitle, let's save on stack and restore at end + sub rsp,8+1*16 ; 8 = align stack , 1 xmm6, 16 bytes each + movdqa [rsp+0], xmm6 + + movdqu xmm1,[rdi] + movq xmm3,qword ptr 16[rdi] + movdqa [rsi],xmm1 + movdqa xmm5,xmm3 + + aeskeygenassist xmm2,xmm3,1h + call PREPARE_ROUNDKEY_192 + shufpd xmm5,xmm1,0 + movdqa 16[rsi],xmm5 + movdqa xmm6,xmm1 + shufpd xmm6,xmm3,1 + movdqa 32[rsi],xmm6 + + aeskeygenassist xmm2,xmm3,2h + call PREPARE_ROUNDKEY_192 + movdqa 48[rsi],xmm1 + movdqa xmm5,xmm3 + + aeskeygenassist xmm2,xmm3,4h + call PREPARE_ROUNDKEY_192 + shufpd xmm5,xmm1,0 + movdqa 64[rsi],xmm5 + movdqa xmm6,xmm1 + shufpd xmm6,xmm3,1 + movdqa 80[rsi],xmm6 + + aeskeygenassist xmm2,xmm3,8h + call PREPARE_ROUNDKEY_192 + movdqa 96[rsi],xmm1 + movdqa xmm5,xmm3 + + aeskeygenassist xmm2,xmm3,10h + call PREPARE_ROUNDKEY_192 + shufpd xmm5,xmm1,0 + movdqa 112[rsi],xmm5 + movdqa xmm6,xmm1 + shufpd xmm6,xmm3,1 + movdqa 128[rsi],xmm6 + + aeskeygenassist xmm2,xmm3,20h + call PREPARE_ROUNDKEY_192 + movdqa 144[rsi],xmm1 + movdqa xmm5,xmm3 + + aeskeygenassist xmm2,xmm3,40h + call PREPARE_ROUNDKEY_192 + shufpd xmm5,xmm1,0 + movdqa 160[rsi],xmm5 + movdqa xmm6,xmm1 + shufpd xmm6,xmm3,1 + movdqa 176[rsi],xmm6 + + aeskeygenassist xmm2,xmm3,80h + call PREPARE_ROUNDKEY_192 + movdqa 192[rsi],xmm1 + movdqa 208[rsi],xmm3 + ; restore non volatile rdi,rsi + mov rdi,rax + mov rsi,r11 +; restore non volatile xmms from stack + movdqa xmm6, [rsp+0] + add rsp,8+1*16 ; 8 = align stack , 1 xmm6 16 bytes each + ret + +PREPARE_ROUNDKEY_192: + pshufd xmm2,xmm2,55h + movdqu xmm4,xmm1 + pslldq xmm4,4 + pxor xmm1,xmm4 + + pslldq xmm4,4 + pxor xmm1,xmm4 + pslldq xmm4,4 + pxor xmm1,xmm4 + pxor xmm1,xmm2 + pshufd xmm2,xmm1,0ffh + movdqu xmm4,xmm3 + pslldq xmm4,4 + pxor xmm3,xmm4 + pxor xmm3,xmm2 + ret +AES_192_Key_Expansion ENDP + +; /* +; void ,AES_256_Key_Expansion[const unsigned char*userkey +; unsigned char*key] +; */ +; . 
globl AES_256_Key_Expansion +AES_256_Key_Expansion PROC +;# parameter 1: rdi +;# parameter 2: rsi + +; save rdi and rsi to rax and r11, restore before ret + mov rax,rdi + mov r11,rsi + +; convert to what we had for att&t convention + mov rdi,rcx + mov rsi,rdx + + movdqu xmm1,[rdi] + movdqu xmm3,16[rdi] + movdqa [rsi],xmm1 + movdqa 16[rsi],xmm3 + + aeskeygenassist xmm2,xmm3,1h + call MAKE_RK256_a + movdqa 32[rsi],xmm1 + aeskeygenassist xmm2,xmm1,0h + call MAKE_RK256_b + movdqa 48[rsi],xmm3 + aeskeygenassist xmm2,xmm3,2h + call MAKE_RK256_a + movdqa 64[rsi],xmm1 + aeskeygenassist xmm2,xmm1,0h + call MAKE_RK256_b + movdqa 80[rsi],xmm3 + aeskeygenassist xmm2,xmm3,4h + call MAKE_RK256_a + movdqa 96[rsi],xmm1 + aeskeygenassist xmm2,xmm1,0h + call MAKE_RK256_b + movdqa 112[rsi],xmm3 + aeskeygenassist xmm2,xmm3,8h + call MAKE_RK256_a + movdqa 128[rsi],xmm1 + aeskeygenassist xmm2,xmm1,0h + call MAKE_RK256_b + movdqa 144[rsi],xmm3 + aeskeygenassist xmm2,xmm3,10h + call MAKE_RK256_a + movdqa 160[rsi],xmm1 + aeskeygenassist xmm2,xmm1,0h + call MAKE_RK256_b + movdqa 176[rsi],xmm3 + aeskeygenassist xmm2,xmm3,20h + call MAKE_RK256_a + movdqa 192[rsi],xmm1 + + aeskeygenassist xmm2,xmm1,0h + call MAKE_RK256_b + movdqa 208[rsi],xmm3 + aeskeygenassist xmm2,xmm3,40h + call MAKE_RK256_a + movdqa 224[rsi],xmm1 + + ; restore non volatile rdi,rsi + mov rdi,rax + mov rsi,r11 + ret +AES_256_Key_Expansion ENDP + +MAKE_RK256_a: + pshufd xmm2,xmm2,0ffh + movdqa xmm4,xmm1 + pslldq xmm4,4 + pxor xmm1,xmm4 + pslldq xmm4,4 + pxor xmm1,xmm4 + pslldq xmm4,4 + pxor xmm1,xmm4 + pxor xmm1,xmm2 + ret + +MAKE_RK256_b: + pshufd xmm2,xmm2,0aah + movdqa xmm4,xmm3 + pslldq xmm4,4 + pxor xmm3,xmm4 + pslldq xmm4,4 + pxor xmm3,xmm4 + pslldq xmm4,4 + pxor xmm3,xmm4 + pxor xmm3,xmm2 + ret + + +IF fips_version GE 2 + fipsAh ENDS +ELSE + _text ENDS +ENDIF + +END diff --git a/client/wolfssl/wolfcrypt/src/aes_gcm_asm.S b/client/wolfssl/wolfcrypt/src/aes_gcm_asm.S new file mode 100644 index 0000000..e878690 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/aes_gcm_asm.S @@ -0,0 +1,8733 @@ +/* aes_gcm_asm + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifndef HAVE_INTEL_AVX1 +#define HAVE_INTEL_AVX1 +#endif /* HAVE_INTEL_AVX1 */ +#ifndef NO_AVX2_SUPPORT +#define HAVE_INTEL_AVX2 +#endif /* NO_AVX2_SUPPORT */ + +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_aes_gcm_one: +.quad 0x0, 0x1 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_aes_gcm_two: +.quad 0x0, 0x2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_aes_gcm_three: +.quad 0x0, 0x3 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_aes_gcm_four: +.quad 0x0, 0x4 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_aes_gcm_five: +.quad 0x0, 0x5 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_aes_gcm_six: +.quad 0x0, 0x6 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_aes_gcm_seven: +.quad 0x0, 0x7 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_aes_gcm_eight: +.quad 0x0, 0x8 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_aes_gcm_bswap_epi64: +.quad 0x1020304050607, 0x8090a0b0c0d0e0f +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_aes_gcm_bswap_mask: +.quad 0x8090a0b0c0d0e0f, 0x1020304050607 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_aes_gcm_mod2_128: +.quad 0x1, 0xc200000000000000 +#ifndef __APPLE__ +.text +.globl AES_GCM_encrypt +.type AES_GCM_encrypt,@function +.align 4 +AES_GCM_encrypt: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt +.p2align 2 +_AES_GCM_encrypt: +#endif /* __APPLE__ */ + pushq %r13 + pushq %r12 + pushq %rbx + pushq %r14 + pushq %r15 + movq %rdx, %r12 + movq %rcx, %rax + movl 48(%rsp), %r11d + movl 56(%rsp), %ebx + movl 64(%rsp), %r14d + movq 72(%rsp), %r15 + movl 80(%rsp), %r10d + subq $0xa0, %rsp + pxor %xmm4, %xmm4 + pxor %xmm6, %xmm6 + cmpl $12, %ebx + movl %ebx, %edx + jne L_AES_GCM_encrypt_iv_not_12 + # # Calculate values when IV is 12 bytes + # Set counter based on IV + movl $0x1000000, %ecx + pinsrq $0x00, (%rax), %xmm4 + pinsrd $2, 8(%rax), %xmm4 + pinsrd $3, %ecx, %xmm4 + # H = Encrypt X(=0) and T = Encrypt counter + movdqa %xmm4, %xmm1 + movdqa (%r15), %xmm5 + pxor %xmm5, %xmm1 + movdqa 16(%r15), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 32(%r15), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 48(%r15), %xmm7 + aesenc 
%xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 64(%r15), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 80(%r15), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 96(%r15), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 112(%r15), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 128(%r15), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 144(%r15), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + cmpl $11, %r10d + movdqa 160(%r15), %xmm7 + jl L_AES_GCM_encrypt_calc_iv_12_last + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 176(%r15), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + cmpl $13, %r10d + movdqa 192(%r15), %xmm7 + jl L_AES_GCM_encrypt_calc_iv_12_last + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 208(%r15), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 224(%r15), %xmm7 +L_AES_GCM_encrypt_calc_iv_12_last: + aesenclast %xmm7, %xmm5 + aesenclast %xmm7, %xmm1 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm5 + movdqa %xmm1, 144(%rsp) + jmp L_AES_GCM_encrypt_iv_done +L_AES_GCM_encrypt_iv_not_12: + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + movdqa (%r15), %xmm5 + aesenc 16(%r15), %xmm5 + aesenc 32(%r15), %xmm5 + aesenc 48(%r15), %xmm5 + aesenc 64(%r15), %xmm5 + aesenc 80(%r15), %xmm5 + aesenc 96(%r15), %xmm5 + aesenc 112(%r15), %xmm5 + aesenc 128(%r15), %xmm5 + aesenc 144(%r15), %xmm5 + cmpl $11, %r10d + movdqa 160(%r15), %xmm9 + jl L_AES_GCM_encrypt_calc_iv_1_aesenc_avx_last + aesenc %xmm9, %xmm5 + aesenc 176(%r15), %xmm5 + cmpl $13, %r10d + movdqa 192(%r15), %xmm9 + jl L_AES_GCM_encrypt_calc_iv_1_aesenc_avx_last + aesenc %xmm9, %xmm5 + aesenc 208(%r15), %xmm5 + movdqa 224(%r15), %xmm9 +L_AES_GCM_encrypt_calc_iv_1_aesenc_avx_last: + aesenclast %xmm9, %xmm5 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm5 + # Calc counter + # Initialization vector + cmpl $0x00, %edx + movq $0x00, %rcx + je L_AES_GCM_encrypt_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_encrypt_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_encrypt_calc_iv_16_loop: + movdqu (%rax,%rcx,1), %xmm8 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm8 + pxor %xmm8, %xmm4 + pshufd $0x4e, %xmm4, %xmm1 + pshufd $0x4e, %xmm5, %xmm2 + movdqa %xmm5, %xmm3 + movdqa %xmm5, %xmm0 + pclmulqdq $0x11, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm4, %xmm0 + pxor %xmm4, %xmm1 + pxor %xmm5, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm7 + movdqa %xmm3, %xmm4 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm7 + pxor %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm4, %xmm1 + psrld $31, %xmm0 + psrld $31, %xmm1 + pslld $0x01, %xmm7 + pslld $0x01, %xmm4 + movdqa %xmm0, %xmm2 + pslldq $4, %xmm0 + psrldq $12, %xmm2 + pslldq $4, %xmm1 + por %xmm2, %xmm4 + por %xmm0, %xmm7 + por %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm7, %xmm1 + movdqa %xmm7, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq $4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm7 + movdqa %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + psrld $0x01, %xmm2 + psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm7, %xmm2 + pxor %xmm2, %xmm4 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_calc_iv_16_loop + movl %ebx, %edx + cmpl %edx, %ecx + je L_AES_GCM_encrypt_calc_iv_done +L_AES_GCM_encrypt_calc_iv_lt16: + subq $16, %rsp + pxor %xmm8, %xmm8 + xorl %ebx, %ebx + movdqa 
%xmm8, (%rsp) +L_AES_GCM_encrypt_calc_iv_loop: + movzbl (%rax,%rcx,1), %r13d + movb %r13b, (%rsp,%rbx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_calc_iv_loop + movdqa (%rsp), %xmm8 + addq $16, %rsp + pshufb L_aes_gcm_bswap_mask(%rip), %xmm8 + pxor %xmm8, %xmm4 + pshufd $0x4e, %xmm4, %xmm1 + pshufd $0x4e, %xmm5, %xmm2 + movdqa %xmm5, %xmm3 + movdqa %xmm5, %xmm0 + pclmulqdq $0x11, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm4, %xmm0 + pxor %xmm4, %xmm1 + pxor %xmm5, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm7 + movdqa %xmm3, %xmm4 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm7 + pxor %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm4, %xmm1 + psrld $31, %xmm0 + psrld $31, %xmm1 + pslld $0x01, %xmm7 + pslld $0x01, %xmm4 + movdqa %xmm0, %xmm2 + pslldq $4, %xmm0 + psrldq $12, %xmm2 + pslldq $4, %xmm1 + por %xmm2, %xmm4 + por %xmm0, %xmm7 + por %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm7, %xmm1 + movdqa %xmm7, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq $4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm7 + movdqa %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + psrld $0x01, %xmm2 + psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm7, %xmm2 + pxor %xmm2, %xmm4 +L_AES_GCM_encrypt_calc_iv_done: + # T = Encrypt counter + pxor %xmm0, %xmm0 + shll $3, %edx + pinsrq $0x00, %rdx, %xmm0 + pxor %xmm0, %xmm4 + pshufd $0x4e, %xmm4, %xmm1 + pshufd $0x4e, %xmm5, %xmm2 + movdqa %xmm5, %xmm3 + movdqa %xmm5, %xmm0 + pclmulqdq $0x11, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm4, %xmm0 + pxor %xmm4, %xmm1 + pxor %xmm5, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm7 + movdqa %xmm3, %xmm4 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm7 + pxor %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm4, %xmm1 + psrld $31, %xmm0 + psrld $31, %xmm1 + pslld $0x01, %xmm7 + pslld $0x01, %xmm4 + movdqa %xmm0, %xmm2 + pslldq $4, %xmm0 + psrldq $12, %xmm2 + pslldq $4, %xmm1 + por %xmm2, %xmm4 + por %xmm0, %xmm7 + por %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm7, %xmm1 + movdqa %xmm7, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq $4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm7 + movdqa %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + psrld $0x01, %xmm2 + psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm7, %xmm2 + pxor %xmm2, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm4 + # Encrypt counter + movdqa (%r15), %xmm8 + pxor %xmm4, %xmm8 + aesenc 16(%r15), %xmm8 + aesenc 32(%r15), %xmm8 + aesenc 48(%r15), %xmm8 + aesenc 64(%r15), %xmm8 + aesenc 80(%r15), %xmm8 + aesenc 96(%r15), %xmm8 + aesenc 112(%r15), %xmm8 + aesenc 128(%r15), %xmm8 + aesenc 144(%r15), %xmm8 + cmpl $11, %r10d + movdqa 160(%r15), %xmm9 + jl L_AES_GCM_encrypt_calc_iv_2_aesenc_avx_last + aesenc %xmm9, %xmm8 + aesenc 176(%r15), %xmm8 + cmpl $13, %r10d + movdqa 192(%r15), %xmm9 + jl L_AES_GCM_encrypt_calc_iv_2_aesenc_avx_last + aesenc %xmm9, %xmm8 + aesenc 208(%r15), %xmm8 + movdqa 224(%r15), %xmm9 +L_AES_GCM_encrypt_calc_iv_2_aesenc_avx_last: + aesenclast %xmm9, %xmm8 + movdqa %xmm8, 144(%rsp) +L_AES_GCM_encrypt_iv_done: + # Additional authentication data + movl %r11d, %edx + 
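+        # edx now holds the AAD length. Each 16-byte AAD block is folded into
+        # the GHASH accumulator in xmm6 by the same pclmulqdq multiply-and-
+        # reduce used on the IV above: in GF(2^128),
+        #   X = (X xor block) * H  mod  x^128 + x^7 + x^2 + x + 1
+        # with the byte-swapped hash key H in xmm5.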
cmpl $0x00, %edx + je L_AES_GCM_encrypt_calc_aad_done + xorl %ecx, %ecx + cmpl $16, %edx + jl L_AES_GCM_encrypt_calc_aad_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_encrypt_calc_aad_16_loop: + movdqu (%r12,%rcx,1), %xmm8 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm8 + pxor %xmm8, %xmm6 + pshufd $0x4e, %xmm6, %xmm1 + pshufd $0x4e, %xmm5, %xmm2 + movdqa %xmm5, %xmm3 + movdqa %xmm5, %xmm0 + pclmulqdq $0x11, %xmm6, %xmm3 + pclmulqdq $0x00, %xmm6, %xmm0 + pxor %xmm6, %xmm1 + pxor %xmm5, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm7 + movdqa %xmm3, %xmm6 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm7 + pxor %xmm1, %xmm6 + movdqa %xmm7, %xmm0 + movdqa %xmm6, %xmm1 + psrld $31, %xmm0 + psrld $31, %xmm1 + pslld $0x01, %xmm7 + pslld $0x01, %xmm6 + movdqa %xmm0, %xmm2 + pslldq $4, %xmm0 + psrldq $12, %xmm2 + pslldq $4, %xmm1 + por %xmm2, %xmm6 + por %xmm0, %xmm7 + por %xmm1, %xmm6 + movdqa %xmm7, %xmm0 + movdqa %xmm7, %xmm1 + movdqa %xmm7, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq $4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm7 + movdqa %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + psrld $0x01, %xmm2 + psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm7, %xmm2 + pxor %xmm2, %xmm6 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_calc_aad_16_loop + movl %r11d, %edx + cmpl %edx, %ecx + je L_AES_GCM_encrypt_calc_aad_done +L_AES_GCM_encrypt_calc_aad_lt16: + subq $16, %rsp + pxor %xmm8, %xmm8 + xorl %ebx, %ebx + movdqa %xmm8, (%rsp) +L_AES_GCM_encrypt_calc_aad_loop: + movzbl (%r12,%rcx,1), %r13d + movb %r13b, (%rsp,%rbx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_calc_aad_loop + movdqa (%rsp), %xmm8 + addq $16, %rsp + pshufb L_aes_gcm_bswap_mask(%rip), %xmm8 + pxor %xmm8, %xmm6 + pshufd $0x4e, %xmm6, %xmm1 + pshufd $0x4e, %xmm5, %xmm2 + movdqa %xmm5, %xmm3 + movdqa %xmm5, %xmm0 + pclmulqdq $0x11, %xmm6, %xmm3 + pclmulqdq $0x00, %xmm6, %xmm0 + pxor %xmm6, %xmm1 + pxor %xmm5, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm7 + movdqa %xmm3, %xmm6 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm7 + pxor %xmm1, %xmm6 + movdqa %xmm7, %xmm0 + movdqa %xmm6, %xmm1 + psrld $31, %xmm0 + psrld $31, %xmm1 + pslld $0x01, %xmm7 + pslld $0x01, %xmm6 + movdqa %xmm0, %xmm2 + pslldq $4, %xmm0 + psrldq $12, %xmm2 + pslldq $4, %xmm1 + por %xmm2, %xmm6 + por %xmm0, %xmm7 + por %xmm1, %xmm6 + movdqa %xmm7, %xmm0 + movdqa %xmm7, %xmm1 + movdqa %xmm7, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq $4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm7 + movdqa %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + psrld $0x01, %xmm2 + psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm7, %xmm2 + pxor %xmm2, %xmm6 +L_AES_GCM_encrypt_calc_aad_done: + # Calculate counter and H + pshufb L_aes_gcm_bswap_epi64(%rip), %xmm4 + movdqa %xmm5, %xmm9 + paddd L_aes_gcm_one(%rip), %xmm4 + movdqa %xmm5, %xmm8 + movdqa %xmm4, 128(%rsp) + psrlq $63, %xmm9 + psllq $0x01, %xmm8 + pslldq $8, %xmm9 + por %xmm9, %xmm8 + pshufd $0xff, %xmm5, %xmm5 + psrad $31, %xmm5 + pand L_aes_gcm_mod2_128(%rip), %xmm5 + pxor %xmm8, %xmm5 + xorq %rbx, %rbx + cmpl $0x80, %r9d + 
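+        # r9d holds the message length; when at least 0x80 bytes remain, the
+        # block below precomputes H^1..H^8 into (%rsp)..112(%rsp). With eight
+        # powers on hand the main loop can GHASH 128 bytes using eight
+        # independent pclmulqdq products and a single modular reduction:
+        #   X' = ((X xor C1)*H^8 xor C2*H^7 xor ... xor C8*H) mod p(x)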
movl %r9d, %r13d + jl L_AES_GCM_encrypt_done_128 + andl $0xffffff80, %r13d + movdqa %xmm6, %xmm2 + # H ^ 1 + movdqa %xmm5, (%rsp) + # H ^ 2 + pshufd $0x4e, %xmm5, %xmm9 + pshufd $0x4e, %xmm5, %xmm10 + movdqa %xmm5, %xmm11 + movdqa %xmm5, %xmm8 + pclmulqdq $0x11, %xmm5, %xmm11 + pclmulqdq $0x00, %xmm5, %xmm8 + pxor %xmm5, %xmm9 + pxor %xmm5, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm0 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm0 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm0 + movdqa %xmm0, 16(%rsp) + # H ^ 3 + pshufd $0x4e, %xmm5, %xmm9 + pshufd $0x4e, %xmm0, %xmm10 + movdqa %xmm0, %xmm11 + movdqa %xmm0, %xmm8 + pclmulqdq $0x11, %xmm5, %xmm11 + pclmulqdq $0x00, %xmm5, %xmm8 + pxor %xmm5, %xmm9 + pxor %xmm0, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm1 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm1 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm1 + movdqa %xmm1, 32(%rsp) + # H ^ 4 + pshufd $0x4e, %xmm0, %xmm9 + pshufd $0x4e, %xmm0, %xmm10 + movdqa %xmm0, %xmm11 + movdqa %xmm0, %xmm8 + pclmulqdq $0x11, %xmm0, %xmm11 + pclmulqdq $0x00, %xmm0, %xmm8 + pxor %xmm0, %xmm9 + pxor %xmm0, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm3 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm3 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm3 + movdqa %xmm3, 48(%rsp) + # H ^ 5 + pshufd $0x4e, %xmm0, %xmm9 + pshufd $0x4e, %xmm1, %xmm10 + movdqa %xmm1, %xmm11 + movdqa %xmm1, %xmm8 + pclmulqdq $0x11, %xmm0, %xmm11 + pclmulqdq $0x00, %xmm0, %xmm8 + pxor %xmm0, %xmm9 + pxor %xmm1, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm7 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm7 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, 
%xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm7 + movdqa %xmm7, 64(%rsp) + # H ^ 6 + pshufd $0x4e, %xmm1, %xmm9 + pshufd $0x4e, %xmm1, %xmm10 + movdqa %xmm1, %xmm11 + movdqa %xmm1, %xmm8 + pclmulqdq $0x11, %xmm1, %xmm11 + pclmulqdq $0x00, %xmm1, %xmm8 + pxor %xmm1, %xmm9 + pxor %xmm1, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm7 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm7 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm7 + movdqa %xmm7, 80(%rsp) + # H ^ 7 + pshufd $0x4e, %xmm1, %xmm9 + pshufd $0x4e, %xmm3, %xmm10 + movdqa %xmm3, %xmm11 + movdqa %xmm3, %xmm8 + pclmulqdq $0x11, %xmm1, %xmm11 + pclmulqdq $0x00, %xmm1, %xmm8 + pxor %xmm1, %xmm9 + pxor %xmm3, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm7 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm7 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm7 + movdqa %xmm7, 96(%rsp) + # H ^ 8 + pshufd $0x4e, %xmm3, %xmm9 + pshufd $0x4e, %xmm3, %xmm10 + movdqa %xmm3, %xmm11 + movdqa %xmm3, %xmm8 + pclmulqdq $0x11, %xmm3, %xmm11 + pclmulqdq $0x00, %xmm3, %xmm8 + pxor %xmm3, %xmm9 + pxor %xmm3, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm7 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm7 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm7 + movdqa %xmm7, 112(%rsp) + # First 128 bytes of input + movdqa 128(%rsp), %xmm8 + movdqa L_aes_gcm_bswap_epi64(%rip), %xmm1 + movdqa %xmm8, %xmm0 + pshufb %xmm1, %xmm8 + movdqa %xmm0, %xmm9 + paddd L_aes_gcm_one(%rip), %xmm9 + pshufb %xmm1, %xmm9 + movdqa %xmm0, %xmm10 + paddd L_aes_gcm_two(%rip), %xmm10 + pshufb %xmm1, %xmm10 + movdqa %xmm0, %xmm11 + paddd L_aes_gcm_three(%rip), %xmm11 + pshufb %xmm1, %xmm11 + movdqa %xmm0, %xmm12 
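+        # Counter-block generation: xmm0 keeps the counter in paddd-friendly
+        # order, so adding the L_aes_gcm_one..L_aes_gcm_eight constants yields
+        # eight consecutive CTR values; each copy is pshufb'd with
+        # L_aes_gcm_bswap_epi64 into the byte order the cipher consumes, and
+        # xmm0 is advanced by eight and stored back for the next chunk.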
+ paddd L_aes_gcm_four(%rip), %xmm12 + pshufb %xmm1, %xmm12 + movdqa %xmm0, %xmm13 + paddd L_aes_gcm_five(%rip), %xmm13 + pshufb %xmm1, %xmm13 + movdqa %xmm0, %xmm14 + paddd L_aes_gcm_six(%rip), %xmm14 + pshufb %xmm1, %xmm14 + movdqa %xmm0, %xmm15 + paddd L_aes_gcm_seven(%rip), %xmm15 + pshufb %xmm1, %xmm15 + paddd L_aes_gcm_eight(%rip), %xmm0 + movdqa (%r15), %xmm7 + movdqa %xmm0, 128(%rsp) + pxor %xmm7, %xmm8 + pxor %xmm7, %xmm9 + pxor %xmm7, %xmm10 + pxor %xmm7, %xmm11 + pxor %xmm7, %xmm12 + pxor %xmm7, %xmm13 + pxor %xmm7, %xmm14 + pxor %xmm7, %xmm15 + movdqa 16(%r15), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 32(%r15), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 48(%r15), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 64(%r15), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 80(%r15), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 96(%r15), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 112(%r15), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 128(%r15), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 144(%r15), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + cmpl $11, %r10d + movdqa 160(%r15), %xmm7 + jl L_AES_GCM_encrypt_enc_done + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 176(%r15), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + cmpl $13, %r10d + movdqa 192(%r15), %xmm7 + jl L_AES_GCM_encrypt_enc_done + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 208(%r15), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 224(%r15), %xmm7 +L_AES_GCM_encrypt_enc_done: + aesenclast %xmm7, %xmm8 + aesenclast %xmm7, %xmm9 + movdqu (%rdi), %xmm0 + movdqu 16(%rdi), %xmm1 + pxor %xmm0, %xmm8 + pxor %xmm1, %xmm9 
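+        # CTR step: xmm8 and xmm9 now hold E_K(counter) key-stream blocks;
+        # the pxor with plaintext loaded from (%rdi) produces ciphertext,
+        # stored to (%rsi). The same load/xor/store pattern repeats for the
+        # remaining six blocks.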
+        movdqu %xmm8, (%rsi)
+        movdqu %xmm9, 16(%rsi)
+        aesenclast %xmm7, %xmm10
+        aesenclast %xmm7, %xmm11
+        movdqu 32(%rdi), %xmm0
+        movdqu 48(%rdi), %xmm1
+        pxor %xmm0, %xmm10
+        pxor %xmm1, %xmm11
+        movdqu %xmm10, 32(%rsi)
+        movdqu %xmm11, 48(%rsi)
+        aesenclast %xmm7, %xmm12
+        aesenclast %xmm7, %xmm13
+        movdqu 64(%rdi), %xmm0
+        movdqu 80(%rdi), %xmm1
+        pxor %xmm0, %xmm12
+        pxor %xmm1, %xmm13
+        movdqu %xmm12, 64(%rsi)
+        movdqu %xmm13, 80(%rsi)
+        aesenclast %xmm7, %xmm14
+        aesenclast %xmm7, %xmm15
+        movdqu 96(%rdi), %xmm0
+        movdqu 112(%rdi), %xmm1
+        pxor %xmm0, %xmm14
+        pxor %xmm1, %xmm15
+        movdqu %xmm14, 96(%rsi)
+        movdqu %xmm15, 112(%rsi)
+        cmpl $0x80, %r13d
+        movl $0x80, %ebx
+        jle L_AES_GCM_encrypt_end_128
+        # More 128 bytes of input
+L_AES_GCM_encrypt_ghash_128:
+        leaq (%rdi,%rbx,1), %rcx
+        leaq (%rsi,%rbx,1), %rdx
+        movdqa 128(%rsp), %xmm8
+        movdqa L_aes_gcm_bswap_epi64(%rip), %xmm1
+        movdqa %xmm8, %xmm0
+        pshufb %xmm1, %xmm8
+        movdqa %xmm0, %xmm9
+        paddd L_aes_gcm_one(%rip), %xmm9
+        pshufb %xmm1, %xmm9
+        movdqa %xmm0, %xmm10
+        paddd L_aes_gcm_two(%rip), %xmm10
+        pshufb %xmm1, %xmm10
+        movdqa %xmm0, %xmm11
+        paddd L_aes_gcm_three(%rip), %xmm11
+        pshufb %xmm1, %xmm11
+        movdqa %xmm0, %xmm12
+        paddd L_aes_gcm_four(%rip), %xmm12
+        pshufb %xmm1, %xmm12
+        movdqa %xmm0, %xmm13
+        paddd L_aes_gcm_five(%rip), %xmm13
+        pshufb %xmm1, %xmm13
+        movdqa %xmm0, %xmm14
+        paddd L_aes_gcm_six(%rip), %xmm14
+        pshufb %xmm1, %xmm14
+        movdqa %xmm0, %xmm15
+        paddd L_aes_gcm_seven(%rip), %xmm15
+        pshufb %xmm1, %xmm15
+        paddd L_aes_gcm_eight(%rip), %xmm0
+        movdqa (%r15), %xmm7
+        movdqa %xmm0, 128(%rsp)
+        pxor %xmm7, %xmm8
+        pxor %xmm7, %xmm9
+        pxor %xmm7, %xmm10
+        pxor %xmm7, %xmm11
+        pxor %xmm7, %xmm12
+        pxor %xmm7, %xmm13
+        pxor %xmm7, %xmm14
+        pxor %xmm7, %xmm15
+        movdqa 112(%rsp), %xmm7
+        movdqu -128(%rdx), %xmm0
+        aesenc 16(%r15), %xmm8
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+        pxor %xmm2, %xmm0
+        pshufd $0x4e, %xmm7, %xmm1
+        pshufd $0x4e, %xmm0, %xmm5
+        pxor %xmm7, %xmm1
+        pxor %xmm0, %xmm5
+        movdqa %xmm0, %xmm3
+        pclmulqdq $0x11, %xmm7, %xmm3
+        aesenc 16(%r15), %xmm9
+        aesenc 16(%r15), %xmm10
+        movdqa %xmm0, %xmm2
+        pclmulqdq $0x00, %xmm7, %xmm2
+        aesenc 16(%r15), %xmm11
+        aesenc 16(%r15), %xmm12
+        pclmulqdq $0x00, %xmm5, %xmm1
+        aesenc 16(%r15), %xmm13
+        aesenc 16(%r15), %xmm14
+        aesenc 16(%r15), %xmm15
+        pxor %xmm2, %xmm1
+        pxor %xmm3, %xmm1
+        movdqa 96(%rsp), %xmm7
+        movdqu -112(%rdx), %xmm0
+        pshufd $0x4e, %xmm7, %xmm4
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+        aesenc 32(%r15), %xmm8
+        pxor %xmm7, %xmm4
+        pshufd $0x4e, %xmm0, %xmm5
+        pxor %xmm0, %xmm5
+        movdqa %xmm0, %xmm6
+        pclmulqdq $0x11, %xmm7, %xmm6
+        aesenc 32(%r15), %xmm9
+        aesenc 32(%r15), %xmm10
+        pclmulqdq $0x00, %xmm0, %xmm7
+        aesenc 32(%r15), %xmm11
+        aesenc 32(%r15), %xmm12
+        pclmulqdq $0x00, %xmm5, %xmm4
+        aesenc 32(%r15), %xmm13
+        aesenc 32(%r15), %xmm14
+        aesenc 32(%r15), %xmm15
+        pxor %xmm7, %xmm1
+        pxor %xmm7, %xmm2
+        pxor %xmm6, %xmm1
+        pxor %xmm6, %xmm3
+        pxor %xmm4, %xmm1
+        movdqa 80(%rsp), %xmm7
+        movdqu -96(%rdx), %xmm0
+        pshufd $0x4e, %xmm7, %xmm4
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+        aesenc 48(%r15), %xmm8
+        pxor %xmm7, %xmm4
+        pshufd $0x4e, %xmm0, %xmm5
+        pxor %xmm0, %xmm5
+        movdqa %xmm0, %xmm6
+        pclmulqdq $0x11, %xmm7, %xmm6
+        aesenc 48(%r15), %xmm9
+        aesenc 48(%r15), %xmm10
+        pclmulqdq $0x00, %xmm0, %xmm7
+        aesenc 48(%r15), %xmm11
+        aesenc 48(%r15), %xmm12
+        pclmulqdq $0x00, %xmm5, %xmm4
+        aesenc 48(%r15), %xmm13
+        aesenc 48(%r15), %xmm14
+        aesenc 48(%r15), %xmm15
+        pxor %xmm7, %xmm1
+        pxor %xmm7, %xmm2
+        pxor %xmm6, %xmm1
+        pxor %xmm6, %xmm3
+        pxor %xmm4, %xmm1
+        movdqa 64(%rsp), %xmm7
+        movdqu -80(%rdx), %xmm0
+        pshufd $0x4e, %xmm7, %xmm4
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+        aesenc 64(%r15), %xmm8
+        pxor %xmm7, %xmm4
+        pshufd $0x4e, %xmm0, %xmm5
+        pxor %xmm0, %xmm5
+        movdqa %xmm0, %xmm6
+        pclmulqdq $0x11, %xmm7, %xmm6
+        aesenc 64(%r15), %xmm9
+        aesenc 64(%r15), %xmm10
+        pclmulqdq $0x00, %xmm0, %xmm7
+        aesenc 64(%r15), %xmm11
+        aesenc 64(%r15), %xmm12
+        pclmulqdq $0x00, %xmm5, %xmm4
+        aesenc 64(%r15), %xmm13
+        aesenc 64(%r15), %xmm14
+        aesenc 64(%r15), %xmm15
+        pxor %xmm7, %xmm1
+        pxor %xmm7, %xmm2
+        pxor %xmm6, %xmm1
+        pxor %xmm6, %xmm3
+        pxor %xmm4, %xmm1
+        movdqa 48(%rsp), %xmm7
+        movdqu -64(%rdx), %xmm0
+        pshufd $0x4e, %xmm7, %xmm4
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+        aesenc 80(%r15), %xmm8
+        pxor %xmm7, %xmm4
+        pshufd $0x4e, %xmm0, %xmm5
+        pxor %xmm0, %xmm5
+        movdqa %xmm0, %xmm6
+        pclmulqdq $0x11, %xmm7, %xmm6
+        aesenc 80(%r15), %xmm9
+        aesenc 80(%r15), %xmm10
+        pclmulqdq $0x00, %xmm0, %xmm7
+        aesenc 80(%r15), %xmm11
+        aesenc 80(%r15), %xmm12
+        pclmulqdq $0x00, %xmm5, %xmm4
+        aesenc 80(%r15), %xmm13
+        aesenc 80(%r15), %xmm14
+        aesenc 80(%r15), %xmm15
+        pxor %xmm7, %xmm1
+        pxor %xmm7, %xmm2
+        pxor %xmm6, %xmm1
+        pxor %xmm6, %xmm3
+        pxor %xmm4, %xmm1
+        movdqa 32(%rsp), %xmm7
+        movdqu -48(%rdx), %xmm0
+        pshufd $0x4e, %xmm7, %xmm4
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+        aesenc 96(%r15), %xmm8
+        pxor %xmm7, %xmm4
+        pshufd $0x4e, %xmm0, %xmm5
+        pxor %xmm0, %xmm5
+        movdqa %xmm0, %xmm6
+        pclmulqdq $0x11, %xmm7, %xmm6
+        aesenc 96(%r15), %xmm9
+        aesenc 96(%r15), %xmm10
+        pclmulqdq $0x00, %xmm0, %xmm7
+        aesenc 96(%r15), %xmm11
+        aesenc 96(%r15), %xmm12
+        pclmulqdq $0x00, %xmm5, %xmm4
+        aesenc 96(%r15), %xmm13
+        aesenc 96(%r15), %xmm14
+        aesenc 96(%r15), %xmm15
+        pxor %xmm7, %xmm1
+        pxor %xmm7, %xmm2
+        pxor %xmm6, %xmm1
+        pxor %xmm6, %xmm3
+        pxor %xmm4, %xmm1
+        movdqa 16(%rsp), %xmm7
+        movdqu -32(%rdx), %xmm0
+        pshufd $0x4e, %xmm7, %xmm4
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+        aesenc 112(%r15), %xmm8
+        pxor %xmm7, %xmm4
+        pshufd $0x4e, %xmm0, %xmm5
+        pxor %xmm0, %xmm5
+        movdqa %xmm0, %xmm6
+        pclmulqdq $0x11, %xmm7, %xmm6
+        aesenc 112(%r15), %xmm9
+        aesenc 112(%r15), %xmm10
+        pclmulqdq $0x00, %xmm0, %xmm7
+        aesenc 112(%r15), %xmm11
+        aesenc 112(%r15), %xmm12
+        pclmulqdq $0x00, %xmm5, %xmm4
+        aesenc 112(%r15), %xmm13
+        aesenc 112(%r15), %xmm14
+        aesenc 112(%r15), %xmm15
+        pxor %xmm7, %xmm1
+        pxor %xmm7, %xmm2
+        pxor %xmm6, %xmm1
+        pxor %xmm6, %xmm3
+        pxor %xmm4, %xmm1
+        movdqa (%rsp), %xmm7
+        movdqu -16(%rdx), %xmm0
+        pshufd $0x4e, %xmm7, %xmm4
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+        aesenc 128(%r15), %xmm8
+        pxor %xmm7, %xmm4
+        pshufd $0x4e, %xmm0, %xmm5
+        pxor %xmm0, %xmm5
+        movdqa %xmm0, %xmm6
+        pclmulqdq $0x11, %xmm7, %xmm6
+        aesenc 128(%r15), %xmm9
+        aesenc 128(%r15), %xmm10
+        pclmulqdq $0x00, %xmm0, %xmm7
+        aesenc 128(%r15), %xmm11
+        aesenc 128(%r15), %xmm12
+        pclmulqdq $0x00, %xmm5, %xmm4
+        aesenc 128(%r15), %xmm13
+        aesenc 128(%r15), %xmm14
+        aesenc 128(%r15), %xmm15
+        pxor %xmm7, %xmm1
+        pxor %xmm7, %xmm2
+        pxor %xmm6, %xmm1
+        pxor %xmm6, %xmm3
+        pxor %xmm4, %xmm1
+        movdqa %xmm1, %xmm5
+        psrldq $8, %xmm1
+        pslldq $8, %xmm5
+        aesenc 144(%r15), %xmm8
+        pxor %xmm5, %xmm2
+        pxor %xmm1, %xmm3
+        movdqa %xmm2, %xmm7
+        movdqa %xmm2, %xmm4
+        movdqa %xmm2, %xmm5
+        aesenc 144(%r15), %xmm9
+        pslld $31, %xmm7
+        pslld $30, %xmm4
+        pslld $25, %xmm5
+        aesenc 144(%r15), %xmm10
+        pxor %xmm4, %xmm7
+        pxor %xmm5, %xmm7
+        aesenc 144(%r15), %xmm11
+        movdqa %xmm7, %xmm4
+        pslldq $12, %xmm7
+        psrldq $4, %xmm4
+        aesenc 144(%r15), %xmm12
+        pxor %xmm7, %xmm2
+        movdqa %xmm2, %xmm5
+        movdqa %xmm2, %xmm1
+        movdqa %xmm2, %xmm0
+        aesenc 144(%r15), %xmm13
+        psrld $0x01, %xmm5
+        psrld $2, %xmm1
+        psrld $7, %xmm0
+        aesenc 144(%r15), %xmm14
+        pxor %xmm1, %xmm5
+        pxor %xmm0, %xmm5
+        aesenc 144(%r15), %xmm15
+        pxor %xmm4, %xmm5
+        pxor %xmm5, %xmm2
+        pxor %xmm3, %xmm2
+        cmpl $11, %r10d
+        movdqa 160(%r15), %xmm7
+        jl L_AES_GCM_encrypt_aesenc_128_ghash_avx_done
+        aesenc %xmm7, %xmm8
+        aesenc %xmm7, %xmm9
+        aesenc %xmm7, %xmm10
+        aesenc %xmm7, %xmm11
+        aesenc %xmm7, %xmm12
+        aesenc %xmm7, %xmm13
+        aesenc %xmm7, %xmm14
+        aesenc %xmm7, %xmm15
+        movdqa 176(%r15), %xmm7
+        aesenc %xmm7, %xmm8
+        aesenc %xmm7, %xmm9
+        aesenc %xmm7, %xmm10
+        aesenc %xmm7, %xmm11
+        aesenc %xmm7, %xmm12
+        aesenc %xmm7, %xmm13
+        aesenc %xmm7, %xmm14
+        aesenc %xmm7, %xmm15
+        cmpl $13, %r10d
+        movdqa 192(%r15), %xmm7
+        jl L_AES_GCM_encrypt_aesenc_128_ghash_avx_done
+        aesenc %xmm7, %xmm8
+        aesenc %xmm7, %xmm9
+        aesenc %xmm7, %xmm10
+        aesenc %xmm7, %xmm11
+        aesenc %xmm7, %xmm12
+        aesenc %xmm7, %xmm13
+        aesenc %xmm7, %xmm14
+        aesenc %xmm7, %xmm15
+        movdqa 208(%r15), %xmm7
+        aesenc %xmm7, %xmm8
+        aesenc %xmm7, %xmm9
+        aesenc %xmm7, %xmm10
+        aesenc %xmm7, %xmm11
+        aesenc %xmm7, %xmm12
+        aesenc %xmm7, %xmm13
+        aesenc %xmm7, %xmm14
+        aesenc %xmm7, %xmm15
+        movdqa 224(%r15), %xmm7
+L_AES_GCM_encrypt_aesenc_128_ghash_avx_done:
+        aesenclast %xmm7, %xmm8
+        aesenclast %xmm7, %xmm9
+        movdqu (%rcx), %xmm0
+        movdqu 16(%rcx), %xmm1
+        pxor %xmm0, %xmm8
+        pxor %xmm1, %xmm9
+        movdqu %xmm8, (%rdx)
+        movdqu %xmm9, 16(%rdx)
+        aesenclast %xmm7, %xmm10
+        aesenclast %xmm7, %xmm11
+        movdqu 32(%rcx), %xmm0
+        movdqu 48(%rcx), %xmm1
+        pxor %xmm0, %xmm10
+        pxor %xmm1, %xmm11
+        movdqu %xmm10, 32(%rdx)
+        movdqu %xmm11, 48(%rdx)
+        aesenclast %xmm7, %xmm12
+        aesenclast %xmm7, %xmm13
+        movdqu 64(%rcx), %xmm0
+        movdqu 80(%rcx), %xmm1
+        pxor %xmm0, %xmm12
+        pxor %xmm1, %xmm13
+        movdqu %xmm12, 64(%rdx)
+        movdqu %xmm13, 80(%rdx)
+        aesenclast %xmm7, %xmm14
+        aesenclast %xmm7, %xmm15
+        movdqu 96(%rcx), %xmm0
+        movdqu 112(%rcx), %xmm1
+        pxor %xmm0, %xmm14
+        pxor %xmm1, %xmm15
+        movdqu %xmm14, 96(%rdx)
+        movdqu %xmm15, 112(%rdx)
+        addl $0x80, %ebx
+        cmpl %r13d, %ebx
+        jl L_AES_GCM_encrypt_ghash_128
+L_AES_GCM_encrypt_end_128:
+        movdqa L_aes_gcm_bswap_mask(%rip), %xmm4
+        pshufb %xmm4, %xmm8
+        pshufb %xmm4, %xmm9
+        pshufb %xmm4, %xmm10
+        pshufb %xmm4, %xmm11
+        pxor %xmm2, %xmm8
+        pshufb %xmm4, %xmm12
+        pshufb %xmm4, %xmm13
+        pshufb %xmm4, %xmm14
+        pshufb %xmm4, %xmm15
+        movdqa 112(%rsp), %xmm7
+        pshufd $0x4e, %xmm8, %xmm1
+        pshufd $0x4e, %xmm7, %xmm2
+        movdqa %xmm7, %xmm3
+        movdqa %xmm7, %xmm0
+        pclmulqdq $0x11, %xmm8, %xmm3
+        pclmulqdq $0x00, %xmm8, %xmm0
+        pxor %xmm8, %xmm1
+        pxor %xmm7, %xmm2
+        pclmulqdq $0x00, %xmm2, %xmm1
+        pxor %xmm0, %xmm1
+        pxor %xmm3, %xmm1
+        movdqa %xmm1, %xmm2
+        movdqa %xmm0, %xmm4
+        movdqa %xmm3, %xmm6
+        pslldq $8, %xmm2
+        psrldq $8, %xmm1
+        pxor %xmm2, %xmm4
+        pxor %xmm1, %xmm6
+        movdqa 96(%rsp), %xmm7
+        pshufd $0x4e, %xmm9, %xmm1
+        pshufd $0x4e, %xmm7, %xmm2
+        movdqa %xmm7, %xmm3
+        movdqa %xmm7, %xmm0
+        pclmulqdq $0x11, %xmm9, %xmm3
+        pclmulqdq $0x00, %xmm9, %xmm0
+        pxor %xmm9, %xmm1
+        pxor %xmm7, %xmm2
+        pclmulqdq $0x00, %xmm2, %xmm1
+        pxor %xmm0, %xmm1
+        pxor %xmm3, %xmm1
+        movdqa %xmm1, %xmm2
+        pxor %xmm0, %xmm4
+        pxor %xmm3, %xmm6
+        pslldq $8, %xmm2
+        psrldq $8, %xmm1
+        pxor %xmm2, %xmm4
+        pxor %xmm1, %xmm6
+        movdqa 80(%rsp), %xmm7
+        pshufd $0x4e, %xmm10, %xmm1
+        pshufd $0x4e, %xmm7, %xmm2
+        movdqa %xmm7, %xmm3
+        movdqa %xmm7, %xmm0
+        pclmulqdq $0x11, %xmm10, %xmm3
+        pclmulqdq $0x00, %xmm10, %xmm0
+        pxor %xmm10, %xmm1
+        pxor %xmm7, %xmm2
+        pclmulqdq $0x00, %xmm2, %xmm1
+        pxor %xmm0, %xmm1
+        pxor %xmm3, %xmm1
+        movdqa %xmm1, %xmm2
+        pxor %xmm0, %xmm4
+        pxor %xmm3, %xmm6
+        pslldq $8, %xmm2
+        psrldq $8, %xmm1
+        pxor %xmm2, %xmm4
+        pxor %xmm1, %xmm6
+        movdqa 64(%rsp), %xmm7
+        pshufd $0x4e, %xmm11, %xmm1
+        pshufd $0x4e, %xmm7, %xmm2
+        movdqa %xmm7, %xmm3
+        movdqa %xmm7, %xmm0
+        pclmulqdq $0x11, %xmm11, %xmm3
+        pclmulqdq $0x00, %xmm11, %xmm0
+        pxor %xmm11, %xmm1
+        pxor %xmm7, %xmm2
+        pclmulqdq $0x00, %xmm2, %xmm1
+        pxor %xmm0, %xmm1
+        pxor %xmm3, %xmm1
+        movdqa %xmm1, %xmm2
+        pxor %xmm0, %xmm4
+        pxor %xmm3, %xmm6
+        pslldq $8, %xmm2
+        psrldq $8, %xmm1
+        pxor %xmm2, %xmm4
+        pxor %xmm1, %xmm6
+        movdqa 48(%rsp), %xmm7
+        pshufd $0x4e, %xmm12, %xmm1
+        pshufd $0x4e, %xmm7, %xmm2
+        movdqa %xmm7, %xmm3
+        movdqa %xmm7, %xmm0
+        pclmulqdq $0x11, %xmm12, %xmm3
+        pclmulqdq $0x00, %xmm12, %xmm0
+        pxor %xmm12, %xmm1
+        pxor %xmm7, %xmm2
+        pclmulqdq $0x00, %xmm2, %xmm1
+        pxor %xmm0, %xmm1
+        pxor %xmm3, %xmm1
+        movdqa %xmm1, %xmm2
+        pxor %xmm0, %xmm4
+        pxor %xmm3, %xmm6
+        pslldq $8, %xmm2
+        psrldq $8, %xmm1
+        pxor %xmm2, %xmm4
+        pxor %xmm1, %xmm6
+        movdqa 32(%rsp), %xmm7
+        pshufd $0x4e, %xmm13, %xmm1
+        pshufd $0x4e, %xmm7, %xmm2
+        movdqa %xmm7, %xmm3
+        movdqa %xmm7, %xmm0
+        pclmulqdq $0x11, %xmm13, %xmm3
+        pclmulqdq $0x00, %xmm13, %xmm0
+        pxor %xmm13, %xmm1
+        pxor %xmm7, %xmm2
+        pclmulqdq $0x00, %xmm2, %xmm1
+        pxor %xmm0, %xmm1
+        pxor %xmm3, %xmm1
+        movdqa %xmm1, %xmm2
+        pxor %xmm0, %xmm4
+        pxor %xmm3, %xmm6
+        pslldq $8, %xmm2
+        psrldq $8, %xmm1
+        pxor %xmm2, %xmm4
+        pxor %xmm1, %xmm6
+        movdqa 16(%rsp), %xmm7
+        pshufd $0x4e, %xmm14, %xmm1
+        pshufd $0x4e, %xmm7, %xmm2
+        movdqa %xmm7, %xmm3
+        movdqa %xmm7, %xmm0
+        pclmulqdq $0x11, %xmm14, %xmm3
+        pclmulqdq $0x00, %xmm14, %xmm0
+        pxor %xmm14, %xmm1
+        pxor %xmm7, %xmm2
+        pclmulqdq $0x00, %xmm2, %xmm1
+        pxor %xmm0, %xmm1
+        pxor %xmm3, %xmm1
+        movdqa %xmm1, %xmm2
+        pxor %xmm0, %xmm4
+        pxor %xmm3, %xmm6
+        pslldq $8, %xmm2
+        psrldq $8, %xmm1
+        pxor %xmm2, %xmm4
+        pxor %xmm1, %xmm6
+        movdqa (%rsp), %xmm7
+        pshufd $0x4e, %xmm15, %xmm1
+        pshufd $0x4e, %xmm7, %xmm2
+        movdqa %xmm7, %xmm3
+        movdqa %xmm7, %xmm0
+        pclmulqdq $0x11, %xmm15, %xmm3
+        pclmulqdq $0x00, %xmm15, %xmm0
+        pxor %xmm15, %xmm1
+        pxor %xmm7, %xmm2
+        pclmulqdq $0x00, %xmm2, %xmm1
+        pxor %xmm0, %xmm1
+        pxor %xmm3, %xmm1
+        movdqa %xmm1, %xmm2
+        pxor %xmm0, %xmm4
+        pxor %xmm3, %xmm6
+        pslldq $8, %xmm2
+        psrldq $8, %xmm1
+        pxor %xmm2, %xmm4
+        pxor %xmm1, %xmm6
+        movdqa %xmm4, %xmm0
+        movdqa %xmm4, %xmm1
+        movdqa %xmm4, %xmm2
+        pslld $31, %xmm0
+        pslld $30, %xmm1
+        pslld $25, %xmm2
+        pxor %xmm1, %xmm0
+        pxor %xmm2, %xmm0
+        movdqa %xmm0, %xmm1
+        psrldq $4, %xmm1
+        pslldq $12, %xmm0
+        pxor %xmm0, %xmm4
+        movdqa %xmm4, %xmm2
+        movdqa %xmm4, %xmm3
+        movdqa %xmm4, %xmm0
+        psrld $0x01, %xmm2
+        psrld $2, %xmm3
+        psrld $7, %xmm0
+        pxor %xmm3, %xmm2
+        pxor %xmm0, %xmm2
+        pxor %xmm1, %xmm2
+        pxor %xmm4, %xmm2
+        pxor %xmm2, %xmm6
+        movdqa (%rsp), %xmm5
+L_AES_GCM_encrypt_done_128:
+        movl %r9d, %edx
+        cmpl %edx, %ebx
+        jge L_AES_GCM_encrypt_done_enc
+        movl %r9d, %r13d
+        andl $0xfffffff0, %r13d
+        cmpl %r13d, %ebx
+        jge L_AES_GCM_encrypt_last_block_done
+        leaq (%rdi,%rbx,1), %rcx
+        leaq (%rsi,%rbx,1), %rdx
+        movdqa 128(%rsp), %xmm8
+        movdqa %xmm8, %xmm9
+        pshufb L_aes_gcm_bswap_epi64(%rip), %xmm8
+        paddd L_aes_gcm_one(%rip), %xmm9
+        pxor (%r15), %xmm8
+        movdqa %xmm9, 128(%rsp)
+        aesenc 16(%r15), %xmm8
+        aesenc 32(%r15), %xmm8
+        aesenc 48(%r15), %xmm8
+        aesenc 64(%r15), %xmm8
+        aesenc 80(%r15), %xmm8
+        aesenc 96(%r15), %xmm8
+        aesenc 112(%r15), %xmm8
+        aesenc 128(%r15), %xmm8
+        aesenc 144(%r15), %xmm8
+        cmpl $11, %r10d
+        movdqa 160(%r15), %xmm9
+        jl L_AES_GCM_encrypt_aesenc_block_aesenc_avx_last
+        aesenc %xmm9, %xmm8
+        aesenc 176(%r15), %xmm8
+        cmpl $13, %r10d
+        movdqa 192(%r15), %xmm9
+        jl L_AES_GCM_encrypt_aesenc_block_aesenc_avx_last
+        aesenc %xmm9, %xmm8
+        aesenc 208(%r15), %xmm8
+        movdqa 224(%r15), %xmm9
+L_AES_GCM_encrypt_aesenc_block_aesenc_avx_last:
+        aesenclast %xmm9, %xmm8
+        movdqu (%rcx), %xmm9
+        pxor %xmm9, %xmm8
+        movdqu %xmm8, (%rdx)
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm8
+        pxor %xmm8, %xmm6
+        addl $16, %ebx
+        cmpl %r13d, %ebx
+        jge L_AES_GCM_encrypt_last_block_ghash
+L_AES_GCM_encrypt_last_block_start:
+        leaq (%rdi,%rbx,1), %rcx
+        leaq (%rsi,%rbx,1), %rdx
+        movdqa 128(%rsp), %xmm8
+        movdqa %xmm8, %xmm9
+        pshufb L_aes_gcm_bswap_epi64(%rip), %xmm8
+        paddd L_aes_gcm_one(%rip), %xmm9
+        pxor (%r15), %xmm8
+        movdqa %xmm9, 128(%rsp)
+        movdqa %xmm6, %xmm10
+        pclmulqdq $16, %xmm5, %xmm10
+        aesenc 16(%r15), %xmm8
+        aesenc 32(%r15), %xmm8
+        movdqa %xmm6, %xmm11
+        pclmulqdq $0x01, %xmm5, %xmm11
+        aesenc 48(%r15), %xmm8
+        aesenc 64(%r15), %xmm8
+        movdqa %xmm6, %xmm12
+        pclmulqdq $0x00, %xmm5, %xmm12
+        aesenc 80(%r15), %xmm8
+        movdqa %xmm6, %xmm1
+        pclmulqdq $0x11, %xmm5, %xmm1
+        aesenc 96(%r15), %xmm8
+        pxor %xmm11, %xmm10
+        movdqa %xmm10, %xmm2
+        psrldq $8, %xmm10
+        pslldq $8, %xmm2
+        aesenc 112(%r15), %xmm8
+        movdqa %xmm1, %xmm3
+        pxor %xmm12, %xmm2
+        pxor %xmm10, %xmm3
+        movdqa L_aes_gcm_mod2_128(%rip), %xmm0
+        movdqa %xmm2, %xmm11
+        pclmulqdq $16, %xmm0, %xmm11
+        aesenc 128(%r15), %xmm8
+        pshufd $0x4e, %xmm2, %xmm10
+        pxor %xmm11, %xmm10
+        movdqa %xmm10, %xmm11
+        pclmulqdq $16, %xmm0, %xmm11
+        aesenc 144(%r15), %xmm8
+        pshufd $0x4e, %xmm10, %xmm6
+        pxor %xmm11, %xmm6
+        pxor %xmm3, %xmm6
+        cmpl $11, %r10d
+        movdqa 160(%r15), %xmm9
+        jl L_AES_GCM_encrypt_aesenc_gfmul_last
+        aesenc %xmm9, %xmm8
+        aesenc 176(%r15), %xmm8
+        cmpl $13, %r10d
+        movdqa 192(%r15), %xmm9
+        jl L_AES_GCM_encrypt_aesenc_gfmul_last
+        aesenc %xmm9, %xmm8
+        aesenc 208(%r15), %xmm8
+        movdqa 224(%r15), %xmm9
+L_AES_GCM_encrypt_aesenc_gfmul_last:
+        aesenclast %xmm9, %xmm8
+        movdqu (%rcx), %xmm9
+        pxor %xmm9, %xmm8
+        movdqu %xmm8, (%rdx)
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm8
+        pxor %xmm8, %xmm6
+        addl $16, %ebx
+        cmpl %r13d, %ebx
+        jl L_AES_GCM_encrypt_last_block_start
+L_AES_GCM_encrypt_last_block_ghash:
+        pshufd $0x4e, %xmm5, %xmm9
+        pshufd $0x4e, %xmm6, %xmm10
+        movdqa %xmm6, %xmm11
+        movdqa %xmm6, %xmm8
+        pclmulqdq $0x11, %xmm5, %xmm11
+        pclmulqdq $0x00, %xmm5, %xmm8
+        pxor %xmm5, %xmm9
+        pxor %xmm6, %xmm10
+        pclmulqdq $0x00, %xmm10, %xmm9
+        pxor %xmm8, %xmm9
+        pxor %xmm11, %xmm9
+        movdqa %xmm9, %xmm10
+        movdqa %xmm11, %xmm6
+        pslldq $8, %xmm10
+        psrldq $8, %xmm9
+        pxor %xmm10, %xmm8
+        pxor %xmm9, %xmm6
+        movdqa %xmm8, %xmm12
+        movdqa %xmm8, %xmm13
+        movdqa %xmm8, %xmm14
+        pslld $31, %xmm12
+        pslld $30, %xmm13
+        pslld $25, %xmm14
+        pxor %xmm13, %xmm12
+        pxor %xmm14, %xmm12
+        movdqa %xmm12, %xmm13
+        psrldq $4, %xmm13
+        pslldq $12, %xmm12
+        pxor %xmm12, %xmm8
+        movdqa %xmm8, %xmm14
+        movdqa %xmm8, %xmm10
+        movdqa %xmm8, %xmm9
+        psrld $0x01, %xmm14
+        psrld $2, %xmm10
+        psrld $7, %xmm9
+        pxor %xmm10, %xmm14
+        pxor %xmm9, %xmm14
+        pxor %xmm13, %xmm14
+        pxor %xmm8, %xmm14
+        pxor %xmm14, %xmm6
+L_AES_GCM_encrypt_last_block_done:
+        movl %r9d, %ecx
+        movl %ecx, %edx
+        andl $15, %ecx
+        jz L_AES_GCM_encrypt_aesenc_last15_enc_avx_done
+        movdqa 128(%rsp), %xmm4
+        pshufb L_aes_gcm_bswap_epi64(%rip), %xmm4
+        pxor (%r15), %xmm4
+        aesenc 16(%r15), %xmm4
+        aesenc 32(%r15), %xmm4
+        aesenc 48(%r15), %xmm4
+        aesenc 64(%r15), %xmm4
+        aesenc 80(%r15), %xmm4
+        aesenc 96(%r15), %xmm4
+        aesenc 112(%r15), %xmm4
+        aesenc 128(%r15), %xmm4
+        aesenc 144(%r15), %xmm4
+        cmpl $11, %r10d
+        movdqa 160(%r15), %xmm9
+        jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_aesenc_avx_last
+        aesenc %xmm9, %xmm4
+        aesenc 176(%r15), %xmm4
+        cmpl $13, %r10d
+        movdqa 192(%r15), %xmm9
+        jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_aesenc_avx_last
+        aesenc %xmm9, %xmm4
+        aesenc 208(%r15), %xmm4
+        movdqa 224(%r15), %xmm9
+L_AES_GCM_encrypt_aesenc_last15_enc_avx_aesenc_avx_last:
+        aesenclast %xmm9, %xmm4
+        subq $16, %rsp
+        xorl %ecx, %ecx
+        movdqa %xmm4, (%rsp)
+L_AES_GCM_encrypt_aesenc_last15_enc_avx_loop:
+        movzbl (%rdi,%rbx,1), %r13d
+        xorb (%rsp,%rcx,1), %r13b
+        movb %r13b, (%rsi,%rbx,1)
+        movb %r13b, (%rsp,%rcx,1)
+        incl %ebx
+        incl %ecx
+        cmpl %edx, %ebx
+        jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_loop
+        xorq %r13, %r13
+        cmpl $16, %ecx
+        je L_AES_GCM_encrypt_aesenc_last15_enc_avx_finish_enc
+L_AES_GCM_encrypt_aesenc_last15_enc_avx_byte_loop:
+        movb %r13b, (%rsp,%rcx,1)
+        incl %ecx
+        cmpl $16, %ecx
+        jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_byte_loop
+L_AES_GCM_encrypt_aesenc_last15_enc_avx_finish_enc:
+        movdqa (%rsp), %xmm4
+        addq $16, %rsp
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm4
+        pxor %xmm4, %xmm6
+        pshufd $0x4e, %xmm5, %xmm9
+        pshufd $0x4e, %xmm6, %xmm10
+        movdqa %xmm6, %xmm11
+        movdqa %xmm6, %xmm8
+        pclmulqdq $0x11, %xmm5, %xmm11
+        pclmulqdq $0x00, %xmm5, %xmm8
+        pxor %xmm5, %xmm9
+        pxor %xmm6, %xmm10
+        pclmulqdq $0x00, %xmm10, %xmm9
+        pxor %xmm8, %xmm9
+        pxor %xmm11, %xmm9
+        movdqa %xmm9, %xmm10
+        movdqa %xmm11, %xmm6
+        pslldq $8, %xmm10
+        psrldq $8, %xmm9
+        pxor %xmm10, %xmm8
+        pxor %xmm9, %xmm6
+        movdqa %xmm8, %xmm12
+        movdqa %xmm8, %xmm13
+        movdqa %xmm8, %xmm14
+        pslld $31, %xmm12
+        pslld $30, %xmm13
+        pslld $25, %xmm14
+        pxor %xmm13, %xmm12
+        pxor %xmm14, %xmm12
+        movdqa %xmm12, %xmm13
+        psrldq $4, %xmm13
+        pslldq $12, %xmm12
+        pxor %xmm12, %xmm8
+        movdqa %xmm8, %xmm14
+        movdqa %xmm8, %xmm10
+        movdqa %xmm8, %xmm9
+        psrld $0x01, %xmm14
+        psrld $2, %xmm10
+        psrld $7, %xmm9
+        pxor %xmm10, %xmm14
+        pxor %xmm9, %xmm14
+        pxor %xmm13, %xmm14
+        pxor %xmm8, %xmm14
+        pxor %xmm14, %xmm6
+L_AES_GCM_encrypt_aesenc_last15_enc_avx_done:
+L_AES_GCM_encrypt_done_enc:
+        movl %r9d, %edx
+        movl %r11d, %ecx
+        shlq $3, %rdx
+        shlq $3, %rcx
+        pinsrq $0x00, %rdx, %xmm0
+        pinsrq $0x01, %rcx, %xmm0
+        pxor %xmm0, %xmm6
+        pshufd $0x4e, %xmm5, %xmm9
+        pshufd $0x4e, %xmm6, %xmm10
+        movdqa %xmm6, %xmm11
+        movdqa %xmm6, %xmm8
+        pclmulqdq $0x11, %xmm5, %xmm11
+        pclmulqdq $0x00, %xmm5, %xmm8
+        pxor %xmm5, %xmm9
+        pxor %xmm6, %xmm10
+        pclmulqdq $0x00, %xmm10, %xmm9
+        pxor %xmm8, %xmm9
+        pxor %xmm11, %xmm9
+        movdqa %xmm9, %xmm10
+        movdqa %xmm11, %xmm6
+        pslldq $8, %xmm10
+        psrldq $8, %xmm9
+        pxor %xmm10, %xmm8
+        pxor %xmm9, %xmm6
+        movdqa %xmm8, %xmm12
+        movdqa %xmm8, %xmm13
+        movdqa %xmm8, %xmm14
+        pslld $31, %xmm12
+        pslld $30, %xmm13
+        pslld $25, %xmm14
+        pxor %xmm13, %xmm12
+        pxor %xmm14, %xmm12
+        movdqa %xmm12, %xmm13
+        psrldq $4, %xmm13
+        pslldq $12, %xmm12
+        pxor %xmm12, %xmm8
+        movdqa %xmm8, %xmm14
+        movdqa %xmm8, %xmm10
+        movdqa %xmm8, %xmm9
+        psrld $0x01, %xmm14
+        psrld $2, %xmm10
+        psrld $7, %xmm9
+        pxor %xmm10, %xmm14
+        pxor %xmm9, %xmm14
+        pxor %xmm13, %xmm14
+        pxor %xmm8, %xmm14
+        pxor %xmm14, %xmm6
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm6
+        movdqa 144(%rsp), %xmm0
+        pxor %xmm6, %xmm0
+        cmpl $16, %r14d
+        je L_AES_GCM_encrypt_store_tag_16
+        xorq %rcx, %rcx
+        movdqa %xmm0, (%rsp)
+L_AES_GCM_encrypt_store_tag_loop:
+        movzbl (%rsp,%rcx,1), %r13d
+        movb %r13b, (%r8,%rcx,1)
+        incl %ecx
+        cmpl %r14d, %ecx
+        jne L_AES_GCM_encrypt_store_tag_loop
+        jmp L_AES_GCM_encrypt_store_tag_done
+L_AES_GCM_encrypt_store_tag_16:
+        movdqu %xmm0, (%r8)
+L_AES_GCM_encrypt_store_tag_done:
+        addq $0xa0, %rsp
+        popq %r15
+        popq %r14
+        popq %rbx
+        popq %r12
+        popq %r13
+        repz retq
+#ifndef __APPLE__
+.size AES_GCM_encrypt,.-AES_GCM_encrypt
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl AES_GCM_decrypt
+.type AES_GCM_decrypt,@function
+.align 4
+AES_GCM_decrypt:
+#else
+.section __TEXT,__text
+.globl _AES_GCM_decrypt
+.p2align 2
+_AES_GCM_decrypt:
+#endif /* __APPLE__ */
+        pushq %r13
+        pushq %r12
+        pushq %rbx
+        pushq %r14
+        pushq %r15
+        pushq %rbp
+        movq %rdx, %r12
+        movq %rcx, %rax
+        movl 56(%rsp), %r11d
+        movl 64(%rsp), %ebx
+        movl 72(%rsp), %r14d
+        movq 80(%rsp), %r15
+        movl 88(%rsp), %r10d
+        movq 96(%rsp), %rbp
+        subq $0xa8, %rsp
+        pxor %xmm4, %xmm4
+        pxor %xmm6, %xmm6
+        cmpl $12, %ebx
+        movl %ebx, %edx
+        jne L_AES_GCM_decrypt_iv_not_12
+        # # Calculate values when IV is 12 bytes
+        # Set counter based on IV
+        movl $0x1000000, %ecx
+        pinsrq $0x00, (%rax), %xmm4
+        pinsrd $2, 8(%rax), %xmm4
+        pinsrd $3, %ecx, %xmm4
+        # H = Encrypt X(=0) and T = Encrypt counter
+        movdqa %xmm4, %xmm1
+        movdqa (%r15), %xmm5
+        pxor %xmm5, %xmm1
+        movdqa 16(%r15), %xmm7
+        aesenc %xmm7, %xmm5
+        aesenc %xmm7, %xmm1
+        movdqa 32(%r15), %xmm7
+        aesenc %xmm7, %xmm5
+        aesenc %xmm7, %xmm1
+        movdqa 48(%r15), %xmm7
+        aesenc %xmm7, %xmm5
+        aesenc %xmm7, %xmm1
+        movdqa 64(%r15), %xmm7
+        aesenc %xmm7, %xmm5
+        aesenc %xmm7, %xmm1
+        movdqa 80(%r15), %xmm7
+        aesenc %xmm7, %xmm5
+        aesenc %xmm7, %xmm1
+        movdqa 96(%r15), %xmm7
+        aesenc %xmm7, %xmm5
+        aesenc %xmm7, %xmm1
+        movdqa 112(%r15), %xmm7
+        aesenc %xmm7, %xmm5
+        aesenc %xmm7, %xmm1
+        movdqa 128(%r15), %xmm7
+        aesenc %xmm7, %xmm5
+        aesenc %xmm7, %xmm1
+        movdqa 144(%r15), %xmm7
+        aesenc %xmm7, %xmm5
+        aesenc %xmm7, %xmm1
+        cmpl $11, %r10d
+        movdqa 160(%r15), %xmm7
+        jl L_AES_GCM_decrypt_calc_iv_12_last
+        aesenc %xmm7, %xmm5
+        aesenc %xmm7, %xmm1
+        movdqa 176(%r15), %xmm7
+        aesenc %xmm7, %xmm5
+        aesenc %xmm7, %xmm1
+        cmpl $13, %r10d
+        movdqa 192(%r15), %xmm7
+        jl L_AES_GCM_decrypt_calc_iv_12_last
+        aesenc %xmm7, %xmm5
+        aesenc %xmm7, %xmm1
+        movdqa 208(%r15), %xmm7
+        aesenc %xmm7, %xmm5
+        aesenc %xmm7, %xmm1
+        movdqa 224(%r15), %xmm7
+L_AES_GCM_decrypt_calc_iv_12_last:
+        aesenclast %xmm7, %xmm5
+        aesenclast %xmm7, %xmm1
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm5
+        movdqa %xmm1, 144(%rsp)
+        jmp L_AES_GCM_decrypt_iv_done
+L_AES_GCM_decrypt_iv_not_12:
+        # Calculate values when IV is not 12 bytes
+        # H = Encrypt X(=0)
+        movdqa (%r15), %xmm5
+        aesenc 16(%r15), %xmm5
+        aesenc 32(%r15), %xmm5
+        aesenc 48(%r15), %xmm5
+        aesenc 64(%r15), %xmm5
+        aesenc 80(%r15), %xmm5
+        aesenc 96(%r15), %xmm5
+        aesenc 112(%r15), %xmm5
+        aesenc 128(%r15), %xmm5
+        aesenc 144(%r15), %xmm5
+        cmpl $11, %r10d
+        movdqa 160(%r15), %xmm9
+        jl L_AES_GCM_decrypt_calc_iv_1_aesenc_avx_last
+        aesenc %xmm9, %xmm5
+        aesenc 176(%r15), %xmm5
+        cmpl $13, %r10d
+        movdqa 192(%r15), %xmm9
+        jl L_AES_GCM_decrypt_calc_iv_1_aesenc_avx_last
+        aesenc %xmm9, %xmm5
+        aesenc 208(%r15), %xmm5
+        movdqa 224(%r15), %xmm9
+L_AES_GCM_decrypt_calc_iv_1_aesenc_avx_last:
+        aesenclast %xmm9, %xmm5
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm5
+        # Calc counter
+        # Initialization vector
+        cmpl $0x00, %edx
+        movq $0x00, %rcx
+        je L_AES_GCM_decrypt_calc_iv_done
+        cmpl $16, %edx
+        jl L_AES_GCM_decrypt_calc_iv_lt16
+        andl $0xfffffff0, %edx
+L_AES_GCM_decrypt_calc_iv_16_loop:
+        movdqu (%rax,%rcx,1), %xmm8
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm8
+        pxor %xmm8, %xmm4
+        pshufd $0x4e, %xmm4, %xmm1
+        pshufd $0x4e, %xmm5, %xmm2
+        movdqa %xmm5, %xmm3
+        movdqa %xmm5, %xmm0
+        pclmulqdq $0x11, %xmm4, %xmm3
+        pclmulqdq $0x00, %xmm4, %xmm0
+        pxor %xmm4, %xmm1
+        pxor %xmm5, %xmm2
+        pclmulqdq $0x00, %xmm2, %xmm1
+        pxor %xmm0, %xmm1
+        pxor %xmm3, %xmm1
+        movdqa %xmm1, %xmm2
+        movdqa %xmm0, %xmm7
+        movdqa %xmm3, %xmm4
+        pslldq $8, %xmm2
+        psrldq $8, %xmm1
+        pxor %xmm2, %xmm7
+        pxor %xmm1, %xmm4
+        movdqa %xmm7, %xmm0
+        movdqa %xmm4, %xmm1
+        psrld $31, %xmm0
+        psrld $31, %xmm1
+        pslld $0x01, %xmm7
+        pslld $0x01, %xmm4
+        movdqa %xmm0, %xmm2
+        pslldq $4, %xmm0
+        psrldq $12, %xmm2
+        pslldq $4, %xmm1
+        por %xmm2, %xmm4
+        por %xmm0, %xmm7
+        por %xmm1, %xmm4
+        movdqa %xmm7, %xmm0
+        movdqa %xmm7, %xmm1
+        movdqa %xmm7, %xmm2
+        pslld $31, %xmm0
+        pslld $30, %xmm1
+        pslld $25, %xmm2
+        pxor %xmm1, %xmm0
+        pxor %xmm2, %xmm0
+        movdqa %xmm0, %xmm1
+        psrldq $4, %xmm1
+        pslldq $12, %xmm0
+        pxor %xmm0, %xmm7
+        movdqa %xmm7, %xmm2
+        movdqa %xmm7, %xmm3
+        movdqa %xmm7, %xmm0
+        psrld $0x01, %xmm2
+        psrld $2, %xmm3
+        psrld $7, %xmm0
+        pxor %xmm3, %xmm2
+        pxor %xmm0, %xmm2
+        pxor %xmm1, %xmm2
+        pxor %xmm7, %xmm2
+        pxor %xmm2, %xmm4
+        addl $16, %ecx
+        cmpl %edx, %ecx
+        jl L_AES_GCM_decrypt_calc_iv_16_loop
+        movl %ebx, %edx
+        cmpl %edx, %ecx
+        je L_AES_GCM_decrypt_calc_iv_done
+L_AES_GCM_decrypt_calc_iv_lt16:
+        subq $16, %rsp
+        pxor %xmm8, %xmm8
+        xorl %ebx, %ebx
+        movdqa %xmm8, (%rsp)
+L_AES_GCM_decrypt_calc_iv_loop:
+        movzbl (%rax,%rcx,1), %r13d
+        movb %r13b, (%rsp,%rbx,1)
+        incl %ecx
+        incl %ebx
+        cmpl %edx, %ecx
+        jl L_AES_GCM_decrypt_calc_iv_loop
+        movdqa (%rsp), %xmm8
+        addq $16, %rsp
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm8
+        pxor %xmm8, %xmm4
+        pshufd $0x4e, %xmm4, %xmm1
+        pshufd $0x4e, %xmm5, %xmm2
+        movdqa %xmm5, %xmm3
+        movdqa %xmm5, %xmm0
+        pclmulqdq $0x11, %xmm4, %xmm3
+        pclmulqdq $0x00, %xmm4, %xmm0
+        pxor %xmm4, %xmm1
+        pxor %xmm5, %xmm2
+        pclmulqdq $0x00, %xmm2, %xmm1
+        pxor %xmm0, %xmm1
+        pxor %xmm3, %xmm1
+        movdqa %xmm1, %xmm2
+        movdqa %xmm0, %xmm7
+        movdqa %xmm3, %xmm4
+        pslldq $8, %xmm2
+        psrldq $8, %xmm1
+        pxor %xmm2, %xmm7
+        pxor %xmm1, %xmm4
+        movdqa %xmm7, %xmm0
+        movdqa %xmm4, %xmm1
+        psrld $31, %xmm0
+        psrld $31, %xmm1
+        pslld $0x01, %xmm7
+        pslld $0x01, %xmm4
+        movdqa %xmm0, %xmm2
+        pslldq $4, %xmm0
+        psrldq $12, %xmm2
+        pslldq $4, %xmm1
+        por %xmm2, %xmm4
+        por %xmm0, %xmm7
+        por %xmm1, %xmm4
+        movdqa %xmm7, %xmm0
+        movdqa %xmm7, %xmm1
+        movdqa %xmm7, %xmm2
+        pslld $31, %xmm0
+        pslld $30, %xmm1
+        pslld $25, %xmm2
+        pxor %xmm1, %xmm0
+        pxor %xmm2, %xmm0
+        movdqa %xmm0, %xmm1
+        psrldq $4, %xmm1
+        pslldq $12, %xmm0
+        pxor %xmm0, %xmm7
+        movdqa %xmm7, %xmm2
+        movdqa %xmm7, %xmm3
+        movdqa %xmm7, %xmm0
+        psrld $0x01, %xmm2
+        psrld $2, %xmm3
+        psrld $7, %xmm0
+        pxor %xmm3, %xmm2
+        pxor %xmm0, %xmm2
+        pxor %xmm1, %xmm2
+        pxor %xmm7, %xmm2
+        pxor %xmm2, %xmm4
+L_AES_GCM_decrypt_calc_iv_done:
+        # T = Encrypt counter
+        pxor %xmm0, %xmm0
+        shll $3, %edx
+        pinsrq $0x00, %rdx, %xmm0
+        pxor %xmm0, %xmm4
+        pshufd $0x4e, %xmm4, %xmm1
+        pshufd $0x4e, %xmm5, %xmm2
+        movdqa %xmm5, %xmm3
+        movdqa %xmm5, %xmm0
+        pclmulqdq $0x11, %xmm4, %xmm3
+        pclmulqdq $0x00, %xmm4, %xmm0
+        pxor %xmm4, %xmm1
+        pxor %xmm5, %xmm2
+        pclmulqdq $0x00, %xmm2, %xmm1
+        pxor %xmm0, %xmm1
+        pxor %xmm3, %xmm1
+        movdqa %xmm1, %xmm2
+        movdqa %xmm0, %xmm7
+        movdqa %xmm3, %xmm4
+        pslldq $8, %xmm2
+        psrldq $8, %xmm1
+        pxor %xmm2, %xmm7
+        pxor %xmm1, %xmm4
+        movdqa %xmm7, %xmm0
+        movdqa %xmm4, %xmm1
+        psrld $31, %xmm0
+        psrld $31, %xmm1
+        pslld $0x01, %xmm7
+        pslld $0x01, %xmm4
+        movdqa %xmm0, %xmm2
+        pslldq $4, %xmm0
+        psrldq $12, %xmm2
+        pslldq $4, %xmm1
+        por %xmm2, %xmm4
+        por %xmm0, %xmm7
+        por %xmm1, %xmm4
+        movdqa %xmm7, %xmm0
+        movdqa %xmm7, %xmm1
+        movdqa %xmm7, %xmm2
+        pslld $31, %xmm0
+        pslld $30, %xmm1
+        pslld $25, %xmm2
+        pxor %xmm1, %xmm0
+        pxor %xmm2, %xmm0
+        movdqa %xmm0, %xmm1
+        psrldq $4, %xmm1
+        pslldq $12, %xmm0
+        pxor %xmm0, %xmm7
+        movdqa %xmm7, %xmm2
+        movdqa %xmm7, %xmm3
+        movdqa %xmm7, %xmm0
+        psrld $0x01, %xmm2
+        psrld $2, %xmm3
+        psrld $7, %xmm0
+        pxor %xmm3, %xmm2
+        pxor %xmm0, %xmm2
+        pxor %xmm1, %xmm2
+        pxor %xmm7, %xmm2
+        pxor %xmm2, %xmm4
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm4
+        # Encrypt counter
+        movdqa (%r15), %xmm8
+        pxor %xmm4, %xmm8
+        aesenc 16(%r15), %xmm8
+        aesenc 32(%r15), %xmm8
+        aesenc 48(%r15), %xmm8
+        aesenc 64(%r15), %xmm8
+        aesenc 80(%r15), %xmm8
+        aesenc 96(%r15), %xmm8
+        aesenc 112(%r15), %xmm8
+        aesenc 128(%r15), %xmm8
+        aesenc 144(%r15), %xmm8
+        cmpl $11, %r10d
+        movdqa 160(%r15), %xmm9
+        jl L_AES_GCM_decrypt_calc_iv_2_aesenc_avx_last
+        aesenc %xmm9, %xmm8
+        aesenc 176(%r15), %xmm8
+        cmpl $13, %r10d
+        movdqa 192(%r15), %xmm9
+        jl L_AES_GCM_decrypt_calc_iv_2_aesenc_avx_last
+        aesenc %xmm9, %xmm8
+        aesenc 208(%r15), %xmm8
+        movdqa 224(%r15), %xmm9
+L_AES_GCM_decrypt_calc_iv_2_aesenc_avx_last:
+        aesenclast %xmm9, %xmm8
+        movdqa %xmm8, 144(%rsp)
+L_AES_GCM_decrypt_iv_done:
+        # Additional authentication data
+        movl %r11d, %edx
+        cmpl $0x00, %edx
+        je L_AES_GCM_decrypt_calc_aad_done
+        xorl %ecx, %ecx
+        cmpl $16, %edx
+        jl L_AES_GCM_decrypt_calc_aad_lt16
+        andl $0xfffffff0, %edx
+L_AES_GCM_decrypt_calc_aad_16_loop:
+        movdqu (%r12,%rcx,1), %xmm8
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm8
+        pxor %xmm8, %xmm6
+        pshufd $0x4e, %xmm6, %xmm1
+        pshufd $0x4e, %xmm5, %xmm2
+        movdqa %xmm5, %xmm3
+        movdqa %xmm5, %xmm0
+        pclmulqdq $0x11, %xmm6, %xmm3
+        pclmulqdq $0x00, %xmm6, %xmm0
+        pxor %xmm6, %xmm1
+        pxor %xmm5, %xmm2
+        pclmulqdq $0x00, %xmm2, %xmm1
+        pxor %xmm0, %xmm1
+        pxor %xmm3, %xmm1
+        movdqa %xmm1, %xmm2
+        movdqa %xmm0, %xmm7
+        movdqa %xmm3, %xmm6
+        pslldq $8, %xmm2
+        psrldq $8, %xmm1
+        pxor %xmm2, %xmm7
+        pxor %xmm1, %xmm6
+        movdqa %xmm7, %xmm0
+        movdqa %xmm6, %xmm1
+        psrld $31, %xmm0
+        psrld $31, %xmm1
+        pslld $0x01, %xmm7
+        pslld $0x01, %xmm6
+        movdqa %xmm0, %xmm2
+        pslldq $4, %xmm0
+        psrldq $12, %xmm2
+        pslldq $4, %xmm1
+        por %xmm2, %xmm6
+        por %xmm0, %xmm7
+        por %xmm1, %xmm6
+        movdqa %xmm7, %xmm0
+        movdqa %xmm7, %xmm1
+        movdqa %xmm7, %xmm2
+        pslld $31, %xmm0
+        pslld $30, %xmm1
+        pslld $25, %xmm2
+        pxor %xmm1, %xmm0
+        pxor %xmm2, %xmm0
+        movdqa %xmm0, %xmm1
+        psrldq $4, %xmm1
+        pslldq $12, %xmm0
+        pxor %xmm0, %xmm7
+        movdqa %xmm7, %xmm2
+        movdqa %xmm7, %xmm3
+        movdqa %xmm7, %xmm0
+        psrld $0x01, %xmm2
+        psrld $2, %xmm3
+        psrld $7, %xmm0
+        pxor %xmm3, %xmm2
+        pxor %xmm0, %xmm2
+        pxor %xmm1, %xmm2
+        pxor %xmm7, %xmm2
+        pxor %xmm2, %xmm6
+        addl $16, %ecx
+        cmpl %edx, %ecx
+        jl L_AES_GCM_decrypt_calc_aad_16_loop
+        movl %r11d, %edx
+        cmpl %edx, %ecx
+        je L_AES_GCM_decrypt_calc_aad_done
+L_AES_GCM_decrypt_calc_aad_lt16:
+        subq $16, %rsp
+        pxor %xmm8, %xmm8
+        xorl %ebx, %ebx
+        movdqa %xmm8, (%rsp)
+L_AES_GCM_decrypt_calc_aad_loop:
+        movzbl (%r12,%rcx,1), %r13d
+        movb %r13b, (%rsp,%rbx,1)
+        incl %ecx
+        incl %ebx
+        cmpl %edx, %ecx
+        jl L_AES_GCM_decrypt_calc_aad_loop
+        movdqa (%rsp), %xmm8
+        addq $16, %rsp
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm8
+        pxor %xmm8, %xmm6
+        pshufd $0x4e, %xmm6, %xmm1
+        pshufd $0x4e, %xmm5, %xmm2
+        movdqa %xmm5, %xmm3
+        movdqa %xmm5, %xmm0
+        pclmulqdq $0x11, %xmm6, %xmm3
+        pclmulqdq $0x00, %xmm6, %xmm0
+        pxor %xmm6, %xmm1
+        pxor %xmm5, %xmm2
+        pclmulqdq $0x00, %xmm2, %xmm1
+        pxor %xmm0, %xmm1
+        pxor %xmm3, %xmm1
+        movdqa %xmm1, %xmm2
+        movdqa %xmm0, %xmm7
+        movdqa %xmm3, %xmm6
+        pslldq $8, %xmm2
+        psrldq $8, %xmm1
+        pxor %xmm2, %xmm7
+        pxor %xmm1, %xmm6
+        movdqa %xmm7, %xmm0
+        movdqa %xmm6, %xmm1
+        psrld $31, %xmm0
+        psrld $31, %xmm1
+        pslld $0x01, %xmm7
+        pslld $0x01, %xmm6
+        movdqa %xmm0, %xmm2
+        pslldq $4, %xmm0
+        psrldq $12, %xmm2
+        pslldq $4, %xmm1
+        por %xmm2, %xmm6
+        por %xmm0, %xmm7
+        por %xmm1, %xmm6
+        movdqa %xmm7, %xmm0
+        movdqa %xmm7, %xmm1
+        movdqa %xmm7, %xmm2
+        pslld $31, %xmm0
+        pslld $30, %xmm1
+        pslld $25, %xmm2
+        pxor %xmm1, %xmm0
+        pxor %xmm2, %xmm0
+        movdqa %xmm0, %xmm1
+        psrldq $4, %xmm1
+        pslldq $12, %xmm0
+        pxor %xmm0, %xmm7
+        movdqa %xmm7, %xmm2
+        movdqa %xmm7, %xmm3
+        movdqa %xmm7, %xmm0
+        psrld $0x01, %xmm2
+        psrld $2, %xmm3
+        psrld $7, %xmm0
+        pxor %xmm3, %xmm2
+        pxor %xmm0, %xmm2
+        pxor %xmm1, %xmm2
+        pxor %xmm7, %xmm2
+        pxor %xmm2, %xmm6
+L_AES_GCM_decrypt_calc_aad_done:
+        # Calculate counter and H
+        pshufb L_aes_gcm_bswap_epi64(%rip), %xmm4
+        movdqa %xmm5, %xmm9
+        paddd L_aes_gcm_one(%rip), %xmm4
+        movdqa %xmm5, %xmm8
+        movdqa %xmm4, 128(%rsp)
+        psrlq $63, %xmm9
+        psllq $0x01, %xmm8
+        pslldq $8, %xmm9
+        por %xmm9, %xmm8
+        pshufd $0xff, %xmm5, %xmm5
+        psrad $31, %xmm5
+        pand L_aes_gcm_mod2_128(%rip), %xmm5
+        pxor %xmm8, %xmm5
+        xorl %ebx, %ebx
+        cmpl $0x80, %r9d
+        movl %r9d, %r13d
+        jl L_AES_GCM_decrypt_done_128
+        andl $0xffffff80, %r13d
+        movdqa %xmm6, %xmm2
+        # H ^ 1
+        movdqa %xmm5, (%rsp)
+        # H ^ 2
+        pshufd $0x4e, %xmm5, %xmm9
+        pshufd $0x4e, %xmm5, %xmm10
+        movdqa %xmm5, %xmm11
+        movdqa %xmm5, %xmm8
+        pclmulqdq $0x11, %xmm5, %xmm11
+        pclmulqdq $0x00, %xmm5, %xmm8
+        pxor %xmm5, %xmm9
+        pxor %xmm5, %xmm10
+        pclmulqdq $0x00, %xmm10, %xmm9
+        pxor %xmm8, %xmm9
+        pxor %xmm11, %xmm9
+        movdqa %xmm9, %xmm10
+        movdqa %xmm11, %xmm0
+        pslldq $8, %xmm10
+        psrldq $8, %xmm9
+        pxor %xmm10, %xmm8
+        pxor %xmm9, %xmm0
+        movdqa %xmm8, %xmm12
+        movdqa %xmm8, %xmm13
+        movdqa %xmm8, %xmm14
+        pslld $31, %xmm12
+        pslld $30, %xmm13
+        pslld $25, %xmm14
+        pxor %xmm13, %xmm12
+        pxor %xmm14, %xmm12
+        movdqa %xmm12, %xmm13
+        psrldq $4, %xmm13
+        pslldq $12, %xmm12
+        pxor %xmm12, %xmm8
+        movdqa %xmm8, %xmm14
+        movdqa %xmm8, %xmm10
+        movdqa %xmm8, %xmm9
+        psrld $0x01, %xmm14
+        psrld $2, %xmm10
+        psrld $7, %xmm9
+        pxor %xmm10, %xmm14
+        pxor %xmm9, %xmm14
+        pxor %xmm13, %xmm14
+        pxor %xmm8, %xmm14
+        pxor %xmm14, %xmm0
+        movdqa %xmm0, 16(%rsp)
+        # H ^ 3
+        pshufd $0x4e, %xmm5, %xmm9
+        pshufd $0x4e, %xmm0, %xmm10
+        movdqa %xmm0, %xmm11
+        movdqa %xmm0, %xmm8
+        pclmulqdq $0x11, %xmm5, %xmm11
+        pclmulqdq $0x00, %xmm5, %xmm8
+        pxor %xmm5, %xmm9
+        pxor %xmm0, %xmm10
+        pclmulqdq $0x00, %xmm10, %xmm9
+        pxor %xmm8, %xmm9
+        pxor %xmm11, %xmm9
+        movdqa %xmm9, %xmm10
+        movdqa %xmm11, %xmm1
+        pslldq $8, %xmm10
+        psrldq $8, %xmm9
+        pxor %xmm10, %xmm8
+        pxor %xmm9, %xmm1
+        movdqa %xmm8, %xmm12
+        movdqa %xmm8, %xmm13
+        movdqa %xmm8, %xmm14
+        pslld $31, %xmm12
+        pslld $30, %xmm13
+        pslld $25, %xmm14
+        pxor %xmm13, %xmm12
+        pxor %xmm14, %xmm12
+        movdqa %xmm12, %xmm13
+        psrldq $4, %xmm13
+        pslldq $12, %xmm12
+        pxor %xmm12, %xmm8
+        movdqa %xmm8, %xmm14
+        movdqa %xmm8, %xmm10
+        movdqa %xmm8, %xmm9
+        psrld $0x01, %xmm14
+        psrld $2, %xmm10
+        psrld $7, %xmm9
+        pxor %xmm10, %xmm14
+        pxor %xmm9, %xmm14
+        pxor %xmm13, %xmm14
+        pxor %xmm8, %xmm14
+        pxor %xmm14, %xmm1
+        movdqa %xmm1, 32(%rsp)
+        # H ^ 4
+        pshufd $0x4e, %xmm0, %xmm9
+        pshufd $0x4e, %xmm0, %xmm10
+        movdqa %xmm0, %xmm11
+        movdqa %xmm0, %xmm8
+        pclmulqdq $0x11, %xmm0, %xmm11
+        pclmulqdq $0x00, %xmm0, %xmm8
+        pxor %xmm0, %xmm9
+        pxor %xmm0, %xmm10
+        pclmulqdq $0x00, %xmm10, %xmm9
+        pxor %xmm8, %xmm9
+        pxor %xmm11, %xmm9
+        movdqa %xmm9, %xmm10
+        movdqa %xmm11, %xmm3
+        pslldq $8, %xmm10
+        psrldq $8, %xmm9
+        pxor %xmm10, %xmm8
+        pxor %xmm9, %xmm3
+        movdqa %xmm8, %xmm12
+        movdqa %xmm8, %xmm13
+        movdqa %xmm8, %xmm14
+        pslld $31, %xmm12
+        pslld $30, %xmm13
+        pslld $25, %xmm14
+        pxor %xmm13, %xmm12
+        pxor %xmm14, %xmm12
+        movdqa %xmm12, %xmm13
+        psrldq $4, %xmm13
+        pslldq $12, %xmm12
+        pxor %xmm12, %xmm8
+        movdqa %xmm8, %xmm14
+        movdqa %xmm8, %xmm10
+        movdqa %xmm8, %xmm9
+        psrld $0x01, %xmm14
+        psrld $2, %xmm10
+        psrld $7, %xmm9
+        pxor %xmm10, %xmm14
+        pxor %xmm9, %xmm14
+        pxor %xmm13, %xmm14
+        pxor %xmm8, %xmm14
+        pxor %xmm14, %xmm3
+        movdqa %xmm3, 48(%rsp)
+        # H ^ 5
+        pshufd $0x4e, %xmm0, %xmm9
+        pshufd $0x4e, %xmm1, %xmm10
+        movdqa %xmm1, %xmm11
+        movdqa %xmm1, %xmm8
+        pclmulqdq $0x11, %xmm0, %xmm11
+        pclmulqdq $0x00, %xmm0, %xmm8
+        pxor %xmm0, %xmm9
+        pxor %xmm1, %xmm10
+        pclmulqdq $0x00, %xmm10, %xmm9
+        pxor %xmm8, %xmm9
+        pxor %xmm11, %xmm9
+        movdqa %xmm9, %xmm10
+        movdqa %xmm11, %xmm7
+        pslldq $8, %xmm10
+        psrldq $8, %xmm9
+        pxor %xmm10, %xmm8
+        pxor %xmm9, %xmm7
+        movdqa %xmm8, %xmm12
+        movdqa %xmm8, %xmm13
+        movdqa %xmm8, %xmm14
+        pslld $31, %xmm12
+        pslld $30, %xmm13
+        pslld $25, %xmm14
+        pxor %xmm13, %xmm12
+        pxor %xmm14, %xmm12
+        movdqa %xmm12, %xmm13
+        psrldq $4, %xmm13
+        pslldq $12, %xmm12
+        pxor %xmm12, %xmm8
+        movdqa %xmm8, %xmm14
+        movdqa %xmm8, %xmm10
+        movdqa %xmm8, %xmm9
+        psrld $0x01, %xmm14
+        psrld $2, %xmm10
+        psrld $7, %xmm9
+        pxor %xmm10, %xmm14
+        pxor %xmm9, %xmm14
+        pxor %xmm13, %xmm14
+        pxor %xmm8, %xmm14
+        pxor %xmm14, %xmm7
+        movdqa %xmm7, 64(%rsp)
+        # H ^ 6
+        pshufd $0x4e, %xmm1, %xmm9
+        pshufd $0x4e, %xmm1, %xmm10
+        movdqa %xmm1, %xmm11
+        movdqa %xmm1, %xmm8
+        pclmulqdq $0x11, %xmm1, %xmm11
+        pclmulqdq $0x00, %xmm1, %xmm8
+        pxor %xmm1, %xmm9
+        pxor %xmm1, %xmm10
+        pclmulqdq $0x00, %xmm10, %xmm9
+        pxor %xmm8, %xmm9
+        pxor %xmm11, %xmm9
+        movdqa %xmm9, %xmm10
+        movdqa %xmm11, %xmm7
+        pslldq $8, %xmm10
+        psrldq $8, %xmm9
+        pxor %xmm10, %xmm8
+        pxor %xmm9, %xmm7
+        movdqa %xmm8, %xmm12
+        movdqa %xmm8, %xmm13
+        movdqa %xmm8, %xmm14
+        pslld $31, %xmm12
+        pslld $30, %xmm13
+        pslld $25, %xmm14
+        pxor %xmm13, %xmm12
+        pxor %xmm14, %xmm12
+        movdqa %xmm12, %xmm13
+        psrldq $4, %xmm13
+        pslldq $12, %xmm12
+        pxor %xmm12, %xmm8
+        movdqa %xmm8, %xmm14
+        movdqa %xmm8, %xmm10
+        movdqa %xmm8, %xmm9
+        psrld $0x01, %xmm14
+        psrld $2, %xmm10
+        psrld $7, %xmm9
+        pxor %xmm10, %xmm14
+        pxor %xmm9, %xmm14
+        pxor %xmm13, %xmm14
+        pxor %xmm8, %xmm14
+        pxor %xmm14, %xmm7
+        movdqa %xmm7, 80(%rsp)
+        # H ^ 7
+        pshufd $0x4e, %xmm1, %xmm9
+        pshufd $0x4e, %xmm3, %xmm10
+        movdqa %xmm3, %xmm11
+        movdqa %xmm3, %xmm8
+        pclmulqdq $0x11, %xmm1, %xmm11
+        pclmulqdq $0x00, %xmm1, %xmm8
+        pxor %xmm1, %xmm9
+        pxor %xmm3, %xmm10
+        pclmulqdq $0x00, %xmm10, %xmm9
+        pxor %xmm8, %xmm9
+        pxor %xmm11, %xmm9
+        movdqa %xmm9, %xmm10
+        movdqa %xmm11, %xmm7
+        pslldq $8, %xmm10
+        psrldq $8, %xmm9
+        pxor %xmm10, %xmm8
+        pxor %xmm9, %xmm7
+        movdqa %xmm8, %xmm12
+        movdqa %xmm8, %xmm13
+        movdqa %xmm8, %xmm14
+        pslld $31, %xmm12
+        pslld $30, %xmm13
+        pslld $25, %xmm14
+        pxor %xmm13, %xmm12
+        pxor %xmm14, %xmm12
+        movdqa %xmm12, %xmm13
+        psrldq $4, %xmm13
+        pslldq $12, %xmm12
+        pxor %xmm12, %xmm8
+        movdqa %xmm8, %xmm14
+        movdqa %xmm8, %xmm10
+        movdqa %xmm8, %xmm9
+        psrld $0x01, %xmm14
+        psrld $2, %xmm10
+        psrld $7, %xmm9
+        pxor %xmm10, %xmm14
+        pxor %xmm9, %xmm14
+        pxor %xmm13, %xmm14
+        pxor %xmm8, %xmm14
+        pxor %xmm14, %xmm7
+        movdqa %xmm7, 96(%rsp)
+        # H ^ 8
+        pshufd $0x4e, %xmm3, %xmm9
+        pshufd $0x4e, %xmm3, %xmm10
+        movdqa %xmm3, %xmm11
+        movdqa %xmm3, %xmm8
+        pclmulqdq $0x11, %xmm3, %xmm11
+        pclmulqdq $0x00, %xmm3, %xmm8
+        pxor %xmm3, %xmm9
+        pxor %xmm3, %xmm10
+        pclmulqdq $0x00, %xmm10, %xmm9
+        pxor %xmm8, %xmm9
+        pxor %xmm11, %xmm9
+        movdqa %xmm9, %xmm10
+        movdqa %xmm11, %xmm7
+        pslldq $8, %xmm10
+        psrldq $8, %xmm9
+        pxor %xmm10, %xmm8
+        pxor %xmm9, %xmm7
+        movdqa %xmm8, %xmm12
+        movdqa %xmm8, %xmm13
+        movdqa %xmm8, %xmm14
+        pslld $31, %xmm12
+        pslld $30, %xmm13
+        pslld $25, %xmm14
+        pxor %xmm13, %xmm12
+        pxor %xmm14, %xmm12
+        movdqa %xmm12, %xmm13
+        psrldq $4, %xmm13
+        pslldq $12, %xmm12
+        pxor %xmm12, %xmm8
+        movdqa %xmm8, %xmm14
+        movdqa %xmm8, %xmm10
+        movdqa %xmm8, %xmm9
+        psrld $0x01, %xmm14
+        psrld $2, %xmm10
+        psrld $7, %xmm9
+        pxor %xmm10, %xmm14
+        pxor %xmm9, %xmm14
+        pxor %xmm13, %xmm14
+        pxor %xmm8, %xmm14
+        pxor %xmm14, %xmm7
+        movdqa %xmm7, 112(%rsp)
+L_AES_GCM_decrypt_ghash_128:
+        leaq (%rdi,%rbx,1), %rcx
+        leaq (%rsi,%rbx,1), %rdx
+        movdqa 128(%rsp), %xmm8
+        movdqa L_aes_gcm_bswap_epi64(%rip), %xmm1
+        movdqa %xmm8, %xmm0
+        pshufb %xmm1, %xmm8
+        movdqa %xmm0, %xmm9
+        paddd L_aes_gcm_one(%rip), %xmm9
+        pshufb %xmm1, %xmm9
+        movdqa %xmm0, %xmm10
+        paddd L_aes_gcm_two(%rip), %xmm10
+        pshufb %xmm1, %xmm10
+        movdqa %xmm0, %xmm11
+        paddd L_aes_gcm_three(%rip), %xmm11
+        pshufb %xmm1, %xmm11
+        movdqa %xmm0, %xmm12
+        paddd L_aes_gcm_four(%rip), %xmm12
+        pshufb %xmm1, %xmm12
+        movdqa %xmm0, %xmm13
+        paddd L_aes_gcm_five(%rip), %xmm13
+        pshufb %xmm1, %xmm13
+        movdqa %xmm0, %xmm14
+        paddd L_aes_gcm_six(%rip), %xmm14
+        pshufb %xmm1, %xmm14
+        movdqa %xmm0, %xmm15
+        paddd L_aes_gcm_seven(%rip), %xmm15
+        pshufb %xmm1, %xmm15
+        paddd L_aes_gcm_eight(%rip), %xmm0
+        movdqa (%r15), %xmm7
+        movdqa %xmm0, 128(%rsp)
+        pxor %xmm7, %xmm8
+        pxor %xmm7, %xmm9
+        pxor %xmm7, %xmm10
+        pxor %xmm7, %xmm11
+        pxor %xmm7, %xmm12
+        pxor %xmm7, %xmm13
+        pxor %xmm7, %xmm14
+        pxor %xmm7, %xmm15
+        movdqa 112(%rsp), %xmm7
+        movdqu (%rcx), %xmm0
+        aesenc 16(%r15), %xmm8
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+        pxor %xmm2, %xmm0
+        pshufd $0x4e, %xmm7, %xmm1
+        pshufd $0x4e, %xmm0, %xmm5
+        pxor %xmm7, %xmm1
+        pxor %xmm0, %xmm5
+        movdqa %xmm0, %xmm3
+        pclmulqdq $0x11, %xmm7, %xmm3
+        aesenc 16(%r15), %xmm9
+        aesenc 16(%r15), %xmm10
+        movdqa %xmm0, %xmm2
+        pclmulqdq $0x00, %xmm7, %xmm2
+        aesenc 16(%r15), %xmm11
+        aesenc 16(%r15), %xmm12
+        pclmulqdq $0x00, %xmm5, %xmm1
+        aesenc 16(%r15), %xmm13
+        aesenc 16(%r15), %xmm14
+        aesenc 16(%r15), %xmm15
+        pxor %xmm2, %xmm1
+        pxor %xmm3, %xmm1
+        movdqa 96(%rsp), %xmm7
+        movdqu 16(%rcx), %xmm0
+        pshufd $0x4e, %xmm7, %xmm4
+        pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+        aesenc 32(%r15), %xmm8
+        pxor %xmm7, %xmm4
+        pshufd $0x4e, %xmm0, %xmm5
+        pxor %xmm0, %xmm5
+        movdqa %xmm0, %xmm6
+        pclmulqdq $0x11, %xmm7, %xmm6
+        aesenc 32(%r15), %xmm9
+        aesenc 32(%r15), %xmm10
+        pclmulqdq $0x00, %xmm0, %xmm7
+        aesenc 32(%r15), %xmm11
+        aesenc 32(%r15), %xmm12
+        pclmulqdq $0x00, %xmm5, %xmm4
+        aesenc 32(%r15), %xmm13
+        aesenc 32(%r15), %xmm14
+        aesenc 32(%r15), %xmm15
+        pxor %xmm7, %xmm1
+        pxor %xmm7, %xmm2
+        pxor %xmm6, %xmm1
+        pxor %xmm6, %xmm3
+        pxor %xmm4, %xmm1
+        movdqa 80(%rsp), %xmm7
+        movdqu 32(%rcx), %xmm0
+        pshufd $0x4e, %xmm7, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 48(%r15), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 48(%r15), %xmm9 + aesenc 48(%r15), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 48(%r15), %xmm11 + aesenc 48(%r15), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 48(%r15), %xmm13 + aesenc 48(%r15), %xmm14 + aesenc 48(%r15), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa 64(%rsp), %xmm7 + movdqu 48(%rcx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 64(%r15), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 64(%r15), %xmm9 + aesenc 64(%r15), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 64(%r15), %xmm11 + aesenc 64(%r15), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 64(%r15), %xmm13 + aesenc 64(%r15), %xmm14 + aesenc 64(%r15), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa 48(%rsp), %xmm7 + movdqu 64(%rcx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 80(%r15), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 80(%r15), %xmm9 + aesenc 80(%r15), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 80(%r15), %xmm11 + aesenc 80(%r15), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 80(%r15), %xmm13 + aesenc 80(%r15), %xmm14 + aesenc 80(%r15), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa 32(%rsp), %xmm7 + movdqu 80(%rcx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 96(%r15), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 96(%r15), %xmm9 + aesenc 96(%r15), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 96(%r15), %xmm11 + aesenc 96(%r15), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 96(%r15), %xmm13 + aesenc 96(%r15), %xmm14 + aesenc 96(%r15), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa 16(%rsp), %xmm7 + movdqu 96(%rcx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 112(%r15), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 112(%r15), %xmm9 + aesenc 112(%r15), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 112(%r15), %xmm11 + aesenc 112(%r15), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 112(%r15), %xmm13 + aesenc 112(%r15), %xmm14 + aesenc 112(%r15), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa (%rsp), %xmm7 + movdqu 112(%rcx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 128(%r15), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 128(%r15), %xmm9 + aesenc 128(%r15), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 128(%r15), %xmm11 + aesenc 128(%r15), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 128(%r15), %xmm13 + aesenc 128(%r15), %xmm14 + aesenc 128(%r15), %xmm15 + pxor %xmm7, %xmm1 + pxor 
%xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa %xmm1, %xmm5 + psrldq $8, %xmm1 + pslldq $8, %xmm5 + aesenc 144(%r15), %xmm8 + pxor %xmm5, %xmm2 + pxor %xmm1, %xmm3 + movdqa %xmm2, %xmm7 + movdqa %xmm2, %xmm4 + movdqa %xmm2, %xmm5 + aesenc 144(%r15), %xmm9 + pslld $31, %xmm7 + pslld $30, %xmm4 + pslld $25, %xmm5 + aesenc 144(%r15), %xmm10 + pxor %xmm4, %xmm7 + pxor %xmm5, %xmm7 + aesenc 144(%r15), %xmm11 + movdqa %xmm7, %xmm4 + pslldq $12, %xmm7 + psrldq $4, %xmm4 + aesenc 144(%r15), %xmm12 + pxor %xmm7, %xmm2 + movdqa %xmm2, %xmm5 + movdqa %xmm2, %xmm1 + movdqa %xmm2, %xmm0 + aesenc 144(%r15), %xmm13 + psrld $0x01, %xmm5 + psrld $2, %xmm1 + psrld $7, %xmm0 + aesenc 144(%r15), %xmm14 + pxor %xmm1, %xmm5 + pxor %xmm0, %xmm5 + aesenc 144(%r15), %xmm15 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm2 + pxor %xmm3, %xmm2 + cmpl $11, %r10d + movdqa 160(%r15), %xmm7 + jl L_AES_GCM_decrypt_aesenc_128_ghash_avx_done + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 176(%r15), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + cmpl $13, %r10d + movdqa 192(%r15), %xmm7 + jl L_AES_GCM_decrypt_aesenc_128_ghash_avx_done + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 208(%r15), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 224(%r15), %xmm7 +L_AES_GCM_decrypt_aesenc_128_ghash_avx_done: + aesenclast %xmm7, %xmm8 + aesenclast %xmm7, %xmm9 + movdqu (%rcx), %xmm0 + movdqu 16(%rcx), %xmm1 + pxor %xmm0, %xmm8 + pxor %xmm1, %xmm9 + movdqu %xmm8, (%rdx) + movdqu %xmm9, 16(%rdx) + aesenclast %xmm7, %xmm10 + aesenclast %xmm7, %xmm11 + movdqu 32(%rcx), %xmm0 + movdqu 48(%rcx), %xmm1 + pxor %xmm0, %xmm10 + pxor %xmm1, %xmm11 + movdqu %xmm10, 32(%rdx) + movdqu %xmm11, 48(%rdx) + aesenclast %xmm7, %xmm12 + aesenclast %xmm7, %xmm13 + movdqu 64(%rcx), %xmm0 + movdqu 80(%rcx), %xmm1 + pxor %xmm0, %xmm12 + pxor %xmm1, %xmm13 + movdqu %xmm12, 64(%rdx) + movdqu %xmm13, 80(%rdx) + aesenclast %xmm7, %xmm14 + aesenclast %xmm7, %xmm15 + movdqu 96(%rcx), %xmm0 + movdqu 112(%rcx), %xmm1 + pxor %xmm0, %xmm14 + pxor %xmm1, %xmm15 + movdqu %xmm14, 96(%rdx) + movdqu %xmm15, 112(%rdx) + addl $0x80, %ebx + cmpl %r13d, %ebx + jl L_AES_GCM_decrypt_ghash_128 + movdqa %xmm2, %xmm6 + movdqa (%rsp), %xmm5 +L_AES_GCM_decrypt_done_128: + movl %r9d, %edx + cmpl %edx, %ebx + jge L_AES_GCM_decrypt_done_dec + movl %r9d, %r13d + andl $0xfffffff0, %r13d + cmpl %r13d, %ebx + jge L_AES_GCM_decrypt_last_block_done +L_AES_GCM_decrypt_last_block_start: + leaq (%rdi,%rbx,1), %rcx + leaq (%rsi,%rbx,1), %rdx + movdqu (%rcx), %xmm1 + movdqa %xmm5, %xmm0 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm1 + pxor %xmm6, %xmm1 + movdqa 128(%rsp), %xmm8 + movdqa %xmm8, %xmm9 + pshufb L_aes_gcm_bswap_epi64(%rip), %xmm8 + paddd L_aes_gcm_one(%rip), %xmm9 + pxor (%r15), %xmm8 + movdqa %xmm9, 128(%rsp) + movdqa %xmm1, %xmm10 + pclmulqdq $16, %xmm0, %xmm10 + aesenc 16(%r15), %xmm8 + aesenc 32(%r15), %xmm8 + movdqa %xmm1, %xmm11 + pclmulqdq $0x01, %xmm0, %xmm11 + aesenc 48(%r15), %xmm8 + 
aesenc 64(%r15), %xmm8 + movdqa %xmm1, %xmm12 + pclmulqdq $0x00, %xmm0, %xmm12 + aesenc 80(%r15), %xmm8 + movdqa %xmm1, %xmm1 + pclmulqdq $0x11, %xmm0, %xmm1 + aesenc 96(%r15), %xmm8 + pxor %xmm11, %xmm10 + movdqa %xmm10, %xmm2 + psrldq $8, %xmm10 + pslldq $8, %xmm2 + aesenc 112(%r15), %xmm8 + movdqa %xmm1, %xmm3 + pxor %xmm12, %xmm2 + pxor %xmm10, %xmm3 + movdqa L_aes_gcm_mod2_128(%rip), %xmm0 + movdqa %xmm2, %xmm11 + pclmulqdq $16, %xmm0, %xmm11 + aesenc 128(%r15), %xmm8 + pshufd $0x4e, %xmm2, %xmm10 + pxor %xmm11, %xmm10 + movdqa %xmm10, %xmm11 + pclmulqdq $16, %xmm0, %xmm11 + aesenc 144(%r15), %xmm8 + pshufd $0x4e, %xmm10, %xmm6 + pxor %xmm11, %xmm6 + pxor %xmm3, %xmm6 + cmpl $11, %r10d + movdqa 160(%r15), %xmm9 + jl L_AES_GCM_decrypt_aesenc_gfmul_last + aesenc %xmm9, %xmm8 + aesenc 176(%r15), %xmm8 + cmpl $13, %r10d + movdqa 192(%r15), %xmm9 + jl L_AES_GCM_decrypt_aesenc_gfmul_last + aesenc %xmm9, %xmm8 + aesenc 208(%r15), %xmm8 + movdqa 224(%r15), %xmm9 +L_AES_GCM_decrypt_aesenc_gfmul_last: + aesenclast %xmm9, %xmm8 + movdqu (%rcx), %xmm9 + pxor %xmm9, %xmm8 + movdqu %xmm8, (%rdx) + addl $16, %ebx + cmpl %r13d, %ebx + jl L_AES_GCM_decrypt_last_block_start +L_AES_GCM_decrypt_last_block_done: + movl %r9d, %ecx + movl %ecx, %edx + andl $15, %ecx + jz L_AES_GCM_decrypt_aesenc_last15_dec_avx_done + movdqa 128(%rsp), %xmm4 + pshufb L_aes_gcm_bswap_epi64(%rip), %xmm4 + pxor (%r15), %xmm4 + aesenc 16(%r15), %xmm4 + aesenc 32(%r15), %xmm4 + aesenc 48(%r15), %xmm4 + aesenc 64(%r15), %xmm4 + aesenc 80(%r15), %xmm4 + aesenc 96(%r15), %xmm4 + aesenc 112(%r15), %xmm4 + aesenc 128(%r15), %xmm4 + aesenc 144(%r15), %xmm4 + cmpl $11, %r10d + movdqa 160(%r15), %xmm9 + jl L_AES_GCM_decrypt_aesenc_last15_dec_avx_aesenc_avx_last + aesenc %xmm9, %xmm4 + aesenc 176(%r15), %xmm4 + cmpl $13, %r10d + movdqa 192(%r15), %xmm9 + jl L_AES_GCM_decrypt_aesenc_last15_dec_avx_aesenc_avx_last + aesenc %xmm9, %xmm4 + aesenc 208(%r15), %xmm4 + movdqa 224(%r15), %xmm9 +L_AES_GCM_decrypt_aesenc_last15_dec_avx_aesenc_avx_last: + aesenclast %xmm9, %xmm4 + subq $32, %rsp + xorl %ecx, %ecx + movdqa %xmm4, (%rsp) + pxor %xmm0, %xmm0 + movdqa %xmm0, 16(%rsp) +L_AES_GCM_decrypt_aesenc_last15_dec_avx_loop: + movzbl (%rdi,%rbx,1), %r13d + movb %r13b, 16(%rsp,%rcx,1) + xorb (%rsp,%rcx,1), %r13b + movb %r13b, (%rsi,%rbx,1) + incl %ebx + incl %ecx + cmpl %edx, %ebx + jl L_AES_GCM_decrypt_aesenc_last15_dec_avx_loop + movdqa 16(%rsp), %xmm4 + addq $32, %rsp + pshufb L_aes_gcm_bswap_mask(%rip), %xmm4 + pxor %xmm4, %xmm6 + pshufd $0x4e, %xmm5, %xmm9 + pshufd $0x4e, %xmm6, %xmm10 + movdqa %xmm6, %xmm11 + movdqa %xmm6, %xmm8 + pclmulqdq $0x11, %xmm5, %xmm11 + pclmulqdq $0x00, %xmm5, %xmm8 + pxor %xmm5, %xmm9 + pxor %xmm6, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm6 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm6 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm6 +L_AES_GCM_decrypt_aesenc_last15_dec_avx_done: +L_AES_GCM_decrypt_done_dec: + movl %r9d, %edx + movl %r11d, %ecx + shlq $3, %rdx + 
shlq $3, %rcx + pinsrq $0x00, %rdx, %xmm0 + pinsrq $0x01, %rcx, %xmm0 + pxor %xmm0, %xmm6 + pshufd $0x4e, %xmm5, %xmm9 + pshufd $0x4e, %xmm6, %xmm10 + movdqa %xmm6, %xmm11 + movdqa %xmm6, %xmm8 + pclmulqdq $0x11, %xmm5, %xmm11 + pclmulqdq $0x00, %xmm5, %xmm8 + pxor %xmm5, %xmm9 + pxor %xmm6, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm6 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm6 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm6 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm6 + movdqa 144(%rsp), %xmm0 + pxor %xmm6, %xmm0 + cmpl $16, %r14d + je L_AES_GCM_decrypt_cmp_tag_16 + subq $16, %rsp + xorq %rcx, %rcx + xorq %rbx, %rbx + movdqa %xmm0, (%rsp) +L_AES_GCM_decrypt_cmp_tag_loop: + movzbl (%rsp,%rcx,1), %r13d + xorb (%r8,%rcx,1), %r13b + orb %r13b, %bl + incl %ecx + cmpl %r14d, %ecx + jne L_AES_GCM_decrypt_cmp_tag_loop + cmpb $0x00, %bl + sete %bl + addq $16, %rsp + xorq %rcx, %rcx + jmp L_AES_GCM_decrypt_cmp_tag_done +L_AES_GCM_decrypt_cmp_tag_16: + movdqu (%r8), %xmm1 + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm0, %rdx + # %%edx == 0xFFFF then return 1 else => return 0 + xorl %ebx, %ebx + cmpl $0xffff, %edx + sete %bl +L_AES_GCM_decrypt_cmp_tag_done: + movl %ebx, (%rbp) + addq $0xa8, %rsp + popq %rbp + popq %r15 + popq %r14 + popq %rbx + popq %r12 + popq %r13 + repz retq +#ifndef __APPLE__ +.size AES_GCM_decrypt,.-AES_GCM_decrypt +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX1 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_aes_gcm_one: +.quad 0x0, 0x1 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_aes_gcm_two: +.quad 0x0, 0x2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_aes_gcm_three: +.quad 0x0, 0x3 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_aes_gcm_four: +.quad 0x0, 0x4 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_aes_gcm_five: +.quad 0x0, 0x5 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_aes_gcm_six: +.quad 0x0, 0x6 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_aes_gcm_seven: +.quad 0x0, 0x7 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_aes_gcm_eight: +.quad 0x0, 0x8 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* 
__APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_aes_gcm_bswap_epi64: +.quad 0x1020304050607, 0x8090a0b0c0d0e0f +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_aes_gcm_bswap_mask: +.quad 0x8090a0b0c0d0e0f, 0x1020304050607 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_aes_gcm_mod2_128: +.quad 0x1, 0xc200000000000000 +#ifndef __APPLE__ +.text +.globl AES_GCM_encrypt_avx1 +.type AES_GCM_encrypt_avx1,@function +.align 4 +AES_GCM_encrypt_avx1: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt_avx1 +.p2align 2 +_AES_GCM_encrypt_avx1: +#endif /* __APPLE__ */ + pushq %r13 + pushq %r12 + pushq %rbx + pushq %r14 + pushq %r15 + movq %rdx, %r12 + movq %rcx, %rax + movl 48(%rsp), %r11d + movl 56(%rsp), %ebx + movl 64(%rsp), %r14d + movq 72(%rsp), %r15 + movl 80(%rsp), %r10d + subq $0xa0, %rsp + vpxor %xmm4, %xmm4, %xmm4 + vpxor %xmm6, %xmm6, %xmm6 + movl %ebx, %edx + cmpl $12, %edx + jne L_AES_GCM_encrypt_avx1_iv_not_12 + # # Calculate values when IV is 12 bytes + # Set counter based on IV + movl $0x1000000, %ecx + vpinsrq $0x00, (%rax), %xmm4, %xmm4 + vpinsrd $2, 8(%rax), %xmm4, %xmm4 + vpinsrd $3, %ecx, %xmm4, %xmm4 + # H = Encrypt X(=0) and T = Encrypt counter + vmovdqa (%r15), %xmm5 + vpxor %xmm5, %xmm4, %xmm1 + vmovdqa 16(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 32(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 48(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 64(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 80(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 96(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 112(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 128(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 144(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + cmpl $11, %r10d + vmovdqa 160(%r15), %xmm7 + jl L_AES_GCM_encrypt_avx1_calc_iv_12_last + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 176(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + cmpl $13, %r10d + vmovdqa 192(%r15), %xmm7 + jl L_AES_GCM_encrypt_avx1_calc_iv_12_last + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 208(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 224(%r15), %xmm7 +L_AES_GCM_encrypt_avx1_calc_iv_12_last: + vaesenclast %xmm7, %xmm5, %xmm5 + vaesenclast %xmm7, %xmm1, %xmm1 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5 + vmovdqa %xmm1, 144(%rsp) + jmp L_AES_GCM_encrypt_avx1_iv_done +L_AES_GCM_encrypt_avx1_iv_not_12: + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + vmovdqa (%r15), %xmm5 + vaesenc 16(%r15), %xmm5, %xmm5 + vaesenc 32(%r15), %xmm5, %xmm5 + vaesenc 48(%r15), %xmm5, %xmm5 + vaesenc 64(%r15), %xmm5, %xmm5 + vaesenc 80(%r15), %xmm5, %xmm5 + vaesenc 96(%r15), %xmm5, %xmm5 + vaesenc 112(%r15), %xmm5, %xmm5 + vaesenc 128(%r15), %xmm5, %xmm5 + vaesenc 144(%r15), %xmm5, %xmm5 + cmpl $11, %r10d + vmovdqa 160(%r15), %xmm9 + jl L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last + vaesenc %xmm9, 
%xmm5, %xmm5 + vaesenc 176(%r15), %xmm5, %xmm5 + cmpl $13, %r10d + vmovdqa 192(%r15), %xmm9 + jl L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last + vaesenc %xmm9, %xmm5, %xmm5 + vaesenc 208(%r15), %xmm5, %xmm5 + vmovdqa 224(%r15), %xmm9 +L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last: + vaesenclast %xmm9, %xmm5, %xmm5 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5 + # Calc counter + # Initialization vector + cmpl $0x00, %edx + movq $0x00, %rcx + je L_AES_GCM_encrypt_avx1_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_encrypt_avx1_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_encrypt_avx1_calc_iv_16_loop: + vmovdqu (%rax,%rcx,1), %xmm8 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + vpxor %xmm8, %xmm4, %xmm4 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm4, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm7 + vmovdqa %xmm3, %xmm4 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm1, %xmm4, %xmm4 + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + vpslld $31, %xmm7, %xmm0 + vpslld $30, %xmm7, %xmm1 + vpslld $25, %xmm7, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm7, %xmm7 + vpsrld $0x01, %xmm7, %xmm2 + vpsrld $2, %xmm7, %xmm3 + vpsrld $7, %xmm7, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm4, %xmm4 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_avx1_calc_iv_16_loop + movl %ebx, %edx + cmpl %edx, %ecx + je L_AES_GCM_encrypt_avx1_calc_iv_done +L_AES_GCM_encrypt_avx1_calc_iv_lt16: + subq $16, %rsp + vpxor %xmm8, %xmm8, %xmm8 + xorl %ebx, %ebx + vmovdqa %xmm8, (%rsp) +L_AES_GCM_encrypt_avx1_calc_iv_loop: + movzbl (%rax,%rcx,1), %r13d + movb %r13b, (%rsp,%rbx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_avx1_calc_iv_loop + vmovdqa (%rsp), %xmm8 + addq $16, %rsp + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + vpxor %xmm8, %xmm4, %xmm4 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm4, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm7 + vmovdqa %xmm3, %xmm4 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm1, %xmm4, %xmm4 + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + vpslld $31, %xmm7, %xmm0 + vpslld $30, %xmm7, %xmm1 + vpslld $25, %xmm7, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm7, %xmm7 + vpsrld $0x01, %xmm7, %xmm2 + vpsrld 
$2, %xmm7, %xmm3 + vpsrld $7, %xmm7, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm4, %xmm4 +L_AES_GCM_encrypt_avx1_calc_iv_done: + # T = Encrypt counter + vpxor %xmm0, %xmm0, %xmm0 + shll $3, %edx + vpinsrq $0x00, %rdx, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm4, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm7 + vmovdqa %xmm3, %xmm4 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm1, %xmm4, %xmm4 + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + vpslld $31, %xmm7, %xmm0 + vpslld $30, %xmm7, %xmm1 + vpslld $25, %xmm7, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm7, %xmm7 + vpsrld $0x01, %xmm7, %xmm2 + vpsrld $2, %xmm7, %xmm3 + vpsrld $7, %xmm7, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm4, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4 + # Encrypt counter + vmovdqa (%r15), %xmm8 + vpxor %xmm4, %xmm8, %xmm8 + vaesenc 16(%r15), %xmm8, %xmm8 + vaesenc 32(%r15), %xmm8, %xmm8 + vaesenc 48(%r15), %xmm8, %xmm8 + vaesenc 64(%r15), %xmm8, %xmm8 + vaesenc 80(%r15), %xmm8, %xmm8 + vaesenc 96(%r15), %xmm8, %xmm8 + vaesenc 112(%r15), %xmm8, %xmm8 + vaesenc 128(%r15), %xmm8, %xmm8 + vaesenc 144(%r15), %xmm8, %xmm8 + cmpl $11, %r10d + vmovdqa 160(%r15), %xmm9 + jl L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 176(%r15), %xmm8, %xmm8 + cmpl $13, %r10d + vmovdqa 192(%r15), %xmm9 + jl L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 208(%r15), %xmm8, %xmm8 + vmovdqa 224(%r15), %xmm9 +L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last: + vaesenclast %xmm9, %xmm8, %xmm8 + vmovdqa %xmm8, 144(%rsp) +L_AES_GCM_encrypt_avx1_iv_done: + # Additional authentication data + movl %r11d, %edx + cmpl $0x00, %edx + je L_AES_GCM_encrypt_avx1_calc_aad_done + xorl %ecx, %ecx + cmpl $16, %edx + jl L_AES_GCM_encrypt_avx1_calc_aad_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_encrypt_avx1_calc_aad_16_loop: + vmovdqu (%r12,%rcx,1), %xmm8 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + vpxor %xmm8, %xmm6, %xmm6 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm6, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm7 + vmovdqa %xmm3, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm1, %xmm6, %xmm6 + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm6, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm6, %xmm6 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm6, %xmm6 + 
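# The vpsrld $31 / vpslld $0x01 pairs plus the vpslldq/vpsrldq lane carries
+ # implement a one-bit left shift of the 256-bit carry-less product (GHASH
+ # operates on bit-reflected values); the vpslld $31/$30/$25 and
+ # vpsrld $0x01/$2/$7 runs below then reduce it modulo the GCM polynomial
+ # x^128 + x^7 + x^2 + x + 1 +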
vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm6, %xmm6 + vpslld $31, %xmm7, %xmm0 + vpslld $30, %xmm7, %xmm1 + vpslld $25, %xmm7, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm7, %xmm7 + vpsrld $0x01, %xmm7, %xmm2 + vpsrld $2, %xmm7, %xmm3 + vpsrld $7, %xmm7, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm6, %xmm6 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_avx1_calc_aad_16_loop + movl %r11d, %edx + cmpl %edx, %ecx + je L_AES_GCM_encrypt_avx1_calc_aad_done +L_AES_GCM_encrypt_avx1_calc_aad_lt16: + subq $16, %rsp + vpxor %xmm8, %xmm8, %xmm8 + xorl %ebx, %ebx + vmovdqa %xmm8, (%rsp) +L_AES_GCM_encrypt_avx1_calc_aad_loop: + movzbl (%r12,%rcx,1), %r13d + movb %r13b, (%rsp,%rbx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_avx1_calc_aad_loop + vmovdqa (%rsp), %xmm8 + addq $16, %rsp + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + vpxor %xmm8, %xmm6, %xmm6 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm6, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm7 + vmovdqa %xmm3, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm1, %xmm6, %xmm6 + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm6, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm6, %xmm6 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm6, %xmm6 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm6, %xmm6 + vpslld $31, %xmm7, %xmm0 + vpslld $30, %xmm7, %xmm1 + vpslld $25, %xmm7, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm7, %xmm7 + vpsrld $0x01, %xmm7, %xmm2 + vpsrld $2, %xmm7, %xmm3 + vpsrld $7, %xmm7, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm6, %xmm6 +L_AES_GCM_encrypt_avx1_calc_aad_done: + # Calculate counter and H + vpsrlq $63, %xmm5, %xmm9 + vpsllq $0x01, %xmm5, %xmm8 + vpslldq $8, %xmm9, %xmm9 + vpor %xmm9, %xmm8, %xmm8 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4 + vpand L_avx1_aes_gcm_mod2_128(%rip), %xmm5, %xmm5 + vpaddd L_avx1_aes_gcm_one(%rip), %xmm4, %xmm4 + vpxor %xmm8, %xmm5, %xmm5 + vmovdqa %xmm4, 128(%rsp) + xorl %ebx, %ebx + cmpl $0x80, %r9d + movl %r9d, %r13d + jl L_AES_GCM_encrypt_avx1_done_128 + andl $0xffffff80, %r13d + vmovdqa %xmm6, %xmm2 + # H ^ 1 + vmovdqa %xmm5, (%rsp) + # H ^ 2 + vpclmulqdq $0x00, %xmm5, %xmm5, %xmm8 + vpclmulqdq $0x11, %xmm5, %xmm5, %xmm0 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm0, %xmm0 + vmovdqa %xmm0, 16(%rsp) + # H ^ 3 + # ghash_gfmul_red_avx 
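+ # H ^ 3 = H * (H ^ 2): a Karatsuba-style carry-less multiply (three
+ # vpclmulqdq) followed by the shift-based polynomial reduction. Odd powers
+ # are products of two stored powers, even powers are squarings; the table
+ # (%rsp)..112(%rsp) = H^1..H^8 lets the 128-byte loop below fold eight
+ # blocks into the GHASH state with one reduction per iteration.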
+ vpshufd $0x4e, %xmm5, %xmm9 + vpshufd $0x4e, %xmm0, %xmm10 + vpclmulqdq $0x11, %xmm5, %xmm0, %xmm11 + vpclmulqdq $0x00, %xmm5, %xmm0, %xmm8 + vpxor %xmm5, %xmm9, %xmm9 + vpxor %xmm0, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm1 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm1, %xmm1 + vmovdqa %xmm1, 32(%rsp) + # H ^ 4 + vpclmulqdq $0x00, %xmm0, %xmm0, %xmm8 + vpclmulqdq $0x11, %xmm0, %xmm0, %xmm3 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm3, %xmm3 + vmovdqa %xmm3, 48(%rsp) + # H ^ 5 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm0, %xmm9 + vpshufd $0x4e, %xmm1, %xmm10 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm11 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm8 + vpxor %xmm0, %xmm9, %xmm9 + vpxor %xmm1, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm7 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm7, %xmm7 + vmovdqa %xmm7, 64(%rsp) + # H ^ 6 + vpclmulqdq $0x00, %xmm1, %xmm1, %xmm8 + vpclmulqdq $0x11, %xmm1, %xmm1, %xmm7 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm7, %xmm7 + vmovdqa %xmm7, 80(%rsp) + # H ^ 7 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm1, %xmm9 + vpshufd $0x4e, %xmm3, %xmm10 + vpclmulqdq $0x11, %xmm1, %xmm3, %xmm11 + vpclmulqdq $0x00, %xmm1, %xmm3, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm3, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm7 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor 
%xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm7, %xmm7 + vmovdqa %xmm7, 96(%rsp) + # H ^ 8 + vpclmulqdq $0x00, %xmm3, %xmm3, %xmm8 + vpclmulqdq $0x11, %xmm3, %xmm3, %xmm7 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm7, %xmm7 + vmovdqa %xmm7, 112(%rsp) + # First 128 bytes of input + vmovdqa 128(%rsp), %xmm0 + vmovdqa L_avx1_aes_gcm_bswap_epi64(%rip), %xmm1 + vpshufb %xmm1, %xmm0, %xmm8 + vpaddd L_avx1_aes_gcm_one(%rip), %xmm0, %xmm9 + vpshufb %xmm1, %xmm9, %xmm9 + vpaddd L_avx1_aes_gcm_two(%rip), %xmm0, %xmm10 + vpshufb %xmm1, %xmm10, %xmm10 + vpaddd L_avx1_aes_gcm_three(%rip), %xmm0, %xmm11 + vpshufb %xmm1, %xmm11, %xmm11 + vpaddd L_avx1_aes_gcm_four(%rip), %xmm0, %xmm12 + vpshufb %xmm1, %xmm12, %xmm12 + vpaddd L_avx1_aes_gcm_five(%rip), %xmm0, %xmm13 + vpshufb %xmm1, %xmm13, %xmm13 + vpaddd L_avx1_aes_gcm_six(%rip), %xmm0, %xmm14 + vpshufb %xmm1, %xmm14, %xmm14 + vpaddd L_avx1_aes_gcm_seven(%rip), %xmm0, %xmm15 + vpshufb %xmm1, %xmm15, %xmm15 + vpaddd L_avx1_aes_gcm_eight(%rip), %xmm0, %xmm0 + vmovdqa (%r15), %xmm7 + vmovdqa %xmm0, 128(%rsp) + vpxor %xmm7, %xmm8, %xmm8 + vpxor %xmm7, %xmm9, %xmm9 + vpxor %xmm7, %xmm10, %xmm10 + vpxor %xmm7, %xmm11, %xmm11 + vpxor %xmm7, %xmm12, %xmm12 + vpxor %xmm7, %xmm13, %xmm13 + vpxor %xmm7, %xmm14, %xmm14 + vpxor %xmm7, %xmm15, %xmm15 + vmovdqa 16(%r15), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 32(%r15), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 48(%r15), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 64(%r15), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 80(%r15), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 96(%r15), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, 
%xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 112(%r15), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 128(%r15), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 144(%r15), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + cmpl $11, %r10d + vmovdqa 160(%r15), %xmm7 + jl L_AES_GCM_encrypt_avx1_aesenc_128_enc_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 176(%r15), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + cmpl $13, %r10d + vmovdqa 192(%r15), %xmm7 + jl L_AES_GCM_encrypt_avx1_aesenc_128_enc_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 208(%r15), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 224(%r15), %xmm7 +L_AES_GCM_encrypt_avx1_aesenc_128_enc_done: + vaesenclast %xmm7, %xmm8, %xmm8 + vaesenclast %xmm7, %xmm9, %xmm9 + vmovdqu (%rdi), %xmm0 + vmovdqu 16(%rdi), %xmm1 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vmovdqu %xmm8, (%rsi) + vmovdqu %xmm9, 16(%rsi) + vaesenclast %xmm7, %xmm10, %xmm10 + vaesenclast %xmm7, %xmm11, %xmm11 + vmovdqu 32(%rdi), %xmm0 + vmovdqu 48(%rdi), %xmm1 + vpxor %xmm0, %xmm10, %xmm10 + vpxor %xmm1, %xmm11, %xmm11 + vmovdqu %xmm10, 32(%rsi) + vmovdqu %xmm11, 48(%rsi) + vaesenclast %xmm7, %xmm12, %xmm12 + vaesenclast %xmm7, %xmm13, %xmm13 + vmovdqu 64(%rdi), %xmm0 + vmovdqu 80(%rdi), %xmm1 + vpxor %xmm0, %xmm12, %xmm12 + vpxor %xmm1, %xmm13, %xmm13 + vmovdqu %xmm12, 64(%rsi) + vmovdqu %xmm13, 80(%rsi) + vaesenclast %xmm7, %xmm14, %xmm14 + vaesenclast %xmm7, %xmm15, %xmm15 + vmovdqu 96(%rdi), %xmm0 + vmovdqu 112(%rdi), %xmm1 + vpxor %xmm0, %xmm14, %xmm14 + vpxor %xmm1, %xmm15, %xmm15 + vmovdqu %xmm14, 96(%rsi) + vmovdqu %xmm15, 112(%rsi) + cmpl $0x80, %r13d + movl $0x80, %ebx + jle L_AES_GCM_encrypt_avx1_end_128 + # More 128 bytes of input +L_AES_GCM_encrypt_avx1_ghash_128: + leaq (%rdi,%rbx,1), %rcx + leaq (%rsi,%rbx,1), %rdx + vmovdqa 128(%rsp), %xmm0 + vmovdqa L_avx1_aes_gcm_bswap_epi64(%rip), %xmm1 + vpshufb %xmm1, %xmm0, %xmm8 + vpaddd 
L_avx1_aes_gcm_one(%rip), %xmm0, %xmm9 + vpshufb %xmm1, %xmm9, %xmm9 + vpaddd L_avx1_aes_gcm_two(%rip), %xmm0, %xmm10 + vpshufb %xmm1, %xmm10, %xmm10 + vpaddd L_avx1_aes_gcm_three(%rip), %xmm0, %xmm11 + vpshufb %xmm1, %xmm11, %xmm11 + vpaddd L_avx1_aes_gcm_four(%rip), %xmm0, %xmm12 + vpshufb %xmm1, %xmm12, %xmm12 + vpaddd L_avx1_aes_gcm_five(%rip), %xmm0, %xmm13 + vpshufb %xmm1, %xmm13, %xmm13 + vpaddd L_avx1_aes_gcm_six(%rip), %xmm0, %xmm14 + vpshufb %xmm1, %xmm14, %xmm14 + vpaddd L_avx1_aes_gcm_seven(%rip), %xmm0, %xmm15 + vpshufb %xmm1, %xmm15, %xmm15 + vpaddd L_avx1_aes_gcm_eight(%rip), %xmm0, %xmm0 + vmovdqa (%r15), %xmm7 + vmovdqa %xmm0, 128(%rsp) + vpxor %xmm7, %xmm8, %xmm8 + vpxor %xmm7, %xmm9, %xmm9 + vpxor %xmm7, %xmm10, %xmm10 + vpxor %xmm7, %xmm11, %xmm11 + vpxor %xmm7, %xmm12, %xmm12 + vpxor %xmm7, %xmm13, %xmm13 + vpxor %xmm7, %xmm14, %xmm14 + vpxor %xmm7, %xmm15, %xmm15 + vmovdqa 112(%rsp), %xmm7 + vmovdqu -128(%rdx), %xmm0 + vaesenc 16(%r15), %xmm8, %xmm8 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm3 + vaesenc 16(%r15), %xmm9, %xmm9 + vaesenc 16(%r15), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm2 + vaesenc 16(%r15), %xmm11, %xmm11 + vaesenc 16(%r15), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm1, %xmm1 + vaesenc 16(%r15), %xmm13, %xmm13 + vaesenc 16(%r15), %xmm14, %xmm14 + vaesenc 16(%r15), %xmm15, %xmm15 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa 96(%rsp), %xmm7 + vmovdqu -112(%rdx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 32(%r15), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 32(%r15), %xmm9, %xmm9 + vaesenc 32(%r15), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 32(%r15), %xmm11, %xmm11 + vaesenc 32(%r15), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 32(%r15), %xmm13, %xmm13 + vaesenc 32(%r15), %xmm14, %xmm14 + vaesenc 32(%r15), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 80(%rsp), %xmm7 + vmovdqu -96(%rdx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 48(%r15), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 48(%r15), %xmm9, %xmm9 + vaesenc 48(%r15), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 48(%r15), %xmm11, %xmm11 + vaesenc 48(%r15), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 48(%r15), %xmm13, %xmm13 + vaesenc 48(%r15), %xmm14, %xmm14 + vaesenc 48(%r15), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 64(%rsp), %xmm7 + vmovdqu -80(%rdx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 64(%r15), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 64(%r15), %xmm9, %xmm9 + vaesenc 64(%r15), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 64(%r15), 
%xmm11, %xmm11 + vaesenc 64(%r15), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 64(%r15), %xmm13, %xmm13 + vaesenc 64(%r15), %xmm14, %xmm14 + vaesenc 64(%r15), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 48(%rsp), %xmm7 + vmovdqu -64(%rdx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 80(%r15), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 80(%r15), %xmm9, %xmm9 + vaesenc 80(%r15), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 80(%r15), %xmm11, %xmm11 + vaesenc 80(%r15), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 80(%r15), %xmm13, %xmm13 + vaesenc 80(%r15), %xmm14, %xmm14 + vaesenc 80(%r15), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 32(%rsp), %xmm7 + vmovdqu -48(%rdx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 96(%r15), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 96(%r15), %xmm9, %xmm9 + vaesenc 96(%r15), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 96(%r15), %xmm11, %xmm11 + vaesenc 96(%r15), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 96(%r15), %xmm13, %xmm13 + vaesenc 96(%r15), %xmm14, %xmm14 + vaesenc 96(%r15), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 16(%rsp), %xmm7 + vmovdqu -32(%rdx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 112(%r15), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 112(%r15), %xmm9, %xmm9 + vaesenc 112(%r15), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 112(%r15), %xmm11, %xmm11 + vaesenc 112(%r15), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 112(%r15), %xmm13, %xmm13 + vaesenc 112(%r15), %xmm14, %xmm14 + vaesenc 112(%r15), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa (%rsp), %xmm7 + vmovdqu -16(%rdx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 128(%r15), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 128(%r15), %xmm9, %xmm9 + vaesenc 128(%r15), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 128(%r15), %xmm11, %xmm11 + vaesenc 128(%r15), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 128(%r15), %xmm13, %xmm13 + vaesenc 128(%r15), %xmm14, %xmm14 + vaesenc 128(%r15), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vpslldq $8, %xmm1, %xmm5 + vpsrldq $8, %xmm1, %xmm1 + vaesenc 144(%r15), %xmm8, %xmm8 + vpxor %xmm5, %xmm2, %xmm2 + vpxor %xmm1, %xmm3, %xmm3 + vaesenc 
144(%r15), %xmm9, %xmm9 + vpslld $31, %xmm2, %xmm7 + vpslld $30, %xmm2, %xmm4 + vpslld $25, %xmm2, %xmm5 + vaesenc 144(%r15), %xmm10, %xmm10 + vpxor %xmm4, %xmm7, %xmm7 + vpxor %xmm5, %xmm7, %xmm7 + vaesenc 144(%r15), %xmm11, %xmm11 + vpsrldq $4, %xmm7, %xmm4 + vpslldq $12, %xmm7, %xmm7 + vaesenc 144(%r15), %xmm12, %xmm12 + vpxor %xmm7, %xmm2, %xmm2 + vpsrld $0x01, %xmm2, %xmm5 + vaesenc 144(%r15), %xmm13, %xmm13 + vpsrld $2, %xmm2, %xmm1 + vpsrld $7, %xmm2, %xmm0 + vaesenc 144(%r15), %xmm14, %xmm14 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vaesenc 144(%r15), %xmm15, %xmm15 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm2, %xmm2 + vpxor %xmm3, %xmm2, %xmm2 + cmpl $11, %r10d + vmovdqa 160(%r15), %xmm7 + jl L_AES_GCM_encrypt_avx1_aesenc_128_ghash_avx_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 176(%r15), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + cmpl $13, %r10d + vmovdqa 192(%r15), %xmm7 + jl L_AES_GCM_encrypt_avx1_aesenc_128_ghash_avx_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 208(%r15), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 224(%r15), %xmm7 +L_AES_GCM_encrypt_avx1_aesenc_128_ghash_avx_done: + vaesenclast %xmm7, %xmm8, %xmm8 + vaesenclast %xmm7, %xmm9, %xmm9 + vmovdqu (%rcx), %xmm0 + vmovdqu 16(%rcx), %xmm1 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vmovdqu %xmm8, (%rdx) + vmovdqu %xmm9, 16(%rdx) + vaesenclast %xmm7, %xmm10, %xmm10 + vaesenclast %xmm7, %xmm11, %xmm11 + vmovdqu 32(%rcx), %xmm0 + vmovdqu 48(%rcx), %xmm1 + vpxor %xmm0, %xmm10, %xmm10 + vpxor %xmm1, %xmm11, %xmm11 + vmovdqu %xmm10, 32(%rdx) + vmovdqu %xmm11, 48(%rdx) + vaesenclast %xmm7, %xmm12, %xmm12 + vaesenclast %xmm7, %xmm13, %xmm13 + vmovdqu 64(%rcx), %xmm0 + vmovdqu 80(%rcx), %xmm1 + vpxor %xmm0, %xmm12, %xmm12 + vpxor %xmm1, %xmm13, %xmm13 + vmovdqu %xmm12, 64(%rdx) + vmovdqu %xmm13, 80(%rdx) + vaesenclast %xmm7, %xmm14, %xmm14 + vaesenclast %xmm7, %xmm15, %xmm15 + vmovdqu 96(%rcx), %xmm0 + vmovdqu 112(%rcx), %xmm1 + vpxor %xmm0, %xmm14, %xmm14 + vpxor %xmm1, %xmm15, %xmm15 + vmovdqu %xmm14, 96(%rdx) + vmovdqu %xmm15, 112(%rdx) + addl $0x80, %ebx + cmpl %r13d, %ebx + jl L_AES_GCM_encrypt_avx1_ghash_128 +L_AES_GCM_encrypt_avx1_end_128: + vmovdqa L_avx1_aes_gcm_bswap_mask(%rip), %xmm4 + vpshufb %xmm4, %xmm8, %xmm8 + vpshufb %xmm4, %xmm9, %xmm9 + vpshufb %xmm4, %xmm10, %xmm10 + vpshufb %xmm4, %xmm11, %xmm11 + vpxor %xmm2, %xmm8, %xmm8 + vpshufb %xmm4, %xmm12, %xmm12 + vpshufb %xmm4, %xmm13, %xmm13 + vpshufb %xmm4, %xmm14, %xmm14 + vpshufb %xmm4, %xmm15, %xmm15 + vmovdqa (%rsp), %xmm7 + vmovdqa 16(%rsp), %xmm5 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm15, %xmm1 + vpshufd $0x4e, %xmm7, %xmm2 + vpclmulqdq $0x11, 
%xmm15, %xmm7, %xmm3 + vpclmulqdq $0x00, %xmm15, %xmm7, %xmm0 + vpxor %xmm15, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm4 + vmovdqa %xmm3, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm14, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm14, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm14, %xmm5, %xmm0 + vpxor %xmm14, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + vmovdqa 32(%rsp), %xmm7 + vmovdqa 48(%rsp), %xmm5 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm13, %xmm1 + vpshufd $0x4e, %xmm7, %xmm2 + vpclmulqdq $0x11, %xmm13, %xmm7, %xmm3 + vpclmulqdq $0x00, %xmm13, %xmm7, %xmm0 + vpxor %xmm13, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm12, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm12, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm12, %xmm5, %xmm0 + vpxor %xmm12, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + vmovdqa 64(%rsp), %xmm7 + vmovdqa 80(%rsp), %xmm5 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm11, %xmm1 + vpshufd $0x4e, %xmm7, %xmm2 + vpclmulqdq $0x11, %xmm11, %xmm7, %xmm3 + vpclmulqdq $0x00, %xmm11, %xmm7, %xmm0 + vpxor %xmm11, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm10, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm10, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm10, %xmm5, %xmm0 + vpxor %xmm10, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + vmovdqa 96(%rsp), %xmm7 + vmovdqa 112(%rsp), %xmm5 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm9, %xmm1 + vpshufd $0x4e, %xmm7, %xmm2 + vpclmulqdq $0x11, %xmm9, %xmm7, %xmm3 + vpclmulqdq $0x00, %xmm9, %xmm7, %xmm0 + vpxor %xmm9, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm8, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm8, 
%xmm5, %xmm3 + vpclmulqdq $0x00, %xmm8, %xmm5, %xmm0 + vpxor %xmm8, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + vpslld $31, %xmm4, %xmm0 + vpslld $30, %xmm4, %xmm1 + vpslld $25, %xmm4, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + vpsrld $0x01, %xmm4, %xmm2 + vpsrld $2, %xmm4, %xmm3 + vpsrld $7, %xmm4, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm4, %xmm2, %xmm2 + vpxor %xmm2, %xmm6, %xmm6 + vmovdqa (%rsp), %xmm5 +L_AES_GCM_encrypt_avx1_done_128: + movl %r9d, %edx + cmpl %edx, %ebx + jge L_AES_GCM_encrypt_avx1_done_enc + movl %r9d, %r13d + andl $0xfffffff0, %r13d + cmpl %r13d, %ebx + jge L_AES_GCM_encrypt_avx1_last_block_done + vmovdqa 128(%rsp), %xmm9 + vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm9, %xmm8 + vpaddd L_avx1_aes_gcm_one(%rip), %xmm9, %xmm9 + vmovdqa %xmm9, 128(%rsp) + vpxor (%r15), %xmm8, %xmm8 + vaesenc 16(%r15), %xmm8, %xmm8 + vaesenc 32(%r15), %xmm8, %xmm8 + vaesenc 48(%r15), %xmm8, %xmm8 + vaesenc 64(%r15), %xmm8, %xmm8 + vaesenc 80(%r15), %xmm8, %xmm8 + vaesenc 96(%r15), %xmm8, %xmm8 + vaesenc 112(%r15), %xmm8, %xmm8 + vaesenc 128(%r15), %xmm8, %xmm8 + vaesenc 144(%r15), %xmm8, %xmm8 + cmpl $11, %r10d + vmovdqa 160(%r15), %xmm9 + jl L_AES_GCM_encrypt_avx1_aesenc_block_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 176(%r15), %xmm8, %xmm8 + cmpl $13, %r10d + vmovdqa 192(%r15), %xmm9 + jl L_AES_GCM_encrypt_avx1_aesenc_block_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 208(%r15), %xmm8, %xmm8 + vmovdqa 224(%r15), %xmm9 +L_AES_GCM_encrypt_avx1_aesenc_block_last: + vaesenclast %xmm9, %xmm8, %xmm8 + vmovdqu (%rdi,%rbx,1), %xmm9 + vpxor %xmm9, %xmm8, %xmm8 + vmovdqu %xmm8, (%rsi,%rbx,1) + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + vpxor %xmm8, %xmm6, %xmm6 + addl $16, %ebx + cmpl %r13d, %ebx + jge L_AES_GCM_encrypt_avx1_last_block_ghash +L_AES_GCM_encrypt_avx1_last_block_start: + vmovdqu (%rdi,%rbx,1), %xmm13 + vmovdqa 128(%rsp), %xmm9 + vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm9, %xmm8 + vpaddd L_avx1_aes_gcm_one(%rip), %xmm9, %xmm9 + vmovdqa %xmm9, 128(%rsp) + vpxor (%r15), %xmm8, %xmm8 + vpclmulqdq $16, %xmm5, %xmm6, %xmm10 + vaesenc 16(%r15), %xmm8, %xmm8 + vaesenc 32(%r15), %xmm8, %xmm8 + vpclmulqdq $0x01, %xmm5, %xmm6, %xmm11 + vaesenc 48(%r15), %xmm8, %xmm8 + vaesenc 64(%r15), %xmm8, %xmm8 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm12 + vaesenc 80(%r15), %xmm8, %xmm8 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm1 + vaesenc 96(%r15), %xmm8, %xmm8 + vpxor %xmm11, %xmm10, %xmm10 + vpslldq $8, %xmm10, %xmm2 + vpsrldq $8, %xmm10, %xmm10 + vaesenc 112(%r15), %xmm8, %xmm8 + vpxor %xmm12, %xmm2, %xmm2 + vpxor %xmm10, %xmm1, %xmm3 + vmovdqa L_avx1_aes_gcm_mod2_128(%rip), %xmm0 + vpclmulqdq $16, %xmm0, %xmm2, %xmm11 + vaesenc 128(%r15), %xmm8, %xmm8 + vpshufd $0x4e, %xmm2, %xmm10 + vpxor %xmm11, %xmm10, %xmm10 + vpclmulqdq $16, %xmm0, %xmm10, %xmm11 + vaesenc 144(%r15), %xmm8, %xmm8 + vpshufd $0x4e, %xmm10, %xmm10 + vpxor %xmm11, %xmm10, %xmm10 + vpxor %xmm3, %xmm10, %xmm6 + cmpl $11, %r10d + vmovdqa 160(%r15), %xmm9 + jl L_AES_GCM_encrypt_avx1_aesenc_gfmul_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 176(%r15), %xmm8, %xmm8 + cmpl $13, 
%r10d + vmovdqa 192(%r15), %xmm9 + jl L_AES_GCM_encrypt_avx1_aesenc_gfmul_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 208(%r15), %xmm8, %xmm8 + vmovdqa 224(%r15), %xmm9 +L_AES_GCM_encrypt_avx1_aesenc_gfmul_last: + vaesenclast %xmm9, %xmm8, %xmm8 + vmovdqa %xmm13, %xmm0 + vpxor %xmm0, %xmm8, %xmm8 + vmovdqu %xmm8, (%rsi,%rbx,1) + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + addl $16, %ebx + vpxor %xmm8, %xmm6, %xmm6 + cmpl %r13d, %ebx + jl L_AES_GCM_encrypt_avx1_last_block_start +L_AES_GCM_encrypt_avx1_last_block_ghash: + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm5, %xmm9 + vpshufd $0x4e, %xmm6, %xmm10 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm11 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm8 + vpxor %xmm5, %xmm9, %xmm9 + vpxor %xmm6, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm6 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm6, %xmm6 +L_AES_GCM_encrypt_avx1_last_block_done: + movl %r9d, %ecx + movl %ecx, %edx + andl $15, %ecx + jz L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_done + vmovdqa 128(%rsp), %xmm4 + vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4 + vpxor (%r15), %xmm4, %xmm4 + vaesenc 16(%r15), %xmm4, %xmm4 + vaesenc 32(%r15), %xmm4, %xmm4 + vaesenc 48(%r15), %xmm4, %xmm4 + vaesenc 64(%r15), %xmm4, %xmm4 + vaesenc 80(%r15), %xmm4, %xmm4 + vaesenc 96(%r15), %xmm4, %xmm4 + vaesenc 112(%r15), %xmm4, %xmm4 + vaesenc 128(%r15), %xmm4, %xmm4 + vaesenc 144(%r15), %xmm4, %xmm4 + cmpl $11, %r10d + vmovdqa 160(%r15), %xmm9 + jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last + vaesenc %xmm9, %xmm4, %xmm4 + vaesenc 176(%r15), %xmm4, %xmm4 + cmpl $13, %r10d + vmovdqa 192(%r15), %xmm9 + jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last + vaesenc %xmm9, %xmm4, %xmm4 + vaesenc 208(%r15), %xmm4, %xmm4 + vmovdqa 224(%r15), %xmm9 +L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last: + vaesenclast %xmm9, %xmm4, %xmm4 + subq $16, %rsp + xorl %ecx, %ecx + vmovdqa %xmm4, (%rsp) +L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_loop: + movzbl (%rdi,%rbx,1), %r13d + xorb (%rsp,%rcx,1), %r13b + movb %r13b, (%rsi,%rbx,1) + movb %r13b, (%rsp,%rcx,1) + incl %ebx + incl %ecx + cmpl %edx, %ebx + jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_loop + xorq %r13, %r13 + cmpl $16, %ecx + je L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_finish_enc +L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_byte_loop: + movb %r13b, (%rsp,%rcx,1) + incl %ecx + cmpl $16, %ecx + jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_byte_loop +L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_finish_enc: + vmovdqa (%rsp), %xmm4 + addq $16, %rsp + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4 + vpxor %xmm4, %xmm6, %xmm6 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm5, %xmm9 + vpshufd $0x4e, %xmm6, %xmm10 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm11 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm8 + vpxor %xmm5, %xmm9, %xmm9 + vpxor %xmm6, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor 
%xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm6 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm6, %xmm6 +L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_done: +L_AES_GCM_encrypt_avx1_done_enc: + movl %r9d, %edx + movl %r11d, %ecx + shlq $3, %rdx + shlq $3, %rcx + vpinsrq $0x00, %rdx, %xmm0, %xmm0 + vpinsrq $0x01, %rcx, %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm5, %xmm9 + vpshufd $0x4e, %xmm6, %xmm10 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm11 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm8 + vpxor %xmm5, %xmm9, %xmm9 + vpxor %xmm6, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm6 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm6, %xmm6 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm6, %xmm6 + vpxor 144(%rsp), %xmm6, %xmm0 + cmpl $16, %r14d + je L_AES_GCM_encrypt_avx1_store_tag_16 + xorq %rcx, %rcx + vmovdqa %xmm0, (%rsp) +L_AES_GCM_encrypt_avx1_store_tag_loop: + movzbl (%rsp,%rcx,1), %r13d + movb %r13b, (%r8,%rcx,1) + incl %ecx + cmpl %r14d, %ecx + jne L_AES_GCM_encrypt_avx1_store_tag_loop + jmp L_AES_GCM_encrypt_avx1_store_tag_done +L_AES_GCM_encrypt_avx1_store_tag_16: + vmovdqu %xmm0, (%r8) +L_AES_GCM_encrypt_avx1_store_tag_done: + vzeroupper + addq $0xa0, %rsp + popq %r15 + popq %r14 + popq %rbx + popq %r12 + popq %r13 + repz retq +#ifndef __APPLE__ +.size AES_GCM_encrypt_avx1,.-AES_GCM_encrypt_avx1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_decrypt_avx1 +.type AES_GCM_decrypt_avx1,@function +.align 4 +AES_GCM_decrypt_avx1: +#else +.section __TEXT,__text +.globl _AES_GCM_decrypt_avx1 +.p2align 2 +_AES_GCM_decrypt_avx1: +#endif /* __APPLE__ */ + pushq %r13 + pushq %r12 + pushq %rbx + pushq %r14 + pushq %r15 + pushq %rbp + movq %rdx, %r12 + movq %rcx, %rax + movl 56(%rsp), %r11d + movl 64(%rsp), %ebx + movl 72(%rsp), %r14d + movq 80(%rsp), %r15 + movl 88(%rsp), %r10d + movq 96(%rsp), %rbp + subq $0xa8, %rsp + vpxor %xmm4, %xmm4, %xmm4 + vpxor %xmm6, %xmm6, %xmm6 + cmpl $12, %ebx + movl %ebx, %edx + jne L_AES_GCM_decrypt_avx1_iv_not_12 + # # Calculate values when IV is 12 bytes + # Set counter based on IV + movl $0x1000000, %ecx + vpinsrq $0x00, (%rax), %xmm4, %xmm4 + vpinsrd $2, 8(%rax), %xmm4, %xmm4 + vpinsrd $3, %ecx, %xmm4, %xmm4 + # H = Encrypt X(=0) and T = Encrypt counter + vmovdqa (%r15), %xmm5 + vpxor %xmm5, %xmm4, %xmm1 + vmovdqa 16(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 32(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + 
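# Two independent AES pipelines run in lockstep: xmm5 = 0 ^ rk[0] becomes
+ # the hash key H = E_K(0^128) and xmm1 = counter ^ rk[0] becomes the tag
+ # mask E_K(J0), so the per-round vaesenc latencies overlap +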
vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 48(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 64(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 80(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 96(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 112(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 128(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 144(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + cmpl $11, %r10d + vmovdqa 160(%r15), %xmm7 + jl L_AES_GCM_decrypt_avx1_calc_iv_12_last + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 176(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + cmpl $13, %r10d + vmovdqa 192(%r15), %xmm7 + jl L_AES_GCM_decrypt_avx1_calc_iv_12_last + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 208(%r15), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 224(%r15), %xmm7 +L_AES_GCM_decrypt_avx1_calc_iv_12_last: + vaesenclast %xmm7, %xmm5, %xmm5 + vaesenclast %xmm7, %xmm1, %xmm1 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5 + vmovdqa %xmm1, 144(%rsp) + jmp L_AES_GCM_decrypt_avx1_iv_done +L_AES_GCM_decrypt_avx1_iv_not_12: + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + vmovdqa (%r15), %xmm5 + vaesenc 16(%r15), %xmm5, %xmm5 + vaesenc 32(%r15), %xmm5, %xmm5 + vaesenc 48(%r15), %xmm5, %xmm5 + vaesenc 64(%r15), %xmm5, %xmm5 + vaesenc 80(%r15), %xmm5, %xmm5 + vaesenc 96(%r15), %xmm5, %xmm5 + vaesenc 112(%r15), %xmm5, %xmm5 + vaesenc 128(%r15), %xmm5, %xmm5 + vaesenc 144(%r15), %xmm5, %xmm5 + cmpl $11, %r10d + vmovdqa 160(%r15), %xmm9 + jl L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last + vaesenc %xmm9, %xmm5, %xmm5 + vaesenc 176(%r15), %xmm5, %xmm5 + cmpl $13, %r10d + vmovdqa 192(%r15), %xmm9 + jl L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last + vaesenc %xmm9, %xmm5, %xmm5 + vaesenc 208(%r15), %xmm5, %xmm5 + vmovdqa 224(%r15), %xmm9 +L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last: + vaesenclast %xmm9, %xmm5, %xmm5 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5 + # Calc counter + # Initialization vector + cmpl $0x00, %edx + movq $0x00, %rcx + je L_AES_GCM_decrypt_avx1_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_decrypt_avx1_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_decrypt_avx1_calc_iv_16_loop: + vmovdqu (%rax,%rcx,1), %xmm8 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + vpxor %xmm8, %xmm4, %xmm4 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm4, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm7 + vmovdqa %xmm3, %xmm4 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm1, %xmm4, %xmm4 + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + vpslld $31, %xmm7, %xmm0 + vpslld $30, %xmm7, %xmm1 + vpslld $25, %xmm7, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq 
$4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm7, %xmm7 + vpsrld $0x01, %xmm7, %xmm2 + vpsrld $2, %xmm7, %xmm3 + vpsrld $7, %xmm7, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm4, %xmm4 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_avx1_calc_iv_16_loop + movl %ebx, %edx + cmpl %edx, %ecx + je L_AES_GCM_decrypt_avx1_calc_iv_done +L_AES_GCM_decrypt_avx1_calc_iv_lt16: + subq $16, %rsp + vpxor %xmm8, %xmm8, %xmm8 + xorl %ebx, %ebx + vmovdqa %xmm8, (%rsp) +L_AES_GCM_decrypt_avx1_calc_iv_loop: + movzbl (%rax,%rcx,1), %r13d + movb %r13b, (%rsp,%rbx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_avx1_calc_iv_loop + vmovdqa (%rsp), %xmm8 + addq $16, %rsp + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + vpxor %xmm8, %xmm4, %xmm4 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm4, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm7 + vmovdqa %xmm3, %xmm4 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm1, %xmm4, %xmm4 + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + vpslld $31, %xmm7, %xmm0 + vpslld $30, %xmm7, %xmm1 + vpslld $25, %xmm7, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm7, %xmm7 + vpsrld $0x01, %xmm7, %xmm2 + vpsrld $2, %xmm7, %xmm3 + vpsrld $7, %xmm7, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm4, %xmm4 +L_AES_GCM_decrypt_avx1_calc_iv_done: + # T = Encrypt counter + vpxor %xmm0, %xmm0, %xmm0 + shll $3, %edx + vpinsrq $0x00, %rdx, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm4, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm7 + vmovdqa %xmm3, %xmm4 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm1, %xmm4, %xmm4 + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + vpslld $31, %xmm7, %xmm0 + vpslld $30, %xmm7, %xmm1 + vpslld $25, %xmm7, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm7, %xmm7 + vpsrld $0x01, %xmm7, %xmm2 + vpsrld $2, %xmm7, %xmm3 + vpsrld $7, %xmm7, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm4, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4 + # Encrypt counter + vmovdqa 
(%r15), %xmm8 + vpxor %xmm4, %xmm8, %xmm8 + vaesenc 16(%r15), %xmm8, %xmm8 + vaesenc 32(%r15), %xmm8, %xmm8 + vaesenc 48(%r15), %xmm8, %xmm8 + vaesenc 64(%r15), %xmm8, %xmm8 + vaesenc 80(%r15), %xmm8, %xmm8 + vaesenc 96(%r15), %xmm8, %xmm8 + vaesenc 112(%r15), %xmm8, %xmm8 + vaesenc 128(%r15), %xmm8, %xmm8 + vaesenc 144(%r15), %xmm8, %xmm8 + cmpl $11, %r10d + vmovdqa 160(%r15), %xmm9 + jl L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 176(%r15), %xmm8, %xmm8 + cmpl $13, %r10d + vmovdqa 192(%r15), %xmm9 + jl L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 208(%r15), %xmm8, %xmm8 + vmovdqa 224(%r15), %xmm9 +L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last: + vaesenclast %xmm9, %xmm8, %xmm8 + vmovdqa %xmm8, 144(%rsp) +L_AES_GCM_decrypt_avx1_iv_done: + # Additional authentication data + movl %r11d, %edx + cmpl $0x00, %edx + je L_AES_GCM_decrypt_avx1_calc_aad_done + xorl %ecx, %ecx + cmpl $16, %edx + jl L_AES_GCM_decrypt_avx1_calc_aad_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_decrypt_avx1_calc_aad_16_loop: + vmovdqu (%r12,%rcx,1), %xmm8 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + vpxor %xmm8, %xmm6, %xmm6 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm6, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm7 + vmovdqa %xmm3, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm1, %xmm6, %xmm6 + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm6, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm6, %xmm6 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm6, %xmm6 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm6, %xmm6 + vpslld $31, %xmm7, %xmm0 + vpslld $30, %xmm7, %xmm1 + vpslld $25, %xmm7, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm7, %xmm7 + vpsrld $0x01, %xmm7, %xmm2 + vpsrld $2, %xmm7, %xmm3 + vpsrld $7, %xmm7, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm6, %xmm6 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_avx1_calc_aad_16_loop + movl %r11d, %edx + cmpl %edx, %ecx + je L_AES_GCM_decrypt_avx1_calc_aad_done +L_AES_GCM_decrypt_avx1_calc_aad_lt16: + subq $16, %rsp + vpxor %xmm8, %xmm8, %xmm8 + xorl %ebx, %ebx + vmovdqa %xmm8, (%rsp) +L_AES_GCM_decrypt_avx1_calc_aad_loop: + movzbl (%r12,%rcx,1), %r13d + movb %r13b, (%rsp,%rbx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_avx1_calc_aad_loop + vmovdqa (%rsp), %xmm8 + addq $16, %rsp + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + vpxor %xmm8, %xmm6, %xmm6 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm6, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm7 + vmovdqa %xmm3, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm1, %xmm6, %xmm6 + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm6, %xmm1 + vpslld $0x01, 
%xmm7, %xmm7 + vpslld $0x01, %xmm6, %xmm6 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm6, %xmm6 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm6, %xmm6 + vpslld $31, %xmm7, %xmm0 + vpslld $30, %xmm7, %xmm1 + vpslld $25, %xmm7, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm7, %xmm7 + vpsrld $0x01, %xmm7, %xmm2 + vpsrld $2, %xmm7, %xmm3 + vpsrld $7, %xmm7, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm6, %xmm6 +L_AES_GCM_decrypt_avx1_calc_aad_done: + # Calculate counter and H + vpsrlq $63, %xmm5, %xmm9 + vpsllq $0x01, %xmm5, %xmm8 + vpslldq $8, %xmm9, %xmm9 + vpor %xmm9, %xmm8, %xmm8 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4 + vpand L_avx1_aes_gcm_mod2_128(%rip), %xmm5, %xmm5 + vpaddd L_avx1_aes_gcm_one(%rip), %xmm4, %xmm4 + vpxor %xmm8, %xmm5, %xmm5 + vmovdqa %xmm4, 128(%rsp) + xorl %ebx, %ebx + cmpl $0x80, %r9d + movl %r9d, %r13d + jl L_AES_GCM_decrypt_avx1_done_128 + andl $0xffffff80, %r13d + vmovdqa %xmm6, %xmm2 + # H ^ 1 + vmovdqa %xmm5, (%rsp) + # H ^ 2 + vpclmulqdq $0x00, %xmm5, %xmm5, %xmm8 + vpclmulqdq $0x11, %xmm5, %xmm5, %xmm0 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm0, %xmm0 + vmovdqa %xmm0, 16(%rsp) + # H ^ 3 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm5, %xmm9 + vpshufd $0x4e, %xmm0, %xmm10 + vpclmulqdq $0x11, %xmm5, %xmm0, %xmm11 + vpclmulqdq $0x00, %xmm5, %xmm0, %xmm8 + vpxor %xmm5, %xmm9, %xmm9 + vpxor %xmm0, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm1 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm1, %xmm1 + vmovdqa %xmm1, 32(%rsp) + # H ^ 4 + vpclmulqdq $0x00, %xmm0, %xmm0, %xmm8 + vpclmulqdq $0x11, %xmm0, %xmm0, %xmm3 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm3, %xmm3 + vmovdqa %xmm3, 48(%rsp) + # H ^ 5 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm0, %xmm9 + vpshufd $0x4e, %xmm1, %xmm10 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm11 + vpclmulqdq $0x00, 
%xmm0, %xmm1, %xmm8 + vpxor %xmm0, %xmm9, %xmm9 + vpxor %xmm1, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm7 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm7, %xmm7 + vmovdqa %xmm7, 64(%rsp) + # H ^ 6 + vpclmulqdq $0x00, %xmm1, %xmm1, %xmm8 + vpclmulqdq $0x11, %xmm1, %xmm1, %xmm7 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm7, %xmm7 + vmovdqa %xmm7, 80(%rsp) + # H ^ 7 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm1, %xmm9 + vpshufd $0x4e, %xmm3, %xmm10 + vpclmulqdq $0x11, %xmm1, %xmm3, %xmm11 + vpclmulqdq $0x00, %xmm1, %xmm3, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm3, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm7 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm7, %xmm7 + vmovdqa %xmm7, 96(%rsp) + # H ^ 8 + vpclmulqdq $0x00, %xmm3, %xmm3, %xmm8 + vpclmulqdq $0x11, %xmm3, %xmm3, %xmm7 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm7, %xmm7 + vmovdqa %xmm7, 112(%rsp) +L_AES_GCM_decrypt_avx1_ghash_128: + leaq (%rdi,%rbx,1), %rcx + leaq (%rsi,%rbx,1), %rdx + vmovdqa 128(%rsp), %xmm0 + vmovdqa L_avx1_aes_gcm_bswap_epi64(%rip), %xmm1 + vpshufb %xmm1, %xmm0, %xmm8 + vpaddd L_avx1_aes_gcm_one(%rip), %xmm0, %xmm9 + vpshufb %xmm1, %xmm9, %xmm9 + vpaddd L_avx1_aes_gcm_two(%rip), %xmm0, %xmm10 + vpshufb %xmm1, %xmm10, %xmm10 + vpaddd L_avx1_aes_gcm_three(%rip), %xmm0, %xmm11 + vpshufb %xmm1, %xmm11, %xmm11 + vpaddd L_avx1_aes_gcm_four(%rip), %xmm0, %xmm12 + vpshufb %xmm1, %xmm12, %xmm12 + vpaddd L_avx1_aes_gcm_five(%rip), %xmm0, %xmm13 + vpshufb %xmm1, %xmm13, %xmm13 + vpaddd L_avx1_aes_gcm_six(%rip), %xmm0, %xmm14 + vpshufb 
%xmm1, %xmm14, %xmm14 + vpaddd L_avx1_aes_gcm_seven(%rip), %xmm0, %xmm15 + vpshufb %xmm1, %xmm15, %xmm15 + vpaddd L_avx1_aes_gcm_eight(%rip), %xmm0, %xmm0 + vmovdqa (%r15), %xmm7 + vmovdqa %xmm0, 128(%rsp) + vpxor %xmm7, %xmm8, %xmm8 + vpxor %xmm7, %xmm9, %xmm9 + vpxor %xmm7, %xmm10, %xmm10 + vpxor %xmm7, %xmm11, %xmm11 + vpxor %xmm7, %xmm12, %xmm12 + vpxor %xmm7, %xmm13, %xmm13 + vpxor %xmm7, %xmm14, %xmm14 + vpxor %xmm7, %xmm15, %xmm15 + vmovdqa 112(%rsp), %xmm7 + vmovdqu (%rcx), %xmm0 + vaesenc 16(%r15), %xmm8, %xmm8 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm3 + vaesenc 16(%r15), %xmm9, %xmm9 + vaesenc 16(%r15), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm2 + vaesenc 16(%r15), %xmm11, %xmm11 + vaesenc 16(%r15), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm1, %xmm1 + vaesenc 16(%r15), %xmm13, %xmm13 + vaesenc 16(%r15), %xmm14, %xmm14 + vaesenc 16(%r15), %xmm15, %xmm15 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa 96(%rsp), %xmm7 + vmovdqu 16(%rcx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 32(%r15), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 32(%r15), %xmm9, %xmm9 + vaesenc 32(%r15), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 32(%r15), %xmm11, %xmm11 + vaesenc 32(%r15), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 32(%r15), %xmm13, %xmm13 + vaesenc 32(%r15), %xmm14, %xmm14 + vaesenc 32(%r15), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 80(%rsp), %xmm7 + vmovdqu 32(%rcx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 48(%r15), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 48(%r15), %xmm9, %xmm9 + vaesenc 48(%r15), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 48(%r15), %xmm11, %xmm11 + vaesenc 48(%r15), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 48(%r15), %xmm13, %xmm13 + vaesenc 48(%r15), %xmm14, %xmm14 + vaesenc 48(%r15), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 64(%rsp), %xmm7 + vmovdqu 48(%rcx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 64(%r15), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 64(%r15), %xmm9, %xmm9 + vaesenc 64(%r15), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 64(%r15), %xmm11, %xmm11 + vaesenc 64(%r15), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 64(%r15), %xmm13, %xmm13 + vaesenc 64(%r15), %xmm14, %xmm14 + vaesenc 64(%r15), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 48(%rsp), %xmm7 + vmovdqu 64(%rcx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), 
%xmm0, %xmm0 + vaesenc 80(%r15), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 80(%r15), %xmm9, %xmm9 + vaesenc 80(%r15), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 80(%r15), %xmm11, %xmm11 + vaesenc 80(%r15), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 80(%r15), %xmm13, %xmm13 + vaesenc 80(%r15), %xmm14, %xmm14 + vaesenc 80(%r15), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 32(%rsp), %xmm7 + vmovdqu 80(%rcx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 96(%r15), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 96(%r15), %xmm9, %xmm9 + vaesenc 96(%r15), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 96(%r15), %xmm11, %xmm11 + vaesenc 96(%r15), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 96(%r15), %xmm13, %xmm13 + vaesenc 96(%r15), %xmm14, %xmm14 + vaesenc 96(%r15), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 16(%rsp), %xmm7 + vmovdqu 96(%rcx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 112(%r15), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 112(%r15), %xmm9, %xmm9 + vaesenc 112(%r15), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 112(%r15), %xmm11, %xmm11 + vaesenc 112(%r15), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 112(%r15), %xmm13, %xmm13 + vaesenc 112(%r15), %xmm14, %xmm14 + vaesenc 112(%r15), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa (%rsp), %xmm7 + vmovdqu 112(%rcx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 128(%r15), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 128(%r15), %xmm9, %xmm9 + vaesenc 128(%r15), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 128(%r15), %xmm11, %xmm11 + vaesenc 128(%r15), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 128(%r15), %xmm13, %xmm13 + vaesenc 128(%r15), %xmm14, %xmm14 + vaesenc 128(%r15), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vpslldq $8, %xmm1, %xmm5 + vpsrldq $8, %xmm1, %xmm1 + vaesenc 144(%r15), %xmm8, %xmm8 + vpxor %xmm5, %xmm2, %xmm2 + vpxor %xmm1, %xmm3, %xmm3 + vaesenc 144(%r15), %xmm9, %xmm9 + vpslld $31, %xmm2, %xmm7 + vpslld $30, %xmm2, %xmm4 + vpslld $25, %xmm2, %xmm5 + vaesenc 144(%r15), %xmm10, %xmm10 + vpxor %xmm4, %xmm7, %xmm7 + vpxor %xmm5, %xmm7, %xmm7 + vaesenc 144(%r15), %xmm11, %xmm11 + vpsrldq $4, %xmm7, %xmm4 + vpslldq $12, %xmm7, %xmm7 + vaesenc 144(%r15), %xmm12, %xmm12 + vpxor %xmm7, %xmm2, %xmm2 + vpsrld $0x01, %xmm2, %xmm5 + vaesenc 144(%r15), %xmm13, %xmm13 + vpsrld $2, %xmm2, %xmm1 + vpsrld $7, %xmm2, %xmm0 + 
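For reference, every vpclmulqdq/vpshufd $0x4e cluster in the loop above is a Karatsuba-style carry-less multiply in GF(2^128), and the vpslld $31/$30/$25 plus vpsrld $0x01/$2/$7 runs are the shift-and-xor reduction by the GHASH polynomial x^128 + x^7 + x^2 + x + 1 (the AVX2 routines later in this file fold with two vpclmulqdq against L_avx2_aes_gcm_mod2_128 instead; the reflected 0xe1 byte and the 0xc2 quadword are encodings of the same reduction polynomial). A minimal bitwise sketch in plain C, following NIST SP 800-38D Algorithm 1 — gf128_mul is a hypothetical name for illustration only, not code this patch adds and not a wolfSSL API; it should agree with the vector code once blocks pass through the L_avx1_aes_gcm_bswap_mask byte reversal:

#include <stdint.h>
#include <string.h>

/* Illustrative GF(2^128) multiply, Z = X * Y, per NIST SP 800-38D Alg. 1.
 * Bit 0 is the most significant bit of byte 0 (GHASH's reflected order). */
static void gf128_mul(const uint8_t X[16], const uint8_t Y[16], uint8_t Z[16])
{
    uint8_t V[16];
    memcpy(V, Y, 16);
    memset(Z, 0, 16);
    for (int i = 0; i < 128; i++) {
        if (X[i / 8] & (0x80u >> (i % 8))) {   /* bit i of X set? */
            for (int j = 0; j < 16; j++)
                Z[j] ^= V[j];                  /* Z ^= V */
        }
        int carry = V[15] & 1;                 /* bit that will shift out */
        for (int j = 15; j > 0; j--)           /* V = V * x (right shift) */
            V[j] = (uint8_t)((V[j] >> 1) | (V[j - 1] << 7));
        V[0] >>= 1;
        if (carry)
            V[0] ^= 0xe1;  /* x^128 = x^7 + x^2 + x + 1, reflected */
    }
}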
vaesenc 144(%r15), %xmm14, %xmm14 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vaesenc 144(%r15), %xmm15, %xmm15 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm2, %xmm2 + vpxor %xmm3, %xmm2, %xmm2 + cmpl $11, %r10d + vmovdqa 160(%r15), %xmm7 + jl L_AES_GCM_decrypt_avx1_aesenc_128_ghash_avx_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 176(%r15), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + cmpl $13, %r10d + vmovdqa 192(%r15), %xmm7 + jl L_AES_GCM_decrypt_avx1_aesenc_128_ghash_avx_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 208(%r15), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 224(%r15), %xmm7 +L_AES_GCM_decrypt_avx1_aesenc_128_ghash_avx_done: + vaesenclast %xmm7, %xmm8, %xmm8 + vaesenclast %xmm7, %xmm9, %xmm9 + vmovdqu (%rcx), %xmm0 + vmovdqu 16(%rcx), %xmm1 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vmovdqu %xmm8, (%rdx) + vmovdqu %xmm9, 16(%rdx) + vaesenclast %xmm7, %xmm10, %xmm10 + vaesenclast %xmm7, %xmm11, %xmm11 + vmovdqu 32(%rcx), %xmm0 + vmovdqu 48(%rcx), %xmm1 + vpxor %xmm0, %xmm10, %xmm10 + vpxor %xmm1, %xmm11, %xmm11 + vmovdqu %xmm10, 32(%rdx) + vmovdqu %xmm11, 48(%rdx) + vaesenclast %xmm7, %xmm12, %xmm12 + vaesenclast %xmm7, %xmm13, %xmm13 + vmovdqu 64(%rcx), %xmm0 + vmovdqu 80(%rcx), %xmm1 + vpxor %xmm0, %xmm12, %xmm12 + vpxor %xmm1, %xmm13, %xmm13 + vmovdqu %xmm12, 64(%rdx) + vmovdqu %xmm13, 80(%rdx) + vaesenclast %xmm7, %xmm14, %xmm14 + vaesenclast %xmm7, %xmm15, %xmm15 + vmovdqu 96(%rcx), %xmm0 + vmovdqu 112(%rcx), %xmm1 + vpxor %xmm0, %xmm14, %xmm14 + vpxor %xmm1, %xmm15, %xmm15 + vmovdqu %xmm14, 96(%rdx) + vmovdqu %xmm15, 112(%rdx) + addl $0x80, %ebx + cmpl %r13d, %ebx + jl L_AES_GCM_decrypt_avx1_ghash_128 + vmovdqa %xmm2, %xmm6 + vmovdqa (%rsp), %xmm5 +L_AES_GCM_decrypt_avx1_done_128: + movl %r9d, %edx + cmpl %edx, %ebx + jge L_AES_GCM_decrypt_avx1_done_dec + movl %r9d, %r13d + andl $0xfffffff0, %r13d + cmpl %r13d, %ebx + jge L_AES_GCM_decrypt_avx1_last_block_done +L_AES_GCM_decrypt_avx1_last_block_start: + vmovdqu (%rdi,%rbx,1), %xmm13 + vmovdqa %xmm5, %xmm0 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm13, %xmm1 + vpxor %xmm6, %xmm1, %xmm1 + vmovdqa 128(%rsp), %xmm9 + vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm9, %xmm8 + vpaddd L_avx1_aes_gcm_one(%rip), %xmm9, %xmm9 + vmovdqa %xmm9, 128(%rsp) + vpxor (%r15), %xmm8, %xmm8 + vpclmulqdq $16, %xmm0, %xmm1, %xmm10 + vaesenc 16(%r15), %xmm8, %xmm8 + vaesenc 32(%r15), %xmm8, %xmm8 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm11 + vaesenc 48(%r15), %xmm8, %xmm8 + vaesenc 64(%r15), %xmm8, %xmm8 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm12 + vaesenc 80(%r15), %xmm8, %xmm8 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vaesenc 96(%r15), %xmm8, 
%xmm8 + vpxor %xmm11, %xmm10, %xmm10 + vpslldq $8, %xmm10, %xmm2 + vpsrldq $8, %xmm10, %xmm10 + vaesenc 112(%r15), %xmm8, %xmm8 + vpxor %xmm12, %xmm2, %xmm2 + vpxor %xmm10, %xmm1, %xmm3 + vmovdqa L_avx1_aes_gcm_mod2_128(%rip), %xmm0 + vpclmulqdq $16, %xmm0, %xmm2, %xmm11 + vaesenc 128(%r15), %xmm8, %xmm8 + vpshufd $0x4e, %xmm2, %xmm10 + vpxor %xmm11, %xmm10, %xmm10 + vpclmulqdq $16, %xmm0, %xmm10, %xmm11 + vaesenc 144(%r15), %xmm8, %xmm8 + vpshufd $0x4e, %xmm10, %xmm10 + vpxor %xmm11, %xmm10, %xmm10 + vpxor %xmm3, %xmm10, %xmm6 + cmpl $11, %r10d + vmovdqa 160(%r15), %xmm9 + jl L_AES_GCM_decrypt_avx1_aesenc_gfmul_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 176(%r15), %xmm8, %xmm8 + cmpl $13, %r10d + vmovdqa 192(%r15), %xmm9 + jl L_AES_GCM_decrypt_avx1_aesenc_gfmul_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 208(%r15), %xmm8, %xmm8 + vmovdqa 224(%r15), %xmm9 +L_AES_GCM_decrypt_avx1_aesenc_gfmul_last: + vaesenclast %xmm9, %xmm8, %xmm8 + vmovdqa %xmm13, %xmm0 + vpxor %xmm0, %xmm8, %xmm8 + vmovdqu %xmm8, (%rsi,%rbx,1) + addl $16, %ebx + cmpl %r13d, %ebx + jl L_AES_GCM_decrypt_avx1_last_block_start +L_AES_GCM_decrypt_avx1_last_block_done: + movl %r9d, %ecx + movl %ecx, %edx + andl $15, %ecx + jz L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_done + vmovdqa 128(%rsp), %xmm4 + vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4 + vpxor (%r15), %xmm4, %xmm4 + vaesenc 16(%r15), %xmm4, %xmm4 + vaesenc 32(%r15), %xmm4, %xmm4 + vaesenc 48(%r15), %xmm4, %xmm4 + vaesenc 64(%r15), %xmm4, %xmm4 + vaesenc 80(%r15), %xmm4, %xmm4 + vaesenc 96(%r15), %xmm4, %xmm4 + vaesenc 112(%r15), %xmm4, %xmm4 + vaesenc 128(%r15), %xmm4, %xmm4 + vaesenc 144(%r15), %xmm4, %xmm4 + cmpl $11, %r10d + vmovdqa 160(%r15), %xmm9 + jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last + vaesenc %xmm9, %xmm4, %xmm4 + vaesenc 176(%r15), %xmm4, %xmm4 + cmpl $13, %r10d + vmovdqa 192(%r15), %xmm9 + jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last + vaesenc %xmm9, %xmm4, %xmm4 + vaesenc 208(%r15), %xmm4, %xmm4 + vmovdqa 224(%r15), %xmm9 +L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last: + vaesenclast %xmm9, %xmm4, %xmm4 + subq $32, %rsp + xorl %ecx, %ecx + vmovdqa %xmm4, (%rsp) + vpxor %xmm0, %xmm0, %xmm0 + vmovdqa %xmm0, 16(%rsp) +L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_loop: + movzbl (%rdi,%rbx,1), %r13d + movb %r13b, 16(%rsp,%rcx,1) + xorb (%rsp,%rcx,1), %r13b + movb %r13b, (%rsi,%rbx,1) + incl %ebx + incl %ecx + cmpl %edx, %ebx + jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_loop + vmovdqa 16(%rsp), %xmm4 + addq $32, %rsp + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4 + vpxor %xmm4, %xmm6, %xmm6 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm5, %xmm9 + vpshufd $0x4e, %xmm6, %xmm10 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm11 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm8 + vpxor %xmm5, %xmm9, %xmm9 + vpxor %xmm6, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm6 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm6, 
%xmm6 +L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_done: +L_AES_GCM_decrypt_avx1_done_dec: + movl %r9d, %edx + movl %r11d, %ecx + shlq $3, %rdx + shlq $3, %rcx + vpinsrq $0x00, %rdx, %xmm0, %xmm0 + vpinsrq $0x01, %rcx, %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm5, %xmm9 + vpshufd $0x4e, %xmm6, %xmm10 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm11 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm8 + vpxor %xmm5, %xmm9, %xmm9 + vpxor %xmm6, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm6 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm6, %xmm6 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm6, %xmm6 + vpxor 144(%rsp), %xmm6, %xmm0 + cmpl $16, %r14d + je L_AES_GCM_decrypt_avx1_cmp_tag_16 + subq $16, %rsp + xorq %rcx, %rcx + xorq %rbx, %rbx + vmovdqa %xmm0, (%rsp) +L_AES_GCM_decrypt_avx1_cmp_tag_loop: + movzbl (%rsp,%rcx,1), %r13d + xorb (%r8,%rcx,1), %r13b + orb %r13b, %bl + incl %ecx + cmpl %r14d, %ecx + jne L_AES_GCM_decrypt_avx1_cmp_tag_loop + cmpb $0x00, %bl + sete %bl + addq $16, %rsp + xorq %rcx, %rcx + jmp L_AES_GCM_decrypt_avx1_cmp_tag_done +L_AES_GCM_decrypt_avx1_cmp_tag_16: + vmovdqu (%r8), %xmm1 + vpcmpeqb %xmm1, %xmm0, %xmm0 + vpmovmskb %xmm0, %rdx + # %%edx == 0xFFFF then return 1 else => return 0 + xorl %ebx, %ebx + cmpl $0xffff, %edx + sete %bl +L_AES_GCM_decrypt_avx1_cmp_tag_done: + movl %ebx, (%rbp) + vzeroupper + addq $0xa8, %rsp + popq %rbp + popq %r15 + popq %r14 + popq %rbx + popq %r12 + popq %r13 + repz retq +#ifndef __APPLE__ +.size AES_GCM_decrypt_avx1,.-AES_GCM_decrypt_avx1 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX1 */ +#ifdef HAVE_INTEL_AVX2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx2_aes_gcm_one: +.quad 0x0, 0x1 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx2_aes_gcm_two: +.quad 0x0, 0x2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx2_aes_gcm_three: +.quad 0x0, 0x3 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx2_aes_gcm_four: +.quad 0x0, 0x4 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx2_aes_gcm_five: +.quad 0x0, 0x5 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx2_aes_gcm_six: +.quad 0x0, 0x6 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx2_aes_gcm_seven: +.quad 0x0, 
0x7 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx2_aes_gcm_eight: +.quad 0x0, 0x8 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx2_aes_gcm_bswap_one: +.quad 0x0, 0x100000000000000 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx2_aes_gcm_bswap_epi64: +.quad 0x1020304050607, 0x8090a0b0c0d0e0f +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx2_aes_gcm_bswap_mask: +.quad 0x8090a0b0c0d0e0f, 0x1020304050607 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx2_aes_gcm_mod2_128: +.quad 0x1, 0xc200000000000000 +#ifndef __APPLE__ +.text +.globl AES_GCM_encrypt_avx2 +.type AES_GCM_encrypt_avx2,@function +.align 4 +AES_GCM_encrypt_avx2: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt_avx2 +.p2align 2 +_AES_GCM_encrypt_avx2: +#endif /* __APPLE__ */ + pushq %r13 + pushq %r12 + pushq %r15 + pushq %rbx + pushq %r14 + movq %rdx, %r12 + movq %rcx, %rax + movq %r8, %r15 + movq %rsi, %r8 + movl %r9d, %r10d + movl 48(%rsp), %r11d + movl 56(%rsp), %ebx + movl 64(%rsp), %r14d + movq 72(%rsp), %rsi + movl 80(%rsp), %r9d + subq $0xa0, %rsp + vpxor %xmm4, %xmm4, %xmm4 + vpxor %xmm6, %xmm6, %xmm6 + movl %ebx, %edx + cmpl $12, %edx + je L_AES_GCM_encrypt_avx2_iv_12 + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + vmovdqa (%rsi), %xmm5 + vaesenc 16(%rsi), %xmm5, %xmm5 + vaesenc 32(%rsi), %xmm5, %xmm5 + vaesenc 48(%rsi), %xmm5, %xmm5 + vaesenc 64(%rsi), %xmm5, %xmm5 + vaesenc 80(%rsi), %xmm5, %xmm5 + vaesenc 96(%rsi), %xmm5, %xmm5 + vaesenc 112(%rsi), %xmm5, %xmm5 + vaesenc 128(%rsi), %xmm5, %xmm5 + vaesenc 144(%rsi), %xmm5, %xmm5 + cmpl $11, %r9d + vmovdqa 160(%rsi), %xmm0 + jl L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc 176(%rsi), %xmm5, %xmm5 + cmpl $13, %r9d + vmovdqa 192(%rsi), %xmm0 + jl L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc 208(%rsi), %xmm5, %xmm5 + vmovdqa 224(%rsi), %xmm0 +L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last: + vaesenclast %xmm0, %xmm5, %xmm5 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5 + # Calc counter + # Initialization vector + cmpl $0x00, %edx + movq $0x00, %rcx + je L_AES_GCM_encrypt_avx2_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_encrypt_avx2_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_encrypt_avx2_calc_iv_16_loop: + vmovdqu (%rax,%rcx,1), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor 
%xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_avx2_calc_iv_16_loop + movl %ebx, %edx + cmpl %edx, %ecx + je L_AES_GCM_encrypt_avx2_calc_iv_done +L_AES_GCM_encrypt_avx2_calc_iv_lt16: + vpxor %xmm0, %xmm0, %xmm0 + xorl %ebx, %ebx + vmovdqa %xmm0, (%rsp) +L_AES_GCM_encrypt_avx2_calc_iv_loop: + movzbl (%rax,%rcx,1), %r13d + movb %r13b, (%rsp,%rbx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_avx2_calc_iv_loop + vmovdqa (%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 +L_AES_GCM_encrypt_avx2_calc_iv_done: + # T = Encrypt counter + vpxor %xmm0, %xmm0, %xmm0 + shll $3, %edx + vpinsrq $0x00, %rdx, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4 + # Encrypt counter + vmovdqa (%rsi), %xmm15 + vpxor %xmm4, %xmm15, %xmm15 + vaesenc 16(%rsi), %xmm15, %xmm15 + vaesenc 32(%rsi), %xmm15, %xmm15 + vaesenc 48(%rsi), %xmm15, %xmm15 + vaesenc 64(%rsi), %xmm15, %xmm15 + vaesenc 80(%rsi), %xmm15, %xmm15 + vaesenc 96(%rsi), %xmm15, %xmm15 + vaesenc 112(%rsi), %xmm15, %xmm15 + vaesenc 128(%rsi), %xmm15, %xmm15 + vaesenc 144(%rsi), %xmm15, %xmm15 + cmpl $11, %r9d + vmovdqa 160(%rsi), %xmm0 + jl L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last + vaesenc %xmm0, %xmm15, %xmm15 + vaesenc 176(%rsi), %xmm15, %xmm15 + cmpl $13, %r9d + vmovdqa 192(%rsi), %xmm0 + jl L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last + vaesenc %xmm0, %xmm15, %xmm15 + vaesenc 
208(%rsi), %xmm15, %xmm15 + vmovdqa 224(%rsi), %xmm0 +L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last: + vaesenclast %xmm0, %xmm15, %xmm15 + jmp L_AES_GCM_encrypt_avx2_iv_done +L_AES_GCM_encrypt_avx2_iv_12: + # # Calculate values when IV is 12 bytes + # Set counter based on IV + vmovdqa L_avx2_aes_gcm_bswap_one(%rip), %xmm4 + vmovdqa (%rsi), %xmm5 + vpblendd $7, (%rax), %xmm4, %xmm4 + # H = Encrypt X(=0) and T = Encrypt counter + vmovdqa 16(%rsi), %xmm7 + vpxor %xmm5, %xmm4, %xmm15 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 32(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 48(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 64(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 80(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 96(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 112(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 128(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 144(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + cmpl $11, %r9d + vmovdqa 160(%rsi), %xmm0 + jl L_AES_GCM_encrypt_avx2_calc_iv_12_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 176(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + cmpl $13, %r9d + vmovdqa 192(%rsi), %xmm0 + jl L_AES_GCM_encrypt_avx2_calc_iv_12_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 208(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 224(%rsi), %xmm0 +L_AES_GCM_encrypt_avx2_calc_iv_12_last: + vaesenclast %xmm0, %xmm5, %xmm5 + vaesenclast %xmm0, %xmm15, %xmm15 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5 +L_AES_GCM_encrypt_avx2_iv_done: + # Additional authentication data + movl %r11d, %edx + cmpl $0x00, %edx + je L_AES_GCM_encrypt_avx2_calc_aad_done + xorl %ecx, %ecx + cmpl $16, %edx + jl L_AES_GCM_encrypt_avx2_calc_aad_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_encrypt_avx2_calc_aad_16_loop: + vmovdqu (%r12,%rcx,1), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm6, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm6, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm6 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm6, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm6, %xmm6 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm6, %xmm6 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm6, %xmm6 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_avx2_calc_aad_16_loop + movl %r11d, %edx + cmpl %edx, %ecx + je L_AES_GCM_encrypt_avx2_calc_aad_done +L_AES_GCM_encrypt_avx2_calc_aad_lt16: + vpxor %xmm0, %xmm0, %xmm0 + xorl %ebx, %ebx + vmovdqa %xmm0, (%rsp) +L_AES_GCM_encrypt_avx2_calc_aad_loop: + movzbl (%r12,%rcx,1), 
%r13d + movb %r13b, (%rsp,%rbx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_avx2_calc_aad_loop + vmovdqa (%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm6, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm6, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm6 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm6, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm6, %xmm6 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm6, %xmm6 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm6, %xmm6 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 +L_AES_GCM_encrypt_avx2_calc_aad_done: + # Calculate counter and H + vpsrlq $63, %xmm5, %xmm1 + vpsllq $0x01, %xmm5, %xmm0 + vpslldq $8, %xmm1, %xmm1 + vpor %xmm1, %xmm0, %xmm0 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4 + vpand L_avx2_aes_gcm_mod2_128(%rip), %xmm5, %xmm5 + vpaddd L_avx2_aes_gcm_one(%rip), %xmm4, %xmm4 + vpxor %xmm0, %xmm5, %xmm5 + xorl %ebx, %ebx + cmpl $0x80, %r10d + movl %r10d, %r13d + jl L_AES_GCM_encrypt_avx2_done_128 + andl $0xffffff80, %r13d + vmovdqa %xmm4, 128(%rsp) + vmovdqa %xmm15, 144(%rsp) + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm3 + # H ^ 1 and H ^ 2 + vpclmulqdq $0x00, %xmm5, %xmm5, %xmm9 + vpclmulqdq $0x11, %xmm5, %xmm5, %xmm10 + vpclmulqdq $16, %xmm3, %xmm9, %xmm8 + vpshufd $0x4e, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpclmulqdq $16, %xmm3, %xmm9, %xmm8 + vpshufd $0x4e, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm9, %xmm10, %xmm0 + vmovdqa %xmm5, (%rsp) + vmovdqa %xmm0, 16(%rsp) + # H ^ 3 and H ^ 4 + vpclmulqdq $16, %xmm5, %xmm0, %xmm11 + vpclmulqdq $0x01, %xmm5, %xmm0, %xmm10 + vpclmulqdq $0x00, %xmm5, %xmm0, %xmm9 + vpclmulqdq $0x11, %xmm5, %xmm0, %xmm12 + vpclmulqdq $0x00, %xmm0, %xmm0, %xmm13 + vpclmulqdq $0x11, %xmm0, %xmm0, %xmm14 + vpxor %xmm10, %xmm11, %xmm11 + vpslldq $8, %xmm11, %xmm10 + vpsrldq $8, %xmm11, %xmm11 + vpxor %xmm9, %xmm10, %xmm10 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm9, %xmm10, %xmm10 + vpxor %xmm8, %xmm13, %xmm13 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm11, %xmm12, %xmm12 + vpxor %xmm8, %xmm13, %xmm13 + vpxor %xmm12, %xmm10, %xmm10 + vpxor %xmm14, %xmm13, %xmm2 + vpxor %xmm9, %xmm10, %xmm1 + vmovdqa %xmm1, 32(%rsp) + vmovdqa %xmm2, 48(%rsp) + # H ^ 5 and H ^ 6 + vpclmulqdq $16, %xmm0, %xmm1, %xmm11 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm10 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm9 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm12 + vpclmulqdq $0x00, %xmm1, %xmm1, %xmm13 + vpclmulqdq $0x11, %xmm1, %xmm1, %xmm14 + vpxor %xmm10, %xmm11, %xmm11 + vpslldq $8, %xmm11, %xmm10 + vpsrldq $8, %xmm11, %xmm11 + vpxor %xmm9, %xmm10, %xmm10 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpshufd $0x4e, %xmm10, %xmm10 + 
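The stack table being filled here ((%rsp) through 112(%rsp); the AVX1 routine above builds the same table) holds H^1 through H^8, so the main loop can absorb eight blocks per iteration: the running tag is folded into the oldest block, which is multiplied by H^8, the newest block by H^1, and the eight products are XOR-summed before reducing. The even powers come cheap — H^2, H^4, H^6 and H^8 each need only two vpclmulqdq of a value with itself, since squaring has no cross terms over GF(2) — while the odd powers take the full four-multiply Karatsuba. A sketch of the same bookkeeping in plain C, reusing the illustrative gf128_mul above (hypothetical helper names, not wolfSSL code; reducing each product separately, as here, is mathematically equivalent to the single deferred reduction the assembly performs, because reduction is linear over GF(2)):

/* Illustrative: precompute H^1..H^8 for eight-block GHASH aggregation. */
static void ghash_precompute_pow(const uint8_t H[16], uint8_t Hpow[8][16])
{
    memcpy(Hpow[0], H, 16);                   /* H^1 */
    for (int i = 1; i < 8; i++)
        gf128_mul(Hpow[i - 1], H, Hpow[i]);   /* H^2 .. H^8 */
}

/* One aggregated update: Y = (Y^C[0])*H^8 ^ C[1]*H^7 ^ ... ^ C[7]*H^1. */
static void ghash_update_8(uint8_t Y[16], const uint8_t C[8][16],
                           uint8_t Hpow[8][16])
{
    uint8_t acc[16] = {0};
    for (int i = 0; i < 8; i++) {
        uint8_t in[16], prod[16];
        memcpy(in, C[i], 16);
        if (i == 0)                            /* fold running tag into block 0 */
            for (int j = 0; j < 16; j++)
                in[j] ^= Y[j];
        gf128_mul(in, Hpow[7 - i], prod);      /* block i pairs with H^(8-i) */
        for (int j = 0; j < 16; j++)
            acc[j] ^= prod[j];                 /* XOR-sum the eight products */
    }
    memcpy(Y, acc, 16);
}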
vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm9, %xmm10, %xmm10 + vpxor %xmm8, %xmm13, %xmm13 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm11, %xmm12, %xmm12 + vpxor %xmm8, %xmm13, %xmm13 + vpxor %xmm12, %xmm10, %xmm10 + vpxor %xmm14, %xmm13, %xmm0 + vpxor %xmm9, %xmm10, %xmm7 + vmovdqa %xmm7, 64(%rsp) + vmovdqa %xmm0, 80(%rsp) + # H ^ 7 and H ^ 8 + vpclmulqdq $16, %xmm1, %xmm2, %xmm11 + vpclmulqdq $0x01, %xmm1, %xmm2, %xmm10 + vpclmulqdq $0x00, %xmm1, %xmm2, %xmm9 + vpclmulqdq $0x11, %xmm1, %xmm2, %xmm12 + vpclmulqdq $0x00, %xmm2, %xmm2, %xmm13 + vpclmulqdq $0x11, %xmm2, %xmm2, %xmm14 + vpxor %xmm10, %xmm11, %xmm11 + vpslldq $8, %xmm11, %xmm10 + vpsrldq $8, %xmm11, %xmm11 + vpxor %xmm9, %xmm10, %xmm10 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm9, %xmm10, %xmm10 + vpxor %xmm8, %xmm13, %xmm13 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm11, %xmm12, %xmm12 + vpxor %xmm8, %xmm13, %xmm13 + vpxor %xmm12, %xmm10, %xmm10 + vpxor %xmm14, %xmm13, %xmm0 + vpxor %xmm9, %xmm10, %xmm7 + vmovdqa %xmm7, 96(%rsp) + vmovdqa %xmm0, 112(%rsp) + # First 128 bytes of input + # aesenc_128 + # aesenc_ctr + vmovdqa 128(%rsp), %xmm0 + vmovdqa L_avx2_aes_gcm_bswap_epi64(%rip), %xmm1 + vpaddd L_avx2_aes_gcm_one(%rip), %xmm0, %xmm9 + vpshufb %xmm1, %xmm0, %xmm8 + vpaddd L_avx2_aes_gcm_two(%rip), %xmm0, %xmm10 + vpshufb %xmm1, %xmm9, %xmm9 + vpaddd L_avx2_aes_gcm_three(%rip), %xmm0, %xmm11 + vpshufb %xmm1, %xmm10, %xmm10 + vpaddd L_avx2_aes_gcm_four(%rip), %xmm0, %xmm12 + vpshufb %xmm1, %xmm11, %xmm11 + vpaddd L_avx2_aes_gcm_five(%rip), %xmm0, %xmm13 + vpshufb %xmm1, %xmm12, %xmm12 + vpaddd L_avx2_aes_gcm_six(%rip), %xmm0, %xmm14 + vpshufb %xmm1, %xmm13, %xmm13 + vpaddd L_avx2_aes_gcm_seven(%rip), %xmm0, %xmm15 + vpshufb %xmm1, %xmm14, %xmm14 + vpaddd L_avx2_aes_gcm_eight(%rip), %xmm0, %xmm0 + vpshufb %xmm1, %xmm15, %xmm15 + # aesenc_xor + vmovdqa (%rsi), %xmm7 + vmovdqa %xmm0, 128(%rsp) + vpxor %xmm7, %xmm8, %xmm8 + vpxor %xmm7, %xmm9, %xmm9 + vpxor %xmm7, %xmm10, %xmm10 + vpxor %xmm7, %xmm11, %xmm11 + vpxor %xmm7, %xmm12, %xmm12 + vpxor %xmm7, %xmm13, %xmm13 + vpxor %xmm7, %xmm14, %xmm14 + vpxor %xmm7, %xmm15, %xmm15 + vmovdqa 16(%rsi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 32(%rsi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 48(%rsi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 64(%rsi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, 
%xmm15, %xmm15 + vmovdqa 80(%rsi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 96(%rsi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 112(%rsi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 128(%rsi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 144(%rsi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + cmpl $11, %r9d + vmovdqa 160(%rsi), %xmm7 + jl L_AES_GCM_encrypt_avx2_aesenc_128_enc_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 176(%rsi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + cmpl $13, %r9d + vmovdqa 192(%rsi), %xmm7 + jl L_AES_GCM_encrypt_avx2_aesenc_128_enc_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 208(%rsi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 224(%rsi), %xmm7 +L_AES_GCM_encrypt_avx2_aesenc_128_enc_done: + # aesenc_last + vaesenclast %xmm7, %xmm8, %xmm8 + vaesenclast %xmm7, %xmm9, %xmm9 + vaesenclast %xmm7, %xmm10, %xmm10 + vaesenclast %xmm7, %xmm11, %xmm11 + vmovdqu (%rdi), %xmm0 + vmovdqu 16(%rdi), %xmm1 + vmovdqu 32(%rdi), %xmm2 + vmovdqu 48(%rdi), %xmm3 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm2, %xmm10, %xmm10 + vpxor %xmm3, %xmm11, %xmm11 + vmovdqu %xmm8, (%r8) + vmovdqu %xmm9, 16(%r8) + vmovdqu %xmm10, 32(%r8) + vmovdqu %xmm11, 48(%r8) + vaesenclast %xmm7, %xmm12, %xmm12 + vaesenclast %xmm7, %xmm13, %xmm13 + vaesenclast %xmm7, %xmm14, %xmm14 + vaesenclast %xmm7, %xmm15, %xmm15 + vmovdqu 64(%rdi), %xmm0 + vmovdqu 80(%rdi), %xmm1 + vmovdqu 96(%rdi), %xmm2 + vmovdqu 112(%rdi), %xmm3 + vpxor %xmm0, %xmm12, %xmm12 + vpxor %xmm1, %xmm13, %xmm13 + vpxor %xmm2, %xmm14, %xmm14 + vpxor %xmm3, 
%xmm15, %xmm15 + vmovdqu %xmm12, 64(%r8) + vmovdqu %xmm13, 80(%r8) + vmovdqu %xmm14, 96(%r8) + vmovdqu %xmm15, 112(%r8) + cmpl $0x80, %r13d + movl $0x80, %ebx + jle L_AES_GCM_encrypt_avx2_end_128 + # More 128 bytes of input +L_AES_GCM_encrypt_avx2_ghash_128: + # aesenc_128_ghash + leaq (%rdi,%rbx,1), %rcx + leaq (%r8,%rbx,1), %rdx + # aesenc_ctr + vmovdqa 128(%rsp), %xmm0 + vmovdqa L_avx2_aes_gcm_bswap_epi64(%rip), %xmm1 + vpaddd L_avx2_aes_gcm_one(%rip), %xmm0, %xmm9 + vpshufb %xmm1, %xmm0, %xmm8 + vpaddd L_avx2_aes_gcm_two(%rip), %xmm0, %xmm10 + vpshufb %xmm1, %xmm9, %xmm9 + vpaddd L_avx2_aes_gcm_three(%rip), %xmm0, %xmm11 + vpshufb %xmm1, %xmm10, %xmm10 + vpaddd L_avx2_aes_gcm_four(%rip), %xmm0, %xmm12 + vpshufb %xmm1, %xmm11, %xmm11 + vpaddd L_avx2_aes_gcm_five(%rip), %xmm0, %xmm13 + vpshufb %xmm1, %xmm12, %xmm12 + vpaddd L_avx2_aes_gcm_six(%rip), %xmm0, %xmm14 + vpshufb %xmm1, %xmm13, %xmm13 + vpaddd L_avx2_aes_gcm_seven(%rip), %xmm0, %xmm15 + vpshufb %xmm1, %xmm14, %xmm14 + vpaddd L_avx2_aes_gcm_eight(%rip), %xmm0, %xmm0 + vpshufb %xmm1, %xmm15, %xmm15 + # aesenc_xor + vmovdqa (%rsi), %xmm7 + vmovdqa %xmm0, 128(%rsp) + vpxor %xmm7, %xmm8, %xmm8 + vpxor %xmm7, %xmm9, %xmm9 + vpxor %xmm7, %xmm10, %xmm10 + vpxor %xmm7, %xmm11, %xmm11 + vpxor %xmm7, %xmm12, %xmm12 + vpxor %xmm7, %xmm13, %xmm13 + vpxor %xmm7, %xmm14, %xmm14 + vpxor %xmm7, %xmm15, %xmm15 + # aesenc_pclmul_1 + vmovdqu -128(%rdx), %xmm1 + vmovdqu 16(%rsi), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vmovdqa 112(%rsp), %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm5 + vpclmulqdq $0x01, %xmm2, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm2, %xmm1, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_2 + vmovdqu -112(%rdx), %xmm1 + vmovdqa 96(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 32(%rsi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu -96(%rdx), %xmm1 + vmovdqa 80(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 48(%rsi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu -80(%rdx), %xmm1 + vmovdqa 64(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, 
%xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 64(%rsi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu -64(%rdx), %xmm1 + vmovdqa 48(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 80(%rsi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu -48(%rdx), %xmm1 + vmovdqa 32(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 96(%rsi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu -32(%rdx), %xmm1 + vmovdqa 16(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 112(%rsi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu -16(%rdx), %xmm1 + vmovdqa (%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 128(%rsi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_l + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm3, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm1 + vpsrldq $8, %xmm5, %xmm5 + vmovdqa 144(%rsi), %xmm4 + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm0 + vaesenc %xmm4, %xmm8, %xmm8 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm5, %xmm7, %xmm7 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, 
%xmm11 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vaesenc %xmm4, %xmm12, %xmm12 + vaesenc %xmm4, %xmm13, %xmm13 + vaesenc %xmm4, %xmm14, %xmm14 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm7, %xmm6, %xmm6 + vaesenc %xmm4, %xmm15, %xmm15 + cmpl $11, %r9d + vmovdqa 160(%rsi), %xmm7 + jl L_AES_GCM_encrypt_avx2_aesenc_128_ghash_avx_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 176(%rsi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + cmpl $13, %r9d + vmovdqa 192(%rsi), %xmm7 + jl L_AES_GCM_encrypt_avx2_aesenc_128_ghash_avx_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 208(%rsi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 224(%rsi), %xmm7 +L_AES_GCM_encrypt_avx2_aesenc_128_ghash_avx_done: + # aesenc_last + vaesenclast %xmm7, %xmm8, %xmm8 + vaesenclast %xmm7, %xmm9, %xmm9 + vaesenclast %xmm7, %xmm10, %xmm10 + vaesenclast %xmm7, %xmm11, %xmm11 + vmovdqu (%rcx), %xmm0 + vmovdqu 16(%rcx), %xmm1 + vmovdqu 32(%rcx), %xmm2 + vmovdqu 48(%rcx), %xmm3 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm2, %xmm10, %xmm10 + vpxor %xmm3, %xmm11, %xmm11 + vmovdqu %xmm8, (%rdx) + vmovdqu %xmm9, 16(%rdx) + vmovdqu %xmm10, 32(%rdx) + vmovdqu %xmm11, 48(%rdx) + vaesenclast %xmm7, %xmm12, %xmm12 + vaesenclast %xmm7, %xmm13, %xmm13 + vaesenclast %xmm7, %xmm14, %xmm14 + vaesenclast %xmm7, %xmm15, %xmm15 + vmovdqu 64(%rcx), %xmm0 + vmovdqu 80(%rcx), %xmm1 + vmovdqu 96(%rcx), %xmm2 + vmovdqu 112(%rcx), %xmm3 + vpxor %xmm0, %xmm12, %xmm12 + vpxor %xmm1, %xmm13, %xmm13 + vpxor %xmm2, %xmm14, %xmm14 + vpxor %xmm3, %xmm15, %xmm15 + vmovdqu %xmm12, 64(%rdx) + vmovdqu %xmm13, 80(%rdx) + vmovdqu %xmm14, 96(%rdx) + vmovdqu %xmm15, 112(%rdx) + # aesenc_128_ghash - end + addl $0x80, %ebx + cmpl %r13d, %ebx + jl L_AES_GCM_encrypt_avx2_ghash_128 +L_AES_GCM_encrypt_avx2_end_128: + vmovdqa L_avx2_aes_gcm_bswap_mask(%rip), %xmm4 + vpshufb %xmm4, %xmm8, %xmm8 + vpshufb %xmm4, %xmm9, %xmm9 + vpshufb %xmm4, %xmm10, %xmm10 + vpshufb %xmm4, %xmm11, %xmm11 + vpshufb %xmm4, %xmm12, %xmm12 + vpshufb %xmm4, %xmm13, %xmm13 + vpshufb %xmm4, %xmm14, %xmm14 + vpshufb %xmm4, %xmm15, %xmm15 + vpxor %xmm6, %xmm8, %xmm8 + vmovdqu (%rsp), %xmm7 + vpclmulqdq $16, %xmm15, %xmm7, %xmm5 + vpclmulqdq $0x01, %xmm15, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm15, %xmm7, %xmm4 + vpclmulqdq $0x11, %xmm15, %xmm7, %xmm6 + vpxor %xmm1, %xmm5, %xmm5 + vmovdqu 16(%rsp), %xmm7 + vpclmulqdq $16, %xmm14, %xmm7, %xmm2 + vpclmulqdq $0x01, %xmm14, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm14, %xmm7, %xmm0 + vpclmulqdq $0x11, %xmm14, %xmm7, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, 
%xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vmovdqu 32(%rsp), %xmm15 + vmovdqu 48(%rsp), %xmm7 + vpclmulqdq $16, %xmm13, %xmm15, %xmm2 + vpclmulqdq $0x01, %xmm13, %xmm15, %xmm1 + vpclmulqdq $0x00, %xmm13, %xmm15, %xmm0 + vpclmulqdq $0x11, %xmm13, %xmm15, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vpclmulqdq $16, %xmm12, %xmm7, %xmm2 + vpclmulqdq $0x01, %xmm12, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm12, %xmm7, %xmm0 + vpclmulqdq $0x11, %xmm12, %xmm7, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vmovdqu 64(%rsp), %xmm15 + vmovdqu 80(%rsp), %xmm7 + vpclmulqdq $16, %xmm11, %xmm15, %xmm2 + vpclmulqdq $0x01, %xmm11, %xmm15, %xmm1 + vpclmulqdq $0x00, %xmm11, %xmm15, %xmm0 + vpclmulqdq $0x11, %xmm11, %xmm15, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vpclmulqdq $16, %xmm10, %xmm7, %xmm2 + vpclmulqdq $0x01, %xmm10, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm10, %xmm7, %xmm0 + vpclmulqdq $0x11, %xmm10, %xmm7, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vmovdqu 96(%rsp), %xmm15 + vmovdqu 112(%rsp), %xmm7 + vpclmulqdq $16, %xmm9, %xmm15, %xmm2 + vpclmulqdq $0x01, %xmm9, %xmm15, %xmm1 + vpclmulqdq $0x00, %xmm9, %xmm15, %xmm0 + vpclmulqdq $0x11, %xmm9, %xmm15, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vpclmulqdq $16, %xmm8, %xmm7, %xmm2 + vpclmulqdq $0x01, %xmm8, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm8, %xmm7, %xmm0 + vpclmulqdq $0x11, %xmm8, %xmm7, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vpslldq $8, %xmm5, %xmm7 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm7, %xmm4, %xmm4 + vpxor %xmm5, %xmm6, %xmm6 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm4, %xmm0 + vpshufd $0x4e, %xmm4, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 + vmovdqa (%rsp), %xmm5 + vmovdqu 128(%rsp), %xmm4 + vmovdqu 144(%rsp), %xmm15 +L_AES_GCM_encrypt_avx2_done_128: + cmpl %r10d, %ebx + je L_AES_GCM_encrypt_avx2_done_enc + movl %r10d, %r13d + andl $0xfffffff0, %r13d + cmpl %r13d, %ebx + jge L_AES_GCM_encrypt_avx2_last_block_done + # aesenc_block + vmovdqa %xmm4, %xmm1 + vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm1, %xmm0 + vpaddd L_avx2_aes_gcm_one(%rip), %xmm1, %xmm1 + vpxor (%rsi), %xmm0, %xmm0 + vmovdqa 16(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 32(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 48(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 64(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 80(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 96(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 112(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 128(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 144(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa %xmm1, %xmm4 + cmpl $11, %r9d + vmovdqa 160(%rsi), %xmm1 + jl L_AES_GCM_encrypt_avx2_aesenc_block_last + vaesenc %xmm1, %xmm0, %xmm0 + vmovdqa 176(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + cmpl $13, %r9d + vmovdqa 192(%rsi), %xmm1 + jl L_AES_GCM_encrypt_avx2_aesenc_block_last + vaesenc %xmm1, %xmm0, %xmm0 + vmovdqa 208(%rsi), %xmm2 + vaesenc 
%xmm2, %xmm0, %xmm0 + vmovdqa 224(%rsi), %xmm1 +L_AES_GCM_encrypt_avx2_aesenc_block_last: + vaesenclast %xmm1, %xmm0, %xmm0 + vmovdqu (%rdi,%rbx,1), %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, (%r8,%rbx,1) + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + addl $16, %ebx + cmpl %r13d, %ebx + jge L_AES_GCM_encrypt_avx2_last_block_ghash +L_AES_GCM_encrypt_avx2_last_block_start: + vmovdqu (%rdi,%rbx,1), %xmm12 + vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm11 + vpaddd L_avx2_aes_gcm_one(%rip), %xmm4, %xmm4 + # aesenc_gfmul_sb + vpclmulqdq $0x01, %xmm5, %xmm6, %xmm2 + vpclmulqdq $16, %xmm5, %xmm6, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm1 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm8 + vpxor (%rsi), %xmm11, %xmm11 + vaesenc 16(%rsi), %xmm11, %xmm11 + vpxor %xmm2, %xmm3, %xmm3 + vpslldq $8, %xmm3, %xmm2 + vpsrldq $8, %xmm3, %xmm3 + vaesenc 32(%rsi), %xmm11, %xmm11 + vpxor %xmm1, %xmm2, %xmm2 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm2, %xmm1 + vaesenc 48(%rsi), %xmm11, %xmm11 + vaesenc 64(%rsi), %xmm11, %xmm11 + vaesenc 80(%rsi), %xmm11, %xmm11 + vpshufd $0x4e, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm2, %xmm1 + vaesenc 96(%rsi), %xmm11, %xmm11 + vaesenc 112(%rsi), %xmm11, %xmm11 + vaesenc 128(%rsi), %xmm11, %xmm11 + vpshufd $0x4e, %xmm2, %xmm2 + vaesenc 144(%rsi), %xmm11, %xmm11 + vpxor %xmm3, %xmm8, %xmm8 + vpxor %xmm8, %xmm2, %xmm2 + vmovdqa 160(%rsi), %xmm0 + cmpl $11, %r9d + jl L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc 176(%rsi), %xmm11, %xmm11 + vmovdqa 192(%rsi), %xmm0 + cmpl $13, %r9d + jl L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc 208(%rsi), %xmm11, %xmm11 + vmovdqa 224(%rsi), %xmm0 +L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last: + vaesenclast %xmm0, %xmm11, %xmm11 + vpxor %xmm1, %xmm2, %xmm6 + vpxor %xmm12, %xmm11, %xmm11 + vmovdqu %xmm11, (%r8,%rbx,1) + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm11, %xmm11 + vpxor %xmm11, %xmm6, %xmm6 + addl $16, %ebx + cmpl %r13d, %ebx + jl L_AES_GCM_encrypt_avx2_last_block_start +L_AES_GCM_encrypt_avx2_last_block_ghash: + # ghash_gfmul_red + vpclmulqdq $16, %xmm5, %xmm6, %xmm10 + vpclmulqdq $0x01, %xmm5, %xmm6, %xmm9 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm8 + vpxor %xmm9, %xmm10, %xmm10 + vpslldq $8, %xmm10, %xmm9 + vpsrldq $8, %xmm10, %xmm10 + vpxor %xmm8, %xmm9, %xmm9 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm6 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm9, %xmm8 + vpshufd $0x4e, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm9, %xmm8 + vpshufd $0x4e, %xmm9, %xmm9 + vpxor %xmm10, %xmm6, %xmm6 + vpxor %xmm9, %xmm6, %xmm6 + vpxor %xmm8, %xmm6, %xmm6 +L_AES_GCM_encrypt_avx2_last_block_done: + movl %r10d, %ecx + movl %r10d, %edx + andl $15, %ecx + jz L_AES_GCM_encrypt_avx2_done_enc + # aesenc_last15_enc + vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4 + vpxor (%rsi), %xmm4, %xmm4 + vaesenc 16(%rsi), %xmm4, %xmm4 + vaesenc 32(%rsi), %xmm4, %xmm4 + vaesenc 48(%rsi), %xmm4, %xmm4 + vaesenc 64(%rsi), %xmm4, %xmm4 + vaesenc 80(%rsi), %xmm4, %xmm4 + vaesenc 96(%rsi), %xmm4, %xmm4 + vaesenc 112(%rsi), %xmm4, %xmm4 + vaesenc 128(%rsi), %xmm4, %xmm4 + vaesenc 144(%rsi), %xmm4, %xmm4 + cmpl $11, %r9d + vmovdqa 160(%rsi), %xmm0 + jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last + vaesenc %xmm0, %xmm4, %xmm4 + vaesenc 176(%rsi), %xmm4, %xmm4 + cmpl $13, %r9d + vmovdqa 192(%rsi), %xmm0 + jl 
L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last + vaesenc %xmm0, %xmm4, %xmm4 + vaesenc 208(%rsi), %xmm4, %xmm4 + vmovdqa 224(%rsi), %xmm0 +L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last: + vaesenclast %xmm0, %xmm4, %xmm4 + xorl %ecx, %ecx + vpxor %xmm0, %xmm0, %xmm0 + vmovdqa %xmm4, (%rsp) + vmovdqa %xmm0, 16(%rsp) +L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_loop: + movzbl (%rdi,%rbx,1), %r13d + xorb (%rsp,%rcx,1), %r13b + movb %r13b, 16(%rsp,%rcx,1) + movb %r13b, (%r8,%rbx,1) + incl %ebx + incl %ecx + cmpl %edx, %ebx + jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_loop +L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_finish_enc: + vmovdqa 16(%rsp), %xmm4 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4 + vpxor %xmm4, %xmm6, %xmm6 + # ghash_gfmul_red + vpclmulqdq $16, %xmm5, %xmm6, %xmm2 + vpclmulqdq $0x01, %xmm5, %xmm6, %xmm1 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm0 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm6 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm2, %xmm6, %xmm6 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm0, %xmm6, %xmm6 +L_AES_GCM_encrypt_avx2_done_enc: + # calc_tag + shlq $3, %r10 + vpinsrq $0x00, %r10, %xmm0, %xmm0 + shlq $3, %r11 + vpinsrq $0x01, %r11, %xmm1, %xmm1 + vpblendd $12, %xmm1, %xmm0, %xmm0 + vpxor %xmm6, %xmm0, %xmm0 + # ghash_gfmul_red + vpclmulqdq $16, %xmm5, %xmm0, %xmm4 + vpclmulqdq $0x01, %xmm5, %xmm0, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm0, %xmm2 + vpxor %xmm3, %xmm4, %xmm4 + vpslldq $8, %xmm4, %xmm3 + vpsrldq $8, %xmm4, %xmm4 + vpxor %xmm2, %xmm3, %xmm3 + vpclmulqdq $0x11, %xmm5, %xmm0, %xmm0 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm2, %xmm3, %xmm3 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm4, %xmm0, %xmm0 + vpxor %xmm3, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm15, %xmm0, %xmm0 + # store_tag + cmpl $16, %r14d + je L_AES_GCM_encrypt_avx2_store_tag_16 + xorq %rcx, %rcx + vmovdqa %xmm0, (%rsp) +L_AES_GCM_encrypt_avx2_store_tag_loop: + movzbl (%rsp,%rcx,1), %r13d + movb %r13b, (%r15,%rcx,1) + incl %ecx + cmpl %r14d, %ecx + jne L_AES_GCM_encrypt_avx2_store_tag_loop + jmp L_AES_GCM_encrypt_avx2_store_tag_done +L_AES_GCM_encrypt_avx2_store_tag_16: + vmovdqu %xmm0, (%r15) +L_AES_GCM_encrypt_avx2_store_tag_done: + vzeroupper + addq $0xa0, %rsp + popq %r14 + popq %rbx + popq %r15 + popq %r12 + popq %r13 + repz retq +#ifndef __APPLE__ +.size AES_GCM_encrypt_avx2,.-AES_GCM_encrypt_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_decrypt_avx2 +.type AES_GCM_decrypt_avx2,@function +.align 4 +AES_GCM_decrypt_avx2: +#else +.section __TEXT,__text +.globl _AES_GCM_decrypt_avx2 +.p2align 2 +_AES_GCM_decrypt_avx2: +#endif /* __APPLE__ */ + pushq %r13 + pushq %r12 + pushq %r14 + pushq %rbx + pushq %r15 + pushq %rbp + movq %rdx, %r12 + movq %rcx, %rax + movq %r8, %r14 + movq %rsi, %r8 + movl %r9d, %r10d + movl 56(%rsp), %r11d + movl 64(%rsp), %ebx + movl 72(%rsp), %r15d + movq 80(%rsp), %rsi + movl 88(%rsp), %r9d + movq 96(%rsp), %rbp + subq $0xa8, %rsp + vpxor %xmm4, %xmm4, %xmm4 + vpxor %xmm6, %xmm6, %xmm6 + movl %ebx, %edx + cmpl $12, %edx + je 
L_AES_GCM_decrypt_avx2_iv_12 + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + vmovdqa (%rsi), %xmm5 + vaesenc 16(%rsi), %xmm5, %xmm5 + vaesenc 32(%rsi), %xmm5, %xmm5 + vaesenc 48(%rsi), %xmm5, %xmm5 + vaesenc 64(%rsi), %xmm5, %xmm5 + vaesenc 80(%rsi), %xmm5, %xmm5 + vaesenc 96(%rsi), %xmm5, %xmm5 + vaesenc 112(%rsi), %xmm5, %xmm5 + vaesenc 128(%rsi), %xmm5, %xmm5 + vaesenc 144(%rsi), %xmm5, %xmm5 + cmpl $11, %r9d + vmovdqa 160(%rsi), %xmm0 + jl L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc 176(%rsi), %xmm5, %xmm5 + cmpl $13, %r9d + vmovdqa 192(%rsi), %xmm0 + jl L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc 208(%rsi), %xmm5, %xmm5 + vmovdqa 224(%rsi), %xmm0 +L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last: + vaesenclast %xmm0, %xmm5, %xmm5 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5 + # Calc counter + # Initialization vector + cmpl $0x00, %edx + movq $0x00, %rcx + je L_AES_GCM_decrypt_avx2_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_decrypt_avx2_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_decrypt_avx2_calc_iv_16_loop: + vmovdqu (%rax,%rcx,1), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_avx2_calc_iv_16_loop + movl %ebx, %edx + cmpl %edx, %ecx + je L_AES_GCM_decrypt_avx2_calc_iv_done +L_AES_GCM_decrypt_avx2_calc_iv_lt16: + vpxor %xmm0, %xmm0, %xmm0 + xorl %ebx, %ebx + vmovdqa %xmm0, (%rsp) +L_AES_GCM_decrypt_avx2_calc_iv_loop: + movzbl (%rax,%rcx,1), %r13d + movb %r13b, (%rsp,%rbx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_avx2_calc_iv_loop + vmovdqa (%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, 
%xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 +L_AES_GCM_decrypt_avx2_calc_iv_done: + # T = Encrypt counter + vpxor %xmm0, %xmm0, %xmm0 + shll $3, %edx + vpinsrq $0x00, %rdx, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4 + # Encrypt counter + vmovdqa (%rsi), %xmm15 + vpxor %xmm4, %xmm15, %xmm15 + vaesenc 16(%rsi), %xmm15, %xmm15 + vaesenc 32(%rsi), %xmm15, %xmm15 + vaesenc 48(%rsi), %xmm15, %xmm15 + vaesenc 64(%rsi), %xmm15, %xmm15 + vaesenc 80(%rsi), %xmm15, %xmm15 + vaesenc 96(%rsi), %xmm15, %xmm15 + vaesenc 112(%rsi), %xmm15, %xmm15 + vaesenc 128(%rsi), %xmm15, %xmm15 + vaesenc 144(%rsi), %xmm15, %xmm15 + cmpl $11, %r9d + vmovdqa 160(%rsi), %xmm0 + jl L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last + vaesenc %xmm0, %xmm15, %xmm15 + vaesenc 176(%rsi), %xmm15, %xmm15 + cmpl $13, %r9d + vmovdqa 192(%rsi), %xmm0 + jl L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last + vaesenc %xmm0, %xmm15, %xmm15 + vaesenc 208(%rsi), %xmm15, %xmm15 + vmovdqa 224(%rsi), %xmm0 +L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last: + vaesenclast %xmm0, %xmm15, %xmm15 + jmp L_AES_GCM_decrypt_avx2_iv_done +L_AES_GCM_decrypt_avx2_iv_12: + # # Calculate values when IV is 12 bytes + # Set counter based on IV + vmovdqa L_avx2_aes_gcm_bswap_one(%rip), %xmm4 + vmovdqa (%rsi), %xmm5 + vpblendd $7, (%rax), %xmm4, %xmm4 + # H = Encrypt X(=0) and T = Encrypt counter + vmovdqa 16(%rsi), %xmm7 + vpxor %xmm5, %xmm4, %xmm15 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 32(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 48(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 64(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 80(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 96(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 112(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 128(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 144(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + cmpl $11, %r9d + vmovdqa 160(%rsi), %xmm0 + jl L_AES_GCM_decrypt_avx2_calc_iv_12_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 176(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + cmpl $13, %r9d + vmovdqa 192(%rsi), %xmm0 + jl L_AES_GCM_decrypt_avx2_calc_iv_12_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 208(%rsi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 
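+ # (Interleaved AES: %xmm5 is computing the hash key H = E_K(0^128) and + # %xmm15 is computing E_K(J0), where J0 = IV || 0x00000001 in this + # 12-byte IV path (NIST SP 800-38D); each round key is applied to both + # states back to back so the two encryptions overlap in the pipeline.)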
+ vaesenc %xmm0, %xmm15, %xmm15 + vmovdqa 224(%rsi), %xmm0 +L_AES_GCM_decrypt_avx2_calc_iv_12_last: + vaesenclast %xmm0, %xmm5, %xmm5 + vaesenclast %xmm0, %xmm15, %xmm15 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5 +L_AES_GCM_decrypt_avx2_iv_done: + # Additional authentication data + movl %r11d, %edx + cmpl $0x00, %edx + je L_AES_GCM_decrypt_avx2_calc_aad_done + xorl %ecx, %ecx + cmpl $16, %edx + jl L_AES_GCM_decrypt_avx2_calc_aad_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_decrypt_avx2_calc_aad_16_loop: + vmovdqu (%r12,%rcx,1), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm6, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm6, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm6 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm6, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm6, %xmm6 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm6, %xmm6 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm6, %xmm6 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_avx2_calc_aad_16_loop + movl %r11d, %edx + cmpl %edx, %ecx + je L_AES_GCM_decrypt_avx2_calc_aad_done +L_AES_GCM_decrypt_avx2_calc_aad_lt16: + vpxor %xmm0, %xmm0, %xmm0 + xorl %ebx, %ebx + vmovdqa %xmm0, (%rsp) +L_AES_GCM_decrypt_avx2_calc_aad_loop: + movzbl (%r12,%rcx,1), %r13d + movb %r13b, (%rsp,%rbx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_avx2_calc_aad_loop + vmovdqa (%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm6, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm6, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm6 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm6, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm6, %xmm6 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm6, %xmm6 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm6, %xmm6 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 +L_AES_GCM_decrypt_avx2_calc_aad_done: + # Calculate counter and H + vpsrlq $63, %xmm5, %xmm1 + vpsllq $0x01, %xmm5, %xmm0 + vpslldq $8, %xmm1, %xmm1 + vpor %xmm1, %xmm0, %xmm0 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4 + vpand L_avx2_aes_gcm_mod2_128(%rip), %xmm5, %xmm5 + vpaddd L_avx2_aes_gcm_one(%rip), %xmm4, %xmm4 + vpxor %xmm0, %xmm5, %xmm5 + xorl %ebx, %ebx + cmpl $0x80, %r10d + movl %r10d, %r13d + jl L_AES_GCM_decrypt_avx2_done_128 + andl $0xffffff80, %r13d + vmovdqa %xmm4, 128(%rsp) + vmovdqa %xmm15, 
144(%rsp) + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm3 + # H ^ 1 and H ^ 2 + vpclmulqdq $0x00, %xmm5, %xmm5, %xmm9 + vpclmulqdq $0x11, %xmm5, %xmm5, %xmm10 + vpclmulqdq $16, %xmm3, %xmm9, %xmm8 + vpshufd $0x4e, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpclmulqdq $16, %xmm3, %xmm9, %xmm8 + vpshufd $0x4e, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm9, %xmm10, %xmm0 + vmovdqa %xmm5, (%rsp) + vmovdqa %xmm0, 16(%rsp) + # H ^ 3 and H ^ 4 + vpclmulqdq $16, %xmm5, %xmm0, %xmm11 + vpclmulqdq $0x01, %xmm5, %xmm0, %xmm10 + vpclmulqdq $0x00, %xmm5, %xmm0, %xmm9 + vpclmulqdq $0x11, %xmm5, %xmm0, %xmm12 + vpclmulqdq $0x00, %xmm0, %xmm0, %xmm13 + vpclmulqdq $0x11, %xmm0, %xmm0, %xmm14 + vpxor %xmm10, %xmm11, %xmm11 + vpslldq $8, %xmm11, %xmm10 + vpsrldq $8, %xmm11, %xmm11 + vpxor %xmm9, %xmm10, %xmm10 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm9, %xmm10, %xmm10 + vpxor %xmm8, %xmm13, %xmm13 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm11, %xmm12, %xmm12 + vpxor %xmm8, %xmm13, %xmm13 + vpxor %xmm12, %xmm10, %xmm10 + vpxor %xmm14, %xmm13, %xmm2 + vpxor %xmm9, %xmm10, %xmm1 + vmovdqa %xmm1, 32(%rsp) + vmovdqa %xmm2, 48(%rsp) + # H ^ 5 and H ^ 6 + vpclmulqdq $16, %xmm0, %xmm1, %xmm11 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm10 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm9 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm12 + vpclmulqdq $0x00, %xmm1, %xmm1, %xmm13 + vpclmulqdq $0x11, %xmm1, %xmm1, %xmm14 + vpxor %xmm10, %xmm11, %xmm11 + vpslldq $8, %xmm11, %xmm10 + vpsrldq $8, %xmm11, %xmm11 + vpxor %xmm9, %xmm10, %xmm10 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm9, %xmm10, %xmm10 + vpxor %xmm8, %xmm13, %xmm13 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm11, %xmm12, %xmm12 + vpxor %xmm8, %xmm13, %xmm13 + vpxor %xmm12, %xmm10, %xmm10 + vpxor %xmm14, %xmm13, %xmm0 + vpxor %xmm9, %xmm10, %xmm7 + vmovdqa %xmm7, 64(%rsp) + vmovdqa %xmm0, 80(%rsp) + # H ^ 7 and H ^ 8 + vpclmulqdq $16, %xmm1, %xmm2, %xmm11 + vpclmulqdq $0x01, %xmm1, %xmm2, %xmm10 + vpclmulqdq $0x00, %xmm1, %xmm2, %xmm9 + vpclmulqdq $0x11, %xmm1, %xmm2, %xmm12 + vpclmulqdq $0x00, %xmm2, %xmm2, %xmm13 + vpclmulqdq $0x11, %xmm2, %xmm2, %xmm14 + vpxor %xmm10, %xmm11, %xmm11 + vpslldq $8, %xmm11, %xmm10 + vpsrldq $8, %xmm11, %xmm11 + vpxor %xmm9, %xmm10, %xmm10 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm9, %xmm10, %xmm10 + vpxor %xmm8, %xmm13, %xmm13 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm11, %xmm12, %xmm12 + vpxor %xmm8, %xmm13, %xmm13 + vpxor %xmm12, %xmm10, %xmm10 + vpxor %xmm14, %xmm13, %xmm0 + vpxor %xmm9, %xmm10, %xmm7 + vmovdqa %xmm7, 96(%rsp) + vmovdqa %xmm0, 112(%rsp) +L_AES_GCM_decrypt_avx2_ghash_128: + # aesenc_128_ghash + leaq (%rdi,%rbx,1), %rcx + leaq (%r8,%rbx,1), %rdx + # aesenc_ctr + vmovdqa 128(%rsp), %xmm0 + vmovdqa L_avx2_aes_gcm_bswap_epi64(%rip), %xmm1 + vpaddd L_avx2_aes_gcm_one(%rip), %xmm0, %xmm9 + vpshufb %xmm1, %xmm0, %xmm8 + vpaddd L_avx2_aes_gcm_two(%rip), %xmm0, %xmm10 + vpshufb 
%xmm1, %xmm9, %xmm9 + vpaddd L_avx2_aes_gcm_three(%rip), %xmm0, %xmm11 + vpshufb %xmm1, %xmm10, %xmm10 + vpaddd L_avx2_aes_gcm_four(%rip), %xmm0, %xmm12 + vpshufb %xmm1, %xmm11, %xmm11 + vpaddd L_avx2_aes_gcm_five(%rip), %xmm0, %xmm13 + vpshufb %xmm1, %xmm12, %xmm12 + vpaddd L_avx2_aes_gcm_six(%rip), %xmm0, %xmm14 + vpshufb %xmm1, %xmm13, %xmm13 + vpaddd L_avx2_aes_gcm_seven(%rip), %xmm0, %xmm15 + vpshufb %xmm1, %xmm14, %xmm14 + vpaddd L_avx2_aes_gcm_eight(%rip), %xmm0, %xmm0 + vpshufb %xmm1, %xmm15, %xmm15 + # aesenc_xor + vmovdqa (%rsi), %xmm7 + vmovdqa %xmm0, 128(%rsp) + vpxor %xmm7, %xmm8, %xmm8 + vpxor %xmm7, %xmm9, %xmm9 + vpxor %xmm7, %xmm10, %xmm10 + vpxor %xmm7, %xmm11, %xmm11 + vpxor %xmm7, %xmm12, %xmm12 + vpxor %xmm7, %xmm13, %xmm13 + vpxor %xmm7, %xmm14, %xmm14 + vpxor %xmm7, %xmm15, %xmm15 + # aesenc_pclmul_1 + vmovdqu (%rcx), %xmm1 + vmovdqu 16(%rsi), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vmovdqa 112(%rsp), %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm5 + vpclmulqdq $0x01, %xmm2, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm2, %xmm1, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_2 + vmovdqu 16(%rcx), %xmm1 + vmovdqa 96(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 32(%rsi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu 32(%rcx), %xmm1 + vmovdqa 80(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 48(%rsi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu 48(%rcx), %xmm1 + vmovdqa 64(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 64(%rsi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu 64(%rcx), %xmm1 + vmovdqa 48(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, 
%xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 80(%rsi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu 80(%rcx), %xmm1 + vmovdqa 32(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 96(%rsi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu 96(%rcx), %xmm1 + vmovdqa 16(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 112(%rsi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu 112(%rcx), %xmm1 + vmovdqa (%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 128(%rsi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_l + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm3, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm1 + vpsrldq $8, %xmm5, %xmm5 + vmovdqa 144(%rsi), %xmm4 + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm0 + vaesenc %xmm4, %xmm8, %xmm8 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm5, %xmm7, %xmm7 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vaesenc %xmm4, %xmm12, %xmm12 + vaesenc %xmm4, %xmm13, %xmm13 + vaesenc %xmm4, %xmm14, %xmm14 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm7, %xmm6, %xmm6 + vaesenc %xmm4, %xmm15, %xmm15 + cmpl $11, %r9d + vmovdqa 160(%rsi), %xmm7 + jl L_AES_GCM_decrypt_avx2_aesenc_128_ghash_avx_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc 
%xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 176(%rsi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + cmpl $13, %r9d + vmovdqa 192(%rsi), %xmm7 + jl L_AES_GCM_decrypt_avx2_aesenc_128_ghash_avx_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 208(%rsi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 224(%rsi), %xmm7 +L_AES_GCM_decrypt_avx2_aesenc_128_ghash_avx_done: + # aesenc_last + vaesenclast %xmm7, %xmm8, %xmm8 + vaesenclast %xmm7, %xmm9, %xmm9 + vaesenclast %xmm7, %xmm10, %xmm10 + vaesenclast %xmm7, %xmm11, %xmm11 + vmovdqu (%rcx), %xmm0 + vmovdqu 16(%rcx), %xmm1 + vmovdqu 32(%rcx), %xmm2 + vmovdqu 48(%rcx), %xmm3 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm2, %xmm10, %xmm10 + vpxor %xmm3, %xmm11, %xmm11 + vmovdqu %xmm8, (%rdx) + vmovdqu %xmm9, 16(%rdx) + vmovdqu %xmm10, 32(%rdx) + vmovdqu %xmm11, 48(%rdx) + vaesenclast %xmm7, %xmm12, %xmm12 + vaesenclast %xmm7, %xmm13, %xmm13 + vaesenclast %xmm7, %xmm14, %xmm14 + vaesenclast %xmm7, %xmm15, %xmm15 + vmovdqu 64(%rcx), %xmm0 + vmovdqu 80(%rcx), %xmm1 + vmovdqu 96(%rcx), %xmm2 + vmovdqu 112(%rcx), %xmm3 + vpxor %xmm0, %xmm12, %xmm12 + vpxor %xmm1, %xmm13, %xmm13 + vpxor %xmm2, %xmm14, %xmm14 + vpxor %xmm3, %xmm15, %xmm15 + vmovdqu %xmm12, 64(%rdx) + vmovdqu %xmm13, 80(%rdx) + vmovdqu %xmm14, 96(%rdx) + vmovdqu %xmm15, 112(%rdx) + # aesenc_128_ghash - end + addl $0x80, %ebx + cmpl %r13d, %ebx + jl L_AES_GCM_decrypt_avx2_ghash_128 + vmovdqa (%rsp), %xmm5 + vmovdqa 128(%rsp), %xmm4 + vmovdqa 144(%rsp), %xmm15 +L_AES_GCM_decrypt_avx2_done_128: + cmpl %r10d, %ebx + jge L_AES_GCM_decrypt_avx2_done_dec + movl %r10d, %r13d + andl $0xfffffff0, %r13d + cmpl %r13d, %ebx + jge L_AES_GCM_decrypt_avx2_last_block_done +L_AES_GCM_decrypt_avx2_last_block_start: + vmovdqu (%rdi,%rbx,1), %xmm11 + vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm10 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm11, %xmm12 + vpaddd L_avx2_aes_gcm_one(%rip), %xmm4, %xmm4 + vpxor %xmm6, %xmm12, %xmm12 + # aesenc_gfmul_sb + vpclmulqdq $0x01, %xmm5, %xmm12, %xmm2 + vpclmulqdq $16, %xmm5, %xmm12, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm12, %xmm1 + vpclmulqdq $0x11, %xmm5, %xmm12, %xmm8 + vpxor (%rsi), %xmm10, %xmm10 + vaesenc 16(%rsi), %xmm10, %xmm10 + vpxor %xmm2, %xmm3, %xmm3 + vpslldq $8, %xmm3, %xmm2 + vpsrldq $8, %xmm3, %xmm3 + vaesenc 32(%rsi), %xmm10, %xmm10 + vpxor %xmm1, %xmm2, %xmm2 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm2, %xmm1 + vaesenc 48(%rsi), %xmm10, %xmm10 + vaesenc 64(%rsi), %xmm10, %xmm10 + vaesenc 80(%rsi), %xmm10, %xmm10 + vpshufd $0x4e, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm2, %xmm1 + vaesenc 96(%rsi), %xmm10, %xmm10 + vaesenc 112(%rsi), %xmm10, %xmm10 + vaesenc 128(%rsi), %xmm10, %xmm10 + vpshufd $0x4e, %xmm2, %xmm2 + vaesenc 144(%rsi), %xmm10, %xmm10 + vpxor %xmm3, %xmm8, 
%xmm8 + vpxor %xmm8, %xmm2, %xmm2 + vmovdqa 160(%rsi), %xmm0 + cmpl $11, %r9d + jl L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc 176(%rsi), %xmm10, %xmm10 + vmovdqa 192(%rsi), %xmm0 + cmpl $13, %r9d + jl L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc 208(%rsi), %xmm10, %xmm10 + vmovdqa 224(%rsi), %xmm0 +L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last: + vaesenclast %xmm0, %xmm10, %xmm10 + vpxor %xmm1, %xmm2, %xmm6 + vpxor %xmm11, %xmm10, %xmm10 + vmovdqu %xmm10, (%r8,%rbx,1) + addl $16, %ebx + cmpl %r13d, %ebx + jl L_AES_GCM_decrypt_avx2_last_block_start +L_AES_GCM_decrypt_avx2_last_block_done: + movl %r10d, %ecx + movl %r10d, %edx + andl $15, %ecx + jz L_AES_GCM_decrypt_avx2_done_dec + # aesenc_last15_dec + vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4 + vpxor (%rsi), %xmm4, %xmm4 + vaesenc 16(%rsi), %xmm4, %xmm4 + vaesenc 32(%rsi), %xmm4, %xmm4 + vaesenc 48(%rsi), %xmm4, %xmm4 + vaesenc 64(%rsi), %xmm4, %xmm4 + vaesenc 80(%rsi), %xmm4, %xmm4 + vaesenc 96(%rsi), %xmm4, %xmm4 + vaesenc 112(%rsi), %xmm4, %xmm4 + vaesenc 128(%rsi), %xmm4, %xmm4 + vaesenc 144(%rsi), %xmm4, %xmm4 + cmpl $11, %r9d + vmovdqa 160(%rsi), %xmm1 + jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last + vaesenc %xmm1, %xmm4, %xmm4 + vaesenc 176(%rsi), %xmm4, %xmm4 + cmpl $13, %r9d + vmovdqa 192(%rsi), %xmm1 + jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last + vaesenc %xmm1, %xmm4, %xmm4 + vaesenc 208(%rsi), %xmm4, %xmm4 + vmovdqa 224(%rsi), %xmm1 +L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last: + vaesenclast %xmm1, %xmm4, %xmm4 + xorl %ecx, %ecx + vpxor %xmm0, %xmm0, %xmm0 + vmovdqa %xmm4, (%rsp) + vmovdqa %xmm0, 16(%rsp) +L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_loop: + movzbl (%rdi,%rbx,1), %r13d + movb %r13b, 16(%rsp,%rcx,1) + xorb (%rsp,%rcx,1), %r13b + movb %r13b, (%r8,%rbx,1) + incl %ebx + incl %ecx + cmpl %edx, %ebx + jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_loop + vmovdqa 16(%rsp), %xmm4 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4 + vpxor %xmm4, %xmm6, %xmm6 + # ghash_gfmul_red + vpclmulqdq $16, %xmm5, %xmm6, %xmm2 + vpclmulqdq $0x01, %xmm5, %xmm6, %xmm1 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm0 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm6 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm2, %xmm6, %xmm6 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm0, %xmm6, %xmm6 +L_AES_GCM_decrypt_avx2_done_dec: + # calc_tag + shlq $3, %r10 + vpinsrq $0x00, %r10, %xmm0, %xmm0 + shlq $3, %r11 + vpinsrq $0x01, %r11, %xmm1, %xmm1 + vpblendd $12, %xmm1, %xmm0, %xmm0 + vpxor %xmm6, %xmm0, %xmm0 + # ghash_gfmul_red + vpclmulqdq $16, %xmm5, %xmm0, %xmm4 + vpclmulqdq $0x01, %xmm5, %xmm0, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm0, %xmm2 + vpxor %xmm3, %xmm4, %xmm4 + vpslldq $8, %xmm4, %xmm3 + vpsrldq $8, %xmm4, %xmm4 + vpxor %xmm2, %xmm3, %xmm3 + vpclmulqdq $0x11, %xmm5, %xmm0, %xmm0 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm2, %xmm3, %xmm3 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm4, %xmm0, %xmm0 + vpxor %xmm3, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, 
%xmm0 + vpxor %xmm15, %xmm0, %xmm0 + # cmp_tag + cmpl $16, %r15d + je L_AES_GCM_decrypt_avx2_cmp_tag_16 + xorq %rdx, %rdx + xorq %rax, %rax + vmovdqa %xmm0, (%rsp) +L_AES_GCM_decrypt_avx2_cmp_tag_loop: + movzbl (%rsp,%rdx,1), %r13d + xorb (%r14,%rdx,1), %r13b + orb %r13b, %al + incl %edx + cmpl %r15d, %edx + jne L_AES_GCM_decrypt_avx2_cmp_tag_loop + cmpb $0x00, %al + sete %al + jmp L_AES_GCM_decrypt_avx2_cmp_tag_done +L_AES_GCM_decrypt_avx2_cmp_tag_16: + vmovdqu (%r14), %xmm1 + vpcmpeqb %xmm1, %xmm0, %xmm0 + vpmovmskb %xmm0, %rdx + # %%edx == 0xFFFF then return 1 else => return 0 + xorl %eax, %eax + cmpl $0xffff, %edx + sete %al +L_AES_GCM_decrypt_avx2_cmp_tag_done: + movl %eax, (%rbp) + vzeroupper + addq $0xa8, %rsp + popq %rbp + popq %r15 + popq %rbx + popq %r14 + popq %r12 + popq %r13 + repz retq +#ifndef __APPLE__ +.size AES_GCM_decrypt_avx2,.-AES_GCM_decrypt_avx2 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ diff --git a/client/wolfssl/wolfcrypt/src/arc4.c b/client/wolfssl/wolfcrypt/src/arc4.c new file mode 100644 index 0000000..7eb8268 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/arc4.c @@ -0,0 +1,149 @@ +/* arc4.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifndef NO_RC4 + +#include +#include + + +int wc_Arc4SetKey(Arc4* arc4, const byte* key, word32 length) +{ + int ret = 0; + word32 i; + word32 keyIndex = 0, stateIndex = 0; + + if (arc4 == NULL || key == NULL || length == 0) { + return BAD_FUNC_ARG; + } + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4) && \ + defined(HAVE_CAVIUM) && !defined(HAVE_CAVIUM_V) + if (arc4->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ARC4) { + return NitroxArc4SetKey(arc4, key, length); + } +#endif + + arc4->x = 1; + arc4->y = 0; + + for (i = 0; i < ARC4_STATE_SIZE; i++) + arc4->state[i] = (byte)i; + + for (i = 0; i < ARC4_STATE_SIZE; i++) { + word32 a = arc4->state[i]; + stateIndex += key[keyIndex] + a; + stateIndex &= 0xFF; + arc4->state[i] = arc4->state[stateIndex]; + arc4->state[stateIndex] = (byte)a; + + if (++keyIndex >= length) + keyIndex = 0; + } + + return ret; +} + + +static WC_INLINE byte MakeByte(word32* x, word32* y, byte* s) +{ + word32 a = s[*x], b; + *y = (*y+a) & 0xff; + + b = s[*y]; + s[*x] = (byte)b; + s[*y] = (byte)a; + *x = (*x+1) & 0xff; + + return s[(a+b) & 0xff]; +} + + +int wc_Arc4Process(Arc4* arc4, byte* out, const byte* in, word32 length) +{ + int ret = 0; + word32 x; + word32 y; + + if (arc4 == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4) && \ + defined(HAVE_CAVIUM) && !defined(HAVE_CAVIUM_V) + if (arc4->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ARC4) { + return NitroxArc4Process(arc4, out, in, 
length); + } +#endif + + x = arc4->x; + y = arc4->y; + + while(length--) + *out++ = *in++ ^ MakeByte(&x, &y, arc4->state); + + arc4->x = (byte)x; + arc4->y = (byte)y; + + return ret; +} + +/* Initialize Arc4 for use with async device */ +int wc_Arc4Init(Arc4* arc4, void* heap, int devId) +{ + int ret = 0; + + if (arc4 == NULL) + return BAD_FUNC_ARG; + + arc4->heap = heap; + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4) + ret = wolfAsync_DevCtxInit(&arc4->asyncDev, WOLFSSL_ASYNC_MARKER_ARC4, + arc4->heap, devId); +#else + (void)devId; +#endif /* WOLFSSL_ASYNC_CRYPT */ + + return ret; +} + + +/* Free Arc4 from use with async device */ +void wc_Arc4Free(Arc4* arc4) +{ + if (arc4 == NULL) + return; + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4) + wolfAsync_DevCtxFree(&arc4->asyncDev, WOLFSSL_ASYNC_MARKER_ARC4); +#endif /* WOLFSSL_ASYNC_CRYPT */ +} + +#endif /* NO_RC4 */ + diff --git a/client/wolfssl/wolfcrypt/src/asm.c b/client/wolfssl/wolfcrypt/src/asm.c new file mode 100644 index 0000000..0af4447 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/asm.c @@ -0,0 +1,1783 @@ +/* asm.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +/* + * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca, + * http://math.libtomcrypt.com + */ + + +/******************************************************************/ +/* fp_montgomery_reduce.c asm or generic */ + + +/* Each platform needs to query info type 1 from cpuid to see if aesni is + * supported. 
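(AESNI is reported by CPUID leaf 1, ECX bit 25; the MULX/ADX probes below read leaf 7, sub-leaf 0, EBX bits 8 and 19.)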
Also, let's setup a macro for proper linkage w/o ABI conflicts + */ + +#if defined(HAVE_INTEL_MULX) +#ifndef _MSC_VER + #define cpuid(reg, leaf, sub)\ + __asm__ __volatile__ ("cpuid":\ + "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\ + "a" (leaf), "c"(sub)); + + #define XASM_LINK(f) asm(f) +#else + + #include + #define cpuid(a,b,c) __cpuidex((int*)a,b,c) + + #define XASM_LINK(f) + +#endif /* _MSC_VER */ + +#define EAX 0 +#define EBX 1 +#define ECX 2 +#define EDX 3 + +#define CPUID_AVX1 0x1 +#define CPUID_AVX2 0x2 +#define CPUID_RDRAND 0x4 +#define CPUID_RDSEED 0x8 +#define CPUID_BMI2 0x10 /* MULX, RORX */ +#define CPUID_ADX 0x20 /* ADCX, ADOX */ + +#define IS_INTEL_AVX1 (cpuid_flags&CPUID_AVX1) +#define IS_INTEL_AVX2 (cpuid_flags&CPUID_AVX2) +#define IS_INTEL_BMI2 (cpuid_flags&CPUID_BMI2) +#define IS_INTEL_ADX (cpuid_flags&CPUID_ADX) +#define IS_INTEL_RDRAND (cpuid_flags&CPUID_RDRAND) +#define IS_INTEL_RDSEED (cpuid_flags&CPUID_RDSEED) +#define SET_FLAGS + +static word32 cpuid_check = 0 ; +static word32 cpuid_flags = 0 ; + +static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) { + int got_intel_cpu = 0; + int got_amd_cpu = 0; + unsigned int reg[5]; + + reg[4] = '\0' ; + cpuid(reg, 0, 0); + + /* check for intel cpu */ + if( memcmp((char *)&(reg[EBX]), "Genu", 4) == 0 && + memcmp((char *)&(reg[EDX]), "ineI", 4) == 0 && + memcmp((char *)&(reg[ECX]), "ntel", 4) == 0) { + got_intel_cpu = 1; + } + + /* check for AMD cpu */ + if( memcmp((char *)&(reg[EBX]), "Auth", 4) == 0 && + memcmp((char *)&(reg[EDX]), "enti", 4) == 0 && + memcmp((char *)&(reg[ECX]), "cAMD", 4) == 0) { + got_amd_cpu = 1; + } + if (got_intel_cpu || got_amd_cpu) { + cpuid(reg, leaf, sub); + return((reg[num]>>bit)&0x1) ; + } + return 0 ; +} + +WC_INLINE static int set_cpuid_flags(void) { + if(cpuid_check == 0) { + if(cpuid_flag(7, 0, EBX, 8)){ cpuid_flags |= CPUID_BMI2 ; } + if(cpuid_flag(7, 0, EBX,19)){ cpuid_flags |= CPUID_ADX ; } + cpuid_check = 1 ; + return 0 ; + } + return 1 ; +} + +#define RETURN return +#define IF_HAVE_INTEL_MULX(func, ret) \ + if(cpuid_check==0)set_cpuid_flags() ; \ + if(IS_INTEL_BMI2 && IS_INTEL_ADX){ func; ret ; } + +#else + #define IF_HAVE_INTEL_MULX(func, ret) +#endif + +#if defined(TFM_X86) && !defined(TFM_SSE2) +/* x86-32 code */ + +#define MONT_START +#define MONT_FINI +#define LOOP_END +#define LOOP_START \ + mu = c[x] * mp + +#define INNERMUL \ +__asm__( \ + "movl %5,%%eax \n\t" \ + "mull %4 \n\t" \ + "addl %1,%%eax \n\t" \ + "adcl $0,%%edx \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl $0,%%edx \n\t" \ + "movl %%edx,%1 \n\t" \ +:"=g"(_c[LO]), "=r"(cy) \ +:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \ +: "%eax", "%edx", "cc") + +#define PROPCARRY \ +__asm__( \ + "addl %1,%0 \n\t" \ + "setb %%al \n\t" \ + "movzbl %%al,%1 \n\t" \ +:"=g"(_c[LO]), "=r"(cy) \ +:"0"(_c[LO]), "1"(cy) \ +: "%eax", "cc") + +/******************************************************************/ +#elif defined(TFM_X86_64) +/* x86-64 code */ + +#define MONT_START +#define MONT_FINI +#define LOOP_END +#define LOOP_START \ + mu = c[x] * mp + +#define INNERMUL \ +__asm__( \ + "movq %5,%%rax \n\t" \ + "mulq %4 \n\t" \ + "addq %1,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq $0,%%rdx \n\t" \ + "movq %%rdx,%1 \n\t" \ +:"=g"(_c[LO]), "=r"(cy) \ +:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \ +: "%rax", "%rdx", "cc") + +#if defined(HAVE_INTEL_MULX) +#define MULX_INNERMUL8(x,y,z,cy) \ + __asm__ volatile ( \ + "movq %[yn], %%rdx\n\t" \ + "xorq %%rcx, %%rcx\n\t" \ + "movq 0(%[c]), 
%%r8\n\t" \ + "movq 8(%[c]), %%r9\n\t" \ + "movq 16(%[c]), %%r10\n\t" \ + "movq 24(%[c]), %%r11\n\t" \ + "movq 32(%[c]), %%r12\n\t" \ + "movq 40(%[c]), %%r13\n\t" \ + "movq 48(%[c]), %%r14\n\t" \ + "movq 56(%[c]), %%r15\n\t" \ + \ + "mulx 0(%[xp]), %%rax, %%rcx\n\t" \ + "adcxq %[cy], %%r8\n\t" \ + "adoxq %%rax, %%r8\n\t" \ + "mulx 8(%[xp]), %%rax, %[cy]\n\t" \ + "adcxq %%rcx, %%r9\n\t" \ + "adoxq %%rax, %%r9\n\t" \ + "mulx 16(%[xp]), %%rax, %%rcx\n\t" \ + "adcxq %[cy], %%r10\n\t" \ + "adoxq %%rax, %%r10\n\t" \ + "mulx 24(%[xp]), %%rax, %[cy]\n\t" \ + "adcxq %%rcx, %%r11\n\t" \ + "adoxq %%rax, %%r11\n\t" \ + "mulx 32(%[xp]), %%rax, %%rcx\n\t" \ + "adcxq %[cy], %%r12\n\t" \ + "adoxq %%rax, %%r12\n\t" \ + "mulx 40(%[xp]), %%rax, %[cy]\n\t" \ + "adcxq %%rcx, %%r13\n\t" \ + "adoxq %%rax, %%r13\n\t" \ + "mulx 48(%[xp]), %%rax, %%rcx\n\t" \ + "adcxq %[cy], %%r14\n\t" \ + "adoxq %%rax, %%r14\n\t" \ + "adcxq %%rcx, %%r15\n\t" \ + "mulx 56(%[xp]), %%rax, %[cy]\n\t" \ + "movq $0, %%rdx\n\t" \ + "adoxq %%rdx, %%rax\n\t" \ + "adcxq %%rdx, %[cy]\n\t" \ + "adoxq %%rdx, %[cy]\n\t" \ + "addq %%rax, %%r15\n\t" \ + "adcq $0, %[cy]\n\t" \ + \ + "movq %%r8, 0(%[c])\n\t" \ + "movq %%r9, 8(%[c])\n\t" \ + "movq %%r10, 16(%[c])\n\t" \ + "movq %%r11, 24(%[c])\n\t" \ + "movq %%r12, 32(%[c])\n\t" \ + "movq %%r13, 40(%[c])\n\t" \ + "movq %%r14, 48(%[c])\n\t" \ + "movq %%r15, 56(%[c])\n\t" \ + : [cy] "+r" (cy) \ + : [xp] "r" (x), [c] "r" (c_mulx), [yn] "rm" (y) \ + :"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", \ + "%rdx", "%rax", "%rcx" \ + ) + +#define INNERMUL8_MULX \ +{\ + MULX_INNERMUL8(tmpm, mu, _c, cy);\ +} +#endif + +#define INNERMUL8 \ + __asm__( \ + "movq 0(%5),%%rax \n\t" \ + "movq 0(%2),%%r10 \n\t" \ + "movq 0x8(%5),%%r11 \n\t" \ + "mulq %4 \n\t" \ + "addq %%r10,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "movq 0x8(%2),%%r10 \n\t" \ + "addq %3,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "movq %%rax,0(%0) \n\t" \ + "movq %%rdx,%1 \n\t" \ + \ + "movq %%r11,%%rax \n\t" \ + "movq 0x10(%5),%%r11 \n\t" \ + "mulq %4 \n\t" \ + "addq %%r10,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "movq 0x10(%2),%%r10 \n\t" \ + "addq %3,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "movq %%rax,0x8(%0) \n\t" \ + "movq %%rdx,%1 \n\t" \ + \ + "movq %%r11,%%rax \n\t" \ + "movq 0x18(%5),%%r11 \n\t" \ + "mulq %4 \n\t" \ + "addq %%r10,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "movq 0x18(%2),%%r10 \n\t" \ + "addq %3,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "movq %%rax,0x10(%0) \n\t" \ + "movq %%rdx,%1 \n\t" \ + \ + "movq %%r11,%%rax \n\t" \ + "movq 0x20(%5),%%r11 \n\t" \ + "mulq %4 \n\t" \ + "addq %%r10,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "movq 0x20(%2),%%r10 \n\t" \ + "addq %3,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "movq %%rax,0x18(%0) \n\t" \ + "movq %%rdx,%1 \n\t" \ + \ + "movq %%r11,%%rax \n\t" \ + "movq 0x28(%5),%%r11 \n\t" \ + "mulq %4 \n\t" \ + "addq %%r10,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "movq 0x28(%2),%%r10 \n\t" \ + "addq %3,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "movq %%rax,0x20(%0) \n\t" \ + "movq %%rdx,%1 \n\t" \ + \ + "movq %%r11,%%rax \n\t" \ + "movq 0x30(%5),%%r11 \n\t" \ + "mulq %4 \n\t" \ + "addq %%r10,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "movq 0x30(%2),%%r10 \n\t" \ + "addq %3,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "movq %%rax,0x28(%0) \n\t" \ + "movq %%rdx,%1 \n\t" \ + \ + "movq %%r11,%%rax \n\t" \ + "movq 0x38(%5),%%r11 \n\t" \ + "mulq %4 \n\t" \ + "addq %%r10,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "movq 0x38(%2),%%r10 \n\t" \ + "addq %3,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "movq 
%%rax,0x30(%0) \n\t" \ + "movq %%rdx,%1 \n\t" \ + \ + "movq %%r11,%%rax \n\t" \ + "mulq %4 \n\t" \ + "addq %%r10,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "addq %3,%%rax \n\t" \ + "adcq $0,%%rdx \n\t" \ + "movq %%rax,0x38(%0) \n\t" \ + "movq %%rdx,%1 \n\t" \ + \ +:"=r"(_c), "=r"(cy) \ +: "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\ +: "%rax", "%rdx", "%r10", "%r11", "cc") + +#define PROPCARRY \ +__asm__( \ + "addq %1,%0 \n\t" \ + "setb %%al \n\t" \ + "movzbq %%al,%1 \n\t" \ +:"=g"(_c[LO]), "=r"(cy) \ +:"0"(_c[LO]), "1"(cy) \ +: "%rax", "cc") + +/******************************************************************/ +#elif defined(TFM_SSE2) +/* SSE2 code (assumes 32-bit fp_digits) */ +/* XMM register assignments: + * xmm0 *tmpm++, then Mu * (*tmpm++) + * xmm1 c[x], then Mu + * xmm2 mp + * xmm3 cy + * xmm4 _c[LO] + */ + +#define MONT_START \ + __asm__("movd %0,%%mm2"::"g"(mp)) + +#define MONT_FINI \ + __asm__("emms") + +#define LOOP_START \ +__asm__( \ +"movd %0,%%mm1 \n\t" \ +"pxor %%mm3,%%mm3 \n\t" \ +"pmuludq %%mm2,%%mm1 \n\t" \ +:: "g"(c[x])) + +/* pmuludq on mmx registers does a 32x32->64 multiply. */ +#define INNERMUL \ +__asm__( \ + "movd %1,%%mm4 \n\t" \ + "movd %2,%%mm0 \n\t" \ + "paddq %%mm4,%%mm3 \n\t" \ + "pmuludq %%mm1,%%mm0 \n\t" \ + "paddq %%mm0,%%mm3 \n\t" \ + "movd %%mm3,%0 \n\t" \ + "psrlq $32, %%mm3 \n\t" \ +:"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) ); + +#define INNERMUL8 \ +__asm__( \ + "movd 0(%1),%%mm4 \n\t" \ + "movd 0(%2),%%mm0 \n\t" \ + "paddq %%mm4,%%mm3 \n\t" \ + "pmuludq %%mm1,%%mm0 \n\t" \ + "movd 4(%2),%%mm5 \n\t" \ + "paddq %%mm0,%%mm3 \n\t" \ + "movd 4(%1),%%mm6 \n\t" \ + "movd %%mm3,0(%0) \n\t" \ + "psrlq $32, %%mm3 \n\t" \ +\ + "paddq %%mm6,%%mm3 \n\t" \ + "pmuludq %%mm1,%%mm5 \n\t" \ + "movd 8(%2),%%mm6 \n\t" \ + "paddq %%mm5,%%mm3 \n\t" \ + "movd 8(%1),%%mm7 \n\t" \ + "movd %%mm3,4(%0) \n\t" \ + "psrlq $32, %%mm3 \n\t" \ +\ + "paddq %%mm7,%%mm3 \n\t" \ + "pmuludq %%mm1,%%mm6 \n\t" \ + "movd 12(%2),%%mm7 \n\t" \ + "paddq %%mm6,%%mm3 \n\t" \ + "movd 12(%1),%%mm5 \n\t" \ + "movd %%mm3,8(%0) \n\t" \ + "psrlq $32, %%mm3 \n\t" \ +\ + "paddq %%mm5,%%mm3 \n\t" \ + "pmuludq %%mm1,%%mm7 \n\t" \ + "movd 16(%2),%%mm5 \n\t" \ + "paddq %%mm7,%%mm3 \n\t" \ + "movd 16(%1),%%mm6 \n\t" \ + "movd %%mm3,12(%0) \n\t" \ + "psrlq $32, %%mm3 \n\t" \ +\ + "paddq %%mm6,%%mm3 \n\t" \ + "pmuludq %%mm1,%%mm5 \n\t" \ + "movd 20(%2),%%mm6 \n\t" \ + "paddq %%mm5,%%mm3 \n\t" \ + "movd 20(%1),%%mm7 \n\t" \ + "movd %%mm3,16(%0) \n\t" \ + "psrlq $32, %%mm3 \n\t" \ +\ + "paddq %%mm7,%%mm3 \n\t" \ + "pmuludq %%mm1,%%mm6 \n\t" \ + "movd 24(%2),%%mm7 \n\t" \ + "paddq %%mm6,%%mm3 \n\t" \ + "movd 24(%1),%%mm5 \n\t" \ + "movd %%mm3,20(%0) \n\t" \ + "psrlq $32, %%mm3 \n\t" \ +\ + "paddq %%mm5,%%mm3 \n\t" \ + "pmuludq %%mm1,%%mm7 \n\t" \ + "movd 28(%2),%%mm5 \n\t" \ + "paddq %%mm7,%%mm3 \n\t" \ + "movd 28(%1),%%mm6 \n\t" \ + "movd %%mm3,24(%0) \n\t" \ + "psrlq $32, %%mm3 \n\t" \ +\ + "paddq %%mm6,%%mm3 \n\t" \ + "pmuludq %%mm1,%%mm5 \n\t" \ + "paddq %%mm5,%%mm3 \n\t" \ + "movd %%mm3,28(%0) \n\t" \ + "psrlq $32, %%mm3 \n\t" \ +:"=r"(_c) : "0"(_c), "r"(tmpm) ); + +/* TAO switched tmpm from "g" to "r" after gcc tried to index the indexed stack + pointer */ + +#define LOOP_END \ +__asm__( "movd %%mm3,%0 \n" :"=r"(cy)) + +#define PROPCARRY \ +__asm__( \ + "addl %1,%0 \n\t" \ + "setb %%al \n\t" \ + "movzbl %%al,%1 \n\t" \ +:"=g"(_c[LO]), "=r"(cy) \ +:"0"(_c[LO]), "1"(cy) \ +: "%eax", "cc") + +/******************************************************************/ +#elif defined(TFM_ARM) + /* ARMv4 code */ + +#define 
MONT_START +#define MONT_FINI +#define LOOP_END +#define LOOP_START \ + mu = c[x] * mp + + +#ifdef __thumb__ + +#define INNERMUL \ +__asm__( \ + " LDR r0,%1 \n\t" \ + " ADDS r0,r0,%0 \n\t" \ + " ITE CS \n\t" \ + " MOVCS %0,#1 \n\t" \ + " MOVCC %0,#0 \n\t" \ + " UMLAL r0,%0,%3,%4 \n\t" \ + " STR r0,%1 \n\t" \ +:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0]):"r0","cc"); + +#define PROPCARRY \ +__asm__( \ + " LDR r0,%1 \n\t" \ + " ADDS r0,r0,%0 \n\t" \ + " STR r0,%1 \n\t" \ + " ITE CS \n\t" \ + " MOVCS %0,#1 \n\t" \ + " MOVCC %0,#0 \n\t" \ +:"=r"(cy),"=m"(_c[0]):"0"(cy),"m"(_c[0]):"r0","cc"); + + +/* TAO thumb mode uses ite (if then else) to detect carry directly + * fixed unmatched constraint warning by changing 1 to m */ + +#else /* __thumb__ */ + +#define INNERMUL \ +__asm__( \ + " LDR r0,%1 \n\t" \ + " ADDS r0,r0,%0 \n\t" \ + " MOVCS %0,#1 \n\t" \ + " MOVCC %0,#0 \n\t" \ + " UMLAL r0,%0,%3,%4 \n\t" \ + " STR r0,%1 \n\t" \ +:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","cc"); + +#define PROPCARRY \ +__asm__( \ + " LDR r0,%1 \n\t" \ + " ADDS r0,r0,%0 \n\t" \ + " STR r0,%1 \n\t" \ + " MOVCS %0,#1 \n\t" \ + " MOVCC %0,#0 \n\t" \ +:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","cc"); + +#endif /* __thumb__ */ + +#elif defined(TFM_PPC32) + +/* PPC32 */ +#define MONT_START +#define MONT_FINI +#define LOOP_END +#define LOOP_START \ + mu = c[x] * mp + +#define INNERMUL \ +__asm__( \ + " mullw 16,%3,%4 \n\t" \ + " mulhwu 17,%3,%4 \n\t" \ + " addc 16,16,%2 \n\t" \ + " addze 17,17 \n\t" \ + " addc %1,16,%5 \n\t" \ + " addze %0,17 \n\t" \ +:"=r"(cy),"=r"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "cc"); ++tmpm; + +#define PROPCARRY \ +__asm__( \ + " addc %1,%3,%2 \n\t" \ + " xor %0,%2,%2 \n\t" \ + " addze %0,%2 \n\t" \ +:"=r"(cy),"=r"(_c[0]):"0"(cy),"1"(_c[0]):"cc"); + +#elif defined(TFM_PPC64) + +/* PPC64 */ +#define MONT_START +#define MONT_FINI +#define LOOP_END +#define LOOP_START \ + mu = c[x] * mp + +#define INNERMUL \ +__asm__( \ + " mulld r16,%3,%4 \n\t" \ + " mulhdu r17,%3,%4 \n\t" \ + " addc r16,16,%0 \n\t" \ + " addze r17,r17 \n\t" \ + " ldx r18,0,%1 \n\t" \ + " addc r16,r16,r18 \n\t" \ + " addze %0,r17 \n\t" \ + " sdx r16,0,%1 \n\t" \ +:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"r16", "r17", "r18","cc"); ++tmpm; + +#define PROPCARRY \ +__asm__( \ + " ldx r16,0,%1 \n\t" \ + " addc r16,r16,%0 \n\t" \ + " sdx r16,0,%1 \n\t" \ + " xor %0,%0,%0 \n\t" \ + " addze %0,%0 \n\t" \ +:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","cc"); + +/******************************************************************/ + +#elif defined(TFM_AVR32) + +/* AVR32 */ +#define MONT_START +#define MONT_FINI +#define LOOP_END +#define LOOP_START \ + mu = c[x] * mp + +#define INNERMUL \ +__asm__( \ + " ld.w r2,%1 \n\t" \ + " add r2,%0 \n\t" \ + " eor r3,r3 \n\t" \ + " acr r3 \n\t" \ + " macu.d r2,%3,%4 \n\t" \ + " st.w %1,r2 \n\t" \ + " mov %0,r3 \n\t" \ +:"=r"(cy),"=r"(_c):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c):"r2","r3"); + +#define PROPCARRY \ +__asm__( \ + " ld.w r2,%1 \n\t" \ + " add r2,%0 \n\t" \ + " st.w %1,r2 \n\t" \ + " eor %0,%0 \n\t" \ + " acr %0 \n\t" \ +:"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","cc"); + +/******************************************************************/ +#elif defined(TFM_MIPS) + +/* MIPS */ +#define MONT_START +#define MONT_FINI +#define LOOP_END +#define LOOP_START \ + mu = c[x] * mp + +#define INNERMUL \ +__asm__( \ + " multu %3,%4 \n\t" \ + " mflo $12 \n\t" \ + " mfhi $13 \n\t" \ + " addu $12,$12,%0 \n\t" \ + " sltu 
$10,$12,%0 \n\t" \ + " addu $13,$13,$10 \n\t" \ + " lw $10,%1 \n\t" \ + " addu $12,$12,$10 \n\t" \ + " sltu $10,$12,$10 \n\t" \ + " addu %0,$13,$10 \n\t" \ + " sw $12,%1 \n\t" \ +:"+r"(cy),"+m"(_c[0]):""(cy),"r"(mu),"r"(tmpm[0]),""(_c[0]):"$10","$12","$13"); ++tmpm; + +#define PROPCARRY \ +__asm__( \ + " lw $10,%1 \n\t" \ + " addu $10,$10,%0 \n\t" \ + " sw $10,%1 \n\t" \ + " sltu %0,$10,%0 \n\t" \ +:"+r"(cy),"+m"(_c[0]):""(cy),""(_c[0]):"$10"); + +/******************************************************************/ +#else + +/* ISO C code */ +#define MONT_START +#define MONT_FINI +#define LOOP_END +#define LOOP_START \ + mu = c[x] * mp + +#define INNERMUL \ + do { fp_word t; \ + t = ((fp_word)_c[0] + (fp_word)cy) + \ + (((fp_word)mu) * ((fp_word)*tmpm++)); \ + _c[0] = (fp_digit)t; \ + cy = (fp_digit)(t >> DIGIT_BIT); \ + } while (0) + +#define PROPCARRY \ + do { fp_digit t = _c[0] += cy; cy = (t < cy); } while (0) + +#endif +/******************************************************************/ + + +#define LO 0 +/* end fp_montogomery_reduce.c asm */ + + +/* start fp_sqr_comba.c asm */ +#if defined(TFM_X86) + +/* x86-32 optimized */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_FINI + +#define SQRADD(i, j) \ +__asm__( \ + "movl %6,%%eax \n\t" \ + "mull %%eax \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc"); + +#define SQRADD2(i, j) \ +__asm__( \ + "movl %6,%%eax \n\t" \ + "mull %7 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx", "cc"); + +#define SQRADDSC(i, j) \ +__asm__( \ + "movl %3,%%eax \n\t" \ + "mull %4 \n\t" \ + "movl %%eax,%0 \n\t" \ + "movl %%edx,%1 \n\t" \ + "xorl %2,%2 \n\t" \ + :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%eax","%edx","cc"); + +#define SQRADDAC(i, j) \ +__asm__( \ + "movl %6,%%eax \n\t" \ + "mull %7 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc"); + +#define SQRADDDB \ +__asm__( \ + "addl %6,%0 \n\t" \ + "adcl %7,%1 \n\t" \ + "adcl %8,%2 \n\t" \ + "addl %6,%0 \n\t" \ + "adcl %7,%1 \n\t" \ + "adcl %8,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc"); + +#elif defined(TFM_X86_64) +/* x86-64 optimized */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_FINI + +#define SQRADD(i, j) \ +__asm__( \ + "movq %6,%%rax \n\t" \ + "mulq %%rax \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "x"(i) :"%rax","%rdx","cc"); + +#define SQRADD2(i, j) \ +__asm__( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) 
:"%rax","%rdx","cc"); + +#define SQRADDSC(i, j) \ +__asm__( \ + "movq %3,%%rax \n\t" \ + "mulq %4 \n\t" \ + "movq %%rax,%0 \n\t" \ + "movq %%rdx,%1 \n\t" \ + "xorq %2,%2 \n\t" \ + :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%rax","%rdx","cc"); + +#define SQRADDAC(i, j) \ +__asm__( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc"); + +#define SQRADDDB \ +__asm__( \ + "addq %6,%0 \n\t" \ + "adcq %7,%1 \n\t" \ + "adcq %8,%2 \n\t" \ + "addq %6,%0 \n\t" \ + "adcq %7,%1 \n\t" \ + "adcq %8,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc"); + +#elif defined(TFM_SSE2) + +/* SSE2 Optimized */ +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_FINI \ + __asm__("emms"); + +#define SQRADD(i, j) \ +__asm__( \ + "movd %6,%%mm0 \n\t" \ + "pmuludq %%mm0,%%mm0\n\t" \ + "movd %%mm0,%%eax \n\t" \ + "psrlq $32,%%mm0 \n\t" \ + "addl %%eax,%0 \n\t" \ + "movd %%mm0,%%eax \n\t" \ + "adcl %%eax,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","cc"); + +#define SQRADD2(i, j) \ +__asm__( \ + "movd %6,%%mm0 \n\t" \ + "movd %7,%%mm1 \n\t" \ + "pmuludq %%mm1,%%mm0\n\t" \ + "movd %%mm0,%%eax \n\t" \ + "psrlq $32,%%mm0 \n\t" \ + "movd %%mm0,%%edx \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc"); + +#define SQRADDSC(i, j) \ +__asm__( \ + "movd %3,%%mm0 \n\t" \ + "movd %4,%%mm1 \n\t" \ + "pmuludq %%mm1,%%mm0\n\t" \ + "movd %%mm0,%0 \n\t" \ + "psrlq $32,%%mm0 \n\t" \ + "movd %%mm0,%1 \n\t" \ + "xorl %2,%2 \n\t" \ + :"=r"(sc0), "=r"(sc1), "=r"(sc2): "m"(i), "m"(j)); + +/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */ + +#define SQRADDAC(i, j) \ +__asm__( \ + "movd %6,%%mm0 \n\t" \ + "movd %7,%%mm1 \n\t" \ + "pmuludq %%mm1,%%mm0\n\t" \ + "movd %%mm0,%%eax \n\t" \ + "psrlq $32,%%mm0 \n\t" \ + "movd %%mm0,%%edx \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","cc"); + +#define SQRADDDB \ +__asm__( \ + "addl %6,%0 \n\t" \ + "adcl %7,%1 \n\t" \ + "adcl %8,%2 \n\t" \ + "addl %6,%0 \n\t" \ + "adcl %7,%1 \n\t" \ + "adcl %8,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc"); + +#elif defined(TFM_ARM) + +/* ARM code */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_FINI + +/* multiplies point i and j, updates carry "c1" and digit c2 */ +#define SQRADD(i, j) \ +__asm__( \ +" UMULL r0,r1,%6,%6 \n\t" \ +" ADDS %0,%0,r0 \n\t" \ +" ADCS %1,%1,r1 \n\t" \ +" ADC %2,%2,#0 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "cc"); + +/* for squaring some of the terms are doubled... 
*/ +#define SQRADD2(i, j) \ +__asm__( \ +" UMULL r0,r1,%6,%7 \n\t" \ +" ADDS %0,%0,r0 \n\t" \ +" ADCS %1,%1,r1 \n\t" \ +" ADC %2,%2,#0 \n\t" \ +" ADDS %0,%0,r0 \n\t" \ +" ADCS %1,%1,r1 \n\t" \ +" ADC %2,%2,#0 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc"); + +#define SQRADDSC(i, j) \ +__asm__( \ +" UMULL %0,%1,%3,%4 \n\t" \ +" SUB %2,%2,%2 \n\t" \ +:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "r"(i), "r"(j) : "cc"); + +/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */ + +#define SQRADDAC(i, j) \ +__asm__( \ +" UMULL r0,r1,%6,%7 \n\t" \ +" ADDS %0,%0,r0 \n\t" \ +" ADCS %1,%1,r1 \n\t" \ +" ADC %2,%2,#0 \n\t" \ +:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "cc"); + +#define SQRADDDB \ +__asm__( \ +" ADDS %0,%0,%3 \n\t" \ +" ADCS %1,%1,%4 \n\t" \ +" ADC %2,%2,%5 \n\t" \ +" ADDS %0,%0,%3 \n\t" \ +" ADCS %1,%1,%4 \n\t" \ +" ADC %2,%2,%5 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc"); + +#elif defined(TFM_PPC32) + +/* PPC32 */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_FINI + +/* multiplies point i and j, updates carry "c1" and digit c2 */ +#define SQRADD(i, j) \ +__asm__( \ + " mullw 16,%6,%6 \n\t" \ + " addc %0,%0,16 \n\t" \ + " mulhwu 16,%6,%6 \n\t" \ + " adde %1,%1,16 \n\t" \ + " addze %2,%2 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc"); + +/* for squaring some of the terms are doubled... */ +#define SQRADD2(i, j) \ +__asm__( \ + " mullw 16,%6,%7 \n\t" \ + " mulhwu 17,%6,%7 \n\t" \ + " addc %0,%0,16 \n\t" \ + " adde %1,%1,17 \n\t" \ + " addze %2,%2 \n\t" \ + " addc %0,%0,16 \n\t" \ + " adde %1,%1,17 \n\t" \ + " addze %2,%2 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc"); + +#define SQRADDSC(i, j) \ +__asm__( \ + " mullw %0,%6,%7 \n\t" \ + " mulhwu %1,%6,%7 \n\t" \ + " xor %2,%2,%2 \n\t" \ +:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc"); + +#define SQRADDAC(i, j) \ +__asm__( \ + " mullw 16,%6,%7 \n\t" \ + " addc %0,%0,16 \n\t" \ + " mulhwu 16,%6,%7 \n\t" \ + " adde %1,%1,16 \n\t" \ + " addze %2,%2 \n\t" \ +:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc"); + +#define SQRADDDB \ +__asm__( \ + " addc %0,%0,%3 \n\t" \ + " adde %1,%1,%4 \n\t" \ + " adde %2,%2,%5 \n\t" \ + " addc %0,%0,%3 \n\t" \ + " adde %1,%1,%4 \n\t" \ + " adde %2,%2,%5 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc"); + +#elif defined(TFM_PPC64) +/* PPC64 */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_FINI + +/* multiplies point i and j, updates carry "c1" and digit c2 */ +#define SQRADD(i, j) \ +__asm__( \ + " mulld r16,%6,%6 \n\t" \ + " addc %0,%0,r16 \n\t" \ + " mulhdu r16,%6,%6 \n\t" \ + " adde %1,%1,r16 \n\t" \ + " addze %2,%2 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","cc"); + +/* for squaring some of the terms are doubled... 
*/ +#define SQRADD2(i, j) \ +__asm__( \ + " mulld r16,%6,%7 \n\t" \ + " mulhdu r17,%6,%7 \n\t" \ + " addc %0,%0,r16 \n\t" \ + " adde %1,%1,r17 \n\t" \ + " addze %2,%2 \n\t" \ + " addc %0,%0,r16 \n\t" \ + " adde %1,%1,r17 \n\t" \ + " addze %2,%2 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","cc"); + +#define SQRADDSC(i, j) \ +__asm__( \ + " mulld %0,%6,%7 \n\t" \ + " mulhdu %1,%6,%7 \n\t" \ + " xor %2,%2,%2 \n\t" \ +:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc"); + +#define SQRADDAC(i, j) \ +__asm__( \ + " mulld r16,%6,%7 \n\t" \ + " addc %0,%0,r16 \n\t" \ + " mulhdu r16,%6,%7 \n\t" \ + " adde %1,%1,r16 \n\t" \ + " addze %2,%2 \n\t" \ +:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "cc"); + +#define SQRADDDB \ +__asm__( \ + " addc %0,%0,%3 \n\t" \ + " adde %1,%1,%4 \n\t" \ + " adde %2,%2,%5 \n\t" \ + " addc %0,%0,%3 \n\t" \ + " adde %1,%1,%4 \n\t" \ + " adde %2,%2,%5 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc"); + + +#elif defined(TFM_AVR32) + +/* AVR32 */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_FINI + +/* multiplies point i and j, updates carry "c1" and digit c2 */ +#define SQRADD(i, j) \ +__asm__( \ + " mulu.d r2,%6,%6 \n\t" \ + " add %0,%0,r2 \n\t" \ + " adc %1,%1,r3 \n\t" \ + " acr %2 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3"); + +/* for squaring some of the terms are doubled... */ +#define SQRADD2(i, j) \ +__asm__( \ + " mulu.d r2,%6,%7 \n\t" \ + " add %0,%0,r2 \n\t" \ + " adc %1,%1,r3 \n\t" \ + " acr %2, \n\t" \ + " add %0,%0,r2 \n\t" \ + " adc %1,%1,r3 \n\t" \ + " acr %2, \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3"); + +#define SQRADDSC(i, j) \ +__asm__( \ + " mulu.d r2,%6,%7 \n\t" \ + " mov %0,r2 \n\t" \ + " mov %1,r3 \n\t" \ + " eor %2,%2 \n\t" \ +:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3"); + +#define SQRADDAC(i, j) \ +__asm__( \ + " mulu.d r2,%6,%7 \n\t" \ + " add %0,%0,r2 \n\t" \ + " adc %1,%1,r3 \n\t" \ + " acr %2 \n\t" \ +:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3"); + +#define SQRADDDB \ +__asm__( \ + " add %0,%0,%3 \n\t" \ + " adc %1,%1,%4 \n\t" \ + " adc %2,%2,%5 \n\t" \ + " add %0,%0,%3 \n\t" \ + " adc %1,%1,%4 \n\t" \ + " adc %2,%2,%5 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc"); + +#elif defined(TFM_MIPS) + +/* MIPS */ +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_FINI + +/* multiplies point i and j, updates carry "c1" and digit c2 */ +#define SQRADD(i, j) \ +__asm__( \ + " multu %6,%6 \n\t" \ + " mflo $12 \n\t" \ + " mfhi $13 \n\t" \ + " addu %0,%0,$12 \n\t" \ + " sltu $12,%0,$12 \n\t" \ + " addu %1,%1,$13 \n\t" \ + " sltu $13,%1,$13 \n\t" \ + " addu %1,%1,$12 \n\t" \ + " sltu $12,%1,$12 \n\t" \ + " addu %2,%2,$13 \n\t" \ + " addu %2,%2,$12 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13"); + +/* for squaring some of the terms are doubled... 
*/ +#define SQRADD2(i, j) \ +__asm__( \ + " multu %6,%7 \n\t" \ + " mflo $12 \n\t" \ + " mfhi $13 \n\t" \ + \ + " addu %0,%0,$12 \n\t" \ + " sltu $14,%0,$12 \n\t" \ + " addu %1,%1,$13 \n\t" \ + " sltu $15,%1,$13 \n\t" \ + " addu %1,%1,$14 \n\t" \ + " sltu $14,%1,$14 \n\t" \ + " addu %2,%2,$15 \n\t" \ + " addu %2,%2,$14 \n\t" \ + \ + " addu %0,%0,$12 \n\t" \ + " sltu $14,%0,$12 \n\t" \ + " addu %1,%1,$13 \n\t" \ + " sltu $15,%1,$13 \n\t" \ + " addu %1,%1,$14 \n\t" \ + " sltu $14,%1,$14 \n\t" \ + " addu %2,%2,$15 \n\t" \ + " addu %2,%2,$14 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15"); + +#define SQRADDSC(i, j) \ +__asm__( \ + " multu %6,%7 \n\t" \ + " mflo %0 \n\t" \ + " mfhi %1 \n\t" \ + " xor %2,%2,%2 \n\t" \ +:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc"); + +#define SQRADDAC(i, j) \ +__asm__( \ + " multu %6,%7 \n\t" \ + " mflo $12 \n\t" \ + " mfhi $13 \n\t" \ + " addu %0,%0,$12 \n\t" \ + " sltu $12,%0,$12 \n\t" \ + " addu %1,%1,$13 \n\t" \ + " sltu $13,%1,$13 \n\t" \ + " addu %1,%1,$12 \n\t" \ + " sltu $12,%1,$12 \n\t" \ + " addu %2,%2,$13 \n\t" \ + " addu %2,%2,$12 \n\t" \ +:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "$14"); + +#define SQRADDDB \ +__asm__( \ + " addu %0,%0,%3 \n\t" \ + " sltu $10,%0,%3 \n\t" \ + " addu %1,%1,$10 \n\t" \ + " sltu $10,%1,$10 \n\t" \ + " addu %1,%1,%4 \n\t" \ + " sltu $11,%1,%4 \n\t" \ + " addu %2,%2,$10 \n\t" \ + " addu %2,%2,$11 \n\t" \ + " addu %2,%2,%5 \n\t" \ + \ + " addu %0,%0,%3 \n\t" \ + " sltu $10,%0,%3 \n\t" \ + " addu %1,%1,$10 \n\t" \ + " sltu $10,%1,$10 \n\t" \ + " addu %1,%1,%4 \n\t" \ + " sltu $11,%1,%4 \n\t" \ + " addu %2,%2,$10 \n\t" \ + " addu %2,%2,$11 \n\t" \ + " addu %2,%2,%5 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11"); + +#else + +#define TFM_ISO + +/* ISO C portable code */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_FINI + +/* multiplies point i and j, updates carry "c1" and digit c2 */ +#define SQRADD(i, j) \ + do { fp_word t; \ + t = c0 + ((fp_word)i) * ((fp_word)j); c0 = (fp_digit)t; \ + t = c1 + (t >> DIGIT_BIT); c1 = (fp_digit)t; \ + c2 +=(fp_digit) (t >> DIGIT_BIT); \ + } while (0); + + +/* for squaring some of the terms are doubled... 
*/ +#define SQRADD2(i, j) \ + do { fp_word t; \ + t = ((fp_word)i) * ((fp_word)j); \ + tt = (fp_word)c0 + t; c0 = (fp_digit)tt; \ + tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \ + c2 +=(fp_digit)(tt >> DIGIT_BIT); \ + tt = (fp_word)c0 + t; c0 = (fp_digit)tt; \ + tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \ + c2 +=(fp_digit)(tt >> DIGIT_BIT); \ + } while (0); + +#define SQRADDSC(i, j) \ + do { fp_word t; \ + t = ((fp_word)i) * ((fp_word)j); \ + sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \ + } while (0); + +#define SQRADDAC(i, j) \ + do { fp_word t; \ + t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = (fp_digit)t; \ + t = sc1 + (t >> DIGIT_BIT); sc1 = (fp_digit)t; \ + sc2 += (fp_digit)(t >> DIGIT_BIT); \ + } while (0); + +#define SQRADDDB \ + do { fp_word t; \ + t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = (fp_digit)t; \ + t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); \ + c1 = (fp_digit)t; \ + c2 = c2 + (fp_digit)(((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT)); \ + } while (0); + +#endif + +#ifdef TFM_SMALL_SET + #include "fp_sqr_comba_small_set.i" +#endif + +#if defined(TFM_SQR3) && FP_SIZE >= 6 + #include "fp_sqr_comba_3.i" +#endif +#if defined(TFM_SQR4) && FP_SIZE >= 8 + #include "fp_sqr_comba_4.i" +#endif +#if defined(TFM_SQR6) && FP_SIZE >= 12 + #include "fp_sqr_comba_6.i" +#endif +#if defined(TFM_SQR7) && FP_SIZE >= 14 + #include "fp_sqr_comba_7.i" +#endif +#if defined(TFM_SQR8) && FP_SIZE >= 16 + #include "fp_sqr_comba_8.i" +#endif +#if defined(TFM_SQR9) && FP_SIZE >= 18 + #include "fp_sqr_comba_9.i" +#endif +#if defined(TFM_SQR12) && FP_SIZE >= 24 + #include "fp_sqr_comba_12.i" +#endif +#if defined(TFM_SQR17) && FP_SIZE >= 34 + #include "fp_sqr_comba_17.i" +#endif +#if defined(TFM_SQR20) && FP_SIZE >= 40 + #include "fp_sqr_comba_20.i" +#endif +#if defined(TFM_SQR24) && FP_SIZE >= 48 + #include "fp_sqr_comba_24.i" +#endif +#if defined(TFM_SQR28) && FP_SIZE >= 56 + #include "fp_sqr_comba_28.i" +#endif +#if defined(TFM_SQR32) && FP_SIZE >= 64 + #include "fp_sqr_comba_32.i" +#endif +#if defined(TFM_SQR48) && FP_SIZE >= 96 + #include "fp_sqr_comba_48.i" +#endif +#if defined(TFM_SQR64) && FP_SIZE >= 128 + #include "fp_sqr_comba_64.i" +#endif +/* end fp_sqr_comba.c asm */ + +/* start fp_mul_comba.c asm */ +/* these are the combas. Worship them. 
*/
+#if defined(TFM_X86)
+/* Generic x86 optimized code */
+
+/* anything you need at the start */
+#define COMBA_START
+
+/* clear the chaining variables */
+#define COMBA_CLEAR \
+   c0 = c1 = c2 = 0;
+
+/* forward the carry to the next digit */
+#define COMBA_FORWARD \
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+/* store the first sum */
+#define COMBA_STORE(x) \
+   x = c0;
+
+/* store the second sum [carry] */
+#define COMBA_STORE2(x) \
+   x = c1;
+
+/* anything you need at the end */
+#define COMBA_FINI
+
+/* this should multiply i and j */
+#define MULADD(i, j) \
+__asm__( \
+     "movl %6,%%eax \n\t" \
+     "mull %7 \n\t" \
+     "addl %%eax,%0 \n\t" \
+     "adcl %%edx,%1 \n\t" \
+     "adcl $0,%2 \n\t" \
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
+
+#elif defined(TFM_X86_64)
+/* x86-64 optimized */
+
+/* anything you need at the start */
+#define COMBA_START
+
+/* clear the chaining variables */
+#define COMBA_CLEAR \
+   c0 = c1 = c2 = 0;
+
+/* forward the carry to the next digit */
+#define COMBA_FORWARD \
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+/* store the first sum */
+#define COMBA_STORE(x) \
+   x = c0;
+
+/* store the second sum [carry] */
+#define COMBA_STORE2(x) \
+   x = c1;
+
+/* anything you need at the end */
+#define COMBA_FINI
+
+/* this should multiply i and j */
+#define MULADD(i, j) \
+__asm__ ( \
+     "movq %6,%%rax \n\t" \
+     "mulq %7 \n\t" \
+     "addq %%rax,%0 \n\t" \
+     "adcq %%rdx,%1 \n\t" \
+     "adcq $0,%2 \n\t" \
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
+
+
+#if defined(HAVE_INTEL_MULX)
+#define MULADD_BODY(a,b,c) \
+    __asm__ volatile( \
+         "movq %[a0],%%rdx\n\t" \
+         "xorq %%rcx, %%rcx\n\t" \
+         "movq 0(%[cp]),%%r8\n\t" \
+         "movq 8(%[cp]),%%r9\n\t" \
+         "movq 16(%[cp]),%%r10\n\t" \
+         "movq 24(%[cp]),%%r11\n\t" \
+         "movq 32(%[cp]),%%r12\n\t" \
+         "movq 40(%[cp]),%%r13\n\t" \
+         \
+         "mulx (%[bp]),%%rax, %%rbx\n\t" \
+         "adoxq %%rax, %%r8\n\t" \
+         "mulx 8(%[bp]),%%rax, %%rcx\n\t" \
+         "adcxq %%rbx, %%r9\n\t" \
+         "adoxq %%rax, %%r9\n\t" \
+         "mulx 16(%[bp]),%%rax, %%rbx\n\t" \
+         "adcxq %%rcx, %%r10\n\t" \
+         "adoxq %%rax, %%r10\n\t" \
+         "mulx 24(%[bp]),%%rax, %%rcx\n\t" \
+         "adcxq %%rbx, %%r11\n\t" \
+         "adoxq %%rax, %%r11\n\t" \
+         "adcxq %%rcx, %%r12\n\t" \
+         "mov $0, %%rdx\n\t" \
+         "adox %%rdx, %%r12\n\t" \
+         "adcx %%rdx, %%r13\n\t" \
+         \
+         "movq %%r8, 0(%[cp])\n\t" \
+         "movq %%r9, 8(%[cp])\n\t" \
+         "movq %%r10, 16(%[cp])\n\t" \
+         "movq %%r11, 24(%[cp])\n\t" \
+         "movq %%r12, 32(%[cp])\n\t" \
+         "movq %%r13, 40(%[cp])\n\t" \
+         : \
+         : [a0] "r" (a->dp[ix]), [bp] "r" (&(b->dp[iy])), \
+           [cp] "r" (&(c->dp[iz])) \
+         : "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", \
+           "%rdx", "%rax", "%rcx", "%rbx" \
+    )
+
+#define TFM_INTEL_MUL_COMBA(a, b, c) \
+    for (iz=0; iz<c->used; iz++) \
+        c->dp[iz] = 0; \
+    for (ix=0; ix<a->used; ix++) { \
+        for (iy=0; iy<b->used; iy+=4) { \
+            iz = ix + iy; \
+            MULADD_BODY(a, b, c); \
+        } \
+    }
+#endif
+
+#elif defined(TFM_SSE2)
+/* use SSE2 optimizations */
+
+/* anything you need at the start */
+#define COMBA_START
+
+/* clear the chaining variables */
+#define COMBA_CLEAR \
+   c0 = c1 = c2 = 0;
+
+/* forward the carry to the next digit */
+#define COMBA_FORWARD \
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+/* store the first sum */
+#define COMBA_STORE(x) \
+   x = c0;
+
+/* store the second sum [carry] */
+#define COMBA_STORE2(x) \
+   x = c1;
+
+/* anything you need at the end */
+#define COMBA_FINI \
+   __asm__("emms");
+
+/* this should multiply i and j */
+#define MULADD(i, j) \
+__asm__( \ + "movd %6,%%mm0 \n\t" \ + "movd %7,%%mm1 \n\t" \ + "pmuludq %%mm1,%%mm0\n\t" \ + "movd %%mm0,%%eax \n\t" \ + "psrlq $32,%%mm0 \n\t" \ + "addl %%eax,%0 \n\t" \ + "movd %%mm0,%%eax \n\t" \ + "adcl %%eax,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","cc"); + +#elif defined(TFM_ARM) +/* ARM code */ + +#define COMBA_START + +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; + +#define COMBA_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define COMBA_FINI + +#define MULADD(i, j) \ +__asm__( \ +" UMULL r0,r1,%6,%7 \n\t" \ +" ADDS %0,%0,r0 \n\t" \ +" ADCS %1,%1,r1 \n\t" \ +" ADC %2,%2,#0 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc"); + +#elif defined(TFM_PPC32) +/* For 32-bit PPC */ + +#define COMBA_START + +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; + +#define COMBA_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define COMBA_FINI + +/* untested: will mulhwu change the flags? Docs say no */ +#define MULADD(i, j) \ +__asm__( \ + " mullw 16,%6,%7 \n\t" \ + " addc %0,%0,16 \n\t" \ + " mulhwu 16,%6,%7 \n\t" \ + " adde %1,%1,16 \n\t" \ + " addze %2,%2 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16"); + +#elif defined(TFM_PPC64) +/* For 64-bit PPC */ + +#define COMBA_START + +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; + +#define COMBA_FORWARD \ + do { c0 = c1; c1 = c2; c2 = 0; } while (0); + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define COMBA_FINI + +/* untested: will mulhdu change the flags? 
Docs say no */
+#define MULADD(i, j) \
+__asm__( \
+   " mulld  r16,%6,%7 \n\t" \
+   " addc   %0,%0,r16 \n\t" \
+   " mulhdu r16,%6,%7 \n\t" \
+   " adde   %1,%1,r16 \n\t" \
+   " addze  %2,%2     \n\t" \
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16");
+
+#elif defined(TFM_AVR32)
+
+/* AVR32 */
+
+#define COMBA_START
+
+#define COMBA_CLEAR \
+   c0 = c1 = c2 = 0;
+
+#define COMBA_FORWARD \
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+#define COMBA_STORE(x) \
+   x = c0;
+
+#define COMBA_STORE2(x) \
+   x = c1;
+
+#define COMBA_FINI
+
+#define MULADD(i, j) \
+__asm__( \
+   " mulu.d r2,%6,%7 \n\t" \
+   " add    %0,r2    \n\t" \
+   " adc    %1,%1,r3 \n\t" \
+   " acr    %2       \n\t" \
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");
+
+#elif defined(TFM_MIPS)
+
+/* MIPS */
+#define COMBA_START
+
+#define COMBA_CLEAR \
+   c0 = c1 = c2 = 0;
+
+#define COMBA_FORWARD \
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+#define COMBA_STORE(x) \
+   x = c0;
+
+#define COMBA_STORE2(x) \
+   x = c1;
+
+#define COMBA_FINI
+
+#define MULADD(i, j) \
+__asm__( \
+   " multu %6,%7      \n\t" \
+   " mflo  $12        \n\t" \
+   " mfhi  $13        \n\t" \
+   " addu  %0,%0,$12  \n\t" \
+   " sltu  $12,%0,$12 \n\t" \
+   " addu  %1,%1,$13  \n\t" \
+   " sltu  $13,%1,$13 \n\t" \
+   " addu  %1,%1,$12  \n\t" \
+   " sltu  $12,%1,$12 \n\t" \
+   " addu  %2,%2,$13  \n\t" \
+   " addu  %2,%2,$12  \n\t" \
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13");
+
+#else
+/* ISO C code */
+
+#define COMBA_START
+
+#define COMBA_CLEAR \
+   c0 = c1 = c2 = 0;
+
+#define COMBA_FORWARD \
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+#define COMBA_STORE(x) \
+   x = c0;
+
+#define COMBA_STORE2(x) \
+   x = c1;
+
+#define COMBA_FINI
+
+#define MULADD(i, j) \
+   do { fp_word t; \
+   t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); \
+   c0 = (fp_digit)t; \
+   t = (fp_word)c1 + (t >> DIGIT_BIT); \
+   c1 = (fp_digit)t; \
+   c2 += (fp_digit)(t >> DIGIT_BIT); \
+   } while (0);
+
+#endif
+
+
+#ifdef TFM_SMALL_SET
+    #include "fp_mul_comba_small_set.i"
+#endif
+
+#if defined(TFM_MUL3) && FP_SIZE >= 6
+    #include "fp_mul_comba_3.i"
+#endif
+#if defined(TFM_MUL4) && FP_SIZE >= 8
+    #include "fp_mul_comba_4.i"
+#endif
+#if defined(TFM_MUL6) && FP_SIZE >= 12
+    #include "fp_mul_comba_6.i"
+#endif
+#if defined(TFM_MUL7) && FP_SIZE >= 14
+    #include "fp_mul_comba_7.i"
+#endif
+#if defined(TFM_MUL8) && FP_SIZE >= 16
+    #include "fp_mul_comba_8.i"
+#endif
+#if defined(TFM_MUL9) && FP_SIZE >= 18
+    #include "fp_mul_comba_9.i"
+#endif
+#if defined(TFM_MUL12) && FP_SIZE >= 24
+    #include "fp_mul_comba_12.i"
+#endif
+#if defined(TFM_MUL17) && FP_SIZE >= 34
+    #include "fp_mul_comba_17.i"
+#endif
+#if defined(TFM_MUL20) && FP_SIZE >= 40
+    #include "fp_mul_comba_20.i"
+#endif
+#if defined(TFM_MUL24) && FP_SIZE >= 48
+    #include "fp_mul_comba_24.i"
+#endif
+#if defined(TFM_MUL28) && FP_SIZE >= 56
+    #include "fp_mul_comba_28.i"
+#endif
+#if defined(TFM_MUL32) && FP_SIZE >= 64
+    #include "fp_mul_comba_32.i"
+#endif
+#if defined(TFM_MUL48) && FP_SIZE >= 96
+    #include "fp_mul_comba_48.i"
+#endif
+#if defined(TFM_MUL64) && FP_SIZE >= 128
+    #include "fp_mul_comba_64.i"
+#endif
+
+/* end fp_mul_comba.c asm */
+
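Aside for reviewers: every MULADD variant above, whatever the architecture, performs the same operation on the three chaining digits used by the comba routines: it adds the double-width product i*j into the running column sum (c2,c1,c0), rippling the carry upward. A minimal portable C sketch of that accumulation (illustrative only, not part of the patch; digit/dword stand in for fp_digit/fp_word on a 32-bit-digit build):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t digit;  /* plays the role of fp_digit */
    typedef uint64_t dword;  /* plays the role of fp_word (double width) */

    /* (c2,c1,c0) += i * j -- what each MULADD macro implements */
    static void muladd(digit *c0, digit *c1, digit *c2, digit i, digit j)
    {
        dword t = (dword)*c0 + (dword)i * (dword)j; /* low digit + product */
        *c0 = (digit)t;
        t = (dword)*c1 + (t >> 32);                 /* carry into mid digit */
        *c1 = (digit)t;
        *c2 += (digit)(t >> 32);                    /* carry into top digit */
    }

    int main(void)
    {
        /* accumulate one comba column twice with the largest 32-bit digits */
        digit c0 = 0, c1 = 0, c2 = 0;
        muladd(&c0, &c1, &c2, 0xFFFFFFFFu, 0xFFFFFFFFu);
        muladd(&c0, &c1, &c2, 0xFFFFFFFFu, 0xFFFFFFFFu);
        printf("%08x %08x %08x\n", c2, c1, c0); /* 00000001 fffffffc 00000002 */
        return 0;
    }

The generated fp_mul_comba_N.i files simply unroll this per output column: COMBA_CLEAR, a run of MULADDs for the column, COMBA_STORE, then COMBA_FORWARD to shift the carry chain along.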
diff --git a/client/wolfssl/wolfcrypt/src/asn.c b/client/wolfssl/wolfcrypt/src/asn.c
new file mode 100644
index 0000000..c4e6004
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/asn.c
@@ -0,0 +1,17525 @@
+/* asn.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+/*
+ASN Options:
+ * NO_ASN_TIME: Disables time parts of the ASN code for systems without an RTC
+    or wishing to save space.
+ * IGNORE_NAME_CONSTRAINTS: Skip ASN name checks.
+ * ASN_DUMP_OID: Allows dump of OID information for debugging.
+ * RSA_DECODE_EXTRA: Decodes extra information in RSA public key.
+ * WOLFSSL_CERT_GEN: Cert generation. Saves extra certificate info in GetName.
+ * WOLFSSL_NO_ASN_STRICT: Disable strict RFC compliance checks to
+    restore 3.13.0 behavior.
+ * WOLFSSL_NO_OCSP_OPTIONAL_CERTS: Skip optional OCSP certs (responder issuer
+    must still be trusted)
+ * WOLFSSL_NO_TRUSTED_CERTS_VERIFY: Workaround for situation where entire cert
+    chain is not loaded. This only matches on subject and public key and
+    does not perform a PKI validation, so it is not a secure solution.
+    Only enabled for OCSP.
+ * WOLFSSL_NO_OCSP_ISSUER_CHECK: Can be defined for backwards compatibility to
+    disable checking of OCSP subject hash with issuer hash.
+ * WOLFSSL_SMALL_CERT_VERIFY: Verify the certificate signature without using
+    DecodedCert. Doubles up on some code but allows smaller dynamic memory
+    usage.
+ * WOLFSSL_NO_OCSP_DATE_CHECK: Disable date checks for OCSP responses. This
+    may be required when the system's real-time clock is not very accurate.
+    It is recommended to enforce the nonce check instead if possible.
+ * WOLFSSL_FORCE_OCSP_NONCE_CHECK: Require nonces to be available in OCSP
+    responses. The nonces are optional and may not be supported by all
+    responders. If it can be ensured that the used responder sends nonces this
+    option may improve security.
+*/
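Aside: all of the options above are compile-time defines. In a typical wolfSSL build they are supplied either on the compiler command line or collected in a user_settings.h that <wolfssl/wolfcrypt/settings.h> includes when WOLFSSL_USER_SETTINGS is defined. A hypothetical sketch (the flag selection is illustrative only, not something this patch configures):

    /* user_settings.h -- compile with -DWOLFSSL_USER_SETTINGS so that
     * <wolfssl/wolfcrypt/settings.h> pulls this file in */
    #ifndef USER_SETTINGS_H
    #define USER_SETTINGS_H

    #define NO_ASN_TIME               /* target has no real-time clock */
    #define WOLFSSL_SMALL_CERT_VERIFY /* trade duplicated code for less heap */

    #endif /* USER_SETTINGS_H */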
+#ifndef NO_ASN
+#include <wolfssl/wolfcrypt/asn.h>
+#include <wolfssl/wolfcrypt/coding.h>
+#include <wolfssl/wolfcrypt/md2.h>
+#include <wolfssl/wolfcrypt/hmac.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/pwdbased.h>
+#include <wolfssl/wolfcrypt/des3.h>
+#include <wolfssl/wolfcrypt/aes.h>
+#include <wolfssl/wolfcrypt/wc_encrypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+#include <wolfssl/wolfcrypt/random.h>
+#include <wolfssl/wolfcrypt/hash.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#ifndef NO_RC4
+    #include <wolfssl/wolfcrypt/arc4.h>
+#endif
+
+#ifdef HAVE_NTRU
+    #include "libntruencrypt/ntru_crypto.h"
+#endif
+
+#if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384)
+    #include <wolfssl/wolfcrypt/sha512.h>
+#endif
+
+#ifndef NO_SHA256
+    #include <wolfssl/wolfcrypt/sha256.h>
+#endif
+
+#ifdef HAVE_ECC
+    #include <wolfssl/wolfcrypt/ecc.h>
+#endif
+
+#ifdef HAVE_ED25519
+    #include <wolfssl/wolfcrypt/ed25519.h>
+#endif
+
+#ifdef HAVE_ED448
+    #include <wolfssl/wolfcrypt/ed448.h>
+#endif
+
+#ifndef NO_RSA
+    #include <wolfssl/wolfcrypt/rsa.h>
+#if defined(WOLFSSL_XILINX_CRYPT) || defined(WOLFSSL_CRYPTOCELL)
+extern int wc_InitRsaHw(RsaKey* key);
+#endif
+#endif
+
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+    #include <wolfssl/openssl/objects.h>
+#endif
+
+#ifdef _MSC_VER
+    /* 4996 warning to use MS extensions e.g., strcpy_s instead of XSTRNCPY */
+    #pragma warning(disable: 4996)
+#endif
+
+#define ERROR_OUT(err, eLabel) { ret = (err); goto eLabel; }
+
+#if defined(HAVE_SELFTEST) || ( !defined(NO_SKID) && \
+        ( !defined(HAVE_FIPS) || \
+          !defined(HAVE_FIPS_VERSION) ))
+    #ifndef WOLFSSL_AES_KEY_SIZE_ENUM
+    #define WOLFSSL_AES_KEY_SIZE_ENUM
+    enum Asn_Misc {
+        AES_IV_SIZE         = 16,
+        AES_128_KEY_SIZE    = 16,
+        AES_192_KEY_SIZE    = 24,
+        AES_256_KEY_SIZE    = 32
+    };
+    #endif
+#endif
+#ifdef WOLFSSL_RENESAS_TSIP_TLS
+void tsip_inform_key_position(const word32 key_n_start,
+                const word32 key_n_len, const word32 key_e_start,
+                const word32 key_e_len);
+int tsip_tls_CertVerify(const byte *cert, word32 certSz,
+                        const byte *signature, word32 sigSz,
+                        word32 key_n_start, word32 key_n_len,
+                        word32 key_e_start, word32 key_e_len,
+                        byte *tsip_encRsaKeyIdx);
+#endif
+int GetLength(const byte* input, word32* inOutIdx, int* len,
+              word32 maxIdx)
+{
+    return GetLength_ex(input, inOutIdx, len, maxIdx, 1);
+}
+
+
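Aside: GetLength (and GetLength_ex below) decodes the DER length octets that follow a tag byte: a value below 0x80 is the length itself, while 0x80|N means the length is held in the next N bytes. A sketch of driving it directly, assuming the local prototypes from <wolfssl/wolfcrypt/asn.h> are visible to the caller (illustrative only, not part of the patch):

    #include <stdio.h>
    #include <wolfssl/wolfcrypt/settings.h>
    #include <wolfssl/wolfcrypt/asn.h>

    int main(void)
    {
        /* SEQUENCE header with a long-form length: 0x82 0x01 0xF4 -> 500 */
        const byte hdr[] = { 0x30, 0x82, 0x01, 0xF4 };
        word32 idx = 1;   /* start just past the tag byte */
        int len = 0;

        /* check=0 skips the "content fits in maxIdx" test, since only the
         * 4-byte header is in this buffer, not the 500-byte body */
        int ret = GetLength_ex(hdr, &idx, &len, sizeof(hdr), 0);
        printf("ret=%d len=%d idx=%u\n", ret, len, idx); /* 500 500 4 */
        return ret < 0;
    }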
+/* Give the option to check the length value found against the index:
+ * 1 to check, 0 to not check. */
+int GetLength_ex(const byte* input, word32* inOutIdx, int* len,
+                 word32 maxIdx, int check)
+{
+    int     length = 0;
+    word32  idx = *inOutIdx;
+    byte    b;
+
+    *len = 0;    /* default length */
+
+    if ((idx + 1) > maxIdx) {   /* for first read */
+        WOLFSSL_MSG("GetLength bad index on input");
+        return BUFFER_E;
+    }
+
+    b = input[idx++];
+    if (b >= ASN_LONG_LENGTH) {
+        word32 bytes = b & 0x7F;
+
+        if ((idx + bytes) > maxIdx) {   /* for reading bytes */
+            WOLFSSL_MSG("GetLength bad long length");
+            return BUFFER_E;
+        }
+
+        if (bytes > sizeof(length)) {
+            return ASN_PARSE_E;
+        }
+        while (bytes--) {
+            b = input[idx++];
+            length = (length << 8) | b;
+        }
+        if (length < 0) {
+            return ASN_PARSE_E;
+        }
+    }
+    else
+        length = b;
+
+    if (check && (idx + length) > maxIdx) {   /* for user of length */
+        WOLFSSL_MSG("GetLength value exceeds buffer length");
+        return BUFFER_E;
+    }
+
+    *inOutIdx = idx;
+    if (length > 0)
+        *len = length;
+
+    return length;
+}
+
+
+/* input     : buffer to read from
+ * inOutIdx  : index to start reading from, gets advanced by 1 if successful
+ * maxIdx    : maximum index value
+ * tag       : ASN tag value found
+ *
+ * returns 0 on success
+ */
+int GetASNTag(const byte* input, word32* inOutIdx, byte* tag, word32 maxIdx)
+{
+    word32 idx;
+
+    if (tag == NULL || inOutIdx == NULL || input == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    idx = *inOutIdx;
+    if (idx + ASN_TAG_SZ > maxIdx) {
+        WOLFSSL_MSG("Buffer too small for ASN tag");
+        return BUFFER_E;
+    }
+
+    *tag = input[idx];
+    *inOutIdx = idx + ASN_TAG_SZ;
+    return 0;
+}
+
+
+static int GetASNHeader_ex(const byte* input, byte tag, word32* inOutIdx,
+                           int* len, word32 maxIdx, int check)
+{
+    word32 idx = *inOutIdx;
+    byte   tagFound;
+    int    length;
+
+    if (GetASNTag(input, &idx, &tagFound, maxIdx) != 0)
+        return ASN_PARSE_E;
+
+    if (tagFound != tag)
+        return ASN_PARSE_E;
+
+    if (GetLength_ex(input, &idx, &length, maxIdx, check) < 0)
+        return ASN_PARSE_E;
+
+    *len      = length;
+    *inOutIdx = idx;
+    return length;
+}
+
+
+/* Get the DER/BER encoding of an ASN.1 header.
+ *
+ * input     Buffer holding DER/BER encoded data.
+ * tag       ASN.1 tag value expected in header.
+ * inOutIdx  Current index into buffer to parse.
+ * len       The number of bytes in the ASN.1 data.
+ * maxIdx    Length of data in buffer.
+ * returns BUFFER_E when there is not enough data to parse.
+ *         ASN_PARSE_E when the expected tag is not found or length is invalid.
+ *         Otherwise, the number of bytes in the ASN.1 data.
+ */ +static int GetASNHeader(const byte* input, byte tag, word32* inOutIdx, int* len, + word32 maxIdx) +{ + return GetASNHeader_ex(input, tag, inOutIdx, len, maxIdx, 1); +} + +static int GetHeader(const byte* input, byte* tag, word32* inOutIdx, int* len, + word32 maxIdx, int check) +{ + word32 idx = *inOutIdx; + int length; + + if ((idx + 1) > maxIdx) + return BUFFER_E; + + *tag = input[idx++]; + + if (GetLength_ex(input, &idx, &length, maxIdx, check) < 0) + return ASN_PARSE_E; + + *len = length; + *inOutIdx = idx; + return length; +} + +int GetSequence(const byte* input, word32* inOutIdx, int* len, + word32 maxIdx) +{ + return GetASNHeader(input, ASN_SEQUENCE | ASN_CONSTRUCTED, inOutIdx, len, + maxIdx); +} + + +int GetSequence_ex(const byte* input, word32* inOutIdx, int* len, + word32 maxIdx, int check) +{ + return GetASNHeader_ex(input, ASN_SEQUENCE | ASN_CONSTRUCTED, inOutIdx, len, + maxIdx, check); +} + + +int GetSet(const byte* input, word32* inOutIdx, int* len, + word32 maxIdx) +{ + return GetASNHeader(input, ASN_SET | ASN_CONSTRUCTED, inOutIdx, len, + maxIdx); +} + + +int GetSet_ex(const byte* input, word32* inOutIdx, int* len, + word32 maxIdx, int check) +{ + return GetASNHeader_ex(input, ASN_SET | ASN_CONSTRUCTED, inOutIdx, len, + maxIdx, check); +} + +/* Get the DER/BER encoded ASN.1 NULL element. + * Ensure that the all fields are as expected and move index past the element. + * + * input Buffer holding DER/BER encoded data. + * inOutIdx Current index into buffer to parse. + * maxIdx Length of data in buffer. + * returns BUFFER_E when there is not enough data to parse. + * ASN_TAG_NULL_E when the NULL tag is not found. + * ASN_EXPECT_0_E when the length is not zero. + * Otherwise, 0 to indicate success. + */ +static int GetASNNull(const byte* input, word32* inOutIdx, word32 maxIdx) +{ + word32 idx = *inOutIdx; + byte b; + + if ((idx + 2) > maxIdx) + return BUFFER_E; + + b = input[idx++]; + if (b != ASN_TAG_NULL) + return ASN_TAG_NULL_E; + + if (input[idx++] != 0) + return ASN_EXPECT_0_E; + + *inOutIdx = idx; + return 0; +} + +/* Set the DER/BER encoding of the ASN.1 NULL element. + * + * output Buffer to write into. + * returns the number of bytes added to the buffer. + */ +static int SetASNNull(byte* output) +{ + output[0] = ASN_TAG_NULL; + output[1] = 0; + + return 2; +} + +/* Get the DER/BER encoding of an ASN.1 BOOLEAN. + * + * input Buffer holding DER/BER encoded data. + * inOutIdx Current index into buffer to parse. + * maxIdx Length of data in buffer. + * returns BUFFER_E when there is not enough data to parse. + * ASN_PARSE_E when the BOOLEAN tag is not found or length is not 1. + * Otherwise, 0 to indicate the value was false and 1 to indicate true. + */ +static int GetBoolean(const byte* input, word32* inOutIdx, word32 maxIdx) +{ + word32 idx = *inOutIdx; + byte b; + + if ((idx + 3) > maxIdx) + return BUFFER_E; + + b = input[idx++]; + if (b != ASN_BOOLEAN) + return ASN_PARSE_E; + + if (input[idx++] != 1) + return ASN_PARSE_E; + + b = input[idx++] != 0; + + *inOutIdx = idx; + return b; +} + +#ifdef ASN1_SET_BOOLEAN +/* Set the DER/BER encoding of the ASN.1 NULL element. + * Note: Function not required as yet. + * + * val Boolean value to encode. + * output Buffer to write into. + * returns the number of bytes added to the buffer. + */ +static int SetBoolean(int val, byte* output) +{ + output[0] = ASN_BOOLEAN; + output[1] = 1; + output[2] = val ? -1 : 0; + + return 3; +} +#endif + +/* Get the DER/BER encoding of an ASN.1 OCTET_STRING header. 
+ * + * input Buffer holding DER/BER encoded data. + * inOutIdx Current index into buffer to parse. + * len The number of bytes in the ASN.1 data. + * maxIdx Length of data in buffer. + * returns BUFFER_E when there is not enough data to parse. + * ASN_PARSE_E when the OCTET_STRING tag is not found or length is + * invalid. + * Otherwise, the number of bytes in the ASN.1 data. + */ +int GetOctetString(const byte* input, word32* inOutIdx, int* len, + word32 maxIdx) +{ + return GetASNHeader(input, ASN_OCTET_STRING, inOutIdx, len, maxIdx); +} + +/* Get the DER/BER encoding of an ASN.1 INTEGER header. + * Removes the leading zero byte when found. + * + * input Buffer holding DER/BER encoded data. + * inOutIdx Current index into buffer to parse. + * len The number of bytes in the ASN.1 data (excluding any leading zero). + * maxIdx Length of data in buffer. + * returns BUFFER_E when there is not enough data to parse. + * ASN_PARSE_E when the INTEGER tag is not found, length is invalid, + * or invalid use of or missing leading zero. + * Otherwise, 0 to indicate success. + */ +static int GetASNInt(const byte* input, word32* inOutIdx, int* len, + word32 maxIdx) +{ + int ret; + + ret = GetASNHeader(input, ASN_INTEGER, inOutIdx, len, maxIdx); + if (ret < 0) + return ret; + + if (*len > 0) { + /* remove leading zero, unless there is only one 0x00 byte */ + if ((input[*inOutIdx] == 0x00) && (*len > 1)) { + (*inOutIdx)++; + (*len)--; + + if (*len > 0 && (input[*inOutIdx] & 0x80) == 0) + return ASN_PARSE_E; + } + } + + return 0; +} + +/* Get the DER/BER encoding of an ASN.1 INTEGER that has a value of no more than + * 7 bits. + * + * input Buffer holding DER/BER encoded data. + * inOutIdx Current index into buffer to parse. + * maxIdx Length of data in buffer. + * returns BUFFER_E when there is not enough data to parse. + * ASN_PARSE_E when the INTEGER tag is not found or length is invalid. + * Otherwise, the 7-bit value. 
+ */ +static int GetInteger7Bit(const byte* input, word32* inOutIdx, word32 maxIdx) +{ + word32 idx = *inOutIdx; + byte b; + + if ((idx + 3) > maxIdx) + return BUFFER_E; + + if (GetASNTag(input, &idx, &b, maxIdx) != 0) + return ASN_PARSE_E; + if (b != ASN_INTEGER) + return ASN_PARSE_E; + if (input[idx++] != 1) + return ASN_PARSE_E; + b = input[idx++]; + + *inOutIdx = idx; + return b; +} + + +#if !defined(NO_DSA) && !defined(NO_SHA) +static const char sigSha1wDsaName[] = "SHAwDSA"; +#endif /* NO_DSA */ +#ifndef NO_RSA +#ifdef WOLFSSL_MD2 + static const char sigMd2wRsaName[] = "md2WithRSAEncryption"; +#endif +#ifndef NO_MD5 + static const char sigMd5wRsaName[] = "md5WithRSAEncryption"; +#endif +#ifndef NO_SHA + static const char sigSha1wRsaName[] = "sha1WithRSAEncryption"; +#endif +#ifdef WOLFSSL_SHA224 + static const char sigSha224wRsaName[] = "sha224WithRSAEncryption"; +#endif +#ifndef NO_SHA256 + static const char sigSha256wRsaName[] = "sha256WithRSAEncryption"; +#endif +#ifdef WOLFSSL_SHA384 + static const char sigSha384wRsaName[] = "sha384WithRSAEncryption"; +#endif +#ifdef WOLFSSL_SHA512 + static const char sigSha512wRsaName[] = "sha512WithRSAEncryption"; +#endif +#endif /* NO_RSA */ +#ifdef HAVE_ECC +#ifndef NO_SHA + static const char sigSha1wEcdsaName[] = "SHAwECDSA"; +#endif +#ifdef WOLFSSL_SHA224 + static const char sigSha224wEcdsaName[] = "SHA224wECDSA"; +#endif +#ifndef NO_SHA256 + static const char sigSha256wEcdsaName[] = "SHA256wECDSA"; +#endif +#ifdef WOLFSSL_SHA384 + static const char sigSha384wEcdsaName[] = "SHA384wECDSA"; +#endif +#ifdef WOLFSSL_SHA512 + static const char sigSha512wEcdsaName[] = "SHA512wECDSA"; +#endif +#endif /* HAVE_ECC */ +static const char sigUnknownName[] = "Unknown"; + + +/* Get the human readable string for a signature type + * + * oid Oid value for signature + */ +const char* GetSigName(int oid) { + switch (oid) { + #if !defined(NO_DSA) && !defined(NO_SHA) + case CTC_SHAwDSA: + return sigSha1wDsaName; + #endif /* NO_DSA && NO_SHA */ + #ifndef NO_RSA + #ifdef WOLFSSL_MD2 + case CTC_MD2wRSA: + return sigMd2wRsaName; + #endif + #ifndef NO_MD5 + case CTC_MD5wRSA: + return sigMd5wRsaName; + #endif + #ifndef NO_SHA + case CTC_SHAwRSA: + return sigSha1wRsaName; + #endif + #ifdef WOLFSSL_SHA224 + case CTC_SHA224wRSA: + return sigSha224wRsaName; + #endif + #ifndef NO_SHA256 + case CTC_SHA256wRSA: + return sigSha256wRsaName; + #endif + #ifdef WOLFSSL_SHA384 + case CTC_SHA384wRSA: + return sigSha384wRsaName; + #endif + #ifdef WOLFSSL_SHA512 + case CTC_SHA512wRSA: + return sigSha512wRsaName; + #endif + #endif /* NO_RSA */ + #ifdef HAVE_ECC + #ifndef NO_SHA + case CTC_SHAwECDSA: + return sigSha1wEcdsaName; + #endif + #ifdef WOLFSSL_SHA224 + case CTC_SHA224wECDSA: + return sigSha224wEcdsaName; + #endif + #ifndef NO_SHA256 + case CTC_SHA256wECDSA: + return sigSha256wEcdsaName; + #endif + #ifdef WOLFSSL_SHA384 + case CTC_SHA384wECDSA: + return sigSha384wEcdsaName; + #endif + #ifdef WOLFSSL_SHA512 + case CTC_SHA512wECDSA: + return sigSha512wEcdsaName; + #endif + #endif /* HAVE_ECC */ + default: + return sigUnknownName; + } +} + + +#if !defined(NO_DSA) || defined(HAVE_ECC) || !defined(NO_CERTS) || \ + (!defined(NO_RSA) && \ + (defined(WOLFSSL_CERT_GEN) || \ + ((defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA)) && !defined(HAVE_USER_RSA)))) +/* Set the DER/BER encoding of the ASN.1 INTEGER header. + * + * len Length of data to encode. + * firstByte First byte of data, most significant byte of integer, to encode. + * output Buffer to write into. 
+ * returns the number of bytes added to the buffer. + */ +static int SetASNInt(int len, byte firstByte, byte* output) +{ + word32 idx = 0; + + if (output) + output[idx] = ASN_INTEGER; + idx++; + if (firstByte & 0x80) + len++; + idx += SetLength(len, output ? output + idx : NULL); + if (firstByte & 0x80) { + if (output) + output[idx] = 0x00; + idx++; + } + + return idx; +} +#endif + +#if !defined(NO_DSA) || defined(HAVE_ECC) || (defined(WOLFSSL_CERT_GEN) && \ + !defined(NO_RSA)) || ((defined(WOLFSSL_KEY_GEN) || \ + defined(OPENSSL_EXTRA)) && !defined(NO_RSA) && !defined(HAVE_USER_RSA)) +/* Set the DER/BER encoding of the ASN.1 INTEGER element with an mp_int. + * The number is assumed to be positive. + * + * n Multi-precision integer to encode. + * maxSz Maximum size of the encoded integer. + * A negative value indicates no check of length requested. + * output Buffer to write into. + * returns BUFFER_E when the data is too long for the buffer. + * MP_TO_E when encoding the integer fails. + * Otherwise, the number of bytes added to the buffer. + */ +static int SetASNIntMP(mp_int* n, int maxSz, byte* output) +{ + int idx = 0; + int leadingBit; + int length; + int err; + + leadingBit = mp_leading_bit(n); + length = mp_unsigned_bin_size(n); + idx = SetASNInt(length, leadingBit ? 0x80 : 0x00, output); + if (maxSz >= 0 && (idx + length) > maxSz) + return BUFFER_E; + + if (output) { + err = mp_to_unsigned_bin(n, output + idx); + if (err != MP_OKAY) + return MP_TO_E; + } + idx += length; + + return idx; +} +#endif + +#if !defined(NO_RSA) && defined(HAVE_USER_RSA) && \ + (defined(WOLFSSL_CERT_GEN) || defined(OPENSSL_EXTRA)) +/* Set the DER/BER encoding of the ASN.1 INTEGER element with an mp_int from + * an RSA key. + * The number is assumed to be positive. + * + * n Multi-precision integer to encode. + * output Buffer to write into. + * returns BUFFER_E when the data is too long for the buffer. + * MP_TO_E when encoding the integer fails. + * Otherwise, the number of bytes added to the buffer. + */ +static int SetASNIntRSA(void* n, byte* output) +{ + int idx = 0; + int leadingBit; + int length; + int err; + + leadingBit = wc_Rsa_leading_bit(n); + length = wc_Rsa_unsigned_bin_size(n); + idx = SetASNInt(length, leadingBit ? 
0x80 : 0x00, output); + if ((idx + length) > MAX_RSA_INT_SZ) + return BUFFER_E; + + if (output) { + err = wc_Rsa_to_unsigned_bin(n, output + idx, length); + if (err != MP_OKAY) + return MP_TO_E; + } + idx += length; + + return idx; +} +#endif /* !NO_RSA && HAVE_USER_RSA && WOLFSSL_CERT_GEN */ + +/* Windows header clash for WinCE using GetVersion */ +int GetMyVersion(const byte* input, word32* inOutIdx, + int* version, word32 maxIdx) +{ + word32 idx = *inOutIdx; + byte tag; + + if ((idx + MIN_VERSION_SZ) > maxIdx) + return ASN_PARSE_E; + + if (GetASNTag(input, &idx, &tag, maxIdx) != 0) + return ASN_PARSE_E; + + if (tag != ASN_INTEGER) + return ASN_PARSE_E; + + if (input[idx++] != 0x01) + return ASN_VERSION_E; + + *version = input[idx++]; + *inOutIdx = idx; + + return *version; +} + + +#ifndef NO_PWDBASED +/* Get small count integer, 32 bits or less */ +int GetShortInt(const byte* input, word32* inOutIdx, int* number, word32 maxIdx) +{ + word32 idx = *inOutIdx; + word32 len; + byte tag; + + *number = 0; + + /* check for type and length bytes */ + if ((idx + 2) > maxIdx) + return BUFFER_E; + + if (GetASNTag(input, &idx, &tag, maxIdx) != 0) + return ASN_PARSE_E; + + if (tag != ASN_INTEGER) + return ASN_PARSE_E; + + len = input[idx++]; + if (len > 4) + return ASN_PARSE_E; + + if (len + idx > maxIdx) + return ASN_PARSE_E; + + while (len--) { + *number = *number << 8 | input[idx++]; + } + + *inOutIdx = idx; + + return *number; +} + + +/* Set small integer, 32 bits or less. DER encoding with no leading 0s + * returns total amount written including ASN tag and length byte on success */ +int SetShortInt(byte* input, word32* inOutIdx, word32 number, word32 maxIdx) +{ + word32 idx = *inOutIdx; + word32 len = 0; + int i; + byte ar[MAX_LENGTH_SZ]; + + /* check for room for type and length bytes */ + if ((idx + 2) > maxIdx) + return BUFFER_E; + + input[idx++] = ASN_INTEGER; + idx++; /* place holder for length byte */ + if (MAX_LENGTH_SZ + idx > maxIdx) + return ASN_PARSE_E; + + /* find first non zero byte */ + XMEMSET(ar, 0, MAX_LENGTH_SZ); + c32toa(number, ar); + for (i = 0; i < MAX_LENGTH_SZ; i++) { + if (ar[i] != 0) { + break; + } + } + + /* handle case of 0 */ + if (i == MAX_LENGTH_SZ) { + input[idx++] = 0; len++; + } + + for (; i < MAX_LENGTH_SZ && idx < maxIdx; i++) { + input[idx++] = ar[i]; len++; + } + + /* jump back to beginning of input buffer using unaltered inOutIdx value + * and set number of bytes for integer, then update the index value */ + input[*inOutIdx + 1] = (byte)len; + *inOutIdx = idx; + + return len + 2; /* size of integer bytes plus ASN TAG and length byte */ +} +#endif /* !NO_PWDBASED */ + +/* May not have one, not an error */ +static int GetExplicitVersion(const byte* input, word32* inOutIdx, int* version, + word32 maxIdx) +{ + word32 idx = *inOutIdx; + byte tag; + + WOLFSSL_ENTER("GetExplicitVersion"); + + if (GetASNTag(input, &idx, &tag, maxIdx) != 0) + return ASN_PARSE_E; + + if (tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED)) { + *inOutIdx = ++idx; /* skip header */ + return GetMyVersion(input, inOutIdx, version, maxIdx); + } + + /* go back as is */ + *version = 0; + + return 0; +} + +int GetInt(mp_int* mpi, const byte* input, word32* inOutIdx, word32 maxIdx) +{ + word32 idx = *inOutIdx; + int ret; + int length; + + ret = GetASNInt(input, &idx, &length, maxIdx); + if (ret != 0) + return ret; + + if (mp_init(mpi) != MP_OKAY) + return MP_INIT_E; + + if (mp_read_unsigned_bin(mpi, (byte*)input + idx, length) != 0) { + mp_clear(mpi); + return ASN_GETINT_E; + } + +#ifdef 
HAVE_WOLF_BIGINT + if (wc_bigint_from_unsigned_bin(&mpi->raw, input + idx, length) != 0) { + mp_clear(mpi); + return ASN_GETINT_E; + } +#endif /* HAVE_WOLF_BIGINT */ + + *inOutIdx = idx + length; + + return 0; +} + +#if (!defined(WOLFSSL_KEY_GEN) && !defined(OPENSSL_EXTRA) && defined(RSA_LOW_MEM)) \ + || defined(WOLFSSL_RSA_PUBLIC_ONLY) || (!defined(NO_DSA) && defined(WOLFSSL_QT)) +#if !defined(NO_RSA) && !defined(HAVE_USER_RSA) +static int SkipInt(const byte* input, word32* inOutIdx, word32 maxIdx) +{ + word32 idx = *inOutIdx; + int ret; + int length; + + ret = GetASNInt(input, &idx, &length, maxIdx); + if (ret != 0) + return ret; + + *inOutIdx = idx + length; + + return 0; +} +#endif +#endif + +static int CheckBitString(const byte* input, word32* inOutIdx, int* len, + word32 maxIdx, int zeroBits, byte* unusedBits) +{ + word32 idx = *inOutIdx; + int length; + byte b; + + if (GetASNTag(input, &idx, &b, maxIdx) != 0) { + return ASN_BITSTR_E; + } + + if (b != ASN_BIT_STRING) { + return ASN_BITSTR_E; + } + + if (GetLength(input, &idx, &length, maxIdx) < 0) + return ASN_PARSE_E; + + /* extra sanity check that length is greater than 0 */ + if (length <= 0) { + WOLFSSL_MSG("Error length was 0 in CheckBitString"); + return BUFFER_E; + } + + if (idx + 1 > maxIdx) { + WOLFSSL_MSG("Attempted buffer read larger than input buffer"); + return BUFFER_E; + } + + b = input[idx]; + if (zeroBits && b != 0x00) + return ASN_EXPECT_0_E; + if (b >= 0x08) + return ASN_PARSE_E; + if (b != 0) { + if ((byte)(input[idx + length - 1] << (8 - b)) != 0) + return ASN_PARSE_E; + } + idx++; + length--; /* length has been checked for greater than 0 */ + + *inOutIdx = idx; + if (len != NULL) + *len = length; + if (unusedBits != NULL) + *unusedBits = b; + + return 0; +} + +/* RSA (with CertGen or KeyGen) OR ECC OR ED25519 OR ED448 (with CertGen or + * KeyGen) */ +#if (!defined(NO_RSA) && !defined(HAVE_USER_RSA) && \ + (defined(WOLFSSL_CERT_GEN) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA))) || \ + (defined(HAVE_ECC) && defined(HAVE_ECC_KEY_EXPORT)) || \ + ((defined(HAVE_ED25519) || defined(HAVE_ED448)) && \ + (defined(WOLFSSL_CERT_GEN) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA))) + +/* Set the DER/BER encoding of the ASN.1 BIT_STRING header. + * + * len Length of data to encode. + * unusedBits The number of unused bits in the last byte of data. + * That is, the number of least significant zero bits before a one. + * The last byte is the most-significant non-zero byte of a number. + * output Buffer to write into. + * returns the number of bytes added to the buffer. + */ +word32 SetBitString(word32 len, byte unusedBits, byte* output) +{ + word32 idx = 0; + + if (output) + output[idx] = ASN_BIT_STRING; + idx++; + + idx += SetLength(len + 1, output ? 
output + idx : NULL);
+    if (output)
+        output[idx] = unusedBits;
+    idx++;
+
+    return idx;
+}
+#endif /* !NO_RSA || HAVE_ECC || HAVE_ED25519 || HAVE_ED448 */
+
+#ifdef ASN_BER_TO_DER
+/* Pull information from the ASN.1 BER encoded item header */
+static int GetBerHeader(const byte* data, word32* idx, word32 maxIdx,
+                        byte* pTag, word32* pLen, int* indef)
+{
+    int len = 0;
+    byte tag;
+    word32 i = *idx;
+
+    *indef = 0;
+
+    /* Check there is enough data for a minimal header */
+    if (i + 2 > maxIdx) {
+        return ASN_PARSE_E;
+    }
+
+    /* Retrieve tag */
+    tag = data[i++];
+
+    /* Indefinite length handled specially */
+    if (data[i] == 0x80) {
+        /* Check valid tag for indefinite */
+        if (((tag & 0xc0) == 0) && ((tag & ASN_CONSTRUCTED) == 0x00)) {
+            return ASN_PARSE_E;
+        }
+        i++;
+        *indef = 1;
+    }
+    else if (GetLength(data, &i, &len, maxIdx) < 0) {
+        return ASN_PARSE_E;
+    }
+
+    /* Return tag, length and index after BER item header */
+    *pTag = tag;
+    *pLen = len;
+    *idx = i;
+    return 0;
+}
+
+#ifndef INDEF_ITEMS_MAX
+#define INDEF_ITEMS_MAX 20
+#endif
+
+/* Indef length item data */
+typedef struct Indef {
+    word32 start;
+    int depth;
+    int headerLen;
+    word32 len;
+} Indef;
+
+/* Indef length items */
+typedef struct IndefItems
+{
+    Indef len[INDEF_ITEMS_MAX];
+    int cnt;
+    int idx;
+    int depth;
+} IndefItems;
+
+
+/* Get header length of current item */
+static int IndefItems_HeaderLen(IndefItems* items)
+{
+    return items->len[items->idx].headerLen;
+}
+
+/* Get data length of current item */
+static word32 IndefItems_Len(IndefItems* items)
+{
+    return items->len[items->idx].len;
+}
+
+/* Add an indefinite length item */
+static int IndefItems_AddItem(IndefItems* items, word32 start)
+{
+    int ret = 0;
+    int i;
+
+    if (items->cnt == INDEF_ITEMS_MAX) {
+        ret = MEMORY_E;
+    }
+    else {
+        i = items->cnt++;
+        items->len[i].start = start;
+        items->len[i].depth = items->depth++;
+        items->len[i].headerLen = 1;
+        items->len[i].len = 0;
+        items->idx = i;
+    }
+
+    return ret;
+}
+
+/* Increase data length of current item */
+static void IndefItems_AddData(IndefItems* items, word32 length)
+{
+    items->len[items->idx].len += length;
+}
+
+/* Update header length of current item to reflect data length */
+static void IndefItems_UpdateHeaderLen(IndefItems* items)
+{
+    items->len[items->idx].headerLen +=
+                                    SetLength(items->len[items->idx].len, NULL);
+}
+
+/* Go to indefinite parent of current item */
+static void IndefItems_Up(IndefItems* items)
+{
+    int i;
+    int depth = items->len[items->idx].depth - 1;
+
+    for (i = items->cnt - 1; i >= 0; i--) {
+        if (items->len[i].depth == depth) {
+            break;
+        }
+    }
+    items->idx = i;
+    items->depth = depth + 1;
+}
+
+/* Calculate final length by adding length of indefinite child items */
+static void IndefItems_CalcLength(IndefItems* items)
+{
+    int i;
+    int idx = items->idx;
+
+    for (i = idx + 1; i < items->cnt; i++) {
+        if (items->len[i].depth == items->depth) {
+            items->len[idx].len += items->len[i].headerLen;
+            items->len[idx].len += items->len[i].len;
+        }
+    }
+    items->len[idx].headerLen += SetLength(items->len[idx].len, NULL);
+}
+
+/* Add more data to indefinite length item */
+static void IndefItems_MoreData(IndefItems* items, word32 length)
+{
+    if (items->cnt > 0 && items->idx >= 0) {
+        items->len[items->idx].len += length;
+    }
+}
+
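Aside: wc_BerToDer, defined next, follows the usual wolfSSL two-pass sizing convention: when der is NULL it returns LENGTH_ONLY_E after writing the required size to *derSz. A sketch of the calling pattern (illustrative only, not part of the patch; requires a build with ASN_BER_TO_DER, and uses plain malloc instead of XMALLOC for brevity):

    #include <stdlib.h>
    #include <wolfssl/wolfcrypt/settings.h>
    #include <wolfssl/wolfcrypt/asn.h>
    #include <wolfssl/wolfcrypt/error-crypt.h>

    /* Convert BER to freshly allocated DER. Returns 0 on success and a
     * negative wolfSSL error code otherwise; caller frees *der. */
    static int ber_to_der_alloc(const byte* ber, word32 berSz,
                                byte** der, word32* derSz)
    {
        int ret = wc_BerToDer(ber, berSz, NULL, derSz); /* pass 1: size */
        if (ret != LENGTH_ONLY_E)
            return ret;                /* e.g. ASN_PARSE_E on bad input */

        *der = (byte*)malloc(*derSz);
        if (*der == NULL)
            return MEMORY_E;

        ret = wc_BerToDer(ber, berSz, *der, derSz);     /* pass 2: convert */
        if (ret != 0) {
            free(*der);
            *der = NULL;
        }
        return ret;
    }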
+/* Convert a BER encoding with indefinite length items to DER.
+ *
+ * ber    BER encoded data.
+ * berSz  Length of BER encoded data.
+ * der    Buffer to hold DER encoded version of data.
+ *        NULL indicates only the length is required.
+ * derSz  The size of the buffer to hold the DER encoded data.
+ *        Will be set if der is NULL, otherwise the value is checked as der is
+ *        filled.
+ * returns ASN_PARSE_E if the BER data is invalid and BAD_FUNC_ARG if ber or
+ * derSz are NULL.
+ */
+int wc_BerToDer(const byte* ber, word32 berSz, byte* der, word32* derSz)
+{
+    int ret = 0;
+    word32 i, j;
+#ifdef WOLFSSL_SMALL_STACK
+    IndefItems* indefItems = NULL;
+#else
+    IndefItems indefItems[1];
+#endif
+    byte tag, basic;
+    word32 length;
+    int indef;
+
+    if (ber == NULL || derSz == NULL)
+        return BAD_FUNC_ARG;
+
+#ifdef WOLFSSL_SMALL_STACK
+    indefItems = XMALLOC(sizeof(IndefItems), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (indefItems == NULL) {
+        ret = MEMORY_E;
+        goto end;
+    }
+#endif
+
+    XMEMSET(indefItems, 0, sizeof(*indefItems));
+
+    /* Calculate indefinite item lengths */
+    for (i = 0; i < berSz; ) {
+        word32 start = i;
+
+        /* Get next BER item */
+        ret = GetBerHeader(ber, &i, berSz, &tag, &length, &indef);
+        if (ret != 0) {
+            goto end;
+        }
+
+        if (indef) {
+            /* Indefinite item - add to list */
+            ret = IndefItems_AddItem(indefItems, i);
+            if (ret != 0) {
+                goto end;
+            }
+
+            if ((tag & 0xC0) == 0 &&
+                    tag != (ASN_SEQUENCE | ASN_CONSTRUCTED) &&
+                    tag != (ASN_SET | ASN_CONSTRUCTED)) {
+                /* Constructed basic type - get repeating tag */
+                basic = tag & (~ASN_CONSTRUCTED);
+
+                /* Add up lengths of each item below */
+                for (; i < berSz; ) {
+                    /* Get next BER_item */
+                    ret = GetBerHeader(ber, &i, berSz, &tag, &length, &indef);
+                    if (ret != 0) {
+                        goto end;
+                    }
+
+                    /* End of content closes item */
+                    if (tag == ASN_EOC) {
+                        /* Must be zero length */
+                        if (length != 0) {
+                            ret = ASN_PARSE_E;
+                            goto end;
+                        }
+                        break;
+                    }
+
+                    /* Must not be indefinite and tag must match parent */
+                    if (indef || tag != basic) {
+                        ret = ASN_PARSE_E;
+                        goto end;
+                    }
+
+                    /* Add to length */
+                    IndefItems_AddData(indefItems, length);
+                    /* Skip data */
+                    i += length;
+                }
+
+                /* Ensure we got an EOC and not end of data */
+                if (tag != ASN_EOC) {
+                    ret = ASN_PARSE_E;
+                    goto end;
+                }
+
+                /* Set the header length to include the length field */
+                IndefItems_UpdateHeaderLen(indefItems);
+                /* Go to indefinite parent item */
+                IndefItems_Up(indefItems);
+            }
+        }
+        else if (tag == ASN_EOC) {
+            /* End-of-content must be 0 length */
+            if (length != 0) {
+                ret = ASN_PARSE_E;
+                goto end;
+            }
+            /* Check there is an item to close - missing EOC */
+            if (indefItems->depth == 0) {
+                ret = ASN_PARSE_E;
+                goto end;
+            }
+
+            /* Finish calculation of data length for indefinite item */
+            IndefItems_CalcLength(indefItems);
+            /* Go to indefinite parent item */
+            IndefItems_Up(indefItems);
+        }
+        else {
+            /* Known length item to add in - make sure enough data for it */
+            if (i + length > berSz) {
+                ret = ASN_PARSE_E;
+                goto end;
+            }
+
+            /* Include all data - can't have indefinite inside definite */
+            i += length;
+            /* Add entire item to current indefinite item */
+            IndefItems_MoreData(indefItems, i - start);
+        }
+    }
+    /* Check we had an EOC for each indefinite item */
+    if (indefItems->depth != 0) {
+        ret = ASN_PARSE_E;
+        goto end;
+    }
+
+    /* Write out DER */
+
+    j = 0;
+    /* Reset index */
+    indefItems->idx = 0;
+    for (i = 0; i < berSz; ) {
+        word32 start = i;
+
+        /* Get item - checked above */
+        (void)GetBerHeader(ber, &i, berSz, &tag, &length, &indef);
+        if (indef) {
+            if (der != NULL) {
+                /* Check enough space for header */
+                if (j + IndefItems_HeaderLen(indefItems) > *derSz) {
+                    ret = BUFFER_E;
+                    goto end;
+                }
+
+                if ((tag & 0xC0) == 0 &&
+                        tag != (ASN_SEQUENCE | ASN_CONSTRUCTED) &&
+                        tag != 
(ASN_SET | ASN_CONSTRUCTED)) { + /* Remove constructed tag for basic types */ + tag &= ~ASN_CONSTRUCTED; + } + /* Add tag and length */ + der[j] = tag; + (void)SetLength(IndefItems_Len(indefItems), der + j + 1); + } + /* Add header length of indefinite item */ + j += IndefItems_HeaderLen(indefItems); + + if ((tag & 0xC0) == 0 && + tag != (ASN_SEQUENCE | ASN_CONSTRUCTED) && + tag != (ASN_SET | ASN_CONSTRUCTED)) { + /* For basic type - get each child item and add data */ + for (; i < berSz; ) { + (void)GetBerHeader(ber, &i, berSz, &tag, &length, &indef); + if (tag == ASN_EOC) { + break; + } + if (der != NULL) { + if (j + length > *derSz) { + ret = BUFFER_E; + goto end; + } + XMEMCPY(der + j, ber + i, length); + } + j += length; + i += length; + } + } + + /* Move to next indef item in list */ + indefItems->idx++; + } + else if (tag == ASN_EOC) { + /* End-Of-Content is not written out in DER */ + } + else { + /* Write out definite length item as is. */ + i += length; + if (der != NULL) { + /* Ensure space for item */ + if (j + i - start > *derSz) { + ret = BUFFER_E; + goto end; + } + /* Copy item as is */ + XMEMCPY(der + j, ber + start, i - start); + } + j += i - start; + } + } + + /* Return the length of the DER encoded ASN.1 */ + *derSz = j; + if (der == NULL) { + ret = LENGTH_ONLY_E; + } +end: +#ifdef WOLFSSL_SMALL_STACK + if (indefItems != NULL) { + XFREE(indefItems, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + return ret; +} +#endif + +#if defined(WOLFSSL_CERT_GEN) || defined(WOLFSSL_KEY_GEN) + +#if (!defined(NO_RSA) && !defined(HAVE_USER_RSA)) || \ + defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448) + +#ifdef WOLFSSL_CERT_EXT +/* Set the DER/BER encoding of the ASN.1 BIT_STRING with a 16-bit value. + * + * val 16-bit value to encode. + * output Buffer to write into. + * returns the number of bytes added to the buffer. 
+ */ +static word32 SetBitString16Bit(word16 val, byte* output) +{ + word32 idx; + int len; + byte lastByte; + byte unusedBits = 0; + + if ((val >> 8) != 0) { + len = 2; + lastByte = (byte)(val >> 8); + } + else { + len = 1; + lastByte = (byte)val; + } + + while (((lastByte >> unusedBits) & 0x01) == 0x00) + unusedBits++; + + idx = SetBitString(len, unusedBits, output); + output[idx++] = (byte)val; + if (len > 1) + output[idx++] = (byte)(val >> 8); + + return idx; +} +#endif /* WOLFSSL_CERT_EXT */ +#endif /* !NO_RSA || HAVE_ECC || HAVE_ED25519 || defined(HAVE_ED448) */ +#endif /* WOLFSSL_CERT_GEN || WOLFSSL_KEY_GEN */ + + + +/* hashType */ +#ifdef WOLFSSL_MD2 + static const byte hashMd2hOid[] = {42, 134, 72, 134, 247, 13, 2, 2}; +#endif +#ifndef NO_MD5 + static const byte hashMd5hOid[] = {42, 134, 72, 134, 247, 13, 2, 5}; +#endif +#ifndef NO_SHA + static const byte hashSha1hOid[] = {43, 14, 3, 2, 26}; +#endif +#ifdef WOLFSSL_SHA224 + static const byte hashSha224hOid[] = {96, 134, 72, 1, 101, 3, 4, 2, 4}; +#endif +#ifndef NO_SHA256 + static const byte hashSha256hOid[] = {96, 134, 72, 1, 101, 3, 4, 2, 1}; +#endif +#ifdef WOLFSSL_SHA384 + static const byte hashSha384hOid[] = {96, 134, 72, 1, 101, 3, 4, 2, 2}; +#endif +#ifdef WOLFSSL_SHA512 + static const byte hashSha512hOid[] = {96, 134, 72, 1, 101, 3, 4, 2, 3}; +#endif + +/* hmacType */ +#ifndef NO_HMAC + #ifdef WOLFSSL_SHA224 + static const byte hmacSha224Oid[] = {42, 134, 72, 134, 247, 13, 2, 8}; + #endif + #ifndef NO_SHA256 + static const byte hmacSha256Oid[] = {42, 134, 72, 134, 247, 13, 2, 9}; + #endif + #ifdef WOLFSSL_SHA384 + static const byte hmacSha384Oid[] = {42, 134, 72, 134, 247, 13, 2, 10}; + #endif + #ifdef WOLFSSL_SHA512 + static const byte hmacSha512Oid[] = {42, 134, 72, 134, 247, 13, 2, 11}; + #endif +#endif + +/* sigType */ +#if !defined(NO_DSA) && !defined(NO_SHA) + static const byte sigSha1wDsaOid[] = {42, 134, 72, 206, 56, 4, 3}; +#endif /* NO_DSA */ +#ifndef NO_RSA + #ifdef WOLFSSL_MD2 + static const byte sigMd2wRsaOid[] = {42, 134, 72, 134, 247, 13, 1, 1, 2}; + #endif + #ifndef NO_MD5 + static const byte sigMd5wRsaOid[] = {42, 134, 72, 134, 247, 13, 1, 1, 4}; + #endif + #ifndef NO_SHA + static const byte sigSha1wRsaOid[] = {42, 134, 72, 134, 247, 13, 1, 1, 5}; + #endif + #ifdef WOLFSSL_SHA224 + static const byte sigSha224wRsaOid[] = {42, 134, 72, 134, 247, 13, 1, 1,14}; + #endif + #ifndef NO_SHA256 + static const byte sigSha256wRsaOid[] = {42, 134, 72, 134, 247, 13, 1, 1,11}; + #endif + #ifdef WOLFSSL_SHA384 + static const byte sigSha384wRsaOid[] = {42, 134, 72, 134, 247, 13, 1, 1,12}; + #endif + #ifdef WOLFSSL_SHA512 + static const byte sigSha512wRsaOid[] = {42, 134, 72, 134, 247, 13, 1, 1,13}; + #endif +#endif /* NO_RSA */ +#ifdef HAVE_ECC + #ifndef NO_SHA + static const byte sigSha1wEcdsaOid[] = {42, 134, 72, 206, 61, 4, 1}; + #endif + #ifdef WOLFSSL_SHA224 + static const byte sigSha224wEcdsaOid[] = {42, 134, 72, 206, 61, 4, 3, 1}; + #endif + #ifndef NO_SHA256 + static const byte sigSha256wEcdsaOid[] = {42, 134, 72, 206, 61, 4, 3, 2}; + #endif + #ifdef WOLFSSL_SHA384 + static const byte sigSha384wEcdsaOid[] = {42, 134, 72, 206, 61, 4, 3, 3}; + #endif + #ifdef WOLFSSL_SHA512 + static const byte sigSha512wEcdsaOid[] = {42, 134, 72, 206, 61, 4, 3, 4}; + #endif +#endif /* HAVE_ECC */ +#ifdef HAVE_ED25519 + static const byte sigEd25519Oid[] = {43, 101, 112}; +#endif /* HAVE_ED25519 */ +#ifdef HAVE_ED448 + static const byte sigEd448Oid[] = {43, 101, 113}; +#endif /* HAVE_ED448 */ + +/* keyType */ +#ifndef NO_DSA + static 
const byte keyDsaOid[] = {42, 134, 72, 206, 56, 4, 1}; +#endif /* NO_DSA */ +#ifndef NO_RSA + static const byte keyRsaOid[] = {42, 134, 72, 134, 247, 13, 1, 1, 1}; +#endif /* NO_RSA */ +#ifdef HAVE_NTRU + static const byte keyNtruOid[] = {43, 6, 1, 4, 1, 193, 22, 1, 1, 1, 1}; +#endif /* HAVE_NTRU */ +#ifdef HAVE_ECC + static const byte keyEcdsaOid[] = {42, 134, 72, 206, 61, 2, 1}; +#endif /* HAVE_ECC */ +#ifdef HAVE_ED25519 + static const byte keyEd25519Oid[] = {43, 101, 112}; +#endif /* HAVE_ED25519 */ +#ifdef HAVE_ED448 + static const byte keyEd448Oid[] = {43, 101, 113}; +#endif /* HAVE_ED448 */ +#if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL)) + static const byte keyDhOid[] = {42, 134, 72, 134, 247, 13, 1, 3, 1}; +#endif /* ! NO_DH ... */ + +/* curveType */ +#ifdef HAVE_ECC + /* See "ecc_sets" table in ecc.c */ +#endif /* HAVE_ECC */ + +#ifdef HAVE_AES_CBC +/* blkType */ + #ifdef WOLFSSL_AES_128 + static const byte blkAes128CbcOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 2}; + #endif + #ifdef WOLFSSL_AES_192 + static const byte blkAes192CbcOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 22}; + #endif + #ifdef WOLFSSL_AES_256 + static const byte blkAes256CbcOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 42}; + #endif +#endif /* HAVE_AES_CBC */ +#ifdef HAVE_AESGCM + #ifdef WOLFSSL_AES_128 + static const byte blkAes128GcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 6}; + #endif + #ifdef WOLFSSL_AES_192 + static const byte blkAes192GcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 26}; + #endif + #ifdef WOLFSSL_AES_256 + static const byte blkAes256GcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 46}; + #endif +#endif /* HAVE_AESGCM */ +#ifdef HAVE_AESCCM + #ifdef WOLFSSL_AES_128 + static const byte blkAes128CcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 7}; + #endif + #ifdef WOLFSSL_AES_192 + static const byte blkAes192CcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 27}; + #endif + #ifdef WOLFSSL_AES_256 + static const byte blkAes256CcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 47}; + #endif +#endif /* HAVE_AESCCM */ + +#ifndef NO_DES3 + static const byte blkDesCbcOid[] = {43, 14, 3, 2, 7}; + static const byte blkDes3CbcOid[] = {42, 134, 72, 134, 247, 13, 3, 7}; +#endif + +/* keyWrapType */ +#ifdef WOLFSSL_AES_128 + static const byte wrapAes128Oid[] = {96, 134, 72, 1, 101, 3, 4, 1, 5}; +#endif +#ifdef WOLFSSL_AES_192 + static const byte wrapAes192Oid[] = {96, 134, 72, 1, 101, 3, 4, 1, 25}; +#endif +#ifdef WOLFSSL_AES_256 + static const byte wrapAes256Oid[] = {96, 134, 72, 1, 101, 3, 4, 1, 45}; +#endif +#ifdef HAVE_PKCS7 +/* From RFC 3211 */ +static const byte wrapPwriKekOid[] = {42, 134, 72, 134, 247, 13, 1, 9, 16, 3,9}; +#endif + +/* cmsKeyAgreeType */ +#ifndef NO_SHA + static const byte dhSinglePass_stdDH_sha1kdf_Oid[] = + {43, 129, 5, 16, 134, 72, 63, 0, 2}; +#endif +#ifdef WOLFSSL_SHA224 + static const byte dhSinglePass_stdDH_sha224kdf_Oid[] = {43, 129, 4, 1, 11, 0}; +#endif +#ifndef NO_SHA256 + static const byte dhSinglePass_stdDH_sha256kdf_Oid[] = {43, 129, 4, 1, 11, 1}; +#endif +#ifdef WOLFSSL_SHA384 + static const byte dhSinglePass_stdDH_sha384kdf_Oid[] = {43, 129, 4, 1, 11, 2}; +#endif +#ifdef WOLFSSL_SHA512 + static const byte dhSinglePass_stdDH_sha512kdf_Oid[] = {43, 129, 4, 1, 11, 3}; +#endif + +/* ocspType */ +#ifdef HAVE_OCSP + static const byte ocspBasicOid[] = {43, 6, 1, 5, 5, 7, 48, 1, 1}; + static const byte ocspNonceOid[] = {43, 6, 1, 5, 5, 7, 48, 1, 2}; +#endif /* HAVE_OCSP */ + +/* certExtType */ +static const byte extBasicCaOid[] = {85, 29, 19}; +static const byte extAltNamesOid[] = {85, 29, 17}; +static 
const byte extCrlDistOid[] = {85, 29, 31}; +static const byte extAuthInfoOid[] = {43, 6, 1, 5, 5, 7, 1, 1}; +static const byte extAuthKeyOid[] = {85, 29, 35}; +static const byte extSubjKeyOid[] = {85, 29, 14}; +static const byte extCertPolicyOid[] = {85, 29, 32}; +static const byte extKeyUsageOid[] = {85, 29, 15}; +static const byte extInhibitAnyOid[] = {85, 29, 54}; +static const byte extExtKeyUsageOid[] = {85, 29, 37}; +#ifndef IGNORE_NAME_CONSTRAINTS + static const byte extNameConsOid[] = {85, 29, 30}; +#endif + +/* certAuthInfoType */ +#ifdef HAVE_OCSP + static const byte extAuthInfoOcspOid[] = {43, 6, 1, 5, 5, 7, 48, 1}; +#endif +static const byte extAuthInfoCaIssuerOid[] = {43, 6, 1, 5, 5, 7, 48, 2}; + +/* certPolicyType */ +static const byte extCertPolicyAnyOid[] = {85, 29, 32, 0}; + +/* certKeyUseType */ +static const byte extAltNamesHwNameOid[] = {43, 6, 1, 5, 5, 7, 8, 4}; + +/* certKeyUseType */ +static const byte extExtKeyUsageAnyOid[] = {85, 29, 37, 0}; +static const byte extExtKeyUsageServerAuthOid[] = {43, 6, 1, 5, 5, 7, 3, 1}; +static const byte extExtKeyUsageClientAuthOid[] = {43, 6, 1, 5, 5, 7, 3, 2}; +static const byte extExtKeyUsageCodeSigningOid[] = {43, 6, 1, 5, 5, 7, 3, 3}; +static const byte extExtKeyUsageEmailProtectOid[] = {43, 6, 1, 5, 5, 7, 3, 4}; +static const byte extExtKeyUsageTimestampOid[] = {43, 6, 1, 5, 5, 7, 3, 8}; +static const byte extExtKeyUsageOcspSignOid[] = {43, 6, 1, 5, 5, 7, 3, 9}; + +/* kdfType */ +static const byte pbkdf2Oid[] = {42, 134, 72, 134, 247, 13, 1, 5, 12}; + +/* PKCS5 */ +#if !defined(NO_DES3) && !defined(NO_SHA) +static const byte pbeSha1Des[] = {42, 134, 72, 134, 247, 13, 1, 5, 10}; +#endif +static const byte pbes2[] = {42, 134, 72, 134, 247, 13, 1, 5, 13}; + +/* PKCS12 */ +#if !defined(NO_RC4) && !defined(NO_SHA) +static const byte pbeSha1RC4128[] = {42, 134, 72, 134, 247, 13, 1, 12, 1, 1}; +#endif +#if !defined(NO_DES3) && !defined(NO_SHA) +static const byte pbeSha1Des3[] = {42, 134, 72, 134, 247, 13, 1, 12, 1, 3}; +#endif + +#ifdef HAVE_LIBZ +/* zlib compression */ +static const byte zlibCompress[] = {42, 134, 72, 134, 247, 13, 1, 9, 16, 3, 8}; +#endif +#ifdef WOLFSSL_APACHE_HTTPD +/* tlsExtType */ +static const byte tlsFeatureOid[] = {43, 6, 1, 5, 5, 7, 1, 24}; +/* certNameType */ +static const byte dnsSRVOid[] = {43, 6, 1, 5, 5, 7, 8, 7}; +#endif + + +/* returns a pointer to the OID string on success and NULL on fail */ +const byte* OidFromId(word32 id, word32 type, word32* oidSz) +{ + const byte* oid = NULL; + + *oidSz = 0; + + switch (type) { + + case oidHashType: + switch (id) { + #ifdef WOLFSSL_MD2 + case MD2h: + oid = hashMd2hOid; + *oidSz = sizeof(hashMd2hOid); + break; + #endif + #ifndef NO_MD5 + case MD5h: + oid = hashMd5hOid; + *oidSz = sizeof(hashMd5hOid); + break; + #endif + #ifndef NO_SHA + case SHAh: + oid = hashSha1hOid; + *oidSz = sizeof(hashSha1hOid); + break; + #endif + #ifdef WOLFSSL_SHA224 + case SHA224h: + oid = hashSha224hOid; + *oidSz = sizeof(hashSha224hOid); + break; + #endif + #ifndef NO_SHA256 + case SHA256h: + oid = hashSha256hOid; + *oidSz = sizeof(hashSha256hOid); + break; + #endif + #ifdef WOLFSSL_SHA384 + case SHA384h: + oid = hashSha384hOid; + *oidSz = sizeof(hashSha384hOid); + break; + #endif + #ifdef WOLFSSL_SHA512 + case SHA512h: + oid = hashSha512hOid; + *oidSz = sizeof(hashSha512hOid); + break; + #endif + } + break; + + case oidSigType: + switch (id) { + #if !defined(NO_DSA) && !defined(NO_SHA) + case CTC_SHAwDSA: + oid = sigSha1wDsaOid; + *oidSz = sizeof(sigSha1wDsaOid); + break; + #endif 
/* NO_DSA */ + #ifndef NO_RSA + #ifdef WOLFSSL_MD2 + case CTC_MD2wRSA: + oid = sigMd2wRsaOid; + *oidSz = sizeof(sigMd2wRsaOid); + break; + #endif + #ifndef NO_MD5 + case CTC_MD5wRSA: + oid = sigMd5wRsaOid; + *oidSz = sizeof(sigMd5wRsaOid); + break; + #endif + #ifndef NO_SHA + case CTC_SHAwRSA: + oid = sigSha1wRsaOid; + *oidSz = sizeof(sigSha1wRsaOid); + break; + #endif + #ifdef WOLFSSL_SHA224 + case CTC_SHA224wRSA: + oid = sigSha224wRsaOid; + *oidSz = sizeof(sigSha224wRsaOid); + break; + #endif + #ifndef NO_SHA256 + case CTC_SHA256wRSA: + oid = sigSha256wRsaOid; + *oidSz = sizeof(sigSha256wRsaOid); + break; + #endif + #ifdef WOLFSSL_SHA384 + case CTC_SHA384wRSA: + oid = sigSha384wRsaOid; + *oidSz = sizeof(sigSha384wRsaOid); + break; + #endif + #ifdef WOLFSSL_SHA512 + case CTC_SHA512wRSA: + oid = sigSha512wRsaOid; + *oidSz = sizeof(sigSha512wRsaOid); + break; + #endif /* WOLFSSL_SHA512 */ + #endif /* NO_RSA */ + #ifdef HAVE_ECC + #ifndef NO_SHA + case CTC_SHAwECDSA: + oid = sigSha1wEcdsaOid; + *oidSz = sizeof(sigSha1wEcdsaOid); + break; + #endif + #ifdef WOLFSSL_SHA224 + case CTC_SHA224wECDSA: + oid = sigSha224wEcdsaOid; + *oidSz = sizeof(sigSha224wEcdsaOid); + break; + #endif + #ifndef NO_SHA256 + case CTC_SHA256wECDSA: + oid = sigSha256wEcdsaOid; + *oidSz = sizeof(sigSha256wEcdsaOid); + break; + #endif + #ifdef WOLFSSL_SHA384 + case CTC_SHA384wECDSA: + oid = sigSha384wEcdsaOid; + *oidSz = sizeof(sigSha384wEcdsaOid); + break; + #endif + #ifdef WOLFSSL_SHA512 + case CTC_SHA512wECDSA: + oid = sigSha512wEcdsaOid; + *oidSz = sizeof(sigSha512wEcdsaOid); + break; + #endif + #endif /* HAVE_ECC */ + #ifdef HAVE_ED25519 + case CTC_ED25519: + oid = sigEd25519Oid; + *oidSz = sizeof(sigEd25519Oid); + break; + #endif + #ifdef HAVE_ED448 + case CTC_ED448: + oid = sigEd448Oid; + *oidSz = sizeof(sigEd448Oid); + break; + #endif + default: + break; + } + break; + + case oidKeyType: + switch (id) { + #ifndef NO_DSA + case DSAk: + oid = keyDsaOid; + *oidSz = sizeof(keyDsaOid); + break; + #endif /* NO_DSA */ + #ifndef NO_RSA + case RSAk: + oid = keyRsaOid; + *oidSz = sizeof(keyRsaOid); + break; + #endif /* NO_RSA */ + #ifdef HAVE_NTRU + case NTRUk: + oid = keyNtruOid; + *oidSz = sizeof(keyNtruOid); + break; + #endif /* HAVE_NTRU */ + #ifdef HAVE_ECC + case ECDSAk: + oid = keyEcdsaOid; + *oidSz = sizeof(keyEcdsaOid); + break; + #endif /* HAVE_ECC */ + #ifdef HAVE_ED25519 + case ED25519k: + oid = keyEd25519Oid; + *oidSz = sizeof(keyEd25519Oid); + break; + #endif /* HAVE_ED25519 */ + #ifdef HAVE_ED448 + case ED448k: + oid = keyEd448Oid; + *oidSz = sizeof(keyEd448Oid); + break; + #endif /* HAVE_ED448 */ + #if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL)) + case DHk: + oid = keyDhOid; + *oidSz = sizeof(keyDhOid); + break; + #endif /* ! 
NO_DH && (WOLFSSL_QT || OPENSSL_ALL */ + default: + break; + } + break; + + #ifdef HAVE_ECC + case oidCurveType: + if (wc_ecc_get_oid(id, &oid, oidSz) < 0) { + WOLFSSL_MSG("ECC OID not found"); + } + break; + #endif /* HAVE_ECC */ + + case oidBlkType: + switch (id) { + #ifdef HAVE_AES_CBC + #ifdef WOLFSSL_AES_128 + case AES128CBCb: + oid = blkAes128CbcOid; + *oidSz = sizeof(blkAes128CbcOid); + break; + #endif + #ifdef WOLFSSL_AES_192 + case AES192CBCb: + oid = blkAes192CbcOid; + *oidSz = sizeof(blkAes192CbcOid); + break; + #endif + #ifdef WOLFSSL_AES_256 + case AES256CBCb: + oid = blkAes256CbcOid; + *oidSz = sizeof(blkAes256CbcOid); + break; + #endif + #endif /* HAVE_AES_CBC */ + #ifdef HAVE_AESGCM + #ifdef WOLFSSL_AES_128 + case AES128GCMb: + oid = blkAes128GcmOid; + *oidSz = sizeof(blkAes128GcmOid); + break; + #endif + #ifdef WOLFSSL_AES_192 + case AES192GCMb: + oid = blkAes192GcmOid; + *oidSz = sizeof(blkAes192GcmOid); + break; + #endif + #ifdef WOLFSSL_AES_256 + case AES256GCMb: + oid = blkAes256GcmOid; + *oidSz = sizeof(blkAes256GcmOid); + break; + #endif + #endif /* HAVE_AESGCM */ + #ifdef HAVE_AESCCM + #ifdef WOLFSSL_AES_128 + case AES128CCMb: + oid = blkAes128CcmOid; + *oidSz = sizeof(blkAes128CcmOid); + break; + #endif + #ifdef WOLFSSL_AES_192 + case AES192CCMb: + oid = blkAes192CcmOid; + *oidSz = sizeof(blkAes192CcmOid); + break; + #endif + #ifdef WOLFSSL_AES_256 + case AES256CCMb: + oid = blkAes256CcmOid; + *oidSz = sizeof(blkAes256CcmOid); + break; + #endif + #endif /* HAVE_AESCCM */ + #ifndef NO_DES3 + case DESb: + oid = blkDesCbcOid; + *oidSz = sizeof(blkDesCbcOid); + break; + case DES3b: + oid = blkDes3CbcOid; + *oidSz = sizeof(blkDes3CbcOid); + break; + #endif /* !NO_DES3 */ + } + break; + + #ifdef HAVE_OCSP + case oidOcspType: + switch (id) { + case OCSP_BASIC_OID: + oid = ocspBasicOid; + *oidSz = sizeof(ocspBasicOid); + break; + case OCSP_NONCE_OID: + oid = ocspNonceOid; + *oidSz = sizeof(ocspNonceOid); + break; + } + break; + #endif /* HAVE_OCSP */ + + case oidCertExtType: + switch (id) { + case BASIC_CA_OID: + oid = extBasicCaOid; + *oidSz = sizeof(extBasicCaOid); + break; + case ALT_NAMES_OID: + oid = extAltNamesOid; + *oidSz = sizeof(extAltNamesOid); + break; + case CRL_DIST_OID: + oid = extCrlDistOid; + *oidSz = sizeof(extCrlDistOid); + break; + case AUTH_INFO_OID: + oid = extAuthInfoOid; + *oidSz = sizeof(extAuthInfoOid); + break; + case AUTH_KEY_OID: + oid = extAuthKeyOid; + *oidSz = sizeof(extAuthKeyOid); + break; + case SUBJ_KEY_OID: + oid = extSubjKeyOid; + *oidSz = sizeof(extSubjKeyOid); + break; + case CERT_POLICY_OID: + oid = extCertPolicyOid; + *oidSz = sizeof(extCertPolicyOid); + break; + case KEY_USAGE_OID: + oid = extKeyUsageOid; + *oidSz = sizeof(extKeyUsageOid); + break; + case INHIBIT_ANY_OID: + oid = extInhibitAnyOid; + *oidSz = sizeof(extInhibitAnyOid); + break; + case EXT_KEY_USAGE_OID: + oid = extExtKeyUsageOid; + *oidSz = sizeof(extExtKeyUsageOid); + break; + #ifndef IGNORE_NAME_CONSTRAINTS + case NAME_CONS_OID: + oid = extNameConsOid; + *oidSz = sizeof(extNameConsOid); + break; + #endif + } + break; + + case oidCrlExtType: + #ifdef HAVE_CRL + switch (id) { + case AUTH_KEY_OID: + oid = extAuthKeyOid; + *oidSz = sizeof(extAuthKeyOid); + break; + } + #endif + break; + + case oidCertAuthInfoType: + switch (id) { + #ifdef HAVE_OCSP + case AIA_OCSP_OID: + oid = extAuthInfoOcspOid; + *oidSz = sizeof(extAuthInfoOcspOid); + break; + #endif + case AIA_CA_ISSUER_OID: + oid = extAuthInfoCaIssuerOid; + *oidSz = sizeof(extAuthInfoCaIssuerOid); + break; + } + 
break; + + case oidCertPolicyType: + switch (id) { + case CP_ANY_OID: + oid = extCertPolicyAnyOid; + *oidSz = sizeof(extCertPolicyAnyOid); + break; + } + break; + + case oidCertAltNameType: + switch (id) { + case HW_NAME_OID: + oid = extAltNamesHwNameOid; + *oidSz = sizeof(extAltNamesHwNameOid); + break; + } + break; + + case oidCertKeyUseType: + switch (id) { + case EKU_ANY_OID: + oid = extExtKeyUsageAnyOid; + *oidSz = sizeof(extExtKeyUsageAnyOid); + break; + case EKU_SERVER_AUTH_OID: + oid = extExtKeyUsageServerAuthOid; + *oidSz = sizeof(extExtKeyUsageServerAuthOid); + break; + case EKU_CLIENT_AUTH_OID: + oid = extExtKeyUsageClientAuthOid; + *oidSz = sizeof(extExtKeyUsageClientAuthOid); + break; + case EKU_CODESIGNING_OID: + oid = extExtKeyUsageCodeSigningOid; + *oidSz = sizeof(extExtKeyUsageCodeSigningOid); + break; + case EKU_EMAILPROTECT_OID: + oid = extExtKeyUsageEmailProtectOid; + *oidSz = sizeof(extExtKeyUsageEmailProtectOid); + break; + case EKU_TIMESTAMP_OID: + oid = extExtKeyUsageTimestampOid; + *oidSz = sizeof(extExtKeyUsageTimestampOid); + break; + case EKU_OCSP_SIGN_OID: + oid = extExtKeyUsageOcspSignOid; + *oidSz = sizeof(extExtKeyUsageOcspSignOid); + break; + } + break; + + case oidKdfType: + switch (id) { + case PBKDF2_OID: + oid = pbkdf2Oid; + *oidSz = sizeof(pbkdf2Oid); + break; + } + break; + + case oidPBEType: + switch (id) { + #if !defined(NO_SHA) && !defined(NO_RC4) + case PBE_SHA1_RC4_128: + oid = pbeSha1RC4128; + *oidSz = sizeof(pbeSha1RC4128); + break; + #endif + #if !defined(NO_SHA) && !defined(NO_DES3) + case PBE_SHA1_DES: + oid = pbeSha1Des; + *oidSz = sizeof(pbeSha1Des); + break; + + #endif + #if !defined(NO_SHA) && !defined(NO_DES3) + case PBE_SHA1_DES3: + oid = pbeSha1Des3; + *oidSz = sizeof(pbeSha1Des3); + break; + #endif + case PBES2: + oid = pbes2; + *oidSz = sizeof(pbes2); + break; + } + break; + + case oidKeyWrapType: + switch (id) { + #ifdef WOLFSSL_AES_128 + case AES128_WRAP: + oid = wrapAes128Oid; + *oidSz = sizeof(wrapAes128Oid); + break; + #endif + #ifdef WOLFSSL_AES_192 + case AES192_WRAP: + oid = wrapAes192Oid; + *oidSz = sizeof(wrapAes192Oid); + break; + #endif + #ifdef WOLFSSL_AES_256 + case AES256_WRAP: + oid = wrapAes256Oid; + *oidSz = sizeof(wrapAes256Oid); + break; + #endif + #ifdef HAVE_PKCS7 + case PWRI_KEK_WRAP: + oid = wrapPwriKekOid; + *oidSz = sizeof(wrapPwriKekOid); + break; + #endif + } + break; + + case oidCmsKeyAgreeType: + switch (id) { + #ifndef NO_SHA + case dhSinglePass_stdDH_sha1kdf_scheme: + oid = dhSinglePass_stdDH_sha1kdf_Oid; + *oidSz = sizeof(dhSinglePass_stdDH_sha1kdf_Oid); + break; + #endif + #ifdef WOLFSSL_SHA224 + case dhSinglePass_stdDH_sha224kdf_scheme: + oid = dhSinglePass_stdDH_sha224kdf_Oid; + *oidSz = sizeof(dhSinglePass_stdDH_sha224kdf_Oid); + break; + #endif + #ifndef NO_SHA256 + case dhSinglePass_stdDH_sha256kdf_scheme: + oid = dhSinglePass_stdDH_sha256kdf_Oid; + *oidSz = sizeof(dhSinglePass_stdDH_sha256kdf_Oid); + break; + #endif + #ifdef WOLFSSL_SHA384 + case dhSinglePass_stdDH_sha384kdf_scheme: + oid = dhSinglePass_stdDH_sha384kdf_Oid; + *oidSz = sizeof(dhSinglePass_stdDH_sha384kdf_Oid); + break; + #endif + #ifdef WOLFSSL_SHA512 + case dhSinglePass_stdDH_sha512kdf_scheme: + oid = dhSinglePass_stdDH_sha512kdf_Oid; + *oidSz = sizeof(dhSinglePass_stdDH_sha512kdf_Oid); + break; + #endif + } + break; + +#ifndef NO_HMAC + case oidHmacType: + switch (id) { + #ifdef WOLFSSL_SHA224 + case HMAC_SHA224_OID: + oid = hmacSha224Oid; + *oidSz = sizeof(hmacSha224Oid); + break; + #endif + #ifndef NO_SHA256 + case 
HMAC_SHA256_OID:
+                oid = hmacSha256Oid;
+                *oidSz = sizeof(hmacSha256Oid);
+                break;
+            #endif
+            #ifdef WOLFSSL_SHA384
+            case HMAC_SHA384_OID:
+                oid = hmacSha384Oid;
+                *oidSz = sizeof(hmacSha384Oid);
+                break;
+            #endif
+            #ifdef WOLFSSL_SHA512
+            case HMAC_SHA512_OID:
+                oid = hmacSha512Oid;
+                *oidSz = sizeof(hmacSha512Oid);
+                break;
+            #endif
+        }
+        break;
+#endif /* !NO_HMAC */
+
+#ifdef HAVE_LIBZ
+    case oidCompressType:
+        switch (id) {
+        case ZLIBc:
+            oid = zlibCompress;
+            *oidSz = sizeof(zlibCompress);
+            break;
+        }
+        break;
+#endif /* HAVE_LIBZ */
+#ifdef WOLFSSL_APACHE_HTTPD
+    case oidCertNameType:
+        switch (id) {
+        case NID_id_on_dnsSRV:
+            oid = dnsSRVOid;
+            *oidSz = sizeof(dnsSRVOid);
+            break;
+        }
+        break;
+    case oidTlsExtType:
+        switch (id) {
+        case TLS_FEATURE_OID:
+            oid = tlsFeatureOid;
+            *oidSz = sizeof(tlsFeatureOid);
+            break;
+        }
+        break;
+#endif /* WOLFSSL_APACHE_HTTPD */
+    case oidIgnoreType:
+    default:
+        break;
+    }
+
+    return oid;
+}
+
+#ifdef HAVE_OID_ENCODING
+int EncodeObjectId(const word16* in, word32 inSz, byte* out, word32* outSz)
+{
+    int i, x, len;
+    word32 d, t;
+
+    /* check args */
+    if (in == NULL || outSz == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* compute length of encoded OID */
+    d = (in[0] * 40) + in[1];
+    len = 0;
+    for (i = 1; i < (int)inSz; i++) {
+        x = 0;
+        t = d;
+        while (t) {
+            x++;
+            t >>= 1;
+        }
+        len += (x / 7) + ((x % 7) ? 1 : 0) + (d == 0 ? 1 : 0);
+
+        if (i < (int)inSz - 1) {
+            d = in[i + 1];
+        }
+    }
+
+    if (out) {
+        /* verify length */
+        if ((int)*outSz < len) {
+            return BUFFER_E; /* buffer provided is not large enough */
+        }
+
+        /* calc first byte */
+        d = (in[0] * 40) + in[1];
+
+        /* encode bytes */
+        x = 0;
+        for (i = 1; i < (int)inSz; i++) {
+            if (d) {
+                int y = x, z;
+                byte mask = 0;
+                while (d) {
+                    out[x++] = (byte)((d & 0x7F) | mask);
+                    d >>= 7;
+                    mask |= 0x80; /* upper bit is set on all but the last byte */
+                }
+                /* now swap bytes y...x-1 */
+                z = x - 1;
+                while (y < z) {
+                    mask = out[y];
+                    out[y] = out[z];
+                    out[z] = mask;
+                    ++y;
+                    --z;
+                }
+            }
+            else {
+                out[x++] = 0x00; /* zero value */
+            }
+
+            /* next word */
+            if (i < (int)inSz - 1) {
+                d = in[i + 1];
+            }
+        }
+    }
+
+    /* return length */
+    *outSz = len;
+
+    return 0;
+}
+#endif /* HAVE_OID_ENCODING */
+
+#ifdef HAVE_OID_DECODING
+int DecodeObjectId(const byte* in, word32 inSz, word16* out, word32* outSz)
+{
+    int x = 0, y = 0;
+    word32 t = 0;
+
+    /* check args */
+    if (in == NULL || outSz == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* decode bytes */
+    while (inSz--) {
+        t = (t << 7) | (in[x] & 0x7F);
+        if (!(in[x] & 0x80)) {
+            if (y >= (int)*outSz) {
+                return BUFFER_E;
+            }
+            if (y == 0) {
+                out[0] = (t / 40);
+                out[1] = (t % 40);
+                y = 2;
+            }
+            else {
+                out[y++] = t;
+            }
+            t = 0; /* reset tmp */
+        }
+        x++;
+    }
+
+    /* return length */
+    *outSz = y;
+
+    return 0;
+}
+#endif /* HAVE_OID_DECODING */
+
+/* Get the DER/BER encoding of an ASN.1 OBJECT_ID header.
+ *
+ * input     Buffer holding DER/BER encoded data.
+ * inOutIdx  Current index into buffer to parse.
+ * len       The number of bytes in the ASN.1 data.
+ * maxIdx    Length of data in buffer.
+ * returns BUFFER_E when there is not enough data to parse.
+ *         ASN_OBJECT_ID_E when the OBJECT_ID tag is not found.
+ *         ASN_PARSE_E when length is invalid.
+ *         Otherwise, 0 to indicate success.
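+ *
+ * For reference, a worked example of the OBJECT_ID content that follows
+ * this header (illustrative, not normative): DER packs the first two arcs
+ * as 40*arc1 + arc2 and every later arc base-128, with the high bit set
+ * on all but the last byte of each arc. So 1.2.840.113549.1.1.1
+ * (rsaEncryption, keyRsaOid above) encodes as:
+ *     1.2    -> 1*40 + 2               =  42
+ *     840    -> 6*128 + 72             -> 0x86 0x48       (134, 72)
+ *     113549 -> 6*128^2 + 119*128 + 13 -> 0x86 0xF7 0x0D  (134, 247, 13)
+ *     1.1.1  -> 1, 1, 1
+ * giving {42, 134, 72, 134, 247, 13, 1, 1, 1}, which matches the OID
+ * tables above. EncodeObjectId/DecodeObjectId (when built with
+ * HAVE_OID_ENCODING/HAVE_OID_DECODING) implement this transform.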
+ */
+int GetASNObjectId(const byte* input, word32* inOutIdx, int* len,
+                   word32 maxIdx)
+{
+    word32 idx = *inOutIdx;
+    int    length;
+    byte   tag;
+
+    if ((idx + 1) > maxIdx)
+        return BUFFER_E;
+
+    if (GetASNTag(input, &idx, &tag, maxIdx) != 0)
+        return ASN_PARSE_E;
+
+    if (tag != ASN_OBJECT_ID)
+        return ASN_OBJECT_ID_E;
+
+    if (GetLength(input, &idx, &length, maxIdx) < 0)
+        return ASN_PARSE_E;
+
+    *len = length;
+    *inOutIdx = idx;
+    return 0;
+}
+
+/* Set the DER/BER encoding of the ASN.1 OBJECT_ID header.
+ *
+ * len       Length of the OBJECT_ID data.
+ * output    Buffer to write into.
+ * returns the number of bytes added to the buffer.
+ */
+int SetObjectId(int len, byte* output)
+{
+    int idx = 0;
+
+    output[idx++] = ASN_OBJECT_ID;
+    idx += SetLength(len, output + idx);
+
+    return idx;
+}
+
+int GetObjectId(const byte* input, word32* inOutIdx, word32* oid,
+                word32 oidType, word32 maxIdx)
+{
+    int    ret = 0, length;
+    word32 idx = *inOutIdx;
+#ifndef NO_VERIFY_OID
+    word32 actualOidSz = 0;
+    const byte* actualOid;
+#endif /* NO_VERIFY_OID */
+
+    (void)oidType;
+    WOLFSSL_ENTER("GetObjectId()");
+    *oid = 0;
+
+    ret = GetASNObjectId(input, &idx, &length, maxIdx);
+    if (ret != 0)
+        return ret;
+
+#ifndef NO_VERIFY_OID
+    actualOid = &input[idx];
+    if (length > 0)
+        actualOidSz = (word32)length;
+#endif /* NO_VERIFY_OID */
+
+    while (length--) {
+        /* odd HC08 compiler behavior here when input[idx++] */
+        *oid += (word32)input[idx];
+        idx++;
+    }
+    /* just sum it up for now */
+
+    *inOutIdx = idx;
+
+#ifndef NO_VERIFY_OID
+    {
+        const byte* checkOid = NULL;
+        word32 checkOidSz;
+    #ifdef ASN_DUMP_OID
+        word32 i;
+    #endif
+
+        if (oidType != oidIgnoreType) {
+            checkOid = OidFromId(*oid, oidType, &checkOidSz);
+
+        #ifdef ASN_DUMP_OID
+            /* support for dumping OID information */
+            printf("OID (Type %d, Sz %d, Sum %d): ", oidType, actualOidSz, *oid);
+            for (i = 0; i < actualOidSz; i++) {
+                printf("%d, ", actualOid[i]);
+            }
+            printf("\n");
+        #endif /* ASN_DUMP_OID */
+
+            if (checkOid != NULL &&
+                (checkOidSz != actualOidSz ||
+                    XMEMCMP(actualOid, checkOid, checkOidSz) != 0)) {
+                WOLFSSL_MSG("OID Check Failed");
+                return ASN_UNKNOWN_OID_E;
+            }
+        }
+    }
+#endif /* NO_VERIFY_OID */
+
+    return ret;
+}
+
+static int SkipObjectId(const byte* input, word32* inOutIdx, word32 maxIdx)
+{
+    word32 idx = *inOutIdx;
+    int    length;
+    int    ret;
+
+    ret = GetASNObjectId(input, &idx, &length, maxIdx);
+    if (ret != 0)
+        return ret;
+
+    idx += length;
+    *inOutIdx = idx;
+
+    return 0;
+}
+
+int GetAlgoId(const byte* input, word32* inOutIdx, word32* oid,
+              word32 oidType, word32 maxIdx)
+{
+    int    length;
+    word32 idx = *inOutIdx;
+    int    ret;
+    *oid = 0;
+
+    WOLFSSL_ENTER("GetAlgoId");
+
+    if (GetSequence(input, &idx, &length, maxIdx) < 0)
+        return ASN_PARSE_E;
+
+    if (GetObjectId(input, &idx, oid, oidType, maxIdx) < 0)
+        return ASN_OBJECT_ID_E;
+
+    /* could have NULL tag and 0 terminator, but may not */
+    if (idx < maxIdx) {
+        word32 localIdx = idx; /* use localIdx to not advance when checking */
+        byte   tag;
+
+        if (GetASNTag(input, &localIdx, &tag, maxIdx) == 0) {
+            if (tag == ASN_TAG_NULL) {
+                ret = GetASNNull(input, &idx, maxIdx);
+                if (ret != 0)
+                    return ret;
+            }
+        }
+    }
+
+    *inOutIdx = idx;
+
+    return 0;
+}
+
+#ifndef NO_RSA
+
+#ifndef HAVE_USER_RSA
+int wc_RsaPrivateKeyDecode(const byte* input, word32* inOutIdx, RsaKey* key,
+                           word32 inSz)
+{
+    int version, length;
+
+    if (inOutIdx == NULL) {
+        return BAD_FUNC_ARG;
+    }
+    if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+        return ASN_PARSE_E;
+
+    if (GetMyVersion(input, inOutIdx, &version, inSz) < 0)
+        return ASN_PARSE_E;
+
+    key->type = RSA_PRIVATE;
+
+    if (GetInt(&key->n,  input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->e,  input, inOutIdx, inSz) < 0 ||
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+        GetInt(&key->d,  input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->p,  input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->q,  input, inOutIdx, inSz) < 0)
+#else
+        SkipInt(input, inOutIdx, inSz) < 0 ||
+        SkipInt(input, inOutIdx, inSz) < 0 ||
+        SkipInt(input, inOutIdx, inSz) < 0 )
+
+#endif
+            return ASN_RSA_KEY_E;
+#if (defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || !defined(RSA_LOW_MEM)) \
+    && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+    if (GetInt(&key->dP, input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->dQ, input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->u,  input, inOutIdx, inSz) < 0 )  return ASN_RSA_KEY_E;
+#else
+    if (SkipInt(input, inOutIdx, inSz) < 0 ||
+        SkipInt(input, inOutIdx, inSz) < 0 ||
+        SkipInt(input, inOutIdx, inSz) < 0 )  return ASN_RSA_KEY_E;
+#endif
+
+#if defined(WOLFSSL_XILINX_CRYPT) || defined(WOLFSSL_CRYPTOCELL)
+    if (wc_InitRsaHw(key) != 0) {
+        return BAD_STATE_E;
+    }
+#endif
+
+    return 0;
+}
+#endif /* HAVE_USER_RSA */
+#endif /* NO_RSA */
+
+#if defined(HAVE_PKCS8) || defined(HAVE_PKCS12)
+
+/* Remove PKCS8 header, place inOutIdx at beginning of traditional,
+ * return traditional length on success, negative on error */
+int ToTraditionalInline_ex(const byte* input, word32* inOutIdx, word32 sz,
+                           word32* algId)
+{
+    word32 idx;
+    int    version, length;
+    int    ret;
+    byte   tag;
+
+    if (input == NULL || inOutIdx == NULL)
+        return BAD_FUNC_ARG;
+
+    idx = *inOutIdx;
+
+    if (GetSequence(input, &idx, &length, sz) < 0)
+        return ASN_PARSE_E;
+
+    if (GetMyVersion(input, &idx, &version, sz) < 0)
+        return
ASN_PARSE_E; + + if (GetAlgoId(input, &idx, algId, oidKeyType, sz) < 0) + return ASN_PARSE_E; + + if (GetASNTag(input, &idx, &tag, sz) < 0) + return ASN_PARSE_E; + idx = idx - 1; /* reset idx after finding tag */ + + if (tag == ASN_OBJECT_ID) { + if (SkipObjectId(input, &idx, sz) < 0) + return ASN_PARSE_E; + } + + ret = GetOctetString(input, &idx, &length, sz); + if (ret < 0) { + if (ret == BUFFER_E) + return ASN_PARSE_E; + /* Some private keys don't expect an octet string */ + WOLFSSL_MSG("Couldn't find Octet string"); + } + + *inOutIdx = idx; + + return length; +} + +int ToTraditionalInline(const byte* input, word32* inOutIdx, word32 sz) +{ + word32 oid; + + return ToTraditionalInline_ex(input, inOutIdx, sz, &oid); +} + +/* Remove PKCS8 header, move beginning of traditional to beginning of input */ +int ToTraditional_ex(byte* input, word32 sz, word32* algId) +{ + word32 inOutIdx = 0; + int length; + + if (input == NULL) + return BAD_FUNC_ARG; + + length = ToTraditionalInline_ex(input, &inOutIdx, sz, algId); + if (length < 0) + return length; + + XMEMMOVE(input, input + inOutIdx, length); + + return length; +} + +int ToTraditional(byte* input, word32 sz) +{ + word32 oid; + + return ToTraditional_ex(input, sz, &oid); +} + +#endif /* HAVE_PKCS8 || HAVE_PKCS12 */ + +#ifdef HAVE_PKCS8 + +/* find beginning of traditional key inside PKCS#8 unencrypted buffer + * return traditional length on success, with inOutIdx at beginning of + * traditional + * return negative on failure/error */ +int wc_GetPkcs8TraditionalOffset(byte* input, word32* inOutIdx, word32 sz) +{ + int length; + word32 algId; + + if (input == NULL || inOutIdx == NULL || (*inOutIdx > sz)) + return BAD_FUNC_ARG; + + length = ToTraditionalInline_ex(input, inOutIdx, sz, &algId); + + return length; +} + + +/* PKCS#8 from RFC 5208 + * This function takes in a DER key and converts it to PKCS#8 format. Used + * in creating PKCS#12 shrouded key bags. + * Reverse of ToTraditional + * + * PrivateKeyInfo ::= SEQUENCE { + * version Version, + * privateKeyAlgorithm PrivateKeyAlgorithmIdentifier, + * privateKey PrivateKey, + * attributes optional + * } + * Version ::= INTEGER + * PrivateKeyAlgorithmIdentifier ::= AlgorithmIdentifier + * PrivateKey ::= OCTET STRING + * + * out buffer to place result in + * outSz size of out buffer + * key buffer with DER key + * keySz size of key buffer + * algoID algorithm ID i.e. RSAk + * curveOID ECC curve oid if used. Should be NULL for RSA keys. + * oidSz size of curve oid. Is set to 0 if curveOID is NULL. + * + * Returns the size of PKCS#8 placed into out. In error cases returns negative + * values. 
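+ *
+ * A minimal two-pass usage sketch (illustrative only; "der", "derSz" and
+ * the error handling are hypothetical, not part of this file):
+ *
+ *     word32 pkcs8Sz = 0;
+ *     byte*  pkcs8;
+ *     if (wc_CreatePKCS8Key(NULL, &pkcs8Sz, der, derSz,
+ *                           RSAk, NULL, 0) != LENGTH_ONLY_E)
+ *         return -1;                       (size query must say LENGTH_ONLY_E)
+ *     pkcs8 = (byte*)XMALLOC(pkcs8Sz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ *     ret = wc_CreatePKCS8Key(pkcs8, &pkcs8Sz, der, derSz,
+ *                             RSAk, NULL, 0); (ret > 0 is the size written)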
+ */ +int wc_CreatePKCS8Key(byte* out, word32* outSz, byte* key, word32 keySz, + int algoID, const byte* curveOID, word32 oidSz) +{ + word32 keyIdx = 0; + word32 tmpSz = 0; + word32 sz; + + + /* If out is NULL then return the max size needed + * + 2 for ASN_OBJECT_ID and ASN_OCTET_STRING tags */ + if (out == NULL && outSz != NULL) { + *outSz = keySz + MAX_SEQ_SZ + MAX_VERSION_SZ + MAX_ALGO_SZ + + MAX_LENGTH_SZ + MAX_LENGTH_SZ + 2; + + if (curveOID != NULL) + *outSz += oidSz + MAX_LENGTH_SZ + 1; + + WOLFSSL_MSG("Checking size of PKCS8"); + + return LENGTH_ONLY_E; + } + + WOLFSSL_ENTER("wc_CreatePKCS8Key()"); + + if (key == NULL || out == NULL || outSz == NULL) { + return BAD_FUNC_ARG; + } + + /* check the buffer has enough room for largest possible size */ + if (curveOID != NULL) { + if (*outSz < (keySz + MAX_SEQ_SZ + MAX_VERSION_SZ + MAX_ALGO_SZ + + MAX_LENGTH_SZ + MAX_LENGTH_SZ + 3 + oidSz + MAX_LENGTH_SZ)) + return BUFFER_E; + } + else { + oidSz = 0; /* with no curveOID oid size must be 0 */ + if (*outSz < (keySz + MAX_SEQ_SZ + MAX_VERSION_SZ + MAX_ALGO_SZ + + MAX_LENGTH_SZ + MAX_LENGTH_SZ + 2)) + return BUFFER_E; + } + + /* PrivateKeyInfo ::= SEQUENCE */ + keyIdx += MAX_SEQ_SZ; /* save room for sequence */ + + /* version Version + * no header information just INTEGER */ + sz = SetMyVersion(PKCS8v0, out + keyIdx, 0); + tmpSz += sz; keyIdx += sz; + + /* privateKeyAlgorithm PrivateKeyAlgorithmIdentifier */ + sz = 0; /* set sz to 0 and get privateKey oid buffer size needed */ + if (curveOID != NULL && oidSz > 0) { + byte buf[MAX_LENGTH_SZ]; + sz = SetLength(oidSz, buf); + sz += 1; /* plus one for ASN object id */ + } + sz = SetAlgoID(algoID, out + keyIdx, oidKeyType, oidSz + sz); + tmpSz += sz; keyIdx += sz; + + /* privateKey PrivateKey * + * pkcs8 ecc uses slightly different format. 
Places curve oid in
+ * buffer */
+    if (curveOID != NULL && oidSz > 0) {
+        sz = SetObjectId(oidSz, out + keyIdx);
+        keyIdx += sz; tmpSz += sz;
+        XMEMCPY(out + keyIdx, curveOID, oidSz);
+        keyIdx += oidSz; tmpSz += oidSz;
+    }
+
+    sz = SetOctetString(keySz, out + keyIdx);
+    keyIdx += sz; tmpSz += sz;
+    XMEMCPY(out + keyIdx, key, keySz);
+    tmpSz += keySz;
+
+    /* attributes            optional
+     * No attributes currently added */
+
+    /* rewind and add sequence */
+    sz = SetSequence(tmpSz, out);
+    XMEMMOVE(out + sz, out + MAX_SEQ_SZ, tmpSz);
+
+    return tmpSz + sz;
+}
+
+#endif /* HAVE_PKCS8 */
+
+#if defined(HAVE_PKCS12) || !defined(NO_CHECK_PRIVATE_KEY)
+/* check that the private key is a pair for the public key in certificate
+ * return 1 (true) on match
+ * return 0 or negative value on failure/error
+ *
+ * key   : buffer holding DER format key
+ * keySz : size of key buffer
+ * der   : an initialized and parsed DecodedCert holding a certificate */
+int wc_CheckPrivateKey(byte* key, word32 keySz, DecodedCert* der)
+{
+    int ret;
+    (void)keySz;
+
+    if (key == NULL || der == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    #if !defined(NO_RSA) && !defined(NO_ASN_CRYPT)
+    /* test if RSA key */
+    if (der->keyOID == RSAk) {
+    #ifdef WOLFSSL_SMALL_STACK
+        RsaKey* a;
+        RsaKey* b = NULL;
+    #else
+        RsaKey a[1], b[1];
+    #endif
+        word32 keyIdx = 0;
+
+    #ifdef WOLFSSL_SMALL_STACK
+        a = (RsaKey*)XMALLOC(sizeof(RsaKey), NULL, DYNAMIC_TYPE_RSA);
+        if (a == NULL)
+            return MEMORY_E;
+        b = (RsaKey*)XMALLOC(sizeof(RsaKey), NULL, DYNAMIC_TYPE_RSA);
+        if (b == NULL) {
+            XFREE(a, NULL, DYNAMIC_TYPE_RSA);
+            return MEMORY_E;
+        }
+    #endif
+
+        if ((ret = wc_InitRsaKey(a, NULL)) < 0) {
+    #ifdef WOLFSSL_SMALL_STACK
+            XFREE(b, NULL, DYNAMIC_TYPE_RSA);
+            XFREE(a, NULL, DYNAMIC_TYPE_RSA);
+    #endif
+            return ret;
+        }
+        if ((ret = wc_InitRsaKey(b, NULL)) < 0) {
+            wc_FreeRsaKey(a);
+    #ifdef WOLFSSL_SMALL_STACK
+            XFREE(b, NULL, DYNAMIC_TYPE_RSA);
+            XFREE(a, NULL, DYNAMIC_TYPE_RSA);
+    #endif
+            return ret;
+        }
+        if ((ret = wc_RsaPrivateKeyDecode(key, &keyIdx, a, keySz)) == 0) {
+            WOLFSSL_MSG("Checking RSA key pair");
+            keyIdx = 0; /* reset to 0 for parsing public key */
+
+            if ((ret = wc_RsaPublicKeyDecode(der->publicKey, &keyIdx, b,
+                                             der->pubKeySize)) == 0) {
+                /* limit for user RSA crypto because of RsaKey
+                 * dereference. */
+            #if defined(HAVE_USER_RSA)
+                WOLFSSL_MSG("Cannot verify RSA pair with user RSA");
+                ret = 1; /* return first RSA cert as match */
+            #else
+                /* both keys extracted successfully, now check that the
+                 * n and e values are the same.
This is dereferencing RsaKey */
+                if (mp_cmp(&(a->n), &(b->n)) != MP_EQ ||
+                    mp_cmp(&(a->e), &(b->e)) != MP_EQ) {
+                    ret = MP_CMP_E;
+                }
+                else
+                    ret = 1;
+            #endif
+            }
+        }
+        wc_FreeRsaKey(b);
+        wc_FreeRsaKey(a);
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(b, NULL, DYNAMIC_TYPE_RSA);
+        XFREE(a, NULL, DYNAMIC_TYPE_RSA);
+    #endif
+    }
+    else
+    #endif /* !NO_RSA && !NO_ASN_CRYPT */
+
+    #if defined(HAVE_ECC) && defined(HAVE_ECC_KEY_EXPORT) && !defined(NO_ASN_CRYPT)
+    if (der->keyOID == ECDSAk) {
+    #ifdef WOLFSSL_SMALL_STACK
+        ecc_key* key_pair;
+        byte*    privDer;
+    #else
+        ecc_key  key_pair[1];
+        byte     privDer[MAX_ECC_BYTES];
+    #endif
+        word32   privSz = MAX_ECC_BYTES;
+        word32   keyIdx = 0;
+
+    #ifdef WOLFSSL_SMALL_STACK
+        key_pair = (ecc_key*)XMALLOC(sizeof(ecc_key), NULL, DYNAMIC_TYPE_ECC);
+        if (key_pair == NULL)
+            return MEMORY_E;
+        privDer = (byte*)XMALLOC(MAX_ECC_BYTES, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (privDer == NULL) {
+            XFREE(key_pair, NULL, DYNAMIC_TYPE_ECC);
+            return MEMORY_E;
+        }
+    #endif
+
+        if ((ret = wc_ecc_init(key_pair)) < 0) {
+    #ifdef WOLFSSL_SMALL_STACK
+            XFREE(privDer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(key_pair, NULL, DYNAMIC_TYPE_ECC);
+    #endif
+            return ret;
+        }
+
+        if ((ret = wc_EccPrivateKeyDecode(key, &keyIdx, key_pair,
+                                          keySz)) == 0) {
+            WOLFSSL_MSG("Checking ECC key pair");
+
+            if ((ret = wc_ecc_export_private_only(key_pair, privDer, &privSz))
+                                                                        == 0) {
+                wc_ecc_free(key_pair);
+                ret = wc_ecc_init(key_pair);
+                if (ret == 0) {
+                    ret = wc_ecc_import_private_key((const byte*)privDer,
+                                            privSz, (const byte*)der->publicKey,
+                                            der->pubKeySize, key_pair);
+                }
+
+                /* public and private extracted successfully, now check that
+                 * they are a pair and also do sanity checks on the key.
+                 * wc_ecc_check_key checks that private * base generator
+                 * equals pubkey */
+                if (ret == 0) {
+                    if ((ret = wc_ecc_check_key(key_pair)) == 0) {
+                        ret = 1;
+                    }
+                }
+                ForceZero(privDer, privSz);
+            }
+        }
+        wc_ecc_free(key_pair);
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(privDer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(key_pair, NULL, DYNAMIC_TYPE_ECC);
+    #endif
+    }
+    else
+    #endif /* HAVE_ECC && HAVE_ECC_KEY_EXPORT && !NO_ASN_CRYPT */
+
+    #if defined(HAVE_ED25519) && !defined(NO_ASN_CRYPT)
+    if (der->keyOID == ED25519k) {
+    #ifdef WOLFSSL_SMALL_STACK
+        ed25519_key* key_pair;
+    #else
+        ed25519_key  key_pair[1];
+    #endif
+        word32       keyIdx = 0;
+
+    #ifdef WOLFSSL_SMALL_STACK
+        key_pair = (ed25519_key*)XMALLOC(sizeof(ed25519_key), NULL,
+                                         DYNAMIC_TYPE_ED25519);
+        if (key_pair == NULL)
+            return MEMORY_E;
+    #endif
+
+        if ((ret = wc_ed25519_init(key_pair)) < 0) {
+    #ifdef WOLFSSL_SMALL_STACK
+            XFREE(key_pair, NULL, DYNAMIC_TYPE_ED25519);
+    #endif
+            return ret;
+        }
+        if ((ret = wc_Ed25519PrivateKeyDecode(key, &keyIdx, key_pair,
+                                              keySz)) == 0) {
+            WOLFSSL_MSG("Checking ED25519 key pair");
+            keyIdx = 0;
+            if ((ret = wc_ed25519_import_public(der->publicKey, der->pubKeySize,
+                                                key_pair)) == 0) {
+                /* public and private extracted successfully, now check that
+                 * they are a pair and also do sanity checks on the key.
+                 * wc_ed25519_check_key
+                 * checks that private * base generator equals pubkey */
+                if ((ret = wc_ed25519_check_key(key_pair)) == 0)
+                    ret = 1;
+            }
+        }
+        wc_ed25519_free(key_pair);
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(key_pair, NULL, DYNAMIC_TYPE_ED25519);
+    #endif
+    }
+    else
+    #endif /* HAVE_ED25519 && !NO_ASN_CRYPT */
+
+    #if defined(HAVE_ED448) && !defined(NO_ASN_CRYPT)
+    if (der->keyOID == ED448k) {
+    #ifdef WOLFSSL_SMALL_STACK
+        ed448_key* key_pair = NULL;
+    #else
+        ed448_key  key_pair[1];
+    #endif
+        word32     keyIdx = 0;
+
+    #ifdef WOLFSSL_SMALL_STACK
+        key_pair = (ed448_key*)XMALLOC(sizeof(ed448_key), NULL,
+                                       DYNAMIC_TYPE_ED448);
+        if (key_pair == NULL)
+            return MEMORY_E;
+    #endif
+
+        if ((ret = wc_ed448_init(key_pair)) < 0) {
+    #ifdef WOLFSSL_SMALL_STACK
+            XFREE(key_pair, NULL, DYNAMIC_TYPE_ED448);
+    #endif
+            return ret;
+        }
+        if ((ret = wc_Ed448PrivateKeyDecode(key, &keyIdx, key_pair,
+                                            keySz)) == 0) {
+            WOLFSSL_MSG("Checking ED448 key pair");
+            keyIdx = 0;
+            if ((ret = wc_ed448_import_public(der->publicKey, der->pubKeySize,
+                                              key_pair)) == 0) {
+                /* public and private extracted successfully, now check that
+                 * they are a pair and also do sanity checks on the key.
+                 * wc_ed448_check_key checks that private * base generator
+                 * equals pubkey */
+                if ((ret = wc_ed448_check_key(key_pair)) == 0)
+                    ret = 1;
+            }
+        }
+        wc_ed448_free(key_pair);
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(key_pair, NULL, DYNAMIC_TYPE_ED448);
+    #endif
+    }
+    else
+    #endif /* HAVE_ED448 && !NO_ASN_CRYPT */
+    {
+        ret = 0;
+    }
+
+    (void)keySz;
+
+    return ret;
+}
+
+#endif /* HAVE_PKCS12 || !NO_CHECK_PRIVATE_KEY */
+
+#ifndef NO_PWDBASED
+
+#if defined(HAVE_PKCS8) || defined(HAVE_PKCS12)
+/* Check to see if the PKCS version/algo pair is supported; set id if it is.
+ * Returns 0 on success, < 0 on error. */
+static int CheckAlgo(int first, int second, int* id, int* version, int* blockSz)
+{
+    *id      = ALGO_ID_E;
+    *version = PKCS5;          /* default */
+    if (blockSz) *blockSz = 8; /* default */
+
+    if (first == 1) {
+        switch (second) {
+#if !defined(NO_SHA)
+    #ifndef NO_RC4
+        case PBE_SHA1_RC4_128:
+            *id = PBE_SHA1_RC4_128;
+            *version = PKCS12v1;
+            return 0;
+    #endif
+    #ifndef NO_DES3
+        case PBE_SHA1_DES3:
+            *id = PBE_SHA1_DES3;
+            *version = PKCS12v1;
+            if (blockSz) *blockSz = DES_BLOCK_SIZE;
+            return 0;
+        case PBE_SHA1_DES:
+            *id = PBE_SHA1_DES;
+            *version = PKCS12v1;
+            if (blockSz) *blockSz = DES_BLOCK_SIZE;
+            return 0;
+    #endif
+#endif /* !NO_SHA */
+        default:
+            return ALGO_ID_E;
+        }
+    }
+
+    if (first != PKCS5)
+        return ASN_INPUT_E;  /* VERSION ERROR */
+
+    if (second == PBES2) {
+        *version = PKCS5v2;
+        return 0;
+    }
+
+    switch (second) {
+#ifndef NO_DES3
+    #ifndef NO_MD5
+    case 3:                  /* see RFC 2898 for ids */
+        *id = PBE_MD5_DES;
+        if (blockSz) *blockSz = DES_BLOCK_SIZE;
+        return 0;
+    #endif
+    #ifndef NO_SHA
+    case 10:
+        *id = PBE_SHA1_DES;
+        if (blockSz) *blockSz = DES_BLOCK_SIZE;
+        return 0;
+    #endif
+#endif /* !NO_DES3 */
+    default:
+        return ALGO_ID_E;
+    }
+}
+
+/* Check to see if the PKCS v2 algo is supported; set id if it is.
+ * Returns 0 on success, < 0 on error. */
+static int CheckAlgoV2(int oid, int* id, int* blockSz)
+{
+    if (blockSz) *blockSz = 8; /* default */
+    (void)id; /* not used if AES and DES3 disabled */
+    switch (oid) {
+#if !defined(NO_DES3) && !defined(NO_SHA)
+    case DESb:
+        *id = PBE_SHA1_DES;
+        if (blockSz) *blockSz = DES_BLOCK_SIZE;
+        return 0;
+    case DES3b:
+        *id = PBE_SHA1_DES3;
+        if (blockSz) *blockSz = DES_BLOCK_SIZE;
+        return 0;
+#endif
+#ifdef WOLFSSL_AES_256
+    case AES256CBCb:
+        *id = PBE_AES256_CBC;
+        if (blockSz) *blockSz = AES_BLOCK_SIZE;
+        return 0;
+#endif
+#ifdef
WOLFSSL_AES_128 + case AES128CBCb: + *id = PBE_AES128_CBC; + if (blockSz) *blockSz = AES_BLOCK_SIZE; + return 0; +#endif + default: + WOLFSSL_MSG("No PKCS v2 algo found"); + return ALGO_ID_E; + + } +} + +#endif /* HAVE_PKCS8 || HAVE_PKCS12 */ + +#ifdef HAVE_PKCS8 + +int wc_GetKeyOID(byte* key, word32 keySz, const byte** curveOID, word32* oidSz, + int* algoID, void* heap) +{ + word32 tmpIdx = 0; + + if (key == NULL || algoID == NULL) + return BAD_FUNC_ARG; + + *algoID = 0; + + #if !defined(NO_RSA) && !defined(NO_ASN_CRYPT) + { + RsaKey rsa; + + wc_InitRsaKey(&rsa, heap); + if (wc_RsaPrivateKeyDecode(key, &tmpIdx, &rsa, keySz) == 0) { + *algoID = RSAk; + } + else { + WOLFSSL_MSG("Not RSA DER key"); + } + wc_FreeRsaKey(&rsa); + } + #endif /* !NO_RSA && !NO_ASN_CRYPT */ + #if defined(HAVE_ECC) && !defined(NO_ASN_CRYPT) + if (*algoID == 0) { + ecc_key ecc; + + tmpIdx = 0; + wc_ecc_init_ex(&ecc, heap, INVALID_DEVID); + if (wc_EccPrivateKeyDecode(key, &tmpIdx, &ecc, keySz) == 0) { + *algoID = ECDSAk; + + /* now find oid */ + if (wc_ecc_get_oid(ecc.dp->oidSum, curveOID, oidSz) < 0) { + WOLFSSL_MSG("Error getting ECC curve OID"); + wc_ecc_free(&ecc); + return BAD_FUNC_ARG; + } + } + else { + WOLFSSL_MSG("Not ECC DER key either"); + } + wc_ecc_free(&ecc); + } +#endif /* HAVE_ECC && !NO_ASN_CRYPT */ +#if defined(HAVE_ED25519) && !defined(NO_ASN_CRYPT) + if (*algoID != RSAk && *algoID != ECDSAk) { + ed25519_key ed25519; + + tmpIdx = 0; + if (wc_ed25519_init(&ed25519) == 0) { + if (wc_Ed25519PrivateKeyDecode(key, &tmpIdx, &ed25519, keySz) + == 0) { + *algoID = ED25519k; + } + else { + WOLFSSL_MSG("Not ED25519 DER key"); + } + wc_ed25519_free(&ed25519); + } + else { + WOLFSSL_MSG("GetKeyOID wc_ed25519_init failed"); + } + } +#endif /* HAVE_ED25519 && !NO_ASN_CRYPT */ +#if defined(HAVE_ED448) && !defined(NO_ASN_CRYPT) + if (*algoID != RSAk && *algoID != ECDSAk && *algoID != ED25519k) { + ed448_key ed448; + + tmpIdx = 0; + if (wc_ed448_init(&ed448) == 0) { + if (wc_Ed448PrivateKeyDecode(key, &tmpIdx, &ed448, keySz) == 0) { + *algoID = ED448k; + } + else { + WOLFSSL_MSG("Not ED448 DER key"); + } + wc_ed448_free(&ed448); + } + else { + WOLFSSL_MSG("GetKeyOID wc_ed448_init failed"); + } + } +#endif /* HAVE_ED448 && !NO_ASN_CRYPT */ + + /* if flag is not set then is neither RSA or ECC key that could be + * found */ + if (*algoID == 0) { + WOLFSSL_MSG("Bad key DER or compile options"); + return BAD_FUNC_ARG; + } + + (void)tmpIdx; + (void)curveOID; + (void)oidSz; + (void)keySz; + (void)heap; + + return 1; +} + +#endif /* HAVE_PKCS8 */ + +#if defined(HAVE_PKCS8) || defined(HAVE_PKCS12) + +#define PKCS8_MIN_BLOCK_SIZE 8 +static int Pkcs8Pad(byte* buf, int sz, int blockSz) +{ + int i, padSz; + + /* calculate pad size */ + padSz = blockSz - (sz & (blockSz - 1)); + + /* pad with padSz value */ + if (buf) { + for (i = 0; i < padSz; i++) { + buf[sz+i] = (byte)(padSz & 0xFF); + } + } + + /* return adjusted length */ + return sz + padSz; +} + +#endif /* HAVE_PKCS8 || HAVE_PKCS12 */ + +#ifdef HAVE_PKCS8 + +/* + * Used when creating PKCS12 shrouded key bags + * vPKCS is the version of PKCS to use + * vAlgo is the algorithm version to use + * + * if salt is NULL a random number is generated + * + * returns the size of encrypted data on success + */ +int UnTraditionalEnc(byte* key, word32 keySz, byte* out, word32* outSz, + const char* password, int passwordSz, int vPKCS, int vAlgo, + byte* salt, word32 saltSz, int itt, WC_RNG* rng, void* heap) +{ + int algoID = 0; + byte* tmp; + word32 tmpSz = 0; + word32 sz; + word32 
seqSz; + word32 inOutIdx = 0; + word32 totalSz = 0; + int version, id; + int ret; + int blockSz = 0; + + const byte* curveOID = NULL; + word32 oidSz = 0; + +#ifdef WOLFSSL_SMALL_STACK + byte* saltTmp = NULL; + byte* cbcIv = NULL; +#else + byte saltTmp[MAX_IV_SIZE]; + byte cbcIv[MAX_IV_SIZE]; +#endif + + WOLFSSL_ENTER("UnTraditionalEnc()"); + + if (saltSz > MAX_SALT_SIZE) + return ASN_PARSE_E; + + + inOutIdx += MAX_SEQ_SZ; /* leave room for size of finished shroud */ + if (CheckAlgo(vPKCS, vAlgo, &id, &version, &blockSz) < 0) { + WOLFSSL_MSG("Bad/Unsupported algorithm ID"); + return ASN_INPUT_E; /* Algo ID error */ + } + + if (out != NULL) { + if (*outSz < inOutIdx + MAX_ALGO_SZ + MAX_SALT_SIZE + MAX_SEQ_SZ + 1 + + MAX_LENGTH_SZ + MAX_SHORT_SZ + 1) + return BUFFER_E; + + if (version == PKCS5v2) { + WOLFSSL_MSG("PKCS5v2 Not supported yet\n"); + return ASN_VERSION_E; + } + + if (salt == NULL || saltSz == 0) { + saltSz = 8; + #ifdef WOLFSSL_SMALL_STACK + saltTmp = (byte*)XMALLOC(saltSz, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (saltTmp == NULL) + return MEMORY_E; + #endif + salt = saltTmp; + + if ((ret = wc_RNG_GenerateBlock(rng, saltTmp, saltSz)) != 0) { + WOLFSSL_MSG("Error generating random salt"); + #ifdef WOLFSSL_SMALL_STACK + if (saltTmp != NULL) + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return ret; + } + } + + + /* leave room for a sequence (contains salt and iterations int) */ + inOutIdx += MAX_SEQ_SZ; sz = 0; + inOutIdx += MAX_ALGO_SZ; + + /* place salt in buffer */ + out[inOutIdx++] = ASN_OCTET_STRING; sz++; + tmpSz = SetLength(saltSz, out + inOutIdx); + inOutIdx += tmpSz; sz += tmpSz; + XMEMCPY(out + inOutIdx, salt, saltSz); + inOutIdx += saltSz; sz += saltSz; + + /* place iteration count in buffer */ + ret = SetShortInt(out, &inOutIdx, itt, *outSz); + if (ret < 0) { + return ret; + } + sz += (word32)ret; + + /* wind back index and set sequence then clean up buffer */ + inOutIdx -= (sz + MAX_SEQ_SZ); + tmpSz = SetSequence(sz, out + inOutIdx); + XMEMMOVE(out + inOutIdx + tmpSz, out + inOutIdx + MAX_SEQ_SZ, sz); + totalSz += tmpSz + sz; sz += tmpSz; + + /* add in algo ID */ + inOutIdx -= MAX_ALGO_SZ; + tmpSz = SetAlgoID(id, out + inOutIdx, oidPBEType, sz); + XMEMMOVE(out + inOutIdx + tmpSz, out + inOutIdx + MAX_ALGO_SZ, sz); + totalSz += tmpSz; inOutIdx += tmpSz + sz; + + /* octet string containing encrypted key */ + out[inOutIdx++] = ASN_OCTET_STRING; totalSz++; + } + + /* check key type and get OID if ECC */ + if ((ret = wc_GetKeyOID(key, keySz, &curveOID, &oidSz, &algoID, heap))< 0) { + WOLFSSL_MSG("Error getting key OID"); + return ret; + } + + /* PKCS#8 wrapping around key */ + if (wc_CreatePKCS8Key(NULL, &tmpSz, key, keySz, algoID, curveOID, oidSz) + != LENGTH_ONLY_E) { + #ifdef WOLFSSL_SMALL_STACK + if (saltTmp != NULL) + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return MEMORY_E; + } + + /* check if should return max size */ + if (out == NULL) { + /* account for salt size */ + if (salt == NULL || saltSz == 0) { + tmpSz += MAX_SALT_SIZE; + } + else { + tmpSz += saltSz; + } + + /* plus 3 for tags */ + *outSz = tmpSz + MAX_ALGO_SZ + MAX_LENGTH_SZ +MAX_LENGTH_SZ + MAX_SEQ_SZ + + MAX_LENGTH_SZ + MAX_SEQ_SZ + 3; + return LENGTH_ONLY_E; + } + + /* reserve buffer for crypto and make sure it supports full blocks */ + tmp = (byte*)XMALLOC(tmpSz + (blockSz-1), heap, DYNAMIC_TYPE_TMP_BUFFER); + if (tmp == NULL) { + #ifdef WOLFSSL_SMALL_STACK + if (saltTmp != NULL) + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return MEMORY_E; + } + + if ((ret = 
wc_CreatePKCS8Key(tmp, &tmpSz, key, keySz, algoID, curveOID, + oidSz)) < 0) { + XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + WOLFSSL_MSG("Error wrapping key with PKCS#8"); + #ifdef WOLFSSL_SMALL_STACK + if (saltTmp != NULL) + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return ret; + } + tmpSz = ret; + + /* adjust size to pad */ + tmpSz = Pkcs8Pad(tmp, tmpSz, blockSz); + +#ifdef WOLFSSL_SMALL_STACK + cbcIv = (byte*)XMALLOC(MAX_IV_SIZE, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (cbcIv == NULL) { + if (saltTmp != NULL) + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(salt, heap, DYNAMIC_TYPE_TMP_BUFFER); + return MEMORY_E; + } +#endif + + /* encrypt PKCS#8 wrapped key */ + if ((ret = wc_CryptKey(password, passwordSz, salt, saltSz, itt, id, + tmp, tmpSz, version, cbcIv, 1, 0)) < 0) { + XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + WOLFSSL_MSG("Error encrypting key"); + #ifdef WOLFSSL_SMALL_STACK + if (saltTmp != NULL) + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (cbcIv != NULL) + XFREE(cbcIv, heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return ret; /* encryption failure */ + } + totalSz += tmpSz; + +#ifdef WOLFSSL_SMALL_STACK + if (saltTmp != NULL) + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (cbcIv != NULL) + XFREE(cbcIv, heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + if (*outSz < inOutIdx + tmpSz + MAX_LENGTH_SZ) { + XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + return BUFFER_E; + } + + /* set length of key and copy over encrypted key */ + seqSz = SetLength(tmpSz, out + inOutIdx); + inOutIdx += seqSz; totalSz += seqSz; + XMEMCPY(out + inOutIdx, tmp, tmpSz); + XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + + /* set total size at beginning */ + sz = SetSequence(totalSz, out); + XMEMMOVE(out + sz, out + MAX_SEQ_SZ, totalSz); + + (void)rng; + + return totalSz + sz; +} + +static int GetAlgoV2(int encAlgId, const byte** oid, int *len, int* id, + int *blkSz) +{ + int ret = 0; + + switch (encAlgId) { +#if !defined(NO_DES3) && !defined(NO_SHA) + case DESb: + *len = sizeof(blkDesCbcOid); + *oid = blkDesCbcOid; + *id = PBE_SHA1_DES; + *blkSz = 8; + break; + case DES3b: + *len = sizeof(blkDes3CbcOid); + *oid = blkDes3CbcOid; + *id = PBE_SHA1_DES3; + *blkSz = 8; + break; +#endif +#if defined(WOLFSSL_AES_256) && defined(HAVE_AES_CBC) + case AES256CBCb: + *len = sizeof(blkAes256CbcOid); + *oid = blkAes256CbcOid; + *id = PBE_AES256_CBC; + *blkSz = 16; + break; +#endif + default: + (void)len; + (void)oid; + (void)id; + (void)blkSz; + ret = ALGO_ID_E; + } + + return ret; +} + +/* Converts Encrypted PKCS#8 to 'traditional' (i.e. PKCS#8 removed from + * decrypted key.) 
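+ *
+ * (Going by the body below, this is the encrypt direction: the traditional
+ * DER key is wrapped in PKCS#8 via wc_CreatePKCS8Key and then encrypted
+ * with wc_CryptKey; ToTraditionalEnc/DecryptContent reverse it.)
+ *
+ * A rough call sketch (illustrative only; "der", "out" and the sizes are
+ * hypothetical): encrypt under PBES2 with AES-256-CBC, 2048 iterations:
+ *
+ *     word32 encSz = outBufSz;
+ *     ret = TraditionalEnc(der, derSz, out, &encSz, "password", 8,
+ *                          PKCS5, PBES2, AES256CBCb, NULL, 0, 2048,
+ *                          rng, NULL);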
+ */ +int TraditionalEnc(byte* key, word32 keySz, byte* out, word32* outSz, + const char* password, int passwordSz, int vPKCS, int vAlgo, + int encAlgId, byte* salt, word32 saltSz, int itt, WC_RNG* rng, + void* heap) +{ + int ret = 0; + int version, blockSz, id; + word32 idx = 0, encIdx; +#ifdef WOLFSSL_SMALL_STACK + byte* saltTmp = NULL; +#else + byte saltTmp[MAX_SALT_SIZE]; +#endif + byte cbcIv[MAX_IV_SIZE]; + byte *pkcs8Key = NULL; + word32 pkcs8KeySz = 0, padSz = 0; + int algId = 0; + const byte* curveOid = NULL; + word32 curveOidSz = 0; + const byte* pbeOid = NULL; + word32 pbeOidSz = 0; + const byte* encOid = NULL; + int encOidSz = 0; + word32 pbeLen = 0, kdfLen = 0, encLen = 0; + word32 innerLen = 0, outerLen; + + ret = CheckAlgo(vPKCS, vAlgo, &id, &version, &blockSz); + /* create random salt if one not provided */ + if (ret == 0 && (salt == NULL || saltSz == 0)) { + saltSz = 8; + #ifdef WOLFSSL_SMALL_STACK + saltTmp = (byte*)XMALLOC(saltSz, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (saltTmp == NULL) + return MEMORY_E; + #endif + salt = saltTmp; + + if ((ret = wc_RNG_GenerateBlock(rng, saltTmp, saltSz)) != 0) { + WOLFSSL_MSG("Error generating random salt"); + #ifdef WOLFSSL_SMALL_STACK + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return ret; + } + } + + if (ret == 0) { + /* check key type and get OID if ECC */ + ret = wc_GetKeyOID(key, keySz, &curveOid, &curveOidSz, &algId, heap); + if (ret == 1) + ret = 0; + } + if (ret == 0) { + ret = wc_CreatePKCS8Key(NULL, &pkcs8KeySz, key, keySz, algId, curveOid, + curveOidSz); + if (ret == LENGTH_ONLY_E) + ret = 0; + } + if (ret == 0) { + pkcs8Key = (byte*)XMALLOC(pkcs8KeySz, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (pkcs8Key == NULL) + ret = MEMORY_E; + } + if (ret == 0) { + ret = wc_CreatePKCS8Key(pkcs8Key, &pkcs8KeySz, key, keySz, algId, + curveOid, curveOidSz); + if (ret >= 0) { + pkcs8KeySz = ret; + ret = 0; + } + } + + if (ret == 0 && version == PKCS5v2) + ret = GetAlgoV2(encAlgId, &encOid, &encOidSz, &id, &blockSz); + + if (ret == 0) { + padSz = (blockSz - (pkcs8KeySz & (blockSz - 1))) & (blockSz - 1); + /* inner = OCT salt INT itt */ + innerLen = 2 + saltSz + 2 + (itt < 256 ? 1 : 2); + + if (version != PKCS5v2) { + pbeOid = OidFromId(id, oidPBEType, &pbeOidSz); + /* pbe = OBJ pbse1 SEQ [ inner ] */ + pbeLen = 2 + pbeOidSz + 2 + innerLen; + } + else { + pbeOid = pbes2; + pbeOidSz = sizeof(pbes2); + /* kdf = OBJ pbkdf2 [ SEQ innerLen ] */ + kdfLen = 2 + sizeof(pbkdf2Oid) + 2 + innerLen; + /* enc = OBJ enc_alg OCT iv */ + encLen = 2 + encOidSz + 2 + blockSz; + /* pbe = OBJ pbse2 SEQ [ SEQ [ kdf ] SEQ [ enc ] ] */ + pbeLen = 2 + sizeof(pbes2) + 2 + 2 + kdfLen + 2 + encLen; + + ret = wc_RNG_GenerateBlock(rng, cbcIv, blockSz); + } + } + if (ret == 0) { + /* outer = SEQ [ pbe ] OCT encrypted_PKCS#8_key */ + outerLen = 2 + pbeLen; + outerLen += SetOctetString(pkcs8KeySz + padSz, out); + outerLen += pkcs8KeySz + padSz; + + idx += SetSequence(outerLen, out + idx); + + encIdx = idx + outerLen - pkcs8KeySz - padSz; + /* Put Encrypted content in place. 
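+             * For PKCS5v2 the finished DER layout is, roughly:
+             *   SEQUENCE
+             *     SEQUENCE                (pbe, the PBES2 algorithm id)
+             *       OBJECT pbes2
+             *       SEQUENCE
+             *         SEQUENCE OBJECT pbkdf2 SEQUENCE { OCTET salt, INT itt }
+             *         SEQUENCE OBJECT enc_alg OCTET iv
+             *     OCTET STRING            (encrypted PKCS#8 key)
+             * matching the innerLen/kdfLen/encLen/pbeLen math above.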
*/ + XMEMCPY(out + encIdx, pkcs8Key, pkcs8KeySz); + if (padSz > 0) { + XMEMSET(out + encIdx + pkcs8KeySz, padSz, padSz); + pkcs8KeySz += padSz; + } + ret = wc_CryptKey(password, passwordSz, salt, saltSz, itt, id, + out + encIdx, pkcs8KeySz, version, cbcIv, 1, 0); + } + if (ret == 0) { + if (version != PKCS5v2) { + /* PBE algorithm */ + idx += SetSequence(pbeLen, out + idx); + idx += SetObjectId(pbeOidSz, out + idx); + XMEMCPY(out + idx, pbeOid, pbeOidSz); + idx += pbeOidSz; + } + else { + /* PBES2 algorithm identifier */ + idx += SetSequence(pbeLen, out + idx); + idx += SetObjectId(pbeOidSz, out + idx); + XMEMCPY(out + idx, pbeOid, pbeOidSz); + idx += pbeOidSz; + /* PBES2 Parameters: SEQ [ kdf ] SEQ [ enc ] */ + idx += SetSequence(2 + kdfLen + 2 + encLen, out + idx); + /* KDF Algorithm Identifier */ + idx += SetSequence(kdfLen, out + idx); + idx += SetObjectId(sizeof(pbkdf2Oid), out + idx); + XMEMCPY(out + idx, pbkdf2Oid, sizeof(pbkdf2Oid)); + idx += sizeof(pbkdf2Oid); + } + idx += SetSequence(innerLen, out + idx); + idx += SetOctetString(saltSz, out + idx); + XMEMCPY(out + idx, salt, saltSz); idx += saltSz; + ret = SetShortInt(out, &idx, itt, *outSz); + if (ret > 0) + ret = 0; + } + if (ret == 0) { + if (version == PKCS5v2) { + /* Encryption Algorithm Identifier */ + idx += SetSequence(encLen, out + idx); + idx += SetObjectId(encOidSz, out + idx); + XMEMCPY(out + idx, encOid, encOidSz); + idx += encOidSz; + /* Encryption Algorithm Parameter: CBC IV */ + idx += SetOctetString(blockSz, out + idx); + XMEMCPY(out + idx, cbcIv, blockSz); + idx += blockSz; + } + idx += SetOctetString(pkcs8KeySz, out + idx); + /* Default PRF - no need to write out OID */ + idx += pkcs8KeySz; + + ret = idx; + } + + if (pkcs8Key != NULL) { + ForceZero(pkcs8Key, pkcs8KeySz); + XFREE(pkcs8Key, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#ifdef WOLFSSL_SMALL_STACK + if (saltTmp != NULL) { + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + (void)rng; + + return ret; +} + +#endif /* HAVE_PKCS8 */ + +#if defined(HAVE_PKCS8) || defined(HAVE_PKCS12) +/* decrypt PKCS + * + * NOTE: input buffer is overwritten with decrypted data! + * + * input[in/out] data to decrypt and results are written to + * sz size of input buffer + * password password if used. Can be NULL for no password + * passwordSz size of password buffer + * + * returns the total size of decrypted content on success. 
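+ *
+ * Typically reached through ToTraditionalEnc below, which strips the outer
+ * SEQUENCE first. A rough sketch (illustrative only; "enc" and "encSz" are
+ * hypothetical placeholders):
+ *
+ *     int len = DecryptContent(enc, encSz, "password", 8);
+ *     (on success, len bytes of decrypted data sit at the front of enc)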
+ */ +int DecryptContent(byte* input, word32 sz, const char* password, int passwordSz) +{ + word32 inOutIdx = 0, seqEnd, oid, shaOid = 0; + int ret = 0, first, second, length = 0, version, saltSz, id; + int iterations = 0, keySz = 0; +#ifdef WOLFSSL_SMALL_STACK + byte* salt = NULL; + byte* cbcIv = NULL; +#else + byte salt[MAX_SALT_SIZE]; + byte cbcIv[MAX_IV_SIZE]; +#endif + byte tag; + + if (passwordSz < 0) { + WOLFSSL_MSG("Bad password size"); + return BAD_FUNC_ARG; + } + + if (GetAlgoId(input, &inOutIdx, &oid, oidIgnoreType, sz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_dc); + } + + first = input[inOutIdx - 2]; /* PKCS version always 2nd to last byte */ + second = input[inOutIdx - 1]; /* version.algo, algo id last byte */ + + if (CheckAlgo(first, second, &id, &version, NULL) < 0) { + ERROR_OUT(ASN_INPUT_E, exit_dc); /* Algo ID error */ + } + + if (version == PKCS5v2) { + if (GetSequence(input, &inOutIdx, &length, sz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_dc); + } + + if (GetAlgoId(input, &inOutIdx, &oid, oidKdfType, sz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_dc); + } + + if (oid != PBKDF2_OID) { + ERROR_OUT(ASN_PARSE_E, exit_dc); + } + } + + if (GetSequence(input, &inOutIdx, &length, sz) <= 0) { + ERROR_OUT(ASN_PARSE_E, exit_dc); + } + /* Find the end of this SEQUENCE so we can check for the OPTIONAL and + * DEFAULT items. */ + seqEnd = inOutIdx + length; + + ret = GetOctetString(input, &inOutIdx, &saltSz, sz); + if (ret < 0) + goto exit_dc; + + if (saltSz > MAX_SALT_SIZE) { + ERROR_OUT(ASN_PARSE_E, exit_dc); + } + +#ifdef WOLFSSL_SMALL_STACK + salt = (byte*)XMALLOC(MAX_SALT_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (salt == NULL) { + ERROR_OUT(MEMORY_E, exit_dc); + } +#endif + + XMEMCPY(salt, &input[inOutIdx], saltSz); + inOutIdx += saltSz; + + if (GetShortInt(input, &inOutIdx, &iterations, sz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_dc); + } + + /* OPTIONAL key length */ + if (seqEnd > inOutIdx) { + word32 localIdx = inOutIdx; + + if (GetASNTag(input, &localIdx, &tag, sz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_dc); + } + + if (tag == ASN_INTEGER && + GetShortInt(input, &inOutIdx, &keySz, sz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_dc); + } + } + + /* DEFAULT HMAC is SHA-1 */ + if (seqEnd > inOutIdx) { + if (GetAlgoId(input, &inOutIdx, &oid, oidHmacType, sz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_dc); + } + + shaOid = oid; + } + +#ifdef WOLFSSL_SMALL_STACK + cbcIv = (byte*)XMALLOC(MAX_IV_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (cbcIv == NULL) { + ERROR_OUT(MEMORY_E, exit_dc); + } +#endif + + if (version == PKCS5v2) { + /* get encryption algo */ + if (GetAlgoId(input, &inOutIdx, &oid, oidBlkType, sz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_dc); + } + + if (CheckAlgoV2(oid, &id, NULL) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_dc); /* PKCS v2 algo id error */ + } + + if (shaOid == 0) + shaOid = oid; + + ret = GetOctetString(input, &inOutIdx, &length, sz); + if (ret < 0) + goto exit_dc; + + if (length > MAX_IV_SIZE) { + ERROR_OUT(ASN_PARSE_E, exit_dc); + } + + XMEMCPY(cbcIv, &input[inOutIdx], length); + inOutIdx += length; + } + + if (GetASNTag(input, &inOutIdx, &tag, sz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_dc); + } + + if (tag != (ASN_CONTEXT_SPECIFIC | 0) && tag != ASN_OCTET_STRING) { + ERROR_OUT(ASN_PARSE_E, exit_dc); + } + + if (GetLength(input, &inOutIdx, &length, sz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_dc); + } + + ret = wc_CryptKey(password, passwordSz, salt, saltSz, iterations, id, + input + inOutIdx, length, version, cbcIv, 0, shaOid); + +exit_dc: +#ifdef WOLFSSL_SMALL_STACK + XFREE(salt, NULL, 
DYNAMIC_TYPE_TMP_BUFFER); + XFREE(cbcIv, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + if (ret == 0) { + XMEMMOVE(input, input + inOutIdx, length); + ret = length; + } + + return ret; +} + + +/* Remove Encrypted PKCS8 header, move beginning of traditional to beginning + of input */ +int ToTraditionalEnc(byte* input, word32 sz,const char* password, + int passwordSz, word32* algId) +{ + int ret, length; + word32 inOutIdx = 0; + + if (GetSequence(input, &inOutIdx, &length, sz) < 0) { + ret = ASN_PARSE_E; + } + else { + ret = DecryptContent(input + inOutIdx, sz - inOutIdx, password, + passwordSz); + if (ret > 0) { + XMEMMOVE(input, input + inOutIdx, ret); + ret = ToTraditional_ex(input, ret, algId); + } + } + + return ret; +} + +#endif /* HAVE_PKCS8 || HAVE_PKCS12 */ + +#ifdef HAVE_PKCS12 + +/* encrypt PKCS 12 content + * + * NOTE: if out is NULL then outSz is set with the total buffer size needed and + * the error value LENGTH_ONLY_E is returned. + * + * input data to encrypt + * inputSz size of input buffer + * out buffer to hold the result + * outSz size of out buffer + * password password if used. Can be NULL for no password + * passwordSz size of password buffer + * vPKCS version of PKCS i.e. PKCS5v2 + * vAlgo algorithm version + * salt buffer holding salt if used. If NULL then a random salt is created + * saltSz size of salt buffer if it is not NULL + * itt number of iterations used + * rng random number generator to use + * heap possible heap hint for mallocs/frees + * + * returns the total size of encrypted content on success. + * + * data returned is : + * [ seq - obj [ seq -salt,itt]] , construct with encrypted data + */ +int EncryptContent(byte* input, word32 inputSz, byte* out, word32* outSz, + const char* password, int passwordSz, int vPKCS, int vAlgo, + byte* salt, word32 saltSz, int itt, WC_RNG* rng, void* heap) +{ + word32 sz; + word32 inOutIdx = 0; + word32 tmpIdx = 0; + word32 totalSz = 0; + word32 seqSz; + word32 innerSz; + int ret; + int version, id, blockSz = 0; +#ifdef WOLFSSL_SMALL_STACK + byte* saltTmp = NULL; + byte* cbcIv = NULL; +#else + byte saltTmp[MAX_SALT_SIZE]; + byte cbcIv[MAX_IV_SIZE]; +#endif + byte seq[MAX_SEQ_SZ]; + byte shr[MAX_SHORT_SZ]; + word32 maxShr = MAX_SHORT_SZ; + word32 algoSz; + const byte* algoName; + + (void)heap; + + WOLFSSL_ENTER("EncryptContent()"); + + if (CheckAlgo(vPKCS, vAlgo, &id, &version, &blockSz) < 0) + return ASN_INPUT_E; /* Algo ID error */ + + if (version == PKCS5v2) { + WOLFSSL_MSG("PKCS#5 version 2 not supported yet"); + return BAD_FUNC_ARG; + } + + if (saltSz > MAX_SALT_SIZE) + return ASN_PARSE_E; + + if (outSz == NULL) { + return BAD_FUNC_ARG; + } + + /* calculate size */ + /* size of constructed string at end */ + sz = Pkcs8Pad(NULL, inputSz, blockSz); + totalSz = ASN_TAG_SZ; + totalSz += SetLength(sz, seq); + totalSz += sz; + + /* size of sequence holding object id and sub sequence of salt and itt */ + algoName = OidFromId(id, oidPBEType, &algoSz); + if (algoName == NULL) { + WOLFSSL_MSG("Unknown Algorithm"); + return 0; + } + innerSz = SetObjectId(algoSz, seq); + innerSz += algoSz; + + /* get subsequence of salt and itt */ + if (salt == NULL || saltSz == 0) { + sz = 8; + } + else { + sz = saltSz; + } + seqSz = SetOctetString(sz, seq); + seqSz += sz; + + tmpIdx = 0; + seqSz += SetShortInt(shr, &tmpIdx, itt, maxShr); + innerSz += seqSz + SetSequence(seqSz, seq); + totalSz += innerSz + SetSequence(innerSz, seq); + + if (out == NULL) { + *outSz = totalSz; + return LENGTH_ONLY_E; + } + + inOutIdx = 0; + if (totalSz > *outSz) + 
return BUFFER_E; + + inOutIdx += SetSequence(innerSz, out + inOutIdx); + inOutIdx += SetObjectId(algoSz, out + inOutIdx); + XMEMCPY(out + inOutIdx, algoName, algoSz); + inOutIdx += algoSz; + inOutIdx += SetSequence(seqSz, out + inOutIdx); + + /* create random salt if one not provided */ + if (salt == NULL || saltSz == 0) { + saltSz = 8; + #ifdef WOLFSSL_SMALL_STACK + saltTmp = (byte*)XMALLOC(saltSz, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (saltTmp == NULL) + return MEMORY_E; + #endif + salt = saltTmp; + + if ((ret = wc_RNG_GenerateBlock(rng, saltTmp, saltSz)) != 0) { + WOLFSSL_MSG("Error generating random salt"); + #ifdef WOLFSSL_SMALL_STACK + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return ret; + } + } + inOutIdx += SetOctetString(saltSz, out + inOutIdx); + if (saltSz + inOutIdx > *outSz) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return BUFFER_E; + } + XMEMCPY(out + inOutIdx, salt, saltSz); + inOutIdx += saltSz; + + /* place iteration setting in buffer */ + ret = SetShortInt(out, &inOutIdx, itt, *outSz); + if (ret < 0) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return ret; + } + + if (inOutIdx + 1 > *outSz) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return BUFFER_E; + } + out[inOutIdx++] = ASN_CONTEXT_SPECIFIC | 0; + + /* get pad size and verify buffer room */ + sz = Pkcs8Pad(NULL, inputSz, blockSz); + if (sz + inOutIdx > *outSz) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return BUFFER_E; + } + inOutIdx += SetLength(sz, out + inOutIdx); + + /* copy input to output buffer and pad end */ + XMEMCPY(out + inOutIdx, input, inputSz); + sz = Pkcs8Pad(out + inOutIdx, inputSz, blockSz); +#ifdef WOLFSSL_SMALL_STACK + cbcIv = (byte*)XMALLOC(MAX_IV_SIZE, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (cbcIv == NULL) { + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + return MEMORY_E; + } +#endif + + /* encrypt */ + if ((ret = wc_CryptKey(password, passwordSz, salt, saltSz, itt, id, + out + inOutIdx, sz, version, cbcIv, 1, 0)) < 0) { + + #ifdef WOLFSSL_SMALL_STACK + XFREE(cbcIv, heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return ret; /* encrypt failure */ + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(cbcIv, heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + (void)rng; + + return inOutIdx + sz; +} + + +#endif /* HAVE_PKCS12 */ +#endif /* NO_PWDBASED */ + +#ifndef NO_RSA + +#ifndef HAVE_USER_RSA +#ifdef WOLFSSL_RENESAS_TSIP +/* This function is to retrieve key position information in a cert.* + * The information will be used to call TSIP TLS-linked API for * + * certificate verification. 
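+ * A hypothetical call (the n/e offsets come back relative to the start of
+ * input and are added to, so they should be zero-initialized):
+ *     word32 idx = 0, n = 0, nLen = 0, e = 0, eLen = 0;
+ *     ret = RsaPublicKeyDecodeRawIndex(cert, &idx, certSz,
+ *                                      &n, &nLen, &e, &eLen);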
*/ +static int RsaPublicKeyDecodeRawIndex(const byte* input, word32* inOutIdx, + word32 inSz, word32* key_n, + word32* key_n_len, word32* key_e, + word32* key_e_len) +{ + + int ret = 0; + int length = 0; +#if defined(OPENSSL_EXTRA) || defined(RSA_DECODE_EXTRA) + byte b; +#endif + + if (input == NULL || inOutIdx == NULL) + return BAD_FUNC_ARG; + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + +#if defined(OPENSSL_EXTRA) || defined(RSA_DECODE_EXTRA) + if ((*inOutIdx + 1) > inSz) + return BUFFER_E; + + b = input[*inOutIdx]; + if (b != ASN_INTEGER) { + /* not from decoded cert, will have algo id, skip past */ + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + if (SkipObjectId(input, inOutIdx, inSz) < 0) + return ASN_PARSE_E; + + /* Option NULL ASN.1 tag */ + if (*inOutIdx >= inSz) { + return BUFFER_E; + } + if (input[*inOutIdx] == ASN_TAG_NULL) { + ret = GetASNNull(input, inOutIdx, inSz); + if (ret != 0) + return ret; + } + + /* should have bit tag length and seq next */ + ret = CheckBitString(input, inOutIdx, NULL, inSz, 1, NULL); + if (ret != 0) + return ret; + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + } +#endif /* OPENSSL_EXTRA */ + + /* Get modulus */ + ret = GetASNInt(input, inOutIdx, &length, inSz); + *key_n += *inOutIdx; + if (ret < 0) { + return ASN_RSA_KEY_E; + } + if (key_n_len) + *key_n_len = length; + *inOutIdx += length; + + /* Get exponent */ + ret = GetASNInt(input, inOutIdx, &length, inSz); + *key_e += *inOutIdx; + if (ret < 0) { + return ASN_RSA_KEY_E; + } + if (key_e_len) + *key_e_len = length; + + return ret; +} +#endif /* WOLFSSL_RENESAS_TSIP */ + +int wc_RsaPublicKeyDecode_ex(const byte* input, word32* inOutIdx, word32 inSz, + const byte** n, word32* nSz, const byte** e, word32* eSz) +{ + int ret = 0; + int length = 0; +#if defined(OPENSSL_EXTRA) || defined(RSA_DECODE_EXTRA) + word32 localIdx; + byte tag; +#endif + + if (input == NULL || inOutIdx == NULL) + return BAD_FUNC_ARG; + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + +#if defined(OPENSSL_EXTRA) || defined(RSA_DECODE_EXTRA) + localIdx = *inOutIdx; + if (GetASNTag(input, &localIdx, &tag, inSz) < 0) + return BUFFER_E; + + if (tag != ASN_INTEGER) { + /* not from decoded cert, will have algo id, skip past */ + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + if (SkipObjectId(input, inOutIdx, inSz) < 0) + return ASN_PARSE_E; + + /* Option NULL ASN.1 tag */ + if (*inOutIdx >= inSz) { + return BUFFER_E; + } + + localIdx = *inOutIdx; + if (GetASNTag(input, &localIdx, &tag, inSz) < 0) + return ASN_PARSE_E; + + if (tag == ASN_TAG_NULL) { + ret = GetASNNull(input, inOutIdx, inSz); + if (ret != 0) + return ret; + } + + /* should have bit tag length and seq next */ + ret = CheckBitString(input, inOutIdx, NULL, inSz, 1, NULL); + if (ret != 0) + return ret; + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + } +#endif /* OPENSSL_EXTRA */ + + /* Get modulus */ + ret = GetASNInt(input, inOutIdx, &length, inSz); + if (ret < 0) { + return ASN_RSA_KEY_E; + } + if (nSz) + *nSz = length; + if (n) + *n = &input[*inOutIdx]; + *inOutIdx += length; + + /* Get exponent */ + ret = GetASNInt(input, inOutIdx, &length, inSz); + if (ret < 0) { + return ASN_RSA_KEY_E; + } + if (eSz) + *eSz = length; + if (e) + *e = &input[*inOutIdx]; + *inOutIdx += length; + + return ret; +} + +int wc_RsaPublicKeyDecode(const byte* input, word32* inOutIdx, RsaKey* key, + word32 
inSz) +{ + int ret; + const byte *n = NULL, *e = NULL; + word32 nSz = 0, eSz = 0; + + if (key == NULL) + return BAD_FUNC_ARG; + + ret = wc_RsaPublicKeyDecode_ex(input, inOutIdx, inSz, &n, &nSz, &e, &eSz); + if (ret == 0) { + ret = wc_RsaPublicKeyDecodeRaw(n, nSz, e, eSz, key); + } + + return ret; +} + +/* import RSA public key elements (n, e) into RsaKey structure (key) */ +int wc_RsaPublicKeyDecodeRaw(const byte* n, word32 nSz, const byte* e, + word32 eSz, RsaKey* key) +{ + if (n == NULL || e == NULL || key == NULL) + return BAD_FUNC_ARG; + + key->type = RSA_PUBLIC; + + if (mp_init(&key->n) != MP_OKAY) + return MP_INIT_E; + + if (mp_read_unsigned_bin(&key->n, n, nSz) != 0) { + mp_clear(&key->n); + return ASN_GETINT_E; + } +#ifdef HAVE_WOLF_BIGINT + if ((int)nSz > 0 && wc_bigint_from_unsigned_bin(&key->n.raw, n, nSz) != 0) { + mp_clear(&key->n); + return ASN_GETINT_E; + } +#endif /* HAVE_WOLF_BIGINT */ + + if (mp_init(&key->e) != MP_OKAY) { + mp_clear(&key->n); + return MP_INIT_E; + } + + if (mp_read_unsigned_bin(&key->e, e, eSz) != 0) { + mp_clear(&key->n); + mp_clear(&key->e); + return ASN_GETINT_E; + } +#ifdef HAVE_WOLF_BIGINT + if ((int)eSz > 0 && wc_bigint_from_unsigned_bin(&key->e.raw, e, eSz) != 0) { + mp_clear(&key->n); + mp_clear(&key->e); + return ASN_GETINT_E; + } +#endif /* HAVE_WOLF_BIGINT */ + +#ifdef WOLFSSL_XILINX_CRYPT + if (wc_InitRsaHw(key) != 0) { + return BAD_STATE_E; + } +#endif + + return 0; +} +#endif /* HAVE_USER_RSA */ +#endif /* !NO_RSA */ + +#ifndef NO_DH + +int wc_DhKeyDecode(const byte* input, word32* inOutIdx, DhKey* key, word32 inSz) +{ + int ret = 0; + int length; + #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL) + word32 oid = 0, temp = 0; + #endif + + WOLFSSL_ENTER("wc_DhKeyDecode"); + + if (inOutIdx == NULL) + return BAD_FUNC_ARG; + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL) + temp = *inOutIdx; + #endif + + /* Assume input started after 1.2.840.113549.1.3.1 dhKeyAgreement */ + if (GetInt(&key->p, input, inOutIdx, inSz) < 0 || + GetInt(&key->g, input, inOutIdx, inSz) < 0) { + ret = ASN_DH_KEY_E; + } + + #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL) + /* If ASN_DH_KEY_E: Check if input started at beginning of key */ + if (ret == ASN_DH_KEY_E) { + /* rewind back to after the first sequence */ + *inOutIdx = temp; + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + /* Check for dhKeyAgreement */ + ret = GetObjectId(input, inOutIdx, &oid, oidKeyType, inSz); + if (oid != DHk || ret < 0) + return ASN_DH_KEY_E; + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + if (GetInt(&key->p, input, inOutIdx, inSz) < 0 || + GetInt(&key->g, input, inOutIdx, inSz) < 0) { + return ASN_DH_KEY_E; + } + } + + temp = *inOutIdx; + ret = (CheckBitString(input, inOutIdx, &length, inSz, 0, NULL) == 0); + if (ret > 0) { + /* Found Bit String */ + if (GetInt(&key->pub, input, inOutIdx, inSz) == 0) { + WOLFSSL_MSG("Found Public Key"); + ret = 0; + } + } else { + *inOutIdx = temp; + ret = (GetOctetString(input, inOutIdx, &length, inSz) >= 0); + if (ret > 0) { + /* Found Octet String */ + if (GetInt(&key->priv, input, inOutIdx, inSz) == 0) { + WOLFSSL_MSG("Found Private Key"); + ret = 0; + } + } else { + /* Don't use length from failed CheckBitString/GetOctetString */ + *inOutIdx = temp; + ret = 0; + } + } + #endif /* WOLFSSL_QT || OPENSSL_ALL */ + + WOLFSSL_MSG("wc_DhKeyDecode Success"); + + return ret; +} + + +int wc_DhParamsLoad(const byte* 
input, word32 inSz, byte* p, word32* pInOutSz, + byte* g, word32* gInOutSz) +{ + word32 idx = 0; + int ret; + int length; + + if (GetSequence(input, &idx, &length, inSz) <= 0) + return ASN_PARSE_E; + + ret = GetASNInt(input, &idx, &length, inSz); + if (ret != 0) + return ret; + + if (length <= (int)*pInOutSz) { + XMEMCPY(p, &input[idx], length); + *pInOutSz = length; + } + else { + return BUFFER_E; + } + idx += length; + + ret = GetASNInt(input, &idx, &length, inSz); + if (ret != 0) + return ret; + + if (length <= (int)*gInOutSz) { + XMEMCPY(g, &input[idx], length); + *gInOutSz = length; + } + else { + return BUFFER_E; + } + + return 0; +} +#endif /* NO_DH */ + + +#ifndef NO_DSA + +int DsaPublicKeyDecode(const byte* input, word32* inOutIdx, DsaKey* key, + word32 inSz) +{ + int length; + int ret = 0; + word32 oid; + + if (input == NULL || inOutIdx == NULL || key == NULL) + return BAD_FUNC_ARG; + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + if (GetInt(&key->p, input, inOutIdx, inSz) < 0 || + GetInt(&key->q, input, inOutIdx, inSz) < 0 || + GetInt(&key->g, input, inOutIdx, inSz) < 0 || + GetInt(&key->y, input, inOutIdx, inSz) < 0 ) + ret = ASN_DH_KEY_E; + + if (ret != 0) { + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + ret = GetObjectId(input, inOutIdx, &oid, oidIgnoreType, inSz); + if (ret != 0) + return ret; + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + if (GetInt(&key->p, input, inOutIdx, inSz) < 0 || + GetInt(&key->q, input, inOutIdx, inSz) < 0 || + GetInt(&key->g, input, inOutIdx, inSz) < 0) + return ASN_DH_KEY_E; + + if (CheckBitString(input, inOutIdx, &length, inSz, 0, NULL) < 0) + return ASN_PARSE_E; + + if (GetInt(&key->y, input, inOutIdx, inSz) < 0 ) + return ASN_DH_KEY_E; + + ret = 0; + } + + key->type = DSA_PUBLIC; + return ret; +} + + +int DsaPrivateKeyDecode(const byte* input, word32* inOutIdx, DsaKey* key, + word32 inSz) +{ + int length, version, ret = 0, temp = 0; + + /* Sanity checks on input */ + if (input == NULL || inOutIdx == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + temp = (int)*inOutIdx; + + /* Default case expects a certificate with OctetString but no version ID */ + ret = GetInt(&key->p, input, inOutIdx, inSz); + if (ret < 0) { + mp_clear(&key->p); + ret = ASN_PARSE_E; + } + else { + ret = GetInt(&key->q, input, inOutIdx, inSz); + if (ret < 0) { + mp_clear(&key->p); + mp_clear(&key->q); + ret = ASN_PARSE_E; + } + else { + ret = GetInt(&key->g, input, inOutIdx, inSz); + if (ret < 0) { + mp_clear(&key->p); + mp_clear(&key->q); + mp_clear(&key->g); + ret = ASN_PARSE_E; + } + else { + ret = GetOctetString(input, inOutIdx, &length, inSz); + if (ret < 0) { + mp_clear(&key->p); + mp_clear(&key->q); + mp_clear(&key->g); + ret = ASN_PARSE_E; + } + else { + ret = GetInt(&key->y, input, inOutIdx, inSz); + if (ret < 0) { + mp_clear(&key->p); + mp_clear(&key->q); + mp_clear(&key->g); + mp_clear(&key->y); + ret = ASN_PARSE_E; + } + } + } + } + } + /* An alternate pass if default certificate fails parsing */ + if (ret == ASN_PARSE_E) { + *inOutIdx = temp; + if (GetMyVersion(input, inOutIdx, &version, inSz) < 0) + return ASN_PARSE_E; + + if (GetInt(&key->p, input, inOutIdx, inSz) < 0 || + GetInt(&key->q, input, inOutIdx, inSz) < 0 || + GetInt(&key->g, input, inOutIdx, inSz) < 0 || + GetInt(&key->y, input, inOutIdx, inSz) < 0 || + GetInt(&key->x, input, inOutIdx, inSz) < 0 ) + return 
ASN_DH_KEY_E; + } + + key->type = DSA_PRIVATE; + return 0; +} + +static mp_int* GetDsaInt(DsaKey* key, int idx) +{ + if (idx == 0) + return &key->p; + if (idx == 1) + return &key->q; + if (idx == 2) + return &key->g; + if (idx == 3) + return &key->y; + if (idx == 4) + return &key->x; + + return NULL; +} + +/* Release Tmp DSA resources */ +static WC_INLINE void FreeTmpDsas(byte** tmps, void* heap) +{ + int i; + + for (i = 0; i < DSA_INTS; i++) + XFREE(tmps[i], heap, DYNAMIC_TYPE_DSA); + + (void)heap; +} + +#if !defined(HAVE_SELFTEST) && defined(WOLFSSL_KEY_GEN) +/* Write a public DSA key to output */ +int wc_SetDsaPublicKey(byte* output, DsaKey* key, + int outLen, int with_header) +{ + /* p, g, q = DSA params, y = public exponent */ +#ifdef WOLFSSL_SMALL_STACK + byte* p = NULL; + byte* g = NULL; + byte* q = NULL; + byte* y = NULL; +#else + byte p[MAX_DSA_INT_SZ]; + byte g[MAX_DSA_INT_SZ]; + byte q[MAX_DSA_INT_SZ]; + byte y[MAX_DSA_INT_SZ]; +#endif + byte innerSeq[MAX_SEQ_SZ]; + byte outerSeq[MAX_SEQ_SZ]; + byte bitString[1 + MAX_LENGTH_SZ + 1]; + int idx, pSz, gSz, qSz, ySz, innerSeqSz, outerSeqSz, bitStringSz = 0; + + WOLFSSL_ENTER("wc_SetDsaPublicKey"); + + if (output == NULL || key == NULL || outLen < MAX_SEQ_SZ) { + return BAD_FUNC_ARG; + } + + /* p */ +#ifdef WOLFSSL_SMALL_STACK + p = (byte*)XMALLOC(MAX_DSA_INT_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (p == NULL) + return MEMORY_E; +#endif + if ((pSz = SetASNIntMP(&key->p, MAX_DSA_INT_SZ, p)) < 0) { + WOLFSSL_MSG("SetASNIntMP Error with p"); +#ifdef WOLFSSL_SMALL_STACK + XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return pSz; + } + + /* q */ +#ifdef WOLFSSL_SMALL_STACK + q = (byte*)XMALLOC(MAX_DSA_INT_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (q == NULL) + return MEMORY_E; +#endif + if ((qSz = SetASNIntMP(&key->q, MAX_DSA_INT_SZ, q)) < 0) { + WOLFSSL_MSG("SetASNIntMP Error with q"); +#ifdef WOLFSSL_SMALL_STACK + XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return qSz; + } + + /* g */ +#ifdef WOLFSSL_SMALL_STACK + g = (byte*)XMALLOC(MAX_DSA_INT_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (g == NULL) + return MEMORY_E; +#endif + if ((gSz = SetASNIntMP(&key->g, MAX_DSA_INT_SZ, g)) < 0) { + WOLFSSL_MSG("SetASNIntMP Error with g"); +#ifdef WOLFSSL_SMALL_STACK + XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(g, key->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return gSz; + } + + /* y */ +#ifdef WOLFSSL_SMALL_STACK + y = (byte*)XMALLOC(MAX_DSA_INT_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (y == NULL) + return MEMORY_E; +#endif + if ((ySz = SetASNIntMP(&key->y, MAX_DSA_INT_SZ, y)) < 0) { + WOLFSSL_MSG("SetASNIntMP Error with y"); +#ifdef WOLFSSL_SMALL_STACK + XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(g, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(y, key->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ySz; + } + + innerSeqSz = SetSequence(pSz + qSz + gSz, innerSeq); + + /* check output size */ + if ((innerSeqSz + pSz + qSz + gSz) > outLen) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(g, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(y, key->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + WOLFSSL_MSG("Error, output size smaller than outlen"); + return BUFFER_E; + } + + if (with_header) { + int algoSz; +#ifdef WOLFSSL_SMALL_STACK + byte* algo = NULL; + + algo = 
(byte*)XMALLOC(MAX_ALGO_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (algo == NULL) { + XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(g, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(y, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + return MEMORY_E; + } +#else + byte algo[MAX_ALGO_SZ]; +#endif + algoSz = SetAlgoID(DSAk, algo, oidKeyType, 0); + bitStringSz = SetBitString(ySz, 0, bitString); + outerSeqSz = SetSequence(algoSz + innerSeqSz + pSz + qSz + gSz, + outerSeq); + + idx = SetSequence(algoSz + innerSeqSz + pSz + qSz + gSz + bitStringSz + + ySz + outerSeqSz, output); + + /* check output size */ + if ((idx + algoSz + bitStringSz + innerSeqSz + pSz + qSz + gSz + ySz) > + outLen) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(g, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(y, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(algo, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + WOLFSSL_MSG("Error, output size smaller than outlen"); + return BUFFER_E; + } + + /* outerSeq */ + XMEMCPY(output + idx, outerSeq, outerSeqSz); + idx += outerSeqSz; + /* algo */ + XMEMCPY(output + idx, algo, algoSz); + idx += algoSz; +#ifdef WOLFSSL_SMALL_STACK + XFREE(algo, key->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + } else { + idx = 0; + } + + /* innerSeq */ + XMEMCPY(output + idx, innerSeq, innerSeqSz); + idx += innerSeqSz; + /* p */ + XMEMCPY(output + idx, p, pSz); + idx += pSz; + /* q */ + XMEMCPY(output + idx, q, qSz); + idx += qSz; + /* g */ + XMEMCPY(output + idx, g, gSz); + idx += gSz; + /* bit string */ + XMEMCPY(output + idx, bitString, bitStringSz); + idx += bitStringSz; + /* y */ + XMEMCPY(output + idx, y, ySz); + idx += ySz; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(g, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(y, key->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return idx; +} + +/* Convert DSA Public key to DER format, write to output (inLen), return bytes + written */ +int wc_DsaKeyToPublicDer(DsaKey* key, byte* output, word32 inLen) +{ + return wc_SetDsaPublicKey(output, key, inLen, 1); +} +#endif /* !HAVE_SELFTEST && WOLFSSL_KEY_GEN */ + +/* Convert private DsaKey key to DER format, write to output (inLen), + return bytes written */ +int wc_DsaKeyToDer(DsaKey* key, byte* output, word32 inLen) +{ + word32 seqSz, verSz, rawLen, intTotalLen = 0; + word32 sizes[DSA_INTS]; + int i, j, outLen, ret = 0, mpSz; + + byte seq[MAX_SEQ_SZ]; + byte ver[MAX_VERSION_SZ]; + byte* tmps[DSA_INTS]; + + if (!key || !output) + return BAD_FUNC_ARG; + + if (key->type != DSA_PRIVATE) + return BAD_FUNC_ARG; + + for (i = 0; i < DSA_INTS; i++) + tmps[i] = NULL; + + /* write all big ints from key to DER tmps */ + for (i = 0; i < DSA_INTS; i++) { + mp_int* keyInt = GetDsaInt(key, i); + + rawLen = mp_unsigned_bin_size(keyInt) + 1; + tmps[i] = (byte*)XMALLOC(rawLen + MAX_SEQ_SZ, key->heap, + DYNAMIC_TYPE_DSA); + if (tmps[i] == NULL) { + ret = MEMORY_E; + break; + } + + mpSz = SetASNIntMP(keyInt, -1, tmps[i]); + if (mpSz < 0) { + ret = mpSz; + break; + } + intTotalLen += (sizes[i] = mpSz); + } + + if (ret != 0) { + FreeTmpDsas(tmps, key->heap); + return ret; + } + + /* make headers */ + verSz = SetMyVersion(0, ver, FALSE); + seqSz = SetSequence(verSz + intTotalLen, seq); + + outLen = seqSz + verSz + intTotalLen; + if (outLen > (int)inLen) { + FreeTmpDsas(tmps, key->heap); + return BAD_FUNC_ARG; + } + + /* 
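version header, sequence header and integer buffers are ready;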
write to output */ + XMEMCPY(output, seq, seqSz); + j = seqSz; + XMEMCPY(output + j, ver, verSz); + j += verSz; + + for (i = 0; i < DSA_INTS; i++) { + XMEMCPY(output + j, tmps[i], sizes[i]); + j += sizes[i]; + } + FreeTmpDsas(tmps, key->heap); + + return outLen; +} + +#endif /* NO_DSA */ + +void InitDecodedCert(DecodedCert* cert, + const byte* source, word32 inSz, void* heap) +{ + if (cert != NULL) { + XMEMSET(cert, 0, sizeof(DecodedCert)); + + cert->subjectCNEnc = CTC_UTF8; + cert->issuer[0] = '\0'; + cert->subject[0] = '\0'; + cert->source = source; /* don't own */ + cert->maxIdx = inSz; /* can't go over this index */ + cert->heap = heap; + cert->maxPathLen = WOLFSSL_MAX_PATH_LEN; + #ifdef WOLFSSL_CERT_GEN + cert->subjectSNEnc = CTC_UTF8; + cert->subjectCEnc = CTC_PRINTABLE; + cert->subjectLEnc = CTC_UTF8; + cert->subjectSTEnc = CTC_UTF8; + cert->subjectOEnc = CTC_UTF8; + cert->subjectOUEnc = CTC_UTF8; + #endif /* WOLFSSL_CERT_GEN */ + + #ifndef NO_CERTS + InitSignatureCtx(&cert->sigCtx, heap, INVALID_DEVID); + #endif + } +} + + +void FreeAltNames(DNS_entry* altNames, void* heap) +{ + (void)heap; + + while (altNames) { + DNS_entry* tmp = altNames->next; + + XFREE(altNames->name, heap, DYNAMIC_TYPE_ALTNAME); + XFREE(altNames, heap, DYNAMIC_TYPE_ALTNAME); + altNames = tmp; + } +} + +#ifndef IGNORE_NAME_CONSTRAINTS + +void FreeNameSubtrees(Base_entry* names, void* heap) +{ + (void)heap; + + while (names) { + Base_entry* tmp = names->next; + + XFREE(names->name, heap, DYNAMIC_TYPE_ALTNAME); + XFREE(names, heap, DYNAMIC_TYPE_ALTNAME); + names = tmp; + } +} + +#endif /* IGNORE_NAME_CONSTRAINTS */ + +void FreeDecodedCert(DecodedCert* cert) +{ + if (cert == NULL) + return; + if (cert->subjectCNStored == 1) + XFREE(cert->subjectCN, cert->heap, DYNAMIC_TYPE_SUBJECT_CN); + if (cert->pubKeyStored == 1) + XFREE((void*)cert->publicKey, cert->heap, DYNAMIC_TYPE_PUBLIC_KEY); + if (cert->weOwnAltNames && cert->altNames) + FreeAltNames(cert->altNames, cert->heap); +#ifndef IGNORE_NAME_CONSTRAINTS + if (cert->altEmailNames) + FreeAltNames(cert->altEmailNames, cert->heap); + if (cert->permittedNames) + FreeNameSubtrees(cert->permittedNames, cert->heap); + if (cert->excludedNames) + FreeNameSubtrees(cert->excludedNames, cert->heap); +#endif /* IGNORE_NAME_CONSTRAINTS */ +#ifdef WOLFSSL_SEP + XFREE(cert->deviceType, cert->heap, DYNAMIC_TYPE_X509_EXT); + XFREE(cert->hwType, cert->heap, DYNAMIC_TYPE_X509_EXT); + XFREE(cert->hwSerialNum, cert->heap, DYNAMIC_TYPE_X509_EXT); +#endif /* WOLFSSL_SEP */ +#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + if (cert->issuerName.fullName != NULL) + XFREE(cert->issuerName.fullName, cert->heap, DYNAMIC_TYPE_X509); + if (cert->subjectName.fullName != NULL) + XFREE(cert->subjectName.fullName, cert->heap, DYNAMIC_TYPE_X509); +#endif /* OPENSSL_EXTRA */ +#ifdef WOLFSSL_RENESAS_TSIP_TLS + if (cert->tsip_encRsaKeyIdx != NULL) + XFREE(cert->tsip_encRsaKeyIdx, cert->heap, DYNAMIC_TYPE_RSA); +#endif +#ifndef NO_CERTS + FreeSignatureCtx(&cert->sigCtx); +#endif +} + +static int GetCertHeader(DecodedCert* cert) +{ + int ret = 0, len; + + if (GetSequence(cert->source, &cert->srcIdx, &len, cert->maxIdx) < 0) + return ASN_PARSE_E; + + /* Reset the max index for the size indicated in the outer wrapper. 
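+ * The outer wrapper is the top-level shape of a certificate
+ * (RFC 5280, reproduced for reference):
+ *
+ *     Certificate ::= SEQUENCE {
+ *         tbsCertificate      TBSCertificate,
+ *         signatureAlgorithm  AlgorithmIdentifier,
+ *         signatureValue      BIT STRING }
+ *
+ * so maxIdx is clamped to the end of that outer SEQUENCE and
+ * sigIndex, set just below, marks the end of tbsCertificate.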
*/ + cert->maxIdx = len + cert->srcIdx; + cert->certBegin = cert->srcIdx; + + if (GetSequence(cert->source, &cert->srcIdx, &len, cert->maxIdx) < 0) + return ASN_PARSE_E; + + cert->sigIndex = len + cert->srcIdx; + if (cert->sigIndex > cert->maxIdx) + return ASN_PARSE_E; + + if (GetExplicitVersion(cert->source, &cert->srcIdx, &cert->version, + cert->sigIndex) < 0) + return ASN_PARSE_E; + + if (GetSerialNumber(cert->source, &cert->srcIdx, cert->serial, + &cert->serialSz, cert->sigIndex) < 0) + return ASN_PARSE_E; + + return ret; +} + +#if !defined(NO_RSA) +/* Store Rsa Key, may save later, Dsa could use in future */ +static int StoreRsaKey(DecodedCert* cert, word32 bitStringEnd) +{ + int length; + word32 recvd = cert->srcIdx; + + if (GetSequence(cert->source, &cert->srcIdx, &length, bitStringEnd) < 0) + return ASN_PARSE_E; + + recvd = cert->srcIdx - recvd; + length += recvd; + + while (recvd--) + cert->srcIdx--; +#if defined(WOLFSSL_RENESAS_TSIP) + cert->sigCtx.pubkey_n_start = cert->sigCtx.pubkey_e_start = cert->srcIdx; +#endif + cert->pubKeySize = length; + cert->publicKey = cert->source + cert->srcIdx; + cert->srcIdx += length; + + return 0; +} +#endif /* !NO_RSA */ + +#ifdef HAVE_ECC + + /* return 0 on success if the ECC curve oid sum is supported */ + static int CheckCurve(word32 oid) + { + int ret = 0; + word32 oidSz = 0; + + ret = wc_ecc_get_oid(oid, NULL, &oidSz); + if (ret < 0 || oidSz == 0) { + WOLFSSL_MSG("CheckCurve not found"); + ret = ALGO_ID_E; + } + + return ret; + } + +#endif /* HAVE_ECC */ + +static int GetKey(DecodedCert* cert) +{ + int length; +#if !defined(NO_DSA) && defined(WOLFSSL_QT) + int tmpLen; +#endif +#if defined(HAVE_ECC) || defined(HAVE_NTRU) + int tmpIdx = cert->srcIdx; +#endif + + if (GetSequence(cert->source, &cert->srcIdx, &length, cert->maxIdx) < 0) + return ASN_PARSE_E; + +#if !defined(NO_DSA) && defined(WOLFSSL_QT) + tmpLen = length + 4; +#endif + + if (GetAlgoId(cert->source, &cert->srcIdx, + &cert->keyOID, oidKeyType, cert->maxIdx) < 0) + return ASN_PARSE_E; + + switch (cert->keyOID) { + #ifndef NO_RSA + case RSAk: + { + int ret; + + ret = CheckBitString(cert->source, &cert->srcIdx, &length, + cert->maxIdx, 1, NULL); + if (ret != 0) + return ret; + + #ifdef HAVE_OCSP + ret = CalcHashId(cert->source + cert->srcIdx, length, + cert->subjectKeyHash); + if (ret != 0) + return ret; + #endif + + return StoreRsaKey(cert, cert->srcIdx + length); + } + + #endif /* NO_RSA */ + #ifdef HAVE_NTRU + case NTRUk: + { + const byte* key = &cert->source[tmpIdx]; + byte* next = (byte*)key; + word16 keyLen; + word32 rc; + word32 remaining = cert->maxIdx - cert->srcIdx; + byte* publicKey; +#ifdef WOLFSSL_SMALL_STACK + byte* keyBlob = NULL; +#else + byte keyBlob[MAX_NTRU_KEY_SZ]; +#endif + rc = ntru_crypto_ntru_encrypt_subjectPublicKeyInfo2PublicKey(key, + &keyLen, NULL, &next, &remaining); + if (rc != NTRU_OK) + return ASN_NTRU_KEY_E; + if (keyLen > MAX_NTRU_KEY_SZ) + return ASN_NTRU_KEY_E; + +#ifdef WOLFSSL_SMALL_STACK + keyBlob = (byte*)XMALLOC(MAX_NTRU_KEY_SZ, cert->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (keyBlob == NULL) + return MEMORY_E; +#endif + + rc = ntru_crypto_ntru_encrypt_subjectPublicKeyInfo2PublicKey(key, + &keyLen, keyBlob, &next, &remaining); + if (rc != NTRU_OK) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(keyBlob, cert->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ASN_NTRU_KEY_E; + } + + if ( (next - key) < 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(keyBlob, cert->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ASN_NTRU_KEY_E; + } + + cert->srcIdx = tmpIdx + 
(int)(next - key); + + publicKey = (byte*)XMALLOC(keyLen, cert->heap, + DYNAMIC_TYPE_PUBLIC_KEY); + if (publicKey == NULL) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(keyBlob, cert->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return MEMORY_E; + } + XMEMCPY(publicKey, keyBlob, keyLen); + cert->publicKey = publicKey; + cert->pubKeyStored = 1; + cert->pubKeySize = keyLen; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(keyBlob, cert->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return 0; + } + #endif /* HAVE_NTRU */ + #ifdef HAVE_ECC + case ECDSAk: + { + int ret; + byte seq[5]; + int pubLen = length + 1 + SetLength(length, seq); + word32 localIdx; + byte* publicKey; + byte tag; + + localIdx = cert->srcIdx; + if (GetASNTag(cert->source, &localIdx, &tag, cert->maxIdx) < 0) + return ASN_PARSE_E; + + if (tag != (ASN_SEQUENCE | ASN_CONSTRUCTED)) { + if (GetObjectId(cert->source, &cert->srcIdx, + &cert->pkCurveOID, oidCurveType, cert->maxIdx) < 0) + return ASN_PARSE_E; + + if (CheckCurve(cert->pkCurveOID) < 0) + return ECC_CURVE_OID_E; + + /* key header */ + ret = CheckBitString(cert->source, &cert->srcIdx, &length, + cert->maxIdx, 1, NULL); + if (ret != 0) + return ret; + #ifdef HAVE_OCSP + ret = CalcHashId(cert->source + cert->srcIdx, length, + cert->subjectKeyHash); + if (ret != 0) + return ret; + #endif + } + + publicKey = (byte*)XMALLOC(pubLen, cert->heap, + DYNAMIC_TYPE_PUBLIC_KEY); + if (publicKey == NULL) + return MEMORY_E; + XMEMCPY(publicKey, &cert->source[tmpIdx], pubLen); + cert->publicKey = publicKey; + cert->pubKeyStored = 1; + cert->pubKeySize = pubLen; + + cert->srcIdx = tmpIdx + pubLen; + + return 0; + } + #endif /* HAVE_ECC */ + #ifdef HAVE_ED25519 + case ED25519k: + { + byte* publicKey; + int ret; + + cert->pkCurveOID = ED25519k; + + ret = CheckBitString(cert->source, &cert->srcIdx, &length, + cert->maxIdx, 1, NULL); + if (ret != 0) + return ret; + + #ifdef HAVE_OCSP + ret = CalcHashId(cert->source + cert->srcIdx, length, + cert->subjectKeyHash); + if (ret != 0) + return ret; + #endif + + publicKey = (byte*) XMALLOC(length, cert->heap, + DYNAMIC_TYPE_PUBLIC_KEY); + if (publicKey == NULL) + return MEMORY_E; + XMEMCPY(publicKey, &cert->source[cert->srcIdx], length); + cert->publicKey = publicKey; + cert->pubKeyStored = 1; + cert->pubKeySize = length; + + cert->srcIdx += length; + + return 0; + } + #endif /* HAVE_ED25519 */ + #ifdef HAVE_ED448 + case ED448k: + { + byte* publicKey; + int ret; + + cert->pkCurveOID = ED448k; + + ret = CheckBitString(cert->source, &cert->srcIdx, &length, + cert->maxIdx, 1, NULL); + if (ret != 0) + return ret; + + #ifdef HAVE_OCSP + ret = CalcHashId(cert->source + cert->srcIdx, length, + cert->subjectKeyHash); + if (ret != 0) + return ret; + #endif + + publicKey = (byte*) XMALLOC(length, cert->heap, + DYNAMIC_TYPE_PUBLIC_KEY); + if (publicKey == NULL) + return MEMORY_E; + XMEMCPY(publicKey, &cert->source[cert->srcIdx], length); + cert->publicKey = publicKey; + cert->pubKeyStored = 1; + cert->pubKeySize = length; + + cert->srcIdx += length; + + return 0; + } + #endif /* HAVE_ED448 */ + #if !defined(NO_DSA) && defined(WOLFSSL_QT) + case DSAk: + { + int ret; + ret = GetSequence(cert->source, &cert->srcIdx, &length, + cert->maxIdx); + if (ret < 0) + return ret; + + ret = SkipInt(cert->source, &cert->srcIdx, cert->maxIdx); + if (ret != 0) + return ret; + ret = SkipInt(cert->source, &cert->srcIdx, cert->maxIdx); + if (ret != 0) + return ret; + ret = SkipInt(cert->source, &cert->srcIdx, cert->maxIdx); + if (ret != 0) + return ret; + + ret = CheckBitString(cert->source, 
&cert->srcIdx, &length, + cert->maxIdx, 1, NULL); + if (ret != 0) + return ret; + + ret = GetASNInt(cert->source, &cert->srcIdx, &length, cert->maxIdx); + if (ret !=0) + return ASN_PARSE_E; + + cert->publicKey = cert->source + tmpIdx; + cert->pubKeySize = tmpLen; + cert->srcIdx += length; + return 0; + } + #endif /* NO_DSA && QT */ + default: + return ASN_UNKNOWN_OID_E; + } +} + +#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) +#if defined(HAVE_ECC) +/* Converts ECC curve enum values in ecc_curve_id to the associated OpenSSL NID + value */ +WOLFSSL_API int EccEnumToNID(int n) +{ + WOLFSSL_ENTER("EccEnumToNID()"); + + switch(n) { + case ECC_SECP192R1: + return NID_X9_62_prime192v1; + case ECC_PRIME192V2: + return NID_X9_62_prime192v2; + case ECC_PRIME192V3: + return NID_X9_62_prime192v3; + case ECC_PRIME239V1: + return NID_X9_62_prime239v1; + case ECC_PRIME239V2: + return NID_X9_62_prime239v2; + case ECC_PRIME239V3: + return NID_X9_62_prime239v3; + case ECC_SECP256R1: + return NID_X9_62_prime256v1; + case ECC_SECP112R1: + return NID_secp112r1; + case ECC_SECP112R2: + return NID_secp112r2; + case ECC_SECP128R1: + return NID_secp128r1; + case ECC_SECP128R2: + return NID_secp128r2; + case ECC_SECP160R1: + return NID_secp160r1; + case ECC_SECP160R2: + return NID_secp160r2; + case ECC_SECP224R1: + return NID_secp224r1; + case ECC_SECP384R1: + return NID_secp384r1; + case ECC_SECP521R1: + return NID_secp521r1; + case ECC_SECP160K1: + return NID_secp160k1; + case ECC_SECP192K1: + return NID_secp192k1; + case ECC_SECP224K1: + return NID_secp224k1; + case ECC_SECP256K1: + return NID_secp256k1; + case ECC_BRAINPOOLP160R1: + return NID_brainpoolP160r1; + case ECC_BRAINPOOLP192R1: + return NID_brainpoolP192r1; + case ECC_BRAINPOOLP224R1: + return NID_brainpoolP224r1; + case ECC_BRAINPOOLP256R1: + return NID_brainpoolP256r1; + case ECC_BRAINPOOLP320R1: + return NID_brainpoolP320r1; + case ECC_BRAINPOOLP384R1: + return NID_brainpoolP384r1; + case ECC_BRAINPOOLP512R1: + return NID_brainpoolP512r1; + default: + WOLFSSL_MSG("NID not found"); + return -1; + } +} +#endif /* HAVE_ECC */ +#endif /* OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL */ + +#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) +int wc_OBJ_sn2nid(const char *sn) +{ + const struct { + const char *sn; + int nid; + } sn2nid[] = { + {WOLFSSL_COMMON_NAME, NID_commonName}, + {WOLFSSL_COUNTRY_NAME, NID_countryName}, + {WOLFSSL_LOCALITY_NAME, NID_localityName}, + {WOLFSSL_STATE_NAME, NID_stateOrProvinceName}, + {WOLFSSL_ORG_NAME, NID_organizationName}, + {WOLFSSL_ORGUNIT_NAME, NID_organizationalUnitName}, + {WOLFSSL_EMAIL_ADDR, NID_emailAddress}, + {NULL, -1}}; + + int i; + #ifdef HAVE_ECC + int eccEnum; + #endif + WOLFSSL_ENTER("OBJ_sn2nid"); + for(i=0; sn2nid[i].sn != NULL; i++) { + if(XSTRNCMP(sn, sn2nid[i].sn, XSTRLEN(sn2nid[i].sn)) == 0) { + return sn2nid[i].nid; + } + } + #ifdef HAVE_ECC + /* Nginx uses this OpenSSL string. 
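+ * The remaps below normalize OpenSSL short names onto wolfSSL's
+ * curve table; the net effect of the code that follows:
+ *
+ *     wc_OBJ_sn2nid("prime256v1") -> NID_X9_62_prime256v1
+ *     wc_OBJ_sn2nid("secp384r1")  -> NID_secp384r1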
*/ + if (XSTRNCMP(sn, "prime256v1", 10) == 0) + sn = "SECP256R1"; + if (XSTRNCMP(sn, "secp384r1", 10) == 0) + sn = "SECP384R1"; + /* find based on name and return NID */ + for (i = 0; ecc_sets[i].size != 0 && ecc_sets[i].name != NULL; i++) { + if (XSTRNCMP(sn, ecc_sets[i].name, ECC_MAXNAME) == 0) { + eccEnum = ecc_sets[i].id; + /* Convert enum value in ecc_curve_id to OpenSSL NID */ + return EccEnumToNID(eccEnum); + } + } + #endif + + return NID_undef; +} +#endif + +/* Routine for calculating hashId */ +int CalcHashId(const byte* data, word32 len, byte* hash) +{ + int ret; + +#ifdef WOLF_CRYPTO_CB + /* try to use a registered crypto callback */ + ret = wc_CryptoCb_Sha256Hash(NULL, data, len, hash); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ +#endif + +#if defined(NO_SHA) && !defined(NO_SHA256) + ret = wc_Sha256Hash(data, len, hash); +#elif !defined(NO_SHA) + ret = wc_ShaHash(data, len, hash); +#else + ret = NOT_COMPILED_IN; +#endif + + return ret; +} + +/* process NAME, either issuer or subject */ +static int GetName(DecodedCert* cert, int nameType, int maxIdx) +{ + int length; /* length of all distinguished names */ + int dummy; + int ret; + char* full; + byte* hash; + word32 idx, localIdx = 0; + byte tag; + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + DecodedName* dName = + (nameType == ISSUER) ? &cert->issuerName : &cert->subjectName; + int dcnum = 0; + #ifdef OPENSSL_EXTRA + int count = 0; + #endif + #endif /* OPENSSL_EXTRA */ + + WOLFSSL_MSG("Getting Cert Name"); + + if (nameType == ISSUER) { + full = cert->issuer; + hash = cert->issuerHash; + } + else { + full = cert->subject; + hash = cert->subjectHash; + } + + if (cert->srcIdx >= (word32)maxIdx) { + return BUFFER_E; + } + + localIdx = cert->srcIdx; + if (GetASNTag(cert->source, &localIdx, &tag, maxIdx) < 0) { + return ASN_PARSE_E; + } + + if (tag == ASN_OBJECT_ID) { + WOLFSSL_MSG("Trying optional prefix..."); + + if (SkipObjectId(cert->source, &cert->srcIdx, maxIdx) < 0) + return ASN_PARSE_E; + WOLFSSL_MSG("Got optional prefix"); + } + + /* For OCSP, RFC2560 section 4.1.1 states the issuer hash should be + * calculated over the entire DER encoding of the Name field, including + * the tag and length. 
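+ * That is why idx is captured before GetSequence() below: the hash
+ * input starts at the SEQUENCE tag itself and spans
+ * length + (cert->srcIdx - idx) bytes.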
*/ + idx = cert->srcIdx; + if (GetSequence(cert->source, &cert->srcIdx, &length, maxIdx) < 0) + return ASN_PARSE_E; + + ret = CalcHashId(&cert->source[idx], length + cert->srcIdx - idx, hash); + if (ret != 0) + return ret; + + length += cert->srcIdx; + idx = 0; + +#if defined(HAVE_PKCS7) || defined(WOLFSSL_CERT_EXT) + /* store pointer to raw issuer */ + if (nameType == ISSUER) { + cert->issuerRaw = &cert->source[cert->srcIdx]; + cert->issuerRawLen = length - cert->srcIdx; + } +#endif +#ifndef IGNORE_NAME_CONSTRAINTS + if (nameType == SUBJECT) { + cert->subjectRaw = &cert->source[cert->srcIdx]; + cert->subjectRawLen = length - cert->srcIdx; + } +#endif + + while (cert->srcIdx < (word32)length) { + byte b = 0; + byte joint[3]; + byte tooBig = FALSE; + int oidSz; + const char* copy = NULL; + int copyLen = 0; + int strLen = 0; + byte id = 0; + + if (GetSet(cert->source, &cert->srcIdx, &dummy, maxIdx) < 0) { + WOLFSSL_MSG("Cert name lacks set header, trying sequence"); + } + + if (GetSequence(cert->source, &cert->srcIdx, &dummy, maxIdx) <= 0) + return ASN_PARSE_E; + + ret = GetASNObjectId(cert->source, &cert->srcIdx, &oidSz, maxIdx); + if (ret != 0) + return ret; + + /* make sure there is room for joint */ + if ((cert->srcIdx + sizeof(joint)) > (word32)maxIdx) + return ASN_PARSE_E; + + XMEMCPY(joint, &cert->source[cert->srcIdx], sizeof(joint)); + + /* v1 name types */ + if (joint[0] == 0x55 && joint[1] == 0x04) { + cert->srcIdx += 3; + id = joint[2]; + if (GetHeader(cert->source, &b, &cert->srcIdx, &strLen, + maxIdx, 1) < 0) { + return ASN_PARSE_E; + } + + if (id == ASN_COMMON_NAME) { + if (nameType == SUBJECT) { + cert->subjectCN = (char *)&cert->source[cert->srcIdx]; + cert->subjectCNLen = strLen; + cert->subjectCNEnc = b; + } + + copy = WOLFSSL_COMMON_NAME; + copyLen = sizeof(WOLFSSL_COMMON_NAME) - 1; + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + dName->cnIdx = cert->srcIdx; + dName->cnLen = strLen; + #endif /* OPENSSL_EXTRA */ + } + else if (id == ASN_SUR_NAME) { + copy = WOLFSSL_SUR_NAME; + copyLen = sizeof(WOLFSSL_SUR_NAME) - 1; + #ifdef WOLFSSL_CERT_GEN + if (nameType == SUBJECT) { + cert->subjectSN = (char*)&cert->source[cert->srcIdx]; + cert->subjectSNLen = strLen; + cert->subjectSNEnc = b; + } + #endif /* WOLFSSL_CERT_GEN */ + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + dName->snIdx = cert->srcIdx; + dName->snLen = strLen; + #endif /* OPENSSL_EXTRA */ + } + else if (id == ASN_COUNTRY_NAME) { + copy = WOLFSSL_COUNTRY_NAME; + copyLen = sizeof(WOLFSSL_COUNTRY_NAME) - 1; + #ifdef WOLFSSL_CERT_GEN + if (nameType == SUBJECT) { + cert->subjectC = (char*)&cert->source[cert->srcIdx]; + cert->subjectCLen = strLen; + cert->subjectCEnc = b; + } + #endif /* WOLFSSL_CERT_GEN */ + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + dName->cIdx = cert->srcIdx; + dName->cLen = strLen; + #endif /* OPENSSL_EXTRA */ + } + else if (id == ASN_LOCALITY_NAME) { + copy = WOLFSSL_LOCALITY_NAME; + copyLen = sizeof(WOLFSSL_LOCALITY_NAME) - 1; + #ifdef WOLFSSL_CERT_GEN + if (nameType == SUBJECT) { + cert->subjectL = (char*)&cert->source[cert->srcIdx]; + cert->subjectLLen = strLen; + cert->subjectLEnc = b; + } + #endif /* WOLFSSL_CERT_GEN */ + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + dName->lIdx = cert->srcIdx; + dName->lLen = strLen; + #endif /* OPENSSL_EXTRA */ + } + else if (id == ASN_STATE_NAME) { + copy = WOLFSSL_STATE_NAME; + copyLen = sizeof(WOLFSSL_STATE_NAME) - 1; + #ifdef WOLFSSL_CERT_GEN + if (nameType == SUBJECT) { + 
cert->subjectST = (char*)&cert->source[cert->srcIdx]; + cert->subjectSTLen = strLen; + cert->subjectSTEnc = b; + } + #endif /* WOLFSSL_CERT_GEN */ + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + dName->stIdx = cert->srcIdx; + dName->stLen = strLen; + #endif /* OPENSSL_EXTRA */ + } + else if (id == ASN_ORG_NAME) { + copy = WOLFSSL_ORG_NAME; + copyLen = sizeof(WOLFSSL_ORG_NAME) - 1; + #ifdef WOLFSSL_CERT_GEN + if (nameType == SUBJECT) { + cert->subjectO = (char*)&cert->source[cert->srcIdx]; + cert->subjectOLen = strLen; + cert->subjectOEnc = b; + } + #endif /* WOLFSSL_CERT_GEN */ + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + dName->oIdx = cert->srcIdx; + dName->oLen = strLen; + #endif /* OPENSSL_EXTRA */ + } + else if (id == ASN_ORGUNIT_NAME) { + copy = WOLFSSL_ORGUNIT_NAME; + copyLen = sizeof(WOLFSSL_ORGUNIT_NAME) - 1; + #ifdef WOLFSSL_CERT_GEN + if (nameType == SUBJECT) { + cert->subjectOU = (char*)&cert->source[cert->srcIdx]; + cert->subjectOULen = strLen; + cert->subjectOUEnc = b; + } + #endif /* WOLFSSL_CERT_GEN */ + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + dName->ouIdx = cert->srcIdx; + dName->ouLen = strLen; + #endif /* OPENSSL_EXTRA */ + } + else if (id == ASN_SERIAL_NUMBER) { + copy = WOLFSSL_SERIAL_NUMBER; + copyLen = sizeof(WOLFSSL_SERIAL_NUMBER) - 1; + #ifdef WOLFSSL_CERT_GEN + if (nameType == SUBJECT) { + cert->subjectSND = (char*)&cert->source[cert->srcIdx]; + cert->subjectSNDLen = strLen; + cert->subjectSNDEnc = b; + } + #endif /* WOLFSSL_CERT_GEN */ + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + dName->snIdx = cert->srcIdx; + dName->snLen = strLen; + #endif /* OPENSSL_EXTRA */ + } + #ifdef WOLFSSL_CERT_EXT + else if (id == ASN_BUS_CAT) { + copy = WOLFSSL_BUS_CAT; + copyLen = sizeof(WOLFSSL_BUS_CAT) - 1; + #ifdef WOLFSSL_CERT_GEN + if (nameType == SUBJECT) { + cert->subjectBC = (char*)&cert->source[cert->srcIdx]; + cert->subjectBCLen = strLen; + cert->subjectBCEnc = b; + } + #endif /* WOLFSSL_CERT_GEN */ + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + dName->bcIdx = cert->srcIdx; + dName->bcLen = strLen; + #endif /* OPENSSL_EXTRA */ + } + #endif /* WOLFSSL_CERT_EXT */ + } + #ifdef WOLFSSL_CERT_EXT + else if ((cert->srcIdx + ASN_JOI_PREFIX_SZ + 2 <= (word32)maxIdx) && + (0 == XMEMCMP(&cert->source[cert->srcIdx], ASN_JOI_PREFIX, + ASN_JOI_PREFIX_SZ)) && + ((cert->source[cert->srcIdx+ASN_JOI_PREFIX_SZ] == ASN_JOI_C) || + (cert->source[cert->srcIdx+ASN_JOI_PREFIX_SZ] == ASN_JOI_ST))) + { + cert->srcIdx += ASN_JOI_PREFIX_SZ; + id = cert->source[cert->srcIdx++]; + b = cert->source[cert->srcIdx++]; /* encoding */ + + if (GetLength(cert->source, &cert->srcIdx, &strLen, + maxIdx) < 0) + return ASN_PARSE_E; + + /* Check for jurisdiction of incorporation country name */ + if (id == ASN_JOI_C) { + copy = WOLFSSL_JOI_C; + copyLen = sizeof(WOLFSSL_JOI_C) - 1; + #ifdef WOLFSSL_CERT_GEN + if (nameType == SUBJECT) { + cert->subjectJC = (char*)&cert->source[cert->srcIdx]; + cert->subjectJCLen = strLen; + cert->subjectJCEnc = b; + } + #endif /* WOLFSSL_CERT_GEN */ + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + dName->jcIdx = cert->srcIdx; + dName->jcLen = strLen; + #endif /* OPENSSL_EXTRA */ + } + + /* Check for jurisdiction of incorporation state name */ + else if (id == ASN_JOI_ST) { + copy = WOLFSSL_JOI_ST; + copyLen = sizeof(WOLFSSL_JOI_ST) - 1; + #ifdef WOLFSSL_CERT_GEN + if (nameType == SUBJECT) { + cert->subjectJS = (char*)&cert->source[cert->srcIdx]; + 
cert->subjectJSLen = strLen; + cert->subjectJSEnc = b; + } + #endif /* WOLFSSL_CERT_GEN */ + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + dName->jsIdx = cert->srcIdx; + dName->jsLen = strLen; + #endif /* OPENSSL_EXTRA */ + } + + if ((strLen + copyLen) > (int)(ASN_NAME_MAX - idx)) { + WOLFSSL_MSG("ASN Name too big, skipping"); + tooBig = TRUE; + } + } + #endif /* WOLFSSL_CERT_EXT */ + else { + /* skip */ + byte email = FALSE; + byte pilot = FALSE; + + if (joint[0] == 0x2a && joint[1] == 0x86) { /* email id hdr */ + id = ASN_EMAIL_NAME; + email = TRUE; + } + + if (joint[0] == 0x9 && joint[1] == 0x92) { /* uid id hdr */ + /* last value of OID is the type of pilot attribute */ + id = cert->source[cert->srcIdx + oidSz - 1]; + pilot = TRUE; + } + + cert->srcIdx += oidSz + 1; + + if (GetLength(cert->source, &cert->srcIdx, &strLen, maxIdx) < 0) + return ASN_PARSE_E; + + if (strLen > (int)(ASN_NAME_MAX - idx)) { + WOLFSSL_MSG("ASN name too big, skipping"); + tooBig = TRUE; + } + + if (email) { + copyLen = sizeof(WOLFSSL_EMAIL_ADDR) - 1; + if ((copyLen + strLen) > (int)(ASN_NAME_MAX - idx)) { + WOLFSSL_MSG("ASN name too big, skipping"); + tooBig = TRUE; + } + else { + copy = WOLFSSL_EMAIL_ADDR; + } + + #ifdef WOLFSSL_CERT_GEN + if (nameType == SUBJECT) { + cert->subjectEmail = (char*)&cert->source[cert->srcIdx]; + cert->subjectEmailLen = strLen; + } + #endif /* WOLFSSL_CERT_GEN */ + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + dName->emailIdx = cert->srcIdx; + dName->emailLen = strLen; + #endif /* OPENSSL_EXTRA */ + #ifndef IGNORE_NAME_CONSTRAINTS + { + DNS_entry* emailName; + + emailName = (DNS_entry*)XMALLOC(sizeof(DNS_entry), + cert->heap, DYNAMIC_TYPE_ALTNAME); + if (emailName == NULL) { + WOLFSSL_MSG("\tOut of Memory"); + return MEMORY_E; + } + emailName->type = 0; + emailName->name = (char*)XMALLOC(strLen + 1, + cert->heap, DYNAMIC_TYPE_ALTNAME); + if (emailName->name == NULL) { + WOLFSSL_MSG("\tOut of Memory"); + XFREE(emailName, cert->heap, DYNAMIC_TYPE_ALTNAME); + return MEMORY_E; + } + emailName->len = strLen; + XMEMCPY(emailName->name, &cert->source[cert->srcIdx], + strLen); + emailName->name[strLen] = '\0'; + + emailName->next = cert->altEmailNames; + cert->altEmailNames = emailName; + } + #endif /* IGNORE_NAME_CONSTRAINTS */ + } + + if (pilot) { + switch (id) { + case ASN_USER_ID: + copy = WOLFSSL_USER_ID; + copyLen = sizeof(WOLFSSL_USER_ID) - 1; + #if defined(OPENSSL_EXTRA) || \ + defined(OPENSSL_EXTRA_X509_SMALL) + dName->uidIdx = cert->srcIdx; + dName->uidLen = strLen; + #endif /* OPENSSL_EXTRA */ + break; + + case ASN_DOMAIN_COMPONENT: + copy = WOLFSSL_DOMAIN_COMPONENT; + copyLen = sizeof(WOLFSSL_DOMAIN_COMPONENT) - 1; + #if defined(OPENSSL_EXTRA) || \ + defined(OPENSSL_EXTRA_X509_SMALL) + dName->dcIdx[dcnum] = cert->srcIdx; + dName->dcLen[dcnum] = strLen; + dName->dcNum = dcnum + 1; + dcnum++; + #endif /* OPENSSL_EXTRA */ + break; + + default: + WOLFSSL_MSG("Unknown pilot attribute type"); + return ASN_PARSE_E; + } + } + } + if ((copyLen + strLen) > (int)(ASN_NAME_MAX - idx)) + { + WOLFSSL_MSG("ASN Name too big, skipping"); + tooBig = TRUE; + } + if ((copy != NULL) && !tooBig) { + XMEMCPY(&full[idx], copy, copyLen); + idx += copyLen; + XMEMCPY(&full[idx], &cert->source[cert->srcIdx], strLen); + idx += strLen; + + #ifdef OPENSSL_EXTRA + if (count < DOMAIN_COMPONENT_MAX) { + /* store order that DN was parsed */ + dName->loc[count++] = id; + } + #endif + } + cert->srcIdx += strLen; + } + full[idx++] = 0; +#if defined(OPENSSL_EXTRA) + /* store 
order that DN was parsed */ + dName->locSz = count; +#endif + + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + { + int totalLen = 0; + int i = 0; + + if (dName->cnLen != 0) + totalLen += dName->cnLen + 4; + if (dName->snLen != 0) + totalLen += dName->snLen + 4; + if (dName->cLen != 0) + totalLen += dName->cLen + 3; + if (dName->lLen != 0) + totalLen += dName->lLen + 3; + if (dName->stLen != 0) + totalLen += dName->stLen + 4; + if (dName->oLen != 0) + totalLen += dName->oLen + 3; + if (dName->ouLen != 0) + totalLen += dName->ouLen + 4; + if (dName->emailLen != 0) + totalLen += dName->emailLen + 14; + if (dName->uidLen != 0) + totalLen += dName->uidLen + 5; + if (dName->serialLen != 0) + totalLen += dName->serialLen + 14; + if (dName->dcNum != 0){ + for (i = 0;i < dName->dcNum;i++) + totalLen += dName->dcLen[i] + 4; + } + + dName->fullName = (char*)XMALLOC(totalLen + 1, cert->heap, + DYNAMIC_TYPE_X509); + if (dName->fullName != NULL) { + idx = 0; + + if (dName->cnLen != 0) { + dName->entryCount++; + XMEMCPY(&dName->fullName[idx], WOLFSSL_COMMON_NAME, 4); + dName->cnNid = wc_OBJ_sn2nid((const char *)WOLFSSL_COMMON_NAME); + idx += 4; + XMEMCPY(&dName->fullName[idx], + &cert->source[dName->cnIdx], dName->cnLen); + dName->cnIdx = idx; + idx += dName->cnLen; + } + if (dName->snLen != 0) { + dName->entryCount++; + XMEMCPY(&dName->fullName[idx], WOLFSSL_SUR_NAME, 4); + dName->snNid = wc_OBJ_sn2nid((const char *)WOLFSSL_SUR_NAME); + idx += 4; + XMEMCPY(&dName->fullName[idx], + &cert->source[dName->snIdx], dName->snLen); + dName->snIdx = idx; + idx += dName->snLen; + } + if (dName->cLen != 0) { + dName->entryCount++; + XMEMCPY(&dName->fullName[idx], WOLFSSL_COUNTRY_NAME, 3); + dName->cNid = wc_OBJ_sn2nid((const char *)WOLFSSL_COUNTRY_NAME); + idx += 3; + XMEMCPY(&dName->fullName[idx], + &cert->source[dName->cIdx], dName->cLen); + dName->cIdx = idx; + idx += dName->cLen; + } + if (dName->lLen != 0) { + dName->entryCount++; + XMEMCPY(&dName->fullName[idx], WOLFSSL_LOCALITY_NAME, 3); + dName->lNid = wc_OBJ_sn2nid((const char *)WOLFSSL_LOCALITY_NAME); + idx += 3; + XMEMCPY(&dName->fullName[idx], + &cert->source[dName->lIdx], dName->lLen); + dName->lIdx = idx; + idx += dName->lLen; + } + if (dName->stLen != 0) { + dName->entryCount++; + XMEMCPY(&dName->fullName[idx], WOLFSSL_STATE_NAME, 4); + dName->stNid = wc_OBJ_sn2nid((const char *)WOLFSSL_STATE_NAME); + idx += 4; + XMEMCPY(&dName->fullName[idx], + &cert->source[dName->stIdx], dName->stLen); + dName->stIdx = idx; + idx += dName->stLen; + } + if (dName->oLen != 0) { + dName->entryCount++; + XMEMCPY(&dName->fullName[idx], WOLFSSL_ORG_NAME, 3); + dName->oNid = wc_OBJ_sn2nid((const char *)WOLFSSL_ORG_NAME); + idx += 3; + XMEMCPY(&dName->fullName[idx], + &cert->source[dName->oIdx], dName->oLen); + dName->oIdx = idx; + idx += dName->oLen; + } + if (dName->ouLen != 0) { + dName->entryCount++; + XMEMCPY(&dName->fullName[idx], WOLFSSL_ORGUNIT_NAME, 4); + dName->ouNid = wc_OBJ_sn2nid((const char *)WOLFSSL_ORGUNIT_NAME); + idx += 4; + XMEMCPY(&dName->fullName[idx], + &cert->source[dName->ouIdx], dName->ouLen); + dName->ouIdx = idx; + idx += dName->ouLen; + } + if (dName->emailLen != 0) { + dName->entryCount++; + XMEMCPY(&dName->fullName[idx], "/emailAddress=", 14); + dName->emailNid = wc_OBJ_sn2nid((const char *)"/emailAddress="); + idx += 14; + XMEMCPY(&dName->fullName[idx], + &cert->source[dName->emailIdx], dName->emailLen); + dName->emailIdx = idx; + idx += dName->emailLen; + } + for (i = 0;i < dName->dcNum;i++){ + if (dName->dcLen[i] != 0) 
{ + dName->entryCount++; + XMEMCPY(&dName->fullName[idx], WOLFSSL_DOMAIN_COMPONENT, 4); + idx += 4; + XMEMCPY(&dName->fullName[idx], + &cert->source[dName->dcIdx[i]], dName->dcLen[i]); + dName->dcIdx[i] = idx; + idx += dName->dcLen[i]; + } + } + if (dName->uidLen != 0) { + dName->entryCount++; + XMEMCPY(&dName->fullName[idx], "/UID=", 5); + dName->uidNid = wc_OBJ_sn2nid((const char *)"/UID="); + idx += 5; + XMEMCPY(&dName->fullName[idx], + &cert->source[dName->uidIdx], dName->uidLen); + dName->uidIdx = idx; + idx += dName->uidLen; + } + if (dName->serialLen != 0) { + dName->entryCount++; + XMEMCPY(&dName->fullName[idx], WOLFSSL_SERIAL_NUMBER, 14); + dName->serialNid = wc_OBJ_sn2nid((const char *)WOLFSSL_SERIAL_NUMBER); + idx += 14; + XMEMCPY(&dName->fullName[idx], + &cert->source[dName->serialIdx], dName->serialLen); + dName->serialIdx = idx; + idx += dName->serialLen; + } + dName->fullName[idx] = '\0'; + dName->fullNameLen = totalLen; + } + } + #endif /* OPENSSL_EXTRA */ + + return 0; +} + + +#ifndef NO_ASN_TIME + +/* two byte date/time, add to value */ +static WC_INLINE int GetTime(int* value, const byte* date, int* idx) +{ + int i = *idx; + + if (date[i] < 0x30 || date[i] > 0x39 || date[i+1] < 0x30 || + date[i+1] > 0x39) { + return ASN_PARSE_E; + } + + *value += btoi(date[i++]) * 10; + *value += btoi(date[i++]); + + *idx = i; + + return 0; +} + +int ExtractDate(const unsigned char* date, unsigned char format, + struct tm* certTime, int* idx) +{ + XMEMSET(certTime, 0, sizeof(struct tm)); + + if (format == ASN_UTC_TIME) { + if (btoi(date[*idx]) >= 5) + certTime->tm_year = 1900; + else + certTime->tm_year = 2000; + } + else { /* format == GENERALIZED_TIME */ + if (GetTime(&certTime->tm_year, date, idx) != 0) return 0; + certTime->tm_year *= 100; + } + + /* adjust tm_year, tm_mon */ + if (GetTime(&certTime->tm_year, date, idx) != 0) return 0; + certTime->tm_year -= 1900; + if (GetTime(&certTime->tm_mon , date, idx) != 0) return 0; + certTime->tm_mon -= 1; + if (GetTime(&certTime->tm_mday, date, idx) != 0) return 0; + if (GetTime(&certTime->tm_hour, date, idx) != 0) return 0; + if (GetTime(&certTime->tm_min , date, idx) != 0) return 0; + if (GetTime(&certTime->tm_sec , date, idx) != 0) return 0; + + return 1; +} + + +#if defined(OPENSSL_ALL) || defined(WOLFSSL_MYSQL_COMPATIBLE) || \ + defined(OPENSSL_EXTRA) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY) +int GetTimeString(byte* date, int format, char* buf, int len) +{ + struct tm t; + int idx = 0; + + if (!ExtractDate(date, (unsigned char)format, &t, &idx)) { + return 0; + } + + if (date[idx] != 'Z') { + WOLFSSL_MSG("UTCtime, not Zulu") ; + return 0; + } + + /* place month in buffer */ + buf[0] = '\0'; + switch(t.tm_mon) { + case 0: XSTRNCAT(buf, "Jan ", 5); break; + case 1: XSTRNCAT(buf, "Feb ", 5); break; + case 2: XSTRNCAT(buf, "Mar ", 5); break; + case 3: XSTRNCAT(buf, "Apr ", 5); break; + case 4: XSTRNCAT(buf, "May ", 5); break; + case 5: XSTRNCAT(buf, "Jun ", 5); break; + case 6: XSTRNCAT(buf, "Jul ", 5); break; + case 7: XSTRNCAT(buf, "Aug ", 5); break; + case 8: XSTRNCAT(buf, "Sep ", 5); break; + case 9: XSTRNCAT(buf, "Oct ", 5); break; + case 10: XSTRNCAT(buf, "Nov ", 5); break; + case 11: XSTRNCAT(buf, "Dec ", 5); break; + default: + return 0; + + } + idx = 4; /* use idx now for char buffer */ + + XSNPRINTF(buf + idx, len - idx, "%2d %02d:%02d:%02d %d GMT", + t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec, t.tm_year + 1900); + + return 1; +} +#endif /* OPENSSL_ALL || WOLFSSL_MYSQL_COMPATIBLE || WOLFSSL_NGINX || WOLFSSL_HAPROXY */ 
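+
+/* Example use of GetTimeString() above (an illustrative sketch, not
+ * part of this change): rendering a decoded certificate's notBefore
+ * date, assuming the cert was parsed by the routines in this file so
+ * beforeDate/beforeDateLen are populated.
+ *
+ *     const byte* date; byte fmt; int dlen; char buf[32];
+ *     if (wc_GetDateInfo(cert->beforeDate, cert->beforeDateLen,
+ *                        &date, &fmt, &dlen) == 0 &&
+ *         GetTimeString((byte*)date, fmt, buf, sizeof(buf)) == 1) {
+ *         ... buf now holds e.g. "Jan  7 12:00:00 2020 GMT" ...
+ *     }
+ */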
+ + +#if !defined(NO_ASN_TIME) && defined(HAVE_PKCS7) + +/* Set current time string, either UTC or GeneralizedTime. + * (void*) tm should be a pointer to time_t, output is placed in buf. + * + * Return time string length placed in buf on success, negative on error */ +int GetAsnTimeString(void* currTime, byte* buf, word32 len) +{ + struct tm* ts = NULL; + struct tm* tmpTime = NULL; +#if defined(NEED_TMP_TIME) + struct tm tmpTimeStorage; + tmpTime = &tmpTimeStorage; +#else + (void)tmpTime; +#endif + byte* data_ptr = buf; + word32 data_len = 0; + int year, mon, day, hour, mini, sec; + + WOLFSSL_ENTER("SetAsnTimeString"); + + if (buf == NULL || len == 0) + return BAD_FUNC_ARG; + + ts = (struct tm *)XGMTIME((time_t*)currTime, tmpTime); + if (ts == NULL){ + WOLFSSL_MSG("failed to get time data."); + return ASN_TIME_E; + } + + /* Note ASN_UTC_TIME_SIZE and ASN_GENERALIZED_TIME_SIZE include space for + * the null terminator. ASN encoded values leave off the terminator. */ + + if (ts->tm_year >= 50 && ts->tm_year < 150) { + /* UTC Time */ + char utc_str[ASN_UTC_TIME_SIZE]; + data_len = ASN_UTC_TIME_SIZE - 1 + 2; + + if (len < data_len) + return BUFFER_E; + + if (ts->tm_year >= 50 && ts->tm_year < 100) { + year = ts->tm_year; + } else if (ts->tm_year >= 100 && ts->tm_year < 150) { + year = ts->tm_year - 100; + } + else { + WOLFSSL_MSG("unsupported year range"); + return BAD_FUNC_ARG; + } + mon = ts->tm_mon + 1; + day = ts->tm_mday; + hour = ts->tm_hour; + mini = ts->tm_min; + sec = ts->tm_sec; + XSNPRINTF((char *)utc_str, ASN_UTC_TIME_SIZE, + "%02d%02d%02d%02d%02d%02dZ", year, mon, day, hour, mini, sec); + *data_ptr = (byte) ASN_UTC_TIME; data_ptr++; + /* -1 below excludes null terminator */ + *data_ptr = (byte) ASN_UTC_TIME_SIZE - 1; data_ptr++; + XMEMCPY(data_ptr,(byte *)utc_str, ASN_UTC_TIME_SIZE - 1); + + } else { + /* GeneralizedTime */ + char gt_str[ASN_GENERALIZED_TIME_SIZE]; + data_len = ASN_GENERALIZED_TIME_SIZE - 1 + 2; + + if (len < data_len) + return BUFFER_E; + + year = ts->tm_year + 1900; + mon = ts->tm_mon + 1; + day = ts->tm_mday; + hour = ts->tm_hour; + mini = ts->tm_min; + sec = ts->tm_sec; + XSNPRINTF((char *)gt_str, ASN_GENERALIZED_TIME_SIZE, + "%4d%02d%02d%02d%02d%02dZ", year, mon, day, hour, mini, sec); + *data_ptr = (byte) ASN_GENERALIZED_TIME; data_ptr++; + /* -1 below excludes null terminator */ + *data_ptr = (byte) ASN_GENERALIZED_TIME_SIZE - 1; data_ptr++; + XMEMCPY(data_ptr,(byte *)gt_str, ASN_GENERALIZED_TIME_SIZE - 1); + } + + return data_len; +} + +#endif /* !NO_ASN_TIME && HAVE_PKCS7 */ + + +#if defined(USE_WOLF_VALIDDATE) + +/* to the second */ +int DateGreaterThan(const struct tm* a, const struct tm* b) +{ + if (a->tm_year > b->tm_year) + return 1; + + if (a->tm_year == b->tm_year && a->tm_mon > b->tm_mon) + return 1; + + if (a->tm_year == b->tm_year && a->tm_mon == b->tm_mon && + a->tm_mday > b->tm_mday) + return 1; + + if (a->tm_year == b->tm_year && a->tm_mon == b->tm_mon && + a->tm_mday == b->tm_mday && a->tm_hour > b->tm_hour) + return 1; + + if (a->tm_year == b->tm_year && a->tm_mon == b->tm_mon && + a->tm_mday == b->tm_mday && a->tm_hour == b->tm_hour && + a->tm_min > b->tm_min) + return 1; + + if (a->tm_year == b->tm_year && a->tm_mon == b->tm_mon && + a->tm_mday == b->tm_mday && a->tm_hour == b->tm_hour && + a->tm_min == b->tm_min && a->tm_sec > b->tm_sec) + return 1; + + return 0; /* false */ +} + + +static WC_INLINE int DateLessThan(const struct tm* a, const struct tm* b) +{ + return DateGreaterThan(b,a); +} + +/* like atoi but only use first byte */ +/* 
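Validate an ASN.1 date against the current system time, accepting Zulu ("Z") times as well as +hhmm/-hhmm differentials, which are folded into the comparison through timeDiff.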
Make sure before and after dates are valid */ +int ValidateDate(const byte* date, byte format, int dateType) +{ + time_t ltime; + struct tm certTime; + struct tm* localTime; + struct tm* tmpTime; + int i = 0; + int timeDiff = 0 ; + int diffHH = 0 ; int diffMM = 0 ; + int diffSign = 0 ; + +#if defined(NEED_TMP_TIME) + struct tm tmpTimeStorage; + tmpTime = &tmpTimeStorage; +#else + tmpTime = NULL; +#endif + (void)tmpTime; + + ltime = XTIME(0); + +#ifdef WOLFSSL_BEFORE_DATE_CLOCK_SKEW + if (dateType == BEFORE) { + WOLFSSL_MSG("Skewing local time for before date check"); + ltime += WOLFSSL_BEFORE_DATE_CLOCK_SKEW; + } +#endif + +#ifdef WOLFSSL_AFTER_DATE_CLOCK_SKEW + if (dateType == AFTER) { + WOLFSSL_MSG("Skewing local time for after date check"); + ltime -= WOLFSSL_AFTER_DATE_CLOCK_SKEW; + } +#endif + + if (!ExtractDate(date, format, &certTime, &i)) { + WOLFSSL_MSG("Error extracting the date"); + return 0; + } + + if ((date[i] == '+') || (date[i] == '-')) { + WOLFSSL_MSG("Using time differential, not Zulu") ; + diffSign = date[i++] == '+' ? 1 : -1 ; + if (GetTime(&diffHH, date, &i) != 0) + return 0; + if (GetTime(&diffMM, date, &i) != 0) + return 0; + timeDiff = diffSign * (diffHH*60 + diffMM) * 60 ; + } else if (date[i] != 'Z') { + WOLFSSL_MSG("UTCtime, neither Zulu or time differential") ; + return 0; + } + + ltime -= (time_t)timeDiff ; + localTime = XGMTIME(<ime, tmpTime); + + if (localTime == NULL) { + WOLFSSL_MSG("XGMTIME failed"); + return 0; + } + + if (dateType == BEFORE) { + if (DateLessThan(localTime, &certTime)) { + WOLFSSL_MSG("Date BEFORE check failed"); + return 0; + } + } + else { /* dateType == AFTER */ + if (DateGreaterThan(localTime, &certTime)) { + WOLFSSL_MSG("Date AFTER check failed"); + return 0; + } + } + + return 1; +} +#endif /* USE_WOLF_VALIDDATE */ + +int wc_GetTime(void* timePtr, word32 timeSize) +{ + time_t* ltime = (time_t*)timePtr; + + if (timePtr == NULL) { + return BAD_FUNC_ARG; + } + + if ((word32)sizeof(time_t) > timeSize) { + return BUFFER_E; + } + + *ltime = XTIME(0); + + return 0; +} + +#endif /* !NO_ASN_TIME */ + + +/* Get date buffer, format and length. 
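Expects an ASN.1 UTCTime or GeneralizedTime at source[*idx]; on success pDate points into source (no copy is made) and *idx is advanced past the date.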
Returns 0=success or error */ +static int GetDateInfo(const byte* source, word32* idx, const byte** pDate, + byte* pFormat, int* pLength, word32 maxIdx) +{ + int length; + byte format; + + if (source == NULL || idx == NULL) + return BAD_FUNC_ARG; + + /* get ASN format header */ + if (*idx+1 > maxIdx) + return BUFFER_E; + format = source[*idx]; + *idx += 1; + if (format != ASN_UTC_TIME && format != ASN_GENERALIZED_TIME) + return ASN_TIME_E; + + /* get length */ + if (GetLength(source, idx, &length, maxIdx) < 0) + return ASN_PARSE_E; + if (length > MAX_DATE_SIZE || length < MIN_DATE_SIZE) + return ASN_DATE_SZ_E; + + /* return format, date and length */ + if (pFormat) + *pFormat = format; + if (pDate) + *pDate = &source[*idx]; + if (pLength) + *pLength = length; + + *idx += length; + + return 0; +} + +static int GetDate(DecodedCert* cert, int dateType, int verify, int maxIdx) +{ + int ret, length; + const byte *datePtr = NULL; + byte date[MAX_DATE_SIZE]; + byte format; + word32 startIdx = 0; + + if (dateType == BEFORE) + cert->beforeDate = &cert->source[cert->srcIdx]; + else + cert->afterDate = &cert->source[cert->srcIdx]; + startIdx = cert->srcIdx; + + ret = GetDateInfo(cert->source, &cert->srcIdx, &datePtr, &format, + &length, maxIdx); + if (ret < 0) + return ret; + + XMEMSET(date, 0, MAX_DATE_SIZE); + XMEMCPY(date, datePtr, length); + + if (dateType == BEFORE) + cert->beforeDateLen = cert->srcIdx - startIdx; + else + cert->afterDateLen = cert->srcIdx - startIdx; + +#ifndef NO_ASN_TIME + if (verify != NO_VERIFY && verify != VERIFY_SKIP_DATE && + !XVALIDATE_DATE(date, format, dateType)) { + if (dateType == BEFORE) + return ASN_BEFORE_DATE_E; + else + return ASN_AFTER_DATE_E; + } +#else + (void)verify; +#endif + + return 0; +} + +static int GetValidity(DecodedCert* cert, int verify, int maxIdx) +{ + int length; + int badDate = 0; + + if (GetSequence(cert->source, &cert->srcIdx, &length, maxIdx) < 0) + return ASN_PARSE_E; + + maxIdx = cert->srcIdx + length; + + if (GetDate(cert, BEFORE, verify, maxIdx) < 0) + badDate = ASN_BEFORE_DATE_E; /* continue parsing */ + + if (GetDate(cert, AFTER, verify, maxIdx) < 0) + return ASN_AFTER_DATE_E; + + if (badDate != 0) + return badDate; + + return 0; +} + + +int wc_GetDateInfo(const byte* certDate, int certDateSz, const byte** date, + byte* format, int* length) +{ + int ret; + word32 idx = 0; + + ret = GetDateInfo(certDate, &idx, date, format, length, certDateSz); + if (ret < 0) + return ret; + + return 0; +} + +#ifndef NO_ASN_TIME +int wc_GetDateAsCalendarTime(const byte* date, int length, byte format, + struct tm* timearg) +{ + int idx = 0; + (void)length; + if (!ExtractDate(date, format, timearg, &idx)) + return ASN_TIME_E; + return 0; +} + +#if defined(WOLFSSL_CERT_GEN) && defined(WOLFSSL_ALT_NAMES) +int wc_GetCertDates(Cert* cert, struct tm* before, struct tm* after) +{ + int ret = 0; + const byte* date; + byte format; + int length; + + if (cert == NULL) + return BAD_FUNC_ARG; + + if (before && cert->beforeDateSz > 0) { + ret = wc_GetDateInfo(cert->beforeDate, cert->beforeDateSz, &date, + &format, &length); + if (ret == 0) + ret = wc_GetDateAsCalendarTime(date, length, format, before); + } + if (after && cert->afterDateSz > 0) { + ret = wc_GetDateInfo(cert->afterDate, cert->afterDateSz, &date, + &format, &length); + if (ret == 0) + ret = wc_GetDateAsCalendarTime(date, length, format, after); + } + + return ret; +} +#endif /* WOLFSSL_CERT_GEN && WOLFSSL_ALT_NAMES */ +#endif /* !NO_ASN_TIME */ + +/* parses certificate up to point of X.509 public key + 
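+ * (cert header, signature algorithm, issuer name, validity dates and
+ * subject name), leaving cert->srcIdx at the SubjectPublicKeyInfo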
* + * if cert date is invalid then badDate gets set to error value, otherwise is 0 + * + * returns a negative value on fail case + */ +int wc_GetPubX509(DecodedCert* cert, int verify, int* badDate) +{ + int ret; + + if (cert == NULL || badDate == NULL) + return BAD_FUNC_ARG; + + *badDate = 0; + if ( (ret = GetCertHeader(cert)) < 0) + return ret; + + WOLFSSL_MSG("Got Cert Header"); + + /* Using the sigIndex as the upper bound because that's where the + * actual certificate data ends. */ + if ( (ret = GetAlgoId(cert->source, &cert->srcIdx, &cert->signatureOID, + oidSigType, cert->sigIndex)) < 0) + return ret; + + WOLFSSL_MSG("Got Algo ID"); + + if ( (ret = GetName(cert, ISSUER, cert->sigIndex)) < 0) + return ret; + + if ( (ret = GetValidity(cert, verify, cert->sigIndex)) < 0) + *badDate = ret; + + if ( (ret = GetName(cert, SUBJECT, cert->sigIndex)) < 0) + return ret; + + WOLFSSL_MSG("Got Subject Name"); + return ret; +} + +int DecodeToKey(DecodedCert* cert, int verify) +{ + int badDate = 0; + int ret; + + if ( (ret = wc_GetPubX509(cert, verify, &badDate)) < 0) + return ret; + + /* Determine if self signed */ + cert->selfSigned = XMEMCMP(cert->issuerHash, + cert->subjectHash, + KEYID_SIZE) == 0 ? 1 : 0; + + if ( (ret = GetKey(cert)) < 0) + return ret; + + WOLFSSL_MSG("Got Key"); + + if (badDate != 0) + return badDate; + + return ret; +} + +static int GetSignature(DecodedCert* cert) +{ + int length; + int ret; + ret = CheckBitString(cert->source, &cert->srcIdx, &length, cert->maxIdx, 1, + NULL); + if (ret != 0) + return ret; + + cert->sigLength = length; + cert->signature = &cert->source[cert->srcIdx]; + cert->srcIdx += cert->sigLength; + + return 0; +} + +static word32 SetOctetString8Bit(word32 len, byte* output) +{ + output[0] = ASN_OCTET_STRING; + output[1] = (byte)len; + return 2; +} + +static word32 SetDigest(const byte* digest, word32 digSz, byte* output) +{ + word32 idx = SetOctetString8Bit(digSz, output); + XMEMCPY(&output[idx], digest, digSz); + + return idx + digSz; +} + + +static word32 BytePrecision(word32 value) +{ + word32 i; + for (i = sizeof(value); i; --i) + if (value >> ((i - 1) * WOLFSSL_BIT_SIZE)) + break; + + return i; +} + + +word32 SetLength(word32 length, byte* output) +{ + word32 i = 0, j; + + if (length < ASN_LONG_LENGTH) { + if (output) + output[i] = (byte)length; + i++; + } + else { + if (output) + output[i] = (byte)(BytePrecision(length) | ASN_LONG_LENGTH); + i++; + + for (j = BytePrecision(length); j; --j) { + if (output) + output[i] = (byte)(length >> ((j - 1) * WOLFSSL_BIT_SIZE)); + i++; + } + } + + return i; +} + +word32 SetSequence(word32 len, byte* output) +{ + if (output) + output[0] = ASN_SEQUENCE | ASN_CONSTRUCTED; + return SetLength(len, output ? output + 1 : NULL) + 1; +} + +word32 SetOctetString(word32 len, byte* output) +{ + output[0] = ASN_OCTET_STRING; + return SetLength(len, output + 1) + 1; +} + +/* Write a set header to output */ +word32 SetSet(word32 len, byte* output) +{ + output[0] = ASN_SET | ASN_CONSTRUCTED; + return SetLength(len, output + 1) + 1; +} + +word32 SetImplicit(byte tag, byte number, word32 len, byte* output) +{ + + output[0] = ((tag == ASN_SEQUENCE || tag == ASN_SET) ? 
ASN_CONSTRUCTED : 0) + | ASN_CONTEXT_SPECIFIC | number; + return SetLength(len, output + 1) + 1; +} + +word32 SetExplicit(byte number, word32 len, byte* output) +{ + output[0] = ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | number; + return SetLength(len, output + 1) + 1; +} + + +#if defined(HAVE_ECC) && defined(HAVE_ECC_KEY_EXPORT) + +static int SetCurve(ecc_key* key, byte* output) +{ +#ifdef HAVE_OID_ENCODING + int ret; +#endif + int idx = 0; + word32 oidSz = 0; + + /* validate key */ + if (key == NULL || key->dp == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef HAVE_OID_ENCODING + ret = EncodeObjectId(key->dp->oid, key->dp->oidSz, NULL, &oidSz); + if (ret != 0) { + return ret; + } +#else + oidSz = key->dp->oidSz; +#endif + + idx += SetObjectId(oidSz, output); + +#ifdef HAVE_OID_ENCODING + ret = EncodeObjectId(key->dp->oid, key->dp->oidSz, output+idx, &oidSz); + if (ret != 0) { + return ret; + } +#else + XMEMCPY(output+idx, key->dp->oid, oidSz); +#endif + idx += oidSz; + + return idx; +} + +#endif /* HAVE_ECC && HAVE_ECC_KEY_EXPORT */ + + +#ifdef HAVE_ECC +static WC_INLINE int IsSigAlgoECDSA(int algoOID) +{ + /* ECDSA sigAlgo must not have ASN1 NULL parameters */ + if (algoOID == CTC_SHAwECDSA || algoOID == CTC_SHA256wECDSA || + algoOID == CTC_SHA384wECDSA || algoOID == CTC_SHA512wECDSA) { + return 1; + } + + return 0; +} +#endif + +word32 SetAlgoID(int algoOID, byte* output, int type, int curveSz) +{ + word32 tagSz, idSz, seqSz, algoSz = 0; + const byte* algoName = 0; + byte ID_Length[1 + MAX_LENGTH_SZ]; + byte seqArray[MAX_SEQ_SZ + 1]; /* add object_id to end */ + int length = 0; + + tagSz = (type == oidHashType || + (type == oidSigType + #ifdef HAVE_ECC + && !IsSigAlgoECDSA(algoOID) + #endif + #ifdef HAVE_ED25519 + && algoOID != ED25519k + #endif + #ifdef HAVE_ED448 + && algoOID != ED448k + #endif + ) || + (type == oidKeyType && algoOID == RSAk)) ? 
2 : 0; + + algoName = OidFromId(algoOID, type, &algoSz); + + if (algoName == NULL) { + WOLFSSL_MSG("Unknown Algorithm"); + return 0; + } + + idSz = SetObjectId(algoSz, ID_Length); + seqSz = SetSequence(idSz + algoSz + tagSz + curveSz, seqArray); + + /* Copy only algo to output for DSA keys */ + if (algoOID == DSAk && output) { + XMEMCPY(output, ID_Length, idSz); + XMEMCPY(output + idSz, algoName, algoSz); + if (tagSz == 2) + SetASNNull(&output[seqSz + idSz + algoSz]); + } + else if (output) { + XMEMCPY(output, seqArray, seqSz); + XMEMCPY(output + seqSz, ID_Length, idSz); + XMEMCPY(output + seqSz + idSz, algoName, algoSz); + if (tagSz == 2) + SetASNNull(&output[seqSz + idSz + algoSz]); + } + + if (algoOID == DSAk) + length = idSz + algoSz + tagSz; + else + length = seqSz + idSz + algoSz + tagSz; + + return length; +} + + +word32 wc_EncodeSignature(byte* out, const byte* digest, word32 digSz, + int hashOID) +{ + byte digArray[MAX_ENCODED_DIG_SZ]; + byte algoArray[MAX_ALGO_SZ]; + byte seqArray[MAX_SEQ_SZ]; + word32 encDigSz, algoSz, seqSz; + + encDigSz = SetDigest(digest, digSz, digArray); + algoSz = SetAlgoID(hashOID, algoArray, oidHashType, 0); + seqSz = SetSequence(encDigSz + algoSz, seqArray); + + XMEMCPY(out, seqArray, seqSz); + XMEMCPY(out + seqSz, algoArray, algoSz); + XMEMCPY(out + seqSz + algoSz, digArray, encDigSz); + + return encDigSz + algoSz + seqSz; +} + + +#ifndef NO_CERTS + +int wc_GetCTC_HashOID(int type) +{ + int ret; + enum wc_HashType hType; + + hType = wc_HashTypeConvert(type); + ret = wc_HashGetOID(hType); + if (ret < 0) + ret = 0; /* backwards compatibility */ + + return ret; +} + +void InitSignatureCtx(SignatureCtx* sigCtx, void* heap, int devId) +{ + if (sigCtx) { + XMEMSET(sigCtx, 0, sizeof(SignatureCtx)); + sigCtx->devId = devId; + sigCtx->heap = heap; + } +} + +void FreeSignatureCtx(SignatureCtx* sigCtx) +{ + if (sigCtx == NULL) + return; + + if (sigCtx->digest) { + XFREE(sigCtx->digest, sigCtx->heap, DYNAMIC_TYPE_DIGEST); + sigCtx->digest = NULL; + } +#ifndef NO_RSA + if (sigCtx->plain) { + XFREE(sigCtx->plain, sigCtx->heap, DYNAMIC_TYPE_SIGNATURE); + sigCtx->plain = NULL; + } +#endif +#ifndef NO_ASN_CRYPT + if (sigCtx->key.ptr) { + switch (sigCtx->keyOID) { + #ifndef NO_RSA + case RSAk: + wc_FreeRsaKey(sigCtx->key.rsa); + XFREE(sigCtx->key.ptr, sigCtx->heap, DYNAMIC_TYPE_RSA); + break; + #endif /* !NO_RSA */ + #ifdef HAVE_ECC + case ECDSAk: + wc_ecc_free(sigCtx->key.ecc); + XFREE(sigCtx->key.ecc, sigCtx->heap, DYNAMIC_TYPE_ECC); + break; + #endif /* HAVE_ECC */ + #ifdef HAVE_ED25519 + case ED25519k: + wc_ed25519_free(sigCtx->key.ed25519); + XFREE(sigCtx->key.ed25519, sigCtx->heap, DYNAMIC_TYPE_ED25519); + break; + #endif /* HAVE_ED25519 */ + #ifdef HAVE_ED448 + case ED448k: + wc_ed448_free(sigCtx->key.ed448); + XFREE(sigCtx->key.ed448, sigCtx->heap, DYNAMIC_TYPE_ED448); + break; + #endif /* HAVE_ED448 */ + default: + break; + } /* switch (keyOID) */ + sigCtx->key.ptr = NULL; + } +#endif + + /* reset state, we are done */ + sigCtx->state = SIG_STATE_BEGIN; +} + +#ifndef NO_ASN_CRYPT +static int HashForSignature(const byte* buf, word32 bufSz, word32 sigOID, + byte* digest, int* typeH, int* digestSz, int verify) +{ + int ret = 0; + + (void)verify; + + switch (sigOID) { + #if defined(WOLFSSL_MD2) + case CTC_MD2wRSA: + if (!verify) { + ret = HASH_TYPE_E; + WOLFSSL_MSG("MD2 not supported for signing"); + } + else if ((ret = wc_Md2Hash(buf, bufSz, digest)) == 0) { + *typeH = MD2h; + *digestSz = MD2_DIGEST_SIZE; + } + break; + #endif + #ifndef NO_MD5 + case CTC_MD5wRSA: 
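+                /* MD5 is legacy; unlike MD2 above it is still permitted for
+                 * signing here. As in the cases below, *typeH and *digestSz
+                 * are what wc_EncodeSignature() consumes later during RSA
+                 * verification in ConfirmSignature(). */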
+ if ((ret = wc_Md5Hash(buf, bufSz, digest)) == 0) { + *typeH = MD5h; + *digestSz = WC_MD5_DIGEST_SIZE; + } + break; + #endif + #ifndef NO_SHA + case CTC_SHAwRSA: + case CTC_SHAwDSA: + case CTC_SHAwECDSA: + if ((ret = wc_ShaHash(buf, bufSz, digest)) == 0) { + *typeH = SHAh; + *digestSz = WC_SHA_DIGEST_SIZE; + } + break; + #endif + #ifdef WOLFSSL_SHA224 + case CTC_SHA224wRSA: + case CTC_SHA224wECDSA: + if ((ret = wc_Sha224Hash(buf, bufSz, digest)) == 0) { + *typeH = SHA224h; + *digestSz = WC_SHA224_DIGEST_SIZE; + } + break; + #endif + #ifndef NO_SHA256 + case CTC_SHA256wRSA: + case CTC_SHA256wECDSA: + if ((ret = wc_Sha256Hash(buf, bufSz, digest)) == 0) { + *typeH = SHA256h; + *digestSz = WC_SHA256_DIGEST_SIZE; + } + break; + #endif + #ifdef WOLFSSL_SHA384 + case CTC_SHA384wRSA: + case CTC_SHA384wECDSA: + if ((ret = wc_Sha384Hash(buf, bufSz, digest)) == 0) { + *typeH = SHA384h; + *digestSz = WC_SHA384_DIGEST_SIZE; + } + break; + #endif + #ifdef WOLFSSL_SHA512 + case CTC_SHA512wRSA: + case CTC_SHA512wECDSA: + if ((ret = wc_Sha512Hash(buf, bufSz, digest)) == 0) { + *typeH = SHA512h; + *digestSz = WC_SHA512_DIGEST_SIZE; + } + break; + #endif + #ifdef HAVE_ED25519 + case CTC_ED25519: + /* Hashes done in signing operation. + * Two dependent hashes with prefixes performed. + */ + break; + #endif + #ifdef HAVE_ED448 + case CTC_ED448: + /* Hashes done in signing operation. + * Two dependent hashes with prefixes performed. + */ + break; + #endif + default: + ret = HASH_TYPE_E; + WOLFSSL_MSG("Hash for Signature has unsupported type"); + } + + return ret; +} +#endif /* !NO_ASN_CRYPT */ + +/* Return codes: 0=Success, Negative (see error-crypt.h), ASN_SIG_CONFIRM_E */ +static int ConfirmSignature(SignatureCtx* sigCtx, + const byte* buf, word32 bufSz, + const byte* key, word32 keySz, word32 keyOID, + const byte* sig, word32 sigSz, word32 sigOID, byte* rsaKeyIdx) +{ + int ret = 0; +#ifndef WOLFSSL_RENESAS_TSIP_TLS + (void)rsaKeyIdx; +#endif + if (sigCtx == NULL || buf == NULL || bufSz == 0 || key == NULL || + keySz == 0 || sig == NULL || sigSz == 0) { + return BAD_FUNC_ARG; + } + + (void)key; + (void)keySz; + (void)sig; + (void)sigSz; + + WOLFSSL_ENTER("ConfirmSignature"); + +#ifndef NO_ASN_CRYPT + switch (sigCtx->state) { + case SIG_STATE_BEGIN: + { + sigCtx->keyOID = keyOID; /* must set early for cleanup */ + + sigCtx->digest = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, sigCtx->heap, + DYNAMIC_TYPE_DIGEST); + if (sigCtx->digest == NULL) { + ERROR_OUT(MEMORY_E, exit_cs); + } + + sigCtx->state = SIG_STATE_HASH; + } /* SIG_STATE_BEGIN */ + FALL_THROUGH; + + case SIG_STATE_HASH: + { + ret = HashForSignature(buf, bufSz, sigOID, sigCtx->digest, + &sigCtx->typeH, &sigCtx->digestSz, 1); + if (ret != 0) { + goto exit_cs; + } + + sigCtx->state = SIG_STATE_KEY; + } /* SIG_STATE_HASH */ + FALL_THROUGH; + + case SIG_STATE_KEY: + { + switch (keyOID) { + #ifndef NO_RSA + case RSAk: + { + word32 idx = 0; + + sigCtx->key.rsa = (RsaKey*)XMALLOC(sizeof(RsaKey), + sigCtx->heap, DYNAMIC_TYPE_RSA); + sigCtx->plain = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, + sigCtx->heap, DYNAMIC_TYPE_SIGNATURE); + if (sigCtx->key.rsa == NULL || sigCtx->plain == NULL) { + ERROR_OUT(MEMORY_E, exit_cs); + } + if ((ret = wc_InitRsaKey_ex(sigCtx->key.rsa, sigCtx->heap, + sigCtx->devId)) != 0) { + goto exit_cs; + } + if (sigSz > MAX_ENCODED_SIG_SZ) { + WOLFSSL_MSG("Verify Signature is too big"); + ERROR_OUT(BUFFER_E, exit_cs); + } + if ((ret = wc_RsaPublicKeyDecode(key, &idx, sigCtx->key.rsa, + keySz)) != 0) { + WOLFSSL_MSG("ASN Key decode error RSA"); + goto 
exit_cs; + } + XMEMCPY(sigCtx->plain, sig, sigSz); + sigCtx->out = NULL; + + #ifdef WOLFSSL_ASYNC_CRYPT + sigCtx->asyncDev = &sigCtx->key.rsa->asyncDev; + #endif + break; + } + #endif /* !NO_RSA */ + #ifdef HAVE_ECC + case ECDSAk: + { + word32 idx = 0; + + sigCtx->verify = 0; + sigCtx->key.ecc = (ecc_key*)XMALLOC(sizeof(ecc_key), + sigCtx->heap, DYNAMIC_TYPE_ECC); + if (sigCtx->key.ecc == NULL) { + ERROR_OUT(MEMORY_E, exit_cs); + } + if ((ret = wc_ecc_init_ex(sigCtx->key.ecc, sigCtx->heap, + sigCtx->devId)) < 0) { + goto exit_cs; + } + ret = wc_EccPublicKeyDecode(key, &idx, sigCtx->key.ecc, + keySz); + if (ret < 0) { + WOLFSSL_MSG("ASN Key import error ECC"); + goto exit_cs; + } + #ifdef WOLFSSL_ASYNC_CRYPT + sigCtx->asyncDev = &sigCtx->key.ecc->asyncDev; + #endif + break; + } + #endif /* HAVE_ECC */ + #ifdef HAVE_ED25519 + case ED25519k: + { + sigCtx->verify = 0; + sigCtx->key.ed25519 = (ed25519_key*)XMALLOC( + sizeof(ed25519_key), sigCtx->heap, + DYNAMIC_TYPE_ED25519); + if (sigCtx->key.ed25519 == NULL) { + ERROR_OUT(MEMORY_E, exit_cs); + } + if ((ret = wc_ed25519_init(sigCtx->key.ed25519)) < 0) { + goto exit_cs; + } + if ((ret = wc_ed25519_import_public(key, keySz, + sigCtx->key.ed25519)) < 0) { + WOLFSSL_MSG("ASN Key import error ED25519"); + goto exit_cs; + } + #ifdef WOLFSSL_ASYNC_CRYPT + sigCtx->asyncDev = &sigCtx->key.ed25519->asyncDev; + #endif + break; + } + #endif + #ifdef HAVE_ED448 + case ED448k: + { + sigCtx->verify = 0; + sigCtx->key.ed448 = (ed448_key*)XMALLOC( + sizeof(ed448_key), sigCtx->heap, + DYNAMIC_TYPE_ED448); + if (sigCtx->key.ed448 == NULL) { + ERROR_OUT(MEMORY_E, exit_cs); + } + if ((ret = wc_ed448_init(sigCtx->key.ed448)) < 0) { + goto exit_cs; + } + if ((ret = wc_ed448_import_public(key, keySz, + sigCtx->key.ed448)) < 0) { + WOLFSSL_MSG("ASN Key import error ED448"); + goto exit_cs; + } + #ifdef WOLFSSL_ASYNC_CRYPT + sigCtx->asyncDev = &sigCtx->key.ed448->asyncDev; + #endif + break; + } + #endif + default: + WOLFSSL_MSG("Verify Key type unknown"); + ret = ASN_UNKNOWN_OID_E; + break; + } /* switch (keyOID) */ + + if (ret != 0) { + goto exit_cs; + } + + sigCtx->state = SIG_STATE_DO; + + #ifdef WOLFSSL_ASYNC_CRYPT + if (sigCtx->devId != INVALID_DEVID && sigCtx->asyncDev && sigCtx->asyncCtx) { + /* make sure event is initialized */ + WOLF_EVENT* event = &sigCtx->asyncDev->event; + ret = wolfAsync_EventInit(event, WOLF_EVENT_TYPE_ASYNC_WOLFSSL, + sigCtx->asyncCtx, WC_ASYNC_FLAG_CALL_AGAIN); + } + #endif + } /* SIG_STATE_KEY */ + FALL_THROUGH; + + case SIG_STATE_DO: + { + switch (keyOID) { + #ifndef NO_RSA + case RSAk: + { + #ifdef HAVE_PK_CALLBACKS + if (sigCtx->pkCbRsa) { + ret = sigCtx->pkCbRsa( + sigCtx->plain, sigSz, &sigCtx->out, + key, keySz, + sigCtx->pkCtxRsa); + } + else + #endif /* HAVE_PK_CALLBACKS */ + { + #ifdef WOLFSSL_RENESAS_TSIP_TLS + if (rsaKeyIdx != NULL) + { + ret = tsip_tls_CertVerify(buf, bufSz, sigCtx->plain, + sigSz, + sigCtx->pubkey_n_start - sigCtx->certBegin, + sigCtx->pubkey_n_len - 1, + sigCtx->pubkey_e_start - sigCtx->certBegin, + sigCtx->pubkey_e_len - 1, + rsaKeyIdx); + + if (ret == 0){ + sigCtx->verifyByTSIP = 1; + ret = 0; + } else { + WOLFSSL_MSG("RSA Verify by tsip didn't match"); + ret = ASN_SIG_CONFIRM_E; + } + } else + #endif + ret = wc_RsaSSL_VerifyInline(sigCtx->plain, sigSz, + &sigCtx->out, sigCtx->key.rsa); + } + break; + } + #endif /* !NO_RSA */ + #if defined(HAVE_ECC) + case ECDSAk: + { + #ifdef HAVE_PK_CALLBACKS + if (sigCtx->pkCbEcc) { + ret = sigCtx->pkCbEcc( + sig, sigSz, + sigCtx->digest, sigCtx->digestSz, + key, 
keySz, &sigCtx->verify, + sigCtx->pkCtxEcc); + } + else + #endif /* HAVE_PK_CALLBACKS */ + { + ret = wc_ecc_verify_hash(sig, sigSz, sigCtx->digest, + sigCtx->digestSz, &sigCtx->verify, + sigCtx->key.ecc); + } + break; + } + #endif /* HAVE_ECC */ + #ifdef HAVE_ED25519 + case ED25519k: + { + ret = wc_ed25519_verify_msg(sig, sigSz, buf, bufSz, + &sigCtx->verify, sigCtx->key.ed25519); + break; + } + #endif + #ifdef HAVE_ED448 + case ED448k: + { + ret = wc_ed448_verify_msg(sig, sigSz, buf, bufSz, + &sigCtx->verify, sigCtx->key.ed448, + NULL, 0); + break; + } + #endif + default: + break; + } /* switch (keyOID) */ + + #ifdef WOLFSSL_ASYNC_CRYPT + if (ret == WC_PENDING_E) { + goto exit_cs; + } + #endif + + if (ret < 0) { + /* treat all RSA errors as ASN_SIG_CONFIRM_E */ + ret = ASN_SIG_CONFIRM_E; + goto exit_cs; + } + + sigCtx->state = SIG_STATE_CHECK; + } /* SIG_STATE_DO */ + FALL_THROUGH; + + case SIG_STATE_CHECK: + { + switch (keyOID) { + #ifndef NO_RSA + case RSAk: + { + int encodedSigSz, verifySz; + #ifdef WOLFSSL_RENESAS_TSIP + if (sigCtx->verifyByTSIP == 1) break; + #endif + #ifdef WOLFSSL_SMALL_STACK + byte* encodedSig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, + sigCtx->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (encodedSig == NULL) { + ERROR_OUT(MEMORY_E, exit_cs); + } + #else + byte encodedSig[MAX_ENCODED_SIG_SZ]; + #endif + + verifySz = ret; + + /* make sure we're right justified */ + encodedSigSz = wc_EncodeSignature(encodedSig, + sigCtx->digest, sigCtx->digestSz, sigCtx->typeH); + if (encodedSigSz == verifySz && sigCtx->out != NULL && + XMEMCMP(sigCtx->out, encodedSig, encodedSigSz) == 0) { + ret = 0; + } + else { + WOLFSSL_MSG("RSA SSL verify match encode error"); + ret = ASN_SIG_CONFIRM_E; + } + + #ifdef WOLFSSL_SMALL_STACK + XFREE(encodedSig, sigCtx->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + break; + } + #endif /* NO_RSA */ + #ifdef HAVE_ECC + case ECDSAk: + { + if (sigCtx->verify == 1) { + ret = 0; + } + else { + WOLFSSL_MSG("ECC Verify didn't match"); + ret = ASN_SIG_CONFIRM_E; + } + break; + } + #endif /* HAVE_ECC */ + #ifdef HAVE_ED25519 + case ED25519k: + { + if (sigCtx->verify == 1) { + ret = 0; + } + else { + WOLFSSL_MSG("ED25519 Verify didn't match"); + ret = ASN_SIG_CONFIRM_E; + } + break; + } + #endif /* HAVE_ED25519 */ + #ifdef HAVE_ED448 + case ED448k: + { + if (sigCtx->verify == 1) { + ret = 0; + } + else { + WOLFSSL_MSG("ED448 Verify didn't match"); + ret = ASN_SIG_CONFIRM_E; + } + break; + } + #endif /* HAVE_ED448 */ + default: + break; + } /* switch (keyOID) */ + + break; + } /* SIG_STATE_CHECK */ + } /* switch (sigCtx->state) */ + +exit_cs: + +#endif /* !NO_ASN_CRYPT */ + + (void)keyOID; + (void)sigOID; + + WOLFSSL_LEAVE("ConfirmSignature", ret); + +#ifdef WOLFSSL_ASYNC_CRYPT + if (ret == WC_PENDING_E) + return ret; +#endif + + FreeSignatureCtx(sigCtx); + + return ret; +} + + +#ifndef IGNORE_NAME_CONSTRAINTS + +static int MatchBaseName(int type, const char* name, int nameSz, + const char* base, int baseSz) +{ + if (base == NULL || baseSz <= 0 || name == NULL || nameSz <= 0 || + name[0] == '.' || nameSz < baseSz || + (type != ASN_RFC822_TYPE && type != ASN_DNS_TYPE)) + return 0; + + /* If an email type, handle special cases where the base is only + * a domain, or is an email address itself. 
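+     * For example, a base of "wolfssl.com" should match the name
+     * "info@wolfssl.com" (the name is wound forward past its '@' below),
+     * while a base of "info@wolfssl.com" must match that exact mailbox.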
*/ + if (type == ASN_RFC822_TYPE) { + const char* p = NULL; + int count = 0; + + if (base[0] != '.') { + p = base; + count = 0; + + /* find the '@' in the base */ + while (*p != '@' && count < baseSz) { + count++; + p++; + } + + /* No '@' in base, reset p to NULL */ + if (count >= baseSz) + p = NULL; + } + + if (p == NULL) { + /* Base isn't an email address, it is a domain name, + * wind the name forward one character past its '@'. */ + p = name; + count = 0; + while (*p != '@' && count < baseSz) { + count++; + p++; + } + + if (count < baseSz && *p == '@') { + name = p + 1; + nameSz -= count + 1; + } + } + } + + if ((type == ASN_DNS_TYPE || type == ASN_RFC822_TYPE) && base[0] == '.') { + int szAdjust = nameSz - baseSz; + name += szAdjust; + nameSz -= szAdjust; + } + + while (nameSz > 0) { + if (XTOLOWER((unsigned char)*name++) != + XTOLOWER((unsigned char)*base++)) + return 0; + nameSz--; + } + + return 1; +} + + +static int ConfirmNameConstraints(Signer* signer, DecodedCert* cert) +{ + if (signer == NULL || cert == NULL) + return 0; + + /* Check against the excluded list */ + if (signer->excludedNames) { + Base_entry* base = signer->excludedNames; + + while (base != NULL) { + switch (base->type) { + case ASN_DNS_TYPE: + { + DNS_entry* name = cert->altNames; + while (name != NULL) { + if (MatchBaseName(ASN_DNS_TYPE, + name->name, name->len, + base->name, base->nameSz)) { + return 0; + } + name = name->next; + } + break; + } + case ASN_RFC822_TYPE: + { + DNS_entry* name = cert->altEmailNames; + while (name != NULL) { + if (MatchBaseName(ASN_RFC822_TYPE, + name->name, name->len, + base->name, base->nameSz)) { + return 0; + } + name = name->next; + } + break; + } + case ASN_DIR_TYPE: + { + /* allow permitted dirName smaller than actual subject */ + if (cert->subjectRawLen >= base->nameSz && + XMEMCMP(cert->subjectRaw, base->name, + base->nameSz) == 0) { + return 0; + } + break; + } + }; /* switch */ + base = base->next; + } + } + + /* Check against the permitted list */ + if (signer->permittedNames != NULL) { + int needDns = 0; + int matchDns = 0; + int needEmail = 0; + int matchEmail = 0; + int needDir = 0; + int matchDir = 0; + Base_entry* base = signer->permittedNames; + + while (base != NULL) { + switch (base->type) { + case ASN_DNS_TYPE: + { + DNS_entry* name = cert->altNames; + + if (name != NULL) + needDns = 1; + + while (name != NULL) { + matchDns = MatchBaseName(ASN_DNS_TYPE, + name->name, name->len, + base->name, base->nameSz); + name = name->next; + } + break; + } + case ASN_RFC822_TYPE: + { + DNS_entry* name = cert->altEmailNames; + + if (name != NULL) + needEmail = 1; + + while (name != NULL) { + matchEmail = MatchBaseName(ASN_DNS_TYPE, + name->name, name->len, + base->name, base->nameSz); + name = name->next; + } + break; + } + case ASN_DIR_TYPE: + { + /* allow permitted dirName smaller than actual subject */ + needDir = 1; + if (cert->subjectRaw != NULL && + cert->subjectRawLen >= base->nameSz && + XMEMCMP(cert->subjectRaw, base->name, + base->nameSz) == 0) { + matchDir = 1; + } + break; + } + } /* switch */ + base = base->next; + } + + if ((needDns && !matchDns) || + (needEmail && !matchEmail) || + (needDir && !matchDir)) { + return 0; + } + } + + return 1; +} + +#endif /* IGNORE_NAME_CONSTRAINTS */ + +static int DecodeAltNames(const byte* input, int sz, DecodedCert* cert) +{ + word32 idx = 0; + int length = 0; + + WOLFSSL_ENTER("DecodeAltNames"); + + if (GetSequence(input, &idx, &length, sz) < 0) { + WOLFSSL_MSG("\tBad Sequence"); + return ASN_PARSE_E; + } + + if (length == 
0) { + /* RFC 5280 4.2.1.6. Subject Alternative Name + If the subjectAltName extension is present, the sequence MUST + contain at least one entry. */ + return ASN_PARSE_E; + } + + cert->weOwnAltNames = 1; + + while (length > 0) { + byte b = input[idx++]; + + length--; + + /* Save DNS Type names in the altNames list. */ + /* Save Other Type names in the cert's OidMap */ + if (b == (ASN_CONTEXT_SPECIFIC | ASN_DNS_TYPE)) { + DNS_entry* dnsEntry; + int strLen; + word32 lenStartIdx = idx; + + if (GetLength(input, &idx, &strLen, sz) < 0) { + WOLFSSL_MSG("\tfail: str length"); + return ASN_PARSE_E; + } + length -= (idx - lenStartIdx); + + dnsEntry = (DNS_entry*)XMALLOC(sizeof(DNS_entry), cert->heap, + DYNAMIC_TYPE_ALTNAME); + if (dnsEntry == NULL) { + WOLFSSL_MSG("\tOut of Memory"); + return MEMORY_E; + } + + dnsEntry->type = ASN_DNS_TYPE; + dnsEntry->name = (char*)XMALLOC(strLen + 1, cert->heap, + DYNAMIC_TYPE_ALTNAME); + if (dnsEntry->name == NULL) { + WOLFSSL_MSG("\tOut of Memory"); + XFREE(dnsEntry, cert->heap, DYNAMIC_TYPE_ALTNAME); + return MEMORY_E; + } + dnsEntry->len = strLen; + XMEMCPY(dnsEntry->name, &input[idx], strLen); + dnsEntry->name[strLen] = '\0'; + + dnsEntry->next = cert->altNames; + cert->altNames = dnsEntry; + + length -= strLen; + idx += strLen; + } + #ifndef IGNORE_NAME_CONSTRAINTS + else if (b == (ASN_CONTEXT_SPECIFIC | ASN_RFC822_TYPE)) { + DNS_entry* emailEntry; + int strLen; + word32 lenStartIdx = idx; + + if (GetLength(input, &idx, &strLen, sz) < 0) { + WOLFSSL_MSG("\tfail: str length"); + return ASN_PARSE_E; + } + length -= (idx - lenStartIdx); + + emailEntry = (DNS_entry*)XMALLOC(sizeof(DNS_entry), cert->heap, + DYNAMIC_TYPE_ALTNAME); + if (emailEntry == NULL) { + WOLFSSL_MSG("\tOut of Memory"); + return MEMORY_E; + } + + emailEntry->type = ASN_RFC822_TYPE; + emailEntry->name = (char*)XMALLOC(strLen + 1, cert->heap, + DYNAMIC_TYPE_ALTNAME); + if (emailEntry->name == NULL) { + WOLFSSL_MSG("\tOut of Memory"); + XFREE(emailEntry, cert->heap, DYNAMIC_TYPE_ALTNAME); + return MEMORY_E; + } + emailEntry->len = strLen; + XMEMCPY(emailEntry->name, &input[idx], strLen); + emailEntry->name[strLen] = '\0'; + + emailEntry->next = cert->altEmailNames; + cert->altEmailNames = emailEntry; + + length -= strLen; + idx += strLen; + } + else if (b == (ASN_CONTEXT_SPECIFIC | ASN_URI_TYPE)) { + DNS_entry* uriEntry; + int strLen; + word32 lenStartIdx = idx; + + WOLFSSL_MSG("\tPutting URI into list but not using"); + if (GetLength(input, &idx, &strLen, sz) < 0) { + WOLFSSL_MSG("\tfail: str length"); + return ASN_PARSE_E; + } + length -= (idx - lenStartIdx); + + /* check that strLen at index is not past input buffer */ + if (strLen + (int)idx > sz) { + return BUFFER_E; + } + + #ifndef WOLFSSL_NO_ASN_STRICT + /* Verify RFC 5280 Sec 4.2.1.6 rule: + "The name MUST NOT be a relative URI" */ + + { + int i; + + /* skip past scheme (i.e http,ftp,...) finding first ':' char */ + for (i = 0; i < strLen; i++) { + if (input[idx + i] == ':') { + break; + } + if (input[idx + i] == '/') { + i = strLen; /* error, found relative path since '/' was + * encountered before ':'. Returning error + * value in next if statement. 
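+                              * For example "http://example.com/crl" passes
+                              * the "://" test below, while "/crl" or
+                              * "example.com/crl" are rejected as relative.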
*/ + } + } + + /* test if no ':' char was found and test that the next two + * chars are // to match the pattern "://" */ + if (i >= strLen - 2 || (input[idx + i + 1] != '/' || + input[idx + i + 2] != '/')) { + WOLFSSL_MSG("\tAlt Name must be absolute URI"); + return ASN_ALT_NAME_E; + } + } + #endif + + uriEntry = (DNS_entry*)XMALLOC(sizeof(DNS_entry), cert->heap, + DYNAMIC_TYPE_ALTNAME); + if (uriEntry == NULL) { + WOLFSSL_MSG("\tOut of Memory"); + return MEMORY_E; + } + + uriEntry->type = ASN_URI_TYPE; + uriEntry->name = (char*)XMALLOC(strLen + 1, cert->heap, + DYNAMIC_TYPE_ALTNAME); + if (uriEntry->name == NULL) { + WOLFSSL_MSG("\tOut of Memory"); + XFREE(uriEntry, cert->heap, DYNAMIC_TYPE_ALTNAME); + return MEMORY_E; + } + uriEntry->len = strLen; + XMEMCPY(uriEntry->name, &input[idx], strLen); + uriEntry->name[strLen] = '\0'; + + uriEntry->next = cert->altNames; + cert->altNames = uriEntry; + + length -= strLen; + idx += strLen; + } +#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL) + else if (b == (ASN_CONTEXT_SPECIFIC | ASN_IP_TYPE)) { + DNS_entry* ipAddr; + int strLen; + word32 lenStartIdx = idx; + WOLFSSL_MSG("Decoding Subject Alt. Name: IP Address"); + + if (GetLength(input, &idx, &strLen, sz) < 0) { + WOLFSSL_MSG("\tfail: str length"); + return ASN_PARSE_E; + } + length -= (idx - lenStartIdx); + /* check that strLen at index is not past input buffer */ + if (strLen + (int)idx > sz) { + return BUFFER_E; + } + + ipAddr = (DNS_entry*)XMALLOC(sizeof(DNS_entry), cert->heap, + DYNAMIC_TYPE_ALTNAME); + if (ipAddr == NULL) { + WOLFSSL_MSG("\tOut of Memory"); + return MEMORY_E; + } + + ipAddr->type = ASN_IP_TYPE; + ipAddr->name = (char*)XMALLOC(strLen + 1, cert->heap, + DYNAMIC_TYPE_ALTNAME); + if (ipAddr->name == NULL) { + WOLFSSL_MSG("\tOut of Memory"); + XFREE(ipAddr, cert->heap, DYNAMIC_TYPE_ALTNAME); + return MEMORY_E; + } + ipAddr->len = strLen; + XMEMCPY(ipAddr->name, &input[idx], strLen); + ipAddr->name[strLen] = '\0'; + + ipAddr->next = cert->altNames; + cert->altNames = ipAddr; + + length -= strLen; + idx += strLen; + } +#endif /* WOLFSSL_QT || OPENSSL_ALL */ +#endif /* IGNORE_NAME_CONSTRAINTS */ +#ifdef WOLFSSL_SEP + else if (b == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | ASN_OTHER_TYPE)) + { + int strLen; + word32 lenStartIdx = idx; + word32 oid = 0; + int ret; + byte tag; + + if (GetLength(input, &idx, &strLen, sz) < 0) { + WOLFSSL_MSG("\tfail: other name length"); + return ASN_PARSE_E; + } + /* Consume the rest of this sequence. 
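+             * An otherName is decoded further only when it carries the
+             * HW_NAME_OID (SEP hardware module name) checked just below;
+             * any other OID fails with ASN_PARSE_E.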
*/ + length -= (strLen + idx - lenStartIdx); + + if (GetObjectId(input, &idx, &oid, oidCertAltNameType, sz) < 0) { + WOLFSSL_MSG("\tbad OID"); + return ASN_PARSE_E; + } + + if (oid != HW_NAME_OID) { + WOLFSSL_MSG("\tincorrect OID"); + return ASN_PARSE_E; + } + + if (GetASNTag(input, &idx, &tag, sz) < 0) { + return ASN_PARSE_E; + } + + if (tag != (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED)) { + WOLFSSL_MSG("\twrong type"); + return ASN_PARSE_E; + } + + if (GetLength(input, &idx, &strLen, sz) < 0) { + WOLFSSL_MSG("\tfail: str len"); + return ASN_PARSE_E; + } + + if (GetSequence(input, &idx, &strLen, sz) < 0) { + WOLFSSL_MSG("\tBad Sequence"); + return ASN_PARSE_E; + } + + ret = GetASNObjectId(input, &idx, &strLen, sz); + if (ret != 0) { + WOLFSSL_MSG("\tbad OID"); + return ret; + } + + cert->hwType = (byte*)XMALLOC(strLen, cert->heap, + DYNAMIC_TYPE_X509_EXT); + if (cert->hwType == NULL) { + WOLFSSL_MSG("\tOut of Memory"); + return MEMORY_E; + } + + XMEMCPY(cert->hwType, &input[idx], strLen); + cert->hwTypeSz = strLen; + idx += strLen; + + ret = GetOctetString(input, &idx, &strLen, sz); + if (ret < 0) + return ret; + + cert->hwSerialNum = (byte*)XMALLOC(strLen + 1, cert->heap, + DYNAMIC_TYPE_X509_EXT); + if (cert->hwSerialNum == NULL) { + WOLFSSL_MSG("\tOut of Memory"); + return MEMORY_E; + } + + XMEMCPY(cert->hwSerialNum, &input[idx], strLen); + cert->hwSerialNum[strLen] = '\0'; + cert->hwSerialNumSz = strLen; + idx += strLen; + } + #endif /* WOLFSSL_SEP */ + else { + int strLen; + word32 lenStartIdx = idx; + + WOLFSSL_MSG("\tUnsupported name type, skipping"); + + if (GetLength(input, &idx, &strLen, sz) < 0) { + WOLFSSL_MSG("\tfail: unsupported name length"); + return ASN_PARSE_E; + } + length -= (strLen + idx - lenStartIdx); + idx += strLen; + } + } + return 0; +} + +static int DecodeBasicCaConstraint(const byte* input, int sz, DecodedCert* cert) +{ + word32 idx = 0; + int length = 0; + int ret; + + WOLFSSL_ENTER("DecodeBasicCaConstraint"); + + if (GetSequence(input, &idx, &length, sz) < 0) { + WOLFSSL_MSG("\tfail: bad SEQUENCE"); + return ASN_PARSE_E; + } + + if (length == 0) + return 0; + + /* If the basic ca constraint is false, this extension may be named, but + * left empty. So, if the length is 0, just return. */ + + ret = GetBoolean(input, &idx, sz); + +#ifndef WOLFSSL_X509_BASICCONS_INT + if (ret < 0) { + WOLFSSL_MSG("\tfail: constraint not valid BOOLEAN"); + return ret; + } + + cert->isCA = (byte)ret; +#else + if (ret < 0) { + if(input[idx] == ASN_INTEGER) { + /* For OpenSSL compatibility, if ASN_INTEGER it is valid format */ + cert->isCA = FALSE; + } else return ret; + } else + cert->isCA = (byte)ret; +#endif + + /* If there isn't any more data, return. 
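+     * pathLenConstraint is OPTIONAL (RFC 5280 section 4.2.1.9), so a
+     * BasicConstraints holding only cA=TRUE is still well-formed.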
*/ + if (idx >= (word32)sz) { + return 0; + } + + ret = GetInteger7Bit(input, &idx, sz); + if (ret < 0) + return ret; + cert->pathLength = (byte)ret; + cert->pathLengthSet = 1; + + return 0; +} + + +#define CRLDP_FULL_NAME 0 + /* From RFC3280 SS4.2.1.14, Distribution Point Name*/ +#define GENERALNAME_URI 6 + /* From RFC3280 SS4.2.1.7, GeneralName */ + +static int DecodeCrlDist(const byte* input, int sz, DecodedCert* cert) +{ + word32 idx = 0, localIdx; + int length = 0; + byte tag = 0; + + WOLFSSL_ENTER("DecodeCrlDist"); + + /* Unwrap the list of Distribution Points*/ + if (GetSequence(input, &idx, &length, sz) < 0) + return ASN_PARSE_E; + + /* Unwrap a single Distribution Point */ + if (GetSequence(input, &idx, &length, sz) < 0) + return ASN_PARSE_E; + + /* The Distribution Point has three explicit optional members + * First check for a DistributionPointName + */ + localIdx = idx; + if (GetASNTag(input, &localIdx, &tag, sz) == 0 && + tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) + { + idx++; + if (GetLength(input, &idx, &length, sz) < 0) + return ASN_PARSE_E; + + localIdx = idx; + if (GetASNTag(input, &localIdx, &tag, sz) == 0 && + tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | + CRLDP_FULL_NAME)) + { + idx++; + if (GetLength(input, &idx, &length, sz) < 0) + return ASN_PARSE_E; + + localIdx = idx; + if (GetASNTag(input, &localIdx, &tag, sz) == 0 && + tag == (ASN_CONTEXT_SPECIFIC | GENERALNAME_URI)) + { + idx++; + if (GetLength(input, &idx, &length, sz) < 0) + return ASN_PARSE_E; + + cert->extCrlInfoSz = length; + cert->extCrlInfo = input + idx; + idx += length; + } + else + /* This isn't a URI, skip it. */ + idx += length; + } + else { + /* This isn't a FULLNAME, skip it. */ + idx += length; + } + } + + /* Check for reasonFlags */ + localIdx = idx; + if (idx < (word32)sz && + GetASNTag(input, &localIdx, &tag, sz) == 0 && + tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) + { + idx++; + if (GetLength(input, &idx, &length, sz) < 0) + return ASN_PARSE_E; + idx += length; + } + + /* Check for cRLIssuer */ + localIdx = idx; + if (idx < (word32)sz && + GetASNTag(input, &localIdx, &tag, sz) == 0 && + tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 2)) + { + idx++; + if (GetLength(input, &idx, &length, sz) < 0) + return ASN_PARSE_E; + idx += length; + } + + if (idx < (word32)sz) + { + WOLFSSL_MSG("\tThere are more CRL Distribution Point records, " + "but we only use the first one."); + } + + return 0; +} + + +static int DecodeAuthInfo(const byte* input, int sz, DecodedCert* cert) +/* + * Read Authority Information Access records. If there are + * any issues, return without saving the record. + */ +{ + word32 idx = 0; + int length = 0; + int count = 0; + byte b = 0; + word32 oid; + + WOLFSSL_ENTER("DecodeAuthInfo"); + + /* Unwrap the list of AIAs */ + if (GetSequence(input, &idx, &length, sz) < 0) + return ASN_PARSE_E; + + while ((idx < (word32)sz) && (count < MAX_AIA_SZ)) { + /* Unwrap a single AIA */ + if (GetSequence(input, &idx, &length, sz) < 0) + return ASN_PARSE_E; + + oid = 0; + if (GetObjectId(input, &idx, &oid, oidCertAuthInfoType, sz) < 0) + return ASN_PARSE_E; + + /* Only supporting URIs right now. 
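+         * Each AccessDescription is an accessMethod OID (OCSP or caIssuers)
+         * followed by a GeneralName; only uniformResourceIdentifier ([6])
+         * entries are recorded.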
*/ + if (GetASNTag(input, &idx, &b, sz) < 0) + return ASN_PARSE_E; + + if (GetLength(input, &idx, &length, sz) < 0) + return ASN_PARSE_E; + + /* Set ocsp entry */ + if (b == (ASN_CONTEXT_SPECIFIC | GENERALNAME_URI) && + oid == AIA_OCSP_OID) + { + cert->extAuthInfoSz = length; + cert->extAuthInfo = input + idx; + count++; + #if !defined(OPENSSL_ALL) || !defined(WOLFSSL_QT) + break; + #endif + } + #if defined(OPENSSL_ALL) || defined(WOLFSSL_QT) + /* Set CaIssuers entry */ + else if ((b == (ASN_CONTEXT_SPECIFIC | GENERALNAME_URI)) && + oid == AIA_CA_ISSUER_OID) + { + cert->extAuthInfoCaIssuerSz = length; + cert->extAuthInfoCaIssuer = input + idx; + count++; + } + #endif + idx += length; + } + + return 0; +} + + +static int DecodeAuthKeyId(const byte* input, int sz, DecodedCert* cert) +{ + word32 idx = 0; + int length = 0, ret = 0; + byte tag; + + WOLFSSL_ENTER("DecodeAuthKeyId"); + + if (GetSequence(input, &idx, &length, sz) < 0) { + WOLFSSL_MSG("\tfail: should be a SEQUENCE\n"); + return ASN_PARSE_E; + } + + if (GetASNTag(input, &idx, &tag, sz) < 0) { + return ASN_PARSE_E; + } + + if (tag != (ASN_CONTEXT_SPECIFIC | 0)) { + WOLFSSL_MSG("\tinfo: OPTIONAL item 0, not available\n"); + cert->extAuthKeyIdSet = 0; + return 0; + } + + if (GetLength(input, &idx, &length, sz) <= 0) { + WOLFSSL_MSG("\tfail: extension data length"); + return ASN_PARSE_E; + } + +#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + cert->extAuthKeyIdSrc = &input[idx]; + cert->extAuthKeyIdSz = length; +#endif /* OPENSSL_EXTRA */ + + if (length == KEYID_SIZE) { + XMEMCPY(cert->extAuthKeyId, input + idx, length); + } + else + ret = CalcHashId(input + idx, length, cert->extAuthKeyId); + + return ret; +} + + +static int DecodeSubjKeyId(const byte* input, int sz, DecodedCert* cert) +{ + word32 idx = 0; + int length = 0, ret = 0; + + WOLFSSL_ENTER("DecodeSubjKeyId"); + + if (sz <= 0) + return ASN_PARSE_E; + + ret = GetOctetString(input, &idx, &length, sz); + if (ret < 0) + return ret; + + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + cert->extSubjKeyIdSrc = &input[idx]; + cert->extSubjKeyIdSz = length; + #endif /* OPENSSL_EXTRA */ + + if (length == KEYID_SIZE) { + XMEMCPY(cert->extSubjKeyId, input + idx, length); + } + else + ret = CalcHashId(input + idx, length, cert->extSubjKeyId); + + return ret; +} + + +static int DecodeKeyUsage(const byte* input, int sz, DecodedCert* cert) +{ + word32 idx = 0; + int length; + int ret; + WOLFSSL_ENTER("DecodeKeyUsage"); + + ret = CheckBitString(input, &idx, &length, sz, 0, NULL); + if (ret != 0) + return ret; + + cert->extKeyUsage = (word16)(input[idx]); + if (length == 2) + cert->extKeyUsage |= (word16)(input[idx+1] << 8); + + return 0; +} + + +static int DecodeExtKeyUsage(const byte* input, int sz, DecodedCert* cert) +{ + word32 idx = 0, oid; + int length, ret; + + WOLFSSL_MSG("DecodeExtKeyUsage"); + + if (GetSequence(input, &idx, &length, sz) < 0) { + WOLFSSL_MSG("\tfail: should be a SEQUENCE"); + return ASN_PARSE_E; + } + +#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + cert->extExtKeyUsageSrc = input + idx; + cert->extExtKeyUsageSz = length; +#endif + + while (idx < (word32)sz) { + ret = GetObjectId(input, &idx, &oid, oidCertKeyUseType, sz); + if (ret == ASN_UNKNOWN_OID_E) + continue; + else if (ret < 0) + return ret; + + switch (oid) { + case EKU_ANY_OID: + cert->extExtKeyUsage |= EXTKEYUSE_ANY; + break; + case EKU_SERVER_AUTH_OID: + cert->extExtKeyUsage |= EXTKEYUSE_SERVER_AUTH; + break; + case EKU_CLIENT_AUTH_OID: + 
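+                /* id-kp-clientAuth: TLS WWW client authentication
+                 * (RFC 5280 section 4.2.1.12) */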
cert->extExtKeyUsage |= EXTKEYUSE_CLIENT_AUTH; + break; + case EKU_CODESIGNING_OID: + cert->extExtKeyUsage |= EXTKEYUSE_CODESIGN; + break; + case EKU_EMAILPROTECT_OID: + cert->extExtKeyUsage |= EXTKEYUSE_EMAILPROT; + break; + case EKU_TIMESTAMP_OID: + cert->extExtKeyUsage |= EXTKEYUSE_TIMESTAMP; + break; + case EKU_OCSP_SIGN_OID: + cert->extExtKeyUsage |= EXTKEYUSE_OCSP_SIGN; + break; + } + + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + cert->extExtKeyUsageCount++; + #endif + } + + return 0; +} + + +#ifndef IGNORE_NAME_CONSTRAINTS +#define ASN_TYPE_MASK 0xF +static int DecodeSubtree(const byte* input, int sz, + Base_entry** head, void* heap) +{ + word32 idx = 0; + + (void)heap; + + while (idx < (word32)sz) { + int seqLength, strLength; + word32 nameIdx; + byte b, bType; + + if (GetSequence(input, &idx, &seqLength, sz) < 0) { + WOLFSSL_MSG("\tfail: should be a SEQUENCE"); + return ASN_PARSE_E; + } + nameIdx = idx; + b = input[nameIdx++]; + + if (GetLength(input, &nameIdx, &strLength, sz) <= 0) { + WOLFSSL_MSG("\tinvalid length"); + return ASN_PARSE_E; + } + + /* Get type, LSB 4-bits */ + bType = (b & ASN_TYPE_MASK); + + if (bType == ASN_DNS_TYPE || bType == ASN_RFC822_TYPE || + bType == ASN_DIR_TYPE) { + Base_entry* entry; + + /* if constructed has leading sequence */ + if (b & ASN_CONSTRUCTED) { + if (GetSequence(input, &nameIdx, &strLength, sz) < 0) { + WOLFSSL_MSG("\tfail: constructed be a SEQUENCE"); + return ASN_PARSE_E; + } + } + + entry = (Base_entry*)XMALLOC(sizeof(Base_entry), heap, + DYNAMIC_TYPE_ALTNAME); + if (entry == NULL) { + WOLFSSL_MSG("allocate error"); + return MEMORY_E; + } + + entry->name = (char*)XMALLOC(strLength, heap, DYNAMIC_TYPE_ALTNAME); + if (entry->name == NULL) { + WOLFSSL_MSG("allocate error"); + XFREE(entry, heap, DYNAMIC_TYPE_ALTNAME); + return MEMORY_E; + } + + XMEMCPY(entry->name, &input[nameIdx], strLength); + entry->nameSz = strLength; + entry->type = bType; + + entry->next = *head; + *head = entry; + } + + idx += seqLength; + } + + return 0; +} + + +static int DecodeNameConstraints(const byte* input, int sz, DecodedCert* cert) +{ + word32 idx = 0; + int length = 0; + + WOLFSSL_ENTER("DecodeNameConstraints"); + + if (GetSequence(input, &idx, &length, sz) < 0) { + WOLFSSL_MSG("\tfail: should be a SEQUENCE"); + return ASN_PARSE_E; + } + + while (idx < (word32)sz) { + byte b = input[idx++]; + Base_entry** subtree = NULL; + + if (GetLength(input, &idx, &length, sz) <= 0) { + WOLFSSL_MSG("\tinvalid length"); + return ASN_PARSE_E; + } + + if (b == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 0)) + subtree = &cert->permittedNames; + else if (b == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 1)) + subtree = &cert->excludedNames; + else { + WOLFSSL_MSG("\tinvalid subtree"); + return ASN_PARSE_E; + } + + DecodeSubtree(input + idx, length, subtree, cert->heap); + + idx += length; + } + + return 0; +} +#endif /* IGNORE_NAME_CONSTRAINTS */ + +#if (defined(WOLFSSL_CERT_EXT) && !defined(WOLFSSL_SEP)) || defined(OPENSSL_EXTRA) + +/* Decode ITU-T X.690 OID format to a string representation + * return string length */ +int DecodePolicyOID(char *out, word32 outSz, const byte *in, word32 inSz) +{ + word32 val, inIdx = 0, outIdx = 0; + int w = 0; + + if (out == NULL || in == NULL || outSz < 4 || inSz < 2) + return BAD_FUNC_ARG; + + /* The first byte expands into b/40 dot b%40. 
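+     * Worked example: the OID body 0x55 0x1D 0x11 (id-ce-subjectAltName)
+     * decodes as 0x55 = 85 -> "2.5" (85/40 = 2, 85%40 = 5), then
+     * 0x1D -> ".29" and 0x11 -> ".17", giving "2.5.29.17".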
*/ + val = in[inIdx++]; + + w = XSNPRINTF(out, outSz, "%u.%u", val / 40, val % 40); + if (w < 0) + goto exit; + outIdx += w; + val = 0; + + while (inIdx < inSz && outIdx < outSz) { + /* extract the next OID digit from in to val */ + /* first bit is used to set if value is coded on 1 or multiple bytes */ + if (in[inIdx] & 0x80) { + val += in[inIdx] & 0x7F; + val *= 128; + } + else { + /* write val as text into out */ + val += in[inIdx]; + w = XSNPRINTF(out + outIdx, outSz - outIdx, ".%u", val); + if (w < 0) + goto exit; + outIdx += w; + val = 0; + } + inIdx++; + } + if (outIdx == outSz) + outIdx--; + out[outIdx] = 0; + + w = (int)outIdx; + +exit: + return w; +} +#endif /* WOLFSSL_CERT_EXT && !WOLFSSL_SEP */ + +#if defined(WOLFSSL_SEP) || defined(WOLFSSL_CERT_EXT) || defined(WOLFSSL_QT) + /* Reference: https://tools.ietf.org/html/rfc5280#section-4.2.1.4 */ + static int DecodeCertPolicy(const byte* input, int sz, DecodedCert* cert) + { + word32 idx = 0; + word32 oldIdx; + int ret; + int total_length = 0, policy_length = 0, length = 0; + #if !defined(WOLFSSL_SEP) && defined(WOLFSSL_CERT_EXT) && \ + !defined(WOLFSSL_DUP_CERTPOL) + int i; + #endif + + WOLFSSL_ENTER("DecodeCertPolicy"); + #if defined(WOLFSSL_SEP) || defined(WOLFSSL_CERT_EXT) + /* Check if cert is null before dereferencing below */ + if (cert == NULL) + return BAD_FUNC_ARG; + #endif + + #if defined(WOLFSSL_CERT_EXT) + cert->extCertPoliciesNb = 0; + #endif + + if (GetSequence(input, &idx, &total_length, sz) < 0) { + WOLFSSL_MSG("\tGet CertPolicy total seq failed"); + return ASN_PARSE_E; + } + + /* Validate total length */ + if (total_length > (sz - (int)idx)) { + WOLFSSL_MSG("\tCertPolicy length mismatch"); + return ASN_PARSE_E; + } + + /* Unwrap certificatePolicies */ + do { + if (GetSequence(input, &idx, &policy_length, sz) < 0) { + WOLFSSL_MSG("\tGet CertPolicy seq failed"); + return ASN_PARSE_E; + } + + oldIdx = idx; + ret = GetASNObjectId(input, &idx, &length, sz); + if (ret != 0) + return ret; + policy_length -= idx - oldIdx; + + if (length > 0) { + /* Verify length won't overrun buffer */ + if (length > (sz - (int)idx)) { + WOLFSSL_MSG("\tCertPolicy length exceeds input buffer"); + return ASN_PARSE_E; + } + + #if defined(WOLFSSL_SEP) + cert->deviceType = (byte*)XMALLOC(length, cert->heap, + DYNAMIC_TYPE_X509_EXT); + if (cert->deviceType == NULL) { + WOLFSSL_MSG("\tCouldn't alloc memory for deviceType"); + return MEMORY_E; + } + cert->deviceTypeSz = length; + XMEMCPY(cert->deviceType, input + idx, length); + break; + #elif defined(WOLFSSL_CERT_EXT) + /* decode cert policy */ + if (DecodePolicyOID(cert->extCertPolicies[ + cert->extCertPoliciesNb], MAX_CERTPOL_SZ, + input + idx, length) <= 0) { + WOLFSSL_MSG("\tCouldn't decode CertPolicy"); + return ASN_PARSE_E; + } + #ifndef WOLFSSL_DUP_CERTPOL + /* From RFC 5280 section 4.2.1.3 "A certificate policy OID MUST + * NOT appear more than once in a certificate policies + * extension". This is a sanity check for duplicates. + * extCertPolicies should only have OID values, additional + * qualifiers need to be stored in a separate array. 
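+                     * The XMEMCMP over the full MAX_CERTPOL_SZ below relies
+                     * on the DecodedCert being zero-initialized, so the
+                     * unused tail of each entry compares equal.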
*/ + for (i = 0; i < cert->extCertPoliciesNb; i++) { + if (XMEMCMP(cert->extCertPolicies[i], + cert->extCertPolicies[cert->extCertPoliciesNb], + MAX_CERTPOL_SZ) == 0) { + WOLFSSL_MSG("Duplicate policy OIDs not allowed"); + WOLFSSL_MSG("Use WOLFSSL_DUP_CERTPOL if wanted"); + return CERTPOLICIES_E; + } + } + #endif /* !WOLFSSL_DUP_CERTPOL */ + cert->extCertPoliciesNb++; + #else + WOLFSSL_LEAVE("DecodeCertPolicy : unsupported mode", 0); + return 0; + #endif + } + idx += policy_length; + } while((int)idx < total_length + #if defined(WOLFSSL_CERT_EXT) + && cert->extCertPoliciesNb < MAX_CERTPOL_NB + #endif + ); + + WOLFSSL_LEAVE("DecodeCertPolicy", 0); + return 0; + } +#endif /* WOLFSSL_SEP */ + +/* Macro to check if bit is set, if not sets and return success. + Otherwise returns failure */ +/* Macro required here because bit-field operation */ +#ifndef WOLFSSL_NO_ASN_STRICT + #define VERIFY_AND_SET_OID(bit) \ + if (bit == 0) \ + bit = 1; \ + else \ + return ASN_OBJECT_ID_E; +#else + /* With no strict defined, the verify is skipped */ +#define VERIFY_AND_SET_OID(bit) bit = 1; +#endif + +static int DecodeCertExtensions(DecodedCert* cert) +/* + * Processing the Certificate Extensions. This does not modify the current + * index. It is works starting with the recorded extensions pointer. + */ +{ + int ret = 0; + word32 idx = 0; + int sz = cert->extensionsSz; + const byte* input = cert->extensions; + int length; + word32 oid; + byte critical = 0; + byte criticalFail = 0; + byte tag = 0; + + WOLFSSL_ENTER("DecodeCertExtensions"); + + if (input == NULL || sz == 0) + return BAD_FUNC_ARG; + + if (GetASNTag(input, &idx, &tag, sz) < 0) { + return ASN_PARSE_E; + } + + if (tag != ASN_EXTENSIONS) { + WOLFSSL_MSG("\tfail: should be an EXTENSIONS"); + return ASN_PARSE_E; + } + + if (GetLength(input, &idx, &length, sz) < 0) { + WOLFSSL_MSG("\tfail: invalid length"); + return ASN_PARSE_E; + } + + if (GetSequence(input, &idx, &length, sz) < 0) { + WOLFSSL_MSG("\tfail: should be a SEQUENCE (1)"); + return ASN_PARSE_E; + } + + while (idx < (word32)sz) { + word32 localIdx; + + if (GetSequence(input, &idx, &length, sz) < 0) { + WOLFSSL_MSG("\tfail: should be a SEQUENCE"); + return ASN_PARSE_E; + } + + oid = 0; + if ((ret = GetObjectId(input, &idx, &oid, oidCertExtType, sz)) < 0) { + WOLFSSL_MSG("\tfail: OBJECT ID"); + return ret; + } + + /* check for critical flag */ + critical = 0; + if ((idx + 1) > (word32)sz) { + WOLFSSL_MSG("\tfail: malformed buffer"); + return BUFFER_E; + } + + localIdx = idx; + if (GetASNTag(input, &localIdx, &tag, sz) == 0) { + if (tag == ASN_BOOLEAN) { + ret = GetBoolean(input, &idx, sz); + if (ret < 0) { + WOLFSSL_MSG("\tfail: critical boolean"); + return ret; + } + + critical = (byte)ret; + } + } + + /* process the extension based on the OID */ + ret = GetOctetString(input, &idx, &length, sz); + if (ret < 0) { + WOLFSSL_MSG("\tfail: bad OCTET STRING"); + return ret; + } + + switch (oid) { + case BASIC_CA_OID: + VERIFY_AND_SET_OID(cert->extBasicConstSet); + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + cert->extBasicConstCrit = critical; + #endif + if (DecodeBasicCaConstraint(&input[idx], length, cert) < 0) + return ASN_PARSE_E; + break; + + case CRL_DIST_OID: + VERIFY_AND_SET_OID(cert->extCRLdistSet); + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + cert->extCRLdistCrit = critical; + #endif + if (DecodeCrlDist(&input[idx], length, cert) < 0) + return ASN_PARSE_E; + break; + + case AUTH_INFO_OID: + VERIFY_AND_SET_OID(cert->extAuthInfoSet); + #if 
defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + cert->extAuthInfoCrit = critical; + #endif + if (DecodeAuthInfo(&input[idx], length, cert) < 0) + return ASN_PARSE_E; + break; + + case ALT_NAMES_OID: + VERIFY_AND_SET_OID(cert->extSubjAltNameSet); + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + cert->extSubjAltNameCrit = critical; + #endif + ret = DecodeAltNames(&input[idx], length, cert); + if (ret < 0) + return ret; + break; + + case AUTH_KEY_OID: + VERIFY_AND_SET_OID(cert->extAuthKeyIdSet); + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + cert->extAuthKeyIdCrit = critical; + #endif + #ifndef WOLFSSL_ALLOW_CRIT_SKID + /* This check is added due to RFC 5280 section 4.2.1.1 + * stating that conforming CA's must mark this extension + * as non-critical. When parsing extensions check that + * certificate was made in compliance with this. */ + if (critical) { + WOLFSSL_MSG("Critical Auth Key ID is not allowed"); + WOLFSSL_MSG("Use macro WOLFSSL_ALLOW_CRIT_SKID if wanted"); + return ASN_CRIT_EXT_E; + } + #endif + if (DecodeAuthKeyId(&input[idx], length, cert) < 0) + return ASN_PARSE_E; + break; + + case SUBJ_KEY_OID: + VERIFY_AND_SET_OID(cert->extSubjKeyIdSet); + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + cert->extSubjKeyIdCrit = critical; + #endif + #ifndef WOLFSSL_ALLOW_CRIT_SKID + /* This check is added due to RFC 5280 section 4.2.1.2 + * stating that conforming CA's must mark this extension + * as non-critical. When parsing extensions check that + * certificate was made in compliance with this. */ + if (critical) { + WOLFSSL_MSG("Critical Subject Key ID is not allowed"); + WOLFSSL_MSG("Use macro WOLFSSL_ALLOW_CRIT_SKID if wanted"); + return ASN_CRIT_EXT_E; + } + #endif + + if (DecodeSubjKeyId(&input[idx], length, cert) < 0) + return ASN_PARSE_E; + break; + + case CERT_POLICY_OID: + #if defined(WOLFSSL_SEP) || defined(WOLFSSL_QT) + VERIFY_AND_SET_OID(cert->extCertPolicySet); + #if defined(OPENSSL_EXTRA) || \ + defined(OPENSSL_EXTRA_X509_SMALL) + cert->extCertPolicyCrit = critical; + #endif + #endif + #if defined(WOLFSSL_SEP) || defined(WOLFSSL_CERT_EXT) || \ + defined(WOLFSSL_QT) + if (DecodeCertPolicy(&input[idx], length, cert) < 0) { + return ASN_PARSE_E; + } + #else + WOLFSSL_MSG("Certificate Policy extension not supported yet."); + #endif + break; + + case KEY_USAGE_OID: + VERIFY_AND_SET_OID(cert->extKeyUsageSet); + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + cert->extKeyUsageCrit = critical; + #endif + if (DecodeKeyUsage(&input[idx], length, cert) < 0) + return ASN_PARSE_E; + break; + + case EXT_KEY_USAGE_OID: + VERIFY_AND_SET_OID(cert->extExtKeyUsageSet); + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + cert->extExtKeyUsageCrit = critical; + #endif + if (DecodeExtKeyUsage(&input[idx], length, cert) < 0) + return ASN_PARSE_E; + break; + + #ifndef IGNORE_NAME_CONSTRAINTS + case NAME_CONS_OID: + #ifndef WOLFSSL_NO_ASN_STRICT + /* Verify RFC 5280 Sec 4.2.1.10 rule: + "The name constraints extension, + which MUST be used only in a CA certificate" */ + if (!cert->isCA) { + WOLFSSL_MSG("Name constraints allowed only for CA certs"); + return ASN_NAME_INVALID_E; + } + #endif + VERIFY_AND_SET_OID(cert->extNameConstraintSet); + #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL) + cert->extNameConstraintCrit = critical; + #endif + if (DecodeNameConstraints(&input[idx], length, cert) < 0) + return ASN_PARSE_E; + break; + #endif /* IGNORE_NAME_CONSTRAINTS */ + + case 
INHIBIT_ANY_OID: + VERIFY_AND_SET_OID(cert->inhibitAnyOidSet); + WOLFSSL_MSG("Inhibit anyPolicy extension not supported yet."); + break; + + #ifndef IGNORE_NETSCAPE_CERT_TYPE + case NETSCAPE_CT_OID: + WOLFSSL_MSG("Netscape certificate type extension not supported " + "yet."); + if (CheckBitString(input, &idx, &length, idx + length, 0, + NULL) < 0) { + return ASN_PARSE_E; + } + break; + #endif + + default: + #ifndef WOLFSSL_NO_ASN_STRICT + /* While it is a failure to not support critical extensions, + * still parse the certificate ignoring the unsupported + * extension to allow caller to accept it with the verify + * callback. */ + if (critical) + criticalFail = 1; + #endif + break; + } + idx += length; + } + + return criticalFail ? ASN_CRIT_EXT_E : 0; +} + +int ParseCert(DecodedCert* cert, int type, int verify, void* cm) +{ + int ret; + char* ptr; + + ret = ParseCertRelative(cert, type, verify, cm); + if (ret < 0) + return ret; + + if (cert->subjectCNLen > 0) { + ptr = (char*) XMALLOC(cert->subjectCNLen + 1, cert->heap, + DYNAMIC_TYPE_SUBJECT_CN); + if (ptr == NULL) + return MEMORY_E; + XMEMCPY(ptr, cert->subjectCN, cert->subjectCNLen); + ptr[cert->subjectCNLen] = '\0'; + cert->subjectCN = ptr; + cert->subjectCNStored = 1; + } + + if (cert->keyOID == RSAk && + cert->publicKey != NULL && cert->pubKeySize > 0) { + ptr = (char*) XMALLOC(cert->pubKeySize, cert->heap, + DYNAMIC_TYPE_PUBLIC_KEY); + if (ptr == NULL) + return MEMORY_E; + XMEMCPY(ptr, cert->publicKey, cert->pubKeySize); + cert->publicKey = (byte *)ptr; + cert->pubKeyStored = 1; + } + + return ret; +} + +/* from SSL proper, for locking can't do find here anymore */ +#ifdef __cplusplus + extern "C" { +#endif + Signer* GetCA(void* signers, byte* hash); + #ifndef NO_SKID + Signer* GetCAByName(void* signers, byte* hash); + #endif +#ifdef __cplusplus + } +#endif + +#if defined(WOLFCRYPT_ONLY) || defined(NO_CERTS) + +/* dummy functions, not using wolfSSL so don't need actual ones */ +Signer* GetCA(void* signers, byte* hash) +{ + (void)hash; + + return (Signer*)signers; +} + +#ifndef NO_SKID +Signer* GetCAByName(void* signers, byte* hash) +{ + (void)hash; + + return (Signer*)signers; +} +#endif /* NO_SKID */ + +#endif /* WOLFCRYPT_ONLY || NO_CERTS */ + +#if defined(WOLFSSL_NO_TRUSTED_CERTS_VERIFY) && !defined(NO_SKID) +static Signer* GetCABySubjectAndPubKey(DecodedCert* cert, void* cm) +{ + Signer* ca = NULL; + if (cert->extSubjKeyIdSet) + ca = GetCA(cm, cert->extSubjKeyId); + if (ca == NULL) + ca = GetCAByName(cm, cert->subjectHash); + if (ca) { + if ((ca->pubKeySize == cert->pubKeySize) && + (XMEMCMP(ca->publicKey, cert->publicKey, ca->pubKeySize) == 0)) { + return ca; + } + } + return NULL; +} +#endif + +#if defined(WOLFSSL_SMALL_CERT_VERIFY) || defined(OPENSSL_EXTRA) +/* Only quick step through the certificate to find fields that are then used + * in certificate signature verification. + * Must use the signature OID from the signed part of the certificate. + * + * This is only for minimizing dynamic memory usage during TLS certificate + * chain processing. 
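+ * Either cm (for a CA lookup) or pubKey (a raw public key to verify
+ * against) is used; the CheckCertSignature*() wrappers below select one.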
+ * Doesn't support: + * OCSP Only: alt lookup using subject and pub key w/o sig check + */ +static int CheckCertSignature_ex(const byte* cert, word32 certSz, void* heap, + void* cm, const byte* pubKey, word32 pubKeySz, int pubKeyOID) +{ +#ifndef WOLFSSL_SMALL_STACK + SignatureCtx sigCtx[1]; +#else + SignatureCtx* sigCtx; +#endif + byte hash[KEYID_SIZE]; + Signer* ca = NULL; + word32 idx = 0; + int len; + word32 tbsCertIdx = 0; + word32 sigIndex = 0; + word32 signatureOID = 0; + word32 oid = 0; + word32 issuerIdx = 0; + word32 issuerSz = 0; +#ifndef NO_SKID + int extLen = 0; + word32 extIdx = 0; + word32 extEndIdx = 0; + int extAuthKeyIdSet = 0; +#endif + int ret = 0; + word32 localIdx; + byte tag; + + + if (cert == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_SMALL_STACK + sigCtx = (SignatureCtx*)XMALLOC(sizeof(*sigCtx), heap, DYNAMIC_TYPE_SIGNATURE); + if (sigCtx == NULL) + return MEMORY_E; +#endif + InitSignatureCtx(sigCtx, heap, INVALID_DEVID); + + /* Certificate SEQUENCE */ + if (GetSequence(cert, &idx, &len, certSz) < 0) + ret = ASN_PARSE_E; + if (ret == 0) { + tbsCertIdx = idx; + + /* TBSCertificate SEQUENCE */ + if (GetSequence(cert, &idx, &len, certSz) < 0) + ret = ASN_PARSE_E; + } + if (ret == 0) { + sigIndex = len + idx; + + if ((idx + 1) > certSz) + ret = BUFFER_E; + } + if (ret == 0) { + /* version - optional */ + localIdx = idx; + if (GetASNTag(cert, &localIdx, &tag, certSz) == 0) { + if (tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED)) { + idx++; + if (GetLength(cert, &idx, &len, certSz) < 0) + ret = ASN_PARSE_E; + idx += len; + } + } + } + + if (ret == 0) { + /* serialNumber */ + if (GetASNHeader(cert, ASN_INTEGER, &idx, &len, certSz) < 0) + ret = ASN_PARSE_E; + } + if (ret == 0) { + idx += len; + + /* signature */ + if (GetAlgoId(cert, &idx, &signatureOID, oidSigType, certSz) < 0) + ret = ASN_PARSE_E; + } + + if (ret == 0) { + issuerIdx = idx; + /* issuer */ + if (GetSequence(cert, &idx, &len, certSz) < 0) + ret = ASN_PARSE_E; + } + if (ret == 0) { + issuerSz = len + idx - issuerIdx; + } +#ifndef NO_SKID + if (ret == 0) { + idx += len; + + /* validity */ + if (GetSequence(cert, &idx, &len, certSz) < 0) + ret = ASN_PARSE_E; + } + if (ret == 0) { + idx += len; + + /* subject */ + if (GetSequence(cert, &idx, &len, certSz) < 0) + ret = ASN_PARSE_E; + } + if (ret == 0) { + idx += len; + + /* subjectPublicKeyInfo */ + if (GetSequence(cert, &idx, &len, certSz) < 0) + ret = ASN_PARSE_E; + } + if (ret == 0) { + idx += len; + + if ((idx + 1) > certSz) + ret = BUFFER_E; + } + if (ret == 0) { + /* issuerUniqueID - optional */ + localIdx = idx; + if (GetASNTag(cert, &localIdx, &tag, certSz) == 0) { + if (tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 1)) { + idx++; + if (GetLength(cert, &idx, &len, certSz) < 0) + ret = ASN_PARSE_E; + idx += len; + } + } + } + if (ret == 0) { + if ((idx + 1) > certSz) + ret = BUFFER_E; + } + if (ret == 0) { + /* subjectUniqueID - optional */ + localIdx = idx; + if (GetASNTag(cert, &localIdx, &tag, certSz) == 0) { + if (tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 2)) { + idx++; + if (GetLength(cert, &idx, &len, certSz) < 0) + ret = ASN_PARSE_E; + idx += len; + } + } + } + + if (ret == 0) { + if ((idx + 1) > certSz) + ret = BUFFER_E; + } + /* extensions - optional */ + localIdx = idx; + if (ret == 0 && GetASNTag(cert, &localIdx, &tag, certSz) == 0 && + tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 3)) { + idx++; + if (GetLength(cert, &idx, &extLen, certSz) < 0) + ret = ASN_PARSE_E; + if (ret == 0) { + if (GetSequence(cert, &idx, &extLen, 
certSz) < 0) + ret = ASN_PARSE_E; + } + if (ret == 0) { + extEndIdx = idx + extLen; + + /* Check each extension for the ones we want. */ + while (ret == 0 && idx < extEndIdx) { + if (GetSequence(cert, &idx, &len, certSz) < 0) + ret = ASN_PARSE_E; + if (ret == 0) { + extIdx = idx; + if (GetObjectId(cert, &extIdx, &oid, oidCertExtType, + certSz) < 0) { + ret = ASN_PARSE_E; + } + + if (ret == 0) { + if ((extIdx + 1) > certSz) + ret = BUFFER_E; + } + } + + if (ret == 0) { + localIdx = extIdx; + if (GetASNTag(cert, &localIdx, &tag, certSz) == 0 && + tag == ASN_BOOLEAN) { + if (GetBoolean(cert, &extIdx, certSz) < 0) + ret = ASN_PARSE_E; + } + } + if (ret == 0) { + if (GetOctetString(cert, &extIdx, &extLen, certSz) < 0) + ret = ASN_PARSE_E; + } + + if (ret == 0) { + switch (oid) { + case AUTH_KEY_OID: + if (GetSequence(cert, &extIdx, &extLen, certSz) < 0) + ret = ASN_PARSE_E; + + if (ret == 0 && (extIdx + 1) >= certSz) + ret = BUFFER_E; + + if (ret == 0 && + GetASNTag(cert, &extIdx, &tag, certSz) == 0 && + tag == (ASN_CONTEXT_SPECIFIC | 0)) { + if (GetLength(cert, &extIdx, &extLen, certSz) <= 0) + ret = ASN_PARSE_E; + if (ret == 0) { + extAuthKeyIdSet = 1; + if (extLen == KEYID_SIZE) + XMEMCPY(hash, cert + extIdx, extLen); + else { + ret = CalcHashId(cert + extIdx, extLen, + hash); + } + } + } + break; + + default: + break; + } + } + idx += len; + } + } + } + + if (ret == 0 && pubKey == NULL) { + if (extAuthKeyIdSet) + ca = GetCA(cm, hash); + if (ca == NULL) { + ret = CalcHashId(cert + issuerIdx, issuerSz, hash); + if (ret == 0) + ca = GetCAByName(cm, hash); + } + } +#else + if (ret == 0 && pubKey == NULL) { + ret = CalcHashId(cert + issuerIdx, issuerSz, hash); + if (ret == 0) + ca = GetCA(cm, hash); + } +#endif /* !NO_SKID */ + if (ca == NULL && pubKey == NULL) + ret = ASN_NO_SIGNER_E; + + if (ret == 0) { + idx = sigIndex; + /* signatureAlgorithm */ + if (GetAlgoId(cert, &idx, &oid, oidSigType, certSz) < 0) + ret = ASN_PARSE_E; + } + if (ret == 0) { + if (oid != signatureOID) + ret = ASN_SIG_OID_E; + } + if (ret == 0) { + /* signatureValue */ + if (CheckBitString(cert, &idx, &len, certSz, 1, NULL) < 0) + ret = ASN_PARSE_E; + } + + if (ret == 0) { + if (pubKey != NULL) { + ret = ConfirmSignature(sigCtx, cert + tbsCertIdx, + sigIndex - tbsCertIdx, + pubKey, pubKeySz, pubKeyOID, + cert + idx, len, signatureOID, NULL); + } + else { + ret = ConfirmSignature(sigCtx, cert + tbsCertIdx, + sigIndex - tbsCertIdx, + ca->publicKey, ca->pubKeySize, ca->keyOID, + cert + idx, len, signatureOID, NULL); + } + if (ret != 0) { + WOLFSSL_MSG("Confirm signature failed"); + } + } + + FreeSignatureCtx(sigCtx); +#ifdef WOLFSSL_SMALL_STACK + if (sigCtx != NULL) + XFREE(sigCtx, heap, DYNAMIC_TYPE_SIGNATURE); +#endif + return ret; +} + +#ifdef OPENSSL_EXTRA +/* Call CheckCertSignature_ex using a public key buffer for verification + */ +int CheckCertSignaturePubKey(const byte* cert, word32 certSz, void* heap, + const byte* pubKey, word32 pubKeySz, int pubKeyOID) +{ + return CheckCertSignature_ex(cert, certSz, heap, NULL, + pubKey, pubKeySz, pubKeyOID); +} +#endif /* OPENSSL_EXTRA */ +#ifdef WOLFSSL_SMALL_CERT_VERIFY +/* Call CheckCertSignature_ex using a certificate manager (cm) + */ +int CheckCertSignature(const byte* cert, word32 certSz, void* heap, void* cm) +{ + return CheckCertSignature_ex(cert, certSz, heap, cm, NULL, 0, 0); +} +#endif /* WOLFSSL_SMALL_CERT_VERIFY */ +#endif /* WOLFSSL_SMALL_CERT_VERIFY || OPENSSL_EXTRA */ + +int ParseCertRelative(DecodedCert* cert, int type, int verify, void* cm) +{ + int ret = 0; + 
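+    /* Illustrative usage sketch (not part of the upstream sources): a
+     * minimal caller of this parser follows the same pattern as
+     * wc_SetCert_LoadDer() later in this file, roughly:
+     *
+     *     DecodedCert dc;
+     *     InitDecodedCert(&dc, derBuf, derSz, NULL);
+     *     int ret = ParseCertRelative(&dc, CERT_TYPE, NO_VERIFY, NULL);
+     *     FreeDecodedCert(&dc);
+     *
+     * derBuf/derSz are placeholder names for the caller's DER certificate;
+     * the NULL arguments are the heap hint and the certificate manager. */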
int checkPathLen = 0; + int decrementMaxPathLen = 0; + word32 confirmOID; +#if defined(WOLFSSL_RENESAS_TSIP) + int idx = 0; +#endif + byte* tsip_encRsaKeyIdx; + + if (cert == NULL) { + return BAD_FUNC_ARG; + } + + if (cert->sigCtx.state == SIG_STATE_BEGIN) { + cert->badDate = 0; + cert->criticalExt = 0; + if ((ret = DecodeToKey(cert, verify)) < 0) { + if (ret == ASN_BEFORE_DATE_E || ret == ASN_AFTER_DATE_E) + cert->badDate = ret; + else + return ret; + } + + WOLFSSL_MSG("Parsed Past Key"); + + if (cert->srcIdx < cert->sigIndex) { + #ifndef ALLOW_V1_EXTENSIONS + if (cert->version < 2) { + WOLFSSL_MSG("\tv1 and v2 certs not allowed extensions"); + return ASN_VERSION_E; + } + #endif + + /* save extensions */ + cert->extensions = &cert->source[cert->srcIdx]; + cert->extensionsSz = cert->sigIndex - cert->srcIdx; + cert->extensionsIdx = cert->srcIdx; /* for potential later use */ + + if ((ret = DecodeCertExtensions(cert)) < 0) { + if (ret == ASN_CRIT_EXT_E) + cert->criticalExt = ret; + else + return ret; + } + + /* advance past extensions */ + cert->srcIdx = cert->sigIndex; + } + + if ((ret = GetAlgoId(cert->source, &cert->srcIdx, &confirmOID, + oidSigType, cert->maxIdx)) < 0) + return ret; + + if ((ret = GetSignature(cert)) < 0) + return ret; + + if (confirmOID != cert->signatureOID) + return ASN_SIG_OID_E; + + #ifndef NO_SKID + if (cert->extSubjKeyIdSet == 0 && cert->publicKey != NULL && + cert->pubKeySize > 0) { + ret = CalcHashId(cert->publicKey, cert->pubKeySize, + cert->extSubjKeyId); + if (ret != 0) + return ret; + } + #endif /* !NO_SKID */ + + if (!cert->selfSigned || (verify != NO_VERIFY && type != CA_TYPE && + type != TRUSTED_PEER_TYPE)) { + cert->ca = NULL; + #ifndef NO_SKID + if (cert->extAuthKeyIdSet) { + cert->ca = GetCA(cm, cert->extAuthKeyId); + } + if (cert->ca == NULL && cert->extSubjKeyIdSet + && verify != VERIFY_OCSP) { + cert->ca = GetCA(cm, cert->extSubjKeyId); + } + if (cert->ca != NULL && XMEMCMP(cert->issuerHash, + cert->ca->subjectNameHash, KEYID_SIZE) != 0) { + cert->ca = NULL; + } + if (cert->ca == NULL) { + cert->ca = GetCAByName(cm, cert->issuerHash); + /* If AKID is available then this CA doesn't have the public + * key required */ + if (cert->ca && cert->extAuthKeyIdSet) { + WOLFSSL_MSG("CA SKID doesn't match AKID"); + cert->ca = NULL; + } + } + + /* OCSP Only: alt lookup using subject and pub key w/o sig check */ + #ifdef WOLFSSL_NO_TRUSTED_CERTS_VERIFY + if (cert->ca == NULL && verify == VERIFY_OCSP) { + cert->ca = GetCABySubjectAndPubKey(cert, cm); + if (cert->ca) { + ret = 0; /* success */ + goto exit_pcr; + } + } + #endif /* WOLFSSL_NO_TRUSTED_CERTS_VERIFY */ + #else + cert->ca = GetCA(cm, cert->issuerHash); + #endif /* !NO_SKID */ + } + + if (cert->selfSigned) { + cert->maxPathLen = WOLFSSL_MAX_PATH_LEN; + } else { + /* RFC 5280 Section 4.2.1.9: + * + * load/receive check + * + * 1) Is CA boolean set? + * No - SKIP CHECK + * Yes - Check key usage + * 2) Is Key usage extension present? + * No - goto 3 + * Yes - check keyCertSign assertion + * 2.a) Is keyCertSign asserted? + * No - goto 4 + * Yes - goto 3 + * 3) Is pathLen set? + * No - goto 4 + * Yes - check pathLen against maxPathLen. + * 3.a) Is pathLen less than maxPathLen? + * No - goto 4 + * Yes - set maxPathLen to pathLen and EXIT + * 4) Is maxPathLen > 0? 
+ * Yes - Reduce by 1 + * No - ERROR + */ + + if (cert->ca && cert->pathLengthSet) { + cert->maxPathLen = cert->pathLength; + if (cert->isCA) { + WOLFSSL_MSG("\tCA boolean set"); + if (cert->extKeyUsageSet) { + WOLFSSL_MSG("\tExtension Key Usage Set"); + if ((cert->extKeyUsage & KEYUSE_KEY_CERT_SIGN) != 0) { + checkPathLen = 1; + } else { + decrementMaxPathLen = 1; + } + } else { + checkPathLen = 1; + } /* !cert->ca check */ + } /* cert is not a CA (assuming entity cert) */ + + if (checkPathLen && cert->pathLengthSet) { + if (cert->pathLength < cert->ca->maxPathLen) { + WOLFSSL_MSG("\tmaxPathLen status: set to pathLength"); + cert->maxPathLen = cert->pathLength; + } else { + decrementMaxPathLen = 1; + } + } + + if (decrementMaxPathLen && cert->ca->maxPathLen > 0) { + WOLFSSL_MSG("\tmaxPathLen status: reduce by 1"); + cert->maxPathLen = cert->ca->maxPathLen - 1; + if (verify != NO_VERIFY && type != CA_TYPE && + type != TRUSTED_PEER_TYPE) { + WOLFSSL_MSG("\tmaxPathLen status: OK"); + } + } else if (decrementMaxPathLen && cert->ca->maxPathLen == 0) { + cert->maxPathLen = 0; + if (verify != NO_VERIFY && type != CA_TYPE && + type != TRUSTED_PEER_TYPE) { + WOLFSSL_MSG("\tNon-entity cert, maxPathLen is 0"); + WOLFSSL_MSG("\tmaxPathLen status: ERROR"); + return ASN_PATHLEN_INV_E; + } + } + } else if (cert->ca && cert->isCA) { + /* case where cert->pathLength extension is not set */ + if (cert->ca->maxPathLen > 0) { + cert->maxPathLen = cert->ca->maxPathLen - 1; + } else { + cert->maxPathLen = 0; + if (verify != NO_VERIFY && type != CA_TYPE && + type != TRUSTED_PEER_TYPE) { + WOLFSSL_MSG("\tNon-entity cert, maxPathLen is 0"); + WOLFSSL_MSG("\tmaxPathLen status: ERROR"); + return ASN_PATHLEN_INV_E; + } + } + } + #ifdef HAVE_OCSP + if (verify != NO_VERIFY && type != CA_TYPE && + type != TRUSTED_PEER_TYPE) { + if (cert->ca) { + /* Need the CA's public key hash for OCSP */ + XMEMCPY(cert->issuerKeyHash, cert->ca->subjectKeyHash, + KEYID_SIZE); + } + + } + #endif /* HAVE_OCSP */ + } + } +#if defined(WOLFSSL_RENESAS_TSIP) + /* prepare for TSIP TLS cert verification API use */ + if (cert->keyOID == RSAk) { + /* to call TSIP API, it needs keys position info in bytes */ + if ((ret = RsaPublicKeyDecodeRawIndex(cert->publicKey, (word32*)&idx, + cert->pubKeySize, + &cert->sigCtx.pubkey_n_start, + &cert->sigCtx.pubkey_n_len, + &cert->sigCtx.pubkey_e_start, + &cert->sigCtx.pubkey_e_len)) != 0) { + WOLFSSL_MSG("Decoding index from cert failed."); + return ret; + } + cert->sigCtx.certBegin = cert->certBegin; + } + /* check if we can use TSIP for cert verification */ + /* if the ca is verified as tsip root ca. */ + /* TSIP can only handle 2048 bits(256 byte) key. 
 */
+    if (cert->ca && tsip_checkCA(cert->ca->cm_idx) != 0 &&
+        cert->sigCtx.pubkey_n_len == 256) {
+
+        /* assign memory to encrypted tsip Rsa key index */
+        if (!cert->tsip_encRsaKeyIdx)
+            cert->tsip_encRsaKeyIdx =
+                (byte*)XMALLOC(TSIP_TLS_ENCPUBKEY_SZ_BY_CERTVRFY,
+                               cert->heap, DYNAMIC_TYPE_RSA);
+        if (cert->tsip_encRsaKeyIdx == NULL)
+            return MEMORY_E;
+    } else {
+        if (cert->ca) {
+            /* TSIP isn't usable */
+            if (tsip_checkCA(cert->ca->cm_idx) == 0)
+                WOLFSSL_MSG("TSIP isn't usable because the CA isn't verified "
+                            "by TSIP.");
+            else if (cert->sigCtx.pubkey_n_len != 256)
+                WOLFSSL_MSG("TSIP isn't usable because the CA's public key "
+                            "isn't 2048-bit RSA.");
+            else
+                WOLFSSL_MSG("TSIP isn't usable");
+        }
+        cert->tsip_encRsaKeyIdx = NULL;
+    }
+
+    tsip_encRsaKeyIdx = cert->tsip_encRsaKeyIdx;
+#else
+    tsip_encRsaKeyIdx = NULL;
+#endif
+
+    if (verify != NO_VERIFY && type != CA_TYPE && type != TRUSTED_PEER_TYPE) {
+        if (cert->ca) {
+            if (verify == VERIFY || verify == VERIFY_OCSP ||
+                verify == VERIFY_SKIP_DATE) {
+                /* try to confirm/verify signature */
+                if ((ret = ConfirmSignature(&cert->sigCtx,
+                        cert->source + cert->certBegin,
+                        cert->sigIndex - cert->certBegin,
+                        cert->ca->publicKey, cert->ca->pubKeySize,
+                        cert->ca->keyOID, cert->signature,
+                        cert->sigLength, cert->signatureOID,
+                        tsip_encRsaKeyIdx)) != 0) {
+                    if (ret != 0 && ret != WC_PENDING_E) {
+                        WOLFSSL_MSG("Confirm signature failed");
+                    }
+                    return ret;
+                }
+            }
+        #ifndef IGNORE_NAME_CONSTRAINTS
+            if (verify == VERIFY || verify == VERIFY_OCSP ||
+                verify == VERIFY_NAME || verify == VERIFY_SKIP_DATE) {
+                /* check that this cert's name is permitted by the signer's
+                 * name constraints */
+                if (!ConfirmNameConstraints(cert->ca, cert)) {
+                    WOLFSSL_MSG("Confirm name constraint failed");
+                    return ASN_NAME_INVALID_E;
+                }
+            }
+        #endif /* IGNORE_NAME_CONSTRAINTS */
+        }
+        else {
+            /* no signer */
+            WOLFSSL_MSG("No CA signer to verify with");
+            return ASN_NO_SIGNER_E;
+        }
+    }
+
+#if defined(WOLFSSL_NO_TRUSTED_CERTS_VERIFY) && !defined(NO_SKID)
+exit_pcr:
+#endif
+
+    if (cert->badDate != 0) {
+        if (verify != VERIFY_SKIP_DATE) {
+            return cert->badDate;
+        }
+        WOLFSSL_MSG("Date error: Verify option is skipping");
+    }
+
+    if (cert->criticalExt != 0)
+        return cert->criticalExt;
+
+    return ret;
+}
+
+/* Create and init a new signer */
+Signer* MakeSigner(void* heap)
+{
+    Signer* signer = (Signer*) XMALLOC(sizeof(Signer), heap,
+                                       DYNAMIC_TYPE_SIGNER);
+    if (signer) {
+        XMEMSET(signer, 0, sizeof(Signer));
+    }
+    (void)heap;
+
+    return signer;
+}
+
+
+/* Free an individual signer */
+void FreeSigner(Signer* signer, void* heap)
+{
+    XFREE(signer->name, heap, DYNAMIC_TYPE_SUBJECT_CN);
+    XFREE((void*)signer->publicKey, heap, DYNAMIC_TYPE_PUBLIC_KEY);
+#ifndef IGNORE_NAME_CONSTRAINTS
+    if (signer->permittedNames)
+        FreeNameSubtrees(signer->permittedNames, heap);
+    if (signer->excludedNames)
+        FreeNameSubtrees(signer->excludedNames, heap);
+#endif
+#ifdef WOLFSSL_SIGNER_DER_CERT
+    FreeDer(&signer->derCert);
+#endif
+    XFREE(signer, heap, DYNAMIC_TYPE_SIGNER);
+
+    (void)heap;
+}
+
+
+/* Free the whole signer table, given its number of rows */
+void FreeSignerTable(Signer** table, int rows, void* heap)
+{
+    int i;
+
+    for (i = 0; i < rows; i++) {
+        Signer* signer = table[i];
+        while (signer) {
+            Signer* next = signer->next;
+            FreeSigner(signer, heap);
+            signer = next;
+        }
+        table[i] = NULL;
+    }
+}
+
+#ifdef WOLFSSL_TRUST_PEER_CERT
+/* Free an individual trusted peer cert */
+void FreeTrustedPeer(TrustedPeerCert* tp, void* heap)
+{
+    if (tp == NULL) {
+        return;
+    }
+
+    if
(tp->name) { + XFREE(tp->name, heap, DYNAMIC_TYPE_SUBJECT_CN); + } + + if (tp->sig) { + XFREE(tp->sig, heap, DYNAMIC_TYPE_SIGNATURE); + } +#ifndef IGNORE_NAME_CONSTRAINTS + if (tp->permittedNames) + FreeNameSubtrees(tp->permittedNames, heap); + if (tp->excludedNames) + FreeNameSubtrees(tp->excludedNames, heap); +#endif + XFREE(tp, heap, DYNAMIC_TYPE_CERT); + + (void)heap; +} + +/* Free the whole Trusted Peer linked list */ +void FreeTrustedPeerTable(TrustedPeerCert** table, int rows, void* heap) +{ + int i; + + for (i = 0; i < rows; i++) { + TrustedPeerCert* tp = table[i]; + while (tp) { + TrustedPeerCert* next = tp->next; + FreeTrustedPeer(tp, heap); + tp = next; + } + table[i] = NULL; + } +} +#endif /* WOLFSSL_TRUST_PEER_CERT */ + +int SetMyVersion(word32 version, byte* output, int header) +{ + int i = 0; + + if (output == NULL) + return BAD_FUNC_ARG; + + if (header) { + output[i++] = ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED; + output[i++] = 3; + } + output[i++] = ASN_INTEGER; + output[i++] = 0x01; + output[i++] = (byte)version; + + return i; +} + +int SetSerialNumber(const byte* sn, word32 snSz, byte* output, + word32 outputSz, int maxSnSz) +{ + int i; + int snSzInt = (int)snSz; + + if (sn == NULL || output == NULL || snSzInt < 0) + return BAD_FUNC_ARG; + + /* remove leading zeros */ + while (snSzInt > 0 && sn[0] == 0) { + snSzInt--; + sn++; + } + /* RFC 5280 - 4.1.2.2: + * Serial numbers must be a positive value (and not zero) */ + if (snSzInt == 0) + return BAD_FUNC_ARG; + + if (sn[0] & 0x80) + maxSnSz--; + /* truncate if input is too long */ + if (snSzInt > maxSnSz) + snSzInt = maxSnSz; + + i = SetASNInt(snSzInt, sn[0], NULL); + /* truncate if input is too long */ + if (snSzInt > (int)outputSz - i) + snSzInt = (int)outputSz - i; + /* sanity check number of bytes to copy */ + if (snSzInt <= 0) { + return BUFFER_E; + } + + /* write out ASN.1 Integer */ + (void)SetASNInt(snSzInt, sn[0], output); + XMEMCPY(output + i, sn, snSzInt); + + /* compute final length */ + i += snSzInt; + + return i; +} + +#endif /* !NO_CERTS */ + +int GetSerialNumber(const byte* input, word32* inOutIdx, + byte* serial, int* serialSz, word32 maxIdx) +{ + int result = 0; + int ret; + + WOLFSSL_ENTER("GetSerialNumber"); + + if (serial == NULL || input == NULL || serialSz == NULL) { + return BAD_FUNC_ARG; + } + + /* First byte is ASN type */ + if ((*inOutIdx+1) > maxIdx) { + WOLFSSL_MSG("Bad idx first"); + return BUFFER_E; + } + + ret = GetASNInt(input, inOutIdx, serialSz, maxIdx); + if (ret != 0) + return ret; + + if (*serialSz > EXTERNAL_SERIAL_SIZE) { + WOLFSSL_MSG("Serial size bad"); + return ASN_PARSE_E; + } + + /* return serial */ + XMEMCPY(serial, &input[*inOutIdx], *serialSz); + *inOutIdx += *serialSz; + + return result; +} + +#ifndef NO_CERTS + +int AllocDer(DerBuffer** pDer, word32 length, int type, void* heap) +{ + int ret = BAD_FUNC_ARG; + if (pDer) { + int dynType = 0; + DerBuffer* der; + + /* Determine dynamic type */ + switch (type) { + case CA_TYPE: dynType = DYNAMIC_TYPE_CA; break; + case CERT_TYPE: dynType = DYNAMIC_TYPE_CERT; break; + case CRL_TYPE: dynType = DYNAMIC_TYPE_CRL; break; + case DSA_TYPE: dynType = DYNAMIC_TYPE_DSA; break; + case ECC_TYPE: dynType = DYNAMIC_TYPE_ECC; break; + case RSA_TYPE: dynType = DYNAMIC_TYPE_RSA; break; + default: dynType = DYNAMIC_TYPE_KEY; break; + } + + /* Setup new buffer */ + *pDer = (DerBuffer*)XMALLOC(sizeof(DerBuffer) + length, heap, dynType); + if (*pDer == NULL) { + return MEMORY_E; + } + XMEMSET(*pDer, 0, sizeof(DerBuffer) + length); + + der = *pDer; + 
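+        /* Note: the DerBuffer header and its payload share a single
+         * allocation; der->buffer below is simply pointed just past the
+         * struct, which is why FreeDer() can release both with one XFREE. */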
der->type = type; + der->dynType = dynType; /* Cache this for FreeDer */ + der->heap = heap; + der->buffer = (byte*)der + sizeof(DerBuffer); + der->length = length; + ret = 0; /* Success */ + } + return ret; +} + +void FreeDer(DerBuffer** pDer) +{ + if (pDer && *pDer) + { + DerBuffer* der = (DerBuffer*)*pDer; + + /* ForceZero private keys */ + if (der->type == PRIVATEKEY_TYPE) { + ForceZero(der->buffer, der->length); + } + der->buffer = NULL; + der->length = 0; + XFREE(der, der->heap, der->dynType); + + *pDer = NULL; + } +} + +int wc_AllocDer(DerBuffer** pDer, word32 length, int type, void* heap) +{ + return AllocDer(pDer, length, type, heap); +} +void wc_FreeDer(DerBuffer** pDer) +{ + FreeDer(pDer); +} + + +#if defined(WOLFSSL_PEM_TO_DER) || defined(WOLFSSL_DER_TO_PEM) + +/* Max X509 header length indicates the max length + 2 ('\n', '\0') */ +#define MAX_X509_HEADER_SZ (37 + 2) + +wcchar BEGIN_CERT = "-----BEGIN CERTIFICATE-----"; +wcchar END_CERT = "-----END CERTIFICATE-----"; +#ifdef WOLFSSL_CERT_REQ + wcchar BEGIN_CERT_REQ = "-----BEGIN CERTIFICATE REQUEST-----"; + wcchar END_CERT_REQ = "-----END CERTIFICATE REQUEST-----"; +#endif +#ifndef NO_DH + wcchar BEGIN_DH_PARAM = "-----BEGIN DH PARAMETERS-----"; + wcchar END_DH_PARAM = "-----END DH PARAMETERS-----"; +#endif +#ifndef NO_DSA + wcchar BEGIN_DSA_PARAM = "-----BEGIN DSA PARAMETERS-----"; + wcchar END_DSA_PARAM = "-----END DSA PARAMETERS-----"; +#endif +wcchar BEGIN_X509_CRL = "-----BEGIN X509 CRL-----"; +wcchar END_X509_CRL = "-----END X509 CRL-----"; +wcchar BEGIN_RSA_PRIV = "-----BEGIN RSA PRIVATE KEY-----"; +wcchar END_RSA_PRIV = "-----END RSA PRIVATE KEY-----"; +wcchar BEGIN_PRIV_KEY = "-----BEGIN PRIVATE KEY-----"; +wcchar END_PRIV_KEY = "-----END PRIVATE KEY-----"; +wcchar BEGIN_ENC_PRIV_KEY = "-----BEGIN ENCRYPTED PRIVATE KEY-----"; +wcchar END_ENC_PRIV_KEY = "-----END ENCRYPTED PRIVATE KEY-----"; +#ifdef HAVE_ECC + wcchar BEGIN_EC_PRIV = "-----BEGIN EC PRIVATE KEY-----"; + wcchar END_EC_PRIV = "-----END EC PRIVATE KEY-----"; +#endif +#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448) || \ + !defined(NO_DSA) + wcchar BEGIN_DSA_PRIV = "-----BEGIN DSA PRIVATE KEY-----"; + wcchar END_DSA_PRIV = "-----END DSA PRIVATE KEY-----"; +#endif +#ifdef OPENSSL_EXTRA + const char BEGIN_PRIV_KEY_PREFIX[] = "-----BEGIN"; + const char PRIV_KEY_SUFFIX[] = "PRIVATE KEY-----"; + const char END_PRIV_KEY_PREFIX[] = "-----END"; +#endif +wcchar BEGIN_PUB_KEY = "-----BEGIN PUBLIC KEY-----"; +wcchar END_PUB_KEY = "-----END PUBLIC KEY-----"; +#if defined(HAVE_ED25519) || defined(HAVE_ED448) + wcchar BEGIN_EDDSA_PRIV = "-----BEGIN EDDSA PRIVATE KEY-----"; + wcchar END_EDDSA_PRIV = "-----END EDDSA PRIVATE KEY-----"; +#endif +#ifdef HAVE_CRL + const char *const BEGIN_CRL = "-----BEGIN X509 CRL-----"; + wcchar END_CRL = "-----END X509 CRL-----"; +#endif + + +static WC_INLINE char* SkipEndOfLineChars(char* line, const char* endOfLine) +{ + /* eat end of line characters */ + while (line < endOfLine && + (line[0] == '\r' || line[0] == '\n')) { + line++; + } + return line; +} + +int wc_PemGetHeaderFooter(int type, const char** header, const char** footer) +{ + int ret = BAD_FUNC_ARG; + + switch (type) { + case CA_TYPE: /* same as below */ + case TRUSTED_PEER_TYPE: + case CERT_TYPE: + if (header) *header = BEGIN_CERT; + if (footer) *footer = END_CERT; + ret = 0; + break; + + case CRL_TYPE: + if (header) *header = BEGIN_X509_CRL; + if (footer) *footer = END_X509_CRL; + ret = 0; + break; + #ifndef NO_DH + case DH_PARAM_TYPE: + if (header) 
*header = BEGIN_DH_PARAM; + if (footer) *footer = END_DH_PARAM; + ret = 0; + break; + #endif + #ifndef NO_DSA + case DSA_PARAM_TYPE: + if (header) *header = BEGIN_DSA_PARAM; + if (footer) *footer = END_DSA_PARAM; + ret = 0; + break; + #endif + #ifdef WOLFSSL_CERT_REQ + case CERTREQ_TYPE: + if (header) *header = BEGIN_CERT_REQ; + if (footer) *footer = END_CERT_REQ; + ret = 0; + break; + #endif + #ifndef NO_DSA + case DSA_TYPE: + case DSA_PRIVATEKEY_TYPE: + if (header) *header = BEGIN_DSA_PRIV; + if (footer) *footer = END_DSA_PRIV; + ret = 0; + break; + #endif + #ifdef HAVE_ECC + case ECC_TYPE: + case ECC_PRIVATEKEY_TYPE: + if (header) *header = BEGIN_EC_PRIV; + if (footer) *footer = END_EC_PRIV; + ret = 0; + break; + #endif + case RSA_TYPE: + case PRIVATEKEY_TYPE: + if (header) *header = BEGIN_RSA_PRIV; + if (footer) *footer = END_RSA_PRIV; + ret = 0; + break; + #ifdef HAVE_ED25519 + case ED25519_TYPE: + #endif + #ifdef HAVE_ED448 + case ED448_TYPE: + #endif + #if defined(HAVE_ED25519) || defined(HAVE_ED448) + case EDDSA_PRIVATEKEY_TYPE: + if (header) *header = BEGIN_EDDSA_PRIV; + if (footer) *footer = END_EDDSA_PRIV; + ret = 0; + break; + #endif + case PUBLICKEY_TYPE: + case ECC_PUBLICKEY_TYPE: + if (header) *header = BEGIN_PUB_KEY; + if (footer) *footer = END_PUB_KEY; + ret = 0; + break; + #if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL)) + case DH_PRIVATEKEY_TYPE: + #endif + case PKCS8_PRIVATEKEY_TYPE: + if (header) *header = BEGIN_PRIV_KEY; + if (footer) *footer = END_PRIV_KEY; + ret = 0; + break; + case PKCS8_ENC_PRIVATEKEY_TYPE: + if (header) *header = BEGIN_ENC_PRIV_KEY; + if (footer) *footer = END_ENC_PRIV_KEY; + ret = 0; + break; + default: + break; + } + return ret; +} + +#ifdef WOLFSSL_ENCRYPTED_KEYS + +static wcchar kProcTypeHeader = "Proc-Type"; +static wcchar kDecInfoHeader = "DEK-Info"; + +#ifdef WOLFSSL_PEM_TO_DER +#ifndef NO_DES3 + static wcchar kEncTypeDes = "DES-CBC"; + static wcchar kEncTypeDes3 = "DES-EDE3-CBC"; +#endif +#if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_128) + static wcchar kEncTypeAesCbc128 = "AES-128-CBC"; +#endif +#if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_192) + static wcchar kEncTypeAesCbc192 = "AES-192-CBC"; +#endif +#if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_256) + static wcchar kEncTypeAesCbc256 = "AES-256-CBC"; +#endif + +int wc_EncryptedInfoGet(EncryptedInfo* info, const char* cipherInfo) +{ + int ret = 0; + + if (info == NULL || cipherInfo == NULL) + return BAD_FUNC_ARG; + + /* determine cipher information */ +#ifndef NO_DES3 + if (XSTRNCMP(cipherInfo, kEncTypeDes, XSTRLEN(kEncTypeDes)) == 0) { + info->cipherType = WC_CIPHER_DES; + info->keySz = DES_KEY_SIZE; + if (info->ivSz == 0) info->ivSz = DES_IV_SIZE; + } + else if (XSTRNCMP(cipherInfo, kEncTypeDes3, XSTRLEN(kEncTypeDes3)) == 0) { + info->cipherType = WC_CIPHER_DES3; + info->keySz = DES3_KEY_SIZE; + if (info->ivSz == 0) info->ivSz = DES_IV_SIZE; + } + else +#endif /* !NO_DES3 */ +#if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_128) + if (XSTRNCMP(cipherInfo, kEncTypeAesCbc128, XSTRLEN(kEncTypeAesCbc128)) == 0) { + info->cipherType = WC_CIPHER_AES_CBC; + info->keySz = AES_128_KEY_SIZE; + if (info->ivSz == 0) info->ivSz = AES_IV_SIZE; + } + else +#endif +#if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_192) + if (XSTRNCMP(cipherInfo, kEncTypeAesCbc192, XSTRLEN(kEncTypeAesCbc192)) == 0) { + info->cipherType = WC_CIPHER_AES_CBC; + info->keySz = AES_192_KEY_SIZE; 
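+        /* Note: the cipher-name string matched here comes from a PEM
+         * "DEK-Info" header, e.g. "DEK-Info: AES-192-CBC,<hex IV>";
+         * wc_EncryptedInfoParse() below extracts that name and IV before
+         * this routine is called. */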
+ if (info->ivSz == 0) info->ivSz = AES_IV_SIZE; + } + else +#endif +#if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_256) + if (XSTRNCMP(cipherInfo, kEncTypeAesCbc256, XSTRLEN(kEncTypeAesCbc256)) == 0) { + info->cipherType = WC_CIPHER_AES_CBC; + info->keySz = AES_256_KEY_SIZE; + if (info->ivSz == 0) info->ivSz = AES_IV_SIZE; + } + else +#endif + { + ret = NOT_COMPILED_IN; + } + return ret; +} + +int wc_EncryptedInfoParse(EncryptedInfo* info, char** pBuffer, size_t bufSz) +{ + int err = 0; + char* bufferStart; + char* bufferEnd; + char* line; + word32 lineSz; + char* finish; + word32 finishSz; + char* start = NULL; + word32 startSz; + char* newline = NULL; + + if (info == NULL || pBuffer == NULL || bufSz == 0) + return BAD_FUNC_ARG; + + bufferStart = *pBuffer; + bufferEnd = bufferStart + bufSz; + + /* find encrypted info marker */ + line = XSTRNSTR(bufferStart, kProcTypeHeader, + min((word32)bufSz, PEM_LINE_LEN)); + if (line != NULL) { + if (line >= bufferEnd) { + return BUFFER_E; + } + + lineSz = (word32)(bufferEnd - line); + + /* find DEC-Info marker */ + start = XSTRNSTR(line, kDecInfoHeader, min(lineSz, PEM_LINE_LEN)); + + if (start == NULL) + return BUFFER_E; + + /* skip dec-info and ": " */ + start += XSTRLEN(kDecInfoHeader); + if (start >= bufferEnd) + return BUFFER_E; + + if (start[0] == ':') { + start++; + if (start >= bufferEnd) + return BUFFER_E; + } + if (start[0] == ' ') + start++; + + startSz = (word32)(bufferEnd - start); + finish = XSTRNSTR(start, ",", min(startSz, PEM_LINE_LEN)); + + if ((start != NULL) && (finish != NULL) && (start < finish)) { + if (finish >= bufferEnd) { + return BUFFER_E; + } + + finishSz = (word32)(bufferEnd - finish); + newline = XSTRNSTR(finish, "\r", min(finishSz, PEM_LINE_LEN)); + + /* get cipher name */ + if (NAME_SZ < (finish - start)) /* buffer size of info->name */ + return BUFFER_E; + if (XMEMCPY(info->name, start, finish - start) == NULL) + return BUFFER_E; + info->name[finish - start] = '\0'; /* null term */ + + /* populate info */ + err = wc_EncryptedInfoGet(info, info->name); + if (err != 0) + return err; + + /* get IV */ + if (finishSz < info->ivSz + 1) + return BUFFER_E; + + if (newline == NULL) { + newline = XSTRNSTR(finish, "\n", min(finishSz, + PEM_LINE_LEN)); + } + if ((newline != NULL) && (newline > finish)) { + finish++; + info->ivSz = (word32)(newline - finish); + if (info->ivSz > IV_SZ) + return BUFFER_E; + if (XMEMCPY(info->iv, finish, info->ivSz) == NULL) + return BUFFER_E; + info->set = 1; + } + else + return BUFFER_E; + } + else + return BUFFER_E; + + /* eat end of line characters */ + newline = SkipEndOfLineChars(newline, bufferEnd); + + /* return new headerEnd */ + + *pBuffer = newline; + } + + return err; +} +#endif /* WOLFSSL_PEM_TO_DER */ + +#ifdef WOLFSSL_DER_TO_PEM +static int wc_EncryptedInfoAppend(char* dest, int destSz, char* cipherInfo) +{ + if (cipherInfo != NULL) { + int cipherInfoStrLen = (int)XSTRLEN((char*)cipherInfo); + + if (cipherInfoStrLen > HEADER_ENCRYPTED_KEY_SIZE - (9+14+10+3)) + cipherInfoStrLen = HEADER_ENCRYPTED_KEY_SIZE - (9+14+10+3); + + if (destSz - (int)XSTRLEN(dest) >= cipherInfoStrLen + (9+14+8+2+2+1)) { + /* strncat's src length needs to include the NULL */ + XSTRNCAT(dest, kProcTypeHeader, 10); + XSTRNCAT(dest, ": 4,ENCRYPTED\n", 15); + XSTRNCAT(dest, kDecInfoHeader, 9); + XSTRNCAT(dest, ": ", 3); + XSTRNCAT(dest, cipherInfo, destSz - (int)XSTRLEN(dest) - 1); + XSTRNCAT(dest, "\n\n", 4); + } + } + return 0; +} +#endif /* WOLFSSL_DER_TO_PEM */ +#endif /* 
WOLFSSL_ENCRYPTED_KEYS */
+
+#ifdef WOLFSSL_DER_TO_PEM
+
+/* Used for compatibility API */
+int wc_DerToPem(const byte* der, word32 derSz,
+                byte* output, word32 outSz, int type)
+{
+    return wc_DerToPemEx(der, derSz, output, outSz, NULL, type);
+}
+
+/* Convert a DER buffer to PEM format in output; the conversion cannot be
+   done in place, so der and output must be different buffers */
+int wc_DerToPemEx(const byte* der, word32 derSz, byte* output, word32 outSz,
+                  byte *cipher_info, int type)
+{
+    const char* headerStr = NULL;
+    const char* footerStr = NULL;
+#ifdef WOLFSSL_SMALL_STACK
+    char* header = NULL;
+    char* footer = NULL;
+#else
+    char header[MAX_X509_HEADER_SZ + HEADER_ENCRYPTED_KEY_SIZE];
+    char footer[MAX_X509_HEADER_SZ];
+#endif
+    int headerLen = MAX_X509_HEADER_SZ + HEADER_ENCRYPTED_KEY_SIZE;
+    int footerLen = MAX_X509_HEADER_SZ;
+    int i;
+    int err;
+    int outLen; /* return length or error */
+
+    (void)cipher_info;
+
+    if (der == output) /* no in place conversion */
+        return BAD_FUNC_ARG;
+
+    err = wc_PemGetHeaderFooter(type, &headerStr, &footerStr);
+    if (err != 0)
+        return err;
+
+#ifdef WOLFSSL_SMALL_STACK
+    header = (char*)XMALLOC(headerLen, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (header == NULL)
+        return MEMORY_E;
+
+    footer = (char*)XMALLOC(footerLen, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (footer == NULL) {
+        XFREE(header, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        return MEMORY_E;
+    }
+#endif
+
+    /* build header and footer based on type */
+    XSTRNCPY(header, headerStr, headerLen - 1);
+    header[headerLen - 2] = 0;
+    XSTRNCPY(footer, footerStr, footerLen - 1);
+    footer[footerLen - 2] = 0;
+
+    /* add new line to end */
+    XSTRNCAT(header, "\n", 2);
+    XSTRNCAT(footer, "\n", 2);
+
+#ifdef WOLFSSL_ENCRYPTED_KEYS
+    err = wc_EncryptedInfoAppend(header, headerLen, (char*)cipher_info);
+    if (err != 0) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(header, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(footer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+        return err;
+    }
+#endif
+
+    headerLen = (int)XSTRLEN(header);
+    footerLen = (int)XSTRLEN(footer);
+
+    /* if null output and 0 size passed in then return size needed */
+    if (!output && outSz == 0) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(header, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(footer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        outLen = 0;
+        if ((err = Base64_Encode(der, derSz, NULL, (word32*)&outLen))
+                != LENGTH_ONLY_E) {
+            return err;
+        }
+        return headerLen + footerLen + outLen;
+    }
+
+    if (!der || !output) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(header, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(footer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return BAD_FUNC_ARG;
+    }
+
+    /* don't even try if outSz too short */
+    if (outSz < headerLen + footerLen + derSz) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(header, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(footer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return BAD_FUNC_ARG;
+    }
+
+    /* header */
+    XMEMCPY(output, header, headerLen);
+    i = headerLen;
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(header, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    /* body */
+    outLen = outSz - (headerLen + footerLen); /* input to Base64_Encode */
+    if ( (err = Base64_Encode(der, derSz, output + i, (word32*)&outLen)) < 0) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(footer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return err;
+    }
+    i += outLen;
+
+    /* footer */
+    if ( (i + footerLen) > (int)outSz) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(footer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return BAD_FUNC_ARG;
+    }
+    XMEMCPY(output + i, footer, footerLen);
+
+#ifdef WOLFSSL_SMALL_STACK
+    
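+    /* Illustrative usage sketch (not part of the upstream sources): the
+     * size-query path above enables a two-pass call, e.g.:
+     *
+     *     int pemSz = wc_DerToPem(der, derSz, NULL, 0, CERT_TYPE);
+     *     if (pemSz > 0)
+     *         pemSz = wc_DerToPem(der, derSz, pemBuf, pemSz, CERT_TYPE);
+     *
+     * where pemBuf is a hypothetical caller-provided buffer of at least
+     * pemSz bytes. */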
XFREE(footer, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return outLen + headerLen + footerLen; +} + +#endif /* WOLFSSL_DER_TO_PEM */ + +#ifdef WOLFSSL_PEM_TO_DER + +/* Remove PEM header/footer, convert to ASN1, store any encrypted data + info->consumed tracks of PEM bytes consumed in case multiple parts */ +int PemToDer(const unsigned char* buff, long longSz, int type, + DerBuffer** pDer, void* heap, EncryptedInfo* info, int* keyFormat) +{ + const char* header = NULL; + const char* footer = NULL; + char* headerEnd; + char* footerEnd; + char* consumedEnd; + char* bufferEnd = (char*)(buff + longSz); + long neededSz; + int ret = 0; + int sz = (int)longSz; + int encrypted_key = 0; + DerBuffer* der; +#if defined(HAVE_PKCS8) || defined(WOLFSSL_ENCRYPTED_KEYS) + word32 algId = 0; + #if defined(WOLFSSL_ENCRYPTED_KEYS) && !defined(NO_DES3) && !defined(NO_WOLFSSL_SKIP_TRAILING_PAD) + int padVal = 0; + #endif +#endif +#ifdef OPENSSL_EXTRA + char beginBuf[PEM_LINE_LEN + 1]; /* add 1 for null terminator */ + char endBuf[PEM_LINE_LEN + 1]; /* add 1 for null terminator */ +#endif + + WOLFSSL_ENTER("PemToDer"); + + /* get PEM header and footer based on type */ + ret = wc_PemGetHeaderFooter(type, &header, &footer); + if (ret != 0) + return ret; + + /* map header if not found for type */ + for (;;) { + headerEnd = XSTRNSTR((char*)buff, header, sz); + + if (headerEnd) { + break; + } else + if (type == PRIVATEKEY_TYPE) { + if (header == BEGIN_RSA_PRIV) { + header = BEGIN_PRIV_KEY; footer = END_PRIV_KEY; + } else + if (header == BEGIN_PRIV_KEY) { + header = BEGIN_ENC_PRIV_KEY; footer = END_ENC_PRIV_KEY; + } else + #ifdef HAVE_ECC + if (header == BEGIN_ENC_PRIV_KEY) { + header = BEGIN_EC_PRIV; footer = END_EC_PRIV; + } else + if (header == BEGIN_EC_PRIV) { + header = BEGIN_DSA_PRIV; footer = END_DSA_PRIV; + } else + #endif + #if defined(HAVE_ED25519) || defined(HAVE_ED448) + #ifdef HAVE_ECC + if (header == BEGIN_DSA_PRIV) + #else + if (header == BEGIN_ENC_PRIV_KEY) + #endif + { + header = BEGIN_EDDSA_PRIV; footer = END_EDDSA_PRIV; + } else + #endif + { + break; + } + } else +#ifdef HAVE_CRL + if ((type == CRL_TYPE) && (header != BEGIN_CRL)) { + header = BEGIN_CRL; footer = END_CRL; + } else +#endif + { + break; + } + } + + if (!headerEnd) { +#ifdef OPENSSL_EXTRA + char* beginEnd; + int endLen; + /* see if there is a -----BEGIN * PRIVATE KEY----- header */ + headerEnd = XSTRNSTR((char*)buff, PRIV_KEY_SUFFIX, sz); + if (headerEnd) { + beginEnd = headerEnd + XSTR_SIZEOF(PRIV_KEY_SUFFIX); + /* back up to BEGIN_PRIV_KEY_PREFIX */ + headerEnd -= XSTR_SIZEOF(BEGIN_PRIV_KEY_PREFIX); + while (headerEnd > (char*)buff && + XSTRNCMP(headerEnd, BEGIN_PRIV_KEY_PREFIX, + XSTR_SIZEOF(BEGIN_PRIV_KEY_PREFIX)) != 0) { + headerEnd--; + } + if (headerEnd <= (char*)buff || + XSTRNCMP(headerEnd, BEGIN_PRIV_KEY_PREFIX, + XSTR_SIZEOF(BEGIN_PRIV_KEY_PREFIX)) != 0 || + beginEnd - headerEnd > PEM_LINE_LEN) { + WOLFSSL_MSG("Couldn't find PEM header"); + return ASN_NO_PEM_HEADER; + } + /* headerEnd now points to beginning of header */ + XMEMCPY(beginBuf, headerEnd, beginEnd - headerEnd); + beginBuf[beginEnd - headerEnd] = '\0'; + /* look for matching footer */ + footer = XSTRNSTR(beginEnd, + beginBuf + XSTR_SIZEOF(BEGIN_PRIV_KEY_PREFIX), + (unsigned int)((char*)buff + sz - beginEnd)); + if (!footer) { + WOLFSSL_MSG("Couldn't find PEM footer"); + return ASN_NO_PEM_HEADER; + } + footer -= XSTR_SIZEOF(END_PRIV_KEY_PREFIX); + endLen = (unsigned int)(beginEnd - headerEnd - + (XSTR_SIZEOF(BEGIN_PRIV_KEY_PREFIX) - + 
XSTR_SIZEOF(END_PRIV_KEY_PREFIX))); + XMEMCPY(endBuf, footer, endLen); + endBuf[endLen] = '\0'; + + header = beginBuf; + footer = endBuf; + headerEnd = beginEnd; + } else { + WOLFSSL_MSG("Couldn't find PEM header"); + return ASN_NO_PEM_HEADER; + } +#else + WOLFSSL_MSG("Couldn't find PEM header"); + return ASN_NO_PEM_HEADER; +#endif + } else { + headerEnd += XSTRLEN(header); + } + + /* eat end of line characters */ + headerEnd = SkipEndOfLineChars(headerEnd, bufferEnd); + + if (type == PRIVATEKEY_TYPE) { + /* keyFormat is Key_Sum enum */ + if (keyFormat) { + #ifdef HAVE_ECC + if (header == BEGIN_EC_PRIV) + *keyFormat = ECDSAk; + #endif + #if !defined(NO_DSA) + if (header == BEGIN_DSA_PRIV) + *keyFormat = DSAk; + #endif + } + } + +#ifdef WOLFSSL_ENCRYPTED_KEYS + if (info) { + ret = wc_EncryptedInfoParse(info, &headerEnd, bufferEnd - headerEnd); + if (ret < 0) + return ret; + if (info->set) + encrypted_key = 1; + } +#endif /* WOLFSSL_ENCRYPTED_KEYS */ + + /* find footer */ + footerEnd = XSTRNSTR(headerEnd, footer, (unsigned int)((char*)buff + sz - headerEnd)); + if (!footerEnd) { + if (info) + info->consumed = longSz; /* No more certs if no footer */ + return BUFFER_E; + } + + consumedEnd = footerEnd + XSTRLEN(footer); + + if (consumedEnd < bufferEnd) { /* handle no end of line on last line */ + /* eat end of line characters */ + consumedEnd = SkipEndOfLineChars(consumedEnd, bufferEnd); + /* skip possible null term */ + if (consumedEnd < bufferEnd && consumedEnd[0] == '\0') + consumedEnd++; + } + + if (info) + info->consumed = (long)(consumedEnd - (char*)buff); + + /* set up der buffer */ + neededSz = (long)(footerEnd - headerEnd); + if (neededSz > sz || neededSz <= 0) + return BUFFER_E; + + ret = AllocDer(pDer, (word32)neededSz, type, heap); + if (ret < 0) { + return ret; + } + der = *pDer; + + if (Base64_Decode((byte*)headerEnd, (word32)neededSz, + der->buffer, &der->length) < 0) + return BUFFER_E; + + if ((header == BEGIN_PRIV_KEY +#ifdef OPENSSL_EXTRA + || header == beginBuf +#endif +#ifdef HAVE_ECC + || header == BEGIN_EC_PRIV +#endif + ) && !encrypted_key) + { + #ifdef HAVE_PKCS8 + /* pkcs8 key, convert and adjust length */ + if ((ret = ToTraditional_ex(der->buffer, der->length, &algId)) > 0) { + der->length = ret; + if (keyFormat) { + *keyFormat = algId; + } + } + else { + /* ignore failure here and assume key is not pkcs8 wrapped */ + } + #endif + + return 0; + } + +#ifdef WOLFSSL_ENCRYPTED_KEYS + if (encrypted_key || header == BEGIN_ENC_PRIV_KEY) { + int passwordSz = NAME_SZ; + #ifdef WOLFSSL_SMALL_STACK + char* password = NULL; + #else + char password[NAME_SZ]; + #endif + + if (!info || !info->passwd_cb) { + WOLFSSL_MSG("No password callback set"); + return NO_PASSWORD; + } + + #ifdef WOLFSSL_SMALL_STACK + password = (char*)XMALLOC(passwordSz, heap, DYNAMIC_TYPE_STRING); + if (password == NULL) + return MEMORY_E; + #endif + + /* get password */ + ret = info->passwd_cb(password, passwordSz, PEM_PASS_READ, + info->passwd_userdata); + if (ret >= 0) { + passwordSz = ret; + + /* convert and adjust length */ + if (header == BEGIN_ENC_PRIV_KEY) { + #ifndef NO_PWDBASED + ret = ToTraditionalEnc(der->buffer, der->length, + password, passwordSz, &algId); + + if (ret >= 0) { + der->length = ret; + if (keyFormat) { + *keyFormat = algId; + } + ret = 0; + } + #else + ret = NOT_COMPILED_IN; + #endif + } + /* decrypt the key */ + else { + if (passwordSz == 0) { + /* The key is encrypted but does not have a password */ + WOLFSSL_MSG("No password for encrypted key"); + ret = NO_PASSWORD; + } + else { 
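+                /* Note: this branch handles traditional "Proc-Type:
+                 * 4,ENCRYPTED" PEM bodies; wc_BufferKeyDecrypt derives the
+                 * cipher key from the password using MD5 (OpenSSL's legacy
+                 * EVP_BytesToKey scheme) and decrypts the DER in place. */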
+ ret = wc_BufferKeyDecrypt(info, der->buffer, der->length, + (byte*)password, passwordSz, WC_MD5); + +#ifndef NO_WOLFSSL_SKIP_TRAILING_PAD + #ifndef NO_DES3 + if (info->cipherType == WC_CIPHER_DES3) { + padVal = der->buffer[der->length-1]; + if (padVal <= DES_BLOCK_SIZE) { + der->length -= padVal; + } + } + #endif /* !NO_DES3 */ +#endif /* !NO_WOLFSSL_SKIP_TRAILING_PAD */ + } + } +#ifdef OPENSSL_EXTRA + if (ret) { + PEMerr(0, PEM_R_BAD_DECRYPT); + } +#endif + ForceZero(password, passwordSz); + } +#ifdef OPENSSL_EXTRA + else { + PEMerr(0, PEM_R_BAD_PASSWORD_READ); + } +#endif + + #ifdef WOLFSSL_SMALL_STACK + XFREE(password, heap, DYNAMIC_TYPE_STRING); + #endif + } +#endif /* WOLFSSL_ENCRYPTED_KEYS */ + + return ret; +} + +int wc_PemToDer(const unsigned char* buff, long longSz, int type, + DerBuffer** pDer, void* heap, EncryptedInfo* info, int* eccKey) +{ + return PemToDer(buff, longSz, type, pDer, heap, info, eccKey); +} + + +/* our KeyPemToDer password callback, password in userData */ +static WC_INLINE int OurPasswordCb(char* passwd, int sz, int rw, void* userdata) +{ + (void)rw; + + if (userdata == NULL) + return 0; + + XSTRNCPY(passwd, (char*)userdata, sz); + return min((word32)sz, (word32)XSTRLEN((char*)userdata)); +} + +/* Return bytes written to buff or < 0 for error */ +int wc_KeyPemToDer(const unsigned char* pem, int pemSz, + unsigned char* buff, int buffSz, const char* pass) +{ + int eccKey = 0; + int ret; + DerBuffer* der = NULL; +#ifdef WOLFSSL_SMALL_STACK + EncryptedInfo* info = NULL; +#else + EncryptedInfo info[1]; +#endif + + WOLFSSL_ENTER("wc_KeyPemToDer"); + + if (pem == NULL || buff == NULL || buffSz <= 0) { + WOLFSSL_MSG("Bad pem der args"); + return BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_SMALL_STACK + info = (EncryptedInfo*)XMALLOC(sizeof(EncryptedInfo), NULL, + DYNAMIC_TYPE_ENCRYPTEDINFO); + if (info == NULL) + return MEMORY_E; +#endif + + XMEMSET(info, 0, sizeof(EncryptedInfo)); + info->passwd_cb = OurPasswordCb; + info->passwd_userdata = (void*)pass; + + ret = PemToDer(pem, pemSz, PRIVATEKEY_TYPE, &der, NULL, info, &eccKey); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(info, NULL, DYNAMIC_TYPE_ENCRYPTEDINFO); +#endif + + if (ret < 0 || der == NULL) { + WOLFSSL_MSG("Bad Pem To Der"); + } + else { + if (der->length <= (word32)buffSz) { + XMEMCPY(buff, der->buffer, der->length); + ret = der->length; + } + else { + WOLFSSL_MSG("Bad der length"); + ret = BAD_FUNC_ARG; + } + } + + FreeDer(&der); + return ret; +} + + +/* Return bytes written to buff or < 0 for error */ +int wc_CertPemToDer(const unsigned char* pem, int pemSz, + unsigned char* buff, int buffSz, int type) +{ + int eccKey = 0; + int ret; + DerBuffer* der = NULL; + + WOLFSSL_ENTER("wc_CertPemToDer"); + + if (pem == NULL || buff == NULL || buffSz <= 0) { + WOLFSSL_MSG("Bad pem der args"); + return BAD_FUNC_ARG; + } + + if (type != CERT_TYPE && type != CA_TYPE && type != CERTREQ_TYPE) { + WOLFSSL_MSG("Bad cert type"); + return BAD_FUNC_ARG; + } + + + ret = PemToDer(pem, pemSz, type, &der, NULL, NULL, &eccKey); + if (ret < 0 || der == NULL) { + WOLFSSL_MSG("Bad Pem To Der"); + } + else { + if (der->length <= (word32)buffSz) { + XMEMCPY(buff, der->buffer, der->length); + ret = der->length; + } + else { + WOLFSSL_MSG("Bad der length"); + ret = BAD_FUNC_ARG; + } + } + + FreeDer(&der); + return ret; +} + +#endif /* WOLFSSL_PEM_TO_DER */ +#endif /* WOLFSSL_PEM_TO_DER || WOLFSSL_DER_TO_PEM */ + + +#ifdef WOLFSSL_PEM_TO_DER +#if defined(WOLFSSL_CERT_EXT) || defined(WOLFSSL_PUB_PEM_TO_DER) +/* Return bytes written to buff or < 0 
for error */
+int wc_PubKeyPemToDer(const unsigned char* pem, int pemSz,
+                      unsigned char* buff, int buffSz)
+{
+    int ret;
+    DerBuffer* der = NULL;
+
+    WOLFSSL_ENTER("wc_PubKeyPemToDer");
+
+    if (pem == NULL || buff == NULL || buffSz <= 0) {
+        WOLFSSL_MSG("Bad pem der args");
+        return BAD_FUNC_ARG;
+    }
+
+    ret = PemToDer(pem, pemSz, PUBLICKEY_TYPE, &der, NULL, NULL, NULL);
+    if (ret < 0 || der == NULL) {
+        WOLFSSL_MSG("Bad Pem To Der");
+    }
+    else {
+        if (der->length <= (word32)buffSz) {
+            XMEMCPY(buff, der->buffer, der->length);
+            ret = der->length;
+        }
+        else {
+            WOLFSSL_MSG("Bad der length");
+            ret = BAD_FUNC_ARG;
+        }
+    }
+
+    FreeDer(&der);
+    return ret;
+}
+#endif /* WOLFSSL_CERT_EXT || WOLFSSL_PUB_PEM_TO_DER */
+#endif /* WOLFSSL_PEM_TO_DER */
+
+#ifndef NO_FILESYSTEM
+
+#ifdef WOLFSSL_CERT_GEN
+/* load pem cert from file into der buffer, return der size or error */
+int wc_PemCertToDer(const char* fileName, unsigned char* derBuf, int derSz)
+{
+#ifdef WOLFSSL_SMALL_STACK
+    byte staticBuffer[1]; /* force XMALLOC */
+#else
+    byte staticBuffer[FILE_BUFFER_SIZE];
+#endif
+    byte* fileBuf = staticBuffer;
+    int dynamic = 0;
+    int ret = 0;
+    long sz = 0;
+    XFILE file;
+    DerBuffer* converted = NULL;
+
+    WOLFSSL_ENTER("wc_PemCertToDer");
+
+    if (fileName == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        file = XFOPEN(fileName, "rb");
+        if (file == XBADFILE) {
+            ret = BUFFER_E;
+        }
+    }
+
+    if (ret == 0) {
+        if (XFSEEK(file, 0, XSEEK_END) != 0)
+            ret = BUFFER_E;
+        sz = XFTELL(file);
+        XREWIND(file);
+
+        if (sz <= 0) {
+            ret = BUFFER_E;
+        }
+        else if (sz > (long)sizeof(staticBuffer)) {
+        #ifdef WOLFSSL_STATIC_MEMORY
+            WOLFSSL_MSG("File was larger than static buffer");
+            return MEMORY_E;
+        #endif
+            fileBuf = (byte*)XMALLOC(sz, NULL, DYNAMIC_TYPE_FILE);
+            if (fileBuf == NULL)
+                ret = MEMORY_E;
+            else
+                dynamic = 1;
+        }
+
+        if (ret == 0) {
+            if ( (ret = (int)XFREAD(fileBuf, 1, sz, file)) != sz) {
+                ret = BUFFER_E;
+            }
+        #ifdef WOLFSSL_PEM_TO_DER
+            else {
+                ret = PemToDer(fileBuf, sz, CA_TYPE, &converted, 0, NULL, NULL);
+            }
+        #endif
+
+            if (ret == 0) {
+                if (converted->length < (word32)derSz) {
+                    XMEMCPY(derBuf, converted->buffer, converted->length);
+                    ret = converted->length;
+                }
+                else
+                    ret = BUFFER_E;
+            }
+
+            FreeDer(&converted);
+        }
+
+        XFCLOSE(file);
+        if (dynamic)
+            XFREE(fileBuf, NULL, DYNAMIC_TYPE_FILE);
+    }
+
+    return ret;
+}
+#endif /* WOLFSSL_CERT_GEN */
+
+#if defined(WOLFSSL_CERT_EXT) || defined(WOLFSSL_PUB_PEM_TO_DER)
+/* load pem public key from file into der buffer, return der size or error */
+int wc_PemPubKeyToDer(const char* fileName,
+                      unsigned char* derBuf, int derSz)
+{
+#ifdef WOLFSSL_SMALL_STACK
+    byte staticBuffer[1]; /* force XMALLOC */
+#else
+    byte staticBuffer[FILE_BUFFER_SIZE];
+#endif
+    byte* fileBuf = staticBuffer;
+    int dynamic = 0;
+    int ret = 0;
+    long sz = 0;
+    XFILE file;
+    DerBuffer* converted = NULL;
+
+    WOLFSSL_ENTER("wc_PemPubKeyToDer");
+
+    if (fileName == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        file = XFOPEN(fileName, "rb");
+        if (file == XBADFILE) {
+            ret = BUFFER_E;
+        }
+    }
+
+    if (ret == 0) {
+        if (XFSEEK(file, 0, XSEEK_END) != 0)
+            ret = BUFFER_E;
+        sz = XFTELL(file);
+        XREWIND(file);
+
+        if (sz <= 0) {
+            ret = BUFFER_E;
+        }
+        else if (sz > (long)sizeof(staticBuffer)) {
+        #ifdef WOLFSSL_STATIC_MEMORY
+            WOLFSSL_MSG("File was larger than static buffer");
+            return MEMORY_E;
+        #endif
+            fileBuf = (byte*)XMALLOC(sz, NULL, DYNAMIC_TYPE_FILE);
+            if (fileBuf == NULL)
+                ret = MEMORY_E;
+            else
+                dynamic = 1;
+        }
+        if (ret == 0) {
+            if ( (ret = (int)XFREAD(fileBuf,
1, sz, file)) != sz) { + ret = BUFFER_E; + } + #ifdef WOLFSSL_PEM_TO_DER + else { + ret = PemToDer(fileBuf, sz, PUBLICKEY_TYPE, &converted, + 0, NULL, NULL); + } + #endif + + if (ret == 0) { + if (converted->length < (word32)derSz) { + XMEMCPY(derBuf, converted->buffer, converted->length); + ret = converted->length; + } + else + ret = BUFFER_E; + } + + FreeDer(&converted); + } + + XFCLOSE(file); + if (dynamic) + XFREE(fileBuf, NULL, DYNAMIC_TYPE_FILE); + } + + return ret; +} +#endif /* WOLFSSL_CERT_EXT || WOLFSSL_PUB_PEM_TO_DER */ + +#endif /* !NO_FILESYSTEM */ + + +#if !defined(NO_RSA) && (defined(WOLFSSL_CERT_GEN) || \ + ((defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA)) && !defined(HAVE_USER_RSA))) +/* USER RSA ifdef portions used instead of refactor in consideration for + possible fips build */ +/* Write a public RSA key to output */ +static int SetRsaPublicKey(byte* output, RsaKey* key, + int outLen, int with_header) +{ +#ifdef WOLFSSL_SMALL_STACK + byte* n = NULL; + byte* e = NULL; +#else + byte n[MAX_RSA_INT_SZ]; + byte e[MAX_RSA_E_SZ]; +#endif + byte seq[MAX_SEQ_SZ]; + byte bitString[1 + MAX_LENGTH_SZ + 1]; + int nSz; + int eSz; + int seqSz; + int bitStringSz; + int idx; + + if (output == NULL || key == NULL || outLen < MAX_SEQ_SZ) + return BAD_FUNC_ARG; + + /* n */ +#ifdef WOLFSSL_SMALL_STACK + n = (byte*)XMALLOC(MAX_RSA_INT_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (n == NULL) + return MEMORY_E; +#endif + +#ifdef HAVE_USER_RSA + nSz = SetASNIntRSA(key->n, n); +#else + nSz = SetASNIntMP(&key->n, MAX_RSA_INT_SZ, n); +#endif + if (nSz < 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(n, key->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return nSz; + } + + /* e */ +#ifdef WOLFSSL_SMALL_STACK + e = (byte*)XMALLOC(MAX_RSA_E_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (e == NULL) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(n, key->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return MEMORY_E; + } +#endif + +#ifdef HAVE_USER_RSA + eSz = SetASNIntRSA(key->e, e); +#else + eSz = SetASNIntMP(&key->e, MAX_RSA_INT_SZ, e); +#endif + if (eSz < 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(n, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(e, key->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return eSz; + } + + seqSz = SetSequence(nSz + eSz, seq); + + /* check output size */ + if ( (seqSz + nSz + eSz) > outLen) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(n, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(e, key->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return BUFFER_E; + } + + /* headers */ + if (with_header) { + int algoSz; +#ifdef WOLFSSL_SMALL_STACK + byte* algo; + + algo = (byte*)XMALLOC(MAX_ALGO_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (algo == NULL) { + XFREE(n, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(e, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + return MEMORY_E; + } +#else + byte algo[MAX_ALGO_SZ]; +#endif + algoSz = SetAlgoID(RSAk, algo, oidKeyType, 0); + bitStringSz = SetBitString(seqSz + nSz + eSz, 0, bitString); + + idx = SetSequence(nSz + eSz + seqSz + bitStringSz + algoSz, output); + + /* check output size */ + if ( (idx + algoSz + bitStringSz + seqSz + nSz + eSz) > outLen) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(n, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(e, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(algo, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + + return BUFFER_E; + } + + /* algo */ + XMEMCPY(output + idx, algo, algoSz); + idx += algoSz; + /* bit string */ + XMEMCPY(output + idx, bitString, bitStringSz); + idx += bitStringSz; +#ifdef WOLFSSL_SMALL_STACK + XFREE(algo, key->heap, 
DYNAMIC_TYPE_TMP_BUFFER); +#endif + } + else + idx = 0; + + /* seq */ + XMEMCPY(output + idx, seq, seqSz); + idx += seqSz; + /* n */ + XMEMCPY(output + idx, n, nSz); + idx += nSz; + /* e */ + XMEMCPY(output + idx, e, eSz); + idx += eSz; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(n, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(e, key->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return idx; +} + +#endif /* !NO_RSA && (WOLFSSL_CERT_GEN || (WOLFSSL_KEY_GEN && + !HAVE_USER_RSA))) */ + +#if !defined(NO_RSA) && (defined(WOLFSSL_CERT_GEN) || defined(OPENSSL_EXTRA)) +int wc_RsaPublicKeyDerSize(RsaKey* key, int with_header) +{ + int idx = 0; + int nSz, eSz, seqSz, bitStringSz, algoSz; + + if (key == NULL) + return BAD_FUNC_ARG; + + /* n */ +#ifdef HAVE_USER_RSA + nSz = SetASNIntRSA(key->n, NULL); +#else + nSz = SetASNIntMP(&key->n, MAX_RSA_INT_SZ, NULL); +#endif + if (nSz < 0) { + return nSz; + } + + /* e */ +#ifdef HAVE_USER_RSA + eSz = SetASNIntRSA(key->e, NULL); +#else + eSz = SetASNIntMP(&key->e, MAX_RSA_INT_SZ, NULL); +#endif + if (eSz < 0) { + return eSz; + } + + seqSz = SetSequence(nSz + eSz, NULL); + + /* headers */ + if (with_header) { + algoSz = SetAlgoID(RSAk, NULL, oidKeyType, 0); + bitStringSz = SetBitString(seqSz + nSz + eSz, 0, NULL); + + idx += SetSequence(nSz + eSz + seqSz + bitStringSz + algoSz, NULL); + + /* algo */ + idx += algoSz; + /* bit string */ + idx += bitStringSz; + } + + /* seq */ + idx += seqSz; + /* n */ + idx += nSz; + /* e */ + idx += eSz; + + return idx; +} + +#endif /* !NO_RSA && WOLFSSL_CERT_GEN */ + + +#if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA) && !defined(HAVE_USER_RSA) + +static mp_int* GetRsaInt(RsaKey* key, int idx) +{ + if (idx == 0) + return &key->n; + if (idx == 1) + return &key->e; + if (idx == 2) + return &key->d; + if (idx == 3) + return &key->p; + if (idx == 4) + return &key->q; + if (idx == 5) + return &key->dP; + if (idx == 6) + return &key->dQ; + if (idx == 7) + return &key->u; + + return NULL; +} + + +/* Release Tmp RSA resources */ +static WC_INLINE void FreeTmpRsas(byte** tmps, void* heap) +{ + int i; + + (void)heap; + + for (i = 0; i < RSA_INTS; i++) + XFREE(tmps[i], heap, DYNAMIC_TYPE_RSA); +} + + +/* Convert RsaKey key to DER format, write to output (inLen), return bytes + written */ +int wc_RsaKeyToDer(RsaKey* key, byte* output, word32 inLen) +{ + word32 seqSz, verSz, rawLen, intTotalLen = 0; + word32 sizes[RSA_INTS]; + int i, j, outLen, ret = 0, mpSz; + + byte seq[MAX_SEQ_SZ]; + byte ver[MAX_VERSION_SZ]; + byte* tmps[RSA_INTS]; + + if (!key) + return BAD_FUNC_ARG; + + if (key->type != RSA_PRIVATE) + return BAD_FUNC_ARG; + + for (i = 0; i < RSA_INTS; i++) + tmps[i] = NULL; + + /* write all big ints from key to DER tmps */ + for (i = 0; i < RSA_INTS; i++) { + mp_int* keyInt = GetRsaInt(key, i); + + rawLen = mp_unsigned_bin_size(keyInt) + 1; + tmps[i] = (byte*)XMALLOC(rawLen + MAX_SEQ_SZ, key->heap, + DYNAMIC_TYPE_RSA); + if (tmps[i] == NULL) { + ret = MEMORY_E; + break; + } + + mpSz = SetASNIntMP(keyInt, MAX_RSA_INT_SZ, tmps[i]); + if (mpSz < 0) { + ret = mpSz; + break; + } + intTotalLen += (sizes[i] = mpSz); + } + + if (ret != 0) { + FreeTmpRsas(tmps, key->heap); + return ret; + } + + /* make headers */ + verSz = SetMyVersion(0, ver, FALSE); + seqSz = SetSequence(verSz + intTotalLen, seq); + + outLen = seqSz + verSz + intTotalLen; + if (output) { + if (outLen > (int)inLen) { + FreeTmpRsas(tmps, key->heap); + return BAD_FUNC_ARG; + } + + /* write to output */ + XMEMCPY(output, seq, seqSz); + j = seqSz; + XMEMCPY(output + j, ver, verSz); + j 
+= verSz; + + for (i = 0; i < RSA_INTS; i++) { + XMEMCPY(output + j, tmps[i], sizes[i]); + j += sizes[i]; + } + } + FreeTmpRsas(tmps, key->heap); + + return outLen; +} +#endif + +#if (defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA)) && !defined(NO_RSA) && !defined(HAVE_USER_RSA) +/* Convert Rsa Public key to DER format, write to output (inLen), return bytes + written */ +int wc_RsaKeyToPublicDer(RsaKey* key, byte* output, word32 inLen) +{ + return SetRsaPublicKey(output, key, inLen, 1); +} + +#endif /* (WOLFSSL_KEY_GEN || OPENSSL_EXTRA) && !NO_RSA && !HAVE_USER_RSA */ + + +#ifdef WOLFSSL_CERT_GEN + +/* Initialize and Set Certificate defaults: + version = 3 (0x2) + serial = 0 + sigType = SHA_WITH_RSA + issuer = blank + daysValid = 500 + selfSigned = 1 (true) use subject as issuer + subject = blank +*/ +int wc_InitCert(Cert* cert) +{ +#ifdef WOLFSSL_MULTI_ATTRIB + int i = 0; +#endif + if (cert == NULL) { + return BAD_FUNC_ARG; + } + + XMEMSET(cert, 0, sizeof(Cert)); + + cert->version = 2; /* version 3 is hex 2 */ +#ifndef NO_SHA + cert->sigType = CTC_SHAwRSA; +#elif !defined(NO_SHA256) + cert->sigType = CTC_SHA256wRSA; +#else + cert->sigType = 0; +#endif + cert->daysValid = 500; + cert->selfSigned = 1; + cert->keyType = RSA_KEY; + + cert->issuer.countryEnc = CTC_PRINTABLE; + cert->issuer.stateEnc = CTC_UTF8; + cert->issuer.localityEnc = CTC_UTF8; + cert->issuer.surEnc = CTC_UTF8; + cert->issuer.orgEnc = CTC_UTF8; + cert->issuer.unitEnc = CTC_UTF8; + cert->issuer.commonNameEnc = CTC_UTF8; + + cert->subject.countryEnc = CTC_PRINTABLE; + cert->subject.stateEnc = CTC_UTF8; + cert->subject.localityEnc = CTC_UTF8; + cert->subject.surEnc = CTC_UTF8; + cert->subject.orgEnc = CTC_UTF8; + cert->subject.unitEnc = CTC_UTF8; + cert->subject.commonNameEnc = CTC_UTF8; + +#ifdef WOLFSSL_MULTI_ATTRIB + for (i = 0; i < CTC_MAX_ATTRIB; i++) { + cert->issuer.name[i].type = CTC_UTF8; + cert->subject.name[i].type = CTC_UTF8; + } +#endif /* WOLFSSL_MULTI_ATTRIB */ + +#ifdef WOLFSSL_HEAP_TEST + cert->heap = (void*)WOLFSSL_HEAP_TEST; +#endif + + return 0; +} + + +/* DER encoded x509 Certificate */ +typedef struct DerCert { + byte size[MAX_LENGTH_SZ]; /* length encoded */ + byte version[MAX_VERSION_SZ]; /* version encoded */ + byte serial[(int)CTC_SERIAL_SIZE + (int)MAX_LENGTH_SZ]; /* serial number encoded */ + byte sigAlgo[MAX_ALGO_SZ]; /* signature algo encoded */ + byte issuer[ASN_NAME_MAX]; /* issuer encoded */ + byte subject[ASN_NAME_MAX]; /* subject encoded */ + byte validity[MAX_DATE_SIZE*2 + MAX_SEQ_SZ*2]; /* before and after dates */ + byte publicKey[MAX_PUBLIC_KEY_SZ]; /* rsa / ntru public key encoded */ + byte ca[MAX_CA_SZ]; /* basic constraint CA true size */ + byte extensions[MAX_EXTENSIONS_SZ]; /* all extensions */ +#ifdef WOLFSSL_CERT_EXT + byte skid[MAX_KID_SZ]; /* Subject Key Identifier extension */ + byte akid[MAX_KID_SZ]; /* Authority Key Identifier extension */ + byte keyUsage[MAX_KEYUSAGE_SZ]; /* Key Usage extension */ + byte extKeyUsage[MAX_EXTKEYUSAGE_SZ]; /* Extended Key Usage extension */ + byte certPolicies[MAX_CERTPOL_NB*MAX_CERTPOL_SZ]; /* Certificate Policies */ +#endif +#ifdef WOLFSSL_CERT_REQ + byte attrib[MAX_ATTRIB_SZ]; /* Cert req attributes encoded */ +#endif +#ifdef WOLFSSL_ALT_NAMES + byte altNames[CTC_MAX_ALT_SIZE]; /* Alternative Names encoded */ +#endif + int sizeSz; /* encoded size length */ + int versionSz; /* encoded version length */ + int serialSz; /* encoded serial length */ + int sigAlgoSz; /* encoded sig algo length */ + int issuerSz; /* encoded issuer length */ + 
int subjectSz;      /* encoded subject length */
+    int validitySz;     /* encoded validity length */
+    int publicKeySz;    /* encoded public key length */
+    int caSz;           /* encoded CA extension length */
+#ifdef WOLFSSL_CERT_EXT
+    int skidSz;         /* encoded SKID extension length */
+    int akidSz;         /* encoded AKID extension length */
+    int keyUsageSz;     /* encoded KeyUsage extension length */
+    int extKeyUsageSz;  /* encoded ExtendedKeyUsage extension length */
+    int certPoliciesSz; /* encoded CertPolicies extension length */
+#endif
+#ifdef WOLFSSL_ALT_NAMES
+    int altNamesSz;     /* encoded AltNames extension length */
+#endif
+    int extensionsSz;   /* encoded extensions total length */
+    int total;          /* total encoded lengths */
+#ifdef WOLFSSL_CERT_REQ
+    int attribSz;
+#endif
+} DerCert;
+
+
+#ifdef WOLFSSL_CERT_REQ
+
+/* Write a printable string header to output */
+static word32 SetPrintableString(word32 len, byte* output)
+{
+    output[0] = ASN_PRINTABLE_STRING;
+    return SetLength(len, output + 1) + 1;
+}
+
+static word32 SetUTF8String(word32 len, byte* output)
+{
+    output[0] = ASN_UTF8STRING;
+    return SetLength(len, output + 1) + 1;
+}
+
+#endif /* WOLFSSL_CERT_REQ */
+
+
+#ifndef WOLFSSL_CERT_GEN_CACHE
+/* wc_SetCert_Free is only public when WOLFSSL_CERT_GEN_CACHE is not defined */
+static
+#endif
+void wc_SetCert_Free(Cert* cert)
+{
+    if (cert != NULL) {
+        cert->der = NULL;
+        if (cert->decodedCert) {
+            FreeDecodedCert((DecodedCert*)cert->decodedCert);
+
+            XFREE(cert->decodedCert, cert->heap, DYNAMIC_TYPE_DCERT);
+            cert->decodedCert = NULL;
+        }
+    }
+}
+
+static int wc_SetCert_LoadDer(Cert* cert, const byte* der, word32 derSz)
+{
+    int ret;
+
+    if (cert == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        /* Allocate DecodedCert struct and Zero */
+        cert->decodedCert = (void*)XMALLOC(sizeof(DecodedCert), cert->heap,
+                                           DYNAMIC_TYPE_DCERT);
+
+        if (cert->decodedCert == NULL) {
+            ret = MEMORY_E;
+        }
+        else {
+            XMEMSET(cert->decodedCert, 0, sizeof(DecodedCert));
+
+            InitDecodedCert((DecodedCert*)cert->decodedCert, der, derSz,
+                            cert->heap);
+            ret = ParseCertRelative((DecodedCert*)cert->decodedCert,
+                                    CERT_TYPE, 0, NULL);
+            if (ret >= 0) {
+                cert->der = (byte*)der;
+            }
+            else {
+                wc_SetCert_Free(cert);
+            }
+        }
+    }
+
+    return ret;
+}
+
+#endif /* WOLFSSL_CERT_GEN */
+
+
+#if defined(HAVE_ECC) && defined(HAVE_ECC_KEY_EXPORT)
+
+/* Write a public ECC key to output */
+static int SetEccPublicKey(byte* output, ecc_key* key, int with_header)
+{
+    byte bitString[1 + MAX_LENGTH_SZ + 1];
+    int algoSz;
+    int curveSz;
+    int bitStringSz;
+    int idx;
+    word32 pubSz = ECC_BUFSIZE;
+#ifdef WOLFSSL_SMALL_STACK
+    byte* algo = NULL;
+    byte* curve = NULL;
+    byte* pub;
+#else
+    byte algo[MAX_ALGO_SZ];
+    byte curve[MAX_ALGO_SZ];
+    byte pub[ECC_BUFSIZE];
+#endif
+    int ret;
+
+#ifdef WOLFSSL_SMALL_STACK
+    pub = (byte*)XMALLOC(ECC_BUFSIZE, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (pub == NULL)
+        return MEMORY_E;
+#endif
+
+#ifdef HAVE_SELFTEST
+    /* older versions of ecc.c cannot handle dp being NULL */
+    if (key != NULL && key->dp == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        ret = wc_ecc_export_x963(key, pub, &pubSz);
+    }
+#else
+    ret = wc_ecc_export_x963(key, pub, &pubSz);
+#endif
+    if (ret != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return ret;
+    }
+
+    /* headers */
+    if (with_header) {
+#ifdef WOLFSSL_SMALL_STACK
+        curve = (byte*)XMALLOC(MAX_ALGO_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (curve == NULL) {
+            XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            return MEMORY_E;
+        }
+#endif
+        curveSz = SetCurve(key, curve);
+#if defined(HAVE_ECC) && defined(HAVE_ECC_KEY_EXPORT)
+
+/* Write a public ECC key to output */
+static int SetEccPublicKey(byte* output, ecc_key* key, int with_header)
+{
+    byte bitString[1 + MAX_LENGTH_SZ + 1];
+    int algoSz;
+    int curveSz;
+    int bitStringSz;
+    int idx;
+    word32 pubSz = ECC_BUFSIZE;
+#ifdef WOLFSSL_SMALL_STACK
+    byte* algo = NULL;
+    byte* curve = NULL;
+    byte* pub;
+#else
+    byte algo[MAX_ALGO_SZ];
+    byte curve[MAX_ALGO_SZ];
+    byte pub[ECC_BUFSIZE];
+#endif
+    int ret;
+
+#ifdef WOLFSSL_SMALL_STACK
+    pub = (byte*)XMALLOC(ECC_BUFSIZE, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (pub == NULL)
+        return MEMORY_E;
+#endif
+
+#ifdef HAVE_SELFTEST
+    /* older versions of ecc.c cannot handle dp being NULL */
+    if (key != NULL && key->dp == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        ret = wc_ecc_export_x963(key, pub, &pubSz);
+    }
+#else
+    ret = wc_ecc_export_x963(key, pub, &pubSz);
+#endif
+    if (ret != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return ret;
+    }
+
+    /* headers */
+    if (with_header) {
+#ifdef WOLFSSL_SMALL_STACK
+        curve = (byte*)XMALLOC(MAX_ALGO_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (curve == NULL) {
+            XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            return MEMORY_E;
+        }
+#endif
+        curveSz = SetCurve(key, curve);
+        if (curveSz <= 0) {
+#ifdef WOLFSSL_SMALL_STACK
+            XFREE(curve, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+            return curveSz;
+        }
+
+#ifdef WOLFSSL_SMALL_STACK
+        algo = (byte*)XMALLOC(MAX_ALGO_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (algo == NULL) {
+            XFREE(curve, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            return MEMORY_E;
+        }
+#endif
+        algoSz = SetAlgoID(ECDSAk, algo, oidKeyType, curveSz);
+
+        bitStringSz = SetBitString(pubSz, 0, bitString);
+
+        idx = SetSequence(pubSz + curveSz + bitStringSz + algoSz, output);
+        /* algo */
+        if (output)
+            XMEMCPY(output + idx, algo, algoSz);
+        idx += algoSz;
+        /* curve */
+        if (output)
+            XMEMCPY(output + idx, curve, curveSz);
+        idx += curveSz;
+        /* bit string */
+        if (output)
+            XMEMCPY(output + idx, bitString, bitStringSz);
+        idx += bitStringSz;
+    }
+    else
+        idx = 0;
+
+    /* pub */
+    if (output)
+        XMEMCPY(output + idx, pub, pubSz);
+    idx += pubSz;
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (with_header) {
+        XFREE(algo, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(curve, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+    XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return idx;
+}
+
+
+/* Write the public ECC key to output in DER format; returns the number of
+   bytes used. with_AlgCurve is a flag for whether to include a header with
+   the Algorithm and Curve information */
+int wc_EccPublicKeyToDer(ecc_key* key, byte* output, word32 inLen,
+                                                           int with_AlgCurve)
+{
+    word32 infoSz = 0;
+    word32 keySz  = 0;
+    int ret;
+
+    if (key == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (with_AlgCurve) {
+        /* buffer space for algorithm/curve */
+        infoSz += MAX_SEQ_SZ;
+        infoSz += 2 * MAX_ALGO_SZ;
+
+        /* buffer space for public key sequence */
+        infoSz += MAX_SEQ_SZ;
+        infoSz += TRAILING_ZERO;
+    }
+
+#ifdef HAVE_SELFTEST
+    /* older versions of ecc.c cannot handle dp being NULL */
+    if (key != NULL && key->dp == NULL) {
+        keySz = 1 + 2 * MAX_ECC_BYTES;
+        ret = LENGTH_ONLY_E;
+    }
+    else {
+        ret = wc_ecc_export_x963(key, NULL, &keySz);
+    }
+#else
+    ret = wc_ecc_export_x963(key, NULL, &keySz);
+#endif
+    if (ret != LENGTH_ONLY_E) {
+        WOLFSSL_MSG("Error in getting ECC public key size");
+        return ret;
+    }
+
+    /* if output is null then just return the size */
+    if (output == NULL) {
+        return keySz + infoSz;
+    }
+
+    if (inLen < keySz + infoSz) {
+        return BUFFER_E;
+    }
+
+    return SetEccPublicKey(output, key, with_AlgCurve);
+}
+
+int wc_EccPublicKeyDerSize(ecc_key* key, int with_AlgCurve)
+{
+    return wc_EccPublicKeyToDer(key, NULL, 0, with_AlgCurve);
+}
+
+#endif /* HAVE_ECC && HAVE_ECC_KEY_EXPORT */
+
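/* [Editor's aside] wc_EccPublicKeyToDer() above supports the usual two-pass
 * pattern: call with a NULL output to learn the required size, then call
 * again with a buffer. A hedged usage sketch (error handling trimmed,
 * allocation scheme illustrative), fenced out of the build: */
#if 0
#include <stdlib.h>
#include <wolfssl/wolfcrypt/ecc.h>
#include <wolfssl/wolfcrypt/asn_public.h>

static byte* ecc_pub_to_der(ecc_key* key, int* outSz)
{
    byte* der;
    int   sz = wc_EccPublicKeyToDer(key, NULL, 0, 1);   /* size query */
    if (sz < 0)
        return NULL;
    der = (byte*)malloc((size_t)sz);
    if (der == NULL)
        return NULL;
    sz = wc_EccPublicKeyToDer(key, der, (word32)sz, 1); /* real encode */
    if (sz < 0) {
        free(der);
        return NULL;
    }
    *outSz = sz;
    return der;
}
#endif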
+#if defined(HAVE_ED25519) && (defined(WOLFSSL_CERT_GEN) || \
+                              defined(WOLFSSL_KEY_GEN))
+
+/* Write a public Ed25519 key to output */
+static int SetEd25519PublicKey(byte* output, ed25519_key* key, int with_header)
+{
+    byte bitString[1 + MAX_LENGTH_SZ + 1];
+    int algoSz;
+    int bitStringSz;
+    int idx;
+    word32 pubSz = ED25519_PUB_KEY_SIZE;
+#ifdef WOLFSSL_SMALL_STACK
+    byte* algo = NULL;
+    byte* pub;
+#else
+    byte algo[MAX_ALGO_SZ];
+    byte pub[ED25519_PUB_KEY_SIZE];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+    pub = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (pub == NULL)
+        return MEMORY_E;
+#endif
+
+    idx = wc_ed25519_export_public(key, pub, &pubSz);
+    if (idx != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(pub, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return idx;
+    }
+
+    /* headers */
+    if (with_header) {
+#ifdef WOLFSSL_SMALL_STACK
+        algo = (byte*)XMALLOC(MAX_ALGO_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (algo == NULL) {
+            XFREE(pub, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            return MEMORY_E;
+        }
+#endif
+        algoSz = SetAlgoID(ED25519k, algo, oidKeyType, 0);
+
+        bitStringSz = SetBitString(pubSz, 0, bitString);
+
+        idx = SetSequence(pubSz + bitStringSz + algoSz, output);
+        /* algo */
+        XMEMCPY(output + idx, algo, algoSz);
+        idx += algoSz;
+        /* bit string */
+        XMEMCPY(output + idx, bitString, bitStringSz);
+        idx += bitStringSz;
+    }
+    else
+        idx = 0;
+
+    /* pub */
+    XMEMCPY(output + idx, pub, pubSz);
+    idx += pubSz;
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (with_header) {
+        XFREE(algo, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+    XFREE(pub, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return idx;
+}
+
+int wc_Ed25519PublicKeyToDer(ed25519_key* key, byte* output, word32 inLen,
+                             int withAlg)
+{
+    word32 infoSz = 0;
+    word32 keySz  = 0;
+    int ret;
+
+    if (output == NULL || key == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (withAlg) {
+        /* buffer space for algorithm */
+        infoSz += MAX_SEQ_SZ;
+        infoSz += MAX_ALGO_SZ;
+
+        /* buffer space for public key sequence */
+        infoSz += MAX_SEQ_SZ;
+        infoSz += TRAILING_ZERO;
+    }
+
+    /* with keySz at zero the export call returns BUFFER_E and fills in the
+       required size */
+    if ((ret = wc_ed25519_export_public(key, output, &keySz)) != BUFFER_E) {
+        WOLFSSL_MSG("Error in getting Ed25519 public key size");
+        return ret;
+    }
+
+    if (inLen < keySz + infoSz) {
+        return BUFFER_E;
+    }
+
+    return SetEd25519PublicKey(output, key, withAlg);
+}
+#endif /* HAVE_ED25519 && (WOLFSSL_CERT_GEN || WOLFSSL_KEY_GEN) */
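/* [Editor's aside] For Ed25519 the SubjectPublicKeyInfo produced by
 * SetEd25519PublicKey() has a fixed 44-byte shape, because the
 * AlgorithmIdentifier (OID 1.3.101.112) has no parameters and the key is
 * always 32 bytes:
 *
 *   SEQUENCE(42) { SEQUENCE(5) { OID 2b 65 70 }, BIT STRING(33) }
 *
 * A fenced sketch that emits the same structure directly: */
#if 0
#include <string.h>

typedef unsigned char byte;

/* out must hold 44 bytes; pub is the raw 32-byte Ed25519 public key */
static int ed25519_spki(byte* out, const byte* pub)
{
    static const byte hdr[12] = {
        0x30, 0x2a,                               /* SEQUENCE, 42 bytes   */
        0x30, 0x05, 0x06, 0x03, 0x2b, 0x65, 0x70, /* AlgId: id-Ed25519    */
        0x03, 0x21, 0x00                          /* BIT STRING, 0 unused */
    };
    memcpy(out, hdr, sizeof(hdr));
    memcpy(out + sizeof(hdr), pub, 32);
    return 44;
}
#endif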
+#if defined(HAVE_ED448) && (defined(WOLFSSL_CERT_GEN) || \
+                            defined(WOLFSSL_KEY_GEN))
+
+/* Write a public Ed448 key to output */
+static int SetEd448PublicKey(byte* output, ed448_key* key, int with_header)
+{
+    byte bitString[1 + MAX_LENGTH_SZ + 1];
+    int algoSz;
+    int bitStringSz;
+    int idx;
+    word32 pubSz = ED448_PUB_KEY_SIZE;
+#ifdef WOLFSSL_SMALL_STACK
+    byte* algo = NULL;
+    byte* pub = NULL;
+#else
+    byte algo[MAX_ALGO_SZ];
+    byte pub[ED448_PUB_KEY_SIZE];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+    pub = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (pub == NULL)
+        return MEMORY_E;
+#endif
+
+    idx = wc_ed448_export_public(key, pub, &pubSz);
+    if (idx != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(pub, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+        return idx;
+    }
+
+    /* headers */
+    if (with_header) {
+#ifdef WOLFSSL_SMALL_STACK
+        algo = (byte*)XMALLOC(MAX_ALGO_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (algo == NULL) {
+            XFREE(pub, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            return MEMORY_E;
+        }
+#endif
+        algoSz = SetAlgoID(ED448k, algo, oidKeyType, 0);
+
+        bitStringSz = SetBitString(pubSz, 0, bitString);
+
+        idx = SetSequence(pubSz + bitStringSz + algoSz, output);
+        /* algo */
+        XMEMCPY(output + idx, algo, algoSz);
+        idx += algoSz;
+        /* bit string */
+        XMEMCPY(output + idx, bitString, bitStringSz);
+        idx += bitStringSz;
+    }
+    else
+        idx = 0;
+
+    /* pub */
+    XMEMCPY(output + idx, pub, pubSz);
+    idx += pubSz;
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (with_header) {
+        XFREE(algo, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+    XFREE(pub, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return idx;
+}
+
+int wc_Ed448PublicKeyToDer(ed448_key* key, byte* output, word32 inLen,
+                           int withAlg)
+{
+    word32 infoSz = 0;
+    word32 keySz  = 0;
+    int ret;
+
+    if (output == NULL || key == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (withAlg) {
+        /* buffer space for algorithm */
+        infoSz += MAX_SEQ_SZ;
+        infoSz += MAX_ALGO_SZ;
+
+        /* buffer space for public key sequence */
+        infoSz += MAX_SEQ_SZ;
+        infoSz += TRAILING_ZERO;
+    }
+
+    /* with keySz at zero the export call returns BUFFER_E and fills in the
+       required size */
+    if ((ret = wc_ed448_export_public(key, output, &keySz)) != BUFFER_E) {
+        WOLFSSL_MSG("Error in getting Ed448 public key size");
+        return ret;
+    }
+
+    if (inLen < keySz + infoSz) {
+        return BUFFER_E;
+    }
+
+    return SetEd448PublicKey(output, key, withAlg);
+}
+#endif /* HAVE_ED448 && (WOLFSSL_CERT_GEN || WOLFSSL_KEY_GEN) */
+
+
+#ifdef WOLFSSL_CERT_GEN
+
+static WC_INLINE byte itob(int number)
+{
+    return (byte)number + 0x30;
+}
+
+
+/* write time to output as the 15-byte body of an ASN.1 GeneralizedTime,
+   YYYYMMDDHHMMSSZ format; callers pre-adjust tm_year and tm_mon */
+static void SetTime(struct tm* date, byte* output)
+{
+    int i = 0;
+
+    output[i++] = itob((date->tm_year % 10000) / 1000);
+    output[i++] = itob((date->tm_year % 1000)  /  100);
+    output[i++] = itob((date->tm_year % 100)   /   10);
+    output[i++] = itob( date->tm_year % 10);
+
+    output[i++] = itob(date->tm_mon / 10);
+    output[i++] = itob(date->tm_mon % 10);
+
+    output[i++] = itob(date->tm_mday / 10);
+    output[i++] = itob(date->tm_mday % 10);
+
+    output[i++] = itob(date->tm_hour / 10);
+    output[i++] = itob(date->tm_hour % 10);
+
+    output[i++] = itob(date->tm_min / 10);
+    output[i++] = itob(date->tm_min % 10);
+
+    output[i++] = itob(date->tm_sec / 10);
+    output[i++] = itob(date->tm_sec % 10);
+
+    output[i] = 'Z';  /* Zulu (UTC) */
+}
+
+
+#ifdef WOLFSSL_ALT_NAMES
+
+/* Copy validity dates from cert, return bytes written */
+static int CopyValidity(byte* output, Cert* cert)
+{
+    int seqSz;
+
+    WOLFSSL_ENTER("CopyValidity");
+
+    /* headers and output */
+    seqSz = SetSequence(cert->beforeDateSz + cert->afterDateSz, output);
+    if (output) {
+        XMEMCPY(output + seqSz, cert->beforeDate, cert->beforeDateSz);
+        XMEMCPY(output + seqSz + cert->beforeDateSz, cert->afterDate,
+                cert->afterDateSz);
+    }
+    return seqSz + cert->beforeDateSz + cert->afterDateSz;
+}
+
+#endif
+
+
+/* Set date validity from now until now + daysValid
+ * return size in bytes written to output, 0 on error */
+static int SetValidity(byte* output, int daysValid)
+{
+    byte before[MAX_DATE_SIZE];
+    byte after[MAX_DATE_SIZE];
+
+    int beforeSz;
+    int afterSz;
+    int seqSz;
+
+    time_t now;
+    time_t then;
+    struct tm* tmpTime;
+    struct tm* expandedTime;
+    struct tm localTime;
+
+#if defined(NEED_TMP_TIME)
+    /* for use with gmtime_r */
+    struct tm tmpTimeStorage;
+    tmpTime = &tmpTimeStorage;
+#else
+    tmpTime = NULL;
+#endif
+    (void)tmpTime;
+
+    now = XTIME(0);
+
+    /* before now */
+    before[0] = ASN_GENERALIZED_TIME;
+    beforeSz = SetLength(ASN_GEN_TIME_SZ, before + 1) + 1;  /* gen tag */
+
+    /* start the validity one day of seconds in the past to tolerate clock
+       skew between systems */
+    then = now - 86400;
+    expandedTime = XGMTIME(&then, tmpTime);
+    if (expandedTime == NULL) {
+        WOLFSSL_MSG("XGMTIME failed");
+        return 0;  /* error */
+    }
+    localTime = *expandedTime;
+
+    /* adjust */
+    localTime.tm_year += 1900;
+    localTime.tm_mon  += 1;
+
+    SetTime(&localTime, before + beforeSz);
+    beforeSz += ASN_GEN_TIME_SZ;
+
+    after[0] = ASN_GENERALIZED_TIME;
+    afterSz  = SetLength(ASN_GEN_TIME_SZ, after + 1) + 1;  /* gen tag */
+
+    /* add daysValid of seconds */
+    then = now + (daysValid * (time_t)86400);
+    expandedTime = XGMTIME(&then, tmpTime);
+    if (expandedTime == NULL) {
+        WOLFSSL_MSG("XGMTIME failed");
+        return 0;  /* error */
+    }
+    localTime = *expandedTime;
+
+    /* adjust */
+    localTime.tm_year += 1900;
+    localTime.tm_mon  += 1;
+
+    SetTime(&localTime, after + afterSz);
+    afterSz += ASN_GEN_TIME_SZ;
+
+    /* headers and output */
+    seqSz = SetSequence(beforeSz + afterSz, output);
+    XMEMCPY(output + seqSz, before, beforeSz);
+    XMEMCPY(output + seqSz + beforeSz, after, afterSz);
+
+    return seqSz + beforeSz + afterSz;
+}
+
+
+/* ASN Encoded Name field */
+typedef 
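/* [Editor's aside] SetTime() above hand-rolls the GeneralizedTime body
 * because its callers pre-adjust the struct tm fields (+1900 on tm_year,
 * +1 on tm_mon). With an unadjusted struct tm the same 15-character string
 * can be produced with strftime(), as in this fenced, illustrative sketch
 * (gmtime_r is assumed to be available): */
#if 0
#include <stdio.h>
#include <time.h>

/* writes e.g. "20200716123751Z" into buf (needs at least 16 bytes) */
static void generalized_time(time_t t, char* buf, size_t bufSz)
{
    struct tm tmv;
    gmtime_r(&t, &tmv);                         /* UTC, matching the 'Z' */
    strftime(buf, bufSz, "%Y%m%d%H%M%SZ", &tmv);
}
#endif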
struct EncodedName { + int nameLen; /* actual string value length */ + int totalLen; /* total encoded length */ + int type; /* type of name */ + int used; /* are we actually using this one */ + byte encoded[CTC_NAME_SIZE * 2]; /* encoding */ +} EncodedName; + + +/* Get Which Name from index */ +static const char* GetOneName(CertName* name, int idx) +{ + switch (idx) { + case 0: + return name->country; + + case 1: + return name->state; + + case 2: + return name->locality; + + case 3: + return name->sur; + + case 4: + return name->org; + + case 5: + return name->unit; + + case 6: + return name->commonName; + + case 7: + return name->serialDev; + +#ifdef WOLFSSL_CERT_EXT + case 8: + return name->busCat; + + case 9: +#else + case 8: +#endif + return name->email; + + default: + return 0; + } +} + + +/* Get Which Name Encoding from index */ +static char GetNameType(CertName* name, int idx) +{ + switch (idx) { + case 0: + return name->countryEnc; + + case 1: + return name->stateEnc; + + case 2: + return name->localityEnc; + + case 3: + return name->surEnc; + + case 4: + return name->orgEnc; + + case 5: + return name->unitEnc; + + case 6: + return name->commonNameEnc; + + case 7: + return name->serialDevEnc; + +#ifdef WOLFSSL_CERT_EXT + case 8: + return name->busCatEnc; + + case 9: +#else + case 8: +#endif + /* FALL THROUGH */ + /* The last index, email name, does not have encoding type. + The empty case here is to keep track of it for future reference. */ + default: + return 0; + } +} + + +/* Get ASN Name from index */ +static byte GetNameId(int idx) +{ + switch (idx) { + case 0: + return ASN_COUNTRY_NAME; + + case 1: + return ASN_STATE_NAME; + + case 2: + return ASN_LOCALITY_NAME; + + case 3: + return ASN_SUR_NAME; + + case 4: + return ASN_ORG_NAME; + + case 5: + return ASN_ORGUNIT_NAME; + + case 6: + return ASN_COMMON_NAME; + + case 7: + return ASN_SERIAL_NUMBER; + +#ifdef WOLFSSL_CERT_EXT + case 8: + return ASN_BUS_CAT; + + case 9: +#else + case 8: +#endif + return ASN_EMAIL_NAME; + + default: + return 0; + } +} + +/* + Extensions ::= SEQUENCE OF Extension + + Extension ::= SEQUENCE { + extnId OBJECT IDENTIFIER, + critical BOOLEAN DEFAULT FALSE, + extnValue OCTET STRING } + */ + +/* encode all extensions, return total bytes written */ +static int SetExtensions(byte* out, word32 outSz, int *IdxInOut, + const byte* ext, int extSz) +{ + if (out == NULL || IdxInOut == NULL || ext == NULL) + return BAD_FUNC_ARG; + + if (outSz < (word32)(*IdxInOut+extSz)) + return BUFFER_E; + + XMEMCPY(&out[*IdxInOut], ext, extSz); /* extensions */ + *IdxInOut += extSz; + + return *IdxInOut; +} + +/* encode extensions header, return total bytes written */ +static int SetExtensionsHeader(byte* out, word32 outSz, int extSz) +{ + byte sequence[MAX_SEQ_SZ]; + byte len[MAX_LENGTH_SZ]; + int seqSz, lenSz, idx = 0; + + if (out == NULL) + return BAD_FUNC_ARG; + + if (outSz < 3) + return BUFFER_E; + + seqSz = SetSequence(extSz, sequence); + + /* encode extensions length provided */ + lenSz = SetLength(extSz+seqSz, len); + + if (outSz < (word32)(lenSz+seqSz+1)) + return BUFFER_E; + + out[idx++] = ASN_EXTENSIONS; /* extensions id */ + XMEMCPY(&out[idx], len, lenSz); /* length */ + idx += lenSz; + + XMEMCPY(&out[idx], sequence, seqSz); /* sequence */ + idx += seqSz; + + return idx; +} + + +/* encode CA basic constraint true, return total bytes written */ +static int SetCa(byte* out, word32 outSz) +{ + const byte ca[] = { 0x30, 0x0c, 0x06, 0x03, 0x55, 0x1d, 0x13, 0x04, + 0x05, 0x30, 0x03, 0x01, 0x01, 0xff }; + + if (out == NULL) 
+ return BAD_FUNC_ARG; + + if (outSz < sizeof(ca)) + return BUFFER_E; + + XMEMCPY(out, ca, sizeof(ca)); + + return (int)sizeof(ca); +} + + +#ifdef WOLFSSL_CERT_EXT +/* encode OID and associated value, return total bytes written */ +static int SetOidValue(byte* out, word32 outSz, const byte *oid, word32 oidSz, + byte *in, word32 inSz) +{ + int idx = 0; + + if (out == NULL || oid == NULL || in == NULL) + return BAD_FUNC_ARG; + + if (outSz < 3) + return BUFFER_E; + + /* sequence, + 1 => byte to put value size */ + idx = SetSequence(inSz + oidSz + 1, out); + + if ((idx + inSz + oidSz + 1) > outSz) + return BUFFER_E; + + XMEMCPY(out+idx, oid, oidSz); + idx += oidSz; + out[idx++] = (byte)inSz; + XMEMCPY(out+idx, in, inSz); + + return (idx+inSz); +} + +/* encode Subject Key Identifier, return total bytes written + * RFC5280 : non-critical */ +static int SetSKID(byte* output, word32 outSz, const byte *input, word32 length) +{ + byte skid_len[1 + MAX_LENGTH_SZ]; + byte skid_enc_len[MAX_LENGTH_SZ]; + int idx = 0, skid_lenSz, skid_enc_lenSz; + const byte skid_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x0e, 0x04 }; + + if (output == NULL || input == NULL) + return BAD_FUNC_ARG; + + /* Octet String header */ + skid_lenSz = SetOctetString(length, skid_len); + + /* length of encoded value */ + skid_enc_lenSz = SetLength(length + skid_lenSz, skid_enc_len); + + if (outSz < 3) + return BUFFER_E; + + idx = SetSequence(length + sizeof(skid_oid) + skid_lenSz + skid_enc_lenSz, + output); + + if ((length + sizeof(skid_oid) + skid_lenSz + skid_enc_lenSz) > outSz) + return BUFFER_E; + + /* put oid */ + XMEMCPY(output+idx, skid_oid, sizeof(skid_oid)); + idx += sizeof(skid_oid); + + /* put encoded len */ + XMEMCPY(output+idx, skid_enc_len, skid_enc_lenSz); + idx += skid_enc_lenSz; + + /* put octet header */ + XMEMCPY(output+idx, skid_len, skid_lenSz); + idx += skid_lenSz; + + /* put value */ + XMEMCPY(output+idx, input, length); + idx += length; + + return idx; +} + +/* encode Authority Key Identifier, return total bytes written + * RFC5280 : non-critical */ +static int SetAKID(byte* output, word32 outSz, + byte *input, word32 length, void* heap) +{ + byte *enc_val; + int ret, enc_valSz; + const byte akid_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x23, 0x04 }; + const byte akid_cs[] = { 0x80 }; + + (void)heap; + + if (output == NULL || input == NULL) + return BAD_FUNC_ARG; + + enc_valSz = length + 3 + sizeof(akid_cs); + enc_val = (byte *)XMALLOC(enc_valSz, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (enc_val == NULL) + return MEMORY_E; + + /* sequence for ContentSpec & value */ + ret = SetOidValue(enc_val, enc_valSz, akid_cs, sizeof(akid_cs), + input, length); + if (ret > 0) { + enc_valSz = ret; + + ret = SetOidValue(output, outSz, akid_oid, sizeof(akid_oid), + enc_val, enc_valSz); + } + + XFREE(enc_val, heap, DYNAMIC_TYPE_TMP_BUFFER); + return ret; +} + +/* encode Key Usage, return total bytes written + * RFC5280 : critical */ +static int SetKeyUsage(byte* output, word32 outSz, word16 input) +{ + byte ku[5]; + int idx; + const byte keyusage_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x0f, + 0x01, 0x01, 0xff, 0x04}; + if (output == NULL) + return BAD_FUNC_ARG; + + idx = SetBitString16Bit(input, ku); + return SetOidValue(output, outSz, keyusage_oid, sizeof(keyusage_oid), + ku, idx); +} + +static int SetOjectIdValue(byte* output, word32 outSz, int* idx, + const byte* oid, word32 oidSz) +{ + /* verify room */ + if (*idx + 2 + oidSz >= outSz) + return ASN_PARSE_E; + + *idx += SetObjectId(oidSz, &output[*idx]); + XMEMCPY(&output[*idx], oid, oidSz); + 
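/* [Editor's aside] SetSKID() above only wraps an existing identifier in its
 * extension encoding; the identifier itself is usually the hash of the
 * public key (RFC 5280 4.2.1.2, method 1), which is what the
 * SetKeyIdFromPublicKey()/CalcHashId() path later in this file computes
 * (SHA-1 by default, SHA-256 in builds without SHA-1). A hedged sketch of
 * that derivation with wolfCrypt's one-shot SHA-1, fenced out of the
 * build: */
#if 0
#include <wolfssl/wolfcrypt/sha.h>

/* skid must hold WC_SHA_DIGEST_SIZE (20) bytes */
static int derive_skid(const byte* pubKeyDer, word32 pubKeyDerSz, byte* skid)
{
    /* RFC 5280 method 1: SKID = SHA-1 over the public key bytes */
    return wc_ShaHash(pubKeyDer, pubKeyDerSz, skid);
}
#endif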
*idx += oidSz; + + return 0; +} + +/* encode Extended Key Usage (RFC 5280 4.2.1.12), return total bytes written */ +static int SetExtKeyUsage(Cert* cert, byte* output, word32 outSz, byte input) +{ + int idx = 0, oidListSz = 0, totalSz, ret = 0; + const byte extkeyusage_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x25 }; + + if (output == NULL) + return BAD_FUNC_ARG; + + /* Skip to OID List */ + totalSz = 2 + sizeof(extkeyusage_oid) + 4; + idx = totalSz; + + /* Build OID List */ + /* If any set, then just use it */ + if (input & EXTKEYUSE_ANY) { + ret |= SetOjectIdValue(output, outSz, &idx, + extExtKeyUsageAnyOid, sizeof(extExtKeyUsageAnyOid)); + } + else { + if (input & EXTKEYUSE_SERVER_AUTH) + ret |= SetOjectIdValue(output, outSz, &idx, + extExtKeyUsageServerAuthOid, sizeof(extExtKeyUsageServerAuthOid)); + if (input & EXTKEYUSE_CLIENT_AUTH) + ret |= SetOjectIdValue(output, outSz, &idx, + extExtKeyUsageClientAuthOid, sizeof(extExtKeyUsageClientAuthOid)); + if (input & EXTKEYUSE_CODESIGN) + ret |= SetOjectIdValue(output, outSz, &idx, + extExtKeyUsageCodeSigningOid, sizeof(extExtKeyUsageCodeSigningOid)); + if (input & EXTKEYUSE_EMAILPROT) + ret |= SetOjectIdValue(output, outSz, &idx, + extExtKeyUsageEmailProtectOid, sizeof(extExtKeyUsageEmailProtectOid)); + if (input & EXTKEYUSE_TIMESTAMP) + ret |= SetOjectIdValue(output, outSz, &idx, + extExtKeyUsageTimestampOid, sizeof(extExtKeyUsageTimestampOid)); + if (input & EXTKEYUSE_OCSP_SIGN) + ret |= SetOjectIdValue(output, outSz, &idx, + extExtKeyUsageOcspSignOid, sizeof(extExtKeyUsageOcspSignOid)); + #ifdef WOLFSSL_EKU_OID + /* iterate through OID values */ + if (input & EXTKEYUSE_USER) { + int i, sz; + for (i = 0; i < CTC_MAX_EKU_NB; i++) { + sz = cert->extKeyUsageOIDSz[i]; + if (sz > 0) { + ret |= SetOjectIdValue(output, outSz, &idx, + cert->extKeyUsageOID[i], sz); + } + } + } + #endif /* WOLFSSL_EKU_OID */ + } + if (ret != 0) + return ASN_PARSE_E; + + /* Calculate Sizes */ + oidListSz = idx - totalSz; + totalSz = idx - 2; /* exclude first seq/len (2) */ + + /* 1. Seq + Total Len (2) */ + idx = SetSequence(totalSz, output); + + /* 2. Object ID (2) */ + XMEMCPY(&output[idx], extkeyusage_oid, sizeof(extkeyusage_oid)); + idx += sizeof(extkeyusage_oid); + + /* 3. Octet String (2) */ + idx += SetOctetString(totalSz - idx, &output[idx]); + + /* 4. Seq + OidListLen (2) */ + idx += SetSequence(oidListSz, &output[idx]); + + /* 5. Oid List (already set in-place above) */ + idx += oidListSz; + + (void)cert; + return idx; +} + +/* encode Certificate Policies, return total bytes written + * each input value must be ITU-T X.690 formatted : a.b.c... 
+ * input must be a NULL-terminated array of dotted-decimal OID strings
+ * RFC5280 : non-critical */
+static int SetCertificatePolicies(byte *output,
+                                  word32 outputSz,
+                                  char input[MAX_CERTPOL_NB][MAX_CERTPOL_SZ],
+                                  word16 nb_certpol,
+                                  void* heap)
+{
+    byte    oid[MAX_OID_SZ],
+            der_oid[MAX_CERTPOL_NB][MAX_OID_SZ],
+            out[MAX_CERTPOL_SZ];
+    word32  oidSz;
+    word32  outSz, i = 0, der_oidSz[MAX_CERTPOL_NB];
+    int     ret;
+
+    const byte certpol_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x20, 0x04 };
+    const byte oid_oid[] = { 0x06 };
+
+    if (output == NULL || input == NULL || nb_certpol > MAX_CERTPOL_NB)
+        return BAD_FUNC_ARG;
+
+    for (i = 0; i < nb_certpol; i++) {
+        oidSz = sizeof(oid);
+        XMEMSET(oid, 0, oidSz);
+
+        ret = EncodePolicyOID(oid, &oidSz, input[i], heap);
+        if (ret != 0)
+            return ret;
+
+        /* compute sequence value for the oid */
+        ret = SetOidValue(der_oid[i], MAX_OID_SZ, oid_oid,
+                          sizeof(oid_oid), oid, oidSz);
+        if (ret <= 0)
+            return ret;
+        else
+            der_oidSz[i] = (word32)ret;
+    }
+
+    /* concatenate the oids, keeping two bytes for the sequence/size of the
+       created value */
+    for (i = 0, outSz = 2; i < nb_certpol; i++) {
+        XMEMCPY(out+outSz, der_oid[i], der_oidSz[i]);
+        outSz += der_oidSz[i];
+    }
+
+    /* add sequence */
+    ret = SetSequence(outSz-2, out);
+    if (ret <= 0)
+        return ret;
+
+    /* add Policy OID to compute final value */
+    return SetOidValue(output, outputSz, certpol_oid, sizeof(certpol_oid),
+                       out, outSz);
+}
+#endif /* WOLFSSL_CERT_EXT */
+
+
+#ifdef WOLFSSL_ALT_NAMES
+
+/* encode Alternative Names, return total bytes written */
+static int SetAltNames(byte *output, word32 outSz,
+                       const byte *input, word32 length)
+{
+    byte san_len[1 + MAX_LENGTH_SZ];
+    int idx = 0, san_lenSz;
+    const byte san_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x11 };
+
+    if (output == NULL || input == NULL)
+        return BAD_FUNC_ARG;
+
+    if (outSz < length)
+        return BUFFER_E;
+
+    /* Octet String header */
+    san_lenSz = SetOctetString(length, san_len);
+
+    if (outSz < MAX_SEQ_SZ)
+        return BUFFER_E;
+
+    idx = SetSequence(length + sizeof(san_oid) + san_lenSz, output);
+
+    if ((length + sizeof(san_oid) + san_lenSz) > outSz)
+        return BUFFER_E;
+
+    /* put oid */
+    XMEMCPY(output+idx, san_oid, sizeof(san_oid));
+    idx += sizeof(san_oid);
+
+    /* put octet header */
+    XMEMCPY(output+idx, san_len, san_lenSz);
+    idx += san_lenSz;
+
+    /* put value */
+    XMEMCPY(output+idx, input, length);
+    idx += length;
+
+    return idx;
+}
+
+
+#ifdef WOLFSSL_CERT_GEN
+
+int FlattenAltNames(byte* output, word32 outputSz, const DNS_entry* names)
+{
+    word32 idx;
+    const DNS_entry* curName;
+    word32 namesSz = 0;
+
+    if (output == NULL)
+        return BAD_FUNC_ARG;
+
+    if (names == NULL)
+        return 0;
+
+    curName = names;
+    do {
+        namesSz += curName->len + 2 +
+                   ((curName->len < ASN_LONG_LENGTH) ? 
0 + : BytePrecision(curName->len)); + curName = curName->next; + } while (curName != NULL); + + if (outputSz < MAX_SEQ_SZ + namesSz) + return BUFFER_E; + + idx = SetSequence(namesSz, output); + + curName = names; + do { + output[idx++] = ASN_CONTEXT_SPECIFIC | curName->type; + idx += SetLength(curName->len, output + idx); + XMEMCPY(output + idx, curName->name, curName->len); + idx += curName->len; + curName = curName->next; + } while (curName != NULL); + + return idx; +} + +#endif /* WOLFSSL_CERT_GEN */ + +#endif /* WOLFSSL_ALT_NAMES */ + +/* Encodes one attribute of the name (issuer/subject) + * + * name structure to hold result of encoding + * nameStr value to be encoded + * nameType type of encoding i.e CTC_UTF8 + * type id of attribute i.e ASN_COMMON_NAME + * + * returns length on success + */ +static int wc_EncodeName(EncodedName* name, const char* nameStr, char nameType, + byte type) +{ + word32 idx = 0; + + if (nameStr) { + /* bottom up */ + byte firstLen[1 + MAX_LENGTH_SZ]; + byte secondLen[MAX_LENGTH_SZ]; + byte sequence[MAX_SEQ_SZ]; + byte set[MAX_SET_SZ]; + + int strLen = (int)XSTRLEN(nameStr); + int thisLen = strLen; + int firstSz, secondSz, seqSz, setSz; + + if (strLen == 0) { /* no user data for this item */ + name->used = 0; + return 0; + } + + /* Restrict country code size */ + if (ASN_COUNTRY_NAME == type && strLen != CTC_COUNTRY_SIZE) { + return ASN_COUNTRY_SIZE_E; + } + + secondSz = SetLength(strLen, secondLen); + thisLen += secondSz; + switch (type) { + case ASN_EMAIL_NAME: /* email */ + thisLen += EMAIL_JOINT_LEN; + firstSz = EMAIL_JOINT_LEN; + break; + + case ASN_DOMAIN_COMPONENT: + thisLen += PILOT_JOINT_LEN; + firstSz = PILOT_JOINT_LEN; + break; + + default: + thisLen++; /* str type */ + thisLen += JOINT_LEN; + firstSz = JOINT_LEN + 1; + } + thisLen++; /* id type */ + firstSz = SetObjectId(firstSz, firstLen); + thisLen += firstSz; + + seqSz = SetSequence(thisLen, sequence); + thisLen += seqSz; + setSz = SetSet(thisLen, set); + thisLen += setSz; + + if (thisLen > (int)sizeof(name->encoded)) { + return BUFFER_E; + } + + /* store it */ + idx = 0; + /* set */ + XMEMCPY(name->encoded, set, setSz); + idx += setSz; + /* seq */ + XMEMCPY(name->encoded + idx, sequence, seqSz); + idx += seqSz; + /* asn object id */ + XMEMCPY(name->encoded + idx, firstLen, firstSz); + idx += firstSz; + switch (type) { + case ASN_EMAIL_NAME: + { + const byte EMAIL_OID[] = { 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, + 0x01, 0x09, 0x01, 0x16 }; + /* email joint id */ + XMEMCPY(name->encoded + idx, EMAIL_OID, sizeof(EMAIL_OID)); + idx += (int)sizeof(EMAIL_OID); + } + break; + + case ASN_DOMAIN_COMPONENT: + { + const byte PILOT_OID[] = { 0x09, 0x92, 0x26, 0x89, + 0x93, 0xF2, 0x2C, 0x64, 0x01 + }; + + XMEMCPY(name->encoded + idx, PILOT_OID, + sizeof(PILOT_OID)); + idx += (int)sizeof(PILOT_OID); + /* id type */ + name->encoded[idx++] = type; + /* str type */ + name->encoded[idx++] = nameType; + } + break; + + default: + name->encoded[idx++] = 0x55; + name->encoded[idx++] = 0x04; + /* id type */ + name->encoded[idx++] = type; + /* str type */ + name->encoded[idx++] = nameType; + } + /* second length */ + XMEMCPY(name->encoded + idx, secondLen, secondSz); + idx += secondSz; + /* str value */ + XMEMCPY(name->encoded + idx, nameStr, strLen); + idx += strLen; + + name->type = type; + name->totalLen = idx; + name->used = 1; + } + else + name->used = 0; + + return idx; +} + +/* encode CertName into output, return total bytes written */ +int SetName(byte* output, word32 outputSz, CertName* name) +{ + int 
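/* [Editor's aside] wc_EncodeName() above builds one RelativeDistinguishedName
 * as SET { SEQUENCE { OID, string } }. For a concrete feel, commonName "Ed"
 * (OID 2.5.4.3 as a UTF8String) encodes to the 13 bytes in this fenced
 * sketch: */
#if 0
typedef unsigned char byte;

static const byte cn_ed_rdn[] = {
    0x31, 0x0b,                     /* SET, 11 bytes              */
    0x30, 0x09,                     /* SEQUENCE, 9 bytes          */
    0x06, 0x03, 0x55, 0x04, 0x03,   /* OID 2.5.4.3 (commonName)   */
    0x0c, 0x02, 'E', 'd'            /* UTF8String, 2 bytes: "Ed"  */
};
#endif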
totalBytes = 0, i, idx; +#ifdef WOLFSSL_SMALL_STACK + EncodedName* names = NULL; +#else + EncodedName names[NAME_ENTRIES]; +#endif +#ifdef WOLFSSL_MULTI_ATTRIB + EncodedName addNames[CTC_MAX_ATTRIB]; + int j, type; +#endif + + if (output == NULL || name == NULL) + return BAD_FUNC_ARG; + + if (outputSz < 3) + return BUFFER_E; + +#ifdef WOLFSSL_SMALL_STACK + names = (EncodedName*)XMALLOC(sizeof(EncodedName) * NAME_ENTRIES, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (names == NULL) + return MEMORY_E; +#endif + + for (i = 0; i < NAME_ENTRIES; i++) { + int ret; + const char* nameStr = GetOneName(name, i); + + ret = wc_EncodeName(&names[i], nameStr, GetNameType(name, i), + GetNameId(i)); + if (ret < 0) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(names, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return BUFFER_E; + } + totalBytes += ret; + } +#ifdef WOLFSSL_MULTI_ATTRIB + for (i = 0; i < CTC_MAX_ATTRIB; i++) { + if (name->name[i].sz > 0) { + int ret; + ret = wc_EncodeName(&addNames[i], name->name[i].value, + name->name[i].type, name->name[i].id); + if (ret < 0) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(names, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return BUFFER_E; + } + totalBytes += ret; + } + else { + addNames[i].used = 0; + } + } +#endif /* WOLFSSL_MULTI_ATTRIB */ + + /* header */ + idx = SetSequence(totalBytes, output); + totalBytes += idx; + if (totalBytes > ASN_NAME_MAX) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(names, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return BUFFER_E; + } + + for (i = 0; i < NAME_ENTRIES; i++) { + #ifdef WOLFSSL_MULTI_ATTRIB + type = GetNameId(i); + + /* list all DC values before OUs */ + if (type == ASN_ORGUNIT_NAME) { + type = ASN_DOMAIN_COMPONENT; + for (j = 0; j < CTC_MAX_ATTRIB; j++) { + if (name->name[j].sz > 0 && type == name->name[j].id) { + if (outputSz < (word32)(idx+addNames[j].totalLen)) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(names, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return BUFFER_E; + } + + XMEMCPY(output + idx, addNames[j].encoded, + addNames[j].totalLen); + idx += addNames[j].totalLen; + } + } + type = ASN_ORGUNIT_NAME; + } + + /* write all similar types to the buffer */ + for (j = 0; j < CTC_MAX_ATTRIB; j++) { + if (name->name[j].sz > 0 && type == name->name[j].id) { + if (outputSz < (word32)(idx+addNames[j].totalLen)) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(names, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return BUFFER_E; + } + + XMEMCPY(output + idx, addNames[j].encoded, + addNames[j].totalLen); + idx += addNames[j].totalLen; + } + } + #endif /* WOLFSSL_MULTI_ATTRIB */ + + if (names[i].used) { + if (outputSz < (word32)(idx+names[i].totalLen)) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(names, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return BUFFER_E; + } + + XMEMCPY(output + idx, names[i].encoded, names[i].totalLen); + idx += names[i].totalLen; + } + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(names, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return totalBytes; +} + +/* encode info from cert into DER encoded format */ +static int EncodeCert(Cert* cert, DerCert* der, RsaKey* rsaKey, ecc_key* eccKey, + WC_RNG* rng, const byte* ntruKey, word16 ntruSz, + ed25519_key* ed25519Key, ed448_key* ed448Key) +{ + int ret; + + if (cert == NULL || der == NULL || rng == NULL) + return BAD_FUNC_ARG; + + /* make sure at least one key type is provided */ + if (rsaKey == NULL && eccKey == NULL && ed25519Key == NULL && + ed448Key == NULL && ntruKey == NULL) { + return PUBLIC_KEY_E; + } + + /* init */ + XMEMSET(der, 0, sizeof(DerCert)); + + /* version */ + der->versionSz = 
SetMyVersion(cert->version, der->version, TRUE); + + /* serial number (must be positive) */ + if (cert->serialSz == 0) { + /* generate random serial */ + cert->serialSz = CTC_GEN_SERIAL_SZ; + ret = wc_RNG_GenerateBlock(rng, cert->serial, cert->serialSz); + if (ret != 0) + return ret; + /* Clear the top bit to avoid a negative value */ + cert->serial[0] &= 0x7f; + } + der->serialSz = SetSerialNumber(cert->serial, cert->serialSz, der->serial, + sizeof(der->serial), CTC_SERIAL_SIZE); + if (der->serialSz < 0) + return der->serialSz; + + /* signature algo */ + der->sigAlgoSz = SetAlgoID(cert->sigType, der->sigAlgo, oidSigType, 0); + if (der->sigAlgoSz <= 0) + return ALGO_ID_E; + + /* public key */ +#ifndef NO_RSA + if (cert->keyType == RSA_KEY) { + if (rsaKey == NULL) + return PUBLIC_KEY_E; + der->publicKeySz = SetRsaPublicKey(der->publicKey, rsaKey, + sizeof(der->publicKey), 1); + } +#endif + +#ifdef HAVE_ECC + if (cert->keyType == ECC_KEY) { + if (eccKey == NULL) + return PUBLIC_KEY_E; + der->publicKeySz = SetEccPublicKey(der->publicKey, eccKey, 1); + } +#endif + +#ifdef HAVE_ED25519 + if (cert->keyType == ED25519_KEY) { + if (ed25519Key == NULL) + return PUBLIC_KEY_E; + der->publicKeySz = SetEd25519PublicKey(der->publicKey, ed25519Key, 1); + } +#endif + +#ifdef HAVE_ED448 + if (cert->keyType == ED448_KEY) { + if (ed448Key == NULL) + return PUBLIC_KEY_E; + der->publicKeySz = SetEd448PublicKey(der->publicKey, ed448Key, 1); + } +#endif + +#ifdef HAVE_NTRU + if (cert->keyType == NTRU_KEY) { + word32 rc; + word16 encodedSz; + + if (ntruKey == NULL) + return PUBLIC_KEY_E; + + rc = ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo(ntruSz, + ntruKey, &encodedSz, NULL); + if (rc != NTRU_OK) + return PUBLIC_KEY_E; + if (encodedSz > MAX_PUBLIC_KEY_SZ) + return PUBLIC_KEY_E; + + rc = ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo(ntruSz, + ntruKey, &encodedSz, der->publicKey); + if (rc != NTRU_OK) + return PUBLIC_KEY_E; + + der->publicKeySz = encodedSz; + } +#else + (void)ntruSz; +#endif /* HAVE_NTRU */ + + if (der->publicKeySz <= 0) + return PUBLIC_KEY_E; + + der->validitySz = 0; +#ifdef WOLFSSL_ALT_NAMES + /* date validity copy ? 
*/ + if (cert->beforeDateSz && cert->afterDateSz) { + der->validitySz = CopyValidity(der->validity, cert); + if (der->validitySz <= 0) + return DATE_E; + } +#endif + + /* date validity */ + if (der->validitySz == 0) { + der->validitySz = SetValidity(der->validity, cert->daysValid); + if (der->validitySz <= 0) + return DATE_E; + } + + /* subject name */ +#ifdef WOLFSSL_CERT_EXT + if (XSTRLEN((const char*)cert->sbjRaw) > 0) { + /* Use the raw subject */ + int idx; + + der->subjectSz = min(sizeof(der->subject), + (word32)XSTRLEN((const char*)cert->sbjRaw)); + /* header */ + idx = SetSequence(der->subjectSz, der->subject); + if (der->subjectSz + idx > (int)sizeof(der->subject)) { + return SUBJECT_E; + } + + XMEMCPY((char*)der->subject + idx, (const char*)cert->sbjRaw, + der->subjectSz); + der->subjectSz += idx; + } + else +#endif + { + /* Use the name structure */ + der->subjectSz = SetName(der->subject, sizeof(der->subject), + &cert->subject); + } + if (der->subjectSz <= 0) + return SUBJECT_E; + + /* issuer name */ +#ifdef WOLFSSL_CERT_EXT + if (XSTRLEN((const char*)cert->issRaw) > 0) { + /* Use the raw issuer */ + int idx; + + der->issuerSz = min(sizeof(der->issuer), + (word32)XSTRLEN((const char*)cert->issRaw)); + /* header */ + idx = SetSequence(der->issuerSz, der->issuer); + if (der->issuerSz + idx > (int)sizeof(der->issuer)) { + return ISSUER_E; + } + + XMEMCPY((char*)der->issuer + idx, (const char*)cert->issRaw, + der->issuerSz); + der->issuerSz += idx; + } + else +#endif + { + /* Use the name structure */ + der->issuerSz = SetName(der->issuer, sizeof(der->issuer), + cert->selfSigned ? &cert->subject : &cert->issuer); + } + if (der->issuerSz <= 0) + return ISSUER_E; + + /* set the extensions */ + der->extensionsSz = 0; + + /* CA */ + if (cert->isCA) { + der->caSz = SetCa(der->ca, sizeof(der->ca)); + if (der->caSz <= 0) + return CA_TRUE_E; + + der->extensionsSz += der->caSz; + } + else + der->caSz = 0; + +#ifdef WOLFSSL_ALT_NAMES + /* Alternative Name */ + if (cert->altNamesSz) { + der->altNamesSz = SetAltNames(der->altNames, sizeof(der->altNames), + cert->altNames, cert->altNamesSz); + if (der->altNamesSz <= 0) + return ALT_NAME_E; + + der->extensionsSz += der->altNamesSz; + } + else + der->altNamesSz = 0; +#endif + +#ifdef WOLFSSL_CERT_EXT + /* SKID */ + if (cert->skidSz) { + /* check the provided SKID size */ + if (cert->skidSz > (int)min(CTC_MAX_SKID_SIZE, sizeof(der->skid))) + return SKID_E; + + /* Note: different skid buffers sizes for der (MAX_KID_SZ) and + cert (CTC_MAX_SKID_SIZE). 
*/
+        der->skidSz = SetSKID(der->skid, sizeof(der->skid),
+                              cert->skid, cert->skidSz);
+        if (der->skidSz <= 0)
+            return SKID_E;
+
+        der->extensionsSz += der->skidSz;
+    }
+    else
+        der->skidSz = 0;
+
+    /* AKID */
+    if (cert->akidSz) {
+        /* check the provided AKID size */
+        if (cert->akidSz > (int)min(CTC_MAX_AKID_SIZE, sizeof(der->akid)))
+            return AKID_E;
+
+        der->akidSz = SetAKID(der->akid, sizeof(der->akid),
+                              cert->akid, cert->akidSz, cert->heap);
+        if (der->akidSz <= 0)
+            return AKID_E;
+
+        der->extensionsSz += der->akidSz;
+    }
+    else
+        der->akidSz = 0;
+
+    /* Key Usage */
+    if (cert->keyUsage != 0) {
+        der->keyUsageSz = SetKeyUsage(der->keyUsage, sizeof(der->keyUsage),
+                                      cert->keyUsage);
+        if (der->keyUsageSz <= 0)
+            return KEYUSAGE_E;
+
+        der->extensionsSz += der->keyUsageSz;
+    }
+    else
+        der->keyUsageSz = 0;
+
+    /* Extended Key Usage */
+    if (cert->extKeyUsage != 0) {
+        der->extKeyUsageSz = SetExtKeyUsage(cert, der->extKeyUsage,
+                                sizeof(der->extKeyUsage), cert->extKeyUsage);
+        if (der->extKeyUsageSz <= 0)
+            return EXTKEYUSAGE_E;
+
+        der->extensionsSz += der->extKeyUsageSz;
+    }
+    else
+        der->extKeyUsageSz = 0;
+
+    /* Certificate Policies */
+    if (cert->certPoliciesNb != 0) {
+        der->certPoliciesSz = SetCertificatePolicies(der->certPolicies,
+                                  sizeof(der->certPolicies),
+                                  cert->certPolicies,
+                                  cert->certPoliciesNb,
+                                  cert->heap);
+        if (der->certPoliciesSz <= 0)
+            return CERTPOLICIES_E;
+
+        der->extensionsSz += der->certPoliciesSz;
+    }
+    else
+        der->certPoliciesSz = 0;
+#endif /* WOLFSSL_CERT_EXT */
+
+    /* put extensions */
+    if (der->extensionsSz > 0) {
+
+        /* put the start of extensions sequence (ID, Size) */
+        der->extensionsSz = SetExtensionsHeader(der->extensions,
+                                                sizeof(der->extensions),
+                                                der->extensionsSz);
+        if (der->extensionsSz <= 0)
+            return EXTENSIONS_E;
+
+        /* put CA */
+        if (der->caSz) {
+            ret = SetExtensions(der->extensions, sizeof(der->extensions),
+                                &der->extensionsSz,
+                                der->ca, der->caSz);
+            if (ret <= 0)
+                return EXTENSIONS_E;
+        }
+
+#ifdef WOLFSSL_ALT_NAMES
+        /* put Alternative Names */
+        if (der->altNamesSz) {
+            ret = SetExtensions(der->extensions, sizeof(der->extensions),
+                                &der->extensionsSz,
+                                der->altNames, der->altNamesSz);
+            if (ret <= 0)
+                return EXTENSIONS_E;
+        }
+#endif
+
+#ifdef WOLFSSL_CERT_EXT
+        /* put SKID */
+        if (der->skidSz) {
+            ret = SetExtensions(der->extensions, sizeof(der->extensions),
+                                &der->extensionsSz,
+                                der->skid, der->skidSz);
+            if (ret <= 0)
+                return EXTENSIONS_E;
+        }
+
+        /* put AKID */
+        if (der->akidSz) {
+            ret = SetExtensions(der->extensions, sizeof(der->extensions),
+                                &der->extensionsSz,
+                                der->akid, der->akidSz);
+            if (ret <= 0)
+                return EXTENSIONS_E;
+        }
+
+        /* put KeyUsage */
+        if (der->keyUsageSz) {
+            ret = SetExtensions(der->extensions, sizeof(der->extensions),
+                                &der->extensionsSz,
+                                der->keyUsage, der->keyUsageSz);
+            if (ret <= 0)
+                return EXTENSIONS_E;
+        }
+
+        /* put ExtendedKeyUsage */
+        if (der->extKeyUsageSz) {
+            ret = SetExtensions(der->extensions, sizeof(der->extensions),
+                                &der->extensionsSz,
+                                der->extKeyUsage, der->extKeyUsageSz);
+            if (ret <= 0)
+                return EXTENSIONS_E;
+        }
+
+        /* put Certificate Policies */
+        if (der->certPoliciesSz) {
+            ret = SetExtensions(der->extensions, sizeof(der->extensions),
+                                &der->extensionsSz,
+                                der->certPolicies, der->certPoliciesSz);
+            if (ret <= 0)
+                return EXTENSIONS_E;
+        }
+#endif /* WOLFSSL_CERT_EXT */
+    }
+
+    der->total = der->versionSz + der->serialSz + der->sigAlgoSz +
+        der->publicKeySz + der->validitySz + der->subjectSz + der->issuerSz +
+        der->extensionsSz;
+
+    return 0;
+}
+
+
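/* [Editor's aside] EncodeCert() above only assembles the TBS pieces; the
 * public wrappers later in this file stitch them together and sign. A
 * hedged sketch of the typical CA-signed flow built on those wrappers
 * (wc_MakeCert/wc_SignCert; the caller is assumed to have filled the
 * subject and issuer fields, chosen cert->sigType, and set selfSigned = 0),
 * fenced out of the build: */
#if 0
#include <wolfssl/wolfcrypt/asn_public.h>
#include <wolfssl/wolfcrypt/ecc.h>
#include <wolfssl/wolfcrypt/random.h>

static int issue_cert(Cert* cert, ecc_key* subjectKey, ecc_key* caKey,
                      WC_RNG* rng, byte* der, word32 derSz)
{
    /* 1. write the unsigned body (the TBSCertificate) */
    int ret = wc_MakeCert(cert, der, derSz, NULL, subjectKey, rng);
    if (ret < 0)
        return ret;

    /* 2. hash and sign the body with the CA key; this prepends the outer
     *    SEQUENCE and appends the AlgorithmIdentifier plus BIT STRING */
    return wc_SignCert(cert->bodySz, cert->sigType, der, derSz,
                       NULL, caKey, rng);
}
#endif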
+/* write DER encoded cert to buffer, size already checked */
+static int WriteCertBody(DerCert* der, byte* buf)
+{
+    int idx;
+
+    /* signed part header */
+    idx = SetSequence(der->total, buf);
+    /* version */
+    XMEMCPY(buf + idx, der->version, der->versionSz);
+    idx += der->versionSz;
+    /* serial */
+    XMEMCPY(buf + idx, der->serial, der->serialSz);
+    idx += der->serialSz;
+    /* sig algo */
+    XMEMCPY(buf + idx, der->sigAlgo, der->sigAlgoSz);
+    idx += der->sigAlgoSz;
+    /* issuer */
+    XMEMCPY(buf + idx, der->issuer, der->issuerSz);
+    idx += der->issuerSz;
+    /* validity */
+    XMEMCPY(buf + idx, der->validity, der->validitySz);
+    idx += der->validitySz;
+    /* subject */
+    XMEMCPY(buf + idx, der->subject, der->subjectSz);
+    idx += der->subjectSz;
+    /* public key */
+    XMEMCPY(buf + idx, der->publicKey, der->publicKeySz);
+    idx += der->publicKeySz;
+    if (der->extensionsSz) {
+        /* extensions */
+        XMEMCPY(buf + idx, der->extensions, min(der->extensionsSz,
+                                           (int)sizeof(der->extensions)));
+        idx += der->extensionsSz;
+    }
+
+    return idx;
+}
+
+
+/* Make a signature over buffer (sz) with the provided key, write to sig
+   (sigSz); handles RSA, ECC, Ed25519 and Ed448 keys */
+static int MakeSignature(CertSignCtx* certSignCtx, const byte* buf, int sz,
+    byte* sig, int sigSz, RsaKey* rsaKey, ecc_key* eccKey,
+    ed25519_key* ed25519Key, ed448_key* ed448Key, WC_RNG* rng, int sigAlgoType,
+    void* heap)
+{
+    int digestSz = 0, typeH = 0, ret = 0;
+
+    (void)digestSz;
+    (void)typeH;
+    (void)buf;
+    (void)sz;
+    (void)sig;
+    (void)sigSz;
+    (void)rsaKey;
+    (void)eccKey;
+    (void)ed25519Key;
+    (void)ed448Key;
+    (void)rng;
+    (void)heap;
+
+    switch (certSignCtx->state) {
+    case CERTSIGN_STATE_BEGIN:
+    case CERTSIGN_STATE_DIGEST:
+
+        certSignCtx->state = CERTSIGN_STATE_DIGEST;
+        certSignCtx->digest = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, heap,
+            DYNAMIC_TYPE_TMP_BUFFER);
+        if (certSignCtx->digest == NULL) {
+            ret = MEMORY_E; goto exit_ms;
+        }
+
+        ret = HashForSignature(buf, sz, sigAlgoType, certSignCtx->digest,
+                               &typeH, &digestSz, 0);
+        /* set next state; a WC_PENDING_E re-entry for these states is not a
+           "call again" */
+        certSignCtx->state = CERTSIGN_STATE_ENCODE;
+        if (ret != 0) {
+            goto exit_ms;
+        }
+        FALL_THROUGH;
+
+    case CERTSIGN_STATE_ENCODE:
+    #ifndef NO_RSA
+        if (rsaKey) {
+            certSignCtx->encSig = (byte*)XMALLOC(MAX_DER_DIGEST_SZ, heap,
+                DYNAMIC_TYPE_TMP_BUFFER);
+            if (certSignCtx->encSig == NULL) {
+                ret = MEMORY_E; goto exit_ms;
+            }
+
+            /* signature */
+            certSignCtx->encSigSz = wc_EncodeSignature(certSignCtx->encSig,
+                                  certSignCtx->digest, digestSz, typeH);
+        }
+    #endif /* !NO_RSA */
+        FALL_THROUGH;
+
+    case CERTSIGN_STATE_DO:
+        certSignCtx->state = CERTSIGN_STATE_DO;
+        ret = ALGO_ID_E; /* default to error */
+
+    #ifndef NO_RSA
+        if (rsaKey) {
+            /* signature */
+            ret = wc_RsaSSL_Sign(certSignCtx->encSig, certSignCtx->encSigSz,
+                                 sig, sigSz, rsaKey, rng);
+        }
+    #endif /* !NO_RSA */
+
+    #ifdef HAVE_ECC
+        if (!rsaKey && eccKey) {
+            word32 outSz = sigSz;
+
+            ret = wc_ecc_sign_hash(certSignCtx->digest, digestSz,
+                                   sig, &outSz, rng, eccKey);
+            if (ret == 0)
+                ret = outSz;
+        }
+    #endif /* HAVE_ECC */
+
+    #ifdef HAVE_ED25519
+        if (!rsaKey && !eccKey && ed25519Key) {
+            word32 outSz = sigSz;
+
+            ret = wc_ed25519_sign_msg(buf, sz, sig, &outSz, ed25519Key);
+            if (ret == 0)
+                ret = outSz;
+        }
+    #endif /* HAVE_ED25519 */
+
+    #ifdef HAVE_ED448
+        if (!rsaKey && !eccKey && !ed25519Key && ed448Key) {
+            word32 outSz = sigSz;
+
+            ret = wc_ed448_sign_msg(buf, sz, sig, &outSz, ed448Key, NULL, 0);
+            if (ret == 0)
+                ret = outSz;
+        }
+    #endif /* HAVE_ED448 */
+        break;
+    }
+
+exit_ms:
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (ret == WC_PENDING_E) {
+        return ret;
+    }
+#endif
+
+#ifndef NO_RSA
+    if (rsaKey) {
+        XFREE(certSignCtx->encSig, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif /* !NO_RSA */
+
+    XFREE(certSignCtx->digest, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    certSignCtx->digest = NULL;
+
+    /* reset state */
+    certSignCtx->state = CERTSIGN_STATE_BEGIN;
+
+    return ret;
+}
+
+
+/* add signature to end of buffer, size of buffer assumed checked, return
+   new length */
+static int AddSignature(byte* buf, int bodySz, const byte* sig, int sigSz,
+                        int sigAlgoType)
+{
+    byte seq[MAX_SEQ_SZ];
+    int  idx = bodySz, seqSz;
+
+    /* algo */
+    idx += SetAlgoID(sigAlgoType, buf ? buf + idx : NULL, oidSigType, 0);
+    /* bit string */
+    idx += SetBitString(sigSz, 0, buf ? buf + idx : NULL);
+    /* signature */
+    if (buf)
+        XMEMCPY(buf + idx, sig, sigSz);
+    idx += sigSz;
+
+    /* make room for overall header */
+    seqSz = SetSequence(idx, seq);
+    if (buf) {
+        XMEMMOVE(buf + seqSz, buf, idx);
+        XMEMCPY(buf, seq, seqSz);
+    }
+
+    return idx + seqSz;
+}
+
+
+/* Make an x509 Certificate v3 of any key type from cert input, write to
+   buffer */
+static int MakeAnyCert(Cert* cert, byte* derBuffer, word32 derSz,
+                       RsaKey* rsaKey, ecc_key* eccKey, WC_RNG* rng,
+                       const byte* ntruKey, word16 ntruSz,
+                       ed25519_key* ed25519Key, ed448_key* ed448Key)
+{
+    int ret;
+#ifdef WOLFSSL_SMALL_STACK
+    DerCert* der;
+#else
+    DerCert der[1];
+#endif
+
+    if (derBuffer == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    cert->keyType = eccKey ? ECC_KEY : (rsaKey ? RSA_KEY :
+        (ed25519Key ? ED25519_KEY : (ed448Key ? ED448_KEY : NTRU_KEY)));
+
+#ifdef WOLFSSL_SMALL_STACK
+    der = (DerCert*)XMALLOC(sizeof(DerCert), cert->heap,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (der == NULL)
+        return MEMORY_E;
+#endif
+
+    ret = EncodeCert(cert, der, rsaKey, eccKey, rng, ntruKey, ntruSz,
+                     ed25519Key, ed448Key);
+    if (ret == 0) {
+        if (der->total + MAX_SEQ_SZ * 2 > (int)derSz)
+            ret = BUFFER_E;
+        else
+            ret = cert->bodySz = WriteCertBody(der, derBuffer);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(der, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return ret;
+}
+
+
+/* Make an x509 Certificate v3 (RSA, ECC, Ed25519 or Ed448) from cert input,
+   write to buffer */
+int wc_MakeCert_ex(Cert* cert, byte* derBuffer, word32 derSz, int keyType,
+                   void* key, WC_RNG* rng)
+{
+    RsaKey*      rsaKey = NULL;
+    ecc_key*     eccKey = NULL;
+    ed25519_key* ed25519Key = NULL;
+    ed448_key*   ed448Key = NULL;
+
+    if (keyType == RSA_TYPE)
+        rsaKey = (RsaKey*)key;
+    else if (keyType == ECC_TYPE)
+        eccKey = (ecc_key*)key;
+    else if (keyType == ED25519_TYPE)
+        ed25519Key = (ed25519_key*)key;
+    else if (keyType == ED448_TYPE)
+        ed448Key = (ed448_key*)key;
+
+    return MakeAnyCert(cert, derBuffer, derSz, rsaKey, eccKey, rng, NULL, 0,
+                       ed25519Key, ed448Key);
+}
+
+/* Make an x509 Certificate v3 RSA or ECC from cert input, write to buffer */
+int wc_MakeCert(Cert* cert, byte* derBuffer, word32 derSz, RsaKey* rsaKey,
+                ecc_key* eccKey, WC_RNG* rng)
+{
+    return MakeAnyCert(cert, derBuffer, derSz, rsaKey, eccKey, rng, NULL, 0,
+                       NULL, NULL);
+}
+
+
+#ifdef HAVE_NTRU
+
+int wc_MakeNtruCert(Cert* cert, byte* derBuffer, word32 derSz,
+                    const byte* ntruKey, word16 keySz, WC_RNG* rng)
+{
+    /* no Ed25519 or Ed448 key for the NTRU path */
+    return MakeAnyCert(cert, derBuffer, derSz, NULL, NULL, rng, ntruKey,
+                       keySz, NULL, NULL);
+}
+
+#endif /* HAVE_NTRU */
+
+
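/* [Editor's aside] AddSignature() above is why callers reserve
 * MAX_SEQ_SZ*2 beyond the body: the final Certificate is
 * SEQUENCE { tbsCertificate, signatureAlgorithm, BIT STRING }, and the
 * outer SEQUENCE header is prepended with XMEMMOVE once its length is
 * known. A fenced sketch of the same prepend trick on raw bytes: */
#if 0
#include <string.h>

typedef unsigned char byte;

/* contentSz must be < 128 for brevity (single length octet);
   returns the total size after prepending the SEQUENCE header */
static int wrap_sequence(byte* buf, int contentSz)
{
    memmove(buf + 2, buf, (size_t)contentSz);  /* make room for the header */
    buf[0] = 0x30;                             /* constructed SEQUENCE     */
    buf[1] = (byte)contentSz;                  /* short-form length        */
    return contentSz + 2;
}
#endif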
+#ifdef WOLFSSL_CERT_REQ
+
+static int SetReqAttrib(byte* output, char* pw, int pwPrintableString,
+                        int extSz)
+{
+    /* OID 1.2.840.113549.1.9.7: PKCS#9 challengePassword */
+    const byte cpOid[] =
+        { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01,
+          0x09, 0x07 };
+    /* OID 1.2.840.113549.1.9.14: PKCS#9 extensionRequest */
+    const byte erOid[] =
+        { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01,
+          0x09, 0x0e };
+
+    int sz      = 0; /* overall size */
+    int cpSz    = 0; /* Challenge Password section size */
+    int cpSeqSz = 0;
+    int cpSetSz = 0;
+    int cpStrSz = 0;
+    int pwSz    = 0;
+    int erSz    = 0; /* Extension Request section size */
+    int erSeqSz = 0;
+    int erSetSz = 0;
+    byte cpSeq[MAX_SEQ_SZ];
+    byte cpSet[MAX_SET_SZ];
+    byte cpStr[MAX_PRSTR_SZ];
+    byte erSeq[MAX_SEQ_SZ];
+    byte erSet[MAX_SET_SZ];
+
+    output[0] = 0xa0;  /* implicit [0] tag for the attribute set */
+    sz++;
+
+    if (pw && pw[0]) {
+        pwSz = (int)XSTRLEN(pw);
+        if (pwPrintableString) {
+            cpStrSz = SetPrintableString(pwSz, cpStr);
+        } else {
+            cpStrSz = SetUTF8String(pwSz, cpStr);
+        }
+        cpSetSz = SetSet(cpStrSz + pwSz, cpSet);
+        cpSeqSz = SetSequence(sizeof(cpOid) + cpSetSz + cpStrSz + pwSz, cpSeq);
+        cpSz = cpSeqSz + sizeof(cpOid) + cpSetSz + cpStrSz + pwSz;
+    }
+
+    if (extSz) {
+        erSetSz = SetSet(extSz, erSet);
+        erSeqSz = SetSequence(erSetSz + sizeof(erOid) + extSz, erSeq);
+        erSz = extSz + erSetSz + erSeqSz + sizeof(erOid);
+    }
+
+    /* Put the pieces together. */
+    sz += SetLength(cpSz + erSz, &output[sz]);
+
+    if (cpSz) {
+        XMEMCPY(&output[sz], cpSeq, cpSeqSz);
+        sz += cpSeqSz;
+        XMEMCPY(&output[sz], cpOid, sizeof(cpOid));
+        sz += sizeof(cpOid);
+        XMEMCPY(&output[sz], cpSet, cpSetSz);
+        sz += cpSetSz;
+        XMEMCPY(&output[sz], cpStr, cpStrSz);
+        sz += cpStrSz;
+        XMEMCPY(&output[sz], pw, pwSz);
+        sz += pwSz;
+    }
+
+    if (erSz) {
+        XMEMCPY(&output[sz], erSeq, erSeqSz);
+        sz += erSeqSz;
+        XMEMCPY(&output[sz], erOid, sizeof(erOid));
+        sz += sizeof(erOid);
+        XMEMCPY(&output[sz], erSet, erSetSz);
+        sz += erSetSz;
+        /* The actual extension data will be tacked onto the output later. */
+    }
+
+    return sz;
+}
+
+
+/* encode info from cert into DER encoded format */
+static int EncodeCertReq(Cert* cert, DerCert* der, RsaKey* rsaKey,
+                         ecc_key* eccKey, ed25519_key* ed25519Key,
+                         ed448_key* ed448Key)
+{
+    (void)eccKey;
+    (void)ed25519Key;
+    (void)ed448Key;
+
+    if (cert == NULL || der == NULL)
+        return BAD_FUNC_ARG;
+
+    if (rsaKey == NULL && eccKey == NULL && ed25519Key == NULL &&
+            ed448Key == NULL) {
+        return PUBLIC_KEY_E;
+    }
+
+    /* init */
+    XMEMSET(der, 0, sizeof(DerCert));
+
+    /* version */
+    der->versionSz = SetMyVersion(cert->version, der->version, FALSE);
+
+    /* subject name */
+    der->subjectSz = SetName(der->subject, sizeof(der->subject),
+                             &cert->subject);
+    if (der->subjectSz <= 0)
+        return SUBJECT_E;
+
+    /* public key */
+#ifndef NO_RSA
+    if (cert->keyType == RSA_KEY) {
+        if (rsaKey == NULL)
+            return PUBLIC_KEY_E;
+        der->publicKeySz = SetRsaPublicKey(der->publicKey, rsaKey,
+                                           sizeof(der->publicKey), 1);
+    }
+#endif
+
+#ifdef HAVE_ECC
+    if (cert->keyType == ECC_KEY) {
+        if (eccKey == NULL)
+            return PUBLIC_KEY_E;
+        der->publicKeySz = SetEccPublicKey(der->publicKey, eccKey, 1);
+    }
+#endif
+
+#ifdef HAVE_ED25519
+    if (cert->keyType == ED25519_KEY) {
+        if (ed25519Key == NULL)
+            return PUBLIC_KEY_E;
+        der->publicKeySz = SetEd25519PublicKey(der->publicKey, ed25519Key, 1);
+    }
+#endif
+
+#ifdef HAVE_ED448
+    if (cert->keyType == ED448_KEY) {
+        if (ed448Key == NULL)
+            return PUBLIC_KEY_E;
+        der->publicKeySz = SetEd448PublicKey(der->publicKey, ed448Key, 1);
+    }
+#endif
+    if (der->publicKeySz <= 0)
+        return PUBLIC_KEY_E;
+
+    /* set the extensions */
+    der->extensionsSz = 0;
+
+    /* CA */
+    if (cert->isCA) {
+        der->caSz = SetCa(der->ca, sizeof(der->ca));
+        if (der->caSz <= 0)
+            return CA_TRUE_E;
+
+        der->extensionsSz += der->caSz;
+    }
+    else
+        der->caSz = 0;
+
+#ifdef WOLFSSL_CERT_EXT
+    /* SKID */
+    if (cert->skidSz) {
+        /* check the provided SKID size */
+        if (cert->skidSz > 
(int)min(CTC_MAX_SKID_SIZE, sizeof(der->skid))) + return SKID_E; + + der->skidSz = SetSKID(der->skid, sizeof(der->skid), + cert->skid, cert->skidSz); + if (der->skidSz <= 0) + return SKID_E; + + der->extensionsSz += der->skidSz; + } + else + der->skidSz = 0; + + /* Key Usage */ + if (cert->keyUsage != 0){ + der->keyUsageSz = SetKeyUsage(der->keyUsage, sizeof(der->keyUsage), + cert->keyUsage); + if (der->keyUsageSz <= 0) + return KEYUSAGE_E; + + der->extensionsSz += der->keyUsageSz; + } + else + der->keyUsageSz = 0; + + /* Extended Key Usage */ + if (cert->extKeyUsage != 0){ + der->extKeyUsageSz = SetExtKeyUsage(cert, der->extKeyUsage, + sizeof(der->extKeyUsage), cert->extKeyUsage); + if (der->extKeyUsageSz <= 0) + return EXTKEYUSAGE_E; + + der->extensionsSz += der->extKeyUsageSz; + } + else + der->extKeyUsageSz = 0; + +#endif /* WOLFSSL_CERT_EXT */ + + /* put extensions */ + if (der->extensionsSz > 0) { + int ret; + + /* put the start of sequence (ID, Size) */ + der->extensionsSz = SetSequence(der->extensionsSz, der->extensions); + if (der->extensionsSz <= 0) + return EXTENSIONS_E; + + /* put CA */ + if (der->caSz) { + ret = SetExtensions(der->extensions, sizeof(der->extensions), + &der->extensionsSz, + der->ca, der->caSz); + if (ret <= 0) + return EXTENSIONS_E; + } + +#ifdef WOLFSSL_CERT_EXT + /* put SKID */ + if (der->skidSz) { + ret = SetExtensions(der->extensions, sizeof(der->extensions), + &der->extensionsSz, + der->skid, der->skidSz); + if (ret <= 0) + return EXTENSIONS_E; + } + + /* put AKID */ + if (der->akidSz) { + ret = SetExtensions(der->extensions, sizeof(der->extensions), + &der->extensionsSz, + der->akid, der->akidSz); + if (ret <= 0) + return EXTENSIONS_E; + } + + /* put KeyUsage */ + if (der->keyUsageSz) { + ret = SetExtensions(der->extensions, sizeof(der->extensions), + &der->extensionsSz, + der->keyUsage, der->keyUsageSz); + if (ret <= 0) + return EXTENSIONS_E; + } + + /* put ExtendedKeyUsage */ + if (der->extKeyUsageSz) { + ret = SetExtensions(der->extensions, sizeof(der->extensions), + &der->extensionsSz, + der->extKeyUsage, der->extKeyUsageSz); + if (ret <= 0) + return EXTENSIONS_E; + } + +#endif /* WOLFSSL_CERT_EXT */ + } + + der->attribSz = SetReqAttrib(der->attrib, cert->challengePw, + cert->challengePwPrintableString, + der->extensionsSz); + if (der->attribSz <= 0) + return REQ_ATTRIBUTE_E; + + der->total = der->versionSz + der->subjectSz + der->publicKeySz + + der->extensionsSz + der->attribSz; + + return 0; +} + + +/* write DER encoded cert req to buffer, size already checked */ +static int WriteCertReqBody(DerCert* der, byte* buf) +{ + int idx; + + /* signed part header */ + idx = SetSequence(der->total, buf); + /* version */ + if (buf) + XMEMCPY(buf + idx, der->version, der->versionSz); + idx += der->versionSz; + /* subject */ + if (buf) + XMEMCPY(buf + idx, der->subject, der->subjectSz); + idx += der->subjectSz; + /* public key */ + if (buf) + XMEMCPY(buf + idx, der->publicKey, der->publicKeySz); + idx += der->publicKeySz; + /* attributes */ + if (buf) + XMEMCPY(buf + idx, der->attrib, der->attribSz); + idx += der->attribSz; + /* extensions */ + if (der->extensionsSz) { + if (buf) + XMEMCPY(buf + idx, der->extensions, min(der->extensionsSz, + (int)sizeof(der->extensions))); + idx += der->extensionsSz; + } + + return idx; +} + + +static int MakeCertReq(Cert* cert, byte* derBuffer, word32 derSz, + RsaKey* rsaKey, ecc_key* eccKey, ed25519_key* ed25519Key, + ed448_key* ed448Key) +{ + int ret; +#ifdef WOLFSSL_SMALL_STACK + DerCert* der; +#else + DerCert der[1]; 
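/* [Editor's aside] A hedged sketch of the request path these helpers
 * implement: build the CertificationRequestInfo with wc_MakeCertReq()
 * (defined just below) and then sign it with wc_SignCert(), exactly as for
 * a certificate body. The subject string is illustrative, and setting
 * version to 0 follows PKCS#10 (wc_InitCert() defaults to 2, the X.509v3
 * value). Fenced out of the build: */
#if 0
#include <wolfssl/wolfcrypt/asn_public.h>
#include <wolfssl/wolfcrypt/rsa.h>
#include <wolfssl/wolfcrypt/random.h>

static int make_csr(byte* der, word32 derSz, RsaKey* key, WC_RNG* rng)
{
    Cert req;
    int  ret = wc_InitCert(&req);
    if (ret != 0)
        return ret;

    XSTRNCPY(req.subject.commonName, "device-001", CTC_NAME_SIZE);
    req.version = 0;    /* PKCS#10 CertificationRequestInfo version */

    ret = wc_MakeCertReq(&req, der, derSz, key, NULL);
    if (ret < 0)
        return ret;
    return wc_SignCert(req.bodySz, CTC_SHA256wRSA, der, derSz, key, NULL, rng);
}
#endif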
+#endif + + cert->keyType = eccKey ? ECC_KEY : (ed25519Key ? ED25519_KEY : + (ed448Key ? ED448_KEY: RSA_KEY)); + +#ifdef WOLFSSL_SMALL_STACK + der = (DerCert*)XMALLOC(sizeof(DerCert), cert->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (der == NULL) + return MEMORY_E; +#endif + + ret = EncodeCertReq(cert, der, rsaKey, eccKey, ed25519Key, ed448Key); + + if (ret == 0) { + if (der->total + MAX_SEQ_SZ * 2 > (int)derSz) + ret = BUFFER_E; + else + ret = cert->bodySz = WriteCertReqBody(der, derBuffer); + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(der, cert->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return ret; +} + +int wc_MakeCertReq_ex(Cert* cert, byte* derBuffer, word32 derSz, int keyType, + void* key) +{ + RsaKey* rsaKey = NULL; + ecc_key* eccKey = NULL; + ed25519_key* ed25519Key = NULL; + ed448_key* ed448Key = NULL; + + if (keyType == RSA_TYPE) + rsaKey = (RsaKey*)key; + else if (keyType == ECC_TYPE) + eccKey = (ecc_key*)key; + else if (keyType == ED25519_TYPE) + ed25519Key = (ed25519_key*)key; + else if (keyType == ED448_TYPE) + ed448Key = (ed448_key*)key; + + return MakeCertReq(cert, derBuffer, derSz, rsaKey, eccKey, ed25519Key, + ed448Key); +} + +int wc_MakeCertReq(Cert* cert, byte* derBuffer, word32 derSz, + RsaKey* rsaKey, ecc_key* eccKey) +{ + return MakeCertReq(cert, derBuffer, derSz, rsaKey, eccKey, NULL, NULL); +} +#endif /* WOLFSSL_CERT_REQ */ + + +static int SignCert(int requestSz, int sType, byte* buf, word32 buffSz, + RsaKey* rsaKey, ecc_key* eccKey, ed25519_key* ed25519Key, + ed448_key* ed448Key, WC_RNG* rng) +{ + int sigSz = 0; + void* heap = NULL; + CertSignCtx* certSignCtx; +#ifndef WOLFSSL_ASYNC_CRYPT + CertSignCtx certSignCtx_lcl; + + certSignCtx = &certSignCtx_lcl; + XMEMSET(certSignCtx, 0, sizeof(CertSignCtx)); +#else + certSignCtx = NULL; +#endif + + if (requestSz < 0) + return requestSz; + + /* locate ctx */ + if (rsaKey) { + #ifndef NO_RSA + #ifdef WOLFSSL_ASYNC_CRYPT + certSignCtx = &rsaKey->certSignCtx; + #endif + heap = rsaKey->heap; + #else + return NOT_COMPILED_IN; + #endif /* NO_RSA */ + } + else if (eccKey) { + #ifdef HAVE_ECC + #ifdef WOLFSSL_ASYNC_CRYPT + certSignCtx = &eccKey->certSignCtx; + #endif + heap = eccKey->heap; + #else + return NOT_COMPILED_IN; + #endif /* HAVE_ECC */ + } + +#ifdef WOLFSSL_ASYNC_CRYPT + if (certSignCtx == NULL) { + return BAD_FUNC_ARG; + } +#endif + + if (certSignCtx->sig == NULL) { + certSignCtx->sig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (certSignCtx->sig == NULL) + return MEMORY_E; + } + + sigSz = MakeSignature(certSignCtx, buf, requestSz, certSignCtx->sig, + MAX_ENCODED_SIG_SZ, rsaKey, eccKey, ed25519Key, ed448Key, rng, sType, + heap); +#ifdef WOLFSSL_ASYNC_CRYPT + if (sigSz == WC_PENDING_E) { + /* Not free'ing certSignCtx->sig here because it could still be in use + * with async operations. 
*/
+        return sigSz;
+    }
+#endif
+
+    if (sigSz >= 0) {
+        if (requestSz + MAX_SEQ_SZ * 2 + sigSz > (int)buffSz)
+            sigSz = BUFFER_E;
+        else
+            sigSz = AddSignature(buf, requestSz, certSignCtx->sig, sigSz,
+                                 sType);
+    }
+
+    XFREE(certSignCtx->sig, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    certSignCtx->sig = NULL;
+
+    return sigSz;
+}
+
+int wc_SignCert_ex(int requestSz, int sType, byte* buf, word32 buffSz,
+                   int keyType, void* key, WC_RNG* rng)
+{
+    RsaKey*      rsaKey = NULL;
+    ecc_key*     eccKey = NULL;
+    ed25519_key* ed25519Key = NULL;
+    ed448_key*   ed448Key = NULL;
+
+    if (keyType == RSA_TYPE)
+        rsaKey = (RsaKey*)key;
+    else if (keyType == ECC_TYPE)
+        eccKey = (ecc_key*)key;
+    else if (keyType == ED25519_TYPE)
+        ed25519Key = (ed25519_key*)key;
+    else if (keyType == ED448_TYPE)
+        ed448Key = (ed448_key*)key;
+
+    return SignCert(requestSz, sType, buf, buffSz, rsaKey, eccKey, ed25519Key,
+                    ed448Key, rng);
+}
+
+int wc_SignCert(int requestSz, int sType, byte* buf, word32 buffSz,
+                RsaKey* rsaKey, ecc_key* eccKey, WC_RNG* rng)
+{
+    return SignCert(requestSz, sType, buf, buffSz, rsaKey, eccKey, NULL, NULL,
+                    rng);
+}
+
+int wc_MakeSelfCert(Cert* cert, byte* buf, word32 buffSz,
+                    RsaKey* key, WC_RNG* rng)
+{
+    int ret;
+
+    ret = wc_MakeCert(cert, buf, buffSz, key, NULL, rng);
+    if (ret < 0)
+        return ret;
+
+    return wc_SignCert(cert->bodySz, cert->sigType,
+                       buf, buffSz, key, NULL, rng);
+}
+
+
+#ifdef WOLFSSL_CERT_EXT
+
+/* Get raw subject from cert, which may contain OIDs not parsed by Decode.
+   The raw subject pointer will only be valid while "cert" is valid. */
+int wc_GetSubjectRaw(byte **subjectRaw, Cert *cert)
+{
+    int rc = BAD_FUNC_ARG;
+    if ((subjectRaw != NULL) && (cert != NULL)) {
+        *subjectRaw = cert->sbjRaw;
+        rc = 0;
+    }
+    return rc;
+}
+
+/* Set KID from public key */
+static int SetKeyIdFromPublicKey(Cert *cert, RsaKey *rsakey, ecc_key *eckey,
+                                 byte *ntruKey, word16 ntruKeySz,
+                                 ed25519_key* ed25519Key, ed448_key* ed448Key,
+                                 int kid_type)
+{
+    byte *buf;
+    int   bufferSz, ret;
+
+    if (cert == NULL ||
+        (rsakey == NULL && eckey == NULL && ntruKey == NULL &&
+         ed25519Key == NULL && ed448Key == NULL) ||
+        (kid_type != SKID_TYPE && kid_type != AKID_TYPE))
+        return BAD_FUNC_ARG;
+
+    buf = (byte *)XMALLOC(MAX_PUBLIC_KEY_SZ, cert->heap,
+                          DYNAMIC_TYPE_TMP_BUFFER);
+    if (buf == NULL)
+        return MEMORY_E;
+
+    /* Public Key */
+    bufferSz = -1;
+#ifndef NO_RSA
+    /* RSA public key */
+    if (rsakey != NULL)
+        bufferSz = SetRsaPublicKey(buf, rsakey, MAX_PUBLIC_KEY_SZ, 0);
+#endif
+#ifdef HAVE_ECC
+    /* ECC public key */
+    if (eckey != NULL)
+        bufferSz = SetEccPublicKey(buf, eckey, 0);
+#endif
+#ifdef HAVE_NTRU
+    /* NTRU public key */
+    if (ntruKey != NULL) {
+        bufferSz = MAX_PUBLIC_KEY_SZ;
+        ret = ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo(
+                  ntruKeySz, ntruKey, (word16 *)(&bufferSz), buf);
+        if (ret != NTRU_OK)
+            bufferSz = -1;
+    }
+#else
+    (void)ntruKeySz;
+#endif
+#ifdef HAVE_ED25519
+    /* Ed25519 public key */
+    if (ed25519Key != NULL)
+        bufferSz = SetEd25519PublicKey(buf, ed25519Key, 0);
+#endif
+#ifdef HAVE_ED448
+    /* Ed448 public key */
+    if (ed448Key != NULL)
+        bufferSz = SetEd448PublicKey(buf, ed448Key, 0);
+#endif
+
+    if (bufferSz <= 0) {
+        XFREE(buf, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return PUBLIC_KEY_E;
+    }
+
+    /* Compute the key id by hashing the public key */
+    if (kid_type == SKID_TYPE) {
+        ret = CalcHashId(buf, bufferSz, cert->skid);
+        cert->skidSz = KEYID_SIZE;
+    }
+    else if (kid_type == AKID_TYPE) {
+        ret = CalcHashId(buf, bufferSz, cert->akid);
+        cert->akidSz = KEYID_SIZE;
+    }
+    else
+        ret 
= BAD_FUNC_ARG; + + XFREE(buf, cert->heap, DYNAMIC_TYPE_TMP_BUFFER); + return ret; +} + +int wc_SetSubjectKeyIdFromPublicKey_ex(Cert *cert, int keyType, void* key) +{ + RsaKey* rsaKey = NULL; + ecc_key* eccKey = NULL; + ed25519_key* ed25519Key = NULL; + ed448_key* ed448Key = NULL; + + if (keyType == RSA_TYPE) + rsaKey = (RsaKey*)key; + else if (keyType == ECC_TYPE) + eccKey = (ecc_key*)key; + else if (keyType == ED25519_TYPE) + ed25519Key = (ed25519_key*)key; + else if (keyType == ED448_TYPE) + ed448Key = (ed448_key*)key; + + return SetKeyIdFromPublicKey(cert, rsaKey, eccKey, NULL, 0, ed25519Key, + ed448Key, SKID_TYPE); +} + +/* Set SKID from RSA or ECC public key */ +int wc_SetSubjectKeyIdFromPublicKey(Cert *cert, RsaKey *rsakey, ecc_key *eckey) +{ + return SetKeyIdFromPublicKey(cert, rsakey, eckey, NULL, 0, NULL, NULL, + SKID_TYPE); +} + +#ifdef HAVE_NTRU +/* Set SKID from NTRU public key */ +int wc_SetSubjectKeyIdFromNtruPublicKey(Cert *cert, + byte *ntruKey, word16 ntruKeySz) +{ + return SetKeyIdFromPublicKey(cert, NULL,NULL,ntruKey, ntruKeySz, NULL, NULL, + SKID_TYPE); +} +#endif + +int wc_SetAuthKeyIdFromPublicKey_ex(Cert *cert, int keyType, void* key) +{ + RsaKey* rsaKey = NULL; + ecc_key* eccKey = NULL; + ed25519_key* ed25519Key = NULL; + ed448_key* ed448Key = NULL; + + if (keyType == RSA_TYPE) + rsaKey = (RsaKey*)key; + else if (keyType == ECC_TYPE) + eccKey = (ecc_key*)key; + else if (keyType == ED25519_TYPE) + ed25519Key = (ed25519_key*)key; + else if (keyType == ED448_TYPE) + ed448Key = (ed448_key*)key; + + return SetKeyIdFromPublicKey(cert, rsaKey, eccKey, NULL, 0, ed25519Key, + ed448Key, AKID_TYPE); +} + +/* Set SKID from RSA or ECC public key */ +int wc_SetAuthKeyIdFromPublicKey(Cert *cert, RsaKey *rsakey, ecc_key *eckey) +{ + return SetKeyIdFromPublicKey(cert, rsakey, eckey, NULL, 0, NULL, NULL, + AKID_TYPE); +} + + +#if !defined(NO_FILESYSTEM) && !defined(NO_ASN_CRYPT) + +/* Set SKID from public key file in PEM */ +int wc_SetSubjectKeyId(Cert *cert, const char* file) +{ + int ret, derSz; + byte* der; + word32 idx; + RsaKey *rsakey = NULL; + ecc_key *eckey = NULL; + + if (cert == NULL || file == NULL) + return BAD_FUNC_ARG; + + der = (byte*)XMALLOC(MAX_PUBLIC_KEY_SZ, cert->heap, DYNAMIC_TYPE_CERT); + if (der == NULL) { + WOLFSSL_MSG("wc_SetSubjectKeyId memory Problem"); + return MEMORY_E; + } + derSz = MAX_PUBLIC_KEY_SZ; + + XMEMSET(der, 0, derSz); + derSz = wc_PemPubKeyToDer(file, der, derSz); + if (derSz <= 0) { + XFREE(der, cert->heap, DYNAMIC_TYPE_CERT); + return derSz; + } + + /* Load PubKey in internal structure */ +#ifndef NO_RSA + rsakey = (RsaKey*) XMALLOC(sizeof(RsaKey), cert->heap, DYNAMIC_TYPE_RSA); + if (rsakey == NULL) { + XFREE(der, cert->heap, DYNAMIC_TYPE_CERT); + return MEMORY_E; + } + + if (wc_InitRsaKey(rsakey, cert->heap) != 0) { + WOLFSSL_MSG("wc_InitRsaKey failure"); + XFREE(rsakey, cert->heap, DYNAMIC_TYPE_RSA); + XFREE(der, cert->heap, DYNAMIC_TYPE_CERT); + return MEMORY_E; + } + + idx = 0; + ret = wc_RsaPublicKeyDecode(der, &idx, rsakey, derSz); + if (ret != 0) +#endif + { +#ifndef NO_RSA + WOLFSSL_MSG("wc_RsaPublicKeyDecode failed"); + wc_FreeRsaKey(rsakey); + XFREE(rsakey, cert->heap, DYNAMIC_TYPE_RSA); + rsakey = NULL; +#endif +#ifdef HAVE_ECC + /* Check to load ecc public key */ + eckey = (ecc_key*) XMALLOC(sizeof(ecc_key), cert->heap, + DYNAMIC_TYPE_ECC); + if (eckey == NULL) { + XFREE(der, cert->heap, DYNAMIC_TYPE_CERT); + return MEMORY_E; + } + + if (wc_ecc_init(eckey) != 0) { + WOLFSSL_MSG("wc_ecc_init failure"); + wc_ecc_free(eckey); + 
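+        /* wc_ecc_free() clears any internals the failed wc_ecc_init()
+         * may have touched; the ecc_key struct itself was heap-allocated
+         * above, so it still needs an explicit XFREE before returning. */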
XFREE(eckey, cert->heap, DYNAMIC_TYPE_ECC); + XFREE(der, cert->heap, DYNAMIC_TYPE_CERT); + return MEMORY_E; + } + + idx = 0; + ret = wc_EccPublicKeyDecode(der, &idx, eckey, derSz); + if (ret != 0) { + WOLFSSL_MSG("wc_EccPublicKeyDecode failed"); + XFREE(der, cert->heap, DYNAMIC_TYPE_CERT); + wc_ecc_free(eckey); + XFREE(eckey, cert->heap, DYNAMIC_TYPE_ECC); + return PUBLIC_KEY_E; + } +#else + XFREE(der, cert->heap, DYNAMIC_TYPE_CERT); + return PUBLIC_KEY_E; +#endif /* HAVE_ECC */ + } + + XFREE(der, cert->heap, DYNAMIC_TYPE_CERT); + + ret = wc_SetSubjectKeyIdFromPublicKey(cert, rsakey, eckey); + +#ifndef NO_RSA + wc_FreeRsaKey(rsakey); + XFREE(rsakey, cert->heap, DYNAMIC_TYPE_RSA); +#endif +#ifdef HAVE_ECC + wc_ecc_free(eckey); + XFREE(eckey, cert->heap, DYNAMIC_TYPE_ECC); +#endif + return ret; +} + +#endif /* !NO_FILESYSTEM && !NO_ASN_CRYPT */ + +static int SetAuthKeyIdFromDcert(Cert* cert, DecodedCert* decoded) +{ + int ret = 0; + + /* Subject Key Id not found !! */ + if (decoded->extSubjKeyIdSet == 0) { + ret = ASN_NO_SKID; + } + + /* SKID invalid size */ + else if (sizeof(cert->akid) < sizeof(decoded->extSubjKeyId)) { + ret = MEMORY_E; + } + + else { + /* Put the SKID of CA to AKID of certificate */ + XMEMCPY(cert->akid, decoded->extSubjKeyId, KEYID_SIZE); + cert->akidSz = KEYID_SIZE; + } + + return ret; +} + +/* Set AKID from certificate contains in buffer (DER encoded) */ +int wc_SetAuthKeyIdFromCert(Cert *cert, const byte *der, int derSz) +{ + int ret = 0; + + if (cert == NULL) { + ret = BAD_FUNC_ARG; + } + else { + /* Check if decodedCert is cached */ + if (cert->der != der) { + /* Allocate cache for the decoded cert */ + ret = wc_SetCert_LoadDer(cert, der, derSz); + } + + if (ret >= 0) { + ret = SetAuthKeyIdFromDcert(cert, (DecodedCert*)cert->decodedCert); +#ifndef WOLFSSL_CERT_GEN_CACHE + wc_SetCert_Free(cert); +#endif + } + } + + return ret; +} + + +#ifndef NO_FILESYSTEM + +/* Set AKID from certificate file in PEM */ +int wc_SetAuthKeyId(Cert *cert, const char* file) +{ + int ret; + int derSz; + byte* der; + + if (cert == NULL || file == NULL) + return BAD_FUNC_ARG; + + der = (byte*)XMALLOC(EIGHTK_BUF, cert->heap, DYNAMIC_TYPE_CERT); + if (der == NULL) { + WOLFSSL_MSG("wc_SetAuthKeyId OOF Problem"); + return MEMORY_E; + } + + derSz = wc_PemCertToDer(file, der, EIGHTK_BUF); + if (derSz <= 0) + { + XFREE(der, cert->heap, DYNAMIC_TYPE_CERT); + return derSz; + } + + ret = wc_SetAuthKeyIdFromCert(cert, der, derSz); + XFREE(der, cert->heap, DYNAMIC_TYPE_CERT); + + return ret; +} + +#endif /* !NO_FILESYSTEM */ + +/* Set KeyUsage from human readable string */ +int wc_SetKeyUsage(Cert *cert, const char *value) +{ + int ret = 0; + char *token, *str, *ptr; + word32 len; + + if (cert == NULL || value == NULL) + return BAD_FUNC_ARG; + + cert->keyUsage = 0; + + /* duplicate string (including terminator) */ + len = (word32)XSTRLEN(value); + str = (char*)XMALLOC(len+1, cert->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (str == NULL) + return MEMORY_E; + XMEMCPY(str, value, len+1); + + /* parse value, and set corresponding Key Usage value */ + if ((token = XSTRTOK(str, ",", &ptr)) == NULL) { + XFREE(str, cert->heap, DYNAMIC_TYPE_TMP_BUFFER); + return KEYUSAGE_E; + } + while (token != NULL) + { + len = (word32)XSTRLEN(token); + + if (!XSTRNCASECMP(token, "digitalSignature", len)) + cert->keyUsage |= KEYUSE_DIGITAL_SIG; + else if (!XSTRNCASECMP(token, "nonRepudiation", len) || + !XSTRNCASECMP(token, "contentCommitment", len)) + cert->keyUsage |= KEYUSE_CONTENT_COMMIT; + else if (!XSTRNCASECMP(token, 
"keyEncipherment", len)) + cert->keyUsage |= KEYUSE_KEY_ENCIPHER; + else if (!XSTRNCASECMP(token, "dataEncipherment", len)) + cert->keyUsage |= KEYUSE_DATA_ENCIPHER; + else if (!XSTRNCASECMP(token, "keyAgreement", len)) + cert->keyUsage |= KEYUSE_KEY_AGREE; + else if (!XSTRNCASECMP(token, "keyCertSign", len)) + cert->keyUsage |= KEYUSE_KEY_CERT_SIGN; + else if (!XSTRNCASECMP(token, "cRLSign", len)) + cert->keyUsage |= KEYUSE_CRL_SIGN; + else if (!XSTRNCASECMP(token, "encipherOnly", len)) + cert->keyUsage |= KEYUSE_ENCIPHER_ONLY; + else if (!XSTRNCASECMP(token, "decipherOnly", len)) + cert->keyUsage |= KEYUSE_DECIPHER_ONLY; + else { + ret = KEYUSAGE_E; + break; + } + + token = XSTRTOK(NULL, ",", &ptr); + } + + XFREE(str, cert->heap, DYNAMIC_TYPE_TMP_BUFFER); + return ret; +} + +/* Set ExtendedKeyUsage from human readable string */ +int wc_SetExtKeyUsage(Cert *cert, const char *value) +{ + int ret = 0; + char *token, *str, *ptr; + word32 len; + + if (cert == NULL || value == NULL) + return BAD_FUNC_ARG; + + cert->extKeyUsage = 0; + + /* duplicate string (including terminator) */ + len = (word32)XSTRLEN(value); + str = (char*)XMALLOC(len+1, cert->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (str == NULL) + return MEMORY_E; + XMEMCPY(str, value, len+1); + + /* parse value, and set corresponding Key Usage value */ + if ((token = XSTRTOK(str, ",", &ptr)) == NULL) { + XFREE(str, cert->heap, DYNAMIC_TYPE_TMP_BUFFER); + return EXTKEYUSAGE_E; + } + + while (token != NULL) + { + len = (word32)XSTRLEN(token); + + if (!XSTRNCASECMP(token, "any", len)) + cert->extKeyUsage |= EXTKEYUSE_ANY; + else if (!XSTRNCASECMP(token, "serverAuth", len)) + cert->extKeyUsage |= EXTKEYUSE_SERVER_AUTH; + else if (!XSTRNCASECMP(token, "clientAuth", len)) + cert->extKeyUsage |= EXTKEYUSE_CLIENT_AUTH; + else if (!XSTRNCASECMP(token, "codeSigning", len)) + cert->extKeyUsage |= EXTKEYUSE_CODESIGN; + else if (!XSTRNCASECMP(token, "emailProtection", len)) + cert->extKeyUsage |= EXTKEYUSE_EMAILPROT; + else if (!XSTRNCASECMP(token, "timeStamping", len)) + cert->extKeyUsage |= EXTKEYUSE_TIMESTAMP; + else if (!XSTRNCASECMP(token, "OCSPSigning", len)) + cert->extKeyUsage |= EXTKEYUSE_OCSP_SIGN; + else { + ret = EXTKEYUSAGE_E; + break; + } + + token = XSTRTOK(NULL, ",", &ptr); + } + + XFREE(str, cert->heap, DYNAMIC_TYPE_TMP_BUFFER); + return ret; +} + +#ifdef WOLFSSL_EKU_OID +/* + * cert structure to set EKU oid in + * oid the oid in byte representation + * sz size of oid buffer + * idx index of array to place oid + * + * returns 0 on success + */ +int wc_SetExtKeyUsageOID(Cert *cert, const char *in, word32 sz, byte idx, + void* heap) +{ + byte oid[MAX_OID_SZ]; + word32 oidSz = MAX_OID_SZ; + + if (idx >= CTC_MAX_EKU_NB || sz >= CTC_MAX_EKU_OID_SZ) { + WOLFSSL_MSG("Either idx or sz was too large"); + return BAD_FUNC_ARG; + } + + if (EncodePolicyOID(oid, &oidSz, in, heap) != 0) { + return BUFFER_E; + } + + XMEMCPY(cert->extKeyUsageOID[idx], oid, oidSz); + cert->extKeyUsageOIDSz[idx] = oidSz; + cert->extKeyUsage |= EXTKEYUSE_USER; + + return 0; +} +#endif /* WOLFSSL_EKU_OID */ +#endif /* WOLFSSL_CERT_EXT */ + + +#ifdef WOLFSSL_ALT_NAMES + +static int SetAltNamesFromDcert(Cert* cert, DecodedCert* decoded) +{ + int ret = 0; + byte tag; + + if (decoded->extensions) { + int length; + word32 maxExtensionsIdx; + + decoded->srcIdx = decoded->extensionsIdx; + if (GetASNTag(decoded->source, &decoded->srcIdx, &tag, decoded->maxIdx) + != 0) { + return ASN_PARSE_E; + } + + if (tag != ASN_EXTENSIONS) { + ret = ASN_PARSE_E; + } + else if 
(GetLength(decoded->source, &decoded->srcIdx, &length, + decoded->maxIdx) < 0) { + ret = ASN_PARSE_E; + } + else if (GetSequence(decoded->source, &decoded->srcIdx, &length, + decoded->maxIdx) < 0) { + ret = ASN_PARSE_E; + } + else { + maxExtensionsIdx = decoded->srcIdx + length; + + while (decoded->srcIdx < maxExtensionsIdx) { + word32 oid; + word32 startIdx = decoded->srcIdx; + word32 tmpIdx; + + if (GetSequence(decoded->source, &decoded->srcIdx, &length, + decoded->maxIdx) < 0) { + ret = ASN_PARSE_E; + break; + } + + tmpIdx = decoded->srcIdx; + decoded->srcIdx = startIdx; + + if (GetAlgoId(decoded->source, &decoded->srcIdx, &oid, + oidCertExtType, decoded->maxIdx) < 0) { + ret = ASN_PARSE_E; + break; + } + + if (oid == ALT_NAMES_OID) { + cert->altNamesSz = length + (tmpIdx - startIdx); + + if (cert->altNamesSz < (int)sizeof(cert->altNames)) + XMEMCPY(cert->altNames, &decoded->source[startIdx], + cert->altNamesSz); + else { + cert->altNamesSz = 0; + WOLFSSL_MSG("AltNames extensions too big"); + ret = ALT_NAME_E; + break; + } + } + decoded->srcIdx = tmpIdx + length; + } + } + } + + return ret; +} + +#ifndef NO_FILESYSTEM + +/* Set Alt Names from der cert, return 0 on success */ +static int SetAltNamesFromCert(Cert* cert, const byte* der, int derSz) +{ + int ret; +#ifdef WOLFSSL_SMALL_STACK + DecodedCert* decoded; +#else + DecodedCert decoded[1]; +#endif + + if (derSz < 0) + return derSz; + +#ifdef WOLFSSL_SMALL_STACK + decoded = (DecodedCert*)XMALLOC(sizeof(DecodedCert), cert->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (decoded == NULL) + return MEMORY_E; +#endif + + InitDecodedCert(decoded, der, derSz, NULL); + ret = ParseCertRelative(decoded, CA_TYPE, NO_VERIFY, 0); + + if (ret < 0) { + WOLFSSL_MSG("ParseCertRelative error"); + } + else { + ret = SetAltNamesFromDcert(cert, decoded); + } + + FreeDecodedCert(decoded); +#ifdef WOLFSSL_SMALL_STACK + XFREE(decoded, cert->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return ret < 0 ? ret : 0; +} + +#endif + +static int SetDatesFromDcert(Cert* cert, DecodedCert* decoded) +{ + int ret = 0; + + if (decoded->beforeDate == NULL || decoded->afterDate == NULL) { + WOLFSSL_MSG("Couldn't extract dates"); + ret = -1; + } + else if (decoded->beforeDateLen > MAX_DATE_SIZE || + decoded->afterDateLen > MAX_DATE_SIZE) { + WOLFSSL_MSG("Bad date size"); + ret = -1; + } + else { + XMEMCPY(cert->beforeDate, decoded->beforeDate, decoded->beforeDateLen); + XMEMCPY(cert->afterDate, decoded->afterDate, decoded->afterDateLen); + + cert->beforeDateSz = decoded->beforeDateLen; + cert->afterDateSz = decoded->afterDateLen; + } + + return ret; +} + +#endif /* WOLFSSL_ALT_NAMES */ + +static void SetNameFromDcert(CertName* cn, DecodedCert* decoded) +{ + int sz; + + if (decoded->subjectCN) { + sz = (decoded->subjectCNLen < CTC_NAME_SIZE) ? decoded->subjectCNLen + : CTC_NAME_SIZE - 1; + XSTRNCPY(cn->commonName, decoded->subjectCN, sz); + cn->commonName[sz] = '\0'; + cn->commonNameEnc = decoded->subjectCNEnc; + } + if (decoded->subjectC) { + sz = (decoded->subjectCLen < CTC_NAME_SIZE) ? decoded->subjectCLen + : CTC_NAME_SIZE - 1; + XSTRNCPY(cn->country, decoded->subjectC, sz); + cn->country[sz] = '\0'; + cn->countryEnc = decoded->subjectCEnc; + } + if (decoded->subjectST) { + sz = (decoded->subjectSTLen < CTC_NAME_SIZE) ? decoded->subjectSTLen + : CTC_NAME_SIZE - 1; + XSTRNCPY(cn->state, decoded->subjectST, sz); + cn->state[sz] = '\0'; + cn->stateEnc = decoded->subjectSTEnc; + } + if (decoded->subjectL) { + sz = (decoded->subjectLLen < CTC_NAME_SIZE) ? 
decoded->subjectLLen + : CTC_NAME_SIZE - 1; + XSTRNCPY(cn->locality, decoded->subjectL, sz); + cn->locality[sz] = '\0'; + cn->localityEnc = decoded->subjectLEnc; + } + if (decoded->subjectO) { + sz = (decoded->subjectOLen < CTC_NAME_SIZE) ? decoded->subjectOLen + : CTC_NAME_SIZE - 1; + XSTRNCPY(cn->org, decoded->subjectO, sz); + cn->org[sz] = '\0'; + cn->orgEnc = decoded->subjectOEnc; + } + if (decoded->subjectOU) { + sz = (decoded->subjectOULen < CTC_NAME_SIZE) ? decoded->subjectOULen + : CTC_NAME_SIZE - 1; + XSTRNCPY(cn->unit, decoded->subjectOU, sz); + cn->unit[sz] = '\0'; + cn->unitEnc = decoded->subjectOUEnc; + } + if (decoded->subjectSN) { + sz = (decoded->subjectSNLen < CTC_NAME_SIZE) ? decoded->subjectSNLen + : CTC_NAME_SIZE - 1; + XSTRNCPY(cn->sur, decoded->subjectSN, sz); + cn->sur[sz] = '\0'; + cn->surEnc = decoded->subjectSNEnc; + } + if (decoded->subjectSND) { + sz = (decoded->subjectSNDLen < CTC_NAME_SIZE) ? decoded->subjectSNDLen + : CTC_NAME_SIZE - 1; + XSTRNCPY(cn->serialDev, decoded->subjectSND, sz); + cn->serialDev[sz] = '\0'; + cn->serialDevEnc = decoded->subjectSNDEnc; + } +#ifdef WOLFSSL_CERT_EXT + if (decoded->subjectBC) { + sz = (decoded->subjectBCLen < CTC_NAME_SIZE) ? decoded->subjectBCLen + : CTC_NAME_SIZE - 1; + XSTRNCPY(cn->busCat, decoded->subjectBC, sz); + cn->busCat[sz] = '\0'; + cn->busCatEnc = decoded->subjectBCEnc; + } + if (decoded->subjectJC) { + sz = (decoded->subjectJCLen < CTC_NAME_SIZE) ? decoded->subjectJCLen + : CTC_NAME_SIZE - 1; + XSTRNCPY(cn->joiC, decoded->subjectJC, sz); + cn->joiC[sz] = '\0'; + cn->joiCEnc = decoded->subjectJCEnc; + } + if (decoded->subjectJS) { + sz = (decoded->subjectJSLen < CTC_NAME_SIZE) ? decoded->subjectJSLen + : CTC_NAME_SIZE - 1; + XSTRNCPY(cn->joiSt, decoded->subjectJS, sz); + cn->joiSt[sz] = '\0'; + cn->joiStEnc = decoded->subjectJSEnc; + } +#endif + if (decoded->subjectEmail) { + sz = (decoded->subjectEmailLen < CTC_NAME_SIZE) + ? decoded->subjectEmailLen : CTC_NAME_SIZE - 1; + XSTRNCPY(cn->email, decoded->subjectEmail, sz); + cn->email[sz] = '\0'; + } +} + +#ifndef NO_FILESYSTEM + +/* Set cn name from der buffer, return 0 on success */ +static int SetNameFromCert(CertName* cn, const byte* der, int derSz) +{ + int ret; +#ifdef WOLFSSL_SMALL_STACK + DecodedCert* decoded; +#else + DecodedCert decoded[1]; +#endif + + if (derSz < 0) + return derSz; + +#ifdef WOLFSSL_SMALL_STACK + decoded = (DecodedCert*)XMALLOC(sizeof(DecodedCert), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (decoded == NULL) + return MEMORY_E; +#endif + + InitDecodedCert(decoded, der, derSz, NULL); + ret = ParseCertRelative(decoded, CA_TYPE, NO_VERIFY, 0); + + if (ret < 0) { + WOLFSSL_MSG("ParseCertRelative error"); + } + else { + SetNameFromDcert(cn, decoded); + } + + FreeDecodedCert(decoded); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(decoded, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return ret < 0 ? 
ret : 0; +} + +/* Set cert issuer from issuerFile in PEM */ +int wc_SetIssuer(Cert* cert, const char* issuerFile) +{ + int ret; + int derSz; + byte* der; + + if (cert == NULL) { + return BAD_FUNC_ARG; + } + + der = (byte*)XMALLOC(EIGHTK_BUF, cert->heap, DYNAMIC_TYPE_CERT); + if (der == NULL) { + WOLFSSL_MSG("wc_SetIssuer OOF Problem"); + return MEMORY_E; + } + derSz = wc_PemCertToDer(issuerFile, der, EIGHTK_BUF); + cert->selfSigned = 0; + ret = SetNameFromCert(&cert->issuer, der, derSz); + XFREE(der, cert->heap, DYNAMIC_TYPE_CERT); + + return ret; +} + + +/* Set cert subject from subjectFile in PEM */ +int wc_SetSubject(Cert* cert, const char* subjectFile) +{ + int ret; + int derSz; + byte* der; + + if (cert == NULL) { + return BAD_FUNC_ARG; + } + + der = (byte*)XMALLOC(EIGHTK_BUF, cert->heap, DYNAMIC_TYPE_CERT); + if (der == NULL) { + WOLFSSL_MSG("wc_SetSubject OOF Problem"); + return MEMORY_E; + } + + derSz = wc_PemCertToDer(subjectFile, der, EIGHTK_BUF); + ret = SetNameFromCert(&cert->subject, der, derSz); + XFREE(der, cert->heap, DYNAMIC_TYPE_CERT); + + return ret; +} + +#ifdef WOLFSSL_ALT_NAMES + +/* Set alt names from file in PEM */ +int wc_SetAltNames(Cert* cert, const char* file) +{ + int ret; + int derSz; + byte* der; + + if (cert == NULL) { + return BAD_FUNC_ARG; + } + + der = (byte*)XMALLOC(EIGHTK_BUF, cert->heap, DYNAMIC_TYPE_CERT); + if (der == NULL) { + WOLFSSL_MSG("wc_SetAltNames OOF Problem"); + return MEMORY_E; + } + derSz = wc_PemCertToDer(file, der, EIGHTK_BUF); + ret = SetAltNamesFromCert(cert, der, derSz); + XFREE(der, cert->heap, DYNAMIC_TYPE_CERT); + + return ret; +} + +#endif /* WOLFSSL_ALT_NAMES */ + +#endif /* !NO_FILESYSTEM */ + +/* Set cert issuer from DER buffer */ +int wc_SetIssuerBuffer(Cert* cert, const byte* der, int derSz) +{ + int ret = 0; + + if (cert == NULL) { + ret = BAD_FUNC_ARG; + } + else { + cert->selfSigned = 0; + + /* Check if decodedCert is cached */ + if (cert->der != der) { + /* Allocate cache for the decoded cert */ + ret = wc_SetCert_LoadDer(cert, der, derSz); + } + + if (ret >= 0) { + SetNameFromDcert(&cert->issuer, (DecodedCert*)cert->decodedCert); +#ifndef WOLFSSL_CERT_GEN_CACHE + wc_SetCert_Free(cert); +#endif + } + } + + return ret; +} + +/* Set cert subject from DER buffer */ +int wc_SetSubjectBuffer(Cert* cert, const byte* der, int derSz) +{ + int ret = 0; + + if (cert == NULL) { + ret = BAD_FUNC_ARG; + } + else { + /* Check if decodedCert is cached */ + if (cert->der != der) { + /* Allocate cache for the decoded cert */ + ret = wc_SetCert_LoadDer(cert, der, derSz); + } + + if (ret >= 0) { + SetNameFromDcert(&cert->subject, (DecodedCert*)cert->decodedCert); +#ifndef WOLFSSL_CERT_GEN_CACHE + wc_SetCert_Free(cert); +#endif + } + } + + return ret; +} +#ifdef WOLFSSL_CERT_EXT +/* Set cert raw subject from DER buffer */ +int wc_SetSubjectRaw(Cert* cert, const byte* der, int derSz) +{ + int ret = 0; + + if (cert == NULL) { + ret = BAD_FUNC_ARG; + } + else { + /* Check if decodedCert is cached */ + if (cert->der != der) { + /* Allocate cache for the decoded cert */ + ret = wc_SetCert_LoadDer(cert, der, derSz); + } + + if (ret >= 0) { + if ((((DecodedCert*)cert->decodedCert)->subjectRaw) && + (((DecodedCert*)cert->decodedCert)->subjectRawLen <= + (int)sizeof(CertName))) { + XMEMCPY(cert->sbjRaw, + ((DecodedCert*)cert->decodedCert)->subjectRaw, + ((DecodedCert*)cert->decodedCert)->subjectRawLen); + } +#ifndef WOLFSSL_CERT_GEN_CACHE + wc_SetCert_Free(cert); +#endif + } + } + + return ret; +} + +/* Set cert raw issuer from DER buffer */ +int 
wc_SetIssuerRaw(Cert* cert, const byte* der, int derSz) +{ + int ret = 0; + + if (cert == NULL) { + ret = BAD_FUNC_ARG; + } + else { + /* Check if decodedCert is cached */ + if (cert->der != der) { + /* Allocate cache for the decoded cert */ + ret = wc_SetCert_LoadDer(cert, der, derSz); + } + + if (ret >= 0) { + if ((((DecodedCert*)cert->decodedCert)->issuerRaw) && + (((DecodedCert*)cert->decodedCert)->issuerRawLen <= + (int)sizeof(CertName))) { + XMEMCPY(cert->issRaw, + ((DecodedCert*)cert->decodedCert)->issuerRaw, + ((DecodedCert*)cert->decodedCert)->issuerRawLen); + } +#ifndef WOLFSSL_CERT_GEN_CACHE + wc_SetCert_Free(cert); +#endif + } + } + return ret; +} +#endif + +#ifdef WOLFSSL_ALT_NAMES + +/* Set cert alt names from DER buffer */ +int wc_SetAltNamesBuffer(Cert* cert, const byte* der, int derSz) +{ + int ret = 0; + + if (cert == NULL) { + ret = BAD_FUNC_ARG; + } + else { + /* Check if decodedCert is cached */ + if (cert->der != der) { + /* Allocate cache for the decoded cert */ + ret = wc_SetCert_LoadDer(cert, der, derSz); + } + + if (ret >= 0) { + ret = SetAltNamesFromDcert(cert, (DecodedCert*)cert->decodedCert); +#ifndef WOLFSSL_CERT_GEN_CACHE + wc_SetCert_Free(cert); +#endif + } + } + + return(ret); +} + +/* Set cert dates from DER buffer */ +int wc_SetDatesBuffer(Cert* cert, const byte* der, int derSz) +{ + int ret = 0; + + if (cert == NULL) { + ret = BAD_FUNC_ARG; + } + else { + /* Check if decodedCert is cached */ + if (cert->der != der) { + /* Allocate cache for the decoded cert */ + ret = wc_SetCert_LoadDer(cert, der, derSz); + } + + if (ret >= 0) { + ret = SetDatesFromDcert(cert, (DecodedCert*)cert->decodedCert); +#ifndef WOLFSSL_CERT_GEN_CACHE + wc_SetCert_Free(cert); +#endif + } + } + + return(ret); +} + +#endif /* WOLFSSL_ALT_NAMES */ + +#endif /* WOLFSSL_CERT_GEN */ + +#if (defined(WOLFSSL_CERT_GEN) && defined(WOLFSSL_CERT_EXT)) \ + || defined(OPENSSL_EXTRA) +/* Encode OID string representation to ITU-T X.690 format */ +int EncodePolicyOID(byte *out, word32 *outSz, const char *in, void* heap) +{ + word32 val, idx = 0, nb_val; + char *token, *str, *ptr; + word32 len; + + (void)heap; + + if (out == NULL || outSz == NULL || *outSz < 2 || in == NULL) + return BAD_FUNC_ARG; + + /* duplicate string (including terminator) */ + len = (word32)XSTRLEN(in); + str = (char *)XMALLOC(len+1, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (str == NULL) + return MEMORY_E; + XMEMCPY(str, in, len+1); + + nb_val = 0; + + /* parse value, and set corresponding Policy OID value */ + token = XSTRTOK(str, ".", &ptr); + while (token != NULL) + { + val = (word32)XATOI(token); + + if (nb_val == 0) { + if (val > 2) { + XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER); + return ASN_OBJECT_ID_E; + } + + out[idx] = (byte)(40 * val); + } + else if (nb_val == 1) { + if (val > 127) { + XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER); + return ASN_OBJECT_ID_E; + } + + if (idx > *outSz) { + XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER); + return BUFFER_E; + } + + out[idx++] += (byte)val; + } + else { + word32 tb = 0, x; + int i = 0; + byte oid[MAX_OID_SZ]; + + while (val >= 128) { + x = val % 128; + val /= 128; + oid[i++] = (byte) (((tb++) ? 0x80 : 0) | x); + } + + if ((idx+(word32)i) > *outSz) { + XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER); + return BUFFER_E; + } + + oid[i] = (byte) (((tb++) ? 
0x80 : 0) | val); + + /* push value in the right order */ + while (i >= 0) + out[idx++] = oid[i--]; + } + + token = XSTRTOK(NULL, ".", &ptr); + nb_val++; + } + + *outSz = idx; + + XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER); + return 0; +} +#endif /* WOLFSSL_CERT_EXT || OPENSSL_EXTRA */ + +#endif /* !NO_CERTS */ + +#if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL)) +/* Helper function for wolfSSL_i2d_DHparams */ +int StoreDHparams(byte* out, word32* outLen, mp_int* p, mp_int* g) +{ + word32 idx = 0; + int pSz; + int gSz; + unsigned int tmp; + word32 headerSz = 4; /* 2*ASN_TAG + 2*LEN(ENUM) */ + + /* If the leading bit on the INTEGER is a 1, add a leading zero */ + int pLeadingZero = mp_leading_bit(p); + int gLeadingZero = mp_leading_bit(g); + int pLen = mp_unsigned_bin_size(p); + int gLen = mp_unsigned_bin_size(g); + + WOLFSSL_ENTER("StoreDHparams"); + if (out == NULL) { + WOLFSSL_MSG("Null buffer error"); + return BUFFER_E; + } + + tmp = pLeadingZero + gLeadingZero + pLen + gLen; + if (*outLen < (tmp + headerSz)) { + return BUFFER_E; + } + + /* Set sequence */ + idx = SetSequence(tmp + headerSz + 2, out); + + /* Encode p */ + pSz = SetASNIntMP(p, -1, &out[idx]); + if (pSz < 0) { + WOLFSSL_MSG("SetASNIntMP failed"); + return pSz; + } + idx += pSz; + + /* Encode g */ + gSz = SetASNIntMP(g, -1, &out[idx]); + if (gSz < 0) { + WOLFSSL_MSG("SetASNIntMP failed"); + return gSz; + } + idx += gSz; + + *outLen = idx; + + return 0; +} +#endif /* !NO_DH && WOLFSSL_QT || OPENSSL_ALL */ + +#ifdef HAVE_ECC + +/* Der Encode r & s ints into out, outLen is (in/out) size */ +int StoreECC_DSA_Sig(byte* out, word32* outLen, mp_int* r, mp_int* s) +{ + word32 idx = 0; + int rSz; /* encoding size */ + int sSz; + word32 headerSz = 4; /* 2*ASN_TAG + 2*LEN(ENUM) */ + + /* If the leading bit on the INTEGER is a 1, add a leading zero */ + int rLeadingZero = mp_leading_bit(r); + int sLeadingZero = mp_leading_bit(s); + int rLen = mp_unsigned_bin_size(r); /* big int size */ + int sLen = mp_unsigned_bin_size(s); + + if (*outLen < (rLen + rLeadingZero + sLen + sLeadingZero + + headerSz + 2)) /* SEQ_TAG + LEN(ENUM) */ + return BUFFER_E; + + idx = SetSequence(rLen + rLeadingZero + sLen+sLeadingZero + headerSz, out); + + /* store r */ + rSz = SetASNIntMP(r, -1, &out[idx]); + if (rSz < 0) + return rSz; + idx += rSz; + + /* store s */ + sSz = SetASNIntMP(s, -1, &out[idx]); + if (sSz < 0) + return sSz; + idx += sSz; + + *outLen = idx; + + return 0; +} + + +/* Der Decode ECC-DSA Signature, r & s stored as big ints */ +int DecodeECC_DSA_Sig(const byte* sig, word32 sigLen, mp_int* r, mp_int* s) +{ + word32 idx = 0; + int len = 0; + + if (GetSequence(sig, &idx, &len, sigLen) < 0) { + return ASN_ECC_KEY_E; + } + +#ifndef NO_STRICT_ECDSA_LEN + /* enable strict length checking for signature */ + if (sigLen != idx + (word32)len) { + return ASN_ECC_KEY_E; + } +#else + /* allow extra signature bytes at end */ + if ((word32)len > (sigLen - idx)) { + return ASN_ECC_KEY_E; + } +#endif + + if (GetInt(r, sig, &idx, sigLen) < 0) { + return ASN_ECC_KEY_E; + } + + if (GetInt(s, sig, &idx, sigLen) < 0) { + return ASN_ECC_KEY_E; + } + + return 0; +} + + +int wc_EccPrivateKeyDecode(const byte* input, word32* inOutIdx, ecc_key* key, + word32 inSz) +{ + word32 oidSum; + int version, length; + int privSz, pubSz = 0; + byte b; + int ret = 0; + int curve_id = ECC_CURVE_DEF; +#ifdef WOLFSSL_SMALL_STACK + byte* priv; + byte* pub; +#else + byte priv[ECC_MAXSIZE+1]; + byte pub[2*(ECC_MAXSIZE+1)]; /* public key has two parts plus header */ 
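+    /* Input is an RFC 5915 / SEC1 ECPrivateKey:
+     *   SEQUENCE { version INTEGER, privateKey OCTET STRING,
+     *              [0] parameters (curve OID) OPTIONAL,
+     *              [1] publicKey BIT STRING OPTIONAL }
+     * pub holds the optional uncompressed point 0x04 || X || Y, hence the
+     * 2*(ECC_MAXSIZE+1) sizing above.
+     * Hypothetical usage sketch (caller-chosen names, error handling
+     * omitted):
+     *     ecc_key eccKey;
+     *     word32 idx = 0;
+     *     wc_ecc_init(&eccKey);
+     *     ret = wc_EccPrivateKeyDecode(der, &idx, &eccKey, derSz);
+     */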
+#endif
+    byte* pubData = NULL;
+
+    if (input == NULL || inOutIdx == NULL || key == NULL || inSz == 0)
+        return BAD_FUNC_ARG;
+
+    if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+        return ASN_PARSE_E;
+
+    if (GetMyVersion(input, inOutIdx, &version, inSz) < 0)
+        return ASN_PARSE_E;
+
+    if (*inOutIdx >= inSz)
+        return ASN_PARSE_E;
+
+    b = input[*inOutIdx];
+    *inOutIdx += 1;
+
+    /* priv type */
+    if (b != 4 && b != 6 && b != 7)
+        return ASN_PARSE_E;
+
+    if (GetLength(input, inOutIdx, &length, inSz) < 0)
+        return ASN_PARSE_E;
+
+    if (length > ECC_MAXSIZE)
+        return BUFFER_E;
+
+#ifdef WOLFSSL_SMALL_STACK
+    priv = (byte*)XMALLOC(ECC_MAXSIZE+1, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (priv == NULL)
+        return MEMORY_E;
+
+    pub = (byte*)XMALLOC(2*(ECC_MAXSIZE+1), key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (pub == NULL) {
+        XFREE(priv, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return MEMORY_E;
+    }
+#endif
+
+    /* priv key */
+    privSz = length;
+    XMEMCPY(priv, &input[*inOutIdx], privSz);
+    *inOutIdx += length;
+
+    if ((*inOutIdx + 1) < inSz) {
+        /* prefix 0, may have */
+        b = input[*inOutIdx];
+        if (b == ECC_PREFIX_0) {
+            *inOutIdx += 1;
+
+            if (GetLength(input, inOutIdx, &length, inSz) <= 0)
+                ret = ASN_PARSE_E;
+            else {
+                ret = GetObjectId(input, inOutIdx, &oidSum, oidIgnoreType,
+                                  inSz);
+                if (ret == 0) {
+                    if ((ret = CheckCurve(oidSum)) < 0)
+                        ret = ECC_CURVE_OID_E;
+                    else {
+                        curve_id = ret;
+                        ret = 0;
+                    }
+                }
+            }
+        }
+    }
+
+    if (ret == 0 && (*inOutIdx + 1) < inSz) {
+        /* prefix 1 */
+        b = input[*inOutIdx];
+        *inOutIdx += 1;
+
+        if (b != ECC_PREFIX_1) {
+            ret = ASN_ECC_KEY_E;
+        }
+        else if (GetLength(input, inOutIdx, &length, inSz) <= 0) {
+            ret = ASN_PARSE_E;
+        }
+        else {
+            /* key header */
+            ret = CheckBitString(input, inOutIdx, &length, inSz, 0, NULL);
+            if (ret == 0) {
+                /* pub key */
+                pubSz = length;
+                if (pubSz < 2*(ECC_MAXSIZE+1)) {
+                    XMEMCPY(pub, &input[*inOutIdx], pubSz);
+                    *inOutIdx += length;
+                    pubData = pub;
+                }
+                else
+                    ret = BUFFER_E;
+            }
+        }
+    }
+
+    if (ret == 0) {
+        ret = wc_ecc_import_private_key_ex(priv, privSz, pubData, pubSz, key,
+                                           curve_id);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(priv, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return ret;
+}
+
+
+#ifdef WOLFSSL_CUSTOM_CURVES
+static void ByteToHex(byte n, char* str)
+{
+    const char hexChar[] = { '0', '1', '2', '3', '4', '5', '6', '7',
+                             '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
+
+    str[0] = hexChar[n >> 4];
+    str[1] = hexChar[n & 0xf];
+}
+
+/* returns 0 on success */
+static int ASNToHexString(const byte* input, word32* inOutIdx, char** out,
+                          word32 inSz, void* heap, int heapType)
+{
+    int len;
+    int i;
+    char* str;
+    word32 localIdx;
+    byte tag;
+
+    if (*inOutIdx >= inSz) {
+        return BUFFER_E;
+    }
+
+    localIdx = *inOutIdx;
+    if (GetASNTag(input, &localIdx, &tag, inSz) == 0 && tag == ASN_INTEGER) {
+        if (GetASNInt(input, inOutIdx, &len, inSz) < 0)
+            return ASN_PARSE_E;
+    }
+    else {
+        if (GetOctetString(input, inOutIdx, &len, inSz) < 0)
+            return ASN_PARSE_E;
+    }
+
+    str = (char*)XMALLOC(len * 2 + 1, heap, heapType);
+    if (str == NULL)
+        return MEMORY_E;
+    for (i = 0; i < len; i++)
+        ByteToHex(input[*inOutIdx + i], str + i*2);
+    str[len*2] = '\0';
+
+    *inOutIdx += len;
+    *out = str;
+
+    return 0;
+}
+
+static int EccKeyParamCopy(char** dst, char* src)
+{
+    int ret = 0;
+#ifdef WOLFSSL_ECC_CURVE_STATIC
+    word32 length;
+#endif
+
+    if (dst == NULL || src == NULL)
+        return BAD_FUNC_ARG;
+
+#ifndef WOLFSSL_ECC_CURVE_STATIC
+    *dst = src;
+#else
+    length = (word32)XSTRLEN(src) + 1;
+    if (length > MAX_ECC_STRING) {
+        WOLFSSL_MSG("ECC Param too large for buffer");
+        ret = BUFFER_E;
+    }
+    else {
+        XSTRNCPY(*dst, src, length);
+    }
+    XFREE(src, NULL, DYNAMIC_TYPE_ECC_BUFFER);
+#endif
+
+    return ret;
+}
+#endif /* WOLFSSL_CUSTOM_CURVES */
+
+int wc_EccPublicKeyDecode(const byte* input, word32* inOutIdx,
+                          ecc_key* key, word32 inSz)
+{
+    int length;
+    int ret;
+    int curve_id = ECC_CURVE_DEF;
+    word32 oidSum, localIdx;
+    byte tag;
+
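+    /* Expected input is an RFC 5480 SubjectPublicKeyInfo:
+     *   SEQUENCE { SEQUENCE { OID id-ecPublicKey, parameters },
+     *              BIT STRING publicKey }
+     * parameters is normally a named-curve OID; with WOLFSSL_CUSTOM_CURVES
+     * an explicit specifiedCurve SEQUENCE is also accepted below.
+     * Hypothetical usage sketch (caller-chosen names, error handling
+     * omitted):
+     *     word32 idx = 0;
+     *     ecc_key pubKey;
+     *     wc_ecc_init(&pubKey);
+     *     ret = wc_EccPublicKeyDecode(der, &idx, &pubKey, derSz);
+     */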
+ if (input == NULL || inOutIdx == NULL || key == NULL || inSz == 0) + return BAD_FUNC_ARG; + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + ret = SkipObjectId(input, inOutIdx, inSz); + if (ret != 0) + return ret; + + if (*inOutIdx >= inSz) { + return BUFFER_E; + } + + localIdx = *inOutIdx; + if (GetASNTag(input, &localIdx, &tag, inSz) == 0 && + tag == (ASN_SEQUENCE | ASN_CONSTRUCTED)) { +#ifdef WOLFSSL_CUSTOM_CURVES + ecc_set_type* curve; + int len; + char* point = NULL; + + ret = 0; + + curve = (ecc_set_type*)XMALLOC(sizeof(*curve), key->heap, + DYNAMIC_TYPE_ECC_BUFFER); + if (curve == NULL) + ret = MEMORY_E; + + if (ret == 0) { + static const char customName[] = "Custom"; + XMEMSET(curve, 0, sizeof(*curve)); + #ifndef WOLFSSL_ECC_CURVE_STATIC + curve->name = customName; + #else + XMEMCPY((void*)curve->name, customName, sizeof(customName)); + #endif + curve->id = ECC_CURVE_CUSTOM; + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + ret = ASN_PARSE_E; + } + + if (ret == 0) { + GetInteger7Bit(input, inOutIdx, inSz); + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + ret = ASN_PARSE_E; + } + if (ret == 0) { + char* p = NULL; + SkipObjectId(input, inOutIdx, inSz); + ret = ASNToHexString(input, inOutIdx, &p, inSz, + key->heap, DYNAMIC_TYPE_ECC_BUFFER); + if (ret == 0) + ret = EccKeyParamCopy((char**)&curve->prime, p); + } + if (ret == 0) { + curve->size = (int)XSTRLEN(curve->prime) / 2; + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + ret = ASN_PARSE_E; + } + if (ret == 0) { + char* af = NULL; + ret = ASNToHexString(input, inOutIdx, &af, inSz, + key->heap, DYNAMIC_TYPE_ECC_BUFFER); + if (ret == 0) + ret = EccKeyParamCopy((char**)&curve->Af, af); + } + if (ret == 0) { + char* bf = NULL; + ret = ASNToHexString(input, inOutIdx, &bf, inSz, + key->heap, DYNAMIC_TYPE_ECC_BUFFER); + if (ret == 0) + ret = EccKeyParamCopy((char**)&curve->Bf, bf); + } + if (ret == 0) { + localIdx = *inOutIdx; + if (*inOutIdx < inSz && GetASNTag(input, &localIdx, &tag, inSz) + == 0 && tag == ASN_BIT_STRING) { + len = 0; + ret = GetASNHeader(input, ASN_BIT_STRING, inOutIdx, &len, inSz); + *inOutIdx += len; + } + } + if (ret == 0) { + ret = ASNToHexString(input, inOutIdx, (char**)&point, inSz, + key->heap, DYNAMIC_TYPE_ECC_BUFFER); + + /* sanity check that point buffer is not smaller than the expected + * size to hold ( 0 4 || Gx || Gy ) + * where Gx and Gy are each the size of curve->size * 2 */ + if (ret == 0 && (int)XSTRLEN(point) < (curve->size * 4) + 2) { + XFREE(point, key->heap, DYNAMIC_TYPE_ECC_BUFFER); + ret = BUFFER_E; + } + } + if (ret == 0) { + #ifndef WOLFSSL_ECC_CURVE_STATIC + curve->Gx = (const char*)XMALLOC(curve->size * 2 + 2, key->heap, + DYNAMIC_TYPE_ECC_BUFFER); + curve->Gy = (const char*)XMALLOC(curve->size * 2 + 2, key->heap, + DYNAMIC_TYPE_ECC_BUFFER); + if (curve->Gx == NULL || curve->Gy == NULL) { + XFREE(point, key->heap, DYNAMIC_TYPE_ECC_BUFFER); + ret = MEMORY_E; + } + #else + if (curve->size * 2 + 2 > MAX_ECC_STRING) { + WOLFSSL_MSG("curve size is too large to fit in buffer"); + ret = BUFFER_E; + } + #endif + } + if (ret == 0) { + char* o = NULL; + + XMEMCPY((char*)curve->Gx, point + 2, curve->size * 2); + XMEMCPY((char*)curve->Gy, point + curve->size * 2 + 2, + curve->size * 2); + ((char*)curve->Gx)[curve->size * 2] = '\0'; + ((char*)curve->Gy)[curve->size * 2] = '\0'; + XFREE(point, key->heap, DYNAMIC_TYPE_ECC_BUFFER); + ret = ASNToHexString(input, inOutIdx, 
&o, inSz, + key->heap, DYNAMIC_TYPE_ECC_BUFFER); + if (ret == 0) + ret = EccKeyParamCopy((char**)&curve->order, o); + } + if (ret == 0) { + curve->cofactor = GetInteger7Bit(input, inOutIdx, inSz); + + #ifndef WOLFSSL_ECC_CURVE_STATIC + curve->oid = NULL; + #else + XMEMSET((void*)curve->oid, 0, sizeof(curve->oid)); + #endif + curve->oidSz = 0; + curve->oidSum = 0; + + if (wc_ecc_set_custom_curve(key, curve) < 0) { + ret = ASN_PARSE_E; + } + #ifdef WOLFSSL_CUSTOM_CURVES + key->deallocSet = 1; + #endif + curve = NULL; + } + if (curve != NULL) + wc_ecc_free_curve(curve, key->heap); + + if (ret < 0) + return ret; +#else + return ASN_PARSE_E; +#endif /* WOLFSSL_CUSTOM_CURVES */ + } + else { + /* ecc params information */ + ret = GetObjectId(input, inOutIdx, &oidSum, oidIgnoreType, inSz); + if (ret != 0) + return ret; + + /* get curve id */ + curve_id = wc_ecc_get_oid(oidSum, NULL, 0); + if (curve_id < 0) + return ECC_CURVE_OID_E; + } + + /* key header */ + ret = CheckBitString(input, inOutIdx, &length, inSz, 1, NULL); + if (ret != 0) + return ret; + + /* This is the raw point data compressed or uncompressed. */ + if (wc_ecc_import_x963_ex(input + *inOutIdx, length, key, + curve_id) != 0) { + return ASN_ECC_KEY_E; + } + + *inOutIdx += length; + + return 0; +} + +#if defined(HAVE_ECC_KEY_EXPORT) && !defined(NO_ASN_CRYPT) +/* build DER formatted ECC key, include optional public key if requested, + * return length on success, negative on error */ +static int wc_BuildEccKeyDer(ecc_key* key, byte* output, word32 inLen, + int pubIn) +{ + byte curve[MAX_ALGO_SZ+2]; + byte ver[MAX_VERSION_SZ]; + byte seq[MAX_SEQ_SZ]; + byte *prv = NULL, *pub = NULL; + int ret, totalSz, curveSz, verSz; + int privHdrSz = ASN_ECC_HEADER_SZ; + int pubHdrSz = ASN_ECC_CONTEXT_SZ + ASN_ECC_HEADER_SZ; + + word32 idx = 0, prvidx = 0, pubidx = 0, curveidx = 0; + word32 seqSz, privSz, pubSz = ECC_BUFSIZE; + + if (key == NULL || output == NULL || inLen == 0) + return BAD_FUNC_ARG; + + /* curve */ + curve[curveidx++] = ECC_PREFIX_0; + curveidx++ /* to put the size after computation */; + curveSz = SetCurve(key, curve+curveidx); + if (curveSz < 0) + return curveSz; + /* set computed size */ + curve[1] = (byte)curveSz; + curveidx += curveSz; + + /* private */ + privSz = key->dp->size; + prv = (byte*)XMALLOC(privSz + privHdrSz + MAX_SEQ_SZ, + key->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (prv == NULL) { + return MEMORY_E; + } + prvidx += SetOctetString8Bit(key->dp->size, &prv[prvidx]); + ret = wc_ecc_export_private_only(key, prv + prvidx, &privSz); + if (ret < 0) { + XFREE(prv, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + return ret; + } + prvidx += privSz; + + /* pubIn */ + if (pubIn) { + ret = wc_ecc_export_x963(key, NULL, &pubSz); + if (ret != LENGTH_ONLY_E) { + XFREE(prv, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + return ret; + } + + pub = (byte*)XMALLOC(pubSz + pubHdrSz + MAX_SEQ_SZ, + key->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (pub == NULL) { + XFREE(prv, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + return MEMORY_E; + } + + pub[pubidx++] = ECC_PREFIX_1; + if (pubSz > 128) /* leading zero + extra size byte */ + pubidx += SetLength(pubSz + ASN_ECC_CONTEXT_SZ + 2, pub+pubidx); + else /* leading zero */ + pubidx += SetLength(pubSz + ASN_ECC_CONTEXT_SZ + 1, pub+pubidx); + + /* SetBitString adds leading zero */ + pubidx += SetBitString(pubSz, 0, pub + pubidx); + ret = wc_ecc_export_x963(key, pub + pubidx, &pubSz); + if (ret != 0) { + XFREE(prv, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + return ret; + } + pubidx 
+= pubSz; + } + + /* make headers */ + verSz = SetMyVersion(1, ver, FALSE); + seqSz = SetSequence(verSz + prvidx + pubidx + curveidx, seq); + + totalSz = prvidx + pubidx + curveidx + verSz + seqSz; + if (totalSz > (int)inLen) { + XFREE(prv, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (pubIn) { + XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + return BAD_FUNC_ARG; + } + + /* write out */ + /* seq */ + XMEMCPY(output + idx, seq, seqSz); + idx = seqSz; + + /* ver */ + XMEMCPY(output + idx, ver, verSz); + idx += verSz; + + /* private */ + XMEMCPY(output + idx, prv, prvidx); + idx += prvidx; + XFREE(prv, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + + /* curve */ + XMEMCPY(output + idx, curve, curveidx); + idx += curveidx; + + /* pubIn */ + if (pubIn) { + XMEMCPY(output + idx, pub, pubidx); + /* idx += pubidx; not used after write, if more data remove comment */ + XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + + return totalSz; +} + +/* Write a Private ecc key, including public to DER format, + * length on success else < 0 */ +int wc_EccKeyToDer(ecc_key* key, byte* output, word32 inLen) +{ + return wc_BuildEccKeyDer(key, output, inLen, 1); +} + + +/* Write only private ecc key to DER format, + * length on success else < 0 */ +int wc_EccPrivateKeyToDer(ecc_key* key, byte* output, word32 inLen) +{ + return wc_BuildEccKeyDer(key, output, inLen, 0); +} + +#ifdef HAVE_PKCS8 +/* Write only private ecc key to unencrypted PKCS#8 format. + * + * If output is NULL, places required PKCS#8 buffer size in outLen and + * returns LENGTH_ONLY_E. + * + * return length on success else < 0 */ +int wc_EccPrivateKeyToPKCS8(ecc_key* key, byte* output, word32* outLen) +{ + int ret, tmpDerSz; + int algoID = 0; + word32 oidSz = 0; + word32 pkcs8Sz = 0; + const byte* curveOID = NULL; + byte* tmpDer = NULL; + + if (key == NULL || outLen == NULL) + return BAD_FUNC_ARG; + + /* set algoID, get curve OID */ + algoID = ECDSAk; + ret = wc_ecc_get_oid(key->dp->oidSum, &curveOID, &oidSz); + if (ret < 0) + return ret; + + /* temp buffer for plain DER key */ + tmpDer = (byte*)XMALLOC(ECC_BUFSIZE, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (tmpDer == NULL) + return MEMORY_E; + + XMEMSET(tmpDer, 0, ECC_BUFSIZE); + + tmpDerSz = wc_BuildEccKeyDer(key, tmpDer, ECC_BUFSIZE, 0); + if (tmpDerSz < 0) { + XFREE(tmpDer, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + return tmpDerSz; + } + + /* get pkcs8 expected output size */ + ret = wc_CreatePKCS8Key(NULL, &pkcs8Sz, tmpDer, tmpDerSz, algoID, + curveOID, oidSz); + if (ret != LENGTH_ONLY_E) { + XFREE(tmpDer, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + return ret; + } + + if (output == NULL) { + XFREE(tmpDer, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + *outLen = pkcs8Sz; + return LENGTH_ONLY_E; + + } else if (*outLen < pkcs8Sz) { + XFREE(tmpDer, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + WOLFSSL_MSG("Input buffer too small for ECC PKCS#8 key"); + return BUFFER_E; + } + + ret = wc_CreatePKCS8Key(output, &pkcs8Sz, tmpDer, tmpDerSz, + algoID, curveOID, oidSz); + if (ret < 0) { + XFREE(tmpDer, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + return ret; + } + + XFREE(tmpDer, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + + *outLen = ret; + return ret; +} +#endif /* HAVE_PKCS8 */ +#endif /* HAVE_ECC_KEY_EXPORT && !NO_ASN_CRYPT */ +#endif /* HAVE_ECC */ + + +#ifdef HAVE_ED25519 + +int wc_Ed25519PrivateKeyDecode(const byte* input, word32* inOutIdx, + ed25519_key* key, word32 inSz) +{ + word32 oid; + int ret, version, length, endKeyIdx, privSz, pubSz; + const byte* priv; + const byte* pub; + + if (input == NULL || inOutIdx == NULL || 
key == NULL || inSz == 0) + return BAD_FUNC_ARG; + + if (GetSequence(input, inOutIdx, &length, inSz) >= 0) { + endKeyIdx = *inOutIdx + length; + + if (GetMyVersion(input, inOutIdx, &version, inSz) < 0) + return ASN_PARSE_E; + if (version != 0) { + WOLFSSL_MSG("Unrecognized version of ED25519 private key"); + return ASN_PARSE_E; + } + + if (GetAlgoId(input, inOutIdx, &oid, oidKeyType, inSz) < 0) + return ASN_PARSE_E; + if (oid != ED25519k) + return ASN_PARSE_E; + + if (GetOctetString(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + if (GetOctetString(input, inOutIdx, &privSz, inSz) < 0) + return ASN_PARSE_E; + + priv = input + *inOutIdx; + *inOutIdx += privSz; + } + else { + if (GetOctetString(input, inOutIdx, &privSz, inSz) < 0) + return ASN_PARSE_E; + + if (privSz != 32) + return ASN_PARSE_E; + + priv = input + *inOutIdx; + *inOutIdx += privSz; + endKeyIdx = *inOutIdx; + } + + if (endKeyIdx == (int)*inOutIdx) { + ret = wc_ed25519_import_private_only(priv, privSz, key); + } + else { + if (GetASNHeader(input, ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 1, + inOutIdx, &length, inSz) < 0) { + return ASN_PARSE_E; + } + if (GetOctetString(input, inOutIdx, &pubSz, inSz) < 0) + return ASN_PARSE_E; + pub = input + *inOutIdx; + *inOutIdx += pubSz; + + ret = wc_ed25519_import_private_key(priv, privSz, pub, pubSz, key); + } + if (ret == 0 && endKeyIdx != (int)*inOutIdx) + return ASN_PARSE_E; + + return ret; +} + + +int wc_Ed25519PublicKeyDecode(const byte* input, word32* inOutIdx, + ed25519_key* key, word32 inSz) +{ + int length; + int ret; + + if (input == NULL || inOutIdx == NULL || key == NULL || inSz == 0) + return BAD_FUNC_ARG; + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + ret = SkipObjectId(input, inOutIdx, inSz); + if (ret != 0) + return ret; + + /* key header */ + ret = CheckBitString(input, inOutIdx, NULL, inSz, 1, NULL); + if (ret != 0) + return ret; + + /* This is the raw point data compressed or uncompressed. 
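+     * (For Ed25519 the BIT STRING contents are the raw 32-byte public key
+     * per RFC 8410.)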
*/ + if (wc_ed25519_import_public(input + *inOutIdx, inSz - *inOutIdx, key) != 0) + return ASN_ECC_KEY_E; + + return 0; +} + + +#ifdef WOLFSSL_KEY_GEN + +/* build DER formatted ED25519 key, + * return length on success, negative on error */ +static int wc_BuildEd25519KeyDer(ed25519_key* key, byte* output, word32 inLen, + int pubOut) +{ + byte algoArray[MAX_ALGO_SZ]; + byte ver[MAX_VERSION_SZ]; + byte seq[MAX_SEQ_SZ]; + int ret; + word32 idx = 0, seqSz, verSz, algoSz, privSz, pubSz = 0; + + if (key == NULL || output == NULL || inLen == 0) + return BAD_FUNC_ARG; + + if (pubOut) + pubSz = 2 + 2 + ED25519_PUB_KEY_SIZE; + privSz = 2 + 2 + ED25519_KEY_SIZE; + algoSz = SetAlgoID(ED25519k, algoArray, oidKeyType, 0); + verSz = SetMyVersion(0, ver, FALSE); + seqSz = SetSequence(verSz + algoSz + privSz + pubSz, seq); + + if (seqSz + verSz + algoSz + privSz + pubSz > inLen) + return BAD_FUNC_ARG; + + /* write out */ + /* seq */ + XMEMCPY(output + idx, seq, seqSz); + idx = seqSz; + /* ver */ + XMEMCPY(output + idx, ver, verSz); + idx += verSz; + /* algo */ + XMEMCPY(output + idx, algoArray, algoSz); + idx += algoSz; + /* privKey */ + idx += SetOctetString(2 + ED25519_KEY_SIZE, output + idx); + idx += SetOctetString(ED25519_KEY_SIZE, output + idx); + ret = wc_ed25519_export_private_only(key, output + idx, &privSz); + if (ret != 0) + return ret; + idx += privSz; + /* pubKey */ + if (pubOut) { + idx += SetExplicit(1, 2 + ED25519_PUB_KEY_SIZE, output + idx); + idx += SetOctetString(ED25519_KEY_SIZE, output + idx); + ret = wc_ed25519_export_public(key, output + idx, &pubSz); + if (ret != 0) + return ret; + idx += pubSz; + } + + return idx; +} + +/* Write a Private ecc key, including public to DER format, + * length on success else < 0 */ +int wc_Ed25519KeyToDer(ed25519_key* key, byte* output, word32 inLen) +{ + return wc_BuildEd25519KeyDer(key, output, inLen, 1); +} + + + +/* Write only private ecc key to DER format, + * length on success else < 0 */ +int wc_Ed25519PrivateKeyToDer(ed25519_key* key, byte* output, word32 inLen) +{ + return wc_BuildEd25519KeyDer(key, output, inLen, 0); +} + +#endif /* WOLFSSL_KEY_GEN */ + +#endif /* HAVE_ED25519 */ + +#ifdef HAVE_ED448 + +int wc_Ed448PrivateKeyDecode(const byte* input, word32* inOutIdx, + ed448_key* key, word32 inSz) +{ + word32 oid; + int ret, version, length, endKeyIdx, privSz, pubSz; + const byte* priv; + const byte* pub; + + if (input == NULL || inOutIdx == NULL || key == NULL || inSz == 0) + return BAD_FUNC_ARG; + + if (GetSequence(input, inOutIdx, &length, inSz) >= 0) { + endKeyIdx = *inOutIdx + length; + + if (GetMyVersion(input, inOutIdx, &version, inSz) < 0) + return ASN_PARSE_E; + if (version != 0) { + WOLFSSL_MSG("Unrecognized version of ED448 private key"); + return ASN_PARSE_E; + } + + if (GetAlgoId(input, inOutIdx, &oid, oidKeyType, inSz) < 0) + return ASN_PARSE_E; + if (oid != ED448k) + return ASN_PARSE_E; + + if (GetOctetString(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + if (GetOctetString(input, inOutIdx, &privSz, inSz) < 0) + return ASN_PARSE_E; + + priv = input + *inOutIdx; + *inOutIdx += privSz; + } + else { + if (GetOctetString(input, inOutIdx, &privSz, inSz) < 0) + return ASN_PARSE_E; + + if (privSz != 57) + return ASN_PARSE_E; + + priv = input + *inOutIdx; + *inOutIdx += privSz; + endKeyIdx = *inOutIdx; + } + + if (endKeyIdx == (int)*inOutIdx) { + ret = wc_ed448_import_private_only(priv, privSz, key); + } + else { + if (GetASNHeader(input, ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 1, + inOutIdx, &length, inSz) < 0) { + 
return ASN_PARSE_E; + } + if (GetOctetString(input, inOutIdx, &pubSz, inSz) < 0) + return ASN_PARSE_E; + pub = input + *inOutIdx; + *inOutIdx += pubSz; + + ret = wc_ed448_import_private_key(priv, privSz, pub, pubSz, key); + } + if (ret == 0 && endKeyIdx != (int)*inOutIdx) + return ASN_PARSE_E; + + return ret; +} + + +int wc_Ed448PublicKeyDecode(const byte* input, word32* inOutIdx, + ed448_key* key, word32 inSz) +{ + int length; + int ret; + + if (input == NULL || inOutIdx == NULL || key == NULL || inSz == 0) + return BAD_FUNC_ARG; + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + if (GetSequence(input, inOutIdx, &length, inSz) < 0) + return ASN_PARSE_E; + + ret = SkipObjectId(input, inOutIdx, inSz); + if (ret != 0) + return ret; + + /* key header */ + ret = CheckBitString(input, inOutIdx, NULL, inSz, 1, NULL); + if (ret != 0) + return ret; + + /* This is the raw point data compressed or uncompressed. */ + if (wc_ed448_import_public(input + *inOutIdx, inSz - *inOutIdx, key) != 0) + return ASN_ECC_KEY_E; + + return 0; +} + + +#ifdef WOLFSSL_KEY_GEN + +/* build DER formatted ED448 key, + * return length on success, negative on error */ +static int wc_BuildEd448KeyDer(ed448_key* key, byte* output, word32 inLen, + int pubOut) +{ + byte algoArray[MAX_ALGO_SZ]; + byte ver[MAX_VERSION_SZ]; + byte seq[MAX_SEQ_SZ]; + int ret; + word32 idx = 0, seqSz, verSz, algoSz, privSz, pubSz = 0; + + if (key == NULL || output == NULL || inLen == 0) + return BAD_FUNC_ARG; + + if (pubOut) { + pubSz = 2 + 2 + ED448_PUB_KEY_SIZE; + } + privSz = 2 + 2 + ED448_KEY_SIZE; + algoSz = SetAlgoID(ED448k, algoArray, oidKeyType, 0); + verSz = SetMyVersion(0, ver, FALSE); + seqSz = SetSequence(verSz + algoSz + privSz + pubSz, seq); + + if (seqSz + verSz + algoSz + privSz + pubSz > inLen) + return BAD_FUNC_ARG; + + /* write out */ + /* seq */ + XMEMCPY(output + idx, seq, seqSz); + idx = seqSz; + /* ver */ + XMEMCPY(output + idx, ver, verSz); + idx += verSz; + /* algo */ + XMEMCPY(output + idx, algoArray, algoSz); + idx += algoSz; + /* privKey */ + idx += SetOctetString(2 + ED448_KEY_SIZE, output + idx); + idx += SetOctetString(ED448_KEY_SIZE, output + idx); + ret = wc_ed448_export_private_only(key, output + idx, &privSz); + if (ret != 0) + return ret; + idx += privSz; + /* pubKey */ + if (pubOut) { + idx += SetExplicit(1, 2 + ED448_PUB_KEY_SIZE, output + idx); + idx += SetOctetString(ED448_KEY_SIZE, output + idx); + ret = wc_ed448_export_public(key, output + idx, &pubSz); + if (ret != 0) + return ret; + idx += pubSz; + } + + return idx; +} + +/* Write a Private ecc key, including public to DER format, + * length on success else < 0 */ +int wc_Ed448KeyToDer(ed448_key* key, byte* output, word32 inLen) +{ + return wc_BuildEd448KeyDer(key, output, inLen, 1); +} + + + +/* Write only private ecc key to DER format, + * length on success else < 0 */ +int wc_Ed448PrivateKeyToDer(ed448_key* key, byte* output, word32 inLen) +{ + return wc_BuildEd448KeyDer(key, output, inLen, 0); +} + +#endif /* WOLFSSL_KEY_GEN */ + +#endif /* HAVE_ED448 */ + +#if defined(HAVE_OCSP) || defined(HAVE_CRL) + +/* Get raw Date only, no processing, 0 on success */ +static int GetBasicDate(const byte* source, word32* idx, byte* date, + byte* format, int maxIdx) +{ + int ret, length; + const byte *datePtr = NULL; + + WOLFSSL_ENTER("GetBasicDate"); + + ret = GetDateInfo(source, idx, &datePtr, format, &length, maxIdx); + if (ret < 0) + return ret; + + XMEMCPY(date, datePtr, length); + + return 0; +} + +#endif /* HAVE_OCSP || HAVE_CRL 
*/ + + +#ifdef HAVE_OCSP + +static int GetEnumerated(const byte* input, word32* inOutIdx, int *value, + int sz) +{ + word32 idx = *inOutIdx; + word32 len; + byte tag; + + WOLFSSL_ENTER("GetEnumerated"); + + *value = 0; + + if (GetASNTag(input, &idx, &tag, sz) < 0) + return ASN_PARSE_E; + + if (tag != ASN_ENUMERATED) + return ASN_PARSE_E; + + if ((int)idx >= sz) + return BUFFER_E; + + len = input[idx++]; + if (len > 4 || (int)(len + idx) > sz) + return ASN_PARSE_E; + + while (len--) { + *value = *value << 8 | input[idx++]; + } + + *inOutIdx = idx; + + return *value; +} + + +static int DecodeSingleResponse(byte* source, + word32* ioIndex, OcspResponse* resp, word32 size) +{ + word32 idx = *ioIndex, prevIndex, oid, localIdx; + int length, wrapperSz; + CertStatus* cs = resp->status; + int ret; + byte tag; + + WOLFSSL_ENTER("DecodeSingleResponse"); + + /* Outer wrapper of the SEQUENCE OF Single Responses. */ + if (GetSequence(source, &idx, &wrapperSz, size) < 0) + return ASN_PARSE_E; + + prevIndex = idx; + + /* When making a request, we only request one status on one certificate + * at a time. There should only be one SingleResponse */ + + /* Wrapper around the Single Response */ + if (GetSequence(source, &idx, &length, size) < 0) + return ASN_PARSE_E; + + /* Wrapper around the CertID */ + if (GetSequence(source, &idx, &length, size) < 0) + return ASN_PARSE_E; + /* Skip the hash algorithm */ + if (GetAlgoId(source, &idx, &oid, oidIgnoreType, size) < 0) + return ASN_PARSE_E; + /* Save reference to the hash of CN */ + ret = GetOctetString(source, &idx, &length, size); + if (ret < 0) + return ret; + resp->issuerHash = source + idx; + idx += length; + /* Save reference to the hash of the issuer public key */ + ret = GetOctetString(source, &idx, &length, size); + if (ret < 0) + return ret; + resp->issuerKeyHash = source + idx; + idx += length; + + /* Get serial number */ + if (GetSerialNumber(source, &idx, cs->serial, &cs->serialSz, size) < 0) + return ASN_PARSE_E; + + if ( idx >= size ) + return BUFFER_E; + + /* CertStatus */ + switch (source[idx++]) + { + case (ASN_CONTEXT_SPECIFIC | CERT_GOOD): + cs->status = CERT_GOOD; + idx++; + break; + case (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | CERT_REVOKED): + cs->status = CERT_REVOKED; + if (GetLength(source, &idx, &length, size) < 0) + return ASN_PARSE_E; + idx += length; + break; + case (ASN_CONTEXT_SPECIFIC | CERT_UNKNOWN): + cs->status = CERT_UNKNOWN; + idx++; + break; + default: + return ASN_PARSE_E; + } + +#if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY) + cs->thisDateAsn = source + idx; + localIdx = 0; + if (GetDateInfo(cs->thisDateAsn, &localIdx, NULL, + (byte*)&cs->thisDateParsed.type, + &cs->thisDateParsed.length, size) < 0) + return ASN_PARSE_E; + XMEMCPY(cs->thisDateParsed.data, + cs->thisDateAsn + localIdx - cs->thisDateParsed.length, + cs->thisDateParsed.length); +#endif + if (GetBasicDate(source, &idx, cs->thisDate, + &cs->thisDateFormat, size) < 0) + return ASN_PARSE_E; + +#ifndef NO_ASN_TIME +#ifndef WOLFSSL_NO_OCSP_DATE_CHECK + if (!XVALIDATE_DATE(cs->thisDate, cs->thisDateFormat, BEFORE)) + return ASN_BEFORE_DATE_E; +#endif +#endif + + /* The following items are optional. Only check for them if there is more + * unprocessed data in the singleResponse wrapper. 
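+     * (RFC 6960 SingleResponse: nextUpdate [0] EXPLICIT GeneralizedTime
+     * and singleExtensions [1] EXPLICIT Extensions, both OPTIONAL.)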
*/ + + localIdx = idx; + if (((int)(idx - prevIndex) < wrapperSz) && + GetASNTag(source, &localIdx, &tag, size) == 0 && + tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) + { + idx++; + if (GetLength(source, &idx, &length, size) < 0) + return ASN_PARSE_E; +#if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY) + cs->nextDateAsn = source + idx; + localIdx = 0; + if (GetDateInfo(cs->nextDateAsn, &localIdx, NULL, + (byte*)&cs->nextDateParsed.type, + &cs->nextDateParsed.length, size) < 0) + return ASN_PARSE_E; + XMEMCPY(cs->nextDateParsed.data, + cs->nextDateAsn + localIdx - cs->nextDateParsed.length, + cs->nextDateParsed.length); +#endif + if (GetBasicDate(source, &idx, cs->nextDate, + &cs->nextDateFormat, size) < 0) + return ASN_PARSE_E; + +#ifndef NO_ASN_TIME +#ifndef WOLFSSL_NO_OCSP_DATE_CHECK + if (!XVALIDATE_DATE(cs->nextDate, cs->nextDateFormat, AFTER)) + return ASN_AFTER_DATE_E; +#endif +#endif + } + + localIdx = idx; + if (((int)(idx - prevIndex) < wrapperSz) && + GetASNTag(source, &localIdx, &tag, size) == 0 && + tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) + { + idx++; + if (GetLength(source, &idx, &length, size) < 0) + return ASN_PARSE_E; + idx += length; + } + + *ioIndex = idx; + + return 0; +} + +static int DecodeOcspRespExtensions(byte* source, + word32* ioIndex, OcspResponse* resp, word32 sz) +{ + word32 idx = *ioIndex; + int length; + int ext_bound; /* boundary index for the sequence of extensions */ + word32 oid; + int ret; + byte tag; + + WOLFSSL_ENTER("DecodeOcspRespExtensions"); + + if ((idx + 1) > sz) + return BUFFER_E; + + if (GetASNTag(source, &idx, &tag, sz) < 0) + return ASN_PARSE_E; + + if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) + return ASN_PARSE_E; + + if (GetLength(source, &idx, &length, sz) < 0) + return ASN_PARSE_E; + + if (GetSequence(source, &idx, &length, sz) < 0) + return ASN_PARSE_E; + + ext_bound = idx + length; + + while (idx < (word32)ext_bound) { + word32 localIdx; + + if (GetSequence(source, &idx, &length, sz) < 0) { + WOLFSSL_MSG("\tfail: should be a SEQUENCE"); + return ASN_PARSE_E; + } + + oid = 0; + if (GetObjectId(source, &idx, &oid, oidOcspType, sz) < 0) { + WOLFSSL_MSG("\tfail: OBJECT ID"); + return ASN_PARSE_E; + } + + /* check for critical flag */ + if ((idx + 1) > (word32)sz) { + WOLFSSL_MSG("\tfail: malformed buffer"); + return BUFFER_E; + } + + localIdx = idx; + if (GetASNTag(source, &localIdx, &tag, sz) == 0 && tag == ASN_BOOLEAN) { + WOLFSSL_MSG("\tfound optional critical flag, moving past"); + ret = GetBoolean(source, &idx, sz); + if (ret < 0) + return ret; + } + + ret = GetOctetString(source, &idx, &length, sz); + if (ret < 0) + return ret; + + if (oid == OCSP_NONCE_OID) { + /* get data inside extra OCTET_STRING */ + ret = GetOctetString(source, &idx, &length, sz); + if (ret < 0) + return ret; + + resp->nonce = source + idx; + resp->nonceSz = length; + } + + idx += length; + } + + *ioIndex = idx; + return 0; +} + + +static int DecodeResponseData(byte* source, + word32* ioIndex, OcspResponse* resp, word32 size) +{ + word32 idx = *ioIndex, prev_idx, localIdx; + int length; + int version; + int ret; + byte tag; + + WOLFSSL_ENTER("DecodeResponseData"); + + resp->response = source + idx; + prev_idx = idx; + if (GetSequence(source, &idx, &length, size) < 0) + return ASN_PARSE_E; + resp->responseSz = length + idx - prev_idx; + + /* Get version. It is an EXPLICIT[0] DEFAULT(0) value. If this + * item isn't an EXPLICIT[0], then set version to zero and move + * onto the next item. 
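+     * (RFC 6960 ResponseData declares version [0] EXPLICIT Version
+     * DEFAULT v1, so conforming responders normally omit it.)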
+ */ + localIdx = idx; + if (GetASNTag(source, &localIdx, &tag, size) == 0 && + tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED)) + { + idx += 2; /* Eat the value and length */ + if (GetMyVersion(source, &idx, &version, size) < 0) + return ASN_PARSE_E; + } else + version = 0; + + localIdx = idx; + if (GetASNTag(source, &localIdx, &tag, size) == 0 && + ( tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 1) || + tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 2) )) + { + idx++; /* advance past ASN tag */ + if (GetLength(source, &idx, &length, size) < 0) + return ASN_PARSE_E; + idx += length; + } + else + return ASN_PARSE_E; + + /* save pointer to the producedAt time */ + if (GetBasicDate(source, &idx, resp->producedDate, + &resp->producedDateFormat, size) < 0) + return ASN_PARSE_E; + + if ((ret = DecodeSingleResponse(source, &idx, resp, size)) < 0) + return ret; /* ASN_PARSE_E, ASN_BEFORE_DATE_E, ASN_AFTER_DATE_E */ + + /* + * Check the length of the ResponseData against the current index to + * see if there are extensions, they are optional. + */ + if (idx - prev_idx < resp->responseSz) + if (DecodeOcspRespExtensions(source, &idx, resp, size) < 0) + return ASN_PARSE_E; + + *ioIndex = idx; + return 0; +} + + +#ifndef WOLFSSL_NO_OCSP_OPTIONAL_CERTS + +static int DecodeCerts(byte* source, + word32* ioIndex, OcspResponse* resp, word32 size) +{ + word32 idx = *ioIndex; + byte tag; + + WOLFSSL_ENTER("DecodeCerts"); + + if (GetASNTag(source, &idx, &tag, size) < 0) + return ASN_PARSE_E; + + if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) + { + int length; + + if (GetLength(source, &idx, &length, size) < 0) + return ASN_PARSE_E; + + if (GetSequence(source, &idx, &length, size) < 0) + return ASN_PARSE_E; + + resp->cert = source + idx; + resp->certSz = length; + + idx += length; + } + *ioIndex = idx; + return 0; +} + +#endif /* WOLFSSL_NO_OCSP_OPTIONAL_CERTS */ + + +static int DecodeBasicOcspResponse(byte* source, word32* ioIndex, + OcspResponse* resp, word32 size, void* cm, void* heap, int noVerify) +{ + int length; + word32 idx = *ioIndex; + word32 end_index; + int ret; + int sigLength; + + WOLFSSL_ENTER("DecodeBasicOcspResponse"); + (void)heap; + + if (GetSequence(source, &idx, &length, size) < 0) + return ASN_PARSE_E; + + if (idx + length > size) + return ASN_INPUT_E; + end_index = idx + length; + + if ((ret = DecodeResponseData(source, &idx, resp, size)) < 0) + return ret; /* ASN_PARSE_E, ASN_BEFORE_DATE_E, ASN_AFTER_DATE_E */ + + /* Get the signature algorithm */ + if (GetAlgoId(source, &idx, &resp->sigOID, oidSigType, size) < 0) + return ASN_PARSE_E; + + ret = CheckBitString(source, &idx, &sigLength, size, 1, NULL); + if (ret != 0) + return ret; + + resp->sigSz = sigLength; + resp->sig = source + idx; + idx += sigLength; + + /* + * Check the length of the BasicOcspResponse against the current index to + * see if there are certificates, they are optional. + */ +#ifndef WOLFSSL_NO_OCSP_OPTIONAL_CERTS + if (idx < end_index) + { + DecodedCert cert; + + if (DecodeCerts(source, &idx, resp, size) < 0) + return ASN_PARSE_E; + + InitDecodedCert(&cert, resp->cert, resp->certSz, heap); + + /* Don't verify if we don't have access to Cert Manager. */ + ret = ParseCertRelative(&cert, CERT_TYPE, + noVerify ? 
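+                                /* note (assumption from the surrounding
+                                 * logic): NO_VERIFY only parses the responder
+                                 * certificate, while VERIFY_OCSP also checks
+                                 * it against the cert manager's CAs */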
NO_VERIFY : VERIFY_OCSP, cm);
+        if (ret < 0) {
+            WOLFSSL_MSG("\tOCSP Responder certificate parsing failed");
+            FreeDecodedCert(&cert);
+            return ret;
+        }
+
+#ifndef WOLFSSL_NO_OCSP_ISSUER_CHECK
+        if ((cert.extExtKeyUsage & EXTKEYUSE_OCSP_SIGN) == 0) {
+            if (XMEMCMP(cert.subjectHash,
+                        resp->issuerHash, KEYID_SIZE) == 0) {
+                WOLFSSL_MSG("\tOCSP Response signed by issuer");
+            }
+            else {
+                WOLFSSL_MSG("\tOCSP Responder key usage check failed");
+    #ifdef OPENSSL_EXTRA
+                resp->verifyError = OCSP_BAD_ISSUER;
+    #else
+                FreeDecodedCert(&cert);
+                return BAD_OCSP_RESPONDER;
+    #endif
+            }
+        }
+#endif
+
+        /* ConfirmSignature is blocking here */
+        ret = ConfirmSignature(&cert.sigCtx,
+            resp->response, resp->responseSz,
+            cert.publicKey, cert.pubKeySize, cert.keyOID,
+            resp->sig, resp->sigSz, resp->sigOID, NULL);
+
+        FreeDecodedCert(&cert);
+
+        if (ret != 0) {
+            WOLFSSL_MSG("\tOCSP Confirm signature failed");
+            return ASN_OCSP_CONFIRM_E;
+        }
+    }
+    else
+#endif /* WOLFSSL_NO_OCSP_OPTIONAL_CERTS */
+    {
+        Signer* ca;
+        int sigValid = -1;
+
+        #ifndef NO_SKID
+            ca = GetCA(cm, resp->issuerKeyHash);
+        #else
+            ca = GetCA(cm, resp->issuerHash);
+        #endif
+
+        if (ca) {
+            SignatureCtx sigCtx;
+            InitSignatureCtx(&sigCtx, heap, INVALID_DEVID);
+
+            /* ConfirmSignature is blocking here */
+            sigValid = ConfirmSignature(&sigCtx, resp->response,
+                resp->responseSz, ca->publicKey, ca->pubKeySize, ca->keyOID,
+                resp->sig, resp->sigSz, resp->sigOID, NULL);
+        }
+        if (ca == NULL || sigValid != 0) {
+            WOLFSSL_MSG("\tOCSP Confirm signature failed");
+            return ASN_OCSP_CONFIRM_E;
+        }
+
+        (void)noVerify;
+    }
+
+    *ioIndex = idx;
+    return 0;
+}
+
+
+void InitOcspResponse(OcspResponse* resp, CertStatus* status,
+                                          byte* source, word32 inSz)
+{
+    WOLFSSL_ENTER("InitOcspResponse");
+
+    XMEMSET(status, 0, sizeof(CertStatus));
+    XMEMSET(resp,   0, sizeof(OcspResponse));
+
+    resp->responseStatus = -1;
+    resp->status         = status;
+    resp->source         = source;
+    resp->maxIdx         = inSz;
+}
+
+
+int OcspResponseDecode(OcspResponse* resp, void* cm, void* heap, int noVerify)
+{
+    int ret;
+    int length = 0;
+    word32 idx = 0;
+    byte* source = resp->source;
+    word32 size = resp->maxIdx;
+    word32 oid;
+    byte   tag;
+
+    WOLFSSL_ENTER("OcspResponseDecode");
+
+    /* peel the outer SEQUENCE wrapper */
+    if (GetSequence(source, &idx, &length, size) < 0)
+        return ASN_PARSE_E;
+
+    /* First get the responseStatus, an ENUMERATED */
+    if (GetEnumerated(source, &idx, &resp->responseStatus, size) < 0)
+        return ASN_PARSE_E;
+
+    if (resp->responseStatus != OCSP_SUCCESSFUL)
+        return 0;
+
+    /* Next is an EXPLICIT record called ResponseBytes, OPTIONAL */
+    if (idx >= size)
+        return ASN_INPUT_E;
+    if (GetASNTag(source, &idx, &tag, size) < 0)
+        return ASN_PARSE_E;
+    if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC))
+        return ASN_PARSE_E;
+    if (GetLength(source, &idx, &length, size) < 0)
+        return ASN_PARSE_E;
+
+    /* Get the responseBytes SEQUENCE */
+    if (GetSequence(source, &idx, &length, size) < 0)
+        return ASN_PARSE_E;
+
+    /* Check ObjectID for the responseBytes */
+    if (GetObjectId(source, &idx, &oid, oidOcspType, size) < 0)
+        return ASN_PARSE_E;
+    if (oid != OCSP_BASIC_OID)
+        return ASN_PARSE_E;
+    ret = GetOctetString(source, &idx, &length, size);
+    if (ret < 0)
+        return ret;
+
+    ret = DecodeBasicOcspResponse(source, &idx, resp, size, cm, heap, noVerify);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+
+word32 EncodeOcspRequestExtensions(OcspRequest* req, byte* output, word32 size)
+{
+    const byte NonceObjId[] = { 0x2b, 0x06, 0x01, 0x05, 0x05, 0x07,
+                                0x30, 0x01, 0x02 };
+    byte
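+    /* Illustrative output layout for a 16-byte nonce (hypothetical sizes;
+     * SetSequence/SetOctetString compute the real headers):
+     *   30 21                                  -- requestExtensions SEQUENCE
+     *     30 1f                                -- Extension SEQUENCE
+     *       06 09 2b 06 01 05 05 07 30 01 02   -- id-pkix-ocsp-nonce
+     *       04 12                              -- extnValue OCTET STRING
+     *         04 10 <16 nonce bytes>           -- inner OCTET STRING
+     */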
seqArray[5][MAX_SEQ_SZ]; + word32 seqSz[5], totalSz = (word32)sizeof(NonceObjId); + + WOLFSSL_ENTER("SetOcspReqExtensions"); + + if (!req || !output || !req->nonceSz) + return 0; + + totalSz += req->nonceSz; + totalSz += seqSz[0] = SetOctetString(req->nonceSz, seqArray[0]); + totalSz += seqSz[1] = SetOctetString(req->nonceSz + seqSz[0], seqArray[1]); + totalSz += seqSz[2] = SetObjectId(sizeof(NonceObjId), seqArray[2]); + totalSz += seqSz[3] = SetSequence(totalSz, seqArray[3]); + totalSz += seqSz[4] = SetSequence(totalSz, seqArray[4]); + + if (totalSz > size) + return 0; + + totalSz = 0; + + XMEMCPY(output + totalSz, seqArray[4], seqSz[4]); + totalSz += seqSz[4]; + + XMEMCPY(output + totalSz, seqArray[3], seqSz[3]); + totalSz += seqSz[3]; + + XMEMCPY(output + totalSz, seqArray[2], seqSz[2]); + totalSz += seqSz[2]; + + XMEMCPY(output + totalSz, NonceObjId, sizeof(NonceObjId)); + totalSz += (word32)sizeof(NonceObjId); + + XMEMCPY(output + totalSz, seqArray[1], seqSz[1]); + totalSz += seqSz[1]; + + XMEMCPY(output + totalSz, seqArray[0], seqSz[0]); + totalSz += seqSz[0]; + + XMEMCPY(output + totalSz, req->nonce, req->nonceSz); + totalSz += req->nonceSz; + + return totalSz; +} + + +int EncodeOcspRequest(OcspRequest* req, byte* output, word32 size) +{ + byte seqArray[5][MAX_SEQ_SZ]; + /* The ASN.1 of the OCSP Request is an onion of sequences */ + byte algoArray[MAX_ALGO_SZ]; + byte issuerArray[MAX_ENCODED_DIG_SZ]; + byte issuerKeyArray[MAX_ENCODED_DIG_SZ]; + byte snArray[MAX_SN_SZ]; + byte extArray[MAX_OCSP_EXT_SZ]; + word32 seqSz[5], algoSz, issuerSz, issuerKeySz, extSz, totalSz; + int i, snSz; + + WOLFSSL_ENTER("EncodeOcspRequest"); + +#ifdef NO_SHA + algoSz = SetAlgoID(SHA256h, algoArray, oidHashType, 0); +#else + algoSz = SetAlgoID(SHAh, algoArray, oidHashType, 0); +#endif + + issuerSz = SetDigest(req->issuerHash, KEYID_SIZE, issuerArray); + issuerKeySz = SetDigest(req->issuerKeyHash, KEYID_SIZE, issuerKeyArray); + snSz = SetSerialNumber(req->serial, req->serialSz, snArray, + MAX_SN_SZ, MAX_SN_SZ); + extSz = 0; + + if (snSz < 0) + return snSz; + + if (req->nonceSz) { + /* TLS Extensions use this function too - put extensions after + * ASN.1: Context Specific [2]. 
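+     * For example, the 35-byte nonce extension sketched above would be
+     * prefixed by SetExplicit(2, ...) with the bytes a2 23
+     * (a2 = ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 2, 0x23 = 35).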
+     */
+        extSz = EncodeOcspRequestExtensions(req, extArray + 2,
+                                            OCSP_NONCE_EXT_SZ);
+        extSz += SetExplicit(2, extSz, extArray);
+    }
+
+    totalSz = algoSz + issuerSz + issuerKeySz + snSz;
+    for (i = 4; i >= 0; i--) {
+        seqSz[i] = SetSequence(totalSz, seqArray[i]);
+        totalSz += seqSz[i];
+        if (i == 2) totalSz += extSz;
+    }
+
+    if (output == NULL)
+        return totalSz;
+    if (totalSz > size)
+        return BUFFER_E;
+
+    totalSz = 0;
+    for (i = 0; i < 5; i++) {
+        XMEMCPY(output + totalSz, seqArray[i], seqSz[i]);
+        totalSz += seqSz[i];
+    }
+
+    XMEMCPY(output + totalSz, algoArray, algoSz);
+    totalSz += algoSz;
+
+    XMEMCPY(output + totalSz, issuerArray, issuerSz);
+    totalSz += issuerSz;
+
+    XMEMCPY(output + totalSz, issuerKeyArray, issuerKeySz);
+    totalSz += issuerKeySz;
+
+    XMEMCPY(output + totalSz, snArray, snSz);
+    totalSz += snSz;
+
+    if (extSz != 0) {
+        XMEMCPY(output + totalSz, extArray, extSz);
+        totalSz += extSz;
+    }
+
+    return totalSz;
+}
+
+
+int InitOcspRequest(OcspRequest* req, DecodedCert* cert, byte useNonce,
+                                                                 void* heap)
+{
+    int ret;
+
+    WOLFSSL_ENTER("InitOcspRequest");
+
+    if (req == NULL)
+        return BAD_FUNC_ARG;
+
+    ForceZero(req, sizeof(OcspRequest));
+    req->heap = heap;
+
+    if (cert) {
+        XMEMCPY(req->issuerHash,    cert->issuerHash,    KEYID_SIZE);
+        XMEMCPY(req->issuerKeyHash, cert->issuerKeyHash, KEYID_SIZE);
+
+        req->serial = (byte*)XMALLOC(cert->serialSz, req->heap,
+                                     DYNAMIC_TYPE_OCSP_REQUEST);
+        if (req->serial == NULL)
+            return MEMORY_E;
+
+        XMEMCPY(req->serial, cert->serial, cert->serialSz);
+        req->serialSz = cert->serialSz;
+
+        if (cert->extAuthInfoSz != 0 && cert->extAuthInfo != NULL) {
+            req->url = (byte*)XMALLOC(cert->extAuthInfoSz + 1, req->heap,
+                                      DYNAMIC_TYPE_OCSP_REQUEST);
+            if (req->url == NULL) {
+                XFREE(req->serial, req->heap, DYNAMIC_TYPE_OCSP_REQUEST);
+                return MEMORY_E;
+            }
+
+            XMEMCPY(req->url, cert->extAuthInfo, cert->extAuthInfoSz);
+            req->urlSz = cert->extAuthInfoSz;
+            req->url[req->urlSz] = 0;
+        }
+    }
+
+    if (useNonce) {
+        WC_RNG rng;
+
+    #ifndef HAVE_FIPS
+        ret = wc_InitRng_ex(&rng, req->heap, INVALID_DEVID);
+    #else
+        ret = wc_InitRng(&rng);
+    #endif
+        if (ret != 0) {
+            WOLFSSL_MSG("\tCannot initialize RNG. Skipping the OCSP Nonce.");
+        } else {
+            if (wc_RNG_GenerateBlock(&rng, req->nonce, MAX_OCSP_NONCE_SZ) != 0)
+                WOLFSSL_MSG("\tCannot run RNG. Skipping the OCSP Nonce.");
+            else
+                req->nonceSz = MAX_OCSP_NONCE_SZ;
+
+            wc_FreeRng(&rng);
+        }
+    }
+
+    return 0;
+}
+
+void FreeOcspRequest(OcspRequest* req)
+{
+    WOLFSSL_ENTER("FreeOcspRequest");
+
+    if (req) {
+        if (req->serial)
+            XFREE(req->serial, req->heap, DYNAMIC_TYPE_OCSP_REQUEST);
+        req->serial = NULL;
+
+#ifdef OPENSSL_EXTRA
+        if (req->serialInt) {
+            if (req->serialInt->isDynamic) {
+                XFREE(req->serialInt->data, NULL, DYNAMIC_TYPE_OPENSSL);
+            }
+            XFREE(req->serialInt, NULL, DYNAMIC_TYPE_OPENSSL);
+        }
+        req->serialInt = NULL;
+#endif
+
+        if (req->url)
+            XFREE(req->url, req->heap, DYNAMIC_TYPE_OCSP_REQUEST);
+        req->url = NULL;
+    }
+}
+
+
+int CompareOcspReqResp(OcspRequest* req, OcspResponse* resp)
+{
+    int cmp;
+
+    WOLFSSL_ENTER("CompareOcspReqResp");
+
+    if (req == NULL)
+    {
+        WOLFSSL_MSG("\tReq missing");
+        return -1;
+    }
+
+    if (resp == NULL)
+    {
+        WOLFSSL_MSG("\tResp missing");
+        return 1;
+    }
+
+    /* Nonces are not critical. The responder may not necessarily add
+     * the nonce to the response.
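+     * (RFC 6960 defines the nonce extension as optional.)  Unless
+     * WOLFSSL_FORCE_OCSP_NONCE_CHECK is defined, a response that omits the
+     * nonce is accepted and the comparison below is skipped.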
*/ + if (req->nonceSz +#ifndef WOLFSSL_FORCE_OCSP_NONCE_CHECK + && resp->nonceSz != 0 +#endif + ) { + cmp = req->nonceSz - resp->nonceSz; + if (cmp != 0) + { + WOLFSSL_MSG("\tnonceSz mismatch"); + return cmp; + } + + cmp = XMEMCMP(req->nonce, resp->nonce, req->nonceSz); + if (cmp != 0) + { + WOLFSSL_MSG("\tnonce mismatch"); + return cmp; + } + } + + cmp = XMEMCMP(req->issuerHash, resp->issuerHash, KEYID_SIZE); + if (cmp != 0) + { + WOLFSSL_MSG("\tissuerHash mismatch"); + return cmp; + } + + cmp = XMEMCMP(req->issuerKeyHash, resp->issuerKeyHash, KEYID_SIZE); + if (cmp != 0) + { + WOLFSSL_MSG("\tissuerKeyHash mismatch"); + return cmp; + } + + cmp = req->serialSz - resp->status->serialSz; + if (cmp != 0) + { + WOLFSSL_MSG("\tserialSz mismatch"); + return cmp; + } + + cmp = XMEMCMP(req->serial, resp->status->serial, req->serialSz); + if (cmp != 0) + { + WOLFSSL_MSG("\tserial mismatch"); + return cmp; + } + + return 0; +} + +#endif /* HAVE_OCSP */ + + +/* store WC_SHA hash of NAME */ +int GetNameHash(const byte* source, word32* idx, byte* hash, + int maxIdx) +{ + int length; /* length of all distinguished names */ + int ret; + word32 dummy; + byte tag; + + WOLFSSL_ENTER("GetNameHash"); + + dummy = *idx; + if (GetASNTag(source, &dummy, &tag, maxIdx) == 0 && tag == ASN_OBJECT_ID) { + WOLFSSL_MSG("Trying optional prefix..."); + + if (GetLength(source, idx, &length, maxIdx) < 0) + return ASN_PARSE_E; + + *idx += length; + WOLFSSL_MSG("Got optional prefix"); + } + + /* For OCSP, RFC2560 section 4.1.1 states the issuer hash should be + * calculated over the entire DER encoding of the Name field, including + * the tag and length. */ + dummy = *idx; + if (GetSequence(source, idx, &length, maxIdx) < 0) + return ASN_PARSE_E; + + ret = CalcHashId(source + dummy, length + *idx - dummy, hash); + + *idx += length; + + return ret; +} + + +#ifdef HAVE_CRL + +/* initialize decoded CRL */ +void InitDecodedCRL(DecodedCRL* dcrl, void* heap) +{ + WOLFSSL_MSG("InitDecodedCRL"); + + XMEMSET(dcrl, 0, sizeof(DecodedCRL)); + dcrl->heap = heap; + #ifdef WOLFSSL_HEAP_TEST + dcrl->heap = (void*)WOLFSSL_HEAP_TEST; + #endif +} + + +/* free decoded CRL resources */ +void FreeDecodedCRL(DecodedCRL* dcrl) +{ + RevokedCert* tmp = dcrl->certs; + + WOLFSSL_MSG("FreeDecodedCRL"); + + while(tmp) { + RevokedCert* next = tmp->next; + XFREE(tmp, dcrl->heap, DYNAMIC_TYPE_REVOKED); + tmp = next; + } +} + + +/* Get Revoked Cert list, 0 on success */ +static int GetRevoked(const byte* buff, word32* idx, DecodedCRL* dcrl, + int maxIdx) +{ + int ret, len; + word32 end; + byte b; + RevokedCert* rc; + + WOLFSSL_ENTER("GetRevoked"); + + if (GetSequence(buff, idx, &len, maxIdx) < 0) + return ASN_PARSE_E; + + end = *idx + len; + + rc = (RevokedCert*)XMALLOC(sizeof(RevokedCert), dcrl->heap, + DYNAMIC_TYPE_REVOKED); + if (rc == NULL) { + WOLFSSL_MSG("Alloc Revoked Cert failed"); + return MEMORY_E; + } + + if (GetSerialNumber(buff, idx, rc->serialNumber, &rc->serialSz, + maxIdx) < 0) { + XFREE(rc, dcrl->heap, DYNAMIC_TYPE_REVOKED); + return ASN_PARSE_E; + } + + /* add to list */ + rc->next = dcrl->certs; + dcrl->certs = rc; + dcrl->totalCerts++; + + /* get date */ + ret = GetDateInfo(buff, idx, NULL, &b, NULL, maxIdx); + if (ret < 0) { + WOLFSSL_MSG("Expecting Date"); + return ret; + } + + /* skip extensions */ + *idx = end; + + return 0; +} + + +/* Get CRL Signature, 0 on success */ +static int GetCRL_Signature(const byte* source, word32* idx, DecodedCRL* dcrl, + int maxIdx) +{ + int length; + int ret; + + WOLFSSL_ENTER("GetCRL_Signature"); + + ret 
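+    /* The signature is a DER BIT STRING; illustrative layout:
+     *   03 <len> 00 <signature octets>
+     * where the leading 00 is the unused-bits count that CheckBitString
+     * (called with zeroBits = 1) verifies and skips, leaving idx on the
+     * raw signature. */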
= CheckBitString(source, idx, &length, maxIdx, 1, NULL); + if (ret != 0) + return ret; + dcrl->sigLength = length; + + dcrl->signature = (byte*)&source[*idx]; + *idx += dcrl->sigLength; + + return 0; +} + +int VerifyCRL_Signature(SignatureCtx* sigCtx, const byte* toBeSigned, + word32 tbsSz, const byte* signature, word32 sigSz, + word32 signatureOID, Signer *ca, void* heap) +{ + /* try to confirm/verify signature */ +#ifndef IGNORE_KEY_EXTENSIONS + if ((ca->keyUsage & KEYUSE_CRL_SIGN) == 0) { + WOLFSSL_MSG("CA cannot sign CRLs"); + return ASN_CRL_NO_SIGNER_E; + } +#endif /* IGNORE_KEY_EXTENSIONS */ + + InitSignatureCtx(sigCtx, heap, INVALID_DEVID); + if (ConfirmSignature(sigCtx, toBeSigned, tbsSz, ca->publicKey, + ca->pubKeySize, ca->keyOID, signature, sigSz, + signatureOID, NULL) != 0) { + WOLFSSL_MSG("CRL Confirm signature failed"); + return ASN_CRL_CONFIRM_E; + } + + return 0; +} + + +static int ParseCRL_CertList(DecodedCRL* dcrl, const byte* buf, + word32* inOutIdx, int sz) +{ + word32 oid, dateIdx, idx, checkIdx; + int version, doNextDate = 1; + byte tag; + + if (dcrl == NULL || inOutIdx == NULL || buf == NULL) { + return BAD_FUNC_ARG; + } + + /* may have version */ + idx = *inOutIdx; + + checkIdx = idx; + if (GetASNTag(buf, &checkIdx, &tag, sz) == 0 && tag == ASN_INTEGER) { + if (GetMyVersion(buf, &idx, &version, sz) < 0) + return ASN_PARSE_E; + } + + if (GetAlgoId(buf, &idx, &oid, oidIgnoreType, sz) < 0) + return ASN_PARSE_E; + + if (GetNameHash(buf, &idx, dcrl->issuerHash, sz) < 0) + return ASN_PARSE_E; + + if (GetBasicDate(buf, &idx, dcrl->lastDate, &dcrl->lastDateFormat, sz) < 0) + return ASN_PARSE_E; + + dateIdx = idx; + + if (GetBasicDate(buf, &idx, dcrl->nextDate, &dcrl->nextDateFormat, sz) < 0) + { +#ifndef WOLFSSL_NO_CRL_NEXT_DATE + (void)dateIdx; + return ASN_PARSE_E; +#else + dcrl->nextDateFormat = ASN_OTHER_TYPE; /* skip flag */ + doNextDate = 0; + idx = dateIdx; +#endif + } + + if (doNextDate) { +#ifndef NO_ASN_TIME + if (!XVALIDATE_DATE(dcrl->nextDate, dcrl->nextDateFormat, AFTER)) { + WOLFSSL_MSG("CRL after date is no longer valid"); + return ASN_AFTER_DATE_E; + } +#endif + } + + checkIdx = idx; + if (idx != dcrl->sigIndex && + GetASNTag(buf, &checkIdx, &tag, sz) == 0 && tag != CRL_EXTENSIONS) { + + int len; + + if (GetSequence(buf, &idx, &len, sz) < 0) + return ASN_PARSE_E; + len += idx; + + while (idx < (word32)len) { + if (GetRevoked(buf, &idx, dcrl, len) < 0) + return ASN_PARSE_E; + } + } + + *inOutIdx = idx; + + return 0; +} + + +#ifndef NO_SKID +static int ParseCRL_AuthKeyIdExt(const byte* input, int sz, DecodedCRL* dcrl) +{ + word32 idx = 0; + int length = 0, ret = 0; + byte tag; + + WOLFSSL_ENTER("ParseCRL_AuthKeyIdExt"); + + if (GetSequence(input, &idx, &length, sz) < 0) { + WOLFSSL_MSG("\tfail: should be a SEQUENCE\n"); + return ASN_PARSE_E; + } + + if (GetASNTag(input, &idx, &tag, sz) < 0) { + return ASN_PARSE_E; + } + + if (tag != (ASN_CONTEXT_SPECIFIC | 0)) { + WOLFSSL_MSG("\tinfo: OPTIONAL item 0, not available\n"); + return 0; + } + + if (GetLength(input, &idx, &length, sz) <= 0) { + WOLFSSL_MSG("\tfail: extension data length"); + return ASN_PARSE_E; + } + + dcrl->extAuthKeyIdSet = 1; + if (length == KEYID_SIZE) { + XMEMCPY(dcrl->extAuthKeyId, input + idx, length); + } + else { + ret = CalcHashId(input + idx, length, dcrl->extAuthKeyId); + } + + return ret; +} +#endif + + +static int ParseCRL_Extensions(DecodedCRL* dcrl, const byte* buf, + word32* inOutIdx, word32 sz) +{ + int length; + word32 idx; + word32 ext_bound; /* boundary index for the sequence of 
extensions */
+    word32 oid;
+    byte tag;
+
+    WOLFSSL_ENTER("ParseCRL_Extensions");
+    (void)dcrl;
+
+    if (inOutIdx == NULL)
+        return BAD_FUNC_ARG;
+
+    idx = *inOutIdx;
+
+    /* CRL Extensions are optional */
+    if ((idx + 1) > sz)
+        return 0;
+
+    /* CRL Extensions are optional */
+    if (GetASNTag(buf, &idx, &tag, sz) < 0)
+        return 0;
+
+    /* CRL Extensions are optional */
+    if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+        return 0;
+
+    if (GetLength(buf, &idx, &length, sz) < 0)
+        return ASN_PARSE_E;
+
+    if (GetSequence(buf, &idx, &length, sz) < 0)
+        return ASN_PARSE_E;
+
+    ext_bound = idx + length;
+
+    while (idx < (word32)ext_bound) {
+        word32 localIdx;
+        int ret;
+
+        if (GetSequence(buf, &idx, &length, sz) < 0) {
+            WOLFSSL_MSG("\tfail: should be a SEQUENCE");
+            return ASN_PARSE_E;
+        }
+
+        oid = 0;
+        if (GetObjectId(buf, &idx, &oid, oidCrlExtType, sz) < 0) {
+            WOLFSSL_MSG("\tfail: OBJECT ID");
+            return ASN_PARSE_E;
+        }
+
+        /* check for critical flag */
+        if ((idx + 1) > (word32)sz) {
+            WOLFSSL_MSG("\tfail: malformed buffer");
+            return BUFFER_E;
+        }
+
+        localIdx = idx;
+        if (GetASNTag(buf, &localIdx, &tag, sz) == 0 && tag == ASN_BOOLEAN) {
+            WOLFSSL_MSG("\tfound optional critical flag, moving past");
+            ret = GetBoolean(buf, &idx, sz);
+            if (ret < 0)
+                return ret;
+        }
+
+        ret = GetOctetString(buf, &idx, &length, sz);
+        if (ret < 0)
+            return ret;
+
+        if (oid == AUTH_KEY_OID) {
+        #ifndef NO_SKID
+            ret = ParseCRL_AuthKeyIdExt(buf + idx, length, dcrl);
+            if (ret < 0) {
+                WOLFSSL_MSG("\tcouldn't parse AuthKeyId extension");
+                return ret;
+            }
+        #endif
+        }
+
+        idx += length;
+    }
+
+    *inOutIdx = idx;
+
+    return 0;
+}
+
+
+/* parse crl buffer into decoded state, 0 on success */
+int ParseCRL(DecodedCRL* dcrl, const byte* buff, word32 sz, void* cm)
+{
+    int          len;
+    word32       idx = 0;
+    Signer*      ca = NULL;
+    SignatureCtx sigCtx;
+
+    WOLFSSL_MSG("ParseCRL");
+
+    /* raw crl hash */
+    /* hash here if needed for optimized comparisons
+     * wc_Sha sha;
+     * wc_InitSha(&sha);
+     * wc_ShaUpdate(&sha, buff, sz);
+     * wc_ShaFinal(&sha, dcrl->crlHash); */
+
+    if (GetSequence(buff, &idx, &len, sz) < 0)
+        return ASN_PARSE_E;
+
+    dcrl->certBegin = idx;
+    /* Normalize sz for the length inside the outer sequence.
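+     * Sketch of the RFC 5280 CertificateList being walked here:
+     *   SEQUENCE {                        -- ends at the normalized sz
+     *     tbsCertList        SEQUENCE     -- certBegin .. sigIndex
+     *     signatureAlgorithm AlgorithmIdentifier
+     *     signatureValue     BIT STRING
+     *   }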
*/ + sz = len + idx; + + if (GetSequence(buff, &idx, &len, sz) < 0) + return ASN_PARSE_E; + dcrl->sigIndex = len + idx; + + if (ParseCRL_CertList(dcrl, buff, &idx, idx + len) < 0) + return ASN_PARSE_E; + + if (ParseCRL_Extensions(dcrl, buff, &idx, idx + len) < 0) + return ASN_PARSE_E; + + idx = dcrl->sigIndex; + + if (GetAlgoId(buff, &idx, &dcrl->signatureOID, oidSigType, sz) < 0) + return ASN_PARSE_E; + + if (GetCRL_Signature(buff, &idx, dcrl, sz) < 0) + return ASN_PARSE_E; + + /* openssl doesn't add skid by default for CRLs cause firefox chokes + if experiencing issues uncomment NO_SKID define in CRL section of + wolfssl/wolfcrypt/settings.h */ +#ifndef NO_SKID + if (dcrl->extAuthKeyIdSet) { + ca = GetCA(cm, dcrl->extAuthKeyId); /* more unique than issuerHash */ + } + if (ca != NULL && XMEMCMP(dcrl->issuerHash, ca->subjectNameHash, + KEYID_SIZE) != 0) { + ca = NULL; + } + if (ca == NULL) { + ca = GetCAByName(cm, dcrl->issuerHash); /* last resort */ + /* If AKID is available then this CA doesn't have the public + * key required */ + if (ca && dcrl->extAuthKeyIdSet) { + WOLFSSL_MSG("CA SKID doesn't match AKID"); + ca = NULL; + } + } +#else + ca = GetCA(cm, dcrl->issuerHash); +#endif /* !NO_SKID */ + WOLFSSL_MSG("About to verify CRL signature"); + + if (ca == NULL) { + WOLFSSL_MSG("Did NOT find CRL issuer CA"); + return ASN_CRL_NO_SIGNER_E; + } + + WOLFSSL_MSG("Found CRL issuer CA"); + return VerifyCRL_Signature(&sigCtx, buff + dcrl->certBegin, + dcrl->sigIndex - dcrl->certBegin, dcrl->signature, dcrl->sigLength, + dcrl->signatureOID, ca, dcrl->heap); +} + +#endif /* HAVE_CRL */ + + + +#ifdef WOLFSSL_CERT_PIV + +int wc_ParseCertPIV(wc_CertPIV* piv, const byte* buf, word32 totalSz) +{ + int length = 0; + word32 idx = 0; + + WOLFSSL_ENTER("wc_ParseCertPIV"); + + if (piv == NULL || buf == NULL || totalSz == 0) + return BAD_FUNC_ARG; + + XMEMSET(piv, 0, sizeof(wc_CertPIV)); + + /* Detect Identiv PIV (with 0x0A, 0x0B and 0x0C sections) */ + /* Certificate (0A 82 05FA) */ + if (GetASNHeader(buf, ASN_PIV_CERT, &idx, &length, totalSz) >= 0) { + /* Identiv Type PIV card */ + piv->isIdentiv = 1; + + piv->cert = &buf[idx]; + piv->certSz = length; + idx += length; + + /* Nonce (0B 14) */ + if (GetASNHeader(buf, ASN_PIV_NONCE, &idx, &length, totalSz) >= 0) { + piv->nonce = &buf[idx]; + piv->nonceSz = length; + idx += length; + } + + /* Signed Nonce (0C 82 0100) */ + if (GetASNHeader(buf, ASN_PIV_SIGNED_NONCE, &idx, &length, totalSz) >= 0) { + piv->signedNonce = &buf[idx]; + piv->signedNonceSz = length; + } + + idx = 0; + buf = piv->cert; + totalSz = piv->certSz; + } + + /* Certificate Buffer Total Size (53 82 05F6) */ + if (GetASNHeader(buf, ASN_APPLICATION | ASN_PRINTABLE_STRING, &idx, + &length, totalSz) < 0) { + return ASN_PARSE_E; + } + /* PIV Certificate (70 82 05ED) */ + if (GetASNHeader(buf, ASN_PIV_TAG_CERT, &idx, &length, + totalSz) < 0) { + return ASN_PARSE_E; + } + + /* Capture certificate buffer pointer and length */ + piv->cert = &buf[idx]; + piv->certSz = length; + idx += length; + + /* PIV Certificate Info (71 01 00) */ + if (GetASNHeader(buf, ASN_PIV_TAG_CERT_INFO, &idx, &length, + totalSz) >= 0) { + if (length >= 1) { + piv->compression = (buf[idx] & ASN_PIV_CERT_INFO_COMPRESSED); + piv->isX509 = (buf[idx] & ASN_PIV_CERT_INFO_ISX509); + } + idx += length; + } + + /* PIV Error Detection (FE 00) */ + if (GetASNHeader(buf, ASN_PIV_TAG_ERR_DET, &idx, &length, + totalSz) >= 0) { + piv->certErrDet = &buf[idx]; + piv->certErrDetSz = length; + idx += length; + } + + return 0; +} + +#endif /* 
WOLFSSL_CERT_PIV */ + + +#undef ERROR_OUT + +#endif /* !NO_ASN */ + +#ifdef WOLFSSL_SEP + + +#endif /* WOLFSSL_SEP */ diff --git a/client/wolfssl/wolfcrypt/src/async.c b/client/wolfssl/wolfcrypt/src/async.c new file mode 100644 index 0000000..e69de29 diff --git a/client/wolfssl/wolfcrypt/src/blake2b.c b/client/wolfssl/wolfcrypt/src/blake2b.c new file mode 100644 index 0000000..1541947 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/blake2b.c @@ -0,0 +1,452 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Written in 2012 by Samuel Neves + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see . +*/ +/* blake2b.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef HAVE_BLAKE2 + +#include +#include + + +static const word64 blake2b_IV[8] = +{ + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL +}; + +static const byte blake2b_sigma[12][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } +}; + + +static WC_INLINE int blake2b_set_lastnode( blake2b_state *S ) +{ + S->f[1] = ~0ULL; + return 0; +} + +/* Some helper functions, not necessarily useful */ +static WC_INLINE int blake2b_set_lastblock( blake2b_state *S ) +{ + if( S->last_node ) blake2b_set_lastnode( S ); + + S->f[0] = ~0ULL; + return 0; +} + +static WC_INLINE int blake2b_increment_counter( blake2b_state *S, const word64 + inc ) +{ + S->t[0] += inc; + S->t[1] += ( S->t[0] < inc ); + return 0; +} + +static WC_INLINE int blake2b_init0( blake2b_state *S ) +{ + int i; + XMEMSET( S, 0, sizeof( blake2b_state ) ); + + for( i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; + + return 0; +} + +/* init xors IV with input parameter 
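+   block. Worked example (follows from the BLAKE2b spec): for
+   blake2b_init(S, 64) the serialized block begins 40 00 01 01 ...
+   (digest_length, key_length, fanout, depth), so the first state word is
+   h[0] = 0x6a09e667f3bcc908 ^ 0x0000000001010040 = 0x6a09e667f2bdc948.
+   The loop below applies that XOR to each 8-byte slice of the parameter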
block */ +int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) +{ + word32 i; + byte *p ; + blake2b_init0( S ); + p = ( byte * )( P ); + + /* IV XOR ParamBlock */ + for( i = 0; i < 8; ++i ) + S->h[i] ^= load64( p + sizeof( S->h[i] ) * i ); + + return 0; +} + + + +int blake2b_init( blake2b_state *S, const byte outlen ) +{ + blake2b_param P[1]; + + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + +#ifdef WOLFSSL_BLAKE2B_INIT_EACH_FIELD + P->digest_length = outlen; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store64( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + XMEMSET( P->reserved, 0, sizeof( P->reserved ) ); + XMEMSET( P->salt, 0, sizeof( P->salt ) ); + XMEMSET( P->personal, 0, sizeof( P->personal ) ); +#else + XMEMSET( P, 0, sizeof( *P ) ); + P->digest_length = outlen; + P->fanout = 1; + P->depth = 1; +#endif + return blake2b_init_param( S, P ); +} + + +int blake2b_init_key( blake2b_state *S, const byte outlen, const void *key, + const byte keylen ) +{ + blake2b_param P[1]; + + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + if ( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1; + +#ifdef WOLFSSL_BLAKE2B_INIT_EACH_FIELD + P->digest_length = outlen; + P->key_length = keylen; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store64( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + XMEMSET( P->reserved, 0, sizeof( P->reserved ) ); + XMEMSET( P->salt, 0, sizeof( P->salt ) ); + XMEMSET( P->personal, 0, sizeof( P->personal ) ); +#else + XMEMSET( P, 0, sizeof( *P ) ); + P->digest_length = outlen; + P->key_length = keylen; + P->fanout = 1; + P->depth = 1; +#endif + + if( blake2b_init_param( S, P ) < 0 ) return -1; + + { +#ifdef WOLFSSL_SMALL_STACK + byte* block; + + block = (byte*)XMALLOC(BLAKE2B_BLOCKBYTES, NULL, DYNAMIC_TYPE_TMP_BUFFER); + + if ( block == NULL ) return -1; +#else + byte block[BLAKE2B_BLOCKBYTES]; +#endif + + XMEMSET( block, 0, BLAKE2B_BLOCKBYTES ); + XMEMCPY( block, key, keylen ); + blake2b_update( S, block, BLAKE2B_BLOCKBYTES ); + secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from */ + /* memory */ + +#ifdef WOLFSSL_SMALL_STACK + XFREE(block, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + } + return 0; +} + +static int blake2b_compress( blake2b_state *S, + const byte block[BLAKE2B_BLOCKBYTES] ) +{ + int i; + +#ifdef WOLFSSL_SMALL_STACK + word64* m; + word64* v; + + m = (word64*)XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER); + + if ( m == NULL ) return -1; + + v = (word64*)XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER); + + if ( v == NULL ) + { + XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER); + return -1; + } +#else + word64 m[16]; + word64 v[16]; +#endif + + for( i = 0; i < 16; ++i ) + m[i] = load64( block + i * sizeof( m[i] ) ); + + for( i = 0; i < 8; ++i ) + v[i] = S->h[i]; + + v[ 8] = blake2b_IV[0]; + v[ 9] = blake2b_IV[1]; + v[10] = blake2b_IV[2]; + v[11] = blake2b_IV[3]; + v[12] = S->t[0] ^ blake2b_IV[4]; + v[13] = S->t[1] ^ blake2b_IV[5]; + v[14] = S->f[0] ^ blake2b_IV[6]; + v[15] = S->f[1] ^ blake2b_IV[7]; +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2b_sigma[r][2*i+0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2*i+1]]; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while(0) +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + 
G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + ROUND( 10 ); + ROUND( 11 ); + + for( i = 0; i < 8; ++i ) + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + +#undef G +#undef ROUND + +#ifdef WOLFSSL_SMALL_STACK + XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(v, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return 0; +} + +/* inlen now in bytes */ +int blake2b_update( blake2b_state *S, const byte *in, word64 inlen ) +{ + while( inlen > 0 ) + { + word64 left = S->buflen; + word64 fill = 2 * BLAKE2B_BLOCKBYTES - left; + + if( inlen > fill ) + { + XMEMCPY( S->buf + left, in, (wolfssl_word)fill ); /* Fill buffer */ + S->buflen += fill; + blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); + + if ( blake2b_compress( S, S->buf ) < 0 ) return -1; /* Compress */ + + XMEMCPY( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); + /* Shift buffer left */ + S->buflen -= BLAKE2B_BLOCKBYTES; + in += fill; + inlen -= fill; + } + else /* inlen <= fill */ + { + XMEMCPY( S->buf + left, in, (wolfssl_word)inlen ); + S->buflen += inlen; /* Be lazy, do not compress */ + inlen = 0; + } + } + + return 0; +} + +/* Is this correct? */ +int blake2b_final( blake2b_state *S, byte *out, byte outlen ) +{ + byte buffer[BLAKE2B_OUTBYTES]; + int i; + + if( S->buflen > BLAKE2B_BLOCKBYTES ) + { + blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); + + if ( blake2b_compress( S, S->buf ) < 0 ) return -1; + + S->buflen -= BLAKE2B_BLOCKBYTES; + XMEMCPY( S->buf, S->buf + BLAKE2B_BLOCKBYTES, (wolfssl_word)S->buflen ); + } + + blake2b_increment_counter( S, S->buflen ); + blake2b_set_lastblock( S ); + XMEMSET( S->buf + S->buflen, 0, (wolfssl_word)(2 * BLAKE2B_BLOCKBYTES - S->buflen) ); + /* Padding */ + if ( blake2b_compress( S, S->buf ) < 0 ) return -1; + + for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + store64( buffer + sizeof( S->h[i] ) * i, S->h[i] ); + + XMEMCPY( out, buffer, outlen ); + return 0; +} + +/* inlen, at least, should be word64. Others can be size_t. 
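+   The 128-bit block counter (t[0], t[1]) maintained by
+   blake2b_increment_counter is what permits inputs longer than 2^32 bytes,
+   hence the 64-bit inlen.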
*/ +int blake2b( byte *out, const void *in, const void *key, const byte outlen, + const word64 inlen, byte keylen ) +{ + blake2b_state S[1]; + + /* Verify parameters */ + if ( NULL == in ) return -1; + + if ( NULL == out ) return -1; + + if( NULL == key ) keylen = 0; + + if( keylen > 0 ) + { + if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1; + } + else + { + if( blake2b_init( S, outlen ) < 0 ) return -1; + } + + if ( blake2b_update( S, ( byte * )in, inlen ) < 0) return -1; + + return blake2b_final( S, out, outlen ); +} + +#if defined(BLAKE2B_SELFTEST) +#include +#include "blake2-kat.h" +int main( int argc, char **argv ) +{ + byte key[BLAKE2B_KEYBYTES]; + byte buf[KAT_LENGTH]; + + for( word32 i = 0; i < BLAKE2B_KEYBYTES; ++i ) + key[i] = ( byte )i; + + for( word32 i = 0; i < KAT_LENGTH; ++i ) + buf[i] = ( byte )i; + + for( word32 i = 0; i < KAT_LENGTH; ++i ) + { + byte hash[BLAKE2B_OUTBYTES]; + if ( blake2b( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ) < 0 ) + { + puts( "error" ); + return -1; + } + + if( 0 != XMEMCMP( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) ) + { + puts( "error" ); + return -1; + } + } + + puts( "ok" ); + return 0; +} +#endif + + +/* wolfCrypt API */ + +/* Init Blake2b digest, track size in case final doesn't want to "remember" */ +int wc_InitBlake2b(Blake2b* b2b, word32 digestSz) +{ + if (b2b == NULL){ + return -1; + } + b2b->digestSz = digestSz; + + return blake2b_init(b2b->S, (byte)digestSz); +} + + +/* Blake2b Update */ +int wc_Blake2bUpdate(Blake2b* b2b, const byte* data, word32 sz) +{ + return blake2b_update(b2b->S, data, sz); +} + + +/* Blake2b Final, if pass in zero size we use init digestSz */ +int wc_Blake2bFinal(Blake2b* b2b, byte* final, word32 requestSz) +{ + word32 sz = requestSz ? requestSz : b2b->digestSz; + + return blake2b_final(b2b->S, final, (byte)sz); +} + + +/* end CTaoCrypt API */ + +#endif /* HAVE_BLAKE2 */ + diff --git a/client/wolfssl/wolfcrypt/src/blake2s.c b/client/wolfssl/wolfcrypt/src/blake2s.c new file mode 100644 index 0000000..651a1d1 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/blake2s.c @@ -0,0 +1,446 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Written in 2012 by Samuel Neves + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see . +*/ +/* blake2s.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef HAVE_BLAKE2S + +#include +#include + + +static const word32 blake2s_IV[8] = +{ + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 +}; + +static const byte blake2s_sigma[10][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } +}; + + +static WC_INLINE int blake2s_set_lastnode( blake2s_state *S ) +{ + S->f[1] = ~0; + return 0; +} + +/* Some helper functions, not necessarily useful */ +static WC_INLINE int blake2s_set_lastblock( blake2s_state *S ) +{ + if( S->last_node ) blake2s_set_lastnode( S ); + + S->f[0] = ~0; + return 0; +} + +static WC_INLINE int blake2s_increment_counter( blake2s_state *S, const word32 + inc ) +{ + S->t[0] += inc; + S->t[1] += ( S->t[0] < inc ); + return 0; +} + +static WC_INLINE int blake2s_init0( blake2s_state *S ) +{ + int i; + XMEMSET( S, 0, sizeof( blake2s_state ) ); + + for( i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; + + return 0; +} + +/* init xors IV with input parameter block */ +int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) +{ + word32 i; + byte *p ; + blake2s_init0( S ); + p = ( byte * )( P ); + + /* IV XOR ParamBlock */ + for( i = 0; i < 8; ++i ) + S->h[i] ^= load32( p + sizeof( S->h[i] ) * i ); + + return 0; +} + + + +int blake2s_init( blake2s_state *S, const byte outlen ) +{ + blake2s_param P[1]; + + if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; + +#ifdef WOLFSSL_BLAKE2S_INIT_EACH_FIELD + P->digest_length = outlen; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store32( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + XMEMSET( P->reserved, 0, sizeof( P->reserved ) ); + XMEMSET( P->salt, 0, sizeof( P->salt ) ); + XMEMSET( P->personal, 0, sizeof( P->personal ) ); +#else + XMEMSET( P, 0, sizeof( *P ) ); + P->digest_length = outlen; + P->fanout = 1; + P->depth = 1; +#endif + return blake2s_init_param( S, P ); +} + + +int blake2s_init_key( blake2s_state *S, const byte outlen, const void *key, + const byte keylen ) +{ + blake2s_param P[1]; + + if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; + + if ( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1; + +#ifdef WOLFSSL_BLAKE2S_INIT_EACH_FIELD + P->digest_length = outlen; + P->key_length = keylen; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store64( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + XMEMSET( P->reserved, 0, sizeof( P->reserved ) ); + XMEMSET( P->salt, 0, sizeof( P->salt ) ); + XMEMSET( P->personal, 0, sizeof( P->personal ) ); +#else + XMEMSET( P, 0, sizeof( *P ) ); + P->digest_length = outlen; + P->key_length = keylen; + P->fanout = 1; + 
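+    /* fanout = 1 and depth = 1 select the BLAKE2s sequential mode, the only
+     * mode this wrapper uses; the tree-hashing fields are left zeroed by the
+     * XMEMSET above */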
P->depth = 1; +#endif + + if( blake2s_init_param( S, P ) < 0 ) return -1; + + { +#ifdef WOLFSSL_SMALL_STACK + byte* block; + + block = (byte*)XMALLOC(BLAKE2S_BLOCKBYTES, NULL, DYNAMIC_TYPE_TMP_BUFFER); + + if ( block == NULL ) return -1; +#else + byte block[BLAKE2S_BLOCKBYTES]; +#endif + + XMEMSET( block, 0, BLAKE2S_BLOCKBYTES ); + XMEMCPY( block, key, keylen ); + blake2s_update( S, block, BLAKE2S_BLOCKBYTES ); + secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from */ + /* memory */ + +#ifdef WOLFSSL_SMALL_STACK + XFREE(block, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + } + return 0; +} + +static int blake2s_compress( blake2s_state *S, + const byte block[BLAKE2S_BLOCKBYTES] ) +{ + int i; + +#ifdef WOLFSSL_SMALL_STACK + word32* m; + word32* v; + + m = (word32*)XMALLOC(sizeof(word32) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER); + + if ( m == NULL ) return -1; + + v = (word32*)XMALLOC(sizeof(word32) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER); + + if ( v == NULL ) + { + XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER); + return -1; + } +#else + word32 m[16]; + word32 v[16]; +#endif + + for( i = 0; i < 16; ++i ) + m[i] = load32( block + i * sizeof( m[i] ) ); + + for( i = 0; i < 8; ++i ) + v[i] = S->h[i]; + + v[ 8] = blake2s_IV[0]; + v[ 9] = blake2s_IV[1]; + v[10] = blake2s_IV[2]; + v[11] = blake2s_IV[3]; + v[12] = S->t[0] ^ blake2s_IV[4]; + v[13] = S->t[1] ^ blake2s_IV[5]; + v[14] = S->f[0] ^ blake2s_IV[6]; + v[15] = S->f[1] ^ blake2s_IV[7]; +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2s_sigma[r][2*i+0]]; \ + d = rotr32(d ^ a, 16); \ + c = c + d; \ + b = rotr32(b ^ c, 12); \ + a = a + b + m[blake2s_sigma[r][2*i+1]]; \ + d = rotr32(d ^ a, 8); \ + c = c + d; \ + b = rotr32(b ^ c, 7); \ + } while(0) +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + + for( i = 0; i < 8; ++i ) + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + +#undef G +#undef ROUND + +#ifdef WOLFSSL_SMALL_STACK + XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(v, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return 0; +} + +/* inlen now in bytes */ +int blake2s_update( blake2s_state *S, const byte *in, word32 inlen ) +{ + while( inlen > 0 ) + { + word32 left = S->buflen; + word32 fill = 2 * BLAKE2S_BLOCKBYTES - left; + + if( inlen > fill ) + { + XMEMCPY( S->buf + left, in, (wolfssl_word)fill ); /* Fill buffer */ + S->buflen += fill; + blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); + + if ( blake2s_compress( S, S->buf ) < 0 ) return -1; /* Compress */ + + XMEMCPY( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); + /* Shift buffer left */ + S->buflen -= BLAKE2S_BLOCKBYTES; + in += fill; + inlen -= fill; + } + else /* inlen <= fill */ + { + XMEMCPY( S->buf + left, in, (wolfssl_word)inlen ); + S->buflen += inlen; /* Be lazy, do not compress */ + inlen = 0; + } + } + + return 0; +} + +/* Is this correct? 
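+   The buffering is sound: blake2s_update never lets buflen exceed
+   2 * BLAKE2S_BLOCKBYTES, so at most one full block is compressed here
+   before the zero-padded final block is processed.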
*/ +int blake2s_final( blake2s_state *S, byte *out, byte outlen ) +{ + int i; + byte buffer[BLAKE2S_BLOCKBYTES]; + + if( S->buflen > BLAKE2S_BLOCKBYTES ) + { + blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); + + if ( blake2s_compress( S, S->buf ) < 0 ) return -1; + + S->buflen -= BLAKE2S_BLOCKBYTES; + XMEMCPY( S->buf, S->buf + BLAKE2S_BLOCKBYTES, (wolfssl_word)S->buflen ); + } + + blake2s_increment_counter( S, S->buflen ); + blake2s_set_lastblock( S ); + XMEMSET( S->buf + S->buflen, 0, (wolfssl_word)(2 * BLAKE2S_BLOCKBYTES - S->buflen) ); + /* Padding */ + if ( blake2s_compress( S, S->buf ) < 0 ) return -1; + + for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + store64( buffer + sizeof( S->h[i] ) * i, S->h[i] ); + + XMEMCPY( out, buffer, outlen ); + return 0; +} + +/* inlen, at least, should be word32. Others can be size_t. */ +int blake2s( byte *out, const void *in, const void *key, const byte outlen, + const word32 inlen, byte keylen ) +{ + blake2s_state S[1]; + + /* Verify parameters */ + if ( NULL == in ) return -1; + + if ( NULL == out ) return -1; + + if( NULL == key ) keylen = 0; + + if( keylen > 0 ) + { + if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1; + } + else + { + if( blake2s_init( S, outlen ) < 0 ) return -1; + } + + if ( blake2s_update( S, ( byte * )in, inlen ) < 0) return -1; + + return blake2s_final( S, out, outlen ); +} + +#if defined(BLAKE2S_SELFTEST) +#include +#include "blake2-kat.h" +int main( int argc, char **argv ) +{ + byte key[BLAKE2S_KEYBYTES]; + byte buf[KAT_LENGTH]; + + for( word32 i = 0; i < BLAKE2S_KEYBYTES; ++i ) + key[i] = ( byte )i; + + for( word32 i = 0; i < KAT_LENGTH; ++i ) + buf[i] = ( byte )i; + + for( word32 i = 0; i < KAT_LENGTH; ++i ) + { + byte hash[BLAKE2S_OUTBYTES]; + if ( blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ) < 0 ) + { + puts( "error" ); + return -1; + } + + if( 0 != XMEMCMP( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) ) + { + puts( "error" ); + return -1; + } + } + + puts( "ok" ); + return 0; +} +#endif + + +/* wolfCrypt API */ + +/* Init Blake2s digest, track size in case final doesn't want to "remember" */ +int wc_InitBlake2s(Blake2s* b2s, word32 digestSz) +{ + if (b2s == NULL){ + return -1; + } + b2s->digestSz = digestSz; + + return blake2s_init(b2s->S, (byte)digestSz); +} + + +/* Blake2s Update */ +int wc_Blake2sUpdate(Blake2s* b2s, const byte* data, word32 sz) +{ + return blake2s_update(b2s->S, data, sz); +} + + +/* Blake2s Final, if pass in zero size we use init digestSz */ +int wc_Blake2sFinal(Blake2s* b2s, byte* final, word32 requestSz) +{ + word32 sz = requestSz ? requestSz : b2s->digestSz; + + return blake2s_final(b2s->S, final, (byte)sz); +} + + +/* end CTaoCrypt API */ + +#endif /* HAVE_BLAKE2S */ + diff --git a/client/wolfssl/wolfcrypt/src/camellia.c b/client/wolfssl/wolfcrypt/src/camellia.c new file mode 100644 index 0000000..89ee661 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/camellia.c @@ -0,0 +1,1644 @@ +/* camellia.c ver 1.2.0 + * + * Copyright (c) 2006,2007 + * NTT (Nippon Telegraph and Telephone Corporation) . All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer as + * the first lines of this file unmodified. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NTT ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL NTT BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* camellia.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +/* + * Algorithm Specification + * http://info.isl.ntt.co.jp/crypt/eng/camellia/specifications.html + */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef HAVE_CAMELLIA + +#include +#include +#include +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +/* u32 must be 32bit word */ +typedef unsigned int u32; +typedef unsigned char u8; + +/* key constants */ + +#define CAMELLIA_SIGMA1L ((u32)0xA09E667FL) +#define CAMELLIA_SIGMA1R ((u32)0x3BCC908BL) +#define CAMELLIA_SIGMA2L ((u32)0xB67AE858L) +#define CAMELLIA_SIGMA2R ((u32)0x4CAA73B2L) +#define CAMELLIA_SIGMA3L ((u32)0xC6EF372FL) +#define CAMELLIA_SIGMA3R ((u32)0xE94F82BEL) +#define CAMELLIA_SIGMA4L ((u32)0x54FF53A5L) +#define CAMELLIA_SIGMA4R ((u32)0xF1D36F1CL) +#define CAMELLIA_SIGMA5L ((u32)0x10E527FAL) +#define CAMELLIA_SIGMA5R ((u32)0xDE682D1DL) +#define CAMELLIA_SIGMA6L ((u32)0xB05688C2L) +#define CAMELLIA_SIGMA6R ((u32)0xB3E6C1FDL) + +/* + * macros + */ + + +#if defined(_MSC_VER) + +# define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00) +# define GETU32(p) SWAP(*((u32 *)(p))) +# define PUTU32(ct, st) {*((u32 *)(ct)) = SWAP((st));} + +#else /* not MS-VC */ + +# define GETU32(pt) \ + (((u32)(pt)[0] << 24) \ + ^ ((u32)(pt)[1] << 16) \ + ^ ((u32)(pt)[2] << 8) \ + ^ ((u32)(pt)[3])) + +# define PUTU32(ct, st) { \ + (ct)[0] = (u8)((st) >> 24); \ + (ct)[1] = (u8)((st) >> 16); \ + (ct)[2] = (u8)((st) >> 8); \ + (ct)[3] = (u8)(st); } + +#endif + +#define CamelliaSubkeyL(INDEX) (subkey[(INDEX)*2]) +#define CamelliaSubkeyR(INDEX) (subkey[(INDEX)*2 + 1]) + +/* rotation right shift 1byte */ +#define CAMELLIA_RR8(x) (((x) >> 8) + ((x) << 24)) +/* rotation left shift 1bit */ +#define CAMELLIA_RL1(x) (((x) << 1) + ((x) >> 31)) +/* 
rotation left shift 1byte */ +#define CAMELLIA_RL8(x) (((x) << 8) + ((x) >> 24)) + +#define CAMELLIA_ROLDQ(ll, lr, rl, rr, w0, w1, bits) \ + do { \ + w0 = ll; \ + ll = (ll << bits) + (lr >> (32 - bits)); \ + lr = (lr << bits) + (rl >> (32 - bits)); \ + rl = (rl << bits) + (rr >> (32 - bits)); \ + rr = (rr << bits) + (w0 >> (32 - bits)); \ + } while(0) + +#define CAMELLIA_ROLDQo32(ll, lr, rl, rr, w0, w1, bits) \ + do { \ + w0 = ll; \ + w1 = lr; \ + ll = (lr << (bits - 32)) + (rl >> (64 - bits)); \ + lr = (rl << (bits - 32)) + (rr >> (64 - bits)); \ + rl = (rr << (bits - 32)) + (w0 >> (64 - bits)); \ + rr = (w0 << (bits - 32)) + (w1 >> (64 - bits)); \ + } while(0) + +#define CAMELLIA_SP1110(INDEX) (camellia_sp1110[(INDEX)]) +#define CAMELLIA_SP0222(INDEX) (camellia_sp0222[(INDEX)]) +#define CAMELLIA_SP3033(INDEX) (camellia_sp3033[(INDEX)]) +#define CAMELLIA_SP4404(INDEX) (camellia_sp4404[(INDEX)]) + +#define CAMELLIA_F(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \ + do { \ + il = xl ^ kl; \ + ir = xr ^ kr; \ + t0 = il >> 16; \ + t1 = ir >> 16; \ + yl = CAMELLIA_SP1110(ir & 0xff) \ + ^ CAMELLIA_SP0222((t1 >> 8) & 0xff) \ + ^ CAMELLIA_SP3033(t1 & 0xff) \ + ^ CAMELLIA_SP4404((ir >> 8) & 0xff); \ + yr = CAMELLIA_SP1110((t0 >> 8) & 0xff) \ + ^ CAMELLIA_SP0222(t0 & 0xff) \ + ^ CAMELLIA_SP3033((il >> 8) & 0xff) \ + ^ CAMELLIA_SP4404(il & 0xff); \ + yl ^= yr; \ + yr = CAMELLIA_RR8(yr); \ + yr ^= yl; \ + } while(0) + + +/* + * for speed up + * + */ +#define CAMELLIA_FLS(ll, lr, rl, rr, kll, klr, krl, krr, t0, t1, t2, t3) \ + do { \ + t0 = kll; \ + t0 &= ll; \ + lr ^= CAMELLIA_RL1(t0); \ + t1 = klr; \ + t1 |= lr; \ + ll ^= t1; \ + \ + t2 = krr; \ + t2 |= rr; \ + rl ^= t2; \ + t3 = krl; \ + t3 &= rl; \ + rr ^= CAMELLIA_RL1(t3); \ + } while(0) + +#define CAMELLIA_ROUNDSM(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \ + do { \ + ir = CAMELLIA_SP1110(xr & 0xff) \ + ^ CAMELLIA_SP0222((xr >> 24) & 0xff) \ + ^ CAMELLIA_SP3033((xr >> 16) & 0xff) \ + ^ CAMELLIA_SP4404((xr >> 8) & 0xff); \ + il = CAMELLIA_SP1110((xl >> 24) & 0xff) \ + ^ CAMELLIA_SP0222((xl >> 16) & 0xff) \ + ^ CAMELLIA_SP3033((xl >> 8) & 0xff) \ + ^ CAMELLIA_SP4404(xl & 0xff); \ + il ^= kl; \ + ir ^= kr; \ + ir ^= il; \ + il = CAMELLIA_RR8(il); \ + il ^= ir; \ + yl ^= ir; \ + yr ^= il; \ + } while(0) + + +static const u32 camellia_sp1110[256] = { + 0x70707000,0x82828200,0x2c2c2c00,0xececec00, + 0xb3b3b300,0x27272700,0xc0c0c000,0xe5e5e500, + 0xe4e4e400,0x85858500,0x57575700,0x35353500, + 0xeaeaea00,0x0c0c0c00,0xaeaeae00,0x41414100, + 0x23232300,0xefefef00,0x6b6b6b00,0x93939300, + 0x45454500,0x19191900,0xa5a5a500,0x21212100, + 0xededed00,0x0e0e0e00,0x4f4f4f00,0x4e4e4e00, + 0x1d1d1d00,0x65656500,0x92929200,0xbdbdbd00, + 0x86868600,0xb8b8b800,0xafafaf00,0x8f8f8f00, + 0x7c7c7c00,0xebebeb00,0x1f1f1f00,0xcecece00, + 0x3e3e3e00,0x30303000,0xdcdcdc00,0x5f5f5f00, + 0x5e5e5e00,0xc5c5c500,0x0b0b0b00,0x1a1a1a00, + 0xa6a6a600,0xe1e1e100,0x39393900,0xcacaca00, + 0xd5d5d500,0x47474700,0x5d5d5d00,0x3d3d3d00, + 0xd9d9d900,0x01010100,0x5a5a5a00,0xd6d6d600, + 0x51515100,0x56565600,0x6c6c6c00,0x4d4d4d00, + 0x8b8b8b00,0x0d0d0d00,0x9a9a9a00,0x66666600, + 0xfbfbfb00,0xcccccc00,0xb0b0b000,0x2d2d2d00, + 0x74747400,0x12121200,0x2b2b2b00,0x20202000, + 0xf0f0f000,0xb1b1b100,0x84848400,0x99999900, + 0xdfdfdf00,0x4c4c4c00,0xcbcbcb00,0xc2c2c200, + 0x34343400,0x7e7e7e00,0x76767600,0x05050500, + 0x6d6d6d00,0xb7b7b700,0xa9a9a900,0x31313100, + 0xd1d1d100,0x17171700,0x04040400,0xd7d7d700, + 0x14141400,0x58585800,0x3a3a3a00,0x61616100, + 0xdedede00,0x1b1b1b00,0x11111100,0x1c1c1c00, + 
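+    /* (The sp tables fold the Camellia s-boxes into 32-bit words so the
+     * F-function needs only table lookups and XORs: sp1110[x] carries s1(x)
+     * in the top three byte lanes, sp0222 carries s2(x) = s1(x) <<< 1,
+     * sp3033 carries s3(x) = s1(x) >>> 1, and sp4404 carries s4(x) =
+     * s1(x <<< 1); the digits in each name mark which byte lane holds which
+     * s-box, with 0 meaning an empty lane.) */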
0x32323200,0x0f0f0f00,0x9c9c9c00,0x16161600, + 0x53535300,0x18181800,0xf2f2f200,0x22222200, + 0xfefefe00,0x44444400,0xcfcfcf00,0xb2b2b200, + 0xc3c3c300,0xb5b5b500,0x7a7a7a00,0x91919100, + 0x24242400,0x08080800,0xe8e8e800,0xa8a8a800, + 0x60606000,0xfcfcfc00,0x69696900,0x50505000, + 0xaaaaaa00,0xd0d0d000,0xa0a0a000,0x7d7d7d00, + 0xa1a1a100,0x89898900,0x62626200,0x97979700, + 0x54545400,0x5b5b5b00,0x1e1e1e00,0x95959500, + 0xe0e0e000,0xffffff00,0x64646400,0xd2d2d200, + 0x10101000,0xc4c4c400,0x00000000,0x48484800, + 0xa3a3a300,0xf7f7f700,0x75757500,0xdbdbdb00, + 0x8a8a8a00,0x03030300,0xe6e6e600,0xdadada00, + 0x09090900,0x3f3f3f00,0xdddddd00,0x94949400, + 0x87878700,0x5c5c5c00,0x83838300,0x02020200, + 0xcdcdcd00,0x4a4a4a00,0x90909000,0x33333300, + 0x73737300,0x67676700,0xf6f6f600,0xf3f3f300, + 0x9d9d9d00,0x7f7f7f00,0xbfbfbf00,0xe2e2e200, + 0x52525200,0x9b9b9b00,0xd8d8d800,0x26262600, + 0xc8c8c800,0x37373700,0xc6c6c600,0x3b3b3b00, + 0x81818100,0x96969600,0x6f6f6f00,0x4b4b4b00, + 0x13131300,0xbebebe00,0x63636300,0x2e2e2e00, + 0xe9e9e900,0x79797900,0xa7a7a700,0x8c8c8c00, + 0x9f9f9f00,0x6e6e6e00,0xbcbcbc00,0x8e8e8e00, + 0x29292900,0xf5f5f500,0xf9f9f900,0xb6b6b600, + 0x2f2f2f00,0xfdfdfd00,0xb4b4b400,0x59595900, + 0x78787800,0x98989800,0x06060600,0x6a6a6a00, + 0xe7e7e700,0x46464600,0x71717100,0xbababa00, + 0xd4d4d400,0x25252500,0xababab00,0x42424200, + 0x88888800,0xa2a2a200,0x8d8d8d00,0xfafafa00, + 0x72727200,0x07070700,0xb9b9b900,0x55555500, + 0xf8f8f800,0xeeeeee00,0xacacac00,0x0a0a0a00, + 0x36363600,0x49494900,0x2a2a2a00,0x68686800, + 0x3c3c3c00,0x38383800,0xf1f1f100,0xa4a4a400, + 0x40404000,0x28282800,0xd3d3d300,0x7b7b7b00, + 0xbbbbbb00,0xc9c9c900,0x43434300,0xc1c1c100, + 0x15151500,0xe3e3e300,0xadadad00,0xf4f4f400, + 0x77777700,0xc7c7c700,0x80808000,0x9e9e9e00, +}; + +static const u32 camellia_sp0222[256] = { + 0x00e0e0e0,0x00050505,0x00585858,0x00d9d9d9, + 0x00676767,0x004e4e4e,0x00818181,0x00cbcbcb, + 0x00c9c9c9,0x000b0b0b,0x00aeaeae,0x006a6a6a, + 0x00d5d5d5,0x00181818,0x005d5d5d,0x00828282, + 0x00464646,0x00dfdfdf,0x00d6d6d6,0x00272727, + 0x008a8a8a,0x00323232,0x004b4b4b,0x00424242, + 0x00dbdbdb,0x001c1c1c,0x009e9e9e,0x009c9c9c, + 0x003a3a3a,0x00cacaca,0x00252525,0x007b7b7b, + 0x000d0d0d,0x00717171,0x005f5f5f,0x001f1f1f, + 0x00f8f8f8,0x00d7d7d7,0x003e3e3e,0x009d9d9d, + 0x007c7c7c,0x00606060,0x00b9b9b9,0x00bebebe, + 0x00bcbcbc,0x008b8b8b,0x00161616,0x00343434, + 0x004d4d4d,0x00c3c3c3,0x00727272,0x00959595, + 0x00ababab,0x008e8e8e,0x00bababa,0x007a7a7a, + 0x00b3b3b3,0x00020202,0x00b4b4b4,0x00adadad, + 0x00a2a2a2,0x00acacac,0x00d8d8d8,0x009a9a9a, + 0x00171717,0x001a1a1a,0x00353535,0x00cccccc, + 0x00f7f7f7,0x00999999,0x00616161,0x005a5a5a, + 0x00e8e8e8,0x00242424,0x00565656,0x00404040, + 0x00e1e1e1,0x00636363,0x00090909,0x00333333, + 0x00bfbfbf,0x00989898,0x00979797,0x00858585, + 0x00686868,0x00fcfcfc,0x00ececec,0x000a0a0a, + 0x00dadada,0x006f6f6f,0x00535353,0x00626262, + 0x00a3a3a3,0x002e2e2e,0x00080808,0x00afafaf, + 0x00282828,0x00b0b0b0,0x00747474,0x00c2c2c2, + 0x00bdbdbd,0x00363636,0x00222222,0x00383838, + 0x00646464,0x001e1e1e,0x00393939,0x002c2c2c, + 0x00a6a6a6,0x00303030,0x00e5e5e5,0x00444444, + 0x00fdfdfd,0x00888888,0x009f9f9f,0x00656565, + 0x00878787,0x006b6b6b,0x00f4f4f4,0x00232323, + 0x00484848,0x00101010,0x00d1d1d1,0x00515151, + 0x00c0c0c0,0x00f9f9f9,0x00d2d2d2,0x00a0a0a0, + 0x00555555,0x00a1a1a1,0x00414141,0x00fafafa, + 0x00434343,0x00131313,0x00c4c4c4,0x002f2f2f, + 0x00a8a8a8,0x00b6b6b6,0x003c3c3c,0x002b2b2b, + 0x00c1c1c1,0x00ffffff,0x00c8c8c8,0x00a5a5a5, + 
0x00202020,0x00898989,0x00000000,0x00909090, + 0x00474747,0x00efefef,0x00eaeaea,0x00b7b7b7, + 0x00151515,0x00060606,0x00cdcdcd,0x00b5b5b5, + 0x00121212,0x007e7e7e,0x00bbbbbb,0x00292929, + 0x000f0f0f,0x00b8b8b8,0x00070707,0x00040404, + 0x009b9b9b,0x00949494,0x00212121,0x00666666, + 0x00e6e6e6,0x00cecece,0x00ededed,0x00e7e7e7, + 0x003b3b3b,0x00fefefe,0x007f7f7f,0x00c5c5c5, + 0x00a4a4a4,0x00373737,0x00b1b1b1,0x004c4c4c, + 0x00919191,0x006e6e6e,0x008d8d8d,0x00767676, + 0x00030303,0x002d2d2d,0x00dedede,0x00969696, + 0x00262626,0x007d7d7d,0x00c6c6c6,0x005c5c5c, + 0x00d3d3d3,0x00f2f2f2,0x004f4f4f,0x00191919, + 0x003f3f3f,0x00dcdcdc,0x00797979,0x001d1d1d, + 0x00525252,0x00ebebeb,0x00f3f3f3,0x006d6d6d, + 0x005e5e5e,0x00fbfbfb,0x00696969,0x00b2b2b2, + 0x00f0f0f0,0x00313131,0x000c0c0c,0x00d4d4d4, + 0x00cfcfcf,0x008c8c8c,0x00e2e2e2,0x00757575, + 0x00a9a9a9,0x004a4a4a,0x00575757,0x00848484, + 0x00111111,0x00454545,0x001b1b1b,0x00f5f5f5, + 0x00e4e4e4,0x000e0e0e,0x00737373,0x00aaaaaa, + 0x00f1f1f1,0x00dddddd,0x00595959,0x00141414, + 0x006c6c6c,0x00929292,0x00545454,0x00d0d0d0, + 0x00787878,0x00707070,0x00e3e3e3,0x00494949, + 0x00808080,0x00505050,0x00a7a7a7,0x00f6f6f6, + 0x00777777,0x00939393,0x00868686,0x00838383, + 0x002a2a2a,0x00c7c7c7,0x005b5b5b,0x00e9e9e9, + 0x00eeeeee,0x008f8f8f,0x00010101,0x003d3d3d, +}; + +static const u32 camellia_sp3033[256] = { + 0x38003838,0x41004141,0x16001616,0x76007676, + 0xd900d9d9,0x93009393,0x60006060,0xf200f2f2, + 0x72007272,0xc200c2c2,0xab00abab,0x9a009a9a, + 0x75007575,0x06000606,0x57005757,0xa000a0a0, + 0x91009191,0xf700f7f7,0xb500b5b5,0xc900c9c9, + 0xa200a2a2,0x8c008c8c,0xd200d2d2,0x90009090, + 0xf600f6f6,0x07000707,0xa700a7a7,0x27002727, + 0x8e008e8e,0xb200b2b2,0x49004949,0xde00dede, + 0x43004343,0x5c005c5c,0xd700d7d7,0xc700c7c7, + 0x3e003e3e,0xf500f5f5,0x8f008f8f,0x67006767, + 0x1f001f1f,0x18001818,0x6e006e6e,0xaf00afaf, + 0x2f002f2f,0xe200e2e2,0x85008585,0x0d000d0d, + 0x53005353,0xf000f0f0,0x9c009c9c,0x65006565, + 0xea00eaea,0xa300a3a3,0xae00aeae,0x9e009e9e, + 0xec00ecec,0x80008080,0x2d002d2d,0x6b006b6b, + 0xa800a8a8,0x2b002b2b,0x36003636,0xa600a6a6, + 0xc500c5c5,0x86008686,0x4d004d4d,0x33003333, + 0xfd00fdfd,0x66006666,0x58005858,0x96009696, + 0x3a003a3a,0x09000909,0x95009595,0x10001010, + 0x78007878,0xd800d8d8,0x42004242,0xcc00cccc, + 0xef00efef,0x26002626,0xe500e5e5,0x61006161, + 0x1a001a1a,0x3f003f3f,0x3b003b3b,0x82008282, + 0xb600b6b6,0xdb00dbdb,0xd400d4d4,0x98009898, + 0xe800e8e8,0x8b008b8b,0x02000202,0xeb00ebeb, + 0x0a000a0a,0x2c002c2c,0x1d001d1d,0xb000b0b0, + 0x6f006f6f,0x8d008d8d,0x88008888,0x0e000e0e, + 0x19001919,0x87008787,0x4e004e4e,0x0b000b0b, + 0xa900a9a9,0x0c000c0c,0x79007979,0x11001111, + 0x7f007f7f,0x22002222,0xe700e7e7,0x59005959, + 0xe100e1e1,0xda00dada,0x3d003d3d,0xc800c8c8, + 0x12001212,0x04000404,0x74007474,0x54005454, + 0x30003030,0x7e007e7e,0xb400b4b4,0x28002828, + 0x55005555,0x68006868,0x50005050,0xbe00bebe, + 0xd000d0d0,0xc400c4c4,0x31003131,0xcb00cbcb, + 0x2a002a2a,0xad00adad,0x0f000f0f,0xca00caca, + 0x70007070,0xff00ffff,0x32003232,0x69006969, + 0x08000808,0x62006262,0x00000000,0x24002424, + 0xd100d1d1,0xfb00fbfb,0xba00baba,0xed00eded, + 0x45004545,0x81008181,0x73007373,0x6d006d6d, + 0x84008484,0x9f009f9f,0xee00eeee,0x4a004a4a, + 0xc300c3c3,0x2e002e2e,0xc100c1c1,0x01000101, + 0xe600e6e6,0x25002525,0x48004848,0x99009999, + 0xb900b9b9,0xb300b3b3,0x7b007b7b,0xf900f9f9, + 0xce00cece,0xbf00bfbf,0xdf00dfdf,0x71007171, + 0x29002929,0xcd00cdcd,0x6c006c6c,0x13001313, + 0x64006464,0x9b009b9b,0x63006363,0x9d009d9d, + 
0xc000c0c0,0x4b004b4b,0xb700b7b7,0xa500a5a5, + 0x89008989,0x5f005f5f,0xb100b1b1,0x17001717, + 0xf400f4f4,0xbc00bcbc,0xd300d3d3,0x46004646, + 0xcf00cfcf,0x37003737,0x5e005e5e,0x47004747, + 0x94009494,0xfa00fafa,0xfc00fcfc,0x5b005b5b, + 0x97009797,0xfe00fefe,0x5a005a5a,0xac00acac, + 0x3c003c3c,0x4c004c4c,0x03000303,0x35003535, + 0xf300f3f3,0x23002323,0xb800b8b8,0x5d005d5d, + 0x6a006a6a,0x92009292,0xd500d5d5,0x21002121, + 0x44004444,0x51005151,0xc600c6c6,0x7d007d7d, + 0x39003939,0x83008383,0xdc00dcdc,0xaa00aaaa, + 0x7c007c7c,0x77007777,0x56005656,0x05000505, + 0x1b001b1b,0xa400a4a4,0x15001515,0x34003434, + 0x1e001e1e,0x1c001c1c,0xf800f8f8,0x52005252, + 0x20002020,0x14001414,0xe900e9e9,0xbd00bdbd, + 0xdd00dddd,0xe400e4e4,0xa100a1a1,0xe000e0e0, + 0x8a008a8a,0xf100f1f1,0xd600d6d6,0x7a007a7a, + 0xbb00bbbb,0xe300e3e3,0x40004040,0x4f004f4f, +}; + +static const u32 camellia_sp4404[256] = { + 0x70700070,0x2c2c002c,0xb3b300b3,0xc0c000c0, + 0xe4e400e4,0x57570057,0xeaea00ea,0xaeae00ae, + 0x23230023,0x6b6b006b,0x45450045,0xa5a500a5, + 0xeded00ed,0x4f4f004f,0x1d1d001d,0x92920092, + 0x86860086,0xafaf00af,0x7c7c007c,0x1f1f001f, + 0x3e3e003e,0xdcdc00dc,0x5e5e005e,0x0b0b000b, + 0xa6a600a6,0x39390039,0xd5d500d5,0x5d5d005d, + 0xd9d900d9,0x5a5a005a,0x51510051,0x6c6c006c, + 0x8b8b008b,0x9a9a009a,0xfbfb00fb,0xb0b000b0, + 0x74740074,0x2b2b002b,0xf0f000f0,0x84840084, + 0xdfdf00df,0xcbcb00cb,0x34340034,0x76760076, + 0x6d6d006d,0xa9a900a9,0xd1d100d1,0x04040004, + 0x14140014,0x3a3a003a,0xdede00de,0x11110011, + 0x32320032,0x9c9c009c,0x53530053,0xf2f200f2, + 0xfefe00fe,0xcfcf00cf,0xc3c300c3,0x7a7a007a, + 0x24240024,0xe8e800e8,0x60600060,0x69690069, + 0xaaaa00aa,0xa0a000a0,0xa1a100a1,0x62620062, + 0x54540054,0x1e1e001e,0xe0e000e0,0x64640064, + 0x10100010,0x00000000,0xa3a300a3,0x75750075, + 0x8a8a008a,0xe6e600e6,0x09090009,0xdddd00dd, + 0x87870087,0x83830083,0xcdcd00cd,0x90900090, + 0x73730073,0xf6f600f6,0x9d9d009d,0xbfbf00bf, + 0x52520052,0xd8d800d8,0xc8c800c8,0xc6c600c6, + 0x81810081,0x6f6f006f,0x13130013,0x63630063, + 0xe9e900e9,0xa7a700a7,0x9f9f009f,0xbcbc00bc, + 0x29290029,0xf9f900f9,0x2f2f002f,0xb4b400b4, + 0x78780078,0x06060006,0xe7e700e7,0x71710071, + 0xd4d400d4,0xabab00ab,0x88880088,0x8d8d008d, + 0x72720072,0xb9b900b9,0xf8f800f8,0xacac00ac, + 0x36360036,0x2a2a002a,0x3c3c003c,0xf1f100f1, + 0x40400040,0xd3d300d3,0xbbbb00bb,0x43430043, + 0x15150015,0xadad00ad,0x77770077,0x80800080, + 0x82820082,0xecec00ec,0x27270027,0xe5e500e5, + 0x85850085,0x35350035,0x0c0c000c,0x41410041, + 0xefef00ef,0x93930093,0x19190019,0x21210021, + 0x0e0e000e,0x4e4e004e,0x65650065,0xbdbd00bd, + 0xb8b800b8,0x8f8f008f,0xebeb00eb,0xcece00ce, + 0x30300030,0x5f5f005f,0xc5c500c5,0x1a1a001a, + 0xe1e100e1,0xcaca00ca,0x47470047,0x3d3d003d, + 0x01010001,0xd6d600d6,0x56560056,0x4d4d004d, + 0x0d0d000d,0x66660066,0xcccc00cc,0x2d2d002d, + 0x12120012,0x20200020,0xb1b100b1,0x99990099, + 0x4c4c004c,0xc2c200c2,0x7e7e007e,0x05050005, + 0xb7b700b7,0x31310031,0x17170017,0xd7d700d7, + 0x58580058,0x61610061,0x1b1b001b,0x1c1c001c, + 0x0f0f000f,0x16160016,0x18180018,0x22220022, + 0x44440044,0xb2b200b2,0xb5b500b5,0x91910091, + 0x08080008,0xa8a800a8,0xfcfc00fc,0x50500050, + 0xd0d000d0,0x7d7d007d,0x89890089,0x97970097, + 0x5b5b005b,0x95950095,0xffff00ff,0xd2d200d2, + 0xc4c400c4,0x48480048,0xf7f700f7,0xdbdb00db, + 0x03030003,0xdada00da,0x3f3f003f,0x94940094, + 0x5c5c005c,0x02020002,0x4a4a004a,0x33330033, + 0x67670067,0xf3f300f3,0x7f7f007f,0xe2e200e2, + 0x9b9b009b,0x26260026,0x37370037,0x3b3b003b, + 0x96960096,0x4b4b004b,0xbebe00be,0x2e2e002e, + 
0x79790079,0x8c8c008c,0x6e6e006e,0x8e8e008e, + 0xf5f500f5,0xb6b600b6,0xfdfd00fd,0x59590059, + 0x98980098,0x6a6a006a,0x46460046,0xbaba00ba, + 0x25250025,0x42420042,0xa2a200a2,0xfafa00fa, + 0x07070007,0x55550055,0xeeee00ee,0x0a0a000a, + 0x49490049,0x68680068,0x38380038,0xa4a400a4, + 0x28280028,0x7b7b007b,0xc9c900c9,0xc1c100c1, + 0xe3e300e3,0xf4f400f4,0xc7c700c7,0x9e9e009e, +}; + + +/** + * Stuff related to the Camellia key schedule + */ +#define subl(x) subL[(x)] +#define subr(x) subR[(x)] + +static int camellia_setup128(const unsigned char *key, u32 *subkey) +{ + u32 kll, klr, krl, krr; + u32 il, ir, t0, t1, w0, w1; + u32 kw4l, kw4r, dw, tl, tr; + +#ifdef WOLFSSL_SMALL_STACK + u32* subL; + u32* subR; + + subL = (u32*) XMALLOC(sizeof(u32) * 26, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (subL == NULL) + return MEMORY_E; + + subR = (u32*) XMALLOC(sizeof(u32) * 26, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (subR == NULL) { + XFREE(subL, NULL, DYNAMIC_TYPE_TMP_BUFFER); + return MEMORY_E; + } +#else + u32 subL[26]; + u32 subR[26]; +#endif + + /** + * k == kll || klr || krl || krr (|| is concatenation) + */ + kll = GETU32(key ); + klr = GETU32(key + 4); + krl = GETU32(key + 8); + krr = GETU32(key + 12); + /** + * generate KL dependent subkeys + */ + subl(0) = kll; subr(0) = klr; + subl(1) = krl; subr(1) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(4) = kll; subr(4) = klr; + subl(5) = krl; subr(5) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30); + subl(10) = kll; subr(10) = klr; + subl(11) = krl; subr(11) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(13) = krl; subr(13) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(16) = kll; subr(16) = klr; + subl(17) = krl; subr(17) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(18) = kll; subr(18) = klr; + subl(19) = krl; subr(19) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(22) = kll; subr(22) = klr; + subl(23) = krl; subr(23) = krr; + + /* generate KA */ + kll = subl(0); klr = subr(0); + krl = subl(1); krr = subr(1); + CAMELLIA_F(kll, klr, + CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, + w0, w1, il, ir, t0, t1); + krl ^= w0; krr ^= w1; + CAMELLIA_F(krl, krr, + CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, + kll, klr, il, ir, t0, t1); + CAMELLIA_F(kll, klr, + CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, + krl, krr, il, ir, t0, t1); + krl ^= w0; krr ^= w1; + CAMELLIA_F(krl, krr, + CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, + w0, w1, il, ir, t0, t1); + kll ^= w0; klr ^= w1; + + /* generate KA dependent subkeys */ + subl(2) = kll; subr(2) = klr; + subl(3) = krl; subr(3) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(6) = kll; subr(6) = klr; + subl(7) = krl; subr(7) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(8) = kll; subr(8) = klr; + subl(9) = krl; subr(9) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(12) = kll; subr(12) = klr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(14) = kll; subr(14) = klr; + subl(15) = krl; subr(15) = krr; + CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34); + subl(20) = kll; subr(20) = klr; + subl(21) = krl; subr(21) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(24) = kll; subr(24) = klr; + subl(25) = krl; subr(25) = krr; + + + /* absorb kw2 to other subkeys */ + subl(3) ^= subl(1); subr(3) ^= subr(1); + subl(5) ^= subl(1); subr(5) ^= subr(1); + subl(7) ^= subl(1); subr(7) ^= subr(1); + subl(1) ^= subr(1) & ~subr(9); + dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); + subl(11) ^= subl(1); 
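+    /* the AND/OR structure of the FL/FL^-1 layer (subkeys 8/9) lets the
+     * kw2 mask be carried across it with the 1-bit-rotate adjustment
+     * computed in dw above; the re-keyed mask is folded into the next
+     * group of round keys */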
subr(11) ^= subr(1); + subl(13) ^= subl(1); subr(13) ^= subr(1); + subl(15) ^= subl(1); subr(15) ^= subr(1); + subl(1) ^= subr(1) & ~subr(17); + dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); + subl(19) ^= subl(1); subr(19) ^= subr(1); + subl(21) ^= subl(1); subr(21) ^= subr(1); + subl(23) ^= subl(1); subr(23) ^= subr(1); + subl(24) ^= subl(1); subr(24) ^= subr(1); + + /* absorb kw4 to other subkeys */ + kw4l = subl(25); kw4r = subr(25); + subl(22) ^= kw4l; subr(22) ^= kw4r; + subl(20) ^= kw4l; subr(20) ^= kw4r; + subl(18) ^= kw4l; subr(18) ^= kw4r; + kw4l ^= kw4r & ~subr(16); + dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw); + subl(14) ^= kw4l; subr(14) ^= kw4r; + subl(12) ^= kw4l; subr(12) ^= kw4r; + subl(10) ^= kw4l; subr(10) ^= kw4r; + kw4l ^= kw4r & ~subr(8); + dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw); + subl(6) ^= kw4l; subr(6) ^= kw4r; + subl(4) ^= kw4l; subr(4) ^= kw4r; + subl(2) ^= kw4l; subr(2) ^= kw4r; + subl(0) ^= kw4l; subr(0) ^= kw4r; + + /* key XOR is end of F-function */ + CamelliaSubkeyL(0) = subl(0) ^ subl(2); + CamelliaSubkeyR(0) = subr(0) ^ subr(2); + CamelliaSubkeyL(2) = subl(3); + CamelliaSubkeyR(2) = subr(3); + CamelliaSubkeyL(3) = subl(2) ^ subl(4); + CamelliaSubkeyR(3) = subr(2) ^ subr(4); + CamelliaSubkeyL(4) = subl(3) ^ subl(5); + CamelliaSubkeyR(4) = subr(3) ^ subr(5); + CamelliaSubkeyL(5) = subl(4) ^ subl(6); + CamelliaSubkeyR(5) = subr(4) ^ subr(6); + CamelliaSubkeyL(6) = subl(5) ^ subl(7); + CamelliaSubkeyR(6) = subr(5) ^ subr(7); + tl = subl(10) ^ (subr(10) & ~subr(8)); + dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(7) = subl(6) ^ tl; + CamelliaSubkeyR(7) = subr(6) ^ tr; + CamelliaSubkeyL(8) = subl(8); + CamelliaSubkeyR(8) = subr(8); + CamelliaSubkeyL(9) = subl(9); + CamelliaSubkeyR(9) = subr(9); + tl = subl(7) ^ (subr(7) & ~subr(9)); + dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(10) = tl ^ subl(11); + CamelliaSubkeyR(10) = tr ^ subr(11); + CamelliaSubkeyL(11) = subl(10) ^ subl(12); + CamelliaSubkeyR(11) = subr(10) ^ subr(12); + CamelliaSubkeyL(12) = subl(11) ^ subl(13); + CamelliaSubkeyR(12) = subr(11) ^ subr(13); + CamelliaSubkeyL(13) = subl(12) ^ subl(14); + CamelliaSubkeyR(13) = subr(12) ^ subr(14); + CamelliaSubkeyL(14) = subl(13) ^ subl(15); + CamelliaSubkeyR(14) = subr(13) ^ subr(15); + tl = subl(18) ^ (subr(18) & ~subr(16)); + dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(15) = subl(14) ^ tl; + CamelliaSubkeyR(15) = subr(14) ^ tr; + CamelliaSubkeyL(16) = subl(16); + CamelliaSubkeyR(16) = subr(16); + CamelliaSubkeyL(17) = subl(17); + CamelliaSubkeyR(17) = subr(17); + tl = subl(15) ^ (subr(15) & ~subr(17)); + dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(18) = tl ^ subl(19); + CamelliaSubkeyR(18) = tr ^ subr(19); + CamelliaSubkeyL(19) = subl(18) ^ subl(20); + CamelliaSubkeyR(19) = subr(18) ^ subr(20); + CamelliaSubkeyL(20) = subl(19) ^ subl(21); + CamelliaSubkeyR(20) = subr(19) ^ subr(21); + CamelliaSubkeyL(21) = subl(20) ^ subl(22); + CamelliaSubkeyR(21) = subr(20) ^ subr(22); + CamelliaSubkeyL(22) = subl(21) ^ subl(23); + CamelliaSubkeyR(22) = subr(21) ^ subr(23); + CamelliaSubkeyL(23) = subl(22); + CamelliaSubkeyR(23) = subr(22); + CamelliaSubkeyL(24) = subl(24) ^ subl(23); + CamelliaSubkeyR(24) = subr(24) ^ subr(23); + + /* apply the inverse of the last half of P-function */ + dw = CamelliaSubkeyL(2) ^ CamelliaSubkeyR(2), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(2) = CamelliaSubkeyL(2) ^ dw, CamelliaSubkeyL(2) = dw; + dw = 
CamelliaSubkeyL(3) ^ CamelliaSubkeyR(3), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(3) = CamelliaSubkeyL(3) ^ dw, CamelliaSubkeyL(3) = dw; + dw = CamelliaSubkeyL(4) ^ CamelliaSubkeyR(4), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(4) = CamelliaSubkeyL(4) ^ dw, CamelliaSubkeyL(4) = dw; + dw = CamelliaSubkeyL(5) ^ CamelliaSubkeyR(5), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(5) = CamelliaSubkeyL(5) ^ dw, CamelliaSubkeyL(5) = dw; + dw = CamelliaSubkeyL(6) ^ CamelliaSubkeyR(6), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(6) = CamelliaSubkeyL(6) ^ dw, CamelliaSubkeyL(6) = dw; + dw = CamelliaSubkeyL(7) ^ CamelliaSubkeyR(7), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(7) = CamelliaSubkeyL(7) ^ dw, CamelliaSubkeyL(7) = dw; + dw = CamelliaSubkeyL(10) ^ CamelliaSubkeyR(10), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(10) = CamelliaSubkeyL(10) ^ dw, CamelliaSubkeyL(10) = dw; + dw = CamelliaSubkeyL(11) ^ CamelliaSubkeyR(11), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(11) = CamelliaSubkeyL(11) ^ dw, CamelliaSubkeyL(11) = dw; + dw = CamelliaSubkeyL(12) ^ CamelliaSubkeyR(12), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(12) = CamelliaSubkeyL(12) ^ dw, CamelliaSubkeyL(12) = dw; + dw = CamelliaSubkeyL(13) ^ CamelliaSubkeyR(13), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(13) = CamelliaSubkeyL(13) ^ dw, CamelliaSubkeyL(13) = dw; + dw = CamelliaSubkeyL(14) ^ CamelliaSubkeyR(14), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(14) = CamelliaSubkeyL(14) ^ dw, CamelliaSubkeyL(14) = dw; + dw = CamelliaSubkeyL(15) ^ CamelliaSubkeyR(15), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(15) = CamelliaSubkeyL(15) ^ dw, CamelliaSubkeyL(15) = dw; + dw = CamelliaSubkeyL(18) ^ CamelliaSubkeyR(18), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(18) = CamelliaSubkeyL(18) ^ dw, CamelliaSubkeyL(18) = dw; + dw = CamelliaSubkeyL(19) ^ CamelliaSubkeyR(19), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(19) = CamelliaSubkeyL(19) ^ dw, CamelliaSubkeyL(19) = dw; + dw = CamelliaSubkeyL(20) ^ CamelliaSubkeyR(20), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(20) = CamelliaSubkeyL(20) ^ dw, CamelliaSubkeyL(20) = dw; + dw = CamelliaSubkeyL(21) ^ CamelliaSubkeyR(21), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(21) = CamelliaSubkeyL(21) ^ dw, CamelliaSubkeyL(21) = dw; + dw = CamelliaSubkeyL(22) ^ CamelliaSubkeyR(22), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(22) = CamelliaSubkeyL(22) ^ dw, CamelliaSubkeyL(22) = dw; + dw = CamelliaSubkeyL(23) ^ CamelliaSubkeyR(23), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(23) = CamelliaSubkeyL(23) ^ dw, CamelliaSubkeyL(23) = dw; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(subL, NULL, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(subR, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return 0; +} + +static int camellia_setup256(const unsigned char *key, u32 *subkey) +{ + u32 kll,klr,krl,krr; /* left half of key */ + u32 krll,krlr,krrl,krrr; /* right half of key */ + u32 il, ir, t0, t1, w0, w1; /* temporary variables */ + u32 kw4l, kw4r, dw, tl, tr; + +#ifdef WOLFSSL_SMALL_STACK + u32* subL; + u32* subR; + + subL = (u32*) XMALLOC(sizeof(u32) * 34, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (subL == NULL) + return MEMORY_E; + + subR = (u32*) XMALLOC(sizeof(u32) * 34, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (subR == NULL) { + XFREE(subL, NULL, DYNAMIC_TYPE_TMP_BUFFER); + return MEMORY_E; + } +#else + u32 subL[34]; + u32 subR[34]; +#endif + + /** + * key = (kll || klr || krl || krr || krll || krlr || krrl || krrr) + * (|| is concatenation) + */ + + kll = GETU32(key ); + klr = GETU32(key + 4); + krl = GETU32(key + 8); + krr = GETU32(key + 12); + krll = GETU32(key + 16); + krlr = GETU32(key + 20); + 
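+    /* krrl/krrr below complete KR, the right 128-bit half of the key */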
krrl = GETU32(key + 24); + krrr = GETU32(key + 28); + + /* generate KL dependent subkeys */ + subl(0) = kll; subr(0) = klr; + subl(1) = krl; subr(1) = krr; + CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 45); + subl(12) = kll; subr(12) = klr; + subl(13) = krl; subr(13) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(16) = kll; subr(16) = klr; + subl(17) = krl; subr(17) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(22) = kll; subr(22) = klr; + subl(23) = krl; subr(23) = krr; + CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34); + subl(30) = kll; subr(30) = klr; + subl(31) = krl; subr(31) = krr; + + /* generate KR dependent subkeys */ + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15); + subl(4) = krll; subr(4) = krlr; + subl(5) = krrl; subr(5) = krrr; + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15); + subl(8) = krll; subr(8) = krlr; + subl(9) = krrl; subr(9) = krrr; + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); + subl(18) = krll; subr(18) = krlr; + subl(19) = krrl; subr(19) = krrr; + CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34); + subl(26) = krll; subr(26) = krlr; + subl(27) = krrl; subr(27) = krrr; + CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34); + + /* generate KA */ + kll = subl(0) ^ krll; klr = subr(0) ^ krlr; + krl = subl(1) ^ krrl; krr = subr(1) ^ krrr; + CAMELLIA_F(kll, klr, + CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, + w0, w1, il, ir, t0, t1); + krl ^= w0; krr ^= w1; + CAMELLIA_F(krl, krr, + CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, + kll, klr, il, ir, t0, t1); + kll ^= krll; klr ^= krlr; + CAMELLIA_F(kll, klr, + CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, + krl, krr, il, ir, t0, t1); + krl ^= w0 ^ krrl; krr ^= w1 ^ krrr; + CAMELLIA_F(krl, krr, + CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, + w0, w1, il, ir, t0, t1); + kll ^= w0; klr ^= w1; + + /* generate KB */ + krll ^= kll; krlr ^= klr; + krrl ^= krl; krrr ^= krr; + CAMELLIA_F(krll, krlr, + CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, + w0, w1, il, ir, t0, t1); + krrl ^= w0; krrr ^= w1; + CAMELLIA_F(krrl, krrr, + CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, + w0, w1, il, ir, t0, t1); + krll ^= w0; krlr ^= w1; + + /* generate KA dependent subkeys */ + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(6) = kll; subr(6) = klr; + subl(7) = krl; subr(7) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30); + subl(14) = kll; subr(14) = klr; + subl(15) = krl; subr(15) = krr; + subl(24) = klr; subr(24) = krl; + subl(25) = krr; subr(25) = kll; + CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 49); + subl(28) = kll; subr(28) = klr; + subl(29) = krl; subr(29) = krr; + + /* generate KB dependent subkeys */ + subl(2) = krll; subr(2) = krlr; + subl(3) = krrl; subr(3) = krrr; + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); + subl(10) = krll; subr(10) = krlr; + subl(11) = krrl; subr(11) = krrr; + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); + subl(20) = krll; subr(20) = krlr; + subl(21) = krrl; subr(21) = krrr; + CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 51); + subl(32) = krll; subr(32) = krlr; + subl(33) = krrl; subr(33) = krrr; + + /* absorb kw2 to other subkeys */ + subl(3) ^= subl(1); subr(3) ^= subr(1); + subl(5) ^= subl(1); subr(5) ^= subr(1); + subl(7) ^= subl(1); subr(7) ^= subr(1); + subl(1) ^= subr(1) & ~subr(9); + dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); + subl(11) ^= subl(1); subr(11) ^= subr(1); + subl(13) ^= subl(1); subr(13) ^= subr(1); + subl(15) ^= subl(1); subr(15) ^= subr(1); + subl(1) ^= subr(1) & ~subr(17); + dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); + 
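+    /* kw2 has crossed the second FL/FL^-1 layer (subkeys 16/17); fold it
+     * into round keys 19-23, then re-mask once more at the third layer
+     * (subkeys 24/25) before absorbing into keys 27-32 */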
subl(19) ^= subl(1); subr(19) ^= subr(1); + subl(21) ^= subl(1); subr(21) ^= subr(1); + subl(23) ^= subl(1); subr(23) ^= subr(1); + subl(1) ^= subr(1) & ~subr(25); + dw = subl(1) & subl(25), subr(1) ^= CAMELLIA_RL1(dw); + subl(27) ^= subl(1); subr(27) ^= subr(1); + subl(29) ^= subl(1); subr(29) ^= subr(1); + subl(31) ^= subl(1); subr(31) ^= subr(1); + subl(32) ^= subl(1); subr(32) ^= subr(1); + + /* absorb kw4 to other subkeys */ + kw4l = subl(33); kw4r = subr(33); + subl(30) ^= kw4l; subr(30) ^= kw4r; + subl(28) ^= kw4l; subr(28) ^= kw4r; + subl(26) ^= kw4l; subr(26) ^= kw4r; + kw4l ^= kw4r & ~subr(24); + dw = kw4l & subl(24), kw4r ^= CAMELLIA_RL1(dw); + subl(22) ^= kw4l; subr(22) ^= kw4r; + subl(20) ^= kw4l; subr(20) ^= kw4r; + subl(18) ^= kw4l; subr(18) ^= kw4r; + kw4l ^= kw4r & ~subr(16); + dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw); + subl(14) ^= kw4l; subr(14) ^= kw4r; + subl(12) ^= kw4l; subr(12) ^= kw4r; + subl(10) ^= kw4l; subr(10) ^= kw4r; + kw4l ^= kw4r & ~subr(8); + dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw); + subl(6) ^= kw4l; subr(6) ^= kw4r; + subl(4) ^= kw4l; subr(4) ^= kw4r; + subl(2) ^= kw4l; subr(2) ^= kw4r; + subl(0) ^= kw4l; subr(0) ^= kw4r; + + /* key XOR is end of F-function */ + CamelliaSubkeyL(0) = subl(0) ^ subl(2); + CamelliaSubkeyR(0) = subr(0) ^ subr(2); + CamelliaSubkeyL(2) = subl(3); + CamelliaSubkeyR(2) = subr(3); + CamelliaSubkeyL(3) = subl(2) ^ subl(4); + CamelliaSubkeyR(3) = subr(2) ^ subr(4); + CamelliaSubkeyL(4) = subl(3) ^ subl(5); + CamelliaSubkeyR(4) = subr(3) ^ subr(5); + CamelliaSubkeyL(5) = subl(4) ^ subl(6); + CamelliaSubkeyR(5) = subr(4) ^ subr(6); + CamelliaSubkeyL(6) = subl(5) ^ subl(7); + CamelliaSubkeyR(6) = subr(5) ^ subr(7); + tl = subl(10) ^ (subr(10) & ~subr(8)); + dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(7) = subl(6) ^ tl; + CamelliaSubkeyR(7) = subr(6) ^ tr; + CamelliaSubkeyL(8) = subl(8); + CamelliaSubkeyR(8) = subr(8); + CamelliaSubkeyL(9) = subl(9); + CamelliaSubkeyR(9) = subr(9); + tl = subl(7) ^ (subr(7) & ~subr(9)); + dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(10) = tl ^ subl(11); + CamelliaSubkeyR(10) = tr ^ subr(11); + CamelliaSubkeyL(11) = subl(10) ^ subl(12); + CamelliaSubkeyR(11) = subr(10) ^ subr(12); + CamelliaSubkeyL(12) = subl(11) ^ subl(13); + CamelliaSubkeyR(12) = subr(11) ^ subr(13); + CamelliaSubkeyL(13) = subl(12) ^ subl(14); + CamelliaSubkeyR(13) = subr(12) ^ subr(14); + CamelliaSubkeyL(14) = subl(13) ^ subl(15); + CamelliaSubkeyR(14) = subr(13) ^ subr(15); + tl = subl(18) ^ (subr(18) & ~subr(16)); + dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(15) = subl(14) ^ tl; + CamelliaSubkeyR(15) = subr(14) ^ tr; + CamelliaSubkeyL(16) = subl(16); + CamelliaSubkeyR(16) = subr(16); + CamelliaSubkeyL(17) = subl(17); + CamelliaSubkeyR(17) = subr(17); + tl = subl(15) ^ (subr(15) & ~subr(17)); + dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(18) = tl ^ subl(19); + CamelliaSubkeyR(18) = tr ^ subr(19); + CamelliaSubkeyL(19) = subl(18) ^ subl(20); + CamelliaSubkeyR(19) = subr(18) ^ subr(20); + CamelliaSubkeyL(20) = subl(19) ^ subl(21); + CamelliaSubkeyR(20) = subr(19) ^ subr(21); + CamelliaSubkeyL(21) = subl(20) ^ subl(22); + CamelliaSubkeyR(21) = subr(20) ^ subr(22); + CamelliaSubkeyL(22) = subl(21) ^ subl(23); + CamelliaSubkeyR(22) = subr(21) ^ subr(23); + tl = subl(26) ^ (subr(26) & ~subr(24)); + dw = tl & subl(24), tr = subr(26) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(23) = subl(22) ^ tl; + CamelliaSubkeyR(23) = 
subr(22) ^ tr; + CamelliaSubkeyL(24) = subl(24); + CamelliaSubkeyR(24) = subr(24); + CamelliaSubkeyL(25) = subl(25); + CamelliaSubkeyR(25) = subr(25); + tl = subl(23) ^ (subr(23) & ~subr(25)); + dw = tl & subl(25), tr = subr(23) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(26) = tl ^ subl(27); + CamelliaSubkeyR(26) = tr ^ subr(27); + CamelliaSubkeyL(27) = subl(26) ^ subl(28); + CamelliaSubkeyR(27) = subr(26) ^ subr(28); + CamelliaSubkeyL(28) = subl(27) ^ subl(29); + CamelliaSubkeyR(28) = subr(27) ^ subr(29); + CamelliaSubkeyL(29) = subl(28) ^ subl(30); + CamelliaSubkeyR(29) = subr(28) ^ subr(30); + CamelliaSubkeyL(30) = subl(29) ^ subl(31); + CamelliaSubkeyR(30) = subr(29) ^ subr(31); + CamelliaSubkeyL(31) = subl(30); + CamelliaSubkeyR(31) = subr(30); + CamelliaSubkeyL(32) = subl(32) ^ subl(31); + CamelliaSubkeyR(32) = subr(32) ^ subr(31); + + /* apply the inverse of the last half of P-function */ + dw = CamelliaSubkeyL(2) ^ CamelliaSubkeyR(2), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(2) = CamelliaSubkeyL(2) ^ dw, CamelliaSubkeyL(2) = dw; + dw = CamelliaSubkeyL(3) ^ CamelliaSubkeyR(3), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(3) = CamelliaSubkeyL(3) ^ dw, CamelliaSubkeyL(3) = dw; + dw = CamelliaSubkeyL(4) ^ CamelliaSubkeyR(4), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(4) = CamelliaSubkeyL(4) ^ dw, CamelliaSubkeyL(4) = dw; + dw = CamelliaSubkeyL(5) ^ CamelliaSubkeyR(5), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(5) = CamelliaSubkeyL(5) ^ dw, CamelliaSubkeyL(5) = dw; + dw = CamelliaSubkeyL(6) ^ CamelliaSubkeyR(6), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(6) = CamelliaSubkeyL(6) ^ dw, CamelliaSubkeyL(6) = dw; + dw = CamelliaSubkeyL(7) ^ CamelliaSubkeyR(7), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(7) = CamelliaSubkeyL(7) ^ dw, CamelliaSubkeyL(7) = dw; + dw = CamelliaSubkeyL(10) ^ CamelliaSubkeyR(10), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(10) = CamelliaSubkeyL(10) ^ dw, CamelliaSubkeyL(10) = dw; + dw = CamelliaSubkeyL(11) ^ CamelliaSubkeyR(11), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(11) = CamelliaSubkeyL(11) ^ dw, CamelliaSubkeyL(11) = dw; + dw = CamelliaSubkeyL(12) ^ CamelliaSubkeyR(12), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(12) = CamelliaSubkeyL(12) ^ dw, CamelliaSubkeyL(12) = dw; + dw = CamelliaSubkeyL(13) ^ CamelliaSubkeyR(13), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(13) = CamelliaSubkeyL(13) ^ dw, CamelliaSubkeyL(13) = dw; + dw = CamelliaSubkeyL(14) ^ CamelliaSubkeyR(14), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(14) = CamelliaSubkeyL(14) ^ dw, CamelliaSubkeyL(14) = dw; + dw = CamelliaSubkeyL(15) ^ CamelliaSubkeyR(15), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(15) = CamelliaSubkeyL(15) ^ dw, CamelliaSubkeyL(15) = dw; + dw = CamelliaSubkeyL(18) ^ CamelliaSubkeyR(18), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(18) = CamelliaSubkeyL(18) ^ dw, CamelliaSubkeyL(18) = dw; + dw = CamelliaSubkeyL(19) ^ CamelliaSubkeyR(19), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(19) = CamelliaSubkeyL(19) ^ dw, CamelliaSubkeyL(19) = dw; + dw = CamelliaSubkeyL(20) ^ CamelliaSubkeyR(20), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(20) = CamelliaSubkeyL(20) ^ dw, CamelliaSubkeyL(20) = dw; + dw = CamelliaSubkeyL(21) ^ CamelliaSubkeyR(21), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(21) = CamelliaSubkeyL(21) ^ dw, CamelliaSubkeyL(21) = dw; + dw = CamelliaSubkeyL(22) ^ CamelliaSubkeyR(22), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(22) = CamelliaSubkeyL(22) ^ dw, CamelliaSubkeyL(22) = dw; + dw = CamelliaSubkeyL(23) ^ CamelliaSubkeyR(23), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(23) = CamelliaSubkeyL(23) ^ dw, CamelliaSubkeyL(23) = 
dw; + dw = CamelliaSubkeyL(26) ^ CamelliaSubkeyR(26), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(26) = CamelliaSubkeyL(26) ^ dw, CamelliaSubkeyL(26) = dw; + dw = CamelliaSubkeyL(27) ^ CamelliaSubkeyR(27), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(27) = CamelliaSubkeyL(27) ^ dw, CamelliaSubkeyL(27) = dw; + dw = CamelliaSubkeyL(28) ^ CamelliaSubkeyR(28), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(28) = CamelliaSubkeyL(28) ^ dw, CamelliaSubkeyL(28) = dw; + dw = CamelliaSubkeyL(29) ^ CamelliaSubkeyR(29), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(29) = CamelliaSubkeyL(29) ^ dw, CamelliaSubkeyL(29) = dw; + dw = CamelliaSubkeyL(30) ^ CamelliaSubkeyR(30), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(30) = CamelliaSubkeyL(30) ^ dw, CamelliaSubkeyL(30) = dw; + dw = CamelliaSubkeyL(31) ^ CamelliaSubkeyR(31), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(31) = CamelliaSubkeyL(31) ^ dw,CamelliaSubkeyL(31) = dw; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(subL, NULL, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(subR, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return 0; +} + +static int camellia_setup192(const unsigned char *key, u32 *subkey) +{ + unsigned char kk[32]; + u32 krll, krlr, krrl,krrr; + + XMEMCPY(kk, key, 24); + XMEMCPY((unsigned char *)&krll, key+16,4); + XMEMCPY((unsigned char *)&krlr, key+20,4); + krrl = ~krll; + krrr = ~krlr; + XMEMCPY(kk+24, (unsigned char *)&krrl, 4); + XMEMCPY(kk+28, (unsigned char *)&krrr, 4); + + return camellia_setup256(kk, subkey); +} + + +/** + * Stuff related to camellia encryption/decryption + * + * "io" must be 4byte aligned and big-endian data. + */ +static void camellia_encrypt128(const u32 *subkey, u32 *io) +{ + u32 il, ir, t0, t1; + + /* pre whitening but absorb kw2*/ + io[0] ^= CamelliaSubkeyL(0); + io[1] ^= CamelliaSubkeyR(0); + /* main iteration */ + + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(2),CamelliaSubkeyR(2), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(3),CamelliaSubkeyR(3), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(4),CamelliaSubkeyR(4), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(5),CamelliaSubkeyR(5), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(6),CamelliaSubkeyR(6), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(7),CamelliaSubkeyR(7), + io[0],io[1],il,ir,t0,t1); + + CAMELLIA_FLS(io[0],io[1],io[2],io[3], + CamelliaSubkeyL(8),CamelliaSubkeyR(8), + CamelliaSubkeyL(9),CamelliaSubkeyR(9), + t0,t1,il,ir); + + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(10),CamelliaSubkeyR(10), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(11),CamelliaSubkeyR(11), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(12),CamelliaSubkeyR(12), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(13),CamelliaSubkeyR(13), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(14),CamelliaSubkeyR(14), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(15),CamelliaSubkeyR(15), + io[0],io[1],il,ir,t0,t1); + + CAMELLIA_FLS(io[0],io[1],io[2],io[3], + CamelliaSubkeyL(16),CamelliaSubkeyR(16), + CamelliaSubkeyL(17),CamelliaSubkeyR(17), + t0,t1,il,ir); + + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(18),CamelliaSubkeyR(18), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(19),CamelliaSubkeyR(19), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + 
CamelliaSubkeyL(20),CamelliaSubkeyR(20), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(21),CamelliaSubkeyR(21), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(22),CamelliaSubkeyR(22), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(23),CamelliaSubkeyR(23), + io[0],io[1],il,ir,t0,t1); + + /* post whitening but kw4 */ + io[2] ^= CamelliaSubkeyL(24); + io[3] ^= CamelliaSubkeyR(24); + + t0 = io[0]; + t1 = io[1]; + io[0] = io[2]; + io[1] = io[3]; + io[2] = t0; + io[3] = t1; + + return; +} + +static void camellia_decrypt128(const u32 *subkey, u32 *io) +{ + u32 il,ir,t0,t1; /* temporary variables */ + + /* pre whitening but absorb kw2*/ + io[0] ^= CamelliaSubkeyL(24); + io[1] ^= CamelliaSubkeyR(24); + + /* main iteration */ + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(23),CamelliaSubkeyR(23), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(22),CamelliaSubkeyR(22), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(21),CamelliaSubkeyR(21), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(20),CamelliaSubkeyR(20), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(19),CamelliaSubkeyR(19), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(18),CamelliaSubkeyR(18), + io[0],io[1],il,ir,t0,t1); + + CAMELLIA_FLS(io[0],io[1],io[2],io[3], + CamelliaSubkeyL(17),CamelliaSubkeyR(17), + CamelliaSubkeyL(16),CamelliaSubkeyR(16), + t0,t1,il,ir); + + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(15),CamelliaSubkeyR(15), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(14),CamelliaSubkeyR(14), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(13),CamelliaSubkeyR(13), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(12),CamelliaSubkeyR(12), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(11),CamelliaSubkeyR(11), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(10),CamelliaSubkeyR(10), + io[0],io[1],il,ir,t0,t1); + + CAMELLIA_FLS(io[0],io[1],io[2],io[3], + CamelliaSubkeyL(9),CamelliaSubkeyR(9), + CamelliaSubkeyL(8),CamelliaSubkeyR(8), + t0,t1,il,ir); + + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(7),CamelliaSubkeyR(7), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(6),CamelliaSubkeyR(6), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(5),CamelliaSubkeyR(5), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(4),CamelliaSubkeyR(4), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(3),CamelliaSubkeyR(3), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(2),CamelliaSubkeyR(2), + io[0],io[1],il,ir,t0,t1); + + /* post whitening but kw4 */ + io[2] ^= CamelliaSubkeyL(0); + io[3] ^= CamelliaSubkeyR(0); + + t0 = io[0]; + t1 = io[1]; + io[0] = io[2]; + io[1] = io[3]; + io[2] = t0; + io[3] = t1; + + return; +} + +/** + * stuff for 192 and 256bit encryption/decryption + */ +static void camellia_encrypt256(const u32 *subkey, u32 *io) +{ + u32 il,ir,t0,t1; /* temporary variables */ + + /* pre whitening but absorb kw2*/ + io[0] ^= CamelliaSubkeyL(0); + io[1] ^= CamelliaSubkeyR(0); + + /* main iteration */ + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(2),CamelliaSubkeyR(2), + io[2],io[3],il,ir,t0,t1); + 
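+    /* 24 Feistel rounds for 192/256-bit keys: six rounds between each
+     * FL/FL^-1 layer, with consecutive rounds alternating which half of
+     * the io[] state feeds the round function */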
CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(3),CamelliaSubkeyR(3), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(4),CamelliaSubkeyR(4), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(5),CamelliaSubkeyR(5), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(6),CamelliaSubkeyR(6), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(7),CamelliaSubkeyR(7), + io[0],io[1],il,ir,t0,t1); + + CAMELLIA_FLS(io[0],io[1],io[2],io[3], + CamelliaSubkeyL(8),CamelliaSubkeyR(8), + CamelliaSubkeyL(9),CamelliaSubkeyR(9), + t0,t1,il,ir); + + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(10),CamelliaSubkeyR(10), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(11),CamelliaSubkeyR(11), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(12),CamelliaSubkeyR(12), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(13),CamelliaSubkeyR(13), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(14),CamelliaSubkeyR(14), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(15),CamelliaSubkeyR(15), + io[0],io[1],il,ir,t0,t1); + + CAMELLIA_FLS(io[0],io[1],io[2],io[3], + CamelliaSubkeyL(16),CamelliaSubkeyR(16), + CamelliaSubkeyL(17),CamelliaSubkeyR(17), + t0,t1,il,ir); + + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(18),CamelliaSubkeyR(18), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(19),CamelliaSubkeyR(19), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(20),CamelliaSubkeyR(20), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(21),CamelliaSubkeyR(21), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(22),CamelliaSubkeyR(22), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(23),CamelliaSubkeyR(23), + io[0],io[1],il,ir,t0,t1); + + CAMELLIA_FLS(io[0],io[1],io[2],io[3], + CamelliaSubkeyL(24),CamelliaSubkeyR(24), + CamelliaSubkeyL(25),CamelliaSubkeyR(25), + t0,t1,il,ir); + + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(26),CamelliaSubkeyR(26), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(27),CamelliaSubkeyR(27), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(28),CamelliaSubkeyR(28), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(29),CamelliaSubkeyR(29), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(30),CamelliaSubkeyR(30), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(31),CamelliaSubkeyR(31), + io[0],io[1],il,ir,t0,t1); + + /* post whitening but kw4 */ + io[2] ^= CamelliaSubkeyL(32); + io[3] ^= CamelliaSubkeyR(32); + + t0 = io[0]; + t1 = io[1]; + io[0] = io[2]; + io[1] = io[3]; + io[2] = t0; + io[3] = t1; + + return; +} + +static void camellia_decrypt256(const u32 *subkey, u32 *io) +{ + u32 il,ir,t0,t1; /* temporary variables */ + + /* pre whitening but absorb kw2*/ + io[0] ^= CamelliaSubkeyL(32); + io[1] ^= CamelliaSubkeyR(32); + + /* main iteration */ + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(31),CamelliaSubkeyR(31), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(30),CamelliaSubkeyR(30), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(29),CamelliaSubkeyR(29), + io[2],io[3],il,ir,t0,t1); + 
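+    /* decryption replays the same round function with the subkey order
+     * reversed (31 down to 2), and the pre-/post-whitening keys swap
+     * roles */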
CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(28),CamelliaSubkeyR(28), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(27),CamelliaSubkeyR(27), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(26),CamelliaSubkeyR(26), + io[0],io[1],il,ir,t0,t1); + + CAMELLIA_FLS(io[0],io[1],io[2],io[3], + CamelliaSubkeyL(25),CamelliaSubkeyR(25), + CamelliaSubkeyL(24),CamelliaSubkeyR(24), + t0,t1,il,ir); + + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(23),CamelliaSubkeyR(23), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(22),CamelliaSubkeyR(22), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(21),CamelliaSubkeyR(21), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(20),CamelliaSubkeyR(20), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(19),CamelliaSubkeyR(19), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(18),CamelliaSubkeyR(18), + io[0],io[1],il,ir,t0,t1); + + CAMELLIA_FLS(io[0],io[1],io[2],io[3], + CamelliaSubkeyL(17),CamelliaSubkeyR(17), + CamelliaSubkeyL(16),CamelliaSubkeyR(16), + t0,t1,il,ir); + + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(15),CamelliaSubkeyR(15), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(14),CamelliaSubkeyR(14), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(13),CamelliaSubkeyR(13), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(12),CamelliaSubkeyR(12), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(11),CamelliaSubkeyR(11), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(10),CamelliaSubkeyR(10), + io[0],io[1],il,ir,t0,t1); + + CAMELLIA_FLS(io[0],io[1],io[2],io[3], + CamelliaSubkeyL(9),CamelliaSubkeyR(9), + CamelliaSubkeyL(8),CamelliaSubkeyR(8), + t0,t1,il,ir); + + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(7),CamelliaSubkeyR(7), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(6),CamelliaSubkeyR(6), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(5),CamelliaSubkeyR(5), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(4),CamelliaSubkeyR(4), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(3),CamelliaSubkeyR(3), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(2),CamelliaSubkeyR(2), + io[0],io[1],il,ir,t0,t1); + + /* post whitening but kw4 */ + io[2] ^= CamelliaSubkeyL(0); + io[3] ^= CamelliaSubkeyR(0); + + t0 = io[0]; + t1 = io[1]; + io[0] = io[2]; + io[1] = io[3]; + io[2] = t0; + io[3] = t1; + + return; +} + +/*** + * + * API for compatibility + */ + +static void Camellia_EncryptBlock(const int keyBitLength, + const unsigned char *plaintext, + const KEY_TABLE_TYPE keyTable, + unsigned char *ciphertext) +{ + u32 tmp[4]; + + tmp[0] = GETU32(plaintext); + tmp[1] = GETU32(plaintext + 4); + tmp[2] = GETU32(plaintext + 8); + tmp[3] = GETU32(plaintext + 12); + + switch (keyBitLength) { + case 128: + camellia_encrypt128(keyTable, tmp); + break; + case 192: + /* fall through */ + case 256: + camellia_encrypt256(keyTable, tmp); + break; + default: + break; + } + + PUTU32(ciphertext, tmp[0]); + PUTU32(ciphertext + 4, tmp[1]); + PUTU32(ciphertext + 8, tmp[2]); + PUTU32(ciphertext + 12, tmp[3]); +} + +static void Camellia_DecryptBlock(const int keyBitLength, + const 
unsigned char *ciphertext, + const KEY_TABLE_TYPE keyTable, + unsigned char *plaintext) +{ + u32 tmp[4]; + + tmp[0] = GETU32(ciphertext); + tmp[1] = GETU32(ciphertext + 4); + tmp[2] = GETU32(ciphertext + 8); + tmp[3] = GETU32(ciphertext + 12); + + switch (keyBitLength) { + case 128: + camellia_decrypt128(keyTable, tmp); + break; + case 192: + /* fall through */ + case 256: + camellia_decrypt256(keyTable, tmp); + break; + default: + break; + } + PUTU32(plaintext, tmp[0]); + PUTU32(plaintext + 4, tmp[1]); + PUTU32(plaintext + 8, tmp[2]); + PUTU32(plaintext + 12, tmp[3]); +} + + + +/* wolfCrypt wrappers to the Camellia code */ + +int wc_CamelliaSetKey(Camellia* cam, const byte* key, word32 len, const byte* iv) +{ + int ret = 0; + + if (cam == NULL) return BAD_FUNC_ARG; + + XMEMSET(cam->key, 0, sizeof(KEY_TABLE_TYPE)); + + switch (len) { + case 16: + ret = camellia_setup128(key, cam->key); + break; + case 24: + ret = camellia_setup192(key, cam->key); + break; + case 32: + ret = camellia_setup256(key, cam->key); + break; + default: + return BAD_FUNC_ARG; + } + + if (ret != 0) + return ret; + + cam->keySz = len * 8; + + return wc_CamelliaSetIV(cam, iv); +} + + +int wc_CamelliaSetIV(Camellia* cam, const byte* iv) +{ + if (cam == NULL) + return BAD_FUNC_ARG; + + if (iv) + XMEMCPY(cam->reg, iv, CAMELLIA_BLOCK_SIZE); + else + XMEMSET(cam->reg, 0, CAMELLIA_BLOCK_SIZE); + + return 0; +} + + +int wc_CamelliaEncryptDirect(Camellia* cam, byte* out, const byte* in) +{ + if (cam == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + Camellia_EncryptBlock(cam->keySz, in, cam->key, out); + + return 0; +} + + +int wc_CamelliaDecryptDirect(Camellia* cam, byte* out, const byte* in) +{ + if (cam == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + Camellia_DecryptBlock(cam->keySz, in, cam->key, out); + + return 0; +} + + +int wc_CamelliaCbcEncrypt(Camellia* cam, byte* out, const byte* in, word32 sz) +{ + word32 blocks; + if (cam == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + blocks = sz / CAMELLIA_BLOCK_SIZE; + + while (blocks--) { + xorbuf((byte*)cam->reg, in, CAMELLIA_BLOCK_SIZE); + Camellia_EncryptBlock(cam->keySz, (byte*)cam->reg, + cam->key, (byte*)cam->reg); + XMEMCPY(out, cam->reg, CAMELLIA_BLOCK_SIZE); + + out += CAMELLIA_BLOCK_SIZE; + in += CAMELLIA_BLOCK_SIZE; + } + + return 0; +} + + +int wc_CamelliaCbcDecrypt(Camellia* cam, byte* out, const byte* in, word32 sz) +{ + word32 blocks; + if (cam == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + blocks = sz / CAMELLIA_BLOCK_SIZE; + + while (blocks--) { + XMEMCPY(cam->tmp, in, CAMELLIA_BLOCK_SIZE); + Camellia_DecryptBlock(cam->keySz, (byte*)cam->tmp, cam->key, out); + xorbuf(out, (byte*)cam->reg, CAMELLIA_BLOCK_SIZE); + XMEMCPY(cam->reg, cam->tmp, CAMELLIA_BLOCK_SIZE); + + out += CAMELLIA_BLOCK_SIZE; + in += CAMELLIA_BLOCK_SIZE; + } + + return 0; +} + + +#endif /* HAVE_CAMELLIA */ + diff --git a/client/wolfssl/wolfcrypt/src/chacha.c b/client/wolfssl/wolfcrypt/src/chacha.c new file mode 100644 index 0000000..38a1ede --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/chacha.c @@ -0,0 +1,348 @@ +/* chacha.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* + * based from + * chacha-ref.c version 20080118 + * D. J. Bernstein + * Public domain. + */ + + +#ifdef WOLFSSL_ARMASM + /* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */ + +#else +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#if defined(HAVE_CHACHA) && !defined(WOLFSSL_ARMASM) + +#include +#include +#include +#include +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +#ifdef CHACHA_AEAD_TEST + #include +#endif + +#ifdef USE_INTEL_CHACHA_SPEEDUP + #include + #include + + #if defined(__GNUC__) && ((__GNUC__ < 4) || \ + (__GNUC__ == 4 && __GNUC_MINOR__ <= 8)) + #undef NO_AVX2_SUPPORT + #define NO_AVX2_SUPPORT + #endif + #if defined(__clang__) && ((__clang_major__ < 3) || \ + (__clang_major__ == 3 && __clang_minor__ <= 5)) + #undef NO_AVX2_SUPPORT + #define NO_AVX2_SUPPORT + #elif defined(__clang__) && defined(NO_AVX2_SUPPORT) + #undef NO_AVX2_SUPPORT + #endif + + #ifndef NO_AVX2_SUPPORT + #define HAVE_INTEL_AVX2 + #endif + + static int cpuidFlagsSet = 0; + static int cpuidFlags = 0; +#endif + +#ifdef BIG_ENDIAN_ORDER + #define LITTLE32(x) ByteReverseWord32(x) +#else + #define LITTLE32(x) (x) +#endif + +/* Number of rounds */ +#define ROUNDS 20 + +#define U32C(v) (v##U) +#define U32V(v) ((word32)(v) & U32C(0xFFFFFFFF)) +#define U8TO32_LITTLE(p) LITTLE32(((word32*)(p))[0]) + +#define ROTATE(v,c) rotlFixed(v, c) +#define XOR(v,w) ((v) ^ (w)) +#define PLUS(v,w) (U32V((v) + (w))) +#define PLUSONE(v) (PLUS((v),1)) + +#define QUARTERROUND(a,b,c,d) \ + x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \ + x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \ + x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \ + x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7); + + +/** + * Set up iv(nonce). Earlier versions used 64 bits instead of 96, this version + * uses the typical AEAD 96 bit nonce and can do record sizes of 256 GB. + */ +int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter) +{ + word32 temp[CHACHA_IV_WORDS];/* used for alignment of memory */ + +#ifdef CHACHA_AEAD_TEST + word32 i; + printf("NONCE : "); + for (i = 0; i < CHACHA_IV_BYTES; i++) { + printf("%02x", inIv[i]); + } + printf("\n\n"); +#endif + + if (ctx == NULL) + return BAD_FUNC_ARG; + + XMEMCPY(temp, inIv, CHACHA_IV_BYTES); + + ctx->left = 0; /* resets state */ + ctx->X[CHACHA_IV_BYTES+0] = counter; /* block counter */ + ctx->X[CHACHA_IV_BYTES+1] = LITTLE32(temp[0]); /* fixed variable from nonce */ + ctx->X[CHACHA_IV_BYTES+2] = LITTLE32(temp[1]); /* counter from nonce */ + ctx->X[CHACHA_IV_BYTES+3] = LITTLE32(temp[2]); /* counter from nonce */ + + return 0; +} + +/* "expand 32-byte k" as unsigned 32 byte */ +static const word32 sigma[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574}; +/* "expand 16-byte k" as unsigned 16 byte */ +static const word32 tau[4] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574}; + +/** + * Key setup. 
8 word iv (nonce) + */ +int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz) +{ + const word32* constants; + const byte* k; + +#ifdef XSTREAM_ALIGN + word32 alignKey[8]; +#endif + + if (ctx == NULL) + return BAD_FUNC_ARG; + + if (keySz != (CHACHA_MAX_KEY_SZ/2) && keySz != CHACHA_MAX_KEY_SZ) + return BAD_FUNC_ARG; + +#ifdef XSTREAM_ALIGN + if ((wolfssl_word)key % 4) { + WOLFSSL_MSG("wc_ChachaSetKey unaligned key"); + XMEMCPY(alignKey, key, keySz); + k = (byte*)alignKey; + } + else { + k = key; + } +#else + k = key; +#endif /* XSTREAM_ALIGN */ + +#ifdef CHACHA_AEAD_TEST + word32 i; + printf("ChaCha key used :\n"); + for (i = 0; i < keySz; i++) { + printf("%02x", key[i]); + if ((i + 1) % 8 == 0) + printf("\n"); + } + printf("\n\n"); +#endif + + ctx->X[4] = U8TO32_LITTLE(k + 0); + ctx->X[5] = U8TO32_LITTLE(k + 4); + ctx->X[6] = U8TO32_LITTLE(k + 8); + ctx->X[7] = U8TO32_LITTLE(k + 12); + if (keySz == CHACHA_MAX_KEY_SZ) { + k += 16; + constants = sigma; + } + else { + constants = tau; + } + ctx->X[ 8] = U8TO32_LITTLE(k + 0); + ctx->X[ 9] = U8TO32_LITTLE(k + 4); + ctx->X[10] = U8TO32_LITTLE(k + 8); + ctx->X[11] = U8TO32_LITTLE(k + 12); + ctx->X[ 0] = constants[0]; + ctx->X[ 1] = constants[1]; + ctx->X[ 2] = constants[2]; + ctx->X[ 3] = constants[3]; + ctx->left = 0; /* resets state */ + + return 0; +} + +/** + * Converts word into bytes with rotations having been done. + */ +static WC_INLINE void wc_Chacha_wordtobyte(word32 output[CHACHA_CHUNK_WORDS], + const word32 input[CHACHA_CHUNK_WORDS]) +{ + word32 x[CHACHA_CHUNK_WORDS]; + word32 i; + + for (i = 0; i < CHACHA_CHUNK_WORDS; i++) { + x[i] = input[i]; + } + + for (i = (ROUNDS); i > 0; i -= 2) { + QUARTERROUND(0, 4, 8, 12) + QUARTERROUND(1, 5, 9, 13) + QUARTERROUND(2, 6, 10, 14) + QUARTERROUND(3, 7, 11, 15) + QUARTERROUND(0, 5, 10, 15) + QUARTERROUND(1, 6, 11, 12) + QUARTERROUND(2, 7, 8, 13) + QUARTERROUND(3, 4, 9, 14) + } + + for (i = 0; i < CHACHA_CHUNK_WORDS; i++) { + x[i] = PLUS(x[i], input[i]); + } + + for (i = 0; i < CHACHA_CHUNK_WORDS; i++) { + output[i] = LITTLE32(x[i]); + } +} + +#ifdef __cplusplus + extern "C" { +#endif + +extern void chacha_encrypt_x64(ChaCha* ctx, const byte* m, byte* c, + word32 bytes); +extern void chacha_encrypt_avx1(ChaCha* ctx, const byte* m, byte* c, + word32 bytes); +extern void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c, + word32 bytes); + +#ifdef __cplusplus + } /* extern "C" */ +#endif + + +/** + * Encrypt a stream of bytes + */ +static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c, + word32 bytes) +{ + byte* output; + word32 temp[CHACHA_CHUNK_WORDS]; /* used to make sure aligned */ + word32 i; + + /* handle left overs */ + if (bytes > 0 && ctx->left > 0) { + wc_Chacha_wordtobyte(temp, ctx->X); /* recreate the stream */ + output = (byte*)temp + CHACHA_CHUNK_BYTES - ctx->left; + for (i = 0; i < bytes && i < ctx->left; i++) { + c[i] = m[i] ^ output[i]; + } + ctx->left = ctx->left - i; + + /* Used up all of the stream that was left, increment the counter */ + if (ctx->left == 0) { + ctx->X[CHACHA_IV_BYTES] = PLUSONE(ctx->X[CHACHA_IV_BYTES]); + } + bytes = bytes - i; + c += i; + m += i; + } + + output = (byte*)temp; + while (bytes >= CHACHA_CHUNK_BYTES) { + wc_Chacha_wordtobyte(temp, ctx->X); + ctx->X[CHACHA_IV_BYTES] = PLUSONE(ctx->X[CHACHA_IV_BYTES]); + for (i = 0; i < CHACHA_CHUNK_BYTES; ++i) { + c[i] = m[i] ^ output[i]; + } + bytes -= CHACHA_CHUNK_BYTES; + c += CHACHA_CHUNK_BYTES; + m += CHACHA_CHUNK_BYTES; + } + + if (bytes) { + /* in this case there will always 
be some left over since bytes is less + * than CHACHA_CHUNK_BYTES, so do not increment counter after getting + * stream in order for the stream to be recreated on next call */ + wc_Chacha_wordtobyte(temp, ctx->X); + for (i = 0; i < bytes; ++i) { + c[i] = m[i] ^ output[i]; + } + ctx->left = CHACHA_CHUNK_BYTES - i; + } +} + + +/** + * API to encrypt/decrypt a message of any size. + */ +int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input, + word32 msglen) +{ + if (ctx == NULL) + return BAD_FUNC_ARG; + +#ifdef USE_INTEL_CHACHA_SPEEDUP + if (!cpuidFlagsSet) { + cpuidFlags = cpuid_get_flags(); + cpuidFlagsSet = 1; + } + + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(cpuidFlags)) { + chacha_encrypt_avx2(ctx, input, output, msglen); + return 0; + } + #endif + if (IS_INTEL_AVX1(cpuidFlags)) { + chacha_encrypt_avx1(ctx, input, output, msglen); + return 0; + } + else { + chacha_encrypt_x64(ctx, input, output, msglen); + return 0; + } +#endif + wc_Chacha_encrypt_bytes(ctx, input, output, msglen); + + return 0; +} + +#endif /* HAVE_CHACHA*/ + +#endif /* WOLFSSL_ARMASM */ diff --git a/client/wolfssl/wolfcrypt/src/chacha20_poly1305.c b/client/wolfssl/wolfcrypt/src/chacha20_poly1305.c new file mode 100644 index 0000000..64bc4c1 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/chacha20_poly1305.c @@ -0,0 +1,286 @@ +/* chacha.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#if defined(HAVE_CHACHA) && defined(HAVE_POLY1305) + +#include +#include +#include + +#ifdef NO_INLINE +#include +#else +#define WOLFSSL_MISC_INCLUDED +#include +#endif + +#define CHACHA20_POLY1305_AEAD_INITIAL_COUNTER 0 +int wc_ChaCha20Poly1305_Encrypt( + const byte inKey[CHACHA20_POLY1305_AEAD_KEYSIZE], + const byte inIV[CHACHA20_POLY1305_AEAD_IV_SIZE], + const byte* inAAD, const word32 inAADLen, + const byte* inPlaintext, const word32 inPlaintextLen, + byte* outCiphertext, + byte outAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE]) +{ + int ret; + ChaChaPoly_Aead aead; + + /* Validate function arguments */ + if (!inKey || !inIV || + !inPlaintext || !inPlaintextLen || + !outCiphertext || + !outAuthTag) + { + return BAD_FUNC_ARG; + } + + ret = wc_ChaCha20Poly1305_Init(&aead, inKey, inIV, + CHACHA20_POLY1305_AEAD_ENCRYPT); + if (ret == 0) + ret = wc_ChaCha20Poly1305_UpdateAad(&aead, inAAD, inAADLen); + if (ret == 0) + ret = wc_ChaCha20Poly1305_UpdateData(&aead, inPlaintext, outCiphertext, + inPlaintextLen); + if (ret == 0) + ret = wc_ChaCha20Poly1305_Final(&aead, outAuthTag); + return ret; +} + +int wc_ChaCha20Poly1305_Decrypt( + const byte inKey[CHACHA20_POLY1305_AEAD_KEYSIZE], + const byte inIV[CHACHA20_POLY1305_AEAD_IV_SIZE], + const byte* inAAD, const word32 inAADLen, + const byte* inCiphertext, const word32 inCiphertextLen, + const byte inAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE], + byte* outPlaintext) +{ + int ret; + ChaChaPoly_Aead aead; + byte calculatedAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE]; + + /* Validate function arguments */ + if (!inKey || !inIV || + !inCiphertext || !inCiphertextLen || + !inAuthTag || + !outPlaintext) + { + return BAD_FUNC_ARG; + } + + XMEMSET(calculatedAuthTag, 0, sizeof(calculatedAuthTag)); + + ret = wc_ChaCha20Poly1305_Init(&aead, inKey, inIV, + CHACHA20_POLY1305_AEAD_DECRYPT); + if (ret == 0) + ret = wc_ChaCha20Poly1305_UpdateAad(&aead, inAAD, inAADLen); + if (ret == 0) + ret = wc_ChaCha20Poly1305_UpdateData(&aead, inCiphertext, outPlaintext, + inCiphertextLen); + if (ret == 0) + ret = wc_ChaCha20Poly1305_Final(&aead, calculatedAuthTag); + if (ret == 0) + ret = wc_ChaCha20Poly1305_CheckTag(inAuthTag, calculatedAuthTag); + return ret; +} + +int wc_ChaCha20Poly1305_CheckTag( + const byte authTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE], + const byte authTagChk[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE]) +{ + int ret = 0; + if (authTag == NULL || authTagChk == NULL) { + return BAD_FUNC_ARG; + } + if (ConstantCompare(authTag, authTagChk, + CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE) != 0) { + ret = MAC_CMP_FAILED_E; + } + return ret; +} + +int wc_ChaCha20Poly1305_Init(ChaChaPoly_Aead* aead, + const byte inKey[CHACHA20_POLY1305_AEAD_KEYSIZE], + const byte inIV[CHACHA20_POLY1305_AEAD_IV_SIZE], + int isEncrypt) +{ + int ret; + byte authKey[CHACHA20_POLY1305_AEAD_KEYSIZE]; + + /* check arguments */ + if (aead == NULL || inKey == NULL || inIV == NULL) { + return BAD_FUNC_ARG; + } + + /* setup aead context */ + XMEMSET(aead, 0, sizeof(ChaChaPoly_Aead)); + XMEMSET(authKey, 0, sizeof(authKey)); + aead->isEncrypt = isEncrypt; + + /* Initialize the ChaCha20 context (key and iv) */ + ret = wc_Chacha_SetKey(&aead->chacha, inKey, + CHACHA20_POLY1305_AEAD_KEYSIZE); + if (ret == 0) { + ret = 
wc_Chacha_SetIV(&aead->chacha, inIV, + CHACHA20_POLY1305_AEAD_INITIAL_COUNTER); + } + + /* Create the Poly1305 key */ + if (ret == 0) { + ret = wc_Chacha_Process(&aead->chacha, authKey, authKey, + CHACHA20_POLY1305_AEAD_KEYSIZE); + } + + /* Initialize Poly1305 context */ + if (ret == 0) { + ret = wc_Poly1305SetKey(&aead->poly, authKey, + CHACHA20_POLY1305_AEAD_KEYSIZE); + } + + /* advance counter by 1 after creating Poly1305 key */ + if (ret == 0) { + ret = wc_Chacha_SetIV(&aead->chacha, inIV, + CHACHA20_POLY1305_AEAD_INITIAL_COUNTER + 1); + } + + if (ret == 0) { + aead->state = CHACHA20_POLY1305_STATE_READY; + } + + return ret; +} + +/* optional additional authentication data */ +int wc_ChaCha20Poly1305_UpdateAad(ChaChaPoly_Aead* aead, + const byte* inAAD, word32 inAADLen) +{ + int ret = 0; + + if (aead == NULL || (inAAD == NULL && inAADLen > 0)) { + return BAD_FUNC_ARG; + } + if (aead->state != CHACHA20_POLY1305_STATE_READY && + aead->state != CHACHA20_POLY1305_STATE_AAD) { + return BAD_STATE_E; + } + + if (inAAD && inAADLen > 0) { + ret = wc_Poly1305Update(&aead->poly, inAAD, inAADLen); + if (ret == 0) { + aead->aadLen += inAADLen; + aead->state = CHACHA20_POLY1305_STATE_AAD; + } + } + + return ret; +} + +/* inData and outData can be same pointer (inline) */ +int wc_ChaCha20Poly1305_UpdateData(ChaChaPoly_Aead* aead, + const byte* inData, byte* outData, word32 dataLen) +{ + int ret = 0; + + if (aead == NULL || inData == NULL || outData == NULL) { + return BAD_FUNC_ARG; + } + if (aead->state != CHACHA20_POLY1305_STATE_READY && + aead->state != CHACHA20_POLY1305_STATE_AAD && + aead->state != CHACHA20_POLY1305_STATE_DATA) { + return BAD_STATE_E; + } + + /* Pad the AAD */ + if (aead->state == CHACHA20_POLY1305_STATE_AAD) { + ret = wc_Poly1305_Pad(&aead->poly, aead->aadLen); + } + + /* advance state */ + aead->state = CHACHA20_POLY1305_STATE_DATA; + + /* Perform ChaCha20 encrypt/decrypt and Poly1305 auth calc */ + if (ret == 0) { + if (aead->isEncrypt) { + ret = wc_Chacha_Process(&aead->chacha, outData, inData, dataLen); + if (ret == 0) + ret = wc_Poly1305Update(&aead->poly, outData, dataLen); + } + else { + ret = wc_Poly1305Update(&aead->poly, inData, dataLen); + if (ret == 0) + ret = wc_Chacha_Process(&aead->chacha, outData, inData, dataLen); + } + } + if (ret == 0) { + aead->dataLen += dataLen; + } + return ret; +} + +int wc_ChaCha20Poly1305_Final(ChaChaPoly_Aead* aead, + byte outAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE]) +{ + int ret = 0; + + if (aead == NULL || outAuthTag == NULL) { + return BAD_FUNC_ARG; + } + if (aead->state != CHACHA20_POLY1305_STATE_AAD && + aead->state != CHACHA20_POLY1305_STATE_DATA) { + return BAD_STATE_E; + } + + /* Pad the AAD - Make sure it is done */ + if (aead->state == CHACHA20_POLY1305_STATE_AAD) { + ret = wc_Poly1305_Pad(&aead->poly, aead->aadLen); + } + + /* Pad the ciphertext to 16 bytes */ + if (ret == 0) { + ret = wc_Poly1305_Pad(&aead->poly, aead->dataLen); + } + + /* Add the aad length and plaintext/ciphertext length */ + if (ret == 0) { + ret = wc_Poly1305_EncodeSizes(&aead->poly, aead->aadLen, + aead->dataLen); + } + + /* Finalize the auth tag */ + if (ret == 0) { + ret = wc_Poly1305Final(&aead->poly, outAuthTag); + } + + /* reset and cleanup sensitive context */ + ForceZero(aead, sizeof(ChaChaPoly_Aead)); + + return ret; +} + +#endif /* HAVE_CHACHA && HAVE_POLY1305 */ diff --git a/client/wolfssl/wolfcrypt/src/chacha_asm.S b/client/wolfssl/wolfcrypt/src/chacha_asm.S new file mode 100644 index 0000000..f9d5fff --- /dev/null +++ 
b/client/wolfssl/wolfcrypt/src/chacha_asm.S @@ -0,0 +1,1420 @@ +/* chacha_asm + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifndef HAVE_INTEL_AVX1 +#define HAVE_INTEL_AVX1 +#endif /* HAVE_INTEL_AVX1 */ +#ifndef NO_AVX2_SUPPORT +#define HAVE_INTEL_AVX2 +#endif /* NO_AVX2_SUPPORT */ + +#ifndef __APPLE__ +.text +.globl chacha_encrypt_x64 +.type chacha_encrypt_x64,@function +.align 4 +chacha_encrypt_x64: +#else +.section __TEXT,__text +.globl _chacha_encrypt_x64 +.p2align 2 +_chacha_encrypt_x64: +#endif /* __APPLE__ */ + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x40, %rsp + cmpl $0x40, %ecx + jl L_chacha_x64_small +L_chacha_x64_start: + subq $48, %rsp + movq %rdx, 24(%rsp) + movq %rsi, 32(%rsp) + movq %rcx, 40(%rsp) + movq 32(%rdi), %rax + movq 40(%rdi), %rbx + movq %rax, 8(%rsp) + movq %rbx, 16(%rsp) + movl (%rdi), %eax + movl 4(%rdi), %ebx + movl 8(%rdi), %ecx + movl 12(%rdi), %edx + movl 16(%rdi), %r8d + movl 20(%rdi), %r9d + movl 24(%rdi), %r10d + movl 28(%rdi), %r11d + movl 48(%rdi), %r12d + movl 52(%rdi), %r13d + movl 56(%rdi), %r14d + movl 60(%rdi), %r15d + movb $10, (%rsp) + movl 8(%rsp), %esi + movl 12(%rsp), %ebp +L_chacha_x64_block_crypt_start: + addl %r8d, %eax + addl %r9d, %ebx + xorl %eax, %r12d + xorl %ebx, %r13d + roll $16, %r12d + roll $16, %r13d + addl %r12d, %esi + addl %r13d, %ebp + xorl %esi, %r8d + xorl %ebp, %r9d + roll $12, %r8d + roll $12, %r9d + addl %r8d, %eax + addl %r9d, %ebx + xorl %eax, %r12d + xorl %ebx, %r13d + roll $8, %r12d + roll $8, %r13d + addl %r12d, %esi + addl %r13d, %ebp + xorl %esi, %r8d + xorl %ebp, %r9d + roll $7, %r8d + roll $7, %r9d + movl %esi, 8(%rsp) + movl %ebp, 12(%rsp) + movl 16(%rsp), %esi + movl 20(%rsp), %ebp + addl %r10d, %ecx + addl %r11d, %edx + xorl %ecx, %r14d + xorl %edx, %r15d + roll $16, %r14d + roll $16, %r15d + addl %r14d, %esi + addl %r15d, %ebp + xorl %esi, %r10d + xorl %ebp, %r11d + roll $12, %r10d + roll $12, %r11d + addl %r10d, %ecx + addl %r11d, %edx + xorl %ecx, %r14d + xorl %edx, %r15d + roll $8, %r14d + roll $8, %r15d + addl %r14d, %esi + addl %r15d, %ebp + xorl %esi, %r10d + xorl %ebp, %r11d + roll $7, %r10d + roll $7, %r11d + addl %r9d, %eax + addl %r10d, %ebx + xorl %eax, %r15d + xorl %ebx, %r12d + roll $16, %r15d + roll $16, %r12d + addl %r15d, %esi + addl %r12d, %ebp + xorl %esi, %r9d + xorl %ebp, %r10d + roll $12, %r9d + roll $12, %r10d + addl %r9d, %eax + addl %r10d, %ebx + xorl %eax, %r15d + xorl %ebx, %r12d + roll $8, %r15d + roll $8, %r12d + addl %r15d, %esi + addl %r12d, %ebp + xorl %esi, %r9d + xorl %ebp, %r10d + roll $7, %r9d + roll $7, %r10d + movl %esi, 16(%rsp) + movl %ebp, 20(%rsp) + movl 8(%rsp), %esi + movl 12(%rsp), %ebp + addl %r11d, %ecx + addl %r8d, %edx + xorl %ecx, %r13d + xorl %edx, %r14d + roll 
$16, %r13d + roll $16, %r14d + addl %r13d, %esi + addl %r14d, %ebp + xorl %esi, %r11d + xorl %ebp, %r8d + roll $12, %r11d + roll $12, %r8d + addl %r11d, %ecx + addl %r8d, %edx + xorl %ecx, %r13d + xorl %edx, %r14d + roll $8, %r13d + roll $8, %r14d + addl %r13d, %esi + addl %r14d, %ebp + xorl %esi, %r11d + xorl %ebp, %r8d + roll $7, %r11d + roll $7, %r8d + decb (%rsp) + jnz L_chacha_x64_block_crypt_start + movl %esi, 8(%rsp) + movl %ebp, 12(%rsp) + movq 32(%rsp), %rsi + movq 24(%rsp), %rbp + addl (%rdi), %eax + addl 4(%rdi), %ebx + addl 8(%rdi), %ecx + addl 12(%rdi), %edx + addl 16(%rdi), %r8d + addl 20(%rdi), %r9d + addl 24(%rdi), %r10d + addl 28(%rdi), %r11d + addl 48(%rdi), %r12d + addl 52(%rdi), %r13d + addl 56(%rdi), %r14d + addl 60(%rdi), %r15d + xorl (%rsi), %eax + xorl 4(%rsi), %ebx + xorl 8(%rsi), %ecx + xorl 12(%rsi), %edx + xorl 16(%rsi), %r8d + xorl 20(%rsi), %r9d + xorl 24(%rsi), %r10d + xorl 28(%rsi), %r11d + xorl 48(%rsi), %r12d + xorl 52(%rsi), %r13d + xorl 56(%rsi), %r14d + xorl 60(%rsi), %r15d + movl %eax, (%rbp) + movl %ebx, 4(%rbp) + movl %ecx, 8(%rbp) + movl %edx, 12(%rbp) + movl %r8d, 16(%rbp) + movl %r9d, 20(%rbp) + movl %r10d, 24(%rbp) + movl %r11d, 28(%rbp) + movl %r12d, 48(%rbp) + movl %r13d, 52(%rbp) + movl %r14d, 56(%rbp) + movl %r15d, 60(%rbp) + movl 8(%rsp), %eax + movl 12(%rsp), %ebx + movl 16(%rsp), %ecx + movl 20(%rsp), %edx + addl 32(%rdi), %eax + addl 36(%rdi), %ebx + addl 40(%rdi), %ecx + addl 44(%rdi), %edx + xorl 32(%rsi), %eax + xorl 36(%rsi), %ebx + xorl 40(%rsi), %ecx + xorl 44(%rsi), %edx + movl %eax, 32(%rbp) + movl %ebx, 36(%rbp) + movl %ecx, 40(%rbp) + movl %edx, 44(%rbp) + movq 24(%rsp), %rdx + movq 40(%rsp), %rcx + addl $0x01, 48(%rdi) + addq $48, %rsp + subl $0x40, %ecx + addq $0x40, %rsi + addq $0x40, %rdx + cmpl $0x40, %ecx + jge L_chacha_x64_start +L_chacha_x64_small: + cmpl $0x00, %ecx + je L_chacha_x64_done + subq $48, %rsp + movq %rdx, 24(%rsp) + movq %rsi, 32(%rsp) + movq %rcx, 40(%rsp) + movq 32(%rdi), %rax + movq 40(%rdi), %rbx + movq %rax, 8(%rsp) + movq %rbx, 16(%rsp) + movl (%rdi), %eax + movl 4(%rdi), %ebx + movl 8(%rdi), %ecx + movl 12(%rdi), %edx + movl 16(%rdi), %r8d + movl 20(%rdi), %r9d + movl 24(%rdi), %r10d + movl 28(%rdi), %r11d + movl 48(%rdi), %r12d + movl 52(%rdi), %r13d + movl 56(%rdi), %r14d + movl 60(%rdi), %r15d + movb $10, (%rsp) + movl 8(%rsp), %esi + movl 12(%rsp), %ebp +L_chacha_x64_partial_crypt_start: + addl %r8d, %eax + addl %r9d, %ebx + xorl %eax, %r12d + xorl %ebx, %r13d + roll $16, %r12d + roll $16, %r13d + addl %r12d, %esi + addl %r13d, %ebp + xorl %esi, %r8d + xorl %ebp, %r9d + roll $12, %r8d + roll $12, %r9d + addl %r8d, %eax + addl %r9d, %ebx + xorl %eax, %r12d + xorl %ebx, %r13d + roll $8, %r12d + roll $8, %r13d + addl %r12d, %esi + addl %r13d, %ebp + xorl %esi, %r8d + xorl %ebp, %r9d + roll $7, %r8d + roll $7, %r9d + movl %esi, 8(%rsp) + movl %ebp, 12(%rsp) + movl 16(%rsp), %esi + movl 20(%rsp), %ebp + addl %r10d, %ecx + addl %r11d, %edx + xorl %ecx, %r14d + xorl %edx, %r15d + roll $16, %r14d + roll $16, %r15d + addl %r14d, %esi + addl %r15d, %ebp + xorl %esi, %r10d + xorl %ebp, %r11d + roll $12, %r10d + roll $12, %r11d + addl %r10d, %ecx + addl %r11d, %edx + xorl %ecx, %r14d + xorl %edx, %r15d + roll $8, %r14d + roll $8, %r15d + addl %r14d, %esi + addl %r15d, %ebp + xorl %esi, %r10d + xorl %ebp, %r11d + roll $7, %r10d + roll $7, %r11d + addl %r9d, %eax + addl %r10d, %ebx + xorl %eax, %r15d + xorl %ebx, %r12d + roll $16, %r15d + roll $16, %r12d + addl %r15d, %esi + addl %r12d, %ebp + xorl %esi, %r9d 
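The add/xor/rotate runs above and below, with rotation counts 16, 12, 8 and 7, are pairs of ChaCha quarter rounds interleaved on different registers for instruction-level parallelism, and the movb $10 loop counter corresponds to ChaCha20's ten double rounds (column rounds, then diagonal rounds). As a reading aid, the same operation in plain C (an illustrative sketch, not code from this patch):

#include <stdint.h>

/* rotate a 32-bit word left by n bits */
#define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))

/* one ChaCha quarter round over state words a, b, c, d */
#define QUARTERROUND(a, b, c, d)           \
    do {                                   \
        a += b; d ^= a; d = ROTL32(d, 16); \
        c += d; b ^= c; b = ROTL32(b, 12); \
        a += b; d ^= a; d = ROTL32(d, 8);  \
        c += d; b ^= c; b = ROTL32(b, 7);  \
    } while (0)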
+ xorl %ebp, %r10d + roll $12, %r9d + roll $12, %r10d + addl %r9d, %eax + addl %r10d, %ebx + xorl %eax, %r15d + xorl %ebx, %r12d + roll $8, %r15d + roll $8, %r12d + addl %r15d, %esi + addl %r12d, %ebp + xorl %esi, %r9d + xorl %ebp, %r10d + roll $7, %r9d + roll $7, %r10d + movl %esi, 16(%rsp) + movl %ebp, 20(%rsp) + movl 8(%rsp), %esi + movl 12(%rsp), %ebp + addl %r11d, %ecx + addl %r8d, %edx + xorl %ecx, %r13d + xorl %edx, %r14d + roll $16, %r13d + roll $16, %r14d + addl %r13d, %esi + addl %r14d, %ebp + xorl %esi, %r11d + xorl %ebp, %r8d + roll $12, %r11d + roll $12, %r8d + addl %r11d, %ecx + addl %r8d, %edx + xorl %ecx, %r13d + xorl %edx, %r14d + roll $8, %r13d + roll $8, %r14d + addl %r13d, %esi + addl %r14d, %ebp + xorl %esi, %r11d + xorl %ebp, %r8d + roll $7, %r11d + roll $7, %r8d + decb (%rsp) + jnz L_chacha_x64_partial_crypt_start + movl %esi, 8(%rsp) + movl %ebp, 12(%rsp) + movq 32(%rsp), %rsi + addl (%rdi), %eax + addl 4(%rdi), %ebx + addl 8(%rdi), %ecx + addl 12(%rdi), %edx + addl 16(%rdi), %r8d + addl 20(%rdi), %r9d + addl 24(%rdi), %r10d + addl 28(%rdi), %r11d + addl 48(%rdi), %r12d + addl 52(%rdi), %r13d + addl 56(%rdi), %r14d + addl 60(%rdi), %r15d + movl %eax, 48(%rsp) + movl %ebx, 52(%rsp) + movl %ecx, 56(%rsp) + movl %edx, 60(%rsp) + movl %r8d, 64(%rsp) + movl %r9d, 68(%rsp) + movl %r10d, 72(%rsp) + movl %r11d, 76(%rsp) + movl %r12d, 96(%rsp) + movl %r13d, 100(%rsp) + movl %r14d, 104(%rsp) + movl %r15d, 108(%rsp) + movl 8(%rsp), %eax + movl 12(%rsp), %ebx + movl 16(%rsp), %ecx + movl 20(%rsp), %edx + addl 32(%rdi), %eax + addl 36(%rdi), %ebx + addl 40(%rdi), %ecx + addl 44(%rdi), %edx + movl %eax, 80(%rsp) + movl %ebx, 84(%rsp) + movl %ecx, 88(%rsp) + movl %edx, 92(%rsp) + movq 24(%rsp), %rdx + movq 40(%rsp), %rcx + addl $0x01, 48(%rdi) + addq $48, %rsp + movl %ecx, %r8d + xorq %rbx, %rbx + andl $7, %r8d + jz L_chacha_x64_partial_start64 +L_chacha_x64_partial_start8: + movzbl (%rsp,%rbx,1), %eax + xorb (%rsi,%rbx,1), %al + movb %al, (%rdx,%rbx,1) + incl %ebx + cmpl %r8d, %ebx + jne L_chacha_x64_partial_start8 + je L_chacha_x64_partial_end64 +L_chacha_x64_partial_start64: + movq (%rsp,%rbx,1), %rax + xorq (%rsi,%rbx,1), %rax + movq %rax, (%rdx,%rbx,1) + addl $8, %ebx +L_chacha_x64_partial_end64: + cmpl %ecx, %ebx + jne L_chacha_x64_partial_start64 +L_chacha_x64_done: + addq $0x40, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + repz retq +#ifndef __APPLE__ +.size chacha_encrypt_x64,.-chacha_encrypt_x64 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX1 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_chacha20_avx1_rotl8: +.quad 0x605040702010003, 0xe0d0c0f0a09080b +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_chacha20_avx1_rotl16: +.quad 0x504070601000302, 0xd0c0f0e09080b0a +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_chacha20_avx1_add: +.quad 0x100000000, 0x300000002 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_chacha20_avx1_four: +.quad 0x400000004, 0x400000004 +#ifndef __APPLE__ +.text +.globl chacha_encrypt_avx1 +.type chacha_encrypt_avx1,@function +.align 4 +chacha_encrypt_avx1: +#else +.section 
__TEXT,__text +.globl _chacha_encrypt_avx1 +.p2align 2 +_chacha_encrypt_avx1: +#endif /* __APPLE__ */ + subq $0x190, %rsp + movq %rsp, %r9 + leaq 256(%rsp), %r10 + andq $-16, %r9 + andq $-16, %r10 + movl %ecx, %eax + shrl $8, %eax + jz L_chacha20_avx1_end128 + vpshufd $0x00, (%rdi), %xmm0 + vpshufd $0x00, 4(%rdi), %xmm1 + vpshufd $0x00, 8(%rdi), %xmm2 + vpshufd $0x00, 12(%rdi), %xmm3 + vpshufd $0x00, 16(%rdi), %xmm4 + vpshufd $0x00, 20(%rdi), %xmm5 + vpshufd $0x00, 24(%rdi), %xmm6 + vpshufd $0x00, 28(%rdi), %xmm7 + vpshufd $0x00, 32(%rdi), %xmm8 + vpshufd $0x00, 36(%rdi), %xmm9 + vpshufd $0x00, 40(%rdi), %xmm10 + vpshufd $0x00, 44(%rdi), %xmm11 + vpshufd $0x00, 48(%rdi), %xmm12 + vpshufd $0x00, 52(%rdi), %xmm13 + vpshufd $0x00, 56(%rdi), %xmm14 + vpshufd $0x00, 60(%rdi), %xmm15 + vpaddd L_chacha20_avx1_add(%rip), %xmm12, %xmm12 + vmovdqa %xmm0, (%r9) + vmovdqa %xmm1, 16(%r9) + vmovdqa %xmm2, 32(%r9) + vmovdqa %xmm3, 48(%r9) + vmovdqa %xmm4, 64(%r9) + vmovdqa %xmm5, 80(%r9) + vmovdqa %xmm6, 96(%r9) + vmovdqa %xmm7, 112(%r9) + vmovdqa %xmm8, 128(%r9) + vmovdqa %xmm9, 144(%r9) + vmovdqa %xmm10, 160(%r9) + vmovdqa %xmm11, 176(%r9) + vmovdqa %xmm12, 192(%r9) + vmovdqa %xmm13, 208(%r9) + vmovdqa %xmm14, 224(%r9) + vmovdqa %xmm15, 240(%r9) +L_chacha20_avx1_start128: + vmovdqa %xmm11, 48(%r10) + movb $10, %r8b +L_chacha20_avx1_loop128: + vpaddd %xmm4, %xmm0, %xmm0 + vpxor %xmm0, %xmm12, %xmm12 + vmovdqa 48(%r10), %xmm11 + vpshufb L_chacha20_avx1_rotl16(%rip), %xmm12, %xmm12 + vpaddd %xmm12, %xmm8, %xmm8 + vpxor %xmm8, %xmm4, %xmm4 + vpaddd %xmm5, %xmm1, %xmm1 + vpxor %xmm1, %xmm13, %xmm13 + vpshufb L_chacha20_avx1_rotl16(%rip), %xmm13, %xmm13 + vpaddd %xmm13, %xmm9, %xmm9 + vpxor %xmm9, %xmm5, %xmm5 + vpaddd %xmm6, %xmm2, %xmm2 + vpxor %xmm2, %xmm14, %xmm14 + vpshufb L_chacha20_avx1_rotl16(%rip), %xmm14, %xmm14 + vpaddd %xmm14, %xmm10, %xmm10 + vpxor %xmm10, %xmm6, %xmm6 + vpaddd %xmm7, %xmm3, %xmm3 + vpxor %xmm3, %xmm15, %xmm15 + vpshufb L_chacha20_avx1_rotl16(%rip), %xmm15, %xmm15 + vpaddd %xmm15, %xmm11, %xmm11 + vpxor %xmm11, %xmm7, %xmm7 + vmovdqa %xmm11, 48(%r10) + vpsrld $20, %xmm4, %xmm11 + vpslld $12, %xmm4, %xmm4 + vpxor %xmm11, %xmm4, %xmm4 + vpsrld $20, %xmm5, %xmm11 + vpslld $12, %xmm5, %xmm5 + vpxor %xmm11, %xmm5, %xmm5 + vpsrld $20, %xmm6, %xmm11 + vpslld $12, %xmm6, %xmm6 + vpxor %xmm11, %xmm6, %xmm6 + vpsrld $20, %xmm7, %xmm11 + vpslld $12, %xmm7, %xmm7 + vpxor %xmm11, %xmm7, %xmm7 + vpaddd %xmm4, %xmm0, %xmm0 + vpxor %xmm0, %xmm12, %xmm12 + vmovdqa 48(%r10), %xmm11 + vpshufb L_chacha20_avx1_rotl8(%rip), %xmm12, %xmm12 + vpaddd %xmm12, %xmm8, %xmm8 + vpxor %xmm8, %xmm4, %xmm4 + vpaddd %xmm5, %xmm1, %xmm1 + vpxor %xmm1, %xmm13, %xmm13 + vpshufb L_chacha20_avx1_rotl8(%rip), %xmm13, %xmm13 + vpaddd %xmm13, %xmm9, %xmm9 + vpxor %xmm9, %xmm5, %xmm5 + vpaddd %xmm6, %xmm2, %xmm2 + vpxor %xmm2, %xmm14, %xmm14 + vpshufb L_chacha20_avx1_rotl8(%rip), %xmm14, %xmm14 + vpaddd %xmm14, %xmm10, %xmm10 + vpxor %xmm10, %xmm6, %xmm6 + vpaddd %xmm7, %xmm3, %xmm3 + vpxor %xmm3, %xmm15, %xmm15 + vpshufb L_chacha20_avx1_rotl8(%rip), %xmm15, %xmm15 + vpaddd %xmm15, %xmm11, %xmm11 + vpxor %xmm11, %xmm7, %xmm7 + vmovdqa %xmm11, 48(%r10) + vpsrld $25, %xmm4, %xmm11 + vpslld $7, %xmm4, %xmm4 + vpxor %xmm11, %xmm4, %xmm4 + vpsrld $25, %xmm5, %xmm11 + vpslld $7, %xmm5, %xmm5 + vpxor %xmm11, %xmm5, %xmm5 + vpsrld $25, %xmm6, %xmm11 + vpslld $7, %xmm6, %xmm6 + vpxor %xmm11, %xmm6, %xmm6 + vpsrld $25, %xmm7, %xmm11 + vpslld $7, %xmm7, %xmm7 + vpxor %xmm11, %xmm7, %xmm7 + vpaddd %xmm5, %xmm0, %xmm0 + vpxor %xmm0, 
%xmm15, %xmm15 + vmovdqa 48(%r10), %xmm11 + vpshufb L_chacha20_avx1_rotl16(%rip), %xmm15, %xmm15 + vpaddd %xmm15, %xmm10, %xmm10 + vpxor %xmm10, %xmm5, %xmm5 + vpaddd %xmm6, %xmm1, %xmm1 + vpxor %xmm1, %xmm12, %xmm12 + vpshufb L_chacha20_avx1_rotl16(%rip), %xmm12, %xmm12 + vpaddd %xmm12, %xmm11, %xmm11 + vpxor %xmm11, %xmm6, %xmm6 + vpaddd %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm13, %xmm13 + vpshufb L_chacha20_avx1_rotl16(%rip), %xmm13, %xmm13 + vpaddd %xmm13, %xmm8, %xmm8 + vpxor %xmm8, %xmm7, %xmm7 + vpaddd %xmm4, %xmm3, %xmm3 + vpxor %xmm3, %xmm14, %xmm14 + vpshufb L_chacha20_avx1_rotl16(%rip), %xmm14, %xmm14 + vpaddd %xmm14, %xmm9, %xmm9 + vpxor %xmm9, %xmm4, %xmm4 + vmovdqa %xmm11, 48(%r10) + vpsrld $20, %xmm5, %xmm11 + vpslld $12, %xmm5, %xmm5 + vpxor %xmm11, %xmm5, %xmm5 + vpsrld $20, %xmm6, %xmm11 + vpslld $12, %xmm6, %xmm6 + vpxor %xmm11, %xmm6, %xmm6 + vpsrld $20, %xmm7, %xmm11 + vpslld $12, %xmm7, %xmm7 + vpxor %xmm11, %xmm7, %xmm7 + vpsrld $20, %xmm4, %xmm11 + vpslld $12, %xmm4, %xmm4 + vpxor %xmm11, %xmm4, %xmm4 + vpaddd %xmm5, %xmm0, %xmm0 + vpxor %xmm0, %xmm15, %xmm15 + vmovdqa 48(%r10), %xmm11 + vpshufb L_chacha20_avx1_rotl8(%rip), %xmm15, %xmm15 + vpaddd %xmm15, %xmm10, %xmm10 + vpxor %xmm10, %xmm5, %xmm5 + vpaddd %xmm6, %xmm1, %xmm1 + vpxor %xmm1, %xmm12, %xmm12 + vpshufb L_chacha20_avx1_rotl8(%rip), %xmm12, %xmm12 + vpaddd %xmm12, %xmm11, %xmm11 + vpxor %xmm11, %xmm6, %xmm6 + vpaddd %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm13, %xmm13 + vpshufb L_chacha20_avx1_rotl8(%rip), %xmm13, %xmm13 + vpaddd %xmm13, %xmm8, %xmm8 + vpxor %xmm8, %xmm7, %xmm7 + vpaddd %xmm4, %xmm3, %xmm3 + vpxor %xmm3, %xmm14, %xmm14 + vpshufb L_chacha20_avx1_rotl8(%rip), %xmm14, %xmm14 + vpaddd %xmm14, %xmm9, %xmm9 + vpxor %xmm9, %xmm4, %xmm4 + vmovdqa %xmm11, 48(%r10) + vpsrld $25, %xmm5, %xmm11 + vpslld $7, %xmm5, %xmm5 + vpxor %xmm11, %xmm5, %xmm5 + vpsrld $25, %xmm6, %xmm11 + vpslld $7, %xmm6, %xmm6 + vpxor %xmm11, %xmm6, %xmm6 + vpsrld $25, %xmm7, %xmm11 + vpslld $7, %xmm7, %xmm7 + vpxor %xmm11, %xmm7, %xmm7 + vpsrld $25, %xmm4, %xmm11 + vpslld $7, %xmm4, %xmm4 + vpxor %xmm11, %xmm4, %xmm4 + decb %r8b + jnz L_chacha20_avx1_loop128 + vmovdqa 48(%r10), %xmm11 + vpaddd (%r9), %xmm0, %xmm0 + vpaddd 16(%r9), %xmm1, %xmm1 + vpaddd 32(%r9), %xmm2, %xmm2 + vpaddd 48(%r9), %xmm3, %xmm3 + vpaddd 64(%r9), %xmm4, %xmm4 + vpaddd 80(%r9), %xmm5, %xmm5 + vpaddd 96(%r9), %xmm6, %xmm6 + vpaddd 112(%r9), %xmm7, %xmm7 + vpaddd 128(%r9), %xmm8, %xmm8 + vpaddd 144(%r9), %xmm9, %xmm9 + vpaddd 160(%r9), %xmm10, %xmm10 + vpaddd 176(%r9), %xmm11, %xmm11 + vpaddd 192(%r9), %xmm12, %xmm12 + vpaddd 208(%r9), %xmm13, %xmm13 + vpaddd 224(%r9), %xmm14, %xmm14 + vpaddd 240(%r9), %xmm15, %xmm15 + vmovdqa %xmm8, (%r10) + vmovdqa %xmm9, 16(%r10) + vmovdqa %xmm10, 32(%r10) + vmovdqa %xmm11, 48(%r10) + vmovdqa %xmm12, 64(%r10) + vmovdqa %xmm13, 80(%r10) + vmovdqa %xmm14, 96(%r10) + vmovdqa %xmm15, 112(%r10) + vpunpckldq %xmm1, %xmm0, %xmm8 + vpunpckldq %xmm3, %xmm2, %xmm9 + vpunpckhdq %xmm1, %xmm0, %xmm12 + vpunpckhdq %xmm3, %xmm2, %xmm13 + vpunpckldq %xmm5, %xmm4, %xmm10 + vpunpckldq %xmm7, %xmm6, %xmm11 + vpunpckhdq %xmm5, %xmm4, %xmm14 + vpunpckhdq %xmm7, %xmm6, %xmm15 + vpunpcklqdq %xmm9, %xmm8, %xmm0 + vpunpcklqdq %xmm11, %xmm10, %xmm1 + vpunpckhqdq %xmm9, %xmm8, %xmm2 + vpunpckhqdq %xmm11, %xmm10, %xmm3 + vpunpcklqdq %xmm13, %xmm12, %xmm4 + vpunpcklqdq %xmm15, %xmm14, %xmm5 + vpunpckhqdq %xmm13, %xmm12, %xmm6 + vpunpckhqdq %xmm15, %xmm14, %xmm7 + vmovdqu (%rsi), %xmm8 + vmovdqu 16(%rsi), %xmm9 + vmovdqu 64(%rsi), %xmm10 + vmovdqu 
80(%rsi), %xmm11 + vmovdqu 128(%rsi), %xmm12 + vmovdqu 144(%rsi), %xmm13 + vmovdqu 192(%rsi), %xmm14 + vmovdqu 208(%rsi), %xmm15 + vpxor %xmm8, %xmm0, %xmm0 + vpxor %xmm9, %xmm1, %xmm1 + vpxor %xmm10, %xmm2, %xmm2 + vpxor %xmm11, %xmm3, %xmm3 + vpxor %xmm12, %xmm4, %xmm4 + vpxor %xmm13, %xmm5, %xmm5 + vpxor %xmm14, %xmm6, %xmm6 + vpxor %xmm15, %xmm7, %xmm7 + vmovdqu %xmm0, (%rdx) + vmovdqu %xmm1, 16(%rdx) + vmovdqu %xmm2, 64(%rdx) + vmovdqu %xmm3, 80(%rdx) + vmovdqu %xmm4, 128(%rdx) + vmovdqu %xmm5, 144(%rdx) + vmovdqu %xmm6, 192(%rdx) + vmovdqu %xmm7, 208(%rdx) + vmovdqa (%r10), %xmm0 + vmovdqa 16(%r10), %xmm1 + vmovdqa 32(%r10), %xmm2 + vmovdqa 48(%r10), %xmm3 + vmovdqa 64(%r10), %xmm4 + vmovdqa 80(%r10), %xmm5 + vmovdqa 96(%r10), %xmm6 + vmovdqa 112(%r10), %xmm7 + vpunpckldq %xmm1, %xmm0, %xmm8 + vpunpckldq %xmm3, %xmm2, %xmm9 + vpunpckhdq %xmm1, %xmm0, %xmm12 + vpunpckhdq %xmm3, %xmm2, %xmm13 + vpunpckldq %xmm5, %xmm4, %xmm10 + vpunpckldq %xmm7, %xmm6, %xmm11 + vpunpckhdq %xmm5, %xmm4, %xmm14 + vpunpckhdq %xmm7, %xmm6, %xmm15 + vpunpcklqdq %xmm9, %xmm8, %xmm0 + vpunpcklqdq %xmm11, %xmm10, %xmm1 + vpunpckhqdq %xmm9, %xmm8, %xmm2 + vpunpckhqdq %xmm11, %xmm10, %xmm3 + vpunpcklqdq %xmm13, %xmm12, %xmm4 + vpunpcklqdq %xmm15, %xmm14, %xmm5 + vpunpckhqdq %xmm13, %xmm12, %xmm6 + vpunpckhqdq %xmm15, %xmm14, %xmm7 + vmovdqu 32(%rsi), %xmm8 + vmovdqu 48(%rsi), %xmm9 + vmovdqu 96(%rsi), %xmm10 + vmovdqu 112(%rsi), %xmm11 + vmovdqu 160(%rsi), %xmm12 + vmovdqu 176(%rsi), %xmm13 + vmovdqu 224(%rsi), %xmm14 + vmovdqu 240(%rsi), %xmm15 + vpxor %xmm8, %xmm0, %xmm0 + vpxor %xmm9, %xmm1, %xmm1 + vpxor %xmm10, %xmm2, %xmm2 + vpxor %xmm11, %xmm3, %xmm3 + vpxor %xmm12, %xmm4, %xmm4 + vpxor %xmm13, %xmm5, %xmm5 + vpxor %xmm14, %xmm6, %xmm6 + vpxor %xmm15, %xmm7, %xmm7 + vmovdqu %xmm0, 32(%rdx) + vmovdqu %xmm1, 48(%rdx) + vmovdqu %xmm2, 96(%rdx) + vmovdqu %xmm3, 112(%rdx) + vmovdqu %xmm4, 160(%rdx) + vmovdqu %xmm5, 176(%rdx) + vmovdqu %xmm6, 224(%rdx) + vmovdqu %xmm7, 240(%rdx) + vmovdqa 192(%r9), %xmm12 + addq $0x100, %rsi + addq $0x100, %rdx + vpaddd L_chacha20_avx1_four(%rip), %xmm12, %xmm12 + subl $0x100, %ecx + vmovdqa %xmm12, 192(%r9) + cmpl $0x100, %ecx + jl L_chacha20_avx1_done128 + vmovdqa (%r9), %xmm0 + vmovdqa 16(%r9), %xmm1 + vmovdqa 32(%r9), %xmm2 + vmovdqa 48(%r9), %xmm3 + vmovdqa 64(%r9), %xmm4 + vmovdqa 80(%r9), %xmm5 + vmovdqa 96(%r9), %xmm6 + vmovdqa 112(%r9), %xmm7 + vmovdqa 128(%r9), %xmm8 + vmovdqa 144(%r9), %xmm9 + vmovdqa 160(%r9), %xmm10 + vmovdqa 176(%r9), %xmm11 + vmovdqa 192(%r9), %xmm12 + vmovdqa 208(%r9), %xmm13 + vmovdqa 224(%r9), %xmm14 + vmovdqa 240(%r9), %xmm15 + jmp L_chacha20_avx1_start128 +L_chacha20_avx1_done128: + shl $2, %eax + addl %eax, 48(%rdi) +L_chacha20_avx1_end128: + cmpl $0x40, %ecx + jl L_chacha20_avx1_block_done +L_chacha20_avx1_block_start: + vmovdqu (%rdi), %xmm0 + vmovdqu 16(%rdi), %xmm1 + vmovdqu 32(%rdi), %xmm2 + vmovdqu 48(%rdi), %xmm3 + vmovdqa %xmm0, %xmm5 + vmovdqa %xmm1, %xmm6 + vmovdqa %xmm2, %xmm7 + vmovdqa %xmm3, %xmm8 + movb $10, %al +L_chacha20_avx1_block_crypt_start: + vpaddd %xmm1, %xmm0, %xmm0 + vpxor %xmm0, %xmm3, %xmm3 + vpshufb L_chacha20_avx1_rotl16(%rip), %xmm3, %xmm3 + vpaddd %xmm3, %xmm2, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpsrld $20, %xmm1, %xmm4 + vpslld $12, %xmm1, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpaddd %xmm1, %xmm0, %xmm0 + vpxor %xmm0, %xmm3, %xmm3 + vpshufb L_chacha20_avx1_rotl8(%rip), %xmm3, %xmm3 + vpaddd %xmm3, %xmm2, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpsrld $25, %xmm1, %xmm4 + vpslld $7, %xmm1, %xmm1 + vpxor %xmm4, 
%xmm1, %xmm1 + vpshufd $57, %xmm1, %xmm1 + vpshufd $0x4e, %xmm2, %xmm2 + vpshufd $0x93, %xmm3, %xmm3 + vpaddd %xmm1, %xmm0, %xmm0 + vpxor %xmm0, %xmm3, %xmm3 + vpshufb L_chacha20_avx1_rotl16(%rip), %xmm3, %xmm3 + vpaddd %xmm3, %xmm2, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpsrld $20, %xmm1, %xmm4 + vpslld $12, %xmm1, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpaddd %xmm1, %xmm0, %xmm0 + vpxor %xmm0, %xmm3, %xmm3 + vpshufb L_chacha20_avx1_rotl8(%rip), %xmm3, %xmm3 + vpaddd %xmm3, %xmm2, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpsrld $25, %xmm1, %xmm4 + vpslld $7, %xmm1, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpshufd $0x93, %xmm1, %xmm1 + vpshufd $0x4e, %xmm2, %xmm2 + vpshufd $57, %xmm3, %xmm3 + decb %al + jnz L_chacha20_avx1_block_crypt_start + vpaddd %xmm5, %xmm0, %xmm0 + vpaddd %xmm6, %xmm1, %xmm1 + vpaddd %xmm7, %xmm2, %xmm2 + vpaddd %xmm8, %xmm3, %xmm3 + vmovdqu (%rsi), %xmm5 + vmovdqu 16(%rsi), %xmm6 + vmovdqu 32(%rsi), %xmm7 + vmovdqu 48(%rsi), %xmm8 + vpxor %xmm5, %xmm0, %xmm0 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm8, %xmm3, %xmm3 + vmovdqu %xmm0, (%rdx) + vmovdqu %xmm1, 16(%rdx) + vmovdqu %xmm2, 32(%rdx) + vmovdqu %xmm3, 48(%rdx) + addl $0x01, 48(%rdi) + subl $0x40, %ecx + addq $0x40, %rsi + addq $0x40, %rdx + cmpl $0x40, %ecx + jge L_chacha20_avx1_block_start +L_chacha20_avx1_block_done: + cmpl $0x00, %ecx + je L_chacha20_avx1_partial_done + vmovdqu (%rdi), %xmm0 + vmovdqu 16(%rdi), %xmm1 + vmovdqu 32(%rdi), %xmm2 + vmovdqu 48(%rdi), %xmm3 + vmovdqa %xmm0, %xmm5 + vmovdqa %xmm1, %xmm6 + vmovdqa %xmm2, %xmm7 + vmovdqa %xmm3, %xmm8 + movb $10, %al +L_chacha20_avx1_partial_crypt_start: + vpaddd %xmm1, %xmm0, %xmm0 + vpxor %xmm0, %xmm3, %xmm3 + vpshufb L_chacha20_avx1_rotl16(%rip), %xmm3, %xmm3 + vpaddd %xmm3, %xmm2, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpsrld $20, %xmm1, %xmm4 + vpslld $12, %xmm1, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpaddd %xmm1, %xmm0, %xmm0 + vpxor %xmm0, %xmm3, %xmm3 + vpshufb L_chacha20_avx1_rotl8(%rip), %xmm3, %xmm3 + vpaddd %xmm3, %xmm2, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpsrld $25, %xmm1, %xmm4 + vpslld $7, %xmm1, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpshufd $57, %xmm1, %xmm1 + vpshufd $0x4e, %xmm2, %xmm2 + vpshufd $0x93, %xmm3, %xmm3 + vpaddd %xmm1, %xmm0, %xmm0 + vpxor %xmm0, %xmm3, %xmm3 + vpshufb L_chacha20_avx1_rotl16(%rip), %xmm3, %xmm3 + vpaddd %xmm3, %xmm2, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpsrld $20, %xmm1, %xmm4 + vpslld $12, %xmm1, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpaddd %xmm1, %xmm0, %xmm0 + vpxor %xmm0, %xmm3, %xmm3 + vpshufb L_chacha20_avx1_rotl8(%rip), %xmm3, %xmm3 + vpaddd %xmm3, %xmm2, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpsrld $25, %xmm1, %xmm4 + vpslld $7, %xmm1, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpshufd $0x93, %xmm1, %xmm1 + vpshufd $0x4e, %xmm2, %xmm2 + vpshufd $57, %xmm3, %xmm3 + decb %al + jnz L_chacha20_avx1_partial_crypt_start + vpaddd %xmm5, %xmm0, %xmm0 + vpaddd %xmm6, %xmm1, %xmm1 + vpaddd %xmm7, %xmm2, %xmm2 + vpaddd %xmm8, %xmm3, %xmm3 + vmovdqu %xmm0, (%r10) + vmovdqu %xmm1, 16(%r10) + vmovdqu %xmm2, 32(%r10) + vmovdqu %xmm3, 48(%r10) + addl $0x01, 48(%rdi) + movl %ecx, %r8d + xorq %r11, %r11 + andl $7, %r8d + jz L_chacha20_avx1_partial_start64 +L_chacha20_avx1_partial_start8: + movzbl (%r10,%r11,1), %eax + xorb (%rsi,%r11,1), %al + movb %al, (%rdx,%r11,1) + incl %r11d + cmpl %r8d, %r11d + jne L_chacha20_avx1_partial_start8 + je L_chacha20_avx1_partial_end64 +L_chacha20_avx1_partial_start64: + movq (%r10,%r11,1), %rax + xorq (%rsi,%r11,1), %rax + movq %rax, (%rdx,%r11,1) + addl $8, %r11d 
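The byte loop and 8-byte loop at this point (and the matching L_chacha_x64_partial_* loops earlier) implement the common partial-block tail pattern: the last keystream block is written to scratch, then XORed into the message one byte at a time until the residue of len modulo 8 is consumed, then eight bytes at a time. In C the logic is roughly the following (an illustrative sketch with assumed names):

#include <stdint.h>
#include <string.h>

/* XOR 'len' (< 64) bytes of keystream 'ks' into 'in', writing 'out' */
static void xor_partial_block(uint8_t* out, const uint8_t* in,
                              const uint8_t* ks, uint32_t len)
{
    uint32_t i = 0;

    while (i < (len & 7)) {       /* len % 8 single bytes first */
        out[i] = (uint8_t)(in[i] ^ ks[i]);
        i++;
    }
    while (i < len) {             /* then 8-byte chunks */
        uint64_t m, k;
        memcpy(&m, in + i, 8);    /* memcpy keeps the loads unaligned-safe */
        memcpy(&k, ks + i, 8);
        m ^= k;
        memcpy(out + i, &m, 8);
        i += 8;
    }
}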
+L_chacha20_avx1_partial_end64: + cmpl %ecx, %r11d + jne L_chacha20_avx1_partial_start64 +L_chacha20_avx1_partial_done: + addq $0x190, %rsp + repz retq +#ifndef __APPLE__ +.size chacha_encrypt_avx1,.-chacha_encrypt_avx1 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX1 */ +#ifdef HAVE_INTEL_AVX2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_chacha20_avx2_rotl8: +.quad 0x605040702010003, 0xe0d0c0f0a09080b +.quad 0x605040702010003, 0xe0d0c0f0a09080b +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_chacha20_avx2_rotl16: +.quad 0x504070601000302, 0xd0c0f0e09080b0a +.quad 0x504070601000302, 0xd0c0f0e09080b0a +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_chacha20_avx2_add: +.quad 0x100000000, 0x300000002 +.quad 0x500000004, 0x700000006 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_chacha20_avx2_eight: +.quad 0x800000008, 0x800000008 +.quad 0x800000008, 0x800000008 +#ifndef __APPLE__ +.text +.globl chacha_encrypt_avx2 +.type chacha_encrypt_avx2,@function +.align 4 +chacha_encrypt_avx2: +#else +.section __TEXT,__text +.globl _chacha_encrypt_avx2 +.p2align 2 +_chacha_encrypt_avx2: +#endif /* __APPLE__ */ + subq $0x310, %rsp + movq %rsp, %r9 + leaq 512(%rsp), %r10 + andq $-32, %r9 + andq $-32, %r10 + movl %ecx, %eax + shrl $9, %eax + jz L_chacha20_avx2_end256 + vpbroadcastd (%rdi), %ymm0 + vpbroadcastd 4(%rdi), %ymm1 + vpbroadcastd 8(%rdi), %ymm2 + vpbroadcastd 12(%rdi), %ymm3 + vpbroadcastd 16(%rdi), %ymm4 + vpbroadcastd 20(%rdi), %ymm5 + vpbroadcastd 24(%rdi), %ymm6 + vpbroadcastd 28(%rdi), %ymm7 + vpbroadcastd 32(%rdi), %ymm8 + vpbroadcastd 36(%rdi), %ymm9 + vpbroadcastd 40(%rdi), %ymm10 + vpbroadcastd 44(%rdi), %ymm11 + vpbroadcastd 48(%rdi), %ymm12 + vpbroadcastd 52(%rdi), %ymm13 + vpbroadcastd 56(%rdi), %ymm14 + vpbroadcastd 60(%rdi), %ymm15 + vpaddd L_chacha20_avx2_add(%rip), %ymm12, %ymm12 + vmovdqa %ymm0, (%r9) + vmovdqa %ymm1, 32(%r9) + vmovdqa %ymm2, 64(%r9) + vmovdqa %ymm3, 96(%r9) + vmovdqa %ymm4, 128(%r9) + vmovdqa %ymm5, 160(%r9) + vmovdqa %ymm6, 192(%r9) + vmovdqa %ymm7, 224(%r9) + vmovdqa %ymm8, 256(%r9) + vmovdqa %ymm9, 288(%r9) + vmovdqa %ymm10, 320(%r9) + vmovdqa %ymm11, 352(%r9) + vmovdqa %ymm12, 384(%r9) + vmovdqa %ymm13, 416(%r9) + vmovdqa %ymm14, 448(%r9) + vmovdqa %ymm15, 480(%r9) +L_chacha20_avx2_start256: + movb $10, %r8b + vmovdqa %ymm11, 96(%r10) +L_chacha20_avx2_loop256: + vpaddd %ymm4, %ymm0, %ymm0 + vpxor %ymm0, %ymm12, %ymm12 + vmovdqa 96(%r10), %ymm11 + vpshufb L_chacha20_avx2_rotl16(%rip), %ymm12, %ymm12 + vpaddd %ymm12, %ymm8, %ymm8 + vpxor %ymm8, %ymm4, %ymm4 + vpaddd %ymm5, %ymm1, %ymm1 + vpxor %ymm1, %ymm13, %ymm13 + vpshufb L_chacha20_avx2_rotl16(%rip), %ymm13, %ymm13 + vpaddd %ymm13, %ymm9, %ymm9 + vpxor %ymm9, %ymm5, %ymm5 + vpaddd %ymm6, %ymm2, %ymm2 + vpxor %ymm2, %ymm14, %ymm14 + vpshufb L_chacha20_avx2_rotl16(%rip), %ymm14, %ymm14 + vpaddd %ymm14, %ymm10, %ymm10 + vpxor %ymm10, %ymm6, %ymm6 + vpaddd %ymm7, %ymm3, %ymm3 + vpxor %ymm3, %ymm15, %ymm15 + vpshufb L_chacha20_avx2_rotl16(%rip), %ymm15, %ymm15 + vpaddd %ymm15, %ymm11, %ymm11 + vpxor %ymm11, %ymm7, %ymm7 + vmovdqa %ymm11, 96(%r10) + vpsrld $20, %ymm4, %ymm11 + 
vpslld $12, %ymm4, %ymm4 + vpxor %ymm11, %ymm4, %ymm4 + vpsrld $20, %ymm5, %ymm11 + vpslld $12, %ymm5, %ymm5 + vpxor %ymm11, %ymm5, %ymm5 + vpsrld $20, %ymm6, %ymm11 + vpslld $12, %ymm6, %ymm6 + vpxor %ymm11, %ymm6, %ymm6 + vpsrld $20, %ymm7, %ymm11 + vpslld $12, %ymm7, %ymm7 + vpxor %ymm11, %ymm7, %ymm7 + vpaddd %ymm4, %ymm0, %ymm0 + vpxor %ymm0, %ymm12, %ymm12 + vmovdqa 96(%r10), %ymm11 + vpshufb L_chacha20_avx2_rotl8(%rip), %ymm12, %ymm12 + vpaddd %ymm12, %ymm8, %ymm8 + vpxor %ymm8, %ymm4, %ymm4 + vpaddd %ymm5, %ymm1, %ymm1 + vpxor %ymm1, %ymm13, %ymm13 + vpshufb L_chacha20_avx2_rotl8(%rip), %ymm13, %ymm13 + vpaddd %ymm13, %ymm9, %ymm9 + vpxor %ymm9, %ymm5, %ymm5 + vpaddd %ymm6, %ymm2, %ymm2 + vpxor %ymm2, %ymm14, %ymm14 + vpshufb L_chacha20_avx2_rotl8(%rip), %ymm14, %ymm14 + vpaddd %ymm14, %ymm10, %ymm10 + vpxor %ymm10, %ymm6, %ymm6 + vpaddd %ymm7, %ymm3, %ymm3 + vpxor %ymm3, %ymm15, %ymm15 + vpshufb L_chacha20_avx2_rotl8(%rip), %ymm15, %ymm15 + vpaddd %ymm15, %ymm11, %ymm11 + vpxor %ymm11, %ymm7, %ymm7 + vmovdqa %ymm11, 96(%r10) + vpsrld $25, %ymm4, %ymm11 + vpslld $7, %ymm4, %ymm4 + vpxor %ymm11, %ymm4, %ymm4 + vpsrld $25, %ymm5, %ymm11 + vpslld $7, %ymm5, %ymm5 + vpxor %ymm11, %ymm5, %ymm5 + vpsrld $25, %ymm6, %ymm11 + vpslld $7, %ymm6, %ymm6 + vpxor %ymm11, %ymm6, %ymm6 + vpsrld $25, %ymm7, %ymm11 + vpslld $7, %ymm7, %ymm7 + vpxor %ymm11, %ymm7, %ymm7 + vpaddd %ymm5, %ymm0, %ymm0 + vpxor %ymm0, %ymm15, %ymm15 + vmovdqa 96(%r10), %ymm11 + vpshufb L_chacha20_avx2_rotl16(%rip), %ymm15, %ymm15 + vpaddd %ymm15, %ymm10, %ymm10 + vpxor %ymm10, %ymm5, %ymm5 + vpaddd %ymm6, %ymm1, %ymm1 + vpxor %ymm1, %ymm12, %ymm12 + vpshufb L_chacha20_avx2_rotl16(%rip), %ymm12, %ymm12 + vpaddd %ymm12, %ymm11, %ymm11 + vpxor %ymm11, %ymm6, %ymm6 + vpaddd %ymm7, %ymm2, %ymm2 + vpxor %ymm2, %ymm13, %ymm13 + vpshufb L_chacha20_avx2_rotl16(%rip), %ymm13, %ymm13 + vpaddd %ymm13, %ymm8, %ymm8 + vpxor %ymm8, %ymm7, %ymm7 + vpaddd %ymm4, %ymm3, %ymm3 + vpxor %ymm3, %ymm14, %ymm14 + vpshufb L_chacha20_avx2_rotl16(%rip), %ymm14, %ymm14 + vpaddd %ymm14, %ymm9, %ymm9 + vpxor %ymm9, %ymm4, %ymm4 + vmovdqa %ymm11, 96(%r10) + vpsrld $20, %ymm5, %ymm11 + vpslld $12, %ymm5, %ymm5 + vpxor %ymm11, %ymm5, %ymm5 + vpsrld $20, %ymm6, %ymm11 + vpslld $12, %ymm6, %ymm6 + vpxor %ymm11, %ymm6, %ymm6 + vpsrld $20, %ymm7, %ymm11 + vpslld $12, %ymm7, %ymm7 + vpxor %ymm11, %ymm7, %ymm7 + vpsrld $20, %ymm4, %ymm11 + vpslld $12, %ymm4, %ymm4 + vpxor %ymm11, %ymm4, %ymm4 + vpaddd %ymm5, %ymm0, %ymm0 + vpxor %ymm0, %ymm15, %ymm15 + vmovdqa 96(%r10), %ymm11 + vpshufb L_chacha20_avx2_rotl8(%rip), %ymm15, %ymm15 + vpaddd %ymm15, %ymm10, %ymm10 + vpxor %ymm10, %ymm5, %ymm5 + vpaddd %ymm6, %ymm1, %ymm1 + vpxor %ymm1, %ymm12, %ymm12 + vpshufb L_chacha20_avx2_rotl8(%rip), %ymm12, %ymm12 + vpaddd %ymm12, %ymm11, %ymm11 + vpxor %ymm11, %ymm6, %ymm6 + vpaddd %ymm7, %ymm2, %ymm2 + vpxor %ymm2, %ymm13, %ymm13 + vpshufb L_chacha20_avx2_rotl8(%rip), %ymm13, %ymm13 + vpaddd %ymm13, %ymm8, %ymm8 + vpxor %ymm8, %ymm7, %ymm7 + vpaddd %ymm4, %ymm3, %ymm3 + vpxor %ymm3, %ymm14, %ymm14 + vpshufb L_chacha20_avx2_rotl8(%rip), %ymm14, %ymm14 + vpaddd %ymm14, %ymm9, %ymm9 + vpxor %ymm9, %ymm4, %ymm4 + vmovdqa %ymm11, 96(%r10) + vpsrld $25, %ymm5, %ymm11 + vpslld $7, %ymm5, %ymm5 + vpxor %ymm11, %ymm5, %ymm5 + vpsrld $25, %ymm6, %ymm11 + vpslld $7, %ymm6, %ymm6 + vpxor %ymm11, %ymm6, %ymm6 + vpsrld $25, %ymm7, %ymm11 + vpslld $7, %ymm7, %ymm7 + vpxor %ymm11, %ymm7, %ymm7 + vpsrld $25, %ymm4, %ymm11 + vpslld $7, %ymm4, %ymm4 + vpxor %ymm11, %ymm4, %ymm4 + decb %r8b 
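(The jnz just below closes this ten-iteration double-round loop.) In the 512-byte AVX2 path, each ymm register holds one of the 16 state words for eight independent blocks at once: vpbroadcastd replicates a word across all eight lanes, L_chacha20_avx2_add staggers the per-block counters by 0..7, and L_chacha20_avx2_eight advances them by 8 per outer iteration. Schematically, in C (illustrative only; names are assumptions):

#include <stdint.h>

/* lanes[w][j] = state word w of block (base_counter + j) */
static void load_lanes(uint32_t lanes[16][8], const uint32_t state[16])
{
    int w, j;
    for (w = 0; w < 16; w++)
        for (j = 0; j < 8; j++)
            lanes[w][j] = state[w];      /* like vpbroadcastd        */
    for (j = 0; j < 8; j++)
        lanes[12][j] += (uint32_t)j;     /* word 12 is the counter,  */
                                         /* like L_chacha20_avx2_add */
}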
+ jnz L_chacha20_avx2_loop256 + vmovdqa 96(%r10), %ymm11 + vpaddd (%r9), %ymm0, %ymm0 + vpaddd 32(%r9), %ymm1, %ymm1 + vpaddd 64(%r9), %ymm2, %ymm2 + vpaddd 96(%r9), %ymm3, %ymm3 + vpaddd 128(%r9), %ymm4, %ymm4 + vpaddd 160(%r9), %ymm5, %ymm5 + vpaddd 192(%r9), %ymm6, %ymm6 + vpaddd 224(%r9), %ymm7, %ymm7 + vpaddd 256(%r9), %ymm8, %ymm8 + vpaddd 288(%r9), %ymm9, %ymm9 + vpaddd 320(%r9), %ymm10, %ymm10 + vpaddd 352(%r9), %ymm11, %ymm11 + vpaddd 384(%r9), %ymm12, %ymm12 + vpaddd 416(%r9), %ymm13, %ymm13 + vpaddd 448(%r9), %ymm14, %ymm14 + vpaddd 480(%r9), %ymm15, %ymm15 + vmovdqa %ymm8, (%r10) + vmovdqa %ymm9, 32(%r10) + vmovdqa %ymm10, 64(%r10) + vmovdqa %ymm11, 96(%r10) + vmovdqa %ymm12, 128(%r10) + vmovdqa %ymm13, 160(%r10) + vmovdqa %ymm14, 192(%r10) + vmovdqa %ymm15, 224(%r10) + vpunpckldq %ymm1, %ymm0, %ymm8 + vpunpckldq %ymm3, %ymm2, %ymm9 + vpunpckhdq %ymm1, %ymm0, %ymm12 + vpunpckhdq %ymm3, %ymm2, %ymm13 + vpunpckldq %ymm5, %ymm4, %ymm10 + vpunpckldq %ymm7, %ymm6, %ymm11 + vpunpckhdq %ymm5, %ymm4, %ymm14 + vpunpckhdq %ymm7, %ymm6, %ymm15 + vpunpcklqdq %ymm9, %ymm8, %ymm0 + vpunpcklqdq %ymm11, %ymm10, %ymm1 + vpunpckhqdq %ymm9, %ymm8, %ymm2 + vpunpckhqdq %ymm11, %ymm10, %ymm3 + vpunpcklqdq %ymm13, %ymm12, %ymm4 + vpunpcklqdq %ymm15, %ymm14, %ymm5 + vpunpckhqdq %ymm13, %ymm12, %ymm6 + vpunpckhqdq %ymm15, %ymm14, %ymm7 + vperm2i128 $32, %ymm1, %ymm0, %ymm8 + vperm2i128 $32, %ymm3, %ymm2, %ymm9 + vperm2i128 $49, %ymm1, %ymm0, %ymm12 + vperm2i128 $49, %ymm3, %ymm2, %ymm13 + vperm2i128 $32, %ymm5, %ymm4, %ymm10 + vperm2i128 $32, %ymm7, %ymm6, %ymm11 + vperm2i128 $49, %ymm5, %ymm4, %ymm14 + vperm2i128 $49, %ymm7, %ymm6, %ymm15 + vmovdqu (%rsi), %ymm0 + vmovdqu 64(%rsi), %ymm1 + vmovdqu 128(%rsi), %ymm2 + vmovdqu 192(%rsi), %ymm3 + vmovdqu 256(%rsi), %ymm4 + vmovdqu 320(%rsi), %ymm5 + vmovdqu 384(%rsi), %ymm6 + vmovdqu 448(%rsi), %ymm7 + vpxor %ymm0, %ymm8, %ymm8 + vpxor %ymm1, %ymm9, %ymm9 + vpxor %ymm2, %ymm10, %ymm10 + vpxor %ymm3, %ymm11, %ymm11 + vpxor %ymm4, %ymm12, %ymm12 + vpxor %ymm5, %ymm13, %ymm13 + vpxor %ymm6, %ymm14, %ymm14 + vpxor %ymm7, %ymm15, %ymm15 + vmovdqu %ymm8, (%rdx) + vmovdqu %ymm9, 64(%rdx) + vmovdqu %ymm10, 128(%rdx) + vmovdqu %ymm11, 192(%rdx) + vmovdqu %ymm12, 256(%rdx) + vmovdqu %ymm13, 320(%rdx) + vmovdqu %ymm14, 384(%rdx) + vmovdqu %ymm15, 448(%rdx) + vmovdqa (%r10), %ymm0 + vmovdqa 32(%r10), %ymm1 + vmovdqa 64(%r10), %ymm2 + vmovdqa 96(%r10), %ymm3 + vmovdqa 128(%r10), %ymm4 + vmovdqa 160(%r10), %ymm5 + vmovdqa 192(%r10), %ymm6 + vmovdqa 224(%r10), %ymm7 + vpunpckldq %ymm1, %ymm0, %ymm8 + vpunpckldq %ymm3, %ymm2, %ymm9 + vpunpckhdq %ymm1, %ymm0, %ymm12 + vpunpckhdq %ymm3, %ymm2, %ymm13 + vpunpckldq %ymm5, %ymm4, %ymm10 + vpunpckldq %ymm7, %ymm6, %ymm11 + vpunpckhdq %ymm5, %ymm4, %ymm14 + vpunpckhdq %ymm7, %ymm6, %ymm15 + vpunpcklqdq %ymm9, %ymm8, %ymm0 + vpunpcklqdq %ymm11, %ymm10, %ymm1 + vpunpckhqdq %ymm9, %ymm8, %ymm2 + vpunpckhqdq %ymm11, %ymm10, %ymm3 + vpunpcklqdq %ymm13, %ymm12, %ymm4 + vpunpcklqdq %ymm15, %ymm14, %ymm5 + vpunpckhqdq %ymm13, %ymm12, %ymm6 + vpunpckhqdq %ymm15, %ymm14, %ymm7 + vperm2i128 $32, %ymm1, %ymm0, %ymm8 + vperm2i128 $32, %ymm3, %ymm2, %ymm9 + vperm2i128 $49, %ymm1, %ymm0, %ymm12 + vperm2i128 $49, %ymm3, %ymm2, %ymm13 + vperm2i128 $32, %ymm5, %ymm4, %ymm10 + vperm2i128 $32, %ymm7, %ymm6, %ymm11 + vperm2i128 $49, %ymm5, %ymm4, %ymm14 + vperm2i128 $49, %ymm7, %ymm6, %ymm15 + vmovdqu 32(%rsi), %ymm0 + vmovdqu 96(%rsi), %ymm1 + vmovdqu 160(%rsi), %ymm2 + vmovdqu 224(%rsi), %ymm3 + vmovdqu 288(%rsi), %ymm4 + vmovdqu 352(%rsi), %ymm5 
+ vmovdqu 416(%rsi), %ymm6 + vmovdqu 480(%rsi), %ymm7 + vpxor %ymm0, %ymm8, %ymm8 + vpxor %ymm1, %ymm9, %ymm9 + vpxor %ymm2, %ymm10, %ymm10 + vpxor %ymm3, %ymm11, %ymm11 + vpxor %ymm4, %ymm12, %ymm12 + vpxor %ymm5, %ymm13, %ymm13 + vpxor %ymm6, %ymm14, %ymm14 + vpxor %ymm7, %ymm15, %ymm15 + vmovdqu %ymm8, 32(%rdx) + vmovdqu %ymm9, 96(%rdx) + vmovdqu %ymm10, 160(%rdx) + vmovdqu %ymm11, 224(%rdx) + vmovdqu %ymm12, 288(%rdx) + vmovdqu %ymm13, 352(%rdx) + vmovdqu %ymm14, 416(%rdx) + vmovdqu %ymm15, 480(%rdx) + vmovdqa 384(%r9), %ymm12 + addq $0x200, %rsi + addq $0x200, %rdx + vpaddd L_chacha20_avx2_eight(%rip), %ymm12, %ymm12 + subl $0x200, %ecx + vmovdqa %ymm12, 384(%r9) + cmpl $0x200, %ecx + jl L_chacha20_avx2_done256 + vmovdqa (%r9), %ymm0 + vmovdqa 32(%r9), %ymm1 + vmovdqa 64(%r9), %ymm2 + vmovdqa 96(%r9), %ymm3 + vmovdqa 128(%r9), %ymm4 + vmovdqa 160(%r9), %ymm5 + vmovdqa 192(%r9), %ymm6 + vmovdqa 224(%r9), %ymm7 + vmovdqa 256(%r9), %ymm8 + vmovdqa 288(%r9), %ymm9 + vmovdqa 320(%r9), %ymm10 + vmovdqa 352(%r9), %ymm11 + vmovdqa 384(%r9), %ymm12 + vmovdqa 416(%r9), %ymm13 + vmovdqa 448(%r9), %ymm14 + vmovdqa 480(%r9), %ymm15 + jmp L_chacha20_avx2_start256 +L_chacha20_avx2_done256: + shl $3, %eax + addl %eax, 48(%rdi) +L_chacha20_avx2_end256: +#ifndef __APPLE__ + callq chacha_encrypt_avx1@plt +#else + callq _chacha_encrypt_avx1 +#endif /* __APPLE__ */ + addq $0x310, %rsp + repz retq +#ifndef __APPLE__ +.size chacha_encrypt_avx2,.-chacha_encrypt_avx2 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ diff --git a/client/wolfssl/wolfcrypt/src/cmac.c b/client/wolfssl/wolfcrypt/src/cmac.c new file mode 100644 index 0000000..9d30bb5 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/cmac.c @@ -0,0 +1,215 @@ +/* cmac.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#if defined(WOLFSSL_CMAC) && !defined(NO_AES) && defined(WOLFSSL_AES_DIRECT) + +#if defined(HAVE_FIPS) && \ + defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2) + + /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ + #define FIPS_NO_WRAPPERS + + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$n") + #pragma const_seg(".fipsB$n") + #endif +#endif + +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +#include +#include +#include + + +static void ShiftAndXorRb(byte* out, byte* in) +{ + int i, j, xorRb; + int mask = 0, last = 0; + byte Rb = 0x87; + + xorRb = (in[0] & 0x80) != 0; + + for (i = 1, j = AES_BLOCK_SIZE - 1; i <= AES_BLOCK_SIZE; i++, j--) { + last = (in[j] & 0x80) ? 
1 : 0; + out[j] = (byte)((in[j] << 1) | mask); + mask = last; + if (xorRb) { + out[j] ^= Rb; + Rb = 0; + } + } +} + + +int wc_InitCmac(Cmac* cmac, const byte* key, word32 keySz, + int type, void* unused) +{ + int ret; + + (void)unused; + + if (cmac == NULL || key == NULL || keySz == 0 || type != WC_CMAC_AES) + return BAD_FUNC_ARG; + + XMEMSET(cmac, 0, sizeof(Cmac)); + ret = wc_AesSetKey(&cmac->aes, key, keySz, NULL, AES_ENCRYPTION); + if (ret == 0) { + byte l[AES_BLOCK_SIZE]; + + XMEMSET(l, 0, AES_BLOCK_SIZE); + wc_AesEncryptDirect(&cmac->aes, l, l); + ShiftAndXorRb(cmac->k1, l); + ShiftAndXorRb(cmac->k2, cmac->k1); + ForceZero(l, AES_BLOCK_SIZE); + } + return ret; +} + + +int wc_CmacUpdate(Cmac* cmac, const byte* in, word32 inSz) +{ + if ((cmac == NULL) || (in == NULL && inSz != 0)) + return BAD_FUNC_ARG; + + while (inSz != 0) { + word32 add = min(inSz, AES_BLOCK_SIZE - cmac->bufferSz); + XMEMCPY(&cmac->buffer[cmac->bufferSz], in, add); + + cmac->bufferSz += add; + in += add; + inSz -= add; + + if (cmac->bufferSz == AES_BLOCK_SIZE && inSz != 0) { + if (cmac->totalSz != 0) + xorbuf(cmac->buffer, cmac->digest, AES_BLOCK_SIZE); + wc_AesEncryptDirect(&cmac->aes, + cmac->digest, + cmac->buffer); + cmac->totalSz += AES_BLOCK_SIZE; + cmac->bufferSz = 0; + } + } + + return 0; +} + + +int wc_CmacFinal(Cmac* cmac, byte* out, word32* outSz) +{ + const byte* subKey; + + if (cmac == NULL || out == NULL || outSz == NULL) + return BAD_FUNC_ARG; + + if (*outSz < WC_CMAC_TAG_MIN_SZ || *outSz > WC_CMAC_TAG_MAX_SZ) + return BUFFER_E; + + if (cmac->bufferSz == AES_BLOCK_SIZE) { + subKey = cmac->k1; + } + else { + word32 remainder = AES_BLOCK_SIZE - cmac->bufferSz; + + if (remainder == 0) + remainder = AES_BLOCK_SIZE; + + if (remainder > 1) + XMEMSET(cmac->buffer + AES_BLOCK_SIZE - remainder, 0, remainder); + cmac->buffer[AES_BLOCK_SIZE - remainder] = 0x80; + subKey = cmac->k2; + } + xorbuf(cmac->buffer, cmac->digest, AES_BLOCK_SIZE); + xorbuf(cmac->buffer, subKey, AES_BLOCK_SIZE); + wc_AesEncryptDirect(&cmac->aes, cmac->digest, cmac->buffer); + + XMEMCPY(out, cmac->digest, *outSz); + + ForceZero(cmac, sizeof(Cmac)); + + return 0; +} + + +int wc_AesCmacGenerate(byte* out, word32* outSz, + const byte* in, word32 inSz, + const byte* key, word32 keySz) +{ + Cmac cmac; + int ret; + + if (out == NULL || (in == NULL && inSz > 0) || key == NULL || keySz == 0) + return BAD_FUNC_ARG; + + ret = wc_InitCmac(&cmac, key, keySz, WC_CMAC_AES, NULL); + if (ret != 0) + return ret; + + ret = wc_CmacUpdate(&cmac, in, inSz); + if (ret != 0) + return ret; + + ret = wc_CmacFinal(&cmac, out, outSz); + if (ret != 0) + return ret; + + return 0; +} + + +int wc_AesCmacVerify(const byte* check, word32 checkSz, + const byte* in, word32 inSz, + const byte* key, word32 keySz) +{ + byte a[AES_BLOCK_SIZE]; + word32 aSz = sizeof(a); + int result; + int compareRet; + + if (check == NULL || checkSz == 0 || (in == NULL && inSz != 0) || + key == NULL || keySz == 0) + + return BAD_FUNC_ARG; + + XMEMSET(a, 0, aSz); + result = wc_AesCmacGenerate(a, &aSz, in, inSz, key, keySz); + compareRet = ConstantCompare(check, a, min(checkSz, aSz)); + + if (result == 0) + result = compareRet ? 1 : 0; + + return result; +} + + +#endif /* WOLFSSL_CMAC && NO_AES && WOLFSSL_AES_DIRECT */ diff --git a/client/wolfssl/wolfcrypt/src/coding.c b/client/wolfssl/wolfcrypt/src/coding.c new file mode 100644 index 0000000..f6c814e --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/coding.c @@ -0,0 +1,511 @@ +/* coding.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. 
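The CMAC implementation above follows NIST SP 800-38B: wc_InitCmac encrypts a zero block and doubles it in GF(2^128) (ShiftAndXorRb, with Rb = 0x87) to derive the subkeys k1 and k2, and wc_CmacFinal pads a partial final block with 0x80 before the last XOR-and-encrypt. A minimal caller sketch against the one-shot helpers defined above (key and message values are illustrative):

#include <wolfssl/wolfcrypt/cmac.h>

int cmac_demo(void)
{
    byte key[16] = {0};                    /* illustrative AES-128 key */
    byte msg[]   = "example message";
    byte tag[AES_BLOCK_SIZE];
    word32 tagSz = sizeof(tag);
    int ret;

    /* compute the tag over the whole message in one call */
    ret = wc_AesCmacGenerate(tag, &tagSz, msg, sizeof(msg) - 1,
                             key, sizeof(key));
    if (ret != 0)
        return ret;

    /* recompute and compare in constant time; 0 means the tags match */
    return wc_AesCmacVerify(tag, tagSz, msg, sizeof(msg) - 1,
                            key, sizeof(key));
}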
+ * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifndef NO_CODING + +#include +#include +#include + + +enum { + BAD = 0xFF, /* invalid encoding */ + PAD = '=', + PEM_LINE_SZ = 64, + BASE64_MIN = 0x2B, + BASE16_MIN = 0x30, +}; + + +#ifdef WOLFSSL_BASE64_DECODE + +static +const byte base64Decode[] = { 62, BAD, BAD, BAD, 63, /* + starts at 0x2B */ + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + BAD, BAD, BAD, BAD, BAD, BAD, BAD, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, + BAD, BAD, BAD, BAD, BAD, BAD, + 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, 51 + }; + +static WC_INLINE int Base64_SkipNewline(const byte* in, word32 *inLen, word32 *outJ) +{ + word32 len = *inLen; + word32 j = *outJ; + if (len && (in[j] == ' ' || in[j] == '\r' || in[j] == '\n')) { + byte endLine = in[j++]; + len--; + while (len && endLine == ' ') { /* allow trailing whitespace */ + endLine = in[j++]; + len--; + } + if (endLine == '\r') { + if (len) { + endLine = in[j++]; + len--; + } + } + if (endLine != '\n') { + WOLFSSL_MSG("Bad end of line in Base64 Decode"); + return ASN_INPUT_E; + } + } + if (!len) { + return BUFFER_E; + } + *inLen = len; + *outJ = j; + return 0; +} + +int Base64_Decode(const byte* in, word32 inLen, byte* out, word32* outLen) +{ + word32 i = 0; + word32 j = 0; + word32 plainSz = inLen - ((inLen + (PEM_LINE_SZ - 1)) / PEM_LINE_SZ ); + int ret; + const byte maxIdx = (byte)sizeof(base64Decode) + BASE64_MIN - 1; + + plainSz = (plainSz * 3 + 3) / 4; + if (plainSz > *outLen) return BAD_FUNC_ARG; + + while (inLen > 3) { + int pad3 = 0; + int pad4 = 0; + + byte b1, b2, b3; + byte e1, e2, e3, e4; + if ((ret = Base64_SkipNewline(in, &inLen, &j)) != 0) { + if (ret == BUFFER_E) { + /* Running out of buffer here is not an error */ + break; + } + return ret; + } + e1 = in[j++]; + if (e1 == '\0') { + break; + } + inLen--; + if ((ret = Base64_SkipNewline(in, &inLen, &j)) != 0) { + return ret; + } + e2 = in[j++]; + inLen--; + if ((ret = Base64_SkipNewline(in, &inLen, &j)) != 0) { + return ret; + } + e3 = in[j++]; + inLen--; + if ((ret = Base64_SkipNewline(in, &inLen, &j)) != 0) { + return ret; + } + e4 = in[j++]; + inLen--; + + if (e1 == 0) /* end file 0's */ + break; + if (e3 == PAD) + pad3 = 1; + if (e4 == PAD) + pad4 = 1; + + if (e1 < BASE64_MIN || e2 < BASE64_MIN || e3 < BASE64_MIN || e4 < BASE64_MIN) { + WOLFSSL_MSG("Bad Base64 Decode data, too small"); + return ASN_INPUT_E; + } + + if (e1 > maxIdx || e2 > maxIdx || e3 > maxIdx || e4 > maxIdx) { + WOLFSSL_MSG("Bad Base64 Decode data, too big"); + return ASN_INPUT_E; + } + + if (i + 1 + !pad3 + !pad4 > *outLen) { + WOLFSSL_MSG("Bad Base64 Decode out buffer, too small"); + return 
BAD_FUNC_ARG; + } + + e1 = base64Decode[e1 - BASE64_MIN]; + e2 = base64Decode[e2 - BASE64_MIN]; + e3 = (e3 == PAD) ? 0 : base64Decode[e3 - BASE64_MIN]; + e4 = (e4 == PAD) ? 0 : base64Decode[e4 - BASE64_MIN]; + + b1 = (byte)((e1 << 2) | (e2 >> 4)); + b2 = (byte)(((e2 & 0xF) << 4) | (e3 >> 2)); + b3 = (byte)(((e3 & 0x3) << 6) | e4); + + out[i++] = b1; + if (!pad3) + out[i++] = b2; + if (!pad4) + out[i++] = b3; + else + break; + } +/* If the output buffer has a room for an extra byte, add a null terminator */ + if (out && *outLen > i) + out[i]= '\0'; + + *outLen = i; + + return 0; +} + +#endif /* WOLFSSL_BASE64_DECODE */ + +#if defined(WOLFSSL_BASE64_ENCODE) + +static +const byte base64Encode[] = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', + 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', + 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', + 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', + 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '+', '/' + }; + + +/* make sure *i (idx) won't exceed max, store and possibly escape to out, + * raw means use e w/o decode, 0 on success */ +static int CEscape(int escaped, byte e, byte* out, word32* i, word32 max, + int raw, int getSzOnly) +{ + int doEscape = 0; + word32 needed = 1; + word32 idx = *i; + + byte basic; + byte plus = 0; + byte equals = 0; + byte newline = 0; + + if (raw) + basic = e; + else + basic = base64Encode[e]; + + /* check whether to escape. Only escape for EncodeEsc */ + if (escaped == WC_ESC_NL_ENC) { + switch ((char)basic) { + case '+' : + plus = 1; + doEscape = 1; + needed += 2; + break; + case '=' : + equals = 1; + doEscape = 1; + needed += 2; + break; + case '\n' : + newline = 1; + doEscape = 1; + needed += 2; + break; + default: + /* do nothing */ + break; + } + } + + /* check size */ + if ( (idx+needed) > max && !getSzOnly) { + WOLFSSL_MSG("Escape buffer max too small"); + return BUFFER_E; + } + + /* store it */ + if (doEscape == 0) { + if(getSzOnly) + idx++; + else + out[idx++] = basic; + } + else { + if(getSzOnly) + idx+=3; + else { + out[idx++] = '%'; /* start escape */ + + if (plus) { + out[idx++] = '2'; + out[idx++] = 'B'; + } + else if (equals) { + out[idx++] = '3'; + out[idx++] = 'D'; + } + else if (newline) { + out[idx++] = '0'; + out[idx++] = 'A'; + } + } + } + *i = idx; + + return 0; +} + + +/* internal worker, handles both escaped and normal line endings. 
+ If out buffer is NULL, will return sz needed in outLen */ +static int DoBase64_Encode(const byte* in, word32 inLen, byte* out, + word32* outLen, int escaped) +{ + int ret = 0; + word32 i = 0, + j = 0, + n = 0; /* new line counter */ + + int getSzOnly = (out == NULL); + + word32 outSz = (inLen + 3 - 1) / 3 * 4; + word32 addSz = (outSz + PEM_LINE_SZ - 1) / PEM_LINE_SZ; /* new lines */ + + if (escaped == WC_ESC_NL_ENC) + addSz *= 3; /* instead of just \n, we're doing %0A triplet */ + else if (escaped == WC_NO_NL_ENC) + addSz = 0; /* encode without \n */ + + outSz += addSz; + + /* if escaped we can't predetermine size for one pass encoding, but + * make sure we have enough if no escapes are in input + * Also need to ensure outLen valid before dereference */ + if (!outLen || (outSz > *outLen && !getSzOnly)) return BAD_FUNC_ARG; + + while (inLen > 2) { + byte b1 = in[j++]; + byte b2 = in[j++]; + byte b3 = in[j++]; + + /* encoded idx */ + byte e1 = b1 >> 2; + byte e2 = (byte)(((b1 & 0x3) << 4) | (b2 >> 4)); + byte e3 = (byte)(((b2 & 0xF) << 2) | (b3 >> 6)); + byte e4 = b3 & 0x3F; + + /* store */ + ret = CEscape(escaped, e1, out, &i, *outLen, 0, getSzOnly); + if (ret != 0) break; + ret = CEscape(escaped, e2, out, &i, *outLen, 0, getSzOnly); + if (ret != 0) break; + ret = CEscape(escaped, e3, out, &i, *outLen, 0, getSzOnly); + if (ret != 0) break; + ret = CEscape(escaped, e4, out, &i, *outLen, 0, getSzOnly); + if (ret != 0) break; + + inLen -= 3; + + /* Insert newline after PEM_LINE_SZ, unless no \n requested */ + if (escaped != WC_NO_NL_ENC && (++n % (PEM_LINE_SZ/4)) == 0 && inLen) { + ret = CEscape(escaped, '\n', out, &i, *outLen, 1, getSzOnly); + if (ret != 0) break; + } + } + + /* last integral */ + if (inLen && ret == 0) { + int twoBytes = (inLen == 2); + + byte b1 = in[j++]; + byte b2 = (twoBytes) ? in[j++] : 0; + + byte e1 = b1 >> 2; + byte e2 = (byte)(((b1 & 0x3) << 4) | (b2 >> 4)); + byte e3 = (byte)((b2 & 0xF) << 2); + + ret = CEscape(escaped, e1, out, &i, *outLen, 0, getSzOnly); + if (ret == 0) + ret = CEscape(escaped, e2, out, &i, *outLen, 0, getSzOnly); + if (ret == 0) { + /* third */ + if (twoBytes) + ret = CEscape(escaped, e3, out, &i, *outLen, 0, getSzOnly); + else + ret = CEscape(escaped, '=', out, &i, *outLen, 1, getSzOnly); + } + /* fourth always pad */ + if (ret == 0) + ret = CEscape(escaped, '=', out, &i, *outLen, 1, getSzOnly); + } + + if (ret == 0 && escaped != WC_NO_NL_ENC) + ret = CEscape(escaped, '\n', out, &i, *outLen, 1, getSzOnly); + + if (i != outSz && escaped != 1 && ret == 0) + return ASN_INPUT_E; +/* If the output buffer has a room for an extra byte, add a null terminator */ + if (out && *outLen > i) + out[i]= '\0'; + + *outLen = i; + + if (ret == 0) + return getSzOnly ? 
LENGTH_ONLY_E : 0; + + return ret; +} + + +/* Base64 Encode, PEM style, with \n line endings */ +int Base64_Encode(const byte* in, word32 inLen, byte* out, word32* outLen) +{ + return DoBase64_Encode(in, inLen, out, outLen, WC_STD_ENC); +} + + +/* Base64 Encode, with %0A escaped line endings instead of \n */ +int Base64_EncodeEsc(const byte* in, word32 inLen, byte* out, word32* outLen) +{ + return DoBase64_Encode(in, inLen, out, outLen, WC_ESC_NL_ENC); +} + +int Base64_Encode_NoNl(const byte* in, word32 inLen, byte* out, word32* outLen) +{ + return DoBase64_Encode(in, inLen, out, outLen, WC_NO_NL_ENC); +} + +#endif /* WOLFSSL_BASE64_ENCODE */ + + +#ifdef WOLFSSL_BASE16 + +static +const byte hexDecode[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + BAD, BAD, BAD, BAD, BAD, BAD, BAD, + 10, 11, 12, 13, 14, 15, /* upper case A-F */ + BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, + BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, + BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, + BAD, BAD, /* G - ` */ + 10, 11, 12, 13, 14, 15 /* lower case a-f */ + }; /* A starts at 0x41 not 0x3A */ + +int Base16_Decode(const byte* in, word32 inLen, byte* out, word32* outLen) +{ + word32 inIdx = 0; + word32 outIdx = 0; + + if (in == NULL || out == NULL || outLen == NULL) + return BAD_FUNC_ARG; + + if (inLen == 1 && *outLen && in) { + byte b = in[inIdx++] - BASE16_MIN; /* 0 starts at 0x30 */ + + /* sanity check */ + if (b >= sizeof(hexDecode)/sizeof(hexDecode[0])) + return ASN_INPUT_E; + + b = hexDecode[b]; + + if (b == BAD) + return ASN_INPUT_E; + + out[outIdx++] = b; + + *outLen = outIdx; + return 0; + } + + if (inLen % 2) + return BAD_FUNC_ARG; + + if (*outLen < (inLen / 2)) + return BAD_FUNC_ARG; + + while (inLen) { + byte b = in[inIdx++] - BASE16_MIN; /* 0 starts at 0x30 */ + byte b2 = in[inIdx++] - BASE16_MIN; + + /* sanity checks */ + if (b >= sizeof(hexDecode)/sizeof(hexDecode[0])) + return ASN_INPUT_E; + if (b2 >= sizeof(hexDecode)/sizeof(hexDecode[0])) + return ASN_INPUT_E; + + b = hexDecode[b]; + b2 = hexDecode[b2]; + + if (b == BAD || b2 == BAD) + return ASN_INPUT_E; + + out[outIdx++] = (byte)((b << 4) | b2); + inLen -= 2; + } + + *outLen = outIdx; + return 0; +} + +int Base16_Encode(const byte* in, word32 inLen, byte* out, word32* outLen) +{ + word32 outIdx = 0; + word32 i; + byte hb, lb; + + if (in == NULL || out == NULL || outLen == NULL) + return BAD_FUNC_ARG; + + if (*outLen < (2 * inLen + 1)) + return BAD_FUNC_ARG; + + for (i = 0; i < inLen; i++) { + hb = in[i] >> 4; + lb = in[i] & 0x0f; + + /* ASCII value */ + hb += '0'; + if (hb > '9') + hb += 7; + + /* ASCII value */ + lb += '0'; + if (lb>'9') + lb += 7; + + out[outIdx++] = hb; + out[outIdx++] = lb; + } + + /* force 0 at this end */ + out[outIdx++] = 0; + + *outLen = outIdx; + return 0; +} + +#endif /* WOLFSSL_BASE16 */ + +#endif /* !NO_CODING */ diff --git a/client/wolfssl/wolfcrypt/src/compress.c b/client/wolfssl/wolfcrypt/src/compress.c new file mode 100644 index 0000000..28d04f0 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/compress.c @@ -0,0 +1,198 @@ +/* compress.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
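One detail of the encoders above worth calling out: DoBase64_Encode treats out == NULL as a size query and returns LENGTH_ONLY_E with the needed length in *outLen, so callers can size the output buffer in two passes. A hedged sketch of that pattern (function name and error handling are illustrative):

#include <stdlib.h>
#include <wolfssl/wolfcrypt/coding.h>
#include <wolfssl/wolfcrypt/error-crypt.h>

int b64_encode_alloc(const byte* in, word32 inLen, byte** out, word32* outLen)
{
    word32 sz = 0;
    int ret;

    /* pass 1: NULL output means size-only; expect LENGTH_ONLY_E */
    ret = Base64_Encode(in, inLen, NULL, &sz);
    if (ret != LENGTH_ONLY_E)
        return ret;

    *out = (byte*)malloc(sz + 1);   /* +1 leaves room for the '\0' */
    if (*out == NULL)
        return MEMORY_E;

    *outLen = sz + 1;
    /* pass 2: encode for real into the sized buffer */
    ret = Base64_Encode(in, inLen, *out, outLen);
    if (ret != 0)
        free(*out);
    return ret;
}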
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_LIBZ
+
+
+#include <wolfssl/wolfcrypt/compress.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#include <zlib.h>
+
+
+/* alloc user allocs to work with zlib */
+static void* myAlloc(void* opaque, unsigned int item, unsigned int size)
+{
+    (void)opaque;
+    return XMALLOC(item * size, opaque, DYNAMIC_TYPE_LIBZ);
+}
+
+
+static void myFree(void* opaque, void* memory)
+{
+    (void)opaque;
+    XFREE(memory, opaque, DYNAMIC_TYPE_LIBZ);
+}
+
+
+#ifdef HAVE_MCAPI
+    #define DEFLATE_DEFAULT_WINDOWBITS 11
+    #define DEFLATE_DEFAULT_MEMLEVEL   1
+#else
+    #define DEFLATE_DEFAULT_WINDOWBITS 15
+    #define DEFLATE_DEFAULT_MEMLEVEL   8
+#endif
+
+
+/*
+ * out - pointer to destination buffer
+ * outSz - size of destination buffer
+ * in - pointer to source buffer to compress
+ * inSz - size of source to compress
+ * flags - flags to control how compress operates
+ *
+ * return:
+ * negative - error code
+ * positive - bytes stored in out buffer
+ *
+ * Note, the output buffer still needs to be larger than the input buffer.
+ * The right chunk of data won't compress at all, and the lookup table will
+ * add to the size of the output. The libz code says the compressed
+ * buffer should be srcSz + 0.1% + 12.
+ */
+int wc_Compress_ex(byte* out, word32 outSz, const byte* in, word32 inSz,
+    word32 flags, word32 windowBits)
+{
+    z_stream stream;
+    int result = 0;
+
+    stream.next_in = (Bytef*)in;
+    stream.avail_in = (uInt)inSz;
+#ifdef MAXSEG_64K
+    /* Check for source > 64K on 16-bit machine: */
+    if ((uLong)stream.avail_in != inSz) return COMPRESS_INIT_E;
+#endif
+    stream.next_out = out;
+    stream.avail_out = (uInt)outSz;
+    if ((uLong)stream.avail_out != outSz) return COMPRESS_INIT_E;
+
+    stream.zalloc = (alloc_func)myAlloc;
+    stream.zfree = (free_func)myFree;
+    stream.opaque = (voidpf)0;
+
+    if (deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+                     DEFLATE_DEFAULT_WINDOWBITS | windowBits,
+                     DEFLATE_DEFAULT_MEMLEVEL,
+                     flags ? Z_FIXED : Z_DEFAULT_STRATEGY) != Z_OK)
+        return COMPRESS_INIT_E;
+
+    if (deflate(&stream, Z_FINISH) != Z_STREAM_END) {
+        deflateEnd(&stream);
+        return COMPRESS_E;
+    }
+
+    result = (int)stream.total_out;
+
+    if (deflateEnd(&stream) != Z_OK)
+        result = COMPRESS_E;
+
+    return result;
+}
+
+int wc_Compress(byte* out, word32 outSz, const byte* in, word32 inSz, word32 flags)
+{
+    return wc_Compress_ex(out, outSz, in, inSz, flags, 0);
+}
+
+
+/* windowBits:
+* deflateInit() and inflateInit(), as well as deflateInit2() and inflateInit2()
+  with windowBits in 0..15 all process zlib-wrapped deflate data.
+  (See RFC 1950 and RFC 1951.)
+* deflateInit2() and inflateInit2() with negative windowBits in -1..-15 process
+  raw deflate data with no header or trailer.
+* deflateInit2() and inflateInit2() with windowBits in 16..31, i.e. 16
+  added to 0..15, process gzip-wrapped deflate data (RFC 1952).
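+* (For example: wc_Compress_ex() above ORs the caller's value with
+  DEFLATE_DEFAULT_WINDOWBITS (15 unless HAVE_MCAPI is set), so passing
+  windowBits = 16 hands 15 | 16 = 31 to deflateInit2() and the output
+  gets a gzip wrapper.)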
+* inflateInit2() with windowBits in 32..47 (32 added to 0..15) will
+  automatically detect either a gzip or zlib header (but not raw deflate
+  data), and decompress accordingly.
+*/
+int wc_DeCompress_ex(byte* out, word32 outSz, const byte* in, word32 inSz,
+    int windowBits)
+/*
+ * out - pointer to destination buffer
+ * outSz - size of destination buffer
+ * in - pointer to source buffer to decompress
+ * inSz - size of source to decompress
+ * windowBits - controls the wrapper format expected (see note above)
+ *
+ * return:
+ * negative - error code
+ * positive - bytes stored in out buffer
+ */
+{
+    z_stream stream;
+    int result = 0;
+
+    stream.next_in = (Bytef*)in;
+    stream.avail_in = (uInt)inSz;
+    /* Check for source > 64K on 16-bit machine: */
+    if ((uLong)stream.avail_in != inSz) return DECOMPRESS_INIT_E;
+
+    stream.next_out = out;
+    stream.avail_out = (uInt)outSz;
+    if ((uLong)stream.avail_out != outSz) return DECOMPRESS_INIT_E;
+
+    stream.zalloc = (alloc_func)myAlloc;
+    stream.zfree = (free_func)myFree;
+    stream.opaque = (voidpf)0;
+
+    if (inflateInit2(&stream, DEFLATE_DEFAULT_WINDOWBITS | windowBits) != Z_OK)
+        return DECOMPRESS_INIT_E;
+
+    result = inflate(&stream, Z_FINISH);
+    if (result != Z_STREAM_END) {
+        inflateEnd(&stream);
+        return DECOMPRESS_E;
+    }
+
+    result = (int)stream.total_out;
+
+    if (inflateEnd(&stream) != Z_OK)
+        result = DECOMPRESS_E;
+
+    return result;
+}
+
+
+int wc_DeCompress(byte* out, word32 outSz, const byte* in, word32 inSz)
+{
+    return wc_DeCompress_ex(out, outSz, in, inSz, 0);
+}
+
+
+#endif /* HAVE_LIBZ */
+
diff --git a/client/wolfssl/wolfcrypt/src/cpuid.c b/client/wolfssl/wolfcrypt/src/cpuid.c
new file mode 100644
index 0000000..85c4bf2
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/cpuid.c
@@ -0,0 +1,110 @@
+/* cpuid.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#include <wolfssl/wolfcrypt/cpuid.h>
+
+#if (defined(WOLFSSL_X86_64_BUILD) || defined(USE_INTEL_SPEEDUP) || \
+    defined(WOLFSSL_AESNI)) && !defined(WOLFSSL_NO_ASM)
+    /* Each platform needs to query info type 1 from cpuid to see if aesni is
+     * supported.
Also, let's setup a macro for proper linkage w/o ABI conflicts + */ + + #ifndef _MSC_VER + #define cpuid(reg, leaf, sub)\ + __asm__ __volatile__ ("cpuid":\ + "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\ + "a" (leaf), "c"(sub)); + + #define XASM_LINK(f) asm(f) + #else + #include + + #define cpuid(a,b,c) __cpuidex((int*)a,b,c) + + #define XASM_LINK(f) + #endif /* _MSC_VER */ + + #define EAX 0 + #define EBX 1 + #define ECX 2 + #define EDX 3 + + static word32 cpuid_check = 0; + static word32 cpuid_flags = 0; + + static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) + { + int got_intel_cpu = 0; + int got_amd_cpu = 0; + unsigned int reg[5]; + reg[4] = '\0'; + cpuid(reg, 0, 0); + + /* check for Intel cpu */ + if (XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 && + XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 && + XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0) { + got_intel_cpu = 1; + } + + /* check for AMD cpu */ + if (XMEMCMP((char *)&(reg[EBX]), "Auth", 4) == 0 && + XMEMCMP((char *)&(reg[EDX]), "enti", 4) == 0 && + XMEMCMP((char *)&(reg[ECX]), "cAMD", 4) == 0) { + got_amd_cpu = 1; + } + + if (got_intel_cpu || got_amd_cpu) { + cpuid(reg, leaf, sub); + return ((reg[num] >> bit) & 0x1); + } + return 0; + } + + + void cpuid_set_flags(void) + { + if (!cpuid_check) { + if (cpuid_flag(1, 0, ECX, 28)) { cpuid_flags |= CPUID_AVX1 ; } + if (cpuid_flag(7, 0, EBX, 5)) { cpuid_flags |= CPUID_AVX2 ; } + if (cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2 ; } + if (cpuid_flag(1, 0, ECX, 30)) { cpuid_flags |= CPUID_RDRAND; } + if (cpuid_flag(7, 0, EBX, 18)) { cpuid_flags |= CPUID_RDSEED; } + if (cpuid_flag(1, 0, ECX, 25)) { cpuid_flags |= CPUID_AESNI ; } + if (cpuid_flag(7, 0, EBX, 19)) { cpuid_flags |= CPUID_ADX ; } + cpuid_check = 1; + } + } + + word32 cpuid_get_flags(void) + { + if (!cpuid_check) + cpuid_set_flags(); + return cpuid_flags; + } +#endif diff --git a/client/wolfssl/wolfcrypt/src/cryptocb.c b/client/wolfssl/wolfcrypt/src/cryptocb.c new file mode 100644 index 0000000..79f89db --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/cryptocb.c @@ -0,0 +1,648 @@ +/* cryptocb.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/* This framework provides a central place for crypto hardware integration + using the devId scheme. If not supported return `CRYPTOCB_UNAVAILABLE`. 
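+
+   A minimal registration sketch (myCb is a user-supplied handler shown
+   here only for illustration; a real callback dispatches on
+   info->algo_type and falls back to software by returning
+   CRYPTOCB_UNAVAILABLE):
+
+       static int myCb(int devId, wc_CryptoInfo* info, void* ctx)
+       {
+           (void)devId; (void)info; (void)ctx;
+           return CRYPTOCB_UNAVAILABLE;
+       }
+
+       wc_CryptoCb_RegisterDevice(1, myCb, NULL);
+       ...
+       wc_CryptoCb_UnRegisterDevice(1);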
*/
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef WOLF_CRYPTO_CB
+
+#include <wolfssl/wolfcrypt/cryptocb.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+
+/* TODO: Consider linked list with mutex */
+#ifndef MAX_CRYPTO_DEVID_CALLBACKS
+#define MAX_CRYPTO_DEVID_CALLBACKS 8
+#endif
+
+typedef struct CryptoCb {
+    int devId;
+    CryptoDevCallbackFunc cb;
+    void* ctx;
+} CryptoCb;
+static WOLFSSL_GLOBAL CryptoCb gCryptoDev[MAX_CRYPTO_DEVID_CALLBACKS];
+
+static CryptoCb* wc_CryptoCb_FindDevice(int devId)
+{
+    int i;
+    for (i=0; i<MAX_CRYPTO_DEVID_CALLBACKS; i++) {
+        if (gCryptoDev[i].devId == devId)
+            return &gCryptoDev[i];
+    }
+    return NULL;
+}
+static CryptoCb* wc_CryptoCb_FindDeviceByIndex(int startIdx)
+{
+    int i;
+    for (i=startIdx; i<MAX_CRYPTO_DEVID_CALLBACKS; i++) {
+        if (gCryptoDev[i].devId != INVALID_DEVID)
+            return &gCryptoDev[i];
+    }
+    return NULL;
+}
+
+static WC_INLINE int wc_CryptoCb_TranslateErrorCode(int ret)
+{
+    if (ret == NOT_COMPILED_IN) {
+        /* backwards compatibility for older NOT_COMPILED_IN syntax */
+        ret = CRYPTOCB_UNAVAILABLE;
+    }
+    return ret;
+}
+
+void wc_CryptoCb_Init(void)
+{
+    int i;
+    for (i=0; i<MAX_CRYPTO_DEVID_CALLBACKS; i++) {
+        gCryptoDev[i].devId = INVALID_DEVID;
+    }
+}
+
+int wc_CryptoCb_RegisterDevice(int devId, CryptoDevCallbackFunc cb, void* ctx)
+{
+    /* find existing or new */
+    CryptoCb* dev = wc_CryptoCb_FindDevice(devId);
+    if (dev == NULL)
+        dev = wc_CryptoCb_FindDevice(INVALID_DEVID);
+
+    if (dev == NULL)
+        return BUFFER_E; /* out of devices */
+
+    dev->devId = devId;
+    dev->cb = cb;
+    dev->ctx = ctx;
+
+    return 0;
+}
+
+void wc_CryptoCb_UnRegisterDevice(int devId)
+{
+    CryptoCb* dev = wc_CryptoCb_FindDevice(devId);
+    if (dev) {
+        XMEMSET(dev, 0, sizeof(*dev));
+        dev->devId = INVALID_DEVID;
+    }
+}
+
+#ifndef NO_RSA
+int wc_CryptoCb_Rsa(const byte* in, word32 inLen, byte* out,
+    word32* outLen, int type, RsaKey* key, WC_RNG* rng)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    if (key == NULL)
+        return ret;
+
+    /* locate registered callback */
+    dev = wc_CryptoCb_FindDevice(key->devId);
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
+        cryptoInfo.pk.type = WC_PK_TYPE_RSA;
+        cryptoInfo.pk.rsa.in = in;
+        cryptoInfo.pk.rsa.inLen = inLen;
+        cryptoInfo.pk.rsa.out = out;
+        cryptoInfo.pk.rsa.outLen = outLen;
+        cryptoInfo.pk.rsa.type = type;
+        cryptoInfo.pk.rsa.key = key;
+        cryptoInfo.pk.rsa.rng = rng;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+#ifdef WOLFSSL_KEY_GEN
+int wc_CryptoCb_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    if (key == NULL)
+        return ret;
+
+    /* locate registered callback */
+    dev = wc_CryptoCb_FindDevice(key->devId);
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
+        cryptoInfo.pk.type = WC_PK_TYPE_RSA_KEYGEN;
+        cryptoInfo.pk.rsakg.key = key;
+        cryptoInfo.pk.rsakg.size = size;
+        cryptoInfo.pk.rsakg.e = e;
+        cryptoInfo.pk.rsakg.rng = rng;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif
+#endif /* !NO_RSA */
+
+#ifdef HAVE_ECC
+int wc_CryptoCb_MakeEccKey(WC_RNG* rng, int keySize, ecc_key* key, int curveId)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    if (key == NULL)
+        return ret;
+
+    /* locate registered callback */
+    dev = wc_CryptoCb_FindDevice(key->devId);
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
+        cryptoInfo.pk.type = WC_PK_TYPE_EC_KEYGEN;
+        cryptoInfo.pk.eckg.rng = rng;
+        cryptoInfo.pk.eckg.size = keySize;
+        cryptoInfo.pk.eckg.key = key;
+        cryptoInfo.pk.eckg.curveId = curveId;
+
+        ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+    }
+
+    return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+int wc_CryptoCb_Ecdh(ecc_key* private_key, ecc_key* public_key,
+    byte* out, word32* outlen)
+{
+    int ret = CRYPTOCB_UNAVAILABLE;
+    CryptoCb* dev;
+
+    if (private_key == NULL)
+        return ret;
+
+    /* locate registered callback */
+    dev = wc_CryptoCb_FindDevice(private_key->devId);
+    if (dev && dev->cb) {
+        wc_CryptoInfo cryptoInfo;
+        XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+        cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
+        cryptoInfo.pk.type = WC_PK_TYPE_ECDH;
+        cryptoInfo.pk.ecdh.private_key = private_key;
+        cryptoInfo.pk.ecdh.public_key = public_key;
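+        /* out receives the raw shared secret; outlen is the buffer size on
+         * input and the shared-secret length on output */
+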
cryptoInfo.pk.ecdh.out = out; + cryptoInfo.pk.ecdh.outlen = outlen; + + ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx); + } + + return wc_CryptoCb_TranslateErrorCode(ret); +} + +int wc_CryptoCb_EccSign(const byte* in, word32 inlen, byte* out, + word32 *outlen, WC_RNG* rng, ecc_key* key) +{ + int ret = CRYPTOCB_UNAVAILABLE; + CryptoCb* dev; + + if (key == NULL) + return ret; + + /* locate registered callback */ + dev = wc_CryptoCb_FindDevice(key->devId); + if (dev && dev->cb) { + wc_CryptoInfo cryptoInfo; + XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo)); + cryptoInfo.algo_type = WC_ALGO_TYPE_PK; + cryptoInfo.pk.type = WC_PK_TYPE_ECDSA_SIGN; + cryptoInfo.pk.eccsign.in = in; + cryptoInfo.pk.eccsign.inlen = inlen; + cryptoInfo.pk.eccsign.out = out; + cryptoInfo.pk.eccsign.outlen = outlen; + cryptoInfo.pk.eccsign.rng = rng; + cryptoInfo.pk.eccsign.key = key; + + ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx); + } + + return wc_CryptoCb_TranslateErrorCode(ret); +} + +int wc_CryptoCb_EccVerify(const byte* sig, word32 siglen, + const byte* hash, word32 hashlen, int* res, ecc_key* key) +{ + int ret = CRYPTOCB_UNAVAILABLE; + CryptoCb* dev; + + if (key == NULL) + return ret; + + /* locate registered callback */ + dev = wc_CryptoCb_FindDevice(key->devId); + if (dev && dev->cb) { + wc_CryptoInfo cryptoInfo; + XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo)); + cryptoInfo.algo_type = WC_ALGO_TYPE_PK; + cryptoInfo.pk.type = WC_PK_TYPE_ECDSA_VERIFY; + cryptoInfo.pk.eccverify.sig = sig; + cryptoInfo.pk.eccverify.siglen = siglen; + cryptoInfo.pk.eccverify.hash = hash; + cryptoInfo.pk.eccverify.hashlen = hashlen; + cryptoInfo.pk.eccverify.res = res; + cryptoInfo.pk.eccverify.key = key; + + ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx); + } + + return wc_CryptoCb_TranslateErrorCode(ret); +} +#endif /* HAVE_ECC */ + +#ifndef NO_AES +#ifdef HAVE_AESGCM +int wc_CryptoCb_AesGcmEncrypt(Aes* aes, byte* out, + const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + int ret = CRYPTOCB_UNAVAILABLE; + CryptoCb* dev; + + /* locate registered callback */ + if (aes) { + dev = wc_CryptoCb_FindDevice(aes->devId); + } + else { + /* locate first callback and try using it */ + dev = wc_CryptoCb_FindDeviceByIndex(0); + } + + if (dev && dev->cb) { + wc_CryptoInfo cryptoInfo; + XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo)); + cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER; + cryptoInfo.cipher.type = WC_CIPHER_AES_GCM; + cryptoInfo.cipher.enc = 1; + cryptoInfo.cipher.aesgcm_enc.aes = aes; + cryptoInfo.cipher.aesgcm_enc.out = out; + cryptoInfo.cipher.aesgcm_enc.in = in; + cryptoInfo.cipher.aesgcm_enc.sz = sz; + cryptoInfo.cipher.aesgcm_enc.iv = iv; + cryptoInfo.cipher.aesgcm_enc.ivSz = ivSz; + cryptoInfo.cipher.aesgcm_enc.authTag = authTag; + cryptoInfo.cipher.aesgcm_enc.authTagSz = authTagSz; + cryptoInfo.cipher.aesgcm_enc.authIn = authIn; + cryptoInfo.cipher.aesgcm_enc.authInSz = authInSz; + + ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx); + } + + return wc_CryptoCb_TranslateErrorCode(ret); +} + +int wc_CryptoCb_AesGcmDecrypt(Aes* aes, byte* out, + const byte* in, word32 sz, + const byte* iv, word32 ivSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + int ret = CRYPTOCB_UNAVAILABLE; + CryptoCb* dev; + + /* locate registered callback */ + if (aes) { + dev = wc_CryptoCb_FindDevice(aes->devId); + } + else { + /* locate first callback and try using it */ + dev = wc_CryptoCb_FindDeviceByIndex(0); + } + + if (dev && 
dev->cb) { + wc_CryptoInfo cryptoInfo; + XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo)); + cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER; + cryptoInfo.cipher.type = WC_CIPHER_AES_GCM; + cryptoInfo.cipher.enc = 0; + cryptoInfo.cipher.aesgcm_dec.aes = aes; + cryptoInfo.cipher.aesgcm_dec.out = out; + cryptoInfo.cipher.aesgcm_dec.in = in; + cryptoInfo.cipher.aesgcm_dec.sz = sz; + cryptoInfo.cipher.aesgcm_dec.iv = iv; + cryptoInfo.cipher.aesgcm_dec.ivSz = ivSz; + cryptoInfo.cipher.aesgcm_dec.authTag = authTag; + cryptoInfo.cipher.aesgcm_dec.authTagSz = authTagSz; + cryptoInfo.cipher.aesgcm_dec.authIn = authIn; + cryptoInfo.cipher.aesgcm_dec.authInSz = authInSz; + + ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx); + } + + return wc_CryptoCb_TranslateErrorCode(ret); +} +#endif /* HAVE_AESGCM */ + +#ifdef HAVE_AES_CBC +int wc_CryptoCb_AesCbcEncrypt(Aes* aes, byte* out, + const byte* in, word32 sz) +{ + int ret = CRYPTOCB_UNAVAILABLE; + CryptoCb* dev; + + /* locate registered callback */ + if (aes) { + dev = wc_CryptoCb_FindDevice(aes->devId); + } + else { + /* locate first callback and try using it */ + dev = wc_CryptoCb_FindDeviceByIndex(0); + } + + if (dev && dev->cb) { + wc_CryptoInfo cryptoInfo; + XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo)); + cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER; + cryptoInfo.cipher.type = WC_CIPHER_AES_CBC; + cryptoInfo.cipher.enc = 1; + cryptoInfo.cipher.aescbc.aes = aes; + cryptoInfo.cipher.aescbc.out = out; + cryptoInfo.cipher.aescbc.in = in; + cryptoInfo.cipher.aescbc.sz = sz; + + ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx); + } + + return wc_CryptoCb_TranslateErrorCode(ret); +} + +int wc_CryptoCb_AesCbcDecrypt(Aes* aes, byte* out, + const byte* in, word32 sz) +{ + int ret = CRYPTOCB_UNAVAILABLE; + CryptoCb* dev; + + /* locate registered callback */ + if (aes) { + dev = wc_CryptoCb_FindDevice(aes->devId); + } + else { + /* locate first callback and try using it */ + dev = wc_CryptoCb_FindDeviceByIndex(0); + } + + if (dev && dev->cb) { + wc_CryptoInfo cryptoInfo; + XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo)); + cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER; + cryptoInfo.cipher.type = WC_CIPHER_AES_CBC; + cryptoInfo.cipher.enc = 0; + cryptoInfo.cipher.aescbc.aes = aes; + cryptoInfo.cipher.aescbc.out = out; + cryptoInfo.cipher.aescbc.in = in; + cryptoInfo.cipher.aescbc.sz = sz; + + ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx); + } + + return wc_CryptoCb_TranslateErrorCode(ret); +} +#endif /* HAVE_AES_CBC */ +#endif /* !NO_AES */ + +#ifndef NO_DES3 +int wc_CryptoCb_Des3Encrypt(Des3* des3, byte* out, + const byte* in, word32 sz) +{ + int ret = CRYPTOCB_UNAVAILABLE; + CryptoCb* dev; + + /* locate registered callback */ + if (des3) { + dev = wc_CryptoCb_FindDevice(des3->devId); + } + else { + /* locate first callback and try using it */ + dev = wc_CryptoCb_FindDeviceByIndex(0); + } + + if (dev && dev->cb) { + wc_CryptoInfo cryptoInfo; + XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo)); + cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER; + cryptoInfo.cipher.type = WC_CIPHER_DES3; + cryptoInfo.cipher.enc = 1; + cryptoInfo.cipher.des3.des = des3; + cryptoInfo.cipher.des3.out = out; + cryptoInfo.cipher.des3.in = in; + cryptoInfo.cipher.des3.sz = sz; + + ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx); + } + + return wc_CryptoCb_TranslateErrorCode(ret); +} + +int wc_CryptoCb_Des3Decrypt(Des3* des3, byte* out, + const byte* in, word32 sz) +{ + int ret = CRYPTOCB_UNAVAILABLE; + CryptoCb* dev; + + /* locate registered callback */ + if (des3) { + dev = wc_CryptoCb_FindDevice(des3->devId); 
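+        /* the Des3 struct carries the devId given to wc_Des3Init() */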
+ } + else { + /* locate first callback and try using it */ + dev = wc_CryptoCb_FindDeviceByIndex(0); + } + + if (dev && dev->cb) { + wc_CryptoInfo cryptoInfo; + XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo)); + cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER; + cryptoInfo.cipher.type = WC_CIPHER_DES3; + cryptoInfo.cipher.enc = 0; + cryptoInfo.cipher.des3.des = des3; + cryptoInfo.cipher.des3.out = out; + cryptoInfo.cipher.des3.in = in; + cryptoInfo.cipher.des3.sz = sz; + + ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx); + } + + return wc_CryptoCb_TranslateErrorCode(ret); +} +#endif /* !NO_DES3 */ + +#ifndef NO_SHA +int wc_CryptoCb_ShaHash(wc_Sha* sha, const byte* in, + word32 inSz, byte* digest) +{ + int ret = CRYPTOCB_UNAVAILABLE; + CryptoCb* dev; + + /* locate registered callback */ + if (sha) { + dev = wc_CryptoCb_FindDevice(sha->devId); + } + else { + /* locate first callback and try using it */ + dev = wc_CryptoCb_FindDeviceByIndex(0); + } + + if (dev && dev->cb) { + wc_CryptoInfo cryptoInfo; + XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo)); + cryptoInfo.algo_type = WC_ALGO_TYPE_HASH; + cryptoInfo.hash.type = WC_HASH_TYPE_SHA; + cryptoInfo.hash.sha1 = sha; + cryptoInfo.hash.in = in; + cryptoInfo.hash.inSz = inSz; + cryptoInfo.hash.digest = digest; + + ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx); + } + + return wc_CryptoCb_TranslateErrorCode(ret); +} +#endif /* !NO_SHA */ + +#ifndef NO_SHA256 +int wc_CryptoCb_Sha256Hash(wc_Sha256* sha256, const byte* in, + word32 inSz, byte* digest) +{ + int ret = CRYPTOCB_UNAVAILABLE; + CryptoCb* dev; + + /* locate registered callback */ + if (sha256) { + dev = wc_CryptoCb_FindDevice(sha256->devId); + } + else { + /* locate first callback and try using it */ + dev = wc_CryptoCb_FindDeviceByIndex(0); + } + + if (dev && dev->cb) { + wc_CryptoInfo cryptoInfo; + XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo)); + cryptoInfo.algo_type = WC_ALGO_TYPE_HASH; + cryptoInfo.hash.type = WC_HASH_TYPE_SHA256; + cryptoInfo.hash.sha256 = sha256; + cryptoInfo.hash.in = in; + cryptoInfo.hash.inSz = inSz; + cryptoInfo.hash.digest = digest; + + ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx); + } + + return wc_CryptoCb_TranslateErrorCode(ret); +} +#endif /* !NO_SHA256 */ + +#ifndef NO_HMAC +int wc_CryptoCb_Hmac(Hmac* hmac, int macType, const byte* in, word32 inSz, + byte* digest) +{ + int ret = CRYPTOCB_UNAVAILABLE; + CryptoCb* dev; + + if (hmac == NULL) + return ret; + + /* locate registered callback */ + dev = wc_CryptoCb_FindDevice(hmac->devId); + if (dev && dev->cb) { + wc_CryptoInfo cryptoInfo; + XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo)); + cryptoInfo.algo_type = WC_ALGO_TYPE_HMAC; + cryptoInfo.hmac.macType = macType; + cryptoInfo.hmac.in = in; + cryptoInfo.hmac.inSz = inSz; + cryptoInfo.hmac.digest = digest; + cryptoInfo.hmac.hmac = hmac; + + ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx); + } + + return wc_CryptoCb_TranslateErrorCode(ret); +} +#endif /* !NO_HMAC */ + +#ifndef WC_NO_RNG +int wc_CryptoCb_RandomBlock(WC_RNG* rng, byte* out, word32 sz) +{ + int ret = CRYPTOCB_UNAVAILABLE; + CryptoCb* dev; + + /* locate registered callback */ + if (rng) { + dev = wc_CryptoCb_FindDevice(rng->devId); + } + else { + /* locate first callback and try using it */ + dev = wc_CryptoCb_FindDeviceByIndex(0); + } + + if (dev && dev->cb) { + wc_CryptoInfo cryptoInfo; + XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo)); + cryptoInfo.algo_type = WC_ALGO_TYPE_RNG; + cryptoInfo.rng.rng = rng; + cryptoInfo.rng.out = out; + cryptoInfo.rng.sz = sz; + + ret = dev->cb(dev->devId, &cryptoInfo, 
dev->ctx); + } + + return wc_CryptoCb_TranslateErrorCode(ret); +} + +int wc_CryptoCb_RandomSeed(OS_Seed* os, byte* seed, word32 sz) +{ + int ret = CRYPTOCB_UNAVAILABLE; + CryptoCb* dev; + + /* locate registered callback */ + dev = wc_CryptoCb_FindDevice(os->devId); + if (dev && dev->cb) { + wc_CryptoInfo cryptoInfo; + XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo)); + cryptoInfo.algo_type = WC_ALGO_TYPE_SEED; + cryptoInfo.seed.os = os; + cryptoInfo.seed.seed = seed; + cryptoInfo.seed.sz = sz; + + ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx); + } + + return wc_CryptoCb_TranslateErrorCode(ret); +} +#endif /* !WC_NO_RNG */ + +#endif /* WOLF_CRYPTO_CB */ diff --git a/client/wolfssl/wolfcrypt/src/curve25519.c b/client/wolfssl/wolfcrypt/src/curve25519.c new file mode 100644 index 0000000..39e1216 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/curve25519.c @@ -0,0 +1,513 @@ +/* curve25519.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + /* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work. 
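+  *
+  * A minimal key-agreement sketch using the functions in this file (error
+  * checks elided; rng is an already-initialized WC_RNG, and
+  * HAVE_CURVE25519_SHARED_SECRET is assumed):
+  *
+  *     curve25519_key alice, bob;
+  *     byte   secret[CURVE25519_KEYSIZE];
+  *     word32 secretSz = (word32)sizeof(secret);
+  *
+  *     wc_curve25519_init(&alice);
+  *     wc_curve25519_init(&bob);
+  *     wc_curve25519_make_key(&rng, CURVE25519_KEYSIZE, &alice);
+  *     wc_curve25519_make_key(&rng, CURVE25519_KEYSIZE, &bob);
+  *     wc_curve25519_shared_secret(&alice, &bob, secret, &secretSz);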
*/ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef HAVE_CURVE25519 + +#include +#include +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +#if defined(FREESCALE_LTC_ECC) + #include +#endif + +const curve25519_set_type curve25519_sets[] = { + { + CURVE25519_KEYSIZE, + "CURVE25519", + } +}; + +int wc_curve25519_make_key(WC_RNG* rng, int keysize, curve25519_key* key) +{ +#ifdef FREESCALE_LTC_ECC + const ECPoint* basepoint = wc_curve25519_GetBasePoint(); +#else + unsigned char basepoint[CURVE25519_KEYSIZE] = {9}; +#endif + int ret; + + if (key == NULL || rng == NULL) + return BAD_FUNC_ARG; + + /* currently only a key size of 32 bytes is used */ + if (keysize != CURVE25519_KEYSIZE) + return ECC_BAD_ARG_E; + +#ifndef FREESCALE_LTC_ECC + fe_init(); +#endif + + /* random number for private key */ + ret = wc_RNG_GenerateBlock(rng, key->k.point, keysize); + if (ret != 0) + return ret; + + /* Clamp the private key */ + key->k.point[0] &= 248; + key->k.point[CURVE25519_KEYSIZE-1] &= 63; /* same &=127 because |=64 after */ + key->k.point[CURVE25519_KEYSIZE-1] |= 64; + + /* compute public key */ + #ifdef FREESCALE_LTC_ECC + ret = wc_curve25519(&key->p, key->k.point, basepoint, kLTC_Weierstrass); /* input basepoint on Weierstrass curve */ + #else + ret = curve25519(key->p.point, key->k.point, basepoint); + #endif + if (ret != 0) { + ForceZero(key->k.point, keysize); + ForceZero(key->p.point, keysize); + return ret; + } + + return ret; +} + +#ifdef HAVE_CURVE25519_SHARED_SECRET + +int wc_curve25519_shared_secret(curve25519_key* private_key, + curve25519_key* public_key, + byte* out, word32* outlen) +{ + return wc_curve25519_shared_secret_ex(private_key, public_key, + out, outlen, EC25519_BIG_ENDIAN); +} + +int wc_curve25519_shared_secret_ex(curve25519_key* private_key, + curve25519_key* public_key, + byte* out, word32* outlen, int endian) +{ + #ifdef FREESCALE_LTC_ECC + ECPoint o = {{0}}; + #else + unsigned char o[CURVE25519_KEYSIZE]; + #endif + int ret = 0; + + /* sanity check */ + if (private_key == NULL || public_key == NULL || + out == NULL || outlen == NULL || *outlen < CURVE25519_KEYSIZE) + return BAD_FUNC_ARG; + + /* avoid implementation fingerprinting */ + if (public_key->p.point[CURVE25519_KEYSIZE-1] > 0x7F) + return ECC_BAD_ARG_E; + + #ifdef FREESCALE_LTC_ECC + ret = wc_curve25519(&o, private_key->k.point, &public_key->p, kLTC_Curve25519 /* input point P on Curve25519 */); + #else + ret = curve25519(o, private_key->k.point, public_key->p.point); + #endif + if (ret != 0) { + #ifdef FREESCALE_LTC_ECC + ForceZero(o.point, CURVE25519_KEYSIZE); + ForceZero(o.pointY, CURVE25519_KEYSIZE); + #else + ForceZero(o, CURVE25519_KEYSIZE); + #endif + return ret; + } + + if (endian == EC25519_BIG_ENDIAN) { + int i; + /* put shared secret key in Big Endian format */ + for (i = 0; i < CURVE25519_KEYSIZE; i++) + #ifdef FREESCALE_LTC_ECC + out[i] = o.point[CURVE25519_KEYSIZE - i -1]; + #else + out[i] = o[CURVE25519_KEYSIZE - i -1]; + #endif + } + else /* put shared secret key in Little Endian format */ + #ifdef FREESCALE_LTC_ECC + XMEMCPY(out, o.point, CURVE25519_KEYSIZE); + #else + XMEMCPY(out, o, CURVE25519_KEYSIZE); + #endif + + *outlen = CURVE25519_KEYSIZE; + + #ifdef FREESCALE_LTC_ECC + ForceZero(o.point, CURVE25519_KEYSIZE); + ForceZero(o.pointY, CURVE25519_KEYSIZE); + #else + ForceZero(o, CURVE25519_KEYSIZE); + #endif + + return ret; +} + +#endif /* HAVE_CURVE25519_SHARED_SECRET */ + +#ifdef HAVE_CURVE25519_KEY_EXPORT + +/* export curve25519 public 
key (Big endian) + * return 0 on success */ +int wc_curve25519_export_public(curve25519_key* key, byte* out, word32* outLen) +{ + return wc_curve25519_export_public_ex(key, out, outLen, EC25519_BIG_ENDIAN); +} + +/* export curve25519 public key (Big or Little endian) + * return 0 on success */ +int wc_curve25519_export_public_ex(curve25519_key* key, byte* out, + word32* outLen, int endian) +{ + if (key == NULL || out == NULL || outLen == NULL) + return BAD_FUNC_ARG; + + /* check and set outgoing key size */ + if (*outLen < CURVE25519_KEYSIZE) { + *outLen = CURVE25519_KEYSIZE; + return ECC_BAD_ARG_E; + } + *outLen = CURVE25519_KEYSIZE; + + if (endian == EC25519_BIG_ENDIAN) { + int i; + + /* read keys in Big Endian format */ + for (i = 0; i < CURVE25519_KEYSIZE; i++) + out[i] = key->p.point[CURVE25519_KEYSIZE - i - 1]; + } + else + XMEMCPY(out, key->p.point, CURVE25519_KEYSIZE); + + return 0; +} + +#endif /* HAVE_CURVE25519_KEY_EXPORT */ + +#ifdef HAVE_CURVE25519_KEY_IMPORT + +/* import curve25519 public key (Big endian) + * return 0 on success */ +int wc_curve25519_import_public(const byte* in, word32 inLen, + curve25519_key* key) +{ + return wc_curve25519_import_public_ex(in, inLen, key, EC25519_BIG_ENDIAN); +} + +/* import curve25519 public key (Big or Little endian) + * return 0 on success */ +int wc_curve25519_import_public_ex(const byte* in, word32 inLen, + curve25519_key* key, int endian) +{ + /* sanity check */ + if (key == NULL || in == NULL) + return BAD_FUNC_ARG; + + /* check size of incoming keys */ + if (inLen != CURVE25519_KEYSIZE) + return ECC_BAD_ARG_E; + + if (endian == EC25519_BIG_ENDIAN) { + int i; + + /* read keys in Big Endian format */ + for (i = 0; i < CURVE25519_KEYSIZE; i++) + key->p.point[i] = in[CURVE25519_KEYSIZE - i - 1]; + } + else + XMEMCPY(key->p.point, in, inLen); + + key->dp = &curve25519_sets[0]; + + /* LTC needs also Y coordinate - let's compute it */ + #ifdef FREESCALE_LTC_ECC + ltc_pkha_ecc_point_t ltcPoint; + ltcPoint.X = &key->p.point[0]; + ltcPoint.Y = &key->p.pointY[0]; + LTC_PKHA_Curve25519ComputeY(<cPoint); + #endif + + return 0; +} + +/* Check the public key value (big or little endian) + * + * pub Public key bytes. + * pubSz Size of public key in bytes. + * endian Public key bytes passed in as big-endian or little-endian. + * returns BAD_FUNC_ARGS when pub is NULL, + * BUFFER_E when size of public key is zero; + * ECC_OUT_OF_RANGE_E if the high bit is set; + * ECC_BAD_ARG_E if key length is not 32 bytes, public key value is + * zero or one; and + * 0 otherwise. 
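+ *
+ * For example (little-endian input): a 32-byte all-zero array fails with
+ * ECC_BAD_ARG_E, and any key whose last byte has the top bit set (for
+ * instance pub[31] == 0x80) fails with ECC_OUT_OF_RANGE_E, both before any
+ * point arithmetic is attempted.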
+ */ +int wc_curve25519_check_public(const byte* pub, word32 pubSz, int endian) +{ + word32 i; + + if (pub == NULL) + return BAD_FUNC_ARG; + + /* Check for empty key data */ + if (pubSz == 0) + return BUFFER_E; + + /* Check key length */ + if (pubSz != CURVE25519_KEYSIZE) + return ECC_BAD_ARG_E; + + + if (endian == EC25519_LITTLE_ENDIAN) { + /* Check for value of zero or one */ + for (i = pubSz - 1; i > 0; i--) { + if (pub[i] != 0) + break; + } + if (i == 0 && (pub[0] == 0 || pub[0] == 1)) + return ECC_BAD_ARG_E; + + /* Check high bit set */ + if (pub[CURVE25519_KEYSIZE-1] & 0x80) + return ECC_OUT_OF_RANGE_E; + } + else { + /* Check for value of zero or one */ + for (i = 0; i < pubSz-1; i++) { + if (pub[i] != 0) + break; + } + if (i == pubSz - 1 && (pub[i] == 0 || pub[i] == 1)) + return ECC_BAD_ARG_E; + + /* Check high bit set */ + if (pub[0] & 0x80) + return ECC_OUT_OF_RANGE_E; + } + + return 0; +} + +#endif /* HAVE_CURVE25519_KEY_IMPORT */ + + +#ifdef HAVE_CURVE25519_KEY_EXPORT + +/* export curve25519 private key only raw (Big endian) + * outLen is in/out size + * return 0 on success */ +int wc_curve25519_export_private_raw(curve25519_key* key, byte* out, + word32* outLen) +{ + return wc_curve25519_export_private_raw_ex(key, out, outLen, + EC25519_BIG_ENDIAN); +} + +/* export curve25519 private key only raw (Big or Little endian) + * outLen is in/out size + * return 0 on success */ +int wc_curve25519_export_private_raw_ex(curve25519_key* key, byte* out, + word32* outLen, int endian) +{ + /* sanity check */ + if (key == NULL || out == NULL || outLen == NULL) + return BAD_FUNC_ARG; + + /* check size of outgoing buffer */ + if (*outLen < CURVE25519_KEYSIZE) { + *outLen = CURVE25519_KEYSIZE; + return ECC_BAD_ARG_E; + } + *outLen = CURVE25519_KEYSIZE; + + if (endian == EC25519_BIG_ENDIAN) { + int i; + + /* put the key in Big Endian format */ + for (i = 0; i < CURVE25519_KEYSIZE; i++) + out[i] = key->k.point[CURVE25519_KEYSIZE - i - 1]; + } + else + XMEMCPY(out, key->k.point, CURVE25519_KEYSIZE); + + return 0; +} + +/* curve25519 key pair export (Big or Little endian) + * return 0 on success */ +int wc_curve25519_export_key_raw(curve25519_key* key, + byte* priv, word32 *privSz, + byte* pub, word32 *pubSz) +{ + return wc_curve25519_export_key_raw_ex(key, priv, privSz, + pub, pubSz, EC25519_BIG_ENDIAN); +} + +/* curve25519 key pair export (Big or Little endian) + * return 0 on success */ +int wc_curve25519_export_key_raw_ex(curve25519_key* key, + byte* priv, word32 *privSz, + byte* pub, word32 *pubSz, + int endian) +{ + int ret; + + /* export private part */ + ret = wc_curve25519_export_private_raw_ex(key, priv, privSz, endian); + if (ret != 0) + return ret; + + /* export public part */ + return wc_curve25519_export_public_ex(key, pub, pubSz, endian); +} + +#endif /* HAVE_CURVE25519_KEY_EXPORT */ + +#ifdef HAVE_CURVE25519_KEY_IMPORT + +/* curve25519 private key import (Big endian) + * Public key to match private key needs to be imported too + * return 0 on success */ +int wc_curve25519_import_private_raw(const byte* priv, word32 privSz, + const byte* pub, word32 pubSz, + curve25519_key* key) +{ + return wc_curve25519_import_private_raw_ex(priv, privSz, pub, pubSz, + key, EC25519_BIG_ENDIAN); +} + +/* curve25519 private key import (Big or Little endian) + * Public key to match private key needs to be imported too + * return 0 on success */ +int wc_curve25519_import_private_raw_ex(const byte* priv, word32 privSz, + const byte* pub, word32 pubSz, + curve25519_key* key, int endian) +{ + int ret; + + 
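/* note: wc_curve25519_import_private_ex() below also re-clamps the scalar */
+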
/* import private part */ + ret = wc_curve25519_import_private_ex(priv, privSz, key, endian); + if (ret != 0) + return ret; + + /* import public part */ + return wc_curve25519_import_public_ex(pub, pubSz, key, endian); +} + +/* curve25519 private key import only. (Big endian) + * return 0 on success */ +int wc_curve25519_import_private(const byte* priv, word32 privSz, + curve25519_key* key) +{ + return wc_curve25519_import_private_ex(priv, privSz, + key, EC25519_BIG_ENDIAN); +} + +/* curve25519 private key import only. (Big or Little endian) + * return 0 on success */ +int wc_curve25519_import_private_ex(const byte* priv, word32 privSz, + curve25519_key* key, int endian) +{ + /* sanity check */ + if (key == NULL || priv == NULL) + return BAD_FUNC_ARG; + + /* check size of incoming keys */ + if ((int)privSz != CURVE25519_KEYSIZE) + return ECC_BAD_ARG_E; + + if (endian == EC25519_BIG_ENDIAN) { + int i; + + /* read the key in Big Endian format */ + for (i = 0; i < CURVE25519_KEYSIZE; i++) + key->k.point[i] = priv[CURVE25519_KEYSIZE - i - 1]; + } + else + XMEMCPY(key->k.point, priv, CURVE25519_KEYSIZE); + + key->dp = &curve25519_sets[0]; + + /* Clamp the key */ + key->k.point[0] &= 248; + key->k.point[privSz-1] &= 63; /* same &=127 because |=64 after */ + key->k.point[privSz-1] |= 64; + + return 0; +} + +#endif /* HAVE_CURVE25519_KEY_IMPORT */ + + +int wc_curve25519_init(curve25519_key* key) +{ + if (key == NULL) + return BAD_FUNC_ARG; + + XMEMSET(key, 0, sizeof(*key)); + + /* currently the format for curve25519 */ + key->dp = &curve25519_sets[0]; + +#ifndef FREESCALE_LTC_ECC + fe_init(); +#endif + + return 0; +} + + +/* Clean the memory of a key */ +void wc_curve25519_free(curve25519_key* key) +{ + if (key == NULL) + return; + + key->dp = NULL; + ForceZero(key->p.point, sizeof(key->p.point)); + ForceZero(key->k.point, sizeof(key->k.point)); + #ifdef FREESCALE_LTC_ECC + ForceZero(key->p.point, sizeof(key->p.pointY)); + ForceZero(key->k.point, sizeof(key->k.pointY)); + #endif +} + + +/* get key size */ +int wc_curve25519_size(curve25519_key* key) +{ + if (key == NULL) + return 0; + + return key->dp->size; +} + +#endif /*HAVE_CURVE25519*/ + diff --git a/client/wolfssl/wolfcrypt/src/curve448.c b/client/wolfssl/wolfcrypt/src/curve448.c new file mode 100644 index 0000000..135f238 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/curve448.c @@ -0,0 +1,635 @@ +/* curve448.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Implemented to: RFC 7748 */ + +/* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work. + * Reworked for curve448 by Sean Parkinson. 
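+ *
+ * The API mirrors curve25519.c above; a minimal sketch (error checks
+ * elided; rng is an already-initialized WC_RNG, and
+ * HAVE_CURVE448_SHARED_SECRET is assumed):
+ *
+ *     curve448_key a, b;
+ *     byte   ss[CURVE448_PUB_KEY_SIZE];
+ *     word32 ssSz = (word32)sizeof(ss);
+ *
+ *     wc_curve448_init(&a);
+ *     wc_curve448_init(&b);
+ *     wc_curve448_make_key(&rng, CURVE448_KEY_SIZE, &a);
+ *     wc_curve448_make_key(&rng, CURVE448_KEY_SIZE, &b);
+ *     wc_curve448_shared_secret(&a, &b, ss, &ssSz);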
+ */ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef HAVE_CURVE448 + +#include +#include +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + + +/* Make a new curve448 private/public key. + * + * rng [in] Random number generator. + * keysize [in] Size of the key to generate. + * key [in] Curve448 key object. + * returns BAD_FUNC_ARG when rng or key are NULL, + * ECC_BAD_ARG_E when keysize is not CURVE448_KEY_SIZE, + * 0 otherwise. + */ +int wc_curve448_make_key(WC_RNG* rng, int keysize, curve448_key* key) +{ + unsigned char basepoint[CURVE448_KEY_SIZE] = {5}; + int ret = 0; + + if ((key == NULL) || (rng == NULL)) { + ret = BAD_FUNC_ARG; + } + + /* currently only a key size of 56 bytes is used */ + if ((ret == 0) && (keysize != CURVE448_KEY_SIZE)) { + ret = ECC_BAD_ARG_E; + } + + if (ret == 0) { + fe448_init(); + + /* random number for private key */ + ret = wc_RNG_GenerateBlock(rng, key->k, keysize); + } + if (ret == 0) { + /* Clamp the private key */ + key->k[0] &= 0xfc; + key->k[CURVE448_KEY_SIZE-1] |= 0x80; + + /* compute public key */ + ret = curve448(key->p, key->k, basepoint); + if (ret != 0) { + ForceZero(key->k, keysize); + ForceZero(key->p, keysize); + } + } + + return ret; +} + +#ifdef HAVE_CURVE448_SHARED_SECRET + +/* Calculate the shared secret from the private key and peer's public key. + * Calculation over curve448. + * Secret encoded big-endian. + * + * private_key [in] Curve448 private key. + * public_key [in] Curve448 public key. + * out [in] Array to hold shared secret. + * outLen [in/out] On in, the number of bytes in array. + * On out, the number bytes put into array. + * returns BAD_FUNC_ARG when a parameter is NULL or outLen is less than + * CURVE448_KEY_SIZE, + * 0 otherwise. + */ +int wc_curve448_shared_secret(curve448_key* private_key, + curve448_key* public_key, + byte* out, word32* outLen) +{ + return wc_curve448_shared_secret_ex(private_key, public_key, out, outLen, + EC448_BIG_ENDIAN); +} + +/* Calculate the shared secret from the private key and peer's public key. + * Calculation over curve448. + * + * private_key [in] Curve448 private key. + * public_key [in] Curve448 public key. + * out [in] Array to hold shared secret. + * outLen [in/out] On in, the number of bytes in array. + * On out, the number bytes put into array. + * endian [in] Endianness to use when encoding number in array. + * returns BAD_FUNC_ARG when a parameter is NULL or outLen is less than + * CURVE448_PUB_KEY_SIZE, + * 0 otherwise. + */ +int wc_curve448_shared_secret_ex(curve448_key* private_key, + curve448_key* public_key, + byte* out, word32* outLen, int endian) +{ + unsigned char o[CURVE448_PUB_KEY_SIZE]; + int ret = 0; + int i; + + /* sanity check */ + if ((private_key == NULL) || (public_key == NULL) || (out == NULL) || + (outLen == NULL) || (*outLen < CURVE448_PUB_KEY_SIZE)) { + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + ret = curve448(o, private_key->k, public_key->p); + } + if (ret == 0) { + if (endian == EC448_BIG_ENDIAN) { + /* put shared secret key in Big Endian format */ + for (i = 0; i < CURVE448_PUB_KEY_SIZE; i++) { + out[i] = o[CURVE448_PUB_KEY_SIZE - i -1]; + } + } + else { + /* put shared secret key in Little Endian format */ + XMEMCPY(out, o, CURVE448_PUB_KEY_SIZE); + } + + *outLen = CURVE448_PUB_KEY_SIZE; + } + + ForceZero(o, CURVE448_PUB_KEY_SIZE); + + return ret; +} + +#endif /* HAVE_CURVE448_SHARED_SECRET */ + +#ifdef HAVE_CURVE448_KEY_EXPORT + +/* Export the curve448 public key. + * Public key encoded big-endian. 
+ * + * key [in] Curve448 public key. + * out [in] Array to hold public key. + * outLen [in/out] On in, the number of bytes in array. + * On out, the number bytes put into array. + * returns BAD_FUNC_ARG when a parameter is NULL, + * ECC_BAD_ARG_E when outLen is less than CURVE448_PUB_KEY_SIZE, + * 0 otherwise. + */ +int wc_curve448_export_public(curve448_key* key, byte* out, word32* outLen) +{ + return wc_curve448_export_public_ex(key, out, outLen, EC448_BIG_ENDIAN); +} + +/* Export the curve448 public key. + * + * key [in] Curve448 public key. + * out [in] Array to hold public key. + * outLen [in/out] On in, the number of bytes in array. + * On out, the number bytes put into array. + * endian [in] Endianness to use when encoding number in array. + * returns BAD_FUNC_ARG when a parameter is NULL, + * ECC_BAD_ARG_E when outLen is less than CURVE448_PUB_KEY_SIZE, + * 0 otherwise. + */ +int wc_curve448_export_public_ex(curve448_key* key, byte* out, word32* outLen, + int endian) +{ + int ret = 0; + int i; + + if ((key == NULL) || (out == NULL) || (outLen == NULL)) { + ret = BAD_FUNC_ARG; + } + + /* check and set outgoing key size */ + if ((ret == 0) && (*outLen < CURVE448_PUB_KEY_SIZE)) { + *outLen = CURVE448_PUB_KEY_SIZE; + ret = ECC_BAD_ARG_E; + } + if (ret == 0) { + *outLen = CURVE448_PUB_KEY_SIZE; + + if (endian == EC448_BIG_ENDIAN) { + /* read keys in Big Endian format */ + for (i = 0; i < CURVE448_PUB_KEY_SIZE; i++) { + out[i] = key->p[CURVE448_PUB_KEY_SIZE - i - 1]; + } + } + else { + XMEMCPY(out, key->p, CURVE448_PUB_KEY_SIZE); + } + } + + return ret; +} + +#endif /* HAVE_CURVE448_KEY_EXPORT */ + +#ifdef HAVE_CURVE448_KEY_IMPORT + +/* Import a curve448 public key from a byte array. + * Public key encoded in big-endian. + * + * in [in] Array holding public key. + * inLen [in] Number of bytes of data in array. + * key [in] Curve448 public key. + * returns BAD_FUNC_ARG when a parameter is NULL, + * ECC_BAD_ARG_E when inLen is less than CURVE448_PUB_KEY_SIZE, + * 0 otherwise. + */ +int wc_curve448_import_public(const byte* in, word32 inLen, curve448_key* key) +{ + return wc_curve448_import_public_ex(in, inLen, key, EC448_BIG_ENDIAN); +} + +/* Import a curve448 public key from a byte array. + * + * in [in] Array holding public key. + * inLen [in] Number of bytes of data in array. + * key [in] Curve448 public key. + * endian [in] Endianness of encoded number in byte array. + * returns BAD_FUNC_ARG when a parameter is NULL, + * ECC_BAD_ARG_E when inLen is less than CURVE448_PUB_KEY_SIZE, + * 0 otherwise. + */ +int wc_curve448_import_public_ex(const byte* in, word32 inLen, + curve448_key* key, int endian) +{ + int ret = 0; + int i; + + /* sanity check */ + if ((key == NULL) || (in == NULL)) { + ret = BAD_FUNC_ARG; + } + + /* check size of incoming keys */ + if ((ret == 0) && (inLen != CURVE448_PUB_KEY_SIZE)) { + ret = ECC_BAD_ARG_E; + } + + if (ret == 0) { + if (endian == EC448_BIG_ENDIAN) { + /* read keys in Big Endian format */ + for (i = 0; i < CURVE448_PUB_KEY_SIZE; i++) { + key->p[i] = in[CURVE448_PUB_KEY_SIZE - i - 1]; + } + } + else + XMEMCPY(key->p, in, inLen); + } + + return ret; +} + +/* Check the public key value (big or little endian) + * + * pub [in] Public key bytes. + * pubSz [in] Size of public key in bytes. + * endian [in] Public key bytes passed in as big-endian or little-endian. + * returns BAD_FUNC_ARGS when pub is NULL, + * ECC_BAD_ARG_E when key length is not 56 bytes, public key value is + * zero or one; + * BUFFER_E when size of public key is zero; + * 0 otherwise. 
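+ *
+ * Note that, unlike the curve25519 check above, no high-bit test is done
+ * here: X448 public values use every bit of the 56-byte encoding, so only
+ * the degenerate zero/one values are rejected.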
+ */ +int wc_curve448_check_public(const byte* pub, word32 pubSz, int endian) +{ + int ret = 0; + word32 i; + + if (pub == NULL) { + ret = BAD_FUNC_ARG; + } + + /* Check for empty key data */ + if ((ret == 0) && (pubSz == 0)) { + ret = BUFFER_E; + } + + /* Check key length */ + if ((ret == 0) && (pubSz != CURVE448_PUB_KEY_SIZE)) { + ret = ECC_BAD_ARG_E; + } + + if (ret == 0) { + if (endian == EC448_LITTLE_ENDIAN) { + /* Check for value of zero or one */ + for (i = pubSz - 1; i > 0; i--) { + if (pub[i] != 0) { + break; + } + } + if ((i == 0) && (pub[0] == 0 || pub[0] == 1)) { + return ECC_BAD_ARG_E; + } + } + else { + /* Check for value of zero or one */ + for (i = 0; i < pubSz-1; i++) { + if (pub[i] != 0) { + break; + } + } + if ((i == pubSz - 1) && (pub[i] == 0 || pub[i] == 1)) { + ret = ECC_BAD_ARG_E; + } + } + } + + return ret; +} + +#endif /* HAVE_CURVE448_KEY_IMPORT */ + + +#ifdef HAVE_CURVE448_KEY_EXPORT + +/* Export the curve448 private key raw form. + * Private key encoded big-endian. + * + * key [in] Curve448 private key. + * out [in] Array to hold private key. + * outLen [in/out] On in, the number of bytes in array. + * On out, the number bytes put into array. + * returns BAD_FUNC_ARG when a parameter is NULL, + * ECC_BAD_ARG_E when outLen is less than CURVE448_KEY_SIZE, + * 0 otherwise. + */ +int wc_curve448_export_private_raw(curve448_key* key, byte* out, word32* outLen) +{ + return wc_curve448_export_private_raw_ex(key, out, outLen, + EC448_BIG_ENDIAN); +} + +/* Export the curve448 private key raw form. + * + * key [in] Curve448 private key. + * out [in] Array to hold private key. + * outLen [in/out] On in, the number of bytes in array. + * On out, the number bytes put into array. + * endian [in] Endianness to use when encoding number in array. + * returns BAD_FUNC_ARG when a parameter is NULL, + * ECC_BAD_ARG_E when outLen is less than CURVE448_KEY_SIZE, + * 0 otherwise. + */ +int wc_curve448_export_private_raw_ex(curve448_key* key, byte* out, + word32* outLen, int endian) +{ + int ret = 0; + int i; + + /* sanity check */ + if ((key == NULL) || (out == NULL) || (outLen == NULL)) { + ret = BAD_FUNC_ARG; + } + + /* check size of outgoing buffer */ + if ((ret == 0) && (*outLen < CURVE448_KEY_SIZE)) { + *outLen = CURVE448_KEY_SIZE; + ret = ECC_BAD_ARG_E; + } + if (ret == 0) { + *outLen = CURVE448_KEY_SIZE; + + if (endian == EC448_BIG_ENDIAN) { + /* put the key in Big Endian format */ + for (i = 0; i < CURVE448_KEY_SIZE; i++) { + out[i] = key->k[CURVE448_KEY_SIZE - i - 1]; + } + } + else { + XMEMCPY(out, key->k, CURVE448_KEY_SIZE); + } + } + + return ret; +} + +/* Export the curve448 private and public keys in raw form. + * Private and public key encoded big-endian. + * + * key [in] Curve448 private key. + * priv [in] Array to hold private key. + * privSz [in/out] On in, the number of bytes in private key array. + * On out, the number bytes put into private key array. + * pub [in] Array to hold public key. + * pubSz [in/out] On in, the number of bytes in public key array. + * On out, the number bytes put into public key array. + * returns BAD_FUNC_ARG when a parameter is NULL, + * ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE or pubSz is + * less than CURVE448_PUB_KEY_SIZE, + * 0 otherwise. + */ +int wc_curve448_export_key_raw(curve448_key* key, byte* priv, word32 *privSz, + byte* pub, word32 *pubSz) +{ + return wc_curve448_export_key_raw_ex(key, priv, privSz, pub, pubSz, + EC448_BIG_ENDIAN); +} + +/* Export the curve448 private and public keys in raw form. 
+ * + * key [in] Curve448 private key. + * priv [in] Array to hold private key. + * privSz [in/out] On in, the number of bytes in private key array. + * On out, the number bytes put into private key array. + * pub [in] Array to hold public key. + * pubSz [in/out] On in, the number of bytes in public key array. + * On out, the number bytes put into public key array. + * endian [in] Endianness to use when encoding number in array. + * returns BAD_FUNC_ARG when a parameter is NULL, + * ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE or pubSz is + * less than CURVE448_PUB_KEY_SIZE, + * 0 otherwise. + */ +int wc_curve448_export_key_raw_ex(curve448_key* key, byte* priv, word32 *privSz, + byte* pub, word32 *pubSz, int endian) +{ + int ret; + + /* export private part */ + ret = wc_curve448_export_private_raw_ex(key, priv, privSz, endian); + if (ret == 0) { + /* export public part */ + ret = wc_curve448_export_public_ex(key, pub, pubSz, endian); + } + + return ret; +} + +#endif /* HAVE_CURVE448_KEY_EXPORT */ + +#ifdef HAVE_CURVE448_KEY_IMPORT + +/* Import curve448 private and public keys from a byte arrays. + * Private and public keys encoded in big-endian. + * + * piv [in] Array holding private key. + * privSz [in] Number of bytes of data in private key array. + * pub [in] Array holding public key. + * pubSz [in] Number of bytes of data in public key array. + * key [in] Curve448 private/public key. + * returns BAD_FUNC_ARG when a parameter is NULL, + * ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE or pubSz is + * less than CURVE448_PUB_KEY_SIZE, + * 0 otherwise. + */ +int wc_curve448_import_private_raw(const byte* priv, word32 privSz, + const byte* pub, word32 pubSz, + curve448_key* key) +{ + return wc_curve448_import_private_raw_ex(priv, privSz, pub, pubSz, key, + EC448_BIG_ENDIAN); +} + +/* Import curve448 private and public keys from a byte arrays. + * + * piv [in] Array holding private key. + * privSz [in] Number of bytes of data in private key array. + * pub [in] Array holding public key. + * pubSz [in] Number of bytes of data in public key array. + * key [in] Curve448 private/public key. + * endian [in] Endianness of encoded numbers in byte arrays. + * returns BAD_FUNC_ARG when a parameter is NULL, + * ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE or pubSz is + * less than CURVE448_PUB_KEY_SIZE, + * 0 otherwise. + */ +int wc_curve448_import_private_raw_ex(const byte* priv, word32 privSz, + const byte* pub, word32 pubSz, + curve448_key* key, int endian) +{ + int ret; + + /* import private part */ + ret = wc_curve448_import_private_ex(priv, privSz, key, endian); + if (ret == 0) { + /* import public part */ + return wc_curve448_import_public_ex(pub, pubSz, key, endian); + } + + return ret; +} + +/* Import curve448 private key from a byte array. + * Private key encoded in big-endian. + * + * piv [in] Array holding private key. + * privSz [in] Number of bytes of data in private key array. + * key [in] Curve448 private/public key. + * returns BAD_FUNC_ARG when a parameter is NULL, + * ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE, + * 0 otherwise. + */ +int wc_curve448_import_private(const byte* priv, word32 privSz, + curve448_key* key) +{ + return wc_curve448_import_private_ex(priv, privSz, key, EC448_BIG_ENDIAN); +} + +/* Import curve448 private key from a byte array. + * + * piv [in] Array holding private key. + * privSz [in] Number of bytes of data in private key array. + * key [in] Curve448 private/public key. 
+ * endian [in] Endianness of encoded number in byte array. + * returns BAD_FUNC_ARG when a parameter is NULL, + * ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE, + * 0 otherwise. + */ +int wc_curve448_import_private_ex(const byte* priv, word32 privSz, + curve448_key* key, int endian) +{ + int ret = 0; + int i; + + /* sanity check */ + if ((key == NULL) || (priv == NULL)) { + ret = BAD_FUNC_ARG; + } + + /* check size of incoming keys */ + if ((ret == 0) && ((int)privSz != CURVE448_KEY_SIZE)) { + ret = ECC_BAD_ARG_E; + } + + if (ret == 0) { + if (endian == EC448_BIG_ENDIAN) { + /* read the key in Big Endian format */ + for (i = 0; i < CURVE448_KEY_SIZE; i++) { + key->k[i] = priv[CURVE448_KEY_SIZE - i - 1]; + } + } + else { + XMEMCPY(key->k, priv, CURVE448_KEY_SIZE); + } + + /* Clamp the key */ + key->k[0] &= 0xfc; + key->k[CURVE448_KEY_SIZE-1] |= 0x80; + } + + return ret; +} + +#endif /* HAVE_CURVE448_KEY_IMPORT */ + + +/* Initialize the curve448 key. + * + * key [in] Curve448 key object. + * returns BAD_FUNC_ARG when key is NULL, + * 0 otherwise. + */ +int wc_curve448_init(curve448_key* key) +{ + int ret = 0; + + if (key == NULL) { + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + XMEMSET(key, 0, sizeof(*key)); + + fe448_init(); + } + + return ret; +} + + +/* Clears the curve448 key data. + * + * key [in] Curve448 key object. + */ +void wc_curve448_free(curve448_key* key) +{ + if (key != NULL) { + ForceZero(key->p, sizeof(key->p)); + ForceZero(key->k, sizeof(key->k)); + } +} + + +/* Get the curve448 key's size. + * + * key [in] Curve448 key object. + * returns 0 if key is NULL, + * CURVE448_KEY_SIZE otherwise. + */ +int wc_curve448_size(curve448_key* key) +{ + int ret = 0; + + if (key != NULL) { + ret = CURVE448_KEY_SIZE; + } + + return ret; +} + +#endif /* HAVE_CURVE448 */ + diff --git a/client/wolfssl/wolfcrypt/src/des3.c b/client/wolfssl/wolfcrypt/src/des3.c new file mode 100644 index 0000000..b4b0187 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/des3.c @@ -0,0 +1,1825 @@ +/* des3.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include +#include +#include + + +#ifndef NO_DES3 + +#if defined(HAVE_FIPS) && \ + defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2) + + /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ + #define FIPS_NO_WRAPPERS + + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$i") + #pragma const_seg(".fipsB$i") + #endif +#endif + +#include + +#ifdef WOLF_CRYPTO_CB + #include +#endif + +/* fips wrapper calls, user can call direct */ +#if defined(HAVE_FIPS) && \ + (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2)) + + int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir) + { + return Des_SetKey(des, key, iv, dir); + } + int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir) + { + if (des == NULL || key == NULL || dir < 0) { + return BAD_FUNC_ARG; + } + + return Des3_SetKey_fips(des, key, iv, dir); + } + int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz) + { + return Des_CbcEncrypt(des, out, in, sz); + } + int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz) + { + return Des_CbcDecrypt(des, out, in, sz); + } + int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz) + { + if (des == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + return Des3_CbcEncrypt_fips(des, out, in, sz); + } + int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz) + { + if (des == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + return Des3_CbcDecrypt_fips(des, out, in, sz); + } + + #ifdef WOLFSSL_DES_ECB + /* One block, compatibility only */ + int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz) + { + return Des_EcbEncrypt(des, out, in, sz); + } + int wc_Des3_EcbEncrypt(Des3* des, byte* out, const byte* in, word32 sz) + { + return Des3_EcbEncrypt(des, out, in, sz); + } + #endif /* WOLFSSL_DES_ECB */ + + void wc_Des_SetIV(Des* des, const byte* iv) + { + Des_SetIV(des, iv); + } + int wc_Des3_SetIV(Des3* des, const byte* iv) + { + return Des3_SetIV_fips(des, iv); + } + + int wc_Des3Init(Des3* des3, void* heap, int devId) + { + (void)des3; + (void)heap; + (void)devId; + /* FIPS doesn't support: + return Des3Init(des3, heap, devId); */ + return 0; + } + void wc_Des3Free(Des3* des3) + { + (void)des3; + /* FIPS doesn't support: + Des3Free(des3); */ + } + +#else /* else build without fips, or for FIPS v2 */ + + +#if defined(WOLFSSL_TI_CRYPT) + #include +#else + + +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + + +/* Hardware Acceleration */ +#if defined(STM32_CRYPTO) + + /* + * STM32F2/F4 hardware DES/3DES support through the standard + * peripheral library. (See note in README). 
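+     *
+     * A usage sketch of the CBC path implemented below (sz must be a
+     * multiple of DES_BLOCK_SIZE; as the loop comments note, the last
+     * input block is stashed in des->tmp and copied to des->reg so the
+     * chained IV survives in-place operation across calls):
+     *
+     *     Des des;
+     *     wc_Des_SetKey(&des, key, iv, DES_ENCRYPTION);
+     *     wc_Des_CbcEncrypt(&des, cipher, plain, sz);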
+ */ + + int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir) + { + word32 *dkey = des->key; + + (void)dir; + + XMEMCPY(dkey, key, 8); + #ifndef WOLFSSL_STM32_CUBEMX + ByteReverseWords(dkey, dkey, 8); + #endif + + wc_Des_SetIV(des, iv); + + return 0; + } + + int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir) + { + if (des == NULL || key == NULL) + return BAD_FUNC_ARG; + + (void)dir; + + #ifndef WOLFSSL_STM32_CUBEMX + { + word32 *dkey1 = des->key[0]; + word32 *dkey2 = des->key[1]; + word32 *dkey3 = des->key[2]; + + XMEMCPY(dkey1, key, 8); /* set key 1 */ + XMEMCPY(dkey2, key + 8, 8); /* set key 2 */ + XMEMCPY(dkey3, key + 16, 8); /* set key 3 */ + + ByteReverseWords(dkey1, dkey1, 8); + ByteReverseWords(dkey2, dkey2, 8); + ByteReverseWords(dkey3, dkey3, 8); + } + #else + XMEMCPY(des->key[0], key, DES3_KEYLEN); /* CUBEMX wants keys in sequential memory */ + #endif + + return wc_Des3_SetIV(des, iv); + } + + static void DesCrypt(Des* des, byte* out, const byte* in, word32 sz, + int dir, int mode) + { + int ret; + #ifdef WOLFSSL_STM32_CUBEMX + CRYP_HandleTypeDef hcryp; + #else + word32 *dkey, *iv; + CRYP_InitTypeDef DES_CRYP_InitStructure; + CRYP_KeyInitTypeDef DES_CRYP_KeyInitStructure; + CRYP_IVInitTypeDef DES_CRYP_IVInitStructure; + #endif + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return; + } + + #ifdef WOLFSSL_STM32_CUBEMX + XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); + hcryp.Instance = CRYP; + hcryp.Init.KeySize = CRYP_KEYSIZE_128B; + hcryp.Init.DataType = CRYP_DATATYPE_8B; + hcryp.Init.pKey = (uint8_t*)des->key; + hcryp.Init.pInitVect = (uint8_t*)des->reg; + + HAL_CRYP_Init(&hcryp); + + while (sz > 0) { + /* if input and output same will overwrite input iv */ + XMEMCPY(des->tmp, in + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE); + + if (mode == DES_CBC) { + if (dir == DES_ENCRYPTION) { + HAL_CRYP_DESCBC_Encrypt(&hcryp, (uint8_t*)in, + DES_BLOCK_SIZE, out, STM32_HAL_TIMEOUT); + } + else { + HAL_CRYP_DESCBC_Decrypt(&hcryp, (uint8_t*)in, + DES_BLOCK_SIZE, out, STM32_HAL_TIMEOUT); + } + } + else { + if (dir == DES_ENCRYPTION) { + HAL_CRYP_DESECB_Encrypt(&hcryp, (uint8_t*)in, + DES_BLOCK_SIZE, out, STM32_HAL_TIMEOUT); + } + else { + HAL_CRYP_DESECB_Decrypt(&hcryp, (uint8_t*)in, + DES_BLOCK_SIZE, out, STM32_HAL_TIMEOUT); + } + } + + /* store iv for next call */ + XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE); + + sz -= DES_BLOCK_SIZE; + in += DES_BLOCK_SIZE; + out += DES_BLOCK_SIZE; + } + + HAL_CRYP_DeInit(&hcryp); + #else + dkey = des->key; + iv = des->reg; + + /* crypto structure initialization */ + CRYP_KeyStructInit(&DES_CRYP_KeyInitStructure); + CRYP_StructInit(&DES_CRYP_InitStructure); + CRYP_IVStructInit(&DES_CRYP_IVInitStructure); + + /* reset registers to their default values */ + CRYP_DeInit(); + + /* set direction, mode, and datatype */ + if (dir == DES_ENCRYPTION) { + DES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt; + } else { /* DES_DECRYPTION */ + DES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; + } + + if (mode == DES_CBC) { + DES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_DES_CBC; + } else { /* DES_ECB */ + DES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_DES_ECB; + } + + DES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b; + CRYP_Init(&DES_CRYP_InitStructure); + + /* load key into correct registers */ + DES_CRYP_KeyInitStructure.CRYP_Key1Left = dkey[0]; + DES_CRYP_KeyInitStructure.CRYP_Key1Right = dkey[1]; + CRYP_KeyInit(&DES_CRYP_KeyInitStructure); + + /* set iv */ + ByteReverseWords(iv, iv, 
DES_BLOCK_SIZE);
+        DES_CRYP_IVInitStructure.CRYP_IV0Left  = iv[0];
+        DES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1];
+        CRYP_IVInit(&DES_CRYP_IVInitStructure);
+
+        /* enable crypto processor */
+        CRYP_Cmd(ENABLE);
+
+        while (sz > 0) {
+            /* flush IN/OUT FIFOs */
+            CRYP_FIFOFlush();
+
+            /* if input and output same will overwrite input iv */
+            XMEMCPY(des->tmp, in + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+
+            CRYP_DataIn(*(uint32_t*)&in[0]);
+            CRYP_DataIn(*(uint32_t*)&in[4]);
+
+            /* wait until the complete message has been processed */
+            while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
+
+            *(uint32_t*)&out[0] = CRYP_DataOut();
+            *(uint32_t*)&out[4] = CRYP_DataOut();
+
+            /* store iv for next call */
+            XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE);
+
+            sz  -= DES_BLOCK_SIZE;
+            in  += DES_BLOCK_SIZE;
+            out += DES_BLOCK_SIZE;
+        }
+
+        /* disable crypto processor */
+        CRYP_Cmd(DISABLE);
+    #endif /* WOLFSSL_STM32_CUBEMX */
+        wolfSSL_CryptHwMutexUnLock();
+    }
+
+    int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        DesCrypt(des, out, in, sz, DES_ENCRYPTION, DES_CBC);
+        return 0;
+    }
+
+    int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        DesCrypt(des, out, in, sz, DES_DECRYPTION, DES_CBC);
+        return 0;
+    }
+
+    int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        DesCrypt(des, out, in, sz, DES_ENCRYPTION, DES_ECB);
+        return 0;
+    }
+
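+    /* Note: the DesCrypt() wrappers above assume sz is a multiple of
+       DES_BLOCK_SIZE; input is consumed in 8-byte blocks and the CBC IV
+       is carried in des->reg between calls. */
+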
+    static int Des3Crypt(Des3* des, byte* out, const byte* in, word32 sz,
+                   int dir)
+    {
+        if (des == NULL || out == NULL || in == NULL)
+            return BAD_FUNC_ARG;
+
+    #ifdef WOLFSSL_STM32_CUBEMX
+        {
+            CRYP_HandleTypeDef hcryp;
+
+            XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
+            hcryp.Instance = CRYP;
+            hcryp.Init.KeySize  = CRYP_KEYSIZE_128B;
+            hcryp.Init.DataType = CRYP_DATATYPE_8B;
+            hcryp.Init.pKey = (uint8_t*)des->key;
+            hcryp.Init.pInitVect = (uint8_t*)des->reg;
+
+            HAL_CRYP_Init(&hcryp);
+
+            while (sz > 0)
+            {
+                if (dir == DES_ENCRYPTION) {
+                    HAL_CRYP_TDESCBC_Encrypt(&hcryp, (byte*)in,
+                                       DES_BLOCK_SIZE, out, STM32_HAL_TIMEOUT);
+                }
+                else {
+                    HAL_CRYP_TDESCBC_Decrypt(&hcryp, (byte*)in,
+                                       DES_BLOCK_SIZE, out, STM32_HAL_TIMEOUT);
+                }
+
+                /* store iv for next call */
+                XMEMCPY(des->reg, out + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+
+                sz  -= DES_BLOCK_SIZE;
+                in  += DES_BLOCK_SIZE;
+                out += DES_BLOCK_SIZE;
+            }
+
+            HAL_CRYP_DeInit(&hcryp);
+        }
+    #else
+        {
+            word32 *dkey1, *dkey2, *dkey3, *iv;
+            CRYP_InitTypeDef DES3_CRYP_InitStructure;
+            CRYP_KeyInitTypeDef DES3_CRYP_KeyInitStructure;
+            CRYP_IVInitTypeDef DES3_CRYP_IVInitStructure;
+
+            dkey1 = des->key[0];
+            dkey2 = des->key[1];
+            dkey3 = des->key[2];
+            iv = des->reg;
+
+            /* crypto structure initialization */
+            CRYP_KeyStructInit(&DES3_CRYP_KeyInitStructure);
+            CRYP_StructInit(&DES3_CRYP_InitStructure);
+            CRYP_IVStructInit(&DES3_CRYP_IVInitStructure);
+
+            /* reset registers to their default values */
+            CRYP_DeInit();
+
+            /* set direction, mode, and datatype */
+            if (dir == DES_ENCRYPTION) {
+                DES3_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt;
+            } else {
+                DES3_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt;
+            }
+
+            DES3_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_TDES_CBC;
+            DES3_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
+            CRYP_Init(&DES3_CRYP_InitStructure);
+
+            /* load key into correct registers */
+            DES3_CRYP_KeyInitStructure.CRYP_Key1Left  = dkey1[0];
+            DES3_CRYP_KeyInitStructure.CRYP_Key1Right = dkey1[1];
+            DES3_CRYP_KeyInitStructure.CRYP_Key2Left  = dkey2[0];
+            DES3_CRYP_KeyInitStructure.CRYP_Key2Right = dkey2[1];
+            DES3_CRYP_KeyInitStructure.CRYP_Key3Left  = dkey3[0];
+            DES3_CRYP_KeyInitStructure.CRYP_Key3Right = dkey3[1];
+            CRYP_KeyInit(&DES3_CRYP_KeyInitStructure);
+
+            /* set iv */
+            ByteReverseWords(iv, iv, DES_BLOCK_SIZE);
+            DES3_CRYP_IVInitStructure.CRYP_IV0Left  = iv[0];
+            DES3_CRYP_IVInitStructure.CRYP_IV0Right = iv[1];
+            CRYP_IVInit(&DES3_CRYP_IVInitStructure);
+
+            /* enable crypto processor */
+            CRYP_Cmd(ENABLE);
+
+            while (sz > 0)
+            {
+                /* flush IN/OUT FIFOs */
+                CRYP_FIFOFlush();
+
+                CRYP_DataIn(*(uint32_t*)&in[0]);
+                CRYP_DataIn(*(uint32_t*)&in[4]);
+
+                /* wait until the complete message has been processed */
+                while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
+
+                *(uint32_t*)&out[0] = CRYP_DataOut();
+                *(uint32_t*)&out[4] = CRYP_DataOut();
+
+                /* store iv for next call */
+                XMEMCPY(des->reg, out + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+
+                sz  -= DES_BLOCK_SIZE;
+                in  += DES_BLOCK_SIZE;
+                out += DES_BLOCK_SIZE;
+            }
+
+            /* disable crypto processor */
+            CRYP_Cmd(DISABLE);
+        }
+    #endif /* WOLFSSL_STM32_CUBEMX */
+
+        return 0;
+    }
+
+    int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
+    {
+        return Des3Crypt(des, out, in, sz, DES_ENCRYPTION);
+    }
+
+    int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
+    {
+        return Des3Crypt(des, out, in, sz, DES_DECRYPTION);
+    }
+
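+    /* Each hardware path in this chain (STM32, ColdFire SEC, Freescale
+       LTC/mmCAU, PIC32MZ) implements the same DES/3DES entry points; when
+       none of them is enabled, NEED_SOFT_DES below selects the portable
+       software implementation. */
+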
+#elif defined(HAVE_COLDFIRE_SEC)
+
+    #include <wolfssl/ctaocrypt/types.h>
+
+    #include "sec.h"
+    #include "mcf5475_sec.h"
+    #include "mcf5475_siu.h"
+
+    #if defined (HAVE_THREADX)
+        #include "memory_pools.h"
+        extern TX_BYTE_POOL mp_ncached;  /* Non Cached memory pool */
+    #endif
+
+    #define DES_BUFFER_SIZE (DES_BLOCK_SIZE * 64)
+    static unsigned char *desBuffIn  = NULL;
+    static unsigned char *desBuffOut = NULL;
+    static byte *secIV;
+    static byte *secKey;
+    static volatile SECdescriptorType *secDesc;
+
+    static wolfSSL_Mutex Mutex_DesSEC;
+
+    #define SEC_DESC_DES_CBC_ENCRYPT  0x20500010
+    #define SEC_DESC_DES_CBC_DECRYPT  0x20400010
+    #define SEC_DESC_DES3_CBC_ENCRYPT 0x20700010
+    #define SEC_DESC_DES3_CBC_DECRYPT 0x20600010
+
+    #define DES_IVLEN 8
+    #define DES_KEYLEN 8
+    #define DES3_IVLEN 8
+    #define DES3_KEYLEN 24
+
+    extern volatile unsigned char __MBAR[];
+
+    static void wc_Des_Cbc(byte* out, const byte* in, word32 sz,
+                           byte *key, byte *iv, word32 desc)
+    {
+        #ifdef DEBUG_WOLFSSL
+        int ret; int stat1, stat2;
+        #endif
+        int size;
+        volatile int v;
+
+        wc_LockMutex(&Mutex_DesSEC);
+
+        secDesc->length1 = 0x0;
+        secDesc->pointer1 = NULL;
+        if ((desc == SEC_DESC_DES_CBC_ENCRYPT) || (desc == SEC_DESC_DES_CBC_DECRYPT)) {
+            secDesc->length2 = DES_IVLEN;
+            secDesc->length3 = DES_KEYLEN;
+        } else {
+            secDesc->length2 = DES3_IVLEN;
+            secDesc->length3 = DES3_KEYLEN;
+        }
+        secDesc->pointer2 = secIV;
+        secDesc->pointer3 = secKey;
+        secDesc->pointer4 = desBuffIn;
+        secDesc->pointer5 = desBuffOut;
+        secDesc->length6 = 0;
+        secDesc->pointer6 = NULL;
+        secDesc->length7 = 0x0;
+        secDesc->pointer7 = NULL;
+        secDesc->nextDescriptorPtr = NULL;
+
+        while (sz) {
+            XMEMCPY(secIV, iv, secDesc->length2);
+            if ((sz % DES_BUFFER_SIZE) == sz) {
+                size = sz;
+                sz = 0;
+            } else {
+                size = DES_BUFFER_SIZE;
+                sz -= DES_BUFFER_SIZE;
+            }
+
+            XMEMCPY(desBuffIn, in, size);
+            XMEMCPY(secKey, key, secDesc->length3);
+
+            secDesc->header = desc;
+            secDesc->length4 = size;
+            secDesc->length5 = size;
+            /* Point SEC to the location of the descriptor */
+            MCF_SEC_FR0 = (uint32)secDesc;
+            /* Initialize SEC and wait for encryption to complete */
+            MCF_SEC_CCCR0 = 0x0000001a;
+            /* poll SISR to determine when channel is complete */
+            v = 0;
+            while ((secDesc->header >> 24) != 0xff) {
+                if (v++ > 1000) break;
+            }
+
+            #ifdef DEBUG_WOLFSSL
+            ret   = MCF_SEC_SISRH;
+            stat1 = MCF_SEC_DSR;
+            stat2 = MCF_SEC_DISR;
+            if (ret & 0xe0000000) {
+                /* db_printf("Des_Cbc(%x):ISRH=%08x, DSR=%08x, DISR=%08x\n", desc, ret, stat1, stat2); */
+            }
+            #endif
+
+            XMEMCPY(out, desBuffOut, size);
+
+            if ((desc == SEC_DESC_DES3_CBC_ENCRYPT) || (desc == SEC_DESC_DES_CBC_ENCRYPT)) {
+                XMEMCPY((void*)iv, (void*)&(out[size - secDesc->length2]), secDesc->length2);
+            } else {
+                XMEMCPY((void*)iv, (void*)&(in[size - secDesc->length2]), secDesc->length2);
+            }
+
+            in  += size;
+            out += size;
+        }
+        wc_UnLockMutex(&Mutex_DesSEC);
+    }
+
+
+    int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        wc_Des_Cbc(out, in, sz, (byte *)des->key, (byte *)des->reg, SEC_DESC_DES_CBC_ENCRYPT);
+        return 0;
+    }
+
+    int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        wc_Des_Cbc(out, in, sz, (byte *)des->key, (byte *)des->reg, SEC_DESC_DES_CBC_DECRYPT);
+        return 0;
+    }
+
+    int wc_Des3_CbcEncrypt(Des3* des3, byte* out, const byte* in, word32 sz)
+    {
+        wc_Des_Cbc(out, in, sz, (byte *)des3->key, (byte *)des3->reg, SEC_DESC_DES3_CBC_ENCRYPT);
+        return 0;
+    }
+
+
+    int wc_Des3_CbcDecrypt(Des3* des3, byte* out, const byte* in, word32 sz)
+    {
+        wc_Des_Cbc(out, in, sz, (byte *)des3->key, (byte *)des3->reg, SEC_DESC_DES3_CBC_DECRYPT);
+        return 0;
+    }
+
+    /* force odd parity on each key byte: keep the high seven bits and set
+       the low bit so the byte's total number of 1 bits is odd */
+    static void setParity(byte *buf, int len)
+    {
+        int i, j;
+        byte v;
+        int bits;
+
+        for (i = 0; i < len; i++) {
+            v = buf[i] >> 1;
+            buf[i] = v << 1;
+            bits = 0;
+            for (j = 0; j < 7; j++) {
+                bits += (v & 0x1);
+                v = v >> 1;
+            }
+            buf[i] |= (1 - (bits & 0x1));
+        }
+    }
+
+    int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
+    {
+        if (desBuffIn == NULL) {
+        #if defined (HAVE_THREADX)
+            int s1, s2, s3, s4, s5;
+            s5 = tx_byte_allocate(&mp_ncached, (void *)&secDesc,
+                                  sizeof(SECdescriptorType), TX_NO_WAIT);
+            s1 = tx_byte_allocate(&mp_ncached, (void *)&desBuffIn,
+                                  DES_BUFFER_SIZE, TX_NO_WAIT);
+            s2 = tx_byte_allocate(&mp_ncached, (void *)&desBuffOut,
+                                  DES_BUFFER_SIZE, TX_NO_WAIT);
+            /* Don't know des or des3 to be used.
Allocate larger buffers */ + s3 = tx_byte_allocate(&mp_ncached,(void *)&secKey, DES3_KEYLEN,TX_NO_WAIT); + s4 = tx_byte_allocate(&mp_ncached,(void *)&secIV, DES3_IVLEN, TX_NO_WAIT); + #else + #warning "Allocate non-Cache buffers" + #endif + + InitMutex(&Mutex_DesSEC); + } + + XMEMCPY(des->key, key, DES_KEYLEN); + setParity((byte *)des->key, DES_KEYLEN); + + if (iv) { + XMEMCPY(des->reg, iv, DES_IVLEN); + } else { + XMEMSET(des->reg, 0x0, DES_IVLEN); + } + return 0; + } + + int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir) + { + if (des3 == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + + if (desBuffIn == NULL) { + #if defined (HAVE_THREADX) + int s1, s2, s3, s4, s5; + s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc, + sizeof(SECdescriptorType), TX_NO_WAIT); + s1 = tx_byte_allocate(&mp_ncached,(void *)&desBuffIn, DES_BUFFER_SIZE, TX_NO_WAIT); + s2 = tx_byte_allocate(&mp_ncached,(void *)&desBuffOut, DES_BUFFER_SIZE, TX_NO_WAIT); + s3 = tx_byte_allocate(&mp_ncached,(void *)&secKey, DES3_KEYLEN,TX_NO_WAIT); + s4 = tx_byte_allocate(&mp_ncached,(void *)&secIV, DES3_IVLEN, TX_NO_WAIT); + #else + #warning "Allocate non-Cache buffers" + #endif + + InitMutex(&Mutex_DesSEC); + } + + XMEMCPY(des3->key[0], key, DES3_KEYLEN); + setParity((byte *)des3->key[0], DES3_KEYLEN); + + if (iv) { + XMEMCPY(des3->reg, iv, DES3_IVLEN); + } else { + XMEMSET(des3->reg, 0x0, DES3_IVLEN); + } + return 0; + + } +#elif defined(FREESCALE_LTC_DES) + + #include "fsl_ltc.h" + int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir) + { + byte* dkey; + + if (des == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + + dkey = (byte*)des->key; + + XMEMCPY(dkey, key, 8); + + wc_Des_SetIV(des, iv); + + return 0; + } + + int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir) + { + int ret = 0; + byte* dkey1 = (byte*)des->key[0]; + byte* dkey2 = (byte*)des->key[1]; + byte* dkey3 = (byte*)des->key[2]; + + XMEMCPY(dkey1, key, 8); /* set key 1 */ + XMEMCPY(dkey2, key + 8, 8); /* set key 2 */ + XMEMCPY(dkey3, key + 16, 8); /* set key 3 */ + + ret = wc_Des3_SetIV(des, iv); + if (ret != 0) + return ret; + + return ret; + } + + int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz) + { + status_t status; + status = LTC_DES_EncryptCbc(LTC_BASE, in, out, sz, (byte*)des->reg, (byte*)des->key); + if (status == kStatus_Success) + return 0; + else + return -1; + } + + int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz) + { + status_t status; + status = LTC_DES_DecryptCbc(LTC_BASE, in, out, sz, (byte*)des->reg, (byte*)des->key); + if (status == kStatus_Success) + return 0; + else + return -1; + } + + int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz) + { + status_t status; + status = LTC_DES3_EncryptCbc(LTC_BASE, + in, + out, + sz, + (byte*)des->reg, + (byte*)des->key[0], + (byte*)des->key[1], + (byte*)des->key[2]); + if (status == kStatus_Success) + return 0; + else + return -1; + } + + int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz) + { + status_t status; + status = LTC_DES3_DecryptCbc(LTC_BASE, + in, + out, + sz, + (byte*)des->reg, + (byte*)des->key[0], + (byte*)des->key[1], + (byte*)des->key[2]); + if (status == kStatus_Success) + return 0; + else + return -1; + + } + +#elif defined(FREESCALE_MMCAU) + /* + * Freescale mmCAU hardware DES/3DES support through the CAU/mmCAU library. + * Documentation located in ColdFire/ColdFire+ CAU and Kinetis mmCAU + * Software Library User Guide (See note in README). 
+ */ + #ifdef FREESCALE_MMCAU_CLASSIC + #include "cau_api.h" + #else + #include "fsl_mmcau.h" + #endif + + const unsigned char parityLookup[128] = { + 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, + 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1, + 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1, + 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0 + }; + + int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir) + { + int i = 0; + byte* dkey; + + + if (des == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + + dkey = (byte*)des->key; + + XMEMCPY(dkey, key, 8); + + wc_Des_SetIV(des, iv); + + /* fix key parity, if needed */ + for (i = 0; i < 8; i++) { + dkey[i] = ((dkey[i] & 0xFE) | parityLookup[dkey[i] >> 1]); + } + + return 0; + } + + int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir) + { + int i = 0, ret = 0; + byte* dkey1 = (byte*)des->key[0]; + byte* dkey2 = (byte*)des->key[1]; + byte* dkey3 = (byte*)des->key[2]; + + XMEMCPY(dkey1, key, 8); /* set key 1 */ + XMEMCPY(dkey2, key + 8, 8); /* set key 2 */ + XMEMCPY(dkey3, key + 16, 8); /* set key 3 */ + + ret = wc_Des3_SetIV(des, iv); + if (ret != 0) + return ret; + + /* fix key parity if needed */ + for (i = 0; i < 8; i++) + dkey1[i] = ((dkey1[i] & 0xFE) | parityLookup[dkey1[i] >> 1]); + + for (i = 0; i < 8; i++) + dkey2[i] = ((dkey2[i] & 0xFE) | parityLookup[dkey2[i] >> 1]); + + for (i = 0; i < 8; i++) + dkey3[i] = ((dkey3[i] & 0xFE) | parityLookup[dkey3[i] >> 1]); + + return ret; + } + + int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz) + { + int i; + int offset = 0; + int len = sz; + int ret = 0; + byte *iv; + byte temp_block[DES_BLOCK_SIZE]; + + iv = (byte*)des->reg; + + #ifdef FREESCALE_MMCAU_CLASSIC + if ((wolfssl_word)out % WOLFSSL_MMCAU_ALIGNMENT) { + WOLFSSL_MSG("Bad cau_des_encrypt alignment"); + return BAD_ALIGN_E; + } + #endif + + while (len > 0) + { + XMEMCPY(temp_block, in + offset, DES_BLOCK_SIZE); + + /* XOR block with IV for CBC */ + for (i = 0; i < DES_BLOCK_SIZE; i++) + temp_block[i] ^= iv[i]; + + ret = wolfSSL_CryptHwMutexLock(); + if(ret != 0) { + return ret; + } + #ifdef FREESCALE_MMCAU_CLASSIC + cau_des_encrypt(temp_block, (byte*)des->key, out + offset); + #else + MMCAU_DES_EncryptEcb(temp_block, (byte*)des->key, out + offset); + #endif + wolfSSL_CryptHwMutexUnLock(); + + len -= DES_BLOCK_SIZE; + offset += DES_BLOCK_SIZE; + + /* store IV for next block */ + XMEMCPY(iv, out + offset - DES_BLOCK_SIZE, DES_BLOCK_SIZE); + } + + return ret; + } + + int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz) + { + int i; + int offset = 0; + int len = sz; + int ret = 0; + byte* iv; + byte temp_block[DES_BLOCK_SIZE]; + + iv = (byte*)des->reg; + + #ifdef FREESCALE_MMCAU_CLASSIC + if ((wolfssl_word)out % WOLFSSL_MMCAU_ALIGNMENT) { + WOLFSSL_MSG("Bad cau_des_decrypt alignment"); + return BAD_ALIGN_E; + } + #endif + + while (len > 0) + { + XMEMCPY(temp_block, in + offset, DES_BLOCK_SIZE); + + ret = wolfSSL_CryptHwMutexLock(); + if(ret != 0) { + return ret; + } + + #ifdef FREESCALE_MMCAU_CLASSIC + cau_des_decrypt(in + offset, (byte*)des->key, out + offset); + #else + MMCAU_DES_DecryptEcb(in + offset, (byte*)des->key, out + offset); + #endif + wolfSSL_CryptHwMutexUnLock(); + + /* XOR block with IV for CBC */ + for (i = 0; i < DES_BLOCK_SIZE; i++) + (out + offset)[i] ^= iv[i]; + + /* store IV for next block */ + XMEMCPY(iv, temp_block, DES_BLOCK_SIZE); + + len -= DES_BLOCK_SIZE; + offset += 
DES_BLOCK_SIZE;
+        }
+
+        return ret;
+    }
+
+    int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
+    {
+        int i;
+        int offset = 0;
+        int len = sz;
+        int ret = 0;
+
+        byte *iv;
+        byte temp_block[DES_BLOCK_SIZE];
+
+        iv = (byte*)des->reg;
+
+    #ifdef FREESCALE_MMCAU_CLASSIC
+        if ((wolfssl_word)out % WOLFSSL_MMCAU_ALIGNMENT) {
+            WOLFSSL_MSG("Bad 3ede cau_des_encrypt alignment");
+            return BAD_ALIGN_E;
+        }
+    #endif
+
+        while (len > 0)
+        {
+            XMEMCPY(temp_block, in + offset, DES_BLOCK_SIZE);
+
+            /* XOR block with IV for CBC */
+            for (i = 0; i < DES_BLOCK_SIZE; i++)
+                temp_block[i] ^= iv[i];
+
+            ret = wolfSSL_CryptHwMutexLock();
+            if (ret != 0) {
+                return ret;
+            }
+    #ifdef FREESCALE_MMCAU_CLASSIC
+            cau_des_encrypt(temp_block,   (byte*)des->key[0], out + offset);
+            cau_des_decrypt(out + offset, (byte*)des->key[1], out + offset);
+            cau_des_encrypt(out + offset, (byte*)des->key[2], out + offset);
+    #else
+            MMCAU_DES_EncryptEcb(temp_block,   (byte*)des->key[0], out + offset);
+            MMCAU_DES_DecryptEcb(out + offset, (byte*)des->key[1], out + offset);
+            MMCAU_DES_EncryptEcb(out + offset, (byte*)des->key[2], out + offset);
+    #endif
+            wolfSSL_CryptHwMutexUnLock();
+
+            len    -= DES_BLOCK_SIZE;
+            offset += DES_BLOCK_SIZE;
+
+            /* store IV for next block */
+            XMEMCPY(iv, out + offset - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+        }
+
+        return ret;
+    }
+
+    int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
+    {
+        int i;
+        int offset = 0;
+        int len = sz;
+        int ret = 0;
+
+        byte* iv;
+        byte temp_block[DES_BLOCK_SIZE];
+
+        iv = (byte*)des->reg;
+
+    #ifdef FREESCALE_MMCAU_CLASSIC
+        if ((wolfssl_word)out % WOLFSSL_MMCAU_ALIGNMENT) {
+            WOLFSSL_MSG("Bad 3ede cau_des_decrypt alignment");
+            return BAD_ALIGN_E;
+        }
+    #endif
+
+        while (len > 0)
+        {
+            XMEMCPY(temp_block, in + offset, DES_BLOCK_SIZE);
+
+            ret = wolfSSL_CryptHwMutexLock();
+            if (ret != 0) {
+                return ret;
+            }
+    #ifdef FREESCALE_MMCAU_CLASSIC
+            cau_des_decrypt(in + offset,  (byte*)des->key[2], out + offset);
+            cau_des_encrypt(out + offset, (byte*)des->key[1], out + offset);
+            cau_des_decrypt(out + offset, (byte*)des->key[0], out + offset);
+    #else
+            MMCAU_DES_DecryptEcb(in + offset,  (byte*)des->key[2], out + offset);
+            MMCAU_DES_EncryptEcb(out + offset, (byte*)des->key[1], out + offset);
+            MMCAU_DES_DecryptEcb(out + offset, (byte*)des->key[0], out + offset);
+    #endif
+            wolfSSL_CryptHwMutexUnLock();
+
+            /* XOR block with IV for CBC */
+            for (i = 0; i < DES_BLOCK_SIZE; i++)
+                (out + offset)[i] ^= iv[i];
+
+            /* store IV for next block */
+            XMEMCPY(iv, temp_block, DES_BLOCK_SIZE);
+
+            len    -= DES_BLOCK_SIZE;
+            offset += DES_BLOCK_SIZE;
+        }
+
+        return ret;
+    }
+
+
+#elif defined(WOLFSSL_PIC32MZ_CRYPT)
+
+    /* PIC32MZ DES hardware requires size multiple of block size */
+    #include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
+
+    int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
+    {
+        if (des == NULL || key == NULL || iv == NULL)
+            return BAD_FUNC_ARG;
+
+        XMEMCPY(des->key, key, DES_KEYLEN);
+        XMEMCPY(des->reg, iv, DES_IVLEN);
+
+        return 0;
+    }
+
+    int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
+    {
+        if (des == NULL || key == NULL || iv == NULL)
+            return BAD_FUNC_ARG;
+
+        XMEMCPY(des->key[0], key, DES3_KEYLEN);
+        XMEMCPY(des->reg, iv, DES3_IVLEN);
+
+        return 0;
+    }
+
+    int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        word32 blocks = sz / DES_BLOCK_SIZE;
+
+        if (des == NULL || out == NULL || in == NULL)
+            return BAD_FUNC_ARG;
+
+        return wc_Pic32DesCrypt(des->key, DES_KEYLEN, des->reg, DES_IVLEN,
+            out, in, (blocks *
DES_BLOCK_SIZE), + PIC32_ENCRYPTION, PIC32_ALGO_DES, PIC32_CRYPTOALGO_CBC); + } + + int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz) + { + word32 blocks = sz / DES_BLOCK_SIZE; + + if (des == NULL || out == NULL || in == NULL) + return BAD_FUNC_ARG; + + return wc_Pic32DesCrypt(des->key, DES_KEYLEN, des->reg, DES_IVLEN, + out, in, (blocks * DES_BLOCK_SIZE), + PIC32_DECRYPTION, PIC32_ALGO_DES, PIC32_CRYPTOALGO_CBC); + } + + int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz) + { + word32 blocks = sz / DES_BLOCK_SIZE; + + if (des == NULL || out == NULL || in == NULL) + return BAD_FUNC_ARG; + + return wc_Pic32DesCrypt(des->key[0], DES3_KEYLEN, des->reg, DES3_IVLEN, + out, in, (blocks * DES_BLOCK_SIZE), + PIC32_ENCRYPTION, PIC32_ALGO_TDES, PIC32_CRYPTOALGO_TCBC); + } + + int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz) + { + word32 blocks = sz / DES_BLOCK_SIZE; + + if (des == NULL || out == NULL || in == NULL) + return BAD_FUNC_ARG; + + return wc_Pic32DesCrypt(des->key[0], DES3_KEYLEN, des->reg, DES3_IVLEN, + out, in, (blocks * DES_BLOCK_SIZE), + PIC32_DECRYPTION, PIC32_ALGO_TDES, PIC32_CRYPTOALGO_TCBC); + } + + #ifdef WOLFSSL_DES_ECB + int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz) + { + word32 blocks = sz / DES_BLOCK_SIZE; + + if (des == NULL || out == NULL || in == NULL) + return BAD_FUNC_ARG; + + return wc_Pic32DesCrypt(des->key, DES_KEYLEN, des->reg, DES_IVLEN, + out, in, (blocks * DES_BLOCK_SIZE), + PIC32_ENCRYPTION, PIC32_ALGO_DES, PIC32_CRYPTOALGO_ECB); + } + + int wc_Des3_EcbEncrypt(Des3* des, byte* out, const byte* in, word32 sz) + { + word32 blocks = sz / DES_BLOCK_SIZE; + + if (des == NULL || out == NULL || in == NULL) + return BAD_FUNC_ARG; + + return wc_Pic32DesCrypt(des->key[0], DES3_KEYLEN, des->reg, DES3_IVLEN, + out, in, (blocks * DES_BLOCK_SIZE), + PIC32_ENCRYPTION, PIC32_ALGO_TDES, PIC32_CRYPTOALGO_TECB); + } + #endif /* WOLFSSL_DES_ECB */ + +#else + #define NEED_SOFT_DES + +#endif + + +#ifdef NEED_SOFT_DES + + /* permuted choice table (key) */ + static const byte pc1[] = { + 57, 49, 41, 33, 25, 17, 9, + 1, 58, 50, 42, 34, 26, 18, + 10, 2, 59, 51, 43, 35, 27, + 19, 11, 3, 60, 52, 44, 36, + + 63, 55, 47, 39, 31, 23, 15, + 7, 62, 54, 46, 38, 30, 22, + 14, 6, 61, 53, 45, 37, 29, + 21, 13, 5, 28, 20, 12, 4 + }; + + /* number left rotations of pc1 */ + static const byte totrot[] = { + 1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28 + }; + + /* permuted choice key (table) */ + static const byte pc2[] = { + 14, 17, 11, 24, 1, 5, + 3, 28, 15, 6, 21, 10, + 23, 19, 12, 4, 26, 8, + 16, 7, 27, 20, 13, 2, + 41, 52, 31, 37, 47, 55, + 30, 40, 51, 45, 33, 48, + 44, 49, 39, 56, 34, 53, + 46, 42, 50, 36, 29, 32 + }; + + /* End of DES-defined tables */ + + /* bit 0 is left-most in byte */ + static const int bytebit[] = { + 0200,0100,040,020,010,04,02,01 + }; + + static const word32 Spbox[8][64] = { + { 0x01010400,0x00000000,0x00010000,0x01010404, + 0x01010004,0x00010404,0x00000004,0x00010000, + 0x00000400,0x01010400,0x01010404,0x00000400, + 0x01000404,0x01010004,0x01000000,0x00000004, + 0x00000404,0x01000400,0x01000400,0x00010400, + 0x00010400,0x01010000,0x01010000,0x01000404, + 0x00010004,0x01000004,0x01000004,0x00010004, + 0x00000000,0x00000404,0x00010404,0x01000000, + 0x00010000,0x01010404,0x00000004,0x01010000, + 0x01010400,0x01000000,0x01000000,0x00000400, + 0x01010004,0x00010000,0x00010400,0x01000004, + 0x00000400,0x00000004,0x01000404,0x00010404, + 0x01010404,0x00010004,0x01010000,0x01000404, + 
0x01000004,0x00000404,0x00010404,0x01010400, + 0x00000404,0x01000400,0x01000400,0x00000000, + 0x00010004,0x00010400,0x00000000,0x01010004}, + { 0x80108020,0x80008000,0x00008000,0x00108020, + 0x00100000,0x00000020,0x80100020,0x80008020, + 0x80000020,0x80108020,0x80108000,0x80000000, + 0x80008000,0x00100000,0x00000020,0x80100020, + 0x00108000,0x00100020,0x80008020,0x00000000, + 0x80000000,0x00008000,0x00108020,0x80100000, + 0x00100020,0x80000020,0x00000000,0x00108000, + 0x00008020,0x80108000,0x80100000,0x00008020, + 0x00000000,0x00108020,0x80100020,0x00100000, + 0x80008020,0x80100000,0x80108000,0x00008000, + 0x80100000,0x80008000,0x00000020,0x80108020, + 0x00108020,0x00000020,0x00008000,0x80000000, + 0x00008020,0x80108000,0x00100000,0x80000020, + 0x00100020,0x80008020,0x80000020,0x00100020, + 0x00108000,0x00000000,0x80008000,0x00008020, + 0x80000000,0x80100020,0x80108020,0x00108000}, + { 0x00000208,0x08020200,0x00000000,0x08020008, + 0x08000200,0x00000000,0x00020208,0x08000200, + 0x00020008,0x08000008,0x08000008,0x00020000, + 0x08020208,0x00020008,0x08020000,0x00000208, + 0x08000000,0x00000008,0x08020200,0x00000200, + 0x00020200,0x08020000,0x08020008,0x00020208, + 0x08000208,0x00020200,0x00020000,0x08000208, + 0x00000008,0x08020208,0x00000200,0x08000000, + 0x08020200,0x08000000,0x00020008,0x00000208, + 0x00020000,0x08020200,0x08000200,0x00000000, + 0x00000200,0x00020008,0x08020208,0x08000200, + 0x08000008,0x00000200,0x00000000,0x08020008, + 0x08000208,0x00020000,0x08000000,0x08020208, + 0x00000008,0x00020208,0x00020200,0x08000008, + 0x08020000,0x08000208,0x00000208,0x08020000, + 0x00020208,0x00000008,0x08020008,0x00020200}, + { 0x00802001,0x00002081,0x00002081,0x00000080, + 0x00802080,0x00800081,0x00800001,0x00002001, + 0x00000000,0x00802000,0x00802000,0x00802081, + 0x00000081,0x00000000,0x00800080,0x00800001, + 0x00000001,0x00002000,0x00800000,0x00802001, + 0x00000080,0x00800000,0x00002001,0x00002080, + 0x00800081,0x00000001,0x00002080,0x00800080, + 0x00002000,0x00802080,0x00802081,0x00000081, + 0x00800080,0x00800001,0x00802000,0x00802081, + 0x00000081,0x00000000,0x00000000,0x00802000, + 0x00002080,0x00800080,0x00800081,0x00000001, + 0x00802001,0x00002081,0x00002081,0x00000080, + 0x00802081,0x00000081,0x00000001,0x00002000, + 0x00800001,0x00002001,0x00802080,0x00800081, + 0x00002001,0x00002080,0x00800000,0x00802001, + 0x00000080,0x00800000,0x00002000,0x00802080}, + { 0x00000100,0x02080100,0x02080000,0x42000100, + 0x00080000,0x00000100,0x40000000,0x02080000, + 0x40080100,0x00080000,0x02000100,0x40080100, + 0x42000100,0x42080000,0x00080100,0x40000000, + 0x02000000,0x40080000,0x40080000,0x00000000, + 0x40000100,0x42080100,0x42080100,0x02000100, + 0x42080000,0x40000100,0x00000000,0x42000000, + 0x02080100,0x02000000,0x42000000,0x00080100, + 0x00080000,0x42000100,0x00000100,0x02000000, + 0x40000000,0x02080000,0x42000100,0x40080100, + 0x02000100,0x40000000,0x42080000,0x02080100, + 0x40080100,0x00000100,0x02000000,0x42080000, + 0x42080100,0x00080100,0x42000000,0x42080100, + 0x02080000,0x00000000,0x40080000,0x42000000, + 0x00080100,0x02000100,0x40000100,0x00080000, + 0x00000000,0x40080000,0x02080100,0x40000100}, + { 0x20000010,0x20400000,0x00004000,0x20404010, + 0x20400000,0x00000010,0x20404010,0x00400000, + 0x20004000,0x00404010,0x00400000,0x20000010, + 0x00400010,0x20004000,0x20000000,0x00004010, + 0x00000000,0x00400010,0x20004010,0x00004000, + 0x00404000,0x20004010,0x00000010,0x20400010, + 0x20400010,0x00000000,0x00404010,0x20404000, + 0x00004010,0x00404000,0x20404000,0x20000000, + 
0x20004000,0x00000010,0x20400010,0x00404000, + 0x20404010,0x00400000,0x00004010,0x20000010, + 0x00400000,0x20004000,0x20000000,0x00004010, + 0x20000010,0x20404010,0x00404000,0x20400000, + 0x00404010,0x20404000,0x00000000,0x20400010, + 0x00000010,0x00004000,0x20400000,0x00404010, + 0x00004000,0x00400010,0x20004010,0x00000000, + 0x20404000,0x20000000,0x00400010,0x20004010}, + { 0x00200000,0x04200002,0x04000802,0x00000000, + 0x00000800,0x04000802,0x00200802,0x04200800, + 0x04200802,0x00200000,0x00000000,0x04000002, + 0x00000002,0x04000000,0x04200002,0x00000802, + 0x04000800,0x00200802,0x00200002,0x04000800, + 0x04000002,0x04200000,0x04200800,0x00200002, + 0x04200000,0x00000800,0x00000802,0x04200802, + 0x00200800,0x00000002,0x04000000,0x00200800, + 0x04000000,0x00200800,0x00200000,0x04000802, + 0x04000802,0x04200002,0x04200002,0x00000002, + 0x00200002,0x04000000,0x04000800,0x00200000, + 0x04200800,0x00000802,0x00200802,0x04200800, + 0x00000802,0x04000002,0x04200802,0x04200000, + 0x00200800,0x00000000,0x00000002,0x04200802, + 0x00000000,0x00200802,0x04200000,0x00000800, + 0x04000002,0x04000800,0x00000800,0x00200002}, + { 0x10001040,0x00001000,0x00040000,0x10041040, + 0x10000000,0x10001040,0x00000040,0x10000000, + 0x00040040,0x10040000,0x10041040,0x00041000, + 0x10041000,0x00041040,0x00001000,0x00000040, + 0x10040000,0x10000040,0x10001000,0x00001040, + 0x00041000,0x00040040,0x10040040,0x10041000, + 0x00001040,0x00000000,0x00000000,0x10040040, + 0x10000040,0x10001000,0x00041040,0x00040000, + 0x00041040,0x00040000,0x10041000,0x00001000, + 0x00000040,0x10040040,0x00001000,0x00041040, + 0x10001000,0x00000040,0x10000040,0x10040000, + 0x10040040,0x10000000,0x00040000,0x10001040, + 0x00000000,0x10041040,0x00040040,0x10000040, + 0x10040000,0x10001000,0x10001040,0x00000000, + 0x10041040,0x00041000,0x00041000,0x00001040, + 0x00001040,0x00040040,0x10000000,0x10041000} + }; + + static WC_INLINE void IPERM(word32* left, word32* right) + { + word32 work; + + *right = rotlFixed(*right, 4U); + work = (*left ^ *right) & 0xf0f0f0f0; + *left ^= work; + + *right = rotrFixed(*right^work, 20U); + work = (*left ^ *right) & 0xffff0000; + *left ^= work; + + *right = rotrFixed(*right^work, 18U); + work = (*left ^ *right) & 0x33333333; + *left ^= work; + + *right = rotrFixed(*right^work, 6U); + work = (*left ^ *right) & 0x00ff00ff; + *left ^= work; + + *right = rotlFixed(*right^work, 9U); + work = (*left ^ *right) & 0xaaaaaaaa; + *left = rotlFixed(*left^work, 1U); + *right ^= work; + } + + static WC_INLINE void FPERM(word32* left, word32* right) + { + word32 work; + + *right = rotrFixed(*right, 1U); + work = (*left ^ *right) & 0xaaaaaaaa; + *right ^= work; + + *left = rotrFixed(*left^work, 9U); + work = (*left ^ *right) & 0x00ff00ff; + *right ^= work; + + *left = rotlFixed(*left^work, 6U); + work = (*left ^ *right) & 0x33333333; + *right ^= work; + + *left = rotlFixed(*left^work, 18U); + work = (*left ^ *right) & 0xffff0000; + *right ^= work; + + *left = rotlFixed(*left^work, 20U); + work = (*left ^ *right) & 0xf0f0f0f0; + *right ^= work; + + *left = rotrFixed(*left^work, 4U); + } + + static int DesSetKey(const byte* key, int dir, word32* out) + { + #define DES_KEY_BUFFER_SIZE (56+56+8) + #ifdef WOLFSSL_SMALL_STACK + byte* buffer = (byte*)XMALLOC(DES_KEY_BUFFER_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER); + + if (buffer == NULL) + return MEMORY_E; + #else + byte buffer[DES_KEY_BUFFER_SIZE]; + #endif + + { + byte* const pc1m = buffer; /* place to modify pc1 into */ + byte* const pcr = pc1m + 56; /* place to rotate pc1 into */ + 
byte* const ks = pcr + 56; + register int i, j, l; + int m; + + for (j = 0; j < 56; j++) { /* convert pc1 to bits of key */ + l = pc1[j] - 1; /* integer bit location */ + m = l & 07; /* find bit */ + pc1m[j] = (key[l >> 3] & /* find which key byte l is in */ + bytebit[m]) /* and which bit of that byte */ + ? 1 : 0; /* and store 1-bit result */ + } + + for (i = 0; i < 16; i++) { /* key chunk for each iteration */ + XMEMSET(ks, 0, 8); /* Clear key schedule */ + + for (j = 0; j < 56; j++) /* rotate pc1 the right amount */ + pcr[j] = + pc1m[(l = j + totrot[i]) < (j < 28 ? 28 : 56) ? l : l-28]; + + /* rotate left and right halves independently */ + for (j = 0; j < 48; j++) { /* select bits individually */ + if (pcr[pc2[j] - 1]) { /* check bit that goes to ks[j] */ + l= j % 6; /* mask it in if it's there */ + ks[j/6] |= bytebit[l] >> 2; + } + } + + /* Now convert to odd/even interleaved form for use in F */ + out[2*i] = ((word32) ks[0] << 24) + | ((word32) ks[2] << 16) + | ((word32) ks[4] << 8) + | ((word32) ks[6]); + + out[2*i + 1] = ((word32) ks[1] << 24) + | ((word32) ks[3] << 16) + | ((word32) ks[5] << 8) + | ((word32) ks[7]); + } + + /* reverse key schedule order */ + if (dir == DES_DECRYPTION) { + for (i = 0; i < 16; i += 2) { + word32 swap = out[i]; + out[i] = out[DES_KS_SIZE - 2 - i]; + out[DES_KS_SIZE - 2 - i] = swap; + + swap = out[i + 1]; + out[i + 1] = out[DES_KS_SIZE - 1 - i]; + out[DES_KS_SIZE - 1 - i] = swap; + } + } + + #ifdef WOLFSSL_SMALL_STACK + XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + } + + return 0; + } + + int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir) + { + wc_Des_SetIV(des, iv); + + return DesSetKey(key, dir, des->key); + } + + int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir) + { + int ret; + + if (des == NULL || key == NULL || dir < 0) { + return BAD_FUNC_ARG; + } + + #if defined(WOLF_CRYPTO_CB) || \ + (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)) + #ifdef WOLF_CRYPTO_CB + if (des->devId != INVALID_DEVID) + #endif + { + XMEMCPY(des->devKey, key, DES3_KEYLEN); + } + #endif + + ret = DesSetKey(key + (dir == DES_ENCRYPTION ? 0:16), dir, des->key[0]); + if (ret != 0) + return ret; + + ret = DesSetKey(key + 8, !dir, des->key[1]); + if (ret != 0) + return ret; + + ret = DesSetKey(key + (dir == DES_DECRYPTION ? 
0:16), dir, des->key[2]); + if (ret != 0) + return ret; + + return wc_Des3_SetIV(des, iv); + } + + static void DesRawProcessBlock(word32* lIn, word32* rIn, const word32* kptr) + { + word32 l = *lIn, r = *rIn, i; + + for (i=0; i<8; i++) + { + word32 work = rotrFixed(r, 4U) ^ kptr[4*i+0]; + l ^= Spbox[6][(work) & 0x3f] + ^ Spbox[4][(work >> 8) & 0x3f] + ^ Spbox[2][(work >> 16) & 0x3f] + ^ Spbox[0][(work >> 24) & 0x3f]; + work = r ^ kptr[4*i+1]; + l ^= Spbox[7][(work) & 0x3f] + ^ Spbox[5][(work >> 8) & 0x3f] + ^ Spbox[3][(work >> 16) & 0x3f] + ^ Spbox[1][(work >> 24) & 0x3f]; + + work = rotrFixed(l, 4U) ^ kptr[4*i+2]; + r ^= Spbox[6][(work) & 0x3f] + ^ Spbox[4][(work >> 8) & 0x3f] + ^ Spbox[2][(work >> 16) & 0x3f] + ^ Spbox[0][(work >> 24) & 0x3f]; + work = l ^ kptr[4*i+3]; + r ^= Spbox[7][(work) & 0x3f] + ^ Spbox[5][(work >> 8) & 0x3f] + ^ Spbox[3][(work >> 16) & 0x3f] + ^ Spbox[1][(work >> 24) & 0x3f]; + } + + *lIn = l; *rIn = r; + } + + static void DesProcessBlock(Des* des, const byte* in, byte* out) + { + word32 l, r; + + XMEMCPY(&l, in, sizeof(l)); + XMEMCPY(&r, in + sizeof(l), sizeof(r)); + #ifdef LITTLE_ENDIAN_ORDER + l = ByteReverseWord32(l); + r = ByteReverseWord32(r); + #endif + IPERM(&l,&r); + + DesRawProcessBlock(&l, &r, des->key); + + FPERM(&l,&r); + #ifdef LITTLE_ENDIAN_ORDER + l = ByteReverseWord32(l); + r = ByteReverseWord32(r); + #endif + XMEMCPY(out, &r, sizeof(r)); + XMEMCPY(out + sizeof(r), &l, sizeof(l)); + } + + static void Des3ProcessBlock(Des3* des, const byte* in, byte* out) + { + word32 l, r; + + XMEMCPY(&l, in, sizeof(l)); + XMEMCPY(&r, in + sizeof(l), sizeof(r)); + #ifdef LITTLE_ENDIAN_ORDER + l = ByteReverseWord32(l); + r = ByteReverseWord32(r); + #endif + IPERM(&l,&r); + + DesRawProcessBlock(&l, &r, des->key[0]); + DesRawProcessBlock(&r, &l, des->key[1]); + DesRawProcessBlock(&l, &r, des->key[2]); + + FPERM(&l,&r); + #ifdef LITTLE_ENDIAN_ORDER + l = ByteReverseWord32(l); + r = ByteReverseWord32(r); + #endif + XMEMCPY(out, &r, sizeof(r)); + XMEMCPY(out + sizeof(r), &l, sizeof(l)); + } + + int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz) + { + word32 blocks = sz / DES_BLOCK_SIZE; + + while (blocks--) { + xorbuf((byte*)des->reg, in, DES_BLOCK_SIZE); + DesProcessBlock(des, (byte*)des->reg, (byte*)des->reg); + XMEMCPY(out, des->reg, DES_BLOCK_SIZE); + + out += DES_BLOCK_SIZE; + in += DES_BLOCK_SIZE; + } + return 0; + } + + int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz) + { + word32 blocks = sz / DES_BLOCK_SIZE; + + while (blocks--) { + XMEMCPY(des->tmp, in, DES_BLOCK_SIZE); + DesProcessBlock(des, (byte*)des->tmp, out); + xorbuf(out, (byte*)des->reg, DES_BLOCK_SIZE); + XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE); + + out += DES_BLOCK_SIZE; + in += DES_BLOCK_SIZE; + } + return 0; + } + + int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz) + { + word32 blocks; + + if (des == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + #ifdef WOLF_CRYPTO_CB + if (des->devId != INVALID_DEVID) { + int ret = wc_CryptoCb_Des3Encrypt(des, out, in, sz); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + } + #endif + + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES) + if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES && + sz >= WC_ASYNC_THRESH_DES3_CBC) { + #if defined(HAVE_CAVIUM) + return NitroxDes3CbcEncrypt(des, out, in, sz); + #elif defined(HAVE_INTEL_QA) + return IntelQaSymDes3CbcEncrypt(&des->asyncDev, out, in, sz, + (const byte*)des->devKey, 
DES3_KEYLEN, (byte*)des->reg, DES3_IVLEN); + #else /* WOLFSSL_ASYNC_CRYPT_TEST */ + if (wc_AsyncTestInit(&des->asyncDev, ASYNC_TEST_DES3_CBC_ENCRYPT)) { + WC_ASYNC_TEST* testDev = &des->asyncDev.test; + testDev->des.des = des; + testDev->des.out = out; + testDev->des.in = in; + testDev->des.sz = sz; + return WC_PENDING_E; + } + #endif + } + #endif /* WOLFSSL_ASYNC_CRYPT */ + + blocks = sz / DES_BLOCK_SIZE; + while (blocks--) { + xorbuf((byte*)des->reg, in, DES_BLOCK_SIZE); + Des3ProcessBlock(des, (byte*)des->reg, (byte*)des->reg); + XMEMCPY(out, des->reg, DES_BLOCK_SIZE); + + out += DES_BLOCK_SIZE; + in += DES_BLOCK_SIZE; + } + return 0; + } + + + int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz) + { + word32 blocks; + + if (des == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + #ifdef WOLF_CRYPTO_CB + if (des->devId != INVALID_DEVID) { + int ret = wc_CryptoCb_Des3Decrypt(des, out, in, sz); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + } + #endif + + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES) + if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES && + sz >= WC_ASYNC_THRESH_DES3_CBC) { + #if defined(HAVE_CAVIUM) + return NitroxDes3CbcDecrypt(des, out, in, sz); + #elif defined(HAVE_INTEL_QA) + return IntelQaSymDes3CbcDecrypt(&des->asyncDev, out, in, sz, + (const byte*)des->devKey, DES3_KEYLEN, (byte*)des->reg, DES3_IVLEN); + #else /* WOLFSSL_ASYNC_CRYPT_TEST */ + if (wc_AsyncTestInit(&des->asyncDev, ASYNC_TEST_DES3_CBC_DECRYPT)) { + WC_ASYNC_TEST* testDev = &des->asyncDev.test; + testDev->des.des = des; + testDev->des.out = out; + testDev->des.in = in; + testDev->des.sz = sz; + return WC_PENDING_E; + } + #endif + } + #endif /* WOLFSSL_ASYNC_CRYPT */ + + blocks = sz / DES_BLOCK_SIZE; + while (blocks--) { + XMEMCPY(des->tmp, in, DES_BLOCK_SIZE); + Des3ProcessBlock(des, (byte*)des->tmp, out); + xorbuf(out, (byte*)des->reg, DES_BLOCK_SIZE); + XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE); + + out += DES_BLOCK_SIZE; + in += DES_BLOCK_SIZE; + } + return 0; + } + + #ifdef WOLFSSL_DES_ECB + /* One block, compatibility only */ + int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz) + { + word32 blocks = sz / DES_BLOCK_SIZE; + + if (des == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + while (blocks--) { + DesProcessBlock(des, in, out); + + out += DES_BLOCK_SIZE; + in += DES_BLOCK_SIZE; + } + return 0; + } + + int wc_Des3_EcbEncrypt(Des3* des, byte* out, const byte* in, word32 sz) + { + word32 blocks = sz / DES_BLOCK_SIZE; + + if (des == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + while (blocks--) { + Des3ProcessBlock(des, in, out); + + out += DES_BLOCK_SIZE; + in += DES_BLOCK_SIZE; + } + return 0; + } + #endif /* WOLFSSL_DES_ECB */ + +#endif /* NEED_SOFT_DES */ + + +void wc_Des_SetIV(Des* des, const byte* iv) +{ + if (des && iv) + XMEMCPY(des->reg, iv, DES_BLOCK_SIZE); + else if (des) + XMEMSET(des->reg, 0, DES_BLOCK_SIZE); +} + +int wc_Des3_SetIV(Des3* des, const byte* iv) +{ + if (des == NULL) { + return BAD_FUNC_ARG; + } + if (des && iv) + XMEMCPY(des->reg, iv, DES_BLOCK_SIZE); + else if (des) + XMEMSET(des->reg, 0, DES_BLOCK_SIZE); + + return 0; +} + + +/* Initialize Des3 for use with async device */ +int wc_Des3Init(Des3* des3, void* heap, int devId) +{ + int ret = 0; + if (des3 == NULL) + return BAD_FUNC_ARG; + + des3->heap = heap; + +#ifdef WOLF_CRYPTO_CB + des3->devId = devId; + des3->devCtx = NULL; +#else + (void)devId; 
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+    ret = wolfAsync_DevCtxInit(&des3->asyncDev, WOLFSSL_ASYNC_MARKER_3DES,
+                                                        des3->heap, devId);
+#endif
+
+    return ret;
+}
+
+/* Free Des3 from use with async device */
+void wc_Des3Free(Des3* des3)
+{
+    if (des3 == NULL)
+        return;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+    wolfAsync_DevCtxFree(&des3->asyncDev, WOLFSSL_ASYNC_MARKER_3DES);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+#if defined(WOLF_CRYPTO_CB) || \
+    (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES))
+    ForceZero(des3->devKey, sizeof(des3->devKey));
+#endif
+}
+
+#endif /* WOLFSSL_TI_CRYPT */
+#endif /* HAVE_FIPS */
+#endif /* NO_DES3 */
diff --git a/client/wolfssl/wolfcrypt/src/dh.c b/client/wolfssl/wolfcrypt/src/dh.c
new file mode 100644
index 0000000..6c53be8
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/dh.c
@@ -0,0 +1,2491 @@
+/* dh.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifndef NO_DH
+
+#if defined(HAVE_FIPS) && \
+    defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+
+    /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+    #define FIPS_NO_WRAPPERS
+
+    #ifdef USE_WINDOWS_API
+        #pragma code_seg(".fipsA$m")
+        #pragma const_seg(".fipsB$m")
+    #endif
+#endif
+
+#include <wolfssl/wolfcrypt/dh.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+#ifdef WOLFSSL_HAVE_SP_DH
+#include <wolfssl/wolfcrypt/sp.h>
+#endif
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+
+/*
+Possible DH enable options:
+ * NO_DH:                   Overall control of DH        default: on (not defined)
+ * WOLFSSL_OLD_PRIME_CHECK: Disables the new prime number check. It does not
+                            directly affect this file, but it does speed up DH
+                            by removing the testing. It is not recommended to
+                            disable the prime checking.   default: off
+
+*/
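+/* Usage sketch (illustrative only, not part of the original source): a
+ * typical DH agreement with this module, using the FFDHE 2048 parameters
+ * defined below (requires HAVE_FFDHE_2048); error handling and the peer
+ * exchange are elided.
+ *
+ *     DhKey key;
+ *     WC_RNG rng;
+ *     const DhParams* params = wc_Dh_ffdhe2048_Get();
+ *     byte priv[128], pub[256];
+ *     word32 privSz = (word32)sizeof(priv), pubSz = (word32)sizeof(pub);
+ *
+ *     wc_InitRng(&rng);
+ *     wc_InitDhKey(&key);
+ *     wc_DhSetKey(&key, params->p, params->p_len, params->g, params->g_len);
+ *     wc_DhGenerateKeyPair(&key, &rng, priv, &privSz, pub, &pubSz);
+ *     ...send pub to the peer, then derive the shared secret with
+ *        wc_DhAgree(&key, agree, &agreeSz, priv, privSz, peerPub, peerPubSz)...
+ *     wc_FreeDhKey(&key);
+ *     wc_FreeRng(&rng);
+ */
+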
+
+
+#if !defined(USER_MATH_LIB) && !defined(WOLFSSL_DH_CONST)
+    #include <math.h>
+    #define XPOW(x,y) pow((x),(y))
+    #define XLOG(x)   log((x))
+#else
+    /* user's own math lib */
+#endif
+
+#ifdef HAVE_FFDHE_2048
+static const byte dh_ffdhe2048_p[] = {
+    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A,
+    0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1,
+    0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95,
+    0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB,
+    0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9,
+    0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8,
+    0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A,
+    0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61,
+    0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0,
+    0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3,
+    0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35,
+    0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77,
+    0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72,
+    0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35,
+    0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A,
+    0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61,
+    0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB,
+    0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68,
+    0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4,
+    0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19,
+    0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70,
+    0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC,
+    0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61,
+    0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF,
+    0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83,
+    0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73,
+    0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05,
+    0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2,
+    0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA,
+    0x88, 0x6B, 0x42, 0x38, 0x61, 0x28, 0x5C, 0x97,
+    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+static const byte dh_ffdhe2048_g[] = { 0x02 };
+#ifdef HAVE_FFDHE_Q
+static const byte dh_ffdhe2048_q[] = {
+    0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    0xD6, 0xFC, 0x2A, 0x2C, 0x51, 0x5D, 0xA5, 0x4D,
+    0x57, 0xEE, 0x2B, 0x10, 0x13, 0x9E, 0x9E, 0x78,
+    0xEC, 0x5C, 0xE2, 0xC1, 0xE7, 0x16, 0x9B, 0x4A,
+    0xD4, 0xF0, 0x9B, 0x20, 0x8A, 0x32, 0x19, 0xFD,
+    0xE6, 0x49, 0xCE, 0xE7, 0x12, 0x4D, 0x9F, 0x7C,
+    0xBE, 0x97, 0xF1, 0xB1, 0xB1, 0x86, 0x3A, 0xEC,
+    0x7B, 0x40, 0xD9, 0x01, 0x57, 0x62, 0x30, 0xBD,
+    0x69, 0xEF, 0x8F, 0x6A, 0xEA, 0xFE, 0xB2, 0xB0,
+    0x92, 0x19, 0xFA, 0x8F, 0xAF, 0x83, 0x37, 0x68,
+    0x42, 0xB1, 0xB2, 0xAA, 0x9E, 0xF6, 0x8D, 0x79,
+    0xDA, 0xAB, 0x89, 0xAF, 0x3F, 0xAB, 0xE4, 0x9A,
+    0xCC, 0x27, 0x86, 0x38, 0x70, 0x73, 0x45, 0xBB,
+    0xF1, 0x53, 0x44, 0xED, 0x79, 0xF7, 0xF4, 0x39,
+    0x0E, 0xF8, 0xAC, 0x50, 0x9B, 0x56, 0xF3, 0x9A,
+    0x98, 0x56, 0x65, 0x27, 0xA4, 0x1D, 0x3C, 0xBD,
+    0x5E, 0x05, 0x58, 0xC1, 0x59, 0x92, 0x7D, 0xB0,
+    0xE8, 0x84, 0x54, 0xA5, 0xD9, 0x64, 0x71, 0xFD,
+    0xDC, 0xB5, 0x6D, 0x5B, 0xB0, 0x6B, 0xFA, 0x34,
+    0x0E, 0xA7, 0xA1, 0x51, 0xEF, 0x1C, 0xA6, 0xFA,
+    0x57, 0x2B, 0x76, 0xF3, 0xB1, 0xB9, 0x5D, 0x8C,
+    0x85, 0x83, 0xD3, 0xE4, 0x77, 0x05, 0x36, 0xB8,
+    0x4F, 0x01, 0x7E, 0x70, 0xE6, 0xFB, 0xF1, 0x76,
+    0x60, 0x1A, 0x02, 0x66, 0x94, 0x1A, 0x17, 0xB0,
+    0xC8, 0xB9, 0x7F, 0x4E, 0x74, 0xC2, 0xC1, 0xFF,
+    0xC7, 0x27, 0x89, 0x19, 0x77, 0x79, 0x40, 0xC1,
+    0xE1, 0xFF, 0x1D, 0x8D, 0xA6, 0x37, 0xD6, 0xB9,
+    0x9D, 0xDA, 0xFE, 0x5E, 0x17, 0x61, 0x10, 0x02,
+    0xE2, 0xC7, 0x78, 0xC1, 0xBE, 0x8B, 0x41, 0xD9,
+    0x63, 0x79, 0xA5, 0x13, 0x60, 0xD9, 0x77, 0xFD,
+    0x44, 0x35, 0xA1, 0x1C, 0x30, 0x94, 0x2E, 0x4B,
+    0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF +}; +#endif /* HAVE_FFDHE_Q */ + +const DhParams* wc_Dh_ffdhe2048_Get(void) +{ + static const DhParams ffdhe2048 = { + #ifdef HAVE_FFDHE_Q + dh_ffdhe2048_q, sizeof(dh_ffdhe2048_q), + #endif /* HAVE_FFDHE_Q */ + dh_ffdhe2048_p, sizeof(dh_ffdhe2048_p), + dh_ffdhe2048_g, sizeof(dh_ffdhe2048_g) + }; + return &ffdhe2048; +} +#endif + +#ifdef HAVE_FFDHE_3072 +static const byte dh_ffdhe3072_p[] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A, + 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, + 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, + 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB, + 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, + 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, + 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A, + 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, + 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, + 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3, + 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, + 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, + 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72, + 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, + 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, + 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61, + 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, + 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, + 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4, + 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, + 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, + 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC, + 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, + 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, + 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83, + 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, + 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, + 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2, + 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, + 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, + 0xDE, 0x35, 0x5B, 0x3B, 0x65, 0x19, 0x03, 0x5B, + 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38, + 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, + 0x7A, 0xD9, 0x1D, 0x26, 0x91, 0xF7, 0xF7, 0xEE, + 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C, + 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, + 0xB4, 0x13, 0x0C, 0x93, 0xBC, 0x43, 0x79, 0x44, + 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3, + 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, + 0x5C, 0xAE, 0x82, 0xAB, 0x9C, 0x9D, 0xF6, 0x9E, + 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D, + 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, + 0x1D, 0xBF, 0x9A, 0x42, 0xD5, 0xC4, 0x48, 0x4E, + 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF, + 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, + 0x25, 0xE4, 0x1D, 0x2B, 0x66, 0xC6, 0x2E, 0x37, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; +static const byte dh_ffdhe3072_g[] = { 0x02 }; +#ifdef HAVE_FFDHE_Q +static const byte dh_ffdhe3072_q[] = { + 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xD6, 0xFC, 0x2A, 0x2C, 0x51, 0x5D, 0xA5, 0x4D, + 0x57, 0xEE, 0x2B, 0x10, 0x13, 0x9E, 0x9E, 0x78, + 0xEC, 0x5C, 0xE2, 0xC1, 0xE7, 0x16, 0x9B, 0x4A, + 0xD4, 0xF0, 0x9B, 0x20, 0x8A, 0x32, 0x19, 0xFD, + 0xE6, 0x49, 0xCE, 0xE7, 0x12, 0x4D, 0x9F, 0x7C, + 0xBE, 0x97, 0xF1, 0xB1, 0xB1, 0x86, 0x3A, 0xEC, + 0x7B, 0x40, 0xD9, 0x01, 0x57, 0x62, 0x30, 0xBD, + 0x69, 0xEF, 0x8F, 0x6A, 0xEA, 0xFE, 0xB2, 0xB0, + 0x92, 0x19, 0xFA, 0x8F, 0xAF, 0x83, 0x37, 0x68, + 0x42, 0xB1, 0xB2, 0xAA, 0x9E, 0xF6, 0x8D, 0x79, + 0xDA, 0xAB, 0x89, 0xAF, 0x3F, 0xAB, 0xE4, 0x9A, + 0xCC, 0x27, 0x86, 
0x38, 0x70, 0x73, 0x45, 0xBB, + 0xF1, 0x53, 0x44, 0xED, 0x79, 0xF7, 0xF4, 0x39, + 0x0E, 0xF8, 0xAC, 0x50, 0x9B, 0x56, 0xF3, 0x9A, + 0x98, 0x56, 0x65, 0x27, 0xA4, 0x1D, 0x3C, 0xBD, + 0x5E, 0x05, 0x58, 0xC1, 0x59, 0x92, 0x7D, 0xB0, + 0xE8, 0x84, 0x54, 0xA5, 0xD9, 0x64, 0x71, 0xFD, + 0xDC, 0xB5, 0x6D, 0x5B, 0xB0, 0x6B, 0xFA, 0x34, + 0x0E, 0xA7, 0xA1, 0x51, 0xEF, 0x1C, 0xA6, 0xFA, + 0x57, 0x2B, 0x76, 0xF3, 0xB1, 0xB9, 0x5D, 0x8C, + 0x85, 0x83, 0xD3, 0xE4, 0x77, 0x05, 0x36, 0xB8, + 0x4F, 0x01, 0x7E, 0x70, 0xE6, 0xFB, 0xF1, 0x76, + 0x60, 0x1A, 0x02, 0x66, 0x94, 0x1A, 0x17, 0xB0, + 0xC8, 0xB9, 0x7F, 0x4E, 0x74, 0xC2, 0xC1, 0xFF, + 0xC7, 0x27, 0x89, 0x19, 0x77, 0x79, 0x40, 0xC1, + 0xE1, 0xFF, 0x1D, 0x8D, 0xA6, 0x37, 0xD6, 0xB9, + 0x9D, 0xDA, 0xFE, 0x5E, 0x17, 0x61, 0x10, 0x02, + 0xE2, 0xC7, 0x78, 0xC1, 0xBE, 0x8B, 0x41, 0xD9, + 0x63, 0x79, 0xA5, 0x13, 0x60, 0xD9, 0x77, 0xFD, + 0x44, 0x35, 0xA1, 0x1C, 0x30, 0x8F, 0xE7, 0xEE, + 0x6F, 0x1A, 0xAD, 0x9D, 0xB2, 0x8C, 0x81, 0xAD, + 0xDE, 0x1A, 0x7A, 0x6F, 0x7C, 0xCE, 0x01, 0x1C, + 0x30, 0xDA, 0x37, 0xE4, 0xEB, 0x73, 0x64, 0x83, + 0xBD, 0x6C, 0x8E, 0x93, 0x48, 0xFB, 0xFB, 0xF7, + 0x2C, 0xC6, 0x58, 0x7D, 0x60, 0xC3, 0x6C, 0x8E, + 0x57, 0x7F, 0x09, 0x84, 0xC2, 0x89, 0xC9, 0x38, + 0x5A, 0x09, 0x86, 0x49, 0xDE, 0x21, 0xBC, 0xA2, + 0x7A, 0x7E, 0xA2, 0x29, 0x71, 0x6B, 0xA6, 0xE9, + 0xB2, 0x79, 0x71, 0x0F, 0x38, 0xFA, 0xA5, 0xFF, + 0xAE, 0x57, 0x41, 0x55, 0xCE, 0x4E, 0xFB, 0x4F, + 0x74, 0x36, 0x95, 0xE2, 0x91, 0x1B, 0x1D, 0x06, + 0xD5, 0xE2, 0x90, 0xCB, 0xCD, 0x86, 0xF5, 0x6D, + 0x0E, 0xDF, 0xCD, 0x21, 0x6A, 0xE2, 0x24, 0x27, + 0x05, 0x5E, 0x68, 0x35, 0xFD, 0x29, 0xEE, 0xF7, + 0x9E, 0x0D, 0x90, 0x77, 0x1F, 0xEA, 0xCE, 0xBE, + 0x12, 0xF2, 0x0E, 0x95, 0xB3, 0x63, 0x17, 0x1B, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; +#endif /* HAVE_FFDHE_Q */ + +const DhParams* wc_Dh_ffdhe3072_Get(void) +{ + static const DhParams ffdhe3072 = { + #ifdef HAVE_FFDHE_Q + dh_ffdhe3072_q, sizeof(dh_ffdhe3072_q), + #endif /* HAVE_FFDHE_Q */ + dh_ffdhe3072_p, sizeof(dh_ffdhe3072_p), + dh_ffdhe3072_g, sizeof(dh_ffdhe3072_g) + }; + return &ffdhe3072; +} +#endif + +#ifdef HAVE_FFDHE_4096 +static const byte dh_ffdhe4096_p[] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A, + 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, + 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, + 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB, + 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, + 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, + 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A, + 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, + 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, + 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3, + 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, + 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, + 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72, + 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, + 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, + 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61, + 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, + 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, + 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4, + 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, + 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, + 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC, + 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, + 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, + 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83, + 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, + 0x3B, 0xB5, 0xFC, 0xBC, 
0x2E, 0xC2, 0x20, 0x05, + 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2, + 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, + 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, + 0xDE, 0x35, 0x5B, 0x3B, 0x65, 0x19, 0x03, 0x5B, + 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38, + 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, + 0x7A, 0xD9, 0x1D, 0x26, 0x91, 0xF7, 0xF7, 0xEE, + 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C, + 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, + 0xB4, 0x13, 0x0C, 0x93, 0xBC, 0x43, 0x79, 0x44, + 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3, + 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, + 0x5C, 0xAE, 0x82, 0xAB, 0x9C, 0x9D, 0xF6, 0x9E, + 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D, + 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, + 0x1D, 0xBF, 0x9A, 0x42, 0xD5, 0xC4, 0x48, 0x4E, + 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF, + 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, + 0x25, 0xE4, 0x1D, 0x2B, 0x66, 0x9E, 0x1E, 0xF1, + 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB, + 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6, + 0xAC, 0x7D, 0x5F, 0x42, 0xD6, 0x9F, 0x6D, 0x18, + 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04, + 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A, + 0x71, 0x35, 0xC8, 0x86, 0xEF, 0xB4, 0x31, 0x8A, + 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32, + 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4, + 0x6D, 0xC7, 0x78, 0xF9, 0x71, 0xAD, 0x00, 0x38, + 0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A, + 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C, + 0x2A, 0x4E, 0xCE, 0xA9, 0xF9, 0x8D, 0x0A, 0xCC, + 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF, + 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B, + 0x4D, 0xB5, 0xA8, 0x51, 0xF4, 0x41, 0x82, 0xE1, + 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x65, 0x5F, 0x6A, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; +static const byte dh_ffdhe4096_g[] = { 0x02 }; +#ifdef HAVE_FFDHE_Q +static const byte dh_ffdhe4096_q[] = { + 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xD6, 0xFC, 0x2A, 0x2C, 0x51, 0x5D, 0xA5, 0x4D, + 0x57, 0xEE, 0x2B, 0x10, 0x13, 0x9E, 0x9E, 0x78, + 0xEC, 0x5C, 0xE2, 0xC1, 0xE7, 0x16, 0x9B, 0x4A, + 0xD4, 0xF0, 0x9B, 0x20, 0x8A, 0x32, 0x19, 0xFD, + 0xE6, 0x49, 0xCE, 0xE7, 0x12, 0x4D, 0x9F, 0x7C, + 0xBE, 0x97, 0xF1, 0xB1, 0xB1, 0x86, 0x3A, 0xEC, + 0x7B, 0x40, 0xD9, 0x01, 0x57, 0x62, 0x30, 0xBD, + 0x69, 0xEF, 0x8F, 0x6A, 0xEA, 0xFE, 0xB2, 0xB0, + 0x92, 0x19, 0xFA, 0x8F, 0xAF, 0x83, 0x37, 0x68, + 0x42, 0xB1, 0xB2, 0xAA, 0x9E, 0xF6, 0x8D, 0x79, + 0xDA, 0xAB, 0x89, 0xAF, 0x3F, 0xAB, 0xE4, 0x9A, + 0xCC, 0x27, 0x86, 0x38, 0x70, 0x73, 0x45, 0xBB, + 0xF1, 0x53, 0x44, 0xED, 0x79, 0xF7, 0xF4, 0x39, + 0x0E, 0xF8, 0xAC, 0x50, 0x9B, 0x56, 0xF3, 0x9A, + 0x98, 0x56, 0x65, 0x27, 0xA4, 0x1D, 0x3C, 0xBD, + 0x5E, 0x05, 0x58, 0xC1, 0x59, 0x92, 0x7D, 0xB0, + 0xE8, 0x84, 0x54, 0xA5, 0xD9, 0x64, 0x71, 0xFD, + 0xDC, 0xB5, 0x6D, 0x5B, 0xB0, 0x6B, 0xFA, 0x34, + 0x0E, 0xA7, 0xA1, 0x51, 0xEF, 0x1C, 0xA6, 0xFA, + 0x57, 0x2B, 0x76, 0xF3, 0xB1, 0xB9, 0x5D, 0x8C, + 0x85, 0x83, 0xD3, 0xE4, 0x77, 0x05, 0x36, 0xB8, + 0x4F, 0x01, 0x7E, 0x70, 0xE6, 0xFB, 0xF1, 0x76, + 0x60, 0x1A, 0x02, 0x66, 0x94, 0x1A, 0x17, 0xB0, + 0xC8, 0xB9, 0x7F, 0x4E, 0x74, 0xC2, 0xC1, 0xFF, + 0xC7, 0x27, 0x89, 0x19, 0x77, 0x79, 0x40, 0xC1, + 0xE1, 0xFF, 0x1D, 0x8D, 0xA6, 0x37, 0xD6, 0xB9, + 0x9D, 0xDA, 0xFE, 0x5E, 0x17, 0x61, 0x10, 0x02, + 0xE2, 0xC7, 0x78, 0xC1, 0xBE, 0x8B, 0x41, 0xD9, + 0x63, 0x79, 0xA5, 0x13, 0x60, 0xD9, 0x77, 0xFD, + 0x44, 0x35, 0xA1, 0x1C, 0x30, 0x8F, 0xE7, 0xEE, + 0x6F, 0x1A, 0xAD, 0x9D, 0xB2, 0x8C, 0x81, 0xAD, + 0xDE, 0x1A, 
0x7A, 0x6F, 0x7C, 0xCE, 0x01, 0x1C, + 0x30, 0xDA, 0x37, 0xE4, 0xEB, 0x73, 0x64, 0x83, + 0xBD, 0x6C, 0x8E, 0x93, 0x48, 0xFB, 0xFB, 0xF7, + 0x2C, 0xC6, 0x58, 0x7D, 0x60, 0xC3, 0x6C, 0x8E, + 0x57, 0x7F, 0x09, 0x84, 0xC2, 0x89, 0xC9, 0x38, + 0x5A, 0x09, 0x86, 0x49, 0xDE, 0x21, 0xBC, 0xA2, + 0x7A, 0x7E, 0xA2, 0x29, 0x71, 0x6B, 0xA6, 0xE9, + 0xB2, 0x79, 0x71, 0x0F, 0x38, 0xFA, 0xA5, 0xFF, + 0xAE, 0x57, 0x41, 0x55, 0xCE, 0x4E, 0xFB, 0x4F, + 0x74, 0x36, 0x95, 0xE2, 0x91, 0x1B, 0x1D, 0x06, + 0xD5, 0xE2, 0x90, 0xCB, 0xCD, 0x86, 0xF5, 0x6D, + 0x0E, 0xDF, 0xCD, 0x21, 0x6A, 0xE2, 0x24, 0x27, + 0x05, 0x5E, 0x68, 0x35, 0xFD, 0x29, 0xEE, 0xF7, + 0x9E, 0x0D, 0x90, 0x77, 0x1F, 0xEA, 0xCE, 0xBE, + 0x12, 0xF2, 0x0E, 0x95, 0xB3, 0x4F, 0x0F, 0x78, + 0xB7, 0x37, 0xA9, 0x61, 0x8B, 0x26, 0xFA, 0x7D, + 0xBC, 0x98, 0x74, 0xF2, 0x72, 0xC4, 0x2B, 0xDB, + 0x56, 0x3E, 0xAF, 0xA1, 0x6B, 0x4F, 0xB6, 0x8C, + 0x3B, 0xB1, 0xE7, 0x8E, 0xAA, 0x81, 0xA0, 0x02, + 0x43, 0xFA, 0xAD, 0xD2, 0xBF, 0x18, 0xE6, 0x3D, + 0x38, 0x9A, 0xE4, 0x43, 0x77, 0xDA, 0x18, 0xC5, + 0x76, 0xB5, 0x0F, 0x00, 0x96, 0xCF, 0x34, 0x19, + 0x54, 0x83, 0xB0, 0x05, 0x48, 0xC0, 0x98, 0x62, + 0x36, 0xE3, 0xBC, 0x7C, 0xB8, 0xD6, 0x80, 0x1C, + 0x04, 0x94, 0xCC, 0xD1, 0x99, 0xE5, 0xC5, 0xBD, + 0x0D, 0x0E, 0xDC, 0x9E, 0xB8, 0xA0, 0x00, 0x1E, + 0x15, 0x27, 0x67, 0x54, 0xFC, 0xC6, 0x85, 0x66, + 0x05, 0x41, 0x48, 0xE6, 0xE7, 0x64, 0xBE, 0xE7, + 0xC7, 0x64, 0xDA, 0xAD, 0x3F, 0xC4, 0x52, 0x35, + 0xA6, 0xDA, 0xD4, 0x28, 0xFA, 0x20, 0xC1, 0x70, + 0xE3, 0x45, 0x00, 0x3F, 0x2F, 0x32, 0xAF, 0xB5, + 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; +#endif /* HAVE_FFDHE_Q */ + +const DhParams* wc_Dh_ffdhe4096_Get(void) +{ + static const DhParams ffdhe4096 = { + #ifdef HAVE_FFDHE_Q + dh_ffdhe4096_q, sizeof(dh_ffdhe4096_q), + #endif /* HAVE_FFDHE_Q */ + dh_ffdhe4096_p, sizeof(dh_ffdhe4096_p), + dh_ffdhe4096_g, sizeof(dh_ffdhe4096_g) + }; + return &ffdhe4096; +} +#endif + +#ifdef HAVE_FFDHE_6144 +static const byte dh_ffdhe6144_p[] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A, + 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, + 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, + 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB, + 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, + 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, + 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A, + 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, + 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, + 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3, + 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, + 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, + 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72, + 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, + 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, + 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61, + 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, + 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, + 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4, + 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, + 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, + 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC, + 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, + 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, + 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83, + 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, + 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, + 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2, + 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, + 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, + 0xDE, 0x35, 0x5B, 
0x3B, 0x65, 0x19, 0x03, 0x5B, + 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38, + 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, + 0x7A, 0xD9, 0x1D, 0x26, 0x91, 0xF7, 0xF7, 0xEE, + 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C, + 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, + 0xB4, 0x13, 0x0C, 0x93, 0xBC, 0x43, 0x79, 0x44, + 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3, + 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, + 0x5C, 0xAE, 0x82, 0xAB, 0x9C, 0x9D, 0xF6, 0x9E, + 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D, + 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, + 0x1D, 0xBF, 0x9A, 0x42, 0xD5, 0xC4, 0x48, 0x4E, + 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF, + 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, + 0x25, 0xE4, 0x1D, 0x2B, 0x66, 0x9E, 0x1E, 0xF1, + 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB, + 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6, + 0xAC, 0x7D, 0x5F, 0x42, 0xD6, 0x9F, 0x6D, 0x18, + 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04, + 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A, + 0x71, 0x35, 0xC8, 0x86, 0xEF, 0xB4, 0x31, 0x8A, + 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32, + 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4, + 0x6D, 0xC7, 0x78, 0xF9, 0x71, 0xAD, 0x00, 0x38, + 0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A, + 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C, + 0x2A, 0x4E, 0xCE, 0xA9, 0xF9, 0x8D, 0x0A, 0xCC, + 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF, + 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B, + 0x4D, 0xB5, 0xA8, 0x51, 0xF4, 0x41, 0x82, 0xE1, + 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x0D, 0xD9, 0x02, + 0x0B, 0xFD, 0x64, 0xB6, 0x45, 0x03, 0x6C, 0x7A, + 0x4E, 0x67, 0x7D, 0x2C, 0x38, 0x53, 0x2A, 0x3A, + 0x23, 0xBA, 0x44, 0x42, 0xCA, 0xF5, 0x3E, 0xA6, + 0x3B, 0xB4, 0x54, 0x32, 0x9B, 0x76, 0x24, 0xC8, + 0x91, 0x7B, 0xDD, 0x64, 0xB1, 0xC0, 0xFD, 0x4C, + 0xB3, 0x8E, 0x8C, 0x33, 0x4C, 0x70, 0x1C, 0x3A, + 0xCD, 0xAD, 0x06, 0x57, 0xFC, 0xCF, 0xEC, 0x71, + 0x9B, 0x1F, 0x5C, 0x3E, 0x4E, 0x46, 0x04, 0x1F, + 0x38, 0x81, 0x47, 0xFB, 0x4C, 0xFD, 0xB4, 0x77, + 0xA5, 0x24, 0x71, 0xF7, 0xA9, 0xA9, 0x69, 0x10, + 0xB8, 0x55, 0x32, 0x2E, 0xDB, 0x63, 0x40, 0xD8, + 0xA0, 0x0E, 0xF0, 0x92, 0x35, 0x05, 0x11, 0xE3, + 0x0A, 0xBE, 0xC1, 0xFF, 0xF9, 0xE3, 0xA2, 0x6E, + 0x7F, 0xB2, 0x9F, 0x8C, 0x18, 0x30, 0x23, 0xC3, + 0x58, 0x7E, 0x38, 0xDA, 0x00, 0x77, 0xD9, 0xB4, + 0x76, 0x3E, 0x4E, 0x4B, 0x94, 0xB2, 0xBB, 0xC1, + 0x94, 0xC6, 0x65, 0x1E, 0x77, 0xCA, 0xF9, 0x92, + 0xEE, 0xAA, 0xC0, 0x23, 0x2A, 0x28, 0x1B, 0xF6, + 0xB3, 0xA7, 0x39, 0xC1, 0x22, 0x61, 0x16, 0x82, + 0x0A, 0xE8, 0xDB, 0x58, 0x47, 0xA6, 0x7C, 0xBE, + 0xF9, 0xC9, 0x09, 0x1B, 0x46, 0x2D, 0x53, 0x8C, + 0xD7, 0x2B, 0x03, 0x74, 0x6A, 0xE7, 0x7F, 0x5E, + 0x62, 0x29, 0x2C, 0x31, 0x15, 0x62, 0xA8, 0x46, + 0x50, 0x5D, 0xC8, 0x2D, 0xB8, 0x54, 0x33, 0x8A, + 0xE4, 0x9F, 0x52, 0x35, 0xC9, 0x5B, 0x91, 0x17, + 0x8C, 0xCF, 0x2D, 0xD5, 0xCA, 0xCE, 0xF4, 0x03, + 0xEC, 0x9D, 0x18, 0x10, 0xC6, 0x27, 0x2B, 0x04, + 0x5B, 0x3B, 0x71, 0xF9, 0xDC, 0x6B, 0x80, 0xD6, + 0x3F, 0xDD, 0x4A, 0x8E, 0x9A, 0xDB, 0x1E, 0x69, + 0x62, 0xA6, 0x95, 0x26, 0xD4, 0x31, 0x61, 0xC1, + 0xA4, 0x1D, 0x57, 0x0D, 0x79, 0x38, 0xDA, 0xD4, + 0xA4, 0x0E, 0x32, 0x9C, 0xD0, 0xE4, 0x0E, 0x65, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; +static const byte dh_ffdhe6144_g[] = { 0x02 }; +#ifdef HAVE_FFDHE_Q +static const byte dh_ffdhe6144_q[] = { + 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xD6, 0xFC, 0x2A, 0x2C, 0x51, 0x5D, 0xA5, 0x4D, + 0x57, 0xEE, 0x2B, 0x10, 0x13, 0x9E, 0x9E, 0x78, + 0xEC, 0x5C, 0xE2, 0xC1, 0xE7, 0x16, 0x9B, 0x4A, + 0xD4, 
0xF0, 0x9B, 0x20, 0x8A, 0x32, 0x19, 0xFD, + 0xE6, 0x49, 0xCE, 0xE7, 0x12, 0x4D, 0x9F, 0x7C, + 0xBE, 0x97, 0xF1, 0xB1, 0xB1, 0x86, 0x3A, 0xEC, + 0x7B, 0x40, 0xD9, 0x01, 0x57, 0x62, 0x30, 0xBD, + 0x69, 0xEF, 0x8F, 0x6A, 0xEA, 0xFE, 0xB2, 0xB0, + 0x92, 0x19, 0xFA, 0x8F, 0xAF, 0x83, 0x37, 0x68, + 0x42, 0xB1, 0xB2, 0xAA, 0x9E, 0xF6, 0x8D, 0x79, + 0xDA, 0xAB, 0x89, 0xAF, 0x3F, 0xAB, 0xE4, 0x9A, + 0xCC, 0x27, 0x86, 0x38, 0x70, 0x73, 0x45, 0xBB, + 0xF1, 0x53, 0x44, 0xED, 0x79, 0xF7, 0xF4, 0x39, + 0x0E, 0xF8, 0xAC, 0x50, 0x9B, 0x56, 0xF3, 0x9A, + 0x98, 0x56, 0x65, 0x27, 0xA4, 0x1D, 0x3C, 0xBD, + 0x5E, 0x05, 0x58, 0xC1, 0x59, 0x92, 0x7D, 0xB0, + 0xE8, 0x84, 0x54, 0xA5, 0xD9, 0x64, 0x71, 0xFD, + 0xDC, 0xB5, 0x6D, 0x5B, 0xB0, 0x6B, 0xFA, 0x34, + 0x0E, 0xA7, 0xA1, 0x51, 0xEF, 0x1C, 0xA6, 0xFA, + 0x57, 0x2B, 0x76, 0xF3, 0xB1, 0xB9, 0x5D, 0x8C, + 0x85, 0x83, 0xD3, 0xE4, 0x77, 0x05, 0x36, 0xB8, + 0x4F, 0x01, 0x7E, 0x70, 0xE6, 0xFB, 0xF1, 0x76, + 0x60, 0x1A, 0x02, 0x66, 0x94, 0x1A, 0x17, 0xB0, + 0xC8, 0xB9, 0x7F, 0x4E, 0x74, 0xC2, 0xC1, 0xFF, + 0xC7, 0x27, 0x89, 0x19, 0x77, 0x79, 0x40, 0xC1, + 0xE1, 0xFF, 0x1D, 0x8D, 0xA6, 0x37, 0xD6, 0xB9, + 0x9D, 0xDA, 0xFE, 0x5E, 0x17, 0x61, 0x10, 0x02, + 0xE2, 0xC7, 0x78, 0xC1, 0xBE, 0x8B, 0x41, 0xD9, + 0x63, 0x79, 0xA5, 0x13, 0x60, 0xD9, 0x77, 0xFD, + 0x44, 0x35, 0xA1, 0x1C, 0x30, 0x8F, 0xE7, 0xEE, + 0x6F, 0x1A, 0xAD, 0x9D, 0xB2, 0x8C, 0x81, 0xAD, + 0xDE, 0x1A, 0x7A, 0x6F, 0x7C, 0xCE, 0x01, 0x1C, + 0x30, 0xDA, 0x37, 0xE4, 0xEB, 0x73, 0x64, 0x83, + 0xBD, 0x6C, 0x8E, 0x93, 0x48, 0xFB, 0xFB, 0xF7, + 0x2C, 0xC6, 0x58, 0x7D, 0x60, 0xC3, 0x6C, 0x8E, + 0x57, 0x7F, 0x09, 0x84, 0xC2, 0x89, 0xC9, 0x38, + 0x5A, 0x09, 0x86, 0x49, 0xDE, 0x21, 0xBC, 0xA2, + 0x7A, 0x7E, 0xA2, 0x29, 0x71, 0x6B, 0xA6, 0xE9, + 0xB2, 0x79, 0x71, 0x0F, 0x38, 0xFA, 0xA5, 0xFF, + 0xAE, 0x57, 0x41, 0x55, 0xCE, 0x4E, 0xFB, 0x4F, + 0x74, 0x36, 0x95, 0xE2, 0x91, 0x1B, 0x1D, 0x06, + 0xD5, 0xE2, 0x90, 0xCB, 0xCD, 0x86, 0xF5, 0x6D, + 0x0E, 0xDF, 0xCD, 0x21, 0x6A, 0xE2, 0x24, 0x27, + 0x05, 0x5E, 0x68, 0x35, 0xFD, 0x29, 0xEE, 0xF7, + 0x9E, 0x0D, 0x90, 0x77, 0x1F, 0xEA, 0xCE, 0xBE, + 0x12, 0xF2, 0x0E, 0x95, 0xB3, 0x4F, 0x0F, 0x78, + 0xB7, 0x37, 0xA9, 0x61, 0x8B, 0x26, 0xFA, 0x7D, + 0xBC, 0x98, 0x74, 0xF2, 0x72, 0xC4, 0x2B, 0xDB, + 0x56, 0x3E, 0xAF, 0xA1, 0x6B, 0x4F, 0xB6, 0x8C, + 0x3B, 0xB1, 0xE7, 0x8E, 0xAA, 0x81, 0xA0, 0x02, + 0x43, 0xFA, 0xAD, 0xD2, 0xBF, 0x18, 0xE6, 0x3D, + 0x38, 0x9A, 0xE4, 0x43, 0x77, 0xDA, 0x18, 0xC5, + 0x76, 0xB5, 0x0F, 0x00, 0x96, 0xCF, 0x34, 0x19, + 0x54, 0x83, 0xB0, 0x05, 0x48, 0xC0, 0x98, 0x62, + 0x36, 0xE3, 0xBC, 0x7C, 0xB8, 0xD6, 0x80, 0x1C, + 0x04, 0x94, 0xCC, 0xD1, 0x99, 0xE5, 0xC5, 0xBD, + 0x0D, 0x0E, 0xDC, 0x9E, 0xB8, 0xA0, 0x00, 0x1E, + 0x15, 0x27, 0x67, 0x54, 0xFC, 0xC6, 0x85, 0x66, + 0x05, 0x41, 0x48, 0xE6, 0xE7, 0x64, 0xBE, 0xE7, + 0xC7, 0x64, 0xDA, 0xAD, 0x3F, 0xC4, 0x52, 0x35, + 0xA6, 0xDA, 0xD4, 0x28, 0xFA, 0x20, 0xC1, 0x70, + 0xE3, 0x45, 0x00, 0x3F, 0x2F, 0x06, 0xEC, 0x81, + 0x05, 0xFE, 0xB2, 0x5B, 0x22, 0x81, 0xB6, 0x3D, + 0x27, 0x33, 0xBE, 0x96, 0x1C, 0x29, 0x95, 0x1D, + 0x11, 0xDD, 0x22, 0x21, 0x65, 0x7A, 0x9F, 0x53, + 0x1D, 0xDA, 0x2A, 0x19, 0x4D, 0xBB, 0x12, 0x64, + 0x48, 0xBD, 0xEE, 0xB2, 0x58, 0xE0, 0x7E, 0xA6, + 0x59, 0xC7, 0x46, 0x19, 0xA6, 0x38, 0x0E, 0x1D, + 0x66, 0xD6, 0x83, 0x2B, 0xFE, 0x67, 0xF6, 0x38, + 0xCD, 0x8F, 0xAE, 0x1F, 0x27, 0x23, 0x02, 0x0F, + 0x9C, 0x40, 0xA3, 0xFD, 0xA6, 0x7E, 0xDA, 0x3B, + 0xD2, 0x92, 0x38, 0xFB, 0xD4, 0xD4, 0xB4, 0x88, + 0x5C, 0x2A, 0x99, 0x17, 0x6D, 0xB1, 0xA0, 0x6C, + 0x50, 0x07, 0x78, 0x49, 0x1A, 0x82, 0x88, 0xF1, + 0x85, 
0x5F, 0x60, 0xFF, 0xFC, 0xF1, 0xD1, 0x37, + 0x3F, 0xD9, 0x4F, 0xC6, 0x0C, 0x18, 0x11, 0xE1, + 0xAC, 0x3F, 0x1C, 0x6D, 0x00, 0x3B, 0xEC, 0xDA, + 0x3B, 0x1F, 0x27, 0x25, 0xCA, 0x59, 0x5D, 0xE0, + 0xCA, 0x63, 0x32, 0x8F, 0x3B, 0xE5, 0x7C, 0xC9, + 0x77, 0x55, 0x60, 0x11, 0x95, 0x14, 0x0D, 0xFB, + 0x59, 0xD3, 0x9C, 0xE0, 0x91, 0x30, 0x8B, 0x41, + 0x05, 0x74, 0x6D, 0xAC, 0x23, 0xD3, 0x3E, 0x5F, + 0x7C, 0xE4, 0x84, 0x8D, 0xA3, 0x16, 0xA9, 0xC6, + 0x6B, 0x95, 0x81, 0xBA, 0x35, 0x73, 0xBF, 0xAF, + 0x31, 0x14, 0x96, 0x18, 0x8A, 0xB1, 0x54, 0x23, + 0x28, 0x2E, 0xE4, 0x16, 0xDC, 0x2A, 0x19, 0xC5, + 0x72, 0x4F, 0xA9, 0x1A, 0xE4, 0xAD, 0xC8, 0x8B, + 0xC6, 0x67, 0x96, 0xEA, 0xE5, 0x67, 0x7A, 0x01, + 0xF6, 0x4E, 0x8C, 0x08, 0x63, 0x13, 0x95, 0x82, + 0x2D, 0x9D, 0xB8, 0xFC, 0xEE, 0x35, 0xC0, 0x6B, + 0x1F, 0xEE, 0xA5, 0x47, 0x4D, 0x6D, 0x8F, 0x34, + 0xB1, 0x53, 0x4A, 0x93, 0x6A, 0x18, 0xB0, 0xE0, + 0xD2, 0x0E, 0xAB, 0x86, 0xBC, 0x9C, 0x6D, 0x6A, + 0x52, 0x07, 0x19, 0x4E, 0x68, 0x72, 0x07, 0x32, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; +#endif /* HAVE_FFDHE_Q */ + +const DhParams* wc_Dh_ffdhe6144_Get(void) +{ + static const DhParams ffdhe6144 = { + #ifdef HAVE_FFDHE_Q + dh_ffdhe6144_q, sizeof(dh_ffdhe6144_q), + #endif /* HAVE_FFDHE_Q */ + dh_ffdhe6144_p, sizeof(dh_ffdhe6144_p), + dh_ffdhe6144_g, sizeof(dh_ffdhe6144_g) + }; + return &ffdhe6144; +} +#endif + +#ifdef HAVE_FFDHE_8192 +static const byte dh_ffdhe8192_p[] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A, + 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, + 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, + 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB, + 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, + 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, + 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A, + 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, + 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, + 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3, + 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, + 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, + 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72, + 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, + 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, + 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61, + 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, + 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, + 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4, + 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, + 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, + 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC, + 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, + 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, + 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83, + 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, + 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, + 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2, + 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, + 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, + 0xDE, 0x35, 0x5B, 0x3B, 0x65, 0x19, 0x03, 0x5B, + 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38, + 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, + 0x7A, 0xD9, 0x1D, 0x26, 0x91, 0xF7, 0xF7, 0xEE, + 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C, + 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, + 0xB4, 0x13, 0x0C, 0x93, 0xBC, 0x43, 0x79, 0x44, + 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3, + 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, + 0x5C, 0xAE, 0x82, 0xAB, 0x9C, 0x9D, 0xF6, 0x9E, + 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D, + 0xAB, 0xC5, 
0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, + 0x1D, 0xBF, 0x9A, 0x42, 0xD5, 0xC4, 0x48, 0x4E, + 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF, + 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, + 0x25, 0xE4, 0x1D, 0x2B, 0x66, 0x9E, 0x1E, 0xF1, + 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB, + 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6, + 0xAC, 0x7D, 0x5F, 0x42, 0xD6, 0x9F, 0x6D, 0x18, + 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04, + 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A, + 0x71, 0x35, 0xC8, 0x86, 0xEF, 0xB4, 0x31, 0x8A, + 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32, + 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4, + 0x6D, 0xC7, 0x78, 0xF9, 0x71, 0xAD, 0x00, 0x38, + 0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A, + 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C, + 0x2A, 0x4E, 0xCE, 0xA9, 0xF9, 0x8D, 0x0A, 0xCC, + 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF, + 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B, + 0x4D, 0xB5, 0xA8, 0x51, 0xF4, 0x41, 0x82, 0xE1, + 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x0D, 0xD9, 0x02, + 0x0B, 0xFD, 0x64, 0xB6, 0x45, 0x03, 0x6C, 0x7A, + 0x4E, 0x67, 0x7D, 0x2C, 0x38, 0x53, 0x2A, 0x3A, + 0x23, 0xBA, 0x44, 0x42, 0xCA, 0xF5, 0x3E, 0xA6, + 0x3B, 0xB4, 0x54, 0x32, 0x9B, 0x76, 0x24, 0xC8, + 0x91, 0x7B, 0xDD, 0x64, 0xB1, 0xC0, 0xFD, 0x4C, + 0xB3, 0x8E, 0x8C, 0x33, 0x4C, 0x70, 0x1C, 0x3A, + 0xCD, 0xAD, 0x06, 0x57, 0xFC, 0xCF, 0xEC, 0x71, + 0x9B, 0x1F, 0x5C, 0x3E, 0x4E, 0x46, 0x04, 0x1F, + 0x38, 0x81, 0x47, 0xFB, 0x4C, 0xFD, 0xB4, 0x77, + 0xA5, 0x24, 0x71, 0xF7, 0xA9, 0xA9, 0x69, 0x10, + 0xB8, 0x55, 0x32, 0x2E, 0xDB, 0x63, 0x40, 0xD8, + 0xA0, 0x0E, 0xF0, 0x92, 0x35, 0x05, 0x11, 0xE3, + 0x0A, 0xBE, 0xC1, 0xFF, 0xF9, 0xE3, 0xA2, 0x6E, + 0x7F, 0xB2, 0x9F, 0x8C, 0x18, 0x30, 0x23, 0xC3, + 0x58, 0x7E, 0x38, 0xDA, 0x00, 0x77, 0xD9, 0xB4, + 0x76, 0x3E, 0x4E, 0x4B, 0x94, 0xB2, 0xBB, 0xC1, + 0x94, 0xC6, 0x65, 0x1E, 0x77, 0xCA, 0xF9, 0x92, + 0xEE, 0xAA, 0xC0, 0x23, 0x2A, 0x28, 0x1B, 0xF6, + 0xB3, 0xA7, 0x39, 0xC1, 0x22, 0x61, 0x16, 0x82, + 0x0A, 0xE8, 0xDB, 0x58, 0x47, 0xA6, 0x7C, 0xBE, + 0xF9, 0xC9, 0x09, 0x1B, 0x46, 0x2D, 0x53, 0x8C, + 0xD7, 0x2B, 0x03, 0x74, 0x6A, 0xE7, 0x7F, 0x5E, + 0x62, 0x29, 0x2C, 0x31, 0x15, 0x62, 0xA8, 0x46, + 0x50, 0x5D, 0xC8, 0x2D, 0xB8, 0x54, 0x33, 0x8A, + 0xE4, 0x9F, 0x52, 0x35, 0xC9, 0x5B, 0x91, 0x17, + 0x8C, 0xCF, 0x2D, 0xD5, 0xCA, 0xCE, 0xF4, 0x03, + 0xEC, 0x9D, 0x18, 0x10, 0xC6, 0x27, 0x2B, 0x04, + 0x5B, 0x3B, 0x71, 0xF9, 0xDC, 0x6B, 0x80, 0xD6, + 0x3F, 0xDD, 0x4A, 0x8E, 0x9A, 0xDB, 0x1E, 0x69, + 0x62, 0xA6, 0x95, 0x26, 0xD4, 0x31, 0x61, 0xC1, + 0xA4, 0x1D, 0x57, 0x0D, 0x79, 0x38, 0xDA, 0xD4, + 0xA4, 0x0E, 0x32, 0x9C, 0xCF, 0xF4, 0x6A, 0xAA, + 0x36, 0xAD, 0x00, 0x4C, 0xF6, 0x00, 0xC8, 0x38, + 0x1E, 0x42, 0x5A, 0x31, 0xD9, 0x51, 0xAE, 0x64, + 0xFD, 0xB2, 0x3F, 0xCE, 0xC9, 0x50, 0x9D, 0x43, + 0x68, 0x7F, 0xEB, 0x69, 0xED, 0xD1, 0xCC, 0x5E, + 0x0B, 0x8C, 0xC3, 0xBD, 0xF6, 0x4B, 0x10, 0xEF, + 0x86, 0xB6, 0x31, 0x42, 0xA3, 0xAB, 0x88, 0x29, + 0x55, 0x5B, 0x2F, 0x74, 0x7C, 0x93, 0x26, 0x65, + 0xCB, 0x2C, 0x0F, 0x1C, 0xC0, 0x1B, 0xD7, 0x02, + 0x29, 0x38, 0x88, 0x39, 0xD2, 0xAF, 0x05, 0xE4, + 0x54, 0x50, 0x4A, 0xC7, 0x8B, 0x75, 0x82, 0x82, + 0x28, 0x46, 0xC0, 0xBA, 0x35, 0xC3, 0x5F, 0x5C, + 0x59, 0x16, 0x0C, 0xC0, 0x46, 0xFD, 0x82, 0x51, + 0x54, 0x1F, 0xC6, 0x8C, 0x9C, 0x86, 0xB0, 0x22, + 0xBB, 0x70, 0x99, 0x87, 0x6A, 0x46, 0x0E, 0x74, + 0x51, 0xA8, 0xA9, 0x31, 0x09, 0x70, 0x3F, 0xEE, + 0x1C, 0x21, 0x7E, 0x6C, 0x38, 0x26, 0xE5, 0x2C, + 0x51, 0xAA, 0x69, 0x1E, 0x0E, 0x42, 0x3C, 0xFC, + 0x99, 0xE9, 0xE3, 0x16, 0x50, 0xC1, 0x21, 0x7B, + 0x62, 0x48, 
0x16, 0xCD, 0xAD, 0x9A, 0x95, 0xF9, + 0xD5, 0xB8, 0x01, 0x94, 0x88, 0xD9, 0xC0, 0xA0, + 0xA1, 0xFE, 0x30, 0x75, 0xA5, 0x77, 0xE2, 0x31, + 0x83, 0xF8, 0x1D, 0x4A, 0x3F, 0x2F, 0xA4, 0x57, + 0x1E, 0xFC, 0x8C, 0xE0, 0xBA, 0x8A, 0x4F, 0xE8, + 0xB6, 0x85, 0x5D, 0xFE, 0x72, 0xB0, 0xA6, 0x6E, + 0xDE, 0xD2, 0xFB, 0xAB, 0xFB, 0xE5, 0x8A, 0x30, + 0xFA, 0xFA, 0xBE, 0x1C, 0x5D, 0x71, 0xA8, 0x7E, + 0x2F, 0x74, 0x1E, 0xF8, 0xC1, 0xFE, 0x86, 0xFE, + 0xA6, 0xBB, 0xFD, 0xE5, 0x30, 0x67, 0x7F, 0x0D, + 0x97, 0xD1, 0x1D, 0x49, 0xF7, 0xA8, 0x44, 0x3D, + 0x08, 0x22, 0xE5, 0x06, 0xA9, 0xF4, 0x61, 0x4E, + 0x01, 0x1E, 0x2A, 0x94, 0x83, 0x8F, 0xF8, 0x8C, + 0xD6, 0x8C, 0x8B, 0xB7, 0xC5, 0xC6, 0x42, 0x4C, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; +static const byte dh_ffdhe8192_g[] = { 0x02 }; +#ifdef HAVE_FFDHE_Q +static const byte dh_ffdhe8192_q[] = { + 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xD6, 0xFC, 0x2A, 0x2C, 0x51, 0x5D, 0xA5, 0x4D, + 0x57, 0xEE, 0x2B, 0x10, 0x13, 0x9E, 0x9E, 0x78, + 0xEC, 0x5C, 0xE2, 0xC1, 0xE7, 0x16, 0x9B, 0x4A, + 0xD4, 0xF0, 0x9B, 0x20, 0x8A, 0x32, 0x19, 0xFD, + 0xE6, 0x49, 0xCE, 0xE7, 0x12, 0x4D, 0x9F, 0x7C, + 0xBE, 0x97, 0xF1, 0xB1, 0xB1, 0x86, 0x3A, 0xEC, + 0x7B, 0x40, 0xD9, 0x01, 0x57, 0x62, 0x30, 0xBD, + 0x69, 0xEF, 0x8F, 0x6A, 0xEA, 0xFE, 0xB2, 0xB0, + 0x92, 0x19, 0xFA, 0x8F, 0xAF, 0x83, 0x37, 0x68, + 0x42, 0xB1, 0xB2, 0xAA, 0x9E, 0xF6, 0x8D, 0x79, + 0xDA, 0xAB, 0x89, 0xAF, 0x3F, 0xAB, 0xE4, 0x9A, + 0xCC, 0x27, 0x86, 0x38, 0x70, 0x73, 0x45, 0xBB, + 0xF1, 0x53, 0x44, 0xED, 0x79, 0xF7, 0xF4, 0x39, + 0x0E, 0xF8, 0xAC, 0x50, 0x9B, 0x56, 0xF3, 0x9A, + 0x98, 0x56, 0x65, 0x27, 0xA4, 0x1D, 0x3C, 0xBD, + 0x5E, 0x05, 0x58, 0xC1, 0x59, 0x92, 0x7D, 0xB0, + 0xE8, 0x84, 0x54, 0xA5, 0xD9, 0x64, 0x71, 0xFD, + 0xDC, 0xB5, 0x6D, 0x5B, 0xB0, 0x6B, 0xFA, 0x34, + 0x0E, 0xA7, 0xA1, 0x51, 0xEF, 0x1C, 0xA6, 0xFA, + 0x57, 0x2B, 0x76, 0xF3, 0xB1, 0xB9, 0x5D, 0x8C, + 0x85, 0x83, 0xD3, 0xE4, 0x77, 0x05, 0x36, 0xB8, + 0x4F, 0x01, 0x7E, 0x70, 0xE6, 0xFB, 0xF1, 0x76, + 0x60, 0x1A, 0x02, 0x66, 0x94, 0x1A, 0x17, 0xB0, + 0xC8, 0xB9, 0x7F, 0x4E, 0x74, 0xC2, 0xC1, 0xFF, + 0xC7, 0x27, 0x89, 0x19, 0x77, 0x79, 0x40, 0xC1, + 0xE1, 0xFF, 0x1D, 0x8D, 0xA6, 0x37, 0xD6, 0xB9, + 0x9D, 0xDA, 0xFE, 0x5E, 0x17, 0x61, 0x10, 0x02, + 0xE2, 0xC7, 0x78, 0xC1, 0xBE, 0x8B, 0x41, 0xD9, + 0x63, 0x79, 0xA5, 0x13, 0x60, 0xD9, 0x77, 0xFD, + 0x44, 0x35, 0xA1, 0x1C, 0x30, 0x8F, 0xE7, 0xEE, + 0x6F, 0x1A, 0xAD, 0x9D, 0xB2, 0x8C, 0x81, 0xAD, + 0xDE, 0x1A, 0x7A, 0x6F, 0x7C, 0xCE, 0x01, 0x1C, + 0x30, 0xDA, 0x37, 0xE4, 0xEB, 0x73, 0x64, 0x83, + 0xBD, 0x6C, 0x8E, 0x93, 0x48, 0xFB, 0xFB, 0xF7, + 0x2C, 0xC6, 0x58, 0x7D, 0x60, 0xC3, 0x6C, 0x8E, + 0x57, 0x7F, 0x09, 0x84, 0xC2, 0x89, 0xC9, 0x38, + 0x5A, 0x09, 0x86, 0x49, 0xDE, 0x21, 0xBC, 0xA2, + 0x7A, 0x7E, 0xA2, 0x29, 0x71, 0x6B, 0xA6, 0xE9, + 0xB2, 0x79, 0x71, 0x0F, 0x38, 0xFA, 0xA5, 0xFF, + 0xAE, 0x57, 0x41, 0x55, 0xCE, 0x4E, 0xFB, 0x4F, + 0x74, 0x36, 0x95, 0xE2, 0x91, 0x1B, 0x1D, 0x06, + 0xD5, 0xE2, 0x90, 0xCB, 0xCD, 0x86, 0xF5, 0x6D, + 0x0E, 0xDF, 0xCD, 0x21, 0x6A, 0xE2, 0x24, 0x27, + 0x05, 0x5E, 0x68, 0x35, 0xFD, 0x29, 0xEE, 0xF7, + 0x9E, 0x0D, 0x90, 0x77, 0x1F, 0xEA, 0xCE, 0xBE, + 0x12, 0xF2, 0x0E, 0x95, 0xB3, 0x4F, 0x0F, 0x78, + 0xB7, 0x37, 0xA9, 0x61, 0x8B, 0x26, 0xFA, 0x7D, + 0xBC, 0x98, 0x74, 0xF2, 0x72, 0xC4, 0x2B, 0xDB, + 0x56, 0x3E, 0xAF, 0xA1, 0x6B, 0x4F, 0xB6, 0x8C, + 0x3B, 0xB1, 0xE7, 0x8E, 0xAA, 0x81, 0xA0, 0x02, + 0x43, 0xFA, 0xAD, 0xD2, 0xBF, 0x18, 0xE6, 0x3D, + 0x38, 0x9A, 0xE4, 0x43, 0x77, 0xDA, 0x18, 0xC5, + 0x76, 0xB5, 0x0F, 0x00, 0x96, 0xCF, 0x34, 0x19, + 
0x54, 0x83, 0xB0, 0x05, 0x48, 0xC0, 0x98, 0x62, + 0x36, 0xE3, 0xBC, 0x7C, 0xB8, 0xD6, 0x80, 0x1C, + 0x04, 0x94, 0xCC, 0xD1, 0x99, 0xE5, 0xC5, 0xBD, + 0x0D, 0x0E, 0xDC, 0x9E, 0xB8, 0xA0, 0x00, 0x1E, + 0x15, 0x27, 0x67, 0x54, 0xFC, 0xC6, 0x85, 0x66, + 0x05, 0x41, 0x48, 0xE6, 0xE7, 0x64, 0xBE, 0xE7, + 0xC7, 0x64, 0xDA, 0xAD, 0x3F, 0xC4, 0x52, 0x35, + 0xA6, 0xDA, 0xD4, 0x28, 0xFA, 0x20, 0xC1, 0x70, + 0xE3, 0x45, 0x00, 0x3F, 0x2F, 0x06, 0xEC, 0x81, + 0x05, 0xFE, 0xB2, 0x5B, 0x22, 0x81, 0xB6, 0x3D, + 0x27, 0x33, 0xBE, 0x96, 0x1C, 0x29, 0x95, 0x1D, + 0x11, 0xDD, 0x22, 0x21, 0x65, 0x7A, 0x9F, 0x53, + 0x1D, 0xDA, 0x2A, 0x19, 0x4D, 0xBB, 0x12, 0x64, + 0x48, 0xBD, 0xEE, 0xB2, 0x58, 0xE0, 0x7E, 0xA6, + 0x59, 0xC7, 0x46, 0x19, 0xA6, 0x38, 0x0E, 0x1D, + 0x66, 0xD6, 0x83, 0x2B, 0xFE, 0x67, 0xF6, 0x38, + 0xCD, 0x8F, 0xAE, 0x1F, 0x27, 0x23, 0x02, 0x0F, + 0x9C, 0x40, 0xA3, 0xFD, 0xA6, 0x7E, 0xDA, 0x3B, + 0xD2, 0x92, 0x38, 0xFB, 0xD4, 0xD4, 0xB4, 0x88, + 0x5C, 0x2A, 0x99, 0x17, 0x6D, 0xB1, 0xA0, 0x6C, + 0x50, 0x07, 0x78, 0x49, 0x1A, 0x82, 0x88, 0xF1, + 0x85, 0x5F, 0x60, 0xFF, 0xFC, 0xF1, 0xD1, 0x37, + 0x3F, 0xD9, 0x4F, 0xC6, 0x0C, 0x18, 0x11, 0xE1, + 0xAC, 0x3F, 0x1C, 0x6D, 0x00, 0x3B, 0xEC, 0xDA, + 0x3B, 0x1F, 0x27, 0x25, 0xCA, 0x59, 0x5D, 0xE0, + 0xCA, 0x63, 0x32, 0x8F, 0x3B, 0xE5, 0x7C, 0xC9, + 0x77, 0x55, 0x60, 0x11, 0x95, 0x14, 0x0D, 0xFB, + 0x59, 0xD3, 0x9C, 0xE0, 0x91, 0x30, 0x8B, 0x41, + 0x05, 0x74, 0x6D, 0xAC, 0x23, 0xD3, 0x3E, 0x5F, + 0x7C, 0xE4, 0x84, 0x8D, 0xA3, 0x16, 0xA9, 0xC6, + 0x6B, 0x95, 0x81, 0xBA, 0x35, 0x73, 0xBF, 0xAF, + 0x31, 0x14, 0x96, 0x18, 0x8A, 0xB1, 0x54, 0x23, + 0x28, 0x2E, 0xE4, 0x16, 0xDC, 0x2A, 0x19, 0xC5, + 0x72, 0x4F, 0xA9, 0x1A, 0xE4, 0xAD, 0xC8, 0x8B, + 0xC6, 0x67, 0x96, 0xEA, 0xE5, 0x67, 0x7A, 0x01, + 0xF6, 0x4E, 0x8C, 0x08, 0x63, 0x13, 0x95, 0x82, + 0x2D, 0x9D, 0xB8, 0xFC, 0xEE, 0x35, 0xC0, 0x6B, + 0x1F, 0xEE, 0xA5, 0x47, 0x4D, 0x6D, 0x8F, 0x34, + 0xB1, 0x53, 0x4A, 0x93, 0x6A, 0x18, 0xB0, 0xE0, + 0xD2, 0x0E, 0xAB, 0x86, 0xBC, 0x9C, 0x6D, 0x6A, + 0x52, 0x07, 0x19, 0x4E, 0x67, 0xFA, 0x35, 0x55, + 0x1B, 0x56, 0x80, 0x26, 0x7B, 0x00, 0x64, 0x1C, + 0x0F, 0x21, 0x2D, 0x18, 0xEC, 0xA8, 0xD7, 0x32, + 0x7E, 0xD9, 0x1F, 0xE7, 0x64, 0xA8, 0x4E, 0xA1, + 0xB4, 0x3F, 0xF5, 0xB4, 0xF6, 0xE8, 0xE6, 0x2F, + 0x05, 0xC6, 0x61, 0xDE, 0xFB, 0x25, 0x88, 0x77, + 0xC3, 0x5B, 0x18, 0xA1, 0x51, 0xD5, 0xC4, 0x14, + 0xAA, 0xAD, 0x97, 0xBA, 0x3E, 0x49, 0x93, 0x32, + 0xE5, 0x96, 0x07, 0x8E, 0x60, 0x0D, 0xEB, 0x81, + 0x14, 0x9C, 0x44, 0x1C, 0xE9, 0x57, 0x82, 0xF2, + 0x2A, 0x28, 0x25, 0x63, 0xC5, 0xBA, 0xC1, 0x41, + 0x14, 0x23, 0x60, 0x5D, 0x1A, 0xE1, 0xAF, 0xAE, + 0x2C, 0x8B, 0x06, 0x60, 0x23, 0x7E, 0xC1, 0x28, + 0xAA, 0x0F, 0xE3, 0x46, 0x4E, 0x43, 0x58, 0x11, + 0x5D, 0xB8, 0x4C, 0xC3, 0xB5, 0x23, 0x07, 0x3A, + 0x28, 0xD4, 0x54, 0x98, 0x84, 0xB8, 0x1F, 0xF7, + 0x0E, 0x10, 0xBF, 0x36, 0x1C, 0x13, 0x72, 0x96, + 0x28, 0xD5, 0x34, 0x8F, 0x07, 0x21, 0x1E, 0x7E, + 0x4C, 0xF4, 0xF1, 0x8B, 0x28, 0x60, 0x90, 0xBD, + 0xB1, 0x24, 0x0B, 0x66, 0xD6, 0xCD, 0x4A, 0xFC, + 0xEA, 0xDC, 0x00, 0xCA, 0x44, 0x6C, 0xE0, 0x50, + 0x50, 0xFF, 0x18, 0x3A, 0xD2, 0xBB, 0xF1, 0x18, + 0xC1, 0xFC, 0x0E, 0xA5, 0x1F, 0x97, 0xD2, 0x2B, + 0x8F, 0x7E, 0x46, 0x70, 0x5D, 0x45, 0x27, 0xF4, + 0x5B, 0x42, 0xAE, 0xFF, 0x39, 0x58, 0x53, 0x37, + 0x6F, 0x69, 0x7D, 0xD5, 0xFD, 0xF2, 0xC5, 0x18, + 0x7D, 0x7D, 0x5F, 0x0E, 0x2E, 0xB8, 0xD4, 0x3F, + 0x17, 0xBA, 0x0F, 0x7C, 0x60, 0xFF, 0x43, 0x7F, + 0x53, 0x5D, 0xFE, 0xF2, 0x98, 0x33, 0xBF, 0x86, + 0xCB, 0xE8, 0x8E, 0xA4, 0xFB, 0xD4, 0x22, 0x1E, + 0x84, 0x11, 0x72, 0x83, 0x54, 0xFA, 0x30, 0xA7, + 
0x00, 0x8F, 0x15, 0x4A, 0x41, 0xC7, 0xFC, 0x46, + 0x6B, 0x46, 0x45, 0xDB, 0xE2, 0xE3, 0x21, 0x26, + 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; +#endif /* HAVE_FFDHE_Q */ + +const DhParams* wc_Dh_ffdhe8192_Get(void) +{ + static const DhParams ffdhe8192 = { + #ifdef HAVE_FFDHE_Q + dh_ffdhe8192_q, sizeof(dh_ffdhe8192_q), + #endif /* HAVE_FFDHE_Q */ + dh_ffdhe8192_p, sizeof(dh_ffdhe8192_p), + dh_ffdhe8192_g, sizeof(dh_ffdhe8192_g) + }; + return &ffdhe8192; +} +#endif + +int wc_InitDhKey_ex(DhKey* key, void* heap, int devId) +{ + int ret = 0; + + if (key == NULL) + return BAD_FUNC_ARG; + + key->heap = heap; /* for XMALLOC/XFREE in future */ + +#if !defined(WOLFSSL_QT) && !defined(OPENSSL_ALL) + if (mp_init_multi(&key->p, &key->g, &key->q, NULL, NULL, NULL) != MP_OKAY) +#else + if (mp_init_multi(&key->p,&key->g,&key->q,&key->pub,&key->priv,NULL) != MP_OKAY) +#endif + return MEMORY_E; + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH) + /* handle as async */ + ret = wolfAsync_DevCtxInit(&key->asyncDev, WOLFSSL_ASYNC_MARKER_DH, + key->heap, devId); +#else + (void)devId; +#endif + + return ret; +} + +int wc_InitDhKey(DhKey* key) +{ + return wc_InitDhKey_ex(key, NULL, INVALID_DEVID); +} + + +int wc_FreeDhKey(DhKey* key) +{ + if (key) { + mp_clear(&key->p); + mp_clear(&key->g); + mp_clear(&key->q); + + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH) + wolfAsync_DevCtxFree(&key->asyncDev, WOLFSSL_ASYNC_MARKER_DH); + #endif + } + return 0; +} + + +#ifndef WC_NO_RNG +/* if defined to not use floating point values do not compile in */ +#ifndef WOLFSSL_DH_CONST + static word32 DiscreteLogWorkFactor(word32 n) + { + /* assuming discrete log takes about the same time as factoring */ + if (n < 5) + return 0; + else + return (word32)(2.4 * XPOW((double)n, 1.0/3.0) * + XPOW(XLOG((double)n), 2.0/3.0) - 5); + } +#endif /* WOLFSSL_DH_CONST*/ + + +/* if not using fixed points use DiscreteLogWorkFactor function for unusual size + otherwise round up on size needed */ +#ifndef WOLFSSL_DH_CONST + #define WOLFSSL_DH_ROUND(x) +#else + #define WOLFSSL_DH_ROUND(x) \ + do { \ + if (x % 128) { \ + x &= 0xffffff80;\ + x += 128; \ + } \ + } \ + while (0) +#endif + + +#ifndef WOLFSSL_NO_DH186 +/* validate that (L,N) match allowed sizes from SP 800-56A, Section 5.5.1.1. + * modLen - represents L, the size of p in bits + * divLen - represents N, the size of q in bits + * return 0 on success, -1 on error */ +static int CheckDhLN(int modLen, int divLen) +{ + int ret = -1; + + switch (modLen) { + /* FA */ + case 1024: + if (divLen == 160) + ret = 0; + break; + /* FB, FC */ + case 2048: + if (divLen == 224 || divLen == 256) + ret = 0; + break; + default: + break; + } + + return ret; +} + + +/* Create DH private key + * + * Based on NIST FIPS 186-4, + * "B.1.1 Key Pair Generation Using Extra Random Bits" + * + * dh - pointer to initialized DhKey structure, needs to have dh->q + * rng - pointer to initialized WC_RNG structure + * priv - output location for generated private key + * privSz - IN/OUT, size of priv buffer, size of generated private key + * + * return 0 on success, negative on error */ +static int GeneratePrivateDh186(DhKey* key, WC_RNG* rng, byte* priv, + word32* privSz) +{ + byte* cBuf; + int qSz, pSz, cSz, err; +#ifdef WOLFSSL_SMALL_STACK + mp_int* tmpQ = NULL; + mp_int* tmpX = NULL; +#else + mp_int tmpQ[1], tmpX[1]; +#endif + + /* Parameters validated in calling functions. 
*/ + + if (mp_iszero(&key->q) == MP_YES) { + WOLFSSL_MSG("DH q parameter needed for FIPS 186-4 key generation"); + return BAD_FUNC_ARG; + } + + qSz = mp_unsigned_bin_size(&key->q); + pSz = mp_unsigned_bin_size(&key->p); + + /* verify (L,N) pair bit lengths */ + if (CheckDhLN(pSz * WOLFSSL_BIT_SIZE, qSz * WOLFSSL_BIT_SIZE) != 0) { + WOLFSSL_MSG("DH param sizes do not match SP 800-56A requirements"); + return BAD_FUNC_ARG; + } + + /* generate extra 64 bits so that bias from mod function is negligible */ + cSz = qSz + (64 / WOLFSSL_BIT_SIZE); + cBuf = (byte*)XMALLOC(cSz, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (cBuf == NULL) { + return MEMORY_E; + } +#ifdef WOLFSSL_SMALL_STACK + tmpQ = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH); + if (tmpQ == NULL) { + XFREE(cBuf, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + return MEMORY_E; + } + tmpX = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH); + if (tmpX == NULL) { + XFREE(cBuf, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(tmpQ, key->heap, DYNAMIC_TYPE_DH); + return MEMORY_E; + } +#endif + + + if ((err = mp_init_multi(tmpX, tmpQ, NULL, NULL, NULL, NULL)) + != MP_OKAY) { + XFREE(cBuf, key->heap, DYNAMIC_TYPE_TMP_BUFFER); +#ifdef WOLFSSL_SMALL_STACK + XFREE(tmpQ, key->heap, DYNAMIC_TYPE_DH); + XFREE(tmpX, key->heap, DYNAMIC_TYPE_DH); +#endif + return err; + } + + do { + /* generate N+64 bits (c) from RBG into tmpX, making sure positive. + * Hash_DRBG uses SHA-256 which matches maximum + * requested_security_strength of (L,N) */ + err = wc_RNG_GenerateBlock(rng, cBuf, cSz); + if (err == MP_OKAY) + err = mp_read_unsigned_bin(tmpX, cBuf, cSz); + if (err != MP_OKAY) { + mp_clear(tmpX); + mp_clear(tmpQ); + XFREE(cBuf, key->heap, DYNAMIC_TYPE_TMP_BUFFER); +#ifdef WOLFSSL_SMALL_STACK + XFREE(tmpQ, key->heap, DYNAMIC_TYPE_DH); + XFREE(tmpX, key->heap, DYNAMIC_TYPE_DH); +#endif + return err; + } + } while (mp_cmp_d(tmpX, 1) != MP_GT); + + ForceZero(cBuf, cSz); + XFREE(cBuf, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + + /* tmpQ = q - 1 */ + if (err == MP_OKAY) + err = mp_copy(&key->q, tmpQ); + + if (err == MP_OKAY) + err = mp_sub_d(tmpQ, 1, tmpQ); + + /* x = c mod (q-1), tmpX holds c */ + if (err == MP_OKAY) + err = mp_mod(tmpX, tmpQ, tmpX); + + /* x = c mod (q-1) + 1 */ + if (err == MP_OKAY) + err = mp_add_d(tmpX, 1, tmpX); + + /* copy tmpX into priv */ + if (err == MP_OKAY) { + pSz = mp_unsigned_bin_size(tmpX); + if (pSz > (int)*privSz) { + WOLFSSL_MSG("DH private key output buffer too small"); + err = BAD_FUNC_ARG; + } else { + *privSz = pSz; + err = mp_to_unsigned_bin(tmpX, priv); + } + } + + mp_forcezero(tmpX); + mp_clear(tmpX); + mp_clear(tmpQ); +#ifdef WOLFSSL_SMALL_STACK + XFREE(tmpQ, key->heap, DYNAMIC_TYPE_DH); + XFREE(tmpX, key->heap, DYNAMIC_TYPE_DH); +#endif + + return err; +} +#endif /* WOLFSSL_NO_DH186 */ +#endif /* !WC_NO_RNG */ + +static int GeneratePrivateDh(DhKey* key, WC_RNG* rng, byte* priv, + word32* privSz) +{ +#ifndef WC_NO_RNG + int ret = 0; + word32 sz = 0; + +#ifndef WOLFSSL_NO_DH186 + if (mp_iszero(&key->q) == MP_NO) { + + /* q param available, use NIST FIPS 186-4, "B.1.1 Key Pair + * Generation Using Extra Random Bits" */ + ret = GeneratePrivateDh186(key, rng, priv, privSz); + + } else +#endif + { + + sz = mp_unsigned_bin_size(&key->p); + + /* Table of predetermined values from the operation + 2 * DiscreteLogWorkFactor(sz * WOLFSSL_BIT_SIZE) / + WOLFSSL_BIT_SIZE + 1 + Sizes in table checked against RFC 3526 + */ + WOLFSSL_DH_ROUND(sz); /* if using fixed points only, then round up */ + switch (sz) { + case 128: 
sz = 21; break; + case 256: sz = 29; break; + case 384: sz = 34; break; + case 512: sz = 39; break; + case 640: sz = 42; break; + case 768: sz = 46; break; + case 896: sz = 49; break; + case 1024: sz = 52; break; + default: + #ifndef WOLFSSL_DH_CONST + /* if using floating points and size of p is not in table */ + sz = min(sz, 2 * DiscreteLogWorkFactor(sz * WOLFSSL_BIT_SIZE) / + WOLFSSL_BIT_SIZE + 1); + break; + #else + return BAD_FUNC_ARG; + #endif + } + + ret = wc_RNG_GenerateBlock(rng, priv, sz); + + if (ret == 0) { + priv[0] |= 0x0C; + *privSz = sz; + } + } + + return ret; +#else + (void)key; + (void)rng; + (void)priv; + (void)privSz; + return NOT_COMPILED_IN; +#endif /* WC_NO_RNG */ +} + + +static int GeneratePublicDh(DhKey* key, byte* priv, word32 privSz, + byte* pub, word32* pubSz) +{ + int ret = 0; +#ifndef WOLFSSL_SP_MATH +#ifdef WOLFSSL_SMALL_STACK + mp_int* x; + mp_int* y; +#else + mp_int x[1]; + mp_int y[1]; +#endif +#endif + +#ifdef WOLFSSL_HAVE_SP_DH +#ifndef WOLFSSL_SP_NO_2048 + if (mp_count_bits(&key->p) == 2048) + return sp_DhExp_2048(&key->g, priv, privSz, &key->p, pub, pubSz); +#endif +#ifndef WOLFSSL_SP_NO_3072 + if (mp_count_bits(&key->p) == 3072) + return sp_DhExp_3072(&key->g, priv, privSz, &key->p, pub, pubSz); +#endif +#ifdef WOLFSSL_SP_4096 + if (mp_count_bits(&key->p) == 4096) + return sp_DhExp_4096(&key->g, priv, privSz, &key->p, pub, pubSz); +#endif +#endif + +#ifndef WOLFSSL_SP_MATH +#ifdef WOLFSSL_SMALL_STACK + x = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH); + if (x == NULL) + return MEMORY_E; + y = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH); + if (y == NULL) { + XFREE(x, key->heap, DYNAMIC_TYPE_DH); + return MEMORY_E; + } +#endif + if (mp_init_multi(x, y, 0, 0, 0, 0) != MP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(y, key->heap, DYNAMIC_TYPE_DH); + XFREE(x, key->heap, DYNAMIC_TYPE_DH); + #endif + return MP_INIT_E; + } + + if (mp_read_unsigned_bin(x, priv, privSz) != MP_OKAY) + ret = MP_READ_E; + + if (ret == 0 && mp_exptmod(&key->g, x, &key->p, y) != MP_OKAY) + ret = MP_EXPTMOD_E; + + if (ret == 0 && mp_to_unsigned_bin(y, pub) != MP_OKAY) + ret = MP_TO_E; + + if (ret == 0) + *pubSz = mp_unsigned_bin_size(y); + + mp_clear(y); + mp_clear(x); +#ifdef WOLFSSL_SMALL_STACK + XFREE(y, key->heap, DYNAMIC_TYPE_DH); + XFREE(x, key->heap, DYNAMIC_TYPE_DH); +#endif +#else + ret = WC_KEY_SIZE_E; +#endif + + return ret; +} + +static int wc_DhGenerateKeyPair_Sync(DhKey* key, WC_RNG* rng, + byte* priv, word32* privSz, byte* pub, word32* pubSz) +{ + int ret; + + if (key == NULL || rng == NULL || priv == NULL || privSz == NULL || + pub == NULL || pubSz == NULL) { + return BAD_FUNC_ARG; + } + + ret = GeneratePrivateDh(key, rng, priv, privSz); + + return (ret != 0) ? 
ret : GeneratePublicDh(key, priv, *privSz, pub, pubSz); +} + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH) +static int wc_DhGenerateKeyPair_Async(DhKey* key, WC_RNG* rng, + byte* priv, word32* privSz, byte* pub, word32* pubSz) +{ + int ret; + +#if defined(HAVE_INTEL_QA) + word32 pBits; + + /* QAT DH sizes: 768, 1024, 1536, 2048, 3072 and 4096 bits */ + pBits = mp_unsigned_bin_size(&key->p) * 8; + if (pBits == 768 || pBits == 1024 || pBits == 1536 || + pBits == 2048 || pBits == 3072 || pBits == 4096) { + mp_int x; + + ret = mp_init(&x); + if (ret != MP_OKAY) + return ret; + + ret = GeneratePrivateDh(key, rng, priv, privSz); + if (ret == 0) + ret = mp_read_unsigned_bin(&x, priv, *privSz); + if (ret == MP_OKAY) + ret = wc_mp_to_bigint(&x, &x.raw); + if (ret == MP_OKAY) + ret = wc_mp_to_bigint(&key->p, &key->p.raw); + if (ret == MP_OKAY) + ret = wc_mp_to_bigint(&key->g, &key->g.raw); + if (ret == MP_OKAY) + ret = IntelQaDhKeyGen(&key->asyncDev, &key->p.raw, &key->g.raw, + &x.raw, pub, pubSz); + mp_clear(&x); + + return ret; + } + +#elif defined(HAVE_CAVIUM) + /* TODO: Not implemented - use software for now */ + +#else /* WOLFSSL_ASYNC_CRYPT_TEST */ + if (wc_AsyncTestInit(&key->asyncDev, ASYNC_TEST_DH_GEN)) { + WC_ASYNC_TEST* testDev = &key->asyncDev.test; + testDev->dhGen.key = key; + testDev->dhGen.rng = rng; + testDev->dhGen.priv = priv; + testDev->dhGen.privSz = privSz; + testDev->dhGen.pub = pub; + testDev->dhGen.pubSz = pubSz; + return WC_PENDING_E; + } +#endif + + /* otherwise use software DH */ + ret = wc_DhGenerateKeyPair_Sync(key, rng, priv, privSz, pub, pubSz); + + return ret; +} +#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_DH */ + + +/* Check DH Public Key for invalid numbers, optionally allowing + * the public key to be checked against the large prime (q). + * Check per process in SP 800-56Ar3, section 5.6.2.3.1. + * + * key DH key group parameters. + * pub Public Key. + * pubSz Public Key size. 
+ * prime Large prime (q), optionally NULL to skip check
+ * primeSz Size of large prime
+ *
+ * returns 0 on success or error code
+ */
+int wc_DhCheckPubKey_ex(DhKey* key, const byte* pub, word32 pubSz,
+                        const byte* prime, word32 primeSz)
+{
+    int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+    mp_int* y = NULL;
+    mp_int* p = NULL;
+    mp_int* q = NULL;
+#else
+    mp_int y[1];
+    mp_int p[1];
+    mp_int q[1];
+#endif
+
+    if (key == NULL || pub == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    y = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+    if (y == NULL)
+        return MEMORY_E;
+    p = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+    if (p == NULL) {
+        XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+        return MEMORY_E;
+    }
+    q = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+    if (q == NULL) {
+        XFREE(p, key->heap, DYNAMIC_TYPE_DH);
+        XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+        return MEMORY_E;
+    }
+#endif
+
+    if (mp_init_multi(y, p, q, NULL, NULL, NULL) != MP_OKAY) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(q, key->heap, DYNAMIC_TYPE_DH);
+        XFREE(p, key->heap, DYNAMIC_TYPE_DH);
+        XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+    #endif
+        return MP_INIT_E;
+    }
+
+    if (mp_read_unsigned_bin(y, pub, pubSz) != MP_OKAY) {
+        ret = MP_READ_E;
+    }
+
+    if (ret == 0 && prime != NULL) {
+        if (mp_read_unsigned_bin(q, prime, primeSz) != MP_OKAY)
+            ret = MP_READ_E;
+    }
+    else if (mp_iszero(&key->q) == MP_NO) {
+        /* use q available in DhKey */
+        if (mp_copy(&key->q, q) != MP_OKAY)
+            ret = MP_INIT_E;
+    }
+
+    /* SP 800-56Ar3, section 5.6.2.3.1, process step 1 */
+    /* pub (y) should not be 0 or 1 */
+    if (ret == 0 && mp_cmp_d(y, 2) == MP_LT) {
+        ret = MP_CMP_E;
+    }
+
+    /* pub (y) shouldn't be greater than or equal to p - 1 */
+    if (ret == 0 && mp_copy(&key->p, p) != MP_OKAY) {
+        ret = MP_INIT_E;
+    }
+    if (ret == 0 && mp_sub_d(p, 2, p) != MP_OKAY) {
+        ret = MP_SUB_E;
+    }
+    if (ret == 0 && mp_cmp(y, p) == MP_GT) {
+        ret = MP_CMP_E;
+    }
+
+    if (ret == 0 && (prime != NULL || (mp_iszero(&key->q) == MP_NO))) {
+        /* restore key->p into p */
+        if (mp_copy(&key->p, p) != MP_OKAY)
+            ret = MP_INIT_E;
+    }
+
+    if (ret == 0 && prime != NULL) {
+#ifdef WOLFSSL_HAVE_SP_DH
+#ifndef WOLFSSL_SP_NO_2048
+        if (mp_count_bits(&key->p) == 2048) {
+            ret = sp_ModExp_2048(y, q, p, y);
+            if (ret != 0)
+                ret = MP_EXPTMOD_E;
+        }
+        else
+#endif
+#ifndef WOLFSSL_SP_NO_3072
+        if (mp_count_bits(&key->p) == 3072) {
+            ret = sp_ModExp_3072(y, q, p, y);
+            if (ret != 0)
+                ret = MP_EXPTMOD_E;
+        }
+        else
+#endif
+#ifdef WOLFSSL_SP_4096
+        if (mp_count_bits(&key->p) == 4096) {
+            ret = sp_ModExp_4096(y, q, p, y);
+            if (ret != 0)
+                ret = MP_EXPTMOD_E;
+        }
+        else
+#endif
+#endif
+        {
+            /* SP 800-56Ar3, section 5.6.2.3.1, process step 2 */
+#ifndef WOLFSSL_SP_MATH
+            /* calculate (y^q) mod(p), store back into y */
+            if (mp_exptmod(y, q, p, y) != MP_OKAY)
+                ret = MP_EXPTMOD_E;
+#else
+            ret = WC_KEY_SIZE_E;
+#endif
+        }
+
+        /* verify above == 1 */
+        if (ret == 0 && mp_cmp_d(y, 1) != MP_EQ)
+            ret = MP_CMP_E;
+    }
+
+    mp_clear(y);
+    mp_clear(p);
+    mp_clear(q);
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(q, key->heap, DYNAMIC_TYPE_DH);
+    XFREE(p, key->heap, DYNAMIC_TYPE_DH);
+    XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+#endif
+
+    return ret;
+}
+
+
+/* Check DH Public Key for invalid numbers
+ *
+ * key DH key group parameters.
+ * pub Public Key.
+ * pubSz Public Key size.
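+ *
+ * Usage sketch (illustrative only; dhKey, peerPub and peerPubSz are
+ * hypothetical caller-side names, with dhKey assumed initialized and
+ * loaded with group parameters via wc_DhSetKey):
+ *
+ *     if (wc_DhCheckPubKey(&dhKey, peerPub, peerPubSz) != 0)
+ *         return DH_CHECK_PUB_E;   (reject the value before wc_DhAgree)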
+ * + * returns 0 on success or error code + */ +int wc_DhCheckPubKey(DhKey* key, const byte* pub, word32 pubSz) +{ + return wc_DhCheckPubKey_ex(key, pub, pubSz, NULL, 0); +} + + +/** + * Quick validity check of public key value against prime. + * Checks are: + * - Public key not 0 or 1 + * - Public key not equal to prime or prime - 1 + * - Public key not bigger than prime. + * + * prime Big-endian encoding of prime in bytes. + * primeSz Size of prime in bytes. + * pub Big-endian encoding of public key in bytes. + * pubSz Size of public key in bytes. + */ +int wc_DhCheckPubValue(const byte* prime, word32 primeSz, const byte* pub, + word32 pubSz) +{ + int ret = 0; + word32 i; + + for (i = 0; i < pubSz && pub[i] == 0; i++) { + } + pubSz -= i; + pub += i; + + if (pubSz == 0 || (pubSz == 1 && pub[0] == 1)) + ret = MP_VAL; + else if (pubSz == primeSz) { + for (i = 0; i < pubSz-1 && pub[i] == prime[i]; i++) { + } + if (i == pubSz-1 && (pub[i] == prime[i] || pub[i] == prime[i] - 1)) + ret = MP_VAL; + else if (pub[i] > prime[i]) + ret = MP_VAL; + } + else if (pubSz > primeSz) + ret = MP_VAL; + + return ret; +} + + +/* Check DH Private Key for invalid numbers, optionally allowing + * the private key to be checked against the large prime (q). + * Check per process in SP 800-56Ar3, section 5.6.2.1.2. + * + * key DH key group parameters. + * priv Private Key. + * privSz Private Key size. + * prime Large prime (q), optionally NULL to skip check + * primeSz Size of large prime + * + * returns 0 on success or error code + */ +int wc_DhCheckPrivKey_ex(DhKey* key, const byte* priv, word32 privSz, + const byte* prime, word32 primeSz) +{ + int ret = 0; +#ifdef WOLFSSL_SMALL_STACK + mp_int* x = NULL; + mp_int* q = NULL; +#else + mp_int x[1]; + mp_int q[1]; +#endif + + if (key == NULL || priv == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_SMALL_STACK + x = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH); + if (x == NULL) + return MEMORY_E; + q = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH); + if (q == NULL) { + XFREE(x, key->heap, DYNAMIC_TYPE_DH); + return MEMORY_E; + } +#endif + + if (mp_init_multi(x, q, NULL, NULL, NULL, NULL) != MP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(q, key->heap, DYNAMIC_TYPE_DH); + XFREE(x, key->heap, DYNAMIC_TYPE_DH); + #endif + return MP_INIT_E; + } + + if (mp_read_unsigned_bin(x, priv, privSz) != MP_OKAY) { + ret = MP_READ_E; + } + + if (ret == 0) { + if (prime != NULL) { + if (mp_read_unsigned_bin(q, prime, primeSz) != MP_OKAY) + ret = MP_READ_E; + } + else if (mp_iszero(&key->q) == MP_NO) { + /* use q available in DhKey */ + if (mp_copy(&key->q, q) != MP_OKAY) + ret = MP_INIT_E; + } + } + + /* priv (x) should not be 0 */ + if (ret == 0) { + if (mp_cmp_d(x, 0) == MP_EQ) + ret = MP_CMP_E; + } + + if (ret == 0) { + if (mp_iszero(q) == MP_NO) { + /* priv (x) shouldn't be greater than q - 1 */ + if (ret == 0) { + if (mp_copy(&key->q, q) != MP_OKAY) + ret = MP_INIT_E; + } + if (ret == 0) { + if (mp_sub_d(q, 1, q) != MP_OKAY) + ret = MP_SUB_E; + } + if (ret == 0) { + if (mp_cmp(x, q) == MP_GT) + ret = DH_CHECK_PRIV_E; + } + } + } + + mp_clear(x); + mp_clear(q); +#ifdef WOLFSSL_SMALL_STACK + XFREE(q, key->heap, DYNAMIC_TYPE_DH); + XFREE(x, key->heap, DYNAMIC_TYPE_DH); +#endif + + return ret; +} + + +/* Check DH Private Key for invalid numbers + * + * key DH key group parameters. + * priv Private Key. + * privSz Private Key size. 
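+ *
+ * Usage sketch (illustrative only; dhKey, priv and privSz are hypothetical
+ * caller-side names for an initialized key and a big-endian private value):
+ *
+ *     if (wc_DhCheckPrivKey(&dhKey, priv, privSz) != 0)
+ *         return DH_CHECK_PRIV_E;   (do not use this key pair)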
+ * + * returns 0 on success or error code + */ +int wc_DhCheckPrivKey(DhKey* key, const byte* priv, word32 privSz) +{ + return wc_DhCheckPrivKey_ex(key, priv, privSz, NULL, 0); +} + + +/* Check DH Keys for pair-wise consistency per process in + * SP 800-56Ar3, section 5.6.2.1.4, method (b) for FFC. + * + * key DH key group parameters. + * pub Public Key. + * pubSz Public Key size. + * priv Private Key. + * privSz Private Key size. + * + * returns 0 on success or error code + */ +int wc_DhCheckKeyPair(DhKey* key, const byte* pub, word32 pubSz, + const byte* priv, word32 privSz) +{ +#ifdef WOLFSSL_SMALL_STACK + mp_int* publicKey = NULL; + mp_int* privateKey = NULL; + mp_int* checkKey = NULL; +#else + mp_int publicKey[1]; + mp_int privateKey[1]; + mp_int checkKey[1]; +#endif + int ret = 0; + + if (key == NULL || pub == NULL || priv == NULL) + return BAD_FUNC_ARG; + +#ifdef WOLFSSL_SMALL_STACK + publicKey = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH); + if (publicKey == NULL) + return MEMORY_E; + privateKey = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH); + if (privateKey == NULL) { + XFREE(publicKey, key->heap, DYNAMIC_TYPE_DH); + return MEMORY_E; + } + checkKey = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH); + if (checkKey == NULL) { + XFREE(privateKey, key->heap, DYNAMIC_TYPE_DH); + XFREE(publicKey, key->heap, DYNAMIC_TYPE_DH); + return MEMORY_E; + } +#endif + + if (mp_init_multi(publicKey, privateKey, checkKey, + NULL, NULL, NULL) != MP_OKAY) { + + #ifdef WOLFSSL_SMALL_STACK + XFREE(privateKey, key->heap, DYNAMIC_TYPE_DH); + XFREE(publicKey, key->heap, DYNAMIC_TYPE_DH); + XFREE(checkKey, key->heap, DYNAMIC_TYPE_DH); + #endif + return MP_INIT_E; + } + + /* Load the private and public keys into big integers. */ + if (mp_read_unsigned_bin(publicKey, pub, pubSz) != MP_OKAY || + mp_read_unsigned_bin(privateKey, priv, privSz) != MP_OKAY) { + + ret = MP_READ_E; + } + + /* Calculate checkKey = g^privateKey mod p */ + if (ret == 0) { +#ifdef WOLFSSL_HAVE_SP_DH +#ifndef WOLFSSL_SP_NO_2048 + if (mp_count_bits(&key->p) == 2048) { + ret = sp_ModExp_2048(&key->g, privateKey, &key->p, checkKey); + if (ret != 0) + ret = MP_EXPTMOD_E; + } + else +#endif +#ifndef WOLFSSL_SP_NO_3072 + if (mp_count_bits(&key->p) == 3072) { + ret = sp_ModExp_3072(&key->g, privateKey, &key->p, checkKey); + if (ret != 0) + ret = MP_EXPTMOD_E; + } + else +#endif +#ifdef WOLFSSL_SP_4096 + if (mp_count_bits(&key->p) == 4096) { + ret = sp_ModExp_4096(&key->g, privateKey, &key->p, checkKey); + if (ret != 0) + ret = MP_EXPTMOD_E; + } + else +#endif +#endif + { +#ifndef WOLFSSL_SP_MATH + if (mp_exptmod(&key->g, privateKey, &key->p, checkKey) != MP_OKAY) + ret = MP_EXPTMOD_E; +#else + ret = WC_KEY_SIZE_E; +#endif + } + } + + /* Compare the calculated public key to the supplied check value. 
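+     * That is, the pair is consistent only when checkKey equals publicKey,
+     * i.e. pub == g^priv (mod p), per SP 800-56Ar3 section 5.6.2.1.4.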
*/ + if (ret == 0) { + if (mp_cmp(checkKey, publicKey) != MP_EQ) + ret = MP_CMP_E; + } + + mp_forcezero(privateKey); + mp_clear(privateKey); + mp_clear(publicKey); + mp_clear(checkKey); +#ifdef WOLFSSL_SMALL_STACK + XFREE(checkKey, key->heap, DYNAMIC_TYPE_DH); + XFREE(privateKey, key->heap, DYNAMIC_TYPE_DH); + XFREE(publicKey, key->heap, DYNAMIC_TYPE_DH); +#endif + + return ret; +} + + +int wc_DhGenerateKeyPair(DhKey* key, WC_RNG* rng, + byte* priv, word32* privSz, byte* pub, word32* pubSz) +{ + int ret; + + if (key == NULL || rng == NULL || priv == NULL || privSz == NULL || + pub == NULL || pubSz == NULL) { + return BAD_FUNC_ARG; + } + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH) + if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_DH) { + ret = wc_DhGenerateKeyPair_Async(key, rng, priv, privSz, pub, pubSz); + } + else +#endif + { + ret = wc_DhGenerateKeyPair_Sync(key, rng, priv, privSz, pub, pubSz); + } + + return ret; +} + + +static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz, + const byte* priv, word32 privSz, const byte* otherPub, word32 pubSz) +{ + int ret = 0; +#ifdef WOLFSSL_SMALL_STACK + mp_int* y; +#ifndef WOLFSSL_SP_MATH + mp_int* x; + mp_int* z; +#endif +#else + mp_int y[1]; +#ifndef WOLFSSL_SP_MATH + mp_int x[1]; + mp_int z[1]; +#endif +#endif + +#ifdef WOLFSSL_VALIDATE_FFC_IMPORT + if (wc_DhCheckPrivKey(key, priv, privSz) != 0) { + WOLFSSL_MSG("wc_DhAgree wc_DhCheckPrivKey failed"); + return DH_CHECK_PRIV_E; + } + + if (wc_DhCheckPubKey(key, otherPub, pubSz) != 0) { + WOLFSSL_MSG("wc_DhAgree wc_DhCheckPubKey failed"); + return DH_CHECK_PUB_E; + } +#endif + +#ifdef WOLFSSL_SMALL_STACK + y = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH); + if (y == NULL) + return MEMORY_E; +#ifndef WOLFSSL_SP_MATH + x = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH); + if (x == NULL) { + XFREE(y, key->heap, DYNAMIC_TYPE_DH); + return MEMORY_E; + } + z = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH); + if (z == NULL) { + XFREE(x, key->heap, DYNAMIC_TYPE_DH); + XFREE(y, key->heap, DYNAMIC_TYPE_DH); + return MEMORY_E; + } +#endif +#endif + +#ifdef WOLFSSL_HAVE_SP_DH +#ifndef WOLFSSL_SP_NO_2048 + if (mp_count_bits(&key->p) == 2048) { + if (mp_init(y) != MP_OKAY) + return MP_INIT_E; + + if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY) + ret = MP_READ_E; + + if (ret == 0) + ret = sp_DhExp_2048(y, priv, privSz, &key->p, agree, agreeSz); + + mp_clear(y); + #ifdef WOLFSSL_SMALL_STACK + #ifndef WOLFSSL_SP_MATH + XFREE(z, key->heap, DYNAMIC_TYPE_DH); + XFREE(x, key->heap, DYNAMIC_TYPE_DH); + #endif + XFREE(y, key->heap, DYNAMIC_TYPE_DH); + #endif + return ret; + } +#endif +#ifndef WOLFSSL_SP_NO_3072 + if (mp_count_bits(&key->p) == 3072) { + if (mp_init(y) != MP_OKAY) + return MP_INIT_E; + + if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY) + ret = MP_READ_E; + + if (ret == 0) + ret = sp_DhExp_3072(y, priv, privSz, &key->p, agree, agreeSz); + + mp_clear(y); + #ifdef WOLFSSL_SMALL_STACK + #ifndef WOLFSSL_SP_MATH + XFREE(z, key->heap, DYNAMIC_TYPE_DH); + XFREE(x, key->heap, DYNAMIC_TYPE_DH); + #endif + XFREE(y, key->heap, DYNAMIC_TYPE_DH); + #endif + return ret; + } +#endif +#ifdef WOLFSSL_SP_4096 + if (mp_count_bits(&key->p) == 4096) { + if (mp_init(y) != MP_OKAY) + return MP_INIT_E; + + if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY) + ret = MP_READ_E; + + if (ret == 0) + ret = sp_DhExp_4096(y, priv, privSz, &key->p, agree, agreeSz); + + mp_clear(y); + #ifdef 
WOLFSSL_SMALL_STACK + #ifndef WOLFSSL_SP_MATH + XFREE(z, key->heap, DYNAMIC_TYPE_DH); + XFREE(x, key->heap, DYNAMIC_TYPE_DH); + #endif + XFREE(y, key->heap, DYNAMIC_TYPE_DH); + #endif + return ret; + } +#endif +#endif + +#ifndef WOLFSSL_SP_MATH + if (mp_init_multi(x, y, z, 0, 0, 0) != MP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(z, key->heap, DYNAMIC_TYPE_DH); + XFREE(x, key->heap, DYNAMIC_TYPE_DH); + XFREE(y, key->heap, DYNAMIC_TYPE_DH); + #endif + return MP_INIT_E; + } + + if (mp_read_unsigned_bin(x, priv, privSz) != MP_OKAY) + ret = MP_READ_E; + + if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY) + ret = MP_READ_E; + + if (ret == 0 && mp_exptmod(y, x, &key->p, z) != MP_OKAY) + ret = MP_EXPTMOD_E; + + /* make sure z is not one (SP800-56A, 5.7.1.1) */ + if (ret == 0 && (mp_cmp_d(z, 1) == MP_EQ)) + ret = MP_VAL; + + if (ret == 0 && mp_to_unsigned_bin(z, agree) != MP_OKAY) + ret = MP_TO_E; + + if (ret == 0) + *agreeSz = mp_unsigned_bin_size(z); + + mp_clear(z); + mp_clear(y); + mp_forcezero(x); +#endif + +#ifdef WOLFSSL_SMALL_STACK +#ifndef WOLFSSL_SP_MATH + XFREE(z, key->heap, DYNAMIC_TYPE_DH); + XFREE(x, key->heap, DYNAMIC_TYPE_DH); +#endif + XFREE(y, key->heap, DYNAMIC_TYPE_DH); +#endif + + return ret; +} + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH) +static int wc_DhAgree_Async(DhKey* key, byte* agree, word32* agreeSz, + const byte* priv, word32 privSz, const byte* otherPub, word32 pubSz) +{ + int ret; + +#if defined(HAVE_INTEL_QA) + word32 pBits; + + /* QAT DH sizes: 768, 1024, 1536, 2048, 3072 and 4096 bits */ + pBits = mp_unsigned_bin_size(&key->p) * 8; + if (pBits == 768 || pBits == 1024 || pBits == 1536 || + pBits == 2048 || pBits == 3072 || pBits == 4096) { + ret = wc_mp_to_bigint(&key->p, &key->p.raw); + if (ret == MP_OKAY) + ret = IntelQaDhAgree(&key->asyncDev, &key->p.raw, + agree, agreeSz, priv, privSz, otherPub, pubSz); + return ret; + } + +#elif defined(HAVE_CAVIUM) + /* TODO: Not implemented - use software for now */ + +#else /* WOLFSSL_ASYNC_CRYPT_TEST */ + if (wc_AsyncTestInit(&key->asyncDev, ASYNC_TEST_DH_AGREE)) { + WC_ASYNC_TEST* testDev = &key->asyncDev.test; + testDev->dhAgree.key = key; + testDev->dhAgree.agree = agree; + testDev->dhAgree.agreeSz = agreeSz; + testDev->dhAgree.priv = priv; + testDev->dhAgree.privSz = privSz; + testDev->dhAgree.otherPub = otherPub; + testDev->dhAgree.pubSz = pubSz; + return WC_PENDING_E; + } +#endif + + /* otherwise use software DH */ + ret = wc_DhAgree_Sync(key, agree, agreeSz, priv, privSz, otherPub, pubSz); + + return ret; +} +#endif /* WOLFSSL_ASYNC_CRYPT */ + +int wc_DhAgree(DhKey* key, byte* agree, word32* agreeSz, const byte* priv, + word32 privSz, const byte* otherPub, word32 pubSz) +{ + int ret = 0; + + if (key == NULL || agree == NULL || agreeSz == NULL || priv == NULL || + otherPub == NULL) { + return BAD_FUNC_ARG; + } + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH) + if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_DH) { + ret = wc_DhAgree_Async(key, agree, agreeSz, priv, privSz, otherPub, pubSz); + } + else +#endif + { + ret = wc_DhAgree_Sync(key, agree, agreeSz, priv, privSz, otherPub, pubSz); + } + + return ret; +} + +#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL) +/* Sets private and public key in DhKey if both are available, otherwise sets + either private or public key, depending on which is available. + Returns WOLFSSL_SUCCESS if at least one of the keys was set. 
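+
+   Usage sketch (illustrative only; privBuf, pubBuf and their sizes are
+   hypothetical big-endian buffers, dhKey an initialized DhKey):
+
+       if (wc_DhSetFullKeys(&dhKey, privBuf, privSz, pubBuf, pubSz)
+               != WOLFSSL_SUCCESS) {
+           return WOLFSSL_FAILURE;
+       }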
+ */
+WOLFSSL_LOCAL int wc_DhSetFullKeys(DhKey* key, const byte* priv_key,
+                                   word32 privSz, const byte* pub_key,
+                                   word32 pubSz)
+{
+    byte havePriv = 0;
+    byte havePub = 0;
+    mp_int* keyPriv = NULL;
+    mp_int* keyPub = NULL;
+
+    if (key == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    havePriv = ( (priv_key != NULL) && (privSz > 0) );
+    havePub  = ( (pub_key != NULL) && (pubSz > 0) );
+
+    if (!havePub && !havePriv) {
+        WOLFSSL_MSG("No Public or Private Key to Set");
+        return BAD_FUNC_ARG;
+    }
+
+    /* Set Private Key */
+    if (havePriv == TRUE) {
+        /* may have leading 0 */
+        if (priv_key[0] == 0) {
+            privSz--; priv_key++;
+        }
+        if (mp_init(&key->priv) != MP_OKAY)
+            havePriv = FALSE;
+    }
+
+    if (havePriv == TRUE) {
+        if (mp_read_unsigned_bin(&key->priv, priv_key, privSz) != MP_OKAY) {
+            havePriv = FALSE;
+        } else {
+            keyPriv = &key->priv;
+            WOLFSSL_MSG("DH Private Key Set.");
+        }
+    }
+
+    /* Set Public Key */
+    if (havePub == TRUE) {
+        /* may have leading 0 */
+        if (pub_key[0] == 0) {
+            pubSz--; pub_key++;
+        }
+        if (mp_init(&key->pub) != MP_OKAY)
+            havePub = FALSE;
+    }
+
+    if (havePub == TRUE) {
+        if (mp_read_unsigned_bin(&key->pub, pub_key, pubSz) != MP_OKAY) {
+            havePub = FALSE;
+        } else {
+            keyPub = &key->pub;
+            WOLFSSL_MSG("DH Public Key Set.");
+        }
+    }
+
+    /* Free memory if an error occurred */
+    if (havePriv == FALSE && keyPriv != NULL)
+        mp_clear(keyPriv);
+    if (havePub == FALSE && keyPub != NULL)
+        mp_clear(keyPub);
+
+    /* WOLFSSL_SUCCESS if private or public was set else WOLFSSL_FAILURE */
+    return havePriv || havePub;
+}
+#endif
+
+static int _DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g,
+                     word32 gSz, const byte* q, word32 qSz, int trusted,
+                     WC_RNG* rng)
+{
+    int ret = 0;
+    mp_int* keyP = NULL;
+    mp_int* keyG = NULL;
+
+    if (key == NULL || p == NULL || g == NULL || pSz == 0 || gSz == 0) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if (ret == 0) {
+        /* may have leading 0 */
+        if (p[0] == 0) {
+            pSz--; p++;
+        }
+
+        if (g[0] == 0) {
+            gSz--; g++;
+        }
+
+        if (q != NULL) {
+            if (q[0] == 0) {
+                qSz--; q++;
+            }
+        }
+
+        if (mp_init(&key->p) != MP_OKAY)
+            ret = MP_INIT_E;
+    }
+
+    if (ret == 0) {
+        if (mp_read_unsigned_bin(&key->p, p, pSz) != MP_OKAY)
+            ret = ASN_DH_KEY_E;
+        else
+            keyP = &key->p;
+    }
+
+    if (ret == 0 && !trusted) {
+        int isPrime = 0;
+        if (rng != NULL)
+            ret = mp_prime_is_prime_ex(keyP, 8, &isPrime, rng);
+        else
+            ret = mp_prime_is_prime(keyP, 8, &isPrime);
+
+        if (ret == 0 && isPrime == 0)
+            ret = DH_CHECK_PUB_E;
+    }
+
+    if (ret == 0 && mp_init(&key->g) != MP_OKAY)
+        ret = MP_INIT_E;
+    if (ret == 0) {
+        if (mp_read_unsigned_bin(&key->g, g, gSz) != MP_OKAY)
+            ret = ASN_DH_KEY_E;
+        else
+            keyG = &key->g;
+    }
+
+    if (ret == 0 && q != NULL) {
+        if (mp_init(&key->q) != MP_OKAY)
+            ret = MP_INIT_E;
+    }
+    if (ret == 0 && q != NULL) {
+        if (mp_read_unsigned_bin(&key->q, q, qSz) != MP_OKAY)
+            ret = MP_INIT_E;
+    }
+
+    if (ret != 0 && key != NULL) {
+        if (keyG)
+            mp_clear(keyG);
+        if (keyP)
+            mp_clear(keyP);
+    }
+
+    return ret;
+}
+
+
+int wc_DhSetCheckKey(DhKey* key, const byte* p, word32 pSz, const byte* g,
+                     word32 gSz, const byte* q, word32 qSz, int trusted,
+                     WC_RNG* rng)
+{
+    return _DhSetKey(key, p, pSz, g, gSz, q, qSz, trusted, rng);
+}
+
+
+int wc_DhSetKey_ex(DhKey* key, const byte* p, word32 pSz, const byte* g,
+                   word32 gSz, const byte* q, word32 qSz)
+{
+    return _DhSetKey(key, p, pSz, g, gSz, q, qSz, 1, NULL);
+}
+
+
+/* not in asn anymore since no actual asn types used */
+int wc_DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g,
+                word32 gSz)
+{
+    return _DhSetKey(key, p, pSz, g,
gSz, NULL, 0, 1, NULL); +} + + +#ifdef WOLFSSL_KEY_GEN + +/* modulus_size in bits */ +int wc_DhGenerateParams(WC_RNG *rng, int modSz, DhKey *dh) +{ + mp_int tmp, tmp2; + int groupSz = 0, bufSz = 0, + primeCheckCount = 0, + primeCheck = MP_NO, + ret = 0; + unsigned char *buf = NULL; + + if (rng == NULL || dh == NULL) + ret = BAD_FUNC_ARG; + + /* set group size in bytes from modulus size + * FIPS 186-4 defines valid values (1024, 160) (2048, 256) (3072, 256) + */ + if (ret == 0) { + switch (modSz) { + case 1024: + groupSz = 20; + break; + case 2048: + case 3072: + groupSz = 32; + break; + default: + ret = BAD_FUNC_ARG; + break; + } + } + + if (ret == 0) { + /* modulus size in bytes */ + modSz /= WOLFSSL_BIT_SIZE; + bufSz = modSz - groupSz; + + /* allocate ram */ + buf = (unsigned char *)XMALLOC(bufSz, + dh->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (buf == NULL) + ret = MEMORY_E; + } + + /* make a random string that will be multiplied against q */ + if (ret == 0) + ret = wc_RNG_GenerateBlock(rng, buf, bufSz); + + if (ret == 0) { + /* force magnitude */ + buf[0] |= 0xC0; + /* force even */ + buf[bufSz - 1] &= ~1; + + if (mp_init_multi(&tmp, &tmp2, &dh->p, &dh->q, &dh->g, 0) + != MP_OKAY) { + ret = MP_INIT_E; + } + } + + if (ret == 0) { + if (mp_read_unsigned_bin(&tmp2, buf, bufSz) != MP_OKAY) + ret = MP_READ_E; + } + + /* make our prime q */ + if (ret == 0) { + if (mp_rand_prime(&dh->q, groupSz, rng, NULL) != MP_OKAY) + ret = PRIME_GEN_E; + } + + /* p = random * q */ + if (ret == 0) { + if (mp_mul(&dh->q, &tmp2, &dh->p) != MP_OKAY) + ret = MP_MUL_E; + } + + /* p = random * q + 1, so q is a prime divisor of p-1 */ + if (ret == 0) { + if (mp_add_d(&dh->p, 1, &dh->p) != MP_OKAY) + ret = MP_ADD_E; + } + + /* tmp = 2q */ + if (ret == 0) { + if (mp_add(&dh->q, &dh->q, &tmp) != MP_OKAY) + ret = MP_ADD_E; + } + + /* loop until p is prime */ + if (ret == 0) { + do { + if (mp_prime_is_prime_ex(&dh->p, 8, &primeCheck, rng) != MP_OKAY) + ret = PRIME_GEN_E; + + if (primeCheck != MP_YES) { + /* p += 2q */ + if (mp_add(&tmp, &dh->p, &dh->p) != MP_OKAY) + ret = MP_ADD_E; + else + primeCheckCount++; + } + } while (ret == 0 && primeCheck == MP_NO); + } + + /* tmp2 += (2*loop_check_prime) + * to have p = (q * tmp2) + 1 prime + */ + if ((ret == 0) && (primeCheckCount)) { + if (mp_add_d(&tmp2, 2 * primeCheckCount, &tmp2) != MP_OKAY) + ret = MP_ADD_E; + } + + /* find a value g for which g^tmp2 != 1 */ + if ((ret == 0) && (mp_set(&dh->g, 1) != MP_OKAY)) + ret = MP_ZERO_E; + + if (ret == 0) { + do { + if (mp_add_d(&dh->g, 1, &dh->g) != MP_OKAY) + ret = MP_ADD_E; + else if (mp_exptmod(&dh->g, &tmp2, &dh->p, &tmp) != MP_OKAY) + ret = MP_EXPTMOD_E; + } while (ret == 0 && mp_cmp_d(&tmp, 1) == MP_EQ); + } + + if (ret == 0) { + /* at this point tmp generates a group of order q mod p */ + mp_exch(&tmp, &dh->g); + } + + /* clear the parameters if there was an error */ + if ((ret != 0) && (dh != NULL)) { + mp_clear(&dh->q); + mp_clear(&dh->p); + mp_clear(&dh->g); + } + + if (buf != NULL) { + ForceZero(buf, bufSz); + if (dh != NULL) { + XFREE(buf, dh->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + } + mp_clear(&tmp); + mp_clear(&tmp2); + + return ret; +} + + +/* Export raw DH parameters from DhKey structure + * + * dh - pointer to initialized DhKey structure + * p - output location for DH (p) parameter + * pSz - [IN/OUT] size of output buffer for p, size of p + * q - output location for DH (q) parameter + * qSz - [IN/OUT] size of output buffer for q, size of q + * g - output location for DH (g) parameter + * gSz - [IN/OUT] size of output 
buffer for g, size of g + * + * If p, q, and g pointers are all passed in as NULL, the function + * will set pSz, qSz, and gSz to the required output buffer sizes for p, + * q, and g. In this case, the function will return LENGTH_ONLY_E. + * + * returns 0 on success, negative upon failure + */ +int wc_DhExportParamsRaw(DhKey* dh, byte* p, word32* pSz, + byte* q, word32* qSz, byte* g, word32* gSz) +{ + int ret = 0; + word32 pLen = 0, qLen = 0, gLen = 0; + + if (dh == NULL || pSz == NULL || qSz == NULL || gSz == NULL) + ret = BAD_FUNC_ARG; + + /* get required output buffer sizes */ + if (ret == 0) { + pLen = mp_unsigned_bin_size(&dh->p); + qLen = mp_unsigned_bin_size(&dh->q); + gLen = mp_unsigned_bin_size(&dh->g); + + /* return buffer sizes and LENGTH_ONLY_E if buffers are NULL */ + if (p == NULL && q == NULL && g == NULL) { + *pSz = pLen; + *qSz = qLen; + *gSz = gLen; + ret = LENGTH_ONLY_E; + } + } + + if (ret == 0) { + if (p == NULL || q == NULL || g == NULL) + ret = BAD_FUNC_ARG; + } + + /* export p */ + if (ret == 0) { + if (*pSz < pLen) { + WOLFSSL_MSG("Output buffer for DH p parameter too small, " + "required size placed into pSz"); + *pSz = pLen; + ret = BUFFER_E; + } + } + + if (ret == 0) { + *pSz = pLen; + if (mp_to_unsigned_bin(&dh->p, p) != MP_OKAY) + ret = MP_TO_E; + } + + /* export q */ + if (ret == 0) { + if (*qSz < qLen) { + WOLFSSL_MSG("Output buffer for DH q parameter too small, " + "required size placed into qSz"); + *qSz = qLen; + ret = BUFFER_E; + } + } + + if (ret == 0) { + *qSz = qLen; + if (mp_to_unsigned_bin(&dh->q, q) != MP_OKAY) + ret = MP_TO_E; + } + + /* export g */ + if (ret == 0) { + if (*gSz < gLen) { + WOLFSSL_MSG("Output buffer for DH g parameter too small, " + "required size placed into gSz"); + *gSz = gLen; + ret = BUFFER_E; + } + } + + if (ret == 0) { + *gSz = gLen; + if (mp_to_unsigned_bin(&dh->g, g) != MP_OKAY) + ret = MP_TO_E; + } + + return ret; +} + +#endif /* WOLFSSL_KEY_GEN */ + +#endif /* NO_DH */ diff --git a/client/wolfssl/wolfcrypt/src/dsa.c b/client/wolfssl/wolfcrypt/src/dsa.c new file mode 100644 index 0000000..4b83a57 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/dsa.c @@ -0,0 +1,920 @@ +/* dsa.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifndef NO_DSA
+
+#include <wolfssl/wolfcrypt/random.h>
+#include <wolfssl/wolfcrypt/integer.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/sha.h>
+#include <wolfssl/wolfcrypt/dsa.h>
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+int wc_InitDsaKey(DsaKey* key)
+{
+    if (key == NULL)
+        return BAD_FUNC_ARG;
+
+    key->type = -1;  /* haven't decided yet */
+    key->heap = NULL;
+
+    return mp_init_multi(
+        /* public alloc parts */
+        &key->p,
+        &key->q,
+        &key->g,
+        &key->y,
+
+        /* private alloc parts */
+        &key->x,
+        NULL
+    );
+}
+
+
+int wc_InitDsaKey_h(DsaKey* key, void* h)
+{
+    int ret = wc_InitDsaKey(key);
+    if (ret == 0)
+        key->heap = h;
+
+    return ret;
+}
+
+
+void wc_FreeDsaKey(DsaKey* key)
+{
+    if (key == NULL)
+        return;
+
+    if (key->type == DSA_PRIVATE)
+        mp_forcezero(&key->x);
+
+    mp_clear(&key->x);
+    mp_clear(&key->y);
+    mp_clear(&key->g);
+    mp_clear(&key->q);
+    mp_clear(&key->p);
+}
+
+
+/* validate that (L,N) match allowed sizes from FIPS 186-4, Section 4.2.
+ * modLen - represents L, the size of p (prime modulus) in bits
+ * divLen - represents N, the size of q (prime divisor) in bits
+ * return 0 on success, -1 on error */
+static int CheckDsaLN(int modLen, int divLen)
+{
+    int ret = -1;
+
+    switch (modLen) {
+        case 1024:
+            if (divLen == 160)
+                ret = 0;
+            break;
+        case 2048:
+            if (divLen == 224 || divLen == 256)
+                ret = 0;
+            break;
+        case 3072:
+            if (divLen == 256)
+                ret = 0;
+            break;
+        default:
+            break;
+    }
+
+    return ret;
+}
+
+
+#ifdef WOLFSSL_KEY_GEN
+
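+/* Minimal usage sketch (illustrative only): generate fresh FIPS 186-4
+ * parameters and then a key pair under them, using the functions in this
+ * file. Assumes an initialized WC_RNG `rng`.
+ *
+ *   DsaKey key;
+ *   wc_InitDsaKey(&key);
+ *   if (wc_MakeDsaParameters(&rng, 2048, &key) == 0 &&
+ *       wc_MakeDsaKey(&rng, &key) == 0) {
+ *       (key now holds p/q/g plus private x and public y; type DSA_PRIVATE)
+ *   }
+ *   wc_FreeDsaKey(&key);
+ */
+
+/* Create DSA key pair (&dsa->x, &dsa->y)
+ *
+ * Based on NIST FIPS 186-4,
+ * "B.1.1 Key Pair Generation Using Extra Random Bits"
+ *
+ * rng - pointer to initialized WC_RNG structure
+ * dsa - pointer to initialized DsaKey structure, will hold generated key
+ *
+ * return 0 on success, negative on error */
+int wc_MakeDsaKey(WC_RNG *rng, DsaKey *dsa)
+{
+    byte* cBuf;
+    int qSz, pSz, cSz, err;
+    mp_int tmpQ;
+
+    if (rng == NULL || dsa == NULL)
+        return BAD_FUNC_ARG;
+
+    qSz = mp_unsigned_bin_size(&dsa->q);
+    pSz = mp_unsigned_bin_size(&dsa->p);
+
+    /* verify (L,N) pair bit lengths */
+    if (CheckDsaLN(pSz * WOLFSSL_BIT_SIZE, qSz * WOLFSSL_BIT_SIZE) != 0)
+        return BAD_FUNC_ARG;
+
+    /* generate extra 64 bits so that bias from mod function is negligible */
+    cSz = qSz + (64 / WOLFSSL_BIT_SIZE);
+    cBuf = (byte*)XMALLOC(cSz, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (cBuf == NULL) {
+        return MEMORY_E;
+    }
+
+    if ((err = mp_init_multi(&dsa->x, &dsa->y, &tmpQ, NULL, NULL, NULL))
+                             != MP_OKAY) {
+        XFREE(cBuf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return err;
+    }
+
+    do {
+        /* generate N+64 bits (c) from RBG into &dsa->x, making sure positive.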
+ * Hash_DRBG uses SHA-256 which matches maximum + * requested_security_strength of (L,N) */ + err = wc_RNG_GenerateBlock(rng, cBuf, cSz); + if (err != MP_OKAY) { + mp_clear(&dsa->x); + mp_clear(&dsa->y); + mp_clear(&tmpQ); + XFREE(cBuf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER); + return err; + } + + err = mp_read_unsigned_bin(&dsa->x, cBuf, cSz); + if (err != MP_OKAY) { + mp_clear(&dsa->x); + mp_clear(&dsa->y); + mp_clear(&tmpQ); + XFREE(cBuf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER); + return err; + } + } while (mp_cmp_d(&dsa->x, 1) != MP_GT); + + XFREE(cBuf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER); + + /* tmpQ = q - 1 */ + if (err == MP_OKAY) + err = mp_copy(&dsa->q, &tmpQ); + + if (err == MP_OKAY) + err = mp_sub_d(&tmpQ, 1, &tmpQ); + + /* x = c mod (q-1), &dsa->x holds c */ + if (err == MP_OKAY) + err = mp_mod(&dsa->x, &tmpQ, &dsa->x); + + /* x = c mod (q-1) + 1 */ + if (err == MP_OKAY) + err = mp_add_d(&dsa->x, 1, &dsa->x); + + /* public key : y = g^x mod p */ + if (err == MP_OKAY) + err = mp_exptmod_ex(&dsa->g, &dsa->x, dsa->q.used, &dsa->p, &dsa->y); + + if (err == MP_OKAY) + dsa->type = DSA_PRIVATE; + + if (err != MP_OKAY) { + mp_clear(&dsa->x); + mp_clear(&dsa->y); + } + mp_clear(&tmpQ); + + return err; +} + + +/* modulus_size in bits */ +int wc_MakeDsaParameters(WC_RNG *rng, int modulus_size, DsaKey *dsa) +{ + mp_int tmp, tmp2; + int err, msize, qsize, + loop_check_prime = 0, + check_prime = MP_NO; + unsigned char *buf; + + if (rng == NULL || dsa == NULL) + return BAD_FUNC_ARG; + + /* set group size in bytes from modulus size + * FIPS 186-4 defines valid values (1024, 160) (2048, 256) (3072, 256) + */ + switch (modulus_size) { + case 1024: + qsize = 20; + break; + case 2048: + case 3072: + qsize = 32; + break; + default: + return BAD_FUNC_ARG; + } + + /* modulus size in bytes */ + msize = modulus_size / WOLFSSL_BIT_SIZE; + + /* allocate ram */ + buf = (unsigned char *)XMALLOC(msize - qsize, + dsa->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (buf == NULL) { + return MEMORY_E; + } + + /* make a random string that will be multiplied against q */ + err = wc_RNG_GenerateBlock(rng, buf, msize - qsize); + if (err != MP_OKAY) { + XFREE(buf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER); + return err; + } + + /* force magnitude */ + buf[0] |= 0xC0; + + /* force even */ + buf[msize - qsize - 1] &= ~1; + + if (mp_init_multi(&tmp2, &dsa->p, &dsa->q, 0, 0, 0) != MP_OKAY) { + mp_clear(&dsa->q); + XFREE(buf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER); + return MP_INIT_E; + } + + err = mp_read_unsigned_bin(&tmp2, buf, msize - qsize); + if (err != MP_OKAY) { + mp_clear(&dsa->q); + mp_clear(&dsa->p); + mp_clear(&tmp2); + XFREE(buf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER); + return err; + } + XFREE(buf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER); + + /* make our prime q */ + err = mp_rand_prime(&dsa->q, qsize, rng, NULL); + if (err != MP_OKAY) { + mp_clear(&dsa->q); + mp_clear(&dsa->p); + mp_clear(&tmp2); + return err; + } + + /* p = random * q */ + err = mp_mul(&dsa->q, &tmp2, &dsa->p); + if (err != MP_OKAY) { + mp_clear(&dsa->q); + mp_clear(&dsa->p); + mp_clear(&tmp2); + return err; + } + + /* p = random * q + 1, so q is a prime divisor of p-1 */ + err = mp_add_d(&dsa->p, 1, &dsa->p); + if (err != MP_OKAY) { + mp_clear(&dsa->q); + mp_clear(&dsa->p); + mp_clear(&tmp2); + return err; + } + + if (mp_init(&tmp) != MP_OKAY) { + mp_clear(&dsa->q); + mp_clear(&dsa->p); + mp_clear(&tmp2); + return MP_INIT_E; + } + + /* tmp = 2q */ + err = mp_add(&dsa->q, &dsa->q, &tmp); + if (err != MP_OKAY) { + mp_clear(&dsa->q); + mp_clear(&dsa->p); + mp_clear(&tmp); 
+ mp_clear(&tmp2); + return err; + } + + /* loop until p is prime */ + while (check_prime == MP_NO) { + err = mp_prime_is_prime_ex(&dsa->p, 8, &check_prime, rng); + if (err != MP_OKAY) { + mp_clear(&dsa->q); + mp_clear(&dsa->p); + mp_clear(&tmp); + mp_clear(&tmp2); + return err; + } + + if (check_prime != MP_YES) { + /* p += 2q */ + err = mp_add(&tmp, &dsa->p, &dsa->p); + if (err != MP_OKAY) { + mp_clear(&dsa->q); + mp_clear(&dsa->p); + mp_clear(&tmp); + mp_clear(&tmp2); + return err; + } + + loop_check_prime++; + } + } + + /* tmp2 += (2*loop_check_prime) + * to have p = (q * tmp2) + 1 prime + */ + if (loop_check_prime) { + err = mp_add_d(&tmp2, 2*loop_check_prime, &tmp2); + if (err != MP_OKAY) { + mp_clear(&dsa->q); + mp_clear(&dsa->p); + mp_clear(&tmp); + mp_clear(&tmp2); + return err; + } + } + + if (mp_init(&dsa->g) != MP_OKAY) { + mp_clear(&dsa->q); + mp_clear(&dsa->p); + mp_clear(&tmp); + mp_clear(&tmp2); + return MP_INIT_E; + } + + /* find a value g for which g^tmp2 != 1 */ + if (mp_set(&dsa->g, 1) != MP_OKAY) { + mp_clear(&dsa->q); + mp_clear(&dsa->p); + mp_clear(&tmp); + mp_clear(&tmp2); + return MP_INIT_E; + } + + do { + err = mp_add_d(&dsa->g, 1, &dsa->g); + if (err != MP_OKAY) { + mp_clear(&dsa->q); + mp_clear(&dsa->p); + mp_clear(&dsa->g); + mp_clear(&tmp); + mp_clear(&tmp2); + return err; + } + + err = mp_exptmod(&dsa->g, &tmp2, &dsa->p, &tmp); + if (err != MP_OKAY) { + mp_clear(&dsa->q); + mp_clear(&dsa->p); + mp_clear(&dsa->g); + mp_clear(&tmp); + mp_clear(&tmp2); + return err; + } + + } while (mp_cmp_d(&tmp, 1) == MP_EQ); + + /* at this point tmp generates a group of order q mod p */ + mp_exch(&tmp, &dsa->g); + + mp_clear(&tmp); + mp_clear(&tmp2); + + return MP_OKAY; +} +#endif /* WOLFSSL_KEY_GEN */ + + +static int _DsaImportParamsRaw(DsaKey* dsa, const char* p, const char* q, + const char* g, int trusted, WC_RNG* rng) +{ + int err; + word32 pSz, qSz; + + if (dsa == NULL || p == NULL || q == NULL || g == NULL) + return BAD_FUNC_ARG; + + /* read p */ + err = mp_read_radix(&dsa->p, p, MP_RADIX_HEX); + if (err == MP_OKAY && !trusted) { + int isPrime = 1; + if (rng == NULL) + err = mp_prime_is_prime(&dsa->p, 8, &isPrime); + else + err = mp_prime_is_prime_ex(&dsa->p, 8, &isPrime, rng); + + if (err == MP_OKAY) { + if (!isPrime) + err = DH_CHECK_PUB_E; + } + } + + /* read q */ + if (err == MP_OKAY) + err = mp_read_radix(&dsa->q, q, MP_RADIX_HEX); + + /* read g */ + if (err == MP_OKAY) + err = mp_read_radix(&dsa->g, g, MP_RADIX_HEX); + + /* verify (L,N) pair bit lengths */ + pSz = mp_unsigned_bin_size(&dsa->p); + qSz = mp_unsigned_bin_size(&dsa->q); + + if (CheckDsaLN(pSz * WOLFSSL_BIT_SIZE, qSz * WOLFSSL_BIT_SIZE) != 0) { + WOLFSSL_MSG("Invalid DSA p or q parameter size"); + err = BAD_FUNC_ARG; + } + + if (err != MP_OKAY) { + mp_clear(&dsa->p); + mp_clear(&dsa->q); + mp_clear(&dsa->g); + } + + return err; +} + + +/* Import raw DSA parameters into DsaKey structure for use with wc_MakeDsaKey(), + * input parameters (p,q,g) should be represented as ASCII hex values. 
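+ * The (p,q) bit lengths must also form an approved FIPS 186-4 (L,N) pair,
+ * i.e. (1024,160), (2048,224), (2048,256) or (3072,256), as enforced by
+ * CheckDsaLN() above.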
+ *
+ * dsa - pointer to initialized DsaKey structure
+ * p   - DSA (p) parameter, ASCII hex string
+ * q   - DSA (q) parameter, ASCII hex string
+ * g   - DSA (g) parameter, ASCII hex string
+ * (the hex strings are NUL-terminated; no explicit lengths are taken)
+ *
+ * returns 0 on success, negative upon failure
+ */
+int wc_DsaImportParamsRaw(DsaKey* dsa, const char* p, const char* q,
+                          const char* g)
+{
+    return _DsaImportParamsRaw(dsa, p, q, g, 1, NULL);
+}
+
+
+/* Import raw DSA parameters into DsaKey structure for use with wc_MakeDsaKey(),
+ * input parameters (p,q,g) should be represented as ASCII hex values. Check
+ * that the p value is probably prime when `trusted` is 0.
+ *
+ * dsa     - pointer to initialized DsaKey structure
+ * p       - DSA (p) parameter, ASCII hex string
+ * q       - DSA (q) parameter, ASCII hex string
+ * g       - DSA (g) parameter, ASCII hex string
+ * trusted - trust that p is OK
+ * rng     - random number generator for the prime test
+ *
+ * returns 0 on success, negative upon failure
+ */
+int wc_DsaImportParamsRawCheck(DsaKey* dsa, const char* p, const char* q,
+                               const char* g, int trusted, WC_RNG* rng)
+{
+    return _DsaImportParamsRaw(dsa, p, q, g, trusted, rng);
+}
+
+
+/* Export raw DSA parameters from DsaKey structure
+ *
+ * dsa - pointer to initialized DsaKey structure
+ * p   - output location for DSA (p) parameter
+ * pSz - [IN/OUT] size of output buffer for p, size of p
+ * q   - output location for DSA (q) parameter
+ * qSz - [IN/OUT] size of output buffer for q, size of q
+ * g   - output location for DSA (g) parameter
+ * gSz - [IN/OUT] size of output buffer for g, size of g
+ *
+ * If p, q, and g pointers are all passed in as NULL, the function
+ * will set pSz, qSz, and gSz to the required output buffer sizes for p,
+ * q, and g. In this case, the function will return LENGTH_ONLY_E.
+ * + * returns 0 on success, negative upon failure + */ +int wc_DsaExportParamsRaw(DsaKey* dsa, byte* p, word32* pSz, + byte* q, word32* qSz, byte* g, word32* gSz) +{ + int err; + word32 pLen, qLen, gLen; + + if (dsa == NULL || pSz == NULL || qSz == NULL || gSz == NULL) + return BAD_FUNC_ARG; + + /* get required output buffer sizes */ + pLen = mp_unsigned_bin_size(&dsa->p); + qLen = mp_unsigned_bin_size(&dsa->q); + gLen = mp_unsigned_bin_size(&dsa->g); + + /* return buffer sizes and LENGTH_ONLY_E if buffers are NULL */ + if (p == NULL && q == NULL && g == NULL) { + *pSz = pLen; + *qSz = qLen; + *gSz = gLen; + return LENGTH_ONLY_E; + } + + if (p == NULL || q == NULL || g == NULL) + return BAD_FUNC_ARG; + + /* export p */ + if (*pSz < pLen) { + WOLFSSL_MSG("Output buffer for DSA p parameter too small, " + "required size placed into pSz"); + *pSz = pLen; + return BUFFER_E; + } + *pSz = pLen; + err = mp_to_unsigned_bin(&dsa->p, p); + + /* export q */ + if (err == MP_OKAY) { + if (*qSz < qLen) { + WOLFSSL_MSG("Output buffer for DSA q parameter too small, " + "required size placed into qSz"); + *qSz = qLen; + return BUFFER_E; + } + *qSz = qLen; + err = mp_to_unsigned_bin(&dsa->q, q); + } + + /* export g */ + if (err == MP_OKAY) { + if (*gSz < gLen) { + WOLFSSL_MSG("Output buffer for DSA g parameter too small, " + "required size placed into gSz"); + *gSz = gLen; + return BUFFER_E; + } + *gSz = gLen; + err = mp_to_unsigned_bin(&dsa->g, g); + } + + return err; +} + + +/* Export raw DSA key (x, y) from DsaKey structure + * + * dsa - pointer to initialized DsaKey structure + * x - output location for private key + * xSz - [IN/OUT] size of output buffer for x, size of x + * y - output location for public key + * ySz - [IN/OUT] size of output buffer for y, size of y + * + * If x and y pointers are all passed in as NULL, the function + * will set xSz and ySz to the required output buffer sizes for x + * and y. In this case, the function will return LENGTH_ONLY_E. 
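+ * For example (names illustrative), query the sizes first, then export:
+ *   word32 xSz = 0, ySz = 0;
+ *   wc_DsaExportKeyRaw(&key, NULL, &xSz, NULL, &ySz);  (returns LENGTH_ONLY_E)
+ * then call again with x and y buffers of xSz and ySz bytes.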
+ *
+ * returns 0 on success, negative upon failure
+ */
+int wc_DsaExportKeyRaw(DsaKey* dsa, byte* x, word32* xSz, byte* y, word32* ySz)
+{
+    int err;
+    word32 xLen, yLen;
+
+    if (dsa == NULL || xSz == NULL || ySz == NULL)
+        return BAD_FUNC_ARG;
+
+    /* get required output buffer sizes */
+    xLen = mp_unsigned_bin_size(&dsa->x);
+    yLen = mp_unsigned_bin_size(&dsa->y);
+
+    /* return buffer sizes and LENGTH_ONLY_E if buffers are NULL */
+    if (x == NULL && y == NULL) {
+        *xSz = xLen;
+        *ySz = yLen;
+        return LENGTH_ONLY_E;
+    }
+
+    if (x == NULL || y == NULL)
+        return BAD_FUNC_ARG;
+
+    /* export x */
+    if (*xSz < xLen) {
+        WOLFSSL_MSG("Output buffer for DSA private key (x) too small, "
+                    "required size placed into xSz");
+        *xSz = xLen;
+        return BUFFER_E;
+    }
+    *xSz = xLen;
+    err = mp_to_unsigned_bin(&dsa->x, x);
+
+    /* export y */
+    if (err == MP_OKAY) {
+        if (*ySz < yLen) {
+            WOLFSSL_MSG("Output buffer for DSA public key (y) too small, "
+                        "required size placed into ySz");
+            *ySz = yLen;
+            return BUFFER_E;
+        }
+        *ySz = yLen;
+        err = mp_to_unsigned_bin(&dsa->y, y);
+    }
+
+    return err;
+}
+
+
+int wc_DsaSign(const byte* digest, byte* out, DsaKey* key, WC_RNG* rng)
+{
+    mp_int k, kInv, r, s, H;
+#ifndef WOLFSSL_MP_INVMOD_CONSTANT_TIME
+    mp_int b;
+#endif
+    mp_int* qMinus1;
+    int ret = 0, sz;
+    byte buffer[DSA_HALF_SIZE];
+    byte* tmp;  /* initial output pointer */
+
+    if (digest == NULL || out == NULL || key == NULL || rng == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    tmp = out;
+
+    sz = min((int)sizeof(buffer), mp_unsigned_bin_size(&key->q));
+
+#ifdef WOLFSSL_MP_INVMOD_CONSTANT_TIME
+    if (mp_init_multi(&k, &kInv, &r, &s, &H, 0) != MP_OKAY)
+#else
+    if (mp_init_multi(&k, &kInv, &r, &s, &H, &b) != MP_OKAY)
+#endif
+    {
+        return MP_INIT_E;
+    }
+    qMinus1 = &kInv;
+
+    /* NIST FIPS 186-4: B.2.2
+     * Per-Message Secret Number Generation by Testing Candidates
+     * Generate k in range [1, q-1].
+     *   Check that k is less than q-1: range [0, q-2].
+     *   Add 1 to k: range [1, q-1].
+     */
+    if (mp_sub_d(&key->q, 1, qMinus1))
+        ret = MP_SUB_E;
+
+    if (ret == 0) {
+        do {
+            /* Step 4: generate k */
+            ret = wc_RNG_GenerateBlock(rng, buffer, sz);
+
+            /* Step 5 */
+            if (ret == 0 && mp_read_unsigned_bin(&k, buffer, sz) != MP_OKAY)
+                ret = MP_READ_E;
+
+            /* k is a random number and it must be less than q-1;
+             * if it is not, repeat
+             */
+            /* Step 6 */
+        } while (ret == 0 && mp_cmp(&k, qMinus1) != MP_LT);
+    }
+    /* Step 7 */
+    if (ret == 0 && mp_add_d(&k, 1, &k) != MP_OKAY)
+        ret = MP_MOD_E;
+
+#ifdef WOLFSSL_MP_INVMOD_CONSTANT_TIME
+    /* inverse k mod q */
+    if (ret == 0 && mp_invmod(&k, &key->q, &kInv) != MP_OKAY)
+        ret = MP_INVMOD_E;
+
+    /* generate r, r = (g exp k mod p) mod q */
+    if (ret == 0 && mp_exptmod_ex(&key->g, &k, key->q.used, &key->p,
+                                  &r) != MP_OKAY) {
+        ret = MP_EXPTMOD_E;
+    }
+
+    if (ret == 0 && mp_mod(&r, &key->q, &r) != MP_OKAY)
+        ret = MP_MOD_E;
+
+    /* generate H from sha digest */
+    if (ret == 0 && mp_read_unsigned_bin(&H, digest, WC_SHA_DIGEST_SIZE) != MP_OKAY)
+        ret = MP_READ_E;
+
+    /* generate s, s = (kInv * (H + x*r)) % q */
+    if (ret == 0 && mp_mul(&key->x, &r, &s) != MP_OKAY)
+        ret = MP_MUL_E;
+
+    if (ret == 0 && mp_add(&s, &H, &s) != MP_OKAY)
+        ret = MP_ADD_E;
+
+    if (ret == 0 && mp_mulmod(&s, &kInv, &key->q, &s) != MP_OKAY)
+        ret = MP_MULMOD_E;
+#else
+    /* Blinding value
+     * Generate b in range [1, q-1].
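+     * Blinding avoids feeding the secret k into mp_invmod directly: with a
+     * random b and k' = k*b, the code below computes s = b*(H/k' + x*r/k'),
+     * which algebraically equals (H + x*r)/k mod q, so the inversion only
+     * ever touches the blinded product k*b.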
+ */ + if (ret == 0) { + do { + ret = wc_RNG_GenerateBlock(rng, buffer, sz); + if (ret == 0 && mp_read_unsigned_bin(&b, buffer, sz) != MP_OKAY) + ret = MP_READ_E; + } while (ret == 0 && mp_cmp(&b, qMinus1) != MP_LT); + } + if (ret == 0 && mp_add_d(&b, 1, &b) != MP_OKAY) + ret = MP_MOD_E; + + /* set H from sha digest */ + if (ret == 0 && mp_read_unsigned_bin(&H, digest, + WC_SHA_DIGEST_SIZE) != MP_OKAY) { + ret = MP_READ_E; + } + + /* generate r, r = (g exp k mod p) mod q */ + if (ret == 0 && mp_exptmod_ex(&key->g, &k, key->q.used, &key->p, + &r) != MP_OKAY) { + ret = MP_EXPTMOD_E; + } + + /* calculate s = (H + xr)/k + = b.(H/k.b + x.r/k.b) */ + + /* k = k.b */ + if (ret == 0 && mp_mulmod(&k, &b, &key->q, &k) != MP_OKAY) + ret = MP_MULMOD_E; + + /* kInv = 1/k.b mod q */ + if (ret == 0 && mp_invmod(&k, &key->q, &kInv) != MP_OKAY) + ret = MP_INVMOD_E; + + if (ret == 0 && mp_mod(&r, &key->q, &r) != MP_OKAY) + ret = MP_MOD_E; + + /* s = x.r */ + if (ret == 0 && mp_mul(&key->x, &r, &s) != MP_OKAY) + ret = MP_MUL_E; + + /* s = x.r/k.b */ + if (ret == 0 && mp_mulmod(&s, &kInv, &key->q, &s) != MP_OKAY) + ret = MP_MULMOD_E; + + /* H = H/k.b */ + if (ret == 0 && mp_mulmod(&H, &kInv, &key->q, &H) != MP_OKAY) + ret = MP_MULMOD_E; + + /* s = H/k.b + x.r/k.b + = (H + x.r)/k.b */ + if (ret == 0 && mp_add(&s, &H, &s) != MP_OKAY) + ret = MP_ADD_E; + + /* s = b.(e + x.r)/k.b + = (e + x.r)/k */ + if (ret == 0 && mp_mulmod(&s, &b, &key->q, &s) != MP_OKAY) + ret = MP_MULMOD_E; + + /* s = (e + x.r)/k */ + if (ret == 0 && mp_mod(&s, &key->q, &s) != MP_OKAY) + ret = MP_MOD_E; +#endif + + /* detect zero r or s */ + if (ret == 0 && (mp_iszero(&r) == MP_YES || mp_iszero(&s) == MP_YES)) + ret = MP_ZERO_E; + + /* write out */ + if (ret == 0) { + int rSz = mp_unsigned_bin_size(&r); + int sSz = mp_unsigned_bin_size(&s); + + while (rSz++ < DSA_HALF_SIZE) { + *out++ = 0x00; /* pad front with zeros */ + } + + if (mp_to_unsigned_bin(&r, out) != MP_OKAY) + ret = MP_TO_E; + else { + out = tmp + DSA_HALF_SIZE; /* advance to s in output */ + while (sSz++ < DSA_HALF_SIZE) { + *out++ = 0x00; /* pad front with zeros */ + } + ret = mp_to_unsigned_bin(&s, out); + } + } + + ForceZero(buffer, sz); + mp_forcezero(&kInv); + mp_forcezero(&k); +#ifndef WOLFSSL_MP_INVMOD_CONSTANT_TIME + mp_forcezero(&b); + + mp_clear(&b); +#endif + mp_clear(&H); + mp_clear(&s); + mp_clear(&r); + mp_clear(&kInv); + mp_clear(&k); + + return ret; +} + + +int wc_DsaVerify(const byte* digest, const byte* sig, DsaKey* key, int* answer) +{ + mp_int w, u1, u2, v, r, s; + int ret = 0; + + if (digest == NULL || sig == NULL || key == NULL || answer == NULL) { + return BAD_FUNC_ARG; + } + + if (mp_init_multi(&w, &u1, &u2, &v, &r, &s) != MP_OKAY) + return MP_INIT_E; + + /* set r and s from signature */ + if (mp_read_unsigned_bin(&r, sig, DSA_HALF_SIZE) != MP_OKAY || + mp_read_unsigned_bin(&s, sig + DSA_HALF_SIZE, DSA_HALF_SIZE) != MP_OKAY) + ret = MP_READ_E; + + /* sanity checks */ + if (ret == 0) { + if (mp_iszero(&r) == MP_YES || mp_iszero(&s) == MP_YES || + mp_cmp(&r, &key->q) != MP_LT || mp_cmp(&s, &key->q) != MP_LT) { + ret = MP_ZERO_E; + } + } + + /* put H into u1 from sha digest */ + if (ret == 0 && mp_read_unsigned_bin(&u1,digest,WC_SHA_DIGEST_SIZE) != MP_OKAY) + ret = MP_READ_E; + + /* w = s invmod q */ + if (ret == 0 && mp_invmod(&s, &key->q, &w) != MP_OKAY) + ret = MP_INVMOD_E; + + /* u1 = (H * w) % q */ + if (ret == 0 && mp_mulmod(&u1, &w, &key->q, &u1) != MP_OKAY) + ret = MP_MULMOD_E; + + /* u2 = (r * w) % q */ + if (ret == 0 && mp_mulmod(&r, &w, &key->q, 
&u2) != MP_OKAY)
+        ret = MP_MULMOD_E;
+
+    /* verify v = ((g^u1 * y^u2) mod p) mod q */
+    if (ret == 0 && mp_exptmod(&key->g, &u1, &key->p, &u1) != MP_OKAY)
+        ret = MP_EXPTMOD_E;
+
+    if (ret == 0 && mp_exptmod(&key->y, &u2, &key->p, &u2) != MP_OKAY)
+        ret = MP_EXPTMOD_E;
+
+    if (ret == 0 && mp_mulmod(&u1, &u2, &key->p, &v) != MP_OKAY)
+        ret = MP_MULMOD_E;
+
+    if (ret == 0 && mp_mod(&v, &key->q, &v) != MP_OKAY)
+        ret = MP_MULMOD_E;
+
+    /* do they match? */
+    if (ret == 0 && mp_cmp(&r, &v) == MP_EQ)
+        *answer = 1;
+    else
+        *answer = 0;
+
+    mp_clear(&s);
+    mp_clear(&r);
+    mp_clear(&u1);
+    mp_clear(&u2);
+    mp_clear(&w);
+    mp_clear(&v);
+
+    return ret;
+}
+
+
+#endif /* NO_DSA */
+
diff --git a/client/wolfssl/wolfcrypt/src/ecc.c b/client/wolfssl/wolfcrypt/src/ecc.c
new file mode 100644
index 0000000..22db7f1
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/ecc.c
@@ -0,0 +1,10761 @@
+/* ecc.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+/* in case user set HAVE_ECC there */
+#include <wolfssl/wolfcrypt/settings.h>
+
+/* public ASN interface */
+#include <wolfssl/wolfcrypt/asn_public.h>
+
+/*
+Possible ECC enable options:
+ * HAVE_ECC:            Overall control of ECC                  default: on
+ * HAVE_ECC_ENCRYPT:    ECC encrypt/decrypt w/AES and HKDF      default: off
+ * HAVE_ECC_SIGN:       ECC sign                                default: on
+ * HAVE_ECC_VERIFY:     ECC verify                              default: on
+ * HAVE_ECC_DHE:        ECC build shared secret                 default: on
+ * HAVE_ECC_CDH:        ECC cofactor DH shared secret           default: off
+ * HAVE_ECC_KEY_IMPORT: ECC Key import                          default: on
+ * HAVE_ECC_KEY_EXPORT: ECC Key export                          default: on
+ * ECC_SHAMIR:          Enables Shamir calc method              default: on
+ * HAVE_COMP_KEY:       Enables compressed key                  default: off
+ * WOLFSSL_VALIDATE_ECC_IMPORT: Validate ECC key on import      default: off
+ * WOLFSSL_VALIDATE_ECC_KEYGEN: Validate ECC key gen            default: off
+ * WOLFSSL_CUSTOM_CURVES: Allow non-standard curves             default: off
+ *   Includes the curve "a" variable in calculation
+ * ECC_DUMP_OID:        Enables dump of OID encoding and sum    default: off
+ * ECC_CACHE_CURVE:     Enables cache of curve info to improve performance
+ *                                                              default: off
+ * FP_ECC:              ECC Fixed Point Cache                   default: off
+ * USE_ECC_B_PARAM:     Enable ECC curve B param                default: off
+ *                      (on for HAVE_COMP_KEY)
+ * WOLFSSL_ECC_CURVE_STATIC: default off (on for windows)
+ *   For the ECC curve parameters `ecc_set_type` uses a fixed
+ *   array for the hex strings
+ */
+
+/*
+ECC Curve Types:
+ * NO_ECC_SECP          Disables SECP curves                    default: off (not defined)
+ * HAVE_ECC_SECPR2      Enables SECP R2 curves                  default: off
+ * HAVE_ECC_SECPR3      Enables SECP R3 curves                  default: off
+ * HAVE_ECC_BRAINPOOL   Enables Brainpool curves                default: off
+ * HAVE_ECC_KOBLITZ     Enables Koblitz curves                  default: off
+ */
+
+/*
+ECC Curve Sizes:
+ * ECC_USER_CURVES: Allows custom combination of key sizes below
+ * HAVE_ALL_CURVES: Enable all key sizes (on unless ECC_USER_CURVES is defined)
+ * HAVE_ECC112: 112 bit key
+ * HAVE_ECC128: 128 bit key
+ * HAVE_ECC160: 160 bit key
+ * HAVE_ECC192: 192 bit key
+ * HAVE_ECC224: 224 bit key
+ * HAVE_ECC239: 239 bit key
+ * NO_ECC256: Disables 256 bit key (on by default)
+ * HAVE_ECC320: 320 bit key
+ * HAVE_ECC384: 384 bit key
+ * HAVE_ECC512: 512 bit key
+ * HAVE_ECC521: 521 bit key
+ */
+
+
+#ifdef HAVE_ECC
+
+/* Make sure custom curves are enabled for Brainpool or Koblitz curve types */
+#if (defined(HAVE_ECC_BRAINPOOL) || defined(HAVE_ECC_KOBLITZ)) &&\
+    !defined(WOLFSSL_CUSTOM_CURVES)
+    #error Brainpool and Koblitz curves require WOLFSSL_CUSTOM_CURVES
+#endif
+
+#if defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+    /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+    #define FIPS_NO_WRAPPERS
+
+    #ifdef USE_WINDOWS_API
+        #pragma code_seg(".fipsA$f")
+        #pragma const_seg(".fipsB$f")
+    #endif
+#endif
+
+#include <wolfssl/wolfcrypt/ecc.h>
+#include <wolfssl/wolfcrypt/asn.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/types.h>
+
+#ifdef WOLFSSL_HAVE_SP_ECC
+#include <wolfssl/wolfcrypt/sp.h>
+#endif
+
+#ifdef HAVE_ECC_ENCRYPT
+    #include <wolfssl/wolfcrypt/hmac.h>
+    #include <wolfssl/wolfcrypt/aes.h>
+#endif
+
+#ifdef HAVE_X963_KDF
+    #include <wolfssl/wolfcrypt/hash.h>
+#endif
+
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(FREESCALE_LTC_ECC)
+    #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
+#endif
+
+#if defined(WOLFSSL_STM32_PKA)
+    #include <wolfssl/wolfcrypt/port/st/stm32.h>
+#endif
+
+#ifdef WOLFSSL_SP_MATH
+    #define GEN_MEM_ERR MP_MEM
+#elif defined(USE_FAST_MATH)
+    #define GEN_MEM_ERR FP_MEM
+#else
+    #define GEN_MEM_ERR MP_MEM
+#endif
+
+
+/* internal ECC states */
+enum {
+    ECC_STATE_NONE = 0,
+
+    ECC_STATE_SHARED_SEC_GEN,
+    ECC_STATE_SHARED_SEC_RES,
+
+    ECC_STATE_SIGN_DO,
+    ECC_STATE_SIGN_ENCODE,
+
+    ECC_STATE_VERIFY_DECODE,
+    ECC_STATE_VERIFY_DO,
+    ECC_STATE_VERIFY_RES,
+};
+
+
+/* map
+   ptmul -> mulmod
+*/
+
+/* 256-bit curve on by default whether user curves or not */
+#if defined(HAVE_ECC112) || defined(HAVE_ALL_CURVES)
+    #define ECC112
+#endif
+#if defined(HAVE_ECC128) || defined(HAVE_ALL_CURVES)
+    #define ECC128
+#endif
+#if defined(HAVE_ECC160) || defined(HAVE_ALL_CURVES)
+    #define ECC160
+#endif
+#if defined(HAVE_ECC192) || defined(HAVE_ALL_CURVES)
+    #define ECC192
+#endif
+#if defined(HAVE_ECC224) || defined(HAVE_ALL_CURVES)
+    #define ECC224
+#endif
+#if defined(HAVE_ECC239) || defined(HAVE_ALL_CURVES)
+    #define ECC239
+#endif
+#if !defined(NO_ECC256) || defined(HAVE_ALL_CURVES)
+    #define ECC256
+#endif
+#if defined(HAVE_ECC320) || defined(HAVE_ALL_CURVES)
+    #define ECC320
+#endif
+#if defined(HAVE_ECC384) || defined(HAVE_ALL_CURVES)
+    #define ECC384
+#endif
+#if
defined(HAVE_ECC512) || defined(HAVE_ALL_CURVES) + #define ECC512 +#endif +#if defined(HAVE_ECC521) || defined(HAVE_ALL_CURVES) + #define ECC521 +#endif + +/* The encoded OID's for ECC curves */ +#ifdef ECC112 + #ifndef NO_ECC_SECP + #ifdef HAVE_OID_ENCODING + #define CODED_SECP112R1 {1,3,132,0,6} + #define CODED_SECP112R1_SZ 5 + #else + #define CODED_SECP112R1 {0x2B,0x81,0x04,0x00,0x06} + #define CODED_SECP112R1_SZ 5 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_secp112r1[] = CODED_SECP112R1; + #else + #define ecc_oid_secp112r1 CODED_SECP112R1 + #endif + #define ecc_oid_secp112r1_sz CODED_SECP112R1_SZ + #endif /* !NO_ECC_SECP */ + #ifdef HAVE_ECC_SECPR2 + #ifdef HAVE_OID_ENCODING + #define CODED_SECP112R2 {1,3,132,0,7} + #define CODED_SECP112R2_SZ 5 + #else + #define CODED_SECP112R2 {0x2B,0x81,0x04,0x00,0x07} + #define CODED_SECP112R2_SZ 5 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_secp112r2[] = CODED_SECP112R2; + #else + #define ecc_oid_secp112r2 CODED_SECP112R2 + #endif + #define ecc_oid_secp112r2_sz CODED_SECP112R2_SZ + #endif /* HAVE_ECC_SECPR2 */ +#endif /* ECC112 */ +#ifdef ECC128 + #ifndef NO_ECC_SECP + #ifdef HAVE_OID_ENCODING + #define CODED_SECP128R1 {1,3,132,0,28} + #define CODED_SECP128R1_SZ 5 + #else + #define CODED_SECP128R1 {0x2B,0x81,0x04,0x00,0x1C} + #define CODED_SECP128R1_SZ 5 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_secp128r1[] = CODED_SECP128R1; + #else + #define ecc_oid_secp128r1 CODED_SECP128R1 + #endif + #define ecc_oid_secp128r1_sz CODED_SECP128R1_SZ + #endif /* !NO_ECC_SECP */ + #ifdef HAVE_ECC_SECPR2 + #ifdef HAVE_OID_ENCODING + #define CODED_SECP128R2 {1,3,132,0,29} + #define CODED_SECP128R2_SZ 5 + #else + #define CODED_SECP128R2 {0x2B,0x81,0x04,0x00,0x1D} + #define CODED_SECP128R2_SZ 5 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_secp128r2[] = CODED_SECP128R2; + #else + #define ecc_oid_secp128r2 CODED_SECP128R2 + #endif + #define ecc_oid_secp128r2_sz CODED_SECP128R2_SZ + #endif /* HAVE_ECC_SECPR2 */ +#endif /* ECC128 */ +#ifdef ECC160 + #ifndef NO_ECC_SECP + #ifdef HAVE_OID_ENCODING + #define CODED_SECP160R1 {1,3,132,0,8} + #define CODED_SECP160R1_SZ 5 + #else + #define CODED_SECP160R1 {0x2B,0x81,0x04,0x00,0x08} + #define CODED_SECP160R1_SZ 5 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_secp160r1[] = CODED_SECP160R1; + #else + #define ecc_oid_secp160r1 CODED_SECP160R1 + #endif + #define ecc_oid_secp160r1_sz CODED_SECP160R1_SZ + #endif /* !NO_ECC_SECP */ + #ifdef HAVE_ECC_SECPR2 + #ifdef HAVE_OID_ENCODING + #define CODED_SECP160R2 {1,3,132,0,30} + #define CODED_SECP160R2_SZ 5 + #else + #define CODED_SECP160R2 {0x2B,0x81,0x04,0x00,0x1E} + #define CODED_SECP160R2_SZ 5 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_secp160r2[] = CODED_SECP160R2; + #else + #define ecc_oid_secp160r2 CODED_SECP160R2 + #endif + #define ecc_oid_secp160r2_sz CODED_SECP160R2_SZ + #endif /* HAVE_ECC_SECPR2 */ + #ifdef HAVE_ECC_KOBLITZ + #ifdef HAVE_OID_ENCODING + #define CODED_SECP160K1 {1,3,132,0,9} + #define CODED_SECP160K1_SZ 5 + #else + #define CODED_SECP160K1 {0x2B,0x81,0x04,0x00,0x09} + #define CODED_SECP160K1_SZ 5 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_secp160k1[] = CODED_SECP160K1; + #else + #define ecc_oid_secp160k1 CODED_SECP160K1 + #endif + #define ecc_oid_secp160k1_sz CODED_SECP160K1_SZ + #endif /* HAVE_ECC_KOBLITZ */ + #ifdef HAVE_ECC_BRAINPOOL + 
#ifdef HAVE_OID_ENCODING + #define CODED_BRAINPOOLP160R1 {1,3,36,3,3,2,8,1,1,1} + #define CODED_BRAINPOOLP160R1_SZ 10 + #else + #define CODED_BRAINPOOLP160R1 {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x01} + #define CODED_BRAINPOOLP160R1_SZ 9 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_brainpoolp160r1[] = CODED_BRAINPOOLP160R1; + #else + #define ecc_oid_brainpoolp160r1 CODED_BRAINPOOLP160R1 + #endif + #define ecc_oid_brainpoolp160r1_sz CODED_BRAINPOOLP160R1_SZ + #endif /* HAVE_ECC_BRAINPOOL */ +#endif /* ECC160 */ +#ifdef ECC192 + #ifndef NO_ECC_SECP + #ifdef HAVE_OID_ENCODING + #define CODED_SECP192R1 {1,2,840,10045,3,1,1} + #define CODED_SECP192R1_SZ 7 + #else + #define CODED_SECP192R1 {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x01} + #define CODED_SECP192R1_SZ 8 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_secp192r1[] = CODED_SECP192R1; + #else + #define ecc_oid_secp192r1 CODED_SECP192R1 + #endif + #define ecc_oid_secp192r1_sz CODED_SECP192R1_SZ + #endif /* !NO_ECC_SECP */ + #ifdef HAVE_ECC_SECPR2 + #ifdef HAVE_OID_ENCODING + #define CODED_PRIME192V2 {1,2,840,10045,3,1,2} + #define CODED_PRIME192V2_SZ 7 + #else + #define CODED_PRIME192V2 {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x02} + #define CODED_PRIME192V2_SZ 8 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_prime192v2[] = CODED_PRIME192V2; + #else + #define ecc_oid_prime192v2 CODED_PRIME192V2 + #endif + #define ecc_oid_prime192v2_sz CODED_PRIME192V2_SZ + #endif /* HAVE_ECC_SECPR2 */ + #ifdef HAVE_ECC_SECPR3 + #ifdef HAVE_OID_ENCODING + #define CODED_PRIME192V3 {1,2,840,10045,3,1,3} + #define CODED_PRIME192V3_SZ 7 + #else + #define CODED_PRIME192V3 {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x03} + #define CODED_PRIME192V3_SZ 8 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_prime192v3[] = CODED_PRIME192V3; + #else + #define ecc_oid_prime192v3 CODED_PRIME192V3 + #endif + #define ecc_oid_prime192v3_sz CODED_PRIME192V3_SZ + #endif /* HAVE_ECC_SECPR3 */ + #ifdef HAVE_ECC_KOBLITZ + #ifdef HAVE_OID_ENCODING + #define CODED_SECP192K1 {1,3,132,0,31} + #define CODED_SECP192K1_SZ 5 + #else + #define CODED_SECP192K1 {0x2B,0x81,0x04,0x00,0x1F} + #define CODED_SECP192K1_SZ 5 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_secp192k1[] = CODED_SECP192K1; + #else + #define ecc_oid_secp192k1 CODED_SECP192K1 + #endif + #define ecc_oid_secp192k1_sz CODED_SECP192K1_SZ + #endif /* HAVE_ECC_KOBLITZ */ + #ifdef HAVE_ECC_BRAINPOOL + #ifdef HAVE_OID_ENCODING + #define CODED_BRAINPOOLP192R1 {1,3,36,3,3,2,8,1,1,3} + #define CODED_BRAINPOOLP192R1_SZ 10 + #else + #define CODED_BRAINPOOLP192R1 {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x03} + #define CODED_BRAINPOOLP192R1_SZ 9 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_brainpoolp192r1[] = CODED_BRAINPOOLP192R1; + #else + #define ecc_oid_brainpoolp192r1 CODED_BRAINPOOLP192R1 + #endif + #define ecc_oid_brainpoolp192r1_sz CODED_BRAINPOOLP192R1_SZ + #endif /* HAVE_ECC_BRAINPOOL */ +#endif /* ECC192 */ +#ifdef ECC224 + #ifndef NO_ECC_SECP + #ifdef HAVE_OID_ENCODING + #define CODED_SECP224R1 {1,3,132,0,33} + #define CODED_SECP224R1_SZ 5 + #else + #define CODED_SECP224R1 {0x2B,0x81,0x04,0x00,0x21} + #define CODED_SECP224R1_SZ 5 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_secp224r1[] = CODED_SECP224R1; + #else + #define ecc_oid_secp224r1 CODED_SECP224R1 + #endif + #define ecc_oid_secp224r1_sz CODED_SECP224R1_SZ + #endif /* 
!NO_ECC_SECP */ + #ifdef HAVE_ECC_KOBLITZ + #ifdef HAVE_OID_ENCODING + #define CODED_SECP224K1 {1,3,132,0,32} + #define CODED_SECP224K1_SZ 5 + #else + #define CODED_SECP224K1 {0x2B,0x81,0x04,0x00,0x20} + #define CODED_SECP224K1_SZ 5 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_secp224k1[] = CODED_SECP224K1; + #else + #define ecc_oid_secp224k1 CODED_SECP224K1 + #endif + #define ecc_oid_secp224k1_sz CODED_SECP224K1_SZ + #endif /* HAVE_ECC_KOBLITZ */ + #ifdef HAVE_ECC_BRAINPOOL + #ifdef HAVE_OID_ENCODING + #define CODED_BRAINPOOLP224R1 {1,3,36,3,3,2,8,1,1,5} + #define CODED_BRAINPOOLP224R1_SZ 10 + #else + #define CODED_BRAINPOOLP224R1 {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x05} + #define CODED_BRAINPOOLP224R1_SZ 9 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_brainpoolp224r1[] = CODED_BRAINPOOLP224R1; + #else + #define ecc_oid_brainpoolp224r1 CODED_BRAINPOOLP224R1 + #endif + #define ecc_oid_brainpoolp224r1_sz CODED_BRAINPOOLP224R1_SZ + #endif /* HAVE_ECC_BRAINPOOL */ +#endif /* ECC224 */ +#ifdef ECC239 + #ifndef NO_ECC_SECP + #ifdef HAVE_OID_ENCODING + #define CODED_PRIME239V1 {1,2,840,10045,3,1,4} + #define CODED_PRIME239V1_SZ 7 + #else + #define CODED_PRIME239V1 {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x04} + #define CODED_PRIME239V1_SZ 8 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_prime239v1[] = CODED_PRIME239V1; + #else + #define ecc_oid_prime239v1 CODED_PRIME239V1 + #endif + #define ecc_oid_prime239v1_sz CODED_PRIME239V1_SZ + #endif /* !NO_ECC_SECP */ + #ifdef HAVE_ECC_SECPR2 + #ifdef HAVE_OID_ENCODING + #define CODED_PRIME239V2 {1,2,840,10045,3,1,5} + #define CODED_PRIME239V2_SZ 7 + #else + #define CODED_PRIME239V2 {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x05} + #define CODED_PRIME239V2_SZ 8 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_prime239v2[] = CODED_PRIME239V2; + #else + #define ecc_oid_prime239v2 CODED_PRIME239V2 + #endif + #define ecc_oid_prime239v2_sz CODED_PRIME239V2_SZ + #endif /* HAVE_ECC_SECPR2 */ + #ifdef HAVE_ECC_SECPR3 + #ifdef HAVE_OID_ENCODING + #define CODED_PRIME239V3 {1,2,840,10045,3,1,6} + #define CODED_PRIME239V3_SZ 7 + #else + #define CODED_PRIME239V3 {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x06} + #define CODED_PRIME239V3_SZ 8 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_prime239v3[] = CODED_PRIME239V3; + #else + #define ecc_oid_prime239v3 CODED_PRIME239V3 + #endif + #define ecc_oid_prime239v3_sz CODED_PRIME239V3_SZ + #endif /* HAVE_ECC_SECPR3 */ +#endif /* ECC239 */ +#ifdef ECC256 + #ifndef NO_ECC_SECP + #ifdef HAVE_OID_ENCODING + #define CODED_SECP256R1 {1,2,840,10045,3,1,7} + #define CODED_SECP256R1_SZ 7 + #else + #define CODED_SECP256R1 {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x07} + #define CODED_SECP256R1_SZ 8 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_secp256r1[] = CODED_SECP256R1; + #else + #define ecc_oid_secp256r1 CODED_SECP256R1 + #endif + #define ecc_oid_secp256r1_sz CODED_SECP256R1_SZ + #endif /* !NO_ECC_SECP */ + #ifdef HAVE_ECC_KOBLITZ + #ifdef HAVE_OID_ENCODING + #define CODED_SECP256K1 {1,3,132,0,10} + #define CODED_SECP256K1_SZ 5 + #else + #define CODED_SECP256K1 {0x2B,0x81,0x04,0x00,0x0A} + #define CODED_SECP256K1_SZ 5 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_secp256k1[] = CODED_SECP256K1; + #else + #define ecc_oid_secp256k1 CODED_SECP256K1 + #endif + #define ecc_oid_secp256k1_sz CODED_SECP256K1_SZ + #endif /* HAVE_ECC_KOBLITZ */ 
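+    /* Worked example of the two encodings above: secp256k1 is OID
+     * 1.3.132.0.10. The dotted form keeps the raw arcs for HAVE_OID_ENCODING;
+     * the byte form is the DER body, where the first byte packs
+     * 40*1 + 3 = 0x2B and each later arc is base-128 with the high bit as a
+     * continuation flag, e.g. 132 = 1*128 + 4 -> 0x81,0x04, then 0x00 and
+     * 0x0A, giving {0x2B,0x81,0x04,0x00,0x0A}. */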
+ #ifdef HAVE_ECC_BRAINPOOL + #ifdef HAVE_OID_ENCODING + #define CODED_BRAINPOOLP256R1 {1,3,36,3,3,2,8,1,1,7} + #define CODED_BRAINPOOLP256R1_SZ 10 + #else + #define CODED_BRAINPOOLP256R1 {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x07} + #define CODED_BRAINPOOLP256R1_SZ 9 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_brainpoolp256r1[] = CODED_BRAINPOOLP256R1; + #else + #define ecc_oid_brainpoolp256r1 CODED_BRAINPOOLP256R1 + #endif + #define ecc_oid_brainpoolp256r1_sz CODED_BRAINPOOLP256R1_SZ + #endif /* HAVE_ECC_BRAINPOOL */ +#endif /* ECC256 */ +#ifdef ECC320 + #ifdef HAVE_ECC_BRAINPOOL + #ifdef HAVE_OID_ENCODING + #define CODED_BRAINPOOLP320R1 {1,3,36,3,3,2,8,1,1,9} + #define CODED_BRAINPOOLP320R1_SZ 10 + #else + #define CODED_BRAINPOOLP320R1 {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x09} + #define CODED_BRAINPOOLP320R1_SZ 9 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_brainpoolp320r1[] = CODED_BRAINPOOLP320R1; + #else + #define ecc_oid_brainpoolp320r1 CODED_BRAINPOOLP320R1 + #endif + #define ecc_oid_brainpoolp320r1_sz CODED_BRAINPOOLP320R1_SZ + #endif /* HAVE_ECC_BRAINPOOL */ +#endif /* ECC320 */ +#ifdef ECC384 + #ifndef NO_ECC_SECP + #ifdef HAVE_OID_ENCODING + #define CODED_SECP384R1 {1,3,132,0,34} + #define CODED_SECP384R1_SZ 5 + #else + #define CODED_SECP384R1 {0x2B,0x81,0x04,0x00,0x22} + #define CODED_SECP384R1_SZ 5 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_secp384r1[] = CODED_SECP384R1; + #define CODED_SECP384R1_OID ecc_oid_secp384r1 + #else + #define ecc_oid_secp384r1 CODED_SECP384R1 + #endif + #define ecc_oid_secp384r1_sz CODED_SECP384R1_SZ + #endif /* !NO_ECC_SECP */ + #ifdef HAVE_ECC_BRAINPOOL + #ifdef HAVE_OID_ENCODING + #define CODED_BRAINPOOLP384R1 {1,3,36,3,3,2,8,1,1,11} + #define CODED_BRAINPOOLP384R1_SZ 10 + #else + #define CODED_BRAINPOOLP384R1 {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0B} + #define CODED_BRAINPOOLP384R1_SZ 9 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_brainpoolp384r1[] = CODED_BRAINPOOLP384R1; + #else + #define ecc_oid_brainpoolp384r1 CODED_BRAINPOOLP384R1 + #endif + #define ecc_oid_brainpoolp384r1_sz CODED_BRAINPOOLP384R1_SZ + #endif /* HAVE_ECC_BRAINPOOL */ +#endif /* ECC384 */ +#ifdef ECC512 + #ifdef HAVE_ECC_BRAINPOOL + #ifdef HAVE_OID_ENCODING + #define CODED_BRAINPOOLP512R1 {1,3,36,3,3,2,8,1,1,13} + #define CODED_BRAINPOOLP512R1_SZ 10 + #else + #define CODED_BRAINPOOLP512R1 {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0D} + #define CODED_BRAINPOOLP512R1_SZ 9 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_brainpoolp512r1[] = CODED_BRAINPOOLP512R1; + #else + #define ecc_oid_brainpoolp512r1 CODED_BRAINPOOLP512R1 + #endif + #define ecc_oid_brainpoolp512r1_sz CODED_BRAINPOOLP512R1_SZ + #endif /* HAVE_ECC_BRAINPOOL */ +#endif /* ECC512 */ +#ifdef ECC521 + #ifndef NO_ECC_SECP + #ifdef HAVE_OID_ENCODING + #define CODED_SECP521R1 {1,3,132,0,35} + #define CODED_SECP521R1_SZ 5 + #else + #define CODED_SECP521R1 {0x2B,0x81,0x04,0x00,0x23} + #define CODED_SECP521R1_SZ 5 + #endif + #ifndef WOLFSSL_ECC_CURVE_STATIC + static const ecc_oid_t ecc_oid_secp521r1[] = CODED_SECP521R1; + #else + #define ecc_oid_secp521r1 CODED_SECP521R1 + #endif + #define ecc_oid_secp521r1_sz CODED_SECP521R1_SZ + #endif /* !NO_ECC_SECP */ +#endif /* ECC521 */ + + +/* This holds the key settings. + ***MUST*** be organized by size from smallest to largest. 
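+   (A lookup by key size walks this table in order and settles on the first
+   entry whose size is at least the requested size, so an entry placed out of
+   order would shadow every larger curve.)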
*/ + +const ecc_set_type ecc_sets[] = { +#ifdef ECC112 + #ifndef NO_ECC_SECP + { + 14, /* size/bytes */ + ECC_SECP112R1, /* ID */ + "SECP112R1", /* curve name */ + "DB7C2ABF62E35E668076BEAD208B", /* prime */ + "DB7C2ABF62E35E668076BEAD2088", /* A */ + "659EF8BA043916EEDE8911702B22", /* B */ + "DB7C2ABF62E35E7628DFAC6561C5", /* order */ + "9487239995A5EE76B55F9C2F098", /* Gx */ + "A89CE5AF8724C0A23E0E0FF77500", /* Gy */ + ecc_oid_secp112r1, /* oid/oidSz */ + ecc_oid_secp112r1_sz, + ECC_SECP112R1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* !NO_ECC_SECP */ + #ifdef HAVE_ECC_SECPR2 + { + 14, /* size/bytes */ + ECC_SECP112R2, /* ID */ + "SECP112R2", /* curve name */ + "DB7C2ABF62E35E668076BEAD208B", /* prime */ + "6127C24C05F38A0AAAF65C0EF02C", /* A */ + "51DEF1815DB5ED74FCC34C85D709", /* B */ + "36DF0AAFD8B8D7597CA10520D04B", /* order */ + "4BA30AB5E892B4E1649DD0928643", /* Gx */ + "ADCD46F5882E3747DEF36E956E97", /* Gy */ + ecc_oid_secp112r2, /* oid/oidSz */ + ecc_oid_secp112r2_sz, + ECC_SECP112R2_OID, /* oid sum */ + 4, /* cofactor */ + }, + #endif /* HAVE_ECC_SECPR2 */ +#endif /* ECC112 */ +#ifdef ECC128 + #ifndef NO_ECC_SECP + { + 16, /* size/bytes */ + ECC_SECP128R1, /* ID */ + "SECP128R1", /* curve name */ + "FFFFFFFDFFFFFFFFFFFFFFFFFFFFFFFF", /* prime */ + "FFFFFFFDFFFFFFFFFFFFFFFFFFFFFFFC", /* A */ + "E87579C11079F43DD824993C2CEE5ED3", /* B */ + "FFFFFFFE0000000075A30D1B9038A115", /* order */ + "161FF7528B899B2D0C28607CA52C5B86", /* Gx */ + "CF5AC8395BAFEB13C02DA292DDED7A83", /* Gy */ + ecc_oid_secp128r1, /* oid/oidSz */ + ecc_oid_secp128r1_sz, + ECC_SECP128R1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* !NO_ECC_SECP */ + #ifdef HAVE_ECC_SECPR2 + { + 16, /* size/bytes */ + ECC_SECP128R2, /* ID */ + "SECP128R2", /* curve name */ + "FFFFFFFDFFFFFFFFFFFFFFFFFFFFFFFF", /* prime */ + "D6031998D1B3BBFEBF59CC9BBFF9AEE1", /* A */ + "5EEEFCA380D02919DC2C6558BB6D8A5D", /* B */ + "3FFFFFFF7FFFFFFFBE0024720613B5A3", /* order */ + "7B6AA5D85E572983E6FB32A7CDEBC140", /* Gx */ + "27B6916A894D3AEE7106FE805FC34B44", /* Gy */ + ecc_oid_secp128r2, /* oid/oidSz */ + ecc_oid_secp128r2_sz, + ECC_SECP128R2_OID, /* oid sum */ + 4, /* cofactor */ + }, + #endif /* HAVE_ECC_SECPR2 */ +#endif /* ECC128 */ +#ifdef ECC160 + #ifndef NO_ECC_SECP + { + 20, /* size/bytes */ + ECC_SECP160R1, /* ID */ + "SECP160R1", /* curve name */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF7FFFFFFF", /* prime */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF7FFFFFFC", /* A */ + "1C97BEFC54BD7A8B65ACF89F81D4D4ADC565FA45", /* B */ + "100000000000000000001F4C8F927AED3CA752257",/* order */ + "4A96B5688EF573284664698968C38BB913CBFC82", /* Gx */ + "23A628553168947D59DCC912042351377AC5FB32", /* Gy */ + ecc_oid_secp160r1, /* oid/oidSz */ + ecc_oid_secp160r1_sz, + ECC_SECP160R1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* !NO_ECC_SECP */ + #ifdef HAVE_ECC_SECPR2 + { + 20, /* size/bytes */ + ECC_SECP160R2, /* ID */ + "SECP160R2", /* curve name */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFAC73", /* prime */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFAC70", /* A */ + "B4E134D3FB59EB8BAB57274904664D5AF50388BA", /* B */ + "100000000000000000000351EE786A818F3A1A16B",/* order */ + "52DCB034293A117E1F4FF11B30F7199D3144CE6D", /* Gx */ + "FEAFFEF2E331F296E071FA0DF9982CFEA7D43F2E", /* Gy */ + ecc_oid_secp160r2, /* oid/oidSz */ + ecc_oid_secp160r2_sz, + ECC_SECP160R2_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* HAVE_ECC_SECPR2 */ + #ifdef HAVE_ECC_KOBLITZ + { + 20, /* size/bytes */ + ECC_SECP160K1, /* ID */ + "SECP160K1", /* curve name */ 
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFAC73", /* prime */ + "0000000000000000000000000000000000000000", /* A */ + "0000000000000000000000000000000000000007", /* B */ + "100000000000000000001B8FA16DFAB9ACA16B6B3",/* order */ + "3B4C382CE37AA192A4019E763036F4F5DD4D7EBB", /* Gx */ + "938CF935318FDCED6BC28286531733C3F03C4FEE", /* Gy */ + ecc_oid_secp160k1, /* oid/oidSz */ + ecc_oid_secp160k1_sz, + ECC_SECP160K1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* HAVE_ECC_KOBLITZ */ + #ifdef HAVE_ECC_BRAINPOOL + { + 20, /* size/bytes */ + ECC_BRAINPOOLP160R1, /* ID */ + "BRAINPOOLP160R1", /* curve name */ + "E95E4A5F737059DC60DFC7AD95B3D8139515620F", /* prime */ + "340E7BE2A280EB74E2BE61BADA745D97E8F7C300", /* A */ + "1E589A8595423412134FAA2DBDEC95C8D8675E58", /* B */ + "E95E4A5F737059DC60DF5991D45029409E60FC09", /* order */ + "BED5AF16EA3F6A4F62938C4631EB5AF7BDBCDBC3", /* Gx */ + "1667CB477A1A8EC338F94741669C976316DA6321", /* Gy */ + ecc_oid_brainpoolp160r1, /* oid/oidSz */ + ecc_oid_brainpoolp160r1_sz, + ECC_BRAINPOOLP160R1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* HAVE_ECC_BRAINPOOL */ +#endif /* ECC160 */ +#ifdef ECC192 + #ifndef NO_ECC_SECP + { + 24, /* size/bytes */ + ECC_SECP192R1, /* ID */ + "SECP192R1", /* curve name */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFF", /* prime */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFC", /* A */ + "64210519E59C80E70FA7E9AB72243049FEB8DEECC146B9B1", /* B */ + "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D22831", /* order */ + "188DA80EB03090F67CBF20EB43A18800F4FF0AFD82FF1012", /* Gx */ + "7192B95FFC8DA78631011ED6B24CDD573F977A11E794811", /* Gy */ + ecc_oid_secp192r1, /* oid/oidSz */ + ecc_oid_secp192r1_sz, + ECC_SECP192R1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* !NO_ECC_SECP */ + #ifdef HAVE_ECC_SECPR2 + { + 24, /* size/bytes */ + ECC_PRIME192V2, /* ID */ + "PRIME192V2", /* curve name */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFF", /* prime */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFC", /* A */ + "CC22D6DFB95C6B25E49C0D6364A4E5980C393AA21668D953", /* B */ + "FFFFFFFFFFFFFFFFFFFFFFFE5FB1A724DC80418648D8DD31", /* order */ + "EEA2BAE7E1497842F2DE7769CFE9C989C072AD696F48034A", /* Gx */ + "6574D11D69B6EC7A672BB82A083DF2F2B0847DE970B2DE15", /* Gy */ + ecc_oid_prime192v2, /* oid/oidSz */ + ecc_oid_prime192v2_sz, + ECC_PRIME192V2_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* HAVE_ECC_SECPR2 */ + #ifdef HAVE_ECC_SECPR3 + { + 24, /* size/bytes */ + ECC_PRIME192V3, /* ID */ + "PRIME192V3", /* curve name */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFF", /* prime */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFC", /* A */ + "22123DC2395A05CAA7423DAECCC94760A7D462256BD56916", /* B */ + "FFFFFFFFFFFFFFFFFFFFFFFF7A62D031C83F4294F640EC13", /* order */ + "7D29778100C65A1DA1783716588DCE2B8B4AEE8E228F1896", /* Gx */ + "38A90F22637337334B49DCB66A6DC8F9978ACA7648A943B0", /* Gy */ + ecc_oid_prime192v3, /* oid/oidSz */ + ecc_oid_prime192v3_sz, + ECC_PRIME192V3_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* HAVE_ECC_SECPR3 */ + #ifdef HAVE_ECC_KOBLITZ + { + 24, /* size/bytes */ + ECC_SECP192K1, /* ID */ + "SECP192K1", /* curve name */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFEE37", /* prime */ + "000000000000000000000000000000000000000000000000", /* A */ + "000000000000000000000000000000000000000000000003", /* B */ + "FFFFFFFFFFFFFFFFFFFFFFFE26F2FC170F69466A74DEFD8D", /* order */ + "DB4FF10EC057E9AE26B07D0280B7F4341DA5D1B1EAE06C7D", /* Gx */ + 
"9B2F2F6D9C5628A7844163D015BE86344082AA88D95E2F9D", /* Gy */ + ecc_oid_secp192k1, /* oid/oidSz */ + ecc_oid_secp192k1_sz, + ECC_SECP192K1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* HAVE_ECC_KOBLITZ */ + #ifdef HAVE_ECC_BRAINPOOL + { + 24, /* size/bytes */ + ECC_BRAINPOOLP192R1, /* ID */ + "BRAINPOOLP192R1", /* curve name */ + "C302F41D932A36CDA7A3463093D18DB78FCE476DE1A86297", /* prime */ + "6A91174076B1E0E19C39C031FE8685C1CAE040E5C69A28EF", /* A */ + "469A28EF7C28CCA3DC721D044F4496BCCA7EF4146FBF25C9", /* B */ + "C302F41D932A36CDA7A3462F9E9E916B5BE8F1029AC4ACC1", /* order */ + "C0A0647EAAB6A48753B033C56CB0F0900A2F5C4853375FD6", /* Gx */ + "14B690866ABD5BB88B5F4828C1490002E6773FA2FA299B8F", /* Gy */ + ecc_oid_brainpoolp192r1, /* oid/oidSz */ + ecc_oid_brainpoolp192r1_sz, + ECC_BRAINPOOLP192R1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* HAVE_ECC_BRAINPOOL */ +#endif /* ECC192 */ +#ifdef ECC224 + #ifndef NO_ECC_SECP + { + 28, /* size/bytes */ + ECC_SECP224R1, /* ID */ + "SECP224R1", /* curve name */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF000000000000000000000001", /* prime */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFE", /* A */ + "B4050A850C04B3ABF54132565044B0B7D7BFD8BA270B39432355FFB4", /* B */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A3D", /* order */ + "B70E0CBD6BB4BF7F321390B94A03C1D356C21122343280D6115C1D21", /* Gx */ + "BD376388B5F723FB4C22DFE6CD4375A05A07476444D5819985007E34", /* Gy */ + ecc_oid_secp224r1, /* oid/oidSz */ + ecc_oid_secp224r1_sz, + ECC_SECP224R1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* !NO_ECC_SECP */ + #ifdef HAVE_ECC_KOBLITZ + { + 28, /* size/bytes */ + ECC_SECP224K1, /* ID */ + "SECP224K1", /* curve name */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFE56D", /* prime */ + "00000000000000000000000000000000000000000000000000000000", /* A */ + "00000000000000000000000000000000000000000000000000000005", /* B */ + "10000000000000000000000000001DCE8D2EC6184CAF0A971769FB1F7",/* order */ + "A1455B334DF099DF30FC28A169A467E9E47075A90F7E650EB6B7A45C", /* Gx */ + "7E089FED7FBA344282CAFBD6F7E319F7C0B0BD59E2CA4BDB556D61A5", /* Gy */ + ecc_oid_secp224k1, /* oid/oidSz */ + ecc_oid_secp224k1_sz, + ECC_SECP224K1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* HAVE_ECC_KOBLITZ */ + #ifdef HAVE_ECC_BRAINPOOL + { + 28, /* size/bytes */ + ECC_BRAINPOOLP224R1, /* ID */ + "BRAINPOOLP224R1", /* curve name */ + "D7C134AA264366862A18302575D1D787B09F075797DA89F57EC8C0FF", /* prime */ + "68A5E62CA9CE6C1C299803A6C1530B514E182AD8B0042A59CAD29F43", /* A */ + "2580F63CCFE44138870713B1A92369E33E2135D266DBB372386C400B", /* B */ + "D7C134AA264366862A18302575D0FB98D116BC4B6DDEBCA3A5A7939F", /* order */ + "0D9029AD2C7E5CF4340823B2A87DC68C9E4CE3174C1E6EFDEE12C07D", /* Gx */ + "58AA56F772C0726F24C6B89E4ECDAC24354B9E99CAA3F6D3761402CD", /* Gy */ + ecc_oid_brainpoolp224r1, /* oid/oidSz */ + ecc_oid_brainpoolp224r1_sz, + ECC_BRAINPOOLP224R1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* HAVE_ECC_BRAINPOOL */ +#endif /* ECC224 */ +#ifdef ECC239 + #ifndef NO_ECC_SECP + { + 30, /* size/bytes */ + ECC_PRIME239V1, /* ID */ + "PRIME239V1", /* curve name */ + "7FFFFFFFFFFFFFFFFFFFFFFF7FFFFFFFFFFF8000000000007FFFFFFFFFFF", /* prime */ + "7FFFFFFFFFFFFFFFFFFFFFFF7FFFFFFFFFFF8000000000007FFFFFFFFFFC", /* A */ + "6B016C3BDCF18941D0D654921475CA71A9DB2FB27D1D37796185C2942C0A", /* B */ + "7FFFFFFFFFFFFFFFFFFFFFFF7FFFFF9E5E9A9F5D9071FBD1522688909D0B", /* order */ + "0FFA963CDCA8816CCC33B8642BEDF905C3D358573D3F27FBBD3B3CB9AAAF", 
/* Gx */ + "7DEBE8E4E90A5DAE6E4054CA530BA04654B36818CE226B39FCCB7B02F1AE", /* Gy */ + ecc_oid_prime239v1, /* oid/oidSz */ + ecc_oid_prime239v1_sz, + ECC_PRIME239V1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* !NO_ECC_SECP */ + #ifdef HAVE_ECC_SECPR2 + { + 30, /* size/bytes */ + ECC_PRIME239V2, /* ID */ + "PRIME239V2", /* curve name */ + "7FFFFFFFFFFFFFFFFFFFFFFF7FFFFFFFFFFF8000000000007FFFFFFFFFFF", /* prime */ + "7FFFFFFFFFFFFFFFFFFFFFFF7FFFFFFFFFFF8000000000007FFFFFFFFFFC", /* A */ + "617FAB6832576CBBFED50D99F0249C3FEE58B94BA0038C7AE84C8C832F2C", /* B */ + "7FFFFFFFFFFFFFFFFFFFFFFF800000CFA7E8594377D414C03821BC582063", /* order */ + "38AF09D98727705120C921BB5E9E26296A3CDCF2F35757A0EAFD87B830E7", /* Gx */ + "5B0125E4DBEA0EC7206DA0FC01D9B081329FB555DE6EF460237DFF8BE4BA", /* Gy */ + ecc_oid_prime239v2, /* oid/oidSz */ + ecc_oid_prime239v2_sz, + ECC_PRIME239V2_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* HAVE_ECC_SECPR2 */ + #ifdef HAVE_ECC_SECPR3 + { + 30, /* size/bytes */ + ECC_PRIME239V3, /* ID */ + "PRIME239V3", /* curve name */ + "7FFFFFFFFFFFFFFFFFFFFFFF7FFFFFFFFFFF8000000000007FFFFFFFFFFF", /* prime */ + "7FFFFFFFFFFFFFFFFFFFFFFF7FFFFFFFFFFF8000000000007FFFFFFFFFFC", /* A */ + "255705FA2A306654B1F4CB03D6A750A30C250102D4988717D9BA15AB6D3E", /* B */ + "7FFFFFFFFFFFFFFFFFFFFFFF7FFFFF975DEB41B3A6057C3C432146526551", /* order */ + "6768AE8E18BB92CFCF005C949AA2C6D94853D0E660BBF854B1C9505FE95A", /* Gx */ + "1607E6898F390C06BC1D552BAD226F3B6FCFE48B6E818499AF18E3ED6CF3", /* Gy */ + ecc_oid_prime239v3, /* oid/oidSz */ + ecc_oid_prime239v3_sz, + ECC_PRIME239V3_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* HAVE_ECC_SECPR3 */ +#endif /* ECC239 */ +#ifdef ECC256 + #ifndef NO_ECC_SECP + { + 32, /* size/bytes */ + ECC_SECP256R1, /* ID */ + "SECP256R1", /* curve name */ + "FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF", /* prime */ + "FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFC", /* A */ + "5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B", /* B */ + "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551", /* order */ + "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296", /* Gx */ + "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5", /* Gy */ + ecc_oid_secp256r1, /* oid/oidSz */ + ecc_oid_secp256r1_sz, + ECC_SECP256R1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* !NO_ECC_SECP */ + #ifdef HAVE_ECC_KOBLITZ + { + 32, /* size/bytes */ + ECC_SECP256K1, /* ID */ + "SECP256K1", /* curve name */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F", /* prime */ + "0000000000000000000000000000000000000000000000000000000000000000", /* A */ + "0000000000000000000000000000000000000000000000000000000000000007", /* B */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141", /* order */ + "79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798", /* Gx */ + "483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8", /* Gy */ + ecc_oid_secp256k1, /* oid/oidSz */ + ecc_oid_secp256k1_sz, + ECC_SECP256K1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* HAVE_ECC_KOBLITZ */ + #ifdef HAVE_ECC_BRAINPOOL + { + 32, /* size/bytes */ + ECC_BRAINPOOLP256R1, /* ID */ + "BRAINPOOLP256R1", /* curve name */ + "A9FB57DBA1EEA9BC3E660A909D838D726E3BF623D52620282013481D1F6E5377", /* prime */ + "7D5A0975FC2C3057EEF67530417AFFE7FB8055C126DC5C6CE94A4B44F330B5D9", /* A */ + "26DC5C6CE94A4B44F330B5D9BBD77CBF958416295CF7E1CE6BCCDC18FF8C07B6", /* B */ + 
"A9FB57DBA1EEA9BC3E660A909D838D718C397AA3B561A6F7901E0E82974856A7", /* order */ + "8BD2AEB9CB7E57CB2C4B482FFC81B7AFB9DE27E1E3BD23C23A4453BD9ACE3262", /* Gx */ + "547EF835C3DAC4FD97F8461A14611DC9C27745132DED8E545C1D54C72F046997", /* Gy */ + ecc_oid_brainpoolp256r1, /* oid/oidSz */ + ecc_oid_brainpoolp256r1_sz, + ECC_BRAINPOOLP256R1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* HAVE_ECC_BRAINPOOL */ +#endif /* ECC256 */ +#ifdef ECC320 + #ifdef HAVE_ECC_BRAINPOOL + { + 40, /* size/bytes */ + ECC_BRAINPOOLP320R1, /* ID */ + "BRAINPOOLP320R1", /* curve name */ + "D35E472036BC4FB7E13C785ED201E065F98FCFA6F6F40DEF4F92B9EC7893EC28FCD412B1F1B32E27", /* prime */ + "3EE30B568FBAB0F883CCEBD46D3F3BB8A2A73513F5EB79DA66190EB085FFA9F492F375A97D860EB4", /* A */ + "520883949DFDBC42D3AD198640688A6FE13F41349554B49ACC31DCCD884539816F5EB4AC8FB1F1A6", /* B */ + "D35E472036BC4FB7E13C785ED201E065F98FCFA5B68F12A32D482EC7EE8658E98691555B44C59311", /* order */ + "43BD7E9AFB53D8B85289BCC48EE5BFE6F20137D10A087EB6E7871E2A10A599C710AF8D0D39E20611", /* Gx */ + "14FDD05545EC1CC8AB4093247F77275E0743FFED117182EAA9C77877AAAC6AC7D35245D1692E8EE1", /* Gy */ + ecc_oid_brainpoolp320r1, ecc_oid_brainpoolp320r1_sz, /* oid/oidSz */ + ECC_BRAINPOOLP320R1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* HAVE_ECC_BRAINPOOL */ +#endif /* ECC320 */ +#ifdef ECC384 + #ifndef NO_ECC_SECP + { + 48, /* size/bytes */ + ECC_SECP384R1, /* ID */ + "SECP384R1", /* curve name */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFF", /* prime */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFC", /* A */ + "B3312FA7E23EE7E4988E056BE3F82D19181D9C6EFE8141120314088F5013875AC656398D8A2ED19D2A85C8EDD3EC2AEF", /* B */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC7634D81F4372DDF581A0DB248B0A77AECEC196ACCC52973", /* order */ + "AA87CA22BE8B05378EB1C71EF320AD746E1D3B628BA79B9859F741E082542A385502F25DBF55296C3A545E3872760AB7", /* Gx */ + "3617DE4A96262C6F5D9E98BF9292DC29F8F41DBD289A147CE9DA3113B5F0B8C00A60B1CE1D7E819D7A431D7C90EA0E5F", /* Gy */ + ecc_oid_secp384r1, ecc_oid_secp384r1_sz, /* oid/oidSz */ + ECC_SECP384R1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* !NO_ECC_SECP */ + #ifdef HAVE_ECC_BRAINPOOL + { + 48, /* size/bytes */ + ECC_BRAINPOOLP384R1, /* ID */ + "BRAINPOOLP384R1", /* curve name */ + "8CB91E82A3386D280F5D6F7E50E641DF152F7109ED5456B412B1DA197FB71123ACD3A729901D1A71874700133107EC53", /* prime */ + "7BC382C63D8C150C3C72080ACE05AFA0C2BEA28E4FB22787139165EFBA91F90F8AA5814A503AD4EB04A8C7DD22CE2826", /* A */ + "04A8C7DD22CE28268B39B55416F0447C2FB77DE107DCD2A62E880EA53EEB62D57CB4390295DBC9943AB78696FA504C11", /* B */ + "8CB91E82A3386D280F5D6F7E50E641DF152F7109ED5456B31F166E6CAC0425A7CF3AB6AF6B7FC3103B883202E9046565", /* order */ + "1D1C64F068CF45FFA2A63A81B7C13F6B8847A3E77EF14FE3DB7FCAFE0CBD10E8E826E03436D646AAEF87B2E247D4AF1E", /* Gx */ + "8ABE1D7520F9C2A45CB1EB8E95CFD55262B70B29FEEC5864E19C054FF99129280E4646217791811142820341263C5315", /* Gy */ + ecc_oid_brainpoolp384r1, ecc_oid_brainpoolp384r1_sz, /* oid/oidSz */ + ECC_BRAINPOOLP384R1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* HAVE_ECC_BRAINPOOL */ +#endif /* ECC384 */ +#ifdef ECC512 + #ifdef HAVE_ECC_BRAINPOOL + { + 64, /* size/bytes */ + ECC_BRAINPOOLP512R1, /* ID */ + "BRAINPOOLP512R1", /* curve name */ + "AADD9DB8DBE9C48B3FD4E6AE33C9FC07CB308DB3B3C9D20ED6639CCA703308717D4D9B009BC66842AECDA12AE6A380E62881FF2F2D82C68528AA6056583A48F3", /* prime */ + 
"7830A3318B603B89E2327145AC234CC594CBDD8D3DF91610A83441CAEA9863BC2DED5D5AA8253AA10A2EF1C98B9AC8B57F1117A72BF2C7B9E7C1AC4D77FC94CA", /* A */ + "3DF91610A83441CAEA9863BC2DED5D5AA8253AA10A2EF1C98B9AC8B57F1117A72BF2C7B9E7C1AC4D77FC94CADC083E67984050B75EBAE5DD2809BD638016F723", /* B */ + "AADD9DB8DBE9C48B3FD4E6AE33C9FC07CB308DB3B3C9D20ED6639CCA70330870553E5C414CA92619418661197FAC10471DB1D381085DDADDB58796829CA90069", /* order */ + "81AEE4BDD82ED9645A21322E9C4C6A9385ED9F70B5D916C1B43B62EEF4D0098EFF3B1F78E2D0D48D50D1687B93B97D5F7C6D5047406A5E688B352209BCB9F822", /* Gx */ + "7DDE385D566332ECC0EABFA9CF7822FDF209F70024A57B1AA000C55B881F8111B2DCDE494A5F485E5BCA4BD88A2763AED1CA2B2FA8F0540678CD1E0F3AD80892", /* Gy */ + ecc_oid_brainpoolp512r1, ecc_oid_brainpoolp512r1_sz, /* oid/oidSz */ + ECC_BRAINPOOLP512R1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* HAVE_ECC_BRAINPOOL */ +#endif /* ECC512 */ +#ifdef ECC521 + #ifndef NO_ECC_SECP + { + 66, /* size/bytes */ + ECC_SECP521R1, /* ID */ + "SECP521R1", /* curve name */ + "1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", /* prime */ + "1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC", /* A */ + "51953EB9618E1C9A1F929A21A0B68540EEA2DA725B99B315F3B8B489918EF109E156193951EC7E937B1652C0BD3BB1BF073573DF883D2C34F1EF451FD46B503F00", /* B */ + "1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFA51868783BF2F966B7FCC0148F709A5D03BB5C9B8899C47AEBB6FB71E91386409", /* order */ + "C6858E06B70404E9CD9E3ECB662395B4429C648139053FB521F828AF606B4D3DBAA14B5E77EFE75928FE1DC127A2FFA8DE3348B3C1856A429BF97E7E31C2E5BD66", /* Gx */ + "11839296A789A3BC0045C8A5FB42C7D1BD998F54449579B446817AFBD17273E662C97EE72995EF42640C550B9013FAD0761353C7086A272C24088BE94769FD16650", /* Gy */ + ecc_oid_secp521r1, ecc_oid_secp521r1_sz, /* oid/oidSz */ + ECC_SECP521R1_OID, /* oid sum */ + 1, /* cofactor */ + }, + #endif /* !NO_ECC_SECP */ +#endif /* ECC521 */ +#if defined(WOLFSSL_CUSTOM_CURVES) && defined(ECC_CACHE_CURVE) + /* place holder for custom curve index for cache */ + { + 1, /* non-zero */ + ECC_CURVE_CUSTOM, + #ifndef WOLFSSL_ECC_CURVE_STATIC + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + #else + {0},{0},{0},{0},{0},{0},{0},{0}, + #endif + 0, 0, 0 + }, +#endif + { + 0, + ECC_CURVE_INVALID, + #ifndef WOLFSSL_ECC_CURVE_STATIC + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + #else + {0},{0},{0},{0},{0},{0},{0},{0}, + #endif + 0, 0, 0 + } +}; +#define ECC_SET_COUNT (sizeof(ecc_sets)/sizeof(ecc_set_type)) +const size_t ecc_sets_count = ECC_SET_COUNT - 1; + + +#ifdef HAVE_OID_ENCODING + /* encoded OID cache */ + typedef struct { + word32 oidSz; + byte oid[ECC_MAX_OID_LEN]; + } oid_cache_t; + static oid_cache_t ecc_oid_cache[ECC_SET_COUNT]; +#endif + + +#ifdef HAVE_COMP_KEY +static int wc_ecc_export_x963_compressed(ecc_key*, byte* out, word32* outLen); +#endif + + +#if (defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || !defined(WOLFSSL_SP_MATH)) && \ + !defined(WOLFSSL_ATECC508A) +static int ecc_check_pubkey_order(ecc_key* key, ecc_point* pubkey, mp_int* a, + mp_int* prime, mp_int* order); +#endif + +int mp_jacobi(mp_int* a, mp_int* n, int* c); +int mp_sqrtmod_prime(mp_int* n, mp_int* prime, mp_int* ret); + + +/* Curve Specs */ +typedef struct ecc_curve_spec { + const ecc_set_type* dp; + + mp_int* prime; + mp_int* Af; + #ifdef USE_ECC_B_PARAM + mp_int* Bf; + #endif + mp_int* order; + mp_int* Gx; + mp_int* Gy; 
+ +#ifdef ECC_CACHE_CURVE + mp_int prime_lcl; + mp_int Af_lcl; + #ifdef USE_ECC_B_PARAM + mp_int Bf_lcl; + #endif + mp_int order_lcl; + mp_int Gx_lcl; + mp_int Gy_lcl; +#else + mp_int* spec_ints; + word32 spec_count; + word32 spec_use; +#endif + + byte load_mask; +} ecc_curve_spec; + +enum ecc_curve_load_mask { + ECC_CURVE_FIELD_NONE = 0x00, + ECC_CURVE_FIELD_PRIME = 0x01, + ECC_CURVE_FIELD_AF = 0x02, +#ifdef USE_ECC_B_PARAM + ECC_CURVE_FIELD_BF = 0x04, +#endif + ECC_CURVE_FIELD_ORDER = 0x08, + ECC_CURVE_FIELD_GX = 0x10, + ECC_CURVE_FIELD_GY = 0x20, +#ifdef USE_ECC_B_PARAM + ECC_CURVE_FIELD_ALL = 0x3F, + ECC_CURVE_FIELD_COUNT = 6, +#else + ECC_CURVE_FIELD_ALL = 0x3B, + ECC_CURVE_FIELD_COUNT = 5, +#endif +}; + +#ifdef ECC_CACHE_CURVE + /* cache (mp_int) of the curve parameters */ + static ecc_curve_spec* ecc_curve_spec_cache[ECC_SET_COUNT]; + #ifndef SINGLE_THREADED + static wolfSSL_Mutex ecc_curve_cache_mutex; + #endif + + #define DECLARE_CURVE_SPECS(curve, intcount) ecc_curve_spec* curve = NULL + #define ALLOC_CURVE_SPECS(intcount) + #define FREE_CURVE_SPECS() +#elif defined(WOLFSSL_SMALL_STACK) + #define DECLARE_CURVE_SPECS(curve, intcount) \ + mp_int* spec_ints = NULL; \ + ecc_curve_spec curve_lcl; \ + ecc_curve_spec* curve = &curve_lcl; \ + XMEMSET(curve, 0, sizeof(ecc_curve_spec)); \ + curve->spec_count = intcount + + #define ALLOC_CURVE_SPECS(intcount) \ + spec_ints = (mp_int*)XMALLOC(sizeof(mp_int) * (intcount), NULL, \ + DYNAMIC_TYPE_ECC); \ + if (spec_ints == NULL) \ + return MEMORY_E; \ + curve->spec_ints = spec_ints + #define FREE_CURVE_SPECS() \ + XFREE(spec_ints, NULL, DYNAMIC_TYPE_ECC) +#else + #define DECLARE_CURVE_SPECS(curve, intcount) \ + mp_int spec_ints[(intcount)]; \ + ecc_curve_spec curve_lcl; \ + ecc_curve_spec* curve = &curve_lcl; \ + XMEMSET(curve, 0, sizeof(ecc_curve_spec)); \ + curve->spec_ints = spec_ints; \ + curve->spec_count = intcount + #define ALLOC_CURVE_SPECS(intcount) + #define FREE_CURVE_SPECS() +#endif /* ECC_CACHE_CURVE */ + +static void _wc_ecc_curve_free(ecc_curve_spec* curve) +{ + if (curve == NULL) { + return; + } + + if (curve->load_mask & ECC_CURVE_FIELD_PRIME) + mp_clear(curve->prime); + if (curve->load_mask & ECC_CURVE_FIELD_AF) + mp_clear(curve->Af); +#ifdef USE_ECC_B_PARAM + if (curve->load_mask & ECC_CURVE_FIELD_BF) + mp_clear(curve->Bf); +#endif + if (curve->load_mask & ECC_CURVE_FIELD_ORDER) + mp_clear(curve->order); + if (curve->load_mask & ECC_CURVE_FIELD_GX) + mp_clear(curve->Gx); + if (curve->load_mask & ECC_CURVE_FIELD_GY) + mp_clear(curve->Gy); + + curve->load_mask = 0; +} + +static void wc_ecc_curve_free(ecc_curve_spec* curve) +{ + /* don't free cached curves */ +#ifndef ECC_CACHE_CURVE + _wc_ecc_curve_free(curve); +#endif + (void)curve; +} + +static int wc_ecc_curve_load_item(const char* src, mp_int** dst, + ecc_curve_spec* curve, byte mask) +{ + int err; + +#ifndef ECC_CACHE_CURVE + /* get mp_int from temp */ + if (curve->spec_use >= curve->spec_count) { + WOLFSSL_MSG("Invalid DECLARE_CURVE_SPECS count"); + return ECC_BAD_ARG_E; + } + *dst = &curve->spec_ints[curve->spec_use++]; +#endif + + err = mp_init(*dst); + if (err == MP_OKAY) { + curve->load_mask |= mask; + + err = mp_read_radix(*dst, src, MP_RADIX_HEX); + + #ifdef HAVE_WOLF_BIGINT + if (err == MP_OKAY) + err = wc_mp_to_bigint(*dst, &(*dst)->raw); + #endif + } + return err; +} + +static int wc_ecc_curve_load(const ecc_set_type* dp, ecc_curve_spec** pCurve, + byte load_mask) +{ + int ret = 0, x; + ecc_curve_spec* curve; + byte load_items = 0; /* mask of items to load */ + + 
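+    /* Typical usage (illustrative sketch, using the macros defined above): + * DECLARE_CURVE_SPECS(curve, 2); + * ALLOC_CURVE_SPECS(2); + * err = wc_ecc_curve_load(key->dp, &curve, + * ECC_CURVE_FIELD_PRIME | ECC_CURVE_FIELD_ORDER); + * ... use curve->prime and curve->order ... + * wc_ecc_curve_free(curve); + * FREE_CURVE_SPECS(); + * Fields already set in load_mask are not re-read below. */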
if (dp == NULL || pCurve == NULL) + return BAD_FUNC_ARG; + +#ifdef ECC_CACHE_CURVE + x = wc_ecc_get_curve_idx(dp->id); + if (x == ECC_CURVE_INVALID) + return ECC_BAD_ARG_E; + +#if !defined(SINGLE_THREADED) + ret = wc_LockMutex(&ecc_curve_cache_mutex); + if (ret != 0) { + return ret; + } +#endif + + /* make sure cache has been allocated */ + if (ecc_curve_spec_cache[x] == NULL) { + ecc_curve_spec_cache[x] = (ecc_curve_spec*)XMALLOC( + sizeof(ecc_curve_spec), NULL, DYNAMIC_TYPE_ECC); + if (ecc_curve_spec_cache[x] == NULL) { + #if defined(ECC_CACHE_CURVE) && !defined(SINGLE_THREADED) + wc_UnLockMutex(&ecc_curve_cache_mutex); + #endif + return MEMORY_E; + } + XMEMSET(ecc_curve_spec_cache[x], 0, sizeof(ecc_curve_spec)); + } + + /* set curve pointer to cache */ + *pCurve = ecc_curve_spec_cache[x]; + +#endif /* ECC_CACHE_CURVE */ + curve = *pCurve; + + /* make sure the curve is initialized */ + if (curve->dp != dp) { + curve->load_mask = 0; + + #ifdef ECC_CACHE_CURVE + curve->prime = &curve->prime_lcl; + curve->Af = &curve->Af_lcl; + #ifdef USE_ECC_B_PARAM + curve->Bf = &curve->Bf_lcl; + #endif + curve->order = &curve->order_lcl; + curve->Gx = &curve->Gx_lcl; + curve->Gy = &curve->Gy_lcl; + #endif + } + curve->dp = dp; /* set dp info */ + + /* determine items to load */ + load_items = (((byte)~(word32)curve->load_mask) & load_mask); + curve->load_mask |= load_items; + + /* load items */ + x = 0; + if (load_items & ECC_CURVE_FIELD_PRIME) + x += wc_ecc_curve_load_item(dp->prime, &curve->prime, curve, + ECC_CURVE_FIELD_PRIME); + if (load_items & ECC_CURVE_FIELD_AF) + x += wc_ecc_curve_load_item(dp->Af, &curve->Af, curve, + ECC_CURVE_FIELD_AF); +#ifdef USE_ECC_B_PARAM + if (load_items & ECC_CURVE_FIELD_BF) + x += wc_ecc_curve_load_item(dp->Bf, &curve->Bf, curve, + ECC_CURVE_FIELD_BF); +#endif + if (load_items & ECC_CURVE_FIELD_ORDER) + x += wc_ecc_curve_load_item(dp->order, &curve->order, curve, + ECC_CURVE_FIELD_ORDER); + if (load_items & ECC_CURVE_FIELD_GX) + x += wc_ecc_curve_load_item(dp->Gx, &curve->Gx, curve, + ECC_CURVE_FIELD_GX); + if (load_items & ECC_CURVE_FIELD_GY) + x += wc_ecc_curve_load_item(dp->Gy, &curve->Gy, curve, + ECC_CURVE_FIELD_GY); + + /* check for error */ + if (x != 0) { + wc_ecc_curve_free(curve); + ret = MP_READ_E; + } + +#if defined(ECC_CACHE_CURVE) && !defined(SINGLE_THREADED) + wc_UnLockMutex(&ecc_curve_cache_mutex); +#endif + + return ret; +} + +#ifdef ECC_CACHE_CURVE +int wc_ecc_curve_cache_init(void) +{ + int ret = 0; +#if defined(ECC_CACHE_CURVE) && !defined(SINGLE_THREADED) + ret = wc_InitMutex(&ecc_curve_cache_mutex); +#endif + return ret; +} + +void wc_ecc_curve_cache_free(void) +{ + int x; + + /* free all ECC curve caches */ + for (x = 0; x < (int)ECC_SET_COUNT; x++) { + if (ecc_curve_spec_cache[x]) { + _wc_ecc_curve_free(ecc_curve_spec_cache[x]); + XFREE(ecc_curve_spec_cache[x], NULL, DYNAMIC_TYPE_ECC); + ecc_curve_spec_cache[x] = NULL; + } + } + +#if defined(ECC_CACHE_CURVE) && !defined(SINGLE_THREADED) + wc_FreeMutex(&ecc_curve_cache_mutex); +#endif +} +#endif /* ECC_CACHE_CURVE */ + + +/* Retrieve the curve name for the ECC curve id. + * + * curve_id The id of the curve. + * returns the name stored from the curve if available, otherwise NULL. 
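+ * e.g. wc_ecc_get_name(ECC_SECP256R1) returns "SECP256R1" from the + * ecc_sets[] table above.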
+ */ +const char* wc_ecc_get_name(int curve_id) +{ + int curve_idx = wc_ecc_get_curve_idx(curve_id); + if (curve_idx == ECC_CURVE_INVALID) + return NULL; + return ecc_sets[curve_idx].name; +} + +int wc_ecc_set_curve(ecc_key* key, int keysize, int curve_id) +{ + if (keysize <= 0 && curve_id < 0) { + return BAD_FUNC_ARG; + } + + if (keysize > ECC_MAXSIZE) { + return ECC_BAD_ARG_E; + } + + /* handle custom case */ + if (key->idx != ECC_CUSTOM_IDX) { + int x; + + /* default values */ + key->idx = 0; + key->dp = NULL; + + /* find ecc_set based on curve_id or key size */ + for (x = 0; ecc_sets[x].size != 0; x++) { + if (curve_id > ECC_CURVE_DEF) { + if (curve_id == ecc_sets[x].id) + break; + } + else if (keysize <= ecc_sets[x].size) { + break; + } + } + if (ecc_sets[x].size == 0) { + WOLFSSL_MSG("ECC Curve not found"); + return ECC_CURVE_OID_E; + } + + key->idx = x; + key->dp = &ecc_sets[x]; + } + + return 0; +} + + +#ifdef ALT_ECC_SIZE +static void alt_fp_init(mp_int* a) +{ + a->size = FP_SIZE_ECC; + mp_zero(a); +} +#endif /* ALT_ECC_SIZE */ + + +#ifndef WOLFSSL_ATECC508A + +#if !defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_PUBLIC_ECC_ADD_DBL) + +/** + Add two ECC points + P The point to add + Q The point to add + R [out] The destination of the sum + a ECC curve parameter a + modulus The modulus of the field the ECC curve is in + mp The "b" value from montgomery_setup() + return MP_OKAY on success +*/ +int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, + mp_int* a, mp_int* modulus, mp_digit mp) +{ +#ifndef WOLFSSL_SP_MATH +#ifdef WOLFSSL_SMALL_STACK + mp_int* t1 = NULL; + mp_int* t2 = NULL; +#ifdef ALT_ECC_SIZE + mp_int* rx = NULL; + mp_int* ry = NULL; + mp_int* rz = NULL; +#endif +#else + mp_int t1[1], t2[1]; +#ifdef ALT_ECC_SIZE + mp_int rx[1], ry[1], rz[1]; +#endif +#endif + mp_int *x, *y, *z; + int err; + + if (P == NULL || Q == NULL || R == NULL || modulus == NULL) { + return ECC_BAD_ARG_E; + } + + /* if Q == R then swap P and Q, so we don't require a local x,y,z */ + if (Q == R) { + ecc_point* tPt = P; + P = Q; + Q = tPt; + } + +#ifdef WOLFSSL_SMALL_STACK +#ifdef WOLFSSL_SMALL_STACK_CACHE + if (R->key != NULL) { + t1 = R->key->t1; + t2 = R->key->t2; +#ifdef ALT_ECC_SIZE + rx = R->key->x; + ry = R->key->y; + rz = R->key->z; +#endif + } + else +#endif /* WOLFSSL_SMALL_STACK_CACHE */ + { + t1 = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + t2 = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + if (t1 == NULL || t2 == NULL) { + XFREE(t1, NULL, DYNAMIC_TYPE_ECC); + XFREE(t2, NULL, DYNAMIC_TYPE_ECC); + return MEMORY_E; + } +#ifdef ALT_ECC_SIZE + rx = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + ry = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + rz = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + if (rx == NULL || ry == NULL || rz == NULL) { + XFREE(rz, NULL, DYNAMIC_TYPE_ECC); + XFREE(ry, NULL, DYNAMIC_TYPE_ECC); + XFREE(rx, NULL, DYNAMIC_TYPE_ECC); + XFREE(t2, NULL, DYNAMIC_TYPE_ECC); + XFREE(t1, NULL, DYNAMIC_TYPE_ECC); + return MEMORY_E; + } +#endif + } +#endif /* WOLFSSL_SMALL_STACK */ + + if ((err = mp_init_multi(t1, t2, NULL, NULL, NULL, NULL)) != MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + #ifdef WOLFSSL_SMALL_STACK_CACHE + if (R->key == NULL) + #endif + { + #ifdef ALT_ECC_SIZE + XFREE(rz, NULL, DYNAMIC_TYPE_ECC); + XFREE(ry, NULL, DYNAMIC_TYPE_ECC); + XFREE(rx, NULL, DYNAMIC_TYPE_ECC); + #endif + XFREE(t2, NULL, DYNAMIC_TYPE_ECC); + XFREE(t1, NULL, DYNAMIC_TYPE_ECC); + } +#endif + return err; + } + + /* 
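P and Q may hold the same point (same X, Y and Z), which the addition formulas below cannot handle: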
should we dbl instead? */ + if (err == MP_OKAY) + err = mp_sub(modulus, Q->y, t1); + if (err == MP_OKAY) { + if ( (mp_cmp(P->x, Q->x) == MP_EQ) && + (get_digit_count(Q->z) && mp_cmp(P->z, Q->z) == MP_EQ) && + (mp_cmp(P->y, Q->y) == MP_EQ || mp_cmp(P->y, t1) == MP_EQ)) { + mp_clear(t1); + mp_clear(t2); + #ifdef WOLFSSL_SMALL_STACK + #ifdef WOLFSSL_SMALL_STACK_CACHE + if (R->key == NULL) + #endif + { + #ifdef ALT_ECC_SIZE + XFREE(rz, NULL, DYNAMIC_TYPE_ECC); + XFREE(ry, NULL, DYNAMIC_TYPE_ECC); + XFREE(rx, NULL, DYNAMIC_TYPE_ECC); + #endif + XFREE(t2, NULL, DYNAMIC_TYPE_ECC); + XFREE(t1, NULL, DYNAMIC_TYPE_ECC); + } + #endif + return ecc_projective_dbl_point(P, R, a, modulus, mp); + } + } + + if (err != MP_OKAY) { + goto done; + } + +/* If ALT_ECC_SIZE is used we need to use local stack variables, since + the ecc_point x,y,z members are reduced in size */ +#ifdef ALT_ECC_SIZE + /* Use local stack variable */ + x = rx; + y = ry; + z = rz; + + if ((err = mp_init_multi(x, y, z, NULL, NULL, NULL)) != MP_OKAY) { + goto done; + } +#else + /* Use destination directly */ + x = R->x; + y = R->y; + z = R->z; +#endif + + if (err == MP_OKAY) + err = mp_copy(P->x, x); + if (err == MP_OKAY) + err = mp_copy(P->y, y); + if (err == MP_OKAY) + err = mp_copy(P->z, z); + + /* if Z is one then these are no-operations */ + if (err == MP_OKAY) { + if (!mp_iszero(Q->z)) { + /* T1 = Z' * Z' */ + err = mp_sqr(Q->z, t1); + if (err == MP_OKAY) + err = mp_montgomery_reduce(t1, modulus, mp); + + /* X = X * T1 */ + if (err == MP_OKAY) + err = mp_mul(t1, x, x); + if (err == MP_OKAY) + err = mp_montgomery_reduce(x, modulus, mp); + + /* T1 = Z' * T1 */ + if (err == MP_OKAY) + err = mp_mul(Q->z, t1, t1); + if (err == MP_OKAY) + err = mp_montgomery_reduce(t1, modulus, mp); + + /* Y = Y * T1 */ + if (err == MP_OKAY) + err = mp_mul(t1, y, y); + if (err == MP_OKAY) + err = mp_montgomery_reduce(y, modulus, mp); + } + } + + /* T1 = Z*Z */ + if (err == MP_OKAY) + err = mp_sqr(z, t1); + if (err == MP_OKAY) + err = mp_montgomery_reduce(t1, modulus, mp); + + /* T2 = X' * T1 */ + if (err == MP_OKAY) + err = mp_mul(Q->x, t1, t2); + if (err == MP_OKAY) + err = mp_montgomery_reduce(t2, modulus, mp); + + /* T1 = Z * T1 */ + if (err == MP_OKAY) + err = mp_mul(z, t1, t1); + if (err == MP_OKAY) + err = mp_montgomery_reduce(t1, modulus, mp); + + /* T1 = Y' * T1 */ + if (err == MP_OKAY) + err = mp_mul(Q->y, t1, t1); + if (err == MP_OKAY) + err = mp_montgomery_reduce(t1, modulus, mp); + + /* Y = Y - T1 */ + if (err == MP_OKAY) + err = mp_sub(y, t1, y); + if (err == MP_OKAY) { + if (mp_isneg(y)) + err = mp_add(y, modulus, y); + } + /* T1 = 2T1 */ + if (err == MP_OKAY) + err = mp_add(t1, t1, t1); + if (err == MP_OKAY) { + if (mp_cmp(t1, modulus) != MP_LT) + err = mp_sub(t1, modulus, t1); + } + /* T1 = Y + T1 */ + if (err == MP_OKAY) + err = mp_add(t1, y, t1); + if (err == MP_OKAY) { + if (mp_cmp(t1, modulus) != MP_LT) + err = mp_sub(t1, modulus, t1); + } + /* X = X - T2 */ + if (err == MP_OKAY) + err = mp_sub(x, t2, x); + if (err == MP_OKAY) { + if (mp_isneg(x)) + err = mp_add(x, modulus, x); + } + /* T2 = 2T2 */ + if (err == MP_OKAY) + err = mp_add(t2, t2, t2); + if (err == MP_OKAY) { + if (mp_cmp(t2, modulus) != MP_LT) + err = mp_sub(t2, modulus, t2); + } + /* T2 = X + T2 */ + if (err == MP_OKAY) + err = mp_add(t2, x, t2); + if (err == MP_OKAY) { + if (mp_cmp(t2, modulus) != MP_LT) + err = mp_sub(t2, modulus, t2); + } + + if (err == MP_OKAY) { + if (!mp_iszero(Q->z)) { + /* Z = Z * Z' */ + err = mp_mul(z, Q->z, z); + if (err == MP_OKAY) + err = mp_montgomery_reduce(z, 
modulus, mp); + } + } + + /* Z = Z * X */ + if (err == MP_OKAY) + err = mp_mul(z, x, z); + if (err == MP_OKAY) + err = mp_montgomery_reduce(z, modulus, mp); + + /* T1 = T1 * X */ + if (err == MP_OKAY) + err = mp_mul(t1, x, t1); + if (err == MP_OKAY) + err = mp_montgomery_reduce(t1, modulus, mp); + + /* X = X * X */ + if (err == MP_OKAY) + err = mp_sqr(x, x); + if (err == MP_OKAY) + err = mp_montgomery_reduce(x, modulus, mp); + + /* T2 = T2 * X */ + if (err == MP_OKAY) + err = mp_mul(t2, x, t2); + if (err == MP_OKAY) + err = mp_montgomery_reduce(t2, modulus, mp); + + /* T1 = T1 * X */ + if (err == MP_OKAY) + err = mp_mul(t1, x, t1); + if (err == MP_OKAY) + err = mp_montgomery_reduce(t1, modulus, mp); + + /* X = Y*Y */ + if (err == MP_OKAY) + err = mp_sqr(y, x); + if (err == MP_OKAY) + err = mp_montgomery_reduce(x, modulus, mp); + + /* X = X - T2 */ + if (err == MP_OKAY) + err = mp_sub(x, t2, x); + if (err == MP_OKAY) { + if (mp_isneg(x)) + err = mp_add(x, modulus, x); + } + /* T2 = T2 - X */ + if (err == MP_OKAY) + err = mp_sub(t2, x, t2); + if (err == MP_OKAY) { + if (mp_isneg(t2)) + err = mp_add(t2, modulus, t2); + } + /* T2 = T2 - X */ + if (err == MP_OKAY) + err = mp_sub(t2, x, t2); + if (err == MP_OKAY) { + if (mp_isneg(t2)) + err = mp_add(t2, modulus, t2); + } + /* T2 = T2 * Y */ + if (err == MP_OKAY) + err = mp_mul(t2, y, t2); + if (err == MP_OKAY) + err = mp_montgomery_reduce(t2, modulus, mp); + + /* Y = T2 - T1 */ + if (err == MP_OKAY) + err = mp_sub(t2, t1, y); + if (err == MP_OKAY) { + if (mp_isneg(y)) + err = mp_add(y, modulus, y); + } + /* Y = Y/2 */ + if (err == MP_OKAY) { + if (mp_isodd(y) == MP_YES) + err = mp_add(y, modulus, y); + } + if (err == MP_OKAY) + err = mp_div_2(y, y); + +#ifdef ALT_ECC_SIZE + if (err == MP_OKAY) + err = mp_copy(x, R->x); + if (err == MP_OKAY) + err = mp_copy(y, R->y); + if (err == MP_OKAY) + err = mp_copy(z, R->z); +#endif + +done: + + /* clean up */ + mp_clear(t1); + mp_clear(t2); +#ifdef WOLFSSL_SMALL_STACK +#ifdef WOLFSSL_SMALL_STACK_CACHE + if (R->key == NULL) +#endif + { + #ifdef ALT_ECC_SIZE + XFREE(rz, NULL, DYNAMIC_TYPE_ECC); + XFREE(ry, NULL, DYNAMIC_TYPE_ECC); + XFREE(rx, NULL, DYNAMIC_TYPE_ECC); + #endif + XFREE(t2, NULL, DYNAMIC_TYPE_ECC); + XFREE(t1, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +#else + if (P == NULL || Q == NULL || R == NULL || modulus == NULL) { + return ECC_BAD_ARG_E; + } + + (void)a; + (void)mp; + +#ifndef WOLFSSL_SP_NO_256 + if (mp_count_bits(modulus) == 256) { + return sp_ecc_proj_add_point_256(P->x, P->y, P->z, Q->x, Q->y, Q->z, + R->x, R->y, R->z); + } +#endif +#ifdef WOLFSSL_SP_384 + if (mp_count_bits(modulus) == 384) { + return sp_ecc_proj_add_point_384(P->x, P->y, P->z, Q->x, Q->y, Q->z, + R->x, R->y, R->z); + } +#endif + return ECC_BAD_ARG_E; +#endif +} + +/* ### Point doubling in Jacobian coordinate system ### + * + * let us have a curve: y^2 = x^3 + a*x + b + * in Jacobian coordinates it becomes: y^2 = x^3 + a*x*z^4 + b*z^6 + * + * The doubling of P = (Xp, Yp, Zp) is given by R = (Xr, Yr, Zr) where: + * Xr = M^2 - 2*S + * Yr = M * (S - Xr) - 8*T + * Zr = 2 * Yp * Zp + * + * M = 3 * Xp^2 + a*Zp^4 + * T = Yp^4 + * S = 4 * Xp * Yp^2 + * + * SPECIAL CASE: when a == -3 we can compute M as + * M = 3 * (Xp^2 - Zp^4) = 3 * (Xp + Zp^2) * (Xp - Zp^2) + */ + +/** + Double an ECC point + P The point to double + R [out] The destination of the double + a ECC curve parameter a + modulus The modulus of the field the ECC curve is in + mp The "b" value from montgomery_setup() + return MP_OKAY on success +*/ +int 
ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* a, + mp_int* modulus, mp_digit mp) +{ +#ifndef WOLFSSL_SP_MATH +#ifdef WOLFSSL_SMALL_STACK + mp_int* t1 = NULL; + mp_int* t2 = NULL; +#ifdef ALT_ECC_SIZE + mp_int* rx = NULL; + mp_int* ry = NULL; + mp_int* rz = NULL; +#endif +#else + mp_int t1[1], t2[1]; +#ifdef ALT_ECC_SIZE + mp_int rx[1], ry[1], rz[1]; +#endif +#endif + mp_int *x, *y, *z; + int err; + + if (P == NULL || R == NULL || modulus == NULL) + return ECC_BAD_ARG_E; + +#ifdef WOLFSSL_SMALL_STACK +#ifdef WOLFSSL_SMALL_STACK_CACHE + if (R->key != NULL) { + t1 = R->key->t1; + t2 = R->key->t2; + #ifdef ALT_ECC_SIZE + rx = R->key->x; + ry = R->key->y; + rz = R->key->z; + #endif + } + else +#endif /* WOLFSSL_SMALL_STACK_CACHE */ + { + t1 = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + t2 = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + if (t1 == NULL || t2 == NULL) { + XFREE(t2, NULL, DYNAMIC_TYPE_ECC); + XFREE(t1, NULL, DYNAMIC_TYPE_ECC); + return MEMORY_E; + } + #ifdef ALT_ECC_SIZE + rx = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + ry = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + rz = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + if (rx == NULL || ry == NULL || rz == NULL) { + XFREE(rz, NULL, DYNAMIC_TYPE_ECC); + XFREE(ry, NULL, DYNAMIC_TYPE_ECC); + XFREE(rx, NULL, DYNAMIC_TYPE_ECC); + XFREE(t2, NULL, DYNAMIC_TYPE_ECC); + XFREE(t1, NULL, DYNAMIC_TYPE_ECC); + return MEMORY_E; + } + #endif + } +#endif + + if ((err = mp_init_multi(t1, t2, NULL, NULL, NULL, NULL)) != MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK +#ifdef WOLFSSL_SMALL_STACK_CACHE + if (R->key == NULL) +#endif + { + #ifdef ALT_ECC_SIZE + XFREE(rz, NULL, DYNAMIC_TYPE_ECC); + XFREE(ry, NULL, DYNAMIC_TYPE_ECC); + XFREE(rx, NULL, DYNAMIC_TYPE_ECC); + #endif + XFREE(t2, NULL, DYNAMIC_TYPE_ECC); + XFREE(t1, NULL, DYNAMIC_TYPE_ECC); + } +#endif + return err; + } + +/* If ALT_ECC_SIZE is used we need to use local stack variables, since + the ecc_point x,y,z members are reduced in size */ +#ifdef ALT_ECC_SIZE + /* Use local stack variable */ + x = rx; + y = ry; + z = rz; + + if ((err = mp_init_multi(x, y, z, NULL, NULL, NULL)) != MP_OKAY) { + mp_clear(t1); + mp_clear(t2); + #ifdef WOLFSSL_SMALL_STACK + #ifdef WOLFSSL_SMALL_STACK_CACHE + if (R->key == NULL) + #endif + { + #ifdef ALT_ECC_SIZE + XFREE(rz, NULL, DYNAMIC_TYPE_ECC); + XFREE(ry, NULL, DYNAMIC_TYPE_ECC); + XFREE(rx, NULL, DYNAMIC_TYPE_ECC); + #endif + XFREE(t2, NULL, DYNAMIC_TYPE_ECC); + XFREE(t1, NULL, DYNAMIC_TYPE_ECC); + } + #endif + return err; + } +#else + /* Use destination directly */ + x = R->x; + y = R->y; + z = R->z; +#endif + + if (err == MP_OKAY) + err = mp_copy(P->x, x); + if (err == MP_OKAY) + err = mp_copy(P->y, y); + if (err == MP_OKAY) + err = mp_copy(P->z, z); + + /* T1 = Z * Z */ + if (err == MP_OKAY) + err = mp_sqr(z, t1); + if (err == MP_OKAY) + err = mp_montgomery_reduce(t1, modulus, mp); + + /* Z = Y * Z */ + if (err == MP_OKAY) + err = mp_mul(z, y, z); + if (err == MP_OKAY) + err = mp_montgomery_reduce(z, modulus, mp); + + /* Z = 2Z */ + if (err == MP_OKAY) + err = mp_add(z, z, z); + if (err == MP_OKAY) { + if (mp_cmp(z, modulus) != MP_LT) + err = mp_sub(z, modulus, z); + } + + /* Determine if curve "a" should be used in calc */ +#ifdef WOLFSSL_CUSTOM_CURVES + if (err == MP_OKAY) { + /* Use a and prime to determine if a == -3 (i.e. p - a == 3) */ + err = mp_submod(modulus, a, modulus, t2); + } + if (err == MP_OKAY && mp_cmp_d(t2, 3) != MP_EQ) { + /* use "a" in calc */ + + /* T2 = T1 * T1 */ + if (err == MP_OKAY) + 
err = mp_sqr(t1, t2); + if (err == MP_OKAY) + err = mp_montgomery_reduce(t2, modulus, mp); + /* T1 = T2 * a */ + if (err == MP_OKAY) + err = mp_mulmod(t2, a, modulus, t1); + /* T2 = X * X */ + if (err == MP_OKAY) + err = mp_sqr(x, t2); + if (err == MP_OKAY) + err = mp_montgomery_reduce(t2, modulus, mp); + /* T1 = T2 + T1 */ + if (err == MP_OKAY) + err = mp_add(t1, t2, t1); + if (err == MP_OKAY) { + if (mp_cmp(t1, modulus) != MP_LT) + err = mp_sub(t1, modulus, t1); + } + /* T1 = T2 + T1 */ + if (err == MP_OKAY) + err = mp_add(t1, t2, t1); + if (err == MP_OKAY) { + if (mp_cmp(t1, modulus) != MP_LT) + err = mp_sub(t1, modulus, t1); + } + /* T1 = T2 + T1 */ + if (err == MP_OKAY) + err = mp_add(t1, t2, t1); + if (err == MP_OKAY) { + if (mp_cmp(t1, modulus) != MP_LT) + err = mp_sub(t1, modulus, t1); + } + } + else +#endif /* WOLFSSL_CUSTOM_CURVES */ + { + /* assumes "a" == -3 */ + (void)a; + + /* T2 = X - T1 */ + if (err == MP_OKAY) + err = mp_sub(x, t1, t2); + if (err == MP_OKAY) { + if (mp_isneg(t2)) + err = mp_add(t2, modulus, t2); + } + /* T1 = X + T1 */ + if (err == MP_OKAY) + err = mp_add(t1, x, t1); + if (err == MP_OKAY) { + if (mp_cmp(t1, modulus) != MP_LT) + err = mp_sub(t1, modulus, t1); + } + /* T2 = T1 * T2 */ + if (err == MP_OKAY) + err = mp_mul(t1, t2, t2); + if (err == MP_OKAY) + err = mp_montgomery_reduce(t2, modulus, mp); + + /* T1 = 2T2 */ + if (err == MP_OKAY) + err = mp_add(t2, t2, t1); + if (err == MP_OKAY) { + if (mp_cmp(t1, modulus) != MP_LT) + err = mp_sub(t1, modulus, t1); + } + /* T1 = T1 + T2 */ + if (err == MP_OKAY) + err = mp_add(t1, t2, t1); + if (err == MP_OKAY) { + if (mp_cmp(t1, modulus) != MP_LT) + err = mp_sub(t1, modulus, t1); + } + } + + /* Y = 2Y */ + if (err == MP_OKAY) + err = mp_add(y, y, y); + if (err == MP_OKAY) { + if (mp_cmp(y, modulus) != MP_LT) + err = mp_sub(y, modulus, y); + } + /* Y = Y * Y */ + if (err == MP_OKAY) + err = mp_sqr(y, y); + if (err == MP_OKAY) + err = mp_montgomery_reduce(y, modulus, mp); + + /* T2 = Y * Y */ + if (err == MP_OKAY) + err = mp_sqr(y, t2); + if (err == MP_OKAY) + err = mp_montgomery_reduce(t2, modulus, mp); + + /* T2 = T2/2 */ + if (err == MP_OKAY) { + if (mp_isodd(t2) == MP_YES) + err = mp_add(t2, modulus, t2); + } + if (err == MP_OKAY) + err = mp_div_2(t2, t2); + + /* Y = Y * X */ + if (err == MP_OKAY) + err = mp_mul(y, x, y); + if (err == MP_OKAY) + err = mp_montgomery_reduce(y, modulus, mp); + + /* X = T1 * T1 */ + if (err == MP_OKAY) + err = mp_sqr(t1, x); + if (err == MP_OKAY) + err = mp_montgomery_reduce(x, modulus, mp); + + /* X = X - Y */ + if (err == MP_OKAY) + err = mp_sub(x, y, x); + if (err == MP_OKAY) { + if (mp_isneg(x)) + err = mp_add(x, modulus, x); + } + /* X = X - Y */ + if (err == MP_OKAY) + err = mp_sub(x, y, x); + if (err == MP_OKAY) { + if (mp_isneg(x)) + err = mp_add(x, modulus, x); + } + + /* Y = Y - X */ + if (err == MP_OKAY) + err = mp_sub(y, x, y); + if (err == MP_OKAY) { + if (mp_isneg(y)) + err = mp_add(y, modulus, y); + } + /* Y = Y * T1 */ + if (err == MP_OKAY) + err = mp_mul(y, t1, y); + if (err == MP_OKAY) + err = mp_montgomery_reduce(y, modulus, mp); + + /* Y = Y - T2 */ + if (err == MP_OKAY) + err = mp_sub(y, t2, y); + if (err == MP_OKAY) { + if (mp_isneg(y)) + err = mp_add(y, modulus, y); + } + +#ifdef ALT_ECC_SIZE + if (err == MP_OKAY) + err = mp_copy(x, R->x); + if (err == MP_OKAY) + err = mp_copy(y, R->y); + if (err == MP_OKAY) + err = mp_copy(z, R->z); +#endif + + /* clean up */ + mp_clear(t1); + mp_clear(t2); + +#ifdef WOLFSSL_SMALL_STACK +#ifdef WOLFSSL_SMALL_STACK_CACHE + if 
(R->key == NULL) +#endif + { + #ifdef ALT_ECC_SIZE + XFREE(rz, NULL, DYNAMIC_TYPE_ECC); + XFREE(ry, NULL, DYNAMIC_TYPE_ECC); + XFREE(rx, NULL, DYNAMIC_TYPE_ECC); + #endif + XFREE(t2, NULL, DYNAMIC_TYPE_ECC); + XFREE(t1, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +#else + if (P == NULL || R == NULL || modulus == NULL) + return ECC_BAD_ARG_E; + + (void)a; + (void)mp; + +#ifndef WOLFSSL_SP_NO_256 + if (mp_count_bits(modulus) == 256) { + return sp_ecc_proj_dbl_point_256(P->x, P->y, P->z, R->x, R->y, R->z); + } +#endif +#ifdef WOLFSSL_SP_384 + if (mp_count_bits(modulus) == 384) { + return sp_ecc_proj_dbl_point_384(P->x, P->y, P->z, R->x, R->y, R->z); + } +#endif + return ECC_BAD_ARG_E; +#endif +} + + +/** + Map a projective Jacobian point back to affine space + P [in/out] The point to map + modulus The modulus of the field the ECC curve is in + mp The "b" value from montgomery_setup() + ct Operation should be constant time. + return MP_OKAY on success +*/ +int ecc_map_ex(ecc_point* P, mp_int* modulus, mp_digit mp, int ct) +{ +#ifndef WOLFSSL_SP_MATH +#ifdef WOLFSSL_SMALL_STACK + mp_int* t1 = NULL; + mp_int* t2 = NULL; +#ifdef ALT_ECC_SIZE + mp_int* rx = NULL; + mp_int* ry = NULL; + mp_int* rz = NULL; +#endif +#else + mp_int t1[1], t2[1]; +#ifdef ALT_ECC_SIZE + mp_int rx[1], ry[1], rz[1]; +#endif +#endif /* WOLFSSL_SMALL_STACK */ + mp_int *x, *y, *z; + int err; + + (void)ct; + + if (P == NULL || modulus == NULL) + return ECC_BAD_ARG_E; + + /* special case for point at infinity */ + if (mp_cmp_d(P->z, 0) == MP_EQ) { + err = mp_set(P->x, 0); + if (err == MP_OKAY) + err = mp_set(P->y, 0); + if (err == MP_OKAY) + err = mp_set(P->z, 1); + return err; + } + +#ifdef WOLFSSL_SMALL_STACK +#ifdef WOLFSSL_SMALL_STACK_CACHE + if (P->key != NULL) { + t1 = P->key->t1; + t2 = P->key->t2; + #ifdef ALT_ECC_SIZE + rx = P->key->x; + ry = P->key->y; + rz = P->key->z; + #endif + } + else +#endif /* WOLFSSL_SMALL_STACK_CACHE */ + { + t1 = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + t2 = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + if (t1 == NULL || t2 == NULL) { + XFREE(t2, NULL, DYNAMIC_TYPE_ECC); + XFREE(t1, NULL, DYNAMIC_TYPE_ECC); + return MEMORY_E; + } +#ifdef ALT_ECC_SIZE + rx = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + ry = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + rz = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + if (rx == NULL || ry == NULL || rz == NULL) { + XFREE(rz, NULL, DYNAMIC_TYPE_ECC); + XFREE(ry, NULL, DYNAMIC_TYPE_ECC); + XFREE(rx, NULL, DYNAMIC_TYPE_ECC); + XFREE(t2, NULL, DYNAMIC_TYPE_ECC); + XFREE(t1, NULL, DYNAMIC_TYPE_ECC); + return MEMORY_E; + } +#endif + } +#endif /* WOLFSSL_SMALL_STACK */ + + if ((err = mp_init_multi(t1, t2, NULL, NULL, NULL, NULL)) != MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK +#ifdef WOLFSSL_SMALL_STACK_CACHE + if (P->key == NULL) +#endif + { + #ifdef ALT_ECC_SIZE + XFREE(rz, NULL, DYNAMIC_TYPE_ECC); + XFREE(ry, NULL, DYNAMIC_TYPE_ECC); + XFREE(rx, NULL, DYNAMIC_TYPE_ECC); + #endif + XFREE(t2, NULL, DYNAMIC_TYPE_ECC); + XFREE(t1, NULL, DYNAMIC_TYPE_ECC); + } +#endif + return MEMORY_E; + } + +#ifdef ALT_ECC_SIZE + /* Use local stack variable */ + x = rx; + y = ry; + z = rz; + + if ((err = mp_init_multi(x, y, z, NULL, NULL, NULL)) != MP_OKAY) { + goto done; + } + + if (err == MP_OKAY) + err = mp_copy(P->x, x); + if (err == MP_OKAY) + err = mp_copy(P->y, y); + if (err == MP_OKAY) + err = mp_copy(P->z, z); + + if (err != MP_OKAY) { + goto done; + } +#else + /* Use destination directly */ + 
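/* affine result: x = X / Z^2 and y = Y / Z^3, computed below using a single modular inversion of Z */ +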
x = P->x; + y = P->y; + z = P->z; +#endif + + /* get 1/z */ + if (err == MP_OKAY) { +#if defined(ECC_TIMING_RESISTANT) && defined(USE_FAST_MATH) + if (ct) { + err = mp_invmod_mont_ct(z, modulus, t1, mp); + if (err == MP_OKAY) + err = mp_montgomery_reduce(t1, modulus, mp); + } + else +#endif + { + /* first map z back to normal */ + err = mp_montgomery_reduce(z, modulus, mp); + if (err == MP_OKAY) + err = mp_invmod(z, modulus, t1); + } + } + + /* get 1/z^2 and 1/z^3 */ + if (err == MP_OKAY) + err = mp_sqr(t1, t2); + if (err == MP_OKAY) + err = mp_mod(t2, modulus, t2); + if (err == MP_OKAY) + err = mp_mul(t1, t2, t1); + if (err == MP_OKAY) + err = mp_mod(t1, modulus, t1); + + /* multiply against x/y */ + if (err == MP_OKAY) + err = mp_mul(x, t2, x); + if (err == MP_OKAY) + err = mp_montgomery_reduce(x, modulus, mp); + if (err == MP_OKAY) + err = mp_mul(y, t1, y); + if (err == MP_OKAY) + err = mp_montgomery_reduce(y, modulus, mp); + + if (err == MP_OKAY) + err = mp_set(z, 1); + +#ifdef ALT_ECC_SIZE + /* return result */ + if (err == MP_OKAY) + err = mp_copy(x, P->x); + if (err == MP_OKAY) + err = mp_copy(y, P->y); + if (err == MP_OKAY) + err = mp_copy(z, P->z); + +done: +#endif + + /* clean up */ + mp_clear(t1); + mp_clear(t2); + +#ifdef WOLFSSL_SMALL_STACK +#ifdef WOLFSSL_SMALL_STACK_CACHE + if (P->key == NULL) +#endif + { + #ifdef ALT_ECC_SIZE + XFREE(rz, NULL, DYNAMIC_TYPE_ECC); + XFREE(ry, NULL, DYNAMIC_TYPE_ECC); + XFREE(rx, NULL, DYNAMIC_TYPE_ECC); + #endif + XFREE(t2, NULL, DYNAMIC_TYPE_ECC); + XFREE(t1, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +#else + if (P == NULL || modulus == NULL) + return ECC_BAD_ARG_E; + + (void)mp; + +#ifndef WOLFSSL_SP_NO_256 + if (mp_count_bits(modulus) == 256) { + return sp_ecc_map_256(P->x, P->y, P->z); + } +#endif +#ifdef WOLFSSL_SP_384 + if (mp_count_bits(modulus) == 384) { + return sp_ecc_map_384(P->x, P->y, P->z); + } +#endif + return ECC_BAD_ARG_E; +#endif +} + +int ecc_map(ecc_point* P, mp_int* modulus, mp_digit mp) +{ + return ecc_map_ex(P, modulus, mp, 0); +} +#endif /* !WOLFSSL_SP_MATH || WOLFSSL_PUBLIC_ECC_ADD_DBL */ + +#if !defined(FREESCALE_LTC_ECC) && !defined(WOLFSSL_STM32_PKA) + +#if !defined(FP_ECC) || !defined(WOLFSSL_SP_MATH) +/** + Perform a point multiplication + k The scalar to multiply by + G The base point + R [out] Destination for kG + a ECC curve parameter a + modulus The modulus of the field the ECC curve is in + map Boolean whether to map back to affine or not + (1==map, 0 == leave in projective) + return MP_OKAY on success +*/ +#ifdef FP_ECC +static int normal_ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, + mp_int* a, mp_int* modulus, int map, + void* heap) +#else +int wc_ecc_mulmod_ex(mp_int* k, ecc_point *G, ecc_point *R, + mp_int* a, mp_int* modulus, int map, + void* heap) +#endif +{ +#ifndef WOLFSSL_SP_MATH +#ifndef ECC_TIMING_RESISTANT + /* size of sliding window, don't change this! 
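(a width-4 window consumes the scalar four bits at a time: M[] below caches 8G..15G, so each full window costs four doubles plus one add, and the M[bitbuf-M_POINTS] indexing relies on these exact sizes)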
*/ + #define WINSIZE 4 + #define M_POINTS 8 + int first = 1, bitbuf = 0, bitcpy = 0, j; +#elif defined(WC_NO_CACHE_RESISTANT) + #define M_POINTS 4 +#else + #define M_POINTS 5 +#endif + + ecc_point *tG, *M[M_POINTS]; + int i, err; +#ifdef WOLFSSL_SMALL_STACK_CACHE + ecc_key key; +#endif +#ifdef WOLFSSL_SMALL_STACK + mp_int* mu = NULL; +#else + mp_int mu[1]; +#endif + mp_digit mp; + mp_digit buf; + int bitcnt = 0, mode = 0, digidx = 0; + + if (k == NULL || G == NULL || R == NULL || modulus == NULL) { + return ECC_BAD_ARG_E; + } + + /* init variables */ + tG = NULL; + XMEMSET(M, 0, sizeof(M)); +#ifdef WOLFSSL_SMALL_STACK + mu = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC); + if (mu == NULL) + return MEMORY_E; +#endif +#ifdef WOLFSSL_SMALL_STACK_CACHE + key.t1 = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC); + key.t2 = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC); +#ifdef ALT_ECC_SIZE + key.x = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC); + key.y = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC); + key.z = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC); +#endif + if (key.t1 == NULL || key.t2 == NULL +#ifdef ALT_ECC_SIZE + || key.x == NULL || key.y == NULL || key.z == NULL +#endif + ) { +#ifdef ALT_ECC_SIZE + XFREE(key.z, heap, DYNAMIC_TYPE_ECC); + XFREE(key.y, heap, DYNAMIC_TYPE_ECC); + XFREE(key.x, heap, DYNAMIC_TYPE_ECC); +#endif + XFREE(key.t2, heap, DYNAMIC_TYPE_ECC); + XFREE(key.t1, heap, DYNAMIC_TYPE_ECC); + XFREE(mu, heap, DYNAMIC_TYPE_ECC); + return MEMORY_E; + } +#endif /* WOLFSSL_SMALL_STACK_CACHE */ + + /* init montgomery reduction */ + if ((err = mp_montgomery_setup(modulus, &mp)) != MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK_CACHE +#ifdef ALT_ECC_SIZE + XFREE(key.z, heap, DYNAMIC_TYPE_ECC); + XFREE(key.y, heap, DYNAMIC_TYPE_ECC); + XFREE(key.x, heap, DYNAMIC_TYPE_ECC); +#endif + XFREE(key.t2, heap, DYNAMIC_TYPE_ECC); + XFREE(key.t1, heap, DYNAMIC_TYPE_ECC); +#endif /* WOLFSSL_SMALL_STACK_CACHE */ +#ifdef WOLFSSL_SMALL_STACK + XFREE(mu, heap, DYNAMIC_TYPE_ECC); +#endif + return err; + } + + if ((err = mp_init(mu)) != MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK_CACHE +#ifdef ALT_ECC_SIZE + XFREE(key.z, heap, DYNAMIC_TYPE_ECC); + XFREE(key.y, heap, DYNAMIC_TYPE_ECC); + XFREE(key.x, heap, DYNAMIC_TYPE_ECC); +#endif + XFREE(key.t2, heap, DYNAMIC_TYPE_ECC); + XFREE(key.t1, heap, DYNAMIC_TYPE_ECC); +#endif /* WOLFSSL_SMALL_STACK_CACHE */ +#ifdef WOLFSSL_SMALL_STACK + XFREE(mu, heap, DYNAMIC_TYPE_ECC); +#endif + return err; + } + if ((err = mp_montgomery_calc_normalization(mu, modulus)) != MP_OKAY) { + mp_clear(mu); +#ifdef WOLFSSL_SMALL_STACK_CACHE +#ifdef ALT_ECC_SIZE + XFREE(key.z, heap, DYNAMIC_TYPE_ECC); + XFREE(key.y, heap, DYNAMIC_TYPE_ECC); + XFREE(key.x, heap, DYNAMIC_TYPE_ECC); +#endif + XFREE(key.t2, heap, DYNAMIC_TYPE_ECC); + XFREE(key.t1, heap, DYNAMIC_TYPE_ECC); +#endif /* WOLFSSL_SMALL_STACK_CACHE */ +#ifdef WOLFSSL_SMALL_STACK + XFREE(mu, heap, DYNAMIC_TYPE_ECC); +#endif + return err; + } + + /* alloc ram for window temps */ + for (i = 0; i < M_POINTS; i++) { + M[i] = wc_ecc_new_point_h(heap); + if (M[i] == NULL) { + mp_clear(mu); + err = MEMORY_E; goto exit; + } +#ifdef WOLFSSL_SMALL_STACK_CACHE + M[i]->key = &key; +#endif + } + + /* make a copy of G in case R==G */ + tG = wc_ecc_new_point_h(heap); + if (tG == NULL) + err = MEMORY_E; + + /* tG = G and convert to montgomery */ + if (err == MP_OKAY) { + if (mp_cmp_d(mu, 1) == MP_EQ) { + err = mp_copy(G->x, tG->x); + if (err == MP_OKAY) + err = mp_copy(G->y, tG->y); + 
if (err == MP_OKAY) + err = mp_copy(G->z, tG->z); + } else { + err = mp_mulmod(G->x, mu, modulus, tG->x); + if (err == MP_OKAY) + err = mp_mulmod(G->y, mu, modulus, tG->y); + if (err == MP_OKAY) + err = mp_mulmod(G->z, mu, modulus, tG->z); + } + } + + /* done with mu */ + mp_clear(mu); + +#ifdef WOLFSSL_SMALL_STACK_CACHE + R->key = &key; +#endif +#ifndef ECC_TIMING_RESISTANT + + /* calc the M tab, which holds kG for k==8..15 */ + /* M[0] == 8G */ + if (err == MP_OKAY) + err = ecc_projective_dbl_point(tG, M[0], a, modulus, mp); + if (err == MP_OKAY) + err = ecc_projective_dbl_point(M[0], M[0], a, modulus, mp); + if (err == MP_OKAY) + err = ecc_projective_dbl_point(M[0], M[0], a, modulus, mp); + + /* now find (8+k)G for k=1..7 */ + if (err == MP_OKAY) + for (j = 9; j < 16; j++) { + err = ecc_projective_add_point(M[j-9], tG, M[j-M_POINTS], a, modulus, + mp); + if (err != MP_OKAY) break; + } + + /* setup sliding window */ + if (err == MP_OKAY) { + mode = 0; + bitcnt = 1; + buf = 0; + digidx = get_digit_count(k) - 1; + bitcpy = bitbuf = 0; + first = 1; + + /* perform ops */ + for (;;) { + /* grab next digit as required */ + if (--bitcnt == 0) { + if (digidx == -1) { + break; + } + buf = get_digit(k, digidx); + bitcnt = (int) DIGIT_BIT; + --digidx; + } + + /* grab the next msb from the multiplicand */ + i = (int)(buf >> (DIGIT_BIT - 1)) & 1; + buf <<= 1; + + /* skip leading zero bits */ + if (mode == 0 && i == 0) + continue; + + /* if the bit is zero and mode == 1 then we double */ + if (mode == 1 && i == 0) { + err = ecc_projective_dbl_point(R, R, a, modulus, mp); + if (err != MP_OKAY) break; + continue; + } + + /* else we add it to the window */ + bitbuf |= (i << (WINSIZE - ++bitcpy)); + mode = 2; + + if (bitcpy == WINSIZE) { + /* if this is the first window we do a simple copy */ + if (first == 1) { + /* R = kG [k = first window] */ + err = mp_copy(M[bitbuf-M_POINTS]->x, R->x); + if (err != MP_OKAY) break; + + err = mp_copy(M[bitbuf-M_POINTS]->y, R->y); + if (err != MP_OKAY) break; + + err = mp_copy(M[bitbuf-M_POINTS]->z, R->z); + first = 0; + } else { + /* normal window */ + /* ok window is filled so double as required and add */ + /* double first */ + for (j = 0; j < WINSIZE; j++) { + err = ecc_projective_dbl_point(R, R, a, modulus, mp); + if (err != MP_OKAY) break; + } + if (err != MP_OKAY) break; /* out of first for(;;) */ + + /* then add, bitbuf will be 8..15 [8..2^WINSIZE] guaranteed */ + err = ecc_projective_add_point(R, M[bitbuf-M_POINTS], R, a, + modulus, mp); + } + if (err != MP_OKAY) break; + /* empty window and reset */ + bitcpy = bitbuf = 0; + mode = 1; + } + } + } + + /* if bits remain then double/add */ + if (err == MP_OKAY) { + if (mode == 2 && bitcpy > 0) { + /* double then add */ + for (j = 0; j < bitcpy; j++) { + /* only double if we have had at least one add first */ + if (first == 0) { + err = ecc_projective_dbl_point(R, R, a, modulus, mp); + if (err != MP_OKAY) break; + } + + bitbuf <<= 1; + if ((bitbuf & (1 << WINSIZE)) != 0) { + if (first == 1) { + /* first add, so copy */ + err = mp_copy(tG->x, R->x); + if (err != MP_OKAY) break; + + err = mp_copy(tG->y, R->y); + if (err != MP_OKAY) break; + + err = mp_copy(tG->z, R->z); + if (err != MP_OKAY) break; + first = 0; + } else { + /* then add */ + err = ecc_projective_add_point(R, tG, R, a, modulus, mp); + if (err != MP_OKAY) break; + } + } + } + } + } + + #undef WINSIZE + +#else /* ECC_TIMING_RESISTANT */ + + /* calc the M tab */ + /* M[0] == G */ + if (err == MP_OKAY) + err = mp_copy(tG->x, M[0]->x); + if (err == MP_OKAY) + 
err = mp_copy(tG->y, M[0]->y); + if (err == MP_OKAY) + err = mp_copy(tG->z, M[0]->z); + + /* M[1] == 2G */ + if (err == MP_OKAY) + err = ecc_projective_dbl_point(tG, M[1], a, modulus, mp); +#ifdef WC_NO_CACHE_RESISTANT + if (err == MP_OKAY) + err = wc_ecc_copy_point(M[0], M[2]); +#else + if (err == MP_OKAY) + err = wc_ecc_copy_point(M[0], M[3]); + if (err == MP_OKAY) + err = wc_ecc_copy_point(M[1], M[4]); +#endif + + /* setup sliding window */ + mode = 0; + bitcnt = 1; + buf = 0; + digidx = get_digit_count(modulus) - 1; + /* The order MAY be 1 bit longer than the modulus. */ + digidx += (modulus->dp[digidx] >> (DIGIT_BIT-1)); + + /* perform ops */ + if (err == MP_OKAY) { + for (;;) { + /* grab next digit as required */ + if (--bitcnt == 0) { + if (digidx == -1) { + break; + } + buf = get_digit(k, digidx); + bitcnt = (int)DIGIT_BIT; + --digidx; + } + + /* grab the next msb from the multiplicand */ + i = (buf >> (DIGIT_BIT - 1)) & 1; + buf <<= 1; + +#ifdef WC_NO_CACHE_RESISTANT + if (mode == 0) { + /* timing resistant - dummy operations */ + if (err == MP_OKAY) + err = ecc_projective_add_point(M[1], M[2], M[2], a, modulus, + mp); + if (err == MP_OKAY) + err = ecc_projective_dbl_point(M[2], M[3], a, modulus, mp); + } + else { + if (err == MP_OKAY) + err = ecc_projective_add_point(M[0], M[1], M[i^1], a, + modulus, mp); + if (err == MP_OKAY) + err = ecc_projective_dbl_point(M[i], M[i], a, modulus, mp); + } +#else + if (err == MP_OKAY) + err = ecc_projective_add_point(M[0], M[1], M[2], a, modulus, mp); + if (err == MP_OKAY) + err = mp_cond_copy(M[2]->x, i, M[0]->x); + if (err == MP_OKAY) + err = mp_cond_copy(M[2]->y, i, M[0]->y); + if (err == MP_OKAY) + err = mp_cond_copy(M[2]->z, i, M[0]->z); + if (err == MP_OKAY) + err = mp_cond_copy(M[2]->x, i ^ 1, M[1]->x); + if (err == MP_OKAY) + err = mp_cond_copy(M[2]->y, i ^ 1, M[1]->y); + if (err == MP_OKAY) + err = mp_cond_copy(M[2]->z, i ^ 1, M[1]->z); + + if (err == MP_OKAY) + err = mp_cond_copy(M[0]->x, i ^ 1, M[2]->x); + if (err == MP_OKAY) + err = mp_cond_copy(M[0]->y, i ^ 1, M[2]->y); + if (err == MP_OKAY) + err = mp_cond_copy(M[0]->z, i ^ 1, M[2]->z); + if (err == MP_OKAY) + err = mp_cond_copy(M[1]->x, i, M[2]->x); + if (err == MP_OKAY) + err = mp_cond_copy(M[1]->y, i, M[2]->y); + if (err == MP_OKAY) + err = mp_cond_copy(M[1]->z, i, M[2]->z); + + if (err == MP_OKAY) + err = ecc_projective_dbl_point(M[2], M[2], a, modulus, mp); + if (err == MP_OKAY) + err = mp_cond_copy(M[2]->x, i ^ 1, M[0]->x); + if (err == MP_OKAY) + err = mp_cond_copy(M[2]->y, i ^ 1, M[0]->y); + if (err == MP_OKAY) + err = mp_cond_copy(M[2]->z, i ^ 1, M[0]->z); + if (err == MP_OKAY) + err = mp_cond_copy(M[2]->x, i, M[1]->x); + if (err == MP_OKAY) + err = mp_cond_copy(M[2]->y, i, M[1]->y); + if (err == MP_OKAY) + err = mp_cond_copy(M[2]->z, i, M[1]->z); + + if (err == MP_OKAY) + err = mp_cond_copy(M[3]->x, (mode ^ 1) & i, M[0]->x); + if (err == MP_OKAY) + err = mp_cond_copy(M[3]->y, (mode ^ 1) & i, M[0]->y); + if (err == MP_OKAY) + err = mp_cond_copy(M[3]->z, (mode ^ 1) & i, M[0]->z); + if (err == MP_OKAY) + err = mp_cond_copy(M[4]->x, (mode ^ 1) & i, M[1]->x); + if (err == MP_OKAY) + err = mp_cond_copy(M[4]->y, (mode ^ 1) & i, M[1]->y); + if (err == MP_OKAY) + err = mp_cond_copy(M[4]->z, (mode ^ 1) & i, M[1]->z); +#endif /* WC_NO_CACHE_RESISTANT */ + + if (err != MP_OKAY) + break; + + mode |= i; + } /* end for */ + } + + /* copy result out */ + if (err == MP_OKAY) + err = mp_copy(M[0]->x, R->x); + if (err == MP_OKAY) + err = mp_copy(M[0]->y, R->y); + if (err == MP_OKAY) + err 
= mp_copy(M[0]->z, R->z); + +#endif /* ECC_TIMING_RESISTANT */ + + /* map R back from projective space */ + if (err == MP_OKAY && map) + err = ecc_map(R, modulus, mp); + +exit: + + /* done */ + wc_ecc_del_point_h(tG, heap); + for (i = 0; i < M_POINTS; i++) { + wc_ecc_del_point_h(M[i], heap); + } +#ifdef WOLFSSL_SMALL_STACK_CACHE + R->key = NULL; +#ifdef ALT_ECC_SIZE + XFREE(key.z, heap, DYNAMIC_TYPE_ECC); + XFREE(key.y, heap, DYNAMIC_TYPE_ECC); + XFREE(key.x, heap, DYNAMIC_TYPE_ECC); +#endif + XFREE(key.t2, heap, DYNAMIC_TYPE_ECC); + XFREE(key.t1, heap, DYNAMIC_TYPE_ECC); +#endif /* WOLFSSL_SMALL_STACK_CACHE */ +#ifdef WOLFSSL_SMALL_STACK + XFREE(mu, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +#else + if (k == NULL || G == NULL || R == NULL || modulus == NULL) { + return ECC_BAD_ARG_E; + } + + (void)a; + +#ifndef WOLFSSL_SP_NO_256 + if (mp_count_bits(modulus) == 256) { + return sp_ecc_mulmod_256(k, G, R, map, heap); + } +#endif +#ifdef WOLFSSL_SP_384 + if (mp_count_bits(modulus) == 384) { + return sp_ecc_mulmod_384(k, G, R, map, heap); + } +#endif + return ECC_BAD_ARG_E; +#endif +} + +#endif /* !FP_ECC || !WOLFSSL_SP_MATH */ + +#endif /* !FREESCALE_LTC_ECC && !WOLFSSL_STM32_PKA */ + +/** ECC Fixed Point mulmod global + k The multiplicand + G Base point to multiply + R [out] Destination of product + a ECC curve parameter a + modulus The modulus for the curve + map [boolean] If non-zero maps the point back to affine coordinates, + otherwise it's left in jacobian-montgomery form + return MP_OKAY if successful +*/ +int wc_ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* a, + mp_int* modulus, int map) +{ + return wc_ecc_mulmod_ex(k, G, R, a, modulus, map, NULL); +} + +#endif /* !WOLFSSL_ATECC508A */ + +/** + * use a heap hint when creating new ecc_point + * return an allocated point on success or NULL on failure + */ +ecc_point* wc_ecc_new_point_h(void* heap) +{ + ecc_point* p; + + (void)heap; + + p = (ecc_point*)XMALLOC(sizeof(ecc_point), heap, DYNAMIC_TYPE_ECC); + if (p == NULL) { + return NULL; + } + XMEMSET(p, 0, sizeof(ecc_point)); + +#ifndef ALT_ECC_SIZE + if (mp_init_multi(p->x, p->y, p->z, NULL, NULL, NULL) != MP_OKAY) { + XFREE(p, heap, DYNAMIC_TYPE_ECC); + return NULL; + } +#else + p->x = (mp_int*)&p->xyz[0]; + p->y = (mp_int*)&p->xyz[1]; + p->z = (mp_int*)&p->xyz[2]; + alt_fp_init(p->x); + alt_fp_init(p->y); + alt_fp_init(p->z); +#endif + + return p; +} + + +/** + Allocate a new ECC point + return A newly allocated point or NULL on error +*/ +ecc_point* wc_ecc_new_point(void) +{ + return wc_ecc_new_point_h(NULL); +} + + +void wc_ecc_del_point_h(ecc_point* p, void* heap) +{ + /* prevents free'ing null arguments */ + if (p != NULL) { + mp_clear(p->x); + mp_clear(p->y); + mp_clear(p->z); + XFREE(p, heap, DYNAMIC_TYPE_ECC); + } + (void)heap; +} + + +/** Free an ECC point from memory + p The point to free +*/ +void wc_ecc_del_point(ecc_point* p) +{ + wc_ecc_del_point_h(p, NULL); +} + + +/** Copy the value of a point to another one + p The point to copy + r The created point +*/ +int wc_ecc_copy_point(ecc_point* p, ecc_point *r) +{ + int ret; + + /* prevents null arguments */ + if (p == NULL || r == NULL) + return ECC_BAD_ARG_E; + + ret = mp_copy(p->x, r->x); + if (ret != MP_OKAY) + return ret; + ret = mp_copy(p->y, r->y); + if (ret != MP_OKAY) + return ret; + ret = mp_copy(p->z, r->z); + if (ret != MP_OKAY) + return ret; + + return MP_OKAY; +} + +/** Compare the value of a point with another one + a The point to compare + b The other point to compare + + return MP_EQ if 
equal, MP_LT/MP_GT if not, < 0 in case of error + */ +int wc_ecc_cmp_point(ecc_point* a, ecc_point *b) +{ + int ret; + + /* prevents null arguments */ + if (a == NULL || b == NULL) + return BAD_FUNC_ARG; + + ret = mp_cmp(a->x, b->x); + if (ret != MP_EQ) + return ret; + ret = mp_cmp(a->y, b->y); + if (ret != MP_EQ) + return ret; + ret = mp_cmp(a->z, b->z); + if (ret != MP_EQ) + return ret; + + return MP_EQ; +} + + +/** Returns whether an ECC idx is valid or not + n The idx number to check + return 1 if valid, 0 if not +*/ +int wc_ecc_is_valid_idx(int n) +{ + int x; + + for (x = 0; ecc_sets[x].size != 0; x++) + ; + /* -1 is a valid index --- indicating that the domain params + were supplied by the user */ + if ((n >= ECC_CUSTOM_IDX) && (n < x)) { + return 1; + } + + return 0; +} + +int wc_ecc_get_curve_idx(int curve_id) +{ + int curve_idx; + for (curve_idx = 0; ecc_sets[curve_idx].size != 0; curve_idx++) { + if (curve_id == ecc_sets[curve_idx].id) + break; + } + if (ecc_sets[curve_idx].size == 0) { + return ECC_CURVE_INVALID; + } + return curve_idx; +} + +int wc_ecc_get_curve_id(int curve_idx) +{ + if (wc_ecc_is_valid_idx(curve_idx)) { + return ecc_sets[curve_idx].id; + } + return ECC_CURVE_INVALID; +} + +/* Returns the curve size that corresponds to a given ecc_curve_id identifier + * + * id curve id, from ecc_curve_id enum in ecc.h + * return curve size, from ecc_sets[] on success, negative on error + */ +int wc_ecc_get_curve_size_from_id(int curve_id) +{ + int curve_idx = wc_ecc_get_curve_idx(curve_id); + if (curve_idx == ECC_CURVE_INVALID) + return ECC_BAD_ARG_E; + return ecc_sets[curve_idx].size; +} + +/* Returns the curve index that corresponds to a given curve name in + * ecc_sets[] of ecc.c + * + * name curve name, from ecc_sets[].name in ecc.c + * return curve index in ecc_sets[] on success, negative on error + */ +int wc_ecc_get_curve_idx_from_name(const char* curveName) +{ + int curve_idx; + word32 len; + + if (curveName == NULL) + return BAD_FUNC_ARG; + + len = (word32)XSTRLEN(curveName); + + for (curve_idx = 0; ecc_sets[curve_idx].size != 0; curve_idx++) { + if ( + #ifndef WOLFSSL_ECC_CURVE_STATIC + ecc_sets[curve_idx].name && + #endif + XSTRNCASECMP(ecc_sets[curve_idx].name, curveName, len) == 0) { + break; + } + } + if (ecc_sets[curve_idx].size == 0) { + WOLFSSL_MSG("ecc_set curve name not found"); + return ECC_CURVE_INVALID; + } + return curve_idx; +} + +/* Returns the curve size that corresponds to a given curve name, + * as listed in ecc_sets[] of ecc.c. + * + * name curve name, from ecc_sets[].name in ecc.c + * return curve size, from ecc_sets[] on success, negative on error + */ +int wc_ecc_get_curve_size_from_name(const char* curveName) +{ + int curve_idx; + + if (curveName == NULL) + return BAD_FUNC_ARG; + + curve_idx = wc_ecc_get_curve_idx_from_name(curveName); + if (curve_idx < 0) + return curve_idx; + + return ecc_sets[curve_idx].size; +} + +/* Returns the curve id that corresponds to a given curve name, + * as listed in ecc_sets[] of ecc.c. + * + * name curve name, from ecc_sets[].name in ecc.c + * return curve id, from ecc_sets[] on success, negative on error + */ +int wc_ecc_get_curve_id_from_name(const char* curveName) +{ + int curve_idx; + + if (curveName == NULL) + return BAD_FUNC_ARG; + + curve_idx = wc_ecc_get_curve_idx_from_name(curveName); + if (curve_idx < 0) + return curve_idx; + + return ecc_sets[curve_idx].id; +} + +/* Compares a curve parameter (hex, from ecc_sets[]) to given input + * parameter for equality. 
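+ * Hex-string inputs are compared directly with XSTRNCMP; unsigned-binary + * inputs are first read into mp_ints and compared numerically (see below).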
+ * encType is WC_TYPE_UNSIGNED_BIN or WC_TYPE_HEX_STR + * Returns MP_EQ on success, negative on error */ +static int wc_ecc_cmp_param(const char* curveParam, + const byte* param, word32 paramSz, int encType) +{ + int err = MP_OKAY; +#ifdef WOLFSSL_SMALL_STACK + mp_int* a = NULL; + mp_int* b = NULL; +#else + mp_int a[1], b[1]; +#endif + + if (param == NULL || curveParam == NULL) + return BAD_FUNC_ARG; + + if (encType == WC_TYPE_HEX_STR) + return XSTRNCMP(curveParam, (char*) param, paramSz); + +#ifdef WOLFSSL_SMALL_STACK + a = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + if (a == NULL) + return MEMORY_E; + b = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + if (b == NULL) { + XFREE(a, NULL, DYNAMIC_TYPE_ECC); + return MEMORY_E; + } +#endif + + if ((err = mp_init_multi(a, b, NULL, NULL, NULL, NULL)) != MP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(a, NULL, DYNAMIC_TYPE_ECC); + XFREE(b, NULL, DYNAMIC_TYPE_ECC); + #endif + return err; + } + + if (err == MP_OKAY) { + err = mp_read_unsigned_bin(a, param, paramSz); + } + if (err == MP_OKAY) + err = mp_read_radix(b, curveParam, MP_RADIX_HEX); + + if (err == MP_OKAY) { + if (mp_cmp(a, b) != MP_EQ) { + err = -1; + } else { + err = MP_EQ; + } + } + + mp_clear(a); + mp_clear(b); +#ifdef WOLFSSL_SMALL_STACK + XFREE(b, NULL, DYNAMIC_TYPE_ECC); + XFREE(a, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Returns the curve id in ecc_sets[] that corresponds to a given set of + * curve parameters. + * + * fieldSize the field size in bits + * prime prime of the finite field + * primeSz size of prime in octets + * Af first coefficient a of the curve + * AfSz size of Af in octets + * Bf second coefficient b of the curve + * BfSz size of Bf in octets + * order curve order + * orderSz size of curve in octets + * Gx affine x coordinate of base point + * GxSz size of Gx in octets + * Gy affine y coordinate of base point + * GySz size of Gy in octets + * cofactor curve cofactor + * + * return curve id, from ecc_sets[] on success, negative on error + */ +int wc_ecc_get_curve_id_from_params(int fieldSize, + const byte* prime, word32 primeSz, const byte* Af, word32 AfSz, + const byte* Bf, word32 BfSz, const byte* order, word32 orderSz, + const byte* Gx, word32 GxSz, const byte* Gy, word32 GySz, int cofactor) +{ + int idx; + int curveSz; + + if (prime == NULL || Af == NULL || Bf == NULL || order == NULL || + Gx == NULL || Gy == NULL) + return BAD_FUNC_ARG; + + curveSz = (fieldSize + 1) / 8; /* round up */ + + for (idx = 0; ecc_sets[idx].size != 0; idx++) { + if (curveSz == ecc_sets[idx].size) { + if ((wc_ecc_cmp_param(ecc_sets[idx].prime, prime, + primeSz, WC_TYPE_UNSIGNED_BIN) == MP_EQ) && + (wc_ecc_cmp_param(ecc_sets[idx].Af, Af, AfSz, + WC_TYPE_UNSIGNED_BIN) == MP_EQ) && + (wc_ecc_cmp_param(ecc_sets[idx].Bf, Bf, BfSz, + WC_TYPE_UNSIGNED_BIN) == MP_EQ) && + (wc_ecc_cmp_param(ecc_sets[idx].order, order, + orderSz, WC_TYPE_UNSIGNED_BIN) == MP_EQ) && + (wc_ecc_cmp_param(ecc_sets[idx].Gx, Gx, GxSz, + WC_TYPE_UNSIGNED_BIN) == MP_EQ) && + (wc_ecc_cmp_param(ecc_sets[idx].Gy, Gy, GySz, + WC_TYPE_UNSIGNED_BIN) == MP_EQ) && + (cofactor == ecc_sets[idx].cofactor)) { + break; + } + } + } + + if (ecc_sets[idx].size == 0) + return ECC_CURVE_INVALID; + + return ecc_sets[idx].id; +} + +/* Returns the curve id in ecc_sets[] that corresponds + * to a given domain parameters pointer. 
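+ * Each hex parameter string (prime, Af, Bf, order, Gx, Gy) and the cofactor + * must match an ecc_sets[] entry of the same size.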
+ * + * dp domain parameters pointer + * + * return curve id, from ecc_sets[] on success, negative on error + */ +int wc_ecc_get_curve_id_from_dp_params(const ecc_set_type* dp) +{ + int idx; + + if (dp == NULL + #ifndef WOLFSSL_ECC_CURVE_STATIC + || dp->prime == NULL || dp->Af == NULL || + dp->Bf == NULL || dp->order == NULL || dp->Gx == NULL || dp->Gy == NULL + #endif + ) { + return BAD_FUNC_ARG; + } + + for (idx = 0; ecc_sets[idx].size != 0; idx++) { + if (dp->size == ecc_sets[idx].size) { + if ((wc_ecc_cmp_param(ecc_sets[idx].prime, (const byte*)dp->prime, + (word32)XSTRLEN(dp->prime), WC_TYPE_HEX_STR) == MP_EQ) && + (wc_ecc_cmp_param(ecc_sets[idx].Af, (const byte*)dp->Af, + (word32)XSTRLEN(dp->Af),WC_TYPE_HEX_STR) == MP_EQ) && + (wc_ecc_cmp_param(ecc_sets[idx].Bf, (const byte*)dp->Bf, + (word32)XSTRLEN(dp->Bf),WC_TYPE_HEX_STR) == MP_EQ) && + (wc_ecc_cmp_param(ecc_sets[idx].order, (const byte*)dp->order, + (word32)XSTRLEN(dp->order),WC_TYPE_HEX_STR) == MP_EQ) && + (wc_ecc_cmp_param(ecc_sets[idx].Gx, (const byte*)dp->Gx, + (word32)XSTRLEN(dp->Gx),WC_TYPE_HEX_STR) == MP_EQ) && + (wc_ecc_cmp_param(ecc_sets[idx].Gy, (const byte*)dp->Gy, + (word32)XSTRLEN(dp->Gy),WC_TYPE_HEX_STR) == MP_EQ) && + (dp->cofactor == ecc_sets[idx].cofactor)) { + break; + } + } + } + + if (ecc_sets[idx].size == 0) + return ECC_CURVE_INVALID; + + return ecc_sets[idx].id; +} + +/* Returns the curve id that corresponds to a given OID, + * as listed in ecc_sets[] of ecc.c. + * + * oid OID, from ecc_sets[].name in ecc.c + * len OID len, from ecc_sets[].name in ecc.c + * return curve id, from ecc_sets[] on success, negative on error + */ +int wc_ecc_get_curve_id_from_oid(const byte* oid, word32 len) +{ + int curve_idx; + + if (oid == NULL) + return BAD_FUNC_ARG; + + for (curve_idx = 0; ecc_sets[curve_idx].size != 0; curve_idx++) { + if ( + #ifndef WOLFSSL_ECC_CURVE_STATIC + ecc_sets[curve_idx].oid && + #endif + ecc_sets[curve_idx].oidSz == len && + XMEMCMP(ecc_sets[curve_idx].oid, oid, len) == 0) { + break; + } + } + if (ecc_sets[curve_idx].size == 0) { + WOLFSSL_MSG("ecc_set curve name not found"); + return ECC_CURVE_INVALID; + } + + return ecc_sets[curve_idx].id; +} + +/* Get curve parameters using curve index */ +const ecc_set_type* wc_ecc_get_curve_params(int curve_idx) +{ + const ecc_set_type* ecc_set = NULL; + + if (curve_idx >= 0 && curve_idx < (int)ECC_SET_COUNT) { + ecc_set = &ecc_sets[curve_idx]; + } + return ecc_set; +} + + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) +static WC_INLINE int wc_ecc_alloc_mpint(ecc_key* key, mp_int** mp) +{ + if (key == NULL || mp == NULL) + return BAD_FUNC_ARG; + if (*mp == NULL) { + *mp = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_BIGINT); + if (*mp == NULL) { + return MEMORY_E; + } + XMEMSET(*mp, 0, sizeof(mp_int)); + } + return 0; +} +static WC_INLINE void wc_ecc_free_mpint(ecc_key* key, mp_int** mp) +{ + if (key && mp && *mp) { + mp_clear(*mp); + XFREE(*mp, key->heap, DYNAMIC_TYPE_BIGINT); + *mp = NULL; + } +} + +static int wc_ecc_alloc_async(ecc_key* key) +{ + int err = wc_ecc_alloc_mpint(key, &key->r); + if (err == 0) + err = wc_ecc_alloc_mpint(key, &key->s); + return err; +} + +static void wc_ecc_free_async(ecc_key* key) +{ + wc_ecc_free_mpint(key, &key->r); + wc_ecc_free_mpint(key, &key->s); +#ifdef HAVE_CAVIUM_V + wc_ecc_free_mpint(key, &key->e); + wc_ecc_free_mpint(key, &key->signK); +#endif /* HAVE_CAVIUM_V */ +} +#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */ + + +#ifdef HAVE_ECC_DHE +/** + Create an ECC shared secret 
between two keys + private_key The private ECC key (heap hint based off of private key) + public_key The public key + out [out] Destination of the shared secret + Conforms to EC-DH from ANSI X9.63 + outlen [in/out] The max size and resulting size of the shared secret + return MP_OKAY if successful +*/ +int wc_ecc_shared_secret(ecc_key* private_key, ecc_key* public_key, byte* out, + word32* outlen) +{ + int err; +#if defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_ATECC508A) + CRYS_ECDH_TempData_t tempBuff; +#endif + if (private_key == NULL || public_key == NULL || out == NULL || + outlen == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef WOLF_CRYPTO_CB + if (private_key->devId != INVALID_DEVID) { + err = wc_CryptoCb_Ecdh(private_key, public_key, out, outlen); + if (err != CRYPTOCB_UNAVAILABLE) + return err; + /* fall-through when unavailable */ + } +#endif + + /* type valid? */ + if (private_key->type != ECC_PRIVATEKEY && + private_key->type != ECC_PRIVATEKEY_ONLY) { + return ECC_BAD_ARG_E; + } + + /* Verify domain params supplied */ + if (wc_ecc_is_valid_idx(private_key->idx) == 0 || + wc_ecc_is_valid_idx(public_key->idx) == 0) { + return ECC_BAD_ARG_E; + } + + /* Verify curve id matches */ + if (private_key->dp->id != public_key->dp->id) { + return ECC_BAD_ARG_E; + } + +#ifdef WOLFSSL_ATECC508A + /* For SECP256R1 use hardware */ + if (private_key->dp->id == ECC_SECP256R1) { + err = atmel_ecc_create_pms(private_key->slot, public_key->pubkey_raw, out); + *outlen = private_key->dp->size; + } + else { + err = NOT_COMPILED_IN; + } +#elif defined(WOLFSSL_CRYPTOCELL) + + /* generate a secret*/ + err = CRYS_ECDH_SVDP_DH(&public_key->ctx.pubKey, + &private_key->ctx.privKey, + out, + outlen, + &tempBuff); + + if (err != SA_SILIB_RET_OK){ + WOLFSSL_MSG("CRYS_ECDH_SVDP_DH for secret failed"); + return err; + } + +#else + err = wc_ecc_shared_secret_ex(private_key, &public_key->pubkey, out, outlen); +#endif /* WOLFSSL_ATECC508A */ + + return err; +} + + +#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL) + +static int wc_ecc_shared_secret_gen_sync(ecc_key* private_key, ecc_point* point, + byte* out, word32* outlen, ecc_curve_spec* curve) +{ + int err; +#ifndef WOLFSSL_SP_MATH + ecc_point* result = NULL; + word32 x = 0; +#endif + mp_int* k = &private_key->k; +#ifdef HAVE_ECC_CDH + mp_int k_lcl; + + /* if cofactor flag has been set */ + if (private_key->flags & WC_ECC_FLAG_COFACTOR) { + mp_digit cofactor = (mp_digit)private_key->dp->cofactor; + /* only perform cofactor calc if not equal to 1 */ + if (cofactor != 1) { + k = &k_lcl; + if (mp_init(k) != MP_OKAY) + return MEMORY_E; + /* multiply cofactor times private key "k" */ + err = mp_mul_d(&private_key->k, cofactor, k); + if (err != MP_OKAY) { + mp_clear(k); + return err; + } + } + } +#endif + +#ifdef WOLFSSL_HAVE_SP_ECC +#ifndef WOLFSSL_SP_NO_256 + if (private_key->idx != ECC_CUSTOM_IDX && + ecc_sets[private_key->idx].id == ECC_SECP256R1) { + err = sp_ecc_secret_gen_256(k, point, out, outlen, private_key->heap); + } + else +#endif +#ifdef WOLFSSL_SP_384 + if (private_key->idx != ECC_CUSTOM_IDX && + ecc_sets[private_key->idx].id == ECC_SECP384R1) { + err = sp_ecc_secret_gen_384(k, point, out, outlen, private_key->heap); + } + else +#endif +#endif +#ifdef WOLFSSL_SP_MATH + { + err = WC_KEY_SIZE_E; + + (void)curve; + } +#else + { + mp_digit mp = 0; + + /* make new point */ + result = wc_ecc_new_point_h(private_key->heap); + if (result == NULL) { +#ifdef HAVE_ECC_CDH + if (k == &k_lcl) + mp_clear(k); +#endif + return MEMORY_E; + } + + /* Map in 
a separate call as this should be constant time */ + err = wc_ecc_mulmod_ex(k, point, result, curve->Af, curve->prime, 0, + private_key->heap); + if (err == MP_OKAY) { + err = mp_montgomery_setup(curve->prime, &mp); + } + if (err == MP_OKAY) { + /* Use constant time map if compiled in */ + err = ecc_map_ex(result, curve->prime, mp, 1); + } + if (err == MP_OKAY) { + x = mp_unsigned_bin_size(curve->prime); + if (*outlen < x || (int)x < mp_unsigned_bin_size(result->x)) { + err = BUFFER_E; + } + } + + if (err == MP_OKAY) { + XMEMSET(out, 0, x); + err = mp_to_unsigned_bin(result->x,out + + (x - mp_unsigned_bin_size(result->x))); + } + *outlen = x; + + wc_ecc_del_point_h(result, private_key->heap); + } +#endif +#ifdef HAVE_ECC_CDH + if (k == &k_lcl) + mp_clear(k); +#endif + + return err; +} + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) +static int wc_ecc_shared_secret_gen_async(ecc_key* private_key, + ecc_point* point, byte* out, word32 *outlen, + ecc_curve_spec* curve) +{ + int err; + +#if defined(HAVE_CAVIUM_V) || defined(HAVE_INTEL_QA) +#ifdef HAVE_CAVIUM_V + /* verify the curve is supported by hardware */ + if (NitroxEccIsCurveSupported(private_key)) +#endif + { + word32 keySz = private_key->dp->size; + + /* sync public key x/y */ + err = wc_mp_to_bigint_sz(&private_key->k, &private_key->k.raw, keySz); + if (err == MP_OKAY) + err = wc_mp_to_bigint_sz(point->x, &point->x->raw, keySz); + if (err == MP_OKAY) + err = wc_mp_to_bigint_sz(point->y, &point->y->raw, keySz); + #ifdef HAVE_CAVIUM_V + /* allocate buffer for output */ + if (err == MP_OKAY) + err = wc_ecc_alloc_mpint(private_key, &private_key->e); + if (err == MP_OKAY) + err = wc_bigint_alloc(&private_key->e->raw, + NitroxEccGetSize(private_key)*2); + if (err == MP_OKAY) + err = NitroxEcdh(private_key, + &private_key->k.raw, &point->x->raw, &point->y->raw, + private_key->e->raw.buf, &private_key->e->raw.len, + &curve->prime->raw); + #else + if (err == MP_OKAY) + err = wc_ecc_curve_load(private_key->dp, &curve, ECC_CURVE_FIELD_BF); + if (err == MP_OKAY) + err = IntelQaEcdh(&private_key->asyncDev, + &private_key->k.raw, &point->x->raw, &point->y->raw, + out, outlen, + &curve->Af->raw, &curve->Bf->raw, &curve->prime->raw, + private_key->dp->cofactor); + #endif + return err; + } +#elif defined(WOLFSSL_ASYNC_CRYPT_TEST) + if (wc_AsyncTestInit(&private_key->asyncDev, ASYNC_TEST_ECC_SHARED_SEC)) { + WC_ASYNC_TEST* testDev = &private_key->asyncDev.test; + testDev->eccSharedSec.private_key = private_key; + testDev->eccSharedSec.public_point = point; + testDev->eccSharedSec.out = out; + testDev->eccSharedSec.outLen = outlen; + return WC_PENDING_E; + } +#endif + + /* use sync in other cases */ + err = wc_ecc_shared_secret_gen_sync(private_key, point, out, outlen, curve); + + return err; +} +#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */ + +int wc_ecc_shared_secret_gen(ecc_key* private_key, ecc_point* point, + byte* out, word32 *outlen) +{ + int err; + DECLARE_CURVE_SPECS(curve, 2); + + if (private_key == NULL || point == NULL || out == NULL || + outlen == NULL) { + return BAD_FUNC_ARG; + } + + /* load curve info */ + ALLOC_CURVE_SPECS(2); + err = wc_ecc_curve_load(private_key->dp, &curve, + (ECC_CURVE_FIELD_PRIME | ECC_CURVE_FIELD_AF)); + if (err != MP_OKAY) { + FREE_CURVE_SPECS(); + return err; + } + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) + if (private_key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) { + err = wc_ecc_shared_secret_gen_async(private_key, point, + out, outlen, curve); + } + 
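+ /* key has no async ECC marker: use the synchronous path below */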
else +#endif + { + err = wc_ecc_shared_secret_gen_sync(private_key, point, + out, outlen, curve); + } + + wc_ecc_curve_free(curve); + FREE_CURVE_SPECS(); + + return err; +} + +/** + Create an ECC shared secret between private key and public point + private_key The private ECC key (heap hint based on private key) + point The point to use (public key) + out [out] Destination of the shared secret + outlen [in/out] The max size and resulting size of the shared secret + Conforms to EC-DH from ANSI X9.63 + return MP_OKAY if successful +*/ +int wc_ecc_shared_secret_ex(ecc_key* private_key, ecc_point* point, + byte* out, word32 *outlen) +{ + int err; + + if (private_key == NULL || point == NULL || out == NULL || + outlen == NULL) { + return BAD_FUNC_ARG; + } + + /* type valid? */ + if (private_key->type != ECC_PRIVATEKEY && + private_key->type != ECC_PRIVATEKEY_ONLY) { + return ECC_BAD_ARG_E; + } + + /* Verify domain params supplied */ + if (wc_ecc_is_valid_idx(private_key->idx) == 0) + return ECC_BAD_ARG_E; + + switch (private_key->state) { + case ECC_STATE_NONE: + case ECC_STATE_SHARED_SEC_GEN: + private_key->state = ECC_STATE_SHARED_SEC_GEN; + + err = wc_ecc_shared_secret_gen(private_key, point, out, outlen); + if (err < 0) { + break; + } + FALL_THROUGH; + + case ECC_STATE_SHARED_SEC_RES: + private_key->state = ECC_STATE_SHARED_SEC_RES; + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) + if (private_key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) { + #ifdef HAVE_CAVIUM_V + /* verify the curve is supported by hardware */ + if (NitroxEccIsCurveSupported(private_key)) { + /* copy output */ + *outlen = private_key->dp->size; + XMEMCPY(out, private_key->e->raw.buf, *outlen); + } + #endif /* HAVE_CAVIUM_V */ + } + #endif /* WOLFSSL_ASYNC_CRYPT */ + err = 0; + break; + + default: + err = BAD_STATE_E; + } /* switch */ + + /* if async pending then return and skip done cleanup below */ + if (err == WC_PENDING_E) { + private_key->state++; + return err; + } + + /* cleanup */ +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) + wc_ecc_free_async(private_key); +#endif + private_key->state = ECC_STATE_NONE; + + return err; +} +#endif /* !WOLFSSL_ATECC508A && !WOLFSSL_CRYPTOCELL */ +#endif /* HAVE_ECC_DHE */ + + +#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL) +/* return 1 if point is at infinity, 0 if not, < 0 on error */ +int wc_ecc_point_is_at_infinity(ecc_point* p) +{ + if (p == NULL) + return BAD_FUNC_ARG; + + if (get_digit_count(p->x) == 0 && get_digit_count(p->y) == 0) + return 1; + + return 0; +} + +/* generate a random value and ensure it's greater than 0 and less than order */ +int wc_ecc_gen_k(WC_RNG* rng, int size, mp_int* k, mp_int* order) +{ +#ifndef WC_NO_RNG + int err; + byte buf[ECC_MAXSIZE_GEN]; + + /* generate 8 extra bytes to mitigate bias from the modulo operation below */ + /* see section A.1.2 in 'Suite B Implementor's Guide to FIPS 186-3 (ECDSA)' */ + size += 8; + + /* make up random string */ + err = wc_RNG_GenerateBlock(rng, buf, size); + + /* load random buffer data into k */ + if (err == 0) + err = mp_read_unsigned_bin(k, (byte*)buf, size); + + /* the key should be smaller than the order of base point */ + if (err == MP_OKAY) { + if (mp_cmp(k, order) != MP_LT) { + err = mp_mod(k, order, k); + } + } + + /* quick sanity check to make sure we're not dealing with a 0 key */ + if (err == MP_OKAY) { + if (mp_iszero(k) == MP_YES) + err = MP_ZERO_E; + } + + /* zeroize the whole buffer; size can reach ECC_MAXSIZE_GEN (8 bytes more + than ECC_MAXSIZE), so clear the full ECC_MAXSIZE_GEN bytes */ + ForceZero(buf, ECC_MAXSIZE_GEN); + + return err; +#else + (void)rng; + (void)size; + (void)k; +
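+ /* WC_NO_RNG build: unused arguments are discarded and NOT_COMPILED_IN is returned below */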
(void)order; + return NOT_COMPILED_IN; +#endif /* !WC_NO_RNG */ +} +#endif /* !WOLFSSL_ATECC508A && !WOLFSSL_CRYPTOCELL */ + +static WC_INLINE void wc_ecc_reset(ecc_key* key) +{ + /* make sure required key variables are reset */ + key->state = ECC_STATE_NONE; +} + + +/* create the public ECC key from a private key + * + * key an initialized private key to generate public part from + * curveIn [in]curve for key, can be NULL + * pubOut [out]ecc_point holding the public key, if NULL then public key part + * is cached in key instead. + * + * Note this function is local to the file because of the argument type + * ecc_curve_spec. Having this argument allows for not having to load the + * curve type multiple times when generating a key with wc_ecc_make_key(). + * + * returns MP_OKAY on success + */ +static int wc_ecc_make_pub_ex(ecc_key* key, ecc_curve_spec* curveIn, + ecc_point* pubOut) +{ + int err = MP_OKAY; +#ifndef WOLFSSL_ATECC508A +#ifndef WOLFSSL_SP_MATH + ecc_point* base = NULL; +#endif + ecc_point* pub; + DECLARE_CURVE_SPECS(curve, ECC_CURVE_FIELD_COUNT); +#endif /* !WOLFSSL_ATECC508A */ + + if (key == NULL) { + return BAD_FUNC_ARG; + } + +#ifndef WOLFSSL_ATECC508A + + /* if ecc_point passed in then use it as output for public key point */ + if (pubOut != NULL) { + pub = pubOut; + } + else { + /* caching public key making it a ECC_PRIVATEKEY instead of + ECC_PRIVATEKEY_ONLY */ + pub = &key->pubkey; + key->type = ECC_PRIVATEKEY_ONLY; + } + + /* avoid loading the curve unless it is not passed in */ + if (curveIn != NULL) { + curve = curveIn; + } + else { + /* load curve info */ + if (err == MP_OKAY) { + ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT); + err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL); + } + } + + if (err == MP_OKAY) { + #ifndef ALT_ECC_SIZE + err = mp_init_multi(pub->x, pub->y, pub->z, NULL, NULL, NULL); + #else + pub->x = (mp_int*)&pub->xyz[0]; + pub->y = (mp_int*)&pub->xyz[1]; + pub->z = (mp_int*)&pub->xyz[2]; + alt_fp_init(pub->x); + alt_fp_init(pub->y); + alt_fp_init(pub->z); + #endif + } + + + if (err != MP_OKAY) { + } + else +#ifdef WOLFSSL_HAVE_SP_ECC +#ifndef WOLFSSL_SP_NO_256 + if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) { + err = sp_ecc_mulmod_base_256(&key->k, pub, 1, key->heap); + } + else +#endif +#ifdef WOLFSSL_SP_384 + if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) { + err = sp_ecc_mulmod_base_384(&key->k, pub, 1, key->heap); + } + else +#endif +#endif +#ifdef WOLFSSL_SP_MATH + err = WC_KEY_SIZE_E; +#else + { + mp_digit mp; + + base = wc_ecc_new_point_h(key->heap); + if (base == NULL) + err = MEMORY_E; + /* read in the x/y for this key */ + if (err == MP_OKAY) + err = mp_copy(curve->Gx, base->x); + if (err == MP_OKAY) + err = mp_copy(curve->Gy, base->y); + if (err == MP_OKAY) + err = mp_set(base->z, 1); + + /* make the public key */ + if (err == MP_OKAY) { + /* Map in a separate call as this should be constant time */ + err = wc_ecc_mulmod_ex(&key->k, base, pub, curve->Af, curve->prime, + 0, key->heap); + if (err == MP_MEM) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + err = mp_montgomery_setup(curve->prime, &mp); + } + if (err == MP_OKAY) { + /* Use constant time map if compiled in */ + err = ecc_map_ex(pub, curve->prime, mp, 1); + } + + wc_ecc_del_point_h(base, key->heap); + } +#endif + +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + /* validate the public key, order * pubkey = point at infinity */ + if (err == MP_OKAY) + err = ecc_check_pubkey_order(key, pub, curve->Af, curve->prime, + 
curve->order); +#endif /* WOLFSSL_VALIDATE_KEYGEN */ + + if (err != MP_OKAY) { + /* clean up if failed */ + #ifndef ALT_ECC_SIZE + mp_clear(pub->x); + mp_clear(pub->y); + mp_clear(pub->z); + #endif + } + + /* free up local curve */ + if (curveIn == NULL) { + wc_ecc_curve_free(curve); + FREE_CURVE_SPECS(); + } + +#else + (void)curveIn; + err = NOT_COMPILED_IN; +#endif /* WOLFSSL_ATECC508A */ + + /* change key state if public part is cached */ + if (key->type == ECC_PRIVATEKEY_ONLY && pubOut == NULL) { + key->type = ECC_PRIVATEKEY; + } + + return err; +} + + +/* create the public ECC key from a private key + * + * key an initialized private key to generate public part from + * pubOut [out]ecc_point holding the public key, if NULL then public key part + * is cached in key instead. + * + * + * returns MP_OKAY on success + */ +int wc_ecc_make_pub(ecc_key* key, ecc_point* pubOut) +{ + WOLFSSL_ENTER("wc_ecc_make_pub"); + + return wc_ecc_make_pub_ex(key, NULL, pubOut); +} + + +WOLFSSL_ABI +int wc_ecc_make_key_ex(WC_RNG* rng, int keysize, ecc_key* key, int curve_id) +{ + int err; +#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL) +#ifndef WOLFSSL_SP_MATH + DECLARE_CURVE_SPECS(curve, ECC_CURVE_FIELD_COUNT); +#endif +#endif /* !WOLFSSL_ATECC508A */ +#if defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_ATECC508A) + const CRYS_ECPKI_Domain_t* pDomain; + CRYS_ECPKI_KG_TempData_t tempBuff; + CRYS_ECPKI_KG_FipsContext_t fipsCtx; + byte ucompressed_key[ECC_MAX_CRYPTO_HW_SIZE*2 + 1]; + word32 raw_size = 0; +#endif + if (key == NULL || rng == NULL) { + return BAD_FUNC_ARG; + } + + /* make sure required variables are reset */ + wc_ecc_reset(key); + + err = wc_ecc_set_curve(key, keysize, curve_id); + if (err != 0) { + return err; + } + +#ifdef WOLF_CRYPTO_CB + if (key->devId != INVALID_DEVID) { + err = wc_CryptoCb_MakeEccKey(rng, keysize, key, curve_id); + if (err != CRYPTOCB_UNAVAILABLE) + return err; + /* fall-through when unavailable */ + } +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) + if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) { + #ifdef HAVE_CAVIUM + /* TODO: Not implemented */ + #elif defined(HAVE_INTEL_QA) + /* TODO: Not implemented */ + #else + if (wc_AsyncTestInit(&key->asyncDev, ASYNC_TEST_ECC_MAKE)) { + WC_ASYNC_TEST* testDev = &key->asyncDev.test; + testDev->eccMake.rng = rng; + testDev->eccMake.key = key; + testDev->eccMake.size = keysize; + testDev->eccMake.curve_id = curve_id; + return WC_PENDING_E; + } + #endif + } +#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */ + +#ifdef WOLFSSL_ATECC508A + if (key->dp->id == ECC_SECP256R1) { + key->type = ECC_PRIVATEKEY; + key->slot = atmel_ecc_alloc(ATMEL_SLOT_ECDHE); + err = atmel_ecc_create_key(key->slot, key->pubkey_raw); + + /* populate key->pubkey */ + if (err == 0 + #ifdef ALT_ECC_SIZE + && key->pubkey.x + #endif + ) { + err = mp_read_unsigned_bin(key->pubkey.x, key->pubkey_raw, + ECC_MAX_CRYPTO_HW_SIZE); + } + if (err == 0 + #ifdef ALT_ECC_SIZE + && key->pubkey.y + #endif + ) { + err = mp_read_unsigned_bin(key->pubkey.y, + key->pubkey_raw + ECC_MAX_CRYPTO_HW_SIZE, + ECC_MAX_CRYPTO_HW_SIZE); + } + } + else { + err = NOT_COMPILED_IN; + } +#elif defined(WOLFSSL_CRYPTOCELL) + + pDomain = CRYS_ECPKI_GetEcDomain(cc310_mapCurve(curve_id)); + raw_size = (word32)(key->dp->size)*2 + 1; + + /* generate first key pair */ + err = CRYS_ECPKI_GenKeyPair(&wc_rndState, + wc_rndGenVectFunc, + pDomain, + &key->ctx.privKey, + &key->ctx.pubKey, + &tempBuff, + &fipsCtx); + + if (err != SA_SILIB_RET_OK){ + 
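+ /* CryptoCell key generation failed; return the hardware error code */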
WOLFSSL_MSG("CRYS_ECPKI_GenKeyPair for key pair failed"); + return err; + } + key->type = ECC_PRIVATEKEY; + + err = CRYS_ECPKI_ExportPublKey(&key->ctx.pubKey, + CRYS_EC_PointUncompressed, + &ucompressed_key[0], + &raw_size); + + if (err == SA_SILIB_RET_OK && key->pubkey.x && key->pubkey.y) { + err = mp_read_unsigned_bin(key->pubkey.x, + &ucompressed_key[1], key->dp->size); + if (err == MP_OKAY) { + err = mp_read_unsigned_bin(key->pubkey.y, + &ucompressed_key[1+key->dp->size],key->dp->size); + } + } + raw_size = key->dp->size; + if (err == MP_OKAY) { + err = CRYS_ECPKI_ExportPrivKey(&key->ctx.privKey, + ucompressed_key, + &raw_size); + } + + if (err == SA_SILIB_RET_OK) { + err = mp_read_unsigned_bin(&key->k, ucompressed_key, raw_size); + } + +#else + +#ifdef WOLFSSL_HAVE_SP_ECC +#ifndef WOLFSSL_SP_NO_256 + if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) { + err = sp_ecc_make_key_256(rng, &key->k, &key->pubkey, key->heap); + if (err == MP_OKAY) { + key->type = ECC_PRIVATEKEY; + } + } + else +#endif +#ifdef WOLFSSL_SP_384 + if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) { + err = sp_ecc_make_key_384(rng, &key->k, &key->pubkey, key->heap); + if (err == MP_OKAY) { + key->type = ECC_PRIVATEKEY; + } + } + else +#endif +#endif /* WOLFSSL_HAVE_SP_ECC */ + + { /* software key gen */ +#ifdef WOLFSSL_SP_MATH + err = WC_KEY_SIZE_E; +#else + + /* setup the key variables */ + err = mp_init(&key->k); + + /* load curve info */ + if (err == MP_OKAY) { + ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT); + err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL); + } + + /* generate k */ + if (err == MP_OKAY) + err = wc_ecc_gen_k(rng, key->dp->size, &key->k, curve->order); + + /* generate public key from k */ + if (err == MP_OKAY) + err = wc_ecc_make_pub_ex(key, curve, NULL); + + if (err == MP_OKAY) + key->type = ECC_PRIVATEKEY; + + /* cleanup these on failure case only */ + if (err != MP_OKAY) { + /* clean up */ + mp_forcezero(&key->k); + } + + /* cleanup allocations */ + wc_ecc_curve_free(curve); + FREE_CURVE_SPECS(); +#endif /* WOLFSSL_SP_MATH */ + } + +#ifdef HAVE_WOLF_BIGINT + if (err == MP_OKAY) + err = wc_mp_to_bigint(&key->k, &key->k.raw); + if (err == MP_OKAY) + err = wc_mp_to_bigint(key->pubkey.x, &key->pubkey.x->raw); + if (err == MP_OKAY) + err = wc_mp_to_bigint(key->pubkey.y, &key->pubkey.y->raw); + if (err == MP_OKAY) + err = wc_mp_to_bigint(key->pubkey.z, &key->pubkey.z->raw); +#endif + +#endif /* WOLFSSL_ATECC508A */ + + return err; +} + +#ifdef ECC_DUMP_OID +/* Optional dump of encoded OID for adding new curves */ +static int mOidDumpDone; +static void wc_ecc_dump_oids(void) +{ + int x; + + if (mOidDumpDone) { + return; + } + + /* find matching OID sum (based on encoded value) */ + for (x = 0; ecc_sets[x].size != 0; x++) { + int i; + byte* oid; + word32 oidSz, sum = 0; + + printf("ECC %s (%d):\n", ecc_sets[x].name, x); + + #ifdef HAVE_OID_ENCODING + byte oidEnc[ECC_MAX_OID_LEN]; + + oid = oidEnc; + oidSz = ECC_MAX_OID_LEN; + + printf("OID: "); + for (i = 0; i < (int)ecc_sets[x].oidSz; i++) { + printf("%d.", ecc_sets[x].oid[i]); + } + printf("\n"); + + EncodeObjectId(ecc_sets[x].oid, ecc_sets[x].oidSz, oidEnc, &oidSz); + #else + oid = (byte*)ecc_sets[x].oid; + oidSz = ecc_sets[x].oidSz; + #endif + + printf("OID Encoded: "); + for (i = 0; i < (int)oidSz; i++) { + printf("0x%02X,", oid[i]); + } + printf("\n"); + + for (i = 0; i < (int)oidSz; i++) { + sum += oid[i]; + } + printf("Sum: %d\n", sum); + + /* validate sum */ + if (ecc_sets[x].oidSum != 
sum) { + printf(" Sum %d Not Valid!\n", ecc_sets[x].oidSum); + } + } + mOidDumpDone = 1; +} +#endif /* ECC_DUMP_OID */ + + +WOLFSSL_ABI +ecc_key* wc_ecc_key_new(void* heap) +{ + ecc_key* key; + + key = (ecc_key*)XMALLOC(sizeof(ecc_key), heap, DYNAMIC_TYPE_ECC); + if (key) { + if (wc_ecc_init_ex(key, heap, INVALID_DEVID) != 0) { + XFREE(key, heap, DYNAMIC_TYPE_ECC); + key = NULL; + } + } + + return key; +} + + +WOLFSSL_ABI +void wc_ecc_key_free(ecc_key* key) +{ + if (key) { + void* heap = key->heap; + + wc_ecc_free(key); + ForceZero(key, sizeof(ecc_key)); + XFREE(key, heap, DYNAMIC_TYPE_ECC); + (void)heap; + } +} + + +/** + Make a new ECC key + rng An active RNG state + keysize The keysize for the new key (in octets from 20 to 65 bytes) + key [out] Destination of the newly created key + return MP_OKAY if successful, + upon error all allocated memory will be freed + */ +int wc_ecc_make_key(WC_RNG* rng, int keysize, ecc_key* key) +{ + return wc_ecc_make_key_ex(rng, keysize, key, ECC_CURVE_DEF); +} + +/* Setup dynamic pointers if using normal math for proper freeing */ +WOLFSSL_ABI +int wc_ecc_init_ex(ecc_key* key, void* heap, int devId) +{ + int ret = 0; + + if (key == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef ECC_DUMP_OID + wc_ecc_dump_oids(); +#endif + + XMEMSET(key, 0, sizeof(ecc_key)); + key->state = ECC_STATE_NONE; + +#if defined(PLUTON_CRYPTO_ECC) || defined(WOLF_CRYPTO_CB) + key->devId = devId; +#else + (void)devId; +#endif + +#ifdef WOLFSSL_ATECC508A + key->slot = ATECC_INVALID_SLOT; +#else +#ifdef ALT_ECC_SIZE + key->pubkey.x = (mp_int*)&key->pubkey.xyz[0]; + key->pubkey.y = (mp_int*)&key->pubkey.xyz[1]; + key->pubkey.z = (mp_int*)&key->pubkey.xyz[2]; + alt_fp_init(key->pubkey.x); + alt_fp_init(key->pubkey.y); + alt_fp_init(key->pubkey.z); + ret = mp_init(&key->k); + if (ret != MP_OKAY) { + return MEMORY_E; + } +#else + ret = mp_init_multi(&key->k, key->pubkey.x, key->pubkey.y, key->pubkey.z, + NULL, NULL); + if (ret != MP_OKAY) { + return MEMORY_E; + } +#endif /* ALT_ECC_SIZE */ +#endif /* WOLFSSL_ATECC508A */ + +#ifdef WOLFSSL_HEAP_TEST + key->heap = (void*)WOLFSSL_HEAP_TEST; +#else + key->heap = heap; +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) + /* handle as async */ + ret = wolfAsync_DevCtxInit(&key->asyncDev, WOLFSSL_ASYNC_MARKER_ECC, + key->heap, devId); +#endif + +#if defined(WOLFSSL_DSP) + key->handle = -1; +#endif + return ret; +} + +int wc_ecc_init(ecc_key* key) +{ + return wc_ecc_init_ex(key, NULL, INVALID_DEVID); +} + +#ifdef HAVE_PKCS11 +int wc_ecc_init_id(ecc_key* key, unsigned char* id, int len, void* heap, + int devId) +{ + int ret = 0; + + if (key == NULL) + ret = BAD_FUNC_ARG; + if (ret == 0 && (len < 0 || len > ECC_MAX_ID_LEN)) + ret = BUFFER_E; + + if (ret == 0) + ret = wc_ecc_init_ex(key, heap, devId); + + if (ret == 0 && id != NULL && len != 0) { + XMEMCPY(key->id, id, len); + key->idLen = len; + } + + return ret; +} +#endif + +int wc_ecc_set_flags(ecc_key* key, word32 flags) +{ + if (key == NULL) { + return BAD_FUNC_ARG; + } + key->flags |= flags; + return 0; +} + + +static int wc_ecc_get_curve_order_bit_count(const ecc_set_type* dp) +{ + int err; + word32 orderBits; + DECLARE_CURVE_SPECS(curve, 1); + + ALLOC_CURVE_SPECS(1); + err = wc_ecc_curve_load(dp, &curve, ECC_CURVE_FIELD_ORDER); + if (err != 0) { + FREE_CURVE_SPECS(); + return err; + } + orderBits = mp_count_bits(curve->order); + + wc_ecc_curve_free(curve); + FREE_CURVE_SPECS(); + return (int)orderBits; +} + +#ifdef HAVE_ECC_SIGN + +#ifndef NO_ASN + +#if 
defined(WOLFSSL_ATECC508A) || defined(PLUTON_CRYPTO_ECC) || \ + defined(WOLFSSL_CRYPTOCELL) +static int wc_ecc_sign_hash_hw(const byte* in, word32 inlen, + mp_int* r, mp_int* s, byte* out, word32 *outlen, WC_RNG* rng, + ecc_key* key) +{ + int err; +#ifdef PLUTON_CRYPTO_ECC + if (key->devId != INVALID_DEVID) /* use hardware */ +#endif + { + #if defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_ATECC508A) + CRYS_ECDSA_SignUserContext_t sigCtxTemp; + word32 raw_sig_size = *outlen; + word32 msgLenInBytes = inlen; + CRYS_ECPKI_HASH_OpMode_t hash_mode; + #endif + word32 keysize = (word32)key->dp->size; + word32 orderBits = wc_ecc_get_curve_order_bit_count(key->dp); + + /* Check args */ + if (keysize > ECC_MAX_CRYPTO_HW_SIZE || *outlen < keysize*2) { + return ECC_BAD_ARG_E; + } + + #if defined(WOLFSSL_ATECC508A) + key->slot = atmel_ecc_alloc(ATMEL_SLOT_DEVICE); + if (key->slot == ATECC_INVALID_SLOT) { + return ECC_BAD_ARG_E; + } + + /* Sign: Result is 32-bytes of R then 32-bytes of S */ + err = atmel_ecc_sign(key->slot, in, out); + if (err != 0) { + return err; + } + #elif defined(PLUTON_CRYPTO_ECC) + { + /* if the input is larger than curve order, we must truncate */ + if ((inlen * WOLFSSL_BIT_SIZE) > orderBits) { + inlen = (orderBits + WOLFSSL_BIT_SIZE - 1) / WOLFSSL_BIT_SIZE; + } + + /* perform ECC sign */ + word32 raw_sig_size = *outlen; + err = Crypto_EccSign(in, inlen, out, &raw_sig_size); + if (err != CRYPTO_RES_SUCCESS || raw_sig_size != keysize*2){ + return BAD_COND_E; + } + } + #elif defined(WOLFSSL_CRYPTOCELL) + + hash_mode = cc310_hashModeECC(msgLenInBytes); + if (hash_mode == CRYS_ECPKI_HASH_OpModeLast) { + hash_mode = cc310_hashModeECC(keysize); + hash_mode = CRYS_ECPKI_HASH_SHA256_mode; + } + + /* truncate if hash is longer than key size */ + if (msgLenInBytes > keysize) { + msgLenInBytes = keysize; + } + + /* create signature from an input buffer using a private key*/ + err = CRYS_ECDSA_Sign(&wc_rndState, + wc_rndGenVectFunc, + &sigCtxTemp, + &key->ctx.privKey, + hash_mode, + (byte*)in, + msgLenInBytes, + out, + &raw_sig_size); + + if (err != SA_SILIB_RET_OK){ + WOLFSSL_MSG("CRYS_ECDSA_Sign failed"); + return err; + } + #endif + + /* Load R and S */ + err = mp_read_unsigned_bin(r, &out[0], keysize); + if (err != MP_OKAY) { + return err; + } + err = mp_read_unsigned_bin(s, &out[keysize], keysize); + if (err != MP_OKAY) { + return err; + } + + /* Check for zeros */ + if (mp_iszero(r) || mp_iszero(s)) { + return MP_ZERO_E; + } + } +#ifdef PLUTON_CRYPTO_ECC + else { + err = wc_ecc_sign_hash_ex(in, inlen, rng, key, r, s); + } +#endif + (void)rng; + + return err; +} +#endif /* WOLFSSL_ATECC508A || PLUTON_CRYPTO_ECC || WOLFSSL_CRYPTOCELL */ + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) +static int wc_ecc_sign_hash_async(const byte* in, word32 inlen, byte* out, + word32 *outlen, WC_RNG* rng, ecc_key* key) +{ + int err; + mp_int *r = NULL, *s = NULL; + + if (in == NULL || out == NULL || outlen == NULL || key == NULL || + rng == NULL) { + return ECC_BAD_ARG_E; + } + + err = wc_ecc_alloc_async(key); + if (err != 0) { + return err; + } + r = key->r; + s = key->s; + + switch(key->state) { + case ECC_STATE_NONE: + case ECC_STATE_SIGN_DO: + key->state = ECC_STATE_SIGN_DO; + + if ((err = mp_init_multi(r, s, NULL, NULL, NULL, NULL)) != MP_OKAY){ + break; + } + + err = wc_ecc_sign_hash_ex(in, inlen, rng, key, r, s); + if (err < 0) { + break; + } + + FALL_THROUGH; + + case ECC_STATE_SIGN_ENCODE: + key->state = ECC_STATE_SIGN_ENCODE; + + if (key->asyncDev.marker == 
WOLFSSL_ASYNC_MARKER_ECC) { + #ifdef HAVE_CAVIUM_V + /* Nitrox requires r and s in sep buffer, so split it */ + NitroxEccRsSplit(key, &r->raw, &s->raw); + #endif + #ifndef WOLFSSL_ASYNC_CRYPT_TEST + /* only do this if not simulator, since it overwrites result */ + wc_bigint_to_mp(&r->raw, r); + wc_bigint_to_mp(&s->raw, s); + #endif + } + + /* encoded with DSA header */ + err = StoreECC_DSA_Sig(out, outlen, r, s); + + /* done with R/S */ + mp_clear(r); + mp_clear(s); + break; + + default: + err = BAD_STATE_E; + break; + } + + /* if async pending then return and skip done cleanup below */ + if (err == WC_PENDING_E) { + key->state++; + return err; + } + + /* cleanup */ + wc_ecc_free_async(key); + key->state = ECC_STATE_NONE; + + return err; +} +#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */ + +/** + Sign a message digest + in The message digest to sign + inlen The length of the digest + out [out] The destination for the signature + outlen [in/out] The max size and resulting size of the signature + key A private ECC key + return MP_OKAY if successful + */ +WOLFSSL_ABI +int wc_ecc_sign_hash(const byte* in, word32 inlen, byte* out, word32 *outlen, + WC_RNG* rng, ecc_key* key) +{ + int err; +#if !defined(WOLFSSL_ASYNC_CRYPT) || !defined(WC_ASYNC_ENABLE_ECC) +#ifdef WOLFSSL_SMALL_STACK + mp_int *r = NULL, *s = NULL; +#else + mp_int r[1], s[1]; +#endif +#endif + + if (in == NULL || out == NULL || outlen == NULL || key == NULL || + rng == NULL) { + return ECC_BAD_ARG_E; + } + +#ifdef WOLF_CRYPTO_CB + if (key->devId != INVALID_DEVID) { + err = wc_CryptoCb_EccSign(in, inlen, out, outlen, rng, key); + if (err != CRYPTOCB_UNAVAILABLE) + return err; + /* fall-through when unavailable */ + } +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) + /* handle async cases */ + err = wc_ecc_sign_hash_async(in, inlen, out, outlen, rng, key); +#else + +#ifdef WOLFSSL_SMALL_STACK + r = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC); + if (r == NULL) + return MEMORY_E; + s = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC); + if (s == NULL) { + XFREE(r, key->heap, DYNAMIC_TYPE_ECC); + return MEMORY_E; + } +#endif + XMEMSET(r, 0, sizeof(mp_int)); + XMEMSET(s, 0, sizeof(mp_int)); + + if ((err = mp_init_multi(r, s, NULL, NULL, NULL, NULL)) != MP_OKAY){ + #ifdef WOLFSSL_SMALL_STACK + XFREE(s, key->heap, DYNAMIC_TYPE_ECC); + XFREE(r, key->heap, DYNAMIC_TYPE_ECC); + #endif + return err; + } + +/* hardware crypto */ +#if defined(WOLFSSL_ATECC508A) || defined(PLUTON_CRYPTO_ECC) || defined(WOLFSSL_CRYPTOCELL) + err = wc_ecc_sign_hash_hw(in, inlen, r, s, out, outlen, rng, key); +#else + err = wc_ecc_sign_hash_ex(in, inlen, rng, key, r, s); +#endif + if (err < 0) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(s, key->heap, DYNAMIC_TYPE_ECC); + XFREE(r, key->heap, DYNAMIC_TYPE_ECC); + #endif + return err; + } + + /* encoded with DSA header */ + err = StoreECC_DSA_Sig(out, outlen, r, s); + + /* cleanup */ + mp_clear(r); + mp_clear(s); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(s, key->heap, DYNAMIC_TYPE_ECC); + XFREE(r, key->heap, DYNAMIC_TYPE_ECC); +#endif +#endif /* WOLFSSL_ASYNC_CRYPT */ + + return err; +} +#endif /* !NO_ASN */ + +#if defined(WOLFSSL_STM32_PKA) +int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng, + ecc_key* key, mp_int *r, mp_int *s) +{ + return stm32_ecc_sign_hash_ex(in, inlen, rng, key, r, s); +} +#elif !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL) +/** + Sign a message digest + in The message digest to sign + inlen The length of 
the digest + key A private ECC key + r [out] The destination for r component of the signature + s [out] The destination for s component of the signature + return MP_OKAY if successful +*/ +int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng, + ecc_key* key, mp_int *r, mp_int *s) +{ + int err = 0; +#ifndef WOLFSSL_SP_MATH + mp_int* e; +#if (!defined(WOLFSSL_ASYNC_CRYPT) || !defined(HAVE_CAVIUM_V)) && \ + !defined(WOLFSSL_SMALL_STACK) + mp_int e_lcl; +#endif + +#if defined(WOLFSSL_ECDSA_SET_K) || \ + (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \ + (defined(HAVE_CAVIUM_V) || defined(HAVE_INTEL_QA))) + DECLARE_CURVE_SPECS(curve, ECC_CURVE_FIELD_COUNT); +#else + DECLARE_CURVE_SPECS(curve, 1); +#endif +#endif /* !WOLFSSL_SP_MATH */ + + if (in == NULL || r == NULL || s == NULL || key == NULL || rng == NULL) { + return ECC_BAD_ARG_E; + } + + /* is this a private key? */ + if (key->type != ECC_PRIVATEKEY && key->type != ECC_PRIVATEKEY_ONLY) { + return ECC_BAD_ARG_E; + } + + /* is the IDX valid ? */ + if (wc_ecc_is_valid_idx(key->idx) != 1) { + return ECC_BAD_ARG_E; + } + +#ifdef WOLFSSL_SP_MATH +#ifndef WOLFSSL_SP_NO_256 + if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) { + #ifndef WOLFSSL_ECDSA_SET_K + return sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, NULL, key->heap); + #else + return sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, key->sign_k, + key->heap); + #endif + } +#endif +#ifdef WOLFSSL_SP_384 + if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) { + #ifndef WOLFSSL_ECDSA_SET_K + return sp_ecc_sign_384(in, inlen, rng, &key->k, r, s, NULL, key->heap); + #else + return sp_ecc_sign_384(in, inlen, rng, &key->k, r, s, key->sign_k, + key->heap); + #endif + } +#endif + return WC_KEY_SIZE_E; +#else +#ifdef WOLFSSL_HAVE_SP_ECC + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) + if (key->asyncDev.marker != WOLFSSL_ASYNC_MARKER_ECC) + #endif + { +#ifndef WOLFSSL_SP_NO_256 + if (key->idx != ECC_CUSTOM_IDX && + ecc_sets[key->idx].id == ECC_SECP256R1) { + #ifndef WOLFSSL_ECDSA_SET_K + return sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, NULL, + key->heap); + #else + return sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, key->sign_k, + key->heap); + #endif + } +#endif +#ifdef WOLFSSL_SP_384 + if (key->idx != ECC_CUSTOM_IDX && + ecc_sets[key->idx].id == ECC_SECP384R1) { + #ifndef WOLFSSL_ECDSA_SET_K + return sp_ecc_sign_384(in, inlen, rng, &key->k, r, s, NULL, + key->heap); + #else + return sp_ecc_sign_384(in, inlen, rng, &key->k, r, s, key->sign_k, + key->heap); + #endif + } +#endif + } +#endif /* WOLFSSL_HAVE_SP_ECC */ + + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \ + defined(WOLFSSL_ASYNC_CRYPT_TEST) + if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) { + if (wc_AsyncTestInit(&key->asyncDev, ASYNC_TEST_ECC_SIGN)) { + WC_ASYNC_TEST* testDev = &key->asyncDev.test; + testDev->eccSign.in = in; + testDev->eccSign.inSz = inlen; + testDev->eccSign.rng = rng; + testDev->eccSign.key = key; + testDev->eccSign.r = r; + testDev->eccSign.s = s; + return WC_PENDING_E; + } + } +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM_V) + err = wc_ecc_alloc_mpint(key, &key->e); + if (err != 0) { + return err; + } + e = key->e; +#elif !defined(WOLFSSL_SMALL_STACK) + e = &e_lcl; +#else + e = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC); + if (e == NULL) { + return MEMORY_E; + } +#endif + + /* get the hash and load it as a bignum into 'e' */ + /* init the 
bignums */ + if ((err = mp_init(e)) != MP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(e, key->heap, DYNAMIC_TYPE_ECC); + #endif + return err; + } + + /* load curve info */ +#if defined(WOLFSSL_ECDSA_SET_K) + ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT); + err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL); +#else + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \ + (defined(HAVE_CAVIUM_V) || defined(HAVE_INTEL_QA)) + if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) { + ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT); + err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL); + } + else + #endif + { + ALLOC_CURVE_SPECS(1); + err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ORDER); + } +#endif + + /* load digest into e */ + if (err == MP_OKAY) { + /* we may need to truncate if hash is longer than key size */ + word32 orderBits = mp_count_bits(curve->order); + + /* truncate down to byte size, may be all that's needed */ + if ((WOLFSSL_BIT_SIZE * inlen) > orderBits) + inlen = (orderBits + WOLFSSL_BIT_SIZE - 1) / WOLFSSL_BIT_SIZE; + err = mp_read_unsigned_bin(e, (byte*)in, inlen); + + /* may still need bit truncation too */ + if (err == MP_OKAY && (WOLFSSL_BIT_SIZE * inlen) > orderBits) + mp_rshb(e, WOLFSSL_BIT_SIZE - (orderBits & 0x7)); + } + + /* make up a key and export the public copy */ + if (err == MP_OKAY) { + int loop_check = 0; + #ifdef WOLFSSL_SMALL_STACK + ecc_key* pubkey; + #else + ecc_key pubkey[1]; + #endif + + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) + if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) { + #if defined(HAVE_CAVIUM_V) || defined(HAVE_INTEL_QA) + #ifdef HAVE_CAVIUM_V + if (NitroxEccIsCurveSupported(key)) + #endif + { + word32 keySz = key->dp->size; + mp_int* k; + #ifdef HAVE_CAVIUM_V + err = wc_ecc_alloc_mpint(key, &key->signK); + if (err != 0) + return err; + k = key->signK; + #else + mp_int k_lcl; + k = &k_lcl; + #endif + + err = mp_init(k); + + /* make sure r and s are allocated */ + #ifdef HAVE_CAVIUM_V + /* Nitrox V needs single buffer for R and S */ + if (err == MP_OKAY) + err = wc_bigint_alloc(&key->r->raw, NitroxEccGetSize(key)*2); + /* Nitrox V only needs Prime and Order */ + if (err == MP_OKAY) + err = wc_ecc_curve_load(key->dp, &curve, + (ECC_CURVE_FIELD_PRIME | ECC_CURVE_FIELD_ORDER)); + #else + if (err == MP_OKAY) + err = wc_bigint_alloc(&key->r->raw, key->dp->size); + if (err == MP_OKAY) + err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL); + #endif + if (err == MP_OKAY) + err = wc_bigint_alloc(&key->s->raw, key->dp->size); + + /* load e and k */ + if (err == MP_OKAY) + err = wc_mp_to_bigint_sz(e, &e->raw, keySz); + if (err == MP_OKAY) + err = wc_mp_to_bigint_sz(&key->k, &key->k.raw, keySz); + if (err == MP_OKAY) + err = wc_ecc_gen_k(rng, key->dp->size, k, curve->order); + if (err == MP_OKAY) + err = wc_mp_to_bigint_sz(k, &k->raw, keySz); + + #ifdef HAVE_CAVIUM_V + if (err == MP_OKAY) + err = NitroxEcdsaSign(key, &e->raw, &key->k.raw, &k->raw, + &r->raw, &s->raw, &curve->prime->raw, &curve->order->raw); + #else + if (err == MP_OKAY) + err = IntelQaEcdsaSign(&key->asyncDev, &e->raw, &key->k.raw, + &k->raw, &r->raw, &s->raw, &curve->Af->raw, &curve->Bf->raw, + &curve->prime->raw, &curve->order->raw, &curve->Gx->raw, + &curve->Gy->raw); + #endif + + #ifndef HAVE_CAVIUM_V + mp_clear(e); + mp_clear(k); + #endif + wc_ecc_curve_free(curve); + FREE_CURVE_SPECS(); + + return err; + } + #endif /* HAVE_CAVIUM_V || HAVE_INTEL_QA */ + } + #endif /* WOLFSSL_ASYNC_CRYPT && 
WC_ASYNC_ENABLE_ECC */ + + #ifdef WOLFSSL_SMALL_STACK + pubkey = (ecc_key*)XMALLOC(sizeof(ecc_key), key->heap, DYNAMIC_TYPE_ECC); + if (pubkey == NULL) + err = MEMORY_E; + #endif + + /* don't use async for key, since we don't support async return here */ + if (err == MP_OKAY && (err = wc_ecc_init_ex(pubkey, key->heap, + INVALID_DEVID)) == MP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + mp_int* b = NULL; + #else + mp_int b[1]; + #endif + + #ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + b = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, + DYNAMIC_TYPE_ECC); + if (b == NULL) + err = MEMORY_E; + } + #endif + + if (err == MP_OKAY) { + err = mp_init(b); + } + + #ifdef WOLFSSL_CUSTOM_CURVES + /* if custom curve, apply params to pubkey */ + if (err == MP_OKAY && key->idx == ECC_CUSTOM_IDX) { + err = wc_ecc_set_custom_curve(pubkey, key->dp); + } + #endif + + if (err == MP_OKAY) { + /* Generate blinding value - non-zero value. */ + do { + if (++loop_check > 64) { + err = RNG_FAILURE_E; + break; + } + + err = wc_ecc_gen_k(rng, key->dp->size, b, curve->order); + } + while (err == MP_ZERO_E); + loop_check = 0; + } + + for (; err == MP_OKAY;) { + if (++loop_check > 64) { + err = RNG_FAILURE_E; + break; + } + #ifdef WOLFSSL_ECDSA_SET_K + if (key->sign_k != NULL) { + if (loop_check > 1) { + err = RNG_FAILURE_E; + break; + } + + err = mp_copy(key->sign_k, &pubkey->k); + if (err != MP_OKAY) break; + + mp_forcezero(key->sign_k); + mp_free(key->sign_k); + XFREE(key->sign_k, key->heap, DYNAMIC_TYPE_ECC); + key->sign_k = NULL; + err = wc_ecc_make_pub_ex(pubkey, curve, NULL); + } + else + #endif + { + err = wc_ecc_make_key_ex(rng, key->dp->size, pubkey, + key->dp->id); + } + if (err != MP_OKAY) break; + + /* find r = x1 mod n */ + err = mp_mod(pubkey->pubkey.x, curve->order, r); + if (err != MP_OKAY) break; + + if (mp_iszero(r) == MP_YES) { + #ifndef ALT_ECC_SIZE + mp_clear(pubkey->pubkey.x); + mp_clear(pubkey->pubkey.y); + mp_clear(pubkey->pubkey.z); + #endif + mp_forcezero(&pubkey->k); + } + else { + /* find s = (e + xr)/k + = b.(e/k.b + x.r/k.b) */ + + /* k = k.b */ + err = mp_mulmod(&pubkey->k, b, curve->order, &pubkey->k); + if (err != MP_OKAY) break; + + /* k = 1/k.b */ + err = mp_invmod(&pubkey->k, curve->order, &pubkey->k); + if (err != MP_OKAY) break; + + /* s = x.r */ + err = mp_mulmod(&key->k, r, curve->order, s); + if (err != MP_OKAY) break; + + /* s = x.r/k.b */ + err = mp_mulmod(&pubkey->k, s, curve->order, s); + if (err != MP_OKAY) break; + + /* e = e/k.b */ + err = mp_mulmod(&pubkey->k, e, curve->order, e); + if (err != MP_OKAY) break; + + /* s = e/k.b + x.r/k.b + = (e + x.r)/k.b */ + err = mp_add(e, s, s); + if (err != MP_OKAY) break; + + /* s = b.(e + x.r)/k.b + = (e + x.r)/k */ + err = mp_mulmod(s, b, curve->order, s); + if (err != MP_OKAY) break; + + /* s = (e + xr)/k */ + err = mp_mod(s, curve->order, s); + if (err != MP_OKAY) break; + + if (mp_iszero(s) == MP_NO) + break; + } + } + mp_clear(b); + mp_free(b); + #ifdef WOLFSSL_SMALL_STACK + XFREE(b, key->heap, DYNAMIC_TYPE_ECC); + #endif + wc_ecc_free(pubkey); + #ifdef WOLFSSL_SMALL_STACK + XFREE(pubkey, key->heap, DYNAMIC_TYPE_ECC); + #endif + } + } + + mp_clear(e); + wc_ecc_curve_free(curve); +#ifdef WOLFSSL_SMALL_STACK + XFREE(e, key->heap, DYNAMIC_TYPE_ECC); +#endif + FREE_CURVE_SPECS(); +#endif /* WOLFSSL_SP_MATH */ + + return err; +} + +#ifdef WOLFSSL_ECDSA_SET_K +int wc_ecc_sign_set_k(const byte* k, word32 klen, ecc_key* key) +{ + int ret = 0; + + if (k == NULL || klen == 0 || key == NULL) { + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + 
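+ /* allocate the caller-supplied k holder on first use; wc_ecc_sign_hash_ex
+ * zeroizes, frees and clears sign_k again after a single signature */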
if (key->sign_k == NULL) { + key->sign_k = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, + DYNAMIC_TYPE_ECC); + if (key->sign_k == NULL) { + ret = MEMORY_E; + } + } + } + + if (ret == 0) { + ret = mp_init(key->sign_k); + } + if (ret == 0) { + ret = mp_read_unsigned_bin(key->sign_k, k, klen); + } + + return ret; +} +#endif /* WOLFSSL_ECDSA_SET_K */ +#endif /* !WOLFSSL_ATECC508A && !WOLFSSL_CRYPTOCELL */ + +#endif /* HAVE_ECC_SIGN */ + +#ifdef WOLFSSL_CUSTOM_CURVES +void wc_ecc_free_curve(const ecc_set_type* curve, void* heap) +{ +#ifndef WOLFSSL_ECC_CURVE_STATIC + if (curve->prime != NULL) + XFREE((void*)curve->prime, heap, DYNAMIC_TYPE_ECC_BUFFER); + if (curve->Af != NULL) + XFREE((void*)curve->Af, heap, DYNAMIC_TYPE_ECC_BUFFER); + if (curve->Bf != NULL) + XFREE((void*)curve->Bf, heap, DYNAMIC_TYPE_ECC_BUFFER); + if (curve->order != NULL) + XFREE((void*)curve->order, heap, DYNAMIC_TYPE_ECC_BUFFER); + if (curve->Gx != NULL) + XFREE((void*)curve->Gx, heap, DYNAMIC_TYPE_ECC_BUFFER); + if (curve->Gy != NULL) + XFREE((void*)curve->Gy, heap, DYNAMIC_TYPE_ECC_BUFFER); +#endif + + XFREE((void*)curve, heap, DYNAMIC_TYPE_ECC_BUFFER); + + (void)heap; +} +#endif /* WOLFSSL_CUSTOM_CURVES */ + +/** + Free an ECC key from memory + key The key you wish to free +*/ +WOLFSSL_ABI +int wc_ecc_free(ecc_key* key) +{ + if (key == NULL) { + return 0; + } + +#ifdef WOLFSSL_ECDSA_SET_K + if (key->sign_k != NULL) { + mp_forcezero(key->sign_k); + mp_free(key->sign_k); + XFREE(key->sign_k, key->heap, DYNAMIC_TYPE_ECC); + } +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) + #ifdef WC_ASYNC_ENABLE_ECC + wolfAsync_DevCtxFree(&key->asyncDev, WOLFSSL_ASYNC_MARKER_ECC); + #endif + wc_ecc_free_async(key); +#endif + +#ifdef WOLFSSL_ATECC508A + atmel_ecc_free(key->slot); + key->slot = ATECC_INVALID_SLOT; +#endif /* WOLFSSL_ATECC508A */ + + mp_clear(key->pubkey.x); + mp_clear(key->pubkey.y); + mp_clear(key->pubkey.z); + + mp_forcezero(&key->k); + +#ifdef WOLFSSL_CUSTOM_CURVES + if (key->deallocSet && key->dp != NULL) + wc_ecc_free_curve(key->dp, key->heap); +#endif + + return 0; +} + +#if !defined(WOLFSSL_SP_MATH) && !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL) +#ifdef ECC_SHAMIR + +/** Computes kA*A + kB*B = C using Shamir's Trick + A First point to multiply + kA What to multiply A by + B Second point to multiply + kB What to multiply B by + C [out] Destination point (can overlap with A or B) + a ECC curve parameter a + modulus Modulus for curve + return MP_OKAY on success +*/ +#ifdef FP_ECC +static int normal_ecc_mul2add(ecc_point* A, mp_int* kA, + ecc_point* B, mp_int* kB, + ecc_point* C, mp_int* a, mp_int* modulus, + void* heap) +#else +int ecc_mul2add(ecc_point* A, mp_int* kA, + ecc_point* B, mp_int* kB, + ecc_point* C, mp_int* a, mp_int* modulus, + void* heap) +#endif +{ +#ifdef WOLFSSL_SMALL_STACK_CACHE + ecc_key key; +#endif +#ifdef WOLFSSL_SMALL_STACK + ecc_point** precomp = NULL; +#else + ecc_point* precomp[SHAMIR_PRECOMP_SZ]; +#endif + unsigned bitbufA, bitbufB, lenA, lenB, len, nA, nB, nibble; + unsigned char* tA; + unsigned char* tB; + int err = MP_OKAY, first, x, y; + mp_digit mp = 0; + + /* argument checks */ + if (A == NULL || kA == NULL || B == NULL || kB == NULL || C == NULL || + modulus == NULL) { + return ECC_BAD_ARG_E; + } + + /* allocate memory */ + tA = (unsigned char*)XMALLOC(ECC_BUFSIZE, heap, DYNAMIC_TYPE_ECC_BUFFER); + if (tA == NULL) { + return GEN_MEM_ERR; + } + tB = (unsigned char*)XMALLOC(ECC_BUFSIZE, heap, DYNAMIC_TYPE_ECC_BUFFER); + if (tB == NULL) { + XFREE(tA,
heap, DYNAMIC_TYPE_ECC_BUFFER); + return GEN_MEM_ERR; + } +#ifdef WOLFSSL_SMALL_STACK + precomp = (ecc_point**)XMALLOC(sizeof(ecc_point*) * SHAMIR_PRECOMP_SZ, heap, + DYNAMIC_TYPE_ECC_BUFFER); + if (precomp == NULL) { + XFREE(tB, heap, DYNAMIC_TYPE_ECC_BUFFER); + XFREE(tA, heap, DYNAMIC_TYPE_ECC_BUFFER); + return GEN_MEM_ERR; + } +#endif +#ifdef WOLFSSL_SMALL_STACK_CACHE + key.t1 = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC); + key.t2 = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC); +#ifdef ALT_ECC_SIZE + key.x = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC); + key.y = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC); + key.z = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC); +#endif + if (key.t1 == NULL || key.t2 == NULL +#ifdef ALT_ECC_SIZE + || key.x == NULL || key.y == NULL || key.z == NULL +#endif + ) { +#ifdef ALT_ECC_SIZE + XFREE(key.z, heap, DYNAMIC_TYPE_ECC); + XFREE(key.y, heap, DYNAMIC_TYPE_ECC); + XFREE(key.x, heap, DYNAMIC_TYPE_ECC); +#endif + XFREE(key.t2, heap, DYNAMIC_TYPE_ECC); + XFREE(key.t1, heap, DYNAMIC_TYPE_ECC); + XFREE(precomp, heap, DYNAMIC_TYPE_ECC_BUFFER); + XFREE(tB, heap, DYNAMIC_TYPE_ECC_BUFFER); + XFREE(tA, heap, DYNAMIC_TYPE_ECC_BUFFER); + return MEMORY_E; + } + C->key = &key; +#endif /* WOLFSSL_SMALL_STACK_CACHE */ + + /* init variables */ + XMEMSET(tA, 0, ECC_BUFSIZE); + XMEMSET(tB, 0, ECC_BUFSIZE); +#ifndef WOLFSSL_SMALL_STACK + XMEMSET(precomp, 0, sizeof(precomp)); +#else + XMEMSET(precomp, 0, sizeof(ecc_point*) * SHAMIR_PRECOMP_SZ); +#endif + + /* get sizes */ + lenA = mp_unsigned_bin_size(kA); + lenB = mp_unsigned_bin_size(kB); + len = MAX(lenA, lenB); + + /* sanity check */ + if ((lenA > ECC_BUFSIZE) || (lenB > ECC_BUFSIZE)) { + err = BAD_FUNC_ARG; + } + + if (err == MP_OKAY) { + /* extract and justify kA */ + err = mp_to_unsigned_bin(kA, (len - lenA) + tA); + + /* extract and justify kB */ + if (err == MP_OKAY) + err = mp_to_unsigned_bin(kB, (len - lenB) + tB); + + /* allocate the table */ + if (err == MP_OKAY) { + for (x = 0; x < SHAMIR_PRECOMP_SZ; x++) { + precomp[x] = wc_ecc_new_point_h(heap); + if (precomp[x] == NULL) { + err = GEN_MEM_ERR; + break; + } + #ifdef WOLFSSL_SMALL_STACK_CACHE + precomp[x]->key = &key; + #endif + } + } + } + + if (err == MP_OKAY) + /* init montgomery reduction */ + err = mp_montgomery_setup(modulus, &mp); + + if (err == MP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + mp_int* mu; + #else + mp_int mu[1]; + #endif + #ifdef WOLFSSL_SMALL_STACK + mu = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC); + if (mu == NULL) + err = MEMORY_E; + #endif + if (err == MP_OKAY) { + err = mp_init(mu); + } + if (err == MP_OKAY) { + err = mp_montgomery_calc_normalization(mu, modulus); + + if (err == MP_OKAY) + /* copy ones ... 
*/ + err = mp_mulmod(A->x, mu, modulus, precomp[1]->x); + + if (err == MP_OKAY) + err = mp_mulmod(A->y, mu, modulus, precomp[1]->y); + if (err == MP_OKAY) + err = mp_mulmod(A->z, mu, modulus, precomp[1]->z); + + if (err == MP_OKAY) + err = mp_mulmod(B->x, mu, modulus, precomp[1<<2]->x); + if (err == MP_OKAY) + err = mp_mulmod(B->y, mu, modulus, precomp[1<<2]->y); + if (err == MP_OKAY) + err = mp_mulmod(B->z, mu, modulus, precomp[1<<2]->z); + + /* done with mu */ + mp_clear(mu); + } + #ifdef WOLFSSL_SMALL_STACK + if (mu != NULL) { + XFREE(mu, heap, DYNAMIC_TYPE_ECC); + } + #endif + } + + if (err == MP_OKAY) + /* precomp [i,0](A + B) table */ + err = ecc_projective_dbl_point(precomp[1], precomp[2], a, modulus, mp); + + if (err == MP_OKAY) + err = ecc_projective_add_point(precomp[1], precomp[2], precomp[3], + a, modulus, mp); + if (err == MP_OKAY) + /* precomp [0,i](A + B) table */ + err = ecc_projective_dbl_point(precomp[1<<2], precomp[2<<2], a, modulus, mp); + + if (err == MP_OKAY) + err = ecc_projective_add_point(precomp[1<<2], precomp[2<<2], precomp[3<<2], + a, modulus, mp); + + if (err == MP_OKAY) { + /* precomp [i,j](A + B) table (i != 0, j != 0) */ + for (x = 1; x < 4; x++) { + for (y = 1; y < 4; y++) { + if (err == MP_OKAY) { + err = ecc_projective_add_point(precomp[x], precomp[(y<<2)], + precomp[x+(y<<2)], a, modulus, mp); + } + } + } + } + + if (err == MP_OKAY) { + nibble = 3; + first = 1; + bitbufA = tA[0]; + bitbufB = tB[0]; + + /* for every byte of the multiplicands */ + for (x = 0;; ) { + /* grab a nibble */ + if (++nibble == 4) { + if (x == (int)len) break; + bitbufA = tA[x]; + bitbufB = tB[x]; + nibble = 0; + x++; + } + + /* extract two bits from both, shift/update */ + nA = (bitbufA >> 6) & 0x03; + nB = (bitbufB >> 6) & 0x03; + bitbufA = (bitbufA << 2) & 0xFF; + bitbufB = (bitbufB << 2) & 0xFF; + + /* if both zero, if first, continue */ + if ((nA == 0) && (nB == 0) && (first == 1)) { + continue; + } + + /* double twice, only if this isn't the first */ + if (first == 0) { + /* double twice */ + if (err == MP_OKAY) + err = ecc_projective_dbl_point(C, C, a, modulus, mp); + if (err == MP_OKAY) + err = ecc_projective_dbl_point(C, C, a, modulus, mp); + else + break; + } + + /* if not both zero */ + if ((nA != 0) || (nB != 0)) { + if (first == 1) { + /* if first, copy from table */ + first = 0; + if (err == MP_OKAY) + err = mp_copy(precomp[nA + (nB<<2)]->x, C->x); + + if (err == MP_OKAY) + err = mp_copy(precomp[nA + (nB<<2)]->y, C->y); + + if (err == MP_OKAY) + err = mp_copy(precomp[nA + (nB<<2)]->z, C->z); + else + break; + } else { + /* if not first, add from table */ + if (err == MP_OKAY) + err = ecc_projective_add_point(C, precomp[nA + (nB<<2)], C, + a, modulus, mp); + if (err != MP_OKAY) + break; + if (mp_iszero(C->z)) { + /* When all zero then should have done an add */ + if (mp_iszero(C->x) && mp_iszero(C->y)) { + err = ecc_projective_dbl_point(precomp[nA + (nB<<2)], C, + a, modulus, mp); + if (err != MP_OKAY) + break; + } + /* When only Z zero then result is infinity */ + else { + err = mp_set(C->x, 0); + if (err != MP_OKAY) + break; + err = mp_set(C->y, 0); + if (err != MP_OKAY) + break; + err = mp_set(C->z, 1); + if (err != MP_OKAY) + break; + first = 1; + } + } + } + } + } + } + + /* reduce to affine */ + if (err == MP_OKAY) + err = ecc_map(C, modulus, mp); + + /* clean up */ + for (x = 0; x < SHAMIR_PRECOMP_SZ; x++) { + wc_ecc_del_point_h(precomp[x], heap); + } + + ForceZero(tA, ECC_BUFSIZE); + ForceZero(tB, ECC_BUFSIZE); +#ifdef WOLFSSL_SMALL_STACK_CACHE +#ifdef 
ALT_ECC_SIZE + XFREE(key.z, heap, DYNAMIC_TYPE_ECC); + XFREE(key.y, heap, DYNAMIC_TYPE_ECC); + XFREE(key.x, heap, DYNAMIC_TYPE_ECC); +#endif + XFREE(key.t2, heap, DYNAMIC_TYPE_ECC); + XFREE(key.t1, heap, DYNAMIC_TYPE_ECC); + C->key = NULL; +#endif +#ifdef WOLFSSL_SMALL_STACK + XFREE(precomp, heap, DYNAMIC_TYPE_ECC_BUFFER); +#endif + XFREE(tB, heap, DYNAMIC_TYPE_ECC_BUFFER); + XFREE(tA, heap, DYNAMIC_TYPE_ECC_BUFFER); + + return err; +} + +#endif /* ECC_SHAMIR */ +#endif /* !WOLFSSL_SP_MATH && !WOLFSSL_ATECC508A && !WOLFSSL_CRYPTOCEL*/ + + +#ifdef HAVE_ECC_VERIFY +#ifndef NO_ASN +/* verify + * + * w = s^-1 mod n + * u1 = xw + * u2 = rw + * X = u1*G + u2*Q + * v = X_x1 mod n + * accept if v == r + */ + +/** + Verify an ECC signature + sig The signature to verify + siglen The length of the signature (octets) + hash The hash (message digest) that was signed + hashlen The length of the hash (octets) + res Result of signature, 1==valid, 0==invalid + key The corresponding public ECC key + return MP_OKAY if successful (even if the signature is not valid) + */ +int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash, + word32 hashlen, int* res, ecc_key* key) +{ + int err; + mp_int *r = NULL, *s = NULL; +#if (!defined(WOLFSSL_ASYNC_CRYPT) || !defined(WC_ASYNC_ENABLE_ECC)) && \ + !defined(WOLFSSL_SMALL_STACK) + mp_int r_lcl, s_lcl; +#endif + + if (sig == NULL || hash == NULL || res == NULL || key == NULL) { + return ECC_BAD_ARG_E; + } + +#ifdef WOLF_CRYPTO_CB + if (key->devId != INVALID_DEVID) { + err = wc_CryptoCb_EccVerify(sig, siglen, hash, hashlen, res, key); + if (err != CRYPTOCB_UNAVAILABLE) + return err; + /* fall-through when unavailable */ + } +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) + err = wc_ecc_alloc_async(key); + if (err != 0) + return err; + r = key->r; + s = key->s; +#else + #ifndef WOLFSSL_SMALL_STACK + r = &r_lcl; + s = &s_lcl; + #else + r = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC); + if (r == NULL) + return MEMORY_E; + s = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC); + if (s == NULL) { + XFREE(r, key->heap, DYNAMIC_TYPE_ECC); + return MEMORY_E; + } + #endif + XMEMSET(r, 0, sizeof(mp_int)); + XMEMSET(s, 0, sizeof(mp_int)); +#endif /* WOLFSSL_ASYNC_CRYPT */ + + switch (key->state) { + case ECC_STATE_NONE: + case ECC_STATE_VERIFY_DECODE: + key->state = ECC_STATE_VERIFY_DECODE; + + /* default to invalid signature */ + *res = 0; + + /* Note, DecodeECC_DSA_Sig() calls mp_init() on r and s. + * If either of those don't allocate correctly, none of + * the rest of this function will execute, and everything + * gets cleaned up at the end. 
*/ + /* decode DSA header */ + err = DecodeECC_DSA_Sig(sig, siglen, r, s); + if (err < 0) { + break; + } + FALL_THROUGH; + + case ECC_STATE_VERIFY_DO: + key->state = ECC_STATE_VERIFY_DO; + + err = wc_ecc_verify_hash_ex(r, s, hash, hashlen, res, key); + + #ifndef WOLFSSL_ASYNC_CRYPT + /* done with R/S */ + mp_clear(r); + mp_clear(s); + #ifdef WOLFSSL_SMALL_STACK + XFREE(s, key->heap, DYNAMIC_TYPE_ECC); + XFREE(r, key->heap, DYNAMIC_TYPE_ECC); + r = NULL; + s = NULL; + #endif + #endif + + if (err < 0) { + break; + } + FALL_THROUGH; + + case ECC_STATE_VERIFY_RES: + key->state = ECC_STATE_VERIFY_RES; + err = 0; + break; + + default: + err = BAD_STATE_E; + } + + /* if async pending then return and skip done cleanup below */ + if (err == WC_PENDING_E) { + key->state++; + return err; + } + + /* cleanup */ +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) + wc_ecc_free_async(key); +#elif defined(WOLFSSL_SMALL_STACK) + XFREE(s, key->heap, DYNAMIC_TYPE_ECC); + XFREE(r, key->heap, DYNAMIC_TYPE_ECC); + r = NULL; + s = NULL; +#endif + + key->state = ECC_STATE_NONE; + + return err; +} +#endif /* !NO_ASN */ + + +/** + Verify an ECC signature + r The signature R component to verify + s The signature S component to verify + hash The hash (message digest) that was signed + hashlen The length of the hash (octets) + res Result of signature, 1==valid, 0==invalid + key The corresponding public ECC key + return MP_OKAY if successful (even if the signature is not valid) +*/ + +int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash, + word32 hashlen, int* res, ecc_key* key) +#if defined(WOLFSSL_STM32_PKA) +{ + return stm32_ecc_verify_hash_ex(r, s, hash, hashlen, res, key); +} +#else +{ + int err; + word32 keySz; +#ifdef WOLFSSL_ATECC508A + byte sigRS[ATECC_KEY_SIZE*2]; +#elif defined(WOLFSSL_CRYPTOCELL) + byte sigRS[ECC_MAX_CRYPTO_HW_SIZE*2]; + CRYS_ECDSA_VerifyUserContext_t sigCtxTemp; + word32 msgLenInBytes = hashlen; + CRYS_ECPKI_HASH_OpMode_t hash_mode; +#elif !defined(WOLFSSL_SP_MATH) || defined(FREESCALE_LTC_ECC) + int did_init = 0; + ecc_point *mG = NULL, *mQ = NULL; + #ifdef WOLFSSL_SMALL_STACK + mp_int* v = NULL; + mp_int* w = NULL; + mp_int* u1 = NULL; + mp_int* u2 = NULL; + #if !defined(WOLFSSL_ASYNC_CRYPT) || !defined(HAVE_CAVIUM_V) + mp_int* e_lcl = NULL; + #endif + #else /* WOLFSSL_SMALL_STACK */ + mp_int v[1]; + mp_int w[1]; + mp_int u1[1]; + mp_int u2[1]; + #if !defined(WOLFSSL_ASYNC_CRYPT) || !defined(HAVE_CAVIUM_V) + mp_int e_lcl[1]; + #endif + #endif /* WOLFSSL_SMALL_STACK */ + mp_int* e; + DECLARE_CURVE_SPECS(curve, ECC_CURVE_FIELD_COUNT); +#endif + + if (r == NULL || s == NULL || hash == NULL || res == NULL || key == NULL) + return ECC_BAD_ARG_E; + + /* default to invalid signature */ + *res = 0; + + /* is the IDX valid ? 
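+       (wc_ecc_is_valid_idx() returns 1 only when key->idx names a supported
+       ecc_sets[] entry, so key->dp below is expected to be set)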
*/ + if (wc_ecc_is_valid_idx(key->idx) != 1) { + return ECC_BAD_ARG_E; + } + + keySz = key->dp->size; + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \ + defined(WOLFSSL_ASYNC_CRYPT_TEST) + if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) { + if (wc_AsyncTestInit(&key->asyncDev, ASYNC_TEST_ECC_VERIFY)) { + WC_ASYNC_TEST* testDev = &key->asyncDev.test; + testDev->eccVerify.r = r; + testDev->eccVerify.s = s; + testDev->eccVerify.hash = hash; + testDev->eccVerify.hashlen = hashlen; + testDev->eccVerify.stat = res; + testDev->eccVerify.key = key; + return WC_PENDING_E; + } + } +#endif + +#ifdef WOLFSSL_ATECC508A + /* Extract R and S */ + err = mp_to_unsigned_bin(r, &sigRS[0]); + if (err != MP_OKAY) { + return err; + } + err = mp_to_unsigned_bin(s, &sigRS[keySz]); + if (err != MP_OKAY) { + return err; + } + + err = atmel_ecc_verify(hash, sigRS, key->pubkey_raw, res); + if (err != 0) { + return err; + } + (void)hashlen; +#elif defined(WOLFSSL_CRYPTOCELL) + + /* Extract R and S */ + + err = mp_to_unsigned_bin(r, &sigRS[0]); + if (err != MP_OKAY) { + return err; + } + err = mp_to_unsigned_bin(s, &sigRS[keySz]); + if (err != MP_OKAY) { + return err; + } + + hash_mode = cc310_hashModeECC(msgLenInBytes); + if (hash_mode == CRYS_ECPKI_HASH_OpModeLast) { + /* hash_mode = */ cc310_hashModeECC(keySz); + hash_mode = CRYS_ECPKI_HASH_SHA256_mode; + } + /* truncate if hash is longer than key size */ + if (msgLenInBytes > keySz) { + msgLenInBytes = keySz; + } + + /* verify the signature using the public key */ + err = CRYS_ECDSA_Verify(&sigCtxTemp, + &key->ctx.pubKey, + hash_mode, + &sigRS[0], + keySz*2, + (byte*)hash, + msgLenInBytes); + + if (err != SA_SILIB_RET_OK) { + WOLFSSL_MSG("CRYS_ECDSA_Verify failed"); + return err; + } + /* valid signature if we get to this point */ + *res = 1; +#else + /* checking if private key with no public part */ + if (key->type == ECC_PRIVATEKEY_ONLY) { + WOLFSSL_MSG("Verify called with private key, generating public part"); + err = wc_ecc_make_pub_ex(key, NULL, NULL); + if (err != MP_OKAY) { + WOLFSSL_MSG("Unable to extract public key"); + return err; + } + } + +#if defined(WOLFSSL_DSP) && !defined(FREESCALE_LTC_ECC) + if (key->handle != -1) { + return sp_dsp_ecc_verify_256(key->handle, hash, hashlen, key->pubkey.x, key->pubkey.y, + key->pubkey.z, r, s, res, key->heap); + } + if (wolfSSL_GetHandleCbSet() == 1) { + return sp_dsp_ecc_verify_256(0, hash, hashlen, key->pubkey.x, key->pubkey.y, + key->pubkey.z, r, s, res, key->heap); + } +#endif +#if defined(WOLFSSL_SP_MATH) && !defined(FREESCALE_LTC_ECC) +#ifndef WOLFSSL_SP_NO_256 + if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) { + return sp_ecc_verify_256(hash, hashlen, key->pubkey.x, key->pubkey.y, + key->pubkey.z, r, s, res, key->heap); + } +#endif +#ifdef WOLFSSL_SP_384 + if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) { + return sp_ecc_verify_384(hash, hashlen, key->pubkey.x, key->pubkey.y, + key->pubkey.z, r, s, res, key->heap); + } +#endif + return WC_KEY_SIZE_E; +#else +#if defined WOLFSSL_HAVE_SP_ECC && !defined(FREESCALE_LTC_ECC) + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) + if (key->asyncDev.marker != WOLFSSL_ASYNC_MARKER_ECC) + #endif + { +#ifndef WOLFSSL_SP_NO_256 + if (key->idx != ECC_CUSTOM_IDX && + ecc_sets[key->idx].id == ECC_SECP256R1) { + return sp_ecc_verify_256(hash, hashlen, key->pubkey.x, + key->pubkey.y, key->pubkey.z,r, s, res, + key->heap); + } +#endif /* WOLFSSL_SP_NO_256 */ +#ifdef WOLFSSL_SP_384 
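+    /* same single-precision (SP) fast path as above, for SECP384R1 */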
+ if (key->idx != ECC_CUSTOM_IDX && + ecc_sets[key->idx].id == ECC_SECP384R1) { + return sp_ecc_verify_384(hash, hashlen, key->pubkey.x, + key->pubkey.y, key->pubkey.z,r, s, res, + key->heap); + } +#endif /* WOLFSSL_SP_384 */ + } +#endif /* WOLFSSL_HAVE_SP_ECC */ + + ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT); + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM_V) + err = wc_ecc_alloc_mpint(key, &key->e); + if (err != 0) { + FREE_CURVE_SPECS(); + return err; + } + e = key->e; +#else +#ifdef WOLFSSL_SMALL_STACK + e_lcl = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC); + if (e_lcl == NULL) { + FREE_CURVE_SPECS(); + return MEMORY_E; + } +#endif + e = e_lcl; +#endif /* WOLFSSL_ASYNC_CRYPT && HAVE_CAVIUM_V */ + + err = mp_init(e); + if (err != MP_OKAY) + return MEMORY_E; + + /* read in the specs for this curve */ + err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL); + + /* check for zero */ + if (err == MP_OKAY) { + if (mp_iszero(r) == MP_YES || mp_iszero(s) == MP_YES || + mp_cmp(r, curve->order) != MP_LT || + mp_cmp(s, curve->order) != MP_LT) { + err = MP_ZERO_E; + } + } + + /* read hash */ + if (err == MP_OKAY) { + /* we may need to truncate if hash is longer than key size */ + unsigned int orderBits = mp_count_bits(curve->order); + + /* truncate down to byte size, may be all that's needed */ + if ( (WOLFSSL_BIT_SIZE * hashlen) > orderBits) + hashlen = (orderBits + WOLFSSL_BIT_SIZE - 1) / WOLFSSL_BIT_SIZE; + err = mp_read_unsigned_bin(e, hash, hashlen); + + /* may still need bit truncation too */ + if (err == MP_OKAY && (WOLFSSL_BIT_SIZE * hashlen) > orderBits) + mp_rshb(e, WOLFSSL_BIT_SIZE - (orderBits & 0x7)); + } + + /* check for async hardware acceleration */ +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) + if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) { + #if defined(HAVE_CAVIUM_V) || defined(HAVE_INTEL_QA) + #ifdef HAVE_CAVIUM_V + if (NitroxEccIsCurveSupported(key)) + #endif + { + err = wc_mp_to_bigint_sz(e, &e->raw, keySz); + if (err == MP_OKAY) + err = wc_mp_to_bigint_sz(key->pubkey.x, &key->pubkey.x->raw, keySz); + if (err == MP_OKAY) + err = wc_mp_to_bigint_sz(key->pubkey.y, &key->pubkey.y->raw, keySz); + if (err == MP_OKAY) + #ifdef HAVE_CAVIUM_V + err = NitroxEcdsaVerify(key, &e->raw, &key->pubkey.x->raw, + &key->pubkey.y->raw, &r->raw, &s->raw, + &curve->prime->raw, &curve->order->raw, res); + #else + err = IntelQaEcdsaVerify(&key->asyncDev, &e->raw, &key->pubkey.x->raw, + &key->pubkey.y->raw, &r->raw, &s->raw, &curve->Af->raw, + &curve->Bf->raw, &curve->prime->raw, &curve->order->raw, + &curve->Gx->raw, &curve->Gy->raw, res); + #endif + + #ifndef HAVE_CAVIUM_V + mp_clear(e); + #endif + wc_ecc_curve_free(curve); + FREE_CURVE_SPECS(); + + return err; + } + #endif /* HAVE_CAVIUM_V || HAVE_INTEL_QA */ + } +#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */ + +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + v = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC); + if (v == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + w = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC); + if (w == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + u1 = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC); + if (u1 == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + u2 = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC); + if (u2 == NULL) + err = MEMORY_E; + } +#endif + + /* allocate ints */ + if (err == MP_OKAY) { + if ((err = mp_init_multi(v, w, u1, u2, NULL, NULL)) != 
MP_OKAY) { + err = MEMORY_E; + } + did_init = 1; + } + + /* allocate points */ + if (err == MP_OKAY) { + mG = wc_ecc_new_point_h(key->heap); + mQ = wc_ecc_new_point_h(key->heap); + if (mQ == NULL || mG == NULL) + err = MEMORY_E; + } + + /* w = s^-1 mod n */ + if (err == MP_OKAY) + err = mp_invmod(s, curve->order, w); + + /* u1 = ew */ + if (err == MP_OKAY) + err = mp_mulmod(e, w, curve->order, u1); + + /* u2 = rw */ + if (err == MP_OKAY) + err = mp_mulmod(r, w, curve->order, u2); + + /* find mG and mQ */ + if (err == MP_OKAY) + err = mp_copy(curve->Gx, mG->x); + if (err == MP_OKAY) + err = mp_copy(curve->Gy, mG->y); + if (err == MP_OKAY) + err = mp_set(mG->z, 1); + + if (err == MP_OKAY) + err = mp_copy(key->pubkey.x, mQ->x); + if (err == MP_OKAY) + err = mp_copy(key->pubkey.y, mQ->y); + if (err == MP_OKAY) + err = mp_copy(key->pubkey.z, mQ->z); + +#if defined(FREESCALE_LTC_ECC) + /* use PKHA to compute u1*mG + u2*mQ */ + if (err == MP_OKAY) + err = wc_ecc_mulmod_ex(u1, mG, mG, curve->Af, curve->prime, 0, key->heap); + if (err == MP_OKAY) + err = wc_ecc_mulmod_ex(u2, mQ, mQ, curve->Af, curve->prime, 0, key->heap); + if (err == MP_OKAY) + err = wc_ecc_point_add(mG, mQ, mG, curve->prime); +#else +#ifndef ECC_SHAMIR + if (err == MP_OKAY) + { + mp_digit mp = 0; + + if (!mp_iszero(u1)) { + /* compute u1*mG + u2*mQ = mG */ + err = wc_ecc_mulmod_ex(u1, mG, mG, curve->Af, curve->prime, 0, + key->heap); + if (err == MP_OKAY) { + err = wc_ecc_mulmod_ex(u2, mQ, mQ, curve->Af, curve->prime, 0, + key->heap); + } + + /* find the montgomery mp */ + if (err == MP_OKAY) + err = mp_montgomery_setup(curve->prime, &mp); + + /* add them */ + if (err == MP_OKAY) + err = ecc_projective_add_point(mQ, mG, mG, curve->Af, + curve->prime, mp); + if (err == MP_OKAY && mp_iszero(mG->z)) { + /* When all zero then should have done an add */ + if (mp_iszero(mG->x) && mp_iszero(mG->y)) { + err = ecc_projective_dbl_point(mQ, mG, curve->Af, + curve->prime, mp); + } + /* When only Z zero then result is infinity */ + else { + err = mp_set(mG->x, 0); + if (err == MP_OKAY) + err = mp_set(mG->y, 0); + if (err == MP_OKAY) + err = mp_set(mG->z, 1); + } + } + } + else { + /* compute 0*mG + u2*mQ = mG */ + err = wc_ecc_mulmod_ex(u2, mQ, mG, curve->Af, curve->prime, 0, + key->heap); + /* find the montgomery mp */ + if (err == MP_OKAY) + err = mp_montgomery_setup(curve->prime, &mp); + } + + /* reduce */ + if (err == MP_OKAY) + err = ecc_map(mG, curve->prime, mp); + } +#else + /* use Shamir's trick to compute u1*mG + u2*mQ using half the doubles */ + if (err == MP_OKAY) { + err = ecc_mul2add(mG, u1, mQ, u2, mG, curve->Af, curve->prime, + key->heap); + } +#endif /* ECC_SHAMIR */ +#endif /* FREESCALE_LTC_ECC */ + /* v = X_x1 mod n */ + if (err == MP_OKAY) + err = mp_mod(mG->x, curve->order, v); + + /* does v == r */ + if (err == MP_OKAY) { + if (mp_cmp(v, r) == MP_EQ) + *res = 1; + } + + /* cleanup */ + wc_ecc_del_point_h(mG, key->heap); + wc_ecc_del_point_h(mQ, key->heap); + + mp_clear(e); + if (did_init) { + mp_clear(v); + mp_clear(w); + mp_clear(u1); + mp_clear(u2); + } +#ifdef WOLFSSL_SMALL_STACK + XFREE(u2, key->heap, DYNAMIC_TYPE_ECC); + XFREE(u1, key->heap, DYNAMIC_TYPE_ECC); + XFREE(w, key->heap, DYNAMIC_TYPE_ECC); + XFREE(v, key->heap, DYNAMIC_TYPE_ECC); +#if !defined(WOLFSSL_ASYNC_CRYPT) || !defined(HAVE_CAVIUM_V) + XFREE(e_lcl, key->heap, DYNAMIC_TYPE_ECC); +#endif +#endif + + wc_ecc_curve_free(curve); + FREE_CURVE_SPECS(); + +#endif /* WOLFSSL_SP_MATH */ +#endif /* WOLFSSL_ATECC508A */ + + (void)keySz; + (void)hashlen; + + 
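+    /* Reference (not executed): why v == r implies a valid signature.
+     * Signing computes s = k^-1 * (e + r*d) mod n, so
+     *   u1 + u2*d = e*s^-1 + r*s^-1*d = s^-1 * (e + r*d) = k (mod n),
+     * hence u1*G + u2*Q = (u1 + u2*d)*G = k*G, whose affine x-coordinate
+     * reduced mod n is exactly the r produced at signing time. */
+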
return err; +} +#endif /* WOLFSSL_STM32_PKA */ +#endif /* HAVE_ECC_VERIFY */ + +#ifdef HAVE_ECC_KEY_IMPORT +/* import point from der + * if shortKeySize != 0 then keysize is always (inLen-1)>>1 */ +int wc_ecc_import_point_der_ex(byte* in, word32 inLen, const int curve_idx, + ecc_point* point, int shortKeySize) +{ + int err = 0; +#ifdef HAVE_COMP_KEY + int compressed = 0; +#endif + int keysize; + byte pointType; + +#ifndef HAVE_COMP_KEY + (void)shortKeySize; +#endif + + if (in == NULL || point == NULL || (curve_idx < 0) || + (wc_ecc_is_valid_idx(curve_idx) == 0)) + return ECC_BAD_ARG_E; + + /* must be odd */ + if ((inLen & 1) == 0) { + return ECC_BAD_ARG_E; + } + + /* init point */ +#ifdef ALT_ECC_SIZE + point->x = (mp_int*)&point->xyz[0]; + point->y = (mp_int*)&point->xyz[1]; + point->z = (mp_int*)&point->xyz[2]; + alt_fp_init(point->x); + alt_fp_init(point->y); + alt_fp_init(point->z); +#else + err = mp_init_multi(point->x, point->y, point->z, NULL, NULL, NULL); +#endif + if (err != MP_OKAY) + return MEMORY_E; + + /* check for point type (4, 2, or 3) */ + pointType = in[0]; + if (pointType != ECC_POINT_UNCOMP && pointType != ECC_POINT_COMP_EVEN && + pointType != ECC_POINT_COMP_ODD) { + err = ASN_PARSE_E; + } + + if (pointType == ECC_POINT_COMP_EVEN || pointType == ECC_POINT_COMP_ODD) { +#ifdef HAVE_COMP_KEY + compressed = 1; +#else + err = NOT_COMPILED_IN; +#endif + } + + /* adjust to skip first byte */ + inLen -= 1; + in += 1; + + /* calculate key size based on inLen / 2 if uncompressed or shortKeySize + * is true */ +#ifdef HAVE_COMP_KEY + keysize = compressed && !shortKeySize ? inLen : inLen>>1; +#else + keysize = inLen>>1; +#endif + + /* read data */ + if (err == MP_OKAY) + err = mp_read_unsigned_bin(point->x, (byte*)in, keysize); + +#ifdef HAVE_COMP_KEY + if (err == MP_OKAY && compressed == 1) { /* build y */ +#ifndef WOLFSSL_SP_MATH + int did_init = 0; + mp_int t1, t2; + DECLARE_CURVE_SPECS(curve, 3); + + ALLOC_CURVE_SPECS(3); + + if (mp_init_multi(&t1, &t2, NULL, NULL, NULL, NULL) != MP_OKAY) + err = MEMORY_E; + else + did_init = 1; + + /* load curve info */ + if (err == MP_OKAY) + err = wc_ecc_curve_load(&ecc_sets[curve_idx], &curve, + (ECC_CURVE_FIELD_PRIME | ECC_CURVE_FIELD_AF | + ECC_CURVE_FIELD_BF)); + + /* compute x^3 */ + if (err == MP_OKAY) + err = mp_sqr(point->x, &t1); + if (err == MP_OKAY) + err = mp_mulmod(&t1, point->x, curve->prime, &t1); + + /* compute x^3 + a*x */ + if (err == MP_OKAY) + err = mp_mulmod(curve->Af, point->x, curve->prime, &t2); + if (err == MP_OKAY) + err = mp_add(&t1, &t2, &t1); + + /* compute x^3 + a*x + b */ + if (err == MP_OKAY) + err = mp_add(&t1, curve->Bf, &t1); + + /* compute sqrt(x^3 + a*x + b) */ + if (err == MP_OKAY) + err = mp_sqrtmod_prime(&t1, curve->prime, &t2); + + /* adjust y */ + if (err == MP_OKAY) { + if ((mp_isodd(&t2) == MP_YES && pointType == ECC_POINT_COMP_ODD) || + (mp_isodd(&t2) == MP_NO && pointType == ECC_POINT_COMP_EVEN)) { + err = mp_mod(&t2, curve->prime, point->y); + } + else { + err = mp_submod(curve->prime, &t2, curve->prime, point->y); + } + } + + if (did_init) { + mp_clear(&t2); + mp_clear(&t1); + } + + wc_ecc_curve_free(curve); + FREE_CURVE_SPECS(); +#else + #ifndef WOLFSSL_SP_NO_256 + if (curve_idx != ECC_CUSTOM_IDX && + ecc_sets[curve_idx].id == ECC_SECP256R1) { + sp_ecc_uncompress_256(point->x, pointType, point->y); + } + else + #endif + #ifdef WOLFSSL_SP_384 + if (curve_idx != ECC_CUSTOM_IDX && + ecc_sets[curve_idx].id == ECC_SECP384R1) { + sp_ecc_uncompress_384(point->x, pointType, point->y); + } + else + 
#endif
+    {
+        err = WC_KEY_SIZE_E;
+    }
+#endif
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef HAVE_COMP_KEY
+        if (compressed == 0)
+#endif
+            err = mp_read_unsigned_bin(point->y, (byte*)in + keysize, keysize);
+    }
+    if (err == MP_OKAY)
+        err = mp_set(point->z, 1);
+
+    if (err != MP_OKAY) {
+        mp_clear(point->x);
+        mp_clear(point->y);
+        mp_clear(point->z);
+    }
+
+    return err;
+}
+
+/* function for backwards compatibility with previous implementations */
+int wc_ecc_import_point_der(byte* in, word32 inLen, const int curve_idx,
+                            ecc_point* point)
+{
+    return wc_ecc_import_point_der_ex(in, inLen, curve_idx, point, 1);
+}
+#endif /* HAVE_ECC_KEY_IMPORT */
+
+#ifdef HAVE_ECC_KEY_EXPORT
+/* export point to der */
+
+int wc_ecc_export_point_der_ex(const int curve_idx, ecc_point* point, byte* out,
+                               word32* outLen, int compressed)
+{
+    if (compressed == 0)
+        return wc_ecc_export_point_der(curve_idx, point, out, outLen);
+#ifdef HAVE_COMP_KEY
+    else
+        return wc_ecc_export_point_der_compressed(curve_idx, point, out, outLen);
+#else
+    return NOT_COMPILED_IN;
+#endif
+}
+
+int wc_ecc_export_point_der(const int curve_idx, ecc_point* point, byte* out,
+                            word32* outLen)
+{
+    int ret = MP_OKAY;
+    word32 numlen;
+#ifdef WOLFSSL_SMALL_STACK
+    byte* buf;
+#else
+    byte buf[ECC_BUFSIZE];
+#endif
+
+    if ((curve_idx < 0) || (wc_ecc_is_valid_idx(curve_idx) == 0))
+        return ECC_BAD_ARG_E;
+
+    numlen = ecc_sets[curve_idx].size;
+
+    /* return length needed only */
+    if (point != NULL && out == NULL && outLen != NULL) {
+        *outLen = 1 + 2*numlen;
+        return LENGTH_ONLY_E;
+    }
+
+    if (point == NULL || out == NULL || outLen == NULL)
+        return ECC_BAD_ARG_E;
+
+    if (*outLen < (1 + 2*numlen)) {
+        *outLen = 1 + 2*numlen;
+        return BUFFER_E;
+    }
+
+    /* store byte point type */
+    out[0] = ECC_POINT_UNCOMP;
+
+#ifdef WOLFSSL_SMALL_STACK
+    buf = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER);
+    if (buf == NULL)
+        return MEMORY_E;
+#endif
+
+    /* pad and store x */
+    XMEMSET(buf, 0, ECC_BUFSIZE);
+    ret = mp_to_unsigned_bin(point->x, buf +
+                             (numlen - mp_unsigned_bin_size(point->x)));
+    if (ret != MP_OKAY)
+        goto done;
+    XMEMCPY(out+1, buf, numlen);
+
+    /* pad and store y */
+    XMEMSET(buf, 0, ECC_BUFSIZE);
+    ret = mp_to_unsigned_bin(point->y, buf +
+                             (numlen - mp_unsigned_bin_size(point->y)));
+    if (ret != MP_OKAY)
+        goto done;
+    XMEMCPY(out+1+numlen, buf, numlen);
+
+    *outLen = 1 + 2*numlen;
+
+done:
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(buf, NULL, DYNAMIC_TYPE_ECC_BUFFER);
+#endif
+
+    return ret;
+}
+
+
+/* export point to der (compressed form) */
+#ifdef HAVE_COMP_KEY
+int wc_ecc_export_point_der_compressed(const int curve_idx, ecc_point* point,
+                                       byte* out, word32* outLen)
+{
+    int ret = MP_OKAY;
+    word32 numlen;
+    word32 output_len;
+#ifdef WOLFSSL_SMALL_STACK
+    byte* buf;
+#else
+    byte buf[ECC_BUFSIZE];
+#endif
+
+    if ((curve_idx < 0) || (wc_ecc_is_valid_idx(curve_idx) == 0))
+        return ECC_BAD_ARG_E;
+
+    numlen = ecc_sets[curve_idx].size;
+    output_len = 1 + numlen; /* 1 point type byte + x coordinate */
+
+    /* return length needed only */
+    if (point != NULL && out == NULL && outLen != NULL) {
+        *outLen = output_len;
+        return LENGTH_ONLY_E;
+    }
+
+    if (point == NULL || out == NULL || outLen == NULL)
+        return ECC_BAD_ARG_E;
+
+
+    if (*outLen < output_len) {
+        *outLen = output_len;
+        return BUFFER_E;
+    }
+
+    /* store byte point type */
+    out[0] = mp_isodd(point->y) == MP_YES ?
ECC_POINT_COMP_ODD : + ECC_POINT_COMP_EVEN; + +#ifdef WOLFSSL_SMALL_STACK + buf = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER); + if (buf == NULL) + return MEMORY_E; +#endif + + /* pad and store x */ + XMEMSET(buf, 0, ECC_BUFSIZE); + ret = mp_to_unsigned_bin(point->x, buf + + (numlen - mp_unsigned_bin_size(point->x))); + if (ret != MP_OKAY) + goto done; + XMEMCPY(out+1, buf, numlen); + + *outLen = output_len; + +done: +#ifdef WOLFSSL_SMALL_STACK + XFREE(buf, NULL, DYNAMIC_TYPE_ECC_BUFFER); +#endif + + return ret; +} +#endif /* HAVE_COMP_KEY */ + +/* export public ECC key in ANSI X9.63 format */ +int wc_ecc_export_x963(ecc_key* key, byte* out, word32* outLen) +{ + int ret = MP_OKAY; + word32 numlen; +#ifdef WOLFSSL_SMALL_STACK + byte* buf; +#else + byte buf[ECC_BUFSIZE]; +#endif + word32 pubxlen, pubylen; + + /* return length needed only */ + if (key != NULL && out == NULL && outLen != NULL) { + /* if key hasn't been setup assume max bytes for size estimation */ + numlen = key->dp ? key->dp->size : MAX_ECC_BYTES; + *outLen = 1 + 2*numlen; + return LENGTH_ONLY_E; + } + + if (key == NULL || out == NULL || outLen == NULL) + return ECC_BAD_ARG_E; + + if (key->type == ECC_PRIVATEKEY_ONLY) + return ECC_PRIVATEONLY_E; + + if (wc_ecc_is_valid_idx(key->idx) == 0 || key->dp == NULL) { + return ECC_BAD_ARG_E; + } + numlen = key->dp->size; + + /* verify room in out buffer */ + if (*outLen < (1 + 2*numlen)) { + *outLen = 1 + 2*numlen; + return BUFFER_E; + } + + /* verify public key length is less than key size */ + pubxlen = mp_unsigned_bin_size(key->pubkey.x); + pubylen = mp_unsigned_bin_size(key->pubkey.y); + if ((pubxlen > numlen) || (pubylen > numlen)) { + WOLFSSL_MSG("Public key x/y invalid!"); + return BUFFER_E; + } + + /* store byte point type */ + out[0] = ECC_POINT_UNCOMP; + +#ifdef WOLFSSL_SMALL_STACK + buf = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER); + if (buf == NULL) + return MEMORY_E; +#endif + + /* pad and store x */ + XMEMSET(buf, 0, ECC_BUFSIZE); + ret = mp_to_unsigned_bin(key->pubkey.x, buf + (numlen - pubxlen)); + if (ret != MP_OKAY) + goto done; + XMEMCPY(out+1, buf, numlen); + + /* pad and store y */ + XMEMSET(buf, 0, ECC_BUFSIZE); + ret = mp_to_unsigned_bin(key->pubkey.y, buf + (numlen - pubylen)); + if (ret != MP_OKAY) + goto done; + XMEMCPY(out+1+numlen, buf, numlen); + + *outLen = 1 + 2*numlen; + +done: +#ifdef WOLFSSL_SMALL_STACK + XFREE(buf, NULL, DYNAMIC_TYPE_ECC_BUFFER); +#endif + + return ret; +} + + +/* export public ECC key in ANSI X9.63 format, extended with + * compression option */ +int wc_ecc_export_x963_ex(ecc_key* key, byte* out, word32* outLen, + int compressed) +{ + if (compressed == 0) + return wc_ecc_export_x963(key, out, outLen); +#ifdef HAVE_COMP_KEY + else + return wc_ecc_export_x963_compressed(key, out, outLen); +#else + return NOT_COMPILED_IN; +#endif +} +#endif /* HAVE_ECC_KEY_EXPORT */ + + +#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL) + +/* is ecc point on curve described by dp ? 
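+   (the affine check y^2 = x^3 + a*x + b (mod prime): the code computes
+   y^2 - x^3 - a*x and compares the result against b, using three additions
+   of x in place of a multiply for the common a = -3 case)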
*/
+int wc_ecc_is_point(ecc_point* ecp, mp_int* a, mp_int* b, mp_int* prime)
+{
+#ifndef WOLFSSL_SP_MATH
+    int err;
+#ifdef WOLFSSL_SMALL_STACK
+    mp_int* t1;
+    mp_int* t2;
+#else
+    mp_int t1[1], t2[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+    t1 = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+    if (t1 == NULL)
+        return MEMORY_E;
+    t2 = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+    if (t2 == NULL) {
+        XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+        return MEMORY_E;
+    }
+#endif
+
+    if ((err = mp_init_multi(t1, t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+        XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+    #endif
+        return err;
+    }
+
+    /* compute y^2 */
+    if (err == MP_OKAY)
+        err = mp_sqr(ecp->y, t1);
+
+    /* compute x^3 */
+    if (err == MP_OKAY)
+        err = mp_sqr(ecp->x, t2);
+    if (err == MP_OKAY)
+        err = mp_mod(t2, prime, t2);
+    if (err == MP_OKAY)
+        err = mp_mul(ecp->x, t2, t2);
+
+    /* compute y^2 - x^3 */
+    if (err == MP_OKAY)
+        err = mp_sub(t1, t2, t1);
+
+    /* Determine if curve "a" should be used in calc */
+#ifdef WOLFSSL_CUSTOM_CURVES
+    if (err == MP_OKAY) {
+        /* Use a and prime to determine if a == -3 */
+        err = mp_set(t2, 0);
+        if (err == MP_OKAY)
+            err = mp_submod(prime, a, prime, t2);
+    }
+    if (err == MP_OKAY && mp_cmp_d(t2, 3) != MP_EQ) {
+        /* compute y^2 - x^3 + a*x */
+        if (err == MP_OKAY)
+            err = mp_mulmod(t2, ecp->x, prime, t2);
+        if (err == MP_OKAY)
+            err = mp_addmod(t1, t2, prime, t1);
+    }
+    else
+#endif /* WOLFSSL_CUSTOM_CURVES */
+    {
+        /* assumes "a" == -3 */
+        (void)a;
+
+        /* compute y^2 - x^3 + 3x */
+        if (err == MP_OKAY)
+            err = mp_add(t1, ecp->x, t1);
+        if (err == MP_OKAY)
+            err = mp_add(t1, ecp->x, t1);
+        if (err == MP_OKAY)
+            err = mp_add(t1, ecp->x, t1);
+        if (err == MP_OKAY)
+            err = mp_mod(t1, prime, t1);
+    }
+
+    /* adjust range (0, prime) */
+    while (err == MP_OKAY && mp_isneg(t1)) {
+        err = mp_add(t1, prime, t1);
+    }
+    while (err == MP_OKAY && mp_cmp(t1, prime) != MP_LT) {
+        err = mp_sub(t1, prime, t1);
+    }
+
+    /* compare to b */
+    if (err == MP_OKAY) {
+        if (mp_cmp(t1, b) != MP_EQ) {
+            err = MP_VAL;
+        } else {
+            err = MP_OKAY;
+        }
+    }
+
+    mp_clear(t1);
+    mp_clear(t2);
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+    XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+#endif
+
+    return err;
+#else
+    (void)a;
+    (void)b;
+
+#ifndef WOLFSSL_SP_NO_256
+    if (mp_count_bits(prime) == 256) {
+        return sp_ecc_is_point_256(ecp->x, ecp->y);
+    }
+#endif
+#ifdef WOLFSSL_SP_384
+    if (mp_count_bits(prime) == 384) {
+        return sp_ecc_is_point_384(ecp->x, ecp->y);
+    }
+#endif
+    return WC_KEY_SIZE_E;
+#endif
+}
+
+#ifndef WOLFSSL_SP_MATH
+/* validate privkey * generator == pubkey, 0 on success */
+static int ecc_check_privkey_gen(ecc_key* key, mp_int* a, mp_int* prime)
+{
+    int err = MP_OKAY;
+    ecc_point* base = NULL;
+    ecc_point* res = NULL;
+    DECLARE_CURVE_SPECS(curve, 2);
+
+    if (key == NULL)
+        return BAD_FUNC_ARG;
+
+    ALLOC_CURVE_SPECS(2);
+
+    res = wc_ecc_new_point_h(key->heap);
+    if (res == NULL)
+        err = MEMORY_E;
+
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+    if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) {
+        if (err == MP_OKAY) {
+            err = sp_ecc_mulmod_base_256(&key->k, res, 1, key->heap);
+        }
+    }
+    else
+#endif
+#ifdef WOLFSSL_SP_384
+    if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) {
+        if (err == MP_OKAY) {
+            err = sp_ecc_mulmod_base_384(&key->k, res, 1, key->heap);
+        }
+    }
+    else
+#endif
+#endif
+    {
+        base = wc_ecc_new_point_h(key->heap);
+        if (base
== NULL)
+            err = MEMORY_E;
+
+        if (err == MP_OKAY) {
+            /* load curve info */
+            err = wc_ecc_curve_load(key->dp, &curve,
+                                    (ECC_CURVE_FIELD_GX | ECC_CURVE_FIELD_GY));
+        }
+
+        /* set up base generator */
+        if (err == MP_OKAY)
+            err = mp_copy(curve->Gx, base->x);
+        if (err == MP_OKAY)
+            err = mp_copy(curve->Gy, base->y);
+        if (err == MP_OKAY)
+            err = mp_set(base->z, 1);
+
+        if (err == MP_OKAY)
+            err = wc_ecc_mulmod_ex(&key->k, base, res, a, prime, 1, key->heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* compare result to public key */
+        if (mp_cmp(res->x, key->pubkey.x) != MP_EQ ||
+            mp_cmp(res->y, key->pubkey.y) != MP_EQ ||
+            mp_cmp(res->z, key->pubkey.z) != MP_EQ) {
+            /* didn't match */
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+    wc_ecc_curve_free(curve);
+    wc_ecc_del_point_h(res, key->heap);
+    wc_ecc_del_point_h(base, key->heap);
+    FREE_CURVE_SPECS();
+
+    return err;
+}
+#endif
+
+#ifdef WOLFSSL_VALIDATE_ECC_IMPORT
+
+/* helper for the privkey * generator == pubkey check; loads the prime and A
+ * curve parameters needed by ecc_check_privkey_gen() */
+static int ecc_check_privkey_gen_helper(ecc_key* key)
+{
+    int err;
+#ifndef WOLFSSL_ATECC508A
+    DECLARE_CURVE_SPECS(curve, 2);
+#endif
+
+    if (key == NULL)
+        return BAD_FUNC_ARG;
+
+#ifdef WOLFSSL_ATECC508A
+    /* Hardware based private key, so this operation is not supported */
+    err = MP_OKAY; /* just report success */
+
+#else
+    ALLOC_CURVE_SPECS(2);
+
+    /* load curve info */
+    err = wc_ecc_curve_load(key->dp, &curve,
+                            (ECC_CURVE_FIELD_PRIME | ECC_CURVE_FIELD_AF));
+
+    if (err == MP_OKAY)
+        err = ecc_check_privkey_gen(key, curve->Af, curve->prime);
+
+    wc_ecc_curve_free(curve);
+    FREE_CURVE_SPECS();
+
+#endif /* WOLFSSL_ATECC508A */
+
+    return err;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_IMPORT */
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || !defined(WOLFSSL_SP_MATH)
+/* validate order * pubkey = point at infinity, 0 on success */
+static int ecc_check_pubkey_order(ecc_key* key, ecc_point* pubkey, mp_int* a,
+                                  mp_int* prime, mp_int* order)
+{
+    ecc_point* inf = NULL;
+    int err;
+
+    if (key == NULL)
+        return BAD_FUNC_ARG;
+
+    inf = wc_ecc_new_point_h(key->heap);
+    if (inf == NULL)
+        err = MEMORY_E;
+    else {
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+        if (key->idx != ECC_CUSTOM_IDX &&
+                                   ecc_sets[key->idx].id == ECC_SECP256R1) {
+            err = sp_ecc_mulmod_256(order, pubkey, inf, 1, key->heap);
+        }
+        else
+#endif
+#ifdef WOLFSSL_SP_384
+        if (key->idx != ECC_CUSTOM_IDX &&
+                                   ecc_sets[key->idx].id == ECC_SECP384R1) {
+            err = sp_ecc_mulmod_384(order, pubkey, inf, 1, key->heap);
+        }
+        else
+#endif
+#endif
+#ifndef WOLFSSL_SP_MATH
+            err = wc_ecc_mulmod_ex(order, pubkey, inf, a, prime, 1, key->heap);
+        if (err == MP_OKAY && !wc_ecc_point_is_at_infinity(inf))
+            err = ECC_INF_E;
+#else
+        (void)a;
+        (void)prime;
+
+        err = WC_KEY_SIZE_E;
+#endif
+    }
+
+    wc_ecc_del_point_h(inf, key->heap);
+
+    return err;
+}
+#endif
+#endif /* !WOLFSSL_ATECC508A && !WOLFSSL_CRYPTOCELL */
+
+#ifdef OPENSSL_EXTRA
+int wc_ecc_get_generator(ecc_point* ecp, int curve_idx)
+{
+    int err = MP_OKAY;
+    DECLARE_CURVE_SPECS(curve, 2);
+
+    if (!ecp || curve_idx < 0 || curve_idx > (int)(ECC_SET_COUNT-1))
+        return BAD_FUNC_ARG;
+
+    ALLOC_CURVE_SPECS(2);
+
+    err = wc_ecc_curve_load(&ecc_sets[curve_idx], &curve,
+                            (ECC_CURVE_FIELD_GX | ECC_CURVE_FIELD_GY));
+    if (err == MP_OKAY)
+        err = mp_copy(curve->Gx, ecp->x);
+    if (err == MP_OKAY)
+        err = mp_copy(curve->Gy, ecp->y);
+    if (err == MP_OKAY)
+        err = mp_set(ecp->z, 1);
+
+    wc_ecc_curve_free(curve);
+    FREE_CURVE_SPECS();
+
+    return err;
+}
+#endif /* OPENSSL_EXTRA */
+
+/* perform sanity checks on ecc key validity, 0 on
success */ +int wc_ecc_check_key(ecc_key* key) +{ + int err; +#ifndef WOLFSSL_SP_MATH +#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL) + mp_int* b = NULL; +#ifdef USE_ECC_B_PARAM + DECLARE_CURVE_SPECS(curve, 4); +#else +#ifndef WOLFSSL_SMALL_STACK + mp_int b_lcl; +#endif + DECLARE_CURVE_SPECS(curve, 3); +#endif /* USE_ECC_B_PARAM */ +#endif /* WOLFSSL_ATECC508A */ + + if (key == NULL) + return BAD_FUNC_ARG; + +#if defined(WOLFSSL_ATECC508A) || defined(WOLFSSL_CRYPTOCELL) + + err = 0; /* consider key check success on ATECC508A */ + +#else + #ifdef USE_ECC_B_PARAM + ALLOC_CURVE_SPECS(4); + #else + ALLOC_CURVE_SPECS(3); + #ifndef WOLFSSL_SMALL_STACK + b = &b_lcl; + #else + b = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC); + if (b == NULL) { + FREE_CURVE_SPECS(); + return MEMORY_E; + } + #endif + XMEMSET(b, 0, sizeof(mp_int)); + #endif + + /* SP 800-56Ar3, section 5.6.2.3.3, process step 1 */ + /* pubkey point cannot be at infinity */ + if (wc_ecc_point_is_at_infinity(&key->pubkey)) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(b, key->heap, DYNAMIC_TYPE_ECC); + #endif + FREE_CURVE_SPECS(); + return ECC_INF_E; + } + + /* load curve info */ + err = wc_ecc_curve_load(key->dp, &curve, (ECC_CURVE_FIELD_PRIME | + ECC_CURVE_FIELD_AF | ECC_CURVE_FIELD_ORDER +#ifdef USE_ECC_B_PARAM + | ECC_CURVE_FIELD_BF +#endif + )); + +#ifndef USE_ECC_B_PARAM + /* load curve b parameter */ + if (err == MP_OKAY) + err = mp_init(b); + if (err == MP_OKAY) + err = mp_read_radix(b, key->dp->Bf, MP_RADIX_HEX); +#else + if (err == MP_OKAY) + b = curve->Bf; +#endif + + /* SP 800-56Ar3, section 5.6.2.3.3, process step 2 */ + /* Qx must be in the range [0, p-1] */ + if (err == MP_OKAY) { + if (mp_cmp(key->pubkey.x, curve->prime) != MP_LT) + err = ECC_OUT_OF_RANGE_E; + } + + /* Qy must be in the range [0, p-1] */ + if (err == MP_OKAY) { + if (mp_cmp(key->pubkey.y, curve->prime) != MP_LT) + err = ECC_OUT_OF_RANGE_E; + } + + /* SP 800-56Ar3, section 5.6.2.3.3, process steps 3 */ + /* make sure point is actually on curve */ + if (err == MP_OKAY) + err = wc_ecc_is_point(&key->pubkey, curve->Af, b, curve->prime); + + /* SP 800-56Ar3, section 5.6.2.3.3, process steps 4 */ + /* pubkey * order must be at infinity */ + if (err == MP_OKAY) + err = ecc_check_pubkey_order(key, &key->pubkey, curve->Af, curve->prime, + curve->order); + + /* SP 800-56Ar3, section 5.6.2.1.4, method (b) for ECC */ + /* private * base generator must equal pubkey */ + if (err == MP_OKAY && key->type == ECC_PRIVATEKEY) + err = ecc_check_privkey_gen(key, curve->Af, curve->prime); + + wc_ecc_curve_free(curve); + +#ifndef USE_ECC_B_PARAM + mp_clear(b); + #ifdef WOLFSSL_SMALL_STACK + XFREE(b, key->heap, DYNAMIC_TYPE_ECC); + #endif +#endif + + FREE_CURVE_SPECS(); + +#endif /* WOLFSSL_ATECC508A */ +#else + if (key == NULL) + return BAD_FUNC_ARG; + + /* pubkey point cannot be at infinity */ +#ifndef WOLFSSL_SP_NO_256 + if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) { + err = sp_ecc_check_key_256(key->pubkey.x, key->pubkey.y, &key->k, + key->heap); + } + else +#endif +#ifdef WOLFSSL_SP_384 + if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) { + err = sp_ecc_check_key_384(key->pubkey.x, key->pubkey.y, &key->k, + key->heap); + } + else +#endif + { + err = WC_KEY_SIZE_E; + } +#endif + + return err; +} + +#ifdef HAVE_ECC_KEY_IMPORT +/* import public ECC key in ANSI X9.63 format */ +int wc_ecc_import_x963_ex(const byte* in, word32 inLen, ecc_key* key, + int curve_id) +{ + int err = MP_OKAY; +#ifdef 
HAVE_COMP_KEY + int compressed = 0; +#endif + int keysize = 0; + byte pointType; + + if (in == NULL || key == NULL) + return BAD_FUNC_ARG; + + /* must be odd */ + if ((inLen & 1) == 0) { + return ECC_BAD_ARG_E; + } + + /* make sure required variables are reset */ + wc_ecc_reset(key); + + /* init key */ + #ifdef ALT_ECC_SIZE + key->pubkey.x = (mp_int*)&key->pubkey.xyz[0]; + key->pubkey.y = (mp_int*)&key->pubkey.xyz[1]; + key->pubkey.z = (mp_int*)&key->pubkey.xyz[2]; + alt_fp_init(key->pubkey.x); + alt_fp_init(key->pubkey.y); + alt_fp_init(key->pubkey.z); + err = mp_init(&key->k); + #else + err = mp_init_multi(&key->k, + key->pubkey.x, key->pubkey.y, key->pubkey.z, NULL, NULL); + #endif + if (err != MP_OKAY) + return MEMORY_E; + + /* check for point type (4, 2, or 3) */ + pointType = in[0]; + if (pointType != ECC_POINT_UNCOMP && pointType != ECC_POINT_COMP_EVEN && + pointType != ECC_POINT_COMP_ODD) { + err = ASN_PARSE_E; + } + + if (pointType == ECC_POINT_COMP_EVEN || pointType == ECC_POINT_COMP_ODD) { + #ifdef HAVE_COMP_KEY + compressed = 1; + #else + err = NOT_COMPILED_IN; + #endif + } + + /* adjust to skip first byte */ + inLen -= 1; + in += 1; + +#ifdef WOLFSSL_ATECC508A + /* For SECP256R1 only save raw public key for hardware */ + if (curve_id == ECC_SECP256R1 && inLen <= sizeof(key->pubkey_raw)) { + #ifdef HAVE_COMP_KEY + if (!compressed) + #endif + XMEMCPY(key->pubkey_raw, (byte*)in, inLen); + } +#endif + + if (err == MP_OKAY) { + #ifdef HAVE_COMP_KEY + /* adjust inLen if compressed */ + if (compressed) + inLen = inLen*2 + 1; /* used uncompressed len */ + #endif + + /* determine key size */ + keysize = (inLen>>1); + err = wc_ecc_set_curve(key, keysize, curve_id); + key->type = ECC_PUBLICKEY; + } + + /* read data */ + if (err == MP_OKAY) + err = mp_read_unsigned_bin(key->pubkey.x, (byte*)in, keysize); + +#ifdef HAVE_COMP_KEY + if (err == MP_OKAY && compressed == 1) { /* build y */ +#ifndef WOLFSSL_SP_MATH + mp_int t1, t2; + int did_init = 0; + + DECLARE_CURVE_SPECS(curve, 3); + ALLOC_CURVE_SPECS(3); + + if (mp_init_multi(&t1, &t2, NULL, NULL, NULL, NULL) != MP_OKAY) + err = MEMORY_E; + else + did_init = 1; + + /* load curve info */ + if (err == MP_OKAY) + err = wc_ecc_curve_load(key->dp, &curve, + (ECC_CURVE_FIELD_PRIME | ECC_CURVE_FIELD_AF | + ECC_CURVE_FIELD_BF)); + + /* compute x^3 */ + if (err == MP_OKAY) + err = mp_sqr(key->pubkey.x, &t1); + if (err == MP_OKAY) + err = mp_mulmod(&t1, key->pubkey.x, curve->prime, &t1); + + /* compute x^3 + a*x */ + if (err == MP_OKAY) + err = mp_mulmod(curve->Af, key->pubkey.x, curve->prime, &t2); + if (err == MP_OKAY) + err = mp_add(&t1, &t2, &t1); + + /* compute x^3 + a*x + b */ + if (err == MP_OKAY) + err = mp_add(&t1, curve->Bf, &t1); + + /* compute sqrt(x^3 + a*x + b) */ + if (err == MP_OKAY) + err = mp_sqrtmod_prime(&t1, curve->prime, &t2); + + /* adjust y */ + if (err == MP_OKAY) { + if ((mp_isodd(&t2) == MP_YES && pointType == ECC_POINT_COMP_ODD) || + (mp_isodd(&t2) == MP_NO && pointType == ECC_POINT_COMP_EVEN)) { + err = mp_mod(&t2, curve->prime, &t2); + } + else { + err = mp_submod(curve->prime, &t2, curve->prime, &t2); + } + if (err == MP_OKAY) + err = mp_copy(&t2, key->pubkey.y); + } + + if (did_init) { + mp_clear(&t2); + mp_clear(&t1); + } + + wc_ecc_curve_free(curve); + FREE_CURVE_SPECS(); +#else + #ifndef WOLFSSL_SP_NO_256 + if (key->dp->id == ECC_SECP256R1) { + sp_ecc_uncompress_256(key->pubkey.x, pointType, key->pubkey.y); + } + else + #endif + #ifdef WOLFSSL_SP_384 + if (key->dp->id == ECC_SECP384R1) { + 
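+        /* solve for y from x and the stored parity using the SP code */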
sp_ecc_uncompress_384(key->pubkey.x, pointType, key->pubkey.y); + } + else + #endif + { + err = WC_KEY_SIZE_E; + } +#endif + } +#endif /* HAVE_COMP_KEY */ + + if (err == MP_OKAY) { + #ifdef HAVE_COMP_KEY + if (compressed == 0) + #endif + { + err = mp_read_unsigned_bin(key->pubkey.y, (byte*)in + keysize, + keysize); + } + } + if (err == MP_OKAY) + err = mp_set(key->pubkey.z, 1); + +#ifdef WOLFSSL_VALIDATE_ECC_IMPORT + if (err == MP_OKAY) + err = wc_ecc_check_key(key); +#endif + + if (err != MP_OKAY) { + mp_clear(key->pubkey.x); + mp_clear(key->pubkey.y); + mp_clear(key->pubkey.z); + mp_clear(&key->k); + } + + return err; +} + +WOLFSSL_ABI +int wc_ecc_import_x963(const byte* in, word32 inLen, ecc_key* key) +{ + return wc_ecc_import_x963_ex(in, inLen, key, ECC_CURVE_DEF); +} +#endif /* HAVE_ECC_KEY_IMPORT */ + +#ifdef HAVE_ECC_KEY_EXPORT + +/* export ecc key to component form, d is optional if only exporting public + * encType is WC_TYPE_UNSIGNED_BIN or WC_TYPE_HEX_STR + * return MP_OKAY on success */ +int wc_ecc_export_ex(ecc_key* key, byte* qx, word32* qxLen, + byte* qy, word32* qyLen, byte* d, word32* dLen, int encType) +{ + int err = 0; + word32 keySz; + + if (key == NULL) { + return BAD_FUNC_ARG; + } + + if (wc_ecc_is_valid_idx(key->idx) == 0) { + return ECC_BAD_ARG_E; + } + keySz = key->dp->size; + + /* private key, d */ + if (d != NULL) { + if (dLen == NULL || + (key->type != ECC_PRIVATEKEY && key->type != ECC_PRIVATEKEY_ONLY)) + return BAD_FUNC_ARG; + + #ifdef WOLFSSL_ATECC508A + /* Hardware cannot export private portion */ + return NOT_COMPILED_IN; + #else + err = wc_export_int(&key->k, d, dLen, keySz, encType); + if (err != MP_OKAY) + return err; + #endif + } + + /* public x component */ + if (qx != NULL) { + if (qxLen == NULL || key->type == ECC_PRIVATEKEY_ONLY) + return BAD_FUNC_ARG; + + err = wc_export_int(key->pubkey.x, qx, qxLen, keySz, encType); + if (err != MP_OKAY) + return err; + } + + /* public y component */ + if (qy != NULL) { + if (qyLen == NULL || key->type == ECC_PRIVATEKEY_ONLY) + return BAD_FUNC_ARG; + + err = wc_export_int(key->pubkey.y, qy, qyLen, keySz, encType); + if (err != MP_OKAY) + return err; + } + + return err; +} + + +/* export ecc private key only raw, outLen is in/out size as unsigned bin + return MP_OKAY on success */ +int wc_ecc_export_private_only(ecc_key* key, byte* out, word32* outLen) +{ + if (out == NULL || outLen == NULL) { + return BAD_FUNC_ARG; + } + + return wc_ecc_export_ex(key, NULL, NULL, NULL, NULL, out, outLen, + WC_TYPE_UNSIGNED_BIN); +} + +/* export public key to raw elements including public (Qx,Qy) as unsigned bin + * return MP_OKAY on success, negative on error */ +int wc_ecc_export_public_raw(ecc_key* key, byte* qx, word32* qxLen, + byte* qy, word32* qyLen) +{ + if (qx == NULL || qxLen == NULL || qy == NULL || qyLen == NULL) { + return BAD_FUNC_ARG; + } + + return wc_ecc_export_ex(key, qx, qxLen, qy, qyLen, NULL, NULL, + WC_TYPE_UNSIGNED_BIN); +} + +/* export ecc key to raw elements including public (Qx,Qy) and + * private (d) as unsigned bin + * return MP_OKAY on success, negative on error */ +int wc_ecc_export_private_raw(ecc_key* key, byte* qx, word32* qxLen, + byte* qy, word32* qyLen, byte* d, word32* dLen) +{ + return wc_ecc_export_ex(key, qx, qxLen, qy, qyLen, d, dLen, + WC_TYPE_UNSIGNED_BIN); +} + +#endif /* HAVE_ECC_KEY_EXPORT */ + +#ifdef HAVE_ECC_KEY_IMPORT +/* import private key, public part optional if (pub) passed as NULL */ +int wc_ecc_import_private_key_ex(const byte* priv, word32 privSz, + const byte* pub, word32 
pubSz, ecc_key* key, + int curve_id) +{ + int ret; +#if defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_ATECC508A) + const CRYS_ECPKI_Domain_t* pDomain; + CRYS_ECPKI_BUILD_TempData_t tempBuff; +#endif + if (key == NULL || priv == NULL) + return BAD_FUNC_ARG; + + /* public optional, NULL if only importing private */ + if (pub != NULL) { + #ifndef NO_ASN + word32 idx = 0; + ret = wc_ecc_import_x963_ex(pub, pubSz, key, curve_id); + if (ret < 0) + ret = wc_EccPublicKeyDecode(pub, &idx, key, pubSz); + key->type = ECC_PRIVATEKEY; + #else + ret = NOT_COMPILED_IN; + #endif + } + else { + /* make sure required variables are reset */ + wc_ecc_reset(key); + + /* set key size */ + ret = wc_ecc_set_curve(key, privSz, curve_id); + key->type = ECC_PRIVATEKEY_ONLY; + } + + if (ret != 0) + return ret; + +#ifdef WOLFSSL_ATECC508A + /* Hardware does not support loading private keys */ + return NOT_COMPILED_IN; +#elif defined(WOLFSSL_CRYPTOCELL) + pDomain = CRYS_ECPKI_GetEcDomain(cc310_mapCurve(curve_id)); + + if (pub != NULL && pub[0] != '\0') { + /* create public key from external key buffer */ + ret = CRYS_ECPKI_BuildPublKeyFullCheck(pDomain, + (byte*)pub, + pubSz, + &key->ctx.pubKey, + &tempBuff); + + if (ret != SA_SILIB_RET_OK){ + WOLFSSL_MSG("CRYS_ECPKI_BuildPublKeyFullCheck failed"); + return ret; + } + } + /* import private key */ + if (priv != NULL && priv[0] != '\0') { + + /* Create private key from external key buffer*/ + ret = CRYS_ECPKI_BuildPrivKey(pDomain, + priv, + privSz, + &key->ctx.privKey); + + if (ret != SA_SILIB_RET_OK) { + WOLFSSL_MSG("CRYS_ECPKI_BuildPrivKey failed"); + return ret; + } + + ret = mp_read_unsigned_bin(&key->k, priv, privSz); + } + +#else + + ret = mp_read_unsigned_bin(&key->k, priv, privSz); +#ifdef HAVE_WOLF_BIGINT + if (ret == 0 && + wc_bigint_from_unsigned_bin(&key->k.raw, priv, privSz) != 0) { + mp_clear(&key->k); + ret = ASN_GETINT_E; + } +#endif /* HAVE_WOLF_BIGINT */ + + +#endif /* WOLFSSL_ATECC508A */ + +#ifdef WOLFSSL_VALIDATE_ECC_IMPORT + if ((pub != NULL) && (ret == MP_OKAY)) + /* public key needed to perform key validation */ + ret = ecc_check_privkey_gen_helper(key); +#endif + + return ret; +} + +/* ecc private key import, public key in ANSI X9.63 format, private raw */ +int wc_ecc_import_private_key(const byte* priv, word32 privSz, const byte* pub, + word32 pubSz, ecc_key* key) +{ + return wc_ecc_import_private_key_ex(priv, privSz, pub, pubSz, key, + ECC_CURVE_DEF); +} +#endif /* HAVE_ECC_KEY_IMPORT */ + +#ifndef NO_ASN +/** + Convert ECC R,S to signature + r R component of signature + s S component of signature + out DER-encoded ECDSA signature + outlen [in/out] output buffer size, output signature size + return MP_OKAY on success +*/ +int wc_ecc_rs_to_sig(const char* r, const char* s, byte* out, word32* outlen) +{ + int err; +#ifdef WOLFSSL_SMALL_STACK + mp_int* rtmp = NULL; + mp_int* stmp = NULL; +#else + mp_int rtmp[1]; + mp_int stmp[1]; +#endif + + if (r == NULL || s == NULL || out == NULL || outlen == NULL) + return ECC_BAD_ARG_E; + +#ifdef WOLFSSL_SMALL_STACK + rtmp = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + if (rtmp == NULL) + return MEMORY_E; + stmp = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + if (stmp == NULL) { + XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC); + return MEMORY_E; + } +#endif + + err = mp_init_multi(rtmp, stmp, NULL, NULL, NULL, NULL); + if (err != MP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(stmp, NULL, DYNAMIC_TYPE_ECC); + XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC); + #endif + return err; + } + + err = 
mp_read_radix(rtmp, r, MP_RADIX_HEX); + if (err == MP_OKAY) + err = mp_read_radix(stmp, s, MP_RADIX_HEX); + + /* convert mp_ints to ECDSA sig, initializes rtmp and stmp internally */ + if (err == MP_OKAY) + err = StoreECC_DSA_Sig(out, outlen, rtmp, stmp); + + if (err == MP_OKAY) { + if (mp_iszero(rtmp) == MP_YES || mp_iszero(stmp) == MP_YES) + err = MP_ZERO_E; + } + + mp_clear(rtmp); + mp_clear(stmp); +#ifdef WOLFSSL_SMALL_STACK + XFREE(stmp, NULL, DYNAMIC_TYPE_ECC); + XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/** + Convert ECC R,S raw unsigned bin to signature + r R component of signature + rSz R size + s S component of signature + sSz S size + out DER-encoded ECDSA signature + outlen [in/out] output buffer size, output signature size + return MP_OKAY on success +*/ +int wc_ecc_rs_raw_to_sig(const byte* r, word32 rSz, const byte* s, word32 sSz, + byte* out, word32* outlen) +{ + int err; +#ifdef WOLFSSL_SMALL_STACK + mp_int* rtmp = NULL; + mp_int* stmp = NULL; +#else + mp_int rtmp[1]; + mp_int stmp[1]; +#endif + + if (r == NULL || s == NULL || out == NULL || outlen == NULL) + return ECC_BAD_ARG_E; + +#ifdef WOLFSSL_SMALL_STACK + rtmp = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + if (rtmp == NULL) + return MEMORY_E; + stmp = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + if (stmp == NULL) { + XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC); + return MEMORY_E; + } +#endif + + err = mp_init_multi(rtmp, stmp, NULL, NULL, NULL, NULL); + if (err != MP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(stmp, NULL, DYNAMIC_TYPE_ECC); + XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC); + #endif + return err; + } + + err = mp_read_unsigned_bin(rtmp, r, rSz); + if (err == MP_OKAY) + err = mp_read_unsigned_bin(stmp, s, sSz); + + /* convert mp_ints to ECDSA sig, initializes rtmp and stmp internally */ + if (err == MP_OKAY) + err = StoreECC_DSA_Sig(out, outlen, rtmp, stmp); + + if (err == MP_OKAY) { + if (mp_iszero(rtmp) == MP_YES || mp_iszero(stmp) == MP_YES) + err = MP_ZERO_E; + } + + mp_clear(rtmp); + mp_clear(stmp); +#ifdef WOLFSSL_SMALL_STACK + XFREE(stmp, NULL, DYNAMIC_TYPE_ECC); + XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/** + Convert ECC signature to R,S + sig DER-encoded ECDSA signature + sigLen length of signature in octets + r R component of signature + rLen [in/out] output "r" buffer size, output "r" size + s S component of signature + sLen [in/out] output "s" buffer size, output "s" size + return MP_OKAY on success, negative on error +*/ +int wc_ecc_sig_to_rs(const byte* sig, word32 sigLen, byte* r, word32* rLen, + byte* s, word32* sLen) +{ + int err; + int tmp_valid = 0; + word32 x = 0; +#ifdef WOLFSSL_SMALL_STACK + mp_int* rtmp = NULL; + mp_int* stmp = NULL; +#else + mp_int rtmp[1]; + mp_int stmp[1]; +#endif + + if (sig == NULL || r == NULL || rLen == NULL || s == NULL || sLen == NULL) + return ECC_BAD_ARG_E; + +#ifdef WOLFSSL_SMALL_STACK + rtmp = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + if (rtmp == NULL) + return MEMORY_E; + stmp = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC); + if (stmp == NULL) { + XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC); + return MEMORY_E; + } +#endif + + err = DecodeECC_DSA_Sig(sig, sigLen, rtmp, stmp); + + /* rtmp and stmp are initialized */ + if (err == MP_OKAY) { + tmp_valid = 1; + + /* extract r */ + x = mp_unsigned_bin_size(rtmp); + if (*rLen < x) + err = BUFFER_E; + } + if (err == MP_OKAY) { + *rLen = x; + err = mp_to_unsigned_bin(rtmp, r); + } + + /* extract s */ + if (err == MP_OKAY) 
{ + x = mp_unsigned_bin_size(stmp); + if (*sLen < x) + err = BUFFER_E; + + if (err == MP_OKAY) { + *sLen = x; + err = mp_to_unsigned_bin(stmp, s); + } + } + + if (tmp_valid) { + mp_clear(rtmp); + mp_clear(stmp); + } +#ifdef WOLFSSL_SMALL_STACK + XFREE(stmp, NULL, DYNAMIC_TYPE_ECC); + XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} +#endif /* !NO_ASN */ + +#ifdef HAVE_ECC_KEY_IMPORT +static int wc_ecc_import_raw_private(ecc_key* key, const char* qx, + const char* qy, const char* d, int curve_id, int encType) +{ + int err = MP_OKAY; +#if defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_ATECC508A) + const CRYS_ECPKI_Domain_t* pDomain; + CRYS_ECPKI_BUILD_TempData_t tempBuff; + byte key_raw[ECC_MAX_CRYPTO_HW_SIZE*2 + 1]; + word32 keySz = 0; +#endif + /* if d is NULL, only import as public key using Qx,Qy */ + if (key == NULL || qx == NULL || qy == NULL) { + return BAD_FUNC_ARG; + } + + /* make sure required variables are reset */ + wc_ecc_reset(key); + + /* set curve type and index */ + err = wc_ecc_set_curve(key, 0, curve_id); + if (err != 0) { + return err; + } + + /* init key */ +#ifdef ALT_ECC_SIZE + key->pubkey.x = (mp_int*)&key->pubkey.xyz[0]; + key->pubkey.y = (mp_int*)&key->pubkey.xyz[1]; + key->pubkey.z = (mp_int*)&key->pubkey.xyz[2]; + alt_fp_init(key->pubkey.x); + alt_fp_init(key->pubkey.y); + alt_fp_init(key->pubkey.z); + err = mp_init(&key->k); +#else + err = mp_init_multi(&key->k, key->pubkey.x, key->pubkey.y, key->pubkey.z, + NULL, NULL); +#endif + if (err != MP_OKAY) + return MEMORY_E; + + /* read Qx */ + if (err == MP_OKAY) { + if (encType == WC_TYPE_HEX_STR) + err = mp_read_radix(key->pubkey.x, qx, MP_RADIX_HEX); + else + err = mp_read_unsigned_bin(key->pubkey.x, (const byte*)qx, + key->dp->size); + } + + /* read Qy */ + if (err == MP_OKAY) { + if (encType == WC_TYPE_HEX_STR) + err = mp_read_radix(key->pubkey.y, qy, MP_RADIX_HEX); + else + err = mp_read_unsigned_bin(key->pubkey.y, (const byte*)qy, + key->dp->size); + + } + + if (err == MP_OKAY) + err = mp_set(key->pubkey.z, 1); + +#ifdef WOLFSSL_ATECC508A + /* For SECP256R1 only save raw public key for hardware */ + if (err == MP_OKAY && curve_id == ECC_SECP256R1) { + word32 keySz = key->dp->size; + err = wc_export_int(key->pubkey.x, key->pubkey_raw, + &keySz, keySz, WC_TYPE_UNSIGNED_BIN); + if (err == MP_OKAY) + err = wc_export_int(key->pubkey.y, &key->pubkey_raw[keySz], + &keySz, keySz, WC_TYPE_UNSIGNED_BIN); + } +#elif defined(WOLFSSL_CRYPTOCELL) + if (err == MP_OKAY) { + key_raw[0] = ECC_POINT_UNCOMP; + keySz = (word32)key->dp->size; + err = wc_export_int(key->pubkey.x, &key_raw[1], &keySz, keySz, + WC_TYPE_UNSIGNED_BIN); + if (err == MP_OKAY) { + err = wc_export_int(key->pubkey.y, &key_raw[1+keySz], + &keySz, keySz, WC_TYPE_UNSIGNED_BIN); + } + + if (err == MP_OKAY) { + pDomain = CRYS_ECPKI_GetEcDomain(cc310_mapCurve(curve_id)); + + /* create public key from external key buffer */ + err = CRYS_ECPKI_BuildPublKeyFullCheck(pDomain, + key_raw, + keySz*2 + 1, + &key->ctx.pubKey, + &tempBuff); + } + + if (err != SA_SILIB_RET_OK){ + WOLFSSL_MSG("CRYS_ECPKI_BuildPublKeyFullCheck failed"); + return err; + } + } + +#endif + + /* import private key */ + if (err == MP_OKAY) { + if (d != NULL && d[0] != '\0') { + #ifdef WOLFSSL_ATECC508A + /* Hardware doesn't support loading private key */ + err = NOT_COMPILED_IN; + + #elif defined(WOLFSSL_CRYPTOCELL) + + key->type = ECC_PRIVATEKEY; + + if (encType == WC_TYPE_HEX_STR) + err = mp_read_radix(&key->k, d, MP_RADIX_HEX); + else + err = mp_read_unsigned_bin(&key->k, (const 
byte*)d,
+                                           key->dp->size);
+            if (err == MP_OKAY) {
+                err = wc_export_int(&key->k, &key_raw[0], &keySz, keySz,
+                                    WC_TYPE_UNSIGNED_BIN);
+            }
+
+            if (err == MP_OKAY) {
+                /* Create private key from external key buffer */
+                err = CRYS_ECPKI_BuildPrivKey(pDomain,
+                                              key_raw,
+                                              keySz,
+                                              &key->ctx.privKey);
+
+                if (err != SA_SILIB_RET_OK) {
+                    WOLFSSL_MSG("CRYS_ECPKI_BuildPrivKey failed");
+                    return err;
+                }
+            }
+
+        #else
+            key->type = ECC_PRIVATEKEY;
+
+            if (encType == WC_TYPE_HEX_STR)
+                err = mp_read_radix(&key->k, d, MP_RADIX_HEX);
+            else
+                err = mp_read_unsigned_bin(&key->k, (const byte*)d,
+                                           key->dp->size);
+        #endif /* WOLFSSL_ATECC508A */
+        } else {
+            key->type = ECC_PUBLICKEY;
+        }
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_IMPORT
+    if (err == MP_OKAY)
+        err = wc_ecc_check_key(key);
+#endif
+
+    if (err != MP_OKAY) {
+        mp_clear(key->pubkey.x);
+        mp_clear(key->pubkey.y);
+        mp_clear(key->pubkey.z);
+        mp_clear(&key->k);
+    }
+
+    return err;
+}
+
+/**
+ Import raw ECC key
+ key       The destination ecc_key structure
+ qx        x component of the public key, as ASCII hex string
+ qy        y component of the public key, as ASCII hex string
+ d         private key, as ASCII hex string, optional if importing public
+           key only
+ curve_id  Curve identifier, from ecc_sets[] (e.g. ECC_SECP256R1)
+ return    MP_OKAY on success
+*/
+int wc_ecc_import_raw_ex(ecc_key* key, const char* qx, const char* qy,
+                         const char* d, int curve_id)
+{
+    return wc_ecc_import_raw_private(key, qx, qy, d, curve_id,
+                                     WC_TYPE_HEX_STR);
+}
+
+/* Import x, y and optional private (d) as unsigned binary */
+int wc_ecc_import_unsigned(ecc_key* key, byte* qx, byte* qy,
+                           byte* d, int curve_id)
+{
+    return wc_ecc_import_raw_private(key, (const char*)qx, (const char*)qy,
+                                     (const char*)d, curve_id,
+                                     WC_TYPE_UNSIGNED_BIN);
+}
+
+/**
+ Import raw ECC key
+ key       The destination ecc_key structure
+ qx        x component of the public key, as ASCII hex string
+ qy        y component of the public key, as ASCII hex string
+ d         private key, as ASCII hex string, optional if importing public
+           key only
+ curveName ECC curve name, from ecc_sets[]
+ return    MP_OKAY on success
+*/
+int wc_ecc_import_raw(ecc_key* key, const char* qx, const char* qy,
+                      const char* d, const char* curveName)
+{
+    int err, x;
+
+    /* if d is NULL, only import as public key using Qx,Qy */
+    if (key == NULL || qx == NULL || qy == NULL || curveName == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* set curve type and index */
+    for (x = 0; ecc_sets[x].size != 0; x++) {
+        if (XSTRNCMP(ecc_sets[x].name, curveName,
+                     XSTRLEN(curveName)) == 0) {
+            break;
+        }
+    }
+
+    if (ecc_sets[x].size == 0) {
+        WOLFSSL_MSG("ecc_set curve name not found");
+        err = ASN_PARSE_E;
+    } else {
+        return wc_ecc_import_raw_private(key, qx, qy, d, ecc_sets[x].id,
+                                         WC_TYPE_HEX_STR);
+    }
+
+    return err;
+}
+#endif /* HAVE_ECC_KEY_IMPORT */
+
+/* key size in octets */
+int wc_ecc_size(ecc_key* key)
+{
+    if (key == NULL)
+        return 0;
+
+    return key->dp->size;
+}
+
+/* maximum signature size based on key size */
+int wc_ecc_sig_size_calc(int sz)
+{
+    int maxSigSz = 0;
+
+    /* calculate based on key bits */
+    /* maximum possible signature header size is 7 bytes plus 2 bytes padding */
+    maxSigSz = (sz * 2) + SIG_HEADER_SZ + ECC_MAX_PAD_SZ;
+
+    /* if total length is less than 128 + SEQ(1)+LEN(1) then subtract 1 */
+    if (maxSigSz < (128 + 2)) {
+        maxSigSz -= 1;
+    }
+
+    return maxSigSz;
+}
+
+/* maximum signature size based on actual key curve */
+int wc_ecc_sig_size(ecc_key* key)
+{
+    int maxSigSz;
+    int orderBits, keySz;
+
+    if (key == NULL || key->dp == NULL)
+        return 0;
+
+    /* the signature r and s will always
be less than order */
+    /* if the order MSB (top bit of byte) is set then ASN encoding needs
+       extra byte for r and s, so add 2 */
+    keySz = key->dp->size;
+    orderBits = wc_ecc_get_curve_order_bit_count(key->dp);
+    if (orderBits > keySz * 8) {
+        keySz = (orderBits + 7) / 8;
+    }
+    /* maximum possible signature header size is 7 bytes */
+    maxSigSz = (keySz * 2) + SIG_HEADER_SZ;
+    if ((orderBits % 8) == 0) {
+        /* MSB can be set, so add 2 */
+        maxSigSz += ECC_MAX_PAD_SZ;
+    }
+    /* if total length is less than 128 + SEQ(1)+LEN(1) then subtract 1 */
+    if (maxSigSz < (128 + 2)) {
+        maxSigSz -= 1;
+    }
+
+    return maxSigSz;
+}
+
+
+#ifdef FP_ECC
+
+/* fixed point ECC cache */
+/* number of entries in the cache */
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 15
+#endif
+
+/* number of bits in LUT */
+#ifndef FP_LUT
+    #define FP_LUT     8U
+#endif
+
+#ifdef ECC_SHAMIR
+    /* Shamir requires a bigger LUT, TAO */
+    #if (FP_LUT > 12) || (FP_LUT < 4)
+        #error FP_LUT must be between 4 and 12 inclusively
+    #endif
+#else
+    #if (FP_LUT > 12) || (FP_LUT < 2)
+        #error FP_LUT must be between 2 and 12 inclusively
+    #endif
+#endif
+
+
+#ifndef WOLFSSL_SP_MATH
+
+/** Our FP cache */
+typedef struct {
+   ecc_point* g;               /* cached COPY of base point */
+   ecc_point* LUT[1U<<FP_LUT]; /* fixed point lookup */
+   mp_int     mu;              /* copy of the Montgomery constant */
+   int        lru_count;       /* amount of times this entry has been used */
+   int        lock;            /* flag to indicate cache eviction
+                                * permitted (0) or not (1) */
+} fp_cache_t;
+
+/* if thread-local storage is available the cache is per thread */
+static THREAD_LS_T fp_cache_t fp_cache[FP_ENTRIES];
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initMutex = 0; /* prevent multiple mutex inits */
+    static wolfSSL_Mutex ecc_fp_lock;
+#endif /* HAVE_THREAD_LS */
+
+/* simple table to help direct the generation of the LUT */
+static const struct {
+   int ham, terma, termb;
+} lut_orders[] = {
+   { 0, 0, 0 }, { 1, 0, 0 }, { 1, 0, 0 }, { 2, 1, 2 }, { 1, 0, 0 }, { 2, 1, 4 }, { 2, 2, 4 }, { 3, 3, 4 },
+   { 1, 0, 0 }, { 2, 1, 8 }, { 2, 2, 8 }, { 3, 3, 8 }, { 2, 4, 8 }, { 3, 5, 8 }, { 3, 6, 8 }, { 4, 7, 8 },
+#if FP_LUT > 4
+   { 1, 0, 0 }, { 2, 1, 16 }, { 2, 2, 16 }, { 3, 3, 16 }, { 2, 4, 16 }, { 3, 5, 16 }, { 3, 6, 16 }, { 4, 7, 16 },
+   { 2, 8, 16 }, { 3, 9, 16 }, { 3, 10, 16 }, { 4, 11, 16 }, { 3, 12, 16 }, { 4, 13, 16 }, { 4, 14, 16 }, { 5, 15, 16 },
+#if FP_LUT > 5
+   { 1, 0, 0 }, { 2, 1, 32 }, { 2, 2, 32 }, { 3, 3, 32 }, { 2, 4, 32 }, { 3, 5, 32 }, { 3, 6, 32 }, { 4, 7, 32 },
+   { 2, 8, 32 }, { 3, 9, 32 }, { 3, 10, 32 }, { 4, 11, 32 }, { 3, 12, 32 }, { 4, 13, 32 }, { 4, 14, 32 }, { 5, 15, 32 },
+   { 2, 16, 32 }, { 3, 17, 32 }, { 3, 18, 32 }, { 4, 19, 32 }, { 3, 20, 32 }, { 4, 21, 32 }, { 4, 22, 32 }, { 5, 23, 32 },
+   { 3, 24, 32 }, { 4, 25, 32 }, { 4, 26, 32 }, { 5, 27, 32 }, { 4, 28, 32 }, { 5, 29, 32 }, { 5, 30, 32 }, { 6, 31, 32 },
+#if FP_LUT > 6
+   { 1, 0, 0 }, { 2, 1, 64 }, { 2, 2, 64 }, { 3, 3, 64 }, { 2, 4, 64 }, { 3, 5, 64 }, { 3, 6, 64 }, { 4, 7, 64 },
+   { 2, 8, 64 }, { 3, 9, 64 }, { 3, 10, 64 }, { 4, 11, 64 }, { 3, 12, 64 }, { 4, 13, 64 }, { 4, 14, 64 }, { 5, 15, 64 },
+   { 2, 16, 64 }, { 3, 17, 64 }, { 3, 18, 64 }, { 4, 19, 64 }, { 3, 20, 64 }, { 4, 21, 64 }, { 4, 22, 64 }, { 5, 23, 64 },
+   { 3, 24, 64 }, { 4, 25, 64 }, { 4, 26, 64 }, { 5, 27, 64 }, { 4, 28, 64 }, { 5, 29, 64 }, { 5, 30, 64 }, { 6, 31, 64 },
+   { 2, 32, 64 }, { 3, 33, 64 }, { 3, 34, 64 }, { 4, 35, 64 }, { 3, 36, 64 }, { 4, 37, 64 }, { 4, 38, 64 }, { 5, 39, 64 },
+   { 3, 40, 64 }, { 4, 41, 64 }, { 4, 42, 64 }, { 5, 43, 64 }, { 4, 44, 64 }, { 5, 45, 64 }, { 5, 46, 64 }, { 6, 47, 64 },
+   { 3, 48, 64 }, { 4, 49, 64 }, { 4, 50, 64 }, { 5, 51, 64 }, { 4, 52, 64 }, { 5, 53, 64 }, { 5, 54, 64 }, { 6, 55, 64 },
+   { 4, 56, 64 }, { 5, 57, 64 }, { 5, 58, 64 }, { 6, 59, 64 }, { 5, 60, 64 }, { 6, 61, 64 }, { 6, 62, 64 }, { 7, 63, 64 },
+#if FP_LUT > 7
+   { 1, 0, 0 }, { 2, 1, 128 }, { 2, 2, 128 }, { 3, 3, 128 }, { 2, 4, 128 }, { 3, 5, 128 }, { 3, 6, 128 }, { 4, 7, 128 },
+   { 2, 8, 128 }, { 3, 9, 128 }, { 3, 10, 128 }, { 4, 11, 128 }, { 3, 12, 128 }, { 4, 13, 128 }, { 4, 14, 128 }, { 5, 15, 128 },
+   { 2, 16, 128 }, { 3, 17, 128 }, { 3, 18, 128 }, { 4, 19, 128 }, { 3, 20, 128 }, { 4, 21, 128 }, { 4, 22, 128 }, { 5, 23, 128 },
+   { 3, 24, 128 }, { 4, 25, 128 }, { 4, 26, 128 }, { 5, 27, 128 }, { 4, 28, 128 }, { 5, 29, 128 }, { 5, 30, 128 }, { 6, 31, 128 },
+   { 2, 32, 128 }, { 3, 33, 128 }, { 3, 34, 128 }, { 4, 35, 128 }, { 3, 36, 128 }, { 4, 37, 128 }, { 4, 38, 128 }, { 5, 39, 128 },
+   { 3, 40, 128 }, { 4, 41, 128 }, { 4, 42, 128 }, { 5, 43, 128 }, { 4, 44, 128 }, { 5, 45, 128 }, { 5, 46, 128 }, { 6, 47, 128 },
+   { 3, 48, 128 }, { 4, 49, 128 }, { 4, 50, 128 }, { 5, 51, 128 }, { 4, 52, 128 }, { 5, 53, 128 }, { 5, 54, 128 }, { 6, 55, 128 },
+   { 4, 56, 128 }, { 5, 57, 128 }, { 5, 58, 128 }, { 6, 59, 128 }, { 5, 60, 128 }, { 6, 61, 128 }, { 6, 62, 128 }, { 7, 63, 128 },
+   { 2, 64, 128 }, { 3, 65, 128 }, { 3, 66, 128 }, { 4, 67, 128 }, { 3, 68, 128 }, { 4, 69, 128 }, { 4, 70, 128 }, { 5, 71, 128 },
+   { 3, 72, 128 }, { 4, 73, 128 }, { 4, 74, 128 }, { 5, 75, 128 }, { 4, 76, 128 }, { 5, 77, 128 }, { 5, 78, 128 }, { 6, 79, 128 },
+   { 3, 80, 128 }, { 4, 81, 128
+#if FP_LUT > 7
+   { 1, 0, 0 }, { 2, 1, 128 }, { 2, 2, 128 }, { 3, 3, 128 }, { 2, 4, 128 }, { 3, 5, 128 }, { 3, 6, 128 }, { 4, 7, 128 },
+   { 2, 8, 128 }, { 3, 9, 128 }, { 3, 10, 128 }, { 4, 11, 128 }, { 3, 12, 128 }, { 4, 13, 128 }, { 4, 14, 128 }, { 5, 15, 128 },
+   { 2, 16, 128 }, { 3, 17, 128 }, { 3, 18, 128 }, { 4, 19, 128 }, { 3, 20, 128 }, { 4, 21, 128 }, { 4, 22, 128 }, { 5, 23, 128 },
+   { 3, 24, 128 }, { 4, 25, 128 }, { 4, 26, 128 }, { 5, 27, 128 }, { 4, 28, 128 }, { 5, 29, 128 }, { 5, 30, 128 }, { 6, 31, 128 },
+   { 2, 32, 128 }, { 3, 33, 128 }, { 3, 34, 128 }, { 4, 35, 128 }, { 3, 36, 128 }, { 4, 37, 128 }, { 4, 38, 128 }, { 5, 39, 128 },
+   { 3, 40, 128 }, { 4, 41, 128 }, { 4, 42, 128 }, { 5, 43, 128 }, { 4, 44, 128 }, { 5, 45, 128 }, { 5, 46, 128 }, { 6, 47, 128 },
+   { 3, 48, 128 }, { 4, 49, 128 }, { 4, 50, 128 }, { 5, 51, 128 }, { 4, 52, 128 }, { 5, 53, 128 }, { 5, 54, 128 }, { 6, 55, 128 },
+   { 4, 56, 128 }, { 5, 57, 128 }, { 5, 58, 128 }, { 6, 59, 128 }, { 5, 60, 128 }, { 6, 61, 128 }, { 6, 62, 128 }, { 7, 63, 128 },
+   { 2, 64, 128 }, { 3, 65, 128 }, { 3, 66, 128 }, { 4, 67, 128 }, { 3, 68, 128 }, { 4, 69, 128 }, { 4, 70, 128 }, { 5, 71, 128 },
+   { 3, 72, 128 }, { 4, 73, 128 }, { 4, 74, 128 }, { 5, 75, 128 }, { 4, 76, 128 }, { 5, 77, 128 }, { 5, 78, 128 }, { 6, 79, 128 },
+   { 3, 80, 128 }, { 4, 81, 128 }, { 4, 82, 128 }, { 5, 83, 128 }, { 4, 84, 128 }, { 5, 85, 128 }, { 5, 86, 128 }, { 6, 87, 128 },
+   { 4, 88, 128 }, { 5, 89, 128 }, { 5, 90, 128 }, { 6, 91, 128 }, { 5, 92, 128 }, { 6, 93, 128 }, { 6, 94, 128 }, { 7, 95, 128 },
+   { 3, 96, 128 }, { 4, 97, 128 }, { 4, 98, 128 }, { 5, 99, 128 }, { 4, 100, 128 }, { 5, 101, 128 }, { 5, 102, 128 }, { 6, 103, 128 },
+   { 4, 104, 128 }, { 5, 105, 128 }, { 5, 106, 128 }, { 6, 107, 128 }, { 5, 108, 128 }, { 6, 109, 128 }, { 6, 110, 128 }, { 7, 111, 128 },
+   { 4, 112, 128 }, { 5, 113, 128 }, { 5, 114, 128 }, { 6, 115, 128 }, { 5, 116, 128 }, { 6, 117, 128 }, { 6, 118, 128 }, { 7, 119, 128 },
+   { 5, 120, 128 }, { 6, 121, 128 }, { 6, 122, 128 }, { 7, 123, 128 }, { 6, 124, 128 }, { 7, 125, 128 }, { 7, 126, 128 }, { 8, 127, 128 },
+#if FP_LUT > 8
+   { 1, 0, 0 }, { 2, 1, 256 }, { 2, 2, 256 }, { 3, 3, 256 }, { 2, 4, 256 }, { 3, 5, 256 }, { 3, 6, 256 }, { 4, 7, 256 },
+   { 2, 8, 256 }, { 3, 9, 256 }, { 3, 10, 256 }, { 4, 11, 256 }, { 3, 12, 256 }, { 4, 13, 256 }, { 4, 14, 256 }, { 5, 15, 256 },
+   { 2, 16, 256 }, { 3, 17, 256 }, { 3, 18, 256 }, { 4, 19, 256 }, { 3, 20, 256 }, { 4, 21, 256 }, { 4, 22, 256 }, { 5, 23, 256 },
+   { 3, 24, 256 }, { 4, 25, 256 }, { 4, 26, 256 }, { 5, 27, 256 }, { 4, 28, 256 }, { 5, 29, 256 }, { 5, 30, 256 }, { 6, 31, 256 },
+   { 2, 32, 256 }, { 3, 33, 256 }, { 3, 34, 256 }, { 4, 35, 256 }, { 3, 36, 256 }, { 4, 37, 256 }, { 4, 38, 256 }, { 5, 39, 256 },
+   { 3, 40, 256 }, { 4, 41, 256 }, { 4, 42, 256 }, { 5, 43, 256 }, { 4, 44, 256 }, { 5, 45, 256 }, { 5, 46, 256 }, { 6, 47, 256 },
+   { 3, 48, 256 }, { 4, 49, 256 }, { 4, 50, 256 }, { 5, 51, 256 }, { 4, 52, 256 }, { 5, 53, 256 }, { 5, 54, 256 }, { 6, 55, 256 },
+   { 4, 56, 256 }, { 5, 57, 256 }, { 5, 58, 256 }, { 6, 59, 256 }, { 5, 60, 256 }, { 6, 61, 256 }, { 6, 62, 256 }, { 7, 63, 256 },
+   { 2, 64, 256 }, { 3, 65, 256 }, { 3, 66, 256 }, { 4, 67, 256 }, { 3, 68, 256 }, { 4, 69, 256 }, { 4, 70, 256 }, { 5, 71, 256 },
+   { 3, 72, 256 }, { 4, 73, 256 }, { 4, 74, 256 }, { 5, 75, 256 }, { 4, 76, 256 }, { 5, 77, 256 }, { 5, 78, 256 }, { 6, 79, 256 },
+   { 3, 80, 256 }, { 4, 81, 256 }, { 4, 82, 256 }, { 5, 83, 256 }, { 4, 84, 256 }, { 5, 85, 256 }, { 5, 86, 256 }, { 6, 87, 256 },
+   { 4, 88, 256 }, { 5, 89, 256 }, { 5, 90, 256 }, { 6, 91, 256 }, { 5, 92, 256 }, { 6, 93, 256 }, { 6, 94, 256 }, { 7, 95, 256 },
+   { 3, 96, 256 }, { 4, 97, 256 }, { 4, 98, 256 }, { 5, 99, 256 }, { 4, 100, 256 }, { 5, 101, 256 }, { 5, 102, 256 }, { 6, 103, 256 },
+   { 4, 104, 256 }, { 5, 105, 256 }, { 5, 106, 256 }, { 6, 107, 256 }, { 5, 108, 256 }, { 6, 109, 256 }, { 6, 110, 256 }, { 7, 111, 256 },
+   { 4, 112, 256 }, { 5, 113, 256 }, { 5, 114, 256 }, { 6, 115, 256 }, { 5, 116, 256 }, { 6, 117, 256 }, { 6, 118, 256 }, { 7, 119, 256 },
+   { 5, 120, 256 }, { 6, 121, 256 }, { 6, 122, 256 }, { 7, 123, 256 }, { 6, 124, 256 }, { 7, 125, 256 }, { 7, 126, 256 }, { 8, 127, 256 },
+   { 2, 128, 256 }, { 3, 129, 256 }, { 3, 130, 256 }, { 4, 131, 256 }, { 3, 132, 256 }, { 4, 133, 256 }, { 4, 134, 256 }, { 5, 135, 256 },
+   { 3, 136, 256 }, { 4, 137, 256 }, { 4, 138, 256 }, { 5, 139, 256 }, { 4, 140, 256 }, { 5, 141, 256 }, { 5, 142, 256 }, { 6, 143, 256 },
+   { 3, 144, 256 }, { 4, 145, 256 }, { 4, 146, 256 }, { 5, 147, 256 }, { 4, 148, 256 }, { 5, 149, 256 }, { 5, 150, 256 }, { 6, 151, 256 },
+   { 4, 152, 256 }, { 5, 153, 256 }, { 5, 154, 256 }, { 6, 155, 256 }, { 5, 156, 256 }, { 6, 157, 256 }, { 6, 158, 256 }, { 7, 159, 256 },
+   { 3, 160, 256 }, { 4, 161, 256 }, { 4, 162, 256 }, { 5, 163, 256 }, { 4, 164, 256 }, { 5, 165, 256 }, { 5, 166,
256 }, { 6, 167, 256 }, + { 4, 168, 256 }, { 5, 169, 256 }, { 5, 170, 256 }, { 6, 171, 256 }, { 5, 172, 256 }, { 6, 173, 256 }, { 6, 174, 256 }, { 7, 175, 256 }, + { 4, 176, 256 }, { 5, 177, 256 }, { 5, 178, 256 }, { 6, 179, 256 }, { 5, 180, 256 }, { 6, 181, 256 }, { 6, 182, 256 }, { 7, 183, 256 }, + { 5, 184, 256 }, { 6, 185, 256 }, { 6, 186, 256 }, { 7, 187, 256 }, { 6, 188, 256 }, { 7, 189, 256 }, { 7, 190, 256 }, { 8, 191, 256 }, + { 3, 192, 256 }, { 4, 193, 256 }, { 4, 194, 256 }, { 5, 195, 256 }, { 4, 196, 256 }, { 5, 197, 256 }, { 5, 198, 256 }, { 6, 199, 256 }, + { 4, 200, 256 }, { 5, 201, 256 }, { 5, 202, 256 }, { 6, 203, 256 }, { 5, 204, 256 }, { 6, 205, 256 }, { 6, 206, 256 }, { 7, 207, 256 }, + { 4, 208, 256 }, { 5, 209, 256 }, { 5, 210, 256 }, { 6, 211, 256 }, { 5, 212, 256 }, { 6, 213, 256 }, { 6, 214, 256 }, { 7, 215, 256 }, + { 5, 216, 256 }, { 6, 217, 256 }, { 6, 218, 256 }, { 7, 219, 256 }, { 6, 220, 256 }, { 7, 221, 256 }, { 7, 222, 256 }, { 8, 223, 256 }, + { 4, 224, 256 }, { 5, 225, 256 }, { 5, 226, 256 }, { 6, 227, 256 }, { 5, 228, 256 }, { 6, 229, 256 }, { 6, 230, 256 }, { 7, 231, 256 }, + { 5, 232, 256 }, { 6, 233, 256 }, { 6, 234, 256 }, { 7, 235, 256 }, { 6, 236, 256 }, { 7, 237, 256 }, { 7, 238, 256 }, { 8, 239, 256 }, + { 5, 240, 256 }, { 6, 241, 256 }, { 6, 242, 256 }, { 7, 243, 256 }, { 6, 244, 256 }, { 7, 245, 256 }, { 7, 246, 256 }, { 8, 247, 256 }, + { 6, 248, 256 }, { 7, 249, 256 }, { 7, 250, 256 }, { 8, 251, 256 }, { 7, 252, 256 }, { 8, 253, 256 }, { 8, 254, 256 }, { 9, 255, 256 }, +#if FP_LUT > 9 + { 1, 0, 0 }, { 2, 1, 512 }, { 2, 2, 512 }, { 3, 3, 512 }, { 2, 4, 512 }, { 3, 5, 512 }, { 3, 6, 512 }, { 4, 7, 512 }, + { 2, 8, 512 }, { 3, 9, 512 }, { 3, 10, 512 }, { 4, 11, 512 }, { 3, 12, 512 }, { 4, 13, 512 }, { 4, 14, 512 }, { 5, 15, 512 }, + { 2, 16, 512 }, { 3, 17, 512 }, { 3, 18, 512 }, { 4, 19, 512 }, { 3, 20, 512 }, { 4, 21, 512 }, { 4, 22, 512 }, { 5, 23, 512 }, + { 3, 24, 512 }, { 4, 25, 512 }, { 4, 26, 512 }, { 5, 27, 512 }, { 4, 28, 512 }, { 5, 29, 512 }, { 5, 30, 512 }, { 6, 31, 512 }, + { 2, 32, 512 }, { 3, 33, 512 }, { 3, 34, 512 }, { 4, 35, 512 }, { 3, 36, 512 }, { 4, 37, 512 }, { 4, 38, 512 }, { 5, 39, 512 }, + { 3, 40, 512 }, { 4, 41, 512 }, { 4, 42, 512 }, { 5, 43, 512 }, { 4, 44, 512 }, { 5, 45, 512 }, { 5, 46, 512 }, { 6, 47, 512 }, + { 3, 48, 512 }, { 4, 49, 512 }, { 4, 50, 512 }, { 5, 51, 512 }, { 4, 52, 512 }, { 5, 53, 512 }, { 5, 54, 512 }, { 6, 55, 512 }, + { 4, 56, 512 }, { 5, 57, 512 }, { 5, 58, 512 }, { 6, 59, 512 }, { 5, 60, 512 }, { 6, 61, 512 }, { 6, 62, 512 }, { 7, 63, 512 }, + { 2, 64, 512 }, { 3, 65, 512 }, { 3, 66, 512 }, { 4, 67, 512 }, { 3, 68, 512 }, { 4, 69, 512 }, { 4, 70, 512 }, { 5, 71, 512 }, + { 3, 72, 512 }, { 4, 73, 512 }, { 4, 74, 512 }, { 5, 75, 512 }, { 4, 76, 512 }, { 5, 77, 512 }, { 5, 78, 512 }, { 6, 79, 512 }, + { 3, 80, 512 }, { 4, 81, 512 }, { 4, 82, 512 }, { 5, 83, 512 }, { 4, 84, 512 }, { 5, 85, 512 }, { 5, 86, 512 }, { 6, 87, 512 }, + { 4, 88, 512 }, { 5, 89, 512 }, { 5, 90, 512 }, { 6, 91, 512 }, { 5, 92, 512 }, { 6, 93, 512 }, { 6, 94, 512 }, { 7, 95, 512 }, + { 3, 96, 512 }, { 4, 97, 512 }, { 4, 98, 512 }, { 5, 99, 512 }, { 4, 100, 512 }, { 5, 101, 512 }, { 5, 102, 512 }, { 6, 103, 512 }, + { 4, 104, 512 }, { 5, 105, 512 }, { 5, 106, 512 }, { 6, 107, 512 }, { 5, 108, 512 }, { 6, 109, 512 }, { 6, 110, 512 }, { 7, 111, 512 }, + { 4, 112, 512 }, { 5, 113, 512 }, { 5, 114, 512 }, { 6, 115, 512 }, { 5, 116, 512 }, { 6, 117, 512 }, { 6, 118, 512 }, { 7, 119, 512 }, + { 5, 120, 512 }, { 6, 121, 512 }, { 
6, 122, 512 }, { 7, 123, 512 }, { 6, 124, 512 }, { 7, 125, 512 }, { 7, 126, 512 }, { 8, 127, 512 }, + { 2, 128, 512 }, { 3, 129, 512 }, { 3, 130, 512 }, { 4, 131, 512 }, { 3, 132, 512 }, { 4, 133, 512 }, { 4, 134, 512 }, { 5, 135, 512 }, + { 3, 136, 512 }, { 4, 137, 512 }, { 4, 138, 512 }, { 5, 139, 512 }, { 4, 140, 512 }, { 5, 141, 512 }, { 5, 142, 512 }, { 6, 143, 512 }, + { 3, 144, 512 }, { 4, 145, 512 }, { 4, 146, 512 }, { 5, 147, 512 }, { 4, 148, 512 }, { 5, 149, 512 }, { 5, 150, 512 }, { 6, 151, 512 }, + { 4, 152, 512 }, { 5, 153, 512 }, { 5, 154, 512 }, { 6, 155, 512 }, { 5, 156, 512 }, { 6, 157, 512 }, { 6, 158, 512 }, { 7, 159, 512 }, + { 3, 160, 512 }, { 4, 161, 512 }, { 4, 162, 512 }, { 5, 163, 512 }, { 4, 164, 512 }, { 5, 165, 512 }, { 5, 166, 512 }, { 6, 167, 512 }, + { 4, 168, 512 }, { 5, 169, 512 }, { 5, 170, 512 }, { 6, 171, 512 }, { 5, 172, 512 }, { 6, 173, 512 }, { 6, 174, 512 }, { 7, 175, 512 }, + { 4, 176, 512 }, { 5, 177, 512 }, { 5, 178, 512 }, { 6, 179, 512 }, { 5, 180, 512 }, { 6, 181, 512 }, { 6, 182, 512 }, { 7, 183, 512 }, + { 5, 184, 512 }, { 6, 185, 512 }, { 6, 186, 512 }, { 7, 187, 512 }, { 6, 188, 512 }, { 7, 189, 512 }, { 7, 190, 512 }, { 8, 191, 512 }, + { 3, 192, 512 }, { 4, 193, 512 }, { 4, 194, 512 }, { 5, 195, 512 }, { 4, 196, 512 }, { 5, 197, 512 }, { 5, 198, 512 }, { 6, 199, 512 }, + { 4, 200, 512 }, { 5, 201, 512 }, { 5, 202, 512 }, { 6, 203, 512 }, { 5, 204, 512 }, { 6, 205, 512 }, { 6, 206, 512 }, { 7, 207, 512 }, + { 4, 208, 512 }, { 5, 209, 512 }, { 5, 210, 512 }, { 6, 211, 512 }, { 5, 212, 512 }, { 6, 213, 512 }, { 6, 214, 512 }, { 7, 215, 512 }, + { 5, 216, 512 }, { 6, 217, 512 }, { 6, 218, 512 }, { 7, 219, 512 }, { 6, 220, 512 }, { 7, 221, 512 }, { 7, 222, 512 }, { 8, 223, 512 }, + { 4, 224, 512 }, { 5, 225, 512 }, { 5, 226, 512 }, { 6, 227, 512 }, { 5, 228, 512 }, { 6, 229, 512 }, { 6, 230, 512 }, { 7, 231, 512 }, + { 5, 232, 512 }, { 6, 233, 512 }, { 6, 234, 512 }, { 7, 235, 512 }, { 6, 236, 512 }, { 7, 237, 512 }, { 7, 238, 512 }, { 8, 239, 512 }, + { 5, 240, 512 }, { 6, 241, 512 }, { 6, 242, 512 }, { 7, 243, 512 }, { 6, 244, 512 }, { 7, 245, 512 }, { 7, 246, 512 }, { 8, 247, 512 }, + { 6, 248, 512 }, { 7, 249, 512 }, { 7, 250, 512 }, { 8, 251, 512 }, { 7, 252, 512 }, { 8, 253, 512 }, { 8, 254, 512 }, { 9, 255, 512 }, + { 2, 256, 512 }, { 3, 257, 512 }, { 3, 258, 512 }, { 4, 259, 512 }, { 3, 260, 512 }, { 4, 261, 512 }, { 4, 262, 512 }, { 5, 263, 512 }, + { 3, 264, 512 }, { 4, 265, 512 }, { 4, 266, 512 }, { 5, 267, 512 }, { 4, 268, 512 }, { 5, 269, 512 }, { 5, 270, 512 }, { 6, 271, 512 }, + { 3, 272, 512 }, { 4, 273, 512 }, { 4, 274, 512 }, { 5, 275, 512 }, { 4, 276, 512 }, { 5, 277, 512 }, { 5, 278, 512 }, { 6, 279, 512 }, + { 4, 280, 512 }, { 5, 281, 512 }, { 5, 282, 512 }, { 6, 283, 512 }, { 5, 284, 512 }, { 6, 285, 512 }, { 6, 286, 512 }, { 7, 287, 512 }, + { 3, 288, 512 }, { 4, 289, 512 }, { 4, 290, 512 }, { 5, 291, 512 }, { 4, 292, 512 }, { 5, 293, 512 }, { 5, 294, 512 }, { 6, 295, 512 }, + { 4, 296, 512 }, { 5, 297, 512 }, { 5, 298, 512 }, { 6, 299, 512 }, { 5, 300, 512 }, { 6, 301, 512 }, { 6, 302, 512 }, { 7, 303, 512 }, + { 4, 304, 512 }, { 5, 305, 512 }, { 5, 306, 512 }, { 6, 307, 512 }, { 5, 308, 512 }, { 6, 309, 512 }, { 6, 310, 512 }, { 7, 311, 512 }, + { 5, 312, 512 }, { 6, 313, 512 }, { 6, 314, 512 }, { 7, 315, 512 }, { 6, 316, 512 }, { 7, 317, 512 }, { 7, 318, 512 }, { 8, 319, 512 }, + { 3, 320, 512 }, { 4, 321, 512 }, { 4, 322, 512 }, { 5, 323, 512 }, { 4, 324, 512 }, { 5, 325, 512 }, { 5, 326, 512 }, { 6, 327, 512 }, + { 
4, 328, 512 }, { 5, 329, 512 }, { 5, 330, 512 }, { 6, 331, 512 }, { 5, 332, 512 }, { 6, 333, 512 }, { 6, 334, 512 }, { 7, 335, 512 }, + { 4, 336, 512 }, { 5, 337, 512 }, { 5, 338, 512 }, { 6, 339, 512 }, { 5, 340, 512 }, { 6, 341, 512 }, { 6, 342, 512 }, { 7, 343, 512 }, + { 5, 344, 512 }, { 6, 345, 512 }, { 6, 346, 512 }, { 7, 347, 512 }, { 6, 348, 512 }, { 7, 349, 512 }, { 7, 350, 512 }, { 8, 351, 512 }, + { 4, 352, 512 }, { 5, 353, 512 }, { 5, 354, 512 }, { 6, 355, 512 }, { 5, 356, 512 }, { 6, 357, 512 }, { 6, 358, 512 }, { 7, 359, 512 }, + { 5, 360, 512 }, { 6, 361, 512 }, { 6, 362, 512 }, { 7, 363, 512 }, { 6, 364, 512 }, { 7, 365, 512 }, { 7, 366, 512 }, { 8, 367, 512 }, + { 5, 368, 512 }, { 6, 369, 512 }, { 6, 370, 512 }, { 7, 371, 512 }, { 6, 372, 512 }, { 7, 373, 512 }, { 7, 374, 512 }, { 8, 375, 512 }, + { 6, 376, 512 }, { 7, 377, 512 }, { 7, 378, 512 }, { 8, 379, 512 }, { 7, 380, 512 }, { 8, 381, 512 }, { 8, 382, 512 }, { 9, 383, 512 }, + { 3, 384, 512 }, { 4, 385, 512 }, { 4, 386, 512 }, { 5, 387, 512 }, { 4, 388, 512 }, { 5, 389, 512 }, { 5, 390, 512 }, { 6, 391, 512 }, + { 4, 392, 512 }, { 5, 393, 512 }, { 5, 394, 512 }, { 6, 395, 512 }, { 5, 396, 512 }, { 6, 397, 512 }, { 6, 398, 512 }, { 7, 399, 512 }, + { 4, 400, 512 }, { 5, 401, 512 }, { 5, 402, 512 }, { 6, 403, 512 }, { 5, 404, 512 }, { 6, 405, 512 }, { 6, 406, 512 }, { 7, 407, 512 }, + { 5, 408, 512 }, { 6, 409, 512 }, { 6, 410, 512 }, { 7, 411, 512 }, { 6, 412, 512 }, { 7, 413, 512 }, { 7, 414, 512 }, { 8, 415, 512 }, + { 4, 416, 512 }, { 5, 417, 512 }, { 5, 418, 512 }, { 6, 419, 512 }, { 5, 420, 512 }, { 6, 421, 512 }, { 6, 422, 512 }, { 7, 423, 512 }, + { 5, 424, 512 }, { 6, 425, 512 }, { 6, 426, 512 }, { 7, 427, 512 }, { 6, 428, 512 }, { 7, 429, 512 }, { 7, 430, 512 }, { 8, 431, 512 }, + { 5, 432, 512 }, { 6, 433, 512 }, { 6, 434, 512 }, { 7, 435, 512 }, { 6, 436, 512 }, { 7, 437, 512 }, { 7, 438, 512 }, { 8, 439, 512 }, + { 6, 440, 512 }, { 7, 441, 512 }, { 7, 442, 512 }, { 8, 443, 512 }, { 7, 444, 512 }, { 8, 445, 512 }, { 8, 446, 512 }, { 9, 447, 512 }, + { 4, 448, 512 }, { 5, 449, 512 }, { 5, 450, 512 }, { 6, 451, 512 }, { 5, 452, 512 }, { 6, 453, 512 }, { 6, 454, 512 }, { 7, 455, 512 }, + { 5, 456, 512 }, { 6, 457, 512 }, { 6, 458, 512 }, { 7, 459, 512 }, { 6, 460, 512 }, { 7, 461, 512 }, { 7, 462, 512 }, { 8, 463, 512 }, + { 5, 464, 512 }, { 6, 465, 512 }, { 6, 466, 512 }, { 7, 467, 512 }, { 6, 468, 512 }, { 7, 469, 512 }, { 7, 470, 512 }, { 8, 471, 512 }, + { 6, 472, 512 }, { 7, 473, 512 }, { 7, 474, 512 }, { 8, 475, 512 }, { 7, 476, 512 }, { 8, 477, 512 }, { 8, 478, 512 }, { 9, 479, 512 }, + { 5, 480, 512 }, { 6, 481, 512 }, { 6, 482, 512 }, { 7, 483, 512 }, { 6, 484, 512 }, { 7, 485, 512 }, { 7, 486, 512 }, { 8, 487, 512 }, + { 6, 488, 512 }, { 7, 489, 512 }, { 7, 490, 512 }, { 8, 491, 512 }, { 7, 492, 512 }, { 8, 493, 512 }, { 8, 494, 512 }, { 9, 495, 512 }, + { 6, 496, 512 }, { 7, 497, 512 }, { 7, 498, 512 }, { 8, 499, 512 }, { 7, 500, 512 }, { 8, 501, 512 }, { 8, 502, 512 }, { 9, 503, 512 }, + { 7, 504, 512 }, { 8, 505, 512 }, { 8, 506, 512 }, { 9, 507, 512 }, { 8, 508, 512 }, { 9, 509, 512 }, { 9, 510, 512 }, { 10, 511, 512 }, +#if FP_LUT > 10 + { 1, 0, 0 }, { 2, 1, 1024 }, { 2, 2, 1024 }, { 3, 3, 1024 }, { 2, 4, 1024 }, { 3, 5, 1024 }, { 3, 6, 1024 }, { 4, 7, 1024 }, + { 2, 8, 1024 }, { 3, 9, 1024 }, { 3, 10, 1024 }, { 4, 11, 1024 }, { 3, 12, 1024 }, { 4, 13, 1024 }, { 4, 14, 1024 }, { 5, 15, 1024 }, + { 2, 16, 1024 }, { 3, 17, 1024 }, { 3, 18, 1024 }, { 4, 19, 1024 }, { 3, 20, 1024 }, { 4, 21, 1024 }, 
{ 4, 22, 1024 }, { 5, 23, 1024 }, + { 3, 24, 1024 }, { 4, 25, 1024 }, { 4, 26, 1024 }, { 5, 27, 1024 }, { 4, 28, 1024 }, { 5, 29, 1024 }, { 5, 30, 1024 }, { 6, 31, 1024 }, + { 2, 32, 1024 }, { 3, 33, 1024 }, { 3, 34, 1024 }, { 4, 35, 1024 }, { 3, 36, 1024 }, { 4, 37, 1024 }, { 4, 38, 1024 }, { 5, 39, 1024 }, + { 3, 40, 1024 }, { 4, 41, 1024 }, { 4, 42, 1024 }, { 5, 43, 1024 }, { 4, 44, 1024 }, { 5, 45, 1024 }, { 5, 46, 1024 }, { 6, 47, 1024 }, + { 3, 48, 1024 }, { 4, 49, 1024 }, { 4, 50, 1024 }, { 5, 51, 1024 }, { 4, 52, 1024 }, { 5, 53, 1024 }, { 5, 54, 1024 }, { 6, 55, 1024 }, + { 4, 56, 1024 }, { 5, 57, 1024 }, { 5, 58, 1024 }, { 6, 59, 1024 }, { 5, 60, 1024 }, { 6, 61, 1024 }, { 6, 62, 1024 }, { 7, 63, 1024 }, + { 2, 64, 1024 }, { 3, 65, 1024 }, { 3, 66, 1024 }, { 4, 67, 1024 }, { 3, 68, 1024 }, { 4, 69, 1024 }, { 4, 70, 1024 }, { 5, 71, 1024 }, + { 3, 72, 1024 }, { 4, 73, 1024 }, { 4, 74, 1024 }, { 5, 75, 1024 }, { 4, 76, 1024 }, { 5, 77, 1024 }, { 5, 78, 1024 }, { 6, 79, 1024 }, + { 3, 80, 1024 }, { 4, 81, 1024 }, { 4, 82, 1024 }, { 5, 83, 1024 }, { 4, 84, 1024 }, { 5, 85, 1024 }, { 5, 86, 1024 }, { 6, 87, 1024 }, + { 4, 88, 1024 }, { 5, 89, 1024 }, { 5, 90, 1024 }, { 6, 91, 1024 }, { 5, 92, 1024 }, { 6, 93, 1024 }, { 6, 94, 1024 }, { 7, 95, 1024 }, + { 3, 96, 1024 }, { 4, 97, 1024 }, { 4, 98, 1024 }, { 5, 99, 1024 }, { 4, 100, 1024 }, { 5, 101, 1024 }, { 5, 102, 1024 }, { 6, 103, 1024 }, + { 4, 104, 1024 }, { 5, 105, 1024 }, { 5, 106, 1024 }, { 6, 107, 1024 }, { 5, 108, 1024 }, { 6, 109, 1024 }, { 6, 110, 1024 }, { 7, 111, 1024 }, + { 4, 112, 1024 }, { 5, 113, 1024 }, { 5, 114, 1024 }, { 6, 115, 1024 }, { 5, 116, 1024 }, { 6, 117, 1024 }, { 6, 118, 1024 }, { 7, 119, 1024 }, + { 5, 120, 1024 }, { 6, 121, 1024 }, { 6, 122, 1024 }, { 7, 123, 1024 }, { 6, 124, 1024 }, { 7, 125, 1024 }, { 7, 126, 1024 }, { 8, 127, 1024 }, + { 2, 128, 1024 }, { 3, 129, 1024 }, { 3, 130, 1024 }, { 4, 131, 1024 }, { 3, 132, 1024 }, { 4, 133, 1024 }, { 4, 134, 1024 }, { 5, 135, 1024 }, + { 3, 136, 1024 }, { 4, 137, 1024 }, { 4, 138, 1024 }, { 5, 139, 1024 }, { 4, 140, 1024 }, { 5, 141, 1024 }, { 5, 142, 1024 }, { 6, 143, 1024 }, + { 3, 144, 1024 }, { 4, 145, 1024 }, { 4, 146, 1024 }, { 5, 147, 1024 }, { 4, 148, 1024 }, { 5, 149, 1024 }, { 5, 150, 1024 }, { 6, 151, 1024 }, + { 4, 152, 1024 }, { 5, 153, 1024 }, { 5, 154, 1024 }, { 6, 155, 1024 }, { 5, 156, 1024 }, { 6, 157, 1024 }, { 6, 158, 1024 }, { 7, 159, 1024 }, + { 3, 160, 1024 }, { 4, 161, 1024 }, { 4, 162, 1024 }, { 5, 163, 1024 }, { 4, 164, 1024 }, { 5, 165, 1024 }, { 5, 166, 1024 }, { 6, 167, 1024 }, + { 4, 168, 1024 }, { 5, 169, 1024 }, { 5, 170, 1024 }, { 6, 171, 1024 }, { 5, 172, 1024 }, { 6, 173, 1024 }, { 6, 174, 1024 }, { 7, 175, 1024 }, + { 4, 176, 1024 }, { 5, 177, 1024 }, { 5, 178, 1024 }, { 6, 179, 1024 }, { 5, 180, 1024 }, { 6, 181, 1024 }, { 6, 182, 1024 }, { 7, 183, 1024 }, + { 5, 184, 1024 }, { 6, 185, 1024 }, { 6, 186, 1024 }, { 7, 187, 1024 }, { 6, 188, 1024 }, { 7, 189, 1024 }, { 7, 190, 1024 }, { 8, 191, 1024 }, + { 3, 192, 1024 }, { 4, 193, 1024 }, { 4, 194, 1024 }, { 5, 195, 1024 }, { 4, 196, 1024 }, { 5, 197, 1024 }, { 5, 198, 1024 }, { 6, 199, 1024 }, + { 4, 200, 1024 }, { 5, 201, 1024 }, { 5, 202, 1024 }, { 6, 203, 1024 }, { 5, 204, 1024 }, { 6, 205, 1024 }, { 6, 206, 1024 }, { 7, 207, 1024 }, + { 4, 208, 1024 }, { 5, 209, 1024 }, { 5, 210, 1024 }, { 6, 211, 1024 }, { 5, 212, 1024 }, { 6, 213, 1024 }, { 6, 214, 1024 }, { 7, 215, 1024 }, + { 5, 216, 1024 }, { 6, 217, 1024 }, { 6, 218, 1024 }, { 7, 219, 1024 }, { 6, 220, 1024 }, 
{ 7, 221, 1024 }, { 7, 222, 1024 }, { 8, 223, 1024 }, + { 4, 224, 1024 }, { 5, 225, 1024 }, { 5, 226, 1024 }, { 6, 227, 1024 }, { 5, 228, 1024 }, { 6, 229, 1024 }, { 6, 230, 1024 }, { 7, 231, 1024 }, + { 5, 232, 1024 }, { 6, 233, 1024 }, { 6, 234, 1024 }, { 7, 235, 1024 }, { 6, 236, 1024 }, { 7, 237, 1024 }, { 7, 238, 1024 }, { 8, 239, 1024 }, + { 5, 240, 1024 }, { 6, 241, 1024 }, { 6, 242, 1024 }, { 7, 243, 1024 }, { 6, 244, 1024 }, { 7, 245, 1024 }, { 7, 246, 1024 }, { 8, 247, 1024 }, + { 6, 248, 1024 }, { 7, 249, 1024 }, { 7, 250, 1024 }, { 8, 251, 1024 }, { 7, 252, 1024 }, { 8, 253, 1024 }, { 8, 254, 1024 }, { 9, 255, 1024 }, + { 2, 256, 1024 }, { 3, 257, 1024 }, { 3, 258, 1024 }, { 4, 259, 1024 }, { 3, 260, 1024 }, { 4, 261, 1024 }, { 4, 262, 1024 }, { 5, 263, 1024 }, + { 3, 264, 1024 }, { 4, 265, 1024 }, { 4, 266, 1024 }, { 5, 267, 1024 }, { 4, 268, 1024 }, { 5, 269, 1024 }, { 5, 270, 1024 }, { 6, 271, 1024 }, + { 3, 272, 1024 }, { 4, 273, 1024 }, { 4, 274, 1024 }, { 5, 275, 1024 }, { 4, 276, 1024 }, { 5, 277, 1024 }, { 5, 278, 1024 }, { 6, 279, 1024 }, + { 4, 280, 1024 }, { 5, 281, 1024 }, { 5, 282, 1024 }, { 6, 283, 1024 }, { 5, 284, 1024 }, { 6, 285, 1024 }, { 6, 286, 1024 }, { 7, 287, 1024 }, + { 3, 288, 1024 }, { 4, 289, 1024 }, { 4, 290, 1024 }, { 5, 291, 1024 }, { 4, 292, 1024 }, { 5, 293, 1024 }, { 5, 294, 1024 }, { 6, 295, 1024 }, + { 4, 296, 1024 }, { 5, 297, 1024 }, { 5, 298, 1024 }, { 6, 299, 1024 }, { 5, 300, 1024 }, { 6, 301, 1024 }, { 6, 302, 1024 }, { 7, 303, 1024 }, + { 4, 304, 1024 }, { 5, 305, 1024 }, { 5, 306, 1024 }, { 6, 307, 1024 }, { 5, 308, 1024 }, { 6, 309, 1024 }, { 6, 310, 1024 }, { 7, 311, 1024 }, + { 5, 312, 1024 }, { 6, 313, 1024 }, { 6, 314, 1024 }, { 7, 315, 1024 }, { 6, 316, 1024 }, { 7, 317, 1024 }, { 7, 318, 1024 }, { 8, 319, 1024 }, + { 3, 320, 1024 }, { 4, 321, 1024 }, { 4, 322, 1024 }, { 5, 323, 1024 }, { 4, 324, 1024 }, { 5, 325, 1024 }, { 5, 326, 1024 }, { 6, 327, 1024 }, + { 4, 328, 1024 }, { 5, 329, 1024 }, { 5, 330, 1024 }, { 6, 331, 1024 }, { 5, 332, 1024 }, { 6, 333, 1024 }, { 6, 334, 1024 }, { 7, 335, 1024 }, + { 4, 336, 1024 }, { 5, 337, 1024 }, { 5, 338, 1024 }, { 6, 339, 1024 }, { 5, 340, 1024 }, { 6, 341, 1024 }, { 6, 342, 1024 }, { 7, 343, 1024 }, + { 5, 344, 1024 }, { 6, 345, 1024 }, { 6, 346, 1024 }, { 7, 347, 1024 }, { 6, 348, 1024 }, { 7, 349, 1024 }, { 7, 350, 1024 }, { 8, 351, 1024 }, + { 4, 352, 1024 }, { 5, 353, 1024 }, { 5, 354, 1024 }, { 6, 355, 1024 }, { 5, 356, 1024 }, { 6, 357, 1024 }, { 6, 358, 1024 }, { 7, 359, 1024 }, + { 5, 360, 1024 }, { 6, 361, 1024 }, { 6, 362, 1024 }, { 7, 363, 1024 }, { 6, 364, 1024 }, { 7, 365, 1024 }, { 7, 366, 1024 }, { 8, 367, 1024 }, + { 5, 368, 1024 }, { 6, 369, 1024 }, { 6, 370, 1024 }, { 7, 371, 1024 }, { 6, 372, 1024 }, { 7, 373, 1024 }, { 7, 374, 1024 }, { 8, 375, 1024 }, + { 6, 376, 1024 }, { 7, 377, 1024 }, { 7, 378, 1024 }, { 8, 379, 1024 }, { 7, 380, 1024 }, { 8, 381, 1024 }, { 8, 382, 1024 }, { 9, 383, 1024 }, + { 3, 384, 1024 }, { 4, 385, 1024 }, { 4, 386, 1024 }, { 5, 387, 1024 }, { 4, 388, 1024 }, { 5, 389, 1024 }, { 5, 390, 1024 }, { 6, 391, 1024 }, + { 4, 392, 1024 }, { 5, 393, 1024 }, { 5, 394, 1024 }, { 6, 395, 1024 }, { 5, 396, 1024 }, { 6, 397, 1024 }, { 6, 398, 1024 }, { 7, 399, 1024 }, + { 4, 400, 1024 }, { 5, 401, 1024 }, { 5, 402, 1024 }, { 6, 403, 1024 }, { 5, 404, 1024 }, { 6, 405, 1024 }, { 6, 406, 1024 }, { 7, 407, 1024 }, + { 5, 408, 1024 }, { 6, 409, 1024 }, { 6, 410, 1024 }, { 7, 411, 1024 }, { 6, 412, 1024 }, { 7, 413, 1024 }, { 7, 414, 1024 }, { 8, 415, 1024 
}, + { 4, 416, 1024 }, { 5, 417, 1024 }, { 5, 418, 1024 }, { 6, 419, 1024 }, { 5, 420, 1024 }, { 6, 421, 1024 }, { 6, 422, 1024 }, { 7, 423, 1024 }, + { 5, 424, 1024 }, { 6, 425, 1024 }, { 6, 426, 1024 }, { 7, 427, 1024 }, { 6, 428, 1024 }, { 7, 429, 1024 }, { 7, 430, 1024 }, { 8, 431, 1024 }, + { 5, 432, 1024 }, { 6, 433, 1024 }, { 6, 434, 1024 }, { 7, 435, 1024 }, { 6, 436, 1024 }, { 7, 437, 1024 }, { 7, 438, 1024 }, { 8, 439, 1024 }, + { 6, 440, 1024 }, { 7, 441, 1024 }, { 7, 442, 1024 }, { 8, 443, 1024 }, { 7, 444, 1024 }, { 8, 445, 1024 }, { 8, 446, 1024 }, { 9, 447, 1024 }, + { 4, 448, 1024 }, { 5, 449, 1024 }, { 5, 450, 1024 }, { 6, 451, 1024 }, { 5, 452, 1024 }, { 6, 453, 1024 }, { 6, 454, 1024 }, { 7, 455, 1024 }, + { 5, 456, 1024 }, { 6, 457, 1024 }, { 6, 458, 1024 }, { 7, 459, 1024 }, { 6, 460, 1024 }, { 7, 461, 1024 }, { 7, 462, 1024 }, { 8, 463, 1024 }, + { 5, 464, 1024 }, { 6, 465, 1024 }, { 6, 466, 1024 }, { 7, 467, 1024 }, { 6, 468, 1024 }, { 7, 469, 1024 }, { 7, 470, 1024 }, { 8, 471, 1024 }, + { 6, 472, 1024 }, { 7, 473, 1024 }, { 7, 474, 1024 }, { 8, 475, 1024 }, { 7, 476, 1024 }, { 8, 477, 1024 }, { 8, 478, 1024 }, { 9, 479, 1024 }, + { 5, 480, 1024 }, { 6, 481, 1024 }, { 6, 482, 1024 }, { 7, 483, 1024 }, { 6, 484, 1024 }, { 7, 485, 1024 }, { 7, 486, 1024 }, { 8, 487, 1024 }, + { 6, 488, 1024 }, { 7, 489, 1024 }, { 7, 490, 1024 }, { 8, 491, 1024 }, { 7, 492, 1024 }, { 8, 493, 1024 }, { 8, 494, 1024 }, { 9, 495, 1024 }, + { 6, 496, 1024 }, { 7, 497, 1024 }, { 7, 498, 1024 }, { 8, 499, 1024 }, { 7, 500, 1024 }, { 8, 501, 1024 }, { 8, 502, 1024 }, { 9, 503, 1024 }, + { 7, 504, 1024 }, { 8, 505, 1024 }, { 8, 506, 1024 }, { 9, 507, 1024 }, { 8, 508, 1024 }, { 9, 509, 1024 }, { 9, 510, 1024 }, { 10, 511, 1024 }, + { 2, 512, 1024 }, { 3, 513, 1024 }, { 3, 514, 1024 }, { 4, 515, 1024 }, { 3, 516, 1024 }, { 4, 517, 1024 }, { 4, 518, 1024 }, { 5, 519, 1024 }, + { 3, 520, 1024 }, { 4, 521, 1024 }, { 4, 522, 1024 }, { 5, 523, 1024 }, { 4, 524, 1024 }, { 5, 525, 1024 }, { 5, 526, 1024 }, { 6, 527, 1024 }, + { 3, 528, 1024 }, { 4, 529, 1024 }, { 4, 530, 1024 }, { 5, 531, 1024 }, { 4, 532, 1024 }, { 5, 533, 1024 }, { 5, 534, 1024 }, { 6, 535, 1024 }, + { 4, 536, 1024 }, { 5, 537, 1024 }, { 5, 538, 1024 }, { 6, 539, 1024 }, { 5, 540, 1024 }, { 6, 541, 1024 }, { 6, 542, 1024 }, { 7, 543, 1024 }, + { 3, 544, 1024 }, { 4, 545, 1024 }, { 4, 546, 1024 }, { 5, 547, 1024 }, { 4, 548, 1024 }, { 5, 549, 1024 }, { 5, 550, 1024 }, { 6, 551, 1024 }, + { 4, 552, 1024 }, { 5, 553, 1024 }, { 5, 554, 1024 }, { 6, 555, 1024 }, { 5, 556, 1024 }, { 6, 557, 1024 }, { 6, 558, 1024 }, { 7, 559, 1024 }, + { 4, 560, 1024 }, { 5, 561, 1024 }, { 5, 562, 1024 }, { 6, 563, 1024 }, { 5, 564, 1024 }, { 6, 565, 1024 }, { 6, 566, 1024 }, { 7, 567, 1024 }, + { 5, 568, 1024 }, { 6, 569, 1024 }, { 6, 570, 1024 }, { 7, 571, 1024 }, { 6, 572, 1024 }, { 7, 573, 1024 }, { 7, 574, 1024 }, { 8, 575, 1024 }, + { 3, 576, 1024 }, { 4, 577, 1024 }, { 4, 578, 1024 }, { 5, 579, 1024 }, { 4, 580, 1024 }, { 5, 581, 1024 }, { 5, 582, 1024 }, { 6, 583, 1024 }, + { 4, 584, 1024 }, { 5, 585, 1024 }, { 5, 586, 1024 }, { 6, 587, 1024 }, { 5, 588, 1024 }, { 6, 589, 1024 }, { 6, 590, 1024 }, { 7, 591, 1024 }, + { 4, 592, 1024 }, { 5, 593, 1024 }, { 5, 594, 1024 }, { 6, 595, 1024 }, { 5, 596, 1024 }, { 6, 597, 1024 }, { 6, 598, 1024 }, { 7, 599, 1024 }, + { 5, 600, 1024 }, { 6, 601, 1024 }, { 6, 602, 1024 }, { 7, 603, 1024 }, { 6, 604, 1024 }, { 7, 605, 1024 }, { 7, 606, 1024 }, { 8, 607, 1024 }, + { 4, 608, 1024 }, { 5, 609, 1024 }, { 5, 
610, 1024 }, { 6, 611, 1024 }, { 5, 612, 1024 }, { 6, 613, 1024 }, { 6, 614, 1024 }, { 7, 615, 1024 }, + { 5, 616, 1024 }, { 6, 617, 1024 }, { 6, 618, 1024 }, { 7, 619, 1024 }, { 6, 620, 1024 }, { 7, 621, 1024 }, { 7, 622, 1024 }, { 8, 623, 1024 }, + { 5, 624, 1024 }, { 6, 625, 1024 }, { 6, 626, 1024 }, { 7, 627, 1024 }, { 6, 628, 1024 }, { 7, 629, 1024 }, { 7, 630, 1024 }, { 8, 631, 1024 }, + { 6, 632, 1024 }, { 7, 633, 1024 }, { 7, 634, 1024 }, { 8, 635, 1024 }, { 7, 636, 1024 }, { 8, 637, 1024 }, { 8, 638, 1024 }, { 9, 639, 1024 }, + { 3, 640, 1024 }, { 4, 641, 1024 }, { 4, 642, 1024 }, { 5, 643, 1024 }, { 4, 644, 1024 }, { 5, 645, 1024 }, { 5, 646, 1024 }, { 6, 647, 1024 }, + { 4, 648, 1024 }, { 5, 649, 1024 }, { 5, 650, 1024 }, { 6, 651, 1024 }, { 5, 652, 1024 }, { 6, 653, 1024 }, { 6, 654, 1024 }, { 7, 655, 1024 }, + { 4, 656, 1024 }, { 5, 657, 1024 }, { 5, 658, 1024 }, { 6, 659, 1024 }, { 5, 660, 1024 }, { 6, 661, 1024 }, { 6, 662, 1024 }, { 7, 663, 1024 }, + { 5, 664, 1024 }, { 6, 665, 1024 }, { 6, 666, 1024 }, { 7, 667, 1024 }, { 6, 668, 1024 }, { 7, 669, 1024 }, { 7, 670, 1024 }, { 8, 671, 1024 }, + { 4, 672, 1024 }, { 5, 673, 1024 }, { 5, 674, 1024 }, { 6, 675, 1024 }, { 5, 676, 1024 }, { 6, 677, 1024 }, { 6, 678, 1024 }, { 7, 679, 1024 }, + { 5, 680, 1024 }, { 6, 681, 1024 }, { 6, 682, 1024 }, { 7, 683, 1024 }, { 6, 684, 1024 }, { 7, 685, 1024 }, { 7, 686, 1024 }, { 8, 687, 1024 }, + { 5, 688, 1024 }, { 6, 689, 1024 }, { 6, 690, 1024 }, { 7, 691, 1024 }, { 6, 692, 1024 }, { 7, 693, 1024 }, { 7, 694, 1024 }, { 8, 695, 1024 }, + { 6, 696, 1024 }, { 7, 697, 1024 }, { 7, 698, 1024 }, { 8, 699, 1024 }, { 7, 700, 1024 }, { 8, 701, 1024 }, { 8, 702, 1024 }, { 9, 703, 1024 }, + { 4, 704, 1024 }, { 5, 705, 1024 }, { 5, 706, 1024 }, { 6, 707, 1024 }, { 5, 708, 1024 }, { 6, 709, 1024 }, { 6, 710, 1024 }, { 7, 711, 1024 }, + { 5, 712, 1024 }, { 6, 713, 1024 }, { 6, 714, 1024 }, { 7, 715, 1024 }, { 6, 716, 1024 }, { 7, 717, 1024 }, { 7, 718, 1024 }, { 8, 719, 1024 }, + { 5, 720, 1024 }, { 6, 721, 1024 }, { 6, 722, 1024 }, { 7, 723, 1024 }, { 6, 724, 1024 }, { 7, 725, 1024 }, { 7, 726, 1024 }, { 8, 727, 1024 }, + { 6, 728, 1024 }, { 7, 729, 1024 }, { 7, 730, 1024 }, { 8, 731, 1024 }, { 7, 732, 1024 }, { 8, 733, 1024 }, { 8, 734, 1024 }, { 9, 735, 1024 }, + { 5, 736, 1024 }, { 6, 737, 1024 }, { 6, 738, 1024 }, { 7, 739, 1024 }, { 6, 740, 1024 }, { 7, 741, 1024 }, { 7, 742, 1024 }, { 8, 743, 1024 }, + { 6, 744, 1024 }, { 7, 745, 1024 }, { 7, 746, 1024 }, { 8, 747, 1024 }, { 7, 748, 1024 }, { 8, 749, 1024 }, { 8, 750, 1024 }, { 9, 751, 1024 }, + { 6, 752, 1024 }, { 7, 753, 1024 }, { 7, 754, 1024 }, { 8, 755, 1024 }, { 7, 756, 1024 }, { 8, 757, 1024 }, { 8, 758, 1024 }, { 9, 759, 1024 }, + { 7, 760, 1024 }, { 8, 761, 1024 }, { 8, 762, 1024 }, { 9, 763, 1024 }, { 8, 764, 1024 }, { 9, 765, 1024 }, { 9, 766, 1024 }, { 10, 767, 1024 }, + { 3, 768, 1024 }, { 4, 769, 1024 }, { 4, 770, 1024 }, { 5, 771, 1024 }, { 4, 772, 1024 }, { 5, 773, 1024 }, { 5, 774, 1024 }, { 6, 775, 1024 }, + { 4, 776, 1024 }, { 5, 777, 1024 }, { 5, 778, 1024 }, { 6, 779, 1024 }, { 5, 780, 1024 }, { 6, 781, 1024 }, { 6, 782, 1024 }, { 7, 783, 1024 }, + { 4, 784, 1024 }, { 5, 785, 1024 }, { 5, 786, 1024 }, { 6, 787, 1024 }, { 5, 788, 1024 }, { 6, 789, 1024 }, { 6, 790, 1024 }, { 7, 791, 1024 }, + { 5, 792, 1024 }, { 6, 793, 1024 }, { 6, 794, 1024 }, { 7, 795, 1024 }, { 6, 796, 1024 }, { 7, 797, 1024 }, { 7, 798, 1024 }, { 8, 799, 1024 }, + { 4, 800, 1024 }, { 5, 801, 1024 }, { 5, 802, 1024 }, { 6, 803, 1024 }, { 5, 804, 1024 }, 
{ 6, 805, 1024 }, { 6, 806, 1024 }, { 7, 807, 1024 }, + { 5, 808, 1024 }, { 6, 809, 1024 }, { 6, 810, 1024 }, { 7, 811, 1024 }, { 6, 812, 1024 }, { 7, 813, 1024 }, { 7, 814, 1024 }, { 8, 815, 1024 }, + { 5, 816, 1024 }, { 6, 817, 1024 }, { 6, 818, 1024 }, { 7, 819, 1024 }, { 6, 820, 1024 }, { 7, 821, 1024 }, { 7, 822, 1024 }, { 8, 823, 1024 }, + { 6, 824, 1024 }, { 7, 825, 1024 }, { 7, 826, 1024 }, { 8, 827, 1024 }, { 7, 828, 1024 }, { 8, 829, 1024 }, { 8, 830, 1024 }, { 9, 831, 1024 }, + { 4, 832, 1024 }, { 5, 833, 1024 }, { 5, 834, 1024 }, { 6, 835, 1024 }, { 5, 836, 1024 }, { 6, 837, 1024 }, { 6, 838, 1024 }, { 7, 839, 1024 }, + { 5, 840, 1024 }, { 6, 841, 1024 }, { 6, 842, 1024 }, { 7, 843, 1024 }, { 6, 844, 1024 }, { 7, 845, 1024 }, { 7, 846, 1024 }, { 8, 847, 1024 }, + { 5, 848, 1024 }, { 6, 849, 1024 }, { 6, 850, 1024 }, { 7, 851, 1024 }, { 6, 852, 1024 }, { 7, 853, 1024 }, { 7, 854, 1024 }, { 8, 855, 1024 }, + { 6, 856, 1024 }, { 7, 857, 1024 }, { 7, 858, 1024 }, { 8, 859, 1024 }, { 7, 860, 1024 }, { 8, 861, 1024 }, { 8, 862, 1024 }, { 9, 863, 1024 }, + { 5, 864, 1024 }, { 6, 865, 1024 }, { 6, 866, 1024 }, { 7, 867, 1024 }, { 6, 868, 1024 }, { 7, 869, 1024 }, { 7, 870, 1024 }, { 8, 871, 1024 }, + { 6, 872, 1024 }, { 7, 873, 1024 }, { 7, 874, 1024 }, { 8, 875, 1024 }, { 7, 876, 1024 }, { 8, 877, 1024 }, { 8, 878, 1024 }, { 9, 879, 1024 }, + { 6, 880, 1024 }, { 7, 881, 1024 }, { 7, 882, 1024 }, { 8, 883, 1024 }, { 7, 884, 1024 }, { 8, 885, 1024 }, { 8, 886, 1024 }, { 9, 887, 1024 }, + { 7, 888, 1024 }, { 8, 889, 1024 }, { 8, 890, 1024 }, { 9, 891, 1024 }, { 8, 892, 1024 }, { 9, 893, 1024 }, { 9, 894, 1024 }, { 10, 895, 1024 }, + { 4, 896, 1024 }, { 5, 897, 1024 }, { 5, 898, 1024 }, { 6, 899, 1024 }, { 5, 900, 1024 }, { 6, 901, 1024 }, { 6, 902, 1024 }, { 7, 903, 1024 }, + { 5, 904, 1024 }, { 6, 905, 1024 }, { 6, 906, 1024 }, { 7, 907, 1024 }, { 6, 908, 1024 }, { 7, 909, 1024 }, { 7, 910, 1024 }, { 8, 911, 1024 }, + { 5, 912, 1024 }, { 6, 913, 1024 }, { 6, 914, 1024 }, { 7, 915, 1024 }, { 6, 916, 1024 }, { 7, 917, 1024 }, { 7, 918, 1024 }, { 8, 919, 1024 }, + { 6, 920, 1024 }, { 7, 921, 1024 }, { 7, 922, 1024 }, { 8, 923, 1024 }, { 7, 924, 1024 }, { 8, 925, 1024 }, { 8, 926, 1024 }, { 9, 927, 1024 }, + { 5, 928, 1024 }, { 6, 929, 1024 }, { 6, 930, 1024 }, { 7, 931, 1024 }, { 6, 932, 1024 }, { 7, 933, 1024 }, { 7, 934, 1024 }, { 8, 935, 1024 }, + { 6, 936, 1024 }, { 7, 937, 1024 }, { 7, 938, 1024 }, { 8, 939, 1024 }, { 7, 940, 1024 }, { 8, 941, 1024 }, { 8, 942, 1024 }, { 9, 943, 1024 }, + { 6, 944, 1024 }, { 7, 945, 1024 }, { 7, 946, 1024 }, { 8, 947, 1024 }, { 7, 948, 1024 }, { 8, 949, 1024 }, { 8, 950, 1024 }, { 9, 951, 1024 }, + { 7, 952, 1024 }, { 8, 953, 1024 }, { 8, 954, 1024 }, { 9, 955, 1024 }, { 8, 956, 1024 }, { 9, 957, 1024 }, { 9, 958, 1024 }, { 10, 959, 1024 }, + { 5, 960, 1024 }, { 6, 961, 1024 }, { 6, 962, 1024 }, { 7, 963, 1024 }, { 6, 964, 1024 }, { 7, 965, 1024 }, { 7, 966, 1024 }, { 8, 967, 1024 }, + { 6, 968, 1024 }, { 7, 969, 1024 }, { 7, 970, 1024 }, { 8, 971, 1024 }, { 7, 972, 1024 }, { 8, 973, 1024 }, { 8, 974, 1024 }, { 9, 975, 1024 }, + { 6, 976, 1024 }, { 7, 977, 1024 }, { 7, 978, 1024 }, { 8, 979, 1024 }, { 7, 980, 1024 }, { 8, 981, 1024 }, { 8, 982, 1024 }, { 9, 983, 1024 }, + { 7, 984, 1024 }, { 8, 985, 1024 }, { 8, 986, 1024 }, { 9, 987, 1024 }, { 8, 988, 1024 }, { 9, 989, 1024 }, { 9, 990, 1024 }, { 10, 991, 1024 }, + { 6, 992, 1024 }, { 7, 993, 1024 }, { 7, 994, 1024 }, { 8, 995, 1024 }, { 7, 996, 1024 }, { 8, 997, 1024 }, { 8, 998, 1024 }, { 9, 999, 
1024 }, + { 7, 1000, 1024 }, { 8, 1001, 1024 }, { 8, 1002, 1024 }, { 9, 1003, 1024 }, { 8, 1004, 1024 }, { 9, 1005, 1024 }, { 9, 1006, 1024 }, { 10, 1007, 1024 }, + { 7, 1008, 1024 }, { 8, 1009, 1024 }, { 8, 1010, 1024 }, { 9, 1011, 1024 }, { 8, 1012, 1024 }, { 9, 1013, 1024 }, { 9, 1014, 1024 }, { 10, 1015, 1024 }, + { 8, 1016, 1024 }, { 9, 1017, 1024 }, { 9, 1018, 1024 }, { 10, 1019, 1024 }, { 9, 1020, 1024 }, { 10, 1021, 1024 }, { 10, 1022, 1024 }, { 11, 1023, 1024 }, +#if FP_LUT > 11 + { 1, 0, 0 }, { 2, 1, 2048 }, { 2, 2, 2048 }, { 3, 3, 2048 }, { 2, 4, 2048 }, { 3, 5, 2048 }, { 3, 6, 2048 }, { 4, 7, 2048 }, + { 2, 8, 2048 }, { 3, 9, 2048 }, { 3, 10, 2048 }, { 4, 11, 2048 }, { 3, 12, 2048 }, { 4, 13, 2048 }, { 4, 14, 2048 }, { 5, 15, 2048 }, + { 2, 16, 2048 }, { 3, 17, 2048 }, { 3, 18, 2048 }, { 4, 19, 2048 }, { 3, 20, 2048 }, { 4, 21, 2048 }, { 4, 22, 2048 }, { 5, 23, 2048 }, + { 3, 24, 2048 }, { 4, 25, 2048 }, { 4, 26, 2048 }, { 5, 27, 2048 }, { 4, 28, 2048 }, { 5, 29, 2048 }, { 5, 30, 2048 }, { 6, 31, 2048 }, + { 2, 32, 2048 }, { 3, 33, 2048 }, { 3, 34, 2048 }, { 4, 35, 2048 }, { 3, 36, 2048 }, { 4, 37, 2048 }, { 4, 38, 2048 }, { 5, 39, 2048 }, + { 3, 40, 2048 }, { 4, 41, 2048 }, { 4, 42, 2048 }, { 5, 43, 2048 }, { 4, 44, 2048 }, { 5, 45, 2048 }, { 5, 46, 2048 }, { 6, 47, 2048 }, + { 3, 48, 2048 }, { 4, 49, 2048 }, { 4, 50, 2048 }, { 5, 51, 2048 }, { 4, 52, 2048 }, { 5, 53, 2048 }, { 5, 54, 2048 }, { 6, 55, 2048 }, + { 4, 56, 2048 }, { 5, 57, 2048 }, { 5, 58, 2048 }, { 6, 59, 2048 }, { 5, 60, 2048 }, { 6, 61, 2048 }, { 6, 62, 2048 }, { 7, 63, 2048 }, + { 2, 64, 2048 }, { 3, 65, 2048 }, { 3, 66, 2048 }, { 4, 67, 2048 }, { 3, 68, 2048 }, { 4, 69, 2048 }, { 4, 70, 2048 }, { 5, 71, 2048 }, + { 3, 72, 2048 }, { 4, 73, 2048 }, { 4, 74, 2048 }, { 5, 75, 2048 }, { 4, 76, 2048 }, { 5, 77, 2048 }, { 5, 78, 2048 }, { 6, 79, 2048 }, + { 3, 80, 2048 }, { 4, 81, 2048 }, { 4, 82, 2048 }, { 5, 83, 2048 }, { 4, 84, 2048 }, { 5, 85, 2048 }, { 5, 86, 2048 }, { 6, 87, 2048 }, + { 4, 88, 2048 }, { 5, 89, 2048 }, { 5, 90, 2048 }, { 6, 91, 2048 }, { 5, 92, 2048 }, { 6, 93, 2048 }, { 6, 94, 2048 }, { 7, 95, 2048 }, + { 3, 96, 2048 }, { 4, 97, 2048 }, { 4, 98, 2048 }, { 5, 99, 2048 }, { 4, 100, 2048 }, { 5, 101, 2048 }, { 5, 102, 2048 }, { 6, 103, 2048 }, + { 4, 104, 2048 }, { 5, 105, 2048 }, { 5, 106, 2048 }, { 6, 107, 2048 }, { 5, 108, 2048 }, { 6, 109, 2048 }, { 6, 110, 2048 }, { 7, 111, 2048 }, + { 4, 112, 2048 }, { 5, 113, 2048 }, { 5, 114, 2048 }, { 6, 115, 2048 }, { 5, 116, 2048 }, { 6, 117, 2048 }, { 6, 118, 2048 }, { 7, 119, 2048 }, + { 5, 120, 2048 }, { 6, 121, 2048 }, { 6, 122, 2048 }, { 7, 123, 2048 }, { 6, 124, 2048 }, { 7, 125, 2048 }, { 7, 126, 2048 }, { 8, 127, 2048 }, + { 2, 128, 2048 }, { 3, 129, 2048 }, { 3, 130, 2048 }, { 4, 131, 2048 }, { 3, 132, 2048 }, { 4, 133, 2048 }, { 4, 134, 2048 }, { 5, 135, 2048 }, + { 3, 136, 2048 }, { 4, 137, 2048 }, { 4, 138, 2048 }, { 5, 139, 2048 }, { 4, 140, 2048 }, { 5, 141, 2048 }, { 5, 142, 2048 }, { 6, 143, 2048 }, + { 3, 144, 2048 }, { 4, 145, 2048 }, { 4, 146, 2048 }, { 5, 147, 2048 }, { 4, 148, 2048 }, { 5, 149, 2048 }, { 5, 150, 2048 }, { 6, 151, 2048 }, + { 4, 152, 2048 }, { 5, 153, 2048 }, { 5, 154, 2048 }, { 6, 155, 2048 }, { 5, 156, 2048 }, { 6, 157, 2048 }, { 6, 158, 2048 }, { 7, 159, 2048 }, + { 3, 160, 2048 }, { 4, 161, 2048 }, { 4, 162, 2048 }, { 5, 163, 2048 }, { 4, 164, 2048 }, { 5, 165, 2048 }, { 5, 166, 2048 }, { 6, 167, 2048 }, + { 4, 168, 2048 }, { 5, 169, 2048 }, { 5, 170, 2048 }, { 6, 171, 2048 }, { 5, 172, 2048 }, { 6, 173, 2048 
}, { 6, 174, 2048 }, { 7, 175, 2048 }, + { 4, 176, 2048 }, { 5, 177, 2048 }, { 5, 178, 2048 }, { 6, 179, 2048 }, { 5, 180, 2048 }, { 6, 181, 2048 }, { 6, 182, 2048 }, { 7, 183, 2048 }, + { 5, 184, 2048 }, { 6, 185, 2048 }, { 6, 186, 2048 }, { 7, 187, 2048 }, { 6, 188, 2048 }, { 7, 189, 2048 }, { 7, 190, 2048 }, { 8, 191, 2048 }, + { 3, 192, 2048 }, { 4, 193, 2048 }, { 4, 194, 2048 }, { 5, 195, 2048 }, { 4, 196, 2048 }, { 5, 197, 2048 }, { 5, 198, 2048 }, { 6, 199, 2048 }, + { 4, 200, 2048 }, { 5, 201, 2048 }, { 5, 202, 2048 }, { 6, 203, 2048 }, { 5, 204, 2048 }, { 6, 205, 2048 }, { 6, 206, 2048 }, { 7, 207, 2048 }, + { 4, 208, 2048 }, { 5, 209, 2048 }, { 5, 210, 2048 }, { 6, 211, 2048 }, { 5, 212, 2048 }, { 6, 213, 2048 }, { 6, 214, 2048 }, { 7, 215, 2048 }, + { 5, 216, 2048 }, { 6, 217, 2048 }, { 6, 218, 2048 }, { 7, 219, 2048 }, { 6, 220, 2048 }, { 7, 221, 2048 }, { 7, 222, 2048 }, { 8, 223, 2048 }, + { 4, 224, 2048 }, { 5, 225, 2048 }, { 5, 226, 2048 }, { 6, 227, 2048 }, { 5, 228, 2048 }, { 6, 229, 2048 }, { 6, 230, 2048 }, { 7, 231, 2048 }, + { 5, 232, 2048 }, { 6, 233, 2048 }, { 6, 234, 2048 }, { 7, 235, 2048 }, { 6, 236, 2048 }, { 7, 237, 2048 }, { 7, 238, 2048 }, { 8, 239, 2048 }, + { 5, 240, 2048 }, { 6, 241, 2048 }, { 6, 242, 2048 }, { 7, 243, 2048 }, { 6, 244, 2048 }, { 7, 245, 2048 }, { 7, 246, 2048 }, { 8, 247, 2048 }, + { 6, 248, 2048 }, { 7, 249, 2048 }, { 7, 250, 2048 }, { 8, 251, 2048 }, { 7, 252, 2048 }, { 8, 253, 2048 }, { 8, 254, 2048 }, { 9, 255, 2048 }, + { 2, 256, 2048 }, { 3, 257, 2048 }, { 3, 258, 2048 }, { 4, 259, 2048 }, { 3, 260, 2048 }, { 4, 261, 2048 }, { 4, 262, 2048 }, { 5, 263, 2048 }, + { 3, 264, 2048 }, { 4, 265, 2048 }, { 4, 266, 2048 }, { 5, 267, 2048 }, { 4, 268, 2048 }, { 5, 269, 2048 }, { 5, 270, 2048 }, { 6, 271, 2048 }, + { 3, 272, 2048 }, { 4, 273, 2048 }, { 4, 274, 2048 }, { 5, 275, 2048 }, { 4, 276, 2048 }, { 5, 277, 2048 }, { 5, 278, 2048 }, { 6, 279, 2048 }, + { 4, 280, 2048 }, { 5, 281, 2048 }, { 5, 282, 2048 }, { 6, 283, 2048 }, { 5, 284, 2048 }, { 6, 285, 2048 }, { 6, 286, 2048 }, { 7, 287, 2048 }, + { 3, 288, 2048 }, { 4, 289, 2048 }, { 4, 290, 2048 }, { 5, 291, 2048 }, { 4, 292, 2048 }, { 5, 293, 2048 }, { 5, 294, 2048 }, { 6, 295, 2048 }, + { 4, 296, 2048 }, { 5, 297, 2048 }, { 5, 298, 2048 }, { 6, 299, 2048 }, { 5, 300, 2048 }, { 6, 301, 2048 }, { 6, 302, 2048 }, { 7, 303, 2048 }, + { 4, 304, 2048 }, { 5, 305, 2048 }, { 5, 306, 2048 }, { 6, 307, 2048 }, { 5, 308, 2048 }, { 6, 309, 2048 }, { 6, 310, 2048 }, { 7, 311, 2048 }, + { 5, 312, 2048 }, { 6, 313, 2048 }, { 6, 314, 2048 }, { 7, 315, 2048 }, { 6, 316, 2048 }, { 7, 317, 2048 }, { 7, 318, 2048 }, { 8, 319, 2048 }, + { 3, 320, 2048 }, { 4, 321, 2048 }, { 4, 322, 2048 }, { 5, 323, 2048 }, { 4, 324, 2048 }, { 5, 325, 2048 }, { 5, 326, 2048 }, { 6, 327, 2048 }, + { 4, 328, 2048 }, { 5, 329, 2048 }, { 5, 330, 2048 }, { 6, 331, 2048 }, { 5, 332, 2048 }, { 6, 333, 2048 }, { 6, 334, 2048 }, { 7, 335, 2048 }, + { 4, 336, 2048 }, { 5, 337, 2048 }, { 5, 338, 2048 }, { 6, 339, 2048 }, { 5, 340, 2048 }, { 6, 341, 2048 }, { 6, 342, 2048 }, { 7, 343, 2048 }, + { 5, 344, 2048 }, { 6, 345, 2048 }, { 6, 346, 2048 }, { 7, 347, 2048 }, { 6, 348, 2048 }, { 7, 349, 2048 }, { 7, 350, 2048 }, { 8, 351, 2048 }, + { 4, 352, 2048 }, { 5, 353, 2048 }, { 5, 354, 2048 }, { 6, 355, 2048 }, { 5, 356, 2048 }, { 6, 357, 2048 }, { 6, 358, 2048 }, { 7, 359, 2048 }, + { 5, 360, 2048 }, { 6, 361, 2048 }, { 6, 362, 2048 }, { 7, 363, 2048 }, { 6, 364, 2048 }, { 7, 365, 2048 }, { 7, 366, 2048 }, { 8, 367, 2048 }, + { 5, 368, 
2048 }, { 6, 369, 2048 }, { 6, 370, 2048 }, { 7, 371, 2048 }, { 6, 372, 2048 }, { 7, 373, 2048 }, { 7, 374, 2048 }, { 8, 375, 2048 }, + { 6, 376, 2048 }, { 7, 377, 2048 }, { 7, 378, 2048 }, { 8, 379, 2048 }, { 7, 380, 2048 }, { 8, 381, 2048 }, { 8, 382, 2048 }, { 9, 383, 2048 }, + { 3, 384, 2048 }, { 4, 385, 2048 }, { 4, 386, 2048 }, { 5, 387, 2048 }, { 4, 388, 2048 }, { 5, 389, 2048 }, { 5, 390, 2048 }, { 6, 391, 2048 }, + { 4, 392, 2048 }, { 5, 393, 2048 }, { 5, 394, 2048 }, { 6, 395, 2048 }, { 5, 396, 2048 }, { 6, 397, 2048 }, { 6, 398, 2048 }, { 7, 399, 2048 }, + { 4, 400, 2048 }, { 5, 401, 2048 }, { 5, 402, 2048 }, { 6, 403, 2048 }, { 5, 404, 2048 }, { 6, 405, 2048 }, { 6, 406, 2048 }, { 7, 407, 2048 }, + { 5, 408, 2048 }, { 6, 409, 2048 }, { 6, 410, 2048 }, { 7, 411, 2048 }, { 6, 412, 2048 }, { 7, 413, 2048 }, { 7, 414, 2048 }, { 8, 415, 2048 }, + { 4, 416, 2048 }, { 5, 417, 2048 }, { 5, 418, 2048 }, { 6, 419, 2048 }, { 5, 420, 2048 }, { 6, 421, 2048 }, { 6, 422, 2048 }, { 7, 423, 2048 }, + { 5, 424, 2048 }, { 6, 425, 2048 }, { 6, 426, 2048 }, { 7, 427, 2048 }, { 6, 428, 2048 }, { 7, 429, 2048 }, { 7, 430, 2048 }, { 8, 431, 2048 }, + { 5, 432, 2048 }, { 6, 433, 2048 }, { 6, 434, 2048 }, { 7, 435, 2048 }, { 6, 436, 2048 }, { 7, 437, 2048 }, { 7, 438, 2048 }, { 8, 439, 2048 }, + { 6, 440, 2048 }, { 7, 441, 2048 }, { 7, 442, 2048 }, { 8, 443, 2048 }, { 7, 444, 2048 }, { 8, 445, 2048 }, { 8, 446, 2048 }, { 9, 447, 2048 }, + { 4, 448, 2048 }, { 5, 449, 2048 }, { 5, 450, 2048 }, { 6, 451, 2048 }, { 5, 452, 2048 }, { 6, 453, 2048 }, { 6, 454, 2048 }, { 7, 455, 2048 }, + { 5, 456, 2048 }, { 6, 457, 2048 }, { 6, 458, 2048 }, { 7, 459, 2048 }, { 6, 460, 2048 }, { 7, 461, 2048 }, { 7, 462, 2048 }, { 8, 463, 2048 }, + { 5, 464, 2048 }, { 6, 465, 2048 }, { 6, 466, 2048 }, { 7, 467, 2048 }, { 6, 468, 2048 }, { 7, 469, 2048 }, { 7, 470, 2048 }, { 8, 471, 2048 }, + { 6, 472, 2048 }, { 7, 473, 2048 }, { 7, 474, 2048 }, { 8, 475, 2048 }, { 7, 476, 2048 }, { 8, 477, 2048 }, { 8, 478, 2048 }, { 9, 479, 2048 }, + { 5, 480, 2048 }, { 6, 481, 2048 }, { 6, 482, 2048 }, { 7, 483, 2048 }, { 6, 484, 2048 }, { 7, 485, 2048 }, { 7, 486, 2048 }, { 8, 487, 2048 }, + { 6, 488, 2048 }, { 7, 489, 2048 }, { 7, 490, 2048 }, { 8, 491, 2048 }, { 7, 492, 2048 }, { 8, 493, 2048 }, { 8, 494, 2048 }, { 9, 495, 2048 }, + { 6, 496, 2048 }, { 7, 497, 2048 }, { 7, 498, 2048 }, { 8, 499, 2048 }, { 7, 500, 2048 }, { 8, 501, 2048 }, { 8, 502, 2048 }, { 9, 503, 2048 }, + { 7, 504, 2048 }, { 8, 505, 2048 }, { 8, 506, 2048 }, { 9, 507, 2048 }, { 8, 508, 2048 }, { 9, 509, 2048 }, { 9, 510, 2048 }, { 10, 511, 2048 }, + { 2, 512, 2048 }, { 3, 513, 2048 }, { 3, 514, 2048 }, { 4, 515, 2048 }, { 3, 516, 2048 }, { 4, 517, 2048 }, { 4, 518, 2048 }, { 5, 519, 2048 }, + { 3, 520, 2048 }, { 4, 521, 2048 }, { 4, 522, 2048 }, { 5, 523, 2048 }, { 4, 524, 2048 }, { 5, 525, 2048 }, { 5, 526, 2048 }, { 6, 527, 2048 }, + { 3, 528, 2048 }, { 4, 529, 2048 }, { 4, 530, 2048 }, { 5, 531, 2048 }, { 4, 532, 2048 }, { 5, 533, 2048 }, { 5, 534, 2048 }, { 6, 535, 2048 }, + { 4, 536, 2048 }, { 5, 537, 2048 }, { 5, 538, 2048 }, { 6, 539, 2048 }, { 5, 540, 2048 }, { 6, 541, 2048 }, { 6, 542, 2048 }, { 7, 543, 2048 }, + { 3, 544, 2048 }, { 4, 545, 2048 }, { 4, 546, 2048 }, { 5, 547, 2048 }, { 4, 548, 2048 }, { 5, 549, 2048 }, { 5, 550, 2048 }, { 6, 551, 2048 }, + { 4, 552, 2048 }, { 5, 553, 2048 }, { 5, 554, 2048 }, { 6, 555, 2048 }, { 5, 556, 2048 }, { 6, 557, 2048 }, { 6, 558, 2048 }, { 7, 559, 2048 }, + { 4, 560, 2048 }, { 5, 561, 2048 }, { 5, 562, 2048 }, { 6, 
563, 2048 }, { 5, 564, 2048 }, { 6, 565, 2048 }, { 6, 566, 2048 }, { 7, 567, 2048 }, + { 5, 568, 2048 }, { 6, 569, 2048 }, { 6, 570, 2048 }, { 7, 571, 2048 }, { 6, 572, 2048 }, { 7, 573, 2048 }, { 7, 574, 2048 }, { 8, 575, 2048 }, + { 3, 576, 2048 }, { 4, 577, 2048 }, { 4, 578, 2048 }, { 5, 579, 2048 }, { 4, 580, 2048 }, { 5, 581, 2048 }, { 5, 582, 2048 }, { 6, 583, 2048 }, + { 4, 584, 2048 }, { 5, 585, 2048 }, { 5, 586, 2048 }, { 6, 587, 2048 }, { 5, 588, 2048 }, { 6, 589, 2048 }, { 6, 590, 2048 }, { 7, 591, 2048 }, + { 4, 592, 2048 }, { 5, 593, 2048 }, { 5, 594, 2048 }, { 6, 595, 2048 }, { 5, 596, 2048 }, { 6, 597, 2048 }, { 6, 598, 2048 }, { 7, 599, 2048 }, + { 5, 600, 2048 }, { 6, 601, 2048 }, { 6, 602, 2048 }, { 7, 603, 2048 }, { 6, 604, 2048 }, { 7, 605, 2048 }, { 7, 606, 2048 }, { 8, 607, 2048 }, + { 4, 608, 2048 }, { 5, 609, 2048 }, { 5, 610, 2048 }, { 6, 611, 2048 }, { 5, 612, 2048 }, { 6, 613, 2048 }, { 6, 614, 2048 }, { 7, 615, 2048 }, + { 5, 616, 2048 }, { 6, 617, 2048 }, { 6, 618, 2048 }, { 7, 619, 2048 }, { 6, 620, 2048 }, { 7, 621, 2048 }, { 7, 622, 2048 }, { 8, 623, 2048 }, + { 5, 624, 2048 }, { 6, 625, 2048 }, { 6, 626, 2048 }, { 7, 627, 2048 }, { 6, 628, 2048 }, { 7, 629, 2048 }, { 7, 630, 2048 }, { 8, 631, 2048 }, + { 6, 632, 2048 }, { 7, 633, 2048 }, { 7, 634, 2048 }, { 8, 635, 2048 }, { 7, 636, 2048 }, { 8, 637, 2048 }, { 8, 638, 2048 }, { 9, 639, 2048 }, + { 3, 640, 2048 }, { 4, 641, 2048 }, { 4, 642, 2048 }, { 5, 643, 2048 }, { 4, 644, 2048 }, { 5, 645, 2048 }, { 5, 646, 2048 }, { 6, 647, 2048 }, + { 4, 648, 2048 }, { 5, 649, 2048 }, { 5, 650, 2048 }, { 6, 651, 2048 }, { 5, 652, 2048 }, { 6, 653, 2048 }, { 6, 654, 2048 }, { 7, 655, 2048 }, + { 4, 656, 2048 }, { 5, 657, 2048 }, { 5, 658, 2048 }, { 6, 659, 2048 }, { 5, 660, 2048 }, { 6, 661, 2048 }, { 6, 662, 2048 }, { 7, 663, 2048 }, + { 5, 664, 2048 }, { 6, 665, 2048 }, { 6, 666, 2048 }, { 7, 667, 2048 }, { 6, 668, 2048 }, { 7, 669, 2048 }, { 7, 670, 2048 }, { 8, 671, 2048 }, + { 4, 672, 2048 }, { 5, 673, 2048 }, { 5, 674, 2048 }, { 6, 675, 2048 }, { 5, 676, 2048 }, { 6, 677, 2048 }, { 6, 678, 2048 }, { 7, 679, 2048 }, + { 5, 680, 2048 }, { 6, 681, 2048 }, { 6, 682, 2048 }, { 7, 683, 2048 }, { 6, 684, 2048 }, { 7, 685, 2048 }, { 7, 686, 2048 }, { 8, 687, 2048 }, + { 5, 688, 2048 }, { 6, 689, 2048 }, { 6, 690, 2048 }, { 7, 691, 2048 }, { 6, 692, 2048 }, { 7, 693, 2048 }, { 7, 694, 2048 }, { 8, 695, 2048 }, + { 6, 696, 2048 }, { 7, 697, 2048 }, { 7, 698, 2048 }, { 8, 699, 2048 }, { 7, 700, 2048 }, { 8, 701, 2048 }, { 8, 702, 2048 }, { 9, 703, 2048 }, + { 4, 704, 2048 }, { 5, 705, 2048 }, { 5, 706, 2048 }, { 6, 707, 2048 }, { 5, 708, 2048 }, { 6, 709, 2048 }, { 6, 710, 2048 }, { 7, 711, 2048 }, + { 5, 712, 2048 }, { 6, 713, 2048 }, { 6, 714, 2048 }, { 7, 715, 2048 }, { 6, 716, 2048 }, { 7, 717, 2048 }, { 7, 718, 2048 }, { 8, 719, 2048 }, + { 5, 720, 2048 }, { 6, 721, 2048 }, { 6, 722, 2048 }, { 7, 723, 2048 }, { 6, 724, 2048 }, { 7, 725, 2048 }, { 7, 726, 2048 }, { 8, 727, 2048 }, + { 6, 728, 2048 }, { 7, 729, 2048 }, { 7, 730, 2048 }, { 8, 731, 2048 }, { 7, 732, 2048 }, { 8, 733, 2048 }, { 8, 734, 2048 }, { 9, 735, 2048 }, + { 5, 736, 2048 }, { 6, 737, 2048 }, { 6, 738, 2048 }, { 7, 739, 2048 }, { 6, 740, 2048 }, { 7, 741, 2048 }, { 7, 742, 2048 }, { 8, 743, 2048 }, + { 6, 744, 2048 }, { 7, 745, 2048 }, { 7, 746, 2048 }, { 8, 747, 2048 }, { 7, 748, 2048 }, { 8, 749, 2048 }, { 8, 750, 2048 }, { 9, 751, 2048 }, + { 6, 752, 2048 }, { 7, 753, 2048 }, { 7, 754, 2048 }, { 8, 755, 2048 }, { 7, 756, 2048 }, { 8, 757, 2048 }, { 
8, 758, 2048 }, { 9, 759, 2048 }, + { 7, 760, 2048 }, { 8, 761, 2048 }, { 8, 762, 2048 }, { 9, 763, 2048 }, { 8, 764, 2048 }, { 9, 765, 2048 }, { 9, 766, 2048 }, { 10, 767, 2048 }, + { 3, 768, 2048 }, { 4, 769, 2048 }, { 4, 770, 2048 }, { 5, 771, 2048 }, { 4, 772, 2048 }, { 5, 773, 2048 }, { 5, 774, 2048 }, { 6, 775, 2048 }, + { 4, 776, 2048 }, { 5, 777, 2048 }, { 5, 778, 2048 }, { 6, 779, 2048 }, { 5, 780, 2048 }, { 6, 781, 2048 }, { 6, 782, 2048 }, { 7, 783, 2048 }, + { 4, 784, 2048 }, { 5, 785, 2048 }, { 5, 786, 2048 }, { 6, 787, 2048 }, { 5, 788, 2048 }, { 6, 789, 2048 }, { 6, 790, 2048 }, { 7, 791, 2048 }, + { 5, 792, 2048 }, { 6, 793, 2048 }, { 6, 794, 2048 }, { 7, 795, 2048 }, { 6, 796, 2048 }, { 7, 797, 2048 }, { 7, 798, 2048 }, { 8, 799, 2048 }, + { 4, 800, 2048 }, { 5, 801, 2048 }, { 5, 802, 2048 }, { 6, 803, 2048 }, { 5, 804, 2048 }, { 6, 805, 2048 }, { 6, 806, 2048 }, { 7, 807, 2048 }, + { 5, 808, 2048 }, { 6, 809, 2048 }, { 6, 810, 2048 }, { 7, 811, 2048 }, { 6, 812, 2048 }, { 7, 813, 2048 }, { 7, 814, 2048 }, { 8, 815, 2048 }, + { 5, 816, 2048 }, { 6, 817, 2048 }, { 6, 818, 2048 }, { 7, 819, 2048 }, { 6, 820, 2048 }, { 7, 821, 2048 }, { 7, 822, 2048 }, { 8, 823, 2048 }, + { 6, 824, 2048 }, { 7, 825, 2048 }, { 7, 826, 2048 }, { 8, 827, 2048 }, { 7, 828, 2048 }, { 8, 829, 2048 }, { 8, 830, 2048 }, { 9, 831, 2048 }, + { 4, 832, 2048 }, { 5, 833, 2048 }, { 5, 834, 2048 }, { 6, 835, 2048 }, { 5, 836, 2048 }, { 6, 837, 2048 }, { 6, 838, 2048 }, { 7, 839, 2048 }, + { 5, 840, 2048 }, { 6, 841, 2048 }, { 6, 842, 2048 }, { 7, 843, 2048 }, { 6, 844, 2048 }, { 7, 845, 2048 }, { 7, 846, 2048 }, { 8, 847, 2048 }, + { 5, 848, 2048 }, { 6, 849, 2048 }, { 6, 850, 2048 }, { 7, 851, 2048 }, { 6, 852, 2048 }, { 7, 853, 2048 }, { 7, 854, 2048 }, { 8, 855, 2048 }, + { 6, 856, 2048 }, { 7, 857, 2048 }, { 7, 858, 2048 }, { 8, 859, 2048 }, { 7, 860, 2048 }, { 8, 861, 2048 }, { 8, 862, 2048 }, { 9, 863, 2048 }, + { 5, 864, 2048 }, { 6, 865, 2048 }, { 6, 866, 2048 }, { 7, 867, 2048 }, { 6, 868, 2048 }, { 7, 869, 2048 }, { 7, 870, 2048 }, { 8, 871, 2048 }, + { 6, 872, 2048 }, { 7, 873, 2048 }, { 7, 874, 2048 }, { 8, 875, 2048 }, { 7, 876, 2048 }, { 8, 877, 2048 }, { 8, 878, 2048 }, { 9, 879, 2048 }, + { 6, 880, 2048 }, { 7, 881, 2048 }, { 7, 882, 2048 }, { 8, 883, 2048 }, { 7, 884, 2048 }, { 8, 885, 2048 }, { 8, 886, 2048 }, { 9, 887, 2048 }, + { 7, 888, 2048 }, { 8, 889, 2048 }, { 8, 890, 2048 }, { 9, 891, 2048 }, { 8, 892, 2048 }, { 9, 893, 2048 }, { 9, 894, 2048 }, { 10, 895, 2048 }, + { 4, 896, 2048 }, { 5, 897, 2048 }, { 5, 898, 2048 }, { 6, 899, 2048 }, { 5, 900, 2048 }, { 6, 901, 2048 }, { 6, 902, 2048 }, { 7, 903, 2048 }, + { 5, 904, 2048 }, { 6, 905, 2048 }, { 6, 906, 2048 }, { 7, 907, 2048 }, { 6, 908, 2048 }, { 7, 909, 2048 }, { 7, 910, 2048 }, { 8, 911, 2048 }, + { 5, 912, 2048 }, { 6, 913, 2048 }, { 6, 914, 2048 }, { 7, 915, 2048 }, { 6, 916, 2048 }, { 7, 917, 2048 }, { 7, 918, 2048 }, { 8, 919, 2048 }, + { 6, 920, 2048 }, { 7, 921, 2048 }, { 7, 922, 2048 }, { 8, 923, 2048 }, { 7, 924, 2048 }, { 8, 925, 2048 }, { 8, 926, 2048 }, { 9, 927, 2048 }, + { 5, 928, 2048 }, { 6, 929, 2048 }, { 6, 930, 2048 }, { 7, 931, 2048 }, { 6, 932, 2048 }, { 7, 933, 2048 }, { 7, 934, 2048 }, { 8, 935, 2048 }, + { 6, 936, 2048 }, { 7, 937, 2048 }, { 7, 938, 2048 }, { 8, 939, 2048 }, { 7, 940, 2048 }, { 8, 941, 2048 }, { 8, 942, 2048 }, { 9, 943, 2048 }, + { 6, 944, 2048 }, { 7, 945, 2048 }, { 7, 946, 2048 }, { 8, 947, 2048 }, { 7, 948, 2048 }, { 8, 949, 2048 }, { 8, 950, 2048 }, { 9, 951, 2048 }, + { 7, 952, 
2048 }, { 8, 953, 2048 }, { 8, 954, 2048 }, { 9, 955, 2048 }, { 8, 956, 2048 }, { 9, 957, 2048 }, { 9, 958, 2048 }, { 10, 959, 2048 }, + { 5, 960, 2048 }, { 6, 961, 2048 }, { 6, 962, 2048 }, { 7, 963, 2048 }, { 6, 964, 2048 }, { 7, 965, 2048 }, { 7, 966, 2048 }, { 8, 967, 2048 }, + { 6, 968, 2048 }, { 7, 969, 2048 }, { 7, 970, 2048 }, { 8, 971, 2048 }, { 7, 972, 2048 }, { 8, 973, 2048 }, { 8, 974, 2048 }, { 9, 975, 2048 }, + { 6, 976, 2048 }, { 7, 977, 2048 }, { 7, 978, 2048 }, { 8, 979, 2048 }, { 7, 980, 2048 }, { 8, 981, 2048 }, { 8, 982, 2048 }, { 9, 983, 2048 }, + { 7, 984, 2048 }, { 8, 985, 2048 }, { 8, 986, 2048 }, { 9, 987, 2048 }, { 8, 988, 2048 }, { 9, 989, 2048 }, { 9, 990, 2048 }, { 10, 991, 2048 }, + { 6, 992, 2048 }, { 7, 993, 2048 }, { 7, 994, 2048 }, { 8, 995, 2048 }, { 7, 996, 2048 }, { 8, 997, 2048 }, { 8, 998, 2048 }, { 9, 999, 2048 }, + { 7, 1000, 2048 }, { 8, 1001, 2048 }, { 8, 1002, 2048 }, { 9, 1003, 2048 }, { 8, 1004, 2048 }, { 9, 1005, 2048 }, { 9, 1006, 2048 }, { 10, 1007, 2048 }, + { 7, 1008, 2048 }, { 8, 1009, 2048 }, { 8, 1010, 2048 }, { 9, 1011, 2048 }, { 8, 1012, 2048 }, { 9, 1013, 2048 }, { 9, 1014, 2048 }, { 10, 1015, 2048 }, + { 8, 1016, 2048 }, { 9, 1017, 2048 }, { 9, 1018, 2048 }, { 10, 1019, 2048 }, { 9, 1020, 2048 }, { 10, 1021, 2048 }, { 10, 1022, 2048 }, { 11, 1023, 2048 }, + { 2, 1024, 2048 }, { 3, 1025, 2048 }, { 3, 1026, 2048 }, { 4, 1027, 2048 }, { 3, 1028, 2048 }, { 4, 1029, 2048 }, { 4, 1030, 2048 }, { 5, 1031, 2048 }, + { 3, 1032, 2048 }, { 4, 1033, 2048 }, { 4, 1034, 2048 }, { 5, 1035, 2048 }, { 4, 1036, 2048 }, { 5, 1037, 2048 }, { 5, 1038, 2048 }, { 6, 1039, 2048 }, + { 3, 1040, 2048 }, { 4, 1041, 2048 }, { 4, 1042, 2048 }, { 5, 1043, 2048 }, { 4, 1044, 2048 }, { 5, 1045, 2048 }, { 5, 1046, 2048 }, { 6, 1047, 2048 }, + { 4, 1048, 2048 }, { 5, 1049, 2048 }, { 5, 1050, 2048 }, { 6, 1051, 2048 }, { 5, 1052, 2048 }, { 6, 1053, 2048 }, { 6, 1054, 2048 }, { 7, 1055, 2048 }, + { 3, 1056, 2048 }, { 4, 1057, 2048 }, { 4, 1058, 2048 }, { 5, 1059, 2048 }, { 4, 1060, 2048 }, { 5, 1061, 2048 }, { 5, 1062, 2048 }, { 6, 1063, 2048 }, + { 4, 1064, 2048 }, { 5, 1065, 2048 }, { 5, 1066, 2048 }, { 6, 1067, 2048 }, { 5, 1068, 2048 }, { 6, 1069, 2048 }, { 6, 1070, 2048 }, { 7, 1071, 2048 }, + { 4, 1072, 2048 }, { 5, 1073, 2048 }, { 5, 1074, 2048 }, { 6, 1075, 2048 }, { 5, 1076, 2048 }, { 6, 1077, 2048 }, { 6, 1078, 2048 }, { 7, 1079, 2048 }, + { 5, 1080, 2048 }, { 6, 1081, 2048 }, { 6, 1082, 2048 }, { 7, 1083, 2048 }, { 6, 1084, 2048 }, { 7, 1085, 2048 }, { 7, 1086, 2048 }, { 8, 1087, 2048 }, + { 3, 1088, 2048 }, { 4, 1089, 2048 }, { 4, 1090, 2048 }, { 5, 1091, 2048 }, { 4, 1092, 2048 }, { 5, 1093, 2048 }, { 5, 1094, 2048 }, { 6, 1095, 2048 }, + { 4, 1096, 2048 }, { 5, 1097, 2048 }, { 5, 1098, 2048 }, { 6, 1099, 2048 }, { 5, 1100, 2048 }, { 6, 1101, 2048 }, { 6, 1102, 2048 }, { 7, 1103, 2048 }, + { 4, 1104, 2048 }, { 5, 1105, 2048 }, { 5, 1106, 2048 }, { 6, 1107, 2048 }, { 5, 1108, 2048 }, { 6, 1109, 2048 }, { 6, 1110, 2048 }, { 7, 1111, 2048 }, + { 5, 1112, 2048 }, { 6, 1113, 2048 }, { 6, 1114, 2048 }, { 7, 1115, 2048 }, { 6, 1116, 2048 }, { 7, 1117, 2048 }, { 7, 1118, 2048 }, { 8, 1119, 2048 }, + { 4, 1120, 2048 }, { 5, 1121, 2048 }, { 5, 1122, 2048 }, { 6, 1123, 2048 }, { 5, 1124, 2048 }, { 6, 1125, 2048 }, { 6, 1126, 2048 }, { 7, 1127, 2048 }, + { 5, 1128, 2048 }, { 6, 1129, 2048 }, { 6, 1130, 2048 }, { 7, 1131, 2048 }, { 6, 1132, 2048 }, { 7, 1133, 2048 }, { 7, 1134, 2048 }, { 8, 1135, 2048 }, + { 5, 1136, 2048 }, { 6, 1137, 2048 }, { 6, 1138, 2048 }, { 7, 
1139, 2048 }, { 6, 1140, 2048 }, { 7, 1141, 2048 }, { 7, 1142, 2048 }, { 8, 1143, 2048 }, + { 6, 1144, 2048 }, { 7, 1145, 2048 }, { 7, 1146, 2048 }, { 8, 1147, 2048 }, { 7, 1148, 2048 }, { 8, 1149, 2048 }, { 8, 1150, 2048 }, { 9, 1151, 2048 }, + { 3, 1152, 2048 }, { 4, 1153, 2048 }, { 4, 1154, 2048 }, { 5, 1155, 2048 }, { 4, 1156, 2048 }, { 5, 1157, 2048 }, { 5, 1158, 2048 }, { 6, 1159, 2048 }, + { 4, 1160, 2048 }, { 5, 1161, 2048 }, { 5, 1162, 2048 }, { 6, 1163, 2048 }, { 5, 1164, 2048 }, { 6, 1165, 2048 }, { 6, 1166, 2048 }, { 7, 1167, 2048 }, + { 4, 1168, 2048 }, { 5, 1169, 2048 }, { 5, 1170, 2048 }, { 6, 1171, 2048 }, { 5, 1172, 2048 }, { 6, 1173, 2048 }, { 6, 1174, 2048 }, { 7, 1175, 2048 }, + { 5, 1176, 2048 }, { 6, 1177, 2048 }, { 6, 1178, 2048 }, { 7, 1179, 2048 }, { 6, 1180, 2048 }, { 7, 1181, 2048 }, { 7, 1182, 2048 }, { 8, 1183, 2048 }, + { 4, 1184, 2048 }, { 5, 1185, 2048 }, { 5, 1186, 2048 }, { 6, 1187, 2048 }, { 5, 1188, 2048 }, { 6, 1189, 2048 }, { 6, 1190, 2048 }, { 7, 1191, 2048 }, + { 5, 1192, 2048 }, { 6, 1193, 2048 }, { 6, 1194, 2048 }, { 7, 1195, 2048 }, { 6, 1196, 2048 }, { 7, 1197, 2048 }, { 7, 1198, 2048 }, { 8, 1199, 2048 }, + { 5, 1200, 2048 }, { 6, 1201, 2048 }, { 6, 1202, 2048 }, { 7, 1203, 2048 }, { 6, 1204, 2048 }, { 7, 1205, 2048 }, { 7, 1206, 2048 }, { 8, 1207, 2048 }, + { 6, 1208, 2048 }, { 7, 1209, 2048 }, { 7, 1210, 2048 }, { 8, 1211, 2048 }, { 7, 1212, 2048 }, { 8, 1213, 2048 }, { 8, 1214, 2048 }, { 9, 1215, 2048 }, + { 4, 1216, 2048 }, { 5, 1217, 2048 }, { 5, 1218, 2048 }, { 6, 1219, 2048 }, { 5, 1220, 2048 }, { 6, 1221, 2048 }, { 6, 1222, 2048 }, { 7, 1223, 2048 }, + { 5, 1224, 2048 }, { 6, 1225, 2048 }, { 6, 1226, 2048 }, { 7, 1227, 2048 }, { 6, 1228, 2048 }, { 7, 1229, 2048 }, { 7, 1230, 2048 }, { 8, 1231, 2048 }, + { 5, 1232, 2048 }, { 6, 1233, 2048 }, { 6, 1234, 2048 }, { 7, 1235, 2048 }, { 6, 1236, 2048 }, { 7, 1237, 2048 }, { 7, 1238, 2048 }, { 8, 1239, 2048 }, + { 6, 1240, 2048 }, { 7, 1241, 2048 }, { 7, 1242, 2048 }, { 8, 1243, 2048 }, { 7, 1244, 2048 }, { 8, 1245, 2048 }, { 8, 1246, 2048 }, { 9, 1247, 2048 }, + { 5, 1248, 2048 }, { 6, 1249, 2048 }, { 6, 1250, 2048 }, { 7, 1251, 2048 }, { 6, 1252, 2048 }, { 7, 1253, 2048 }, { 7, 1254, 2048 }, { 8, 1255, 2048 }, + { 6, 1256, 2048 }, { 7, 1257, 2048 }, { 7, 1258, 2048 }, { 8, 1259, 2048 }, { 7, 1260, 2048 }, { 8, 1261, 2048 }, { 8, 1262, 2048 }, { 9, 1263, 2048 }, + { 6, 1264, 2048 }, { 7, 1265, 2048 }, { 7, 1266, 2048 }, { 8, 1267, 2048 }, { 7, 1268, 2048 }, { 8, 1269, 2048 }, { 8, 1270, 2048 }, { 9, 1271, 2048 }, + { 7, 1272, 2048 }, { 8, 1273, 2048 }, { 8, 1274, 2048 }, { 9, 1275, 2048 }, { 8, 1276, 2048 }, { 9, 1277, 2048 }, { 9, 1278, 2048 }, { 10, 1279, 2048 }, + { 3, 1280, 2048 }, { 4, 1281, 2048 }, { 4, 1282, 2048 }, { 5, 1283, 2048 }, { 4, 1284, 2048 }, { 5, 1285, 2048 }, { 5, 1286, 2048 }, { 6, 1287, 2048 }, + { 4, 1288, 2048 }, { 5, 1289, 2048 }, { 5, 1290, 2048 }, { 6, 1291, 2048 }, { 5, 1292, 2048 }, { 6, 1293, 2048 }, { 6, 1294, 2048 }, { 7, 1295, 2048 }, + { 4, 1296, 2048 }, { 5, 1297, 2048 }, { 5, 1298, 2048 }, { 6, 1299, 2048 }, { 5, 1300, 2048 }, { 6, 1301, 2048 }, { 6, 1302, 2048 }, { 7, 1303, 2048 }, + { 5, 1304, 2048 }, { 6, 1305, 2048 }, { 6, 1306, 2048 }, { 7, 1307, 2048 }, { 6, 1308, 2048 }, { 7, 1309, 2048 }, { 7, 1310, 2048 }, { 8, 1311, 2048 }, + { 4, 1312, 2048 }, { 5, 1313, 2048 }, { 5, 1314, 2048 }, { 6, 1315, 2048 }, { 5, 1316, 2048 }, { 6, 1317, 2048 }, { 6, 1318, 2048 }, { 7, 1319, 2048 }, + { 5, 1320, 2048 }, { 6, 1321, 2048 }, { 6, 1322, 2048 }, { 7, 1323, 2048 
}, { 6, 1324, 2048 }, { 7, 1325, 2048 }, { 7, 1326, 2048 }, { 8, 1327, 2048 }, + { 5, 1328, 2048 }, { 6, 1329, 2048 }, { 6, 1330, 2048 }, { 7, 1331, 2048 }, { 6, 1332, 2048 }, { 7, 1333, 2048 }, { 7, 1334, 2048 }, { 8, 1335, 2048 }, + { 6, 1336, 2048 }, { 7, 1337, 2048 }, { 7, 1338, 2048 }, { 8, 1339, 2048 }, { 7, 1340, 2048 }, { 8, 1341, 2048 }, { 8, 1342, 2048 }, { 9, 1343, 2048 }, + { 4, 1344, 2048 }, { 5, 1345, 2048 }, { 5, 1346, 2048 }, { 6, 1347, 2048 }, { 5, 1348, 2048 }, { 6, 1349, 2048 }, { 6, 1350, 2048 }, { 7, 1351, 2048 }, + { 5, 1352, 2048 }, { 6, 1353, 2048 }, { 6, 1354, 2048 }, { 7, 1355, 2048 }, { 6, 1356, 2048 }, { 7, 1357, 2048 }, { 7, 1358, 2048 }, { 8, 1359, 2048 }, + { 5, 1360, 2048 }, { 6, 1361, 2048 }, { 6, 1362, 2048 }, { 7, 1363, 2048 }, { 6, 1364, 2048 }, { 7, 1365, 2048 }, { 7, 1366, 2048 }, { 8, 1367, 2048 }, + { 6, 1368, 2048 }, { 7, 1369, 2048 }, { 7, 1370, 2048 }, { 8, 1371, 2048 }, { 7, 1372, 2048 }, { 8, 1373, 2048 }, { 8, 1374, 2048 }, { 9, 1375, 2048 }, + { 5, 1376, 2048 }, { 6, 1377, 2048 }, { 6, 1378, 2048 }, { 7, 1379, 2048 }, { 6, 1380, 2048 }, { 7, 1381, 2048 }, { 7, 1382, 2048 }, { 8, 1383, 2048 }, + { 6, 1384, 2048 }, { 7, 1385, 2048 }, { 7, 1386, 2048 }, { 8, 1387, 2048 }, { 7, 1388, 2048 }, { 8, 1389, 2048 }, { 8, 1390, 2048 }, { 9, 1391, 2048 }, + { 6, 1392, 2048 }, { 7, 1393, 2048 }, { 7, 1394, 2048 }, { 8, 1395, 2048 }, { 7, 1396, 2048 }, { 8, 1397, 2048 }, { 8, 1398, 2048 }, { 9, 1399, 2048 }, + { 7, 1400, 2048 }, { 8, 1401, 2048 }, { 8, 1402, 2048 }, { 9, 1403, 2048 }, { 8, 1404, 2048 }, { 9, 1405, 2048 }, { 9, 1406, 2048 }, { 10, 1407, 2048 }, + { 4, 1408, 2048 }, { 5, 1409, 2048 }, { 5, 1410, 2048 }, { 6, 1411, 2048 }, { 5, 1412, 2048 }, { 6, 1413, 2048 }, { 6, 1414, 2048 }, { 7, 1415, 2048 }, + { 5, 1416, 2048 }, { 6, 1417, 2048 }, { 6, 1418, 2048 }, { 7, 1419, 2048 }, { 6, 1420, 2048 }, { 7, 1421, 2048 }, { 7, 1422, 2048 }, { 8, 1423, 2048 }, + { 5, 1424, 2048 }, { 6, 1425, 2048 }, { 6, 1426, 2048 }, { 7, 1427, 2048 }, { 6, 1428, 2048 }, { 7, 1429, 2048 }, { 7, 1430, 2048 }, { 8, 1431, 2048 }, + { 6, 1432, 2048 }, { 7, 1433, 2048 }, { 7, 1434, 2048 }, { 8, 1435, 2048 }, { 7, 1436, 2048 }, { 8, 1437, 2048 }, { 8, 1438, 2048 }, { 9, 1439, 2048 }, + { 5, 1440, 2048 }, { 6, 1441, 2048 }, { 6, 1442, 2048 }, { 7, 1443, 2048 }, { 6, 1444, 2048 }, { 7, 1445, 2048 }, { 7, 1446, 2048 }, { 8, 1447, 2048 }, + { 6, 1448, 2048 }, { 7, 1449, 2048 }, { 7, 1450, 2048 }, { 8, 1451, 2048 }, { 7, 1452, 2048 }, { 8, 1453, 2048 }, { 8, 1454, 2048 }, { 9, 1455, 2048 }, + { 6, 1456, 2048 }, { 7, 1457, 2048 }, { 7, 1458, 2048 }, { 8, 1459, 2048 }, { 7, 1460, 2048 }, { 8, 1461, 2048 }, { 8, 1462, 2048 }, { 9, 1463, 2048 }, + { 7, 1464, 2048 }, { 8, 1465, 2048 }, { 8, 1466, 2048 }, { 9, 1467, 2048 }, { 8, 1468, 2048 }, { 9, 1469, 2048 }, { 9, 1470, 2048 }, { 10, 1471, 2048 }, + { 5, 1472, 2048 }, { 6, 1473, 2048 }, { 6, 1474, 2048 }, { 7, 1475, 2048 }, { 6, 1476, 2048 }, { 7, 1477, 2048 }, { 7, 1478, 2048 }, { 8, 1479, 2048 }, + { 6, 1480, 2048 }, { 7, 1481, 2048 }, { 7, 1482, 2048 }, { 8, 1483, 2048 }, { 7, 1484, 2048 }, { 8, 1485, 2048 }, { 8, 1486, 2048 }, { 9, 1487, 2048 }, + { 6, 1488, 2048 }, { 7, 1489, 2048 }, { 7, 1490, 2048 }, { 8, 1491, 2048 }, { 7, 1492, 2048 }, { 8, 1493, 2048 }, { 8, 1494, 2048 }, { 9, 1495, 2048 }, + { 7, 1496, 2048 }, { 8, 1497, 2048 }, { 8, 1498, 2048 }, { 9, 1499, 2048 }, { 8, 1500, 2048 }, { 9, 1501, 2048 }, { 9, 1502, 2048 }, { 10, 1503, 2048 }, + { 6, 1504, 2048 }, { 7, 1505, 2048 }, { 7, 1506, 2048 }, { 8, 1507, 2048 }, { 7, 
1508, 2048 }, { 8, 1509, 2048 }, { 8, 1510, 2048 }, { 9, 1511, 2048 }, + { 7, 1512, 2048 }, { 8, 1513, 2048 }, { 8, 1514, 2048 }, { 9, 1515, 2048 }, { 8, 1516, 2048 }, { 9, 1517, 2048 }, { 9, 1518, 2048 }, { 10, 1519, 2048 }, + { 7, 1520, 2048 }, { 8, 1521, 2048 }, { 8, 1522, 2048 }, { 9, 1523, 2048 }, { 8, 1524, 2048 }, { 9, 1525, 2048 }, { 9, 1526, 2048 }, { 10, 1527, 2048 }, + { 8, 1528, 2048 }, { 9, 1529, 2048 }, { 9, 1530, 2048 }, { 10, 1531, 2048 }, { 9, 1532, 2048 }, { 10, 1533, 2048 }, { 10, 1534, 2048 }, { 11, 1535, 2048 }, + { 3, 1536, 2048 }, { 4, 1537, 2048 }, { 4, 1538, 2048 }, { 5, 1539, 2048 }, { 4, 1540, 2048 }, { 5, 1541, 2048 }, { 5, 1542, 2048 }, { 6, 1543, 2048 }, + { 4, 1544, 2048 }, { 5, 1545, 2048 }, { 5, 1546, 2048 }, { 6, 1547, 2048 }, { 5, 1548, 2048 }, { 6, 1549, 2048 }, { 6, 1550, 2048 }, { 7, 1551, 2048 }, + { 4, 1552, 2048 }, { 5, 1553, 2048 }, { 5, 1554, 2048 }, { 6, 1555, 2048 }, { 5, 1556, 2048 }, { 6, 1557, 2048 }, { 6, 1558, 2048 }, { 7, 1559, 2048 }, + { 5, 1560, 2048 }, { 6, 1561, 2048 }, { 6, 1562, 2048 }, { 7, 1563, 2048 }, { 6, 1564, 2048 }, { 7, 1565, 2048 }, { 7, 1566, 2048 }, { 8, 1567, 2048 }, + { 4, 1568, 2048 }, { 5, 1569, 2048 }, { 5, 1570, 2048 }, { 6, 1571, 2048 }, { 5, 1572, 2048 }, { 6, 1573, 2048 }, { 6, 1574, 2048 }, { 7, 1575, 2048 }, + { 5, 1576, 2048 }, { 6, 1577, 2048 }, { 6, 1578, 2048 }, { 7, 1579, 2048 }, { 6, 1580, 2048 }, { 7, 1581, 2048 }, { 7, 1582, 2048 }, { 8, 1583, 2048 }, + { 5, 1584, 2048 }, { 6, 1585, 2048 }, { 6, 1586, 2048 }, { 7, 1587, 2048 }, { 6, 1588, 2048 }, { 7, 1589, 2048 }, { 7, 1590, 2048 }, { 8, 1591, 2048 }, + { 6, 1592, 2048 }, { 7, 1593, 2048 }, { 7, 1594, 2048 }, { 8, 1595, 2048 }, { 7, 1596, 2048 }, { 8, 1597, 2048 }, { 8, 1598, 2048 }, { 9, 1599, 2048 }, + { 4, 1600, 2048 }, { 5, 1601, 2048 }, { 5, 1602, 2048 }, { 6, 1603, 2048 }, { 5, 1604, 2048 }, { 6, 1605, 2048 }, { 6, 1606, 2048 }, { 7, 1607, 2048 }, + { 5, 1608, 2048 }, { 6, 1609, 2048 }, { 6, 1610, 2048 }, { 7, 1611, 2048 }, { 6, 1612, 2048 }, { 7, 1613, 2048 }, { 7, 1614, 2048 }, { 8, 1615, 2048 }, + { 5, 1616, 2048 }, { 6, 1617, 2048 }, { 6, 1618, 2048 }, { 7, 1619, 2048 }, { 6, 1620, 2048 }, { 7, 1621, 2048 }, { 7, 1622, 2048 }, { 8, 1623, 2048 }, + { 6, 1624, 2048 }, { 7, 1625, 2048 }, { 7, 1626, 2048 }, { 8, 1627, 2048 }, { 7, 1628, 2048 }, { 8, 1629, 2048 }, { 8, 1630, 2048 }, { 9, 1631, 2048 }, + { 5, 1632, 2048 }, { 6, 1633, 2048 }, { 6, 1634, 2048 }, { 7, 1635, 2048 }, { 6, 1636, 2048 }, { 7, 1637, 2048 }, { 7, 1638, 2048 }, { 8, 1639, 2048 }, + { 6, 1640, 2048 }, { 7, 1641, 2048 }, { 7, 1642, 2048 }, { 8, 1643, 2048 }, { 7, 1644, 2048 }, { 8, 1645, 2048 }, { 8, 1646, 2048 }, { 9, 1647, 2048 }, + { 6, 1648, 2048 }, { 7, 1649, 2048 }, { 7, 1650, 2048 }, { 8, 1651, 2048 }, { 7, 1652, 2048 }, { 8, 1653, 2048 }, { 8, 1654, 2048 }, { 9, 1655, 2048 }, + { 7, 1656, 2048 }, { 8, 1657, 2048 }, { 8, 1658, 2048 }, { 9, 1659, 2048 }, { 8, 1660, 2048 }, { 9, 1661, 2048 }, { 9, 1662, 2048 }, { 10, 1663, 2048 }, + { 4, 1664, 2048 }, { 5, 1665, 2048 }, { 5, 1666, 2048 }, { 6, 1667, 2048 }, { 5, 1668, 2048 }, { 6, 1669, 2048 }, { 6, 1670, 2048 }, { 7, 1671, 2048 }, + { 5, 1672, 2048 }, { 6, 1673, 2048 }, { 6, 1674, 2048 }, { 7, 1675, 2048 }, { 6, 1676, 2048 }, { 7, 1677, 2048 }, { 7, 1678, 2048 }, { 8, 1679, 2048 }, + { 5, 1680, 2048 }, { 6, 1681, 2048 }, { 6, 1682, 2048 }, { 7, 1683, 2048 }, { 6, 1684, 2048 }, { 7, 1685, 2048 }, { 7, 1686, 2048 }, { 8, 1687, 2048 }, + { 6, 1688, 2048 }, { 7, 1689, 2048 }, { 7, 1690, 2048 }, { 8, 1691, 2048 }, { 7, 1692, 
2048 }, { 8, 1693, 2048 }, { 8, 1694, 2048 }, { 9, 1695, 2048 }, + { 5, 1696, 2048 }, { 6, 1697, 2048 }, { 6, 1698, 2048 }, { 7, 1699, 2048 }, { 6, 1700, 2048 }, { 7, 1701, 2048 }, { 7, 1702, 2048 }, { 8, 1703, 2048 }, + { 6, 1704, 2048 }, { 7, 1705, 2048 }, { 7, 1706, 2048 }, { 8, 1707, 2048 }, { 7, 1708, 2048 }, { 8, 1709, 2048 }, { 8, 1710, 2048 }, { 9, 1711, 2048 }, + { 6, 1712, 2048 }, { 7, 1713, 2048 }, { 7, 1714, 2048 }, { 8, 1715, 2048 }, { 7, 1716, 2048 }, { 8, 1717, 2048 }, { 8, 1718, 2048 }, { 9, 1719, 2048 }, + { 7, 1720, 2048 }, { 8, 1721, 2048 }, { 8, 1722, 2048 }, { 9, 1723, 2048 }, { 8, 1724, 2048 }, { 9, 1725, 2048 }, { 9, 1726, 2048 }, { 10, 1727, 2048 }, + { 5, 1728, 2048 }, { 6, 1729, 2048 }, { 6, 1730, 2048 }, { 7, 1731, 2048 }, { 6, 1732, 2048 }, { 7, 1733, 2048 }, { 7, 1734, 2048 }, { 8, 1735, 2048 }, + { 6, 1736, 2048 }, { 7, 1737, 2048 }, { 7, 1738, 2048 }, { 8, 1739, 2048 }, { 7, 1740, 2048 }, { 8, 1741, 2048 }, { 8, 1742, 2048 }, { 9, 1743, 2048 }, + { 6, 1744, 2048 }, { 7, 1745, 2048 }, { 7, 1746, 2048 }, { 8, 1747, 2048 }, { 7, 1748, 2048 }, { 8, 1749, 2048 }, { 8, 1750, 2048 }, { 9, 1751, 2048 }, + { 7, 1752, 2048 }, { 8, 1753, 2048 }, { 8, 1754, 2048 }, { 9, 1755, 2048 }, { 8, 1756, 2048 }, { 9, 1757, 2048 }, { 9, 1758, 2048 }, { 10, 1759, 2048 }, + { 6, 1760, 2048 }, { 7, 1761, 2048 }, { 7, 1762, 2048 }, { 8, 1763, 2048 }, { 7, 1764, 2048 }, { 8, 1765, 2048 }, { 8, 1766, 2048 }, { 9, 1767, 2048 }, + { 7, 1768, 2048 }, { 8, 1769, 2048 }, { 8, 1770, 2048 }, { 9, 1771, 2048 }, { 8, 1772, 2048 }, { 9, 1773, 2048 }, { 9, 1774, 2048 }, { 10, 1775, 2048 }, + { 7, 1776, 2048 }, { 8, 1777, 2048 }, { 8, 1778, 2048 }, { 9, 1779, 2048 }, { 8, 1780, 2048 }, { 9, 1781, 2048 }, { 9, 1782, 2048 }, { 10, 1783, 2048 }, + { 8, 1784, 2048 }, { 9, 1785, 2048 }, { 9, 1786, 2048 }, { 10, 1787, 2048 }, { 9, 1788, 2048 }, { 10, 1789, 2048 }, { 10, 1790, 2048 }, { 11, 1791, 2048 }, + { 4, 1792, 2048 }, { 5, 1793, 2048 }, { 5, 1794, 2048 }, { 6, 1795, 2048 }, { 5, 1796, 2048 }, { 6, 1797, 2048 }, { 6, 1798, 2048 }, { 7, 1799, 2048 }, + { 5, 1800, 2048 }, { 6, 1801, 2048 }, { 6, 1802, 2048 }, { 7, 1803, 2048 }, { 6, 1804, 2048 }, { 7, 1805, 2048 }, { 7, 1806, 2048 }, { 8, 1807, 2048 }, + { 5, 1808, 2048 }, { 6, 1809, 2048 }, { 6, 1810, 2048 }, { 7, 1811, 2048 }, { 6, 1812, 2048 }, { 7, 1813, 2048 }, { 7, 1814, 2048 }, { 8, 1815, 2048 }, + { 6, 1816, 2048 }, { 7, 1817, 2048 }, { 7, 1818, 2048 }, { 8, 1819, 2048 }, { 7, 1820, 2048 }, { 8, 1821, 2048 }, { 8, 1822, 2048 }, { 9, 1823, 2048 }, + { 5, 1824, 2048 }, { 6, 1825, 2048 }, { 6, 1826, 2048 }, { 7, 1827, 2048 }, { 6, 1828, 2048 }, { 7, 1829, 2048 }, { 7, 1830, 2048 }, { 8, 1831, 2048 }, + { 6, 1832, 2048 }, { 7, 1833, 2048 }, { 7, 1834, 2048 }, { 8, 1835, 2048 }, { 7, 1836, 2048 }, { 8, 1837, 2048 }, { 8, 1838, 2048 }, { 9, 1839, 2048 }, + { 6, 1840, 2048 }, { 7, 1841, 2048 }, { 7, 1842, 2048 }, { 8, 1843, 2048 }, { 7, 1844, 2048 }, { 8, 1845, 2048 }, { 8, 1846, 2048 }, { 9, 1847, 2048 }, + { 7, 1848, 2048 }, { 8, 1849, 2048 }, { 8, 1850, 2048 }, { 9, 1851, 2048 }, { 8, 1852, 2048 }, { 9, 1853, 2048 }, { 9, 1854, 2048 }, { 10, 1855, 2048 }, + { 5, 1856, 2048 }, { 6, 1857, 2048 }, { 6, 1858, 2048 }, { 7, 1859, 2048 }, { 6, 1860, 2048 }, { 7, 1861, 2048 }, { 7, 1862, 2048 }, { 8, 1863, 2048 }, + { 6, 1864, 2048 }, { 7, 1865, 2048 }, { 7, 1866, 2048 }, { 8, 1867, 2048 }, { 7, 1868, 2048 }, { 8, 1869, 2048 }, { 8, 1870, 2048 }, { 9, 1871, 2048 }, + { 6, 1872, 2048 }, { 7, 1873, 2048 }, { 7, 1874, 2048 }, { 8, 1875, 2048 }, { 7, 1876, 
2048 }, { 8, 1877, 2048 }, { 8, 1878, 2048 }, { 9, 1879, 2048 }, + { 7, 1880, 2048 }, { 8, 1881, 2048 }, { 8, 1882, 2048 }, { 9, 1883, 2048 }, { 8, 1884, 2048 }, { 9, 1885, 2048 }, { 9, 1886, 2048 }, { 10, 1887, 2048 }, + { 6, 1888, 2048 }, { 7, 1889, 2048 }, { 7, 1890, 2048 }, { 8, 1891, 2048 }, { 7, 1892, 2048 }, { 8, 1893, 2048 }, { 8, 1894, 2048 }, { 9, 1895, 2048 }, + { 7, 1896, 2048 }, { 8, 1897, 2048 }, { 8, 1898, 2048 }, { 9, 1899, 2048 }, { 8, 1900, 2048 }, { 9, 1901, 2048 }, { 9, 1902, 2048 }, { 10, 1903, 2048 }, + { 7, 1904, 2048 }, { 8, 1905, 2048 }, { 8, 1906, 2048 }, { 9, 1907, 2048 }, { 8, 1908, 2048 }, { 9, 1909, 2048 }, { 9, 1910, 2048 }, { 10, 1911, 2048 }, + { 8, 1912, 2048 }, { 9, 1913, 2048 }, { 9, 1914, 2048 }, { 10, 1915, 2048 }, { 9, 1916, 2048 }, { 10, 1917, 2048 }, { 10, 1918, 2048 }, { 11, 1919, 2048 }, + { 5, 1920, 2048 }, { 6, 1921, 2048 }, { 6, 1922, 2048 }, { 7, 1923, 2048 }, { 6, 1924, 2048 }, { 7, 1925, 2048 }, { 7, 1926, 2048 }, { 8, 1927, 2048 }, + { 6, 1928, 2048 }, { 7, 1929, 2048 }, { 7, 1930, 2048 }, { 8, 1931, 2048 }, { 7, 1932, 2048 }, { 8, 1933, 2048 }, { 8, 1934, 2048 }, { 9, 1935, 2048 }, + { 6, 1936, 2048 }, { 7, 1937, 2048 }, { 7, 1938, 2048 }, { 8, 1939, 2048 }, { 7, 1940, 2048 }, { 8, 1941, 2048 }, { 8, 1942, 2048 }, { 9, 1943, 2048 }, + { 7, 1944, 2048 }, { 8, 1945, 2048 }, { 8, 1946, 2048 }, { 9, 1947, 2048 }, { 8, 1948, 2048 }, { 9, 1949, 2048 }, { 9, 1950, 2048 }, { 10, 1951, 2048 }, + { 6, 1952, 2048 }, { 7, 1953, 2048 }, { 7, 1954, 2048 }, { 8, 1955, 2048 }, { 7, 1956, 2048 }, { 8, 1957, 2048 }, { 8, 1958, 2048 }, { 9, 1959, 2048 }, + { 7, 1960, 2048 }, { 8, 1961, 2048 }, { 8, 1962, 2048 }, { 9, 1963, 2048 }, { 8, 1964, 2048 }, { 9, 1965, 2048 }, { 9, 1966, 2048 }, { 10, 1967, 2048 }, + { 7, 1968, 2048 }, { 8, 1969, 2048 }, { 8, 1970, 2048 }, { 9, 1971, 2048 }, { 8, 1972, 2048 }, { 9, 1973, 2048 }, { 9, 1974, 2048 }, { 10, 1975, 2048 }, + { 8, 1976, 2048 }, { 9, 1977, 2048 }, { 9, 1978, 2048 }, { 10, 1979, 2048 }, { 9, 1980, 2048 }, { 10, 1981, 2048 }, { 10, 1982, 2048 }, { 11, 1983, 2048 }, + { 6, 1984, 2048 }, { 7, 1985, 2048 }, { 7, 1986, 2048 }, { 8, 1987, 2048 }, { 7, 1988, 2048 }, { 8, 1989, 2048 }, { 8, 1990, 2048 }, { 9, 1991, 2048 }, + { 7, 1992, 2048 }, { 8, 1993, 2048 }, { 8, 1994, 2048 }, { 9, 1995, 2048 }, { 8, 1996, 2048 }, { 9, 1997, 2048 }, { 9, 1998, 2048 }, { 10, 1999, 2048 }, + { 7, 2000, 2048 }, { 8, 2001, 2048 }, { 8, 2002, 2048 }, { 9, 2003, 2048 }, { 8, 2004, 2048 }, { 9, 2005, 2048 }, { 9, 2006, 2048 }, { 10, 2007, 2048 }, + { 8, 2008, 2048 }, { 9, 2009, 2048 }, { 9, 2010, 2048 }, { 10, 2011, 2048 }, { 9, 2012, 2048 }, { 10, 2013, 2048 }, { 10, 2014, 2048 }, { 11, 2015, 2048 }, + { 7, 2016, 2048 }, { 8, 2017, 2048 }, { 8, 2018, 2048 }, { 9, 2019, 2048 }, { 8, 2020, 2048 }, { 9, 2021, 2048 }, { 9, 2022, 2048 }, { 10, 2023, 2048 }, + { 8, 2024, 2048 }, { 9, 2025, 2048 }, { 9, 2026, 2048 }, { 10, 2027, 2048 }, { 9, 2028, 2048 }, { 10, 2029, 2048 }, { 10, 2030, 2048 }, { 11, 2031, 2048 }, + { 8, 2032, 2048 }, { 9, 2033, 2048 }, { 9, 2034, 2048 }, { 10, 2035, 2048 }, { 9, 2036, 2048 }, { 10, 2037, 2048 }, { 10, 2038, 2048 }, { 11, 2039, 2048 }, + { 9, 2040, 2048 }, { 10, 2041, 2048 }, { 10, 2042, 2048 }, { 11, 2043, 2048 }, { 10, 2044, 2048 }, { 11, 2045, 2048 }, { 11, 2046, 2048 }, { 12, 2047, 2048 }, +#endif +#endif +#endif +#endif +#endif +#endif +}; + + +/* find a hole and free as required, return -1 if no hole found */ +static int find_hole(void) +{ + unsigned x; + int y, z; + for (z = -1, y = INT_MAX, x = 0; x < 
FP_ENTRIES; x++) {
+       if (fp_cache[x].lru_count < y && fp_cache[x].lock == 0) {
+           z = x;
+           y = fp_cache[x].lru_count;
+       }
+   }
+
+   /* decrease all */
+   for (x = 0; x < FP_ENTRIES; x++) {
+      if (fp_cache[x].lru_count > 3) {
+         --(fp_cache[x].lru_count);
+      }
+   }
+
+   /* free entry z */
+   if (z >= 0 && fp_cache[z].g) {
+      mp_clear(&fp_cache[z].mu);
+      wc_ecc_del_point(fp_cache[z].g);
+      fp_cache[z].g = NULL;
+      for (x = 0; x < (1U<<FP_LUT); x++) {
+         wc_ecc_del_point(fp_cache[z].LUT[x]);
+         fp_cache[z].LUT[x] = NULL;
+      }
+      fp_cache[z].lru_count = 0;
+   }
+
+   return z;
+}
+
+/* determine if a base is already in the cache and if so, where */
+static int find_base(ecc_point* g)
+{
+   int x;
+   for (x = 0; x < FP_ENTRIES; x++) {
+      if (fp_cache[x].g != NULL &&
+          mp_cmp(fp_cache[x].g->x, g->x) == MP_EQ &&
+          mp_cmp(fp_cache[x].g->y, g->y) == MP_EQ &&
+          mp_cmp(fp_cache[x].g->z, g->z) == MP_EQ) {
+         break;
+      }
+   }
+   if (x == FP_ENTRIES) {
+      x = -1;
+   }
+   return x;
+}
+
+/* add a new base to the cache */
+static int add_entry(int idx, ecc_point *g)
+{
+   unsigned x, y;
+
+   /* allocate base and LUT */
+   fp_cache[idx].g = wc_ecc_new_point();
+   if (fp_cache[idx].g == NULL) {
+      return GEN_MEM_ERR;
+   }
+
+   /* copy x and y */
+   if ((mp_copy(g->x, fp_cache[idx].g->x) != MP_OKAY) ||
+       (mp_copy(g->y, fp_cache[idx].g->y) != MP_OKAY) ||
+       (mp_copy(g->z, fp_cache[idx].g->z) != MP_OKAY)) {
+      wc_ecc_del_point(fp_cache[idx].g);
+      fp_cache[idx].g = NULL;
+      return GEN_MEM_ERR;
+   }
+
+   for (x = 0; x < (1U<<FP_LUT); x++) {
+      fp_cache[idx].LUT[x] = wc_ecc_new_point();
+      if (fp_cache[idx].LUT[x] == NULL) {
+         for (y = 0; y < x; y++) {
+            wc_ecc_del_point(fp_cache[idx].LUT[y]);
+            fp_cache[idx].LUT[y] = NULL;
+         }
+         wc_ecc_del_point(fp_cache[idx].g);
+         fp_cache[idx].g = NULL;
+         fp_cache[idx].lru_count = 0;
+         return GEN_MEM_ERR;
+      }
+   }
+
+   fp_cache[idx].lru_count = 0;
+
+   return MP_OKAY;
+}
+
+#ifndef WOLFSSL_SP_MATH
+/* build the lookup table of evenly spaced multiples of the cached base */
+static int build_lut(int idx, mp_int* a, mp_int* modulus, mp_digit mp,
+    mp_int* mu)
+{
+   int      err;
+   unsigned x, y, bitlen, lut_gap;
+   mp_int   tmp;
+
+   if (mp_init(&tmp) != MP_OKAY)
+       return GEN_MEM_ERR;
+
+   /* sanity check to make sure lut_gap is greater than 0 */
+   bitlen = mp_unsigned_bin_size(modulus) << 3;
+   if (FP_LUT > bitlen) {
+       mp_clear(&tmp);
+       return GEN_MEM_ERR;
+   }
+
+   /* get bitlen and round up to next multiple of FP_LUT */
+   x = bitlen % FP_LUT;
+   if (x) {
+      bitlen += FP_LUT - x;
+   }
+   lut_gap = bitlen / FP_LUT;
+
+   /* init the mu */
+   err = mp_init_copy(&fp_cache[idx].mu, mu);
+
+   /* copy base into the first LUT slot, converted to montgomery form */
+   if (err == MP_OKAY) {
+     if ((mp_mulmod(fp_cache[idx].g->x, mu, modulus,
+                  fp_cache[idx].LUT[1]->x) != MP_OKAY) ||
+         (mp_mulmod(fp_cache[idx].g->y, mu, modulus,
+                  fp_cache[idx].LUT[1]->y) != MP_OKAY) ||
+         (mp_mulmod(fp_cache[idx].g->z, mu, modulus,
+                  fp_cache[idx].LUT[1]->z) != MP_OKAY)) {
+       err = MP_MULMOD_E;
+     }
+   }
+
+   /* make all single bit entries */
+   for (x = 1; x < FP_LUT; x++) {
+      if (err != MP_OKAY)
+          break;
+      if ((mp_copy(fp_cache[idx].LUT[1<<(x-1)]->x,
+                   fp_cache[idx].LUT[1<<x]->x) != MP_OKAY) ||
+          (mp_copy(fp_cache[idx].LUT[1<<(x-1)]->y,
+                   fp_cache[idx].LUT[1<<x]->y) != MP_OKAY) ||
+          (mp_copy(fp_cache[idx].LUT[1<<(x-1)]->z,
+                   fp_cache[idx].LUT[1<<x]->z) != MP_OKAY)) {
+          err = MP_INIT_E;
+          break;
+      } else {
+
+         /* now double it bitlen/FP_LUT times */
+         for (y = 0; y < lut_gap; y++) {
+             if ((err = ecc_projective_dbl_point(fp_cache[idx].LUT[1<<x],
+                            fp_cache[idx].LUT[1<<x], a, modulus,
+                            mp)) != MP_OKAY) {
+                 break;
+             }
+         }
+      }
+   }
+
+   /* now make all entries in increasing order of hamming weight */
+   for (x = 2; x <= FP_LUT; x++) {
+       if (err != MP_OKAY)
+           break;
+       for (y = 0; y < (1UL<<FP_LUT); y++) {
+           if (lut_orders[y].ham != (int)x) continue;
+
+           /* perform the add */
+           if ((err = ecc_projective_add_point(
+                           fp_cache[idx].LUT[lut_orders[y].terma],
+                           fp_cache[idx].LUT[lut_orders[y].termb],
+                           fp_cache[idx].LUT[y], a, modulus, mp)) != MP_OKAY) {
+               break;
+           }
+       }
+   }
+
+   /* now map all entries back to affine space to make point addition
+      faster */
+   for (x = 1; x < (1UL<<FP_LUT); x++) {
+       if (err != MP_OKAY)
+           break;
+
+       /* convert z from montgomery to normal */
+       err = mp_montgomery_reduce(fp_cache[idx].LUT[x]->z, modulus, mp);
+
+       /* invert it */
+       if (err == MP_OKAY)
+           err = mp_invmod(fp_cache[idx].LUT[x]->z, modulus,
+                           fp_cache[idx].LUT[x]->z);
+
+       if (err == MP_OKAY)
+           /* now square it */
+           err = mp_sqrmod(fp_cache[idx].LUT[x]->z, modulus, &tmp);
+
+       if (err == MP_OKAY)
+           /* fix x */
+           err = mp_mulmod(fp_cache[idx].LUT[x]->x, &tmp, modulus,
+                           fp_cache[idx].LUT[x]->x);
+
+       if (err == MP_OKAY)
+           /* get 1/z^3 */
+           err = mp_mulmod(&tmp, fp_cache[idx].LUT[x]->z, modulus, &tmp);
+
+       if (err == MP_OKAY)
+           /* fix y */
+           err = mp_mulmod(fp_cache[idx].LUT[x]->y, &tmp, modulus,
+                           fp_cache[idx].LUT[x]->y);
+
+       if (err == MP_OKAY)
+           /* free z */
+           mp_clear(fp_cache[idx].LUT[x]->z);
+   }
+
+   mp_clear(&tmp);
+
+   if (err == MP_OKAY)
+       return MP_OKAY;
+
+   /* err cleanup */
+   for (y = 0; y < (1U<<FP_LUT); y++) {
+      wc_ecc_del_point(fp_cache[idx].LUT[y]);
+      fp_cache[idx].LUT[y] = NULL;
+   }
+   wc_ecc_del_point(fp_cache[idx].g);
+   fp_cache[idx].g = NULL;
+   fp_cache[idx].lru_count = 0;
+   mp_clear(&fp_cache[idx].mu);
+
+   return err;
+}
+
+/* perform a fixed point ECC mulmod */
+static int accel_fp_mul(int idx, mp_int* k, ecc_point *R, mp_int* a,
+                        mp_int* modulus, mp_digit mp, int map)
+{
+#define KB_SIZE 128
+
+#ifdef WOLFSSL_SMALL_STACK
+   unsigned char* kb = NULL;
+#else
+   unsigned char kb[KB_SIZE];
+#endif
+   int      x, err;
+   unsigned y, z = 0, bitlen, bitpos, lut_gap, first;
+   mp_int   tk, order;
+
+   if (mp_init_multi(&tk, &order, NULL, NULL, NULL, NULL) != MP_OKAY)
+       return MP_INIT_E;
+
+   /* if it's smaller than modulus we fine */
+   if (mp_unsigned_bin_size(k) > mp_unsigned_bin_size(modulus)) {
+      /* find order */
+      y = mp_unsigned_bin_size(modulus);
+      for (x = 0; ecc_sets[x].size; x++) {
+         if (y <= (unsigned)ecc_sets[x].size) break;
+      }
+
+      /* back off if we are on the 521 bit curve */
+      if (y == 66) --x;
+
+      if ((err = mp_read_radix(&order, ecc_sets[x].order,
+                               MP_RADIX_HEX)) != MP_OKAY) {
+         goto done;
+      }
+
+      /* k must be less than modulus */
+      if (mp_cmp(k, &order) != MP_LT) {
+         if ((err = mp_mod(k, &order, &tk)) != MP_OKAY) {
+            goto done;
+         }
+      } else {
+         if ((err = mp_copy(k, &tk)) != MP_OKAY) {
+            goto done;
+         }
+      }
+   } else {
+      if ((err = mp_copy(k, &tk)) != MP_OKAY) {
+         goto done;
+      }
+   }
+
+   /* get bitlen and round up to next multiple of FP_LUT */
+   bitlen = mp_unsigned_bin_size(modulus) << 3;
+   x = bitlen % FP_LUT;
+   if (x) {
+      bitlen += FP_LUT - x;
+   }
+   lut_gap = bitlen / 
FP_LUT; + + /* get the k value */ + if (mp_unsigned_bin_size(&tk) > (int)(KB_SIZE - 2)) { + err = BUFFER_E; goto done; + } + + /* store k */ +#ifdef WOLFSSL_SMALL_STACK + kb = (unsigned char*)XMALLOC(KB_SIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER); + if (kb == NULL) { + err = MEMORY_E; goto done; + } +#endif + + XMEMSET(kb, 0, KB_SIZE); + if ((err = mp_to_unsigned_bin(&tk, kb)) == MP_OKAY) { + /* let's reverse kb so it's little endian */ + x = 0; + y = mp_unsigned_bin_size(&tk); + if (y > 0) { + y -= 1; + } + + while ((unsigned)x < y) { + z = kb[x]; kb[x] = kb[y]; kb[y] = (byte)z; + ++x; --y; + } + + /* at this point we can start, yipee */ + first = 1; + for (x = lut_gap-1; x >= 0; x--) { + /* extract FP_LUT bits from kb spread out by lut_gap bits and offset + by x bits from the start */ + bitpos = x; + for (y = z = 0; y < FP_LUT; y++) { + z |= ((kb[bitpos>>3] >> (bitpos&7)) & 1) << y; + bitpos += lut_gap; /* it's y*lut_gap + x, but here we can avoid + the mult in each loop */ + } + + /* double if not first */ + if (!first) { + if ((err = ecc_projective_dbl_point(R, R, a, modulus, + mp)) != MP_OKAY) { + break; + } + } + + /* add if not first, otherwise copy */ + if (!first && z) { + if ((err = ecc_projective_add_point(R, fp_cache[idx].LUT[z], R, a, + modulus, mp)) != MP_OKAY) { + break; + } + if (mp_iszero(R->z)) { + /* When all zero then should have done an add */ + if (mp_iszero(R->x) && mp_iszero(R->y)) { + if ((err = ecc_projective_dbl_point(fp_cache[idx].LUT[z], + R, a, modulus, mp)) != MP_OKAY) { + break; + } + } + /* When only Z zero then result is infinity */ + else { + err = mp_set(R->x, 0); + if (err != MP_OKAY) { + break; + } + err = mp_set(R->y, 0); + if (err != MP_OKAY) { + break; + } + err = mp_copy(&fp_cache[idx].mu, R->z); + if (err != MP_OKAY) { + break; + } + first = 1; + } + } + } else if (z) { + if ((mp_copy(fp_cache[idx].LUT[z]->x, R->x) != MP_OKAY) || + (mp_copy(fp_cache[idx].LUT[z]->y, R->y) != MP_OKAY) || + (mp_copy(&fp_cache[idx].mu, R->z) != MP_OKAY)) { + err = GEN_MEM_ERR; + break; + } + first = 0; + } + } + } + + if (err == MP_OKAY) { + (void) z; /* Acknowledge the unused assignment */ + ForceZero(kb, KB_SIZE); + + /* map R back from projective space */ + if (map) { + err = ecc_map(R, modulus, mp); + } else { + err = MP_OKAY; + } + } + +done: + /* cleanup */ + mp_clear(&order); + mp_clear(&tk); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(kb, NULL, DYNAMIC_TYPE_ECC_BUFFER); +#endif + +#undef KB_SIZE + + return err; +} +#endif + +#ifdef ECC_SHAMIR +#ifndef WOLFSSL_SP_MATH +/* perform a fixed point ECC mulmod */ +static int accel_fp_mul2add(int idx1, int idx2, + mp_int* kA, mp_int* kB, + ecc_point *R, mp_int* a, + mp_int* modulus, mp_digit mp) +{ +#define KB_SIZE 128 + +#ifdef WOLFSSL_SMALL_STACK + unsigned char* kb[2] = {NULL, NULL}; +#else + unsigned char kb[2][KB_SIZE]; +#endif + int x, err; + unsigned y, z, bitlen, bitpos, lut_gap, first, zA, zB; + mp_int tka, tkb, order; + + if (mp_init_multi(&tka, &tkb, &order, NULL, NULL, NULL) != MP_OKAY) + return MP_INIT_E; + + /* if it's smaller than modulus we fine */ + if (mp_unsigned_bin_size(kA) > mp_unsigned_bin_size(modulus)) { + /* find order */ + y = mp_unsigned_bin_size(modulus); + for (x = 0; ecc_sets[x].size; x++) { + if (y <= (unsigned)ecc_sets[x].size) break; + } + + /* back off if we are on the 521 bit curve */ + if (y == 66) --x; + + if ((err = mp_read_radix(&order, ecc_sets[x].order, + MP_RADIX_HEX)) != MP_OKAY) { + goto done; + } + + /* kA must be less than modulus */ + if (mp_cmp(kA, &order) != MP_LT) { + if ((err = 
mp_mod(kA, &order, &tka)) != MP_OKAY) { + goto done; + } + } else { + if ((err = mp_copy(kA, &tka)) != MP_OKAY) { + goto done; + } + } + } else { + if ((err = mp_copy(kA, &tka)) != MP_OKAY) { + goto done; + } + } + + /* if it's smaller than modulus we fine */ + if (mp_unsigned_bin_size(kB) > mp_unsigned_bin_size(modulus)) { + /* find order */ + y = mp_unsigned_bin_size(modulus); + for (x = 0; ecc_sets[x].size; x++) { + if (y <= (unsigned)ecc_sets[x].size) break; + } + + /* back off if we are on the 521 bit curve */ + if (y == 66) --x; + + if ((err = mp_read_radix(&order, ecc_sets[x].order, + MP_RADIX_HEX)) != MP_OKAY) { + goto done; + } + + /* kB must be less than modulus */ + if (mp_cmp(kB, &order) != MP_LT) { + if ((err = mp_mod(kB, &order, &tkb)) != MP_OKAY) { + goto done; + } + } else { + if ((err = mp_copy(kB, &tkb)) != MP_OKAY) { + goto done; + } + } + } else { + if ((err = mp_copy(kB, &tkb)) != MP_OKAY) { + goto done; + } + } + + /* get bitlen and round up to next multiple of FP_LUT */ + bitlen = mp_unsigned_bin_size(modulus) << 3; + x = bitlen % FP_LUT; + if (x) { + bitlen += FP_LUT - x; + } + lut_gap = bitlen / FP_LUT; + + /* get the k value */ + if ((mp_unsigned_bin_size(&tka) > (int)(KB_SIZE - 2)) || + (mp_unsigned_bin_size(&tkb) > (int)(KB_SIZE - 2)) ) { + err = BUFFER_E; goto done; + } + + /* store k */ +#ifdef WOLFSSL_SMALL_STACK + kb[0] = (unsigned char*)XMALLOC(KB_SIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER); + if (kb[0] == NULL) { + err = MEMORY_E; goto done; + } +#endif + + XMEMSET(kb[0], 0, KB_SIZE); + if ((err = mp_to_unsigned_bin(&tka, kb[0])) != MP_OKAY) { + goto done; + } + + /* let's reverse kb so it's little endian */ + x = 0; + y = mp_unsigned_bin_size(&tka); + if (y > 0) { + y -= 1; + } + mp_clear(&tka); + while ((unsigned)x < y) { + z = kb[0][x]; kb[0][x] = kb[0][y]; kb[0][y] = (byte)z; + ++x; --y; + } + + /* store b */ +#ifdef WOLFSSL_SMALL_STACK + kb[1] = (unsigned char*)XMALLOC(KB_SIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER); + if (kb[1] == NULL) { + err = MEMORY_E; goto done; + } +#endif + + XMEMSET(kb[1], 0, KB_SIZE); + if ((err = mp_to_unsigned_bin(&tkb, kb[1])) == MP_OKAY) { + x = 0; + y = mp_unsigned_bin_size(&tkb); + if (y > 0) { + y -= 1; + } + + while ((unsigned)x < y) { + z = kb[1][x]; kb[1][x] = kb[1][y]; kb[1][y] = (byte)z; + ++x; --y; + } + + /* at this point we can start, yipee */ + first = 1; + for (x = lut_gap-1; x >= 0; x--) { + /* extract FP_LUT bits from kb spread out by lut_gap bits and + offset by x bits from the start */ + bitpos = x; + for (y = zA = zB = 0; y < FP_LUT; y++) { + zA |= ((kb[0][bitpos>>3] >> (bitpos&7)) & 1) << y; + zB |= ((kb[1][bitpos>>3] >> (bitpos&7)) & 1) << y; + bitpos += lut_gap; /* it's y*lut_gap + x, but here we can avoid + the mult in each loop */ + } + + /* double if not first */ + if (!first) { + if ((err = ecc_projective_dbl_point(R, R, a, modulus, + mp)) != MP_OKAY) { + break; + } + + /* add if not first, otherwise copy */ + if (zA) { + if ((err = ecc_projective_add_point(R, fp_cache[idx1].LUT[zA], + R, a, modulus, mp)) != MP_OKAY) { + break; + } + if (mp_iszero(R->z)) { + /* When all zero then should have done an add */ + if (mp_iszero(R->x) && mp_iszero(R->y)) { + if ((err = ecc_projective_dbl_point( + fp_cache[idx1].LUT[zA], R, + a, modulus, mp)) != MP_OKAY) { + break; + } + } + /* When only Z zero then result is infinity */ + else { + err = mp_set(R->x, 0); + if (err != MP_OKAY) { + break; + } + err = mp_set(R->y, 0); + if (err != MP_OKAY) { + break; + } + err = mp_copy(&fp_cache[idx1].mu, R->z); + if (err != MP_OKAY) { + 
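+                                /* the LUT entries were mapped to affine
+                                   form in build_lut() and their z cleared;
+                                   mu (1 in Montgomery representation) is
+                                   what a unit z looks like here, hence the
+                                   copy above -- if it fails, R is left
+                                   inconsistent and the error is passed up
+                                   via 'err' */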
break; + } + first = 1; + } + } + } + + if (zB) { + if ((err = ecc_projective_add_point(R, fp_cache[idx2].LUT[zB], + R, a, modulus, mp)) != MP_OKAY) { + break; + } + if (mp_iszero(R->z)) { + /* When all zero then should have done an add */ + if (mp_iszero(R->x) && mp_iszero(R->y)) { + if ((err = ecc_projective_dbl_point( + fp_cache[idx2].LUT[zB], R, + a, modulus, mp)) != MP_OKAY) { + break; + } + } + /* When only Z zero then result is infinity */ + else { + err = mp_set(R->x, 0); + if (err != MP_OKAY) { + break; + } + err = mp_set(R->y, 0); + if (err != MP_OKAY) { + break; + } + err = mp_copy(&fp_cache[idx2].mu, R->z); + if (err != MP_OKAY) { + break; + } + first = 1; + } + } + } + } else { + if (zA) { + if ((mp_copy(fp_cache[idx1].LUT[zA]->x, R->x) != MP_OKAY) || + (mp_copy(fp_cache[idx1].LUT[zA]->y, R->y) != MP_OKAY) || + (mp_copy(&fp_cache[idx1].mu, R->z) != MP_OKAY)) { + err = GEN_MEM_ERR; + break; + } + first = 0; + } + if (zB && first == 0) { + if (zB) { + if ((err = ecc_projective_add_point(R, + fp_cache[idx2].LUT[zB], R, a, modulus, mp)) != MP_OKAY){ + break; + } + if (mp_iszero(R->z)) { + /* When all zero then should have done an add */ + if (mp_iszero(R->x) && mp_iszero(R->y)) { + if ((err = ecc_projective_dbl_point( + fp_cache[idx2].LUT[zB], R, + a, modulus, mp)) != MP_OKAY) { + break; + } + } + /* When only Z zero then result is infinity */ + else { + err = mp_set(R->x, 0); + if (err != MP_OKAY) { + break; + } + err = mp_set(R->y, 0); + if (err != MP_OKAY) { + break; + } + err = mp_copy(&fp_cache[idx2].mu, R->z); + if (err != MP_OKAY) { + break; + } + first = 1; + } + } + } + } else if (zB && first == 1) { + if ((mp_copy(fp_cache[idx2].LUT[zB]->x, R->x) != MP_OKAY) || + (mp_copy(fp_cache[idx2].LUT[zB]->y, R->y) != MP_OKAY) || + (mp_copy(&fp_cache[idx2].mu, R->z) != MP_OKAY)) { + err = GEN_MEM_ERR; + break; + } + first = 0; + } + } + } + } + +done: + /* cleanup */ + mp_clear(&tkb); + mp_clear(&tka); + mp_clear(&order); + +#ifdef WOLFSSL_SMALL_STACK + if (kb[0]) +#endif + ForceZero(kb[0], KB_SIZE); +#ifdef WOLFSSL_SMALL_STACK + if (kb[1]) +#endif + ForceZero(kb[1], KB_SIZE); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(kb[0], NULL, DYNAMIC_TYPE_ECC_BUFFER); + XFREE(kb[1], NULL, DYNAMIC_TYPE_ECC_BUFFER); +#endif + +#undef KB_SIZE + + if (err != MP_OKAY) + return err; + + return ecc_map(R, modulus, mp); +} + + +/** ECC Fixed Point mulmod global with heap hint used + Computes kA*A + kB*B = C using Shamir's Trick + A First point to multiply + kA What to multiple A by + B Second point to multiply + kB What to multiple B by + C [out] Destination point (can overlap with A or B) + a ECC curve parameter a + modulus Modulus for curve + return MP_OKAY on success +*/ +int ecc_mul2add(ecc_point* A, mp_int* kA, + ecc_point* B, mp_int* kB, + ecc_point* C, mp_int* a, mp_int* modulus, void* heap) +{ + int idx1 = -1, idx2 = -1, err, mpInit = 0; + mp_digit mp; + mp_int mu; + + err = mp_init(&mu); + if (err != MP_OKAY) + return err; + +#ifndef HAVE_THREAD_LS + if (initMutex == 0) { + wc_InitMutex(&ecc_fp_lock); + initMutex = 1; + } + if (wc_LockMutex(&ecc_fp_lock) != 0) + return BAD_MUTEX_E; +#endif /* HAVE_THREAD_LS */ + + /* find point */ + idx1 = find_base(A); + + /* no entry? */ + if (idx1 == -1) { + /* find hole and add it */ + if ((idx1 = find_hole()) >= 0) { + err = add_entry(idx1, A); + } + } + if (err == MP_OKAY && idx1 != -1) { + /* increment LRU */ + ++(fp_cache[idx1].lru_count); + } + + if (err == MP_OKAY) { + /* find point */ + idx2 = find_base(B); + + /* no entry? 
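+       (cache miss: B has not been multiplied recently; find_hole() below
+       reclaims the least-recently-used unlocked slot and add_entry()
+       stores B there -- the lookup table itself is only built once the
+       entry reaches an lru_count of 2, i.e. on its second use)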
*/ + if (idx2 == -1) { + /* find hole and add it */ + if ((idx2 = find_hole()) >= 0) + err = add_entry(idx2, B); + } + } + + if (err == MP_OKAY && idx2 != -1) { + /* increment LRU */ + ++(fp_cache[idx2].lru_count); + } + + if (err == MP_OKAY) { + /* if it's 2 build the LUT, if it's higher just use the LUT */ + if (idx1 >= 0 && fp_cache[idx1].lru_count == 2) { + /* compute mp */ + err = mp_montgomery_setup(modulus, &mp); + + if (err == MP_OKAY) { + mpInit = 1; + err = mp_montgomery_calc_normalization(&mu, modulus); + } + + if (err == MP_OKAY) + /* build the LUT */ + err = build_lut(idx1, a, modulus, mp, &mu); + } + } + + if (err == MP_OKAY) { + /* if it's 2 build the LUT, if it's higher just use the LUT */ + if (idx2 >= 0 && fp_cache[idx2].lru_count == 2) { + if (mpInit == 0) { + /* compute mp */ + err = mp_montgomery_setup(modulus, &mp); + if (err == MP_OKAY) { + mpInit = 1; + err = mp_montgomery_calc_normalization(&mu, modulus); + } + } + + if (err == MP_OKAY) + /* build the LUT */ + err = build_lut(idx2, a, modulus, mp, &mu); + } + } + + + if (err == MP_OKAY) { + if (idx1 >=0 && idx2 >= 0 && fp_cache[idx1].lru_count >= 2 && + fp_cache[idx2].lru_count >= 2) { + if (mpInit == 0) { + /* compute mp */ + err = mp_montgomery_setup(modulus, &mp); + } + if (err == MP_OKAY) + err = accel_fp_mul2add(idx1, idx2, kA, kB, C, a, modulus, mp); + } else { + err = normal_ecc_mul2add(A, kA, B, kB, C, a, modulus, heap); + } + } + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&ecc_fp_lock); +#endif /* HAVE_THREAD_LS */ + mp_clear(&mu); + + return err; +} +#endif +#endif /* ECC_SHAMIR */ + +/** ECC Fixed Point mulmod global + k The multiplicand + G Base point to multiply + R [out] Destination of product + a ECC curve parameter a + modulus The modulus for the curve + map [boolean] If non-zero maps the point back to affine coordinates, + otherwise it's left in jacobian-montgomery form + return MP_OKAY if successful +*/ +int wc_ecc_mulmod_ex(mp_int* k, ecc_point *G, ecc_point *R, mp_int* a, + mp_int* modulus, int map, void* heap) +{ +#ifndef WOLFSSL_SP_MATH + int idx, err = MP_OKAY; + mp_digit mp; + mp_int mu; + int mpSetup = 0; + + if (k == NULL || G == NULL || R == NULL || a == NULL || modulus == NULL) { + return ECC_BAD_ARG_E; + } + + if (mp_init(&mu) != MP_OKAY) + return MP_INIT_E; + +#ifndef HAVE_THREAD_LS + if (initMutex == 0) { + wc_InitMutex(&ecc_fp_lock); + initMutex = 1; + } + + if (wc_LockMutex(&ecc_fp_lock) != 0) + return BAD_MUTEX_E; +#endif /* HAVE_THREAD_LS */ + + /* find point */ + idx = find_base(G); + + /* no entry? 
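+       (amortized cost of the cache: the first mulmod with a given base G
+       only stores the point and still runs normal_ecc_mulmod(); the
+       second call pays the one-time build_lut() cost; every call after
+       that takes the accel_fp_mul() fast path)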
*/
+    if (idx == -1) {
+        /* find hole and add it */
+        idx = find_hole();
+
+        if (idx >= 0)
+            err = add_entry(idx, G);
+    }
+    if (err == MP_OKAY && idx >= 0) {
+        /* increment LRU */
+        ++(fp_cache[idx].lru_count);
+    }
+
+
+    if (err == MP_OKAY) {
+        /* if it's 2 build the LUT, if it's higher just use the LUT */
+        if (idx >= 0 && fp_cache[idx].lru_count == 2) {
+            /* compute mp */
+            err = mp_montgomery_setup(modulus, &mp);
+
+            if (err == MP_OKAY) {
+                /* compute mu */
+                mpSetup = 1;
+                err = mp_montgomery_calc_normalization(&mu, modulus);
+            }
+
+            if (err == MP_OKAY)
+                /* build the LUT */
+                err = build_lut(idx, a, modulus, mp, &mu);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (idx >= 0 && fp_cache[idx].lru_count >= 2) {
+            if (mpSetup == 0) {
+                /* compute mp */
+                err = mp_montgomery_setup(modulus, &mp);
+            }
+            if (err == MP_OKAY)
+                err = accel_fp_mul(idx, k, R, a, modulus, mp, map);
+        } else {
+            err = normal_ecc_mulmod(k, G, R, a, modulus, map, heap);
+        }
+    }
+
+#ifndef HAVE_THREAD_LS
+    wc_UnLockMutex(&ecc_fp_lock);
+#endif /* HAVE_THREAD_LS */
+    mp_clear(&mu);
+
+    return err;
+#else
+    if (k == NULL || G == NULL || R == NULL || a == NULL || modulus == NULL) {
+        return ECC_BAD_ARG_E;
+    }
+
+#ifndef WOLFSSL_SP_NO_256
+    if (mp_count_bits(modulus) == 256) {
+        return sp_ecc_mulmod_256(k, G, R, map, heap);
+    }
+#endif
+#ifdef WOLFSSL_SP_384
+    if (mp_count_bits(modulus) == 384) {
+        return sp_ecc_mulmod_384(k, G, R, map, heap);
+    }
+#endif
+    return WC_KEY_SIZE_E;
+#endif
+}
+
+#ifndef WOLFSSL_SP_MATH
+/* helper function for freeing the cache ...
+   must be called with the cache mutex locked */
+static void wc_ecc_fp_free_cache(void)
+{
+   unsigned x, y;
+   for (x = 0; x < FP_ENTRIES; x++) {
+      if (fp_cache[x].g != NULL) {
+         for (y = 0; y < (1U<<FP_LUT); y++) {
+            wc_ecc_del_point(fp_cache[x].LUT[y]);
+            fp_cache[x].LUT[y] = NULL;
+         }
+         wc_ecc_del_point(fp_cache[x].g);
+         fp_cache[x].g = NULL;
+         mp_clear(&fp_cache[x].mu);
+         fp_cache[x].lru_count = 0;
+         fp_cache[x].lock = 0;
+      }
+   }
+}
+#endif /* !WOLFSSL_SP_MATH */
+
+
+/** Free the Fixed Point cache */
+void wc_ecc_fp_free(void)
+{
+#ifndef WOLFSSL_SP_MATH
+#ifndef HAVE_THREAD_LS
+   if (initMutex == 0) {
+        wc_InitMutex(&ecc_fp_lock);
+        initMutex = 1;
+   }
+
+   if (wc_LockMutex(&ecc_fp_lock) == 0) {
+#endif /* HAVE_THREAD_LS */
+
+       wc_ecc_fp_free_cache();
+
+#ifndef HAVE_THREAD_LS
+       wc_UnLockMutex(&ecc_fp_lock);
+       wc_FreeMutex(&ecc_fp_lock);
+       initMutex = 0;
+   }
+#endif /* HAVE_THREAD_LS */
+#endif /* !WOLFSSL_SP_MATH */
+}
+
+#endif /* FP_ECC */
+
+#ifdef HAVE_ECC_ENCRYPT
+
+
+enum ecCliState {
+    ecCLI_INIT      = 1,
+    ecCLI_SALT_GET  = 2,
+    ecCLI_SALT_SET  = 3,
+    ecCLI_SENT_REQ  = 4,
+    ecCLI_RECV_RESP = 5,
+    ecCLI_BAD_STATE = 99
+};
+
+enum ecSrvState {
+    ecSRV_INIT      = 1,
+    ecSRV_SALT_GET  = 2,
+    ecSRV_SALT_SET  = 3,
+    ecSRV_RECV_REQ  = 4,
+    ecSRV_SENT_RESP = 5,
+    ecSRV_BAD_STATE = 99
+};
+
+
+struct ecEncCtx {
+    const byte* kdfSalt;   /* optional salt for kdf */
+    const byte* kdfInfo;   /* optional info for kdf */
+    const byte* macSalt;   /* optional salt for mac */
+    word32    kdfSaltSz;   /* size of kdfSalt */
+    word32    kdfInfoSz;   /* size of kdfInfo */
+    word32    macSaltSz;   /* size of macSalt */
+    void*     heap;        /* heap hint for memory used */
+    byte      clientSalt[EXCHANGE_SALT_SZ];  /* for msg exchange */
+    byte      serverSalt[EXCHANGE_SALT_SZ];  /* for msg exchange */
+    byte      encAlgo;     /* which encryption type */
+    byte      kdfAlgo;     /* which key derivation function type */
+    byte      macAlgo;     /* which mac algorithm type */
+    byte      protocol;    /* are we REQ_RESP client or server ? */
+    byte      cliSt;       /* protocol state, for sanity checks */
+    byte      srvSt;       /* protocol state, for sanity checks */
+};
+
+
+const byte* wc_ecc_ctx_get_own_salt(ecEncCtx* ctx)
+{
+    if (ctx == NULL || ctx->protocol == 0)
+        return NULL;
+
+    if (ctx->protocol == REQ_RESP_CLIENT) {
+        if (ctx->cliSt == ecCLI_INIT) {
+            ctx->cliSt = ecCLI_SALT_GET;
+            return ctx->clientSalt;
+        }
+        else {
+            ctx->cliSt = ecCLI_BAD_STATE;
+            return NULL;
+        }
+    }
+    else if (ctx->protocol == REQ_RESP_SERVER) {
+        if (ctx->srvSt == ecSRV_INIT) {
+            ctx->srvSt = ecSRV_SALT_GET;
+            return ctx->serverSalt;
+        }
+        else {
+            ctx->srvSt = ecSRV_BAD_STATE;
+            return NULL;
+        }
+    }
+
+    return NULL;
+}
+
+
+/* optional set info, can be called before or after set_peer_salt */
+int wc_ecc_ctx_set_info(ecEncCtx* ctx, const byte* info, int sz)
+{
+    if (ctx == NULL || info == 0 || sz < 0)
+        return BAD_FUNC_ARG;
+
+    ctx->kdfInfo   = info;
+    ctx->kdfInfoSz = sz;
+
+    return 0;
+}
+
+
+static const char* exchange_info = "Secure Message Exchange";
+
+int wc_ecc_ctx_set_peer_salt(ecEncCtx* ctx, const byte* salt)
+{
+    byte tmp[EXCHANGE_SALT_SZ/2];
+    int  halfSz = EXCHANGE_SALT_SZ/2;
+
+    if (ctx == NULL || ctx->protocol == 0 || salt == NULL)
+        return BAD_FUNC_ARG;
+
+    if (ctx->protocol == REQ_RESP_CLIENT) {
+        XMEMCPY(ctx->serverSalt, salt, EXCHANGE_SALT_SZ);
+        if (ctx->cliSt == ecCLI_SALT_GET)
+            ctx->cliSt = ecCLI_SALT_SET;
+        else {
+            ctx->cliSt = ecCLI_BAD_STATE;
+            return BAD_STATE_E;
+        }
+    }
+    else {
+        XMEMCPY(ctx->clientSalt, salt, EXCHANGE_SALT_SZ);
+        if (ctx->srvSt == ecSRV_SALT_GET)
+            ctx->srvSt = ecSRV_SALT_SET;
+        else {
+            ctx->srvSt = ecSRV_BAD_STATE;
+            return BAD_STATE_E;
+        }
+    }
+
+    /* mix half and half */
+    /* tmp stores 2nd half of client before overwrite */
+    XMEMCPY(tmp, ctx->clientSalt + halfSz, halfSz);
+    XMEMCPY(ctx->clientSalt + halfSz, ctx->serverSalt, halfSz);
+    XMEMCPY(ctx->serverSalt, tmp, halfSz);
+
+    ctx->kdfSalt   = ctx->clientSalt;
+    ctx->kdfSaltSz = EXCHANGE_SALT_SZ;
+
+    ctx->macSalt   = ctx->serverSalt;
+    ctx->macSaltSz 
= EXCHANGE_SALT_SZ; + + if (ctx->kdfInfo == NULL) { + /* default info */ + ctx->kdfInfo = (const byte*)exchange_info; + ctx->kdfInfoSz = EXCHANGE_INFO_SZ; + } + + return 0; +} + + +static int ecc_ctx_set_salt(ecEncCtx* ctx, int flags, WC_RNG* rng) +{ + byte* saltBuffer = NULL; + + if (ctx == NULL || rng == NULL || flags == 0) + return BAD_FUNC_ARG; + + saltBuffer = (flags == REQ_RESP_CLIENT) ? ctx->clientSalt : ctx->serverSalt; + + return wc_RNG_GenerateBlock(rng, saltBuffer, EXCHANGE_SALT_SZ); +} + + +static void ecc_ctx_init(ecEncCtx* ctx, int flags) +{ + if (ctx) { + XMEMSET(ctx, 0, sizeof(ecEncCtx)); + + ctx->encAlgo = ecAES_128_CBC; + ctx->kdfAlgo = ecHKDF_SHA256; + ctx->macAlgo = ecHMAC_SHA256; + ctx->protocol = (byte)flags; + + if (flags == REQ_RESP_CLIENT) + ctx->cliSt = ecCLI_INIT; + if (flags == REQ_RESP_SERVER) + ctx->srvSt = ecSRV_INIT; + } +} + + +/* allow ecc context reset so user doesn't have to init/free for reuse */ +int wc_ecc_ctx_reset(ecEncCtx* ctx, WC_RNG* rng) +{ + if (ctx == NULL || rng == NULL) + return BAD_FUNC_ARG; + + ecc_ctx_init(ctx, ctx->protocol); + return ecc_ctx_set_salt(ctx, ctx->protocol, rng); +} + + +ecEncCtx* wc_ecc_ctx_new_ex(int flags, WC_RNG* rng, void* heap) +{ + int ret = 0; + ecEncCtx* ctx = (ecEncCtx*)XMALLOC(sizeof(ecEncCtx), heap, + DYNAMIC_TYPE_ECC); + + if (ctx) { + ctx->protocol = (byte)flags; + ctx->heap = heap; + } + + ret = wc_ecc_ctx_reset(ctx, rng); + if (ret != 0) { + wc_ecc_ctx_free(ctx); + ctx = NULL; + } + + return ctx; +} + + +/* alloc/init and set defaults, return new Context */ +ecEncCtx* wc_ecc_ctx_new(int flags, WC_RNG* rng) +{ + return wc_ecc_ctx_new_ex(flags, rng, NULL); +} + + +/* free any resources, clear any keys */ +void wc_ecc_ctx_free(ecEncCtx* ctx) +{ + if (ctx) { + ForceZero(ctx, sizeof(ecEncCtx)); + XFREE(ctx, ctx->heap, DYNAMIC_TYPE_ECC); + } +} + + +static int ecc_get_key_sizes(ecEncCtx* ctx, int* encKeySz, int* ivSz, + int* keysLen, word32* digestSz, word32* blockSz) +{ + if (ctx) { + switch (ctx->encAlgo) { + case ecAES_128_CBC: + *encKeySz = KEY_SIZE_128; + *ivSz = IV_SIZE_128; + *blockSz = AES_BLOCK_SIZE; + break; + default: + return BAD_FUNC_ARG; + } + + switch (ctx->macAlgo) { + case ecHMAC_SHA256: + *digestSz = WC_SHA256_DIGEST_SIZE; + break; + default: + return BAD_FUNC_ARG; + } + } else + return BAD_FUNC_ARG; + + *keysLen = *encKeySz + *ivSz + *digestSz; + + return 0; +} + + +/* ecc encrypt with shared secret run through kdf + ctx holds non default algos and inputs + msgSz should be the right size for encAlgo, i.e., already padded + return 0 on success */ +int wc_ecc_encrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg, + word32 msgSz, byte* out, word32* outSz, ecEncCtx* ctx) +{ + int ret = 0; + word32 blockSz; + word32 digestSz; + ecEncCtx localCtx; +#ifdef WOLFSSL_SMALL_STACK + byte* sharedSecret; + byte* keys; +#else + byte sharedSecret[ECC_MAXSIZE]; /* 521 max size */ + byte keys[ECC_BUFSIZE]; /* max size */ +#endif + word32 sharedSz = ECC_MAXSIZE; + int keysLen; + int encKeySz; + int ivSz; + int offset = 0; /* keys offset if doing msg exchange */ + byte* encKey; + byte* encIv; + byte* macKey; + + if (privKey == NULL || pubKey == NULL || msg == NULL || out == NULL || + outSz == NULL) + return BAD_FUNC_ARG; + + if (ctx == NULL) { /* use defaults */ + ecc_ctx_init(&localCtx, 0); + ctx = &localCtx; + } + + ret = ecc_get_key_sizes(ctx, &encKeySz, &ivSz, &keysLen, &digestSz, + &blockSz); + if (ret != 0) + return ret; + + if (ctx->protocol == REQ_RESP_SERVER) { + offset = keysLen; + keysLen *= 2; + + 
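+        /* one HKDF expansion covers both directions of the exchange: with
+         * the default AES-128-CBC + HMAC-SHA256 suite each direction uses
+         * 16 (encKey) + 16 (IV) + 32 (macKey) = 64 bytes, laid out as
+         *
+         *     keys[0 .. offset-1]          client request material
+         *     keys[offset .. 2*offset-1]   server response material
+         *
+         * so the server doubles keysLen and selects its half via 'offset' */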
if (ctx->srvSt != ecSRV_RECV_REQ) + return BAD_STATE_E; + + ctx->srvSt = ecSRV_BAD_STATE; /* we're done no more ops allowed */ + } + else if (ctx->protocol == REQ_RESP_CLIENT) { + if (ctx->cliSt != ecCLI_SALT_SET) + return BAD_STATE_E; + + ctx->cliSt = ecCLI_SENT_REQ; /* only do this once */ + } + + if (keysLen > ECC_BUFSIZE) /* keys size */ + return BUFFER_E; + + if ( (msgSz%blockSz) != 0) + return BAD_PADDING_E; + + if (*outSz < (msgSz + digestSz)) + return BUFFER_E; + +#ifdef WOLFSSL_SMALL_STACK + sharedSecret = (byte*)XMALLOC(ECC_MAXSIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER); + if (sharedSecret == NULL) + return MEMORY_E; + + keys = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER); + if (keys == NULL) { + XFREE(sharedSecret, NULL, DYNAMIC_TYPE_ECC_BUFFER); + return MEMORY_E; + } +#endif + + do { + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) + ret = wc_AsyncWait(ret, &privKey->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN); + if (ret != 0) + break; + #endif + ret = wc_ecc_shared_secret(privKey, pubKey, sharedSecret, &sharedSz); + } while (ret == WC_PENDING_E); + if (ret == 0) { + switch (ctx->kdfAlgo) { + case ecHKDF_SHA256 : + ret = wc_HKDF(WC_SHA256, sharedSecret, sharedSz, ctx->kdfSalt, + ctx->kdfSaltSz, ctx->kdfInfo, ctx->kdfInfoSz, + keys, keysLen); + break; + + default: + ret = BAD_FUNC_ARG; + break; + } + } + + if (ret == 0) { + encKey = keys + offset; + encIv = encKey + encKeySz; + macKey = encKey + encKeySz + ivSz; + + switch (ctx->encAlgo) { + case ecAES_128_CBC: + { + Aes aes; + ret = wc_AesInit(&aes, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_AesSetKey(&aes, encKey, KEY_SIZE_128, encIv, + AES_ENCRYPTION); + if (ret == 0) { + ret = wc_AesCbcEncrypt(&aes, out, msg, msgSz); + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) + ret = wc_AsyncWait(ret, &aes.asyncDev, + WC_ASYNC_FLAG_NONE); + #endif + } + wc_AesFree(&aes); + } + if (ret != 0) + break; + } + break; + + default: + ret = BAD_FUNC_ARG; + break; + } + } + + if (ret == 0) { + switch (ctx->macAlgo) { + case ecHMAC_SHA256: + { + Hmac hmac; + ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_HmacSetKey(&hmac, WC_SHA256, macKey, WC_SHA256_DIGEST_SIZE); + if (ret == 0) + ret = wc_HmacUpdate(&hmac, out, msgSz); + if (ret == 0) + ret = wc_HmacUpdate(&hmac, ctx->macSalt, ctx->macSaltSz); + if (ret == 0) + ret = wc_HmacFinal(&hmac, out+msgSz); + wc_HmacFree(&hmac); + } + } + break; + + default: + ret = BAD_FUNC_ARG; + break; + } + } + + if (ret == 0) + *outSz = msgSz + digestSz; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(sharedSecret, NULL, DYNAMIC_TYPE_ECC_BUFFER); + XFREE(keys, NULL, DYNAMIC_TYPE_ECC_BUFFER); +#endif + + return ret; +} + + +/* ecc decrypt with shared secret run through kdf + ctx holds non default algos and inputs + return 0 on success */ +int wc_ecc_decrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg, + word32 msgSz, byte* out, word32* outSz, ecEncCtx* ctx) +{ + int ret = 0; + word32 blockSz; + word32 digestSz; + ecEncCtx localCtx; +#ifdef WOLFSSL_SMALL_STACK + byte* sharedSecret; + byte* keys; +#else + byte sharedSecret[ECC_MAXSIZE]; /* 521 max size */ + byte keys[ECC_BUFSIZE]; /* max size */ +#endif + word32 sharedSz = ECC_MAXSIZE; + int keysLen; + int encKeySz; + int ivSz; + int offset = 0; /* in case using msg exchange */ + byte* encKey; + byte* encIv; + byte* macKey; + + if (privKey == NULL || pubKey == NULL || msg == NULL || out == NULL || + outSz == NULL) + return BAD_FUNC_ARG; + + if (ctx == NULL) { /* use defaults */ + 
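+        /* a NULL ctx selects the compiled-in defaults (ecAES_128_CBC,
+           ecHKDF_SHA256, ecHMAC_SHA256) with no salt, info or protocol
+           state checks; this interoperates with a peer that encrypted
+           with a NULL ctx as well */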
ecc_ctx_init(&localCtx, 0); + ctx = &localCtx; + } + + ret = ecc_get_key_sizes(ctx, &encKeySz, &ivSz, &keysLen, &digestSz, + &blockSz); + if (ret != 0) + return ret; + + if (ctx->protocol == REQ_RESP_CLIENT) { + offset = keysLen; + keysLen *= 2; + + if (ctx->cliSt != ecCLI_SENT_REQ) + return BAD_STATE_E; + + ctx->cliSt = ecSRV_BAD_STATE; /* we're done no more ops allowed */ + } + else if (ctx->protocol == REQ_RESP_SERVER) { + if (ctx->srvSt != ecSRV_SALT_SET) + return BAD_STATE_E; + + ctx->srvSt = ecSRV_RECV_REQ; /* only do this once */ + } + + if (keysLen > ECC_BUFSIZE) /* keys size */ + return BUFFER_E; + + if ( ((msgSz-digestSz) % blockSz) != 0) + return BAD_PADDING_E; + + if (*outSz < (msgSz - digestSz)) + return BUFFER_E; + +#ifdef WOLFSSL_SMALL_STACK + sharedSecret = (byte*)XMALLOC(ECC_MAXSIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER); + if (sharedSecret == NULL) + return MEMORY_E; + + keys = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER); + if (keys == NULL) { + XFREE(sharedSecret, NULL, DYNAMIC_TYPE_ECC_BUFFER); + return MEMORY_E; + } +#endif + + do { + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) + ret = wc_AsyncWait(ret, &privKey->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN); + if (ret != 0) + break; + #endif + ret = wc_ecc_shared_secret(privKey, pubKey, sharedSecret, &sharedSz); + } while (ret == WC_PENDING_E); + if (ret == 0) { + switch (ctx->kdfAlgo) { + case ecHKDF_SHA256 : + ret = wc_HKDF(WC_SHA256, sharedSecret, sharedSz, ctx->kdfSalt, + ctx->kdfSaltSz, ctx->kdfInfo, ctx->kdfInfoSz, + keys, keysLen); + break; + + default: + ret = BAD_FUNC_ARG; + break; + } + } + + if (ret == 0) { + encKey = keys + offset; + encIv = encKey + encKeySz; + macKey = encKey + encKeySz + ivSz; + + switch (ctx->macAlgo) { + case ecHMAC_SHA256: + { + byte verify[WC_SHA256_DIGEST_SIZE]; + Hmac hmac; + + ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_HmacSetKey(&hmac, WC_SHA256, macKey, WC_SHA256_DIGEST_SIZE); + if (ret == 0) + ret = wc_HmacUpdate(&hmac, msg, msgSz-digestSz); + if (ret == 0) + ret = wc_HmacUpdate(&hmac, ctx->macSalt, ctx->macSaltSz); + if (ret == 0) + ret = wc_HmacFinal(&hmac, verify); + if (ret == 0) { + if (XMEMCMP(verify, msg + msgSz - digestSz, digestSz) != 0) + ret = -1; + } + + wc_HmacFree(&hmac); + } + break; + } + + default: + ret = BAD_FUNC_ARG; + break; + } + } + + if (ret == 0) { + switch (ctx->encAlgo) { + #ifdef HAVE_AES_CBC + case ecAES_128_CBC: + { + Aes aes; + ret = wc_AesSetKey(&aes, encKey, KEY_SIZE_128, encIv, + AES_DECRYPTION); + if (ret != 0) + break; + ret = wc_AesCbcDecrypt(&aes, out, msg, msgSz-digestSz); + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) + ret = wc_AsyncWait(ret, &aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + } + break; + #endif + default: + ret = BAD_FUNC_ARG; + break; + } + } + + if (ret == 0) + *outSz = msgSz - digestSz; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(sharedSecret, NULL, DYNAMIC_TYPE_ECC_BUFFER); + XFREE(keys, NULL, DYNAMIC_TYPE_ECC_BUFFER); +#endif + + return ret; +} + + +#endif /* HAVE_ECC_ENCRYPT */ + + +#ifdef HAVE_COMP_KEY +#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL) + +#ifndef WOLFSSL_SP_MATH +int do_mp_jacobi(mp_int* a, mp_int* n, int* c); + +int do_mp_jacobi(mp_int* a, mp_int* n, int* c) +{ + int k, s, res; + int r = 0; /* initialize to help static analysis out */ + mp_digit residue; + + /* if a < 0 return MP_VAL */ + if (mp_isneg(a) == MP_YES) { + return MP_VAL; + } + + /* if n <= 0 return MP_VAL */ + if (mp_cmp_d(n, 0) != MP_GT) { + return MP_VAL; 
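+        /* for odd prime n this is the Legendre symbol: 1 when a is a
+           quadratic residue mod n, -1 when it is not, 0 when n divides a.
+           Hand-worked check: (2|7) = 1 since 3*3 = 9 = 2 (mod 7), while
+           (3|7) = -1 because x*x = 3 (mod 7) has no solution. */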
+ } + + /* step 1. handle case of a == 0 */ + if (mp_iszero (a) == MP_YES) { + /* special case of a == 0 and n == 1 */ + if (mp_cmp_d (n, 1) == MP_EQ) { + *c = 1; + } else { + *c = 0; + } + return MP_OKAY; + } + + /* step 2. if a == 1, return 1 */ + if (mp_cmp_d (a, 1) == MP_EQ) { + *c = 1; + return MP_OKAY; + } + + /* default */ + s = 0; + + /* divide out larger power of two */ + k = mp_cnt_lsb(a); + res = mp_div_2d(a, k, a, NULL); + + if (res == MP_OKAY) { + /* step 4. if e is even set s=1 */ + if ((k & 1) == 0) { + s = 1; + } else { + /* else set s=1 if p = 1/7 (mod 8) or s=-1 if p = 3/5 (mod 8) */ + residue = n->dp[0] & 7; + + if (residue == 1 || residue == 7) { + s = 1; + } else if (residue == 3 || residue == 5) { + s = -1; + } + } + + /* step 5. if p == 3 (mod 4) *and* a == 3 (mod 4) then s = -s */ + if ( ((n->dp[0] & 3) == 3) && ((a->dp[0] & 3) == 3)) { + s = -s; + } + } + + if (res == MP_OKAY) { + /* if a == 1 we're done */ + if (mp_cmp_d(a, 1) == MP_EQ) { + *c = s; + } else { + /* n1 = n mod a */ + res = mp_mod (n, a, n); + if (res == MP_OKAY) + res = do_mp_jacobi(n, a, &r); + + if (res == MP_OKAY) + *c = s * r; + } + } + + return res; +} + + +/* computes the jacobi c = (a | n) (or Legendre if n is prime) + * HAC pp. 73 Algorithm 2.149 + * HAC is wrong here, as the special case of (0 | 1) is not + * handled correctly. + */ +int mp_jacobi(mp_int* a, mp_int* n, int* c) +{ + mp_int a1, n1; + int res; + + /* step 3. write a = a1 * 2**k */ + if ((res = mp_init_multi(&a1, &n1, NULL, NULL, NULL, NULL)) != MP_OKAY) { + return res; + } + + if ((res = mp_copy(a, &a1)) != MP_OKAY) { + goto done; + } + + if ((res = mp_copy(n, &n1)) != MP_OKAY) { + goto done; + } + + res = do_mp_jacobi(&a1, &n1, c); + +done: + /* cleanup */ + mp_clear(&n1); + mp_clear(&a1); + + return res; +} + + +/* Solves the modular equation x^2 = n (mod p) + * where prime number is greater than 2 (odd prime). 
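+ *
+ * When p = 3 (mod 4) the root is computed directly as
+ * x = n^((p+1)/4) mod p (HAC 3.36); all other odd primes go through the
+ * Tonelli-Shanks loop below.  Hand-worked check: n = 2, p = 7 gives
+ * x = 2^((7+1)/4) = 4, and 4*4 = 16 = 2 (mod 7).
+ *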
+ * The result is returned in the third argument x + * the function returns MP_OKAY on success, MP_VAL or another error on failure + */ +int mp_sqrtmod_prime(mp_int* n, mp_int* prime, mp_int* ret) +{ +#ifdef SQRTMOD_USE_MOD_EXP + int res; + + mp_int e; + + res = mp_init(&e); + if (res == MP_OKAY) + res = mp_add_d(prime, 1, &e); + if (res == MP_OKAY) + res = mp_div_2d(&e, 2, &e, NULL); + if (res == MP_OKAY) + res = mp_exptmod(n, &e, prime, ret); + + mp_clear(&e); + + return res; +#else + int res, legendre, done = 0; + mp_int t1, C, Q, S, Z, M, T, R, two; + mp_digit i; + + /* first handle the simple cases n = 0 or n = 1 */ + if (mp_cmp_d(n, 0) == MP_EQ) { + mp_zero(ret); + return MP_OKAY; + } + if (mp_cmp_d(n, 1) == MP_EQ) { + return mp_set(ret, 1); + } + + /* prime must be odd */ + if (mp_cmp_d(prime, 2) == MP_EQ) { + return MP_VAL; + } + + /* is quadratic non-residue mod prime */ + if ((res = mp_jacobi(n, prime, &legendre)) != MP_OKAY) { + return res; + } + if (legendre == -1) { + return MP_VAL; + } + + if ((res = mp_init_multi(&t1, &C, &Q, &S, &Z, &M)) != MP_OKAY) + return res; + + if ((res = mp_init_multi(&T, &R, &two, NULL, NULL, NULL)) + != MP_OKAY) { + mp_clear(&t1); mp_clear(&C); mp_clear(&Q); mp_clear(&S); mp_clear(&Z); + mp_clear(&M); + return res; + } + + /* SPECIAL CASE: if prime mod 4 == 3 + * compute directly: res = n^(prime+1)/4 mod prime + * Handbook of Applied Cryptography algorithm 3.36 + */ + res = mp_mod_d(prime, 4, &i); + if (res == MP_OKAY && i == 3) { + res = mp_add_d(prime, 1, &t1); + + if (res == MP_OKAY) + res = mp_div_2(&t1, &t1); + if (res == MP_OKAY) + res = mp_div_2(&t1, &t1); + if (res == MP_OKAY) + res = mp_exptmod(n, &t1, prime, ret); + + done = 1; + } + + /* NOW: TonelliShanks algorithm */ + if (res == MP_OKAY && done == 0) { + + /* factor out powers of 2 from prime-1, defining Q and S + * as: prime-1 = Q*2^S */ + /* Q = prime - 1 */ + res = mp_copy(prime, &Q); + if (res == MP_OKAY) + res = mp_sub_d(&Q, 1, &Q); + + /* S = 0 */ + if (res == MP_OKAY) + mp_zero(&S); + + while (res == MP_OKAY && mp_iseven(&Q) == MP_YES) { + /* Q = Q / 2 */ + res = mp_div_2(&Q, &Q); + + /* S = S + 1 */ + if (res == MP_OKAY) + res = mp_add_d(&S, 1, &S); + } + + /* find a Z such that the Legendre symbol (Z|prime) == -1 */ + /* Z = 2 */ + if (res == MP_OKAY) + res = mp_set_int(&Z, 2); + + while (res == MP_OKAY) { + res = mp_jacobi(&Z, prime, &legendre); + if (res == MP_OKAY && legendre == -1) + break; + + /* Z = Z + 1 */ + if (res == MP_OKAY) + res = mp_add_d(&Z, 1, &Z); + } + + /* C = Z ^ Q mod prime */ + if (res == MP_OKAY) + res = mp_exptmod(&Z, &Q, prime, &C); + + /* t1 = (Q + 1) / 2 */ + if (res == MP_OKAY) + res = mp_add_d(&Q, 1, &t1); + if (res == MP_OKAY) + res = mp_div_2(&t1, &t1); + + /* R = n ^ ((Q + 1) / 2) mod prime */ + if (res == MP_OKAY) + res = mp_exptmod(n, &t1, prime, &R); + + /* T = n ^ Q mod prime */ + if (res == MP_OKAY) + res = mp_exptmod(n, &Q, prime, &T); + + /* M = S */ + if (res == MP_OKAY) + res = mp_copy(&S, &M); + + if (res == MP_OKAY) + res = mp_set_int(&two, 2); + + while (res == MP_OKAY && done == 0) { + res = mp_copy(&T, &t1); + + /* reduce to 1 and count */ + i = 0; + while (res == MP_OKAY) { + if (mp_cmp_d(&t1, 1) == MP_EQ) + break; + res = mp_exptmod(&t1, &two, prime, &t1); + if (res == MP_OKAY) + i++; + } + if (res == MP_OKAY && i == 0) { + res = mp_copy(&R, ret); + done = 1; + } + + if (done == 0) { + /* t1 = 2 ^ (M - i - 1) */ + if (res == MP_OKAY) + res = mp_sub_d(&M, i, &t1); + if (res == MP_OKAY) + res = mp_sub_d(&t1, 1, &t1); + if (res == 
MP_OKAY) + res = mp_exptmod(&two, &t1, prime, &t1); + + /* t1 = C ^ (2 ^ (M - i - 1)) mod prime */ + if (res == MP_OKAY) + res = mp_exptmod(&C, &t1, prime, &t1); + + /* C = (t1 * t1) mod prime */ + if (res == MP_OKAY) + res = mp_sqrmod(&t1, prime, &C); + + /* R = (R * t1) mod prime */ + if (res == MP_OKAY) + res = mp_mulmod(&R, &t1, prime, &R); + + /* T = (T * C) mod prime */ + if (res == MP_OKAY) + res = mp_mulmod(&T, &C, prime, &T); + + /* M = i */ + if (res == MP_OKAY) + res = mp_set(&M, i); + } + } + } + + /* done */ + mp_clear(&t1); + mp_clear(&C); + mp_clear(&Q); + mp_clear(&S); + mp_clear(&Z); + mp_clear(&M); + mp_clear(&T); + mp_clear(&R); + mp_clear(&two); + + return res; +#endif +} +#endif +#endif /* !WOLFSSL_ATECC508A && !WOLFSSL_CRYPTOCELL */ + + +/* export public ECC key in ANSI X9.63 format compressed */ +static int wc_ecc_export_x963_compressed(ecc_key* key, byte* out, word32* outLen) +{ + word32 numlen; + int ret = MP_OKAY; + + if (key == NULL || out == NULL || outLen == NULL) + return BAD_FUNC_ARG; + + if (wc_ecc_is_valid_idx(key->idx) == 0) { + return ECC_BAD_ARG_E; + } + numlen = key->dp->size; + + if (*outLen < (1 + numlen)) { + *outLen = 1 + numlen; + return BUFFER_E; + } + + /* store first byte */ + out[0] = mp_isodd(key->pubkey.y) == MP_YES ? ECC_POINT_COMP_ODD : ECC_POINT_COMP_EVEN; + + /* pad and store x */ + XMEMSET(out+1, 0, numlen); + ret = mp_to_unsigned_bin(key->pubkey.x, + out+1 + (numlen - mp_unsigned_bin_size(key->pubkey.x))); + *outLen = 1 + numlen; + + return ret; +} + +#endif /* HAVE_COMP_KEY */ + + +int wc_ecc_get_oid(word32 oidSum, const byte** oid, word32* oidSz) +{ + int x; + + if (oidSum == 0) { + return BAD_FUNC_ARG; + } + + /* find matching OID sum (based on encoded value) */ + for (x = 0; ecc_sets[x].size != 0; x++) { + if (ecc_sets[x].oidSum == oidSum) { + int ret; + #ifdef HAVE_OID_ENCODING + ret = 0; + /* check cache */ + oid_cache_t* o = &ecc_oid_cache[x]; + if (o->oidSz == 0) { + o->oidSz = sizeof(o->oid); + ret = EncodeObjectId(ecc_sets[x].oid, ecc_sets[x].oidSz, + o->oid, &o->oidSz); + } + if (oidSz) { + *oidSz = o->oidSz; + } + if (oid) { + *oid = o->oid; + } + /* on success return curve id */ + if (ret == 0) { + ret = ecc_sets[x].id; + } + #else + if (oidSz) { + *oidSz = ecc_sets[x].oidSz; + } + if (oid) { + *oid = ecc_sets[x].oid; + } + ret = ecc_sets[x].id; + #endif + return ret; + } + } + + return NOT_COMPILED_IN; +} + +#ifdef WOLFSSL_CUSTOM_CURVES +int wc_ecc_set_custom_curve(ecc_key* key, const ecc_set_type* dp) +{ + if (key == NULL || dp == NULL) { + return BAD_FUNC_ARG; + } + + key->idx = ECC_CUSTOM_IDX; + key->dp = dp; + + return 0; +} +#endif /* WOLFSSL_CUSTOM_CURVES */ + +#ifdef HAVE_X963_KDF + +static WC_INLINE void IncrementX963KdfCounter(byte* inOutCtr) +{ + int i; + + /* in network byte order so start at end and work back */ + for (i = 3; i >= 0; i--) { + if (++inOutCtr[i]) /* we're done unless we overflow */ + return; + } +} + +/* ASN X9.63 Key Derivation Function (SEC1) */ +int wc_X963_KDF(enum wc_HashType type, const byte* secret, word32 secretSz, + const byte* sinfo, word32 sinfoSz, byte* out, word32 outSz) +{ + int ret, i; + int digestSz, copySz; + int remaining = outSz; + byte* outIdx; + byte counter[4]; + byte tmp[WC_MAX_DIGEST_SIZE]; + +#ifdef WOLFSSL_SMALL_STACK + wc_HashAlg* hash; +#else + wc_HashAlg hash[1]; +#endif + + if (secret == NULL || secretSz == 0 || out == NULL) + return BAD_FUNC_ARG; + + /* X9.63 allowed algos only */ + if (type != WC_HASH_TYPE_SHA && type != WC_HASH_TYPE_SHA224 && + type != 
WC_HASH_TYPE_SHA256 && type != WC_HASH_TYPE_SHA384 && + type != WC_HASH_TYPE_SHA512) + return BAD_FUNC_ARG; + + digestSz = wc_HashGetDigestSize(type); + if (digestSz < 0) + return digestSz; + +#ifdef WOLFSSL_SMALL_STACK + hash = (wc_HashAlg*)XMALLOC(sizeof(wc_HashAlg), NULL, + DYNAMIC_TYPE_HASHES); + if (hash == NULL) + return MEMORY_E; +#endif + + ret = wc_HashInit(hash, type); + if (ret != 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(hash, NULL, DYNAMIC_TYPE_HASHES); +#endif + return ret; + } + + outIdx = out; + XMEMSET(counter, 0, sizeof(counter)); + + for (i = 1; remaining > 0; i++) { + + IncrementX963KdfCounter(counter); + + ret = wc_HashUpdate(hash, type, secret, secretSz); + if (ret != 0) { + break; + } + + ret = wc_HashUpdate(hash, type, counter, sizeof(counter)); + if (ret != 0) { + break; + } + + if (sinfo) { + ret = wc_HashUpdate(hash, type, sinfo, sinfoSz); + if (ret != 0) { + break; + } + } + + ret = wc_HashFinal(hash, type, tmp); + if (ret != 0) { + break; + } + + copySz = min(remaining, digestSz); + XMEMCPY(outIdx, tmp, copySz); + + remaining -= copySz; + outIdx += copySz; + } + + wc_HashFree(hash, type); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(hash, NULL, DYNAMIC_TYPE_HASHES); +#endif + + return ret; +} +#endif /* HAVE_X963_KDF */ + +#endif /* HAVE_ECC */ diff --git a/client/wolfssl/wolfcrypt/src/ecc_fp.c b/client/wolfssl/wolfcrypt/src/ecc_fp.c new file mode 100644 index 0000000..c8acf93 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/ecc_fp.c @@ -0,0 +1 @@ +/* dummy ecc_fp.c for dist */ diff --git a/client/wolfssl/wolfcrypt/src/ed25519.c b/client/wolfssl/wolfcrypt/src/ed25519.c new file mode 100644 index 0000000..8057caa --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/ed25519.c @@ -0,0 +1,814 @@ +/* ed25519.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + /* Based On Daniel J Bernstein's ed25519 Public Domain ref10 work. 
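+
+   The functions below follow RFC 8032: plain Ed25519 plus the Ed25519ctx
+   and Ed25519ph (pre-hash) variants, which differ only in the
+   "SigEd25519 no Ed25519 collisions" dom2 prefix, flag byte and context
+   string mixed into the two SHA-512 computations.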
*/
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+/* in case user set HAVE_ED25519 there */
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_ED25519
+
+#include <wolfssl/wolfcrypt/ed25519.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/hash.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#ifdef FREESCALE_LTC_ECC
+    #include <nxp/ksdk_port.h>
+#endif
+
+#if defined(HAVE_ED25519_SIGN) || defined(HAVE_ED25519_VERIFY)
+#define ED25519CTX_SIZE 32
+
+static const byte ed25519Ctx[ED25519CTX_SIZE+1] =
+                                          "SigEd25519 no Ed25519 collisions";
+#endif
+
+int wc_ed25519_make_public(ed25519_key* key, unsigned char* pubKey,
+                           word32 pubKeySz)
+{
+    int   ret = 0;
+    byte  az[ED25519_PRV_KEY_SIZE];
+#if !defined(FREESCALE_LTC_ECC)
+    ge_p3 A;
+#endif
+
+    if (key == NULL || pubKeySz != ED25519_PUB_KEY_SIZE)
+        ret = BAD_FUNC_ARG;
+
+    if (ret == 0)
+        ret = wc_Sha512Hash(key->k, ED25519_KEY_SIZE, az);
+    if (ret == 0) {
+        /* apply clamp */
+        az[0]  &= 248;
+        az[31] &= 63; /* same as az[31] &= 127 because of az[31] |= 64 */
+        az[31] |= 64;
+
+    #ifdef FREESCALE_LTC_ECC
+        ltc_pkha_ecc_point_t publicKey = {0};
+        publicKey.X = key->pointX;
+        publicKey.Y = key->pointY;
+        LTC_PKHA_Ed25519_PointMul(LTC_PKHA_Ed25519_BasePoint(), az,
+            ED25519_KEY_SIZE, &publicKey, kLTC_Ed25519 /* result on Ed25519 */);
+        LTC_PKHA_Ed25519_Compress(&publicKey, pubKey);
+    #else
+        ge_scalarmult_base(&A, az);
+        ge_p3_tobytes(pubKey, &A);
+    #endif
+    }
+
+    return ret;
+}
+
+/* generate an ed25519 key pair.
+ * returns 0 on success
+ */
+int wc_ed25519_make_key(WC_RNG* rng, int keySz, ed25519_key* key)
+{
+    int ret;
+
+    if (rng == NULL || key == NULL)
+        return BAD_FUNC_ARG;
+
+    /* ed25519 has 32 byte key sizes */
+    if (keySz != ED25519_KEY_SIZE)
+        return BAD_FUNC_ARG;
+
+    ret = wc_RNG_GenerateBlock(rng, key->k, ED25519_KEY_SIZE);
+    if (ret != 0)
+        return ret;
+
+    ret = wc_ed25519_make_public(key, key->p, ED25519_PUB_KEY_SIZE);
+    if (ret != 0) {
+        ForceZero(key->k, ED25519_KEY_SIZE);
+        return ret;
+    }
+
+    /* put public key after private key, on the same buffer */
+    XMEMMOVE(key->k + ED25519_KEY_SIZE, key->p, ED25519_PUB_KEY_SIZE);
+
+    key->pubKeySet = 1;
+
+    return ret;
+}
+
+
+#ifdef HAVE_ED25519_SIGN
+/*
+    in          contains the message to sign
+    inLen       is the length of the message to sign
+    out         is the buffer to write the signature
+    outLen      [in/out] input size of out buf
+                output gets set as the final length of out
+    key         is the ed25519 key to use when signing
+    type        one of Ed25519, Ed25519ctx or Ed25519ph
+    context     extra signing data
+    contextLen  length of extra signing data
+    return 0 on success
+ */
+static int ed25519_sign_msg(const byte* in, word32 inLen, byte* out,
+                            word32 *outLen, ed25519_key* key, byte type,
+                            const byte* context, byte contextLen)
+{
+#ifdef FREESCALE_LTC_ECC
+    byte   tempBuf[ED25519_PRV_KEY_SIZE];
+#else
+    ge_p3  R;
+#endif
+    byte   nonce[WC_SHA512_DIGEST_SIZE];
+    byte   hram[WC_SHA512_DIGEST_SIZE];
+    byte   az[ED25519_PRV_KEY_SIZE];
+    wc_Sha512 sha;
+    int    ret;
+
+    /* sanity check on arguments */
+    if (in == NULL || out == NULL || outLen == NULL || key == NULL ||
+                                     (context == NULL && contextLen != 0)) {
+        return BAD_FUNC_ARG;
+    }
+    if (!key->pubKeySet)
+        return BAD_FUNC_ARG;
+
+    /* check and set up out length */
+    if (*outLen < ED25519_SIG_SIZE) {
+        *outLen = ED25519_SIG_SIZE;
+        return BUFFER_E;
+    }
+    *outLen = ED25519_SIG_SIZE;
+
+    /* step 1: create nonce to use where nonce is r in
+       r = H(h_b, ... ,h_2b-1,M) */
+    ret = wc_Sha512Hash(key->k, ED25519_KEY_SIZE, az);
+    if (ret != 0)
+        return ret;
+
+    /* apply clamp */
+    az[0]  &= 248;
+    az[31] &= 63; /* same as az[31] &= 127 because of az[31] |= 64 */
+    az[31] |= 64;
+
+    ret = wc_InitSha512(&sha);
+    if (ret != 0)
+        return ret;
+    if (type == Ed25519ctx || type == Ed25519ph) {
+        ret = wc_Sha512Update(&sha, ed25519Ctx, ED25519CTX_SIZE);
+        if (ret == 0)
+            ret = wc_Sha512Update(&sha, &type, sizeof(type));
+        if (ret == 0)
+            ret = wc_Sha512Update(&sha, &contextLen, sizeof(contextLen));
+        if (ret == 0 && context != NULL)
+            ret = wc_Sha512Update(&sha, context, contextLen);
+    }
+    if (ret == 0)
+        ret = wc_Sha512Update(&sha, az + ED25519_KEY_SIZE, ED25519_KEY_SIZE);
+    if (ret == 0)
+        ret = wc_Sha512Update(&sha, in, inLen);
+    if (ret == 0)
+        ret = wc_Sha512Final(&sha, nonce);
+    wc_Sha512Free(&sha);
+    if (ret != 0)
+        return ret;
+
+#ifdef FREESCALE_LTC_ECC
+    ltc_pkha_ecc_point_t ltcPoint = {0};
+    ltcPoint.X = &tempBuf[0];
+    ltcPoint.Y = &tempBuf[32];
+    LTC_PKHA_sc_reduce(nonce);
+    LTC_PKHA_Ed25519_PointMul(LTC_PKHA_Ed25519_BasePoint(), nonce,
+        ED25519_KEY_SIZE, &ltcPoint, kLTC_Ed25519 /* result on Ed25519 */);
+    LTC_PKHA_Ed25519_Compress(&ltcPoint, out);
+#else
+    sc_reduce(nonce);
+
+    /* step 2: computing R = rB where rB is the scalar multiplication of
+       r and B */
+    ge_scalarmult_base(&R,nonce);
+    ge_p3_tobytes(out,&R);
+#endif
+
+    /* step 3: hash R + public key + message getting H(R,A,M) then
+       creating S = (r + H(R,A,M)a) mod l */
+    ret = wc_InitSha512(&sha);
+    if (ret != 0)
+        return ret;
+    if (type == Ed25519ctx || type == Ed25519ph) {
+        ret = wc_Sha512Update(&sha, ed25519Ctx, ED25519CTX_SIZE);
+        if (ret == 0)
+            ret = wc_Sha512Update(&sha, &type, sizeof(type));
+        if (ret == 0)
+            ret = wc_Sha512Update(&sha, &contextLen, sizeof(contextLen));
+        if (ret == 0 && context != NULL)
+            ret = wc_Sha512Update(&sha, context, contextLen);
+    }
+    if (ret == 0)
+        ret = wc_Sha512Update(&sha, out, ED25519_SIG_SIZE/2);
+    if (ret == 0)
+        ret = wc_Sha512Update(&sha, key->p, ED25519_PUB_KEY_SIZE);
+    if (ret == 0)
+        ret = wc_Sha512Update(&sha, in, inLen);
+    if (ret == 0)
+        ret = wc_Sha512Final(&sha, hram);
+    wc_Sha512Free(&sha);
+    if (ret != 0)
+        return ret;
+
+#ifdef FREESCALE_LTC_ECC
+    LTC_PKHA_sc_reduce(hram);
+    LTC_PKHA_sc_muladd(out + (ED25519_SIG_SIZE/2), hram, az, nonce);
+#else
+    sc_reduce(hram);
+    sc_muladd(out + (ED25519_SIG_SIZE/2), hram, az, nonce);
+#endif
+
+    return ret;
+}
+
+/*
+    in          contains the message to sign
+    inLen       is the length of the message to sign
+    out         is the buffer to write the signature
+    outLen      [in/out] input size of out buf
+                output gets set as the final length of out
+    key         is the ed25519 key to use when signing
+    return 0 on success
+ */
+int wc_ed25519_sign_msg(const byte* in, word32 inLen, byte* out,
+                        word32 *outLen, ed25519_key* key)
+{
+    return ed25519_sign_msg(in, inLen, out, outLen, key, (byte)Ed25519, NULL, 0);
+}
+
+/*
+    in          contains the message to sign
+    inLen       is the length of the message to sign
+    out         is the buffer to write the signature
+    outLen      [in/out] input size of out buf
+                output gets set as the final length of out
+    key         is the ed25519 key to use when signing
+    context     extra signing data
+    contextLen  length of extra signing data
+    return 0 on success
+ */
+int wc_ed25519ctx_sign_msg(const byte* in, word32 inLen, byte* out,
+                           word32 *outLen, ed25519_key* key,
+                           const byte* context, byte contextLen)
+{
+    return ed25519_sign_msg(in, inLen, out, outLen, key, Ed25519ctx, context,
+                            contextLen);
+}
+
+/*
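+    Ed25519ph ("pre-hash") signs the SHA-512 digest of the message instead
+    of the message itself, so large inputs can be hashed in streaming
+    fashion before signing.
+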
hash of the message to sign + hashLen is the length of the SHA-512 hash of the message to sign + out is the buffer to write the signature + outLen [in/out] input size of out buf + output gets set as the final length of out + key is the ed25519 key to use when signing + context extra signing data + contextLen length of extra signing data + return 0 on success + */ +int wc_ed25519ph_sign_hash(const byte* hash, word32 hashLen, byte* out, + word32 *outLen, ed25519_key* key, + const byte* context, byte contextLen) +{ + return ed25519_sign_msg(hash, hashLen, out, outLen, key, Ed25519ph, context, + contextLen); +} + +/* + in contains the message to sign + inLen is the length of the message to sign + out is the buffer to write the signature + outLen [in/out] input size of out buf + output gets set as the final length of out + key is the ed25519 key to use when signing + context extra signing data + contextLen length of extra signing data + return 0 on success + */ +int wc_ed25519ph_sign_msg(const byte* in, word32 inLen, byte* out, + word32 *outLen, ed25519_key* key, + const byte* context, byte contextLen) +{ + int ret; + byte hash[WC_SHA512_DIGEST_SIZE]; + + ret = wc_Sha512Hash(in, inLen, hash); + if (ret != 0) + return ret; + + return wc_ed25519ph_sign_hash(hash, sizeof(hash), out, outLen, key, context, + contextLen); +} +#endif /* HAVE_ED25519_SIGN */ + +#ifdef HAVE_ED25519_VERIFY + +/* + sig is array of bytes containing the signature + sigLen is the length of sig byte array + msg the array of bytes containing the message + msgLen length of msg array + res will be 1 on successful verify and 0 on unsuccessful + key Ed25519 public key + return 0 and res of 1 on success +*/ +static int ed25519_verify_msg(const byte* sig, word32 sigLen, const byte* msg, + word32 msgLen, int* res, ed25519_key* key, + byte type, const byte* context, byte contextLen) +{ + byte rcheck[ED25519_KEY_SIZE]; + byte h[WC_SHA512_DIGEST_SIZE]; +#ifndef FREESCALE_LTC_ECC + ge_p3 A; + ge_p2 R; +#endif + int ret; + wc_Sha512 sha; + + /* sanity check on arguments */ + if (sig == NULL || msg == NULL || res == NULL || key == NULL || + (context == NULL && contextLen != 0)) { + return BAD_FUNC_ARG; + } + + /* set verification failed by default */ + *res = 0; + + /* check on basics needed to verify signature */ + if (sigLen < ED25519_SIG_SIZE || (sig[ED25519_SIG_SIZE-1] & 224)) + return BAD_FUNC_ARG; + + /* uncompress A (public key), test if valid, and negate it */ +#ifndef FREESCALE_LTC_ECC + if (ge_frombytes_negate_vartime(&A, key->p) != 0) + return BAD_FUNC_ARG; +#endif + + /* find H(R,A,M) and store it as h */ + ret = wc_InitSha512(&sha); + if (ret != 0) + return ret; + if (type == Ed25519ctx || type == Ed25519ph) { + ret = wc_Sha512Update(&sha, ed25519Ctx, ED25519CTX_SIZE); + if (ret == 0) + ret = wc_Sha512Update(&sha, &type, sizeof(type)); + if (ret == 0) + ret = wc_Sha512Update(&sha, &contextLen, sizeof(contextLen)); + if (ret == 0 && context != NULL) + ret = wc_Sha512Update(&sha, context, contextLen); + } + if (ret == 0) + ret = wc_Sha512Update(&sha, sig, ED25519_SIG_SIZE/2); + if (ret == 0) + ret = wc_Sha512Update(&sha, key->p, ED25519_PUB_KEY_SIZE); + if (ret == 0) + ret = wc_Sha512Update(&sha, msg, msgLen); + if (ret == 0) + ret = wc_Sha512Final(&sha, h); + wc_Sha512Free(&sha); + if (ret != 0) + return ret; + +#ifdef FREESCALE_LTC_ECC + LTC_PKHA_sc_reduce(h); + LTC_PKHA_SignatureForVerify(rcheck, h, sig + (ED25519_SIG_SIZE/2), key); +#else + sc_reduce(h); + + /* + Uses a fast single-signature verification SB = R + H(R,A,M)A 
becomes + SB - H(R,A,M)A saving decompression of R + */ + ret = ge_double_scalarmult_vartime(&R, h, &A, sig + (ED25519_SIG_SIZE/2)); + if (ret != 0) + return ret; + + ge_tobytes(rcheck, &R); +#endif /* FREESCALE_LTC_ECC */ + + /* comparison of R created to R in sig */ + ret = ConstantCompare(rcheck, sig, ED25519_SIG_SIZE/2); + if (ret != 0) + return SIG_VERIFY_E; + + /* set the verification status */ + *res = 1; + + return ret; +} + +/* + sig is array of bytes containing the signature + sigLen is the length of sig byte array + msg the array of bytes containing the message + msgLen length of msg array + res will be 1 on successful verify and 0 on unsuccessful + key Ed25519 public key + return 0 and res of 1 on success +*/ +int wc_ed25519_verify_msg(const byte* sig, word32 sigLen, const byte* msg, + word32 msgLen, int* res, ed25519_key* key) +{ + return ed25519_verify_msg(sig, sigLen, msg, msgLen, res, key, (byte)Ed25519, + NULL, 0); +} + +/* + sig is array of bytes containing the signature + sigLen is the length of sig byte array + msg the array of bytes containing the message + msgLen length of msg array + res will be 1 on successful verify and 0 on unsuccessful + key Ed25519 public key + context extra signing data + contextLen length of extra signing data + return 0 and res of 1 on success +*/ +int wc_ed25519ctx_verify_msg(const byte* sig, word32 sigLen, const byte* msg, + word32 msgLen, int* res, ed25519_key* key, + const byte* context, byte contextLen) +{ + return ed25519_verify_msg(sig, sigLen, msg, msgLen, res, key, Ed25519ctx, + context, contextLen); +} + +/* + sig is array of bytes containing the signature + sigLen is the length of sig byte array + hash the array of bytes containing the SHA-512 hash of the message + hashLen length of hash array + res will be 1 on successful verify and 0 on unsuccessful + key Ed25519 public key + context extra signing data + contextLen length of extra signing data + return 0 and res of 1 on success +*/ +int wc_ed25519ph_verify_hash(const byte* sig, word32 sigLen, const byte* hash, + word32 hashLen, int* res, ed25519_key* key, + const byte* context, byte contextLen) +{ + return ed25519_verify_msg(sig, sigLen, hash, hashLen, res, key, Ed25519ph, + context, contextLen); +} + +/* + sig is array of bytes containing the signature + sigLen is the length of sig byte array + msg the array of bytes containing the message + msgLen length of msg array + res will be 1 on successful verify and 0 on unsuccessful + key Ed25519 public key + context extra signing data + contextLen length of extra signing data + return 0 and res of 1 on success +*/ +int wc_ed25519ph_verify_msg(const byte* sig, word32 sigLen, const byte* msg, + word32 msgLen, int* res, ed25519_key* key, + const byte* context, byte contextLen) +{ + int ret; + byte hash[WC_SHA512_DIGEST_SIZE]; + + ret = wc_Sha512Hash(msg, msgLen, hash); + if (ret != 0) + return ret; + + return wc_ed25519ph_verify_hash(sig, sigLen, hash, sizeof(hash), res, key, + context, contextLen); +} +#endif /* HAVE_ED25519_VERIFY */ + + +/* initialize information and memory for key */ +int wc_ed25519_init(ed25519_key* key) +{ + if (key == NULL) + return BAD_FUNC_ARG; + + XMEMSET(key, 0, sizeof(ed25519_key)); + +#ifndef FREESCALE_LTC_ECC + fe_init(); +#endif + + return 0; +} + + +/* clear memory of key */ +void wc_ed25519_free(ed25519_key* key) +{ + if (key == NULL) + return; + + ForceZero(key, sizeof(ed25519_key)); +}
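+/* Illustrative sketch (not from the original patch): a minimal Ed25519 sign/verify round trip using the wolfCrypt calls in this file. wc_ed25519_make_key() is declared earlier in ed25519.c and the caller-supplied RNG is assumed to be initialized; guarded with #if 0 so it stays documentation. */ +#if 0 +static int ed25519_roundtrip_sketch(WC_RNG* rng, const byte* msg, word32 msgLen) +{ + ed25519_key key; + byte sig[ED25519_SIG_SIZE]; + word32 sigLen = sizeof(sig); + int verified = 0; + int ret = wc_ed25519_init(&key); + + if (ret == 0) + ret = wc_ed25519_make_key(rng, ED25519_KEY_SIZE, &key); + if (ret == 0) + ret = wc_ed25519_sign_msg(msg, msgLen, sig, &sigLen, &key); + if (ret == 0) + ret = wc_ed25519_verify_msg(sig, sigLen, msg, msgLen, &verified, &key); + wc_ed25519_free(&key); + /* valid only when the call succeeded AND the result flag was set */ + return (ret == 0 && verified == 1) ? 0 : SIG_VERIFY_E; +} +#endif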
#ifdef HAVE_ED25519_KEY_EXPORT + +/* + outLen should contain the size of out buffer when input. outLen is then set + to the final output length. + returns 0 on success + */ +int wc_ed25519_export_public(ed25519_key* key, byte* out, word32* outLen) +{ + /* sanity check on arguments */ + if (key == NULL || out == NULL || outLen == NULL) + return BAD_FUNC_ARG; + + if (*outLen < ED25519_PUB_KEY_SIZE) { + *outLen = ED25519_PUB_KEY_SIZE; + return BUFFER_E; + } + + *outLen = ED25519_PUB_KEY_SIZE; + XMEMCPY(out, key->p, ED25519_PUB_KEY_SIZE); + + return 0; +} + +#endif /* HAVE_ED25519_KEY_EXPORT */ + + +#ifdef HAVE_ED25519_KEY_IMPORT +/* + Imports a compressed/uncompressed public key. + in the byte array containing the public key + inLen the length of the byte array being passed in + key ed25519 key struct to put the public key in + */ +int wc_ed25519_import_public(const byte* in, word32 inLen, ed25519_key* key) +{ + int ret; + + /* sanity check on arguments */ + if (in == NULL || key == NULL) + return BAD_FUNC_ARG; + + if (inLen < ED25519_PUB_KEY_SIZE) + return BAD_FUNC_ARG; + + /* compressed prefix according to draft + http://www.ietf.org/id/draft-koch-eddsa-for-openpgp-02.txt */ + if (in[0] == 0x40 && inLen > ED25519_PUB_KEY_SIZE) { + /* key is stored in compressed format so just copy in */ + XMEMCPY(key->p, (in + 1), ED25519_PUB_KEY_SIZE); +#ifdef FREESCALE_LTC_ECC + /* recover X coordinate */ + ltc_pkha_ecc_point_t pubKey; + pubKey.X = key->pointX; + pubKey.Y = key->pointY; + LTC_PKHA_Ed25519_PointDecompress(key->p, ED25519_PUB_KEY_SIZE, &pubKey); +#endif + key->pubKeySet = 1; + return 0; + } + + /* importing uncompressed public key */ + if (in[0] == 0x04 && inLen > 2*ED25519_PUB_KEY_SIZE) { +#ifdef FREESCALE_LTC_ECC + /* reverse bytes for little endian byte order */ + for (int i = 0; i < ED25519_KEY_SIZE; i++) + { + key->pointX[i] = *(in + ED25519_KEY_SIZE - i); + key->pointY[i] = *(in + 2*ED25519_KEY_SIZE - i); + } + XMEMCPY(key->p, key->pointY, ED25519_KEY_SIZE); + key->pubKeySet = 1; + ret = 0; +#else + /* pass in (x,y) and store compressed key */ + ret = ge_compress_key(key->p, in+1, + in+1+ED25519_PUB_KEY_SIZE, ED25519_PUB_KEY_SIZE); + if (ret == 0) + key->pubKeySet = 1; +#endif /* FREESCALE_LTC_ECC */ + return ret; + } + + /* if not specified as compressed or uncompressed, check the key size; + if it equals the compressed key size, copy the key in */ + if (inLen == ED25519_PUB_KEY_SIZE) { + XMEMCPY(key->p, in, ED25519_PUB_KEY_SIZE); +#ifdef FREESCALE_LTC_ECC + /* recover X coordinate */ + ltc_pkha_ecc_point_t pubKey; + pubKey.X = key->pointX; + pubKey.Y = key->pointY; + LTC_PKHA_Ed25519_PointDecompress(key->p, ED25519_PUB_KEY_SIZE, &pubKey); +#endif + key->pubKeySet = 1; + return 0; + } + + /* bad public key format */ + return BAD_FUNC_ARG; +} + + +/* + For importing a private key. + */ +int wc_ed25519_import_private_only(const byte* priv, word32 privSz, + ed25519_key* key) +{ + /* sanity check on arguments */ + if (priv == NULL || key == NULL) + return BAD_FUNC_ARG; + + /* key size check */ + if (privSz < ED25519_KEY_SIZE) + return BAD_FUNC_ARG; + + XMEMCPY(key->k, priv, ED25519_KEY_SIZE); + + return 0; +}
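+/* Sketch (editorial example): wc_ed25519_import_private_only() stores only the 32-byte seed, so the public half must be regenerated before the key can sign. One possible pattern using wc_ed25519_make_public() from this file; guarded out. */ +#if 0 +static int import_seed_sketch(const byte* seed, word32 seedSz, ed25519_key* key) +{ + int ret = wc_ed25519_import_private_only(seed, seedSz, key); + if (ret == 0) + ret = wc_ed25519_make_public(key, key->p, ED25519_PUB_KEY_SIZE); + if (ret == 0) { + /* signing reads the public key appended after the seed in key->k */ + XMEMCPY(key->k + ED25519_KEY_SIZE, key->p, ED25519_PUB_KEY_SIZE); + key->pubKeySet = 1; + } + return ret; +} +#endif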
/* + For importing a private key and its associated public key. + */ +int wc_ed25519_import_private_key(const byte* priv, word32 privSz, + const byte* pub, word32 pubSz, ed25519_key* key) +{ + int ret; + + /* sanity check on arguments */ + if (priv == NULL || pub == NULL || key == NULL) + return BAD_FUNC_ARG; + + /* key size check */ + if (privSz < ED25519_KEY_SIZE || pubSz < ED25519_PUB_KEY_SIZE) + return BAD_FUNC_ARG; + + /* import public key */ + ret = wc_ed25519_import_public(pub, pubSz, key); + if (ret != 0) + return ret; + + /* make the private key (priv + pub) */ + XMEMCPY(key->k, priv, ED25519_KEY_SIZE); + XMEMCPY(key->k + ED25519_KEY_SIZE, key->p, ED25519_PUB_KEY_SIZE); + + return ret; +} + +#endif /* HAVE_ED25519_KEY_IMPORT */ + + +#ifdef HAVE_ED25519_KEY_EXPORT + +/* + export private key only (secret part so 32 bytes) + outLen should contain the size of out buffer when input. outLen is then set + to the final output length. + returns 0 on success + */ +int wc_ed25519_export_private_only(ed25519_key* key, byte* out, word32* outLen) +{ + /* sanity checks on arguments */ + if (key == NULL || out == NULL || outLen == NULL) + return BAD_FUNC_ARG; + + if (*outLen < ED25519_KEY_SIZE) { + *outLen = ED25519_KEY_SIZE; + return BUFFER_E; + } + + *outLen = ED25519_KEY_SIZE; + XMEMCPY(out, key->k, ED25519_KEY_SIZE); + + return 0; +} + +/* + export private key, including public part + outLen should contain the size of out buffer when input. outLen is then set + to the final output length. + returns 0 on success + */ +int wc_ed25519_export_private(ed25519_key* key, byte* out, word32* outLen) +{ + /* sanity checks on arguments */ + if (key == NULL || out == NULL || outLen == NULL) + return BAD_FUNC_ARG; + + if (*outLen < ED25519_PRV_KEY_SIZE) { + *outLen = ED25519_PRV_KEY_SIZE; + return BUFFER_E; + } + + *outLen = ED25519_PRV_KEY_SIZE; + XMEMCPY(out, key->k, ED25519_PRV_KEY_SIZE); + + return 0; +} + +/* export full private key and public key + return 0 on success + */ +int wc_ed25519_export_key(ed25519_key* key, + byte* priv, word32 *privSz, + byte* pub, word32 *pubSz) +{ + int ret; + + /* export 'full' private part */ + ret = wc_ed25519_export_private(key, priv, privSz); + if (ret != 0) + return ret; + + /* export public part */ + ret = wc_ed25519_export_public(key, pub, pubSz); + + return ret; +} + +#endif /* HAVE_ED25519_KEY_EXPORT */ + +/* check the private and public keys match */ +int wc_ed25519_check_key(ed25519_key* key) +{ + int ret = 0; + unsigned char pubKey[ED25519_PUB_KEY_SIZE]; + + if (key == NULL) + return BAD_FUNC_ARG; + + if (!key->pubKeySet) + ret = PUBLIC_KEY_E; + if (ret == 0) + ret = wc_ed25519_make_public(key, pubKey, sizeof(pubKey)); + if (ret == 0 && XMEMCMP(pubKey, key->p, ED25519_PUB_KEY_SIZE) != 0) + ret = PUBLIC_KEY_E; + + return ret; +} + +/* returns the private key size (secret only) in bytes */ +int wc_ed25519_size(ed25519_key* key) +{ + if (key == NULL) + return BAD_FUNC_ARG; + + return ED25519_KEY_SIZE; +} + +/* returns the private key size (secret + public) in bytes */ +int wc_ed25519_priv_size(ed25519_key* key) +{ + if (key == NULL) + return BAD_FUNC_ARG; + + return ED25519_PRV_KEY_SIZE; +} + +/* returns the compressed key size in bytes (public key) */ +int wc_ed25519_pub_size(ed25519_key* key) +{ + if (key == NULL) + return BAD_FUNC_ARG; + + return ED25519_PUB_KEY_SIZE; +} + +/* returns the size of signature in bytes */ +int wc_ed25519_sig_size(ed25519_key* key) +{ + if (key == NULL) + return BAD_FUNC_ARG; + + return ED25519_SIG_SIZE; +} + +#endif /* HAVE_ED25519 */ +
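+/* Sketch (editorial example): the size helpers above are the intended way to size caller buffers; when a buffer is too small the export functions return BUFFER_E and write the required length back through outLen. Guarded out. */ +#if 0 +static int export_pub_sized_sketch(ed25519_key* key, byte* out, word32 outSz) +{ + word32 len = outSz; + int ret = wc_ed25519_export_public(key, out, &len); + if (ret == BUFFER_E) { + /* len now holds the required size, ED25519_PUB_KEY_SIZE */ + } + return ret; +} +#endif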
diff --git a/client/wolfssl/wolfcrypt/src/ed448.c b/client/wolfssl/wolfcrypt/src/ed448.c new file mode 100644 index 0000000..125ee38 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/ed448.c @@ -0,0 +1,917 @@ +/* ed448.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Implemented to: RFC 8032 */ + +/* Based On Daniel J Bernstein's ed25519 Public Domain ref10 work. + * Reworked for curve448 by Sean Parkinson. + */ + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +/* in case user set HAVE_ED448 there */ +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef HAVE_ED448 + +#include <wolfssl/wolfcrypt/ed448.h> +#include <wolfssl/wolfcrypt/error-crypt.h> +#include <wolfssl/wolfcrypt/hash.h> +#ifdef NO_INLINE + #include <wolfssl/wolfcrypt/misc.h> +#else + #define WOLFSSL_MISC_INCLUDED + #include <wolfcrypt/src/misc.c> +#endif + +#if defined(HAVE_ED448_SIGN) || defined(HAVE_ED448_VERIFY) +/* Size of context bytes to use with hash when signing and verifying. */ +#define ED448CTX_SIZE 8 +/* Context to pass to hash when signing and verifying. */ +static const byte ed448Ctx[ED448CTX_SIZE+1] = "SigEd448"; +#endif + +/* Derive the public key for the private key. + * + * key [in] Ed448 key object. + * pubKey [in] Byte array to hold the public key. + * pubKeySz [in] Size of the array in bytes. + * returns BAD_FUNC_ARG when key is NULL or pubKeySz is not equal to + * ED448_PUB_KEY_SIZE, + * other -ve value on hash failure, + * 0 otherwise. + */ +int wc_ed448_make_public(ed448_key* key, unsigned char* pubKey, word32 pubKeySz) +{ + int ret = 0; + byte az[ED448_PRV_KEY_SIZE]; + ge448_p2 A; + + if ((key == NULL) || (pubKeySz != ED448_PUB_KEY_SIZE)) { + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + ret = wc_Shake256Hash(key->k, ED448_KEY_SIZE, az, sizeof(az)); + } + if (ret == 0) { + /* apply clamp */ + az[0] &= 0xfc; + az[55] |= 0x80; + az[56] = 0x00; + + ge448_scalarmult_base(&A, az); + ge448_to_bytes(pubKey, &A); + } + + return ret; +} + +/* Make a new ed448 private/public key. + * + * rng [in] Random number generator. + * keySz [in] Size of the key to generate. + * key [in] Ed448 key object. + * returns BAD_FUNC_ARG when rng or key is NULL or keySz is not equal to + * ED448_KEY_SIZE, + * other -ve value on random number or hash failure, + * 0 otherwise. + */ +int wc_ed448_make_key(WC_RNG* rng, int keySz, ed448_key* key) +{ + int ret = 0; + + if ((rng == NULL) || (key == NULL)) { + ret = BAD_FUNC_ARG; + } + + /* ed448 has 57 byte key sizes */ + if ((ret == 0) && (keySz != ED448_KEY_SIZE)) { + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + ret = wc_RNG_GenerateBlock(rng, key->k, ED448_KEY_SIZE); + } + if (ret == 0) { + ret = wc_ed448_make_public(key, key->p, ED448_PUB_KEY_SIZE); + if (ret != 0) { + ForceZero(key->k, ED448_KEY_SIZE); + } + } + if (ret == 0) { + /* put public key after private key, on the same buffer */ + XMEMMOVE(key->k + ED448_KEY_SIZE, key->p, ED448_PUB_KEY_SIZE); + + key->pubKeySet = 1; + } + + return ret; +}
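+/* Sketch (editorial example): generating an Ed448 key pair with the functions above. The private-scalar clamping (az[0] &= 0xfc; az[55] |= 0x80; az[56] = 0x00) is applied internally by wc_ed448_make_public(), so the caller only supplies 57 bytes of RNG output. Guarded out. */ +#if 0 +static int ed448_keygen_sketch(ed448_key* key) +{ + WC_RNG rng; + int ret = wc_InitRng(&rng); + if (ret == 0) { + ret = wc_ed448_init(key); + if (ret == 0) + ret = wc_ed448_make_key(&rng, ED448_KEY_SIZE, key); + wc_FreeRng(&rng); + } + return ret; +} +#endif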
#ifdef HAVE_ED448_SIGN +/* Sign the message using the ed448 private key. + * + * in [in] Message to sign. + * inLen [in] Length of the message in bytes. + * out [in] Buffer to write signature into. + * outLen [in/out] On in, size of buffer. + * On out, the length of the signature in bytes. + * key [in] Ed448 key to use when signing + * type [in] Type of signature to perform: Ed448 or Ed448ph + * context [in] Context of signing. + * contextLen [in] Length of context in bytes. + * returns BAD_FUNC_ARG when a parameter is NULL, when contextLen is non-zero + * while context is NULL, or when the public key is not set, + * BUFFER_E when outLen is less than ED448_SIG_SIZE, + * other -ve values when hash fails, + * 0 otherwise. + */ +static int ed448_sign_msg(const byte* in, word32 inLen, byte* out, + word32 *outLen, ed448_key* key, byte type, + const byte* context, byte contextLen) +{ + ge448_p2 R; + byte nonce[ED448_SIG_SIZE]; + byte hram[ED448_SIG_SIZE]; + byte az[ED448_PRV_KEY_SIZE]; + wc_Shake sha; + int ret = 0; + + /* sanity check on arguments */ + if ((in == NULL) || (out == NULL) || (outLen == NULL) || (key == NULL) || + ((context == NULL) && (contextLen != 0))) { + ret = BAD_FUNC_ARG; + } + if ((ret == 0) && (!key->pubKeySet)) { + ret = BAD_FUNC_ARG; + } + + /* check and set up out length */ + if ((ret == 0) && (*outLen < ED448_SIG_SIZE)) { + *outLen = ED448_SIG_SIZE; + ret = BUFFER_E; + } + + if (ret == 0) { + *outLen = ED448_SIG_SIZE; + + /* step 1: create nonce to use where nonce is r in + r = H(h_b, ... ,h_2b-1,M) */ + ret = wc_Shake256Hash(key->k, ED448_KEY_SIZE, az, sizeof(az)); + } + if (ret == 0) { + /* apply clamp */ + az[0] &= 0xfc; + az[55] |= 0x80; + az[56] = 0x00; + + ret = wc_InitShake256(&sha, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_Shake256_Update(&sha, ed448Ctx, ED448CTX_SIZE); + } + if (ret == 0) { + ret = wc_Shake256_Update(&sha, &type, sizeof(type)); + } + if (ret == 0) { + ret = wc_Shake256_Update(&sha, &contextLen, sizeof(contextLen)); + } + if (ret == 0 && context != NULL) { + ret = wc_Shake256_Update(&sha, context, contextLen); + } + if (ret == 0) { + ret = wc_Shake256_Update(&sha, az + ED448_KEY_SIZE, ED448_KEY_SIZE); + } + if (ret == 0) { + ret = wc_Shake256_Update(&sha, in, inLen); + } + if (ret == 0) { + ret = wc_Shake256_Final(&sha, nonce, sizeof(nonce)); + } + wc_Shake256_Free(&sha); + } + if (ret == 0) { + sc448_reduce(nonce); + + /* step 2: computing R = rB where rB is the scalar multiplication of + r and B */ + ge448_scalarmult_base(&R,nonce); + ge448_to_bytes(out,&R); + + /* step 3: hash R + public key + message getting H(R,A,M) then + creating S = (r + H(R,A,M)a) mod l */ + ret = wc_InitShake256(&sha, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_Shake256_Update(&sha, ed448Ctx, ED448CTX_SIZE); + if (ret == 0) { + ret = wc_Shake256_Update(&sha, &type, sizeof(type)); + } + if (ret == 0) { + ret = wc_Shake256_Update(&sha, &contextLen, sizeof(contextLen)); + } + if (ret == 0 && context != NULL) { + ret = wc_Shake256_Update(&sha, context, contextLen); + } + if (ret == 0) { + ret = wc_Shake256_Update(&sha, out, ED448_SIG_SIZE/2); + } + if (ret == 0) { + ret = wc_Shake256_Update(&sha, key->p, ED448_PUB_KEY_SIZE); + } + if (ret == 0) { + ret = wc_Shake256_Update(&sha, in, inLen); + } + if (ret == 0) { + ret = wc_Shake256_Final(&sha, hram, sizeof(hram)); + } + wc_Shake256_Free(&sha); + } + } + + if (ret == 0) { + sc448_reduce(hram); + sc448_muladd(out + (ED448_SIG_SIZE/2), hram, az, nonce); + } + + return ret; +}
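+/* Sketch (editorial example) of the RFC 8032 "dom4" prefix that ed448_sign_msg() above streams into SHAKE-256: dom4(f, c) = "SigEd448" || octet(f) || octet(len(c)) || c, where phflag f is 0 for Ed448 and 1 for Ed448ph. The function hashes these fields directly rather than building a buffer; this helper only makes the layout explicit. Guarded out. */ +#if 0 +static word32 ed448_dom4_sketch(byte f, const byte* c, byte cLen, + byte* out /* at least 10 + 255 bytes */) +{ + word32 i = 0; + XMEMCPY(out + i, ed448Ctx, ED448CTX_SIZE); i += ED448CTX_SIZE; + out[i++] = f; /* phflag */ + out[i++] = cLen; /* context length, at most 255 by the byte type */ + if (c != NULL) { + XMEMCPY(out + i, c, cLen); + i += cLen; + } + return i; +} +#endif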
/* Sign the message using the ed448 private key. + * Signature type is Ed448. + * + * in [in] Message to sign. + * inLen [in] Length of the message in bytes. + * out [in] Buffer to write signature into. + * outLen [in/out] On in, size of buffer. + * On out, the length of the signature in bytes. + * key [in] Ed448 key to use when signing + * context [in] Context of signing. + * contextLen [in] Length of context in bytes. + * returns BAD_FUNC_ARG when a parameter is NULL, when contextLen is non-zero + * while context is NULL, or when the public key is not set, + * BUFFER_E when outLen is less than ED448_SIG_SIZE, + * other -ve values when hash fails, + * 0 otherwise. + */ +int wc_ed448_sign_msg(const byte* in, word32 inLen, byte* out, word32 *outLen, + ed448_key* key, const byte* context, byte contextLen) +{ + return ed448_sign_msg(in, inLen, out, outLen, key, Ed448, context, + contextLen); +} + +/* Sign the hash using the ed448 private key. + * Signature type is Ed448ph. + * + * hash [in] Hash of message to sign. + * hashLen [in] Length of hash of message in bytes. + * out [in] Buffer to write signature into. + * outLen [in/out] On in, size of buffer. + * On out, the length of the signature in bytes. + * key [in] Ed448 key to use when signing + * context [in] Context of signing. + * contextLen [in] Length of context in bytes. + * returns BAD_FUNC_ARG when a parameter is NULL, when contextLen is non-zero + * while context is NULL, or when the public key is not set, + * BUFFER_E when outLen is less than ED448_SIG_SIZE, + * other -ve values when hash fails, + * 0 otherwise. + */ +int wc_ed448ph_sign_hash(const byte* hash, word32 hashLen, byte* out, + word32 *outLen, ed448_key* key, + const byte* context, byte contextLen) +{ + return ed448_sign_msg(hash, hashLen, out, outLen, key, Ed448ph, context, + contextLen); +} + +/* Sign the message using the ed448 private key. + * Signature type is Ed448ph. + * + * in [in] Message to sign. + * inLen [in] Length of the message to sign in bytes. + * out [in] Buffer to write signature into. + * outLen [in/out] On in, size of buffer. + * On out, the length of the signature in bytes. + * key [in] Ed448 key to use when signing + * context [in] Context of signing. + * contextLen [in] Length of context in bytes. + * returns BAD_FUNC_ARG when a parameter is NULL, when contextLen is non-zero + * while context is NULL, or when the public key is not set, + * BUFFER_E when outLen is less than ED448_SIG_SIZE, + * other -ve values when hash fails, + * 0 otherwise. + */ +int wc_ed448ph_sign_msg(const byte* in, word32 inLen, byte* out, word32 *outLen, + ed448_key* key, const byte* context, byte contextLen) +{ + int ret = 0; + byte hash[64]; + + ret = wc_Shake256Hash(in, inLen, hash, sizeof(hash)); + if (ret == 0) { + ret = wc_ed448ph_sign_hash(hash, sizeof(hash), out, outLen, key, + context, contextLen); + } + + return ret; +} +#endif /* HAVE_ED448_SIGN */ + +#ifdef HAVE_ED448_VERIFY
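+/* Sketch (editorial example): a context-bound verify. The context bytes are mixed into H(R,A,M) by ed448_verify_msg() below, so verification succeeds only when the same context used at signing time is presented again. Guarded out. */ +#if 0 +static int ed448_ctx_verify_sketch(const byte* sig, const byte* msg, + word32 msgLen, ed448_key* key) +{ + static const byte ctx[] = { 'w', 'o', 'l', 'f' }; /* arbitrary example */ + int res = 0; + int ret = wc_ed448_verify_msg(sig, ED448_SIG_SIZE, msg, msgLen, &res, key, + ctx, (byte)sizeof(ctx)); + return (ret == 0 && res == 1) ? 0 : SIG_VERIFY_E; +} +#endif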
/* Verify the message using the ed448 public key. + * + * sig [in] Signature to verify. + * sigLen [in] Size of signature in bytes. + * msg [in] Message to verify. + * msgLen [in] Length of the message in bytes. + * key [in] Ed448 key to use to verify. + * type [in] Type of signature to verify: Ed448 or Ed448ph + * context [in] Context of verification. + * contextLen [in] Length of context in bytes. + * returns BAD_FUNC_ARG when a parameter is NULL, when contextLen is non-zero + * while context is NULL, or when the public key is not set, + * BUFFER_E when sigLen is less than ED448_SIG_SIZE, + * other -ve values when hash fails, + * 0 otherwise. + */ +static int ed448_verify_msg(const byte* sig, word32 sigLen, const byte* msg, + word32 msgLen, int* res, ed448_key* key, + byte type, const byte* context, byte contextLen) +{ + byte rcheck[ED448_KEY_SIZE]; + byte h[ED448_SIG_SIZE]; + ge448_p2 A; + ge448_p2 R; + int ret = 0; + wc_Shake sha; + + /* sanity check on arguments */ + if ((sig == NULL) || (msg == NULL) || (res == NULL) || (key == NULL) || + ((context == NULL) && (contextLen != 0))) { + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + /* set verification failed by default */ + *res = 0; + + /* check on basics needed to verify signature */ + if (sigLen < ED448_SIG_SIZE) { + ret = BAD_FUNC_ARG; + } + } + + /* uncompress A (public key), test if valid, and negate it */ + if ((ret == 0) && (ge448_from_bytes_negate_vartime(&A, key->p) != 0)) { + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + /* find H(R,A,M) and store it as h */ + ret = wc_InitShake256(&sha, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_Shake256_Update(&sha, ed448Ctx, ED448CTX_SIZE); + if (ret == 0) { + ret = wc_Shake256_Update(&sha, &type, sizeof(type)); + } + if (ret == 0) { + ret = wc_Shake256_Update(&sha, &contextLen, sizeof(contextLen)); + } + if (ret == 0 && context != NULL) { + ret = wc_Shake256_Update(&sha, context, contextLen); + } + if (ret == 0) { + ret = wc_Shake256_Update(&sha, sig, ED448_SIG_SIZE/2); + } + if (ret == 0) { + ret = wc_Shake256_Update(&sha, key->p, ED448_PUB_KEY_SIZE); + } + if (ret == 0) { + ret = wc_Shake256_Update(&sha, msg, msgLen); + } + if (ret == 0) { + ret = wc_Shake256_Final(&sha, h, sizeof(h)); + } + wc_Shake256_Free(&sha); + } + } + if (ret == 0) { + sc448_reduce(h); + + /* Uses a fast single-signature verification SB = R + H(R,A,M)A becomes + * SB - H(R,A,M)A saving decompression of R + */ + ret = ge448_double_scalarmult_vartime(&R, h, &A, + sig + (ED448_SIG_SIZE/2)); + } + + if (ret == 0) { + ge448_to_bytes(rcheck, &R); + + /* comparison of R created to R in sig */ + if (ConstantCompare(rcheck, sig, ED448_SIG_SIZE/2) != 0) { + ret = SIG_VERIFY_E; + } + else { + /* set the verification status */ + *res = 1; + } + } + + return ret; +} + +/* Verify the message using the ed448 public key. + * Signature type is Ed448. + * + * sig [in] Signature to verify. + * sigLen [in] Size of signature in bytes. + * msg [in] Message to verify. + * msgLen [in] Length of the message in bytes. + * key [in] Ed448 key to use to verify. + * context [in] Context of verification. + * contextLen [in] Length of context in bytes. + * returns BAD_FUNC_ARG when a parameter is NULL, when contextLen is non-zero + * while context is NULL, or when the public key is not set, + * BUFFER_E when sigLen is less than ED448_SIG_SIZE, + * other -ve values when hash fails, + * 0 otherwise. + */ +int wc_ed448_verify_msg(const byte* sig, word32 sigLen, const byte* msg, + word32 msgLen, int* res, ed448_key* key, + const byte* context, byte contextLen) +{ + return ed448_verify_msg(sig, sigLen, msg, msgLen, res, key, Ed448, + context, contextLen); +}
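+/* Sketch (editorial example): the relationship between the ph variants below - wc_ed448ph_verify_msg() is equivalent to SHAKE-256 hashing the message to 64 bytes and handing the digest to wc_ed448ph_verify_hash(). Guarded out. */ +#if 0 +static int ed448ph_equiv_sketch(const byte* sig, const byte* msg, word32 msgLen, + int* res, ed448_key* key) +{ + byte prehash[64]; + int ret = wc_Shake256Hash(msg, msgLen, prehash, sizeof(prehash)); + if (ret == 0) + ret = wc_ed448ph_verify_hash(sig, ED448_SIG_SIZE, prehash, + sizeof(prehash), res, key, NULL, 0); + return ret; +} +#endif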
/* Verify the hash using the ed448 public key. + * Signature type is Ed448ph. + * + * sig [in] Signature to verify. + * sigLen [in] Size of signature in bytes. + * hash [in] Hash of message to verify. + * hashLen [in] Length of the hash in bytes. + * key [in] Ed448 key to use to verify. + * context [in] Context of verification. + * contextLen [in] Length of context in bytes. + * returns BAD_FUNC_ARG when a parameter is NULL, when contextLen is non-zero + * while context is NULL, or when the public key is not set, + * BUFFER_E when sigLen is less than ED448_SIG_SIZE, + * other -ve values when hash fails, + * 0 otherwise. + */ +int wc_ed448ph_verify_hash(const byte* sig, word32 sigLen, const byte* hash, + word32 hashLen, int* res, ed448_key* key, + const byte* context, byte contextLen) +{ + return ed448_verify_msg(sig, sigLen, hash, hashLen, res, key, Ed448ph, + context, contextLen); +} + +/* Verify the message using the ed448 public key. + * Signature type is Ed448ph. + * + * sig [in] Signature to verify. + * sigLen [in] Size of signature in bytes. + * msg [in] Message to verify. + * msgLen [in] Length of the message in bytes. + * key [in] Ed448 key to use to verify. + * context [in] Context of verification. + * contextLen [in] Length of context in bytes. + * returns BAD_FUNC_ARG when a parameter is NULL, when contextLen is non-zero + * while context is NULL, or when the public key is not set, + * BUFFER_E when sigLen is less than ED448_SIG_SIZE, + * other -ve values when hash fails, + * 0 otherwise. + */ +int wc_ed448ph_verify_msg(const byte* sig, word32 sigLen, const byte* msg, + word32 msgLen, int* res, ed448_key* key, + const byte* context, byte contextLen) +{ + int ret = 0; + byte hash[64]; + + ret = wc_Shake256Hash(msg, msgLen, hash, sizeof(hash)); + if (ret == 0) { + ret = wc_ed448ph_verify_hash(sig, sigLen, hash, sizeof(hash), res, key, + context, contextLen); + } + + return ret; +} +#endif /* HAVE_ED448_VERIFY */ + +/* Initialize the ed448 private/public key. + * + * key [in] Ed448 key. + * returns BAD_FUNC_ARG when key is NULL + */ +int wc_ed448_init(ed448_key* key) +{ + int ret = 0; + + if (key == NULL) { + ret = BAD_FUNC_ARG; + } + else { + XMEMSET(key, 0, sizeof(ed448_key)); + + fe448_init(); + } + + return ret; +} + + +/* Clears the ed448 key data + * + * key [in] Ed448 key. + */ +void wc_ed448_free(ed448_key* key) +{ + if (key != NULL) { + ForceZero(key, sizeof(ed448_key)); + } +} + + +#ifdef HAVE_ED448_KEY_EXPORT + +/* Export the ed448 public key. + * + * key [in] Ed448 public key. + * out [in] Array to hold public key. + * outLen [in/out] On in, the number of bytes in array. + * On out, the number of bytes put into array. + * returns BAD_FUNC_ARG when a parameter is NULL, + * BUFFER_E when outLen is less than ED448_PUB_KEY_SIZE, + * 0 otherwise. + */ +int wc_ed448_export_public(ed448_key* key, byte* out, word32* outLen) +{ + int ret = 0; + + /* sanity check on arguments */ + if ((key == NULL) || (out == NULL) || (outLen == NULL)) { + ret = BAD_FUNC_ARG; + } + + if ((ret == 0) && (*outLen < ED448_PUB_KEY_SIZE)) { + *outLen = ED448_PUB_KEY_SIZE; + ret = BUFFER_E; + } + + if (ret == 0) { + *outLen = ED448_PUB_KEY_SIZE; + XMEMCPY(out, key->p, ED448_PUB_KEY_SIZE); + } + + return ret; +} + +#endif /* HAVE_ED448_KEY_EXPORT */ + + +#ifdef HAVE_ED448_KEY_IMPORT
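+/* Sketch (editorial example): the three public key encodings accepted by wc_ed448_import_public() below - raw (57 bytes), 0x40-prefixed compressed, and 0x04-prefixed uncompressed (x || y), which is re-compressed on import. Guarded out. */ +#if 0 +static int import_raw_pub_sketch(const byte* pub, ed448_key* key) +{ + /* raw form: the length alone identifies the compressed point */ + return wc_ed448_import_public(pub, ED448_PUB_KEY_SIZE, key); +} +#endif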
/* Import a compressed or uncompressed ed448 public key from a byte array. + * Public key encoded in big-endian. + * + * in [in] Array holding public key. + * inLen [in] Number of bytes of data in array. + * key [in] Ed448 public key. + * returns BAD_FUNC_ARG when a parameter is NULL or key format is not supported, + * 0 otherwise. + */ +int wc_ed448_import_public(const byte* in, word32 inLen, ed448_key* key) +{ + int ret = 0; + + /* sanity check on arguments */ + if ((in == NULL) || (key == NULL)) { + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + /* compressed prefix according to draft + * https://tools.ietf.org/html/draft-ietf-openpgp-rfc4880bis-06 */ + if (in[0] == 0x40 && inLen > ED448_PUB_KEY_SIZE) { + /* key is stored in compressed format so just copy in */ + XMEMCPY(key->p, (in + 1), ED448_PUB_KEY_SIZE); + key->pubKeySet = 1; + } + /* importing uncompressed public key */ + else if (in[0] == 0x04 && inLen > 2*ED448_PUB_KEY_SIZE) { + /* pass in (x,y) and store compressed key */ + ret = ge448_compress_key(key->p, in+1, in+1+ED448_PUB_KEY_SIZE); + if (ret == 0) + key->pubKeySet = 1; + } + else if (inLen == ED448_PUB_KEY_SIZE) { + /* if not specified as compressed or uncompressed, check the key + * size; if it equals the compressed key size, copy the key in */ + XMEMCPY(key->p, in, ED448_PUB_KEY_SIZE); + key->pubKeySet = 1; + } + else { + /* bad public key format */ + ret = BAD_FUNC_ARG; + } + } + + return ret; +} + + +/* Import an ed448 private key from a byte array. + * + * priv [in] Array holding private key. + * privSz [in] Number of bytes of data in array. + * key [in] Ed448 private key. + * returns BAD_FUNC_ARG when a parameter is NULL or privSz is less than + * ED448_KEY_SIZE, + * 0 otherwise. + */ +int wc_ed448_import_private_only(const byte* priv, word32 privSz, + ed448_key* key) +{ + int ret = 0; + + /* sanity check on arguments */ + if ((priv == NULL) || (key == NULL)) { + ret = BAD_FUNC_ARG; + } + + /* key size check */ + if ((ret == 0) && (privSz < ED448_KEY_SIZE)) { + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + XMEMCPY(key->k, priv, ED448_KEY_SIZE); + } + + return ret; +} + +/* Import ed448 private and public keys from byte arrays. + * + * priv [in] Array holding private key. + * privSz [in] Number of bytes of data in private key array. + * pub [in] Array holding public key. + * pubSz [in] Number of bytes of data in public key array. + * key [in] Ed448 private/public key. + * returns BAD_FUNC_ARG when a parameter is NULL or privSz is less than + * ED448_KEY_SIZE or pubSz is less than ED448_PUB_KEY_SIZE, + * 0 otherwise. + */ +int wc_ed448_import_private_key(const byte* priv, word32 privSz, + const byte* pub, word32 pubSz, ed448_key* key) +{ + int ret = 0; + + /* sanity check on arguments */ + if ((priv == NULL) || (pub == NULL) || (key == NULL)) { + ret = BAD_FUNC_ARG; + } + + /* key size check */ + if ((ret == 0) && (privSz < ED448_KEY_SIZE || pubSz < ED448_PUB_KEY_SIZE)) { + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + /* import public key */ + ret = wc_ed448_import_public(pub, pubSz, key); + } + if (ret == 0) { + /* make the private key (priv + pub) */ + XMEMCPY(key->k, priv, ED448_KEY_SIZE); + XMEMCPY(key->k + ED448_KEY_SIZE, key->p, ED448_PUB_KEY_SIZE); + } + + return ret; +} + +#endif /* HAVE_ED448_KEY_IMPORT */ + + +#ifdef HAVE_ED448_KEY_EXPORT
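+/* Sketch (editorial example): key->k holds seed || public key, so ED448_KEY_SIZE (57) selects the seed alone while ED448_PRV_KEY_SIZE (114) covers both halves; the export functions below differ only in how much of key->k they copy. Guarded out. */ +#if 0 +static int export_both_sketch(ed448_key* key, byte* priv, byte* pub) +{ + word32 privSz = ED448_PRV_KEY_SIZE; + word32 pubSz = ED448_PUB_KEY_SIZE; + return wc_ed448_export_key(key, priv, &privSz, pub, &pubSz); +} +#endif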
/* Export the ed448 private key. + * + * key [in] Ed448 private key. + * out [in] Array to hold private key. + * outLen [in/out] On in, the number of bytes in array. + * On out, the number of bytes put into array. + * returns BAD_FUNC_ARG when a parameter is NULL, + * BUFFER_E when outLen is less than ED448_KEY_SIZE, + * 0 otherwise. + */ +int wc_ed448_export_private_only(ed448_key* key, byte* out, word32* outLen) +{ + int ret = 0; + + /* sanity checks on arguments */ + if ((key == NULL) || (out == NULL) || (outLen == NULL)) { + ret = BAD_FUNC_ARG; + } + + if ((ret == 0) && (*outLen < ED448_KEY_SIZE)) { + *outLen = ED448_KEY_SIZE; + ret = BUFFER_E; + } + + if (ret == 0) { + *outLen = ED448_KEY_SIZE; + XMEMCPY(out, key->k, ED448_KEY_SIZE); + } + + return ret; +} + +/* Export the ed448 private and public key. + * + * key [in] Ed448 private/public key. + * out [in] Array to hold private and public key. + * outLen [in/out] On in, the number of bytes in array. + * On out, the number of bytes put into array. + * returns BAD_FUNC_ARG when a parameter is NULL, + * BUFFER_E when outLen is less than ED448_PRV_KEY_SIZE, + * 0 otherwise. + */ +int wc_ed448_export_private(ed448_key* key, byte* out, word32* outLen) +{ + int ret = 0; + + /* sanity checks on arguments */ + if ((key == NULL) || (out == NULL) || (outLen == NULL)) { + ret = BAD_FUNC_ARG; + } + + if ((ret == 0) && (*outLen < ED448_PRV_KEY_SIZE)) { + *outLen = ED448_PRV_KEY_SIZE; + ret = BUFFER_E; + } + + if (ret == 0) { + *outLen = ED448_PRV_KEY_SIZE; + XMEMCPY(out, key->k, ED448_PRV_KEY_SIZE); + } + + return ret; +} + +/* Export the ed448 private and public key. + * + * key [in] Ed448 private/public key. + * priv [in] Array to hold private key. + * privSz [in/out] On in, the number of bytes in private key array. + * pub [in] Array to hold public key. + * pubSz [in/out] On in, the number of bytes in public key array. + * On out, the number of bytes put into array. + * returns BAD_FUNC_ARG when a parameter is NULL, + * BUFFER_E when privSz is less than ED448_PRV_KEY_SIZE or pubSz is less + * than ED448_PUB_KEY_SIZE, + * 0 otherwise. + */ +int wc_ed448_export_key(ed448_key* key, byte* priv, word32 *privSz, + byte* pub, word32 *pubSz) +{ + int ret = 0; + + /* export 'full' private part */ + ret = wc_ed448_export_private(key, priv, privSz); + if (ret == 0) { + /* export public part */ + ret = wc_ed448_export_public(key, pub, pubSz); + } + + return ret; +} + +#endif /* HAVE_ED448_KEY_EXPORT */ + +/* Check the public key of the ed448 key matches the private key. + * + * key [in] Ed448 private/public key. + * returns BAD_FUNC_ARG when key is NULL, + * PUBLIC_KEY_E when the public key is not set or doesn't match, + * other -ve value on hash failure, + * 0 otherwise. + */ +int wc_ed448_check_key(ed448_key* key) +{ + int ret = 0; + unsigned char pubKey[ED448_PUB_KEY_SIZE]; + + if (key == NULL) { + ret = BAD_FUNC_ARG; + } + + if ((ret == 0) && (!key->pubKeySet)) { + ret = PUBLIC_KEY_E; + } + if (ret == 0) { + ret = wc_ed448_make_public(key, pubKey, sizeof(pubKey)); + } + if ((ret == 0) && (XMEMCMP(pubKey, key->p, ED448_PUB_KEY_SIZE) != 0)) { + ret = PUBLIC_KEY_E; + } + + return ret; +} + +/* Returns the size of an ed448 private key. + * + * key [in] Ed448 private/public key. + * returns BAD_FUNC_ARG when key is NULL, + * ED448_KEY_SIZE otherwise. + */ +int wc_ed448_size(ed448_key* key) +{ + int ret = ED448_KEY_SIZE; + + if (key == NULL) { + ret = BAD_FUNC_ARG; + } + + return ret; +} + +/* Returns the size of an ed448 private plus public key. + * + * key [in] Ed448 private/public key. + * returns BAD_FUNC_ARG when key is NULL, + * ED448_PRV_KEY_SIZE otherwise. + */ +int wc_ed448_priv_size(ed448_key* key) +{ + int ret = ED448_PRV_KEY_SIZE; + + if (key == NULL) { + ret = BAD_FUNC_ARG; + } + + return ret; +} + +/* Returns the size of an ed448 public key. + * + * key [in] Ed448 private/public key.
+ * returns BAD_FUNC_ARG when key is NULL, + * ED448_PUB_KEY_SIZE otherwise. + */ +int wc_ed448_pub_size(ed448_key* key) +{ + int ret = ED448_PUB_KEY_SIZE; + + if (key == NULL) { + ret = BAD_FUNC_ARG; + } + + return ret; +} + +/* Returns the size of an ed448 signature. + * + * key [in] Ed448 private/public key. + * returns BAD_FUNC_ARG when key is NULL, + * ED448_SIG_SIZE otherwise. + */ +int wc_ed448_sig_size(ed448_key* key) +{ + int ret = ED448_SIG_SIZE; + + if (key == NULL) { + ret = BAD_FUNC_ARG; + } + + return ret; +} + +#endif /* HAVE_ED448 */ + diff --git a/client/wolfssl/wolfcrypt/src/error.c b/client/wolfssl/wolfcrypt/src/error.c new file mode 100644 index 0000000..87ded35 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/error.c @@ -0,0 +1,530 @@ +/* error.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#include <wolfssl/wolfcrypt/error-crypt.h> + +#ifdef _MSC_VER + /* 4996 warning to use MS extensions e.g., strcpy_s instead of XSTRNCPY */ + #pragma warning(disable: 4996) +#endif + +#ifndef NO_ERROR_STRINGS +const char* wc_GetErrorString(int error) +{ + switch (error) { + + case OPEN_RAN_E : + return "opening random device error"; + + case READ_RAN_E : + return "reading random device error"; + + case WINCRYPT_E : + return "windows crypt init error"; + + case CRYPTGEN_E : + return "windows crypt generation error"; + + case RAN_BLOCK_E : + return "random device read would block error"; + + case BAD_MUTEX_E : + return "Bad mutex, operation failed"; + + case WC_TIMEOUT_E: + return "Timeout error"; + + case WC_PENDING_E: + return "wolfCrypt Operation Pending (would block / eagain) error"; + + case WC_NOT_PENDING_E: + return "wolfCrypt operation not pending error"; + + case MP_INIT_E : + return "mp_init error state"; + + case MP_READ_E : + return "mp_read error state"; + + case MP_EXPTMOD_E : + return "mp_exptmod error state"; + + case MP_TO_E : + return "mp_to_xxx error state, can't convert"; + + case MP_SUB_E : + return "mp_sub error state, can't subtract"; + + case MP_ADD_E : + return "mp_add error state, can't add"; + + case MP_MUL_E : + return "mp_mul error state, can't multiply"; + + case MP_MULMOD_E : + return "mp_mulmod error state, can't multiply mod"; + + case MP_MOD_E : + return "mp_mod error state, can't mod"; + + case MP_INVMOD_E : + return "mp_invmod error state, can't inv mod"; + + case MP_CMP_E : + return "mp_cmp error state"; + + case MP_ZERO_E : + return "mp zero result, not expected"; + + case MEMORY_E : + return "out of memory error"; + + case VAR_STATE_CHANGE_E : + return "Variable state modified by different thread"; + + case RSA_WRONG_TYPE_E : + return "RSA wrong block type for RSA function"; + + case RSA_BUFFER_E : + return "RSA buffer error, output too small or input too big"; + + case
BUFFER_E : + return "Buffer error, output too small or input too big"; + + case ALGO_ID_E : + return "Setting Cert AlgoID error"; + + case PUBLIC_KEY_E : + return "Setting Cert Public Key error"; + + case DATE_E : + return "Setting Cert Date validity error"; + + case SUBJECT_E : + return "Setting Cert Subject name error"; + + case ISSUER_E : + return "Setting Cert Issuer name error"; + + case CA_TRUE_E : + return "Setting basic constraint CA true error"; + + case EXTENSIONS_E : + return "Setting extensions error"; + + case ASN_PARSE_E : + return "ASN parsing error, invalid input"; + + case ASN_VERSION_E : + return "ASN version error, invalid number"; + + case ASN_GETINT_E : + return "ASN get big int error, invalid data"; + + case ASN_RSA_KEY_E : + return "ASN key init error, invalid input"; + + case ASN_OBJECT_ID_E : + return "ASN object id error, invalid id"; + + case ASN_TAG_NULL_E : + return "ASN tag error, not null"; + + case ASN_EXPECT_0_E : + return "ASN expect error, not zero"; + + case ASN_BITSTR_E : + return "ASN bit string error, wrong id"; + + case ASN_UNKNOWN_OID_E : + return "ASN oid error, unknown sum id"; + + case ASN_DATE_SZ_E : + return "ASN date error, bad size"; + + case ASN_BEFORE_DATE_E : + return "ASN date error, current date before"; + + case ASN_AFTER_DATE_E : + return "ASN date error, current date after"; + + case ASN_SIG_OID_E : + return "ASN signature error, mismatched oid"; + + case ASN_TIME_E : + return "ASN time error, unknown time type"; + + case ASN_INPUT_E : + return "ASN input error, not enough data"; + + case ASN_SIG_CONFIRM_E : + return "ASN sig error, confirm failure"; + + case ASN_SIG_HASH_E : + return "ASN sig error, unsupported hash type"; + + case ASN_SIG_KEY_E : + return "ASN sig error, unsupported key type"; + + case ASN_DH_KEY_E : + return "ASN key init error, invalid input"; + + case ASN_NTRU_KEY_E : + return "ASN NTRU key decode error, invalid input"; + + case ASN_CRIT_EXT_E: + return "X.509 Critical extension ignored or invalid"; + + case ASN_ALT_NAME_E: + return "ASN alternate name error"; + + case ECC_BAD_ARG_E : + return "ECC input argument wrong type, invalid input"; + + case ASN_ECC_KEY_E : + return "ECC ASN1 bad key data, invalid input"; + + case ECC_CURVE_OID_E : + return "ECC curve sum OID unsupported, invalid input"; + + case BAD_FUNC_ARG : + return "Bad function argument"; + + case NOT_COMPILED_IN : + return "Feature not compiled in"; + + case UNICODE_SIZE_E : + return "Unicode password too big"; + + case NO_PASSWORD : + return "No password provided by user"; + + case ALT_NAME_E : + return "Alt Name problem, too big"; + + case AES_GCM_AUTH_E: + return "AES-GCM Authentication check fail"; + + case AES_CCM_AUTH_E: + return "AES-CCM Authentication check fail"; + + case ASYNC_INIT_E: + return "Async Init error"; + + case COMPRESS_INIT_E: + return "Compress Init error"; + + case COMPRESS_E: + return "Compress error"; + + case DECOMPRESS_INIT_E: + return "DeCompress Init error"; + + case DECOMPRESS_E: + return "DeCompress error"; + + case BAD_ALIGN_E: + return "Bad alignment error, no alloc help"; + + case ASN_NO_SIGNER_E : + return "ASN no signer error to confirm failure"; + + case ASN_CRL_CONFIRM_E : + return "ASN CRL sig error, confirm failure"; + + case ASN_CRL_NO_SIGNER_E : + return "ASN CRL no signer error to confirm failure"; + + case ASN_OCSP_CONFIRM_E : + return "ASN OCSP sig error, confirm failure"; + + case ASN_NO_PEM_HEADER: + return "ASN no PEM Header Error"; + + case BAD_STATE_E: + return "Bad state operation"; + + case 
BAD_PADDING_E: + return "Bad padding, message wrong length"; + + case REQ_ATTRIBUTE_E: + return "Setting cert request attributes error"; + + case PKCS7_OID_E: + return "PKCS#7 error: mismatched OID value"; + + case PKCS7_RECIP_E: + return "PKCS#7 error: no matching recipient found"; + + case WC_PKCS7_WANT_READ_E: + return "PKCS#7 operation wants more input, call again"; + + case FIPS_NOT_ALLOWED_E: + return "FIPS mode not allowed error"; + + case ASN_NAME_INVALID_E: + return "Name Constraint error"; + + case RNG_FAILURE_E: + return "Random Number Generator failed"; + + case HMAC_MIN_KEYLEN_E: + return "FIPS Mode HMAC Minimum Key Length error"; + + case RSA_PAD_E: + return "Rsa Padding error"; + + case LENGTH_ONLY_E: + return "Output length only set, not for other use error"; + + case IN_CORE_FIPS_E: + return "In Core Integrity check FIPS error"; + + case AES_KAT_FIPS_E: + return "AES Known Answer Test check FIPS error"; + + case DES3_KAT_FIPS_E: + return "DES3 Known Answer Test check FIPS error"; + + case HMAC_KAT_FIPS_E: + return "HMAC Known Answer Test check FIPS error"; + + case RSA_KAT_FIPS_E: + return "RSA Known Answer Test check FIPS error"; + + case DRBG_KAT_FIPS_E: + return "DRBG Known Answer Test check FIPS error"; + + case DRBG_CONT_FIPS_E: + return "DRBG Continuous Test FIPS error"; + + case AESGCM_KAT_FIPS_E: + return "AESGCM Known Answer Test check FIPS error"; + + case THREAD_STORE_KEY_E: + return "Thread Storage Key Create error"; + + case THREAD_STORE_SET_E: + return "Thread Storage Set error"; + + case MAC_CMP_FAILED_E: + return "MAC comparison failed"; + + case IS_POINT_E: + return "ECC is point on curve failed"; + + case ECC_INF_E: + return "ECC point at infinity error"; + + case ECC_OUT_OF_RANGE_E: + return "ECC Qx or Qy out of range error"; + + case ECC_PRIV_KEY_E: + return "ECC private key is not valid error"; + + case SRP_CALL_ORDER_E: + return "SRP function called in the wrong order error"; + + case SRP_VERIFY_E: + return "SRP proof verification error"; + + case SRP_BAD_KEY_E: + return "SRP bad key values error"; + + case ASN_NO_SKID: + return "ASN no Subject Key Identifier found error"; + + case ASN_NO_AKID: + return "ASN no Authority Key Identifier found error"; + + case ASN_NO_KEYUSAGE: + return "ASN no Key Usage found error"; + + case SKID_E: + return "Setting Subject Key Identifier error"; + + case AKID_E: + return "Setting Authority Key Identifier error"; + + case KEYUSAGE_E: + return "Key Usage value error"; + + case EXTKEYUSAGE_E: + return "Extended Key Usage value error"; + + case CERTPOLICIES_E: + return "Setting Certificate Policies error"; + + case WC_INIT_E: + return "wolfCrypt Initialize Failure error"; + + case SIG_VERIFY_E: + return "Signature verify error"; + + case BAD_COND_E: + return "Bad condition variable operation error"; + + case SIG_TYPE_E: + return "Signature type not enabled/available"; + + case HASH_TYPE_E: + return "Hash type not enabled/available"; + + case WC_KEY_SIZE_E: + return "Key size error, either too small or large"; + + case ASN_COUNTRY_SIZE_E: + return "Country code size error, either too small or large"; + + case MISSING_RNG_E: + return "RNG required but not provided"; + + case ASN_PATHLEN_SIZE_E: + return "ASN CA path length value too large error"; + + case ASN_PATHLEN_INV_E: + return "ASN CA path length larger than signer error"; + + case BAD_KEYWRAP_ALG_E: + return "Unsupported key wrap algorithm error"; + + case BAD_KEYWRAP_IV_E: + return "Decrypted AES key wrap IV does not match expected"; + + case WC_CLEANUP_E: +
return "wolfcrypt cleanup failed"; + + case ECC_CDH_KAT_FIPS_E: + return "wolfcrypt FIPS ECC CDH Known Answer Test Failure"; + + case DH_CHECK_PUB_E: + return "DH Check Public Key failure"; + + case BAD_PATH_ERROR: + return "Bad path for opendir error"; + + case ASYNC_OP_E: + return "Async operation error"; + + case BAD_OCSP_RESPONDER: + return "Invalid OCSP Responder, missing specific key usage extensions"; + + case ECC_PRIVATEONLY_E: + return "Invalid use of private only ECC key"; + + case WC_HW_E: + return "Error with hardware crypto use"; + + case WC_HW_WAIT_E: + return "Hardware waiting on resource"; + + case PSS_SALTLEN_E: + return "PSS - Length of salt is too big for hash algorithm"; + + case PRIME_GEN_E: + return "Unable to find a prime for RSA key"; + + case BER_INDEF_E: + return "Unable to decode an indefinite length encoded message"; + + case RSA_OUT_OF_RANGE_E: + return "Ciphertext to decrypt is out of range"; + + case RSAPSS_PAT_FIPS_E: + return "wolfcrypt FIPS RSA-PSS Pairwise Agreement Test Failure"; + + case ECDSA_PAT_FIPS_E: + return "wolfcrypt FIPS ECDSA Pairwise Agreement Test Failure"; + + case DH_KAT_FIPS_E: + return "wolfcrypt FIPS DH Known Answer Test Failure"; + + case AESCCM_KAT_FIPS_E: + return "AESCCM Known Answer Test check FIPS error"; + + case SHA3_KAT_FIPS_E: + return "SHA-3 Known Answer Test check FIPS error"; + + case ECDHE_KAT_FIPS_E: + return "wolfcrypt FIPS ECDHE Known Answer Test Failure"; + + case AES_GCM_OVERFLOW_E: + return "AES-GCM invocation counter overflow"; + + case AES_CCM_OVERFLOW_E: + return "AES-CCM invocation counter overflow"; + + case RSA_KEY_PAIR_E: + return "RSA Key Pair-Wise Consistency check fail"; + + case DH_CHECK_PRIV_E: + return "DH Check Private Key failure"; + + case WC_AFALG_SOCK_E: + return "AF_ALG socket error"; + + case WC_DEVCRYPTO_E: + return "Error with /dev/crypto"; + + case ZLIB_INIT_ERROR: + return "zlib init error"; + + case ZLIB_COMPRESS_ERROR: + return "zlib compress error"; + + case ZLIB_DECOMPRESS_ERROR: + return "zlib decompress error"; + + case PKCS7_NO_SIGNER_E: + return "No signer in PKCS#7 signed data"; + + case CRYPTOCB_UNAVAILABLE: + return "Crypto callback unavailable"; + + case PKCS7_SIGNEEDS_CHECK: + return "Signature found but no certificate to verify"; + + case PSS_SALTLEN_RECOVER_E: + return "PSS - Salt length unable to be recovered"; + + case ASN_SELF_SIGNED_E: + return "ASN self-signed certificate error"; + + default: + return "unknown error number"; + + } +} + +void wc_ErrorString(int error, char* buffer) +{ + XSTRNCPY(buffer, wc_GetErrorString(error), WOLFSSL_MAX_ERROR_SZ); + buffer[WOLFSSL_MAX_ERROR_SZ-1] = 0; +} +#endif /* !NO_ERROR_STRINGS */ + diff --git a/client/wolfssl/wolfcrypt/src/evp.c b/client/wolfssl/wolfcrypt/src/evp.c new file mode 100644 index 0000000..d920790 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/evp.c @@ -0,0 +1,6595 @@ +/* evp.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#if !defined(WOLFSSL_EVP_INCLUDED) + #ifndef WOLFSSL_IGNORE_FILE_WARN + #warning evp.c does not need to be compiled separately from ssl.c + #endif +#elif defined(WOLFCRYPT_ONLY) +#else + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#include <wolfssl/openssl/ecdsa.h> +#include <wolfssl/openssl/evp.h> + +#if defined(OPENSSL_EXTRA) + +#ifndef NO_AES + #ifdef HAVE_AES_CBC + #ifdef WOLFSSL_AES_128 + static char *EVP_AES_128_CBC = NULL; + #endif + #ifdef WOLFSSL_AES_192 + static char *EVP_AES_192_CBC = NULL; + #endif + #ifdef WOLFSSL_AES_256 + static char *EVP_AES_256_CBC = NULL; + #endif + #endif /* HAVE_AES_CBC */ + + #ifdef WOLFSSL_AES_OFB + #ifdef WOLFSSL_AES_128 + static char *EVP_AES_128_OFB = NULL; + #endif + #ifdef WOLFSSL_AES_192 + static char *EVP_AES_192_OFB = NULL; + #endif + #ifdef WOLFSSL_AES_256 + static char *EVP_AES_256_OFB = NULL; + #endif + #endif /* WOLFSSL_AES_OFB */ + + #ifdef WOLFSSL_AES_XTS + #ifdef WOLFSSL_AES_128 + static char *EVP_AES_128_XTS = NULL; + #endif + #ifdef WOLFSSL_AES_256 + static char *EVP_AES_256_XTS = NULL; + #endif + #endif /* WOLFSSL_AES_XTS */ + + #ifdef WOLFSSL_AES_CFB + #ifdef WOLFSSL_AES_128 + static char *EVP_AES_128_CFB1 = NULL; + #endif + #ifdef WOLFSSL_AES_192 + static char *EVP_AES_192_CFB1 = NULL; + #endif + #ifdef WOLFSSL_AES_256 + static char *EVP_AES_256_CFB1 = NULL; + #endif + + #ifdef WOLFSSL_AES_128 + static char *EVP_AES_128_CFB8 = NULL; + #endif + #ifdef WOLFSSL_AES_192 + static char *EVP_AES_192_CFB8 = NULL; + #endif + #ifdef WOLFSSL_AES_256 + static char *EVP_AES_256_CFB8 = NULL; + #endif + + #ifdef WOLFSSL_AES_128 + static char *EVP_AES_128_CFB128 = NULL; + #endif + #ifdef WOLFSSL_AES_192 + static char *EVP_AES_192_CFB128 = NULL; + #endif + #ifdef WOLFSSL_AES_256 + static char *EVP_AES_256_CFB128 = NULL; + #endif + #endif /* WOLFSSL_AES_CFB */ + + #ifdef HAVE_AESGCM + #ifdef WOLFSSL_AES_128 + static char *EVP_AES_128_GCM = NULL; + #endif + #ifdef WOLFSSL_AES_192 + static char *EVP_AES_192_GCM = NULL; + #endif + #ifdef WOLFSSL_AES_256 + static char *EVP_AES_256_GCM = NULL; + #endif + #endif /* HAVE_AESGCM */ + #ifdef WOLFSSL_AES_128 + static char *EVP_AES_128_CTR = NULL; + #endif + #ifdef WOLFSSL_AES_192 + static char *EVP_AES_192_CTR = NULL; + #endif + #ifdef WOLFSSL_AES_256 + static char *EVP_AES_256_CTR = NULL; + #endif + + #ifdef WOLFSSL_AES_128 + static char *EVP_AES_128_ECB = NULL; + #endif + #ifdef WOLFSSL_AES_192 + static char *EVP_AES_192_ECB = NULL; + #endif + #ifdef WOLFSSL_AES_256 + static char *EVP_AES_256_ECB = NULL; + #endif + #define EVP_AES_SIZE 11 + #ifdef WOLFSSL_AES_CFB + #define EVP_AESCFB_SIZE 14 + #endif +#endif + +#ifndef NO_DES3 + static char *EVP_DES_CBC = NULL; + static char *EVP_DES_ECB = NULL; + + static char *EVP_DES_EDE3_CBC = NULL; + static char *EVP_DES_EDE3_ECB = NULL; + + #define EVP_DES_SIZE 7 + #define EVP_DES_EDE3_SIZE 12 +#endif + +#ifdef HAVE_IDEA + static char *EVP_IDEA_CBC = NULL; + #define EVP_IDEA_SIZE 8 +#endif + +static unsigned int cipherType(const WOLFSSL_EVP_CIPHER *cipher);
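+/* Sketch (editorial example): the typical init/update/final flow these EVP wrappers support, shown for AES-128-CBC. wolfSSL_EVP_aes_128_cbc() is assumed here as the wolfSSL accessor behind the EVP_aes_128_cbc() compatibility macro; the output buffer must allow one extra block for padding from the final call. Guarded out. */ +#if 0 +static int evp_cbc_sketch(const unsigned char* key16, const unsigned char* iv16, + const unsigned char* in, int inl, unsigned char* out) +{ + WOLFSSL_EVP_CIPHER_CTX* ctx = wolfSSL_EVP_CIPHER_CTX_new(); + int outl = 0, finl = 0, ok; + + if (ctx == NULL) + return WOLFSSL_FAILURE; + ok = wolfSSL_EVP_EncryptInit(ctx, wolfSSL_EVP_aes_128_cbc(), key16, iv16); + if (ok == WOLFSSL_SUCCESS) + ok = wolfSSL_EVP_CipherUpdate(ctx, out, &outl, in, inl); + if (ok == WOLFSSL_SUCCESS) + ok = wolfSSL_EVP_EncryptFinal(ctx, out + outl, &finl); /* pads */ + wolfSSL_EVP_CIPHER_CTX_free(ctx); + return ok; +} +#endif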
/* Getter function for cipher key length + * + * c WOLFSSL_EVP_CIPHER structure to get key length from + * + * NOTE: OpenSSL_add_all_ciphers() should be called first before using this + * function + * + * Returns size of key in bytes + */ +int wolfSSL_EVP_Cipher_key_length(const WOLFSSL_EVP_CIPHER* c) +{ + WOLFSSL_ENTER("wolfSSL_EVP_Cipher_key_length"); + + if (c == NULL) { + return 0; + } + + switch (cipherType(c)) { +#if !defined(NO_AES) + #if defined(HAVE_AES_CBC) + case AES_128_CBC_TYPE: return 16; + case AES_192_CBC_TYPE: return 24; + case AES_256_CBC_TYPE: return 32; + #endif + #if defined(WOLFSSL_AES_CFB) + case AES_128_CFB1_TYPE: return 16; + case AES_192_CFB1_TYPE: return 24; + case AES_256_CFB1_TYPE: return 32; + case AES_128_CFB8_TYPE: return 16; + case AES_192_CFB8_TYPE: return 24; + case AES_256_CFB8_TYPE: return 32; + case AES_128_CFB128_TYPE: return 16; + case AES_192_CFB128_TYPE: return 24; + case AES_256_CFB128_TYPE: return 32; + #endif + #if defined(WOLFSSL_AES_OFB) + case AES_128_OFB_TYPE: return 16; + case AES_192_OFB_TYPE: return 24; + case AES_256_OFB_TYPE: return 32; + #endif + #if defined(WOLFSSL_AES_XTS) + case AES_128_XTS_TYPE: return 16; + case AES_256_XTS_TYPE: return 32; + #endif + #if defined(HAVE_AESGCM) + case AES_128_GCM_TYPE: return 16; + case AES_192_GCM_TYPE: return 24; + case AES_256_GCM_TYPE: return 32; + #endif + #if defined(WOLFSSL_AES_COUNTER) + case AES_128_CTR_TYPE: return 16; + case AES_192_CTR_TYPE: return 24; + case AES_256_CTR_TYPE: return 32; + #endif + #if defined(HAVE_AES_ECB) + case AES_128_ECB_TYPE: return 16; + case AES_192_ECB_TYPE: return 24; + case AES_256_ECB_TYPE: return 32; + #endif +#endif /* !NO_AES */ + #ifndef NO_DES3 + case DES_CBC_TYPE: return 8; + case DES_EDE3_CBC_TYPE: return 24; + case DES_ECB_TYPE: return 8; + case DES_EDE3_ECB_TYPE: return 24; + #endif + default: + return 0; + } +} + + +int wolfSSL_EVP_EncryptInit(WOLFSSL_EVP_CIPHER_CTX* ctx, + const WOLFSSL_EVP_CIPHER* type, + const unsigned char* key, + const unsigned char* iv) +{ + return wolfSSL_EVP_CipherInit(ctx, type, (byte*)key, (byte*)iv, 1); +} + +int wolfSSL_EVP_EncryptInit_ex(WOLFSSL_EVP_CIPHER_CTX* ctx, + const WOLFSSL_EVP_CIPHER* type, + WOLFSSL_ENGINE *impl, + const unsigned char* key, + const unsigned char* iv) +{ + (void) impl; + return wolfSSL_EVP_CipherInit(ctx, type, (byte*)key, (byte*)iv, 1); +} + +int wolfSSL_EVP_DecryptInit(WOLFSSL_EVP_CIPHER_CTX* ctx, + const WOLFSSL_EVP_CIPHER* type, + const unsigned char* key, + const unsigned char* iv) +{ + WOLFSSL_ENTER("wolfSSL_EVP_DecryptInit"); + return wolfSSL_EVP_CipherInit(ctx, type, (byte*)key, (byte*)iv, 0); +} + +int wolfSSL_EVP_DecryptInit_ex(WOLFSSL_EVP_CIPHER_CTX* ctx, + const WOLFSSL_EVP_CIPHER* type, + WOLFSSL_ENGINE *impl, + const unsigned char* key, + const unsigned char* iv) +{ + (void) impl; + WOLFSSL_ENTER("wolfSSL_EVP_DecryptInit"); + return wolfSSL_EVP_CipherInit(ctx, type, (byte*)key, (byte*)iv, 0); +} + + +WOLFSSL_EVP_CIPHER_CTX *wolfSSL_EVP_CIPHER_CTX_new(void) +{ + WOLFSSL_EVP_CIPHER_CTX *ctx = (WOLFSSL_EVP_CIPHER_CTX*)XMALLOC(sizeof *ctx, + NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (ctx) { + WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_new"); + wolfSSL_EVP_CIPHER_CTX_init(ctx); + } + return ctx; +} + +void wolfSSL_EVP_CIPHER_CTX_free(WOLFSSL_EVP_CIPHER_CTX *ctx) +{ + if (ctx) { + WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_free"); + wolfSSL_EVP_CIPHER_CTX_cleanup(ctx); + XFREE(ctx, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +} + +int wolfSSL_EVP_CIPHER_CTX_reset(WOLFSSL_EVP_CIPHER_CTX *ctx) +{ + int ret = WOLFSSL_FAILURE; + + if (ctx != NULL) { + WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_reset"); + wolfSSL_EVP_CIPHER_CTX_cleanup(ctx); + ret = WOLFSSL_SUCCESS; + } + + return ret; +} + +unsigned long wolfSSL_EVP_CIPHER_CTX_mode(const WOLFSSL_EVP_CIPHER_CTX *ctx) +{ + if (ctx == NULL) return 0; + return ctx->flags &
WOLFSSL_EVP_CIPH_MODE; +} + +int wolfSSL_EVP_EncryptFinal(WOLFSSL_EVP_CIPHER_CTX *ctx, + unsigned char *out, int *outl) +{ + if (ctx && ctx->enc) { + WOLFSSL_ENTER("wolfSSL_EVP_EncryptFinal"); + return wolfSSL_EVP_CipherFinal(ctx, out, outl); + } + else + return WOLFSSL_FAILURE; +} + + +int wolfSSL_EVP_CipherInit_ex(WOLFSSL_EVP_CIPHER_CTX* ctx, + const WOLFSSL_EVP_CIPHER* type, + WOLFSSL_ENGINE *impl, + const unsigned char* key, + const unsigned char* iv, + int enc) +{ + (void)impl; + return wolfSSL_EVP_CipherInit(ctx, type, key, iv, enc); +} + +int wolfSSL_EVP_EncryptFinal_ex(WOLFSSL_EVP_CIPHER_CTX *ctx, + unsigned char *out, int *outl) +{ + if (ctx && ctx->enc) { + WOLFSSL_ENTER("wolfSSL_EVP_EncryptFinal_ex"); + return wolfSSL_EVP_CipherFinal(ctx, out, outl); + } + else + return WOLFSSL_FAILURE; +} + +int wolfSSL_EVP_DecryptFinal(WOLFSSL_EVP_CIPHER_CTX *ctx, + unsigned char *out, int *outl) +{ + if (ctx && !ctx->enc) { + WOLFSSL_ENTER("wolfSSL_EVP_DecryptFinal"); + return wolfSSL_EVP_CipherFinal(ctx, out, outl); + } + else { + return WOLFSSL_FAILURE; + } +} + +int wolfSSL_EVP_DecryptFinal_ex(WOLFSSL_EVP_CIPHER_CTX *ctx, + unsigned char *out, int *outl) +{ + if (ctx && !ctx->enc) { + WOLFSSL_ENTER("wolfSSL_EVP_DecryptFinal_ex"); + return wolfSSL_EVP_CipherFinal(ctx, out, outl); + } + else { + return WOLFSSL_FAILURE; + } +} + + +int wolfSSL_EVP_DigestInit_ex(WOLFSSL_EVP_MD_CTX* ctx, + const WOLFSSL_EVP_MD* type, + WOLFSSL_ENGINE *impl) +{ + (void) impl; + WOLFSSL_ENTER("wolfSSL_EVP_DigestInit_ex"); + return wolfSSL_EVP_DigestInit(ctx, type); +} + +#ifdef DEBUG_WOLFSSL_EVP +#define PRINT_BUF(b, sz) { int _i; for(_i=0; _i<(sz); _i++) { \ + printf("%02x(%c),", (b)[_i], (b)[_i]); if ((_i+1)%8==0)printf("\n");}} +#else +#define PRINT_BUF(b, sz) +#endif + +static int fillBuff(WOLFSSL_EVP_CIPHER_CTX *ctx, const unsigned char *in, int sz) +{ + int fill; + + if (sz > 0) { + if ((sz+ctx->bufUsed) > ctx->block_size) { + fill = ctx->block_size - ctx->bufUsed; + } else { + fill = sz; + } + XMEMCPY(&(ctx->buf[ctx->bufUsed]), in, fill); + ctx->bufUsed += fill; + return fill; + } else return 0; +} + +static int evpCipherBlock(WOLFSSL_EVP_CIPHER_CTX *ctx, + unsigned char *out, + const unsigned char *in, int inl) +{ + int ret = 0; + + switch (ctx->cipherType) { +#if !defined(NO_AES) + #if defined(HAVE_AES_CBC) + case AES_128_CBC_TYPE: + case AES_192_CBC_TYPE: + case AES_256_CBC_TYPE: + if (ctx->enc) + ret = wc_AesCbcEncrypt(&ctx->cipher.aes, out, in, inl); + else + ret = wc_AesCbcDecrypt(&ctx->cipher.aes, out, in, inl); + break; + #endif + #if defined(HAVE_AESGCM) + case AES_128_GCM_TYPE: + case AES_192_GCM_TYPE: + case AES_256_GCM_TYPE: + if (ctx->enc) { + if (out){ + /* encrypt confidential data*/ + ret = wc_AesGcmEncrypt(&ctx->cipher.aes, out, in, inl, + ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz, + NULL, 0); + } + else { + /* authenticated, non-confidential data */ + ret = wc_AesGcmEncrypt(&ctx->cipher.aes, NULL, NULL, 0, + ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz, + in, inl); + /* Reset partial authTag error for AAD*/ + if (ret == AES_GCM_AUTH_E) + ret = 0; + } + } + else { + if (out){ + /* decrypt confidential data*/ + ret = wc_AesGcmDecrypt(&ctx->cipher.aes, out, in, inl, + ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz, + NULL, 0); + } + else { + /* authenticated, non-confidential data*/ + ret = wc_AesGcmDecrypt(&ctx->cipher.aes, NULL, NULL, 0, + ctx->iv, ctx->ivSz, + ctx->authTag, ctx->authTagSz, + in, inl); + /* Reset partial authTag error for AAD*/ + if (ret == AES_GCM_AUTH_E) + 
ret = 0; + } + } + break; + #endif + #if defined(WOLFSSL_AES_COUNTER) + case AES_128_CTR_TYPE: + case AES_192_CTR_TYPE: + case AES_256_CTR_TYPE: + ret = wc_AesCtrEncrypt(&ctx->cipher.aes, out, in, inl); + break; + #endif + #if defined(HAVE_AES_ECB) + case AES_128_ECB_TYPE: + case AES_192_ECB_TYPE: + case AES_256_ECB_TYPE: + if (ctx->enc) + ret = wc_AesEcbEncrypt(&ctx->cipher.aes, out, in, inl); + else + ret = wc_AesEcbDecrypt(&ctx->cipher.aes, out, in, inl); + break; + #endif + #if defined(WOLFSSL_AES_OFB) + case AES_128_OFB_TYPE: + case AES_192_OFB_TYPE: + case AES_256_OFB_TYPE: + if (ctx->enc) + ret = wc_AesOfbEncrypt(&ctx->cipher.aes, out, in, inl); + else + ret = wc_AesOfbDecrypt(&ctx->cipher.aes, out, in, inl); + break; + #endif + #if defined(WOLFSSL_AES_CFB) + #if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS) + case AES_128_CFB1_TYPE: + case AES_192_CFB1_TYPE: + case AES_256_CFB1_TYPE: + if (ctx->enc) + ret = wc_AesCfb1Encrypt(&ctx->cipher.aes, out, in, + inl * WOLFSSL_BIT_SIZE); + else + ret = wc_AesCfb1Decrypt(&ctx->cipher.aes, out, in, + inl * WOLFSSL_BIT_SIZE); + break; + + case AES_128_CFB8_TYPE: + case AES_192_CFB8_TYPE: + case AES_256_CFB8_TYPE: + if (ctx->enc) + ret = wc_AesCfb8Encrypt(&ctx->cipher.aes, out, in, inl); + else + ret = wc_AesCfb8Decrypt(&ctx->cipher.aes, out, in, inl); + break; + #endif /* !HAVE_SELFTEST && !HAVE_FIPS */ + + case AES_128_CFB128_TYPE: + case AES_192_CFB128_TYPE: + case AES_256_CFB128_TYPE: + if (ctx->enc) + ret = wc_AesCfbEncrypt(&ctx->cipher.aes, out, in, inl); + else + ret = wc_AesCfbDecrypt(&ctx->cipher.aes, out, in, inl); + break; + #endif +#if defined(WOLFSSL_AES_XTS) + case AES_128_XTS_TYPE: + case AES_256_XTS_TYPE: + if (ctx->enc) + ret = wc_AesXtsEncrypt(&ctx->cipher.xts, out, in, inl, + ctx->iv, ctx->ivSz); + else + ret = wc_AesXtsDecrypt(&ctx->cipher.xts, out, in, inl, + ctx->iv, ctx->ivSz); + break; +#endif +#endif /* !NO_AES */ + #ifndef NO_DES3 + case DES_CBC_TYPE: + if (ctx->enc) + ret = wc_Des_CbcEncrypt(&ctx->cipher.des, out, in, inl); + else + ret = wc_Des_CbcDecrypt(&ctx->cipher.des, out, in, inl); + break; + case DES_EDE3_CBC_TYPE: + if (ctx->enc) + ret = wc_Des3_CbcEncrypt(&ctx->cipher.des3, out, in, inl); + else + ret = wc_Des3_CbcDecrypt(&ctx->cipher.des3, out, in, inl); + break; + #if defined(WOLFSSL_DES_ECB) + case DES_ECB_TYPE: + ret = wc_Des_EcbEncrypt(&ctx->cipher.des, out, in, inl); + break; + case DES_EDE3_ECB_TYPE: + ret = wc_Des3_EcbEncrypt(&ctx->cipher.des3, out, in, inl); + break; + #endif + #endif + #ifndef NO_RC4 + case ARC4_TYPE: + wc_Arc4Process(&ctx->cipher.arc4, out, in, inl); + break; + #endif + default: + return WOLFSSL_FAILURE; + } + + if (ret != 0) + return WOLFSSL_FAILURE; /* failure */ + + (void)in; + (void)inl; + (void)out; + + return WOLFSSL_SUCCESS; /* success */ +} + +#if defined(HAVE_AESGCM) +static int wolfSSL_EVP_CipherUpdate_GCM(WOLFSSL_EVP_CIPHER_CTX *ctx, + unsigned char *out, int *outl, + const unsigned char *in, int inl) +{ + /* process blocks */ + if (evpCipherBlock(ctx, out, in, inl) == 0) + return WOLFSSL_FAILURE; + *outl = inl; + return WOLFSSL_SUCCESS; +} +#endif + +/* returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure */ +WOLFSSL_API int wolfSSL_EVP_CipherUpdate(WOLFSSL_EVP_CIPHER_CTX *ctx, + unsigned char *out, int *outl, + const unsigned char *in, int inl) +{ + int blocks; + int fill; + + WOLFSSL_ENTER("wolfSSL_EVP_CipherUpdate"); + if ((ctx == NULL) || (inl < 0) || (outl == NULL)|| (in == NULL)) { + WOLFSSL_MSG("Bad argument"); + return WOLFSSL_FAILURE; + } + + 
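+    /* A sketch of the streaming contract implemented below (illustrative
+     * caller, hypothetical buffers): partial input is buffered internally,
+     * so output only grows in block_size multiples across calls, e.g. for
+     * AES-CBC encryption with 16-byte blocks:
+     *
+     *     wolfSSL_EVP_CipherUpdate(ctx, outBuf, &outl, data, 10);      // outl == 0
+     *     wolfSSL_EVP_CipherUpdate(ctx, outBuf, &outl, data + 10, 22); // outl == 32
+     *
+     * (when decrypting with padding, the last full block is withheld until
+     * wolfSSL_EVP_CipherFinal() so its padding can be checked) */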
+    *outl = 0;
+    if (inl == 0) {
+        return WOLFSSL_SUCCESS;
+    }
+
+#if !defined(NO_AES) && defined(HAVE_AESGCM)
+    switch (ctx->cipherType) {
+        case AES_128_GCM_TYPE:
+        case AES_192_GCM_TYPE:
+        case AES_256_GCM_TYPE:
+/* if out == NULL, in/inl contains the additional authenticated data for GCM */
+            return wolfSSL_EVP_CipherUpdate_GCM(ctx, out, outl, in, inl);
+        default:
+            /* fall-through */
+            break;
+    }
+#endif /* !defined(NO_AES) && defined(HAVE_AESGCM) */
+
+    if (out == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (ctx->bufUsed > 0) { /* concatenate them if there is anything */
+        fill = fillBuff(ctx, in, inl);
+        inl -= fill;
+        in  += fill;
+    }
+
+    /* check if the buff is full, and if so flush it out */
+    if (ctx->bufUsed == ctx->block_size) {
+        byte* output = out;
+
+        /* During decryption we save the last block to check padding on Final.
+         * Update the last block stored if one has already been stored */
+        if (ctx->enc == 0) {
+            if (ctx->lastUsed == 1) {
+                XMEMCPY(out, ctx->lastBlock, ctx->block_size);
+                *outl += ctx->block_size;
+                out += ctx->block_size;
+            }
+            output = ctx->lastBlock; /* redirect output to last block buffer */
+            ctx->lastUsed = 1;
+        }
+
+        PRINT_BUF(ctx->buf, ctx->block_size);
+        if (evpCipherBlock(ctx, output, ctx->buf, ctx->block_size) == 0) {
+            return WOLFSSL_FAILURE;
+        }
+        PRINT_BUF(out, ctx->block_size);
+        ctx->bufUsed = 0;
+
+        /* if doing encryption update the new output block, decryption will
+         * always have the last block saved for when Final is called */
+        if (ctx->enc != 0) {
+            *outl += ctx->block_size;
+            out += ctx->block_size;
+        }
+    }
+
+    blocks = inl / ctx->block_size;
+    if (blocks > 0) {
+        /* During decryption we save the last block to check padding on Final.
+         * Update the last block stored if one has already been stored */
+        if ((ctx->enc == 0) && (ctx->lastUsed == 1)) {
+            PRINT_BUF(ctx->lastBlock, ctx->block_size);
+            XMEMCPY(out, ctx->lastBlock, ctx->block_size);
+            *outl += ctx->block_size;
+            out += ctx->block_size;
+            ctx->lastUsed = 0;
+        }
+
+        /* process blocks */
+        if (evpCipherBlock(ctx, out, in, blocks * ctx->block_size) == 0) {
+            return WOLFSSL_FAILURE;
+        }
+        PRINT_BUF(in, ctx->block_size*blocks);
+        PRINT_BUF(out, ctx->block_size*blocks);
+        inl -= ctx->block_size * blocks;
+        in  += ctx->block_size * blocks;
+        if (ctx->enc == 0) {
+            if ((ctx->flags & WOLFSSL_EVP_CIPH_NO_PADDING) ||
+                    (ctx->block_size == 1)) {
+                ctx->lastUsed = 0;
+                *outl += ctx->block_size * blocks;
+            } else {
+                /* in the case of decryption and padding, store the last block
+                 * here in order to verify the padding when Final is called */
+                if (inl == 0) { /* if not 0 then we know leftovers are checked */
+                    ctx->lastUsed = 1;
+                    blocks = blocks - 1; /* save last block to check padding in
+                                          * EVP_CipherFinal call */
+                    XMEMCPY(ctx->lastBlock, &out[ctx->block_size * blocks],
+                            ctx->block_size);
+                }
+                *outl += ctx->block_size * blocks;
+            }
+        } else {
+            *outl += ctx->block_size * blocks;
+        }
+    }
+
+    if (inl > 0) {
+        /* put fraction into buff */
+        fillBuff(ctx, in, inl);
+        /* no increase of outl */
+    }
+    (void)out; /* silence warning in case not read */
+
+    return WOLFSSL_SUCCESS;
+}
+
+static void padBlock(WOLFSSL_EVP_CIPHER_CTX *ctx)
+{
+    int i;
+    for (i = ctx->bufUsed; i < ctx->block_size; i++)
+        ctx->buf[i] = (byte)(ctx->block_size - ctx->bufUsed);
+}
+
+static int checkPad(WOLFSSL_EVP_CIPHER_CTX *ctx, unsigned char *buff)
+{
+    int i;
+    int n;
+    n = buff[ctx->block_size-1];
+    if (n > ctx->block_size) return -1;
+    for (i = 0; i < n; i++) {
+        if (buff[ctx->block_size-i-1] != n)
+            return -1;
+    }
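+    /* All n trailing bytes carry the pad value n (PKCS#7), so the value
+     * returned below is the count of plaintext bytes left in the final
+     * block; e.g. a 16-byte block ending in 04 04 04 04 yields 12. */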
+    return ctx->block_size - n;
+}
+
+int wolfSSL_EVP_CipherFinal(WOLFSSL_EVP_CIPHER_CTX *ctx,
+                            unsigned char *out, int *outl)
+{
+    int fl;
+    int ret = WOLFSSL_SUCCESS;
+    if (!ctx || !outl)
+        return WOLFSSL_FAILURE;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_CipherFinal");
+
+#if !defined(NO_AES) && defined(HAVE_AESGCM)
+    switch (ctx->cipherType) {
+        case AES_128_GCM_TYPE:
+        case AES_192_GCM_TYPE:
+        case AES_256_GCM_TYPE:
+            *outl = 0;
+            /* Clear IV, since IV reuse is not recommended for AES GCM. */
+            XMEMSET(ctx->iv, 0, AES_BLOCK_SIZE);
+            return WOLFSSL_SUCCESS;
+        default:
+            /* fall-through */
+            break;
+    }
+#endif /* !NO_AES && HAVE_AESGCM */
+
+    if (!out)
+        return WOLFSSL_FAILURE;
+
+    if (ctx->flags & WOLFSSL_EVP_CIPH_NO_PADDING) {
+        if (ctx->bufUsed != 0) return WOLFSSL_FAILURE;
+        *outl = 0;
+    }
+    else if (ctx->enc) {
+        if (ctx->block_size == 1) {
+            *outl = 0;
+        }
+        else if ((ctx->bufUsed >= 0) && (ctx->block_size != 1)) {
+            padBlock(ctx);
+            PRINT_BUF(ctx->buf, ctx->block_size);
+            if (evpCipherBlock(ctx, out, ctx->buf, ctx->block_size) == 0) {
+                WOLFSSL_MSG("Final Cipher Block failed");
+                ret = WOLFSSL_FAILURE;
+            }
+            else {
+                PRINT_BUF(out, ctx->block_size);
+                *outl = ctx->block_size;
+            }
+        }
+    }
+    else {
+        if (ctx->block_size == 1) {
+            *outl = 0;
+        }
+        else if ((ctx->bufUsed % ctx->block_size) != 0) {
+            *outl = 0;
+            /* not enough padding for decrypt */
+            WOLFSSL_MSG("Final Cipher Block not enough padding");
+            ret = WOLFSSL_FAILURE;
+        }
+        else if (ctx->lastUsed) {
+            PRINT_BUF(ctx->lastBlock, ctx->block_size);
+            if ((fl = checkPad(ctx, ctx->lastBlock)) >= 0) {
+                XMEMCPY(out, ctx->lastBlock, fl);
+                *outl = fl;
+                if (ctx->lastUsed == 0 && ctx->bufUsed == 0) {
+                    /* return error in cases where the block length is incorrect */
+                    WOLFSSL_MSG("Final Cipher Block bad length");
+                    ret = WOLFSSL_FAILURE;
+                }
+            }
+            else {
+                ret = WOLFSSL_FAILURE;
+            }
+        }
+        else if (ctx->lastUsed == 0 && ctx->bufUsed == 0) {
+            /* return error in cases where the block length is incorrect */
+            ret = WOLFSSL_FAILURE;
+        }
+    }
+    if (ret == WOLFSSL_SUCCESS) {
+        /* reset cipher state after final */
+        wolfSSL_EVP_CipherInit(ctx, NULL, NULL, NULL, -1);
+    }
+    return ret;
+}
+
+
+#ifdef WOLFSSL_EVP_DECRYPT_LEGACY
+/* This is a version of DecryptFinal to work with data encrypted with
+ * wolfSSL_EVP_EncryptFinal() with the broken padding. (pre-v3.12.0)
+ * Only call this after wolfSSL_EVP_CipherFinal() fails on a decrypt.
+ * Note, you don't know if the padding is good or bad with the old
+ * encrypt, but it is likely to be bad. It will update the output
+ * length with the block_size so the last block is still captured. */
+WOLFSSL_API int wolfSSL_EVP_DecryptFinal_legacy(WOLFSSL_EVP_CIPHER_CTX *ctx,
+                                                unsigned char *out, int *outl)
+{
+    int fl;
+    if (ctx == NULL || out == NULL || outl == NULL)
+        return BAD_FUNC_ARG;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_DecryptFinal_legacy");
+    if (ctx->block_size == 1) {
+        *outl = 0;
+        return WOLFSSL_SUCCESS;
+    }
+    if ((ctx->bufUsed % ctx->block_size) != 0) {
+        *outl = 0;
+        /* not enough padding for decrypt */
+        return WOLFSSL_FAILURE;
+    }
+    /* The original behavior of CipherFinal() was like it is now,
+     * but checkPad would return 0 in case of a bad pad. It would
+     * treat the pad as 0, and leave the data in the output buffer,
+     * and not try to copy anything. This converts checkPad's -1 error
+     * code to block_size.
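+     *
+     * Illustrative call pattern per the note above (a sketch only): try
+     * the normal final first and fall back on failure:
+     *
+     *     if (wolfSSL_EVP_DecryptFinal(ctx, out, &outl) != WOLFSSL_SUCCESS)
+     *         (void)wolfSSL_EVP_DecryptFinal_legacy(ctx, out, &outl);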
+ */ + if (ctx->lastUsed) { + PRINT_BUF(ctx->lastBlock, ctx->block_size); + if ((fl = checkPad(ctx, ctx->lastBlock)) < 0) { + fl = ctx->block_size; + } + else { + XMEMCPY(out, ctx->lastBlock, fl); + } + *outl = fl; + } + /* return error in cases where the block length is incorrect */ + if (ctx->lastUsed == 0 && ctx->bufUsed == 0) { + return WOLFSSL_FAILURE; + } + + return WOLFSSL_SUCCESS; +} +#endif + + +int wolfSSL_EVP_CIPHER_CTX_block_size(const WOLFSSL_EVP_CIPHER_CTX *ctx) +{ + if (ctx == NULL) return BAD_FUNC_ARG; + switch (ctx->cipherType) { +#if !defined(NO_AES) || !defined(NO_DES3) +#if !defined(NO_AES) +#if defined(HAVE_AES_CBC) + case AES_128_CBC_TYPE: + case AES_192_CBC_TYPE: + case AES_256_CBC_TYPE: +#endif +#if defined(HAVE_AESGCM) + case AES_128_GCM_TYPE: + case AES_192_GCM_TYPE: + case AES_256_GCM_TYPE: +#endif +#if defined(WOLFSSL_AES_COUNTER) + case AES_128_CTR_TYPE: + case AES_192_CTR_TYPE: + case AES_256_CTR_TYPE: +#endif +#if defined(WOLFSSL_AES_CFB) + case AES_128_CFB1_TYPE: + case AES_192_CFB1_TYPE: + case AES_256_CFB1_TYPE: + case AES_128_CFB8_TYPE: + case AES_192_CFB8_TYPE: + case AES_256_CFB8_TYPE: + case AES_128_CFB128_TYPE: + case AES_192_CFB128_TYPE: + case AES_256_CFB128_TYPE: +#endif +#if defined(WOLFSSL_AES_OFB) + case AES_128_OFB_TYPE: + case AES_192_OFB_TYPE: + case AES_256_OFB_TYPE: +#endif +#if defined(WOLFSSL_AES_XTS) + case AES_128_XTS_TYPE: + case AES_256_XTS_TYPE: +#endif + + case AES_128_ECB_TYPE: + case AES_192_ECB_TYPE: + case AES_256_ECB_TYPE: +#endif /* !NO_AES */ +#ifndef NO_DES3 + case DES_CBC_TYPE: + case DES_ECB_TYPE: + case DES_EDE3_CBC_TYPE: + case DES_EDE3_ECB_TYPE: +#endif + return ctx->block_size; +#endif /* !NO_AES || !NO_DES3 */ + default: + return 0; + } +} + +static unsigned int cipherType(const WOLFSSL_EVP_CIPHER *cipher) +{ + if (cipher == NULL) return 0; /* dummy for #ifdef */ +#ifndef NO_DES3 + else if (EVP_DES_CBC && XSTRNCMP(cipher, EVP_DES_CBC, EVP_DES_SIZE) == 0) + return DES_CBC_TYPE; + else if (EVP_DES_EDE3_CBC && XSTRNCMP(cipher, EVP_DES_EDE3_CBC, EVP_DES_EDE3_SIZE) == 0) + return DES_EDE3_CBC_TYPE; +#if !defined(NO_DES3) + else if (EVP_DES_ECB && XSTRNCMP(cipher, EVP_DES_ECB, EVP_DES_SIZE) == 0) + return DES_ECB_TYPE; + else if (EVP_DES_EDE3_ECB && XSTRNCMP(cipher, EVP_DES_EDE3_ECB, EVP_DES_EDE3_SIZE) == 0) + return DES_EDE3_ECB_TYPE; +#endif /* NO_DES3 && HAVE_AES_ECB */ +#endif +#if !defined(NO_AES) +#if defined(HAVE_AES_CBC) + #ifdef WOLFSSL_AES_128 + else if (EVP_AES_128_CBC && XSTRNCMP(cipher, EVP_AES_128_CBC, EVP_AES_SIZE) == 0) + return AES_128_CBC_TYPE; + #endif + #ifdef WOLFSSL_AES_192 + else if (EVP_AES_192_CBC && XSTRNCMP(cipher, EVP_AES_192_CBC, EVP_AES_SIZE) == 0) + return AES_192_CBC_TYPE; + #endif + #ifdef WOLFSSL_AES_256 + else if (EVP_AES_256_CBC && XSTRNCMP(cipher, EVP_AES_256_CBC, EVP_AES_SIZE) == 0) + return AES_256_CBC_TYPE; + #endif +#endif /* HAVE_AES_CBC */ +#if defined(HAVE_AESGCM) + #ifdef WOLFSSL_AES_128 + else if (EVP_AES_128_GCM && XSTRNCMP(cipher, EVP_AES_128_GCM, EVP_AES_SIZE) == 0) + return AES_128_GCM_TYPE; + #endif + #ifdef WOLFSSL_AES_192 + else if (EVP_AES_192_GCM && XSTRNCMP(cipher, EVP_AES_192_GCM, EVP_AES_SIZE) == 0) + return AES_192_GCM_TYPE; + #endif + #ifdef WOLFSSL_AES_256 + else if (EVP_AES_256_GCM && XSTRNCMP(cipher, EVP_AES_256_GCM, EVP_AES_SIZE) == 0) + return AES_256_GCM_TYPE; + #endif +#endif /* HAVE_AESGCM */ +#if defined(WOLFSSL_AES_COUNTER) + #ifdef WOLFSSL_AES_128 + else if (EVP_AES_128_CTR && XSTRNCMP(cipher, EVP_AES_128_CTR, EVP_AES_SIZE) == 0) + return 
AES_128_CTR_TYPE; + #endif + #ifdef WOLFSSL_AES_192 + else if (EVP_AES_192_CTR && XSTRNCMP(cipher, EVP_AES_192_CTR, EVP_AES_SIZE) == 0) + return AES_192_CTR_TYPE; + #endif + #ifdef WOLFSSL_AES_256 + else if (EVP_AES_256_CTR && XSTRNCMP(cipher, EVP_AES_256_CTR, EVP_AES_SIZE) == 0) + return AES_256_CTR_TYPE; + #endif +#endif /* HAVE_AES_CBC */ +#if defined(HAVE_AES_ECB) + #ifdef WOLFSSL_AES_128 + else if (EVP_AES_128_ECB && XSTRNCMP(cipher, EVP_AES_128_ECB, EVP_AES_SIZE) == 0) + return AES_128_ECB_TYPE; + #endif + #ifdef WOLFSSL_AES_192 + else if (EVP_AES_192_ECB && XSTRNCMP(cipher, EVP_AES_192_ECB, EVP_AES_SIZE) == 0) + return AES_192_ECB_TYPE; + #endif + #ifdef WOLFSSL_AES_256 + else if (EVP_AES_256_ECB && XSTRNCMP(cipher, EVP_AES_256_ECB, EVP_AES_SIZE) == 0) + return AES_256_ECB_TYPE; + #endif +#endif /*HAVE_AES_CBC */ +#if defined(WOLFSSL_AES_XTS) + #ifdef WOLFSSL_AES_128 + else if (EVP_AES_128_XTS && XSTRNCMP(cipher, EVP_AES_128_XTS, EVP_AES_SIZE) == 0) + return AES_128_XTS_TYPE; + #endif + #ifdef WOLFSSL_AES_256 + else if (EVP_AES_256_XTS && XSTRNCMP(cipher, EVP_AES_256_XTS, EVP_AES_SIZE) == 0) + return AES_256_XTS_TYPE; + #endif +#endif /* WOLFSSL_AES_XTS */ +#if defined(WOLFSSL_AES_CFB) + #ifdef WOLFSSL_AES_128 + else if (EVP_AES_128_CFB1 && XSTRNCMP(cipher, EVP_AES_128_CFB1, EVP_AESCFB_SIZE) == 0) + return AES_128_CFB1_TYPE; + #endif + #ifdef WOLFSSL_AES_192 + else if (EVP_AES_192_CFB1 && XSTRNCMP(cipher, EVP_AES_192_CFB1, EVP_AESCFB_SIZE) == 0) + return AES_192_CFB1_TYPE; + #endif + #ifdef WOLFSSL_AES_256 + else if (EVP_AES_256_CFB1 && XSTRNCMP(cipher, EVP_AES_256_CFB1, EVP_AESCFB_SIZE) == 0) + return AES_256_CFB1_TYPE; + #endif + #ifdef WOLFSSL_AES_128 + else if (EVP_AES_128_CFB8 && XSTRNCMP(cipher, EVP_AES_128_CFB8, EVP_AESCFB_SIZE) == 0) + return AES_128_CFB8_TYPE; + #endif + #ifdef WOLFSSL_AES_192 + else if (EVP_AES_192_CFB8 && XSTRNCMP(cipher, EVP_AES_192_CFB8, EVP_AESCFB_SIZE) == 0) + return AES_192_CFB8_TYPE; + #endif + #ifdef WOLFSSL_AES_256 + else if (EVP_AES_256_CFB8 && XSTRNCMP(cipher, EVP_AES_256_CFB8, EVP_AESCFB_SIZE) == 0) + return AES_256_CFB8_TYPE; + #endif + #ifdef WOLFSSL_AES_128 + else if (EVP_AES_128_CFB128 && XSTRNCMP(cipher, EVP_AES_128_CFB128, EVP_AESCFB_SIZE) == 0) + return AES_128_CFB128_TYPE; + #endif + #ifdef WOLFSSL_AES_192 + else if (EVP_AES_192_CFB128 && XSTRNCMP(cipher, EVP_AES_192_CFB128, EVP_AESCFB_SIZE) == 0) + return AES_192_CFB128_TYPE; + #endif + #ifdef WOLFSSL_AES_256 + else if (EVP_AES_256_CFB128 && XSTRNCMP(cipher, EVP_AES_256_CFB128, EVP_AESCFB_SIZE) == 0) + return AES_256_CFB128_TYPE; + #endif +#endif /*HAVE_AES_CBC */ +#endif /* !NO_AES */ + else return 0; +} + +int wolfSSL_EVP_CIPHER_block_size(const WOLFSSL_EVP_CIPHER *cipher) +{ + if (cipher == NULL) return BAD_FUNC_ARG; + switch (cipherType(cipher)) { +#if !defined(NO_AES) + #if defined(HAVE_AES_CBC) + case AES_128_CBC_TYPE: + case AES_192_CBC_TYPE: + case AES_256_CBC_TYPE: + return AES_BLOCK_SIZE; + #endif + #if defined(HAVE_AESGCM) + case AES_128_GCM_TYPE: + case AES_192_GCM_TYPE: + case AES_256_GCM_TYPE: + return AES_BLOCK_SIZE; + #endif + #if defined(WOLFSSL_AES_COUNTER) + case AES_128_CTR_TYPE: + case AES_192_CTR_TYPE: + case AES_256_CTR_TYPE: + return AES_BLOCK_SIZE; + #endif + #if defined(HAVE_AES_ECB) + case AES_128_ECB_TYPE: + case AES_192_ECB_TYPE: + case AES_256_ECB_TYPE: + return AES_BLOCK_SIZE; + #endif +#endif /* NO_AES */ + #ifndef NO_DES3 + case DES_CBC_TYPE: return 8; + case DES_EDE3_CBC_TYPE: return 8; + case DES_ECB_TYPE: return 8; + case DES_EDE3_ECB_TYPE: return 8; 
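+        /* Illustrative values: wolfSSL_EVP_CIPHER_block_size(
+         * wolfSSL_EVP_des_ede3_cbc()) reports 8, while every AES mode
+         * above reports AES_BLOCK_SIZE (16 bytes). */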
+    #endif
+        default:
+            return 0;
+    }
+}
+
+unsigned long WOLFSSL_CIPHER_mode(const WOLFSSL_EVP_CIPHER *cipher)
+{
+    switch (cipherType(cipher)) {
+#if !defined(NO_AES)
+    #if defined(HAVE_AES_CBC)
+        case AES_128_CBC_TYPE:
+        case AES_192_CBC_TYPE:
+        case AES_256_CBC_TYPE:
+            return WOLFSSL_EVP_CIPH_CBC_MODE;
+    #endif
+    #if defined(HAVE_AESGCM)
+        case AES_128_GCM_TYPE:
+        case AES_192_GCM_TYPE:
+        case AES_256_GCM_TYPE:
+            return WOLFSSL_EVP_CIPH_GCM_MODE;
+    #endif
+    #if defined(WOLFSSL_AES_COUNTER)
+        case AES_128_CTR_TYPE:
+        case AES_192_CTR_TYPE:
+        case AES_256_CTR_TYPE:
+            return WOLFSSL_EVP_CIPH_CTR_MODE;
+    #endif
+        case AES_128_ECB_TYPE:
+        case AES_192_ECB_TYPE:
+        case AES_256_ECB_TYPE:
+            return WOLFSSL_EVP_CIPH_ECB_MODE;
+#endif /* !NO_AES */
+    #ifndef NO_DES3
+        case DES_CBC_TYPE:
+        case DES_EDE3_CBC_TYPE:
+            return WOLFSSL_EVP_CIPH_CBC_MODE;
+        case DES_ECB_TYPE:
+        case DES_EDE3_ECB_TYPE:
+            return WOLFSSL_EVP_CIPH_ECB_MODE;
+    #endif
+    #ifndef NO_RC4
+        case ARC4_TYPE:
+            return EVP_CIPH_STREAM_CIPHER;
+    #endif
+        default:
+            return 0;
+    }
+}
+
+unsigned long WOLFSSL_EVP_CIPHER_mode(const WOLFSSL_EVP_CIPHER *cipher)
+{
+    if (cipher == NULL) return 0;
+    return WOLFSSL_CIPHER_mode(cipher);
+}
+
+void wolfSSL_EVP_CIPHER_CTX_set_flags(WOLFSSL_EVP_CIPHER_CTX *ctx, int flags)
+{
+    if (ctx != NULL) {
+        ctx->flags |= flags;
+    }
+}
+
+void wolfSSL_EVP_CIPHER_CTX_clear_flags(WOLFSSL_EVP_CIPHER_CTX *ctx, int flags)
+{
+    if (ctx != NULL) {
+        ctx->flags &= ~flags;
+    }
+}
+
+unsigned long wolfSSL_EVP_CIPHER_flags(const WOLFSSL_EVP_CIPHER *cipher)
+{
+    if (cipher == NULL) return 0;
+    return WOLFSSL_CIPHER_mode(cipher);
+}
+
+int wolfSSL_EVP_CIPHER_CTX_set_padding(WOLFSSL_EVP_CIPHER_CTX *ctx, int padding)
+{
+    if (ctx == NULL) return BAD_FUNC_ARG;
+    if (padding) {
+        ctx->flags &= ~WOLFSSL_EVP_CIPH_NO_PADDING;
+    }
+    else {
+        ctx->flags |= WOLFSSL_EVP_CIPH_NO_PADDING;
+    }
+    return 1;
+}
+
+int wolfSSL_EVP_add_digest(const WOLFSSL_EVP_MD *digest)
+{
+    (void)digest;
+    /* nothing to do */
+    return 0;
+}
+
+
+/* Frees the WOLFSSL_EVP_PKEY_CTX passed in.
+ *
+ * return WOLFSSL_SUCCESS on success
+ */
+int wolfSSL_EVP_PKEY_CTX_free(WOLFSSL_EVP_PKEY_CTX *ctx)
+{
+    if (ctx == NULL) return 0;
+    WOLFSSL_ENTER("EVP_PKEY_CTX_free");
+    if (ctx->pkey != NULL)
+        wolfSSL_EVP_PKEY_free(ctx->pkey);
+    if (ctx->peerKey != NULL)
+        wolfSSL_EVP_PKEY_free(ctx->peerKey);
+    XFREE(ctx, NULL, DYNAMIC_TYPE_PUBLIC_KEY);
+    return WOLFSSL_SUCCESS;
+}
+
+
+/* Creates a new WOLFSSL_EVP_PKEY_CTX structure.
+ *
+ * pkey  key structure to use with new WOLFSSL_EVP_PKEY_CTX
+ * e     engine to use. It should be NULL at this time.
+ *
+ * return the new structure on success and NULL if failed.
+ */
+WOLFSSL_EVP_PKEY_CTX *wolfSSL_EVP_PKEY_CTX_new(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_ENGINE *e)
+{
+    WOLFSSL_EVP_PKEY_CTX* ctx;
+    int type = NID_undef;
+
+    if (pkey == NULL) return 0;
+    if (e != NULL) return 0;
+    WOLFSSL_ENTER("EVP_PKEY_CTX_new");
+
+    ctx = (WOLFSSL_EVP_PKEY_CTX*)XMALLOC(sizeof(WOLFSSL_EVP_PKEY_CTX), NULL,
+                                         DYNAMIC_TYPE_PUBLIC_KEY);
+    if (ctx == NULL) return NULL;
+    XMEMSET(ctx, 0, sizeof(WOLFSSL_EVP_PKEY_CTX));
+    ctx->pkey = pkey;
+#if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+    ctx->padding = RSA_PKCS1_PADDING;
+#endif
+    type = wolfSSL_EVP_PKEY_type(pkey->type);
+
+    if (type != NID_undef) {
+        if (wc_LockMutex(&pkey->refMutex) != 0) {
+            WOLFSSL_MSG("Couldn't lock pkey mutex");
+        }
+        pkey->references++;
+
+        wc_UnLockMutex(&pkey->refMutex);
+    }
+    return ctx;
+}
+
+
+/* Sets the type of RSA padding to use.
+ *
+ * ctx      structure to set padding in.
+ * padding  RSA padding type
+ *
+ * returns WOLFSSL_SUCCESS on success.
+ */
+int wolfSSL_EVP_PKEY_CTX_set_rsa_padding(WOLFSSL_EVP_PKEY_CTX *ctx, int padding)
+{
+    if (ctx == NULL) return 0;
+    WOLFSSL_ENTER("EVP_PKEY_CTX_set_rsa_padding");
+    ctx->padding = padding;
+    return WOLFSSL_SUCCESS;
+}
+
+/* create a PKEY context and return it */
+WOLFSSL_EVP_PKEY_CTX *wolfSSL_EVP_PKEY_CTX_new_id(int id, WOLFSSL_ENGINE *e)
+{
+    WOLFSSL_EVP_PKEY* pkey;
+    WOLFSSL_EVP_PKEY_CTX* ctx = NULL;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_CTX_new_id");
+
+    pkey = wolfSSL_EVP_PKEY_new_ex(NULL);
+    if (pkey) {
+        pkey->type = id;
+        ctx = wolfSSL_EVP_PKEY_CTX_new(pkey, e);
+        if (ctx == NULL) {
+            wolfSSL_EVP_PKEY_free(pkey);
+        }
+    }
+    return ctx;
+}
+
+/* Returns WOLFSSL_SUCCESS */
+int wolfSSL_EVP_PKEY_CTX_set_rsa_keygen_bits(WOLFSSL_EVP_PKEY_CTX *ctx, int bits)
+{
+    if (ctx) {
+        ctx->nbits = bits;
+    }
+    return WOLFSSL_SUCCESS;
+}
+
+
+int wolfSSL_EVP_PKEY_derive_init(WOLFSSL_EVP_PKEY_CTX *ctx)
+{
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_derive_init");
+
+    if (!ctx) {
+        return WOLFSSL_FAILURE;
+    }
+    wolfSSL_EVP_PKEY_free(ctx->peerKey);
+    ctx->op = EVP_PKEY_OP_DERIVE;
+    ctx->padding = 0;
+    ctx->nbits = 0;
+    return WOLFSSL_SUCCESS;
+}
+
+int wolfSSL_EVP_PKEY_derive_set_peer(WOLFSSL_EVP_PKEY_CTX *ctx, WOLFSSL_EVP_PKEY *peer)
+{
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_derive_set_peer");
+
+    if (!ctx || ctx->op != EVP_PKEY_OP_DERIVE) {
+        return WOLFSSL_FAILURE;
+    }
+    wolfSSL_EVP_PKEY_free(ctx->peerKey);
+    ctx->peerKey = peer;
+    if (!wolfSSL_EVP_PKEY_up_ref(peer)) {
+        ctx->peerKey = NULL;
+        return WOLFSSL_FAILURE;
+    }
+    return WOLFSSL_SUCCESS;
+}
+
+#if !defined(NO_DH) && defined(HAVE_ECC)
+int wolfSSL_EVP_PKEY_derive(WOLFSSL_EVP_PKEY_CTX *ctx, unsigned char *key, size_t *keylen)
+{
+    int len;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_derive");
+
+    if (!ctx || ctx->op != EVP_PKEY_OP_DERIVE || !ctx->pkey || !ctx->peerKey || !keylen
+            || ctx->pkey->type != ctx->peerKey->type) {
+        return WOLFSSL_FAILURE;
+    }
+    switch (ctx->pkey->type) {
+#ifndef NO_DH
+    case EVP_PKEY_DH:
+        /* Use DH */
+        if (!ctx->pkey->dh || !ctx->peerKey->dh || !ctx->peerKey->dh->pub_key) {
+            return WOLFSSL_FAILURE;
+        }
+        if ((len = wolfSSL_DH_size(ctx->pkey->dh)) <= 0) {
+            return WOLFSSL_FAILURE;
+        }
+        if (key) {
+            if (*keylen < (size_t)len) {
+                return WOLFSSL_FAILURE;
+            }
+            if (wolfSSL_DH_compute_key(key, ctx->peerKey->dh->pub_key,
+                                       ctx->pkey->dh) != len) {
+                return WOLFSSL_FAILURE;
+            }
+        }
+        *keylen = (size_t)len;
+        break;
+#endif
+#ifdef HAVE_ECC
+    case EVP_PKEY_EC:
+        /* Use ECDH */
+        if (!ctx->pkey->ecc || !ctx->peerKey->ecc) {
+            return WOLFSSL_FAILURE;
+        }
+        /* set internal key if not done */
+        if (!ctx->pkey->ecc->inSet) {
+            if (SetECKeyInternal(ctx->pkey->ecc) != WOLFSSL_SUCCESS) {
+                WOLFSSL_MSG("SetECKeyInternal failed");
+                return WOLFSSL_FAILURE;
+            }
+        }
+        if (!ctx->peerKey->ecc->exSet || !ctx->peerKey->ecc->pub_key->internal) {
+            if (SetECKeyExternal(ctx->peerKey->ecc) != WOLFSSL_SUCCESS) {
+                WOLFSSL_MSG("SetECKeyExternal failed");
+                return WOLFSSL_FAILURE;
+            }
+        }
+        if (!(len = wc_ecc_size((ecc_key*)ctx->pkey->ecc->internal))) {
+            return WOLFSSL_FAILURE;
+        }
+        if (key) {
+            word32 len32 = (word32)len;
+            if (*keylen < len32) {
+                WOLFSSL_MSG("buffer too short");
+                return WOLFSSL_FAILURE;
+            }
+            if (wc_ecc_shared_secret_ssh((ecc_key*)ctx->pkey->ecc->internal,
+                    (ecc_point*)ctx->peerKey->ecc->pub_key->internal,
+                    key, &len32) != MP_OKAY) {
+                WOLFSSL_MSG("wc_ecc_shared_secret failed");
+                return WOLFSSL_FAILURE;
+            }
+            len = (int)len32;
+        }
+        *keylen = (size_t)len;
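+        /* Typical two-pass use (illustrative, hypothetical names): a first
+         * call with key == NULL only reports the secret length through
+         * *keylen; the caller then sizes a buffer and calls again:
+         *
+         *     size_t klen = 0;
+         *     wolfSSL_EVP_PKEY_derive(pctx, NULL, &klen);
+         *     wolfSSL_EVP_PKEY_derive(pctx, secret, &klen);
+         */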
+ break; +#endif + default: + WOLFSSL_MSG("Unknown key type"); + return WOLFSSL_FAILURE; + } + return WOLFSSL_SUCCESS; +} +#endif + +/* Uses the WOLFSSL_EVP_PKEY_CTX to decrypt a buffer. + * + * ctx structure to decrypt with + * out buffer to hold the results + * outlen initially holds size of out buffer and gets set to decrypt result size + * in buffer decrypt + * inlen length of in buffer + * + * returns WOLFSSL_SUCCESS on success. + */ +int wolfSSL_EVP_PKEY_decrypt(WOLFSSL_EVP_PKEY_CTX *ctx, + unsigned char *out, size_t *outlen, + const unsigned char *in, size_t inlen) +{ + int len = 0; + + if (ctx == NULL) return 0; + WOLFSSL_ENTER("EVP_PKEY_decrypt"); + + (void)out; + (void)outlen; + (void)in; + (void)inlen; + (void)len; + + switch (ctx->pkey->type) { +#if !defined(NO_RSA) && !defined(HAVE_USER_RSA) + case EVP_PKEY_RSA: + len = wolfSSL_RSA_private_decrypt((int)inlen, (unsigned char*)in, out, + ctx->pkey->rsa, ctx->padding); + if (len < 0) break; + else { + *outlen = len; + return WOLFSSL_SUCCESS; + } +#endif /* NO_RSA */ + + case EVP_PKEY_EC: + WOLFSSL_MSG("not implemented"); + FALL_THROUGH; + default: + break; + } + return WOLFSSL_FAILURE; +} + + +/* Initialize a WOLFSSL_EVP_PKEY_CTX structure for decryption + * + * ctx WOLFSSL_EVP_PKEY_CTX structure to use with decryption + * + * Returns WOLFSSL_FAILURE on failure and WOLFSSL_SUCCESS on success + */ +int wolfSSL_EVP_PKEY_decrypt_init(WOLFSSL_EVP_PKEY_CTX *ctx) +{ + if (ctx == NULL) return WOLFSSL_FAILURE; + WOLFSSL_ENTER("EVP_PKEY_decrypt_init"); + switch (ctx->pkey->type) { + case EVP_PKEY_RSA: + ctx->op = EVP_PKEY_OP_DECRYPT; + return WOLFSSL_SUCCESS; + case EVP_PKEY_EC: + WOLFSSL_MSG("not implemented"); + FALL_THROUGH; + default: + break; + } + return WOLFSSL_FAILURE; +} + + +/* Use a WOLFSSL_EVP_PKEY_CTX structure to encrypt data + * + * ctx WOLFSSL_EVP_PKEY_CTX structure to use with encryption + * out buffer to hold encrypted data + * outlen length of out buffer + * in data to be encrypted + * inlen length of in buffer + * + * Returns WOLFSSL_FAILURE on failure and WOLFSSL_SUCCESS on success + */ +int wolfSSL_EVP_PKEY_encrypt(WOLFSSL_EVP_PKEY_CTX *ctx, + unsigned char *out, size_t *outlen, + const unsigned char *in, size_t inlen) +{ + int len = 0; + if (ctx == NULL) return WOLFSSL_FAILURE; + WOLFSSL_ENTER("EVP_PKEY_encrypt"); + if (ctx->op != EVP_PKEY_OP_ENCRYPT) return WOLFSSL_FAILURE; + + (void)out; + (void)outlen; + (void)in; + (void)inlen; + (void)len; + switch (ctx->pkey->type) { +#if !defined(NO_RSA) && !defined(HAVE_USER_RSA) + case EVP_PKEY_RSA: + len = wolfSSL_RSA_public_encrypt((int)inlen, (unsigned char *)in, out, + ctx->pkey->rsa, ctx->padding); + if (len < 0) + break; + else { + *outlen = len; + return WOLFSSL_SUCCESS; + } +#endif /* NO_RSA */ + + case EVP_PKEY_EC: + WOLFSSL_MSG("not implemented"); + FALL_THROUGH; + default: + break; + } + return WOLFSSL_FAILURE; +} + + +/* Initialize a WOLFSSL_EVP_PKEY_CTX structure to encrypt data + * + * ctx WOLFSSL_EVP_PKEY_CTX structure to use with encryption + * + * Returns WOLFSSL_FAILURE on failure and WOLFSSL_SUCCESS on success + */ +int wolfSSL_EVP_PKEY_encrypt_init(WOLFSSL_EVP_PKEY_CTX *ctx) +{ + if (ctx == NULL) return WOLFSSL_FAILURE; + WOLFSSL_ENTER("EVP_PKEY_encrypt_init"); + + switch (ctx->pkey->type) { + case EVP_PKEY_RSA: + ctx->op = EVP_PKEY_OP_ENCRYPT; + return WOLFSSL_SUCCESS; + case EVP_PKEY_EC: + WOLFSSL_MSG("not implemented"); + FALL_THROUGH; + default: + break; + } + return WOLFSSL_FAILURE; +} 
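+
+/* Illustrative round trip over the four functions above (a sketch, not part
+ * of the library; assumes an RSA WOLFSSL_EVP_PKEY *pkey plus msg/pt buffers,
+ * and omits error handling):
+ *
+ *     WOLFSSL_EVP_PKEY_CTX *pctx = wolfSSL_EVP_PKEY_CTX_new(pkey, NULL);
+ *     unsigned char ct[256];
+ *     size_t ctSz = sizeof(ct), ptSz = sizeof(pt);
+ *     wolfSSL_EVP_PKEY_encrypt_init(pctx);
+ *     wolfSSL_EVP_PKEY_encrypt(pctx, ct, &ctSz, msg, msgSz);
+ *     wolfSSL_EVP_PKEY_decrypt_init(pctx);
+ *     wolfSSL_EVP_PKEY_decrypt(pctx, pt, &ptSz, ct, ctSz);
+ *     wolfSSL_EVP_PKEY_CTX_free(pctx);
+ */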
+/****************************************************************************** +* wolfSSL_EVP_PKEY_sign_init - initializes a public key algorithm context for +* a signing operation. +* +* RETURNS: +* returns WOLFSSL_SUCCESS on success, otherwise returns -2 +*/ +WOLFSSL_API int wolfSSL_EVP_PKEY_sign_init(WOLFSSL_EVP_PKEY_CTX *ctx) +{ + int ret = -2; + + WOLFSSL_MSG("wolfSSL_EVP_PKEY_sign_init"); + if (!ctx || !ctx->pkey) + return ret; + + switch (ctx->pkey->type) { + case EVP_PKEY_RSA: + ctx->op = EVP_PKEY_OP_SIGN; + ret = WOLFSSL_SUCCESS; + break; + case EVP_PKEY_EC: + WOLFSSL_MSG("not implemented"); + FALL_THROUGH; + default: + ret = -2; + } + return ret; +} +/****************************************************************************** +* wolfSSL_EVP_PKEY_sign - performs a public key signing operation using ctx +* The data to be signed should be hashed since the function does not hash the data. +* +* RETURNS: +* returns WOLFSSL_SUCCESS on success, otherwise returns WOLFSSL_FAILURE +*/ + +WOLFSSL_API int wolfSSL_EVP_PKEY_sign(WOLFSSL_EVP_PKEY_CTX *ctx, unsigned char *sig, + size_t *siglen, const unsigned char *tbs, size_t tbslen) +{ + int len = 0; + + WOLFSSL_MSG("wolfSSL_EVP_PKEY_sign"); + + if (!ctx || ctx->op != EVP_PKEY_OP_SIGN || !ctx->pkey) + return WOLFSSL_FAILURE; + + (void)sig; + (void)siglen; + (void)tbs; + (void)tbslen; + (void)len; + + switch (ctx->pkey->type) { +#if !defined(NO_RSA) && !defined(HAVE_USER_RSA) + case EVP_PKEY_RSA: + len = wolfSSL_RSA_private_encrypt((int)tbslen, (unsigned char*)tbs, sig, + ctx->pkey->rsa, ctx->padding); + if (len < 0) + break; + else { + *siglen = len; + return WOLFSSL_SUCCESS; + } +#endif /* NO_RSA */ + + case EVP_PKEY_EC: + WOLFSSL_MSG("not implemented"); + FALL_THROUGH; + default: + break; + } + return WOLFSSL_FAILURE; +} + +/* Get the size in bits for WOLFSSL_EVP_PKEY key + * + * pkey WOLFSSL_EVP_PKEY structure to get key size of + * + * returns the size in bits of key on success + */ +int wolfSSL_EVP_PKEY_bits(const WOLFSSL_EVP_PKEY *pkey) +{ + int bytes; + + if (pkey == NULL) return 0; + WOLFSSL_ENTER("EVP_PKEY_bits"); + if ((bytes = wolfSSL_EVP_PKEY_size((WOLFSSL_EVP_PKEY*)pkey)) ==0) return 0; + return bytes*8; +} + + +int wolfSSL_EVP_PKEY_keygen_init(WOLFSSL_EVP_PKEY_CTX *ctx) +{ + (void)ctx; + return WOLFSSL_SUCCESS; +} + +int wolfSSL_EVP_PKEY_keygen(WOLFSSL_EVP_PKEY_CTX *ctx, + WOLFSSL_EVP_PKEY **ppkey) +{ + int ret = WOLFSSL_FAILURE; + int ownPkey = 0; + WOLFSSL_EVP_PKEY* pkey; + + if (ctx == NULL || ppkey == NULL) { + return BAD_FUNC_ARG; + } + + pkey = *ppkey; + if (pkey == NULL) { + ownPkey = 1; + pkey = wolfSSL_EVP_PKEY_new(); + + if (pkey == NULL) + return ret; + } + + switch (pkey->type) { +#if !defined(HAVE_FAST_RSA) && defined(WOLFSSL_KEY_GEN) && \ + !defined(NO_RSA) && !defined(HAVE_USER_RSA) + case EVP_PKEY_RSA: + pkey->rsa = wolfSSL_RSA_generate_key(ctx->nbits, WC_RSA_EXPONENT, + NULL, NULL); + if (pkey->rsa) { + pkey->ownRsa = 1; + pkey->pkey_sz = wolfSSL_i2d_RSAPrivateKey(pkey->rsa, + (unsigned char**)&pkey->pkey.ptr); + ret = WOLFSSL_SUCCESS; + } + break; +#endif +#ifdef HAVE_ECC + case EVP_PKEY_EC: + pkey->ecc = wolfSSL_EC_KEY_new(); + if (pkey->ecc) { + ret = wolfSSL_EC_KEY_generate_key(pkey->ecc); + if (ret == WOLFSSL_SUCCESS) { + pkey->ownEcc = 1; + } + } +#endif + default: + break; + } + + if (ret != WOLFSSL_SUCCESS && ownPkey) { + wolfSSL_EVP_PKEY_free(pkey); + pkey = NULL; + } + + *ppkey = pkey; + + return ret; +} + +/* Get the size in bytes for WOLFSSL_EVP_PKEY key + * + * pkey WOLFSSL_EVP_PKEY structure to 
+ *       get key size of
+ *
+ * returns the size of a key on success which is the maximum size of a
+ *         signature
+ */
+int wolfSSL_EVP_PKEY_size(WOLFSSL_EVP_PKEY *pkey)
+{
+    if (pkey == NULL) return 0;
+    WOLFSSL_ENTER("EVP_PKEY_size");
+
+    switch (pkey->type) {
+#ifndef NO_RSA
+    case EVP_PKEY_RSA:
+        return (int)wolfSSL_RSA_size((const WOLFSSL_RSA*)(pkey->rsa));
+#endif /* !NO_RSA */
+
+#ifdef HAVE_ECC
+    case EVP_PKEY_EC:
+        if (pkey->ecc == NULL || pkey->ecc->internal == NULL) {
+            WOLFSSL_MSG("No ECC key has been set");
+            break;
+        }
+        return wc_ecc_size((ecc_key*)(pkey->ecc->internal));
+#endif /* HAVE_ECC */
+
+    default:
+        break;
+    }
+    return 0;
+}
+
+#ifndef NO_WOLFSSL_STUB
+WOLFSSL_API int wolfSSL_EVP_PKEY_missing_parameters(WOLFSSL_EVP_PKEY *pkey)
+{
+    (void)pkey;
+    /* not using missing params callback and returning zero to indicate success */
+    return 0;
+}
+#endif
+
+WOLFSSL_API int wolfSSL_EVP_PKEY_cmp(const WOLFSSL_EVP_PKEY *a, const WOLFSSL_EVP_PKEY *b)
+{
+    int ret = -1; /* failure */
+    int a_sz = 0, b_sz = 0;
+
+    if (a == NULL || b == NULL)
+        return ret;
+
+    /* check it's the same type of key */
+    if (a->type != b->type)
+        return ret;
+
+    /* get size based on key type */
+    switch (a->type) {
+#ifndef NO_RSA
+    case EVP_PKEY_RSA:
+        a_sz = (int)wolfSSL_RSA_size((const WOLFSSL_RSA*)(a->rsa));
+        b_sz = (int)wolfSSL_RSA_size((const WOLFSSL_RSA*)(b->rsa));
+        break;
+#endif /* !NO_RSA */
+#ifdef HAVE_ECC
+    case EVP_PKEY_EC:
+        if (a->ecc == NULL || a->ecc->internal == NULL ||
+            b->ecc == NULL || b->ecc->internal == NULL) {
+            return ret;
+        }
+        a_sz = wc_ecc_size((ecc_key*)(a->ecc->internal));
+        b_sz = wc_ecc_size((ecc_key*)(b->ecc->internal));
+        break;
+#endif /* HAVE_ECC */
+    default:
+        break;
+    } /* switch (a->type) */
+
+    /* check size */
+    if (a_sz <= 0 || b_sz <= 0 || a_sz != b_sz) {
+        return ret;
+    }
+
+    /* check public key size */
+    if (a->pkey_sz > 0 && b->pkey_sz > 0 && a->pkey_sz != b->pkey_sz) {
+        return ret;
+    }
+
+    /* check public key */
+    if (a->pkey.ptr && b->pkey.ptr) {
+        if (XMEMCMP(a->pkey.ptr, b->pkey.ptr, a->pkey_sz) != 0) {
+            return ret;
+        }
+    }
+    ret = 0; /* success */
+
+    return ret;
+}
+
+/* Initialize structure for signing
+ *
+ * ctx   WOLFSSL_EVP_MD_CTX structure to initialize
+ * type  is the type of message digest to use
+ *
+ * returns WOLFSSL_SUCCESS on success
+ */
+int wolfSSL_EVP_SignInit(WOLFSSL_EVP_MD_CTX *ctx, const WOLFSSL_EVP_MD *type)
+{
+    if (ctx == NULL) return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("EVP_SignInit");
+    return wolfSSL_EVP_DigestInit(ctx,type);
+}
+
+WOLFSSL_API int wolfSSL_EVP_SignInit_ex(WOLFSSL_EVP_MD_CTX* ctx,
+                                        const WOLFSSL_EVP_MD* type,
+                                        WOLFSSL_ENGINE *impl)
+{
+    if (ctx == NULL) return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("EVP_SignInit_ex");
+    return wolfSSL_EVP_DigestInit_ex(ctx,type,impl);
+}
+
+
+/* Update structure with data for signing
+ *
+ * ctx   WOLFSSL_EVP_MD_CTX structure to update
+ * data  buffer holding data to update with for sign
+ * len   length of data buffer
+ *
+ * returns WOLFSSL_SUCCESS on success
+ */
+int wolfSSL_EVP_SignUpdate(WOLFSSL_EVP_MD_CTX *ctx, const void *data, size_t len)
+{
+    if (ctx == NULL) return 0;
+    WOLFSSL_ENTER("EVP_SignUpdate");
+    return wolfSSL_EVP_DigestUpdate(ctx, data, len);
+}
+
+static const struct s_ent {
+    const int macType;
+    const int nid;
+    const char *name;
+} md_tbl[] = {
+#ifndef NO_MD4
+    {WC_HASH_TYPE_MD4, NID_md4, "MD4"},
+#endif /* NO_MD4 */
+
+#ifndef NO_MD5
+    {WC_HASH_TYPE_MD5, NID_md5, "MD5"},
+#endif /* NO_MD5 */
+
+#ifndef NO_SHA
+    {WC_HASH_TYPE_SHA, NID_sha1, "SHA"},
+#endif /* NO_SHA */
+ +#ifdef WOLFSSL_SHA224 + {WC_HASH_TYPE_SHA224, NID_sha224, "SHA224"}, +#endif /* WOLFSSL_SHA224 */ +#ifndef NO_SHA256 + {WC_HASH_TYPE_SHA256, NID_sha256, "SHA256"}, +#endif + +#ifdef WOLFSSL_SHA384 + {WC_HASH_TYPE_SHA384, NID_sha384, "SHA384"}, +#endif /* WOLFSSL_SHA384 */ +#ifdef WOLFSSL_SHA512 + {WC_HASH_TYPE_SHA512, NID_sha512, "SHA512"}, +#endif /* WOLFSSL_SHA512 */ +#ifndef WOLFSSL_NOSHA3_224 + {WC_HASH_TYPE_SHA3_224, NID_sha3_224, "SHA3_224"}, +#endif +#ifndef WOLFSSL_NOSHA3_256 + {WC_HASH_TYPE_SHA3_256, NID_sha3_256, "SHA3_256"}, +#endif + {WC_HASH_TYPE_SHA3_384, NID_sha3_384, "SHA3_384"}, +#ifndef WOLFSSL_NOSHA3_512 + {WC_HASH_TYPE_SHA3_512, NID_sha3_512, "SHA3_512"}, +#endif + {0, 0, NULL} +}; + +static int wolfSSL_EVP_md2macType(const WOLFSSL_EVP_MD *md) +{ + const struct s_ent *ent ; + + if (md != NULL) { + for( ent = md_tbl; ent->name != NULL; ent++) { + if(XSTRNCMP((const char *)md, ent->name, XSTRLEN(ent->name)+1) == 0) { + return ent->macType; + } + } + } + return WC_HASH_TYPE_NONE; +} + +/* Finalize structure for signing + * + * ctx WOLFSSL_EVP_MD_CTX structure to finalize + * sigret buffer to hold resulting signature + * siglen length of sigret buffer + * pkey key to sign with + * + * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure + */ +int wolfSSL_EVP_SignFinal(WOLFSSL_EVP_MD_CTX *ctx, unsigned char *sigret, + unsigned int *siglen, WOLFSSL_EVP_PKEY *pkey) +{ + unsigned int mdsize; + unsigned char md[WC_MAX_DIGEST_SIZE]; + int ret; + if (ctx == NULL) return WOLFSSL_FAILURE; + WOLFSSL_ENTER("EVP_SignFinal"); + + ret = wolfSSL_EVP_DigestFinal(ctx, md, &mdsize); + if (ret <= 0) return ret; + + (void)sigret; + (void)siglen; + + switch (pkey->type) { +#if !defined(NO_RSA) && !defined(HAVE_USER_RSA) + case EVP_PKEY_RSA: { + int nid = wolfSSL_EVP_MD_type(wolfSSL_EVP_MD_CTX_md(ctx)); + if (nid < 0) break; + return wolfSSL_RSA_sign(nid, md, mdsize, sigret, + siglen, pkey->rsa); + } +#endif /* NO_RSA */ + + case EVP_PKEY_DSA: + case EVP_PKEY_EC: + WOLFSSL_MSG("not implemented"); + FALL_THROUGH; + default: + break; + } + return WOLFSSL_FAILURE; +} + + +/* Initialize structure for verifying signature + * + * ctx WOLFSSL_EVP_MD_CTX structure to initialize + * type is the type of message digest to use + * + * returns WOLFSSL_SUCCESS on success + */ +int wolfSSL_EVP_VerifyInit(WOLFSSL_EVP_MD_CTX *ctx, const WOLFSSL_EVP_MD *type) +{ + if (ctx == NULL) return WOLFSSL_FAILURE; + WOLFSSL_ENTER("EVP_VerifyInit"); + return wolfSSL_EVP_DigestInit(ctx,type); +} + + +/* Update structure for verifying signature + * + * ctx WOLFSSL_EVP_MD_CTX structure to update + * data buffer holding data to update with for verify + * len length of data buffer + * + * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure + */ +int wolfSSL_EVP_VerifyUpdate(WOLFSSL_EVP_MD_CTX *ctx, const void *data, size_t len) +{ + if (ctx == NULL) return WOLFSSL_FAILURE; + WOLFSSL_ENTER("EVP_VerifyUpdate"); + return wolfSSL_EVP_DigestUpdate(ctx, data, len); +} + + +/* Finalize structure for verifying signature + * + * ctx WOLFSSL_EVP_MD_CTX structure to finalize + * sig buffer holding signature + * siglen length of sig buffer + * pkey key to verify with + * + * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure + */ +int wolfSSL_EVP_VerifyFinal(WOLFSSL_EVP_MD_CTX *ctx, + unsigned char*sig, unsigned int siglen, WOLFSSL_EVP_PKEY *pkey) +{ + int ret; + unsigned char md[WC_MAX_DIGEST_SIZE]; + unsigned int mdsize; + + if (ctx == NULL) return WOLFSSL_FAILURE; + 
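+    /* Counterpart of wolfSSL_EVP_SignFinal() above; an illustrative pairing
+     * (hypothetical buffers) recomputes the digest and checks sig against it:
+     *
+     *     wolfSSL_EVP_VerifyInit(md, wolfSSL_EVP_sha256());
+     *     wolfSSL_EVP_VerifyUpdate(md, data, dataSz);
+     *     ok = wolfSSL_EVP_VerifyFinal(md, sig, sigSz, pkey);
+     */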
WOLFSSL_ENTER("EVP_VerifyFinal"); + ret = wolfSSL_EVP_DigestFinal(ctx, md, &mdsize); + if (ret <= 0) return ret; + + (void)sig; + (void)siglen; + + switch (pkey->type) { +#if !defined(NO_RSA) && !defined(HAVE_USER_RSA) + case EVP_PKEY_RSA: { + int nid = wolfSSL_EVP_MD_type(wolfSSL_EVP_MD_CTX_md(ctx)); + if (nid < 0) break; + return wolfSSL_RSA_verify(nid, md, mdsize, sig, + (unsigned int)siglen, pkey->rsa); + } +#endif /* NO_RSA */ + + case EVP_PKEY_DSA: + case EVP_PKEY_EC: + WOLFSSL_MSG("not implemented"); + FALL_THROUGH; + default: + break; + } + return WOLFSSL_FAILURE; +} + +int wolfSSL_EVP_add_cipher(const WOLFSSL_EVP_CIPHER *cipher) +{ + (void)cipher; + /* nothing to do */ + return 0; +} + + +WOLFSSL_EVP_PKEY* wolfSSL_EVP_PKEY_new_mac_key(int type, ENGINE* e, + const unsigned char* key, int keylen) +{ + WOLFSSL_EVP_PKEY* pkey; + + (void)e; + + if (type != EVP_PKEY_HMAC || (key == NULL && keylen != 0)) + return NULL; + + pkey = wolfSSL_EVP_PKEY_new(); + if (pkey != NULL) { + pkey->pkey.ptr = (char*)XMALLOC(keylen, NULL, DYNAMIC_TYPE_PUBLIC_KEY); + if (pkey->pkey.ptr == NULL && keylen > 0) { + wolfSSL_EVP_PKEY_free(pkey); + pkey = NULL; + } + else { + XMEMCPY(pkey->pkey.ptr, key, keylen); + pkey->pkey_sz = keylen; + pkey->type = pkey->save_type = type; + } + } + + return pkey; +} + + +const unsigned char* wolfSSL_EVP_PKEY_get0_hmac(const WOLFSSL_EVP_PKEY* pkey, + size_t* len) +{ + if (pkey == NULL || len == NULL) + return NULL; + + *len = (size_t)pkey->pkey_sz; + + return (const unsigned char*)pkey->pkey.ptr; +} + + +/* Initialize an EVP_DigestSign/Verify operation. + * Initialize a digest for RSA and ECC keys, or HMAC for HMAC key. + */ +static int wolfSSL_evp_digest_pk_init(WOLFSSL_EVP_MD_CTX *ctx, + WOLFSSL_EVP_PKEY_CTX **pctx, + const WOLFSSL_EVP_MD *type, + WOLFSSL_ENGINE *e, + WOLFSSL_EVP_PKEY *pkey) +{ + if (pkey->type == EVP_PKEY_HMAC) { + int hashType; + const unsigned char* key; + size_t keySz; + + if (XSTRNCMP(type, "SHA256", 6) == 0) { + hashType = WC_SHA256; + } + #ifdef WOLFSSL_SHA224 + else if (XSTRNCMP(type, "SHA224", 6) == 0) { + hashType = WC_SHA224; + } + #endif + #ifdef WOLFSSL_SHA384 + else if (XSTRNCMP(type, "SHA384", 6) == 0) { + hashType = WC_SHA384; + } + #endif + #ifdef WOLFSSL_SHA512 + else if (XSTRNCMP(type, "SHA512", 6) == 0) { + hashType = WC_SHA512; + } + #endif + #ifndef NO_MD5 + else if (XSTRNCMP(type, "MD5", 3) == 0) { + hashType = WC_MD5; + } + #endif + #ifndef NO_SHA + /* has to be last since would pick or 224, 256, 384, or 512 too */ + else if (XSTRNCMP(type, "SHA", 3) == 0) { + hashType = WC_SHA; + } + #endif /* NO_SHA */ + else + return BAD_FUNC_ARG; + + key = wolfSSL_EVP_PKEY_get0_hmac(pkey, &keySz); + + if (wc_HmacInit(&ctx->hash.hmac, NULL, INVALID_DEVID) != 0) + return WOLFSSL_FAILURE; + + if (wc_HmacSetKey(&ctx->hash.hmac, hashType, key, (word32)keySz) != 0) + return WOLFSSL_FAILURE; + + ctx->macType = NID_hmac; + } + else { + int ret; + + if (ctx->pctx == NULL) { + ctx->pctx = wolfSSL_EVP_PKEY_CTX_new(pkey, e); + if (ctx->pctx == NULL) + return WOLFSSL_FAILURE; + } + + ret = wolfSSL_EVP_DigestInit(ctx, type); + if (ret == WOLFSSL_SUCCESS && pctx != NULL) + *pctx = ctx->pctx; + return ret; + } + + return WOLFSSL_SUCCESS; +} + +/* Update an EVP_DigestSign/Verify operation. + * Update a digest for RSA and ECC keys, or HMAC for HMAC key. 
+ */
+static int wolfssl_evp_digest_pk_update(WOLFSSL_EVP_MD_CTX *ctx,
+                                        const void *d, unsigned int cnt)
+{
+    if (ctx->pctx == NULL) {
+        if (ctx->macType != NID_hmac)
+            return WOLFSSL_FAILURE;
+
+        if (wc_HmacUpdate(&ctx->hash.hmac, (const byte *)d, cnt) != 0)
+            return WOLFSSL_FAILURE;
+
+        return WOLFSSL_SUCCESS;
+    }
+    else
+        return wolfSSL_EVP_DigestUpdate(ctx, d, cnt);
+}
+
+/* Finalize an EVP_DigestSign/Verify operation - common part only.
+ * Finalize a digest for RSA and ECC keys, or HMAC for HMAC key.
+ * Copies the digest so that you can keep updating.
+ */
+static int wolfssl_evp_digest_pk_final(WOLFSSL_EVP_MD_CTX *ctx,
+                                       unsigned char *md, unsigned int* mdlen)
+{
+    int ret;
+
+    if (ctx->pctx == NULL) {
+        Hmac hmacCopy;
+
+        if (ctx->macType != NID_hmac)
+            return WOLFSSL_FAILURE;
+
+        if (wolfSSL_HmacCopy(&hmacCopy, &ctx->hash.hmac) != WOLFSSL_SUCCESS)
+            return WOLFSSL_FAILURE;
+        ret = wc_HmacFinal(&hmacCopy, md) == 0;
+        wc_HmacFree(&hmacCopy);
+        return ret;
+    }
+    else {
+        WOLFSSL_EVP_MD_CTX ctxCopy;
+
+        if (wolfSSL_EVP_MD_CTX_copy_ex(&ctxCopy, ctx) != WOLFSSL_SUCCESS)
+            return WOLFSSL_FAILURE;
+
+        ret = wolfSSL_EVP_DigestFinal(&ctxCopy, md, mdlen);
+        wolfSSL_EVP_MD_CTX_cleanup(&ctxCopy);
+        return ret;
+    }
+}
+
+/* Get the length of the mac based on the digest algorithm. */
+static int wolfssl_mac_len(unsigned char macType)
+{
+    int hashLen;
+
+    switch (macType) {
+    #ifndef NO_MD5
+        case WC_MD5:
+            hashLen = WC_MD5_DIGEST_SIZE;
+            break;
+    #endif /* !NO_MD5 */
+
+    #ifndef NO_SHA
+        case WC_SHA:
+            hashLen = WC_SHA_DIGEST_SIZE;
+            break;
+    #endif /* !NO_SHA */
+
+    #ifdef WOLFSSL_SHA224
+        case WC_SHA224:
+            hashLen = WC_SHA224_DIGEST_SIZE;
+            break;
+    #endif /* WOLFSSL_SHA224 */
+
+    #ifndef NO_SHA256
+        case WC_SHA256:
+            hashLen = WC_SHA256_DIGEST_SIZE;
+            break;
+    #endif /* !NO_SHA256 */
+
+    #ifdef WOLFSSL_SHA384
+        case WC_SHA384:
+            hashLen = WC_SHA384_DIGEST_SIZE;
+            break;
+    #endif /* WOLFSSL_SHA384 */
+    #ifdef WOLFSSL_SHA512
+        case WC_SHA512:
+            hashLen = WC_SHA512_DIGEST_SIZE;
+            break;
+    #endif /* WOLFSSL_SHA512 */
+
+    #ifdef HAVE_BLAKE2
+        case BLAKE2B_ID:
+            hashLen = BLAKE2B_OUTBYTES;
+            break;
+    #endif /* HAVE_BLAKE2 */
+
+        default:
+            hashLen = 0;
+    }
+
+    return hashLen;
+}
+
+int wolfSSL_EVP_DigestSignInit(WOLFSSL_EVP_MD_CTX *ctx,
+                               WOLFSSL_EVP_PKEY_CTX **pctx,
+                               const WOLFSSL_EVP_MD *type,
+                               WOLFSSL_ENGINE *e,
+                               WOLFSSL_EVP_PKEY *pkey)
+{
+    WOLFSSL_ENTER("EVP_DigestSignInit");
+
+    if (ctx == NULL || type == NULL || pkey == NULL)
+        return BAD_FUNC_ARG;
+
+    return wolfSSL_evp_digest_pk_init(ctx, pctx, type, e, pkey);
+}
+
+
+int wolfSSL_EVP_DigestSignUpdate(WOLFSSL_EVP_MD_CTX *ctx, const void *d,
+                                 unsigned int cnt)
+{
+    WOLFSSL_ENTER("EVP_DigestSignUpdate");
+
+    if (ctx == NULL || d == NULL)
+        return BAD_FUNC_ARG;
+
+    return wolfssl_evp_digest_pk_update(ctx, d, cnt);
+}
+
+int wolfSSL_EVP_DigestSignFinal(WOLFSSL_EVP_MD_CTX *ctx, unsigned char *sig,
+                                size_t *siglen)
+{
+    unsigned char digest[WC_MAX_DIGEST_SIZE];
+    unsigned int hashLen;
+    int ret = WOLFSSL_FAILURE;
+
+    WOLFSSL_ENTER("EVP_DigestSignFinal");
+
+    if (ctx == NULL || siglen == NULL)
+        return WOLFSSL_FAILURE;
+
+    /* Return the maximum size of the signature when sig is NULL.
*/ + if (ctx->pctx == NULL) { + if (ctx->macType != NID_hmac) + return WOLFSSL_FAILURE; + + hashLen = wolfssl_mac_len(ctx->hash.hmac.macType); + + if (sig == NULL) { + *siglen = hashLen; + return WOLFSSL_SUCCESS; + } + } +#ifndef NO_RSA + else if (ctx->pctx->pkey->type == EVP_PKEY_RSA) { + if (sig == NULL) { + *siglen = wolfSSL_RSA_size(ctx->pctx->pkey->rsa); + return WOLFSSL_SUCCESS; + } + } +#endif /* !NO_RSA */ +#ifdef HAVE_ECC + else if (ctx->pctx->pkey->type == EVP_PKEY_EC) { + if (sig == NULL) { + /* SEQ + INT + INT */ + *siglen = ecc_sets[ctx->pctx->pkey->ecc->group->curve_idx].size * 2 + + 8; + return WOLFSSL_SUCCESS; + } + } +#endif + + if (wolfssl_evp_digest_pk_final(ctx, digest, &hashLen) <= 0) + return WOLFSSL_FAILURE; + + if (ctx->pctx == NULL) { + /* Copy the HMAC result as signature. */ + if ((unsigned int)(*siglen) > hashLen) + *siglen = hashLen; + /* May be a truncated signature. */ + + XMEMCPY(sig, digest, *siglen); + ret = WOLFSSL_SUCCESS; + } + else { + /* Sign the digest. */ + switch (ctx->pctx->pkey->type) { + #if !defined(NO_RSA) && !defined(HAVE_USER_RSA) + case EVP_PKEY_RSA: { + unsigned int sigSz; + int nid = wolfSSL_EVP_MD_type(wolfSSL_EVP_MD_CTX_md(ctx)); + if (nid < 0) + break; + ret = wolfSSL_RSA_sign(nid, digest, hashLen, sig, &sigSz, + ctx->pctx->pkey->rsa); + if (ret >= 0) + *siglen = sigSz; + break; + } + #endif /* NO_RSA */ + + #ifdef HAVE_ECC + case EVP_PKEY_EC: { + WOLFSSL_ECDSA_SIG *ecdsaSig; + ecdsaSig = wolfSSL_ECDSA_do_sign(digest, hashLen, + ctx->pctx->pkey->ecc); + if (ecdsaSig == NULL) + break; + *siglen = wolfSSL_i2d_ECDSA_SIG(ecdsaSig, &sig); + wolfSSL_ECDSA_SIG_free(ecdsaSig); + ret = WOLFSSL_SUCCESS; + break; + } + #endif + default: + break; + } + } + + ForceZero(digest, sizeof(digest)); + return ret; +} +int wolfSSL_EVP_DigestVerifyInit(WOLFSSL_EVP_MD_CTX *ctx, + WOLFSSL_EVP_PKEY_CTX **pctx, + const WOLFSSL_EVP_MD *type, + WOLFSSL_ENGINE *e, + WOLFSSL_EVP_PKEY *pkey) +{ + WOLFSSL_ENTER("EVP_DigestVerifyInit"); + + if (ctx == NULL || type == NULL || pkey == NULL) + return BAD_FUNC_ARG; + + return wolfSSL_evp_digest_pk_init(ctx, pctx, type, e, pkey); +} + + +int wolfSSL_EVP_DigestVerifyUpdate(WOLFSSL_EVP_MD_CTX *ctx, const void *d, + size_t cnt) +{ + WOLFSSL_ENTER("EVP_DigestVerifyUpdate"); + + if (ctx == NULL || d == NULL) + return BAD_FUNC_ARG; + + return wolfssl_evp_digest_pk_update(ctx, d, (unsigned int)cnt); +} + + +int wolfSSL_EVP_DigestVerifyFinal(WOLFSSL_EVP_MD_CTX *ctx, + const unsigned char *sig, size_t siglen) +{ + unsigned char digest[WC_MAX_DIGEST_SIZE]; + unsigned int hashLen; + + WOLFSSL_ENTER("EVP_DigestVerifyFinal"); + + if (ctx == NULL || sig == NULL) + return WOLFSSL_FAILURE; + + if (ctx->pctx == NULL) { + if (ctx->macType != NID_hmac) + return WOLFSSL_FAILURE; + + hashLen = wolfssl_mac_len(ctx->hash.hmac.macType); + + if (siglen > hashLen) + return WOLFSSL_FAILURE; + /* May be a truncated signature. */ + } + + if (wolfssl_evp_digest_pk_final(ctx, digest, &hashLen) <= 0) + return WOLFSSL_FAILURE; + + if (ctx->pctx == NULL) { + /* Check HMAC result matches the signature. */ + if (XMEMCMP(sig, digest, siglen) == 0) + return WOLFSSL_SUCCESS; + return WOLFSSL_FAILURE; + } + else { + /* Verify the signature with the digest. 
+         */
+        switch (ctx->pctx->pkey->type) {
+    #if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+        case EVP_PKEY_RSA: {
+            int nid = wolfSSL_EVP_MD_type(wolfSSL_EVP_MD_CTX_md(ctx));
+            if (nid < 0)
+                return WOLFSSL_FAILURE;
+            return wolfSSL_RSA_verify(nid, digest, hashLen, sig,
+                                      (unsigned int)siglen,
+                                      ctx->pctx->pkey->rsa);
+        }
+    #endif /* NO_RSA */
+
+    #ifdef HAVE_ECC
+        case EVP_PKEY_EC: {
+            int ret;
+            WOLFSSL_ECDSA_SIG *ecdsaSig;
+            ecdsaSig = wolfSSL_d2i_ECDSA_SIG(NULL, &sig, (long)siglen);
+            if (ecdsaSig == NULL)
+                return WOLFSSL_FAILURE;
+            ret = wolfSSL_ECDSA_do_verify(digest, hashLen, ecdsaSig,
+                                          ctx->pctx->pkey->ecc);
+            wolfSSL_ECDSA_SIG_free(ecdsaSig);
+            return ret;
+        }
+    #endif
+        default:
+            break;
+        }
+    }
+
+    return WOLFSSL_FAILURE;
+}
+
+
+#ifdef WOLFSSL_APACHE_HTTPD
+#if !defined(USE_WINDOWS_API) && !defined(MICROCHIP_PIC32)
+    #include <termios.h>
+#endif
+
+#ifndef XGETPASSWD
+    static int XGETPASSWD(char* buf, int bufSz) {
+        int ret = WOLFSSL_SUCCESS;
+
+        /* turn off echo for passwords */
+    #ifdef USE_WINDOWS_API
+        DWORD originalTerm;
+        DWORD newTerm;
+        CONSOLE_SCREEN_BUFFER_INFO screenOrig;
+        HANDLE stdinHandle = GetStdHandle(STD_INPUT_HANDLE);
+        if (GetConsoleMode(stdinHandle, &originalTerm) == 0) {
+            WOLFSSL_MSG("Couldn't get the original terminal settings");
+            return WOLFSSL_FAILURE;
+        }
+        newTerm = originalTerm;
+        newTerm &= ~ENABLE_ECHO_INPUT;
+        if (SetConsoleMode(stdinHandle, newTerm) == 0) {
+            WOLFSSL_MSG("Couldn't turn off echo");
+            return WOLFSSL_FAILURE;
+        }
+    #else
+        struct termios originalTerm;
+        struct termios newTerm;
+        if (tcgetattr(STDIN_FILENO, &originalTerm) != 0) {
+            WOLFSSL_MSG("Couldn't get the original terminal settings");
+            return WOLFSSL_FAILURE;
+        }
+        XMEMCPY(&newTerm, &originalTerm, sizeof(struct termios));
+
+        newTerm.c_lflag &= ~ECHO;
+        newTerm.c_lflag |= (ICANON | ECHONL);
+        if (tcsetattr(STDIN_FILENO, TCSANOW, &newTerm) != 0) {
+            WOLFSSL_MSG("Couldn't turn off echo");
+            return WOLFSSL_FAILURE;
+        }
+    #endif
+
+        if (XFGETS(buf, bufSz, stdin) == NULL) {
+            ret = WOLFSSL_FAILURE;
+        }
+
+        /* restore default echo */
+    #ifdef USE_WINDOWS_API
+        if (SetConsoleMode(stdinHandle, originalTerm) == 0) {
+            WOLFSSL_MSG("Couldn't restore the terminal settings");
+            return WOLFSSL_FAILURE;
+        }
+    #else
+        if (tcsetattr(STDIN_FILENO, TCSANOW, &originalTerm) != 0) {
+            WOLFSSL_MSG("Couldn't restore the terminal settings");
+            return WOLFSSL_FAILURE;
+        }
+    #endif
+        return ret;
+    }
+#endif
+
+/* returns 0 on success and -1 on failure */
+int wolfSSL_EVP_read_pw_string(char* buf, int bufSz, const char* banner, int v)
+{
+    printf("%s", banner);
+    if (XGETPASSWD(buf, bufSz) == WOLFSSL_FAILURE) {
+        return -1;
+    }
+    (void)v; /* fgets always sanity checks size of input vs buffer */
+    return 0;
+}
+#endif /* WOLFSSL_APACHE_HTTPD */
+
+#if !defined(NO_PWDBASED) && !defined(NO_SHA)
+int wolfSSL_PKCS5_PBKDF2_HMAC_SHA1(const char *pass, int passlen,
+                                   const unsigned char *salt,
+                                   int saltlen, int iter,
+                                   int keylen, unsigned char *out)
+{
+    const char *nostring = "";
+    int ret = 0;
+
+    if (pass == NULL) {
+        passlen = 0;
+        pass = nostring;
+    }
+    else if (passlen == -1) {
+        passlen = (int)XSTRLEN(pass);
+    }
+
+    ret = wc_PBKDF2((byte*)out, (byte*)pass, passlen, (byte*)salt, saltlen,
+                    iter, keylen, WC_SHA);
+    if (ret == 0)
+        return WOLFSSL_SUCCESS;
+    else
+        return WOLFSSL_FAILURE;
+}
+#endif /* !NO_PWDBASED !NO_SHA */
+
+#if !defined(NO_PWDBASED)
+WOLFSSL_API int wolfSSL_PKCS5_PBKDF2_HMAC(const char *pass, int passlen,
+                                          const unsigned char *salt,
+                                          int saltlen, int iter,
+                                          const WOLFSSL_EVP_MD *digest,
int keylen, unsigned char *out) +{ + const char *nostring = ""; + int ret = 0; + + if (pass == NULL) { + passlen = 0; + pass = nostring; + } else if (passlen == -1) { + passlen = (int)XSTRLEN(pass); + } + + ret = wc_PBKDF2((byte*)out, (byte*)pass, passlen, (byte*)salt, saltlen, + iter, keylen, wolfSSL_EVP_md2macType(digest)); + if (ret == 0) + return WOLFSSL_SUCCESS; + else + return WOLFSSL_FAILURE; +} +#endif /* !NO_PWDBASED */ + +static const struct cipher{ + unsigned char type; + const char *name; + int nid; +} cipher_tbl[] = { + +#ifndef NO_AES + #ifdef WOLFSSL_AES_128 + {AES_128_CBC_TYPE, "AES-128-CBC", NID_aes_128_cbc}, + #endif + #ifdef WOLFSSL_AES_192 + {AES_192_CBC_TYPE, "AES-192-CBC", NID_aes_192_cbc}, + #endif + #ifdef WOLFSSL_AES_256 + {AES_256_CBC_TYPE, "AES-256-CBC", NID_aes_256_cbc}, + #endif + + #ifdef WOLFSSL_AES_128 + {AES_128_CFB1_TYPE, "AES-128-CFB1", NID_aes_128_cfb1}, + #endif + #ifdef WOLFSSL_AES_192 + {AES_192_CFB1_TYPE, "AES-192-CFB1", NID_aes_192_cfb1}, + #endif + #ifdef WOLFSSL_AES_256 + {AES_256_CFB1_TYPE, "AES-256-CFB1", NID_aes_256_cfb1}, + #endif + + #ifdef WOLFSSL_AES_128 + {AES_128_CFB8_TYPE, "AES-128-CFB8", NID_aes_128_cfb8}, + #endif + #ifdef WOLFSSL_AES_192 + {AES_192_CFB8_TYPE, "AES-192-CFB8", NID_aes_192_cfb8}, + #endif + #ifdef WOLFSSL_AES_256 + {AES_256_CFB8_TYPE, "AES-256-CFB8", NID_aes_256_cfb8}, + #endif + + #ifdef WOLFSSL_AES_128 + {AES_128_CFB128_TYPE, "AES-128-CFB128", NID_aes_128_cfb128}, + #endif + #ifdef WOLFSSL_AES_192 + {AES_192_CFB128_TYPE, "AES-192-CFB128", NID_aes_192_cfb128}, + #endif + #ifdef WOLFSSL_AES_256 + {AES_256_CFB128_TYPE, "AES-256-CFB128", NID_aes_256_cfb128}, + #endif + + #ifdef WOLFSSL_AES_128 + {AES_128_OFB_TYPE, "AES-128-OFB", NID_aes_128_ofb}, + #endif + #ifdef WOLFSSL_AES_192 + {AES_192_OFB_TYPE, "AES-192-OFB", NID_aes_192_ofb}, + #endif + #ifdef WOLFSSL_AES_256 + {AES_256_OFB_TYPE, "AES-256-OFB", NID_aes_256_ofb}, + #endif + + #ifdef WOLFSSL_AES_128 + {AES_128_XTS_TYPE, "AES-128-XTS", NID_aes_128_xts}, + #endif + #ifdef WOLFSSL_AES_256 + {AES_256_XTS_TYPE, "AES-256-XTS", NID_aes_256_xts}, + #endif + + #ifdef WOLFSSL_AES_128 + {AES_128_GCM_TYPE, "AES-128-GCM", NID_aes_128_gcm}, + #endif + #ifdef WOLFSSL_AES_192 + {AES_192_GCM_TYPE, "AES-192-GCM", NID_aes_192_gcm}, + #endif + #ifdef WOLFSSL_AES_256 + {AES_256_GCM_TYPE, "AES-256-GCM", NID_aes_256_gcm}, + #endif + #ifdef WOLFSSL_AES_128 + {AES_128_CTR_TYPE, "AES-128-CTR", NID_aes_128_ctr}, + #endif + #ifdef WOLFSSL_AES_192 + {AES_192_CTR_TYPE, "AES-192-CTR", NID_aes_192_ctr}, + #endif + #ifdef WOLFSSL_AES_256 + {AES_256_CTR_TYPE, "AES-256-CTR", NID_aes_256_ctr}, + #endif + + #ifdef WOLFSSL_AES_128 + {AES_128_ECB_TYPE, "AES-128-ECB", NID_aes_128_ecb}, + #endif + #ifdef WOLFSSL_AES_192 + {AES_192_ECB_TYPE, "AES-192-ECB", NID_aes_192_ecb}, + #endif + #ifdef WOLFSSL_AES_256 + {AES_256_ECB_TYPE, "AES-256-ECB", NID_aes_256_ecb}, + #endif + +#endif + +#ifndef NO_DES3 + {DES_CBC_TYPE, "DES-CBC", NID_des_cbc}, + {DES_ECB_TYPE, "DES-ECB", NID_des_ecb}, + + {DES_EDE3_CBC_TYPE, "DES-EDE3-CBC", NID_des_ede3_cbc}, + {DES_EDE3_ECB_TYPE, "DES-EDE3-ECB", NID_des_ede3_ecb}, +#endif + +#ifndef NO_RC4 + {ARC4_TYPE, "ARC4", NID_undef}, +#endif + +#ifdef HAVE_IDEA + {IDEA_CBC_TYPE, "IDEA-CBC", NID_idea_cbc}, +#endif + { 0, NULL, 0} +}; + +/* returns cipher using provided ctx type */ +const WOLFSSL_EVP_CIPHER *wolfSSL_EVP_CIPHER_CTX_cipher( + const WOLFSSL_EVP_CIPHER_CTX *ctx) +{ + const struct cipher* c; + + if (!ctx || !ctx->cipherType) { + return NULL; + } + + for (c = cipher_tbl; c->type != 
0; c++) { + if (ctx->cipherType == c->type) { + return wolfSSL_EVP_get_cipherbyname(c->name); + } + } + + return NULL; +} + +int wolfSSL_EVP_CIPHER_nid(const WOLFSSL_EVP_CIPHER *cipher) +{ + const struct cipher* c; + + if (!cipher) { + return 0; + } + + for (c = cipher_tbl; c->type != 0; c++) { + if (XSTRNCMP(cipher, c->name, XSTRLEN(c->name)+1) == 0) { + return c->nid; + } + } + + return 0; +} + +const WOLFSSL_EVP_CIPHER *wolfSSL_EVP_get_cipherbyname(const char *name) +{ + + static const struct alias { + const char *name; + const char *alias; + } alias_tbl[] = + { +#ifndef NO_DES3 + {"DES-CBC", "DES"}, + {"DES-CBC", "des"}, + {"DES-ECB", "DES-ECB"}, + {"DES-ECB", "des-ecb"}, + {"DES-EDE3-CBC", "DES3"}, + {"DES-EDE3-CBC", "des3"}, + {"DES-EDE3-ECB", "DES-EDE3"}, + {"DES-EDE3-ECB", "des-ede3"}, + {"DES-EDE3-ECB", "des-ede3-ecb"}, +#endif +#ifdef HAVE_IDEA + {"IDEA-CBC", "IDEA"}, + {"IDEA-CBC", "idea"}, +#endif +#ifndef NO_AES + #ifdef HAVE_AES_CBC + #ifdef WOLFSSL_AES_128 + {"AES-128-CBC", "AES128-CBC"}, + {"AES-128-CBC", "aes128-cbc"}, + #endif + #ifdef WOLFSSL_AES_192 + {"AES-192-CBC", "AES192-CBC"}, + {"AES-192-CBC", "aes192-cbc"}, + #endif + #ifdef WOLFSSL_AES_256 + {"AES-256-CBC", "AES256-CBC"}, + {"AES-256-CBC", "aes256-cbc"}, + #endif + #endif + #ifdef WOLFSSL_AES_128 + {"AES-128-ECB", "AES128-ECB"}, + {"AES-128-ECB", "aes128-ecb"}, + #endif + #ifdef WOLFSSL_AES_192 + {"AES-192-ECB", "AES192-ECB"}, + {"AES-192-ECB", "aes192-ecb"}, + #endif + #ifdef WOLFSSL_AES_256 + {"AES-256-ECB", "AES256-ECB"}, + #endif + #ifdef HAVE_AESGCM + #ifdef WOLFSSL_AES_128 + {"AES-128-GCM", "aes-128-gcm"}, + {"AES-128-GCM", "id-aes128-GCM"}, + #endif + #ifdef WOLFSSL_AES_192 + {"AES-192-GCM", "aes-192-gcm"}, + {"AES-192-GCM", "id-aes192-GCM"}, + #endif + #ifdef WOLFSSL_AES_256 + {"AES-256-GCM", "aes-256-gcm"}, + {"AES-256-GCM", "id-aes256-GCM"}, + #endif + #endif +#endif +#ifndef NO_RC4 + {"ARC4", "RC4"}, +#endif + { NULL, NULL} + }; + + const struct cipher *ent; + const struct alias *al; + + WOLFSSL_ENTER("EVP_get_cipherbyname"); + + for( al = alias_tbl; al->name != NULL; al++) + if(XSTRNCMP(name, al->alias, XSTRLEN(al->alias)+1) == 0) { + name = al->name; + break; + } + + for( ent = cipher_tbl; ent->name != NULL; ent++) + if(XSTRNCMP(name, ent->name, XSTRLEN(ent->name)+1) == 0) { + return (WOLFSSL_EVP_CIPHER *)ent->name; + } + + return NULL; +} + +/* + * return an EVP_CIPHER structure when cipher NID is passed. 
+ * + * id cipher NID + * + * return WOLFSSL_EVP_CIPHER +*/ +const WOLFSSL_EVP_CIPHER *wolfSSL_EVP_get_cipherbynid(int id) +{ + WOLFSSL_ENTER("EVP_get_cipherbynid"); + + switch(id) { + +#ifndef NO_AES + #ifdef HAVE_AES_CBC + #ifdef WOLFSSL_AES_128 + case NID_aes_128_cbc: + return wolfSSL_EVP_aes_128_cbc(); + #endif + #ifdef WOLFSSL_AES_192 + case NID_aes_192_cbc: + return wolfSSL_EVP_aes_192_cbc(); + #endif + #ifdef WOLFSSL_AES_256 + case NID_aes_256_cbc: + return wolfSSL_EVP_aes_256_cbc(); + #endif + #endif + #ifdef WOLFSSL_AES_COUNTER + #ifdef WOLFSSL_AES_128 + case NID_aes_128_ctr: + return wolfSSL_EVP_aes_128_ctr(); + #endif + #ifdef WOLFSSL_AES_192 + case NID_aes_192_ctr: + return wolfSSL_EVP_aes_192_ctr(); + #endif + #ifdef WOLFSSL_AES_256 + case NID_aes_256_ctr: + return wolfSSL_EVP_aes_256_ctr(); + #endif + #endif /* WOLFSSL_AES_COUNTER */ + #ifdef HAVE_AES_ECB + #ifdef WOLFSSL_AES_128 + case NID_aes_128_ecb: + return wolfSSL_EVP_aes_128_ecb(); + #endif + #ifdef WOLFSSL_AES_192 + case NID_aes_192_ecb: + return wolfSSL_EVP_aes_192_ecb(); + #endif + #ifdef WOLFSSL_AES_256 + case NID_aes_256_ecb: + return wolfSSL_EVP_aes_256_ecb(); + #endif + #endif /* HAVE_AES_ECB */ + #ifdef HAVE_AESGCM + #ifdef WOLFSSL_AES_128 + case NID_aes_128_gcm: + return wolfSSL_EVP_aes_128_gcm(); + #endif + #ifdef WOLFSSL_AES_192 + case NID_aes_192_gcm: + return wolfSSL_EVP_aes_192_gcm(); + #endif + #ifdef WOLFSSL_AES_256 + case NID_aes_256_gcm: + return wolfSSL_EVP_aes_256_gcm(); + #endif + #endif +#endif + +#ifndef NO_DES3 + case NID_des_cbc: + return wolfSSL_EVP_des_cbc(); +#ifdef WOLFSSL_DES_ECB + case NID_des_ecb: + return wolfSSL_EVP_des_ecb(); +#endif + case NID_des_ede3_cbc: + return wolfSSL_EVP_des_ede3_cbc(); +#ifdef WOLFSSL_DES_ECB + case NID_des_ede3_ecb: + return wolfSSL_EVP_des_ede3_ecb(); +#endif +#endif /*NO_DES3*/ + +#ifdef HAVE_IDEA + case NID_idea_cbc: + return wolfSSL_EVP_idea_cbc(); +#endif + + default: + WOLFSSL_MSG("Bad cipher id value"); + } + + return NULL; +} + +void wolfSSL_EVP_init(void) +{ +#ifndef NO_AES + #ifdef HAVE_AES_CBC + #ifdef WOLFSSL_AES_128 + EVP_AES_128_CBC = (char *)EVP_get_cipherbyname("AES-128-CBC"); + #endif + #ifdef WOLFSSL_AES_192 + EVP_AES_192_CBC = (char *)EVP_get_cipherbyname("AES-192-CBC"); + #endif + #ifdef WOLFSSL_AES_256 + EVP_AES_256_CBC = (char *)EVP_get_cipherbyname("AES-256-CBC"); + #endif + #endif /* HAVE_AES_CBC */ + + #ifdef WOLFSSL_AES_CFB + #ifdef WOLFSSL_AES_128 + EVP_AES_128_CFB1 = (char *)EVP_get_cipherbyname("AES-128-CFB1"); + #endif + + #ifdef WOLFSSL_AES_192 + EVP_AES_192_CFB1 = (char *)EVP_get_cipherbyname("AES-192-CFB1"); + #endif + + #ifdef WOLFSSL_AES_256 + EVP_AES_256_CFB1 = (char *)EVP_get_cipherbyname("AES-256-CFB1"); + #endif + + #ifdef WOLFSSL_AES_128 + EVP_AES_128_CFB8 = (char *)EVP_get_cipherbyname("AES-128-CFB8"); + #endif + + #ifdef WOLFSSL_AES_192 + EVP_AES_192_CFB8 = (char *)EVP_get_cipherbyname("AES-192-CFB8"); + #endif + + #ifdef WOLFSSL_AES_256 + EVP_AES_256_CFB8 = (char *)EVP_get_cipherbyname("AES-256-CFB8"); + #endif + + #ifdef WOLFSSL_AES_128 + EVP_AES_128_CFB128 = (char *)EVP_get_cipherbyname("AES-128-CFB128"); + #endif + + #ifdef WOLFSSL_AES_192 + EVP_AES_192_CFB128 = (char *)EVP_get_cipherbyname("AES-192-CFB128"); + #endif + + #ifdef WOLFSSL_AES_256 + EVP_AES_256_CFB128 = (char *)EVP_get_cipherbyname("AES-256-CFB128"); + #endif + #endif /* WOLFSSL_AES_CFB */ + + #ifdef WOLFSSL_AES_OFB + #ifdef WOLFSSL_AES_128 + EVP_AES_128_OFB = (char *)EVP_get_cipherbyname("AES-128-OFB"); + #endif + + #ifdef WOLFSSL_AES_192 + 
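+ /* The EVP_AES_* globals populated here are just cached pointers to the
+ * canonical name strings in cipher_tbl; the per-cipher getters further
+ * below (e.g. wolfSSL_EVP_aes_128_cbc()) call wolfSSL_EVP_init() lazily
+ * on first use, so applications rarely need to call it themselves. */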
EVP_AES_192_OFB = (char *)EVP_get_cipherbyname("AES-192-OFB"); + #endif + + #ifdef WOLFSSL_AES_256 + EVP_AES_256_OFB = (char *)EVP_get_cipherbyname("AES-256-OFB"); + #endif + #endif /* WOLFSSL_AES_OFB */ + + #ifdef WOLFSSL_AES_XTS + #ifdef WOLFSSL_AES_128 + EVP_AES_128_XTS = (char *)EVP_get_cipherbyname("AES-128-XTS"); + #endif + + #ifdef WOLFSSL_AES_256 + EVP_AES_256_XTS = (char *)EVP_get_cipherbyname("AES-256-XTS"); + #endif + #endif /* WOLFSSL_AES_XTS */ + + #ifdef HAVE_AESGCM + #ifdef WOLFSSL_AES_128 + EVP_AES_128_GCM = (char *)EVP_get_cipherbyname("AES-128-GCM"); + #endif + #ifdef WOLFSSL_AES_192 + EVP_AES_192_GCM = (char *)EVP_get_cipherbyname("AES-192-GCM"); + #endif + #ifdef WOLFSSL_AES_256 + EVP_AES_256_GCM = (char *)EVP_get_cipherbyname("AES-256-GCM"); + #endif + #endif /* HAVE_AESGCM */ + #ifdef WOLFSSL_AES_128 + EVP_AES_128_CTR = (char *)EVP_get_cipherbyname("AES-128-CTR"); + #endif + #ifdef WOLFSSL_AES_192 + EVP_AES_192_CTR = (char *)EVP_get_cipherbyname("AES-192-CTR"); + #endif + #ifdef WOLFSSL_AES_256 + EVP_AES_256_CTR = (char *)EVP_get_cipherbyname("AES-256-CTR"); + #endif + + #ifdef WOLFSSL_AES_128 + EVP_AES_128_ECB = (char *)EVP_get_cipherbyname("AES-128-ECB"); + #endif + #ifdef WOLFSSL_AES_192 + EVP_AES_192_ECB = (char *)EVP_get_cipherbyname("AES-192-ECB"); + #endif + #ifdef WOLFSSL_AES_256 + EVP_AES_256_ECB = (char *)EVP_get_cipherbyname("AES-256-ECB"); + #endif +#endif /* !NO_AES */ + +#ifndef NO_DES3 + EVP_DES_CBC = (char *)EVP_get_cipherbyname("DES-CBC"); + EVP_DES_ECB = (char *)EVP_get_cipherbyname("DES-ECB"); + + EVP_DES_EDE3_CBC = (char *)EVP_get_cipherbyname("DES-EDE3-CBC"); + EVP_DES_EDE3_ECB = (char *)EVP_get_cipherbyname("DES-EDE3-ECB"); +#endif + +#ifdef HAVE_IDEA + EVP_IDEA_CBC = (char *)EVP_get_cipherbyname("IDEA-CBC"); +#endif +} + +#if !defined(NO_PWDBASED) +int wolfSSL_EVP_get_hashinfo(const WOLFSSL_EVP_MD* evp, + int* pHash, int* pHashSz) +{ + enum wc_HashType hash = WC_HASH_TYPE_NONE; + int hashSz; + + if (evp == NULL) { + /* guard the XSTRLEN/XSTRNCMP calls below against a NULL digest name */ + return WOLFSSL_FAILURE; + } + + if (XSTRLEN(evp) < 3) { + /* do not try comparing strings if size is too small */ + return WOLFSSL_FAILURE; + } + + if (XSTRNCMP("SHA", evp, 3) == 0) { + if (XSTRLEN(evp) > 3) { + #ifndef NO_SHA256 + if (XSTRNCMP("SHA256", evp, 6) == 0) { + hash = WC_HASH_TYPE_SHA256; + } + else + #endif + #ifdef WOLFSSL_SHA384 + if (XSTRNCMP("SHA384", evp, 6) == 0) { + hash = WC_HASH_TYPE_SHA384; + } + else + #endif + #ifdef WOLFSSL_SHA512 + if (XSTRNCMP("SHA512", evp, 6) == 0) { + hash = WC_HASH_TYPE_SHA512; + } + else + #endif + { + WOLFSSL_MSG("Unknown SHA hash"); + } + } + else { + hash = WC_HASH_TYPE_SHA; + } + } +#ifdef WOLFSSL_MD2 + else if (XSTRNCMP("MD2", evp, 3) == 0) { + hash = WC_HASH_TYPE_MD2; + } +#endif +#ifndef NO_MD4 + else if (XSTRNCMP("MD4", evp, 3) == 0) { + hash = WC_HASH_TYPE_MD4; + } +#endif +#ifndef NO_MD5 + else if (XSTRNCMP("MD5", evp, 3) == 0) { + hash = WC_HASH_TYPE_MD5; + } +#endif + + if (pHash) + *pHash = hash; + + hashSz = wc_HashGetDigestSize(hash); + if (pHashSz) + *pHashSz = hashSz; + + if (hashSz < 0) { + return WOLFSSL_FAILURE; + } + + return WOLFSSL_SUCCESS; +} + +/* This function assumes the out buffer is big enough to hold the digest. */ +int wolfSSL_EVP_Digest(const unsigned char* in, int inSz, unsigned char* out, + unsigned int* outSz, const WOLFSSL_EVP_MD* evp, + WOLFSSL_ENGINE* eng) +{ + int err; + int hashType = WC_HASH_TYPE_NONE; + int hashSz; + + WOLFSSL_ENTER("wolfSSL_EVP_Digest"); + if (in == NULL || out == NULL || evp == NULL) { + WOLFSSL_MSG("Null argument passed in"); + return WOLFSSL_FAILURE; + } + + err = 
wolfSSL_EVP_get_hashinfo(evp, &hashType, &hashSz); + if (err != WOLFSSL_SUCCESS) + return err; + + if (wc_Hash((enum wc_HashType)hashType, in, inSz, out, hashSz) != 0) { + return WOLFSSL_FAILURE; + } + + if (outSz != NULL) + *outSz = hashSz; + + (void)eng; + return WOLFSSL_SUCCESS; +} +#endif + +const WOLFSSL_EVP_MD *wolfSSL_EVP_get_digestbyname(const char *name) +{ + static const struct alias { + const char *name; + const char *alias; + } alias_tbl[] = + { + {"MD4", "ssl3-md4"}, + {"MD5", "ssl3-md5"}, + {"SHA", "ssl3-sha1"}, + {"SHA", "SHA1"}, + { NULL, NULL} + }; + + const struct alias *al; + const struct s_ent *ent; + + + for (al = alias_tbl; al->name != NULL; al++) + if(XSTRNCMP(name, al->alias, XSTRLEN(al->alias)+1) == 0) { + name = al->name; + break; + } + + for (ent = md_tbl; ent->name != NULL; ent++) + if(XSTRNCMP(name, ent->name, XSTRLEN(ent->name)+1) == 0) { + return (EVP_MD *)ent->name; + } + return NULL; +} + +int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md) +{ + const struct s_ent *ent ; + WOLFSSL_ENTER("EVP_MD_type"); + for( ent = md_tbl; ent->name != NULL; ent++){ + if(XSTRNCMP((const char *)md, ent->name, XSTRLEN(ent->name)+1) == 0) { + return ent->nid; + } + } + return 0; +} + +#ifndef NO_MD4 + + /* return a pointer to MD4 EVP type */ + const WOLFSSL_EVP_MD* wolfSSL_EVP_md4(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_md4"); + return EVP_get_digestbyname("MD4"); + } + +#endif /* !NO_MD4 */ + + +#ifndef NO_MD5 + + const WOLFSSL_EVP_MD* wolfSSL_EVP_md5(void) + { + WOLFSSL_ENTER("EVP_md5"); + return EVP_get_digestbyname("MD5"); + } + +#endif /* !NO_MD5 */ + + +#ifndef NO_WOLFSSL_STUB + const WOLFSSL_EVP_MD* wolfSSL_EVP_mdc2(void) + { + WOLFSSL_STUB("EVP_mdc2"); + return NULL; + } +#endif + +#ifndef NO_SHA + const WOLFSSL_EVP_MD* wolfSSL_EVP_sha1(void) + { + WOLFSSL_ENTER("EVP_sha1"); + return EVP_get_digestbyname("SHA"); + } +#endif /* NO_SHA */ + +#ifdef WOLFSSL_SHA224 + + const WOLFSSL_EVP_MD* wolfSSL_EVP_sha224(void) + { + WOLFSSL_ENTER("EVP_sha224"); + return EVP_get_digestbyname("SHA224"); + } + +#endif /* WOLFSSL_SHA224 */ + + + const WOLFSSL_EVP_MD* wolfSSL_EVP_sha256(void) + { + WOLFSSL_ENTER("EVP_sha256"); + return EVP_get_digestbyname("SHA256"); + } + +#ifdef WOLFSSL_SHA384 + + const WOLFSSL_EVP_MD* wolfSSL_EVP_sha384(void) + { + WOLFSSL_ENTER("EVP_sha384"); + return EVP_get_digestbyname("SHA384"); + } + +#endif /* WOLFSSL_SHA384 */ + +#ifdef WOLFSSL_SHA512 + + const WOLFSSL_EVP_MD* wolfSSL_EVP_sha512(void) + { + WOLFSSL_ENTER("EVP_sha512"); + return EVP_get_digestbyname("SHA512"); + } + +#endif /* WOLFSSL_SHA512 */ + +#ifdef WOLFSSL_SHA3 +#ifndef WOLFSSL_NOSHA3_224 + const WOLFSSL_EVP_MD* wolfSSL_EVP_sha3_224(void) + { + WOLFSSL_ENTER("EVP_sha3_224"); + return EVP_get_digestbyname("SHA3_224"); + } +#endif /* WOLFSSL_NOSHA3_224 */ + + +#ifndef WOLFSSL_NOSHA3_256 + const WOLFSSL_EVP_MD* wolfSSL_EVP_sha3_256(void) + { + WOLFSSL_ENTER("EVP_sha3_256"); + return EVP_get_digestbyname("SHA3_256"); + } +#endif /* WOLFSSL_NOSHA3_256 */ + + const WOLFSSL_EVP_MD* wolfSSL_EVP_sha3_384(void) + { + WOLFSSL_ENTER("EVP_sha3_384"); + return EVP_get_digestbyname("SHA3_384"); + } + +#ifndef WOLFSSL_NOSHA3_512 + const WOLFSSL_EVP_MD* wolfSSL_EVP_sha3_512(void) + { + WOLFSSL_ENTER("EVP_sha3_512"); + return EVP_get_digestbyname("SHA3_512"); + } +#endif /* WOLFSSL_NOSHA3_512 */ +#endif /* WOLFSSL_SHA3 */ + + WOLFSSL_EVP_MD_CTX *wolfSSL_EVP_MD_CTX_new(void) + { + WOLFSSL_EVP_MD_CTX* ctx; + WOLFSSL_ENTER("EVP_MD_CTX_new"); + ctx = (WOLFSSL_EVP_MD_CTX*)XMALLOC(sizeof *ctx, NULL, + 
DYNAMIC_TYPE_OPENSSL); + if (ctx){ + wolfSSL_EVP_MD_CTX_init(ctx); + } + return ctx; + } + + WOLFSSL_API void wolfSSL_EVP_MD_CTX_free(WOLFSSL_EVP_MD_CTX *ctx) + { + if (ctx) { + WOLFSSL_ENTER("EVP_MD_CTX_free"); + wolfSSL_EVP_MD_CTX_cleanup(ctx); + XFREE(ctx, NULL, DYNAMIC_TYPE_OPENSSL); + } + } + + /* returns the NID of message digest used by the ctx */ + int wolfSSL_EVP_MD_CTX_type(const WOLFSSL_EVP_MD_CTX *ctx) { + const struct s_ent *ent; + + WOLFSSL_ENTER("EVP_MD_CTX_type"); + + if (ctx) { + for(ent = md_tbl; ent->name != NULL; ent++) { + if (ctx->macType == ent->macType) { + return ent->nid; + } + } + /* Return whatever we got */ + return ctx->macType; + } + return 0; + } + + + /* returns WOLFSSL_SUCCESS on success */ + int wolfSSL_EVP_MD_CTX_copy(WOLFSSL_EVP_MD_CTX *out, const WOLFSSL_EVP_MD_CTX *in) + { + return wolfSSL_EVP_MD_CTX_copy_ex(out, in); + } + + /* returns digest size */ + int wolfSSL_EVP_MD_CTX_size(const WOLFSSL_EVP_MD_CTX *ctx) { + return(wolfSSL_EVP_MD_size(wolfSSL_EVP_MD_CTX_md(ctx))); + } + /* returns block size */ + int wolfSSL_EVP_MD_CTX_block_size(const WOLFSSL_EVP_MD_CTX *ctx) { + return(wolfSSL_EVP_MD_block_size(wolfSSL_EVP_MD_CTX_md(ctx))); + } + + /* Deep copy of EVP_MD hasher + * return WOLFSSL_SUCCESS on success */ + static int wolfSSL_EVP_MD_Copy_Hasher(WOLFSSL_EVP_MD_CTX* des, + const WOLFSSL_EVP_MD_CTX* src) + { + if (src->macType == NID_hmac) { + wolfSSL_HmacCopy(&des->hash.hmac, (Hmac*)&src->hash.hmac); + } + else { + switch (src->macType) { + #ifndef NO_MD5 + case WC_HASH_TYPE_MD5: + wc_Md5Copy((wc_Md5*)&src->hash.digest, + (wc_Md5*)&des->hash.digest); + break; + #endif /* !NO_MD5 */ + + #ifndef NO_SHA + case WC_HASH_TYPE_SHA: + wc_ShaCopy((wc_Sha*)&src->hash.digest, + (wc_Sha*)&des->hash.digest); + break; + #endif /* !NO_SHA */ + + #ifdef WOLFSSL_SHA224 + case WC_HASH_TYPE_SHA224: + wc_Sha224Copy((wc_Sha224*)&src->hash.digest, + (wc_Sha224*)&des->hash.digest); + break; + #endif /* WOLFSSL_SHA224 */ + + #ifndef NO_SHA256 + case WC_HASH_TYPE_SHA256: + wc_Sha256Copy((wc_Sha256*)&src->hash.digest, + (wc_Sha256*)&des->hash.digest); + break; + #endif /* !NO_SHA256 */ + + #ifdef WOLFSSL_SHA384 + case WC_HASH_TYPE_SHA384: + wc_Sha384Copy((wc_Sha384*)&src->hash.digest, + (wc_Sha384*)&des->hash.digest); + break; + #endif /* WOLFSSL_SHA384 */ + #ifdef WOLFSSL_SHA512 + case WC_HASH_TYPE_SHA512: + wc_Sha512Copy((wc_Sha512*)&src->hash.digest, + (wc_Sha512*)&des->hash.digest); + break; + #endif /* WOLFSSL_SHA512 */ + #ifdef WOLFSSL_SHA3 + #ifndef WOLFSSL_NOSHA3_224 + case WC_HASH_TYPE_SHA3_224: + wc_Sha3_224_Copy((wc_Sha3*)&src->hash.digest, + (wc_Sha3*)&des->hash.digest); + break; + #endif + + #ifndef WOLFSSL_NOSHA3_256 + case WC_HASH_TYPE_SHA3_256: + wc_Sha3_256_Copy((wc_Sha3*)&src->hash.digest, + (wc_Sha3*)&des->hash.digest); + break; + #endif + + case WC_HASH_TYPE_SHA3_384: + wc_Sha3_384_Copy((wc_Sha3*)&src->hash.digest, + (wc_Sha3*)&des->hash.digest); + break; + + #ifndef WOLFSSL_NOSHA3_512 + case WC_HASH_TYPE_SHA3_512: + wc_Sha3_512_Copy((wc_Sha3*)&src->hash.digest, + (wc_Sha3*)&des->hash.digest); + break; + #endif + #endif + default: + return WOLFSSL_FAILURE; + } + } + return WOLFSSL_SUCCESS; + } + + /* copies structure in to the structure out + * + * returns WOLFSSL_SUCCESS on success */ + int wolfSSL_EVP_MD_CTX_copy_ex(WOLFSSL_EVP_MD_CTX *out, const WOLFSSL_EVP_MD_CTX *in) + { + if ((out == NULL) || (in == NULL)) return WOLFSSL_FAILURE; + WOLFSSL_ENTER("EVP_CIPHER_MD_CTX_copy_ex"); + XMEMCPY(out, in, sizeof(WOLFSSL_EVP_MD_CTX)); + if (in->pctx != NULL) { 
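+ /* The XMEMCPY above made a shallow copy, so at this point out->pctx
+ * still aliases in's EVP_PKEY_CTX; allocate a fresh context below so
+ * that 'out' and 'in' can be cleaned up independently. */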
+ out->pctx = wolfSSL_EVP_PKEY_CTX_new(in->pctx->pkey, NULL); + if (out->pctx == NULL) + return WOLFSSL_FAILURE; + } + return wolfSSL_EVP_MD_Copy_Hasher(out, (WOLFSSL_EVP_MD_CTX*)in); + } + + void wolfSSL_EVP_MD_CTX_init(WOLFSSL_EVP_MD_CTX* ctx) + { + WOLFSSL_ENTER("EVP_CIPHER_MD_CTX_init"); + XMEMSET(ctx, 0, sizeof(WOLFSSL_EVP_MD_CTX)); + } + + const WOLFSSL_EVP_MD *wolfSSL_EVP_MD_CTX_md(const WOLFSSL_EVP_MD_CTX *ctx) + { + const struct s_ent *ent; + if (ctx == NULL) + return NULL; + WOLFSSL_ENTER("EVP_MD_CTX_md"); + for(ent = md_tbl; ent->name != NULL; ent++) { + if(ctx->macType == ent->macType) { + return (const WOLFSSL_EVP_MD *)ent->name; + } + } + return (WOLFSSL_EVP_MD *)NULL; + } + + #ifndef NO_AES + + #ifdef HAVE_AES_CBC + #ifdef WOLFSSL_AES_128 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cbc(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_128_cbc"); + if (EVP_AES_128_CBC == NULL) + wolfSSL_EVP_init(); + return EVP_AES_128_CBC; + } + #endif /* WOLFSSL_AES_128 */ + + + #ifdef WOLFSSL_AES_192 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cbc(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_192_cbc"); + if (EVP_AES_192_CBC == NULL) + wolfSSL_EVP_init(); + return EVP_AES_192_CBC; + } + #endif /* WOLFSSL_AES_192 */ + + + #ifdef WOLFSSL_AES_256 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cbc(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_256_cbc"); + if (EVP_AES_256_CBC == NULL) + wolfSSL_EVP_init(); + return EVP_AES_256_CBC; + } + #endif /* WOLFSSL_AES_256 */ + #endif /* HAVE_AES_CBC */ + + #ifdef WOLFSSL_AES_CFB +#if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS) + #ifdef WOLFSSL_AES_128 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cfb1(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_128_cfb1"); + if (EVP_AES_128_CFB1 == NULL) + wolfSSL_EVP_init(); + return EVP_AES_128_CFB1; + } + #endif /* WOLFSSL_AES_128 */ + + #ifdef WOLFSSL_AES_192 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cfb1(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_192_cfb1"); + if (EVP_AES_192_CFB1 == NULL) + wolfSSL_EVP_init(); + return EVP_AES_192_CFB1; + } + #endif /* WOLFSSL_AES_192 */ + + #ifdef WOLFSSL_AES_256 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cfb1(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_256_cfb1"); + if (EVP_AES_256_CFB1 == NULL) + wolfSSL_EVP_init(); + return EVP_AES_256_CFB1; + } + #endif /* WOLFSSL_AES_256 */ + + #ifdef WOLFSSL_AES_128 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cfb8(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_128_cfb8"); + if (EVP_AES_128_CFB8 == NULL) + wolfSSL_EVP_init(); + return EVP_AES_128_CFB8; + } + #endif /* WOLFSSL_AES_128 */ + + #ifdef WOLFSSL_AES_192 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cfb8(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_192_cfb8"); + if (EVP_AES_192_CFB8 == NULL) + wolfSSL_EVP_init(); + return EVP_AES_192_CFB8; + } + #endif /* WOLFSSL_AES_192 */ + + #ifdef WOLFSSL_AES_256 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cfb8(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_256_cfb8"); + if (EVP_AES_256_CFB8 == NULL) + wolfSSL_EVP_init(); + return EVP_AES_256_CFB8; + } + #endif /* WOLFSSL_AES_256 */ +#endif /* !HAVE_SELFTEST && !HAVE_FIPS */ + + #ifdef WOLFSSL_AES_128 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cfb128(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_128_cfb128"); + if (EVP_AES_128_CFB128 == NULL) + wolfSSL_EVP_init(); + return EVP_AES_128_CFB128; + } + #endif /* WOLFSSL_AES_128 */ + + #ifdef WOLFSSL_AES_192 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cfb128(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_192_cfb128"); + if 
(EVP_AES_192_CFB128 == NULL) + wolfSSL_EVP_init(); + return EVP_AES_192_CFB128; + } + #endif /* WOLFSSL_AES_192 */ + + #ifdef WOLFSSL_AES_256 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cfb128(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_256_cfb128"); + if (EVP_AES_256_CFB128 == NULL) + wolfSSL_EVP_init(); + return EVP_AES_256_CFB128; + } + #endif /* WOLFSSL_AES_256 */ + #endif /* WOLFSSL_AES_CFB */ + + #ifdef WOLFSSL_AES_OFB + #ifdef WOLFSSL_AES_128 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_ofb(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_128_ofb"); + if (EVP_AES_128_OFB == NULL) + wolfSSL_EVP_init(); + return EVP_AES_128_OFB; + } + #endif /* WOLFSSL_AES_128 */ + + #ifdef WOLFSSL_AES_192 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_ofb(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_192_ofb"); + if (EVP_AES_192_OFB == NULL) + wolfSSL_EVP_init(); + return EVP_AES_192_OFB; + } + #endif /* WOLFSSL_AES_192 */ + + #ifdef WOLFSSL_AES_256 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_ofb(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_256_ofb"); + if (EVP_AES_256_OFB == NULL) + wolfSSL_EVP_init(); + return EVP_AES_256_OFB; + } + #endif /* WOLFSSL_AES_256 */ + #endif /* WOLFSSL_AES_OFB */ + + #ifdef WOLFSSL_AES_XTS + #ifdef WOLFSSL_AES_128 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_xts(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_128_xts"); + if (EVP_AES_128_XTS == NULL) + wolfSSL_EVP_init(); + return EVP_AES_128_XTS; + } + #endif /* WOLFSSL_AES_128 */ + + #ifdef WOLFSSL_AES_256 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_xts(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_256_xts"); + if (EVP_AES_256_XTS == NULL) + wolfSSL_EVP_init(); + return EVP_AES_256_XTS; + } + #endif /* WOLFSSL_AES_256 */ + #endif /* WOLFSSL_AES_XTS */ + + #ifdef HAVE_AESGCM + #ifdef WOLFSSL_AES_128 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_gcm(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_128_gcm"); + if (EVP_AES_128_GCM == NULL) + wolfSSL_EVP_init(); + return EVP_AES_128_GCM; + } + #endif /* WOLFSSL_AES_128 */ + + #ifdef WOLFSSL_AES_192 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_gcm(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_192_gcm"); + if (EVP_AES_192_GCM == NULL) + wolfSSL_EVP_init(); + return EVP_AES_192_GCM; + } + #endif /* WOLFSSL_AES_192 */ + + #ifdef WOLFSSL_AES_256 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_gcm(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_256_gcm"); + if (EVP_AES_256_GCM == NULL) + wolfSSL_EVP_init(); + return EVP_AES_256_GCM; + } + #endif /* WOLFSSL_AES_256 */ + #endif /* HAVE_AESGCM */ + + #ifdef WOLFSSL_AES_128 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_ctr(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_128_ctr"); + if (EVP_AES_128_CTR == NULL) + wolfSSL_EVP_init(); + return EVP_AES_128_CTR; + } + #endif /* WOLFSSL_AES_128 */ + + + #ifdef WOLFSSL_AES_192 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_ctr(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_192_ctr"); + if (EVP_AES_192_CTR == NULL) + wolfSSL_EVP_init(); + return EVP_AES_192_CTR; + } + #endif /* WOLFSSL_AES_192 */ + + + #ifdef WOLFSSL_AES_256 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_ctr(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_256_ctr"); + if (EVP_AES_256_CTR == NULL) + wolfSSL_EVP_init(); + return EVP_AES_256_CTR; + } + #endif /* WOLFSSL_AES_256 */ + + #ifdef WOLFSSL_AES_128 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_ecb(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_128_ecb"); + if (EVP_AES_128_ECB == NULL) + wolfSSL_EVP_init(); + return EVP_AES_128_ECB; + } + #endif /* WOLFSSL_AES_128 */ + + + #ifdef 
WOLFSSL_AES_192 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_ecb(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_192_ecb"); + if (EVP_AES_192_ECB == NULL) + wolfSSL_EVP_init(); + return EVP_AES_192_ECB; + } + #endif /* WOLFSSL_AES_192*/ + + + #ifdef WOLFSSL_AES_256 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_ecb(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_aes_256_ecb"); + if (EVP_AES_256_ECB == NULL) + wolfSSL_EVP_init(); + return EVP_AES_256_ECB; + } + #endif /* WOLFSSL_AES_256 */ + #endif /* NO_AES */ + +#ifndef NO_DES3 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_des_cbc(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_des_cbc"); + if (EVP_DES_CBC == NULL) + wolfSSL_EVP_init(); + return EVP_DES_CBC; + } +#ifdef WOLFSSL_DES_ECB + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_des_ecb(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_des_ecb"); + if (EVP_DES_ECB == NULL) + wolfSSL_EVP_init(); + return EVP_DES_ECB; + } +#endif + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_des_ede3_cbc(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_des_ede3_cbc"); + if (EVP_DES_EDE3_CBC == NULL) + wolfSSL_EVP_init(); + return EVP_DES_EDE3_CBC; + } +#ifdef WOLFSSL_DES_ECB + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_des_ede3_ecb(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_des_ede3_ecb"); + if (EVP_DES_EDE3_ECB == NULL) + wolfSSL_EVP_init(); + return EVP_DES_EDE3_ECB; + } +#endif +#endif /* NO_DES3 */ + +#ifndef NO_RC4 + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_rc4(void) + { + static const char* type = "ARC4"; + WOLFSSL_ENTER("wolfSSL_EVP_rc4"); + return type; + } +#endif + +#ifdef HAVE_IDEA + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_idea_cbc(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_idea_cbc"); + if (EVP_IDEA_CBC == NULL) + wolfSSL_EVP_init(); + return EVP_IDEA_CBC; + } +#endif + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_enc_null(void) + { + static const char* type = "NULL"; + WOLFSSL_ENTER("wolfSSL_EVP_enc_null"); + return type; + } + + int wolfSSL_EVP_MD_CTX_cleanup(WOLFSSL_EVP_MD_CTX* ctx) + { + WOLFSSL_ENTER("EVP_MD_CTX_cleanup"); + if (ctx->pctx != NULL) + wolfSSL_EVP_PKEY_CTX_free(ctx->pctx); + + if (ctx->macType == NID_hmac) { + wc_HmacFree(&ctx->hash.hmac); + } + else { + switch (ctx->macType) { + #ifndef NO_MD5 + case WC_HASH_TYPE_MD5: + wc_Md5Free((wc_Md5*)&ctx->hash.digest); + break; + #endif /* !NO_MD5 */ + + #ifndef NO_SHA + case WC_HASH_TYPE_SHA: + wc_ShaFree((wc_Sha*)&ctx->hash.digest); + break; + #endif /* !NO_SHA */ + + #ifdef WOLFSSL_SHA224 + case WC_HASH_TYPE_SHA224: + wc_Sha224Free((wc_Sha224*)&ctx->hash.digest); + break; + #endif /* WOLFSSL_SHA224 */ + + #ifndef NO_SHA256 + case WC_HASH_TYPE_SHA256: + wc_Sha256Free((wc_Sha256*)&ctx->hash.digest); + break; + #endif /* !NO_SHA256 */ + + #ifdef WOLFSSL_SHA384 + case WC_HASH_TYPE_SHA384: + wc_Sha384Free((wc_Sha384*)&ctx->hash.digest); + break; + #endif /* WOLFSSL_SHA384 */ + #ifdef WOLFSSL_SHA512 + case WC_HASH_TYPE_SHA512: + wc_Sha512Free((wc_Sha512*)&ctx->hash.digest); + break; + #endif /* WOLFSSL_SHA512 */ + #ifdef WOLFSSL_SHA3 + #ifndef WOLFSSL_NOSHA3_224 + case WC_HASH_TYPE_SHA3_224: + wc_Sha3_224_Free((wc_Sha3*)&ctx->hash.digest); + break; + #endif + + #ifndef WOLFSSL_NOSHA3_256 + case WC_HASH_TYPE_SHA3_256: + wc_Sha3_256_Free((wc_Sha3*)&ctx->hash.digest); + break; + #endif + + case WC_HASH_TYPE_SHA3_384: + wc_Sha3_384_Free((wc_Sha3*)&ctx->hash.digest); + break; + + #ifndef WOLFSSL_NOSHA3_512 + case WC_HASH_TYPE_SHA3_512: + wc_Sha3_512_Free((wc_Sha3*)&ctx->hash.digest); + break; + #endif + #endif + default: + return WOLFSSL_FAILURE; + } + } + ForceZero(ctx, sizeof(*ctx)); + ctx->macType = WC_HASH_TYPE_NONE; + 
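+ /* All hasher state for every enabled digest has now been released and
+ * the ctx wiped, so the caller may reuse the ctx for a new digest or
+ * hand it to wolfSSL_EVP_MD_CTX_free(), which routes through this
+ * cleanup. */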
return 1; + } + + void wolfSSL_EVP_CIPHER_CTX_init(WOLFSSL_EVP_CIPHER_CTX* ctx) + { + WOLFSSL_ENTER("EVP_CIPHER_CTX_init"); + if (ctx) { + XMEMSET(ctx, 0, sizeof(WOLFSSL_EVP_CIPHER_CTX)); + ctx->cipherType = WOLFSSL_EVP_CIPH_TYPE_INIT; /* not yet initialized */ + ctx->keyLen = 0; + ctx->enc = 1; /* start in encrypt mode */ + } + } + +#if defined(HAVE_AESGCM) && !defined(HAVE_SELFTEST) + static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz) + { + int i; + for (i = ctrSz-1; i >= 0; i--) { + if (++ctr[i]) + break; + } + } +#endif + + /* This function allows cipher specific parameters to be + determined and set. */ + int wolfSSL_EVP_CIPHER_CTX_ctrl(WOLFSSL_EVP_CIPHER_CTX *ctx, int type, \ + int arg, void *ptr) + { + int ret = WOLFSSL_FAILURE; +#if defined(HAVE_AESGCM) && !defined(HAVE_SELFTEST) && !defined(WC_NO_RNG) + WC_RNG rng; +#endif + if (ctx == NULL) + return WOLFSSL_FAILURE; + + (void)arg; + (void)ptr; + + WOLFSSL_ENTER("EVP_CIPHER_CTX_ctrl"); + + switch(type) { + case EVP_CTRL_INIT: + wolfSSL_EVP_CIPHER_CTX_init(ctx); + if(ctx) + ret = WOLFSSL_SUCCESS; + break; + case EVP_CTRL_SET_KEY_LENGTH: + ret = wolfSSL_EVP_CIPHER_CTX_set_key_length(ctx, arg); + break; +#if defined(HAVE_AESGCM) && !defined(HAVE_SELFTEST) && !defined(WC_NO_RNG) + case EVP_CTRL_GCM_SET_IVLEN: + if(arg <= 0 || arg > 16) + return WOLFSSL_FAILURE; + ret = wolfSSL_EVP_CIPHER_CTX_set_iv_length(ctx, arg); + break; + case EVP_CTRL_AEAD_SET_IV_FIXED: + if (arg == -1) { + /* arg == -1 copies ctx->ivSz from ptr */ + ret = wolfSSL_EVP_CIPHER_CTX_set_iv(ctx, (byte*)ptr, ctx->ivSz); + } + else { + /* + * Fixed field must be at least 4 bytes and invocation + * field at least 8. + */ + if ((arg < 4) || (ctx->ivSz - arg) < 8) { + WOLFSSL_MSG("Fixed field or invocation field too short"); + ret = WOLFSSL_FAILURE; + break; + } + if (wc_InitRng(&rng) != 0) { + WOLFSSL_MSG("wc_InitRng failed"); + ret = WOLFSSL_FAILURE; + break; + } + if (arg) { + XMEMCPY(ctx->iv, ptr, arg); + } + if (wc_RNG_GenerateBlock(&rng, ctx->iv + arg, + ctx->ivSz - arg) != 0) { + /* rng is freed immediately after if block so no need + * to do it here + */ + WOLFSSL_MSG("wc_RNG_GenerateBlock failed"); + ret = WOLFSSL_FAILURE; + } + + if (wc_FreeRng(&rng) != 0) { + WOLFSSL_MSG("wc_FreeRng failed"); + ret = WOLFSSL_FAILURE; + break; + } + } + break; +#if !defined(_WIN32) && !defined(HAVE_FIPS) + case EVP_CTRL_GCM_IV_GEN: + if (ctx->cipher.aes.keylen == 0 || ctx->ivSz == 0) { + ret = WOLFSSL_FAILURE; + WOLFSSL_MSG("Key or IV not set"); + break; + } + if ((ret = wc_AesGcmSetExtIV(&ctx->cipher.aes, ctx->iv, ctx->ivSz)) != 0) { + WOLFSSL_MSG("wc_AesGcmSetIV failed"); + ret = WOLFSSL_FAILURE; + } + /* OpenSSL increments the IV. 
This is presumably so that each subsequent call produces a distinct IV. */ + IncCtr(ctx->iv, ctx->ivSz); + break; +#endif + case EVP_CTRL_AEAD_SET_TAG: + if(arg <= 0 || arg > 16 || (ptr == NULL)) + return WOLFSSL_FAILURE; + + XMEMCPY(ctx->authTag, ptr, arg); + ctx->authTagSz = arg; + ret = WOLFSSL_SUCCESS; + + break; + case EVP_CTRL_AEAD_GET_TAG: + if(arg <= 0 || arg > 16) + return WOLFSSL_FAILURE; + + XMEMCPY(ptr, ctx->authTag, arg); + ret = WOLFSSL_SUCCESS; + break; +#endif /* HAVE_AESGCM && !HAVE_SELFTEST && !WC_NO_RNG */ + default: + WOLFSSL_MSG("EVP_CIPHER_CTX_ctrl operation not yet handled"); + ret = WOLFSSL_FAILURE; + } + return ret; + } + + /* WOLFSSL_SUCCESS on ok */ + int wolfSSL_EVP_CIPHER_CTX_cleanup(WOLFSSL_EVP_CIPHER_CTX* ctx) + { + WOLFSSL_ENTER("EVP_CIPHER_CTX_cleanup"); + if (ctx) { + ctx->cipherType = WOLFSSL_EVP_CIPH_TYPE_INIT; /* not yet initialized */ + ctx->keyLen = 0; + } + + return WOLFSSL_SUCCESS; + } + + /* Permanent stub for Qt compilation. */ + #if defined(WOLFSSL_QT) && !defined(NO_WOLFSSL_STUB) + const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_rc2_cbc(void) + { + WOLFSSL_ENTER("wolfSSL_EVP_rc2_cbc"); + WOLFSSL_STUB("EVP_rc2_cbc"); + return NULL; + } + #endif + +#if defined(WOLFSSL_ENCRYPTED_KEYS) && !defined(NO_PWDBASED) + + int wolfSSL_EVP_BytesToKey(const WOLFSSL_EVP_CIPHER* type, + const WOLFSSL_EVP_MD* md, const byte* salt, + const byte* data, int sz, int count, byte* key, byte* iv) + { + int ret; + int hashType = WC_HASH_TYPE_NONE; + #ifdef WOLFSSL_SMALL_STACK + EncryptedInfo* info; + #else + EncryptedInfo info[1]; + #endif + + #ifdef WOLFSSL_SMALL_STACK + info = (EncryptedInfo*)XMALLOC(sizeof(EncryptedInfo), NULL, + DYNAMIC_TYPE_ENCRYPTEDINFO); + if (info == NULL) { + WOLFSSL_MSG("malloc failed"); + return WOLFSSL_FAILURE; + } + #endif + + XMEMSET(info, 0, sizeof(EncryptedInfo)); + + ret = wc_EncryptedInfoGet(info, type); + if (ret < 0) + goto end; + + if (data == NULL) { + ret = info->keySz; + goto end; + } + + ret = wolfSSL_EVP_get_hashinfo(md, &hashType, NULL); + if (ret == WOLFSSL_FAILURE) + goto end; + + ret = wc_PBKDF1_ex(key, info->keySz, iv, info->ivSz, data, sz, salt, + EVP_SALT_SIZE, count, hashType, NULL); + if (ret == 0) + ret = info->keySz; + + end: + #ifdef WOLFSSL_SMALL_STACK + XFREE(info, NULL, DYNAMIC_TYPE_ENCRYPTEDINFO); + #endif + if (ret < 0) + return 0; /* failure - for compatibility */ + + return ret; + } + +#endif /* WOLFSSL_ENCRYPTED_KEYS && !NO_PWDBASED */ + +#ifndef NO_AES + static int AesSetKey_ex(Aes* aes, const byte* key, word32 len, + const byte* iv, int dir, int direct) + { + int ret; + /* wc_AesSetKey clears aes.reg if iv == NULL. 
+ Keep IV for openSSL compatibility */ + if (iv == NULL) + XMEMCPY((byte *)aes->tmp, (byte *)aes->reg, AES_BLOCK_SIZE); + if (direct) { + #if defined(WOLFSSL_AES_DIRECT) + ret = wc_AesSetKeyDirect(aes, key, len, iv, dir); + #else + ret = NOT_COMPILED_IN; + #endif + } + else { + ret = wc_AesSetKey(aes, key, len, iv, dir); + } + if (iv == NULL) + XMEMCPY((byte *)aes->reg, (byte *)aes->tmp, AES_BLOCK_SIZE); + return ret; + } +#endif + + /* return WOLFSSL_SUCCESS on ok, 0 on failure to match API compatibility */ + int wolfSSL_EVP_CipherInit(WOLFSSL_EVP_CIPHER_CTX* ctx, + const WOLFSSL_EVP_CIPHER* type, const byte* key, + const byte* iv, int enc) + { + int ret = 0; + (void)key; + (void)iv; + (void)enc; + + WOLFSSL_ENTER("wolfSSL_EVP_CipherInit"); + if (ctx == NULL) { + WOLFSSL_MSG("no ctx"); + return WOLFSSL_FAILURE; + } + + if (type == NULL && ctx->cipherType == WOLFSSL_EVP_CIPH_TYPE_INIT) { + WOLFSSL_MSG("no type set"); + return WOLFSSL_FAILURE; + } + if (ctx->cipherType == WOLFSSL_EVP_CIPH_TYPE_INIT){ + /* only first EVP_CipherInit invoke. ctx->cipherType is set below */ + XMEMSET(&ctx->cipher, 0, sizeof(ctx->cipher)); + ctx->flags = 0; + } + /* always clear buffer state */ + ctx->bufUsed = 0; + ctx->lastUsed = 0; + +#ifdef HAVE_WOLFSSL_EVP_CIPHER_CTX_IV + if (!iv && ctx->ivSz) { + iv = ctx->iv; + } +#endif + +#ifndef NO_AES + #ifdef HAVE_AES_CBC + #ifdef WOLFSSL_AES_128 + if (ctx->cipherType == AES_128_CBC_TYPE || + (type && XSTRNCMP(type, EVP_AES_128_CBC, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_128_CBC"); + ctx->cipherType = AES_128_CBC_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CBC_MODE; + ctx->keyLen = 16; + ctx->block_size = AES_BLOCK_SIZE; + ctx->ivSz = AES_BLOCK_SIZE; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 0); + if (ret != 0) + return WOLFSSL_FAILURE; + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0) + return WOLFSSL_FAILURE; + } + } + #endif /* WOLFSSL_AES_128 */ + #ifdef WOLFSSL_AES_192 + if (ctx->cipherType == AES_192_CBC_TYPE || + (type && XSTRNCMP(type, EVP_AES_192_CBC, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_192_CBC"); + ctx->cipherType = AES_192_CBC_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CBC_MODE; + ctx->keyLen = 24; + ctx->block_size = AES_BLOCK_SIZE; + ctx->ivSz = AES_BLOCK_SIZE; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 0); + if (ret != 0) + return WOLFSSL_FAILURE; + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0) + return WOLFSSL_FAILURE; + } + } + #endif /* WOLFSSL_AES_192 */ + #ifdef WOLFSSL_AES_256 + if (ctx->cipherType == AES_256_CBC_TYPE || + (type && XSTRNCMP(type, EVP_AES_256_CBC, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_256_CBC"); + ctx->cipherType = AES_256_CBC_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CBC_MODE; + ctx->keyLen = 32; + ctx->block_size = AES_BLOCK_SIZE; + ctx->ivSz = AES_BLOCK_SIZE; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + ctx->enc ? 
AES_ENCRYPTION : AES_DECRYPTION, 0); + if (ret != 0){ + WOLFSSL_MSG("AesSetKey() failed"); + return WOLFSSL_FAILURE; + } + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0){ + WOLFSSL_MSG("wc_AesSetIV() failed"); + return WOLFSSL_FAILURE; + } + } + } + #endif /* WOLFSSL_AES_256 */ + #endif /* HAVE_AES_CBC */ +#if !defined(_WIN32) && !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST) + #ifdef HAVE_AESGCM + #ifdef WOLFSSL_AES_128 + if (ctx->cipherType == AES_128_GCM_TYPE || + (type && XSTRNCMP(type, EVP_AES_128_GCM, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_128_GCM"); + ctx->cipherType = AES_128_GCM_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_GCM_MODE; + ctx->keyLen = 16; + ctx->block_size = AES_BLOCK_SIZE; + ctx->authTagSz = AES_BLOCK_SIZE; + ctx->ivSz = GCM_NONCE_MID_SZ; + + XMEMSET(ctx->authTag, 0, ctx->authTagSz); + if (key && wc_AesGcmSetKey(&ctx->cipher.aes, key, ctx->keyLen)) { + WOLFSSL_MSG("wc_AesGcmSetKey() failed"); + return WOLFSSL_FAILURE; + } + if (iv && wc_AesGcmSetExtIV(&ctx->cipher.aes, iv, GCM_NONCE_MID_SZ)) { + WOLFSSL_MSG("wc_AesGcmSetExtIV() failed"); + return WOLFSSL_FAILURE; + } + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + } + #endif /* WOLFSSL_AES_128 */ + #ifdef WOLFSSL_AES_192 + if (ctx->cipherType == AES_192_GCM_TYPE || + (type && XSTRNCMP(type, EVP_AES_192_GCM, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_192_GCM"); + ctx->cipherType = AES_192_GCM_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_GCM_MODE; + ctx->keyLen = 24; + ctx->block_size = AES_BLOCK_SIZE; + ctx->authTagSz = AES_BLOCK_SIZE; + ctx->ivSz = GCM_NONCE_MID_SZ; + + XMEMSET(ctx->authTag, 0, ctx->authTagSz); + if (key && wc_AesGcmSetKey(&ctx->cipher.aes, key, ctx->keyLen)) { + WOLFSSL_MSG("wc_AesGcmSetKey() failed"); + return WOLFSSL_FAILURE; + } + if (iv && wc_AesGcmSetExtIV(&ctx->cipher.aes, iv, GCM_NONCE_MID_SZ)) { + WOLFSSL_MSG("wc_AesGcmSetExtIV() failed"); + return WOLFSSL_FAILURE; + } + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + } + #endif /* WOLFSSL_AES_192 */ + #ifdef WOLFSSL_AES_256 + if (ctx->cipherType == AES_256_GCM_TYPE || + (type && XSTRNCMP(type, EVP_AES_256_GCM, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_256_GCM"); + ctx->cipherType = AES_256_GCM_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_GCM_MODE; + ctx->keyLen = 32; + ctx->block_size = AES_BLOCK_SIZE; + ctx->authTagSz = AES_BLOCK_SIZE; + ctx->ivSz = GCM_NONCE_MID_SZ; + + XMEMSET(ctx->authTag, 0, ctx->authTagSz); + if (key && wc_AesGcmSetKey(&ctx->cipher.aes, key, ctx->keyLen)) { + WOLFSSL_MSG("wc_AesGcmSetKey() failed"); + return WOLFSSL_FAILURE; + } + if (iv && wc_AesGcmSetExtIV(&ctx->cipher.aes, iv, GCM_NONCE_MID_SZ)) { + WOLFSSL_MSG("wc_AesGcmSetExtIV() failed"); + return WOLFSSL_FAILURE; + } + if (enc == 0 || enc == 1) + ctx->enc = enc ? 
1 : 0; + } + #endif /* WOLFSSL_AES_256 */ + #endif /* HAVE_AESGCM */ +#endif /* !defined(_WIN32) && !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST) */ +#ifdef WOLFSSL_AES_COUNTER + #ifdef WOLFSSL_AES_128 + if (ctx->cipherType == AES_128_CTR_TYPE || + (type && XSTRNCMP(type, EVP_AES_128_CTR, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_128_CTR"); + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->cipherType = AES_128_CTR_TYPE; + ctx->flags |= WOLFSSL_EVP_CIPH_CTR_MODE; + ctx->keyLen = 16; + ctx->block_size = NO_PADDING_BLOCK_SIZE; + ctx->ivSz = AES_BLOCK_SIZE; +#if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB) + ctx->cipher.aes.left = 0; +#endif + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + AES_ENCRYPTION, 1); + if (ret != 0) + return WOLFSSL_FAILURE; + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0) + return WOLFSSL_FAILURE; + } + } + #endif /* WOLFSSL_AES_128 */ + #ifdef WOLFSSL_AES_192 + if (ctx->cipherType == AES_192_CTR_TYPE || + (type && XSTRNCMP(type, EVP_AES_192_CTR, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_192_CTR"); + ctx->cipherType = AES_192_CTR_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CTR_MODE; + ctx->keyLen = 24; + ctx->block_size = NO_PADDING_BLOCK_SIZE; + ctx->ivSz = AES_BLOCK_SIZE; +#if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB) + ctx->cipher.aes.left = 0; +#endif + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + AES_ENCRYPTION, 1); + if (ret != 0) + return WOLFSSL_FAILURE; + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0) + return WOLFSSL_FAILURE; + } + } + #endif /* WOLFSSL_AES_192 */ + #ifdef WOLFSSL_AES_256 + if (ctx->cipherType == AES_256_CTR_TYPE || + (type && XSTRNCMP(type, EVP_AES_256_CTR, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_256_CTR"); + ctx->cipherType = AES_256_CTR_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CTR_MODE; + ctx->keyLen = 32; + ctx->block_size = NO_PADDING_BLOCK_SIZE; + ctx->ivSz = AES_BLOCK_SIZE; +#if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB) + ctx->cipher.aes.left = 0; +#endif + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + AES_ENCRYPTION, 1); + if (ret != 0) + return WOLFSSL_FAILURE; + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0) + return WOLFSSL_FAILURE; + } + } + #endif /* WOLFSSL_AES_256 */ +#endif /* WOLFSSL_AES_COUNTER */ + #ifdef WOLFSSL_AES_128 + if (ctx->cipherType == AES_128_ECB_TYPE || + (type && XSTRNCMP(type, EVP_AES_128_ECB, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_128_ECB"); + ctx->cipherType = AES_128_ECB_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_ECB_MODE; + ctx->keyLen = 16; + ctx->block_size = AES_BLOCK_SIZE; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, NULL, + ctx->enc ? 
AES_ENCRYPTION : AES_DECRYPTION, 1); + } + if (ret != 0) + return WOLFSSL_FAILURE; + } + #endif /* WOLFSSL_AES_128 */ + #ifdef WOLFSSL_AES_192 + if (ctx->cipherType == AES_192_ECB_TYPE || + (type && XSTRNCMP(type, EVP_AES_192_ECB, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_192_ECB"); + ctx->cipherType = AES_192_ECB_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_ECB_MODE; + ctx->keyLen = 24; + ctx->block_size = AES_BLOCK_SIZE; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, NULL, + ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 1); + } + if (ret != 0) + return WOLFSSL_FAILURE; + } + #endif /* WOLFSSL_AES_192 */ + #ifdef WOLFSSL_AES_256 + if (ctx->cipherType == AES_256_ECB_TYPE || + (type && XSTRNCMP(type, EVP_AES_256_ECB, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_256_ECB"); + ctx->cipherType = AES_256_ECB_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_ECB_MODE; + ctx->keyLen = 32; + ctx->block_size = AES_BLOCK_SIZE; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, NULL, + ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 1); + } + if (ret != 0) + return WOLFSSL_FAILURE; + } + #endif /* WOLFSSL_AES_256 */ + #ifdef WOLFSSL_AES_CFB + #ifdef WOLFSSL_AES_128 + if (ctx->cipherType == AES_128_CFB1_TYPE || + (type && XSTRNCMP(type, EVP_AES_128_CFB1, EVP_AESCFB_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_128_CFB1"); + ctx->cipherType = AES_128_CFB1_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE; + ctx->keyLen = 16; + ctx->block_size = 1; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + AES_ENCRYPTION, 0); + if (ret != 0) + return WOLFSSL_FAILURE; + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0) + return WOLFSSL_FAILURE; + } + } + #endif /* WOLFSSL_AES_128 */ + #ifdef WOLFSSL_AES_192 + if (ctx->cipherType == AES_192_CFB1_TYPE || + (type && XSTRNCMP(type, EVP_AES_192_CFB1, EVP_AESCFB_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_192_CFB1"); + ctx->cipherType = AES_192_CFB1_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE; + ctx->keyLen = 24; + ctx->block_size = 1; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + AES_ENCRYPTION, 0); + if (ret != 0) + return WOLFSSL_FAILURE; + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0) + return WOLFSSL_FAILURE; + } + } + #endif /* WOLFSSL_AES_192 */ + #ifdef WOLFSSL_AES_256 + if (ctx->cipherType == AES_256_CFB1_TYPE || + (type && XSTRNCMP(type, EVP_AES_256_CFB1, EVP_AESCFB_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_256_CFB1"); + ctx->cipherType = AES_256_CFB1_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE; + ctx->keyLen = 32; + ctx->block_size = 1; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 
1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + AES_ENCRYPTION, 0); + if (ret != 0){ + WOLFSSL_MSG("AesSetKey() failed"); + return WOLFSSL_FAILURE; + } + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0){ + WOLFSSL_MSG("wc_AesSetIV() failed"); + return WOLFSSL_FAILURE; + } + } + } + #endif /* WOLFSSL_AES_256 */ + #ifdef WOLFSSL_AES_128 + if (ctx->cipherType == AES_128_CFB8_TYPE || + (type && XSTRNCMP(type, EVP_AES_128_CFB8, EVP_AESCFB_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_128_CFB8"); + ctx->cipherType = AES_128_CFB8_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE; + ctx->keyLen = 16; + ctx->block_size = 1; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + AES_ENCRYPTION, 0); + if (ret != 0) + return WOLFSSL_FAILURE; + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0) + return WOLFSSL_FAILURE; + } + } + #endif /* WOLFSSL_AES_128 */ + #ifdef WOLFSSL_AES_192 + if (ctx->cipherType == AES_192_CFB8_TYPE || + (type && XSTRNCMP(type, EVP_AES_192_CFB8, EVP_AESCFB_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_192_CFB8"); + ctx->cipherType = AES_192_CFB8_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE; + ctx->keyLen = 24; + ctx->block_size = 1; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + AES_ENCRYPTION, 0); + if (ret != 0) + return WOLFSSL_FAILURE; + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0) + return WOLFSSL_FAILURE; + } + } + #endif /* WOLFSSL_AES_192 */ + #ifdef WOLFSSL_AES_256 + if (ctx->cipherType == AES_256_CFB8_TYPE || + (type && XSTRNCMP(type, EVP_AES_256_CFB8, EVP_AESCFB_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_256_CFB8"); + ctx->cipherType = AES_256_CFB8_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE; + ctx->keyLen = 32; + ctx->block_size = 1; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + AES_ENCRYPTION, 0); + if (ret != 0){ + WOLFSSL_MSG("AesSetKey() failed"); + return WOLFSSL_FAILURE; + } + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0){ + WOLFSSL_MSG("wc_AesSetIV() failed"); + return WOLFSSL_FAILURE; + } + } + } + #endif /* WOLFSSL_AES_256 */ + #ifdef WOLFSSL_AES_128 + if (ctx->cipherType == AES_128_CFB128_TYPE || + (type && XSTRNCMP(type, EVP_AES_128_CFB128, EVP_AESCFB_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_128_CFB128"); + ctx->cipherType = AES_128_CFB128_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE; + ctx->keyLen = 16; + ctx->block_size = 1; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 
1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + AES_ENCRYPTION, 0); + if (ret != 0) + return WOLFSSL_FAILURE; + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0) + return WOLFSSL_FAILURE; + } + } + #endif /* WOLFSSL_AES_128 */ + #ifdef WOLFSSL_AES_192 + if (ctx->cipherType == AES_192_CFB128_TYPE || + (type && XSTRNCMP(type, EVP_AES_192_CFB128, EVP_AESCFB_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_192_CFB128"); + ctx->cipherType = AES_192_CFB128_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE; + ctx->keyLen = 24; + ctx->block_size = 1; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + AES_ENCRYPTION, 0); + if (ret != 0) + return WOLFSSL_FAILURE; + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0) + return WOLFSSL_FAILURE; + } + } + #endif /* WOLFSSL_AES_192 */ + #ifdef WOLFSSL_AES_256 + if (ctx->cipherType == AES_256_CFB128_TYPE || + (type && XSTRNCMP(type, EVP_AES_256_CFB128, EVP_AESCFB_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_256_CFB128"); + ctx->cipherType = AES_256_CFB128_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE; + ctx->keyLen = 32; + ctx->block_size = 1; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + AES_ENCRYPTION, 0); + if (ret != 0){ + WOLFSSL_MSG("AesSetKey() failed"); + return WOLFSSL_FAILURE; + } + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0){ + WOLFSSL_MSG("wc_AesSetIV() failed"); + return WOLFSSL_FAILURE; + } + } + } + #endif /* WOLFSSL_AES_256 */ + #endif /* HAVE_AES_CFB */ + #ifdef WOLFSSL_AES_OFB + #ifdef WOLFSSL_AES_128 + if (ctx->cipherType == AES_128_OFB_TYPE || + (type && XSTRNCMP(type, EVP_AES_128_OFB, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_128_OFB"); + ctx->cipherType = AES_128_OFB_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_OFB_MODE; + ctx->keyLen = 16; + ctx->block_size = 1; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + AES_ENCRYPTION, 0); + if (ret != 0) + return WOLFSSL_FAILURE; + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0) + return WOLFSSL_FAILURE; + } + } + #endif /* WOLFSSL_AES_128 */ + #ifdef WOLFSSL_AES_192 + if (ctx->cipherType == AES_192_OFB_TYPE || + (type && XSTRNCMP(type, EVP_AES_192_OFB, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_192_OFB"); + ctx->cipherType = AES_192_OFB_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_OFB_MODE; + ctx->keyLen = 24; + ctx->block_size = 1; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 
1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + AES_ENCRYPTION, 0); + if (ret != 0) + return WOLFSSL_FAILURE; + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0) + return WOLFSSL_FAILURE; + } + } + #endif /* WOLFSSL_AES_192 */ + #ifdef WOLFSSL_AES_256 + if (ctx->cipherType == AES_256_OFB_TYPE || + (type && XSTRNCMP(type, EVP_AES_256_OFB, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_256_OFB"); + ctx->cipherType = AES_256_OFB_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_OFB_MODE; + ctx->keyLen = 32; + ctx->block_size = 1; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + AES_ENCRYPTION, 0); + if (ret != 0){ + WOLFSSL_MSG("AesSetKey() failed"); + return WOLFSSL_FAILURE; + } + } + if (iv && key == NULL) { + ret = wc_AesSetIV(&ctx->cipher.aes, iv); + if (ret != 0){ + WOLFSSL_MSG("wc_AesSetIV() failed"); + return WOLFSSL_FAILURE; + } + } + } + #endif /* WOLFSSL_AES_256 */ + #endif /* HAVE_AES_OFB */ + #ifdef WOLFSSL_AES_XTS + #ifdef WOLFSSL_AES_128 + if (ctx->cipherType == AES_128_XTS_TYPE || + (type && XSTRNCMP(type, EVP_AES_128_XTS, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_128_XTS"); + ctx->cipherType = AES_128_XTS_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_XTS_MODE; + ctx->keyLen = 32; + ctx->block_size = 1; + ctx->ivSz = AES_BLOCK_SIZE; + + if (iv != NULL) { + if (iv != ctx->iv) /* Valgrind error when src == dst */ + XMEMCPY(ctx->iv, iv, ctx->ivSz); + } + else + XMEMSET(ctx->iv, 0, AES_BLOCK_SIZE); + + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = wc_AesXtsSetKey(&ctx->cipher.xts, key, ctx->keyLen, + ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, NULL, 0); + if (ret != 0) { + WOLFSSL_MSG("wc_AesXtsSetKey() failed"); + return WOLFSSL_FAILURE; + } + } + } + #endif /* WOLFSSL_AES_128 */ + #ifdef WOLFSSL_AES_256 + if (ctx->cipherType == AES_256_XTS_TYPE || + (type && XSTRNCMP(type, EVP_AES_256_XTS, EVP_AES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_AES_256_XTS"); + ctx->cipherType = AES_256_XTS_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_XTS_MODE; + ctx->keyLen = 64; + ctx->block_size = 1; + ctx->ivSz = AES_BLOCK_SIZE; + + if (iv != NULL) { + if (iv != ctx->iv) /* Valgrind error when src == dst */ + XMEMCPY(ctx->iv, iv, ctx->ivSz); + } + else + XMEMSET(ctx->iv, 0, AES_BLOCK_SIZE); + + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = wc_AesXtsSetKey(&ctx->cipher.xts, key, ctx->keyLen, + ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, NULL, 0); + if (ret != 0) { + WOLFSSL_MSG("wc_AesXtsSetKey() failed"); + return WOLFSSL_FAILURE; + } + } + } + #endif /* WOLFSSL_AES_256 */ + #endif /* HAVE_AES_XTS */ +#endif /* NO_AES */ + +#ifndef NO_DES3 + if (ctx->cipherType == DES_CBC_TYPE || + (type && XSTRNCMP(type, EVP_DES_CBC, EVP_DES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_DES_CBC"); + ctx->cipherType = DES_CBC_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CBC_MODE; + ctx->keyLen = 8; + ctx->block_size = DES_BLOCK_SIZE; + ctx->ivSz = DES_BLOCK_SIZE; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = wc_Des_SetKey(&ctx->cipher.des, key, iv, + ctx->enc ? 
DES_ENCRYPTION : DES_DECRYPTION); + if (ret != 0) + return WOLFSSL_FAILURE; + } + + if (iv && key == NULL) + wc_Des_SetIV(&ctx->cipher.des, iv); + } +#ifdef WOLFSSL_DES_ECB + else if (ctx->cipherType == DES_ECB_TYPE || + (type && XSTRNCMP(type, EVP_DES_ECB, EVP_DES_SIZE) == 0)) { + WOLFSSL_MSG("EVP_DES_ECB"); + ctx->cipherType = DES_ECB_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_ECB_MODE; + ctx->keyLen = 8; + ctx->block_size = DES_BLOCK_SIZE; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + WOLFSSL_MSG("Des_SetKey"); + ret = wc_Des_SetKey(&ctx->cipher.des, key, NULL, + ctx->enc ? DES_ENCRYPTION : DES_DECRYPTION); + if (ret != 0) + return WOLFSSL_FAILURE; + } + } +#endif + else if (ctx->cipherType == DES_EDE3_CBC_TYPE || + (type && + XSTRNCMP(type, EVP_DES_EDE3_CBC, EVP_DES_EDE3_SIZE) == 0)) { + WOLFSSL_MSG("EVP_DES_EDE3_CBC"); + ctx->cipherType = DES_EDE3_CBC_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CBC_MODE; + ctx->keyLen = 24; + ctx->block_size = DES_BLOCK_SIZE; + ctx->ivSz = DES_BLOCK_SIZE; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = wc_Des3_SetKey(&ctx->cipher.des3, key, iv, + ctx->enc ? DES_ENCRYPTION : DES_DECRYPTION); + if (ret != 0) + return WOLFSSL_FAILURE; + } + + if (iv && key == NULL) { + ret = wc_Des3_SetIV(&ctx->cipher.des3, iv); + if (ret != 0) + return WOLFSSL_FAILURE; + } + } + else if (ctx->cipherType == DES_EDE3_ECB_TYPE || + (type && + XSTRNCMP(type, EVP_DES_EDE3_ECB, EVP_DES_EDE3_SIZE) == 0)) { + WOLFSSL_MSG("EVP_DES_EDE3_ECB"); + ctx->cipherType = DES_EDE3_ECB_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_ECB_MODE; + ctx->keyLen = 24; + ctx->block_size = DES_BLOCK_SIZE; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = wc_Des3_SetKey(&ctx->cipher.des3, key, NULL, + ctx->enc ? DES_ENCRYPTION : DES_DECRYPTION); + if (ret != 0) + return WOLFSSL_FAILURE; + } + } +#endif /* NO_DES3 */ +#ifndef NO_RC4 + if (ctx->cipherType == ARC4_TYPE || (type && + XSTRNCMP(type, "ARC4", 4) == 0)) { + WOLFSSL_MSG("ARC4"); + ctx->cipherType = ARC4_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_STREAM_CIPHER; + ctx->block_size = 1; + if (ctx->keyLen == 0) /* user may have already set */ + ctx->keyLen = 16; /* default to 128 */ + if (key) + wc_Arc4SetKey(&ctx->cipher.arc4, key, ctx->keyLen); + } +#endif /* NO_RC4 */ +#ifdef HAVE_IDEA + if (ctx->cipherType == IDEA_CBC_TYPE || + (type && XSTRNCMP(type, EVP_IDEA_CBC, EVP_IDEA_SIZE) == 0)) { + WOLFSSL_MSG("EVP_IDEA_CBC"); + ctx->cipherType = IDEA_CBC_TYPE; + ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE; + ctx->flags |= WOLFSSL_EVP_CIPH_CBC_MODE; + ctx->keyLen = IDEA_KEY_SIZE; + ctx->block_size = 8; + ctx->ivSz = IDEA_BLOCK_SIZE; + if (enc == 0 || enc == 1) + ctx->enc = enc ? 1 : 0; + if (key) { + ret = wc_IdeaSetKey(&ctx->cipher.idea, key, (word16)ctx->keyLen, + iv, ctx->enc ? 
IDEA_ENCRYPTION :
+                                                          IDEA_DECRYPTION);
+                if (ret != 0)
+                    return WOLFSSL_FAILURE;
+            }
+
+            if (iv && key == NULL)
+                wc_IdeaSetIV(&ctx->cipher.idea, iv);
+        }
+#endif /* HAVE_IDEA */
+        if (ctx->cipherType == NULL_CIPHER_TYPE || (type &&
+                                        XSTRNCMP(type, "NULL", 4) == 0)) {
+            WOLFSSL_MSG("NULL cipher");
+            ctx->cipherType = NULL_CIPHER_TYPE;
+            ctx->keyLen = 0;
+            ctx->block_size = 16;
+        }
+#ifdef HAVE_WOLFSSL_EVP_CIPHER_CTX_IV
+        if (iv && iv != ctx->iv) {
+            if (wolfSSL_StoreExternalIV(ctx) != WOLFSSL_SUCCESS) {
+                return WOLFSSL_FAILURE;
+            }
+        }
+#endif
+        (void)ret; /* remove warning. If execution reaches this point, ret=0 */
+        return WOLFSSL_SUCCESS;
+    }
+
+    /* Returns the stored key length in bytes on ok, 0 on failure */
+    int wolfSSL_EVP_CIPHER_CTX_key_length(WOLFSSL_EVP_CIPHER_CTX* ctx)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_key_length");
+        if (ctx)
+            return ctx->keyLen;
+
+        return 0;   /* failure */
+    }
+
+    /* WOLFSSL_SUCCESS on ok */
+    int wolfSSL_EVP_CIPHER_CTX_set_key_length(WOLFSSL_EVP_CIPHER_CTX* ctx,
+                                              int keylen)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_set_key_length");
+        if (ctx)
+            ctx->keyLen = keylen;
+        else
+            return 0;   /* failure */
+
+        return WOLFSSL_SUCCESS;
+    }
+#if defined(HAVE_AESGCM)
+    /* returns WOLFSSL_SUCCESS on success, otherwise returns WOLFSSL_FAILURE */
+    int wolfSSL_EVP_CIPHER_CTX_set_iv_length(WOLFSSL_EVP_CIPHER_CTX* ctx,
+                                             int ivLen)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_set_iv_length");
+        if (ctx)
+            ctx->ivSz = ivLen;
+        else
+            return WOLFSSL_FAILURE;
+
+        return WOLFSSL_SUCCESS;
+    }
+
+    /* returns WOLFSSL_SUCCESS on success, otherwise returns WOLFSSL_FAILURE */
+    int wolfSSL_EVP_CIPHER_CTX_set_iv(WOLFSSL_EVP_CIPHER_CTX* ctx, byte* iv,
+                                      int ivLen)
+    {
+        int expectedIvLen;
+
+        WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_set_iv");
+        if (!ctx || !iv || !ivLen) {
+            return WOLFSSL_FAILURE;
+        }
+
+        expectedIvLen = wolfSSL_EVP_CIPHER_CTX_iv_length(ctx);
+
+        if (expectedIvLen == 0 || expectedIvLen != ivLen) {
+            WOLFSSL_MSG("Wrong ivLen value");
+            return WOLFSSL_FAILURE;
+        }
+
+        return wolfSSL_EVP_CipherInit(ctx, NULL, NULL, iv, -1);
+    }
+#endif
+
+    /* WOLFSSL_SUCCESS on ok */
+    int wolfSSL_EVP_Cipher(WOLFSSL_EVP_CIPHER_CTX* ctx, byte* dst, byte* src,
+                           word32 len)
+    {
+        int ret = 0;
+        WOLFSSL_ENTER("wolfSSL_EVP_Cipher");
+
+        if (ctx == NULL || src == NULL ||
+            (dst == NULL &&
+             ctx->cipherType != AES_128_GCM_TYPE &&
+             ctx->cipherType != AES_192_GCM_TYPE &&
+             ctx->cipherType != AES_256_GCM_TYPE)) {
+            WOLFSSL_MSG("Bad function argument");
+            return 0;  /* failure */
+        }
+
+        if (ctx->cipherType == 0xff) {
+            WOLFSSL_MSG("no init");
+            return 0;  /* failure */
+        }
+
+        switch (ctx->cipherType) {
+
+#ifndef NO_AES
+#ifdef HAVE_AES_CBC
+            case AES_128_CBC_TYPE :
+            case AES_192_CBC_TYPE :
+            case AES_256_CBC_TYPE :
+                WOLFSSL_MSG("AES CBC");
+                if (ctx->enc)
+                    ret = wc_AesCbcEncrypt(&ctx->cipher.aes, dst, src, len);
+                else
+                    ret = wc_AesCbcDecrypt(&ctx->cipher.aes, dst, src, len);
+                break;
+#endif /* HAVE_AES_CBC */
+
+#ifdef WOLFSSL_AES_CFB
+#if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+            case AES_128_CFB1_TYPE:
+            case AES_192_CFB1_TYPE:
+            case AES_256_CFB1_TYPE:
+                WOLFSSL_MSG("AES CFB1");
+                if (ctx->enc)
+                    ret = wc_AesCfb1Encrypt(&ctx->cipher.aes, dst, src, len);
+                else
+                    ret = wc_AesCfb1Decrypt(&ctx->cipher.aes, dst, src, len);
+                break;
+            case AES_128_CFB8_TYPE:
+            case AES_192_CFB8_TYPE:
+            case AES_256_CFB8_TYPE:
+                WOLFSSL_MSG("AES CFB8");
+                if (ctx->enc)
+                    ret = wc_AesCfb8Encrypt(&ctx->cipher.aes, dst, src, len);
+                else
+                    ret = wc_AesCfb8Decrypt(&ctx->cipher.aes, dst, src, len);
+                break;
+#endif /* 
!HAVE_SELFTEST && !HAVE_FIPS */ + case AES_128_CFB128_TYPE: + case AES_192_CFB128_TYPE: + case AES_256_CFB128_TYPE: + WOLFSSL_MSG("AES CFB128"); + if (ctx->enc) + ret = wc_AesCfbEncrypt(&ctx->cipher.aes, dst, src, len); + else + ret = wc_AesCfbDecrypt(&ctx->cipher.aes, dst, src, len); + break; +#endif /* WOLFSSL_AES_CFB */ +#if defined(WOLFSSL_AES_OFB) + case AES_128_OFB_TYPE: + case AES_192_OFB_TYPE: + case AES_256_OFB_TYPE: + WOLFSSL_MSG("AES OFB"); + if (ctx->enc) + ret = wc_AesOfbEncrypt(&ctx->cipher.aes, dst, src, len); + else + ret = wc_AesOfbDecrypt(&ctx->cipher.aes, dst, src, len); + break; +#endif /* WOLFSSL_AES_OFB */ +#if defined(WOLFSSL_AES_XTS) + case AES_128_XTS_TYPE: + case AES_256_XTS_TYPE: + WOLFSSL_MSG("AES XTS"); + if (ctx->enc) + ret = wc_AesXtsEncrypt(&ctx->cipher.xts, dst, src, len, + ctx->iv, ctx->ivSz); + else + ret = wc_AesXtsDecrypt(&ctx->cipher.xts, dst, src, len, + ctx->iv, ctx->ivSz); + break; +#endif /* WOLFSSL_AES_XTS */ + +#ifdef HAVE_AESGCM + case AES_128_GCM_TYPE : + case AES_192_GCM_TYPE : + case AES_256_GCM_TYPE : + WOLFSSL_MSG("AES GCM"); + if (ctx->enc) { + if (dst){ + /* encrypt confidential data*/ + ret = wc_AesGcmEncrypt(&ctx->cipher.aes, dst, src, len, + ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz, + NULL, 0); + } + else { + /* authenticated, non-confidential data */ + ret = wc_AesGcmEncrypt(&ctx->cipher.aes, NULL, NULL, 0, + ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz, + src, len); + /* Reset partial authTag error for AAD*/ + if (ret == AES_GCM_AUTH_E) + ret = 0; + } + } + else { + if (dst){ + /* decrypt confidential data*/ + ret = wc_AesGcmDecrypt(&ctx->cipher.aes, dst, src, len, + ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz, + NULL, 0); + } + else { + /* authenticated, non-confidential data*/ + ret = wc_AesGcmDecrypt(&ctx->cipher.aes, NULL, NULL, 0, + ctx->iv, ctx->ivSz, + ctx->authTag, ctx->authTagSz, + src, len); + /* Reset partial authTag error for AAD*/ + if (ret == AES_GCM_AUTH_E) + ret = 0; + } + } + break; +#endif /* HAVE_AESGCM */ +#ifdef HAVE_AES_ECB + case AES_128_ECB_TYPE : + case AES_192_ECB_TYPE : + case AES_256_ECB_TYPE : + WOLFSSL_MSG("AES ECB"); + if (ctx->enc) + ret = wc_AesEcbEncrypt(&ctx->cipher.aes, dst, src, len); + else + ret = wc_AesEcbDecrypt(&ctx->cipher.aes, dst, src, len); + break; +#endif +#ifdef WOLFSSL_AES_COUNTER + case AES_128_CTR_TYPE : + case AES_192_CTR_TYPE : + case AES_256_CTR_TYPE : + WOLFSSL_MSG("AES CTR"); + ret = wc_AesCtrEncrypt(&ctx->cipher.aes, dst, src, len); + break; +#endif /* WOLFSSL_AES_COUNTER */ +#endif /* NO_AES */ + +#ifndef NO_DES3 + case DES_CBC_TYPE : + WOLFSSL_MSG("DES CBC"); + if (ctx->enc) + wc_Des_CbcEncrypt(&ctx->cipher.des, dst, src, len); + else + wc_Des_CbcDecrypt(&ctx->cipher.des, dst, src, len); + break; + case DES_EDE3_CBC_TYPE : + WOLFSSL_MSG("DES3 CBC"); + if (ctx->enc) + ret = wc_Des3_CbcEncrypt(&ctx->cipher.des3, dst, src, len); + else + ret = wc_Des3_CbcDecrypt(&ctx->cipher.des3, dst, src, len); + break; +#ifdef WOLFSSL_DES_ECB + case DES_ECB_TYPE : + WOLFSSL_MSG("DES ECB"); + ret = wc_Des_EcbEncrypt(&ctx->cipher.des, dst, src, len); + break; + case DES_EDE3_ECB_TYPE : + WOLFSSL_MSG("DES3 ECB"); + ret = wc_Des3_EcbEncrypt(&ctx->cipher.des3, dst, src, len); + break; +#endif +#endif /* !NO_DES3 */ + +#ifndef NO_RC4 + case ARC4_TYPE : + WOLFSSL_MSG("ARC4"); + wc_Arc4Process(&ctx->cipher.arc4, dst, src, len); + break; +#endif + +#ifdef HAVE_IDEA + case IDEA_CBC_TYPE : + WOLFSSL_MSG("IDEA CBC"); + if (ctx->enc) + wc_IdeaCbcEncrypt(&ctx->cipher.idea, dst, src, len); 
+ else + wc_IdeaCbcDecrypt(&ctx->cipher.idea, dst, src, len); + break; +#endif + case NULL_CIPHER_TYPE : + WOLFSSL_MSG("NULL CIPHER"); + XMEMCPY(dst, src, len); + break; + + default: { + WOLFSSL_MSG("bad type"); + return 0; /* failure */ + } + } + + if (ret != 0) { + WOLFSSL_MSG("wolfSSL_EVP_Cipher failure"); + return 0; /* failure */ + } + + if (wolfSSL_StoreExternalIV(ctx) != WOLFSSL_SUCCESS) { + return WOLFSSL_FAILURE; + } + + WOLFSSL_MSG("wolfSSL_EVP_Cipher success"); + return WOLFSSL_SUCCESS; /* success */ + } + + /* WOLFSSL_SUCCESS on ok */ + int wolfSSL_EVP_DigestInit(WOLFSSL_EVP_MD_CTX* ctx, + const WOLFSSL_EVP_MD* md) + { + int ret = WOLFSSL_SUCCESS; + + WOLFSSL_ENTER("EVP_DigestInit"); + + if (ctx == NULL || md == NULL) { + return BAD_FUNC_ARG; + } + + + #ifdef WOLFSSL_ASYNC_CRYPT + /* compile-time validation of ASYNC_CTX_SIZE */ + typedef char async_test[WC_ASYNC_DEV_SIZE >= sizeof(WC_ASYNC_DEV) ? + 1 : -1]; + (void)sizeof(async_test); + #endif + + /* Set to 0 if no match */ + ctx->macType = wolfSSL_EVP_md2macType(md); + if (XSTRNCMP(md, "SHA256", 6) == 0) { + ret = wolfSSL_SHA256_Init(&(ctx->hash.digest.sha256)); + } + #ifdef WOLFSSL_SHA224 + else if (XSTRNCMP(md, "SHA224", 6) == 0) { + ret = wolfSSL_SHA224_Init(&(ctx->hash.digest.sha224)); + } + #endif + #ifdef WOLFSSL_SHA384 + else if (XSTRNCMP(md, "SHA384", 6) == 0) { + ret = wolfSSL_SHA384_Init(&(ctx->hash.digest.sha384)); + } + #endif + #ifdef WOLFSSL_SHA512 + else if (XSTRNCMP(md, "SHA512", 6) == 0) { + ret = wolfSSL_SHA512_Init(&(ctx->hash.digest.sha512)); + } + #endif + #ifndef NO_MD4 + else if (XSTRNCMP(md, "MD4", 3) == 0) { + wolfSSL_MD4_Init(&(ctx->hash.digest.md4)); + } + #endif + #ifndef NO_MD5 + else if (XSTRNCMP(md, "MD5", 3) == 0) { + ret = wolfSSL_MD5_Init(&(ctx->hash.digest.md5)); + } + #endif +#ifdef WOLFSSL_SHA3 + #ifndef WOLFSSL_NOSHA3_224 + else if (XSTRNCMP(md, "SHA3_224", 8) == 0) { + ret = wolfSSL_SHA3_224_Init(&(ctx->hash.digest.sha3_224)); + } + #endif + #ifndef WOLFSSL_NOSHA3_256 + else if (XSTRNCMP(md, "SHA3_256", 8) == 0) { + ret = wolfSSL_SHA3_256_Init(&(ctx->hash.digest.sha3_256)); + } + #endif + else if (XSTRNCMP(md, "SHA3_384", 8) == 0) { + ret = wolfSSL_SHA3_384_Init(&(ctx->hash.digest.sha3_384)); + } + #ifndef WOLFSSL_NOSHA3_512 + else if (XSTRNCMP(md, "SHA3_512", 8) == 0) { + ret = wolfSSL_SHA3_512_Init(&(ctx->hash.digest.sha3_512)); + } + #endif +#endif + #ifndef NO_SHA + /* has to be last since would pick or 224, 256, 384, or 512 too */ + else if (XSTRNCMP(md, "SHA", 3) == 0) { + ret = wolfSSL_SHA_Init(&(ctx->hash.digest.sha)); + } + #endif /* NO_SHA */ + else { + ctx->macType = WC_HASH_TYPE_NONE; + return BAD_FUNC_ARG; + } + + return ret; + } + + /* WOLFSSL_SUCCESS on ok, WOLFSSL_FAILURE on failure */ + int wolfSSL_EVP_DigestUpdate(WOLFSSL_EVP_MD_CTX* ctx, const void* data, + size_t sz) + { + int macType; + + WOLFSSL_ENTER("EVP_DigestUpdate"); + + macType = wolfSSL_EVP_md2macType(EVP_MD_CTX_md(ctx)); + switch (macType) { +#ifndef NO_MD4 + case WC_HASH_TYPE_MD4: + wolfSSL_MD4_Update((MD4_CTX*)&ctx->hash, data, + (unsigned long)sz); + break; +#endif +#ifndef NO_MD5 + case WC_HASH_TYPE_MD5: + wolfSSL_MD5_Update((MD5_CTX*)&ctx->hash, data, + (unsigned long)sz); + break; +#endif +#ifndef NO_SHA + case WC_HASH_TYPE_SHA: + wolfSSL_SHA_Update((SHA_CTX*)&ctx->hash, data, + (unsigned long)sz); + break; +#endif +#ifdef WOLFSSL_SHA224 + case WC_HASH_TYPE_SHA224: + wolfSSL_SHA224_Update((SHA224_CTX*)&ctx->hash, data, + (unsigned long)sz); + break; +#endif +#ifndef NO_SHA256 + case WC_HASH_TYPE_SHA256: 
+ wolfSSL_SHA256_Update((SHA256_CTX*)&ctx->hash, data, + (unsigned long)sz); + break; +#endif /* !NO_SHA256 */ +#ifdef WOLFSSL_SHA384 + case WC_HASH_TYPE_SHA384: + wolfSSL_SHA384_Update((SHA384_CTX*)&ctx->hash, data, + (unsigned long)sz); + break; +#endif +#ifdef WOLFSSL_SHA512 + case WC_HASH_TYPE_SHA512: + wolfSSL_SHA512_Update((SHA512_CTX*)&ctx->hash, data, + (unsigned long)sz); + break; +#endif /* WOLFSSL_SHA512 */ + #ifdef WOLFSSL_SHA3 + #ifndef WOLFSSL_NOSHA3_224 + case WC_HASH_TYPE_SHA3_224: + wolfSSL_SHA3_224_Update((SHA3_224_CTX*)&ctx->hash, data, + (unsigned long)sz); + break; + #endif + #ifndef WOLFSSL_NOSHA3_256 + case WC_HASH_TYPE_SHA3_256: + wolfSSL_SHA3_256_Update((SHA3_256_CTX*)&ctx->hash, data, + (unsigned long)sz); + break; + #endif + case WC_HASH_TYPE_SHA3_384: + wolfSSL_SHA3_384_Update((SHA3_384_CTX*)&ctx->hash, data, + (unsigned long)sz); + break; + #ifndef WOLFSSL_NOSHA3_512 + case WC_HASH_TYPE_SHA3_512: + wolfSSL_SHA3_512_Update((SHA3_512_CTX*)&ctx->hash, data, + (unsigned long)sz); + break; + #endif + #endif + default: + return WOLFSSL_FAILURE; + } + + return WOLFSSL_SUCCESS; + } + + /* WOLFSSL_SUCCESS on ok */ + int wolfSSL_EVP_DigestFinal(WOLFSSL_EVP_MD_CTX* ctx, unsigned char* md, + unsigned int* s) + { + int macType; + + WOLFSSL_ENTER("EVP_DigestFinal"); + macType = wolfSSL_EVP_md2macType(EVP_MD_CTX_md(ctx)); + switch (macType) { +#ifndef NO_MD4 + case WC_HASH_TYPE_MD4: + wolfSSL_MD4_Final(md, (MD4_CTX*)&ctx->hash); + if (s) *s = MD4_DIGEST_SIZE; + break; +#endif +#ifndef NO_MD5 + case WC_HASH_TYPE_MD5: + wolfSSL_MD5_Final(md, (MD5_CTX*)&ctx->hash); + if (s) *s = WC_MD5_DIGEST_SIZE; + break; +#endif +#ifndef NO_SHA + case WC_HASH_TYPE_SHA: + wolfSSL_SHA_Final(md, (SHA_CTX*)&ctx->hash); + if (s) *s = WC_SHA_DIGEST_SIZE; + break; +#endif +#ifdef WOLFSSL_SHA224 + case WC_HASH_TYPE_SHA224: + wolfSSL_SHA224_Final(md, (SHA224_CTX*)&ctx->hash); + if (s) *s = WC_SHA224_DIGEST_SIZE; + break; +#endif +#ifndef NO_SHA256 + case WC_HASH_TYPE_SHA256: + wolfSSL_SHA256_Final(md, (SHA256_CTX*)&ctx->hash); + if (s) *s = WC_SHA256_DIGEST_SIZE; + break; +#endif /* !NO_SHA256 */ +#ifdef WOLFSSL_SHA384 + case WC_HASH_TYPE_SHA384: + wolfSSL_SHA384_Final(md, (SHA384_CTX*)&ctx->hash); + if (s) *s = WC_SHA384_DIGEST_SIZE; + break; +#endif +#ifdef WOLFSSL_SHA512 + case WC_HASH_TYPE_SHA512: + wolfSSL_SHA512_Final(md, (SHA512_CTX*)&ctx->hash); + if (s) *s = WC_SHA512_DIGEST_SIZE; + break; +#endif /* WOLFSSL_SHA512 */ + #ifdef WOLFSSL_SHA3 + #ifndef WOLFSSL_NOSHA3_224 + case WC_HASH_TYPE_SHA3_224: + wolfSSL_SHA3_224_Final(md, (SHA3_224_CTX*)&ctx->hash); + if (s) *s = WC_SHA3_224_DIGEST_SIZE; + break; + #endif + #ifndef WOLFSSL_NOSHA3_256 + case WC_HASH_TYPE_SHA3_256: + wolfSSL_SHA3_256_Final(md, (SHA3_256_CTX*)&ctx->hash); + if (s) *s = WC_SHA3_256_DIGEST_SIZE; + break; + #endif + case WC_HASH_TYPE_SHA3_384: + wolfSSL_SHA3_384_Final(md, (SHA3_384_CTX*)&ctx->hash); + if (s) *s = WC_SHA3_384_DIGEST_SIZE; + break; + #ifndef WOLFSSL_NOSHA3_512 + case WC_HASH_TYPE_SHA3_512: + wolfSSL_SHA3_512_Final(md, (SHA3_512_CTX*)&ctx->hash); + if (s) *s = WC_SHA3_512_DIGEST_SIZE; + break; + #endif + #endif + default: + return WOLFSSL_FAILURE; + } + + return WOLFSSL_SUCCESS; + } + + /* WOLFSSL_SUCCESS on ok */ + int wolfSSL_EVP_DigestFinal_ex(WOLFSSL_EVP_MD_CTX* ctx, unsigned char* md, + unsigned int* s) + { + WOLFSSL_ENTER("EVP_DigestFinal_ex"); + return EVP_DigestFinal(ctx, md, s); + } + + void wolfSSL_EVP_cleanup(void) + { + /* nothing to do here */ + } + +const WOLFSSL_EVP_MD* 
wolfSSL_EVP_get_digestbynid(int id) +{ + WOLFSSL_MSG("wolfSSL_get_digestbynid"); + + switch(id) { +#ifndef NO_MD5 + case NID_md5: + return wolfSSL_EVP_md5(); +#endif +#ifndef NO_SHA + case NID_sha1: + return wolfSSL_EVP_sha1(); +#endif + default: + WOLFSSL_MSG("Bad digest id value"); + } + + return NULL; +} + +#ifndef NO_RSA +WOLFSSL_RSA* wolfSSL_EVP_PKEY_get0_RSA(WOLFSSL_EVP_PKEY *pkey) +{ + if (!pkey) { + return NULL; + } + return pkey->rsa; +} + +WOLFSSL_RSA* wolfSSL_EVP_PKEY_get1_RSA(WOLFSSL_EVP_PKEY* key) +{ + WOLFSSL_RSA* local; + + WOLFSSL_MSG("wolfSSL_EVP_PKEY_get1_RSA"); + + if (key == NULL) { + return NULL; + } + + local = wolfSSL_RSA_new(); + if (local == NULL) { + WOLFSSL_MSG("Error creating a new WOLFSSL_RSA structure"); + return NULL; + } + + if (key->type == EVP_PKEY_RSA) { + if (wolfSSL_RSA_LoadDer(local, (const unsigned char*)key->pkey.ptr, + key->pkey_sz) != SSL_SUCCESS) { + /* now try public key */ + if (wolfSSL_RSA_LoadDer_ex(local, + (const unsigned char*)key->pkey.ptr, key->pkey_sz, + WOLFSSL_RSA_LOAD_PUBLIC) != SSL_SUCCESS) { + wolfSSL_RSA_free(local); + local = NULL; + } + } + } + else { + WOLFSSL_MSG("WOLFSSL_EVP_PKEY does not hold an RSA key"); + wolfSSL_RSA_free(local); + local = NULL; + } + return local; +} + +/* with set1 functions the pkey struct does not own the RSA structure + * + * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure + */ +int wolfSSL_EVP_PKEY_set1_RSA(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_RSA *key) +{ +#if defined(WOLFSSL_KEY_GEN) && !defined(HAVE_USER_RSA) + int derMax = 0; + int derSz = 0; + byte* derBuf = NULL; + RsaKey* rsa = NULL; +#endif + WOLFSSL_ENTER("wolfSSL_EVP_PKEY_set1_RSA"); + if ((pkey == NULL) || (key == NULL)) + return WOLFSSL_FAILURE; + + if (pkey->rsa != NULL && pkey->ownRsa == 1) { + wolfSSL_RSA_free(pkey->rsa); + } + pkey->rsa = key; + pkey->ownRsa = 0; /* pkey does not own RSA */ + pkey->type = EVP_PKEY_RSA; + if (key->inSet == 0) { + if (SetRsaInternal(key) != WOLFSSL_SUCCESS) { + WOLFSSL_MSG("SetRsaInternal failed"); + return WOLFSSL_FAILURE; + } + } + +#if defined(WOLFSSL_KEY_GEN) && !defined(HAVE_USER_RSA) + rsa = (RsaKey*)key->internal; + /* 5 > size of n, d, p, q, d%(p-1), d(q-1), 1/q%p, e + ASN.1 additional + * information */ + derMax = 5 * wolfSSL_RSA_size(key) + (2 * AES_BLOCK_SIZE); + + derBuf = (byte*)XMALLOC(derMax, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (derBuf == NULL) { + WOLFSSL_MSG("malloc failed"); + return WOLFSSL_FAILURE; + } + + if (rsa->type == RSA_PRIVATE) { + /* Private key to DER */ + derSz = wc_RsaKeyToDer(rsa, derBuf, derMax); + } + else { + /* Public key to DER */ + derSz = wc_RsaKeyToPublicDer(rsa, derBuf, derMax); + } + + if (derSz < 0) { + if (rsa->type == RSA_PRIVATE) { + WOLFSSL_MSG("wc_RsaKeyToDer failed"); + } + else { + WOLFSSL_MSG("wc_RsaKeyToPublicDer failed"); + } + XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER); + return WOLFSSL_FAILURE; + } + + pkey->pkey.ptr = (char*)XMALLOC(derSz, pkey->heap, DYNAMIC_TYPE_DER); + if (pkey->pkey.ptr == NULL) { + WOLFSSL_MSG("key malloc failed"); + XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER); + return WOLFSSL_FAILURE; + } + pkey->pkey_sz = derSz; + XMEMCPY(pkey->pkey.ptr, derBuf, derSz); + XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif /* WOLFSSL_KEY_GEN && !HAVE_USER_RSA */ + +#ifdef WC_RSA_BLINDING + if (key->ownRng == 0) { + if (wc_RsaSetRNG((RsaKey*)(pkey->rsa->internal), &(pkey->rng)) != 0) { + WOLFSSL_MSG("Error setting RSA rng"); + return WOLFSSL_FAILURE; + } + } +#endif + return WOLFSSL_SUCCESS; +} 
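+
+/* Usage sketch (illustrative only, not part of this change): with the set1
+ * API the caller keeps ownership of the RSA object, so both handles must be
+ * released independently:
+ *
+ *     WOLFSSL_EVP_PKEY* pkey = wolfSSL_EVP_PKEY_new();
+ *     WOLFSSL_RSA*      rsa  = wolfSSL_RSA_new();
+ *     if (pkey != NULL && rsa != NULL &&
+ *             wolfSSL_EVP_PKEY_set1_RSA(pkey, rsa) == WOLFSSL_SUCCESS) {
+ *         (use pkey)
+ *     }
+ *     wolfSSL_EVP_PKEY_free(pkey);  (does not free rsa, ownRsa is 0)
+ *     wolfSSL_RSA_free(rsa);
+ */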
+#endif /* !NO_RSA */ + +#if !defined (NO_DSA) && !defined(HAVE_SELFTEST) && defined(WOLFSSL_KEY_GEN) +/* with set1 functions the pkey struct does not own the DSA structure + * + * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure + */ +int wolfSSL_EVP_PKEY_set1_DSA(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_DSA *key) +{ + int derMax = 0; + int derSz = 0; + DsaKey* dsa = NULL; + byte* derBuf = NULL; + + WOLFSSL_ENTER("wolfSSL_EVP_PKEY_set1_DSA"); + + if((pkey == NULL) || (key == NULL))return WOLFSSL_FAILURE; + if (pkey->dsa != NULL && pkey->ownDsa == 1) { + wolfSSL_DSA_free(pkey->dsa); + } + pkey->dsa = key; + pkey->ownDsa = 0; /* pkey does not own DSA */ + pkey->type = EVP_PKEY_DSA; + if (key->inSet == 0) { + if (SetDsaInternal(key) != WOLFSSL_SUCCESS) { + WOLFSSL_MSG("SetDsaInternal failed"); + return WOLFSSL_FAILURE; + } + } + dsa = (DsaKey*)key->internal; + + /* 4 > size of pub, priv, p, q, g + ASN.1 additional information */ + derMax = 4 * wolfSSL_BN_num_bytes(key->g) + AES_BLOCK_SIZE; + + derBuf = (byte*)XMALLOC(derMax, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (derBuf == NULL) { + WOLFSSL_MSG("malloc failed"); + return WOLFSSL_FAILURE; + } + + if (dsa->type == DSA_PRIVATE) { + /* Private key to DER */ + derSz = wc_DsaKeyToDer(dsa, derBuf, derMax); + } + else { + /* Public key to DER */ + derSz = wc_DsaKeyToPublicDer(dsa, derBuf, derMax); + } + + if (derSz < 0) { + if (dsa->type == DSA_PRIVATE) { + WOLFSSL_MSG("wc_DsaKeyToDer failed"); + } + else { + WOLFSSL_MSG("wc_DsaKeyToPublicDer failed"); + } + XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER); + return WOLFSSL_FAILURE; + } + + pkey->pkey.ptr = (char*)XMALLOC(derSz, pkey->heap, DYNAMIC_TYPE_DER); + if (pkey->pkey.ptr == NULL) { + WOLFSSL_MSG("key malloc failed"); + XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER); + return WOLFSSL_FAILURE; + } + pkey->pkey_sz = derSz; + XMEMCPY(pkey->pkey.ptr, derBuf, derSz); + XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER); + + return WOLFSSL_SUCCESS; +} + +WOLFSSL_DSA* wolfSSL_EVP_PKEY_get1_DSA(WOLFSSL_EVP_PKEY* key) +{ + WOLFSSL_DSA* local; + + WOLFSSL_ENTER("wolfSSL_EVP_PKEY_get1_DSA"); + + if (key == NULL) { + WOLFSSL_MSG("Bad function argument"); + return NULL; + } + + local = wolfSSL_DSA_new(); + if (local == NULL) { + WOLFSSL_MSG("Error creating a new WOLFSSL_DSA structure"); + return NULL; + } + + if (key->type == EVP_PKEY_DSA) { + if (wolfSSL_DSA_LoadDer(local, (const unsigned char*)key->pkey.ptr, + key->pkey_sz) != SSL_SUCCESS) { + /* now try public key */ + if (wolfSSL_DSA_LoadDer_ex(local, + (const unsigned char*)key->pkey.ptr, key->pkey_sz, + WOLFSSL_DSA_LOAD_PUBLIC) != SSL_SUCCESS) { + wolfSSL_DSA_free(local); + local = NULL; + } + } + } + else { + WOLFSSL_MSG("WOLFSSL_EVP_PKEY does not hold a DSA key"); + wolfSSL_DSA_free(local); + local = NULL; + } + return local; +} +#endif /* !NO_DSA && !HAVE_SELFTEST && WOLFSSL_KEY_GEN */ + +#ifdef HAVE_ECC +WOLFSSL_EC_KEY *wolfSSL_EVP_PKEY_get0_EC_KEY(WOLFSSL_EVP_PKEY *pkey) +{ + WOLFSSL_EC_KEY *eckey = NULL; + if (pkey) { +#ifdef HAVE_ECC + eckey = pkey->ecc; +#endif + } + return eckey; +} + +WOLFSSL_EC_KEY* wolfSSL_EVP_PKEY_get1_EC_KEY(WOLFSSL_EVP_PKEY* key) +{ + WOLFSSL_EC_KEY* local; + WOLFSSL_ENTER("wolfSSL_EVP_PKEY_get1_EC_KEY"); + + if (key == NULL) { + return NULL; + } + + local = wolfSSL_EC_KEY_new(); + if (local == NULL) { + WOLFSSL_MSG("Error creating a new WOLFSSL_EC_KEY structure"); + return NULL; + } + + if (key->type == EVP_PKEY_EC) { + if (wolfSSL_EC_KEY_LoadDer(local, (const unsigned char*)key->pkey.ptr, + 
key->pkey_sz) != SSL_SUCCESS) { + /* now try public key */ + if (wolfSSL_EC_KEY_LoadDer_ex(local, + (const unsigned char*)key->pkey.ptr, + key->pkey_sz, WOLFSSL_EC_KEY_LOAD_PUBLIC) != SSL_SUCCESS) { + + wolfSSL_EC_KEY_free(local); + local = NULL; + } + } + } + else { + WOLFSSL_MSG("WOLFSSL_EVP_PKEY does not hold an EC key"); + wolfSSL_EC_KEY_free(local); + local = NULL; + } +#ifdef OPENSSL_ALL + if (!local && key->ecc) { + local = wolfSSL_EC_KEY_dup(key->ecc); + } +#endif + return local; +} +#endif /* HAVE_ECC */ + +#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT) +#if !defined(NO_DH) && !defined(NO_FILESYSTEM) +/* with set1 functions the pkey struct does not own the DH structure + * Build the following DH Key format from the passed in WOLFSSL_DH + * then store in WOLFSSL_EVP_PKEY in DER format. + * + * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure + */ +int wolfSSL_EVP_PKEY_set1_DH(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_DH *key) +{ + byte havePublic = 0, havePrivate = 0; + int ret; + word32 derSz = 0; + byte* derBuf = NULL; + DhKey* dhkey = NULL; + + WOLFSSL_ENTER("wolfSSL_EVP_PKEY_set1_DH"); + + if (pkey == NULL || key == NULL) + return WOLFSSL_FAILURE; + + if (pkey->dh != NULL && pkey->ownDh == 1) + wolfSSL_DH_free(pkey->dh); + + pkey->dh = key; + pkey->ownDh = 0; /* pkey does not own DH */ + pkey->type = EVP_PKEY_DH; + if (key->inSet == 0) { + if (SetDhInternal(key) != WOLFSSL_SUCCESS) { + WOLFSSL_MSG("SetDhInternal failed"); + return WOLFSSL_FAILURE; + } + } + + dhkey = (DhKey*)key->internal; + + havePublic = mp_unsigned_bin_size(&dhkey->pub) > 0; + havePrivate = mp_unsigned_bin_size(&dhkey->priv) > 0; + + /* Get size of DER buffer only */ + if (havePublic && !havePrivate) { + ret = wc_DhPubKeyToDer(dhkey, NULL, &derSz); + } else if (havePrivate && !havePublic) { + ret = wc_DhPrivKeyToDer(dhkey, NULL, &derSz); + } else { + ret = wc_DhParamsToDer(dhkey,NULL,&derSz); + } + + if (derSz <= 0 || ret != LENGTH_ONLY_E) { + WOLFSSL_MSG("Failed to get size of DH Key"); + return WOLFSSL_FAILURE; + } + + derBuf = (byte*)XMALLOC(derSz, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (derBuf == NULL) { + WOLFSSL_MSG("malloc failed"); + return WOLFSSL_FAILURE; + } + + /* Fill DER buffer */ + if (havePublic && !havePrivate) { + ret = wc_DhPubKeyToDer(dhkey, derBuf, &derSz); + } else if (havePrivate && !havePublic) { + ret = wc_DhPrivKeyToDer(dhkey, derBuf, &derSz); + } else { + ret = wc_DhParamsToDer(dhkey,derBuf,&derSz); + } + + if (ret <= 0) { + WOLFSSL_MSG("Failed to export DH Key"); + XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER); + return WOLFSSL_FAILURE; + } + + /* Store DH key into pkey (DER format) */ + pkey->pkey.ptr = (char*)derBuf; + pkey->pkey_sz = derSz; + + return WOLFSSL_SUCCESS; +} + +WOLFSSL_DH* wolfSSL_EVP_PKEY_get0_DH(WOLFSSL_EVP_PKEY* key) +{ + if (!key) { + return NULL; + } + return key->dh; +} + +WOLFSSL_DH* wolfSSL_EVP_PKEY_get1_DH(WOLFSSL_EVP_PKEY* key) +{ + WOLFSSL_DH* local = NULL; + + WOLFSSL_ENTER("wolfSSL_EVP_PKEY_get1_DH"); + + if (key == NULL || key->dh == NULL) { + WOLFSSL_MSG("Bad function argument"); + return NULL; + } + + if (key->type == EVP_PKEY_DH) { + local = wolfSSL_DH_new(); + if (local == NULL) { + WOLFSSL_MSG("Error creating a new WOLFSSL_DH structure"); + return NULL; + } + + if (wolfSSL_DH_LoadDer(local, (const unsigned char*)key->pkey.ptr, + key->pkey_sz) != SSL_SUCCESS) { + wolfSSL_DH_free(local); + WOLFSSL_MSG("Error wolfSSL_DH_LoadDer"); + local = NULL; + } + } + else { + WOLFSSL_MSG("WOLFSSL_EVP_PKEY does not hold a DH key"); + 
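+        /* note: 'local' is still NULL in this branch (it is only allocated
+         * when the key type matches), so the free below is a harmless
+         * no-op */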
wolfSSL_DH_free(local);
+        return NULL;
+    }
+
+    return local;
+}
+#endif /* !NO_DH && !NO_FILESYSTEM */
+
+int wolfSSL_EVP_PKEY_assign(WOLFSSL_EVP_PKEY *pkey, int type, void *key)
+{
+    int ret;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_assign");
+
+    /* pkey and key are checked for NULL in the subsequent assign functions */
+    switch(type) {
+    #ifndef NO_RSA
+        case EVP_PKEY_RSA:
+            ret = wolfSSL_EVP_PKEY_assign_RSA(pkey, (WOLFSSL_RSA*)key);
+            break;
+    #endif
+    #ifndef NO_DSA
+        case EVP_PKEY_DSA:
+            ret = wolfSSL_EVP_PKEY_assign_DSA(pkey, (WOLFSSL_DSA*)key);
+            break;
+    #endif
+    #ifdef HAVE_ECC
+        case EVP_PKEY_EC:
+            ret = wolfSSL_EVP_PKEY_assign_EC_KEY(pkey, (WOLFSSL_EC_KEY*)key);
+            break;
+    #endif
+    #ifndef NO_DH
+        case EVP_PKEY_DH:
+            ret = wolfSSL_EVP_PKEY_assign_DH(pkey, (WOLFSSL_DH*)key);
+            break;
+    #endif
+        default:
+            WOLFSSL_MSG("Unknown EVP_PKEY type in wolfSSL_EVP_PKEY_assign.");
+            ret = WOLFSSL_FAILURE;
+    }
+
+    return ret;
+}
+#endif /* WOLFSSL_QT || OPENSSL_ALL */
+
+#if defined(HAVE_ECC)
+/* try and populate public pkey_sz and pkey.ptr */
+static void ECC_populate_EVP_PKEY(EVP_PKEY* pkey, ecc_key* ecc)
+{
+    int ret;
+    if (!pkey || !ecc)
+        return;
+    if ((ret = wc_EccPublicKeyDerSize(ecc, 1)) > 0) {
+        int derSz = ret;
+        char* derBuf = (char*)XMALLOC(derSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (derBuf) {
+            ret = wc_EccPublicKeyToDer(ecc, (byte*)derBuf, derSz, 1);
+            if (ret >= 0) {
+                if (pkey->pkey.ptr) {
+                    XFREE(pkey->pkey.ptr, NULL, DYNAMIC_TYPE_OPENSSL);
+                }
+                pkey->pkey_sz = ret;
+                pkey->pkey.ptr = derBuf;
+            }
+            else { /* failure - okay to ignore */
+                XFREE(derBuf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                derBuf = NULL;
+            }
+        }
+    }
+}
+
+WOLFSSL_API int wolfSSL_EVP_PKEY_set1_EC_KEY(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_EC_KEY *key)
+{
+#ifdef HAVE_ECC
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_set1_EC_KEY");
+    if ((pkey == NULL) || (key == NULL))
+        return WOLFSSL_FAILURE;
+#ifndef NO_RSA
+    if (pkey->rsa != NULL && pkey->ownRsa == 1) {
+        wolfSSL_RSA_free(pkey->rsa);
+    }
+    pkey->ownRsa = 0;
+#endif
+#ifndef NO_DSA
+    if (pkey->dsa != NULL && pkey->ownDsa == 1) {
+        wolfSSL_DSA_free(pkey->dsa);
+    }
+    pkey->ownDsa = 0;
+#endif
+#ifndef NO_DH
+    if (pkey->dh != NULL && pkey->ownDh == 1) {
+        wolfSSL_DH_free(pkey->dh);
+    }
+    pkey->ownDh = 0;
+#endif
+    if (pkey->ecc != NULL && pkey->ownEcc == 1) {
+        wolfSSL_EC_KEY_free(pkey->ecc);
+    }
+    pkey->ecc = key;
+    pkey->ownEcc = 0; /* pkey does not own EC key */
+    pkey->type = EVP_PKEY_EC;
+    ECC_populate_EVP_PKEY(pkey, (ecc_key*)key->internal);
+    return WOLFSSL_SUCCESS;
+#else
+    (void)pkey;
+    (void)key;
+    return WOLFSSL_FAILURE;
+#endif
+}
+
+void* wolfSSL_EVP_X_STATE(const WOLFSSL_EVP_CIPHER_CTX* ctx)
+{
+    WOLFSSL_MSG("wolfSSL_EVP_X_STATE");
+
+    if (ctx) {
+        switch (ctx->cipherType) {
+            case ARC4_TYPE:
+                WOLFSSL_MSG("returning arc4 state");
+                return (void*)&ctx->cipher.arc4.x;
+
+            default:
+                WOLFSSL_MSG("bad x state type");
+                return NULL;
+        }
+    }
+
+    return NULL;
+}
+
+int wolfSSL_EVP_PKEY_assign_EC_KEY(EVP_PKEY* pkey, WOLFSSL_EC_KEY* key)
+{
+    if (pkey == NULL || key == NULL)
+        return WOLFSSL_FAILURE;
+
+    pkey->type = EVP_PKEY_EC;
+    pkey->ecc = key;
+    pkey->ownEcc = 1;
+
+    /* try and populate public pkey_sz and pkey.ptr */
+    ECC_populate_EVP_PKEY(pkey, (ecc_key*)key->internal);
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* HAVE_ECC */
+
+#ifndef NO_WOLFSSL_STUB
+const WOLFSSL_EVP_MD* wolfSSL_EVP_ripemd160(void)
+{
+    WOLFSSL_MSG("wolfSSL_ripemd160");
+    WOLFSSL_STUB("EVP_ripemd160");
+    return NULL;
+}
+#endif
+
+
+int wolfSSL_EVP_MD_block_size(const WOLFSSL_EVP_MD* type)
+{
+    
WOLFSSL_MSG("wolfSSL_EVP_MD_block_size"); + + if (type == NULL) { + WOLFSSL_MSG("No md type arg"); + return BAD_FUNC_ARG; + } + + if (XSTRNCMP(type, "SHA256", 6) == 0) { + return WC_SHA256_BLOCK_SIZE; + } +#ifndef NO_MD5 + else if (XSTRNCMP(type, "MD5", 3) == 0) { + return WC_MD5_BLOCK_SIZE; + } +#endif +#ifdef WOLFSSL_SHA224 + else if (XSTRNCMP(type, "SHA224", 6) == 0) { + return WC_SHA224_BLOCK_SIZE; + } +#endif +#ifdef WOLFSSL_SHA384 + else if (XSTRNCMP(type, "SHA384", 6) == 0) { + return WC_SHA384_BLOCK_SIZE; + } +#endif +#ifdef WOLFSSL_SHA512 + else if (XSTRNCMP(type, "SHA512", 6) == 0) { + return WC_SHA512_BLOCK_SIZE; + } +#endif +#ifndef NO_SHA + /* has to be last since would pick or 256, 384, or 512 too */ + else if (XSTRNCMP(type, "SHA", 3) == 0) { + return WC_SHA_BLOCK_SIZE; + } +#endif + + return BAD_FUNC_ARG; +} + +int wolfSSL_EVP_MD_size(const WOLFSSL_EVP_MD* type) +{ + WOLFSSL_MSG("wolfSSL_EVP_MD_size"); + + if (type == NULL) { + WOLFSSL_MSG("No md type arg"); + return BAD_FUNC_ARG; + } + + if (XSTRNCMP(type, "SHA256", 6) == 0) { + return WC_SHA256_DIGEST_SIZE; + } +#ifndef NO_MD5 + else if (XSTRNCMP(type, "MD5", 3) == 0) { + return WC_MD5_DIGEST_SIZE; + } +#endif +#ifdef WOLFSSL_SHA224 + else if (XSTRNCMP(type, "SHA224", 6) == 0) { + return WC_SHA224_DIGEST_SIZE; + } +#endif +#ifdef WOLFSSL_SHA384 + else if (XSTRNCMP(type, "SHA384", 6) == 0) { + return WC_SHA384_DIGEST_SIZE; + } +#endif +#ifdef WOLFSSL_SHA512 + else if (XSTRNCMP(type, "SHA512", 6) == 0) { + return WC_SHA512_DIGEST_SIZE; + } +#endif +#ifndef NO_SHA + /* has to be last since would pick or 256, 384, or 512 too */ + else if (XSTRNCMP(type, "SHA", 3) == 0) { + return WC_SHA_DIGEST_SIZE; + } +#endif + + return BAD_FUNC_ARG; +} + + +int wolfSSL_EVP_CIPHER_CTX_iv_length(const WOLFSSL_EVP_CIPHER_CTX* ctx) +{ + WOLFSSL_MSG("wolfSSL_EVP_CIPHER_CTX_iv_length"); + + switch (ctx->cipherType) { + +#ifdef HAVE_AES_CBC + case AES_128_CBC_TYPE : + case AES_192_CBC_TYPE : + case AES_256_CBC_TYPE : + WOLFSSL_MSG("AES CBC"); + return AES_BLOCK_SIZE; +#endif +#ifdef HAVE_AESGCM + case AES_128_GCM_TYPE : + case AES_192_GCM_TYPE : + case AES_256_GCM_TYPE : + WOLFSSL_MSG("AES GCM"); + return GCM_NONCE_MID_SZ; +#endif +#ifdef WOLFSSL_AES_COUNTER + case AES_128_CTR_TYPE : + case AES_192_CTR_TYPE : + case AES_256_CTR_TYPE : + WOLFSSL_MSG("AES CTR"); + return AES_BLOCK_SIZE; +#endif +#ifndef NO_DES3 + case DES_CBC_TYPE : + WOLFSSL_MSG("DES CBC"); + return DES_BLOCK_SIZE; + + case DES_EDE3_CBC_TYPE : + WOLFSSL_MSG("DES EDE3 CBC"); + return DES_BLOCK_SIZE; +#endif +#ifdef HAVE_IDEA + case IDEA_CBC_TYPE : + WOLFSSL_MSG("IDEA CBC"); + return IDEA_BLOCK_SIZE; +#endif +#ifndef NO_RC4 + case ARC4_TYPE : + WOLFSSL_MSG("ARC4"); + return 0; +#endif +#ifdef WOLFSSL_AES_CFB +#if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS) + case AES_128_CFB1_TYPE: + case AES_192_CFB1_TYPE: + case AES_256_CFB1_TYPE: + WOLFSSL_MSG("AES CFB1"); + return AES_BLOCK_SIZE; + case AES_128_CFB8_TYPE: + case AES_192_CFB8_TYPE: + case AES_256_CFB8_TYPE: + WOLFSSL_MSG("AES CFB8"); + return AES_BLOCK_SIZE; +#endif /* !HAVE_SELFTEST && !HAVE_FIPS */ + case AES_128_CFB128_TYPE: + case AES_192_CFB128_TYPE: + case AES_256_CFB128_TYPE: + WOLFSSL_MSG("AES CFB128"); + return AES_BLOCK_SIZE; +#endif /* WOLFSSL_AES_CFB */ +#if defined(WOLFSSL_AES_OFB) + case AES_128_OFB_TYPE: + case AES_192_OFB_TYPE: + case AES_256_OFB_TYPE: + WOLFSSL_MSG("AES OFB"); + return AES_BLOCK_SIZE; +#endif /* WOLFSSL_AES_OFB */ +#ifdef WOLFSSL_AES_XTS + case AES_128_XTS_TYPE: + case AES_256_XTS_TYPE: + 
WOLFSSL_MSG("AES XTS"); + return AES_BLOCK_SIZE; +#endif /* WOLFSSL_AES_XTS */ + + case NULL_CIPHER_TYPE : + WOLFSSL_MSG("NULL"); + return 0; + + default: { + WOLFSSL_MSG("bad type"); + } + } + return 0; +} + +int wolfSSL_EVP_CIPHER_iv_length(const WOLFSSL_EVP_CIPHER* cipher) +{ + const char *name = (const char *)cipher; + WOLFSSL_MSG("wolfSSL_EVP_CIPHER_iv_length"); + +#ifndef NO_AES +#ifdef HAVE_AES_CBC + #ifdef WOLFSSL_AES_128 + if (EVP_AES_128_CBC && XSTRNCMP(name, EVP_AES_128_CBC, XSTRLEN(EVP_AES_128_CBC)) == 0) + return AES_BLOCK_SIZE; + #endif + #ifdef WOLFSSL_AES_192 + if (EVP_AES_192_CBC && XSTRNCMP(name, EVP_AES_192_CBC, XSTRLEN(EVP_AES_192_CBC)) == 0) + return AES_BLOCK_SIZE; + #endif + #ifdef WOLFSSL_AES_256 + if (EVP_AES_256_CBC && XSTRNCMP(name, EVP_AES_256_CBC, XSTRLEN(EVP_AES_256_CBC)) == 0) + return AES_BLOCK_SIZE; + #endif +#endif /* HAVE_AES_CBC */ +#ifdef HAVE_AESGCM + #ifdef WOLFSSL_AES_128 + if (EVP_AES_128_GCM && XSTRNCMP(name, EVP_AES_128_GCM, XSTRLEN(EVP_AES_128_GCM)) == 0) + return GCM_NONCE_MID_SZ; + #endif + #ifdef WOLFSSL_AES_192 + if (EVP_AES_192_GCM && XSTRNCMP(name, EVP_AES_192_GCM, XSTRLEN(EVP_AES_192_GCM)) == 0) + return GCM_NONCE_MID_SZ; + #endif + #ifdef WOLFSSL_AES_256 + if (EVP_AES_256_GCM && XSTRNCMP(name, EVP_AES_256_GCM, XSTRLEN(EVP_AES_256_GCM)) == 0) + return GCM_NONCE_MID_SZ; + #endif +#endif /* HAVE_AESGCM */ +#ifdef WOLFSSL_AES_COUNTER + #ifdef WOLFSSL_AES_128 + if (EVP_AES_128_CTR && XSTRNCMP(name, EVP_AES_128_CTR, XSTRLEN(EVP_AES_128_CTR)) == 0) + return AES_BLOCK_SIZE; + #endif + #ifdef WOLFSSL_AES_192 + if (EVP_AES_192_CTR && XSTRNCMP(name, EVP_AES_192_CTR, XSTRLEN(EVP_AES_192_CTR)) == 0) + return AES_BLOCK_SIZE; + #endif + #ifdef WOLFSSL_AES_256 + if (EVP_AES_256_CTR && XSTRNCMP(name, EVP_AES_256_CTR, XSTRLEN(EVP_AES_256_CTR)) == 0) + return AES_BLOCK_SIZE; + #endif +#endif +#ifdef WOLFSSL_AES_XTS + #ifdef WOLFSSL_AES_128 + if (EVP_AES_128_XTS && XSTRNCMP(name, EVP_AES_128_XTS, XSTRLEN(EVP_AES_128_XTS)) == 0) + return AES_BLOCK_SIZE; + #endif /* WOLFSSL_AES_128 */ + + #ifdef WOLFSSL_AES_256 + if (EVP_AES_256_XTS && XSTRNCMP(name, EVP_AES_256_XTS, XSTRLEN(EVP_AES_256_XTS)) == 0) + return AES_BLOCK_SIZE; + #endif /* WOLFSSL_AES_256 */ +#endif /* WOLFSSL_AES_XTS */ + +#endif + +#ifndef NO_DES3 + if ((EVP_DES_CBC && XSTRNCMP(name, EVP_DES_CBC, XSTRLEN(EVP_DES_CBC)) == 0) || + (EVP_DES_EDE3_CBC && XSTRNCMP(name, EVP_DES_EDE3_CBC, XSTRLEN(EVP_DES_EDE3_CBC)) == 0)) { + return DES_BLOCK_SIZE; + } +#endif + +#ifdef HAVE_IDEA + if (EVP_IDEA_CBC && XSTRNCMP(name, EVP_IDEA_CBC, XSTRLEN(EVP_IDEA_CBC)) == 0) + return IDEA_BLOCK_SIZE; +#endif + + (void)name; + + return 0; +} + + +int wolfSSL_EVP_X_STATE_LEN(const WOLFSSL_EVP_CIPHER_CTX* ctx) +{ + WOLFSSL_MSG("wolfSSL_EVP_X_STATE_LEN"); + + if (ctx) { + switch (ctx->cipherType) { + case ARC4_TYPE: + WOLFSSL_MSG("returning arc4 state size"); + return sizeof(Arc4); + + default: + WOLFSSL_MSG("bad x state type"); + return 0; + } + } + + return 0; +} + + +/* return of pkey->type which will be EVP_PKEY_RSA for example. 
+ * + * type type of EVP_PKEY + * + * returns type or if type is not found then NID_undef + */ +int wolfSSL_EVP_PKEY_type(int type) +{ + WOLFSSL_MSG("wolfSSL_EVP_PKEY_type"); + + switch (type) { + case EVP_PKEY_RSA: + return EVP_PKEY_RSA; + case EVP_PKEY_DSA: + return EVP_PKEY_DSA; + case EVP_PKEY_EC: + return EVP_PKEY_EC; + case EVP_PKEY_DH: + return EVP_PKEY_DH; + default: + return NID_undef; + } +} + + +int wolfSSL_EVP_PKEY_id(const EVP_PKEY *pkey) +{ + if (pkey != NULL) + return pkey->type; + return 0; +} + + +int wolfSSL_EVP_PKEY_base_id(const EVP_PKEY *pkey) +{ + if (pkey == NULL) + return NID_undef; + return wolfSSL_EVP_PKEY_type(pkey->type); +} + + +/* increments ref count of WOLFSSL_EVP_PKEY. Return 1 on success, 0 on error */ +int wolfSSL_EVP_PKEY_up_ref(WOLFSSL_EVP_PKEY* pkey) +{ + if (pkey) { + if (wc_LockMutex(&pkey->refMutex) != 0) { + WOLFSSL_MSG("Failed to lock pkey mutex"); + } + pkey->references++; + wc_UnLockMutex(&pkey->refMutex); + + return 1; + } + + return 0; +} + +#ifndef NO_RSA +int wolfSSL_EVP_PKEY_assign_RSA(EVP_PKEY* pkey, WOLFSSL_RSA* key) +{ + if (pkey == NULL || key == NULL) + return WOLFSSL_FAILURE; + + pkey->type = EVP_PKEY_RSA; + pkey->rsa = key; + pkey->ownRsa = 1; + + /* try and populate public pkey_sz and pkey.ptr */ + if (key->internal) { + RsaKey* rsa = (RsaKey*)key->internal; + int ret = wc_RsaPublicKeyDerSize(rsa, 1); + if (ret > 0) { + int derSz = ret; + char* derBuf = (char*)XMALLOC(derSz, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (derBuf) { + ret = wc_RsaKeyToPublicDer(rsa, (byte*)derBuf, derSz); + if (ret >= 0) { + pkey->pkey_sz = ret; + pkey->pkey.ptr = derBuf; + } + else { /* failure - okay to ignore */ + XFREE(derBuf, NULL, DYNAMIC_TYPE_TMP_BUFFER); + derBuf = NULL; + } + } + } + } + + return WOLFSSL_SUCCESS; +} +#endif /* !NO_RSA */ + +#ifndef NO_DSA +int wolfSSL_EVP_PKEY_assign_DSA(EVP_PKEY* pkey, WOLFSSL_DSA* key) +{ + if (pkey == NULL || key == NULL) + return WOLFSSL_FAILURE; + + pkey->type = EVP_PKEY_DSA; + pkey->dsa = key; + pkey->ownDsa = 1; + + return WOLFSSL_SUCCESS; +} +#endif /* !NO_DSA */ + +#ifndef NO_DH +int wolfSSL_EVP_PKEY_assign_DH(EVP_PKEY* pkey, WOLFSSL_DH* key) +{ + if (pkey == NULL || key == NULL) + return WOLFSSL_FAILURE; + + pkey->type = EVP_PKEY_DH; + pkey->dh = key; + pkey->ownDh = 1; + + return WOLFSSL_SUCCESS; +} +#endif /* !NO_DH */ + +#endif /* OPENSSL_EXTRA */ + +#if defined(OPENSSL_EXTRA_X509_SMALL) +/* Subset of OPENSSL_EXTRA for PKEY operations PKEY free is needed by the + * subset of X509 API */ + +WOLFSSL_EVP_PKEY* wolfSSL_EVP_PKEY_new(void){ + return wolfSSL_EVP_PKEY_new_ex(NULL); +} + +WOLFSSL_EVP_PKEY* wolfSSL_EVP_PKEY_new_ex(void* heap) +{ + WOLFSSL_EVP_PKEY* pkey; + int ret; + WOLFSSL_ENTER("wolfSSL_EVP_PKEY_new_ex"); + pkey = (WOLFSSL_EVP_PKEY*)XMALLOC(sizeof(WOLFSSL_EVP_PKEY), heap, + DYNAMIC_TYPE_PUBLIC_KEY); + if (pkey != NULL) { + XMEMSET(pkey, 0, sizeof(WOLFSSL_EVP_PKEY)); + pkey->heap = heap; + pkey->type = WOLFSSL_EVP_PKEY_DEFAULT; +#ifndef HAVE_FIPS + ret = wc_InitRng_ex(&pkey->rng, heap, INVALID_DEVID); +#else + ret = wc_InitRng(&pkey->rng); +#endif + if (ret != 0){ + wolfSSL_EVP_PKEY_free(pkey); + WOLFSSL_MSG("memory failure"); + return NULL; + } + pkey->references = 1; + wc_InitMutex(&pkey->refMutex); + } + else { + WOLFSSL_MSG("memory failure"); + } + + return pkey; +} + +void wolfSSL_EVP_PKEY_free(WOLFSSL_EVP_PKEY* key) +{ + int doFree = 0; + WOLFSSL_ENTER("wolfSSL_EVP_PKEY_free"); + if (key != NULL) { + if (wc_LockMutex(&key->refMutex) != 0) { + WOLFSSL_MSG("Couldn't lock pkey mutex"); + } + + 
/* only free if all references to it are done */
+        key->references--;
+        if (key->references == 0) {
+            doFree = 1;
+        }
+        wc_UnLockMutex(&key->refMutex);
+
+        if (doFree) {
+            wc_FreeRng(&key->rng);
+
+            if (key->pkey.ptr != NULL) {
+                XFREE(key->pkey.ptr, key->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+                key->pkey.ptr = NULL;
+            }
+            switch(key->type)
+            {
+            #ifndef NO_RSA
+                case EVP_PKEY_RSA:
+                    if (key->rsa != NULL && key->ownRsa == 1) {
+                        wolfSSL_RSA_free(key->rsa);
+                        key->rsa = NULL;
+                    }
+                    break;
+            #endif /* NO_RSA */
+
+            #if defined(HAVE_ECC) && defined(OPENSSL_EXTRA)
+                case EVP_PKEY_EC:
+                    if (key->ecc != NULL && key->ownEcc == 1) {
+                        wolfSSL_EC_KEY_free(key->ecc);
+                        key->ecc = NULL;
+                    }
+                    break;
+            #endif /* HAVE_ECC && OPENSSL_EXTRA */
+
+            #ifndef NO_DSA
+                case EVP_PKEY_DSA:
+                    if (key->dsa != NULL && key->ownDsa == 1) {
+                        wolfSSL_DSA_free(key->dsa);
+                        key->dsa = NULL;
+                    }
+                    break;
+            #endif /* NO_DSA */
+
+            #if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL))
+                case EVP_PKEY_DH:
+                    if (key->dh != NULL && key->ownDh == 1) {
+                        wolfSSL_DH_free(key->dh);
+                        key->dh = NULL;
+                    }
+                    break;
+            #endif /* ! NO_DH ... */
+
+                default:
+                    break;
+            }
+
+            if (wc_FreeMutex(&key->refMutex) != 0) {
+                WOLFSSL_MSG("Couldn't free pkey mutex");
+            }
+            XFREE(key, key->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+        }
+    }
+}
+
+#endif /* OPENSSL_EXTRA_X509_SMALL */
+
+#endif /* WOLFSSL_EVP_INCLUDED */
diff --git a/client/wolfssl/wolfcrypt/src/fe_448.c b/client/wolfssl/wolfcrypt/src/fe_448.c
new file mode 100644
index 0000000..bc38c11
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/fe_448.c
@@ -0,0 +1,2458 @@
+/* fe_448.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Based on Daniel J Bernstein's curve25519 Public Domain ref10 work.
+ * Small implementation based on Daniel Beer's curve25519 public domain work.
+ * Reworked for curve448 by Sean Parkinson.
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if defined(HAVE_CURVE448) || defined(HAVE_ED448)
+
+#include <wolfssl/wolfcrypt/fe_448.h>
+#include <wolfssl/wolfcrypt/ge_448.h>
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(CURVE448_SMALL) || defined(ED448_SMALL)
+
+/* Initialize the field element operations.
+ */
+void fe448_init(void)
+{
+}
+
+/* Normalize the field element.
+ * Ensure result is in range: 0..2^448-2^224-2
+ *
+ * a [in] Field element in range 0..2^448-1.
+ */
+void fe448_norm(uint8_t* a)
+{
+    int i;
+    int16_t c = 0;
+    int16_t o = 0;
+
+    for (i = 0; i < 56; i++) {
+        c += a[i];
+        if ((i == 0) || (i == 28))
+            c += 1;
+        c >>= 8;
+    }
+
+    for (i = 0; i < 56; i++) {
+        if ((i == 0) || (i == 28)) o += c;
+        o += a[i];
+        a[i] = (uint8_t)o;
+        o >>= 8;
+    }
+}
+
+/* Copy one field element into another: d = a.
+ *
+ * d [in] Destination field element.
+ * a [in] Source field element.
+ */
+void fe448_copy(uint8_t* d, const uint8_t* a)
+{
+    int i;
+    for (i = 0; i < 56; i++) {
+        d[i] = a[i];
+    }
+}
+
+/* Conditionally swap the elements.
+ * Constant time implementation.
+ *
+ * a [in] First field element.
+ * b [in] Second field element.
+ * c [in] Swap when 1. Valid values: 0, 1.
+ */
+static void fe448_cswap(uint8_t* a, uint8_t* b, int c)
+{
+    int i;
+    uint8_t mask = -(uint8_t)c;
+    uint8_t t[56];
+
+    for (i = 0; i < 56; i++)
+        t[i] = (a[i] ^ b[i]) & mask;
+    for (i = 0; i < 56; i++)
+        a[i] ^= t[i];
+    for (i = 0; i < 56; i++)
+        b[i] ^= t[i];
+}
+
+/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold sum.
+ * a [in] Field element to add.
+ * b [in] Field element to add.
+ */
+void fe448_add(uint8_t* r, const uint8_t* a, const uint8_t* b)
+{
+    int i;
+    int16_t c = 0;
+    int16_t o = 0;
+
+    for (i = 0; i < 56; i++) {
+        c += a[i];
+        c += b[i];
+        r[i] = (uint8_t)c;
+        c >>= 8;
+    }
+
+    for (i = 0; i < 56; i++) {
+        if ((i == 0) || (i == 28)) o += c;
+        o += r[i];
+        r[i] = (uint8_t)o;
+        o >>= 8;
+    }
+}
+
+/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold difference.
+ * a [in] Field element to subtract from.
+ * b [in] Field element to subtract.
+ */
+void fe448_sub(uint8_t* r, const uint8_t* a, const uint8_t* b)
+{
+    int i;
+    int16_t c = 0;
+    int16_t o = 0;
+
+    for (i = 0; i < 56; i++) {
+        if (i == 28)
+            c += 0x1fc;
+        else
+            c += 0x1fe;
+        c += a[i];
+        c -= b[i];
+        r[i] = (uint8_t)c;
+        c >>= 8;
+    }
+
+    for (i = 0; i < 56; i++) {
+        if ((i == 0) || (i == 28)) o += c;
+        o += r[i];
+        r[i] = (uint8_t)o;
+        o >>= 8;
+    }
+}
+
+/* Multiply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to multiply.
+ */
+void fe448_mul39081(uint8_t* r, const uint8_t* a)
+{
+    int i;
+    int32_t c = 0;
+    int32_t o = 0;
+
+    for (i = 0; i < 56; i++) {
+        c += a[i] * (int32_t)39081;
+        r[i] = (uint8_t)c;
+        c >>= 8;
+    }
+
+    for (i = 0; i < 56; i++) {
+        if ((i == 0) || (i == 28)) o += c;
+        o += r[i];
+        r[i] = (uint8_t)o;
+        o >>= 8;
+    }
+}
+
+/* Multiply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to multiply.
+ * b [in] Field element to multiply.
+ */
+void fe448_mul(uint8_t* r, const uint8_t* a, const uint8_t* b)
+{
+    int i, k;
+    int32_t c = 0;
+    int16_t o = 0, cc = 0;
+    uint8_t t[112];
+
+    for (k = 0; k < 56; k++) {
+        i = 0;
+        for (; i <= k; i++) {
+            c += (int32_t)a[i] * b[k - i];
+        }
+        t[k] = (uint8_t)c;
+        c >>= 8;
+    }
+    for (; k < 111; k++) {
+        i = k - 55;
+        for (; i < 56; i++) {
+            c += (int32_t)a[i] * b[k - i];
+        }
+        t[k] = (uint8_t)c;
+        c >>= 8;
+    }
+    t[k] = (uint8_t)c;
+
+    for (i = 0; i < 28; i++) {
+        o += t[i];
+        o += t[i + 56];
+        o += t[i + 84];
+        r[i] = (uint8_t)o;
+        o >>= 8;
+    }
+    for (i = 28; i < 56; i++) {
+        o += t[i];
+        o += t[i + 56];
+        o += t[i + 28];
+        o += t[i + 56];
+        r[i] = (uint8_t)o;
+        o >>= 8;
+    }
+    for (i = 0; i < 56; i++) {
+        if ((i == 0) || (i == 28)) cc += o;
+        cc += r[i];
+        r[i] = (uint8_t)cc;
+        cc >>= 8;
+    }
+}
+
+/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to square.
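+ * Note: cross products a[i]*a[k-i] with i != k-i occur twice in a square,
+ * so they are computed once and doubled (p *= 2 below), roughly halving
+ * the multiply count relative to fe448_mul.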
+ */
+void fe448_sqr(uint8_t* r, const uint8_t* a)
+{
+    int i, k;
+    int32_t c = 0;
+    int32_t p;
+    int16_t o = 0, cc = 0;
+    uint8_t t[112];
+
+    for (k = 0; k < 56; k++) {
+        i = 0;
+        for (; i <= k; i++) {
+            if (k - i < i)
+                break;
+            p = (int32_t)a[i] * a[k - i];
+            if (k - i != i)
+                p *= 2;
+            c += p;
+        }
+        t[k] = (uint8_t)c;
+        c >>= 8;
+    }
+    for (; k < 111; k++) {
+        i = k - 55;
+        for (; i < 56; i++) {
+            if (k - i < i)
+                break;
+            p = (int32_t)a[i] * a[k - i];
+            if (k - i != i)
+                p *= 2;
+            c += p;
+        }
+        t[k] = (uint8_t)c;
+        c >>= 8;
+    }
+    t[k] = (uint8_t)c;
+
+    for (i = 0; i < 28; i++) {
+        o += t[i];
+        o += t[i + 56];
+        o += t[i + 84];
+        r[i] = (uint8_t)o;
+        o >>= 8;
+    }
+    for (i = 28; i < 56; i++) {
+        o += t[i];
+        o += t[i + 56];
+        o += t[i + 28];
+        o += t[i + 56];
+        r[i] = (uint8_t)o;
+        o >>= 8;
+    }
+    for (i = 0; i < 56; i++) {
+        if ((i == 0) || (i == 28)) cc += o;
+        cc += r[i];
+        r[i] = (uint8_t)cc;
+        cc >>= 8;
+    }
+    fe448_norm(r);
+}
+
+/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
+ * Constant time implementation - using Fermat's little theorem:
+ *   a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
+ * For curve448: p - 2 = 2^448 - 2^224 - 3
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to invert.
+ */
+void fe448_invert(uint8_t* r, const uint8_t* a)
+{
+    int i;
+    uint8_t t[56];
+
+    fe448_sqr(t, a);
+    fe448_mul(t, t, a);
+    for (i = 0; i < 221; i++) {
+        fe448_sqr(t, t);
+        fe448_mul(t, t, a);
+    }
+    fe448_sqr(t, t);
+    for (i = 0; i < 222; i++) {
+        fe448_sqr(t, t);
+        fe448_mul(t, t, a);
+    }
+    fe448_sqr(t, t);
+    fe448_sqr(t, t);
+    fe448_mul(r, t, a);
+}
+
+/* Scalar multiply the point by a number. r = n.a
+ * Uses the Montgomery ladder and only requires the x-ordinate.
+ *
+ * r [in] Field element to hold result.
+ * n [in] Scalar as an array of bytes.
+ * a [in] Point to multiply - x-ordinate only.
+ */
+int curve448(byte* r, const byte* n, const byte* a)
+{
+    uint8_t x1[56];
+    uint8_t x2[56] = {1};
+    uint8_t z2[56] = {0};
+    uint8_t x3[56];
+    uint8_t z3[56] = {1};
+    uint8_t t0[56];
+    uint8_t t1[56];
+    int i;
+    unsigned int swap;
+    unsigned int b;
+
+    fe448_copy(x1, a);
+    fe448_copy(x3, a);
+
+    swap = 0;
+    for (i = 447; i >= 0; --i) {
+        b = (n[i >> 3] >> (i & 7)) & 1;
+        swap ^= b;
+        fe448_cswap(x2, x3, swap);
+        fe448_cswap(z2, z3, swap);
+        swap = b;
+
+        /* Montgomery Ladder - double and add */
+        fe448_add(t0, x2, z2);
+        fe448_add(t1, x3, z3);
+        fe448_sub(x2, x2, z2);
+        fe448_sub(x3, x3, z3);
+        fe448_mul(t1, t1, x2);
+        fe448_mul(z3, x3, t0);
+        fe448_sqr(t0, t0);
+        fe448_sqr(x2, x2);
+        fe448_add(x3, z3, t1);
+        fe448_sqr(x3, x3);
+        fe448_sub(z3, z3, t1);
+        fe448_sqr(z3, z3);
+        fe448_mul(z3, z3, x1);
+        fe448_sub(t1, t0, x2);
+        fe448_mul(x2, t0, x2);
+        fe448_mul39081(z2, t1);
+        fe448_add(z2, t0, z2);
+        fe448_mul(z2, z2, t1);
+    }
+    fe448_cswap(x2, x3, swap);
+    fe448_cswap(z2, z3, swap);
+
+    fe448_invert(z2, z2);
+    fe448_mul(r, x2, z2);
+    fe448_norm(r);
+
+    return 0;
+}
+
+#ifdef HAVE_ED448
+/* Check whether field element is not 0.
+ * Field element must have been normalized before call.
+ *
+ * a [in] Field element.
+ * returns 0 when zero, and any other value otherwise.
+ */
+int fe448_isnonzero(const uint8_t* a)
+{
+    int i;
+    uint8_t c = 0;
+    for (i = 0; i < 56; i++)
+        c |= a[i];
+    return c;
+}
+
+/* Negates the field element. r = -a mod (2^448 - 2^224 - 1)
+ * Add 0x200 to each element and subtract 2 from next.
+ * Top element overflow handled by subtracting 2 from index 0 and 28.
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element.
+ */
+void fe448_neg(uint8_t* r, const uint8_t* a)
+{
+    int i;
+    int16_t c = 0;
+    int16_t o = 0;
+
+    for (i = 0; i < 56; i++) {
+        if (i == 28)
+            c += 0x1fc;
+        else
+            c += 0x1fe;
+        c -= a[i];
+        r[i] = (uint8_t)c;
+        c >>= 8;
+    }
+
+    for (i = 0; i < 56; i++) {
+        if ((i == 0) || (i == 28)) o += c;
+        o += r[i];
+        r[i] = (uint8_t)o;
+        o >>= 8;
+    }
+}
+
+/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
+ * Used for calculating the y-ordinate from the x-ordinate for Ed448.
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to exponentiate.
+ */
+void fe448_pow_2_446_222_1(uint8_t* r, const uint8_t* a)
+{
+    int i;
+    uint8_t t[56];
+
+    fe448_sqr(t, a);
+    fe448_mul(t, t, a);
+    for (i = 0; i < 221; i++) {
+        fe448_sqr(t, t);
+        fe448_mul(t, t, a);
+    }
+    fe448_sqr(t, t);
+    for (i = 0; i < 221; i++) {
+        fe448_sqr(t, t);
+        fe448_mul(t, t, a);
+    }
+    fe448_sqr(t, t);
+    fe448_mul(r, t, a);
+}
+
+/* Constant time, conditional move of b into a.
+ * a is not changed if the condition is 0.
+ *
+ * a A field element.
+ * b A field element.
+ * c If 1 then copy and if 0 then don't copy.
+ */
+void fe448_cmov(uint8_t* a, const uint8_t* b, int c)
+{
+    int i;
+    uint8_t m = -(uint8_t)c;
+    uint8_t t[56];
+
+    for (i = 0; i < 56; i++)
+        t[i] = m & (a[i] ^ b[i]);
+    for (i = 0; i < 56; i++)
+        a[i] ^= t[i];
+}
+
+#endif /* HAVE_ED448 */
+#elif defined(CURVED448_128BIT)
+
+/* Initialize the field element operations.
+ */
+void fe448_init(void)
+{
+}
+
+/* Convert the field element from a byte array to an array of 56-bit values.
+ *
+ * r [in] Array to encode into.
+ * b [in] Byte array.
+ */
+void fe448_from_bytes(int64_t* r, const unsigned char* b)
+{
+    r[ 0] = ((int64_t) (b[ 0]) << 0)
+          | ((int64_t) (b[ 1]) << 8)
+          | ((int64_t) (b[ 2]) << 16)
+          | ((int64_t) (b[ 3]) << 24)
+          | ((int64_t) (b[ 4]) << 32)
+          | ((int64_t) (b[ 5]) << 40)
+          | ((int64_t) (b[ 6]) << 48);
+    r[ 1] = ((int64_t) (b[ 7]) << 0)
+          | ((int64_t) (b[ 8]) << 8)
+          | ((int64_t) (b[ 9]) << 16)
+          | ((int64_t) (b[10]) << 24)
+          | ((int64_t) (b[11]) << 32)
+          | ((int64_t) (b[12]) << 40)
+          | ((int64_t) (b[13]) << 48);
+    r[ 2] = ((int64_t) (b[14]) << 0)
+          | ((int64_t) (b[15]) << 8)
+          | ((int64_t) (b[16]) << 16)
+          | ((int64_t) (b[17]) << 24)
+          | ((int64_t) (b[18]) << 32)
+          | ((int64_t) (b[19]) << 40)
+          | ((int64_t) (b[20]) << 48);
+    r[ 3] = ((int64_t) (b[21]) << 0)
+          | ((int64_t) (b[22]) << 8)
+          | ((int64_t) (b[23]) << 16)
+          | ((int64_t) (b[24]) << 24)
+          | ((int64_t) (b[25]) << 32)
+          | ((int64_t) (b[26]) << 40)
+          | ((int64_t) (b[27]) << 48);
+    r[ 4] = ((int64_t) (b[28]) << 0)
+          | ((int64_t) (b[29]) << 8)
+          | ((int64_t) (b[30]) << 16)
+          | ((int64_t) (b[31]) << 24)
+          | ((int64_t) (b[32]) << 32)
+          | ((int64_t) (b[33]) << 40)
+          | ((int64_t) (b[34]) << 48);
+    r[ 5] = ((int64_t) (b[35]) << 0)
+          | ((int64_t) (b[36]) << 8)
+          | ((int64_t) (b[37]) << 16)
+          | ((int64_t) (b[38]) << 24)
+          | ((int64_t) (b[39]) << 32)
+          | ((int64_t) (b[40]) << 40)
+          | ((int64_t) (b[41]) << 48);
+    r[ 6] = ((int64_t) (b[42]) << 0)
+          | ((int64_t) (b[43]) << 8)
+          | ((int64_t) (b[44]) << 16)
+          | ((int64_t) (b[45]) << 24)
+          | ((int64_t) (b[46]) << 32)
+          | ((int64_t) (b[47]) << 40)
+          | ((int64_t) (b[48]) << 48);
+    r[ 7] = ((int64_t) (b[49]) << 0)
+          | ((int64_t) (b[50]) << 8)
+          | ((int64_t) (b[51]) << 16)
+          | ((int64_t) (b[52]) << 24)
+          | ((int64_t) (b[53]) << 32)
+          | ((int64_t) (b[54]) << 40)
+          | ((int64_t) (b[55]) << 48);
+}
+
+/* Convert the field element to a byte array from an array of 56-bit values.
+ *
+ * b [in] Byte array to hold the encoded result.
+ * a [in] Field element as an array of 56-bit values.
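+ * Note: a carry out of the top word is folded back into words 0 and 4,
+ * using 2^448 = 2^224 + 1 (mod p); the (in0 + 1) carry probe then detects
+ * a value in [p, 2^448) so the bytes written are always fully reduced.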
+ */
+void fe448_to_bytes(unsigned char* b, const int64_t* a)
+{
+    int128_t t;
+    /* Mod */
+    int64_t in0 = a[0];
+    int64_t in1 = a[1];
+    int64_t in2 = a[2];
+    int64_t in3 = a[3];
+    int64_t in4 = a[4];
+    int64_t in5 = a[5];
+    int64_t in6 = a[6];
+    int64_t in7 = a[7];
+    int64_t o = in7 >> 56;
+    in7 -= o << 56;
+    in0 += o;
+    in4 += o;
+    o = (in0 + 1) >> 56;
+    o = (o + in1) >> 56;
+    o = (o + in2) >> 56;
+    o = (o + in3) >> 56;
+    o = (o + in4 + 1) >> 56;
+    o = (o + in5) >> 56;
+    o = (o + in6) >> 56;
+    o = (o + in7) >> 56;
+    in0 += o;
+    in4 += o;
+    in7 -= o << 56;
+    o = in0 >> 56; in1 += o; t = o << 56; in0 -= t;
+    o = in1 >> 56; in2 += o; t = o << 56; in1 -= t;
+    o = in2 >> 56; in3 += o; t = o << 56; in2 -= t;
+    o = in3 >> 56; in4 += o; t = o << 56; in3 -= t;
+    o = in4 >> 56; in5 += o; t = o << 56; in4 -= t;
+    o = in5 >> 56; in6 += o; t = o << 56; in5 -= t;
+    o = in6 >> 56; in7 += o; t = o << 56; in6 -= t;
+    o = in7 >> 56; in0 += o;
+                   in4 += o; t = o << 56; in7 -= t;
+
+    /* Output as bytes */
+    b[ 0] = (in0 >> 0);
+    b[ 1] = (in0 >> 8);
+    b[ 2] = (in0 >> 16);
+    b[ 3] = (in0 >> 24);
+    b[ 4] = (in0 >> 32);
+    b[ 5] = (in0 >> 40);
+    b[ 6] = (in0 >> 48);
+    b[ 7] = (in1 >> 0);
+    b[ 8] = (in1 >> 8);
+    b[ 9] = (in1 >> 16);
+    b[10] = (in1 >> 24);
+    b[11] = (in1 >> 32);
+    b[12] = (in1 >> 40);
+    b[13] = (in1 >> 48);
+    b[14] = (in2 >> 0);
+    b[15] = (in2 >> 8);
+    b[16] = (in2 >> 16);
+    b[17] = (in2 >> 24);
+    b[18] = (in2 >> 32);
+    b[19] = (in2 >> 40);
+    b[20] = (in2 >> 48);
+    b[21] = (in3 >> 0);
+    b[22] = (in3 >> 8);
+    b[23] = (in3 >> 16);
+    b[24] = (in3 >> 24);
+    b[25] = (in3 >> 32);
+    b[26] = (in3 >> 40);
+    b[27] = (in3 >> 48);
+    b[28] = (in4 >> 0);
+    b[29] = (in4 >> 8);
+    b[30] = (in4 >> 16);
+    b[31] = (in4 >> 24);
+    b[32] = (in4 >> 32);
+    b[33] = (in4 >> 40);
+    b[34] = (in4 >> 48);
+    b[35] = (in5 >> 0);
+    b[36] = (in5 >> 8);
+    b[37] = (in5 >> 16);
+    b[38] = (in5 >> 24);
+    b[39] = (in5 >> 32);
+    b[40] = (in5 >> 40);
+    b[41] = (in5 >> 48);
+    b[42] = (in6 >> 0);
+    b[43] = (in6 >> 8);
+    b[44] = (in6 >> 16);
+    b[45] = (in6 >> 24);
+    b[46] = (in6 >> 32);
+    b[47] = (in6 >> 40);
+    b[48] = (in6 >> 48);
+    b[49] = (in7 >> 0);
+    b[50] = (in7 >> 8);
+    b[51] = (in7 >> 16);
+    b[52] = (in7 >> 24);
+    b[53] = (in7 >> 32);
+    b[54] = (in7 >> 40);
+    b[55] = (in7 >> 48);
+}
+
+/* Set the field element to 1.
+ *
+ * a [in] Field element.
+ */
+void fe448_1(int64_t* a)
+{
+    a[0] = 1;
+    a[1] = 0;
+    a[2] = 0;
+    a[3] = 0;
+    a[4] = 0;
+    a[5] = 0;
+    a[6] = 0;
+    a[7] = 0;
+}
+
+/* Set the field element to 0.
+ *
+ * a [in] Field element.
+ */
+void fe448_0(int64_t* a)
+{
+    a[0] = 0;
+    a[1] = 0;
+    a[2] = 0;
+    a[3] = 0;
+    a[4] = 0;
+    a[5] = 0;
+    a[6] = 0;
+    a[7] = 0;
+}
+
+/* Copy one field element into another: d = a.
+ *
+ * d [in] Destination field element.
+ * a [in] Source field element.
+ */
+void fe448_copy(int64_t* d, const int64_t* a)
+{
+    d[0] = a[0];
+    d[1] = a[1];
+    d[2] = a[2];
+    d[3] = a[3];
+    d[4] = a[4];
+    d[5] = a[5];
+    d[6] = a[6];
+    d[7] = a[7];
+}
+
+/* Conditionally swap the elements.
+ * Constant time implementation.
+ *
+ * a [in] First field element.
+ * b [in] Second field element.
+ * c [in] Swap when 1. Valid values: 0, 1.
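+ * Note: mask below is 0 or all ones, so the xor/and/xor sequence swaps
+ * the two elements without any secret-dependent branch or memory index.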
+ */
+static void fe448_cswap(int64_t* a, int64_t* b, int c)
+{
+    int64_t mask = -(int64_t)c;
+    int64_t t0 = (a[0] ^ b[0]) & mask;
+    int64_t t1 = (a[1] ^ b[1]) & mask;
+    int64_t t2 = (a[2] ^ b[2]) & mask;
+    int64_t t3 = (a[3] ^ b[3]) & mask;
+    int64_t t4 = (a[4] ^ b[4]) & mask;
+    int64_t t5 = (a[5] ^ b[5]) & mask;
+    int64_t t6 = (a[6] ^ b[6]) & mask;
+    int64_t t7 = (a[7] ^ b[7]) & mask;
+    a[0] ^= t0;
+    a[1] ^= t1;
+    a[2] ^= t2;
+    a[3] ^= t3;
+    a[4] ^= t4;
+    a[5] ^= t5;
+    a[6] ^= t6;
+    a[7] ^= t7;
+    b[0] ^= t0;
+    b[1] ^= t1;
+    b[2] ^= t2;
+    b[3] ^= t3;
+    b[4] ^= t4;
+    b[5] ^= t5;
+    b[6] ^= t6;
+    b[7] ^= t7;
+}
+
+/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold sum.
+ * a [in] Field element to add.
+ * b [in] Field element to add.
+ */
+void fe448_add(int64_t* r, const int64_t* a, const int64_t* b)
+{
+    r[0] = a[0] + b[0];
+    r[1] = a[1] + b[1];
+    r[2] = a[2] + b[2];
+    r[3] = a[3] + b[3];
+    r[4] = a[4] + b[4];
+    r[5] = a[5] + b[5];
+    r[6] = a[6] + b[6];
+    r[7] = a[7] + b[7];
+}
+
+/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold difference.
+ * a [in] Field element to subtract from.
+ * b [in] Field element to subtract.
+ */
+void fe448_sub(int64_t* r, const int64_t* a, const int64_t* b)
+{
+    r[0] = a[0] - b[0];
+    r[1] = a[1] - b[1];
+    r[2] = a[2] - b[2];
+    r[3] = a[3] - b[3];
+    r[4] = a[4] - b[4];
+    r[5] = a[5] - b[5];
+    r[6] = a[6] - b[6];
+    r[7] = a[7] - b[7];
+}
+
+/* Multiply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to multiply.
+ */
+void fe448_mul39081(int64_t* r, const int64_t* a)
+{
+    int128_t t;
+    int64_t o;
+    int128_t t0 = a[0] * (int128_t)39081;
+    int128_t t1 = a[1] * (int128_t)39081;
+    int128_t t2 = a[2] * (int128_t)39081;
+    int128_t t3 = a[3] * (int128_t)39081;
+    int128_t t4 = a[4] * (int128_t)39081;
+    int128_t t5 = a[5] * (int128_t)39081;
+    int128_t t6 = a[6] * (int128_t)39081;
+    int128_t t7 = a[7] * (int128_t)39081;
+    o = t0 >> 56; t1 += o; t = (int128_t)o << 56; t0 -= t;
+    o = t1 >> 56; t2 += o; t = (int128_t)o << 56; t1 -= t;
+    o = t2 >> 56; t3 += o; t = (int128_t)o << 56; t2 -= t;
+    o = t3 >> 56; t4 += o; t = (int128_t)o << 56; t3 -= t;
+    o = t4 >> 56; t5 += o; t = (int128_t)o << 56; t4 -= t;
+    o = t5 >> 56; t6 += o; t = (int128_t)o << 56; t5 -= t;
+    o = t6 >> 56; t7 += o; t = (int128_t)o << 56; t6 -= t;
+    o = t7 >> 56; t0 += o;
+                  t4 += o; t = (int128_t)o << 56; t7 -= t;
+
+    /* Store */
+    r[0] = t0;
+    r[1] = t1;
+    r[2] = t2;
+    r[3] = t3;
+    r[4] = t4;
+    r[5] = t5;
+    r[6] = t6;
+    r[7] = t7;
+}
+
+/* Multiply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to multiply.
+ * b [in] Field element to multiply.
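+ * Note: the reduction step relies on 2^448 = 2^224 + 1 (mod p): every
+ * product word of weight 2^448 or more is folded back into the word
+ * eight positions down and the word four positions down.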
+ */ +void fe448_mul(int64_t* r, const int64_t* a, const int64_t* b) +{ + int128_t t; + int64_t o; + int128_t t0 = (int128_t)a[ 0] * b[ 0]; + int128_t t1 = (int128_t)a[ 0] * b[ 1]; + int128_t t101 = (int128_t)a[ 1] * b[ 0]; + int128_t t2 = (int128_t)a[ 0] * b[ 2]; + int128_t t102 = (int128_t)a[ 1] * b[ 1]; + int128_t t202 = (int128_t)a[ 2] * b[ 0]; + int128_t t3 = (int128_t)a[ 0] * b[ 3]; + int128_t t103 = (int128_t)a[ 1] * b[ 2]; + int128_t t203 = (int128_t)a[ 2] * b[ 1]; + int128_t t303 = (int128_t)a[ 3] * b[ 0]; + int128_t t4 = (int128_t)a[ 0] * b[ 4]; + int128_t t104 = (int128_t)a[ 1] * b[ 3]; + int128_t t204 = (int128_t)a[ 2] * b[ 2]; + int128_t t304 = (int128_t)a[ 3] * b[ 1]; + int128_t t404 = (int128_t)a[ 4] * b[ 0]; + int128_t t5 = (int128_t)a[ 0] * b[ 5]; + int128_t t105 = (int128_t)a[ 1] * b[ 4]; + int128_t t205 = (int128_t)a[ 2] * b[ 3]; + int128_t t305 = (int128_t)a[ 3] * b[ 2]; + int128_t t405 = (int128_t)a[ 4] * b[ 1]; + int128_t t505 = (int128_t)a[ 5] * b[ 0]; + int128_t t6 = (int128_t)a[ 0] * b[ 6]; + int128_t t106 = (int128_t)a[ 1] * b[ 5]; + int128_t t206 = (int128_t)a[ 2] * b[ 4]; + int128_t t306 = (int128_t)a[ 3] * b[ 3]; + int128_t t406 = (int128_t)a[ 4] * b[ 2]; + int128_t t506 = (int128_t)a[ 5] * b[ 1]; + int128_t t606 = (int128_t)a[ 6] * b[ 0]; + int128_t t7 = (int128_t)a[ 0] * b[ 7]; + int128_t t107 = (int128_t)a[ 1] * b[ 6]; + int128_t t207 = (int128_t)a[ 2] * b[ 5]; + int128_t t307 = (int128_t)a[ 3] * b[ 4]; + int128_t t407 = (int128_t)a[ 4] * b[ 3]; + int128_t t507 = (int128_t)a[ 5] * b[ 2]; + int128_t t607 = (int128_t)a[ 6] * b[ 1]; + int128_t t707 = (int128_t)a[ 7] * b[ 0]; + int128_t t8 = (int128_t)a[ 1] * b[ 7]; + int128_t t108 = (int128_t)a[ 2] * b[ 6]; + int128_t t208 = (int128_t)a[ 3] * b[ 5]; + int128_t t308 = (int128_t)a[ 4] * b[ 4]; + int128_t t408 = (int128_t)a[ 5] * b[ 3]; + int128_t t508 = (int128_t)a[ 6] * b[ 2]; + int128_t t608 = (int128_t)a[ 7] * b[ 1]; + int128_t t9 = (int128_t)a[ 2] * b[ 7]; + int128_t t109 = (int128_t)a[ 3] * b[ 6]; + int128_t t209 = (int128_t)a[ 4] * b[ 5]; + int128_t t309 = (int128_t)a[ 5] * b[ 4]; + int128_t t409 = (int128_t)a[ 6] * b[ 3]; + int128_t t509 = (int128_t)a[ 7] * b[ 2]; + int128_t t10 = (int128_t)a[ 3] * b[ 7]; + int128_t t110 = (int128_t)a[ 4] * b[ 6]; + int128_t t210 = (int128_t)a[ 5] * b[ 5]; + int128_t t310 = (int128_t)a[ 6] * b[ 4]; + int128_t t410 = (int128_t)a[ 7] * b[ 3]; + int128_t t11 = (int128_t)a[ 4] * b[ 7]; + int128_t t111 = (int128_t)a[ 5] * b[ 6]; + int128_t t211 = (int128_t)a[ 6] * b[ 5]; + int128_t t311 = (int128_t)a[ 7] * b[ 4]; + int128_t t12 = (int128_t)a[ 5] * b[ 7]; + int128_t t112 = (int128_t)a[ 6] * b[ 6]; + int128_t t212 = (int128_t)a[ 7] * b[ 5]; + int128_t t13 = (int128_t)a[ 6] * b[ 7]; + int128_t t113 = (int128_t)a[ 7] * b[ 6]; + int128_t t14 = (int128_t)a[ 7] * b[ 7]; + t1 += t101; + t2 += t102; t2 += t202; + t3 += t103; t3 += t203; t3 += t303; + t4 += t104; t4 += t204; t4 += t304; t4 += t404; + t5 += t105; t5 += t205; t5 += t305; t5 += t405; t5 += t505; + t6 += t106; t6 += t206; t6 += t306; t6 += t406; t6 += t506; + t6 += t606; + t7 += t107; t7 += t207; t7 += t307; t7 += t407; t7 += t507; + t7 += t607; + t7 += t707; + t8 += t108; t8 += t208; t8 += t308; t8 += t408; t8 += t508; + t8 += t608; + t9 += t109; t9 += t209; t9 += t309; t9 += t409; t9 += t509; + t10 += t110; t10 += t210; t10 += t310; t10 += t410; + t11 += t111; t11 += t211; t11 += t311; + t12 += t112; t12 += t212; + t13 += t113; + + /* Reduce */ + t0 += t8 + t12; + t1 += t9 + t13; + t2 += t10 + t14; + t3 += t11; + t4 += t12 
+ t8 + t12; + t5 += t13 + t9 + t13; + t6 += t14 + t10 + t14; + t7 += t11; + o = t7 >> 56; t0 += o; + t4 += o; t = (int128_t)o << 56; t7 -= t; + o = t0 >> 56; t1 += o; t = (int128_t)o << 56; t0 -= t; + o = t1 >> 56; t2 += o; t = (int128_t)o << 56; t1 -= t; + o = t2 >> 56; t3 += o; t = (int128_t)o << 56; t2 -= t; + o = t3 >> 56; t4 += o; t = (int128_t)o << 56; t3 -= t; + o = t4 >> 56; t5 += o; t = (int128_t)o << 56; t4 -= t; + o = t5 >> 56; t6 += o; t = (int128_t)o << 56; t5 -= t; + o = t6 >> 56; t7 += o; t = (int128_t)o << 56; t6 -= t; + o = t7 >> 56; t0 += o; + t4 += o; t = (int128_t)o << 56; t7 -= t; + + /* Store */ + r[0] = t0; + r[1] = t1; + r[2] = t2; + r[3] = t3; + r[4] = t4; + r[5] = t5; + r[6] = t6; + r[7] = t7; +} + +/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1) + * + * r [in] Field element to hold result. + * a [in] Field element to square. + */ +void fe448_sqr(int64_t* r, const int64_t* a) +{ + int128_t t; + int64_t o; + int128_t t0 = (int128_t)a[ 0] * a[ 0]; + int128_t t1 = 2 * (int128_t)a[ 0] * a[ 1]; + int128_t t2 = 2 * (int128_t)a[ 0] * a[ 2]; + int128_t t102 = (int128_t)a[ 1] * a[ 1]; + int128_t t3 = 2 * (int128_t)a[ 0] * a[ 3]; + int128_t t103 = 2 * (int128_t)a[ 1] * a[ 2]; + int128_t t4 = 2 * (int128_t)a[ 0] * a[ 4]; + int128_t t104 = 2 * (int128_t)a[ 1] * a[ 3]; + int128_t t204 = (int128_t)a[ 2] * a[ 2]; + int128_t t5 = 2 * (int128_t)a[ 0] * a[ 5]; + int128_t t105 = 2 * (int128_t)a[ 1] * a[ 4]; + int128_t t205 = 2 * (int128_t)a[ 2] * a[ 3]; + int128_t t6 = 2 * (int128_t)a[ 0] * a[ 6]; + int128_t t106 = 2 * (int128_t)a[ 1] * a[ 5]; + int128_t t206 = 2 * (int128_t)a[ 2] * a[ 4]; + int128_t t306 = (int128_t)a[ 3] * a[ 3]; + int128_t t7 = 2 * (int128_t)a[ 0] * a[ 7]; + int128_t t107 = 2 * (int128_t)a[ 1] * a[ 6]; + int128_t t207 = 2 * (int128_t)a[ 2] * a[ 5]; + int128_t t307 = 2 * (int128_t)a[ 3] * a[ 4]; + int128_t t8 = 2 * (int128_t)a[ 1] * a[ 7]; + int128_t t108 = 2 * (int128_t)a[ 2] * a[ 6]; + int128_t t208 = 2 * (int128_t)a[ 3] * a[ 5]; + int128_t t308 = (int128_t)a[ 4] * a[ 4]; + int128_t t9 = 2 * (int128_t)a[ 2] * a[ 7]; + int128_t t109 = 2 * (int128_t)a[ 3] * a[ 6]; + int128_t t209 = 2 * (int128_t)a[ 4] * a[ 5]; + int128_t t10 = 2 * (int128_t)a[ 3] * a[ 7]; + int128_t t110 = 2 * (int128_t)a[ 4] * a[ 6]; + int128_t t210 = (int128_t)a[ 5] * a[ 5]; + int128_t t11 = 2 * (int128_t)a[ 4] * a[ 7]; + int128_t t111 = 2 * (int128_t)a[ 5] * a[ 6]; + int128_t t12 = 2 * (int128_t)a[ 5] * a[ 7]; + int128_t t112 = (int128_t)a[ 6] * a[ 6]; + int128_t t13 = 2 * (int128_t)a[ 6] * a[ 7]; + int128_t t14 = (int128_t)a[ 7] * a[ 7]; + t2 += t102; + t3 += t103; + t4 += t104; t4 += t204; + t5 += t105; t5 += t205; + t6 += t106; t6 += t206; t6 += t306; + t7 += t107; t7 += t207; t7 += t307; + t8 += t108; t8 += t208; t8 += t308; + t9 += t109; t9 += t209; + t10 += t110; t10 += t210; + t11 += t111; + t12 += t112; + + /* Reduce */ + t0 += t8 + t12; + t1 += t9 + t13; + t2 += t10 + t14; + t3 += t11; + t4 += t12 + t8 + t12; + t5 += t13 + t9 + t13; + t6 += t14 + t10 + t14; + t7 += t11; + o = t7 >> 56; t0 += o; + t4 += o; t = (int128_t)o << 56; t7 -= t; + o = t0 >> 56; t1 += o; t = (int128_t)o << 56; t0 -= t; + o = t1 >> 56; t2 += o; t = (int128_t)o << 56; t1 -= t; + o = t2 >> 56; t3 += o; t = (int128_t)o << 56; t2 -= t; + o = t3 >> 56; t4 += o; t = (int128_t)o << 56; t3 -= t; + o = t4 >> 56; t5 += o; t = (int128_t)o << 56; t4 -= t; + o = t5 >> 56; t6 += o; t = (int128_t)o << 56; t5 -= t; + o = t6 >> 56; t7 += o; t = (int128_t)o << 56; t6 -= t; + o = t7 >> 56; t0 += o; + t4 += o; t = 
(int128_t)o << 56; t7 -= t;
+
+    /* Store */
+    r[0] = t0;
+    r[1] = t1;
+    r[2] = t2;
+    r[3] = t3;
+    r[4] = t4;
+    r[5] = t5;
+    r[6] = t6;
+    r[7] = t7;
+}
+
+/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
+ * Constant time implementation - using Fermat's little theorem:
+ *   a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
+ * For curve448: p - 2 = 2^448 - 2^224 - 3
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to invert.
+ */
+void fe448_invert(int64_t* r, const int64_t* a)
+{
+    int64_t t1[8];
+    int64_t t2[8];
+    int64_t t3[8];
+    int64_t t4[8];
+    int i;
+
+    fe448_sqr(t1, a);
+    /* t1 = 2 */
+    fe448_mul(t1, t1, a);
+    /* t1 = 3 */
+    fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
+    /* t2 = c */
+    fe448_mul(t3, t2, a);
+    /* t3 = d */
+    fe448_mul(t1, t2, t1);
+    /* t1 = f */
+    fe448_sqr(t2, t1);
+    /* t2 = 1e */
+    fe448_mul(t4, t2, a);
+    /* t4 = 1f */
+    fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
+    /* t2 = 3e0 */
+    fe448_mul(t1, t2, t4);
+    /* t1 = 3ff */
+    fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2);
+    /* t2 = ffc00 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = fffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
+    /* t2 = 1ffffe0 */
+    fe448_mul(t1, t2, t4);
+    /* t1 = 1ffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2);
+    /* t2 = 3fffffe000000 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = 3ffffffffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
+    /* t2 = 7fffffffffffe0 */
+    fe448_mul(t1, t2, t4);
+    /* t1 = 7fffffffffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2);
+    /* t2 = 3fffffffffffff80000000000000 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = 3fffffffffffffffffffffffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2);
+    /* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */
+    fe448_mul(t1, t2, t1);
+    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
+    /* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */
+    fe448_mul(t3, t3, t2);
+    /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
+    fe448_mul(t1, t3, a);
+    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
+    fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1);
+    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */
+    fe448_mul(r, t3, t1);
+    /* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
+}
+
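
fe448_invert above is Fermat inversion: it raises a to the fixed, public exponent p - 2 via a hardcoded chain of squarings and multiplications, so the operation sequence never depends on the secret value. The same idea in scaled-down standalone form (generic square-and-multiply over a small prime; the branch on exponent bits is fine here because p is public; hypothetical helper, not part of the patch):

```c
#include <stdint.h>
#include <stdio.h>

static uint64_t mulmod(uint64_t a, uint64_t b, uint64_t p)
{
    return (unsigned __int128)a * b % p;   /* assumes 128-bit support */
}

int main(void)
{
    const uint64_t p = 1000003;            /* a small prime */
    uint64_t a = 123456, r = 1, base = a;
    uint64_t e = p - 2;                    /* Fermat: a^(p-2) = 1/a mod p */
    while (e) {                            /* square-and-multiply */
        if (e & 1)
            r = mulmod(r, base, p);
        base = mulmod(base, base, p);
        e >>= 1;
    }
    printf("a * a^-1 mod p = %llu\n",
           (unsigned long long)mulmod(a, r, p));   /* prints 1 */
    return 0;
}
```
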
+/* Scalar multiply the point by a number. r = n.a
+ * Uses Montgomery ladder and only requires the x-ordinate.
+ *
+ * r [in] Field element to hold result.
+ * n [in] Scalar as an array of bytes.
+ * a [in] Point to multiply - x-ordinate only.
+ */
+int curve448(byte* r, const byte* n, const byte* a)
+{
+    int64_t x1[8];
+    int64_t x2[8];
+    int64_t z2[8];
+    int64_t x3[8];
+    int64_t z3[8];
+    int64_t t0[8];
+    int64_t t1[8];
+    int i;
+    unsigned int swap;
+    unsigned int b;
+
+    fe448_from_bytes(x1, a);
+    fe448_1(x2);
+    fe448_0(z2);
+    fe448_copy(x3, x1);
+    fe448_1(z3);
+
+    swap = 0;
+    for (i = 447; i >= 0; --i) {
+        b = (n[i >> 3] >> (i & 7)) & 1;
+        swap ^= b;
+        fe448_cswap(x2, x3, swap);
+        fe448_cswap(z2, z3, swap);
+        swap = b;
+
+        /* Montgomery Ladder - double and add */
+        fe448_add(t0, x2, z2);
+        fe448_reduce(t0);
+        fe448_add(t1, x3, z3);
+        fe448_reduce(t1);
+        fe448_sub(x2, x2, z2);
+        fe448_sub(x3, x3, z3);
+        fe448_mul(t1, t1, x2);
+        fe448_mul(z3, x3, t0);
+        fe448_sqr(t0, t0);
+        fe448_sqr(x2, x2);
+        fe448_add(x3, z3, t1);
+        fe448_reduce(x3);
+        fe448_sqr(x3, x3);
+        fe448_sub(z3, z3, t1);
+        fe448_sqr(z3, z3);
+        fe448_mul(z3, z3, x1);
+        fe448_sub(t1, t0, x2);
+        fe448_mul(x2, t0, x2);
+        fe448_mul39081(z2, t1);
+        fe448_add(z2, t0, z2);
+        fe448_mul(z2, z2, t1);
+    }
+    /* Last two bits are 0 - no final swap check required. */
+
+    fe448_invert(z2, z2);
+    fe448_mul(x2, x2, z2);
+    fe448_to_bytes(r, x2);
+
+    return 0;
+}
+
+#ifdef HAVE_ED448
+/* Check whether field element is not 0.
+ * Must convert to a normalized form before checking.
+ *
+ * a [in] Field element.
+ * returns 0 when zero, and any other value otherwise.
+ */
+int fe448_isnonzero(const int64_t* a)
+{
+    uint8_t b[56];
+    int i;
+    uint8_t c = 0;
+    fe448_to_bytes(b, a);
+    for (i = 0; i < 56; i++)
+        c |= b[i];
+    return c;
+}
+
+/* Check whether field element is negative.
+ * Must convert to a normalized form before checking.
+ *
+ * a [in] Field element.
+ * returns 1 when negative, and 0 otherwise.
+ */
+int fe448_isnegative(const int64_t* a)
+{
+    uint8_t b[56];
+    fe448_to_bytes(b, a);
+    return b[0] & 1;
+}
+
+/* Negates the field element. r = -a
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element.
+ */
+void fe448_neg(int64_t* r, const int64_t* a)
+{
+    r[0] = -a[0];
+    r[1] = -a[1];
+    r[2] = -a[2];
+    r[3] = -a[3];
+    r[4] = -a[4];
+    r[5] = -a[5];
+    r[6] = -a[6];
+    r[7] = -a[7];
+}
+
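
The "last two bits are 0" shortcut in curve448() above follows from RFC 7748 scalar clamping: X448 private scalars have bits 0 and 1 cleared (multiples of the cofactor 4) and bit 447 set, so after the final ladder iteration `swap == b == 0` and no trailing cswap is needed. A sketch of the clamp, which this routine assumes was already applied at key generation/import (the helper name is hypothetical):

```c
#include <stdint.h>
#include <stdio.h>

static void x448_clamp(uint8_t k[56])      /* RFC 7748 decodeScalar448 */
{
    k[0]  &= 0xfc;   /* clear bits 0-1: the ladder's last two steps swap nothing */
    k[55] |= 0x80;   /* set bit 447: fixed ladder length */
}

int main(void)
{
    uint8_t k[56] = {0xff};                /* k[0] = 0xff, rest zero */
    x448_clamp(k);
    printf("low byte after clamp: 0x%02x\n", k[0]);   /* prints 0xfc */
    return 0;
}
```

+/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
+ * Used for calculating y-ordinate from x-ordinate for Ed448.
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to exponentiate.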
+ */ +void fe448_pow_2_446_222_1(int64_t* r, const int64_t* a) +{ + int64_t t1[8]; + int64_t t2[8]; + int64_t t3[8]; + int64_t t4[8]; + int64_t t5[8]; + int i; + + fe448_sqr(t3, a); + /* t3 = 2 */ + fe448_mul(t1, t3, a); + /* t1 = 3 */ + fe448_sqr(t5, t1); + /* t5 = 6 */ + fe448_mul(t5, t5, a); + /* t5 = 7 */ + fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2); + /* t2 = c */ + fe448_mul(t3, t2, t3); + /* t3 = e */ + fe448_mul(t1, t2, t1); + /* t1 = f */ + fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2); + /* t2 = 78 */ + fe448_mul(t5, t2, t5); + /* t5 = 7f */ + fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2); + /* t2 = f0 */ + fe448_mul(t1, t2, t1); + /* t1 = ff */ + fe448_mul(t3, t3, t2); + /* t3 = fe */ + fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2); + /* t2 = 7f80 */ + fe448_mul(t5, t2, t5); + /* t5 = 7fff */ + fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2); + /* t2 = ff00 */ + fe448_mul(t1, t2, t1); + /* t1 = ffff */ + fe448_mul(t3, t3, t2); + /* t3 = fffe */ + fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2); + /* t2 = 3fff8000 */ + fe448_mul(t5, t2, t5); + /* t5 = 3fffffff */ + fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2); + /* t2 = ffff0000 */ + fe448_mul(t1, t2, t1); + /* t1 = ffffffff */ + fe448_mul(t3, t3, t2); + /* t3 = fffffffe */ + fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2); + /* t2 = ffffffff00000000 */ + fe448_mul(t2, t2, t1); + /* t2 = ffffffffffffffff */ + fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1); + /* t1 = ffffffffffffffff0000000000000000 */ + fe448_mul(t1, t1, t2); + /* t1 = ffffffffffffffffffffffffffffffff */ + fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1); + /* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */ + fe448_mul(t4, t1, t2); + /* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */ + fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2); + /* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */ + fe448_mul(t3, t3, t2); + /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */ + fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1); + /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */ + fe448_mul(t1, t1, t4); + /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */ + fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1); + /* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */ + fe448_mul(r, t5, t1); + /* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */ +} + +/* Constant time, conditional move of b into a. + * a is not changed if the condition is 0. + * + * a A field element. + * b A field element. + * c If 1 then copy and if 0 then don't copy. 
+ */ +void fe448_cmov(int64_t* a, const int64_t* b, int c) +{ + int64_t m = -(int64_t)c; + int64_t t0 = m & (a[0] ^ b[0]); + int64_t t1 = m & (a[1] ^ b[1]); + int64_t t2 = m & (a[2] ^ b[2]); + int64_t t3 = m & (a[3] ^ b[3]); + int64_t t4 = m & (a[4] ^ b[4]); + int64_t t5 = m & (a[5] ^ b[5]); + int64_t t6 = m & (a[6] ^ b[6]); + int64_t t7 = m & (a[7] ^ b[7]); + + a[0] ^= t0; + a[1] ^= t1; + a[2] ^= t2; + a[3] ^= t3; + a[4] ^= t4; + a[5] ^= t5; + a[6] ^= t6; + a[7] ^= t7; +} + +#endif /* HAVE_ED448 */ +#else + +/* Initialize the field element operations. + */ +void fe448_init(void) +{ +} + +/* Convert the field element from a byte array to an array of 28-bits. + * + * r [in] Array to encode into. + * b [in] Byte array. + */ +void fe448_from_bytes(int32_t* r, const unsigned char* b) +{ + r[ 0] = (((int32_t)((b[ 0] ) >> 0)) << 0) + | (((int32_t)((b[ 1] ) >> 0)) << 8) + | (((int32_t)((b[ 2] ) >> 0)) << 16) + | ((((int32_t)((b[ 3] & 0xf )) >> 0)) << 24); + r[ 1] = (((int32_t)((b[ 3] ) >> 4)) << 0) + | (((int32_t)((b[ 4] ) >> 0)) << 4) + | (((int32_t)((b[ 5] ) >> 0)) << 12) + | (((int32_t)((b[ 6] ) >> 0)) << 20); + r[ 2] = (((int32_t)((b[ 7] ) >> 0)) << 0) + | (((int32_t)((b[ 8] ) >> 0)) << 8) + | (((int32_t)((b[ 9] ) >> 0)) << 16) + | ((((int32_t)((b[10] & 0xf )) >> 0)) << 24); + r[ 3] = (((int32_t)((b[10] ) >> 4)) << 0) + | (((int32_t)((b[11] ) >> 0)) << 4) + | (((int32_t)((b[12] ) >> 0)) << 12) + | (((int32_t)((b[13] ) >> 0)) << 20); + r[ 4] = (((int32_t)((b[14] ) >> 0)) << 0) + | (((int32_t)((b[15] ) >> 0)) << 8) + | (((int32_t)((b[16] ) >> 0)) << 16) + | ((((int32_t)((b[17] & 0xf )) >> 0)) << 24); + r[ 5] = (((int32_t)((b[17] ) >> 4)) << 0) + | (((int32_t)((b[18] ) >> 0)) << 4) + | (((int32_t)((b[19] ) >> 0)) << 12) + | (((int32_t)((b[20] ) >> 0)) << 20); + r[ 6] = (((int32_t)((b[21] ) >> 0)) << 0) + | (((int32_t)((b[22] ) >> 0)) << 8) + | (((int32_t)((b[23] ) >> 0)) << 16) + | ((((int32_t)((b[24] & 0xf )) >> 0)) << 24); + r[ 7] = (((int32_t)((b[24] ) >> 4)) << 0) + | (((int32_t)((b[25] ) >> 0)) << 4) + | (((int32_t)((b[26] ) >> 0)) << 12) + | (((int32_t)((b[27] ) >> 0)) << 20); + r[ 8] = (((int32_t)((b[28] ) >> 0)) << 0) + | (((int32_t)((b[29] ) >> 0)) << 8) + | (((int32_t)((b[30] ) >> 0)) << 16) + | ((((int32_t)((b[31] & 0xf )) >> 0)) << 24); + r[ 9] = (((int32_t)((b[31] ) >> 4)) << 0) + | (((int32_t)((b[32] ) >> 0)) << 4) + | (((int32_t)((b[33] ) >> 0)) << 12) + | (((int32_t)((b[34] ) >> 0)) << 20); + r[10] = (((int32_t)((b[35] ) >> 0)) << 0) + | (((int32_t)((b[36] ) >> 0)) << 8) + | (((int32_t)((b[37] ) >> 0)) << 16) + | ((((int32_t)((b[38] & 0xf )) >> 0)) << 24); + r[11] = (((int32_t)((b[38] ) >> 4)) << 0) + | (((int32_t)((b[39] ) >> 0)) << 4) + | (((int32_t)((b[40] ) >> 0)) << 12) + | (((int32_t)((b[41] ) >> 0)) << 20); + r[12] = (((int32_t)((b[42] ) >> 0)) << 0) + | (((int32_t)((b[43] ) >> 0)) << 8) + | (((int32_t)((b[44] ) >> 0)) << 16) + | ((((int32_t)((b[45] & 0xf )) >> 0)) << 24); + r[13] = (((int32_t)((b[45] ) >> 4)) << 0) + | (((int32_t)((b[46] ) >> 0)) << 4) + | (((int32_t)((b[47] ) >> 0)) << 12) + | (((int32_t)((b[48] ) >> 0)) << 20); + r[14] = (((int32_t)((b[49] ) >> 0)) << 0) + | (((int32_t)((b[50] ) >> 0)) << 8) + | (((int32_t)((b[51] ) >> 0)) << 16) + | ((((int32_t)((b[52] & 0xf )) >> 0)) << 24); + r[15] = (((int32_t)((b[52] ) >> 4)) << 0) + | (((int32_t)((b[53] ) >> 0)) << 4) + | (((int32_t)((b[54] ) >> 0)) << 12) + | (((int32_t)((b[55] ) >> 0)) << 20); +} + +/* Convert the field element to a byte array from an array of 28-bits. + * + * b [in] Byte array. 
+ * a [in] Field element as an array of 28-bit values.
+ */
+void fe448_to_bytes(unsigned char* b, const int32_t* a)
+{
+    int64_t t;
+    /* Mod */
+    int32_t in0 = a[0];
+    int32_t in1 = a[1];
+    int32_t in2 = a[2];
+    int32_t in3 = a[3];
+    int32_t in4 = a[4];
+    int32_t in5 = a[5];
+    int32_t in6 = a[6];
+    int32_t in7 = a[7];
+    int32_t in8 = a[8];
+    int32_t in9 = a[9];
+    int32_t in10 = a[10];
+    int32_t in11 = a[11];
+    int32_t in12 = a[12];
+    int32_t in13 = a[13];
+    int32_t in14 = a[14];
+    int32_t in15 = a[15];
+    int32_t o = in15 >> 28;
+    in15 -= o << 28;
+    in0 += o;
+    in8 += o;
+    o = (in0 + 1) >> 28;
+    o = (o + in1) >> 28;
+    o = (o + in2) >> 28;
+    o = (o + in3) >> 28;
+    o = (o + in4) >> 28;
+    o = (o + in5) >> 28;
+    o = (o + in6) >> 28;
+    o = (o + in7) >> 28;
+    o = (o + in8 + 1) >> 28;
+    o = (o + in9) >> 28;
+    o = (o + in10) >> 28;
+    o = (o + in11) >> 28;
+    o = (o + in12) >> 28;
+    o = (o + in13) >> 28;
+    o = (o + in14) >> 28;
+    o = (o + in15) >> 28;
+    in0 += o;
+    in8 += o;
+    in15 -= o << 28;
+    o = in0 >> 28; in1 += o; t = o << 28; in0 -= t;
+    o = in1 >> 28; in2 += o; t = o << 28; in1 -= t;
+    o = in2 >> 28; in3 += o; t = o << 28; in2 -= t;
+    o = in3 >> 28; in4 += o; t = o << 28; in3 -= t;
+    o = in4 >> 28; in5 += o; t = o << 28; in4 -= t;
+    o = in5 >> 28; in6 += o; t = o << 28; in5 -= t;
+    o = in6 >> 28; in7 += o; t = o << 28; in6 -= t;
+    o = in7 >> 28; in8 += o; t = o << 28; in7 -= t;
+    o = in8 >> 28; in9 += o; t = o << 28; in8 -= t;
+    o = in9 >> 28; in10 += o; t = o << 28; in9 -= t;
+    o = in10 >> 28; in11 += o; t = o << 28; in10 -= t;
+    o = in11 >> 28; in12 += o; t = o << 28; in11 -= t;
+    o = in12 >> 28; in13 += o; t = o << 28; in12 -= t;
+    o = in13 >> 28; in14 += o; t = o << 28; in13 -= t;
+    o = in14 >> 28; in15 += o; t = o << 28; in14 -= t;
+    o = in15 >> 28; in0 += o;
+    in8 += o; t = o << 28; in15 -= t;
+
+    /* Output as bytes */
+    b[ 0] = (in0 >> 0);
+    b[ 1] = (in0 >> 8);
+    b[ 2] = (in0 >> 16);
+    b[ 3] = (in0 >> 24) + ((in1 >> 0) << 4);
+    b[ 4] = (in1 >> 4);
+    b[ 5] = (in1 >> 12);
+    b[ 6] = (in1 >> 20);
+    b[ 7] = (in2 >> 0);
+    b[ 8] = (in2 >> 8);
+    b[ 9] = (in2 >> 16);
+    b[10] = (in2 >> 24) + ((in3 >> 0) << 4);
+    b[11] = (in3 >> 4);
+    b[12] = (in3 >> 12);
+    b[13] = (in3 >> 20);
+    b[14] = (in4 >> 0);
+    b[15] = (in4 >> 8);
+    b[16] = (in4 >> 16);
+    b[17] = (in4 >> 24) + ((in5 >> 0) << 4);
+    b[18] = (in5 >> 4);
+    b[19] = (in5 >> 12);
+    b[20] = (in5 >> 20);
+    b[21] = (in6 >> 0);
+    b[22] = (in6 >> 8);
+    b[23] = (in6 >> 16);
+    b[24] = (in6 >> 24) + ((in7 >> 0) << 4);
+    b[25] = (in7 >> 4);
+    b[26] = (in7 >> 12);
+    b[27] = (in7 >> 20);
+    b[28] = (in8 >> 0);
+    b[29] = (in8 >> 8);
+    b[30] = (in8 >> 16);
+    b[31] = (in8 >> 24) + ((in9 >> 0) << 4);
+    b[32] = (in9 >> 4);
+    b[33] = (in9 >> 12);
+    b[34] = (in9 >> 20);
+    b[35] = (in10 >> 0);
+    b[36] = (in10 >> 8);
+    b[37] = (in10 >> 16);
+    b[38] = (in10 >> 24) + ((in11 >> 0) << 4);
+    b[39] = (in11 >> 4);
+    b[40] = (in11 >> 12);
+    b[41] = (in11 >> 20);
+    b[42] = (in12 >> 0);
+    b[43] = (in12 >> 8);
+    b[44] = (in12 >> 16);
+    b[45] = (in12 >> 24) + ((in13 >> 0) << 4);
+    b[46] = (in13 >> 4);
+    b[47] = (in13 >> 12);
+    b[48] = (in13 >> 20);
+    b[49] = (in14 >> 0);
+    b[50] = (in14 >> 8);
+    b[51] = (in14 >> 16);
+    b[52] = (in14 >> 24) + ((in15 >> 0) << 4);
+    b[53] = (in15 >> 4);
+    b[54] = (in15 >> 12);
+    b[55] = (in15 >> 20);
+}
+
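
The 32-bit build packs two 28-bit limbs into each 7-byte group, splitting byte 3 between them: its low nibble holds bits 24..27 of the even limb, its high nibble bits 0..3 of the odd limb (the `(in0 >> 24) + ((in1 >> 0) << 4)` lines above). A standalone round-trip check of that packing; hypothetical, for illustration only:

```c
#include <stdint.h>
#include <assert.h>

int main(void)
{
    uint32_t lo = 0xabcdef1;           /* two arbitrary 28-bit limbs */
    uint32_t hi = 0x1234567;
    uint8_t  b[7];
    /* Pack: a limb pair -> 7 bytes; byte 3 carries 4 bits of each. */
    b[0] = (uint8_t)lo;  b[1] = (uint8_t)(lo >> 8);  b[2] = (uint8_t)(lo >> 16);
    b[3] = (uint8_t)((lo >> 24) | ((hi & 0xf) << 4));
    b[4] = (uint8_t)(hi >> 4);  b[5] = (uint8_t)(hi >> 12);  b[6] = (uint8_t)(hi >> 20);
    /* Unpack (the fe448_from_bytes pattern) and check the round trip. */
    uint32_t lo2 = b[0] | ((uint32_t)b[1] << 8) | ((uint32_t)b[2] << 16)
                 | (((uint32_t)b[3] & 0xf) << 24);
    uint32_t hi2 = (b[3] >> 4) | ((uint32_t)b[4] << 4)
                 | ((uint32_t)b[5] << 12) | ((uint32_t)b[6] << 20);
    assert(lo2 == lo && hi2 == hi);
    return 0;
}
```

+/* Set the field element to 1.
+ *
+ * a [in] Field element.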
+ */ +void fe448_1(int32_t* a) +{ + a[0] = 1; + a[1] = 0; + a[2] = 0; + a[3] = 0; + a[4] = 0; + a[5] = 0; + a[6] = 0; + a[7] = 0; + a[8] = 0; + a[9] = 0; + a[10] = 0; + a[11] = 0; + a[12] = 0; + a[13] = 0; + a[14] = 0; + a[15] = 0; +} + +/* Set the field element to 0. + * + * a [in] Field element. + */ +void fe448_0(int32_t* a) +{ + a[0] = 0; + a[1] = 0; + a[2] = 0; + a[3] = 0; + a[4] = 0; + a[5] = 0; + a[6] = 0; + a[7] = 0; + a[8] = 0; + a[9] = 0; + a[10] = 0; + a[11] = 0; + a[12] = 0; + a[13] = 0; + a[14] = 0; + a[15] = 0; +} + +/* Copy one field element into another: d = a. + * + * d [in] Destination field element. + * a [in] Source field element. + */ +void fe448_copy(int32_t* d, const int32_t* a) +{ + d[0] = a[0]; + d[1] = a[1]; + d[2] = a[2]; + d[3] = a[3]; + d[4] = a[4]; + d[5] = a[5]; + d[6] = a[6]; + d[7] = a[7]; + d[8] = a[8]; + d[9] = a[9]; + d[10] = a[10]; + d[11] = a[11]; + d[12] = a[12]; + d[13] = a[13]; + d[14] = a[14]; + d[15] = a[15]; +} + +/* Conditionally swap the elements. + * Constant time implementation. + * + * a [in] First field element. + * b [in] Second field element. + * c [in] Swap when 1. Valid values: 0, 1. + */ +static void fe448_cswap(int32_t* a, int32_t* b, int c) +{ + int32_t mask = -(int32_t)c; + int32_t t0 = (a[0] ^ b[0]) & mask; + int32_t t1 = (a[1] ^ b[1]) & mask; + int32_t t2 = (a[2] ^ b[2]) & mask; + int32_t t3 = (a[3] ^ b[3]) & mask; + int32_t t4 = (a[4] ^ b[4]) & mask; + int32_t t5 = (a[5] ^ b[5]) & mask; + int32_t t6 = (a[6] ^ b[6]) & mask; + int32_t t7 = (a[7] ^ b[7]) & mask; + int32_t t8 = (a[8] ^ b[8]) & mask; + int32_t t9 = (a[9] ^ b[9]) & mask; + int32_t t10 = (a[10] ^ b[10]) & mask; + int32_t t11 = (a[11] ^ b[11]) & mask; + int32_t t12 = (a[12] ^ b[12]) & mask; + int32_t t13 = (a[13] ^ b[13]) & mask; + int32_t t14 = (a[14] ^ b[14]) & mask; + int32_t t15 = (a[15] ^ b[15]) & mask; + a[0] ^= t0; + a[1] ^= t1; + a[2] ^= t2; + a[3] ^= t3; + a[4] ^= t4; + a[5] ^= t5; + a[6] ^= t6; + a[7] ^= t7; + a[8] ^= t8; + a[9] ^= t9; + a[10] ^= t10; + a[11] ^= t11; + a[12] ^= t12; + a[13] ^= t13; + a[14] ^= t14; + a[15] ^= t15; + b[0] ^= t0; + b[1] ^= t1; + b[2] ^= t2; + b[3] ^= t3; + b[4] ^= t4; + b[5] ^= t5; + b[6] ^= t6; + b[7] ^= t7; + b[8] ^= t8; + b[9] ^= t9; + b[10] ^= t10; + b[11] ^= t11; + b[12] ^= t12; + b[13] ^= t13; + b[14] ^= t14; + b[15] ^= t15; +} + +/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1) + * + * r [in] Field element to hold sum. + * a [in] Field element to add. + * b [in] Field element to add. + */ +void fe448_add(int32_t* r, const int32_t* a, const int32_t* b) +{ + r[0] = a[0] + b[0]; + r[1] = a[1] + b[1]; + r[2] = a[2] + b[2]; + r[3] = a[3] + b[3]; + r[4] = a[4] + b[4]; + r[5] = a[5] + b[5]; + r[6] = a[6] + b[6]; + r[7] = a[7] + b[7]; + r[8] = a[8] + b[8]; + r[9] = a[9] + b[9]; + r[10] = a[10] + b[10]; + r[11] = a[11] + b[11]; + r[12] = a[12] + b[12]; + r[13] = a[13] + b[13]; + r[14] = a[14] + b[14]; + r[15] = a[15] + b[15]; +} + +/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1) + * + * r [in] Field element to hold difference. + * a [in] Field element to subtract from. + * b [in] Field element to subtract. 
+ */
+void fe448_sub(int32_t* r, const int32_t* a, const int32_t* b)
+{
+    r[0] = a[0] - b[0];
+    r[1] = a[1] - b[1];
+    r[2] = a[2] - b[2];
+    r[3] = a[3] - b[3];
+    r[4] = a[4] - b[4];
+    r[5] = a[5] - b[5];
+    r[6] = a[6] - b[6];
+    r[7] = a[7] - b[7];
+    r[8] = a[8] - b[8];
+    r[9] = a[9] - b[9];
+    r[10] = a[10] - b[10];
+    r[11] = a[11] - b[11];
+    r[12] = a[12] - b[12];
+    r[13] = a[13] - b[13];
+    r[14] = a[14] - b[14];
+    r[15] = a[15] - b[15];
+}
+
+/* Reduce the range of the field element's limbs after add/sub.
+ * Carry each limb's overflow into the next; the carry out of the top
+ * limb wraps into limbs 0 and 8 since 2^448 = 2^224 + 1 (mod p).
+ *
+ * a [in] Field element to reduce.
+ */
+void fe448_reduce(int32_t* a)
+{
+    int64_t o;
+
+    o = a[0 ] >> 28; a[1 ] += o; a[0 ] -= o << 28;
+    o = a[1 ] >> 28; a[2 ] += o; a[1 ] -= o << 28;
+    o = a[2 ] >> 28; a[3 ] += o; a[2 ] -= o << 28;
+    o = a[3 ] >> 28; a[4 ] += o; a[3 ] -= o << 28;
+    o = a[4 ] >> 28; a[5 ] += o; a[4 ] -= o << 28;
+    o = a[5 ] >> 28; a[6 ] += o; a[5 ] -= o << 28;
+    o = a[6 ] >> 28; a[7 ] += o; a[6 ] -= o << 28;
+    o = a[7 ] >> 28; a[8 ] += o; a[7 ] -= o << 28;
+    o = a[8 ] >> 28; a[9 ] += o; a[8 ] -= o << 28;
+    o = a[9 ] >> 28; a[10] += o; a[9 ] -= o << 28;
+    o = a[10] >> 28; a[11] += o; a[10] -= o << 28;
+    o = a[11] >> 28; a[12] += o; a[11] -= o << 28;
+    o = a[12] >> 28; a[13] += o; a[12] -= o << 28;
+    o = a[13] >> 28; a[14] += o; a[13] -= o << 28;
+    o = a[14] >> 28; a[15] += o; a[14] -= o << 28;
+    o = a[15] >> 28; a[0] += o;
+    a[8] += o; a[15] -= o << 28;
+}
+/* Multiply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to multiply.
+ */
+void fe448_mul39081(int32_t* r, const int32_t* a)
+{
+    int64_t t;
+    int32_t o;
+    int64_t t0 = a[0] * (int64_t)39081;
+    int64_t t1 = a[1] * (int64_t)39081;
+    int64_t t2 = a[2] * (int64_t)39081;
+    int64_t t3 = a[3] * (int64_t)39081;
+    int64_t t4 = a[4] * (int64_t)39081;
+    int64_t t5 = a[5] * (int64_t)39081;
+    int64_t t6 = a[6] * (int64_t)39081;
+    int64_t t7 = a[7] * (int64_t)39081;
+    int64_t t8 = a[8] * (int64_t)39081;
+    int64_t t9 = a[9] * (int64_t)39081;
+    int64_t t10 = a[10] * (int64_t)39081;
+    int64_t t11 = a[11] * (int64_t)39081;
+    int64_t t12 = a[12] * (int64_t)39081;
+    int64_t t13 = a[13] * (int64_t)39081;
+    int64_t t14 = a[14] * (int64_t)39081;
+    int64_t t15 = a[15] * (int64_t)39081;
+    o = t0 >> 28; t1 += o; t = (int64_t)o << 28; t0 -= t;
+    o = t1 >> 28; t2 += o; t = (int64_t)o << 28; t1 -= t;
+    o = t2 >> 28; t3 += o; t = (int64_t)o << 28; t2 -= t;
+    o = t3 >> 28; t4 += o; t = (int64_t)o << 28; t3 -= t;
+    o = t4 >> 28; t5 += o; t = (int64_t)o << 28; t4 -= t;
+    o = t5 >> 28; t6 += o; t = (int64_t)o << 28; t5 -= t;
+    o = t6 >> 28; t7 += o; t = (int64_t)o << 28; t6 -= t;
+    o = t7 >> 28; t8 += o; t = (int64_t)o << 28; t7 -= t;
+    o = t8 >> 28; t9 += o; t = (int64_t)o << 28; t8 -= t;
+    o = t9 >> 28; t10 += o; t = (int64_t)o << 28; t9 -= t;
+    o = t10 >> 28; t11 += o; t = (int64_t)o << 28; t10 -= t;
+    o = t11 >> 28; t12 += o; t = (int64_t)o << 28; t11 -= t;
+    o = t12 >> 28; t13 += o; t = (int64_t)o << 28; t12 -= t;
+    o = t13 >> 28; t14 += o; t = (int64_t)o << 28; t13 -= t;
+    o = t14 >> 28; t15 += o; t = (int64_t)o << 28; t14 -= t;
+    o = t15 >> 28; t0 += o;
+    t8 += o; t = (int64_t)o << 28; t15 -= t;
+
+    /* Store */
+    r[0] = t0;
+    r[1] = t1;
+    r[2] = t2;
+    r[3] = t3;
+    r[4] = t4;
+    r[5] = t5;
+    r[6] = t6;
+    r[7] = t7;
+    r[8] = t8;
+    r[9] = t9;
+    r[10] = t10;
+    r[11] = t11;
+    r[12] = t12;
+    r[13] = t13;
+    r[14] = t14;
+    r[15] = t15;
+}
+
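
Note the division of labor above: fe448_add/fe448_sub do no carrying at all, and callers (the ladder in curve448()) invoke fe448_reduce only where a following multiply needs its inputs back under 28 bits. The 3 bits of headroom a 28-bit limb leaves in an int32_t make a handful of unreduced additions safe; a standalone check of that bound (hypothetical, for illustration):

```c
#include <stdint.h>
#include <assert.h>

int main(void)
{
    int32_t limb = (1 << 28) - 1;      /* largest fully reduced limb */
    int64_t sum  = 0;
    int     n;
    for (n = 0; n < 7; n++)            /* 7 additions of maximal limbs */
        sum += limb;
    assert(sum < ((int64_t)1 << 31));  /* still representable in int32_t */
    return 0;
}
```

+/* Multiply two field elements. r = a * b
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to multiply.
+ * b [in] Field element to multiply.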
+ */ +static WC_INLINE void fe448_mul_8(int32_t* r, const int32_t* a, const int32_t* b) +{ + int64_t t; + int64_t t0 = (int64_t)a[ 0] * b[ 0]; + int64_t t1 = (int64_t)a[ 0] * b[ 1]; + int64_t t101 = (int64_t)a[ 1] * b[ 0]; + int64_t t2 = (int64_t)a[ 0] * b[ 2]; + int64_t t102 = (int64_t)a[ 1] * b[ 1]; + int64_t t202 = (int64_t)a[ 2] * b[ 0]; + int64_t t3 = (int64_t)a[ 0] * b[ 3]; + int64_t t103 = (int64_t)a[ 1] * b[ 2]; + int64_t t203 = (int64_t)a[ 2] * b[ 1]; + int64_t t303 = (int64_t)a[ 3] * b[ 0]; + int64_t t4 = (int64_t)a[ 0] * b[ 4]; + int64_t t104 = (int64_t)a[ 1] * b[ 3]; + int64_t t204 = (int64_t)a[ 2] * b[ 2]; + int64_t t304 = (int64_t)a[ 3] * b[ 1]; + int64_t t404 = (int64_t)a[ 4] * b[ 0]; + int64_t t5 = (int64_t)a[ 0] * b[ 5]; + int64_t t105 = (int64_t)a[ 1] * b[ 4]; + int64_t t205 = (int64_t)a[ 2] * b[ 3]; + int64_t t305 = (int64_t)a[ 3] * b[ 2]; + int64_t t405 = (int64_t)a[ 4] * b[ 1]; + int64_t t505 = (int64_t)a[ 5] * b[ 0]; + int64_t t6 = (int64_t)a[ 0] * b[ 6]; + int64_t t106 = (int64_t)a[ 1] * b[ 5]; + int64_t t206 = (int64_t)a[ 2] * b[ 4]; + int64_t t306 = (int64_t)a[ 3] * b[ 3]; + int64_t t406 = (int64_t)a[ 4] * b[ 2]; + int64_t t506 = (int64_t)a[ 5] * b[ 1]; + int64_t t606 = (int64_t)a[ 6] * b[ 0]; + int64_t t7 = (int64_t)a[ 0] * b[ 7]; + int64_t t107 = (int64_t)a[ 1] * b[ 6]; + int64_t t207 = (int64_t)a[ 2] * b[ 5]; + int64_t t307 = (int64_t)a[ 3] * b[ 4]; + int64_t t407 = (int64_t)a[ 4] * b[ 3]; + int64_t t507 = (int64_t)a[ 5] * b[ 2]; + int64_t t607 = (int64_t)a[ 6] * b[ 1]; + int64_t t707 = (int64_t)a[ 7] * b[ 0]; + int64_t t8 = (int64_t)a[ 1] * b[ 7]; + int64_t t108 = (int64_t)a[ 2] * b[ 6]; + int64_t t208 = (int64_t)a[ 3] * b[ 5]; + int64_t t308 = (int64_t)a[ 4] * b[ 4]; + int64_t t408 = (int64_t)a[ 5] * b[ 3]; + int64_t t508 = (int64_t)a[ 6] * b[ 2]; + int64_t t608 = (int64_t)a[ 7] * b[ 1]; + int64_t t9 = (int64_t)a[ 2] * b[ 7]; + int64_t t109 = (int64_t)a[ 3] * b[ 6]; + int64_t t209 = (int64_t)a[ 4] * b[ 5]; + int64_t t309 = (int64_t)a[ 5] * b[ 4]; + int64_t t409 = (int64_t)a[ 6] * b[ 3]; + int64_t t509 = (int64_t)a[ 7] * b[ 2]; + int64_t t10 = (int64_t)a[ 3] * b[ 7]; + int64_t t110 = (int64_t)a[ 4] * b[ 6]; + int64_t t210 = (int64_t)a[ 5] * b[ 5]; + int64_t t310 = (int64_t)a[ 6] * b[ 4]; + int64_t t410 = (int64_t)a[ 7] * b[ 3]; + int64_t t11 = (int64_t)a[ 4] * b[ 7]; + int64_t t111 = (int64_t)a[ 5] * b[ 6]; + int64_t t211 = (int64_t)a[ 6] * b[ 5]; + int64_t t311 = (int64_t)a[ 7] * b[ 4]; + int64_t t12 = (int64_t)a[ 5] * b[ 7]; + int64_t t112 = (int64_t)a[ 6] * b[ 6]; + int64_t t212 = (int64_t)a[ 7] * b[ 5]; + int64_t t13 = (int64_t)a[ 6] * b[ 7]; + int64_t t113 = (int64_t)a[ 7] * b[ 6]; + int64_t t14 = (int64_t)a[ 7] * b[ 7]; + t1 += t101; + t2 += t102; t2 += t202; + t3 += t103; t3 += t203; t3 += t303; + t4 += t104; t4 += t204; t4 += t304; t4 += t404; + t5 += t105; t5 += t205; t5 += t305; t5 += t405; t5 += t505; + t6 += t106; t6 += t206; t6 += t306; t6 += t406; t6 += t506; + t6 += t606; + t7 += t107; t7 += t207; t7 += t307; t7 += t407; t7 += t507; + t7 += t607; + t7 += t707; + t8 += t108; t8 += t208; t8 += t308; t8 += t408; t8 += t508; + t8 += t608; + t9 += t109; t9 += t209; t9 += t309; t9 += t409; t9 += t509; + t10 += t110; t10 += t210; t10 += t310; t10 += t410; + t11 += t111; t11 += t211; t11 += t311; + t12 += t112; t12 += t212; + t13 += t113; + int64_t o = t14 >> 28; + int64_t t15 = o; + t14 -= o << 28; + o = t0 >> 28; t1 += o; t = (int64_t)o << 28; t0 -= t; + o = t1 >> 28; t2 += o; t = (int64_t)o << 28; t1 -= t; + o = t2 >> 28; t3 += o; t = (int64_t)o << 
28; t2 -= t;
+    o = t3 >> 28; t4 += o; t = (int64_t)o << 28; t3 -= t;
+    o = t4 >> 28; t5 += o; t = (int64_t)o << 28; t4 -= t;
+    o = t5 >> 28; t6 += o; t = (int64_t)o << 28; t5 -= t;
+    o = t6 >> 28; t7 += o; t = (int64_t)o << 28; t6 -= t;
+    o = t7 >> 28; t8 += o; t = (int64_t)o << 28; t7 -= t;
+    o = t8 >> 28; t9 += o; t = (int64_t)o << 28; t8 -= t;
+    o = t9 >> 28; t10 += o; t = (int64_t)o << 28; t9 -= t;
+    o = t10 >> 28; t11 += o; t = (int64_t)o << 28; t10 -= t;
+    o = t11 >> 28; t12 += o; t = (int64_t)o << 28; t11 -= t;
+    o = t12 >> 28; t13 += o; t = (int64_t)o << 28; t12 -= t;
+    o = t13 >> 28; t14 += o; t = (int64_t)o << 28; t13 -= t;
+    o = t14 >> 28; t15 += o; t = (int64_t)o << 28; t14 -= t;
+    o = t15 >> 28; t0 += o;
+    t8 += o; t = (int64_t)o << 28; t15 -= t;
+
+    /* Store */
+    r[0] = t0;
+    r[1] = t1;
+    r[2] = t2;
+    r[3] = t3;
+    r[4] = t4;
+    r[5] = t5;
+    r[6] = t6;
+    r[7] = t7;
+    r[8] = t8;
+    r[9] = t9;
+    r[10] = t10;
+    r[11] = t11;
+    r[12] = t12;
+    r[13] = t13;
+    r[14] = t14;
+    r[15] = t15;
+}
+
+/* Multiply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to multiply.
+ * b [in] Field element to multiply.
+ */
+void fe448_mul(int32_t* r, const int32_t* a, const int32_t* b)
+{
+    int32_t r0[16];
+    int32_t r1[16];
+    int32_t* a1 = r1;
+    int32_t b1[8];
+    int32_t r2[16];
+    a1[0] = a[0] + a[8];
+    a1[1] = a[1] + a[9];
+    a1[2] = a[2] + a[10];
+    a1[3] = a[3] + a[11];
+    a1[4] = a[4] + a[12];
+    a1[5] = a[5] + a[13];
+    a1[6] = a[6] + a[14];
+    a1[7] = a[7] + a[15];
+    b1[0] = b[0] + b[8];
+    b1[1] = b[1] + b[9];
+    b1[2] = b[2] + b[10];
+    b1[3] = b[3] + b[11];
+    b1[4] = b[4] + b[12];
+    b1[5] = b[5] + b[13];
+    b1[6] = b[6] + b[14];
+    b1[7] = b[7] + b[15];
+    fe448_mul_8(r2, a + 8, b + 8);
+    fe448_mul_8(r0, a, b);
+    fe448_mul_8(r1, a1, b1);
+    r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8];
+    r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9];
+    r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10];
+    r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11];
+    r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12];
+    r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13];
+    r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14];
+    r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15];
+    r[ 8] = r2[ 8] + r1[ 0] - r0[ 0] + r1[ 8];
+    r[ 9] = r2[ 9] + r1[ 1] - r0[ 1] + r1[ 9];
+    r[10] = r2[10] + r1[ 2] - r0[ 2] + r1[10];
+    r[11] = r2[11] + r1[ 3] - r0[ 3] + r1[11];
+    r[12] = r2[12] + r1[ 4] - r0[ 4] + r1[12];
+    r[13] = r2[13] + r1[ 5] - r0[ 5] + r1[13];
+    r[14] = r2[14] + r1[ 6] - r0[ 6] + r1[14];
+    r[15] = r2[15] + r1[ 7] - r0[ 7] + r1[15];
+}
+
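
fe448_mul above is one level of Karatsuba glued to the reduction: with s = 2^224, a = A + B*s and b = C + D*s, it forms r0 = A*C, r2 = B*D and the middle product r1 = (A+B)(C+D), then recombines using s^2 = s + 1 (mod p) as low = r0.lo + r2.lo + r1.hi - r0.hi and high = r2.hi + r1.lo - r0.lo + r1.hi, which is exactly the r[0..15] arithmetic above. A scaled-down standalone check of that identity with s = 2^8 (same shape of modulus; hypothetical, for illustration):

```c
#include <stdint.h>
#include <assert.h>

int main(void)
{
    const int64_t s = 256, p = s * s - s - 1;   /* p = 65279, s^2 = s+1 mod p */
    int64_t A = 123, B = 45, C = 67, D = 89;    /* a = A + B*s, b = C + D*s */
    int64_t a = A + B * s, b = C + D * s;

    int64_t r0 = A * C;                 /* low product     */
    int64_t r2 = B * D;                 /* high product    */
    int64_t r1 = (A + B) * (C + D);     /* Karatsuba middle */
    /* Split each product into lo + hi*s, like the 16-limb halves. */
    int64_t r0l = r0 % s, r0h = r0 / s;
    int64_t r2l = r2 % s, r2h = r2 / s;
    int64_t r1l = r1 % s, r1h = r1 / s;
    /* Same recombination as the r[0..7]/r[8..15] lines above. */
    int64_t lo = r0l + r2l + r1h - r0h;
    int64_t hi = r2h + r1l - r0l + r1h;
    assert(((lo + hi * s) % p + p) % p == (a * b) % p);
    return 0;
}
```

+/* Square a field element. r = a * a
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to square.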
+ */ +static WC_INLINE void fe448_sqr_8(int32_t* r, const int32_t* a) +{ + int64_t t; + int64_t t0 = (int64_t)a[ 0] * a[ 0]; + int64_t t1 = 2 * (int64_t)a[ 0] * a[ 1]; + int64_t t2 = 2 * (int64_t)a[ 0] * a[ 2]; + int64_t t102 = (int64_t)a[ 1] * a[ 1]; + int64_t t3 = 2 * (int64_t)a[ 0] * a[ 3]; + int64_t t103 = 2 * (int64_t)a[ 1] * a[ 2]; + int64_t t4 = 2 * (int64_t)a[ 0] * a[ 4]; + int64_t t104 = 2 * (int64_t)a[ 1] * a[ 3]; + int64_t t204 = (int64_t)a[ 2] * a[ 2]; + int64_t t5 = 2 * (int64_t)a[ 0] * a[ 5]; + int64_t t105 = 2 * (int64_t)a[ 1] * a[ 4]; + int64_t t205 = 2 * (int64_t)a[ 2] * a[ 3]; + int64_t t6 = 2 * (int64_t)a[ 0] * a[ 6]; + int64_t t106 = 2 * (int64_t)a[ 1] * a[ 5]; + int64_t t206 = 2 * (int64_t)a[ 2] * a[ 4]; + int64_t t306 = (int64_t)a[ 3] * a[ 3]; + int64_t t7 = 2 * (int64_t)a[ 0] * a[ 7]; + int64_t t107 = 2 * (int64_t)a[ 1] * a[ 6]; + int64_t t207 = 2 * (int64_t)a[ 2] * a[ 5]; + int64_t t307 = 2 * (int64_t)a[ 3] * a[ 4]; + int64_t t8 = 2 * (int64_t)a[ 1] * a[ 7]; + int64_t t108 = 2 * (int64_t)a[ 2] * a[ 6]; + int64_t t208 = 2 * (int64_t)a[ 3] * a[ 5]; + int64_t t308 = (int64_t)a[ 4] * a[ 4]; + int64_t t9 = 2 * (int64_t)a[ 2] * a[ 7]; + int64_t t109 = 2 * (int64_t)a[ 3] * a[ 6]; + int64_t t209 = 2 * (int64_t)a[ 4] * a[ 5]; + int64_t t10 = 2 * (int64_t)a[ 3] * a[ 7]; + int64_t t110 = 2 * (int64_t)a[ 4] * a[ 6]; + int64_t t210 = (int64_t)a[ 5] * a[ 5]; + int64_t t11 = 2 * (int64_t)a[ 4] * a[ 7]; + int64_t t111 = 2 * (int64_t)a[ 5] * a[ 6]; + int64_t t12 = 2 * (int64_t)a[ 5] * a[ 7]; + int64_t t112 = (int64_t)a[ 6] * a[ 6]; + int64_t t13 = 2 * (int64_t)a[ 6] * a[ 7]; + int64_t t14 = (int64_t)a[ 7] * a[ 7]; + t2 += t102; + t3 += t103; + t4 += t104; t4 += t204; + t5 += t105; t5 += t205; + t6 += t106; t6 += t206; t6 += t306; + t7 += t107; t7 += t207; t7 += t307; + t8 += t108; t8 += t208; t8 += t308; + t9 += t109; t9 += t209; + t10 += t110; t10 += t210; + t11 += t111; + t12 += t112; + int64_t o = t14 >> 28; + int64_t t15 = o; + t14 -= o << 28; + o = t0 >> 28; t1 += o; t = (int64_t)o << 28; t0 -= t; + o = t1 >> 28; t2 += o; t = (int64_t)o << 28; t1 -= t; + o = t2 >> 28; t3 += o; t = (int64_t)o << 28; t2 -= t; + o = t3 >> 28; t4 += o; t = (int64_t)o << 28; t3 -= t; + o = t4 >> 28; t5 += o; t = (int64_t)o << 28; t4 -= t; + o = t5 >> 28; t6 += o; t = (int64_t)o << 28; t5 -= t; + o = t6 >> 28; t7 += o; t = (int64_t)o << 28; t6 -= t; + o = t7 >> 28; t8 += o; t = (int64_t)o << 28; t7 -= t; + o = t8 >> 28; t9 += o; t = (int64_t)o << 28; t8 -= t; + o = t9 >> 28; t10 += o; t = (int64_t)o << 28; t9 -= t; + o = t10 >> 28; t11 += o; t = (int64_t)o << 28; t10 -= t; + o = t11 >> 28; t12 += o; t = (int64_t)o << 28; t11 -= t; + o = t12 >> 28; t13 += o; t = (int64_t)o << 28; t12 -= t; + o = t13 >> 28; t14 += o; t = (int64_t)o << 28; t13 -= t; + o = t14 >> 28; t15 += o; t = (int64_t)o << 28; t14 -= t; + o = t15 >> 28; t0 += o; + t8 += o; t = (int64_t)o << 28; t15 -= t; + + /* Store */ + r[0] = t0; + r[1] = t1; + r[2] = t2; + r[3] = t3; + r[4] = t4; + r[5] = t5; + r[6] = t6; + r[7] = t7; + r[8] = t8; + r[9] = t9; + r[10] = t10; + r[11] = t11; + r[12] = t12; + r[13] = t13; + r[14] = t14; + r[15] = t15; +} + +/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1) + * + * r [in] Field element to hold result. + * a [in] Field element to square. 
+ */ +void fe448_sqr(int32_t* r, const int32_t* a) +{ + int32_t r0[16]; + int32_t r1[16]; + int32_t* a1 = r1; + int32_t r2[16]; + a1[0] = a[0] + a[8]; + a1[1] = a[1] + a[9]; + a1[2] = a[2] + a[10]; + a1[3] = a[3] + a[11]; + a1[4] = a[4] + a[12]; + a1[5] = a[5] + a[13]; + a1[6] = a[6] + a[14]; + a1[7] = a[7] + a[15]; + fe448_sqr_8(r2, a + 8); + fe448_sqr_8(r0, a); + fe448_sqr_8(r1, a1); + r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8]; + r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9]; + r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10]; + r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11]; + r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12]; + r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13]; + r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14]; + r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15]; + r[ 8] = r2[ 8] + r1[ 0] - r0[ 0] + r1[ 8]; + r[ 9] = r2[ 9] + r1[ 1] - r0[ 1] + r1[ 9]; + r[10] = r2[10] + r1[ 2] - r0[ 2] + r1[10]; + r[11] = r2[11] + r1[ 3] - r0[ 3] + r1[11]; + r[12] = r2[12] + r1[ 4] - r0[ 4] + r1[12]; + r[13] = r2[13] + r1[ 5] - r0[ 5] + r1[13]; + r[14] = r2[14] + r1[ 6] - r0[ 6] + r1[14]; + r[15] = r2[15] + r1[ 7] - r0[ 7] + r1[15]; +} + +/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1 + * Constant time implementation - using Fermat's little theorem: + * a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a + * For curve448: p - 2 = 2^448 - 2^224 - 3 + * + * r [in] Field element to hold result. + * a [in] Field element to invert. + */ +void fe448_invert(int32_t* r, const int32_t* a) +{ + int32_t t1[16]; + int32_t t2[16]; + int32_t t3[16]; + int32_t t4[16]; + int i; + + fe448_sqr(t1, a); + /* t1 = 2 */ + fe448_mul(t1, t1, a); + /* t1 = 3 */ + fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2); + /* t2 = c */ + fe448_mul(t3, t2, a); + /* t3 = d */ + fe448_mul(t1, t2, t1); + /* t1 = f */ + fe448_sqr(t2, t1); + /* t2 = 1e */ + fe448_mul(t4, t2, a); + /* t4 = 1f */ + fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2); + /* t2 = 3e0 */ + fe448_mul(t1, t2, t4); + /* t1 = 3ff */ + fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2); + /* t2 = ffc00 */ + fe448_mul(t1, t2, t1); + /* t1 = fffff */ + fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2); + /* t2 = 1ffffe0 */ + fe448_mul(t1, t2, t4); + /* t1 = 1ffffff */ + fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2); + /* t2 = 3fffffe000000 */ + fe448_mul(t1, t2, t1); + /* t1 = 3ffffffffffff */ + fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2); + /* t2 = 7fffffffffffe0 */ + fe448_mul(t1, t2, t4); + /* t1 = 7fffffffffffff */ + fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2); + /* t2 = 3fffffffffffff80000000000000 */ + fe448_mul(t1, t2, t1); + /* t1 = 3fffffffffffffffffffffffffff */ + fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2); + /* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */ + fe448_mul(t1, t2, t1); + /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2); + /* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */ + fe448_mul(t3, t3, t2); + /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */ + fe448_mul(t1, t3, a); + /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */ + fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1); + /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */ + fe448_mul(r, t3, t1); + /* r = 
fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
+}
+
+/* Scalar multiply the point by a number. r = n.a
+ * Uses Montgomery ladder and only requires the x-ordinate.
+ *
+ * r [in] Field element to hold result.
+ * n [in] Scalar as an array of bytes.
+ * a [in] Point to multiply - x-ordinate only.
+ */
+int curve448(byte* r, const byte* n, const byte* a)
+{
+    int32_t x1[16];
+    int32_t x2[16];
+    int32_t z2[16];
+    int32_t x3[16];
+    int32_t z3[16];
+    int32_t t0[16];
+    int32_t t1[16];
+    int i;
+    unsigned int swap;
+    unsigned int b;
+
+    fe448_from_bytes(x1, a);
+    fe448_1(x2);
+    fe448_0(z2);
+    fe448_copy(x3, x1);
+    fe448_1(z3);
+
+    swap = 0;
+    for (i = 447; i >= 0; --i) {
+        b = (n[i >> 3] >> (i & 7)) & 1;
+        swap ^= b;
+        fe448_cswap(x2, x3, swap);
+        fe448_cswap(z2, z3, swap);
+        swap = b;
+
+        /* Montgomery Ladder - double and add */
+        fe448_add(t0, x2, z2);
+        fe448_reduce(t0);
+        fe448_add(t1, x3, z3);
+        fe448_reduce(t1);
+        fe448_sub(x2, x2, z2);
+        fe448_sub(x3, x3, z3);
+        fe448_mul(t1, t1, x2);
+        fe448_mul(z3, x3, t0);
+        fe448_sqr(t0, t0);
+        fe448_sqr(x2, x2);
+        fe448_add(x3, z3, t1);
+        fe448_reduce(x3);
+        fe448_sqr(x3, x3);
+        fe448_sub(z3, z3, t1);
+        fe448_sqr(z3, z3);
+        fe448_mul(z3, z3, x1);
+        fe448_sub(t1, t0, x2);
+        fe448_mul(x2, t0, x2);
+        fe448_mul39081(z2, t1);
+        fe448_add(z2, t0, z2);
+        fe448_mul(z2, z2, t1);
+    }
+    /* Last two bits are 0 - no final swap check required. */
+
+    fe448_invert(z2, z2);
+    fe448_mul(x2, x2, z2);
+    fe448_to_bytes(r, x2);
+
+    return 0;
+}
+
+#ifdef HAVE_ED448
+/* Check whether field element is not 0.
+ * Must convert to a normalized form before checking.
+ *
+ * a [in] Field element.
+ * returns 0 when zero, and any other value otherwise.
+ */
+int fe448_isnonzero(const int32_t* a)
+{
+    uint8_t b[56];
+    int i;
+    uint8_t c = 0;
+    fe448_to_bytes(b, a);
+    for (i = 0; i < 56; i++)
+        c |= b[i];
+    return c;
+}
+
+/* Check whether field element is negative.
+ * Must convert to a normalized form before checking.
+ *
+ * a [in] Field element.
+ * returns 1 when negative, and 0 otherwise.
+ */
+int fe448_isnegative(const int32_t* a)
+{
+    uint8_t b[56];
+    fe448_to_bytes(b, a);
+    return b[0] & 1;
+}
+
+/* Negates the field element. r = -a
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element.
+ */
+void fe448_neg(int32_t* r, const int32_t* a)
+{
+    r[0] = -a[0];
+    r[1] = -a[1];
+    r[2] = -a[2];
+    r[3] = -a[3];
+    r[4] = -a[4];
+    r[5] = -a[5];
+    r[6] = -a[6];
+    r[7] = -a[7];
+    r[8] = -a[8];
+    r[9] = -a[9];
+    r[10] = -a[10];
+    r[11] = -a[11];
+    r[12] = -a[12];
+    r[13] = -a[13];
+    r[14] = -a[14];
+    r[15] = -a[15];
+}
+
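
fe448_isnonzero/fe448_isnegative above serialize to the canonical 56-byte encoding first because the limb form is redundant (many limb vectors name the same residue); the byte scan then accumulates with OR so it takes no data-dependent branch, and the "negative" test is the usual low-bit sign convention from RFC 8032. A generic standalone sketch of the branch-free scan (hypothetical, for illustration):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    uint8_t b[56];
    uint8_t c = 0;
    int i;
    memset(b, 0, sizeof(b));
    b[37] = 0x20;              /* one nonzero byte anywhere in the encoding */
    for (i = 0; i < 56; i++)   /* OR-accumulate: no secret-dependent branch */
        c |= b[i];
    printf("nonzero=%d negative=%d\n", c != 0, b[0] & 1);
    return 0;
}
```

+/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
+ * Used for calculating y-ordinate from x-ordinate for Ed448.
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to exponentiate.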
+ */ +void fe448_pow_2_446_222_1(int32_t* r, const int32_t* a) +{ + int32_t t1[16]; + int32_t t2[16]; + int32_t t3[16]; + int32_t t4[16]; + int32_t t5[16]; + int i; + + fe448_sqr(t3, a); + /* t3 = 2 */ + fe448_mul(t1, t3, a); + /* t1 = 3 */ + fe448_sqr(t5, t1); + /* t5 = 6 */ + fe448_mul(t5, t5, a); + /* t5 = 7 */ + fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2); + /* t2 = c */ + fe448_mul(t3, t2, t3); + /* t3 = e */ + fe448_mul(t1, t2, t1); + /* t1 = f */ + fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2); + /* t2 = 78 */ + fe448_mul(t5, t2, t5); + /* t5 = 7f */ + fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2); + /* t2 = f0 */ + fe448_mul(t1, t2, t1); + /* t1 = ff */ + fe448_mul(t3, t3, t2); + /* t3 = fe */ + fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2); + /* t2 = 7f80 */ + fe448_mul(t5, t2, t5); + /* t5 = 7fff */ + fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2); + /* t2 = ff00 */ + fe448_mul(t1, t2, t1); + /* t1 = ffff */ + fe448_mul(t3, t3, t2); + /* t3 = fffe */ + fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2); + /* t2 = 3fff8000 */ + fe448_mul(t5, t2, t5); + /* t5 = 3fffffff */ + fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2); + /* t2 = ffff0000 */ + fe448_mul(t1, t2, t1); + /* t1 = ffffffff */ + fe448_mul(t3, t3, t2); + /* t3 = fffffffe */ + fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2); + /* t2 = ffffffff00000000 */ + fe448_mul(t2, t2, t1); + /* t2 = ffffffffffffffff */ + fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1); + /* t1 = ffffffffffffffff0000000000000000 */ + fe448_mul(t1, t1, t2); + /* t1 = ffffffffffffffffffffffffffffffff */ + fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1); + /* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */ + fe448_mul(t4, t1, t2); + /* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */ + fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2); + /* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */ + fe448_mul(t3, t3, t2); + /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */ + fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1); + /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */ + fe448_mul(t1, t1, t4); + /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */ + fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1); + /* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */ + fe448_mul(r, t5, t1); + /* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */ +} + +/* Constant time, conditional move of b into a. + * a is not changed if the condition is 0. + * + * a A field element. + * b A field element. + * c If 1 then copy and if 0 then don't copy. 
+ */
+void fe448_cmov(int32_t* a, const int32_t* b, int c)
+{
+    int32_t m = -(int32_t)c;
+    int32_t t0 = m & (a[0] ^ b[0]);
+    int32_t t1 = m & (a[1] ^ b[1]);
+    int32_t t2 = m & (a[2] ^ b[2]);
+    int32_t t3 = m & (a[3] ^ b[3]);
+    int32_t t4 = m & (a[4] ^ b[4]);
+    int32_t t5 = m & (a[5] ^ b[5]);
+    int32_t t6 = m & (a[6] ^ b[6]);
+    int32_t t7 = m & (a[7] ^ b[7]);
+    int32_t t8 = m & (a[8] ^ b[8]);
+    int32_t t9 = m & (a[9] ^ b[9]);
+    int32_t t10 = m & (a[10] ^ b[10]);
+    int32_t t11 = m & (a[11] ^ b[11]);
+    int32_t t12 = m & (a[12] ^ b[12]);
+    int32_t t13 = m & (a[13] ^ b[13]);
+    int32_t t14 = m & (a[14] ^ b[14]);
+    int32_t t15 = m & (a[15] ^ b[15]);
+
+    a[0] ^= t0;
+    a[1] ^= t1;
+    a[2] ^= t2;
+    a[3] ^= t3;
+    a[4] ^= t4;
+    a[5] ^= t5;
+    a[6] ^= t6;
+    a[7] ^= t7;
+    a[8] ^= t8;
+    a[9] ^= t9;
+    a[10] ^= t10;
+    a[11] ^= t11;
+    a[12] ^= t12;
+    a[13] ^= t13;
+    a[14] ^= t14;
+    a[15] ^= t15;
+}
+
+#endif /* HAVE_ED448 */
+#endif
+
+#endif /* HAVE_CURVE448 || HAVE_ED448 */
diff --git a/client/wolfssl/wolfcrypt/src/fe_low_mem.c b/client/wolfssl/wolfcrypt/src/fe_low_mem.c
new file mode 100644
index 0000000..13c88cb
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/fe_low_mem.c
@@ -0,0 +1,611 @@
+/* fe_low_mem.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+/* Based on Daniel Beer's public domain work.
+ */
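
Everything in fe_low_mem.c below stays byte-oriented to keep memory use down: field elements are 32 little-endian bytes and reduction uses 2^255 == 19 (mod 2^255 - 19) by stripping bit 255 and adding 19 back in, as fe_normalize() and lm_add() later in this file do. A minimal standalone sketch of that top-bit fold (hypothetical helper name, for illustration only):

```c
#include <stdint.h>
#include <stdio.h>

static void fold_top_bit(uint8_t x[32])
{
    unsigned int c = (x[31] >> 7) * 19;  /* 19 for each time bit 255 was set */
    int i;
    x[31] &= 127;                        /* clear bit 255 */
    for (i = 0; i < 32; i++) {           /* propagate the add through bytes */
        c += x[i];
        x[i] = (uint8_t)c;
        c >>= 8;
    }
}

int main(void)
{
    uint8_t x[32] = {0};
    x[31] = 0x80;                        /* the value 2^255 */
    fold_top_bit(x);
    printf("2^255 mod (2^255 - 19) = %d\n", x[0]);   /* prints 19 */
    return 0;
}
```
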
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if defined(HAVE_CURVE25519) || defined(HAVE_ED25519)
+#if defined(CURVE25519_SMALL) || defined(ED25519_SMALL) /* use slower code that takes less memory */
+
+#include <wolfssl/wolfcrypt/fe_operations.h>
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+void fprime_copy(byte *x, const byte *a)
+{
+    int i;
+    for (i = 0; i < F25519_SIZE; i++)
+        x[i] = a[i];
+}
+
+
+void lm_copy(byte* x, const byte* a)
+{
+    int i;
+    for (i = 0; i < F25519_SIZE; i++)
+        x[i] = a[i];
+}
+
+#if ((defined(HAVE_CURVE25519) && !defined(CURVE25519_SMALL)) || \
+     (defined(HAVE_ED25519) && !defined(ED25519_SMALL))) && \
+    !defined(FREESCALE_LTC_ECC)
+    /* to be Complementary to fe_low_mem.c */
+#else
+void fe_init(void)
+{
+}
+#endif
+
+#ifdef CURVE25519_SMALL
+
+/* Double an X-coordinate */
+static void xc_double(byte *x3, byte *z3,
+                      const byte *x1, const byte *z1)
+{
+    /* Explicit formulas database: dbl-1987-m
+     *
+     * source 1987 Montgomery "Speeding the Pollard and elliptic
+     *        curve methods of factorization", page 261, fourth display
+     * compute X3 = (X1^2-Z1^2)^2
+     * compute Z3 = 4 X1 Z1 (X1^2 + a X1 Z1 + Z1^2)
+     */
+    byte x1sq[F25519_SIZE];
+    byte z1sq[F25519_SIZE];
+    byte x1z1[F25519_SIZE];
+    byte a[F25519_SIZE];
+
+    fe_mul__distinct(x1sq, x1, x1);
+    fe_mul__distinct(z1sq, z1, z1);
+    fe_mul__distinct(x1z1, x1, z1);
+
+    lm_sub(a, x1sq, z1sq);
+    fe_mul__distinct(x3, a, a);
+
+    fe_mul_c(a, x1z1, 486662);
+    lm_add(a, x1sq, a);
+    lm_add(a, z1sq, a);
+    fe_mul__distinct(x1sq, x1z1, a);
+    fe_mul_c(z3, x1sq, 4);
+}
+
+
+/* Differential addition */
+static void xc_diffadd(byte *x5, byte *z5,
+                       const byte *x1, const byte *z1,
+                       const byte *x2, const byte *z2,
+                       const byte *x3, const byte *z3)
+{
+    /* Explicit formulas database: dbl-1987-m3
+     *
+     * source 1987 Montgomery "Speeding the Pollard and elliptic curve
+     *        methods of factorization", page 261, fifth display, plus
+     *        common-subexpression elimination
+     * compute A = X2+Z2
+     * compute B = X2-Z2
+     * compute C = X3+Z3
+     * compute D = X3-Z3
+     * compute DA = D A
+     * compute CB = C B
+     * compute X5 = Z1(DA+CB)^2
+     * compute Z5 = X1(DA-CB)^2
+     */
+    byte da[F25519_SIZE];
+    byte cb[F25519_SIZE];
+    byte a[F25519_SIZE];
+    byte b[F25519_SIZE];
+
+    lm_add(a, x2, z2);
+    lm_sub(b, x3, z3);  /* D */
+    fe_mul__distinct(da, a, b);
+
+    lm_sub(b, x2, z2);
+    lm_add(a, x3, z3);  /* C */
+    fe_mul__distinct(cb, a, b);
+
+    lm_add(a, da, cb);
+    fe_mul__distinct(b, a, a);
+    fe_mul__distinct(x5, z1, b);
+
+    lm_sub(a, da, cb);
+    fe_mul__distinct(b, a, a);
+    fe_mul__distinct(z5, x1, b);
+}
+
+#ifndef FREESCALE_LTC_ECC
+int curve25519(byte *result, byte *e, byte *q)
+{
+    /* Current point: P_m */
+    byte xm[F25519_SIZE];
+    byte zm[F25519_SIZE] = {1};
+
+    /* Predecessor: P_(m-1) */
+    byte xm1[F25519_SIZE] = {1};
+    byte zm1[F25519_SIZE] = {0};
+
+    int i;
+
+    /* Note: bit 254 is assumed to be 1 */
+    lm_copy(xm, q);
+
+    for (i = 253; i >= 0; i--) {
+        const int bit = (e[i >> 3] >> (i & 7)) & 1;
+        byte xms[F25519_SIZE];
+        byte zms[F25519_SIZE];
+
+        /* From P_m and P_(m-1), compute P_(2m) and P_(2m-1) */
+        xc_diffadd(xm1, zm1, q, f25519_one, xm, zm, xm1, zm1);
+        xc_double(xm, zm, xm, zm);
+
+        /* Compute P_(2m+1) */
+        xc_diffadd(xms, zms, xm1, zm1, xm, zm, q, f25519_one);
+
+        /* Select:
+         *   bit = 1 --> (P_(2m+1), P_(2m))
+         *   bit = 0 --> (P_(2m), P_(2m-1))
+         */
+        fe_select(xm1, xm1, xm, bit);
+        fe_select(zm1, zm1, zm, bit);
+        fe_select(xm, xm, xms, bit);
+        fe_select(zm, zm, zms, bit);
+    }
+
+    /* Freeze out of projective coordinates */
+
fe_inv__distinct(zm1, zm); + fe_mul__distinct(result, zm1, xm); + fe_normalize(result); + return 0; +} +#endif /* !FREESCALE_LTC_ECC */ +#endif /* CURVE25519_SMALL */ + + +static void raw_add(byte *x, const byte *p) +{ + word16 c = 0; + int i; + + for (i = 0; i < F25519_SIZE; i++) { + c += ((word16)x[i]) + ((word16)p[i]); + x[i] = (byte)c; + c >>= 8; + } +} + + +static void raw_try_sub(byte *x, const byte *p) +{ + byte minusp[F25519_SIZE]; + word16 c = 0; + int i; + + for (i = 0; i < F25519_SIZE; i++) { + c = ((word16)x[i]) - ((word16)p[i]) - c; + minusp[i] = (byte)c; + c = (c >> 8) & 1; + } + + fprime_select(x, minusp, x, (byte)c); +} + + +static int prime_msb(const byte *p) +{ + int i; + byte x; + int shift = 1; + int z = F25519_SIZE - 1; + + /* + Test for any hot bits. + As soon as one instance is encountered set shift to 0. + */ + for (i = F25519_SIZE - 1; i >= 0; i--) { + shift &= ((shift ^ ((-p[i] | p[i]) >> 7)) & 1); + z -= shift; + } + x = p[z]; + z <<= 3; + shift = 1; + for (i = 0; i < 8; i++) { + shift &= ((-(x >> i) | (x >> i)) >> (7 - i) & 1); + z += shift; + } + + return z - 1; +} + + +void fprime_select(byte *dst, const byte *zero, const byte *one, byte condition) +{ + const byte mask = -condition; + int i; + + for (i = 0; i < F25519_SIZE; i++) + dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i])); +} + + +void fprime_add(byte *r, const byte *a, const byte *modulus) +{ + raw_add(r, a); + raw_try_sub(r, modulus); +} + + +void fprime_sub(byte *r, const byte *a, const byte *modulus) +{ + raw_add(r, modulus); + raw_try_sub(r, a); + raw_try_sub(r, modulus); +} + + +void fprime_mul(byte *r, const byte *a, const byte *b, + const byte *modulus) +{ + word16 c = 0; + int i,j; + + XMEMSET(r, 0, F25519_SIZE); + + for (i = prime_msb(modulus); i >= 0; i--) { + const byte bit = (b[i >> 3] >> (i & 7)) & 1; + byte plusa[F25519_SIZE]; + + for (j = 0; j < F25519_SIZE; j++) { + c |= ((word16)r[j]) << 1; + r[j] = (byte)c; + c >>= 8; + } + raw_try_sub(r, modulus); + + fprime_copy(plusa, r); + fprime_add(plusa, a, modulus); + + fprime_select(r, r, plusa, bit); + } +} + + +void fe_load(byte *x, word32 c) +{ + word32 i; + + for (i = 0; i < sizeof(c); i++) { + x[i] = c; + c >>= 8; + } + + for (; i < F25519_SIZE; i++) + x[i] = 0; +} + + +void fe_normalize(byte *x) +{ + byte minusp[F25519_SIZE]; + word16 c; + int i; + + /* Reduce using 2^255 = 19 mod p */ + c = (x[31] >> 7) * 19; + x[31] &= 127; + + for (i = 0; i < F25519_SIZE; i++) { + c += x[i]; + x[i] = (byte)c; + c >>= 8; + } + + /* The number is now less than 2^255 + 18, and therefore less than + * 2p. Try subtracting p, and conditionally load the subtracted + * value if underflow did not occur. 
+ */ + c = 19; + + for (i = 0; i + 1 < F25519_SIZE; i++) { + c += x[i]; + minusp[i] = (byte)c; + c >>= 8; + } + + c += ((word16)x[i]) - 128; + minusp[31] = (byte)c; + + /* Load x-p if no underflow */ + fe_select(x, minusp, x, (c >> 15) & 1); +} + + +void fe_select(byte *dst, + const byte *zero, const byte *one, + byte condition) +{ + const byte mask = -condition; + int i; + + for (i = 0; i < F25519_SIZE; i++) + dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i])); +} + + +void lm_add(byte* r, const byte* a, const byte* b) +{ + word16 c = 0; + int i; + + /* Add */ + for (i = 0; i < F25519_SIZE; i++) { + c >>= 8; + c += ((word16)a[i]) + ((word16)b[i]); + r[i] = (byte)c; + } + + /* Reduce with 2^255 = 19 mod p */ + r[31] &= 127; + c = (c >> 7) * 19; + + for (i = 0; i < F25519_SIZE; i++) { + c += r[i]; + r[i] = (byte)c; + c >>= 8; + } +} + + +void lm_sub(byte* r, const byte* a, const byte* b) +{ + word32 c = 0; + int i; + + /* Calculate a + 2p - b, to avoid underflow */ + c = 218; + for (i = 0; i + 1 < F25519_SIZE; i++) { + c += 65280 + ((word32)a[i]) - ((word32)b[i]); + r[i] = c; + c >>= 8; + } + + c += ((word32)a[31]) - ((word32)b[31]); + r[31] = c & 127; + c = (c >> 7) * 19; + + for (i = 0; i < F25519_SIZE; i++) { + c += r[i]; + r[i] = c; + c >>= 8; + } +} + + +void lm_neg(byte* r, const byte* a) +{ + word32 c = 0; + int i; + + /* Calculate 2p - a, to avoid underflow */ + c = 218; + for (i = 0; i + 1 < F25519_SIZE; i++) { + c += 65280 - ((word32)a[i]); + r[i] = c; + c >>= 8; + } + + c -= ((word32)a[31]); + r[31] = c & 127; + c = (c >> 7) * 19; + + for (i = 0; i < F25519_SIZE; i++) { + c += r[i]; + r[i] = c; + c >>= 8; + } +} + + +void fe_mul__distinct(byte *r, const byte *a, const byte *b) +{ + word32 c = 0; + int i; + + for (i = 0; i < F25519_SIZE; i++) { + int j; + + c >>= 8; + for (j = 0; j <= i; j++) + c += ((word32)a[j]) * ((word32)b[i - j]); + + for (; j < F25519_SIZE; j++) + c += ((word32)a[j]) * + ((word32)b[i + F25519_SIZE - j]) * 38; + + r[i] = c; + } + + r[31] &= 127; + c = (c >> 7) * 19; + + for (i = 0; i < F25519_SIZE; i++) { + c += r[i]; + r[i] = c; + c >>= 8; + } +} + + +void lm_mul(byte *r, const byte* a, const byte *b) +{ + byte tmp[F25519_SIZE]; + + fe_mul__distinct(tmp, a, b); + lm_copy(r, tmp); +} + + +void fe_mul_c(byte *r, const byte *a, word32 b) +{ + word32 c = 0; + int i; + + for (i = 0; i < F25519_SIZE; i++) { + c >>= 8; + c += b * ((word32)a[i]); + r[i] = c; + } + + r[31] &= 127; + c >>= 7; + c *= 19; + + for (i = 0; i < F25519_SIZE; i++) { + c += r[i]; + r[i] = c; + c >>= 8; + } +} + + +void fe_inv__distinct(byte *r, const byte *x) +{ + byte s[F25519_SIZE]; + int i; + + /* This is a prime field, so by Fermat's little theorem: + * + * x^(p-1) = 1 mod p + * + * Therefore, raise to (p-2) = 2^255-21 to get a multiplicative + * inverse. + * + * This is a 255-bit binary number with the digits: + * + * 11111111... 01011 + * + * We compute the result by the usual binary chain, but + * alternate between keeping the accumulator in r and s, so as + * to avoid copying temporaries. 
+ */
+
+    /* 1 1 */
+    fe_mul__distinct(s, x, x);
+    fe_mul__distinct(r, s, x);
+
+    /* 1 x 248 */
+    for (i = 0; i < 248; i++) {
+        fe_mul__distinct(s, r, r);
+        fe_mul__distinct(r, s, x);
+    }
+
+    /* 0 */
+    fe_mul__distinct(s, r, r);
+
+    /* 1 */
+    fe_mul__distinct(r, s, s);
+    fe_mul__distinct(s, r, x);
+
+    /* 0 */
+    fe_mul__distinct(r, s, s);
+
+    /* 1 */
+    fe_mul__distinct(s, r, r);
+    fe_mul__distinct(r, s, x);
+
+    /* 1 */
+    fe_mul__distinct(s, r, r);
+    fe_mul__distinct(r, s, x);
+}
+
+
+void lm_invert(byte *r, const byte *x)
+{
+    byte tmp[F25519_SIZE];
+
+    fe_inv__distinct(tmp, x);
+    lm_copy(r, tmp);
+}
+
+
+/* Raise x to the power of (p-5)/8 = 2^252-3, using s for temporary
+ * storage.
+ */
+static void exp2523(byte *r, const byte *x, byte *s)
+{
+    int i;
+
+    /* This number is a 252-bit number with the binary expansion:
+     *
+     *   111111... 01
+     */
+
+    /* 1 1 */
+    fe_mul__distinct(r, x, x);
+    fe_mul__distinct(s, r, x);
+
+    /* 1 x 248 */
+    for (i = 0; i < 248; i++) {
+        fe_mul__distinct(r, s, s);
+        fe_mul__distinct(s, r, x);
+    }
+
+    /* 0 */
+    fe_mul__distinct(r, s, s);
+
+    /* 1 */
+    fe_mul__distinct(s, r, r);
+    fe_mul__distinct(r, s, x);
+}
+
+
+void fe_sqrt(byte *r, const byte *a)
+{
+    byte v[F25519_SIZE];
+    byte i[F25519_SIZE];
+    byte x[F25519_SIZE];
+    byte y[F25519_SIZE];
+
+    /* v = (2a)^((p-5)/8) [x = 2a] */
+    fe_mul_c(x, a, 2);
+    exp2523(v, x, y);
+
+    /* i = 2av^2 - 1 */
+    fe_mul__distinct(y, v, v);
+    fe_mul__distinct(i, x, y);
+    fe_load(y, 1);
+    lm_sub(i, i, y);
+
+    /* r = avi */
+    fe_mul__distinct(x, v, a);
+    fe_mul__distinct(r, x, i);
+}
+
+#endif /* CURVE25519_SMALL || ED25519_SMALL */
+#endif /* HAVE_CURVE25519 || HAVE_ED25519 */
diff --git a/client/wolfssl/wolfcrypt/src/fe_operations.c b/client/wolfssl/wolfcrypt/src/fe_operations.c
new file mode 100644
index 0000000..1e1c92b
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/fe_operations.c
@@ -0,0 +1,1438 @@
+/* fe_operations.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+ /* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work. */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if defined(HAVE_CURVE25519) || defined(HAVE_ED25519)
+#if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) /* run when not defined to use small memory math */
+
+#include <wolfssl/wolfcrypt/fe_operations.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#ifdef CURVED25519_X64
+/* Assembly code in fe_x25519_asm.* */
+#elif defined(WOLFSSL_ARMASM)
+/* Assembly code in fe_armv[78]_x25519.* */
+#elif defined(CURVED25519_128BIT)
+#include "fe_x25519_128.i"
+#else
+
+#if defined(HAVE_CURVE25519) || \
+    (defined(HAVE_ED25519) && !defined(ED25519_SMALL))
+/*
+fe means field element.
+Here the field is \Z/(2^255-19).
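+The representation is radix 2^25.5: even-numbered entries carry 26
+bits and odd-numbered entries 25 bits, so entry i is weighted by
+2^ceil(25.5*i), which gives the exponents 0,26,51,77,...,230 below.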
+An element t, entries t[0]...t[9], represents the integer +t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9]. +Bounds on each t[i] vary depending on context. +*/ + +uint64_t load_3(const unsigned char *in) +{ + uint64_t result; + result = (uint64_t) in[0]; + result |= ((uint64_t) in[1]) << 8; + result |= ((uint64_t) in[2]) << 16; + return result; +} + + +uint64_t load_4(const unsigned char *in) +{ + uint64_t result; + result = (uint64_t) in[0]; + result |= ((uint64_t) in[1]) << 8; + result |= ((uint64_t) in[2]) << 16; + result |= ((uint64_t) in[3]) << 24; + return result; +} +#endif + +/* +h = 1 +*/ + +void fe_1(fe h) +{ + h[0] = 1; + h[1] = 0; + h[2] = 0; + h[3] = 0; + h[4] = 0; + h[5] = 0; + h[6] = 0; + h[7] = 0; + h[8] = 0; + h[9] = 0; +} + + +/* +h = 0 +*/ + +void fe_0(fe h) +{ + h[0] = 0; + h[1] = 0; + h[2] = 0; + h[3] = 0; + h[4] = 0; + h[5] = 0; + h[6] = 0; + h[7] = 0; + h[8] = 0; + h[9] = 0; +} + + +#if ((defined(HAVE_CURVE25519) && !defined(CURVE25519_SMALL)) || \ + (defined(HAVE_ED25519) && !defined(ED25519_SMALL))) && \ + !defined(FREESCALE_LTC_ECC) +/* to be Complementary to fe_low_mem.c */ +void fe_init(void) +{ +} +#endif + +#if defined(HAVE_CURVE25519) && !defined(CURVE25519_SMALL) && \ + !defined(FREESCALE_LTC_ECC) +int curve25519(byte* q, byte* n, byte* p) +{ +#if 0 + unsigned char e[32]; +#endif + fe x1 = {0}; + fe x2 = {0}; + fe z2 = {0}; + fe x3 = {0}; + fe z3 = {0}; + fe tmp0 = {0}; + fe tmp1 = {0}; + int pos = 0; + unsigned int swap = 0; + unsigned int b = 0; + + /* Clamp already done during key generation and import */ +#if 0 + { + unsigned int i; + for (i = 0;i < 32;++i) e[i] = n[i]; + e[0] &= 248; + e[31] &= 127; + e[31] |= 64; + } +#endif + + fe_frombytes(x1,p); + fe_1(x2); + fe_0(z2); + fe_copy(x3,x1); + fe_1(z3); + + swap = 0; + for (pos = 254;pos >= 0;--pos) { +#if 0 + b = e[pos / 8] >> (pos & 7); +#else + b = n[pos / 8] >> (pos & 7); +#endif + b &= 1; + swap ^= b; + fe_cswap(x2,x3,swap); + fe_cswap(z2,z3,swap); + swap = b; + + /* montgomery */ + fe_sub(tmp0,x3,z3); + fe_sub(tmp1,x2,z2); + fe_add(x2,x2,z2); + fe_add(z2,x3,z3); + fe_mul(z3,tmp0,x2); + fe_mul(z2,z2,tmp1); + fe_sq(tmp0,tmp1); + fe_sq(tmp1,x2); + fe_add(x3,z3,z2); + fe_sub(z2,z3,z2); + fe_mul(x2,tmp1,tmp0); + fe_sub(tmp1,tmp1,tmp0); + fe_sq(z2,z2); + fe_mul121666(z3,tmp1); + fe_sq(x3,x3); + fe_add(tmp0,tmp0,z3); + fe_mul(z3,x1,z2); + fe_mul(z2,tmp1,tmp0); + } + fe_cswap(x2,x3,swap); + fe_cswap(z2,z3,swap); + + fe_invert(z2,z2); + fe_mul(x2,x2,z2); + fe_tobytes(q,x2); + + return 0; +} +#endif /* HAVE_CURVE25519 && !CURVE25519_SMALL && !FREESCALE_LTC_ECC */ + + +/* +h = f * f +Can overlap h with f. + +Preconditions: + |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. + +Postconditions: + |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. +*/ + +/* +See fe_mul.c for discussion of implementation strategy. 
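+
+Each carry step below has the shape (shown for a 26-bit limb; 25-bit
+limbs use 1UL<<24 and a shift by 25):
+
+    carry = (h + (int64_t) (1UL<<25)) >> 26;
+    h_next += carry;
+    h -= carry << 26;
+
+where h_next names the next-higher limb.  Adding 2^25 before the
+arithmetic shift makes this a round-to-nearest division by 2^26, so
+afterwards |h| <= 2^25 while the value h + 2^26*h_next is unchanged.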
+*/ + +void fe_sq(fe h,const fe f) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int32_t f0_2 = 2 * f0; + int32_t f1_2 = 2 * f1; + int32_t f2_2 = 2 * f2; + int32_t f3_2 = 2 * f3; + int32_t f4_2 = 2 * f4; + int32_t f5_2 = 2 * f5; + int32_t f6_2 = 2 * f6; + int32_t f7_2 = 2 * f7; + int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ + int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ + int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ + int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ + int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ + int64_t f0f0 = f0 * (int64_t) f0; + int64_t f0f1_2 = f0_2 * (int64_t) f1; + int64_t f0f2_2 = f0_2 * (int64_t) f2; + int64_t f0f3_2 = f0_2 * (int64_t) f3; + int64_t f0f4_2 = f0_2 * (int64_t) f4; + int64_t f0f5_2 = f0_2 * (int64_t) f5; + int64_t f0f6_2 = f0_2 * (int64_t) f6; + int64_t f0f7_2 = f0_2 * (int64_t) f7; + int64_t f0f8_2 = f0_2 * (int64_t) f8; + int64_t f0f9_2 = f0_2 * (int64_t) f9; + int64_t f1f1_2 = f1_2 * (int64_t) f1; + int64_t f1f2_2 = f1_2 * (int64_t) f2; + int64_t f1f3_4 = f1_2 * (int64_t) f3_2; + int64_t f1f4_2 = f1_2 * (int64_t) f4; + int64_t f1f5_4 = f1_2 * (int64_t) f5_2; + int64_t f1f6_2 = f1_2 * (int64_t) f6; + int64_t f1f7_4 = f1_2 * (int64_t) f7_2; + int64_t f1f8_2 = f1_2 * (int64_t) f8; + int64_t f1f9_76 = f1_2 * (int64_t) f9_38; + int64_t f2f2 = f2 * (int64_t) f2; + int64_t f2f3_2 = f2_2 * (int64_t) f3; + int64_t f2f4_2 = f2_2 * (int64_t) f4; + int64_t f2f5_2 = f2_2 * (int64_t) f5; + int64_t f2f6_2 = f2_2 * (int64_t) f6; + int64_t f2f7_2 = f2_2 * (int64_t) f7; + int64_t f2f8_38 = f2_2 * (int64_t) f8_19; + int64_t f2f9_38 = f2 * (int64_t) f9_38; + int64_t f3f3_2 = f3_2 * (int64_t) f3; + int64_t f3f4_2 = f3_2 * (int64_t) f4; + int64_t f3f5_4 = f3_2 * (int64_t) f5_2; + int64_t f3f6_2 = f3_2 * (int64_t) f6; + int64_t f3f7_76 = f3_2 * (int64_t) f7_38; + int64_t f3f8_38 = f3_2 * (int64_t) f8_19; + int64_t f3f9_76 = f3_2 * (int64_t) f9_38; + int64_t f4f4 = f4 * (int64_t) f4; + int64_t f4f5_2 = f4_2 * (int64_t) f5; + int64_t f4f6_38 = f4_2 * (int64_t) f6_19; + int64_t f4f7_38 = f4 * (int64_t) f7_38; + int64_t f4f8_38 = f4_2 * (int64_t) f8_19; + int64_t f4f9_38 = f4 * (int64_t) f9_38; + int64_t f5f5_38 = f5 * (int64_t) f5_38; + int64_t f5f6_38 = f5_2 * (int64_t) f6_19; + int64_t f5f7_76 = f5_2 * (int64_t) f7_38; + int64_t f5f8_38 = f5_2 * (int64_t) f8_19; + int64_t f5f9_76 = f5_2 * (int64_t) f9_38; + int64_t f6f6_19 = f6 * (int64_t) f6_19; + int64_t f6f7_38 = f6 * (int64_t) f7_38; + int64_t f6f8_38 = f6_2 * (int64_t) f8_19; + int64_t f6f9_38 = f6 * (int64_t) f9_38; + int64_t f7f7_38 = f7 * (int64_t) f7_38; + int64_t f7f8_38 = f7_2 * (int64_t) f8_19; + int64_t f7f9_76 = f7_2 * (int64_t) f9_38; + int64_t f8f8_19 = f8 * (int64_t) f8_19; + int64_t f8f9_38 = f8 * (int64_t) f9_38; + int64_t f9f9_38 = f9 * (int64_t) f9_38; + int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; + int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; + int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; + int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; + int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; + int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; + int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; + int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; + int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; + int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; + int64_t 
carry0;
+    int64_t carry1;
+    int64_t carry2;
+    int64_t carry3;
+    int64_t carry4;
+    int64_t carry5;
+    int64_t carry6;
+    int64_t carry7;
+    int64_t carry8;
+    int64_t carry9;
+
+    carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+    carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+
+    carry1 = (h1 + (int64_t) (1UL<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
+    carry5 = (h5 + (int64_t) (1UL<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
+
+    carry2 = (h2 + (int64_t) (1UL<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
+    carry6 = (h6 + (int64_t) (1UL<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
+
+    carry3 = (h3 + (int64_t) (1UL<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
+    carry7 = (h7 + (int64_t) (1UL<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
+
+    carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+    carry8 = (h8 + (int64_t) (1UL<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
+
+    carry9 = (h9 + (int64_t) (1UL<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
+
+    carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+
+    h[0] = (int32_t)h0;
+    h[1] = (int32_t)h1;
+    h[2] = (int32_t)h2;
+    h[3] = (int32_t)h3;
+    h[4] = (int32_t)h4;
+    h[5] = (int32_t)h5;
+    h[6] = (int32_t)h6;
+    h[7] = (int32_t)h7;
+    h[8] = (int32_t)h8;
+    h[9] = (int32_t)h9;
+}
+
+
+/*
+h = f + g
+Can overlap h with f or g.
+
+Preconditions:
+   |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+   |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+
+Postconditions:
+   |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+*/
+
+void fe_add(fe h,const fe f,const fe g)
+{
+    int32_t f0 = f[0];
+    int32_t f1 = f[1];
+    int32_t f2 = f[2];
+    int32_t f3 = f[3];
+    int32_t f4 = f[4];
+    int32_t f5 = f[5];
+    int32_t f6 = f[6];
+    int32_t f7 = f[7];
+    int32_t f8 = f[8];
+    int32_t f9 = f[9];
+    int32_t g0 = g[0];
+    int32_t g1 = g[1];
+    int32_t g2 = g[2];
+    int32_t g3 = g[3];
+    int32_t g4 = g[4];
+    int32_t g5 = g[5];
+    int32_t g6 = g[6];
+    int32_t g7 = g[7];
+    int32_t g8 = g[8];
+    int32_t g9 = g[9];
+    int32_t h0 = f0 + g0;
+    int32_t h1 = f1 + g1;
+    int32_t h2 = f2 + g2;
+    int32_t h3 = f3 + g3;
+    int32_t h4 = f4 + g4;
+    int32_t h5 = f5 + g5;
+    int32_t h6 = f6 + g6;
+    int32_t h7 = f7 + g7;
+    int32_t h8 = f8 + g8;
+    int32_t h9 = f9 + g9;
+    h[0] = h0;
+    h[1] = h1;
+    h[2] = h2;
+    h[3] = h3;
+    h[4] = h4;
+    h[5] = h5;
+    h[6] = h6;
+    h[7] = h7;
+    h[8] = h8;
+    h[9] = h9;
+}
+
+
+/*
+Preconditions:
+   |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+
+Write p=2^255-19; q=floor(h/p).
+Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
+
+Proof:
+   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
+   Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
+
+   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
+   Then 0<y<1.
+
+   Write r=h-pq.
+   Have 0<=r<=p-1=2^255-20.
+   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
+
+   Write x=r+19(2^-255)r+y.
+   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
+
+   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
+   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
+*/
+
+void fe_tobytes(unsigned char *s,const fe h)
+{
+    int32_t h0 = h[0];
+    int32_t h1 = h[1];
+    int32_t h2 = h[2];
+    int32_t h3 = h[3];
+    int32_t h4 = h[4];
+    int32_t h5 = h[5];
+    int32_t h6 = h[6];
+    int32_t h7 = h[7];
+    int32_t h8 = h[8];
+    int32_t h9 = h[9];
+    int32_t q;
+    int32_t carry0;
+    int32_t carry1;
+    int32_t carry2;
+    int32_t carry3;
+    int32_t carry4;
+    int32_t carry5;
+    int32_t carry6;
+    int32_t carry7;
+    int32_t carry8;
+    int32_t carry9;
+
+    q = (19 * h9 + (((int32_t) 1) << 24)) >> 25;
+    q = (h0 + q) >> 26;
+    q = (h1 + q) >> 25;
+    q = (h2 + q) >> 26;
+    q = (h3 + q) >> 25;
+    q = (h4 + q) >> 26;
+    q = (h5 + q) >> 25;
+    q = (h6 + q) >> 26;
+    q = (h7 + q) >> 25;
+    q = (h8 + q) >> 26;
+    q = (h9 + q) >> 25;
+
+    /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
+    h0 += 19 * q;
+    /* Goal: Output h-2^255 q, which is between 0 and 2^255-20.
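+       Adding 19q above and dropping the final carry out of h9 (the
+       2^255 bit) below is the same as subtracting pq, since
+       pq = 2^255 q - 19 q.  For example h = p gives q = 1: h0 grows
+       by 19, the carry chain moves everything up, and the discarded
+       2^255 leaves the canonical encoding of 0.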
*/ + + carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26; + carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25; + carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26; + carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25; + carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26; + carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25; + carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26; + carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25; + carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26; + carry9 = h9 >> 25; h9 -= carry9 << 25; + /* h10 = carry9 */ + + /* + Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. + Have h0+...+2^230 h9 between 0 and 2^255-1; + evidently 2^255 h10-2^255 q = 0. + Goal: Output h0+...+2^230 h9. + */ + + s[0] = (byte)(h0 >> 0); + s[1] = (byte)(h0 >> 8); + s[2] = (byte)(h0 >> 16); + s[3] = (byte)((h0 >> 24) | (h1 << 2)); + s[4] = (byte)(h1 >> 6); + s[5] = (byte)(h1 >> 14); + s[6] = (byte)((h1 >> 22) | (h2 << 3)); + s[7] = (byte)(h2 >> 5); + s[8] = (byte)(h2 >> 13); + s[9] = (byte)((h2 >> 21) | (h3 << 5)); + s[10] = (byte)(h3 >> 3); + s[11] = (byte)(h3 >> 11); + s[12] = (byte)((h3 >> 19) | (h4 << 6)); + s[13] = (byte)(h4 >> 2); + s[14] = (byte)(h4 >> 10); + s[15] = (byte)(h4 >> 18); + s[16] = (byte)(h5 >> 0); + s[17] = (byte)(h5 >> 8); + s[18] = (byte)(h5 >> 16); + s[19] = (byte)((h5 >> 24) | (h6 << 1)); + s[20] = (byte)(h6 >> 7); + s[21] = (byte)(h6 >> 15); + s[22] = (byte)((h6 >> 23) | (h7 << 3)); + s[23] = (byte)(h7 >> 5); + s[24] = (byte)(h7 >> 13); + s[25] = (byte)((h7 >> 21) | (h8 << 4)); + s[26] = (byte)(h8 >> 4); + s[27] = (byte)(h8 >> 12); + s[28] = (byte)((h8 >> 20) | (h9 << 6)); + s[29] = (byte)(h9 >> 2); + s[30] = (byte)(h9 >> 10); + s[31] = (byte)(h9 >> 18); +} + + +/* +h = f - g +Can overlap h with f or g. + +Preconditions: + |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. + |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. + +Postconditions: + |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +*/ + +void fe_sub(fe h,const fe f,const fe g) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int32_t g0 = g[0]; + int32_t g1 = g[1]; + int32_t g2 = g[2]; + int32_t g3 = g[3]; + int32_t g4 = g[4]; + int32_t g5 = g[5]; + int32_t g6 = g[6]; + int32_t g7 = g[7]; + int32_t g8 = g[8]; + int32_t g9 = g[9]; + int32_t h0 = f0 - g0; + int32_t h1 = f1 - g1; + int32_t h2 = f2 - g2; + int32_t h3 = f3 - g3; + int32_t h4 = f4 - g4; + int32_t h5 = f5 - g5; + int32_t h6 = f6 - g6; + int32_t h7 = f7 - g7; + int32_t h8 = f8 - g8; + int32_t h9 = f9 - g9; + h[0] = h0; + h[1] = h1; + h[2] = h2; + h[3] = h3; + h[4] = h4; + h[5] = h5; + h[6] = h6; + h[7] = h7; + h[8] = h8; + h[9] = h9; +} + + +#if defined(HAVE_CURVE25519) || \ + (defined(HAVE_ED25519) && !defined(ED25519_SMALL)) +/* +Ignores top bit of h. 
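+
+A round-trip sketch (hypothetical usage of this file's fe_frombytes
+and fe_tobytes; XMEMCMP is wolfSSL's memcmp wrapper):
+
+    unsigned char in[32] = {1};
+    unsigned char out[32];
+    fe t;
+
+    fe_frombytes(t, in);
+    fe_tobytes(out, t);
+
+XMEMCMP(in, out, 32) == 0 then holds for any canonical (fully
+reduced) input, since fe_tobytes re-encodes mod 2^255-19.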
+*/ + +void fe_frombytes(fe h,const unsigned char *s) +{ + int64_t h0 = load_4(s); + int64_t h1 = load_3(s + 4) << 6; + int64_t h2 = load_3(s + 7) << 5; + int64_t h3 = load_3(s + 10) << 3; + int64_t h4 = load_3(s + 13) << 2; + int64_t h5 = load_4(s + 16); + int64_t h6 = load_3(s + 20) << 7; + int64_t h7 = load_3(s + 23) << 5; + int64_t h8 = load_3(s + 26) << 4; + int64_t h9 = (load_3(s + 29) & 8388607) << 2; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + carry9 = (h9 + (int64_t) (1UL<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; + carry1 = (h1 + (int64_t) (1UL<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; + carry3 = (h3 + (int64_t) (1UL<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; + carry5 = (h5 + (int64_t) (1UL<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; + carry7 = (h7 + (int64_t) (1UL<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; + + carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; + carry2 = (h2 + (int64_t) (1UL<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; + carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; + carry6 = (h6 + (int64_t) (1UL<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; + carry8 = (h8 + (int64_t) (1UL<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; + + h[0] = (int32_t)h0; + h[1] = (int32_t)h1; + h[2] = (int32_t)h2; + h[3] = (int32_t)h3; + h[4] = (int32_t)h4; + h[5] = (int32_t)h5; + h[6] = (int32_t)h6; + h[7] = (int32_t)h7; + h[8] = (int32_t)h8; + h[9] = (int32_t)h9; +} +#endif + + +void fe_invert(fe out,const fe z) +{ + fe t0 = {0}; + fe t1 = {0}; + fe t2 = {0}; + fe t3 = {0}; + int i = 0; + + /* pow225521 */ + fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0); + fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1); + fe_mul(t1,z,t1); + fe_mul(t0,t0,t1); + fe_sq(t2,t0); for (i = 1;i < 1;++i) fe_sq(t2,t2); + fe_mul(t1,t1,t2); + fe_sq(t2,t1); for (i = 1;i < 5;++i) fe_sq(t2,t2); + fe_mul(t1,t2,t1); + fe_sq(t2,t1); for (i = 1;i < 10;++i) fe_sq(t2,t2); + fe_mul(t2,t2,t1); + fe_sq(t3,t2); for (i = 1;i < 20;++i) fe_sq(t3,t3); + fe_mul(t2,t3,t2); + fe_sq(t2,t2); for (i = 1;i < 10;++i) fe_sq(t2,t2); + fe_mul(t1,t2,t1); + fe_sq(t2,t1); for (i = 1;i < 50;++i) fe_sq(t2,t2); + fe_mul(t2,t2,t1); + fe_sq(t3,t2); for (i = 1;i < 100;++i) fe_sq(t3,t3); + fe_mul(t2,t3,t2); + fe_sq(t2,t2); for (i = 1;i < 50;++i) fe_sq(t2,t2); + fe_mul(t1,t2,t1); + fe_sq(t1,t1); for (i = 1;i < 5;++i) fe_sq(t1,t1); + fe_mul(out,t1,t0); + + return; +} + + +/* +h = f +*/ + +void fe_copy(fe h,const fe f) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + h[0] = f0; + h[1] = f1; + h[2] = f2; + h[3] = f3; + h[4] = f4; + h[5] = f5; + h[6] = f6; + h[7] = f7; + h[8] = f8; + h[9] = f9; +} + + +/* +h = f * g +Can overlap h with f or g. + +Preconditions: + |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. + |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. + +Postconditions: + |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. +*/ + +/* +Notes on implementation strategy: + +Using schoolbook multiplication. +Karatsuba would save a little in some cost models. + +Most multiplications by 2 and 19 are 32-bit precomputations; +cheaper than 64-bit postcomputations. 
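+
+The 19s come from the congruence 2^255 == 19 (mod p): any cross term
+f_i*g_j with i+j >= 10 has weight at least 2^255 and wraps around
+multiplied by 19.  The extra factor 2 on some terms compensates for
+the mixed 26/25-bit radix; e.g. f1*g9 has weight 2^(26+230) = 2^256,
+which reduces as 2^256 == 2*19 = 38 (mod p), giving the f1g9_38 term
+in fe_mul below.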
+ +There is one remaining multiplication by 19 in the carry chain; +one *19 precomputation can be merged into this, +but the resulting data flow is considerably less clean. + +There are 12 carries below. +10 of them are 2-way parallelizable and vectorizable. +Can get away with 11 carries, but then data flow is much deeper. + +With tighter constraints on inputs can squeeze carries into int32. +*/ + +void fe_mul(fe h,const fe f,const fe g) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int32_t g0 = g[0]; + int32_t g1 = g[1]; + int32_t g2 = g[2]; + int32_t g3 = g[3]; + int32_t g4 = g[4]; + int32_t g5 = g[5]; + int32_t g6 = g[6]; + int32_t g7 = g[7]; + int32_t g8 = g[8]; + int32_t g9 = g[9]; + int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */ + int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */ + int32_t g3_19 = 19 * g3; + int32_t g4_19 = 19 * g4; + int32_t g5_19 = 19 * g5; + int32_t g6_19 = 19 * g6; + int32_t g7_19 = 19 * g7; + int32_t g8_19 = 19 * g8; + int32_t g9_19 = 19 * g9; + int32_t f1_2 = 2 * f1; + int32_t f3_2 = 2 * f3; + int32_t f5_2 = 2 * f5; + int32_t f7_2 = 2 * f7; + int32_t f9_2 = 2 * f9; + int64_t f0g0 = f0 * (int64_t) g0; + int64_t f0g1 = f0 * (int64_t) g1; + int64_t f0g2 = f0 * (int64_t) g2; + int64_t f0g3 = f0 * (int64_t) g3; + int64_t f0g4 = f0 * (int64_t) g4; + int64_t f0g5 = f0 * (int64_t) g5; + int64_t f0g6 = f0 * (int64_t) g6; + int64_t f0g7 = f0 * (int64_t) g7; + int64_t f0g8 = f0 * (int64_t) g8; + int64_t f0g9 = f0 * (int64_t) g9; + int64_t f1g0 = f1 * (int64_t) g0; + int64_t f1g1_2 = f1_2 * (int64_t) g1; + int64_t f1g2 = f1 * (int64_t) g2; + int64_t f1g3_2 = f1_2 * (int64_t) g3; + int64_t f1g4 = f1 * (int64_t) g4; + int64_t f1g5_2 = f1_2 * (int64_t) g5; + int64_t f1g6 = f1 * (int64_t) g6; + int64_t f1g7_2 = f1_2 * (int64_t) g7; + int64_t f1g8 = f1 * (int64_t) g8; + int64_t f1g9_38 = f1_2 * (int64_t) g9_19; + int64_t f2g0 = f2 * (int64_t) g0; + int64_t f2g1 = f2 * (int64_t) g1; + int64_t f2g2 = f2 * (int64_t) g2; + int64_t f2g3 = f2 * (int64_t) g3; + int64_t f2g4 = f2 * (int64_t) g4; + int64_t f2g5 = f2 * (int64_t) g5; + int64_t f2g6 = f2 * (int64_t) g6; + int64_t f2g7 = f2 * (int64_t) g7; + int64_t f2g8_19 = f2 * (int64_t) g8_19; + int64_t f2g9_19 = f2 * (int64_t) g9_19; + int64_t f3g0 = f3 * (int64_t) g0; + int64_t f3g1_2 = f3_2 * (int64_t) g1; + int64_t f3g2 = f3 * (int64_t) g2; + int64_t f3g3_2 = f3_2 * (int64_t) g3; + int64_t f3g4 = f3 * (int64_t) g4; + int64_t f3g5_2 = f3_2 * (int64_t) g5; + int64_t f3g6 = f3 * (int64_t) g6; + int64_t f3g7_38 = f3_2 * (int64_t) g7_19; + int64_t f3g8_19 = f3 * (int64_t) g8_19; + int64_t f3g9_38 = f3_2 * (int64_t) g9_19; + int64_t f4g0 = f4 * (int64_t) g0; + int64_t f4g1 = f4 * (int64_t) g1; + int64_t f4g2 = f4 * (int64_t) g2; + int64_t f4g3 = f4 * (int64_t) g3; + int64_t f4g4 = f4 * (int64_t) g4; + int64_t f4g5 = f4 * (int64_t) g5; + int64_t f4g6_19 = f4 * (int64_t) g6_19; + int64_t f4g7_19 = f4 * (int64_t) g7_19; + int64_t f4g8_19 = f4 * (int64_t) g8_19; + int64_t f4g9_19 = f4 * (int64_t) g9_19; + int64_t f5g0 = f5 * (int64_t) g0; + int64_t f5g1_2 = f5_2 * (int64_t) g1; + int64_t f5g2 = f5 * (int64_t) g2; + int64_t f5g3_2 = f5_2 * (int64_t) g3; + int64_t f5g4 = f5 * (int64_t) g4; + int64_t f5g5_38 = f5_2 * (int64_t) g5_19; + int64_t f5g6_19 = f5 * (int64_t) g6_19; + int64_t f5g7_38 = f5_2 * (int64_t) g7_19; + int64_t f5g8_19 = f5 * (int64_t) g8_19; + int64_t f5g9_38 
= f5_2 * (int64_t) g9_19; + int64_t f6g0 = f6 * (int64_t) g0; + int64_t f6g1 = f6 * (int64_t) g1; + int64_t f6g2 = f6 * (int64_t) g2; + int64_t f6g3 = f6 * (int64_t) g3; + int64_t f6g4_19 = f6 * (int64_t) g4_19; + int64_t f6g5_19 = f6 * (int64_t) g5_19; + int64_t f6g6_19 = f6 * (int64_t) g6_19; + int64_t f6g7_19 = f6 * (int64_t) g7_19; + int64_t f6g8_19 = f6 * (int64_t) g8_19; + int64_t f6g9_19 = f6 * (int64_t) g9_19; + int64_t f7g0 = f7 * (int64_t) g0; + int64_t f7g1_2 = f7_2 * (int64_t) g1; + int64_t f7g2 = f7 * (int64_t) g2; + int64_t f7g3_38 = f7_2 * (int64_t) g3_19; + int64_t f7g4_19 = f7 * (int64_t) g4_19; + int64_t f7g5_38 = f7_2 * (int64_t) g5_19; + int64_t f7g6_19 = f7 * (int64_t) g6_19; + int64_t f7g7_38 = f7_2 * (int64_t) g7_19; + int64_t f7g8_19 = f7 * (int64_t) g8_19; + int64_t f7g9_38 = f7_2 * (int64_t) g9_19; + int64_t f8g0 = f8 * (int64_t) g0; + int64_t f8g1 = f8 * (int64_t) g1; + int64_t f8g2_19 = f8 * (int64_t) g2_19; + int64_t f8g3_19 = f8 * (int64_t) g3_19; + int64_t f8g4_19 = f8 * (int64_t) g4_19; + int64_t f8g5_19 = f8 * (int64_t) g5_19; + int64_t f8g6_19 = f8 * (int64_t) g6_19; + int64_t f8g7_19 = f8 * (int64_t) g7_19; + int64_t f8g8_19 = f8 * (int64_t) g8_19; + int64_t f8g9_19 = f8 * (int64_t) g9_19; + int64_t f9g0 = f9 * (int64_t) g0; + int64_t f9g1_38 = f9_2 * (int64_t) g1_19; + int64_t f9g2_19 = f9 * (int64_t) g2_19; + int64_t f9g3_38 = f9_2 * (int64_t) g3_19; + int64_t f9g4_19 = f9 * (int64_t) g4_19; + int64_t f9g5_38 = f9_2 * (int64_t) g5_19; + int64_t f9g6_19 = f9 * (int64_t) g6_19; + int64_t f9g7_38 = f9_2 * (int64_t) g7_19; + int64_t f9g8_19 = f9 * (int64_t) g8_19; + int64_t f9g9_38 = f9_2 * (int64_t) g9_19; + int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38; + int64_t h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19; + int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38; + int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19; + int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38; + int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19; + int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38; + int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19; + int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38; + int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + /* + |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38)) + i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8 + |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19)) + i.e. 
|h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9 + */ + + carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; + carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; + /* |h0| <= 2^25 */ + /* |h4| <= 2^25 */ + /* |h1| <= 1.71*2^59 */ + /* |h5| <= 1.71*2^59 */ + + carry1 = (h1 + (int64_t) (1UL<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; + carry5 = (h5 + (int64_t) (1UL<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; + /* |h1| <= 2^24; from now on fits into int32 */ + /* |h5| <= 2^24; from now on fits into int32 */ + /* |h2| <= 1.41*2^60 */ + /* |h6| <= 1.41*2^60 */ + + carry2 = (h2 + (int64_t) (1UL<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; + carry6 = (h6 + (int64_t) (1UL<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; + /* |h2| <= 2^25; from now on fits into int32 unchanged */ + /* |h6| <= 2^25; from now on fits into int32 unchanged */ + /* |h3| <= 1.71*2^59 */ + /* |h7| <= 1.71*2^59 */ + + carry3 = (h3 + (int64_t) (1UL<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; + carry7 = (h7 + (int64_t) (1UL<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; + /* |h3| <= 2^24; from now on fits into int32 unchanged */ + /* |h7| <= 2^24; from now on fits into int32 unchanged */ + /* |h4| <= 1.72*2^34 */ + /* |h8| <= 1.41*2^60 */ + + carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; + carry8 = (h8 + (int64_t) (1UL<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; + /* |h4| <= 2^25; from now on fits into int32 unchanged */ + /* |h8| <= 2^25; from now on fits into int32 unchanged */ + /* |h5| <= 1.01*2^24 */ + /* |h9| <= 1.71*2^59 */ + + carry9 = (h9 + (int64_t) (1UL<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; + /* |h9| <= 2^24; from now on fits into int32 unchanged */ + /* |h0| <= 1.1*2^39 */ + + carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; + /* |h0| <= 2^25; from now on fits into int32 unchanged */ + /* |h1| <= 1.01*2^24 */ + + h[0] = (int32_t)h0; + h[1] = (int32_t)h1; + h[2] = (int32_t)h2; + h[3] = (int32_t)h3; + h[4] = (int32_t)h4; + h[5] = (int32_t)h5; + h[6] = (int32_t)h6; + h[7] = (int32_t)h7; + h[8] = (int32_t)h8; + h[9] = (int32_t)h9; +} + + +/* +Replace (f,g) with (g,f) if b == 1; +replace (f,g) with (f,g) if b == 0. + +Preconditions: b in {0,1}. +*/ + +void fe_cswap(fe f, fe g, int b) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int32_t g0 = g[0]; + int32_t g1 = g[1]; + int32_t g2 = g[2]; + int32_t g3 = g[3]; + int32_t g4 = g[4]; + int32_t g5 = g[5]; + int32_t g6 = g[6]; + int32_t g7 = g[7]; + int32_t g8 = g[8]; + int32_t g9 = g[9]; + int32_t x0 = f0 ^ g0; + int32_t x1 = f1 ^ g1; + int32_t x2 = f2 ^ g2; + int32_t x3 = f3 ^ g3; + int32_t x4 = f4 ^ g4; + int32_t x5 = f5 ^ g5; + int32_t x6 = f6 ^ g6; + int32_t x7 = f7 ^ g7; + int32_t x8 = f8 ^ g8; + int32_t x9 = f9 ^ g9; + b = -b; + x0 &= b; + x1 &= b; + x2 &= b; + x3 &= b; + x4 &= b; + x5 &= b; + x6 &= b; + x7 &= b; + x8 &= b; + x9 &= b; + f[0] = f0 ^ x0; + f[1] = f1 ^ x1; + f[2] = f2 ^ x2; + f[3] = f3 ^ x3; + f[4] = f4 ^ x4; + f[5] = f5 ^ x5; + f[6] = f6 ^ x6; + f[7] = f7 ^ x7; + f[8] = f8 ^ x8; + f[9] = f9 ^ x9; + g[0] = g0 ^ x0; + g[1] = g1 ^ x1; + g[2] = g2 ^ x2; + g[3] = g3 ^ x3; + g[4] = g4 ^ x4; + g[5] = g5 ^ x5; + g[6] = g6 ^ x6; + g[7] = g7 ^ x7; + g[8] = g8 ^ x8; + g[9] = g9 ^ x9; +} + + +/* +h = f * 121666 +Can overlap h with f. 
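+
+(The constant 121666 is (486662 + 2)/4, where 486662 is the A
+coefficient of the Montgomery form y^2 = x^3 + A*x^2 + x of
+curve25519; it enters through the x-only doubling formula used in
+the ladder of curve25519() above.)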
+ +Preconditions: + |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. + +Postconditions: + |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. +*/ + +void fe_mul121666(fe h,fe f) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int64_t h0 = f0 * (int64_t) 121666; + int64_t h1 = f1 * (int64_t) 121666; + int64_t h2 = f2 * (int64_t) 121666; + int64_t h3 = f3 * (int64_t) 121666; + int64_t h4 = f4 * (int64_t) 121666; + int64_t h5 = f5 * (int64_t) 121666; + int64_t h6 = f6 * (int64_t) 121666; + int64_t h7 = f7 * (int64_t) 121666; + int64_t h8 = f8 * (int64_t) 121666; + int64_t h9 = f9 * (int64_t) 121666; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + carry9 = (h9 + (int64_t) (1UL<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; + carry1 = (h1 + (int64_t) (1UL<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; + carry3 = (h3 + (int64_t) (1UL<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; + carry5 = (h5 + (int64_t) (1UL<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; + carry7 = (h7 + (int64_t) (1UL<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; + + carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; + carry2 = (h2 + (int64_t) (1UL<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; + carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; + carry6 = (h6 + (int64_t) (1UL<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; + carry8 = (h8 + (int64_t) (1UL<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; + + h[0] = (int32_t)h0; + h[1] = (int32_t)h1; + h[2] = (int32_t)h2; + h[3] = (int32_t)h3; + h[4] = (int32_t)h4; + h[5] = (int32_t)h5; + h[6] = (int32_t)h6; + h[7] = (int32_t)h7; + h[8] = (int32_t)h8; + h[9] = (int32_t)h9; +} + + +/* +h = 2 * f * f +Can overlap h with f. + +Preconditions: + |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. + +Postconditions: + |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. +*/ + +/* +See fe_mul.c for discussion of implementation strategy. 
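+
+fe_sq2 is fe_sq plus one extra doubling pass (the h0 += h0 ...
+h9 += h9 statements below) applied before the carry chain, so each
+intermediate gains at most one bit, which still fits in the 64-bit
+intermediates.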
+*/ + +void fe_sq2(fe h,const fe f) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int32_t f0_2 = 2 * f0; + int32_t f1_2 = 2 * f1; + int32_t f2_2 = 2 * f2; + int32_t f3_2 = 2 * f3; + int32_t f4_2 = 2 * f4; + int32_t f5_2 = 2 * f5; + int32_t f6_2 = 2 * f6; + int32_t f7_2 = 2 * f7; + int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ + int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ + int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ + int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ + int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ + int64_t f0f0 = f0 * (int64_t) f0; + int64_t f0f1_2 = f0_2 * (int64_t) f1; + int64_t f0f2_2 = f0_2 * (int64_t) f2; + int64_t f0f3_2 = f0_2 * (int64_t) f3; + int64_t f0f4_2 = f0_2 * (int64_t) f4; + int64_t f0f5_2 = f0_2 * (int64_t) f5; + int64_t f0f6_2 = f0_2 * (int64_t) f6; + int64_t f0f7_2 = f0_2 * (int64_t) f7; + int64_t f0f8_2 = f0_2 * (int64_t) f8; + int64_t f0f9_2 = f0_2 * (int64_t) f9; + int64_t f1f1_2 = f1_2 * (int64_t) f1; + int64_t f1f2_2 = f1_2 * (int64_t) f2; + int64_t f1f3_4 = f1_2 * (int64_t) f3_2; + int64_t f1f4_2 = f1_2 * (int64_t) f4; + int64_t f1f5_4 = f1_2 * (int64_t) f5_2; + int64_t f1f6_2 = f1_2 * (int64_t) f6; + int64_t f1f7_4 = f1_2 * (int64_t) f7_2; + int64_t f1f8_2 = f1_2 * (int64_t) f8; + int64_t f1f9_76 = f1_2 * (int64_t) f9_38; + int64_t f2f2 = f2 * (int64_t) f2; + int64_t f2f3_2 = f2_2 * (int64_t) f3; + int64_t f2f4_2 = f2_2 * (int64_t) f4; + int64_t f2f5_2 = f2_2 * (int64_t) f5; + int64_t f2f6_2 = f2_2 * (int64_t) f6; + int64_t f2f7_2 = f2_2 * (int64_t) f7; + int64_t f2f8_38 = f2_2 * (int64_t) f8_19; + int64_t f2f9_38 = f2 * (int64_t) f9_38; + int64_t f3f3_2 = f3_2 * (int64_t) f3; + int64_t f3f4_2 = f3_2 * (int64_t) f4; + int64_t f3f5_4 = f3_2 * (int64_t) f5_2; + int64_t f3f6_2 = f3_2 * (int64_t) f6; + int64_t f3f7_76 = f3_2 * (int64_t) f7_38; + int64_t f3f8_38 = f3_2 * (int64_t) f8_19; + int64_t f3f9_76 = f3_2 * (int64_t) f9_38; + int64_t f4f4 = f4 * (int64_t) f4; + int64_t f4f5_2 = f4_2 * (int64_t) f5; + int64_t f4f6_38 = f4_2 * (int64_t) f6_19; + int64_t f4f7_38 = f4 * (int64_t) f7_38; + int64_t f4f8_38 = f4_2 * (int64_t) f8_19; + int64_t f4f9_38 = f4 * (int64_t) f9_38; + int64_t f5f5_38 = f5 * (int64_t) f5_38; + int64_t f5f6_38 = f5_2 * (int64_t) f6_19; + int64_t f5f7_76 = f5_2 * (int64_t) f7_38; + int64_t f5f8_38 = f5_2 * (int64_t) f8_19; + int64_t f5f9_76 = f5_2 * (int64_t) f9_38; + int64_t f6f6_19 = f6 * (int64_t) f6_19; + int64_t f6f7_38 = f6 * (int64_t) f7_38; + int64_t f6f8_38 = f6_2 * (int64_t) f8_19; + int64_t f6f9_38 = f6 * (int64_t) f9_38; + int64_t f7f7_38 = f7 * (int64_t) f7_38; + int64_t f7f8_38 = f7_2 * (int64_t) f8_19; + int64_t f7f9_76 = f7_2 * (int64_t) f9_38; + int64_t f8f8_19 = f8 * (int64_t) f8_19; + int64_t f8f9_38 = f8 * (int64_t) f9_38; + int64_t f9f9_38 = f9 * (int64_t) f9_38; + int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; + int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; + int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; + int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; + int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; + int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; + int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; + int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; + int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; + int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; + int64_t 
carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + h0 += h0; + h1 += h1; + h2 += h2; + h3 += h3; + h4 += h4; + h5 += h5; + h6 += h6; + h7 += h7; + h8 += h8; + h9 += h9; + + carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; + carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; + + carry1 = (h1 + (int64_t) (1UL<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; + carry5 = (h5 + (int64_t) (1UL<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; + + carry2 = (h2 + (int64_t) (1UL<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; + carry6 = (h6 + (int64_t) (1UL<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; + + carry3 = (h3 + (int64_t) (1UL<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; + carry7 = (h7 + (int64_t) (1UL<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; + + carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; + carry8 = (h8 + (int64_t) (1UL<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; + + carry9 = (h9 + (int64_t) (1UL<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; + + carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; + + h[0] = (int32_t)h0; + h[1] = (int32_t)h1; + h[2] = (int32_t)h2; + h[3] = (int32_t)h3; + h[4] = (int32_t)h4; + h[5] = (int32_t)h5; + h[6] = (int32_t)h6; + h[7] = (int32_t)h7; + h[8] = (int32_t)h8; + h[9] = (int32_t)h9; +} + + +void fe_pow22523(fe out,const fe z) +{ + fe t0 = {0}; + fe t1 = {0}; + fe t2 = {0}; + int i = 0; + + fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0); + fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1); + fe_mul(t1,z,t1); + fe_mul(t0,t0,t1); + fe_sq(t0,t0); for (i = 1;i < 1;++i) fe_sq(t0,t0); + fe_mul(t0,t1,t0); + fe_sq(t1,t0); for (i = 1;i < 5;++i) fe_sq(t1,t1); + fe_mul(t0,t1,t0); + fe_sq(t1,t0); for (i = 1;i < 10;++i) fe_sq(t1,t1); + fe_mul(t1,t1,t0); + fe_sq(t2,t1); for (i = 1;i < 20;++i) fe_sq(t2,t2); + fe_mul(t1,t2,t1); + fe_sq(t1,t1); for (i = 1;i < 10;++i) fe_sq(t1,t1); + fe_mul(t0,t1,t0); + fe_sq(t1,t0); for (i = 1;i < 50;++i) fe_sq(t1,t1); + fe_mul(t1,t1,t0); + fe_sq(t2,t1); for (i = 1;i < 100;++i) fe_sq(t2,t2); + fe_mul(t1,t2,t1); + fe_sq(t1,t1); for (i = 1;i < 50;++i) fe_sq(t1,t1); + fe_mul(t0,t1,t0); + fe_sq(t0,t0); for (i = 1;i < 2;++i) fe_sq(t0,t0); + fe_mul(out,t0,z); + + return; +} + + +/* +h = -f + +Preconditions: + |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. + +Postconditions: + |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. +*/ + +void fe_neg(fe h,const fe f) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int32_t h0 = -f0; + int32_t h1 = -f1; + int32_t h2 = -f2; + int32_t h3 = -f3; + int32_t h4 = -f4; + int32_t h5 = -f5; + int32_t h6 = -f6; + int32_t h7 = -f7; + int32_t h8 = -f8; + int32_t h9 = -f9; + h[0] = h0; + h[1] = h1; + h[2] = h2; + h[3] = h3; + h[4] = h4; + h[5] = h5; + h[6] = h6; + h[7] = h7; + h[8] = h8; + h[9] = h9; +} + + +/* +Preconditions: + |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 
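+
+fe_isnonzero below serializes f and compares the 32 bytes against a
+zero buffer using wolfSSL's ConstantCompare.  The underlying idea, as
+a sketch (not misc.c's actual code):
+
+    unsigned char d = 0;
+    int i;
+
+    for (i = 0; i < 32; i++)
+        d |= s[i];
+    return d != 0;
+
+OR-accumulating every byte avoids any branch on secret data.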
+*/ + +static const unsigned char zero[32] = {0}; + +int fe_isnonzero(const fe f) +{ + unsigned char s[32]; + fe_tobytes(s,f); + return ConstantCompare(s,zero,32); +} + + +/* +return 1 if f is in {1,3,5,...,q-2} +return 0 if f is in {0,2,4,...,q-1} + +Preconditions: + |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +*/ + +int fe_isnegative(const fe f) +{ + unsigned char s[32]; + fe_tobytes(s,f); + return s[0] & 1; +} + + +/* +Replace (f,g) with (g,g) if b == 1; +replace (f,g) with (f,g) if b == 0. + +Preconditions: b in {0,1}. +*/ + +void fe_cmov(fe f, const fe g, int b) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int32_t g0 = g[0]; + int32_t g1 = g[1]; + int32_t g2 = g[2]; + int32_t g3 = g[3]; + int32_t g4 = g[4]; + int32_t g5 = g[5]; + int32_t g6 = g[6]; + int32_t g7 = g[7]; + int32_t g8 = g[8]; + int32_t g9 = g[9]; + int32_t x0 = f0 ^ g0; + int32_t x1 = f1 ^ g1; + int32_t x2 = f2 ^ g2; + int32_t x3 = f3 ^ g3; + int32_t x4 = f4 ^ g4; + int32_t x5 = f5 ^ g5; + int32_t x6 = f6 ^ g6; + int32_t x7 = f7 ^ g7; + int32_t x8 = f8 ^ g8; + int32_t x9 = f9 ^ g9; + b = -b; + x0 &= b; + x1 &= b; + x2 &= b; + x3 &= b; + x4 &= b; + x5 &= b; + x6 &= b; + x7 &= b; + x8 &= b; + x9 &= b; + f[0] = f0 ^ x0; + f[1] = f1 ^ x1; + f[2] = f2 ^ x2; + f[3] = f3 ^ x3; + f[4] = f4 ^ x4; + f[5] = f5 ^ x5; + f[6] = f6 ^ x6; + f[7] = f7 ^ x7; + f[8] = f8 ^ x8; + f[9] = f9 ^ x9; +} +#endif + +#endif /* !CURVE25519_SMALL || !ED25519_SMALL */ +#endif /* HAVE_CURVE25519 || HAVE_ED25519 */ diff --git a/client/wolfssl/wolfcrypt/src/fe_x25519_128.i b/client/wolfssl/wolfcrypt/src/fe_x25519_128.i new file mode 100644 index 0000000..10e43d9 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fe_x25519_128.i @@ -0,0 +1,625 @@ +/* fe_x25519_128.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +void fe_init(void) +{ +} + +/* Convert a number represented as an array of bytes to an array of words with + * 51-bits of data in each word. + * + * in An array of bytes. + * out An array of words. 
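+ *
+ * Five words of 51 bits cover exactly 255 bits (5*51 = 255), so each
+ * limb is masked with 0x7ffffffffffff (2^51 - 1) and limb products
+ * fit comfortably in the 128-bit accumulators used by fe_mul below.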
+ */
+void fe_frombytes(fe out, const unsigned char *in)
+{
+    out[0] = (((int64_t)((in[ 0]     )       ))      )
+           | (((int64_t)((in[ 1]     )       )) <<  8)
+           | (((int64_t)((in[ 2]     )       )) << 16)
+           | (((int64_t)((in[ 3]     )       )) << 24)
+           | (((int64_t)((in[ 4]     )       )) << 32)
+           | (((int64_t)((in[ 5]     )       )) << 40)
+           | (((int64_t)((in[ 6]     ) & 0x07)) << 48);
+    out[1] = (((int64_t)((in[ 6] >> 3) & 0x1f))      )
+           | (((int64_t)((in[ 7]     )       )) <<  5)
+           | (((int64_t)((in[ 8]     )       )) << 13)
+           | (((int64_t)((in[ 9]     )       )) << 21)
+           | (((int64_t)((in[10]     )       )) << 29)
+           | (((int64_t)((in[11]     )       )) << 37)
+           | (((int64_t)((in[12]     ) & 0x3f)) << 45);
+    out[2] = (((int64_t)((in[12] >> 6) & 0x03))      )
+           | (((int64_t)((in[13]     )       )) <<  2)
+           | (((int64_t)((in[14]     )       )) << 10)
+           | (((int64_t)((in[15]     )       )) << 18)
+           | (((int64_t)((in[16]     )       )) << 26)
+           | (((int64_t)((in[17]     )       )) << 34)
+           | (((int64_t)((in[18]     )       )) << 42)
+           | (((int64_t)((in[19]     ) & 0x01)) << 50);
+    out[3] = (((int64_t)((in[19] >> 1) & 0x7f))      )
+           | (((int64_t)((in[20]     )       )) <<  7)
+           | (((int64_t)((in[21]     )       )) << 15)
+           | (((int64_t)((in[22]     )       )) << 23)
+           | (((int64_t)((in[23]     )       )) << 31)
+           | (((int64_t)((in[24]     )       )) << 39)
+           | (((int64_t)((in[25]     ) & 0x0f)) << 47);
+    out[4] = (((int64_t)((in[25] >> 4) & 0x0f))      )
+           | (((int64_t)((in[26]     )       )) <<  4)
+           | (((int64_t)((in[27]     )       )) << 12)
+           | (((int64_t)((in[28]     )       )) << 20)
+           | (((int64_t)((in[29]     )       )) << 28)
+           | (((int64_t)((in[30]     )       )) << 36)
+           | (((int64_t)((in[31]     ) & 0x7f)) << 44);
+}
+
+/* Convert a number represented as an array of words to an array of bytes.
+ * The array of words is normalized to an array of 51-bit data words and if
+ * greater than the mod, modulo reduced by the prime 2^255 - 19.
+ *
+ * n    An array of words.
+ * out  An array of bytes.
+ */
+void fe_tobytes(unsigned char *out, const fe n)
+{
+    fe in;
+    int64_t c;
+
+    in[0] = n[0];
+    in[1] = n[1];
+    in[2] = n[2];
+    in[3] = n[3];
+    in[4] = n[4];
+
+    /* Normalize to 51-bits of data per word.
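+       The c chain below propagates the carry from adding 19: c ends
+       up 1 exactly when the value is at least 2^255 - 19, so the
+       in[0] += c * 19 step folds in the final conditional reduction
+       without branching.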
*/ + in[0] += (in[4] >> 51) * 19; in[4] &= 0x7ffffffffffff; + + in[1] += in[0] >> 51; in[0] &= 0x7ffffffffffff; + in[2] += in[1] >> 51; in[1] &= 0x7ffffffffffff; + in[3] += in[2] >> 51; in[2] &= 0x7ffffffffffff; + in[4] += in[3] >> 51; in[3] &= 0x7ffffffffffff; + in[0] += (in[4] >> 51) * 19; + in[4] &= 0x7ffffffffffff; + + c = (in[0] + 19) >> 51; + c = (in[1] + c) >> 51; + c = (in[2] + c) >> 51; + c = (in[3] + c) >> 51; + c = (in[4] + c) >> 51; + in[0] += c * 19; + in[1] += in[0] >> 51; in[0] &= 0x7ffffffffffff; + in[2] += in[1] >> 51; in[1] &= 0x7ffffffffffff; + in[3] += in[2] >> 51; in[2] &= 0x7ffffffffffff; + in[4] += in[3] >> 51; in[3] &= 0x7ffffffffffff; + in[4] &= 0x7ffffffffffff; + + out[ 0] = (((byte)((in[0] ) )) ); + out[ 1] = (((byte)((in[0] >> 8) )) ); + out[ 2] = (((byte)((in[0] >> 16) )) ); + out[ 3] = (((byte)((in[0] >> 24) )) ); + out[ 4] = (((byte)((in[0] >> 32) )) ); + out[ 5] = (((byte)((in[0] >> 40) )) ); + out[ 6] = (((byte)((in[0] >> 48) & 0x07)) ) + | (((byte)((in[1] ) & 0x1f)) << 3); + out[ 7] = (((byte)((in[1] >> 5) )) ); + out[ 8] = (((byte)((in[1] >> 13) )) ); + out[ 9] = (((byte)((in[1] >> 21) )) ); + out[10] = (((byte)((in[1] >> 29) )) ); + out[11] = (((byte)((in[1] >> 37) )) ); + out[12] = (((byte)((in[1] >> 45) & 0x3f)) ) + | (((byte)((in[2] ) & 0x03)) << 6); + out[13] = (((byte)((in[2] >> 2) )) ); + out[14] = (((byte)((in[2] >> 10) )) ); + out[15] = (((byte)((in[2] >> 18) )) ); + out[16] = (((byte)((in[2] >> 26) )) ); + out[17] = (((byte)((in[2] >> 34) )) ); + out[18] = (((byte)((in[2] >> 42) )) ); + out[19] = (((byte)((in[2] >> 50) & 0x01)) ) + | (((byte)((in[3] ) & 0x7f)) << 1); + out[20] = (((byte)((in[3] >> 7) )) ); + out[21] = (((byte)((in[3] >> 15) )) ); + out[22] = (((byte)((in[3] >> 23) )) ); + out[23] = (((byte)((in[3] >> 31) )) ); + out[24] = (((byte)((in[3] >> 39) )) ); + out[25] = (((byte)((in[3] >> 47) & 0x0f)) ) + | (((byte)((in[4] ) & 0x0f)) << 4); + out[26] = (((byte)((in[4] >> 4) )) ); + out[27] = (((byte)((in[4] >> 12) )) ); + out[28] = (((byte)((in[4] >> 20) )) ); + out[29] = (((byte)((in[4] >> 28) )) ); + out[30] = (((byte)((in[4] >> 36) )) ); + out[31] = (((byte)((in[4] >> 44) & 0x7f)) ); +} + +/* Set the field element to 1. + * + * n The field element number. + */ +void fe_1(fe n) +{ + n[0] = 0x0000000000001; + n[1] = 0x0000000000000; + n[2] = 0x0000000000000; + n[3] = 0x0000000000000; + n[4] = 0x0000000000000; +} + +/* Set the field element to 0. + * + * n The field element number. + */ +void fe_0(fe n) +{ + n[0] = 0x0000000000000; + n[1] = 0x0000000000000; + n[2] = 0x0000000000000; + n[3] = 0x0000000000000; + n[4] = 0x0000000000000; +} + +/* Copy field element a into field element r. + * + * r Field element to copy into. + * a Field element to copy. + */ +void fe_copy(fe r, const fe a) +{ + r[0] = a[0]; + r[1] = a[1]; + r[2] = a[2]; + r[3] = a[3]; + r[4] = a[4]; +} + +/* Constant time, conditional swap of field elements a and b. + * + * a A field element. + * b A field element. + * c If 1 then swap and if 0 then don't swap. + */ +void fe_cswap(fe a, fe b, int c) +{ + int64_t m = c; + int64_t t0, t1, t2, t3, t4; + + /* Convert conditional into mask. */ + m = -m; + t0 = m & (a[0] ^ b[0]); + t1 = m & (a[1] ^ b[1]); + t2 = m & (a[2] ^ b[2]); + t3 = m & (a[3] ^ b[3]); + t4 = m & (a[4] ^ b[4]); + + a[0] ^= t0; + a[1] ^= t1; + a[2] ^= t2; + a[3] ^= t3; + a[4] ^= t4; + + b[0] ^= t0; + b[1] ^= t1; + b[2] ^= t2; + b[3] ^= t3; + b[4] ^= t4; +} + +/* Subtract b from a into r. (r = a - b) + * + * r A field element. + * a A field element. 
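+ *
+ * This is plain limbwise subtraction: limbs may go negative, which is
+ * acceptable because the signed 128-bit arithmetic in the next
+ * fe_mul/fe_sq (or the normalization in fe_tobytes) absorbs it.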
+ * b A field element. + */ +void fe_sub(fe r, const fe a, const fe b) +{ + r[0] = a[0] - b[0]; + r[1] = a[1] - b[1]; + r[2] = a[2] - b[2]; + r[3] = a[3] - b[3]; + r[4] = a[4] - b[4]; +} + +/* Add b to a into r. (r = a + b) + * + * r A field element. + * a A field element. + * b A field element. + */ +void fe_add(fe r, const fe a, const fe b) +{ + r[0] = a[0] + b[0]; + r[1] = a[1] + b[1]; + r[2] = a[2] + b[2]; + r[3] = a[3] + b[3]; + r[4] = a[4] + b[4]; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A field element. + * a A field element. + * b A field element. + */ +void fe_mul(fe r, const fe a, const fe b) +{ + const __int128_t k19 = 19; + __int128_t t0 = ((__int128_t)a[0]) * b[0]; + __int128_t t1 = ((__int128_t)a[0]) * b[1] + + ((__int128_t)a[1]) * b[0]; + __int128_t t2 = ((__int128_t)a[0]) * b[2] + + ((__int128_t)a[1]) * b[1] + + ((__int128_t)a[2]) * b[0]; + __int128_t t3 = ((__int128_t)a[0]) * b[3] + + ((__int128_t)a[1]) * b[2] + + ((__int128_t)a[2]) * b[1] + + ((__int128_t)a[3]) * b[0]; + __int128_t t4 = ((__int128_t)a[0]) * b[4] + + ((__int128_t)a[1]) * b[3] + + ((__int128_t)a[2]) * b[2] + + ((__int128_t)a[3]) * b[1] + + ((__int128_t)a[4]) * b[0]; + __int128_t t5 = ((__int128_t)a[1]) * b[4] + + ((__int128_t)a[2]) * b[3] + + ((__int128_t)a[3]) * b[2] + + ((__int128_t)a[4]) * b[1]; + __int128_t t6 = ((__int128_t)a[2]) * b[4] + + ((__int128_t)a[3]) * b[3] + + ((__int128_t)a[4]) * b[2]; + __int128_t t7 = ((__int128_t)a[3]) * b[4] + + ((__int128_t)a[4]) * b[3]; + __int128_t t8 = ((__int128_t)a[4]) * b[4]; + + /* Modulo reduce double long word. */ + t0 += t5 * k19; + t1 += t6 * k19; + t2 += t7 * k19; + t3 += t8 * k19; + + /* Normalize to 51-bits of data per word. */ + t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff; + + t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff; + t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff; + t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff; + t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff; + r[0] += (t4 >> 51) * k19; + r[4] = t4 & 0x7ffffffffffff; +} + +/* Square a and put result in r. (r = a * a) + * + * r A field element. + * a A field element. + * b A field element. + */ +void fe_sq(fe r, const fe a) +{ + const __int128_t k19 = 19; + const __int128_t k2 = 2; + __int128_t t0 = ((__int128_t)a[0]) * a[0]; + __int128_t t1 = ((__int128_t)a[0]) * a[1] * k2; + __int128_t t2 = ((__int128_t)a[0]) * a[2] * k2 + + ((__int128_t)a[1]) * a[1]; + __int128_t t3 = ((__int128_t)a[0]) * a[3] * k2 + + ((__int128_t)a[1]) * a[2] * k2; + __int128_t t4 = ((__int128_t)a[0]) * a[4] * k2 + + ((__int128_t)a[1]) * a[3] * k2 + + ((__int128_t)a[2]) * a[2]; + __int128_t t5 = ((__int128_t)a[1]) * a[4] * k2 + + ((__int128_t)a[2]) * a[3] * k2; + __int128_t t6 = ((__int128_t)a[2]) * a[4] * k2 + + ((__int128_t)a[3]) * a[3]; + __int128_t t7 = ((__int128_t)a[3]) * a[4] * k2; + __int128_t t8 = ((__int128_t)a[4]) * a[4]; + + /* Modulo reduce double long word. */ + t0 += t5 * k19; + t1 += t6 * k19; + t2 += t7 * k19; + t3 += t8 * k19; + + /* Normalize to 51-bits of data per word. */ + t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff; + + t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff; + t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff; + t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff; + t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff; + r[0] += (t4 >> 51) * k19; + r[4] = t4 & 0x7ffffffffffff; +} + +/* Multiply a by 121666 and put result in r. (r = 121666 * a) + * + * r A field element. + * a A field element. + * b A field element. 
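+ *
+ * Since 121666 fits in 17 bits, each 51-bit limb times 121666 stays
+ * below 2^68 and one normalization pass restores the 51-bit form.
+ * As a worked example, applying it to the element 1 simply yields
+ * the limbs {121666, 0, 0, 0, 0}.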
+ */
+void fe_mul121666(fe r, fe a)
+{
+    const __int128_t k19 = 19;
+    const __int128_t k121666 = 121666;
+    __int128_t t0 = ((__int128_t)a[0]) * k121666;
+    __int128_t t1 = ((__int128_t)a[1]) * k121666;
+    __int128_t t2 = ((__int128_t)a[2]) * k121666;
+    __int128_t t3 = ((__int128_t)a[3]) * k121666;
+    __int128_t t4 = ((__int128_t)a[4]) * k121666;
+
+    /* Normalize to 51-bits of data per word. */
+    t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
+
+    t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
+    t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
+    t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
+    t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
+    r[0] += (t4 >> 51) * k19;
+    r[4] = t4 & 0x7ffffffffffff;
+}
+
+/* Find the inverse of a modulo 2^255 - 19 and put result in r.
+ * (r * a) mod (2^255 - 19) = 1
+ * Implementation is constant time.
+ *
+ * r  A field element.
+ * a  A field element.
+ */
+void fe_invert(fe r, const fe a)
+{
+    fe  t0, t1, t2, t3;
+    int i;
+
+    /* a ^ (2^255 - 21) */
+    fe_sq(t0,  a); for (i = 1; i <   1; ++i) fe_sq(t0, t0);
+    fe_sq(t1, t0); for (i = 1; i <   2; ++i) fe_sq(t1, t1); fe_mul(t1,  a, t1);
+    fe_mul(t0, t0, t1);
+    fe_sq(t2, t0); for (i = 1; i <   1; ++i) fe_sq(t2, t2); fe_mul(t1, t1, t2);
+    fe_sq(t2, t1); for (i = 1; i <   5; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
+    fe_sq(t2, t1); for (i = 1; i <  10; ++i) fe_sq(t2, t2); fe_mul(t2, t2, t1);
+    fe_sq(t3, t2); for (i = 1; i <  20; ++i) fe_sq(t3, t3); fe_mul(t2, t3, t2);
+    fe_sq(t2, t2); for (i = 1; i <  10; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
+    fe_sq(t2, t1); for (i = 1; i <  50; ++i) fe_sq(t2, t2); fe_mul(t2, t2, t1);
+    fe_sq(t3, t2); for (i = 1; i < 100; ++i) fe_sq(t3, t3); fe_mul(t2, t3, t2);
+    fe_sq(t2, t2); for (i = 1; i <  50; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
+    fe_sq(t1, t1); for (i = 1; i <   5; ++i) fe_sq(t1, t1); fe_mul( r, t1, t0);
+}
+
+#ifndef CURVE25519_SMALL
+/* Scalar multiply the field element a by n using the Montgomery ladder and
+ * place the result in r.
+ *
+ * r  A field element as an array of bytes.
+ * n  The scalar as an array of bytes.
+ * a  A field element as an array of bytes.
+ */
+int curve25519(byte* r, byte* n, byte* a)
+{
+    fe x1, x2, z2, x3, z3;
+    fe t0, t1;
+    int pos;
+    unsigned int swap;
+    unsigned int b;
+
+    fe_frombytes(x1, a);
+    fe_1(x2);
+    fe_0(z2);
+    fe_copy(x3, x1);
+    fe_1(z3);
+
+    swap = 0;
+    for (pos = 254;pos >= 0;--pos) {
+        b = n[pos / 8] >> (pos & 7);
+        b &= 1;
+        swap ^= b;
+        fe_cswap(x2, x3, swap);
+        fe_cswap(z2, z3, swap);
+        swap = b;
+
+        fe_sub(t0, x3, z3);
+        fe_sub(t1, x2, z2);
+        fe_add(x2, x2, z2);
+        fe_add(z2, x3, z3);
+        fe_mul(z3, t0, x2);
+        fe_mul(z2, z2, t1);
+        fe_sq(t0, t1);
+        fe_sq(t1, x2);
+        fe_add(x3, z3, z2);
+        fe_sub(z2, z3, z2);
+        fe_mul(x2, t1, t0);
+        fe_sub(t1, t1, t0);
+        fe_sq(z2, z2);
+        fe_mul121666(z3, t1);
+        fe_sq(x3, x3);
+        fe_add(t0, t0, z3);
+        fe_mul(z3, x1, z2);
+        fe_mul(z2, t1, t0);
+    }
+    fe_cswap(x2, x3, swap);
+    fe_cswap(z2, z3, swap);
+
+    fe_invert(z2, z2);
+    fe_mul(x2, x2, z2);
+    fe_tobytes(r, x2);
+
+    return 0;
+}
+#endif /* !CURVE25519_SMALL */
+
+/* The field element value 0 as an array of bytes. */
+static const unsigned char zero[32] = {0};
+
+/* Constant time check as to whether a is not 0.
+ *
+ * a  A field element.
+ */
+int fe_isnonzero(const fe a)
+{
+    unsigned char s[32];
+    fe_tobytes(s, a);
+    return ConstantCompare(s, zero, 32);
+}
+
+/* Checks whether a is negative.
+ *
+ * a  A field element.
+ */
+int fe_isnegative(const fe a)
+{
+    unsigned char s[32];
+    fe_tobytes(s, a);
+    return s[0] & 1;
+}
+
+/* Negates field element a and stores the result in r.
+ *
+ * r  A field element.
+ * a  A field element.
+ */
+void fe_neg(fe r, const fe a)
+{
+    r[0] = -a[0];
+    r[1] = -a[1];
+    r[2] = -a[2];
+    r[3] = -a[3];
+    r[4] = -a[4];
+}
+
+/* Constant time, conditional move of b into a.
+ * a is not changed if the condition is 0.
+ *
+ * a  A field element.
+ * b  A field element.
+ * c  If 1 then copy and if 0 then don't copy.
+ */
+void fe_cmov(fe a, const fe b, int c)
+{
+    int64_t m = c;
+    int64_t t0, t1, t2, t3, t4;
+
+    /* Convert conditional into mask. */
+    m = -m;
+    t0 = m & (a[0] ^ b[0]);
+    t1 = m & (a[1] ^ b[1]);
+    t2 = m & (a[2] ^ b[2]);
+    t3 = m & (a[3] ^ b[3]);
+    t4 = m & (a[4] ^ b[4]);
+
+    a[0] ^= t0;
+    a[1] ^= t1;
+    a[2] ^= t2;
+    a[3] ^= t3;
+    a[4] ^= t4;
+}
+
+void fe_pow22523(fe r, const fe a)
+{
+    fe  t0, t1, t2;
+    int i;
+
+    /* a ^ (2^252 - 3) */
+    fe_sq(t0,  a); for (i = 1; i <   1; ++i) fe_sq(t0, t0);
+    fe_sq(t1, t0); for (i = 1; i <   2; ++i) fe_sq(t1, t1); fe_mul(t1,  a, t1);
+    fe_mul(t0, t0, t1);
+    fe_sq(t0, t0); for (i = 1; i <   1; ++i) fe_sq(t0, t0); fe_mul(t0, t1, t0);
+    fe_sq(t1, t0); for (i = 1; i <   5; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
+    fe_sq(t1, t0); for (i = 1; i <  10; ++i) fe_sq(t1, t1); fe_mul(t1, t1, t0);
+    fe_sq(t2, t1); for (i = 1; i <  20; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
+    fe_sq(t1, t1); for (i = 1; i <  10; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
+    fe_sq(t1, t0); for (i = 1; i <  50; ++i) fe_sq(t1, t1); fe_mul(t1, t1, t0);
+    fe_sq(t2, t1); for (i = 1; i < 100; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
+    fe_sq(t1, t1); for (i = 1; i <  50; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
+    fe_sq(t0, t0); for (i = 1; i <   2; ++i) fe_sq(t0, t0); fe_mul( r, t0, a);
+
+    return;
+}
+
+/* Double the square of a and put result in r. (r = 2 * a * a)
+ *
+ * r  A field element.
+ * a  A field element.
+ */
+void fe_sq2(fe r, const fe a)
+{
+    const __int128_t k2 = 2;
+    const __int128_t k19 = 19;
+    __int128_t t0 = k2 * (((__int128_t)a[0]) * a[0]);
+    __int128_t t1 = k2 * (((__int128_t)a[0]) * a[1] * k2);
+    __int128_t t2 = k2 * (((__int128_t)a[0]) * a[2] * k2
+                        + ((__int128_t)a[1]) * a[1]);
+    __int128_t t3 = k2 * (((__int128_t)a[0]) * a[3] * k2
+                        + ((__int128_t)a[1]) * a[2] * k2);
+    __int128_t t4 = k2 * (((__int128_t)a[0]) * a[4] * k2
+                        + ((__int128_t)a[1]) * a[3] * k2
+                        + ((__int128_t)a[2]) * a[2]);
+    __int128_t t5 = k2 * (((__int128_t)a[1]) * a[4] * k2
+                        + ((__int128_t)a[2]) * a[3] * k2);
+    __int128_t t6 = k2 * (((__int128_t)a[2]) * a[4] * k2
+                        + ((__int128_t)a[3]) * a[3]);
+    __int128_t t7 = k2 * (((__int128_t)a[3]) * a[4] * k2);
+    __int128_t t8 = k2 * (((__int128_t)a[4]) * a[4]);
+
+    /* Modulo reduce double long word. */
+    t0 += t5 * k19;
+    t1 += t6 * k19;
+    t2 += t7 * k19;
+    t3 += t8 * k19;
+
+    /* Normalize to 51-bits of data per word. */
+    t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
+
+    t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
+    t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
+    t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
+    t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
+    r[0] += (t4 >> 51) * k19;
+    r[4] = t4 & 0x7ffffffffffff;
+}
+
+/* Load 3 little endian bytes into a 64-bit word.
+ *
+ * in  An array of bytes.
+ * returns a 64-bit word.
+ */
+uint64_t load_3(const unsigned char *in)
+{
+    uint64_t result;
+
+    result = ((((uint64_t)in[0])      ) |
+              (((uint64_t)in[1]) <<  8) |
+              (((uint64_t)in[2]) << 16));
+
+    return result;
+}
+
+/* Load 4 little endian bytes into a 64-bit word.
+ *
+ * in  An array of bytes.
+ * returns a 64-bit word.
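+ *
+ * Example: the bytes {0x01, 0x02, 0x03, 0x04} load as 0x04030201.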
+ */ +uint64_t load_4(const unsigned char *in) +{ + uint64_t result; + + result = ((((uint64_t)in[0]) ) | + (((uint64_t)in[1]) << 8) | + (((uint64_t)in[2]) << 16) | + (((uint64_t)in[3]) << 24)); + + return result; +} + diff --git a/client/wolfssl/wolfcrypt/src/fe_x25519_asm.S b/client/wolfssl/wolfcrypt/src/fe_x25519_asm.S new file mode 100644 index 0000000..6d0f638 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fe_x25519_asm.S @@ -0,0 +1,16542 @@ +/* fe_x25519_asm + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifndef HAVE_INTEL_AVX1 +#define HAVE_INTEL_AVX1 +#endif /* HAVE_INTEL_AVX1 */ +#ifndef NO_AVX2_SUPPORT +#define HAVE_INTEL_AVX2 +#endif /* NO_AVX2_SUPPORT */ + +#ifndef __APPLE__ +.text +.globl fe_init +.type fe_init,@function +.align 4 +fe_init: +#else +.section __TEXT,__text +.globl _fe_init +.p2align 2 +_fe_init: +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +#ifndef __APPLE__ + movq cpuFlagsSet@GOTPCREL(%rip), %rax + movl (%rax), %eax +#else + movl _cpuFlagsSet(%rip), %eax +#endif /* __APPLE__ */ + testl %eax, %eax + je L_fe_init_get_flags + repz retq +L_fe_init_get_flags: +#ifndef __APPLE__ + callq cpuid_get_flags@plt +#else + callq _cpuid_get_flags +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq intelFlags@GOTPCREL(%rip), %rdx + movl %eax, (%rdx) +#else + movl %eax, _intelFlags(%rip) +#endif /* __APPLE__ */ + andl $0x50, %eax + cmpl $0x50, %eax + jne L_fe_init_flags_done +#ifndef __APPLE__ + movq fe_mul_avx2@GOTPCREL(%rip), %rax +#else + leaq _fe_mul_avx2(%rip), %rax +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_mul_p@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) +#else + movq %rax, _fe_mul_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_sq_avx2@GOTPCREL(%rip), %rax +#else + leaq _fe_sq_avx2(%rip), %rax +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_sq_p@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) +#else + movq %rax, _fe_sq_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_mul121666_avx2@GOTPCREL(%rip), %rax +#else + leaq _fe_mul121666_avx2(%rip), %rax +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_mul121666_p@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) +#else + movq %rax, _fe_mul121666_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_sq2_avx2@GOTPCREL(%rip), %rax +#else + leaq _fe_sq2_avx2(%rip), %rax +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_sq2_p@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) +#else + movq %rax, _fe_sq2_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_invert_avx2@GOTPCREL(%rip), %rax +#else + leaq _fe_invert_avx2(%rip), %rax +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_invert_p@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) +#else + movq %rax, _fe_invert_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq 
curve25519_avx2@GOTPCREL(%rip), %rax +#else + leaq _curve25519_avx2(%rip), %rax +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq curve25519_p@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) +#else + movq %rax, _curve25519_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_pow22523_avx2@GOTPCREL(%rip), %rax +#else + leaq _fe_pow22523_avx2(%rip), %rax +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_pow22523_p@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) +#else + movq %rax, _fe_pow22523_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_ge_to_p2_avx2@GOTPCREL(%rip), %rax +#else + leaq _fe_ge_to_p2_avx2(%rip), %rax +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_ge_to_p2_p@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) +#else + movq %rax, _fe_ge_to_p2_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_ge_to_p3_avx2@GOTPCREL(%rip), %rax +#else + leaq _fe_ge_to_p3_avx2(%rip), %rax +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_ge_to_p3_p@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) +#else + movq %rax, _fe_ge_to_p3_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_ge_dbl_avx2@GOTPCREL(%rip), %rax +#else + leaq _fe_ge_dbl_avx2(%rip), %rax +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_ge_dbl_p@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) +#else + movq %rax, _fe_ge_dbl_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_ge_madd_avx2@GOTPCREL(%rip), %rax +#else + leaq _fe_ge_madd_avx2(%rip), %rax +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_ge_madd_p@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) +#else + movq %rax, _fe_ge_madd_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_ge_msub_avx2@GOTPCREL(%rip), %rax +#else + leaq _fe_ge_msub_avx2(%rip), %rax +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_ge_msub_p@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) +#else + movq %rax, _fe_ge_msub_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_ge_add_avx2@GOTPCREL(%rip), %rax +#else + leaq _fe_ge_add_avx2(%rip), %rax +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_ge_add_p@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) +#else + movq %rax, _fe_ge_add_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_ge_sub_avx2@GOTPCREL(%rip), %rax +#else + leaq _fe_ge_sub_avx2(%rip), %rax +#endif /* __APPLE__ */ +#ifndef __APPLE__ + movq fe_ge_sub_p@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) +#else + movq %rax, _fe_ge_sub_p(%rip) +#endif /* __APPLE__ */ +L_fe_init_flags_done: +#ifndef __APPLE__ + movq cpuFlagsSet@GOTPCREL(%rip), %rdx + movl $0x1, (%rdx) +#else + movl $0x1, _cpuFlagsSet(%rip) +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ + repz retq +#ifndef __APPLE__ +.size fe_init,.-fe_init +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_frombytes +.type fe_frombytes,@function +.align 4 +fe_frombytes: +#else +.section __TEXT,__text +.globl _fe_frombytes +.p2align 2 +_fe_frombytes: +#endif /* __APPLE__ */ + movq $0x7fffffffffffffff, %r9 + movq (%rsi), %rdx + movq 8(%rsi), %rax + movq 16(%rsi), %rcx + movq 24(%rsi), %r8 + andq %r9, %r8 + movq %rdx, (%rdi) + movq %rax, 8(%rdi) + movq %rcx, 16(%rdi) + movq %r8, 24(%rdi) + repz retq +#ifndef __APPLE__ +.size fe_frombytes,.-fe_frombytes +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_tobytes +.type fe_tobytes,@function +.align 4 +fe_tobytes: +#else +.section __TEXT,__text +.globl _fe_tobytes +.p2align 2 +_fe_tobytes: +#endif /* __APPLE__ */ + movq $0x7fffffffffffffff, %r10 + movq (%rsi), %rdx + movq 8(%rsi), %rax + movq 16(%rsi), %rcx + movq 24(%rsi), %r8 + addq $19, 
%rdx + adcq $0x00, %rax + adcq $0x00, %rcx + adcq $0x00, %r8 + shrq $63, %r8 + imulq $19, %r8, %r9 + movq (%rsi), %rdx + movq 8(%rsi), %rax + movq 16(%rsi), %rcx + movq 24(%rsi), %r8 + addq %r9, %rdx + adcq $0x00, %rax + adcq $0x00, %rcx + adcq $0x00, %r8 + andq %r10, %r8 + movq %rdx, (%rdi) + movq %rax, 8(%rdi) + movq %rcx, 16(%rdi) + movq %r8, 24(%rdi) + repz retq +#ifndef __APPLE__ +.size fe_tobytes,.-fe_tobytes +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_1 +.type fe_1,@function +.align 4 +fe_1: +#else +.section __TEXT,__text +.globl _fe_1 +.p2align 2 +_fe_1: +#endif /* __APPLE__ */ + # Set one + movq $0x01, (%rdi) + movq $0x00, 8(%rdi) + movq $0x00, 16(%rdi) + movq $0x00, 24(%rdi) + repz retq +#ifndef __APPLE__ +.size fe_1,.-fe_1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_0 +.type fe_0,@function +.align 4 +fe_0: +#else +.section __TEXT,__text +.globl _fe_0 +.p2align 2 +_fe_0: +#endif /* __APPLE__ */ + # Set zero + movq $0x00, (%rdi) + movq $0x00, 8(%rdi) + movq $0x00, 16(%rdi) + movq $0x00, 24(%rdi) + repz retq +#ifndef __APPLE__ +.size fe_0,.-fe_0 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_copy +.type fe_copy,@function +.align 4 +fe_copy: +#else +.section __TEXT,__text +.globl _fe_copy +.p2align 2 +_fe_copy: +#endif /* __APPLE__ */ + # Copy + movq (%rsi), %rdx + movq 8(%rsi), %rax + movq 16(%rsi), %rcx + movq 24(%rsi), %r8 + movq %rdx, (%rdi) + movq %rax, 8(%rdi) + movq %rcx, 16(%rdi) + movq %r8, 24(%rdi) + repz retq +#ifndef __APPLE__ +.size fe_copy,.-fe_copy +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_sub +.type fe_sub,@function +.align 4 +fe_sub: +#else +.section __TEXT,__text +.globl _fe_sub +.p2align 2 +_fe_sub: +#endif /* __APPLE__ */ + pushq %r12 + # Sub + movq (%rsi), %rax + movq 8(%rsi), %rcx + movq 16(%rsi), %r8 + movq 24(%rsi), %r9 + subq (%rdx), %rax + movq $0x00, %r10 + sbbq 8(%rdx), %rcx + movq $-19, %r11 + sbbq 16(%rdx), %r8 + movq $0x7fffffffffffffff, %r12 + sbbq 24(%rdx), %r9 + sbbq $0x00, %r10 + # Mask the modulus + andq %r10, %r11 + andq %r10, %r12 + # Add modulus (if underflow) + addq %r11, %rax + adcq %r10, %rcx + adcq %r10, %r8 + adcq %r12, %r9 + movq %rax, (%rdi) + movq %rcx, 8(%rdi) + movq %r8, 16(%rdi) + movq %r9, 24(%rdi) + popq %r12 + repz retq +#ifndef __APPLE__ +.size fe_sub,.-fe_sub +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_add +.type fe_add,@function +.align 4 +fe_add: +#else +.section __TEXT,__text +.globl _fe_add +.p2align 2 +_fe_add: +#endif /* __APPLE__ */ + pushq %r12 + # Add + movq (%rsi), %rax + movq 8(%rsi), %rcx + addq (%rdx), %rax + movq 16(%rsi), %r8 + adcq 8(%rdx), %rcx + movq 24(%rsi), %r10 + adcq 16(%rdx), %r8 + movq $-19, %r11 + adcq 24(%rdx), %r10 + movq $0x7fffffffffffffff, %r12 + movq %r10, %r9 + sarq $63, %r10 + # Mask the modulus + andq %r10, %r11 + andq %r10, %r12 + # Sub modulus (if overflow) + subq %r11, %rax + sbbq %r10, %rcx + sbbq %r10, %r8 + sbbq %r12, %r9 + movq %rax, (%rdi) + movq %rcx, 8(%rdi) + movq %r8, 16(%rdi) + movq %r9, 24(%rdi) + popq %r12 + repz retq +#ifndef __APPLE__ +.size fe_add,.-fe_add +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_neg +.type fe_neg,@function +.align 4 +fe_neg: +#else +.section __TEXT,__text +.globl _fe_neg +.p2align 2 +_fe_neg: +#endif /* __APPLE__ */ + movq $-19, %rdx + movq $-1, %rax + movq $-1, %rcx + movq $0x7fffffffffffffff, %r8 + subq (%rsi), %rdx + sbbq 8(%rsi), %rax + sbbq 16(%rsi), %rcx + sbbq 24(%rsi), %r8 + movq %rdx, (%rdi) + movq %rax, 8(%rdi) + movq %rcx, 16(%rdi) + movq %r8, 
24(%rdi) + repz retq +#ifndef __APPLE__ +.size fe_neg,.-fe_neg +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_cmov +.type fe_cmov,@function +.align 4 +fe_cmov: +#else +.section __TEXT,__text +.globl _fe_cmov +.p2align 2 +_fe_cmov: +#endif /* __APPLE__ */ + cmpl $0x01, %edx + movq (%rdi), %rcx + movq 8(%rdi), %r8 + movq 16(%rdi), %r9 + movq 24(%rdi), %r10 + cmoveq (%rsi), %rcx + cmoveq 8(%rsi), %r8 + cmoveq 16(%rsi), %r9 + cmoveq 24(%rsi), %r10 + movq %rcx, (%rdi) + movq %r8, 8(%rdi) + movq %r9, 16(%rdi) + movq %r10, 24(%rdi) + repz retq +#ifndef __APPLE__ +.size fe_cmov,.-fe_cmov +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_isnonzero +.type fe_isnonzero,@function +.align 4 +fe_isnonzero: +#else +.section __TEXT,__text +.globl _fe_isnonzero +.p2align 2 +_fe_isnonzero: +#endif /* __APPLE__ */ + movq $0x7fffffffffffffff, %r10 + movq (%rdi), %rax + movq 8(%rdi), %rdx + movq 16(%rdi), %rcx + movq 24(%rdi), %r8 + addq $19, %rax + adcq $0x00, %rdx + adcq $0x00, %rcx + adcq $0x00, %r8 + shrq $63, %r8 + imulq $19, %r8, %r9 + movq (%rdi), %rax + movq 8(%rdi), %rdx + movq 16(%rdi), %rcx + movq 24(%rdi), %r8 + addq %r9, %rax + adcq $0x00, %rdx + adcq $0x00, %rcx + adcq $0x00, %r8 + andq %r10, %r8 + orq %rdx, %rax + orq %rcx, %rax + orq %r8, %rax + repz retq +#ifndef __APPLE__ +.size fe_isnonzero,.-fe_isnonzero +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_isnegative +.type fe_isnegative,@function +.align 4 +fe_isnegative: +#else +.section __TEXT,__text +.globl _fe_isnegative +.p2align 2 +_fe_isnegative: +#endif /* __APPLE__ */ + movq $0x7fffffffffffffff, %r11 + movq (%rdi), %rdx + movq 8(%rdi), %rcx + movq 16(%rdi), %r8 + movq 24(%rdi), %r9 + movq %rdx, %rax + addq $19, %rdx + adcq $0x00, %rcx + adcq $0x00, %r8 + adcq $0x00, %r9 + shrq $63, %r9 + imulq $19, %r9, %r10 + addq %r10, %rax + andq $0x01, %rax + repz retq +#ifndef __APPLE__ +.size fe_isnegative,.-fe_isnegative +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_cmov_table +.type fe_cmov_table,@function +.align 4 +fe_cmov_table: +#else +.section __TEXT,__text +.globl _fe_cmov_table +.p2align 2 +_fe_cmov_table: +#endif /* __APPLE__ */ + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rdx, %rcx + movsbq %cl, %rax + cdq + xorb %dl, %al + subb %dl, %al + movb %al, %r15b + movq $0x01, %rax + xorq %rdx, %rdx + xorq %r8, %r8 + xorq %r9, %r9 + movq $0x01, %r10 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + cmpb $0x01, %r15b + movq (%rsi), %r14 + cmoveq %r14, %rax + movq 8(%rsi), %r14 + cmoveq %r14, %rdx + movq 16(%rsi), %r14 + cmoveq %r14, %r8 + movq 24(%rsi), %r14 + cmoveq %r14, %r9 + movq 32(%rsi), %r14 + cmoveq %r14, %r10 + movq 40(%rsi), %r14 + cmoveq %r14, %r11 + movq 48(%rsi), %r14 + cmoveq %r14, %r12 + movq 56(%rsi), %r14 + cmoveq %r14, %r13 + cmpb $2, %r15b + movq 96(%rsi), %r14 + cmoveq %r14, %rax + movq 104(%rsi), %r14 + cmoveq %r14, %rdx + movq 112(%rsi), %r14 + cmoveq %r14, %r8 + movq 120(%rsi), %r14 + cmoveq %r14, %r9 + movq 128(%rsi), %r14 + cmoveq %r14, %r10 + movq 136(%rsi), %r14 + cmoveq %r14, %r11 + movq 144(%rsi), %r14 + cmoveq %r14, %r12 + movq 152(%rsi), %r14 + cmoveq %r14, %r13 + cmpb $3, %r15b + movq 192(%rsi), %r14 + cmoveq %r14, %rax + movq 200(%rsi), %r14 + cmoveq %r14, %rdx + movq 208(%rsi), %r14 + cmoveq %r14, %r8 + movq 216(%rsi), %r14 + cmoveq %r14, %r9 + movq 224(%rsi), %r14 + cmoveq %r14, %r10 + movq 232(%rsi), %r14 + cmoveq %r14, %r11 + movq 240(%rsi), %r14 + cmoveq %r14, %r12 + movq 248(%rsi), %r14 + cmoveq %r14, %r13 + cmpb $4, %r15b + movq 
288(%rsi), %r14 + cmoveq %r14, %rax + movq 296(%rsi), %r14 + cmoveq %r14, %rdx + movq 304(%rsi), %r14 + cmoveq %r14, %r8 + movq 312(%rsi), %r14 + cmoveq %r14, %r9 + movq 320(%rsi), %r14 + cmoveq %r14, %r10 + movq 328(%rsi), %r14 + cmoveq %r14, %r11 + movq 336(%rsi), %r14 + cmoveq %r14, %r12 + movq 344(%rsi), %r14 + cmoveq %r14, %r13 + cmpb $5, %r15b + movq 384(%rsi), %r14 + cmoveq %r14, %rax + movq 392(%rsi), %r14 + cmoveq %r14, %rdx + movq 400(%rsi), %r14 + cmoveq %r14, %r8 + movq 408(%rsi), %r14 + cmoveq %r14, %r9 + movq 416(%rsi), %r14 + cmoveq %r14, %r10 + movq 424(%rsi), %r14 + cmoveq %r14, %r11 + movq 432(%rsi), %r14 + cmoveq %r14, %r12 + movq 440(%rsi), %r14 + cmoveq %r14, %r13 + cmpb $6, %r15b + movq 480(%rsi), %r14 + cmoveq %r14, %rax + movq 488(%rsi), %r14 + cmoveq %r14, %rdx + movq 496(%rsi), %r14 + cmoveq %r14, %r8 + movq 504(%rsi), %r14 + cmoveq %r14, %r9 + movq 512(%rsi), %r14 + cmoveq %r14, %r10 + movq 520(%rsi), %r14 + cmoveq %r14, %r11 + movq 528(%rsi), %r14 + cmoveq %r14, %r12 + movq 536(%rsi), %r14 + cmoveq %r14, %r13 + cmpb $7, %r15b + movq 576(%rsi), %r14 + cmoveq %r14, %rax + movq 584(%rsi), %r14 + cmoveq %r14, %rdx + movq 592(%rsi), %r14 + cmoveq %r14, %r8 + movq 600(%rsi), %r14 + cmoveq %r14, %r9 + movq 608(%rsi), %r14 + cmoveq %r14, %r10 + movq 616(%rsi), %r14 + cmoveq %r14, %r11 + movq 624(%rsi), %r14 + cmoveq %r14, %r12 + movq 632(%rsi), %r14 + cmoveq %r14, %r13 + cmpb $8, %r15b + movq 672(%rsi), %r14 + cmoveq %r14, %rax + movq 680(%rsi), %r14 + cmoveq %r14, %rdx + movq 688(%rsi), %r14 + cmoveq %r14, %r8 + movq 696(%rsi), %r14 + cmoveq %r14, %r9 + movq 704(%rsi), %r14 + cmoveq %r14, %r10 + movq 712(%rsi), %r14 + cmoveq %r14, %r11 + movq 720(%rsi), %r14 + cmoveq %r14, %r12 + movq 728(%rsi), %r14 + cmoveq %r14, %r13 + cmpb $0x00, %cl + movq %rax, %r14 + cmovlq %r10, %rax + cmovlq %r14, %r10 + movq %rdx, %r14 + cmovlq %r11, %rdx + cmovlq %r14, %r11 + movq %r8, %r14 + cmovlq %r12, %r8 + cmovlq %r14, %r12 + movq %r9, %r14 + cmovlq %r13, %r9 + cmovlq %r14, %r13 + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %r8, 16(%rdi) + movq %r9, 24(%rdi) + movq %r10, 32(%rdi) + movq %r11, 40(%rdi) + movq %r12, 48(%rdi) + movq %r13, 56(%rdi) + xorq %rax, %rax + xorq %rdx, %rdx + xorq %r8, %r8 + xorq %r9, %r9 + cmpb $0x01, %r15b + movq 64(%rsi), %r14 + cmoveq %r14, %rax + movq 72(%rsi), %r14 + cmoveq %r14, %rdx + movq 80(%rsi), %r14 + cmoveq %r14, %r8 + movq 88(%rsi), %r14 + cmoveq %r14, %r9 + cmpb $2, %r15b + movq 160(%rsi), %r14 + cmoveq %r14, %rax + movq 168(%rsi), %r14 + cmoveq %r14, %rdx + movq 176(%rsi), %r14 + cmoveq %r14, %r8 + movq 184(%rsi), %r14 + cmoveq %r14, %r9 + cmpb $3, %r15b + movq 256(%rsi), %r14 + cmoveq %r14, %rax + movq 264(%rsi), %r14 + cmoveq %r14, %rdx + movq 272(%rsi), %r14 + cmoveq %r14, %r8 + movq 280(%rsi), %r14 + cmoveq %r14, %r9 + cmpb $4, %r15b + movq 352(%rsi), %r14 + cmoveq %r14, %rax + movq 360(%rsi), %r14 + cmoveq %r14, %rdx + movq 368(%rsi), %r14 + cmoveq %r14, %r8 + movq 376(%rsi), %r14 + cmoveq %r14, %r9 + cmpb $5, %r15b + movq 448(%rsi), %r14 + cmoveq %r14, %rax + movq 456(%rsi), %r14 + cmoveq %r14, %rdx + movq 464(%rsi), %r14 + cmoveq %r14, %r8 + movq 472(%rsi), %r14 + cmoveq %r14, %r9 + cmpb $6, %r15b + movq 544(%rsi), %r14 + cmoveq %r14, %rax + movq 552(%rsi), %r14 + cmoveq %r14, %rdx + movq 560(%rsi), %r14 + cmoveq %r14, %r8 + movq 568(%rsi), %r14 + cmoveq %r14, %r9 + cmpb $7, %r15b + movq 640(%rsi), %r14 + cmoveq %r14, %rax + movq 648(%rsi), %r14 + cmoveq %r14, %rdx + movq 656(%rsi), %r14 + cmoveq %r14, %r8 + movq 664(%rsi), %r14 + cmoveq 
%r14, %r9 + cmpb $8, %r15b + movq 736(%rsi), %r14 + cmoveq %r14, %rax + movq 744(%rsi), %r14 + cmoveq %r14, %rdx + movq 752(%rsi), %r14 + cmoveq %r14, %r8 + movq 760(%rsi), %r14 + cmoveq %r14, %r9 + movq $-19, %r10 + movq $-1, %r11 + movq $-1, %r12 + movq $0x7fffffffffffffff, %r13 + subq %rax, %r10 + sbbq %rdx, %r11 + sbbq %r8, %r12 + sbbq %r9, %r13 + cmpb $0x00, %cl + cmovlq %r10, %rax + cmovlq %r11, %rdx + cmovlq %r12, %r8 + cmovlq %r13, %r9 + movq %rax, 64(%rdi) + movq %rdx, 72(%rdi) + movq %r8, 80(%rdi) + movq %r9, 88(%rdi) + popq %r15 + popq %r14 + popq %r13 + popq %r12 + repz retq +#ifndef __APPLE__ +.size fe_cmov_table,.-fe_cmov_table +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_mul +.type fe_mul,@function +.align 4 +fe_mul: +#else +.section __TEXT,__text +.globl _fe_mul +.p2align 2 +_fe_mul: +#endif /* __APPLE__ */ +#ifndef __APPLE__ + jmpq *fe_mul_p(%rip) +#else + jmpq *_fe_mul_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.size fe_mul,.-fe_mul +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_sq +.type fe_sq,@function +.align 4 +fe_sq: +#else +.section __TEXT,__text +.globl _fe_sq +.p2align 2 +_fe_sq: +#endif /* __APPLE__ */ +#ifndef __APPLE__ + jmpq *fe_sq_p(%rip) +#else + jmpq *_fe_sq_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.size fe_sq,.-fe_sq +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_mul121666 +.type fe_mul121666,@function +.align 4 +fe_mul121666: +#else +.section __TEXT,__text +.globl _fe_mul121666 +.p2align 2 +_fe_mul121666: +#endif /* __APPLE__ */ +#ifndef __APPLE__ + jmpq *fe_mul121666_p(%rip) +#else + jmpq *_fe_mul121666_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.size fe_mul121666,.-fe_mul121666 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_sq2 +.type fe_sq2,@function +.align 4 +fe_sq2: +#else +.section __TEXT,__text +.globl _fe_sq2 +.p2align 2 +_fe_sq2: +#endif /* __APPLE__ */ +#ifndef __APPLE__ + jmpq *fe_sq2_p(%rip) +#else + jmpq *_fe_sq2_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.size fe_sq2,.-fe_sq2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_invert +.type fe_invert,@function +.align 4 +fe_invert: +#else +.section __TEXT,__text +.globl _fe_invert +.p2align 2 +_fe_invert: +#endif /* __APPLE__ */ +#ifndef __APPLE__ + jmpq *fe_invert_p(%rip) +#else + jmpq *_fe_invert_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.size fe_invert,.-fe_invert +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl curve25519 +.type curve25519,@function +.align 4 +curve25519: +#else +.section __TEXT,__text +.globl _curve25519 +.p2align 2 +_curve25519: +#endif /* __APPLE__ */ +#ifndef __APPLE__ + jmpq *curve25519_p(%rip) +#else + jmpq *_curve25519_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.size curve25519,.-curve25519 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_pow22523 +.type fe_pow22523,@function +.align 4 +fe_pow22523: +#else +.section __TEXT,__text +.globl _fe_pow22523 +.p2align 2 +_fe_pow22523: +#endif /* __APPLE__ */ +#ifndef __APPLE__ + jmpq *fe_pow22523_p(%rip) +#else + jmpq *_fe_pow22523_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.size fe_pow22523,.-fe_pow22523 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_to_p2 +.type fe_ge_to_p2,@function +.align 4 +fe_ge_to_p2: +#else +.section __TEXT,__text +.globl _fe_ge_to_p2 +.p2align 2 +_fe_ge_to_p2: +#endif /* __APPLE__ */ +#ifndef __APPLE__ + jmpq *fe_ge_to_p2_p(%rip) +#else + jmpq *_fe_ge_to_p2_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.size 
fe_ge_to_p2,.-fe_ge_to_p2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_to_p3 +.type fe_ge_to_p3,@function +.align 4 +fe_ge_to_p3: +#else +.section __TEXT,__text +.globl _fe_ge_to_p3 +.p2align 2 +_fe_ge_to_p3: +#endif /* __APPLE__ */ +#ifndef __APPLE__ + jmpq *fe_ge_to_p3_p(%rip) +#else + jmpq *_fe_ge_to_p3_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.size fe_ge_to_p3,.-fe_ge_to_p3 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_dbl +.type fe_ge_dbl,@function +.align 4 +fe_ge_dbl: +#else +.section __TEXT,__text +.globl _fe_ge_dbl +.p2align 2 +_fe_ge_dbl: +#endif /* __APPLE__ */ +#ifndef __APPLE__ + jmpq *fe_ge_dbl_p(%rip) +#else + jmpq *_fe_ge_dbl_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.size fe_ge_dbl,.-fe_ge_dbl +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_madd +.type fe_ge_madd,@function +.align 4 +fe_ge_madd: +#else +.section __TEXT,__text +.globl _fe_ge_madd +.p2align 2 +_fe_ge_madd: +#endif /* __APPLE__ */ +#ifndef __APPLE__ + jmpq *fe_ge_madd_p(%rip) +#else + jmpq *_fe_ge_madd_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.size fe_ge_madd,.-fe_ge_madd +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_msub +.type fe_ge_msub,@function +.align 4 +fe_ge_msub: +#else +.section __TEXT,__text +.globl _fe_ge_msub +.p2align 2 +_fe_ge_msub: +#endif /* __APPLE__ */ +#ifndef __APPLE__ + jmpq *fe_ge_msub_p(%rip) +#else + jmpq *_fe_ge_msub_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.size fe_ge_msub,.-fe_ge_msub +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_add +.type fe_ge_add,@function +.align 4 +fe_ge_add: +#else +.section __TEXT,__text +.globl _fe_ge_add +.p2align 2 +_fe_ge_add: +#endif /* __APPLE__ */ +#ifndef __APPLE__ + jmpq *fe_ge_add_p(%rip) +#else + jmpq *_fe_ge_add_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.size fe_ge_add,.-fe_ge_add +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_sub +.type fe_ge_sub,@function +.align 4 +fe_ge_sub: +#else +.section __TEXT,__text +.globl _fe_ge_sub +.p2align 2 +_fe_ge_sub: +#endif /* __APPLE__ */ +#ifndef __APPLE__ + jmpq *fe_ge_sub_p(%rip) +#else + jmpq *_fe_ge_sub_p(%rip) +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.size fe_ge_sub,.-fe_ge_sub +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +.type cpuFlagsSet, @object +.size cpuFlagsSet,4 +cpuFlagsSet: + .long 0 +#else +.section __DATA,__data +.p2align 2 +_cpuFlagsSet: + .long 0 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +.type intelFlags, @object +.size intelFlags,4 +intelFlags: + .long 0 +#else +.section __DATA,__data +.p2align 2 +_intelFlags: + .long 0 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +.type fe_mul_p, @object +.size fe_mul_p,8 +fe_mul_p: + .quad fe_mul_x64 +#else +.section __DATA,__data +.p2align 2 +_fe_mul_p: + .quad _fe_mul_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +.type fe_sq_p, @object +.size fe_sq_p,8 +fe_sq_p: + .quad fe_sq_x64 +#else +.section __DATA,__data +.p2align 2 +_fe_sq_p: + .quad _fe_sq_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +.type fe_mul121666_p, @object +.size fe_mul121666_p,8 +fe_mul121666_p: + .quad fe_mul121666_x64 +#else +.section __DATA,__data +.p2align 2 +_fe_mul121666_p: + .quad _fe_mul121666_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +.type fe_sq2_p, @object +.size fe_sq2_p,8 +fe_sq2_p: + .quad fe_sq2_x64 +#else +.section __DATA,__data +.p2align 2 +_fe_sq2_p: + .quad _fe_sq2_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +.type fe_invert_p, @object +.size 
fe_invert_p,8 +fe_invert_p: + .quad fe_invert_x64 +#else +.section __DATA,__data +.p2align 2 +_fe_invert_p: + .quad _fe_invert_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +.type curve25519_p, @object +.size curve25519_p,8 +curve25519_p: + .quad curve25519_x64 +#else +.section __DATA,__data +.p2align 2 +_curve25519_p: + .quad _curve25519_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +.type fe_pow22523_p, @object +.size fe_pow22523_p,8 +fe_pow22523_p: + .quad fe_pow22523_x64 +#else +.section __DATA,__data +.p2align 2 +_fe_pow22523_p: + .quad _fe_pow22523_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +.type fe_ge_to_p2_p, @object +.size fe_ge_to_p2_p,8 +fe_ge_to_p2_p: + .quad fe_ge_to_p2_x64 +#else +.section __DATA,__data +.p2align 2 +_fe_ge_to_p2_p: + .quad _fe_ge_to_p2_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +.type fe_ge_to_p3_p, @object +.size fe_ge_to_p3_p,8 +fe_ge_to_p3_p: + .quad fe_ge_to_p3_x64 +#else +.section __DATA,__data +.p2align 2 +_fe_ge_to_p3_p: + .quad _fe_ge_to_p3_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +.type fe_ge_dbl_p, @object +.size fe_ge_dbl_p,8 +fe_ge_dbl_p: + .quad fe_ge_dbl_x64 +#else +.section __DATA,__data +.p2align 2 +_fe_ge_dbl_p: + .quad _fe_ge_dbl_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +.type fe_ge_madd_p, @object +.size fe_ge_madd_p,8 +fe_ge_madd_p: + .quad fe_ge_madd_x64 +#else +.section __DATA,__data +.p2align 2 +_fe_ge_madd_p: + .quad _fe_ge_madd_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +.type fe_ge_msub_p, @object +.size fe_ge_msub_p,8 +fe_ge_msub_p: + .quad fe_ge_msub_x64 +#else +.section __DATA,__data +.p2align 2 +_fe_ge_msub_p: + .quad _fe_ge_msub_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +.type fe_ge_add_p, @object +.size fe_ge_add_p,8 +fe_ge_add_p: + .quad fe_ge_add_x64 +#else +.section __DATA,__data +.p2align 2 +_fe_ge_add_p: + .quad _fe_ge_add_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +.type fe_ge_sub_p, @object +.size fe_ge_sub_p,8 +fe_ge_sub_p: + .quad fe_ge_sub_x64 +#else +.section __DATA,__data +.p2align 2 +_fe_ge_sub_p: + .quad _fe_ge_sub_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_mul_x64 +.type fe_mul_x64,@function +.align 4 +fe_mul_x64: +#else +.section __TEXT,__text +.globl _fe_mul_x64 +.p2align 2 +_fe_mul_x64: +#endif /* __APPLE__ */ + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbx + movq %rdx, %rcx + # Multiply + # A[0] * B[0] + movq (%rcx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rcx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rcx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rcx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rcx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rcx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rcx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rcx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 
24(%rcx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rcx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rbx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rbx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rbx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rbx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + popq %rbx + popq %r15 + popq %r14 + popq %r13 + popq %r12 + repz retq +#ifndef __APPLE__ +.size fe_mul_x64,.-fe_mul_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_sq_x64 +.type fe_sq_x64,@function +.align 4 +fe_sq_x64: +#else +.section __TEXT,__text +.globl _fe_sq_x64 +.p2align 2 +_fe_sq_x64: +#endif /* __APPLE__ */ + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + # Square + # A[0] * A[1] + movq (%rsi), %rax + mulq 8(%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * A[2] + movq (%rsi), %rax + mulq 16(%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[0] * A[3] + movq (%rsi), %rax + mulq 24(%rsi) + xorq %r11, %r11 + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * A[2] + movq 8(%rsi), %rax + mulq 16(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[1] * A[3] + movq 8(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + # A[2] * A[3] + movq 16(%rsi), %rax + mulq 24(%rsi) + xorq %r13, %r13 + addq %rax, %r12 + adcq %rdx, %r13 + # Double + xorq %r14, %r14 + addq %r8, %r8 + adcq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq $0x00, %r14 + # A[0] * A[0] + movq (%rsi), %rax + mulq %rax + movq %rax, %rcx + movq %rdx, %r15 + # A[1] * A[1] + movq 8(%rsi), %rax + mulq %rax + addq %r15, %r8 + adcq %rax, %r9 + adcq $0x00, %rdx + movq %rdx, %r15 + # A[2] * A[2] + movq 16(%rsi), %rax + mulq %rax + addq %r15, %r10 + adcq %rax, %r11 + adcq $0x00, %rdx + movq %rdx, %r15 + # A[3] * A[3] + movq 24(%rsi), %rax + mulq %rax + addq %rax, %r13 + adcq %rdx, %r14 + addq %r15, %r12 + adcq $0x00, %r13 + adcq $0x00, %r14 + # Reduce + movq $0x7fffffffffffffff, %r15 + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + shldq 
$0x01, %r10, %r11 + andq %r15, %r10 + # Multiply top half by 19 + movq $19, %rax + mulq %r11 + xorq %r11, %r11 + addq %rax, %rcx + movq $19, %rax + adcq %rdx, %r11 + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + # Add remaining product results in + addq %r11, %r8 + adcq %r12, %r9 + adcq %r13, %r10 + adcq %rax, %r10 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r10, %rdx + imulq $19, %rdx, %rax + andq %r15, %r10 + addq %rax, %rcx + adcq $0x00, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + # Reduce if top bit set + movq %r10, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %r15, %r10 + addq %rax, %rcx + adcq $0x00, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + # Store + movq %rcx, (%rdi) + movq %r8, 8(%rdi) + movq %r9, 16(%rdi) + movq %r10, 24(%rdi) + popq %r15 + popq %r14 + popq %r13 + popq %r12 + repz retq +#ifndef __APPLE__ +.size fe_sq_x64,.-fe_sq_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_sq_n_x64 +.type fe_sq_n_x64,@function +.align 4 +fe_sq_n_x64: +#else +.section __TEXT,__text +.globl _fe_sq_n_x64 +.p2align 2 +_fe_sq_n_x64: +#endif /* __APPLE__ */ + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbx + movq %rdx, %rcx +L_fe_sq_n_x64: + # Square + # A[0] * A[1] + movq (%rsi), %rax + mulq 8(%rsi) + movq %rax, %r9 + movq %rdx, %r10 + # A[0] * A[2] + movq (%rsi), %rax + mulq 16(%rsi) + xorq %r11, %r11 + addq %rax, %r10 + adcq %rdx, %r11 + # A[0] * A[3] + movq (%rsi), %rax + mulq 24(%rsi) + xorq %r12, %r12 + addq %rax, %r11 + adcq %rdx, %r12 + # A[1] * A[2] + movq 8(%rsi), %rax + mulq 16(%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * A[3] + movq 8(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + # A[2] * A[3] + movq 16(%rsi), %rax + mulq 24(%rsi) + xorq %r14, %r14 + addq %rax, %r13 + adcq %rdx, %r14 + # Double + xorq %r15, %r15 + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq $0x00, %r15 + # A[0] * A[0] + movq (%rsi), %rax + mulq %rax + movq %rax, %r8 + movq %rdx, %rbx + # A[1] * A[1] + movq 8(%rsi), %rax + mulq %rax + addq %rbx, %r9 + adcq %rax, %r10 + adcq $0x00, %rdx + movq %rdx, %rbx + # A[2] * A[2] + movq 16(%rsi), %rax + mulq %rax + addq %rbx, %r11 + adcq %rax, %r12 + adcq $0x00, %rdx + movq %rdx, %rbx + # A[3] * A[3] + movq 24(%rsi), %rax + mulq %rax + addq %rax, %r14 + adcq %rdx, %r15 + addq %rbx, %r13 + adcq $0x00, %r14 + adcq $0x00, %r15 + # Reduce + movq $0x7fffffffffffffff, %rbx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rbx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rbx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rbx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, 
%r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + decb %cl + jnz L_fe_sq_n_x64 + popq %rbx + popq %r15 + popq %r14 + popq %r13 + popq %r12 + repz retq +#ifndef __APPLE__ +.size fe_sq_n_x64,.-fe_sq_n_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_mul121666_x64 +.type fe_mul121666_x64,@function +.align 4 +fe_mul121666_x64: +#else +.section __TEXT,__text +.globl _fe_mul121666_x64 +.p2align 2 +_fe_mul121666_x64: +#endif /* __APPLE__ */ + pushq %r12 + # Multiply by 121666 + movq $0x1db42, %rax + mulq (%rsi) + xorq %r10, %r10 + movq %rax, %r8 + movq %rdx, %r9 + movq $0x1db42, %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + movq $0x1db42, %rax + mulq 16(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + movq $0x1db42, %rax + mulq 24(%rsi) + movq $0x7fffffffffffffff, %rcx + addq %rax, %r11 + adcq %rdx, %r12 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + movq $19, %rax + mulq %r12 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + popq %r12 + repz retq +#ifndef __APPLE__ +.size fe_mul121666_x64,.-fe_mul121666_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_sq2_x64 +.type fe_sq2_x64,@function +.align 4 +fe_sq2_x64: +#else +.section __TEXT,__text +.globl _fe_sq2_x64 +.p2align 2 +_fe_sq2_x64: +#endif /* __APPLE__ */ + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbx + # Square * 2 + # A[0] * A[1] + movq (%rsi), %rax + mulq 8(%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * A[2] + movq (%rsi), %rax + mulq 16(%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[0] * A[3] + movq (%rsi), %rax + mulq 24(%rsi) + xorq %r11, %r11 + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * A[2] + movq 8(%rsi), %rax + mulq 16(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[1] * A[3] + movq 8(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + # A[2] * A[3] + movq 16(%rsi), %rax + mulq 24(%rsi) + xorq %r13, %r13 + addq %rax, %r12 + adcq %rdx, %r13 + # Double + xorq %r14, %r14 + addq %r8, %r8 + adcq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq $0x00, %r14 + # A[0] * A[0] + movq (%rsi), %rax + mulq %rax + movq %rax, %rcx + movq %rdx, %r15 + # A[1] * A[1] + movq 8(%rsi), %rax + mulq %rax + addq %r15, %r8 + adcq %rax, %r9 + adcq $0x00, %rdx + movq %rdx, %r15 + # A[2] * A[2] + movq 16(%rsi), %rax + mulq %rax + addq %r15, %r10 + adcq %rax, %r11 + adcq $0x00, %rdx + movq %rdx, %r15 + # A[3] * A[3] + movq 24(%rsi), %rax + mulq %rax + addq %rax, %r13 + adcq %rdx, %r14 + addq %r15, %r12 + adcq $0x00, %r13 + adcq $0x00, %r14 + # Reduce + movq $0x7fffffffffffffff, %rbx + xorq %rax, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $3, %r14, %rax + shldq $2, %r13, %r14 + shldq $2, %r12, %r13 + shldq $2, %r11, %r12 + shldq $2, %r10, %r11 + shldq $0x01, %r9, %r10 + shldq $0x01, %r8, %r9 + shldq $0x01, %rcx, %r8 + shlq $0x01, %rcx + andq %rbx, %r10 + # Two out left, one in right + andq %rbx, %r14 + # Multiply top bits by 19*19 + imulq $0x169, %rax, %r15 + # Multiply top half by 19 + movq $19, %rax + mulq %r11 + xorq %r11, %r11 + addq %rax, %rcx + movq $19, %rax + adcq %rdx, %r11 + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + # Add remaining product
results in + addq %r15, %rcx + adcq %r11, %r8 + adcq %r12, %r9 + adcq %r13, %r10 + adcq %rax, %r10 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r10, %rdx + imulq $19, %rdx, %rax + andq %rbx, %r10 + addq %rax, %rcx + adcq $0x00, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + # Reduce if top bit set + movq %r10, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rbx, %r10 + addq %rax, %rcx + adcq $0x00, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + # Store + movq %rcx, (%rdi) + movq %r8, 8(%rdi) + movq %r9, 16(%rdi) + movq %r10, 24(%rdi) + popq %rbx + popq %r15 + popq %r14 + popq %r13 + popq %r12 + repz retq +#ifndef __APPLE__ +.size fe_sq2_x64,.-fe_sq2_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_invert_x64 +.type fe_invert_x64,@function +.align 4 +fe_invert_x64: +#else +.section __TEXT,__text +.globl _fe_invert_x64 +.p2align 2 +_fe_invert_x64: +#endif /* __APPLE__ */ + subq $0x90, %rsp + # Invert + movq %rdi, 128(%rsp) + movq %rsi, 136(%rsp) + movq %rsp, %rdi + movq 136(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + movq 136(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + movq %rsp, %rdi + movq %rsp, %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + leaq 64(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + movq $4, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 64(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + movq $9, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 64(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + movq $19, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 96(%rsp), %rsi + leaq 64(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + movq $9, %rdx +#ifndef __APPLE__ + callq 
fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 64(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + movq $49, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 64(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + movq $0x63, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 96(%rsp), %rsi + leaq 64(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + movq $49, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 64(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + movq $4, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + movq 128(%rsp), %rdi + leaq 32(%rsp), %rsi + movq %rsp, %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + movq 136(%rsp), %rsi + movq 128(%rsp), %rdi + addq $0x90, %rsp + repz retq +#ifndef __APPLE__ +.text +.globl curve25519_x64 +.type curve25519_x64,@function +.align 4 +curve25519_x64: +#else +.section __TEXT,__text +.globl _curve25519_x64 +.p2align 2 +_curve25519_x64: +#endif /* __APPLE__ */ + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbx + pushq %rbp + movq %rdx, %r8 + subq $0xb8, %rsp + xorq %rbx, %rbx + movq %rdi, 176(%rsp) + # Set one + movq $0x01, (%rdi) + movq $0x00, 8(%rdi) + movq $0x00, 16(%rdi) + movq $0x00, 24(%rdi) + # Set zero + movq $0x00, (%rsp) + movq $0x00, 8(%rsp) + movq $0x00, 16(%rsp) + movq $0x00, 24(%rsp) + # Set one + movq $0x01, 32(%rsp) + movq $0x00, 40(%rsp) + movq $0x00, 48(%rsp) + movq $0x00, 56(%rsp) + # Copy + movq (%r8), %rcx + movq 8(%r8), %r9 + movq 16(%r8), %r10 + movq 24(%r8), %r11 + movq %rcx, 64(%rsp) + movq %r9, 72(%rsp) + movq %r10, 80(%rsp) + movq %r11, 88(%rsp) + movb $62, 168(%rsp) + movq $3, 160(%rsp) +L_curve25519_x64_words: +L_curve25519_x64_bits: + movq 160(%rsp), %r9 + movb 168(%rsp), %cl + movq (%rsi,%r9,8), %rbp + shrq %cl, %rbp + andq $0x01, %rbp + xorq %rbp, %rbx + negq %rbx + # Conditional Swap + movq (%rdi), %rcx + movq 8(%rdi), %r9 + movq 16(%rdi), %r10 + movq 24(%rdi), %r11 + xorq 64(%rsp), %rcx + xorq 72(%rsp), %r9 + xorq 80(%rsp), %r10 + xorq 88(%rsp), %r11 + andq %rbx, %rcx + andq %rbx, %r9 + andq %rbx, %r10 + andq %rbx, %r11 + xorq %rcx, (%rdi) + xorq %r9, 8(%rdi) + xorq %r10, 16(%rdi) + xorq %r11, 24(%rdi) + xorq %rcx, 64(%rsp) 
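 + # The masked difference is XORed into both values to complete the constant-time swap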
+ xorq %r9, 72(%rsp) + xorq %r10, 80(%rsp) + xorq %r11, 88(%rsp) + # Conditional Swap + movq (%rsp), %rcx + movq 8(%rsp), %r9 + movq 16(%rsp), %r10 + movq 24(%rsp), %r11 + xorq 32(%rsp), %rcx + xorq 40(%rsp), %r9 + xorq 48(%rsp), %r10 + xorq 56(%rsp), %r11 + andq %rbx, %rcx + andq %rbx, %r9 + andq %rbx, %r10 + andq %rbx, %r11 + xorq %rcx, (%rsp) + xorq %r9, 8(%rsp) + xorq %r10, 16(%rsp) + xorq %r11, 24(%rsp) + xorq %rcx, 32(%rsp) + xorq %r9, 40(%rsp) + xorq %r10, 48(%rsp) + xorq %r11, 56(%rsp) + movq %rbp, %rbx + # Add + movq (%rdi), %rcx + movq 8(%rdi), %r9 + movq 16(%rdi), %r10 + movq 24(%rdi), %rbp + movq %rcx, %r12 + addq (%rsp), %rcx + movq %r9, %r13 + adcq 8(%rsp), %r9 + movq %r10, %r14 + adcq 16(%rsp), %r10 + movq %rbp, %r15 + adcq 24(%rsp), %rbp + movq $-19, %rax + movq %rbp, %r11 + movq $0x7fffffffffffffff, %rdx + sarq $63, %rbp + # Mask the modulus + andq %rbp, %rax + andq %rbp, %rdx + # Sub modulus (if overflow) + subq %rax, %rcx + sbbq %rbp, %r9 + sbbq %rbp, %r10 + sbbq %rdx, %r11 + # Sub + subq (%rsp), %r12 + movq $0x00, %rbp + sbbq 8(%rsp), %r13 + movq $-19, %rax + sbbq 16(%rsp), %r14 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rsp), %r15 + sbbq $0x00, %rbp + # Mask the modulus + andq %rbp, %rax + andq %rbp, %rdx + # Add modulus (if underflow) + addq %rax, %r12 + adcq %rbp, %r13 + adcq %rbp, %r14 + adcq %rdx, %r15 + movq %rcx, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, 128(%rsp) + movq %r13, 136(%rsp) + movq %r14, 144(%rsp) + movq %r15, 152(%rsp) + # Add + movq 64(%rsp), %rcx + movq 72(%rsp), %r9 + movq 80(%rsp), %r10 + movq 88(%rsp), %rbp + movq %rcx, %r12 + addq 32(%rsp), %rcx + movq %r9, %r13 + adcq 40(%rsp), %r9 + movq %r10, %r14 + adcq 48(%rsp), %r10 + movq %rbp, %r15 + adcq 56(%rsp), %rbp + movq $-19, %rax + movq %rbp, %r11 + movq $0x7fffffffffffffff, %rdx + sarq $63, %rbp + # Mask the modulus + andq %rbp, %rax + andq %rbp, %rdx + # Sub modulus (if overflow) + subq %rax, %rcx + sbbq %rbp, %r9 + sbbq %rbp, %r10 + sbbq %rdx, %r11 + # Sub + subq 32(%rsp), %r12 + movq $0x00, %rbp + sbbq 40(%rsp), %r13 + movq $-19, %rax + sbbq 48(%rsp), %r14 + movq $0x7fffffffffffffff, %rdx + sbbq 56(%rsp), %r15 + sbbq $0x00, %rbp + # Mask the modulus + andq %rbp, %rax + andq %rbp, %rdx + # Add modulus (if underflow) + addq %rax, %r12 + adcq %rbp, %r13 + adcq %rbp, %r14 + adcq %rdx, %r15 + movq %rcx, (%rsp) + movq %r9, 8(%rsp) + movq %r10, 16(%rsp) + movq %r11, 24(%rsp) + movq %r12, 96(%rsp) + movq %r13, 104(%rsp) + movq %r14, 112(%rsp) + movq %r15, 120(%rsp) + # Multiply + # A[0] * B[0] + movq (%rdi), %rax + mulq 96(%rsp) + movq %rax, %rcx + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rdi), %rax + mulq 96(%rsp) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rdi), %rax + mulq 104(%rsp) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rdi), %rax + mulq 96(%rsp) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rdi), %rax + mulq 104(%rsp) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rdi), %rax + mulq 112(%rsp) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rdi), %rax + mulq 96(%rsp) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rdi), %rax + mulq 104(%rsp) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rdi), %rax + mulq 112(%rsp) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] 
* B[0] + movq (%rdi), %rax + mulq 120(%rsp) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rdi), %rax + mulq 104(%rsp) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rdi), %rax + mulq 112(%rsp) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rdi), %rax + mulq 120(%rsp) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rdi), %rax + mulq 112(%rsp) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rdi), %rax + mulq 120(%rsp) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rdi), %rax + mulq 120(%rsp) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rbp + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rbp, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %rcx + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %rcx, 32(%rsp) + movq %r9, 40(%rsp) + movq %r10, 48(%rsp) + movq %r11, 56(%rsp) + # Multiply + # A[0] * B[0] + movq 128(%rsp), %rax + mulq (%rsp) + movq %rax, %rcx + movq %rdx, %r9 + # A[0] * B[1] + movq 136(%rsp), %rax + mulq (%rsp) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq 128(%rsp), %rax + mulq 8(%rsp) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 144(%rsp), %rax + mulq (%rsp) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 136(%rsp), %rax + mulq 8(%rsp) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq 128(%rsp), %rax + mulq 16(%rsp) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 152(%rsp), %rax + mulq (%rsp) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 144(%rsp), %rax + mulq 8(%rsp) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 136(%rsp), %rax + mulq 16(%rsp) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq 128(%rsp), %rax + mulq 24(%rsp) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 152(%rsp), %rax + mulq 8(%rsp) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 144(%rsp), %rax + mulq 16(%rsp) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 136(%rsp), %rax + mulq 24(%rsp) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 152(%rsp), %rax + mulq 16(%rsp) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 144(%rsp), %rax + mulq 24(%rsp) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, 
%r15 + # A[3] * B[3] + movq 152(%rsp), %rax + mulq 24(%rsp) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rbp + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rbp, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %rcx + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %rcx, (%rsp) + movq %r9, 8(%rsp) + movq %r10, 16(%rsp) + movq %r11, 24(%rsp) + # Square + # A[0] * A[1] + movq 128(%rsp), %rax + mulq 136(%rsp) + movq %rax, %r9 + movq %rdx, %r10 + # A[0] * A[2] + movq 128(%rsp), %rax + mulq 144(%rsp) + xorq %r11, %r11 + addq %rax, %r10 + adcq %rdx, %r11 + # A[0] * A[3] + movq 128(%rsp), %rax + mulq 152(%rsp) + xorq %r12, %r12 + addq %rax, %r11 + adcq %rdx, %r12 + # A[1] * A[2] + movq 136(%rsp), %rax + mulq 144(%rsp) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * A[3] + movq 136(%rsp), %rax + mulq 152(%rsp) + addq %rax, %r12 + adcq %rdx, %r13 + # A[2] * A[3] + movq 144(%rsp), %rax + mulq 152(%rsp) + xorq %r14, %r14 + addq %rax, %r13 + adcq %rdx, %r14 + # Double + xorq %r15, %r15 + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq $0x00, %r15 + # A[0] * A[0] + movq 128(%rsp), %rax + mulq %rax + movq %rax, %rcx + movq %rdx, %rbp + # A[1] * A[1] + movq 136(%rsp), %rax + mulq %rax + addq %rbp, %r9 + adcq %rax, %r10 + adcq $0x00, %rdx + movq %rdx, %rbp + # A[2] * A[2] + movq 144(%rsp), %rax + mulq %rax + addq %rbp, %r11 + adcq %rax, %r12 + adcq $0x00, %rdx + movq %rdx, %rbp + # A[3] * A[3] + movq 152(%rsp), %rax + mulq %rax + addq %rax, %r14 + adcq %rdx, %r15 + addq %rbp, %r13 + adcq $0x00, %r14 + adcq $0x00, %r15 + # Reduce + movq $0x7fffffffffffffff, %rbp + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rbp, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %rcx + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %rcx, 96(%rsp) + movq %r9, 104(%rsp) + movq %r10, 112(%rsp) + movq %r11, 120(%rsp) + # 
Square + # A[0] * A[1] + movq (%rdi), %rax + mulq 8(%rdi) + movq %rax, %r9 + movq %rdx, %r10 + # A[0] * A[2] + movq (%rdi), %rax + mulq 16(%rdi) + xorq %r11, %r11 + addq %rax, %r10 + adcq %rdx, %r11 + # A[0] * A[3] + movq (%rdi), %rax + mulq 24(%rdi) + xorq %r12, %r12 + addq %rax, %r11 + adcq %rdx, %r12 + # A[1] * A[2] + movq 8(%rdi), %rax + mulq 16(%rdi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * A[3] + movq 8(%rdi), %rax + mulq 24(%rdi) + addq %rax, %r12 + adcq %rdx, %r13 + # A[2] * A[3] + movq 16(%rdi), %rax + mulq 24(%rdi) + xorq %r14, %r14 + addq %rax, %r13 + adcq %rdx, %r14 + # Double + xorq %r15, %r15 + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq $0x00, %r15 + # A[0] * A[0] + movq (%rdi), %rax + mulq %rax + movq %rax, %rcx + movq %rdx, %rbp + # A[1] * A[1] + movq 8(%rdi), %rax + mulq %rax + addq %rbp, %r9 + adcq %rax, %r10 + adcq $0x00, %rdx + movq %rdx, %rbp + # A[2] * A[2] + movq 16(%rdi), %rax + mulq %rax + addq %rbp, %r11 + adcq %rax, %r12 + adcq $0x00, %rdx + movq %rdx, %rbp + # A[3] * A[3] + movq 24(%rdi), %rax + mulq %rax + addq %rax, %r14 + adcq %rdx, %r15 + addq %rbp, %r13 + adcq $0x00, %r14 + adcq $0x00, %r15 + # Reduce + movq $0x7fffffffffffffff, %rbp + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rbp, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %rcx + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %rcx, 128(%rsp) + movq %r9, 136(%rsp) + movq %r10, 144(%rsp) + movq %r11, 152(%rsp) + # Add + movq 32(%rsp), %rcx + movq 40(%rsp), %r9 + movq 48(%rsp), %r10 + movq 56(%rsp), %rbp + movq %rcx, %r12 + addq (%rsp), %rcx + movq %r9, %r13 + adcq 8(%rsp), %r9 + movq %r10, %r14 + adcq 16(%rsp), %r10 + movq %rbp, %r15 + adcq 24(%rsp), %rbp + movq $-19, %rax + movq %rbp, %r11 + movq $0x7fffffffffffffff, %rdx + sarq $63, %rbp + # Mask the modulus + andq %rbp, %rax + andq %rbp, %rdx + # Sub modulus (if overflow) + subq %rax, %rcx + sbbq %rbp, %r9 + sbbq %rbp, %r10 + sbbq %rdx, %r11 + # Sub + subq (%rsp), %r12 + movq $0x00, %rbp + sbbq 8(%rsp), %r13 + movq $-19, %rax + sbbq 16(%rsp), %r14 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rsp), %r15 + sbbq $0x00, %rbp + # Mask the modulus + andq %rbp, %rax + andq %rbp, %rdx + # Add modulus (if underflow) + addq %rax, %r12 + adcq %rbp, %r13 + adcq %rbp, %r14 + adcq %rdx, %r15 + movq %rcx, 64(%rsp) + movq %r9, 72(%rsp) + movq %r10, 80(%rsp) + movq %r11, 88(%rsp) + movq %r12, (%rsp) + movq %r13, 8(%rsp) + movq %r14, 16(%rsp) + movq %r15, 24(%rsp) + # Multiply + # A[0] * B[0] + movq 96(%rsp), %rax + mulq 128(%rsp) + movq %rax, %rcx + movq %rdx, %r9 + # A[0] * B[1] + movq 104(%rsp), %rax + mulq 128(%rsp) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, 
%r10 + # A[1] * B[0] + movq 96(%rsp), %rax + mulq 136(%rsp) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 112(%rsp), %rax + mulq 128(%rsp) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 104(%rsp), %rax + mulq 136(%rsp) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq 96(%rsp), %rax + mulq 144(%rsp) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 120(%rsp), %rax + mulq 128(%rsp) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 112(%rsp), %rax + mulq 136(%rsp) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 104(%rsp), %rax + mulq 144(%rsp) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq 96(%rsp), %rax + mulq 152(%rsp) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 120(%rsp), %rax + mulq 136(%rsp) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 112(%rsp), %rax + mulq 144(%rsp) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 104(%rsp), %rax + mulq 152(%rsp) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 120(%rsp), %rax + mulq 144(%rsp) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 112(%rsp), %rax + mulq 152(%rsp) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 120(%rsp), %rax + mulq 152(%rsp) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rbp + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rbp, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %rcx + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %rcx, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + # Sub + movq 128(%rsp), %rcx + movq 136(%rsp), %r9 + movq 144(%rsp), %r10 + movq 152(%rsp), %r11 + subq 96(%rsp), %rcx + movq $0x00, %rbp + sbbq 104(%rsp), %r9 + movq $-19, %rax + sbbq 112(%rsp), %r10 + movq $0x7fffffffffffffff, %rdx + sbbq 120(%rsp), %r11 + sbbq $0x00, %rbp + # Mask the modulus + andq %rbp, %rax + andq %rbp, %rdx + # Add modulus (if underflow) + addq %rax, %rcx + adcq %rbp, %r9 + adcq %rbp, %r10 + adcq %rdx, %r11 + movq %rcx, 128(%rsp) + movq %r9, 136(%rsp) + movq %r10, 144(%rsp) + movq %r11, 152(%rsp) + # Square + # A[0] * A[1] + movq (%rsp), %rax + mulq 8(%rsp) + movq %rax, %r9 + movq %rdx, %r10 + # A[0] * A[2] + movq (%rsp), %rax + mulq 16(%rsp) + xorq %r11, %r11 + addq %rax, %r10 + adcq %rdx, %r11 + # A[0] * A[3] + movq (%rsp), %rax + mulq 24(%rsp) + xorq %r12, %r12 + addq %rax, %r11 + adcq %rdx, %r12 + # A[1] * A[2] + movq 
8(%rsp), %rax + mulq 16(%rsp) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * A[3] + movq 8(%rsp), %rax + mulq 24(%rsp) + addq %rax, %r12 + adcq %rdx, %r13 + # A[2] * A[3] + movq 16(%rsp), %rax + mulq 24(%rsp) + xorq %r14, %r14 + addq %rax, %r13 + adcq %rdx, %r14 + # Double + xorq %r15, %r15 + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq $0x00, %r15 + # A[0] * A[0] + movq (%rsp), %rax + mulq %rax + movq %rax, %rcx + movq %rdx, %rbp + # A[1] * A[1] + movq 8(%rsp), %rax + mulq %rax + addq %rbp, %r9 + adcq %rax, %r10 + adcq $0x00, %rdx + movq %rdx, %rbp + # A[2] * A[2] + movq 16(%rsp), %rax + mulq %rax + addq %rbp, %r11 + adcq %rax, %r12 + adcq $0x00, %rdx + movq %rdx, %rbp + # A[3] * A[3] + movq 24(%rsp), %rax + mulq %rax + addq %rax, %r14 + adcq %rdx, %r15 + addq %rbp, %r13 + adcq $0x00, %r14 + adcq $0x00, %r15 + # Reduce + movq $0x7fffffffffffffff, %rbp + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rbp, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %rcx + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %rcx, (%rsp) + movq %r9, 8(%rsp) + movq %r10, 16(%rsp) + movq %r11, 24(%rsp) + # Multiply by 121666 + movq $0x1db42, %rax + mulq 128(%rsp) + xorq %r10, %r10 + movq %rax, %rcx + movq %rdx, %r9 + movq $0x1db42, %rax + mulq 136(%rsp) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + movq $0x1db42, %rax + mulq 144(%rsp) + xorq %r13, %r13 + addq %rax, %r10 + adcq %rdx, %r11 + movq $0x1db42, %rax + mulq 152(%rsp) + movq $0x7fffffffffffffff, %r12 + addq %rax, %r11 + adcq %rdx, %r13 + shldq $0x01, %r11, %r13 + andq %r12, %r11 + movq $19, %rax + mulq %r13 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + movq %rcx, 32(%rsp) + movq %r9, 40(%rsp) + movq %r10, 48(%rsp) + movq %r11, 56(%rsp) + # Square + # A[0] * A[1] + movq 64(%rsp), %rax + mulq 72(%rsp) + movq %rax, %r9 + movq %rdx, %r10 + # A[0] * A[2] + movq 64(%rsp), %rax + mulq 80(%rsp) + xorq %r11, %r11 + addq %rax, %r10 + adcq %rdx, %r11 + # A[0] * A[3] + movq 64(%rsp), %rax + mulq 88(%rsp) + xorq %r12, %r12 + addq %rax, %r11 + adcq %rdx, %r12 + # A[1] * A[2] + movq 72(%rsp), %rax + mulq 80(%rsp) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * A[3] + movq 72(%rsp), %rax + mulq 88(%rsp) + addq %rax, %r12 + adcq %rdx, %r13 + # A[2] * A[3] + movq 80(%rsp), %rax + mulq 88(%rsp) + xorq %r14, %r14 + addq %rax, %r13 + adcq %rdx, %r14 + # Double + xorq %r15, %r15 + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq $0x00, %r15 + # A[0] * A[0] + movq 64(%rsp), %rax + mulq %rax + movq %rax, %rcx + movq %rdx, %rbp + # A[1] * A[1] + movq 
72(%rsp), %rax + mulq %rax + addq %rbp, %r9 + adcq %rax, %r10 + adcq $0x00, %rdx + movq %rdx, %rbp + # A[2] * A[2] + movq 80(%rsp), %rax + mulq %rax + addq %rbp, %r11 + adcq %rax, %r12 + adcq $0x00, %rdx + movq %rdx, %rbp + # A[3] * A[3] + movq 88(%rsp), %rax + mulq %rax + addq %rax, %r14 + adcq %rdx, %r15 + addq %rbp, %r13 + adcq $0x00, %r14 + adcq $0x00, %r15 + # Reduce + movq $0x7fffffffffffffff, %rbp + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rbp, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %rcx + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %rcx, 64(%rsp) + movq %r9, 72(%rsp) + movq %r10, 80(%rsp) + movq %r11, 88(%rsp) + # Add + movq 96(%rsp), %rcx + movq 104(%rsp), %r9 + addq 32(%rsp), %rcx + movq 112(%rsp), %r10 + adcq 40(%rsp), %r9 + movq 120(%rsp), %rbp + adcq 48(%rsp), %r10 + movq $-19, %rax + adcq 56(%rsp), %rbp + movq $0x7fffffffffffffff, %rdx + movq %rbp, %r11 + sarq $63, %rbp + # Mask the modulus + andq %rbp, %rax + andq %rbp, %rdx + # Sub modulus (if overflow) + subq %rax, %rcx + sbbq %rbp, %r9 + sbbq %rbp, %r10 + sbbq %rdx, %r11 + movq %rcx, 96(%rsp) + movq %r9, 104(%rsp) + movq %r10, 112(%rsp) + movq %r11, 120(%rsp) + # Multiply + # A[0] * B[0] + movq (%rsp), %rax + mulq (%r8) + movq %rax, %rcx + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rsp), %rax + mulq (%r8) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rsp), %rax + mulq 8(%r8) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rsp), %rax + mulq (%r8) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rsp), %rax + mulq 8(%r8) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rsp), %rax + mulq 16(%r8) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rsp), %rax + mulq (%r8) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rsp), %rax + mulq 8(%r8) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rsp), %rax + mulq 16(%r8) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rsp), %rax + mulq 24(%r8) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rsp), %rax + mulq 8(%r8) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rsp), %rax + mulq 16(%r8) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rsp), %rax + mulq 24(%r8) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rsp), %rax + mulq 16(%r8) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rsp), %rax + 
mulq 24(%r8) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rsp), %rax + mulq 24(%r8) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rbp + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rbp, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %rcx + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %rcx, 32(%rsp) + movq %r9, 40(%rsp) + movq %r10, 48(%rsp) + movq %r11, 56(%rsp) + # Multiply + # A[0] * B[0] + movq 96(%rsp), %rax + mulq 128(%rsp) + movq %rax, %rcx + movq %rdx, %r9 + # A[0] * B[1] + movq 104(%rsp), %rax + mulq 128(%rsp) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq 96(%rsp), %rax + mulq 136(%rsp) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 112(%rsp), %rax + mulq 128(%rsp) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 104(%rsp), %rax + mulq 136(%rsp) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq 96(%rsp), %rax + mulq 144(%rsp) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 120(%rsp), %rax + mulq 128(%rsp) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 112(%rsp), %rax + mulq 136(%rsp) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 104(%rsp), %rax + mulq 144(%rsp) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq 96(%rsp), %rax + mulq 152(%rsp) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 120(%rsp), %rax + mulq 136(%rsp) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 112(%rsp), %rax + mulq 144(%rsp) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 104(%rsp), %rax + mulq 152(%rsp) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 120(%rsp), %rax + mulq 144(%rsp) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 112(%rsp), %rax + mulq 152(%rsp) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 120(%rsp), %rax + mulq 152(%rsp) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rbp + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rbp, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %rcx + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + 
adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %rcx, (%rsp) + movq %r9, 8(%rsp) + movq %r10, 16(%rsp) + movq %r11, 24(%rsp) + decb 168(%rsp) + jge L_curve25519_x64_bits + movq $63, 168(%rsp) + decb 160(%rsp) + jge L_curve25519_x64_words + # Invert + leaq 32(%rsp), %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + movq %rsp, %rsi + leaq 64(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + leaq 64(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + leaq 96(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 64(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + movq $4, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 96(%rsp), %rsi + leaq 64(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 64(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + movq $9, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + leaq 64(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 128(%rsp), %rdi + leaq 96(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 128(%rsp), %rdi + leaq 128(%rsp), %rsi + movq $19, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 128(%rsp), %rsi + leaq 96(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + movq $9, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 96(%rsp), %rsi + leaq 64(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 64(%rsp), 
%rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + movq $49, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + leaq 64(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 128(%rsp), %rdi + leaq 96(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 128(%rsp), %rdi + leaq 128(%rsp), %rsi + movq $0x63, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 128(%rsp), %rsi + leaq 96(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + movq $49, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 96(%rsp), %rsi + leaq 64(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + movq $4, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + movq %rsp, %rdi + leaq 64(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + movq 176(%rsp), %rdi + # Multiply + # A[0] * B[0] + movq (%rsp), %rax + mulq (%rdi) + movq %rax, %rcx + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rsp), %rax + mulq (%rdi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rsp), %rax + mulq 8(%rdi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rsp), %rax + mulq (%rdi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rsp), %rax + mulq 8(%rdi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rsp), %rax + mulq 16(%rdi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rsp), %rax + mulq (%rdi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rsp), %rax + mulq 8(%rdi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rsp), %rax + mulq 16(%rdi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rsp), %rax + mulq 24(%rdi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rsp), %rax + mulq 8(%rdi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rsp), %rax + mulq 16(%rdi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rsp), %rax + mulq 24(%rdi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rsp), %rax + mulq 16(%rdi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rsp), %rax + mulq 24(%rdi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rsp), %rax + mulq 24(%rdi) + addq %rax, %r14 + adcq %rdx, %r15 + # 
Reduce + movq $0x7fffffffffffffff, %rbp + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rbp, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %rcx + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rbp, %r11 + addq %rax, %rcx + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %rcx, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + xorq %rax, %rax + addq $0xb8, %rsp + popq %rbp + popq %rbx + popq %r15 + popq %r14 + popq %r13 + popq %r12 + repz retq +#ifndef __APPLE__ +.size curve25519_x64,.-curve25519_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_pow22523_x64 +.type fe_pow22523_x64,@function +.align 4 +fe_pow22523_x64: +#else +.section __TEXT,__text +.globl _fe_pow22523_x64 +.p2align 2 +_fe_pow22523_x64: +#endif /* __APPLE__ */ + subq $0x70, %rsp + # pow22523 + movq %rdi, 96(%rsp) + movq %rsi, 104(%rsp) + movq %rsp, %rdi + movq 104(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + movq 104(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + movq %rsp, %rdi + movq %rsp, %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + movq %rsp, %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + movq %rsp, %rdi + leaq 32(%rsp), %rsi + movq %rsp, %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + movq $4, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + movq %rsp, %rdi + leaq 32(%rsp), %rsi + movq %rsp, %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + movq $9, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + movq %rsp, %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 
+#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + movq $19, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 64(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + movq $9, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + movq %rsp, %rdi + leaq 32(%rsp), %rsi + movq %rsp, %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + movq $49, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + movq %rsp, %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + movq $0x63, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 64(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + movq $49, %rdx +#ifndef __APPLE__ + callq fe_sq_n_x64@plt +#else + callq _fe_sq_n_x64 +#endif /* __APPLE__ */ + movq %rsp, %rdi + leaq 32(%rsp), %rsi + movq %rsp, %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + movq %rsp, %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + movq %rsp, %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_x64@plt +#else + callq _fe_sq_x64 +#endif /* __APPLE__ */ + movq 96(%rsp), %rdi + movq %rsp, %rsi + movq 104(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_x64@plt +#else + callq _fe_mul_x64 +#endif /* __APPLE__ */ + movq 104(%rsp), %rsi + movq 96(%rsp), %rdi + addq $0x70, %rsp + repz retq +#ifndef __APPLE__ +.text +.globl fe_ge_to_p2_x64 +.type fe_ge_to_p2_x64,@function +.align 4 +fe_ge_to_p2_x64: +#else +.section __TEXT,__text +.globl _fe_ge_to_p2_x64 +.p2align 2 +_fe_ge_to_p2_x64: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $40, %rsp + movq %rsi, (%rsp) + movq %rdx, 8(%rsp) + movq %rcx, 16(%rsp) + movq %r8, 24(%rsp) + movq %r9, 32(%rsp) + movq 16(%rsp), %rsi + movq 88(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 
+ adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq (%rsp), %rdi + movq 24(%rsp), %rsi + movq 32(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq 
%r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 32(%rsp), %rsi + movq 88(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx 
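+ # Since p = 2^255 - 19, 2^255 mod p = 19: the bits above bit 255 are
+ # shifted down into the upper registers and multiplied by 19 before
+ # being added back into the low four limbs.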
+ # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + addq $40, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size fe_ge_to_p2_x64,.-fe_ge_to_p2_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_to_p3_x64 +.type fe_ge_to_p3_x64,@function +.align 4 +fe_ge_to_p3_x64: +#else +.section __TEXT,__text +.globl _fe_ge_to_p3_x64 +.p2align 2 +_fe_ge_to_p3_x64: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $40, %rsp + movq %rsi, (%rsp) + movq %rdx, 8(%rsp) + movq %rcx, 16(%rsp) + movq %r8, 24(%rsp) + movq %r9, 32(%rsp) + movq 24(%rsp), %rsi + movq 96(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq 
$0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq (%rsp), %rdi + movq 32(%rsp), %rsi + movq 88(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 
+ adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 88(%rsp), %rsi + movq 96(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 16(%rsp), %rdi + movq 24(%rsp), %rsi + movq 32(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq 
%rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + addq $40, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size fe_ge_to_p3_x64,.-fe_ge_to_p3_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_dbl_x64 +.type fe_ge_dbl_x64,@function +.align 4 +fe_ge_dbl_x64: +#else +.section __TEXT,__text +.globl _fe_ge_dbl_x64 +.p2align 2 +_fe_ge_dbl_x64: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x50, %rsp + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rcx, 24(%rsp) + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq (%rsp), %rdi + movq 32(%rsp), %rsi + # Square + # A[0] * A[1] + movq (%rsi), %rax + mulq 8(%rsi) + movq %rax, %r9 + movq %rdx, %r10 + # A[0] * A[2] + movq (%rsi), %rax + mulq 16(%rsi) + xorq %r11, %r11 + addq %rax, %r10 + adcq %rdx, %r11 + # A[0] * A[3] + movq (%rsi), %rax + mulq 24(%rsi) + xorq %r12, %r12 + addq %rax, %r11 + adcq %rdx, %r12 + # A[1] * A[2] + movq 8(%rsi), %rax + mulq 16(%rsi) + xorq 
%r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * A[3] + movq 8(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + # A[2] * A[3] + movq 16(%rsi), %rax + mulq 24(%rsi) + xorq %r14, %r14 + addq %rax, %r13 + adcq %rdx, %r14 + # Double + xorq %r15, %r15 + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq $0x00, %r15 + # A[0] * A[0] + movq (%rsi), %rax + mulq %rax + movq %rax, %r8 + movq %rdx, %rcx + # A[1] * A[1] + movq 8(%rsi), %rax + mulq %rax + addq %rcx, %r9 + adcq %rax, %r10 + adcq $0x00, %rdx + movq %rdx, %rcx + # A[2] * A[2] + movq 16(%rsi), %rax + mulq %rax + addq %rcx, %r11 + adcq %rax, %r12 + adcq $0x00, %rdx + movq %rdx, %rcx + # A[3] * A[3] + movq 24(%rsi), %rax + mulq %rax + addq %rax, %r14 + adcq %rdx, %r15 + addq %rcx, %r13 + adcq $0x00, %r14 + adcq $0x00, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 16(%rsp), %rdi + movq 40(%rsp), %rsi + # Square + # A[0] * A[1] + movq (%rsi), %rax + mulq 8(%rsi) + movq %rax, %r9 + movq %rdx, %r10 + # A[0] * A[2] + movq (%rsi), %rax + mulq 16(%rsi) + xorq %r11, %r11 + addq %rax, %r10 + adcq %rdx, %r11 + # A[0] * A[3] + movq (%rsi), %rax + mulq 24(%rsi) + xorq %r12, %r12 + addq %rax, %r11 + adcq %rdx, %r12 + # A[1] * A[2] + movq 8(%rsi), %rax + mulq 16(%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * A[3] + movq 8(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + # A[2] * A[3] + movq 16(%rsi), %rax + mulq 24(%rsi) + xorq %r14, %r14 + addq %rax, %r13 + adcq %rdx, %r14 + # Double + xorq %r15, %r15 + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq $0x00, %r15 + # A[0] * A[0] + movq (%rsi), %rax + mulq %rax + movq %rax, %r8 + movq %rdx, %rcx + # A[1] * A[1] + movq 8(%rsi), %rax + mulq %rax + addq %rcx, %r9 + adcq %rax, %r10 + adcq $0x00, %rdx + movq %rdx, %rcx + # A[2] * A[2] + movq 16(%rsi), %rax + mulq %rax + addq %rcx, %r11 + adcq %rax, %r12 + adcq $0x00, %rdx + movq %rdx, %rcx + # A[3] * A[3] + movq 24(%rsi), %rax + mulq %rax + addq %rax, %r14 + adcq %rdx, %r15 + addq %rcx, %r13 + adcq $0x00, %r14 + adcq $0x00, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, 
%r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 24(%rsp), %rdi + movq 128(%rsp), %rsi + # Square * 2 + # A[0] * A[1] + movq (%rsi), %rax + mulq 8(%rsi) + movq %rax, %r9 + movq %rdx, %r10 + # A[0] * A[2] + movq (%rsi), %rax + mulq 16(%rsi) + xorq %r11, %r11 + addq %rax, %r10 + adcq %rdx, %r11 + # A[0] * A[3] + movq (%rsi), %rax + mulq 24(%rsi) + xorq %r12, %r12 + addq %rax, %r11 + adcq %rdx, %r12 + # A[1] * A[2] + movq 8(%rsi), %rax + mulq 16(%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * A[3] + movq 8(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + # A[2] * A[3] + movq 16(%rsi), %rax + mulq 24(%rsi) + xorq %r14, %r14 + addq %rax, %r13 + adcq %rdx, %r14 + # Double + xorq %r15, %r15 + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq $0x00, %r15 + # A[0] * A[0] + movq (%rsi), %rax + mulq %rax + movq %rax, %r8 + movq %rdx, %rcx + # A[1] * A[1] + movq 8(%rsi), %rax + mulq %rax + addq %rcx, %r9 + adcq %rax, %r10 + adcq $0x00, %rdx + movq %rdx, %rcx + # A[2] * A[2] + movq 16(%rsi), %rax + mulq %rax + addq %rcx, %r11 + adcq %rax, %r12 + adcq $0x00, %rdx + movq %rdx, %rcx + # A[3] * A[3] + movq 24(%rsi), %rax + mulq %rax + addq %rax, %r14 + adcq %rdx, %r15 + addq %rcx, %r13 + adcq $0x00, %r14 + adcq $0x00, %r15 + # Reduce + movq $0x7fffffffffffffff, %rbx + xorq %rax, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $3, %r15, %rax + shldq $2, %r14, %r15 + shldq $2, %r13, %r14 + shldq $2, %r12, %r13 + shldq $2, %r11, %r12 + shldq $0x01, %r10, %r11 + shldq $0x01, %r9, %r10 + shldq $0x01, %r8, %r9 + shlq $0x01, %r8 + andq %rbx, %r11 + # Two out left, one in right + andq %rbx, %r15 + # Multiply top bits by 19*19 + imulq $0x169, %rax, %rcx + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %rcx, %r8 + adcq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rbx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rbx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 32(%rsp), %rsi + movq 40(%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq
16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + leaq 48(%rsp), %rdi + movq 8(%rsp), %rsi + # Square + # A[0] * A[1] + movq (%rsi), %rax + mulq 8(%rsi) + movq %rax, %r9 + movq %rdx, %r10 + # A[0] * A[2] + movq (%rsi), %rax + mulq 16(%rsi) + xorq %r11, %r11 + addq %rax, %r10 + adcq %rdx, %r11 + # A[0] * A[3] + movq (%rsi), %rax + mulq 24(%rsi) + xorq %r12, %r12 + addq %rax, %r11 + adcq %rdx, %r12 + # A[1] * A[2] + movq 8(%rsi), %rax + mulq 16(%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * A[3] + movq 8(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + # A[2] * A[3] + movq 16(%rsi), %rax + mulq 24(%rsi) + xorq %r14, %r14 + addq %rax, %r13 + adcq %rdx, %r14 + # Double + xorq %r15, %r15 + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq $0x00, %r15 + # A[0] * A[0] + movq (%rsi), %rax + mulq %rax + movq %rax, %r8 + movq %rdx, %rcx + # A[1] * A[1] + movq 8(%rsi), %rax + mulq %rax + addq %rcx, %r9 + adcq %rax, %r10 + adcq $0x00, %rdx + movq %rdx, %rcx + # A[2] * A[2] + movq 16(%rsi), %rax + mulq %rax + addq %rcx, %r11 + adcq %rax, %r12 + adcq $0x00, %rdx + movq %rdx, %rcx + # A[3] * A[3] + movq 24(%rsi), %rax + mulq %rax + addq %rax, %r14 + adcq %rdx, %r15 + addq %rcx, %r13 + adcq $0x00, %r14 + adcq $0x00, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 16(%rsp), %rsi + movq (%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 16(%rsp), %rdi + movq 16(%rsp), %rsi + movq (%rsp), %rbx + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rbx), %r8 + movq $0x00, 
%rcx + sbbq 8(%rbx), %r9 + movq $-19, %rax + sbbq 16(%rbx), %r10 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rbx), %r11 + sbbq $0x00, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Add modulus (if underflow) + addq %rax, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq (%rsp), %rdi + leaq 48(%rsp), %rsi + movq 8(%rsp), %rbx + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rbx), %r8 + movq $0x00, %rcx + sbbq 8(%rbx), %r9 + movq $-19, %rax + sbbq 16(%rbx), %r10 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rbx), %r11 + sbbq $0x00, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Add modulus (if underflow) + addq %rax, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 24(%rsp), %rdi + movq 24(%rsp), %rsi + movq 16(%rsp), %rbx + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rbx), %r8 + movq $0x00, %rcx + sbbq 8(%rbx), %r9 + movq $-19, %rax + sbbq 16(%rbx), %r10 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rbx), %r11 + sbbq $0x00, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Add modulus (if underflow) + addq %rax, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + addq $0x50, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size fe_ge_dbl_x64,.-fe_ge_dbl_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_madd_x64 +.type fe_ge_madd_x64,@function +.align 4 +fe_ge_madd_x64: +#else +.section __TEXT,__text +.globl _fe_ge_madd_x64 +.p2align 2 +_fe_ge_madd_x64: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x50, %rsp + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rcx, 24(%rsp) + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq (%rsp), %rdi + movq 40(%rsp), %rsi + movq 32(%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 40(%rsp), %rsi + movq 32(%rsp), %rbx + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rbx), %r8 + movq $0x00, %rcx + sbbq 8(%rbx), %r9 + movq $-19, %rax + sbbq 16(%rbx), %r10 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rbx), %r11 + sbbq $0x00, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Add modulus (if underflow) + addq %rax, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 16(%rsp), %rdi + movq (%rsp), %rsi + movq 152(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + 
mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 8(%rsp), %rsi + movq 160(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * 
B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 24(%rsp), %rdi + movq 144(%rsp), %rsi + movq 136(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, 
%r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + leaq 48(%rsp), %rdi + movq 128(%rsp), %rsi + movq 128(%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq (%rsp), %rdi + movq 16(%rsp), %rsi + movq 8(%rsp), %rbx + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rbx), %r8 + movq $0x00, %rcx + sbbq 8(%rbx), %r9 + movq $-19, %rax + sbbq 16(%rbx), %r10 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rbx), %r11 + sbbq $0x00, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Add modulus (if underflow) + addq %rax, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 16(%rsp), %rsi + movq 8(%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 16(%rsp), %rdi + leaq 48(%rsp), %rsi + movq 24(%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 
24(%rdi) + movq 24(%rsp), %rdi + leaq 48(%rsp), %rsi + movq 24(%rsp), %rbx + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rbx), %r8 + movq $0x00, %rcx + sbbq 8(%rbx), %r9 + movq $-19, %rax + sbbq 16(%rbx), %r10 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rbx), %r11 + sbbq $0x00, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Add modulus (if underflow) + addq %rax, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + addq $0x50, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size fe_ge_madd_x64,.-fe_ge_madd_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_msub_x64 +.type fe_ge_msub_x64,@function +.align 4 +fe_ge_msub_x64: +#else +.section __TEXT,__text +.globl _fe_ge_msub_x64 +.p2align 2 +_fe_ge_msub_x64: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x50, %rsp + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rcx, 24(%rsp) + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq (%rsp), %rdi + movq 40(%rsp), %rsi + movq 32(%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 40(%rsp), %rsi + movq 32(%rsp), %rbx + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rbx), %r8 + movq $0x00, %rcx + sbbq 8(%rbx), %r9 + movq $-19, %rax + sbbq 16(%rbx), %r10 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rbx), %r11 + sbbq $0x00, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Add modulus (if underflow) + addq %rax, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 16(%rsp), %rdi + movq (%rsp), %rsi + movq 160(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, 
%r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 8(%rsp), %rsi + movq 152(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into 
t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 24(%rsp), %rdi + movq 144(%rsp), %rsi + movq 136(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax 
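+    # (Editor's note) The imulq-by-19 above folds the overflow back in
+    # because the prime is p = 2^255 - 19, so 2^255 == 19 (mod p): any
+    # bits at position 255 and higher may be multiplied by 19 and added
+    # into the low limbs. The andq that follows clears bit 63 of the
+    # top limb, keeping the working value within 255 bits.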
+ andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + leaq 48(%rsp), %rdi + movq 128(%rsp), %rsi + movq 128(%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq (%rsp), %rdi + movq 16(%rsp), %rsi + movq 8(%rsp), %rbx + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rbx), %r8 + movq $0x00, %rcx + sbbq 8(%rbx), %r9 + movq $-19, %rax + sbbq 16(%rbx), %r10 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rbx), %r11 + sbbq $0x00, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Add modulus (if underflow) + addq %rax, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 16(%rsp), %rsi + movq 8(%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 16(%rsp), %rdi + leaq 48(%rsp), %rsi + movq 24(%rsp), %rbx + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rbx), %r8 + movq $0x00, %rcx + sbbq 8(%rbx), %r9 + movq $-19, %rax + sbbq 16(%rbx), %r10 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rbx), %r11 + sbbq $0x00, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Add modulus (if underflow) + addq %rax, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 24(%rsp), %rdi + leaq 48(%rsp), %rsi + movq 24(%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + addq $0x50, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size fe_ge_msub_x64,.-fe_ge_msub_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_add_x64 +.type fe_ge_add_x64,@function +.align 4 +fe_ge_add_x64: +#else +.section __TEXT,__text +.globl _fe_ge_add_x64 +.p2align 2 
+_fe_ge_add_x64: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x50, %rsp + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rcx, 24(%rsp) + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq (%rsp), %rdi + movq 40(%rsp), %rsi + movq 32(%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 40(%rsp), %rsi + movq 32(%rsp), %rbx + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rbx), %r8 + movq $0x00, %rcx + sbbq 8(%rbx), %r9 + movq $-19, %rax + sbbq 16(%rbx), %r10 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rbx), %r11 + sbbq $0x00, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Add modulus (if underflow) + addq %rax, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 16(%rsp), %rdi + movq (%rsp), %rsi + movq 160(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, 
%r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 8(%rsp), %rsi + movq 168(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + 
movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 24(%rsp), %rdi + movq 152(%rsp), %rsi + movq 136(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq (%rsp), %rdi + movq 128(%rsp), %rsi + movq 144(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + 
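+    # (Editor's note) Accumulation pattern used throughout: the low
+    # product word enters the current limb with addq, the high word
+    # rides the carry into the next limb via adcq, and a trailing
+    # adcq $0x00 extends the carry one limb further whenever a column
+    # can overflow more than once.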
adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + leaq 48(%rsp), %rdi + movq (%rsp), %rsi + movq (%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq (%rsp), %rdi + movq 16(%rsp), %rsi + movq 8(%rsp), %rbx + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rbx), %r8 + movq $0x00, %rcx + sbbq 8(%rbx), %r9 + movq $-19, %rax + sbbq 16(%rbx), %r10 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rbx), %r11 + sbbq $0x00, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Add modulus (if underflow) + addq %rax, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 16(%rsp), %rsi + movq 8(%rsp), %rbx + # Add + movq (%rsi), %r8 
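+    # (Editor's note) Both source pointers of this "Add" were loaded
+    # from (%rsp), so the block computes 2*x mod p by adding a value to
+    # itself. The sarq $63 below smears the top bit into an all-zeros
+    # or all-ones mask; the masked subq/sbbq then subtracts
+    # p = 2^255 - 19 only on overflow, without branching, so the
+    # sequence stays constant-time.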
+ movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 16(%rsp), %rdi + leaq 48(%rsp), %rsi + movq 24(%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 24(%rsp), %rdi + leaq 48(%rsp), %rsi + movq 24(%rsp), %rbx + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rbx), %r8 + movq $0x00, %rcx + sbbq 8(%rbx), %r9 + movq $-19, %rax + sbbq 16(%rbx), %r10 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rbx), %r11 + sbbq $0x00, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Add modulus (if underflow) + addq %rax, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + addq $0x50, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size fe_ge_add_x64,.-fe_ge_add_x64 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_sub_x64 +.type fe_ge_sub_x64,@function +.align 4 +fe_ge_sub_x64: +#else +.section __TEXT,__text +.globl _fe_ge_sub_x64 +.p2align 2 +_fe_ge_sub_x64: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x50, %rsp + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rcx, 24(%rsp) + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq (%rsp), %rdi + movq 40(%rsp), %rsi + movq 32(%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 40(%rsp), %rsi + movq 32(%rsp), %rbx + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rbx), %r8 + movq $0x00, %rcx + sbbq 8(%rbx), %r9 + movq $-19, %rax + sbbq 16(%rbx), %r10 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rbx), %r11 + sbbq $0x00, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Add modulus (if underflow) + addq %rax, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 16(%rsp), %rdi + movq (%rsp), %rsi + movq 168(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + 
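+    # (Editor's note) One-operand mulq multiplies %rax by its source
+    # and leaves the 128-bit product in %rdx:%rax, which is why every
+    # limb product here is staged through %rax. The xorq that follows
+    # zeroes the next accumulator limb and clears CF before the product
+    # words are added in.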
xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 8(%rsp), %rsi + movq 160(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), 
%rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 24(%rsp), %rdi + movq 152(%rsp), %rsi + movq 136(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] 
+ movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq %rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq (%rsp), %rdi + movq 128(%rsp), %rsi + movq 144(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rax + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rbx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r9 + adcq %rdx, %r10 + # A[1] * B[0] + movq (%rbx), %rax + mulq 8(%rsi) + xorq %r11, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x00, %r11 + # A[0] * B[2] + movq 16(%rbx), %rax + mulq (%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[1] + movq 8(%rbx), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[2] * B[0] + movq (%rbx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + # A[0] * B[3] + movq 24(%rbx), %rax + mulq (%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[2] + movq 16(%rbx), %rax + mulq 8(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[2] * B[1] + movq 8(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[3] * B[0] + movq (%rbx), %rax + mulq 24(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + # A[1] * B[3] + movq 24(%rbx), %rax + mulq 8(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[2] + movq 16(%rbx), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[3] * B[1] + movq 8(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x00, %r14 + # A[2] * B[3] + movq 24(%rbx), %rax + mulq 16(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[2] + movq 16(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x00, %r15 + # A[3] * B[3] + movq 24(%rbx), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rax + mulq %r12 + xorq %r12, %r12 + addq %rax, %r8 + movq $19, %rax + adcq 
%rdx, %r12 + mulq %r13 + xorq %r13, %r13 + addq %rax, %r9 + movq $19, %rax + adcq %rdx, %r13 + mulq %r14 + xorq %r14, %r14 + addq %rax, %r10 + movq $19, %rax + adcq %rdx, %r14 + mulq %r15 + # Add remaining product results in + addq %r12, %r9 + adcq %r13, %r10 + adcq %r14, %r11 + adcq %rax, %r11 + adcq $0x00, %rdx + # Overflow + shldq $0x01, %r11, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + leaq 48(%rsp), %rdi + movq (%rsp), %rsi + movq (%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq (%rsp), %rdi + movq 16(%rsp), %rsi + movq 8(%rsp), %rbx + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rbx), %r8 + movq $0x00, %rcx + sbbq 8(%rbx), %r9 + movq $-19, %rax + sbbq 16(%rbx), %r10 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rbx), %r11 + sbbq $0x00, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Add modulus (if underflow) + addq %rax, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 16(%rsp), %rsi + movq 8(%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 16(%rsp), %rdi + leaq 48(%rsp), %rsi + movq 24(%rsp), %rbx + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rbx), %r8 + movq $0x00, %rcx + sbbq 8(%rbx), %r9 + movq $-19, %rax + sbbq 16(%rbx), %r10 + movq $0x7fffffffffffffff, %rdx + sbbq 24(%rbx), %r11 + sbbq $0x00, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Add modulus (if underflow) + addq %rax, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 24(%rsp), %rdi + leaq 48(%rsp), %rsi + movq 24(%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rcx + adcq 16(%rbx), %r10 + movq $-19, %rax + adcq 24(%rbx), %rcx + movq $0x7fffffffffffffff, %rdx + movq %rcx, %r11 + sarq $63, %rcx + # Mask the modulus + andq %rcx, %rax + andq %rcx, %rdx + # Sub modulus (if overflow) + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rdx, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq 
%r11, 24(%rdi) + addq $0x50, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size fe_ge_sub_x64,.-fe_ge_sub_x64 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +#ifndef __APPLE__ +.text +.globl fe_mul_avx2 +.type fe_mul_avx2,@function +.align 4 +fe_mul_avx2: +#else +.section __TEXT,__text +.globl _fe_mul_avx2 +.p2align 2 +_fe_mul_avx2: +#endif /* __APPLE__ */ + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbx + movq %rdx, %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rax, %rcx + xorq %r15, %r15 + adcxq %rax, %r9 + # A[1] * B[3] + movq 24(%rbx), %rdx + mulxq 8(%rsi), %r12, %r13 + adcxq %rcx, %r10 + # A[0] * B[1] + movq 8(%rbx), %rdx + mulxq (%rsi), %rax, %rcx + adoxq %rax, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rax, %r14 + adoxq %rcx, %r10 + adcxq %rax, %r11 + # A[1] * B[2] + movq 16(%rbx), %rdx + mulxq 8(%rsi), %rax, %rcx + adcxq %r14, %r12 + adoxq %rax, %r11 + adcxq %r15, %r13 + adoxq %rcx, %r12 + # A[0] * B[2] + mulxq (%rsi), %rax, %rcx + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rax, %r10 + # A[1] * B[1] + movq 8(%rbx), %rdx + mulxq 8(%rsi), %rdx, %rax + adcxq %rcx, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbx), %rdx + adoxq %rax, %r11 + mulxq 24(%rsi), %rax, %rcx + adcxq %rax, %r12 + # A[2] * B[2] + movq 16(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rax + adcxq %rcx, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbx), %rdx + adoxq %rax, %r13 + mulxq 24(%rsi), %rax, %rcx + adoxq %r15, %r14 + adcxq %rax, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rax + adcxq %rcx, %r15 + xorq %rcx, %rcx + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbx), %rdx + adcxq %rax, %r12 + mulxq 24(%rsi), %rdx, %rax + adoxq %rdx, %r11 + adoxq %rax, %r12 + # A[2] * B[3] + movq 24(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rax + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbx), %rdx + adcxq %rax, %r14 + mulxq 24(%rsi), %rax, %rdx + adcxq %rcx, %r15 + adoxq %rax, %r13 + adoxq %rdx, %r14 + adoxq %rcx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rcx, %rcx + mulxq %r12, %rax, %r12 + adcxq %rax, %r8 + adoxq %r12, %r9 + mulxq %r13, %rax, %r13 + adcxq %rax, %r9 + adoxq %r13, %r10 + mulxq %r14, %rax, %r14 + adcxq %rax, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rcx, %rdx + adcxq %rcx, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rcx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + popq %rbx + popq %r15 + popq %r14 + popq %r13 + popq %r12 + repz retq +#ifndef __APPLE__ +.size fe_mul_avx2,.-fe_mul_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_sq_avx2 +.type fe_sq_avx2,@function +.align 4 +fe_sq_avx2: +#else +.section __TEXT,__text +.globl _fe_sq_avx2 +.p2align 2 +_fe_sq_avx2: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + # Square + # A[0] * A[1] + movq (%rsi), 
%rdx + mulxq 8(%rsi), %r9, %r10 + # A[0] * A[3] + mulxq 24(%rsi), %r11, %r12 + # A[2] * A[1] + movq 16(%rsi), %rdx + mulxq 8(%rsi), %rcx, %rbx + xorq %r15, %r15 + adoxq %rcx, %r11 + # A[2] * A[3] + mulxq 24(%rsi), %r13, %r14 + adoxq %rbx, %r12 + # A[2] * A[0] + mulxq (%rsi), %rcx, %rbx + adoxq %r15, %r13 + adcxq %rcx, %r10 + adoxq %r15, %r14 + # A[1] * A[3] + movq 8(%rsi), %rdx + mulxq 24(%rsi), %rax, %r8 + adcxq %rbx, %r11 + adcxq %rax, %r12 + adcxq %r8, %r13 + adcxq %r15, %r14 + # Double with Carry Flag + xorq %r15, %r15 + # A[0] * A[0] + movq (%rsi), %rdx + mulxq %rdx, %r8, %rax + adcxq %r9, %r9 + # A[1] * A[1] + movq 8(%rsi), %rdx + mulxq %rdx, %rcx, %rbx + adcxq %r10, %r10 + adoxq %rax, %r9 + adcxq %r11, %r11 + adoxq %rcx, %r10 + # A[2] * A[2] + movq 16(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adcxq %r12, %r12 + adoxq %rbx, %r11 + adcxq %r13, %r13 + adoxq %rax, %r12 + # A[3] * A[3] + movq 24(%rsi), %rdx + mulxq %rdx, %rax, %rbx + adcxq %r14, %r14 + adoxq %rcx, %r13 + adcxq %r15, %r15 + adoxq %rax, %r14 + adoxq %rbx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rcx, %rcx + mulxq %r12, %rax, %r12 + adcxq %rax, %r8 + adoxq %r12, %r9 + mulxq %r13, %rax, %r13 + adcxq %rax, %r9 + adoxq %r13, %r10 + mulxq %r14, %rax, %r14 + adcxq %rax, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rcx, %rdx + adcxq %rcx, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rcx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size fe_sq_avx2,.-fe_sq_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_sq_n_avx2 +.type fe_sq_n_avx2,@function +.align 4 +fe_sq_n_avx2: +#else +.section __TEXT,__text +.globl _fe_sq_n_avx2 +.p2align 2 +_fe_sq_n_avx2: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbp + movq %rdx, %rbp +L_fe_sq_n_avx2: + # Square + # A[0] * A[1] + movq (%rsi), %rdx + mulxq 8(%rsi), %r9, %r10 + # A[0] * A[3] + mulxq 24(%rsi), %r11, %r12 + # A[2] * A[1] + movq 16(%rsi), %rdx + mulxq 8(%rsi), %rcx, %rbx + xorq %r15, %r15 + adoxq %rcx, %r11 + # A[2] * A[3] + mulxq 24(%rsi), %r13, %r14 + adoxq %rbx, %r12 + # A[2] * A[0] + mulxq (%rsi), %rcx, %rbx + adoxq %r15, %r13 + adcxq %rcx, %r10 + adoxq %r15, %r14 + # A[1] * A[3] + movq 8(%rsi), %rdx + mulxq 24(%rsi), %rax, %r8 + adcxq %rbx, %r11 + adcxq %rax, %r12 + adcxq %r8, %r13 + adcxq %r15, %r14 + # Double with Carry Flag + xorq %r15, %r15 + # A[0] * A[0] + movq (%rsi), %rdx + mulxq %rdx, %r8, %rax + adcxq %r9, %r9 + # A[1] * A[1] + movq 8(%rsi), %rdx + mulxq %rdx, %rcx, %rbx + adcxq %r10, %r10 + adoxq %rax, %r9 + adcxq %r11, %r11 + adoxq %rcx, %r10 + # A[2] * A[2] + movq 16(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adcxq %r12, %r12 + adoxq %rbx, %r11 + adcxq %r13, %r13 + adoxq %rax, %r12 + # A[3] * A[3] + movq 24(%rsi), %rdx + mulxq %rdx, %rax, %rbx + adcxq %r14, %r14 + adoxq %rcx, %r13 + adcxq %r15, 
%r15 + adoxq %rax, %r14 + adoxq %rbx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rcx, %rcx + mulxq %r12, %rax, %r12 + adcxq %rax, %r8 + adoxq %r12, %r9 + mulxq %r13, %rax, %r13 + adcxq %rax, %r9 + adoxq %r13, %r10 + mulxq %r14, %rax, %r14 + adcxq %rax, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rcx, %rdx + adcxq %rcx, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rcx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + decb %bpl + jnz L_fe_sq_n_avx2 + popq %rbp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size fe_sq_n_avx2,.-fe_sq_n_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_mul121666_avx2 +.type fe_mul121666_avx2,@function +.align 4 +fe_mul121666_avx2: +#else +.section __TEXT,__text +.globl _fe_mul121666_avx2 +.p2align 2 +_fe_mul121666_avx2: +#endif /* __APPLE__ */ + pushq %r12 + pushq %r13 + movq $0x1db42, %rdx + mulxq (%rsi), %rax, %r13 + mulxq 8(%rsi), %rcx, %r12 + mulxq 16(%rsi), %r8, %r11 + mulxq 24(%rsi), %r9, %r10 + addq %r13, %rcx + adcq %r12, %r8 + adcq %r11, %r9 + adcq $0x00, %r10 + movq $0x7fffffffffffffff, %r13 + shldq $0x01, %r9, %r10 + andq %r13, %r9 + imulq $19, %r10, %r10 + addq %r10, %rax + adcq $0x00, %rcx + adcq $0x00, %r8 + adcq $0x00, %r9 + movq %rax, (%rdi) + movq %rcx, 8(%rdi) + movq %r8, 16(%rdi) + movq %r9, 24(%rdi) + popq %r13 + popq %r12 + repz retq +#ifndef __APPLE__ +.size fe_mul121666_avx2,.-fe_mul121666_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_sq2_avx2 +.type fe_sq2_avx2,@function +.align 4 +fe_sq2_avx2: +#else +.section __TEXT,__text +.globl _fe_sq2_avx2 +.p2align 2 +_fe_sq2_avx2: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + # Square * 2 + # A[0] * A[1] + movq (%rsi), %rdx + mulxq 8(%rsi), %r9, %r10 + # A[0] * A[3] + mulxq 24(%rsi), %r11, %r12 + # A[2] * A[1] + movq 16(%rsi), %rdx + mulxq 8(%rsi), %rcx, %rbx + xorq %r15, %r15 + adoxq %rcx, %r11 + # A[2] * A[3] + mulxq 24(%rsi), %r13, %r14 + adoxq %rbx, %r12 + # A[2] * A[0] + mulxq (%rsi), %rcx, %rbx + adoxq %r15, %r13 + adcxq %rcx, %r10 + adoxq %r15, %r14 + # A[1] * A[3] + movq 8(%rsi), %rdx + mulxq 24(%rsi), %rax, %r8 + adcxq %rbx, %r11 + adcxq %rax, %r12 + adcxq %r8, %r13 + adcxq %r15, %r14 + # Double with Carry Flag + xorq %r15, %r15 + # A[0] * A[0] + movq (%rsi), %rdx + mulxq %rdx, %r8, %rax + adcxq %r9, %r9 + # A[1] * A[1] + movq 8(%rsi), %rdx + mulxq %rdx, %rcx, %rbx + adcxq %r10, %r10 + adoxq %rax, %r9 + adcxq %r11, %r11 + adoxq %rcx, %r10 + # A[2] * A[2] + movq 16(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adcxq %r12, %r12 + adoxq %rbx, %r11 + adcxq %r13, %r13 + adoxq %rax, %r12 + # A[3] * A[3] + movq 24(%rsi), %rdx + mulxq %rdx, %rax, %rbx + adcxq %r14, %r14 + adoxq %rcx, %r13 + adcxq %r15, %r15 + adoxq %rax, %r14 + adoxq %rbx, %r15 + # Reduce + movq $0x7fffffffffffffff, %rbx + xorq %rax, %rax + # Move top half into t4-t7 and remove top bit 
from t3 and double + shldq $3, %r15, %rax + shldq $2, %r14, %r15 + shldq $2, %r13, %r14 + shldq $2, %r12, %r13 + shldq $2, %r11, %r12 + shldq $0x01, %r10, %r11 + shldq $0x01, %r9, %r10 + shldq $0x01, %r8, %r9 + shlq $0x01, %r8 + andq %rbx, %r11 + # Two out left, one in right + andq %rbx, %r15 + # Multiply top bits by 19*19 + imulq $0x169, %rax, %rcx + xorq %rbx, %rbx + # Multiply top half by 19 + movq $19, %rdx + adoxq %rcx, %r8 + mulxq %r12, %rax, %r12 + adcxq %rax, %r8 + adoxq %r12, %r9 + mulxq %r13, %rax, %r13 + adcxq %rax, %r9 + adoxq %r13, %r10 + mulxq %r14, %rax, %r14 + adcxq %rax, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rbx, %rdx + adcxq %rbx, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rbx + imulq $19, %rdx, %rax + andq %rbx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rbx, %r11 + addq %rax, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size fe_sq2_avx2,.-fe_sq2_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_invert_avx2 +.type fe_invert_avx2,@function +.align 4 +fe_invert_avx2: +#else +.section __TEXT,__text +.globl _fe_invert_avx2 +.p2align 2 +_fe_invert_avx2: +#endif /* __APPLE__ */ + subq $0x90, %rsp + # Invert + movq %rdi, 128(%rsp) + movq %rsi, 136(%rsp) + movq %rsp, %rdi + movq 136(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + movq 136(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + movq %rsp, %rdi + movq %rsp, %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + leaq 64(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + movq $4, %rdx +#ifndef __APPLE__ + callq fe_sq_n_avx2@plt +#else + callq _fe_sq_n_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 64(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + movq $9, %rdx +#ifndef __APPLE__ + callq fe_sq_n_avx2@plt +#else + callq _fe_sq_n_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 
64(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + movq $19, %rdx +#ifndef __APPLE__ + callq fe_sq_n_avx2@plt +#else + callq _fe_sq_n_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 96(%rsp), %rsi + leaq 64(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + movq $9, %rdx +#ifndef __APPLE__ + callq fe_sq_n_avx2@plt +#else + callq _fe_sq_n_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 64(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + movq $49, %rdx +#ifndef __APPLE__ + callq fe_sq_n_avx2@plt +#else + callq _fe_sq_n_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 64(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + movq $0x63, %rdx +#ifndef __APPLE__ + callq fe_sq_n_avx2@plt +#else + callq _fe_sq_n_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 96(%rsp), %rsi + leaq 64(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + movq $49, %rdx +#ifndef __APPLE__ + callq fe_sq_n_avx2@plt +#else + callq _fe_sq_n_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 64(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + movq $4, %rdx +#ifndef __APPLE__ + callq fe_sq_n_avx2@plt +#else + callq _fe_sq_n_avx2 +#endif /* __APPLE__ */ + movq 128(%rsp), %rdi + leaq 32(%rsp), %rsi + movq %rsp, %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + movq 136(%rsp), %rsi + movq 128(%rsp), %rdi + addq $0x90, %rsp + repz retq +#ifndef __APPLE__ +.text +.globl curve25519_avx2 +.type curve25519_avx2,@function +.align 4 +curve25519_avx2: +#else +.section __TEXT,__text +.globl _curve25519_avx2 +.p2align 2 +_curve25519_avx2: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbp + movq %rdx, %r8 + subq $0xc0, %rsp + movq $0x00, 184(%rsp) + movq %rdi, 176(%rsp) + # Set one + movq $0x01, (%rdi) + movq $0x00, 8(%rdi) + movq $0x00, 16(%rdi) + movq $0x00, 24(%rdi) + # Set zero + movq $0x00, (%rsp) + movq $0x00, 8(%rsp) + movq $0x00, 16(%rsp) + movq $0x00, 24(%rsp) + # Set one + movq $0x01, 32(%rsp) + movq $0x00, 40(%rsp) + movq $0x00, 48(%rsp) + movq $0x00, 56(%rsp) + # Copy + movq (%r8), %r9 + movq 8(%r8), %r10 + movq 16(%r8), %r11 + movq 24(%r8), 
%r12 + movq %r9, 64(%rsp) + movq %r10, 72(%rsp) + movq %r11, 80(%rsp) + movq %r12, 88(%rsp) + movb $62, 168(%rsp) + movq $3, 160(%rsp) +L_curve25519_avx2_words: +L_curve25519_avx2_bits: + movq 184(%rsp), %rbx + movq 160(%rsp), %r9 + movb 168(%rsp), %cl + movq (%rsi,%r9,8), %rax + shrq %cl, %rax + andq $0x01, %rax + xorq %rax, %rbx + negq %rbx + # Conditional Swap + movq (%rdi), %r9 + movq 8(%rdi), %r10 + movq 16(%rdi), %r11 + movq 24(%rdi), %r12 + xorq 64(%rsp), %r9 + xorq 72(%rsp), %r10 + xorq 80(%rsp), %r11 + xorq 88(%rsp), %r12 + andq %rbx, %r9 + andq %rbx, %r10 + andq %rbx, %r11 + andq %rbx, %r12 + xorq %r9, (%rdi) + xorq %r10, 8(%rdi) + xorq %r11, 16(%rdi) + xorq %r12, 24(%rdi) + xorq %r9, 64(%rsp) + xorq %r10, 72(%rsp) + xorq %r11, 80(%rsp) + xorq %r12, 88(%rsp) + # Conditional Swap + movq (%rsp), %r9 + movq 8(%rsp), %r10 + movq 16(%rsp), %r11 + movq 24(%rsp), %r12 + xorq 32(%rsp), %r9 + xorq 40(%rsp), %r10 + xorq 48(%rsp), %r11 + xorq 56(%rsp), %r12 + andq %rbx, %r9 + andq %rbx, %r10 + andq %rbx, %r11 + andq %rbx, %r12 + xorq %r9, (%rsp) + xorq %r10, 8(%rsp) + xorq %r11, 16(%rsp) + xorq %r12, 24(%rsp) + xorq %r9, 32(%rsp) + xorq %r10, 40(%rsp) + xorq %r11, 48(%rsp) + xorq %r12, 56(%rsp) + movq %rax, 184(%rsp) + # Add + movq (%rdi), %r9 + movq 8(%rdi), %r10 + movq 16(%rdi), %r11 + movq 24(%rdi), %rax + movq %r9, %r13 + addq (%rsp), %r9 + movq %r10, %r14 + adcq 8(%rsp), %r10 + movq %r11, %r15 + adcq 16(%rsp), %r11 + movq %rax, %rbp + adcq 24(%rsp), %rax + movq $-19, %rcx + movq %rax, %r12 + movq $0x7fffffffffffffff, %rbx + sarq $63, %rax + # Mask the modulus + andq %rax, %rcx + andq %rax, %rbx + # Sub modulus (if overflow) + subq %rcx, %r9 + sbbq %rax, %r10 + sbbq %rax, %r11 + sbbq %rbx, %r12 + # Sub + subq (%rsp), %r13 + movq $0x00, %rax + sbbq 8(%rsp), %r14 + movq $-19, %rcx + sbbq 16(%rsp), %r15 + movq $0x7fffffffffffffff, %rbx + sbbq 24(%rsp), %rbp + sbbq $0x00, %rax + # Mask the modulus + andq %rax, %rcx + andq %rax, %rbx + # Add modulus (if underflow) + addq %rcx, %r13 + adcq %rax, %r14 + adcq %rax, %r15 + adcq %rbx, %rbp + movq %r9, (%rdi) + movq %r10, 8(%rdi) + movq %r11, 16(%rdi) + movq %r12, 24(%rdi) + movq %r13, 128(%rsp) + movq %r14, 136(%rsp) + movq %r15, 144(%rsp) + movq %rbp, 152(%rsp) + # Add + movq 64(%rsp), %r9 + movq 72(%rsp), %r10 + movq 80(%rsp), %r11 + movq 88(%rsp), %rax + movq %r9, %r13 + addq 32(%rsp), %r9 + movq %r10, %r14 + adcq 40(%rsp), %r10 + movq %r11, %r15 + adcq 48(%rsp), %r11 + movq %rax, %rbp + adcq 56(%rsp), %rax + movq $-19, %rcx + movq %rax, %r12 + movq $0x7fffffffffffffff, %rbx + sarq $63, %rax + # Mask the modulus + andq %rax, %rcx + andq %rax, %rbx + # Sub modulus (if overflow) + subq %rcx, %r9 + sbbq %rax, %r10 + sbbq %rax, %r11 + sbbq %rbx, %r12 + # Sub + subq 32(%rsp), %r13 + movq $0x00, %rax + sbbq 40(%rsp), %r14 + movq $-19, %rcx + sbbq 48(%rsp), %r15 + movq $0x7fffffffffffffff, %rbx + sbbq 56(%rsp), %rbp + sbbq $0x00, %rax + # Mask the modulus + andq %rax, %rcx + andq %rax, %rbx + # Add modulus (if underflow) + addq %rcx, %r13 + adcq %rax, %r14 + adcq %rax, %r15 + adcq %rbx, %rbp + movq %r9, (%rsp) + movq %r10, 8(%rsp) + movq %r11, 16(%rsp) + movq %r12, 24(%rsp) + movq %r13, 96(%rsp) + movq %r14, 104(%rsp) + movq %r15, 112(%rsp) + movq %rbp, 120(%rsp) + # Multiply + # A[0] * B[0] + movq (%rdi), %rdx + mulxq 96(%rsp), %r9, %r10 + # A[2] * B[0] + mulxq 112(%rsp), %r11, %r12 + # A[1] * B[0] + mulxq 104(%rsp), %rcx, %rbx + xorq %rbp, %rbp + adcxq %rcx, %r10 + # A[1] * B[3] + movq 24(%rdi), %rdx + mulxq 104(%rsp), %r13, %r14 + adcxq %rbx, %r11 
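+    # [review note -- commentary added in this patch, not emitted by the
+    # asm generator] The block below continues the 4x4 schoolbook multiply
+    # using the BMI2/ADX idiom: mulxq forms a 64x64->128 product without
+    # touching flags, adcxq propagates carries through CF only and adoxq
+    # through OF only, so two independent carry chains run interleaved.
+    # With a = a0 + a1*2^64 + a2*2^128 + a3*2^192 (b likewise), what is
+    # accumulated is:
+    #     a*b = sum_{i,j} ai*bj * 2^(64*(i+j))
+    # where each mulxq supplies the (lo, hi) halves of one ai*bj.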
+ # A[0] * B[1] + movq 8(%rdi), %rdx + mulxq 96(%rsp), %rcx, %rbx + adoxq %rcx, %r10 + # A[2] * B[1] + mulxq 112(%rsp), %rcx, %r15 + adoxq %rbx, %r11 + adcxq %rcx, %r12 + # A[1] * B[2] + movq 16(%rdi), %rdx + mulxq 104(%rsp), %rcx, %rbx + adcxq %r15, %r13 + adoxq %rcx, %r12 + adcxq %rbp, %r14 + adoxq %rbx, %r13 + # A[0] * B[2] + mulxq 96(%rsp), %rcx, %rbx + adoxq %rbp, %r14 + xorq %r15, %r15 + adcxq %rcx, %r11 + # A[1] * B[1] + movq 8(%rdi), %rdx + mulxq 104(%rsp), %rdx, %rcx + adcxq %rbx, %r12 + adoxq %rdx, %r11 + # A[3] * B[1] + movq 8(%rdi), %rdx + adoxq %rcx, %r12 + mulxq 120(%rsp), %rcx, %rbx + adcxq %rcx, %r13 + # A[2] * B[2] + movq 16(%rdi), %rdx + mulxq 112(%rsp), %rdx, %rcx + adcxq %rbx, %r14 + adoxq %rdx, %r13 + # A[3] * B[3] + movq 24(%rdi), %rdx + adoxq %rcx, %r14 + mulxq 120(%rsp), %rcx, %rbx + adoxq %rbp, %r15 + adcxq %rcx, %r15 + # A[0] * B[3] + mulxq 96(%rsp), %rdx, %rcx + adcxq %rbx, %rbp + xorq %rbx, %rbx + adcxq %rdx, %r12 + # A[3] * B[0] + movq (%rdi), %rdx + adcxq %rcx, %r13 + mulxq 120(%rsp), %rdx, %rcx + adoxq %rdx, %r12 + adoxq %rcx, %r13 + # A[2] * B[3] + movq 24(%rdi), %rdx + mulxq 112(%rsp), %rdx, %rcx + adcxq %rdx, %r14 + # A[3] * B[2] + movq 16(%rdi), %rdx + adcxq %rcx, %r15 + mulxq 120(%rsp), %rcx, %rdx + adcxq %rbx, %rbp + adoxq %rcx, %r14 + adoxq %rdx, %r15 + adoxq %rbx, %rbp + # Reduce + movq $0x7fffffffffffffff, %rbx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r15, %rbp + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + andq %rbx, %r12 + # Multiply top half by 19 + movq $19, %rdx + xorq %rbx, %rbx + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %rcx, %r15 + adcxq %rcx, %r11 + adoxq %r15, %r12 + mulxq %rbp, %rbp, %rdx + adcxq %rbp, %r12 + adoxq %rbx, %rdx + adcxq %rbx, %rdx + # Overflow + shldq $0x01, %r12, %rdx + movq $0x7fffffffffffffff, %rbx + imulq $19, %rdx, %rcx + andq %rbx, %r12 + addq %rcx, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Reduce if top bit set + movq %r12, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rbx, %r12 + addq %rcx, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Store + movq %r9, 32(%rsp) + movq %r10, 40(%rsp) + movq %r11, 48(%rsp) + movq %r12, 56(%rsp) + # Multiply + # A[0] * B[0] + movq 128(%rsp), %rdx + mulxq (%rsp), %r9, %r10 + # A[2] * B[0] + mulxq 16(%rsp), %r11, %r12 + # A[1] * B[0] + mulxq 8(%rsp), %rcx, %rbx + xorq %rbp, %rbp + adcxq %rcx, %r10 + # A[1] * B[3] + movq 152(%rsp), %rdx + mulxq 8(%rsp), %r13, %r14 + adcxq %rbx, %r11 + # A[0] * B[1] + movq 136(%rsp), %rdx + mulxq (%rsp), %rcx, %rbx + adoxq %rcx, %r10 + # A[2] * B[1] + mulxq 16(%rsp), %rcx, %r15 + adoxq %rbx, %r11 + adcxq %rcx, %r12 + # A[1] * B[2] + movq 144(%rsp), %rdx + mulxq 8(%rsp), %rcx, %rbx + adcxq %r15, %r13 + adoxq %rcx, %r12 + adcxq %rbp, %r14 + adoxq %rbx, %r13 + # A[0] * B[2] + mulxq (%rsp), %rcx, %rbx + adoxq %rbp, %r14 + xorq %r15, %r15 + adcxq %rcx, %r11 + # A[1] * B[1] + movq 136(%rsp), %rdx + mulxq 8(%rsp), %rdx, %rcx + adcxq %rbx, %r12 + adoxq %rdx, %r11 + # A[3] * B[1] + movq 136(%rsp), %rdx + adoxq %rcx, %r12 + mulxq 24(%rsp), %rcx, %rbx + adcxq %rcx, %r13 + # A[2] * B[2] + movq 144(%rsp), %rdx + mulxq 16(%rsp), %rdx, %rcx + adcxq %rbx, %r14 + adoxq %rdx, %r13 + # A[3] * B[3] + movq 152(%rsp), %rdx + adoxq %rcx, %r14 + mulxq 24(%rsp), %rcx, %rbx + adoxq %rbp, %r15 + adcxq %rcx, %r15 + # A[0] * B[3] + mulxq (%rsp), %rdx, %rcx + adcxq %rbx, %rbp + 
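# [review note -- commentary added in this patch, not emitted by the
+    # asm generator] The xorq that follows is the usual flag-reset idiom:
+    # XOR of a register with itself zeroes the register and clears both
+    # CF and OF, restarting the adcx (CF) and adox (OF) chains for the
+    # final column pass:
+    #     xorq %rbx, %rbx  =>  rbx = 0, CF = 0, OF = 0
+    # and it is a dependency-breaking zeroing idiom on modern x86. +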
xorq %rbx, %rbx + adcxq %rdx, %r12 + # A[3] * B[0] + movq 128(%rsp), %rdx + adcxq %rcx, %r13 + mulxq 24(%rsp), %rdx, %rcx + adoxq %rdx, %r12 + adoxq %rcx, %r13 + # A[2] * B[3] + movq 152(%rsp), %rdx + mulxq 16(%rsp), %rdx, %rcx + adcxq %rdx, %r14 + # A[3] * B[2] + movq 144(%rsp), %rdx + adcxq %rcx, %r15 + mulxq 24(%rsp), %rcx, %rdx + adcxq %rbx, %rbp + adoxq %rcx, %r14 + adoxq %rdx, %r15 + adoxq %rbx, %rbp + # Reduce + movq $0x7fffffffffffffff, %rbx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r15, %rbp + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + andq %rbx, %r12 + # Multiply top half by 19 + movq $19, %rdx + xorq %rbx, %rbx + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %rcx, %r15 + adcxq %rcx, %r11 + adoxq %r15, %r12 + mulxq %rbp, %rbp, %rdx + adcxq %rbp, %r12 + adoxq %rbx, %rdx + adcxq %rbx, %rdx + # Overflow + shldq $0x01, %r12, %rdx + movq $0x7fffffffffffffff, %rbx + imulq $19, %rdx, %rcx + andq %rbx, %r12 + addq %rcx, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Reduce if top bit set + movq %r12, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rbx, %r12 + addq %rcx, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Store + movq %r9, (%rsp) + movq %r10, 8(%rsp) + movq %r11, 16(%rsp) + movq %r12, 24(%rsp) + # Square + # A[0] * A[1] + movq 128(%rsp), %rdx + mulxq 136(%rsp), %r10, %r11 + # A[0] * A[3] + mulxq 152(%rsp), %r12, %r13 + # A[2] * A[1] + movq 144(%rsp), %rdx + mulxq 136(%rsp), %rcx, %rbx + xorq %rbp, %rbp + adoxq %rcx, %r12 + # A[2] * A[3] + mulxq 152(%rsp), %r14, %r15 + adoxq %rbx, %r13 + # A[2] * A[0] + mulxq 128(%rsp), %rcx, %rbx + adoxq %rbp, %r14 + adcxq %rcx, %r11 + adoxq %rbp, %r15 + # A[1] * A[3] + movq 136(%rsp), %rdx + mulxq 152(%rsp), %rax, %r9 + adcxq %rbx, %r12 + adcxq %rax, %r13 + adcxq %r9, %r14 + adcxq %rbp, %r15 + # Double with Carry Flag + xorq %rbp, %rbp + # A[0] * A[0] + movq 128(%rsp), %rdx + mulxq %rdx, %r9, %rax + adcxq %r10, %r10 + # A[1] * A[1] + movq 136(%rsp), %rdx + mulxq %rdx, %rcx, %rbx + adcxq %r11, %r11 + adoxq %rax, %r10 + adcxq %r12, %r12 + adoxq %rcx, %r11 + # A[2] * A[2] + movq 144(%rsp), %rdx + mulxq %rdx, %rax, %rcx + adcxq %r13, %r13 + adoxq %rbx, %r12 + adcxq %r14, %r14 + adoxq %rax, %r13 + # A[3] * A[3] + movq 152(%rsp), %rdx + mulxq %rdx, %rax, %rbx + adcxq %r15, %r15 + adoxq %rcx, %r14 + adcxq %rbp, %rbp + adoxq %rax, %r15 + adoxq %rbx, %rbp + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r15, %rbp + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + andq %rcx, %r12 + # Multiply top half by 19 + movq $19, %rdx + xorq %rcx, %rcx + mulxq %r13, %rax, %r13 + adcxq %rax, %r9 + adoxq %r13, %r10 + mulxq %r14, %rax, %r14 + adcxq %rax, %r10 + adoxq %r14, %r11 + mulxq %r15, %rax, %r15 + adcxq %rax, %r11 + adoxq %r15, %r12 + mulxq %rbp, %rbp, %rdx + adcxq %rbp, %r12 + adoxq %rcx, %rdx + adcxq %rcx, %rdx + # Overflow + shldq $0x01, %r12, %rdx + movq $0x7fffffffffffffff, %rcx + imulq $19, %rdx, %rax + andq %rcx, %r12 + addq %rax, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Reduce if top bit set + movq %r12, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r12 + addq %rax, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Store + movq %r9, 96(%rsp) + movq %r10, 104(%rsp) + movq %r11, 112(%rsp) + movq %r12, 120(%rsp) + 
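# [review note -- commentary added in this patch, not emitted by the
+    # asm generator] The Square block below uses the halving trick: each
+    # cross product ai*aj (i < j) is computed once and doubled through the
+    # carry flag, then the diagonal squares are folded in:
+    #     (sum_i ai*2^(64i))^2 = sum_i ai^2 * 2^(128i)
+    #                            + 2 * sum_{i<j} ai*aj * 2^(64*(i+j))
+    # so a 4-limb squaring takes 10 mulxq instead of a full multiply's 16. +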
# Square + # A[0] * A[1] + movq (%rdi), %rdx + mulxq 8(%rdi), %r10, %r11 + # A[0] * A[3] + mulxq 24(%rdi), %r12, %r13 + # A[2] * A[1] + movq 16(%rdi), %rdx + mulxq 8(%rdi), %rcx, %rbx + xorq %rbp, %rbp + adoxq %rcx, %r12 + # A[2] * A[3] + mulxq 24(%rdi), %r14, %r15 + adoxq %rbx, %r13 + # A[2] * A[0] + mulxq (%rdi), %rcx, %rbx + adoxq %rbp, %r14 + adcxq %rcx, %r11 + adoxq %rbp, %r15 + # A[1] * A[3] + movq 8(%rdi), %rdx + mulxq 24(%rdi), %rax, %r9 + adcxq %rbx, %r12 + adcxq %rax, %r13 + adcxq %r9, %r14 + adcxq %rbp, %r15 + # Double with Carry Flag + xorq %rbp, %rbp + # A[0] * A[0] + movq (%rdi), %rdx + mulxq %rdx, %r9, %rax + adcxq %r10, %r10 + # A[1] * A[1] + movq 8(%rdi), %rdx + mulxq %rdx, %rcx, %rbx + adcxq %r11, %r11 + adoxq %rax, %r10 + adcxq %r12, %r12 + adoxq %rcx, %r11 + # A[2] * A[2] + movq 16(%rdi), %rdx + mulxq %rdx, %rax, %rcx + adcxq %r13, %r13 + adoxq %rbx, %r12 + adcxq %r14, %r14 + adoxq %rax, %r13 + # A[3] * A[3] + movq 24(%rdi), %rdx + mulxq %rdx, %rax, %rbx + adcxq %r15, %r15 + adoxq %rcx, %r14 + adcxq %rbp, %rbp + adoxq %rax, %r15 + adoxq %rbx, %rbp + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r15, %rbp + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + andq %rcx, %r12 + # Multiply top half by 19 + movq $19, %rdx + xorq %rcx, %rcx + mulxq %r13, %rax, %r13 + adcxq %rax, %r9 + adoxq %r13, %r10 + mulxq %r14, %rax, %r14 + adcxq %rax, %r10 + adoxq %r14, %r11 + mulxq %r15, %rax, %r15 + adcxq %rax, %r11 + adoxq %r15, %r12 + mulxq %rbp, %rbp, %rdx + adcxq %rbp, %r12 + adoxq %rcx, %rdx + adcxq %rcx, %rdx + # Overflow + shldq $0x01, %r12, %rdx + movq $0x7fffffffffffffff, %rcx + imulq $19, %rdx, %rax + andq %rcx, %r12 + addq %rax, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Reduce if top bit set + movq %r12, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r12 + addq %rax, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Store + movq %r9, 128(%rsp) + movq %r10, 136(%rsp) + movq %r11, 144(%rsp) + movq %r12, 152(%rsp) + # Add + movq 32(%rsp), %r9 + movq 40(%rsp), %r10 + movq 48(%rsp), %r11 + movq 56(%rsp), %rax + movq %r9, %r13 + addq (%rsp), %r9 + movq %r10, %r14 + adcq 8(%rsp), %r10 + movq %r11, %r15 + adcq 16(%rsp), %r11 + movq %rax, %rbp + adcq 24(%rsp), %rax + movq $-19, %rcx + movq %rax, %r12 + movq $0x7fffffffffffffff, %rbx + sarq $63, %rax + # Mask the modulus + andq %rax, %rcx + andq %rax, %rbx + # Sub modulus (if overflow) + subq %rcx, %r9 + sbbq %rax, %r10 + sbbq %rax, %r11 + sbbq %rbx, %r12 + # Sub + subq (%rsp), %r13 + movq $0x00, %rax + sbbq 8(%rsp), %r14 + movq $-19, %rcx + sbbq 16(%rsp), %r15 + movq $0x7fffffffffffffff, %rbx + sbbq 24(%rsp), %rbp + sbbq $0x00, %rax + # Mask the modulus + andq %rax, %rcx + andq %rax, %rbx + # Add modulus (if underflow) + addq %rcx, %r13 + adcq %rax, %r14 + adcq %rax, %r15 + adcq %rbx, %rbp + movq %r9, 64(%rsp) + movq %r10, 72(%rsp) + movq %r11, 80(%rsp) + movq %r12, 88(%rsp) + movq %r13, (%rsp) + movq %r14, 8(%rsp) + movq %r15, 16(%rsp) + movq %rbp, 24(%rsp) + # Multiply + # A[0] * B[0] + movq 96(%rsp), %rdx + mulxq 128(%rsp), %r9, %r10 + # A[2] * B[0] + mulxq 144(%rsp), %r11, %r12 + # A[1] * B[0] + mulxq 136(%rsp), %rcx, %rbx + xorq %rbp, %rbp + adcxq %rcx, %r10 + # A[1] * B[3] + movq 120(%rsp), %rdx + mulxq 136(%rsp), %r13, %r14 + adcxq %rbx, %r11 + # A[0] * B[1] + movq 104(%rsp), %rdx + mulxq 128(%rsp), %rcx, %rbx + adoxq %rcx, %r10 + # A[2] * B[1] + mulxq 144(%rsp), %rcx, %r15 + adoxq 
%rbx, %r11 + adcxq %rcx, %r12 + # A[1] * B[2] + movq 112(%rsp), %rdx + mulxq 136(%rsp), %rcx, %rbx + adcxq %r15, %r13 + adoxq %rcx, %r12 + adcxq %rbp, %r14 + adoxq %rbx, %r13 + # A[0] * B[2] + mulxq 128(%rsp), %rcx, %rbx + adoxq %rbp, %r14 + xorq %r15, %r15 + adcxq %rcx, %r11 + # A[1] * B[1] + movq 104(%rsp), %rdx + mulxq 136(%rsp), %rdx, %rcx + adcxq %rbx, %r12 + adoxq %rdx, %r11 + # A[3] * B[1] + movq 104(%rsp), %rdx + adoxq %rcx, %r12 + mulxq 152(%rsp), %rcx, %rbx + adcxq %rcx, %r13 + # A[2] * B[2] + movq 112(%rsp), %rdx + mulxq 144(%rsp), %rdx, %rcx + adcxq %rbx, %r14 + adoxq %rdx, %r13 + # A[3] * B[3] + movq 120(%rsp), %rdx + adoxq %rcx, %r14 + mulxq 152(%rsp), %rcx, %rbx + adoxq %rbp, %r15 + adcxq %rcx, %r15 + # A[0] * B[3] + mulxq 128(%rsp), %rdx, %rcx + adcxq %rbx, %rbp + xorq %rbx, %rbx + adcxq %rdx, %r12 + # A[3] * B[0] + movq 96(%rsp), %rdx + adcxq %rcx, %r13 + mulxq 152(%rsp), %rdx, %rcx + adoxq %rdx, %r12 + adoxq %rcx, %r13 + # A[2] * B[3] + movq 120(%rsp), %rdx + mulxq 144(%rsp), %rdx, %rcx + adcxq %rdx, %r14 + # A[3] * B[2] + movq 112(%rsp), %rdx + adcxq %rcx, %r15 + mulxq 152(%rsp), %rcx, %rdx + adcxq %rbx, %rbp + adoxq %rcx, %r14 + adoxq %rdx, %r15 + adoxq %rbx, %rbp + # Reduce + movq $0x7fffffffffffffff, %rbx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r15, %rbp + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + andq %rbx, %r12 + # Multiply top half by 19 + movq $19, %rdx + xorq %rbx, %rbx + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %rcx, %r15 + adcxq %rcx, %r11 + adoxq %r15, %r12 + mulxq %rbp, %rbp, %rdx + adcxq %rbp, %r12 + adoxq %rbx, %rdx + adcxq %rbx, %rdx + # Overflow + shldq $0x01, %r12, %rdx + movq $0x7fffffffffffffff, %rbx + imulq $19, %rdx, %rcx + andq %rbx, %r12 + addq %rcx, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Reduce if top bit set + movq %r12, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rbx, %r12 + addq %rcx, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Store + movq %r9, (%rdi) + movq %r10, 8(%rdi) + movq %r11, 16(%rdi) + movq %r12, 24(%rdi) + # Sub + movq 128(%rsp), %r9 + movq 136(%rsp), %r10 + movq 144(%rsp), %r11 + movq 152(%rsp), %r12 + subq 96(%rsp), %r9 + movq $0x00, %rax + sbbq 104(%rsp), %r10 + movq $-19, %rcx + sbbq 112(%rsp), %r11 + movq $0x7fffffffffffffff, %rbx + sbbq 120(%rsp), %r12 + sbbq $0x00, %rax + # Mask the modulus + andq %rax, %rcx + andq %rax, %rbx + # Add modulus (if underflow) + addq %rcx, %r9 + adcq %rax, %r10 + adcq %rax, %r11 + adcq %rbx, %r12 + movq %r9, 128(%rsp) + movq %r10, 136(%rsp) + movq %r11, 144(%rsp) + movq %r12, 152(%rsp) + # Square + # A[0] * A[1] + movq (%rsp), %rdx + mulxq 8(%rsp), %r10, %r11 + # A[0] * A[3] + mulxq 24(%rsp), %r12, %r13 + # A[2] * A[1] + movq 16(%rsp), %rdx + mulxq 8(%rsp), %rcx, %rbx + xorq %rbp, %rbp + adoxq %rcx, %r12 + # A[2] * A[3] + mulxq 24(%rsp), %r14, %r15 + adoxq %rbx, %r13 + # A[2] * A[0] + mulxq (%rsp), %rcx, %rbx + adoxq %rbp, %r14 + adcxq %rcx, %r11 + adoxq %rbp, %r15 + # A[1] * A[3] + movq 8(%rsp), %rdx + mulxq 24(%rsp), %rax, %r9 + adcxq %rbx, %r12 + adcxq %rax, %r13 + adcxq %r9, %r14 + adcxq %rbp, %r15 + # Double with Carry Flag + xorq %rbp, %rbp + # A[0] * A[0] + movq (%rsp), %rdx + mulxq %rdx, %r9, %rax + adcxq %r10, %r10 + # A[1] * A[1] + movq 8(%rsp), %rdx + mulxq %rdx, %rcx, %rbx + adcxq %r11, %r11 + adoxq %rax, %r10 + adcxq %r12, %r12 + adoxq %rcx, %r11 + # A[2] * A[2] + 
movq 16(%rsp), %rdx + mulxq %rdx, %rax, %rcx + adcxq %r13, %r13 + adoxq %rbx, %r12 + adcxq %r14, %r14 + adoxq %rax, %r13 + # A[3] * A[3] + movq 24(%rsp), %rdx + mulxq %rdx, %rax, %rbx + adcxq %r15, %r15 + adoxq %rcx, %r14 + adcxq %rbp, %rbp + adoxq %rax, %r15 + adoxq %rbx, %rbp + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r15, %rbp + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + andq %rcx, %r12 + # Multiply top half by 19 + movq $19, %rdx + xorq %rcx, %rcx + mulxq %r13, %rax, %r13 + adcxq %rax, %r9 + adoxq %r13, %r10 + mulxq %r14, %rax, %r14 + adcxq %rax, %r10 + adoxq %r14, %r11 + mulxq %r15, %rax, %r15 + adcxq %rax, %r11 + adoxq %r15, %r12 + mulxq %rbp, %rbp, %rdx + adcxq %rbp, %r12 + adoxq %rcx, %rdx + adcxq %rcx, %rdx + # Overflow + shldq $0x01, %r12, %rdx + movq $0x7fffffffffffffff, %rcx + imulq $19, %rdx, %rax + andq %rcx, %r12 + addq %rax, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Reduce if top bit set + movq %r12, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r12 + addq %rax, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Store + movq %r9, (%rsp) + movq %r10, 8(%rsp) + movq %r11, 16(%rsp) + movq %r12, 24(%rsp) + movq $0x1db42, %rdx + mulxq 128(%rsp), %r9, %rbp + mulxq 136(%rsp), %r10, %r15 + mulxq 144(%rsp), %r11, %r14 + mulxq 152(%rsp), %r12, %r13 + addq %rbp, %r10 + adcq %r15, %r11 + adcq %r14, %r12 + adcq $0x00, %r13 + movq $0x7fffffffffffffff, %rbp + shldq $0x01, %r12, %r13 + andq %rbp, %r12 + imulq $19, %r13, %r13 + addq %r13, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + movq %r9, 32(%rsp) + movq %r10, 40(%rsp) + movq %r11, 48(%rsp) + movq %r12, 56(%rsp) + # Square + # A[0] * A[1] + movq 64(%rsp), %rdx + mulxq 72(%rsp), %r10, %r11 + # A[0] * A[3] + mulxq 88(%rsp), %r12, %r13 + # A[2] * A[1] + movq 80(%rsp), %rdx + mulxq 72(%rsp), %rcx, %rbx + xorq %rbp, %rbp + adoxq %rcx, %r12 + # A[2] * A[3] + mulxq 88(%rsp), %r14, %r15 + adoxq %rbx, %r13 + # A[2] * A[0] + mulxq 64(%rsp), %rcx, %rbx + adoxq %rbp, %r14 + adcxq %rcx, %r11 + adoxq %rbp, %r15 + # A[1] * A[3] + movq 72(%rsp), %rdx + mulxq 88(%rsp), %rax, %r9 + adcxq %rbx, %r12 + adcxq %rax, %r13 + adcxq %r9, %r14 + adcxq %rbp, %r15 + # Double with Carry Flag + xorq %rbp, %rbp + # A[0] * A[0] + movq 64(%rsp), %rdx + mulxq %rdx, %r9, %rax + adcxq %r10, %r10 + # A[1] * A[1] + movq 72(%rsp), %rdx + mulxq %rdx, %rcx, %rbx + adcxq %r11, %r11 + adoxq %rax, %r10 + adcxq %r12, %r12 + adoxq %rcx, %r11 + # A[2] * A[2] + movq 80(%rsp), %rdx + mulxq %rdx, %rax, %rcx + adcxq %r13, %r13 + adoxq %rbx, %r12 + adcxq %r14, %r14 + adoxq %rax, %r13 + # A[3] * A[3] + movq 88(%rsp), %rdx + mulxq %rdx, %rax, %rbx + adcxq %r15, %r15 + adoxq %rcx, %r14 + adcxq %rbp, %rbp + adoxq %rax, %r15 + adoxq %rbx, %rbp + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r15, %rbp + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + andq %rcx, %r12 + # Multiply top half by 19 + movq $19, %rdx + xorq %rcx, %rcx + mulxq %r13, %rax, %r13 + adcxq %rax, %r9 + adoxq %r13, %r10 + mulxq %r14, %rax, %r14 + adcxq %rax, %r10 + adoxq %r14, %r11 + mulxq %r15, %rax, %r15 + adcxq %rax, %r11 + adoxq %r15, %r12 + mulxq %rbp, %rbp, %rdx + adcxq %rbp, %r12 + adoxq %rcx, %rdx + adcxq %rcx, %rdx + # Overflow + shldq $0x01, %r12, %rdx + movq $0x7fffffffffffffff, %rcx + imulq $19, %rdx, %rax + andq %rcx, %r12 + addq %rax, %r9 + 
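# [review note -- commentary added in this patch, not emitted by the
+    # asm generator] The carry chain below completes the pseudo-Mersenne
+    # fold for p = 2^255 - 19: since 2^255 = 19 (mod p), an intermediate
+    # h = h1*2^255 + h0 reduces as
+    #     h = 19*h1 + h0 (mod p),
+    # i.e. bits at or above position 255 are multiplied by 19 and added
+    # back into the low limbs. One fold can leave a single extra top bit,
+    # hence the "Reduce if top bit set" pass that follows. +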
adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Reduce if top bit set + movq %r12, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rax + andq %rcx, %r12 + addq %rax, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Store + movq %r9, 64(%rsp) + movq %r10, 72(%rsp) + movq %r11, 80(%rsp) + movq %r12, 88(%rsp) + # Add + movq 96(%rsp), %r9 + movq 104(%rsp), %r10 + addq 32(%rsp), %r9 + movq 112(%rsp), %r11 + adcq 40(%rsp), %r10 + movq 120(%rsp), %rax + adcq 48(%rsp), %r11 + movq $-19, %rcx + adcq 56(%rsp), %rax + movq $0x7fffffffffffffff, %rbx + movq %rax, %r12 + sarq $63, %rax + # Mask the modulus + andq %rax, %rcx + andq %rax, %rbx + # Sub modulus (if overflow) + subq %rcx, %r9 + sbbq %rax, %r10 + sbbq %rax, %r11 + sbbq %rbx, %r12 + movq %r9, 96(%rsp) + movq %r10, 104(%rsp) + movq %r11, 112(%rsp) + movq %r12, 120(%rsp) + # Multiply + # A[0] * B[0] + movq (%rsp), %rdx + mulxq (%r8), %r9, %r10 + # A[2] * B[0] + mulxq 16(%r8), %r11, %r12 + # A[1] * B[0] + mulxq 8(%r8), %rcx, %rbx + xorq %rbp, %rbp + adcxq %rcx, %r10 + # A[1] * B[3] + movq 24(%rsp), %rdx + mulxq 8(%r8), %r13, %r14 + adcxq %rbx, %r11 + # A[0] * B[1] + movq 8(%rsp), %rdx + mulxq (%r8), %rcx, %rbx + adoxq %rcx, %r10 + # A[2] * B[1] + mulxq 16(%r8), %rcx, %r15 + adoxq %rbx, %r11 + adcxq %rcx, %r12 + # A[1] * B[2] + movq 16(%rsp), %rdx + mulxq 8(%r8), %rcx, %rbx + adcxq %r15, %r13 + adoxq %rcx, %r12 + adcxq %rbp, %r14 + adoxq %rbx, %r13 + # A[0] * B[2] + mulxq (%r8), %rcx, %rbx + adoxq %rbp, %r14 + xorq %r15, %r15 + adcxq %rcx, %r11 + # A[1] * B[1] + movq 8(%rsp), %rdx + mulxq 8(%r8), %rdx, %rcx + adcxq %rbx, %r12 + adoxq %rdx, %r11 + # A[3] * B[1] + movq 8(%rsp), %rdx + adoxq %rcx, %r12 + mulxq 24(%r8), %rcx, %rbx + adcxq %rcx, %r13 + # A[2] * B[2] + movq 16(%rsp), %rdx + mulxq 16(%r8), %rdx, %rcx + adcxq %rbx, %r14 + adoxq %rdx, %r13 + # A[3] * B[3] + movq 24(%rsp), %rdx + adoxq %rcx, %r14 + mulxq 24(%r8), %rcx, %rbx + adoxq %rbp, %r15 + adcxq %rcx, %r15 + # A[0] * B[3] + mulxq (%r8), %rdx, %rcx + adcxq %rbx, %rbp + xorq %rbx, %rbx + adcxq %rdx, %r12 + # A[3] * B[0] + movq (%rsp), %rdx + adcxq %rcx, %r13 + mulxq 24(%r8), %rdx, %rcx + adoxq %rdx, %r12 + adoxq %rcx, %r13 + # A[2] * B[3] + movq 24(%rsp), %rdx + mulxq 16(%r8), %rdx, %rcx + adcxq %rdx, %r14 + # A[3] * B[2] + movq 16(%rsp), %rdx + adcxq %rcx, %r15 + mulxq 24(%r8), %rcx, %rdx + adcxq %rbx, %rbp + adoxq %rcx, %r14 + adoxq %rdx, %r15 + adoxq %rbx, %rbp + # Reduce + movq $0x7fffffffffffffff, %rbx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r15, %rbp + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + andq %rbx, %r12 + # Multiply top half by 19 + movq $19, %rdx + xorq %rbx, %rbx + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %rcx, %r15 + adcxq %rcx, %r11 + adoxq %r15, %r12 + mulxq %rbp, %rbp, %rdx + adcxq %rbp, %r12 + adoxq %rbx, %rdx + adcxq %rbx, %rdx + # Overflow + shldq $0x01, %r12, %rdx + movq $0x7fffffffffffffff, %rbx + imulq $19, %rdx, %rcx + andq %rbx, %r12 + addq %rcx, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Reduce if top bit set + movq %r12, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rbx, %r12 + addq %rcx, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Store + movq %r9, 32(%rsp) + movq %r10, 40(%rsp) + movq %r11, 48(%rsp) + movq %r12, 56(%rsp) + # Multiply + # A[0] * B[0] + movq 96(%rsp), %rdx + mulxq 128(%rsp), %r9, %r10 + # A[2] * B[0] + mulxq 
144(%rsp), %r11, %r12 + # A[1] * B[0] + mulxq 136(%rsp), %rcx, %rbx + xorq %rbp, %rbp + adcxq %rcx, %r10 + # A[1] * B[3] + movq 120(%rsp), %rdx + mulxq 136(%rsp), %r13, %r14 + adcxq %rbx, %r11 + # A[0] * B[1] + movq 104(%rsp), %rdx + mulxq 128(%rsp), %rcx, %rbx + adoxq %rcx, %r10 + # A[2] * B[1] + mulxq 144(%rsp), %rcx, %r15 + adoxq %rbx, %r11 + adcxq %rcx, %r12 + # A[1] * B[2] + movq 112(%rsp), %rdx + mulxq 136(%rsp), %rcx, %rbx + adcxq %r15, %r13 + adoxq %rcx, %r12 + adcxq %rbp, %r14 + adoxq %rbx, %r13 + # A[0] * B[2] + mulxq 128(%rsp), %rcx, %rbx + adoxq %rbp, %r14 + xorq %r15, %r15 + adcxq %rcx, %r11 + # A[1] * B[1] + movq 104(%rsp), %rdx + mulxq 136(%rsp), %rdx, %rcx + adcxq %rbx, %r12 + adoxq %rdx, %r11 + # A[3] * B[1] + movq 104(%rsp), %rdx + adoxq %rcx, %r12 + mulxq 152(%rsp), %rcx, %rbx + adcxq %rcx, %r13 + # A[2] * B[2] + movq 112(%rsp), %rdx + mulxq 144(%rsp), %rdx, %rcx + adcxq %rbx, %r14 + adoxq %rdx, %r13 + # A[3] * B[3] + movq 120(%rsp), %rdx + adoxq %rcx, %r14 + mulxq 152(%rsp), %rcx, %rbx + adoxq %rbp, %r15 + adcxq %rcx, %r15 + # A[0] * B[3] + mulxq 128(%rsp), %rdx, %rcx + adcxq %rbx, %rbp + xorq %rbx, %rbx + adcxq %rdx, %r12 + # A[3] * B[0] + movq 96(%rsp), %rdx + adcxq %rcx, %r13 + mulxq 152(%rsp), %rdx, %rcx + adoxq %rdx, %r12 + adoxq %rcx, %r13 + # A[2] * B[3] + movq 120(%rsp), %rdx + mulxq 144(%rsp), %rdx, %rcx + adcxq %rdx, %r14 + # A[3] * B[2] + movq 112(%rsp), %rdx + adcxq %rcx, %r15 + mulxq 152(%rsp), %rcx, %rdx + adcxq %rbx, %rbp + adoxq %rcx, %r14 + adoxq %rdx, %r15 + adoxq %rbx, %rbp + # Reduce + movq $0x7fffffffffffffff, %rbx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r15, %rbp + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + andq %rbx, %r12 + # Multiply top half by 19 + movq $19, %rdx + xorq %rbx, %rbx + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %rcx, %r15 + adcxq %rcx, %r11 + adoxq %r15, %r12 + mulxq %rbp, %rbp, %rdx + adcxq %rbp, %r12 + adoxq %rbx, %rdx + adcxq %rbx, %rdx + # Overflow + shldq $0x01, %r12, %rdx + movq $0x7fffffffffffffff, %rbx + imulq $19, %rdx, %rcx + andq %rbx, %r12 + addq %rcx, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Reduce if top bit set + movq %r12, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rbx, %r12 + addq %rcx, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Store + movq %r9, (%rsp) + movq %r10, 8(%rsp) + movq %r11, 16(%rsp) + movq %r12, 24(%rsp) + decb 168(%rsp) + jge L_curve25519_avx2_bits + movq $63, 168(%rsp) + decb 160(%rsp) + jge L_curve25519_avx2_words + # Invert + leaq 32(%rsp), %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + movq %rsp, %rsi + leaq 64(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + leaq 64(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 96(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 
64(%rsp), %rdi
+    leaq 64(%rsp), %rsi
+    leaq 96(%rsp), %rdx
+#ifndef __APPLE__
+    callq fe_mul_avx2@plt
+#else
+    callq _fe_mul_avx2
+#endif /* __APPLE__ */
+    leaq 96(%rsp), %rdi
+    leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+    callq fe_sq_avx2@plt
+#else
+    callq _fe_sq_avx2
+#endif /* __APPLE__ */
+    leaq 96(%rsp), %rdi
+    leaq 96(%rsp), %rsi
+    movq $4, %rdx
+#ifndef __APPLE__
+    callq fe_sq_n_avx2@plt
+#else
+    callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+    leaq 64(%rsp), %rdi
+    leaq 96(%rsp), %rsi
+    leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+    callq fe_mul_avx2@plt
+#else
+    callq _fe_mul_avx2
+#endif /* __APPLE__ */
+    leaq 96(%rsp), %rdi
+    leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+    callq fe_sq_avx2@plt
+#else
+    callq _fe_sq_avx2
+#endif /* __APPLE__ */
+    leaq 96(%rsp), %rdi
+    leaq 96(%rsp), %rsi
+    movq $9, %rdx
+#ifndef __APPLE__
+    callq fe_sq_n_avx2@plt
+#else
+    callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+    leaq 96(%rsp), %rdi
+    leaq 96(%rsp), %rsi
+    leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+    callq fe_mul_avx2@plt
+#else
+    callq _fe_mul_avx2
+#endif /* __APPLE__ */
+    leaq 128(%rsp), %rdi
+    leaq 96(%rsp), %rsi
+#ifndef __APPLE__
+    callq fe_sq_avx2@plt
+#else
+    callq _fe_sq_avx2
+#endif /* __APPLE__ */
+    leaq 128(%rsp), %rdi
+    leaq 128(%rsp), %rsi
+    movq $19, %rdx
+#ifndef __APPLE__
+    callq fe_sq_n_avx2@plt
+#else
+    callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+    leaq 96(%rsp), %rdi
+    leaq 128(%rsp), %rsi
+    leaq 96(%rsp), %rdx
+#ifndef __APPLE__
+    callq fe_mul_avx2@plt
+#else
+    callq _fe_mul_avx2
+#endif /* __APPLE__ */
+    leaq 96(%rsp), %rdi
+    leaq 96(%rsp), %rsi
+#ifndef __APPLE__
+    callq fe_sq_avx2@plt
+#else
+    callq _fe_sq_avx2
+#endif /* __APPLE__ */
+    leaq 96(%rsp), %rdi
+    leaq 96(%rsp), %rsi
+    movq $9, %rdx
+#ifndef __APPLE__
+    callq fe_sq_n_avx2@plt
+#else
+    callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+    leaq 64(%rsp), %rdi
+    leaq 96(%rsp), %rsi
+    leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+    callq fe_mul_avx2@plt
+#else
+    callq _fe_mul_avx2
+#endif /* __APPLE__ */
+    leaq 96(%rsp), %rdi
+    leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+    callq fe_sq_avx2@plt
+#else
+    callq _fe_sq_avx2
+#endif /* __APPLE__ */
+    leaq 96(%rsp), %rdi
+    leaq 96(%rsp), %rsi
+    movq $49, %rdx
+#ifndef __APPLE__
+    callq fe_sq_n_avx2@plt
+#else
+    callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+    leaq 96(%rsp), %rdi
+    leaq 96(%rsp), %rsi
+    leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+    callq fe_mul_avx2@plt
+#else
+    callq _fe_mul_avx2
+#endif /* __APPLE__ */
+    leaq 128(%rsp), %rdi
+    leaq 96(%rsp), %rsi
+#ifndef __APPLE__
+    callq fe_sq_avx2@plt
+#else
+    callq _fe_sq_avx2
+#endif /* __APPLE__ */
+    leaq 128(%rsp), %rdi
+    leaq 128(%rsp), %rsi
+    movq $0x63, %rdx
+#ifndef __APPLE__
+    callq fe_sq_n_avx2@plt
+#else
+    callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+    leaq 96(%rsp), %rdi
+    leaq 128(%rsp), %rsi
+    leaq 96(%rsp), %rdx
+#ifndef __APPLE__
+    callq fe_mul_avx2@plt
+#else
+    callq _fe_mul_avx2
+#endif /* __APPLE__ */
+    leaq 96(%rsp), %rdi
+    leaq 96(%rsp), %rsi
+#ifndef __APPLE__
+    callq fe_sq_avx2@plt
+#else
+    callq _fe_sq_avx2
+#endif /* __APPLE__ */
+    leaq 96(%rsp), %rdi
+    leaq 96(%rsp), %rsi
+    movq $49, %rdx
+#ifndef __APPLE__
+    callq fe_sq_n_avx2@plt
+#else
+    callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+    leaq 64(%rsp), %rdi
+    leaq 96(%rsp), %rsi
+    leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+    callq fe_mul_avx2@plt
+#else
+    callq _fe_mul_avx2
+#endif /* __APPLE__ */
+    leaq 64(%rsp), %rdi
+    leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+    callq fe_sq_avx2@plt
+#else
+    callq _fe_sq_avx2
+#endif /* __APPLE__ */
+    leaq 64(%rsp), %rdi
+    leaq 64(%rsp), %rsi
+    movq $4, %rdx
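+    # [review note -- commentary added in this patch, not emitted by the
+    # asm generator] This call sequence is the tail of a fixed addition
+    # chain computing z^(p-2) mod p, p = 2^255 - 19, i.e. the field
+    # inverse via Fermat's little theorem. The fe_sq_n/fe_mul pairs build
+    # exponents of the form 2^k - 1 (k = 5, 10, 20, 40, 50, 100, 200, 250)
+    # before the final squarings, giving
+    #     z^(2^255 - 21) = z^(p-2) = 1/z (mod p),
+    # the same chain structure as ref10-style implementations.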
+#ifndef __APPLE__ + callq fe_sq_n_avx2@plt +#else + callq _fe_sq_n_avx2 +#endif /* __APPLE__ */ + movq %rsp, %rdi + leaq 64(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + movq 176(%rsp), %rdi + # Multiply + # A[0] * B[0] + movq (%rsp), %rdx + mulxq (%rdi), %r9, %r10 + # A[2] * B[0] + mulxq 16(%rdi), %r11, %r12 + # A[1] * B[0] + mulxq 8(%rdi), %rcx, %rbx + xorq %rbp, %rbp + adcxq %rcx, %r10 + # A[1] * B[3] + movq 24(%rsp), %rdx + mulxq 8(%rdi), %r13, %r14 + adcxq %rbx, %r11 + # A[0] * B[1] + movq 8(%rsp), %rdx + mulxq (%rdi), %rcx, %rbx + adoxq %rcx, %r10 + # A[2] * B[1] + mulxq 16(%rdi), %rcx, %r15 + adoxq %rbx, %r11 + adcxq %rcx, %r12 + # A[1] * B[2] + movq 16(%rsp), %rdx + mulxq 8(%rdi), %rcx, %rbx + adcxq %r15, %r13 + adoxq %rcx, %r12 + adcxq %rbp, %r14 + adoxq %rbx, %r13 + # A[0] * B[2] + mulxq (%rdi), %rcx, %rbx + adoxq %rbp, %r14 + xorq %r15, %r15 + adcxq %rcx, %r11 + # A[1] * B[1] + movq 8(%rsp), %rdx + mulxq 8(%rdi), %rdx, %rcx + adcxq %rbx, %r12 + adoxq %rdx, %r11 + # A[3] * B[1] + movq 8(%rsp), %rdx + adoxq %rcx, %r12 + mulxq 24(%rdi), %rcx, %rbx + adcxq %rcx, %r13 + # A[2] * B[2] + movq 16(%rsp), %rdx + mulxq 16(%rdi), %rdx, %rcx + adcxq %rbx, %r14 + adoxq %rdx, %r13 + # A[3] * B[3] + movq 24(%rsp), %rdx + adoxq %rcx, %r14 + mulxq 24(%rdi), %rcx, %rbx + adoxq %rbp, %r15 + adcxq %rcx, %r15 + # A[0] * B[3] + mulxq (%rdi), %rdx, %rcx + adcxq %rbx, %rbp + xorq %rbx, %rbx + adcxq %rdx, %r12 + # A[3] * B[0] + movq (%rsp), %rdx + adcxq %rcx, %r13 + mulxq 24(%rdi), %rdx, %rcx + adoxq %rdx, %r12 + adoxq %rcx, %r13 + # A[2] * B[3] + movq 24(%rsp), %rdx + mulxq 16(%rdi), %rdx, %rcx + adcxq %rdx, %r14 + # A[3] * B[2] + movq 16(%rsp), %rdx + adcxq %rcx, %r15 + mulxq 24(%rdi), %rcx, %rdx + adcxq %rbx, %rbp + adoxq %rcx, %r14 + adoxq %rdx, %r15 + adoxq %rbx, %rbp + # Reduce + movq $0x7fffffffffffffff, %rbx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r15, %rbp + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + andq %rbx, %r12 + # Multiply top half by 19 + movq $19, %rdx + xorq %rbx, %rbx + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %rcx, %r15 + adcxq %rcx, %r11 + adoxq %r15, %r12 + mulxq %rbp, %rbp, %rdx + adcxq %rbp, %r12 + adoxq %rbx, %rdx + adcxq %rbx, %rdx + # Overflow + shldq $0x01, %r12, %rdx + movq $0x7fffffffffffffff, %rbx + imulq $19, %rdx, %rcx + andq %rbx, %r12 + addq %rcx, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Reduce if top bit set + movq %r12, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rbx, %r12 + addq %rcx, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Store + movq %r9, (%rdi) + movq %r10, 8(%rdi) + movq %r11, 16(%rdi) + movq %r12, 24(%rdi) + xorq %rax, %rax + addq $0xc0, %rsp + popq %rbp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size curve25519_avx2,.-curve25519_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_pow22523_avx2 +.type fe_pow22523_avx2,@function +.align 4 +fe_pow22523_avx2: +#else +.section __TEXT,__text +.globl _fe_pow22523_avx2 +.p2align 2 +_fe_pow22523_avx2: +#endif /* __APPLE__ */ + subq $0x70, %rsp + # pow22523 + movq %rdi, 96(%rsp) + movq %rsi, 104(%rsp) + movq %rsp, %rdi + movq 104(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi 
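+    # [review note -- commentary added in this patch, not emitted by the
+    # asm generator] fe_pow22523 raises its input to (p-5)/8 = 2^252 - 3,
+    # p = 2^255 - 19. This is the primitive Ed25519 point decompression
+    # uses for square roots: for x^2 = u/v the candidate root is
+    #     x = u * v^3 * (u*v^7)^((p-5)/8),
+    # corrected by a factor sqrt(-1) when needed. The chain below mirrors
+    # fe_invert with a shorter tail of squarings.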
+ movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + movq 104(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + movq %rsp, %rdi + movq %rsp, %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + movq %rsp, %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + movq %rsp, %rdi + leaq 32(%rsp), %rsi + movq %rsp, %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + movb $4, %dl +#ifndef __APPLE__ + callq fe_sq_n_avx2@plt +#else + callq _fe_sq_n_avx2 +#endif /* __APPLE__ */ + movq %rsp, %rdi + leaq 32(%rsp), %rsi + movq %rsp, %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + movb $9, %dl +#ifndef __APPLE__ + callq fe_sq_n_avx2@plt +#else + callq _fe_sq_n_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + movq %rsp, %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + movb $19, %dl +#ifndef __APPLE__ + callq fe_sq_n_avx2@plt +#else + callq _fe_sq_n_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 64(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + movb $9, %dl +#ifndef __APPLE__ + callq fe_sq_n_avx2@plt +#else + callq _fe_sq_n_avx2 +#endif /* __APPLE__ */ + movq %rsp, %rdi + leaq 32(%rsp), %rsi + movq %rsp, %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + movb $49, %dl +#ifndef __APPLE__ + callq fe_sq_n_avx2@plt +#else + callq _fe_sq_n_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + movq %rsp, %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 64(%rsp), %rdi + leaq 64(%rsp), %rsi + movb $0x63, %dl +#ifndef __APPLE__ + callq fe_sq_n_avx2@plt +#else + callq _fe_sq_n_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 64(%rsp), %rsi + leaq 32(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), 
%rdi + leaq 32(%rsp), %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + leaq 32(%rsp), %rdi + leaq 32(%rsp), %rsi + movb $49, %dl +#ifndef __APPLE__ + callq fe_sq_n_avx2@plt +#else + callq _fe_sq_n_avx2 +#endif /* __APPLE__ */ + movq %rsp, %rdi + leaq 32(%rsp), %rsi + movq %rsp, %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + movq %rsp, %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + movq %rsp, %rdi + movq %rsp, %rsi +#ifndef __APPLE__ + callq fe_sq_avx2@plt +#else + callq _fe_sq_avx2 +#endif /* __APPLE__ */ + movq 96(%rsp), %rdi + movq %rsp, %rsi + movq 104(%rsp), %rdx +#ifndef __APPLE__ + callq fe_mul_avx2@plt +#else + callq _fe_mul_avx2 +#endif /* __APPLE__ */ + movq 104(%rsp), %rsi + movq 96(%rsp), %rdi + addq $0x70, %rsp + repz retq +#ifndef __APPLE__ +.text +.globl fe_ge_to_p2_avx2 +.type fe_ge_to_p2_avx2,@function +.align 4 +fe_ge_to_p2_avx2: +#else +.section __TEXT,__text +.globl _fe_ge_to_p2_avx2 +.p2align 2 +_fe_ge_to_p2_avx2: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $40, %rsp + movq %rsi, (%rsp) + movq %rdx, 8(%rsp) + movq %rcx, 16(%rsp) + movq %r8, 24(%rsp) + movq %r9, 32(%rsp) + movq 16(%rsp), %rsi + movq 88(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbx), %rdx + mulxq 8(%rsi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rbx), %rdx + mulxq (%rsi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbx), %rdx + mulxq 8(%rsi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rsi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbx), %rdx + mulxq 8(%rsi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbx), %rdx + adoxq %rcx, %r11 + mulxq 24(%rsi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbx), %rdx + adoxq %rcx, %r13 + mulxq 24(%rsi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbx), %rdx + adcxq %rcx, %r12 + mulxq 24(%rsi), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbx), %rdx + adcxq %rcx, %r14 + mulxq 24(%rsi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq 
$0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq (%rsp), %rdi + movq 24(%rsp), %rsi + movq 32(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbx), %rdx + mulxq 8(%rsi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rbx), %rdx + mulxq (%rsi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbx), %rdx + mulxq 8(%rsi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rsi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbx), %rdx + mulxq 8(%rsi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbx), %rdx + adoxq %rcx, %r11 + mulxq 24(%rsi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbx), %rdx + adoxq %rcx, %r13 + mulxq 24(%rsi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbx), %rdx + adcxq %rcx, %r12 + mulxq 24(%rsi), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbx), %rdx + adcxq %rcx, %r14 + mulxq 24(%rsi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 88(%rsp), %rsi + # Multiply + # A[0] * B[0] + movq (%rsi), %rdx + mulxq (%rbx), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rbx), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rbx), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rsi), %rdx + mulxq 8(%rbx), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rsi), %rdx + mulxq (%rbx), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rbx), %rcx, 
%r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rsi), %rdx + mulxq 8(%rbx), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rbx), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rsi), %rdx + mulxq 8(%rbx), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rsi), %rdx + adoxq %rcx, %r11 + mulxq 24(%rbx), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rsi), %rdx + mulxq 16(%rbx), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rsi), %rdx + adoxq %rcx, %r13 + mulxq 24(%rbx), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rbx), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rsi), %rdx + adcxq %rcx, %r12 + mulxq 24(%rbx), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rsi), %rdx + mulxq 16(%rbx), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rsi), %rdx + adcxq %rcx, %r14 + mulxq 24(%rbx), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + addq $40, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size fe_ge_to_p2_avx2,.-fe_ge_to_p2_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_to_p3_avx2 +.type fe_ge_to_p3_avx2,@function +.align 4 +fe_ge_to_p3_avx2: +#else +.section __TEXT,__text +.globl _fe_ge_to_p3_avx2 +.p2align 2 +_fe_ge_to_p3_avx2: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $40, %rsp + movq %rsi, (%rsp) + movq %rdx, 8(%rsp) + movq %rcx, 16(%rsp) + movq %r8, 24(%rsp) + movq %r9, 32(%rsp) + movq 24(%rsp), %rsi + movq 96(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbx), %rdx + mulxq 8(%rsi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rbx), %rdx + mulxq (%rsi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbx), %rdx + mulxq 8(%rsi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rsi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbx), 
%rdx + mulxq 8(%rsi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbx), %rdx + adoxq %rcx, %r11 + mulxq 24(%rsi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbx), %rdx + adoxq %rcx, %r13 + mulxq 24(%rsi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbx), %rdx + adcxq %rcx, %r12 + mulxq 24(%rsi), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbx), %rdx + adcxq %rcx, %r14 + mulxq 24(%rsi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq (%rsp), %rdi + movq 32(%rsp), %rsi + movq 88(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbx), %rdx + mulxq 8(%rsi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rbx), %rdx + mulxq (%rsi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbx), %rdx + mulxq 8(%rsi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rsi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbx), %rdx + mulxq 8(%rsi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbx), %rdx + adoxq %rcx, %r11 + mulxq 24(%rsi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbx), %rdx + adoxq %rcx, %r13 + mulxq 24(%rsi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbx), %rdx + adcxq %rcx, %r12 + mulxq 24(%rsi), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbx), %rdx + adcxq %rcx, %r14 + mulxq 24(%rsi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + 
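# Note: the reduction below folds the 512-bit product at bit 255: since 2^255 = 19 (mod 2^255-19), the top half is multiplied by 19 and added back into the low four limbs, and the fold is repeated on the small leftover carry so the result fits in four 64-bit limbs with the top bit clear. +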
movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq 96(%rsp), %rsi + # Multiply + # A[0] * B[0] + movq (%rsi), %rdx + mulxq (%rbx), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rbx), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rbx), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rsi), %rdx + mulxq 8(%rbx), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rsi), %rdx + mulxq (%rbx), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rbx), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rsi), %rdx + mulxq 8(%rbx), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rbx), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rsi), %rdx + mulxq 8(%rbx), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rsi), %rdx + adoxq %rcx, %r11 + mulxq 24(%rbx), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rsi), %rdx + mulxq 16(%rbx), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rsi), %rdx + adoxq %rcx, %r13 + mulxq 24(%rbx), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rbx), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rsi), %rdx + adcxq %rcx, %r12 + mulxq 24(%rbx), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rsi), %rdx + mulxq 16(%rbx), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rsi), %rdx + adcxq %rcx, %r14 + mulxq 24(%rbx), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # 
Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 16(%rsp), %rdi + movq 24(%rsp), %rsi + movq 32(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbx), %rdx + mulxq 8(%rsi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rbx), %rdx + mulxq (%rsi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbx), %rdx + mulxq 8(%rsi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rsi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbx), %rdx + mulxq 8(%rsi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbx), %rdx + adoxq %rcx, %r11 + mulxq 24(%rsi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbx), %rdx + adoxq %rcx, %r13 + mulxq 24(%rsi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbx), %rdx + adcxq %rcx, %r12 + mulxq 24(%rsi), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbx), %rdx + adcxq %rcx, %r14 + mulxq 24(%rsi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + addq $40, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size fe_ge_to_p3_avx2,.-fe_ge_to_p3_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_dbl_avx2 +.type fe_ge_dbl_avx2,@function +.align 4 +fe_ge_dbl_avx2: +#else +.section __TEXT,__text +.globl _fe_ge_dbl_avx2 +.p2align 2 +_fe_ge_dbl_avx2: +#endif /* __APPLE__ */ + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $48, %rsp + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rcx, 24(%rsp) + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq 32(%rsp), %rsi + # Square + # A[0] * A[1] + movq (%rsi), %rdx + mulxq 8(%rsi), %r9, %r10 + # A[0] * A[3] + mulxq 24(%rsi), %r11, %r12 + # A[2] * A[1] + movq 16(%rsi), %rdx + 
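# Note: this Square block computes each cross product A[i]*A[j] (i < j) only once, doubles the accumulated terms in the "Double with Carry Flag" step, then adds the diagonal products A[i]*A[i], so only ten of the sixteen single-precision multiplies of a full 4x4 product are needed. +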
mulxq 8(%rsi), %rcx, %rax
+ xorq %r15, %r15
+ adoxq %rcx, %r11
+ # A[2] * A[3]
+ mulxq 24(%rsi), %r13, %r14
+ adoxq %rax, %r12
+ # A[2] * A[0]
+ mulxq (%rsi), %rcx, %rax
+ adoxq %r15, %r13
+ adcxq %rcx, %r10
+ adoxq %r15, %r14
+ # A[1] * A[3]
+ movq 8(%rsi), %rdx
+ mulxq 24(%rsi), %rbp, %r8
+ adcxq %rax, %r11
+ adcxq %rbp, %r12
+ adcxq %r8, %r13
+ adcxq %r15, %r14
+ # Double with Carry Flag
+ xorq %r15, %r15
+ # A[0] * A[0]
+ movq (%rsi), %rdx
+ mulxq %rdx, %r8, %rbp
+ adcxq %r9, %r9
+ # A[1] * A[1]
+ movq 8(%rsi), %rdx
+ mulxq %rdx, %rcx, %rax
+ adcxq %r10, %r10
+ adoxq %rbp, %r9
+ adcxq %r11, %r11
+ adoxq %rcx, %r10
+ # A[2] * A[2]
+ movq 16(%rsi), %rdx
+ mulxq %rdx, %rbp, %rcx
+ adcxq %r12, %r12
+ adoxq %rax, %r11
+ adcxq %r13, %r13
+ adoxq %rbp, %r12
+ # A[3] * A[3]
+ movq 24(%rsi), %rdx
+ mulxq %rdx, %rbp, %rax
+ adcxq %r14, %r14
+ adoxq %rcx, %r13
+ adcxq %r15, %r15
+ adoxq %rbp, %r14
+ adoxq %rax, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rcx, %rcx
+ mulxq %r12, %rbp, %r12
+ adcxq %rbp, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rbp, %r13
+ adcxq %rbp, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rbp, %r14
+ adcxq %rbp, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rcx, %rdx
+ adcxq %rcx, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rcx
+ imulq $19, %rdx, %rbp
+ andq %rcx, %r11
+ addq %rbp, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rbp
+ andq %rcx, %r11
+ addq %rbp, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 16(%rsp), %rdi
+ movq 40(%rsp), %rbx
+ # Square
+ # A[0] * A[1]
+ movq (%rbx), %rdx
+ mulxq 8(%rbx), %r9, %r10
+ # A[0] * A[3]
+ mulxq 24(%rbx), %r11, %r12
+ # A[2] * A[1]
+ movq 16(%rbx), %rdx
+ mulxq 8(%rbx), %rcx, %rax
+ xorq %r15, %r15
+ adoxq %rcx, %r11
+ # A[2] * A[3]
+ mulxq 24(%rbx), %r13, %r14
+ adoxq %rax, %r12
+ # A[2] * A[0]
+ mulxq (%rbx), %rcx, %rax
+ adoxq %r15, %r13
+ adcxq %rcx, %r10
+ adoxq %r15, %r14
+ # A[1] * A[3]
+ movq 8(%rbx), %rdx
+ mulxq 24(%rbx), %rbp, %r8
+ adcxq %rax, %r11
+ adcxq %rbp, %r12
+ adcxq %r8, %r13
+ adcxq %r15, %r14
+ # Double with Carry Flag
+ xorq %r15, %r15
+ # A[0] * A[0]
+ movq (%rbx), %rdx
+ mulxq %rdx, %r8, %rbp
+ adcxq %r9, %r9
+ # A[1] * A[1]
+ movq 8(%rbx), %rdx
+ mulxq %rdx, %rcx, %rax
+ adcxq %r10, %r10
+ adoxq %rbp, %r9
+ adcxq %r11, %r11
+ adoxq %rcx, %r10
+ # A[2] * A[2]
+ movq 16(%rbx), %rdx
+ mulxq %rdx, %rbp, %rcx
+ adcxq %r12, %r12
+ adoxq %rax, %r11
+ adcxq %r13, %r13
+ adoxq %rbp, %r12
+ # A[3] * A[3]
+ movq 24(%rbx), %rdx
+ mulxq %rdx, %rbp, %rax
+ adcxq %r14, %r14
+ adoxq %rcx, %r13
+ adcxq %r15, %r15
+ adoxq %rbp, %r14
+ adoxq %rax, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rcx, %rcx
+ mulxq %r12, %rbp, %r12
+ adcxq %rbp, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rbp, %r13
+ adcxq %rbp, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rbp, %r14
+ adcxq %rbp, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
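+ # Note: with 19 held in %rdx, each mulxq above folds one upper limb into the lower half over two independent carry chains (adcxq through CF, adoxq through OF); the remaining high word left in %rdx is resolved by the "Overflow" step that follows.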
+ adcxq %r15, %r11 + adoxq %rcx, %rdx + adcxq %rcx, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rcx + imulq $19, %rdx, %rbp + andq %rcx, %r11 + addq %rbp, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rbp + andq %rcx, %r11 + addq %rbp, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq (%rbx), %r8 + movq 16(%rsi), %r10 + adcq 8(%rbx), %r9 + movq 24(%rsi), %rdx + adcq 16(%rbx), %r10 + movq $-19, %rcx + adcq 24(%rbx), %rdx + movq $0x7fffffffffffffff, %rax + movq %rdx, %r11 + sarq $63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 24(%rsp), %rsi + # Square + # A[0] * A[1] + movq (%rdi), %rdx + mulxq 8(%rdi), %r9, %r10 + # A[0] * A[3] + mulxq 24(%rdi), %r11, %r12 + # A[2] * A[1] + movq 16(%rdi), %rdx + mulxq 8(%rdi), %rcx, %rax + xorq %r15, %r15 + adoxq %rcx, %r11 + # A[2] * A[3] + mulxq 24(%rdi), %r13, %r14 + adoxq %rax, %r12 + # A[2] * A[0] + mulxq (%rdi), %rcx, %rax + adoxq %r15, %r13 + adcxq %rcx, %r10 + adoxq %r15, %r14 + # A[1] * A[3] + movq 8(%rdi), %rdx + mulxq 24(%rdi), %rbp, %r8 + adcxq %rax, %r11 + adcxq %rbp, %r12 + adcxq %r8, %r13 + adcxq %r15, %r14 + # Double with Carry Flag + xorq %r15, %r15 + # A[0] * A[0] + movq (%rdi), %rdx + mulxq %rdx, %r8, %rbp + adcxq %r9, %r9 + # A[1] * A[1] + movq 8(%rdi), %rdx + mulxq %rdx, %rcx, %rax + adcxq %r10, %r10 + adoxq %rbp, %r9 + adcxq %r11, %r11 + adoxq %rcx, %r10 + # A[2] * A[2] + movq 16(%rdi), %rdx + mulxq %rdx, %rbp, %rcx + adcxq %r12, %r12 + adoxq %rax, %r11 + adcxq %r13, %r13 + adoxq %rbp, %r12 + # A[3] * A[3] + movq 24(%rdi), %rdx + mulxq %rdx, %rbp, %rax + adcxq %r14, %r14 + adoxq %rcx, %r13 + adcxq %r15, %r15 + adoxq %rbp, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rcx + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rcx, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rcx, %rcx + mulxq %r12, %rbp, %r12 + adcxq %rbp, %r8 + adoxq %r12, %r9 + mulxq %r13, %rbp, %r13 + adcxq %rbp, %r9 + adoxq %r13, %r10 + mulxq %r14, %rbp, %r14 + adcxq %rbp, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rcx, %rdx + adcxq %rcx, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rcx + imulq $19, %rdx, %rbp + andq %rcx, %r11 + addq %rbp, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rbp + andq %rcx, %r11 + addq %rbp, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rsi) + movq %r9, 8(%rsi) + movq %r10, 16(%rsi) + movq %r11, 24(%rsi) + movq 16(%rsp), %rsi + movq (%rsp), %rbx + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %rdx + movq %r8, %r12 + addq (%rbx), %r8 + movq %r9, %r13 + adcq 8(%rbx), %r9 + movq %r10, %r14 + adcq 16(%rbx), %r10 + movq %rdx, %r15 + adcq 24(%rbx), %rdx + movq $-19, %rcx + movq %rdx, %r11 + movq $0x7fffffffffffffff, %rax + sarq $63, %rdx + # Mask the modulus + andq %rdx, 
%rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + # Sub + subq (%rbx), %r12 + movq $0x00, %rdx + sbbq 8(%rbx), %r13 + movq $-19, %rcx + sbbq 16(%rbx), %r14 + movq $0x7fffffffffffffff, %rax + sbbq 24(%rbx), %r15 + sbbq $0x00, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Add modulus (if underflow) + addq %rcx, %r12 + adcq %rdx, %r13 + adcq %rdx, %r14 + adcq %rax, %r15 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, (%rsi) + movq %r13, 8(%rsi) + movq %r14, 16(%rsi) + movq %r15, 24(%rsi) + movq 24(%rsp), %rsi + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rdi), %r8 + movq $0x00, %rdx + sbbq 8(%rdi), %r9 + movq $-19, %rcx + sbbq 16(%rdi), %r10 + movq $0x7fffffffffffffff, %rax + sbbq 24(%rdi), %r11 + sbbq $0x00, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Add modulus (if underflow) + addq %rcx, %r8 + adcq %rdx, %r9 + adcq %rdx, %r10 + adcq %rax, %r11 + movq %r8, (%rbx) + movq %r9, 8(%rbx) + movq %r10, 16(%rbx) + movq %r11, 24(%rbx) + movq 104(%rsp), %rdi + # Square * 2 + # A[0] * A[1] + movq (%rdi), %rdx + mulxq 8(%rdi), %r9, %r10 + # A[0] * A[3] + mulxq 24(%rdi), %r11, %r12 + # A[2] * A[1] + movq 16(%rdi), %rdx + mulxq 8(%rdi), %rcx, %rax + xorq %r15, %r15 + adoxq %rcx, %r11 + # A[2] * A[3] + mulxq 24(%rdi), %r13, %r14 + adoxq %rax, %r12 + # A[2] * A[0] + mulxq (%rdi), %rcx, %rax + adoxq %r15, %r13 + adcxq %rcx, %r10 + adoxq %r15, %r14 + # A[1] * A[3] + movq 8(%rdi), %rdx + mulxq 24(%rdi), %rbp, %r8 + adcxq %rax, %r11 + adcxq %rbp, %r12 + adcxq %r8, %r13 + adcxq %r15, %r14 + # Double with Carry Flag + xorq %r15, %r15 + # A[0] * A[0] + movq (%rdi), %rdx + mulxq %rdx, %r8, %rbp + adcxq %r9, %r9 + # A[1] * A[1] + movq 8(%rdi), %rdx + mulxq %rdx, %rcx, %rax + adcxq %r10, %r10 + adoxq %rbp, %r9 + adcxq %r11, %r11 + adoxq %rcx, %r10 + # A[2] * A[2] + movq 16(%rdi), %rdx + mulxq %rdx, %rbp, %rcx + adcxq %r12, %r12 + adoxq %rax, %r11 + adcxq %r13, %r13 + adoxq %rbp, %r12 + # A[3] * A[3] + movq 24(%rdi), %rdx + mulxq %rdx, %rbp, %rax + adcxq %r14, %r14 + adoxq %rcx, %r13 + adcxq %r15, %r15 + adoxq %rbp, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + xorq %rbp, %rbp + # Move top half into t4-t7 and remove top bit from t3 and double + shldq $3, %r15, %rbp + shldq $2, %r14, %r15 + shldq $2, %r13, %r14 + shldq $2, %r12, %r13 + shldq $2, %r11, %r12 + shldq $0x01, %r10, %r11 + shldq $0x01, %r9, %r10 + shldq $0x01, %r8, %r9 + shlq $0x01, %r8 + andq %rax, %r11 + # Two out left, one in right + andq %rax, %r15 + # Multiply top bits by 19*19 + imulq $0x169, %rbp, %rcx + xorq %rax, %rax + # Multiply top half by 19 + movq $19, %rdx + adoxq %rcx, %r8 + mulxq %r12, %rbp, %r12 + adcxq %rbp, %r8 + adoxq %r12, %r9 + mulxq %r13, %rbp, %r13 + adcxq %rbp, %r9 + adoxq %r13, %r10 + mulxq %r14, %rbp, %r14 + adcxq %rbp, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rbp + andq %rax, %r11 + addq %rbp, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rbp + andq %rax, %r11 + addq %rbp, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rsi) + movq %r9, 8(%rsi) + movq %r10, 16(%rsi) + movq %r11, 24(%rsi) + movq 16(%rsp), 
%rdi + # Sub + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq (%rdi), %r8 + movq $0x00, %rdx + sbbq 8(%rdi), %r9 + movq $-19, %rcx + sbbq 16(%rdi), %r10 + movq $0x7fffffffffffffff, %rax + sbbq 24(%rdi), %r11 + sbbq $0x00, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Add modulus (if underflow) + addq %rcx, %r8 + adcq %rdx, %r9 + adcq %rdx, %r10 + adcq %rax, %r11 + movq %r8, (%rsi) + movq %r9, 8(%rsi) + movq %r10, 16(%rsi) + movq %r11, 24(%rsi) + addq $48, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + repz retq +#ifndef __APPLE__ +.size fe_ge_dbl_avx2,.-fe_ge_dbl_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_madd_avx2 +.type fe_ge_madd_avx2,@function +.align 4 +fe_ge_madd_avx2: +#else +.section __TEXT,__text +.globl _fe_ge_madd_avx2 +.p2align 2 +_fe_ge_madd_avx2: +#endif /* __APPLE__ */ + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $48, %rsp + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rcx, 24(%rsp) + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq 8(%rsp), %rsi + movq 40(%rsp), %rbx + movq 32(%rsp), %rbp + # Add + movq (%rbx), %r8 + movq 8(%rbx), %r9 + movq 16(%rbx), %r10 + movq 24(%rbx), %rdx + movq %r8, %r12 + addq (%rbp), %r8 + movq %r9, %r13 + adcq 8(%rbp), %r9 + movq %r10, %r14 + adcq 16(%rbp), %r10 + movq %rdx, %r15 + adcq 24(%rbp), %rdx + movq $-19, %rcx + movq %rdx, %r11 + movq $0x7fffffffffffffff, %rax + sarq $63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + # Sub + subq (%rbp), %r12 + movq $0x00, %rdx + sbbq 8(%rbp), %r13 + movq $-19, %rcx + sbbq 16(%rbp), %r14 + movq $0x7fffffffffffffff, %rax + sbbq 24(%rbp), %r15 + sbbq $0x00, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Add modulus (if underflow) + addq %rcx, %r12 + adcq %rdx, %r13 + adcq %rdx, %r14 + adcq %rax, %r15 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, (%rsi) + movq %r13, 8(%rsi) + movq %r14, 16(%rsi) + movq %r15, 24(%rsi) + movq 16(%rsp), %rbx + movq 128(%rsp), %rbp + # Multiply + # A[0] * B[0] + movq (%rbp), %rdx + mulxq (%rdi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rdi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rdi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbp), %rdx + mulxq 8(%rdi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rbp), %rdx + mulxq (%rdi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rdi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbp), %rdx + mulxq 8(%rdi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rdi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbp), %rdx + mulxq 8(%rdi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbp), %rdx + adoxq %rcx, %r11 + mulxq 24(%rdi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbp), %rdx + mulxq 16(%rdi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbp), %rdx + adoxq %rcx, %r13 + mulxq 24(%rdi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rdi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbp), %rdx + adcxq %rcx, %r12 + mulxq 24(%rdi), %rdx, %rcx + 
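# Note: the schoolbook multiply runs two carry chains at once, adcxq through CF and adoxq through OF; mulxq writes no flags, so the sixteen partial products can be accumulated without saving or restoring either chain. +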
adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbp), %rdx + mulxq 16(%rdi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbp), %rdx + adcxq %rcx, %r14 + mulxq 24(%rdi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rbx) + movq %r9, 8(%rbx) + movq %r10, 16(%rbx) + movq %r11, 24(%rbx) + movq 136(%rsp), %rdi + # Multiply + # A[0] * B[0] + movq (%rdi), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rdi), %rdx + mulxq 8(%rsi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rdi), %rdx + mulxq (%rsi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rdi), %rdx + mulxq 8(%rsi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rsi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rdi), %rdx + mulxq 8(%rsi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rdi), %rdx + adoxq %rcx, %r11 + mulxq 24(%rsi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rdi), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rdi), %rdx + adoxq %rcx, %r13 + mulxq 24(%rsi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rdi), %rdx + adcxq %rcx, %r12 + mulxq 24(%rsi), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rdi), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rdi), %rdx + adcxq %rcx, %r14 + mulxq 24(%rsi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, 
%rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rsi) + movq %r9, 8(%rsi) + movq %r10, 16(%rsi) + movq %r11, 24(%rsi) + movq 24(%rsp), %rdi + movq 120(%rsp), %rsi + movq 112(%rsp), %rbp + # Multiply + # A[0] * B[0] + movq (%rbp), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbp), %rdx + mulxq 8(%rsi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rbp), %rdx + mulxq (%rsi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbp), %rdx + mulxq 8(%rsi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rsi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbp), %rdx + mulxq 8(%rsi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbp), %rdx + adoxq %rcx, %r11 + mulxq 24(%rsi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbp), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbp), %rdx + adoxq %rcx, %r13 + mulxq 24(%rsi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbp), %rdx + adcxq %rcx, %r12 + mulxq 24(%rsi), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbp), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbp), %rdx + adcxq %rcx, %r14 + mulxq 24(%rsi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rdi + movq (%rsp), %rsi + # Add + movq (%rbx), %r8 + movq 8(%rbx), %r9 + movq 16(%rbx), %r10 + movq 24(%rbx), %rdx + movq %r8, %r12 + addq (%rdi), %r8 + movq %r9, %r13 + adcq 8(%rdi), %r9 + movq %r10, %r14 + adcq 16(%rdi), %r10 + movq %rdx, %r15 + adcq 24(%rdi), %rdx + movq $-19, %rcx + movq %rdx, %r11 + movq $0x7fffffffffffffff, %rax + sarq $63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq 
%rax, %r11 + # Sub + subq (%rdi), %r12 + movq $0x00, %rdx + sbbq 8(%rdi), %r13 + movq $-19, %rcx + sbbq 16(%rdi), %r14 + movq $0x7fffffffffffffff, %rax + sbbq 24(%rdi), %r15 + sbbq $0x00, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Add modulus (if underflow) + addq %rcx, %r12 + adcq %rdx, %r13 + adcq %rdx, %r14 + adcq %rax, %r15 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, (%rsi) + movq %r13, 8(%rsi) + movq %r14, 16(%rsi) + movq %r15, 24(%rsi) + movq 104(%rsp), %rdi + # Double + movq (%rdi), %r8 + movq 8(%rdi), %r9 + addq %r8, %r8 + movq 16(%rdi), %r10 + adcq %r9, %r9 + movq 24(%rdi), %rdx + adcq %r10, %r10 + movq $-19, %rcx + adcq %rdx, %rdx + movq $0x7fffffffffffffff, %rax + movq %rdx, %r11 + sarq $63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + movq %r8, (%rbx) + movq %r9, 8(%rbx) + movq %r10, 16(%rbx) + movq %r11, 24(%rbx) + movq 24(%rsp), %rdi + # Add + movq (%rbx), %r8 + movq 8(%rbx), %r9 + movq 16(%rbx), %r10 + movq 24(%rbx), %rdx + movq %r8, %r12 + addq (%rdi), %r8 + movq %r9, %r13 + adcq 8(%rdi), %r9 + movq %r10, %r14 + adcq 16(%rdi), %r10 + movq %rdx, %r15 + adcq 24(%rdi), %rdx + movq $-19, %rcx + movq %rdx, %r11 + movq $0x7fffffffffffffff, %rax + sarq $63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + # Sub + subq (%rdi), %r12 + movq $0x00, %rdx + sbbq 8(%rdi), %r13 + movq $-19, %rcx + sbbq 16(%rdi), %r14 + movq $0x7fffffffffffffff, %rax + sbbq 24(%rdi), %r15 + sbbq $0x00, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Add modulus (if underflow) + addq %rcx, %r12 + adcq %rdx, %r13 + adcq %rdx, %r14 + adcq %rax, %r15 + movq %r8, (%rbx) + movq %r9, 8(%rbx) + movq %r10, 16(%rbx) + movq %r11, 24(%rbx) + movq %r12, (%rdi) + movq %r13, 8(%rdi) + movq %r14, 16(%rdi) + movq %r15, 24(%rdi) + addq $48, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + repz retq +#ifndef __APPLE__ +.size fe_ge_madd_avx2,.-fe_ge_madd_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_msub_avx2 +.type fe_ge_msub_avx2,@function +.align 4 +fe_ge_msub_avx2: +#else +.section __TEXT,__text +.globl _fe_ge_msub_avx2 +.p2align 2 +_fe_ge_msub_avx2: +#endif /* __APPLE__ */ + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $48, %rsp + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rcx, 24(%rsp) + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq 8(%rsp), %rsi + movq 40(%rsp), %rbx + movq 32(%rsp), %rbp + # Add + movq (%rbx), %r8 + movq 8(%rbx), %r9 + movq 16(%rbx), %r10 + movq 24(%rbx), %rdx + movq %r8, %r12 + addq (%rbp), %r8 + movq %r9, %r13 + adcq 8(%rbp), %r9 + movq %r10, %r14 + adcq 16(%rbp), %r10 + movq %rdx, %r15 + adcq 24(%rbp), %rdx + movq $-19, %rcx + movq %rdx, %r11 + movq $0x7fffffffffffffff, %rax + sarq $63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + # Sub + subq (%rbp), %r12 + movq $0x00, %rdx + sbbq 8(%rbp), %r13 + movq $-19, %rcx + sbbq 16(%rbp), %r14 + movq $0x7fffffffffffffff, %rax + sbbq 24(%rbp), %r15 + sbbq $0x00, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Add modulus (if underflow) + addq %rcx, %r12 + adcq %rdx, %r13 + adcq %rdx, 
%r14 + adcq %rax, %r15 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, (%rsi) + movq %r13, 8(%rsi) + movq %r14, 16(%rsi) + movq %r15, 24(%rsi) + movq 16(%rsp), %rbx + movq 136(%rsp), %rbp + # Multiply + # A[0] * B[0] + movq (%rbp), %rdx + mulxq (%rdi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rdi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rdi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbp), %rdx + mulxq 8(%rdi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rbp), %rdx + mulxq (%rdi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rdi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbp), %rdx + mulxq 8(%rdi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rdi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbp), %rdx + mulxq 8(%rdi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbp), %rdx + adoxq %rcx, %r11 + mulxq 24(%rdi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbp), %rdx + mulxq 16(%rdi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbp), %rdx + adoxq %rcx, %r13 + mulxq 24(%rdi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rdi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbp), %rdx + adcxq %rcx, %r12 + mulxq 24(%rdi), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbp), %rdx + mulxq 16(%rdi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbp), %rdx + adcxq %rcx, %r14 + mulxq 24(%rdi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rbx) + movq %r9, 8(%rbx) + movq %r10, 16(%rbx) + movq %r11, 24(%rbx) + movq 128(%rsp), %rdi + # Multiply + # A[0] * B[0] + movq (%rdi), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rdi), %rdx + mulxq 8(%rsi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rdi), %rdx + mulxq (%rsi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rdi), %rdx + mulxq 8(%rsi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rsi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + 
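# Note: the xorq %r14, %r14 above zeroes %r14 and clears both CF and OF, restarting the adcxq and adoxq carry chains for the remaining partial products. +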
# A[1] * B[1] + movq 8(%rdi), %rdx + mulxq 8(%rsi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rdi), %rdx + adoxq %rcx, %r11 + mulxq 24(%rsi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rdi), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rdi), %rdx + adoxq %rcx, %r13 + mulxq 24(%rsi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rdi), %rdx + adcxq %rcx, %r12 + mulxq 24(%rsi), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rdi), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rdi), %rdx + adcxq %rcx, %r14 + mulxq 24(%rsi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rsi) + movq %r9, 8(%rsi) + movq %r10, 16(%rsi) + movq %r11, 24(%rsi) + movq 24(%rsp), %rdi + movq 120(%rsp), %rsi + movq 112(%rsp), %rbp + # Multiply + # A[0] * B[0] + movq (%rbp), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbp), %rdx + mulxq 8(%rsi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rbp), %rdx + mulxq (%rsi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbp), %rdx + mulxq 8(%rsi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rsi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbp), %rdx + mulxq 8(%rsi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbp), %rdx + adoxq %rcx, %r11 + mulxq 24(%rsi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbp), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbp), %rdx + adoxq %rcx, %r13 + mulxq 24(%rsi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbp), %rdx + adcxq %rcx, %r12 + mulxq 24(%rsi), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbp), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbp), %rdx + adcxq %rcx, %r14 + mulxq 24(%rsi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, 
%r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 8(%rsp), %rsi + movq (%rsp), %rbp + # Add + movq (%rbx), %r8 + movq 8(%rbx), %r9 + movq 16(%rbx), %r10 + movq 24(%rbx), %rdx + movq %r8, %r12 + addq (%rsi), %r8 + movq %r9, %r13 + adcq 8(%rsi), %r9 + movq %r10, %r14 + adcq 16(%rsi), %r10 + movq %rdx, %r15 + adcq 24(%rsi), %rdx + movq $-19, %rcx + movq %rdx, %r11 + movq $0x7fffffffffffffff, %rax + sarq $63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + # Sub + subq (%rsi), %r12 + movq $0x00, %rdx + sbbq 8(%rsi), %r13 + movq $-19, %rcx + sbbq 16(%rsi), %r14 + movq $0x7fffffffffffffff, %rax + sbbq 24(%rsi), %r15 + sbbq $0x00, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Add modulus (if underflow) + addq %rcx, %r12 + adcq %rdx, %r13 + adcq %rdx, %r14 + adcq %rax, %r15 + movq %r8, (%rsi) + movq %r9, 8(%rsi) + movq %r10, 16(%rsi) + movq %r11, 24(%rsi) + movq %r12, (%rbp) + movq %r13, 8(%rbp) + movq %r14, 16(%rbp) + movq %r15, 24(%rbp) + movq 104(%rsp), %rsi + # Double + movq (%rsi), %r8 + movq 8(%rsi), %r9 + addq %r8, %r8 + movq 16(%rsi), %r10 + adcq %r9, %r9 + movq 24(%rsi), %rdx + adcq %r10, %r10 + movq $-19, %rcx + adcq %rdx, %rdx + movq $0x7fffffffffffffff, %rax + movq %rdx, %r11 + sarq $63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + movq %r8, (%rbx) + movq %r9, 8(%rbx) + movq %r10, 16(%rbx) + movq %r11, 24(%rbx) + # Add + movq (%rbx), %r8 + movq 8(%rbx), %r9 + movq 16(%rbx), %r10 + movq 24(%rbx), %rdx + movq %r8, %r12 + addq (%rdi), %r8 + movq %r9, %r13 + adcq 8(%rdi), %r9 + movq %r10, %r14 + adcq 16(%rdi), %r10 + movq %rdx, %r15 + adcq 24(%rdi), %rdx + movq $-19, %rcx + movq %rdx, %r11 + movq $0x7fffffffffffffff, %rax + sarq $63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + # Sub + subq (%rdi), %r12 + movq $0x00, %rdx + sbbq 8(%rdi), %r13 + movq $-19, %rcx + sbbq 16(%rdi), %r14 + movq $0x7fffffffffffffff, %rax + sbbq 24(%rdi), %r15 + sbbq $0x00, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Add modulus (if underflow) + addq %rcx, %r12 + adcq %rdx, %r13 + adcq %rdx, %r14 + adcq %rax, %r15 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, (%rbx) + movq %r13, 8(%rbx) + movq %r14, 
16(%rbx) + movq %r15, 24(%rbx) + addq $48, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + repz retq +#ifndef __APPLE__ +.size fe_ge_msub_avx2,.-fe_ge_msub_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_add_avx2 +.type fe_ge_add_avx2,@function +.align 4 +fe_ge_add_avx2: +#else +.section __TEXT,__text +.globl _fe_ge_add_avx2 +.p2align 2 +_fe_ge_add_avx2: +#endif /* __APPLE__ */ + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x50, %rsp + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rcx, 24(%rsp) + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq 8(%rsp), %rsi + movq 40(%rsp), %rbx + movq 32(%rsp), %rbp + # Add + movq (%rbx), %r8 + movq 8(%rbx), %r9 + movq 16(%rbx), %r10 + movq 24(%rbx), %rdx + movq %r8, %r12 + addq (%rbp), %r8 + movq %r9, %r13 + adcq 8(%rbp), %r9 + movq %r10, %r14 + adcq 16(%rbp), %r10 + movq %rdx, %r15 + adcq 24(%rbp), %rdx + movq $-19, %rcx + movq %rdx, %r11 + movq $0x7fffffffffffffff, %rax + sarq $63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + # Sub + subq (%rbp), %r12 + movq $0x00, %rdx + sbbq 8(%rbp), %r13 + movq $-19, %rcx + sbbq 16(%rbp), %r14 + movq $0x7fffffffffffffff, %rax + sbbq 24(%rbp), %r15 + sbbq $0x00, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Add modulus (if underflow) + addq %rcx, %r12 + adcq %rdx, %r13 + adcq %rdx, %r14 + adcq %rax, %r15 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, (%rsi) + movq %r13, 8(%rsi) + movq %r14, 16(%rsi) + movq %r15, 24(%rsi) + movq 16(%rsp), %rbx + movq 168(%rsp), %rbp + # Multiply + # A[0] * B[0] + movq (%rbp), %rdx + mulxq (%rdi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rdi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rdi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbp), %rdx + mulxq 8(%rdi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rbp), %rdx + mulxq (%rdi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rdi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbp), %rdx + mulxq 8(%rdi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rdi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbp), %rdx + mulxq 8(%rdi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbp), %rdx + adoxq %rcx, %r11 + mulxq 24(%rdi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbp), %rdx + mulxq 16(%rdi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbp), %rdx + adoxq %rcx, %r13 + mulxq 24(%rdi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rdi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbp), %rdx + adcxq %rcx, %r12 + mulxq 24(%rdi), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbp), %rdx + mulxq 16(%rdi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbp), %rdx + adcxq %rcx, %r14 + mulxq 24(%rdi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, 
%r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rbx) + movq %r9, 8(%rbx) + movq %r10, 16(%rbx) + movq %r11, 24(%rbx) + movq 176(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbx), %rdx + mulxq 8(%rsi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rbx), %rdx + mulxq (%rsi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbx), %rdx + mulxq 8(%rsi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rsi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbx), %rdx + mulxq 8(%rsi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbx), %rdx + adoxq %rcx, %r11 + mulxq 24(%rsi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbx), %rdx + adoxq %rcx, %r13 + mulxq 24(%rsi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbx), %rdx + adcxq %rcx, %r12 + mulxq 24(%rsi), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbx), %rdx + adcxq %rcx, %r14 + mulxq 24(%rsi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rsi) + movq %r9, 8(%rsi) + movq %r10, 16(%rsi) + movq %r11, 24(%rsi) + movq 24(%rsp), %rsi + movq 160(%rsp), %rbx + movq 144(%rsp), %rbp + # Multiply + # A[0] * B[0] + movq (%rbp), 
%rdx + mulxq (%rbx), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rbx), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rbx), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbp), %rdx + mulxq 8(%rbx), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rbp), %rdx + mulxq (%rbx), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rbx), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbp), %rdx + mulxq 8(%rbx), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rbx), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbp), %rdx + mulxq 8(%rbx), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbp), %rdx + adoxq %rcx, %r11 + mulxq 24(%rbx), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbp), %rdx + mulxq 16(%rbx), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbp), %rdx + adoxq %rcx, %r13 + mulxq 24(%rbx), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rbx), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbp), %rdx + adcxq %rcx, %r12 + mulxq 24(%rbx), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbp), %rdx + mulxq 16(%rbx), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbp), %rdx + adcxq %rcx, %r14 + mulxq 24(%rbx), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rsi) + movq %r9, 8(%rsi) + movq %r10, 16(%rsi) + movq %r11, 24(%rsi) + movq 136(%rsp), %rsi + movq 152(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbx), %rdx + mulxq 8(%rsi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rbx), %rdx + mulxq (%rsi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbx), %rdx + mulxq 8(%rsi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rsi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbx), %rdx + mulxq 8(%rsi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbx), %rdx + adoxq %rcx, %r11 + mulxq 24(%rsi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbx), %rdx + mulxq 
16(%rsi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbx), %rdx + adoxq %rcx, %r13 + mulxq 24(%rsi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbx), %rdx + adcxq %rcx, %r12 + mulxq 24(%rsi), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbx), %rdx + adcxq %rcx, %r14 + mulxq 24(%rsi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + leaq 48(%rsp), %rsi + # Double + movq (%rdi), %r8 + movq 8(%rdi), %r9 + addq %r8, %r8 + movq 16(%rdi), %r10 + adcq %r9, %r9 + movq 24(%rdi), %rdx + adcq %r10, %r10 + movq $-19, %rcx + adcq %rdx, %rdx + movq $0x7fffffffffffffff, %rax + movq %rdx, %r11 + sarq $63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + movq %r8, (%rsi) + movq %r9, 8(%rsi) + movq %r10, 16(%rsi) + movq %r11, 24(%rsi) + movq 8(%rsp), %rbx + movq 16(%rsp), %rbp + # Add + movq (%rbp), %r8 + movq 8(%rbp), %r9 + movq 16(%rbp), %r10 + movq 24(%rbp), %rdx + movq %r8, %r12 + addq (%rbx), %r8 + movq %r9, %r13 + adcq 8(%rbx), %r9 + movq %r10, %r14 + adcq 16(%rbx), %r10 + movq %rdx, %r15 + adcq 24(%rbx), %rdx + movq $-19, %rcx + movq %rdx, %r11 + movq $0x7fffffffffffffff, %rax + sarq $63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + # Sub + subq (%rbx), %r12 + movq $0x00, %rdx + sbbq 8(%rbx), %r13 + movq $-19, %rcx + sbbq 16(%rbx), %r14 + movq $0x7fffffffffffffff, %rax + sbbq 24(%rbx), %r15 + sbbq $0x00, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Add modulus (if underflow) + addq %rcx, %r12 + adcq %rdx, %r13 + adcq %rdx, %r14 + adcq %rax, %r15 + movq %r8, (%rbx) + movq %r9, 8(%rbx) + movq %r10, 16(%rbx) + movq %r11, 24(%rbx) + movq %r12, (%rdi) + movq %r13, 8(%rdi) + movq %r14, 16(%rdi) + movq %r15, 24(%rdi) + movq 24(%rsp), %rdi + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %rdx + movq %r8, %r12 + addq (%rdi), %r8 + movq %r9, %r13 + adcq 8(%rdi), %r9 + movq %r10, %r14 + adcq 16(%rdi), %r10 + movq %rdx, %r15 + adcq 24(%rdi), %rdx + movq $-19, %rcx + movq %rdx, %r11 + movq $0x7fffffffffffffff, %rax + sarq 
$63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + # Sub + subq (%rdi), %r12 + movq $0x00, %rdx + sbbq 8(%rdi), %r13 + movq $-19, %rcx + sbbq 16(%rdi), %r14 + movq $0x7fffffffffffffff, %rax + sbbq 24(%rdi), %r15 + sbbq $0x00, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Add modulus (if underflow) + addq %rcx, %r12 + adcq %rdx, %r13 + adcq %rdx, %r14 + adcq %rax, %r15 + movq %r8, (%rbp) + movq %r9, 8(%rbp) + movq %r10, 16(%rbp) + movq %r11, 24(%rbp) + movq %r12, (%rdi) + movq %r13, 8(%rdi) + movq %r14, 16(%rdi) + movq %r15, 24(%rdi) + addq $0x50, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + repz retq +#ifndef __APPLE__ +.size fe_ge_add_avx2,.-fe_ge_add_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl fe_ge_sub_avx2 +.type fe_ge_sub_avx2,@function +.align 4 +fe_ge_sub_avx2: +#else +.section __TEXT,__text +.globl _fe_ge_sub_avx2 +.p2align 2 +_fe_ge_sub_avx2: +#endif /* __APPLE__ */ + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x50, %rsp + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rcx, 24(%rsp) + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq 8(%rsp), %rsi + movq 40(%rsp), %rbx + movq 32(%rsp), %rbp + # Add + movq (%rbx), %r8 + movq 8(%rbx), %r9 + movq 16(%rbx), %r10 + movq 24(%rbx), %rdx + movq %r8, %r12 + addq (%rbp), %r8 + movq %r9, %r13 + adcq 8(%rbp), %r9 + movq %r10, %r14 + adcq 16(%rbp), %r10 + movq %rdx, %r15 + adcq 24(%rbp), %rdx + movq $-19, %rcx + movq %rdx, %r11 + movq $0x7fffffffffffffff, %rax + sarq $63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + # Sub + subq (%rbp), %r12 + movq $0x00, %rdx + sbbq 8(%rbp), %r13 + movq $-19, %rcx + sbbq 16(%rbp), %r14 + movq $0x7fffffffffffffff, %rax + sbbq 24(%rbp), %r15 + sbbq $0x00, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Add modulus (if underflow) + addq %rcx, %r12 + adcq %rdx, %r13 + adcq %rdx, %r14 + adcq %rax, %r15 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, (%rsi) + movq %r13, 8(%rsi) + movq %r14, 16(%rsi) + movq %r15, 24(%rsi) + movq 16(%rsp), %rbx + movq 176(%rsp), %rbp + # Multiply + # A[0] * B[0] + movq (%rbp), %rdx + mulxq (%rdi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rdi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rdi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbp), %rdx + mulxq 8(%rdi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rbp), %rdx + mulxq (%rdi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rdi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbp), %rdx + mulxq 8(%rdi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rdi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbp), %rdx + mulxq 8(%rdi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbp), %rdx + adoxq %rcx, %r11 + mulxq 24(%rdi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbp), %rdx + mulxq 16(%rdi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbp), %rdx + adoxq %rcx, %r13 + mulxq 24(%rdi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + 
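# (Note on the multiply pattern used throughout this file: the 4x4 schoolbook multiply interleaves two independent carry chains, adcxq accumulating through CF and adoxq through OF, so the partial products can be summed without serialising on a single carry flag.) +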
mulxq (%rdi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbp), %rdx + adcxq %rcx, %r12 + mulxq 24(%rdi), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbp), %rdx + mulxq 16(%rdi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbp), %rdx + adcxq %rcx, %r14 + mulxq 24(%rdi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rbx) + movq %r9, 8(%rbx) + movq %r10, 16(%rbx) + movq %r11, 24(%rbx) + movq 168(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbx), %rdx + mulxq 8(%rsi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rbx), %rdx + mulxq (%rsi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbx), %rdx + mulxq 8(%rsi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rsi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbx), %rdx + mulxq 8(%rsi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbx), %rdx + adoxq %rcx, %r11 + mulxq 24(%rsi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbx), %rdx + adoxq %rcx, %r13 + mulxq 24(%rsi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbx), %rdx + adcxq %rcx, %r12 + mulxq 24(%rsi), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbx), %rdx + adcxq %rcx, %r14 + mulxq 24(%rsi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + 
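# (Reduction note: with p = 2^255 - 19 we have 2^255 = 19 (mod p); the shldq chain above split the 512-bit product t into lo + 2^255*hi, and hi is folded back into the low half here as 19*hi.) +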
mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rsi) + movq %r9, 8(%rsi) + movq %r10, 16(%rsi) + movq %r11, 24(%rsi) + movq 24(%rsp), %rsi + movq 160(%rsp), %rbx + movq 144(%rsp), %rbp + # Multiply + # A[0] * B[0] + movq (%rbp), %rdx + mulxq (%rbx), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rbx), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rbx), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbp), %rdx + mulxq 8(%rbx), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] + movq 8(%rbp), %rdx + mulxq (%rbx), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rbx), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbp), %rdx + mulxq 8(%rbx), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rbx), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbp), %rdx + mulxq 8(%rbx), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbp), %rdx + adoxq %rcx, %r11 + mulxq 24(%rbx), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbp), %rdx + mulxq 16(%rbx), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbp), %rdx + adoxq %rcx, %r13 + mulxq 24(%rbx), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rbx), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbp), %rdx + adcxq %rcx, %r12 + mulxq 24(%rbx), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbp), %rdx + mulxq 16(%rbx), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbp), %rdx + adcxq %rcx, %r14 + mulxq 24(%rbx), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rsi) + movq %r9, 8(%rsi) + movq %r10, 16(%rsi) + movq %r11, 24(%rsi) + movq 136(%rsp), %rsi + movq 152(%rsp), %rbx + # Multiply + # A[0] * B[0] + movq (%rbx), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rcx, %rax + xorq %r15, %r15 + adcxq %rcx, %r9 + # A[1] * B[3] + movq 24(%rbx), %rdx + mulxq 8(%rsi), %r12, %r13 + adcxq %rax, %r10 + # A[0] * B[1] 
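+ # (mulxq reads one multiplicand implicitly from %rdx and leaves the flags untouched, which is why %rdx is reloaded with the next B limb before each column without breaking the adcx/adox chains.)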
+ movq 8(%rbx), %rdx + mulxq (%rsi), %rcx, %rax + adoxq %rcx, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rcx, %r14 + adoxq %rax, %r10 + adcxq %rcx, %r11 + # A[1] * B[2] + movq 16(%rbx), %rdx + mulxq 8(%rsi), %rcx, %rax + adcxq %r14, %r12 + adoxq %rcx, %r11 + adcxq %r15, %r13 + adoxq %rax, %r12 + # A[0] * B[2] + mulxq (%rsi), %rcx, %rax + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rcx, %r10 + # A[1] * B[1] + movq 8(%rbx), %rdx + mulxq 8(%rsi), %rdx, %rcx + adcxq %rax, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbx), %rdx + adoxq %rcx, %r11 + mulxq 24(%rsi), %rcx, %rax + adcxq %rcx, %r12 + # A[2] * B[2] + movq 16(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rax, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbx), %rdx + adoxq %rcx, %r13 + mulxq 24(%rsi), %rcx, %rax + adoxq %r15, %r14 + adcxq %rcx, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rcx + adcxq %rax, %r15 + xorq %rax, %rax + adcxq %rdx, %r11 + # A[3] * B[0] + movq (%rbx), %rdx + adcxq %rcx, %r12 + mulxq 24(%rsi), %rdx, %rcx + adoxq %rdx, %r11 + adoxq %rcx, %r12 + # A[2] * B[3] + movq 24(%rbx), %rdx + mulxq 16(%rsi), %rdx, %rcx + adcxq %rdx, %r13 + # A[3] * B[2] + movq 16(%rbx), %rdx + adcxq %rcx, %r14 + mulxq 24(%rsi), %rcx, %rdx + adcxq %rax, %r15 + adoxq %rcx, %r13 + adoxq %rdx, %r14 + adoxq %rax, %r15 + # Reduce + movq $0x7fffffffffffffff, %rax + # Move top half into t4-t7 and remove top bit from t3 + shldq $0x01, %r14, %r15 + shldq $0x01, %r13, %r14 + shldq $0x01, %r12, %r13 + shldq $0x01, %r11, %r12 + andq %rax, %r11 + # Multiply top half by 19 + movq $19, %rdx + xorq %rax, %rax + mulxq %r12, %rcx, %r12 + adcxq %rcx, %r8 + adoxq %r12, %r9 + mulxq %r13, %rcx, %r13 + adcxq %rcx, %r9 + adoxq %r13, %r10 + mulxq %r14, %rcx, %r14 + adcxq %rcx, %r10 + adoxq %r14, %r11 + mulxq %r15, %r15, %rdx + adcxq %r15, %r11 + adoxq %rax, %rdx + adcxq %rax, %rdx + # Overflow + shldq $0x01, %r11, %rdx + movq $0x7fffffffffffffff, %rax + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Reduce if top bit set + movq %r11, %rdx + shrq $63, %rdx + imulq $19, %rdx, %rcx + andq %rax, %r11 + addq %rcx, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + adcq $0x00, %r11 + # Store + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + leaq 48(%rsp), %rsi + # Double + movq (%rdi), %r8 + movq 8(%rdi), %r9 + addq %r8, %r8 + movq 16(%rdi), %r10 + adcq %r9, %r9 + movq 24(%rdi), %rdx + adcq %r10, %r10 + movq $-19, %rcx + adcq %rdx, %rdx + movq $0x7fffffffffffffff, %rax + movq %rdx, %r11 + sarq $63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + movq %r8, (%rsi) + movq %r9, 8(%rsi) + movq %r10, 16(%rsi) + movq %r11, 24(%rsi) + movq 8(%rsp), %rbx + movq 16(%rsp), %rbp + # Add + movq (%rbp), %r8 + movq 8(%rbp), %r9 + movq 16(%rbp), %r10 + movq 24(%rbp), %rdx + movq %r8, %r12 + addq (%rbx), %r8 + movq %r9, %r13 + adcq 8(%rbx), %r9 + movq %r10, %r14 + adcq 16(%rbx), %r10 + movq %rdx, %r15 + adcq 24(%rbx), %rdx + movq $-19, %rcx + movq %rdx, %r11 + movq $0x7fffffffffffffff, %rax + sarq $63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + # Sub + subq (%rbx), %r12 + movq $0x00, %rdx + sbbq 8(%rbx), %r13 + movq $-19, %rcx + sbbq 16(%rbx), %r14 + movq $0x7fffffffffffffff, %rax + sbbq 24(%rbx), %r15 + sbbq $0x00, %rdx + # Mask the modulus + andq 
%rdx, %rcx + andq %rdx, %rax + # Add modulus (if underflow) + addq %rcx, %r12 + adcq %rdx, %r13 + adcq %rdx, %r14 + adcq %rax, %r15 + movq %r8, (%rbx) + movq %r9, 8(%rbx) + movq %r10, 16(%rbx) + movq %r11, 24(%rbx) + movq %r12, (%rdi) + movq %r13, 8(%rdi) + movq %r14, 16(%rdi) + movq %r15, 24(%rdi) + movq 24(%rsp), %rdi + # Add + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %rdx + movq %r8, %r12 + addq (%rdi), %r8 + movq %r9, %r13 + adcq 8(%rdi), %r9 + movq %r10, %r14 + adcq 16(%rdi), %r10 + movq %rdx, %r15 + adcq 24(%rdi), %rdx + movq $-19, %rcx + movq %rdx, %r11 + movq $0x7fffffffffffffff, %rax + sarq $63, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Sub modulus (if overflow) + subq %rcx, %r8 + sbbq %rdx, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + # Sub + subq (%rdi), %r12 + movq $0x00, %rdx + sbbq 8(%rdi), %r13 + movq $-19, %rcx + sbbq 16(%rdi), %r14 + movq $0x7fffffffffffffff, %rax + sbbq 24(%rdi), %r15 + sbbq $0x00, %rdx + # Mask the modulus + andq %rdx, %rcx + andq %rdx, %rax + # Add modulus (if underflow) + addq %rcx, %r12 + adcq %rdx, %r13 + adcq %rdx, %r14 + adcq %rax, %r15 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, (%rbp) + movq %r13, 8(%rbp) + movq %r14, 16(%rbp) + movq %r15, 24(%rbp) + addq $0x50, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + repz retq +#ifndef __APPLE__ +.size fe_ge_sub_avx2,.-fe_ge_sub_avx2 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ diff --git a/client/wolfssl/wolfcrypt/src/fips.c b/client/wolfssl/wolfcrypt/src/fips.c new file mode 100644 index 0000000..e69de29 diff --git a/client/wolfssl/wolfcrypt/src/fips_test.c b/client/wolfssl/wolfcrypt/src/fips_test.c new file mode 100644 index 0000000..e69de29 diff --git a/client/wolfssl/wolfcrypt/src/fp_mont_small.i b/client/wolfssl/wolfcrypt/src/fp_mont_small.i new file mode 100644 index 0000000..380b0a2 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_mont_small.i @@ -0,0 +1,3874 @@ +/* fp_mont_small.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_SMALL_MONT_SET +/* computes x/R == x (mod N) via Montgomery Reduction */ +int fp_montgomery_reduce_small(fp_int *a, fp_int *m, fp_digit mp) +{ +#ifndef WOLFSSL_SMALL_STACK + fp_digit c[FP_SIZE]; +#else + fp_digit *c; +#endif + fp_digit *_c, *tmpm, mu, cy; + int oldused, x, y, pa; + +#ifdef WOLFSSL_SMALL_STACK + /* small-stack build: allocate the FP_SIZE working buffer from the heap instead */ + c = (fp_digit*)XMALLOC(sizeof(fp_digit)*FP_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (c == NULL) { + return FP_MEM; + } +#endif + + /* now zero the buffer */ + XMEMSET(c, 0, sizeof(fp_digit)*(FP_SIZE)); + + pa = m->used; + + /* copy the input */ + oldused = a->used; + for (x = 0; x < oldused; x++) { + c[x] = a->dp[x]; + } + + MONT_START; + + switch (pa) { + case 1: + x = 0; cy = 0; + LOOP_START; + _c = c + 0; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; +#else + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + break; + case 2: + x = 0; cy = 0; + LOOP_START; + _c = c + 0; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 1; cy = 0; + LOOP_START; + _c = c + 1; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + break; + case 3: + x = 0; cy = 0; + LOOP_START; + _c = c + 0; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 1; cy = 0; + LOOP_START; + _c = c + 1; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 2; cy = 0; + LOOP_START; + _c = c + 2; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + break; + case 4: + x = 0; cy = 0; + LOOP_START; + _c = c + 0; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 1; cy = 0; + LOOP_START; + _c = c + 1; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 2; cy = 0; + LOOP_START; + _c = c + 2; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 3; cy = 0; + LOOP_START; + _c = c + 3; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while
(cy) { + PROPCARRY; + ++_c; + } + break; + case 5: + x = 0; cy = 0; + LOOP_START; + _c = c + 0; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 1; cy = 0; + LOOP_START; + _c = c + 1; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 2; cy = 0; + LOOP_START; + _c = c + 2; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 3; cy = 0; + LOOP_START; + _c = c + 3; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 4; cy = 0; + LOOP_START; + _c = c + 4; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + break; + case 6: + x = 0; cy = 0; + LOOP_START; + _c = c + 0; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 1; cy = 0; + LOOP_START; + _c = c + 1; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 2; cy = 0; + LOOP_START; + _c = c + 2; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 3; cy = 0; + LOOP_START; + _c = c + 3; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 4; cy = 0; + LOOP_START; + _c = c + 4; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 5; cy = 0; + LOOP_START; + _c = c + 5; 
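+ /* Each "x = N; ... LOOP_START;" block above and below is one fully unrolled row of the Montgomery inner loop: per the tfm macros, LOOP_START derives mu from c[x] and mp (where mp = -1/m mod the digit base), the INNERMUL steps add mu * m into c starting at offset x, and the trailing while (cy) loop propagates any leftover carry. */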
+ tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + break; + case 7: + x = 0; cy = 0; + LOOP_START; + _c = c + 0; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 1; cy = 0; + LOOP_START; + _c = c + 1; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 2; cy = 0; + LOOP_START; + _c = c + 2; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 3; cy = 0; + LOOP_START; + _c = c + 3; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 4; cy = 0; + LOOP_START; + _c = c + 4; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 5; cy = 0; + LOOP_START; + _c = c + 5; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 6; cy = 0; + LOOP_START; + _c = c + 6; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + break; + case 8: + x = 0; cy = 0; + LOOP_START; + _c = c + 0; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 1; cy = 0; + LOOP_START; + _c = c + 1; + tmpm = m->dp; +#ifdef INNERMUL8 + 
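/* INNERMUL8 is the eight-digit assembly variant of INNERMUL: one invocation handles eight limbs at once, so the explicit _c += 8; tmpm += 8; bumps stand in for eight ++_c steps. */ +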
INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 2; cy = 0; + LOOP_START; + _c = c + 2; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 3; cy = 0; + LOOP_START; + _c = c + 3; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 4; cy = 0; + LOOP_START; + _c = c + 4; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 5; cy = 0; + LOOP_START; + _c = c + 5; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 6; cy = 0; + LOOP_START; + _c = c + 6; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 7; cy = 0; + LOOP_START; + _c = c + 7; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + break; + case 9: + x = 0; cy = 0; + LOOP_START; + _c = c + 0; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 1; cy = 0; + LOOP_START; + _c = c + 1; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 2; cy = 0; + LOOP_START; + _c = c + 2; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 3; cy = 0; + LOOP_START; + _c = c + 3; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; 
++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 4; cy = 0; + LOOP_START; + _c = c + 4; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 5; cy = 0; + LOOP_START; + _c = c + 5; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 6; cy = 0; + LOOP_START; + _c = c + 6; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 7; cy = 0; + LOOP_START; + _c = c + 7; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 8; cy = 0; + LOOP_START; + _c = c + 8; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + break; + case 10: + x = 0; cy = 0; + LOOP_START; + _c = c + 0; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 1; cy = 0; + LOOP_START; + _c = c + 1; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 2; cy = 0; + LOOP_START; + _c = c + 2; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 3; cy = 0; + LOOP_START; + _c = c + 3; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 4; cy = 0; + LOOP_START; + _c = c + 4; + tmpm = m->dp; +#ifdef 
INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 5; cy = 0; + LOOP_START; + _c = c + 5; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 6; cy = 0; + LOOP_START; + _c = c + 6; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 7; cy = 0; + LOOP_START; + _c = c + 7; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 8; cy = 0; + LOOP_START; + _c = c + 8; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 9; cy = 0; + LOOP_START; + _c = c + 9; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + break; + case 11: + x = 0; cy = 0; + LOOP_START; + _c = c + 0; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 1; cy = 0; + LOOP_START; + _c = c + 1; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 2; cy = 0; + LOOP_START; + _c = c + 2; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; 
+#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 3; cy = 0; + LOOP_START; + _c = c + 3; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 4; cy = 0; + LOOP_START; + _c = c + 4; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 5; cy = 0; + LOOP_START; + _c = c + 5; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 6; cy = 0; + LOOP_START; + _c = c + 6; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 7; cy = 0; + LOOP_START; + _c = c + 7; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 8; cy = 0; + LOOP_START; + _c = c + 8; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 9; cy = 0; + LOOP_START; + _c = c + 9; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 10; cy = 0; + LOOP_START; + _c = c + 10; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + break; + case 12: + x = 0; cy = 0; + 
LOOP_START; + _c = c + 0; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 1; cy = 0; + LOOP_START; + _c = c + 1; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 2; cy = 0; + LOOP_START; + _c = c + 2; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 3; cy = 0; + LOOP_START; + _c = c + 3; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 4; cy = 0; + LOOP_START; + _c = c + 4; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 5; cy = 0; + LOOP_START; + _c = c + 5; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 6; cy = 0; + LOOP_START; + _c = c + 6; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 7; cy = 0; + LOOP_START; + _c = c + 7; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + 
INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 8; cy = 0; + LOOP_START; + _c = c + 8; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 9; cy = 0; + LOOP_START; + _c = c + 9; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 10; cy = 0; + LOOP_START; + _c = c + 10; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 11; cy = 0; + LOOP_START; + _c = c + 11; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + break; + case 13: + x = 0; cy = 0; + LOOP_START; + _c = c + 0; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 1; cy = 0; + LOOP_START; + _c = c + 1; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 2; cy = 0; + LOOP_START; + _c = c + 2; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; 
+ ++_c; + } + x = 3; cy = 0; + LOOP_START; + _c = c + 3; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 4; cy = 0; + LOOP_START; + _c = c + 4; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 5; cy = 0; + LOOP_START; + _c = c + 5; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 6; cy = 0; + LOOP_START; + _c = c + 6; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 7; cy = 0; + LOOP_START; + _c = c + 7; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 8; cy = 0; + LOOP_START; + _c = c + 8; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 9; cy = 0; + LOOP_START; + _c = c + 9; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 10; cy = 0; + 
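/* cy is cleared at the start of every row; whatever carry the row's INNERMUL steps leave behind is pushed into the higher digits by the while (cy) / PROPCARRY loop before the next row begins. */ +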
LOOP_START; + _c = c + 10; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 11; cy = 0; + LOOP_START; + _c = c + 11; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 12; cy = 0; + LOOP_START; + _c = c + 12; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + break; + case 14: + x = 0; cy = 0; + LOOP_START; + _c = c + 0; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 1; cy = 0; + LOOP_START; + _c = c + 1; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 2; cy = 0; + LOOP_START; + _c = c + 2; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 3; cy = 0; + LOOP_START; + _c = c + 3; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; 
++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 4; cy = 0; + LOOP_START; + _c = c + 4; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 5; cy = 0; + LOOP_START; + _c = c + 5; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 6; cy = 0; + LOOP_START; + _c = c + 6; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 7; cy = 0; + LOOP_START; + _c = c + 7; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 8; cy = 0; + LOOP_START; + _c = c + 8; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 9; cy = 0; + LOOP_START; + _c = c + 9; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 10; cy = 0; + LOOP_START; + _c = c + 10; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; 
++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 11; cy = 0; + LOOP_START; + _c = c + 11; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 12; cy = 0; + LOOP_START; + _c = c + 12; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 13; cy = 0; + LOOP_START; + _c = c + 13; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + break; + case 15: + x = 0; cy = 0; + LOOP_START; + _c = c + 0; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 1; cy = 0; + LOOP_START; + _c = c + 1; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 2; cy = 0; + LOOP_START; + _c = c + 2; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; 
++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 3; cy = 0; + LOOP_START; + _c = c + 3; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 4; cy = 0; + LOOP_START; + _c = c + 4; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 5; cy = 0; + LOOP_START; + _c = c + 5; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 6; cy = 0; + LOOP_START; + _c = c + 6; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 7; cy = 0; + LOOP_START; + _c = c + 7; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 8; cy = 0; + LOOP_START; + _c = c + 8; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif 
+ LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 9; cy = 0; + LOOP_START; + _c = c + 9; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 10; cy = 0; + LOOP_START; + _c = c + 10; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 11; cy = 0; + LOOP_START; + _c = c + 11; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 12; cy = 0; + LOOP_START; + _c = c + 12; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 13; cy = 0; + LOOP_START; + _c = c + 13; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 14; cy = 0; + LOOP_START; + _c = c + 14; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + break; + case 16: + x = 0; cy = 0; + 
LOOP_START; + _c = c + 0; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 1; cy = 0; + LOOP_START; + _c = c + 1; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 2; cy = 0; + LOOP_START; + _c = c + 2; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 3; cy = 0; + LOOP_START; + _c = c + 3; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 4; cy = 0; + LOOP_START; + _c = c + 4; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 5; cy = 0; + LOOP_START; + _c = c + 5; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 6; cy = 0; + LOOP_START; + _c = c + 6; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 7; cy = 0; + LOOP_START; + _c = c + 7; + tmpm = m->dp; +#ifdef 
INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 8; cy = 0; + LOOP_START; + _c = c + 8; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 9; cy = 0; + LOOP_START; + _c = c + 9; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 10; cy = 0; + LOOP_START; + _c = c + 10; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 11; cy = 0; + LOOP_START; + _c = c + 11; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 12; cy = 0; + LOOP_START; + _c = c + 12; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 13; cy = 0; + LOOP_START; + _c = c + 13; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 14; cy = 0; + LOOP_START; + _c = c + 14; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + 
INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + x = 15; cy = 0; + LOOP_START; + _c = c + 15; + tmpm = m->dp; +#ifdef INNERMUL8 + INNERMUL8; _c += 8; tmpm += 8; + INNERMUL8; _c += 8; tmpm += 8; +#else + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; + INNERMUL; ++_c; +#endif + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + break; + } + /* now copy out */ + _c = c + pa; + tmpm = a->dp; + for (x = 0; x < pa+1; x++) { + *tmpm++ = *_c++; + } + + for (; x < oldused; x++) { + *tmpm++ = 0; + } + + MONT_FINI; + + a->used = pa+1; + fp_clamp(a); + + /* if A >= m then A = A - m */ + if (fp_cmp_mag (a, m) != FP_LT) { + s_fp_sub (a, m, a); + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(c, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} + +#endif diff --git a/client/wolfssl/wolfcrypt/src/fp_mul_comba_12.i b/client/wolfssl/wolfcrypt/src/fp_mul_comba_12.i new file mode 100644 index 0000000..0f0683d --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_mul_comba_12.i @@ -0,0 +1,147 @@ +/* fp_mul_comba_12.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
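/*
 * [Editorial aside, not part of the patch] The function ending above is the
 * unrolled tail of fp_montgomery_reduce: for each digit index x it computes
 * mu = c[x] * rho, adds mu * m back into c at offset x (the INNERMUL macros),
 * and ripples the leftover carry upward (the while (cy) PROPCARRY loop). A
 * minimal rolled-up sketch of that inner loop follows, assuming 32-bit digits
 * and a fixed DIGITS count; the names here (mont_rho, mont_reduce) are
 * illustrative stand-ins, not wolfSSL's API.
 */
#include <stdint.h>

#define DIGITS 4  /* stand-in for m->used */

/* rho = -m[0]^{-1} mod 2^32, via Newton iteration (m[0] must be odd). */
static uint32_t mont_rho(uint32_t m0)
{
    uint32_t x = m0;           /* inverse of m0 mod 2^3 when m0 is odd */
    x *= 2 - m0 * x;           /* now correct mod 2^6 */
    x *= 2 - m0 * x;           /* mod 2^12 */
    x *= 2 - m0 * x;           /* mod 2^24 */
    x *= 2 - m0 * x;           /* mod 2^48, i.e. all 32 bits */
    return (uint32_t)0 - x;    /* negate: -1/m0 mod 2^32 */
}

/* Reduce the 2*DIGITS-digit value c in place; afterwards the (possibly still
 * >= m) result sits in the upper digits, and the caller performs the same
 * compare-and-subtract seen at the end of the function above. */
static void mont_reduce(uint32_t c[2 * DIGITS + 1],
                        const uint32_t m[DIGITS], uint32_t rho)
{
    for (int x = 0; x < DIGITS; x++) {
        uint32_t mu = c[x] * rho;            /* LOOP_START */
        uint64_t cy = 0;
        for (int y = 0; y < DIGITS; y++) {   /* one INNERMUL per digit */
            uint64_t t = (uint64_t)mu * m[y] + c[x + y] + cy;
            c[x + y] = (uint32_t)t;
            cy = t >> 32;
        }
        for (int y = DIGITS; cy != 0; y++) { /* while (cy) PROPCARRY */
            uint64_t t = (uint64_t)c[x + y] + cy;
            c[x + y] = (uint32_t)t;
            cy = t >> 32;
        }
    }
}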
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_MUL12 +int fp_mul_comba12(fp_int *A, fp_int *B, fp_int *C) +{ + fp_digit c0, c1, c2; +#ifndef WOLFSSL_SMALL_STACK + fp_digit at[24]; +#else + fp_digit *at; +#endif + +#ifdef WOLFSSL_SMALL_STACK + at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 24, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (at == NULL) + return FP_MEM; +#endif + + XMEMCPY(at, A->dp, 12 * sizeof(fp_digit)); + XMEMCPY(at+12, B->dp, 12 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[12]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[13]); MULADD(at[1], at[12]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[14]); MULADD(at[1], at[13]); MULADD(at[2], at[12]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[15]); MULADD(at[1], at[14]); MULADD(at[2], at[13]); MULADD(at[3], at[12]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[16]); MULADD(at[1], at[15]); MULADD(at[2], at[14]); MULADD(at[3], at[13]); MULADD(at[4], at[12]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[17]); MULADD(at[1], at[16]); MULADD(at[2], at[15]); MULADD(at[3], at[14]); MULADD(at[4], at[13]); MULADD(at[5], at[12]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[18]); MULADD(at[1], at[17]); MULADD(at[2], at[16]); MULADD(at[3], at[15]); MULADD(at[4], at[14]); MULADD(at[5], at[13]); MULADD(at[6], at[12]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[19]); MULADD(at[1], at[18]); MULADD(at[2], at[17]); MULADD(at[3], at[16]); MULADD(at[4], at[15]); MULADD(at[5], at[14]); MULADD(at[6], at[13]); MULADD(at[7], at[12]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[20]); MULADD(at[1], at[19]); MULADD(at[2], at[18]); MULADD(at[3], at[17]); MULADD(at[4], at[16]); MULADD(at[5], at[15]); MULADD(at[6], at[14]); MULADD(at[7], at[13]); MULADD(at[8], at[12]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[21]); MULADD(at[1], at[20]); MULADD(at[2], at[19]); MULADD(at[3], at[18]); MULADD(at[4], at[17]); MULADD(at[5], at[16]); MULADD(at[6], at[15]); MULADD(at[7], at[14]); MULADD(at[8], at[13]); MULADD(at[9], at[12]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[22]); MULADD(at[1], at[21]); MULADD(at[2], at[20]); MULADD(at[3], at[19]); MULADD(at[4], at[18]); MULADD(at[5], at[17]); MULADD(at[6], at[16]); MULADD(at[7], at[15]); MULADD(at[8], at[14]); MULADD(at[9], at[13]); MULADD(at[10], at[12]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[23]); MULADD(at[1], at[22]); MULADD(at[2], at[21]); MULADD(at[3], at[20]); MULADD(at[4], at[19]); MULADD(at[5], at[18]); MULADD(at[6], at[17]); MULADD(at[7], at[16]); MULADD(at[8], at[15]); MULADD(at[9], at[14]); MULADD(at[10], at[13]); MULADD(at[11], at[12]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[1], at[23]); MULADD(at[2], at[22]); MULADD(at[3], at[21]); MULADD(at[4], at[20]); MULADD(at[5], at[19]); MULADD(at[6], at[18]); MULADD(at[7], at[17]); MULADD(at[8], at[16]); MULADD(at[9], at[15]); MULADD(at[10], at[14]); MULADD(at[11], at[13]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[2], at[23]); MULADD(at[3], at[22]); MULADD(at[4], at[21]); MULADD(at[5], at[20]); 
MULADD(at[6], at[19]); MULADD(at[7], at[18]); MULADD(at[8], at[17]); MULADD(at[9], at[16]); MULADD(at[10], at[15]); MULADD(at[11], at[14]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[3], at[23]); MULADD(at[4], at[22]); MULADD(at[5], at[21]); MULADD(at[6], at[20]); MULADD(at[7], at[19]); MULADD(at[8], at[18]); MULADD(at[9], at[17]); MULADD(at[10], at[16]); MULADD(at[11], at[15]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[4], at[23]); MULADD(at[5], at[22]); MULADD(at[6], at[21]); MULADD(at[7], at[20]); MULADD(at[8], at[19]); MULADD(at[9], at[18]); MULADD(at[10], at[17]); MULADD(at[11], at[16]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[5], at[23]); MULADD(at[6], at[22]); MULADD(at[7], at[21]); MULADD(at[8], at[20]); MULADD(at[9], at[19]); MULADD(at[10], at[18]); MULADD(at[11], at[17]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[6], at[23]); MULADD(at[7], at[22]); MULADD(at[8], at[21]); MULADD(at[9], at[20]); MULADD(at[10], at[19]); MULADD(at[11], at[18]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[7], at[23]); MULADD(at[8], at[22]); MULADD(at[9], at[21]); MULADD(at[10], at[20]); MULADD(at[11], at[19]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[8], at[23]); MULADD(at[9], at[22]); MULADD(at[10], at[21]); MULADD(at[11], at[20]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[9], at[23]); MULADD(at[10], at[22]); MULADD(at[11], at[21]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[10], at[23]); MULADD(at[11], at[22]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[11], at[23]); + COMBA_STORE(C->dp[22]); + COMBA_STORE2(C->dp[23]); + C->used = 24; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} +#endif diff --git a/client/wolfssl/wolfcrypt/src/fp_mul_comba_17.i b/client/wolfssl/wolfcrypt/src/fp_mul_comba_17.i new file mode 100644 index 0000000..fb32055 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_mul_comba_17.i @@ -0,0 +1,187 @@ +/* fp_mul_comba_17.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
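/*
 * [Editorial aside, not part of the patch] fp_mul_comba12 above is the
 * 12-digit unrolling of Comba multiplication: each output digit is one
 * column, every MULADD folds one partial product of that column into a
 * three-word accumulator (c0, c1, c2), COMBA_STORE writes the low word out,
 * and COMBA_FORWARD shifts the chain down one word. A rolled-up sketch,
 * assuming 32-bit digits; comba_mul is an illustrative name, not wolfSSL's
 * API, and c must hold 2*n digits.
 */
#include <stdint.h>

static void comba_mul(const uint32_t *a, const uint32_t *b,
                      uint32_t *c, int n)
{
    uint32_t c0 = 0, c1 = 0, c2 = 0;            /* COMBA_CLEAR */
    for (int k = 0; k < 2 * n - 1; k++) {       /* one column per digit */
        int lo = k < n ? 0 : k - n + 1;
        int hi = k < n ? k : n - 1;
        for (int i = lo; i <= hi; i++) {        /* MULADD(a[i], b[k-i]) */
            uint64_t t = (uint64_t)a[i] * b[k - i];
            uint64_t s = (uint64_t)c0 + (uint32_t)t;   /* add low word */
            c0 = (uint32_t)s;
            s = (uint64_t)c1 + (uint32_t)(t >> 32) + (s >> 32);
            c1 = (uint32_t)s;
            c2 += (uint32_t)(s >> 32);          /* top carry word */
        }
        c[k] = c0;                              /* COMBA_STORE */
        c0 = c1; c1 = c2; c2 = 0;               /* COMBA_FORWARD */
    }
    c[2 * n - 1] = c0;                          /* COMBA_STORE2 */
}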
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_MUL17 +int fp_mul_comba17(fp_int *A, fp_int *B, fp_int *C) +{ + fp_digit c0, c1, c2; +#ifndef WOLFSSL_SMALL_STACK + fp_digit at[34]; +#else + fp_digit *at; +#endif + +#ifdef WOLFSSL_SMALL_STACK + at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 34, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (at == NULL) + return FP_MEM; +#endif + + XMEMCPY(at, A->dp, 17 * sizeof(fp_digit)); + XMEMCPY(at+17, B->dp, 17 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[17]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[18]); MULADD(at[1], at[17]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[19]); MULADD(at[1], at[18]); MULADD(at[2], at[17]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[20]); MULADD(at[1], at[19]); MULADD(at[2], at[18]); MULADD(at[3], at[17]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[21]); MULADD(at[1], at[20]); MULADD(at[2], at[19]); MULADD(at[3], at[18]); MULADD(at[4], at[17]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[22]); MULADD(at[1], at[21]); MULADD(at[2], at[20]); MULADD(at[3], at[19]); MULADD(at[4], at[18]); MULADD(at[5], at[17]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[23]); MULADD(at[1], at[22]); MULADD(at[2], at[21]); MULADD(at[3], at[20]); MULADD(at[4], at[19]); MULADD(at[5], at[18]); MULADD(at[6], at[17]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[24]); MULADD(at[1], at[23]); MULADD(at[2], at[22]); MULADD(at[3], at[21]); MULADD(at[4], at[20]); MULADD(at[5], at[19]); MULADD(at[6], at[18]); MULADD(at[7], at[17]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[25]); MULADD(at[1], at[24]); MULADD(at[2], at[23]); MULADD(at[3], at[22]); MULADD(at[4], at[21]); MULADD(at[5], at[20]); MULADD(at[6], at[19]); MULADD(at[7], at[18]); MULADD(at[8], at[17]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[26]); MULADD(at[1], at[25]); MULADD(at[2], at[24]); MULADD(at[3], at[23]); MULADD(at[4], at[22]); MULADD(at[5], at[21]); MULADD(at[6], at[20]); MULADD(at[7], at[19]); MULADD(at[8], at[18]); MULADD(at[9], at[17]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[27]); MULADD(at[1], at[26]); MULADD(at[2], at[25]); MULADD(at[3], at[24]); MULADD(at[4], at[23]); MULADD(at[5], at[22]); MULADD(at[6], at[21]); MULADD(at[7], at[20]); MULADD(at[8], at[19]); MULADD(at[9], at[18]); MULADD(at[10], at[17]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[28]); MULADD(at[1], at[27]); MULADD(at[2], at[26]); MULADD(at[3], at[25]); MULADD(at[4], at[24]); MULADD(at[5], at[23]); MULADD(at[6], at[22]); MULADD(at[7], at[21]); MULADD(at[8], at[20]); MULADD(at[9], at[19]); MULADD(at[10], at[18]); MULADD(at[11], at[17]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[29]); MULADD(at[1], at[28]); MULADD(at[2], at[27]); MULADD(at[3], at[26]); MULADD(at[4], at[25]); MULADD(at[5], at[24]); MULADD(at[6], at[23]); MULADD(at[7], at[22]); MULADD(at[8], at[21]); MULADD(at[9], at[20]); MULADD(at[10], at[19]); MULADD(at[11], at[18]); MULADD(at[12], at[17]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[30]); MULADD(at[1], at[29]); 
MULADD(at[2], at[28]); MULADD(at[3], at[27]); MULADD(at[4], at[26]); MULADD(at[5], at[25]); MULADD(at[6], at[24]); MULADD(at[7], at[23]); MULADD(at[8], at[22]); MULADD(at[9], at[21]); MULADD(at[10], at[20]); MULADD(at[11], at[19]); MULADD(at[12], at[18]); MULADD(at[13], at[17]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[31]); MULADD(at[1], at[30]); MULADD(at[2], at[29]); MULADD(at[3], at[28]); MULADD(at[4], at[27]); MULADD(at[5], at[26]); MULADD(at[6], at[25]); MULADD(at[7], at[24]); MULADD(at[8], at[23]); MULADD(at[9], at[22]); MULADD(at[10], at[21]); MULADD(at[11], at[20]); MULADD(at[12], at[19]); MULADD(at[13], at[18]); MULADD(at[14], at[17]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[32]); MULADD(at[1], at[31]); MULADD(at[2], at[30]); MULADD(at[3], at[29]); MULADD(at[4], at[28]); MULADD(at[5], at[27]); MULADD(at[6], at[26]); MULADD(at[7], at[25]); MULADD(at[8], at[24]); MULADD(at[9], at[23]); MULADD(at[10], at[22]); MULADD(at[11], at[21]); MULADD(at[12], at[20]); MULADD(at[13], at[19]); MULADD(at[14], at[18]); MULADD(at[15], at[17]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[0], at[33]); MULADD(at[1], at[32]); MULADD(at[2], at[31]); MULADD(at[3], at[30]); MULADD(at[4], at[29]); MULADD(at[5], at[28]); MULADD(at[6], at[27]); MULADD(at[7], at[26]); MULADD(at[8], at[25]); MULADD(at[9], at[24]); MULADD(at[10], at[23]); MULADD(at[11], at[22]); MULADD(at[12], at[21]); MULADD(at[13], at[20]); MULADD(at[14], at[19]); MULADD(at[15], at[18]); MULADD(at[16], at[17]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[1], at[33]); MULADD(at[2], at[32]); MULADD(at[3], at[31]); MULADD(at[4], at[30]); MULADD(at[5], at[29]); MULADD(at[6], at[28]); MULADD(at[7], at[27]); MULADD(at[8], at[26]); MULADD(at[9], at[25]); MULADD(at[10], at[24]); MULADD(at[11], at[23]); MULADD(at[12], at[22]); MULADD(at[13], at[21]); MULADD(at[14], at[20]); MULADD(at[15], at[19]); MULADD(at[16], at[18]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[2], at[33]); MULADD(at[3], at[32]); MULADD(at[4], at[31]); MULADD(at[5], at[30]); MULADD(at[6], at[29]); MULADD(at[7], at[28]); MULADD(at[8], at[27]); MULADD(at[9], at[26]); MULADD(at[10], at[25]); MULADD(at[11], at[24]); MULADD(at[12], at[23]); MULADD(at[13], at[22]); MULADD(at[14], at[21]); MULADD(at[15], at[20]); MULADD(at[16], at[19]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[3], at[33]); MULADD(at[4], at[32]); MULADD(at[5], at[31]); MULADD(at[6], at[30]); MULADD(at[7], at[29]); MULADD(at[8], at[28]); MULADD(at[9], at[27]); MULADD(at[10], at[26]); MULADD(at[11], at[25]); MULADD(at[12], at[24]); MULADD(at[13], at[23]); MULADD(at[14], at[22]); MULADD(at[15], at[21]); MULADD(at[16], at[20]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[4], at[33]); MULADD(at[5], at[32]); MULADD(at[6], at[31]); MULADD(at[7], at[30]); MULADD(at[8], at[29]); MULADD(at[9], at[28]); MULADD(at[10], at[27]); MULADD(at[11], at[26]); MULADD(at[12], at[25]); MULADD(at[13], at[24]); MULADD(at[14], at[23]); MULADD(at[15], at[22]); MULADD(at[16], at[21]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[5], at[33]); MULADD(at[6], at[32]); MULADD(at[7], at[31]); MULADD(at[8], at[30]); MULADD(at[9], at[29]); MULADD(at[10], at[28]); MULADD(at[11], at[27]); MULADD(at[12], at[26]); MULADD(at[13], at[25]); MULADD(at[14], at[24]); MULADD(at[15], at[23]); MULADD(at[16], at[22]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + 
MULADD(at[6], at[33]); MULADD(at[7], at[32]); MULADD(at[8], at[31]); MULADD(at[9], at[30]); MULADD(at[10], at[29]); MULADD(at[11], at[28]); MULADD(at[12], at[27]); MULADD(at[13], at[26]); MULADD(at[14], at[25]); MULADD(at[15], at[24]); MULADD(at[16], at[23]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[7], at[33]); MULADD(at[8], at[32]); MULADD(at[9], at[31]); MULADD(at[10], at[30]); MULADD(at[11], at[29]); MULADD(at[12], at[28]); MULADD(at[13], at[27]); MULADD(at[14], at[26]); MULADD(at[15], at[25]); MULADD(at[16], at[24]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[8], at[33]); MULADD(at[9], at[32]); MULADD(at[10], at[31]); MULADD(at[11], at[30]); MULADD(at[12], at[29]); MULADD(at[13], at[28]); MULADD(at[14], at[27]); MULADD(at[15], at[26]); MULADD(at[16], at[25]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[9], at[33]); MULADD(at[10], at[32]); MULADD(at[11], at[31]); MULADD(at[12], at[30]); MULADD(at[13], at[29]); MULADD(at[14], at[28]); MULADD(at[15], at[27]); MULADD(at[16], at[26]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[10], at[33]); MULADD(at[11], at[32]); MULADD(at[12], at[31]); MULADD(at[13], at[30]); MULADD(at[14], at[29]); MULADD(at[15], at[28]); MULADD(at[16], at[27]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[11], at[33]); MULADD(at[12], at[32]); MULADD(at[13], at[31]); MULADD(at[14], at[30]); MULADD(at[15], at[29]); MULADD(at[16], at[28]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[12], at[33]); MULADD(at[13], at[32]); MULADD(at[14], at[31]); MULADD(at[15], at[30]); MULADD(at[16], at[29]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[13], at[33]); MULADD(at[14], at[32]); MULADD(at[15], at[31]); MULADD(at[16], at[30]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[14], at[33]); MULADD(at[15], at[32]); MULADD(at[16], at[31]); + COMBA_STORE(C->dp[30]); + /* 31 */ + COMBA_FORWARD; + MULADD(at[15], at[33]); MULADD(at[16], at[32]); + COMBA_STORE(C->dp[31]); + /* 32 */ + COMBA_FORWARD; + MULADD(at[16], at[33]); + COMBA_STORE(C->dp[32]); + COMBA_STORE2(C->dp[33]); + C->used = 34; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} +#endif diff --git a/client/wolfssl/wolfcrypt/src/fp_mul_comba_20.i b/client/wolfssl/wolfcrypt/src/fp_mul_comba_20.i new file mode 100644 index 0000000..372f51f --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_mul_comba_20.i @@ -0,0 +1,210 @@ +/* fp_mul_comba_20.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
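/*
 * [Editorial aside, not part of the patch] Each comba file above uses the
 * WOLFSSL_SMALL_STACK idiom: the scratch digits live in a fixed stack array
 * by default, but constrained builds define the macro to move them to the
 * heap (XMALLOC/XFREE) and keep stack frames shallow. A stripped-down sketch
 * with plain malloc/free and a stand-in macro name (SMALL_STACK_DEMO):
 */
#include <stdlib.h>
#include <string.h>

static int demo_scratch(const unsigned char *in, size_t len)
{
#ifndef SMALL_STACK_DEMO
    unsigned char at[48];               /* stack scratch, like fp_digit at[34] */
#else
    unsigned char *at = malloc(48);     /* XMALLOC(..., DYNAMIC_TYPE_TMP_BUFFER) */
    if (at == NULL)
        return -1;                      /* FP_MEM analogue */
#endif
    memcpy(at, in, len < 48 ? len : 48);
    /* ... the column products would run here ... */
#ifdef SMALL_STACK_DEMO
    free(at);                           /* XFREE on the same path */
#endif
    return 0;
}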
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef TFM_MUL20 +int fp_mul_comba20(fp_int *A, fp_int *B, fp_int *C) +{ + fp_digit c0, c1, c2; +#ifndef WOLFSSL_SMALL_STACK + fp_digit at[40]; +#else + fp_digit *at; +#endif + +#ifdef WOLFSSL_SMALL_STACK + at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 40, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (at == NULL) + return FP_MEM; +#endif + + XMEMCPY(at, A->dp, 20 * sizeof(fp_digit)); + XMEMCPY(at+20, B->dp, 20 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[20]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[21]); MULADD(at[1], at[20]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[22]); MULADD(at[1], at[21]); MULADD(at[2], at[20]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[23]); MULADD(at[1], at[22]); MULADD(at[2], at[21]); MULADD(at[3], at[20]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[24]); MULADD(at[1], at[23]); MULADD(at[2], at[22]); MULADD(at[3], at[21]); MULADD(at[4], at[20]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[25]); MULADD(at[1], at[24]); MULADD(at[2], at[23]); MULADD(at[3], at[22]); MULADD(at[4], at[21]); MULADD(at[5], at[20]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[26]); MULADD(at[1], at[25]); MULADD(at[2], at[24]); MULADD(at[3], at[23]); MULADD(at[4], at[22]); MULADD(at[5], at[21]); MULADD(at[6], at[20]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[27]); MULADD(at[1], at[26]); MULADD(at[2], at[25]); MULADD(at[3], at[24]); MULADD(at[4], at[23]); MULADD(at[5], at[22]); MULADD(at[6], at[21]); MULADD(at[7], at[20]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[28]); MULADD(at[1], at[27]); MULADD(at[2], at[26]); MULADD(at[3], at[25]); MULADD(at[4], at[24]); MULADD(at[5], at[23]); MULADD(at[6], at[22]); MULADD(at[7], at[21]); MULADD(at[8], at[20]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[29]); MULADD(at[1], at[28]); MULADD(at[2], at[27]); MULADD(at[3], at[26]); MULADD(at[4], at[25]); MULADD(at[5], at[24]); MULADD(at[6], at[23]); MULADD(at[7], at[22]); MULADD(at[8], at[21]); MULADD(at[9], at[20]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[30]); MULADD(at[1], at[29]); MULADD(at[2], at[28]); MULADD(at[3], at[27]); MULADD(at[4], at[26]); MULADD(at[5], at[25]); MULADD(at[6], at[24]); MULADD(at[7], at[23]); MULADD(at[8], at[22]); MULADD(at[9], at[21]); MULADD(at[10], at[20]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[31]); MULADD(at[1], at[30]); MULADD(at[2], at[29]); MULADD(at[3], at[28]); MULADD(at[4], at[27]); MULADD(at[5], at[26]); MULADD(at[6], at[25]); MULADD(at[7], at[24]); MULADD(at[8], at[23]); MULADD(at[9], at[22]); MULADD(at[10], at[21]); MULADD(at[11], at[20]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[32]); MULADD(at[1], at[31]); MULADD(at[2], at[30]); MULADD(at[3], at[29]); MULADD(at[4], at[28]); MULADD(at[5], at[27]); MULADD(at[6], at[26]); MULADD(at[7], at[25]); MULADD(at[8], at[24]); MULADD(at[9], at[23]); MULADD(at[10], at[22]); MULADD(at[11], at[21]); MULADD(at[12], at[20]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[33]); MULADD(at[1], at[32]); 
MULADD(at[2], at[31]); MULADD(at[3], at[30]); MULADD(at[4], at[29]); MULADD(at[5], at[28]); MULADD(at[6], at[27]); MULADD(at[7], at[26]); MULADD(at[8], at[25]); MULADD(at[9], at[24]); MULADD(at[10], at[23]); MULADD(at[11], at[22]); MULADD(at[12], at[21]); MULADD(at[13], at[20]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[34]); MULADD(at[1], at[33]); MULADD(at[2], at[32]); MULADD(at[3], at[31]); MULADD(at[4], at[30]); MULADD(at[5], at[29]); MULADD(at[6], at[28]); MULADD(at[7], at[27]); MULADD(at[8], at[26]); MULADD(at[9], at[25]); MULADD(at[10], at[24]); MULADD(at[11], at[23]); MULADD(at[12], at[22]); MULADD(at[13], at[21]); MULADD(at[14], at[20]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[35]); MULADD(at[1], at[34]); MULADD(at[2], at[33]); MULADD(at[3], at[32]); MULADD(at[4], at[31]); MULADD(at[5], at[30]); MULADD(at[6], at[29]); MULADD(at[7], at[28]); MULADD(at[8], at[27]); MULADD(at[9], at[26]); MULADD(at[10], at[25]); MULADD(at[11], at[24]); MULADD(at[12], at[23]); MULADD(at[13], at[22]); MULADD(at[14], at[21]); MULADD(at[15], at[20]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[0], at[36]); MULADD(at[1], at[35]); MULADD(at[2], at[34]); MULADD(at[3], at[33]); MULADD(at[4], at[32]); MULADD(at[5], at[31]); MULADD(at[6], at[30]); MULADD(at[7], at[29]); MULADD(at[8], at[28]); MULADD(at[9], at[27]); MULADD(at[10], at[26]); MULADD(at[11], at[25]); MULADD(at[12], at[24]); MULADD(at[13], at[23]); MULADD(at[14], at[22]); MULADD(at[15], at[21]); MULADD(at[16], at[20]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[0], at[37]); MULADD(at[1], at[36]); MULADD(at[2], at[35]); MULADD(at[3], at[34]); MULADD(at[4], at[33]); MULADD(at[5], at[32]); MULADD(at[6], at[31]); MULADD(at[7], at[30]); MULADD(at[8], at[29]); MULADD(at[9], at[28]); MULADD(at[10], at[27]); MULADD(at[11], at[26]); MULADD(at[12], at[25]); MULADD(at[13], at[24]); MULADD(at[14], at[23]); MULADD(at[15], at[22]); MULADD(at[16], at[21]); MULADD(at[17], at[20]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[0], at[38]); MULADD(at[1], at[37]); MULADD(at[2], at[36]); MULADD(at[3], at[35]); MULADD(at[4], at[34]); MULADD(at[5], at[33]); MULADD(at[6], at[32]); MULADD(at[7], at[31]); MULADD(at[8], at[30]); MULADD(at[9], at[29]); MULADD(at[10], at[28]); MULADD(at[11], at[27]); MULADD(at[12], at[26]); MULADD(at[13], at[25]); MULADD(at[14], at[24]); MULADD(at[15], at[23]); MULADD(at[16], at[22]); MULADD(at[17], at[21]); MULADD(at[18], at[20]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[0], at[39]); MULADD(at[1], at[38]); MULADD(at[2], at[37]); MULADD(at[3], at[36]); MULADD(at[4], at[35]); MULADD(at[5], at[34]); MULADD(at[6], at[33]); MULADD(at[7], at[32]); MULADD(at[8], at[31]); MULADD(at[9], at[30]); MULADD(at[10], at[29]); MULADD(at[11], at[28]); MULADD(at[12], at[27]); MULADD(at[13], at[26]); MULADD(at[14], at[25]); MULADD(at[15], at[24]); MULADD(at[16], at[23]); MULADD(at[17], at[22]); MULADD(at[18], at[21]); MULADD(at[19], at[20]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[1], at[39]); MULADD(at[2], at[38]); MULADD(at[3], at[37]); MULADD(at[4], at[36]); MULADD(at[5], at[35]); MULADD(at[6], at[34]); MULADD(at[7], at[33]); MULADD(at[8], at[32]); MULADD(at[9], at[31]); MULADD(at[10], at[30]); MULADD(at[11], at[29]); MULADD(at[12], at[28]); MULADD(at[13], at[27]); MULADD(at[14], at[26]); MULADD(at[15], at[25]); MULADD(at[16], at[24]); MULADD(at[17], at[23]); MULADD(at[18], at[22]); 
MULADD(at[19], at[21]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[2], at[39]); MULADD(at[3], at[38]); MULADD(at[4], at[37]); MULADD(at[5], at[36]); MULADD(at[6], at[35]); MULADD(at[7], at[34]); MULADD(at[8], at[33]); MULADD(at[9], at[32]); MULADD(at[10], at[31]); MULADD(at[11], at[30]); MULADD(at[12], at[29]); MULADD(at[13], at[28]); MULADD(at[14], at[27]); MULADD(at[15], at[26]); MULADD(at[16], at[25]); MULADD(at[17], at[24]); MULADD(at[18], at[23]); MULADD(at[19], at[22]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[3], at[39]); MULADD(at[4], at[38]); MULADD(at[5], at[37]); MULADD(at[6], at[36]); MULADD(at[7], at[35]); MULADD(at[8], at[34]); MULADD(at[9], at[33]); MULADD(at[10], at[32]); MULADD(at[11], at[31]); MULADD(at[12], at[30]); MULADD(at[13], at[29]); MULADD(at[14], at[28]); MULADD(at[15], at[27]); MULADD(at[16], at[26]); MULADD(at[17], at[25]); MULADD(at[18], at[24]); MULADD(at[19], at[23]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[4], at[39]); MULADD(at[5], at[38]); MULADD(at[6], at[37]); MULADD(at[7], at[36]); MULADD(at[8], at[35]); MULADD(at[9], at[34]); MULADD(at[10], at[33]); MULADD(at[11], at[32]); MULADD(at[12], at[31]); MULADD(at[13], at[30]); MULADD(at[14], at[29]); MULADD(at[15], at[28]); MULADD(at[16], at[27]); MULADD(at[17], at[26]); MULADD(at[18], at[25]); MULADD(at[19], at[24]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[5], at[39]); MULADD(at[6], at[38]); MULADD(at[7], at[37]); MULADD(at[8], at[36]); MULADD(at[9], at[35]); MULADD(at[10], at[34]); MULADD(at[11], at[33]); MULADD(at[12], at[32]); MULADD(at[13], at[31]); MULADD(at[14], at[30]); MULADD(at[15], at[29]); MULADD(at[16], at[28]); MULADD(at[17], at[27]); MULADD(at[18], at[26]); MULADD(at[19], at[25]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[6], at[39]); MULADD(at[7], at[38]); MULADD(at[8], at[37]); MULADD(at[9], at[36]); MULADD(at[10], at[35]); MULADD(at[11], at[34]); MULADD(at[12], at[33]); MULADD(at[13], at[32]); MULADD(at[14], at[31]); MULADD(at[15], at[30]); MULADD(at[16], at[29]); MULADD(at[17], at[28]); MULADD(at[18], at[27]); MULADD(at[19], at[26]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[7], at[39]); MULADD(at[8], at[38]); MULADD(at[9], at[37]); MULADD(at[10], at[36]); MULADD(at[11], at[35]); MULADD(at[12], at[34]); MULADD(at[13], at[33]); MULADD(at[14], at[32]); MULADD(at[15], at[31]); MULADD(at[16], at[30]); MULADD(at[17], at[29]); MULADD(at[18], at[28]); MULADD(at[19], at[27]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[8], at[39]); MULADD(at[9], at[38]); MULADD(at[10], at[37]); MULADD(at[11], at[36]); MULADD(at[12], at[35]); MULADD(at[13], at[34]); MULADD(at[14], at[33]); MULADD(at[15], at[32]); MULADD(at[16], at[31]); MULADD(at[17], at[30]); MULADD(at[18], at[29]); MULADD(at[19], at[28]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[9], at[39]); MULADD(at[10], at[38]); MULADD(at[11], at[37]); MULADD(at[12], at[36]); MULADD(at[13], at[35]); MULADD(at[14], at[34]); MULADD(at[15], at[33]); MULADD(at[16], at[32]); MULADD(at[17], at[31]); MULADD(at[18], at[30]); MULADD(at[19], at[29]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[10], at[39]); MULADD(at[11], at[38]); MULADD(at[12], at[37]); MULADD(at[13], at[36]); MULADD(at[14], at[35]); MULADD(at[15], at[34]); MULADD(at[16], at[33]); MULADD(at[17], at[32]); MULADD(at[18], at[31]); MULADD(at[19], at[30]); + COMBA_STORE(C->dp[29]); + /* 30 */ + 
COMBA_FORWARD; + MULADD(at[11], at[39]); MULADD(at[12], at[38]); MULADD(at[13], at[37]); MULADD(at[14], at[36]); MULADD(at[15], at[35]); MULADD(at[16], at[34]); MULADD(at[17], at[33]); MULADD(at[18], at[32]); MULADD(at[19], at[31]); + COMBA_STORE(C->dp[30]); + /* 31 */ + COMBA_FORWARD; + MULADD(at[12], at[39]); MULADD(at[13], at[38]); MULADD(at[14], at[37]); MULADD(at[15], at[36]); MULADD(at[16], at[35]); MULADD(at[17], at[34]); MULADD(at[18], at[33]); MULADD(at[19], at[32]); + COMBA_STORE(C->dp[31]); + /* 32 */ + COMBA_FORWARD; + MULADD(at[13], at[39]); MULADD(at[14], at[38]); MULADD(at[15], at[37]); MULADD(at[16], at[36]); MULADD(at[17], at[35]); MULADD(at[18], at[34]); MULADD(at[19], at[33]); + COMBA_STORE(C->dp[32]); + /* 33 */ + COMBA_FORWARD; + MULADD(at[14], at[39]); MULADD(at[15], at[38]); MULADD(at[16], at[37]); MULADD(at[17], at[36]); MULADD(at[18], at[35]); MULADD(at[19], at[34]); + COMBA_STORE(C->dp[33]); + /* 34 */ + COMBA_FORWARD; + MULADD(at[15], at[39]); MULADD(at[16], at[38]); MULADD(at[17], at[37]); MULADD(at[18], at[36]); MULADD(at[19], at[35]); + COMBA_STORE(C->dp[34]); + /* 35 */ + COMBA_FORWARD; + MULADD(at[16], at[39]); MULADD(at[17], at[38]); MULADD(at[18], at[37]); MULADD(at[19], at[36]); + COMBA_STORE(C->dp[35]); + /* 36 */ + COMBA_FORWARD; + MULADD(at[17], at[39]); MULADD(at[18], at[38]); MULADD(at[19], at[37]); + COMBA_STORE(C->dp[36]); + /* 37 */ + COMBA_FORWARD; + MULADD(at[18], at[39]); MULADD(at[19], at[38]); + COMBA_STORE(C->dp[37]); + /* 38 */ + COMBA_FORWARD; + MULADD(at[19], at[39]); + COMBA_STORE(C->dp[38]); + COMBA_STORE2(C->dp[39]); + C->used = 40; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} +#endif diff --git a/client/wolfssl/wolfcrypt/src/fp_mul_comba_24.i b/client/wolfssl/wolfcrypt/src/fp_mul_comba_24.i new file mode 100644 index 0000000..17705f7 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_mul_comba_24.i @@ -0,0 +1,243 @@ +/* fp_mul_comba_24.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
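/*
 * [Editorial aside, not part of the patch] Every multiplier above ends with
 * the same bookkeeping: C->used is set to the full column count, the product
 * sign is the XOR of the operand signs, and fp_clamp trims leading zero
 * digits so `used` reflects the true length (with zero canonically
 * non-negative). A sketch using a hypothetical stand-in for fp_int:
 */
typedef struct { unsigned int dp[40]; int used; int sign; } demo_int;

static void demo_clamp(demo_int *a)
{
    while (a->used > 0 && a->dp[a->used - 1] == 0)
        a->used--;                 /* drop leading zero digits */
    if (a->used == 0)
        a->sign = 0;               /* canonical zero */
}

static void demo_mul_finish(demo_int *c, const demo_int *a, const demo_int *b)
{
    c->used = 40;                  /* full column count, like C->used = 40 */
    c->sign = a->sign ^ b->sign;   /* negative iff exactly one operand is */
    demo_clamp(c);
}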
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_MUL24 +int fp_mul_comba24(fp_int *A, fp_int *B, fp_int *C) +{ + fp_digit c0, c1, c2; +#ifndef WOLFSSL_SMALL_STACK + fp_digit at[48]; +#else + fp_digit *at; +#endif + +#ifdef WOLFSSL_SMALL_STACK + at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 48, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (at == NULL) + return FP_MEM; +#endif + + XMEMCPY(at, A->dp, 24 * sizeof(fp_digit)); + XMEMCPY(at+24, B->dp, 24 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[24]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[25]); MULADD(at[1], at[24]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[26]); MULADD(at[1], at[25]); MULADD(at[2], at[24]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[27]); MULADD(at[1], at[26]); MULADD(at[2], at[25]); MULADD(at[3], at[24]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[28]); MULADD(at[1], at[27]); MULADD(at[2], at[26]); MULADD(at[3], at[25]); MULADD(at[4], at[24]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[29]); MULADD(at[1], at[28]); MULADD(at[2], at[27]); MULADD(at[3], at[26]); MULADD(at[4], at[25]); MULADD(at[5], at[24]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[30]); MULADD(at[1], at[29]); MULADD(at[2], at[28]); MULADD(at[3], at[27]); MULADD(at[4], at[26]); MULADD(at[5], at[25]); MULADD(at[6], at[24]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[31]); MULADD(at[1], at[30]); MULADD(at[2], at[29]); MULADD(at[3], at[28]); MULADD(at[4], at[27]); MULADD(at[5], at[26]); MULADD(at[6], at[25]); MULADD(at[7], at[24]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[32]); MULADD(at[1], at[31]); MULADD(at[2], at[30]); MULADD(at[3], at[29]); MULADD(at[4], at[28]); MULADD(at[5], at[27]); MULADD(at[6], at[26]); MULADD(at[7], at[25]); MULADD(at[8], at[24]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[33]); MULADD(at[1], at[32]); MULADD(at[2], at[31]); MULADD(at[3], at[30]); MULADD(at[4], at[29]); MULADD(at[5], at[28]); MULADD(at[6], at[27]); MULADD(at[7], at[26]); MULADD(at[8], at[25]); MULADD(at[9], at[24]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[34]); MULADD(at[1], at[33]); MULADD(at[2], at[32]); MULADD(at[3], at[31]); MULADD(at[4], at[30]); MULADD(at[5], at[29]); MULADD(at[6], at[28]); MULADD(at[7], at[27]); MULADD(at[8], at[26]); MULADD(at[9], at[25]); MULADD(at[10], at[24]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[35]); MULADD(at[1], at[34]); MULADD(at[2], at[33]); MULADD(at[3], at[32]); MULADD(at[4], at[31]); MULADD(at[5], at[30]); MULADD(at[6], at[29]); MULADD(at[7], at[28]); MULADD(at[8], at[27]); MULADD(at[9], at[26]); MULADD(at[10], at[25]); MULADD(at[11], at[24]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[36]); MULADD(at[1], at[35]); MULADD(at[2], at[34]); MULADD(at[3], at[33]); MULADD(at[4], at[32]); MULADD(at[5], at[31]); MULADD(at[6], at[30]); MULADD(at[7], at[29]); MULADD(at[8], at[28]); MULADD(at[9], at[27]); MULADD(at[10], at[26]); MULADD(at[11], at[25]); MULADD(at[12], at[24]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[37]); MULADD(at[1], at[36]); 
MULADD(at[2], at[35]); MULADD(at[3], at[34]); MULADD(at[4], at[33]); MULADD(at[5], at[32]); MULADD(at[6], at[31]); MULADD(at[7], at[30]); MULADD(at[8], at[29]); MULADD(at[9], at[28]); MULADD(at[10], at[27]); MULADD(at[11], at[26]); MULADD(at[12], at[25]); MULADD(at[13], at[24]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[38]); MULADD(at[1], at[37]); MULADD(at[2], at[36]); MULADD(at[3], at[35]); MULADD(at[4], at[34]); MULADD(at[5], at[33]); MULADD(at[6], at[32]); MULADD(at[7], at[31]); MULADD(at[8], at[30]); MULADD(at[9], at[29]); MULADD(at[10], at[28]); MULADD(at[11], at[27]); MULADD(at[12], at[26]); MULADD(at[13], at[25]); MULADD(at[14], at[24]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[39]); MULADD(at[1], at[38]); MULADD(at[2], at[37]); MULADD(at[3], at[36]); MULADD(at[4], at[35]); MULADD(at[5], at[34]); MULADD(at[6], at[33]); MULADD(at[7], at[32]); MULADD(at[8], at[31]); MULADD(at[9], at[30]); MULADD(at[10], at[29]); MULADD(at[11], at[28]); MULADD(at[12], at[27]); MULADD(at[13], at[26]); MULADD(at[14], at[25]); MULADD(at[15], at[24]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[0], at[40]); MULADD(at[1], at[39]); MULADD(at[2], at[38]); MULADD(at[3], at[37]); MULADD(at[4], at[36]); MULADD(at[5], at[35]); MULADD(at[6], at[34]); MULADD(at[7], at[33]); MULADD(at[8], at[32]); MULADD(at[9], at[31]); MULADD(at[10], at[30]); MULADD(at[11], at[29]); MULADD(at[12], at[28]); MULADD(at[13], at[27]); MULADD(at[14], at[26]); MULADD(at[15], at[25]); MULADD(at[16], at[24]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[0], at[41]); MULADD(at[1], at[40]); MULADD(at[2], at[39]); MULADD(at[3], at[38]); MULADD(at[4], at[37]); MULADD(at[5], at[36]); MULADD(at[6], at[35]); MULADD(at[7], at[34]); MULADD(at[8], at[33]); MULADD(at[9], at[32]); MULADD(at[10], at[31]); MULADD(at[11], at[30]); MULADD(at[12], at[29]); MULADD(at[13], at[28]); MULADD(at[14], at[27]); MULADD(at[15], at[26]); MULADD(at[16], at[25]); MULADD(at[17], at[24]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[0], at[42]); MULADD(at[1], at[41]); MULADD(at[2], at[40]); MULADD(at[3], at[39]); MULADD(at[4], at[38]); MULADD(at[5], at[37]); MULADD(at[6], at[36]); MULADD(at[7], at[35]); MULADD(at[8], at[34]); MULADD(at[9], at[33]); MULADD(at[10], at[32]); MULADD(at[11], at[31]); MULADD(at[12], at[30]); MULADD(at[13], at[29]); MULADD(at[14], at[28]); MULADD(at[15], at[27]); MULADD(at[16], at[26]); MULADD(at[17], at[25]); MULADD(at[18], at[24]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[0], at[43]); MULADD(at[1], at[42]); MULADD(at[2], at[41]); MULADD(at[3], at[40]); MULADD(at[4], at[39]); MULADD(at[5], at[38]); MULADD(at[6], at[37]); MULADD(at[7], at[36]); MULADD(at[8], at[35]); MULADD(at[9], at[34]); MULADD(at[10], at[33]); MULADD(at[11], at[32]); MULADD(at[12], at[31]); MULADD(at[13], at[30]); MULADD(at[14], at[29]); MULADD(at[15], at[28]); MULADD(at[16], at[27]); MULADD(at[17], at[26]); MULADD(at[18], at[25]); MULADD(at[19], at[24]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[0], at[44]); MULADD(at[1], at[43]); MULADD(at[2], at[42]); MULADD(at[3], at[41]); MULADD(at[4], at[40]); MULADD(at[5], at[39]); MULADD(at[6], at[38]); MULADD(at[7], at[37]); MULADD(at[8], at[36]); MULADD(at[9], at[35]); MULADD(at[10], at[34]); MULADD(at[11], at[33]); MULADD(at[12], at[32]); MULADD(at[13], at[31]); MULADD(at[14], at[30]); MULADD(at[15], at[29]); MULADD(at[16], at[28]); MULADD(at[17], at[27]); 
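   /* Each numbered block in this function evaluates one column of the
    * 24x24 schoolbook product: column k sums at[i] * at[24 + k - i] over
    * every i that keeps both indexes inside the copied operands.  MULADD
    * and the COMBA_* helpers are carry-chain macros defined elsewhere in
    * the tomsfastmath port (typically tfm.h and asm.c); assuming their
    * usual portable definitions, with c0:c1:c2 acting as a three-digit
    * column accumulator, the unrolled code is equivalent to this generic
    * loop (lo, hi, k and i are illustrative names only):
    *
    *   for (k = 0; k <= 46; k++) {
    *       if (k > 0)
    *           COMBA_FORWARD;                  shift carries down a digit
    *       lo = (k <= 23) ? 0 : k - 23;        clip k - i to B's 24 digits
    *       hi = (k <= 23) ? k : 23;            clip i to A's 24 digits
    *       for (i = lo; i <= hi; i++)
    *           MULADD(at[i], at[24 + k - i]);  c0:c1:c2 += the product
    *       COMBA_STORE(C->dp[k]);              emit the finished column
    *   }
    */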
MULADD(at[18], at[26]); MULADD(at[19], at[25]); MULADD(at[20], at[24]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[0], at[45]); MULADD(at[1], at[44]); MULADD(at[2], at[43]); MULADD(at[3], at[42]); MULADD(at[4], at[41]); MULADD(at[5], at[40]); MULADD(at[6], at[39]); MULADD(at[7], at[38]); MULADD(at[8], at[37]); MULADD(at[9], at[36]); MULADD(at[10], at[35]); MULADD(at[11], at[34]); MULADD(at[12], at[33]); MULADD(at[13], at[32]); MULADD(at[14], at[31]); MULADD(at[15], at[30]); MULADD(at[16], at[29]); MULADD(at[17], at[28]); MULADD(at[18], at[27]); MULADD(at[19], at[26]); MULADD(at[20], at[25]); MULADD(at[21], at[24]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[0], at[46]); MULADD(at[1], at[45]); MULADD(at[2], at[44]); MULADD(at[3], at[43]); MULADD(at[4], at[42]); MULADD(at[5], at[41]); MULADD(at[6], at[40]); MULADD(at[7], at[39]); MULADD(at[8], at[38]); MULADD(at[9], at[37]); MULADD(at[10], at[36]); MULADD(at[11], at[35]); MULADD(at[12], at[34]); MULADD(at[13], at[33]); MULADD(at[14], at[32]); MULADD(at[15], at[31]); MULADD(at[16], at[30]); MULADD(at[17], at[29]); MULADD(at[18], at[28]); MULADD(at[19], at[27]); MULADD(at[20], at[26]); MULADD(at[21], at[25]); MULADD(at[22], at[24]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[0], at[47]); MULADD(at[1], at[46]); MULADD(at[2], at[45]); MULADD(at[3], at[44]); MULADD(at[4], at[43]); MULADD(at[5], at[42]); MULADD(at[6], at[41]); MULADD(at[7], at[40]); MULADD(at[8], at[39]); MULADD(at[9], at[38]); MULADD(at[10], at[37]); MULADD(at[11], at[36]); MULADD(at[12], at[35]); MULADD(at[13], at[34]); MULADD(at[14], at[33]); MULADD(at[15], at[32]); MULADD(at[16], at[31]); MULADD(at[17], at[30]); MULADD(at[18], at[29]); MULADD(at[19], at[28]); MULADD(at[20], at[27]); MULADD(at[21], at[26]); MULADD(at[22], at[25]); MULADD(at[23], at[24]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[1], at[47]); MULADD(at[2], at[46]); MULADD(at[3], at[45]); MULADD(at[4], at[44]); MULADD(at[5], at[43]); MULADD(at[6], at[42]); MULADD(at[7], at[41]); MULADD(at[8], at[40]); MULADD(at[9], at[39]); MULADD(at[10], at[38]); MULADD(at[11], at[37]); MULADD(at[12], at[36]); MULADD(at[13], at[35]); MULADD(at[14], at[34]); MULADD(at[15], at[33]); MULADD(at[16], at[32]); MULADD(at[17], at[31]); MULADD(at[18], at[30]); MULADD(at[19], at[29]); MULADD(at[20], at[28]); MULADD(at[21], at[27]); MULADD(at[22], at[26]); MULADD(at[23], at[25]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[2], at[47]); MULADD(at[3], at[46]); MULADD(at[4], at[45]); MULADD(at[5], at[44]); MULADD(at[6], at[43]); MULADD(at[7], at[42]); MULADD(at[8], at[41]); MULADD(at[9], at[40]); MULADD(at[10], at[39]); MULADD(at[11], at[38]); MULADD(at[12], at[37]); MULADD(at[13], at[36]); MULADD(at[14], at[35]); MULADD(at[15], at[34]); MULADD(at[16], at[33]); MULADD(at[17], at[32]); MULADD(at[18], at[31]); MULADD(at[19], at[30]); MULADD(at[20], at[29]); MULADD(at[21], at[28]); MULADD(at[22], at[27]); MULADD(at[23], at[26]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[3], at[47]); MULADD(at[4], at[46]); MULADD(at[5], at[45]); MULADD(at[6], at[44]); MULADD(at[7], at[43]); MULADD(at[8], at[42]); MULADD(at[9], at[41]); MULADD(at[10], at[40]); MULADD(at[11], at[39]); MULADD(at[12], at[38]); MULADD(at[13], at[37]); MULADD(at[14], at[36]); MULADD(at[15], at[35]); MULADD(at[16], at[34]); MULADD(at[17], at[33]); MULADD(at[18], at[32]); MULADD(at[19], at[31]); MULADD(at[20], at[30]); MULADD(at[21], at[29]); 
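   /* From column 24 onward the lower bound rises: once k > 23, at[0]
    * would need a B digit beyond at[47], so each successive column starts
    * one index higher (column 24 starts at at[1], column 25 at at[2], and
    * so on) while i still tops out at 23, A's last digit. */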
MULADD(at[22], at[28]); MULADD(at[23], at[27]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[4], at[47]); MULADD(at[5], at[46]); MULADD(at[6], at[45]); MULADD(at[7], at[44]); MULADD(at[8], at[43]); MULADD(at[9], at[42]); MULADD(at[10], at[41]); MULADD(at[11], at[40]); MULADD(at[12], at[39]); MULADD(at[13], at[38]); MULADD(at[14], at[37]); MULADD(at[15], at[36]); MULADD(at[16], at[35]); MULADD(at[17], at[34]); MULADD(at[18], at[33]); MULADD(at[19], at[32]); MULADD(at[20], at[31]); MULADD(at[21], at[30]); MULADD(at[22], at[29]); MULADD(at[23], at[28]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[5], at[47]); MULADD(at[6], at[46]); MULADD(at[7], at[45]); MULADD(at[8], at[44]); MULADD(at[9], at[43]); MULADD(at[10], at[42]); MULADD(at[11], at[41]); MULADD(at[12], at[40]); MULADD(at[13], at[39]); MULADD(at[14], at[38]); MULADD(at[15], at[37]); MULADD(at[16], at[36]); MULADD(at[17], at[35]); MULADD(at[18], at[34]); MULADD(at[19], at[33]); MULADD(at[20], at[32]); MULADD(at[21], at[31]); MULADD(at[22], at[30]); MULADD(at[23], at[29]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[6], at[47]); MULADD(at[7], at[46]); MULADD(at[8], at[45]); MULADD(at[9], at[44]); MULADD(at[10], at[43]); MULADD(at[11], at[42]); MULADD(at[12], at[41]); MULADD(at[13], at[40]); MULADD(at[14], at[39]); MULADD(at[15], at[38]); MULADD(at[16], at[37]); MULADD(at[17], at[36]); MULADD(at[18], at[35]); MULADD(at[19], at[34]); MULADD(at[20], at[33]); MULADD(at[21], at[32]); MULADD(at[22], at[31]); MULADD(at[23], at[30]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[7], at[47]); MULADD(at[8], at[46]); MULADD(at[9], at[45]); MULADD(at[10], at[44]); MULADD(at[11], at[43]); MULADD(at[12], at[42]); MULADD(at[13], at[41]); MULADD(at[14], at[40]); MULADD(at[15], at[39]); MULADD(at[16], at[38]); MULADD(at[17], at[37]); MULADD(at[18], at[36]); MULADD(at[19], at[35]); MULADD(at[20], at[34]); MULADD(at[21], at[33]); MULADD(at[22], at[32]); MULADD(at[23], at[31]); + COMBA_STORE(C->dp[30]); + /* 31 */ + COMBA_FORWARD; + MULADD(at[8], at[47]); MULADD(at[9], at[46]); MULADD(at[10], at[45]); MULADD(at[11], at[44]); MULADD(at[12], at[43]); MULADD(at[13], at[42]); MULADD(at[14], at[41]); MULADD(at[15], at[40]); MULADD(at[16], at[39]); MULADD(at[17], at[38]); MULADD(at[18], at[37]); MULADD(at[19], at[36]); MULADD(at[20], at[35]); MULADD(at[21], at[34]); MULADD(at[22], at[33]); MULADD(at[23], at[32]); + COMBA_STORE(C->dp[31]); + /* 32 */ + COMBA_FORWARD; + MULADD(at[9], at[47]); MULADD(at[10], at[46]); MULADD(at[11], at[45]); MULADD(at[12], at[44]); MULADD(at[13], at[43]); MULADD(at[14], at[42]); MULADD(at[15], at[41]); MULADD(at[16], at[40]); MULADD(at[17], at[39]); MULADD(at[18], at[38]); MULADD(at[19], at[37]); MULADD(at[20], at[36]); MULADD(at[21], at[35]); MULADD(at[22], at[34]); MULADD(at[23], at[33]); + COMBA_STORE(C->dp[32]); + /* 33 */ + COMBA_FORWARD; + MULADD(at[10], at[47]); MULADD(at[11], at[46]); MULADD(at[12], at[45]); MULADD(at[13], at[44]); MULADD(at[14], at[43]); MULADD(at[15], at[42]); MULADD(at[16], at[41]); MULADD(at[17], at[40]); MULADD(at[18], at[39]); MULADD(at[19], at[38]); MULADD(at[20], at[37]); MULADD(at[21], at[36]); MULADD(at[22], at[35]); MULADD(at[23], at[34]); + COMBA_STORE(C->dp[33]); + /* 34 */ + COMBA_FORWARD; + MULADD(at[11], at[47]); MULADD(at[12], at[46]); MULADD(at[13], at[45]); MULADD(at[14], at[44]); MULADD(at[15], at[43]); MULADD(at[16], at[42]); MULADD(at[17], at[41]); MULADD(at[18], at[40]); MULADD(at[19], at[39]); 
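   /* Once column 46 is stored, COMBA_STORE2 below writes the leftover
    * carry word into the top digit and the epilogue normalizes the raw
    * 48-digit result: the product's sign is the XOR of the operand signs,
    * and fp_clamp drops leading zero digits, roughly (a sketch of the
    * tomsfastmath definition, not code from this file):
    *
    *   while (C->used > 0 && C->dp[C->used - 1] == 0)
    *       C->used--;
    *   if (C->used == 0)
    *       C->sign = FP_ZPOS;
    */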
MULADD(at[20], at[38]); MULADD(at[21], at[37]); MULADD(at[22], at[36]); MULADD(at[23], at[35]); + COMBA_STORE(C->dp[34]); + /* 35 */ + COMBA_FORWARD; + MULADD(at[12], at[47]); MULADD(at[13], at[46]); MULADD(at[14], at[45]); MULADD(at[15], at[44]); MULADD(at[16], at[43]); MULADD(at[17], at[42]); MULADD(at[18], at[41]); MULADD(at[19], at[40]); MULADD(at[20], at[39]); MULADD(at[21], at[38]); MULADD(at[22], at[37]); MULADD(at[23], at[36]); + COMBA_STORE(C->dp[35]); + /* 36 */ + COMBA_FORWARD; + MULADD(at[13], at[47]); MULADD(at[14], at[46]); MULADD(at[15], at[45]); MULADD(at[16], at[44]); MULADD(at[17], at[43]); MULADD(at[18], at[42]); MULADD(at[19], at[41]); MULADD(at[20], at[40]); MULADD(at[21], at[39]); MULADD(at[22], at[38]); MULADD(at[23], at[37]); + COMBA_STORE(C->dp[36]); + /* 37 */ + COMBA_FORWARD; + MULADD(at[14], at[47]); MULADD(at[15], at[46]); MULADD(at[16], at[45]); MULADD(at[17], at[44]); MULADD(at[18], at[43]); MULADD(at[19], at[42]); MULADD(at[20], at[41]); MULADD(at[21], at[40]); MULADD(at[22], at[39]); MULADD(at[23], at[38]); + COMBA_STORE(C->dp[37]); + /* 38 */ + COMBA_FORWARD; + MULADD(at[15], at[47]); MULADD(at[16], at[46]); MULADD(at[17], at[45]); MULADD(at[18], at[44]); MULADD(at[19], at[43]); MULADD(at[20], at[42]); MULADD(at[21], at[41]); MULADD(at[22], at[40]); MULADD(at[23], at[39]); + COMBA_STORE(C->dp[38]); + /* 39 */ + COMBA_FORWARD; + MULADD(at[16], at[47]); MULADD(at[17], at[46]); MULADD(at[18], at[45]); MULADD(at[19], at[44]); MULADD(at[20], at[43]); MULADD(at[21], at[42]); MULADD(at[22], at[41]); MULADD(at[23], at[40]); + COMBA_STORE(C->dp[39]); + /* 40 */ + COMBA_FORWARD; + MULADD(at[17], at[47]); MULADD(at[18], at[46]); MULADD(at[19], at[45]); MULADD(at[20], at[44]); MULADD(at[21], at[43]); MULADD(at[22], at[42]); MULADD(at[23], at[41]); + COMBA_STORE(C->dp[40]); + /* 41 */ + COMBA_FORWARD; + MULADD(at[18], at[47]); MULADD(at[19], at[46]); MULADD(at[20], at[45]); MULADD(at[21], at[44]); MULADD(at[22], at[43]); MULADD(at[23], at[42]); + COMBA_STORE(C->dp[41]); + /* 42 */ + COMBA_FORWARD; + MULADD(at[19], at[47]); MULADD(at[20], at[46]); MULADD(at[21], at[45]); MULADD(at[22], at[44]); MULADD(at[23], at[43]); + COMBA_STORE(C->dp[42]); + /* 43 */ + COMBA_FORWARD; + MULADD(at[20], at[47]); MULADD(at[21], at[46]); MULADD(at[22], at[45]); MULADD(at[23], at[44]); + COMBA_STORE(C->dp[43]); + /* 44 */ + COMBA_FORWARD; + MULADD(at[21], at[47]); MULADD(at[22], at[46]); MULADD(at[23], at[45]); + COMBA_STORE(C->dp[44]); + /* 45 */ + COMBA_FORWARD; + MULADD(at[22], at[47]); MULADD(at[23], at[46]); + COMBA_STORE(C->dp[45]); + /* 46 */ + COMBA_FORWARD; + MULADD(at[23], at[47]); + COMBA_STORE(C->dp[46]); + COMBA_STORE2(C->dp[47]); + C->used = 48; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} +#endif diff --git a/client/wolfssl/wolfcrypt/src/fp_mul_comba_28.i b/client/wolfssl/wolfcrypt/src/fp_mul_comba_28.i new file mode 100644 index 0000000..594db74 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_mul_comba_28.i @@ -0,0 +1,275 @@ +/* fp_mul_comba_28.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_MUL28 +int fp_mul_comba28(fp_int *A, fp_int *B, fp_int *C) +{ + fp_digit c0, c1, c2; +#ifndef WOLFSSL_SMALL_STACK + fp_digit at[56]; +#else + fp_digit *at; +#endif + +#ifdef WOLFSSL_SMALL_STACK + at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 56, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (at == NULL) + return FP_MEM; +#endif + + XMEMCPY(at, A->dp, 28 * sizeof(fp_digit)); + XMEMCPY(at+28, B->dp, 28 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[28]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[29]); MULADD(at[1], at[28]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[30]); MULADD(at[1], at[29]); MULADD(at[2], at[28]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[31]); MULADD(at[1], at[30]); MULADD(at[2], at[29]); MULADD(at[3], at[28]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[32]); MULADD(at[1], at[31]); MULADD(at[2], at[30]); MULADD(at[3], at[29]); MULADD(at[4], at[28]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[33]); MULADD(at[1], at[32]); MULADD(at[2], at[31]); MULADD(at[3], at[30]); MULADD(at[4], at[29]); MULADD(at[5], at[28]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[34]); MULADD(at[1], at[33]); MULADD(at[2], at[32]); MULADD(at[3], at[31]); MULADD(at[4], at[30]); MULADD(at[5], at[29]); MULADD(at[6], at[28]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[35]); MULADD(at[1], at[34]); MULADD(at[2], at[33]); MULADD(at[3], at[32]); MULADD(at[4], at[31]); MULADD(at[5], at[30]); MULADD(at[6], at[29]); MULADD(at[7], at[28]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[36]); MULADD(at[1], at[35]); MULADD(at[2], at[34]); MULADD(at[3], at[33]); MULADD(at[4], at[32]); MULADD(at[5], at[31]); MULADD(at[6], at[30]); MULADD(at[7], at[29]); MULADD(at[8], at[28]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[37]); MULADD(at[1], at[36]); MULADD(at[2], at[35]); MULADD(at[3], at[34]); MULADD(at[4], at[33]); MULADD(at[5], at[32]); MULADD(at[6], at[31]); MULADD(at[7], at[30]); MULADD(at[8], at[29]); MULADD(at[9], at[28]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[38]); MULADD(at[1], at[37]); MULADD(at[2], at[36]); MULADD(at[3], at[35]); MULADD(at[4], at[34]); MULADD(at[5], at[33]); MULADD(at[6], at[32]); MULADD(at[7], at[31]); MULADD(at[8], at[30]); MULADD(at[9], at[29]); MULADD(at[10], at[28]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[39]); MULADD(at[1], at[38]); MULADD(at[2], at[37]); MULADD(at[3], at[36]); MULADD(at[4], at[35]); MULADD(at[5], at[34]); MULADD(at[6], at[33]); MULADD(at[7], at[32]); MULADD(at[8], at[31]); MULADD(at[9], at[30]); MULADD(at[10], at[29]); MULADD(at[11], at[28]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[40]); MULADD(at[1], at[39]); MULADD(at[2], at[38]); MULADD(at[3], at[37]); MULADD(at[4], at[36]); MULADD(at[5], at[35]); MULADD(at[6], 
at[34]); MULADD(at[7], at[33]); MULADD(at[8], at[32]); MULADD(at[9], at[31]); MULADD(at[10], at[30]); MULADD(at[11], at[29]); MULADD(at[12], at[28]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[41]); MULADD(at[1], at[40]); MULADD(at[2], at[39]); MULADD(at[3], at[38]); MULADD(at[4], at[37]); MULADD(at[5], at[36]); MULADD(at[6], at[35]); MULADD(at[7], at[34]); MULADD(at[8], at[33]); MULADD(at[9], at[32]); MULADD(at[10], at[31]); MULADD(at[11], at[30]); MULADD(at[12], at[29]); MULADD(at[13], at[28]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[42]); MULADD(at[1], at[41]); MULADD(at[2], at[40]); MULADD(at[3], at[39]); MULADD(at[4], at[38]); MULADD(at[5], at[37]); MULADD(at[6], at[36]); MULADD(at[7], at[35]); MULADD(at[8], at[34]); MULADD(at[9], at[33]); MULADD(at[10], at[32]); MULADD(at[11], at[31]); MULADD(at[12], at[30]); MULADD(at[13], at[29]); MULADD(at[14], at[28]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[43]); MULADD(at[1], at[42]); MULADD(at[2], at[41]); MULADD(at[3], at[40]); MULADD(at[4], at[39]); MULADD(at[5], at[38]); MULADD(at[6], at[37]); MULADD(at[7], at[36]); MULADD(at[8], at[35]); MULADD(at[9], at[34]); MULADD(at[10], at[33]); MULADD(at[11], at[32]); MULADD(at[12], at[31]); MULADD(at[13], at[30]); MULADD(at[14], at[29]); MULADD(at[15], at[28]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[0], at[44]); MULADD(at[1], at[43]); MULADD(at[2], at[42]); MULADD(at[3], at[41]); MULADD(at[4], at[40]); MULADD(at[5], at[39]); MULADD(at[6], at[38]); MULADD(at[7], at[37]); MULADD(at[8], at[36]); MULADD(at[9], at[35]); MULADD(at[10], at[34]); MULADD(at[11], at[33]); MULADD(at[12], at[32]); MULADD(at[13], at[31]); MULADD(at[14], at[30]); MULADD(at[15], at[29]); MULADD(at[16], at[28]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[0], at[45]); MULADD(at[1], at[44]); MULADD(at[2], at[43]); MULADD(at[3], at[42]); MULADD(at[4], at[41]); MULADD(at[5], at[40]); MULADD(at[6], at[39]); MULADD(at[7], at[38]); MULADD(at[8], at[37]); MULADD(at[9], at[36]); MULADD(at[10], at[35]); MULADD(at[11], at[34]); MULADD(at[12], at[33]); MULADD(at[13], at[32]); MULADD(at[14], at[31]); MULADD(at[15], at[30]); MULADD(at[16], at[29]); MULADD(at[17], at[28]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[0], at[46]); MULADD(at[1], at[45]); MULADD(at[2], at[44]); MULADD(at[3], at[43]); MULADD(at[4], at[42]); MULADD(at[5], at[41]); MULADD(at[6], at[40]); MULADD(at[7], at[39]); MULADD(at[8], at[38]); MULADD(at[9], at[37]); MULADD(at[10], at[36]); MULADD(at[11], at[35]); MULADD(at[12], at[34]); MULADD(at[13], at[33]); MULADD(at[14], at[32]); MULADD(at[15], at[31]); MULADD(at[16], at[30]); MULADD(at[17], at[29]); MULADD(at[18], at[28]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[0], at[47]); MULADD(at[1], at[46]); MULADD(at[2], at[45]); MULADD(at[3], at[44]); MULADD(at[4], at[43]); MULADD(at[5], at[42]); MULADD(at[6], at[41]); MULADD(at[7], at[40]); MULADD(at[8], at[39]); MULADD(at[9], at[38]); MULADD(at[10], at[37]); MULADD(at[11], at[36]); MULADD(at[12], at[35]); MULADD(at[13], at[34]); MULADD(at[14], at[33]); MULADD(at[15], at[32]); MULADD(at[16], at[31]); MULADD(at[17], at[30]); MULADD(at[18], at[29]); MULADD(at[19], at[28]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[0], at[48]); MULADD(at[1], at[47]); MULADD(at[2], at[46]); MULADD(at[3], at[45]); MULADD(at[4], at[44]); MULADD(at[5], at[43]); MULADD(at[6], at[42]); 
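   /* Scratch layout: at[0..27] holds a copy of A's digits and at[28..55]
    * a copy of B's, so a single index space covers both operands and the
    * destination C may alias either input.  Under WOLFSSL_SMALL_STACK the
    * 56-digit buffer comes from XMALLOC rather than the stack and is
    * XFREEd before the final return; the FP_MEM path above returns only
    * when that allocation failed, so there is nothing to free there. */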
MULADD(at[7], at[41]); MULADD(at[8], at[40]); MULADD(at[9], at[39]); MULADD(at[10], at[38]); MULADD(at[11], at[37]); MULADD(at[12], at[36]); MULADD(at[13], at[35]); MULADD(at[14], at[34]); MULADD(at[15], at[33]); MULADD(at[16], at[32]); MULADD(at[17], at[31]); MULADD(at[18], at[30]); MULADD(at[19], at[29]); MULADD(at[20], at[28]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[0], at[49]); MULADD(at[1], at[48]); MULADD(at[2], at[47]); MULADD(at[3], at[46]); MULADD(at[4], at[45]); MULADD(at[5], at[44]); MULADD(at[6], at[43]); MULADD(at[7], at[42]); MULADD(at[8], at[41]); MULADD(at[9], at[40]); MULADD(at[10], at[39]); MULADD(at[11], at[38]); MULADD(at[12], at[37]); MULADD(at[13], at[36]); MULADD(at[14], at[35]); MULADD(at[15], at[34]); MULADD(at[16], at[33]); MULADD(at[17], at[32]); MULADD(at[18], at[31]); MULADD(at[19], at[30]); MULADD(at[20], at[29]); MULADD(at[21], at[28]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[0], at[50]); MULADD(at[1], at[49]); MULADD(at[2], at[48]); MULADD(at[3], at[47]); MULADD(at[4], at[46]); MULADD(at[5], at[45]); MULADD(at[6], at[44]); MULADD(at[7], at[43]); MULADD(at[8], at[42]); MULADD(at[9], at[41]); MULADD(at[10], at[40]); MULADD(at[11], at[39]); MULADD(at[12], at[38]); MULADD(at[13], at[37]); MULADD(at[14], at[36]); MULADD(at[15], at[35]); MULADD(at[16], at[34]); MULADD(at[17], at[33]); MULADD(at[18], at[32]); MULADD(at[19], at[31]); MULADD(at[20], at[30]); MULADD(at[21], at[29]); MULADD(at[22], at[28]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[0], at[51]); MULADD(at[1], at[50]); MULADD(at[2], at[49]); MULADD(at[3], at[48]); MULADD(at[4], at[47]); MULADD(at[5], at[46]); MULADD(at[6], at[45]); MULADD(at[7], at[44]); MULADD(at[8], at[43]); MULADD(at[9], at[42]); MULADD(at[10], at[41]); MULADD(at[11], at[40]); MULADD(at[12], at[39]); MULADD(at[13], at[38]); MULADD(at[14], at[37]); MULADD(at[15], at[36]); MULADD(at[16], at[35]); MULADD(at[17], at[34]); MULADD(at[18], at[33]); MULADD(at[19], at[32]); MULADD(at[20], at[31]); MULADD(at[21], at[30]); MULADD(at[22], at[29]); MULADD(at[23], at[28]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[0], at[52]); MULADD(at[1], at[51]); MULADD(at[2], at[50]); MULADD(at[3], at[49]); MULADD(at[4], at[48]); MULADD(at[5], at[47]); MULADD(at[6], at[46]); MULADD(at[7], at[45]); MULADD(at[8], at[44]); MULADD(at[9], at[43]); MULADD(at[10], at[42]); MULADD(at[11], at[41]); MULADD(at[12], at[40]); MULADD(at[13], at[39]); MULADD(at[14], at[38]); MULADD(at[15], at[37]); MULADD(at[16], at[36]); MULADD(at[17], at[35]); MULADD(at[18], at[34]); MULADD(at[19], at[33]); MULADD(at[20], at[32]); MULADD(at[21], at[31]); MULADD(at[22], at[30]); MULADD(at[23], at[29]); MULADD(at[24], at[28]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[0], at[53]); MULADD(at[1], at[52]); MULADD(at[2], at[51]); MULADD(at[3], at[50]); MULADD(at[4], at[49]); MULADD(at[5], at[48]); MULADD(at[6], at[47]); MULADD(at[7], at[46]); MULADD(at[8], at[45]); MULADD(at[9], at[44]); MULADD(at[10], at[43]); MULADD(at[11], at[42]); MULADD(at[12], at[41]); MULADD(at[13], at[40]); MULADD(at[14], at[39]); MULADD(at[15], at[38]); MULADD(at[16], at[37]); MULADD(at[17], at[36]); MULADD(at[18], at[35]); MULADD(at[19], at[34]); MULADD(at[20], at[33]); MULADD(at[21], at[32]); MULADD(at[22], at[31]); MULADD(at[23], at[30]); MULADD(at[24], at[29]); MULADD(at[25], at[28]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[0], at[54]); MULADD(at[1], at[53]); 
MULADD(at[2], at[52]); MULADD(at[3], at[51]); MULADD(at[4], at[50]); MULADD(at[5], at[49]); MULADD(at[6], at[48]); MULADD(at[7], at[47]); MULADD(at[8], at[46]); MULADD(at[9], at[45]); MULADD(at[10], at[44]); MULADD(at[11], at[43]); MULADD(at[12], at[42]); MULADD(at[13], at[41]); MULADD(at[14], at[40]); MULADD(at[15], at[39]); MULADD(at[16], at[38]); MULADD(at[17], at[37]); MULADD(at[18], at[36]); MULADD(at[19], at[35]); MULADD(at[20], at[34]); MULADD(at[21], at[33]); MULADD(at[22], at[32]); MULADD(at[23], at[31]); MULADD(at[24], at[30]); MULADD(at[25], at[29]); MULADD(at[26], at[28]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[0], at[55]); MULADD(at[1], at[54]); MULADD(at[2], at[53]); MULADD(at[3], at[52]); MULADD(at[4], at[51]); MULADD(at[5], at[50]); MULADD(at[6], at[49]); MULADD(at[7], at[48]); MULADD(at[8], at[47]); MULADD(at[9], at[46]); MULADD(at[10], at[45]); MULADD(at[11], at[44]); MULADD(at[12], at[43]); MULADD(at[13], at[42]); MULADD(at[14], at[41]); MULADD(at[15], at[40]); MULADD(at[16], at[39]); MULADD(at[17], at[38]); MULADD(at[18], at[37]); MULADD(at[19], at[36]); MULADD(at[20], at[35]); MULADD(at[21], at[34]); MULADD(at[22], at[33]); MULADD(at[23], at[32]); MULADD(at[24], at[31]); MULADD(at[25], at[30]); MULADD(at[26], at[29]); MULADD(at[27], at[28]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[1], at[55]); MULADD(at[2], at[54]); MULADD(at[3], at[53]); MULADD(at[4], at[52]); MULADD(at[5], at[51]); MULADD(at[6], at[50]); MULADD(at[7], at[49]); MULADD(at[8], at[48]); MULADD(at[9], at[47]); MULADD(at[10], at[46]); MULADD(at[11], at[45]); MULADD(at[12], at[44]); MULADD(at[13], at[43]); MULADD(at[14], at[42]); MULADD(at[15], at[41]); MULADD(at[16], at[40]); MULADD(at[17], at[39]); MULADD(at[18], at[38]); MULADD(at[19], at[37]); MULADD(at[20], at[36]); MULADD(at[21], at[35]); MULADD(at[22], at[34]); MULADD(at[23], at[33]); MULADD(at[24], at[32]); MULADD(at[25], at[31]); MULADD(at[26], at[30]); MULADD(at[27], at[29]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[2], at[55]); MULADD(at[3], at[54]); MULADD(at[4], at[53]); MULADD(at[5], at[52]); MULADD(at[6], at[51]); MULADD(at[7], at[50]); MULADD(at[8], at[49]); MULADD(at[9], at[48]); MULADD(at[10], at[47]); MULADD(at[11], at[46]); MULADD(at[12], at[45]); MULADD(at[13], at[44]); MULADD(at[14], at[43]); MULADD(at[15], at[42]); MULADD(at[16], at[41]); MULADD(at[17], at[40]); MULADD(at[18], at[39]); MULADD(at[19], at[38]); MULADD(at[20], at[37]); MULADD(at[21], at[36]); MULADD(at[22], at[35]); MULADD(at[23], at[34]); MULADD(at[24], at[33]); MULADD(at[25], at[32]); MULADD(at[26], at[31]); MULADD(at[27], at[30]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[3], at[55]); MULADD(at[4], at[54]); MULADD(at[5], at[53]); MULADD(at[6], at[52]); MULADD(at[7], at[51]); MULADD(at[8], at[50]); MULADD(at[9], at[49]); MULADD(at[10], at[48]); MULADD(at[11], at[47]); MULADD(at[12], at[46]); MULADD(at[13], at[45]); MULADD(at[14], at[44]); MULADD(at[15], at[43]); MULADD(at[16], at[42]); MULADD(at[17], at[41]); MULADD(at[18], at[40]); MULADD(at[19], at[39]); MULADD(at[20], at[38]); MULADD(at[21], at[37]); MULADD(at[22], at[36]); MULADD(at[23], at[35]); MULADD(at[24], at[34]); MULADD(at[25], at[33]); MULADD(at[26], at[32]); MULADD(at[27], at[31]); + COMBA_STORE(C->dp[30]); + /* 31 */ + COMBA_FORWARD; + MULADD(at[4], at[55]); MULADD(at[5], at[54]); MULADD(at[6], at[53]); MULADD(at[7], at[52]); MULADD(at[8], at[51]); MULADD(at[9], at[50]); MULADD(at[10], at[49]); 
MULADD(at[11], at[48]); MULADD(at[12], at[47]); MULADD(at[13], at[46]); MULADD(at[14], at[45]); MULADD(at[15], at[44]); MULADD(at[16], at[43]); MULADD(at[17], at[42]); MULADD(at[18], at[41]); MULADD(at[19], at[40]); MULADD(at[20], at[39]); MULADD(at[21], at[38]); MULADD(at[22], at[37]); MULADD(at[23], at[36]); MULADD(at[24], at[35]); MULADD(at[25], at[34]); MULADD(at[26], at[33]); MULADD(at[27], at[32]); + COMBA_STORE(C->dp[31]); + /* 32 */ + COMBA_FORWARD; + MULADD(at[5], at[55]); MULADD(at[6], at[54]); MULADD(at[7], at[53]); MULADD(at[8], at[52]); MULADD(at[9], at[51]); MULADD(at[10], at[50]); MULADD(at[11], at[49]); MULADD(at[12], at[48]); MULADD(at[13], at[47]); MULADD(at[14], at[46]); MULADD(at[15], at[45]); MULADD(at[16], at[44]); MULADD(at[17], at[43]); MULADD(at[18], at[42]); MULADD(at[19], at[41]); MULADD(at[20], at[40]); MULADD(at[21], at[39]); MULADD(at[22], at[38]); MULADD(at[23], at[37]); MULADD(at[24], at[36]); MULADD(at[25], at[35]); MULADD(at[26], at[34]); MULADD(at[27], at[33]); + COMBA_STORE(C->dp[32]); + /* 33 */ + COMBA_FORWARD; + MULADD(at[6], at[55]); MULADD(at[7], at[54]); MULADD(at[8], at[53]); MULADD(at[9], at[52]); MULADD(at[10], at[51]); MULADD(at[11], at[50]); MULADD(at[12], at[49]); MULADD(at[13], at[48]); MULADD(at[14], at[47]); MULADD(at[15], at[46]); MULADD(at[16], at[45]); MULADD(at[17], at[44]); MULADD(at[18], at[43]); MULADD(at[19], at[42]); MULADD(at[20], at[41]); MULADD(at[21], at[40]); MULADD(at[22], at[39]); MULADD(at[23], at[38]); MULADD(at[24], at[37]); MULADD(at[25], at[36]); MULADD(at[26], at[35]); MULADD(at[27], at[34]); + COMBA_STORE(C->dp[33]); + /* 34 */ + COMBA_FORWARD; + MULADD(at[7], at[55]); MULADD(at[8], at[54]); MULADD(at[9], at[53]); MULADD(at[10], at[52]); MULADD(at[11], at[51]); MULADD(at[12], at[50]); MULADD(at[13], at[49]); MULADD(at[14], at[48]); MULADD(at[15], at[47]); MULADD(at[16], at[46]); MULADD(at[17], at[45]); MULADD(at[18], at[44]); MULADD(at[19], at[43]); MULADD(at[20], at[42]); MULADD(at[21], at[41]); MULADD(at[22], at[40]); MULADD(at[23], at[39]); MULADD(at[24], at[38]); MULADD(at[25], at[37]); MULADD(at[26], at[36]); MULADD(at[27], at[35]); + COMBA_STORE(C->dp[34]); + /* 35 */ + COMBA_FORWARD; + MULADD(at[8], at[55]); MULADD(at[9], at[54]); MULADD(at[10], at[53]); MULADD(at[11], at[52]); MULADD(at[12], at[51]); MULADD(at[13], at[50]); MULADD(at[14], at[49]); MULADD(at[15], at[48]); MULADD(at[16], at[47]); MULADD(at[17], at[46]); MULADD(at[18], at[45]); MULADD(at[19], at[44]); MULADD(at[20], at[43]); MULADD(at[21], at[42]); MULADD(at[22], at[41]); MULADD(at[23], at[40]); MULADD(at[24], at[39]); MULADD(at[25], at[38]); MULADD(at[26], at[37]); MULADD(at[27], at[36]); + COMBA_STORE(C->dp[35]); + /* 36 */ + COMBA_FORWARD; + MULADD(at[9], at[55]); MULADD(at[10], at[54]); MULADD(at[11], at[53]); MULADD(at[12], at[52]); MULADD(at[13], at[51]); MULADD(at[14], at[50]); MULADD(at[15], at[49]); MULADD(at[16], at[48]); MULADD(at[17], at[47]); MULADD(at[18], at[46]); MULADD(at[19], at[45]); MULADD(at[20], at[44]); MULADD(at[21], at[43]); MULADD(at[22], at[42]); MULADD(at[23], at[41]); MULADD(at[24], at[40]); MULADD(at[25], at[39]); MULADD(at[26], at[38]); MULADD(at[27], at[37]); + COMBA_STORE(C->dp[36]); + /* 37 */ + COMBA_FORWARD; + MULADD(at[10], at[55]); MULADD(at[11], at[54]); MULADD(at[12], at[53]); MULADD(at[13], at[52]); MULADD(at[14], at[51]); MULADD(at[15], at[50]); MULADD(at[16], at[49]); MULADD(at[17], at[48]); MULADD(at[18], at[47]); MULADD(at[19], at[46]); MULADD(at[20], at[45]); MULADD(at[21], at[44]); MULADD(at[22], 
at[43]); MULADD(at[23], at[42]); MULADD(at[24], at[41]); MULADD(at[25], at[40]); MULADD(at[26], at[39]); MULADD(at[27], at[38]); + COMBA_STORE(C->dp[37]); + /* 38 */ + COMBA_FORWARD; + MULADD(at[11], at[55]); MULADD(at[12], at[54]); MULADD(at[13], at[53]); MULADD(at[14], at[52]); MULADD(at[15], at[51]); MULADD(at[16], at[50]); MULADD(at[17], at[49]); MULADD(at[18], at[48]); MULADD(at[19], at[47]); MULADD(at[20], at[46]); MULADD(at[21], at[45]); MULADD(at[22], at[44]); MULADD(at[23], at[43]); MULADD(at[24], at[42]); MULADD(at[25], at[41]); MULADD(at[26], at[40]); MULADD(at[27], at[39]); + COMBA_STORE(C->dp[38]); + /* 39 */ + COMBA_FORWARD; + MULADD(at[12], at[55]); MULADD(at[13], at[54]); MULADD(at[14], at[53]); MULADD(at[15], at[52]); MULADD(at[16], at[51]); MULADD(at[17], at[50]); MULADD(at[18], at[49]); MULADD(at[19], at[48]); MULADD(at[20], at[47]); MULADD(at[21], at[46]); MULADD(at[22], at[45]); MULADD(at[23], at[44]); MULADD(at[24], at[43]); MULADD(at[25], at[42]); MULADD(at[26], at[41]); MULADD(at[27], at[40]); + COMBA_STORE(C->dp[39]); + /* 40 */ + COMBA_FORWARD; + MULADD(at[13], at[55]); MULADD(at[14], at[54]); MULADD(at[15], at[53]); MULADD(at[16], at[52]); MULADD(at[17], at[51]); MULADD(at[18], at[50]); MULADD(at[19], at[49]); MULADD(at[20], at[48]); MULADD(at[21], at[47]); MULADD(at[22], at[46]); MULADD(at[23], at[45]); MULADD(at[24], at[44]); MULADD(at[25], at[43]); MULADD(at[26], at[42]); MULADD(at[27], at[41]); + COMBA_STORE(C->dp[40]); + /* 41 */ + COMBA_FORWARD; + MULADD(at[14], at[55]); MULADD(at[15], at[54]); MULADD(at[16], at[53]); MULADD(at[17], at[52]); MULADD(at[18], at[51]); MULADD(at[19], at[50]); MULADD(at[20], at[49]); MULADD(at[21], at[48]); MULADD(at[22], at[47]); MULADD(at[23], at[46]); MULADD(at[24], at[45]); MULADD(at[25], at[44]); MULADD(at[26], at[43]); MULADD(at[27], at[42]); + COMBA_STORE(C->dp[41]); + /* 42 */ + COMBA_FORWARD; + MULADD(at[15], at[55]); MULADD(at[16], at[54]); MULADD(at[17], at[53]); MULADD(at[18], at[52]); MULADD(at[19], at[51]); MULADD(at[20], at[50]); MULADD(at[21], at[49]); MULADD(at[22], at[48]); MULADD(at[23], at[47]); MULADD(at[24], at[46]); MULADD(at[25], at[45]); MULADD(at[26], at[44]); MULADD(at[27], at[43]); + COMBA_STORE(C->dp[42]); + /* 43 */ + COMBA_FORWARD; + MULADD(at[16], at[55]); MULADD(at[17], at[54]); MULADD(at[18], at[53]); MULADD(at[19], at[52]); MULADD(at[20], at[51]); MULADD(at[21], at[50]); MULADD(at[22], at[49]); MULADD(at[23], at[48]); MULADD(at[24], at[47]); MULADD(at[25], at[46]); MULADD(at[26], at[45]); MULADD(at[27], at[44]); + COMBA_STORE(C->dp[43]); + /* 44 */ + COMBA_FORWARD; + MULADD(at[17], at[55]); MULADD(at[18], at[54]); MULADD(at[19], at[53]); MULADD(at[20], at[52]); MULADD(at[21], at[51]); MULADD(at[22], at[50]); MULADD(at[23], at[49]); MULADD(at[24], at[48]); MULADD(at[25], at[47]); MULADD(at[26], at[46]); MULADD(at[27], at[45]); + COMBA_STORE(C->dp[44]); + /* 45 */ + COMBA_FORWARD; + MULADD(at[18], at[55]); MULADD(at[19], at[54]); MULADD(at[20], at[53]); MULADD(at[21], at[52]); MULADD(at[22], at[51]); MULADD(at[23], at[50]); MULADD(at[24], at[49]); MULADD(at[25], at[48]); MULADD(at[26], at[47]); MULADD(at[27], at[46]); + COMBA_STORE(C->dp[45]); + /* 46 */ + COMBA_FORWARD; + MULADD(at[19], at[55]); MULADD(at[20], at[54]); MULADD(at[21], at[53]); MULADD(at[22], at[52]); MULADD(at[23], at[51]); MULADD(at[24], at[50]); MULADD(at[25], at[49]); MULADD(at[26], at[48]); MULADD(at[27], at[47]); + COMBA_STORE(C->dp[46]); + /* 47 */ + COMBA_FORWARD; + MULADD(at[20], at[55]); MULADD(at[21], at[54]); 
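   /* Cost note: every digit pair is multiplied exactly once, so the 55
    * columns of this function expand to 28 * 28 = 784 MULADD invocations,
    * with column lengths ramping 1, 2, ..., 28, ..., 2, 1.  The unrolling
    * only serves to keep the c0:c1:c2 chain in registers; the result is
    * identical to the generic column loop sketched in fp_mul_comba_24.i
    * above. */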
MULADD(at[22], at[53]); MULADD(at[23], at[52]); MULADD(at[24], at[51]); MULADD(at[25], at[50]); MULADD(at[26], at[49]); MULADD(at[27], at[48]); + COMBA_STORE(C->dp[47]); + /* 48 */ + COMBA_FORWARD; + MULADD(at[21], at[55]); MULADD(at[22], at[54]); MULADD(at[23], at[53]); MULADD(at[24], at[52]); MULADD(at[25], at[51]); MULADD(at[26], at[50]); MULADD(at[27], at[49]); + COMBA_STORE(C->dp[48]); + /* 49 */ + COMBA_FORWARD; + MULADD(at[22], at[55]); MULADD(at[23], at[54]); MULADD(at[24], at[53]); MULADD(at[25], at[52]); MULADD(at[26], at[51]); MULADD(at[27], at[50]); + COMBA_STORE(C->dp[49]); + /* 50 */ + COMBA_FORWARD; + MULADD(at[23], at[55]); MULADD(at[24], at[54]); MULADD(at[25], at[53]); MULADD(at[26], at[52]); MULADD(at[27], at[51]); + COMBA_STORE(C->dp[50]); + /* 51 */ + COMBA_FORWARD; + MULADD(at[24], at[55]); MULADD(at[25], at[54]); MULADD(at[26], at[53]); MULADD(at[27], at[52]); + COMBA_STORE(C->dp[51]); + /* 52 */ + COMBA_FORWARD; + MULADD(at[25], at[55]); MULADD(at[26], at[54]); MULADD(at[27], at[53]); + COMBA_STORE(C->dp[52]); + /* 53 */ + COMBA_FORWARD; + MULADD(at[26], at[55]); MULADD(at[27], at[54]); + COMBA_STORE(C->dp[53]); + /* 54 */ + COMBA_FORWARD; + MULADD(at[27], at[55]); + COMBA_STORE(C->dp[54]); + COMBA_STORE2(C->dp[55]); + C->used = 56; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} +#endif diff --git a/client/wolfssl/wolfcrypt/src/fp_mul_comba_3.i b/client/wolfssl/wolfcrypt/src/fp_mul_comba_3.i new file mode 100644 index 0000000..0befff8 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_mul_comba_3.i @@ -0,0 +1,61 @@ +/* fp_mul_comba_3.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_MUL3 +int fp_mul_comba3(fp_int *A, fp_int *B, fp_int *C) +{ + fp_digit c0, c1, c2, at[6]; + + XMEMCPY(at, A->dp, 3 * sizeof(fp_digit)); + XMEMCPY(at+3, B->dp, 3 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[3]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[4]); MULADD(at[1], at[3]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[5]); MULADD(at[1], at[4]); MULADD(at[2], at[3]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[1], at[5]); MULADD(at[2], at[4]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[2], at[5]); + COMBA_STORE(C->dp[4]); + COMBA_STORE2(C->dp[5]); + C->used = 6; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + + return FP_OKAY; +} +#endif diff --git a/client/wolfssl/wolfcrypt/src/fp_mul_comba_32.i b/client/wolfssl/wolfcrypt/src/fp_mul_comba_32.i new file mode 100644 index 0000000..97dc076 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_mul_comba_32.i @@ -0,0 +1,321 @@ +/* fp_mul_comba_32.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_MUL32 +int fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C) +{ + int out_size; + fp_digit c0, c1, c2; +#ifndef WOLFSSL_SMALL_STACK + fp_digit at[64]; +#else + fp_digit *at; +#endif + +#ifdef WOLFSSL_SMALL_STACK + at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (at == NULL) + return FP_MEM; +#endif + + out_size = A->used + B->used; + XMEMCPY(at, A->dp, 32 * sizeof(fp_digit)); + XMEMCPY(at+32, B->dp, 32 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[32]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[33]); MULADD(at[1], at[32]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[34]); MULADD(at[1], at[33]); MULADD(at[2], at[32]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[35]); MULADD(at[1], at[34]); MULADD(at[2], at[33]); MULADD(at[3], at[32]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[36]); MULADD(at[1], at[35]); MULADD(at[2], at[34]); MULADD(at[3], at[33]); MULADD(at[4], at[32]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[37]); MULADD(at[1], at[36]); MULADD(at[2], at[35]); MULADD(at[3], at[34]); MULADD(at[4], at[33]); MULADD(at[5], at[32]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[38]); MULADD(at[1], at[37]); MULADD(at[2], at[36]); MULADD(at[3], at[35]); MULADD(at[4], at[34]); MULADD(at[5], at[33]); MULADD(at[6], at[32]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[39]); MULADD(at[1], at[38]); MULADD(at[2], at[37]); MULADD(at[3], at[36]); MULADD(at[4], at[35]); MULADD(at[5], at[34]); MULADD(at[6], at[33]); MULADD(at[7], at[32]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[40]); MULADD(at[1], at[39]); MULADD(at[2], at[38]); MULADD(at[3], at[37]); MULADD(at[4], at[36]); MULADD(at[5], at[35]); MULADD(at[6], at[34]); MULADD(at[7], at[33]); MULADD(at[8], at[32]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[41]); MULADD(at[1], at[40]); MULADD(at[2], at[39]); MULADD(at[3], at[38]); MULADD(at[4], at[37]); MULADD(at[5], at[36]); MULADD(at[6], at[35]); MULADD(at[7], at[34]); MULADD(at[8], at[33]); MULADD(at[9], at[32]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[42]); MULADD(at[1], at[41]); MULADD(at[2], at[40]); MULADD(at[3], at[39]); MULADD(at[4], at[38]); MULADD(at[5], at[37]); MULADD(at[6], at[36]); MULADD(at[7], at[35]); MULADD(at[8], at[34]); MULADD(at[9], at[33]); MULADD(at[10], at[32]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[43]); MULADD(at[1], at[42]); MULADD(at[2], at[41]); MULADD(at[3], at[40]); MULADD(at[4], at[39]); MULADD(at[5], at[38]); MULADD(at[6], at[37]); MULADD(at[7], at[36]); MULADD(at[8], at[35]); MULADD(at[9], at[34]); MULADD(at[10], at[33]); MULADD(at[11], at[32]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[44]); MULADD(at[1], at[43]); MULADD(at[2], at[42]); MULADD(at[3], at[41]); MULADD(at[4], at[40]); MULADD(at[5], at[39]); MULADD(at[6], at[38]); MULADD(at[7], at[37]); MULADD(at[8], at[36]); MULADD(at[9], at[35]); MULADD(at[10], at[34]); MULADD(at[11], at[33]); MULADD(at[12], at[32]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; 
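   /* Unlike the smaller comba sizes, this function tracks out_size =
    * A->used + B->used: a product of m-digit and n-digit values fits in
    * m + n digits, so when the operands together occupy at most 40 digits
    * every column past 38 can only accumulate zeros, and the early-out
    * after column 38 below finishes the result at 40 digits. */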
+ MULADD(at[0], at[45]); MULADD(at[1], at[44]); MULADD(at[2], at[43]); MULADD(at[3], at[42]); MULADD(at[4], at[41]); MULADD(at[5], at[40]); MULADD(at[6], at[39]); MULADD(at[7], at[38]); MULADD(at[8], at[37]); MULADD(at[9], at[36]); MULADD(at[10], at[35]); MULADD(at[11], at[34]); MULADD(at[12], at[33]); MULADD(at[13], at[32]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[46]); MULADD(at[1], at[45]); MULADD(at[2], at[44]); MULADD(at[3], at[43]); MULADD(at[4], at[42]); MULADD(at[5], at[41]); MULADD(at[6], at[40]); MULADD(at[7], at[39]); MULADD(at[8], at[38]); MULADD(at[9], at[37]); MULADD(at[10], at[36]); MULADD(at[11], at[35]); MULADD(at[12], at[34]); MULADD(at[13], at[33]); MULADD(at[14], at[32]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[47]); MULADD(at[1], at[46]); MULADD(at[2], at[45]); MULADD(at[3], at[44]); MULADD(at[4], at[43]); MULADD(at[5], at[42]); MULADD(at[6], at[41]); MULADD(at[7], at[40]); MULADD(at[8], at[39]); MULADD(at[9], at[38]); MULADD(at[10], at[37]); MULADD(at[11], at[36]); MULADD(at[12], at[35]); MULADD(at[13], at[34]); MULADD(at[14], at[33]); MULADD(at[15], at[32]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[0], at[48]); MULADD(at[1], at[47]); MULADD(at[2], at[46]); MULADD(at[3], at[45]); MULADD(at[4], at[44]); MULADD(at[5], at[43]); MULADD(at[6], at[42]); MULADD(at[7], at[41]); MULADD(at[8], at[40]); MULADD(at[9], at[39]); MULADD(at[10], at[38]); MULADD(at[11], at[37]); MULADD(at[12], at[36]); MULADD(at[13], at[35]); MULADD(at[14], at[34]); MULADD(at[15], at[33]); MULADD(at[16], at[32]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[0], at[49]); MULADD(at[1], at[48]); MULADD(at[2], at[47]); MULADD(at[3], at[46]); MULADD(at[4], at[45]); MULADD(at[5], at[44]); MULADD(at[6], at[43]); MULADD(at[7], at[42]); MULADD(at[8], at[41]); MULADD(at[9], at[40]); MULADD(at[10], at[39]); MULADD(at[11], at[38]); MULADD(at[12], at[37]); MULADD(at[13], at[36]); MULADD(at[14], at[35]); MULADD(at[15], at[34]); MULADD(at[16], at[33]); MULADD(at[17], at[32]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[0], at[50]); MULADD(at[1], at[49]); MULADD(at[2], at[48]); MULADD(at[3], at[47]); MULADD(at[4], at[46]); MULADD(at[5], at[45]); MULADD(at[6], at[44]); MULADD(at[7], at[43]); MULADD(at[8], at[42]); MULADD(at[9], at[41]); MULADD(at[10], at[40]); MULADD(at[11], at[39]); MULADD(at[12], at[38]); MULADD(at[13], at[37]); MULADD(at[14], at[36]); MULADD(at[15], at[35]); MULADD(at[16], at[34]); MULADD(at[17], at[33]); MULADD(at[18], at[32]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[0], at[51]); MULADD(at[1], at[50]); MULADD(at[2], at[49]); MULADD(at[3], at[48]); MULADD(at[4], at[47]); MULADD(at[5], at[46]); MULADD(at[6], at[45]); MULADD(at[7], at[44]); MULADD(at[8], at[43]); MULADD(at[9], at[42]); MULADD(at[10], at[41]); MULADD(at[11], at[40]); MULADD(at[12], at[39]); MULADD(at[13], at[38]); MULADD(at[14], at[37]); MULADD(at[15], at[36]); MULADD(at[16], at[35]); MULADD(at[17], at[34]); MULADD(at[18], at[33]); MULADD(at[19], at[32]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[0], at[52]); MULADD(at[1], at[51]); MULADD(at[2], at[50]); MULADD(at[3], at[49]); MULADD(at[4], at[48]); MULADD(at[5], at[47]); MULADD(at[6], at[46]); MULADD(at[7], at[45]); MULADD(at[8], at[44]); MULADD(at[9], at[43]); MULADD(at[10], at[42]); MULADD(at[11], at[41]); MULADD(at[12], at[40]); MULADD(at[13], at[39]); MULADD(at[14], at[38]); MULADD(at[15], at[37]); 
MULADD(at[16], at[36]); MULADD(at[17], at[35]); MULADD(at[18], at[34]); MULADD(at[19], at[33]); MULADD(at[20], at[32]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[0], at[53]); MULADD(at[1], at[52]); MULADD(at[2], at[51]); MULADD(at[3], at[50]); MULADD(at[4], at[49]); MULADD(at[5], at[48]); MULADD(at[6], at[47]); MULADD(at[7], at[46]); MULADD(at[8], at[45]); MULADD(at[9], at[44]); MULADD(at[10], at[43]); MULADD(at[11], at[42]); MULADD(at[12], at[41]); MULADD(at[13], at[40]); MULADD(at[14], at[39]); MULADD(at[15], at[38]); MULADD(at[16], at[37]); MULADD(at[17], at[36]); MULADD(at[18], at[35]); MULADD(at[19], at[34]); MULADD(at[20], at[33]); MULADD(at[21], at[32]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[0], at[54]); MULADD(at[1], at[53]); MULADD(at[2], at[52]); MULADD(at[3], at[51]); MULADD(at[4], at[50]); MULADD(at[5], at[49]); MULADD(at[6], at[48]); MULADD(at[7], at[47]); MULADD(at[8], at[46]); MULADD(at[9], at[45]); MULADD(at[10], at[44]); MULADD(at[11], at[43]); MULADD(at[12], at[42]); MULADD(at[13], at[41]); MULADD(at[14], at[40]); MULADD(at[15], at[39]); MULADD(at[16], at[38]); MULADD(at[17], at[37]); MULADD(at[18], at[36]); MULADD(at[19], at[35]); MULADD(at[20], at[34]); MULADD(at[21], at[33]); MULADD(at[22], at[32]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[0], at[55]); MULADD(at[1], at[54]); MULADD(at[2], at[53]); MULADD(at[3], at[52]); MULADD(at[4], at[51]); MULADD(at[5], at[50]); MULADD(at[6], at[49]); MULADD(at[7], at[48]); MULADD(at[8], at[47]); MULADD(at[9], at[46]); MULADD(at[10], at[45]); MULADD(at[11], at[44]); MULADD(at[12], at[43]); MULADD(at[13], at[42]); MULADD(at[14], at[41]); MULADD(at[15], at[40]); MULADD(at[16], at[39]); MULADD(at[17], at[38]); MULADD(at[18], at[37]); MULADD(at[19], at[36]); MULADD(at[20], at[35]); MULADD(at[21], at[34]); MULADD(at[22], at[33]); MULADD(at[23], at[32]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[0], at[56]); MULADD(at[1], at[55]); MULADD(at[2], at[54]); MULADD(at[3], at[53]); MULADD(at[4], at[52]); MULADD(at[5], at[51]); MULADD(at[6], at[50]); MULADD(at[7], at[49]); MULADD(at[8], at[48]); MULADD(at[9], at[47]); MULADD(at[10], at[46]); MULADD(at[11], at[45]); MULADD(at[12], at[44]); MULADD(at[13], at[43]); MULADD(at[14], at[42]); MULADD(at[15], at[41]); MULADD(at[16], at[40]); MULADD(at[17], at[39]); MULADD(at[18], at[38]); MULADD(at[19], at[37]); MULADD(at[20], at[36]); MULADD(at[21], at[35]); MULADD(at[22], at[34]); MULADD(at[23], at[33]); MULADD(at[24], at[32]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[0], at[57]); MULADD(at[1], at[56]); MULADD(at[2], at[55]); MULADD(at[3], at[54]); MULADD(at[4], at[53]); MULADD(at[5], at[52]); MULADD(at[6], at[51]); MULADD(at[7], at[50]); MULADD(at[8], at[49]); MULADD(at[9], at[48]); MULADD(at[10], at[47]); MULADD(at[11], at[46]); MULADD(at[12], at[45]); MULADD(at[13], at[44]); MULADD(at[14], at[43]); MULADD(at[15], at[42]); MULADD(at[16], at[41]); MULADD(at[17], at[40]); MULADD(at[18], at[39]); MULADD(at[19], at[38]); MULADD(at[20], at[37]); MULADD(at[21], at[36]); MULADD(at[22], at[35]); MULADD(at[23], at[34]); MULADD(at[24], at[33]); MULADD(at[25], at[32]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[0], at[58]); MULADD(at[1], at[57]); MULADD(at[2], at[56]); MULADD(at[3], at[55]); MULADD(at[4], at[54]); MULADD(at[5], at[53]); MULADD(at[6], at[52]); MULADD(at[7], at[51]); MULADD(at[8], at[50]); MULADD(at[9], at[49]); MULADD(at[10], at[48]); MULADD(at[11], 
at[47]); MULADD(at[12], at[46]); MULADD(at[13], at[45]); MULADD(at[14], at[44]); MULADD(at[15], at[43]); MULADD(at[16], at[42]); MULADD(at[17], at[41]); MULADD(at[18], at[40]); MULADD(at[19], at[39]); MULADD(at[20], at[38]); MULADD(at[21], at[37]); MULADD(at[22], at[36]); MULADD(at[23], at[35]); MULADD(at[24], at[34]); MULADD(at[25], at[33]); MULADD(at[26], at[32]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[0], at[59]); MULADD(at[1], at[58]); MULADD(at[2], at[57]); MULADD(at[3], at[56]); MULADD(at[4], at[55]); MULADD(at[5], at[54]); MULADD(at[6], at[53]); MULADD(at[7], at[52]); MULADD(at[8], at[51]); MULADD(at[9], at[50]); MULADD(at[10], at[49]); MULADD(at[11], at[48]); MULADD(at[12], at[47]); MULADD(at[13], at[46]); MULADD(at[14], at[45]); MULADD(at[15], at[44]); MULADD(at[16], at[43]); MULADD(at[17], at[42]); MULADD(at[18], at[41]); MULADD(at[19], at[40]); MULADD(at[20], at[39]); MULADD(at[21], at[38]); MULADD(at[22], at[37]); MULADD(at[23], at[36]); MULADD(at[24], at[35]); MULADD(at[25], at[34]); MULADD(at[26], at[33]); MULADD(at[27], at[32]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[0], at[60]); MULADD(at[1], at[59]); MULADD(at[2], at[58]); MULADD(at[3], at[57]); MULADD(at[4], at[56]); MULADD(at[5], at[55]); MULADD(at[6], at[54]); MULADD(at[7], at[53]); MULADD(at[8], at[52]); MULADD(at[9], at[51]); MULADD(at[10], at[50]); MULADD(at[11], at[49]); MULADD(at[12], at[48]); MULADD(at[13], at[47]); MULADD(at[14], at[46]); MULADD(at[15], at[45]); MULADD(at[16], at[44]); MULADD(at[17], at[43]); MULADD(at[18], at[42]); MULADD(at[19], at[41]); MULADD(at[20], at[40]); MULADD(at[21], at[39]); MULADD(at[22], at[38]); MULADD(at[23], at[37]); MULADD(at[24], at[36]); MULADD(at[25], at[35]); MULADD(at[26], at[34]); MULADD(at[27], at[33]); MULADD(at[28], at[32]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[0], at[61]); MULADD(at[1], at[60]); MULADD(at[2], at[59]); MULADD(at[3], at[58]); MULADD(at[4], at[57]); MULADD(at[5], at[56]); MULADD(at[6], at[55]); MULADD(at[7], at[54]); MULADD(at[8], at[53]); MULADD(at[9], at[52]); MULADD(at[10], at[51]); MULADD(at[11], at[50]); MULADD(at[12], at[49]); MULADD(at[13], at[48]); MULADD(at[14], at[47]); MULADD(at[15], at[46]); MULADD(at[16], at[45]); MULADD(at[17], at[44]); MULADD(at[18], at[43]); MULADD(at[19], at[42]); MULADD(at[20], at[41]); MULADD(at[21], at[40]); MULADD(at[22], at[39]); MULADD(at[23], at[38]); MULADD(at[24], at[37]); MULADD(at[25], at[36]); MULADD(at[26], at[35]); MULADD(at[27], at[34]); MULADD(at[28], at[33]); MULADD(at[29], at[32]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[0], at[62]); MULADD(at[1], at[61]); MULADD(at[2], at[60]); MULADD(at[3], at[59]); MULADD(at[4], at[58]); MULADD(at[5], at[57]); MULADD(at[6], at[56]); MULADD(at[7], at[55]); MULADD(at[8], at[54]); MULADD(at[9], at[53]); MULADD(at[10], at[52]); MULADD(at[11], at[51]); MULADD(at[12], at[50]); MULADD(at[13], at[49]); MULADD(at[14], at[48]); MULADD(at[15], at[47]); MULADD(at[16], at[46]); MULADD(at[17], at[45]); MULADD(at[18], at[44]); MULADD(at[19], at[43]); MULADD(at[20], at[42]); MULADD(at[21], at[41]); MULADD(at[22], at[40]); MULADD(at[23], at[39]); MULADD(at[24], at[38]); MULADD(at[25], at[37]); MULADD(at[26], at[36]); MULADD(at[27], at[35]); MULADD(at[28], at[34]); MULADD(at[29], at[33]); MULADD(at[30], at[32]); + COMBA_STORE(C->dp[30]); + /* 31 */ + COMBA_FORWARD; + MULADD(at[0], at[63]); MULADD(at[1], at[62]); MULADD(at[2], at[61]); MULADD(at[3], at[60]); MULADD(at[4], 
at[59]); MULADD(at[5], at[58]); MULADD(at[6], at[57]); MULADD(at[7], at[56]); MULADD(at[8], at[55]); MULADD(at[9], at[54]); MULADD(at[10], at[53]); MULADD(at[11], at[52]); MULADD(at[12], at[51]); MULADD(at[13], at[50]); MULADD(at[14], at[49]); MULADD(at[15], at[48]); MULADD(at[16], at[47]); MULADD(at[17], at[46]); MULADD(at[18], at[45]); MULADD(at[19], at[44]); MULADD(at[20], at[43]); MULADD(at[21], at[42]); MULADD(at[22], at[41]); MULADD(at[23], at[40]); MULADD(at[24], at[39]); MULADD(at[25], at[38]); MULADD(at[26], at[37]); MULADD(at[27], at[36]); MULADD(at[28], at[35]); MULADD(at[29], at[34]); MULADD(at[30], at[33]); MULADD(at[31], at[32]); + COMBA_STORE(C->dp[31]); + /* 32 */ + COMBA_FORWARD; + MULADD(at[1], at[63]); MULADD(at[2], at[62]); MULADD(at[3], at[61]); MULADD(at[4], at[60]); MULADD(at[5], at[59]); MULADD(at[6], at[58]); MULADD(at[7], at[57]); MULADD(at[8], at[56]); MULADD(at[9], at[55]); MULADD(at[10], at[54]); MULADD(at[11], at[53]); MULADD(at[12], at[52]); MULADD(at[13], at[51]); MULADD(at[14], at[50]); MULADD(at[15], at[49]); MULADD(at[16], at[48]); MULADD(at[17], at[47]); MULADD(at[18], at[46]); MULADD(at[19], at[45]); MULADD(at[20], at[44]); MULADD(at[21], at[43]); MULADD(at[22], at[42]); MULADD(at[23], at[41]); MULADD(at[24], at[40]); MULADD(at[25], at[39]); MULADD(at[26], at[38]); MULADD(at[27], at[37]); MULADD(at[28], at[36]); MULADD(at[29], at[35]); MULADD(at[30], at[34]); MULADD(at[31], at[33]); + COMBA_STORE(C->dp[32]); + /* 33 */ + COMBA_FORWARD; + MULADD(at[2], at[63]); MULADD(at[3], at[62]); MULADD(at[4], at[61]); MULADD(at[5], at[60]); MULADD(at[6], at[59]); MULADD(at[7], at[58]); MULADD(at[8], at[57]); MULADD(at[9], at[56]); MULADD(at[10], at[55]); MULADD(at[11], at[54]); MULADD(at[12], at[53]); MULADD(at[13], at[52]); MULADD(at[14], at[51]); MULADD(at[15], at[50]); MULADD(at[16], at[49]); MULADD(at[17], at[48]); MULADD(at[18], at[47]); MULADD(at[19], at[46]); MULADD(at[20], at[45]); MULADD(at[21], at[44]); MULADD(at[22], at[43]); MULADD(at[23], at[42]); MULADD(at[24], at[41]); MULADD(at[25], at[40]); MULADD(at[26], at[39]); MULADD(at[27], at[38]); MULADD(at[28], at[37]); MULADD(at[29], at[36]); MULADD(at[30], at[35]); MULADD(at[31], at[34]); + COMBA_STORE(C->dp[33]); + /* 34 */ + COMBA_FORWARD; + MULADD(at[3], at[63]); MULADD(at[4], at[62]); MULADD(at[5], at[61]); MULADD(at[6], at[60]); MULADD(at[7], at[59]); MULADD(at[8], at[58]); MULADD(at[9], at[57]); MULADD(at[10], at[56]); MULADD(at[11], at[55]); MULADD(at[12], at[54]); MULADD(at[13], at[53]); MULADD(at[14], at[52]); MULADD(at[15], at[51]); MULADD(at[16], at[50]); MULADD(at[17], at[49]); MULADD(at[18], at[48]); MULADD(at[19], at[47]); MULADD(at[20], at[46]); MULADD(at[21], at[45]); MULADD(at[22], at[44]); MULADD(at[23], at[43]); MULADD(at[24], at[42]); MULADD(at[25], at[41]); MULADD(at[26], at[40]); MULADD(at[27], at[39]); MULADD(at[28], at[38]); MULADD(at[29], at[37]); MULADD(at[30], at[36]); MULADD(at[31], at[35]); + COMBA_STORE(C->dp[34]); + /* 35 */ + COMBA_FORWARD; + MULADD(at[4], at[63]); MULADD(at[5], at[62]); MULADD(at[6], at[61]); MULADD(at[7], at[60]); MULADD(at[8], at[59]); MULADD(at[9], at[58]); MULADD(at[10], at[57]); MULADD(at[11], at[56]); MULADD(at[12], at[55]); MULADD(at[13], at[54]); MULADD(at[14], at[53]); MULADD(at[15], at[52]); MULADD(at[16], at[51]); MULADD(at[17], at[50]); MULADD(at[18], at[49]); MULADD(at[19], at[48]); MULADD(at[20], at[47]); MULADD(at[21], at[46]); MULADD(at[22], at[45]); MULADD(at[23], at[44]); MULADD(at[24], at[43]); MULADD(at[25], at[42]); MULADD(at[26], 
at[41]); MULADD(at[27], at[40]); MULADD(at[28], at[39]); MULADD(at[29], at[38]); MULADD(at[30], at[37]); MULADD(at[31], at[36]); + COMBA_STORE(C->dp[35]); + /* 36 */ + COMBA_FORWARD; + MULADD(at[5], at[63]); MULADD(at[6], at[62]); MULADD(at[7], at[61]); MULADD(at[8], at[60]); MULADD(at[9], at[59]); MULADD(at[10], at[58]); MULADD(at[11], at[57]); MULADD(at[12], at[56]); MULADD(at[13], at[55]); MULADD(at[14], at[54]); MULADD(at[15], at[53]); MULADD(at[16], at[52]); MULADD(at[17], at[51]); MULADD(at[18], at[50]); MULADD(at[19], at[49]); MULADD(at[20], at[48]); MULADD(at[21], at[47]); MULADD(at[22], at[46]); MULADD(at[23], at[45]); MULADD(at[24], at[44]); MULADD(at[25], at[43]); MULADD(at[26], at[42]); MULADD(at[27], at[41]); MULADD(at[28], at[40]); MULADD(at[29], at[39]); MULADD(at[30], at[38]); MULADD(at[31], at[37]); + COMBA_STORE(C->dp[36]); + /* 37 */ + COMBA_FORWARD; + MULADD(at[6], at[63]); MULADD(at[7], at[62]); MULADD(at[8], at[61]); MULADD(at[9], at[60]); MULADD(at[10], at[59]); MULADD(at[11], at[58]); MULADD(at[12], at[57]); MULADD(at[13], at[56]); MULADD(at[14], at[55]); MULADD(at[15], at[54]); MULADD(at[16], at[53]); MULADD(at[17], at[52]); MULADD(at[18], at[51]); MULADD(at[19], at[50]); MULADD(at[20], at[49]); MULADD(at[21], at[48]); MULADD(at[22], at[47]); MULADD(at[23], at[46]); MULADD(at[24], at[45]); MULADD(at[25], at[44]); MULADD(at[26], at[43]); MULADD(at[27], at[42]); MULADD(at[28], at[41]); MULADD(at[29], at[40]); MULADD(at[30], at[39]); MULADD(at[31], at[38]); + COMBA_STORE(C->dp[37]); + /* 38 */ + COMBA_FORWARD; + MULADD(at[7], at[63]); MULADD(at[8], at[62]); MULADD(at[9], at[61]); MULADD(at[10], at[60]); MULADD(at[11], at[59]); MULADD(at[12], at[58]); MULADD(at[13], at[57]); MULADD(at[14], at[56]); MULADD(at[15], at[55]); MULADD(at[16], at[54]); MULADD(at[17], at[53]); MULADD(at[18], at[52]); MULADD(at[19], at[51]); MULADD(at[20], at[50]); MULADD(at[21], at[49]); MULADD(at[22], at[48]); MULADD(at[23], at[47]); MULADD(at[24], at[46]); MULADD(at[25], at[45]); MULADD(at[26], at[44]); MULADD(at[27], at[43]); MULADD(at[28], at[42]); MULADD(at[29], at[41]); MULADD(at[30], at[40]); MULADD(at[31], at[39]); + COMBA_STORE(C->dp[38]); + + /* early out at 40 digits, 40*32==1280, or two 640 bit operands */ + if (out_size <= 40) { + COMBA_STORE2(C->dp[39]); C->used = 40; C->sign = A->sign ^ B->sign; fp_clamp(C); COMBA_FINI; +#ifdef WOLFSSL_SMALL_STACK + /* free the heap scratch buffer on this early-out path too */ + XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; + } + + /* 39 */ + COMBA_FORWARD; + MULADD(at[8], at[63]); MULADD(at[9], at[62]); MULADD(at[10], at[61]); MULADD(at[11], at[60]); MULADD(at[12], at[59]); MULADD(at[13], at[58]); MULADD(at[14], at[57]); MULADD(at[15], at[56]); MULADD(at[16], at[55]); MULADD(at[17], at[54]); MULADD(at[18], at[53]); MULADD(at[19], at[52]); MULADD(at[20], at[51]); MULADD(at[21], at[50]); MULADD(at[22], at[49]); MULADD(at[23], at[48]); MULADD(at[24], at[47]); MULADD(at[25], at[46]); MULADD(at[26], at[45]); MULADD(at[27], at[44]); MULADD(at[28], at[43]); MULADD(at[29], at[42]); MULADD(at[30], at[41]); MULADD(at[31], at[40]); + COMBA_STORE(C->dp[39]); + /* 40 */ + COMBA_FORWARD; + MULADD(at[9], at[63]); MULADD(at[10], at[62]); MULADD(at[11], at[61]); MULADD(at[12], at[60]); MULADD(at[13], at[59]); MULADD(at[14], at[58]); MULADD(at[15], at[57]); MULADD(at[16], at[56]); MULADD(at[17], at[55]); MULADD(at[18], at[54]); MULADD(at[19], at[53]); MULADD(at[20], at[52]); MULADD(at[21], at[51]); MULADD(at[22], at[50]); MULADD(at[23], at[49]); MULADD(at[24], at[48]); MULADD(at[25], at[47]); MULADD(at[26], at[46]); MULADD(at[27], at[45]); MULADD(at[28], at[44]); MULADD(at[29], at[43]);
MULADD(at[30], at[42]); MULADD(at[31], at[41]); + COMBA_STORE(C->dp[40]); + /* 41 */ + COMBA_FORWARD; + MULADD(at[10], at[63]); MULADD(at[11], at[62]); MULADD(at[12], at[61]); MULADD(at[13], at[60]); MULADD(at[14], at[59]); MULADD(at[15], at[58]); MULADD(at[16], at[57]); MULADD(at[17], at[56]); MULADD(at[18], at[55]); MULADD(at[19], at[54]); MULADD(at[20], at[53]); MULADD(at[21], at[52]); MULADD(at[22], at[51]); MULADD(at[23], at[50]); MULADD(at[24], at[49]); MULADD(at[25], at[48]); MULADD(at[26], at[47]); MULADD(at[27], at[46]); MULADD(at[28], at[45]); MULADD(at[29], at[44]); MULADD(at[30], at[43]); MULADD(at[31], at[42]); + COMBA_STORE(C->dp[41]); + /* 42 */ + COMBA_FORWARD; + MULADD(at[11], at[63]); MULADD(at[12], at[62]); MULADD(at[13], at[61]); MULADD(at[14], at[60]); MULADD(at[15], at[59]); MULADD(at[16], at[58]); MULADD(at[17], at[57]); MULADD(at[18], at[56]); MULADD(at[19], at[55]); MULADD(at[20], at[54]); MULADD(at[21], at[53]); MULADD(at[22], at[52]); MULADD(at[23], at[51]); MULADD(at[24], at[50]); MULADD(at[25], at[49]); MULADD(at[26], at[48]); MULADD(at[27], at[47]); MULADD(at[28], at[46]); MULADD(at[29], at[45]); MULADD(at[30], at[44]); MULADD(at[31], at[43]); + COMBA_STORE(C->dp[42]); + /* 43 */ + COMBA_FORWARD; + MULADD(at[12], at[63]); MULADD(at[13], at[62]); MULADD(at[14], at[61]); MULADD(at[15], at[60]); MULADD(at[16], at[59]); MULADD(at[17], at[58]); MULADD(at[18], at[57]); MULADD(at[19], at[56]); MULADD(at[20], at[55]); MULADD(at[21], at[54]); MULADD(at[22], at[53]); MULADD(at[23], at[52]); MULADD(at[24], at[51]); MULADD(at[25], at[50]); MULADD(at[26], at[49]); MULADD(at[27], at[48]); MULADD(at[28], at[47]); MULADD(at[29], at[46]); MULADD(at[30], at[45]); MULADD(at[31], at[44]); + COMBA_STORE(C->dp[43]); + /* 44 */ + COMBA_FORWARD; + MULADD(at[13], at[63]); MULADD(at[14], at[62]); MULADD(at[15], at[61]); MULADD(at[16], at[60]); MULADD(at[17], at[59]); MULADD(at[18], at[58]); MULADD(at[19], at[57]); MULADD(at[20], at[56]); MULADD(at[21], at[55]); MULADD(at[22], at[54]); MULADD(at[23], at[53]); MULADD(at[24], at[52]); MULADD(at[25], at[51]); MULADD(at[26], at[50]); MULADD(at[27], at[49]); MULADD(at[28], at[48]); MULADD(at[29], at[47]); MULADD(at[30], at[46]); MULADD(at[31], at[45]); + COMBA_STORE(C->dp[44]); + /* 45 */ + COMBA_FORWARD; + MULADD(at[14], at[63]); MULADD(at[15], at[62]); MULADD(at[16], at[61]); MULADD(at[17], at[60]); MULADD(at[18], at[59]); MULADD(at[19], at[58]); MULADD(at[20], at[57]); MULADD(at[21], at[56]); MULADD(at[22], at[55]); MULADD(at[23], at[54]); MULADD(at[24], at[53]); MULADD(at[25], at[52]); MULADD(at[26], at[51]); MULADD(at[27], at[50]); MULADD(at[28], at[49]); MULADD(at[29], at[48]); MULADD(at[30], at[47]); MULADD(at[31], at[46]); + COMBA_STORE(C->dp[45]); + /* 46 */ + COMBA_FORWARD; + MULADD(at[15], at[63]); MULADD(at[16], at[62]); MULADD(at[17], at[61]); MULADD(at[18], at[60]); MULADD(at[19], at[59]); MULADD(at[20], at[58]); MULADD(at[21], at[57]); MULADD(at[22], at[56]); MULADD(at[23], at[55]); MULADD(at[24], at[54]); MULADD(at[25], at[53]); MULADD(at[26], at[52]); MULADD(at[27], at[51]); MULADD(at[28], at[50]); MULADD(at[29], at[49]); MULADD(at[30], at[48]); MULADD(at[31], at[47]); + COMBA_STORE(C->dp[46]); + + /* early out at 48 digits, 48*32==1536, or two 768 bit operands */ + if (out_size <= 48) { + COMBA_STORE2(C->dp[47]); C->used = 48; C->sign = A->sign ^ B->sign; fp_clamp(C); COMBA_FINI; +#ifdef WOLFSSL_SMALL_STACK + /* free the heap scratch buffer on this early-out path too */ + XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; + } + + /* 47 */ + COMBA_FORWARD; + MULADD(at[16], at[63]); MULADD(at[17], at[62]); MULADD(at[18], at[61]); MULADD(at[19], at[60]);
MULADD(at[20], at[59]); MULADD(at[21], at[58]); MULADD(at[22], at[57]); MULADD(at[23], at[56]); MULADD(at[24], at[55]); MULADD(at[25], at[54]); MULADD(at[26], at[53]); MULADD(at[27], at[52]); MULADD(at[28], at[51]); MULADD(at[29], at[50]); MULADD(at[30], at[49]); MULADD(at[31], at[48]); + COMBA_STORE(C->dp[47]); + /* 48 */ + COMBA_FORWARD; + MULADD(at[17], at[63]); MULADD(at[18], at[62]); MULADD(at[19], at[61]); MULADD(at[20], at[60]); MULADD(at[21], at[59]); MULADD(at[22], at[58]); MULADD(at[23], at[57]); MULADD(at[24], at[56]); MULADD(at[25], at[55]); MULADD(at[26], at[54]); MULADD(at[27], at[53]); MULADD(at[28], at[52]); MULADD(at[29], at[51]); MULADD(at[30], at[50]); MULADD(at[31], at[49]); + COMBA_STORE(C->dp[48]); + /* 49 */ + COMBA_FORWARD; + MULADD(at[18], at[63]); MULADD(at[19], at[62]); MULADD(at[20], at[61]); MULADD(at[21], at[60]); MULADD(at[22], at[59]); MULADD(at[23], at[58]); MULADD(at[24], at[57]); MULADD(at[25], at[56]); MULADD(at[26], at[55]); MULADD(at[27], at[54]); MULADD(at[28], at[53]); MULADD(at[29], at[52]); MULADD(at[30], at[51]); MULADD(at[31], at[50]); + COMBA_STORE(C->dp[49]); + /* 50 */ + COMBA_FORWARD; + MULADD(at[19], at[63]); MULADD(at[20], at[62]); MULADD(at[21], at[61]); MULADD(at[22], at[60]); MULADD(at[23], at[59]); MULADD(at[24], at[58]); MULADD(at[25], at[57]); MULADD(at[26], at[56]); MULADD(at[27], at[55]); MULADD(at[28], at[54]); MULADD(at[29], at[53]); MULADD(at[30], at[52]); MULADD(at[31], at[51]); + COMBA_STORE(C->dp[50]); + /* 51 */ + COMBA_FORWARD; + MULADD(at[20], at[63]); MULADD(at[21], at[62]); MULADD(at[22], at[61]); MULADD(at[23], at[60]); MULADD(at[24], at[59]); MULADD(at[25], at[58]); MULADD(at[26], at[57]); MULADD(at[27], at[56]); MULADD(at[28], at[55]); MULADD(at[29], at[54]); MULADD(at[30], at[53]); MULADD(at[31], at[52]); + COMBA_STORE(C->dp[51]); + /* 52 */ + COMBA_FORWARD; + MULADD(at[21], at[63]); MULADD(at[22], at[62]); MULADD(at[23], at[61]); MULADD(at[24], at[60]); MULADD(at[25], at[59]); MULADD(at[26], at[58]); MULADD(at[27], at[57]); MULADD(at[28], at[56]); MULADD(at[29], at[55]); MULADD(at[30], at[54]); MULADD(at[31], at[53]); + COMBA_STORE(C->dp[52]); + /* 53 */ + COMBA_FORWARD; + MULADD(at[22], at[63]); MULADD(at[23], at[62]); MULADD(at[24], at[61]); MULADD(at[25], at[60]); MULADD(at[26], at[59]); MULADD(at[27], at[58]); MULADD(at[28], at[57]); MULADD(at[29], at[56]); MULADD(at[30], at[55]); MULADD(at[31], at[54]); + COMBA_STORE(C->dp[53]); + /* 54 */ + COMBA_FORWARD; + MULADD(at[23], at[63]); MULADD(at[24], at[62]); MULADD(at[25], at[61]); MULADD(at[26], at[60]); MULADD(at[27], at[59]); MULADD(at[28], at[58]); MULADD(at[29], at[57]); MULADD(at[30], at[56]); MULADD(at[31], at[55]); + COMBA_STORE(C->dp[54]); + + /* early out at 56 digits, 56*32==1792, or two 896 bit operands */ + if (out_size <= 56) { + COMBA_STORE2(C->dp[55]); C->used = 56; C->sign = A->sign ^ B->sign; fp_clamp(C); COMBA_FINI; +#ifdef WOLFSSL_SMALL_STACK + /* free the heap scratch buffer on this early-out path too */ + XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; + } + + /* 55 */ + COMBA_FORWARD; + MULADD(at[24], at[63]); MULADD(at[25], at[62]); MULADD(at[26], at[61]); MULADD(at[27], at[60]); MULADD(at[28], at[59]); MULADD(at[29], at[58]); MULADD(at[30], at[57]); MULADD(at[31], at[56]); + COMBA_STORE(C->dp[55]); + /* 56 */ + COMBA_FORWARD; + MULADD(at[25], at[63]); MULADD(at[26], at[62]); MULADD(at[27], at[61]); MULADD(at[28], at[60]); MULADD(at[29], at[59]); MULADD(at[30], at[58]); MULADD(at[31], at[57]); + COMBA_STORE(C->dp[56]); + /* 57 */ + COMBA_FORWARD; + MULADD(at[26], at[63]); MULADD(at[27], at[62]); MULADD(at[28], at[61]); MULADD(at[29], at[60]); MULADD(at[30], at[59]);
MULADD(at[31], at[58]); + COMBA_STORE(C->dp[57]); + /* 58 */ + COMBA_FORWARD; + MULADD(at[27], at[63]); MULADD(at[28], at[62]); MULADD(at[29], at[61]); MULADD(at[30], at[60]); MULADD(at[31], at[59]); + COMBA_STORE(C->dp[58]); + /* 59 */ + COMBA_FORWARD; + MULADD(at[28], at[63]); MULADD(at[29], at[62]); MULADD(at[30], at[61]); MULADD(at[31], at[60]); + COMBA_STORE(C->dp[59]); + /* 60 */ + COMBA_FORWARD; + MULADD(at[29], at[63]); MULADD(at[30], at[62]); MULADD(at[31], at[61]); + COMBA_STORE(C->dp[60]); + /* 61 */ + COMBA_FORWARD; + MULADD(at[30], at[63]); MULADD(at[31], at[62]); + COMBA_STORE(C->dp[61]); + /* 62 */ + COMBA_FORWARD; + MULADD(at[31], at[63]); + COMBA_STORE(C->dp[62]); + COMBA_STORE2(C->dp[63]); + C->used = 64; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} +#endif diff --git a/client/wolfssl/wolfcrypt/src/fp_mul_comba_4.i b/client/wolfssl/wolfcrypt/src/fp_mul_comba_4.i new file mode 100644 index 0000000..803c615 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_mul_comba_4.i @@ -0,0 +1,83 @@ +/* fp_mul_comba_4.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_MUL4 +int fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C) +{ + fp_digit c0, c1, c2; +#ifndef WOLFSSL_SMALL_STACK + fp_digit at[8]; +#else + fp_digit *at; +#endif + +#ifdef WOLFSSL_SMALL_STACK + at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 8, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (at == NULL) + return FP_MEM; +#endif + + XMEMCPY(at, A->dp, 4 * sizeof(fp_digit)); + XMEMCPY(at+4, B->dp, 4 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[4]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[5]); MULADD(at[1], at[4]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[6]); MULADD(at[1], at[5]); MULADD(at[2], at[4]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[7]); MULADD(at[1], at[6]); MULADD(at[2], at[5]); MULADD(at[3], at[4]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[1], at[7]); MULADD(at[2], at[6]); MULADD(at[3], at[5]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[2], at[7]); MULADD(at[3], at[6]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[3], at[7]); + COMBA_STORE(C->dp[6]); + COMBA_STORE2(C->dp[7]); + C->used = 8; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} +#endif diff --git a/client/wolfssl/wolfcrypt/src/fp_mul_comba_48.i b/client/wolfssl/wolfcrypt/src/fp_mul_comba_48.i new file mode 100644 index 0000000..0d15334 --- /dev/null +++ 
b/client/wolfssl/wolfcrypt/src/fp_mul_comba_48.i @@ -0,0 +1,435 @@ +/* fp_mul_comba_48.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_MUL48 +int fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C) +{ + fp_digit c0, c1, c2; +#ifndef WOLFSSL_SMALL_STACK + fp_digit at[96]; +#else + fp_digit *at; +#endif + +#ifdef WOLFSSL_SMALL_STACK + at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 96, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (at == NULL) + return FP_MEM; +#endif + + XMEMCPY(at, A->dp, 48 * sizeof(fp_digit)); + XMEMCPY(at+48, B->dp, 48 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[48]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[49]); MULADD(at[1], at[48]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[50]); MULADD(at[1], at[49]); MULADD(at[2], at[48]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[51]); MULADD(at[1], at[50]); MULADD(at[2], at[49]); MULADD(at[3], at[48]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[52]); MULADD(at[1], at[51]); MULADD(at[2], at[50]); MULADD(at[3], at[49]); MULADD(at[4], at[48]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[53]); MULADD(at[1], at[52]); MULADD(at[2], at[51]); MULADD(at[3], at[50]); MULADD(at[4], at[49]); MULADD(at[5], at[48]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[54]); MULADD(at[1], at[53]); MULADD(at[2], at[52]); MULADD(at[3], at[51]); MULADD(at[4], at[50]); MULADD(at[5], at[49]); MULADD(at[6], at[48]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[55]); MULADD(at[1], at[54]); MULADD(at[2], at[53]); MULADD(at[3], at[52]); MULADD(at[4], at[51]); MULADD(at[5], at[50]); MULADD(at[6], at[49]); MULADD(at[7], at[48]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[56]); MULADD(at[1], at[55]); MULADD(at[2], at[54]); MULADD(at[3], at[53]); MULADD(at[4], at[52]); MULADD(at[5], at[51]); MULADD(at[6], at[50]); MULADD(at[7], at[49]); MULADD(at[8], at[48]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[57]); MULADD(at[1], at[56]); MULADD(at[2], at[55]); MULADD(at[3], at[54]); MULADD(at[4], at[53]); MULADD(at[5], at[52]); MULADD(at[6], at[51]); MULADD(at[7], at[50]); MULADD(at[8], at[49]); MULADD(at[9], at[48]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[58]); MULADD(at[1], at[57]); MULADD(at[2], at[56]); MULADD(at[3], at[55]); MULADD(at[4], at[54]); MULADD(at[5], at[53]); MULADD(at[6], at[52]); MULADD(at[7], at[51]); MULADD(at[8], at[50]); MULADD(at[9], at[49]); MULADD(at[10], at[48]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[59]); MULADD(at[1], at[58]); MULADD(at[2], 
at[57]); MULADD(at[3], at[56]); MULADD(at[4], at[55]); MULADD(at[5], at[54]); MULADD(at[6], at[53]); MULADD(at[7], at[52]); MULADD(at[8], at[51]); MULADD(at[9], at[50]); MULADD(at[10], at[49]); MULADD(at[11], at[48]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[60]); MULADD(at[1], at[59]); MULADD(at[2], at[58]); MULADD(at[3], at[57]); MULADD(at[4], at[56]); MULADD(at[5], at[55]); MULADD(at[6], at[54]); MULADD(at[7], at[53]); MULADD(at[8], at[52]); MULADD(at[9], at[51]); MULADD(at[10], at[50]); MULADD(at[11], at[49]); MULADD(at[12], at[48]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[61]); MULADD(at[1], at[60]); MULADD(at[2], at[59]); MULADD(at[3], at[58]); MULADD(at[4], at[57]); MULADD(at[5], at[56]); MULADD(at[6], at[55]); MULADD(at[7], at[54]); MULADD(at[8], at[53]); MULADD(at[9], at[52]); MULADD(at[10], at[51]); MULADD(at[11], at[50]); MULADD(at[12], at[49]); MULADD(at[13], at[48]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[62]); MULADD(at[1], at[61]); MULADD(at[2], at[60]); MULADD(at[3], at[59]); MULADD(at[4], at[58]); MULADD(at[5], at[57]); MULADD(at[6], at[56]); MULADD(at[7], at[55]); MULADD(at[8], at[54]); MULADD(at[9], at[53]); MULADD(at[10], at[52]); MULADD(at[11], at[51]); MULADD(at[12], at[50]); MULADD(at[13], at[49]); MULADD(at[14], at[48]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[63]); MULADD(at[1], at[62]); MULADD(at[2], at[61]); MULADD(at[3], at[60]); MULADD(at[4], at[59]); MULADD(at[5], at[58]); MULADD(at[6], at[57]); MULADD(at[7], at[56]); MULADD(at[8], at[55]); MULADD(at[9], at[54]); MULADD(at[10], at[53]); MULADD(at[11], at[52]); MULADD(at[12], at[51]); MULADD(at[13], at[50]); MULADD(at[14], at[49]); MULADD(at[15], at[48]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[0], at[64]); MULADD(at[1], at[63]); MULADD(at[2], at[62]); MULADD(at[3], at[61]); MULADD(at[4], at[60]); MULADD(at[5], at[59]); MULADD(at[6], at[58]); MULADD(at[7], at[57]); MULADD(at[8], at[56]); MULADD(at[9], at[55]); MULADD(at[10], at[54]); MULADD(at[11], at[53]); MULADD(at[12], at[52]); MULADD(at[13], at[51]); MULADD(at[14], at[50]); MULADD(at[15], at[49]); MULADD(at[16], at[48]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[0], at[65]); MULADD(at[1], at[64]); MULADD(at[2], at[63]); MULADD(at[3], at[62]); MULADD(at[4], at[61]); MULADD(at[5], at[60]); MULADD(at[6], at[59]); MULADD(at[7], at[58]); MULADD(at[8], at[57]); MULADD(at[9], at[56]); MULADD(at[10], at[55]); MULADD(at[11], at[54]); MULADD(at[12], at[53]); MULADD(at[13], at[52]); MULADD(at[14], at[51]); MULADD(at[15], at[50]); MULADD(at[16], at[49]); MULADD(at[17], at[48]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[0], at[66]); MULADD(at[1], at[65]); MULADD(at[2], at[64]); MULADD(at[3], at[63]); MULADD(at[4], at[62]); MULADD(at[5], at[61]); MULADD(at[6], at[60]); MULADD(at[7], at[59]); MULADD(at[8], at[58]); MULADD(at[9], at[57]); MULADD(at[10], at[56]); MULADD(at[11], at[55]); MULADD(at[12], at[54]); MULADD(at[13], at[53]); MULADD(at[14], at[52]); MULADD(at[15], at[51]); MULADD(at[16], at[50]); MULADD(at[17], at[49]); MULADD(at[18], at[48]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[0], at[67]); MULADD(at[1], at[66]); MULADD(at[2], at[65]); MULADD(at[3], at[64]); MULADD(at[4], at[63]); MULADD(at[5], at[62]); MULADD(at[6], at[61]); MULADD(at[7], at[60]); MULADD(at[8], at[59]); MULADD(at[9], at[58]); MULADD(at[10], at[57]); MULADD(at[11], 
at[56]); MULADD(at[12], at[55]); MULADD(at[13], at[54]); MULADD(at[14], at[53]); MULADD(at[15], at[52]); MULADD(at[16], at[51]); MULADD(at[17], at[50]); MULADD(at[18], at[49]); MULADD(at[19], at[48]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[0], at[68]); MULADD(at[1], at[67]); MULADD(at[2], at[66]); MULADD(at[3], at[65]); MULADD(at[4], at[64]); MULADD(at[5], at[63]); MULADD(at[6], at[62]); MULADD(at[7], at[61]); MULADD(at[8], at[60]); MULADD(at[9], at[59]); MULADD(at[10], at[58]); MULADD(at[11], at[57]); MULADD(at[12], at[56]); MULADD(at[13], at[55]); MULADD(at[14], at[54]); MULADD(at[15], at[53]); MULADD(at[16], at[52]); MULADD(at[17], at[51]); MULADD(at[18], at[50]); MULADD(at[19], at[49]); MULADD(at[20], at[48]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[0], at[69]); MULADD(at[1], at[68]); MULADD(at[2], at[67]); MULADD(at[3], at[66]); MULADD(at[4], at[65]); MULADD(at[5], at[64]); MULADD(at[6], at[63]); MULADD(at[7], at[62]); MULADD(at[8], at[61]); MULADD(at[9], at[60]); MULADD(at[10], at[59]); MULADD(at[11], at[58]); MULADD(at[12], at[57]); MULADD(at[13], at[56]); MULADD(at[14], at[55]); MULADD(at[15], at[54]); MULADD(at[16], at[53]); MULADD(at[17], at[52]); MULADD(at[18], at[51]); MULADD(at[19], at[50]); MULADD(at[20], at[49]); MULADD(at[21], at[48]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[0], at[70]); MULADD(at[1], at[69]); MULADD(at[2], at[68]); MULADD(at[3], at[67]); MULADD(at[4], at[66]); MULADD(at[5], at[65]); MULADD(at[6], at[64]); MULADD(at[7], at[63]); MULADD(at[8], at[62]); MULADD(at[9], at[61]); MULADD(at[10], at[60]); MULADD(at[11], at[59]); MULADD(at[12], at[58]); MULADD(at[13], at[57]); MULADD(at[14], at[56]); MULADD(at[15], at[55]); MULADD(at[16], at[54]); MULADD(at[17], at[53]); MULADD(at[18], at[52]); MULADD(at[19], at[51]); MULADD(at[20], at[50]); MULADD(at[21], at[49]); MULADD(at[22], at[48]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[0], at[71]); MULADD(at[1], at[70]); MULADD(at[2], at[69]); MULADD(at[3], at[68]); MULADD(at[4], at[67]); MULADD(at[5], at[66]); MULADD(at[6], at[65]); MULADD(at[7], at[64]); MULADD(at[8], at[63]); MULADD(at[9], at[62]); MULADD(at[10], at[61]); MULADD(at[11], at[60]); MULADD(at[12], at[59]); MULADD(at[13], at[58]); MULADD(at[14], at[57]); MULADD(at[15], at[56]); MULADD(at[16], at[55]); MULADD(at[17], at[54]); MULADD(at[18], at[53]); MULADD(at[19], at[52]); MULADD(at[20], at[51]); MULADD(at[21], at[50]); MULADD(at[22], at[49]); MULADD(at[23], at[48]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[0], at[72]); MULADD(at[1], at[71]); MULADD(at[2], at[70]); MULADD(at[3], at[69]); MULADD(at[4], at[68]); MULADD(at[5], at[67]); MULADD(at[6], at[66]); MULADD(at[7], at[65]); MULADD(at[8], at[64]); MULADD(at[9], at[63]); MULADD(at[10], at[62]); MULADD(at[11], at[61]); MULADD(at[12], at[60]); MULADD(at[13], at[59]); MULADD(at[14], at[58]); MULADD(at[15], at[57]); MULADD(at[16], at[56]); MULADD(at[17], at[55]); MULADD(at[18], at[54]); MULADD(at[19], at[53]); MULADD(at[20], at[52]); MULADD(at[21], at[51]); MULADD(at[22], at[50]); MULADD(at[23], at[49]); MULADD(at[24], at[48]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[0], at[73]); MULADD(at[1], at[72]); MULADD(at[2], at[71]); MULADD(at[3], at[70]); MULADD(at[4], at[69]); MULADD(at[5], at[68]); MULADD(at[6], at[67]); MULADD(at[7], at[66]); MULADD(at[8], at[65]); MULADD(at[9], at[64]); MULADD(at[10], at[63]); MULADD(at[11], at[62]); MULADD(at[12], at[61]); 
MULADD(at[13], at[60]); MULADD(at[14], at[59]); MULADD(at[15], at[58]); MULADD(at[16], at[57]); MULADD(at[17], at[56]); MULADD(at[18], at[55]); MULADD(at[19], at[54]); MULADD(at[20], at[53]); MULADD(at[21], at[52]); MULADD(at[22], at[51]); MULADD(at[23], at[50]); MULADD(at[24], at[49]); MULADD(at[25], at[48]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[0], at[74]); MULADD(at[1], at[73]); MULADD(at[2], at[72]); MULADD(at[3], at[71]); MULADD(at[4], at[70]); MULADD(at[5], at[69]); MULADD(at[6], at[68]); MULADD(at[7], at[67]); MULADD(at[8], at[66]); MULADD(at[9], at[65]); MULADD(at[10], at[64]); MULADD(at[11], at[63]); MULADD(at[12], at[62]); MULADD(at[13], at[61]); MULADD(at[14], at[60]); MULADD(at[15], at[59]); MULADD(at[16], at[58]); MULADD(at[17], at[57]); MULADD(at[18], at[56]); MULADD(at[19], at[55]); MULADD(at[20], at[54]); MULADD(at[21], at[53]); MULADD(at[22], at[52]); MULADD(at[23], at[51]); MULADD(at[24], at[50]); MULADD(at[25], at[49]); MULADD(at[26], at[48]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[0], at[75]); MULADD(at[1], at[74]); MULADD(at[2], at[73]); MULADD(at[3], at[72]); MULADD(at[4], at[71]); MULADD(at[5], at[70]); MULADD(at[6], at[69]); MULADD(at[7], at[68]); MULADD(at[8], at[67]); MULADD(at[9], at[66]); MULADD(at[10], at[65]); MULADD(at[11], at[64]); MULADD(at[12], at[63]); MULADD(at[13], at[62]); MULADD(at[14], at[61]); MULADD(at[15], at[60]); MULADD(at[16], at[59]); MULADD(at[17], at[58]); MULADD(at[18], at[57]); MULADD(at[19], at[56]); MULADD(at[20], at[55]); MULADD(at[21], at[54]); MULADD(at[22], at[53]); MULADD(at[23], at[52]); MULADD(at[24], at[51]); MULADD(at[25], at[50]); MULADD(at[26], at[49]); MULADD(at[27], at[48]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[0], at[76]); MULADD(at[1], at[75]); MULADD(at[2], at[74]); MULADD(at[3], at[73]); MULADD(at[4], at[72]); MULADD(at[5], at[71]); MULADD(at[6], at[70]); MULADD(at[7], at[69]); MULADD(at[8], at[68]); MULADD(at[9], at[67]); MULADD(at[10], at[66]); MULADD(at[11], at[65]); MULADD(at[12], at[64]); MULADD(at[13], at[63]); MULADD(at[14], at[62]); MULADD(at[15], at[61]); MULADD(at[16], at[60]); MULADD(at[17], at[59]); MULADD(at[18], at[58]); MULADD(at[19], at[57]); MULADD(at[20], at[56]); MULADD(at[21], at[55]); MULADD(at[22], at[54]); MULADD(at[23], at[53]); MULADD(at[24], at[52]); MULADD(at[25], at[51]); MULADD(at[26], at[50]); MULADD(at[27], at[49]); MULADD(at[28], at[48]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[0], at[77]); MULADD(at[1], at[76]); MULADD(at[2], at[75]); MULADD(at[3], at[74]); MULADD(at[4], at[73]); MULADD(at[5], at[72]); MULADD(at[6], at[71]); MULADD(at[7], at[70]); MULADD(at[8], at[69]); MULADD(at[9], at[68]); MULADD(at[10], at[67]); MULADD(at[11], at[66]); MULADD(at[12], at[65]); MULADD(at[13], at[64]); MULADD(at[14], at[63]); MULADD(at[15], at[62]); MULADD(at[16], at[61]); MULADD(at[17], at[60]); MULADD(at[18], at[59]); MULADD(at[19], at[58]); MULADD(at[20], at[57]); MULADD(at[21], at[56]); MULADD(at[22], at[55]); MULADD(at[23], at[54]); MULADD(at[24], at[53]); MULADD(at[25], at[52]); MULADD(at[26], at[51]); MULADD(at[27], at[50]); MULADD(at[28], at[49]); MULADD(at[29], at[48]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[0], at[78]); MULADD(at[1], at[77]); MULADD(at[2], at[76]); MULADD(at[3], at[75]); MULADD(at[4], at[74]); MULADD(at[5], at[73]); MULADD(at[6], at[72]); MULADD(at[7], at[71]); MULADD(at[8], at[70]); MULADD(at[9], at[69]); MULADD(at[10], at[68]); 
MULADD(at[11], at[67]); MULADD(at[12], at[66]); MULADD(at[13], at[65]); MULADD(at[14], at[64]); MULADD(at[15], at[63]); MULADD(at[16], at[62]); MULADD(at[17], at[61]); MULADD(at[18], at[60]); MULADD(at[19], at[59]); MULADD(at[20], at[58]); MULADD(at[21], at[57]); MULADD(at[22], at[56]); MULADD(at[23], at[55]); MULADD(at[24], at[54]); MULADD(at[25], at[53]); MULADD(at[26], at[52]); MULADD(at[27], at[51]); MULADD(at[28], at[50]); MULADD(at[29], at[49]); MULADD(at[30], at[48]); + COMBA_STORE(C->dp[30]); + /* 31 */ + COMBA_FORWARD; + MULADD(at[0], at[79]); MULADD(at[1], at[78]); MULADD(at[2], at[77]); MULADD(at[3], at[76]); MULADD(at[4], at[75]); MULADD(at[5], at[74]); MULADD(at[6], at[73]); MULADD(at[7], at[72]); MULADD(at[8], at[71]); MULADD(at[9], at[70]); MULADD(at[10], at[69]); MULADD(at[11], at[68]); MULADD(at[12], at[67]); MULADD(at[13], at[66]); MULADD(at[14], at[65]); MULADD(at[15], at[64]); MULADD(at[16], at[63]); MULADD(at[17], at[62]); MULADD(at[18], at[61]); MULADD(at[19], at[60]); MULADD(at[20], at[59]); MULADD(at[21], at[58]); MULADD(at[22], at[57]); MULADD(at[23], at[56]); MULADD(at[24], at[55]); MULADD(at[25], at[54]); MULADD(at[26], at[53]); MULADD(at[27], at[52]); MULADD(at[28], at[51]); MULADD(at[29], at[50]); MULADD(at[30], at[49]); MULADD(at[31], at[48]); + COMBA_STORE(C->dp[31]); + /* 32 */ + COMBA_FORWARD; + MULADD(at[0], at[80]); MULADD(at[1], at[79]); MULADD(at[2], at[78]); MULADD(at[3], at[77]); MULADD(at[4], at[76]); MULADD(at[5], at[75]); MULADD(at[6], at[74]); MULADD(at[7], at[73]); MULADD(at[8], at[72]); MULADD(at[9], at[71]); MULADD(at[10], at[70]); MULADD(at[11], at[69]); MULADD(at[12], at[68]); MULADD(at[13], at[67]); MULADD(at[14], at[66]); MULADD(at[15], at[65]); MULADD(at[16], at[64]); MULADD(at[17], at[63]); MULADD(at[18], at[62]); MULADD(at[19], at[61]); MULADD(at[20], at[60]); MULADD(at[21], at[59]); MULADD(at[22], at[58]); MULADD(at[23], at[57]); MULADD(at[24], at[56]); MULADD(at[25], at[55]); MULADD(at[26], at[54]); MULADD(at[27], at[53]); MULADD(at[28], at[52]); MULADD(at[29], at[51]); MULADD(at[30], at[50]); MULADD(at[31], at[49]); MULADD(at[32], at[48]); + COMBA_STORE(C->dp[32]); + /* 33 */ + COMBA_FORWARD; + MULADD(at[0], at[81]); MULADD(at[1], at[80]); MULADD(at[2], at[79]); MULADD(at[3], at[78]); MULADD(at[4], at[77]); MULADD(at[5], at[76]); MULADD(at[6], at[75]); MULADD(at[7], at[74]); MULADD(at[8], at[73]); MULADD(at[9], at[72]); MULADD(at[10], at[71]); MULADD(at[11], at[70]); MULADD(at[12], at[69]); MULADD(at[13], at[68]); MULADD(at[14], at[67]); MULADD(at[15], at[66]); MULADD(at[16], at[65]); MULADD(at[17], at[64]); MULADD(at[18], at[63]); MULADD(at[19], at[62]); MULADD(at[20], at[61]); MULADD(at[21], at[60]); MULADD(at[22], at[59]); MULADD(at[23], at[58]); MULADD(at[24], at[57]); MULADD(at[25], at[56]); MULADD(at[26], at[55]); MULADD(at[27], at[54]); MULADD(at[28], at[53]); MULADD(at[29], at[52]); MULADD(at[30], at[51]); MULADD(at[31], at[50]); MULADD(at[32], at[49]); MULADD(at[33], at[48]); + COMBA_STORE(C->dp[33]); + /* 34 */ + COMBA_FORWARD; + MULADD(at[0], at[82]); MULADD(at[1], at[81]); MULADD(at[2], at[80]); MULADD(at[3], at[79]); MULADD(at[4], at[78]); MULADD(at[5], at[77]); MULADD(at[6], at[76]); MULADD(at[7], at[75]); MULADD(at[8], at[74]); MULADD(at[9], at[73]); MULADD(at[10], at[72]); MULADD(at[11], at[71]); MULADD(at[12], at[70]); MULADD(at[13], at[69]); MULADD(at[14], at[68]); MULADD(at[15], at[67]); MULADD(at[16], at[66]); MULADD(at[17], at[65]); MULADD(at[18], at[64]); MULADD(at[19], at[63]); MULADD(at[20], at[62]); 
MULADD(at[21], at[61]); MULADD(at[22], at[60]); MULADD(at[23], at[59]); MULADD(at[24], at[58]); MULADD(at[25], at[57]); MULADD(at[26], at[56]); MULADD(at[27], at[55]); MULADD(at[28], at[54]); MULADD(at[29], at[53]); MULADD(at[30], at[52]); MULADD(at[31], at[51]); MULADD(at[32], at[50]); MULADD(at[33], at[49]); MULADD(at[34], at[48]); + COMBA_STORE(C->dp[34]); + /* 35 */ + COMBA_FORWARD; + MULADD(at[0], at[83]); MULADD(at[1], at[82]); MULADD(at[2], at[81]); MULADD(at[3], at[80]); MULADD(at[4], at[79]); MULADD(at[5], at[78]); MULADD(at[6], at[77]); MULADD(at[7], at[76]); MULADD(at[8], at[75]); MULADD(at[9], at[74]); MULADD(at[10], at[73]); MULADD(at[11], at[72]); MULADD(at[12], at[71]); MULADD(at[13], at[70]); MULADD(at[14], at[69]); MULADD(at[15], at[68]); MULADD(at[16], at[67]); MULADD(at[17], at[66]); MULADD(at[18], at[65]); MULADD(at[19], at[64]); MULADD(at[20], at[63]); MULADD(at[21], at[62]); MULADD(at[22], at[61]); MULADD(at[23], at[60]); MULADD(at[24], at[59]); MULADD(at[25], at[58]); MULADD(at[26], at[57]); MULADD(at[27], at[56]); MULADD(at[28], at[55]); MULADD(at[29], at[54]); MULADD(at[30], at[53]); MULADD(at[31], at[52]); MULADD(at[32], at[51]); MULADD(at[33], at[50]); MULADD(at[34], at[49]); MULADD(at[35], at[48]); + COMBA_STORE(C->dp[35]); + /* 36 */ + COMBA_FORWARD; + MULADD(at[0], at[84]); MULADD(at[1], at[83]); MULADD(at[2], at[82]); MULADD(at[3], at[81]); MULADD(at[4], at[80]); MULADD(at[5], at[79]); MULADD(at[6], at[78]); MULADD(at[7], at[77]); MULADD(at[8], at[76]); MULADD(at[9], at[75]); MULADD(at[10], at[74]); MULADD(at[11], at[73]); MULADD(at[12], at[72]); MULADD(at[13], at[71]); MULADD(at[14], at[70]); MULADD(at[15], at[69]); MULADD(at[16], at[68]); MULADD(at[17], at[67]); MULADD(at[18], at[66]); MULADD(at[19], at[65]); MULADD(at[20], at[64]); MULADD(at[21], at[63]); MULADD(at[22], at[62]); MULADD(at[23], at[61]); MULADD(at[24], at[60]); MULADD(at[25], at[59]); MULADD(at[26], at[58]); MULADD(at[27], at[57]); MULADD(at[28], at[56]); MULADD(at[29], at[55]); MULADD(at[30], at[54]); MULADD(at[31], at[53]); MULADD(at[32], at[52]); MULADD(at[33], at[51]); MULADD(at[34], at[50]); MULADD(at[35], at[49]); MULADD(at[36], at[48]); + COMBA_STORE(C->dp[36]); + /* 37 */ + COMBA_FORWARD; + MULADD(at[0], at[85]); MULADD(at[1], at[84]); MULADD(at[2], at[83]); MULADD(at[3], at[82]); MULADD(at[4], at[81]); MULADD(at[5], at[80]); MULADD(at[6], at[79]); MULADD(at[7], at[78]); MULADD(at[8], at[77]); MULADD(at[9], at[76]); MULADD(at[10], at[75]); MULADD(at[11], at[74]); MULADD(at[12], at[73]); MULADD(at[13], at[72]); MULADD(at[14], at[71]); MULADD(at[15], at[70]); MULADD(at[16], at[69]); MULADD(at[17], at[68]); MULADD(at[18], at[67]); MULADD(at[19], at[66]); MULADD(at[20], at[65]); MULADD(at[21], at[64]); MULADD(at[22], at[63]); MULADD(at[23], at[62]); MULADD(at[24], at[61]); MULADD(at[25], at[60]); MULADD(at[26], at[59]); MULADD(at[27], at[58]); MULADD(at[28], at[57]); MULADD(at[29], at[56]); MULADD(at[30], at[55]); MULADD(at[31], at[54]); MULADD(at[32], at[53]); MULADD(at[33], at[52]); MULADD(at[34], at[51]); MULADD(at[35], at[50]); MULADD(at[36], at[49]); MULADD(at[37], at[48]); + COMBA_STORE(C->dp[37]); + /* 38 */ + COMBA_FORWARD; + MULADD(at[0], at[86]); MULADD(at[1], at[85]); MULADD(at[2], at[84]); MULADD(at[3], at[83]); MULADD(at[4], at[82]); MULADD(at[5], at[81]); MULADD(at[6], at[80]); MULADD(at[7], at[79]); MULADD(at[8], at[78]); MULADD(at[9], at[77]); MULADD(at[10], at[76]); MULADD(at[11], at[75]); MULADD(at[12], at[74]); MULADD(at[13], at[73]); MULADD(at[14], at[72]); 
MULADD(at[15], at[71]); MULADD(at[16], at[70]); MULADD(at[17], at[69]); MULADD(at[18], at[68]); MULADD(at[19], at[67]); MULADD(at[20], at[66]); MULADD(at[21], at[65]); MULADD(at[22], at[64]); MULADD(at[23], at[63]); MULADD(at[24], at[62]); MULADD(at[25], at[61]); MULADD(at[26], at[60]); MULADD(at[27], at[59]); MULADD(at[28], at[58]); MULADD(at[29], at[57]); MULADD(at[30], at[56]); MULADD(at[31], at[55]); MULADD(at[32], at[54]); MULADD(at[33], at[53]); MULADD(at[34], at[52]); MULADD(at[35], at[51]); MULADD(at[36], at[50]); MULADD(at[37], at[49]); MULADD(at[38], at[48]); + COMBA_STORE(C->dp[38]); + /* 39 */ + COMBA_FORWARD; + MULADD(at[0], at[87]); MULADD(at[1], at[86]); MULADD(at[2], at[85]); MULADD(at[3], at[84]); MULADD(at[4], at[83]); MULADD(at[5], at[82]); MULADD(at[6], at[81]); MULADD(at[7], at[80]); MULADD(at[8], at[79]); MULADD(at[9], at[78]); MULADD(at[10], at[77]); MULADD(at[11], at[76]); MULADD(at[12], at[75]); MULADD(at[13], at[74]); MULADD(at[14], at[73]); MULADD(at[15], at[72]); MULADD(at[16], at[71]); MULADD(at[17], at[70]); MULADD(at[18], at[69]); MULADD(at[19], at[68]); MULADD(at[20], at[67]); MULADD(at[21], at[66]); MULADD(at[22], at[65]); MULADD(at[23], at[64]); MULADD(at[24], at[63]); MULADD(at[25], at[62]); MULADD(at[26], at[61]); MULADD(at[27], at[60]); MULADD(at[28], at[59]); MULADD(at[29], at[58]); MULADD(at[30], at[57]); MULADD(at[31], at[56]); MULADD(at[32], at[55]); MULADD(at[33], at[54]); MULADD(at[34], at[53]); MULADD(at[35], at[52]); MULADD(at[36], at[51]); MULADD(at[37], at[50]); MULADD(at[38], at[49]); MULADD(at[39], at[48]); + COMBA_STORE(C->dp[39]); + /* 40 */ + COMBA_FORWARD; + MULADD(at[0], at[88]); MULADD(at[1], at[87]); MULADD(at[2], at[86]); MULADD(at[3], at[85]); MULADD(at[4], at[84]); MULADD(at[5], at[83]); MULADD(at[6], at[82]); MULADD(at[7], at[81]); MULADD(at[8], at[80]); MULADD(at[9], at[79]); MULADD(at[10], at[78]); MULADD(at[11], at[77]); MULADD(at[12], at[76]); MULADD(at[13], at[75]); MULADD(at[14], at[74]); MULADD(at[15], at[73]); MULADD(at[16], at[72]); MULADD(at[17], at[71]); MULADD(at[18], at[70]); MULADD(at[19], at[69]); MULADD(at[20], at[68]); MULADD(at[21], at[67]); MULADD(at[22], at[66]); MULADD(at[23], at[65]); MULADD(at[24], at[64]); MULADD(at[25], at[63]); MULADD(at[26], at[62]); MULADD(at[27], at[61]); MULADD(at[28], at[60]); MULADD(at[29], at[59]); MULADD(at[30], at[58]); MULADD(at[31], at[57]); MULADD(at[32], at[56]); MULADD(at[33], at[55]); MULADD(at[34], at[54]); MULADD(at[35], at[53]); MULADD(at[36], at[52]); MULADD(at[37], at[51]); MULADD(at[38], at[50]); MULADD(at[39], at[49]); MULADD(at[40], at[48]); + COMBA_STORE(C->dp[40]); + /* 41 */ + COMBA_FORWARD; + MULADD(at[0], at[89]); MULADD(at[1], at[88]); MULADD(at[2], at[87]); MULADD(at[3], at[86]); MULADD(at[4], at[85]); MULADD(at[5], at[84]); MULADD(at[6], at[83]); MULADD(at[7], at[82]); MULADD(at[8], at[81]); MULADD(at[9], at[80]); MULADD(at[10], at[79]); MULADD(at[11], at[78]); MULADD(at[12], at[77]); MULADD(at[13], at[76]); MULADD(at[14], at[75]); MULADD(at[15], at[74]); MULADD(at[16], at[73]); MULADD(at[17], at[72]); MULADD(at[18], at[71]); MULADD(at[19], at[70]); MULADD(at[20], at[69]); MULADD(at[21], at[68]); MULADD(at[22], at[67]); MULADD(at[23], at[66]); MULADD(at[24], at[65]); MULADD(at[25], at[64]); MULADD(at[26], at[63]); MULADD(at[27], at[62]); MULADD(at[28], at[61]); MULADD(at[29], at[60]); MULADD(at[30], at[59]); MULADD(at[31], at[58]); MULADD(at[32], at[57]); MULADD(at[33], at[56]); MULADD(at[34], at[55]); MULADD(at[35], at[54]); MULADD(at[36], at[53]); 
MULADD(at[37], at[52]); MULADD(at[38], at[51]); MULADD(at[39], at[50]); MULADD(at[40], at[49]); MULADD(at[41], at[48]); + COMBA_STORE(C->dp[41]); + /* 42 */ + COMBA_FORWARD; + MULADD(at[0], at[90]); MULADD(at[1], at[89]); MULADD(at[2], at[88]); MULADD(at[3], at[87]); MULADD(at[4], at[86]); MULADD(at[5], at[85]); MULADD(at[6], at[84]); MULADD(at[7], at[83]); MULADD(at[8], at[82]); MULADD(at[9], at[81]); MULADD(at[10], at[80]); MULADD(at[11], at[79]); MULADD(at[12], at[78]); MULADD(at[13], at[77]); MULADD(at[14], at[76]); MULADD(at[15], at[75]); MULADD(at[16], at[74]); MULADD(at[17], at[73]); MULADD(at[18], at[72]); MULADD(at[19], at[71]); MULADD(at[20], at[70]); MULADD(at[21], at[69]); MULADD(at[22], at[68]); MULADD(at[23], at[67]); MULADD(at[24], at[66]); MULADD(at[25], at[65]); MULADD(at[26], at[64]); MULADD(at[27], at[63]); MULADD(at[28], at[62]); MULADD(at[29], at[61]); MULADD(at[30], at[60]); MULADD(at[31], at[59]); MULADD(at[32], at[58]); MULADD(at[33], at[57]); MULADD(at[34], at[56]); MULADD(at[35], at[55]); MULADD(at[36], at[54]); MULADD(at[37], at[53]); MULADD(at[38], at[52]); MULADD(at[39], at[51]); MULADD(at[40], at[50]); MULADD(at[41], at[49]); MULADD(at[42], at[48]); + COMBA_STORE(C->dp[42]); + /* 43 */ + COMBA_FORWARD; + MULADD(at[0], at[91]); MULADD(at[1], at[90]); MULADD(at[2], at[89]); MULADD(at[3], at[88]); MULADD(at[4], at[87]); MULADD(at[5], at[86]); MULADD(at[6], at[85]); MULADD(at[7], at[84]); MULADD(at[8], at[83]); MULADD(at[9], at[82]); MULADD(at[10], at[81]); MULADD(at[11], at[80]); MULADD(at[12], at[79]); MULADD(at[13], at[78]); MULADD(at[14], at[77]); MULADD(at[15], at[76]); MULADD(at[16], at[75]); MULADD(at[17], at[74]); MULADD(at[18], at[73]); MULADD(at[19], at[72]); MULADD(at[20], at[71]); MULADD(at[21], at[70]); MULADD(at[22], at[69]); MULADD(at[23], at[68]); MULADD(at[24], at[67]); MULADD(at[25], at[66]); MULADD(at[26], at[65]); MULADD(at[27], at[64]); MULADD(at[28], at[63]); MULADD(at[29], at[62]); MULADD(at[30], at[61]); MULADD(at[31], at[60]); MULADD(at[32], at[59]); MULADD(at[33], at[58]); MULADD(at[34], at[57]); MULADD(at[35], at[56]); MULADD(at[36], at[55]); MULADD(at[37], at[54]); MULADD(at[38], at[53]); MULADD(at[39], at[52]); MULADD(at[40], at[51]); MULADD(at[41], at[50]); MULADD(at[42], at[49]); MULADD(at[43], at[48]); + COMBA_STORE(C->dp[43]); + /* 44 */ + COMBA_FORWARD; + MULADD(at[0], at[92]); MULADD(at[1], at[91]); MULADD(at[2], at[90]); MULADD(at[3], at[89]); MULADD(at[4], at[88]); MULADD(at[5], at[87]); MULADD(at[6], at[86]); MULADD(at[7], at[85]); MULADD(at[8], at[84]); MULADD(at[9], at[83]); MULADD(at[10], at[82]); MULADD(at[11], at[81]); MULADD(at[12], at[80]); MULADD(at[13], at[79]); MULADD(at[14], at[78]); MULADD(at[15], at[77]); MULADD(at[16], at[76]); MULADD(at[17], at[75]); MULADD(at[18], at[74]); MULADD(at[19], at[73]); MULADD(at[20], at[72]); MULADD(at[21], at[71]); MULADD(at[22], at[70]); MULADD(at[23], at[69]); MULADD(at[24], at[68]); MULADD(at[25], at[67]); MULADD(at[26], at[66]); MULADD(at[27], at[65]); MULADD(at[28], at[64]); MULADD(at[29], at[63]); MULADD(at[30], at[62]); MULADD(at[31], at[61]); MULADD(at[32], at[60]); MULADD(at[33], at[59]); MULADD(at[34], at[58]); MULADD(at[35], at[57]); MULADD(at[36], at[56]); MULADD(at[37], at[55]); MULADD(at[38], at[54]); MULADD(at[39], at[53]); MULADD(at[40], at[52]); MULADD(at[41], at[51]); MULADD(at[42], at[50]); MULADD(at[43], at[49]); MULADD(at[44], at[48]); + COMBA_STORE(C->dp[44]); + /* 45 */ + COMBA_FORWARD; + MULADD(at[0], at[93]); MULADD(at[1], at[92]); MULADD(at[2], at[91]); 
MULADD(at[3], at[90]); MULADD(at[4], at[89]); MULADD(at[5], at[88]); MULADD(at[6], at[87]); MULADD(at[7], at[86]); MULADD(at[8], at[85]); MULADD(at[9], at[84]); MULADD(at[10], at[83]); MULADD(at[11], at[82]); MULADD(at[12], at[81]); MULADD(at[13], at[80]); MULADD(at[14], at[79]); MULADD(at[15], at[78]); MULADD(at[16], at[77]); MULADD(at[17], at[76]); MULADD(at[18], at[75]); MULADD(at[19], at[74]); MULADD(at[20], at[73]); MULADD(at[21], at[72]); MULADD(at[22], at[71]); MULADD(at[23], at[70]); MULADD(at[24], at[69]); MULADD(at[25], at[68]); MULADD(at[26], at[67]); MULADD(at[27], at[66]); MULADD(at[28], at[65]); MULADD(at[29], at[64]); MULADD(at[30], at[63]); MULADD(at[31], at[62]); MULADD(at[32], at[61]); MULADD(at[33], at[60]); MULADD(at[34], at[59]); MULADD(at[35], at[58]); MULADD(at[36], at[57]); MULADD(at[37], at[56]); MULADD(at[38], at[55]); MULADD(at[39], at[54]); MULADD(at[40], at[53]); MULADD(at[41], at[52]); MULADD(at[42], at[51]); MULADD(at[43], at[50]); MULADD(at[44], at[49]); MULADD(at[45], at[48]); + COMBA_STORE(C->dp[45]); + /* 46 */ + COMBA_FORWARD; + MULADD(at[0], at[94]); MULADD(at[1], at[93]); MULADD(at[2], at[92]); MULADD(at[3], at[91]); MULADD(at[4], at[90]); MULADD(at[5], at[89]); MULADD(at[6], at[88]); MULADD(at[7], at[87]); MULADD(at[8], at[86]); MULADD(at[9], at[85]); MULADD(at[10], at[84]); MULADD(at[11], at[83]); MULADD(at[12], at[82]); MULADD(at[13], at[81]); MULADD(at[14], at[80]); MULADD(at[15], at[79]); MULADD(at[16], at[78]); MULADD(at[17], at[77]); MULADD(at[18], at[76]); MULADD(at[19], at[75]); MULADD(at[20], at[74]); MULADD(at[21], at[73]); MULADD(at[22], at[72]); MULADD(at[23], at[71]); MULADD(at[24], at[70]); MULADD(at[25], at[69]); MULADD(at[26], at[68]); MULADD(at[27], at[67]); MULADD(at[28], at[66]); MULADD(at[29], at[65]); MULADD(at[30], at[64]); MULADD(at[31], at[63]); MULADD(at[32], at[62]); MULADD(at[33], at[61]); MULADD(at[34], at[60]); MULADD(at[35], at[59]); MULADD(at[36], at[58]); MULADD(at[37], at[57]); MULADD(at[38], at[56]); MULADD(at[39], at[55]); MULADD(at[40], at[54]); MULADD(at[41], at[53]); MULADD(at[42], at[52]); MULADD(at[43], at[51]); MULADD(at[44], at[50]); MULADD(at[45], at[49]); MULADD(at[46], at[48]); + COMBA_STORE(C->dp[46]); + /* 47 */ + COMBA_FORWARD; + MULADD(at[0], at[95]); MULADD(at[1], at[94]); MULADD(at[2], at[93]); MULADD(at[3], at[92]); MULADD(at[4], at[91]); MULADD(at[5], at[90]); MULADD(at[6], at[89]); MULADD(at[7], at[88]); MULADD(at[8], at[87]); MULADD(at[9], at[86]); MULADD(at[10], at[85]); MULADD(at[11], at[84]); MULADD(at[12], at[83]); MULADD(at[13], at[82]); MULADD(at[14], at[81]); MULADD(at[15], at[80]); MULADD(at[16], at[79]); MULADD(at[17], at[78]); MULADD(at[18], at[77]); MULADD(at[19], at[76]); MULADD(at[20], at[75]); MULADD(at[21], at[74]); MULADD(at[22], at[73]); MULADD(at[23], at[72]); MULADD(at[24], at[71]); MULADD(at[25], at[70]); MULADD(at[26], at[69]); MULADD(at[27], at[68]); MULADD(at[28], at[67]); MULADD(at[29], at[66]); MULADD(at[30], at[65]); MULADD(at[31], at[64]); MULADD(at[32], at[63]); MULADD(at[33], at[62]); MULADD(at[34], at[61]); MULADD(at[35], at[60]); MULADD(at[36], at[59]); MULADD(at[37], at[58]); MULADD(at[38], at[57]); MULADD(at[39], at[56]); MULADD(at[40], at[55]); MULADD(at[41], at[54]); MULADD(at[42], at[53]); MULADD(at[43], at[52]); MULADD(at[44], at[51]); MULADD(at[45], at[50]); MULADD(at[46], at[49]); MULADD(at[47], at[48]); + COMBA_STORE(C->dp[47]); + /* 48 */ + COMBA_FORWARD; + MULADD(at[1], at[95]); MULADD(at[2], at[94]); MULADD(at[3], at[93]); MULADD(at[4], at[92]); 
MULADD(at[5], at[91]); MULADD(at[6], at[90]); MULADD(at[7], at[89]); MULADD(at[8], at[88]); MULADD(at[9], at[87]); MULADD(at[10], at[86]); MULADD(at[11], at[85]); MULADD(at[12], at[84]); MULADD(at[13], at[83]); MULADD(at[14], at[82]); MULADD(at[15], at[81]); MULADD(at[16], at[80]); MULADD(at[17], at[79]); MULADD(at[18], at[78]); MULADD(at[19], at[77]); MULADD(at[20], at[76]); MULADD(at[21], at[75]); MULADD(at[22], at[74]); MULADD(at[23], at[73]); MULADD(at[24], at[72]); MULADD(at[25], at[71]); MULADD(at[26], at[70]); MULADD(at[27], at[69]); MULADD(at[28], at[68]); MULADD(at[29], at[67]); MULADD(at[30], at[66]); MULADD(at[31], at[65]); MULADD(at[32], at[64]); MULADD(at[33], at[63]); MULADD(at[34], at[62]); MULADD(at[35], at[61]); MULADD(at[36], at[60]); MULADD(at[37], at[59]); MULADD(at[38], at[58]); MULADD(at[39], at[57]); MULADD(at[40], at[56]); MULADD(at[41], at[55]); MULADD(at[42], at[54]); MULADD(at[43], at[53]); MULADD(at[44], at[52]); MULADD(at[45], at[51]); MULADD(at[46], at[50]); MULADD(at[47], at[49]); + COMBA_STORE(C->dp[48]); + /* 49 */ + COMBA_FORWARD; + MULADD(at[2], at[95]); MULADD(at[3], at[94]); MULADD(at[4], at[93]); MULADD(at[5], at[92]); MULADD(at[6], at[91]); MULADD(at[7], at[90]); MULADD(at[8], at[89]); MULADD(at[9], at[88]); MULADD(at[10], at[87]); MULADD(at[11], at[86]); MULADD(at[12], at[85]); MULADD(at[13], at[84]); MULADD(at[14], at[83]); MULADD(at[15], at[82]); MULADD(at[16], at[81]); MULADD(at[17], at[80]); MULADD(at[18], at[79]); MULADD(at[19], at[78]); MULADD(at[20], at[77]); MULADD(at[21], at[76]); MULADD(at[22], at[75]); MULADD(at[23], at[74]); MULADD(at[24], at[73]); MULADD(at[25], at[72]); MULADD(at[26], at[71]); MULADD(at[27], at[70]); MULADD(at[28], at[69]); MULADD(at[29], at[68]); MULADD(at[30], at[67]); MULADD(at[31], at[66]); MULADD(at[32], at[65]); MULADD(at[33], at[64]); MULADD(at[34], at[63]); MULADD(at[35], at[62]); MULADD(at[36], at[61]); MULADD(at[37], at[60]); MULADD(at[38], at[59]); MULADD(at[39], at[58]); MULADD(at[40], at[57]); MULADD(at[41], at[56]); MULADD(at[42], at[55]); MULADD(at[43], at[54]); MULADD(at[44], at[53]); MULADD(at[45], at[52]); MULADD(at[46], at[51]); MULADD(at[47], at[50]); + COMBA_STORE(C->dp[49]); + /* 50 */ + COMBA_FORWARD; + MULADD(at[3], at[95]); MULADD(at[4], at[94]); MULADD(at[5], at[93]); MULADD(at[6], at[92]); MULADD(at[7], at[91]); MULADD(at[8], at[90]); MULADD(at[9], at[89]); MULADD(at[10], at[88]); MULADD(at[11], at[87]); MULADD(at[12], at[86]); MULADD(at[13], at[85]); MULADD(at[14], at[84]); MULADD(at[15], at[83]); MULADD(at[16], at[82]); MULADD(at[17], at[81]); MULADD(at[18], at[80]); MULADD(at[19], at[79]); MULADD(at[20], at[78]); MULADD(at[21], at[77]); MULADD(at[22], at[76]); MULADD(at[23], at[75]); MULADD(at[24], at[74]); MULADD(at[25], at[73]); MULADD(at[26], at[72]); MULADD(at[27], at[71]); MULADD(at[28], at[70]); MULADD(at[29], at[69]); MULADD(at[30], at[68]); MULADD(at[31], at[67]); MULADD(at[32], at[66]); MULADD(at[33], at[65]); MULADD(at[34], at[64]); MULADD(at[35], at[63]); MULADD(at[36], at[62]); MULADD(at[37], at[61]); MULADD(at[38], at[60]); MULADD(at[39], at[59]); MULADD(at[40], at[58]); MULADD(at[41], at[57]); MULADD(at[42], at[56]); MULADD(at[43], at[55]); MULADD(at[44], at[54]); MULADD(at[45], at[53]); MULADD(at[46], at[52]); MULADD(at[47], at[51]); + COMBA_STORE(C->dp[50]); + /* 51 */ + COMBA_FORWARD; + MULADD(at[4], at[95]); MULADD(at[5], at[94]); MULADD(at[6], at[93]); MULADD(at[7], at[92]); MULADD(at[8], at[91]); MULADD(at[9], at[90]); MULADD(at[10], at[89]); MULADD(at[11], at[88]); 
MULADD(at[12], at[87]); MULADD(at[13], at[86]); MULADD(at[14], at[85]); MULADD(at[15], at[84]); MULADD(at[16], at[83]); MULADD(at[17], at[82]); MULADD(at[18], at[81]); MULADD(at[19], at[80]); MULADD(at[20], at[79]); MULADD(at[21], at[78]); MULADD(at[22], at[77]); MULADD(at[23], at[76]); MULADD(at[24], at[75]); MULADD(at[25], at[74]); MULADD(at[26], at[73]); MULADD(at[27], at[72]); MULADD(at[28], at[71]); MULADD(at[29], at[70]); MULADD(at[30], at[69]); MULADD(at[31], at[68]); MULADD(at[32], at[67]); MULADD(at[33], at[66]); MULADD(at[34], at[65]); MULADD(at[35], at[64]); MULADD(at[36], at[63]); MULADD(at[37], at[62]); MULADD(at[38], at[61]); MULADD(at[39], at[60]); MULADD(at[40], at[59]); MULADD(at[41], at[58]); MULADD(at[42], at[57]); MULADD(at[43], at[56]); MULADD(at[44], at[55]); MULADD(at[45], at[54]); MULADD(at[46], at[53]); MULADD(at[47], at[52]); + COMBA_STORE(C->dp[51]); + /* 52 */ + COMBA_FORWARD; + MULADD(at[5], at[95]); MULADD(at[6], at[94]); MULADD(at[7], at[93]); MULADD(at[8], at[92]); MULADD(at[9], at[91]); MULADD(at[10], at[90]); MULADD(at[11], at[89]); MULADD(at[12], at[88]); MULADD(at[13], at[87]); MULADD(at[14], at[86]); MULADD(at[15], at[85]); MULADD(at[16], at[84]); MULADD(at[17], at[83]); MULADD(at[18], at[82]); MULADD(at[19], at[81]); MULADD(at[20], at[80]); MULADD(at[21], at[79]); MULADD(at[22], at[78]); MULADD(at[23], at[77]); MULADD(at[24], at[76]); MULADD(at[25], at[75]); MULADD(at[26], at[74]); MULADD(at[27], at[73]); MULADD(at[28], at[72]); MULADD(at[29], at[71]); MULADD(at[30], at[70]); MULADD(at[31], at[69]); MULADD(at[32], at[68]); MULADD(at[33], at[67]); MULADD(at[34], at[66]); MULADD(at[35], at[65]); MULADD(at[36], at[64]); MULADD(at[37], at[63]); MULADD(at[38], at[62]); MULADD(at[39], at[61]); MULADD(at[40], at[60]); MULADD(at[41], at[59]); MULADD(at[42], at[58]); MULADD(at[43], at[57]); MULADD(at[44], at[56]); MULADD(at[45], at[55]); MULADD(at[46], at[54]); MULADD(at[47], at[53]); + COMBA_STORE(C->dp[52]); + /* 53 */ + COMBA_FORWARD; + MULADD(at[6], at[95]); MULADD(at[7], at[94]); MULADD(at[8], at[93]); MULADD(at[9], at[92]); MULADD(at[10], at[91]); MULADD(at[11], at[90]); MULADD(at[12], at[89]); MULADD(at[13], at[88]); MULADD(at[14], at[87]); MULADD(at[15], at[86]); MULADD(at[16], at[85]); MULADD(at[17], at[84]); MULADD(at[18], at[83]); MULADD(at[19], at[82]); MULADD(at[20], at[81]); MULADD(at[21], at[80]); MULADD(at[22], at[79]); MULADD(at[23], at[78]); MULADD(at[24], at[77]); MULADD(at[25], at[76]); MULADD(at[26], at[75]); MULADD(at[27], at[74]); MULADD(at[28], at[73]); MULADD(at[29], at[72]); MULADD(at[30], at[71]); MULADD(at[31], at[70]); MULADD(at[32], at[69]); MULADD(at[33], at[68]); MULADD(at[34], at[67]); MULADD(at[35], at[66]); MULADD(at[36], at[65]); MULADD(at[37], at[64]); MULADD(at[38], at[63]); MULADD(at[39], at[62]); MULADD(at[40], at[61]); MULADD(at[41], at[60]); MULADD(at[42], at[59]); MULADD(at[43], at[58]); MULADD(at[44], at[57]); MULADD(at[45], at[56]); MULADD(at[46], at[55]); MULADD(at[47], at[54]); + COMBA_STORE(C->dp[53]); + /* 54 */ + COMBA_FORWARD; + MULADD(at[7], at[95]); MULADD(at[8], at[94]); MULADD(at[9], at[93]); MULADD(at[10], at[92]); MULADD(at[11], at[91]); MULADD(at[12], at[90]); MULADD(at[13], at[89]); MULADD(at[14], at[88]); MULADD(at[15], at[87]); MULADD(at[16], at[86]); MULADD(at[17], at[85]); MULADD(at[18], at[84]); MULADD(at[19], at[83]); MULADD(at[20], at[82]); MULADD(at[21], at[81]); MULADD(at[22], at[80]); MULADD(at[23], at[79]); MULADD(at[24], at[78]); MULADD(at[25], at[77]); MULADD(at[26], at[76]); MULADD(at[27], 
at[75]); MULADD(at[28], at[74]); MULADD(at[29], at[73]); MULADD(at[30], at[72]); MULADD(at[31], at[71]); MULADD(at[32], at[70]); MULADD(at[33], at[69]); MULADD(at[34], at[68]); MULADD(at[35], at[67]); MULADD(at[36], at[66]); MULADD(at[37], at[65]); MULADD(at[38], at[64]); MULADD(at[39], at[63]); MULADD(at[40], at[62]); MULADD(at[41], at[61]); MULADD(at[42], at[60]); MULADD(at[43], at[59]); MULADD(at[44], at[58]); MULADD(at[45], at[57]); MULADD(at[46], at[56]); MULADD(at[47], at[55]); + COMBA_STORE(C->dp[54]); + /* 55 */ + COMBA_FORWARD; + MULADD(at[8], at[95]); MULADD(at[9], at[94]); MULADD(at[10], at[93]); MULADD(at[11], at[92]); MULADD(at[12], at[91]); MULADD(at[13], at[90]); MULADD(at[14], at[89]); MULADD(at[15], at[88]); MULADD(at[16], at[87]); MULADD(at[17], at[86]); MULADD(at[18], at[85]); MULADD(at[19], at[84]); MULADD(at[20], at[83]); MULADD(at[21], at[82]); MULADD(at[22], at[81]); MULADD(at[23], at[80]); MULADD(at[24], at[79]); MULADD(at[25], at[78]); MULADD(at[26], at[77]); MULADD(at[27], at[76]); MULADD(at[28], at[75]); MULADD(at[29], at[74]); MULADD(at[30], at[73]); MULADD(at[31], at[72]); MULADD(at[32], at[71]); MULADD(at[33], at[70]); MULADD(at[34], at[69]); MULADD(at[35], at[68]); MULADD(at[36], at[67]); MULADD(at[37], at[66]); MULADD(at[38], at[65]); MULADD(at[39], at[64]); MULADD(at[40], at[63]); MULADD(at[41], at[62]); MULADD(at[42], at[61]); MULADD(at[43], at[60]); MULADD(at[44], at[59]); MULADD(at[45], at[58]); MULADD(at[46], at[57]); MULADD(at[47], at[56]); + COMBA_STORE(C->dp[55]); + /* 56 */ + COMBA_FORWARD; + MULADD(at[9], at[95]); MULADD(at[10], at[94]); MULADD(at[11], at[93]); MULADD(at[12], at[92]); MULADD(at[13], at[91]); MULADD(at[14], at[90]); MULADD(at[15], at[89]); MULADD(at[16], at[88]); MULADD(at[17], at[87]); MULADD(at[18], at[86]); MULADD(at[19], at[85]); MULADD(at[20], at[84]); MULADD(at[21], at[83]); MULADD(at[22], at[82]); MULADD(at[23], at[81]); MULADD(at[24], at[80]); MULADD(at[25], at[79]); MULADD(at[26], at[78]); MULADD(at[27], at[77]); MULADD(at[28], at[76]); MULADD(at[29], at[75]); MULADD(at[30], at[74]); MULADD(at[31], at[73]); MULADD(at[32], at[72]); MULADD(at[33], at[71]); MULADD(at[34], at[70]); MULADD(at[35], at[69]); MULADD(at[36], at[68]); MULADD(at[37], at[67]); MULADD(at[38], at[66]); MULADD(at[39], at[65]); MULADD(at[40], at[64]); MULADD(at[41], at[63]); MULADD(at[42], at[62]); MULADD(at[43], at[61]); MULADD(at[44], at[60]); MULADD(at[45], at[59]); MULADD(at[46], at[58]); MULADD(at[47], at[57]); + COMBA_STORE(C->dp[56]); + /* 57 */ + COMBA_FORWARD; + MULADD(at[10], at[95]); MULADD(at[11], at[94]); MULADD(at[12], at[93]); MULADD(at[13], at[92]); MULADD(at[14], at[91]); MULADD(at[15], at[90]); MULADD(at[16], at[89]); MULADD(at[17], at[88]); MULADD(at[18], at[87]); MULADD(at[19], at[86]); MULADD(at[20], at[85]); MULADD(at[21], at[84]); MULADD(at[22], at[83]); MULADD(at[23], at[82]); MULADD(at[24], at[81]); MULADD(at[25], at[80]); MULADD(at[26], at[79]); MULADD(at[27], at[78]); MULADD(at[28], at[77]); MULADD(at[29], at[76]); MULADD(at[30], at[75]); MULADD(at[31], at[74]); MULADD(at[32], at[73]); MULADD(at[33], at[72]); MULADD(at[34], at[71]); MULADD(at[35], at[70]); MULADD(at[36], at[69]); MULADD(at[37], at[68]); MULADD(at[38], at[67]); MULADD(at[39], at[66]); MULADD(at[40], at[65]); MULADD(at[41], at[64]); MULADD(at[42], at[63]); MULADD(at[43], at[62]); MULADD(at[44], at[61]); MULADD(at[45], at[60]); MULADD(at[46], at[59]); MULADD(at[47], at[58]); + COMBA_STORE(C->dp[57]); + /* 58 */ + COMBA_FORWARD; + MULADD(at[11], at[95]); 
MULADD(at[12], at[94]); MULADD(at[13], at[93]); MULADD(at[14], at[92]); MULADD(at[15], at[91]); MULADD(at[16], at[90]); MULADD(at[17], at[89]); MULADD(at[18], at[88]); MULADD(at[19], at[87]); MULADD(at[20], at[86]); MULADD(at[21], at[85]); MULADD(at[22], at[84]); MULADD(at[23], at[83]); MULADD(at[24], at[82]); MULADD(at[25], at[81]); MULADD(at[26], at[80]); MULADD(at[27], at[79]); MULADD(at[28], at[78]); MULADD(at[29], at[77]); MULADD(at[30], at[76]); MULADD(at[31], at[75]); MULADD(at[32], at[74]); MULADD(at[33], at[73]); MULADD(at[34], at[72]); MULADD(at[35], at[71]); MULADD(at[36], at[70]); MULADD(at[37], at[69]); MULADD(at[38], at[68]); MULADD(at[39], at[67]); MULADD(at[40], at[66]); MULADD(at[41], at[65]); MULADD(at[42], at[64]); MULADD(at[43], at[63]); MULADD(at[44], at[62]); MULADD(at[45], at[61]); MULADD(at[46], at[60]); MULADD(at[47], at[59]); + COMBA_STORE(C->dp[58]); + /* 59 */ + COMBA_FORWARD; + MULADD(at[12], at[95]); MULADD(at[13], at[94]); MULADD(at[14], at[93]); MULADD(at[15], at[92]); MULADD(at[16], at[91]); MULADD(at[17], at[90]); MULADD(at[18], at[89]); MULADD(at[19], at[88]); MULADD(at[20], at[87]); MULADD(at[21], at[86]); MULADD(at[22], at[85]); MULADD(at[23], at[84]); MULADD(at[24], at[83]); MULADD(at[25], at[82]); MULADD(at[26], at[81]); MULADD(at[27], at[80]); MULADD(at[28], at[79]); MULADD(at[29], at[78]); MULADD(at[30], at[77]); MULADD(at[31], at[76]); MULADD(at[32], at[75]); MULADD(at[33], at[74]); MULADD(at[34], at[73]); MULADD(at[35], at[72]); MULADD(at[36], at[71]); MULADD(at[37], at[70]); MULADD(at[38], at[69]); MULADD(at[39], at[68]); MULADD(at[40], at[67]); MULADD(at[41], at[66]); MULADD(at[42], at[65]); MULADD(at[43], at[64]); MULADD(at[44], at[63]); MULADD(at[45], at[62]); MULADD(at[46], at[61]); MULADD(at[47], at[60]); + COMBA_STORE(C->dp[59]); + /* 60 */ + COMBA_FORWARD; + MULADD(at[13], at[95]); MULADD(at[14], at[94]); MULADD(at[15], at[93]); MULADD(at[16], at[92]); MULADD(at[17], at[91]); MULADD(at[18], at[90]); MULADD(at[19], at[89]); MULADD(at[20], at[88]); MULADD(at[21], at[87]); MULADD(at[22], at[86]); MULADD(at[23], at[85]); MULADD(at[24], at[84]); MULADD(at[25], at[83]); MULADD(at[26], at[82]); MULADD(at[27], at[81]); MULADD(at[28], at[80]); MULADD(at[29], at[79]); MULADD(at[30], at[78]); MULADD(at[31], at[77]); MULADD(at[32], at[76]); MULADD(at[33], at[75]); MULADD(at[34], at[74]); MULADD(at[35], at[73]); MULADD(at[36], at[72]); MULADD(at[37], at[71]); MULADD(at[38], at[70]); MULADD(at[39], at[69]); MULADD(at[40], at[68]); MULADD(at[41], at[67]); MULADD(at[42], at[66]); MULADD(at[43], at[65]); MULADD(at[44], at[64]); MULADD(at[45], at[63]); MULADD(at[46], at[62]); MULADD(at[47], at[61]); + COMBA_STORE(C->dp[60]); + /* 61 */ + COMBA_FORWARD; + MULADD(at[14], at[95]); MULADD(at[15], at[94]); MULADD(at[16], at[93]); MULADD(at[17], at[92]); MULADD(at[18], at[91]); MULADD(at[19], at[90]); MULADD(at[20], at[89]); MULADD(at[21], at[88]); MULADD(at[22], at[87]); MULADD(at[23], at[86]); MULADD(at[24], at[85]); MULADD(at[25], at[84]); MULADD(at[26], at[83]); MULADD(at[27], at[82]); MULADD(at[28], at[81]); MULADD(at[29], at[80]); MULADD(at[30], at[79]); MULADD(at[31], at[78]); MULADD(at[32], at[77]); MULADD(at[33], at[76]); MULADD(at[34], at[75]); MULADD(at[35], at[74]); MULADD(at[36], at[73]); MULADD(at[37], at[72]); MULADD(at[38], at[71]); MULADD(at[39], at[70]); MULADD(at[40], at[69]); MULADD(at[41], at[68]); MULADD(at[42], at[67]); MULADD(at[43], at[66]); MULADD(at[44], at[65]); MULADD(at[45], at[64]); MULADD(at[46], at[63]); MULADD(at[47], at[62]); + 
COMBA_STORE(C->dp[61]); + /* 62 */ + COMBA_FORWARD; + MULADD(at[15], at[95]); MULADD(at[16], at[94]); MULADD(at[17], at[93]); MULADD(at[18], at[92]); MULADD(at[19], at[91]); MULADD(at[20], at[90]); MULADD(at[21], at[89]); MULADD(at[22], at[88]); MULADD(at[23], at[87]); MULADD(at[24], at[86]); MULADD(at[25], at[85]); MULADD(at[26], at[84]); MULADD(at[27], at[83]); MULADD(at[28], at[82]); MULADD(at[29], at[81]); MULADD(at[30], at[80]); MULADD(at[31], at[79]); MULADD(at[32], at[78]); MULADD(at[33], at[77]); MULADD(at[34], at[76]); MULADD(at[35], at[75]); MULADD(at[36], at[74]); MULADD(at[37], at[73]); MULADD(at[38], at[72]); MULADD(at[39], at[71]); MULADD(at[40], at[70]); MULADD(at[41], at[69]); MULADD(at[42], at[68]); MULADD(at[43], at[67]); MULADD(at[44], at[66]); MULADD(at[45], at[65]); MULADD(at[46], at[64]); MULADD(at[47], at[63]); + COMBA_STORE(C->dp[62]); + /* 63 */ + COMBA_FORWARD; + MULADD(at[16], at[95]); MULADD(at[17], at[94]); MULADD(at[18], at[93]); MULADD(at[19], at[92]); MULADD(at[20], at[91]); MULADD(at[21], at[90]); MULADD(at[22], at[89]); MULADD(at[23], at[88]); MULADD(at[24], at[87]); MULADD(at[25], at[86]); MULADD(at[26], at[85]); MULADD(at[27], at[84]); MULADD(at[28], at[83]); MULADD(at[29], at[82]); MULADD(at[30], at[81]); MULADD(at[31], at[80]); MULADD(at[32], at[79]); MULADD(at[33], at[78]); MULADD(at[34], at[77]); MULADD(at[35], at[76]); MULADD(at[36], at[75]); MULADD(at[37], at[74]); MULADD(at[38], at[73]); MULADD(at[39], at[72]); MULADD(at[40], at[71]); MULADD(at[41], at[70]); MULADD(at[42], at[69]); MULADD(at[43], at[68]); MULADD(at[44], at[67]); MULADD(at[45], at[66]); MULADD(at[46], at[65]); MULADD(at[47], at[64]); + COMBA_STORE(C->dp[63]); + /* 64 */ + COMBA_FORWARD; + MULADD(at[17], at[95]); MULADD(at[18], at[94]); MULADD(at[19], at[93]); MULADD(at[20], at[92]); MULADD(at[21], at[91]); MULADD(at[22], at[90]); MULADD(at[23], at[89]); MULADD(at[24], at[88]); MULADD(at[25], at[87]); MULADD(at[26], at[86]); MULADD(at[27], at[85]); MULADD(at[28], at[84]); MULADD(at[29], at[83]); MULADD(at[30], at[82]); MULADD(at[31], at[81]); MULADD(at[32], at[80]); MULADD(at[33], at[79]); MULADD(at[34], at[78]); MULADD(at[35], at[77]); MULADD(at[36], at[76]); MULADD(at[37], at[75]); MULADD(at[38], at[74]); MULADD(at[39], at[73]); MULADD(at[40], at[72]); MULADD(at[41], at[71]); MULADD(at[42], at[70]); MULADD(at[43], at[69]); MULADD(at[44], at[68]); MULADD(at[45], at[67]); MULADD(at[46], at[66]); MULADD(at[47], at[65]); + COMBA_STORE(C->dp[64]); + /* 65 */ + COMBA_FORWARD; + MULADD(at[18], at[95]); MULADD(at[19], at[94]); MULADD(at[20], at[93]); MULADD(at[21], at[92]); MULADD(at[22], at[91]); MULADD(at[23], at[90]); MULADD(at[24], at[89]); MULADD(at[25], at[88]); MULADD(at[26], at[87]); MULADD(at[27], at[86]); MULADD(at[28], at[85]); MULADD(at[29], at[84]); MULADD(at[30], at[83]); MULADD(at[31], at[82]); MULADD(at[32], at[81]); MULADD(at[33], at[80]); MULADD(at[34], at[79]); MULADD(at[35], at[78]); MULADD(at[36], at[77]); MULADD(at[37], at[76]); MULADD(at[38], at[75]); MULADD(at[39], at[74]); MULADD(at[40], at[73]); MULADD(at[41], at[72]); MULADD(at[42], at[71]); MULADD(at[43], at[70]); MULADD(at[44], at[69]); MULADD(at[45], at[68]); MULADD(at[46], at[67]); MULADD(at[47], at[66]); + COMBA_STORE(C->dp[65]); + /* 66 */ + COMBA_FORWARD; + MULADD(at[19], at[95]); MULADD(at[20], at[94]); MULADD(at[21], at[93]); MULADD(at[22], at[92]); MULADD(at[23], at[91]); MULADD(at[24], at[90]); MULADD(at[25], at[89]); MULADD(at[26], at[88]); MULADD(at[27], at[87]); MULADD(at[28], at[86]); 
MULADD(at[29], at[85]); MULADD(at[30], at[84]); MULADD(at[31], at[83]); MULADD(at[32], at[82]); MULADD(at[33], at[81]); MULADD(at[34], at[80]); MULADD(at[35], at[79]); MULADD(at[36], at[78]); MULADD(at[37], at[77]); MULADD(at[38], at[76]); MULADD(at[39], at[75]); MULADD(at[40], at[74]); MULADD(at[41], at[73]); MULADD(at[42], at[72]); MULADD(at[43], at[71]); MULADD(at[44], at[70]); MULADD(at[45], at[69]); MULADD(at[46], at[68]); MULADD(at[47], at[67]); + COMBA_STORE(C->dp[66]); + /* 67 */ + COMBA_FORWARD; + MULADD(at[20], at[95]); MULADD(at[21], at[94]); MULADD(at[22], at[93]); MULADD(at[23], at[92]); MULADD(at[24], at[91]); MULADD(at[25], at[90]); MULADD(at[26], at[89]); MULADD(at[27], at[88]); MULADD(at[28], at[87]); MULADD(at[29], at[86]); MULADD(at[30], at[85]); MULADD(at[31], at[84]); MULADD(at[32], at[83]); MULADD(at[33], at[82]); MULADD(at[34], at[81]); MULADD(at[35], at[80]); MULADD(at[36], at[79]); MULADD(at[37], at[78]); MULADD(at[38], at[77]); MULADD(at[39], at[76]); MULADD(at[40], at[75]); MULADD(at[41], at[74]); MULADD(at[42], at[73]); MULADD(at[43], at[72]); MULADD(at[44], at[71]); MULADD(at[45], at[70]); MULADD(at[46], at[69]); MULADD(at[47], at[68]); + COMBA_STORE(C->dp[67]); + /* 68 */ + COMBA_FORWARD; + MULADD(at[21], at[95]); MULADD(at[22], at[94]); MULADD(at[23], at[93]); MULADD(at[24], at[92]); MULADD(at[25], at[91]); MULADD(at[26], at[90]); MULADD(at[27], at[89]); MULADD(at[28], at[88]); MULADD(at[29], at[87]); MULADD(at[30], at[86]); MULADD(at[31], at[85]); MULADD(at[32], at[84]); MULADD(at[33], at[83]); MULADD(at[34], at[82]); MULADD(at[35], at[81]); MULADD(at[36], at[80]); MULADD(at[37], at[79]); MULADD(at[38], at[78]); MULADD(at[39], at[77]); MULADD(at[40], at[76]); MULADD(at[41], at[75]); MULADD(at[42], at[74]); MULADD(at[43], at[73]); MULADD(at[44], at[72]); MULADD(at[45], at[71]); MULADD(at[46], at[70]); MULADD(at[47], at[69]); + COMBA_STORE(C->dp[68]); + /* 69 */ + COMBA_FORWARD; + MULADD(at[22], at[95]); MULADD(at[23], at[94]); MULADD(at[24], at[93]); MULADD(at[25], at[92]); MULADD(at[26], at[91]); MULADD(at[27], at[90]); MULADD(at[28], at[89]); MULADD(at[29], at[88]); MULADD(at[30], at[87]); MULADD(at[31], at[86]); MULADD(at[32], at[85]); MULADD(at[33], at[84]); MULADD(at[34], at[83]); MULADD(at[35], at[82]); MULADD(at[36], at[81]); MULADD(at[37], at[80]); MULADD(at[38], at[79]); MULADD(at[39], at[78]); MULADD(at[40], at[77]); MULADD(at[41], at[76]); MULADD(at[42], at[75]); MULADD(at[43], at[74]); MULADD(at[44], at[73]); MULADD(at[45], at[72]); MULADD(at[46], at[71]); MULADD(at[47], at[70]); + COMBA_STORE(C->dp[69]); + /* 70 */ + COMBA_FORWARD; + MULADD(at[23], at[95]); MULADD(at[24], at[94]); MULADD(at[25], at[93]); MULADD(at[26], at[92]); MULADD(at[27], at[91]); MULADD(at[28], at[90]); MULADD(at[29], at[89]); MULADD(at[30], at[88]); MULADD(at[31], at[87]); MULADD(at[32], at[86]); MULADD(at[33], at[85]); MULADD(at[34], at[84]); MULADD(at[35], at[83]); MULADD(at[36], at[82]); MULADD(at[37], at[81]); MULADD(at[38], at[80]); MULADD(at[39], at[79]); MULADD(at[40], at[78]); MULADD(at[41], at[77]); MULADD(at[42], at[76]); MULADD(at[43], at[75]); MULADD(at[44], at[74]); MULADD(at[45], at[73]); MULADD(at[46], at[72]); MULADD(at[47], at[71]); + COMBA_STORE(C->dp[70]); + /* 71 */ + COMBA_FORWARD; + MULADD(at[24], at[95]); MULADD(at[25], at[94]); MULADD(at[26], at[93]); MULADD(at[27], at[92]); MULADD(at[28], at[91]); MULADD(at[29], at[90]); MULADD(at[30], at[89]); MULADD(at[31], at[88]); MULADD(at[32], at[87]); MULADD(at[33], at[86]); MULADD(at[34], at[85]); 
MULADD(at[35], at[84]); MULADD(at[36], at[83]); MULADD(at[37], at[82]); MULADD(at[38], at[81]); MULADD(at[39], at[80]); MULADD(at[40], at[79]); MULADD(at[41], at[78]); MULADD(at[42], at[77]); MULADD(at[43], at[76]); MULADD(at[44], at[75]); MULADD(at[45], at[74]); MULADD(at[46], at[73]); MULADD(at[47], at[72]); + COMBA_STORE(C->dp[71]); + /* 72 */ + COMBA_FORWARD; + MULADD(at[25], at[95]); MULADD(at[26], at[94]); MULADD(at[27], at[93]); MULADD(at[28], at[92]); MULADD(at[29], at[91]); MULADD(at[30], at[90]); MULADD(at[31], at[89]); MULADD(at[32], at[88]); MULADD(at[33], at[87]); MULADD(at[34], at[86]); MULADD(at[35], at[85]); MULADD(at[36], at[84]); MULADD(at[37], at[83]); MULADD(at[38], at[82]); MULADD(at[39], at[81]); MULADD(at[40], at[80]); MULADD(at[41], at[79]); MULADD(at[42], at[78]); MULADD(at[43], at[77]); MULADD(at[44], at[76]); MULADD(at[45], at[75]); MULADD(at[46], at[74]); MULADD(at[47], at[73]); + COMBA_STORE(C->dp[72]); + /* 73 */ + COMBA_FORWARD; + MULADD(at[26], at[95]); MULADD(at[27], at[94]); MULADD(at[28], at[93]); MULADD(at[29], at[92]); MULADD(at[30], at[91]); MULADD(at[31], at[90]); MULADD(at[32], at[89]); MULADD(at[33], at[88]); MULADD(at[34], at[87]); MULADD(at[35], at[86]); MULADD(at[36], at[85]); MULADD(at[37], at[84]); MULADD(at[38], at[83]); MULADD(at[39], at[82]); MULADD(at[40], at[81]); MULADD(at[41], at[80]); MULADD(at[42], at[79]); MULADD(at[43], at[78]); MULADD(at[44], at[77]); MULADD(at[45], at[76]); MULADD(at[46], at[75]); MULADD(at[47], at[74]); + COMBA_STORE(C->dp[73]); + /* 74 */ + COMBA_FORWARD; + MULADD(at[27], at[95]); MULADD(at[28], at[94]); MULADD(at[29], at[93]); MULADD(at[30], at[92]); MULADD(at[31], at[91]); MULADD(at[32], at[90]); MULADD(at[33], at[89]); MULADD(at[34], at[88]); MULADD(at[35], at[87]); MULADD(at[36], at[86]); MULADD(at[37], at[85]); MULADD(at[38], at[84]); MULADD(at[39], at[83]); MULADD(at[40], at[82]); MULADD(at[41], at[81]); MULADD(at[42], at[80]); MULADD(at[43], at[79]); MULADD(at[44], at[78]); MULADD(at[45], at[77]); MULADD(at[46], at[76]); MULADD(at[47], at[75]); + COMBA_STORE(C->dp[74]); + /* 75 */ + COMBA_FORWARD; + MULADD(at[28], at[95]); MULADD(at[29], at[94]); MULADD(at[30], at[93]); MULADD(at[31], at[92]); MULADD(at[32], at[91]); MULADD(at[33], at[90]); MULADD(at[34], at[89]); MULADD(at[35], at[88]); MULADD(at[36], at[87]); MULADD(at[37], at[86]); MULADD(at[38], at[85]); MULADD(at[39], at[84]); MULADD(at[40], at[83]); MULADD(at[41], at[82]); MULADD(at[42], at[81]); MULADD(at[43], at[80]); MULADD(at[44], at[79]); MULADD(at[45], at[78]); MULADD(at[46], at[77]); MULADD(at[47], at[76]); + COMBA_STORE(C->dp[75]); + /* 76 */ + COMBA_FORWARD; + MULADD(at[29], at[95]); MULADD(at[30], at[94]); MULADD(at[31], at[93]); MULADD(at[32], at[92]); MULADD(at[33], at[91]); MULADD(at[34], at[90]); MULADD(at[35], at[89]); MULADD(at[36], at[88]); MULADD(at[37], at[87]); MULADD(at[38], at[86]); MULADD(at[39], at[85]); MULADD(at[40], at[84]); MULADD(at[41], at[83]); MULADD(at[42], at[82]); MULADD(at[43], at[81]); MULADD(at[44], at[80]); MULADD(at[45], at[79]); MULADD(at[46], at[78]); MULADD(at[47], at[77]); + COMBA_STORE(C->dp[76]); + /* 77 */ + COMBA_FORWARD; + MULADD(at[30], at[95]); MULADD(at[31], at[94]); MULADD(at[32], at[93]); MULADD(at[33], at[92]); MULADD(at[34], at[91]); MULADD(at[35], at[90]); MULADD(at[36], at[89]); MULADD(at[37], at[88]); MULADD(at[38], at[87]); MULADD(at[39], at[86]); MULADD(at[40], at[85]); MULADD(at[41], at[84]); MULADD(at[42], at[83]); MULADD(at[43], at[82]); MULADD(at[44], at[81]); MULADD(at[45], at[80]); 
MULADD(at[46], at[79]); MULADD(at[47], at[78]); + COMBA_STORE(C->dp[77]); + /* 78 */ + COMBA_FORWARD; + MULADD(at[31], at[95]); MULADD(at[32], at[94]); MULADD(at[33], at[93]); MULADD(at[34], at[92]); MULADD(at[35], at[91]); MULADD(at[36], at[90]); MULADD(at[37], at[89]); MULADD(at[38], at[88]); MULADD(at[39], at[87]); MULADD(at[40], at[86]); MULADD(at[41], at[85]); MULADD(at[42], at[84]); MULADD(at[43], at[83]); MULADD(at[44], at[82]); MULADD(at[45], at[81]); MULADD(at[46], at[80]); MULADD(at[47], at[79]); + COMBA_STORE(C->dp[78]); + /* 79 */ + COMBA_FORWARD; + MULADD(at[32], at[95]); MULADD(at[33], at[94]); MULADD(at[34], at[93]); MULADD(at[35], at[92]); MULADD(at[36], at[91]); MULADD(at[37], at[90]); MULADD(at[38], at[89]); MULADD(at[39], at[88]); MULADD(at[40], at[87]); MULADD(at[41], at[86]); MULADD(at[42], at[85]); MULADD(at[43], at[84]); MULADD(at[44], at[83]); MULADD(at[45], at[82]); MULADD(at[46], at[81]); MULADD(at[47], at[80]); + COMBA_STORE(C->dp[79]); + /* 80 */ + COMBA_FORWARD; + MULADD(at[33], at[95]); MULADD(at[34], at[94]); MULADD(at[35], at[93]); MULADD(at[36], at[92]); MULADD(at[37], at[91]); MULADD(at[38], at[90]); MULADD(at[39], at[89]); MULADD(at[40], at[88]); MULADD(at[41], at[87]); MULADD(at[42], at[86]); MULADD(at[43], at[85]); MULADD(at[44], at[84]); MULADD(at[45], at[83]); MULADD(at[46], at[82]); MULADD(at[47], at[81]); + COMBA_STORE(C->dp[80]); + /* 81 */ + COMBA_FORWARD; + MULADD(at[34], at[95]); MULADD(at[35], at[94]); MULADD(at[36], at[93]); MULADD(at[37], at[92]); MULADD(at[38], at[91]); MULADD(at[39], at[90]); MULADD(at[40], at[89]); MULADD(at[41], at[88]); MULADD(at[42], at[87]); MULADD(at[43], at[86]); MULADD(at[44], at[85]); MULADD(at[45], at[84]); MULADD(at[46], at[83]); MULADD(at[47], at[82]); + COMBA_STORE(C->dp[81]); + /* 82 */ + COMBA_FORWARD; + MULADD(at[35], at[95]); MULADD(at[36], at[94]); MULADD(at[37], at[93]); MULADD(at[38], at[92]); MULADD(at[39], at[91]); MULADD(at[40], at[90]); MULADD(at[41], at[89]); MULADD(at[42], at[88]); MULADD(at[43], at[87]); MULADD(at[44], at[86]); MULADD(at[45], at[85]); MULADD(at[46], at[84]); MULADD(at[47], at[83]); + COMBA_STORE(C->dp[82]); + /* 83 */ + COMBA_FORWARD; + MULADD(at[36], at[95]); MULADD(at[37], at[94]); MULADD(at[38], at[93]); MULADD(at[39], at[92]); MULADD(at[40], at[91]); MULADD(at[41], at[90]); MULADD(at[42], at[89]); MULADD(at[43], at[88]); MULADD(at[44], at[87]); MULADD(at[45], at[86]); MULADD(at[46], at[85]); MULADD(at[47], at[84]); + COMBA_STORE(C->dp[83]); + /* 84 */ + COMBA_FORWARD; + MULADD(at[37], at[95]); MULADD(at[38], at[94]); MULADD(at[39], at[93]); MULADD(at[40], at[92]); MULADD(at[41], at[91]); MULADD(at[42], at[90]); MULADD(at[43], at[89]); MULADD(at[44], at[88]); MULADD(at[45], at[87]); MULADD(at[46], at[86]); MULADD(at[47], at[85]); + COMBA_STORE(C->dp[84]); + /* 85 */ + COMBA_FORWARD; + MULADD(at[38], at[95]); MULADD(at[39], at[94]); MULADD(at[40], at[93]); MULADD(at[41], at[92]); MULADD(at[42], at[91]); MULADD(at[43], at[90]); MULADD(at[44], at[89]); MULADD(at[45], at[88]); MULADD(at[46], at[87]); MULADD(at[47], at[86]); + COMBA_STORE(C->dp[85]); + /* 86 */ + COMBA_FORWARD; + MULADD(at[39], at[95]); MULADD(at[40], at[94]); MULADD(at[41], at[93]); MULADD(at[42], at[92]); MULADD(at[43], at[91]); MULADD(at[44], at[90]); MULADD(at[45], at[89]); MULADD(at[46], at[88]); MULADD(at[47], at[87]); + COMBA_STORE(C->dp[86]); + /* 87 */ + COMBA_FORWARD; + MULADD(at[40], at[95]); MULADD(at[41], at[94]); MULADD(at[42], at[93]); MULADD(at[43], at[92]); MULADD(at[44], at[91]); MULADD(at[45], 
at[90]); MULADD(at[46], at[89]); MULADD(at[47], at[88]);
+ COMBA_STORE(C->dp[87]);
+ /* 88 */
+ COMBA_FORWARD;
+ MULADD(at[41], at[95]); MULADD(at[42], at[94]); MULADD(at[43], at[93]); MULADD(at[44], at[92]); MULADD(at[45], at[91]); MULADD(at[46], at[90]); MULADD(at[47], at[89]);
+ COMBA_STORE(C->dp[88]);
+ /* 89 */
+ COMBA_FORWARD;
+ MULADD(at[42], at[95]); MULADD(at[43], at[94]); MULADD(at[44], at[93]); MULADD(at[45], at[92]); MULADD(at[46], at[91]); MULADD(at[47], at[90]);
+ COMBA_STORE(C->dp[89]);
+ /* 90 */
+ COMBA_FORWARD;
+ MULADD(at[43], at[95]); MULADD(at[44], at[94]); MULADD(at[45], at[93]); MULADD(at[46], at[92]); MULADD(at[47], at[91]);
+ COMBA_STORE(C->dp[90]);
+ /* 91 */
+ COMBA_FORWARD;
+ MULADD(at[44], at[95]); MULADD(at[45], at[94]); MULADD(at[46], at[93]); MULADD(at[47], at[92]);
+ COMBA_STORE(C->dp[91]);
+ /* 92 */
+ COMBA_FORWARD;
+ MULADD(at[45], at[95]); MULADD(at[46], at[94]); MULADD(at[47], at[93]);
+ COMBA_STORE(C->dp[92]);
+ /* 93 */
+ COMBA_FORWARD;
+ MULADD(at[46], at[95]); MULADD(at[47], at[94]);
+ COMBA_STORE(C->dp[93]);
+ /* 94 */
+ COMBA_FORWARD;
+ MULADD(at[47], at[95]);
+ COMBA_STORE(C->dp[94]);
+ COMBA_STORE2(C->dp[95]);
+ C->used = 96;
+ C->sign = A->sign ^ B->sign;
+ fp_clamp(C);
+ COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
+}
+#endif
diff --git a/client/wolfssl/wolfcrypt/src/fp_mul_comba_6.i b/client/wolfssl/wolfcrypt/src/fp_mul_comba_6.i
new file mode 100644
index 0000000..815badc
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/fp_mul_comba_6.i
@@ -0,0 +1,99 @@
+/* fp_mul_comba_6.i
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+#ifdef TFM_MUL6
+int fp_mul_comba6(fp_int *A, fp_int *B, fp_int *C)
+{
+ fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit at[12];
+#else
+ fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 12, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (at == NULL)
+ return FP_MEM;
+#endif
+
+ XMEMCPY(at, A->dp, 6 * sizeof(fp_digit));
+ XMEMCPY(at+6, B->dp, 6 * sizeof(fp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[6]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[7]); MULADD(at[1], at[6]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[8]); MULADD(at[1], at[7]); MULADD(at[2], at[6]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[9]); MULADD(at[1], at[8]); MULADD(at[2], at[7]); MULADD(at[3], at[6]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[10]); MULADD(at[1], at[9]); MULADD(at[2], at[8]); MULADD(at[3], at[7]); MULADD(at[4], at[6]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[11]); MULADD(at[1], at[10]); MULADD(at[2], at[9]); MULADD(at[3], at[8]); MULADD(at[4], at[7]); MULADD(at[5], at[6]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[11]); MULADD(at[2], at[10]); MULADD(at[3], at[9]); MULADD(at[4], at[8]); MULADD(at[5], at[7]);
+ COMBA_STORE(C->dp[6]);
+ /* 7 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[11]); MULADD(at[3], at[10]); MULADD(at[4], at[9]); MULADD(at[5], at[8]);
+ COMBA_STORE(C->dp[7]);
+ /* 8 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[11]); MULADD(at[4], at[10]); MULADD(at[5], at[9]);
+ COMBA_STORE(C->dp[8]);
+ /* 9 */
+ COMBA_FORWARD;
+ MULADD(at[4], at[11]); MULADD(at[5], at[10]);
+ COMBA_STORE(C->dp[9]);
+ /* 10 */
+ COMBA_FORWARD;
+ MULADD(at[5], at[11]);
+ COMBA_STORE(C->dp[10]);
+ COMBA_STORE2(C->dp[11]);
+ C->used = 12;
+ C->sign = A->sign ^ B->sign;
+ fp_clamp(C);
+ COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
+}
+#endif
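A note on the generated files above and below: every fp_mul_comba_N.i added by this patch is the same Comba multiplication algorithm unrolled for a fixed digit count N. Each `/* k */` block computes one output column: the chain of MULADD calls accumulates every partial product at[i]*at[N+j] with i+j == k into a three-digit carry accumulator, COMBA_STORE writes the finished low digit of that column to C->dp[k], and COMBA_FORWARD shifts the accumulator down one digit for the next column. The rolled-up loop below is a minimal illustrative sketch of that scheme, not wolfSSL code: it assumes a 32-bit digit with a 64-bit intermediate type, and the name comba_mul_generic plus the explicit carry arithmetic are stand-ins for the platform-specific MULADD/COMBA_* macros that wolfSSL defines elsewhere in its tfm headers.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t fp_digit;   /* one digit (assumption: a 32-bit digit build) */
typedef uint64_t fp_word;    /* wide enough for one digit-by-digit product   */

/* Multiply a[0..n-1] by b[0..n-1] into c[0..2n-1], column by column (n >= 1).
 * The (c0, c1, c2) triple mirrors the accumulator the generated files keep
 * in registers: the inner-loop body plays the role of MULADD, the store of
 * c0 plays COMBA_STORE, and the end-of-column shift plays COMBA_FORWARD. */
static void comba_mul_generic(const fp_digit *a, const fp_digit *b,
                              fp_digit *c, int n)
{
    fp_digit c0 = 0, c1 = 0, c2 = 0;
    for (int k = 0; k < 2 * n - 1; k++) {
        int i_lo = (k < n) ? 0 : k - n + 1;   /* first valid a-index */
        int i_hi = (k < n) ? k : n - 1;       /* last valid a-index  */
        for (int i = i_lo; i <= i_hi; i++) {
            /* MULADD: fold a[i] * b[k - i] into the 3-digit accumulator */
            fp_word t = (fp_word)a[i] * (fp_word)b[k - i];
            fp_word s = (fp_word)c0 + (fp_digit)t;
            c0 = (fp_digit)s;
            s  = (fp_word)c1 + (fp_digit)(t >> 32) + (s >> 32);
            c1 = (fp_digit)s;
            c2 += (fp_digit)(s >> 32);
        }
        c[k] = c0;                     /* COMBA_STORE   */
        c0 = c1; c1 = c2; c2 = 0;      /* COMBA_FORWARD */
    }
    c[2 * n - 1] = c0;                 /* COMBA_STORE2  */
}

int main(void)
{
    /* (2^32 + 2) * (2^32 + 3) = 2^64 + 5*2^32 + 6 */
    fp_digit a[2] = { 2u, 1u };        /* little-endian digits */
    fp_digit b[2] = { 3u, 1u };
    fp_digit c[4] = { 0, 0, 0, 0 };
    comba_mul_generic(a, b, c, 2);
    printf("%u %u %u %u\n", (unsigned)c[0], (unsigned)c[1],
           (unsigned)c[2], (unsigned)c[3]);   /* expect: 6 5 1 0 */
    return 0;
}

The per-size unrolled files exist so that a build can opt in, via the TFM_MUL6 / TFM_MUL64 style guards seen in this patch, to straight-line code for the operand sizes it actually uses, trading code size for the removal of loop and index overhead.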
diff --git a/client/wolfssl/wolfcrypt/src/fp_mul_comba_64.i b/client/wolfssl/wolfcrypt/src/fp_mul_comba_64.i
new file mode 100644
index 0000000..7080fa2
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/fp_mul_comba_64.i
@@ -0,0 +1,563 @@
+/* fp_mul_comba_64.i
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+#ifdef TFM_MUL64
+int fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C)
+{
+ fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit at[128];
+#else
+ fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 128, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (at == NULL)
+ return FP_MEM;
+#endif
+
+ XMEMCPY(at, A->dp, 64 * sizeof(fp_digit));
+ XMEMCPY(at+64, B->dp, 64 * sizeof(fp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[64]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[65]); MULADD(at[1], at[64]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[66]); MULADD(at[1], at[65]); MULADD(at[2], at[64]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[67]); MULADD(at[1], at[66]); MULADD(at[2], at[65]); MULADD(at[3], at[64]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[68]); MULADD(at[1], at[67]); MULADD(at[2], at[66]); MULADD(at[3], at[65]); MULADD(at[4], at[64]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[69]); MULADD(at[1], at[68]); MULADD(at[2], at[67]); MULADD(at[3], at[66]); MULADD(at[4], at[65]); MULADD(at[5], at[64]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[70]); MULADD(at[1], at[69]); MULADD(at[2], at[68]); MULADD(at[3], at[67]); MULADD(at[4], at[66]); MULADD(at[5], at[65]); MULADD(at[6], at[64]);
+ COMBA_STORE(C->dp[6]);
+ /* 7 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[71]); MULADD(at[1], at[70]); MULADD(at[2], at[69]); MULADD(at[3], at[68]); MULADD(at[4], at[67]); MULADD(at[5], at[66]); MULADD(at[6], at[65]); MULADD(at[7], at[64]);
+ COMBA_STORE(C->dp[7]);
+ /* 8 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[72]); MULADD(at[1], at[71]); MULADD(at[2], at[70]); MULADD(at[3], at[69]); MULADD(at[4], at[68]); MULADD(at[5], at[67]); MULADD(at[6], at[66]); MULADD(at[7], at[65]); MULADD(at[8], at[64]);
+ COMBA_STORE(C->dp[8]);
+ /* 9 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[73]); MULADD(at[1], at[72]); MULADD(at[2], at[71]); MULADD(at[3], at[70]); MULADD(at[4], at[69]); MULADD(at[5], at[68]); MULADD(at[6], at[67]); MULADD(at[7], at[66]); MULADD(at[8], at[65]); MULADD(at[9], at[64]);
+ COMBA_STORE(C->dp[9]);
+ /* 10 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[74]); MULADD(at[1], at[73]); MULADD(at[2], at[72]); MULADD(at[3], at[71]); MULADD(at[4], at[70]); MULADD(at[5], at[69]); MULADD(at[6], at[68]); MULADD(at[7], at[67]); MULADD(at[8], at[66]); MULADD(at[9], at[65]); MULADD(at[10], at[64]);
+ COMBA_STORE(C->dp[10]);
+ /* 11 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[75]); MULADD(at[1], at[74]); MULADD(at[2], at[73]); MULADD(at[3], at[72]); MULADD(at[4], at[71]); MULADD(at[5], at[70]); MULADD(at[6], at[69]); MULADD(at[7], at[68]); MULADD(at[8], at[67]); MULADD(at[9], at[66]); MULADD(at[10], at[65]); MULADD(at[11], at[64]);
+ COMBA_STORE(C->dp[11]);
+ /* 12 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[76]); MULADD(at[1], at[75]); MULADD(at[2], at[74]); MULADD(at[3], at[73]); MULADD(at[4], at[72]); MULADD(at[5], at[71]); MULADD(at[6], at[70]); MULADD(at[7], at[69]); MULADD(at[8], at[68]); MULADD(at[9], at[67]); MULADD(at[10], at[66]); MULADD(at[11], at[65]); MULADD(at[12], at[64]);
+ COMBA_STORE(C->dp[12]);
+ /* 13 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[77]); MULADD(at[1],
at[76]); MULADD(at[2], at[75]); MULADD(at[3], at[74]); MULADD(at[4], at[73]); MULADD(at[5], at[72]); MULADD(at[6], at[71]); MULADD(at[7], at[70]); MULADD(at[8], at[69]); MULADD(at[9], at[68]); MULADD(at[10], at[67]); MULADD(at[11], at[66]); MULADD(at[12], at[65]); MULADD(at[13], at[64]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[78]); MULADD(at[1], at[77]); MULADD(at[2], at[76]); MULADD(at[3], at[75]); MULADD(at[4], at[74]); MULADD(at[5], at[73]); MULADD(at[6], at[72]); MULADD(at[7], at[71]); MULADD(at[8], at[70]); MULADD(at[9], at[69]); MULADD(at[10], at[68]); MULADD(at[11], at[67]); MULADD(at[12], at[66]); MULADD(at[13], at[65]); MULADD(at[14], at[64]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[79]); MULADD(at[1], at[78]); MULADD(at[2], at[77]); MULADD(at[3], at[76]); MULADD(at[4], at[75]); MULADD(at[5], at[74]); MULADD(at[6], at[73]); MULADD(at[7], at[72]); MULADD(at[8], at[71]); MULADD(at[9], at[70]); MULADD(at[10], at[69]); MULADD(at[11], at[68]); MULADD(at[12], at[67]); MULADD(at[13], at[66]); MULADD(at[14], at[65]); MULADD(at[15], at[64]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[0], at[80]); MULADD(at[1], at[79]); MULADD(at[2], at[78]); MULADD(at[3], at[77]); MULADD(at[4], at[76]); MULADD(at[5], at[75]); MULADD(at[6], at[74]); MULADD(at[7], at[73]); MULADD(at[8], at[72]); MULADD(at[9], at[71]); MULADD(at[10], at[70]); MULADD(at[11], at[69]); MULADD(at[12], at[68]); MULADD(at[13], at[67]); MULADD(at[14], at[66]); MULADD(at[15], at[65]); MULADD(at[16], at[64]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[0], at[81]); MULADD(at[1], at[80]); MULADD(at[2], at[79]); MULADD(at[3], at[78]); MULADD(at[4], at[77]); MULADD(at[5], at[76]); MULADD(at[6], at[75]); MULADD(at[7], at[74]); MULADD(at[8], at[73]); MULADD(at[9], at[72]); MULADD(at[10], at[71]); MULADD(at[11], at[70]); MULADD(at[12], at[69]); MULADD(at[13], at[68]); MULADD(at[14], at[67]); MULADD(at[15], at[66]); MULADD(at[16], at[65]); MULADD(at[17], at[64]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[0], at[82]); MULADD(at[1], at[81]); MULADD(at[2], at[80]); MULADD(at[3], at[79]); MULADD(at[4], at[78]); MULADD(at[5], at[77]); MULADD(at[6], at[76]); MULADD(at[7], at[75]); MULADD(at[8], at[74]); MULADD(at[9], at[73]); MULADD(at[10], at[72]); MULADD(at[11], at[71]); MULADD(at[12], at[70]); MULADD(at[13], at[69]); MULADD(at[14], at[68]); MULADD(at[15], at[67]); MULADD(at[16], at[66]); MULADD(at[17], at[65]); MULADD(at[18], at[64]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[0], at[83]); MULADD(at[1], at[82]); MULADD(at[2], at[81]); MULADD(at[3], at[80]); MULADD(at[4], at[79]); MULADD(at[5], at[78]); MULADD(at[6], at[77]); MULADD(at[7], at[76]); MULADD(at[8], at[75]); MULADD(at[9], at[74]); MULADD(at[10], at[73]); MULADD(at[11], at[72]); MULADD(at[12], at[71]); MULADD(at[13], at[70]); MULADD(at[14], at[69]); MULADD(at[15], at[68]); MULADD(at[16], at[67]); MULADD(at[17], at[66]); MULADD(at[18], at[65]); MULADD(at[19], at[64]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[0], at[84]); MULADD(at[1], at[83]); MULADD(at[2], at[82]); MULADD(at[3], at[81]); MULADD(at[4], at[80]); MULADD(at[5], at[79]); MULADD(at[6], at[78]); MULADD(at[7], at[77]); MULADD(at[8], at[76]); MULADD(at[9], at[75]); MULADD(at[10], at[74]); MULADD(at[11], at[73]); MULADD(at[12], at[72]); MULADD(at[13], at[71]); MULADD(at[14], at[70]); MULADD(at[15], at[69]); MULADD(at[16], at[68]); MULADD(at[17], 
at[67]); MULADD(at[18], at[66]); MULADD(at[19], at[65]); MULADD(at[20], at[64]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[0], at[85]); MULADD(at[1], at[84]); MULADD(at[2], at[83]); MULADD(at[3], at[82]); MULADD(at[4], at[81]); MULADD(at[5], at[80]); MULADD(at[6], at[79]); MULADD(at[7], at[78]); MULADD(at[8], at[77]); MULADD(at[9], at[76]); MULADD(at[10], at[75]); MULADD(at[11], at[74]); MULADD(at[12], at[73]); MULADD(at[13], at[72]); MULADD(at[14], at[71]); MULADD(at[15], at[70]); MULADD(at[16], at[69]); MULADD(at[17], at[68]); MULADD(at[18], at[67]); MULADD(at[19], at[66]); MULADD(at[20], at[65]); MULADD(at[21], at[64]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[0], at[86]); MULADD(at[1], at[85]); MULADD(at[2], at[84]); MULADD(at[3], at[83]); MULADD(at[4], at[82]); MULADD(at[5], at[81]); MULADD(at[6], at[80]); MULADD(at[7], at[79]); MULADD(at[8], at[78]); MULADD(at[9], at[77]); MULADD(at[10], at[76]); MULADD(at[11], at[75]); MULADD(at[12], at[74]); MULADD(at[13], at[73]); MULADD(at[14], at[72]); MULADD(at[15], at[71]); MULADD(at[16], at[70]); MULADD(at[17], at[69]); MULADD(at[18], at[68]); MULADD(at[19], at[67]); MULADD(at[20], at[66]); MULADD(at[21], at[65]); MULADD(at[22], at[64]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[0], at[87]); MULADD(at[1], at[86]); MULADD(at[2], at[85]); MULADD(at[3], at[84]); MULADD(at[4], at[83]); MULADD(at[5], at[82]); MULADD(at[6], at[81]); MULADD(at[7], at[80]); MULADD(at[8], at[79]); MULADD(at[9], at[78]); MULADD(at[10], at[77]); MULADD(at[11], at[76]); MULADD(at[12], at[75]); MULADD(at[13], at[74]); MULADD(at[14], at[73]); MULADD(at[15], at[72]); MULADD(at[16], at[71]); MULADD(at[17], at[70]); MULADD(at[18], at[69]); MULADD(at[19], at[68]); MULADD(at[20], at[67]); MULADD(at[21], at[66]); MULADD(at[22], at[65]); MULADD(at[23], at[64]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[0], at[88]); MULADD(at[1], at[87]); MULADD(at[2], at[86]); MULADD(at[3], at[85]); MULADD(at[4], at[84]); MULADD(at[5], at[83]); MULADD(at[6], at[82]); MULADD(at[7], at[81]); MULADD(at[8], at[80]); MULADD(at[9], at[79]); MULADD(at[10], at[78]); MULADD(at[11], at[77]); MULADD(at[12], at[76]); MULADD(at[13], at[75]); MULADD(at[14], at[74]); MULADD(at[15], at[73]); MULADD(at[16], at[72]); MULADD(at[17], at[71]); MULADD(at[18], at[70]); MULADD(at[19], at[69]); MULADD(at[20], at[68]); MULADD(at[21], at[67]); MULADD(at[22], at[66]); MULADD(at[23], at[65]); MULADD(at[24], at[64]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[0], at[89]); MULADD(at[1], at[88]); MULADD(at[2], at[87]); MULADD(at[3], at[86]); MULADD(at[4], at[85]); MULADD(at[5], at[84]); MULADD(at[6], at[83]); MULADD(at[7], at[82]); MULADD(at[8], at[81]); MULADD(at[9], at[80]); MULADD(at[10], at[79]); MULADD(at[11], at[78]); MULADD(at[12], at[77]); MULADD(at[13], at[76]); MULADD(at[14], at[75]); MULADD(at[15], at[74]); MULADD(at[16], at[73]); MULADD(at[17], at[72]); MULADD(at[18], at[71]); MULADD(at[19], at[70]); MULADD(at[20], at[69]); MULADD(at[21], at[68]); MULADD(at[22], at[67]); MULADD(at[23], at[66]); MULADD(at[24], at[65]); MULADD(at[25], at[64]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[0], at[90]); MULADD(at[1], at[89]); MULADD(at[2], at[88]); MULADD(at[3], at[87]); MULADD(at[4], at[86]); MULADD(at[5], at[85]); MULADD(at[6], at[84]); MULADD(at[7], at[83]); MULADD(at[8], at[82]); MULADD(at[9], at[81]); MULADD(at[10], at[80]); MULADD(at[11], at[79]); MULADD(at[12], at[78]); 
MULADD(at[13], at[77]); MULADD(at[14], at[76]); MULADD(at[15], at[75]); MULADD(at[16], at[74]); MULADD(at[17], at[73]); MULADD(at[18], at[72]); MULADD(at[19], at[71]); MULADD(at[20], at[70]); MULADD(at[21], at[69]); MULADD(at[22], at[68]); MULADD(at[23], at[67]); MULADD(at[24], at[66]); MULADD(at[25], at[65]); MULADD(at[26], at[64]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[0], at[91]); MULADD(at[1], at[90]); MULADD(at[2], at[89]); MULADD(at[3], at[88]); MULADD(at[4], at[87]); MULADD(at[5], at[86]); MULADD(at[6], at[85]); MULADD(at[7], at[84]); MULADD(at[8], at[83]); MULADD(at[9], at[82]); MULADD(at[10], at[81]); MULADD(at[11], at[80]); MULADD(at[12], at[79]); MULADD(at[13], at[78]); MULADD(at[14], at[77]); MULADD(at[15], at[76]); MULADD(at[16], at[75]); MULADD(at[17], at[74]); MULADD(at[18], at[73]); MULADD(at[19], at[72]); MULADD(at[20], at[71]); MULADD(at[21], at[70]); MULADD(at[22], at[69]); MULADD(at[23], at[68]); MULADD(at[24], at[67]); MULADD(at[25], at[66]); MULADD(at[26], at[65]); MULADD(at[27], at[64]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[0], at[92]); MULADD(at[1], at[91]); MULADD(at[2], at[90]); MULADD(at[3], at[89]); MULADD(at[4], at[88]); MULADD(at[5], at[87]); MULADD(at[6], at[86]); MULADD(at[7], at[85]); MULADD(at[8], at[84]); MULADD(at[9], at[83]); MULADD(at[10], at[82]); MULADD(at[11], at[81]); MULADD(at[12], at[80]); MULADD(at[13], at[79]); MULADD(at[14], at[78]); MULADD(at[15], at[77]); MULADD(at[16], at[76]); MULADD(at[17], at[75]); MULADD(at[18], at[74]); MULADD(at[19], at[73]); MULADD(at[20], at[72]); MULADD(at[21], at[71]); MULADD(at[22], at[70]); MULADD(at[23], at[69]); MULADD(at[24], at[68]); MULADD(at[25], at[67]); MULADD(at[26], at[66]); MULADD(at[27], at[65]); MULADD(at[28], at[64]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[0], at[93]); MULADD(at[1], at[92]); MULADD(at[2], at[91]); MULADD(at[3], at[90]); MULADD(at[4], at[89]); MULADD(at[5], at[88]); MULADD(at[6], at[87]); MULADD(at[7], at[86]); MULADD(at[8], at[85]); MULADD(at[9], at[84]); MULADD(at[10], at[83]); MULADD(at[11], at[82]); MULADD(at[12], at[81]); MULADD(at[13], at[80]); MULADD(at[14], at[79]); MULADD(at[15], at[78]); MULADD(at[16], at[77]); MULADD(at[17], at[76]); MULADD(at[18], at[75]); MULADD(at[19], at[74]); MULADD(at[20], at[73]); MULADD(at[21], at[72]); MULADD(at[22], at[71]); MULADD(at[23], at[70]); MULADD(at[24], at[69]); MULADD(at[25], at[68]); MULADD(at[26], at[67]); MULADD(at[27], at[66]); MULADD(at[28], at[65]); MULADD(at[29], at[64]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[0], at[94]); MULADD(at[1], at[93]); MULADD(at[2], at[92]); MULADD(at[3], at[91]); MULADD(at[4], at[90]); MULADD(at[5], at[89]); MULADD(at[6], at[88]); MULADD(at[7], at[87]); MULADD(at[8], at[86]); MULADD(at[9], at[85]); MULADD(at[10], at[84]); MULADD(at[11], at[83]); MULADD(at[12], at[82]); MULADD(at[13], at[81]); MULADD(at[14], at[80]); MULADD(at[15], at[79]); MULADD(at[16], at[78]); MULADD(at[17], at[77]); MULADD(at[18], at[76]); MULADD(at[19], at[75]); MULADD(at[20], at[74]); MULADD(at[21], at[73]); MULADD(at[22], at[72]); MULADD(at[23], at[71]); MULADD(at[24], at[70]); MULADD(at[25], at[69]); MULADD(at[26], at[68]); MULADD(at[27], at[67]); MULADD(at[28], at[66]); MULADD(at[29], at[65]); MULADD(at[30], at[64]); + COMBA_STORE(C->dp[30]); + /* 31 */ + COMBA_FORWARD; + MULADD(at[0], at[95]); MULADD(at[1], at[94]); MULADD(at[2], at[93]); MULADD(at[3], at[92]); MULADD(at[4], at[91]); MULADD(at[5], at[90]); 
MULADD(at[6], at[89]); MULADD(at[7], at[88]); MULADD(at[8], at[87]); MULADD(at[9], at[86]); MULADD(at[10], at[85]); MULADD(at[11], at[84]); MULADD(at[12], at[83]); MULADD(at[13], at[82]); MULADD(at[14], at[81]); MULADD(at[15], at[80]); MULADD(at[16], at[79]); MULADD(at[17], at[78]); MULADD(at[18], at[77]); MULADD(at[19], at[76]); MULADD(at[20], at[75]); MULADD(at[21], at[74]); MULADD(at[22], at[73]); MULADD(at[23], at[72]); MULADD(at[24], at[71]); MULADD(at[25], at[70]); MULADD(at[26], at[69]); MULADD(at[27], at[68]); MULADD(at[28], at[67]); MULADD(at[29], at[66]); MULADD(at[30], at[65]); MULADD(at[31], at[64]); + COMBA_STORE(C->dp[31]); + /* 32 */ + COMBA_FORWARD; + MULADD(at[0], at[96]); MULADD(at[1], at[95]); MULADD(at[2], at[94]); MULADD(at[3], at[93]); MULADD(at[4], at[92]); MULADD(at[5], at[91]); MULADD(at[6], at[90]); MULADD(at[7], at[89]); MULADD(at[8], at[88]); MULADD(at[9], at[87]); MULADD(at[10], at[86]); MULADD(at[11], at[85]); MULADD(at[12], at[84]); MULADD(at[13], at[83]); MULADD(at[14], at[82]); MULADD(at[15], at[81]); MULADD(at[16], at[80]); MULADD(at[17], at[79]); MULADD(at[18], at[78]); MULADD(at[19], at[77]); MULADD(at[20], at[76]); MULADD(at[21], at[75]); MULADD(at[22], at[74]); MULADD(at[23], at[73]); MULADD(at[24], at[72]); MULADD(at[25], at[71]); MULADD(at[26], at[70]); MULADD(at[27], at[69]); MULADD(at[28], at[68]); MULADD(at[29], at[67]); MULADD(at[30], at[66]); MULADD(at[31], at[65]); MULADD(at[32], at[64]); + COMBA_STORE(C->dp[32]); + /* 33 */ + COMBA_FORWARD; + MULADD(at[0], at[97]); MULADD(at[1], at[96]); MULADD(at[2], at[95]); MULADD(at[3], at[94]); MULADD(at[4], at[93]); MULADD(at[5], at[92]); MULADD(at[6], at[91]); MULADD(at[7], at[90]); MULADD(at[8], at[89]); MULADD(at[9], at[88]); MULADD(at[10], at[87]); MULADD(at[11], at[86]); MULADD(at[12], at[85]); MULADD(at[13], at[84]); MULADD(at[14], at[83]); MULADD(at[15], at[82]); MULADD(at[16], at[81]); MULADD(at[17], at[80]); MULADD(at[18], at[79]); MULADD(at[19], at[78]); MULADD(at[20], at[77]); MULADD(at[21], at[76]); MULADD(at[22], at[75]); MULADD(at[23], at[74]); MULADD(at[24], at[73]); MULADD(at[25], at[72]); MULADD(at[26], at[71]); MULADD(at[27], at[70]); MULADD(at[28], at[69]); MULADD(at[29], at[68]); MULADD(at[30], at[67]); MULADD(at[31], at[66]); MULADD(at[32], at[65]); MULADD(at[33], at[64]); + COMBA_STORE(C->dp[33]); + /* 34 */ + COMBA_FORWARD; + MULADD(at[0], at[98]); MULADD(at[1], at[97]); MULADD(at[2], at[96]); MULADD(at[3], at[95]); MULADD(at[4], at[94]); MULADD(at[5], at[93]); MULADD(at[6], at[92]); MULADD(at[7], at[91]); MULADD(at[8], at[90]); MULADD(at[9], at[89]); MULADD(at[10], at[88]); MULADD(at[11], at[87]); MULADD(at[12], at[86]); MULADD(at[13], at[85]); MULADD(at[14], at[84]); MULADD(at[15], at[83]); MULADD(at[16], at[82]); MULADD(at[17], at[81]); MULADD(at[18], at[80]); MULADD(at[19], at[79]); MULADD(at[20], at[78]); MULADD(at[21], at[77]); MULADD(at[22], at[76]); MULADD(at[23], at[75]); MULADD(at[24], at[74]); MULADD(at[25], at[73]); MULADD(at[26], at[72]); MULADD(at[27], at[71]); MULADD(at[28], at[70]); MULADD(at[29], at[69]); MULADD(at[30], at[68]); MULADD(at[31], at[67]); MULADD(at[32], at[66]); MULADD(at[33], at[65]); MULADD(at[34], at[64]); + COMBA_STORE(C->dp[34]); + /* 35 */ + COMBA_FORWARD; + MULADD(at[0], at[99]); MULADD(at[1], at[98]); MULADD(at[2], at[97]); MULADD(at[3], at[96]); MULADD(at[4], at[95]); MULADD(at[5], at[94]); MULADD(at[6], at[93]); MULADD(at[7], at[92]); MULADD(at[8], at[91]); MULADD(at[9], at[90]); MULADD(at[10], at[89]); MULADD(at[11], at[88]); MULADD(at[12], 
at[87]); MULADD(at[13], at[86]); MULADD(at[14], at[85]); MULADD(at[15], at[84]); MULADD(at[16], at[83]); MULADD(at[17], at[82]); MULADD(at[18], at[81]); MULADD(at[19], at[80]); MULADD(at[20], at[79]); MULADD(at[21], at[78]); MULADD(at[22], at[77]); MULADD(at[23], at[76]); MULADD(at[24], at[75]); MULADD(at[25], at[74]); MULADD(at[26], at[73]); MULADD(at[27], at[72]); MULADD(at[28], at[71]); MULADD(at[29], at[70]); MULADD(at[30], at[69]); MULADD(at[31], at[68]); MULADD(at[32], at[67]); MULADD(at[33], at[66]); MULADD(at[34], at[65]); MULADD(at[35], at[64]); + COMBA_STORE(C->dp[35]); + /* 36 */ + COMBA_FORWARD; + MULADD(at[0], at[100]); MULADD(at[1], at[99]); MULADD(at[2], at[98]); MULADD(at[3], at[97]); MULADD(at[4], at[96]); MULADD(at[5], at[95]); MULADD(at[6], at[94]); MULADD(at[7], at[93]); MULADD(at[8], at[92]); MULADD(at[9], at[91]); MULADD(at[10], at[90]); MULADD(at[11], at[89]); MULADD(at[12], at[88]); MULADD(at[13], at[87]); MULADD(at[14], at[86]); MULADD(at[15], at[85]); MULADD(at[16], at[84]); MULADD(at[17], at[83]); MULADD(at[18], at[82]); MULADD(at[19], at[81]); MULADD(at[20], at[80]); MULADD(at[21], at[79]); MULADD(at[22], at[78]); MULADD(at[23], at[77]); MULADD(at[24], at[76]); MULADD(at[25], at[75]); MULADD(at[26], at[74]); MULADD(at[27], at[73]); MULADD(at[28], at[72]); MULADD(at[29], at[71]); MULADD(at[30], at[70]); MULADD(at[31], at[69]); MULADD(at[32], at[68]); MULADD(at[33], at[67]); MULADD(at[34], at[66]); MULADD(at[35], at[65]); MULADD(at[36], at[64]); + COMBA_STORE(C->dp[36]); + /* 37 */ + COMBA_FORWARD; + MULADD(at[0], at[101]); MULADD(at[1], at[100]); MULADD(at[2], at[99]); MULADD(at[3], at[98]); MULADD(at[4], at[97]); MULADD(at[5], at[96]); MULADD(at[6], at[95]); MULADD(at[7], at[94]); MULADD(at[8], at[93]); MULADD(at[9], at[92]); MULADD(at[10], at[91]); MULADD(at[11], at[90]); MULADD(at[12], at[89]); MULADD(at[13], at[88]); MULADD(at[14], at[87]); MULADD(at[15], at[86]); MULADD(at[16], at[85]); MULADD(at[17], at[84]); MULADD(at[18], at[83]); MULADD(at[19], at[82]); MULADD(at[20], at[81]); MULADD(at[21], at[80]); MULADD(at[22], at[79]); MULADD(at[23], at[78]); MULADD(at[24], at[77]); MULADD(at[25], at[76]); MULADD(at[26], at[75]); MULADD(at[27], at[74]); MULADD(at[28], at[73]); MULADD(at[29], at[72]); MULADD(at[30], at[71]); MULADD(at[31], at[70]); MULADD(at[32], at[69]); MULADD(at[33], at[68]); MULADD(at[34], at[67]); MULADD(at[35], at[66]); MULADD(at[36], at[65]); MULADD(at[37], at[64]); + COMBA_STORE(C->dp[37]); + /* 38 */ + COMBA_FORWARD; + MULADD(at[0], at[102]); MULADD(at[1], at[101]); MULADD(at[2], at[100]); MULADD(at[3], at[99]); MULADD(at[4], at[98]); MULADD(at[5], at[97]); MULADD(at[6], at[96]); MULADD(at[7], at[95]); MULADD(at[8], at[94]); MULADD(at[9], at[93]); MULADD(at[10], at[92]); MULADD(at[11], at[91]); MULADD(at[12], at[90]); MULADD(at[13], at[89]); MULADD(at[14], at[88]); MULADD(at[15], at[87]); MULADD(at[16], at[86]); MULADD(at[17], at[85]); MULADD(at[18], at[84]); MULADD(at[19], at[83]); MULADD(at[20], at[82]); MULADD(at[21], at[81]); MULADD(at[22], at[80]); MULADD(at[23], at[79]); MULADD(at[24], at[78]); MULADD(at[25], at[77]); MULADD(at[26], at[76]); MULADD(at[27], at[75]); MULADD(at[28], at[74]); MULADD(at[29], at[73]); MULADD(at[30], at[72]); MULADD(at[31], at[71]); MULADD(at[32], at[70]); MULADD(at[33], at[69]); MULADD(at[34], at[68]); MULADD(at[35], at[67]); MULADD(at[36], at[66]); MULADD(at[37], at[65]); MULADD(at[38], at[64]); + COMBA_STORE(C->dp[38]); + /* 39 */ + COMBA_FORWARD; + MULADD(at[0], at[103]); MULADD(at[1], at[102]); 
MULADD(at[2], at[101]); MULADD(at[3], at[100]); MULADD(at[4], at[99]); MULADD(at[5], at[98]); MULADD(at[6], at[97]); MULADD(at[7], at[96]); MULADD(at[8], at[95]); MULADD(at[9], at[94]); MULADD(at[10], at[93]); MULADD(at[11], at[92]); MULADD(at[12], at[91]); MULADD(at[13], at[90]); MULADD(at[14], at[89]); MULADD(at[15], at[88]); MULADD(at[16], at[87]); MULADD(at[17], at[86]); MULADD(at[18], at[85]); MULADD(at[19], at[84]); MULADD(at[20], at[83]); MULADD(at[21], at[82]); MULADD(at[22], at[81]); MULADD(at[23], at[80]); MULADD(at[24], at[79]); MULADD(at[25], at[78]); MULADD(at[26], at[77]); MULADD(at[27], at[76]); MULADD(at[28], at[75]); MULADD(at[29], at[74]); MULADD(at[30], at[73]); MULADD(at[31], at[72]); MULADD(at[32], at[71]); MULADD(at[33], at[70]); MULADD(at[34], at[69]); MULADD(at[35], at[68]); MULADD(at[36], at[67]); MULADD(at[37], at[66]); MULADD(at[38], at[65]); MULADD(at[39], at[64]); + COMBA_STORE(C->dp[39]); + /* 40 */ + COMBA_FORWARD; + MULADD(at[0], at[104]); MULADD(at[1], at[103]); MULADD(at[2], at[102]); MULADD(at[3], at[101]); MULADD(at[4], at[100]); MULADD(at[5], at[99]); MULADD(at[6], at[98]); MULADD(at[7], at[97]); MULADD(at[8], at[96]); MULADD(at[9], at[95]); MULADD(at[10], at[94]); MULADD(at[11], at[93]); MULADD(at[12], at[92]); MULADD(at[13], at[91]); MULADD(at[14], at[90]); MULADD(at[15], at[89]); MULADD(at[16], at[88]); MULADD(at[17], at[87]); MULADD(at[18], at[86]); MULADD(at[19], at[85]); MULADD(at[20], at[84]); MULADD(at[21], at[83]); MULADD(at[22], at[82]); MULADD(at[23], at[81]); MULADD(at[24], at[80]); MULADD(at[25], at[79]); MULADD(at[26], at[78]); MULADD(at[27], at[77]); MULADD(at[28], at[76]); MULADD(at[29], at[75]); MULADD(at[30], at[74]); MULADD(at[31], at[73]); MULADD(at[32], at[72]); MULADD(at[33], at[71]); MULADD(at[34], at[70]); MULADD(at[35], at[69]); MULADD(at[36], at[68]); MULADD(at[37], at[67]); MULADD(at[38], at[66]); MULADD(at[39], at[65]); MULADD(at[40], at[64]); + COMBA_STORE(C->dp[40]); + /* 41 */ + COMBA_FORWARD; + MULADD(at[0], at[105]); MULADD(at[1], at[104]); MULADD(at[2], at[103]); MULADD(at[3], at[102]); MULADD(at[4], at[101]); MULADD(at[5], at[100]); MULADD(at[6], at[99]); MULADD(at[7], at[98]); MULADD(at[8], at[97]); MULADD(at[9], at[96]); MULADD(at[10], at[95]); MULADD(at[11], at[94]); MULADD(at[12], at[93]); MULADD(at[13], at[92]); MULADD(at[14], at[91]); MULADD(at[15], at[90]); MULADD(at[16], at[89]); MULADD(at[17], at[88]); MULADD(at[18], at[87]); MULADD(at[19], at[86]); MULADD(at[20], at[85]); MULADD(at[21], at[84]); MULADD(at[22], at[83]); MULADD(at[23], at[82]); MULADD(at[24], at[81]); MULADD(at[25], at[80]); MULADD(at[26], at[79]); MULADD(at[27], at[78]); MULADD(at[28], at[77]); MULADD(at[29], at[76]); MULADD(at[30], at[75]); MULADD(at[31], at[74]); MULADD(at[32], at[73]); MULADD(at[33], at[72]); MULADD(at[34], at[71]); MULADD(at[35], at[70]); MULADD(at[36], at[69]); MULADD(at[37], at[68]); MULADD(at[38], at[67]); MULADD(at[39], at[66]); MULADD(at[40], at[65]); MULADD(at[41], at[64]); + COMBA_STORE(C->dp[41]); + /* 42 */ + COMBA_FORWARD; + MULADD(at[0], at[106]); MULADD(at[1], at[105]); MULADD(at[2], at[104]); MULADD(at[3], at[103]); MULADD(at[4], at[102]); MULADD(at[5], at[101]); MULADD(at[6], at[100]); MULADD(at[7], at[99]); MULADD(at[8], at[98]); MULADD(at[9], at[97]); MULADD(at[10], at[96]); MULADD(at[11], at[95]); MULADD(at[12], at[94]); MULADD(at[13], at[93]); MULADD(at[14], at[92]); MULADD(at[15], at[91]); MULADD(at[16], at[90]); MULADD(at[17], at[89]); MULADD(at[18], at[88]); MULADD(at[19], at[87]); MULADD(at[20], 
at[86]); MULADD(at[21], at[85]); MULADD(at[22], at[84]); MULADD(at[23], at[83]); MULADD(at[24], at[82]); MULADD(at[25], at[81]); MULADD(at[26], at[80]); MULADD(at[27], at[79]); MULADD(at[28], at[78]); MULADD(at[29], at[77]); MULADD(at[30], at[76]); MULADD(at[31], at[75]); MULADD(at[32], at[74]); MULADD(at[33], at[73]); MULADD(at[34], at[72]); MULADD(at[35], at[71]); MULADD(at[36], at[70]); MULADD(at[37], at[69]); MULADD(at[38], at[68]); MULADD(at[39], at[67]); MULADD(at[40], at[66]); MULADD(at[41], at[65]); MULADD(at[42], at[64]); + COMBA_STORE(C->dp[42]); + /* 43 */ + COMBA_FORWARD; + MULADD(at[0], at[107]); MULADD(at[1], at[106]); MULADD(at[2], at[105]); MULADD(at[3], at[104]); MULADD(at[4], at[103]); MULADD(at[5], at[102]); MULADD(at[6], at[101]); MULADD(at[7], at[100]); MULADD(at[8], at[99]); MULADD(at[9], at[98]); MULADD(at[10], at[97]); MULADD(at[11], at[96]); MULADD(at[12], at[95]); MULADD(at[13], at[94]); MULADD(at[14], at[93]); MULADD(at[15], at[92]); MULADD(at[16], at[91]); MULADD(at[17], at[90]); MULADD(at[18], at[89]); MULADD(at[19], at[88]); MULADD(at[20], at[87]); MULADD(at[21], at[86]); MULADD(at[22], at[85]); MULADD(at[23], at[84]); MULADD(at[24], at[83]); MULADD(at[25], at[82]); MULADD(at[26], at[81]); MULADD(at[27], at[80]); MULADD(at[28], at[79]); MULADD(at[29], at[78]); MULADD(at[30], at[77]); MULADD(at[31], at[76]); MULADD(at[32], at[75]); MULADD(at[33], at[74]); MULADD(at[34], at[73]); MULADD(at[35], at[72]); MULADD(at[36], at[71]); MULADD(at[37], at[70]); MULADD(at[38], at[69]); MULADD(at[39], at[68]); MULADD(at[40], at[67]); MULADD(at[41], at[66]); MULADD(at[42], at[65]); MULADD(at[43], at[64]); + COMBA_STORE(C->dp[43]); + /* 44 */ + COMBA_FORWARD; + MULADD(at[0], at[108]); MULADD(at[1], at[107]); MULADD(at[2], at[106]); MULADD(at[3], at[105]); MULADD(at[4], at[104]); MULADD(at[5], at[103]); MULADD(at[6], at[102]); MULADD(at[7], at[101]); MULADD(at[8], at[100]); MULADD(at[9], at[99]); MULADD(at[10], at[98]); MULADD(at[11], at[97]); MULADD(at[12], at[96]); MULADD(at[13], at[95]); MULADD(at[14], at[94]); MULADD(at[15], at[93]); MULADD(at[16], at[92]); MULADD(at[17], at[91]); MULADD(at[18], at[90]); MULADD(at[19], at[89]); MULADD(at[20], at[88]); MULADD(at[21], at[87]); MULADD(at[22], at[86]); MULADD(at[23], at[85]); MULADD(at[24], at[84]); MULADD(at[25], at[83]); MULADD(at[26], at[82]); MULADD(at[27], at[81]); MULADD(at[28], at[80]); MULADD(at[29], at[79]); MULADD(at[30], at[78]); MULADD(at[31], at[77]); MULADD(at[32], at[76]); MULADD(at[33], at[75]); MULADD(at[34], at[74]); MULADD(at[35], at[73]); MULADD(at[36], at[72]); MULADD(at[37], at[71]); MULADD(at[38], at[70]); MULADD(at[39], at[69]); MULADD(at[40], at[68]); MULADD(at[41], at[67]); MULADD(at[42], at[66]); MULADD(at[43], at[65]); MULADD(at[44], at[64]); + COMBA_STORE(C->dp[44]); + /* 45 */ + COMBA_FORWARD; + MULADD(at[0], at[109]); MULADD(at[1], at[108]); MULADD(at[2], at[107]); MULADD(at[3], at[106]); MULADD(at[4], at[105]); MULADD(at[5], at[104]); MULADD(at[6], at[103]); MULADD(at[7], at[102]); MULADD(at[8], at[101]); MULADD(at[9], at[100]); MULADD(at[10], at[99]); MULADD(at[11], at[98]); MULADD(at[12], at[97]); MULADD(at[13], at[96]); MULADD(at[14], at[95]); MULADD(at[15], at[94]); MULADD(at[16], at[93]); MULADD(at[17], at[92]); MULADD(at[18], at[91]); MULADD(at[19], at[90]); MULADD(at[20], at[89]); MULADD(at[21], at[88]); MULADD(at[22], at[87]); MULADD(at[23], at[86]); MULADD(at[24], at[85]); MULADD(at[25], at[84]); MULADD(at[26], at[83]); MULADD(at[27], at[82]); MULADD(at[28], at[81]); MULADD(at[29], 
at[80]); MULADD(at[30], at[79]); MULADD(at[31], at[78]); MULADD(at[32], at[77]); MULADD(at[33], at[76]); MULADD(at[34], at[75]); MULADD(at[35], at[74]); MULADD(at[36], at[73]); MULADD(at[37], at[72]); MULADD(at[38], at[71]); MULADD(at[39], at[70]); MULADD(at[40], at[69]); MULADD(at[41], at[68]); MULADD(at[42], at[67]); MULADD(at[43], at[66]); MULADD(at[44], at[65]); MULADD(at[45], at[64]);
+ COMBA_STORE(C->dp[45]);
+ /* 46 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[110]); MULADD(at[1], at[109]); MULADD(at[2], at[108]); MULADD(at[3], at[107]); MULADD(at[4], at[106]); MULADD(at[5], at[105]); MULADD(at[6], at[104]); MULADD(at[7], at[103]); MULADD(at[8], at[102]); MULADD(at[9], at[101]); MULADD(at[10], at[100]); MULADD(at[11], at[99]); MULADD(at[12], at[98]); MULADD(at[13], at[97]); MULADD(at[14], at[96]); MULADD(at[15], at[95]); MULADD(at[16], at[94]); MULADD(at[17], at[93]); MULADD(at[18], at[92]); MULADD(at[19], at[91]); MULADD(at[20], at[90]); MULADD(at[21], at[89]); MULADD(at[22], at[88]); MULADD(at[23], at[87]); MULADD(at[24], at[86]); MULADD(at[25], at[85]); MULADD(at[26], at[84]); MULADD(at[27], at[83]); MULADD(at[28], at[82]); MULADD(at[29], at[81]); MULADD(at[30], at[80]); MULADD(at[31], at[79]); MULADD(at[32], at[78]); MULADD(at[33], at[77]); MULADD(at[34], at[76]); MULADD(at[35], at[75]); MULADD(at[36], at[74]); MULADD(at[37], at[73]); MULADD(at[38], at[72]); MULADD(at[39], at[71]); MULADD(at[40], at[70]); MULADD(at[41], at[69]); MULADD(at[42], at[68]); MULADD(at[43], at[67]); MULADD(at[44], at[66]); MULADD(at[45], at[65]); MULADD(at[46], at[64]);
+ COMBA_STORE(C->dp[46]);
+ /* 47 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[111]); MULADD(at[1], at[110]); MULADD(at[2], at[109]); MULADD(at[3], at[108]); MULADD(at[4], at[107]); MULADD(at[5], at[106]); MULADD(at[6], at[105]); MULADD(at[7], at[104]); MULADD(at[8], at[103]); MULADD(at[9], at[102]); MULADD(at[10], at[101]); MULADD(at[11], at[100]); MULADD(at[12], at[99]); MULADD(at[13], at[98]); MULADD(at[14], at[97]); MULADD(at[15], at[96]); MULADD(at[16], at[95]); MULADD(at[17], at[94]); MULADD(at[18], at[93]); MULADD(at[19], at[92]); MULADD(at[20], at[91]); MULADD(at[21], at[90]); MULADD(at[22], at[89]); MULADD(at[23], at[88]); MULADD(at[24], at[87]); MULADD(at[25], at[86]); MULADD(at[26], at[85]); MULADD(at[27], at[84]); MULADD(at[28], at[83]); MULADD(at[29], at[82]); MULADD(at[30], at[81]); MULADD(at[31], at[80]); MULADD(at[32], at[79]); MULADD(at[33], at[78]); MULADD(at[34], at[77]); MULADD(at[35], at[76]); MULADD(at[36], at[75]); MULADD(at[37], at[74]); MULADD(at[38], at[73]); MULADD(at[39], at[72]); MULADD(at[40], at[71]); MULADD(at[41], at[70]); MULADD(at[42], at[69]); MULADD(at[43], at[68]); MULADD(at[44], at[67]); MULADD(at[45], at[66]); MULADD(at[46], at[65]); MULADD(at[47], at[64]);
+ COMBA_STORE(C->dp[47]);
+ /* 48 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[112]); MULADD(at[1], at[111]); MULADD(at[2], at[110]); MULADD(at[3], at[109]); MULADD(at[4], at[108]); MULADD(at[5], at[107]); MULADD(at[6], at[106]); MULADD(at[7], at[105]); MULADD(at[8], at[104]); MULADD(at[9], at[103]); MULADD(at[10], at[102]); MULADD(at[11], at[101]); MULADD(at[12], at[100]); MULADD(at[13], at[99]); MULADD(at[14], at[98]); MULADD(at[15], at[97]); MULADD(at[16], at[96]); MULADD(at[17], at[95]); MULADD(at[18], at[94]); MULADD(at[19], at[93]); MULADD(at[20], at[92]); MULADD(at[21], at[91]); MULADD(at[22], at[90]); MULADD(at[23], at[89]); MULADD(at[24], at[88]); MULADD(at[25], at[87]); MULADD(at[26], at[86]); MULADD(at[27], at[85]); MULADD(at[28], at[84]); MULADD(at[29], at[83]); MULADD(at[30], at[82]); MULADD(at[31], at[81]); MULADD(at[32], at[80]); MULADD(at[33], at[79]); MULADD(at[34], at[78]); MULADD(at[35], at[77]); MULADD(at[36], at[76]); MULADD(at[37], at[75]); MULADD(at[38], at[74]); MULADD(at[39], at[73]); MULADD(at[40], at[72]); MULADD(at[41], at[71]); MULADD(at[42], at[70]); MULADD(at[43], at[69]); MULADD(at[44], at[68]); MULADD(at[45], at[67]); MULADD(at[46], at[66]); MULADD(at[47], at[65]); MULADD(at[48], at[64]);
+ COMBA_STORE(C->dp[48]);
+ /* 49 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[113]); MULADD(at[1], at[112]); MULADD(at[2], at[111]); MULADD(at[3], at[110]); MULADD(at[4], at[109]); MULADD(at[5], at[108]); MULADD(at[6], at[107]); MULADD(at[7], at[106]); MULADD(at[8], at[105]); MULADD(at[9], at[104]); MULADD(at[10], at[103]); MULADD(at[11], at[102]); MULADD(at[12], at[101]); MULADD(at[13], at[100]); MULADD(at[14], at[99]); MULADD(at[15], at[98]); MULADD(at[16], at[97]); MULADD(at[17], at[96]); MULADD(at[18], at[95]); MULADD(at[19], at[94]); MULADD(at[20], at[93]); MULADD(at[21], at[92]); MULADD(at[22], at[91]); MULADD(at[23], at[90]); MULADD(at[24], at[89]); MULADD(at[25], at[88]); MULADD(at[26], at[87]); MULADD(at[27], at[86]); MULADD(at[28], at[85]); MULADD(at[29], at[84]); MULADD(at[30], at[83]); MULADD(at[31], at[82]); MULADD(at[32], at[81]); MULADD(at[33], at[80]); MULADD(at[34], at[79]); MULADD(at[35], at[78]); MULADD(at[36], at[77]); MULADD(at[37], at[76]); MULADD(at[38], at[75]); MULADD(at[39], at[74]); MULADD(at[40], at[73]); MULADD(at[41], at[72]); MULADD(at[42], at[71]); MULADD(at[43], at[70]); MULADD(at[44], at[69]); MULADD(at[45], at[68]); MULADD(at[46], at[67]); MULADD(at[47], at[66]); MULADD(at[48], at[65]); MULADD(at[49], at[64]);
+ COMBA_STORE(C->dp[49]);
+ /* 50 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[114]); MULADD(at[1], at[113]); MULADD(at[2], at[112]); MULADD(at[3], at[111]); MULADD(at[4], at[110]); MULADD(at[5], at[109]); MULADD(at[6], at[108]); MULADD(at[7], at[107]); MULADD(at[8], at[106]); MULADD(at[9], at[105]); MULADD(at[10], at[104]); MULADD(at[11], at[103]); MULADD(at[12], at[102]); MULADD(at[13], at[101]); MULADD(at[14], at[100]); MULADD(at[15], at[99]); MULADD(at[16], at[98]); MULADD(at[17], at[97]); MULADD(at[18], at[96]); MULADD(at[19], at[95]); MULADD(at[20], at[94]); MULADD(at[21], at[93]); MULADD(at[22], at[92]); MULADD(at[23], at[91]); MULADD(at[24], at[90]); MULADD(at[25], at[89]); MULADD(at[26], at[88]); MULADD(at[27], at[87]); MULADD(at[28], at[86]); MULADD(at[29], at[85]); MULADD(at[30], at[84]); MULADD(at[31], at[83]); MULADD(at[32], at[82]); MULADD(at[33], at[81]); MULADD(at[34], at[80]); MULADD(at[35], at[79]); MULADD(at[36], at[78]); MULADD(at[37], at[77]); MULADD(at[38], at[76]); MULADD(at[39], at[75]); MULADD(at[40], at[74]); MULADD(at[41], at[73]); MULADD(at[42], at[72]); MULADD(at[43], at[71]); MULADD(at[44], at[70]); MULADD(at[45], at[69]); MULADD(at[46], at[68]); MULADD(at[47], at[67]); MULADD(at[48], at[66]); MULADD(at[49], at[65]); MULADD(at[50], at[64]);
+ COMBA_STORE(C->dp[50]);
+ /* 51 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[115]); MULADD(at[1], at[114]); MULADD(at[2], at[113]); MULADD(at[3], at[112]); MULADD(at[4], at[111]); MULADD(at[5], at[110]); MULADD(at[6], at[109]); MULADD(at[7], at[108]); MULADD(at[8], at[107]); MULADD(at[9], at[106]); MULADD(at[10], at[105]); MULADD(at[11], at[104]); MULADD(at[12], at[103]); MULADD(at[13], at[102]); MULADD(at[14], at[101]); MULADD(at[15], at[100]); MULADD(at[16], at[99]); MULADD(at[17], at[98]); MULADD(at[18], at[97]); MULADD(at[19], at[96]); MULADD(at[20], at[95]); MULADD(at[21], at[94]); MULADD(at[22], at[93]); MULADD(at[23], at[92]); MULADD(at[24], at[91]); MULADD(at[25], at[90]); MULADD(at[26], at[89]); MULADD(at[27], at[88]); MULADD(at[28], at[87]); MULADD(at[29], at[86]); MULADD(at[30], at[85]); MULADD(at[31], at[84]); MULADD(at[32], at[83]); MULADD(at[33], at[82]); MULADD(at[34], at[81]); MULADD(at[35], at[80]); MULADD(at[36], at[79]); MULADD(at[37], at[78]); MULADD(at[38], at[77]); MULADD(at[39], at[76]); MULADD(at[40], at[75]); MULADD(at[41], at[74]); MULADD(at[42], at[73]); MULADD(at[43], at[72]); MULADD(at[44], at[71]); MULADD(at[45], at[70]); MULADD(at[46], at[69]); MULADD(at[47], at[68]); MULADD(at[48], at[67]); MULADD(at[49], at[66]); MULADD(at[50], at[65]); MULADD(at[51], at[64]);
+ COMBA_STORE(C->dp[51]);
+ /* 52 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[116]); MULADD(at[1], at[115]); MULADD(at[2], at[114]); MULADD(at[3], at[113]); MULADD(at[4], at[112]); MULADD(at[5], at[111]); MULADD(at[6], at[110]); MULADD(at[7], at[109]); MULADD(at[8], at[108]); MULADD(at[9], at[107]); MULADD(at[10], at[106]); MULADD(at[11], at[105]); MULADD(at[12], at[104]); MULADD(at[13], at[103]); MULADD(at[14], at[102]); MULADD(at[15], at[101]); MULADD(at[16], at[100]); MULADD(at[17], at[99]); MULADD(at[18], at[98]); MULADD(at[19], at[97]); MULADD(at[20], at[96]); MULADD(at[21], at[95]); MULADD(at[22], at[94]); MULADD(at[23], at[93]); MULADD(at[24], at[92]); MULADD(at[25], at[91]); MULADD(at[26], at[90]); MULADD(at[27], at[89]); MULADD(at[28], at[88]); MULADD(at[29], at[87]); MULADD(at[30], at[86]); MULADD(at[31], at[85]); MULADD(at[32], at[84]); MULADD(at[33], at[83]); MULADD(at[34], at[82]); MULADD(at[35], at[81]); MULADD(at[36], at[80]); MULADD(at[37], at[79]); MULADD(at[38], at[78]); MULADD(at[39], at[77]); MULADD(at[40], at[76]); MULADD(at[41], at[75]); MULADD(at[42], at[74]); MULADD(at[43], at[73]); MULADD(at[44], at[72]); MULADD(at[45], at[71]); MULADD(at[46], at[70]); MULADD(at[47], at[69]); MULADD(at[48], at[68]); MULADD(at[49], at[67]); MULADD(at[50], at[66]); MULADD(at[51], at[65]); MULADD(at[52], at[64]);
+ COMBA_STORE(C->dp[52]);
+ /* 53 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[117]); MULADD(at[1], at[116]); MULADD(at[2], at[115]); MULADD(at[3], at[114]); MULADD(at[4], at[113]); MULADD(at[5], at[112]); MULADD(at[6], at[111]); MULADD(at[7], at[110]); MULADD(at[8], at[109]); MULADD(at[9], at[108]); MULADD(at[10], at[107]); MULADD(at[11], at[106]); MULADD(at[12], at[105]); MULADD(at[13], at[104]); MULADD(at[14], at[103]); MULADD(at[15], at[102]); MULADD(at[16], at[101]); MULADD(at[17], at[100]); MULADD(at[18], at[99]); MULADD(at[19], at[98]); MULADD(at[20], at[97]); MULADD(at[21], at[96]); MULADD(at[22], at[95]); MULADD(at[23], at[94]); MULADD(at[24], at[93]); MULADD(at[25], at[92]); MULADD(at[26], at[91]); MULADD(at[27], at[90]); MULADD(at[28], at[89]); MULADD(at[29], at[88]); MULADD(at[30], at[87]); MULADD(at[31], at[86]); MULADD(at[32], at[85]); MULADD(at[33], at[84]); MULADD(at[34], at[83]); MULADD(at[35], at[82]); MULADD(at[36], at[81]); MULADD(at[37], at[80]); MULADD(at[38], at[79]); MULADD(at[39], at[78]); MULADD(at[40], at[77]); MULADD(at[41], at[76]); MULADD(at[42], at[75]); MULADD(at[43], at[74]); MULADD(at[44], at[73]); MULADD(at[45], at[72]); MULADD(at[46], at[71]); MULADD(at[47], at[70]); MULADD(at[48], at[69]); MULADD(at[49], at[68]); MULADD(at[50], at[67]); MULADD(at[51], at[66]); MULADD(at[52], at[65]); MULADD(at[53], at[64]);
+ COMBA_STORE(C->dp[53]);
+ /* 54 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[118]); MULADD(at[1], at[117]); MULADD(at[2], at[116]); MULADD(at[3], at[115]); MULADD(at[4], at[114]); MULADD(at[5], at[113]); MULADD(at[6], at[112]); MULADD(at[7], at[111]); MULADD(at[8], at[110]); MULADD(at[9], at[109]); MULADD(at[10], at[108]); MULADD(at[11], at[107]); MULADD(at[12], at[106]); MULADD(at[13], at[105]); MULADD(at[14], at[104]); MULADD(at[15], at[103]); MULADD(at[16], at[102]); MULADD(at[17], at[101]); MULADD(at[18], at[100]); MULADD(at[19], at[99]); MULADD(at[20], at[98]); MULADD(at[21], at[97]); MULADD(at[22], at[96]); MULADD(at[23], at[95]); MULADD(at[24], at[94]); MULADD(at[25], at[93]); MULADD(at[26], at[92]); MULADD(at[27], at[91]); MULADD(at[28], at[90]); MULADD(at[29], at[89]); MULADD(at[30], at[88]); MULADD(at[31], at[87]); MULADD(at[32], at[86]); MULADD(at[33], at[85]); MULADD(at[34], at[84]); MULADD(at[35], at[83]); MULADD(at[36], at[82]); MULADD(at[37], at[81]); MULADD(at[38], at[80]); MULADD(at[39], at[79]); MULADD(at[40], at[78]); MULADD(at[41], at[77]); MULADD(at[42], at[76]); MULADD(at[43], at[75]); MULADD(at[44], at[74]); MULADD(at[45], at[73]); MULADD(at[46], at[72]); MULADD(at[47], at[71]); MULADD(at[48], at[70]); MULADD(at[49], at[69]); MULADD(at[50], at[68]); MULADD(at[51], at[67]); MULADD(at[52], at[66]); MULADD(at[53], at[65]); MULADD(at[54], at[64]);
+ COMBA_STORE(C->dp[54]);
+ /* 55 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[119]); MULADD(at[1], at[118]); MULADD(at[2], at[117]); MULADD(at[3], at[116]); MULADD(at[4], at[115]); MULADD(at[5], at[114]); MULADD(at[6], at[113]); MULADD(at[7], at[112]); MULADD(at[8], at[111]); MULADD(at[9], at[110]); MULADD(at[10], at[109]); MULADD(at[11], at[108]); MULADD(at[12], at[107]); MULADD(at[13], at[106]); MULADD(at[14], at[105]); MULADD(at[15], at[104]); MULADD(at[16], at[103]); MULADD(at[17], at[102]); MULADD(at[18], at[101]); MULADD(at[19], at[100]); MULADD(at[20], at[99]); MULADD(at[21], at[98]); MULADD(at[22], at[97]); MULADD(at[23], at[96]); MULADD(at[24], at[95]); MULADD(at[25], at[94]); MULADD(at[26], at[93]); MULADD(at[27], at[92]); MULADD(at[28], at[91]); MULADD(at[29], at[90]); MULADD(at[30], at[89]); MULADD(at[31], at[88]); MULADD(at[32], at[87]); MULADD(at[33], at[86]); MULADD(at[34], at[85]); MULADD(at[35], at[84]); MULADD(at[36], at[83]); MULADD(at[37], at[82]); MULADD(at[38], at[81]); MULADD(at[39], at[80]); MULADD(at[40], at[79]); MULADD(at[41], at[78]); MULADD(at[42], at[77]); MULADD(at[43], at[76]); MULADD(at[44], at[75]); MULADD(at[45], at[74]); MULADD(at[46], at[73]); MULADD(at[47], at[72]); MULADD(at[48], at[71]); MULADD(at[49], at[70]); MULADD(at[50], at[69]); MULADD(at[51], at[68]); MULADD(at[52], at[67]); MULADD(at[53], at[66]); MULADD(at[54], at[65]); MULADD(at[55], at[64]);
+ COMBA_STORE(C->dp[55]);
+ /* 56 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[120]); MULADD(at[1], at[119]); MULADD(at[2], at[118]); MULADD(at[3], at[117]); MULADD(at[4], at[116]); MULADD(at[5], at[115]); MULADD(at[6], at[114]); MULADD(at[7], at[113]); MULADD(at[8], at[112]); MULADD(at[9], at[111]); MULADD(at[10], at[110]); MULADD(at[11], at[109]); MULADD(at[12], at[108]); MULADD(at[13], at[107]); MULADD(at[14], at[106]); MULADD(at[15], at[105]); MULADD(at[16], at[104]); MULADD(at[17], at[103]); MULADD(at[18], at[102]); MULADD(at[19], at[101]); MULADD(at[20], at[100]); MULADD(at[21], at[99]); MULADD(at[22], at[98]); MULADD(at[23], at[97]); MULADD(at[24], at[96]); MULADD(at[25], at[95]); MULADD(at[26], at[94]); MULADD(at[27], at[93]); MULADD(at[28], at[92]); MULADD(at[29], at[91]); MULADD(at[30], at[90]); MULADD(at[31], at[89]); MULADD(at[32], at[88]); MULADD(at[33], at[87]); MULADD(at[34], at[86]); MULADD(at[35], at[85]); MULADD(at[36], at[84]); MULADD(at[37], at[83]); MULADD(at[38], at[82]); MULADD(at[39], at[81]); MULADD(at[40], at[80]); MULADD(at[41], at[79]); MULADD(at[42], at[78]); MULADD(at[43], at[77]); MULADD(at[44], at[76]); MULADD(at[45], at[75]); MULADD(at[46], at[74]); MULADD(at[47], at[73]); MULADD(at[48], at[72]); MULADD(at[49], at[71]); MULADD(at[50], at[70]); MULADD(at[51], at[69]); MULADD(at[52], at[68]); MULADD(at[53], at[67]); MULADD(at[54], at[66]); MULADD(at[55], at[65]); MULADD(at[56], at[64]);
+ COMBA_STORE(C->dp[56]);
+ /* 57 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[121]); MULADD(at[1], at[120]); MULADD(at[2], at[119]); MULADD(at[3], at[118]); MULADD(at[4], at[117]); MULADD(at[5], at[116]); MULADD(at[6], at[115]); MULADD(at[7], at[114]); MULADD(at[8], at[113]); MULADD(at[9], at[112]); MULADD(at[10], at[111]); MULADD(at[11], at[110]); MULADD(at[12], at[109]); MULADD(at[13], at[108]); MULADD(at[14], at[107]); MULADD(at[15], at[106]); MULADD(at[16], at[105]); MULADD(at[17], at[104]); MULADD(at[18], at[103]); MULADD(at[19], at[102]); MULADD(at[20], at[101]); MULADD(at[21], at[100]); MULADD(at[22], at[99]); MULADD(at[23], at[98]); MULADD(at[24], at[97]); MULADD(at[25], at[96]); MULADD(at[26], at[95]); MULADD(at[27], at[94]); MULADD(at[28], at[93]); MULADD(at[29], at[92]); MULADD(at[30], at[91]); MULADD(at[31], at[90]); MULADD(at[32], at[89]); MULADD(at[33], at[88]); MULADD(at[34], at[87]); MULADD(at[35], at[86]); MULADD(at[36], at[85]); MULADD(at[37], at[84]); MULADD(at[38], at[83]); MULADD(at[39], at[82]); MULADD(at[40], at[81]); MULADD(at[41], at[80]); MULADD(at[42], at[79]); MULADD(at[43], at[78]); MULADD(at[44], at[77]); MULADD(at[45], at[76]); MULADD(at[46], at[75]); MULADD(at[47], at[74]); MULADD(at[48], at[73]); MULADD(at[49], at[72]); MULADD(at[50], at[71]); MULADD(at[51], at[70]); MULADD(at[52], at[69]); MULADD(at[53], at[68]); MULADD(at[54], at[67]); MULADD(at[55], at[66]); MULADD(at[56], at[65]); MULADD(at[57], at[64]);
+ COMBA_STORE(C->dp[57]);
+ /* 58 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[122]); MULADD(at[1], at[121]); MULADD(at[2], at[120]); MULADD(at[3], at[119]); MULADD(at[4], at[118]); MULADD(at[5], at[117]); MULADD(at[6], at[116]); MULADD(at[7], at[115]); MULADD(at[8], at[114]); MULADD(at[9], at[113]); MULADD(at[10], at[112]); MULADD(at[11], at[111]); MULADD(at[12], at[110]); MULADD(at[13], at[109]); MULADD(at[14], at[108]); MULADD(at[15], at[107]); MULADD(at[16], at[106]); MULADD(at[17], at[105]); MULADD(at[18], at[104]); MULADD(at[19], at[103]); MULADD(at[20], at[102]); MULADD(at[21], at[101]); MULADD(at[22], at[100]); MULADD(at[23], at[99]); MULADD(at[24], at[98]); MULADD(at[25], at[97]); MULADD(at[26], at[96]); MULADD(at[27], at[95]); MULADD(at[28], at[94]); MULADD(at[29], at[93]); MULADD(at[30], at[92]); MULADD(at[31], at[91]); MULADD(at[32], at[90]); MULADD(at[33], at[89]); MULADD(at[34], at[88]); MULADD(at[35], at[87]); MULADD(at[36], at[86]); MULADD(at[37], at[85]); MULADD(at[38], at[84]); MULADD(at[39], at[83]); MULADD(at[40], at[82]); MULADD(at[41], at[81]); MULADD(at[42], at[80]); MULADD(at[43], at[79]); MULADD(at[44], at[78]); MULADD(at[45], at[77]); MULADD(at[46], at[76]); MULADD(at[47], at[75]); MULADD(at[48], at[74]); MULADD(at[49], at[73]); MULADD(at[50], at[72]); MULADD(at[51], at[71]); MULADD(at[52], at[70]); MULADD(at[53], at[69]); MULADD(at[54], at[68]); MULADD(at[55], at[67]); MULADD(at[56], at[66]); MULADD(at[57], at[65]); MULADD(at[58], at[64]);
+ COMBA_STORE(C->dp[58]);
+ /* 59 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[123]); MULADD(at[1], at[122]); MULADD(at[2], at[121]); MULADD(at[3], at[120]); MULADD(at[4], at[119]); MULADD(at[5], at[118]); MULADD(at[6], at[117]); MULADD(at[7], at[116]); MULADD(at[8], at[115]); MULADD(at[9], at[114]); MULADD(at[10], at[113]); MULADD(at[11], at[112]); MULADD(at[12], at[111]); MULADD(at[13], at[110]); MULADD(at[14], at[109]); MULADD(at[15], at[108]); MULADD(at[16], at[107]); MULADD(at[17], at[106]); MULADD(at[18], at[105]); MULADD(at[19], at[104]); MULADD(at[20], at[103]); MULADD(at[21], at[102]); MULADD(at[22], at[101]); MULADD(at[23], at[100]); MULADD(at[24], at[99]); MULADD(at[25], at[98]); MULADD(at[26], at[97]); MULADD(at[27], at[96]); MULADD(at[28], at[95]); MULADD(at[29], at[94]); MULADD(at[30], at[93]); MULADD(at[31], at[92]); MULADD(at[32], at[91]); MULADD(at[33], at[90]); MULADD(at[34], at[89]); MULADD(at[35], at[88]); MULADD(at[36], at[87]); MULADD(at[37], at[86]); MULADD(at[38], at[85]); MULADD(at[39], at[84]); MULADD(at[40], at[83]); MULADD(at[41], at[82]); MULADD(at[42], at[81]); MULADD(at[43], at[80]); MULADD(at[44], at[79]); MULADD(at[45], at[78]); MULADD(at[46], at[77]); MULADD(at[47], at[76]); MULADD(at[48], at[75]); MULADD(at[49], at[74]); MULADD(at[50], at[73]); MULADD(at[51], at[72]); MULADD(at[52], at[71]); MULADD(at[53], at[70]); MULADD(at[54], at[69]); MULADD(at[55], at[68]); MULADD(at[56], at[67]); MULADD(at[57], at[66]); MULADD(at[58], at[65]); MULADD(at[59], at[64]);
+ COMBA_STORE(C->dp[59]);
+ /* 60 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[124]); MULADD(at[1], at[123]); MULADD(at[2], at[122]); MULADD(at[3], at[121]); MULADD(at[4], at[120]); MULADD(at[5], at[119]); MULADD(at[6], at[118]); MULADD(at[7], at[117]); MULADD(at[8], at[116]); MULADD(at[9], at[115]); MULADD(at[10], at[114]); MULADD(at[11], at[113]); MULADD(at[12], at[112]); MULADD(at[13], at[111]); MULADD(at[14], at[110]); MULADD(at[15], at[109]); MULADD(at[16], at[108]); MULADD(at[17], at[107]); MULADD(at[18], at[106]); MULADD(at[19], at[105]); MULADD(at[20], at[104]); MULADD(at[21], at[103]); MULADD(at[22], at[102]); MULADD(at[23], at[101]); MULADD(at[24], at[100]); MULADD(at[25], at[99]); MULADD(at[26], at[98]); MULADD(at[27], at[97]); MULADD(at[28], at[96]); MULADD(at[29], at[95]); MULADD(at[30], at[94]); MULADD(at[31], at[93]); MULADD(at[32], at[92]); MULADD(at[33], at[91]); MULADD(at[34], at[90]); MULADD(at[35], at[89]); MULADD(at[36], at[88]); MULADD(at[37], at[87]); MULADD(at[38], at[86]); MULADD(at[39], at[85]); MULADD(at[40], at[84]); MULADD(at[41], at[83]); MULADD(at[42], at[82]); MULADD(at[43], at[81]); MULADD(at[44], at[80]); MULADD(at[45], at[79]); MULADD(at[46], at[78]); MULADD(at[47], at[77]); MULADD(at[48], at[76]); MULADD(at[49], at[75]); MULADD(at[50], at[74]); MULADD(at[51], at[73]); MULADD(at[52], at[72]); MULADD(at[53], at[71]); MULADD(at[54], at[70]); MULADD(at[55], at[69]); MULADD(at[56], at[68]); MULADD(at[57], at[67]); MULADD(at[58], at[66]); MULADD(at[59], at[65]); MULADD(at[60], at[64]);
+ COMBA_STORE(C->dp[60]);
+ /* 61 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[125]); MULADD(at[1], at[124]); MULADD(at[2], at[123]); MULADD(at[3], at[122]); MULADD(at[4], at[121]); MULADD(at[5], at[120]); MULADD(at[6], at[119]); MULADD(at[7], at[118]); MULADD(at[8], at[117]); MULADD(at[9], at[116]); MULADD(at[10], at[115]); MULADD(at[11], at[114]); MULADD(at[12], at[113]); MULADD(at[13], at[112]); MULADD(at[14], at[111]); MULADD(at[15], at[110]); MULADD(at[16], at[109]); MULADD(at[17], at[108]); MULADD(at[18], at[107]); MULADD(at[19], at[106]); MULADD(at[20], at[105]); MULADD(at[21], at[104]); MULADD(at[22], at[103]); MULADD(at[23], at[102]); MULADD(at[24], at[101]); MULADD(at[25], at[100]); MULADD(at[26], at[99]); MULADD(at[27], at[98]); MULADD(at[28], at[97]); MULADD(at[29], at[96]); MULADD(at[30], at[95]); MULADD(at[31], at[94]); MULADD(at[32], at[93]); MULADD(at[33], at[92]); MULADD(at[34], at[91]); MULADD(at[35], at[90]); MULADD(at[36], at[89]); MULADD(at[37], at[88]); MULADD(at[38], at[87]); MULADD(at[39], at[86]); MULADD(at[40], at[85]); MULADD(at[41], at[84]); MULADD(at[42], at[83]); MULADD(at[43], at[82]); MULADD(at[44], at[81]); MULADD(at[45], at[80]); MULADD(at[46], at[79]); MULADD(at[47], at[78]); MULADD(at[48], at[77]); MULADD(at[49], at[76]); MULADD(at[50], at[75]); MULADD(at[51], at[74]); MULADD(at[52], at[73]); MULADD(at[53], at[72]); MULADD(at[54], at[71]); MULADD(at[55], at[70]); MULADD(at[56], at[69]); MULADD(at[57], at[68]); MULADD(at[58], at[67]); MULADD(at[59], at[66]); MULADD(at[60], at[65]); MULADD(at[61], at[64]);
+ COMBA_STORE(C->dp[61]);
+ /* 62 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[126]); MULADD(at[1], at[125]); MULADD(at[2], at[124]); MULADD(at[3], at[123]); MULADD(at[4], at[122]); MULADD(at[5], at[121]); MULADD(at[6], at[120]); MULADD(at[7], at[119]); MULADD(at[8], at[118]); MULADD(at[9], at[117]); MULADD(at[10], at[116]); MULADD(at[11], at[115]); MULADD(at[12], at[114]); MULADD(at[13], at[113]); MULADD(at[14], at[112]); MULADD(at[15], at[111]); MULADD(at[16], at[110]); MULADD(at[17], at[109]); MULADD(at[18], at[108]); MULADD(at[19], at[107]); MULADD(at[20], at[106]); MULADD(at[21], at[105]); MULADD(at[22], at[104]); MULADD(at[23], at[103]); MULADD(at[24], at[102]); MULADD(at[25], at[101]); MULADD(at[26], at[100]); MULADD(at[27], at[99]); MULADD(at[28], at[98]); MULADD(at[29], at[97]); MULADD(at[30], at[96]); MULADD(at[31], at[95]); MULADD(at[32], at[94]); MULADD(at[33], at[93]); MULADD(at[34], at[92]); MULADD(at[35], at[91]); MULADD(at[36], at[90]); MULADD(at[37], at[89]); MULADD(at[38], at[88]); MULADD(at[39], at[87]); MULADD(at[40], at[86]); MULADD(at[41], at[85]); MULADD(at[42], at[84]); MULADD(at[43], at[83]); MULADD(at[44], at[82]); MULADD(at[45], at[81]); MULADD(at[46], at[80]); MULADD(at[47], at[79]); MULADD(at[48], at[78]); MULADD(at[49], at[77]); MULADD(at[50], at[76]); MULADD(at[51], at[75]); MULADD(at[52], at[74]); MULADD(at[53], at[73]); MULADD(at[54], at[72]); MULADD(at[55], at[71]); MULADD(at[56], at[70]); MULADD(at[57], at[69]); MULADD(at[58], at[68]); MULADD(at[59], at[67]); MULADD(at[60], at[66]); MULADD(at[61], at[65]); MULADD(at[62], at[64]);
+ COMBA_STORE(C->dp[62]);
+ /* 63 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[127]); MULADD(at[1], at[126]); MULADD(at[2], at[125]); MULADD(at[3], at[124]); MULADD(at[4], at[123]); MULADD(at[5], at[122]); MULADD(at[6], at[121]); MULADD(at[7], at[120]); MULADD(at[8], at[119]); MULADD(at[9], at[118]); MULADD(at[10], at[117]); MULADD(at[11], at[116]); MULADD(at[12], at[115]); MULADD(at[13], at[114]); MULADD(at[14], at[113]); MULADD(at[15], at[112]); MULADD(at[16], at[111]); MULADD(at[17], at[110]); MULADD(at[18], at[109]); MULADD(at[19], at[108]); MULADD(at[20], at[107]); MULADD(at[21], at[106]); MULADD(at[22], at[105]); MULADD(at[23], at[104]); MULADD(at[24], at[103]); MULADD(at[25], at[102]); MULADD(at[26], at[101]); MULADD(at[27], at[100]); MULADD(at[28], at[99]); MULADD(at[29], at[98]); MULADD(at[30], at[97]); MULADD(at[31], at[96]); MULADD(at[32], at[95]); MULADD(at[33], at[94]); MULADD(at[34], at[93]); MULADD(at[35], at[92]); MULADD(at[36], at[91]); MULADD(at[37], at[90]); MULADD(at[38], at[89]); MULADD(at[39], at[88]); MULADD(at[40], at[87]); MULADD(at[41], at[86]); MULADD(at[42], at[85]); MULADD(at[43], at[84]); MULADD(at[44], at[83]); MULADD(at[45], at[82]); MULADD(at[46], at[81]); MULADD(at[47], at[80]); MULADD(at[48], at[79]); MULADD(at[49], at[78]); MULADD(at[50], at[77]); MULADD(at[51], at[76]); MULADD(at[52], at[75]); MULADD(at[53], at[74]); MULADD(at[54], at[73]); MULADD(at[55], at[72]); MULADD(at[56], at[71]); MULADD(at[57], at[70]); MULADD(at[58], at[69]); MULADD(at[59], at[68]); MULADD(at[60], at[67]); MULADD(at[61], at[66]); MULADD(at[62], at[65]); MULADD(at[63], at[64]);
+ COMBA_STORE(C->dp[63]);
+ /* 64 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[127]); MULADD(at[2], at[126]); MULADD(at[3], at[125]); MULADD(at[4], at[124]); MULADD(at[5], at[123]); MULADD(at[6], at[122]); MULADD(at[7], at[121]); MULADD(at[8], at[120]); MULADD(at[9], at[119]); MULADD(at[10], at[118]); MULADD(at[11], at[117]); MULADD(at[12], at[116]); MULADD(at[13], at[115]); MULADD(at[14], at[114]); MULADD(at[15], at[113]); MULADD(at[16], at[112]); MULADD(at[17], at[111]); MULADD(at[18], at[110]); MULADD(at[19], at[109]); MULADD(at[20], at[108]); MULADD(at[21], at[107]); MULADD(at[22], at[106]); MULADD(at[23], at[105]); MULADD(at[24], at[104]); MULADD(at[25], at[103]); MULADD(at[26], at[102]); MULADD(at[27], at[101]); MULADD(at[28], at[100]); MULADD(at[29], at[99]); MULADD(at[30], at[98]); MULADD(at[31], at[97]); MULADD(at[32], at[96]); MULADD(at[33], at[95]); MULADD(at[34], at[94]); MULADD(at[35], at[93]); MULADD(at[36], at[92]); MULADD(at[37], at[91]); MULADD(at[38], at[90]); MULADD(at[39], at[89]); MULADD(at[40], at[88]); MULADD(at[41], at[87]); MULADD(at[42], at[86]); MULADD(at[43], at[85]); MULADD(at[44], at[84]); MULADD(at[45], at[83]); MULADD(at[46], at[82]); MULADD(at[47], at[81]); MULADD(at[48], at[80]); MULADD(at[49], at[79]); MULADD(at[50], at[78]); MULADD(at[51], at[77]); MULADD(at[52], at[76]); MULADD(at[53], at[75]); MULADD(at[54], at[74]); MULADD(at[55], at[73]); MULADD(at[56], at[72]); MULADD(at[57], at[71]); MULADD(at[58], at[70]); MULADD(at[59], at[69]); MULADD(at[60], at[68]); MULADD(at[61], at[67]); MULADD(at[62], at[66]); MULADD(at[63], at[65]);
+ COMBA_STORE(C->dp[64]);
+ /* 65 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[127]); MULADD(at[3], at[126]); MULADD(at[4], at[125]); MULADD(at[5], at[124]); MULADD(at[6], at[123]); MULADD(at[7], at[122]); MULADD(at[8], at[121]); MULADD(at[9], at[120]); MULADD(at[10], at[119]); MULADD(at[11], at[118]); MULADD(at[12], at[117]); MULADD(at[13], at[116]); MULADD(at[14], at[115]); MULADD(at[15], at[114]); MULADD(at[16], at[113]); MULADD(at[17], at[112]); MULADD(at[18], at[111]); MULADD(at[19], at[110]); MULADD(at[20], at[109]); MULADD(at[21], at[108]); MULADD(at[22], at[107]); MULADD(at[23], at[106]); MULADD(at[24], at[105]); MULADD(at[25], at[104]); MULADD(at[26], at[103]); MULADD(at[27], at[102]); MULADD(at[28], at[101]); MULADD(at[29], at[100]); MULADD(at[30], at[99]); MULADD(at[31], at[98]); MULADD(at[32], at[97]); MULADD(at[33], at[96]); MULADD(at[34], at[95]); MULADD(at[35], at[94]); MULADD(at[36], at[93]); MULADD(at[37], at[92]); MULADD(at[38], at[91]); MULADD(at[39], at[90]); MULADD(at[40], at[89]); MULADD(at[41], at[88]); MULADD(at[42], at[87]); MULADD(at[43], at[86]); MULADD(at[44], at[85]); MULADD(at[45], at[84]); MULADD(at[46], at[83]); MULADD(at[47], at[82]); MULADD(at[48], at[81]); MULADD(at[49], at[80]); MULADD(at[50], at[79]); MULADD(at[51], at[78]); MULADD(at[52], at[77]); MULADD(at[53], at[76]); MULADD(at[54], at[75]); MULADD(at[55], at[74]); MULADD(at[56], at[73]); MULADD(at[57], at[72]); MULADD(at[58], at[71]); MULADD(at[59], at[70]); MULADD(at[60], at[69]); MULADD(at[61], at[68]); MULADD(at[62], at[67]); MULADD(at[63], at[66]);
+ COMBA_STORE(C->dp[65]);
+ /* 66 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[127]); MULADD(at[4], at[126]); MULADD(at[5], at[125]); MULADD(at[6], at[124]); MULADD(at[7], at[123]); MULADD(at[8], at[122]); MULADD(at[9], at[121]); MULADD(at[10], at[120]); MULADD(at[11], at[119]); MULADD(at[12], at[118]); MULADD(at[13], at[117]); MULADD(at[14], at[116]); MULADD(at[15], at[115]); MULADD(at[16], at[114]); MULADD(at[17], at[113]); MULADD(at[18], at[112]); MULADD(at[19], at[111]); MULADD(at[20], at[110]); MULADD(at[21], at[109]); MULADD(at[22], at[108]); MULADD(at[23], at[107]); MULADD(at[24], at[106]); MULADD(at[25], at[105]); MULADD(at[26], at[104]); MULADD(at[27], at[103]); MULADD(at[28], at[102]); MULADD(at[29], at[101]); MULADD(at[30], at[100]); MULADD(at[31], at[99]); MULADD(at[32], at[98]); MULADD(at[33], at[97]); MULADD(at[34], at[96]); MULADD(at[35], at[95]); MULADD(at[36], at[94]); MULADD(at[37], at[93]); MULADD(at[38], at[92]); MULADD(at[39], at[91]); MULADD(at[40], at[90]); MULADD(at[41], at[89]); MULADD(at[42], at[88]); MULADD(at[43], at[87]); MULADD(at[44], at[86]); MULADD(at[45], at[85]); MULADD(at[46], at[84]); MULADD(at[47], at[83]); MULADD(at[48], at[82]); MULADD(at[49], at[81]); MULADD(at[50], at[80]); MULADD(at[51], at[79]); MULADD(at[52], at[78]); MULADD(at[53], at[77]); MULADD(at[54], at[76]); MULADD(at[55], at[75]); MULADD(at[56], at[74]); MULADD(at[57], at[73]); MULADD(at[58], at[72]); MULADD(at[59], at[71]); MULADD(at[60], at[70]); MULADD(at[61], at[69]); MULADD(at[62], at[68]); MULADD(at[63], at[67]);
+ COMBA_STORE(C->dp[66]);
+ /* 67 */
+ COMBA_FORWARD;
+ MULADD(at[4], at[127]); MULADD(at[5], at[126]); MULADD(at[6], at[125]); MULADD(at[7], at[124]); MULADD(at[8], at[123]); MULADD(at[9], at[122]); MULADD(at[10], at[121]); MULADD(at[11], at[120]); MULADD(at[12], at[119]); MULADD(at[13], at[118]); MULADD(at[14], at[117]); MULADD(at[15], at[116]); MULADD(at[16], at[115]); MULADD(at[17], at[114]); MULADD(at[18], at[113]); MULADD(at[19], at[112]); MULADD(at[20], at[111]); MULADD(at[21], at[110]); MULADD(at[22], at[109]); MULADD(at[23], at[108]); MULADD(at[24], at[107]); MULADD(at[25], at[106]); MULADD(at[26], at[105]); MULADD(at[27], at[104]); MULADD(at[28], at[103]); MULADD(at[29], at[102]); MULADD(at[30], at[101]); MULADD(at[31], at[100]); MULADD(at[32], at[99]); MULADD(at[33], at[98]); MULADD(at[34], at[97]); MULADD(at[35], at[96]); MULADD(at[36], at[95]); MULADD(at[37], at[94]); MULADD(at[38], at[93]); MULADD(at[39], at[92]); MULADD(at[40], at[91]); MULADD(at[41], at[90]); MULADD(at[42], at[89]); MULADD(at[43], at[88]); MULADD(at[44], at[87]); MULADD(at[45], at[86]); MULADD(at[46], at[85]); MULADD(at[47], at[84]); MULADD(at[48], at[83]); MULADD(at[49], at[82]); MULADD(at[50], at[81]); MULADD(at[51], at[80]); MULADD(at[52], at[79]); MULADD(at[53], at[78]); MULADD(at[54], at[77]); MULADD(at[55], at[76]); MULADD(at[56], at[75]); MULADD(at[57], at[74]); MULADD(at[58], at[73]); MULADD(at[59], at[72]); MULADD(at[60], at[71]); MULADD(at[61], at[70]); MULADD(at[62], at[69]); MULADD(at[63], at[68]);
+ COMBA_STORE(C->dp[67]);
+ /* 68 */
+ COMBA_FORWARD;
+ MULADD(at[5], at[127]); MULADD(at[6], at[126]); MULADD(at[7], at[125]); MULADD(at[8], at[124]); MULADD(at[9], at[123]); MULADD(at[10], at[122]); MULADD(at[11], at[121]); MULADD(at[12], at[120]); MULADD(at[13], at[119]); MULADD(at[14], at[118]); MULADD(at[15], at[117]); MULADD(at[16], at[116]); MULADD(at[17], at[115]); MULADD(at[18], at[114]); MULADD(at[19], at[113]); MULADD(at[20], at[112]); MULADD(at[21], at[111]); MULADD(at[22], at[110]); MULADD(at[23], at[109]); MULADD(at[24], at[108]); MULADD(at[25], at[107]); MULADD(at[26], at[106]); MULADD(at[27], at[105]); MULADD(at[28], at[104]); MULADD(at[29], at[103]); MULADD(at[30], at[102]); MULADD(at[31], at[101]); MULADD(at[32], at[100]); MULADD(at[33], at[99]); MULADD(at[34], at[98]); MULADD(at[35], at[97]); MULADD(at[36], at[96]); MULADD(at[37], at[95]); MULADD(at[38], at[94]); MULADD(at[39], at[93]); MULADD(at[40], at[92]); MULADD(at[41], at[91]); MULADD(at[42], at[90]); MULADD(at[43], at[89]); MULADD(at[44], at[88]); MULADD(at[45], at[87]); MULADD(at[46], at[86]); MULADD(at[47], at[85]); MULADD(at[48], at[84]); MULADD(at[49], at[83]); MULADD(at[50], at[82]); MULADD(at[51], at[81]); MULADD(at[52], at[80]); MULADD(at[53], at[79]); MULADD(at[54], at[78]); MULADD(at[55], at[77]); MULADD(at[56], at[76]); MULADD(at[57], at[75]); MULADD(at[58], at[74]); MULADD(at[59], at[73]); MULADD(at[60], at[72]); MULADD(at[61], at[71]); MULADD(at[62], at[70]); MULADD(at[63], at[69]);
+ COMBA_STORE(C->dp[68]);
+ /* 69 */
+ COMBA_FORWARD;
+ MULADD(at[6], at[127]); MULADD(at[7], at[126]); MULADD(at[8], at[125]); MULADD(at[9], at[124]); MULADD(at[10], at[123]); MULADD(at[11], at[122]); MULADD(at[12], at[121]); MULADD(at[13], at[120]); MULADD(at[14], at[119]); MULADD(at[15], at[118]); MULADD(at[16], at[117]); MULADD(at[17], at[116]); MULADD(at[18], at[115]); MULADD(at[19], at[114]); MULADD(at[20], at[113]); MULADD(at[21], at[112]); MULADD(at[22], at[111]); MULADD(at[23], at[110]); MULADD(at[24], at[109]); MULADD(at[25], at[108]); MULADD(at[26], at[107]); MULADD(at[27], at[106]); MULADD(at[28], at[105]); MULADD(at[29], at[104]); MULADD(at[30], at[103]); MULADD(at[31], at[102]); MULADD(at[32], at[101]); MULADD(at[33], at[100]); MULADD(at[34], at[99]); MULADD(at[35], at[98]); MULADD(at[36], at[97]); MULADD(at[37], at[96]); MULADD(at[38], at[95]); MULADD(at[39], at[94]); MULADD(at[40], at[93]); MULADD(at[41], at[92]); MULADD(at[42], at[91]); MULADD(at[43], at[90]); MULADD(at[44], at[89]); MULADD(at[45], at[88]); MULADD(at[46], at[87]); MULADD(at[47], at[86]); MULADD(at[48], at[85]); MULADD(at[49], at[84]); MULADD(at[50], at[83]); MULADD(at[51], at[82]); MULADD(at[52], at[81]); MULADD(at[53], at[80]); MULADD(at[54], at[79]); MULADD(at[55], at[78]); MULADD(at[56], at[77]); MULADD(at[57], at[76]); MULADD(at[58], at[75]); MULADD(at[59], at[74]); MULADD(at[60], at[73]); MULADD(at[61], at[72]); MULADD(at[62], at[71]); MULADD(at[63], at[70]);
+ COMBA_STORE(C->dp[69]);
+ /* 70 */
+ COMBA_FORWARD;
+ MULADD(at[7], at[127]); MULADD(at[8], at[126]); MULADD(at[9], at[125]); MULADD(at[10], at[124]); MULADD(at[11], at[123]); MULADD(at[12], at[122]); MULADD(at[13], at[121]); MULADD(at[14], at[120]); MULADD(at[15], at[119]); MULADD(at[16], at[118]); MULADD(at[17], at[117]); MULADD(at[18], at[116]); MULADD(at[19], at[115]); MULADD(at[20], at[114]); MULADD(at[21], at[113]); MULADD(at[22], at[112]); MULADD(at[23], at[111]); MULADD(at[24], at[110]); MULADD(at[25], at[109]); MULADD(at[26], at[108]); MULADD(at[27], at[107]); MULADD(at[28], at[106]); MULADD(at[29], at[105]); MULADD(at[30], at[104]); MULADD(at[31], at[103]); MULADD(at[32], at[102]); MULADD(at[33], at[101]); MULADD(at[34], at[100]); MULADD(at[35], at[99]); MULADD(at[36], at[98]); MULADD(at[37], at[97]); MULADD(at[38], at[96]); MULADD(at[39], at[95]); MULADD(at[40], at[94]); MULADD(at[41], at[93]); MULADD(at[42], at[92]); MULADD(at[43], at[91]); MULADD(at[44], at[90]); MULADD(at[45], at[89]); MULADD(at[46], at[88]); MULADD(at[47], at[87]); MULADD(at[48], at[86]); MULADD(at[49], at[85]); MULADD(at[50], at[84]); MULADD(at[51], at[83]); MULADD(at[52], at[82]); MULADD(at[53], at[81]); MULADD(at[54], at[80]); MULADD(at[55], at[79]); MULADD(at[56], at[78]); MULADD(at[57], at[77]); MULADD(at[58], at[76]); MULADD(at[59], at[75]); MULADD(at[60], at[74]); MULADD(at[61], at[73]); MULADD(at[62], at[72]); MULADD(at[63], at[71]);
+ COMBA_STORE(C->dp[70]);
+ /* 71 */
+ COMBA_FORWARD;
+ MULADD(at[8], at[127]); MULADD(at[9], at[126]); MULADD(at[10], at[125]); MULADD(at[11], at[124]); MULADD(at[12], at[123]); MULADD(at[13], at[122]); MULADD(at[14], at[121]); MULADD(at[15], at[120]); MULADD(at[16], at[119]); MULADD(at[17], at[118]); MULADD(at[18], at[117]); MULADD(at[19], at[116]); MULADD(at[20], at[115]); MULADD(at[21], at[114]); MULADD(at[22], at[113]); MULADD(at[23], at[112]); MULADD(at[24], at[111]); MULADD(at[25], at[110]); MULADD(at[26], at[109]); MULADD(at[27], at[108]); MULADD(at[28], at[107]); MULADD(at[29], at[106]); MULADD(at[30], at[105]); MULADD(at[31], at[104]); MULADD(at[32], at[103]); MULADD(at[33], at[102]); MULADD(at[34], at[101]); MULADD(at[35], at[100]); MULADD(at[36], at[99]); MULADD(at[37], at[98]); MULADD(at[38], at[97]); MULADD(at[39], at[96]); MULADD(at[40], at[95]); MULADD(at[41], at[94]); MULADD(at[42], at[93]); MULADD(at[43], at[92]); MULADD(at[44], at[91]); MULADD(at[45], at[90]); MULADD(at[46], at[89]); MULADD(at[47], at[88]); MULADD(at[48], at[87]); MULADD(at[49], at[86]); MULADD(at[50], at[85]); MULADD(at[51], at[84]); MULADD(at[52], at[83]); MULADD(at[53], at[82]); MULADD(at[54], at[81]); MULADD(at[55], at[80]); MULADD(at[56], at[79]); MULADD(at[57], at[78]); MULADD(at[58], at[77]); MULADD(at[59], at[76]); MULADD(at[60], at[75]); MULADD(at[61], at[74]); MULADD(at[62], at[73]); MULADD(at[63], at[72]);
+ COMBA_STORE(C->dp[71]);
+ /* 72 */
+ COMBA_FORWARD;
+ MULADD(at[9], at[127]); MULADD(at[10], at[126]); MULADD(at[11], at[125]); MULADD(at[12], at[124]); MULADD(at[13], at[123]); MULADD(at[14], at[122]); MULADD(at[15], at[121]); MULADD(at[16], at[120]); MULADD(at[17], at[119]); MULADD(at[18], at[118]); MULADD(at[19], at[117]); MULADD(at[20], at[116]); MULADD(at[21], at[115]); MULADD(at[22], at[114]); MULADD(at[23], at[113]); MULADD(at[24], at[112]); MULADD(at[25], at[111]); MULADD(at[26], at[110]); MULADD(at[27], at[109]); MULADD(at[28], at[108]); MULADD(at[29], at[107]); MULADD(at[30], at[106]); MULADD(at[31], at[105]); MULADD(at[32], at[104]); MULADD(at[33], at[103]); MULADD(at[34], at[102]); MULADD(at[35], at[101]); MULADD(at[36], at[100]); MULADD(at[37], at[99]); MULADD(at[38], at[98]); MULADD(at[39], at[97]); MULADD(at[40], at[96]); MULADD(at[41], at[95]); MULADD(at[42], at[94]); MULADD(at[43], at[93]); MULADD(at[44], at[92]); MULADD(at[45], at[91]); MULADD(at[46], at[90]); MULADD(at[47], at[89]); MULADD(at[48], at[88]); MULADD(at[49], at[87]); MULADD(at[50], at[86]); MULADD(at[51], at[85]); MULADD(at[52], at[84]); MULADD(at[53], at[83]); MULADD(at[54], at[82]); MULADD(at[55], at[81]); MULADD(at[56], at[80]); MULADD(at[57], at[79]); MULADD(at[58], at[78]); MULADD(at[59], at[77]); MULADD(at[60], at[76]); MULADD(at[61], at[75]); MULADD(at[62], at[74]); MULADD(at[63], at[73]);
+ COMBA_STORE(C->dp[72]);
+ /* 73 */
+ COMBA_FORWARD;
+ MULADD(at[10], at[127]); MULADD(at[11], at[126]); MULADD(at[12], at[125]); MULADD(at[13], at[124]); MULADD(at[14], at[123]); MULADD(at[15], at[122]); MULADD(at[16], at[121]); MULADD(at[17], at[120]); MULADD(at[18], at[119]); MULADD(at[19], at[118]); MULADD(at[20], at[117]); MULADD(at[21], at[116]); MULADD(at[22], at[115]); MULADD(at[23], at[114]); MULADD(at[24], at[113]); MULADD(at[25], at[112]); MULADD(at[26], at[111]); MULADD(at[27], at[110]); MULADD(at[28], at[109]); MULADD(at[29], at[108]); MULADD(at[30], at[107]); MULADD(at[31], at[106]); MULADD(at[32], at[105]); MULADD(at[33], at[104]); MULADD(at[34], at[103]); MULADD(at[35], at[102]); MULADD(at[36], at[101]); MULADD(at[37], at[100]); MULADD(at[38], at[99]); MULADD(at[39], at[98]); MULADD(at[40], at[97]); MULADD(at[41], at[96]); MULADD(at[42], at[95]); MULADD(at[43], at[94]); MULADD(at[44], at[93]); MULADD(at[45], at[92]); MULADD(at[46], at[91]); MULADD(at[47], at[90]); MULADD(at[48], at[89]); MULADD(at[49], at[88]); MULADD(at[50], at[87]); MULADD(at[51], at[86]); MULADD(at[52], at[85]); MULADD(at[53], at[84]); MULADD(at[54], at[83]); MULADD(at[55], at[82]); MULADD(at[56], at[81]); MULADD(at[57], at[80]); MULADD(at[58], at[79]); MULADD(at[59], at[78]); MULADD(at[60], at[77]); MULADD(at[61], at[76]); MULADD(at[62], at[75]); MULADD(at[63], at[74]);
+ COMBA_STORE(C->dp[73]);
+ /* 74 */
+ COMBA_FORWARD;
+ MULADD(at[11], at[127]); MULADD(at[12], at[126]); MULADD(at[13], at[125]); MULADD(at[14], at[124]); MULADD(at[15], at[123]); MULADD(at[16], at[122]); MULADD(at[17], at[121]); MULADD(at[18], at[120]); MULADD(at[19], at[119]); MULADD(at[20], at[118]); MULADD(at[21], at[117]); MULADD(at[22], at[116]); MULADD(at[23], at[115]); MULADD(at[24], at[114]); MULADD(at[25], at[113]); MULADD(at[26], at[112]); MULADD(at[27], at[111]); MULADD(at[28], at[110]); MULADD(at[29], at[109]); MULADD(at[30], at[108]); MULADD(at[31], at[107]); MULADD(at[32], at[106]); MULADD(at[33], at[105]); MULADD(at[34], at[104]); MULADD(at[35], at[103]); MULADD(at[36], at[102]); MULADD(at[37], at[101]); MULADD(at[38], at[100]); MULADD(at[39], at[99]); MULADD(at[40], at[98]); MULADD(at[41], at[97]); MULADD(at[42], at[96]); MULADD(at[43], at[95]); MULADD(at[44], at[94]); MULADD(at[45], at[93]); MULADD(at[46], at[92]); MULADD(at[47], at[91]); MULADD(at[48], at[90]); MULADD(at[49], at[89]); MULADD(at[50], at[88]); MULADD(at[51], at[87]); MULADD(at[52], at[86]); MULADD(at[53], at[85]); MULADD(at[54], at[84]); MULADD(at[55], at[83]); MULADD(at[56], at[82]); MULADD(at[57], at[81]); MULADD(at[58], at[80]); MULADD(at[59], at[79]); MULADD(at[60], at[78]); MULADD(at[61], at[77]); MULADD(at[62], at[76]); MULADD(at[63], at[75]);
+ COMBA_STORE(C->dp[74]);
+ /* 75 */
+ COMBA_FORWARD;
+ MULADD(at[12], at[127]); MULADD(at[13], at[126]); MULADD(at[14], at[125]); MULADD(at[15], at[124]); MULADD(at[16], at[123]); MULADD(at[17], at[122]); MULADD(at[18], at[121]); MULADD(at[19], at[120]); MULADD(at[20], at[119]); MULADD(at[21], at[118]); MULADD(at[22], at[117]); MULADD(at[23], at[116]); MULADD(at[24], at[115]); MULADD(at[25], at[114]); MULADD(at[26], at[113]); MULADD(at[27], at[112]); MULADD(at[28], at[111]); MULADD(at[29], at[110]); MULADD(at[30], at[109]); MULADD(at[31], at[108]); MULADD(at[32], at[107]); MULADD(at[33], at[106]); MULADD(at[34], at[105]); MULADD(at[35], at[104]); MULADD(at[36], at[103]); MULADD(at[37], at[102]); MULADD(at[38], at[101]); MULADD(at[39], at[100]); MULADD(at[40], at[99]); MULADD(at[41], at[98]); MULADD(at[42], at[97]); MULADD(at[43], at[96]); MULADD(at[44], at[95]); MULADD(at[45], at[94]); MULADD(at[46], at[93]); MULADD(at[47], at[92]); MULADD(at[48], at[91]); MULADD(at[49], at[90]); MULADD(at[50], at[89]); MULADD(at[51], at[88]); MULADD(at[52], at[87]); MULADD(at[53], at[86]); MULADD(at[54], at[85]); MULADD(at[55], at[84]); MULADD(at[56], at[83]); MULADD(at[57], at[82]); MULADD(at[58], at[81]); MULADD(at[59], at[80]); MULADD(at[60], at[79]); MULADD(at[61], at[78]); MULADD(at[62], at[77]); MULADD(at[63], at[76]);
+ COMBA_STORE(C->dp[75]);
+ /* 76 */
+ COMBA_FORWARD;
+ MULADD(at[13], at[127]); MULADD(at[14], at[126]); MULADD(at[15], at[125]); MULADD(at[16], at[124]); MULADD(at[17], at[123]); MULADD(at[18], at[122]); MULADD(at[19], at[121]); MULADD(at[20], at[120]); MULADD(at[21], at[119]); MULADD(at[22], at[118]); MULADD(at[23], at[117]); MULADD(at[24], at[116]); MULADD(at[25], at[115]); MULADD(at[26], at[114]); MULADD(at[27], at[113]); MULADD(at[28], at[112]); MULADD(at[29], at[111]); MULADD(at[30], at[110]); MULADD(at[31], at[109]); MULADD(at[32], at[108]); MULADD(at[33], at[107]); MULADD(at[34], at[106]); MULADD(at[35], at[105]); MULADD(at[36], at[104]); MULADD(at[37], at[103]); MULADD(at[38], at[102]); MULADD(at[39], at[101]); MULADD(at[40], at[100]); MULADD(at[41], at[99]); MULADD(at[42], at[98]); MULADD(at[43], at[97]); MULADD(at[44], at[96]); MULADD(at[45], at[95]); MULADD(at[46], at[94]); MULADD(at[47], at[93]); MULADD(at[48], at[92]); MULADD(at[49], at[91]); MULADD(at[50], at[90]); MULADD(at[51], at[89]); MULADD(at[52], at[88]); MULADD(at[53], at[87]); MULADD(at[54], at[86]); MULADD(at[55], at[85]); MULADD(at[56], at[84]); MULADD(at[57], at[83]); MULADD(at[58], at[82]); MULADD(at[59], at[81]); MULADD(at[60], at[80]); MULADD(at[61], at[79]); MULADD(at[62], at[78]); MULADD(at[63], at[77]);
+ COMBA_STORE(C->dp[76]);
+ /* 77 */
+ COMBA_FORWARD;
+ MULADD(at[14], at[127]); MULADD(at[15], at[126]); MULADD(at[16], at[125]); MULADD(at[17], at[124]); MULADD(at[18], at[123]); MULADD(at[19], at[122]); MULADD(at[20], at[121]); MULADD(at[21], at[120]); MULADD(at[22], at[119]); MULADD(at[23], at[118]); MULADD(at[24], at[117]); MULADD(at[25], at[116]); MULADD(at[26], at[115]); MULADD(at[27], at[114]); MULADD(at[28], at[113]); MULADD(at[29], at[112]); MULADD(at[30], at[111]); MULADD(at[31], at[110]); MULADD(at[32], at[109]); MULADD(at[33], at[108]); MULADD(at[34], at[107]); MULADD(at[35], at[106]); MULADD(at[36], at[105]); MULADD(at[37], at[104]); MULADD(at[38], at[103]); MULADD(at[39], at[102]); MULADD(at[40], at[101]); MULADD(at[41], at[100]); MULADD(at[42], at[99]); MULADD(at[43], at[98]); MULADD(at[44], at[97]); MULADD(at[45], at[96]); MULADD(at[46], at[95]); MULADD(at[47], at[94]); MULADD(at[48], at[93]); MULADD(at[49], at[92]); MULADD(at[50], at[91]); MULADD(at[51], at[90]); MULADD(at[52], at[89]); MULADD(at[53], at[88]); MULADD(at[54], at[87]); MULADD(at[55], at[86]); MULADD(at[56], at[85]); MULADD(at[57], at[84]); MULADD(at[58], at[83]); MULADD(at[59], at[82]); MULADD(at[60], at[81]); MULADD(at[61], at[80]); MULADD(at[62], at[79]); MULADD(at[63], at[78]);
+ COMBA_STORE(C->dp[77]);
+ /* 78 */
+ COMBA_FORWARD;
+ MULADD(at[15], at[127]); MULADD(at[16], at[126]); MULADD(at[17], at[125]); MULADD(at[18], at[124]); MULADD(at[19], at[123]); MULADD(at[20], at[122]); MULADD(at[21], at[121]); MULADD(at[22], at[120]); MULADD(at[23], at[119]); MULADD(at[24], at[118]); MULADD(at[25], at[117]); MULADD(at[26], at[116]); MULADD(at[27], at[115]); MULADD(at[28], at[114]); MULADD(at[29], at[113]); MULADD(at[30], at[112]); MULADD(at[31], at[111]); MULADD(at[32], at[110]); MULADD(at[33], at[109]); MULADD(at[34], at[108]); MULADD(at[35], at[107]); MULADD(at[36], at[106]); MULADD(at[37], at[105]); MULADD(at[38], at[104]); MULADD(at[39], at[103]); MULADD(at[40], at[102]); MULADD(at[41], at[101]); MULADD(at[42], at[100]); MULADD(at[43], at[99]); MULADD(at[44], at[98]); MULADD(at[45], at[97]); MULADD(at[46], at[96]); MULADD(at[47], at[95]); MULADD(at[48], at[94]); MULADD(at[49], at[93]); MULADD(at[50], at[92]); MULADD(at[51], at[91]); MULADD(at[52], at[90]); MULADD(at[53], at[89]); MULADD(at[54], at[88]); MULADD(at[55], at[87]); MULADD(at[56], at[86]); MULADD(at[57], at[85]); MULADD(at[58], at[84]); MULADD(at[59], at[83]); MULADD(at[60], at[82]); MULADD(at[61], at[81]); MULADD(at[62], at[80]); MULADD(at[63], at[79]);
+ COMBA_STORE(C->dp[78]);
+ /* 79 */
+ COMBA_FORWARD;
+ MULADD(at[16], at[127]); MULADD(at[17], at[126]); MULADD(at[18], at[125]); MULADD(at[19], at[124]); MULADD(at[20], at[123]); MULADD(at[21], at[122]); MULADD(at[22], at[121]); MULADD(at[23], at[120]); MULADD(at[24], at[119]); MULADD(at[25], at[118]); MULADD(at[26], at[117]); MULADD(at[27], at[116]); MULADD(at[28], at[115]); MULADD(at[29], at[114]); MULADD(at[30], at[113]); MULADD(at[31], at[112]); MULADD(at[32], at[111]); MULADD(at[33], at[110]); MULADD(at[34], at[109]); MULADD(at[35], at[108]); MULADD(at[36], at[107]); MULADD(at[37], at[106]); MULADD(at[38], at[105]); MULADD(at[39], at[104]); MULADD(at[40], at[103]); MULADD(at[41], at[102]); MULADD(at[42], at[101]); MULADD(at[43], at[100]); MULADD(at[44], at[99]); MULADD(at[45], at[98]); MULADD(at[46], at[97]); MULADD(at[47], at[96]); MULADD(at[48], at[95]); MULADD(at[49], at[94]); MULADD(at[50], at[93]); MULADD(at[51], at[92]); MULADD(at[52], at[91]); MULADD(at[53], at[90]); MULADD(at[54], at[89]); MULADD(at[55], at[88]); MULADD(at[56], at[87]); MULADD(at[57], at[86]); MULADD(at[58], at[85]); MULADD(at[59], at[84]); MULADD(at[60], at[83]); MULADD(at[61], at[82]); MULADD(at[62], at[81]); MULADD(at[63], at[80]);
+ COMBA_STORE(C->dp[79]);
+ /* 80 */
+ COMBA_FORWARD;
+ MULADD(at[17], at[127]); MULADD(at[18], at[126]); MULADD(at[19], at[125]); MULADD(at[20], at[124]); MULADD(at[21], at[123]); MULADD(at[22], at[122]); MULADD(at[23], at[121]); MULADD(at[24], at[120]); MULADD(at[25], at[119]); MULADD(at[26], at[118]); MULADD(at[27], at[117]); MULADD(at[28], at[116]); MULADD(at[29], at[115]); MULADD(at[30], at[114]); MULADD(at[31], at[113]); MULADD(at[32], at[112]); MULADD(at[33], at[111]); MULADD(at[34], at[110]); MULADD(at[35], at[109]); MULADD(at[36], at[108]); MULADD(at[37], at[107]); MULADD(at[38], at[106]); MULADD(at[39], at[105]); MULADD(at[40], at[104]); MULADD(at[41], at[103]); MULADD(at[42], at[102]); MULADD(at[43], at[101]); MULADD(at[44], at[100]); MULADD(at[45], at[99]); MULADD(at[46], at[98]); MULADD(at[47], at[97]); MULADD(at[48], at[96]); MULADD(at[49], at[95]); MULADD(at[50], at[94]); MULADD(at[51], at[93]); MULADD(at[52], at[92]); MULADD(at[53], at[91]); MULADD(at[54], at[90]); MULADD(at[55], at[89]); MULADD(at[56], at[88]); MULADD(at[57], at[87]); MULADD(at[58], at[86]); MULADD(at[59], at[85]); MULADD(at[60], at[84]); MULADD(at[61], at[83]); MULADD(at[62], at[82]); MULADD(at[63], at[81]);
+ COMBA_STORE(C->dp[80]);
+ /* 81 */
+ COMBA_FORWARD;
+ MULADD(at[18], at[127]); MULADD(at[19], at[126]); MULADD(at[20], at[125]); MULADD(at[21], at[124]); MULADD(at[22], at[123]); MULADD(at[23], at[122]); MULADD(at[24], at[121]); MULADD(at[25], at[120]); MULADD(at[26], at[119]); MULADD(at[27], at[118]); MULADD(at[28], at[117]); MULADD(at[29], at[116]); MULADD(at[30], at[115]); MULADD(at[31], at[114]); MULADD(at[32], at[113]); MULADD(at[33], at[112]); MULADD(at[34], at[111]); MULADD(at[35], at[110]); MULADD(at[36], at[109]); MULADD(at[37], at[108]); MULADD(at[38], at[107]); MULADD(at[39], at[106]); MULADD(at[40], at[105]); MULADD(at[41], at[104]); MULADD(at[42], at[103]); MULADD(at[43], at[102]); MULADD(at[44], at[101]); MULADD(at[45], at[100]); MULADD(at[46], at[99]); MULADD(at[47], at[98]); MULADD(at[48], at[97]); MULADD(at[49], at[96]); MULADD(at[50], at[95]); MULADD(at[51], at[94]); MULADD(at[52], at[93]); MULADD(at[53], at[92]); MULADD(at[54], at[91]); MULADD(at[55], at[90]); MULADD(at[56], at[89]); MULADD(at[57], at[88]); MULADD(at[58], at[87]); MULADD(at[59], at[86]); MULADD(at[60], at[85]); MULADD(at[61], at[84]); MULADD(at[62], at[83]); MULADD(at[63], at[82]);
+ COMBA_STORE(C->dp[81]);
+ /* 82 */
+ COMBA_FORWARD;
+ MULADD(at[19], at[127]); MULADD(at[20], at[126]); MULADD(at[21], at[125]); MULADD(at[22], at[124]); MULADD(at[23], at[123]); MULADD(at[24], at[122]); MULADD(at[25], at[121]); MULADD(at[26], at[120]); MULADD(at[27], at[119]); MULADD(at[28], at[118]); MULADD(at[29], at[117]); MULADD(at[30], at[116]); MULADD(at[31], at[115]); MULADD(at[32], at[114]); MULADD(at[33], at[113]); MULADD(at[34], at[112]); MULADD(at[35], at[111]); MULADD(at[36], at[110]); MULADD(at[37], at[109]); MULADD(at[38], at[108]); MULADD(at[39], at[107]); MULADD(at[40], at[106]); MULADD(at[41], at[105]); MULADD(at[42], at[104]); MULADD(at[43], at[103]); MULADD(at[44], at[102]); MULADD(at[45], at[101]); MULADD(at[46], at[100]); MULADD(at[47], at[99]); MULADD(at[48], at[98]); MULADD(at[49], at[97]); MULADD(at[50], at[96]); MULADD(at[51], at[95]); MULADD(at[52], at[94]); MULADD(at[53], at[93]); MULADD(at[54], at[92]); MULADD(at[55], at[91]); MULADD(at[56], at[90]); MULADD(at[57], at[89]); MULADD(at[58], at[88]); MULADD(at[59], at[87]); MULADD(at[60], at[86]); MULADD(at[61], at[85]); MULADD(at[62], at[84]); MULADD(at[63], at[83]);
+ COMBA_STORE(C->dp[82]);
+ /* 83 */
+ COMBA_FORWARD;
+ MULADD(at[20], at[127]); MULADD(at[21], at[126]); MULADD(at[22], at[125]); MULADD(at[23], at[124]); MULADD(at[24], at[123]); MULADD(at[25], at[122]); MULADD(at[26], at[121]); MULADD(at[27], at[120]); MULADD(at[28], at[119]); MULADD(at[29], at[118]); MULADD(at[30], at[117]); MULADD(at[31], at[116]); MULADD(at[32], at[115]); MULADD(at[33], at[114]); MULADD(at[34], at[113]); MULADD(at[35], at[112]); MULADD(at[36], at[111]); MULADD(at[37], at[110]); MULADD(at[38], at[109]); MULADD(at[39], at[108]); MULADD(at[40], at[107]); MULADD(at[41], at[106]); MULADD(at[42], at[105]); MULADD(at[43], at[104]); MULADD(at[44], at[103]); MULADD(at[45], at[102]); MULADD(at[46], at[101]); MULADD(at[47], at[100]); MULADD(at[48], at[99]); MULADD(at[49], at[98]); MULADD(at[50], at[97]); MULADD(at[51], at[96]); MULADD(at[52], at[95]); MULADD(at[53], at[94]); MULADD(at[54], at[93]); MULADD(at[55], at[92]); MULADD(at[56], at[91]); MULADD(at[57], at[90]); MULADD(at[58], at[89]); MULADD(at[59], at[88]); MULADD(at[60], at[87]); MULADD(at[61], at[86]); MULADD(at[62], at[85]); MULADD(at[63], at[84]);
+ COMBA_STORE(C->dp[83]);
+ /* 84 */
+ COMBA_FORWARD;
+ MULADD(at[21], at[127]); MULADD(at[22], at[126]); MULADD(at[23], at[125]); MULADD(at[24], at[124]); MULADD(at[25], at[123]); MULADD(at[26], at[122]); MULADD(at[27], at[121]); MULADD(at[28], at[120]); MULADD(at[29], at[119]); MULADD(at[30], at[118]); MULADD(at[31], at[117]); MULADD(at[32], at[116]); MULADD(at[33], at[115]); MULADD(at[34], at[114]); MULADD(at[35], at[113]); MULADD(at[36], at[112]); MULADD(at[37], at[111]); MULADD(at[38], at[110]); MULADD(at[39], at[109]); MULADD(at[40], at[108]); MULADD(at[41], at[107]); MULADD(at[42], at[106]); MULADD(at[43], at[105]); MULADD(at[44], at[104]); MULADD(at[45], at[103]); MULADD(at[46], at[102]); MULADD(at[47], at[101]); MULADD(at[48], at[100]); MULADD(at[49], at[99]); MULADD(at[50], at[98]); MULADD(at[51], at[97]); MULADD(at[52], at[96]); MULADD(at[53], at[95]); MULADD(at[54], at[94]); MULADD(at[55], at[93]); MULADD(at[56], at[92]); MULADD(at[57], at[91]); MULADD(at[58], at[90]); MULADD(at[59], at[89]); MULADD(at[60], at[88]); MULADD(at[61], at[87]); MULADD(at[62], at[86]); MULADD(at[63], at[85]);
+ COMBA_STORE(C->dp[84]);
+ /* 85 */
+ COMBA_FORWARD;
+ MULADD(at[22], at[127]); MULADD(at[23], at[126]); MULADD(at[24], at[125]); MULADD(at[25], at[124]); MULADD(at[26], at[123]); MULADD(at[27], at[122]); MULADD(at[28], at[121]); MULADD(at[29], at[120]); MULADD(at[30], at[119]); MULADD(at[31], at[118]); MULADD(at[32], at[117]); MULADD(at[33], at[116]); MULADD(at[34], at[115]); MULADD(at[35], at[114]); MULADD(at[36], at[113]); MULADD(at[37], at[112]); MULADD(at[38], at[111]); MULADD(at[39], at[110]); MULADD(at[40], at[109]); MULADD(at[41], at[108]); MULADD(at[42], at[107]); MULADD(at[43], at[106]); MULADD(at[44], at[105]); MULADD(at[45], at[104]); MULADD(at[46], at[103]); MULADD(at[47], at[102]); MULADD(at[48], at[101]); MULADD(at[49], at[100]); MULADD(at[50], at[99]); MULADD(at[51], at[98]); MULADD(at[52], at[97]); MULADD(at[53], at[96]); MULADD(at[54], at[95]); MULADD(at[55], at[94]); MULADD(at[56], at[93]); MULADD(at[57], at[92]); MULADD(at[58], at[91]); MULADD(at[59], at[90]); MULADD(at[60], at[89]); MULADD(at[61], at[88]); MULADD(at[62], at[87]); MULADD(at[63], at[86]);
+ COMBA_STORE(C->dp[85]);
+ /* 86 */
+ COMBA_FORWARD;
+ MULADD(at[23], at[127]); MULADD(at[24], at[126]); MULADD(at[25], at[125]); MULADD(at[26], at[124]); MULADD(at[27], at[123]); MULADD(at[28], at[122]); MULADD(at[29], at[121]); MULADD(at[30], at[120]); MULADD(at[31], at[119]); MULADD(at[32], at[118]); MULADD(at[33], at[117]); MULADD(at[34], at[116]); MULADD(at[35], at[115]); MULADD(at[36], at[114]); MULADD(at[37], at[113]); MULADD(at[38], at[112]); MULADD(at[39], at[111]); MULADD(at[40], at[110]); MULADD(at[41], at[109]); MULADD(at[42], at[108]); MULADD(at[43], at[107]); MULADD(at[44], at[106]); MULADD(at[45], at[105]); MULADD(at[46], at[104]); MULADD(at[47], at[103]); MULADD(at[48], at[102]); MULADD(at[49], at[101]); MULADD(at[50], at[100]); MULADD(at[51], at[99]); MULADD(at[52], at[98]); MULADD(at[53], at[97]); MULADD(at[54], at[96]); MULADD(at[55], at[95]); MULADD(at[56], at[94]); MULADD(at[57], at[93]); MULADD(at[58], at[92]); MULADD(at[59], at[91]); MULADD(at[60], at[90]); MULADD(at[61], at[89]); MULADD(at[62], at[88]); MULADD(at[63], at[87]);
+ COMBA_STORE(C->dp[86]);
+ /* 87 */
+ COMBA_FORWARD;
+ MULADD(at[24], at[127]); MULADD(at[25], at[126]); MULADD(at[26], at[125]); MULADD(at[27], at[124]); MULADD(at[28], at[123]); MULADD(at[29], at[122]); MULADD(at[30], at[121]); MULADD(at[31], at[120]); MULADD(at[32], at[119]); MULADD(at[33], at[118]); MULADD(at[34], at[117]); MULADD(at[35], at[116]); MULADD(at[36], at[115]); MULADD(at[37], at[114]); MULADD(at[38], at[113]); MULADD(at[39], at[112]); MULADD(at[40], at[111]); MULADD(at[41], at[110]); MULADD(at[42], at[109]); MULADD(at[43], at[108]); MULADD(at[44], at[107]); MULADD(at[45], at[106]); MULADD(at[46], at[105]); MULADD(at[47], at[104]); MULADD(at[48], at[103]); MULADD(at[49], at[102]); MULADD(at[50], at[101]); MULADD(at[51], at[100]); MULADD(at[52], at[99]); MULADD(at[53], at[98]); MULADD(at[54], at[97]); MULADD(at[55], at[96]); MULADD(at[56], at[95]); MULADD(at[57], at[94]); MULADD(at[58], at[93]); MULADD(at[59], at[92]); MULADD(at[60], at[91]); MULADD(at[61], at[90]); MULADD(at[62], at[89]); MULADD(at[63], at[88]);
+ COMBA_STORE(C->dp[87]);
+ /* 88 */
+ COMBA_FORWARD;
+ MULADD(at[25], at[127]); MULADD(at[26], at[126]); MULADD(at[27], at[125]); MULADD(at[28], at[124]); MULADD(at[29], at[123]); MULADD(at[30], at[122]); MULADD(at[31], at[121]); MULADD(at[32], at[120]); MULADD(at[33], at[119]); MULADD(at[34], at[118]); MULADD(at[35], at[117]); MULADD(at[36], at[116]); MULADD(at[37], at[115]); MULADD(at[38], at[114]); MULADD(at[39], at[113]); MULADD(at[40], at[112]); MULADD(at[41], at[111]); MULADD(at[42], at[110]); MULADD(at[43], at[109]); MULADD(at[44], at[108]); MULADD(at[45], at[107]); MULADD(at[46], at[106]); MULADD(at[47], at[105]); MULADD(at[48], at[104]); MULADD(at[49], at[103]); MULADD(at[50], at[102]); MULADD(at[51], at[101]); MULADD(at[52], at[100]); MULADD(at[53], at[99]); MULADD(at[54], at[98]); MULADD(at[55], at[97]); MULADD(at[56], at[96]); MULADD(at[57], at[95]); MULADD(at[58], at[94]); MULADD(at[59], at[93]); MULADD(at[60], at[92]); MULADD(at[61], at[91]); MULADD(at[62], at[90]); MULADD(at[63], at[89]);
+ COMBA_STORE(C->dp[88]);
+ /* 89 */
+ COMBA_FORWARD;
+ MULADD(at[26], at[127]); MULADD(at[27], at[126]); MULADD(at[28], at[125]); MULADD(at[29], at[124]); MULADD(at[30], at[123]); MULADD(at[31], at[122]); MULADD(at[32], at[121]); MULADD(at[33], at[120]); MULADD(at[34], at[119]); MULADD(at[35], at[118]); MULADD(at[36], at[117]); MULADD(at[37], at[116]); MULADD(at[38], at[115]); MULADD(at[39], at[114]); MULADD(at[40], at[113]); MULADD(at[41], at[112]); MULADD(at[42], at[111]); MULADD(at[43], at[110]); MULADD(at[44], at[109]); MULADD(at[45], at[108]); MULADD(at[46], at[107]); MULADD(at[47], at[106]); MULADD(at[48], at[105]); MULADD(at[49], at[104]); MULADD(at[50], at[103]); MULADD(at[51], at[102]); MULADD(at[52], at[101]); MULADD(at[53], at[100]); MULADD(at[54], at[99]); MULADD(at[55], at[98]); MULADD(at[56], at[97]); MULADD(at[57], at[96]); MULADD(at[58], at[95]); MULADD(at[59], at[94]); MULADD(at[60], at[93]); MULADD(at[61], at[92]); MULADD(at[62], at[91]); MULADD(at[63], at[90]);
+ COMBA_STORE(C->dp[89]);
+ /* 90 */
+ COMBA_FORWARD;
+ MULADD(at[27], at[127]); MULADD(at[28], at[126]); MULADD(at[29], at[125]); MULADD(at[30], at[124]); MULADD(at[31], at[123]); MULADD(at[32], at[122]); MULADD(at[33], at[121]); MULADD(at[34], at[120]); MULADD(at[35], at[119]); MULADD(at[36], at[118]); MULADD(at[37], at[117]); MULADD(at[38], at[116]); MULADD(at[39], at[115]); MULADD(at[40], at[114]); MULADD(at[41], at[113]); MULADD(at[42], at[112]); MULADD(at[43], at[111]); MULADD(at[44], at[110]); MULADD(at[45], at[109]); MULADD(at[46], at[108]); MULADD(at[47], at[107]); MULADD(at[48], at[106]); MULADD(at[49], at[105]); MULADD(at[50], at[104]); MULADD(at[51], at[103]); MULADD(at[52], at[102]); MULADD(at[53], at[101]); MULADD(at[54], at[100]); MULADD(at[55], at[99]); MULADD(at[56], at[98]); MULADD(at[57], at[97]); MULADD(at[58], at[96]); MULADD(at[59], at[95]); MULADD(at[60], at[94]); MULADD(at[61], at[93]); MULADD(at[62], at[92]); MULADD(at[63], at[91]);
+ COMBA_STORE(C->dp[90]);
+ /* 91 */
+ COMBA_FORWARD;
+ MULADD(at[28], at[127]); MULADD(at[29], at[126]); MULADD(at[30], at[125]); MULADD(at[31], at[124]); MULADD(at[32], at[123]); MULADD(at[33], at[122]); MULADD(at[34], at[121]); MULADD(at[35], at[120]); MULADD(at[36], at[119]); MULADD(at[37], at[118]); MULADD(at[38], at[117]); MULADD(at[39], at[116]); MULADD(at[40], at[115]); MULADD(at[41], at[114]); MULADD(at[42], at[113]); MULADD(at[43], at[112]); MULADD(at[44], at[111]); MULADD(at[45], at[110]); MULADD(at[46], at[109]); MULADD(at[47], at[108]); MULADD(at[48], at[107]); MULADD(at[49], at[106]); MULADD(at[50], at[105]); MULADD(at[51], at[104]); MULADD(at[52], at[103]); MULADD(at[53], at[102]); MULADD(at[54], at[101]); MULADD(at[55], at[100]); MULADD(at[56], at[99]); MULADD(at[57], at[98]); MULADD(at[58], at[97]); MULADD(at[59], at[96]); MULADD(at[60], at[95]); MULADD(at[61], at[94]); MULADD(at[62], at[93]); MULADD(at[63], at[92]);
+ COMBA_STORE(C->dp[91]);
+ /* 92 */
+ COMBA_FORWARD;
+ MULADD(at[29], at[127]); MULADD(at[30], at[126]); MULADD(at[31], at[125]); MULADD(at[32], at[124]); MULADD(at[33], at[123]); MULADD(at[34], at[122]); MULADD(at[35], at[121]); MULADD(at[36], at[120]); MULADD(at[37], at[119]); MULADD(at[38], at[118]); MULADD(at[39], at[117]); MULADD(at[40], at[116]); MULADD(at[41], at[115]); MULADD(at[42], at[114]); MULADD(at[43], at[113]); MULADD(at[44], at[112]); MULADD(at[45], at[111]); MULADD(at[46], at[110]); MULADD(at[47], at[109]); MULADD(at[48], at[108]); MULADD(at[49], at[107]); MULADD(at[50], at[106]); MULADD(at[51], at[105]); MULADD(at[52], at[104]); MULADD(at[53], at[103]); MULADD(at[54], at[102]); MULADD(at[55], at[101]); MULADD(at[56], at[100]); MULADD(at[57], at[99]); MULADD(at[58], at[98]); MULADD(at[59], at[97]); MULADD(at[60], at[96]); MULADD(at[61], at[95]); MULADD(at[62], at[94]); MULADD(at[63], at[93]);
+ COMBA_STORE(C->dp[92]);
+ /* 93 */
+ COMBA_FORWARD;
+ MULADD(at[30], at[127]); MULADD(at[31], at[126]); MULADD(at[32], at[125]); MULADD(at[33], at[124]); MULADD(at[34], at[123]); MULADD(at[35], at[122]); MULADD(at[36], at[121]); MULADD(at[37], at[120]); MULADD(at[38], at[119]); MULADD(at[39], at[118]); MULADD(at[40], at[117]); MULADD(at[41], at[116]); MULADD(at[42], at[115]); MULADD(at[43], at[114]); MULADD(at[44], at[113]); MULADD(at[45], at[112]); MULADD(at[46], at[111]); MULADD(at[47], at[110]); MULADD(at[48], at[109]); MULADD(at[49], at[108]); MULADD(at[50], at[107]); MULADD(at[51], at[106]); MULADD(at[52], at[105]); MULADD(at[53], at[104]); MULADD(at[54], at[103]); MULADD(at[55], at[102]); MULADD(at[56], at[101]); MULADD(at[57], at[100]); MULADD(at[58], at[99]); MULADD(at[59], at[98]); MULADD(at[60], at[97]); MULADD(at[61], at[96]); MULADD(at[62], at[95]); MULADD(at[63], at[94]);
+ COMBA_STORE(C->dp[93]);
+ /* 94 */
+ COMBA_FORWARD;
+ MULADD(at[31], at[127]); MULADD(at[32], at[126]); MULADD(at[33], at[125]); MULADD(at[34], at[124]); MULADD(at[35], at[123]); MULADD(at[36], at[122]); MULADD(at[37], at[121]); MULADD(at[38], at[120]); MULADD(at[39], at[119]); MULADD(at[40], at[118]); MULADD(at[41], at[117]); MULADD(at[42], at[116]); MULADD(at[43], at[115]); MULADD(at[44], at[114]); MULADD(at[45], at[113]); MULADD(at[46], at[112]); MULADD(at[47], at[111]); MULADD(at[48], at[110]); MULADD(at[49], at[109]); MULADD(at[50], at[108]); MULADD(at[51], at[107]); MULADD(at[52], at[106]); MULADD(at[53], at[105]); MULADD(at[54], at[104]); MULADD(at[55], at[103]); MULADD(at[56], at[102]); MULADD(at[57], at[101]); MULADD(at[58], at[100]); MULADD(at[59], at[99]); MULADD(at[60], at[98]); MULADD(at[61], at[97]); MULADD(at[62], at[96]); MULADD(at[63], at[95]);
+ COMBA_STORE(C->dp[94]);
+ /* 95 */
+ COMBA_FORWARD;
+ MULADD(at[32], at[127]); MULADD(at[33], at[126]); MULADD(at[34], at[125]); MULADD(at[35], at[124]); MULADD(at[36], at[123]); MULADD(at[37], at[122]); MULADD(at[38], at[121]); MULADD(at[39], at[120]); MULADD(at[40], at[119]); MULADD(at[41], at[118]); MULADD(at[42], at[117]); MULADD(at[43], at[116]); MULADD(at[44], at[115]); MULADD(at[45], at[114]); MULADD(at[46], at[113]); MULADD(at[47], at[112]); MULADD(at[48], at[111]); MULADD(at[49], at[110]); MULADD(at[50], at[109]); MULADD(at[51], at[108]); MULADD(at[52], at[107]); MULADD(at[53], at[106]); MULADD(at[54], at[105]); MULADD(at[55], at[104]); MULADD(at[56], at[103]); MULADD(at[57], at[102]); MULADD(at[58], at[101]); MULADD(at[59], at[100]); MULADD(at[60], at[99]); MULADD(at[61], at[98]); MULADD(at[62], at[97]); MULADD(at[63], at[96]);
+ COMBA_STORE(C->dp[95]);
+ /* 96 */
+ COMBA_FORWARD;
+ MULADD(at[33], at[127]); MULADD(at[34], at[126]); MULADD(at[35], at[125]); MULADD(at[36], at[124]); MULADD(at[37], at[123]); MULADD(at[38], at[122]); MULADD(at[39], at[121]); MULADD(at[40], at[120]); MULADD(at[41], at[119]); MULADD(at[42], at[118]); MULADD(at[43], at[117]); MULADD(at[44], at[116]); MULADD(at[45], at[115]); MULADD(at[46], at[114]); MULADD(at[47], at[113]); MULADD(at[48], at[112]); MULADD(at[49], at[111]); MULADD(at[50], at[110]); MULADD(at[51], at[109]); MULADD(at[52], at[108]); MULADD(at[53], at[107]); MULADD(at[54], at[106]); MULADD(at[55], at[105]); MULADD(at[56], at[104]); MULADD(at[57], at[103]); MULADD(at[58], at[102]); MULADD(at[59], at[101]); MULADD(at[60], at[100]); MULADD(at[61], at[99]); MULADD(at[62], at[98]); MULADD(at[63], at[97]);
+ COMBA_STORE(C->dp[96]);
+ /* 97 */
+ COMBA_FORWARD;
+ MULADD(at[34], at[127]); MULADD(at[35], at[126]); MULADD(at[36], at[125]); MULADD(at[37], at[124]); MULADD(at[38], at[123]); MULADD(at[39], at[122]); MULADD(at[40], at[121]); MULADD(at[41], at[120]); MULADD(at[42], at[119]); MULADD(at[43], at[118]); MULADD(at[44], at[117]); MULADD(at[45], at[116]); MULADD(at[46], at[115]); MULADD(at[47], at[114]); MULADD(at[48], at[113]); MULADD(at[49], at[112]); MULADD(at[50], at[111]); MULADD(at[51], at[110]); MULADD(at[52], at[109]); MULADD(at[53], at[108]); MULADD(at[54], at[107]); MULADD(at[55], at[106]); MULADD(at[56], at[105]); MULADD(at[57], at[104]); MULADD(at[58], at[103]); MULADD(at[59], at[102]); MULADD(at[60], at[101]); MULADD(at[61], at[100]); MULADD(at[62], at[99]); MULADD(at[63], at[98]);
+ COMBA_STORE(C->dp[97]);
+ /* 98 */
+ COMBA_FORWARD;
+ MULADD(at[35], at[127]); MULADD(at[36], at[126]); MULADD(at[37], at[125]); MULADD(at[38], at[124]); MULADD(at[39], at[123]); MULADD(at[40], at[122]); MULADD(at[41], at[121]); MULADD(at[42], at[120]); MULADD(at[43], at[119]); MULADD(at[44], at[118]); MULADD(at[45], at[117]); MULADD(at[46], at[116]); MULADD(at[47], at[115]); MULADD(at[48], at[114]); MULADD(at[49], at[113]); MULADD(at[50], at[112]); MULADD(at[51], at[111]); MULADD(at[52], at[110]); MULADD(at[53], at[109]); MULADD(at[54], at[108]); MULADD(at[55], at[107]); MULADD(at[56], at[106]); MULADD(at[57], at[105]); MULADD(at[58], at[104]); MULADD(at[59], at[103]); MULADD(at[60], at[102]); MULADD(at[61], at[101]); MULADD(at[62], at[100]); MULADD(at[63], at[99]);
+ COMBA_STORE(C->dp[98]);
+ /* 99 */
+ COMBA_FORWARD;
+ MULADD(at[36], at[127]); MULADD(at[37], at[126]); MULADD(at[38], at[125]); MULADD(at[39], at[124]); MULADD(at[40], at[123]); MULADD(at[41], at[122]); MULADD(at[42], at[121]); MULADD(at[43], at[120]); MULADD(at[44], at[119]); MULADD(at[45], at[118]); MULADD(at[46], at[117]); MULADD(at[47], at[116]); MULADD(at[48], at[115]); MULADD(at[49], at[114]); MULADD(at[50], at[113]); MULADD(at[51], at[112]); MULADD(at[52], at[111]); MULADD(at[53], at[110]); MULADD(at[54], at[109]); MULADD(at[55], at[108]); MULADD(at[56], at[107]); MULADD(at[57], at[106]); MULADD(at[58], at[105]); MULADD(at[59], at[104]); MULADD(at[60], at[103]); MULADD(at[61], at[102]); MULADD(at[62], at[101]); MULADD(at[63], at[100]);
+ COMBA_STORE(C->dp[99]);
+ /* 100 */
+ COMBA_FORWARD;
+ MULADD(at[37], at[127]); MULADD(at[38], at[126]); MULADD(at[39], at[125]); MULADD(at[40], at[124]); MULADD(at[41], at[123]); MULADD(at[42], at[122]); MULADD(at[43], at[121]); MULADD(at[44], at[120]); MULADD(at[45], at[119]); MULADD(at[46], at[118]); MULADD(at[47], at[117]); MULADD(at[48], at[116]); MULADD(at[49], at[115]); MULADD(at[50], at[114]); MULADD(at[51], at[113]); MULADD(at[52], at[112]); MULADD(at[53], at[111]); MULADD(at[54], at[110]); MULADD(at[55], at[109]); MULADD(at[56], at[108]); MULADD(at[57], at[107]); MULADD(at[58], at[106]); MULADD(at[59], at[105]); MULADD(at[60], at[104]); MULADD(at[61], at[103]); MULADD(at[62], at[102]); MULADD(at[63], at[101]);
+ COMBA_STORE(C->dp[100]);
+ /* 101 */
+ COMBA_FORWARD;
+ MULADD(at[38], at[127]); MULADD(at[39], at[126]); MULADD(at[40], at[125]); MULADD(at[41], at[124]); MULADD(at[42], at[123]); MULADD(at[43], at[122]); MULADD(at[44], at[121]); MULADD(at[45], at[120]); MULADD(at[46], at[119]); MULADD(at[47], at[118]); MULADD(at[48], at[117]); MULADD(at[49], at[116]); MULADD(at[50], at[115]); MULADD(at[51], at[114]); MULADD(at[52], at[113]); MULADD(at[53], at[112]); MULADD(at[54], at[111]); MULADD(at[55], at[110]); MULADD(at[56], at[109]); MULADD(at[57], at[108]); MULADD(at[58], at[107]); MULADD(at[59], at[106]); MULADD(at[60], at[105]); MULADD(at[61], at[104]); MULADD(at[62], at[103]); MULADD(at[63], at[102]);
+ COMBA_STORE(C->dp[101]);
+ /* 102 */
+ COMBA_FORWARD;
+ MULADD(at[39], at[127]); MULADD(at[40], at[126]); MULADD(at[41], at[125]); MULADD(at[42], at[124]); MULADD(at[43], at[123]); MULADD(at[44], at[122]); MULADD(at[45], at[121]); MULADD(at[46], at[120]); MULADD(at[47], at[119]); MULADD(at[48], at[118]); MULADD(at[49], at[117]); MULADD(at[50], at[116]); MULADD(at[51], at[115]); MULADD(at[52], at[114]); MULADD(at[53], at[113]); MULADD(at[54], at[112]); MULADD(at[55], at[111]); MULADD(at[56], at[110]); MULADD(at[57], at[109]); MULADD(at[58], at[108]); MULADD(at[59], at[107]); MULADD(at[60], at[106]); MULADD(at[61], at[105]); MULADD(at[62], at[104]); MULADD(at[63], at[103]);
+ COMBA_STORE(C->dp[102]);
+ /* 103 */
+ COMBA_FORWARD;
+ MULADD(at[40], at[127]); MULADD(at[41], at[126]); MULADD(at[42], at[125]); MULADD(at[43], at[124]); MULADD(at[44], at[123]); MULADD(at[45], at[122]); MULADD(at[46], at[121]); MULADD(at[47], at[120]); MULADD(at[48], at[119]); MULADD(at[49], at[118]); MULADD(at[50], at[117]); MULADD(at[51], at[116]); MULADD(at[52], at[115]); MULADD(at[53], at[114]); MULADD(at[54], at[113]); MULADD(at[55], at[112]); MULADD(at[56], at[111]); MULADD(at[57], at[110]); MULADD(at[58], at[109]); MULADD(at[59], at[108]); MULADD(at[60], at[107]); MULADD(at[61], at[106]); MULADD(at[62], at[105]); MULADD(at[63], at[104]);
+ COMBA_STORE(C->dp[103]);
+ /* 104 */
+ COMBA_FORWARD;
+ MULADD(at[41], at[127]); MULADD(at[42], at[126]); MULADD(at[43], at[125]); MULADD(at[44], at[124]); MULADD(at[45], at[123]); MULADD(at[46], at[122]); MULADD(at[47], at[121]); MULADD(at[48], at[120]); MULADD(at[49], at[119]); MULADD(at[50], at[118]); MULADD(at[51], at[117]); MULADD(at[52], at[116]); MULADD(at[53], at[115]); MULADD(at[54], at[114]); MULADD(at[55], at[113]); MULADD(at[56], at[112]); MULADD(at[57], at[111]); MULADD(at[58], at[110]); MULADD(at[59], at[109]); MULADD(at[60], at[108]); MULADD(at[61], at[107]); MULADD(at[62], at[106]); MULADD(at[63], at[105]);
+ COMBA_STORE(C->dp[104]);
+ /* 105 */
+ COMBA_FORWARD;
+ MULADD(at[42], at[127]); MULADD(at[43], at[126]); MULADD(at[44], at[125]); MULADD(at[45], at[124]); MULADD(at[46], at[123]); MULADD(at[47], at[122]); MULADD(at[48], at[121]); MULADD(at[49], at[120]); MULADD(at[50], at[119]); MULADD(at[51], at[118]); MULADD(at[52], at[117]); MULADD(at[53], at[116]); MULADD(at[54], at[115]); MULADD(at[55], at[114]); MULADD(at[56], at[113]); MULADD(at[57], at[112]); MULADD(at[58], at[111]); MULADD(at[59], at[110]); MULADD(at[60], at[109]); MULADD(at[61], at[108]); MULADD(at[62], at[107]); MULADD(at[63], at[106]);
+ COMBA_STORE(C->dp[105]);
+ /* 106 */
+ COMBA_FORWARD;
+ MULADD(at[43], at[127]); MULADD(at[44], at[126]); MULADD(at[45], at[125]); MULADD(at[46], at[124]); MULADD(at[47], at[123]); MULADD(at[48], at[122]); MULADD(at[49], at[121]); MULADD(at[50], at[120]); MULADD(at[51], at[119]); MULADD(at[52], at[118]); MULADD(at[53], at[117]); MULADD(at[54], at[116]); MULADD(at[55], at[115]); MULADD(at[56], at[114]); MULADD(at[57], at[113]); MULADD(at[58], at[112]); MULADD(at[59], at[111]); MULADD(at[60], at[110]); MULADD(at[61], at[109]); MULADD(at[62], at[108]); MULADD(at[63], at[107]);
+ COMBA_STORE(C->dp[106]);
+ /* 107 */
+ COMBA_FORWARD;
+ MULADD(at[44], at[127]); MULADD(at[45], at[126]); MULADD(at[46], at[125]); MULADD(at[47], at[124]); MULADD(at[48], at[123]); MULADD(at[49], at[122]); MULADD(at[50], at[121]); MULADD(at[51], at[120]); MULADD(at[52], at[119]); MULADD(at[53], at[118]); MULADD(at[54], at[117]); MULADD(at[55], at[116]); MULADD(at[56], at[115]); MULADD(at[57], at[114]); MULADD(at[58], at[113]); MULADD(at[59], at[112]); MULADD(at[60], at[111]); MULADD(at[61], at[110]); MULADD(at[62], at[109]); MULADD(at[63], at[108]);
+ COMBA_STORE(C->dp[107]);
+ /* 108 */
+ COMBA_FORWARD;
+ MULADD(at[45], at[127]); MULADD(at[46], at[126]); MULADD(at[47], at[125]); MULADD(at[48], at[124]); MULADD(at[49], at[123]); MULADD(at[50], at[122]); MULADD(at[51], at[121]); MULADD(at[52], at[120]); MULADD(at[53], at[119]); MULADD(at[54], at[118]); MULADD(at[55], at[117]); MULADD(at[56], at[116]); MULADD(at[57], at[115]); MULADD(at[58], at[114]); MULADD(at[59], at[113]); MULADD(at[60], at[112]); MULADD(at[61], at[111]); MULADD(at[62], at[110]); MULADD(at[63], at[109]);
+ COMBA_STORE(C->dp[108]);
+ /* 109 */
+ COMBA_FORWARD;
+ MULADD(at[46], at[127]); MULADD(at[47], at[126]); MULADD(at[48], at[125]); MULADD(at[49], at[124]); MULADD(at[50], at[123]); MULADD(at[51], at[122]); MULADD(at[52], at[121]); MULADD(at[53], at[120]); MULADD(at[54], at[119]); MULADD(at[55], at[118]); MULADD(at[56], at[117]); MULADD(at[57], at[116]); MULADD(at[58], at[115]); MULADD(at[59], at[114]); MULADD(at[60], at[113]); MULADD(at[61], at[112]); MULADD(at[62], at[111]); MULADD(at[63], at[110]);
+ COMBA_STORE(C->dp[109]);
+ /* 110 */
+ COMBA_FORWARD;
+ MULADD(at[47], at[127]); MULADD(at[48], at[126]); MULADD(at[49], at[125]); MULADD(at[50], at[124]); MULADD(at[51], at[123]); MULADD(at[52], at[122]); MULADD(at[53], at[121]); MULADD(at[54], at[120]); MULADD(at[55], at[119]); MULADD(at[56], at[118]); MULADD(at[57], at[117]); MULADD(at[58], at[116]); MULADD(at[59], at[115]); MULADD(at[60], at[114]); MULADD(at[61], at[113]); MULADD(at[62], at[112]);
MULADD(at[63], at[111]); + COMBA_STORE(C->dp[110]); + /* 111 */ + COMBA_FORWARD; + MULADD(at[48], at[127]); MULADD(at[49], at[126]); MULADD(at[50], at[125]); MULADD(at[51], at[124]); MULADD(at[52], at[123]); MULADD(at[53], at[122]); MULADD(at[54], at[121]); MULADD(at[55], at[120]); MULADD(at[56], at[119]); MULADD(at[57], at[118]); MULADD(at[58], at[117]); MULADD(at[59], at[116]); MULADD(at[60], at[115]); MULADD(at[61], at[114]); MULADD(at[62], at[113]); MULADD(at[63], at[112]); + COMBA_STORE(C->dp[111]); + /* 112 */ + COMBA_FORWARD; + MULADD(at[49], at[127]); MULADD(at[50], at[126]); MULADD(at[51], at[125]); MULADD(at[52], at[124]); MULADD(at[53], at[123]); MULADD(at[54], at[122]); MULADD(at[55], at[121]); MULADD(at[56], at[120]); MULADD(at[57], at[119]); MULADD(at[58], at[118]); MULADD(at[59], at[117]); MULADD(at[60], at[116]); MULADD(at[61], at[115]); MULADD(at[62], at[114]); MULADD(at[63], at[113]); + COMBA_STORE(C->dp[112]); + /* 113 */ + COMBA_FORWARD; + MULADD(at[50], at[127]); MULADD(at[51], at[126]); MULADD(at[52], at[125]); MULADD(at[53], at[124]); MULADD(at[54], at[123]); MULADD(at[55], at[122]); MULADD(at[56], at[121]); MULADD(at[57], at[120]); MULADD(at[58], at[119]); MULADD(at[59], at[118]); MULADD(at[60], at[117]); MULADD(at[61], at[116]); MULADD(at[62], at[115]); MULADD(at[63], at[114]); + COMBA_STORE(C->dp[113]); + /* 114 */ + COMBA_FORWARD; + MULADD(at[51], at[127]); MULADD(at[52], at[126]); MULADD(at[53], at[125]); MULADD(at[54], at[124]); MULADD(at[55], at[123]); MULADD(at[56], at[122]); MULADD(at[57], at[121]); MULADD(at[58], at[120]); MULADD(at[59], at[119]); MULADD(at[60], at[118]); MULADD(at[61], at[117]); MULADD(at[62], at[116]); MULADD(at[63], at[115]); + COMBA_STORE(C->dp[114]); + /* 115 */ + COMBA_FORWARD; + MULADD(at[52], at[127]); MULADD(at[53], at[126]); MULADD(at[54], at[125]); MULADD(at[55], at[124]); MULADD(at[56], at[123]); MULADD(at[57], at[122]); MULADD(at[58], at[121]); MULADD(at[59], at[120]); MULADD(at[60], at[119]); MULADD(at[61], at[118]); MULADD(at[62], at[117]); MULADD(at[63], at[116]); + COMBA_STORE(C->dp[115]); + /* 116 */ + COMBA_FORWARD; + MULADD(at[53], at[127]); MULADD(at[54], at[126]); MULADD(at[55], at[125]); MULADD(at[56], at[124]); MULADD(at[57], at[123]); MULADD(at[58], at[122]); MULADD(at[59], at[121]); MULADD(at[60], at[120]); MULADD(at[61], at[119]); MULADD(at[62], at[118]); MULADD(at[63], at[117]); + COMBA_STORE(C->dp[116]); + /* 117 */ + COMBA_FORWARD; + MULADD(at[54], at[127]); MULADD(at[55], at[126]); MULADD(at[56], at[125]); MULADD(at[57], at[124]); MULADD(at[58], at[123]); MULADD(at[59], at[122]); MULADD(at[60], at[121]); MULADD(at[61], at[120]); MULADD(at[62], at[119]); MULADD(at[63], at[118]); + COMBA_STORE(C->dp[117]); + /* 118 */ + COMBA_FORWARD; + MULADD(at[55], at[127]); MULADD(at[56], at[126]); MULADD(at[57], at[125]); MULADD(at[58], at[124]); MULADD(at[59], at[123]); MULADD(at[60], at[122]); MULADD(at[61], at[121]); MULADD(at[62], at[120]); MULADD(at[63], at[119]); + COMBA_STORE(C->dp[118]); + /* 119 */ + COMBA_FORWARD; + MULADD(at[56], at[127]); MULADD(at[57], at[126]); MULADD(at[58], at[125]); MULADD(at[59], at[124]); MULADD(at[60], at[123]); MULADD(at[61], at[122]); MULADD(at[62], at[121]); MULADD(at[63], at[120]); + COMBA_STORE(C->dp[119]); + /* 120 */ + COMBA_FORWARD; + MULADD(at[57], at[127]); MULADD(at[58], at[126]); MULADD(at[59], at[125]); MULADD(at[60], at[124]); MULADD(at[61], at[123]); MULADD(at[62], at[122]); MULADD(at[63], at[121]); + COMBA_STORE(C->dp[120]); + /* 121 */ + COMBA_FORWARD; + MULADD(at[58], 
at[127]); MULADD(at[59], at[126]); MULADD(at[60], at[125]); MULADD(at[61], at[124]); MULADD(at[62], at[123]); MULADD(at[63], at[122]);
+   COMBA_STORE(C->dp[121]);
+   /* 122 */
+   COMBA_FORWARD;
+   MULADD(at[59], at[127]); MULADD(at[60], at[126]); MULADD(at[61], at[125]); MULADD(at[62], at[124]); MULADD(at[63], at[123]);
+   COMBA_STORE(C->dp[122]);
+   /* 123 */
+   COMBA_FORWARD;
+   MULADD(at[60], at[127]); MULADD(at[61], at[126]); MULADD(at[62], at[125]); MULADD(at[63], at[124]);
+   COMBA_STORE(C->dp[123]);
+   /* 124 */
+   COMBA_FORWARD;
+   MULADD(at[61], at[127]); MULADD(at[62], at[126]); MULADD(at[63], at[125]);
+   COMBA_STORE(C->dp[124]);
+   /* 125 */
+   COMBA_FORWARD;
+   MULADD(at[62], at[127]); MULADD(at[63], at[126]);
+   COMBA_STORE(C->dp[125]);
+   /* 126 */
+   COMBA_FORWARD;
+   MULADD(at[63], at[127]);
+   COMBA_STORE(C->dp[126]);
+   COMBA_STORE2(C->dp[127]);
+   C->used = 128;
+   C->sign = A->sign ^ B->sign;
+   fp_clamp(C);
+   COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+   XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+   return FP_OKAY;
+}
+#endif
diff --git a/client/wolfssl/wolfcrypt/src/fp_mul_comba_7.i b/client/wolfssl/wolfcrypt/src/fp_mul_comba_7.i
new file mode 100644
index 0000000..b969a9a
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/fp_mul_comba_7.i
@@ -0,0 +1,107 @@
+/* fp_mul_comba_7.i
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+#ifdef TFM_MUL7
+int fp_mul_comba7(fp_int *A, fp_int *B, fp_int *C)
+{
+   fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+   fp_digit at[14];
+#else
+   fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+   at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 14, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+   if (at == NULL)
+       return FP_MEM;
+#endif
+
+   XMEMCPY(at, A->dp, 7 * sizeof(fp_digit));
+   XMEMCPY(at+7, B->dp, 7 * sizeof(fp_digit));
+   COMBA_START;
+
+   COMBA_CLEAR;
+   /* 0 */
+   MULADD(at[0], at[7]);
+   COMBA_STORE(C->dp[0]);
+   /* 1 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[8]); MULADD(at[1], at[7]);
+   COMBA_STORE(C->dp[1]);
+   /* 2 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[9]); MULADD(at[1], at[8]); MULADD(at[2], at[7]);
+   COMBA_STORE(C->dp[2]);
+   /* 3 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[10]); MULADD(at[1], at[9]); MULADD(at[2], at[8]); MULADD(at[3], at[7]);
+   COMBA_STORE(C->dp[3]);
+   /* 4 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[11]); MULADD(at[1], at[10]); MULADD(at[2], at[9]); MULADD(at[3], at[8]); MULADD(at[4], at[7]);
+   COMBA_STORE(C->dp[4]);
+   /* 5 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[12]); MULADD(at[1], at[11]); MULADD(at[2], at[10]); MULADD(at[3], at[9]); MULADD(at[4], at[8]); MULADD(at[5], at[7]);
+   COMBA_STORE(C->dp[5]);
+   /* 6 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[13]); MULADD(at[1], at[12]); MULADD(at[2], at[11]); MULADD(at[3], at[10]); MULADD(at[4], at[9]); MULADD(at[5], at[8]); MULADD(at[6], at[7]);
+   COMBA_STORE(C->dp[6]);
+   /* 7 */
+   COMBA_FORWARD;
+   MULADD(at[1], at[13]); MULADD(at[2], at[12]); MULADD(at[3], at[11]); MULADD(at[4], at[10]); MULADD(at[5], at[9]); MULADD(at[6], at[8]);
+   COMBA_STORE(C->dp[7]);
+   /* 8 */
+   COMBA_FORWARD;
+   MULADD(at[2], at[13]); MULADD(at[3], at[12]); MULADD(at[4], at[11]); MULADD(at[5], at[10]); MULADD(at[6], at[9]);
+   COMBA_STORE(C->dp[8]);
+   /* 9 */
+   COMBA_FORWARD;
+   MULADD(at[3], at[13]); MULADD(at[4], at[12]); MULADD(at[5], at[11]); MULADD(at[6], at[10]);
+   COMBA_STORE(C->dp[9]);
+   /* 10 */
+   COMBA_FORWARD;
+   MULADD(at[4], at[13]); MULADD(at[5], at[12]); MULADD(at[6], at[11]);
+   COMBA_STORE(C->dp[10]);
+   /* 11 */
+   COMBA_FORWARD;
+   MULADD(at[5], at[13]); MULADD(at[6], at[12]);
+   COMBA_STORE(C->dp[11]);
+   /* 12 */
+   COMBA_FORWARD;
+   MULADD(at[6], at[13]);
+   COMBA_STORE(C->dp[12]);
+   COMBA_STORE2(C->dp[13]);
+   C->used = 14;
+   C->sign = A->sign ^ B->sign;
+   fp_clamp(C);
+   COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+   XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+   return FP_OKAY;
+}
+#endif
diff --git a/client/wolfssl/wolfcrypt/src/fp_mul_comba_8.i b/client/wolfssl/wolfcrypt/src/fp_mul_comba_8.i
new file mode 100644
index 0000000..1d61a77
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/fp_mul_comba_8.i
@@ -0,0 +1,115 @@
+/* fp_mul_comba_8.i
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_MUL8 +int fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C) +{ + fp_digit c0, c1, c2; +#ifndef WOLFSSL_SMALL_STACK + fp_digit at[16]; +#else + fp_digit *at; +#endif + +#ifdef WOLFSSL_SMALL_STACK + at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (at == NULL) + return FP_MEM; +#endif + + XMEMCPY(at, A->dp, 8 * sizeof(fp_digit)); + XMEMCPY(at+8, B->dp, 8 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[8]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[9]); MULADD(at[1], at[8]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[10]); MULADD(at[1], at[9]); MULADD(at[2], at[8]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[11]); MULADD(at[1], at[10]); MULADD(at[2], at[9]); MULADD(at[3], at[8]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[12]); MULADD(at[1], at[11]); MULADD(at[2], at[10]); MULADD(at[3], at[9]); MULADD(at[4], at[8]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[13]); MULADD(at[1], at[12]); MULADD(at[2], at[11]); MULADD(at[3], at[10]); MULADD(at[4], at[9]); MULADD(at[5], at[8]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[14]); MULADD(at[1], at[13]); MULADD(at[2], at[12]); MULADD(at[3], at[11]); MULADD(at[4], at[10]); MULADD(at[5], at[9]); MULADD(at[6], at[8]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[15]); MULADD(at[1], at[14]); MULADD(at[2], at[13]); MULADD(at[3], at[12]); MULADD(at[4], at[11]); MULADD(at[5], at[10]); MULADD(at[6], at[9]); MULADD(at[7], at[8]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[1], at[15]); MULADD(at[2], at[14]); MULADD(at[3], at[13]); MULADD(at[4], at[12]); MULADD(at[5], at[11]); MULADD(at[6], at[10]); MULADD(at[7], at[9]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[2], at[15]); MULADD(at[3], at[14]); MULADD(at[4], at[13]); MULADD(at[5], at[12]); MULADD(at[6], at[11]); MULADD(at[7], at[10]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[3], at[15]); MULADD(at[4], at[14]); MULADD(at[5], at[13]); MULADD(at[6], at[12]); MULADD(at[7], at[11]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[4], at[15]); MULADD(at[5], at[14]); MULADD(at[6], at[13]); MULADD(at[7], at[12]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[5], at[15]); MULADD(at[6], at[14]); MULADD(at[7], at[13]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[6], at[15]); MULADD(at[7], at[14]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[7], at[15]); + COMBA_STORE(C->dp[14]); + COMBA_STORE2(C->dp[15]); + C->used = 16; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} +#endif diff --git a/client/wolfssl/wolfcrypt/src/fp_mul_comba_9.i b/client/wolfssl/wolfcrypt/src/fp_mul_comba_9.i new file mode 100644 index 0000000..0eedd75 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_mul_comba_9.i @@ -0,0 +1,123 @@ +/* fp_mul_comba_9.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. 
+ * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_MUL9 +int fp_mul_comba9(fp_int *A, fp_int *B, fp_int *C) +{ + fp_digit c0, c1, c2; +#ifndef WOLFSSL_SMALL_STACK + fp_digit at[18]; +#else + fp_digit *at; +#endif + +#ifdef WOLFSSL_SMALL_STACK + at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 18, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (at == NULL) + return FP_MEM; +#endif + + XMEMCPY(at, A->dp, 9 * sizeof(fp_digit)); + XMEMCPY(at+9, B->dp, 9 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[9]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[10]); MULADD(at[1], at[9]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[11]); MULADD(at[1], at[10]); MULADD(at[2], at[9]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[12]); MULADD(at[1], at[11]); MULADD(at[2], at[10]); MULADD(at[3], at[9]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[13]); MULADD(at[1], at[12]); MULADD(at[2], at[11]); MULADD(at[3], at[10]); MULADD(at[4], at[9]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[14]); MULADD(at[1], at[13]); MULADD(at[2], at[12]); MULADD(at[3], at[11]); MULADD(at[4], at[10]); MULADD(at[5], at[9]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[15]); MULADD(at[1], at[14]); MULADD(at[2], at[13]); MULADD(at[3], at[12]); MULADD(at[4], at[11]); MULADD(at[5], at[10]); MULADD(at[6], at[9]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[16]); MULADD(at[1], at[15]); MULADD(at[2], at[14]); MULADD(at[3], at[13]); MULADD(at[4], at[12]); MULADD(at[5], at[11]); MULADD(at[6], at[10]); MULADD(at[7], at[9]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[17]); MULADD(at[1], at[16]); MULADD(at[2], at[15]); MULADD(at[3], at[14]); MULADD(at[4], at[13]); MULADD(at[5], at[12]); MULADD(at[6], at[11]); MULADD(at[7], at[10]); MULADD(at[8], at[9]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[1], at[17]); MULADD(at[2], at[16]); MULADD(at[3], at[15]); MULADD(at[4], at[14]); MULADD(at[5], at[13]); MULADD(at[6], at[12]); MULADD(at[7], at[11]); MULADD(at[8], at[10]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[2], at[17]); MULADD(at[3], at[16]); MULADD(at[4], at[15]); MULADD(at[5], at[14]); MULADD(at[6], at[13]); MULADD(at[7], at[12]); MULADD(at[8], at[11]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[3], at[17]); MULADD(at[4], at[16]); MULADD(at[5], at[15]); MULADD(at[6], at[14]); MULADD(at[7], at[13]); MULADD(at[8], at[12]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[4], at[17]); MULADD(at[5], at[16]); MULADD(at[6], at[15]); MULADD(at[7], at[14]); MULADD(at[8], at[13]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[5], at[17]); 
MULADD(at[6], at[16]); MULADD(at[7], at[15]); MULADD(at[8], at[14]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[6], at[17]); MULADD(at[7], at[16]); MULADD(at[8], at[15]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[7], at[17]); MULADD(at[8], at[16]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[8], at[17]); + COMBA_STORE(C->dp[16]); + COMBA_STORE2(C->dp[17]); + C->used = 18; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} +#endif diff --git a/client/wolfssl/wolfcrypt/src/fp_mul_comba_small_set.i b/client/wolfssl/wolfcrypt/src/fp_mul_comba_small_set.i new file mode 100644 index 0000000..62ab909 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_mul_comba_small_set.i @@ -0,0 +1,1268 @@ +/* fp_mul_comba_small_set.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#if defined(TFM_SMALL_SET) +int fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C) +{ + fp_digit c0, c1, c2; +#ifndef WOLFSSL_SMALL_STACK + fp_digit at[32]; +#else + fp_digit *at; +#endif + +#ifdef WOLFSSL_SMALL_STACK + at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 32, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (at == NULL) + return FP_MEM; +#endif + + switch (MAX(A->used, B->used)) { + + case 1: + XMEMCPY(at, A->dp, 1 * sizeof(fp_digit)); + XMEMCPY(at+1, B->dp, 1 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[1]); + COMBA_STORE(C->dp[0]); + COMBA_STORE2(C->dp[1]); + C->used = 2; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + break; + + case 2: + XMEMCPY(at, A->dp, 2 * sizeof(fp_digit)); + XMEMCPY(at+2, B->dp, 2 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[2]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[3]); MULADD(at[1], at[2]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[1], at[3]); + COMBA_STORE(C->dp[2]); + COMBA_STORE2(C->dp[3]); + C->used = 4; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + break; + + case 3: + XMEMCPY(at, A->dp, 3 * sizeof(fp_digit)); + XMEMCPY(at+3, B->dp, 3 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[3]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[4]); MULADD(at[1], at[3]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[5]); MULADD(at[1], at[4]); MULADD(at[2], at[3]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[1], at[5]); MULADD(at[2], at[4]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[2], at[5]); + COMBA_STORE(C->dp[4]); + COMBA_STORE2(C->dp[5]); + C->used = 6; + C->sign = A->sign ^ B->sign; + fp_clamp(C); 
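+      /* Illustrative note: every case of the MAX(A->used, B->used)
+       * dispatch above ends with this same epilogue; two n-digit
+       * inputs need at most 2n result digits, C is negative exactly
+       * when the operand signs differ (the XOR just above), and
+       * fp_clamp() strips leading zero digits, zeroing the sign if
+       * the product is zero. */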
+ COMBA_FINI; + break; + + case 4: + XMEMCPY(at, A->dp, 4 * sizeof(fp_digit)); + XMEMCPY(at+4, B->dp, 4 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[4]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[5]); MULADD(at[1], at[4]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[6]); MULADD(at[1], at[5]); MULADD(at[2], at[4]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[7]); MULADD(at[1], at[6]); MULADD(at[2], at[5]); MULADD(at[3], at[4]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[1], at[7]); MULADD(at[2], at[6]); MULADD(at[3], at[5]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[2], at[7]); MULADD(at[3], at[6]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[3], at[7]); + COMBA_STORE(C->dp[6]); + COMBA_STORE2(C->dp[7]); + C->used = 8; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + break; + + case 5: + XMEMCPY(at, A->dp, 5 * sizeof(fp_digit)); + XMEMCPY(at+5, B->dp, 5 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[5]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[6]); MULADD(at[1], at[5]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[7]); MULADD(at[1], at[6]); MULADD(at[2], at[5]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[8]); MULADD(at[1], at[7]); MULADD(at[2], at[6]); MULADD(at[3], at[5]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[9]); MULADD(at[1], at[8]); MULADD(at[2], at[7]); MULADD(at[3], at[6]); MULADD(at[4], at[5]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[1], at[9]); MULADD(at[2], at[8]); MULADD(at[3], at[7]); MULADD(at[4], at[6]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[2], at[9]); MULADD(at[3], at[8]); MULADD(at[4], at[7]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[3], at[9]); MULADD(at[4], at[8]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[4], at[9]); + COMBA_STORE(C->dp[8]); + COMBA_STORE2(C->dp[9]); + C->used = 10; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + break; + + case 6: + XMEMCPY(at, A->dp, 6 * sizeof(fp_digit)); + XMEMCPY(at+6, B->dp, 6 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[6]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[7]); MULADD(at[1], at[6]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[8]); MULADD(at[1], at[7]); MULADD(at[2], at[6]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[9]); MULADD(at[1], at[8]); MULADD(at[2], at[7]); MULADD(at[3], at[6]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[10]); MULADD(at[1], at[9]); MULADD(at[2], at[8]); MULADD(at[3], at[7]); MULADD(at[4], at[6]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[11]); MULADD(at[1], at[10]); MULADD(at[2], at[9]); MULADD(at[3], at[8]); MULADD(at[4], at[7]); MULADD(at[5], at[6]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[1], at[11]); MULADD(at[2], at[10]); MULADD(at[3], at[9]); MULADD(at[4], at[8]); MULADD(at[5], at[7]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[2], at[11]); MULADD(at[3], at[10]); MULADD(at[4], at[9]); MULADD(at[5], at[8]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[3], at[11]); MULADD(at[4], at[10]); MULADD(at[5], 
at[9]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[4], at[11]); MULADD(at[5], at[10]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[5], at[11]); + COMBA_STORE(C->dp[10]); + COMBA_STORE2(C->dp[11]); + C->used = 12; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + break; + + case 7: + XMEMCPY(at, A->dp, 7 * sizeof(fp_digit)); + XMEMCPY(at+7, B->dp, 7 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[7]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[8]); MULADD(at[1], at[7]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[9]); MULADD(at[1], at[8]); MULADD(at[2], at[7]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[10]); MULADD(at[1], at[9]); MULADD(at[2], at[8]); MULADD(at[3], at[7]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[11]); MULADD(at[1], at[10]); MULADD(at[2], at[9]); MULADD(at[3], at[8]); MULADD(at[4], at[7]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[12]); MULADD(at[1], at[11]); MULADD(at[2], at[10]); MULADD(at[3], at[9]); MULADD(at[4], at[8]); MULADD(at[5], at[7]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[13]); MULADD(at[1], at[12]); MULADD(at[2], at[11]); MULADD(at[3], at[10]); MULADD(at[4], at[9]); MULADD(at[5], at[8]); MULADD(at[6], at[7]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[1], at[13]); MULADD(at[2], at[12]); MULADD(at[3], at[11]); MULADD(at[4], at[10]); MULADD(at[5], at[9]); MULADD(at[6], at[8]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[2], at[13]); MULADD(at[3], at[12]); MULADD(at[4], at[11]); MULADD(at[5], at[10]); MULADD(at[6], at[9]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[3], at[13]); MULADD(at[4], at[12]); MULADD(at[5], at[11]); MULADD(at[6], at[10]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[4], at[13]); MULADD(at[5], at[12]); MULADD(at[6], at[11]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[5], at[13]); MULADD(at[6], at[12]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[6], at[13]); + COMBA_STORE(C->dp[12]); + COMBA_STORE2(C->dp[13]); + C->used = 14; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + break; + + case 8: + XMEMCPY(at, A->dp, 8 * sizeof(fp_digit)); + XMEMCPY(at+8, B->dp, 8 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[8]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[9]); MULADD(at[1], at[8]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[10]); MULADD(at[1], at[9]); MULADD(at[2], at[8]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[11]); MULADD(at[1], at[10]); MULADD(at[2], at[9]); MULADD(at[3], at[8]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[12]); MULADD(at[1], at[11]); MULADD(at[2], at[10]); MULADD(at[3], at[9]); MULADD(at[4], at[8]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[13]); MULADD(at[1], at[12]); MULADD(at[2], at[11]); MULADD(at[3], at[10]); MULADD(at[4], at[9]); MULADD(at[5], at[8]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[14]); MULADD(at[1], at[13]); MULADD(at[2], at[12]); MULADD(at[3], at[11]); MULADD(at[4], at[10]); MULADD(at[5], at[9]); MULADD(at[6], at[8]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[15]); 
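+      /* Illustrative note: column 7 here is the widest column of the
+       * 8-digit case, and this unrolled body mirrors fp_mul_comba8()
+       * from fp_mul_comba_8.i; TFM_SMALL_SET keeps one such multiplier
+       * per operand size behind the single switch so small multiplies
+       * avoid the generic loop. */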
MULADD(at[1], at[14]); MULADD(at[2], at[13]); MULADD(at[3], at[12]); MULADD(at[4], at[11]); MULADD(at[5], at[10]); MULADD(at[6], at[9]); MULADD(at[7], at[8]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[1], at[15]); MULADD(at[2], at[14]); MULADD(at[3], at[13]); MULADD(at[4], at[12]); MULADD(at[5], at[11]); MULADD(at[6], at[10]); MULADD(at[7], at[9]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[2], at[15]); MULADD(at[3], at[14]); MULADD(at[4], at[13]); MULADD(at[5], at[12]); MULADD(at[6], at[11]); MULADD(at[7], at[10]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[3], at[15]); MULADD(at[4], at[14]); MULADD(at[5], at[13]); MULADD(at[6], at[12]); MULADD(at[7], at[11]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[4], at[15]); MULADD(at[5], at[14]); MULADD(at[6], at[13]); MULADD(at[7], at[12]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[5], at[15]); MULADD(at[6], at[14]); MULADD(at[7], at[13]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[6], at[15]); MULADD(at[7], at[14]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[7], at[15]); + COMBA_STORE(C->dp[14]); + COMBA_STORE2(C->dp[15]); + C->used = 16; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + break; + + case 9: + XMEMCPY(at, A->dp, 9 * sizeof(fp_digit)); + XMEMCPY(at+9, B->dp, 9 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[9]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[10]); MULADD(at[1], at[9]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[11]); MULADD(at[1], at[10]); MULADD(at[2], at[9]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[12]); MULADD(at[1], at[11]); MULADD(at[2], at[10]); MULADD(at[3], at[9]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[13]); MULADD(at[1], at[12]); MULADD(at[2], at[11]); MULADD(at[3], at[10]); MULADD(at[4], at[9]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[14]); MULADD(at[1], at[13]); MULADD(at[2], at[12]); MULADD(at[3], at[11]); MULADD(at[4], at[10]); MULADD(at[5], at[9]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[15]); MULADD(at[1], at[14]); MULADD(at[2], at[13]); MULADD(at[3], at[12]); MULADD(at[4], at[11]); MULADD(at[5], at[10]); MULADD(at[6], at[9]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[16]); MULADD(at[1], at[15]); MULADD(at[2], at[14]); MULADD(at[3], at[13]); MULADD(at[4], at[12]); MULADD(at[5], at[11]); MULADD(at[6], at[10]); MULADD(at[7], at[9]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[17]); MULADD(at[1], at[16]); MULADD(at[2], at[15]); MULADD(at[3], at[14]); MULADD(at[4], at[13]); MULADD(at[5], at[12]); MULADD(at[6], at[11]); MULADD(at[7], at[10]); MULADD(at[8], at[9]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[1], at[17]); MULADD(at[2], at[16]); MULADD(at[3], at[15]); MULADD(at[4], at[14]); MULADD(at[5], at[13]); MULADD(at[6], at[12]); MULADD(at[7], at[11]); MULADD(at[8], at[10]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[2], at[17]); MULADD(at[3], at[16]); MULADD(at[4], at[15]); MULADD(at[5], at[14]); MULADD(at[6], at[13]); MULADD(at[7], at[12]); MULADD(at[8], at[11]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[3], at[17]); MULADD(at[4], at[16]); MULADD(at[5], at[15]); MULADD(at[6], at[14]); MULADD(at[7], 
at[13]); MULADD(at[8], at[12]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[4], at[17]); MULADD(at[5], at[16]); MULADD(at[6], at[15]); MULADD(at[7], at[14]); MULADD(at[8], at[13]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[5], at[17]); MULADD(at[6], at[16]); MULADD(at[7], at[15]); MULADD(at[8], at[14]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[6], at[17]); MULADD(at[7], at[16]); MULADD(at[8], at[15]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[7], at[17]); MULADD(at[8], at[16]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[8], at[17]); + COMBA_STORE(C->dp[16]); + COMBA_STORE2(C->dp[17]); + C->used = 18; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + break; + + case 10: + XMEMCPY(at, A->dp, 10 * sizeof(fp_digit)); + XMEMCPY(at+10, B->dp, 10 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[10]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[11]); MULADD(at[1], at[10]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[12]); MULADD(at[1], at[11]); MULADD(at[2], at[10]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[13]); MULADD(at[1], at[12]); MULADD(at[2], at[11]); MULADD(at[3], at[10]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[14]); MULADD(at[1], at[13]); MULADD(at[2], at[12]); MULADD(at[3], at[11]); MULADD(at[4], at[10]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[15]); MULADD(at[1], at[14]); MULADD(at[2], at[13]); MULADD(at[3], at[12]); MULADD(at[4], at[11]); MULADD(at[5], at[10]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[16]); MULADD(at[1], at[15]); MULADD(at[2], at[14]); MULADD(at[3], at[13]); MULADD(at[4], at[12]); MULADD(at[5], at[11]); MULADD(at[6], at[10]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[17]); MULADD(at[1], at[16]); MULADD(at[2], at[15]); MULADD(at[3], at[14]); MULADD(at[4], at[13]); MULADD(at[5], at[12]); MULADD(at[6], at[11]); MULADD(at[7], at[10]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[18]); MULADD(at[1], at[17]); MULADD(at[2], at[16]); MULADD(at[3], at[15]); MULADD(at[4], at[14]); MULADD(at[5], at[13]); MULADD(at[6], at[12]); MULADD(at[7], at[11]); MULADD(at[8], at[10]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[19]); MULADD(at[1], at[18]); MULADD(at[2], at[17]); MULADD(at[3], at[16]); MULADD(at[4], at[15]); MULADD(at[5], at[14]); MULADD(at[6], at[13]); MULADD(at[7], at[12]); MULADD(at[8], at[11]); MULADD(at[9], at[10]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[1], at[19]); MULADD(at[2], at[18]); MULADD(at[3], at[17]); MULADD(at[4], at[16]); MULADD(at[5], at[15]); MULADD(at[6], at[14]); MULADD(at[7], at[13]); MULADD(at[8], at[12]); MULADD(at[9], at[11]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[2], at[19]); MULADD(at[3], at[18]); MULADD(at[4], at[17]); MULADD(at[5], at[16]); MULADD(at[6], at[15]); MULADD(at[7], at[14]); MULADD(at[8], at[13]); MULADD(at[9], at[12]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[3], at[19]); MULADD(at[4], at[18]); MULADD(at[5], at[17]); MULADD(at[6], at[16]); MULADD(at[7], at[15]); MULADD(at[8], at[14]); MULADD(at[9], at[13]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[4], at[19]); MULADD(at[5], at[18]); MULADD(at[6], at[17]); MULADD(at[7], 
at[16]); MULADD(at[8], at[15]); MULADD(at[9], at[14]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[5], at[19]); MULADD(at[6], at[18]); MULADD(at[7], at[17]); MULADD(at[8], at[16]); MULADD(at[9], at[15]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[6], at[19]); MULADD(at[7], at[18]); MULADD(at[8], at[17]); MULADD(at[9], at[16]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[7], at[19]); MULADD(at[8], at[18]); MULADD(at[9], at[17]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[8], at[19]); MULADD(at[9], at[18]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[9], at[19]); + COMBA_STORE(C->dp[18]); + COMBA_STORE2(C->dp[19]); + C->used = 20; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + break; + + case 11: + XMEMCPY(at, A->dp, 11 * sizeof(fp_digit)); + XMEMCPY(at+11, B->dp, 11 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[11]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[12]); MULADD(at[1], at[11]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[13]); MULADD(at[1], at[12]); MULADD(at[2], at[11]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[14]); MULADD(at[1], at[13]); MULADD(at[2], at[12]); MULADD(at[3], at[11]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[15]); MULADD(at[1], at[14]); MULADD(at[2], at[13]); MULADD(at[3], at[12]); MULADD(at[4], at[11]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[16]); MULADD(at[1], at[15]); MULADD(at[2], at[14]); MULADD(at[3], at[13]); MULADD(at[4], at[12]); MULADD(at[5], at[11]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[17]); MULADD(at[1], at[16]); MULADD(at[2], at[15]); MULADD(at[3], at[14]); MULADD(at[4], at[13]); MULADD(at[5], at[12]); MULADD(at[6], at[11]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[18]); MULADD(at[1], at[17]); MULADD(at[2], at[16]); MULADD(at[3], at[15]); MULADD(at[4], at[14]); MULADD(at[5], at[13]); MULADD(at[6], at[12]); MULADD(at[7], at[11]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[19]); MULADD(at[1], at[18]); MULADD(at[2], at[17]); MULADD(at[3], at[16]); MULADD(at[4], at[15]); MULADD(at[5], at[14]); MULADD(at[6], at[13]); MULADD(at[7], at[12]); MULADD(at[8], at[11]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[20]); MULADD(at[1], at[19]); MULADD(at[2], at[18]); MULADD(at[3], at[17]); MULADD(at[4], at[16]); MULADD(at[5], at[15]); MULADD(at[6], at[14]); MULADD(at[7], at[13]); MULADD(at[8], at[12]); MULADD(at[9], at[11]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[21]); MULADD(at[1], at[20]); MULADD(at[2], at[19]); MULADD(at[3], at[18]); MULADD(at[4], at[17]); MULADD(at[5], at[16]); MULADD(at[6], at[15]); MULADD(at[7], at[14]); MULADD(at[8], at[13]); MULADD(at[9], at[12]); MULADD(at[10], at[11]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[1], at[21]); MULADD(at[2], at[20]); MULADD(at[3], at[19]); MULADD(at[4], at[18]); MULADD(at[5], at[17]); MULADD(at[6], at[16]); MULADD(at[7], at[15]); MULADD(at[8], at[14]); MULADD(at[9], at[13]); MULADD(at[10], at[12]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[2], at[21]); MULADD(at[3], at[20]); MULADD(at[4], at[19]); MULADD(at[5], at[18]); MULADD(at[6], at[17]); MULADD(at[7], at[16]); MULADD(at[8], at[15]); MULADD(at[9], at[14]); 
MULADD(at[10], at[13]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[3], at[21]); MULADD(at[4], at[20]); MULADD(at[5], at[19]); MULADD(at[6], at[18]); MULADD(at[7], at[17]); MULADD(at[8], at[16]); MULADD(at[9], at[15]); MULADD(at[10], at[14]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[4], at[21]); MULADD(at[5], at[20]); MULADD(at[6], at[19]); MULADD(at[7], at[18]); MULADD(at[8], at[17]); MULADD(at[9], at[16]); MULADD(at[10], at[15]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[5], at[21]); MULADD(at[6], at[20]); MULADD(at[7], at[19]); MULADD(at[8], at[18]); MULADD(at[9], at[17]); MULADD(at[10], at[16]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[6], at[21]); MULADD(at[7], at[20]); MULADD(at[8], at[19]); MULADD(at[9], at[18]); MULADD(at[10], at[17]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[7], at[21]); MULADD(at[8], at[20]); MULADD(at[9], at[19]); MULADD(at[10], at[18]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[8], at[21]); MULADD(at[9], at[20]); MULADD(at[10], at[19]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[9], at[21]); MULADD(at[10], at[20]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[10], at[21]); + COMBA_STORE(C->dp[20]); + COMBA_STORE2(C->dp[21]); + C->used = 22; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + break; + + case 12: + XMEMCPY(at, A->dp, 12 * sizeof(fp_digit)); + XMEMCPY(at+12, B->dp, 12 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[12]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[13]); MULADD(at[1], at[12]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[14]); MULADD(at[1], at[13]); MULADD(at[2], at[12]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[15]); MULADD(at[1], at[14]); MULADD(at[2], at[13]); MULADD(at[3], at[12]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[16]); MULADD(at[1], at[15]); MULADD(at[2], at[14]); MULADD(at[3], at[13]); MULADD(at[4], at[12]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[17]); MULADD(at[1], at[16]); MULADD(at[2], at[15]); MULADD(at[3], at[14]); MULADD(at[4], at[13]); MULADD(at[5], at[12]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[18]); MULADD(at[1], at[17]); MULADD(at[2], at[16]); MULADD(at[3], at[15]); MULADD(at[4], at[14]); MULADD(at[5], at[13]); MULADD(at[6], at[12]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[19]); MULADD(at[1], at[18]); MULADD(at[2], at[17]); MULADD(at[3], at[16]); MULADD(at[4], at[15]); MULADD(at[5], at[14]); MULADD(at[6], at[13]); MULADD(at[7], at[12]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[20]); MULADD(at[1], at[19]); MULADD(at[2], at[18]); MULADD(at[3], at[17]); MULADD(at[4], at[16]); MULADD(at[5], at[15]); MULADD(at[6], at[14]); MULADD(at[7], at[13]); MULADD(at[8], at[12]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[21]); MULADD(at[1], at[20]); MULADD(at[2], at[19]); MULADD(at[3], at[18]); MULADD(at[4], at[17]); MULADD(at[5], at[16]); MULADD(at[6], at[15]); MULADD(at[7], at[14]); MULADD(at[8], at[13]); MULADD(at[9], at[12]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[22]); MULADD(at[1], at[21]); MULADD(at[2], at[20]); MULADD(at[3], at[19]); MULADD(at[4], at[18]); MULADD(at[5], at[17]); MULADD(at[6], 
at[16]); MULADD(at[7], at[15]); MULADD(at[8], at[14]); MULADD(at[9], at[13]); MULADD(at[10], at[12]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[23]); MULADD(at[1], at[22]); MULADD(at[2], at[21]); MULADD(at[3], at[20]); MULADD(at[4], at[19]); MULADD(at[5], at[18]); MULADD(at[6], at[17]); MULADD(at[7], at[16]); MULADD(at[8], at[15]); MULADD(at[9], at[14]); MULADD(at[10], at[13]); MULADD(at[11], at[12]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[1], at[23]); MULADD(at[2], at[22]); MULADD(at[3], at[21]); MULADD(at[4], at[20]); MULADD(at[5], at[19]); MULADD(at[6], at[18]); MULADD(at[7], at[17]); MULADD(at[8], at[16]); MULADD(at[9], at[15]); MULADD(at[10], at[14]); MULADD(at[11], at[13]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[2], at[23]); MULADD(at[3], at[22]); MULADD(at[4], at[21]); MULADD(at[5], at[20]); MULADD(at[6], at[19]); MULADD(at[7], at[18]); MULADD(at[8], at[17]); MULADD(at[9], at[16]); MULADD(at[10], at[15]); MULADD(at[11], at[14]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[3], at[23]); MULADD(at[4], at[22]); MULADD(at[5], at[21]); MULADD(at[6], at[20]); MULADD(at[7], at[19]); MULADD(at[8], at[18]); MULADD(at[9], at[17]); MULADD(at[10], at[16]); MULADD(at[11], at[15]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[4], at[23]); MULADD(at[5], at[22]); MULADD(at[6], at[21]); MULADD(at[7], at[20]); MULADD(at[8], at[19]); MULADD(at[9], at[18]); MULADD(at[10], at[17]); MULADD(at[11], at[16]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[5], at[23]); MULADD(at[6], at[22]); MULADD(at[7], at[21]); MULADD(at[8], at[20]); MULADD(at[9], at[19]); MULADD(at[10], at[18]); MULADD(at[11], at[17]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[6], at[23]); MULADD(at[7], at[22]); MULADD(at[8], at[21]); MULADD(at[9], at[20]); MULADD(at[10], at[19]); MULADD(at[11], at[18]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[7], at[23]); MULADD(at[8], at[22]); MULADD(at[9], at[21]); MULADD(at[10], at[20]); MULADD(at[11], at[19]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[8], at[23]); MULADD(at[9], at[22]); MULADD(at[10], at[21]); MULADD(at[11], at[20]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[9], at[23]); MULADD(at[10], at[22]); MULADD(at[11], at[21]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[10], at[23]); MULADD(at[11], at[22]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[11], at[23]); + COMBA_STORE(C->dp[22]); + COMBA_STORE2(C->dp[23]); + C->used = 24; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + break; + + case 13: + XMEMCPY(at, A->dp, 13 * sizeof(fp_digit)); + XMEMCPY(at+13, B->dp, 13 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[13]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[14]); MULADD(at[1], at[13]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[15]); MULADD(at[1], at[14]); MULADD(at[2], at[13]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[16]); MULADD(at[1], at[15]); MULADD(at[2], at[14]); MULADD(at[3], at[13]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[17]); MULADD(at[1], at[16]); MULADD(at[2], at[15]); MULADD(at[3], at[14]); MULADD(at[4], at[13]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[18]); MULADD(at[1], at[17]); MULADD(at[2], 
at[16]); MULADD(at[3], at[15]); MULADD(at[4], at[14]); MULADD(at[5], at[13]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[19]); MULADD(at[1], at[18]); MULADD(at[2], at[17]); MULADD(at[3], at[16]); MULADD(at[4], at[15]); MULADD(at[5], at[14]); MULADD(at[6], at[13]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[20]); MULADD(at[1], at[19]); MULADD(at[2], at[18]); MULADD(at[3], at[17]); MULADD(at[4], at[16]); MULADD(at[5], at[15]); MULADD(at[6], at[14]); MULADD(at[7], at[13]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[21]); MULADD(at[1], at[20]); MULADD(at[2], at[19]); MULADD(at[3], at[18]); MULADD(at[4], at[17]); MULADD(at[5], at[16]); MULADD(at[6], at[15]); MULADD(at[7], at[14]); MULADD(at[8], at[13]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[22]); MULADD(at[1], at[21]); MULADD(at[2], at[20]); MULADD(at[3], at[19]); MULADD(at[4], at[18]); MULADD(at[5], at[17]); MULADD(at[6], at[16]); MULADD(at[7], at[15]); MULADD(at[8], at[14]); MULADD(at[9], at[13]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[23]); MULADD(at[1], at[22]); MULADD(at[2], at[21]); MULADD(at[3], at[20]); MULADD(at[4], at[19]); MULADD(at[5], at[18]); MULADD(at[6], at[17]); MULADD(at[7], at[16]); MULADD(at[8], at[15]); MULADD(at[9], at[14]); MULADD(at[10], at[13]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[24]); MULADD(at[1], at[23]); MULADD(at[2], at[22]); MULADD(at[3], at[21]); MULADD(at[4], at[20]); MULADD(at[5], at[19]); MULADD(at[6], at[18]); MULADD(at[7], at[17]); MULADD(at[8], at[16]); MULADD(at[9], at[15]); MULADD(at[10], at[14]); MULADD(at[11], at[13]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[25]); MULADD(at[1], at[24]); MULADD(at[2], at[23]); MULADD(at[3], at[22]); MULADD(at[4], at[21]); MULADD(at[5], at[20]); MULADD(at[6], at[19]); MULADD(at[7], at[18]); MULADD(at[8], at[17]); MULADD(at[9], at[16]); MULADD(at[10], at[15]); MULADD(at[11], at[14]); MULADD(at[12], at[13]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[1], at[25]); MULADD(at[2], at[24]); MULADD(at[3], at[23]); MULADD(at[4], at[22]); MULADD(at[5], at[21]); MULADD(at[6], at[20]); MULADD(at[7], at[19]); MULADD(at[8], at[18]); MULADD(at[9], at[17]); MULADD(at[10], at[16]); MULADD(at[11], at[15]); MULADD(at[12], at[14]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[2], at[25]); MULADD(at[3], at[24]); MULADD(at[4], at[23]); MULADD(at[5], at[22]); MULADD(at[6], at[21]); MULADD(at[7], at[20]); MULADD(at[8], at[19]); MULADD(at[9], at[18]); MULADD(at[10], at[17]); MULADD(at[11], at[16]); MULADD(at[12], at[15]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[3], at[25]); MULADD(at[4], at[24]); MULADD(at[5], at[23]); MULADD(at[6], at[22]); MULADD(at[7], at[21]); MULADD(at[8], at[20]); MULADD(at[9], at[19]); MULADD(at[10], at[18]); MULADD(at[11], at[17]); MULADD(at[12], at[16]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[4], at[25]); MULADD(at[5], at[24]); MULADD(at[6], at[23]); MULADD(at[7], at[22]); MULADD(at[8], at[21]); MULADD(at[9], at[20]); MULADD(at[10], at[19]); MULADD(at[11], at[18]); MULADD(at[12], at[17]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[5], at[25]); MULADD(at[6], at[24]); MULADD(at[7], at[23]); MULADD(at[8], at[22]); MULADD(at[9], at[21]); MULADD(at[10], at[20]); MULADD(at[11], at[19]); MULADD(at[12], at[18]); + COMBA_STORE(C->dp[17]); + /* 
18 */ + COMBA_FORWARD; + MULADD(at[6], at[25]); MULADD(at[7], at[24]); MULADD(at[8], at[23]); MULADD(at[9], at[22]); MULADD(at[10], at[21]); MULADD(at[11], at[20]); MULADD(at[12], at[19]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[7], at[25]); MULADD(at[8], at[24]); MULADD(at[9], at[23]); MULADD(at[10], at[22]); MULADD(at[11], at[21]); MULADD(at[12], at[20]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[8], at[25]); MULADD(at[9], at[24]); MULADD(at[10], at[23]); MULADD(at[11], at[22]); MULADD(at[12], at[21]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[9], at[25]); MULADD(at[10], at[24]); MULADD(at[11], at[23]); MULADD(at[12], at[22]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[10], at[25]); MULADD(at[11], at[24]); MULADD(at[12], at[23]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[11], at[25]); MULADD(at[12], at[24]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[12], at[25]); + COMBA_STORE(C->dp[24]); + COMBA_STORE2(C->dp[25]); + C->used = 26; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + break; + + case 14: + XMEMCPY(at, A->dp, 14 * sizeof(fp_digit)); + XMEMCPY(at+14, B->dp, 14 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[14]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[15]); MULADD(at[1], at[14]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[16]); MULADD(at[1], at[15]); MULADD(at[2], at[14]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[17]); MULADD(at[1], at[16]); MULADD(at[2], at[15]); MULADD(at[3], at[14]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[18]); MULADD(at[1], at[17]); MULADD(at[2], at[16]); MULADD(at[3], at[15]); MULADD(at[4], at[14]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[19]); MULADD(at[1], at[18]); MULADD(at[2], at[17]); MULADD(at[3], at[16]); MULADD(at[4], at[15]); MULADD(at[5], at[14]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[20]); MULADD(at[1], at[19]); MULADD(at[2], at[18]); MULADD(at[3], at[17]); MULADD(at[4], at[16]); MULADD(at[5], at[15]); MULADD(at[6], at[14]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[21]); MULADD(at[1], at[20]); MULADD(at[2], at[19]); MULADD(at[3], at[18]); MULADD(at[4], at[17]); MULADD(at[5], at[16]); MULADD(at[6], at[15]); MULADD(at[7], at[14]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[22]); MULADD(at[1], at[21]); MULADD(at[2], at[20]); MULADD(at[3], at[19]); MULADD(at[4], at[18]); MULADD(at[5], at[17]); MULADD(at[6], at[16]); MULADD(at[7], at[15]); MULADD(at[8], at[14]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[23]); MULADD(at[1], at[22]); MULADD(at[2], at[21]); MULADD(at[3], at[20]); MULADD(at[4], at[19]); MULADD(at[5], at[18]); MULADD(at[6], at[17]); MULADD(at[7], at[16]); MULADD(at[8], at[15]); MULADD(at[9], at[14]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[24]); MULADD(at[1], at[23]); MULADD(at[2], at[22]); MULADD(at[3], at[21]); MULADD(at[4], at[20]); MULADD(at[5], at[19]); MULADD(at[6], at[18]); MULADD(at[7], at[17]); MULADD(at[8], at[16]); MULADD(at[9], at[15]); MULADD(at[10], at[14]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[25]); MULADD(at[1], at[24]); MULADD(at[2], at[23]); MULADD(at[3], at[22]); MULADD(at[4], at[21]); 
MULADD(at[5], at[20]); MULADD(at[6], at[19]); MULADD(at[7], at[18]); MULADD(at[8], at[17]); MULADD(at[9], at[16]); MULADD(at[10], at[15]); MULADD(at[11], at[14]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[26]); MULADD(at[1], at[25]); MULADD(at[2], at[24]); MULADD(at[3], at[23]); MULADD(at[4], at[22]); MULADD(at[5], at[21]); MULADD(at[6], at[20]); MULADD(at[7], at[19]); MULADD(at[8], at[18]); MULADD(at[9], at[17]); MULADD(at[10], at[16]); MULADD(at[11], at[15]); MULADD(at[12], at[14]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[27]); MULADD(at[1], at[26]); MULADD(at[2], at[25]); MULADD(at[3], at[24]); MULADD(at[4], at[23]); MULADD(at[5], at[22]); MULADD(at[6], at[21]); MULADD(at[7], at[20]); MULADD(at[8], at[19]); MULADD(at[9], at[18]); MULADD(at[10], at[17]); MULADD(at[11], at[16]); MULADD(at[12], at[15]); MULADD(at[13], at[14]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[1], at[27]); MULADD(at[2], at[26]); MULADD(at[3], at[25]); MULADD(at[4], at[24]); MULADD(at[5], at[23]); MULADD(at[6], at[22]); MULADD(at[7], at[21]); MULADD(at[8], at[20]); MULADD(at[9], at[19]); MULADD(at[10], at[18]); MULADD(at[11], at[17]); MULADD(at[12], at[16]); MULADD(at[13], at[15]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[2], at[27]); MULADD(at[3], at[26]); MULADD(at[4], at[25]); MULADD(at[5], at[24]); MULADD(at[6], at[23]); MULADD(at[7], at[22]); MULADD(at[8], at[21]); MULADD(at[9], at[20]); MULADD(at[10], at[19]); MULADD(at[11], at[18]); MULADD(at[12], at[17]); MULADD(at[13], at[16]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[3], at[27]); MULADD(at[4], at[26]); MULADD(at[5], at[25]); MULADD(at[6], at[24]); MULADD(at[7], at[23]); MULADD(at[8], at[22]); MULADD(at[9], at[21]); MULADD(at[10], at[20]); MULADD(at[11], at[19]); MULADD(at[12], at[18]); MULADD(at[13], at[17]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[4], at[27]); MULADD(at[5], at[26]); MULADD(at[6], at[25]); MULADD(at[7], at[24]); MULADD(at[8], at[23]); MULADD(at[9], at[22]); MULADD(at[10], at[21]); MULADD(at[11], at[20]); MULADD(at[12], at[19]); MULADD(at[13], at[18]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[5], at[27]); MULADD(at[6], at[26]); MULADD(at[7], at[25]); MULADD(at[8], at[24]); MULADD(at[9], at[23]); MULADD(at[10], at[22]); MULADD(at[11], at[21]); MULADD(at[12], at[20]); MULADD(at[13], at[19]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[6], at[27]); MULADD(at[7], at[26]); MULADD(at[8], at[25]); MULADD(at[9], at[24]); MULADD(at[10], at[23]); MULADD(at[11], at[22]); MULADD(at[12], at[21]); MULADD(at[13], at[20]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[7], at[27]); MULADD(at[8], at[26]); MULADD(at[9], at[25]); MULADD(at[10], at[24]); MULADD(at[11], at[23]); MULADD(at[12], at[22]); MULADD(at[13], at[21]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[8], at[27]); MULADD(at[9], at[26]); MULADD(at[10], at[25]); MULADD(at[11], at[24]); MULADD(at[12], at[23]); MULADD(at[13], at[22]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[9], at[27]); MULADD(at[10], at[26]); MULADD(at[11], at[25]); MULADD(at[12], at[24]); MULADD(at[13], at[23]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[10], at[27]); MULADD(at[11], at[26]); MULADD(at[12], at[25]); MULADD(at[13], at[24]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[11], at[27]); MULADD(at[12], 
at[26]); MULADD(at[13], at[25]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[12], at[27]); MULADD(at[13], at[26]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[13], at[27]); + COMBA_STORE(C->dp[26]); + COMBA_STORE2(C->dp[27]); + C->used = 28; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + break; + + case 15: + XMEMCPY(at, A->dp, 15 * sizeof(fp_digit)); + XMEMCPY(at+15, B->dp, 15 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[15]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[16]); MULADD(at[1], at[15]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[17]); MULADD(at[1], at[16]); MULADD(at[2], at[15]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[18]); MULADD(at[1], at[17]); MULADD(at[2], at[16]); MULADD(at[3], at[15]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[19]); MULADD(at[1], at[18]); MULADD(at[2], at[17]); MULADD(at[3], at[16]); MULADD(at[4], at[15]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[20]); MULADD(at[1], at[19]); MULADD(at[2], at[18]); MULADD(at[3], at[17]); MULADD(at[4], at[16]); MULADD(at[5], at[15]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[21]); MULADD(at[1], at[20]); MULADD(at[2], at[19]); MULADD(at[3], at[18]); MULADD(at[4], at[17]); MULADD(at[5], at[16]); MULADD(at[6], at[15]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[22]); MULADD(at[1], at[21]); MULADD(at[2], at[20]); MULADD(at[3], at[19]); MULADD(at[4], at[18]); MULADD(at[5], at[17]); MULADD(at[6], at[16]); MULADD(at[7], at[15]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[23]); MULADD(at[1], at[22]); MULADD(at[2], at[21]); MULADD(at[3], at[20]); MULADD(at[4], at[19]); MULADD(at[5], at[18]); MULADD(at[6], at[17]); MULADD(at[7], at[16]); MULADD(at[8], at[15]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[24]); MULADD(at[1], at[23]); MULADD(at[2], at[22]); MULADD(at[3], at[21]); MULADD(at[4], at[20]); MULADD(at[5], at[19]); MULADD(at[6], at[18]); MULADD(at[7], at[17]); MULADD(at[8], at[16]); MULADD(at[9], at[15]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[25]); MULADD(at[1], at[24]); MULADD(at[2], at[23]); MULADD(at[3], at[22]); MULADD(at[4], at[21]); MULADD(at[5], at[20]); MULADD(at[6], at[19]); MULADD(at[7], at[18]); MULADD(at[8], at[17]); MULADD(at[9], at[16]); MULADD(at[10], at[15]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[26]); MULADD(at[1], at[25]); MULADD(at[2], at[24]); MULADD(at[3], at[23]); MULADD(at[4], at[22]); MULADD(at[5], at[21]); MULADD(at[6], at[20]); MULADD(at[7], at[19]); MULADD(at[8], at[18]); MULADD(at[9], at[17]); MULADD(at[10], at[16]); MULADD(at[11], at[15]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[27]); MULADD(at[1], at[26]); MULADD(at[2], at[25]); MULADD(at[3], at[24]); MULADD(at[4], at[23]); MULADD(at[5], at[22]); MULADD(at[6], at[21]); MULADD(at[7], at[20]); MULADD(at[8], at[19]); MULADD(at[9], at[18]); MULADD(at[10], at[17]); MULADD(at[11], at[16]); MULADD(at[12], at[15]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[28]); MULADD(at[1], at[27]); MULADD(at[2], at[26]); MULADD(at[3], at[25]); MULADD(at[4], at[24]); MULADD(at[5], at[23]); MULADD(at[6], at[22]); MULADD(at[7], at[21]); MULADD(at[8], at[20]); MULADD(at[9], at[19]); 
MULADD(at[10], at[18]); MULADD(at[11], at[17]); MULADD(at[12], at[16]); MULADD(at[13], at[15]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[29]); MULADD(at[1], at[28]); MULADD(at[2], at[27]); MULADD(at[3], at[26]); MULADD(at[4], at[25]); MULADD(at[5], at[24]); MULADD(at[6], at[23]); MULADD(at[7], at[22]); MULADD(at[8], at[21]); MULADD(at[9], at[20]); MULADD(at[10], at[19]); MULADD(at[11], at[18]); MULADD(at[12], at[17]); MULADD(at[13], at[16]); MULADD(at[14], at[15]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[1], at[29]); MULADD(at[2], at[28]); MULADD(at[3], at[27]); MULADD(at[4], at[26]); MULADD(at[5], at[25]); MULADD(at[6], at[24]); MULADD(at[7], at[23]); MULADD(at[8], at[22]); MULADD(at[9], at[21]); MULADD(at[10], at[20]); MULADD(at[11], at[19]); MULADD(at[12], at[18]); MULADD(at[13], at[17]); MULADD(at[14], at[16]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[2], at[29]); MULADD(at[3], at[28]); MULADD(at[4], at[27]); MULADD(at[5], at[26]); MULADD(at[6], at[25]); MULADD(at[7], at[24]); MULADD(at[8], at[23]); MULADD(at[9], at[22]); MULADD(at[10], at[21]); MULADD(at[11], at[20]); MULADD(at[12], at[19]); MULADD(at[13], at[18]); MULADD(at[14], at[17]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[3], at[29]); MULADD(at[4], at[28]); MULADD(at[5], at[27]); MULADD(at[6], at[26]); MULADD(at[7], at[25]); MULADD(at[8], at[24]); MULADD(at[9], at[23]); MULADD(at[10], at[22]); MULADD(at[11], at[21]); MULADD(at[12], at[20]); MULADD(at[13], at[19]); MULADD(at[14], at[18]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[4], at[29]); MULADD(at[5], at[28]); MULADD(at[6], at[27]); MULADD(at[7], at[26]); MULADD(at[8], at[25]); MULADD(at[9], at[24]); MULADD(at[10], at[23]); MULADD(at[11], at[22]); MULADD(at[12], at[21]); MULADD(at[13], at[20]); MULADD(at[14], at[19]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[5], at[29]); MULADD(at[6], at[28]); MULADD(at[7], at[27]); MULADD(at[8], at[26]); MULADD(at[9], at[25]); MULADD(at[10], at[24]); MULADD(at[11], at[23]); MULADD(at[12], at[22]); MULADD(at[13], at[21]); MULADD(at[14], at[20]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[6], at[29]); MULADD(at[7], at[28]); MULADD(at[8], at[27]); MULADD(at[9], at[26]); MULADD(at[10], at[25]); MULADD(at[11], at[24]); MULADD(at[12], at[23]); MULADD(at[13], at[22]); MULADD(at[14], at[21]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[7], at[29]); MULADD(at[8], at[28]); MULADD(at[9], at[27]); MULADD(at[10], at[26]); MULADD(at[11], at[25]); MULADD(at[12], at[24]); MULADD(at[13], at[23]); MULADD(at[14], at[22]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[8], at[29]); MULADD(at[9], at[28]); MULADD(at[10], at[27]); MULADD(at[11], at[26]); MULADD(at[12], at[25]); MULADD(at[13], at[24]); MULADD(at[14], at[23]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[9], at[29]); MULADD(at[10], at[28]); MULADD(at[11], at[27]); MULADD(at[12], at[26]); MULADD(at[13], at[25]); MULADD(at[14], at[24]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[10], at[29]); MULADD(at[11], at[28]); MULADD(at[12], at[27]); MULADD(at[13], at[26]); MULADD(at[14], at[25]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[11], at[29]); MULADD(at[12], at[28]); MULADD(at[13], at[27]); MULADD(at[14], at[26]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[12], at[29]); MULADD(at[13], at[28]); 
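/* (Reference sketch.) MULADD(i, j) is tfm's carry-saving multiply-accumulate:
 * it adds the double-width product i * j into the running three-digit
 * accumulator c0:c1:c2 without normalising between terms, which is what lets
 * a whole column be summed before a single COMBA_STORE. The portable C
 * variant of the macro has this effect (the real build may substitute one of
 * the inline-assembly versions selected elsewhere in wolfcrypt):
 *
 *   do { fp_word t;
 *        t  = (fp_word)c0 + (fp_word)i * (fp_word)j;  c0 = (fp_digit)t;
 *        t  = (fp_word)c1 + (t >> DIGIT_BIT);         c1 = (fp_digit)t;
 *        c2 += (fp_digit)(t >> DIGIT_BIT);
 *   } while (0);
 *
 * Columns here stay far shorter than the 2^DIGIT_BIT bound on how many
 * per-term carries c2 can absorb, so no mid-column normalisation is needed. */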
MULADD(at[14], at[27]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[13], at[29]); MULADD(at[14], at[28]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[14], at[29]); + COMBA_STORE(C->dp[28]); + COMBA_STORE2(C->dp[29]); + C->used = 30; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; + break; + + case 16: + XMEMCPY(at, A->dp, 16 * sizeof(fp_digit)); + XMEMCPY(at+16, B->dp, 16 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[16]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[17]); MULADD(at[1], at[16]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[18]); MULADD(at[1], at[17]); MULADD(at[2], at[16]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[19]); MULADD(at[1], at[18]); MULADD(at[2], at[17]); MULADD(at[3], at[16]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[20]); MULADD(at[1], at[19]); MULADD(at[2], at[18]); MULADD(at[3], at[17]); MULADD(at[4], at[16]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[21]); MULADD(at[1], at[20]); MULADD(at[2], at[19]); MULADD(at[3], at[18]); MULADD(at[4], at[17]); MULADD(at[5], at[16]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[22]); MULADD(at[1], at[21]); MULADD(at[2], at[20]); MULADD(at[3], at[19]); MULADD(at[4], at[18]); MULADD(at[5], at[17]); MULADD(at[6], at[16]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[23]); MULADD(at[1], at[22]); MULADD(at[2], at[21]); MULADD(at[3], at[20]); MULADD(at[4], at[19]); MULADD(at[5], at[18]); MULADD(at[6], at[17]); MULADD(at[7], at[16]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[24]); MULADD(at[1], at[23]); MULADD(at[2], at[22]); MULADD(at[3], at[21]); MULADD(at[4], at[20]); MULADD(at[5], at[19]); MULADD(at[6], at[18]); MULADD(at[7], at[17]); MULADD(at[8], at[16]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[25]); MULADD(at[1], at[24]); MULADD(at[2], at[23]); MULADD(at[3], at[22]); MULADD(at[4], at[21]); MULADD(at[5], at[20]); MULADD(at[6], at[19]); MULADD(at[7], at[18]); MULADD(at[8], at[17]); MULADD(at[9], at[16]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[26]); MULADD(at[1], at[25]); MULADD(at[2], at[24]); MULADD(at[3], at[23]); MULADD(at[4], at[22]); MULADD(at[5], at[21]); MULADD(at[6], at[20]); MULADD(at[7], at[19]); MULADD(at[8], at[18]); MULADD(at[9], at[17]); MULADD(at[10], at[16]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[27]); MULADD(at[1], at[26]); MULADD(at[2], at[25]); MULADD(at[3], at[24]); MULADD(at[4], at[23]); MULADD(at[5], at[22]); MULADD(at[6], at[21]); MULADD(at[7], at[20]); MULADD(at[8], at[19]); MULADD(at[9], at[18]); MULADD(at[10], at[17]); MULADD(at[11], at[16]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[28]); MULADD(at[1], at[27]); MULADD(at[2], at[26]); MULADD(at[3], at[25]); MULADD(at[4], at[24]); MULADD(at[5], at[23]); MULADD(at[6], at[22]); MULADD(at[7], at[21]); MULADD(at[8], at[20]); MULADD(at[9], at[19]); MULADD(at[10], at[18]); MULADD(at[11], at[17]); MULADD(at[12], at[16]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[29]); MULADD(at[1], at[28]); MULADD(at[2], at[27]); MULADD(at[3], at[26]); MULADD(at[4], at[25]); MULADD(at[5], at[24]); MULADD(at[6], at[23]); MULADD(at[7], at[22]); MULADD(at[8], at[21]); MULADD(at[9], at[20]); MULADD(at[10], 
at[19]); MULADD(at[11], at[18]); MULADD(at[12], at[17]); MULADD(at[13], at[16]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[30]); MULADD(at[1], at[29]); MULADD(at[2], at[28]); MULADD(at[3], at[27]); MULADD(at[4], at[26]); MULADD(at[5], at[25]); MULADD(at[6], at[24]); MULADD(at[7], at[23]); MULADD(at[8], at[22]); MULADD(at[9], at[21]); MULADD(at[10], at[20]); MULADD(at[11], at[19]); MULADD(at[12], at[18]); MULADD(at[13], at[17]); MULADD(at[14], at[16]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[31]); MULADD(at[1], at[30]); MULADD(at[2], at[29]); MULADD(at[3], at[28]); MULADD(at[4], at[27]); MULADD(at[5], at[26]); MULADD(at[6], at[25]); MULADD(at[7], at[24]); MULADD(at[8], at[23]); MULADD(at[9], at[22]); MULADD(at[10], at[21]); MULADD(at[11], at[20]); MULADD(at[12], at[19]); MULADD(at[13], at[18]); MULADD(at[14], at[17]); MULADD(at[15], at[16]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[1], at[31]); MULADD(at[2], at[30]); MULADD(at[3], at[29]); MULADD(at[4], at[28]); MULADD(at[5], at[27]); MULADD(at[6], at[26]); MULADD(at[7], at[25]); MULADD(at[8], at[24]); MULADD(at[9], at[23]); MULADD(at[10], at[22]); MULADD(at[11], at[21]); MULADD(at[12], at[20]); MULADD(at[13], at[19]); MULADD(at[14], at[18]); MULADD(at[15], at[17]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[2], at[31]); MULADD(at[3], at[30]); MULADD(at[4], at[29]); MULADD(at[5], at[28]); MULADD(at[6], at[27]); MULADD(at[7], at[26]); MULADD(at[8], at[25]); MULADD(at[9], at[24]); MULADD(at[10], at[23]); MULADD(at[11], at[22]); MULADD(at[12], at[21]); MULADD(at[13], at[20]); MULADD(at[14], at[19]); MULADD(at[15], at[18]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[3], at[31]); MULADD(at[4], at[30]); MULADD(at[5], at[29]); MULADD(at[6], at[28]); MULADD(at[7], at[27]); MULADD(at[8], at[26]); MULADD(at[9], at[25]); MULADD(at[10], at[24]); MULADD(at[11], at[23]); MULADD(at[12], at[22]); MULADD(at[13], at[21]); MULADD(at[14], at[20]); MULADD(at[15], at[19]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[4], at[31]); MULADD(at[5], at[30]); MULADD(at[6], at[29]); MULADD(at[7], at[28]); MULADD(at[8], at[27]); MULADD(at[9], at[26]); MULADD(at[10], at[25]); MULADD(at[11], at[24]); MULADD(at[12], at[23]); MULADD(at[13], at[22]); MULADD(at[14], at[21]); MULADD(at[15], at[20]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[5], at[31]); MULADD(at[6], at[30]); MULADD(at[7], at[29]); MULADD(at[8], at[28]); MULADD(at[9], at[27]); MULADD(at[10], at[26]); MULADD(at[11], at[25]); MULADD(at[12], at[24]); MULADD(at[13], at[23]); MULADD(at[14], at[22]); MULADD(at[15], at[21]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[6], at[31]); MULADD(at[7], at[30]); MULADD(at[8], at[29]); MULADD(at[9], at[28]); MULADD(at[10], at[27]); MULADD(at[11], at[26]); MULADD(at[12], at[25]); MULADD(at[13], at[24]); MULADD(at[14], at[23]); MULADD(at[15], at[22]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[7], at[31]); MULADD(at[8], at[30]); MULADD(at[9], at[29]); MULADD(at[10], at[28]); MULADD(at[11], at[27]); MULADD(at[12], at[26]); MULADD(at[13], at[25]); MULADD(at[14], at[24]); MULADD(at[15], at[23]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[8], at[31]); MULADD(at[9], at[30]); MULADD(at[10], at[29]); MULADD(at[11], at[28]); MULADD(at[12], at[27]); MULADD(at[13], at[26]); MULADD(at[14], at[25]); MULADD(at[15], at[24]); + COMBA_STORE(C->dp[23]); 
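/* (Reference note.) Every case n begins with
 *    XMEMCPY(at,     A->dp, n * sizeof(fp_digit));
 *    XMEMCPY(at + n, B->dp, n * sizeof(fp_digit));
 * so column k pairs at[i] with at[n + (k - i)]. A worked 2-digit example in
 * base 10, A = 34 (at = {4,3}) and B = 56 (at+2 = {6,5}):
 *    column 0: 4*6           = 24 -> store 4, carry 2
 *    column 1: 4*5 + 3*6 + 2 = 40 -> store 0, carry 4
 *    column 2: 3*5 + 4       = 19 -> store 9, carry 1
 *    COMBA_STORE2 emits the final 1, giving digits {4,0,9,1} = 1904 = 34*56.
 * The epilogue below then sets C->used = 2n, derives the sign from
 * A->sign ^ B->sign, and fp_clamp() drops leading zero digits. */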
+      /* 24 */
+      COMBA_FORWARD;
+      MULADD(at[9], at[31]); MULADD(at[10], at[30]); MULADD(at[11], at[29]); MULADD(at[12], at[28]); MULADD(at[13], at[27]); MULADD(at[14], at[26]); MULADD(at[15], at[25]);
+      COMBA_STORE(C->dp[24]);
+      /* 25 */
+      COMBA_FORWARD;
+      MULADD(at[10], at[31]); MULADD(at[11], at[30]); MULADD(at[12], at[29]); MULADD(at[13], at[28]); MULADD(at[14], at[27]); MULADD(at[15], at[26]);
+      COMBA_STORE(C->dp[25]);
+      /* 26 */
+      COMBA_FORWARD;
+      MULADD(at[11], at[31]); MULADD(at[12], at[30]); MULADD(at[13], at[29]); MULADD(at[14], at[28]); MULADD(at[15], at[27]);
+      COMBA_STORE(C->dp[26]);
+      /* 27 */
+      COMBA_FORWARD;
+      MULADD(at[12], at[31]); MULADD(at[13], at[30]); MULADD(at[14], at[29]); MULADD(at[15], at[28]);
+      COMBA_STORE(C->dp[27]);
+      /* 28 */
+      COMBA_FORWARD;
+      MULADD(at[13], at[31]); MULADD(at[14], at[30]); MULADD(at[15], at[29]);
+      COMBA_STORE(C->dp[28]);
+      /* 29 */
+      COMBA_FORWARD;
+      MULADD(at[14], at[31]); MULADD(at[15], at[30]);
+      COMBA_STORE(C->dp[29]);
+      /* 30 */
+      COMBA_FORWARD;
+      MULADD(at[15], at[31]);
+      COMBA_STORE(C->dp[30]);
+      COMBA_STORE2(C->dp[31]);
+      C->used = 32;
+      C->sign = A->sign ^ B->sign;
+      fp_clamp(C);
+      COMBA_FINI;
+      break;
+
+   default:
+      break;
+   }
+
+#ifdef WOLFSSL_SMALL_STACK
+   XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+   return FP_OKAY;
+}
+
+#endif
diff --git a/client/wolfssl/wolfcrypt/src/fp_sqr_comba_12.i b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_12.i
new file mode 100644
index 0000000..cded4b1
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_12.i
@@ -0,0 +1,177 @@
+/* fp_sqr_comba_12.i
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+#ifdef TFM_SQR12
+int fp_sqr_comba12(fp_int *A, fp_int *B)
+{
+   fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
+#ifdef TFM_ISO
+   fp_word tt;
+#endif
+#ifndef WOLFSSL_SMALL_STACK
+   fp_digit b[24];
+#else
+   fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+   b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 24, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+   if (b == NULL)
+       return FP_MEM;
+#endif
+
+   a = A->dp;
+   COMBA_START;
+
+   /* clear carries */
+   CLEAR_CARRY;
+
+   /* output 0 */
+   SQRADD(a[0],a[0]);
+   COMBA_STORE(b[0]);
+
+   /* output 1 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[1]);
+   COMBA_STORE(b[1]);
+
+   /* output 2 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
+   COMBA_STORE(b[2]);
+
+   /* output 3 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
+   COMBA_STORE(b[3]);
+
+   /* output 4 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
+   COMBA_STORE(b[4]);
+
+   /* output 5 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
+   COMBA_STORE(b[5]);
+
+   /* output 6 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
+   COMBA_STORE(b[6]);
+
+   /* output 7 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
+   COMBA_STORE(b[7]);
+
+   /* output 8 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
+   COMBA_STORE(b[8]);
+
+   /* output 9 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
+   COMBA_STORE(b[9]);
+
+   /* output 10 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]);
+   COMBA_STORE(b[10]);
+
+   /* output 11 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB;
+   COMBA_STORE(b[11]);
+
+   /* output 12 */
+   CARRY_FORWARD;
+   SQRADDSC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]);
+   COMBA_STORE(b[12]);
+
+   /* output 13 */
+   CARRY_FORWARD;
+   SQRADDSC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB;
+   COMBA_STORE(b[13]);
+
+   /* output 14 */
+   CARRY_FORWARD;
+   SQRADDSC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]);
+   COMBA_STORE(b[14]);
+
+   /* output 15 */
+   CARRY_FORWARD;
+   SQRADDSC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB;
+   COMBA_STORE(b[15]);
+
+   /* output 16 */
+   CARRY_FORWARD;
+   SQRADDSC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]);
+   COMBA_STORE(b[16]);
+
+   /* output 17 */
+   CARRY_FORWARD;
+   SQRADDSC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB;
+   COMBA_STORE(b[17]);
+
+   /* output 18 */
+   CARRY_FORWARD;
+   SQRADD2(a[7], a[11]); SQRADD2(a[8], a[10]); SQRADD(a[9], a[9]);
+   COMBA_STORE(b[18]);
+
+   /* output 19 */
+   CARRY_FORWARD;
+   SQRADD2(a[8], a[11]); SQRADD2(a[9], a[10]);
+   COMBA_STORE(b[19]);
+
+   /* output 20 */
+   CARRY_FORWARD;
+   SQRADD2(a[9], a[11]); SQRADD(a[10], a[10]);
+   COMBA_STORE(b[20]);
+
+   /* output 21 */
+   CARRY_FORWARD;
+   SQRADD2(a[10], a[11]);
+   COMBA_STORE(b[21]);
+
+   /* output 22 */
+   CARRY_FORWARD;
+   SQRADD(a[11], a[11]);
+   COMBA_STORE(b[22]);
+   COMBA_STORE2(b[23]);
+   COMBA_FINI;
+
+   B->used = 24;
+   B->sign = FP_ZPOS;
+   XMEMCPY(B->dp, b, 24 * sizeof(fp_digit));
+   fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+   XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+   return FP_OKAY;
+}
+#endif
+
+
diff --git a/client/wolfssl/wolfcrypt/src/fp_sqr_comba_17.i b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_17.i
new file mode 100644
index 0000000..d2418d9
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_17.i
@@ -0,0 +1,227 @@
+/* fp_sqr_comba_17.i
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+#ifdef TFM_SQR17
+int fp_sqr_comba17(fp_int *A, fp_int *B)
+{
+   fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
+#ifdef TFM_ISO
+   fp_word tt;
+#endif
+#ifndef WOLFSSL_SMALL_STACK
+   fp_digit b[34];
+#else
+   fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+   b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 34, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+   if (b == NULL)
+       return FP_MEM;
+#endif
+
+   a = A->dp;
+   COMBA_START;
+
+   /* clear carries */
+   CLEAR_CARRY;
+
+   /* output 0 */
+   SQRADD(a[0],a[0]);
+   COMBA_STORE(b[0]);
+
+   /* output 1 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[1]);
+   COMBA_STORE(b[1]);
+
+   /* output 2 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
+   COMBA_STORE(b[2]);
+
+   /* output 3 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
+   COMBA_STORE(b[3]);
+
+   /* output 4 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
+   COMBA_STORE(b[4]);
+
+   /* output 5 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
+   COMBA_STORE(b[5]);
+
+   /* output 6 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
+   COMBA_STORE(b[6]);
+
+   /* output 7 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
+   COMBA_STORE(b[7]);
+
+   /* output 8 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
+   COMBA_STORE(b[8]);
+
+   /* output 9 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
+   COMBA_STORE(b[9]);
+
+   /* output 10 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB;
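/* (Reference note.) The squaring files need each distinct cross product only
 * once. SQRADD(i, i) adds the lone square term i*i; SQRADD2(i, j) adds 2*i*j
 * directly; and for longer columns the chain
 *    SQRADDSC(i, j); SQRADDAC(...); ...; SQRADDDB;
 * first sums the distinct products into the secondary accumulator sc0:sc1:sc2
 * (SC sets it, each AC accumulates), then SQRADDDB adds that partial sum into
 * c0:c1:c2 twice, doubling once at the end instead of doubling every term.
 * This column, output 10 of the 17-digit square, therefore computes
 *    2*(a[0]*a[10] + a[1]*a[9] + a[2]*a[8] + a[3]*a[7] + a[4]*a[6]) + a[5]*a[5]
 * i.e. the x^10 coefficient of (sum_i a[i] * x^i)^2. */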
SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADDSC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); + 
COMBA_STORE(b[26]);
+
+   /* output 27 */
+   CARRY_FORWARD;
+   SQRADDSC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB;
+   COMBA_STORE(b[27]);
+
+   /* output 28 */
+   CARRY_FORWARD;
+   SQRADD2(a[12], a[16]); SQRADD2(a[13], a[15]); SQRADD(a[14], a[14]);
+   COMBA_STORE(b[28]);
+
+   /* output 29 */
+   CARRY_FORWARD;
+   SQRADD2(a[13], a[16]); SQRADD2(a[14], a[15]);
+   COMBA_STORE(b[29]);
+
+   /* output 30 */
+   CARRY_FORWARD;
+   SQRADD2(a[14], a[16]); SQRADD(a[15], a[15]);
+   COMBA_STORE(b[30]);
+
+   /* output 31 */
+   CARRY_FORWARD;
+   SQRADD2(a[15], a[16]);
+   COMBA_STORE(b[31]);
+
+   /* output 32 */
+   CARRY_FORWARD;
+   SQRADD(a[16], a[16]);
+   COMBA_STORE(b[32]);
+   COMBA_STORE2(b[33]);
+   COMBA_FINI;
+
+   B->used = 34;
+   B->sign = FP_ZPOS;
+   XMEMCPY(B->dp, b, 34 * sizeof(fp_digit));
+   fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+   XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+   return FP_OKAY;
+}
+#endif
+
+
diff --git a/client/wolfssl/wolfcrypt/src/fp_sqr_comba_20.i b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_20.i
new file mode 100644
index 0000000..78fd3fd
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_20.i
@@ -0,0 +1,257 @@
+/* fp_sqr_comba_20.i
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+#ifdef TFM_SQR20
+int fp_sqr_comba20(fp_int *A, fp_int *B)
+{
+   fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
+#ifdef TFM_ISO
+   fp_word tt;
+#endif
+#ifndef WOLFSSL_SMALL_STACK
+   fp_digit b[40];
+#else
+   fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+   b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 40, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+   if (b == NULL)
+       return FP_MEM;
+#endif
+
+   a = A->dp;
+   COMBA_START;
+
+   /* clear carries */
+   CLEAR_CARRY;
+
+   /* output 0 */
+   SQRADD(a[0],a[0]);
+   COMBA_STORE(b[0]);
+
+   /* output 1 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[1]);
+   COMBA_STORE(b[1]);
+
+   /* output 2 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
+   COMBA_STORE(b[2]);
+
+   /* output 3 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
+   COMBA_STORE(b[3]);
+
+   /* output 4 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
+   COMBA_STORE(b[4]);
+
+   /* output 5 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
+   COMBA_STORE(b[5]);
+
+   /* output 6 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
+   COMBA_STORE(b[6]);
+
+   /* output 7 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
+   COMBA_STORE(b[7]);
+
+   /* output 8 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
+   COMBA_STORE(b[8]);
+
+   /* output 9
*/ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ 
+   CARRY_FORWARD;
+   SQRADDSC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB;
+   COMBA_STORE(b[23]);
+
+   /* output 24 */
+   CARRY_FORWARD;
+   SQRADDSC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]);
+   COMBA_STORE(b[24]);
+
+   /* output 25 */
+   CARRY_FORWARD;
+   SQRADDSC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB;
+   COMBA_STORE(b[25]);
+
+   /* output 26 */
+   CARRY_FORWARD;
+   SQRADDSC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]);
+   COMBA_STORE(b[26]);
+
+   /* output 27 */
+   CARRY_FORWARD;
+   SQRADDSC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB;
+   COMBA_STORE(b[27]);
+
+   /* output 28 */
+   CARRY_FORWARD;
+   SQRADDSC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]);
+   COMBA_STORE(b[28]);
+
+   /* output 29 */
+   CARRY_FORWARD;
+   SQRADDSC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB;
+   COMBA_STORE(b[29]);
+
+   /* output 30 */
+   CARRY_FORWARD;
+   SQRADDSC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]);
+   COMBA_STORE(b[30]);
+
+   /* output 31 */
+   CARRY_FORWARD;
+   SQRADDSC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB;
+   COMBA_STORE(b[31]);
+
+   /* output 32 */
+   CARRY_FORWARD;
+   SQRADDSC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]);
+   COMBA_STORE(b[32]);
+
+   /* output 33 */
+   CARRY_FORWARD;
+   SQRADDSC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB;
+   COMBA_STORE(b[33]);
+
+   /* output 34 */
+   CARRY_FORWARD;
+   SQRADD2(a[15], a[19]); SQRADD2(a[16], a[18]); SQRADD(a[17], a[17]);
+   COMBA_STORE(b[34]);
+
+   /* output 35 */
+   CARRY_FORWARD;
+   SQRADD2(a[16], a[19]); SQRADD2(a[17], a[18]);
+   COMBA_STORE(b[35]);
+
+   /* output 36 */
+   CARRY_FORWARD;
+   SQRADD2(a[17], a[19]); SQRADD(a[18], a[18]);
+   COMBA_STORE(b[36]);
+
+   /* output 37 */
+   CARRY_FORWARD;
+   SQRADD2(a[18], a[19]);
+   COMBA_STORE(b[37]);
+
+   /* output 38 */
+   CARRY_FORWARD;
+   SQRADD(a[19], a[19]);
+   COMBA_STORE(b[38]);
+   COMBA_STORE2(b[39]);
+   COMBA_FINI;
+
+   B->used = 40;
+   B->sign = FP_ZPOS;
+   XMEMCPY(B->dp, b, 40 * sizeof(fp_digit));
+   fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+   XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+   return FP_OKAY;
+}
+#endif
+
+
diff --git a/client/wolfssl/wolfcrypt/src/fp_sqr_comba_24.i b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_24.i
new file mode 100644
index 0000000..602b36c
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_24.i
@@ -0,0 +1,297 @@
+/* fp_sqr_comba_24.i
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_SQR24 +int fp_sqr_comba24(fp_int *A, fp_int *B) +{ + fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0; +#ifdef TFM_ISO + fp_word tt; +#endif +#ifndef WOLFSSL_SMALL_STACK + fp_digit b[48]; +#else + fp_digit *b; +#endif + +#ifdef WOLFSSL_SMALL_STACK + b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 48, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (b == NULL) + return FP_MEM; +#endif + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); + 
COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); 
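/* (Reference note.) Past the middle column the index window slides upward:
 * for an n-digit square, output k sums a[i]*a[k-i] over
 * max(0, k - n + 1) <= i <= k/2, so the chains shorten again toward the top
 * digit. This column, output 26 of the 24-digit square, runs from a[3]*a[23]
 * down to a[12]*a[14], doubled by SQRADDDB, plus the lone square term
 * a[13]*a[13]. */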
SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB; + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB; + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + + /* output 31 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB; + COMBA_STORE(b[31]); + + /* output 32 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]); + COMBA_STORE(b[32]); + + /* output 33 */ + CARRY_FORWARD; + SQRADDSC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB; + COMBA_STORE(b[33]); + + /* output 34 */ + CARRY_FORWARD; + SQRADDSC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]); + COMBA_STORE(b[34]); + + /* output 35 */ + CARRY_FORWARD; + SQRADDSC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB; + COMBA_STORE(b[35]); + + /* output 36 */ + CARRY_FORWARD; + SQRADDSC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]); + COMBA_STORE(b[36]); + + /* output 37 */ + CARRY_FORWARD; + SQRADDSC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB; + COMBA_STORE(b[37]); + + /* output 38 */ + CARRY_FORWARD; + SQRADDSC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]); + COMBA_STORE(b[38]); + + /* output 39 */ + CARRY_FORWARD; + SQRADDSC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB; + COMBA_STORE(b[39]); + + /* output 40 */ + CARRY_FORWARD; + SQRADDSC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]); + COMBA_STORE(b[40]); + + /* output 41 */ + 
CARRY_FORWARD;
+   SQRADDSC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB;
+   COMBA_STORE(b[41]);
+
+   /* output 42 */
+   CARRY_FORWARD;
+   SQRADD2(a[19], a[23]); SQRADD2(a[20], a[22]); SQRADD(a[21], a[21]);
+   COMBA_STORE(b[42]);
+
+   /* output 43 */
+   CARRY_FORWARD;
+   SQRADD2(a[20], a[23]); SQRADD2(a[21], a[22]);
+   COMBA_STORE(b[43]);
+
+   /* output 44 */
+   CARRY_FORWARD;
+   SQRADD2(a[21], a[23]); SQRADD(a[22], a[22]);
+   COMBA_STORE(b[44]);
+
+   /* output 45 */
+   CARRY_FORWARD;
+   SQRADD2(a[22], a[23]);
+   COMBA_STORE(b[45]);
+
+   /* output 46 */
+   CARRY_FORWARD;
+   SQRADD(a[23], a[23]);
+   COMBA_STORE(b[46]);
+   COMBA_STORE2(b[47]);
+   COMBA_FINI;
+
+   B->used = 48;
+   B->sign = FP_ZPOS;
+   XMEMCPY(B->dp, b, 48 * sizeof(fp_digit));
+   fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+   XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+   return FP_OKAY;
+}
+#endif
+
+
diff --git a/client/wolfssl/wolfcrypt/src/fp_sqr_comba_28.i b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_28.i
new file mode 100644
index 0000000..57c1acc
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_28.i
@@ -0,0 +1,337 @@
+/* fp_sqr_comba_28.i
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+#ifdef TFM_SQR28
+int fp_sqr_comba28(fp_int *A, fp_int *B)
+{
+   fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
+#ifdef TFM_ISO
+   fp_word tt;
+#endif
+#ifndef WOLFSSL_SMALL_STACK
+   fp_digit b[56];
+#else
+   fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+   b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 56, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+   if (b == NULL)
+       return FP_MEM;
+#endif
+
+   a = A->dp;
+   COMBA_START;
+
+   /* clear carries */
+   CLEAR_CARRY;
+
+   /* output 0 */
+   SQRADD(a[0],a[0]);
+   COMBA_STORE(b[0]);
+
+   /* output 1 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[1]);
+   COMBA_STORE(b[1]);
+
+   /* output 2 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
+   COMBA_STORE(b[2]);
+
+   /* output 3 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
+   COMBA_STORE(b[3]);
+
+   /* output 4 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
+   COMBA_STORE(b[4]);
+
+   /* output 5 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
+   COMBA_STORE(b[5]);
+
+   /* output 6 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
+   COMBA_STORE(b[6]);
+
+   /* output 7 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
+   COMBA_STORE(b[7]);
+
+   /* output 8 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
+   COMBA_STORE(b[8]);
+
+   /* output 9 */
+   CARRY_FORWARD;
+   SQRADDSC(a[0],
a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], 
a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB; + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB; + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + + /* output 31 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB; + COMBA_STORE(b[31]); + + /* output 32 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[27]); SQRADDAC(a[6], a[26]); 
SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]); + COMBA_STORE(b[32]); + + /* output 33 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB; + COMBA_STORE(b[33]); + + /* output 34 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]); + COMBA_STORE(b[34]); + + /* output 35 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB; + COMBA_STORE(b[35]); + + /* output 36 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]); + COMBA_STORE(b[36]); + + /* output 37 */ + CARRY_FORWARD; + SQRADDSC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB; + COMBA_STORE(b[37]); + + /* output 38 */ + CARRY_FORWARD; + SQRADDSC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]); + COMBA_STORE(b[38]); + + /* output 39 */ + CARRY_FORWARD; + SQRADDSC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB; + COMBA_STORE(b[39]); + + /* output 40 */ + CARRY_FORWARD; + SQRADDSC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]); + COMBA_STORE(b[40]); + + /* output 41 */ + CARRY_FORWARD; + SQRADDSC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB; + COMBA_STORE(b[41]); + + /* output 42 */ + CARRY_FORWARD; + SQRADDSC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]); + COMBA_STORE(b[42]); + + /* output 43 */ + CARRY_FORWARD; + SQRADDSC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB; + COMBA_STORE(b[43]); + + /* output 44 */ + CARRY_FORWARD; + SQRADDSC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]); + COMBA_STORE(b[44]); + + /* 
output 45 */ + CARRY_FORWARD; + SQRADDSC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB; + COMBA_STORE(b[45]); + + /* output 46 */ + CARRY_FORWARD; + SQRADDSC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]); + COMBA_STORE(b[46]); + + /* output 47 */ + CARRY_FORWARD; + SQRADDSC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB; + COMBA_STORE(b[47]); + + /* output 48 */ + CARRY_FORWARD; + SQRADDSC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]); + COMBA_STORE(b[48]); + + /* output 49 */ + CARRY_FORWARD; + SQRADDSC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB; + COMBA_STORE(b[49]); + + /* output 50 */ + CARRY_FORWARD; + SQRADD2(a[23], a[27]); SQRADD2(a[24], a[26]); SQRADD(a[25], a[25]); + COMBA_STORE(b[50]); + + /* output 51 */ + CARRY_FORWARD; + SQRADD2(a[24], a[27]); SQRADD2(a[25], a[26]); + COMBA_STORE(b[51]); + + /* output 52 */ + CARRY_FORWARD; + SQRADD2(a[25], a[27]); SQRADD(a[26], a[26]); + COMBA_STORE(b[52]); + + /* output 53 */ + CARRY_FORWARD; + SQRADD2(a[26], a[27]); + COMBA_STORE(b[53]); + + /* output 54 */ + CARRY_FORWARD; + SQRADD(a[27], a[27]); + COMBA_STORE(b[54]); + COMBA_STORE2(b[55]); + COMBA_FINI; + + B->used = 56; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 56 * sizeof(fp_digit)); + fp_clamp(B); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} +#endif + + diff --git a/client/wolfssl/wolfcrypt/src/fp_sqr_comba_3.i b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_3.i new file mode 100644 index 0000000..51c3d74 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_3.i @@ -0,0 +1,73 @@ +/* fp_sqr_comba_3.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_SQR3 +int fp_sqr_comba3(fp_int *A, fp_int *B) +{ + fp_digit *a, b[6], c0, c1, c2; +#ifdef TFM_ISO + fp_word tt; +#endif + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + COMBA_STORE2(b[5]); + COMBA_FINI; + + B->used = 6; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 6 * sizeof(fp_digit)); + fp_clamp(B); + + return FP_OKAY; +} +#endif + + diff --git a/client/wolfssl/wolfcrypt/src/fp_sqr_comba_32.i b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_32.i new file mode 100644 index 0000000..4fcf349 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_32.i @@ -0,0 +1,377 @@ +/* fp_sqr_comba_32.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_SQR32 +int fp_sqr_comba32(fp_int *A, fp_int *B) +{ + fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0; +#ifdef TFM_ISO + fp_word tt; +#endif +#ifndef WOLFSSL_SMALL_STACK + fp_digit b[64]; +#else + fp_digit *b; +#endif + +#ifdef WOLFSSL_SMALL_STACK + b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (b == NULL) + return FP_MEM; +#endif + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; 
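/* Note on the unrolled pattern: columns with three or more cross
   products collect them once in the sub-accumulator sc0..sc2
   (SQRADDSC starts it, each SQRADDAC extends it), and SQRADDDB then
   doubles that partial sum and folds it into the main accumulator
   c0..c2 -- so every a[i]*a[j] with i != j is counted twice at the
   cost of a single doubling per column. The diagonal term
   a[k]*a[k], added once via SQRADD, follows immediately. */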
SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); 
SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB; + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB; + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + + /* output 31 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]); SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB; + COMBA_STORE(b[31]); + + /* output 32 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]); SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]); + COMBA_STORE(b[32]); + + /* output 33 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]); SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB; + COMBA_STORE(b[33]); + + /* output 34 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]); SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]); + COMBA_STORE(b[34]); + + /* output 35 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[31]); SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]); SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], 
a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB; + COMBA_STORE(b[35]); + + /* output 36 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]); SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]); + COMBA_STORE(b[36]); + + /* output 37 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]); SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB; + COMBA_STORE(b[37]); + + /* output 38 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]); SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]); + COMBA_STORE(b[38]); + + /* output 39 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]); SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB; + COMBA_STORE(b[39]); + + /* output 40 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]); SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]); + COMBA_STORE(b[40]); + + /* output 41 */ + CARRY_FORWARD; + SQRADDSC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]); SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB; + COMBA_STORE(b[41]); + + /* output 42 */ + CARRY_FORWARD; + SQRADDSC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]); SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]); + COMBA_STORE(b[42]); + + /* output 43 */ + CARRY_FORWARD; + SQRADDSC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]); SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB; + COMBA_STORE(b[43]); + + /* output 44 */ + CARRY_FORWARD; + SQRADDSC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]); SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]); + COMBA_STORE(b[44]); + + /* output 45 */ + CARRY_FORWARD; + SQRADDSC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]); SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB; + 
COMBA_STORE(b[45]); + + /* output 46 */ + CARRY_FORWARD; + SQRADDSC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]); SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]); + COMBA_STORE(b[46]); + + /* output 47 */ + CARRY_FORWARD; + SQRADDSC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); SQRADDAC(a[19], a[28]); SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB; + COMBA_STORE(b[47]); + + /* output 48 */ + CARRY_FORWARD; + SQRADDSC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]); SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]); + COMBA_STORE(b[48]); + + /* output 49 */ + CARRY_FORWARD; + SQRADDSC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]); SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB; + COMBA_STORE(b[49]); + + /* output 50 */ + CARRY_FORWARD; + SQRADDSC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]); SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]); SQRADDDB; SQRADD(a[25], a[25]); + COMBA_STORE(b[50]); + + /* output 51 */ + CARRY_FORWARD; + SQRADDSC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]); SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]); SQRADDDB; + COMBA_STORE(b[51]); + + /* output 52 */ + CARRY_FORWARD; + SQRADDSC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]); SQRADDAC(a[25], a[27]); SQRADDDB; SQRADD(a[26], a[26]); + COMBA_STORE(b[52]); + + /* output 53 */ + CARRY_FORWARD; + SQRADDSC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]); SQRADDAC(a[26], a[27]); SQRADDDB; + COMBA_STORE(b[53]); + + /* output 54 */ + CARRY_FORWARD; + SQRADDSC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]); SQRADDDB; SQRADD(a[27], a[27]); + COMBA_STORE(b[54]); + + /* output 55 */ + CARRY_FORWARD; + SQRADDSC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]); SQRADDDB; + COMBA_STORE(b[55]); + + /* output 56 */ + CARRY_FORWARD; + SQRADDSC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]); SQRADDDB; SQRADD(a[28], a[28]); + COMBA_STORE(b[56]); + + /* output 57 */ + CARRY_FORWARD; + SQRADDSC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]); SQRADDDB; + COMBA_STORE(b[57]); + + /* output 58 */ + CARRY_FORWARD; + SQRADD2(a[27], a[31]); SQRADD2(a[28], a[30]); SQRADD(a[29], a[29]); + COMBA_STORE(b[58]); + + /* output 59 */ + CARRY_FORWARD; + SQRADD2(a[28], a[31]); SQRADD2(a[29], a[30]); + COMBA_STORE(b[59]); + + /* output 60 */ + CARRY_FORWARD; + SQRADD2(a[29], a[31]); SQRADD(a[30], a[30]); + COMBA_STORE(b[60]); + + /* output 61 */ + CARRY_FORWARD; + SQRADD2(a[30], a[31]); + COMBA_STORE(b[61]); + + /* output 62 */ + CARRY_FORWARD; + SQRADD(a[31], a[31]); + COMBA_STORE(b[62]); + COMBA_STORE2(b[63]); + COMBA_FINI; + + B->used = 64; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 64 * sizeof(fp_digit)); + fp_clamp(B); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} +#endif + + diff --git a/client/wolfssl/wolfcrypt/src/fp_sqr_comba_4.i b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_4.i new file mode 100644 index 0000000..b7f2572 --- /dev/null +++ 
b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_4.i @@ -0,0 +1,97 @@ +/* fp_sqr_comba_4.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_SQR4 +int fp_sqr_comba4(fp_int *A, fp_int *B) +{ + fp_digit *a, c0, c1, c2; +#ifdef TFM_ISO + fp_word tt; +#endif +#ifndef WOLFSSL_SMALL_STACK + fp_digit b[8]; +#else + fp_digit *b; +#endif + +#ifdef WOLFSSL_SMALL_STACK + b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 8, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (b == NULL) + return FP_MEM; +#endif + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADD2(a[2], a[3]); + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + COMBA_STORE2(b[7]); + COMBA_FINI; + + B->used = 8; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 8 * sizeof(fp_digit)); + fp_clamp(B); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} +#endif + + diff --git a/client/wolfssl/wolfcrypt/src/fp_sqr_comba_48.i b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_48.i new file mode 100644 index 0000000..0f24532 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_48.i @@ -0,0 +1,537 @@ +/* fp_sqr_comba_48.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_SQR48 +int fp_sqr_comba48(fp_int *A, fp_int *B) +{ + fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0; +#ifdef TFM_ISO + fp_word tt; +#endif +#ifndef WOLFSSL_SMALL_STACK + fp_digit b[96]; +#else + fp_digit *b; +#endif + +#ifdef WOLFSSL_SMALL_STACK + b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 96, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (b == NULL) + return FP_MEM; +#endif + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; 
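/* As in the smaller unrolls, each "output k" block computes column k
   of the square: the sum of all a[i]*a[j] with i + j == k, plus the
   carry brought in by CARRY_FORWARD from the previous column;
   COMBA_STORE then writes the low digit of the running accumulator
   into b[k]. */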
SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); 
SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB; + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB; + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + + /* output 31 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]); SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB; + COMBA_STORE(b[31]); + + /* output 32 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[32]); SQRADDAC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]); SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]); + COMBA_STORE(b[32]); + + /* output 33 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[33]); SQRADDAC(a[1], a[32]); SQRADDAC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]); SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB; + COMBA_STORE(b[33]); + + /* output 34 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[34]); SQRADDAC(a[1], a[33]); SQRADDAC(a[2], a[32]); SQRADDAC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]); SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]); + COMBA_STORE(b[34]); + + /* output 35 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[35]); SQRADDAC(a[1], a[34]); SQRADDAC(a[2], a[33]); SQRADDAC(a[3], a[32]); SQRADDAC(a[4], a[31]); 
SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]); SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB; + COMBA_STORE(b[35]); + + /* output 36 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[36]); SQRADDAC(a[1], a[35]); SQRADDAC(a[2], a[34]); SQRADDAC(a[3], a[33]); SQRADDAC(a[4], a[32]); SQRADDAC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]); SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]); + COMBA_STORE(b[36]); + + /* output 37 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[37]); SQRADDAC(a[1], a[36]); SQRADDAC(a[2], a[35]); SQRADDAC(a[3], a[34]); SQRADDAC(a[4], a[33]); SQRADDAC(a[5], a[32]); SQRADDAC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]); SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB; + COMBA_STORE(b[37]); + + /* output 38 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[38]); SQRADDAC(a[1], a[37]); SQRADDAC(a[2], a[36]); SQRADDAC(a[3], a[35]); SQRADDAC(a[4], a[34]); SQRADDAC(a[5], a[33]); SQRADDAC(a[6], a[32]); SQRADDAC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]); SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]); + COMBA_STORE(b[38]); + + /* output 39 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[39]); SQRADDAC(a[1], a[38]); SQRADDAC(a[2], a[37]); SQRADDAC(a[3], a[36]); SQRADDAC(a[4], a[35]); SQRADDAC(a[5], a[34]); SQRADDAC(a[6], a[33]); SQRADDAC(a[7], a[32]); SQRADDAC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]); SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB; + COMBA_STORE(b[39]); + + /* output 40 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[40]); SQRADDAC(a[1], a[39]); SQRADDAC(a[2], a[38]); SQRADDAC(a[3], a[37]); SQRADDAC(a[4], a[36]); SQRADDAC(a[5], a[35]); SQRADDAC(a[6], a[34]); SQRADDAC(a[7], a[33]); SQRADDAC(a[8], a[32]); SQRADDAC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]); SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]); + COMBA_STORE(b[40]); + + /* output 41 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[41]); SQRADDAC(a[1], a[40]); SQRADDAC(a[2], a[39]); SQRADDAC(a[3], a[38]); SQRADDAC(a[4], a[37]); SQRADDAC(a[5], a[36]); SQRADDAC(a[6], a[35]); SQRADDAC(a[7], a[34]); SQRADDAC(a[8], a[33]); SQRADDAC(a[9], a[32]); SQRADDAC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]); SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); 
SQRADDDB; + COMBA_STORE(b[41]); + + /* output 42 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[42]); SQRADDAC(a[1], a[41]); SQRADDAC(a[2], a[40]); SQRADDAC(a[3], a[39]); SQRADDAC(a[4], a[38]); SQRADDAC(a[5], a[37]); SQRADDAC(a[6], a[36]); SQRADDAC(a[7], a[35]); SQRADDAC(a[8], a[34]); SQRADDAC(a[9], a[33]); SQRADDAC(a[10], a[32]); SQRADDAC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]); SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]); + COMBA_STORE(b[42]); + + /* output 43 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[43]); SQRADDAC(a[1], a[42]); SQRADDAC(a[2], a[41]); SQRADDAC(a[3], a[40]); SQRADDAC(a[4], a[39]); SQRADDAC(a[5], a[38]); SQRADDAC(a[6], a[37]); SQRADDAC(a[7], a[36]); SQRADDAC(a[8], a[35]); SQRADDAC(a[9], a[34]); SQRADDAC(a[10], a[33]); SQRADDAC(a[11], a[32]); SQRADDAC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]); SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB; + COMBA_STORE(b[43]); + + /* output 44 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[44]); SQRADDAC(a[1], a[43]); SQRADDAC(a[2], a[42]); SQRADDAC(a[3], a[41]); SQRADDAC(a[4], a[40]); SQRADDAC(a[5], a[39]); SQRADDAC(a[6], a[38]); SQRADDAC(a[7], a[37]); SQRADDAC(a[8], a[36]); SQRADDAC(a[9], a[35]); SQRADDAC(a[10], a[34]); SQRADDAC(a[11], a[33]); SQRADDAC(a[12], a[32]); SQRADDAC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]); SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]); + COMBA_STORE(b[44]); + + /* output 45 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[45]); SQRADDAC(a[1], a[44]); SQRADDAC(a[2], a[43]); SQRADDAC(a[3], a[42]); SQRADDAC(a[4], a[41]); SQRADDAC(a[5], a[40]); SQRADDAC(a[6], a[39]); SQRADDAC(a[7], a[38]); SQRADDAC(a[8], a[37]); SQRADDAC(a[9], a[36]); SQRADDAC(a[10], a[35]); SQRADDAC(a[11], a[34]); SQRADDAC(a[12], a[33]); SQRADDAC(a[13], a[32]); SQRADDAC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]); SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB; + COMBA_STORE(b[45]); + + /* output 46 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[46]); SQRADDAC(a[1], a[45]); SQRADDAC(a[2], a[44]); SQRADDAC(a[3], a[43]); SQRADDAC(a[4], a[42]); SQRADDAC(a[5], a[41]); SQRADDAC(a[6], a[40]); SQRADDAC(a[7], a[39]); SQRADDAC(a[8], a[38]); SQRADDAC(a[9], a[37]); SQRADDAC(a[10], a[36]); SQRADDAC(a[11], a[35]); SQRADDAC(a[12], a[34]); SQRADDAC(a[13], a[33]); SQRADDAC(a[14], a[32]); SQRADDAC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]); SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]); + COMBA_STORE(b[46]); + + /* output 47 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[47]); SQRADDAC(a[1], a[46]); SQRADDAC(a[2], a[45]); SQRADDAC(a[3], a[44]); SQRADDAC(a[4], a[43]); SQRADDAC(a[5], a[42]); SQRADDAC(a[6], a[41]); SQRADDAC(a[7], a[40]); SQRADDAC(a[8], a[39]); SQRADDAC(a[9], a[38]); SQRADDAC(a[10], a[37]); SQRADDAC(a[11], a[36]); SQRADDAC(a[12], a[35]); SQRADDAC(a[13], a[34]); SQRADDAC(a[14], a[33]); SQRADDAC(a[15], a[32]); SQRADDAC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); 
SQRADDAC(a[19], a[28]); SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB; + COMBA_STORE(b[47]); + + /* output 48 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[47]); SQRADDAC(a[2], a[46]); SQRADDAC(a[3], a[45]); SQRADDAC(a[4], a[44]); SQRADDAC(a[5], a[43]); SQRADDAC(a[6], a[42]); SQRADDAC(a[7], a[41]); SQRADDAC(a[8], a[40]); SQRADDAC(a[9], a[39]); SQRADDAC(a[10], a[38]); SQRADDAC(a[11], a[37]); SQRADDAC(a[12], a[36]); SQRADDAC(a[13], a[35]); SQRADDAC(a[14], a[34]); SQRADDAC(a[15], a[33]); SQRADDAC(a[16], a[32]); SQRADDAC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]); SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]); + COMBA_STORE(b[48]); + + /* output 49 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[47]); SQRADDAC(a[3], a[46]); SQRADDAC(a[4], a[45]); SQRADDAC(a[5], a[44]); SQRADDAC(a[6], a[43]); SQRADDAC(a[7], a[42]); SQRADDAC(a[8], a[41]); SQRADDAC(a[9], a[40]); SQRADDAC(a[10], a[39]); SQRADDAC(a[11], a[38]); SQRADDAC(a[12], a[37]); SQRADDAC(a[13], a[36]); SQRADDAC(a[14], a[35]); SQRADDAC(a[15], a[34]); SQRADDAC(a[16], a[33]); SQRADDAC(a[17], a[32]); SQRADDAC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]); SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB; + COMBA_STORE(b[49]); + + /* output 50 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[47]); SQRADDAC(a[4], a[46]); SQRADDAC(a[5], a[45]); SQRADDAC(a[6], a[44]); SQRADDAC(a[7], a[43]); SQRADDAC(a[8], a[42]); SQRADDAC(a[9], a[41]); SQRADDAC(a[10], a[40]); SQRADDAC(a[11], a[39]); SQRADDAC(a[12], a[38]); SQRADDAC(a[13], a[37]); SQRADDAC(a[14], a[36]); SQRADDAC(a[15], a[35]); SQRADDAC(a[16], a[34]); SQRADDAC(a[17], a[33]); SQRADDAC(a[18], a[32]); SQRADDAC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]); SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]); SQRADDDB; SQRADD(a[25], a[25]); + COMBA_STORE(b[50]); + + /* output 51 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[47]); SQRADDAC(a[5], a[46]); SQRADDAC(a[6], a[45]); SQRADDAC(a[7], a[44]); SQRADDAC(a[8], a[43]); SQRADDAC(a[9], a[42]); SQRADDAC(a[10], a[41]); SQRADDAC(a[11], a[40]); SQRADDAC(a[12], a[39]); SQRADDAC(a[13], a[38]); SQRADDAC(a[14], a[37]); SQRADDAC(a[15], a[36]); SQRADDAC(a[16], a[35]); SQRADDAC(a[17], a[34]); SQRADDAC(a[18], a[33]); SQRADDAC(a[19], a[32]); SQRADDAC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]); SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]); SQRADDDB; + COMBA_STORE(b[51]); + + /* output 52 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[47]); SQRADDAC(a[6], a[46]); SQRADDAC(a[7], a[45]); SQRADDAC(a[8], a[44]); SQRADDAC(a[9], a[43]); SQRADDAC(a[10], a[42]); SQRADDAC(a[11], a[41]); SQRADDAC(a[12], a[40]); SQRADDAC(a[13], a[39]); SQRADDAC(a[14], a[38]); SQRADDAC(a[15], a[37]); SQRADDAC(a[16], a[36]); SQRADDAC(a[17], a[35]); SQRADDAC(a[18], a[34]); SQRADDAC(a[19], a[33]); SQRADDAC(a[20], a[32]); SQRADDAC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]); SQRADDAC(a[25], a[27]); SQRADDDB; SQRADD(a[26], a[26]); + COMBA_STORE(b[52]); + + /* output 53 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[47]); SQRADDAC(a[7], a[46]); SQRADDAC(a[8], a[45]); SQRADDAC(a[9], a[44]); SQRADDAC(a[10], a[43]); SQRADDAC(a[11], a[42]); SQRADDAC(a[12], a[41]); SQRADDAC(a[13], a[40]); SQRADDAC(a[14], a[39]); SQRADDAC(a[15], a[38]); SQRADDAC(a[16], a[37]); SQRADDAC(a[17], a[36]); SQRADDAC(a[18], a[35]); 
SQRADDAC(a[19], a[34]); SQRADDAC(a[20], a[33]); SQRADDAC(a[21], a[32]); SQRADDAC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]); SQRADDAC(a[26], a[27]); SQRADDDB; + COMBA_STORE(b[53]); + + /* output 54 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[47]); SQRADDAC(a[8], a[46]); SQRADDAC(a[9], a[45]); SQRADDAC(a[10], a[44]); SQRADDAC(a[11], a[43]); SQRADDAC(a[12], a[42]); SQRADDAC(a[13], a[41]); SQRADDAC(a[14], a[40]); SQRADDAC(a[15], a[39]); SQRADDAC(a[16], a[38]); SQRADDAC(a[17], a[37]); SQRADDAC(a[18], a[36]); SQRADDAC(a[19], a[35]); SQRADDAC(a[20], a[34]); SQRADDAC(a[21], a[33]); SQRADDAC(a[22], a[32]); SQRADDAC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]); SQRADDDB; SQRADD(a[27], a[27]); + COMBA_STORE(b[54]); + + /* output 55 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[47]); SQRADDAC(a[9], a[46]); SQRADDAC(a[10], a[45]); SQRADDAC(a[11], a[44]); SQRADDAC(a[12], a[43]); SQRADDAC(a[13], a[42]); SQRADDAC(a[14], a[41]); SQRADDAC(a[15], a[40]); SQRADDAC(a[16], a[39]); SQRADDAC(a[17], a[38]); SQRADDAC(a[18], a[37]); SQRADDAC(a[19], a[36]); SQRADDAC(a[20], a[35]); SQRADDAC(a[21], a[34]); SQRADDAC(a[22], a[33]); SQRADDAC(a[23], a[32]); SQRADDAC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]); SQRADDDB; + COMBA_STORE(b[55]); + + /* output 56 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[47]); SQRADDAC(a[10], a[46]); SQRADDAC(a[11], a[45]); SQRADDAC(a[12], a[44]); SQRADDAC(a[13], a[43]); SQRADDAC(a[14], a[42]); SQRADDAC(a[15], a[41]); SQRADDAC(a[16], a[40]); SQRADDAC(a[17], a[39]); SQRADDAC(a[18], a[38]); SQRADDAC(a[19], a[37]); SQRADDAC(a[20], a[36]); SQRADDAC(a[21], a[35]); SQRADDAC(a[22], a[34]); SQRADDAC(a[23], a[33]); SQRADDAC(a[24], a[32]); SQRADDAC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]); SQRADDDB; SQRADD(a[28], a[28]); + COMBA_STORE(b[56]); + + /* output 57 */ + CARRY_FORWARD; + SQRADDSC(a[10], a[47]); SQRADDAC(a[11], a[46]); SQRADDAC(a[12], a[45]); SQRADDAC(a[13], a[44]); SQRADDAC(a[14], a[43]); SQRADDAC(a[15], a[42]); SQRADDAC(a[16], a[41]); SQRADDAC(a[17], a[40]); SQRADDAC(a[18], a[39]); SQRADDAC(a[19], a[38]); SQRADDAC(a[20], a[37]); SQRADDAC(a[21], a[36]); SQRADDAC(a[22], a[35]); SQRADDAC(a[23], a[34]); SQRADDAC(a[24], a[33]); SQRADDAC(a[25], a[32]); SQRADDAC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]); SQRADDDB; + COMBA_STORE(b[57]); + + /* output 58 */ + CARRY_FORWARD; + SQRADDSC(a[11], a[47]); SQRADDAC(a[12], a[46]); SQRADDAC(a[13], a[45]); SQRADDAC(a[14], a[44]); SQRADDAC(a[15], a[43]); SQRADDAC(a[16], a[42]); SQRADDAC(a[17], a[41]); SQRADDAC(a[18], a[40]); SQRADDAC(a[19], a[39]); SQRADDAC(a[20], a[38]); SQRADDAC(a[21], a[37]); SQRADDAC(a[22], a[36]); SQRADDAC(a[23], a[35]); SQRADDAC(a[24], a[34]); SQRADDAC(a[25], a[33]); SQRADDAC(a[26], a[32]); SQRADDAC(a[27], a[31]); SQRADDAC(a[28], a[30]); SQRADDDB; SQRADD(a[29], a[29]); + COMBA_STORE(b[58]); + + /* output 59 */ + CARRY_FORWARD; + SQRADDSC(a[12], a[47]); SQRADDAC(a[13], a[46]); SQRADDAC(a[14], a[45]); SQRADDAC(a[15], a[44]); SQRADDAC(a[16], a[43]); SQRADDAC(a[17], a[42]); SQRADDAC(a[18], a[41]); SQRADDAC(a[19], a[40]); SQRADDAC(a[20], a[39]); SQRADDAC(a[21], a[38]); SQRADDAC(a[22], a[37]); SQRADDAC(a[23], a[36]); SQRADDAC(a[24], a[35]); SQRADDAC(a[25], a[34]); SQRADDAC(a[26], a[33]); SQRADDAC(a[27], a[32]); SQRADDAC(a[28], a[31]); SQRADDAC(a[29], a[30]); SQRADDDB; + COMBA_STORE(b[59]); + + /* output 60 */ + CARRY_FORWARD; + SQRADDSC(a[13], a[47]); SQRADDAC(a[14], a[46]); SQRADDAC(a[15], 
a[45]); SQRADDAC(a[16], a[44]); SQRADDAC(a[17], a[43]); SQRADDAC(a[18], a[42]); SQRADDAC(a[19], a[41]); SQRADDAC(a[20], a[40]); SQRADDAC(a[21], a[39]); SQRADDAC(a[22], a[38]); SQRADDAC(a[23], a[37]); SQRADDAC(a[24], a[36]); SQRADDAC(a[25], a[35]); SQRADDAC(a[26], a[34]); SQRADDAC(a[27], a[33]); SQRADDAC(a[28], a[32]); SQRADDAC(a[29], a[31]); SQRADDDB; SQRADD(a[30], a[30]); + COMBA_STORE(b[60]); + + /* output 61 */ + CARRY_FORWARD; + SQRADDSC(a[14], a[47]); SQRADDAC(a[15], a[46]); SQRADDAC(a[16], a[45]); SQRADDAC(a[17], a[44]); SQRADDAC(a[18], a[43]); SQRADDAC(a[19], a[42]); SQRADDAC(a[20], a[41]); SQRADDAC(a[21], a[40]); SQRADDAC(a[22], a[39]); SQRADDAC(a[23], a[38]); SQRADDAC(a[24], a[37]); SQRADDAC(a[25], a[36]); SQRADDAC(a[26], a[35]); SQRADDAC(a[27], a[34]); SQRADDAC(a[28], a[33]); SQRADDAC(a[29], a[32]); SQRADDAC(a[30], a[31]); SQRADDDB; + COMBA_STORE(b[61]); + + /* output 62 */ + CARRY_FORWARD; + SQRADDSC(a[15], a[47]); SQRADDAC(a[16], a[46]); SQRADDAC(a[17], a[45]); SQRADDAC(a[18], a[44]); SQRADDAC(a[19], a[43]); SQRADDAC(a[20], a[42]); SQRADDAC(a[21], a[41]); SQRADDAC(a[22], a[40]); SQRADDAC(a[23], a[39]); SQRADDAC(a[24], a[38]); SQRADDAC(a[25], a[37]); SQRADDAC(a[26], a[36]); SQRADDAC(a[27], a[35]); SQRADDAC(a[28], a[34]); SQRADDAC(a[29], a[33]); SQRADDAC(a[30], a[32]); SQRADDDB; SQRADD(a[31], a[31]); + COMBA_STORE(b[62]); + + /* output 63 */ + CARRY_FORWARD; + SQRADDSC(a[16], a[47]); SQRADDAC(a[17], a[46]); SQRADDAC(a[18], a[45]); SQRADDAC(a[19], a[44]); SQRADDAC(a[20], a[43]); SQRADDAC(a[21], a[42]); SQRADDAC(a[22], a[41]); SQRADDAC(a[23], a[40]); SQRADDAC(a[24], a[39]); SQRADDAC(a[25], a[38]); SQRADDAC(a[26], a[37]); SQRADDAC(a[27], a[36]); SQRADDAC(a[28], a[35]); SQRADDAC(a[29], a[34]); SQRADDAC(a[30], a[33]); SQRADDAC(a[31], a[32]); SQRADDDB; + COMBA_STORE(b[63]); + + /* output 64 */ + CARRY_FORWARD; + SQRADDSC(a[17], a[47]); SQRADDAC(a[18], a[46]); SQRADDAC(a[19], a[45]); SQRADDAC(a[20], a[44]); SQRADDAC(a[21], a[43]); SQRADDAC(a[22], a[42]); SQRADDAC(a[23], a[41]); SQRADDAC(a[24], a[40]); SQRADDAC(a[25], a[39]); SQRADDAC(a[26], a[38]); SQRADDAC(a[27], a[37]); SQRADDAC(a[28], a[36]); SQRADDAC(a[29], a[35]); SQRADDAC(a[30], a[34]); SQRADDAC(a[31], a[33]); SQRADDDB; SQRADD(a[32], a[32]); + COMBA_STORE(b[64]); + + /* output 65 */ + CARRY_FORWARD; + SQRADDSC(a[18], a[47]); SQRADDAC(a[19], a[46]); SQRADDAC(a[20], a[45]); SQRADDAC(a[21], a[44]); SQRADDAC(a[22], a[43]); SQRADDAC(a[23], a[42]); SQRADDAC(a[24], a[41]); SQRADDAC(a[25], a[40]); SQRADDAC(a[26], a[39]); SQRADDAC(a[27], a[38]); SQRADDAC(a[28], a[37]); SQRADDAC(a[29], a[36]); SQRADDAC(a[30], a[35]); SQRADDAC(a[31], a[34]); SQRADDAC(a[32], a[33]); SQRADDDB; + COMBA_STORE(b[65]); + + /* output 66 */ + CARRY_FORWARD; + SQRADDSC(a[19], a[47]); SQRADDAC(a[20], a[46]); SQRADDAC(a[21], a[45]); SQRADDAC(a[22], a[44]); SQRADDAC(a[23], a[43]); SQRADDAC(a[24], a[42]); SQRADDAC(a[25], a[41]); SQRADDAC(a[26], a[40]); SQRADDAC(a[27], a[39]); SQRADDAC(a[28], a[38]); SQRADDAC(a[29], a[37]); SQRADDAC(a[30], a[36]); SQRADDAC(a[31], a[35]); SQRADDAC(a[32], a[34]); SQRADDDB; SQRADD(a[33], a[33]); + COMBA_STORE(b[66]); + + /* output 67 */ + CARRY_FORWARD; + SQRADDSC(a[20], a[47]); SQRADDAC(a[21], a[46]); SQRADDAC(a[22], a[45]); SQRADDAC(a[23], a[44]); SQRADDAC(a[24], a[43]); SQRADDAC(a[25], a[42]); SQRADDAC(a[26], a[41]); SQRADDAC(a[27], a[40]); SQRADDAC(a[28], a[39]); SQRADDAC(a[29], a[38]); SQRADDAC(a[30], a[37]); SQRADDAC(a[31], a[36]); SQRADDAC(a[32], a[35]); SQRADDAC(a[33], a[34]); SQRADDDB; + COMBA_STORE(b[67]); + + /* output 68 */ + 
CARRY_FORWARD; + SQRADDSC(a[21], a[47]); SQRADDAC(a[22], a[46]); SQRADDAC(a[23], a[45]); SQRADDAC(a[24], a[44]); SQRADDAC(a[25], a[43]); SQRADDAC(a[26], a[42]); SQRADDAC(a[27], a[41]); SQRADDAC(a[28], a[40]); SQRADDAC(a[29], a[39]); SQRADDAC(a[30], a[38]); SQRADDAC(a[31], a[37]); SQRADDAC(a[32], a[36]); SQRADDAC(a[33], a[35]); SQRADDDB; SQRADD(a[34], a[34]); + COMBA_STORE(b[68]); + + /* output 69 */ + CARRY_FORWARD; + SQRADDSC(a[22], a[47]); SQRADDAC(a[23], a[46]); SQRADDAC(a[24], a[45]); SQRADDAC(a[25], a[44]); SQRADDAC(a[26], a[43]); SQRADDAC(a[27], a[42]); SQRADDAC(a[28], a[41]); SQRADDAC(a[29], a[40]); SQRADDAC(a[30], a[39]); SQRADDAC(a[31], a[38]); SQRADDAC(a[32], a[37]); SQRADDAC(a[33], a[36]); SQRADDAC(a[34], a[35]); SQRADDDB; + COMBA_STORE(b[69]); + + /* output 70 */ + CARRY_FORWARD; + SQRADDSC(a[23], a[47]); SQRADDAC(a[24], a[46]); SQRADDAC(a[25], a[45]); SQRADDAC(a[26], a[44]); SQRADDAC(a[27], a[43]); SQRADDAC(a[28], a[42]); SQRADDAC(a[29], a[41]); SQRADDAC(a[30], a[40]); SQRADDAC(a[31], a[39]); SQRADDAC(a[32], a[38]); SQRADDAC(a[33], a[37]); SQRADDAC(a[34], a[36]); SQRADDDB; SQRADD(a[35], a[35]); + COMBA_STORE(b[70]); + + /* output 71 */ + CARRY_FORWARD; + SQRADDSC(a[24], a[47]); SQRADDAC(a[25], a[46]); SQRADDAC(a[26], a[45]); SQRADDAC(a[27], a[44]); SQRADDAC(a[28], a[43]); SQRADDAC(a[29], a[42]); SQRADDAC(a[30], a[41]); SQRADDAC(a[31], a[40]); SQRADDAC(a[32], a[39]); SQRADDAC(a[33], a[38]); SQRADDAC(a[34], a[37]); SQRADDAC(a[35], a[36]); SQRADDDB; + COMBA_STORE(b[71]); + + /* output 72 */ + CARRY_FORWARD; + SQRADDSC(a[25], a[47]); SQRADDAC(a[26], a[46]); SQRADDAC(a[27], a[45]); SQRADDAC(a[28], a[44]); SQRADDAC(a[29], a[43]); SQRADDAC(a[30], a[42]); SQRADDAC(a[31], a[41]); SQRADDAC(a[32], a[40]); SQRADDAC(a[33], a[39]); SQRADDAC(a[34], a[38]); SQRADDAC(a[35], a[37]); SQRADDDB; SQRADD(a[36], a[36]); + COMBA_STORE(b[72]); + + /* output 73 */ + CARRY_FORWARD; + SQRADDSC(a[26], a[47]); SQRADDAC(a[27], a[46]); SQRADDAC(a[28], a[45]); SQRADDAC(a[29], a[44]); SQRADDAC(a[30], a[43]); SQRADDAC(a[31], a[42]); SQRADDAC(a[32], a[41]); SQRADDAC(a[33], a[40]); SQRADDAC(a[34], a[39]); SQRADDAC(a[35], a[38]); SQRADDAC(a[36], a[37]); SQRADDDB; + COMBA_STORE(b[73]); + + /* output 74 */ + CARRY_FORWARD; + SQRADDSC(a[27], a[47]); SQRADDAC(a[28], a[46]); SQRADDAC(a[29], a[45]); SQRADDAC(a[30], a[44]); SQRADDAC(a[31], a[43]); SQRADDAC(a[32], a[42]); SQRADDAC(a[33], a[41]); SQRADDAC(a[34], a[40]); SQRADDAC(a[35], a[39]); SQRADDAC(a[36], a[38]); SQRADDDB; SQRADD(a[37], a[37]); + COMBA_STORE(b[74]); + + /* output 75 */ + CARRY_FORWARD; + SQRADDSC(a[28], a[47]); SQRADDAC(a[29], a[46]); SQRADDAC(a[30], a[45]); SQRADDAC(a[31], a[44]); SQRADDAC(a[32], a[43]); SQRADDAC(a[33], a[42]); SQRADDAC(a[34], a[41]); SQRADDAC(a[35], a[40]); SQRADDAC(a[36], a[39]); SQRADDAC(a[37], a[38]); SQRADDDB; + COMBA_STORE(b[75]); + + /* output 76 */ + CARRY_FORWARD; + SQRADDSC(a[29], a[47]); SQRADDAC(a[30], a[46]); SQRADDAC(a[31], a[45]); SQRADDAC(a[32], a[44]); SQRADDAC(a[33], a[43]); SQRADDAC(a[34], a[42]); SQRADDAC(a[35], a[41]); SQRADDAC(a[36], a[40]); SQRADDAC(a[37], a[39]); SQRADDDB; SQRADD(a[38], a[38]); + COMBA_STORE(b[76]); + + /* output 77 */ + CARRY_FORWARD; + SQRADDSC(a[30], a[47]); SQRADDAC(a[31], a[46]); SQRADDAC(a[32], a[45]); SQRADDAC(a[33], a[44]); SQRADDAC(a[34], a[43]); SQRADDAC(a[35], a[42]); SQRADDAC(a[36], a[41]); SQRADDAC(a[37], a[40]); SQRADDAC(a[38], a[39]); SQRADDDB; + COMBA_STORE(b[77]); + + /* output 78 */ + CARRY_FORWARD; + SQRADDSC(a[31], a[47]); SQRADDAC(a[32], a[46]); SQRADDAC(a[33], a[45]); 
SQRADDAC(a[34], a[44]); SQRADDAC(a[35], a[43]); SQRADDAC(a[36], a[42]); SQRADDAC(a[37], a[41]); SQRADDAC(a[38], a[40]); SQRADDDB; SQRADD(a[39], a[39]);
+ COMBA_STORE(b[78]);
+
+ /* output 79 */
+ CARRY_FORWARD;
+ SQRADDSC(a[32], a[47]); SQRADDAC(a[33], a[46]); SQRADDAC(a[34], a[45]); SQRADDAC(a[35], a[44]); SQRADDAC(a[36], a[43]); SQRADDAC(a[37], a[42]); SQRADDAC(a[38], a[41]); SQRADDAC(a[39], a[40]); SQRADDDB;
+ COMBA_STORE(b[79]);
+
+ /* output 80 */
+ CARRY_FORWARD;
+ SQRADDSC(a[33], a[47]); SQRADDAC(a[34], a[46]); SQRADDAC(a[35], a[45]); SQRADDAC(a[36], a[44]); SQRADDAC(a[37], a[43]); SQRADDAC(a[38], a[42]); SQRADDAC(a[39], a[41]); SQRADDDB; SQRADD(a[40], a[40]);
+ COMBA_STORE(b[80]);
+
+ /* output 81 */
+ CARRY_FORWARD;
+ SQRADDSC(a[34], a[47]); SQRADDAC(a[35], a[46]); SQRADDAC(a[36], a[45]); SQRADDAC(a[37], a[44]); SQRADDAC(a[38], a[43]); SQRADDAC(a[39], a[42]); SQRADDAC(a[40], a[41]); SQRADDDB;
+ COMBA_STORE(b[81]);
+
+ /* output 82 */
+ CARRY_FORWARD;
+ SQRADDSC(a[35], a[47]); SQRADDAC(a[36], a[46]); SQRADDAC(a[37], a[45]); SQRADDAC(a[38], a[44]); SQRADDAC(a[39], a[43]); SQRADDAC(a[40], a[42]); SQRADDDB; SQRADD(a[41], a[41]);
+ COMBA_STORE(b[82]);
+
+ /* output 83 */
+ CARRY_FORWARD;
+ SQRADDSC(a[36], a[47]); SQRADDAC(a[37], a[46]); SQRADDAC(a[38], a[45]); SQRADDAC(a[39], a[44]); SQRADDAC(a[40], a[43]); SQRADDAC(a[41], a[42]); SQRADDDB;
+ COMBA_STORE(b[83]);
+
+ /* output 84 */
+ CARRY_FORWARD;
+ SQRADDSC(a[37], a[47]); SQRADDAC(a[38], a[46]); SQRADDAC(a[39], a[45]); SQRADDAC(a[40], a[44]); SQRADDAC(a[41], a[43]); SQRADDDB; SQRADD(a[42], a[42]);
+ COMBA_STORE(b[84]);
+
+ /* output 85 */
+ CARRY_FORWARD;
+ SQRADDSC(a[38], a[47]); SQRADDAC(a[39], a[46]); SQRADDAC(a[40], a[45]); SQRADDAC(a[41], a[44]); SQRADDAC(a[42], a[43]); SQRADDDB;
+ COMBA_STORE(b[85]);
+
+ /* output 86 */
+ CARRY_FORWARD;
+ SQRADDSC(a[39], a[47]); SQRADDAC(a[40], a[46]); SQRADDAC(a[41], a[45]); SQRADDAC(a[42], a[44]); SQRADDDB; SQRADD(a[43], a[43]);
+ COMBA_STORE(b[86]);
+
+ /* output 87 */
+ CARRY_FORWARD;
+ SQRADDSC(a[40], a[47]); SQRADDAC(a[41], a[46]); SQRADDAC(a[42], a[45]); SQRADDAC(a[43], a[44]); SQRADDDB;
+ COMBA_STORE(b[87]);
+
+ /* output 88 */
+ CARRY_FORWARD;
+ SQRADDSC(a[41], a[47]); SQRADDAC(a[42], a[46]); SQRADDAC(a[43], a[45]); SQRADDDB; SQRADD(a[44], a[44]);
+ COMBA_STORE(b[88]);
+
+ /* output 89 */
+ CARRY_FORWARD;
+ SQRADDSC(a[42], a[47]); SQRADDAC(a[43], a[46]); SQRADDAC(a[44], a[45]); SQRADDDB;
+ COMBA_STORE(b[89]);
+
+ /* output 90 */
+ CARRY_FORWARD;
+ SQRADD2(a[43], a[47]); SQRADD2(a[44], a[46]); SQRADD(a[45], a[45]);
+ COMBA_STORE(b[90]);
+
+ /* output 91 */
+ CARRY_FORWARD;
+ SQRADD2(a[44], a[47]); SQRADD2(a[45], a[46]);
+ COMBA_STORE(b[91]);
+
+ /* output 92 */
+ CARRY_FORWARD;
+ SQRADD2(a[45], a[47]); SQRADD(a[46], a[46]);
+ COMBA_STORE(b[92]);
+
+ /* output 93 */
+ CARRY_FORWARD;
+ SQRADD2(a[46], a[47]);
+ COMBA_STORE(b[93]);
+
+ /* output 94 */
+ CARRY_FORWARD;
+ SQRADD(a[47], a[47]);
+ COMBA_STORE(b[94]);
+ COMBA_STORE2(b[95]);
+ COMBA_FINI;
+
+ B->used = 96;
+ B->sign = FP_ZPOS;
+ XMEMCPY(B->dp, b, 96 * sizeof(fp_digit));
+ fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
+}
+#endif
+
+
diff --git a/client/wolfssl/wolfcrypt/src/fp_sqr_comba_6.i b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_6.i
new file mode 100644
index 0000000..b364168
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_6.i
@@ -0,0 +1,117 @@
+/* fp_sqr_comba_6.i
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+#ifdef TFM_SQR6
+int fp_sqr_comba6(fp_int *A, fp_int *B)
+{
+ fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
+#ifdef TFM_ISO
+ fp_word tt;
+#endif
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit b[12];
+#else
+ fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 12, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL)
+ return FP_MEM;
+#endif
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0 */
+ SQRADD(a[0],a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADD2(a[1], a[5]); SQRADD2(a[2], a[4]); SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+
+ /* output 7 */
+ CARRY_FORWARD;
+ SQRADD2(a[2], a[5]); SQRADD2(a[3], a[4]);
+ COMBA_STORE(b[7]);
+
+ /* output 8 */
+ CARRY_FORWARD;
+ SQRADD2(a[3], a[5]); SQRADD(a[4], a[4]);
+ COMBA_STORE(b[8]);
+
+ /* output 9 */
+ CARRY_FORWARD;
+ SQRADD2(a[4], a[5]);
+ COMBA_STORE(b[9]);
+
+ /* output 10 */
+ CARRY_FORWARD;
+ SQRADD(a[5], a[5]);
+ COMBA_STORE(b[10]);
+ COMBA_STORE2(b[11]);
+ COMBA_FINI;
+
+ B->used = 12;
+ B->sign = FP_ZPOS;
+ XMEMCPY(B->dp, b, 12 * sizeof(fp_digit));
+ fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
+}
+#endif
+
+
diff --git a/client/wolfssl/wolfcrypt/src/fp_sqr_comba_64.i b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_64.i
new file mode 100644
index 0000000..b9b2c8a
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_64.i
@@ -0,0 +1,697 @@
+/* fp_sqr_comba_64.i
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+#ifdef TFM_SQR64
+int fp_sqr_comba64(fp_int *A, fp_int *B)
+{
+ fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
+#ifdef TFM_ISO
+ fp_word tt;
+#endif
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit b[128];
+#else
+ fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 128, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL)
+ return FP_MEM;
+#endif
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0 */
+ SQRADD(a[0],a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+
+ /* output 7 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
+ COMBA_STORE(b[7]);
+
+ /* output 8 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
+ COMBA_STORE(b[8]);
+
+ /* output 9 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
+ COMBA_STORE(b[9]);
+
+ /* output 10 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]);
+ COMBA_STORE(b[10]);
+
+ /* output 11 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB;
+ COMBA_STORE(b[11]);
+
+ /* output 12 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]);
+ COMBA_STORE(b[12]);
+
+ /* output 13 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB;
+ COMBA_STORE(b[13]);
+
+ /* output 14 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]);
+ COMBA_STORE(b[14]);
+
+ /* output 15 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB;
+ COMBA_STORE(b[15]);
+
+ /* output 16 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB;
SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); 
SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB; + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB; + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + + /* output 31 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]); SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB; + COMBA_STORE(b[31]); + + /* output 32 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[32]); SQRADDAC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]); SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]); + COMBA_STORE(b[32]); + + /* output 33 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[33]); SQRADDAC(a[1], a[32]); SQRADDAC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]); SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB; + COMBA_STORE(b[33]); + + /* output 34 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[34]); SQRADDAC(a[1], a[33]); SQRADDAC(a[2], a[32]); SQRADDAC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]); SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]); + COMBA_STORE(b[34]); + + /* output 35 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[35]); SQRADDAC(a[1], a[34]); SQRADDAC(a[2], a[33]); SQRADDAC(a[3], a[32]); SQRADDAC(a[4], a[31]); 
SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]); SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB; + COMBA_STORE(b[35]); + + /* output 36 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[36]); SQRADDAC(a[1], a[35]); SQRADDAC(a[2], a[34]); SQRADDAC(a[3], a[33]); SQRADDAC(a[4], a[32]); SQRADDAC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]); SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]); + COMBA_STORE(b[36]); + + /* output 37 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[37]); SQRADDAC(a[1], a[36]); SQRADDAC(a[2], a[35]); SQRADDAC(a[3], a[34]); SQRADDAC(a[4], a[33]); SQRADDAC(a[5], a[32]); SQRADDAC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]); SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB; + COMBA_STORE(b[37]); + + /* output 38 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[38]); SQRADDAC(a[1], a[37]); SQRADDAC(a[2], a[36]); SQRADDAC(a[3], a[35]); SQRADDAC(a[4], a[34]); SQRADDAC(a[5], a[33]); SQRADDAC(a[6], a[32]); SQRADDAC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]); SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]); + COMBA_STORE(b[38]); + + /* output 39 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[39]); SQRADDAC(a[1], a[38]); SQRADDAC(a[2], a[37]); SQRADDAC(a[3], a[36]); SQRADDAC(a[4], a[35]); SQRADDAC(a[5], a[34]); SQRADDAC(a[6], a[33]); SQRADDAC(a[7], a[32]); SQRADDAC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]); SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB; + COMBA_STORE(b[39]); + + /* output 40 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[40]); SQRADDAC(a[1], a[39]); SQRADDAC(a[2], a[38]); SQRADDAC(a[3], a[37]); SQRADDAC(a[4], a[36]); SQRADDAC(a[5], a[35]); SQRADDAC(a[6], a[34]); SQRADDAC(a[7], a[33]); SQRADDAC(a[8], a[32]); SQRADDAC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]); SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]); + COMBA_STORE(b[40]); + + /* output 41 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[41]); SQRADDAC(a[1], a[40]); SQRADDAC(a[2], a[39]); SQRADDAC(a[3], a[38]); SQRADDAC(a[4], a[37]); SQRADDAC(a[5], a[36]); SQRADDAC(a[6], a[35]); SQRADDAC(a[7], a[34]); SQRADDAC(a[8], a[33]); SQRADDAC(a[9], a[32]); SQRADDAC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]); SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); 
SQRADDDB; + COMBA_STORE(b[41]); + + /* output 42 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[42]); SQRADDAC(a[1], a[41]); SQRADDAC(a[2], a[40]); SQRADDAC(a[3], a[39]); SQRADDAC(a[4], a[38]); SQRADDAC(a[5], a[37]); SQRADDAC(a[6], a[36]); SQRADDAC(a[7], a[35]); SQRADDAC(a[8], a[34]); SQRADDAC(a[9], a[33]); SQRADDAC(a[10], a[32]); SQRADDAC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]); SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]); + COMBA_STORE(b[42]); + + /* output 43 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[43]); SQRADDAC(a[1], a[42]); SQRADDAC(a[2], a[41]); SQRADDAC(a[3], a[40]); SQRADDAC(a[4], a[39]); SQRADDAC(a[5], a[38]); SQRADDAC(a[6], a[37]); SQRADDAC(a[7], a[36]); SQRADDAC(a[8], a[35]); SQRADDAC(a[9], a[34]); SQRADDAC(a[10], a[33]); SQRADDAC(a[11], a[32]); SQRADDAC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]); SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB; + COMBA_STORE(b[43]); + + /* output 44 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[44]); SQRADDAC(a[1], a[43]); SQRADDAC(a[2], a[42]); SQRADDAC(a[3], a[41]); SQRADDAC(a[4], a[40]); SQRADDAC(a[5], a[39]); SQRADDAC(a[6], a[38]); SQRADDAC(a[7], a[37]); SQRADDAC(a[8], a[36]); SQRADDAC(a[9], a[35]); SQRADDAC(a[10], a[34]); SQRADDAC(a[11], a[33]); SQRADDAC(a[12], a[32]); SQRADDAC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]); SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]); + COMBA_STORE(b[44]); + + /* output 45 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[45]); SQRADDAC(a[1], a[44]); SQRADDAC(a[2], a[43]); SQRADDAC(a[3], a[42]); SQRADDAC(a[4], a[41]); SQRADDAC(a[5], a[40]); SQRADDAC(a[6], a[39]); SQRADDAC(a[7], a[38]); SQRADDAC(a[8], a[37]); SQRADDAC(a[9], a[36]); SQRADDAC(a[10], a[35]); SQRADDAC(a[11], a[34]); SQRADDAC(a[12], a[33]); SQRADDAC(a[13], a[32]); SQRADDAC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]); SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB; + COMBA_STORE(b[45]); + + /* output 46 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[46]); SQRADDAC(a[1], a[45]); SQRADDAC(a[2], a[44]); SQRADDAC(a[3], a[43]); SQRADDAC(a[4], a[42]); SQRADDAC(a[5], a[41]); SQRADDAC(a[6], a[40]); SQRADDAC(a[7], a[39]); SQRADDAC(a[8], a[38]); SQRADDAC(a[9], a[37]); SQRADDAC(a[10], a[36]); SQRADDAC(a[11], a[35]); SQRADDAC(a[12], a[34]); SQRADDAC(a[13], a[33]); SQRADDAC(a[14], a[32]); SQRADDAC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]); SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]); + COMBA_STORE(b[46]); + + /* output 47 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[47]); SQRADDAC(a[1], a[46]); SQRADDAC(a[2], a[45]); SQRADDAC(a[3], a[44]); SQRADDAC(a[4], a[43]); SQRADDAC(a[5], a[42]); SQRADDAC(a[6], a[41]); SQRADDAC(a[7], a[40]); SQRADDAC(a[8], a[39]); SQRADDAC(a[9], a[38]); SQRADDAC(a[10], a[37]); SQRADDAC(a[11], a[36]); SQRADDAC(a[12], a[35]); SQRADDAC(a[13], a[34]); SQRADDAC(a[14], a[33]); SQRADDAC(a[15], a[32]); SQRADDAC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); 
SQRADDAC(a[19], a[28]); SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB; + COMBA_STORE(b[47]); + + /* output 48 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[48]); SQRADDAC(a[1], a[47]); SQRADDAC(a[2], a[46]); SQRADDAC(a[3], a[45]); SQRADDAC(a[4], a[44]); SQRADDAC(a[5], a[43]); SQRADDAC(a[6], a[42]); SQRADDAC(a[7], a[41]); SQRADDAC(a[8], a[40]); SQRADDAC(a[9], a[39]); SQRADDAC(a[10], a[38]); SQRADDAC(a[11], a[37]); SQRADDAC(a[12], a[36]); SQRADDAC(a[13], a[35]); SQRADDAC(a[14], a[34]); SQRADDAC(a[15], a[33]); SQRADDAC(a[16], a[32]); SQRADDAC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]); SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]); + COMBA_STORE(b[48]); + + /* output 49 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[49]); SQRADDAC(a[1], a[48]); SQRADDAC(a[2], a[47]); SQRADDAC(a[3], a[46]); SQRADDAC(a[4], a[45]); SQRADDAC(a[5], a[44]); SQRADDAC(a[6], a[43]); SQRADDAC(a[7], a[42]); SQRADDAC(a[8], a[41]); SQRADDAC(a[9], a[40]); SQRADDAC(a[10], a[39]); SQRADDAC(a[11], a[38]); SQRADDAC(a[12], a[37]); SQRADDAC(a[13], a[36]); SQRADDAC(a[14], a[35]); SQRADDAC(a[15], a[34]); SQRADDAC(a[16], a[33]); SQRADDAC(a[17], a[32]); SQRADDAC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]); SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB; + COMBA_STORE(b[49]); + + /* output 50 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[50]); SQRADDAC(a[1], a[49]); SQRADDAC(a[2], a[48]); SQRADDAC(a[3], a[47]); SQRADDAC(a[4], a[46]); SQRADDAC(a[5], a[45]); SQRADDAC(a[6], a[44]); SQRADDAC(a[7], a[43]); SQRADDAC(a[8], a[42]); SQRADDAC(a[9], a[41]); SQRADDAC(a[10], a[40]); SQRADDAC(a[11], a[39]); SQRADDAC(a[12], a[38]); SQRADDAC(a[13], a[37]); SQRADDAC(a[14], a[36]); SQRADDAC(a[15], a[35]); SQRADDAC(a[16], a[34]); SQRADDAC(a[17], a[33]); SQRADDAC(a[18], a[32]); SQRADDAC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]); SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]); SQRADDDB; SQRADD(a[25], a[25]); + COMBA_STORE(b[50]); + + /* output 51 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[51]); SQRADDAC(a[1], a[50]); SQRADDAC(a[2], a[49]); SQRADDAC(a[3], a[48]); SQRADDAC(a[4], a[47]); SQRADDAC(a[5], a[46]); SQRADDAC(a[6], a[45]); SQRADDAC(a[7], a[44]); SQRADDAC(a[8], a[43]); SQRADDAC(a[9], a[42]); SQRADDAC(a[10], a[41]); SQRADDAC(a[11], a[40]); SQRADDAC(a[12], a[39]); SQRADDAC(a[13], a[38]); SQRADDAC(a[14], a[37]); SQRADDAC(a[15], a[36]); SQRADDAC(a[16], a[35]); SQRADDAC(a[17], a[34]); SQRADDAC(a[18], a[33]); SQRADDAC(a[19], a[32]); SQRADDAC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]); SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]); SQRADDDB; + COMBA_STORE(b[51]); + + /* output 52 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[52]); SQRADDAC(a[1], a[51]); SQRADDAC(a[2], a[50]); SQRADDAC(a[3], a[49]); SQRADDAC(a[4], a[48]); SQRADDAC(a[5], a[47]); SQRADDAC(a[6], a[46]); SQRADDAC(a[7], a[45]); SQRADDAC(a[8], a[44]); SQRADDAC(a[9], a[43]); SQRADDAC(a[10], a[42]); SQRADDAC(a[11], a[41]); SQRADDAC(a[12], a[40]); SQRADDAC(a[13], a[39]); SQRADDAC(a[14], a[38]); SQRADDAC(a[15], a[37]); SQRADDAC(a[16], a[36]); SQRADDAC(a[17], a[35]); SQRADDAC(a[18], a[34]); SQRADDAC(a[19], a[33]); SQRADDAC(a[20], a[32]); SQRADDAC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]); SQRADDAC(a[25], a[27]); SQRADDDB; SQRADD(a[26], a[26]); + COMBA_STORE(b[52]); + + /* 
output 53 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[53]); SQRADDAC(a[1], a[52]); SQRADDAC(a[2], a[51]); SQRADDAC(a[3], a[50]); SQRADDAC(a[4], a[49]); SQRADDAC(a[5], a[48]); SQRADDAC(a[6], a[47]); SQRADDAC(a[7], a[46]); SQRADDAC(a[8], a[45]); SQRADDAC(a[9], a[44]); SQRADDAC(a[10], a[43]); SQRADDAC(a[11], a[42]); SQRADDAC(a[12], a[41]); SQRADDAC(a[13], a[40]); SQRADDAC(a[14], a[39]); SQRADDAC(a[15], a[38]); SQRADDAC(a[16], a[37]); SQRADDAC(a[17], a[36]); SQRADDAC(a[18], a[35]); SQRADDAC(a[19], a[34]); SQRADDAC(a[20], a[33]); SQRADDAC(a[21], a[32]); SQRADDAC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]); SQRADDAC(a[26], a[27]); SQRADDDB; + COMBA_STORE(b[53]); + + /* output 54 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[54]); SQRADDAC(a[1], a[53]); SQRADDAC(a[2], a[52]); SQRADDAC(a[3], a[51]); SQRADDAC(a[4], a[50]); SQRADDAC(a[5], a[49]); SQRADDAC(a[6], a[48]); SQRADDAC(a[7], a[47]); SQRADDAC(a[8], a[46]); SQRADDAC(a[9], a[45]); SQRADDAC(a[10], a[44]); SQRADDAC(a[11], a[43]); SQRADDAC(a[12], a[42]); SQRADDAC(a[13], a[41]); SQRADDAC(a[14], a[40]); SQRADDAC(a[15], a[39]); SQRADDAC(a[16], a[38]); SQRADDAC(a[17], a[37]); SQRADDAC(a[18], a[36]); SQRADDAC(a[19], a[35]); SQRADDAC(a[20], a[34]); SQRADDAC(a[21], a[33]); SQRADDAC(a[22], a[32]); SQRADDAC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]); SQRADDDB; SQRADD(a[27], a[27]); + COMBA_STORE(b[54]); + + /* output 55 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[55]); SQRADDAC(a[1], a[54]); SQRADDAC(a[2], a[53]); SQRADDAC(a[3], a[52]); SQRADDAC(a[4], a[51]); SQRADDAC(a[5], a[50]); SQRADDAC(a[6], a[49]); SQRADDAC(a[7], a[48]); SQRADDAC(a[8], a[47]); SQRADDAC(a[9], a[46]); SQRADDAC(a[10], a[45]); SQRADDAC(a[11], a[44]); SQRADDAC(a[12], a[43]); SQRADDAC(a[13], a[42]); SQRADDAC(a[14], a[41]); SQRADDAC(a[15], a[40]); SQRADDAC(a[16], a[39]); SQRADDAC(a[17], a[38]); SQRADDAC(a[18], a[37]); SQRADDAC(a[19], a[36]); SQRADDAC(a[20], a[35]); SQRADDAC(a[21], a[34]); SQRADDAC(a[22], a[33]); SQRADDAC(a[23], a[32]); SQRADDAC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]); SQRADDDB; + COMBA_STORE(b[55]); + + /* output 56 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[56]); SQRADDAC(a[1], a[55]); SQRADDAC(a[2], a[54]); SQRADDAC(a[3], a[53]); SQRADDAC(a[4], a[52]); SQRADDAC(a[5], a[51]); SQRADDAC(a[6], a[50]); SQRADDAC(a[7], a[49]); SQRADDAC(a[8], a[48]); SQRADDAC(a[9], a[47]); SQRADDAC(a[10], a[46]); SQRADDAC(a[11], a[45]); SQRADDAC(a[12], a[44]); SQRADDAC(a[13], a[43]); SQRADDAC(a[14], a[42]); SQRADDAC(a[15], a[41]); SQRADDAC(a[16], a[40]); SQRADDAC(a[17], a[39]); SQRADDAC(a[18], a[38]); SQRADDAC(a[19], a[37]); SQRADDAC(a[20], a[36]); SQRADDAC(a[21], a[35]); SQRADDAC(a[22], a[34]); SQRADDAC(a[23], a[33]); SQRADDAC(a[24], a[32]); SQRADDAC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]); SQRADDDB; SQRADD(a[28], a[28]); + COMBA_STORE(b[56]); + + /* output 57 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[57]); SQRADDAC(a[1], a[56]); SQRADDAC(a[2], a[55]); SQRADDAC(a[3], a[54]); SQRADDAC(a[4], a[53]); SQRADDAC(a[5], a[52]); SQRADDAC(a[6], a[51]); SQRADDAC(a[7], a[50]); SQRADDAC(a[8], a[49]); SQRADDAC(a[9], a[48]); SQRADDAC(a[10], a[47]); SQRADDAC(a[11], a[46]); SQRADDAC(a[12], a[45]); SQRADDAC(a[13], a[44]); SQRADDAC(a[14], a[43]); SQRADDAC(a[15], a[42]); SQRADDAC(a[16], a[41]); SQRADDAC(a[17], a[40]); SQRADDAC(a[18], a[39]); SQRADDAC(a[19], a[38]); SQRADDAC(a[20], a[37]); SQRADDAC(a[21], a[36]); SQRADDAC(a[22], a[35]); SQRADDAC(a[23], a[34]); SQRADDAC(a[24], a[33]); 
SQRADDAC(a[25], a[32]); SQRADDAC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]); SQRADDDB; + COMBA_STORE(b[57]); + + /* output 58 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[58]); SQRADDAC(a[1], a[57]); SQRADDAC(a[2], a[56]); SQRADDAC(a[3], a[55]); SQRADDAC(a[4], a[54]); SQRADDAC(a[5], a[53]); SQRADDAC(a[6], a[52]); SQRADDAC(a[7], a[51]); SQRADDAC(a[8], a[50]); SQRADDAC(a[9], a[49]); SQRADDAC(a[10], a[48]); SQRADDAC(a[11], a[47]); SQRADDAC(a[12], a[46]); SQRADDAC(a[13], a[45]); SQRADDAC(a[14], a[44]); SQRADDAC(a[15], a[43]); SQRADDAC(a[16], a[42]); SQRADDAC(a[17], a[41]); SQRADDAC(a[18], a[40]); SQRADDAC(a[19], a[39]); SQRADDAC(a[20], a[38]); SQRADDAC(a[21], a[37]); SQRADDAC(a[22], a[36]); SQRADDAC(a[23], a[35]); SQRADDAC(a[24], a[34]); SQRADDAC(a[25], a[33]); SQRADDAC(a[26], a[32]); SQRADDAC(a[27], a[31]); SQRADDAC(a[28], a[30]); SQRADDDB; SQRADD(a[29], a[29]); + COMBA_STORE(b[58]); + + /* output 59 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[59]); SQRADDAC(a[1], a[58]); SQRADDAC(a[2], a[57]); SQRADDAC(a[3], a[56]); SQRADDAC(a[4], a[55]); SQRADDAC(a[5], a[54]); SQRADDAC(a[6], a[53]); SQRADDAC(a[7], a[52]); SQRADDAC(a[8], a[51]); SQRADDAC(a[9], a[50]); SQRADDAC(a[10], a[49]); SQRADDAC(a[11], a[48]); SQRADDAC(a[12], a[47]); SQRADDAC(a[13], a[46]); SQRADDAC(a[14], a[45]); SQRADDAC(a[15], a[44]); SQRADDAC(a[16], a[43]); SQRADDAC(a[17], a[42]); SQRADDAC(a[18], a[41]); SQRADDAC(a[19], a[40]); SQRADDAC(a[20], a[39]); SQRADDAC(a[21], a[38]); SQRADDAC(a[22], a[37]); SQRADDAC(a[23], a[36]); SQRADDAC(a[24], a[35]); SQRADDAC(a[25], a[34]); SQRADDAC(a[26], a[33]); SQRADDAC(a[27], a[32]); SQRADDAC(a[28], a[31]); SQRADDAC(a[29], a[30]); SQRADDDB; + COMBA_STORE(b[59]); + + /* output 60 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[60]); SQRADDAC(a[1], a[59]); SQRADDAC(a[2], a[58]); SQRADDAC(a[3], a[57]); SQRADDAC(a[4], a[56]); SQRADDAC(a[5], a[55]); SQRADDAC(a[6], a[54]); SQRADDAC(a[7], a[53]); SQRADDAC(a[8], a[52]); SQRADDAC(a[9], a[51]); SQRADDAC(a[10], a[50]); SQRADDAC(a[11], a[49]); SQRADDAC(a[12], a[48]); SQRADDAC(a[13], a[47]); SQRADDAC(a[14], a[46]); SQRADDAC(a[15], a[45]); SQRADDAC(a[16], a[44]); SQRADDAC(a[17], a[43]); SQRADDAC(a[18], a[42]); SQRADDAC(a[19], a[41]); SQRADDAC(a[20], a[40]); SQRADDAC(a[21], a[39]); SQRADDAC(a[22], a[38]); SQRADDAC(a[23], a[37]); SQRADDAC(a[24], a[36]); SQRADDAC(a[25], a[35]); SQRADDAC(a[26], a[34]); SQRADDAC(a[27], a[33]); SQRADDAC(a[28], a[32]); SQRADDAC(a[29], a[31]); SQRADDDB; SQRADD(a[30], a[30]); + COMBA_STORE(b[60]); + + /* output 61 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[61]); SQRADDAC(a[1], a[60]); SQRADDAC(a[2], a[59]); SQRADDAC(a[3], a[58]); SQRADDAC(a[4], a[57]); SQRADDAC(a[5], a[56]); SQRADDAC(a[6], a[55]); SQRADDAC(a[7], a[54]); SQRADDAC(a[8], a[53]); SQRADDAC(a[9], a[52]); SQRADDAC(a[10], a[51]); SQRADDAC(a[11], a[50]); SQRADDAC(a[12], a[49]); SQRADDAC(a[13], a[48]); SQRADDAC(a[14], a[47]); SQRADDAC(a[15], a[46]); SQRADDAC(a[16], a[45]); SQRADDAC(a[17], a[44]); SQRADDAC(a[18], a[43]); SQRADDAC(a[19], a[42]); SQRADDAC(a[20], a[41]); SQRADDAC(a[21], a[40]); SQRADDAC(a[22], a[39]); SQRADDAC(a[23], a[38]); SQRADDAC(a[24], a[37]); SQRADDAC(a[25], a[36]); SQRADDAC(a[26], a[35]); SQRADDAC(a[27], a[34]); SQRADDAC(a[28], a[33]); SQRADDAC(a[29], a[32]); SQRADDAC(a[30], a[31]); SQRADDDB; + COMBA_STORE(b[61]); + + /* output 62 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[62]); SQRADDAC(a[1], a[61]); SQRADDAC(a[2], a[60]); SQRADDAC(a[3], a[59]); SQRADDAC(a[4], a[58]); SQRADDAC(a[5], a[57]); SQRADDAC(a[6], a[56]); SQRADDAC(a[7], a[55]); SQRADDAC(a[8], a[54]); 
SQRADDAC(a[9], a[53]); SQRADDAC(a[10], a[52]); SQRADDAC(a[11], a[51]); SQRADDAC(a[12], a[50]); SQRADDAC(a[13], a[49]); SQRADDAC(a[14], a[48]); SQRADDAC(a[15], a[47]); SQRADDAC(a[16], a[46]); SQRADDAC(a[17], a[45]); SQRADDAC(a[18], a[44]); SQRADDAC(a[19], a[43]); SQRADDAC(a[20], a[42]); SQRADDAC(a[21], a[41]); SQRADDAC(a[22], a[40]); SQRADDAC(a[23], a[39]); SQRADDAC(a[24], a[38]); SQRADDAC(a[25], a[37]); SQRADDAC(a[26], a[36]); SQRADDAC(a[27], a[35]); SQRADDAC(a[28], a[34]); SQRADDAC(a[29], a[33]); SQRADDAC(a[30], a[32]); SQRADDDB; SQRADD(a[31], a[31]); + COMBA_STORE(b[62]); + + /* output 63 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[63]); SQRADDAC(a[1], a[62]); SQRADDAC(a[2], a[61]); SQRADDAC(a[3], a[60]); SQRADDAC(a[4], a[59]); SQRADDAC(a[5], a[58]); SQRADDAC(a[6], a[57]); SQRADDAC(a[7], a[56]); SQRADDAC(a[8], a[55]); SQRADDAC(a[9], a[54]); SQRADDAC(a[10], a[53]); SQRADDAC(a[11], a[52]); SQRADDAC(a[12], a[51]); SQRADDAC(a[13], a[50]); SQRADDAC(a[14], a[49]); SQRADDAC(a[15], a[48]); SQRADDAC(a[16], a[47]); SQRADDAC(a[17], a[46]); SQRADDAC(a[18], a[45]); SQRADDAC(a[19], a[44]); SQRADDAC(a[20], a[43]); SQRADDAC(a[21], a[42]); SQRADDAC(a[22], a[41]); SQRADDAC(a[23], a[40]); SQRADDAC(a[24], a[39]); SQRADDAC(a[25], a[38]); SQRADDAC(a[26], a[37]); SQRADDAC(a[27], a[36]); SQRADDAC(a[28], a[35]); SQRADDAC(a[29], a[34]); SQRADDAC(a[30], a[33]); SQRADDAC(a[31], a[32]); SQRADDDB; + COMBA_STORE(b[63]); + + /* output 64 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[63]); SQRADDAC(a[2], a[62]); SQRADDAC(a[3], a[61]); SQRADDAC(a[4], a[60]); SQRADDAC(a[5], a[59]); SQRADDAC(a[6], a[58]); SQRADDAC(a[7], a[57]); SQRADDAC(a[8], a[56]); SQRADDAC(a[9], a[55]); SQRADDAC(a[10], a[54]); SQRADDAC(a[11], a[53]); SQRADDAC(a[12], a[52]); SQRADDAC(a[13], a[51]); SQRADDAC(a[14], a[50]); SQRADDAC(a[15], a[49]); SQRADDAC(a[16], a[48]); SQRADDAC(a[17], a[47]); SQRADDAC(a[18], a[46]); SQRADDAC(a[19], a[45]); SQRADDAC(a[20], a[44]); SQRADDAC(a[21], a[43]); SQRADDAC(a[22], a[42]); SQRADDAC(a[23], a[41]); SQRADDAC(a[24], a[40]); SQRADDAC(a[25], a[39]); SQRADDAC(a[26], a[38]); SQRADDAC(a[27], a[37]); SQRADDAC(a[28], a[36]); SQRADDAC(a[29], a[35]); SQRADDAC(a[30], a[34]); SQRADDAC(a[31], a[33]); SQRADDDB; SQRADD(a[32], a[32]); + COMBA_STORE(b[64]); + + /* output 65 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[63]); SQRADDAC(a[3], a[62]); SQRADDAC(a[4], a[61]); SQRADDAC(a[5], a[60]); SQRADDAC(a[6], a[59]); SQRADDAC(a[7], a[58]); SQRADDAC(a[8], a[57]); SQRADDAC(a[9], a[56]); SQRADDAC(a[10], a[55]); SQRADDAC(a[11], a[54]); SQRADDAC(a[12], a[53]); SQRADDAC(a[13], a[52]); SQRADDAC(a[14], a[51]); SQRADDAC(a[15], a[50]); SQRADDAC(a[16], a[49]); SQRADDAC(a[17], a[48]); SQRADDAC(a[18], a[47]); SQRADDAC(a[19], a[46]); SQRADDAC(a[20], a[45]); SQRADDAC(a[21], a[44]); SQRADDAC(a[22], a[43]); SQRADDAC(a[23], a[42]); SQRADDAC(a[24], a[41]); SQRADDAC(a[25], a[40]); SQRADDAC(a[26], a[39]); SQRADDAC(a[27], a[38]); SQRADDAC(a[28], a[37]); SQRADDAC(a[29], a[36]); SQRADDAC(a[30], a[35]); SQRADDAC(a[31], a[34]); SQRADDAC(a[32], a[33]); SQRADDDB; + COMBA_STORE(b[65]); + + /* output 66 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[63]); SQRADDAC(a[4], a[62]); SQRADDAC(a[5], a[61]); SQRADDAC(a[6], a[60]); SQRADDAC(a[7], a[59]); SQRADDAC(a[8], a[58]); SQRADDAC(a[9], a[57]); SQRADDAC(a[10], a[56]); SQRADDAC(a[11], a[55]); SQRADDAC(a[12], a[54]); SQRADDAC(a[13], a[53]); SQRADDAC(a[14], a[52]); SQRADDAC(a[15], a[51]); SQRADDAC(a[16], a[50]); SQRADDAC(a[17], a[49]); SQRADDAC(a[18], a[48]); SQRADDAC(a[19], a[47]); SQRADDAC(a[20], a[46]); SQRADDAC(a[21], a[45]); SQRADDAC(a[22], 
a[44]); SQRADDAC(a[23], a[43]); SQRADDAC(a[24], a[42]); SQRADDAC(a[25], a[41]); SQRADDAC(a[26], a[40]); SQRADDAC(a[27], a[39]); SQRADDAC(a[28], a[38]); SQRADDAC(a[29], a[37]); SQRADDAC(a[30], a[36]); SQRADDAC(a[31], a[35]); SQRADDAC(a[32], a[34]); SQRADDDB; SQRADD(a[33], a[33]); + COMBA_STORE(b[66]); + + /* output 67 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[63]); SQRADDAC(a[5], a[62]); SQRADDAC(a[6], a[61]); SQRADDAC(a[7], a[60]); SQRADDAC(a[8], a[59]); SQRADDAC(a[9], a[58]); SQRADDAC(a[10], a[57]); SQRADDAC(a[11], a[56]); SQRADDAC(a[12], a[55]); SQRADDAC(a[13], a[54]); SQRADDAC(a[14], a[53]); SQRADDAC(a[15], a[52]); SQRADDAC(a[16], a[51]); SQRADDAC(a[17], a[50]); SQRADDAC(a[18], a[49]); SQRADDAC(a[19], a[48]); SQRADDAC(a[20], a[47]); SQRADDAC(a[21], a[46]); SQRADDAC(a[22], a[45]); SQRADDAC(a[23], a[44]); SQRADDAC(a[24], a[43]); SQRADDAC(a[25], a[42]); SQRADDAC(a[26], a[41]); SQRADDAC(a[27], a[40]); SQRADDAC(a[28], a[39]); SQRADDAC(a[29], a[38]); SQRADDAC(a[30], a[37]); SQRADDAC(a[31], a[36]); SQRADDAC(a[32], a[35]); SQRADDAC(a[33], a[34]); SQRADDDB; + COMBA_STORE(b[67]); + + /* output 68 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[63]); SQRADDAC(a[6], a[62]); SQRADDAC(a[7], a[61]); SQRADDAC(a[8], a[60]); SQRADDAC(a[9], a[59]); SQRADDAC(a[10], a[58]); SQRADDAC(a[11], a[57]); SQRADDAC(a[12], a[56]); SQRADDAC(a[13], a[55]); SQRADDAC(a[14], a[54]); SQRADDAC(a[15], a[53]); SQRADDAC(a[16], a[52]); SQRADDAC(a[17], a[51]); SQRADDAC(a[18], a[50]); SQRADDAC(a[19], a[49]); SQRADDAC(a[20], a[48]); SQRADDAC(a[21], a[47]); SQRADDAC(a[22], a[46]); SQRADDAC(a[23], a[45]); SQRADDAC(a[24], a[44]); SQRADDAC(a[25], a[43]); SQRADDAC(a[26], a[42]); SQRADDAC(a[27], a[41]); SQRADDAC(a[28], a[40]); SQRADDAC(a[29], a[39]); SQRADDAC(a[30], a[38]); SQRADDAC(a[31], a[37]); SQRADDAC(a[32], a[36]); SQRADDAC(a[33], a[35]); SQRADDDB; SQRADD(a[34], a[34]); + COMBA_STORE(b[68]); + + /* output 69 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[63]); SQRADDAC(a[7], a[62]); SQRADDAC(a[8], a[61]); SQRADDAC(a[9], a[60]); SQRADDAC(a[10], a[59]); SQRADDAC(a[11], a[58]); SQRADDAC(a[12], a[57]); SQRADDAC(a[13], a[56]); SQRADDAC(a[14], a[55]); SQRADDAC(a[15], a[54]); SQRADDAC(a[16], a[53]); SQRADDAC(a[17], a[52]); SQRADDAC(a[18], a[51]); SQRADDAC(a[19], a[50]); SQRADDAC(a[20], a[49]); SQRADDAC(a[21], a[48]); SQRADDAC(a[22], a[47]); SQRADDAC(a[23], a[46]); SQRADDAC(a[24], a[45]); SQRADDAC(a[25], a[44]); SQRADDAC(a[26], a[43]); SQRADDAC(a[27], a[42]); SQRADDAC(a[28], a[41]); SQRADDAC(a[29], a[40]); SQRADDAC(a[30], a[39]); SQRADDAC(a[31], a[38]); SQRADDAC(a[32], a[37]); SQRADDAC(a[33], a[36]); SQRADDAC(a[34], a[35]); SQRADDDB; + COMBA_STORE(b[69]); + + /* output 70 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[63]); SQRADDAC(a[8], a[62]); SQRADDAC(a[9], a[61]); SQRADDAC(a[10], a[60]); SQRADDAC(a[11], a[59]); SQRADDAC(a[12], a[58]); SQRADDAC(a[13], a[57]); SQRADDAC(a[14], a[56]); SQRADDAC(a[15], a[55]); SQRADDAC(a[16], a[54]); SQRADDAC(a[17], a[53]); SQRADDAC(a[18], a[52]); SQRADDAC(a[19], a[51]); SQRADDAC(a[20], a[50]); SQRADDAC(a[21], a[49]); SQRADDAC(a[22], a[48]); SQRADDAC(a[23], a[47]); SQRADDAC(a[24], a[46]); SQRADDAC(a[25], a[45]); SQRADDAC(a[26], a[44]); SQRADDAC(a[27], a[43]); SQRADDAC(a[28], a[42]); SQRADDAC(a[29], a[41]); SQRADDAC(a[30], a[40]); SQRADDAC(a[31], a[39]); SQRADDAC(a[32], a[38]); SQRADDAC(a[33], a[37]); SQRADDAC(a[34], a[36]); SQRADDDB; SQRADD(a[35], a[35]); + COMBA_STORE(b[70]); + + /* output 71 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[63]); SQRADDAC(a[9], a[62]); SQRADDAC(a[10], a[61]); SQRADDAC(a[11], a[60]); SQRADDAC(a[12], a[59]); 
SQRADDAC(a[13], a[58]); SQRADDAC(a[14], a[57]); SQRADDAC(a[15], a[56]); SQRADDAC(a[16], a[55]); SQRADDAC(a[17], a[54]); SQRADDAC(a[18], a[53]); SQRADDAC(a[19], a[52]); SQRADDAC(a[20], a[51]); SQRADDAC(a[21], a[50]); SQRADDAC(a[22], a[49]); SQRADDAC(a[23], a[48]); SQRADDAC(a[24], a[47]); SQRADDAC(a[25], a[46]); SQRADDAC(a[26], a[45]); SQRADDAC(a[27], a[44]); SQRADDAC(a[28], a[43]); SQRADDAC(a[29], a[42]); SQRADDAC(a[30], a[41]); SQRADDAC(a[31], a[40]); SQRADDAC(a[32], a[39]); SQRADDAC(a[33], a[38]); SQRADDAC(a[34], a[37]); SQRADDAC(a[35], a[36]); SQRADDDB; + COMBA_STORE(b[71]); + + /* output 72 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[63]); SQRADDAC(a[10], a[62]); SQRADDAC(a[11], a[61]); SQRADDAC(a[12], a[60]); SQRADDAC(a[13], a[59]); SQRADDAC(a[14], a[58]); SQRADDAC(a[15], a[57]); SQRADDAC(a[16], a[56]); SQRADDAC(a[17], a[55]); SQRADDAC(a[18], a[54]); SQRADDAC(a[19], a[53]); SQRADDAC(a[20], a[52]); SQRADDAC(a[21], a[51]); SQRADDAC(a[22], a[50]); SQRADDAC(a[23], a[49]); SQRADDAC(a[24], a[48]); SQRADDAC(a[25], a[47]); SQRADDAC(a[26], a[46]); SQRADDAC(a[27], a[45]); SQRADDAC(a[28], a[44]); SQRADDAC(a[29], a[43]); SQRADDAC(a[30], a[42]); SQRADDAC(a[31], a[41]); SQRADDAC(a[32], a[40]); SQRADDAC(a[33], a[39]); SQRADDAC(a[34], a[38]); SQRADDAC(a[35], a[37]); SQRADDDB; SQRADD(a[36], a[36]); + COMBA_STORE(b[72]); + + /* output 73 */ + CARRY_FORWARD; + SQRADDSC(a[10], a[63]); SQRADDAC(a[11], a[62]); SQRADDAC(a[12], a[61]); SQRADDAC(a[13], a[60]); SQRADDAC(a[14], a[59]); SQRADDAC(a[15], a[58]); SQRADDAC(a[16], a[57]); SQRADDAC(a[17], a[56]); SQRADDAC(a[18], a[55]); SQRADDAC(a[19], a[54]); SQRADDAC(a[20], a[53]); SQRADDAC(a[21], a[52]); SQRADDAC(a[22], a[51]); SQRADDAC(a[23], a[50]); SQRADDAC(a[24], a[49]); SQRADDAC(a[25], a[48]); SQRADDAC(a[26], a[47]); SQRADDAC(a[27], a[46]); SQRADDAC(a[28], a[45]); SQRADDAC(a[29], a[44]); SQRADDAC(a[30], a[43]); SQRADDAC(a[31], a[42]); SQRADDAC(a[32], a[41]); SQRADDAC(a[33], a[40]); SQRADDAC(a[34], a[39]); SQRADDAC(a[35], a[38]); SQRADDAC(a[36], a[37]); SQRADDDB; + COMBA_STORE(b[73]); + + /* output 74 */ + CARRY_FORWARD; + SQRADDSC(a[11], a[63]); SQRADDAC(a[12], a[62]); SQRADDAC(a[13], a[61]); SQRADDAC(a[14], a[60]); SQRADDAC(a[15], a[59]); SQRADDAC(a[16], a[58]); SQRADDAC(a[17], a[57]); SQRADDAC(a[18], a[56]); SQRADDAC(a[19], a[55]); SQRADDAC(a[20], a[54]); SQRADDAC(a[21], a[53]); SQRADDAC(a[22], a[52]); SQRADDAC(a[23], a[51]); SQRADDAC(a[24], a[50]); SQRADDAC(a[25], a[49]); SQRADDAC(a[26], a[48]); SQRADDAC(a[27], a[47]); SQRADDAC(a[28], a[46]); SQRADDAC(a[29], a[45]); SQRADDAC(a[30], a[44]); SQRADDAC(a[31], a[43]); SQRADDAC(a[32], a[42]); SQRADDAC(a[33], a[41]); SQRADDAC(a[34], a[40]); SQRADDAC(a[35], a[39]); SQRADDAC(a[36], a[38]); SQRADDDB; SQRADD(a[37], a[37]); + COMBA_STORE(b[74]); + + /* output 75 */ + CARRY_FORWARD; + SQRADDSC(a[12], a[63]); SQRADDAC(a[13], a[62]); SQRADDAC(a[14], a[61]); SQRADDAC(a[15], a[60]); SQRADDAC(a[16], a[59]); SQRADDAC(a[17], a[58]); SQRADDAC(a[18], a[57]); SQRADDAC(a[19], a[56]); SQRADDAC(a[20], a[55]); SQRADDAC(a[21], a[54]); SQRADDAC(a[22], a[53]); SQRADDAC(a[23], a[52]); SQRADDAC(a[24], a[51]); SQRADDAC(a[25], a[50]); SQRADDAC(a[26], a[49]); SQRADDAC(a[27], a[48]); SQRADDAC(a[28], a[47]); SQRADDAC(a[29], a[46]); SQRADDAC(a[30], a[45]); SQRADDAC(a[31], a[44]); SQRADDAC(a[32], a[43]); SQRADDAC(a[33], a[42]); SQRADDAC(a[34], a[41]); SQRADDAC(a[35], a[40]); SQRADDAC(a[36], a[39]); SQRADDAC(a[37], a[38]); SQRADDDB; + COMBA_STORE(b[75]); + + /* output 76 */ + CARRY_FORWARD; + SQRADDSC(a[13], a[63]); SQRADDAC(a[14], a[62]); 
SQRADDAC(a[15], a[61]); SQRADDAC(a[16], a[60]); SQRADDAC(a[17], a[59]); SQRADDAC(a[18], a[58]); SQRADDAC(a[19], a[57]); SQRADDAC(a[20], a[56]); SQRADDAC(a[21], a[55]); SQRADDAC(a[22], a[54]); SQRADDAC(a[23], a[53]); SQRADDAC(a[24], a[52]); SQRADDAC(a[25], a[51]); SQRADDAC(a[26], a[50]); SQRADDAC(a[27], a[49]); SQRADDAC(a[28], a[48]); SQRADDAC(a[29], a[47]); SQRADDAC(a[30], a[46]); SQRADDAC(a[31], a[45]); SQRADDAC(a[32], a[44]); SQRADDAC(a[33], a[43]); SQRADDAC(a[34], a[42]); SQRADDAC(a[35], a[41]); SQRADDAC(a[36], a[40]); SQRADDAC(a[37], a[39]); SQRADDDB; SQRADD(a[38], a[38]); + COMBA_STORE(b[76]); + + /* output 77 */ + CARRY_FORWARD; + SQRADDSC(a[14], a[63]); SQRADDAC(a[15], a[62]); SQRADDAC(a[16], a[61]); SQRADDAC(a[17], a[60]); SQRADDAC(a[18], a[59]); SQRADDAC(a[19], a[58]); SQRADDAC(a[20], a[57]); SQRADDAC(a[21], a[56]); SQRADDAC(a[22], a[55]); SQRADDAC(a[23], a[54]); SQRADDAC(a[24], a[53]); SQRADDAC(a[25], a[52]); SQRADDAC(a[26], a[51]); SQRADDAC(a[27], a[50]); SQRADDAC(a[28], a[49]); SQRADDAC(a[29], a[48]); SQRADDAC(a[30], a[47]); SQRADDAC(a[31], a[46]); SQRADDAC(a[32], a[45]); SQRADDAC(a[33], a[44]); SQRADDAC(a[34], a[43]); SQRADDAC(a[35], a[42]); SQRADDAC(a[36], a[41]); SQRADDAC(a[37], a[40]); SQRADDAC(a[38], a[39]); SQRADDDB; + COMBA_STORE(b[77]); + + /* output 78 */ + CARRY_FORWARD; + SQRADDSC(a[15], a[63]); SQRADDAC(a[16], a[62]); SQRADDAC(a[17], a[61]); SQRADDAC(a[18], a[60]); SQRADDAC(a[19], a[59]); SQRADDAC(a[20], a[58]); SQRADDAC(a[21], a[57]); SQRADDAC(a[22], a[56]); SQRADDAC(a[23], a[55]); SQRADDAC(a[24], a[54]); SQRADDAC(a[25], a[53]); SQRADDAC(a[26], a[52]); SQRADDAC(a[27], a[51]); SQRADDAC(a[28], a[50]); SQRADDAC(a[29], a[49]); SQRADDAC(a[30], a[48]); SQRADDAC(a[31], a[47]); SQRADDAC(a[32], a[46]); SQRADDAC(a[33], a[45]); SQRADDAC(a[34], a[44]); SQRADDAC(a[35], a[43]); SQRADDAC(a[36], a[42]); SQRADDAC(a[37], a[41]); SQRADDAC(a[38], a[40]); SQRADDDB; SQRADD(a[39], a[39]); + COMBA_STORE(b[78]); + + /* output 79 */ + CARRY_FORWARD; + SQRADDSC(a[16], a[63]); SQRADDAC(a[17], a[62]); SQRADDAC(a[18], a[61]); SQRADDAC(a[19], a[60]); SQRADDAC(a[20], a[59]); SQRADDAC(a[21], a[58]); SQRADDAC(a[22], a[57]); SQRADDAC(a[23], a[56]); SQRADDAC(a[24], a[55]); SQRADDAC(a[25], a[54]); SQRADDAC(a[26], a[53]); SQRADDAC(a[27], a[52]); SQRADDAC(a[28], a[51]); SQRADDAC(a[29], a[50]); SQRADDAC(a[30], a[49]); SQRADDAC(a[31], a[48]); SQRADDAC(a[32], a[47]); SQRADDAC(a[33], a[46]); SQRADDAC(a[34], a[45]); SQRADDAC(a[35], a[44]); SQRADDAC(a[36], a[43]); SQRADDAC(a[37], a[42]); SQRADDAC(a[38], a[41]); SQRADDAC(a[39], a[40]); SQRADDDB; + COMBA_STORE(b[79]); + + /* output 80 */ + CARRY_FORWARD; + SQRADDSC(a[17], a[63]); SQRADDAC(a[18], a[62]); SQRADDAC(a[19], a[61]); SQRADDAC(a[20], a[60]); SQRADDAC(a[21], a[59]); SQRADDAC(a[22], a[58]); SQRADDAC(a[23], a[57]); SQRADDAC(a[24], a[56]); SQRADDAC(a[25], a[55]); SQRADDAC(a[26], a[54]); SQRADDAC(a[27], a[53]); SQRADDAC(a[28], a[52]); SQRADDAC(a[29], a[51]); SQRADDAC(a[30], a[50]); SQRADDAC(a[31], a[49]); SQRADDAC(a[32], a[48]); SQRADDAC(a[33], a[47]); SQRADDAC(a[34], a[46]); SQRADDAC(a[35], a[45]); SQRADDAC(a[36], a[44]); SQRADDAC(a[37], a[43]); SQRADDAC(a[38], a[42]); SQRADDAC(a[39], a[41]); SQRADDDB; SQRADD(a[40], a[40]); + COMBA_STORE(b[80]); + + /* output 81 */ + CARRY_FORWARD; + SQRADDSC(a[18], a[63]); SQRADDAC(a[19], a[62]); SQRADDAC(a[20], a[61]); SQRADDAC(a[21], a[60]); SQRADDAC(a[22], a[59]); SQRADDAC(a[23], a[58]); SQRADDAC(a[24], a[57]); SQRADDAC(a[25], a[56]); SQRADDAC(a[26], a[55]); SQRADDAC(a[27], a[54]); SQRADDAC(a[28], a[53]); 
SQRADDAC(a[29], a[52]); SQRADDAC(a[30], a[51]); SQRADDAC(a[31], a[50]); SQRADDAC(a[32], a[49]); SQRADDAC(a[33], a[48]); SQRADDAC(a[34], a[47]); SQRADDAC(a[35], a[46]); SQRADDAC(a[36], a[45]); SQRADDAC(a[37], a[44]); SQRADDAC(a[38], a[43]); SQRADDAC(a[39], a[42]); SQRADDAC(a[40], a[41]); SQRADDDB; + COMBA_STORE(b[81]); + + /* output 82 */ + CARRY_FORWARD; + SQRADDSC(a[19], a[63]); SQRADDAC(a[20], a[62]); SQRADDAC(a[21], a[61]); SQRADDAC(a[22], a[60]); SQRADDAC(a[23], a[59]); SQRADDAC(a[24], a[58]); SQRADDAC(a[25], a[57]); SQRADDAC(a[26], a[56]); SQRADDAC(a[27], a[55]); SQRADDAC(a[28], a[54]); SQRADDAC(a[29], a[53]); SQRADDAC(a[30], a[52]); SQRADDAC(a[31], a[51]); SQRADDAC(a[32], a[50]); SQRADDAC(a[33], a[49]); SQRADDAC(a[34], a[48]); SQRADDAC(a[35], a[47]); SQRADDAC(a[36], a[46]); SQRADDAC(a[37], a[45]); SQRADDAC(a[38], a[44]); SQRADDAC(a[39], a[43]); SQRADDAC(a[40], a[42]); SQRADDDB; SQRADD(a[41], a[41]); + COMBA_STORE(b[82]); + + /* output 83 */ + CARRY_FORWARD; + SQRADDSC(a[20], a[63]); SQRADDAC(a[21], a[62]); SQRADDAC(a[22], a[61]); SQRADDAC(a[23], a[60]); SQRADDAC(a[24], a[59]); SQRADDAC(a[25], a[58]); SQRADDAC(a[26], a[57]); SQRADDAC(a[27], a[56]); SQRADDAC(a[28], a[55]); SQRADDAC(a[29], a[54]); SQRADDAC(a[30], a[53]); SQRADDAC(a[31], a[52]); SQRADDAC(a[32], a[51]); SQRADDAC(a[33], a[50]); SQRADDAC(a[34], a[49]); SQRADDAC(a[35], a[48]); SQRADDAC(a[36], a[47]); SQRADDAC(a[37], a[46]); SQRADDAC(a[38], a[45]); SQRADDAC(a[39], a[44]); SQRADDAC(a[40], a[43]); SQRADDAC(a[41], a[42]); SQRADDDB; + COMBA_STORE(b[83]); + + /* output 84 */ + CARRY_FORWARD; + SQRADDSC(a[21], a[63]); SQRADDAC(a[22], a[62]); SQRADDAC(a[23], a[61]); SQRADDAC(a[24], a[60]); SQRADDAC(a[25], a[59]); SQRADDAC(a[26], a[58]); SQRADDAC(a[27], a[57]); SQRADDAC(a[28], a[56]); SQRADDAC(a[29], a[55]); SQRADDAC(a[30], a[54]); SQRADDAC(a[31], a[53]); SQRADDAC(a[32], a[52]); SQRADDAC(a[33], a[51]); SQRADDAC(a[34], a[50]); SQRADDAC(a[35], a[49]); SQRADDAC(a[36], a[48]); SQRADDAC(a[37], a[47]); SQRADDAC(a[38], a[46]); SQRADDAC(a[39], a[45]); SQRADDAC(a[40], a[44]); SQRADDAC(a[41], a[43]); SQRADDDB; SQRADD(a[42], a[42]); + COMBA_STORE(b[84]); + + /* output 85 */ + CARRY_FORWARD; + SQRADDSC(a[22], a[63]); SQRADDAC(a[23], a[62]); SQRADDAC(a[24], a[61]); SQRADDAC(a[25], a[60]); SQRADDAC(a[26], a[59]); SQRADDAC(a[27], a[58]); SQRADDAC(a[28], a[57]); SQRADDAC(a[29], a[56]); SQRADDAC(a[30], a[55]); SQRADDAC(a[31], a[54]); SQRADDAC(a[32], a[53]); SQRADDAC(a[33], a[52]); SQRADDAC(a[34], a[51]); SQRADDAC(a[35], a[50]); SQRADDAC(a[36], a[49]); SQRADDAC(a[37], a[48]); SQRADDAC(a[38], a[47]); SQRADDAC(a[39], a[46]); SQRADDAC(a[40], a[45]); SQRADDAC(a[41], a[44]); SQRADDAC(a[42], a[43]); SQRADDDB; + COMBA_STORE(b[85]); + + /* output 86 */ + CARRY_FORWARD; + SQRADDSC(a[23], a[63]); SQRADDAC(a[24], a[62]); SQRADDAC(a[25], a[61]); SQRADDAC(a[26], a[60]); SQRADDAC(a[27], a[59]); SQRADDAC(a[28], a[58]); SQRADDAC(a[29], a[57]); SQRADDAC(a[30], a[56]); SQRADDAC(a[31], a[55]); SQRADDAC(a[32], a[54]); SQRADDAC(a[33], a[53]); SQRADDAC(a[34], a[52]); SQRADDAC(a[35], a[51]); SQRADDAC(a[36], a[50]); SQRADDAC(a[37], a[49]); SQRADDAC(a[38], a[48]); SQRADDAC(a[39], a[47]); SQRADDAC(a[40], a[46]); SQRADDAC(a[41], a[45]); SQRADDAC(a[42], a[44]); SQRADDDB; SQRADD(a[43], a[43]); + COMBA_STORE(b[86]); + + /* output 87 */ + CARRY_FORWARD; + SQRADDSC(a[24], a[63]); SQRADDAC(a[25], a[62]); SQRADDAC(a[26], a[61]); SQRADDAC(a[27], a[60]); SQRADDAC(a[28], a[59]); SQRADDAC(a[29], a[58]); SQRADDAC(a[30], a[57]); SQRADDAC(a[31], a[56]); SQRADDAC(a[32], a[55]); 
SQRADDAC(a[33], a[54]); SQRADDAC(a[34], a[53]); SQRADDAC(a[35], a[52]); SQRADDAC(a[36], a[51]); SQRADDAC(a[37], a[50]); SQRADDAC(a[38], a[49]); SQRADDAC(a[39], a[48]); SQRADDAC(a[40], a[47]); SQRADDAC(a[41], a[46]); SQRADDAC(a[42], a[45]); SQRADDAC(a[43], a[44]); SQRADDDB; + COMBA_STORE(b[87]); + + /* output 88 */ + CARRY_FORWARD; + SQRADDSC(a[25], a[63]); SQRADDAC(a[26], a[62]); SQRADDAC(a[27], a[61]); SQRADDAC(a[28], a[60]); SQRADDAC(a[29], a[59]); SQRADDAC(a[30], a[58]); SQRADDAC(a[31], a[57]); SQRADDAC(a[32], a[56]); SQRADDAC(a[33], a[55]); SQRADDAC(a[34], a[54]); SQRADDAC(a[35], a[53]); SQRADDAC(a[36], a[52]); SQRADDAC(a[37], a[51]); SQRADDAC(a[38], a[50]); SQRADDAC(a[39], a[49]); SQRADDAC(a[40], a[48]); SQRADDAC(a[41], a[47]); SQRADDAC(a[42], a[46]); SQRADDAC(a[43], a[45]); SQRADDDB; SQRADD(a[44], a[44]); + COMBA_STORE(b[88]); + + /* output 89 */ + CARRY_FORWARD; + SQRADDSC(a[26], a[63]); SQRADDAC(a[27], a[62]); SQRADDAC(a[28], a[61]); SQRADDAC(a[29], a[60]); SQRADDAC(a[30], a[59]); SQRADDAC(a[31], a[58]); SQRADDAC(a[32], a[57]); SQRADDAC(a[33], a[56]); SQRADDAC(a[34], a[55]); SQRADDAC(a[35], a[54]); SQRADDAC(a[36], a[53]); SQRADDAC(a[37], a[52]); SQRADDAC(a[38], a[51]); SQRADDAC(a[39], a[50]); SQRADDAC(a[40], a[49]); SQRADDAC(a[41], a[48]); SQRADDAC(a[42], a[47]); SQRADDAC(a[43], a[46]); SQRADDAC(a[44], a[45]); SQRADDDB; + COMBA_STORE(b[89]); + + /* output 90 */ + CARRY_FORWARD; + SQRADDSC(a[27], a[63]); SQRADDAC(a[28], a[62]); SQRADDAC(a[29], a[61]); SQRADDAC(a[30], a[60]); SQRADDAC(a[31], a[59]); SQRADDAC(a[32], a[58]); SQRADDAC(a[33], a[57]); SQRADDAC(a[34], a[56]); SQRADDAC(a[35], a[55]); SQRADDAC(a[36], a[54]); SQRADDAC(a[37], a[53]); SQRADDAC(a[38], a[52]); SQRADDAC(a[39], a[51]); SQRADDAC(a[40], a[50]); SQRADDAC(a[41], a[49]); SQRADDAC(a[42], a[48]); SQRADDAC(a[43], a[47]); SQRADDAC(a[44], a[46]); SQRADDDB; SQRADD(a[45], a[45]); + COMBA_STORE(b[90]); + + /* output 91 */ + CARRY_FORWARD; + SQRADDSC(a[28], a[63]); SQRADDAC(a[29], a[62]); SQRADDAC(a[30], a[61]); SQRADDAC(a[31], a[60]); SQRADDAC(a[32], a[59]); SQRADDAC(a[33], a[58]); SQRADDAC(a[34], a[57]); SQRADDAC(a[35], a[56]); SQRADDAC(a[36], a[55]); SQRADDAC(a[37], a[54]); SQRADDAC(a[38], a[53]); SQRADDAC(a[39], a[52]); SQRADDAC(a[40], a[51]); SQRADDAC(a[41], a[50]); SQRADDAC(a[42], a[49]); SQRADDAC(a[43], a[48]); SQRADDAC(a[44], a[47]); SQRADDAC(a[45], a[46]); SQRADDDB; + COMBA_STORE(b[91]); + + /* output 92 */ + CARRY_FORWARD; + SQRADDSC(a[29], a[63]); SQRADDAC(a[30], a[62]); SQRADDAC(a[31], a[61]); SQRADDAC(a[32], a[60]); SQRADDAC(a[33], a[59]); SQRADDAC(a[34], a[58]); SQRADDAC(a[35], a[57]); SQRADDAC(a[36], a[56]); SQRADDAC(a[37], a[55]); SQRADDAC(a[38], a[54]); SQRADDAC(a[39], a[53]); SQRADDAC(a[40], a[52]); SQRADDAC(a[41], a[51]); SQRADDAC(a[42], a[50]); SQRADDAC(a[43], a[49]); SQRADDAC(a[44], a[48]); SQRADDAC(a[45], a[47]); SQRADDDB; SQRADD(a[46], a[46]); + COMBA_STORE(b[92]); + + /* output 93 */ + CARRY_FORWARD; + SQRADDSC(a[30], a[63]); SQRADDAC(a[31], a[62]); SQRADDAC(a[32], a[61]); SQRADDAC(a[33], a[60]); SQRADDAC(a[34], a[59]); SQRADDAC(a[35], a[58]); SQRADDAC(a[36], a[57]); SQRADDAC(a[37], a[56]); SQRADDAC(a[38], a[55]); SQRADDAC(a[39], a[54]); SQRADDAC(a[40], a[53]); SQRADDAC(a[41], a[52]); SQRADDAC(a[42], a[51]); SQRADDAC(a[43], a[50]); SQRADDAC(a[44], a[49]); SQRADDAC(a[45], a[48]); SQRADDAC(a[46], a[47]); SQRADDDB; + COMBA_STORE(b[93]); + + /* output 94 */ + CARRY_FORWARD; + SQRADDSC(a[31], a[63]); SQRADDAC(a[32], a[62]); SQRADDAC(a[33], a[61]); SQRADDAC(a[34], a[60]); SQRADDAC(a[35], a[59]); SQRADDAC(a[36], 
a[58]); SQRADDAC(a[37], a[57]); SQRADDAC(a[38], a[56]); SQRADDAC(a[39], a[55]); SQRADDAC(a[40], a[54]); SQRADDAC(a[41], a[53]); SQRADDAC(a[42], a[52]); SQRADDAC(a[43], a[51]); SQRADDAC(a[44], a[50]); SQRADDAC(a[45], a[49]); SQRADDAC(a[46], a[48]); SQRADDDB; SQRADD(a[47], a[47]); + COMBA_STORE(b[94]); + + /* output 95 */ + CARRY_FORWARD; + SQRADDSC(a[32], a[63]); SQRADDAC(a[33], a[62]); SQRADDAC(a[34], a[61]); SQRADDAC(a[35], a[60]); SQRADDAC(a[36], a[59]); SQRADDAC(a[37], a[58]); SQRADDAC(a[38], a[57]); SQRADDAC(a[39], a[56]); SQRADDAC(a[40], a[55]); SQRADDAC(a[41], a[54]); SQRADDAC(a[42], a[53]); SQRADDAC(a[43], a[52]); SQRADDAC(a[44], a[51]); SQRADDAC(a[45], a[50]); SQRADDAC(a[46], a[49]); SQRADDAC(a[47], a[48]); SQRADDDB; + COMBA_STORE(b[95]); + + /* output 96 */ + CARRY_FORWARD; + SQRADDSC(a[33], a[63]); SQRADDAC(a[34], a[62]); SQRADDAC(a[35], a[61]); SQRADDAC(a[36], a[60]); SQRADDAC(a[37], a[59]); SQRADDAC(a[38], a[58]); SQRADDAC(a[39], a[57]); SQRADDAC(a[40], a[56]); SQRADDAC(a[41], a[55]); SQRADDAC(a[42], a[54]); SQRADDAC(a[43], a[53]); SQRADDAC(a[44], a[52]); SQRADDAC(a[45], a[51]); SQRADDAC(a[46], a[50]); SQRADDAC(a[47], a[49]); SQRADDDB; SQRADD(a[48], a[48]); + COMBA_STORE(b[96]); + + /* output 97 */ + CARRY_FORWARD; + SQRADDSC(a[34], a[63]); SQRADDAC(a[35], a[62]); SQRADDAC(a[36], a[61]); SQRADDAC(a[37], a[60]); SQRADDAC(a[38], a[59]); SQRADDAC(a[39], a[58]); SQRADDAC(a[40], a[57]); SQRADDAC(a[41], a[56]); SQRADDAC(a[42], a[55]); SQRADDAC(a[43], a[54]); SQRADDAC(a[44], a[53]); SQRADDAC(a[45], a[52]); SQRADDAC(a[46], a[51]); SQRADDAC(a[47], a[50]); SQRADDAC(a[48], a[49]); SQRADDDB; + COMBA_STORE(b[97]); + + /* output 98 */ + CARRY_FORWARD; + SQRADDSC(a[35], a[63]); SQRADDAC(a[36], a[62]); SQRADDAC(a[37], a[61]); SQRADDAC(a[38], a[60]); SQRADDAC(a[39], a[59]); SQRADDAC(a[40], a[58]); SQRADDAC(a[41], a[57]); SQRADDAC(a[42], a[56]); SQRADDAC(a[43], a[55]); SQRADDAC(a[44], a[54]); SQRADDAC(a[45], a[53]); SQRADDAC(a[46], a[52]); SQRADDAC(a[47], a[51]); SQRADDAC(a[48], a[50]); SQRADDDB; SQRADD(a[49], a[49]); + COMBA_STORE(b[98]); + + /* output 99 */ + CARRY_FORWARD; + SQRADDSC(a[36], a[63]); SQRADDAC(a[37], a[62]); SQRADDAC(a[38], a[61]); SQRADDAC(a[39], a[60]); SQRADDAC(a[40], a[59]); SQRADDAC(a[41], a[58]); SQRADDAC(a[42], a[57]); SQRADDAC(a[43], a[56]); SQRADDAC(a[44], a[55]); SQRADDAC(a[45], a[54]); SQRADDAC(a[46], a[53]); SQRADDAC(a[47], a[52]); SQRADDAC(a[48], a[51]); SQRADDAC(a[49], a[50]); SQRADDDB; + COMBA_STORE(b[99]); + + /* output 100 */ + CARRY_FORWARD; + SQRADDSC(a[37], a[63]); SQRADDAC(a[38], a[62]); SQRADDAC(a[39], a[61]); SQRADDAC(a[40], a[60]); SQRADDAC(a[41], a[59]); SQRADDAC(a[42], a[58]); SQRADDAC(a[43], a[57]); SQRADDAC(a[44], a[56]); SQRADDAC(a[45], a[55]); SQRADDAC(a[46], a[54]); SQRADDAC(a[47], a[53]); SQRADDAC(a[48], a[52]); SQRADDAC(a[49], a[51]); SQRADDDB; SQRADD(a[50], a[50]); + COMBA_STORE(b[100]); + + /* output 101 */ + CARRY_FORWARD; + SQRADDSC(a[38], a[63]); SQRADDAC(a[39], a[62]); SQRADDAC(a[40], a[61]); SQRADDAC(a[41], a[60]); SQRADDAC(a[42], a[59]); SQRADDAC(a[43], a[58]); SQRADDAC(a[44], a[57]); SQRADDAC(a[45], a[56]); SQRADDAC(a[46], a[55]); SQRADDAC(a[47], a[54]); SQRADDAC(a[48], a[53]); SQRADDAC(a[49], a[52]); SQRADDAC(a[50], a[51]); SQRADDDB; + COMBA_STORE(b[101]); + + /* output 102 */ + CARRY_FORWARD; + SQRADDSC(a[39], a[63]); SQRADDAC(a[40], a[62]); SQRADDAC(a[41], a[61]); SQRADDAC(a[42], a[60]); SQRADDAC(a[43], a[59]); SQRADDAC(a[44], a[58]); SQRADDAC(a[45], a[57]); SQRADDAC(a[46], a[56]); SQRADDAC(a[47], a[55]); SQRADDAC(a[48], a[54]); 
SQRADDAC(a[49], a[53]); SQRADDAC(a[50], a[52]); SQRADDDB; SQRADD(a[51], a[51]); + COMBA_STORE(b[102]); + + /* output 103 */ + CARRY_FORWARD; + SQRADDSC(a[40], a[63]); SQRADDAC(a[41], a[62]); SQRADDAC(a[42], a[61]); SQRADDAC(a[43], a[60]); SQRADDAC(a[44], a[59]); SQRADDAC(a[45], a[58]); SQRADDAC(a[46], a[57]); SQRADDAC(a[47], a[56]); SQRADDAC(a[48], a[55]); SQRADDAC(a[49], a[54]); SQRADDAC(a[50], a[53]); SQRADDAC(a[51], a[52]); SQRADDDB; + COMBA_STORE(b[103]); + + /* output 104 */ + CARRY_FORWARD; + SQRADDSC(a[41], a[63]); SQRADDAC(a[42], a[62]); SQRADDAC(a[43], a[61]); SQRADDAC(a[44], a[60]); SQRADDAC(a[45], a[59]); SQRADDAC(a[46], a[58]); SQRADDAC(a[47], a[57]); SQRADDAC(a[48], a[56]); SQRADDAC(a[49], a[55]); SQRADDAC(a[50], a[54]); SQRADDAC(a[51], a[53]); SQRADDDB; SQRADD(a[52], a[52]); + COMBA_STORE(b[104]); + + /* output 105 */ + CARRY_FORWARD; + SQRADDSC(a[42], a[63]); SQRADDAC(a[43], a[62]); SQRADDAC(a[44], a[61]); SQRADDAC(a[45], a[60]); SQRADDAC(a[46], a[59]); SQRADDAC(a[47], a[58]); SQRADDAC(a[48], a[57]); SQRADDAC(a[49], a[56]); SQRADDAC(a[50], a[55]); SQRADDAC(a[51], a[54]); SQRADDAC(a[52], a[53]); SQRADDDB; + COMBA_STORE(b[105]); + + /* output 106 */ + CARRY_FORWARD; + SQRADDSC(a[43], a[63]); SQRADDAC(a[44], a[62]); SQRADDAC(a[45], a[61]); SQRADDAC(a[46], a[60]); SQRADDAC(a[47], a[59]); SQRADDAC(a[48], a[58]); SQRADDAC(a[49], a[57]); SQRADDAC(a[50], a[56]); SQRADDAC(a[51], a[55]); SQRADDAC(a[52], a[54]); SQRADDDB; SQRADD(a[53], a[53]); + COMBA_STORE(b[106]); + + /* output 107 */ + CARRY_FORWARD; + SQRADDSC(a[44], a[63]); SQRADDAC(a[45], a[62]); SQRADDAC(a[46], a[61]); SQRADDAC(a[47], a[60]); SQRADDAC(a[48], a[59]); SQRADDAC(a[49], a[58]); SQRADDAC(a[50], a[57]); SQRADDAC(a[51], a[56]); SQRADDAC(a[52], a[55]); SQRADDAC(a[53], a[54]); SQRADDDB; + COMBA_STORE(b[107]); + + /* output 108 */ + CARRY_FORWARD; + SQRADDSC(a[45], a[63]); SQRADDAC(a[46], a[62]); SQRADDAC(a[47], a[61]); SQRADDAC(a[48], a[60]); SQRADDAC(a[49], a[59]); SQRADDAC(a[50], a[58]); SQRADDAC(a[51], a[57]); SQRADDAC(a[52], a[56]); SQRADDAC(a[53], a[55]); SQRADDDB; SQRADD(a[54], a[54]); + COMBA_STORE(b[108]); + + /* output 109 */ + CARRY_FORWARD; + SQRADDSC(a[46], a[63]); SQRADDAC(a[47], a[62]); SQRADDAC(a[48], a[61]); SQRADDAC(a[49], a[60]); SQRADDAC(a[50], a[59]); SQRADDAC(a[51], a[58]); SQRADDAC(a[52], a[57]); SQRADDAC(a[53], a[56]); SQRADDAC(a[54], a[55]); SQRADDDB; + COMBA_STORE(b[109]); + + /* output 110 */ + CARRY_FORWARD; + SQRADDSC(a[47], a[63]); SQRADDAC(a[48], a[62]); SQRADDAC(a[49], a[61]); SQRADDAC(a[50], a[60]); SQRADDAC(a[51], a[59]); SQRADDAC(a[52], a[58]); SQRADDAC(a[53], a[57]); SQRADDAC(a[54], a[56]); SQRADDDB; SQRADD(a[55], a[55]); + COMBA_STORE(b[110]); + + /* output 111 */ + CARRY_FORWARD; + SQRADDSC(a[48], a[63]); SQRADDAC(a[49], a[62]); SQRADDAC(a[50], a[61]); SQRADDAC(a[51], a[60]); SQRADDAC(a[52], a[59]); SQRADDAC(a[53], a[58]); SQRADDAC(a[54], a[57]); SQRADDAC(a[55], a[56]); SQRADDDB; + COMBA_STORE(b[111]); + + /* output 112 */ + CARRY_FORWARD; + SQRADDSC(a[49], a[63]); SQRADDAC(a[50], a[62]); SQRADDAC(a[51], a[61]); SQRADDAC(a[52], a[60]); SQRADDAC(a[53], a[59]); SQRADDAC(a[54], a[58]); SQRADDAC(a[55], a[57]); SQRADDDB; SQRADD(a[56], a[56]); + COMBA_STORE(b[112]); + + /* output 113 */ + CARRY_FORWARD; + SQRADDSC(a[50], a[63]); SQRADDAC(a[51], a[62]); SQRADDAC(a[52], a[61]); SQRADDAC(a[53], a[60]); SQRADDAC(a[54], a[59]); SQRADDAC(a[55], a[58]); SQRADDAC(a[56], a[57]); SQRADDDB; + COMBA_STORE(b[113]); + + /* output 114 */ + CARRY_FORWARD; + SQRADDSC(a[51], a[63]); SQRADDAC(a[52], a[62]); 
SQRADDAC(a[53], a[61]); SQRADDAC(a[54], a[60]); SQRADDAC(a[55], a[59]); SQRADDAC(a[56], a[58]); SQRADDDB; SQRADD(a[57], a[57]);
+   COMBA_STORE(b[114]);
+
+   /* output 115 */
+   CARRY_FORWARD;
+   SQRADDSC(a[52], a[63]); SQRADDAC(a[53], a[62]); SQRADDAC(a[54], a[61]); SQRADDAC(a[55], a[60]); SQRADDAC(a[56], a[59]); SQRADDAC(a[57], a[58]); SQRADDDB;
+   COMBA_STORE(b[115]);
+
+   /* output 116 */
+   CARRY_FORWARD;
+   SQRADDSC(a[53], a[63]); SQRADDAC(a[54], a[62]); SQRADDAC(a[55], a[61]); SQRADDAC(a[56], a[60]); SQRADDAC(a[57], a[59]); SQRADDDB; SQRADD(a[58], a[58]);
+   COMBA_STORE(b[116]);
+
+   /* output 117 */
+   CARRY_FORWARD;
+   SQRADDSC(a[54], a[63]); SQRADDAC(a[55], a[62]); SQRADDAC(a[56], a[61]); SQRADDAC(a[57], a[60]); SQRADDAC(a[58], a[59]); SQRADDDB;
+   COMBA_STORE(b[117]);
+
+   /* output 118 */
+   CARRY_FORWARD;
+   SQRADDSC(a[55], a[63]); SQRADDAC(a[56], a[62]); SQRADDAC(a[57], a[61]); SQRADDAC(a[58], a[60]); SQRADDDB; SQRADD(a[59], a[59]);
+   COMBA_STORE(b[118]);
+
+   /* output 119 */
+   CARRY_FORWARD;
+   SQRADDSC(a[56], a[63]); SQRADDAC(a[57], a[62]); SQRADDAC(a[58], a[61]); SQRADDAC(a[59], a[60]); SQRADDDB;
+   COMBA_STORE(b[119]);
+
+   /* output 120 */
+   CARRY_FORWARD;
+   SQRADDSC(a[57], a[63]); SQRADDAC(a[58], a[62]); SQRADDAC(a[59], a[61]); SQRADDDB; SQRADD(a[60], a[60]);
+   COMBA_STORE(b[120]);
+
+   /* output 121 */
+   CARRY_FORWARD;
+   SQRADDSC(a[58], a[63]); SQRADDAC(a[59], a[62]); SQRADDAC(a[60], a[61]); SQRADDDB;
+   COMBA_STORE(b[121]);
+
+   /* output 122 */
+   CARRY_FORWARD;
+   SQRADD2(a[59], a[63]); SQRADD2(a[60], a[62]); SQRADD(a[61], a[61]);
+   COMBA_STORE(b[122]);
+
+   /* output 123 */
+   CARRY_FORWARD;
+   SQRADD2(a[60], a[63]); SQRADD2(a[61], a[62]);
+   COMBA_STORE(b[123]);
+
+   /* output 124 */
+   CARRY_FORWARD;
+   SQRADD2(a[61], a[63]); SQRADD(a[62], a[62]);
+   COMBA_STORE(b[124]);
+
+   /* output 125 */
+   CARRY_FORWARD;
+   SQRADD2(a[62], a[63]);
+   COMBA_STORE(b[125]);
+
+   /* output 126 */
+   CARRY_FORWARD;
+   SQRADD(a[63], a[63]);
+   COMBA_STORE(b[126]);
+   COMBA_STORE2(b[127]);
+   COMBA_FINI;
+
+   B->used = 128;
+   B->sign = FP_ZPOS;
+   XMEMCPY(B->dp, b, 128 * sizeof(fp_digit));
+   fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+   XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+   return FP_OKAY;
+}
+#endif
+
+
diff --git a/client/wolfssl/wolfcrypt/src/fp_sqr_comba_7.i b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_7.i
new file mode 100644
index 0000000..09bf995
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_7.i
@@ -0,0 +1,127 @@
+/* fp_sqr_comba_7.i
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_SQR7 +int fp_sqr_comba7(fp_int *A, fp_int *B) +{ + fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0; +#ifdef TFM_ISO + fp_word tt; +#endif +#ifndef WOLFSSL_SMALL_STACK + fp_digit b[14]; +#else + fp_digit *b; +#endif + +#ifdef WOLFSSL_SMALL_STACK + b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 14, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (b == NULL) + return FP_MEM; +#endif + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADD2(a[2], a[6]); SQRADD2(a[3], a[5]); SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADD2(a[3], a[6]); SQRADD2(a[4], a[5]); + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADD2(a[4], a[6]); SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADD2(a[5], a[6]); + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + COMBA_STORE2(b[13]); + COMBA_FINI; + + B->used = 14; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 14 * sizeof(fp_digit)); + fp_clamp(B); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} +#endif + + diff --git a/client/wolfssl/wolfcrypt/src/fp_sqr_comba_8.i b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_8.i new file mode 100644 index 0000000..23fd8e4 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_8.i @@ -0,0 +1,137 @@ +/* fp_sqr_comba_8.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_SQR8 +int fp_sqr_comba8(fp_int *A, fp_int *B) +{ + fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0; +#ifdef TFM_ISO + fp_word tt; +#endif +#ifndef WOLFSSL_SMALL_STACK + fp_digit b[16]; +#else + fp_digit *b; +#endif + +#ifdef WOLFSSL_SMALL_STACK + b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (b == NULL) + return FP_MEM; +#endif + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADD2(a[3], a[7]); SQRADD2(a[4], a[6]); SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADD2(a[4], a[7]); SQRADD2(a[5], a[6]); + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADD2(a[5], a[7]); SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADD2(a[6], a[7]); + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + COMBA_STORE2(b[15]); + COMBA_FINI; + + B->used = 16; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 16 * sizeof(fp_digit)); + fp_clamp(B); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} +#endif + + diff --git a/client/wolfssl/wolfcrypt/src/fp_sqr_comba_9.i b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_9.i new file mode 100644 index 0000000..ed6451a --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_9.i @@ -0,0 +1,147 @@ +/* fp_sqr_comba_9.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef TFM_SQR9 +int fp_sqr_comba9(fp_int *A, fp_int *B) +{ + fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0; +#ifdef TFM_ISO + fp_word tt; +#endif +#ifndef WOLFSSL_SMALL_STACK + fp_digit b[18]; +#else + fp_digit *b; +#endif + +#ifdef WOLFSSL_SMALL_STACK + b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 18, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (b == NULL) + return FP_MEM; +#endif + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADD2(a[4], a[8]); SQRADD2(a[5], a[7]); SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADD2(a[5], a[8]); SQRADD2(a[6], a[7]); + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADD2(a[6], a[8]); SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADD2(a[7], a[8]); + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + COMBA_STORE2(b[17]); + COMBA_FINI; + + B->used = 18; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 18 * sizeof(fp_digit)); + fp_clamp(B); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} +#endif + + diff --git a/client/wolfssl/wolfcrypt/src/fp_sqr_comba_small_set.i b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_small_set.i new file mode 100644 index 0000000..a81ee10 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/fp_sqr_comba_small_set.i @@ -0,0 +1,1558 @@ +/* fp_sqr_comba_small_set.i + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. 
+ * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#if defined(TFM_SMALL_SET) +int fp_sqr_comba_small(fp_int *A, fp_int *B) +{ + fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0; +#ifdef TFM_ISO + fp_word tt; +#endif +#ifndef WOLFSSL_SMALL_STACK + fp_digit b[32]; +#else + fp_digit *b; +#endif + +#ifdef WOLFSSL_SMALL_STACK + b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 32, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (b == NULL) + return FP_MEM; +#endif + + switch (A->used) { + case 1: + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + COMBA_STORE2(b[1]); + COMBA_FINI; + + B->used = 2; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 2 * sizeof(fp_digit)); + fp_clamp(B); + break; + + case 2: + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + COMBA_STORE2(b[3]); + COMBA_FINI; + + B->used = 4; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 4 * sizeof(fp_digit)); + fp_clamp(B); + break; + + case 3: + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + COMBA_STORE2(b[5]); + COMBA_FINI; + + B->used = 6; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 6 * sizeof(fp_digit)); + fp_clamp(B); + break; + + case 4: + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADD2(a[2], a[3]); + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + COMBA_STORE2(b[7]); + COMBA_FINI; + + B->used = 8; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 8 * sizeof(fp_digit)); + fp_clamp(B); + break; + + case 5: + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ 
+ CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADD2(a[1], a[4]); SQRADD2(a[2], a[3]); + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADD2(a[2], a[4]); SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADD2(a[3], a[4]); + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + COMBA_STORE2(b[9]); + COMBA_FINI; + + B->used = 10; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 10 * sizeof(fp_digit)); + fp_clamp(B); + break; + + case 6: + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADD2(a[1], a[5]); SQRADD2(a[2], a[4]); SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADD2(a[2], a[5]); SQRADD2(a[3], a[4]); + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADD2(a[3], a[5]); SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADD2(a[4], a[5]); + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + COMBA_STORE2(b[11]); + COMBA_FINI; + + B->used = 12; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 12 * sizeof(fp_digit)); + fp_clamp(B); + break; + + case 7: + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADD2(a[2], a[6]); SQRADD2(a[3], a[5]); SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADD2(a[3], a[6]); SQRADD2(a[4], a[5]); + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADD2(a[4], a[6]); SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADD2(a[5], a[6]); + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADD(a[6], a[6]); + 
COMBA_STORE(b[12]); + COMBA_STORE2(b[13]); + COMBA_FINI; + + B->used = 14; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 14 * sizeof(fp_digit)); + fp_clamp(B); + break; + + case 8: + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADD2(a[3], a[7]); SQRADD2(a[4], a[6]); SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADD2(a[4], a[7]); SQRADD2(a[5], a[6]); + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADD2(a[5], a[7]); SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADD2(a[6], a[7]); + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + COMBA_STORE2(b[15]); + COMBA_FINI; + + B->used = 16; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 16 * sizeof(fp_digit)); + fp_clamp(B); + break; + + case 9: + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + 
COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADD2(a[4], a[8]); SQRADD2(a[5], a[7]); SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADD2(a[5], a[8]); SQRADD2(a[6], a[7]); + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADD2(a[6], a[8]); SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADD2(a[7], a[8]); + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + COMBA_STORE2(b[17]); + COMBA_FINI; + + B->used = 18; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 18 * sizeof(fp_digit)); + fp_clamp(B); + break; + + case 10: + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADD2(a[5], a[9]); SQRADD2(a[6], a[8]); SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADD2(a[6], a[9]); SQRADD2(a[7], a[8]); + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADD2(a[7], a[9]); SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADD2(a[8], a[9]); + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + COMBA_STORE2(b[19]); + COMBA_FINI; + + B->used = 20; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 20 * sizeof(fp_digit)); + fp_clamp(B); + break; + + case 11: + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* 
output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADD2(a[6], a[10]); SQRADD2(a[7], a[9]); SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADD2(a[7], a[10]); SQRADD2(a[8], a[9]); + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADD2(a[8], a[10]); SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADD2(a[9], a[10]); + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + COMBA_STORE2(b[21]); + COMBA_FINI; + + B->used = 22; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 22 * sizeof(fp_digit)); + fp_clamp(B); + break; + + case 12: + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 
*/ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADD2(a[7], a[11]); SQRADD2(a[8], a[10]); SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADD2(a[8], a[11]); SQRADD2(a[9], a[10]); + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADD2(a[9], a[11]); SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADD2(a[10], a[11]); + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + COMBA_STORE2(b[23]); + COMBA_FINI; + + B->used = 24; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 24 * sizeof(fp_digit)); + fp_clamp(B); + break; + + case 13: + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + 
COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADD2(a[8], a[12]); SQRADD2(a[9], a[11]); SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADD2(a[9], a[12]); SQRADD2(a[10], a[11]); + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADD2(a[10], a[12]); SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADD2(a[11], a[12]); + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + COMBA_STORE2(b[25]); + COMBA_FINI; + + B->used = 26; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 26 * sizeof(fp_digit)); + fp_clamp(B); + break; + + case 14: + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); 
SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADD2(a[9], a[13]); SQRADD2(a[10], a[12]); SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADD2(a[10], a[13]); SQRADD2(a[11], a[12]); + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADD2(a[11], a[13]); SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + 
CARRY_FORWARD; + SQRADD2(a[12], a[13]); + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + COMBA_STORE2(b[27]); + COMBA_FINI; + + B->used = 28; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 28 * sizeof(fp_digit)); + fp_clamp(B); + break; + + case 15: + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); 
SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADD2(a[10], a[14]); SQRADD2(a[11], a[13]); SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADD2(a[11], a[14]); SQRADD2(a[12], a[13]); + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADD2(a[12], a[14]); SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADD2(a[13], a[14]); + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + COMBA_STORE2(b[29]); + COMBA_FINI; + + B->used = 30; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 30 * sizeof(fp_digit)); + fp_clamp(B); + break; + + case 16: + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* 
output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADDSC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADD2(a[11], a[15]); SQRADD2(a[12], a[14]); SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADD2(a[12], a[15]); SQRADD2(a[13], a[14]); + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADD2(a[13], a[15]); SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADD2(a[14], a[15]); + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + COMBA_STORE2(b[31]); + COMBA_FINI; + + B->used = 32; + B->sign = FP_ZPOS; + XMEMCPY(B->dp, b, 32 * sizeof(fp_digit)); + fp_clamp(B); + break; + + default: + break; + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; +} + +#endif /* TFM_SMALL_SET */ diff --git 
a/client/wolfssl/wolfcrypt/src/ge_448.c b/client/wolfssl/wolfcrypt/src/ge_448.c
new file mode 100644
index 0000000..7795a96
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/ge_448.c
@@ -0,0 +1,10780 @@
+/* ge_448.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Based on Daniel J Bernstein's ed25519 public domain ref10 work.
+ * Small implementation based on Daniel Beer's ed25519 public domain work.
+ * Reworked for ed448 by Sean Parkinson.
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_ED448
+
+#include <wolfssl/wolfcrypt/ge_448.h>
+#include <wolfssl/wolfcrypt/ed448.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+/*
+sc means scalar.
+ge means group element.
+
+Here the group is the set of pairs (x,y) of field elements (see ge_448.h)
+satisfying x^2 + y^2 = 1 + d x^2y^2
+where d = -39081
+
+Representations:
+  ge448_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z
+  ge448_precomp (affine): (X:Y)
+*/
+
+
+#ifdef ED448_SMALL
+
+/* Base point of ed448 */
+static const ge448_p2 ed448_base = {
+    { 0x5e, 0xc0, 0x0c, 0xc7, 0x2b, 0xa8, 0x26, 0x26, 0x8e, 0x93, 0x00, 0x8b,
+      0xe1, 0x80, 0x3b, 0x43, 0x11, 0x65, 0xb6, 0x2a, 0xf7, 0x1a, 0xae, 0x12,
+      0x64, 0xa4, 0xd3, 0xa3, 0x24, 0xe3, 0x6d, 0xea, 0x67, 0x17, 0x0f, 0x47,
+      0x70, 0x65, 0x14, 0x9e, 0xda, 0x36, 0xbf, 0x22, 0xa6, 0x15, 0x1d, 0x22,
+      0xed, 0x0d, 0xed, 0x6b, 0xc6, 0x70, 0x19, 0x4f },
+    { 0x14, 0xfa, 0x30, 0xf2, 0x5b, 0x79, 0x08, 0x98, 0xad, 0xc8, 0xd7, 0x4e,
+      0x2c, 0x13, 0xbd, 0xfd, 0xc4, 0x39, 0x7c, 0xe6, 0x1c, 0xff, 0xd3, 0x3a,
+      0xd7, 0xc2, 0xa0, 0x05, 0x1e, 0x9c, 0x78, 0x87, 0x40, 0x98, 0xa3, 0x6c,
+      0x73, 0x73, 0xea, 0x4b, 0x62, 0xc7, 0xc9, 0x56, 0x37, 0x20, 0x76, 0x88,
+      0x24, 0xbc, 0xb6, 0x6e, 0x71, 0x46, 0x3f, 0x69 },
+    { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }
+};
+
+/* Part of the order of ed448 that needs to be multiplied when reducing */
+static const uint8_t ed448_order_mul[56] = {
+    0x0d, 0xbb, 0xa7, 0x54, 0x6d, 0x3d, 0x87, 0xdc, 0xaa, 0x70, 0x3a, 0x72,
+    0x8d, 0x3d, 0x93, 0xde, 0x6f, 0xc9, 0x29, 0x51, 0xb6, 0x24, 0xb1, 0x3b,
+    0x16, 0xdc, 0x35, 0x83,
+};
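[Editor's aside: the reduction routines below lean on one identity. The group
order is L = 2^446 - c, where c is the 28-byte little-endian constant
ed448_order_mul above, so 2^446 == c (mod L) and any value hi*2^446 + lo folds
to c*hi + lo. The (<< 2) in the inner loops compensates for splitting at byte
56 (448 bits), since 2^448 = 4*2^446 == 4*c. A minimal standalone sketch of
the same folding with toy values of k and c (not ed448's):]

    #include <stdint.h>
    #include <stdio.h>

    /* Toy model of the folding in sc448_reduce (not wolfSSL code).
     * For a modulus of the shape m = 2^k - c, 2^k == c (mod m), so
     * x = hi*2^k + lo == c*hi + lo (mod m). */
    int main(void)
    {
        const uint32_t k = 16, c = 11;              /* toy values */
        const uint32_t m = ((uint32_t)1 << k) - c;  /* m = 2^16 - 11 */
        uint32_t x  = 0xdeadbeefu;
        uint32_t lo = x & (((uint32_t)1 << k) - 1);
        uint32_t hi = x >> k;
        uint32_t folded = lo + c * hi;              /* one folding step */

        /* Both prints show the same residue, 20684. */
        printf("%u %u\n", x % m, folded % m);
        return 0;
    }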
+/* Reduce scalar mod the order of the curve.
+ * Scalar will be 114 bytes.
+ *
+ * b  [in]  Scalar to reduce.
+ */
+void sc448_reduce(uint8_t* b)
+{
+    int i, j;
+    uint32_t t[114];
+    uint8_t o;
+
+    for (i = 0; i < 86; i++) {
+        t[i] = b[i];
+    }
+    for (i = 0; i < 58; i++) {
+        for (j = 0; j < 28; j++)
+            t[i+j] += b[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+        t[i+56] = 0;
+    }
+    for (i = 54; i < 87; i++) {
+        t[i+1] += t[i] >> 8;
+        t[i] &= 0xff;
+    }
+    for (i = 0; i < 31; i++) {
+        for (j = 0; j < 28; j++)
+            t[i+j] += t[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+        t[i+56] = 0;
+    }
+    for (i = 54; i < 60; i++) {
+        t[i+1] += t[i] >> 8;
+        t[i] &= 0xff;
+    }
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < 28; j++)
+            t[i+j] += t[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+        t[i+56] = 0;
+    }
+    for (i = 0; i < 55; i++) {
+        t[i+1] += t[i] >> 8;
+        t[i] &= 0xff;
+    }
+    o = t[55] >> 6;
+    t[55] &= 0x3f;
+    for (j = 0; j < 28; j++)
+        t[j] += o * (uint32_t)ed448_order_mul[j];
+    for (i = 0; i < 55; i++) {
+        t[i+1] += t[i] >> 8;
+        b[i] = t[i] & 0xff;
+    }
+    b[i] = t[i] & 0xff;
+    b[i+1] = 0;
+}
+
+/* Multiply a by b and add d. r = (a * b + d) mod order
+ *
+ * r  [out]  Scalar to hold result.
+ * a  [in]   Scalar to multiply.
+ * b  [in]   Scalar to multiply.
+ * d  [in]   Scalar to add to multiplicative result.
+ */
+void sc448_muladd(uint8_t* r, const uint8_t* a, const uint8_t* b,
+                  const uint8_t* d)
+{
+    int i, j;
+    uint32_t t[112];
+    uint8_t o;
+
+    /* a * b + d */
+    for (i = 0; i < 56; i++)
+        t[i] = d[i];
+    for (i = 0; i < 56; i++) {
+        for (j = 0; j < 56; j++)
+            t[i+j] += (int16_t)a[i] * b[j];
+        t[i+56] = 0;
+    }
+
+    for (i = 0; i < 111; i++) {
+        t[i+1] += t[i] >> 8;
+        t[i] &= 0xff;
+    }
+    for (i = 0; i < 56; i++) {
+        for (j = 0; j < 28; j++)
+            t[i+j] += t[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+        t[i+56] = 0;
+    }
+    for (i = 54; i < 85; i++) {
+        t[i+1] += t[i] >> 8;
+        t[i] &= 0xff;
+    }
+    for (i = 0; i < 29; i++) {
+        for (j = 0; j < 28; j++)
+            t[i+j] += t[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+        t[i+56] = 0;
+    }
+    for (i = 54; i < 58; i++) {
+        t[i+1] += t[i] >> 8;
+        t[i] &= 0xff;
+    }
+    for (i = 0; i < 2; i++) {
+        for (j = 0; j < 28; j++)
+            t[i+j] += t[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+        t[i+56] = 0;
+    }
+    for (i = 0; i < 55; i++) {
+        t[i+1] += t[i] >> 8;
+        t[i] &= 0xff;
+    }
+    o = t[55] >> 6;
+    t[55] &= 0x3f;
+    for (j = 0; j < 28; j++)
+        t[j] += o * (uint32_t)ed448_order_mul[j];
+    for (i = 0; i < 55; i++) {
+        t[i+1] += t[i] >> 8;
+        r[i] = t[i] & 0xff;
+    }
+    r[i] = t[i] & 0xff;
+    r[i+1] = 0;
+}
+
+/* Double the point on the Twisted Edwards curve. r = 2.p
+ *
+ * r  [out]  Point to hold result.
+ * p  [in]   Point to double.
+ */
+static WC_INLINE void ge448_dbl(ge448_p2* r, const ge448_p2* p)
+{
+    ge448 t0[GE448_WORDS];
+    ge448 t1[GE448_WORDS];
+
+    fe448_add(t0, p->X, p->Y);    /* t0 = B1 = X1+Y1 */
+    fe448_reduce(t0);
+    fe448_sqr(t0, t0);            /* t0 = B = (X1+Y1)^2 */
+    fe448_sqr(r->X, p->X);        /* r->X = C = X1^2 */
+    fe448_sqr(r->Y, p->Y);        /* r->Y = D = Y1^2 */
+    fe448_add(t1, r->X, r->Y);    /* t1 = E = C+D */
+    fe448_reduce(t1);
+    fe448_sub(r->Y, r->X, r->Y);  /* r->Y = Y31 = C-D */
+    fe448_sqr(r->Z, p->Z);        /* r->Z = H = Z1^2 */
+    fe448_add(r->Z, r->Z, r->Z);  /* r->Z = J1 = 2*H */
+    fe448_sub(r->Z, t1, r->Z);    /* r->Z = J = E-2*H */
+    fe448_reduce(r->Z);
+    fe448_sub(r->X, t0, t1);      /* r->X = X31 = B-E */
+    fe448_mul(r->X, r->X, r->Z);  /* r->X = X3 = (B-E)*J */
+    fe448_mul(r->Y, r->Y, t1);    /* r->Y = Y3 = E*(C-D) */
+    fe448_mul(r->Z, t1, r->Z);    /* r->Z = Z3 = E*J */
+}
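[Editor's aside: ge448_dbl above and the ge448_add that follows are projective
versions of the affine Edwards group law x3 = (x1*y2+y1*x2)/(1+d*x1*x2*y1*y2),
y3 = (y1*y2-x1*x2)/(1-d*x1*x2*y1*y2); working in (X:Y:Z) trades a per-operation
field inversion for a few extra multiplications. A small affine sketch over a
toy prime (P and D below are illustrative, not ed448's parameters) that finds
a point by search and checks its double stays on the curve:]

    #include <stdio.h>

    #define P 1009   /* toy prime, not ed448's 2^448 - 2^224 - 1 */
    #define D 77     /* toy curve constant, not -39081 */

    static long md(long a) { long r = a % P; return r < 0 ? r + P : r; }

    static long inv(long a) {               /* a^(P-2) mod P (Fermat) */
        long r = 1, b = md(a), e = P - 2;
        while (e) { if (e & 1) r = r * b % P; b = b * b % P; e >>= 1; }
        return r;
    }

    static int on_curve(long x, long y) {   /* x^2+y^2 == 1+D*x^2*y^2 ? */
        long x2 = x * x % P, y2 = y * y % P;
        return md(x2 + y2 - 1 - D * x2 % P * y2) == 0;
    }

    /* Affine Edwards addition: the law ge448_add applies projectively.
     * Assumes the denominators are nonzero (holds for suitable D). */
    static void ed_add(long* x3, long* y3, long x1, long y1,
                       long x2, long y2) {
        long t = D * (x1 * x2 % P) % P * (y1 * y2 % P) % P;
        *x3 = md(x1 * y2 + y1 * x2) * inv(md(1 + t)) % P;
        *y3 = md(y1 * y2 - x1 * x2) * inv(md(1 - t)) % P;
    }

    int main(void) {
        long x = 0, y = 1, x2, y2;          /* (0,1) is the neutral point */
        long i, j;
        for (i = 1; i < P; i++)             /* find a non-trivial point */
            for (j = 1; j < P; j++)
                if (on_curve(i, j)) { x = i; y = j; goto found; }
    found:
        ed_add(&x2, &y2, x, y, x, y);       /* 2P via the addition law */
        printf("P=(%ld,%ld):%d  2P=(%ld,%ld):%d\n",
               x, y, on_curve(x, y), x2, y2, on_curve(x2, y2));
        return 0;
    }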
+/* Add two points on the Twisted Edwards curve. r = p + q
+ *
+ * r  [out]  Point to hold result.
+ * p  [in]   Point to add.
+ * q  [in]   Point to add.
+ */
+static WC_INLINE void ge448_add(ge448_p2* r, const ge448_p2* p,
+                                const ge448_p2* q)
+{
+    ge448 t0[GE448_WORDS];
+    ge448 t1[GE448_WORDS];
+    ge448 t2[GE448_WORDS];
+    ge448 t3[GE448_WORDS];
+    ge448 t4[GE448_WORDS];
+
+    fe448_mul(t1, p->X, q->X);    /* t1 = C = X1*X2 */
+    fe448_mul(t2, p->Y, q->Y);    /* t2 = D = Y1*Y2 */
+    fe448_mul(t3, t1, t2);        /* t3 = E1 = C*D */
+    fe448_mul39081(t3, t3);       /* t3 = E = d*C*D */
+    fe448_mul(r->Z, p->Z, q->Z);  /* r->Z = A = Z1*Z2 */
+    fe448_sqr(t0, r->Z);          /* t0 = B = A^2 */
+    fe448_add(t4, t0, t3);        /* t4 = F = B-(-E) */
+    fe448_sub(t0, t0, t3);        /* t0 = G = B+(-E) */
+    fe448_reduce(t0);
+    fe448_add(r->X, p->X, p->Y);  /* r->X = H1 = X1+Y1 */
+    fe448_reduce(r->X);
+    fe448_add(r->Y, q->X, q->Y);  /* r->Y = H2 = X2+Y2 */
+    fe448_reduce(r->Y);
+    fe448_mul(r->X, r->X, r->Y);  /* r->X = H = (X1+Y1)*(X2+Y2) */
+    fe448_sub(r->X, r->X, t1);    /* r->X = X31 = H-C */
+    fe448_sub(r->X, r->X, t2);    /* r->X = X32 = H-C-D */
+    fe448_reduce(r->X);
+    fe448_mul(r->X, r->X, t4);    /* r->X = X33 = F*(H-C-D) */
+    fe448_mul(r->X, r->X, r->Z);  /* r->X = X3 = A*F*(H-C-D) */
+    fe448_sub(r->Y, t2, t1);      /* r->Y = Y31 = D-C */
+    fe448_reduce(r->Y);
+    fe448_mul(r->Y, r->Y, t0);    /* r->Y = Y32 = G*(D-C) */
+    fe448_mul(r->Y, r->Y, r->Z);  /* r->Y = Y3 = A*G*(D-C) */
+    fe448_mul(r->Z, t4, t0);      /* r->Z = Z3 = F*G */
+}
+
+/* Convert point to byte array assuming projective coordinates.
+ *
+ * s  [out]  Array of bytes to hold compressed point.
+ * h  [in]   Point to convert.
+ */
+void ge448_to_bytes(uint8_t* s, const ge448_p2* h)
+{
+    ge448 recip[56];
+    ge448 x[56];
+
+    fe448_invert(recip, h->Z);
+    fe448_mul(x, h->X, recip);
+    fe448_mul(s, h->Y, recip);
+    fe448_norm(x);
+    fe448_norm(s);
+    s[56] = (x[0] & 1) << 7;
+}
+
+/* Compress the point to y-ordinate and negative bit.
+ *
+ * out  [out]  Array of bytes to hold compressed key.
+ * xIn  [in]   The x-ordinate.
+ * yIn  [in]   The y-ordinate.
+ */
+int ge448_compress_key(uint8_t* out, const uint8_t* xIn, const uint8_t* yIn)
+{
+    ge448 x[56];
+
+    fe448_copy(x, xIn);
+    fe448_copy(out, yIn);
+    fe448_norm(x);
+    fe448_norm(out);
+    out[56] = (x[0] & 1) << 7;
+
+    return 0;
+}
+
+/* Perform a scalar multiplication of a point. h = a * p
+ *
+ * h  [out]  Point to hold result.
+ * p  [in]   Point to multiply.
+ * a  [in]   Scalar to multiply by.
+ */
+static void ge448_scalarmult(ge448_p2* h, const ge448_p2* p, const uint8_t* a)
+{
+    ge448_p2 r;
+    ge448_p2 s;
+    int i;
+
+    XMEMSET(&r, 0, sizeof(r));
+    r.Y[0] = 1;
+    r.Z[0] = 1;
+
+    for (i = 447; i >= 0; i--) {
+        const byte bit = (a[i >> 3] >> (i & 7)) & 1;
+
+        ge448_dbl(&r, &r);
+        ge448_add(&s, &r, p);
+
+        fe448_cmov(r.X, s.X, bit);
+        fe448_cmov(r.Y, s.Y, bit);
+        fe448_cmov(r.Z, s.Z, bit);
+    }
+
+    XMEMCPY(h, &r, sizeof(r));
+}
+
+/* Perform a scalar multiplication of the base point. h = a * base
+ *
+ * h  [out]  Point to hold result.
+ * a  [in]   Scalar to multiply by.
+ */
+void ge448_scalarmult_base(ge448_p2* h, const uint8_t* a)
+{
+    ge448_scalarmult(h, &ed448_base, a);
+}
+
+/* Perform a scalar multiplication of the base point and public point.
+ * r = a * A + b * base
+ * Not constant time.
+ *
+ * r  [out]  Point to hold result.
+ * a  [in]   Scalar to multiply public point by.
+ * A  [in]   Public point.
+ * b  [in]   Scalar to multiply base point by.
+ */
+int ge448_double_scalarmult_vartime(ge448_p2 *r, const uint8_t *a,
+                                    const ge448_p2 *A, const uint8_t *b)
+{
+    ge448_p2 t;
+
+    ge448_scalarmult(&t, &ed448_base, b);
+    ge448_scalarmult(r, A, a);
+    ge448_add(r, r, &t);
+
+    return 0;
+}
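[Editor's aside: ge448_scalarmult above is a classic MSB-first double-and-add
made constant-time: every iteration performs both the double and the add, and
fe448_cmov selects with a mask so neither branches nor memory accesses depend
on secret scalar bits. The same skeleton with integers standing in for points,
all names and values illustrative:]

    #include <stdint.h>
    #include <stdio.h>

    /* Constant-time select: a when bit==0, b when bit==1, no branching --
     * the role fe448_cmov plays in ge448_scalarmult. */
    static uint64_t cmov(uint64_t a, uint64_t b, uint64_t bit)
    {
        uint64_t mask = (uint64_t)0 - bit;   /* bit==1 -> all ones */
        return (a & ~mask) | (b & mask);
    }

    int main(void)
    {
        /* Toy 16-bit little-endian "scalar" (0x0135 = 309); an integer
         * stands in for the point, + for the group operation. */
        const uint8_t scalar[2] = { 0x35, 0x01 };
        uint64_t r = 0, base = 7;
        int i;

        for (i = 15; i >= 0; i--) {
            uint64_t bit = (scalar[i >> 3] >> (i & 7)) & 1;
            uint64_t s;
            r = 2 * r;             /* always double   (ge448_dbl)  */
            s = r + base;          /* always add      (ge448_add)  */
            r = cmov(r, s, bit);   /* select silently (fe448_cmov) */
        }
        printf("%llu == %llu\n", (unsigned long long)r,
               (unsigned long long)(309 * 7));   /* 2163 == 2163 */
        return 0;
    }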
+
+/* Convert compressed point to negative of affine point.
+ * Calculates x from the y and the negative bit.
+ * Not constant time.
+ *
+ * r  [out]  Uncompressed point.
+ * b  [in]   Array of bytes representing point.
+ * returns 0 on success and -1 on failure.
+ */
+int ge448_from_bytes_negate_vartime(ge448_p2 *r, const uint8_t *b)
+{
+    int ret = 0;
+    ge448 u[GE448_WORDS];
+    ge448 v[GE448_WORDS];
+    ge448 u3[GE448_WORDS];
+    ge448 vxx[GE448_WORDS];
+    ge448 check[GE448_WORDS];
+
+    fe448_copy(r->Y, b);
+    XMEMSET(r->Z, 0, sizeof(r->Z));
+    r->Z[0] = 1;
+    fe448_sqr(u, r->Y);                /* u = y^2 */
+    fe448_mul39081(v, u);              /* v = 39081.y^2 */
+    fe448_sub(u, u, r->Z);             /* u = y^2-1 */
+    fe448_add(v, v, r->Z);             /* v = 39081.y^2+1 */
+    fe448_neg(v, v);                   /* v = -39081.y^2-1 = d.y^2-1 */
+
+    fe448_sqr(r->X, v);                /* x = v^2 */
+    fe448_mul(r->X, r->X, v);          /* x = v^3 */
+    fe448_sqr(u3, u);                  /* u3 = u^2 */
+    fe448_mul(r->X, r->X, u3);         /* x = u^2.v^3 */
+    fe448_mul(u3, u3, u);              /* u3 = u^3 */
+    fe448_mul(r->X, r->X, u3);         /* x = u^5.v^3 */
+
+    fe448_pow_2_446_222_1(r->X, r->X); /* x = (u^5.v^3)^((q-3)/4) */
+    fe448_mul(r->X, r->X, u3);         /* x = u^3(u^5.v^3)^((q-3)/4) */
+    fe448_mul(r->X, r->X, v);          /* x = u^3.v(u^5.v^3)^((q-3)/4) */
+
+    fe448_sqr(vxx, r->X);
+    fe448_mul(vxx, vxx, v);
+    fe448_sub(check, vxx, u);          /* check = v.x^2-u */
+    fe448_norm(check);
+    fe448_norm(r->X);
+    fe448_norm(r->Y);
+    /* Note: vx^2+u is NOT correct. */
+    if (fe448_isnonzero(check)) {
+        ret = -1;
+    }
+
+    /* Calculating negative of point in bytes - negate only if X is correct. */
+    if ((r->X[0] & 1) == (b[56] >> 7)) {
+        fe448_neg(r->X, r->X);
+    }
+
+    return ret;
+}
+
+#else /* !ED448_SMALL */
+
+#if defined(CURVED448_128BIT)
+
+/* Reduce scalar mod the order of the curve.
+ * Scalar will be 114 bytes.
+ *
+ * b  [in]  Scalar to reduce.
+ */
+void sc448_reduce(uint8_t* b)
+{
+    uint64_t d[8];
+    uint128_t t[17];
+    uint128_t c;
+    uint64_t o;
+
+    /* Load from bytes */
+    t[ 0] = ((int64_t) (b[ 0]) << 0)
+          | ((int64_t) (b[ 1]) << 8)
+          | ((int64_t) (b[ 2]) << 16)
+          | ((int64_t) (b[ 3]) << 24)
+          | ((int64_t) (b[ 4]) << 32)
+          | ((int64_t) (b[ 5]) << 40)
+          | ((int64_t) (b[ 6]) << 48);
+    t[ 1] = ((int64_t) (b[ 7]) << 0)
+          | ((int64_t) (b[ 8]) << 8)
+          | ((int64_t) (b[ 9]) << 16)
+          | ((int64_t) (b[10]) << 24)
+          | ((int64_t) (b[11]) << 32)
+          | ((int64_t) (b[12]) << 40)
+          | ((int64_t) (b[13]) << 48);
+    t[ 2] = ((int64_t) (b[14]) << 0)
+          | ((int64_t) (b[15]) << 8)
+          | ((int64_t) (b[16]) << 16)
+          | ((int64_t) (b[17]) << 24)
+          | ((int64_t) (b[18]) << 32)
+          | ((int64_t) (b[19]) << 40)
+          | ((int64_t) (b[20]) << 48);
+    t[ 3] = ((int64_t) (b[21]) << 0)
+          | ((int64_t) (b[22]) << 8)
+          | ((int64_t) (b[23]) << 16)
+          | ((int64_t) (b[24]) << 24)
+          | ((int64_t) (b[25]) << 32)
+          | ((int64_t) (b[26]) << 40)
+          | ((int64_t) (b[27]) << 48);
+    t[ 4] = ((int64_t) (b[28]) << 0)
+          | ((int64_t) (b[29]) << 8)
+          | ((int64_t) (b[30]) << 16)
+          | ((int64_t) (b[31]) << 24)
+          | ((int64_t) (b[32]) << 32)
+          | ((int64_t) (b[33]) << 40)
+          | ((int64_t) (b[34]) << 48);
+    t[ 5] = ((int64_t) (b[35]) << 0)
+          | ((int64_t) (b[36]) << 8)
+          | ((int64_t) (b[37]) << 16)
+          | ((int64_t) (b[38]) << 24)
+          | ((int64_t) (b[39]) << 32)
+          | ((int64_t) (b[40]) << 40)
+          | ((int64_t) (b[41]) << 48);
+    t[ 6] = ((int64_t) (b[42]) << 0)
+          | ((int64_t) (b[43]) << 8)
+          | ((int64_t) (b[44]) << 16)
+          | ((int64_t) (b[45]) << 24)
+          | ((int64_t) (b[46]) << 32)
+          | ((int64_t) (b[47]) << 40)
+          | ((int64_t) (b[48]) << 48);
+    t[ 7] = ((int64_t) (b[49]) << 0)
+          | ((int64_t) (b[50]) << 8)
+          | ((int64_t) (b[51]) << 16)
+          | ((int64_t) (b[52]) << 24)
+          | ((int64_t) (b[53]) << 32)
+          | ((int64_t) (b[54]) << 40)
+          | ((int64_t) (b[55]) << 48);
+    t[ 8] = ((int64_t) (b[56]) << 0)
+ | ((int64_t) (b[57]) << 8) + | ((int64_t) (b[58]) << 16) + | ((int64_t) (b[59]) << 24) + | ((int64_t) (b[60]) << 32) + | ((int64_t) (b[61]) << 40) + | ((int64_t) (b[62]) << 48); + t[ 9] = ((int64_t) (b[63]) << 0) + | ((int64_t) (b[64]) << 8) + | ((int64_t) (b[65]) << 16) + | ((int64_t) (b[66]) << 24) + | ((int64_t) (b[67]) << 32) + | ((int64_t) (b[68]) << 40) + | ((int64_t) (b[69]) << 48); + t[10] = ((int64_t) (b[70]) << 0) + | ((int64_t) (b[71]) << 8) + | ((int64_t) (b[72]) << 16) + | ((int64_t) (b[73]) << 24) + | ((int64_t) (b[74]) << 32) + | ((int64_t) (b[75]) << 40) + | ((int64_t) (b[76]) << 48); + t[11] = ((int64_t) (b[77]) << 0) + | ((int64_t) (b[78]) << 8) + | ((int64_t) (b[79]) << 16) + | ((int64_t) (b[80]) << 24) + | ((int64_t) (b[81]) << 32) + | ((int64_t) (b[82]) << 40) + | ((int64_t) (b[83]) << 48); + t[12] = ((int64_t) (b[84]) << 0) + | ((int64_t) (b[85]) << 8) + | ((int64_t) (b[86]) << 16) + | ((int64_t) (b[87]) << 24) + | ((int64_t) (b[88]) << 32) + | ((int64_t) (b[89]) << 40) + | ((int64_t) (b[90]) << 48); + t[13] = ((int64_t) (b[91]) << 0) + | ((int64_t) (b[92]) << 8) + | ((int64_t) (b[93]) << 16) + | ((int64_t) (b[94]) << 24) + | ((int64_t) (b[95]) << 32) + | ((int64_t) (b[96]) << 40) + | ((int64_t) (b[97]) << 48); + t[14] = ((int64_t) (b[98]) << 0) + | ((int64_t) (b[99]) << 8) + | ((int64_t) (b[100]) << 16) + | ((int64_t) (b[101]) << 24) + | ((int64_t) (b[102]) << 32) + | ((int64_t) (b[103]) << 40) + | ((int64_t) (b[104]) << 48); + t[15] = ((int64_t) (b[105]) << 0) + | ((int64_t) (b[106]) << 8) + | ((int64_t) (b[107]) << 16) + | ((int64_t) (b[108]) << 24) + | ((int64_t) (b[109]) << 32) + | ((int64_t) (b[110]) << 40) + | ((int64_t) (b[111]) << 48); + t[16] = ((int64_t) (b[112]) << 0) + | ((int64_t) (b[113]) << 8); + + /* Mod curve order */ + /* 2^446 - 0x8335dc163bb124b65129c96fde933d8d723a70aadc873d6d54a7bb0d */ + /* Mod top half of extra words */ + t[ 4] += (int128_t)0x21cf5b5529eec34L * t[12]; + t[ 5] += (int128_t)0x0f635c8e9c2ab70L * t[12]; + t[ 6] += (int128_t)0x2d944a725bf7a4cL * t[12]; + t[ 7] += (int128_t)0x20cd77058eec490L * t[12]; + t[ 5] += (int128_t)0x21cf5b5529eec34L * t[13]; + t[ 6] += (int128_t)0x0f635c8e9c2ab70L * t[13]; + t[ 7] += (int128_t)0x2d944a725bf7a4cL * t[13]; + t[ 8] += (int128_t)0x20cd77058eec490L * t[13]; + t[ 6] += (int128_t)0x21cf5b5529eec34L * t[14]; + t[ 7] += (int128_t)0x0f635c8e9c2ab70L * t[14]; + t[ 8] += (int128_t)0x2d944a725bf7a4cL * t[14]; + t[ 9] += (int128_t)0x20cd77058eec490L * t[14]; + t[ 7] += (int128_t)0x21cf5b5529eec34L * t[15]; + t[ 8] += (int128_t)0x0f635c8e9c2ab70L * t[15]; + t[ 9] += (int128_t)0x2d944a725bf7a4cL * t[15]; + t[10] += (int128_t)0x20cd77058eec490L * t[15]; + t[ 8] += (int128_t)0x21cf5b5529eec34L * t[16]; + t[ 9] += (int128_t)0x0f635c8e9c2ab70L * t[16]; + t[10] += (int128_t)0x2d944a725bf7a4cL * t[16]; + t[11] += (int128_t)0x20cd77058eec490L * t[16]; + t[12] = 0; + /* Propagate carries */ + c = t[ 4] >> 56; t[ 5] += c; t[ 4] = t[ 4] & 0xffffffffffffff; + c = t[ 5] >> 56; t[ 6] += c; t[ 5] = t[ 5] & 0xffffffffffffff; + c = t[ 6] >> 56; t[ 7] += c; t[ 6] = t[ 6] & 0xffffffffffffff; + c = t[ 7] >> 56; t[ 8] += c; t[ 7] = t[ 7] & 0xffffffffffffff; + c = t[ 8] >> 56; t[ 9] += c; t[ 8] = t[ 8] & 0xffffffffffffff; + c = t[ 9] >> 56; t[10] += c; t[ 9] = t[ 9] & 0xffffffffffffff; + c = t[10] >> 56; t[11] += c; t[10] = t[10] & 0xffffffffffffff; + c = t[11] >> 56; t[12] += c; t[11] = t[11] & 0xffffffffffffff; + /* Mod bottom half of extra words */ + t[ 0] += (int128_t)0x21cf5b5529eec34L * t[ 8]; + t[ 1] += 
(int128_t)0x0f635c8e9c2ab70L * t[ 8]; + t[ 2] += (int128_t)0x2d944a725bf7a4cL * t[ 8]; + t[ 3] += (int128_t)0x20cd77058eec490L * t[ 8]; + t[ 1] += (int128_t)0x21cf5b5529eec34L * t[ 9]; + t[ 2] += (int128_t)0x0f635c8e9c2ab70L * t[ 9]; + t[ 3] += (int128_t)0x2d944a725bf7a4cL * t[ 9]; + t[ 4] += (int128_t)0x20cd77058eec490L * t[ 9]; + t[ 2] += (int128_t)0x21cf5b5529eec34L * t[10]; + t[ 3] += (int128_t)0x0f635c8e9c2ab70L * t[10]; + t[ 4] += (int128_t)0x2d944a725bf7a4cL * t[10]; + t[ 5] += (int128_t)0x20cd77058eec490L * t[10]; + t[ 3] += (int128_t)0x21cf5b5529eec34L * t[11]; + t[ 4] += (int128_t)0x0f635c8e9c2ab70L * t[11]; + t[ 5] += (int128_t)0x2d944a725bf7a4cL * t[11]; + t[ 6] += (int128_t)0x20cd77058eec490L * t[11]; + t[ 4] += (int128_t)0x21cf5b5529eec34L * t[12]; + t[ 5] += (int128_t)0x0f635c8e9c2ab70L * t[12]; + t[ 6] += (int128_t)0x2d944a725bf7a4cL * t[12]; + t[ 7] += (int128_t)0x20cd77058eec490L * t[12]; + t[ 8] = 0; + /* Propagate carries */ + c = t[ 0] >> 56; t[ 1] += c; t[ 0] = t[ 0] & 0xffffffffffffff; + c = t[ 1] >> 56; t[ 2] += c; t[ 1] = t[ 1] & 0xffffffffffffff; + c = t[ 2] >> 56; t[ 3] += c; t[ 2] = t[ 2] & 0xffffffffffffff; + c = t[ 3] >> 56; t[ 4] += c; t[ 3] = t[ 3] & 0xffffffffffffff; + c = t[ 4] >> 56; t[ 5] += c; t[ 4] = t[ 4] & 0xffffffffffffff; + c = t[ 5] >> 56; t[ 6] += c; t[ 5] = t[ 5] & 0xffffffffffffff; + c = t[ 6] >> 56; t[ 7] += c; t[ 6] = t[ 6] & 0xffffffffffffff; + c = t[ 7] >> 56; t[ 8] += c; t[ 7] = t[ 7] & 0xffffffffffffff; + t[ 0] += (int128_t)0x21cf5b5529eec34L * t[ 8]; + t[ 1] += (int128_t)0x0f635c8e9c2ab70L * t[ 8]; + t[ 2] += (int128_t)0x2d944a725bf7a4cL * t[ 8]; + t[ 3] += (int128_t)0x20cd77058eec490L * t[ 8]; + /* Propagate carries */ + c = t[ 0] >> 56; t[ 1] += c; d[ 0] = (int64_t)(t[ 0] & 0xffffffffffffff); + c = t[ 1] >> 56; t[ 2] += c; d[ 1] = (int64_t)(t[ 1] & 0xffffffffffffff); + c = t[ 2] >> 56; t[ 3] += c; d[ 2] = (int64_t)(t[ 2] & 0xffffffffffffff); + c = t[ 3] >> 56; t[ 4] += c; d[ 3] = (int64_t)(t[ 3] & 0xffffffffffffff); + c = t[ 4] >> 56; t[ 5] += c; d[ 4] = (int64_t)(t[ 4] & 0xffffffffffffff); + c = t[ 5] >> 56; t[ 6] += c; d[ 5] = (int64_t)(t[ 5] & 0xffffffffffffff); + c = t[ 6] >> 56; t[ 7] += c; d[ 6] = (int64_t)(t[ 6] & 0xffffffffffffff); + d[ 7] = t[7]; + /* Mod bits over 56 in last word */ + o = d[7] >> 54; d[ 7] &= 0x3fffffffffffff; + d[ 0] += 0x873d6d54a7bb0dL * o; + d[ 1] += 0x3d8d723a70aadcL * o; + d[ 2] += 0xb65129c96fde93L * o; + d[ 3] += 0x8335dc163bb124L * o; + /* Propagate carries */ + o = d[ 0] >> 56; d[ 1] += o; d[ 0] = d[ 0] & 0xffffffffffffff; + o = d[ 1] >> 56; d[ 2] += o; d[ 1] = d[ 1] & 0xffffffffffffff; + o = d[ 2] >> 56; d[ 3] += o; d[ 2] = d[ 2] & 0xffffffffffffff; + o = d[ 3] >> 56; d[ 4] += o; d[ 3] = d[ 3] & 0xffffffffffffff; + o = d[ 4] >> 56; d[ 5] += o; d[ 4] = d[ 4] & 0xffffffffffffff; + o = d[ 5] >> 56; d[ 6] += o; d[ 5] = d[ 5] & 0xffffffffffffff; + o = d[ 6] >> 56; d[ 7] += o; d[ 6] = d[ 6] & 0xffffffffffffff; + + /* Convert to bytes */ + b[ 0] = (d[0 ] >> 0); + b[ 1] = (d[0 ] >> 8); + b[ 2] = (d[0 ] >> 16); + b[ 3] = (d[0 ] >> 24); + b[ 4] = (d[0 ] >> 32); + b[ 5] = (d[0 ] >> 40); + b[ 6] = (d[0 ] >> 48); + b[ 7] = (d[1 ] >> 0); + b[ 8] = (d[1 ] >> 8); + b[ 9] = (d[1 ] >> 16); + b[10] = (d[1 ] >> 24); + b[11] = (d[1 ] >> 32); + b[12] = (d[1 ] >> 40); + b[13] = (d[1 ] >> 48); + b[14] = (d[2 ] >> 0); + b[15] = (d[2 ] >> 8); + b[16] = (d[2 ] >> 16); + b[17] = (d[2 ] >> 24); + b[18] = (d[2 ] >> 32); + b[19] = (d[2 ] >> 40); + b[20] = (d[2 ] >> 48); + b[21] = (d[3 ] >> 0); + b[22] = (d[3 ] >> 8); + b[23] = 
(d[3 ] >> 16); + b[24] = (d[3 ] >> 24); + b[25] = (d[3 ] >> 32); + b[26] = (d[3 ] >> 40); + b[27] = (d[3 ] >> 48); + b[28] = (d[4 ] >> 0); + b[29] = (d[4 ] >> 8); + b[30] = (d[4 ] >> 16); + b[31] = (d[4 ] >> 24); + b[32] = (d[4 ] >> 32); + b[33] = (d[4 ] >> 40); + b[34] = (d[4 ] >> 48); + b[35] = (d[5 ] >> 0); + b[36] = (d[5 ] >> 8); + b[37] = (d[5 ] >> 16); + b[38] = (d[5 ] >> 24); + b[39] = (d[5 ] >> 32); + b[40] = (d[5 ] >> 40); + b[41] = (d[5 ] >> 48); + b[42] = (d[6 ] >> 0); + b[43] = (d[6 ] >> 8); + b[44] = (d[6 ] >> 16); + b[45] = (d[6 ] >> 24); + b[46] = (d[6 ] >> 32); + b[47] = (d[6 ] >> 40); + b[48] = (d[6 ] >> 48); + b[49] = (d[7 ] >> 0); + b[50] = (d[7 ] >> 8); + b[51] = (d[7 ] >> 16); + b[52] = (d[7 ] >> 24); + b[53] = (d[7 ] >> 32); + b[54] = (d[7 ] >> 40); + b[55] = (d[7 ] >> 48); + b[56] = 0; +} + +/* Multiply a by b and add d. r = (a * b + d) mod order + * + * r [in] Scalar to hold result. + * a [in] Scalar to multiply. + * b [in] Scalar to multiply. + * d [in] Scalar to add to multiplicative result. + */ +void sc448_muladd(uint8_t* r, const uint8_t* a, const uint8_t* b, + const uint8_t* d) +{ + uint64_t ad[8], bd[8], dd[8], rd[8]; + uint128_t t[16]; + uint128_t c; + uint64_t o; + + /* Load from bytes */ + ad[ 0] = ((int64_t) (a[ 0]) << 0) + | ((int64_t) (a[ 1]) << 8) + | ((int64_t) (a[ 2]) << 16) + | ((int64_t) (a[ 3]) << 24) + | ((int64_t) (a[ 4]) << 32) + | ((int64_t) (a[ 5]) << 40) + | ((int64_t) (a[ 6]) << 48); + ad[ 1] = ((int64_t) (a[ 7]) << 0) + | ((int64_t) (a[ 8]) << 8) + | ((int64_t) (a[ 9]) << 16) + | ((int64_t) (a[10]) << 24) + | ((int64_t) (a[11]) << 32) + | ((int64_t) (a[12]) << 40) + | ((int64_t) (a[13]) << 48); + ad[ 2] = ((int64_t) (a[14]) << 0) + | ((int64_t) (a[15]) << 8) + | ((int64_t) (a[16]) << 16) + | ((int64_t) (a[17]) << 24) + | ((int64_t) (a[18]) << 32) + | ((int64_t) (a[19]) << 40) + | ((int64_t) (a[20]) << 48); + ad[ 3] = ((int64_t) (a[21]) << 0) + | ((int64_t) (a[22]) << 8) + | ((int64_t) (a[23]) << 16) + | ((int64_t) (a[24]) << 24) + | ((int64_t) (a[25]) << 32) + | ((int64_t) (a[26]) << 40) + | ((int64_t) (a[27]) << 48); + ad[ 4] = ((int64_t) (a[28]) << 0) + | ((int64_t) (a[29]) << 8) + | ((int64_t) (a[30]) << 16) + | ((int64_t) (a[31]) << 24) + | ((int64_t) (a[32]) << 32) + | ((int64_t) (a[33]) << 40) + | ((int64_t) (a[34]) << 48); + ad[ 5] = ((int64_t) (a[35]) << 0) + | ((int64_t) (a[36]) << 8) + | ((int64_t) (a[37]) << 16) + | ((int64_t) (a[38]) << 24) + | ((int64_t) (a[39]) << 32) + | ((int64_t) (a[40]) << 40) + | ((int64_t) (a[41]) << 48); + ad[ 6] = ((int64_t) (a[42]) << 0) + | ((int64_t) (a[43]) << 8) + | ((int64_t) (a[44]) << 16) + | ((int64_t) (a[45]) << 24) + | ((int64_t) (a[46]) << 32) + | ((int64_t) (a[47]) << 40) + | ((int64_t) (a[48]) << 48); + ad[ 7] = ((int64_t) (a[49]) << 0) + | ((int64_t) (a[50]) << 8) + | ((int64_t) (a[51]) << 16) + | ((int64_t) (a[52]) << 24) + | ((int64_t) (a[53]) << 32) + | ((int64_t) (a[54]) << 40) + | ((int64_t) (a[55]) << 48); + /* Load from bytes */ + bd[ 0] = ((int64_t) (b[ 0]) << 0) + | ((int64_t) (b[ 1]) << 8) + | ((int64_t) (b[ 2]) << 16) + | ((int64_t) (b[ 3]) << 24) + | ((int64_t) (b[ 4]) << 32) + | ((int64_t) (b[ 5]) << 40) + | ((int64_t) (b[ 6]) << 48); + bd[ 1] = ((int64_t) (b[ 7]) << 0) + | ((int64_t) (b[ 8]) << 8) + | ((int64_t) (b[ 9]) << 16) + | ((int64_t) (b[10]) << 24) + | ((int64_t) (b[11]) << 32) + | ((int64_t) (b[12]) << 40) + | ((int64_t) (b[13]) << 48); + bd[ 2] = ((int64_t) (b[14]) << 0) + | ((int64_t) (b[15]) << 8) + | ((int64_t) (b[16]) << 16) + | ((int64_t) (b[17]) << 24) + | 
((int64_t) (b[18]) << 32) + | ((int64_t) (b[19]) << 40) + | ((int64_t) (b[20]) << 48); + bd[ 3] = ((int64_t) (b[21]) << 0) + | ((int64_t) (b[22]) << 8) + | ((int64_t) (b[23]) << 16) + | ((int64_t) (b[24]) << 24) + | ((int64_t) (b[25]) << 32) + | ((int64_t) (b[26]) << 40) + | ((int64_t) (b[27]) << 48); + bd[ 4] = ((int64_t) (b[28]) << 0) + | ((int64_t) (b[29]) << 8) + | ((int64_t) (b[30]) << 16) + | ((int64_t) (b[31]) << 24) + | ((int64_t) (b[32]) << 32) + | ((int64_t) (b[33]) << 40) + | ((int64_t) (b[34]) << 48); + bd[ 5] = ((int64_t) (b[35]) << 0) + | ((int64_t) (b[36]) << 8) + | ((int64_t) (b[37]) << 16) + | ((int64_t) (b[38]) << 24) + | ((int64_t) (b[39]) << 32) + | ((int64_t) (b[40]) << 40) + | ((int64_t) (b[41]) << 48); + bd[ 6] = ((int64_t) (b[42]) << 0) + | ((int64_t) (b[43]) << 8) + | ((int64_t) (b[44]) << 16) + | ((int64_t) (b[45]) << 24) + | ((int64_t) (b[46]) << 32) + | ((int64_t) (b[47]) << 40) + | ((int64_t) (b[48]) << 48); + bd[ 7] = ((int64_t) (b[49]) << 0) + | ((int64_t) (b[50]) << 8) + | ((int64_t) (b[51]) << 16) + | ((int64_t) (b[52]) << 24) + | ((int64_t) (b[53]) << 32) + | ((int64_t) (b[54]) << 40) + | ((int64_t) (b[55]) << 48); + /* Load from bytes */ + dd[ 0] = ((int64_t) (d[ 0]) << 0) + | ((int64_t) (d[ 1]) << 8) + | ((int64_t) (d[ 2]) << 16) + | ((int64_t) (d[ 3]) << 24) + | ((int64_t) (d[ 4]) << 32) + | ((int64_t) (d[ 5]) << 40) + | ((int64_t) (d[ 6]) << 48); + dd[ 1] = ((int64_t) (d[ 7]) << 0) + | ((int64_t) (d[ 8]) << 8) + | ((int64_t) (d[ 9]) << 16) + | ((int64_t) (d[10]) << 24) + | ((int64_t) (d[11]) << 32) + | ((int64_t) (d[12]) << 40) + | ((int64_t) (d[13]) << 48); + dd[ 2] = ((int64_t) (d[14]) << 0) + | ((int64_t) (d[15]) << 8) + | ((int64_t) (d[16]) << 16) + | ((int64_t) (d[17]) << 24) + | ((int64_t) (d[18]) << 32) + | ((int64_t) (d[19]) << 40) + | ((int64_t) (d[20]) << 48); + dd[ 3] = ((int64_t) (d[21]) << 0) + | ((int64_t) (d[22]) << 8) + | ((int64_t) (d[23]) << 16) + | ((int64_t) (d[24]) << 24) + | ((int64_t) (d[25]) << 32) + | ((int64_t) (d[26]) << 40) + | ((int64_t) (d[27]) << 48); + dd[ 4] = ((int64_t) (d[28]) << 0) + | ((int64_t) (d[29]) << 8) + | ((int64_t) (d[30]) << 16) + | ((int64_t) (d[31]) << 24) + | ((int64_t) (d[32]) << 32) + | ((int64_t) (d[33]) << 40) + | ((int64_t) (d[34]) << 48); + dd[ 5] = ((int64_t) (d[35]) << 0) + | ((int64_t) (d[36]) << 8) + | ((int64_t) (d[37]) << 16) + | ((int64_t) (d[38]) << 24) + | ((int64_t) (d[39]) << 32) + | ((int64_t) (d[40]) << 40) + | ((int64_t) (d[41]) << 48); + dd[ 6] = ((int64_t) (d[42]) << 0) + | ((int64_t) (d[43]) << 8) + | ((int64_t) (d[44]) << 16) + | ((int64_t) (d[45]) << 24) + | ((int64_t) (d[46]) << 32) + | ((int64_t) (d[47]) << 40) + | ((int64_t) (d[48]) << 48); + dd[ 7] = ((int64_t) (d[49]) << 0) + | ((int64_t) (d[50]) << 8) + | ((int64_t) (d[51]) << 16) + | ((int64_t) (d[52]) << 24) + | ((int64_t) (d[53]) << 32) + | ((int64_t) (d[54]) << 40) + | ((int64_t) (d[55]) << 48); + + /* a * b + d */ + t[ 0] = dd[ 0] + (int128_t)ad[ 0] * bd[ 0]; + t[ 1] = dd[ 1] + (int128_t)ad[ 0] * bd[ 1] + + (int128_t)ad[ 1] * bd[ 0]; + t[ 2] = dd[ 2] + (int128_t)ad[ 0] * bd[ 2] + + (int128_t)ad[ 1] * bd[ 1] + + (int128_t)ad[ 2] * bd[ 0]; + t[ 3] = dd[ 3] + (int128_t)ad[ 0] * bd[ 3] + + (int128_t)ad[ 1] * bd[ 2] + + (int128_t)ad[ 2] * bd[ 1] + + (int128_t)ad[ 3] * bd[ 0]; + t[ 4] = dd[ 4] + (int128_t)ad[ 0] * bd[ 4] + + (int128_t)ad[ 1] * bd[ 3] + + (int128_t)ad[ 2] * bd[ 2] + + (int128_t)ad[ 3] * bd[ 1] + + (int128_t)ad[ 4] * bd[ 0]; + t[ 5] = dd[ 5] + (int128_t)ad[ 0] * bd[ 5] + + (int128_t)ad[ 1] * bd[ 4] + + 
(int128_t)ad[ 2] * bd[ 3] + + (int128_t)ad[ 3] * bd[ 2] + + (int128_t)ad[ 4] * bd[ 1] + + (int128_t)ad[ 5] * bd[ 0]; + t[ 6] = dd[ 6] + (int128_t)ad[ 0] * bd[ 6] + + (int128_t)ad[ 1] * bd[ 5] + + (int128_t)ad[ 2] * bd[ 4] + + (int128_t)ad[ 3] * bd[ 3] + + (int128_t)ad[ 4] * bd[ 2] + + (int128_t)ad[ 5] * bd[ 1] + + (int128_t)ad[ 6] * bd[ 0]; + t[ 7] = dd[ 7] + (int128_t)ad[ 0] * bd[ 7] + + (int128_t)ad[ 1] * bd[ 6] + + (int128_t)ad[ 2] * bd[ 5] + + (int128_t)ad[ 3] * bd[ 4] + + (int128_t)ad[ 4] * bd[ 3] + + (int128_t)ad[ 5] * bd[ 2] + + (int128_t)ad[ 6] * bd[ 1] + + (int128_t)ad[ 7] * bd[ 0]; + t[ 8] = (int128_t)ad[ 1] * bd[ 7] + + (int128_t)ad[ 2] * bd[ 6] + + (int128_t)ad[ 3] * bd[ 5] + + (int128_t)ad[ 4] * bd[ 4] + + (int128_t)ad[ 5] * bd[ 3] + + (int128_t)ad[ 6] * bd[ 2] + + (int128_t)ad[ 7] * bd[ 1]; + t[ 9] = (int128_t)ad[ 2] * bd[ 7] + + (int128_t)ad[ 3] * bd[ 6] + + (int128_t)ad[ 4] * bd[ 5] + + (int128_t)ad[ 5] * bd[ 4] + + (int128_t)ad[ 6] * bd[ 3] + + (int128_t)ad[ 7] * bd[ 2]; + t[10] = (int128_t)ad[ 3] * bd[ 7] + + (int128_t)ad[ 4] * bd[ 6] + + (int128_t)ad[ 5] * bd[ 5] + + (int128_t)ad[ 6] * bd[ 4] + + (int128_t)ad[ 7] * bd[ 3]; + t[11] = (int128_t)ad[ 4] * bd[ 7] + + (int128_t)ad[ 5] * bd[ 6] + + (int128_t)ad[ 6] * bd[ 5] + + (int128_t)ad[ 7] * bd[ 4]; + t[12] = (int128_t)ad[ 5] * bd[ 7] + + (int128_t)ad[ 6] * bd[ 6] + + (int128_t)ad[ 7] * bd[ 5]; + t[13] = (int128_t)ad[ 6] * bd[ 7] + + (int128_t)ad[ 7] * bd[ 6]; + t[14] = (int128_t)ad[ 7] * bd[ 7]; + t[15] = 0; + + /* Mod curve order */ + /* 2^446 - 0x8335dc163bb124b65129c96fde933d8d723a70aadc873d6d54a7bb0d */ + /* Propagate carries */ + c = t[ 0] >> 56; t[ 1] += c; t[ 0] = t[ 0] & 0xffffffffffffff; + c = t[ 1] >> 56; t[ 2] += c; t[ 1] = t[ 1] & 0xffffffffffffff; + c = t[ 2] >> 56; t[ 3] += c; t[ 2] = t[ 2] & 0xffffffffffffff; + c = t[ 3] >> 56; t[ 4] += c; t[ 3] = t[ 3] & 0xffffffffffffff; + c = t[ 4] >> 56; t[ 5] += c; t[ 4] = t[ 4] & 0xffffffffffffff; + c = t[ 5] >> 56; t[ 6] += c; t[ 5] = t[ 5] & 0xffffffffffffff; + c = t[ 6] >> 56; t[ 7] += c; t[ 6] = t[ 6] & 0xffffffffffffff; + c = t[ 7] >> 56; t[ 8] += c; t[ 7] = t[ 7] & 0xffffffffffffff; + c = t[ 8] >> 56; t[ 9] += c; t[ 8] = t[ 8] & 0xffffffffffffff; + c = t[ 9] >> 56; t[10] += c; t[ 9] = t[ 9] & 0xffffffffffffff; + c = t[10] >> 56; t[11] += c; t[10] = t[10] & 0xffffffffffffff; + c = t[11] >> 56; t[12] += c; t[11] = t[11] & 0xffffffffffffff; + c = t[12] >> 56; t[13] += c; t[12] = t[12] & 0xffffffffffffff; + c = t[13] >> 56; t[14] += c; t[13] = t[13] & 0xffffffffffffff; + c = t[14] >> 56; t[15] += c; t[14] = t[14] & 0xffffffffffffff; + /* Mod top half of extra words */ + t[ 4] += (int128_t)0x21cf5b5529eec34L * t[12]; + t[ 5] += (int128_t)0x0f635c8e9c2ab70L * t[12]; + t[ 6] += (int128_t)0x2d944a725bf7a4cL * t[12]; + t[ 7] += (int128_t)0x20cd77058eec490L * t[12]; + t[ 5] += (int128_t)0x21cf5b5529eec34L * t[13]; + t[ 6] += (int128_t)0x0f635c8e9c2ab70L * t[13]; + t[ 7] += (int128_t)0x2d944a725bf7a4cL * t[13]; + t[ 8] += (int128_t)0x20cd77058eec490L * t[13]; + t[ 6] += (int128_t)0x21cf5b5529eec34L * t[14]; + t[ 7] += (int128_t)0x0f635c8e9c2ab70L * t[14]; + t[ 8] += (int128_t)0x2d944a725bf7a4cL * t[14]; + t[ 9] += (int128_t)0x20cd77058eec490L * t[14]; + t[ 7] += (int128_t)0x21cf5b5529eec34L * t[15]; + t[ 8] += (int128_t)0x0f635c8e9c2ab70L * t[15]; + t[ 9] += (int128_t)0x2d944a725bf7a4cL * t[15]; + t[10] += (int128_t)0x20cd77058eec490L * t[15]; + /* Propagate carries */ + c = t[ 4] >> 56; t[ 5] += c; t[ 4] = t[ 4] & 0xffffffffffffff; + c = t[ 5] >> 56; t[ 6] += c; t[ 5] = 
t[ 5] & 0xffffffffffffff; + c = t[ 6] >> 56; t[ 7] += c; t[ 6] = t[ 6] & 0xffffffffffffff; + c = t[ 7] >> 56; t[ 8] += c; t[ 7] = t[ 7] & 0xffffffffffffff; + c = t[ 8] >> 56; t[ 9] += c; t[ 8] = t[ 8] & 0xffffffffffffff; + c = t[ 9] >> 56; t[10] += c; t[ 9] = t[ 9] & 0xffffffffffffff; + c = t[10] >> 56; t[11] += c; t[10] = t[10] & 0xffffffffffffff; + /* Mod bottom half of extra words */ + t[ 0] += (int128_t)0x21cf5b5529eec34L * t[ 8]; + t[ 1] += (int128_t)0x0f635c8e9c2ab70L * t[ 8]; + t[ 2] += (int128_t)0x2d944a725bf7a4cL * t[ 8]; + t[ 3] += (int128_t)0x20cd77058eec490L * t[ 8]; + t[ 1] += (int128_t)0x21cf5b5529eec34L * t[ 9]; + t[ 2] += (int128_t)0x0f635c8e9c2ab70L * t[ 9]; + t[ 3] += (int128_t)0x2d944a725bf7a4cL * t[ 9]; + t[ 4] += (int128_t)0x20cd77058eec490L * t[ 9]; + t[ 2] += (int128_t)0x21cf5b5529eec34L * t[10]; + t[ 3] += (int128_t)0x0f635c8e9c2ab70L * t[10]; + t[ 4] += (int128_t)0x2d944a725bf7a4cL * t[10]; + t[ 5] += (int128_t)0x20cd77058eec490L * t[10]; + t[ 3] += (int128_t)0x21cf5b5529eec34L * t[11]; + t[ 4] += (int128_t)0x0f635c8e9c2ab70L * t[11]; + t[ 5] += (int128_t)0x2d944a725bf7a4cL * t[11]; + t[ 6] += (int128_t)0x20cd77058eec490L * t[11]; + /* Propagate carries */ + c = t[ 0] >> 56; t[ 1] += c; rd[ 0] = (int64_t)(t[ 0] & 0xffffffffffffff); + c = t[ 1] >> 56; t[ 2] += c; rd[ 1] = (int64_t)(t[ 1] & 0xffffffffffffff); + c = t[ 2] >> 56; t[ 3] += c; rd[ 2] = (int64_t)(t[ 2] & 0xffffffffffffff); + c = t[ 3] >> 56; t[ 4] += c; rd[ 3] = (int64_t)(t[ 3] & 0xffffffffffffff); + c = t[ 4] >> 56; t[ 5] += c; rd[ 4] = (int64_t)(t[ 4] & 0xffffffffffffff); + c = t[ 5] >> 56; t[ 6] += c; rd[ 5] = (int64_t)(t[ 5] & 0xffffffffffffff); + c = t[ 6] >> 56; t[ 7] += c; rd[ 6] = (int64_t)(t[ 6] & 0xffffffffffffff); + rd[ 7] = t[7]; + /* Mod bits over 56 in last word */ + o = rd[7] >> 54; rd[ 7] &= 0x3fffffffffffff; + rd[ 0] += 0x873d6d54a7bb0dL * o; + rd[ 1] += 0x3d8d723a70aadcL * o; + rd[ 2] += 0xb65129c96fde93L * o; + rd[ 3] += 0x8335dc163bb124L * o; + /* Propagate carries */ + o = rd[ 0] >> 56; rd[ 1] += o; rd[ 0] = rd[ 0] & 0xffffffffffffff; + o = rd[ 1] >> 56; rd[ 2] += o; rd[ 1] = rd[ 1] & 0xffffffffffffff; + o = rd[ 2] >> 56; rd[ 3] += o; rd[ 2] = rd[ 2] & 0xffffffffffffff; + o = rd[ 3] >> 56; rd[ 4] += o; rd[ 3] = rd[ 3] & 0xffffffffffffff; + o = rd[ 4] >> 56; rd[ 5] += o; rd[ 4] = rd[ 4] & 0xffffffffffffff; + o = rd[ 5] >> 56; rd[ 6] += o; rd[ 5] = rd[ 5] & 0xffffffffffffff; + o = rd[ 6] >> 56; rd[ 7] += o; rd[ 6] = rd[ 6] & 0xffffffffffffff; + + /* Convert to bytes */ + r[ 0] = (rd[0 ] >> 0); + r[ 1] = (rd[0 ] >> 8); + r[ 2] = (rd[0 ] >> 16); + r[ 3] = (rd[0 ] >> 24); + r[ 4] = (rd[0 ] >> 32); + r[ 5] = (rd[0 ] >> 40); + r[ 6] = (rd[0 ] >> 48); + r[ 7] = (rd[1 ] >> 0); + r[ 8] = (rd[1 ] >> 8); + r[ 9] = (rd[1 ] >> 16); + r[10] = (rd[1 ] >> 24); + r[11] = (rd[1 ] >> 32); + r[12] = (rd[1 ] >> 40); + r[13] = (rd[1 ] >> 48); + r[14] = (rd[2 ] >> 0); + r[15] = (rd[2 ] >> 8); + r[16] = (rd[2 ] >> 16); + r[17] = (rd[2 ] >> 24); + r[18] = (rd[2 ] >> 32); + r[19] = (rd[2 ] >> 40); + r[20] = (rd[2 ] >> 48); + r[21] = (rd[3 ] >> 0); + r[22] = (rd[3 ] >> 8); + r[23] = (rd[3 ] >> 16); + r[24] = (rd[3 ] >> 24); + r[25] = (rd[3 ] >> 32); + r[26] = (rd[3 ] >> 40); + r[27] = (rd[3 ] >> 48); + r[28] = (rd[4 ] >> 0); + r[29] = (rd[4 ] >> 8); + r[30] = (rd[4 ] >> 16); + r[31] = (rd[4 ] >> 24); + r[32] = (rd[4 ] >> 32); + r[33] = (rd[4 ] >> 40); + r[34] = (rd[4 ] >> 48); + r[35] = (rd[5 ] >> 0); + r[36] = (rd[5 ] >> 8); + r[37] = (rd[5 ] >> 16); + r[38] = (rd[5 ] >> 24); + r[39] = (rd[5 ] >> 32); + r[40] = 
(rd[5 ] >> 40); + r[41] = (rd[5 ] >> 48); + r[42] = (rd[6 ] >> 0); + r[43] = (rd[6 ] >> 8); + r[44] = (rd[6 ] >> 16); + r[45] = (rd[6 ] >> 24); + r[46] = (rd[6 ] >> 32); + r[47] = (rd[6 ] >> 40); + r[48] = (rd[6 ] >> 48); + r[49] = (rd[7 ] >> 0); + r[50] = (rd[7 ] >> 8); + r[51] = (rd[7 ] >> 16); + r[52] = (rd[7 ] >> 24); + r[53] = (rd[7 ] >> 32); + r[54] = (rd[7 ] >> 40); + r[55] = (rd[7 ] >> 48); + r[56] = 0; +} + +/* Precomputed multiples of the base point. */ +static const ge448_precomp base[58][8] = { +{ + { + { 0x26a82bc70cc05eL, 0x80e18b00938e26L, 0xf72ab66511433bL, + 0xa3d3a46412ae1aL, 0x0f1767ea6de324L, 0x36da9e14657047L, + 0xed221d15a622bfL, 0x4f1970c66bed0dL }, + { 0x08795bf230fa14L, 0x132c4ed7c8ad98L, 0x1ce67c39c4fdbdL, + 0x05a0c2d73ad3ffL, 0xa3984087789c1eL, 0xc7624bea73736cL, + 0x248876203756c9L, 0x693f46716eb6bcL } + }, + { + { 0x55555555555555L, 0x55555555555555L, 0x55555555555555L, + 0x55555555555555L, 0xaaaaaaaaaaaaa9L, 0xaaaaaaaaaaaaaaL, + 0xaaaaaaaaaaaaaaL, 0xaaaaaaaaaaaaaaL }, + { 0xeafbcdea9386edL, 0xb2bed1cda06bdaL, 0x833a2a3098bbbcL, + 0x8ad8c4b80d6565L, 0x884dd7b7e36d72L, 0xc2b0036ed7a035L, + 0x8db359d6205086L, 0xae05e9634ad704L } + }, + { + { 0x28173286ff2f8fL, 0xb769465da85757L, 0xf7f6271fd6e862L, + 0x4a3fcfe8daa9cbL, 0xda82c7e2ba077aL, 0x943332241b8b8cL, + 0x6455bd64316cb6L, 0x0865886b9108afL }, + { 0x22ac13588ed6fcL, 0x9a68fed02dafb8L, 0x1bdb6767f0bffaL, + 0xec4e1d58bb3a33L, 0x56c3b9fce43c82L, 0xa6449a4a8d9523L, + 0xf706cbda7ad43aL, 0xe005a8dbd5125cL } + }, + { + { 0xce42ac48ba7f30L, 0xe1798949e120e2L, 0xf1515dd8ba21aeL, + 0x70c74cc301b7bdL, 0x0891c693fda4beL, 0x29ea255a09cf4eL, + 0x2c1419a17226f9L, 0x49dcbc5c6c0cceL }, + { 0xe236f86de51839L, 0x44285d0d4f5b32L, 0x7ea1ca9472b5d4L, + 0x7b8a5bc1c0d8f9L, 0x57d845c90dc322L, 0x1b979cb7c02f04L, + 0x27164b33a5de02L, 0xd49077e4accde5L } + }, + { + { 0xa99d1092030034L, 0x2d8cefc6f950d0L, 0x7a920c3c96f07bL, + 0x958812808bc0d5L, 0x62ada756d761e8L, 0x0def80cbcf7285L, + 0x0e2ba7601eedb5L, 0x7a9f9335a48dcbL }, + { 0xb4731472f435ebL, 0x5512881f225443L, 0xee59d2b33c5840L, + 0xb698017127d7a4L, 0xb18fced86551f7L, 0x0ade260ca1823aL, + 0xd3b9109ce4fd58L, 0xadfd751a2517edL } + }, + { + { 0x7fd7652abef79cL, 0x6c20a07443a878L, 0x5c1840d12a7109L, + 0x4a06e4a876451cL, 0x3bed0b4ad95f65L, 0x25d2e673fb0260L, + 0x2e00349aebd971L, 0x54523e04498b72L }, + { 0xea5d1da07c7bccL, 0xcce776938ea98cL, 0x80284e861d2b3eL, + 0x48de76b6e1ff1bL, 0x7b121869c58522L, 0xbfd053a2765a1aL, + 0x2d743ec056c667L, 0x3f99b9cd8ab61cL } + }, + { + { 0xdf9567ceb5eaf7L, 0x110a6b478ac7d7L, 0x2d335014706e0bL, + 0x0df9c7b0b5a209L, 0xba4223d568e684L, 0xd78af2d8c3719bL, + 0x77467b9a5291b6L, 0x079748e5c89befL }, + { 0xe20d3fadac377fL, 0x34e866972b5c09L, 0xd8687a3c40bbb7L, + 0x7b3946fd2f84c9L, 0xd00e40ca78f50eL, 0xb87594417e7179L, + 0x9c7373bcb23583L, 0x7ddeda3c90fd69L } + }, + { + { 0x2538a67153bde0L, 0x223aca9406b696L, 0xf9080dc1ad713eL, + 0x6c4cb47d816a64L, 0xbc285685dc8b97L, 0xd97b037c08e2d7L, + 0x5b63fb45d0e66bL, 0xd1f1bc5520e8a3L }, + { 0x4eb873ce69e09bL, 0x1663164bc8ee45L, 0x08f7003ba8d89fL, + 0x4b98ead386ad82L, 0xa4b93b7bd94c7bL, 0x46ba408c6b38b3L, + 0xdae87d1f3574ffL, 0xc7564f4e9bea9bL } + }, +}, +{ + { + { 0x2e4fdb25bfac1cL, 0xf0d79aaf5f3bcaL, 0xe756b0d20fb7ccL, + 0xe3696beb39609aL, 0xa019fc35a5ab58L, 0xa2b24853b281ddL, + 0xe3e2be761ac0a2L, 0xf19c34feb56730L }, + { 0x2d25ce8a30241eL, 0xf5661eab73d7a1L, 0x4611ed0daac9f4L, + 0xd5442344ced72cL, 0xce78f52e92e985L, 0x6fe5dd44da4aadL, + 0xfcaddc61d363ceL, 0x3beb69cc9111bfL } + }, + { + { 0xd2e7660940ebc9L, 
0xe032018b17bbe0L, 0xad4939175c0575L, + 0xdd0b14721c7f34L, 0x52c2ba43e147e0L, 0x7dd03c60ee8973L, + 0x5472e8decf2754L, 0x17a1cd1d6482bbL }, + { 0xdd43b848128b3fL, 0xf0cae34ea7dd25L, 0x81ca99fff07df2L, + 0x1c8959792ebbdcL, 0x45c7a6872155e6L, 0x907a50e39ddd08L, + 0xbe398c2bb2d89bL, 0x38063f91b3b536L } + }, + { + { 0x149fafbf843b23L, 0x00ab582ac7f22aL, 0xa3b981bf2f4d4cL, + 0x2ce1a654341a22L, 0x68a40747c03b63L, 0x63206a212f2cf8L, + 0xc9961d35149741L, 0xfb85430bc7099eL }, + { 0x9c9107290a9e59L, 0x734e94a06de367L, 0x5cf3cbedb99214L, + 0xc6bce3245b1fb9L, 0x1a82abedd7be0dL, 0xf74976aede7d1cL, + 0x7025b7c21503bdL, 0xf7894910d096abL } + }, + { + { 0x6bd48bb555a41bL, 0xfbdd0d067de206L, 0x98bc477dd6dfd1L, + 0x1d0693b3e40b8aL, 0x6e15563da32ae4L, 0x0194a20fcebaa2L, + 0xda116150980a93L, 0x8e119200109cecL }, + { 0x8ea0552ffb9726L, 0xeba50a4047e44bL, 0xc050d2460ddf76L, + 0xe009204ac690e0L, 0x47b86399b18edcL, 0x2f5b76ac77f23fL, + 0x4296c240792905L, 0x73f6b4a06f6dc7L } + }, + { + { 0xb6ef9ea3b10cadL, 0x312843df7c8fceL, 0x5bdcd528bedf86L, + 0x2889059f6dd823L, 0x04578e908bfde0L, 0x3245df3123e2e5L, + 0xbf461d57ee9e3aL, 0xddec2d46f94cebL }, + { 0x21b43b9145768fL, 0xe79a8f9dae962aL, 0xff1972bcbb043fL, + 0xe3dcf6d239649bL, 0xed592bdc533b85L, 0x14ff94fdbe22d0L, + 0x6c4eb87f1d8e22L, 0xd8d4c71d18cf6dL } + }, + { + { 0xcda666c8d96345L, 0x9ecaa25836cd21L, 0x6e885bd984606eL, + 0x1dd5fef804f054L, 0x9dfff6b6959ae4L, 0x99b9cf8c9b55ccL, + 0xb4716b062b9b80L, 0x13ec87c554b128L }, + { 0xe696d1f75aacc2L, 0xf78c99387fc5ffL, 0x76c09473809d42L, + 0x99ce62db618fa8L, 0x35e3e022f53341L, 0x62fc1ac0db6c5eL, + 0xa1fb8e600d8b47L, 0x0bc107058f0d1eL } + }, + { + { 0x1f4526916da513L, 0x1f2fc04f5cf341L, 0xae9208664d23e0L, + 0x4e33082da8a113L, 0x2688ec61cfc085L, 0x6f2e8de6e5327fL, + 0x2070db3b4e48a8L, 0xd6626973240adeL }, + { 0xa6b317ffbd997bL, 0x9fa1b5649e26bdL, 0xcbf0d258cba0f3L, + 0x4a7791b17b4745L, 0x25f555b5c9e190L, 0x7cd3940923ec4cL, + 0x16f4c6ae98f1b6L, 0x7962116bcd4e0fL } + }, + { + { 0x8d58fa302491e3L, 0x7cf76c67ab3898L, 0xbc2f657647ebc7L, + 0x5f4bfe0d25f5a3L, 0x503f478d69505dL, 0x4a889fc3fb6645L, + 0x33e1bc1fa86b18L, 0xabb234f5508dd8L }, + { 0x5348e1b9a05b48L, 0x57ac5f164dc858L, 0x21f4d38ec8a2d3L, + 0x5ec6d3ca3a3e9dL, 0xcd4062e560a0b8L, 0x49b74f73433f59L, + 0xefd9d87cab14e3L, 0x858ce7feb964f5L } + }, +}, +{ + { + { 0x7577254eb731b4L, 0x9fff1fb4e2397eL, 0x749b145c821715L, + 0x40619fe2e65e67L, 0x57b82812e618d8L, 0x063186c707b83eL, + 0xcfc80cb31b24a2L, 0xcca6185ac75169L }, + { 0x6539f44b255818L, 0x5895da00368bceL, 0x841a30917c7482L, + 0x85469e1b1a9c9eL, 0x05664c0e4f7d9dL, 0x8a063187b35cc0L, + 0x214763aa0e9b0aL, 0x1bd872c4b26ac2L } + }, + { + { 0x3578f97a93762bL, 0x434f69a72d52bcL, 0xddcca4022cb565L, + 0xa7d1e41ff20544L, 0x823475d8a66588L, 0x9fc97c799d7bafL, + 0x15542f1660e421L, 0xa7d1f60843faf6L }, + { 0xbbfaab54063cccL, 0x3ad9bada49855aL, 0xffd5f1c5bddbfeL, + 0x0e419c2ae87e59L, 0xdce6ed6f89956bL, 0xf047c21ccd8951L, + 0x6ed4a1ba83c991L, 0x85af86e2d28e0aL } + }, + { + { 0x04433c49ed48a8L, 0xeffa8580bc375dL, 0xfb0e1b2fa6e3b5L, + 0x51483a2a1aaddaL, 0x733448df8b2ea8L, 0xaa0513cf639f0cL, + 0x6bc61a3a23bf84L, 0x3e64f68dc2430dL }, + { 0x51bf502c5876b1L, 0x6b833751c0dd2aL, 0xe597be1342914fL, + 0x43d5ab0f8e632cL, 0x2696715d62587bL, 0xe87d20aed34f24L, + 0x25b7e14e18baf7L, 0xf5eb753e22e084L } + }, + { + { 0x51da71724d8295L, 0xd478e4318d1340L, 0xacf94f42cf7f66L, + 0x230d7d13760711L, 0x078a66a5abc626L, 0xd78b0bd6b5f6daL, + 0x23a971396d1d0bL, 0x87623d64bd960fL }, + { 0x0841a9977db53fL, 0x23c1a53f4d03eeL, 0x2f62c2e1f95df1L, + 0xd1e2ec1116f4e7L, 
0x896d2fe34811a9L, 0xad65e2bec8096eL, + 0x09d36f9b1744a6L, 0x564bac7ff5ddf7L } + }, + { + { 0x48b41e2c3f77cbL, 0x52276730968938L, 0xff1b899fd9b452L, + 0x67cf3bf2e03908L, 0x3731d90248a6fbL, 0xd800a05256598fL, + 0x347d2f2bdc8530L, 0xc72a3007ad08a1L }, + { 0x5e5be741d65f73L, 0x183d4ae4206eadL, 0xcb50c1cade4013L, + 0x39db43d3102483L, 0x0eb49fa70d6325L, 0xa18f6a2c1f02b9L, + 0x3e6fe30dbf5e66L, 0xac4eeb93a82aa5L } + }, + { + { 0x295affd3613d47L, 0x7b7e68ab56f343L, 0x980629692b173bL, + 0x937061ebad35fbL, 0x25019785c21eeaL, 0xe92721b787a746L, + 0x463c46c3651631L, 0x6da4b5dc6f2d5aL }, + { 0xcb67cc16e6d18cL, 0x1b30d520010588L, 0x1bb6ea6db1d1e8L, + 0x9c6308aad11474L, 0xc3167413d19b1cL, 0xf2e84d7be4fb79L, + 0xeccb873e050f77L, 0xf7c8d80cc2bf86L } + }, + { + { 0x16fe2e17ab20e5L, 0x274deadecf3a92L, 0x9f434870972f67L, + 0x9a65a454605751L, 0x9351f07b8980b2L, 0x412962b0eb08a5L, + 0xb8c9bfd733f440L, 0xac2cd641ca250fL }, + { 0x68cdd0f2ba7d26L, 0xd3d2a4a4e0beeaL, 0x50135c19f4a258L, + 0xb475e53f0d02e4L, 0x432d8c6589283aL, 0x29141bfa0a2b6cL, + 0xd7379ec13704bcL, 0x831562c52459bfL } + }, + { + { 0x676b366eeec506L, 0xdd6cad545da557L, 0x9de39cb77057d2L, + 0x388c5fedf05bf1L, 0x6e55650dfb1f03L, 0xdbceffa52126c9L, + 0xe4d187b3a4a220L, 0xac914f9eb27020L }, + { 0x3f4ab98d2e5f30L, 0x6ae97dadd94451L, 0x64af6950d80981L, + 0x36b4b90f2aa2ceL, 0x6adcd7a18fcf59L, 0x3ddfe6dc116c81L, + 0x661072b549b9e3L, 0xd9e3134ec4584dL } + }, +}, +{ + { + { 0x6e46707a1e400cL, 0xcdc990b551e806L, 0xfa512513a07724L, + 0x500553f1b3e4f5L, 0x67e8b58ef4dac3L, 0x958349f2cb4cc7L, + 0x948b4ed7f9143cL, 0xe646d092b7822bL }, + { 0xd185dd52bc3c26L, 0x34ba16ec837fc9L, 0x516d4ba5a788b7L, + 0x72f2de756142b0L, 0x5846f61f445b3dL, 0xdaec5c9f4631a1L, + 0xa10b18d169ea9bL, 0x85d2998af6751bL } + }, + { + { 0xda0cac443ddf31L, 0x0966e171860911L, 0x9c3a7173cba600L, + 0x5781880571f895L, 0x5e2a927737ac21L, 0x8a461486c253fbL, + 0xe801cf595ee626L, 0x271166a5f84fc0L }, + { 0x306937fba856bdL, 0x80cb179be80a43L, 0x70393b2ffb5980L, + 0xa8e4a1c660fc64L, 0x5078abfc0d5c98L, 0x62ba530fbd31ffL, + 0xda608449e51b88L, 0xdb6ecb0355ae15L } + }, + { + { 0xbcbb6ea23c5d49L, 0x08906ba87959bcL, 0x61cc0880991665L, + 0x21d6b41d90d13cL, 0x0c27ac1d03afe9L, 0x159995f5cfea52L, + 0x4057e20bdfe220L, 0xdd1b349cbdf058L }, + { 0x0cd66262e37159L, 0x8cea8e43eb0d17L, 0x553af085bce7f0L, + 0xb94cb5f5b6511dL, 0x7b8d3a550e0330L, 0x415911057ab7e7L, + 0x320820e6aa886fL, 0x130d4d6c5b6b81L } + }, + { + { 0x2f98059c7bb2edL, 0x33ebf4ca49bdfbL, 0x04c72a1b0a675bL, + 0x94f9ea4adb6c14L, 0x03376d8cf728c0L, 0x5c059d34c6eb6aL, + 0x0178408eb8da48L, 0x8bf607b2956817L }, + { 0x7ad2822ceb3d28L, 0xd07a40337ae653L, 0xbc68739c1e46b2L, + 0x15d7cca9154ba9L, 0x6b97103a26617dL, 0xa610314b2e0d28L, + 0x52a08bafd4d363L, 0x80c2638c7dc2afL } + }, + { + { 0x0cde7ef3187140L, 0x93b92ca4b70acdL, 0x5696e507a79cdcL, + 0x73cc9728eaab66L, 0x6b8c5b68f1b0c7L, 0xb39a3184f7e0b1L, + 0x72cfb0d376108aL, 0x0c53efc98536a7L }, + { 0x03b52a824c2f1eL, 0x717132e6399b78L, 0x31ebd25349a85dL, + 0x265ee811a200d4L, 0x0b1aad2407d7adL, 0x9a9ebc894d2962L, + 0x994e6cd41171d9L, 0x09178d86c8fa83L } + }, + { + { 0x7d1d238a2593a1L, 0x863e93ab38fb19L, 0xd23a4cce7712a9L, + 0x7477b1327efcd5L, 0x3ba69ff1392f6cL, 0x63e0c32f7bb5a5L, + 0x20412c0026effdL, 0xd3ee8e4ef424abL }, + { 0x14c0b2d64e5174L, 0x2a611f2e58c47bL, 0xaa58a06c1e8635L, + 0x1870c3ecf17034L, 0xb0d5e3483f1bf3L, 0xb19905c16c7eb3L, + 0xbf85d626efa4caL, 0xfd16b2f180f92bL } + }, + { + { 0xc0431af3adcb48L, 0xc9a7a8dba90496L, 0xd765a163895294L, + 0xb02a41a551de70L, 0xb71b261749b8a1L, 0x0dfa89ec6f3e47L, + 
0x392c0d80f5d9ceL, 0x43c59d831aee3cL }, + { 0x94bfb6d4d76f49L, 0xe8f5b8227d68a5L, 0x78ae1d9630fd08L, + 0x1379029ce1bdaeL, 0x9689da066715dcL, 0x5d4cb24d3278c7L, + 0x77c98339e84fbcL, 0xc8478dcea1048cL } + }, + { + { 0xe4b8f31770d2baL, 0x744f65242ea095L, 0xd06e090036f138L, + 0xd3a3d5b3b078caL, 0xc7ae54178b8417L, 0xad6c5d4c738fd7L, + 0x61789844676454L, 0xfbf34235d9a392L }, + { 0x8e451a7fff772fL, 0x8605bb75ffbeadL, 0x6f75cc1930d59fL, + 0xd4f47558f3f460L, 0xefd2d796700c8aL, 0xceb462a2406421L, + 0x8ed0f979dfe8f1L, 0x0280bf1d1d7600L } + }, +}, +{ + { + { 0x761c219dd9a54dL, 0x1127fcb86a39c0L, 0x7d0e4f04c9beddL, + 0x27c017a4d976b6L, 0x800c973da042cfL, 0xe7419af2593f11L, + 0xbd49448ae67960L, 0xd3b60b7744fd85L }, + { 0x5e74ed961676feL, 0x7383ef339af627L, 0x34407e05e62df7L, + 0xb0534618bf3196L, 0xd6b7184583b407L, 0xe3d068555011beL, + 0x94083d02124b52L, 0xa908324f780aafL } + }, + { + { 0xb27af1a73ec9c3L, 0xb66ad9f70fa725L, 0x07724f58cf73e4L, + 0xc3fcd579949358L, 0x06efb79da0cc01L, 0x1e977d210597c9L, + 0xcd732be703e8d6L, 0x6fd29bf6d0b69eL }, + { 0xca658ac667128eL, 0xca0036ac7872b3L, 0xc9698585355837L, + 0x59f3be8075cf1cL, 0x9f1b9b03809a11L, 0x6881ced9733871L, + 0x8cda0fbe902a5fL, 0x4d8c69b4e3871eL } + }, + { + { 0x5c3bd07ddee82fL, 0xe52dd312f9723bL, 0xcf8761174f1be8L, + 0xd9ecbd835f8657L, 0x4f77393fbfea17L, 0xec9579fd78fe2cL, + 0x320de920fb0450L, 0xbfc9b8d95d9c47L }, + { 0x818bd425e1b4c3L, 0x0e0c41c40e2c78L, 0x0f7ce9abccb0d0L, + 0xc7e9fa45ef81fbL, 0x2561d6f73574adL, 0xa2d8d99d2efb0bL, + 0xcf8f316e96cd0aL, 0x088f0f14964807L } + }, + { + { 0x0a8498945d5a19L, 0x47ab39c6c2131fL, 0x5c02824f3fc35dL, + 0x3be77c89ee8127L, 0xa8491b7c90b80aL, 0x5397631a28aa93L, + 0x54d6e816c0b344L, 0x22878be876d0e4L }, + { 0xeecb8a46db3bf6L, 0x340f29554577a3L, 0xa7798689a00f85L, + 0x98465d74bb9147L, 0x9532d7dda3c736L, 0x6d574f17504b20L, + 0x6e356f4d86e435L, 0x70c2e8d4533887L } + }, + { + { 0xdce5a0ad293980L, 0x32d7210069010eL, 0x64af59f06deaaaL, + 0xd6b43c459239e4L, 0x74bf2559199c29L, 0x3efff4111e1e2bL, + 0x1aa7b5ecb0f8d8L, 0x9baa22b989e395L }, + { 0xf78db807b33ac1L, 0x05a3b4354ce80aL, 0x371defc7bc8e12L, + 0x63305a01224610L, 0x028b1ae6d697efL, 0x7aba39c1cd8051L, + 0x76ed7a928ee4b4L, 0x31bd02a7f99901L } + }, + { + { 0xf9dab7af075566L, 0x84e29a5f56f18bL, 0x3a4c45af64e56dL, + 0xcf3644a6a7302dL, 0xfb40808156b658L, 0xf33ef9cf96be52L, + 0xfe92038caa2f08L, 0xcfaf2e3b261894L }, + { 0xf2a0dbc224ce3fL, 0xed05009592eb27L, 0x501743f95889d0L, + 0xa88a47877c95c2L, 0x86755fbdd63da9L, 0x9024acfc7ee828L, + 0x634b020f38113bL, 0x3c5aacc6056e64L } + }, + { + { 0xe03ff3aa2ef760L, 0x3b95767b1c3bacL, 0x51ce6aa940d754L, + 0x7cbac3f47a9a3dL, 0xa864ac434f8d1aL, 0x1eff3f280dbd47L, + 0xd8ab6607ebd5caL, 0xc4df5c405b07edL }, + { 0x3dc92dfa4f095bL, 0x5ae36a57cdbd9aL, 0x7ff29737891e04L, + 0x37c03130a5fe7bL, 0x210d7b0aa6e35eL, 0x6edfb53bf200d8L, + 0x787b68d84afb85L, 0x9b5c49b72c6de3L } + }, + { + { 0x51857164010f4eL, 0xe0b144b0536ebeL, 0xacabb14887d663L, + 0xac1caededf584fL, 0xb43fb8faf175a3L, 0x310b6d5f992a3cL, + 0xf2c4aa285178a4L, 0x69c99698bd56bfL }, + { 0x73d6372a4d972eL, 0x3d5bb2e9583803L, 0x7bf7d18d891581L, + 0xa5ce5d7568a34aL, 0x670b4331f45c81L, 0x97265a71f96910L, + 0xdb14eb3b07c1eaL, 0xdf008eafed447cL } + }, +}, +{ + { + { 0x0379f5a00c2f10L, 0xb320b4fd350285L, 0x74e560e8efdd7dL, + 0xf2f017ef46a140L, 0x2ced1a60f34624L, 0x7c4b4e3ca08ec9L, + 0xdffc2a15d8bc6bL, 0xcc8f3f3527b007L }, + { 0x59f8ac4861fe83L, 0x8d48d2cd03144cL, 0xa8457d2bfa6dceL, + 0xd7ed333677c136L, 0xcb8e219c228e18L, 0x5f70bc916ab1e4L, + 0x2ae3a3d3780370L, 0x9f3365488f17adL } + }, + { + { 
0xeab0710960e4bbL, 0xc668a78ab9cfd3L, 0x2e85553b0ef946L, + 0xa43c4b98df5df3L, 0x0ecd5593cb3646L, 0x6f543c418dbe71L, + 0xee7edaaf59818bL, 0xc44e8d290911c1L }, + { 0xafb38b1269b509L, 0x9e2737c52afe2cL, 0x5b2ef02ccfa664L, + 0x1e0aeace1cc58bL, 0x37a57e95ea134eL, 0xc9c465a83b9fc2L, + 0x4b9e8c76e3eccaL, 0xca07dbe9bdbab5L } + }, + { + { 0xd297f3cb0d7807L, 0xee441a5f59ce61L, 0x728553bb2db844L, + 0x90f87e5640e9e0L, 0xaa72cbfcb76dffL, 0x065c6864012d57L, + 0xd5ee88f9678b44L, 0x3d74b852177603L }, + { 0x3f9c947748b68eL, 0x03856d98f44d44L, 0xde34b84462426cL, + 0xc16d1bb845ab29L, 0x9df6217d2e18deL, 0xec6d219b154643L, + 0x22a8ec32ee0f8fL, 0x632ad3891c5175L } + }, + { + { 0x19d9d236869267L, 0x628df94fe5532aL, 0x458d76c6dc9a01L, + 0x405fe6c2cc39c8L, 0x7dddc67f3a04baL, 0xfee630312500c7L, + 0x580b6f0a50e9deL, 0xfb5918a6090604L }, + { 0xd7159253af6b2dL, 0x83d62d61c7d1ecL, 0x94398c185858c4L, + 0x94643dc14bfb64L, 0x758fa38af7db80L, 0xe2d7d93a8a1557L, + 0xa569e853562af1L, 0xd226bdd84346aaL } + }, + { + { 0xc2d0a5ed0ccd20L, 0xeb9adb85dbc0cfL, 0xe0a29ee26d7e88L, + 0x8bb39f884a8e98L, 0x511f1c137396eaL, 0xbc9ec5ac8b2fb3L, + 0x299d81c090e5bcL, 0xe1dfe344cdd587L }, + { 0x80f61f45e465b7L, 0x5699c531bad59eL, 0x85e92e4b79ff92L, + 0x1e64fce9db244cL, 0x3748574a22097dL, 0xe2aa6b9efff24eL, + 0xb951be70a10bc6L, 0x66853269067a1cL } + }, + { + { 0xf716ddfa6114d3L, 0x9e515f5037ec1fL, 0x773454144944a6L, + 0x1540c4caba97ccL, 0xe41e5488b54bb7L, 0x4363156cae37bcL, + 0xc384eaff3d2ce8L, 0x72a4f454c58ba4L }, + { 0x0ceb530dcaf3fcL, 0x72d536578dcdbbL, 0x9b44084c6320faL, + 0x6262d34eb74c70L, 0x8abac85608e6dcL, 0x82a526410dd38dL, + 0xbc39911a819b8dL, 0xbda15fe03ad0d9L } + }, + { + { 0xadbf587f9dc60bL, 0xf9d814f7d846d2L, 0xccdd241b77bde0L, + 0x89cb6d72242f50L, 0x95c0e3ee6360a8L, 0x7c7dd5adf49713L, + 0x68e0e4957d5814L, 0x3aa097d0c16571L }, + { 0xb56b672267d03aL, 0x4f557088c44af4L, 0x67c49e7f3252a5L, + 0x871d6cfc94a469L, 0x57ae99801fbfaaL, 0x5c0e48f48a5d8eL, + 0xe9bf9c85e240b9L, 0xa41018999d41caL } + }, + { + { 0x6beb0c7b2889b4L, 0x78b7f899455370L, 0xd43421447ca364L, + 0xdd9d2da9f21e5bL, 0xa0c7c180a7e4aaL, 0x022c0d4da1660cL, + 0xe1f5c165a57002L, 0x51c7c9e518f68fL }, + { 0x6d521b62586502L, 0xa0f2cb3183ec1bL, 0x578b4e0caa5e16L, + 0x7bd4fbd764997fL, 0x7ec56c364b1804L, 0xb75a2540ee08e4L, + 0x6bf74a6dc19080L, 0x6ec793d97d6e59L } + }, +}, +{ + { + { 0x16789d60a4beb9L, 0x512b2cd9b9c801L, 0xf8b6d108c7bb9cL, + 0xd85651e9ebdc8cL, 0xc9450829ba971aL, 0x852d9ea7e1cf78L, + 0x6a45e350af01e2L, 0xe6cdadf6151dcfL }, + { 0xc454bb42b8c01bL, 0x59e0c493d54cd2L, 0x8e1e686454d608L, + 0x0dbae4bd8c6103L, 0xa5603a16c18b18L, 0x227a6b23369093L, + 0xf1e89295f3de1cL, 0x42f0b588ab63c5L } + }, + { + { 0xf1974cc5b596d8L, 0xee8093f44719f0L, 0x40ba933f6f5b54L, + 0xd6e53652f3d654L, 0x9aeb83526d73b8L, 0x50ed5350776382L, + 0x3be47d6ad43875L, 0x21d56dfc786e48L }, + { 0x8a75e18b73bb39L, 0x9eba84cf265a78L, 0x7c02a4d2e772e7L, + 0xf7df6d44c1ecd2L, 0xa8d9ea06cef71bL, 0x86e8f91cae3b68L, + 0x2fd141199efefaL, 0x0b36ab2214e6f6L } + }, + { + { 0xd79065cbdce61cL, 0xcb562ffdecb229L, 0xef5d3d14600849L, + 0x348b31b1d23ac8L, 0xb2ea69915c36b8L, 0x268683d4822836L, + 0x083edbec6f0b7dL, 0xaf4f39d1a7821cL }, + { 0x23be6e84e64841L, 0xe9e246365bf791L, 0xa3208ac02bfd7cL, + 0x231989cd01357dL, 0x79b8aad6422ab4L, 0x57d2b7e91b8564L, + 0x28ebbcc8c04421L, 0xdc787d87d09c05L } + }, + { + { 0xeb99f626c7bed5L, 0x326b15f39cd0e8L, 0xd9d53dcd860615L, + 0xdf636e71bf4205L, 0x1eaa0bf0752209L, 0x17ce69a4744abbL, + 0x474572df3ea2fbL, 0xc4f6f73224a7f3L }, + { 0x7ed86ad63081b4L, 0xcd4cdc74a20afbL, 0x7563831b301b2eL, + 
0x5b4d2b1e038699L, 0xa15d1fa802a15fL, 0x6687aaf13e9172L, + 0x3eccd36ba6da90L, 0x34e829d7474e83L } + }, + { + { 0x4cea19b19c9b27L, 0xa14c37a5f52523L, 0x248b16d726625cL, + 0x8c40f9f6cabc21L, 0x918470c32a5c65L, 0x314056b2a98d5bL, + 0x6c974cf34a0714L, 0x0c8f8a94f6314aL }, + { 0x484455770bccfdL, 0xf5835db740c9fdL, 0x12e59b5a21407cL, + 0xbe338e0db1689dL, 0x5a50ce9dd5e915L, 0xb1780e9ef99f39L, + 0x1262b55ee4d833L, 0x4be3f2289c5340L } + }, + { + { 0xbb99b906c4b858L, 0xa7724d1550ca53L, 0x7d31f5a826962eL, + 0xf239322a5804daL, 0x3e113200275048L, 0xcbb1bb83ee4cb6L, + 0xdb865251331191L, 0xb7caf9e7d1d903L }, + { 0x06e3b0577d7a9dL, 0x7a132b0b3bbbf5L, 0xd61fbc57c50575L, + 0x393f712af4b646L, 0xef77972cb7efe9L, 0x20e6d5d5ea4995L, + 0x0ac23d4fbbe4c6L, 0x8456617c807f2aL } + }, + { + { 0x4995fb35396143L, 0xa8b4bd1b99dc46L, 0x2293e8e4150064L, + 0x2f77d4922a3545L, 0xe866b03b2192c4L, 0x58b01f05e0aa38L, + 0xe406b232ed246bL, 0x447edb3ed60974L }, + { 0xf541b338869703L, 0x6959fe0383420aL, 0xd6b39db4be4e48L, + 0x048f3b4b5714efL, 0x68b49685d9e4b8L, 0xbda8e6c2177963L, + 0x5094e35c4211feL, 0xea591c32d46d1aL } + }, + { + { 0x3a768ff2fef780L, 0x4218d2832970c6L, 0xce598e4ec6da17L, + 0xf675645fbb126aL, 0xb04c23f0427617L, 0xc9f93fbe4fce74L, + 0x44a414b3c91b00L, 0x4d982f31d3b3ccL }, + { 0xb1d40e8b24cce0L, 0x5a21c07133e73dL, 0x6e9358e0bb589dL, + 0x39cfb172399844L, 0x83f7647166080eL, 0xcfe7bf8450b468L, + 0x2a288f71e8434fL, 0xd39f1e521a81e3L } + }, +}, +{ + { + { 0x78c6f13528af6fL, 0x0001fe294b74d9L, 0xae7742501aab44L, + 0x7cbe937ef0039cL, 0xaf3e4f00fa2a67L, 0xe28175fda1378eL, + 0x72adeed8ccd90eL, 0x16a8ce100af22fL }, + { 0x69fae17cbf63ddL, 0x67861729e39e26L, 0xe92b3d5f827a18L, + 0x4d75e418403682L, 0x01a4fd99056a79L, 0x89efb2d20008f5L, + 0xa2f6918b78ff15L, 0xf41c870a3437f5L } + }, + { + { 0xc840ae57be353cL, 0x465a5eb3fb2691L, 0x34a89f07eba833L, + 0xf620896013346eL, 0x563b5f0e875df2L, 0x5f7fc8bfbc44ceL, + 0x22fcb5acfedf9dL, 0x7cf68d47dc691bL }, + { 0x37f7c2d76a103fL, 0x728a128fd87b7dL, 0x7db2ad8ccf2132L, + 0xa4c13feb100e63L, 0xcd28a517b511d5L, 0xb910280721ca5cL, + 0xec1305fd84bd52L, 0xb9646422729791L } + }, + { + { 0x83fccdf5bc7462L, 0x01f3ddad6f012fL, 0x57f11713a6a87cL, + 0xedb47ceff403acL, 0x6c184e5baab073L, 0x5b17c7d6f0d6a1L, + 0x45a4c4f3ef2c91L, 0x26c3f7e86a8f41L }, + { 0x81a6db0b646514L, 0xf84059fca8b9aeL, 0xd73dab69f02305L, + 0x0de3faec4b7c6cL, 0x18abb88696df2fL, 0x45dd1b975d7740L, + 0x3aeccc69ee35bcL, 0x478252eb029f88L } + }, + { + { 0x66bf85b8b2ce15L, 0x1175425335709dL, 0x00169ef8123874L, + 0xfd3c18c9b89868L, 0xb3612f9775204eL, 0x4b8d09dc2cd510L, + 0xafa12e614559adL, 0x1ddaa889657493L }, + { 0x87d700b1e77a08L, 0xaf4cf2f14d2e71L, 0xe00835dbf90c94L, + 0xb16a6ec6dc8429L, 0x02a7210f8a4d92L, 0x5a5ab403d0c48dL, + 0x0052b3ab5b9beaL, 0x6242739e138f89L } + }, + { + { 0x7c215d316b2819L, 0xdacb65efeb9d7aL, 0xc3c569ed833423L, + 0xbc08435886a058L, 0x132c4db7e5cb61L, 0x6373a279422affL, + 0x43b9d7efca9fc4L, 0xe3319a5dbe465fL }, + { 0x51d36870b39da7L, 0xcb6d7984b75492L, 0x77eb272eadd87aL, + 0xf2fb47de0d3f6cL, 0x807fd86f9f791cL, 0xf01086b975e885L, + 0xf9314b5b6a3604L, 0x8cd453867be852L } + }, + { + { 0x7c1e6b3858f79bL, 0xf0477c4938caf9L, 0xb311bbf3e88c44L, + 0x9234c091e3a3c1L, 0x531af2b95a1d4dL, 0xf3cc969b8d1c64L, + 0x6f3c328b51e78dL, 0x5a1bd6c34e8881L }, + { 0x2e312393a9336fL, 0x020f0cc5ced897L, 0x4b45d7b5fab121L, + 0x8068b1c1841210L, 0x1bd85fc8349170L, 0xfe816d80f97fe5L, + 0x108981814b84fcL, 0x1d4fabbb93cd48L } + }, + { + { 0x1f11d45aef599eL, 0x8d91243b09c58aL, 0xd2eec7bd08c3c3L, + 0x5a6039b3b02793L, 0xb27fed58fb2c00L, 0xb5de44de8acf5eL, 
+ 0x2c3e0cd6e6c698L, 0x2f96ed4777180dL }, + { 0x67de8bf96d0e36L, 0xd36a2b6c9b6d65L, 0x8df5d37637d59cL, + 0x951899fc8d9878L, 0x0fa090db13fcf8L, 0xa5270811f5c7b4L, + 0x56a6560513a37aL, 0xc6f553014dc1feL } + }, + { + { 0x7f6def794945d6L, 0x2f52fe38cc8832L, 0x0228ad9a812ff5L, + 0xcd282e5bb8478aL, 0xa0bc9afbe91b07L, 0x0360cdc11165e2L, + 0xb5240fd7b857e4L, 0x67f1665fa36b08L }, + { 0x84ce588ad2c93fL, 0x94db722e8ff4c0L, 0xad2edbb489c8a3L, + 0x6b2d5b87e5f278L, 0x0265e58d1d0798L, 0xd2c9f264c5589eL, + 0xde81f094e4074dL, 0xc539595303089fL } + }, +}, +{ + { + { 0x183492f83e882cL, 0x4d58203b5e6c12L, 0x1ac96c3efec20bL, + 0xabd5a5be1cd15eL, 0x7e1e242cbbb14bL, 0x9f03f45d0543b3L, + 0xc94bc47d678158L, 0x7917be0a446cadL }, + { 0x53f2be29b37394L, 0x0cb0a6c064cc76L, 0x3a857bcfba3da3L, + 0xac86bc580fcb49L, 0x9d5336e30ab146L, 0xafb093d5bc1270L, + 0x996689de5c3b6eL, 0x55189faea076baL } + }, + { + { 0x99ef986646ce03L, 0xa155f8130e6100L, 0x75bef1729b6b07L, + 0xc46f08e1de077bL, 0xf52fdc57ed0526L, 0xe09d98961a299aL, + 0x95273297b8e93aL, 0x11255b50acd185L }, + { 0x57919db4a6acddL, 0x708a5784451d74L, 0x5b0bd01283f7b3L, + 0xe82f40cc3d9260L, 0x2ab96ec82bbdc2L, 0x921f680c164d87L, + 0xf0f7883c17a6a9L, 0xc366478382a001L } + }, + { + { 0x5c9aa072e40791L, 0xf0b72d6a0776bfL, 0x445f9b2eaa50dcL, + 0xa929fa96bda47fL, 0x539dc713bbfc49L, 0x4f16dd0006a78bL, + 0x331ba3deef39c7L, 0xbfa0a24c34157cL }, + { 0x0220beb6a3b482L, 0x3164d4d6c43885L, 0xa03bb5dacdea23L, + 0xd6b8b5a9d8f450L, 0xd218e65bd208feL, 0x43948ed35c476fL, + 0x29a0dd80a2ed2bL, 0xa6ccf3325295b7L } + }, + { + { 0xf68f15fac38939L, 0xb3dd5a2f8010c1L, 0xf7ac290a35f141L, + 0xdc8f3b27388574L, 0x7ec3de1e95fed2L, 0xc625451257ac7dL, + 0x66fc33e664e55aL, 0xd3968d34832ba5L }, + { 0x980291bc026448L, 0xfcb212524da4a5L, 0xbca7df4827a360L, + 0xfcc395c85ca63bL, 0xcf566ec8e9f733L, 0x835ee9bd465f70L, + 0xe66d111372f916L, 0xc066cf904d9211L } + }, + { + { 0xb9763a38b48818L, 0xa6d23cc4288f96L, 0xe27fcf5ed3a229L, + 0x6aebf9cabaff00L, 0xf3375038131cd1L, 0x13ad41dffabd58L, + 0x1bee6af861c83bL, 0x274fe969c142e7L }, + { 0x70ebcc99b84b5bL, 0xe1a57d78191cfcL, 0x46ccd06cbf00b8L, + 0xc233e8eefe402dL, 0xb4ab215beebeb3L, 0xb7424eabd14e7bL, + 0x351259aa679578L, 0x6d6d01e471d684L } + }, + { + { 0x755c465815ae38L, 0xadc3e85611db56L, 0x633999b188dd50L, + 0xfdf7509c12d907L, 0x25bcfde238b6afL, 0x50d705d397f5e7L, + 0xb65f60b944c974L, 0x8867fc327ac325L }, + { 0x2edc4413763effL, 0x892c0b3341fb63L, 0xb34b83ab3a7f28L, + 0x9aa106d15c2f18L, 0x720bbc61bb2277L, 0x637f72a5cfaefdL, + 0xf57db6ef43e565L, 0xceb7c67b58e772L } + }, + { + { 0x2793da56ecc1deL, 0x4e1097438f31b2L, 0x4229b4f8781267L, + 0xe5d2272dec04a1L, 0x6abb463ec17cffL, 0x28aaa7e0cbb048L, + 0x41dc081d22ef85L, 0xcbc361e5e63d0fL }, + { 0xb78aafcad5dbaaL, 0x0111505fc1edc3L, 0x63ed66d92c7bfaL, + 0x2982284e468919L, 0x30f1f21b8c0d8cL, 0xf0567472685093L, + 0x0e085b6f03dd0fL, 0xa8c8db85581e66L } + }, + { + { 0x42009a6264ad0cL, 0x13bf2b8593bef4L, 0x1d111905d4e8b1L, + 0xfe3e940ef7bddcL, 0xa012275624e62cL, 0xcb659241d6d3ccL, + 0xc7bcc70edb7ab6L, 0xff9fafbb750b1cL }, + { 0xf65df297fea84bL, 0x17c84a890b0e02L, 0xa92a859301e821L, + 0xbee8cb2fb480d1L, 0x7010b8c59c604eL, 0x47bf3f4e803c43L, + 0xd64514247b3fffL, 0xc4c5dcb9f0da13L } + }, +}, +{ + { + { 0x8af700cb5253b3L, 0x31ca605206957aL, 0x25744393eafdcdL, + 0x2ba5ae1d3ae15eL, 0x710b7385b82579L, 0x145ab57112b95aL, + 0x4b133a038c55c5L, 0xf7559c92a16fefL }, + { 0x70c3e68d9ba896L, 0x475dd32c33d07aL, 0xe084e473a41e40L, + 0xddc9382fd2e706L, 0x34b727579510bdL, 0x5e78a69a5f901eL, + 0x429dfd7dcfb823L, 0x1d9dc18014f0a3L } + }, + { + { 
0x364fcdfaf403d7L, 0xd9ea4ffb7d7b34L, 0x21a3426cbb1dacL, + 0xfa51052143b4f5L, 0x2bca0736df2409L, 0x7e6985a8ad7285L, + 0x3a1a9d04aaa27fL, 0x1a815e19fc0c6cL }, + { 0xfab6147bb65bb3L, 0xa36dc0d33ced0bL, 0x26a88592062d78L, + 0x343861728a5fb7L, 0xe82da254ebb1adL, 0x70f5071d05aa11L, + 0x0b7f847adaac48L, 0xeb812bc93cb269L } + }, + { + { 0xcb317ccf7cacccL, 0xd3410d9cf85098L, 0xca68c8d7f078d7L, + 0xfe9e812b782efcL, 0x32e7c0f5f544b5L, 0x44fe95a3a7b7f2L, + 0xf4f1543e91327bL, 0x27d118d76645edL }, + { 0x690547cd7abc2cL, 0xf64680fb53c8afL, 0xbe0cbe079ea989L, + 0x6cf0ccea91af28L, 0xa3b85a29daa2f9L, 0xd4b663c91faed0L, + 0x782c7b7a8b20baL, 0xf494fafb8d98ceL } + }, + { + { 0x080c0d7002f55aL, 0xf4f8f142d6d9ddL, 0xb326229382f025L, + 0x58fd0b5ad28c20L, 0x704b9928d06a15L, 0xf4545d97fbd8e4L, + 0xc32fa63ed55581L, 0x3ab793601ac0fdL }, + { 0x13ece526099fd1L, 0x776dba89c79178L, 0x8d28212ce26c45L, + 0x09fddaf60d739cL, 0xf9931eda84826eL, 0x6e73d90b29439eL, + 0x94cfefc9095e61L, 0x3050d16802f474L } + }, + { + { 0x0898f8f9f6394bL, 0x48b8cea88b0e91L, 0x4bc99254c1b362L, + 0xe3fccb4827d9ecL, 0x5d4cf9ad950d6aL, 0xa16f1ef39b5b38L, + 0x3c76d1d620f288L, 0x9fdd059e119390L }, + { 0x7b5de9efb5edf8L, 0x3e290b9769d14eL, 0x4df3a916bd10b5L, + 0xae99bca82f8f7bL, 0x5481d5dc9524afL, 0xf112e4f69504f1L, + 0xb048f0951931ecL, 0xbff876a18f51b1L } + }, + { + { 0x932e2a746c1c37L, 0x903ad529aea4c1L, 0x717ac918f161f2L, + 0xa57d197f425e2aL, 0xae89dac7f39e0eL, 0x91655c0baa2a58L, + 0xe3dc28654836ddL, 0xb5f0baaa9ec9e6L }, + { 0xf7c4662bdbda04L, 0xbe5393b51059c0L, 0xb16d552dd95b0fL, + 0xde495b31b3bd96L, 0xb2a6e02c0206c5L, 0x045cc09014d3a9L, + 0xf66a3152a2f490L, 0x208c108c5dea05L } + }, + { + { 0x6e38b6865237eaL, 0x93a13039f27fc6L, 0x9a6d510a95068aL, + 0x6fbf216e7c9e54L, 0x7824290571ac1dL, 0x8cb23ba91c2a0cL, + 0x611202ec7e434dL, 0x8f901bf76058b4L }, + { 0xef0ac050849588L, 0xe0d2ddedd31804L, 0xaf5417ceb2ca81L, + 0x420ac065d1a509L, 0x46e345e9683bb6L, 0x6daf635f613f7fL, + 0xc9e829148a9576L, 0x5f9f1d1176d147L } + }, + { + { 0xd24ae1d77e9709L, 0x77751dc0047b8aL, 0xe325334c6a1593L, + 0x9baf962671f86aL, 0x425af6ac29a15eL, 0x31086002796e33L, + 0xb6ea78cfc253a5L, 0x4c733e0afae0eaL }, + { 0x4b7443a97c99b9L, 0xc14e9e450203a6L, 0xd1bb51552680baL, + 0xa56a3efd55533aL, 0xa66e38c169e1a0L, 0xb3e4df9eed7da0L, + 0x022c937ddce3d9L, 0x8552089f6e36b4L } + }, +}, +{ + { + { 0x8e4bf95f5cc82eL, 0x2ad80c3c3ed6c9L, 0xf2e5b2cc9045e1L, + 0x42c906559b06d4L, 0xc1f73797b43b84L, 0x1710dbf72d7992L, + 0xe98cf47767b41cL, 0xe713fce7bfb9e9L }, + { 0x9f54ae99fa5134L, 0x3002fd8de40d0eL, 0xdc282b79311334L, + 0x5519810bfeb360L, 0x31539c70f96ffeL, 0x04eacc0d27777bL, + 0x59824108ff5053L, 0x598236632b67adL } + }, + { + { 0x6eb45546bea5c2L, 0x82cfae0d509a33L, 0x6a69bd8394bb59L, + 0x1880d8d5770ee1L, 0x63518447dacf9eL, 0x5b1ecc5f02b891L, + 0xeb7d900b6c9a5aL, 0xdab8a768897da8L }, + { 0x28c7be598851a6L, 0x0101d4f4d73c3bL, 0x3c2569c5084996L, + 0xb9bc911280bde0L, 0x513a22acd0d4f9L, 0xdf2986d2a15f3bL, + 0x231c28f2aa4943L, 0x29623ad0333870L } + }, + { + { 0x2ceb1784084416L, 0x924cf1c49516cdL, 0x76536c04be856fL, + 0x11b59cd47a265bL, 0x720dc844999494L, 0x910f794007b795L, + 0x8434e142d3df83L, 0x8f53878bd478d3L }, + { 0xd9b072eaeb9c2fL, 0x16f87eafd8a29fL, 0x8c42f9b2fd0de1L, + 0x916721e0e816efL, 0x2ecb47018bde37L, 0xcde3b7a2375da2L, + 0x30d0657ef94281L, 0x51054565cd7af8L } + }, + { + { 0x7230b334bdced3L, 0x0c6a3e10838569L, 0xf19c9ece3493b8L, + 0xf2759270d97c57L, 0xf14181e0c862ebL, 0xfd3bac132c72bcL, + 0x620563ff3be362L, 0x672ccaf47283b7L }, + { 0x191e3fa2b7bf16L, 0xf838633520dad7L, 0xd3dde553629d87L, + 
0x14d8836af86ebeL, 0x3db7dfb221b2ceL, 0x3872abb0aed72aL, + 0xb60de528c665b7L, 0x89c259644982cbL } + }, + { + { 0x799a2de4dbba25L, 0xd818aaea42715eL, 0xbc88f4df55c362L, + 0x142a163713c9aeL, 0x411e8eefbfb33fL, 0x34b46296bb684aL, + 0x4344becdc81817L, 0xcc9573d17f9d46L }, + { 0xf85f8bcff38a7dL, 0xa14bf730caf117L, 0x126874f4ba6429L, + 0xcc9bf22aa5db97L, 0x62b56df6aba827L, 0xfee1cb89c9772aL, + 0xe36838f177e541L, 0x698815dadd438fL } + }, + { + { 0xc9fd89438ed1adL, 0x73cd79d7b6a601L, 0x2210e6205e8d20L, + 0x72384ac3592af5L, 0x5ccc079763d07eL, 0x2f31a4aa5f79ebL, + 0x693f4ed2945a95L, 0xc7120178056fdcL }, + { 0x361ecd2df4b09aL, 0xa5644eab7d929aL, 0x34abc0b3fabe9aL, + 0x1a2473ce942a8cL, 0xe00c9246454bc3L, 0xab324bcdff7366L, + 0xe1412f121b8f99L, 0x970b572e33551eL } + }, + { + { 0x6ca4cacbd0a6b5L, 0x5584787921d654L, 0x18e5253c809bdaL, + 0x01b32c3f0cbe5eL, 0xb9aa7540f987ddL, 0x628f4bb6dfa4dbL, + 0x0255f0b891890bL, 0x25b7df4874e590L }, + { 0xbded3188ed5f95L, 0x9dc428dca93023L, 0xc68f25abccf520L, + 0xc4f3764e616e6cL, 0xd9a57f1a1d9993L, 0xd1964a5533431bL, + 0x06cd77f02ab6d0L, 0xa66079103e52e0L } + }, + { + { 0xab088645f72700L, 0xf77b2ff0a1a44eL, 0x43ebdd8c2a24b5L, + 0xa6d67114f564d7L, 0x495df63f414160L, 0xf5bacd776f6de6L, + 0x3011aff7c2b43dL, 0xbb1e64c3241928L }, + { 0xf70c5725034073L, 0x891c62a68f1e97L, 0xed8eb2eb22e374L, + 0xd3a53e97dbcc2fL, 0x1d06281dc8f220L, 0x9eef48face4393L, + 0x96014f5d2abecdL, 0x1da7e092653cebL } + }, +}, +{ + { + { 0x7593318d00bc94L, 0x586f3c6c7262a2L, 0xea68f52958ad31L, + 0x6707fccd4e8bedL, 0xb7e35d6cb3f9ceL, 0x2cbb6f7f4b1be8L, + 0xa5352687b41aeeL, 0x1d77845f7b39b8L }, + { 0xb1f3995eaf9554L, 0x3250f70fe9e7d4L, 0x62e5d1ba00c23cL, + 0x5e422f5c10e3bfL, 0x7a18039c25cec4L, 0xb4e66a17cc4d5bL, + 0xad7c5f636d0e0cL, 0x9f40b12a4cf347L } + }, + { + { 0x697f88251e3696L, 0xc89bc40ab0a648L, 0x8f261a59785804L, + 0x4c7f900b51a2bdL, 0xd00e7af8a2dfcfL, 0xf9c534db642aebL, + 0xea2a79fb63df0eL, 0x392a69af2f64a4L }, + { 0x0c0f01cc331b6cL, 0x414bf2e6a5edb5L, 0xfe5ed815068391L, + 0x0a8078d62fbc34L, 0x78a438254bca98L, 0xf7a49ae3d727c7L, + 0x96c1de1ab4dffeL, 0x45901f73b9440aL } + }, + { + { 0x3f1189facfe46eL, 0xdca6f464467443L, 0xac385422eb5bcfL, + 0xb02dce9906bf72L, 0xdd8cdacfe1d454L, 0xc26f04c65f7218L, + 0xb4748596ea145dL, 0xc53dc6b5bdb315L }, + { 0xbe5be749ad7197L, 0x627e91918b5eccL, 0x57c889c9ea405dL, + 0x2e5650c1a5360bL, 0x42290df1b30b27L, 0x4a071575242687L, + 0x553ed1fd379133L, 0xb9d7a0701db019L } + }, + { + { 0xcfe551c56597dcL, 0x81af92a925ebd6L, 0x83efe16f4e8d57L, + 0x61bb4311f640d3L, 0xf80440f78b414aL, 0x72f3c636c9e3b4L, + 0xb55f43a6a03c66L, 0x47a9dede417037L }, + { 0x1a7e287dbb612bL, 0x895c3c7dbb9220L, 0xd50c86e6c04764L, + 0xed5269853cf7caL, 0xc78d799f74af55L, 0xb2ba0f2b969ff2L, + 0x06d48151c6530bL, 0x764a1fe165a575L } + }, + { + { 0x4383a3bc1b5eceL, 0x0563c8854ff148L, 0x9a452795af796eL, + 0xffba7c088e9953L, 0xfe9fb5eb6a3001L, 0x795098825b6b19L, + 0x67c899ad81be5eL, 0xc89ac8d2f9d29bL }, + { 0x7c76ba329ab8f7L, 0xb2a18c96e40f74L, 0x1b5056e3864d9bL, + 0xdfa503d9b582b8L, 0xfb035197c9c68eL, 0xdc501316b3c22bL, + 0x38ab231a6c96ffL, 0x4ea527c8cb1c10L } + }, + { + { 0xd632f20c05b4edL, 0xe0199fab2a032dL, 0x373295626812d7L, + 0x2aed855013df13L, 0x92ca24b39f96acL, 0x620273dbb9751aL, + 0x5d0d21ef7437a1L, 0x9de2a43077de56L }, + { 0x0569b1211a4674L, 0xfc3923e89c3989L, 0x3d127042c5c770L, + 0x0072b9084e8c37L, 0x7178d4dac39f9aL, 0x5f8292f778d345L, + 0x9e5bf0f77c7307L, 0x7691610c3a20f5L } + }, + { + { 0x7c4ead5705fe96L, 0x377ec35c8e464cL, 0x3e5b9907689954L, + 0xc0f6949a2d31eaL, 0x839d395c580671L, 0x2f347a6b215b09L, 
+ 0xfdcfa33683df83L, 0x6e12cc26af39a8L }, + { 0xae46ec813a3bd2L, 0x03a7d3b59366f8L, 0xe2029d5b87aed4L, + 0xbdc4e43fe1b83dL, 0x768437cdb8a1a8L, 0xe47acc3ea0dd7fL, + 0x550e0cc62a0af4L, 0xcaf2cbc1a20962L } + }, + { + { 0x5a784f7f28a78fL, 0x952a9b507e9724L, 0x8ac5e411bab7a3L, + 0x1251e3fb7bc1e1L, 0xe360f82dc15e22L, 0x3ac72da95213f5L, + 0x65ee9ba4dcd47bL, 0xdfeab7b3af5952L }, + { 0x34c5c8026fd3c6L, 0xd977b08f3ac7eeL, 0x003bd017dba2f6L, + 0xcfc5cf8ac98c8dL, 0x05eb6040e46922L, 0xc248b17faa9352L, + 0xfa41c0f395c7a7L, 0x29931d4b71ee44L } + }, +}, +{ + { + { 0xac087bb07861c5L, 0x3bd37db5ae8240L, 0x94c68ecf94518fL, + 0xd32a378ff88a5bL, 0x42c8aaf9b441d1L, 0x089db70fc07f12L, + 0x211c386d3d4455L, 0x1db9af7546b158L }, + { 0xdfd1b6551bc927L, 0x69c04930733df4L, 0xdc72cd42aeb586L, + 0xeebdace823aa13L, 0x51b3b3c56ad643L, 0xb983a99d4e0426L, + 0xa1e5b6c69c4eccL, 0x37cd38245e6668L } + }, + { + { 0x158ce6d9f73aeaL, 0x36a774914ff475L, 0x0d4e424dc0b018L, + 0xc2c44483946f09L, 0x7a7de3ffacda62L, 0x49a19e6b486709L, + 0x65094d8db61da7L, 0x09edfd98f5ee87L }, + { 0xe460fcfb37226dL, 0x3b9d03969bf470L, 0x3d4d511247ca22L, + 0xc7248d6c782cb1L, 0x91189a000ad293L, 0x1244942e8abe75L, + 0x9f88d12bf52cdbL, 0x368463ebbbcadfL } + }, + { + { 0x419e4b38074f45L, 0xd3f8e2e0771c83L, 0xd2743b42e68d34L, + 0xc68b7dbb116a00L, 0xfad2cf7d84cc37L, 0xcfd27c0b7a0f4dL, + 0x3b9e23f190e587L, 0x7bab499751ca9eL }, + { 0x3270861a8f12eeL, 0xee1f38d31b36d5L, 0x748bb31e4c0eedL, + 0x9be5c9b110ebadL, 0x728660bc8b6cb6L, 0x7bc9df793d914aL, + 0x73a4f2cc88c859L, 0xbe4a2fdb4e7f0eL } + }, + { + { 0xe566ff8a450e77L, 0xb0b40066a13abaL, 0x483a510cd7dc90L, + 0xb1a20135fa9cccL, 0xeb0b631a80e67cL, 0x7c34e1f020801aL, + 0x0257dc8f4e447cL, 0x7abe7d174c6f0fL }, + { 0xf115a3ab19a576L, 0x8f0474a064ca0eL, 0x999bb6b351f99bL, + 0x855254b773edc3L, 0x49f6c2f427d717L, 0x9f682532e0cef2L, + 0x1fe126c2ee34f5L, 0x1ec2cae80150f7L } + }, + { + { 0x862c5afc005b7aL, 0x61adea7ec4ef17L, 0xf885fd3007b446L, + 0x25c129d9b0e30eL, 0xbc10f25feec7e0L, 0x3901ac4df79ee1L, + 0xad49db7fe9e19fL, 0xc8624d9360d050L }, + { 0xc74a576bf3260bL, 0xbde80248c010c2L, 0xf15532909b6977L, + 0x6a5a82ed52dcf8L, 0x4fbf59d29b9dfcL, 0x337d049c7b730cL, + 0xb3deac63a89cd4L, 0x1e07595ad2f2ebL } + }, + { + { 0xa0b0a4d3b7c84eL, 0xf132c378cf2b00L, 0x192814beaaa8ecL, + 0xe7929f97b4b5dfL, 0xf08a68e42d0ab7L, 0x814afb17b60cddL, + 0x78c348c7d9c160L, 0xf8a948844db217L }, + { 0xcdefd88eaa2578L, 0xf717f56bd0e260L, 0x7754e131694d02L, + 0x1254c14181dbd8L, 0x0dacdd26e5f312L, 0xb8abdfbcef87bfL, + 0xb985972e74e2eaL, 0x1717621002b424L } + }, + { + { 0x92cc75e162df70L, 0x1e20c0618ee849L, 0xc036b4626aa590L, + 0x31be67e4da5155L, 0x04911b5f7213b0L, 0x39261d7bb2e72eL, + 0x9e844665c015a3L, 0x2f59fc0298ae67L }, + { 0xa3ea7ba1701fccL, 0x87a5fa90ebd651L, 0xa607ed4301d7b1L, + 0xbd4ec5f3b2e271L, 0x732a1a2dc4180fL, 0xbe15d82feaa8c1L, + 0x103670266f2f3fL, 0xccfd3979e79ce8L } + }, + { + { 0x82ab83570a54adL, 0x5c1dee8e3bec75L, 0xf583ff454b556bL, + 0x9220199f461e60L, 0xdf61ca887fc4e7L, 0x6641fd20776dadL, + 0x00c6edd8edd061L, 0xaf9b14255f7e87L }, + { 0x73f15e49bbe3ecL, 0xdd3b788f8bc1faL, 0xb24cc071b8ff86L, + 0x6c260d241be58bL, 0xec1c4e36b10adaL, 0xf6b42097fdb985L, + 0x0d0ac85d47c212L, 0x967191c07d78d1L } + }, +}, +{ + { + { 0x3b11638843d0f3L, 0x4b89297f27f10eL, 0x477236e863ba2aL, + 0x1949622add280cL, 0x7cd523504da757L, 0xe0e99d279e4ff7L, + 0xb4ef894537da41L, 0xc55dde45a24ff1L }, + { 0x18d8e21b587521L, 0x8010b5d3777833L, 0x4af522dd3a54c8L, + 0x7cd476b4c0ac13L, 0x4587e614099f67L, 0x494d0ed605ee64L, + 0x3218ba2cc80903L, 0x5ff56aa0b2e169L } + }, + { + { 
0x51ec94e3a06c69L, 0xa26d7be5e65c52L, 0x156f113d44ee96L, + 0x70f0968bf5b9b4L, 0x9b7e4695f5332dL, 0x36c295f6703829L, + 0x1522690d04f492L, 0xcf35ca4728043bL }, + { 0xf9ca3e1190a7c3L, 0x53d2413f971b07L, 0xae596529c48b49L, + 0x74672b8fefff5cL, 0x0a3018ba7643b0L, 0x51919e83e9b0a8L, + 0x89ad33dc932fb5L, 0x52a4419643e687L } + }, + { + { 0x7778990d2d0acdL, 0x3bdbcce487fdf1L, 0xdc413ca2b03dd2L, + 0x278755b9a2b7d0L, 0x4ebb8b535ddd7fL, 0x0465152bcbdb92L, + 0x34f22d6671d051L, 0x1ba04c787192b9L }, + { 0xb1693f483560c1L, 0xe08a5937d174e9L, 0x47ffdc464dc9afL, + 0x1123596ce8126cL, 0x632d95f1124628L, 0x66287abfee7c76L, + 0xb40fe60c552332L, 0x3f11729e304e1eL } + }, + { + { 0x97a6ea05030a8cL, 0x692419809c27b2L, 0x3308501ac9dd5dL, + 0x9fed7fabe73fdcL, 0xea555440535286L, 0xc7c07ab6c9b832L, + 0x178c882c51b967L, 0x6fa0c6986ee075L }, + { 0xbaa4a15b8b5c4aL, 0xf83c0ea3130c0aL, 0xcf8624b2800331L, + 0xade85cd7ccbcb8L, 0x971d7f6f08445dL, 0xfd480b76a546dcL, + 0xdc15a38c93761cL, 0xc4c495c9d04631L } + }, + { + { 0x5f4cee89470efeL, 0x9fe896188d93adL, 0x24783b3f4e49ceL, + 0x1bc7ed752ffb3eL, 0xa3abe6a6d81e17L, 0xd6bb8b47a333c3L, + 0x3485c0b10a3527L, 0x7cddc9c31a9d10L }, + { 0x0c78112c38ca37L, 0x10e249ddd2f8d8L, 0x72c88ccc511911L, + 0x4d75b5a29a6c84L, 0xc74b267a227b1eL, 0x698390cf8e35adL, + 0x8f27edfe98d230L, 0xec922f26bdc7f4L } + }, + { + { 0xac34023fc32e11L, 0xe0ae2f547200d1L, 0xa7c7492bd98c82L, + 0x3910b687b02154L, 0x6fdd06ce28ab6dL, 0xd3a7e49d98b012L, + 0x4c1c82b9f54207L, 0xef5bbe645c176fL }, + { 0x3d17960d3e71ebL, 0x90d7e84080e70cL, 0x83e6438bff5d9eL, + 0x1877e1f535d85cL, 0x931ed6efbb69ccL, 0xcf962651247848L, + 0x76d618b750da4eL, 0xc076708717fbf6L } + }, + { + { 0x80a5ac5eec5126L, 0x6d05dd13379c80L, 0x514b0892336d32L, + 0x586c0066725137L, 0xab2365a574f954L, 0x3c89ea0ac7d356L, + 0xf1f2edd27460baL, 0xf200ddbab9870fL }, + { 0xc8f1b2ca35e885L, 0x5d22f86e6e7550L, 0x24b9a409554615L, + 0xcb41107616314fL, 0xca752f0c976a11L, 0x3e2f839a08291aL, + 0x0cff22ff2c420eL, 0xafd603e82b9747L } + }, + { + { 0xaddeddc810a3daL, 0x78b6c2dd3a87bfL, 0xbc7020bde3a04cL, + 0x47ab9739b6d045L, 0x3b046d60959358L, 0x0f953e7509ee3eL, + 0x803dc8669fc61bL, 0xcceaec0893c8d4L }, + { 0x21f8c40b048a45L, 0xb535073fcaea8aL, 0xe712c3590e360bL, + 0x5d0f3f48403338L, 0xe0ea26c7207f2dL, 0x20f6b57ffd9e05L, + 0xb97d68e4788b00L, 0xb1215541889cceL } + }, +}, +{ + { + { 0x0079817464238eL, 0x21103020d381caL, 0x1cc4c6ed9f01b5L, + 0x5e35dc55a131b1L, 0xb61848d06944ebL, 0x83792a029631a3L, + 0xbe1017fafca0ddL, 0x70aaa01782fcbbL }, + { 0xc63b7a099945e7L, 0xe9164ecc4486c1L, 0xb133e35885f2c1L, + 0x186f0d3c99ae02L, 0x2fca4922bf53e6L, 0xf922aa248a02bcL, + 0x4fe64900dd3dcaL, 0xe8c313ff6a8207L } + }, + { + { 0xc5b358397caf1eL, 0xa001922922a4b6L, 0x67e36bedf07c95L, + 0xabaa0aeb2f4f34L, 0x66dc926dedc333L, 0x82021c438ec5b3L, + 0x82b4f2600ab176L, 0x1b7c22e69c45afL }, + { 0x07b0dbe0924ad9L, 0xe030936a407ddeL, 0x66e1ce926ccd06L, + 0xb50c108e3505a9L, 0x8b921e1da98f51L, 0x449ca1a20cf7c7L, + 0xadb80c7e67d079L, 0x205aa54834372dL } + }, + { + { 0x1482b4819bf847L, 0xd6c16ab5906f0fL, 0x323fb1723ad060L, + 0x0346389c832be7L, 0xe71b2d82ee45bfL, 0x761c37dfb22276L, + 0xa9b33345d70be2L, 0x81a06565a0627aL }, + { 0x337750399a6282L, 0xafc8d2ed0436f0L, 0x22f71d3c53342fL, + 0x66ca56d8939ad3L, 0x15a919230e09baL, 0x261091ea6de890L, + 0x609d700e78f2d5L, 0x8aa52ee8eaaf78L } + }, + { + { 0xa398788ce76258L, 0x3031d07494b975L, 0x4a6d652043dfe2L, + 0xdb1a849b4401ecL, 0xf81ebbbce8bbccL, 0x937dd4716efe9eL, + 0x9c19350ef85eccL, 0x260d932214273bL }, + { 0x1d7e21e77bf1a3L, 0x199d689a544eb7L, 0x9da594194ced50L, + 
0x71a60be8a0aeaaL, 0x183a0ae26d3b51L, 0x49f176a8df9728L, + 0x744376e3230674L, 0xb2cb21ae25541cL } + }, + { + { 0x7a721589a0071fL, 0xe19dd29e7d2a6bL, 0x3deb34e55113f0L, + 0xef1f8ebede573bL, 0xa8f7ff95665e37L, 0xa2c21eaf2d7777L, + 0x1387afa91e2e39L, 0x04057b97db68f6L }, + { 0x8b9d5ae1c241f7L, 0x689588a8e75993L, 0x79585b45c0e2d4L, + 0xba1ef167b64974L, 0x72685bc1c08a75L, 0xf0a5814d572eddL, + 0x71464a35ab0e70L, 0xc93c92b339aea7L } + }, + { + { 0x1917e2a5b8a87dL, 0xea5db763a82756L, 0x5bba2fb6420e2bL, + 0x5cc0501019372aL, 0xb1ef8beccc5efdL, 0xaf06393f49c57dL, + 0x3ab1adf87a0bc4L, 0x2ee4cca34fe6b6L }, + { 0xd1606686b8ba9bL, 0xef137d97efec13L, 0x7b6046550abb76L, + 0xb40ec2bf753a00L, 0x696ed22eaf8f1dL, 0x398c91fd8ba3d8L, + 0x11f203437db313L, 0xe1ec33bfe5079eL } + }, + { + { 0x8a10c00bdc81f0L, 0x5f392566fe8e05L, 0xa595dab14a368eL, + 0x32b318138cec6bL, 0xd77afde1b00d00L, 0x3c979284d9923dL, + 0x78f0e7a76e13ddL, 0x5ee8e59bf75675L }, + { 0x49ec89391b130cL, 0x9416182a47a441L, 0x54555b576e2ce8L, + 0xcbdd2fd349c40bL, 0x10ae7379392bbeL, 0x270b1112e2dab0L, + 0x5cb7712af293f4L, 0xfc22a33d6095c6L } + }, + { + { 0xdcb5bbd0f15878L, 0xbcf27adb6bba48L, 0x979913e7b70ebaL, + 0x4c0f34b158578aL, 0x53f59a76ed6088L, 0x19b3b2c75b0fc2L, + 0xad628dc0153f3cL, 0x5195a2bcec1607L }, + { 0x95f8b84dfe0f7aL, 0x935c6b0152920bL, 0x25f9e314da1056L, + 0x4910a94b28c229L, 0x54b03b48ee4d6eL, 0xc991fc3694e3edL, + 0x68c4c26dbe5709L, 0xc9cfce463d7657L } + }, +}, +{ + { + { 0x21c9227f52a44eL, 0x7f105a2e85bfbdL, 0x887781f6268fc2L, + 0x56ee808a2d7e35L, 0x14f9de52d3930fL, 0x4a4e356dcb561aL, + 0x87362267f95598L, 0x211c3425f34151L }, + { 0x8fcb75b0eaf9cbL, 0xcc9edf93d60ce2L, 0x54412c9a5fe627L, + 0x6036a72842dd09L, 0x71ce668a6c6099L, 0x02b30d75386764L, + 0xb69bed36f18e23L, 0x124c9b1d1de9f4L } + }, + { + { 0xe8f8d95e69b531L, 0xe1e115eaff1049L, 0x9087cd1eddea0cL, + 0x8ed55a57449916L, 0x8009f547808404L, 0x990f21617fea55L, + 0x68ba624fe8ecf9L, 0x8ac295056d1f47L }, + { 0x3257887529dfb0L, 0xc4a613f244c080L, 0xabb1ac028672faL, + 0xb2915c531eb291L, 0x6e368ca8fababaL, 0x6b8c2591fde498L, + 0x67724a1f2a548cL, 0x6b3b7e8f90409bL } + }, + { + { 0x5415003fae20aaL, 0x95858a985df5ceL, 0x42bc9870ac6beeL, + 0x8d843c539ea1a9L, 0x5de200cb571043L, 0x084fcd51741a33L, + 0xe1ca20c0009d1cL, 0x0271d28e957e6dL }, + { 0x84cbf809e3be55L, 0xc804dda1c578c6L, 0xea85489409a93aL, + 0x64a450a972021dL, 0xc6a2161e681312L, 0x280bff965bc111L, + 0xd358a4b0f8526fL, 0xd967be8953a3abL } + }, + { + { 0x4c5e6157dd066cL, 0x37afd33634c8d4L, 0xa3ac88a42d8b87L, + 0x9681e9b938b607L, 0x7a286ab37fe4c8L, 0xdeee5742494245L, + 0x184b9d36af75a8L, 0x20f696a3670c04L }, + { 0x1340adfa39e8b9L, 0x03c19290850b2eL, 0x435ebd42c0e1efL, + 0x49de18b142ee9bL, 0xb440b273f116f2L, 0xd94e9fa2214463L, + 0x1b0ddd36311543L, 0x1ae042a991ba3cL } + }, + { + { 0xbc322f85bb47aaL, 0x9e2562554a5845L, 0x96b65ae21115f3L, + 0x46fbed4bb5757bL, 0x18aec4f4c42dceL, 0xc59caf68d801f0L, + 0x91894631205521L, 0x66bd8e089feb7aL }, + { 0x39ebe95c529ee7L, 0x28d89928eadb99L, 0x6058c786927544L, + 0x877e7a5d3808ecL, 0x8f651111c52eafL, 0xfb59812ae221cdL, + 0x22289c6f890391L, 0xa97695b4966e92L } + }, + { + { 0xf0a91226ff10f0L, 0x49a931ba2a65c8L, 0x3fcebbcb1d3cb0L, + 0x70eb79bca9685fL, 0x82520b5ab38cb6L, 0xccf991b76304c3L, + 0x575aab1af8b07cL, 0xec8166a5ed5efbL }, + { 0xddc5698c8689b1L, 0x227c949b2e78d7L, 0x61323218e07d91L, + 0x658a11d22cfd62L, 0x908fb44004dd5fL, 0xe3d14f090d21b1L, + 0x6f3db9da6a1639L, 0x09d86c0333a525L } + }, + { + { 0xd83eaf06f043f7L, 0x88ab648b52d5f6L, 0x67c664d57144d7L, + 0x55d7644eafc8b5L, 0x1c89f20cceb291L, 0x51aec7b831ac47L, 
+ 0x51172fa6148854L, 0x8fabf7ef6d7bfeL }, + { 0x5910316477ee27L, 0x5f299dd20fe61eL, 0x48079a842826abL, + 0xf4a83ba22591faL, 0x8fac66055482ecL, 0x48fd5f16b65b3bL, + 0x4288a7c9fd9e19L, 0x27db8199377894L } + }, + { + { 0x2936ee47fd9dd6L, 0xcce5f0e9ec87c6L, 0x15a50e3db6e3b4L, + 0x61df105ad701c8L, 0x3601add1dff1f7L, 0xb761e06e8a16e1L, + 0x4341e021af3f91L, 0x9156a4a933fa3fL }, + { 0x9dc46ae54bc01dL, 0x605577a64eb910L, 0x22b99f85a59a99L, + 0xab2dbaf0a229d8L, 0xa8bfb656599364L, 0x39ed4a5e94ebf0L, + 0x7b46a1e0dbb23eL, 0x117b1958751422L } + }, +}, +{ + { + { 0xd19e8fd423bddfL, 0x9d77042387ef59L, 0x315cbdd849590aL, + 0xfdc637c7866c1eL, 0x72be83d03515a6L, 0xd44a4a00376780L, + 0x3b9613119e0c2bL, 0x023aca37b1a689L }, + { 0xf5f368782282eaL, 0x44710898a8b5c7L, 0xcd2f00a17a3066L, + 0x754e11281ed681L, 0x9c6c70c0bfcefdL, 0xd6aced03b6f29bL, + 0xe443d562817a2aL, 0xe590ef4e7c0012L } + }, + { + { 0xc2f96763e62e2aL, 0x661816eb2daa26L, 0x3515fd2dd5f512L, + 0xdc36e2756b6e75L, 0x0bdde4674cc658L, 0x102908600e7644L, + 0xfdf00451694a09L, 0x454bcb6ceac169L }, + { 0xf4c92ab6481eb6L, 0x8b77afa09750e7L, 0xe6f42316362d6dL, + 0x0d45deef53a3aeL, 0xdac7aacd7dcf98L, 0x628cb7f125ec4aL, + 0x41e8a20aec0320L, 0x7418c7eea2e35bL } + }, + { + { 0x4d649abdf40519L, 0x8cb22d43525833L, 0x15f6d137a5333fL, + 0x8c3991b72c23eeL, 0x248b9a50cd44a3L, 0x6b4c4e0ccc1a75L, + 0x3221efb15c99a9L, 0x236d5040a9c504L }, + { 0x401c7fbd559100L, 0xcf0e07507c524dL, 0x39647c034a9275L, + 0x2355422f7e8683L, 0x3e0a16eb3ae670L, 0x1c83bcbad61b7fL, + 0x491bcb19ca6cbeL, 0xe668dc45e29458L } + }, + { + { 0xe44c65b219379eL, 0x211381bbb607eeL, 0xd4c7428b7bc6dbL, + 0xba62a03b76a2e8L, 0xe1729c98bb0b31L, 0x3caeb50c6bbc10L, + 0x6c66727b0187aaL, 0xbf9d2f0fb90dcfL }, + { 0xec693501184dc6L, 0xd58d2a32698eb5L, 0xb366d8da316b07L, + 0xe1e39bb251c017L, 0xbe44ba9adb157fL, 0xbaa9a9a8a8b06cL, + 0xd0f46356e473e1L, 0xd25a8f61d681c6L } + }, + { + { 0xba39d5fcb102c7L, 0x66eba21d8aa1ebL, 0xcc2591a697fbf4L, + 0x5adb5792317f54L, 0xa01ae71f76c6f9L, 0x2c525de5042705L, + 0xc8f42724f4479fL, 0x26ab54ae6d7a5bL }, + { 0xda217b5dc28106L, 0xc7cadeaeb2ae6aL, 0x0b1609453ea3b2L, + 0xcddcc1ccc6111bL, 0x5c47affa7a7bebL, 0xf9931bd0e52dabL, + 0x5231835c6dcf96L, 0x7095bdef27ea4eL } + }, + { + { 0xee8adaec33b4e2L, 0x300665163ceb44L, 0xf1476fb880b086L, + 0x07033289569ce8L, 0x2cabf9a238b595L, 0x85017bc26c8158L, + 0x420b5b568d5144L, 0xa9f5f1ef9c696fL }, + { 0x1409c3ac8fec5aL, 0x541516f28e9579L, 0x06573f70e1f446L, + 0x3e3c7062311b96L, 0x0033f1a3c2ffd8L, 0x8e808fcca6711cL, + 0x716752d07aef98L, 0x5e53e9a92525b3L } + }, + { + { 0xce98a425a1c29fL, 0xaa703483ca6dc9L, 0xe77d822edfa48bL, + 0xd2e3455068abcaL, 0xb456e81482cfcaL, 0xc5aa9817fbfb08L, + 0x8979f258243194L, 0x727f2172cd043dL }, + { 0x7cca616aa53923L, 0x387c5aee9bcb72L, 0x0173fd437580bbL, + 0xdd7795b75fc0d9L, 0x47d1c37345deaeL, 0x2eb5d7fb0d1c03L, + 0xf7a1b92958f002L, 0x7365cf48f61b67L } + }, + { + { 0x4b22c3b562a5edL, 0x711216f5c7cd07L, 0x51f72c49ba0648L, + 0xc10d0930de9e6fL, 0xaca479bfda63baL, 0x4722a55af532b0L, + 0x8d59eb77236f39L, 0x5cad8744465c34L }, + { 0xa2119e5722b0c1L, 0xb670264f343ea4L, 0x6910f02c19f387L, + 0xcfec5bc0381fbaL, 0x5f5de0d52c0a1dL, 0x4e474d56378cb6L, + 0x2fc802727e2ba3L, 0xa215da3159b541L } + }, +}, +{ + { + { 0xed535858499895L, 0xa0aefd565c998dL, 0x210d8502d5a561L, + 0xc2cc23ca2cd9d6L, 0x2371d46c4d297eL, 0x88b2143d18d441L, + 0xbebdad9043993dL, 0x6ba91e7ad5f28dL }, + { 0xc2bb3f13a731f4L, 0xd35cfac5d0d5c3L, 0x995099835ac427L, + 0x8938bb55458adbL, 0x0bd738cab26f3bL, 0x56db3d5a28cd8dL, + 0x87eb95fa1d8b4bL, 0xd6700efe7f3b4bL } + }, + { + { 
0x962c920ea1e57bL, 0xd3be37e6dded6dL, 0xf499b622c96a73L, + 0x3eaf7b46c99752L, 0xa310c89025590bL, 0x535aa4a721db23L, + 0x56ab57819714a0L, 0xeecb4fad4048c1L }, + { 0x7b79ec4470c466L, 0xc4e8f2e1383ceeL, 0x0f5d7765750c45L, + 0xa3b3bc3725527dL, 0x2f5deb66d00cceL, 0x5d5a0f495a8d81L, + 0x50a442ee02b824L, 0xafb04462a11628L } + }, + { + { 0x72b67bc0c613deL, 0x0150d4be6f0b24L, 0x847854e8ed289dL, + 0xe08292fa320f88L, 0xd5b6da329c6160L, 0x2a48e2d4fb9d06L, + 0x55d9e412de087cL, 0x65683b54f02100L }, + { 0x4dc8c2ea8886c6L, 0xe966dd220d6114L, 0x99745eba57af97L, + 0x23a9a71b854725L, 0x8effe05621a047L, 0xf16d284049a4beL, + 0x95828c25b0660fL, 0xd5b69ba56e96b0L } + }, + { + { 0x0b5b4244ffa0b8L, 0x0585b45096cc5eL, 0x413e1aef505d37L, + 0xe5652a30c7ab8dL, 0xab32fb72990120L, 0x6b8b16e3f09368L, + 0xbf9fadbefe128eL, 0x85f366b14b7671L }, + { 0xcb2f294090608dL, 0x25e2769ac3045fL, 0x069c4f06131904L, + 0x1c57cf1329a779L, 0x72fe0d5b7cace7L, 0x04d9f430897a45L, + 0xbaf32f6359a645L, 0x0fa854ffa7485aL } + }, + { + { 0xae3533c5f56f60L, 0x9773bbb0ad9360L, 0x769b34a38fbe6bL, + 0xb5ba8e9ffb0c00L, 0xa93931875472e4L, 0x12cac92ce5f30fL, + 0x514fc06a9e7dbcL, 0xd7ca86558b4734L }, + { 0xd101ff365a730bL, 0x92da451abe70e9L, 0xfb5f94aef7bf4bL, + 0x8c3ef4c1d56c7bL, 0xb0857668435c10L, 0x7fbbbdae7ed4ccL, + 0x1da6eaf24f372fL, 0x0ab2c1f59b8ae3L } + }, + { + { 0x63a1a78f10a4b9L, 0xbb5278d0c7e510L, 0x97b224ef874142L, + 0x0a9ff52b2517b1L, 0x1b5a485c5cd920L, 0x1a8e2eba1823b9L, + 0x2b088c00e914a8L, 0xe5ec3adcf13432L }, + { 0x0d6ab3e6e7e253L, 0x9f0f5cd6f18458L, 0x839a744f459a6dL, + 0xb4b4f941eb15f7L, 0xe0313acc72cb14L, 0x58ee933b20472dL, + 0x5f73d7a872543eL, 0xb1700c5501f067L } + }, + { + { 0xb70428e085f67fL, 0x5441d5143cabe5L, 0x4d0e8c2e0a6055L, + 0x8d39a080882e4fL, 0x615bb32c1cb39dL, 0x113f18df7a1642L, + 0xbab8cf5250681fL, 0x3017ba2677b72aL }, + { 0xcd2b6e95a3a876L, 0x04765012035a69L, 0x31d6440efa2ea0L, + 0xde8f8d156874d5L, 0xcbc71cd0199d4aL, 0xc546b61e7f2170L, + 0x4e57e4e112c4c3L, 0x58955a8d1622baL } + }, + { + { 0x0064cd704e2f6fL, 0xe9d458de0edd38L, 0xeb1a5977e0a5c8L, + 0xe322ece01fc0a8L, 0x8b9d1661032a19L, 0x3e7b539a89de94L, + 0xfa30262001c754L, 0xe33de4ddb588f6L }, + { 0x4dafbdb954eb94L, 0xbb436480584c1bL, 0x622c93e5dbe29bL, + 0x968f9e3f57b931L, 0x98f03be0f6453bL, 0xb0ecc7f08f696cL, + 0x5af55f4a505335L, 0x028533efb3fa9bL } + }, +}, +{ + { + { 0x3bc8e6827e8d86L, 0x4e43b3063f105aL, 0x5301b7d4981250L, + 0x8b0a75e9f72fa8L, 0x88f59db357348cL, 0x5f0ebb1ec4208eL, + 0x4712561c043d3bL, 0x9e5ded0c806b97L }, + { 0xf9bd0a62121d09L, 0x1759ecbe337cd1L, 0xd1acc0ee945542L, + 0x3683febbd2f63aL, 0x44f1bccda5dfe9L, 0xa3606c9707f22fL, + 0x45ef0642d96ca5L, 0xfc3107d9022df9L } + }, + { + { 0xe81320b44be755L, 0xdf213d55c7c761L, 0xf43d2d5b4e5db9L, + 0x3bcfd828dedcd2L, 0xdf368a6d37a9ecL, 0xfef20aef475a77L, + 0x22f5894162c064L, 0x956bc660142a7dL }, + { 0xaaa10e27daec78L, 0x3cb9b72b6e9a78L, 0xa740bade383f72L, + 0xc31b4017759007L, 0xdada964a7afc50L, 0x6bf062cfd3d11fL, + 0x9470d535db3679L, 0x339447303abf13L } + }, + { + { 0x533f44046e5d7fL, 0xd1793e349048c8L, 0x59e11501929b94L, + 0xcddbbcb8364134L, 0x795c794582774fL, 0x114dfc4e03081aL, + 0x541ef68ef54042L, 0x159295b23f18cdL }, + { 0xfb7e2ba48a2c8cL, 0xe2d4572bb6d116L, 0x7bb0b22d750b53L, + 0xc58888cd142ee8L, 0xd11537a90c9e2dL, 0x77d5858d02eb9eL, + 0x1fa4c75d444a79L, 0xf19b2d3d58a68dL } + }, + { + { 0x37e5b73eb8b90fL, 0x3737f7a3f2a963L, 0x87913fa9de35e0L, + 0xec7f9928731eddL, 0x6e6259e219491eL, 0xb2148a04de236cL, + 0x89700e8fdd309bL, 0x9ce51e49f0bf80L }, + { 0xe7ec421301f17bL, 0xa4b570a3bc5f4fL, 0xc2b1b2a1285ee2L, + 
0x5e86bc8c53db73L, 0xb65fceaf24fa90L, 0x9e74c5608ab024L, + 0x5c8003df9ed877L, 0xa632e9e4a2cbbcL } + }, + { + { 0x32a4546c91c8b5L, 0xc122b5ac969363L, 0xbbbec5e3648b3aL, + 0xd5a365e25143b0L, 0xcf3e46454157ceL, 0x9712f04f9bab64L, + 0xc12d43a04b4008L, 0x51932d72edf1c7L }, + { 0xaef1655b2f8470L, 0xaa8e3f36c24aceL, 0x7da75da6b4e761L, + 0xd371827b90bca2L, 0x84db4500afb45cL, 0xae12045ef46b5dL, + 0x91639a5d962f98L, 0x669cbe672f2ac0L } + }, + { + { 0x851bb3183a4356L, 0x7d436bf9a1bf15L, 0x46a3f0e120b378L, + 0x9302abc3f5b357L, 0x1e0672693fef53L, 0xb12f4a95fd2ee9L, + 0x94a884c7de9433L, 0x2645234a6f2874L }, + { 0x6fb56f5cdb8dfaL, 0x4a17dfc9e0ee4eL, 0xe269d8383ab01eL, + 0xda932dab77c10fL, 0x463af0c0321243L, 0xbe1d68216fc8a3L, + 0x2eae3ea48b39e3L, 0x94230213b03e7bL } + }, + { + { 0xaeb507cb22f28aL, 0xa77458b49a6b44L, 0x232ed5ac03dc17L, + 0x79dfc169c61ac6L, 0x7c48be9cd71b93L, 0x983d68ac429cd9L, + 0x7709c4798ae2c8L, 0xe4765c0a5df075L }, + { 0x23c4deb3367f33L, 0xbdf2b7e37d72a7L, 0xbaab5c70af2d26L, + 0xd609f7ffd026abL, 0x23b72b2541b039L, 0x8d06bac83be852L, + 0x911d4a9cb23d1cL, 0xeae815cfb0dbd7L } + }, + { + { 0x487c35c2c33481L, 0xffab636b6136dbL, 0xccd4daea3d3aa4L, + 0x87149bbc3704e0L, 0x9de8119c0e8396L, 0xd49357a58e7ca6L, + 0x68789181562d75L, 0xc7453815ab1fadL }, + { 0x0f1579802c9b91L, 0x7ffc3f0b1ddde5L, 0xa01d5e06aae50dL, + 0x6a97e65e279873L, 0x4bcf42fb5b1b41L, 0x1c6410f32f5982L, + 0xd4f760050701c8L, 0xff02663873b90dL } + }, +}, +{ + { + { 0xdc53ea2e5b2de2L, 0x94b352d38acecbL, 0x37d960b0d9d5e5L, + 0xabd868f90bd997L, 0x781668f35a7376L, 0x043d59710118bfL, + 0xd4da719f57928aL, 0x01942f6983e46cL }, + { 0xab97fc8728bd76L, 0x825956b4b5c1c5L, 0x202809fc82a104L, + 0xdb63e9cc8e3132L, 0xa41c701c2181afL, 0xd28018043e066aL, + 0xc734e4124044ceL, 0x4d9ab23505193cL } + }, + { + { 0x0bcd42af9f0c3fL, 0xda21a46b94a218L, 0xe55243c0ffc788L, + 0x318aae647a5551L, 0x8c2938b79af9cbL, 0x5d15232ec1dce5L, + 0x3d310ba8ad2e5cL, 0xd3d972494f792aL }, + { 0xdeb4ca112a9553L, 0x2f1ed04eb54d9dL, 0xaa9c9cf69fb7a1L, + 0xeb73c3a54dcd3aL, 0xee3eddcf5f201fL, 0x35f9e1cba7d234L, + 0x1d1d04cd2e242fL, 0x48df9d80df7515L } + }, + { + { 0x4ecc77da81dd9aL, 0xa6ac4bb03aa015L, 0x7645842bbc4fedL, + 0x9ae34cd9d6cf52L, 0xf8ff0335917e0bL, 0x7c9da37c2cc175L, + 0x1e74dccaaacfbeL, 0xa8f2df07999af8L }, + { 0xd06c4ea102a466L, 0x2156e87ae190ddL, 0xc95db8aec4a863L, + 0x49edffd244a6feL, 0x110fae6904f81eL, 0xbaa3e50a1cd104L, + 0x5bd38a20478b65L, 0x2b57d05daefbccL } + }, + { + { 0x1ce92ba86f4534L, 0xb2a8592414f5e3L, 0xdd7a4c69979436L, + 0x7599aff3f0add7L, 0xe0ce4d3e2d4f64L, 0x74475cc401a29fL, + 0xaef6541a2377d9L, 0x54048f53f917b6L }, + { 0x1b86b2205312ecL, 0x779ba2231493cbL, 0xc718369aac9320L, + 0xeab01a8617fce4L, 0x17b1f10f7187faL, 0xe68eda0a1aca46L, + 0x61033fe2586342L, 0xfc14e790b6ca43L } + }, + { + { 0x9f2231913d2491L, 0x66bdb537997202L, 0x0bafb0c4617f34L, + 0x5917831f3bb7b3L, 0x6feb2a6b45bddbL, 0x08662b30202c19L, + 0x0bc2b5705852f6L, 0x2c00fd491818c2L }, + { 0xca7672cda37dacL, 0xfe4c04c5a30865L, 0x5f1399f322e92aL, + 0xe7d67ea25b1bebL, 0xe08b014dce7f68L, 0x24df52af2f2b3cL, + 0x2028b23750ecd1L, 0x9b25d4bc810a45L } + }, + { + { 0xa35b7157a9d799L, 0x6da1eb301f9c99L, 0x33ef91ce363ba8L, + 0x21c0e2ece140daL, 0xb0b11bf158cd84L, 0x6a8744293da438L, + 0x924f10d3db585bL, 0xf5ddd7310c6159L }, + { 0xb72dcb86a74c21L, 0x6d14198cc8f79fL, 0x99f4b6c9c5a8d6L, + 0x063968890e135cL, 0x330edb883f6385L, 0xe1a5a6b9079675L, + 0x6e37fa8b8f5fe0L, 0x60e2fd961dca1eL } + }, + { + { 0xc6cb40366c395eL, 0x03b21a7b51d0f1L, 0xbc478a5e693181L, + 0x0017c2fc6cff33L, 0x740a5b839d8d1eL, 0x3968d664d9ec6dL, 
+ 0xfd53738b0ef1b0L, 0x73ca8fd1ed0a04L }, + { 0x4ace93875ab371L, 0xd602936ddad7e9L, 0x1f5424a750bcc2L, + 0xfe09b3668c7a17L, 0x165f7de58341ecL, 0x95b825a6ce61e5L, + 0x9d31e1966c83c4L, 0x65b3e08cc5887bL } + }, + { + { 0xd37e93221482d1L, 0x9af659708b6380L, 0x279426a7d61e4bL, + 0x80dd0ec80997adL, 0x7239b0dd5b76d4L, 0x92e6c73e76c098L, + 0xeeb2321eab3e1dL, 0xa69c4a7eb1a910L }, + { 0x46d6aa7833d9aeL, 0x3ee6957572b0feL, 0x44ccbedcdb3d97L, + 0x342f29dcbea01bL, 0x0d518c58926876L, 0xaaabae75585d2cL, + 0xc548c77e008f58L, 0x819e2fa21fab2cL } + }, +}, +{ + { + { 0x468e149c16e981L, 0x286c7909ddbb7cL, 0x2a92d47db7a38aL, + 0xde614e68a27cb2L, 0x8dc8822e5b0ab6L, 0x38441aecf48565L, + 0x11ed5c9089435bL, 0x238928682d0d31L }, + { 0xc6698d472f2f31L, 0x295242c56d76afL, 0x4099205eba563bL, + 0xae7de5a3ab7384L, 0xccdf127d0ed86cL, 0xb9b6d5b965c3c3L, + 0xe351a8f2c31ad7L, 0xa761dd8ac12f13L } + }, + { + { 0xda115ddf171ab7L, 0x2de17b1401f93dL, 0x95019ca40964b4L, + 0x169d1f465ba3c3L, 0x534a0070090d08L, 0x805c5e282bf410L, + 0x15dfe1165f8d90L, 0x827a416ca72456L }, + { 0x5af888433a36c4L, 0x8bfa54cd8ee604L, 0x08fd1419ce290fL, + 0x2db5e8c287b3a6L, 0xe5be98103cdad2L, 0x155b874bf810b9L, + 0x2ae42de670f473L, 0x22185847f74657L } + }, + { + { 0x54b2a5023ffa43L, 0xcf87b16a24d919L, 0x1ff540263524e8L, + 0x73c94e056d1e54L, 0x76515523899fb5L, 0x13a721418723bfL, + 0x39afbdd3561517L, 0x49b790a9f2862eL }, + { 0xc8c1f4f527d2ceL, 0x1997aec7609bb7L, 0x583ad8002a3400L, + 0xac2374e4f79706L, 0xbf1f9a821b7183L, 0x06158ab6600fe0L, + 0xfcc9b2ebd56751L, 0xe1de5acddaaec7L } + }, + { + { 0x230baa1788fdabL, 0xf30860a7d04597L, 0xa2c7ece99f4caaL, + 0xbd39f106ad065eL, 0xfd92f5d3bef7bdL, 0x6069fad96d2203L, + 0xbff38cac4d9e0dL, 0x419a0171fda313L }, + { 0x5d77fd8572f035L, 0x5af99f2b282b40L, 0x7257d3b23facffL, + 0xf2ee22358c90afL, 0xcc2687d9b6a52aL, 0x140892c302430eL, + 0xa934d5e3ec4f38L, 0xc087d7c3bd18beL } + }, + { + { 0x7e94138a2c5ed7L, 0xbc8ceef53610bfL, 0xe89356bd86f803L, + 0x9a3a3805a55330L, 0xe894aba11ad648L, 0x2e68fbaba95918L, + 0x643e2bafcad344L, 0x0dd025661640aaL }, + { 0xc02e479e25cbddL, 0xd78c4d813a1b3fL, 0xa6dae8fcca9692L, + 0x3dd91e9e5de8a0L, 0x78ae0ce764ea36L, 0xb4ad99985dbc5eL, + 0x967ff23e82a169L, 0xaeb26ecbaee1fcL } + }, + { + { 0x8c502559a6f90cL, 0x56e7abe0ea374aL, 0x675c72256413b2L, + 0xd3fc17e946753fL, 0x28c4e1fe235f7cL, 0xe209bcdb028eb0L, + 0x7d0f93a489fe88L, 0xb966a2e063706aL }, + { 0xb6c228c4a30319L, 0x6868efeca6d674L, 0x0610a70057311aL, + 0x0808112bad7f89L, 0x2a2462c1dd6181L, 0x52ed9feb58e88aL, + 0xbbff16f33821a2L, 0xda53e9617f882aL } + }, + { + { 0xb6ffca38c30e5dL, 0xa90f9915c905f5L, 0x72fb200d753e88L, + 0xe509d4c7256c6aL, 0x369e552d866500L, 0xee4b7e033cf8aeL, + 0x280d954efcf6ebL, 0x5b275d3d557f0eL }, + { 0xeb17211b5cecf8L, 0xd6ad50fbdb2f8dL, 0x2478c7b35e04b7L, + 0x97e7143ac73bd3L, 0x09d6ede4817e24L, 0x68fea712c405e1L, + 0x34adbc905f67a1L, 0xd20ab7073edf99L } + }, + { + { 0xe116a96569f191L, 0xb3f0bce4d6e29aL, 0x30b9e1af51dbabL, + 0x1dd36f3346d276L, 0x83151030749a27L, 0x242f148ab47f70L, + 0xe8a5bcf5585681L, 0x8b801845ed79baL }, + { 0xa4042fd3894ad1L, 0x82f781d2b88bc6L, 0x2d34cacbe4c397L, + 0x8731aeadd99c9fL, 0x0f95498ef1d382L, 0xcaba2e1dd0bbc9L, + 0x78889e954064e8L, 0x8cd9c9761a8ab9L } + }, +}, +{ + { + { 0xf31f53ffa0459eL, 0xf8742a1315cd6bL, 0xabe2f50ae64e97L, + 0xbd787419b9da48L, 0x4521a3351e526eL, 0xfa05935e10ba45L, + 0x5c947e1e8f903cL, 0x0aa47d15a754eeL }, + { 0xb2849efd814825L, 0x9c2a5d25c9968dL, 0x24dbb2604e634cL, + 0x33f3a4cdb38194L, 0xe04f609c8a2b6bL, 0xcaefd8eabbbfdbL, + 0x683119a404498bL, 0x24ab7a98b21cbdL } + }, + { + { 
0x6f1326921fa2ddL, 0xd79e61cc10a4bcL, 0xac4b3ce4bd6d46L, + 0x52459b6bd3f37bL, 0xce0f0a3a396966L, 0x050d1d5a1ed488L, + 0x1b9c403e0b17faL, 0xee1abd004a2e66L }, + { 0x97065c35cf3e3bL, 0x6513d5fbe33441L, 0xcd3463479047aeL, + 0x45cbb1cfd22df1L, 0x7a173ae967b17cL, 0x75f5ba72223cdaL, + 0xe3d12dbefe0a73L, 0x3b7f94dfd7adcfL } + }, + { + { 0xd596a13f1e9b7dL, 0x04f5bdd6734e0cL, 0x18b694f8be163aL, + 0x15620c7d959fa3L, 0x65fc2c553d2a3bL, 0xd44a364c4d36f2L, + 0xc8b421f268ceabL, 0x564139abfe2bd4L }, + { 0xb52461019d4633L, 0x5ab3f886346934L, 0x96691fe9819422L, + 0xdfdec898b39b82L, 0x84b1c7997cfb27L, 0xe59a98d4d6d004L, + 0x5e5d0c612c350fL, 0xb431220d415774L } + }, + { + { 0x3d0ca736aae0a2L, 0x7b1991f48c2d8cL, 0x00ae8565cdae72L, + 0xdbb6ca0bd55128L, 0x3c2ab2a45c82bfL, 0xea5a55979545caL, + 0xeba9a26d5927d0L, 0xb52e40183257fcL }, + { 0x55ed517ca9650aL, 0xbdaa081e3ebff2L, 0x8cf7ce49f8831bL, + 0x1d0b5bd6e3b8d3L, 0xa314a9fd8fc869L, 0x07f2079b892babL, + 0xb700dbfa0cc9d9L, 0x7105a086dc0a39L } + }, + { + { 0x0c7e05d8c7d901L, 0xa7ff681af3182bL, 0xb88e3caf9a0d06L, + 0xfe20a12c343b7fL, 0x9f0257703251f9L, 0xf225dedc40c5ebL, + 0x50e0cecb208ea7L, 0x5b250f0e6eeb65L }, + { 0x807a1534806b6eL, 0xded120afa94139L, 0x237ddc749366fbL, + 0xdd3674e5a34bcbL, 0xef6cdff9c4a61dL, 0x036194bb2fb896L, + 0x38659539528cd9L, 0x0723c596936a52L } + }, + { + { 0x1f84cd5e17719dL, 0x545939bc73b394L, 0xefbf3c583e84e7L, + 0x6cc46f1f77fd66L, 0xa629f591383ab8L, 0x9177ffacd35cd2L, + 0x039187f9dd411bL, 0xa9cf1cf7b7eea8L }, + { 0xa3b105aac47e5dL, 0xa755bead0a9da4L, 0x50cfbae73da15eL, + 0x9456cbc60b628cL, 0x7ffc3629b7a910L, 0x30b5924cd6d6a4L, + 0x198629f0b04ab6L, 0xc74609c624dea9L } + }, + { + { 0x27d4d77af12fa6L, 0xdd8a216690aeb2L, 0xe48fc02fe24417L, + 0x1970403720e17eL, 0x95013fdce37b42L, 0x06817d2de4bd9bL, + 0xc5863e763d0ba2L, 0xa1bafc0a556f5dL }, + { 0xf28ec7b410a78aL, 0x0dcac420a01a63L, 0xfcd3fa4b5bce11L, + 0x054d7e5d278b89L, 0x5195db85ce49e3L, 0x4c0b1672c73d96L, + 0xd94307720a1bdbL, 0x66fa8b359c77a7L } + }, + { + { 0xb9e93aed7462feL, 0xbfe54b218dde4fL, 0xaabb5283dbb08eL, + 0x8c367020e5fc45L, 0x35028888e69be3L, 0x6d2efc1c12a11dL, + 0xfce5cebf265e30L, 0x58c8bb35742c7eL }, + { 0x32e89dcccf7fa0L, 0xa811f33dd020a4L, 0xa10d6205129fe5L, + 0x3841c88e4ed29bL, 0xf3303a9d8b1ea6L, 0xa9a0cad1781f58L, + 0x4502b388f3ef0bL, 0x2b7587e74c6d35L } + }, +}, +{ + { + { 0xc6eaea123ae7cdL, 0xa1884d473c0caaL, 0x901e76fef1ea88L, + 0xdb9935ca14269dL, 0xe8b2486947f1deL, 0x4ad56f4a657588L, + 0xe7680542913fb1L, 0x2abff5d37600daL }, + { 0xa814813a81a797L, 0x63e76a446acb69L, 0xb1038394ab8277L, + 0x587de349d8e759L, 0xdfaeb8dddf62dfL, 0x24fe1cf9239d49L, + 0x7de7409e130d1cL, 0x3ecfef9581d070L } + }, + { + { 0x8d177a0f87c72dL, 0xae7e5818c6d1deL, 0x0077b5f8cece85L, + 0x382483832d2187L, 0x49d8b156db2bd2L, 0xe9e5513c8d85b9L, + 0x63c410ce05c53fL, 0xceaf2fbd86f752L }, + { 0x0b432fe93806c5L, 0x18eb15d3d06c75L, 0xcaad82612cfc02L, + 0x581e0401e2d045L, 0xd573cb595edcfdL, 0xce71948dbc66e3L, + 0xcf68721acc14eaL, 0xf68bea26cac4dcL } + }, + { + { 0xd8576afcb74da2L, 0x8771c29c433f46L, 0x7315af6e2f5b8eL, + 0xc195481ba33928L, 0xb77dcc22fb1f94L, 0xcb3e57ca610f75L, + 0xeb2a92753907dfL, 0x916f14923eff95L }, + { 0xbb378e4b6cd291L, 0xa2a5e2b2f13ce1L, 0xa8a0e60bcd00b0L, + 0x5902741682b75aL, 0xa0882c93f65a77L, 0x2069f75c93cfffL, + 0x1ede40570c0cb9L, 0x13840c90d526c4L } + }, + { + { 0xdc2caaa03ced48L, 0x2079219a0315beL, 0xca493563b1f642L, + 0x0202dc7b0665f2L, 0xe5d6bbdb7a5238L, 0x36fbd5e26eab32L, + 0xb3988f1f5819b4L, 0x5b15dc84aa4d69L }, + { 0xa52feed54e5c24L, 0x927471be91a797L, 0xd119bfdd57f677L, + 
0xde38f7b78e4c4fL, 0xa7af516b150bc3L, 0x403b21e26b76c2L, + 0x589067d92300dcL, 0x04e406a066802aL } + }, + { + { 0x28e7d09a9ca9bbL, 0xaa84fd5fccf4a0L, 0xdbe9fb8635b7edL, + 0x9ede3f5d56fc7cL, 0xa4b5031b01cb29L, 0x584299d7f93703L, + 0xbd28868b6fe825L, 0x1d385d48b9c2d9L }, + { 0x6606f4a822be80L, 0xb5a0165626d0fdL, 0x9920a2014568adL, + 0x7d430f41c6d174L, 0xc243e16e02e9e9L, 0x367f1d2a6bd649L, + 0x693910071b8c36L, 0x2ede1314de2984L } + }, + { + { 0xdc781875beec32L, 0x1fff0cca525ff4L, 0x6e86425676df34L, + 0x2b4e8a63f638e1L, 0xc4991d29b1e59fL, 0x399d0011589717L, + 0x406464ebe041cdL, 0x901cb3d9e65bb0L }, + { 0xf5f4572fb42307L, 0xf81b3b0f1b7307L, 0x8fb695cf2094d1L, + 0x7db4792db56f7bL, 0x36836d55a794e0L, 0x2da477b09bc879L, + 0x1cdfadb1887c40L, 0x65dc6c2f2699b6L } + }, + { + { 0x36f9f214737972L, 0x48f0c8b7a387b0L, 0xa156ed339a1d24L, + 0x375293a0fed268L, 0xf679f487ff75cbL, 0xd15a00f1cc9e62L, + 0x92a7dc722c3877L, 0xe9870636fb0ed4L }, + { 0xfd8e59c16f5f3cL, 0x375732eaeeb48eL, 0x2dd9213ca1ab42L, + 0xcb062099ffcceaL, 0xfc611f6b23edfdL, 0x271634999b060eL, + 0xb938b5d820de8aL, 0x138f6e7eb49a32L } + }, + { + { 0x7feda63e485f70L, 0x646380aeb27b2cL, 0xcf8fe32c4511c7L, + 0x2c68e1eff9406aL, 0xa9f2fd920b6020L, 0x1c98fc63b3e465L, + 0xb8dac3593e53aaL, 0x2fb47b6a750e96L }, + { 0xea373ef1950bb3L, 0x81566944ac7aecL, 0x8d6b3c2b55b931L, + 0x5d13f2db62ef7dL, 0x4647f2aab9182bL, 0x8f56c5a33bf07cL, + 0xc5ab284b35a221L, 0x0747ab75a46a6bL } + }, +}, +{ + { + { 0x5b9236c86b85c5L, 0x5967a0dc482448L, 0x397c9557df6ae0L, + 0xf83ee1c5378f2bL, 0xf82df656e05dd1L, 0x4c424f619d7c8bL, + 0xa612550a6d5f2aL, 0xfe8482a63c3ebfL }, + { 0xcb8d4030142c82L, 0x08b06623679e6cL, 0x3ea51463eca5eeL, + 0x089eb3b1370500L, 0xcbfb19c5a0d306L, 0x2f6858842a65bbL, + 0xe3e1db5e51e119L, 0x2c150e7110895eL } + }, + { + { 0xf323488f6d4c4cL, 0x5fc931f63b87e2L, 0x8867da035c759fL, + 0xb6f1eff9746d4cL, 0x8a8172d990be0aL, 0x1113eee5c407b4L, + 0xd80dacf378ed8aL, 0x99b57cf3fa7fd1L }, + { 0xf5bb6d95176405L, 0x6b8963a92e83b5L, 0xac55b6b8a7ef8dL, + 0xe73fa126c1fbf0L, 0xdb3756060148dfL, 0x72f1a98f3f1fbaL, + 0x1f71d0aea550f2L, 0xc3ea4f09544a87L } + }, + { + { 0x5b09da24322bf3L, 0x2a573d561264e1L, 0x93cb2e1803acc4L, + 0x397b4fbe502fc6L, 0xddfb21239e0ebcL, 0xeccd8f5bbcbc57L, + 0x49d3bed4663788L, 0x37192aa1218df9L }, + { 0x8a05bc92ffa3c6L, 0xc38c28123ebf4dL, 0xc80d547fe343a8L, + 0xa8d5a5b6c63516L, 0xc5d8ce18d8fa6bL, 0xeb5e87224a87c0L, + 0x9806e9e75bfa23L, 0x11f0889689469aL } + }, + { + { 0x81005f68e75666L, 0xb84d861d349505L, 0xe0832829f321eaL, + 0xb751d7acfa33a1L, 0x793cf6f067c550L, 0x073a6b21027e56L, + 0x53f40ee66a6012L, 0x70bfaa8c210fa9L }, + { 0x1518e39e4b5998L, 0x8f0b53024b8d9cL, 0xd91c281afdf923L, + 0xc5cfb2824e3f69L, 0x63a529a870871fL, 0x3d3e8872128dadL, + 0xed658dccb30cceL, 0xf9373b9afb7baeL } + }, + { + { 0x22d4dbede58ed2L, 0x4fefc1d03f8789L, 0x6b0a1fe344817fL, + 0x96bef40a56b0b2L, 0x32684eeda249faL, 0x8298864524a91bL, + 0xa958baf0c736a1L, 0xd033a7def2f3e5L }, + { 0x5be3edc43f4d6aL, 0x326a39d9c89abbL, 0x90c44f755d997aL, + 0x20581066e966c2L, 0xdbae4906548038L, 0xac7bc97d473fc1L, + 0xb34488b4b2603aL, 0x27aea275e9bb98L } + }, + { + { 0xa59e7281b88773L, 0xe2f05d40c241f6L, 0xa56229e4e75749L, + 0x8f00c0b1b10705L, 0x855994619394d3L, 0x0d7e352aaf5e32L, + 0x526c462787b8eaL, 0x89297d9a179d48L }, + { 0xeff17e6ef43892L, 0x17091eb221f841L, 0x82f5eb34a4b848L, + 0x6bea4778eb7b76L, 0x21f227176c536cL, 0xd9ef2c896c81bbL, + 0x7c2754654bf4d3L, 0x9dd4662d7c28c8L } + }, + { + { 0xe7fff0020e1a6bL, 0x26a35c6a08d467L, 0xb3c773d3248c91L, + 0xa646615ba7d935L, 0xa91f453b0d26faL, 0xdcf9c3460c6d32L, 
+ 0x63668619e3e3dcL, 0x3012813f30f3e2L }, + { 0xac6623dc2fc61aL, 0x108dc252bfd2ffL, 0xd7f5c0d231d6eaL, + 0xa904f9aad1107eL, 0x46941c20d1e9c8L, 0xe5b6451c810cf2L, + 0xaba8e674f511d1L, 0x5b4b94f08373feL } + }, + { + { 0x002d4e2849c230L, 0x9bed0efd8ba391L, 0x745e0c0828e319L, + 0xcd40907ca58de2L, 0x2c87ab11abaa4aL, 0x3c17a97db64391L, + 0x36b184e86c72d2L, 0xb03d202485f7aaL }, + { 0x2b6b79bde24abaL, 0xdcb78542325fb2L, 0xf5d1db966ebae2L, + 0x35a4d5b903840aL, 0x7afeb09190e9daL, 0x1818f6a35c1792L, + 0x90091fa3faa269L, 0xc4ccff62570235L } + }, +}, +{ + { + { 0xa177619ec85940L, 0xfca24db7ef7eeeL, 0xb2450f37a90c11L, + 0x29d256ddbf4f85L, 0x920c8d051316c3L, 0x2f7f7ba04474daL, + 0x308117f2ec9a0bL, 0xd0a231ad0d2085L }, + { 0xf3288fc7ab641dL, 0xc68bade9f4fa32L, 0x768f014bbf8253L, + 0x5eff260c0a33f0L, 0xc71b4536bb93ceL, 0xa71d045680697fL, + 0xb62444cce72bc3L, 0x11f03e8d1379f3L } + }, + { + { 0x1f54789c16df92L, 0x874c642e3ed142L, 0x6699f60fa2a9f1L, + 0xbd1b8d33fecfc1L, 0x59682d58a3d953L, 0xf17c0214a36b81L, + 0xeb9621d181a666L, 0x7c2c3ab3cf1ad8L }, + { 0xe6888c3e529f7cL, 0x197b66ab355315L, 0x63b558a83e31acL, + 0x4aa7bc5891c68eL, 0xc17d989592e360L, 0xc750a291363666L, + 0x0d534704909ac0L, 0xd6d02724594a10L } + }, + { + { 0x35c541b3fbb635L, 0x50016d05982afaL, 0x58ebce496b0ca0L, + 0xb940027577ea56L, 0xf29d305e38480fL, 0x43705b0ebd6a2cL, + 0x0e4acdae90c639L, 0xbe94a29f56e05eL }, + { 0xc61f4a030659adL, 0x39074adc402211L, 0xfe0d8d551b621dL, + 0x2d02e8dd1d5222L, 0x05ece3c46c2683L, 0xf70705ac689d41L, + 0xe3caf444d837bfL, 0xfda058475ba6d0L } + }, + { + { 0x1098163cb7d458L, 0x12b645ff5ba834L, 0x70a318128af72cL, + 0x5f4727ef32e5ddL, 0x7cbae1510a21b4L, 0xa80bf806785389L, + 0x9827402b8f93b7L, 0xe385f8208349daL }, + { 0x2d054619589f6eL, 0x6aa5b26e7c0191L, 0xe79ae12bd5574dL, + 0x5d13f914148e61L, 0x7b2be0f13716ffL, 0x82b0fe680bb81fL, + 0x697633c3e2569cL, 0x6c1f083873f8b3L } + }, + { + { 0x6e26d850be1674L, 0xe4e47f6ab8044fL, 0xfdf46e882fc434L, + 0x639ae2cc89cadcL, 0x2244a524b85bdcL, 0xb1e4790b7cf4eaL, + 0x51dce037e0bb8fL, 0xdd143352716ceeL }, + { 0x1c049b48e8841dL, 0x6bf26dcb97c621L, 0x21d6255ba01178L, + 0x477258a8e4f0e4L, 0xf5e437e68f8ef1L, 0xd118fbc8b03e1eL, + 0x3d6bc51e1c91b3L, 0xa259486d5b6907L } + }, + { + { 0x4159cfc7b6f5dcL, 0x05a52b3493694aL, 0xeeb511c83b8883L, + 0x19d79e42b06400L, 0x8e503a2738f37eL, 0xa30e5795a94ad9L, + 0x3981c75262618dL, 0x06b6c692dcba19L }, + { 0xd7242ee4d1b051L, 0x6274ccb3b350c4L, 0x66df0bbf540019L, + 0x4d66be65ae12d5L, 0xcea29601049cbaL, 0x40473398df84b3L, + 0x7d6c96b75a31c8L, 0xbb80159874174cL } + }, + { + { 0xf0f7be059f1aa4L, 0x798f39adcff451L, 0x96763ff8014e1eL, + 0x03987a809cc5ecL, 0x4919656893650aL, 0x92e8eef75e24dfL, + 0x54e97cde89d639L, 0x8081d067682cc0L }, + { 0xb9ef41aa8ceb71L, 0xb8173a4a4d7aaaL, 0x93d81b1c54ee10L, + 0xabe180570a445aL, 0xac0ff9764d569dL, 0x86946b23e570beL, + 0x8e11dd24180641L, 0x3d0b33c99f67dcL } + }, + { + { 0x2c9637e48bf5a4L, 0x9fdec19ccaf112L, 0xe5cde9d5c42023L, + 0x9869620878f0ccL, 0xcf970a21fe6ebaL, 0x1df5ec854e678bL, + 0x4667f0128d00ddL, 0xfa7260db0b3fa8L }, + { 0x6bd2895b34239bL, 0x04c8bc52d2a50dL, 0x14e55ef6cb23e2L, + 0x6440c273a278d5L, 0xf4b12e32193046L, 0x46adf645dd4c08L, + 0x70e29984656e8cL, 0xe7b36eae4acd44L } + }, +}, +{ + { + { 0xea64a5716cf664L, 0x8497ee426fd357L, 0x44d94b4814e851L, + 0xf4aac225a6a2cfL, 0x947b30980c301fL, 0xf390ba17865383L, + 0x16c4fc6d1773d3L, 0x61b98146227220L }, + { 0x07dd03a1dd0270L, 0x290ca820f160dfL, 0x8f2205444ba955L, + 0x4e85e450b6f1b3L, 0xfd73ce9ad78089L, 0x67c12702f2cb0eL, + 0xa7de0d7ee33a61L, 0x6a811cc6553261L } + }, + { + { 
0x5ef05742d0a427L, 0xe8d2e95220a341L, 0xdd28cbf8044886L, + 0xdad7b4ba1aa58bL, 0xb28f3738ec901bL, 0x1841a935bbe3dbL, + 0x8fd7cd1a075feeL, 0x93b603fc0d3cddL }, + { 0xca54fd55edd859L, 0xa4cb05f64ed687L, 0x3138668ed1a3d7L, + 0x1224fdaee32be5L, 0xf1f532bc80aeb3L, 0xa4f65d0e8d4d69L, + 0xc697a015905fe5L, 0x514da7a6690ce4L } + }, + { + { 0xc7b9af83de4a55L, 0xc79bad7b318d93L, 0x1808071f5b1c83L, + 0x92112efb965b16L, 0x655ab387bb740aL, 0x53dbc8b384ff87L, + 0xd153c2872dc6f2L, 0x2ec20e199c7819L }, + { 0x65e46ea3b854b5L, 0x272d5aec711db5L, 0xfd1bb5326e19e8L, + 0x33280b83dc0665L, 0x95b986eb8f1c4aL, 0xa671fc4a685c4aL, + 0xa03cbd583bdbbfL, 0xd329402ab77544L } + }, + { + { 0x40fa6518e62b35L, 0x3913b11f9e55a6L, 0x4e8089b5270a41L, + 0x565f52a80d1886L, 0x93b5f05512749bL, 0x35c869c141c547L, + 0x9a44a1af86717fL, 0x2b9984b9c2b2cbL }, + { 0x61fb6074952322L, 0x2d4072f7af1464L, 0x9b2fa8c600eb30L, + 0x6071fb7f10668eL, 0x27cc24d90634caL, 0x3875bc2471d32bL, + 0x678590ba11210cL, 0x352b447fcc5a9aL } + }, + { + { 0x795d5415fa3200L, 0xadaa557a92949fL, 0x42fff063cc88c4L, + 0x26d683171b68a5L, 0x3286549e67ad8cL, 0x5bf636386396b2L, + 0x41229b6e12c8eaL, 0x05320c9748952eL }, + { 0xae36b63900b460L, 0x9354ff2f2b6affL, 0x10b810b065ee0cL, + 0x4d6925fcc8bb38L, 0x31c03fd7a22f14L, 0x76b7f4457544e8L, + 0x3a9123cc0eed26L, 0x77acd67e0cd1ccL } + }, + { + { 0x2e9053007ec527L, 0x32388ef62937cfL, 0xa445389e229188L, + 0xa44b68e33bcebeL, 0x5a8722e4c4e701L, 0xfd066e8cf07e41L, + 0xa3c1a4f95fab62L, 0xb4d6a1be542f24L }, + { 0xe6a92e4af6c9b5L, 0x9452484c83d61dL, 0x422b55b0062276L, + 0x261973a5279688L, 0xde8be263999fb2L, 0x64e96287b029caL, + 0xd8edfaa06897d4L, 0x408319c6955511L } + }, + { + { 0xff6baed50a5632L, 0x922b7d05c5885aL, 0xdf0f3b31b45864L, + 0x27e49c0c04340eL, 0x618c566122c447L, 0x7863a38eafee7eL, + 0x7143affb828cb0L, 0x51fcf4cf9d054eL }, + { 0xc4a4b3127f5e09L, 0x021f47a90be2bdL, 0x1a060197ab956dL, + 0xe77fa1586ea86bL, 0x9ccde87d550ef3L, 0x7dee53a6532654L, + 0x8b4f060e826387L, 0xda38637ad077b5L } + }, + { + { 0xbc901b30e9fac8L, 0xfa082046fb2a2aL, 0x92f68ab5e04efcL, + 0x184a30a9ac12d0L, 0x1aa11aab25d479L, 0x8bc5f4c0f03161L, + 0x7e3a083cfc8817L, 0x84d9355597f93fL }, + { 0xc014478239abc6L, 0xb226b098d37b04L, 0xb056942f575789L, + 0x816b95aba745ebL, 0x2a49d39b98ddb6L, 0xc41ca26291af81L, + 0xb3afe99ab26347L, 0x59c31bc604b638L } + }, +}, +{ + { + { 0xa16a8b9c42befdL, 0x731c9c92052f00L, 0x1ad49b41f5dfa0L, + 0x7a289e3bffce36L, 0x868fac00c79cf1L, 0x6d6d28486721abL, + 0x590f928e726c94L, 0x0e802cb51f3841L }, + { 0x6a6a57a0b694bcL, 0xb9bb0cd8120fb8L, 0xad96ac79c05826L, + 0x294da8c7768df0L, 0xfe32311b56c6c6L, 0x291c2c6ae8d050L, + 0x1c765e7e7db4c9L, 0xe058298d65f9f7L } + }, + { + { 0x4bfa85b7e8d345L, 0xa04ef95de1dfc8L, 0xb5f7f21324ace3L, + 0x4b350a1574b14aL, 0x11436bff8e5c8dL, 0x1c789f97642369L, + 0xeb5e335fb623ceL, 0x9deacd2442d562L }, + { 0x4ff989f531ee71L, 0x43e2c49aacb52aL, 0xa76319885bfadcL, + 0x08b6d5cd0161a0L, 0x010e3fa541f197L, 0x83a589e3279a16L, + 0xf0991376309f9bL, 0x07c093bf1cea10L } + }, + { + { 0x1ce3f0f33d2192L, 0x07b559ac37ce73L, 0xaa2ad38207be27L, + 0x84f053b7ed93deL, 0xbc5c7973b98a4bL, 0xc92346163aa9b9L, + 0x807cc16231a10cL, 0x8ffdf57a061209L }, + { 0xa9ca741497070fL, 0xf608ec9d113b3aL, 0x51327268d0384dL, + 0x96686acf5ec307L, 0x437bbbd71c4665L, 0xdef09d57c379caL, + 0xf8be033621747cL, 0x2775b378ae8047L } + }, + { + { 0x4009798b2c4fc2L, 0x148d7d1203772eL, 0x9d9392df8423fbL, + 0xa5bd72eaf8cef4L, 0x579d58d4380b53L, 0x2ff88f18c39d24L, + 0x9ca2fbc5706466L, 0xb42987d1e56af2L }, + { 0xcc2556e5d94ea8L, 0x4e5c2b35369d76L, 0x5de35742a94f9cL, + 
0x8d068c95cb4145L, 0x4d553ff51bfcbfL, 0x3ab71648a23fceL, + 0xc9cb3a9d0fa7f3L, 0xf81209bed9ced1L } + }, + { + { 0xde7356ee5b66f5L, 0x7b2bf1ae8a25e0L, 0x09a444a2c9b725L, + 0xfd8a2f44906c55L, 0x409cc8082514f3L, 0x47e009928999a9L, + 0x0a582a66a312f4L, 0xf7946f8f6723deL }, + { 0xa55f6ba92d8affL, 0xb62c3c8a544b1cL, 0xa1d14115c16a94L, + 0xc3783192ad5e71L, 0x13d784706b1dd6L, 0x99005f8ee7ff55L, + 0xfb5ea3f8a1e7d8L, 0xdc7f53cb4cac39L } + }, + { + { 0x482abaf36e3794L, 0xc23e9e5c74684fL, 0x4544cf6f1629beL, + 0xd8a8ee52f40374L, 0x2eea87ff433bdbL, 0x489a99cae9990eL, + 0xefc131e54b23b6L, 0x25fe6998600270L }, + { 0x03d2d9ec059a7eL, 0xa6445b56979c3cL, 0x491a10c9bfbceaL, + 0x15b5974e937af1L, 0x4be8002797c7fcL, 0xbed8a49fedcfeeL, + 0x35751cea9e0691L, 0xe9a9fa39ef5982L } + }, + { + { 0xeffeaca3065de7L, 0x841d544ac4d4e2L, 0x8144679caf199fL, + 0x98cf4f9443967aL, 0x8cd57f4f33183cL, 0x390832ac1b15ebL, + 0xc4b1feaa53b500L, 0xd762a10dff24b5L }, + { 0xccd3eedb0ee2a9L, 0xa6dd4a9362d485L, 0xeb4ff26f1d047aL, + 0xc0771fd23860fcL, 0xdbb4e394b64114L, 0x2ff3f244d29b29L, + 0x9cac005387b365L, 0x05b7aa6de5994aL } + }, + { + { 0x5e71752c03dd63L, 0xad10fe9bc74687L, 0x51a5b0c54c76abL, + 0x763fd501f586d4L, 0xc7bd5ce816048bL, 0x8fc83d23f744dcL, + 0x0561802109df9aL, 0x18fb01fccf0e43L }, + { 0xe4606fc038ab23L, 0x5878f1fa664c98L, 0x3aedbbd5da7356L, + 0x3c578f5516746aL, 0x259477f1a17210L, 0xc7a869d028248fL, + 0x6517a6148cbf95L, 0xbc5f91d3d04d47L } + }, +}, +{ + { + { 0x15fd9a9083ca53L, 0x1161da02697ca6L, 0xf516af356b676cL, + 0x8a420d575eec13L, 0x72d67421a9526bL, 0x8d8c29e76b463fL, + 0x38a4f588815627L, 0xf7e528be0650f9L }, + { 0x2cfa78e382edcaL, 0x638d183c4ad83cL, 0x96d3b9de4a0119L, + 0x5769ccba7c1101L, 0xc3b3b792b8d04aL, 0x96212f64951bdeL, + 0xad7905a481161eL, 0x8fd676241c5edfL } + }, + { + { 0xf7b063539d6cdeL, 0x69d0549115a84aL, 0x4a976c6cbd9fe4L, + 0xc92953f950ff96L, 0x1d7f0fe654d127L, 0x7293870da0f75dL, + 0x7bb3652cf2277fL, 0x64798c9834484fL }, + { 0xb94d8bfac3a76cL, 0xf5721a97ff776bL, 0x23a6e9f2722e31L, + 0xe9da9969a5c034L, 0xb9bbf83456ebc3L, 0x239f58a96956a4L, + 0x8b75beb18b7f00L, 0x6c2b5b8a51cb97L } + }, + { + { 0x78b1c627eb41f3L, 0x0638fcf17c4352L, 0x939edd80c5709cL, + 0x0a8dfc3edc906cL, 0x3942f47efb01edL, 0x4c8275749986feL, + 0x792545c4dffa57L, 0xeee68836c3ff26L }, + { 0x824d08e12b1218L, 0x515a478902457fL, 0xc70cc9cbae55b3L, + 0x1240737bcef9d4L, 0xf22e6162f9db7fL, 0x98c4f0291f8da2L, + 0xa89219cafaaa67L, 0xf35fd87e7d27e2L } + }, + { + { 0x19b0cd701b80d0L, 0x3d7e29df9aebd1L, 0xd39c9ca0477cbcL, + 0xac0f6155ff0d3dL, 0x8a51993520fd01L, 0x508ff54b22d6fbL, + 0x8786c47318d3abL, 0x4312c464a683f8L }, + { 0x73b1d3995359f6L, 0x0d94fa5963011eL, 0x5723af29bfe83eL, + 0xafa90016841df3L, 0x791e92ab7c498aL, 0xbc931ad7ea4253L, + 0x438e016b783c06L, 0x1347db22ca662bL } + }, + { + { 0x41df37dfbaa861L, 0x98ecb23329e4deL, 0xdaf1560507e018L, + 0xa902269b088e32L, 0xad898a5e4cab2fL, 0xd84e9ed02c1e1bL, + 0xc20a5d58488af3L, 0xc7165af6cc77c6L }, + { 0x8526f3adeb7461L, 0x03577b14a2d332L, 0x28e469de4760b5L, + 0x442c7f9b276266L, 0x90d5c77f9c90faL, 0x7aa87163e211bdL, + 0x56d8ff05decfd6L, 0xa204b56ee23e6eL } + }, + { + { 0x2e4374e4aceafcL, 0x978743b6fcd5e5L, 0xa0f6345c4855caL, + 0x9bc7e4fe98074bL, 0x3835d57c33d08aL, 0xeec7c8b6f00566L, + 0x71628a21acf55cL, 0x5da375097fb19eL }, + { 0x6904a8e01a7125L, 0xad33c85e6e3780L, 0x1702928c19f94aL, + 0xb424ff27c04b3dL, 0xb212e3919e2ba3L, 0x4cca8e8c9af4c9L, + 0x98ab7aefd9bf0eL, 0x21d245d9799db5L } + }, + { + { 0x6b034dcec08806L, 0xfd763f2b40f2d9L, 0x5e16de029cb906L, + 0x02b70148a0e16aL, 0x463c8eee071e12L, 0x644728125ad509L, 
+ 0x9ee6f2ddc0e07aL, 0x188895c68d4d97L }, + { 0x092fff3b27f971L, 0xb3c159fc9b7722L, 0xe27d8ff3cae42dL, + 0xf8a5ed6e87071dL, 0x318388f607ebd2L, 0x924967b53486f1L, + 0x77304947c46e1fL, 0xf279c60f21d196L } + }, + { + { 0xef2bc0384f3201L, 0xf8750c71f94c51L, 0xbaa4f5a986ec65L, + 0x6f8a5de2732a33L, 0x0f13d80299e365L, 0x2709530e85261fL, + 0x097d922f527d56L, 0x4969687be1f3f8L }, + { 0x9f3f5043e1708dL, 0xac67b874aa4be4L, 0x75fb042320a87eL, + 0xa361ad36e2cad6L, 0xcb01470203e9f6L, 0xe3807b7c9b76c6L, + 0xf086833b907c09L, 0xe9bed3c7e85a01L } + }, +}, +{ + { + { 0xa7ea98991780c7L, 0x04e4eccd2476b6L, 0x0af9f58c494b68L, + 0xe0f269fdee64fdL, 0x85a61f6021bd26L, 0xc265c35b5d284bL, + 0x58755ea3775afdL, 0x617f1742ecf2c6L }, + { 0x50109e25ec556aL, 0x235366bfd57e39L, 0x7b3c97644b6b2eL, + 0xf7f9e82b2b7b9cL, 0xb6196ab0ec6409L, 0x88f1d160a20d9eL, + 0xe3be3b4586f761L, 0x9983c26e26395dL } + }, + { + { 0x1d7605c6909ee2L, 0xfc4d970995ec8aL, 0x2d82e9dcf2b361L, + 0x07f0ef61225f55L, 0xa240c13aee9c55L, 0xd449d1e5627b54L, + 0x07164a73a44575L, 0x61a15fdbd4bd71L }, + { 0x30696b9d3a9fe4L, 0x68308c77e7e326L, 0x3ac222bce0b8c8L, + 0x83ee319304db8eL, 0xeca503b5e5db0bL, 0x78a8dceb1c6539L, + 0x4a8b05e2d256bcL, 0xa1c3cb8bd9fd57L } + }, + { + { 0x5685531d95aa96L, 0xc6f11746bd51ffL, 0xb38308ac9c2343L, + 0x52ee64a2921841L, 0x60809c478f3b01L, 0xe297a99ae403acL, + 0x7edc18fcb09a5bL, 0x4808bcb81ac92aL }, + { 0x3ec1bb234dc89aL, 0x1e8b42e4e39da5L, 0xde67d5ee526486L, + 0x237654876f0684L, 0x0a583bd285a3ddL, 0x3d8b87dfe9b009L, + 0x45bd7360413979L, 0xb5d5f9038a727fL } + }, + { + { 0x7b8820f4bde3eeL, 0xea712ef24d5170L, 0x517f88cdf6ec7bL, + 0xb15cecf983ea9aL, 0x9eeee4431a4592L, 0x786c784ebb013eL, + 0x2f06cb31f4e15dL, 0x5603fd84f4fda1L }, + { 0xf6790e99e1321fL, 0x274c66a74a4c09L, 0xa4b70b49a41a4eL, + 0x7700bddada5157L, 0xe54a60d51be8dcL, 0xfaf92761a477e0L, + 0x6661c72b027eacL, 0x50e2340280b917L } + }, + { + { 0x635f40f96ec123L, 0x4a331337a766a4L, 0x9ce4416b935587L, + 0xbb6e1f595d97e4L, 0x26147239d4197dL, 0xabd4478490e896L, + 0xf6a1b2a8bba895L, 0x401fa405e27a45L }, + { 0x7354ba50620900L, 0xc443a29385678bL, 0x48aba1053cf5faL, + 0xd67e723bbe152dL, 0x4b858e02a63d68L, 0x174e1ee72be4eeL, + 0xad0fbb39ab8d46L, 0xa0fdffbce17dd7L } + }, + { + { 0xa1ea3259c46fd8L, 0xeca122e9fb96efL, 0xf9074a26767acdL, + 0x9b004a22787082L, 0x389f8077f3ba8eL, 0x6463de90d5aabeL, + 0xf30ceaab090585L, 0x71b31e85634ab8L }, + { 0x0dee65caf02aedL, 0x506886e20ac252L, 0x0665f7886b8a59L, + 0xb9b784df2bb328L, 0x46e443adc6b089L, 0x3d5de1966c27fdL, + 0x0419265f0fde70L, 0xed946122b5c034L } + }, + { + { 0x5a52ad213b0056L, 0x9fbeb92b909ee3L, 0xb42ba18bdaab08L, + 0xec127c4ffc8a77L, 0xc6d2985fda906aL, 0x5355547994bbe7L, + 0xa7470c09cdfd62L, 0x31a3971d2e675aL }, + { 0x8d8311ccc8b356L, 0xabb0bf801b4372L, 0x33c1cad0294566L, + 0xe2e649ce07b672L, 0x9084d882ae3284L, 0x7a90d4c1835ce2L, + 0xb4d1cd5809d44cL, 0x78227149f0528fL } + }, + { + { 0xca884cfbf5844bL, 0x9dd05c48524cf9L, 0xdbffa1936ba889L, + 0xef94fdd29e7666L, 0x358f81b3eaf48fL, 0x96734d51530d56L, + 0x378b2d14adf9e5L, 0x2f850464731f61L }, + { 0xd6ae90599dcb83L, 0xa4f89e06199239L, 0x64052498f0f958L, + 0x2866d99cc27707L, 0x64681a2f551c0fL, 0x2c7b0d04c37080L, + 0x218925b00ac301L, 0x8d57fb354df895L } + }, +}, +{ + { + { 0xdaebde0809c8d7L, 0x58c761c0e95ea1L, 0xbd9965000ae5e2L, + 0x6117a85cd51acdL, 0xc4424d87c55d56L, 0xe9b1ddedfbeeafL, + 0xda98bb50db4791L, 0xff3a5a63fca108L }, + { 0x172fb8e5ccbea1L, 0x9fe12a7a9f6cc9L, 0x1de4b0b8967ce2L, + 0xc1ab60f671dbc6L, 0x338385a5dedcdaL, 0x647a4203a043feL, + 0xe9abc6428ebc89L, 0xc357ff003ba3c8L } + }, + { + { 
0x37061e7de39ebdL, 0xebb91352be567aL, 0xa9a6f6bd6bb80aL, + 0x039345d99f0ba2L, 0x215494e98bbf47L, 0xf2cb7a4a2a1ccbL, + 0xf51aa1037f67c9L, 0xd29c85c17fff71L }, + { 0x8d4e4f24d30b87L, 0x20fdf5593a8309L, 0x9b9f9cf757075cL, + 0x09142adcd70101L, 0x901d0ee766ca55L, 0x6a5d86a32e418bL, + 0x550ad92d7fcaecL, 0x64e8818d91b26eL } + }, + { + { 0x5cea0f747e5ee5L, 0x8ca1d31be99699L, 0x52db8465c136c7L, + 0x8cecb3890e0d74L, 0xb8efe9dede2ad8L, 0x18d6ff8f17ade8L, + 0xd2227352d66c20L, 0xc46593ef2005fdL }, + { 0xe5ebe6ff7141e1L, 0xc968315e0126f2L, 0x95adc731cb91b6L, + 0x753b54c38a6003L, 0xa6141254230a61L, 0x23ac6eb559feceL, + 0x9816b603865c23L, 0x567014e543a570L } + }, + { + { 0xd46091ddd2b71fL, 0x3999a5d97d24ffL, 0xce2a4f11ecff3cL, + 0xab2687c581c6f0L, 0xa9fb2ebcba70b4L, 0x6fde35642093e1L, + 0x00253ecaee724aL, 0xa08ce3c2b81bddL }, + { 0xa251238935a2b3L, 0x8cae1d4584f750L, 0x011469e988a219L, + 0x61f7ed35a6a50eL, 0xe13ebaa01fcebdL, 0x794b97631d8867L, + 0xf25755ccda32e7L, 0x368a97b4564cd1L } + }, + { + { 0x0d22224aa3397bL, 0x1dbb3e638066dbL, 0xfe0b5ee0ce8e32L, + 0x09c17c87bab4dcL, 0x5cc65ddf188b64L, 0x74c4abf211b5faL, + 0xdcc17b7ab0ba86L, 0xfbdf46fa535501L }, + { 0x4775087aca569eL, 0x6575f9006a1718L, 0xb5c45a9b94de93L, + 0x0fc80068497171L, 0x775d965489f7abL, 0x8775b58f5c0c89L, + 0x05d4e201a06254L, 0x8cab349b6d73a5L } + }, + { + { 0xca7816339465b0L, 0x3ef914814498fdL, 0x9ca1f346255c11L, + 0x389fd15b7f38f1L, 0xdac2089354b8f3L, 0x82d07fca840a70L, + 0xf53fd731dd483aL, 0xa6e4eae1590578L }, + { 0x7bf65af3c01b77L, 0x27542f3a75c982L, 0xc5bd947716cfceL, + 0xba5fe76884b9e7L, 0x39bae14d55725dL, 0x982f64efae0eabL, + 0xcfae6627a5293aL, 0x22a25a1d60f464L } + }, + { + { 0x74caecc7dd5e16L, 0x23678a2ce7bca3L, 0x467393257f1ba1L, + 0x4eb9948a4c1697L, 0x5d400e8eaba18dL, 0x128d1c89807871L, + 0x78f9627bff38a6L, 0xf80b813a39d4ccL }, + { 0x8aeefa031d3aadL, 0x504219927db664L, 0x244fc694cb6383L, + 0x319047772192a3L, 0xcc86075bbfb57bL, 0xbae3a134451511L, + 0x16cf416f6174f0L, 0xb343cc0d376813L } + }, + { + { 0x31ac9b9d1824b7L, 0x6282260ec8f61aL, 0xbbeb9f8c781765L, + 0x06ab5c02d110daL, 0xd583e2247146b8L, 0x79a16084100d05L, + 0x16dbbb4f0a5c95L, 0xfe2af1de331667L }, + { 0x26f0364af8710eL, 0x1cb8c91eec08feL, 0x436bce61d95e9fL, + 0xfe9050c57944a0L, 0x5f45acf07b626bL, 0x48dc93f9cf1276L, + 0x4491371a05bfb7L, 0x51063044bcf785L } + }, +}, +{ + { + { 0xac2e294ed0b3b6L, 0x5c5ade6671637bL, 0x2f289ce1140677L, + 0xaf446e2754eb53L, 0x70911b720421adL, 0x4b73836e0b7556L, + 0xcadf1042a97827L, 0x4824e498005bc6L }, + { 0xb0eeccd937c28aL, 0x1ce061d0c3ee97L, 0xcb076319f33faaL, + 0x9980bf4aea66dcL, 0x2bd0755d111d98L, 0x43feaf67fe4de0L, + 0xe76fb80b077b2fL, 0x227dc9f5793b04L } + }, + { + { 0xea24ae514f49baL, 0xbc39ea611436e7L, 0x9d7fed278485d8L, + 0xb6ef00cdf8b131L, 0x0237b4bfdbc7afL, 0x08745b564ccd27L, + 0xaf8595dafc5a76L, 0x43657af29f5500L }, + { 0x300718348470f8L, 0x51f91fd640fd53L, 0x859c807be15512L, + 0x7d1a474ab3e9c5L, 0x5d714d981553e5L, 0x07573436f62310L, + 0xedc5be06b02a62L, 0x5a4b9b7ea47832L } + }, + { + { 0x03e0a24e93dbb3L, 0x25841dccadc884L, 0xabc1a818d10ad5L, + 0x207e38a2042dddL, 0x7fffbdbfeba8d8L, 0x74efebba3ec9b5L, + 0x0bc39ca0b40a9fL, 0x69ee9c90267febL }, + { 0xd402facbc62919L, 0xe9f8fc11cf53c6L, 0xe76fa5a7cc7d81L, + 0x4f2d87696bb19dL, 0xd4fb7f9adc67c7L, 0x40621d596702dcL, + 0x5b6a98e438f6c5L, 0xa7c64def1a1036L } + }, + { + { 0x84c5e809a092c7L, 0x9e40e0a11c22b7L, 0x820a091d06c99bL, + 0x45fdc77eecca8fL, 0xfe1b8a35794f16L, 0x31f7e5b4ce3d6dL, + 0xfd5e01082c74c8L, 0xfdabf30c1f6f7dL }, + { 0xbfa6017b9248a0L, 0xe898d30546b941L, 0x878c492207ff65L, + 
0xbf22e8db874e64L, 0x43fdb1b53a547eL, 0xb66deda5fbd464L, + 0x59127a6c7ae1b5L, 0xa4636466a7515aL } + }, + { + { 0x22c4e66de9ab2eL, 0xfaf60c20203c58L, 0xed2d7bf0d5c5edL, + 0xdbc16fe4ca0f19L, 0x54e8ef6465b979L, 0xe2d64b1a310ef9L, + 0xa0f2c953778636L, 0xf3b4aa4281883bL }, + { 0x4ac9af09be6629L, 0xba455e11ca90c5L, 0x0147538856f492L, + 0xc80db7eabd7840L, 0xb3526d96beb9cdL, 0x37657fb9d81503L, + 0x8729a16193cec3L, 0xd9a93fbd69952aL } + }, + { + { 0xfce017594f47c6L, 0x228da21e366d05L, 0x27ce0b2dc8baf3L, + 0x8cc660b6b4a951L, 0xf678947384bb01L, 0xc629d7d44d980cL, + 0x47980e4e85e81fL, 0xa2e636a1cd723eL }, + { 0x6b6ebae77fb207L, 0x70179614c92891L, 0x5569541b4d279cL, + 0xbb6b36a41758cbL, 0xecaa22227a8e30L, 0x8b6746ab470ad9L, + 0x4c4601763e2d3dL, 0xe19c4edd3edaecL } + }, + { + { 0x0b43fec34718c8L, 0x553c407f33499fL, 0x8272efb970d1dbL, + 0x008c62ca8e8d1cL, 0xe4b79d763eec45L, 0x1fd4230f2d71a3L, + 0x090fdafa368c36L, 0xf62c101fca7baaL }, + { 0x1c9e6c8d2395b3L, 0x671ed6304c5513L, 0x577d933299a465L, + 0x286890e63f9986L, 0xd92a95dbfc979cL, 0xcebd79d2b51019L, + 0xe74d88b3d07251L, 0x8b6db73906f9adL } + }, + { + { 0xc0c43db7b3d90cL, 0x85d154e4304a06L, 0xe8aceefaf2f38eL, + 0x5e0429383d9459L, 0x65e5e32431afd1L, 0x9e5f050a900a65L, + 0xcbaa1718a26671L, 0x33d0b249c93de7L }, + { 0x3dcbf92d5b6680L, 0xc47e5ec20006f9L, 0xc9711299a51924L, + 0x665d9b8cd0ed46L, 0xed2d63fa5fcab6L, 0xa817eb6cfbfc5aL, + 0xb38169fb76eb76L, 0x8b93544f11160bL } + }, +}, +{ + { + { 0x02eca52693bdcdL, 0xbbf09232ae01d6L, 0x0b0a2de8b44b3eL, + 0xdb82449b250dffL, 0x0c42b866e1c530L, 0xcd226dca64c2c4L, + 0xcfb2bb1f046b5fL, 0x97e2fae3fccb0dL }, + { 0xdf9290745ed156L, 0x224dcb9f641229L, 0x2126abc5f1f67eL, + 0xa7eed5ae9c8a6bL, 0x40abedc9857d9bL, 0x3f9c7f6de941c6L, + 0x2158d42d725ddfL, 0xbdd10158c69543L } + }, + { + { 0xa7dd24e8df2fbcL, 0x3adbcfd13d1aeeL, 0xf6a32d113b2177L, + 0x89a72327a9a14cL, 0xe3aef43dc65df9L, 0xeaec3e3a64d74cL, + 0x4d387d84fec33bL, 0xaba2a0521a2128L }, + { 0x2382c226b85e30L, 0x4352d85cd2aad3L, 0xb0c6001d9772c4L, + 0x7ed82635f3653fL, 0x3626a6f0300f47L, 0x23909de6ca7e4eL, + 0xb43dd81c154141L, 0x9a49fad7e4bc68L } + }, + { + { 0xa3661df2428f88L, 0xbe48b0256e0db2L, 0x3cd1871ce79aa9L, + 0x90ab87123dddacL, 0x9c58fb971871a6L, 0xf031f7fa34910eL, + 0xb501eea81060e4L, 0xdb668ba791224eL }, + { 0x240bbcb6a705bcL, 0x7e76fbd2d1865eL, 0x6e2cd022513641L, + 0xe6c522546365c9L, 0xe46a8b8a5a01fbL, 0x696fa7bb67618bL, + 0x418b3b90db6792L, 0x7204acd7108b9cL } + }, + { + { 0xb5a143b8456b45L, 0x8a3ab25f53b4d9L, 0xb112a58e13a570L, + 0x613ca3281487d2L, 0x837d8233b1e7c9L, 0x592baded41e9d5L, + 0xdc1893a5cd02f2L, 0x08795028972e23L }, + { 0x7003c08cb76261L, 0x14bde9e332a5e0L, 0x14b2872cbbd78eL, + 0x5594061de238e8L, 0xad12645067466cL, 0xa8d0e64f5e4952L, + 0x5b44b82c7f8d06L, 0xb51bea8fb1b828L } + }, + { + { 0xebad6853f0daccL, 0x5c31b8b1cbebbcL, 0x6746975fa5a2dcL, + 0x2d9596531d9faaL, 0x343797d00fc0e4L, 0x38d821c55fe01bL, + 0x0bfdb247323aa0L, 0x42613c4f962a8eL }, + { 0x599a211e134bc0L, 0x75fa4a147a7084L, 0x6e719487f734b5L, + 0xd5ced2d6dfca2bL, 0x9fa0fdc8aeabd2L, 0x5e6b03f12361daL, + 0xad23d315859fcfL, 0x3120ef125a5fc8L } + }, + { + { 0x990ef628e9f638L, 0xfdaa240626a60cL, 0x4a3de202abddabL, + 0xd5d10b7d8872b2L, 0xa01b7301ea5880L, 0x481697fa81b9d8L, + 0x29841533471ed8L, 0xefd73f8292d37cL }, + { 0xdda76269994bebL, 0xa0377036a4f865L, 0xda992ece5b47d5L, + 0x912a427e53edbaL, 0x64675989264e45L, 0xd3b68c3af71222L, + 0x9d3436c6dedc5fL, 0x1e027af076b2adL } + }, + { + { 0xd56fca14382f4aL, 0x83712a48966b7bL, 0xd6b2cf5a4c9ddbL, + 0xa66be29f602875L, 0x70e4266894f3d0L, 0x007d220b3195caL, 
+ 0xba38d8f82c74d4L, 0xdccc5fcd975cbdL }, + { 0x03e1610c88b38bL, 0xeb9f9a152e0d8dL, 0x6a57ecab646eb7L, + 0x161641fc76b6c1L, 0xf9025adbd2e12bL, 0x87c74db5c0e26dL, + 0xed5cb51bfeca74L, 0x603dfb6e34a08cL } + }, + { + { 0xc4be728cb03307L, 0xde34c0ec2741ccL, 0xe01db05a74eb17L, + 0x1bfce0c8905e4bL, 0xb18830ad1b1826L, 0xcacbb41e87bbfbL, + 0x8696842d2f1a79L, 0xa80e5fb08c83eaL }, + { 0xe48f1633f1439cL, 0xc1d4108cd6987bL, 0x05705c4b751814L, + 0xa9bffd0c1c622dL, 0x23de4af46cd053L, 0xf782f5e39457c3L, + 0x815276b5e5d243L, 0x31320416161ae3L } + }, +}, +{ + { + { 0x245966177f2542L, 0x203be7e8372b25L, 0xc7c9426ee2007bL, + 0xc5641380621799L, 0xda56589c28c3ceL, 0x13e8a7c7afc1e3L, + 0xdba81e9e352082L, 0xf43054904435c7L }, + { 0x4d26533691de4aL, 0x364408cfb777abL, 0xccdfb43eae7f88L, + 0xbc40f44a525b11L, 0x8e112a53c60627L, 0x7f7c581e17e696L, + 0x0fd78781ea774aL, 0xd09e6320b1f582L } + }, + { + { 0x44390bd70aab15L, 0x41112bc889c3f2L, 0x6b02894d685349L, + 0x71030015584dfeL, 0x373cb1b1ba7887L, 0x53d286c2a017c7L, + 0x2ed03883c81fdcL, 0x3bfc5e3fbcc6fcL }, + { 0xd38ac6ffd6418dL, 0xc667e96bfad89eL, 0x46f4f77eab4d66L, + 0x194c04f0911293L, 0x0fd09cf68c48d5L, 0x6f5b05563cf7f4L, + 0x0c0a8c4acd562fL, 0x94c1d8336d965dL } + }, + { + { 0x94fc8f0caa127aL, 0xc762d5dd803690L, 0x8bfdfd11ebf0d3L, + 0xa98cdf248eac50L, 0x3d7365d8b5ff10L, 0x20dc29bc65b4deL, + 0x62ac28e8ec7c68L, 0x7f5a13290372d2L }, + { 0xf3d8a253246658L, 0xa4bebd39ac202aL, 0x078ede75cc1697L, + 0x5525800c8fc022L, 0x302a8025fae77bL, 0x018013957917b6L, + 0x7c8806d864bf55L, 0x4e2d87812f06f1L } + }, + { + { 0x8d351183d66e88L, 0xfb861a1a91d02aL, 0x8c27c2a7850e5fL, + 0x9fd6399a5496f6L, 0x52152ae8080049L, 0x600e2fffd1c2dcL, + 0xc75902affe8b2eL, 0x5c4d2cce03b175L }, + { 0x8ad7c424f57e78L, 0x77cf6061736f87L, 0x2876012f85038aL, + 0xff328451b97b95L, 0x3cc6dd5392dfc8L, 0x72f1363a6f5075L, + 0x028ec4471de894L, 0x7030f2f6f45a86L } + }, + { + { 0x66400f59695817L, 0xeda0a7df20ea36L, 0x855be51d394992L, + 0x2d082c18336f62L, 0x30944ddf28c868L, 0xfb5f8530dc86d0L, + 0x9562ae5564a0bdL, 0x1f7ea12b6b9b51L }, + { 0x5bd74e0d0a7148L, 0x6c8247fb91e572L, 0x699aba547da498L, + 0xed825811f7c814L, 0x434674b62057b9L, 0x8b4df5e15c15b4L, + 0x2a97da1b110081L, 0x2a96b0c4c417feL } + }, + { + { 0x4f75dfc237639dL, 0xe5ad6bc1db7029L, 0xd43e06eb3d28f7L, + 0x89f3bb5e447989L, 0xc426a2c01a1a6eL, 0x33ea71c315878fL, + 0x8a7784ab1b5705L, 0xa59e86e77ca811L }, + { 0xddb133c36ae155L, 0x49f1d4c0d51b42L, 0x55080829d05519L, + 0x20e23be5291816L, 0x35047ec67181ecL, 0x6237dc47aad091L, + 0xa1d3ce1e2e25a2L, 0x1de05220d3db4cL } + }, + { + { 0xe9a5e19d9fd423L, 0x0c2c3d09801e43L, 0x043c2dd28df2daL, + 0x4eecab4e1ad12aL, 0x97e17979615aa5L, 0xe57b879ca7bb5eL, + 0xa2a903ccc92619L, 0x5cef370aa56e93L }, + { 0xbef29fa7f3232cL, 0x1cf35ed2b7ad5cL, 0x35c48933b6077aL, + 0xe0651487a1d47dL, 0xedb4673ce14572L, 0xdc9e98c0b17629L, + 0xef98ebe9a02a5cL, 0x1f772e311d03c0L } + }, + { + { 0xcbdbdcd4608f72L, 0xb4352235a13c6fL, 0xa6497f64bb3c21L, + 0x3af238312c15c9L, 0xfbbf4b36322d11L, 0x520a5c6c641775L, + 0x18cd967e81e0e1L, 0x980b2c63de3871L }, + { 0xfa9db619ae44a2L, 0x0281dd2176bc56L, 0xfd037118a7f817L, + 0x9c485454129b30L, 0xb439648039626dL, 0x355050ee4ada6bL, + 0xc9c16d67f5d98cL, 0xf53ccc318c4d5eL } + }, +}, +{ + { + { 0x50ae9423ffb20bL, 0xa6c0b426865eb4L, 0x4677f7d09930f1L, + 0x742e0b64a16427L, 0x521d18ef976f9aL, 0x43ac9cfa454749L, + 0xda3a91dc51f50dL, 0xf657029ad6f954L }, + { 0xfe5f0646b4f99aL, 0xd92a5d963ad4ceL, 0xfcb55092e0e081L, + 0xadc85ab8d8a858L, 0x8e9b9660632f0fL, 0xe7a4f168d7216dL, + 0x00a4cc559c3b99L, 0xed6d0bdba09dc1L } + }, + { + { 
0x7236d141621bebL, 0x1751fd4bc7ca95L, 0xaa619d12f5319cL, + 0xfc2b15b4e9316fL, 0x2d1a9069fd4d33L, 0x28c3bac8ced829L, + 0xf2efab51dd998fL, 0x2c133303b149edL }, + { 0x65237c9f601ac6L, 0xb54dd6507d6a45L, 0xa1ce391fb1a4cfL, + 0x2957533115f67eL, 0x6456da8465279bL, 0x02890aaa993e02L, + 0x6891853b7175e4L, 0x3fda2030f3e59bL } + }, + { + { 0xe99fe12d8c6e0bL, 0x7cb07ff5341c56L, 0xc292c7bdf77b24L, + 0xf52dfd0ca29906L, 0x4a6aa26772f02cL, 0x26f7684e1bbd09L, + 0xec56b2bee7c2a8L, 0x67709e6ad4a312L }, + { 0x99c57b2c570263L, 0xeb0100b2faafaeL, 0x980d5d1ff25ecaL, + 0xace35e682cf936L, 0x5a82ce544679edL, 0x5c76a41074b81eL, + 0xf36fa43a00abb1L, 0x064281904ffb2dL } + }, + { + { 0x68f6bc804bdd28L, 0xc311d96b5dc7adL, 0xff0d646ed32e45L, + 0xaf3cdc6e0f712dL, 0xd4508e9d483861L, 0xb624be50e1c277L, + 0xc510275c5dd841L, 0x451c5c3298dc02L }, + { 0xf87d479dd34d6bL, 0xda7f293dd06a38L, 0x575e129b699e9fL, + 0x79e5fb2215b2ccL, 0xd280028657e690L, 0x7fecd09e702a71L, + 0x85160abfa13677L, 0x5de3427ce65f64L } + }, + { + { 0x84e4bf6e8fff38L, 0x16f3725b358b1cL, 0x360371c3b472a5L, + 0xe64c06152f217aL, 0x8e673790501241L, 0x88e81d6ab2dd96L, + 0xf3e218a1385604L, 0x9736cafe84184dL }, + { 0xb55a043dbb93a3L, 0x335088f9301088L, 0xcea7a2db2a4959L, + 0x48e5d4ab882c33L, 0x114f09bad46179L, 0x4416467b446576L, + 0x01cb23e34c6c2fL, 0xddebf04a02db8aL } + }, + { + { 0x36d60cc9bde8a1L, 0x20fd2f2676e4adL, 0xebdcfb78936581L, + 0x245d0d5dbfc2c3L, 0x104c62ca9f82e5L, 0x7387457d654d9bL, + 0xe966777ae7f10eL, 0xefeb16f1d8e582L }, + { 0x4faf4f170364b5L, 0x0e1ab58d612472L, 0x11bbfe7fed6085L, + 0xb360a14a59a09aL, 0x61d96e9722fdb6L, 0x16a12f194068bdL, + 0x225bf07f73c2beL, 0x1e64665c8bd24eL } + }, + { + { 0x27a478a3698c75L, 0x778ccd36202aa2L, 0x0149c638d87f1fL, + 0xa660e5f784edaeL, 0xe0d4d2f82adfa8L, 0xf512dd61ba1f9dL, + 0x90cfed96245c58L, 0x6c3a54818b53ddL }, + { 0x833f70cbdc094fL, 0xa5f26f5b1514e7L, 0x93e7cf51c8cf13L, + 0x1436601186ec43L, 0x81924ace78170aL, 0xcc880a08694368L, + 0x2dfa9550b62cbbL, 0x0bc6aa496b4a2cL } + }, + { + { 0x5157a7e3561aa2L, 0x525c5008645c1eL, 0x22feb4ece7cbb3L, + 0x36d0d25c89a58bL, 0x43131f7c9bde9cL, 0x74afdda881f731L, + 0x99ab87c7c8e36aL, 0xf07a476c1d4fb2L }, + { 0x1b82056bebc606L, 0x95a1e5afcf089fL, 0xc5bccfa2b55d5cL, + 0x8fbc18e00eb0b1L, 0x93a06fe9efb483L, 0xcafd7252d74c57L, + 0xc7518f03de4350L, 0x9a719bfc6fd762L } + }, +}, +{ + { + { 0x5ee0d832362087L, 0x7f2c0d70b167e8L, 0xb7327895e0e865L, + 0xef5b2e898c4e65L, 0x222797d8fe9cc1L, 0xfe6d73e82d1e15L, + 0xc7c0e9cf62dc4bL, 0x962acfe937cedaL }, + { 0xd763711c1e85c7L, 0x8f2dbbc2836978L, 0xbadc0558c44e98L, + 0xed63eaba3e93f8L, 0x807e85741b55c7L, 0xd51ae5e6d1207bL, + 0xa0ef9a639d541bL, 0x58855f9a0c56a5L } + }, + { + { 0x7d88eaa213091dL, 0xcbdfee745b6a0dL, 0x826a0124f5e077L, + 0xb04fc1390f1e4cL, 0x1961ac3aea69aaL, 0x3afb719d5bb63eL, + 0x2a378374ac7e5cL, 0x78efcc1c50ca45L }, + { 0x346e8f0b8abdefL, 0x27e3dbd88095d0L, 0x56d3379ffc6c22L, + 0x67d416cfa4b291L, 0xc3baaf63b1b373L, 0x0184e1fdf73baeL, + 0x38ae8f79167528L, 0x7329d4c35d6297L } + }, + { + { 0x45d2ac9f568c52L, 0x51348149808593L, 0x0c92d8331b7ed8L, + 0x921327a0876ecdL, 0xf752d75052736aL, 0x7b56487bc6b837L, + 0x6b1a320a23b4ccL, 0x1983937ec0d665L }, + { 0x2c3017c08554abL, 0x40ad955366e87fL, 0x88c4edf8ed7f02L, + 0x64a7db13cc5e6dL, 0x5ac91fa2dc978bL, 0x016a20d925d2a2L, + 0x3604dfeabb57b4L, 0xc3683ecd7e2e85L } + }, + { + { 0xc47150a4c0c6d0L, 0x30af45ee22adcfL, 0x39b5acb022ea4bL, + 0xfbe318577203b5L, 0xe5aaa346fd9b59L, 0x0062c90dd1c8dcL, + 0xcf113f354049acL, 0xd8fba4d63a31b5L }, + { 0x73b54881056a69L, 0x3be6cbcd780bdaL, 0x5776ec230ba2b9L, + 
0xbe883cf8e8d6f7L, 0x64efe945c2be6fL, 0x064f704f1ade8dL, + 0x41cfd17743110eL, 0xaac94114c20abeL } + }, + { + { 0x91f9192f1c1468L, 0x8176e744563e13L, 0xa48b5f90bda15dL, + 0x2a085aeda42af6L, 0xfd38ab2425c018L, 0x2884ba408abafbL, + 0x356f318cbd091dL, 0x454e450817871bL }, + { 0xe080e818ada531L, 0xa40f1eb3152ba8L, 0x051049f0c38eb1L, + 0x37e4bb3bd45003L, 0x6d0980454a01e5L, 0x6de932feeb824aL, + 0xccdef37dc93481L, 0x8633e0793a05e8L } + }, + { + { 0xbe94256034675cL, 0x376c01d08db789L, 0x8707ee79af1b6bL, + 0x633b3ef11bfbacL, 0x694f33fd06db60L, 0x2a68bfcbb13407L, + 0x1c860c9da27c3aL, 0xbca16ded701ac3L }, + { 0x2b76cfac59ffd0L, 0xf9a116554d718dL, 0xf86a1db67f0878L, + 0xe313e05af34e85L, 0xa1888113343159L, 0xdbe4c3f0bb7ed1L, + 0x73b67e80c732bcL, 0xa4e1c87e74110eL } + }, + { + { 0xce1106b5c6770cL, 0x422c70b5c0bcb7L, 0x32a39908195e7fL, + 0xa24968d1ccd4aaL, 0x8f08ecf720e557L, 0x5da10a454bcc81L, + 0x9d3c73b6cd846eL, 0xaeb12c7368d065L }, + { 0x2110859cf9fd1bL, 0xd2a4801ee2bd6dL, 0x376e556e9466acL, + 0x767803b3b5aa35L, 0x343f842b8a89baL, 0x3263cc16726bbfL, + 0x26caf1725871b0L, 0xef66ad641b8578L } + }, + { + { 0xc9f2249638068cL, 0x96d282c1ccf9afL, 0x71df30c69b435aL, + 0x88c943acb9d5c9L, 0xbf98ef12a8f378L, 0xffc1824114c6ffL, + 0xda3ad2cd52e8c7L, 0xf1222bc1afcb59L }, + { 0x459e94b0ee334aL, 0xd4477b8421933aL, 0x60fb7b0a1e401eL, + 0xfde6e820d1e330L, 0xcecfe9b3233fdeL, 0x09ec4662e93523L, + 0xa5ba64930775b9L, 0xcc397e5adf80f2L } + }, +}, +{ + { + { 0x2fe182d4ddc8a8L, 0x88d6e79ac056bfL, 0xc3ff2d10e41e4eL, + 0x32ec7f92c3679fL, 0x3561f094e61051L, 0x4553f5a6c6250aL, + 0x2b765efdd25c5bL, 0xe3a40a26a1cd7fL }, + { 0xb27309b5d821ddL, 0x950fb8dc2c17caL, 0xfeed0158fb0d4cL, + 0x762c479f550179L, 0x306cf44e095840L, 0x84b413ad379e66L, + 0xd6e5d5abb2e4f1L, 0x8bc12b794b085dL } + }, + { + { 0xc0d4cb804b5532L, 0x7a31525b9940a6L, 0x010e7dd68c69d1L, + 0xd81f29d2a18c35L, 0x08ae7703f11e73L, 0x5358f876e55106L, + 0x299e8cac960ef5L, 0x89a6fb4acfc8dcL }, + { 0x5996a406dc7d4aL, 0x21e5112e51b96eL, 0x95b8c3d09a202bL, + 0x306ab0fd441f1fL, 0x2834fed98d4245L, 0xc29c387d0abbdeL, + 0xf6a9bf1b805c15L, 0x602f4f8c4e458dL } + }, + { + { 0xf041486e5a893aL, 0x53b891d8934327L, 0x11e000d4000758L, + 0xa4ccde8662bad9L, 0xe34d3edb9a1b64L, 0x72d967584e7a6dL, + 0x773da2f6627be4L, 0xa11c946e835ae3L }, + { 0x02e8203650bc15L, 0x2d35936e58b78dL, 0xe9cfbe8f21a3ccL, + 0x55ad8311049222L, 0xbf99de438fff47L, 0xebbfd803831db5L, + 0xe990636af2af42L, 0xc26ae52b7f5a0eL } + }, + { + { 0xb5d85b1fa8f846L, 0x4166489b3b1455L, 0x768260dd36a305L, + 0xc6a82354ff5645L, 0xd241cd8d6e93e5L, 0xeed9aa1a406e74L, + 0x9e96ab05f600d9L, 0xa26b8b56eca2a1L }, + { 0x78321cfd705aefL, 0xc4fb6b3c0161ecL, 0xdc324415199cf1L, + 0x33627d0d0a5067L, 0x13490cb15143eeL, 0x77e0ede85b4f44L, + 0x904f12e394b165L, 0x90f50f5efab32dL } + }, + { + { 0x4aa0a16bc2de96L, 0x172596aaa9c12bL, 0xd512e1e60e8a29L, + 0x77d35c1f637e83L, 0xbb0d141d2aae0bL, 0x8a878a58c03738L, + 0x6d24c01ab0e525L, 0xb7d3136f760887L }, + { 0xdbc3f8f3f91b7cL, 0xe7b4bcaa8722c0L, 0x3286a91da0ae65L, + 0x8372274225b084L, 0x5884cd5ae1886cL, 0xb4e63ef3a23cf7L, + 0xfe5f202f2dd0daL, 0x951fac9653916cL } + }, + { + { 0x05e2e8f854fa4eL, 0xf411f941edaf10L, 0x26cc562a0a928dL, + 0x78fd34e4abce65L, 0x1d8760998a32e2L, 0x85dc76f4c37518L, + 0xdcaeef500e8021L, 0x7fcb2f84e9b2a5L }, + { 0x9eba91ef382c06L, 0x2052e8524cae53L, 0x617336ef5c1519L, + 0xf1546d5b4e632bL, 0xa9edc81d7b8ffdL, 0xdb2914f29ab68cL, + 0xe805070debbabaL, 0x775e53bc3b719eL } + }, + { + { 0xa40e294065256aL, 0x9f113868fb031aL, 0xac03af8059667cL, + 0x432eb3a0475f58L, 0x22332bf01faad0L, 0xc8132e9bc57a11L, 
+ 0x27d5a173bc3f8bL, 0x5471fc6930bf3eL }, + { 0xba28bc0e6bff40L, 0x198d57e555e564L, 0x13ce8319c65b8fL, + 0xb0a5c9d5681b51L, 0x467588bdeb9e11L, 0xf1891a7bb4250bL, + 0x10b938bd12b433L, 0x0b8c80224dcda4L } + }, + { + { 0xc428703cf332d3L, 0x9d0053cf2a5b98L, 0x4e4c6207838a15L, + 0x2e92919fbf8a43L, 0x39ad52421cd9a5L, 0x584ed6c1561588L, + 0x20af30517a95c8L, 0xa223077b70e1c8L }, + { 0x679cfea2fa4871L, 0x54f2a46ac633c7L, 0x60306514cdc5f1L, + 0xc4facda75a1dc7L, 0x710a2882d07d19L, 0xd55864e6b44992L, + 0x44d4b6c454c5b2L, 0x2855d2872f9981L } + }, +}, +{ + { + { 0x4071b3ec7b0674L, 0x800eb14f8794d5L, 0x70573afbe6783eL, + 0xafaa4407785901L, 0x112d2a1405f32cL, 0x3761a52169b3e2L, + 0xe168b31842a366L, 0x5bc322f9bf4734L }, + { 0x36ef240976c4a0L, 0x066f3d6fea4e64L, 0x0e954bda989e57L, + 0xe36ef5ef9466e4L, 0x6bb615abeb9226L, 0x5571e5f3d5a2caL, + 0xa86efe24897a86L, 0xed7e9cf28a9f77L } + }, + { + { 0xdf10c971f82c68L, 0x796ba1e3b597e6L, 0x1ac77ece718cbfL, + 0xc8175bb410eac8L, 0x0cdf9a1bc555efL, 0x6b889f17524e05L, + 0x6bf1e61ae26d82L, 0xb3f6ad5d2e97d9L }, + { 0x94dcff9f226487L, 0x60e6356be03ddeL, 0xda1f93b6a3dd7dL, + 0xf1be72179ca90cL, 0x05ed3131e6bce5L, 0xcf50908d48af3eL, + 0x3b0e85c61e554fL, 0xfe7e35ba2778d3L } + }, + { + { 0x42c503275ac5a9L, 0xa66a66dda062c2L, 0xa4f4f82caa7023L, + 0x489d47664b4f86L, 0x10b108897311adL, 0x55dd637177b2ecL, + 0xa5ccff09a267b1L, 0xf07690bff327b0L }, + { 0x39162ed2250cd2L, 0x1426de08b255f1L, 0xf227afd1bdd731L, + 0x78f8a36fa4c844L, 0x267a211157379cL, 0x3f05f92cc04acbL, + 0x374496cfc69caeL, 0xbf2c5d016ebfecL } + }, + { + { 0x605418bd0518d1L, 0x3237f809e1cbc6L, 0x37a7005286c019L, + 0xf1fb0e0b15af0bL, 0xfc3b97caa853c0L, 0x1f48bd0e6beba2L, + 0x8e5d7c5e6a72f1L, 0x575e66d26ebf0cL }, + { 0x099477662eae3dL, 0x53f074f96c9c65L, 0x6cfbfdbb81badeL, + 0x98b4efe3fed7d1L, 0xdaa112338c3382L, 0xdf88b7347b8ec6L, + 0x9b0fe4b9504a4fL, 0x2e7df4cf30c1c3L } + }, + { + { 0x25380cb2fc1833L, 0xb8e248c18d62deL, 0x91c8f59d82f9dbL, + 0x5ec2b202444750L, 0x3f3a1f766b6f74L, 0x0180aa9dd7d14dL, + 0xd0a342d2956b9cL, 0x26e910e7139873L }, + { 0x2261dc4139e23dL, 0x7edb181b8343ddL, 0xfcf1073b4038ddL, + 0x88870efa3bfea3L, 0x4e98ba964a263eL, 0x3c6e5dc70811f5L, + 0x17d28f5f86055dL, 0xca9c27666e4199L } + }, + { + { 0x0b2d8bd964ef8cL, 0x5a99b8588e2ba6L, 0x9e927b204498ceL, + 0x9ff20c5756eb25L, 0x97cc27b3f27736L, 0xf32dd6d4729583L, + 0xbdc26580381a94L, 0x70fef15ef2c06fL }, + { 0x50a619149252ccL, 0x9eb4a14236b4b9L, 0x9b1b2158e00f78L, + 0x27add366ea9c23L, 0xef61763c3a8e79L, 0xed4542fd82ce56L, + 0xa8737e70caed75L, 0xeca0ac2d452d76L } + }, + { + { 0x20c07793d082d0L, 0x6e3ce64c9e9f3bL, 0xb3a4dce75a195fL, + 0x3a3c305bdd9f24L, 0xe2545c88688942L, 0xa463c82080f32bL, + 0x442974842686b8L, 0xf50e20d7213866L }, + { 0x265ac523826e74L, 0x26fba57228e8ecL, 0x8a1e1dbe6b3ed8L, + 0x7c7b278f0fe65aL, 0x9a6df23c395234L, 0x99562060b0f114L, + 0x440c8c4ef90837L, 0x21ad22a3645f65L } + }, + { + { 0x1e023a6edd31b2L, 0xf76d1459ff8668L, 0x970705617b45c8L, + 0x06120781e88e37L, 0x85c51c8922faacL, 0x4df392e22756d9L, + 0x8907fd0a03c98eL, 0x626f46a52ea51cL }, + { 0xf8f766a486c8a2L, 0x8c499a288ed18cL, 0x44d2dc63c4f0deL, + 0x47dde686f2a0b6L, 0x9a655f84a973fdL, 0x3e7124e786ac80L, + 0x699e61ce8a0574L, 0xdf0ba9a31cdd0dL } + }, +}, +{ + { + { 0x76270add73e69bL, 0x991120fc67d38aL, 0x7be58309469f0cL, + 0x93aba597db40acL, 0x2b707bc822fc08L, 0x4199fc069551cdL, + 0x38deed4f367324L, 0xca518e12228787L }, + { 0x72f1befd9a9277L, 0x57d4aabe49ae90L, 0x13810d5db23478L, + 0x2a8b7809b4b77fL, 0xb542f4e1b4e004L, 0x4080fd03ec77f0L, + 0xb49e9fecec6596L, 0x20338d33f16037L } + }, + { + { 
0x4adcdae53554b0L, 0xfea4906e04c4dbL, 0x0808bec7748233L, + 0xde7477c47148d7L, 0xdd9124c03da38cL, 0x6b2503125ee8e9L, + 0xae67399b0d6161L, 0x70c4acd82203b6L }, + { 0x9683916d31dae8L, 0x34775031ac7f69L, 0x9553153988e4adL, + 0xb58f41153a15e1L, 0xb65a2d492ba2ddL, 0x7c3efb1a90169cL, + 0x210f45e6b1747dL, 0x16e8d1bcff488dL } + }, + { + { 0x252adf89d703dbL, 0x259ac1dfdfeb39L, 0x7faf6af115e806L, + 0x7aaefd6c1aff21L, 0x80542107c0113dL, 0x481f1a5e19b4b1L, + 0x7c17d43fcc8c61L, 0x8b04452bb0bbbeL }, + { 0xe51e5f54cebae1L, 0x05341ba56a414cL, 0x0083a2c7fb8a30L, + 0xb4663f277f4952L, 0xce72eec4bb0074L, 0x74fdd66a3584d1L, + 0x6b9e58eb02e076L, 0x5be45d53b961f4L } + }, + { + { 0xc7474f31ab2e0bL, 0x2838ccbf4bf454L, 0x634392ef3c3eacL, + 0x440e40a137602bL, 0xeea67e9d1ae8e3L, 0xafdf93a77e221eL, + 0x3c9f3da2719a10L, 0x466ecef32c8256L }, + { 0x1061c19f9c432fL, 0xa1332d9b1c7d98L, 0xbc735f2a425c2cL, + 0x1429cdf4b1bccbL, 0x77b42a16bbb5f9L, 0x30078e35955ae4L, + 0x8acd77721cc315L, 0xaa90d5fe86fa99L } + }, + { + { 0xfcfd460721115aL, 0x6a7de3e08269b8L, 0xe5964a696dd47eL, + 0x6717cd58dca975L, 0x7ea4ebe98b149eL, 0x6f894d5b7b8057L, + 0xbd6f9607f30e31L, 0x61ca45323df092L }, + { 0x32241f99d782f3L, 0x55173b02abfae2L, 0x0abe0edd15bbbdL, + 0xb6d3c0ab438abbL, 0x62fb4679ffa20bL, 0x30926b5d31560aL, + 0x44bf27c2a0aa6dL, 0xf7473131a4cb97L } + }, + { + { 0xa2f6c0db0535deL, 0xcb02ae1c855166L, 0xc699e6bb3422f0L, + 0x774febe281ba8aL, 0x1d9d24fffabcc7L, 0x0b31ba1fe12ba5L, + 0x4c8680313d0af7L, 0x90640d32f47160L }, + { 0xa0c4bf45876603L, 0x717f6fa950ab08L, 0xf12bb53a710de8L, + 0xc500c616a88f50L, 0x0070f992645351L, 0x57aab5d2446893L, + 0xd553fa8b68f657L, 0xe8537c1693c55dL } + }, + { + { 0x58e86eb7fc7684L, 0xdf330f7bfc73a9L, 0x41e337dcc11936L, + 0x36d92006e35759L, 0x01327033500d8bL, 0xfa684059483354L, + 0xc8f2980667851bL, 0x538ec8918296b0L }, + { 0xa2a2c4fcff55f9L, 0xb260d4d60d20bdL, 0x3ed576fd9cc59fL, + 0x4ed8c64d514fccL, 0x37ebfb2c22b315L, 0xca67a3694c212cL, + 0x4f8e08c3a1795eL, 0x498f9264e7261fL } + }, + { + { 0xfea7382c59b3d4L, 0xb9942ed3f2925fL, 0xe4b00dc8ea77e8L, + 0x74a18ec3cab02eL, 0xbbbb752ef16d0bL, 0x639da4fffab032L, + 0xc371a4a3aa30f0L, 0x8e26b22caa175bL }, + { 0x94e41567e2b62eL, 0x7cceea625a794cL, 0x931d2f4479f015L, + 0x946183d90b25b2L, 0x1504e9768a2807L, 0xa7577d3fa49dddL, + 0x24fc87edd48699L, 0x9edefd63d7d99cL } + }, +}, +{ + { + { 0x0508b340f0b450L, 0xe0069a5c36f7f4L, 0x26556642a5a761L, + 0x0193fd8848e04dL, 0xc108cf573fe2e7L, 0x05eb0ecfd787d4L, + 0x1555ccbff28985L, 0xb5af09f651b995L }, + { 0x167d72ce1134beL, 0xd6d98bf57c669aL, 0x40fb7166dd76faL, + 0xeabbf202a41b31L, 0x300ff0e09b75b0L, 0x32b6fadd9a0c1eL, + 0x805188365a80e0L, 0x8bef69332110feL } + }, + { + { 0x637802fbef47d4L, 0xfac114b2d16eaaL, 0x7b3f3ab0415644L, + 0x17ab8d12dd895bL, 0x271b7fe87195f3L, 0xa3f867ea71f65fL, + 0x39ba40cc80583aL, 0x6db067256e1fccL }, + { 0x4feab4e06662a8L, 0xc857415c74bd46L, 0x18032ed732b126L, + 0x87c8aea7a099eaL, 0xb4a753536fe0a8L, 0x33a98da27673f6L, + 0x3e40c022b8e549L, 0x2def1af9a4c587L } + }, + { + { 0x9618b68a8c9ad9L, 0xd70b4aa49defdaL, 0xae8b1385f788efL, + 0x87c3542dd523f4L, 0xe42c7055c5b004L, 0x6303360fa7df57L, + 0x33e27a75f6d068L, 0x9b3268e8ff331aL }, + { 0x845cc9623ee0c3L, 0x003af70ac80084L, 0x6a9f931530c41dL, + 0xa1d7051bb127f0L, 0x642ce05ca36245L, 0xc34205b0323ee9L, + 0x7cc8912b7b3513L, 0x6252cc8076cbdbL } + }, + { + { 0x10e68a07089522L, 0x36c136158fc658L, 0x490397d74723a4L, + 0x42692c0519d56cL, 0x69d251bf1ff235L, 0xe689d03c2cbf37L, + 0xf04ceba825b7f4L, 0xd6b9bee2281c2eL }, + { 0xc52ef3fe0043abL, 0x351bf28d1d1be8L, 0x277615f0f18a5aL, + 
0x31f717f5d6800fL, 0xf5fb82dab922e2L, 0x99aee2f2d6ae43L, + 0x42477fec63b982L, 0x904aeb1a594a01L } + }, + { + { 0xaa82174eb39974L, 0xbc38e6195e6aa0L, 0x6a3df8a25c0675L, + 0xf324203ffbe739L, 0xfa5a0b4a3f0649L, 0x79c87327a7a6b8L, + 0xeb65ecd40ad3f5L, 0x718d416e4e45c5L }, + { 0x029dbf4e2326fdL, 0x0c63416e7942f0L, 0x6d0c7286f4e678L, + 0x59f0b10a138601L, 0x8a1d9788d92ea9L, 0x9f8d712c22eca5L, + 0x73970447b6b96bL, 0xa2d49eee6fb955L } + }, + { + { 0x249f900bf14a19L, 0xd3522da63a8cd2L, 0x28a32f386964d2L, + 0xacf712bc1fa743L, 0x98a9bfc0bb94d3L, 0x318ece1bc06824L, + 0xfc476754fce7f0L, 0x19caec9e4135b7L }, + { 0x6de68a8c6817bbL, 0x7121960f3b6d89L, 0xa7d4261f5a818eL, + 0x0c0ba519157455L, 0x78b6acf450d5ffL, 0x198b4934e8649aL, + 0x0941a3cfd05da3L, 0x264ea4adb55951L } + }, + { + { 0xcfee91c46e5a31L, 0x47b6806fff7366L, 0xdb14be45df849dL, + 0x3c5e22bac66cc7L, 0x7f3f284a5f4769L, 0x4e00815383be36L, + 0x39a9f0b8072b0bL, 0x9887cd5c7eadd6L }, + { 0x7dd8f05b659511L, 0x15c796dd2e1cb9L, 0xe5edb0c0d31345L, + 0x2025df06939c60L, 0x6314c08bf15de1L, 0x03c154804c7fb5L, + 0x413337fbb5d3edL, 0xfc20b40477e983L } + }, + { + { 0x7f968805db0ef9L, 0x05562dee9c2a70L, 0x071e5bc7dae133L, + 0xa8cdd12237fc4aL, 0x6d565e74ea492bL, 0xa17cf94381ee52L, + 0x6ab8a4e9f5c546L, 0xbb642f340288efL }, + { 0x64e59215df5c2dL, 0x43696e3bb906f4L, 0x73a841a74ae46cL, + 0xe264883c506b8aL, 0x9542e1aa1be548L, 0x89385395e81b4aL, + 0x5642cfaeaca6ceL, 0xed8077b806e0f9L } + }, +}, +{ + { + { 0x1c776c47e13597L, 0x0ec8b289e584fdL, 0x0bb6043b8b61e8L, + 0xdcc17489cd835bL, 0x493e6ac39fef9aL, 0xb44eb34d133e17L, + 0xfebcd0071cb6f9L, 0xe6cf543d20eff2L }, + { 0xf265cad0a004c7L, 0x9b06c9dd35cc12L, 0x769f985cb4ea53L, + 0x29160a20993434L, 0xdf8dd108d939c4L, 0xefa177c6711e2fL, + 0x1695790cd7a2cdL, 0x38da3d777f6642L } + }, + { + { 0x9bfcfd96307b74L, 0xc26a36dbfdabc3L, 0x9341be04abe28eL, + 0xdb20b5273d1387L, 0xf8d229c3d1949cL, 0xf1e0afeb8b3a41L, + 0x29c60dfed565d0L, 0x6930bb58b43b2cL }, + { 0x1d76527fc0718fL, 0xdb981431f67189L, 0x0c62f6451f32ccL, + 0x70a66268bd35e5L, 0x1725641c1cece7L, 0x7f130a8f96f4a4L, + 0x72319e9f06ee98L, 0x215b73867bf9b2L } + }, + { + { 0x8d1bec20aaddd7L, 0xfb8b95bb8be4f9L, 0xeac193efde1026L, + 0xa5edea79d5860cL, 0x4adbaea44280d3L, 0xce8b67038f4798L, + 0x914c107ec30deaL, 0xbdc5cf7000776bL }, + { 0xb6fd7d1a206a13L, 0x9941ebadae986eL, 0x76c27a81f1caaaL, + 0x6967c123f108b4L, 0x6f115284aea2d0L, 0x9bb4319144ddacL, + 0x1a4d3eac8ec6fcL, 0xfe4b0b8bf37420L } + }, + { + { 0x5d9a4a1ec0ac6fL, 0x84b79f2fc7c80dL, 0x64222f7c14fac3L, + 0xdd9e039c23b3f2L, 0x4a84abdea956bbL, 0x370dcbaebe09dcL, + 0x79a9ea8e0eaf82L, 0x4cfb60aaee375fL }, + { 0x6a10dbf9106827L, 0xa3ba5cf43f305bL, 0x481b885c1bb083L, + 0x2f52380b3117b1L, 0x0066122ddd6791L, 0x4f8923e63bace3L, + 0x5c5f499ecb88d4L, 0xfdc780a3bac146L } + }, + { + { 0x34b70ae7ba1f71L, 0x909182945bd184L, 0x3b39778e707313L, + 0xdeefc5e6164e91L, 0xbb55bed4971f39L, 0x7d523398dafc8bL, + 0x82391bfa6adf0fL, 0xfd6f90ae319522L }, + { 0x60fdf77f29bbc9L, 0xeff9ed8aaa4030L, 0x978e045f8c0d3fL, + 0xe0502c3eed65cdL, 0x3104d8f3cfd4c8L, 0xab1be44a639005L, + 0xe83f4319eeab3fL, 0x01970e8451d797L } + }, + { + { 0xbc972f83180f4bL, 0xac053c0617779dL, 0x89392c57fa149fL, + 0xdc4699bbcb6263L, 0x0ae8b28ce12882L, 0xdca19a7af1a4dcL, + 0xd3d719f64e1a74L, 0xbb50201affdd5dL }, + { 0x56f73107ac30e9L, 0x65cc9c71878900L, 0x83f586627338a3L, + 0x122adefac5bb13L, 0x97de2001bcd4d5L, 0x6ed3985b8aa3a0L, + 0x8680f1d6821f9bL, 0xcb42028dda9f98L } + }, + { + { 0xcdb07080ec2db3L, 0xe28c8333dad1a1L, 0x2093e32de2da07L, + 0x731707383b8987L, 0xad17871f552b8dL, 0x846da9851cf70aL, 
+ 0xf94a16e5c4f5e1L, 0x84299960f8348aL }, + { 0x4bf3f6898db78aL, 0xad77fa83d19b52L, 0x69767728b972dcL, + 0x7dfa35a5321be0L, 0x9881846dd344a6L, 0xe550292ad4e2a8L, + 0x8075217bc68bf1L, 0xdd837c4893be15L } + }, + { + { 0x09c931ed4fab5bL, 0xb2dcf08b77a0f1L, 0x7dac5c0e0d38a6L, + 0xa5570b00ae73afL, 0xc7c19d3f5aed28L, 0x575fa6f5251e92L, + 0xb843cd6cdf7275L, 0xd9d3d8e9a01287L }, + { 0xf94e356b3c370bL, 0xc62b99ffe464b0L, 0x7792650a986057L, + 0xeaa67d5c4b1874L, 0xba1ba4d0b07078L, 0xdbf636d7a03699L, + 0x1a16c34edd32a3L, 0x6ce2495a45cb5dL } + }, +}, +{ + { + { 0xd7c4d9aa684441L, 0xce62af630cd42aL, 0xcd2669b43014c4L, + 0xce7e7116f65b24L, 0x1847ce9576fa19L, 0x82585ac9dd8ca6L, + 0x3009096b42e1dbL, 0x2b2c83e384ab8bL }, + { 0xe171ffcb4e9a6eL, 0x9de42187374b40L, 0x5701f9fdb1d616L, + 0x211e122a3e8cbcL, 0x04e8c1a1e400bfL, 0x02974700f37159L, + 0x41775d13df8c28L, 0xcfaad4a61ac2dbL } + }, + { + { 0x6341b4d7dc0f49L, 0xaff6c2df471a53L, 0x20ec795fb8e91eL, + 0x4c7a4dfc3b7b62L, 0x9f33ff2d374938L, 0x38f8c653a60f2eL, + 0xc1168ac2efef73L, 0x046146fce408eeL }, + { 0x9b39ac0308b0c3L, 0xe032d6136b8570L, 0xee07d8dfc4aacfL, + 0x0a82acbd5a41ddL, 0xbe0ded27c3d726L, 0xce51d60b926ce9L, + 0xfa2f7f45806c1eL, 0xe367c6d1dec59cL } + }, + { + { 0x64511b6da2547bL, 0x76a349c0761405L, 0x37d662601223abL, + 0x0e243c1f4d7c48L, 0xdc9c8b4da756a0L, 0xc7430dfd72e7e9L, + 0x0eb130827b4210L, 0x7a9c044cf11cbdL }, + { 0x2c08ff6e8dd150L, 0x18b738c2932fc6L, 0x07d565104513e8L, + 0x0ca5cffaa40a17L, 0xd48634101baa8fL, 0xfb20fafb72b79eL, + 0x1a051e5654020fL, 0xe3b33174e17f23L } + }, + { + { 0x05910484de9428L, 0x620542a5abdf97L, 0xaa0ededa16a4d1L, + 0xa93f71c6d65bb9L, 0x88be135b8dfaf9L, 0x1d9f4e557ca8eeL, + 0x4c896aa26781adL, 0xd3fbe316c6c49fL }, + { 0x088d8522c34c3dL, 0xbb6d645badff1eL, 0xe3080b8385450dL, + 0x5ccc54c50ab1f3L, 0x4e07e6eac0657dL, 0xa7ba596b7ef2c0L, + 0xcceca8a73a81e9L, 0xa0b804c8284c35L } + }, + { + { 0x7c55956f17a6a2L, 0xb451d81789cfa8L, 0xdf414e82506eaaL, + 0x6ef40fbae96562L, 0x63ea2830e0297eL, 0xf5df26e73c46faL, + 0xe00641caac8bceL, 0xc89ed8f64371f3L }, + { 0xd22b08e793202eL, 0x39a9033875cb50L, 0xe64eec0f85ddb4L, + 0xdce45a77acf7b5L, 0x39d1e71b9b802dL, 0xafdfe7cbd559acL, + 0x17ec1f8809eeb5L, 0x8c0e38a4889b8cL } + }, + { + { 0x47eabfe17089daL, 0x2d18466ec90c50L, 0xa511aa45861531L, + 0xebb3d348c39b39L, 0xa0ac4daf1b5282L, 0xea26be7a9dadbaL, + 0x8992ba8554d86eL, 0x7fcbdb6d5f2ef5L }, + { 0x320e79b56863e7L, 0xeb9d0c0a7dce2dL, 0xb9f4031784cbc6L, + 0x68823ee7ac1f81L, 0xa6b6f4f9d87497L, 0x83c67b657f9b6eL, + 0x37357470fef2a7L, 0xf38028f59596e2L } + }, + { + { 0x9ea57ab7e82886L, 0x18221c548c44d5L, 0xbf8e6cf314a24fL, + 0x70ff18efd025e5L, 0x08d03de5334468L, 0x2b206d57404fb7L, + 0xb92327155e36b0L, 0xcc7604ab88ddd9L }, + { 0x3df51524a746f0L, 0x8fdebd8168e3fcL, 0xffc550c7f8c32cL, + 0x1dbbc17148743eL, 0xd48af29b88e18bL, 0x8dca11c750027cL, + 0x717f9db1832be3L, 0x22923e02b06019L } + }, + { + { 0xd4e06f5c1cc4d3L, 0x0fa32e32b4f03aL, 0x956b9afc4628d0L, + 0x95c39ce939dad1L, 0x39d41e08a00416L, 0xfd7ff266fb01aaL, + 0xc6033d545af340L, 0x2f655428e36584L }, + { 0x14cfb1f8dff960L, 0x7236ffcda81474L, 0xc6a6788d452d0fL, + 0x2ad4a5277f6094L, 0x369d65a07eea74L, 0x27c6c38d6229aaL, + 0xe590e098863976L, 0x361ca6eb38b142L } + }, +}, +{ + { + { 0x6803413dfeb7efL, 0xb669d71d3f4fadL, 0x5df402ac941606L, + 0xe5d17768e6c5b7L, 0x131bcb392ab236L, 0x7f1fb31ce2e0e0L, + 0xa2c020d9e98c35L, 0x33b23c0f28657bL }, + { 0xed14e739cf7879L, 0x10d4867b4357b3L, 0x127cea331e4e04L, + 0xc60d25faa5f8a7L, 0xfef840a025b987L, 0x78081d666f2a0aL, + 0x0fa0b97ac36198L, 0xe0bb919134dc9fL } + }, + { + { 
0xc1d2461cc32eaeL, 0x0fdbfdf0f79a37L, 0x70f2bc21c95f02L, + 0x7d68bec372cddfL, 0x44f78178439342L, 0xa3d56784843a6cL, + 0xbadf77a07f8959L, 0xf45819873db4caL }, + { 0xe8eaaf3d54f805L, 0x2f529d1b84c1e7L, 0x404e32e21e535cL, + 0xabac85c159b5f5L, 0x4e8e594b00466fL, 0x40fcaabc941873L, + 0x3b4e370be407c6L, 0xccd57885b2e58dL } + }, + { + { 0x3ee615e88b74a8L, 0xd7d6608eab4e69L, 0x27cf9f1e4ace36L, + 0x282359e7aebabbL, 0x96e509bf6d162fL, 0xad906f3f1a290aL, + 0xe7d6c4f1314a58L, 0xeecffe4218431dL }, + { 0xa66e0e9e2cfed9L, 0xb0887ec71f0544L, 0xd34e36ba04c5d7L, + 0x094daa5ed4392dL, 0xcda83adc8aa925L, 0x1adef91b979786L, + 0x3124dcbfddc5d6L, 0x5cc27ed0b70c14L } + }, + { + { 0x386dbc00eac2d8L, 0xa716ecbc50ca30L, 0x9e3fc0580d9f04L, + 0x37dde44cfeacebL, 0xd88d74da3522d5L, 0x6bb9e9f2cf239aL, + 0x9e7fb49a7cbfecL, 0xe1a75f00a5c0efL }, + { 0x6e434e7fb9229dL, 0x0ec6df5c8a79b3L, 0x7046380d3fb311L, + 0xe957ef052e20faL, 0x0f4fe9a9ef4614L, 0x1b37d9c54d8f2bL, + 0x23b2dc139d84a2L, 0xf62c4f6724e713L } + }, + { + { 0xbd6922c747e219L, 0x34d14383869b7bL, 0x8c875a596f2272L, + 0xd9602c03fe361eL, 0x081348f744839fL, 0x61bd16c61ac1f1L, + 0x993b727d8da4e1L, 0xbb40ba87741271L }, + { 0xe6dcc9881dcfffL, 0x9f513f593ce616L, 0xdc09683618cd8fL, + 0xc3b1d1026639beL, 0xe8f149fc762ee2L, 0x59f26efb244aaeL, + 0x3f2de27693dd96L, 0xd8b68f79c3a7deL } + }, + { + { 0x6fa20b9970bd5bL, 0x87242d775f6179L, 0xa95a6c672d9308L, + 0x6eb251837a8a58L, 0xfdea12ac59562cL, 0x4419c1e20f1fc3L, + 0x0c1bd999d66788L, 0x4b7428832c0547L }, + { 0x4f38accdf479abL, 0x01f6271c52a942L, 0xe3298f402ca9a7L, + 0x533dacab718fc8L, 0x133602ab093ca8L, 0xc04da808f98104L, + 0xd0f2e23af08620L, 0x882c817178b164L } + }, + { + { 0x28e6678ec30a71L, 0xe646879f78aca1L, 0x868a64b88fa078L, + 0x671030afee3433L, 0xb2a06bb87c0211L, 0x202eca946c406aL, + 0x64d6284e4f0f59L, 0x56ae4a23c9f907L }, + { 0x5abbb561dcc100L, 0x6fef6cf07c7784L, 0xb6e25cddb7302dL, + 0xa26785b42980e8L, 0xe7d4043fb96801L, 0x46df55d8e4282bL, + 0x9c0a5f5c602d6eL, 0xf06560475dfe29L } + }, + { + { 0x0e82a1a3dcbc90L, 0xb1ee285656feacL, 0xfa4353b0d3d3b2L, + 0xc2e7a6edd5c5dfL, 0x13707e1416ce53L, 0xc84ce0787ebc07L, + 0xdd273ce8a9a834L, 0x432a6175e8e1e7L }, + { 0xa359670bd0064aL, 0xc899dd56534516L, 0x666560edb27169L, + 0x1537b22a19a068L, 0x3420507eac7527L, 0x479f25e6fc13a7L, + 0xc847acc1bc19b3L, 0xecdecf00b20d45L } + }, +}, +{ + { + { 0x6f241004acea57L, 0xdace1c6da68597L, 0xea7dd4150ce77fL, + 0x1aecb841585884L, 0x92ff208ea4a85cL, 0xde9433c88eebd2L, + 0x53cd3183f4d289L, 0x397085826539afL }, + { 0x4b57599b827d87L, 0xdc82ac03d77638L, 0x694336652f6e61L, + 0xb8fc4b0ad5e8a6L, 0x1b6f7dcf388642L, 0x6f24533a74dd57L, + 0xc66937841750cfL, 0x06757eb28a37afL } + }, + { + { 0x0e70d53c133995L, 0x88a5e0c7c8c97dL, 0x4e59dbf85f3be3L, + 0x0f364ac0e92698L, 0x3a1e79bef6940fL, 0xc8a3941d85d23aL, + 0x143bb999a00e58L, 0x61cf7d6c6f2f10L }, + { 0x979c99485150feL, 0xcfd0df259d773fL, 0xce97b9daab7bcdL, + 0xc9fff8e6afd8fcL, 0x246befd89a4628L, 0xf6302821567090L, + 0x15393426749c58L, 0xff47d0ea0f3fd3L } + }, + { + { 0x09b0bfd35f6706L, 0x74645812c82e69L, 0xb60729f50d5fe9L, + 0xf13324595c74f1L, 0x33647e3bb76c89L, 0x01264045a9afccL, + 0x46d57ee0f154abL, 0x2efa55525680a4L }, + { 0x12ebfc65329d90L, 0xcb37ae579800afL, 0x5bb53496f8e310L, + 0x9b59c63f1bb936L, 0x5b49baaf4610e9L, 0x2bbeeef4f2d6acL, + 0x87ee21e0badc67L, 0x12e2aadf1ddfa0L } + }, + { + { 0x5b4668fa9109eeL, 0xfa951338a6cea2L, 0xe45e6fc4068e16L, + 0x8ae9a0c0205ed8L, 0x2993b96679b79bL, 0xc6b878fed604d3L, + 0x01d020832c77f3L, 0xd45d890495a1abL }, + { 0x99348fa29d2030L, 0x961f9a661f8f7aL, 0xfd53212674f74bL, + 
0x45cee23b3e72bcL, 0x3fccb86b77e2d5L, 0xdff03104219cb7L, + 0x233771dc056871L, 0x1214e327d2c521L } + }, + { + { 0x9f51e15ff2a8e1L, 0x86571c5138bc70L, 0xbfc4caf0c09d46L, + 0x65e33fec2a0c18L, 0x8214392426867dL, 0x51ce6c080ae4edL, + 0x6cbe8d7b110de6L, 0x7f6e947fd22ea4L }, + { 0x7373a75cadefc4L, 0x6fca1d2b0c682fL, 0xcd2140df3c7c1eL, + 0x8653a37558b7a5L, 0x653e74e55eb321L, 0xbe0c6b3c31af73L, + 0x3376379f4fc365L, 0x3570b3771add4dL } + }, + { + { 0x9061ec183c3494L, 0xaf2f28d677bc95L, 0x6fe72793bf8768L, + 0xc5f50e30fa86d8L, 0x6c03060a3293ceL, 0x4d53357e2355a6L, + 0x43a59eae4df931L, 0x6f48f5d13b79c6L }, + { 0xa4d073dddc5192L, 0x6d0e318a65773fL, 0x1008792765de9eL, + 0xa724ed239a0375L, 0x510ff1497d7c9eL, 0x251f6225baa863L, + 0x86464fe648a351L, 0xf85e98fd50fd91L } + }, + { + { 0x29c963486ee987L, 0x93e8e5210dcc9fL, 0xa1fc4d1c910b1fL, + 0x015acacfeb603eL, 0xc9f25f80844a5fL, 0x50de93c73f4dacL, + 0x1758783310a4aaL, 0x544d570358f106L }, + { 0x4eeec7b1dc68caL, 0x6238e6fe00fbcbL, 0x34d394cb4e83c9L, + 0x764ffa22292656L, 0x5614cd1f641f2eL, 0x4252eb69e07234L, + 0xcbaef4568d2ba4L, 0x8c9c5508a98b17L } + }, + { + { 0xf235d9d4106140L, 0x1bf2fc39eb601eL, 0x6fb6ca9375e0c3L, + 0x4bf5492c0024d2L, 0x3d97093eb54cc6L, 0xc60931f5c90cb5L, + 0xfa88808fbe0f1aL, 0xc22b83dd33e7d4L }, + { 0x9cfec53c0abbf5L, 0x52c3f0a93723dfL, 0x0622b7e39b96b6L, + 0x300de281667270L, 0x50b66c79ef426aL, 0x8849189c6eb295L, + 0xeaec3a98914a7eL, 0x7ed56b0c4c99e0L } + }, +}, +{ + { + { 0x7926403687e557L, 0xa3498165310017L, 0x1b06e91d43a8fdL, + 0xf201db46ac23cbL, 0x6f172ad4f48750L, 0x5ed8c8ce74bd3eL, + 0x492a654daba648L, 0x123010ba9b64ffL }, + { 0xa83125b6e89f93L, 0x3a3b0b0398378aL, 0x9622e0b0aebe7cL, + 0xb9cbfdc49512a4L, 0x13edffd6aaf12aL, 0x555dff59f5eafdL, + 0x3cba6fe1212efaL, 0xd07b744d9bb0f8L } + }, + { + { 0x45732b09a48920L, 0xf3080fc13ff36dL, 0x9347395de8f950L, + 0x14d025a382b897L, 0x60c5a7404d72adL, 0x30be7e511a9c71L, + 0x43ffabd31ac33aL, 0x97b06f335cbb14L }, + { 0xe4ff5c57740de9L, 0x5fed090aacf81eL, 0x97196eee8b7c9dL, + 0x316dcd1045910bL, 0x7a2b2f55ad8c63L, 0x674fffdc5b03bbL, + 0xc1cd133e65953cL, 0x3c060520a83556L } + }, + { + { 0x797c3f6091c23dL, 0x2ea2de339c9c05L, 0x5d958b4a31f67cL, + 0xf97afe5d5f088cL, 0xbcfbd2a0b37243L, 0xc43ad3eeca630cL, + 0xb92a33742845e0L, 0x970bff7a9a0f16L }, + { 0x86355115970a79L, 0xcee332ef205928L, 0x2c58d70c04c208L, + 0xdbfe19a3f5e5bfL, 0x8f8f2c88e51c56L, 0xb61f58e8e2da75L, + 0x4046a19624d93fL, 0x7de64dbe1f9538L } + }, + { + { 0xd018e1cc2d850eL, 0x8cdb64363a723cL, 0x9a65abe90a42afL, + 0xfeece9616f20ccL, 0xc906800d5cff56L, 0x0acf23a3f0deedL, + 0x2143061728dd3aL, 0x66276e2b8ce34cL }, + { 0x23700dc73cc9c7L, 0xdb448515b1778bL, 0x330f41e4aab669L, + 0x2f5aabcf5282a4L, 0xff837a930f9e01L, 0x1a1eb2f901cc98L, + 0xd3f4ed9e69bd7fL, 0xa6b11418a72a7dL } + }, + { + { 0x34bde809ea3b43L, 0x5ddcb705ced6aeL, 0x8257f5b95a6cb8L, + 0xaac205dc77dcb8L, 0x77d740d035b397L, 0xca7847fcf7e0a6L, + 0x9404dd6085601bL, 0x0a5046c457e4f9L }, + { 0xcaee868bc11470L, 0xb118796005c5f6L, 0xcc04976ec79173L, + 0x7f51ba721f6827L, 0xa8e3f0c486ff7eL, 0x327163af87838cL, + 0xcf2883e6d039fdL, 0x6fb7ab6db8b0e2L } + }, + { + { 0x8ca5bac620d669L, 0xff707c8ed7caa9L, 0xdaefa2b927909bL, + 0x1d2f9557029da3L, 0x52a3ba46d131a0L, 0xe5a94fd3ab1041L, + 0x508917799bc0aeL, 0xf750354fa1bd16L }, + { 0xdd4e83a6cd31fdL, 0xd33505392fac84L, 0xf914cbc1691382L, + 0x669683fda6ade6L, 0x69446438878513L, 0x429d3cc4b1a72dL, + 0x655c46a61eec36L, 0x881eded4bc4970L } + }, + { + { 0x5b39d377ca647fL, 0x41533c1e917b34L, 0xea2aeb57daf734L, + 0xf1ef1eb1286560L, 0x582f2e008e0473L, 0x5913d7d5edc74aL, 
+ 0x588c7ec3c1e754L, 0xbd6db057146fe1L }, + { 0x3b0d49e7634907L, 0x4c65ce4e43b9ccL, 0xb87e9582d92d5bL, + 0x05135727ab1519L, 0x03ec0848c3aed0L, 0x4d7aa21561a641L, + 0xe5f821199e92adL, 0x379b55f48a457cL } + }, + { + { 0x8317c34d6a8442L, 0xb0ab4a5ae499daL, 0xebcb16e720e8ebL, + 0xfd5c5639a96908L, 0xcab4d67ad23acfL, 0xa600a79bcdf748L, + 0x18a6340a2a6a51L, 0xf2f415c3aabd69L }, + { 0xdb38a4f747258aL, 0xb6ea5602e24415L, 0xfad1ea9f1f7655L, + 0x4e27eb5c957684L, 0xf8283e1b2e1cfcL, 0x8f83bd6aa6291cL, + 0x28d23b55619e84L, 0xb9f34e893770a4L } + }, +}, +{ + { + { 0x1bb84377515fb1L, 0xac73f2a7b860a6L, 0x78afdfa22b390fL, + 0x815502b66048aaL, 0xf513b9785bf620L, 0x2524e653fc5d7cL, + 0xa10adc0178c969L, 0xa1d53965391c8dL }, + { 0x09fccc5a8bcc45L, 0xa1f97d67710e1eL, 0xd694442897d0a1L, + 0x7030beb5f42400L, 0xdebe08c7127908L, 0x96b715c2187637L, + 0xc598250b528129L, 0x0f62f45a1ccb07L } + }, + { + { 0x8404941b765479L, 0xfdecff45837dc4L, 0x1796372adbd465L, + 0x5f84c793159806L, 0x6d2e46b6aaad34L, 0xd303b4a384b375L, + 0x440acd5b392002L, 0x4f2a4a7c475e87L }, + { 0x038e1da5606fc2L, 0x2d821c29c2f050L, 0xc074cb3f139db4L, + 0xde2fee74ec59beL, 0x5a819eea84ed59L, 0xd65c62c3e98711L, + 0x72eb440b9723c1L, 0xb92775401be611L } + }, + { + { 0x929fe64ab9e9fcL, 0x04379fd0bf1e85L, 0xb322093bc28ee3L, + 0x78ac4e2e4555e1L, 0xdb42b58abc5588L, 0x1c1b5e177c8b12L, + 0xf6d78dd40366c4L, 0xc21ff75bdae22eL }, + { 0x1e3d28ea211df2L, 0xc5a65a13617c0aL, 0x3fa02c058140d5L, + 0x155c346b62d10cL, 0xc9cf142e48268fL, 0xdc140831993bc3L, + 0x07c44d40ee69dcL, 0x61699505e2ac46L } + }, + { + { 0x44e4a51d0fb585L, 0x00846bef1f3ce8L, 0xedef39a8e2de1eL, + 0x430afe333b3934L, 0xac78b054337188L, 0x0f39de4c9a3f24L, + 0x039edddc9ae6a4L, 0xf4701578eacd51L }, + { 0x1e396949a2f31aL, 0xc8a40f4b19a8b1L, 0xdddd10c9d239d8L, + 0xf974245887e066L, 0xfdb51113ea28c6L, 0xb5af0fbe1122a9L, + 0xd30c89f36e0267L, 0x7b1c0f774f024cL } + }, + { + { 0x1ec995607a39bfL, 0x1c3ecf23a68d15L, 0xd8a5c4e4f59fe9L, + 0xacb2032271abc3L, 0xbc6bdf071ef239L, 0x660d7abb39b391L, + 0x2e73bb2b627a0eL, 0x3464d7e248fc7eL }, + { 0xaa492491666760L, 0xa257b6a8582659L, 0xf572cef5593089L, + 0x2f51bde73ca6bfL, 0x234b63f764cff5L, 0x29f48ead411a35L, + 0xd837840afe1db1L, 0x58ec0b1d9f4c4bL } + }, + { + { 0x8e1deba5e6f3dcL, 0xc636cf406a5ff7L, 0xe172b06c80ca0fL, + 0x56dc0985ffb90aL, 0x895c2189a05e83L, 0x6ddfaec7561ac2L, + 0xaa3574996283a0L, 0x6dfb2627e7cd43L }, + { 0x6576de52c8ca27L, 0x6a4a87249018ebL, 0x00c275c5c34342L, + 0xe34805ad2d90c4L, 0x651b161d8743c4L, 0xb3b9d9b7312bf3L, + 0x5d4b8e20bf7e00L, 0x8899bdf78d3d7eL } + }, + { + { 0x9644ad8faa9cd1L, 0x34c98bf6e0e58eL, 0x6022aad404c637L, + 0x2a11a737ac013bL, 0x5bdd1035540899L, 0x2e675721e022a4L, + 0xe32045db834c33L, 0x74a260c2f2d01cL }, + { 0x20d59e9c48841cL, 0x05045dde560359L, 0xeba779cac998acL, + 0x5bed10c00a6218L, 0x25d4f8e5327ef4L, 0xa2784744597794L, + 0xefd68ca831d11eL, 0x9ad370d934446aL } + }, + { + { 0x3089b3e73c92acL, 0x0ff3f27957a75cL, 0x843d3d9d676f50L, + 0xe547a19d496d43L, 0x68911c98e924a4L, 0xfab38f885b5522L, + 0x104881183e0ac5L, 0xcaccea9dc788c4L }, + { 0xfbe2e95e3c6aadL, 0xa7b3992b3a6cf1L, 0x5302ec587d78b1L, + 0xf589a0e1826100L, 0x2acdb978610632L, 0x1e4ea8f9232b26L, + 0xb21194e9c09a15L, 0xab13645849b909L } + }, +}, +{ + { + { 0x92e5d6df3a71c1L, 0x349ed29297d661L, 0xe58bd521713fc9L, + 0xad999a7b9ddfb5L, 0x271c30f3c28ce0L, 0xf6cd7dc2a9d460L, + 0xaf728e9207dec7L, 0x9c2a532fcb8bf0L }, + { 0xd70218468bf486L, 0x73b45be7ab8ea8L, 0xddfc6581795c93L, + 0x79416606bb8da2L, 0x658f19788e07a2L, 0xa9d5b0826d3d12L, + 0x4d7c95f9535b52L, 0xad55e25268ef8aL } + }, + { + { 
0x94a9b0ba2bc326L, 0x485ecc5167e5f3L, 0x8340bc7c97fc74L, + 0x06f882b07aaa5cL, 0x4b57455849698aL, 0xd9281ebb36a0baL, + 0x8918c6c8b8108fL, 0xedd1eea5b50d1dL }, + { 0x94d737d2a25f50L, 0x0e5a8232446ad0L, 0x02a54357ced3e2L, + 0xb09a92a4af8cedL, 0x85fc498eeecef2L, 0x06a02b9e71e3d4L, + 0x00ad30784bb49aL, 0xf61585e64a5b4aL } + }, + { + { 0x915f6d8b86a4c9L, 0x944bc6ba861e1fL, 0x3091ca754465efL, + 0x11df859eb53a38L, 0xd44dde50144679L, 0x6c8da9a0994eddL, + 0xeebcebf91241efL, 0xc419354c2f6859L }, + { 0x1f4969349581b6L, 0x5712b10bb26cb4L, 0x8fcaa41b09fd59L, + 0xbd39aad72e22e3L, 0xf70e794b1199b0L, 0xdf63c0cc6f863dL, + 0xd58166fee9df4fL, 0xb9224eac45e70bL } + }, + { + { 0x80072face525f4L, 0x8597bd666a5502L, 0xf65e203dbc9725L, + 0xeccfbe3f2222a4L, 0x490aa422339834L, 0x134889162489e8L, + 0xaff3f80a735084L, 0x69d53d2f3f1bd6L }, + { 0xb123ffc813341aL, 0x359084c1173848L, 0x751425ed29b08dL, + 0x1edda523890ad4L, 0xb64974c607cf20L, 0xa8c8cb8b42ac7cL, + 0xd5cb305edd42e5L, 0xf3034dc44c090aL } + }, + { + { 0x428921dbb18e19L, 0x4cfd680fed2127L, 0x671144d92ac8c3L, + 0x2121901132c894L, 0x25d0e567604cd9L, 0xa372223afbc2a0L, + 0xcf98a5256c16f7L, 0x71f129ab5459e1L }, + { 0xf4afdc5b668b2eL, 0xc5d937a0c2d410L, 0xe2cc4af285d54aL, + 0x1c827778c53e18L, 0x270f2c369a92f6L, 0x799f9ac616327aL, + 0xce658d9d4246f2L, 0x0fb681ffb12e36L } + }, + { + { 0xc5ab11ee0690feL, 0x80261e33f74249L, 0x8eb4b4758c1cf2L, + 0x4895a80184ae9bL, 0x4a4bdb6d3e27ebL, 0xa7a1638bfd251cL, + 0x29ec144417a7e3L, 0xd0736093f1b960L }, + { 0xcb1ed8349c73d1L, 0x33fc84a8d1945aL, 0x9f668dbe965118L, + 0x3331743a82811fL, 0xf394dec28ba540L, 0x44ce601654a454L, + 0x240dbb63623645L, 0xf07e7f22e61048L } + }, + { + { 0x7c9f1763d45213L, 0x3eefa709c1f77fL, 0xde3c3c51b48350L, + 0x4a2bc649d481a7L, 0xfd4a58a7874f3dL, 0x96655d4037b302L, + 0x945252868bf5abL, 0x1b6d46a75177f6L }, + { 0x7de6763efb8d00L, 0xb2c1ba7a741b7bL, 0xcca6af47bae6edL, + 0xe4378ca5b68b3fL, 0xfb757deaf71948L, 0x7f07b5ebc6ac99L, + 0x752a56827d636dL, 0xc8b7d1d4b8a34fL } + }, + { + { 0x76cb78e325331bL, 0x41f41c9add2eedL, 0x03db2385c5f623L, + 0xbbc1d177102fa2L, 0x80f137a60182ecL, 0xfdd856955adf15L, + 0x4f53f5ee3373dcL, 0xec6faf021b669bL }, + { 0x7d4e9830b86081L, 0x10d3cd9f2d979cL, 0x0f48f5824a22c8L, + 0x86c540c02f99eeL, 0xf4c66545e6c5fcL, 0xaf0c588bc404c8L, + 0x2e6edbd423118aL, 0x86e32e90690eabL } + }, +}, +{ + { + { 0x1d12656dfbfa6fL, 0xa4980957646018L, 0x2f1071bc3597d0L, + 0x3df83f91dda80aL, 0x5853e28f3ae449L, 0xb853d319e19aadL, + 0x863f01ba0d8a46L, 0xa84fca62fef108L }, + { 0xbe4c0b7fb84de9L, 0x40a03dcc0727bfL, 0x781f841b18575cL, + 0x6a63045466cddbL, 0x6be758205dc7a2L, 0x420f87f07ae811L, + 0x28082423bf96c8L, 0x723998c51c6821L } + }, + { + { 0x38ab64181f5863L, 0xd82ecbd05ff9e1L, 0x339c94ea065856L, + 0x143054aa45156dL, 0xe6d64bf065628cL, 0xe530086a938589L, + 0x22d3a49385d79bL, 0x0b107900ab8245L }, + { 0xb0d80fbca387b5L, 0x698206e35551d7L, 0x199685da10bb73L, + 0xa8e5fa89107378L, 0x36e5724d99dbbfL, 0xd67f476d581b03L, + 0x7a15be788dd1e6L, 0x8dac8e4e5baa31L } + }, + { + { 0x4d5d88fe170ef8L, 0xb6ba5de1e9e600L, 0x4a89d41edeabc5L, + 0x737c66b8fac936L, 0x8d05b2365c3125L, 0x85a5cbcb61b68eL, + 0x8fea62620a6af9L, 0x85115ded8b50ecL }, + { 0x5430c8d6a6f30bL, 0x8bef9cf8474295L, 0x0648f5bbe77f38L, + 0xfe2b72f9e47bd7L, 0xad6c5da93106e2L, 0x4fa6f3dfa7a6c3L, + 0xdcd2ed8b396650L, 0x7de1cce1157ef9L } + }, + { + { 0x70a5f6c1f241d1L, 0x6c354d8798cd5cL, 0x23c78381a729fbL, + 0xcff8f15523cbdaL, 0x5683ff43493697L, 0xef7dbab7534f53L, + 0xd7bd08e2243d53L, 0x6f644cbf8072a9L }, + { 0xac960f9b22db63L, 0xa97f41723af04dL, 0x692b652d9798afL, + 
0x0e35967fedb156L, 0x14b5e50dfe6ee8L, 0x7597edeb411070L, + 0x116f3ce442b3f9L, 0xe9b5ae81b2b6dbL } + }, + { + { 0xf4385ee2315930L, 0xc8d029827a8740L, 0x7907a8dd934a43L, + 0x20bc946c582191L, 0xa4acb3e6a405e7L, 0x8c1d6c843df2f5L, + 0x9df1593991f0b5L, 0xbb9df984d9be9dL }, + { 0x63620088e4b190L, 0xee1421eada3a88L, 0xb84f0ccf93b027L, + 0x7a5d6678e95091L, 0x3974462f3e3704L, 0xfa6fb5ec593e98L, + 0x44b6cf7a6477d2L, 0xe885b57b09a562L } + }, + { + { 0x6e339e909a0c02L, 0x57afff00e75f29L, 0x797d8d6fb7db03L, + 0xc6e11a3d25a236L, 0x643ce1c0107260L, 0xe644ec462eae1cL, + 0x821d5b83f5a3f5L, 0xa8ad453c0579d6L }, + { 0x6518ed417d43a4L, 0x46e76a53f87ccdL, 0xd6cbaabf9bef95L, + 0x25688324f7cbcfL, 0x367159a08476b4L, 0x1d1b401be6d324L, + 0x348cb98a605026L, 0x144f3fe43b6b1eL } + }, + { + { 0xbabbd787b1822cL, 0xd34ba7e2aa51f8L, 0x086f1cc41fbea4L, + 0x96f7eac746f3d9L, 0xad97f26281ecafL, 0x751a905a14ee2cL, + 0xb4e7fe90d7335fL, 0x0d97b8f4892ff0L }, + { 0xdb8a3155a5c40eL, 0x64e5de77ba567bL, 0x4f155f71eefe88L, + 0xe2297e9fb6fbf4L, 0xfe24bf96c16be5L, 0x2251847cdd83e2L, + 0x13ac2c85eda444L, 0x49d1b85283275fL } + }, + { + { 0xca08731423e08fL, 0x7046bb087d2f14L, 0x876f10c3bc846cL, + 0x2202b76358fbe3L, 0x0d4fc1c0e26ac6L, 0x1fc748bb986881L, + 0x609e61c8384a18L, 0x28a72d60d88e00L }, + { 0x1332a3178c6e2fL, 0x0367919b3526a4L, 0x53989e4698fe3eL, + 0x14b1145b16a99bL, 0xef9ec80ddbb75fL, 0x76256240e53955L, + 0x54e087a8744ae1L, 0xce50e8a672b875L } + }, +}, +{ + { + { 0x4c88b2ba29629cL, 0x946559c7b2642fL, 0x933d432f7ebe4cL, + 0x97109b663632c9L, 0x799b3fbe53184dL, 0xd4628710f069a6L, + 0x0c182a13a68351L, 0x974a8399a2437aL }, + { 0x29f19972a70278L, 0x01b98b6d9c424bL, 0xd85a60b08f4c37L, + 0xcc3523f2b1da15L, 0xf922115ddffb0fL, 0xee0fe4dde84ae2L, + 0x810440c55365beL, 0xd2f66391a457e8L } + }, + { + { 0x5e6879fe2ddd05L, 0x92a7545abdfc61L, 0x7dedd63a5cede8L, + 0x8a03b3f70df4bdL, 0xa5d1f6591f6cbbL, 0x372fde610f3fb2L, + 0x4537f9ea9dee05L, 0x7eb85bbdf7aa50L }, + { 0x963edf8e8c504dL, 0x53c8dcae7bdb6bL, 0xa246e4c6fedf2dL, + 0x75533400c55bdeL, 0x2aa748d0270a54L, 0xadb6cf005860ddL, + 0x8d314509b84763L, 0x626720deb405efL } + }, + { + { 0xa3709ae6601328L, 0x68e94fd2ac2478L, 0x38793439d5d247L, + 0xfa467af392c198L, 0x49e7b0d15df607L, 0x8c5812261792a8L, + 0x79f76581d3762fL, 0xaa38895244a39dL }, + { 0xef60af9c5cd0bcL, 0x2b0db53a33b3bbL, 0xe3e0b1f251015dL, + 0xc608afce64489eL, 0xe52b05703651aaL, 0x1dda8b91c6f7b9L, + 0x833f022ff41893L, 0x58eb0a0192818cL } + }, + { + { 0x6c1300cfc7b5a7L, 0x6d2ffe1a83ab33L, 0x7b3cd019c02eefL, + 0x6c64559ba60d55L, 0x2e9c16c19e2f73L, 0x11b24aedbe47b1L, + 0xc10a2ee1b8153bL, 0x35c0e081e02e1aL }, + { 0xa9f470c1dd6f16L, 0x4ea93b6f41a290L, 0xac240f825ee03fL, + 0x6cd88adb85aabdL, 0x378a64a1be2f8fL, 0xbf254da417bac1L, + 0x7e4e5a59231142L, 0x057aadc3b8c057L } + }, + { + { 0x607c77a80af479L, 0xd3e01ff5ccdf74L, 0x9680aaf101b4c7L, + 0xd2a7be12fc50a6L, 0x92a788db72d782L, 0x35daf2e4640b52L, + 0xc170d6939e601cL, 0x16e05f57b25c2fL }, + { 0x47a42a66fe37f8L, 0xeb74271beca298L, 0x401e11e179da16L, + 0xfb8da82aa53873L, 0xd657d635bb4783L, 0x6847758fcea0b1L, + 0x2f261fb0993154L, 0x868abe3592853aL } + }, + { + { 0x1a4c54335766abL, 0xa1c84d66f4e4eaL, 0x5d737a660ba199L, + 0x4a7b1e298b15a2L, 0x207877ffd967d3L, 0xcaec82dc262b4dL, + 0x0b278494f2a37dL, 0x34781416ac1711L }, + { 0x28e3df18fc6856L, 0xbec03f816d003fL, 0x2bd705bff39ebdL, + 0x1dcb53b2d776d3L, 0xabafa7d5c0e7ceL, 0x5b9c8c24a53332L, + 0xe9f90d99d90214L, 0x789747ec129690L } + }, + { + { 0x94d3c3954e2dfaL, 0x919f406afb2a8fL, 0x159ef0534e3927L, + 0xcdb4d14a165c37L, 0xa23e5e8288f337L, 0x95867c00f90242L, 
+ 0x2528150e34e781L, 0x104e5016657b95L }, + { 0x695a6c9bcdda24L, 0x609b99523eb5faL, 0xcbce4f516a60f8L, + 0xec63f7df084a29L, 0x3075ada20c811fL, 0x129a1928c716a1L, + 0xd65f4d4cd4cd4aL, 0xe18fa9c62188beL } + }, + { + { 0x1672757bac60e3L, 0x525b3b9577144bL, 0x38fc997887055bL, + 0x7a7712631e4408L, 0x884f173cba2fcfL, 0x783cbdc5962ac0L, + 0x4f3ed0a22287dcL, 0x8a73e3450e20e6L }, + { 0xe7a1cd0d764583L, 0x8997d8d0d58ee6L, 0x0ea08e9aa13ed6L, + 0xed478d0cf363cbL, 0x068523d5b37bf4L, 0x8b5a9e8783f13cL, + 0xde47bbd87528a9L, 0xd6499cccaec313L } + }, +}, +{ + { + { 0x54781bbe09859dL, 0x89b6e067f5e648L, 0xb006dfe7075824L, + 0x17316600717f68L, 0x9c865540b4efe2L, 0xdbdb2575e30d8eL, + 0xa6a5db13b4d50fL, 0x3b5662cfa47bebL }, + { 0x9d4091f89d4a59L, 0x790517b550a7dcL, 0x19eae96c52965eL, + 0x1a7b3c5b5ed7a4L, 0x19e9ac6eb16541L, 0x5f6262fef66852L, + 0x1b83091c4cda27L, 0xa4adf6f3bf742bL } + }, + { + { 0x8cc2365a5100e7L, 0x3026f508592422L, 0xa4de79a3d714d0L, + 0xefa0d3f90fcb30L, 0x126d559474ada0L, 0xd68fa77c94350aL, + 0xfa80e570c7cb45L, 0xe042bb83985fbfL }, + { 0x51c80f1fe13dbaL, 0xeace234cf055d7L, 0x6b8197b73f95f7L, + 0x9ca5a89dcdbe89L, 0x2124d5fdfd9896L, 0x7c695569e7ca37L, + 0x58e806a8babb37L, 0x91b4cc7baf99ceL } + }, + { + { 0x874e253197e968L, 0x36277f53160668L, 0x0b65dda8b95dbeL, + 0x477a792f0872a1L, 0x03a7e3a314268dL, 0xa96c8420c805c7L, + 0xb941968b7bc4a8L, 0x79dce3075db390L }, + { 0x577d4ef6f4cc14L, 0x5b0d205b5d1107L, 0x64ff20f9f93624L, + 0x0b15e315034a2fL, 0x3a0f6bb8b6f35cL, 0x0399a84e0d0ec5L, + 0xd0e58230d5d521L, 0xdeb3da1cb1dd54L } + }, + { + { 0x24684ae182401aL, 0x0b79c1c21a706fL, 0xe1d81f8d8998afL, + 0xadf870f4bb069fL, 0xd57f85cf3dd7aaL, 0x62d8e06e4a40f8L, + 0x0c5228c8b55aa1L, 0xc34244aa9c0a1aL }, + { 0xb5c6cf968f544eL, 0xa560533de23ab7L, 0xaa5512047c690cL, + 0x20eda5b12aaaa6L, 0xea0a49a751a6a0L, 0x6d6cfff2baa272L, + 0x95b756ebf4c28aL, 0xd747074e6178a4L } + }, + { + { 0xa27b453221a94bL, 0xd56ad13e635f20L, 0x03574b08c95117L, + 0xf0ee953ed30b70L, 0xb48d733957796fL, 0xf5d958358c336bL, + 0x6170cd882db529L, 0xcd3ef00ec9d1eaL }, + { 0xd1bea0de4d105fL, 0xd2d670fad6a559L, 0x652d01252f9690L, + 0x5f51fb2c2529b0L, 0x5e88bf0e89df2aL, 0x9a90684cd686e4L, + 0xf519ccd882c7a1L, 0x933a0dfc2f4d37L } + }, + { + { 0x0720a9f3f66938L, 0x99356b6d8149dfL, 0xb89c419a3d7f61L, + 0xe6581344ba6e31L, 0xd130561ab936c8L, 0x0625f6c40dbef1L, + 0x7b2d6a2b6bb847L, 0x3ca8b2984d506bL }, + { 0x6bf729afb011b0L, 0x01c307833448c9L, 0x6ae95080837420L, + 0xf781a8da207fb8L, 0xcc54d5857562a9L, 0xc9b7364858c5abL, + 0xdfb5035359908fL, 0x8bf77fd9631138L } + }, + { + { 0xf523365c13fbb1L, 0x88532ea9993ed5L, 0x5318b025a73492L, + 0x94bff5ce5a8f3cL, 0x73f9e61306c2a0L, 0x00abbacf2668a3L, + 0x23ce332076237dL, 0xc867f1734c0f9bL }, + { 0x1e50995cfd2136L, 0x0026a6eb2b70f8L, 0x66cb1845077a7dL, + 0xc31b2b8a3b498eL, 0xc12035b260ec86L, 0x1cbee81e1b3df0L, + 0xfd7b8048d55a42L, 0x912a41cf47a8c8L } + }, + { + { 0xab9ffe79e157e3L, 0x9cfe46d44dc158L, 0x435551c8a4a3efL, + 0x638acc03b7e3a8L, 0x08a4ebd49954a7L, 0x295390c13194f7L, + 0x3a2b68b253892aL, 0xc1662c225d5b11L }, + { 0xcfba0723a5d2bbL, 0xffaf6d3cc327c9L, 0x6c6314bc67e254L, + 0x66616312f32208L, 0xf780f97bea72e1L, 0x495af40002122fL, + 0x3562f247578a99L, 0x5f479a377ce51eL } + }, +}, +{ + { + { 0x91a58841a82a12L, 0xa75417580f3a62L, 0x399009ff73417aL, + 0x2db1fb90a8c5cdL, 0x82c8912c046d51L, 0x0a3f5778f18274L, + 0x2ad0ede26ccae2L, 0x7d6bd8b8a4e9c2L }, + { 0xaa0d7974b3de44L, 0xf8658b996ac9bbL, 0x31e7be25f6c334L, + 0x23836ce4df12c9L, 0x029027b59eb5c9L, 0x2f225315b8649dL, + 0xa0fdf03d907162L, 0x101d9df9e80226L } + }, + { + { 
0xf12037a9a90835L, 0xd2d0882f0222a7L, 0xeaf8d40c3814e2L, + 0xa986dc68b8146bL, 0x147a3318504653L, 0x734e0032feaf67L, + 0x6f27bbf602bec5L, 0xa1e21f16a688f3L }, + { 0x5a8eeab73c4ae5L, 0x4dbaddbe70b412L, 0x871cebacfd2af1L, + 0x18603827d7a286L, 0x024059db5bb401L, 0x2557c093c39b73L, + 0xfc5a7116681697L, 0xf881c0f891b57cL } + }, + { + { 0x3c443f18ea191aL, 0x76faa58d700ad0L, 0x6fe6cfabe7fcbfL, + 0xaefc5288990ef7L, 0x44e30fa80004ccL, 0xc744adc6d8ef85L, + 0xafcd931912df70L, 0xf62a9d1572a6d8L }, + { 0x47158a03219f27L, 0x76fb27ead73136L, 0x41bb2adcc2d614L, + 0x8858cb9de1ec21L, 0xab402c45f15866L, 0x6675d5bbc82bbfL, + 0x4ee9dd6f1b28d3L, 0x875884fe373c17L } + }, + { + { 0x17806dd2a67d36L, 0xaa23a8632c9ec1L, 0xd914126fc1ee55L, + 0xbf8f7bd653701bL, 0x9b0111aea71367L, 0x61fd4aba98e417L, + 0xeb45298561c5a5L, 0x2187b0ae7af394L }, + { 0x71f12db1616ddeL, 0x061760907da7b4L, 0x414d37602ddb04L, + 0x1100be7286fb58L, 0xd7cf88d6f0d95bL, 0x8539d23746d703L, + 0xdccc9d64e23d73L, 0xaeef1d2ec89680L } + }, + { + { 0x82ccf1a336508dL, 0xa128c1f5bad150L, 0x551d8c029a188dL, + 0xef13dd4771404fL, 0xdd67696c37b993L, 0x428c0e20dddad2L, + 0x222278d038c94cL, 0x1a24a51078e3f2L }, + { 0xd297fe6edb0db9L, 0x00988d28251a87L, 0xbb946f8bfaa0d7L, + 0x380f7b9df45ea0L, 0x8526415afccf5eL, 0x909bfbfe9ec7bcL, + 0x2ed7093124755cL, 0x436802889404e2L } + }, + { + { 0x21b9fa036d9ef1L, 0xfd64b7ce433526L, 0xd9d7eb76544849L, + 0x201620cd5b54b3L, 0x25fab3dbb61159L, 0x90d4eb0c53e0d3L, + 0xba098319e74772L, 0x8749658ec1681cL }, + { 0xa354349fec316bL, 0x639a9b1a743ea2L, 0x2e514ca37c50e6L, + 0x9f4a4fddbaf6c5L, 0x0df87ef6f511c9L, 0xadd4cef0c00d95L, + 0x401c0ebaa1433fL, 0x3c3a59ebb38af9L } + }, + { + { 0x8706245f0e7dcaL, 0xad238cd3fb29caL, 0x03304439b7d8f0L, + 0xfdcd6e6154f495L, 0xc67e24a7d4ad09L, 0x1b209e85438390L, + 0xf893b81b0c211eL, 0x1aa86f07e11e36L }, + { 0x2cca3ffedea8b1L, 0x7eedd073b306cdL, 0x78e37bc12ee222L, + 0x257870bbc42a1dL, 0x5fb2bb91fbd397L, 0x470247009d6c60L, + 0x11748a320bdc36L, 0x3ff24dc04280e8L } + }, + { + { 0x0eb1c679839b52L, 0x5bcca27acfbd32L, 0xb506c1674898e3L, + 0x37d662e2489e5eL, 0x8dc0731f694887L, 0x571149ef43f1dcL, + 0x6430a3766d63dcL, 0x0d2640eb50dd70L }, + { 0x2b561493b2675bL, 0x1b4806588c604fL, 0x55c86a8aafbabcL, + 0xa7b9447608aabaL, 0xa42f63504cad8cL, 0x0f72b1dcee7788L, + 0x1d68374755d99aL, 0xd7cdd8f5be2531L } + }, +}, +{ + { + { 0x67873bdbcdfee1L, 0xa5a0c0afcd0a3fL, 0x59389f93cfa3d4L, + 0x14e945ce1c865cL, 0x62d2f8e1d588ccL, 0xfd02f8a8e228b4L, + 0x208f791b42b649L, 0x0e0dff1ab397adL }, + { 0x30ac3d90bc6eb1L, 0xf14f16a5f313bbL, 0x70fa447e2a0ad2L, + 0x6e406855a0db84L, 0xd52282be32e1e7L, 0x315a02a15ca330L, + 0x9a57a70867c2feL, 0x55f07650054923L } + }, + { + { 0x2d729f6c0cf08fL, 0x6b80138ebaf57fL, 0x6285bcc0200c25L, + 0xee845192cd2ac7L, 0x28fce4d922778aL, 0x761325ccd1011cL, + 0xd01f2475100e47L, 0xc7a1665c60d8e1L }, + { 0x950966d7ceb064L, 0x0a88e8578420dbL, 0x44f2cfce096f29L, + 0x9d9325f640f1d2L, 0x6a4a81fd2426f1L, 0x3ed6b189c905acL, + 0xba3c0e2008854dL, 0x1df0bd6a0d321bL } + }, + { + { 0x0117ad63feb1e7L, 0xa058ba2f1ae02fL, 0x5eee5aa31b3f06L, + 0x540d9d4afacd4dL, 0x38992f41571d91L, 0xef2738ebf2c7deL, + 0x28bfcab92a798dL, 0x37c7c5d2286733L }, + { 0xb99936e6470df0L, 0x3d762d58af6a42L, 0xa8c357ac74eec5L, + 0x9917bebf13afbcL, 0x28f0941f2dc073L, 0x306abf36ce7df7L, + 0xa3c5f6fd6973c8L, 0x640209b3677632L } + }, + { + { 0xee872a2e23aef7L, 0xb497b6feb9b08eL, 0xfb94d973f33c63L, + 0x9ea1ff42b32315L, 0x537b49249a4166L, 0x89c7fe6ab4f8beL, + 0xf68007fdad8f0fL, 0xe56ef0b71b8474L }, + { 0x478b2e83f333f9L, 0x144e718b2607f5L, 0x13aa605a4c7ab5L, + 
0xfc1fc991d0730dL, 0xe7a04375ab3ea1L, 0xc59986a306d8d3L, + 0x24f6111702a8b1L, 0x7741394e040ad2L } + }, + { + { 0x34c6a2560723a7L, 0x8aabd0df4ea691L, 0x9d676a55d7497fL, + 0x12c09577d91fa4L, 0x581c7a86479284L, 0xa54f3daf4fd449L, + 0x2f89f3c4ef44cfL, 0xfc266b5c9ec97cL }, + { 0xfcd3fbe88b142aL, 0x9f3109f4bd69c1L, 0x08839c0b5f5a6aL, + 0x63ca8502e68303L, 0x2f0628dbba0a74L, 0x743cccf5d56b54L, + 0xbd4b06613e09fdL, 0x7a8415bde2ba3eL } + }, + { + { 0x2234a3bc076ab2L, 0xd6953e54977a98L, 0xc12215831ebe2eL, + 0x632145fbad78e2L, 0xd7ba78aa5c4b08L, 0x6f4ea71998e32aL, + 0x25900d23485a63L, 0x97ac6286a5176fL }, + { 0x5df91181093f7bL, 0x2bf9829c844563L, 0x525d99d6272449L, + 0x4281cb5b5c8a18L, 0x35df2780544a08L, 0xf4c3d2dbaeb8f4L, + 0xc7ff3175230447L, 0x6b4d7645d2fbffL } + }, + { + { 0x4837f802b0c9cbL, 0xb65f8168ce8418L, 0xdf66ea99fc1428L, + 0x9788ee804ea7e8L, 0x9eae9008334e3cL, 0xbc91058d6ba1b6L, + 0x634aba1d7064b6L, 0x12d9bb3397b368L }, + { 0x0645c85c413aa8L, 0xb09dea6ac6b5e3L, 0x29a620d289a50bL, + 0x104db3bbbcceb1L, 0x42e479287b3309L, 0xdfc373eec97f01L, + 0xe953f94b93f84eL, 0x3274b7f052dfbfL } + }, + { + { 0x9d5670a1bd6fa9L, 0xec42fc9db6c4d4L, 0xaecd4ed1b42845L, + 0x4eed90e1b03549L, 0xeb3225cbbab1faL, 0x5345e1d28a2816L, + 0x3741cfa0b77d2aL, 0x712b19f7ea8caaL }, + { 0x42e6844661853eL, 0x4cf4126e4a6e5dL, 0x196a9cfc3649f6L, + 0x06621bcf21b6b1L, 0x887021c32e29eaL, 0x5703aeb8c5680fL, + 0x974be24660f6d7L, 0xaf09badc71864eL } + }, +}, +{ + { + { 0x3483535a81b6d3L, 0x19e7301ca037dcL, 0x748cab763ddfebL, + 0xe5d87f66f01a38L, 0xbba4a5c2795cd6L, 0x411c5d4615c36cL, + 0xff48efc706f412L, 0x205bafc4b519dfL }, + { 0xfcaa5be5227110L, 0x7832f463ad0af0L, 0x34ef2c42642b1bL, + 0x7bbef7b072f822L, 0x93cb0a8923a616L, 0x5df02366d91ba7L, + 0x5da94f142f7d21L, 0x3478298a14e891L } + }, + { + { 0xad79a0fc831d39L, 0x24d19484803c44L, 0x4f8a86486aeeb2L, + 0x0ca284b926f6b9L, 0x501829c1acd7cdL, 0x9f6038b3d12c52L, + 0x77223abf371ef5L, 0x2e0351613bf4deL }, + { 0x7a5a4f2b4468ccL, 0xdcea921470ae46L, 0xf23b7e811be696L, + 0xe59ad0d720d6fbL, 0x9eacac22983469L, 0x4dd4110c4397eeL, + 0x4ef85bdcbe2675L, 0xe4999f7aa7c74bL } + }, + { + { 0x031838c8ea1e98L, 0x539b38304d96a2L, 0x5fbdef0163956eL, + 0x6bd4d35ce3f52aL, 0xe538c2355e897fL, 0x6078d3a472dd3fL, + 0x590241eca9f452L, 0x2bc8495fd7fc07L }, + { 0x23d0c89ead4c8cL, 0x1ea55a9601c66eL, 0x41493c94f5b833L, + 0xc49a300aa5a978L, 0xc98bdc90c69594L, 0x4e44cedccbdc8cL, + 0xb0d4e916adccbfL, 0xd56e36b32c37aeL } + }, + { + { 0x052bd405b93152L, 0x688b1d44f1dbfaL, 0xe77ba1abe5cc5fL, + 0x11f8a38a6ac543L, 0x3355fd6e4bb988L, 0xdf29c5af8dffb4L, + 0x751f58981f20eeL, 0x22a0f74da9b7fbL }, + { 0xec8f2bc6397b49L, 0xff59fc93639201L, 0xb7f130aa048264L, + 0xe156a63afdc4ccL, 0x0fd7c34b13acafL, 0x87698d40cb4999L, + 0x6d6ecae7f26f24L, 0xae51fad0f296e2L } + }, + { + { 0xd0ad5ebdd0f58dL, 0x6ec6a2c5c67880L, 0xe1ce0349af1e0fL, + 0x08014853996d32L, 0x59af51e5e69d20L, 0x0ef743aaa48ecfL, + 0x8d3d2ea7dafcb0L, 0x4ac4fad89189b6L }, + { 0x92d91c2eae97f1L, 0xef5eca262b4662L, 0x440b213b38b10aL, + 0xec90187fc661daL, 0x85f3f25f64cf8dL, 0xcee53ca457ad1bL, + 0x8deed4bf517672L, 0x7706fb34761828L } + }, + { + { 0x1577d9117494feL, 0x52d29be2fd7239L, 0x9a0eef00186d37L, + 0x241d0f527fe108L, 0x42824bae6fb59fL, 0xb8d33df0d48c25L, + 0xfffdb0a47af4b0L, 0x534c601073b0b6L }, + { 0xe6df35951c033bL, 0x3e1002b86c0f94L, 0xa7cb55548fb9b6L, + 0x999818ba7bbff8L, 0xe4ba3d684d8bf2L, 0x53dbb326358f0aL, + 0xeebc1e2f2568e8L, 0xc6917ebb3e0f68L } + }, + { + { 0xbe1bbfc19f8d13L, 0xc3951b62d4795cL, 0x9371c49ed535a9L, + 0x77c389f68cebeaL, 0xfc1a947a141d0eL, 0x4b48d7ade44f8bL, 
+ 0x3db1f058580a26L, 0xeed1466258b5fcL }, + { 0x5daa4a19854b21L, 0x5bfa46f1ab1eadL, 0xc152e3559957ebL, + 0xdc84277ea48adaL, 0x68709cffc169b5L, 0xde50ce3720e617L, + 0xe42f262dd9a832L, 0xddffd4d2d6ce29L } + }, + { + { 0xd5ba5578fa0a56L, 0x0d7d0f1fafaf4cL, 0x7666e4138b63edL, + 0x04e65135d87f02L, 0xdca8866c958f32L, 0xaa8486d3ce2686L, + 0xe3785caf1cbcd3L, 0x8a9b11403c8335L }, + { 0x5c1dca22e0ef60L, 0x775af5b7d3fb20L, 0xe690ffc2b373a8L, + 0x30fe15d28330e6L, 0x8a1022bdd0f393L, 0x6bd7364966a828L, + 0x8d4b154949208aL, 0xfb38c6bb9d9828L } + }, +}, +{ + { + { 0x6d197640340ac2L, 0x969f473ecab5ffL, 0xead46f7c458e42L, + 0x168646a1d00eedL, 0xf70c878e0ce0cfL, 0xa7291d38d8d15aL, + 0x92cf916fdd10ccL, 0x6d3613424f86d5L }, + { 0xba50d172d5c4b4L, 0xe0af5024626f15L, 0x76f3809d76098aL, + 0x433dc27d6caaa8L, 0x72dc67a70d97a7L, 0x935b360f5c7355L, + 0xdbaac93179bb31L, 0x76738487ed1a33L } + }, + { + { 0x8d1ca668f9fa0dL, 0x4ed95d8a02f2bfL, 0xd19fc79f630d7bL, + 0x0448ec4f46fa51L, 0xb371dd8623bf3fL, 0xe94fabcd650e94L, + 0x3af3fcacd90a70L, 0x0f720c403ce3b7L }, + { 0x590814cd636c3bL, 0xcf6928d4469945L, 0x5843aaf484a4c6L, + 0xb5a4c1af9b4722L, 0x25116b36cfb2f9L, 0xf248cf032c2640L, + 0x8cd059e27412a1L, 0x866d536862fc5dL } + }, + { + { 0x156e62f6de4a2eL, 0x0365af7aafcc78L, 0x65c861819e925eL, + 0x4db5c01f8b2191L, 0x1fd26d1ad564faL, 0x16bbc5319c8610L, + 0x0718eef815f262L, 0x8684f4727f83d1L }, + { 0xa30fd28b0f48dbL, 0x6fef5066ab8278L, 0xd164e771a652dfL, + 0x5a486f3c6ebc8cL, 0xb68b498dc3132bL, 0x264b6efd73323fL, + 0xc261eb669b2262L, 0xd17015f2a35748L } + }, + { + { 0x4241f657c4bb1dL, 0x5671702f5187c4L, 0x8a9449f3973753L, + 0x272f772cc0c0cdL, 0x1b7efee58e280cL, 0x7b323494b5ee9cL, + 0xf23af4731142a5L, 0x80c0e1dd62cc9eL }, + { 0xcbc05bf675ffe3L, 0x66215cf258ce3cL, 0xc5d223928c9110L, + 0x30e12a32a69bc2L, 0x5ef5e8076a9f48L, 0x77964ed2329d5fL, + 0xdf81ba58a72cf2L, 0x38ea70d6e1b365L } + }, + { + { 0x1b186802f75c80L, 0x0c153a0698665aL, 0x6f5a7fe522e8ddL, + 0x96738668ddfc27L, 0x7e421d50d3bdceL, 0x2d737cf25001b2L, + 0x568840f0e8490cL, 0xea2610be30c8daL }, + { 0xe7b1bc09561fd4L, 0xeda786c26decb0L, 0x22369906a76160L, + 0x371c71478a3da3L, 0x1db8fce2a2d9bfL, 0x59d7b843292f92L, + 0x8097af95a665f9L, 0x7cb4662542b7a9L } + }, + { + { 0xa5c53aec6b0c2fL, 0xc4b87327312d84L, 0xfc374cbc732736L, + 0xa8d78fe9310cc0L, 0xd980e8665d1752L, 0xa62692d6004727L, + 0x5d079280146220L, 0xbd1fedb860fea5L }, + { 0xcbc4f8ab35d111L, 0x5ba8cdf3e32f77L, 0xd5b71adb614b93L, + 0x7b3a2df2f8808dL, 0x09b89c26ef2721L, 0x55a505447c3030L, + 0x21044312986ae6L, 0x427a0112367d4cL } + }, + { + { 0xe9fe256c1942d8L, 0x9e7377d96e3546L, 0x43e734cb0c1744L, + 0x5f46821211fbcaL, 0x44f83dc32b6203L, 0x84513086ad1d96L, + 0x54dd5192fbb455L, 0xc2a18222f10089L }, + { 0x01055a21855bfaL, 0x9e6d7b477078b4L, 0x3f8df6d30cea0eL, + 0x81c215032973f7L, 0x17dd761c0b3d40L, 0x040424c50d0abeL, + 0x5599413783deabL, 0xde9271e8f3146fL } + }, + { + { 0x5edfd25af4a11dL, 0x3a3c5307846783L, 0xb20086873edd31L, + 0x74e00ecfe0eef8L, 0xba65d2f3dd78c7L, 0xab1364371999f1L, + 0xfa9be5dde9a7e8L, 0xeb146ce87a8609L }, + { 0x76afd6565353e9L, 0xfa7023dd51ba1cL, 0x7a09f2237ede4fL, + 0xca085760ba7a1bL, 0xd973882b99950aL, 0xe894266ea5057aL, + 0xd01c4217f55e49L, 0x69cfb9c5555679L } + }, +}, +{ + { + { 0x67867e7c5d631aL, 0x1de88c55bcf47bL, 0x8366d06afd1352L, + 0xd7dbdef6e20337L, 0xb0f9e2f1253ec7L, 0x1be984510ad240L, + 0x63ec533f4a6118L, 0xd5e4c5b96ce633L }, + { 0x1d0b6c34df4a25L, 0xef9486a5a1b554L, 0x2f0e59e47b6ef3L, + 0x4d8042f2ff84d7L, 0x3e74aa3da359c9L, 0x1baa16fd21c160L, + 0xb4cff210191cbaL, 0x50032d8ebc6472L } + }, + { + { 
0xb6833e01fc1b13L, 0x8a8b7ba1a5ad8fL, 0xc0cafa2622b820L, + 0xc6663af738ed20L, 0xd8944868b18f97L, 0xcf0c1f9774fbe4L, + 0xeedd4355be814fL, 0xd81c02db57e543L }, + { 0x5e32afc310bad8L, 0x065bc819b813d1L, 0x8efc5fc3142795L, + 0x5006514732d59cL, 0x91e39df2b5a3ceL, 0x2ad4477faf4204L, + 0x1a96b184d9bd4fL, 0xc3fee95a4d9c07L } + }, + { + { 0xfac7df06b4ba61L, 0xa6ed551061aaefL, 0x35aa2d6133f609L, + 0x420cfba20ed13dL, 0x861c63eea03d0cL, 0x75f0c56f936d6eL, + 0xa25f68f3d9a3d5L, 0xba0b7fecd9f66eL }, + { 0x292e1354680772L, 0x6f6a2dba73f405L, 0xca6add924ea9e4L, + 0x81cfd61268daaaL, 0x7a4cb6ce6f147aL, 0x8ec3454bded8f5L, + 0xc8a893b11d61cbL, 0x2256ffc7656022L } + }, + { + { 0x6b33271575cb78L, 0x560d305adcd23eL, 0xeedbd3ad6d834bL, + 0x614a64a5a31e27L, 0xe40b47647ee0c8L, 0x8ef4ff68bd7c2cL, + 0xa5297fc0b77727L, 0x8759208baf88adL }, + { 0x86cfe64918df68L, 0x9d60a73cdd882eL, 0x546b642b953014L, + 0xbaceae38bbef55L, 0xdf58e43f1c3467L, 0x99a83fee9f9babL, + 0xcd52cbf57a4a8bL, 0xf744e968ae36ecL } + }, + { + { 0xb945869a607124L, 0x810dbe9440e6f6L, 0x9911e60738e381L, + 0x51df68c343b80bL, 0xe424336f7a3f39L, 0x2d32acb989015cL, + 0xa69b14931019e8L, 0x8a31a38ec12f93L }, + { 0x0d0d36997c916aL, 0xdc95f3b8885372L, 0xcf1a2613549040L, + 0x60f6f5eabe95a2L, 0xa909e9fe141325L, 0x7d598f2355c865L, + 0x70c6442931a9c9L, 0x2354a85b423850L } + }, + { + { 0x4cdd22497f9619L, 0x4776fffc22162eL, 0xee5ec330cd31c2L, + 0x7c04c10f209bb8L, 0x35bbfde579e211L, 0x0e3832515cdfc2L, + 0x657e6d3e26ffa7L, 0xc66a7c3c65c604L }, + { 0x322acd7b45e567L, 0x1589cf0296db9bL, 0x1fd0bd3ba1db73L, + 0xe8826109337a40L, 0xf505a50b3035c7L, 0x4d5af066ed08d7L, + 0xb3c376b5eda400L, 0x9c7b7001944748L } + }, + { + { 0xd76832570c3716L, 0xda62af0dd540e0L, 0x76b155d6580feaL, + 0x4f42acc32b5464L, 0x881bb603f5b72bL, 0x09c130ee68b9baL, + 0x37ede3b5c50342L, 0xce61a9cfd15e7dL }, + { 0xfff1d8572605d0L, 0x62ac2d3062abc2L, 0xa85e02efbe43ddL, + 0x859d2baa947020L, 0x2ebc8a9111c20bL, 0x7f590a7a656f66L, + 0x0e1384316b21a6L, 0x29b30c500c7db6L } + }, + { + { 0x61e55e2906b8deL, 0x6a97e96949974dL, 0x24b52b526eef67L, + 0x512f5361aa595aL, 0x81cc7b83c48fcbL, 0xa64af2328115adL, + 0x9edf6f93d44b8eL, 0x68d7f7c1fe22e3L }, + { 0x2b2116a520d151L, 0x66a0b7d6aa3efbL, 0x48ae70a9b0f791L, + 0xcf12174037db88L, 0x36868cd317d9f3L, 0xb57305922fc344L, + 0xbaa852646a5d23L, 0xad6569137fc10dL } + }, +}, +{ + { + { 0xcf8e5f512c78d5L, 0xeb94d98805cdbdL, 0xad1dcdf2ab50b5L, + 0xf33c136f33cd31L, 0x0d6226b10aeff5L, 0xf7ff493f2f8fc5L, + 0x7e520d4df57165L, 0x41fbae505271a7L }, + { 0x72c898776480baL, 0x260835925f4523L, 0xed36b8d49f5f01L, + 0x3bc1dcef3d49ebL, 0x30c1c1a4940322L, 0x78c1cda7e0f731L, + 0x51f2dc86d05a31L, 0x57b0aa807f3522L } + }, + { + { 0x7ab628e71f88bcL, 0xcf585f38018f21L, 0xdbbe3a413d64f6L, + 0x0f86df1ec493a5L, 0x8355e6c7725de9L, 0x3954ffee00fe1eL, + 0xbb8978f9924e32L, 0x1c192987812714L }, + { 0x7c4ce3eaabca8bL, 0xf861eb59bf7019L, 0x31a84fc682e541L, + 0x2307ca9acd1b92L, 0x6f8b6ce4bf2842L, 0xde252accb9f9a9L, + 0x7f0611d93c46d1L, 0x8e2bd80751dc98L } + }, + { + { 0xf2fd8fbe27d54bL, 0x2a1e37ec248071L, 0x2fcc888ab8f49aL, + 0x42c62a3c18a9e5L, 0xe30290870b2446L, 0x90277fac5ac55dL, + 0x8d97d56d6dde41L, 0xf4cf8a95db04feL }, + { 0x3e280f5d30d077L, 0x2c903073cb3293L, 0xe0be2ac24eb0ddL, + 0xa2d1a498bcb4f0L, 0x16db466cd0cd45L, 0x3b28aa79a80232L, + 0xdd7e52f17b008eL, 0x20685f2868e4daL } + }, + { + { 0x0a68c147c7a486L, 0xd8ef234c429633L, 0x470667bffe7506L, + 0x55a13c88828d51L, 0x5f327412e44befL, 0x537d92a5929f92L, + 0x0a01d5b31c5cd5L, 0xb77aa7867eb3d7L }, + { 0x36ec45f8b82e4dL, 0x6821da0b37b199L, 0x8af37aad7fa94eL, + 
0xf0206421085010L, 0x9b886787e56851L, 0x35f394452948ceL, + 0x125c2baafc1361L, 0x8a57d0e453e332L } + }, + { + { 0xefe99488043664L, 0xb8b8509db1aa55L, 0x1a2e5a9332523fL, + 0x5e255dd1045c0fL, 0xe68dd8a7ae7180L, 0x55f1cf345bf532L, + 0xe00722ee63a716L, 0xd1c21386116bacL }, + { 0x626221f1c6d1f4L, 0x240b8303773278L, 0xe393a0d88def16L, + 0x229266eca0495cL, 0x7b5c6c9d3e4608L, 0xdc559cb7927190L, + 0x06afe42c7b3c57L, 0x8a2ad0bb439c9bL } + }, + { + { 0xd7360fbffc3e2fL, 0xf721317fbd2e95L, 0x8cacbab5748e69L, + 0x7c89f279054bb9L, 0xcbe50faaa86881L, 0x7aa05d375206e4L, + 0x1ea01bcc752c66L, 0x5968cde1f2c2bcL }, + { 0x487c55f09a853eL, 0x82cbef1e09204bL, 0xad5c492abd8670L, + 0x7175963f12dcb3L, 0x7a85762bf6aa06L, 0x02e5697f8d5237L, + 0xccf7d1937c6157L, 0x3b14ca6c2fd59cL } + }, + { + { 0x5e610d81b9f77fL, 0x85876d0051b02fL, 0x5d81c63b8020ddL, + 0xd0b4116d6ce614L, 0x91810e5aa8bf0cL, 0xf27f91fcbf8c66L, + 0x2e5dc5f38480aeL, 0x0a13ffebec7633L }, + { 0x61ff6492bf6af8L, 0xe6aef2d641f827L, 0xad5708a5de5f04L, + 0xe5c3a80cdfee20L, 0x88466e268fcfa2L, 0x8e5bb3ad6e1d7bL, + 0xa514f06ed236b8L, 0x51c9c7ba5f5274L } + }, + { + { 0xa19d228f9bc3d8L, 0xf89c3f03381069L, 0xfee890e5c3f379L, + 0x3d3ef3d32fb857L, 0x39988495b418ddL, 0x6786f73c46e89aL, + 0x79691a59e0f12fL, 0x76916bf3bc022bL }, + { 0xea073b62cd8a0aL, 0x1fbedd4102fdbcL, 0x1888b14cb9d015L, + 0x98f2cfd76655f7L, 0xb9b591059f0494L, 0xa3dbbe1e6986a3L, + 0xef016a5eaf2b04L, 0xf671ba7cd2d876L } + }, +}, +{ + { + { 0x1dae3bf1ae05e9L, 0x6a029961f21fefL, 0x95df2b97aec3c6L, + 0x9abbc5ad83189bL, 0xaf994af2d13140L, 0xc3f884686aa406L, + 0xcd77e5075284c5L, 0x1c1e13d2a9a4d7L }, + { 0x7f8815d744b89dL, 0xb1891332ba673eL, 0x55ea93cd594570L, + 0x19c8a18d61b041L, 0x938ebaa8d2c580L, 0x9b4344d05ba078L, + 0x622da438eaf9b7L, 0x809b8079fea368L } + }, + { + { 0x3780e51c33b7a2L, 0xd7a205c387b1c8L, 0x79515f84be60e4L, + 0xde02a8b1e18277L, 0x4645c96f0d9150L, 0x45f8acbe0b3fd1L, + 0x5d532ba9b53ac3L, 0x7984dcdb0557c9L }, + { 0x5ae5ca68a92f01L, 0xd2fbb3c9d569caL, 0x668cc570c297c1L, + 0xa4829436295e89L, 0xf646bc1a33ad40L, 0x066aaa4c3f425dL, + 0x23434cdd005de2L, 0x5aca9e9db35af4L } + }, + { + { 0x2bca35c6877c56L, 0xab864b4f0ddd7dL, 0x5f6aa74404f46cL, + 0x72be164539c279L, 0x1b1d73ee0283cfL, 0xe550f46ad583d9L, + 0x4ac6518e739ad1L, 0x6b6def78d42100L }, + { 0x4d36b8cfa8468dL, 0x2cb37735a3d7b8L, 0x577f86f5016281L, + 0xdb6fe5f9124733L, 0xacb6d2ae29e039L, 0x2ab8330580b8a1L, + 0x130a4ac643b2d0L, 0xa7996e35e6884eL } + }, + { + { 0x6fb627760a0aa8L, 0xe046843cbe04f0L, 0xc01d120e6ad443L, + 0xa42a05cabef2fcL, 0x6b793f112ff09cL, 0x5734ea8a3e5854L, + 0xe482b36775f0adL, 0x2f4f60df864a34L }, + { 0xf521c5884f2449L, 0x58734a99186a71L, 0x157f5d5ac5eaccL, + 0x858d9a4248ee61L, 0x0727e6d48149c3L, 0xd5c3eaaac9ec50L, + 0xa63a64a20ee9b5L, 0x3f0dfc487be9deL } + }, + { + { 0x836349db13e3f4L, 0xebdd0263e9316dL, 0x3fd61e8324fd6cL, + 0x85dddfa0964f41L, 0x06e72de52add1bL, 0xb752cff8c4a9e2L, + 0x53b0894fdf09f7L, 0xd5220ab0bc24fdL }, + { 0x8442b35fb1981aL, 0xa733a373edd701L, 0x42b60c3d0ef089L, + 0xa1b16ec46e7bcaL, 0xc0df179a09aaf4L, 0xcd4f187638f3a1L, + 0x9af64f79eab1c2L, 0x86fed79d1d78e3L } + }, + { + { 0x42c8d86fe29980L, 0x6657b816575660L, 0x82d52c680f92caL, + 0x8587af102d42beL, 0xb5151316e8bdf0L, 0x706e2d9c333495L, + 0xd53601a9673064L, 0x27b1fbb8219099L }, + { 0x3f0929d705f7c8L, 0xff40b10f3d6e6fL, 0x673c703026af5cL, + 0x2c1dce4e25a422L, 0x5348bd73dad8b6L, 0xc39b6b6be2c329L, + 0x47854ffb921084L, 0xb347b8bb391f20L } + }, + { + { 0x79fc841eb9b774L, 0xf32da25b4b6c1dL, 0xcbba76bfe492cbL, + 0x76c51fcd623903L, 0x114cf6fcf0705aL, 0x6b720497815dafL, 
+ 0x630b362473382eL, 0xbf40c3a9704db5L }, + { 0xa8a9ddcc5456ebL, 0x2b4472a72f2dc1L, 0x9874444d6d6ef3L, + 0x27e8d85a0ba5edL, 0x5d225b4194849fL, 0xe852cd6ebaa40dL, + 0xb669c248d4bf3fL, 0xa8601eb2343991L } + }, + { + { 0x8a0485459502d3L, 0xcab27eee269a7bL, 0x41793074875adaL, + 0x179e685e2405f9L, 0x0d7b6987b28963L, 0x80c9db8422a43eL, + 0xf5ff318a0f43eeL, 0x7a928054ba7aa7L }, + { 0xa5c79fe0c0834eL, 0x837ca0d1f849ecL, 0xfe0d7fa628ab7bL, + 0x94bcb956edd19aL, 0xa18bc932226fbfL, 0x2795379aad54a3L, + 0xceeacf8371129eL, 0x65ca57fa588be5L } + }, +}, +{ + { + { 0x7a578b52caa330L, 0x7c21944d8ca34aL, 0x6c0fbbb6447282L, + 0xa8a9957f90b2e5L, 0xbbe10666586b71L, 0x716a90249138a2L, + 0x2fa6034e7ed66dL, 0x56f77ed2b9916aL }, + { 0x69f1e26bddefb3L, 0xa4978098c08420L, 0xc3377eb09bc184L, + 0x796ce0cbe6dadeL, 0x3be0625d103bbbL, 0x01be27c992685cL, + 0xc0e25597755f9fL, 0x165c40d1c0dbfaL } + }, + { + { 0xc63a397659c761L, 0x10a0e5b630fbadL, 0xf21e8a6655ac56L, + 0xe8580fac1181e2L, 0xbfc2d9c0a84b5cL, 0x2cdbaff7afd5d1L, + 0x95f1182f61e85aL, 0x1173e96719eaf4L }, + { 0xc06d55ec6de8b9L, 0x1b4c8ebafcbcaaL, 0x52af5cbbc2bbcdL, + 0x564fab877bcd10L, 0xfd53a18ae85a6eL, 0x225785994c712fL, + 0x29b11d71352121L, 0xab1cb76c40491aL } + }, + { + { 0xb4e8ca8ce32eb4L, 0x7e484acb250b49L, 0x062c6f7a3e31a2L, + 0x497fd83625d1fcL, 0x98f821c362dda7L, 0xcae1f8f6be3111L, + 0x9077e955d4fa42L, 0xa589971a65855aL }, + { 0xda6321d28832a9L, 0xf9ef5dc3936e9eL, 0xa37f117c9797efL, + 0x0eb3c80db581beL, 0x207c5c4baa0002L, 0xc0401b5f38faa0L, + 0xceee523d0f1e6eL, 0x8d27a5fd1f0045L } + }, + { + { 0x9411063cf0af29L, 0x304385789a6693L, 0x9a9fb8f640145eL, + 0x7d82fe954832ebL, 0xf2789e1898c520L, 0x448b402f948dc0L, + 0xeca8fdf68996ddL, 0x22227e9a149b2fL }, + { 0x63509ff8e62d6aL, 0xe98d81c8c9c57fL, 0xd3874071fe3bedL, + 0xf1db013539538fL, 0xb04092e48418ceL, 0xbbf8e76d6d9d4dL, + 0x2ea9cda2cec5aeL, 0x8414b3e5078fa9L } + }, + { + { 0x5ad1cdbd68a073L, 0xd4cedafc18b591L, 0x78267078e4c1c9L, + 0x9b8d9209ca302aL, 0x3101bd2326115bL, 0x6f154b54c2717aL, + 0x618c31b263e84bL, 0x12c4138bbd6942L }, + { 0xf9ead2580da426L, 0xe748e9947d9680L, 0x9b396a38a4210eL, + 0xfaf03ddf4b8f72L, 0xbd94a5266159e7L, 0x5e730491d4c7cbL, + 0x31d1f9a7910f38L, 0x4fd10ca08d6dd1L } + }, + { + { 0x4f510ac9f2331eL, 0xee872dc7e3dcc2L, 0x4a11a32a0a0c73L, + 0x27e5803aa5a630L, 0xe5ae5037af4a8aL, 0x2dcdeba9fffeb0L, + 0x8c27748719d91fL, 0xd3b5b62b9cc61cL }, + { 0x998ac90cca7939L, 0xc22b59864514e5L, 0x950aaa1b35738aL, + 0x4b208bbdab0264L, 0x6677931a557d2eL, 0x2c696d8f7c17d3L, + 0x1672d4a3e15c51L, 0x95fab663db0e82L } + }, + { + { 0x3d427346ff205eL, 0x7f187d90ea9fbeL, 0xbd9367f466b2afL, + 0x188e53203daf2fL, 0xefe132927b54d8L, 0x14faf85ef70435L, + 0xa5061281ec95c4L, 0xad01705c22cba7L }, + { 0x7d2dfa66197333L, 0xedd7f078b4f6edL, 0xe0cb68575df105L, + 0x47c9ddb80f76bcL, 0x49ab5319073c54L, 0x845255ae607f44L, + 0x0b4ed9fcc74b7cL, 0xcfb52d50f5c3a6L } + }, + { + { 0x545c7c6c278776L, 0x92a39ae98c30f0L, 0x8aa8c01d2f4680L, + 0xa5409ed6b7f840L, 0x0c450acdcb24e7L, 0x5da6fb2c5770d9L, + 0x5b8e8be8658333L, 0xb26bf4a67ea4adL }, + { 0x2e30c81c7d91faL, 0x6e50a490eeb69fL, 0x9458c2bee4bc26L, + 0x419acf233be250L, 0x79d6f8187881abL, 0x694565d403b1beL, + 0x34b3990234fe1dL, 0x60997d72132b38L } + }, +}, +{ + { + { 0x00a974126975dcL, 0x42161c46cf94e7L, 0xcc9fe4bc64ed99L, + 0x020019a4680570L, 0x885595a698da0dL, 0x008444b77dd962L, + 0xbf3c22da4fea0eL, 0xc4630482c81245L }, + { 0xcb248c5793ab18L, 0x4dc7a20eb4320bL, 0x9a0906f1572b7dL, + 0xd5b3019f9ac20fL, 0x79b1bf534520a3L, 0x788dfe869b5322L, + 0x9a05298455b7e2L, 0x2f4aecb016bca9L } + }, + { + { 
0x414d3798745618L, 0x64ba22eb7c983cL, 0x9a5d19f9f9d532L, + 0x81a00d844a80c8L, 0xb9e24f5cae98d6L, 0x6c3769caca965aL, + 0x50d6081f6e4e6dL, 0x0d9698054422a6L }, + { 0xbd7e7925cdd790L, 0xcff65da6a35219L, 0x40dc3638b60ebeL, + 0x84bee7492a50dcL, 0x57d4be415ad65eL, 0xc54256b1a6d1d3L, + 0x141c64945717ccL, 0x05eb609cd1c736L } + }, + { + { 0xfd52eab1e3c7ecL, 0xa4a5eca9f24895L, 0xaaa2a8d79fdb83L, + 0xd105e6072bdfdaL, 0x59e6ae2681d97eL, 0xfedf8e08e8077fL, + 0xb06d0ad629e462L, 0x8c7c2d096fa863L }, + { 0x5eecc4cee8fc91L, 0x5e83ab29e61174L, 0x1fd8925b28c02dL, + 0x93be5382072864L, 0xda0c88624c984eL, 0xdcf9f0ca008286L, + 0x1ecb5a6a58ba75L, 0x1d9b890c2e3c83L } + }, + { + { 0x19e866eeeee062L, 0x31c1c7f4f7b387L, 0x9be60181c06652L, + 0xc00a93a2b68bbbL, 0x54c65d69d52b2bL, 0x4591416e8b744aL, + 0x641bcca9a64ab6L, 0xf22bcb1ab08098L }, + { 0x3c0db8ff1f726cL, 0x4f5739e9d2e6a6L, 0x5cb669b45c9530L, + 0x861b04e7b472d0L, 0x3e30515894da77L, 0x3344685c9ac39bL, + 0x9e1730573bdd29L, 0x9cac12c808dc85L } + }, + { + { 0xf152b865e27087L, 0x267bd8590a580eL, 0xba79cec8baafc1L, + 0x6140ab19442686L, 0xa67090c5b31693L, 0x50a103a28b4117L, + 0x7722e610ddc08fL, 0x5d19d43e6569b2L }, + { 0x70e0c525962bf6L, 0x808e316fb5fb02L, 0x3fb80da5b667beL, + 0x8aa366efcfacecL, 0xcb0b3e7134280eL, 0x0bf1de4cd7d944L, + 0x0cd23bed092df5L, 0xc9a6a79a153a0cL } + }, + { + { 0x1c69ad02d5a4b7L, 0x4bb28d0d9e6f4aL, 0x815308ca984fc6L, + 0x40929c79037ca5L, 0x0ea2b491bd0357L, 0xec17e5b42aad4eL, + 0x1f32ade18e7235L, 0xbc60b05a96a9d3L }, + { 0x3b0229ae20f707L, 0xd63505056bdfadL, 0xac2d922d8b2e1eL, + 0x92b2998235c748L, 0x6002c3ad766f97L, 0x99198001a2a862L, + 0x2af7567b58b684L, 0xd8fe707aaafce5L } + }, + { + { 0x54487ab5df7a4bL, 0x51cccdec57ccc2L, 0x23943277510b53L, + 0x3a09f02f555de3L, 0xa696aec1be484dL, 0x56f459f37817a2L, + 0x8d8f61c623dcb4L, 0xc52223c5335656L }, + { 0xf634111b49914aL, 0xbf8e1ab8e4f9bbL, 0x2f59578f4dba02L, + 0x2a94199e004319L, 0x87931f0654d005L, 0x7df57d96fa0814L, + 0xc8da316a154031L, 0x2a44ac041f658bL } + }, + { + { 0xfb5f4f89e34ac6L, 0x0a1b10b97790f2L, 0x58fe4e74b8a06cL, + 0x10c1710955f27cL, 0x77b798ad5ebe19L, 0xaf1c35b1f1c2dcL, + 0xc25b8e6a1f8d69L, 0x49cf751f76bf23L }, + { 0x15cb2db436f7b7L, 0x186d7c27e74d1aL, 0x60731dec00a415L, + 0xea1e15615f0772L, 0xf02d591714463fL, 0x26a0c6451adeb1L, + 0x20174cdcc5229eL, 0xb817e50efd512aL } + }, +}, +}; + +static const ge448_precomp base_i[16] = { + { + { 0x26a82bc70cc05eL, 0x80e18b00938e26L, 0xf72ab66511433bL, + 0xa3d3a46412ae1aL, 0x0f1767ea6de324L, 0x36da9e14657047L, + 0xed221d15a622bfL, 0x4f1970c66bed0dL }, + { 0x08795bf230fa14L, 0x132c4ed7c8ad98L, 0x1ce67c39c4fdbdL, + 0x05a0c2d73ad3ffL, 0xa3984087789c1eL, 0xc7624bea73736cL, + 0x248876203756c9L, 0x693f46716eb6bcL } + }, + { + { 0x28173286ff2f8fL, 0xb769465da85757L, 0xf7f6271fd6e862L, + 0x4a3fcfe8daa9cbL, 0xda82c7e2ba077aL, 0x943332241b8b8cL, + 0x6455bd64316cb6L, 0x0865886b9108afL }, + { 0x22ac13588ed6fcL, 0x9a68fed02dafb8L, 0x1bdb6767f0bffaL, + 0xec4e1d58bb3a33L, 0x56c3b9fce43c82L, 0xa6449a4a8d9523L, + 0xf706cbda7ad43aL, 0xe005a8dbd5125cL } + }, + { + { 0xa99d1092030034L, 0x2d8cefc6f950d0L, 0x7a920c3c96f07bL, + 0x958812808bc0d5L, 0x62ada756d761e8L, 0x0def80cbcf7285L, + 0x0e2ba7601eedb5L, 0x7a9f9335a48dcbL }, + { 0xb4731472f435ebL, 0x5512881f225443L, 0xee59d2b33c5840L, + 0xb698017127d7a4L, 0xb18fced86551f7L, 0x0ade260ca1823aL, + 0xd3b9109ce4fd58L, 0xadfd751a2517edL } + }, + { + { 0xdf9567ceb5eaf7L, 0x110a6b478ac7d7L, 0x2d335014706e0bL, + 0x0df9c7b0b5a209L, 0xba4223d568e684L, 0xd78af2d8c3719bL, + 0x77467b9a5291b6L, 0x079748e5c89befL }, + { 0xe20d3fadac377fL, 
0x34e866972b5c09L, 0xd8687a3c40bbb7L, + 0x7b3946fd2f84c9L, 0xd00e40ca78f50eL, 0xb87594417e7179L, + 0x9c7373bcb23583L, 0x7ddeda3c90fd69L } + }, + { + { 0x3d0def76ab686bL, 0x1a467ec49f7c79L, 0x3e53f4fc8989edL, + 0x101e344430a0d9L, 0xa3ae7318ad44eeL, 0xaefa6cdae1d134L, + 0xaa8cd7d824ad4dL, 0xef1650ced584fcL }, + { 0xa74df674f4754fL, 0xf52cea8ef3fb8bL, 0x47c32d42971140L, + 0x391c15da256fbbL, 0xc165faba605671L, 0xf2518c687993b9L, + 0x2daf7acbd5a84dL, 0x1560b6298f12aeL } + }, + { + { 0xef4da0254dc10aL, 0x63118655940db8L, 0xe20b14982f2948L, + 0x67b93775581dbaL, 0x422ee7104f5029L, 0x5d440db5122d34L, + 0xb1e56d71a4c640L, 0xbf12abbc2408eeL }, + { 0x0cc9f86016af01L, 0x88366abf3d8cabL, 0x85dda13a2efe12L, + 0x390df605d00674L, 0xf18f5806d187f7L, 0x28c900ff0c5d20L, + 0xad308123e01733L, 0x42d35b554bf2fdL } + }, + { + { 0x009135f2ffb1f1L, 0x099fc7e8f9c605L, 0xcc67da626bfa5aL, + 0xc186d12344552bL, 0xb5232501b339e1L, 0x70a544fc9708c5L, + 0x06baaec1e928e7L, 0x0baedd2ef0f50fL }, + { 0x535d6d8bf479e5L, 0x156e536e4ec3e9L, 0x3165741ddb9be2L, + 0x988af7159fd736L, 0x13d8a782e33dddL, 0x54604214e69002L, + 0x34d56e0804a268L, 0xc59b84f0e52a4cL } + }, + { + { 0x525d45f24729d9L, 0x5768aba8712327L, 0xa25e43b43035dbL, + 0x15a1ee8927ef21L, 0xa785d216056112L, 0x45e2fbfd508af9L, + 0xb6f721a37ba969L, 0x30d6d8c216d8d3L }, + { 0x3065e0852074c3L, 0xfa40b4a2a0684eL, 0x851325a763f955L, + 0xd4ef19c9f25900L, 0x799c869f665756L, 0x7b052223312990L, + 0xc986c2b28db802L, 0xf48fb8f28ade0aL } + }, + { + { 0x1e461731649b68L, 0xa96e5d65beb9dcL, 0x765ddff481935dL, + 0x6cf132c9f3bf2aL, 0x9f6c5c97c35658L, 0x99cd1394696e60L, + 0x99fa9249c0d5e4L, 0x1acd0638845a95L }, + { 0x0b065413636087L, 0xea20e78ea17b7fL, 0x20afc5f6161967L, + 0xfd6c8a2dc81028L, 0x4ef1357e32c8fdL, 0x8aa400400e4a88L, + 0xd6fcaef48cb82fL, 0x7ba7c6db3cd4faL } + }, + { + { 0xf843473d19c7abL, 0x968e76dc655c4dL, 0x52c87d9c4b9c2fL, + 0x65f641ae4aa082L, 0x491a39733c3603L, 0xa606ffe5810098L, + 0x09920e68bf8ad4L, 0x691a0c86db7882L }, + { 0x5205883a4d3ef5L, 0xee839b7acf2efeL, 0x4b78e2ac00ca66L, + 0xbe3f071f9fcb91L, 0x61e66c9bf6943aL, 0xe9b4e57061b79dL, + 0x8d1b01b56c06bdL, 0x0dfa315df76ae5L } + }, + { + { 0x803df65f1fd093L, 0x1cd6523489b77eL, 0x2cd2e15c20e295L, + 0xcd490be9b912d1L, 0xdd9a2ff2e886d2L, 0xa3c836dfe9d72aL, + 0xfcad5f2298e0c1L, 0xed126e24bcf067L }, + { 0x1e339533dc81bcL, 0xbea4d76ece6a08L, 0x1d15de3991b252L, + 0x74cc5cfe6daf97L, 0x5ad343f0826493L, 0x2d38a471064049L, + 0xf7f47b9ffcfa4dL, 0xef14490418066cL } + }, + { + { 0x4e7f86b9bb55abL, 0x310d7853f496a3L, 0xbd682fc0dec42cL, + 0xbde047a411d32aL, 0xea639b4c5a5ea2L, 0x5052078ba08fa1L, + 0xc968b2307729f2L, 0x567b5a623d3e28L }, + { 0x171e825977fbf7L, 0x0319c70be990aaL, 0x8f65023e12cd69L, + 0x1fb9b19f5015e6L, 0x0083f603568a7cL, 0xba3d30b1f3c5acL, + 0xe7b509d3d7a988L, 0x2318b99cd0f6b6L } + }, + { + { 0x54d3b8793ab2cfL, 0x366abead2d8306L, 0x66e8eb6d7a4977L, + 0xa61888cae0072eL, 0x9eeeef5dbc3315L, 0x93f09db163e7f5L, + 0xee9095959ade9aL, 0xaf7f578ce59be0L }, + { 0x24bfd8d5ece59eL, 0x8aa698b3689523L, 0xa9a65de2de92cfL, + 0xec11dbca6ad300L, 0x217f3fa09f88caL, 0xf6c33e3b4d6af7L, + 0xcd3bfa21d86d2dL, 0x1497f835f13f25L } + }, + { + { 0xa579568cd03d1dL, 0xd717cdae158af6L, 0x59eda97389a19fL, + 0xb32c370099e99cL, 0xa2dba91dabb591L, 0x6d697d577c2c97L, + 0x5423fc2d43fa6dL, 0x56ea8a50b382bfL }, + { 0x4a987bad80c11aL, 0xe4cde217d590a5L, 0x3dd8860f97e559L, + 0xff45e2543b593cL, 0x00eb4535343cb5L, 0x06b9b997bbfbddL, + 0x4da36b716aea24L, 0x247651757a624eL } + }, + { + { 0x32207d03474e0dL, 0x3ffbf04b41cc73L, 0x5c4dc45319eb39L, + 0xfee29be758b463L, 
0xcc8a381c30c7a7L, 0x147f4e49fe0e53L,
+      0x05b2e26e35a2deL, 0x4362f0292f3666L },
+    { 0x0476d0c8474b85L, 0x9d8c65fccaf108L, 0xf58d4041d54b6aL,
+      0x3ee6862f38e4b0L, 0x7c7c9d53b44f54L, 0x36a3fd80fb0db5L,
+      0xfcd94ba18a8ac8L, 0xc1b1d568f35c05L }
+  },
+  {
+    { 0x16539fc1bdd30dL, 0x1356e538df4afbL, 0xc0545d85a1aedbL,
+      0xeb2037a489396bL, 0x897fcbd5660894L, 0x02a58a9b7d104aL,
+      0x57fa24cc96b980L, 0xf6448e35bd8946L },
+    { 0xee727418805c83L, 0x10fa274992cfc6L, 0x95141939e66b21L,
+      0xe0ffa44bd08009L, 0x174332220da22bL, 0x4891ff359e6831L,
+      0x407ed73a7d687bL, 0x2fb4e0751d99cfL }
+  },
+};
+#else
+
+/* Reduce scalar mod the order of the curve.
+ * Scalar will be 114 bytes.
+ *
+ * b  [in/out]  Scalar to reduce; the reduced scalar is written back to b.
+ */
+void sc448_reduce(uint8_t* b)
+{
+    uint32_t d[16];
+    uint64_t t[33];
+    uint64_t c;
+    uint32_t o;
+
+    /* Load from bytes */
+    t[ 0] = (((int32_t)((b[ 0] ) >> 0)) << 0)
+          | (((int32_t)((b[ 1] ) >> 0)) << 8)
+          | (((int32_t)((b[ 2] ) >> 0)) << 16)
+          | ((((int32_t)((b[ 3] & 0xf )) >> 0)) << 24);
+    t[ 1] = (((int32_t)((b[ 3] ) >> 4)) << 0)
+          | (((int32_t)((b[ 4] ) >> 0)) << 4)
+          | (((int32_t)((b[ 5] ) >> 0)) << 12)
+          | (((int32_t)((b[ 6] ) >> 0)) << 20);
+    t[ 2] = (((int32_t)((b[ 7] ) >> 0)) << 0)
+          | (((int32_t)((b[ 8] ) >> 0)) << 8)
+          | (((int32_t)((b[ 9] ) >> 0)) << 16)
+          | ((((int32_t)((b[10] & 0xf )) >> 0)) << 24);
+    t[ 3] = (((int32_t)((b[10] ) >> 4)) << 0)
+          | (((int32_t)((b[11] ) >> 0)) << 4)
+          | (((int32_t)((b[12] ) >> 0)) << 12)
+          | (((int32_t)((b[13] ) >> 0)) << 20);
+    t[ 4] = (((int32_t)((b[14] ) >> 0)) << 0)
+          | (((int32_t)((b[15] ) >> 0)) << 8)
+          | (((int32_t)((b[16] ) >> 0)) << 16)
+          | ((((int32_t)((b[17] & 0xf )) >> 0)) << 24);
+    t[ 5] = (((int32_t)((b[17] ) >> 4)) << 0)
+          | (((int32_t)((b[18] ) >> 0)) << 4)
+          | (((int32_t)((b[19] ) >> 0)) << 12)
+          | (((int32_t)((b[20] ) >> 0)) << 20);
+    t[ 6] = (((int32_t)((b[21] ) >> 0)) << 0)
+          | (((int32_t)((b[22] ) >> 0)) << 8)
+          | (((int32_t)((b[23] ) >> 0)) << 16)
+          | ((((int32_t)((b[24] & 0xf )) >> 0)) << 24);
+    t[ 7] = (((int32_t)((b[24] ) >> 4)) << 0)
+          | (((int32_t)((b[25] ) >> 0)) << 4)
+          | (((int32_t)((b[26] ) >> 0)) << 12)
+          | (((int32_t)((b[27] ) >> 0)) << 20);
+    t[ 8] = (((int32_t)((b[28] ) >> 0)) << 0)
+          | (((int32_t)((b[29] ) >> 0)) << 8)
+          | (((int32_t)((b[30] ) >> 0)) << 16)
+          | ((((int32_t)((b[31] & 0xf )) >> 0)) << 24);
+    t[ 9] = (((int32_t)((b[31] ) >> 4)) << 0)
+          | (((int32_t)((b[32] ) >> 0)) << 4)
+          | (((int32_t)((b[33] ) >> 0)) << 12)
+          | (((int32_t)((b[34] ) >> 0)) << 20);
+    t[10] = (((int32_t)((b[35] ) >> 0)) << 0)
+          | (((int32_t)((b[36] ) >> 0)) << 8)
+          | (((int32_t)((b[37] ) >> 0)) << 16)
+          | ((((int32_t)((b[38] & 0xf )) >> 0)) << 24);
+    t[11] = (((int32_t)((b[38] ) >> 4)) << 0)
+          | (((int32_t)((b[39] ) >> 0)) << 4)
+          | (((int32_t)((b[40] ) >> 0)) << 12)
+          | (((int32_t)((b[41] ) >> 0)) << 20);
+    t[12] = (((int32_t)((b[42] ) >> 0)) << 0)
+          | (((int32_t)((b[43] ) >> 0)) << 8)
+          | (((int32_t)((b[44] ) >> 0)) << 16)
+          | ((((int32_t)((b[45] & 0xf )) >> 0)) << 24);
+    t[13] = (((int32_t)((b[45] ) >> 4)) << 0)
+          | (((int32_t)((b[46] ) >> 0)) << 4)
+          | (((int32_t)((b[47] ) >> 0)) << 12)
+          | (((int32_t)((b[48] ) >> 0)) << 20);
+    t[14] = (((int32_t)((b[49] ) >> 0)) << 0)
+          | (((int32_t)((b[50] ) >> 0)) << 8)
+          | (((int32_t)((b[51] ) >> 0)) << 16)
+          | ((((int32_t)((b[52] & 0xf )) >> 0)) << 24);
+    t[15] = (((int32_t)((b[52] ) >> 4)) << 0)
+          | (((int32_t)((b[53] ) >> 0)) << 4)
+          | (((int32_t)((b[54] ) >> 0)) << 12)
+          | (((int32_t)((b[55] ) >> 0)) << 20);
+    t[16] = (((int32_t)((b[56] ) >> 0)) << 0)
+          | (((int32_t)((b[57] ) >> 0)) << 8)
+          | 
(((int32_t)((b[58] ) >> 0)) << 16) + | ((((int32_t)((b[59] & 0xf )) >> 0)) << 24); + t[17] = (((int32_t)((b[59] ) >> 4)) << 0) + | (((int32_t)((b[60] ) >> 0)) << 4) + | (((int32_t)((b[61] ) >> 0)) << 12) + | (((int32_t)((b[62] ) >> 0)) << 20); + t[18] = (((int32_t)((b[63] ) >> 0)) << 0) + | (((int32_t)((b[64] ) >> 0)) << 8) + | (((int32_t)((b[65] ) >> 0)) << 16) + | ((((int32_t)((b[66] & 0xf )) >> 0)) << 24); + t[19] = (((int32_t)((b[66] ) >> 4)) << 0) + | (((int32_t)((b[67] ) >> 0)) << 4) + | (((int32_t)((b[68] ) >> 0)) << 12) + | (((int32_t)((b[69] ) >> 0)) << 20); + t[20] = (((int32_t)((b[70] ) >> 0)) << 0) + | (((int32_t)((b[71] ) >> 0)) << 8) + | (((int32_t)((b[72] ) >> 0)) << 16) + | ((((int32_t)((b[73] & 0xf )) >> 0)) << 24); + t[21] = (((int32_t)((b[73] ) >> 4)) << 0) + | (((int32_t)((b[74] ) >> 0)) << 4) + | (((int32_t)((b[75] ) >> 0)) << 12) + | (((int32_t)((b[76] ) >> 0)) << 20); + t[22] = (((int32_t)((b[77] ) >> 0)) << 0) + | (((int32_t)((b[78] ) >> 0)) << 8) + | (((int32_t)((b[79] ) >> 0)) << 16) + | ((((int32_t)((b[80] & 0xf )) >> 0)) << 24); + t[23] = (((int32_t)((b[80] ) >> 4)) << 0) + | (((int32_t)((b[81] ) >> 0)) << 4) + | (((int32_t)((b[82] ) >> 0)) << 12) + | (((int32_t)((b[83] ) >> 0)) << 20); + t[24] = (((int32_t)((b[84] ) >> 0)) << 0) + | (((int32_t)((b[85] ) >> 0)) << 8) + | (((int32_t)((b[86] ) >> 0)) << 16) + | ((((int32_t)((b[87] & 0xf )) >> 0)) << 24); + t[25] = (((int32_t)((b[87] ) >> 4)) << 0) + | (((int32_t)((b[88] ) >> 0)) << 4) + | (((int32_t)((b[89] ) >> 0)) << 12) + | (((int32_t)((b[90] ) >> 0)) << 20); + t[26] = (((int32_t)((b[91] ) >> 0)) << 0) + | (((int32_t)((b[92] ) >> 0)) << 8) + | (((int32_t)((b[93] ) >> 0)) << 16) + | ((((int32_t)((b[94] & 0xf )) >> 0)) << 24); + t[27] = (((int32_t)((b[94] ) >> 4)) << 0) + | (((int32_t)((b[95] ) >> 0)) << 4) + | (((int32_t)((b[96] ) >> 0)) << 12) + | (((int32_t)((b[97] ) >> 0)) << 20); + t[28] = (((int32_t)((b[98] ) >> 0)) << 0) + | (((int32_t)((b[99] ) >> 0)) << 8) + | (((int32_t)((b[100] ) >> 0)) << 16) + | ((((int32_t)((b[101] & 0xf )) >> 0)) << 24); + t[29] = (((int32_t)((b[101] ) >> 4)) << 0) + | (((int32_t)((b[102] ) >> 0)) << 4) + | (((int32_t)((b[103] ) >> 0)) << 12) + | (((int32_t)((b[104] ) >> 0)) << 20); + t[30] = (((int32_t)((b[105] ) >> 0)) << 0) + | (((int32_t)((b[106] ) >> 0)) << 8) + | (((int32_t)((b[107] ) >> 0)) << 16) + | ((((int32_t)((b[108] & 0xf )) >> 0)) << 24); + t[31] = (((int32_t)((b[108] ) >> 4)) << 0) + | (((int32_t)((b[109] ) >> 0)) << 4) + | (((int32_t)((b[110] ) >> 0)) << 12) + | (((int32_t)((b[111] ) >> 0)) << 20); + t[32] = (((int32_t)((b[112] ) >> 0)) << 0) + | (((int32_t)((b[113] ) >> 0)) << 8); + + /* Mod curve order */ + /* 2^446 - 0x8335dc163bb124b65129c96fde933d8d723a70aadc873d6d54a7bb0d */ + /* Mod top half of extra words */ + t[ 8] += (int64_t)0x129eec34 * t[24]; + t[ 9] += (int64_t)0x21cf5b54 * t[24]; + t[10] += (int64_t)0x29c2ab70 * t[24]; + t[11] += (int64_t)0x0f635c8c * t[24]; + t[12] += (int64_t)0x25bf7a4c * t[24]; + t[13] += (int64_t)0x2d944a70 * t[24]; + t[14] += (int64_t)0x18eec490 * t[24]; + t[15] += (int64_t)0x20cd7704 * t[24]; + t[ 9] += (int64_t)0x129eec34 * t[25]; + t[10] += (int64_t)0x21cf5b54 * t[25]; + t[11] += (int64_t)0x29c2ab70 * t[25]; + t[12] += (int64_t)0x0f635c8c * t[25]; + t[13] += (int64_t)0x25bf7a4c * t[25]; + t[14] += (int64_t)0x2d944a70 * t[25]; + t[15] += (int64_t)0x18eec490 * t[25]; + t[16] += (int64_t)0x20cd7704 * t[25]; + t[10] += (int64_t)0x129eec34 * t[26]; + t[11] += (int64_t)0x21cf5b54 * t[26]; + t[12] += (int64_t)0x29c2ab70 * t[26]; + 
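+    /* These multiply-accumulate rows implement the reduction identity
+     * 2^448 == 4 * 2^446 == 4 * c (mod n), where n = 2^446 - c is the
+     * curve order given above. A word t[16+j] has weight
+     * 2^(28*(16+j)) = 2^(28*j) * 2^448, so it is folded into
+     * t[j]..t[j+7] by multiplying it with the eight 28-bit limbs of
+     * 4*c (0x129eec34 == 4 * 0x4a7bb0d, and so on). The unrolled rows
+     * for this top-half pass are equivalent to the following sketch,
+     * where c4[] is an illustrative name (not a variable in this file)
+     * for the limbs of 4*c:
+     *
+     *     for (i = 24; i <= 32; i++)
+     *         for (j = 0; j < 8; j++)
+     *             t[i - 16 + j] += t[i] * c4[j];
+     */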
t[13] += (int64_t)0x0f635c8c * t[26]; + t[14] += (int64_t)0x25bf7a4c * t[26]; + t[15] += (int64_t)0x2d944a70 * t[26]; + t[16] += (int64_t)0x18eec490 * t[26]; + t[17] += (int64_t)0x20cd7704 * t[26]; + t[11] += (int64_t)0x129eec34 * t[27]; + t[12] += (int64_t)0x21cf5b54 * t[27]; + t[13] += (int64_t)0x29c2ab70 * t[27]; + t[14] += (int64_t)0x0f635c8c * t[27]; + t[15] += (int64_t)0x25bf7a4c * t[27]; + t[16] += (int64_t)0x2d944a70 * t[27]; + t[17] += (int64_t)0x18eec490 * t[27]; + t[18] += (int64_t)0x20cd7704 * t[27]; + t[12] += (int64_t)0x129eec34 * t[28]; + t[13] += (int64_t)0x21cf5b54 * t[28]; + t[14] += (int64_t)0x29c2ab70 * t[28]; + t[15] += (int64_t)0x0f635c8c * t[28]; + t[16] += (int64_t)0x25bf7a4c * t[28]; + t[17] += (int64_t)0x2d944a70 * t[28]; + t[18] += (int64_t)0x18eec490 * t[28]; + t[19] += (int64_t)0x20cd7704 * t[28]; + t[13] += (int64_t)0x129eec34 * t[29]; + t[14] += (int64_t)0x21cf5b54 * t[29]; + t[15] += (int64_t)0x29c2ab70 * t[29]; + t[16] += (int64_t)0x0f635c8c * t[29]; + t[17] += (int64_t)0x25bf7a4c * t[29]; + t[18] += (int64_t)0x2d944a70 * t[29]; + t[19] += (int64_t)0x18eec490 * t[29]; + t[20] += (int64_t)0x20cd7704 * t[29]; + t[14] += (int64_t)0x129eec34 * t[30]; + t[15] += (int64_t)0x21cf5b54 * t[30]; + t[16] += (int64_t)0x29c2ab70 * t[30]; + t[17] += (int64_t)0x0f635c8c * t[30]; + t[18] += (int64_t)0x25bf7a4c * t[30]; + t[19] += (int64_t)0x2d944a70 * t[30]; + t[20] += (int64_t)0x18eec490 * t[30]; + t[21] += (int64_t)0x20cd7704 * t[30]; + t[15] += (int64_t)0x129eec34 * t[31]; + t[16] += (int64_t)0x21cf5b54 * t[31]; + t[17] += (int64_t)0x29c2ab70 * t[31]; + t[18] += (int64_t)0x0f635c8c * t[31]; + t[19] += (int64_t)0x25bf7a4c * t[31]; + t[20] += (int64_t)0x2d944a70 * t[31]; + t[21] += (int64_t)0x18eec490 * t[31]; + t[22] += (int64_t)0x20cd7704 * t[31]; + t[16] += (int64_t)0x129eec34 * t[32]; + t[17] += (int64_t)0x21cf5b54 * t[32]; + t[18] += (int64_t)0x29c2ab70 * t[32]; + t[19] += (int64_t)0x0f635c8c * t[32]; + t[20] += (int64_t)0x25bf7a4c * t[32]; + t[21] += (int64_t)0x2d944a70 * t[32]; + t[22] += (int64_t)0x18eec490 * t[32]; + t[23] += (int64_t)0x20cd7704 * t[32]; + t[24] = 0; + /* Propagate carries */ + c = t[ 8] >> 28; t[ 9] += c; t[ 8] = t[ 8] & 0xfffffff; + c = t[ 9] >> 28; t[10] += c; t[ 9] = t[ 9] & 0xfffffff; + c = t[10] >> 28; t[11] += c; t[10] = t[10] & 0xfffffff; + c = t[11] >> 28; t[12] += c; t[11] = t[11] & 0xfffffff; + c = t[12] >> 28; t[13] += c; t[12] = t[12] & 0xfffffff; + c = t[13] >> 28; t[14] += c; t[13] = t[13] & 0xfffffff; + c = t[14] >> 28; t[15] += c; t[14] = t[14] & 0xfffffff; + c = t[15] >> 28; t[16] += c; t[15] = t[15] & 0xfffffff; + c = t[16] >> 28; t[17] += c; t[16] = t[16] & 0xfffffff; + c = t[17] >> 28; t[18] += c; t[17] = t[17] & 0xfffffff; + c = t[18] >> 28; t[19] += c; t[18] = t[18] & 0xfffffff; + c = t[19] >> 28; t[20] += c; t[19] = t[19] & 0xfffffff; + c = t[20] >> 28; t[21] += c; t[20] = t[20] & 0xfffffff; + c = t[21] >> 28; t[22] += c; t[21] = t[21] & 0xfffffff; + c = t[22] >> 28; t[23] += c; t[22] = t[22] & 0xfffffff; + c = t[23] >> 28; t[24] += c; t[23] = t[23] & 0xfffffff; + /* Mod bottom half of extra words */ + t[ 0] += (int64_t)0x129eec34 * t[16]; + t[ 1] += (int64_t)0x21cf5b54 * t[16]; + t[ 2] += (int64_t)0x29c2ab70 * t[16]; + t[ 3] += (int64_t)0x0f635c8c * t[16]; + t[ 4] += (int64_t)0x25bf7a4c * t[16]; + t[ 5] += (int64_t)0x2d944a70 * t[16]; + t[ 6] += (int64_t)0x18eec490 * t[16]; + t[ 7] += (int64_t)0x20cd7704 * t[16]; + t[ 1] += (int64_t)0x129eec34 * t[17]; + t[ 2] += (int64_t)0x21cf5b54 * t[17]; + t[ 3] += (int64_t)0x29c2ab70 
* t[17]; + t[ 4] += (int64_t)0x0f635c8c * t[17]; + t[ 5] += (int64_t)0x25bf7a4c * t[17]; + t[ 6] += (int64_t)0x2d944a70 * t[17]; + t[ 7] += (int64_t)0x18eec490 * t[17]; + t[ 8] += (int64_t)0x20cd7704 * t[17]; + t[ 2] += (int64_t)0x129eec34 * t[18]; + t[ 3] += (int64_t)0x21cf5b54 * t[18]; + t[ 4] += (int64_t)0x29c2ab70 * t[18]; + t[ 5] += (int64_t)0x0f635c8c * t[18]; + t[ 6] += (int64_t)0x25bf7a4c * t[18]; + t[ 7] += (int64_t)0x2d944a70 * t[18]; + t[ 8] += (int64_t)0x18eec490 * t[18]; + t[ 9] += (int64_t)0x20cd7704 * t[18]; + t[ 3] += (int64_t)0x129eec34 * t[19]; + t[ 4] += (int64_t)0x21cf5b54 * t[19]; + t[ 5] += (int64_t)0x29c2ab70 * t[19]; + t[ 6] += (int64_t)0x0f635c8c * t[19]; + t[ 7] += (int64_t)0x25bf7a4c * t[19]; + t[ 8] += (int64_t)0x2d944a70 * t[19]; + t[ 9] += (int64_t)0x18eec490 * t[19]; + t[10] += (int64_t)0x20cd7704 * t[19]; + t[ 4] += (int64_t)0x129eec34 * t[20]; + t[ 5] += (int64_t)0x21cf5b54 * t[20]; + t[ 6] += (int64_t)0x29c2ab70 * t[20]; + t[ 7] += (int64_t)0x0f635c8c * t[20]; + t[ 8] += (int64_t)0x25bf7a4c * t[20]; + t[ 9] += (int64_t)0x2d944a70 * t[20]; + t[10] += (int64_t)0x18eec490 * t[20]; + t[11] += (int64_t)0x20cd7704 * t[20]; + t[ 5] += (int64_t)0x129eec34 * t[21]; + t[ 6] += (int64_t)0x21cf5b54 * t[21]; + t[ 7] += (int64_t)0x29c2ab70 * t[21]; + t[ 8] += (int64_t)0x0f635c8c * t[21]; + t[ 9] += (int64_t)0x25bf7a4c * t[21]; + t[10] += (int64_t)0x2d944a70 * t[21]; + t[11] += (int64_t)0x18eec490 * t[21]; + t[12] += (int64_t)0x20cd7704 * t[21]; + t[ 6] += (int64_t)0x129eec34 * t[22]; + t[ 7] += (int64_t)0x21cf5b54 * t[22]; + t[ 8] += (int64_t)0x29c2ab70 * t[22]; + t[ 9] += (int64_t)0x0f635c8c * t[22]; + t[10] += (int64_t)0x25bf7a4c * t[22]; + t[11] += (int64_t)0x2d944a70 * t[22]; + t[12] += (int64_t)0x18eec490 * t[22]; + t[13] += (int64_t)0x20cd7704 * t[22]; + t[ 7] += (int64_t)0x129eec34 * t[23]; + t[ 8] += (int64_t)0x21cf5b54 * t[23]; + t[ 9] += (int64_t)0x29c2ab70 * t[23]; + t[10] += (int64_t)0x0f635c8c * t[23]; + t[11] += (int64_t)0x25bf7a4c * t[23]; + t[12] += (int64_t)0x2d944a70 * t[23]; + t[13] += (int64_t)0x18eec490 * t[23]; + t[14] += (int64_t)0x20cd7704 * t[23]; + t[ 8] += (int64_t)0x129eec34 * t[24]; + t[ 9] += (int64_t)0x21cf5b54 * t[24]; + t[10] += (int64_t)0x29c2ab70 * t[24]; + t[11] += (int64_t)0x0f635c8c * t[24]; + t[12] += (int64_t)0x25bf7a4c * t[24]; + t[13] += (int64_t)0x2d944a70 * t[24]; + t[14] += (int64_t)0x18eec490 * t[24]; + t[15] += (int64_t)0x20cd7704 * t[24]; + t[16] = 0; + /* Propagate carries */ + c = t[ 0] >> 28; t[ 1] += c; t[ 0] = t[ 0] & 0xfffffff; + c = t[ 1] >> 28; t[ 2] += c; t[ 1] = t[ 1] & 0xfffffff; + c = t[ 2] >> 28; t[ 3] += c; t[ 2] = t[ 2] & 0xfffffff; + c = t[ 3] >> 28; t[ 4] += c; t[ 3] = t[ 3] & 0xfffffff; + c = t[ 4] >> 28; t[ 5] += c; t[ 4] = t[ 4] & 0xfffffff; + c = t[ 5] >> 28; t[ 6] += c; t[ 5] = t[ 5] & 0xfffffff; + c = t[ 6] >> 28; t[ 7] += c; t[ 6] = t[ 6] & 0xfffffff; + c = t[ 7] >> 28; t[ 8] += c; t[ 7] = t[ 7] & 0xfffffff; + c = t[ 8] >> 28; t[ 9] += c; t[ 8] = t[ 8] & 0xfffffff; + c = t[ 9] >> 28; t[10] += c; t[ 9] = t[ 9] & 0xfffffff; + c = t[10] >> 28; t[11] += c; t[10] = t[10] & 0xfffffff; + c = t[11] >> 28; t[12] += c; t[11] = t[11] & 0xfffffff; + c = t[12] >> 28; t[13] += c; t[12] = t[12] & 0xfffffff; + c = t[13] >> 28; t[14] += c; t[13] = t[13] & 0xfffffff; + c = t[14] >> 28; t[15] += c; t[14] = t[14] & 0xfffffff; + c = t[15] >> 28; t[16] += c; t[15] = t[15] & 0xfffffff; + t[ 0] += (int64_t)0x129eec34 * t[16]; + t[ 1] += (int64_t)0x21cf5b54 * t[16]; + t[ 2] += (int64_t)0x29c2ab70 * t[16]; + t[ 3] += 
(int64_t)0x0f635c8c * t[16]; + t[ 4] += (int64_t)0x25bf7a4c * t[16]; + t[ 5] += (int64_t)0x2d944a70 * t[16]; + t[ 6] += (int64_t)0x18eec490 * t[16]; + t[ 7] += (int64_t)0x20cd7704 * t[16]; + /* Propagate carries */ + c = t[ 0] >> 28; t[ 1] += c; d[ 0] = (int32_t)(t[ 0] & 0xfffffff); + c = t[ 1] >> 28; t[ 2] += c; d[ 1] = (int32_t)(t[ 1] & 0xfffffff); + c = t[ 2] >> 28; t[ 3] += c; d[ 2] = (int32_t)(t[ 2] & 0xfffffff); + c = t[ 3] >> 28; t[ 4] += c; d[ 3] = (int32_t)(t[ 3] & 0xfffffff); + c = t[ 4] >> 28; t[ 5] += c; d[ 4] = (int32_t)(t[ 4] & 0xfffffff); + c = t[ 5] >> 28; t[ 6] += c; d[ 5] = (int32_t)(t[ 5] & 0xfffffff); + c = t[ 6] >> 28; t[ 7] += c; d[ 6] = (int32_t)(t[ 6] & 0xfffffff); + c = t[ 7] >> 28; t[ 8] += c; d[ 7] = (int32_t)(t[ 7] & 0xfffffff); + c = t[ 8] >> 28; t[ 9] += c; d[ 8] = (int32_t)(t[ 8] & 0xfffffff); + c = t[ 9] >> 28; t[10] += c; d[ 9] = (int32_t)(t[ 9] & 0xfffffff); + c = t[10] >> 28; t[11] += c; d[10] = (int32_t)(t[10] & 0xfffffff); + c = t[11] >> 28; t[12] += c; d[11] = (int32_t)(t[11] & 0xfffffff); + c = t[12] >> 28; t[13] += c; d[12] = (int32_t)(t[12] & 0xfffffff); + c = t[13] >> 28; t[14] += c; d[13] = (int32_t)(t[13] & 0xfffffff); + c = t[14] >> 28; t[15] += c; d[14] = (int32_t)(t[14] & 0xfffffff); + d[15] = t[15]; + /* Mod bits over 28 in last word */ + o = d[15] >> 26; d[15] &= 0x3ffffff; + d[ 0] += 0x4a7bb0d * o; + d[ 1] += 0x873d6d5 * o; + d[ 2] += 0xa70aadc * o; + d[ 3] += 0x3d8d723 * o; + d[ 4] += 0x96fde93 * o; + d[ 5] += 0xb65129c * o; + d[ 6] += 0x63bb124 * o; + d[ 7] += 0x8335dc1 * o; + /* Propagate carries */ + o = d[ 0] >> 28; d[ 1] += o; d[ 0] = d[ 0] & 0xfffffff; + o = d[ 1] >> 28; d[ 2] += o; d[ 1] = d[ 1] & 0xfffffff; + o = d[ 2] >> 28; d[ 3] += o; d[ 2] = d[ 2] & 0xfffffff; + o = d[ 3] >> 28; d[ 4] += o; d[ 3] = d[ 3] & 0xfffffff; + o = d[ 4] >> 28; d[ 5] += o; d[ 4] = d[ 4] & 0xfffffff; + o = d[ 5] >> 28; d[ 6] += o; d[ 5] = d[ 5] & 0xfffffff; + o = d[ 6] >> 28; d[ 7] += o; d[ 6] = d[ 6] & 0xfffffff; + o = d[ 7] >> 28; d[ 8] += o; d[ 7] = d[ 7] & 0xfffffff; + o = d[ 8] >> 28; d[ 9] += o; d[ 8] = d[ 8] & 0xfffffff; + o = d[ 9] >> 28; d[10] += o; d[ 9] = d[ 9] & 0xfffffff; + o = d[10] >> 28; d[11] += o; d[10] = d[10] & 0xfffffff; + o = d[11] >> 28; d[12] += o; d[11] = d[11] & 0xfffffff; + o = d[12] >> 28; d[13] += o; d[12] = d[12] & 0xfffffff; + o = d[13] >> 28; d[14] += o; d[13] = d[13] & 0xfffffff; + o = d[14] >> 28; d[15] += o; d[14] = d[14] & 0xfffffff; + + /* Convert to bytes */ + b[ 0] = (d[0 ] >> 0); + b[ 1] = (d[0 ] >> 8); + b[ 2] = (d[0 ] >> 16); + b[ 3] = (d[0 ] >> 24) + ((d[1 ] >> 0) << 4); + b[ 4] = (d[1 ] >> 4); + b[ 5] = (d[1 ] >> 12); + b[ 6] = (d[1 ] >> 20); + b[ 7] = (d[2 ] >> 0); + b[ 8] = (d[2 ] >> 8); + b[ 9] = (d[2 ] >> 16); + b[10] = (d[2 ] >> 24) + ((d[3 ] >> 0) << 4); + b[11] = (d[3 ] >> 4); + b[12] = (d[3 ] >> 12); + b[13] = (d[3 ] >> 20); + b[14] = (d[4 ] >> 0); + b[15] = (d[4 ] >> 8); + b[16] = (d[4 ] >> 16); + b[17] = (d[4 ] >> 24) + ((d[5 ] >> 0) << 4); + b[18] = (d[5 ] >> 4); + b[19] = (d[5 ] >> 12); + b[20] = (d[5 ] >> 20); + b[21] = (d[6 ] >> 0); + b[22] = (d[6 ] >> 8); + b[23] = (d[6 ] >> 16); + b[24] = (d[6 ] >> 24) + ((d[7 ] >> 0) << 4); + b[25] = (d[7 ] >> 4); + b[26] = (d[7 ] >> 12); + b[27] = (d[7 ] >> 20); + b[28] = (d[8 ] >> 0); + b[29] = (d[8 ] >> 8); + b[30] = (d[8 ] >> 16); + b[31] = (d[8 ] >> 24) + ((d[9 ] >> 0) << 4); + b[32] = (d[9 ] >> 4); + b[33] = (d[9 ] >> 12); + b[34] = (d[9 ] >> 20); + b[35] = (d[10] >> 0); + b[36] = (d[10] >> 8); + b[37] = (d[10] >> 16); + b[38] = (d[10] >> 24) + ((d[11] >> 
0) << 4);
+    b[39] = (d[11] >> 4);
+    b[40] = (d[11] >> 12);
+    b[41] = (d[11] >> 20);
+    b[42] = (d[12] >> 0);
+    b[43] = (d[12] >> 8);
+    b[44] = (d[12] >> 16);
+    b[45] = (d[12] >> 24) + ((d[13] >> 0) << 4);
+    b[46] = (d[13] >> 4);
+    b[47] = (d[13] >> 12);
+    b[48] = (d[13] >> 20);
+    b[49] = (d[14] >> 0);
+    b[50] = (d[14] >> 8);
+    b[51] = (d[14] >> 16);
+    b[52] = (d[14] >> 24) + ((d[15] >> 0) << 4);
+    b[53] = (d[15] >> 4);
+    b[54] = (d[15] >> 12);
+    b[55] = (d[15] >> 20);
+    b[56] = 0;
+}
+
+/* Multiply a by b and add d. r = (a * b + d) mod order
+ *
+ * r  [out]  Scalar to hold result.
+ * a  [in]   Scalar to multiply.
+ * b  [in]   Scalar to multiply.
+ * d  [in]   Scalar to add to multiplicative result.
+ */
+void sc448_muladd(uint8_t* r, const uint8_t* a, const uint8_t* b,
+                  const uint8_t* d)
+{
+    uint32_t ad[16], bd[16], dd[16], rd[16];
+    uint64_t t[32];
+    uint64_t c;
+    uint32_t o;
+
+    /* Load from bytes */
+    ad[ 0] = (((int32_t)((a[ 0] ) >> 0)) << 0)
+           | (((int32_t)((a[ 1] ) >> 0)) << 8)
+           | (((int32_t)((a[ 2] ) >> 0)) << 16)
+           | ((((int32_t)((a[ 3] & 0xf )) >> 0)) << 24);
+    ad[ 1] = (((int32_t)((a[ 3] ) >> 4)) << 0)
+           | (((int32_t)((a[ 4] ) >> 0)) << 4)
+           | (((int32_t)((a[ 5] ) >> 0)) << 12)
+           | (((int32_t)((a[ 6] ) >> 0)) << 20);
+    ad[ 2] = (((int32_t)((a[ 7] ) >> 0)) << 0)
+           | (((int32_t)((a[ 8] ) >> 0)) << 8)
+           | (((int32_t)((a[ 9] ) >> 0)) << 16)
+           | ((((int32_t)((a[10] & 0xf )) >> 0)) << 24);
+    ad[ 3] = (((int32_t)((a[10] ) >> 4)) << 0)
+           | (((int32_t)((a[11] ) >> 0)) << 4)
+           | (((int32_t)((a[12] ) >> 0)) << 12)
+           | (((int32_t)((a[13] ) >> 0)) << 20);
+    ad[ 4] = (((int32_t)((a[14] ) >> 0)) << 0)
+           | (((int32_t)((a[15] ) >> 0)) << 8)
+           | (((int32_t)((a[16] ) >> 0)) << 16)
+           | ((((int32_t)((a[17] & 0xf )) >> 0)) << 24);
+    ad[ 5] = (((int32_t)((a[17] ) >> 4)) << 0)
+           | (((int32_t)((a[18] ) >> 0)) << 4)
+           | (((int32_t)((a[19] ) >> 0)) << 12)
+           | (((int32_t)((a[20] ) >> 0)) << 20);
+    ad[ 6] = (((int32_t)((a[21] ) >> 0)) << 0)
+           | (((int32_t)((a[22] ) >> 0)) << 8)
+           | (((int32_t)((a[23] ) >> 0)) << 16)
+           | ((((int32_t)((a[24] & 0xf )) >> 0)) << 24);
+    ad[ 7] = (((int32_t)((a[24] ) >> 4)) << 0)
+           | (((int32_t)((a[25] ) >> 0)) << 4)
+           | (((int32_t)((a[26] ) >> 0)) << 12)
+           | (((int32_t)((a[27] ) >> 0)) << 20);
+    ad[ 8] = (((int32_t)((a[28] ) >> 0)) << 0)
+           | (((int32_t)((a[29] ) >> 0)) << 8)
+           | (((int32_t)((a[30] ) >> 0)) << 16)
+           | ((((int32_t)((a[31] & 0xf )) >> 0)) << 24);
+    ad[ 9] = (((int32_t)((a[31] ) >> 4)) << 0)
+           | (((int32_t)((a[32] ) >> 0)) << 4)
+           | (((int32_t)((a[33] ) >> 0)) << 12)
+           | (((int32_t)((a[34] ) >> 0)) << 20);
+    ad[10] = (((int32_t)((a[35] ) >> 0)) << 0)
+           | (((int32_t)((a[36] ) >> 0)) << 8)
+           | (((int32_t)((a[37] ) >> 0)) << 16)
+           | ((((int32_t)((a[38] & 0xf )) >> 0)) << 24);
+    ad[11] = (((int32_t)((a[38] ) >> 4)) << 0)
+           | (((int32_t)((a[39] ) >> 0)) << 4)
+           | (((int32_t)((a[40] ) >> 0)) << 12)
+           | (((int32_t)((a[41] ) >> 0)) << 20);
+    ad[12] = (((int32_t)((a[42] ) >> 0)) << 0)
+           | (((int32_t)((a[43] ) >> 0)) << 8)
+           | (((int32_t)((a[44] ) >> 0)) << 16)
+           | ((((int32_t)((a[45] & 0xf )) >> 0)) << 24);
+    ad[13] = (((int32_t)((a[45] ) >> 4)) << 0)
+           | (((int32_t)((a[46] ) >> 0)) << 4)
+           | (((int32_t)((a[47] ) >> 0)) << 12)
+           | (((int32_t)((a[48] ) >> 0)) << 20);
+    ad[14] = (((int32_t)((a[49] ) >> 0)) << 0)
+           | (((int32_t)((a[50] ) >> 0)) << 8)
+           | (((int32_t)((a[51] ) >> 0)) << 16)
+           | ((((int32_t)((a[52] & 0xf )) >> 0)) << 24);
+    ad[15] = (((int32_t)((a[52] ) >> 4)) << 0)
+           | (((int32_t)((a[53] ) >> 0)) << 4)
+           | (((int32_t)((a[54] ) >> 0)) << 12)
+           | (((int32_t)((a[55] ) >> 
0)) << 20); + /* Load from bytes */ + bd[ 0] = (((int32_t)((b[ 0] ) >> 0)) << 0) + | (((int32_t)((b[ 1] ) >> 0)) << 8) + | (((int32_t)((b[ 2] ) >> 0)) << 16) + | ((((int32_t)((b[ 3] & 0xf )) >> 0)) << 24); + bd[ 1] = (((int32_t)((b[ 3] ) >> 4)) << 0) + | (((int32_t)((b[ 4] ) >> 0)) << 4) + | (((int32_t)((b[ 5] ) >> 0)) << 12) + | (((int32_t)((b[ 6] ) >> 0)) << 20); + bd[ 2] = (((int32_t)((b[ 7] ) >> 0)) << 0) + | (((int32_t)((b[ 8] ) >> 0)) << 8) + | (((int32_t)((b[ 9] ) >> 0)) << 16) + | ((((int32_t)((b[10] & 0xf )) >> 0)) << 24); + bd[ 3] = (((int32_t)((b[10] ) >> 4)) << 0) + | (((int32_t)((b[11] ) >> 0)) << 4) + | (((int32_t)((b[12] ) >> 0)) << 12) + | (((int32_t)((b[13] ) >> 0)) << 20); + bd[ 4] = (((int32_t)((b[14] ) >> 0)) << 0) + | (((int32_t)((b[15] ) >> 0)) << 8) + | (((int32_t)((b[16] ) >> 0)) << 16) + | ((((int32_t)((b[17] & 0xf )) >> 0)) << 24); + bd[ 5] = (((int32_t)((b[17] ) >> 4)) << 0) + | (((int32_t)((b[18] ) >> 0)) << 4) + | (((int32_t)((b[19] ) >> 0)) << 12) + | (((int32_t)((b[20] ) >> 0)) << 20); + bd[ 6] = (((int32_t)((b[21] ) >> 0)) << 0) + | (((int32_t)((b[22] ) >> 0)) << 8) + | (((int32_t)((b[23] ) >> 0)) << 16) + | ((((int32_t)((b[24] & 0xf )) >> 0)) << 24); + bd[ 7] = (((int32_t)((b[24] ) >> 4)) << 0) + | (((int32_t)((b[25] ) >> 0)) << 4) + | (((int32_t)((b[26] ) >> 0)) << 12) + | (((int32_t)((b[27] ) >> 0)) << 20); + bd[ 8] = (((int32_t)((b[28] ) >> 0)) << 0) + | (((int32_t)((b[29] ) >> 0)) << 8) + | (((int32_t)((b[30] ) >> 0)) << 16) + | ((((int32_t)((b[31] & 0xf )) >> 0)) << 24); + bd[ 9] = (((int32_t)((b[31] ) >> 4)) << 0) + | (((int32_t)((b[32] ) >> 0)) << 4) + | (((int32_t)((b[33] ) >> 0)) << 12) + | (((int32_t)((b[34] ) >> 0)) << 20); + bd[10] = (((int32_t)((b[35] ) >> 0)) << 0) + | (((int32_t)((b[36] ) >> 0)) << 8) + | (((int32_t)((b[37] ) >> 0)) << 16) + | ((((int32_t)((b[38] & 0xf )) >> 0)) << 24); + bd[11] = (((int32_t)((b[38] ) >> 4)) << 0) + | (((int32_t)((b[39] ) >> 0)) << 4) + | (((int32_t)((b[40] ) >> 0)) << 12) + | (((int32_t)((b[41] ) >> 0)) << 20); + bd[12] = (((int32_t)((b[42] ) >> 0)) << 0) + | (((int32_t)((b[43] ) >> 0)) << 8) + | (((int32_t)((b[44] ) >> 0)) << 16) + | ((((int32_t)((b[45] & 0xf )) >> 0)) << 24); + bd[13] = (((int32_t)((b[45] ) >> 4)) << 0) + | (((int32_t)((b[46] ) >> 0)) << 4) + | (((int32_t)((b[47] ) >> 0)) << 12) + | (((int32_t)((b[48] ) >> 0)) << 20); + bd[14] = (((int32_t)((b[49] ) >> 0)) << 0) + | (((int32_t)((b[50] ) >> 0)) << 8) + | (((int32_t)((b[51] ) >> 0)) << 16) + | ((((int32_t)((b[52] & 0xf )) >> 0)) << 24); + bd[15] = (((int32_t)((b[52] ) >> 4)) << 0) + | (((int32_t)((b[53] ) >> 0)) << 4) + | (((int32_t)((b[54] ) >> 0)) << 12) + | (((int32_t)((b[55] ) >> 0)) << 20); + /* Load from bytes */ + dd[ 0] = (((int32_t)((d[ 0] ) >> 0)) << 0) + | (((int32_t)((d[ 1] ) >> 0)) << 8) + | (((int32_t)((d[ 2] ) >> 0)) << 16) + | ((((int32_t)((d[ 3] & 0xf )) >> 0)) << 24); + dd[ 1] = (((int32_t)((d[ 3] ) >> 4)) << 0) + | (((int32_t)((d[ 4] ) >> 0)) << 4) + | (((int32_t)((d[ 5] ) >> 0)) << 12) + | (((int32_t)((d[ 6] ) >> 0)) << 20); + dd[ 2] = (((int32_t)((d[ 7] ) >> 0)) << 0) + | (((int32_t)((d[ 8] ) >> 0)) << 8) + | (((int32_t)((d[ 9] ) >> 0)) << 16) + | ((((int32_t)((d[10] & 0xf )) >> 0)) << 24); + dd[ 3] = (((int32_t)((d[10] ) >> 4)) << 0) + | (((int32_t)((d[11] ) >> 0)) << 4) + | (((int32_t)((d[12] ) >> 0)) << 12) + | (((int32_t)((d[13] ) >> 0)) << 20); + dd[ 4] = (((int32_t)((d[14] ) >> 0)) << 0) + | (((int32_t)((d[15] ) >> 0)) << 8) + | (((int32_t)((d[16] ) >> 0)) << 16) + | ((((int32_t)((d[17] & 0xf )) >> 0)) << 24); + 
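+    /* Each 56-byte scalar is unpacked into 16 little-endian 28-bit
+     * words: an even word takes three whole bytes plus the low nibble
+     * of the next byte, and the following odd word takes that byte's
+     * high nibble plus the next three bytes. The fully unrolled
+     * product further below is schoolbook multiplication over those
+     * words, equivalent to this sketch (loop form is illustrative
+     * only):
+     *
+     *     for (k = 0; k < 31; k++)
+     *         for (t[k] = (k < 16) ? dd[k] : 0, i = 0; i <= k; i++)
+     *             if (i < 16 && k - i < 16)
+     *                 t[k] += (int64_t)ad[i] * bd[k - i];
+     *
+     * The 31 resulting words (t[31] stays zero) are then carried and
+     * reduced mod the order exactly as in sc448_reduce(). */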
dd[ 5] = (((int32_t)((d[17] ) >> 4)) << 0) + | (((int32_t)((d[18] ) >> 0)) << 4) + | (((int32_t)((d[19] ) >> 0)) << 12) + | (((int32_t)((d[20] ) >> 0)) << 20); + dd[ 6] = (((int32_t)((d[21] ) >> 0)) << 0) + | (((int32_t)((d[22] ) >> 0)) << 8) + | (((int32_t)((d[23] ) >> 0)) << 16) + | ((((int32_t)((d[24] & 0xf )) >> 0)) << 24); + dd[ 7] = (((int32_t)((d[24] ) >> 4)) << 0) + | (((int32_t)((d[25] ) >> 0)) << 4) + | (((int32_t)((d[26] ) >> 0)) << 12) + | (((int32_t)((d[27] ) >> 0)) << 20); + dd[ 8] = (((int32_t)((d[28] ) >> 0)) << 0) + | (((int32_t)((d[29] ) >> 0)) << 8) + | (((int32_t)((d[30] ) >> 0)) << 16) + | ((((int32_t)((d[31] & 0xf )) >> 0)) << 24); + dd[ 9] = (((int32_t)((d[31] ) >> 4)) << 0) + | (((int32_t)((d[32] ) >> 0)) << 4) + | (((int32_t)((d[33] ) >> 0)) << 12) + | (((int32_t)((d[34] ) >> 0)) << 20); + dd[10] = (((int32_t)((d[35] ) >> 0)) << 0) + | (((int32_t)((d[36] ) >> 0)) << 8) + | (((int32_t)((d[37] ) >> 0)) << 16) + | ((((int32_t)((d[38] & 0xf )) >> 0)) << 24); + dd[11] = (((int32_t)((d[38] ) >> 4)) << 0) + | (((int32_t)((d[39] ) >> 0)) << 4) + | (((int32_t)((d[40] ) >> 0)) << 12) + | (((int32_t)((d[41] ) >> 0)) << 20); + dd[12] = (((int32_t)((d[42] ) >> 0)) << 0) + | (((int32_t)((d[43] ) >> 0)) << 8) + | (((int32_t)((d[44] ) >> 0)) << 16) + | ((((int32_t)((d[45] & 0xf )) >> 0)) << 24); + dd[13] = (((int32_t)((d[45] ) >> 4)) << 0) + | (((int32_t)((d[46] ) >> 0)) << 4) + | (((int32_t)((d[47] ) >> 0)) << 12) + | (((int32_t)((d[48] ) >> 0)) << 20); + dd[14] = (((int32_t)((d[49] ) >> 0)) << 0) + | (((int32_t)((d[50] ) >> 0)) << 8) + | (((int32_t)((d[51] ) >> 0)) << 16) + | ((((int32_t)((d[52] & 0xf )) >> 0)) << 24); + dd[15] = (((int32_t)((d[52] ) >> 4)) << 0) + | (((int32_t)((d[53] ) >> 0)) << 4) + | (((int32_t)((d[54] ) >> 0)) << 12) + | (((int32_t)((d[55] ) >> 0)) << 20); + + /* a * b + d */ + t[ 0] = dd[ 0] + (int64_t)ad[ 0] * bd[ 0]; + t[ 1] = dd[ 1] + (int64_t)ad[ 0] * bd[ 1] + + (int64_t)ad[ 1] * bd[ 0]; + t[ 2] = dd[ 2] + (int64_t)ad[ 0] * bd[ 2] + + (int64_t)ad[ 1] * bd[ 1] + + (int64_t)ad[ 2] * bd[ 0]; + t[ 3] = dd[ 3] + (int64_t)ad[ 0] * bd[ 3] + + (int64_t)ad[ 1] * bd[ 2] + + (int64_t)ad[ 2] * bd[ 1] + + (int64_t)ad[ 3] * bd[ 0]; + t[ 4] = dd[ 4] + (int64_t)ad[ 0] * bd[ 4] + + (int64_t)ad[ 1] * bd[ 3] + + (int64_t)ad[ 2] * bd[ 2] + + (int64_t)ad[ 3] * bd[ 1] + + (int64_t)ad[ 4] * bd[ 0]; + t[ 5] = dd[ 5] + (int64_t)ad[ 0] * bd[ 5] + + (int64_t)ad[ 1] * bd[ 4] + + (int64_t)ad[ 2] * bd[ 3] + + (int64_t)ad[ 3] * bd[ 2] + + (int64_t)ad[ 4] * bd[ 1] + + (int64_t)ad[ 5] * bd[ 0]; + t[ 6] = dd[ 6] + (int64_t)ad[ 0] * bd[ 6] + + (int64_t)ad[ 1] * bd[ 5] + + (int64_t)ad[ 2] * bd[ 4] + + (int64_t)ad[ 3] * bd[ 3] + + (int64_t)ad[ 4] * bd[ 2] + + (int64_t)ad[ 5] * bd[ 1] + + (int64_t)ad[ 6] * bd[ 0]; + t[ 7] = dd[ 7] + (int64_t)ad[ 0] * bd[ 7] + + (int64_t)ad[ 1] * bd[ 6] + + (int64_t)ad[ 2] * bd[ 5] + + (int64_t)ad[ 3] * bd[ 4] + + (int64_t)ad[ 4] * bd[ 3] + + (int64_t)ad[ 5] * bd[ 2] + + (int64_t)ad[ 6] * bd[ 1] + + (int64_t)ad[ 7] * bd[ 0]; + t[ 8] = dd[ 8] + (int64_t)ad[ 0] * bd[ 8] + + (int64_t)ad[ 1] * bd[ 7] + + (int64_t)ad[ 2] * bd[ 6] + + (int64_t)ad[ 3] * bd[ 5] + + (int64_t)ad[ 4] * bd[ 4] + + (int64_t)ad[ 5] * bd[ 3] + + (int64_t)ad[ 6] * bd[ 2] + + (int64_t)ad[ 7] * bd[ 1] + + (int64_t)ad[ 8] * bd[ 0]; + t[ 9] = dd[ 9] + (int64_t)ad[ 0] * bd[ 9] + + (int64_t)ad[ 1] * bd[ 8] + + (int64_t)ad[ 2] * bd[ 7] + + (int64_t)ad[ 3] * bd[ 6] + + (int64_t)ad[ 4] * bd[ 5] + + (int64_t)ad[ 5] * bd[ 4] + + (int64_t)ad[ 6] * bd[ 3] + + (int64_t)ad[ 7] * bd[ 2] + + (int64_t)ad[ 
8] * bd[ 1] + + (int64_t)ad[ 9] * bd[ 0]; + t[10] = dd[10] + (int64_t)ad[ 0] * bd[10] + + (int64_t)ad[ 1] * bd[ 9] + + (int64_t)ad[ 2] * bd[ 8] + + (int64_t)ad[ 3] * bd[ 7] + + (int64_t)ad[ 4] * bd[ 6] + + (int64_t)ad[ 5] * bd[ 5] + + (int64_t)ad[ 6] * bd[ 4] + + (int64_t)ad[ 7] * bd[ 3] + + (int64_t)ad[ 8] * bd[ 2] + + (int64_t)ad[ 9] * bd[ 1] + + (int64_t)ad[10] * bd[ 0]; + t[11] = dd[11] + (int64_t)ad[ 0] * bd[11] + + (int64_t)ad[ 1] * bd[10] + + (int64_t)ad[ 2] * bd[ 9] + + (int64_t)ad[ 3] * bd[ 8] + + (int64_t)ad[ 4] * bd[ 7] + + (int64_t)ad[ 5] * bd[ 6] + + (int64_t)ad[ 6] * bd[ 5] + + (int64_t)ad[ 7] * bd[ 4] + + (int64_t)ad[ 8] * bd[ 3] + + (int64_t)ad[ 9] * bd[ 2] + + (int64_t)ad[10] * bd[ 1] + + (int64_t)ad[11] * bd[ 0]; + t[12] = dd[12] + (int64_t)ad[ 0] * bd[12] + + (int64_t)ad[ 1] * bd[11] + + (int64_t)ad[ 2] * bd[10] + + (int64_t)ad[ 3] * bd[ 9] + + (int64_t)ad[ 4] * bd[ 8] + + (int64_t)ad[ 5] * bd[ 7] + + (int64_t)ad[ 6] * bd[ 6] + + (int64_t)ad[ 7] * bd[ 5] + + (int64_t)ad[ 8] * bd[ 4] + + (int64_t)ad[ 9] * bd[ 3] + + (int64_t)ad[10] * bd[ 2] + + (int64_t)ad[11] * bd[ 1] + + (int64_t)ad[12] * bd[ 0]; + t[13] = dd[13] + (int64_t)ad[ 0] * bd[13] + + (int64_t)ad[ 1] * bd[12] + + (int64_t)ad[ 2] * bd[11] + + (int64_t)ad[ 3] * bd[10] + + (int64_t)ad[ 4] * bd[ 9] + + (int64_t)ad[ 5] * bd[ 8] + + (int64_t)ad[ 6] * bd[ 7] + + (int64_t)ad[ 7] * bd[ 6] + + (int64_t)ad[ 8] * bd[ 5] + + (int64_t)ad[ 9] * bd[ 4] + + (int64_t)ad[10] * bd[ 3] + + (int64_t)ad[11] * bd[ 2] + + (int64_t)ad[12] * bd[ 1] + + (int64_t)ad[13] * bd[ 0]; + t[14] = dd[14] + (int64_t)ad[ 0] * bd[14] + + (int64_t)ad[ 1] * bd[13] + + (int64_t)ad[ 2] * bd[12] + + (int64_t)ad[ 3] * bd[11] + + (int64_t)ad[ 4] * bd[10] + + (int64_t)ad[ 5] * bd[ 9] + + (int64_t)ad[ 6] * bd[ 8] + + (int64_t)ad[ 7] * bd[ 7] + + (int64_t)ad[ 8] * bd[ 6] + + (int64_t)ad[ 9] * bd[ 5] + + (int64_t)ad[10] * bd[ 4] + + (int64_t)ad[11] * bd[ 3] + + (int64_t)ad[12] * bd[ 2] + + (int64_t)ad[13] * bd[ 1] + + (int64_t)ad[14] * bd[ 0]; + t[15] = dd[15] + (int64_t)ad[ 0] * bd[15] + + (int64_t)ad[ 1] * bd[14] + + (int64_t)ad[ 2] * bd[13] + + (int64_t)ad[ 3] * bd[12] + + (int64_t)ad[ 4] * bd[11] + + (int64_t)ad[ 5] * bd[10] + + (int64_t)ad[ 6] * bd[ 9] + + (int64_t)ad[ 7] * bd[ 8] + + (int64_t)ad[ 8] * bd[ 7] + + (int64_t)ad[ 9] * bd[ 6] + + (int64_t)ad[10] * bd[ 5] + + (int64_t)ad[11] * bd[ 4] + + (int64_t)ad[12] * bd[ 3] + + (int64_t)ad[13] * bd[ 2] + + (int64_t)ad[14] * bd[ 1] + + (int64_t)ad[15] * bd[ 0]; + t[16] = (int64_t)ad[ 1] * bd[15] + + (int64_t)ad[ 2] * bd[14] + + (int64_t)ad[ 3] * bd[13] + + (int64_t)ad[ 4] * bd[12] + + (int64_t)ad[ 5] * bd[11] + + (int64_t)ad[ 6] * bd[10] + + (int64_t)ad[ 7] * bd[ 9] + + (int64_t)ad[ 8] * bd[ 8] + + (int64_t)ad[ 9] * bd[ 7] + + (int64_t)ad[10] * bd[ 6] + + (int64_t)ad[11] * bd[ 5] + + (int64_t)ad[12] * bd[ 4] + + (int64_t)ad[13] * bd[ 3] + + (int64_t)ad[14] * bd[ 2] + + (int64_t)ad[15] * bd[ 1]; + t[17] = (int64_t)ad[ 2] * bd[15] + + (int64_t)ad[ 3] * bd[14] + + (int64_t)ad[ 4] * bd[13] + + (int64_t)ad[ 5] * bd[12] + + (int64_t)ad[ 6] * bd[11] + + (int64_t)ad[ 7] * bd[10] + + (int64_t)ad[ 8] * bd[ 9] + + (int64_t)ad[ 9] * bd[ 8] + + (int64_t)ad[10] * bd[ 7] + + (int64_t)ad[11] * bd[ 6] + + (int64_t)ad[12] * bd[ 5] + + (int64_t)ad[13] * bd[ 4] + + (int64_t)ad[14] * bd[ 3] + + (int64_t)ad[15] * bd[ 2]; + t[18] = (int64_t)ad[ 3] * bd[15] + + (int64_t)ad[ 4] * bd[14] + + (int64_t)ad[ 5] * bd[13] + + (int64_t)ad[ 6] * bd[12] + + (int64_t)ad[ 7] * bd[11] + + (int64_t)ad[ 8] * bd[10] + + (int64_t)ad[ 9] * bd[ 9] + + 
(int64_t)ad[10] * bd[ 8] + + (int64_t)ad[11] * bd[ 7] + + (int64_t)ad[12] * bd[ 6] + + (int64_t)ad[13] * bd[ 5] + + (int64_t)ad[14] * bd[ 4] + + (int64_t)ad[15] * bd[ 3]; + t[19] = (int64_t)ad[ 4] * bd[15] + + (int64_t)ad[ 5] * bd[14] + + (int64_t)ad[ 6] * bd[13] + + (int64_t)ad[ 7] * bd[12] + + (int64_t)ad[ 8] * bd[11] + + (int64_t)ad[ 9] * bd[10] + + (int64_t)ad[10] * bd[ 9] + + (int64_t)ad[11] * bd[ 8] + + (int64_t)ad[12] * bd[ 7] + + (int64_t)ad[13] * bd[ 6] + + (int64_t)ad[14] * bd[ 5] + + (int64_t)ad[15] * bd[ 4]; + t[20] = (int64_t)ad[ 5] * bd[15] + + (int64_t)ad[ 6] * bd[14] + + (int64_t)ad[ 7] * bd[13] + + (int64_t)ad[ 8] * bd[12] + + (int64_t)ad[ 9] * bd[11] + + (int64_t)ad[10] * bd[10] + + (int64_t)ad[11] * bd[ 9] + + (int64_t)ad[12] * bd[ 8] + + (int64_t)ad[13] * bd[ 7] + + (int64_t)ad[14] * bd[ 6] + + (int64_t)ad[15] * bd[ 5]; + t[21] = (int64_t)ad[ 6] * bd[15] + + (int64_t)ad[ 7] * bd[14] + + (int64_t)ad[ 8] * bd[13] + + (int64_t)ad[ 9] * bd[12] + + (int64_t)ad[10] * bd[11] + + (int64_t)ad[11] * bd[10] + + (int64_t)ad[12] * bd[ 9] + + (int64_t)ad[13] * bd[ 8] + + (int64_t)ad[14] * bd[ 7] + + (int64_t)ad[15] * bd[ 6]; + t[22] = (int64_t)ad[ 7] * bd[15] + + (int64_t)ad[ 8] * bd[14] + + (int64_t)ad[ 9] * bd[13] + + (int64_t)ad[10] * bd[12] + + (int64_t)ad[11] * bd[11] + + (int64_t)ad[12] * bd[10] + + (int64_t)ad[13] * bd[ 9] + + (int64_t)ad[14] * bd[ 8] + + (int64_t)ad[15] * bd[ 7]; + t[23] = (int64_t)ad[ 8] * bd[15] + + (int64_t)ad[ 9] * bd[14] + + (int64_t)ad[10] * bd[13] + + (int64_t)ad[11] * bd[12] + + (int64_t)ad[12] * bd[11] + + (int64_t)ad[13] * bd[10] + + (int64_t)ad[14] * bd[ 9] + + (int64_t)ad[15] * bd[ 8]; + t[24] = (int64_t)ad[ 9] * bd[15] + + (int64_t)ad[10] * bd[14] + + (int64_t)ad[11] * bd[13] + + (int64_t)ad[12] * bd[12] + + (int64_t)ad[13] * bd[11] + + (int64_t)ad[14] * bd[10] + + (int64_t)ad[15] * bd[ 9]; + t[25] = (int64_t)ad[10] * bd[15] + + (int64_t)ad[11] * bd[14] + + (int64_t)ad[12] * bd[13] + + (int64_t)ad[13] * bd[12] + + (int64_t)ad[14] * bd[11] + + (int64_t)ad[15] * bd[10]; + t[26] = (int64_t)ad[11] * bd[15] + + (int64_t)ad[12] * bd[14] + + (int64_t)ad[13] * bd[13] + + (int64_t)ad[14] * bd[12] + + (int64_t)ad[15] * bd[11]; + t[27] = (int64_t)ad[12] * bd[15] + + (int64_t)ad[13] * bd[14] + + (int64_t)ad[14] * bd[13] + + (int64_t)ad[15] * bd[12]; + t[28] = (int64_t)ad[13] * bd[15] + + (int64_t)ad[14] * bd[14] + + (int64_t)ad[15] * bd[13]; + t[29] = (int64_t)ad[14] * bd[15] + + (int64_t)ad[15] * bd[14]; + t[30] = (int64_t)ad[15] * bd[15]; + t[31] = 0; + + /* Mod curve order */ + /* 2^446 - 0x8335dc163bb124b65129c96fde933d8d723a70aadc873d6d54a7bb0d */ + /* Propagate carries */ + c = t[ 0] >> 28; t[ 1] += c; t[ 0] = t[ 0] & 0xfffffff; + c = t[ 1] >> 28; t[ 2] += c; t[ 1] = t[ 1] & 0xfffffff; + c = t[ 2] >> 28; t[ 3] += c; t[ 2] = t[ 2] & 0xfffffff; + c = t[ 3] >> 28; t[ 4] += c; t[ 3] = t[ 3] & 0xfffffff; + c = t[ 4] >> 28; t[ 5] += c; t[ 4] = t[ 4] & 0xfffffff; + c = t[ 5] >> 28; t[ 6] += c; t[ 5] = t[ 5] & 0xfffffff; + c = t[ 6] >> 28; t[ 7] += c; t[ 6] = t[ 6] & 0xfffffff; + c = t[ 7] >> 28; t[ 8] += c; t[ 7] = t[ 7] & 0xfffffff; + c = t[ 8] >> 28; t[ 9] += c; t[ 8] = t[ 8] & 0xfffffff; + c = t[ 9] >> 28; t[10] += c; t[ 9] = t[ 9] & 0xfffffff; + c = t[10] >> 28; t[11] += c; t[10] = t[10] & 0xfffffff; + c = t[11] >> 28; t[12] += c; t[11] = t[11] & 0xfffffff; + c = t[12] >> 28; t[13] += c; t[12] = t[12] & 0xfffffff; + c = t[13] >> 28; t[14] += c; t[13] = t[13] & 0xfffffff; + c = t[14] >> 28; t[15] += c; t[14] = t[14] & 0xfffffff; + c = t[15] >> 28; t[16] += 
c; t[15] = t[15] & 0xfffffff; + c = t[16] >> 28; t[17] += c; t[16] = t[16] & 0xfffffff; + c = t[17] >> 28; t[18] += c; t[17] = t[17] & 0xfffffff; + c = t[18] >> 28; t[19] += c; t[18] = t[18] & 0xfffffff; + c = t[19] >> 28; t[20] += c; t[19] = t[19] & 0xfffffff; + c = t[20] >> 28; t[21] += c; t[20] = t[20] & 0xfffffff; + c = t[21] >> 28; t[22] += c; t[21] = t[21] & 0xfffffff; + c = t[22] >> 28; t[23] += c; t[22] = t[22] & 0xfffffff; + c = t[23] >> 28; t[24] += c; t[23] = t[23] & 0xfffffff; + c = t[24] >> 28; t[25] += c; t[24] = t[24] & 0xfffffff; + c = t[25] >> 28; t[26] += c; t[25] = t[25] & 0xfffffff; + c = t[26] >> 28; t[27] += c; t[26] = t[26] & 0xfffffff; + c = t[27] >> 28; t[28] += c; t[27] = t[27] & 0xfffffff; + c = t[28] >> 28; t[29] += c; t[28] = t[28] & 0xfffffff; + c = t[29] >> 28; t[30] += c; t[29] = t[29] & 0xfffffff; + c = t[30] >> 28; t[31] += c; t[30] = t[30] & 0xfffffff; + /* Mod top half of extra words */ + t[ 8] += (int64_t)0x129eec34 * t[24]; + t[ 9] += (int64_t)0x21cf5b54 * t[24]; + t[10] += (int64_t)0x29c2ab70 * t[24]; + t[11] += (int64_t)0x0f635c8c * t[24]; + t[12] += (int64_t)0x25bf7a4c * t[24]; + t[13] += (int64_t)0x2d944a70 * t[24]; + t[14] += (int64_t)0x18eec490 * t[24]; + t[15] += (int64_t)0x20cd7704 * t[24]; + t[ 9] += (int64_t)0x129eec34 * t[25]; + t[10] += (int64_t)0x21cf5b54 * t[25]; + t[11] += (int64_t)0x29c2ab70 * t[25]; + t[12] += (int64_t)0x0f635c8c * t[25]; + t[13] += (int64_t)0x25bf7a4c * t[25]; + t[14] += (int64_t)0x2d944a70 * t[25]; + t[15] += (int64_t)0x18eec490 * t[25]; + t[16] += (int64_t)0x20cd7704 * t[25]; + t[10] += (int64_t)0x129eec34 * t[26]; + t[11] += (int64_t)0x21cf5b54 * t[26]; + t[12] += (int64_t)0x29c2ab70 * t[26]; + t[13] += (int64_t)0x0f635c8c * t[26]; + t[14] += (int64_t)0x25bf7a4c * t[26]; + t[15] += (int64_t)0x2d944a70 * t[26]; + t[16] += (int64_t)0x18eec490 * t[26]; + t[17] += (int64_t)0x20cd7704 * t[26]; + t[11] += (int64_t)0x129eec34 * t[27]; + t[12] += (int64_t)0x21cf5b54 * t[27]; + t[13] += (int64_t)0x29c2ab70 * t[27]; + t[14] += (int64_t)0x0f635c8c * t[27]; + t[15] += (int64_t)0x25bf7a4c * t[27]; + t[16] += (int64_t)0x2d944a70 * t[27]; + t[17] += (int64_t)0x18eec490 * t[27]; + t[18] += (int64_t)0x20cd7704 * t[27]; + t[12] += (int64_t)0x129eec34 * t[28]; + t[13] += (int64_t)0x21cf5b54 * t[28]; + t[14] += (int64_t)0x29c2ab70 * t[28]; + t[15] += (int64_t)0x0f635c8c * t[28]; + t[16] += (int64_t)0x25bf7a4c * t[28]; + t[17] += (int64_t)0x2d944a70 * t[28]; + t[18] += (int64_t)0x18eec490 * t[28]; + t[19] += (int64_t)0x20cd7704 * t[28]; + t[13] += (int64_t)0x129eec34 * t[29]; + t[14] += (int64_t)0x21cf5b54 * t[29]; + t[15] += (int64_t)0x29c2ab70 * t[29]; + t[16] += (int64_t)0x0f635c8c * t[29]; + t[17] += (int64_t)0x25bf7a4c * t[29]; + t[18] += (int64_t)0x2d944a70 * t[29]; + t[19] += (int64_t)0x18eec490 * t[29]; + t[20] += (int64_t)0x20cd7704 * t[29]; + t[14] += (int64_t)0x129eec34 * t[30]; + t[15] += (int64_t)0x21cf5b54 * t[30]; + t[16] += (int64_t)0x29c2ab70 * t[30]; + t[17] += (int64_t)0x0f635c8c * t[30]; + t[18] += (int64_t)0x25bf7a4c * t[30]; + t[19] += (int64_t)0x2d944a70 * t[30]; + t[20] += (int64_t)0x18eec490 * t[30]; + t[21] += (int64_t)0x20cd7704 * t[30]; + t[15] += (int64_t)0x129eec34 * t[31]; + t[16] += (int64_t)0x21cf5b54 * t[31]; + t[17] += (int64_t)0x29c2ab70 * t[31]; + t[18] += (int64_t)0x0f635c8c * t[31]; + t[19] += (int64_t)0x25bf7a4c * t[31]; + t[20] += (int64_t)0x2d944a70 * t[31]; + t[21] += (int64_t)0x18eec490 * t[31]; + t[22] += (int64_t)0x20cd7704 * t[31]; + /* Propagate carries */ + c = t[ 8] >> 28; t[ 9] += c; 
t[ 8] = t[ 8] & 0xfffffff; + c = t[ 9] >> 28; t[10] += c; t[ 9] = t[ 9] & 0xfffffff; + c = t[10] >> 28; t[11] += c; t[10] = t[10] & 0xfffffff; + c = t[11] >> 28; t[12] += c; t[11] = t[11] & 0xfffffff; + c = t[12] >> 28; t[13] += c; t[12] = t[12] & 0xfffffff; + c = t[13] >> 28; t[14] += c; t[13] = t[13] & 0xfffffff; + c = t[14] >> 28; t[15] += c; t[14] = t[14] & 0xfffffff; + c = t[15] >> 28; t[16] += c; t[15] = t[15] & 0xfffffff; + c = t[16] >> 28; t[17] += c; t[16] = t[16] & 0xfffffff; + c = t[17] >> 28; t[18] += c; t[17] = t[17] & 0xfffffff; + c = t[18] >> 28; t[19] += c; t[18] = t[18] & 0xfffffff; + c = t[19] >> 28; t[20] += c; t[19] = t[19] & 0xfffffff; + c = t[20] >> 28; t[21] += c; t[20] = t[20] & 0xfffffff; + c = t[21] >> 28; t[22] += c; t[21] = t[21] & 0xfffffff; + c = t[22] >> 28; t[23] += c; t[22] = t[22] & 0xfffffff; + /* Mod bottom half of extra words */ + t[ 0] += (int64_t)0x129eec34 * t[16]; + t[ 1] += (int64_t)0x21cf5b54 * t[16]; + t[ 2] += (int64_t)0x29c2ab70 * t[16]; + t[ 3] += (int64_t)0x0f635c8c * t[16]; + t[ 4] += (int64_t)0x25bf7a4c * t[16]; + t[ 5] += (int64_t)0x2d944a70 * t[16]; + t[ 6] += (int64_t)0x18eec490 * t[16]; + t[ 7] += (int64_t)0x20cd7704 * t[16]; + t[ 1] += (int64_t)0x129eec34 * t[17]; + t[ 2] += (int64_t)0x21cf5b54 * t[17]; + t[ 3] += (int64_t)0x29c2ab70 * t[17]; + t[ 4] += (int64_t)0x0f635c8c * t[17]; + t[ 5] += (int64_t)0x25bf7a4c * t[17]; + t[ 6] += (int64_t)0x2d944a70 * t[17]; + t[ 7] += (int64_t)0x18eec490 * t[17]; + t[ 8] += (int64_t)0x20cd7704 * t[17]; + t[ 2] += (int64_t)0x129eec34 * t[18]; + t[ 3] += (int64_t)0x21cf5b54 * t[18]; + t[ 4] += (int64_t)0x29c2ab70 * t[18]; + t[ 5] += (int64_t)0x0f635c8c * t[18]; + t[ 6] += (int64_t)0x25bf7a4c * t[18]; + t[ 7] += (int64_t)0x2d944a70 * t[18]; + t[ 8] += (int64_t)0x18eec490 * t[18]; + t[ 9] += (int64_t)0x20cd7704 * t[18]; + t[ 3] += (int64_t)0x129eec34 * t[19]; + t[ 4] += (int64_t)0x21cf5b54 * t[19]; + t[ 5] += (int64_t)0x29c2ab70 * t[19]; + t[ 6] += (int64_t)0x0f635c8c * t[19]; + t[ 7] += (int64_t)0x25bf7a4c * t[19]; + t[ 8] += (int64_t)0x2d944a70 * t[19]; + t[ 9] += (int64_t)0x18eec490 * t[19]; + t[10] += (int64_t)0x20cd7704 * t[19]; + t[ 4] += (int64_t)0x129eec34 * t[20]; + t[ 5] += (int64_t)0x21cf5b54 * t[20]; + t[ 6] += (int64_t)0x29c2ab70 * t[20]; + t[ 7] += (int64_t)0x0f635c8c * t[20]; + t[ 8] += (int64_t)0x25bf7a4c * t[20]; + t[ 9] += (int64_t)0x2d944a70 * t[20]; + t[10] += (int64_t)0x18eec490 * t[20]; + t[11] += (int64_t)0x20cd7704 * t[20]; + t[ 5] += (int64_t)0x129eec34 * t[21]; + t[ 6] += (int64_t)0x21cf5b54 * t[21]; + t[ 7] += (int64_t)0x29c2ab70 * t[21]; + t[ 8] += (int64_t)0x0f635c8c * t[21]; + t[ 9] += (int64_t)0x25bf7a4c * t[21]; + t[10] += (int64_t)0x2d944a70 * t[21]; + t[11] += (int64_t)0x18eec490 * t[21]; + t[12] += (int64_t)0x20cd7704 * t[21]; + t[ 6] += (int64_t)0x129eec34 * t[22]; + t[ 7] += (int64_t)0x21cf5b54 * t[22]; + t[ 8] += (int64_t)0x29c2ab70 * t[22]; + t[ 9] += (int64_t)0x0f635c8c * t[22]; + t[10] += (int64_t)0x25bf7a4c * t[22]; + t[11] += (int64_t)0x2d944a70 * t[22]; + t[12] += (int64_t)0x18eec490 * t[22]; + t[13] += (int64_t)0x20cd7704 * t[22]; + t[ 7] += (int64_t)0x129eec34 * t[23]; + t[ 8] += (int64_t)0x21cf5b54 * t[23]; + t[ 9] += (int64_t)0x29c2ab70 * t[23]; + t[10] += (int64_t)0x0f635c8c * t[23]; + t[11] += (int64_t)0x25bf7a4c * t[23]; + t[12] += (int64_t)0x2d944a70 * t[23]; + t[13] += (int64_t)0x18eec490 * t[23]; + t[14] += (int64_t)0x20cd7704 * t[23]; + /* Propagate carries */ + c = t[ 0] >> 28; t[ 1] += c; rd[ 0] = (int32_t)(t[ 0] & 0xfffffff); + c = t[ 1] >> 28; 
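+    /* This last carry chain leaves each result word rd[i] at 28 bits.
+     * The top word rd[15] may only keep 26 bits, since
+     * 446 == 15 * 28 + 26; the overflow o extracted just below has
+     * weight 2^446 == c (mod n) and is folded back in using the plain
+     * 28-bit limbs of c (0x4a7bb0d, 0x873d6d5, ...), after which one
+     * more carry pass lets the words be packed back into bytes. */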
t[ 2] += c; rd[ 1] = (int32_t)(t[ 1] & 0xfffffff); + c = t[ 2] >> 28; t[ 3] += c; rd[ 2] = (int32_t)(t[ 2] & 0xfffffff); + c = t[ 3] >> 28; t[ 4] += c; rd[ 3] = (int32_t)(t[ 3] & 0xfffffff); + c = t[ 4] >> 28; t[ 5] += c; rd[ 4] = (int32_t)(t[ 4] & 0xfffffff); + c = t[ 5] >> 28; t[ 6] += c; rd[ 5] = (int32_t)(t[ 5] & 0xfffffff); + c = t[ 6] >> 28; t[ 7] += c; rd[ 6] = (int32_t)(t[ 6] & 0xfffffff); + c = t[ 7] >> 28; t[ 8] += c; rd[ 7] = (int32_t)(t[ 7] & 0xfffffff); + c = t[ 8] >> 28; t[ 9] += c; rd[ 8] = (int32_t)(t[ 8] & 0xfffffff); + c = t[ 9] >> 28; t[10] += c; rd[ 9] = (int32_t)(t[ 9] & 0xfffffff); + c = t[10] >> 28; t[11] += c; rd[10] = (int32_t)(t[10] & 0xfffffff); + c = t[11] >> 28; t[12] += c; rd[11] = (int32_t)(t[11] & 0xfffffff); + c = t[12] >> 28; t[13] += c; rd[12] = (int32_t)(t[12] & 0xfffffff); + c = t[13] >> 28; t[14] += c; rd[13] = (int32_t)(t[13] & 0xfffffff); + c = t[14] >> 28; t[15] += c; rd[14] = (int32_t)(t[14] & 0xfffffff); + rd[15] = t[15]; + /* Mod bits over 28 in last word */ + o = rd[15] >> 26; rd[15] &= 0x3ffffff; + rd[ 0] += 0x4a7bb0d * o; + rd[ 1] += 0x873d6d5 * o; + rd[ 2] += 0xa70aadc * o; + rd[ 3] += 0x3d8d723 * o; + rd[ 4] += 0x96fde93 * o; + rd[ 5] += 0xb65129c * o; + rd[ 6] += 0x63bb124 * o; + rd[ 7] += 0x8335dc1 * o; + /* Propagate carries */ + o = rd[ 0] >> 28; rd[ 1] += o; rd[ 0] = rd[ 0] & 0xfffffff; + o = rd[ 1] >> 28; rd[ 2] += o; rd[ 1] = rd[ 1] & 0xfffffff; + o = rd[ 2] >> 28; rd[ 3] += o; rd[ 2] = rd[ 2] & 0xfffffff; + o = rd[ 3] >> 28; rd[ 4] += o; rd[ 3] = rd[ 3] & 0xfffffff; + o = rd[ 4] >> 28; rd[ 5] += o; rd[ 4] = rd[ 4] & 0xfffffff; + o = rd[ 5] >> 28; rd[ 6] += o; rd[ 5] = rd[ 5] & 0xfffffff; + o = rd[ 6] >> 28; rd[ 7] += o; rd[ 6] = rd[ 6] & 0xfffffff; + o = rd[ 7] >> 28; rd[ 8] += o; rd[ 7] = rd[ 7] & 0xfffffff; + o = rd[ 8] >> 28; rd[ 9] += o; rd[ 8] = rd[ 8] & 0xfffffff; + o = rd[ 9] >> 28; rd[10] += o; rd[ 9] = rd[ 9] & 0xfffffff; + o = rd[10] >> 28; rd[11] += o; rd[10] = rd[10] & 0xfffffff; + o = rd[11] >> 28; rd[12] += o; rd[11] = rd[11] & 0xfffffff; + o = rd[12] >> 28; rd[13] += o; rd[12] = rd[12] & 0xfffffff; + o = rd[13] >> 28; rd[14] += o; rd[13] = rd[13] & 0xfffffff; + o = rd[14] >> 28; rd[15] += o; rd[14] = rd[14] & 0xfffffff; + + /* Convert to bytes */ + r[ 0] = (rd[0 ] >> 0); + r[ 1] = (rd[0 ] >> 8); + r[ 2] = (rd[0 ] >> 16); + r[ 3] = (rd[0 ] >> 24) + ((rd[1 ] >> 0) << 4); + r[ 4] = (rd[1 ] >> 4); + r[ 5] = (rd[1 ] >> 12); + r[ 6] = (rd[1 ] >> 20); + r[ 7] = (rd[2 ] >> 0); + r[ 8] = (rd[2 ] >> 8); + r[ 9] = (rd[2 ] >> 16); + r[10] = (rd[2 ] >> 24) + ((rd[3 ] >> 0) << 4); + r[11] = (rd[3 ] >> 4); + r[12] = (rd[3 ] >> 12); + r[13] = (rd[3 ] >> 20); + r[14] = (rd[4 ] >> 0); + r[15] = (rd[4 ] >> 8); + r[16] = (rd[4 ] >> 16); + r[17] = (rd[4 ] >> 24) + ((rd[5 ] >> 0) << 4); + r[18] = (rd[5 ] >> 4); + r[19] = (rd[5 ] >> 12); + r[20] = (rd[5 ] >> 20); + r[21] = (rd[6 ] >> 0); + r[22] = (rd[6 ] >> 8); + r[23] = (rd[6 ] >> 16); + r[24] = (rd[6 ] >> 24) + ((rd[7 ] >> 0) << 4); + r[25] = (rd[7 ] >> 4); + r[26] = (rd[7 ] >> 12); + r[27] = (rd[7 ] >> 20); + r[28] = (rd[8 ] >> 0); + r[29] = (rd[8 ] >> 8); + r[30] = (rd[8 ] >> 16); + r[31] = (rd[8 ] >> 24) + ((rd[9 ] >> 0) << 4); + r[32] = (rd[9 ] >> 4); + r[33] = (rd[9 ] >> 12); + r[34] = (rd[9 ] >> 20); + r[35] = (rd[10] >> 0); + r[36] = (rd[10] >> 8); + r[37] = (rd[10] >> 16); + r[38] = (rd[10] >> 24) + ((rd[11] >> 0) << 4); + r[39] = (rd[11] >> 4); + r[40] = (rd[11] >> 12); + r[41] = (rd[11] >> 20); + r[42] = (rd[12] >> 0); + r[43] = (rd[12] >> 8); + r[44] = (rd[12] >> 16); + r[45] = 
(rd[12] >> 24) + ((rd[13] >> 0) << 4); + r[46] = (rd[13] >> 4); + r[47] = (rd[13] >> 12); + r[48] = (rd[13] >> 20); + r[49] = (rd[14] >> 0); + r[50] = (rd[14] >> 8); + r[51] = (rd[14] >> 16); + r[52] = (rd[14] >> 24) + ((rd[15] >> 0) << 4); + r[53] = (rd[15] >> 4); + r[54] = (rd[15] >> 12); + r[55] = (rd[15] >> 20); + r[56] = 0; +} + +/* Precomputed multiples of the base point. */ +static const ge448_precomp base[58][8] = { +{ + { + { 0x70cc05e, 0x26a82bc, 0x0938e26, 0x80e18b0, 0x511433b, 0xf72ab66, + 0x412ae1a, 0xa3d3a46, 0xa6de324, 0x0f1767e, 0x4657047, 0x36da9e1, + 0x5a622bf, 0xed221d1, 0x66bed0d, 0x4f1970c }, + { 0x230fa14, 0x08795bf, 0x7c8ad98, 0x132c4ed, 0x9c4fdbd, 0x1ce67c3, + 0x73ad3ff, 0x05a0c2d, 0x7789c1e, 0xa398408, 0xa73736c, 0xc7624be, + 0x03756c9, 0x2488762, 0x16eb6bc, 0x693f467 } + }, + { + { 0x5555555, 0x5555555, 0x5555555, 0x5555555, 0x5555555, 0x5555555, + 0x5555555, 0x5555555, 0xaaaaaa9, 0xaaaaaaa, 0xaaaaaaa, 0xaaaaaaa, + 0xaaaaaaa, 0xaaaaaaa, 0xaaaaaaa, 0xaaaaaaa }, + { 0xa9386ed, 0xeafbcde, 0xda06bda, 0xb2bed1c, 0x098bbbc, 0x833a2a3, + 0x80d6565, 0x8ad8c4b, 0x7e36d72, 0x884dd7b, 0xed7a035, 0xc2b0036, + 0x6205086, 0x8db359d, 0x34ad704, 0xae05e96 } + }, + { + { 0x6ff2f8f, 0x2817328, 0xda85757, 0xb769465, 0xfd6e862, 0xf7f6271, + 0x8daa9cb, 0x4a3fcfe, 0x2ba077a, 0xda82c7e, 0x41b8b8c, 0x9433322, + 0x4316cb6, 0x6455bd6, 0xb9108af, 0x0865886 }, + { 0x88ed6fc, 0x22ac135, 0x02dafb8, 0x9a68fed, 0x7f0bffa, 0x1bdb676, + 0x8bb3a33, 0xec4e1d5, 0xce43c82, 0x56c3b9f, 0xa8d9523, 0xa6449a4, + 0xa7ad43a, 0xf706cbd, 0xbd5125c, 0xe005a8d } + }, + { + { 0x8ba7f30, 0xce42ac4, 0x9e120e2, 0xe179894, 0x8ba21ae, 0xf1515dd, + 0x301b7bd, 0x70c74cc, 0x3fda4be, 0x0891c69, 0xa09cf4e, 0x29ea255, + 0x17226f9, 0x2c1419a, 0xc6c0cce, 0x49dcbc5 }, + { 0xde51839, 0xe236f86, 0xd4f5b32, 0x44285d0, 0x472b5d4, 0x7ea1ca9, + 0x1c0d8f9, 0x7b8a5bc, 0x90dc322, 0x57d845c, 0x7c02f04, 0x1b979cb, + 0x3a5de02, 0x27164b3, 0x4accde5, 0xd49077e } + }, + { + { 0x2030034, 0xa99d109, 0x6f950d0, 0x2d8cefc, 0xc96f07b, 0x7a920c3, + 0x08bc0d5, 0x9588128, 0x6d761e8, 0x62ada75, 0xbcf7285, 0x0def80c, + 0x01eedb5, 0x0e2ba76, 0x5a48dcb, 0x7a9f933 }, + { 0x2f435eb, 0xb473147, 0xf225443, 0x5512881, 0x33c5840, 0xee59d2b, + 0x127d7a4, 0xb698017, 0x86551f7, 0xb18fced, 0xca1823a, 0x0ade260, + 0xce4fd58, 0xd3b9109, 0xa2517ed, 0xadfd751 } + }, + { + { 0xabef79c, 0x7fd7652, 0x443a878, 0x6c20a07, 0x12a7109, 0x5c1840d, + 0x876451c, 0x4a06e4a, 0xad95f65, 0x3bed0b4, 0x3fb0260, 0x25d2e67, + 0xaebd971, 0x2e00349, 0x4498b72, 0x54523e0 }, + { 0x07c7bcc, 0xea5d1da, 0x38ea98c, 0xcce7769, 0x61d2b3e, 0x80284e8, + 0x6e1ff1b, 0x48de76b, 0x9c58522, 0x7b12186, 0x2765a1a, 0xbfd053a, + 0x056c667, 0x2d743ec, 0xd8ab61c, 0x3f99b9c } + }, + { + { 0xeb5eaf7, 0xdf9567c, 0x78ac7d7, 0x110a6b4, 0x4706e0b, 0x2d33501, + 0x0b5a209, 0x0df9c7b, 0x568e684, 0xba4223d, 0x8c3719b, 0xd78af2d, + 0xa5291b6, 0x77467b9, 0x5c89bef, 0x079748e }, + { 0xdac377f, 0xe20d3fa, 0x72b5c09, 0x34e8669, 0xc40bbb7, 0xd8687a3, + 0xd2f84c9, 0x7b3946f, 0xa78f50e, 0xd00e40c, 0x17e7179, 0xb875944, + 0xcb23583, 0x9c7373b, 0xc90fd69, 0x7ddeda3 } + }, + { + { 0x153bde0, 0x2538a67, 0x406b696, 0x223aca9, 0x1ad713e, 0xf9080dc, + 0xd816a64, 0x6c4cb47, 0x5dc8b97, 0xbc28568, 0xc08e2d7, 0xd97b037, + 0x5d0e66b, 0x5b63fb4, 0x520e8a3, 0xd1f1bc5 }, + { 0xe69e09b, 0x4eb873c, 0xbc8ee45, 0x1663164, 0xba8d89f, 0x08f7003, + 0x386ad82, 0x4b98ead, 0xbd94c7b, 0xa4b93b7, 0xc6b38b3, 0x46ba408, + 0xf3574ff, 0xdae87d1, 0xe9bea9b, 0xc7564f4 } + }, +}, +{ + { + { 0x5bfac1c, 0x2e4fdb2, 0xf5f3bca, 0xf0d79aa, 0x20fb7cc, 0xe756b0d, + 
0xb39609a, 0xe3696be, 0x5a5ab58, 0xa019fc3, 0x3b281dd, 0xa2b2485, + 0x61ac0a2, 0xe3e2be7, 0xeb56730, 0xf19c34f }, + { 0xa30241e, 0x2d25ce8, 0xb73d7a1, 0xf5661ea, 0xdaac9f4, 0x4611ed0, + 0x4ced72c, 0xd544234, 0xe92e985, 0xce78f52, 0x4da4aad, 0x6fe5dd4, + 0x1d363ce, 0xfcaddc6, 0xc9111bf, 0x3beb69c } + }, + { + { 0x940ebc9, 0xd2e7660, 0xb17bbe0, 0xe032018, 0x75c0575, 0xad49391, + 0x21c7f34, 0xdd0b147, 0x3e147e0, 0x52c2ba4, 0x0ee8973, 0x7dd03c6, + 0xecf2754, 0x5472e8d, 0xd6482bb, 0x17a1cd1 }, + { 0x8128b3f, 0xdd43b84, 0xea7dd25, 0xf0cae34, 0xff07df2, 0x81ca99f, + 0x92ebbdc, 0x1c89597, 0x72155e6, 0x45c7a68, 0x39ddd08, 0x907a50e, + 0xbb2d89b, 0xbe398c2, 0x1b3b536, 0x38063f9 } + }, + { + { 0xf843b23, 0x149fafb, 0xac7f22a, 0x00ab582, 0xf2f4d4c, 0xa3b981b, + 0x4341a22, 0x2ce1a65, 0x7c03b63, 0x68a4074, 0x12f2cf8, 0x63206a2, + 0x5149741, 0xc9961d3, 0xbc7099e, 0xfb85430 }, + { 0x90a9e59, 0x9c91072, 0x06de367, 0x734e94a, 0xdb99214, 0x5cf3cbe, + 0x45b1fb9, 0xc6bce32, 0xdd7be0d, 0x1a82abe, 0xede7d1c, 0xf74976a, + 0x21503bd, 0x7025b7c, 0x0d096ab, 0xf789491 } + }, + { + { 0x555a41b, 0x6bd48bb, 0x67de206, 0xfbdd0d0, 0xdd6dfd1, 0x98bc477, + 0x3e40b8a, 0x1d0693b, 0xda32ae4, 0x6e15563, 0xfcebaa2, 0x0194a20, + 0x0980a93, 0xda11615, 0x0109cec, 0x8e11920 }, + { 0xffb9726, 0x8ea0552, 0x047e44b, 0xeba50a4, 0x60ddf76, 0xc050d24, + 0xac690e0, 0xe009204, 0x9b18edc, 0x47b8639, 0xc77f23f, 0x2f5b76a, + 0x0792905, 0x4296c24, 0x06f6dc7, 0x73f6b4a } + }, + { + { 0x3b10cad, 0xb6ef9ea, 0xf7c8fce, 0x312843d, 0x8bedf86, 0x5bdcd52, + 0xf6dd823, 0x2889059, 0x08bfde0, 0x04578e9, 0x123e2e5, 0x3245df3, + 0x7ee9e3a, 0xbf461d5, 0x6f94ceb, 0xddec2d4 }, + { 0x145768f, 0x21b43b9, 0xdae962a, 0xe79a8f9, 0xcbb043f, 0xff1972b, + 0x239649b, 0xe3dcf6d, 0xc533b85, 0xed592bd, 0xdbe22d0, 0x14ff94f, + 0xf1d8e22, 0x6c4eb87, 0xd18cf6d, 0xd8d4c71 } + }, + { + { 0x8d96345, 0xcda666c, 0x836cd21, 0x9ecaa25, 0x984606e, 0x6e885bd, + 0x804f054, 0x1dd5fef, 0x6959ae4, 0x9dfff6b, 0xc9b55cc, 0x99b9cf8, + 0x62b9b80, 0xb4716b0, 0x554b128, 0x13ec87c }, + { 0x75aacc2, 0xe696d1f, 0x87fc5ff, 0xf78c993, 0x3809d42, 0x76c0947, + 0xb618fa8, 0x99ce62d, 0x2f53341, 0x35e3e02, 0x0db6c5e, 0x62fc1ac, + 0x00d8b47, 0xa1fb8e6, 0x58f0d1e, 0x0bc1070 } + }, + { + { 0x16da513, 0x1f45269, 0xf5cf341, 0x1f2fc04, 0x64d23e0, 0xae92086, + 0xda8a113, 0x4e33082, 0x1cfc085, 0x2688ec6, 0x6e5327f, 0x6f2e8de, + 0xb4e48a8, 0x2070db3, 0x3240ade, 0xd662697 }, + { 0xfbd997b, 0xa6b317f, 0x49e26bd, 0x9fa1b56, 0x8cba0f3, 0xcbf0d25, + 0x17b4745, 0x4a7791b, 0x5c9e190, 0x25f555b, 0x923ec4c, 0x7cd3940, + 0xe98f1b6, 0x16f4c6a, 0xbcd4e0f, 0x7962116 } + }, + { + { 0x02491e3, 0x8d58fa3, 0x7ab3898, 0x7cf76c6, 0x647ebc7, 0xbc2f657, + 0xd25f5a3, 0x5f4bfe0, 0xd69505d, 0x503f478, 0x3fb6645, 0x4a889fc, + 0xfa86b18, 0x33e1bc1, 0x5508dd8, 0xabb234f }, + { 0x9a05b48, 0x5348e1b, 0x64dc858, 0x57ac5f1, 0xec8a2d3, 0x21f4d38, + 0xa3a3e9d, 0x5ec6d3c, 0x560a0b8, 0xcd4062e, 0x3433f59, 0x49b74f7, + 0xcab14e3, 0xefd9d87, 0xeb964f5, 0x858ce7f } + }, +}, +{ + { + { 0xeb731b4, 0x7577254, 0x4e2397e, 0x9fff1fb, 0xc821715, 0x749b145, + 0x2e65e67, 0x40619fe, 0x2e618d8, 0x57b8281, 0x707b83e, 0x063186c, + 0x31b24a2, 0xcfc80cb, 0xac75169, 0xcca6185 }, + { 0xb255818, 0x6539f44, 0x0368bce, 0x5895da0, 0x17c7482, 0x841a309, + 0xb1a9c9e, 0x85469e1, 0xe4f7d9d, 0x05664c0, 0x7b35cc0, 0x8a06318, + 0xa0e9b0a, 0x214763a, 0x4b26ac2, 0x1bd872c } + }, + { + { 0xa93762b, 0x3578f97, 0x72d52bc, 0x434f69a, 0x22cb565, 0xddcca40, + 0xff20544, 0xa7d1e41, 0x8a66588, 0x823475d, 0x99d7baf, 0x9fc97c7, + 0x660e421, 0x15542f1, 0x843faf6, 0xa7d1f60 }, + { 
0x4063ccc, 0xbbfaab5, 0xa49855a, 0x3ad9bad, 0x5bddbfe, 0xffd5f1c, + 0xae87e59, 0x0e419c2, 0xf89956b, 0xdce6ed6, 0xccd8951, 0xf047c21, + 0xa83c991, 0x6ed4a1b, 0x2d28e0a, 0x85af86e } + }, + { + { 0x9ed48a8, 0x04433c4, 0x0bc375d, 0xeffa858, 0xfa6e3b5, 0xfb0e1b2, + 0xa1aadda, 0x51483a2, 0xf8b2ea8, 0x733448d, 0xf639f0c, 0xaa0513c, + 0xa23bf84, 0x6bc61a3, 0xdc2430d, 0x3e64f68 }, + { 0xc5876b1, 0x51bf502, 0x1c0dd2a, 0x6b83375, 0x342914f, 0xe597be1, + 0xf8e632c, 0x43d5ab0, 0xd62587b, 0x2696715, 0xed34f24, 0xe87d20a, + 0xe18baf7, 0x25b7e14, 0xe22e084, 0xf5eb753 } + }, + { + { 0x24d8295, 0x51da717, 0x18d1340, 0xd478e43, 0x2cf7f66, 0xacf94f4, + 0x3760711, 0x230d7d1, 0x5abc626, 0x078a66a, 0x6b5f6da, 0xd78b0bd, + 0x96d1d0b, 0x23a9713, 0x4bd960f, 0x87623d6 }, + { 0x77db53f, 0x0841a99, 0xf4d03ee, 0x23c1a53, 0x1f95df1, 0x2f62c2e, + 0x116f4e7, 0xd1e2ec1, 0x34811a9, 0x896d2fe, 0xec8096e, 0xad65e2b, + 0xb1744a6, 0x09d36f9, 0xff5ddf7, 0x564bac7 } + }, + { + { 0xc3f77cb, 0x48b41e2, 0x0968938, 0x5227673, 0xfd9b452, 0xff1b899, + 0x2e03908, 0x67cf3bf, 0x248a6fb, 0x3731d90, 0x256598f, 0xd800a05, + 0xbdc8530, 0x347d2f2, 0x7ad08a1, 0xc72a300 }, + { 0x1d65f73, 0x5e5be74, 0x4206ead, 0x183d4ae, 0xade4013, 0xcb50c1c, + 0x3102483, 0x39db43d, 0x70d6325, 0x0eb49fa, 0xc1f02b9, 0xa18f6a2, + 0xdbf5e66, 0x3e6fe30, 0x3a82aa5, 0xac4eeb9 } + }, + { + { 0x3613d47, 0x295affd, 0xb56f343, 0x7b7e68a, 0x92b173b, 0x9806296, + 0xbad35fb, 0x937061e, 0x5c21eea, 0x2501978, 0x787a746, 0xe92721b, + 0x3651631, 0x463c46c, 0xc6f2d5a, 0x6da4b5d }, + { 0x6e6d18c, 0xcb67cc1, 0x0010588, 0x1b30d52, 0xdb1d1e8, 0x1bb6ea6, + 0xad11474, 0x9c6308a, 0x3d19b1c, 0xc316741, 0xbe4fb79, 0xf2e84d7, + 0xe050f77, 0xeccb873, 0xcc2bf86, 0xf7c8d80 } + }, + { + { 0x7ab20e5, 0x16fe2e1, 0xecf3a92, 0x274dead, 0x0972f67, 0x9f43487, + 0x4605751, 0x9a65a45, 0xb8980b2, 0x9351f07, 0x0eb08a5, 0x412962b, + 0x733f440, 0xb8c9bfd, 0x1ca250f, 0xac2cd64 }, + { 0x2ba7d26, 0x68cdd0f, 0x4e0beea, 0xd3d2a4a, 0x9f4a258, 0x50135c1, + 0xf0d02e4, 0xb475e53, 0x589283a, 0x432d8c6, 0xa0a2b6c, 0x29141bf, + 0x13704bc, 0xd7379ec, 0x52459bf, 0x831562c } + }, + { + { 0xeeec506, 0x676b366, 0x45da557, 0xdd6cad5, 0x77057d2, 0x9de39cb, + 0xdf05bf1, 0x388c5fe, 0xdfb1f03, 0x6e55650, 0x52126c9, 0xdbceffa, + 0x3a4a220, 0xe4d187b, 0xeb27020, 0xac914f9 }, + { 0xd2e5f30, 0x3f4ab98, 0xdd94451, 0x6ae97da, 0x0d80981, 0x64af695, + 0xf2aa2ce, 0x36b4b90, 0x18fcf59, 0x6adcd7a, 0xc116c81, 0x3ddfe6d, + 0x549b9e3, 0x661072b, 0xec4584d, 0xd9e3134 } + }, +}, +{ + { + { 0xa1e400c, 0x6e46707, 0x551e806, 0xcdc990b, 0x3a07724, 0xfa51251, + 0x1b3e4f5, 0x500553f, 0xef4dac3, 0x67e8b58, 0x2cb4cc7, 0x958349f, + 0x7f9143c, 0x948b4ed, 0x2b7822b, 0xe646d09 }, + { 0x2bc3c26, 0xd185dd5, 0xc837fc9, 0x34ba16e, 0x5a788b7, 0x516d4ba, + 0x56142b0, 0x72f2de7, 0xf445b3d, 0x5846f61, 0xf4631a1, 0xdaec5c9, + 0x169ea9b, 0xa10b18d, 0xaf6751b, 0x85d2998 } + }, + { + { 0x43ddf31, 0xda0cac4, 0x1860911, 0x0966e17, 0x3cba600, 0x9c3a717, + 0x571f895, 0x5781880, 0x737ac21, 0x5e2a927, 0x6c253fb, 0x8a46148, + 0x95ee626, 0xe801cf5, 0x5f84fc0, 0x271166a }, + { 0xba856bd, 0x306937f, 0xbe80a43, 0x80cb179, 0xffb5980, 0x70393b2, + 0x660fc64, 0xa8e4a1c, 0xc0d5c98, 0x5078abf, 0xfbd31ff, 0x62ba530, + 0x9e51b88, 0xda60844, 0x355ae15, 0xdb6ecb0 } + }, + { + { 0x23c5d49, 0xbcbb6ea, 0x87959bc, 0x08906ba, 0x0991665, 0x61cc088, + 0xd90d13c, 0x21d6b41, 0xd03afe9, 0x0c27ac1, 0x5cfea52, 0x159995f, + 0xbdfe220, 0x4057e20, 0xcbdf058, 0xdd1b349 }, + { 0x2e37159, 0x0cd6626, 0x3eb0d17, 0x8cea8e4, 0x5bce7f0, 0x553af08, + 0x5b6511d, 0xb94cb5f, 0x50e0330, 0x7b8d3a5, 0x57ab7e7, 
0x4159110, + 0x6aa886f, 0x320820e, 0xc5b6b81, 0x130d4d6 } + }, + { + { 0xc7bb2ed, 0x2f98059, 0xa49bdfb, 0x33ebf4c, 0xb0a675b, 0x04c72a1, + 0xadb6c14, 0x94f9ea4, 0xcf728c0, 0x03376d8, 0x4c6eb6a, 0x5c059d3, + 0xeb8da48, 0x0178408, 0x2956817, 0x8bf607b }, + { 0xceb3d28, 0x7ad2822, 0x37ae653, 0xd07a403, 0xc1e46b2, 0xbc68739, + 0x9154ba9, 0x15d7cca, 0xa26617d, 0x6b97103, 0xb2e0d28, 0xa610314, + 0xfd4d363, 0x52a08ba, 0xc7dc2af, 0x80c2638 } + }, + { + { 0x3187140, 0x0cde7ef, 0x4b70acd, 0x93b92ca, 0x7a79cdc, 0x5696e50, + 0x8eaab66, 0x73cc972, 0x8f1b0c7, 0x6b8c5b6, 0x4f7e0b1, 0xb39a318, + 0x376108a, 0x72cfb0d, 0x98536a7, 0x0c53efc }, + { 0x24c2f1e, 0x03b52a8, 0x6399b78, 0x717132e, 0x349a85d, 0x31ebd25, + 0x1a200d4, 0x265ee81, 0x407d7ad, 0x0b1aad2, 0x94d2962, 0x9a9ebc8, + 0x41171d9, 0x994e6cd, 0x6c8fa83, 0x09178d8 } + }, + { + { 0xa2593a1, 0x7d1d238, 0xb38fb19, 0x863e93a, 0xe7712a9, 0xd23a4cc, + 0x27efcd5, 0x7477b13, 0x1392f6c, 0x3ba69ff, 0xf7bb5a5, 0x63e0c32, + 0x026effd, 0x20412c0, 0xef424ab, 0xd3ee8e4 }, + { 0x64e5174, 0x14c0b2d, 0xe58c47b, 0x2a611f2, 0xc1e8635, 0xaa58a06, + 0xcf17034, 0x1870c3e, 0x83f1bf3, 0xb0d5e34, 0x16c7eb3, 0xb19905c, + 0x6efa4ca, 0xbf85d62, 0x180f92b, 0xfd16b2f } + }, + { + { 0x3adcb48, 0xc0431af, 0xba90496, 0xc9a7a8d, 0x3895294, 0xd765a16, + 0x551de70, 0xb02a41a, 0x749b8a1, 0xb71b261, 0xc6f3e47, 0x0dfa89e, + 0x0f5d9ce, 0x392c0d8, 0x31aee3c, 0x43c59d8 }, + { 0x4d76f49, 0x94bfb6d, 0x27d68a5, 0xe8f5b82, 0x630fd08, 0x78ae1d9, + 0xce1bdae, 0x1379029, 0x66715dc, 0x9689da0, 0xd3278c7, 0x5d4cb24, + 0x9e84fbc, 0x77c9833, 0xea1048c, 0xc8478dc } + }, + { + { 0x770d2ba, 0xe4b8f31, 0x42ea095, 0x744f652, 0x036f138, 0xd06e090, + 0x3b078ca, 0xd3a3d5b, 0x78b8417, 0xc7ae541, 0xc738fd7, 0xad6c5d4, + 0x4676454, 0x6178984, 0x5d9a392, 0xfbf3423 }, + { 0xfff772f, 0x8e451a7, 0x5ffbead, 0x8605bb7, 0x930d59f, 0x6f75cc1, + 0x8f3f460, 0xd4f4755, 0x6700c8a, 0xefd2d79, 0x2406421, 0xceb462a, + 0x9dfe8f1, 0x8ed0f97, 0xd1d7600, 0x0280bf1 } + }, +}, +{ + { + { 0xdd9a54d, 0x761c219, 0x86a39c0, 0x1127fcb, 0x4c9bedd, 0x7d0e4f0, + 0x4d976b6, 0x27c017a, 0xda042cf, 0x800c973, 0x2593f11, 0xe7419af, + 0xae67960, 0xbd49448, 0x744fd85, 0xd3b60b7 }, + { 0x61676fe, 0x5e74ed9, 0x39af627, 0x7383ef3, 0x5e62df7, 0x34407e0, + 0x8bf3196, 0xb053461, 0x583b407, 0xd6b7184, 0x55011be, 0xe3d0685, + 0x2124b52, 0x94083d0, 0xf780aaf, 0xa908324 } + }, + { + { 0x73ec9c3, 0xb27af1a, 0x70fa725, 0xb66ad9f, 0x8cf73e4, 0x07724f5, + 0x9949358, 0xc3fcd57, 0xda0cc01, 0x06efb79, 0x10597c9, 0x1e977d2, + 0x703e8d6, 0xcd732be, 0x6d0b69e, 0x6fd29bf }, + { 0x667128e, 0xca658ac, 0xc7872b3, 0xca0036a, 0x5355837, 0xc969858, + 0x075cf1c, 0x59f3be8, 0x3809a11, 0x9f1b9b0, 0x9733871, 0x6881ced, + 0xe902a5f, 0x8cda0fb, 0x4e3871e, 0x4d8c69b } + }, + { + { 0xddee82f, 0x5c3bd07, 0x2f9723b, 0xe52dd31, 0x74f1be8, 0xcf87611, + 0x35f8657, 0xd9ecbd8, 0xfbfea17, 0x4f77393, 0xd78fe2c, 0xec9579f, + 0x0fb0450, 0x320de92, 0x95d9c47, 0xbfc9b8d }, + { 0x5e1b4c3, 0x818bd42, 0x40e2c78, 0x0e0c41c, 0xbccb0d0, 0x0f7ce9a, + 0x5ef81fb, 0xc7e9fa4, 0x73574ad, 0x2561d6f, 0xd2efb0b, 0xa2d8d99, + 0xe96cd0a, 0xcf8f316, 0x4964807, 0x088f0f1 } + }, + { + { 0x45d5a19, 0x0a84989, 0x6c2131f, 0x47ab39c, 0xf3fc35d, 0x5c02824, + 0x9ee8127, 0x3be77c8, 0xc90b80a, 0xa8491b7, 0xa28aa93, 0x5397631, + 0x6c0b344, 0x54d6e81, 0x876d0e4, 0x22878be }, + { 0x6db3bf6, 0xeecb8a4, 0x54577a3, 0x340f295, 0x9a00f85, 0xa779868, + 0x4bb9147, 0x98465d7, 0xda3c736, 0x9532d7d, 0x7504b20, 0x6d574f1, + 0xd86e435, 0x6e356f4, 0x4533887, 0x70c2e8d } + }, + { + { 0xd293980, 0xdce5a0a, 0x069010e, 0x32d7210, 0x06deaaa, 
0x64af59f, + 0x59239e4, 0xd6b43c4, 0x9199c29, 0x74bf255, 0x11e1e2b, 0x3efff41, + 0xcb0f8d8, 0x1aa7b5e, 0x989e395, 0x9baa22b }, + { 0x7b33ac1, 0xf78db80, 0x54ce80a, 0x05a3b43, 0x7bc8e12, 0x371defc, + 0x1224610, 0x63305a0, 0x6d697ef, 0x028b1ae, 0x1cd8051, 0x7aba39c, + 0x28ee4b4, 0x76ed7a9, 0x7f99901, 0x31bd02a } + }, + { + { 0xf075566, 0xf9dab7a, 0xf56f18b, 0x84e29a5, 0xf64e56d, 0x3a4c45a, + 0x6a7302d, 0xcf3644a, 0x156b658, 0xfb40808, 0xf96be52, 0xf33ef9c, + 0xcaa2f08, 0xfe92038, 0xb261894, 0xcfaf2e3 }, + { 0x224ce3f, 0xf2a0dbc, 0x592eb27, 0xed05009, 0x95889d0, 0x501743f, + 0x77c95c2, 0xa88a478, 0xdd63da9, 0x86755fb, 0xc7ee828, 0x9024acf, + 0xf38113b, 0x634b020, 0x6056e64, 0x3c5aacc } + }, + { + { 0xa2ef760, 0xe03ff3a, 0xb1c3bac, 0x3b95767, 0x940d754, 0x51ce6aa, + 0x47a9a3d, 0x7cbac3f, 0x34f8d1a, 0xa864ac4, 0x80dbd47, 0x1eff3f2, + 0x7ebd5ca, 0xd8ab660, 0x05b07ed, 0xc4df5c4 }, + { 0xa4f095b, 0x3dc92df, 0x7cdbd9a, 0x5ae36a5, 0x7891e04, 0x7ff2973, + 0x0a5fe7b, 0x37c0313, 0xaa6e35e, 0x210d7b0, 0xbf200d8, 0x6edfb53, + 0x84afb85, 0x787b68d, 0x72c6de3, 0x9b5c49b } + }, + { + { 0x4010f4e, 0x5185716, 0x0536ebe, 0xe0b144b, 0x887d663, 0xacabb14, + 0xedf584f, 0xac1caed, 0xaf175a3, 0xb43fb8f, 0xf992a3c, 0x310b6d5, + 0x85178a4, 0xf2c4aa2, 0x8bd56bf, 0x69c9969 }, + { 0xa4d972e, 0x73d6372, 0x9583803, 0x3d5bb2e, 0xd891581, 0x7bf7d18, + 0x568a34a, 0xa5ce5d7, 0x1f45c81, 0x670b433, 0x1f96910, 0x97265a7, + 0xb07c1ea, 0xdb14eb3, 0xfed447c, 0xdf008ea } + }, +}, +{ + { + { 0x00c2f10, 0x0379f5a, 0xd350285, 0xb320b4f, 0x8efdd7d, 0x74e560e, + 0xf46a140, 0xf2f017e, 0x0f34624, 0x2ced1a6, 0xca08ec9, 0x7c4b4e3, + 0x5d8bc6b, 0xdffc2a1, 0x527b007, 0xcc8f3f3 }, + { 0x861fe83, 0x59f8ac4, 0xd03144c, 0x8d48d2c, 0xbfa6dce, 0xa8457d2, + 0x677c136, 0xd7ed333, 0xc228e18, 0xcb8e219, 0x16ab1e4, 0x5f70bc9, + 0x3780370, 0x2ae3a3d, 0x88f17ad, 0x9f33654 } + }, + { + { 0x960e4bb, 0xeab0710, 0xab9cfd3, 0xc668a78, 0xb0ef946, 0x2e85553, + 0x8df5df3, 0xa43c4b9, 0x3cb3646, 0x0ecd559, 0x18dbe71, 0x6f543c4, + 0xf59818b, 0xee7edaa, 0x90911c1, 0xc44e8d2 }, + { 0x269b509, 0xafb38b1, 0x52afe2c, 0x9e2737c, 0xccfa664, 0x5b2ef02, + 0xe1cc58b, 0x1e0aeac, 0x5ea134e, 0x37a57e9, 0x83b9fc2, 0xc9c465a, + 0x6e3ecca, 0x4b9e8c7, 0x9bdbab5, 0xca07dbe } + }, + { + { 0xb0d7807, 0xd297f3c, 0xf59ce61, 0xee441a5, 0xb2db844, 0x728553b, + 0x640e9e0, 0x90f87e5, 0xcb76dff, 0xaa72cbf, 0x4012d57, 0x065c686, + 0x9678b44, 0xd5ee88f, 0x2177603, 0x3d74b85 }, + { 0x748b68e, 0x3f9c947, 0x8f44d44, 0x03856d9, 0x462426c, 0xde34b84, + 0x845ab29, 0xc16d1bb, 0xd2e18de, 0x9df6217, 0xb154643, 0xec6d219, + 0x2ee0f8f, 0x22a8ec3, 0x91c5175, 0x632ad38 } + }, + { + { 0x6869267, 0x19d9d23, 0xfe5532a, 0x628df94, 0x6dc9a01, 0x458d76c, + 0x2cc39c8, 0x405fe6c, 0xf3a04ba, 0x7dddc67, 0x12500c7, 0xfee6303, + 0xa50e9de, 0x580b6f0, 0x6090604, 0xfb5918a }, + { 0x3af6b2d, 0xd715925, 0x1c7d1ec, 0x83d62d6, 0x85858c4, 0x94398c1, + 0x14bfb64, 0x94643dc, 0xaf7db80, 0x758fa38, 0xa8a1557, 0xe2d7d93, + 0x3562af1, 0xa569e85, 0x84346aa, 0xd226bdd } + }, + { + { 0xd0ccd20, 0xc2d0a5e, 0x5dbc0cf, 0xeb9adb8, 0x26d7e88, 0xe0a29ee, + 0x84a8e98, 0x8bb39f8, 0x37396ea, 0x511f1c1, 0xc8b2fb3, 0xbc9ec5a, + 0x090e5bc, 0x299d81c, 0x4cdd587, 0xe1dfe34 }, + { 0x5e465b7, 0x80f61f4, 0x1bad59e, 0x5699c53, 0xb79ff92, 0x85e92e4, + 0x9db244c, 0x1e64fce, 0xa22097d, 0x3748574, 0xefff24e, 0xe2aa6b9, + 0x0a10bc6, 0xb951be7, 0x9067a1c, 0x6685326 } + }, + { + { 0xa6114d3, 0xf716ddf, 0x037ec1f, 0x9e515f5, 0x44944a6, 0x7734541, + 0xaba97cc, 0x1540c4c, 0x8b54bb7, 0xe41e548, 0xcae37bc, 0x4363156, + 0xf3d2ce8, 0xc384eaf, 0x4c58ba4, 0x72a4f45 }, 
+ { 0xdcaf3fc, 0x0ceb530, 0x78dcdbb, 0x72d5365, 0xc6320fa, 0x9b44084, + 0xeb74c70, 0x6262d34, 0x608e6dc, 0x8abac85, 0x10dd38d, 0x82a5264, + 0xa819b8d, 0xbc39911, 0x03ad0d9, 0xbda15fe } + }, + { + { 0xf9dc60b, 0xadbf587, 0x7d846d2, 0xf9d814f, 0xb77bde0, 0xccdd241, + 0x2242f50, 0x89cb6d7, 0xe6360a8, 0x95c0e3e, 0xdf49713, 0x7c7dd5a, + 0x57d5814, 0x68e0e49, 0x0c16571, 0x3aa097d }, + { 0x267d03a, 0xb56b672, 0x8c44af4, 0x4f55708, 0xf3252a5, 0x67c49e7, + 0xc94a469, 0x871d6cf, 0x01fbfaa, 0x57ae998, 0x48a5d8e, 0x5c0e48f, + 0x5e240b9, 0xe9bf9c8, 0x99d41ca, 0xa410189 } + }, + { + { 0xb2889b4, 0x6beb0c7, 0x9455370, 0x78b7f89, 0x47ca364, 0xd434214, + 0x9f21e5b, 0xdd9d2da, 0x0a7e4aa, 0xa0c7c18, 0xda1660c, 0x022c0d4, + 0x5a57002, 0xe1f5c16, 0x518f68f, 0x51c7c9e }, + { 0x2586502, 0x6d521b6, 0x183ec1b, 0xa0f2cb3, 0xcaa5e16, 0x578b4e0, + 0x764997f, 0x7bd4fbd, 0x64b1804, 0x7ec56c3, 0x0ee08e4, 0xb75a254, + 0xdc19080, 0x6bf74a6, 0x97d6e59, 0x6ec793d } + }, +}, +{ + { + { 0x0a4beb9, 0x16789d6, 0x9b9c801, 0x512b2cd, 0x8c7bb9c, 0xf8b6d10, + 0x9ebdc8c, 0xd85651e, 0x9ba971a, 0xc945082, 0x7e1cf78, 0x852d9ea, + 0x0af01e2, 0x6a45e35, 0x6151dcf, 0xe6cdadf }, + { 0x2b8c01b, 0xc454bb4, 0x3d54cd2, 0x59e0c49, 0x454d608, 0x8e1e686, + 0xd8c6103, 0x0dbae4b, 0x6c18b18, 0xa5603a1, 0x3369093, 0x227a6b2, + 0x5f3de1c, 0xf1e8929, 0x8ab63c5, 0x42f0b58 } + }, + { + { 0x5b596d8, 0xf1974cc, 0x44719f0, 0xee8093f, 0xf6f5b54, 0x40ba933, + 0x2f3d654, 0xd6e5365, 0x26d73b8, 0x9aeb835, 0x0776382, 0x50ed535, + 0xad43875, 0x3be47d6, 0xc786e48, 0x21d56df }, + { 0xb73bb39, 0x8a75e18, 0xf265a78, 0x9eba84c, 0x2e772e7, 0x7c02a4d, + 0x4c1ecd2, 0xf7df6d4, 0x6cef71b, 0xa8d9ea0, 0xcae3b68, 0x86e8f91, + 0x99efefa, 0x2fd1411, 0x214e6f6, 0x0b36ab2 } + }, + { + { 0xbdce61c, 0xd79065c, 0xdecb229, 0xcb562ff, 0x4600849, 0xef5d3d1, + 0x1d23ac8, 0x348b31b, 0x15c36b8, 0xb2ea699, 0x4822836, 0x268683d, + 0xc6f0b7d, 0x083edbe, 0x1a7821c, 0xaf4f39d }, + { 0x4e64841, 0x23be6e8, 0x65bf791, 0xe9e2463, 0x02bfd7c, 0xa3208ac, + 0xd01357d, 0x231989c, 0x6422ab4, 0x79b8aad, 0x91b8564, 0x57d2b7e, + 0x8c04421, 0x28ebbcc, 0x7d09c05, 0xdc787d8 } + }, + { + { 0x6c7bed5, 0xeb99f62, 0x39cd0e8, 0x326b15f, 0xd860615, 0xd9d53dc, + 0x1bf4205, 0xdf636e7, 0x0752209, 0x1eaa0bf, 0x4744abb, 0x17ce69a, + 0xf3ea2fb, 0x474572d, 0x224a7f3, 0xc4f6f73 }, + { 0x63081b4, 0x7ed86ad, 0x4a20afb, 0xcd4cdc7, 0xb301b2e, 0x7563831, + 0xe038699, 0x5b4d2b1, 0x802a15f, 0xa15d1fa, 0x13e9172, 0x6687aaf, + 0xba6da90, 0x3eccd36, 0x7474e83, 0x34e829d } + }, + { + { 0x19c9b27, 0x4cea19b, 0x5f52523, 0xa14c37a, 0x726625c, 0x248b16d, + 0x6cabc21, 0x8c40f9f, 0x32a5c65, 0x918470c, 0x2a98d5b, 0x314056b, + 0x34a0714, 0x6c974cf, 0x4f6314a, 0x0c8f8a9 }, + { 0x70bccfd, 0x4844557, 0x740c9fd, 0xf5835db, 0xa21407c, 0x12e59b5, + 0xdb1689d, 0xbe338e0, 0xdd5e915, 0x5a50ce9, 0xef99f39, 0xb1780e9, + 0xee4d833, 0x1262b55, 0x89c5340, 0x4be3f22 } + }, + { + { 0x6c4b858, 0xbb99b90, 0x550ca53, 0xa7724d1, 0x826962e, 0x7d31f5a, + 0xa5804da, 0xf239322, 0x0275048, 0x3e11320, 0x3ee4cb6, 0xcbb1bb8, + 0x1331191, 0xdb86525, 0x7d1d903, 0xb7caf9e }, + { 0x77d7a9d, 0x06e3b05, 0xb3bbbf5, 0x7a132b0, 0x7c50575, 0xd61fbc5, + 0xaf4b646, 0x393f712, 0xcb7efe9, 0xef77972, 0x5ea4995, 0x20e6d5d, + 0xfbbe4c6, 0x0ac23d4, 0xc807f2a, 0x8456617 } + }, + { + { 0x5396143, 0x4995fb3, 0xb99dc46, 0xa8b4bd1, 0x4150064, 0x2293e8e, + 0x22a3545, 0x2f77d49, 0xb2192c4, 0xe866b03, 0x5e0aa38, 0x58b01f0, + 0x2ed246b, 0xe406b23, 0xed60974, 0x447edb3 }, + { 0x8869703, 0xf541b33, 0x383420a, 0x6959fe0, 0x4be4e48, 0xd6b39db, + 0xb5714ef, 0x048f3b4, 0x5d9e4b8, 0x68b4968, 0x2177963, 
0xbda8e6c, + 0xc4211fe, 0x5094e35, 0x2d46d1a, 0xea591c3 } + }, + { + { 0x2fef780, 0x3a768ff, 0x32970c6, 0x4218d28, 0xec6da17, 0xce598e4, + 0xfbb126a, 0xf675645, 0x0427617, 0xb04c23f, 0xe4fce74, 0xc9f93fb, + 0x3c91b00, 0x44a414b, 0x1d3b3cc, 0x4d982f3 }, + { 0xb24cce0, 0xb1d40e8, 0x133e73d, 0x5a21c07, 0x0bb589d, 0x6e9358e, + 0x2399844, 0x39cfb17, 0x166080e, 0x83f7647, 0x450b468, 0xcfe7bf8, + 0x1e8434f, 0x2a288f7, 0x21a81e3, 0xd39f1e5 } + }, +}, +{ + { + { 0x528af6f, 0x78c6f13, 0x94b74d9, 0x0001fe2, 0x01aab44, 0xae77425, + 0xef0039c, 0x7cbe937, 0x0fa2a67, 0xaf3e4f0, 0xda1378e, 0xe28175f, + 0x8ccd90e, 0x72adeed, 0x00af22f, 0x16a8ce1 }, + { 0xcbf63dd, 0x69fae17, 0x9e39e26, 0x6786172, 0xf827a18, 0xe92b3d5, + 0x8403682, 0x4d75e41, 0x9056a79, 0x01a4fd9, 0x20008f5, 0x89efb2d, + 0xb78ff15, 0xa2f6918, 0xa3437f5, 0xf41c870 } + }, + { + { 0x7be353c, 0xc840ae5, 0x3fb2691, 0x465a5eb, 0x7eba833, 0x34a89f0, + 0x013346e, 0xf620896, 0xe875df2, 0x563b5f0, 0xfbc44ce, 0x5f7fc8b, + 0xcfedf9d, 0x22fcb5a, 0x7dc691b, 0x7cf68d4 }, + { 0x76a103f, 0x37f7c2d, 0xfd87b7d, 0x728a128, 0xccf2132, 0x7db2ad8, + 0xb100e63, 0xa4c13fe, 0x7b511d5, 0xcd28a51, 0x721ca5c, 0xb910280, + 0xd84bd52, 0xec1305f, 0x2729791, 0xb964642 } + }, + { + { 0x5bc7462, 0x83fccdf, 0xd6f012f, 0x01f3dda, 0x3a6a87c, 0x57f1171, + 0xff403ac, 0xedb47ce, 0xbaab073, 0x6c184e5, 0x6f0d6a1, 0x5b17c7d, + 0x3ef2c91, 0x45a4c4f, 0x86a8f41, 0x26c3f7e }, + { 0xb646514, 0x81a6db0, 0xca8b9ae, 0xf84059f, 0x9f02305, 0xd73dab6, + 0xc4b7c6c, 0x0de3fae, 0x696df2f, 0x18abb88, 0x75d7740, 0x45dd1b9, + 0x9ee35bc, 0x3aeccc6, 0xb029f88, 0x478252e } + }, + { + { 0x8b2ce15, 0x66bf85b, 0x335709d, 0x1175425, 0x8123874, 0x00169ef, + 0x9b89868, 0xfd3c18c, 0x775204e, 0xb3612f9, 0xc2cd510, 0x4b8d09d, + 0x14559ad, 0xafa12e6, 0x9657493, 0x1ddaa88 }, + { 0x1e77a08, 0x87d700b, 0x14d2e71, 0xaf4cf2f, 0xbf90c94, 0xe00835d, + 0x6dc8429, 0xb16a6ec, 0xf8a4d92, 0x02a7210, 0x3d0c48d, 0x5a5ab40, + 0xb5b9bea, 0x0052b3a, 0xe138f89, 0x6242739 } + }, + { + { 0x16b2819, 0x7c215d3, 0xfeb9d7a, 0xdacb65e, 0xd833423, 0xc3c569e, + 0x886a058, 0xbc08435, 0x7e5cb61, 0x132c4db, 0x9422aff, 0x6373a27, + 0xfca9fc4, 0x43b9d7e, 0xdbe465f, 0xe3319a5 }, + { 0x0b39da7, 0x51d3687, 0x4b75492, 0xcb6d798, 0xeadd87a, 0x77eb272, + 0xe0d3f6c, 0xf2fb47d, 0xf9f791c, 0x807fd86, 0x975e885, 0xf01086b, + 0xb6a3604, 0xf9314b5, 0x67be852, 0x8cd4538 } + }, + { + { 0x858f79b, 0x7c1e6b3, 0x938caf9, 0xf0477c4, 0x3e88c44, 0xb311bbf, + 0x1e3a3c1, 0x9234c09, 0x95a1d4d, 0x531af2b, 0xb8d1c64, 0xf3cc969, + 0xb51e78d, 0x6f3c328, 0x34e8881, 0x5a1bd6c }, + { 0x3a9336f, 0x2e31239, 0x5ced897, 0x020f0cc, 0x5fab121, 0x4b45d7b, + 0x1841210, 0x8068b1c, 0x8349170, 0x1bd85fc, 0x0f97fe5, 0xfe816d8, + 0x14b84fc, 0x1089818, 0xb93cd48, 0x1d4fabb } + }, + { + { 0xaef599e, 0x1f11d45, 0xb09c58a, 0x8d91243, 0xd08c3c3, 0xd2eec7b, + 0x3b02793, 0x5a6039b, 0x8fb2c00, 0xb27fed5, 0xe8acf5e, 0xb5de44d, + 0x6e6c698, 0x2c3e0cd, 0x777180d, 0x2f96ed4 }, + { 0x96d0e36, 0x67de8bf, 0xc9b6d65, 0xd36a2b6, 0x637d59c, 0x8df5d37, + 0xc8d9878, 0x951899f, 0xb13fcf8, 0x0fa090d, 0x1f5c7b4, 0xa527081, + 0x513a37a, 0x56a6560, 0x14dc1fe, 0xc6f5530 } + }, + { + { 0x94945d6, 0x7f6def7, 0x8cc8832, 0x2f52fe3, 0xa812ff5, 0x0228ad9, + 0xbb8478a, 0xcd282e5, 0xbe91b07, 0xa0bc9af, 0x11165e2, 0x0360cdc, + 0x7b857e4, 0xb5240fd, 0xfa36b08, 0x67f1665 }, + { 0xad2c93f, 0x84ce588, 0xe8ff4c0, 0x94db722, 0x489c8a3, 0xad2edbb, + 0x7e5f278, 0x6b2d5b8, 0xd1d0798, 0x0265e58, 0x4c5589e, 0xd2c9f26, + 0x4e4074d, 0xde81f09, 0x303089f, 0xc539595 } + }, +}, +{ + { + { 0x83e882c, 0x183492f, 0xb5e6c12, 0x4d58203, 
0xefec20b, 0x1ac96c3, + 0xe1cd15e, 0xabd5a5b, 0xcbbb14b, 0x7e1e242, 0xd0543b3, 0x9f03f45, + 0xd678158, 0xc94bc47, 0xa446cad, 0x7917be0 }, + { 0x9b37394, 0x53f2be2, 0x064cc76, 0x0cb0a6c, 0xfba3da3, 0x3a857bc, + 0x80fcb49, 0xac86bc5, 0x30ab146, 0x9d5336e, 0x5bc1270, 0xafb093d, + 0xe5c3b6e, 0x996689d, 0xea076ba, 0x55189fa } + }, + { + { 0x646ce03, 0x99ef986, 0x30e6100, 0xa155f81, 0x29b6b07, 0x75bef17, + 0x1de077b, 0xc46f08e, 0x7ed0526, 0xf52fdc5, 0x61a299a, 0xe09d989, + 0x7b8e93a, 0x9527329, 0x0acd185, 0x11255b5 }, + { 0x4a6acdd, 0x57919db, 0x4451d74, 0x708a578, 0x283f7b3, 0x5b0bd01, + 0xc3d9260, 0xe82f40c, 0x82bbdc2, 0x2ab96ec, 0xc164d87, 0x921f680, + 0xc17a6a9, 0xf0f7883, 0x382a001, 0xc366478 } + }, + { + { 0x2e40791, 0x5c9aa07, 0xa0776bf, 0xf0b72d6, 0xeaa50dc, 0x445f9b2, + 0x6bda47f, 0xa929fa9, 0x3bbfc49, 0x539dc71, 0x006a78b, 0x4f16dd0, + 0xeef39c7, 0x331ba3d, 0xc34157c, 0xbfa0a24 }, + { 0x6a3b482, 0x0220beb, 0x6c43885, 0x3164d4d, 0xacdea23, 0xa03bb5d, + 0x9d8f450, 0xd6b8b5a, 0xbd208fe, 0xd218e65, 0x35c476f, 0x43948ed, + 0x0a2ed2b, 0x29a0dd8, 0x25295b7, 0xa6ccf33 } + }, + { + { 0xac38939, 0xf68f15f, 0xf8010c1, 0xb3dd5a2, 0xa35f141, 0xf7ac290, + 0x7388574, 0xdc8f3b2, 0xe95fed2, 0x7ec3de1, 0x257ac7d, 0xc625451, + 0x664e55a, 0x66fc33e, 0x4832ba5, 0xd3968d3 }, + { 0xc026448, 0x980291b, 0x24da4a5, 0xfcb2125, 0x827a360, 0xbca7df4, + 0x85ca63b, 0xfcc395c, 0x8e9f733, 0xcf566ec, 0xd465f70, 0x835ee9b, + 0x372f916, 0xe66d111, 0x04d9211, 0xc066cf9 } + }, + { + { 0x8b48818, 0xb9763a3, 0x4288f96, 0xa6d23cc, 0xed3a229, 0xe27fcf5, + 0xabaff00, 0x6aebf9c, 0x8131cd1, 0xf337503, 0xffabd58, 0x13ad41d, + 0x861c83b, 0x1bee6af, 0x9c142e7, 0x274fe96 }, + { 0x9b84b5b, 0x70ebcc9, 0x8191cfc, 0xe1a57d7, 0xcbf00b8, 0x46ccd06, + 0xefe402d, 0xc233e8e, 0xbeebeb3, 0xb4ab215, 0xbd14e7b, 0xb7424ea, + 0xa679578, 0x351259a, 0x471d684, 0x6d6d01e } + }, + { + { 0x815ae38, 0x755c465, 0x611db56, 0xadc3e85, 0x188dd50, 0x633999b, + 0xc12d907, 0xfdf7509, 0x238b6af, 0x25bcfde, 0x397f5e7, 0x50d705d, + 0x944c974, 0xb65f60b, 0x27ac325, 0x8867fc3 }, + { 0x3763eff, 0x2edc441, 0x341fb63, 0x892c0b3, 0xb3a7f28, 0xb34b83a, + 0x15c2f18, 0x9aa106d, 0x1bb2277, 0x720bbc6, 0x5cfaefd, 0x637f72a, + 0xf43e565, 0xf57db6e, 0xb58e772, 0xceb7c67 } + }, + { + { 0x6ecc1de, 0x2793da5, 0x38f31b2, 0x4e10974, 0x8781267, 0x4229b4f, + 0xdec04a1, 0xe5d2272, 0xec17cff, 0x6abb463, 0x0cbb048, 0x28aaa7e, + 0xd22ef85, 0x41dc081, 0x5e63d0f, 0xcbc361e }, + { 0xad5dbaa, 0xb78aafc, 0xfc1edc3, 0x0111505, 0x92c7bfa, 0x63ed66d, + 0xe468919, 0x2982284, 0xb8c0d8c, 0x30f1f21, 0x2685093, 0xf056747, + 0xf03dd0f, 0x0e085b6, 0x5581e66, 0xa8c8db8 } + }, + { + { 0x264ad0c, 0x42009a6, 0x593bef4, 0x13bf2b8, 0x5d4e8b1, 0x1d11190, + 0xef7bddc, 0xfe3e940, 0x624e62c, 0xa012275, 0x1d6d3cc, 0xcb65924, + 0xedb7ab6, 0xc7bcc70, 0xb750b1c, 0xff9fafb }, + { 0x7fea84b, 0xf65df29, 0x90b0e02, 0x17c84a8, 0x301e821, 0xa92a859, + 0xfb480d1, 0xbee8cb2, 0x59c604e, 0x7010b8c, 0xe803c43, 0x47bf3f4, + 0x47b3fff, 0xd645142, 0x9f0da13, 0xc4c5dcb } + }, +}, +{ + { + { 0xb5253b3, 0x8af700c, 0x206957a, 0x31ca605, 0x3eafdcd, 0x2574439, + 0xd3ae15e, 0x2ba5ae1, 0x5b82579, 0x710b738, 0x112b95a, 0x145ab57, + 0x38c55c5, 0x4b133a0, 0x2a16fef, 0xf7559c9 }, + { 0xd9ba896, 0x70c3e68, 0xc33d07a, 0x475dd32, 0x3a41e40, 0xe084e47, + 0xfd2e706, 0xddc9382, 0x79510bd, 0x34b7275, 0xa5f901e, 0x5e78a69, + 0xdcfb823, 0x429dfd7, 0x014f0a3, 0x1d9dc18 } + }, + { + { 0xaf403d7, 0x364fcdf, 0xb7d7b34, 0xd9ea4ff, 0xcbb1dac, 0x21a3426, + 0x143b4f5, 0xfa51052, 0x6df2409, 0x2bca073, 0x8ad7285, 0x7e6985a, + 0x4aaa27f, 0x3a1a9d0, 0x9fc0c6c, 
0x1a815e1 }, + { 0xbb65bb3, 0xfab6147, 0x33ced0b, 0xa36dc0d, 0x2062d78, 0x26a8859, + 0x28a5fb7, 0x3438617, 0x4ebb1ad, 0xe82da25, 0xd05aa11, 0x70f5071, + 0xadaac48, 0x0b7f847, 0x93cb269, 0xeb812bc } + }, + { + { 0xf7caccc, 0xcb317cc, 0xcf85098, 0xd3410d9, 0x7f078d7, 0xca68c8d, + 0xb782efc, 0xfe9e812, 0x5f544b5, 0x32e7c0f, 0x3a7b7f2, 0x44fe95a, + 0xe91327b, 0xf4f1543, 0x76645ed, 0x27d118d }, + { 0xd7abc2c, 0x690547c, 0xb53c8af, 0xf64680f, 0x79ea989, 0xbe0cbe0, + 0xa91af28, 0x6cf0cce, 0x9daa2f9, 0xa3b85a2, 0x91faed0, 0xd4b663c, + 0xa8b20ba, 0x782c7b7, 0xb8d98ce, 0xf494faf } + }, + { + { 0x002f55a, 0x080c0d7, 0x2d6d9dd, 0xf4f8f14, 0x382f025, 0xb326229, + 0xad28c20, 0x58fd0b5, 0x8d06a15, 0x704b992, 0x7fbd8e4, 0xf4545d9, + 0xed55581, 0xc32fa63, 0x01ac0fd, 0x3ab7936 }, + { 0x6099fd1, 0x13ece52, 0x9c79178, 0x776dba8, 0xce26c45, 0x8d28212, + 0x60d739c, 0x09fddaf, 0xa84826e, 0xf9931ed, 0xb29439e, 0x6e73d90, + 0x9095e61, 0x94cfefc, 0x802f474, 0x3050d16 } + }, + { + { 0x9f6394b, 0x0898f8f, 0x88b0e91, 0x48b8cea, 0x4c1b362, 0x4bc9925, + 0x827d9ec, 0xe3fccb4, 0xd950d6a, 0x5d4cf9a, 0x39b5b38, 0xa16f1ef, + 0x620f288, 0x3c76d1d, 0xe119390, 0x9fdd059 }, + { 0xfb5edf8, 0x7b5de9e, 0x769d14e, 0x3e290b9, 0x6bd10b5, 0x4df3a91, + 0x82f8f7b, 0xae99bca, 0xc9524af, 0x5481d5d, 0x69504f1, 0xf112e4f, + 0x51931ec, 0xb048f09, 0x18f51b1, 0xbff876a } + }, + { + { 0x46c1c37, 0x932e2a7, 0x9aea4c1, 0x903ad52, 0x8f161f2, 0x717ac91, + 0xf425e2a, 0xa57d197, 0x7f39e0e, 0xae89dac, 0xbaa2a58, 0x91655c0, + 0x54836dd, 0xe3dc286, 0xa9ec9e6, 0xb5f0baa }, + { 0xbdbda04, 0xf7c4662, 0x51059c0, 0xbe5393b, 0xdd95b0f, 0xb16d552, + 0x1b3bd96, 0xde495b3, 0xc0206c5, 0xb2a6e02, 0x014d3a9, 0x045cc09, + 0x2a2f490, 0xf66a315, 0xc5dea05, 0x208c108 } + }, + { + { 0x65237ea, 0x6e38b68, 0x9f27fc6, 0x93a1303, 0xa95068a, 0x9a6d510, + 0xe7c9e54, 0x6fbf216, 0x571ac1d, 0x7824290, 0x91c2a0c, 0x8cb23ba, + 0xc7e434d, 0x611202e, 0x76058b4, 0x8f901bf }, + { 0x0849588, 0xef0ac05, 0xdd31804, 0xe0d2dde, 0xeb2ca81, 0xaf5417c, + 0x5d1a509, 0x420ac06, 0x9683bb6, 0x46e345e, 0xf613f7f, 0x6daf635, + 0x48a9576, 0xc9e8291, 0x176d147, 0x5f9f1d1 } + }, + { + { 0x77e9709, 0xd24ae1d, 0x0047b8a, 0x77751dc, 0xc6a1593, 0xe325334, + 0x671f86a, 0x9baf962, 0xc29a15e, 0x425af6a, 0x2796e33, 0x3108600, + 0xfc253a5, 0xb6ea78c, 0xafae0ea, 0x4c733e0 }, + { 0x97c99b9, 0x4b7443a, 0x50203a6, 0xc14e9e4, 0x52680ba, 0xd1bb515, + 0xd55533a, 0xa56a3ef, 0x169e1a0, 0xa66e38c, 0xeed7da0, 0xb3e4df9, + 0xddce3d9, 0x022c937, 0xf6e36b4, 0x8552089 } + }, +}, +{ + { + { 0xf5cc82e, 0x8e4bf95, 0xc3ed6c9, 0x2ad80c3, 0xc9045e1, 0xf2e5b2c, + 0x59b06d4, 0x42c9065, 0x7b43b84, 0xc1f7379, 0x72d7992, 0x1710dbf, + 0x767b41c, 0xe98cf47, 0x7bfb9e9, 0xe713fce }, + { 0x9fa5134, 0x9f54ae9, 0xde40d0e, 0x3002fd8, 0x9311334, 0xdc282b7, + 0xbfeb360, 0x5519810, 0x0f96ffe, 0x31539c7, 0xd27777b, 0x04eacc0, + 0x8ff5053, 0x5982410, 0x32b67ad, 0x5982366 } + }, + { + { 0x6bea5c2, 0x6eb4554, 0xd509a33, 0x82cfae0, 0x394bb59, 0x6a69bd8, + 0x5770ee1, 0x1880d8d, 0x7dacf9e, 0x6351844, 0xf02b891, 0x5b1ecc5, + 0xb6c9a5a, 0xeb7d900, 0x8897da8, 0xdab8a76 }, + { 0x98851a6, 0x28c7be5, 0x4d73c3b, 0x0101d4f, 0x5084996, 0x3c2569c, + 0x280bde0, 0xb9bc911, 0xcd0d4f9, 0x513a22a, 0x2a15f3b, 0xdf2986d, + 0x2aa4943, 0x231c28f, 0x0333870, 0x29623ad } + }, + { + { 0x4084416, 0x2ceb178, 0x49516cd, 0x924cf1c, 0x4be856f, 0x76536c0, + 0x47a265b, 0x11b59cd, 0x4999494, 0x720dc84, 0x007b795, 0x910f794, + 0x2d3df83, 0x8434e14, 0xbd478d3, 0x8f53878 }, + { 0xaeb9c2f, 0xd9b072e, 0xfd8a29f, 0x16f87ea, 0x2fd0de1, 0x8c42f9b, + 0x0e816ef, 0x916721e, 0x18bde37, 
0x2ecb470, 0x2375da2, 0xcde3b7a, + 0xef94281, 0x30d0657, 0x5cd7af8, 0x5105456 } + }, + { + { 0x4bdced3, 0x7230b33, 0x0838569, 0x0c6a3e1, 0xe3493b8, 0xf19c9ec, + 0x0d97c57, 0xf275927, 0x0c862eb, 0xf14181e, 0x32c72bc, 0xfd3bac1, + 0xf3be362, 0x620563f, 0x47283b7, 0x672ccaf }, + { 0x2b7bf16, 0x191e3fa, 0x520dad7, 0xf838633, 0x3629d87, 0xd3dde55, + 0xaf86ebe, 0x14d8836, 0x221b2ce, 0x3db7dfb, 0x0aed72a, 0x3872abb, + 0x8c665b7, 0xb60de52, 0x44982cb, 0x89c2596 } + }, + { + { 0x4dbba25, 0x799a2de, 0xa42715e, 0xd818aae, 0xf55c362, 0xbc88f4d, + 0x713c9ae, 0x142a163, 0xfbfb33f, 0x411e8ee, 0x6bb684a, 0x34b4629, + 0xdc81817, 0x4344bec, 0x17f9d46, 0xcc9573d }, + { 0xff38a7d, 0xf85f8bc, 0x0caf117, 0xa14bf73, 0x4ba6429, 0x126874f, + 0xaa5db97, 0xcc9bf22, 0x6aba827, 0x62b56df, 0x9c9772a, 0xfee1cb8, + 0x177e541, 0xe36838f, 0xadd438f, 0x698815d } + }, + { + { 0x38ed1ad, 0xc9fd894, 0x7b6a601, 0x73cd79d, 0x05e8d20, 0x2210e62, + 0x3592af5, 0x72384ac, 0x763d07e, 0x5ccc079, 0xa5f79eb, 0x2f31a4a, + 0x2945a95, 0x693f4ed, 0x8056fdc, 0xc712017 }, + { 0xdf4b09a, 0x361ecd2, 0xb7d929a, 0xa5644ea, 0x3fabe9a, 0x34abc0b, + 0xe942a8c, 0x1a2473c, 0x6454bc3, 0xe00c924, 0xdff7366, 0xab324bc, + 0x21b8f99, 0xe1412f1, 0xe33551e, 0x970b572 } + }, + { + { 0xbd0a6b5, 0x6ca4cac, 0x921d654, 0x5584787, 0xc809bda, 0x18e5253, + 0xf0cbe5e, 0x01b32c3, 0x0f987dd, 0xb9aa754, 0x6dfa4db, 0x628f4bb, + 0x891890b, 0x0255f0b, 0x874e590, 0x25b7df4 }, + { 0x8ed5f95, 0xbded318, 0xca93023, 0x9dc428d, 0xbccf520, 0xc68f25a, + 0xe616e6c, 0xc4f3764, 0xa1d9993, 0xd9a57f1, 0x533431b, 0xd1964a5, + 0x02ab6d0, 0x06cd77f, 0x03e52e0, 0xa660791 } + }, + { + { 0x5f72700, 0xab08864, 0x0a1a44e, 0xf77b2ff, 0xc2a24b5, 0x43ebdd8, + 0x4f564d7, 0xa6d6711, 0xf414160, 0x495df63, 0x76f6de6, 0xf5bacd7, + 0x7c2b43d, 0x3011aff, 0x3241928, 0xbb1e64c }, + { 0x5034073, 0xf70c572, 0x68f1e97, 0x891c62a, 0xb22e374, 0xed8eb2e, + 0x7dbcc2f, 0xd3a53e9, 0xdc8f220, 0x1d06281, 0xace4393, 0x9eef48f, + 0xd2abecd, 0x96014f5, 0x2653ceb, 0x1da7e09 } + }, +}, +{ + { + { 0xd00bc94, 0x7593318, 0xc7262a2, 0x586f3c6, 0x958ad31, 0xea68f52, + 0xd4e8bed, 0x6707fcc, 0xcb3f9ce, 0xb7e35d6, 0xf4b1be8, 0x2cbb6f7, + 0x7b41aee, 0xa535268, 0xf7b39b8, 0x1d77845 }, + { 0xeaf9554, 0xb1f3995, 0xfe9e7d4, 0x3250f70, 0xa00c23c, 0x62e5d1b, + 0xc10e3bf, 0x5e422f5, 0xc25cec4, 0x7a18039, 0x7cc4d5b, 0xb4e66a1, + 0x36d0e0c, 0xad7c5f6, 0xa4cf347, 0x9f40b12 } + }, + { + { 0x51e3696, 0x697f882, 0xab0a648, 0xc89bc40, 0x9785804, 0x8f261a5, + 0xb51a2bd, 0x4c7f900, 0x8a2dfcf, 0xd00e7af, 0xb642aeb, 0xf9c534d, + 0xb63df0e, 0xea2a79f, 0xf2f64a4, 0x392a69a }, + { 0xc331b6c, 0x0c0f01c, 0x6a5edb5, 0x414bf2e, 0x5068391, 0xfe5ed81, + 0x62fbc34, 0x0a8078d, 0x54bca98, 0x78a4382, 0x3d727c7, 0xf7a49ae, + 0xab4dffe, 0x96c1de1, 0x3b9440a, 0x45901f7 } + }, + { + { 0xacfe46e, 0x3f1189f, 0x4467443, 0xdca6f46, 0x2eb5bcf, 0xac38542, + 0x906bf72, 0xb02dce9, 0xfe1d454, 0xdd8cdac, 0x65f7218, 0xc26f04c, + 0x6ea145d, 0xb474859, 0x5bdb315, 0xc53dc6b }, + { 0x9ad7197, 0xbe5be74, 0x18b5ecc, 0x627e919, 0x9ea405d, 0x57c889c, + 0x1a5360b, 0x2e5650c, 0x1b30b27, 0x42290df, 0x5242687, 0x4a07157, + 0xd379133, 0x553ed1f, 0x01db019, 0xb9d7a07 } + }, + { + { 0x56597dc, 0xcfe551c, 0x925ebd6, 0x81af92a, 0xf4e8d57, 0x83efe16, + 0x1f640d3, 0x61bb431, 0x78b414a, 0xf80440f, 0x6c9e3b4, 0x72f3c63, + 0x6a03c66, 0xb55f43a, 0xe417037, 0x47a9ded }, + { 0xdbb612b, 0x1a7e287, 0xdbb9220, 0x895c3c7, 0x6c04764, 0xd50c86e, + 0x53cf7ca, 0xed52698, 0xf74af55, 0xc78d799, 0xb969ff2, 0xb2ba0f2, + 0x1c6530b, 0x06d4815, 0x165a575, 0x764a1fe } + }, + { + { 0xc1b5ece, 0x4383a3b, 0x54ff148, 
0x0563c88, 0x5af796e, 0x9a45279, + 0x88e9953, 0xffba7c0, 0xb6a3001, 0xfe9fb5e, 0x25b6b19, 0x7950988, + 0xd81be5e, 0x67c899a, 0x2f9d29b, 0xc89ac8d }, + { 0x29ab8f7, 0x7c76ba3, 0x6e40f74, 0xb2a18c9, 0x3864d9b, 0x1b5056e, + 0x9b582b8, 0xdfa503d, 0x7c9c68e, 0xfb03519, 0x6b3c22b, 0xdc50131, + 0xa6c96ff, 0x38ab231, 0x8cb1c10, 0x4ea527c } + }, + { + { 0xc05b4ed, 0xd632f20, 0xb2a032d, 0xe0199fa, 0x26812d7, 0x3732956, + 0x013df13, 0x2aed855, 0x39f96ac, 0x92ca24b, 0xbb9751a, 0x620273d, + 0xf7437a1, 0x5d0d21e, 0x077de56, 0x9de2a43 }, + { 0x11a4674, 0x0569b12, 0x89c3989, 0xfc3923e, 0x2c5c770, 0x3d12704, + 0x84e8c37, 0x0072b90, 0xac39f9a, 0x7178d4d, 0x778d345, 0x5f8292f, + 0x77c7307, 0x9e5bf0f, 0xc3a20f5, 0x7691610 } + }, + { + { 0x705fe96, 0x7c4ead5, 0xc8e464c, 0x377ec35, 0x7689954, 0x3e5b990, + 0xa2d31ea, 0xc0f6949, 0xc580671, 0x839d395, 0xb215b09, 0x2f347a6, + 0x683df83, 0xfdcfa33, 0x6af39a8, 0x6e12cc2 }, + { 0x13a3bd2, 0xae46ec8, 0x59366f8, 0x03a7d3b, 0xb87aed4, 0xe2029d5, + 0xfe1b83d, 0xbdc4e43, 0xdb8a1a8, 0x768437c, 0xea0dd7f, 0xe47acc3, + 0x62a0af4, 0x550e0cc, 0x1a20962, 0xcaf2cbc } + }, + { + { 0xf28a78f, 0x5a784f7, 0x07e9724, 0x952a9b5, 0x1bab7a3, 0x8ac5e41, + 0xb7bc1e1, 0x1251e3f, 0xdc15e22, 0xe360f82, 0x95213f5, 0x3ac72da, + 0x4dcd47b, 0x65ee9ba, 0x3af5952, 0xdfeab7b }, + { 0x26fd3c6, 0x34c5c80, 0xf3ac7ee, 0xd977b08, 0x7dba2f6, 0x003bd01, + 0xac98c8d, 0xcfc5cf8, 0x0e46922, 0x05eb604, 0xfaa9352, 0xc248b17, + 0x395c7a7, 0xfa41c0f, 0xb71ee44, 0x29931d4 } + }, +}, +{ + { + { 0x07861c5, 0xac087bb, 0x5ae8240, 0x3bd37db, 0xf94518f, 0x94c68ec, + 0xff88a5b, 0xd32a378, 0x9b441d1, 0x42c8aaf, 0xfc07f12, 0x089db70, + 0xd3d4455, 0x211c386, 0x546b158, 0x1db9af7 }, + { 0x51bc927, 0xdfd1b65, 0x0733df4, 0x69c0493, 0x2aeb586, 0xdc72cd4, + 0x823aa13, 0xeebdace, 0x56ad643, 0x51b3b3c, 0xd4e0426, 0xb983a99, + 0x69c4ecc, 0xa1e5b6c, 0x45e6668, 0x37cd382 } + }, + { + { 0x9f73aea, 0x158ce6d, 0x14ff475, 0x36a7749, 0xdc0b018, 0x0d4e424, + 0x3946f09, 0xc2c4448, 0xfacda62, 0x7a7de3f, 0xb486709, 0x49a19e6, + 0xdb61da7, 0x65094d8, 0x8f5ee87, 0x09edfd9 }, + { 0xb37226d, 0xe460fcf, 0x69bf470, 0x3b9d039, 0x247ca22, 0x3d4d511, + 0xc782cb1, 0xc7248d6, 0x00ad293, 0x91189a0, 0xe8abe75, 0x1244942, + 0xbf52cdb, 0x9f88d12, 0xbbbcadf, 0x368463e } + }, + { + { 0x8074f45, 0x419e4b3, 0x0771c83, 0xd3f8e2e, 0x2e68d34, 0xd2743b4, + 0xb116a00, 0xc68b7db, 0xd84cc37, 0xfad2cf7, 0xb7a0f4d, 0xcfd27c0, + 0x190e587, 0x3b9e23f, 0x751ca9e, 0x7bab499 }, + { 0xa8f12ee, 0x3270861, 0x31b36d5, 0xee1f38d, 0xe4c0eed, 0x748bb31, + 0x110ebad, 0x9be5c9b, 0xc8b6cb6, 0x728660b, 0x93d914a, 0x7bc9df7, + 0xc88c859, 0x73a4f2c, 0xb4e7f0e, 0xbe4a2fd } + }, + { + { 0xa450e77, 0xe566ff8, 0x6a13aba, 0xb0b4006, 0xcd7dc90, 0x483a510, + 0x5fa9ccc, 0xb1a2013, 0xa80e67c, 0xeb0b631, 0x020801a, 0x7c34e1f, + 0xf4e447c, 0x0257dc8, 0x74c6f0f, 0x7abe7d1 }, + { 0xb19a576, 0xf115a3a, 0x064ca0e, 0x8f0474a, 0x351f99b, 0x999bb6b, + 0x773edc3, 0x855254b, 0x427d717, 0x49f6c2f, 0x2e0cef2, 0x9f68253, + 0x2ee34f5, 0x1fe126c, 0x80150f7, 0x1ec2cae } + }, + { + { 0xc005b7a, 0x862c5af, 0xec4ef17, 0x61adea7, 0x007b446, 0xf885fd3, + 0x9b0e30e, 0x25c129d, 0xfeec7e0, 0xbc10f25, 0xdf79ee1, 0x3901ac4, + 0xfe9e19f, 0xad49db7, 0x360d050, 0xc8624d9 }, + { 0xbf3260b, 0xc74a576, 0x8c010c2, 0xbde8024, 0x09b6977, 0xf155329, + 0xd52dcf8, 0x6a5a82e, 0x29b9dfc, 0x4fbf59d, 0xc7b730c, 0x337d049, + 0x3a89cd4, 0xb3deac6, 0xad2f2eb, 0x1e07595 } + }, + { + { 0x3b7c84e, 0xa0b0a4d, 0x8cf2b00, 0xf132c37, 0xeaaa8ec, 0x192814b, + 0x7b4b5df, 0xe7929f9, 0x42d0ab7, 0xf08a68e, 0x7b60cdd, 0x814afb1, + 0x7d9c160, 0x78c348c, 
0x44db217, 0xf8a9488 }, + { 0xeaa2578, 0xcdefd88, 0xbd0e260, 0xf717f56, 0x1694d02, 0x7754e13, + 0x181dbd8, 0x1254c14, 0x6e5f312, 0x0dacdd2, 0xcef87bf, 0xb8abdfb, + 0xe74e2ea, 0xb985972, 0x002b424, 0x1717621 } + }, + { + { 0x162df70, 0x92cc75e, 0x18ee849, 0x1e20c06, 0x26aa590, 0xc036b46, + 0x4da5155, 0x31be67e, 0xf7213b0, 0x04911b5, 0xbb2e72e, 0x39261d7, + 0x5c015a3, 0x9e84466, 0x298ae67, 0x2f59fc0 }, + { 0x1701fcc, 0xa3ea7ba, 0x0ebd651, 0x87a5fa9, 0x301d7b1, 0xa607ed4, + 0x3b2e271, 0xbd4ec5f, 0xdc4180f, 0x732a1a2, 0xfeaa8c1, 0xbe15d82, + 0x66f2f3f, 0x1036702, 0x9e79ce8, 0xccfd397 } + }, + { + { 0x70a54ad, 0x82ab835, 0xe3bec75, 0x5c1dee8, 0x54b556b, 0xf583ff4, + 0xf461e60, 0x9220199, 0x87fc4e7, 0xdf61ca8, 0x0776dad, 0x6641fd2, + 0x8edd061, 0x00c6edd, 0x55f7e87, 0xaf9b142 }, + { 0x9bbe3ec, 0x73f15e4, 0xf8bc1fa, 0xdd3b788, 0x1b8ff86, 0xb24cc07, + 0x41be58b, 0x6c260d2, 0x6b10ada, 0xec1c4e3, 0x7fdb985, 0xf6b4209, + 0xd47c212, 0x0d0ac85, 0x07d78d1, 0x967191c } + }, +}, +{ + { + { 0x843d0f3, 0x3b11638, 0xf27f10e, 0x4b89297, 0x863ba2a, 0x477236e, + 0xadd280c, 0x1949622, 0x04da757, 0x7cd5235, 0x79e4ff7, 0xe0e99d2, + 0x537da41, 0xb4ef894, 0x5a24ff1, 0xc55dde4 }, + { 0xb587521, 0x18d8e21, 0x3777833, 0x8010b5d, 0xd3a54c8, 0x4af522d, + 0x4c0ac13, 0x7cd476b, 0x4099f67, 0x4587e61, 0x605ee64, 0x494d0ed, + 0xcc80903, 0x3218ba2, 0x0b2e169, 0x5ff56aa } + }, + { + { 0x3a06c69, 0x51ec94e, 0x5e65c52, 0xa26d7be, 0xd44ee96, 0x156f113, + 0xbf5b9b4, 0x70f0968, 0x5f5332d, 0x9b7e469, 0x6703829, 0x36c295f, + 0xd04f492, 0x1522690, 0x728043b, 0xcf35ca4 }, + { 0x190a7c3, 0xf9ca3e1, 0xf971b07, 0x53d2413, 0x9c48b49, 0xae59652, + 0xfefff5c, 0x74672b8, 0xa7643b0, 0x0a3018b, 0x3e9b0a8, 0x51919e8, + 0xc932fb5, 0x89ad33d, 0x643e687, 0x52a4419 } + }, + { + { 0xd2d0acd, 0x7778990, 0x487fdf1, 0x3bdbcce, 0x2b03dd2, 0xdc413ca, + 0x9a2b7d0, 0x278755b, 0x35ddd7f, 0x4ebb8b5, 0xbcbdb92, 0x0465152, + 0x671d051, 0x34f22d6, 0x87192b9, 0x1ba04c7 }, + { 0x83560c1, 0xb1693f4, 0x7d174e9, 0xe08a593, 0x64dc9af, 0x47ffdc4, + 0xce8126c, 0x1123596, 0x1124628, 0x632d95f, 0xfee7c76, 0x66287ab, + 0xc552332, 0xb40fe60, 0xe304e1e, 0x3f11729 } + }, + { + { 0x5030a8c, 0x97a6ea0, 0x09c27b2, 0x6924198, 0xac9dd5d, 0x3308501, + 0xbe73fdc, 0x9fed7fa, 0x0535286, 0xea55544, 0x6c9b832, 0xc7c07ab, + 0xc51b967, 0x178c882, 0x86ee075, 0x6fa0c69 }, + { 0xb8b5c4a, 0xbaa4a15, 0x3130c0a, 0xf83c0ea, 0x2800331, 0xcf8624b, + 0x7ccbcb8, 0xade85cd, 0xf08445d, 0x971d7f6, 0x6a546dc, 0xfd480b7, + 0xc93761c, 0xdc15a38, 0x9d04631, 0xc4c495c } + }, + { + { 0x9470efe, 0x5f4cee8, 0x88d93ad, 0x9fe8961, 0xf4e49ce, 0x24783b3, + 0x52ffb3e, 0x1bc7ed7, 0x6d81e17, 0xa3abe6a, 0x7a333c3, 0xd6bb8b4, + 0x10a3527, 0x3485c0b, 0x31a9d10, 0x7cddc9c }, + { 0xc38ca37, 0x0c78112, 0xdd2f8d8, 0x10e249d, 0xc511911, 0x72c88cc, + 0x29a6c84, 0x4d75b5a, 0xa227b1e, 0xc74b267, 0xf8e35ad, 0x698390c, + 0xe98d230, 0x8f27edf, 0x6bdc7f4, 0xec922f2 } + }, + { + { 0xfc32e11, 0xac34023, 0x47200d1, 0xe0ae2f5, 0xbd98c82, 0xa7c7492, + 0x7b02154, 0x3910b68, 0xe28ab6d, 0x6fdd06c, 0xd98b012, 0xd3a7e49, + 0x9f54207, 0x4c1c82b, 0x45c176f, 0xef5bbe6 }, + { 0xd3e71eb, 0x3d17960, 0x080e70c, 0x90d7e84, 0xbff5d9e, 0x83e6438, + 0x535d85c, 0x1877e1f, 0xfbb69cc, 0x931ed6e, 0x1247848, 0xcf96265, + 0x750da4e, 0x76d618b, 0x717fbf6, 0xc076708 } + }, + { + { 0xeec5126, 0x80a5ac5, 0x3379c80, 0x6d05dd1, 0x2336d32, 0x514b089, + 0x6725137, 0x586c006, 0x574f954, 0xab2365a, 0xac7d356, 0x3c89ea0, + 0x27460ba, 0xf1f2edd, 0xab9870f, 0xf200ddb }, + { 0xa35e885, 0xc8f1b2c, 0xe6e7550, 0x5d22f86, 0x9554615, 0x24b9a40, + 0x616314f, 0xcb41107, 
0xc976a11, 0xca752f0, 0xa08291a, 0x3e2f839, + 0xf2c420e, 0x0cff22f, 0x82b9747, 0xafd603e } + }, + { + { 0x810a3da, 0xaddeddc, 0xd3a87bf, 0x78b6c2d, 0xde3a04c, 0xbc7020b, + 0x9b6d045, 0x47ab973, 0x0959358, 0x3b046d6, 0x509ee3e, 0x0f953e7, + 0x69fc61b, 0x803dc86, 0x893c8d4, 0xcceaec0 }, + { 0xb048a45, 0x21f8c40, 0xfcaea8a, 0xb535073, 0x90e360b, 0xe712c35, + 0x8403338, 0x5d0f3f4, 0x7207f2d, 0xe0ea26c, 0xffd9e05, 0x20f6b57, + 0x4788b00, 0xb97d68e, 0x1889cce, 0xb121554 } + }, +}, +{ + { + { 0x464238e, 0x0079817, 0x0d381ca, 0x2110302, 0xd9f01b5, 0x1cc4c6e, + 0x5a131b1, 0x5e35dc5, 0x06944eb, 0xb61848d, 0x29631a3, 0x83792a0, + 0xafca0dd, 0xbe1017f, 0x782fcbb, 0x70aaa01 }, + { 0x99945e7, 0xc63b7a0, 0xc4486c1, 0xe9164ec, 0x885f2c1, 0xb133e35, + 0xc99ae02, 0x186f0d3, 0x2bf53e6, 0x2fca492, 0x48a02bc, 0xf922aa2, + 0x0dd3dca, 0x4fe6490, 0xf6a8207, 0xe8c313f } + }, + { + { 0x97caf1e, 0xc5b3583, 0x922a4b6, 0xa001922, 0xdf07c95, 0x67e36be, + 0xb2f4f34, 0xabaa0ae, 0xdedc333, 0x66dc926, 0x38ec5b3, 0x82021c4, + 0x00ab176, 0x82b4f26, 0x69c45af, 0x1b7c22e }, + { 0x0924ad9, 0x07b0dbe, 0xa407dde, 0xe030936, 0x26ccd06, 0x66e1ce9, + 0xe3505a9, 0xb50c108, 0xda98f51, 0x8b921e1, 0x20cf7c7, 0x449ca1a, + 0xe67d079, 0xadb80c7, 0x834372d, 0x205aa54 } + }, + { + { 0x19bf847, 0x1482b48, 0x5906f0f, 0xd6c16ab, 0x23ad060, 0x323fb17, + 0xc832be7, 0x0346389, 0x2ee45bf, 0xe71b2d8, 0xfb22276, 0x761c37d, + 0x5d70be2, 0xa9b3334, 0x5a0627a, 0x81a0656 }, + { 0x99a6282, 0x3377503, 0xd0436f0, 0xafc8d2e, 0xc53342f, 0x22f71d3, + 0x8939ad3, 0x66ca56d, 0x30e09ba, 0x15a9192, 0xa6de890, 0x261091e, + 0xe78f2d5, 0x609d700, 0x8eaaf78, 0x8aa52ee } + }, + { + { 0xce76258, 0xa398788, 0x494b975, 0x3031d07, 0x043dfe2, 0x4a6d652, + 0xb4401ec, 0xdb1a849, 0xce8bbcc, 0xf81ebbb, 0x16efe9e, 0x937dd47, + 0xef85ecc, 0x9c19350, 0x214273b, 0x260d932 }, + { 0x77bf1a3, 0x1d7e21e, 0xa544eb7, 0x199d689, 0x94ced50, 0x9da5941, + 0x8a0aeaa, 0x71a60be, 0x26d3b51, 0x183a0ae, 0x8df9728, 0x49f176a, + 0x3230674, 0x744376e, 0xe25541c, 0xb2cb21a } + }, + { + { 0x9a0071f, 0x7a72158, 0xe7d2a6b, 0xe19dd29, 0x55113f0, 0x3deb34e, + 0xede573b, 0xef1f8eb, 0x5665e37, 0xa8f7ff9, 0xf2d7777, 0xa2c21ea, + 0x91e2e39, 0x1387afa, 0x7db68f6, 0x04057b9 }, + { 0x1c241f7, 0x8b9d5ae, 0x8e75993, 0x689588a, 0x5c0e2d4, 0x79585b4, + 0x7b64974, 0xba1ef16, 0x1c08a75, 0x72685bc, 0xd572edd, 0xf0a5814, + 0x5ab0e70, 0x71464a3, 0x339aea7, 0xc93c92b } + }, + { + { 0x5b8a87d, 0x1917e2a, 0x3a82756, 0xea5db76, 0x6420e2b, 0x5bba2fb, + 0x019372a, 0x5cc0501, 0xccc5efd, 0xb1ef8be, 0xf49c57d, 0xaf06393, + 0x87a0bc4, 0x3ab1adf, 0x34fe6b6, 0x2ee4cca }, + { 0x6b8ba9b, 0xd160668, 0x7efec13, 0xef137d9, 0x50abb76, 0x7b60465, + 0xf753a00, 0xb40ec2b, 0xeaf8f1d, 0x696ed22, 0xd8ba3d8, 0x398c91f, + 0x37db313, 0x11f2034, 0xfe5079e, 0xe1ec33b } + }, + { + { 0xbdc81f0, 0x8a10c00, 0x6fe8e05, 0x5f39256, 0x14a368e, 0xa595dab, + 0x38cec6b, 0x32b3181, 0x1b00d00, 0xd77afde, 0x4d9923d, 0x3c97928, + 0x76e13dd, 0x78f0e7a, 0xbf75675, 0x5ee8e59 }, + { 0x91b130c, 0x49ec893, 0xa47a441, 0x9416182, 0x76e2ce8, 0x54555b5, + 0x349c40b, 0xcbdd2fd, 0x9392bbe, 0x10ae737, 0x2e2dab0, 0x270b111, + 0xaf293f4, 0x5cb7712, 0xd6095c6, 0xfc22a33 } + }, + { + { 0x0f15878, 0xdcb5bbd, 0xb6bba48, 0xbcf27ad, 0x7b70eba, 0x979913e, + 0x158578a, 0x4c0f34b, 0x6ed6088, 0x53f59a7, 0x75b0fc2, 0x19b3b2c, + 0x0153f3c, 0xad628dc, 0xcec1607, 0x5195a2b }, + { 0xdfe0f7a, 0x95f8b84, 0x152920b, 0x935c6b0, 0x4da1056, 0x25f9e31, + 0xb28c229, 0x4910a94, 0x8ee4d6e, 0x54b03b4, 0x694e3ed, 0xc991fc3, + 0xdbe5709, 0x68c4c26, 0x63d7657, 0xc9cfce4 } + }, +}, +{ + { + { 0xf52a44e, 
0x21c9227, 0xe85bfbd, 0x7f105a2, 0x6268fc2, 0x887781f, + 0xa2d7e35, 0x56ee808, 0x2d3930f, 0x14f9de5, 0xdcb561a, 0x4a4e356, + 0x7f95598, 0x8736226, 0x5f34151, 0x211c342 }, + { 0x0eaf9cb, 0x8fcb75b, 0x3d60ce2, 0xcc9edf9, 0xa5fe627, 0x54412c9, + 0x842dd09, 0x6036a72, 0xa6c6099, 0x71ce668, 0x5386764, 0x02b30d7, + 0x6f18e23, 0xb69bed3, 0xd1de9f4, 0x124c9b1 } + }, + { + { 0xe69b531, 0xe8f8d95, 0xaff1049, 0xe1e115e, 0xeddea0c, 0x9087cd1, + 0x7449916, 0x8ed55a5, 0x7808404, 0x8009f54, 0x17fea55, 0x990f216, + 0xfe8ecf9, 0x68ba624, 0x56d1f47, 0x8ac2950 }, + { 0x529dfb0, 0x3257887, 0x244c080, 0xc4a613f, 0x28672fa, 0xabb1ac0, + 0x31eb291, 0xb2915c5, 0x8fababa, 0x6e368ca, 0x1fde498, 0x6b8c259, + 0xf2a548c, 0x67724a1, 0xf90409b, 0x6b3b7e8 } + }, + { + { 0xfae20aa, 0x5415003, 0x85df5ce, 0x95858a9, 0x0ac6bee, 0x42bc987, + 0x39ea1a9, 0x8d843c5, 0xb571043, 0x5de200c, 0x1741a33, 0x084fcd5, + 0x0009d1c, 0xe1ca20c, 0xe957e6d, 0x0271d28 }, + { 0x9e3be55, 0x84cbf80, 0x1c578c6, 0xc804dda, 0x409a93a, 0xea85489, + 0x972021d, 0x64a450a, 0xe681312, 0xc6a2161, 0x65bc111, 0x280bff9, + 0x0f8526f, 0xd358a4b, 0x953a3ab, 0xd967be8 } + }, + { + { 0x7dd066c, 0x4c5e615, 0x634c8d4, 0x37afd33, 0x42d8b87, 0xa3ac88a, + 0x938b607, 0x9681e9b, 0x37fe4c8, 0x7a286ab, 0x2494245, 0xdeee574, + 0x6af75a8, 0x184b9d3, 0x3670c04, 0x20f696a }, + { 0xa39e8b9, 0x1340adf, 0x0850b2e, 0x03c1929, 0x2c0e1ef, 0x435ebd4, + 0x142ee9b, 0x49de18b, 0x3f116f2, 0xb440b27, 0x2214463, 0xd94e9fa, + 0x6311543, 0x1b0ddd3, 0x991ba3c, 0x1ae042a } + }, + { + { 0x5bb47aa, 0xbc322f8, 0x54a5845, 0x9e25625, 0x21115f3, 0x96b65ae, + 0xbb5757b, 0x46fbed4, 0x4c42dce, 0x18aec4f, 0x8d801f0, 0xc59caf6, + 0x1205521, 0x9189463, 0x89feb7a, 0x66bd8e0 }, + { 0xc529ee7, 0x39ebe95, 0x8eadb99, 0x28d8992, 0x6927544, 0x6058c78, + 0xd3808ec, 0x877e7a5, 0x1c52eaf, 0x8f65111, 0xae221cd, 0xfb59812, + 0xf890391, 0x22289c6, 0x4966e92, 0xa97695b } + }, + { + { 0x6ff10f0, 0xf0a9122, 0xa2a65c8, 0x49a931b, 0xb1d3cb0, 0x3fcebbc, + 0xca9685f, 0x70eb79b, 0xab38cb6, 0x82520b5, 0x76304c3, 0xccf991b, + 0xaf8b07c, 0x575aab1, 0x5ed5efb, 0xec8166a }, + { 0xc8689b1, 0xddc5698, 0xb2e78d7, 0x227c949, 0x8e07d91, 0x6132321, + 0x22cfd62, 0x658a11d, 0x004dd5f, 0x908fb44, 0x90d21b1, 0xe3d14f0, + 0xa6a1639, 0x6f3db9d, 0x333a525, 0x09d86c0 } + }, + { + { 0x6f043f7, 0xd83eaf0, 0xb52d5f6, 0x88ab648, 0x57144d7, 0x67c664d, + 0xeafc8b5, 0x55d7644, 0xcceb291, 0x1c89f20, 0x831ac47, 0x51aec7b, + 0x6148854, 0x51172fa, 0xf6d7bfe, 0x8fabf7e }, + { 0x477ee27, 0x5910316, 0x20fe61e, 0x5f299dd, 0x42826ab, 0x48079a8, + 0x22591fa, 0xf4a83ba, 0x55482ec, 0x8fac660, 0x6b65b3b, 0x48fd5f1, + 0x9fd9e19, 0x4288a7c, 0x9377894, 0x27db819 } + }, + { + { 0x7fd9dd6, 0x2936ee4, 0x9ec87c6, 0xcce5f0e, 0xdb6e3b4, 0x15a50e3, + 0xad701c8, 0x61df105, 0x1dff1f7, 0x3601add, 0xe8a16e1, 0xb761e06, + 0x1af3f91, 0x4341e02, 0x933fa3f, 0x9156a4a }, + { 0x54bc01d, 0x9dc46ae, 0x64eb910, 0x605577a, 0x5a59a99, 0x22b99f8, + 0x0a229d8, 0xab2dbaf, 0x6599364, 0xa8bfb65, 0xe94ebf0, 0x39ed4a5, + 0x0dbb23e, 0x7b46a1e, 0x8751422, 0x117b195 } + }, +}, +{ + { + { 0x423bddf, 0xd19e8fd, 0x387ef59, 0x9d77042, 0x849590a, 0x315cbdd, + 0x7866c1e, 0xfdc637c, 0x03515a6, 0x72be83d, 0x0376780, 0xd44a4a0, + 0x19e0c2b, 0x3b96131, 0x7b1a689, 0x023aca3 }, + { 0x82282ea, 0xf5f3687, 0x8a8b5c7, 0x4471089, 0x17a3066, 0xcd2f00a, + 0x81ed681, 0x754e112, 0x0bfcefd, 0x9c6c70c, 0x3b6f29b, 0xd6aced0, + 0x2817a2a, 0xe443d56, 0xe7c0012, 0xe590ef4 } + }, + { + { 0x3e62e2a, 0xc2f9676, 0xb2daa26, 0x661816e, 0xdd5f512, 0x3515fd2, + 0x56b6e75, 0xdc36e27, 0x74cc658, 0x0bdde46, 0x00e7644, 0x1029086, + 
0x1694a09, 0xfdf0045, 0xceac169, 0x454bcb6 }, + { 0x6481eb6, 0xf4c92ab, 0x09750e7, 0x8b77afa, 0x6362d6d, 0xe6f4231, + 0xf53a3ae, 0x0d45dee, 0xd7dcf98, 0xdac7aac, 0x125ec4a, 0x628cb7f, + 0xaec0320, 0x41e8a20, 0xea2e35b, 0x7418c7e } + }, + { + { 0xdf40519, 0x4d649ab, 0x3525833, 0x8cb22d4, 0x7a5333f, 0x15f6d13, + 0x72c23ee, 0x8c3991b, 0x0cd44a3, 0x248b9a5, 0xccc1a75, 0x6b4c4e0, + 0x15c99a9, 0x3221efb, 0x0a9c504, 0x236d504 }, + { 0xd559100, 0x401c7fb, 0x07c524d, 0xcf0e075, 0x34a9275, 0x39647c0, + 0xf7e8683, 0x2355422, 0xb3ae670, 0x3e0a16e, 0xad61b7f, 0x1c83bcb, + 0x9ca6cbe, 0x491bcb1, 0x5e29458, 0xe668dc4 } + }, + { + { 0x219379e, 0xe44c65b, 0xbb607ee, 0x211381b, 0xb7bc6db, 0xd4c7428, + 0xb76a2e8, 0xba62a03, 0x8bb0b31, 0xe1729c9, 0xc6bbc10, 0x3caeb50, + 0xb0187aa, 0x6c66727, 0xfb90dcf, 0xbf9d2f0 }, + { 0x1184dc6, 0xec69350, 0x2698eb5, 0xd58d2a3, 0xa316b07, 0xb366d8d, + 0x251c017, 0xe1e39bb, 0xadb157f, 0xbe44ba9, 0x8a8b06c, 0xbaa9a9a, + 0x6e473e1, 0xd0f4635, 0x1d681c6, 0xd25a8f6 } + }, + { + { 0xcb102c7, 0xba39d5f, 0xd8aa1eb, 0x66eba21, 0x697fbf4, 0xcc2591a, + 0x2317f54, 0x5adb579, 0xf76c6f9, 0xa01ae71, 0x5042705, 0x2c525de, + 0x4f4479f, 0xc8f4272, 0xe6d7a5b, 0x26ab54a }, + { 0xdc28106, 0xda217b5, 0xeb2ae6a, 0xc7cadea, 0x53ea3b2, 0x0b16094, + 0xcc6111b, 0xcddcc1c, 0xa7a7beb, 0x5c47aff, 0x0e52dab, 0xf9931bd, + 0xc6dcf96, 0x5231835, 0xf27ea4e, 0x7095bde } + }, + { + { 0xc33b4e2, 0xee8adae, 0x63ceb44, 0x3006651, 0x880b086, 0xf1476fb, + 0x9569ce8, 0x0703328, 0x238b595, 0x2cabf9a, 0x26c8158, 0x85017bc, + 0x68d5144, 0x420b5b5, 0xf9c696f, 0xa9f5f1e }, + { 0xc8fec5a, 0x1409c3a, 0x28e9579, 0x541516f, 0x0e1f446, 0x06573f7, + 0x2311b96, 0x3e3c706, 0x3c2ffd8, 0x0033f1a, 0xca6711c, 0x8e808fc, + 0x07aef98, 0x716752d, 0x92525b3, 0x5e53e9a } + }, + { + { 0x5a1c29f, 0xce98a42, 0x3ca6dc9, 0xaa70348, 0xedfa48b, 0xe77d822, + 0x068abca, 0xd2e3455, 0x482cfca, 0xb456e81, 0x7fbfb08, 0xc5aa981, + 0x8243194, 0x8979f25, 0x2cd043d, 0x727f217 }, + { 0xaa53923, 0x7cca616, 0xe9bcb72, 0x387c5ae, 0x37580bb, 0x0173fd4, + 0x75fc0d9, 0xdd7795b, 0x345deae, 0x47d1c37, 0xb0d1c03, 0x2eb5d7f, + 0x958f002, 0xf7a1b92, 0x8f61b67, 0x7365cf4 } + }, + { + { 0x562a5ed, 0x4b22c3b, 0x5c7cd07, 0x711216f, 0x9ba0648, 0x51f72c4, + 0x0de9e6f, 0xc10d093, 0xfda63ba, 0xaca479b, 0xaf532b0, 0x4722a55, + 0x7236f39, 0x8d59eb7, 0x4465c34, 0x5cad874 }, + { 0x722b0c1, 0xa2119e5, 0xf343ea4, 0xb670264, 0xc19f387, 0x6910f02, + 0x0381fba, 0xcfec5bc, 0x52c0a1d, 0x5f5de0d, 0x6378cb6, 0x4e474d5, + 0x27e2ba3, 0x2fc8027, 0x159b541, 0xa215da3 } + }, +}, +{ + { + { 0x8499895, 0xed53585, 0x65c998d, 0xa0aefd5, 0x2d5a561, 0x210d850, + 0xa2cd9d6, 0xc2cc23c, 0xc4d297e, 0x2371d46, 0xd18d441, 0x88b2143, + 0x043993d, 0xbebdad9, 0xad5f28d, 0x6ba91e7 }, + { 0x3a731f4, 0xc2bb3f1, 0x5d0d5c3, 0xd35cfac, 0x35ac427, 0x9950998, + 0x5458adb, 0x8938bb5, 0xab26f3b, 0x0bd738c, 0xa28cd8d, 0x56db3d5, + 0xa1d8b4b, 0x87eb95f, 0xe7f3b4b, 0xd6700ef } + }, + { + { 0xea1e57b, 0x962c920, 0x6dded6d, 0xd3be37e, 0x2c96a73, 0xf499b62, + 0x6c99752, 0x3eaf7b4, 0x025590b, 0xa310c89, 0x721db23, 0x535aa4a, + 0x19714a0, 0x56ab578, 0xd4048c1, 0xeecb4fa }, + { 0x470c466, 0x7b79ec4, 0x1383cee, 0xc4e8f2e, 0x5750c45, 0x0f5d776, + 0x725527d, 0xa3b3bc3, 0x6d00cce, 0x2f5deb6, 0x95a8d81, 0x5d5a0f4, + 0xe02b824, 0x50a442e, 0x2a11628, 0xafb0446 } + }, + { + { 0x0c613de, 0x72b67bc, 0xe6f0b24, 0x0150d4b, 0x8ed289d, 0x847854e, + 0xa320f88, 0xe08292f, 0x29c6160, 0xd5b6da3, 0x4fb9d06, 0x2a48e2d, + 0x2de087c, 0x55d9e41, 0x4f02100, 0x65683b5 }, + { 0xa8886c6, 0x4dc8c2e, 0x20d6114, 0xe966dd2, 0xa57af97, 0x99745eb, + 
0xb854725, 0x23a9a71, 0x621a047, 0x8effe05, 0x049a4be, 0xf16d284, + 0x5b0660f, 0x95828c2, 0x56e96b0, 0xd5b69ba } + }, + { + { 0x4ffa0b8, 0x0b5b424, 0x096cc5e, 0x0585b45, 0xf505d37, 0x413e1ae, + 0x0c7ab8d, 0xe5652a3, 0x2990120, 0xab32fb7, 0x3f09368, 0x6b8b16e, + 0xefe128e, 0xbf9fadb, 0x14b7671, 0x85f366b }, + { 0x090608d, 0xcb2f294, 0xac3045f, 0x25e2769, 0x6131904, 0x069c4f0, + 0x329a779, 0x1c57cf1, 0xb7cace7, 0x72fe0d5, 0x0897a45, 0x04d9f43, + 0x359a645, 0xbaf32f6, 0xfa7485a, 0x0fa854f } + }, + { + { 0x5f56f60, 0xae3533c, 0x0ad9360, 0x9773bbb, 0x38fbe6b, 0x769b34a, + 0xffb0c00, 0xb5ba8e9, 0x75472e4, 0xa939318, 0xce5f30f, 0x12cac92, + 0xa9e7dbc, 0x514fc06, 0x58b4734, 0xd7ca865 }, + { 0x65a730b, 0xd101ff3, 0xabe70e9, 0x92da451, 0xef7bf4b, 0xfb5f94a, + 0x1d56c7b, 0x8c3ef4c, 0x8435c10, 0xb085766, 0xe7ed4cc, 0x7fbbbda, + 0x24f372f, 0x1da6eaf, 0x59b8ae3, 0x0ab2c1f } + }, + { + { 0xf10a4b9, 0x63a1a78, 0x0c7e510, 0xbb5278d, 0xf874142, 0x97b224e, + 0xb2517b1, 0x0a9ff52, 0xc5cd920, 0x1b5a485, 0xa1823b9, 0x1a8e2eb, + 0x0e914a8, 0x2b088c0, 0xcf13432, 0xe5ec3ad }, + { 0x6e7e253, 0x0d6ab3e, 0x6f18458, 0x9f0f5cd, 0xf459a6d, 0x839a744, + 0x1eb15f7, 0xb4b4f94, 0xc72cb14, 0xe0313ac, 0xb20472d, 0x58ee933, + 0x872543e, 0x5f73d7a, 0x501f067, 0xb1700c5 } + }, + { + { 0x085f67f, 0xb70428e, 0x43cabe5, 0x5441d51, 0xe0a6055, 0x4d0e8c2, + 0x0882e4f, 0x8d39a08, 0xc1cb39d, 0x615bb32, 0xf7a1642, 0x113f18d, + 0x250681f, 0xbab8cf5, 0x677b72a, 0x3017ba2 }, + { 0x5a3a876, 0xcd2b6e9, 0x2035a69, 0x0476501, 0xefa2ea0, 0x31d6440, + 0x56874d5, 0xde8f8d1, 0x0199d4a, 0xcbc71cd, 0xe7f2170, 0xc546b61, + 0x112c4c3, 0x4e57e4e, 0xd1622ba, 0x58955a8 } + }, + { + { 0x04e2f6f, 0x0064cd7, 0xe0edd38, 0xe9d458d, 0x7e0a5c8, 0xeb1a597, + 0x01fc0a8, 0xe322ece, 0x1032a19, 0x8b9d166, 0xa89de94, 0x3e7b539, + 0x001c754, 0xfa30262, 0xdb588f6, 0xe33de4d }, + { 0x954eb94, 0x4dafbdb, 0x0584c1b, 0xbb43648, 0x5dbe29b, 0x622c93e, + 0xf57b931, 0x968f9e3, 0x0f6453b, 0x98f03be, 0x08f696c, 0xb0ecc7f, + 0xa505335, 0x5af55f4, 0xfb3fa9b, 0x028533e } + }, +}, +{ + { + { 0x27e8d86, 0x3bc8e68, 0x63f105a, 0x4e43b30, 0x4981250, 0x5301b7d, + 0x9f72fa8, 0x8b0a75e, 0x357348c, 0x88f59db, 0xec4208e, 0x5f0ebb1, + 0xc043d3b, 0x4712561, 0xc806b97, 0x9e5ded0 }, + { 0x2121d09, 0xf9bd0a6, 0xe337cd1, 0x1759ecb, 0xe945542, 0xd1acc0e, + 0xbd2f63a, 0x3683feb, 0xda5dfe9, 0x44f1bcc, 0x707f22f, 0xa3606c9, + 0x2d96ca5, 0x45ef064, 0x9022df9, 0xfc3107d } + }, + { + { 0x44be755, 0xe81320b, 0x5c7c761, 0xdf213d5, 0xb4e5db9, 0xf43d2d5, + 0x8dedcd2, 0x3bcfd82, 0xd37a9ec, 0xdf368a6, 0xf475a77, 0xfef20ae, + 0x162c064, 0x22f5894, 0x0142a7d, 0x956bc66 }, + { 0x7daec78, 0xaaa10e2, 0xb6e9a78, 0x3cb9b72, 0xe383f72, 0xa740bad, + 0x7759007, 0xc31b401, 0xa7afc50, 0xdada964, 0xfd3d11f, 0x6bf062c, + 0x5db3679, 0x9470d53, 0x03abf13, 0x3394473 } + }, + { + { 0x46e5d7f, 0x533f440, 0x49048c8, 0xd1793e3, 0x1929b94, 0x59e1150, + 0x8364134, 0xcddbbcb, 0x582774f, 0x795c794, 0xe03081a, 0x114dfc4, + 0xef54042, 0x541ef68, 0x23f18cd, 0x159295b }, + { 0x48a2c8c, 0xfb7e2ba, 0xbb6d116, 0xe2d4572, 0xd750b53, 0x7bb0b22, + 0xd142ee8, 0xc58888c, 0x90c9e2d, 0xd11537a, 0xd02eb9e, 0x77d5858, + 0xd444a79, 0x1fa4c75, 0xd58a68d, 0xf19b2d3 } + }, + { + { 0xeb8b90f, 0x37e5b73, 0x3f2a963, 0x3737f7a, 0x9de35e0, 0x87913fa, + 0x8731edd, 0xec7f992, 0x219491e, 0x6e6259e, 0x4de236c, 0xb2148a0, + 0xfdd309b, 0x89700e8, 0x9f0bf80, 0x9ce51e4 }, + { 0x301f17b, 0xe7ec421, 0x3bc5f4f, 0xa4b570a, 0x1285ee2, 0xc2b1b2a, + 0xc53db73, 0x5e86bc8, 0xf24fa90, 0xb65fcea, 0x08ab024, 0x9e74c56, + 0xf9ed877, 0x5c8003d, 0x4a2cbbc, 0xa632e9e } + }, + { + { 
0xc91c8b5, 0x32a4546, 0xc969363, 0xc122b5a, 0x3648b3a, 0xbbbec5e, + 0x25143b0, 0xd5a365e, 0x54157ce, 0xcf3e464, 0xf9bab64, 0x9712f04, + 0x04b4008, 0xc12d43a, 0x2edf1c7, 0x51932d7 }, + { 0xb2f8470, 0xaef1655, 0x6c24ace, 0xaa8e3f3, 0x6b4e761, 0x7da75da, + 0xb90bca2, 0xd371827, 0x0afb45c, 0x84db450, 0xef46b5d, 0xae12045, + 0xd962f98, 0x91639a5, 0x72f2ac0, 0x669cbe6 } + }, + { + { 0x83a4356, 0x851bb31, 0x9a1bf15, 0x7d436bf, 0x120b378, 0x46a3f0e, + 0x3f5b357, 0x9302abc, 0x93fef53, 0x1e06726, 0x5fd2ee9, 0xb12f4a9, + 0x7de9433, 0x94a884c, 0xa6f2874, 0x2645234 }, + { 0xcdb8dfa, 0x6fb56f5, 0x9e0ee4e, 0x4a17dfc, 0x83ab01e, 0xe269d83, + 0xb77c10f, 0xda932da, 0x0321243, 0x463af0c, 0x16fc8a3, 0xbe1d682, + 0x48b39e3, 0x2eae3ea, 0x3b03e7b, 0x9423021 } + }, + { + { 0xb22f28a, 0xaeb507c, 0x49a6b44, 0xa77458b, 0xc03dc17, 0x232ed5a, + 0x9c61ac6, 0x79dfc16, 0xcd71b93, 0x7c48be9, 0xc429cd9, 0x983d68a, + 0x98ae2c8, 0x7709c47, 0xa5df075, 0xe4765c0 }, + { 0x3367f33, 0x23c4deb, 0x37d72a7, 0xbdf2b7e, 0x0af2d26, 0xbaab5c7, + 0xfd026ab, 0xd609f7f, 0x541b039, 0x23b72b2, 0x83be852, 0x8d06bac, + 0xcb23d1c, 0x911d4a9, 0xfb0dbd7, 0xeae815c } + }, + { + { 0x2c33481, 0x487c35c, 0xb6136db, 0xffab636, 0xa3d3aa4, 0xccd4dae, + 0xc3704e0, 0x87149bb, 0xc0e8396, 0x9de8119, 0x58e7ca6, 0xd49357a, + 0x1562d75, 0x6878918, 0x5ab1fad, 0xc745381 }, + { 0x02c9b91, 0x0f15798, 0xb1ddde5, 0x7ffc3f0, 0x6aae50d, 0xa01d5e0, + 0xe279873, 0x6a97e65, 0xb5b1b41, 0x4bcf42f, 0x32f5982, 0x1c6410f, + 0x50701c8, 0xd4f7600, 0x873b90d, 0xff02663 } + }, +}, +{ + { + { 0xe5b2de2, 0xdc53ea2, 0x38acecb, 0x94b352d, 0x0d9d5e5, 0x37d960b, + 0x90bd997, 0xabd868f, 0x35a7376, 0x781668f, 0x10118bf, 0x043d597, + 0xf57928a, 0xd4da719, 0x983e46c, 0x01942f6 }, + { 0x728bd76, 0xab97fc8, 0x4b5c1c5, 0x825956b, 0xc82a104, 0x202809f, + 0xc8e3132, 0xdb63e9c, 0xc2181af, 0xa41c701, 0x43e066a, 0xd280180, + 0x24044ce, 0xc734e41, 0x505193c, 0x4d9ab23 } + }, + { + { 0xf9f0c3f, 0x0bcd42a, 0xb94a218, 0xda21a46, 0x0ffc788, 0xe55243c, + 0x47a5551, 0x318aae6, 0x79af9cb, 0x8c2938b, 0xec1dce5, 0x5d15232, + 0x8ad2e5c, 0x3d310ba, 0x94f792a, 0xd3d9724 }, + { 0x12a9553, 0xdeb4ca1, 0xeb54d9d, 0x2f1ed04, 0x69fb7a1, 0xaa9c9cf, + 0x54dcd3a, 0xeb73c3a, 0xf5f201f, 0xee3eddc, 0xba7d234, 0x35f9e1c, + 0xd2e242f, 0x1d1d04c, 0x0df7515, 0x48df9d8 } + }, + { + { 0xa81dd9a, 0x4ecc77d, 0x03aa015, 0xa6ac4bb, 0xbbc4fed, 0x7645842, + 0x9d6cf52, 0x9ae34cd, 0x5917e0b, 0xf8ff033, 0xc2cc175, 0x7c9da37, + 0xaaacfbe, 0x1e74dcc, 0x7999af8, 0xa8f2df0 }, + { 0x102a466, 0xd06c4ea, 0xae190dd, 0x2156e87, 0xec4a863, 0xc95db8a, + 0x244a6fe, 0x49edffd, 0x904f81e, 0x110fae6, 0xa1cd104, 0xbaa3e50, + 0x0478b65, 0x5bd38a2, 0xdaefbcc, 0x2b57d05 } + }, + { + { 0x86f4534, 0x1ce92ba, 0x414f5e3, 0xb2a8592, 0x9979436, 0xdd7a4c6, + 0x3f0add7, 0x7599aff, 0xe2d4f64, 0xe0ce4d3, 0x401a29f, 0x74475cc, + 0xa2377d9, 0xaef6541, 0x3f917b6, 0x54048f5 }, + { 0x05312ec, 0x1b86b22, 0x31493cb, 0x779ba22, 0xaac9320, 0xc718369, + 0x617fce4, 0xeab01a8, 0xf7187fa, 0x17b1f10, 0xa1aca46, 0xe68eda0, + 0x2586342, 0x61033fe, 0x0b6ca43, 0xfc14e79 } + }, + { + { 0x13d2491, 0x9f22319, 0x7997202, 0x66bdb53, 0x4617f34, 0x0bafb0c, + 0xf3bb7b3, 0x5917831, 0xb45bddb, 0x6feb2a6, 0x0202c19, 0x08662b3, + 0x05852f6, 0x0bc2b57, 0x91818c2, 0x2c00fd4 }, + { 0xda37dac, 0xca7672c, 0x5a30865, 0xfe4c04c, 0x322e92a, 0x5f1399f, + 0x25b1beb, 0xe7d67ea, 0xdce7f68, 0xe08b014, 0xf2f2b3c, 0x24df52a, + 0x750ecd1, 0x2028b23, 0xc810a45, 0x9b25d4b } + }, + { + { 0x7a9d799, 0xa35b715, 0x01f9c99, 0x6da1eb3, 0xe363ba8, 0x33ef91c, + 0xce140da, 0x21c0e2e, 0x158cd84, 0xb0b11bf, 0x93da438, 
0x6a87442, + 0x3db585b, 0x924f10d, 0x10c6159, 0xf5ddd73 }, + { 0x6a74c21, 0xb72dcb8, 0xcc8f79f, 0x6d14198, 0x9c5a8d6, 0x99f4b6c, + 0x90e135c, 0x0639688, 0x83f6385, 0x330edb8, 0x9079675, 0xe1a5a6b, + 0xb8f5fe0, 0x6e37fa8, 0x61dca1e, 0x60e2fd9 } + }, + { + { 0x66c395e, 0xc6cb403, 0xb51d0f1, 0x03b21a7, 0xe693181, 0xbc478a5, + 0xc6cff33, 0x0017c2f, 0x39d8d1e, 0x740a5b8, 0x4d9ec6d, 0x3968d66, + 0xb0ef1b0, 0xfd53738, 0x1ed0a04, 0x73ca8fd }, + { 0x75ab371, 0x4ace938, 0xddad7e9, 0xd602936, 0x750bcc2, 0x1f5424a, + 0x68c7a17, 0xfe09b36, 0x58341ec, 0x165f7de, 0x6ce61e5, 0x95b825a, + 0x66c83c4, 0x9d31e19, 0xcc5887b, 0x65b3e08 } + }, + { + { 0x21482d1, 0xd37e932, 0x08b6380, 0x9af6597, 0x7d61e4b, 0x279426a, + 0x80997ad, 0x80dd0ec, 0xd5b76d4, 0x7239b0d, 0xe76c098, 0x92e6c73, + 0xeab3e1d, 0xeeb2321, 0xeb1a910, 0xa69c4a7 }, + { 0x833d9ae, 0x46d6aa7, 0x572b0fe, 0x3ee6957, 0xcdb3d97, 0x44ccbed, + 0xcbea01b, 0x342f29d, 0x8926876, 0x0d518c5, 0x5585d2c, 0xaaabae7, + 0xe008f58, 0xc548c77, 0x21fab2c, 0x819e2fa } + }, +}, +{ + { + { 0xc16e981, 0x468e149, 0x9ddbb7c, 0x286c790, 0xdb7a38a, 0x2a92d47, + 0x8a27cb2, 0xde614e6, 0xe5b0ab6, 0x8dc8822, 0xcf48565, 0x38441ae, + 0x089435b, 0x11ed5c9, 0x82d0d31, 0x2389286 }, + { 0x72f2f31, 0xc6698d4, 0x56d76af, 0x295242c, 0xeba563b, 0x4099205, + 0x3ab7384, 0xae7de5a, 0xd0ed86c, 0xccdf127, 0x965c3c3, 0xb9b6d5b, + 0x2c31ad7, 0xe351a8f, 0xac12f13, 0xa761dd8 } + }, + { + { 0xf171ab7, 0xda115dd, 0x401f93d, 0x2de17b1, 0x40964b4, 0x95019ca, + 0x65ba3c3, 0x169d1f4, 0x0090d08, 0x534a007, 0x82bf410, 0x805c5e2, + 0x65f8d90, 0x15dfe11, 0xca72456, 0x827a416 }, + { 0x33a36c4, 0x5af8884, 0xd8ee604, 0x8bfa54c, 0x9ce290f, 0x08fd141, + 0x287b3a6, 0x2db5e8c, 0x03cdad2, 0xe5be981, 0xbf810b9, 0x155b874, + 0x670f473, 0x2ae42de, 0x7f74657, 0x2218584 } + }, + { + { 0x23ffa43, 0x54b2a50, 0xa24d919, 0xcf87b16, 0x63524e8, 0x1ff5402, + 0x56d1e54, 0x73c94e0, 0x3899fb5, 0x7651552, 0x18723bf, 0x13a7214, + 0x3561517, 0x39afbdd, 0x9f2862e, 0x49b790a }, + { 0x527d2ce, 0xc8c1f4f, 0x7609bb7, 0x1997aec, 0x02a3400, 0x583ad80, + 0x4f79706, 0xac2374e, 0x21b7183, 0xbf1f9a8, 0x6600fe0, 0x06158ab, + 0xbd56751, 0xfcc9b2e, 0xddaaec7, 0xe1de5ac } + }, + { + { 0x788fdab, 0x230baa1, 0x7d04597, 0xf30860a, 0x99f4caa, 0xa2c7ece, + 0x6ad065e, 0xbd39f10, 0x3bef7bd, 0xfd92f5d, 0x96d2203, 0x6069fad, + 0xc4d9e0d, 0xbff38ca, 0x1fda313, 0x419a017 }, + { 0x572f035, 0x5d77fd8, 0xb282b40, 0x5af99f2, 0x23facff, 0x7257d3b, + 0x58c90af, 0xf2ee223, 0x9b6a52a, 0xcc2687d, 0x302430e, 0x140892c, + 0x3ec4f38, 0xa934d5e, 0x3bd18be, 0xc087d7c } + }, + { + { 0xa2c5ed7, 0x7e94138, 0x53610bf, 0xbc8ceef, 0xd86f803, 0xe89356b, + 0x5a55330, 0x9a3a380, 0x11ad648, 0xe894aba, 0xba95918, 0x2e68fba, + 0xfcad344, 0x643e2ba, 0x61640aa, 0x0dd0256 }, + { 0xe25cbdd, 0xc02e479, 0x13a1b3f, 0xd78c4d8, 0xcca9692, 0xa6dae8f, + 0xe5de8a0, 0x3dd91e9, 0x764ea36, 0x78ae0ce, 0x85dbc5e, 0xb4ad999, + 0xe82a169, 0x967ff23, 0xbaee1fc, 0xaeb26ec } + }, + { + { 0x9a6f90c, 0x8c50255, 0x0ea374a, 0x56e7abe, 0x56413b2, 0x675c722, + 0x946753f, 0xd3fc17e, 0xe235f7c, 0x28c4e1f, 0xb028eb0, 0xe209bcd, + 0x489fe88, 0x7d0f93a, 0x063706a, 0xb966a2e }, + { 0x4a30319, 0xb6c228c, 0xca6d674, 0x6868efe, 0x057311a, 0x0610a70, + 0xbad7f89, 0x0808112, 0x1dd6181, 0x2a2462c, 0xb58e88a, 0x52ed9fe, + 0x33821a2, 0xbbff16f, 0x17f882a, 0xda53e96 } + }, + { + { 0x8c30e5d, 0xb6ffca3, 0x5c905f5, 0xa90f991, 0xd753e88, 0x72fb200, + 0x7256c6a, 0xe509d4c, 0xd866500, 0x369e552, 0x33cf8ae, 0xee4b7e0, + 0xefcf6eb, 0x280d954, 0xd557f0e, 0x5b275d3 }, + { 0xb5cecf8, 0xeb17211, 0xbdb2f8d, 0xd6ad50f, 0x35e04b7, 
0x2478c7b, + 0xac73bd3, 0x97e7143, 0x4817e24, 0x09d6ede, 0x2c405e1, 0x68fea71, + 0x05f67a1, 0x34adbc9, 0x73edf99, 0xd20ab70 } + }, + { + { 0x569f191, 0xe116a96, 0x4d6e29a, 0xb3f0bce, 0xf51dbab, 0x30b9e1a, + 0x346d276, 0x1dd36f3, 0x0749a27, 0x8315103, 0xab47f70, 0x242f148, + 0x5585681, 0xe8a5bcf, 0x5ed79ba, 0x8b80184 }, + { 0x3894ad1, 0xa4042fd, 0x2b88bc6, 0x82f781d, 0xbe4c397, 0x2d34cac, + 0xdd99c9f, 0x8731aea, 0xef1d382, 0x0f95498, 0xdd0bbc9, 0xcaba2e1, + 0x54064e8, 0x78889e9, 0x61a8ab9, 0x8cd9c97 } + }, +}, +{ + { + { 0xfa0459e, 0xf31f53f, 0x315cd6b, 0xf8742a1, 0xae64e97, 0xabe2f50, + 0x9b9da48, 0xbd78741, 0x51e526e, 0x4521a33, 0xe10ba45, 0xfa05935, + 0xe8f903c, 0x5c947e1, 0x5a754ee, 0x0aa47d1 }, + { 0xd814825, 0xb2849ef, 0x5c9968d, 0x9c2a5d2, 0x04e634c, 0x24dbb26, + 0xdb38194, 0x33f3a4c, 0xc8a2b6b, 0xe04f609, 0xabbbfdb, 0xcaefd8e, + 0x404498b, 0x683119a, 0x8b21cbd, 0x24ab7a9 } + }, + { + { 0x21fa2dd, 0x6f13269, 0xc10a4bc, 0xd79e61c, 0x4bd6d46, 0xac4b3ce, + 0xbd3f37b, 0x52459b6, 0xa396966, 0xce0f0a3, 0xa1ed488, 0x050d1d5, + 0xe0b17fa, 0x1b9c403, 0x04a2e66, 0xee1abd0 }, + { 0x5cf3e3b, 0x97065c3, 0xbe33441, 0x6513d5f, 0x79047ae, 0xcd34634, + 0xfd22df1, 0x45cbb1c, 0x967b17c, 0x7a173ae, 0x2223cda, 0x75f5ba7, + 0xefe0a73, 0xe3d12db, 0xfd7adcf, 0x3b7f94d } + }, + { + { 0xf1e9b7d, 0xd596a13, 0x6734e0c, 0x04f5bdd, 0x8be163a, 0x18b694f, + 0xd959fa3, 0x15620c7, 0x53d2a3b, 0x65fc2c5, 0xc4d36f2, 0xd44a364, + 0x268ceab, 0xc8b421f, 0xbfe2bd4, 0x564139a }, + { 0x19d4633, 0xb524610, 0x6346934, 0x5ab3f88, 0x9819422, 0x96691fe, + 0x8b39b82, 0xdfdec89, 0x97cfb27, 0x84b1c79, 0x4d6d004, 0xe59a98d, + 0x12c350f, 0x5e5d0c6, 0xd415774, 0xb431220 } + }, + { + { 0x6aae0a2, 0x3d0ca73, 0x48c2d8c, 0x7b1991f, 0x5cdae72, 0x00ae856, + 0xbd55128, 0xdbb6ca0, 0x45c82bf, 0x3c2ab2a, 0x79545ca, 0xea5a559, + 0xd5927d0, 0xeba9a26, 0x83257fc, 0xb52e401 }, + { 0xca9650a, 0x55ed517, 0xe3ebff2, 0xbdaa081, 0x9f8831b, 0x8cf7ce4, + 0x6e3b8d3, 0x1d0b5bd, 0xd8fc869, 0xa314a9f, 0xb892bab, 0x07f2079, + 0xa0cc9d9, 0xb700dbf, 0x6dc0a39, 0x7105a08 } + }, + { + { 0x8c7d901, 0x0c7e05d, 0xaf3182b, 0xa7ff681, 0xf9a0d06, 0xb88e3ca, + 0xc343b7f, 0xfe20a12, 0x03251f9, 0x9f02577, 0xc40c5eb, 0xf225ded, + 0xb208ea7, 0x50e0cec, 0xe6eeb65, 0x5b250f0 }, + { 0x4806b6e, 0x807a153, 0xfa94139, 0xded120a, 0x49366fb, 0x237ddc7, + 0x5a34bcb, 0xdd3674e, 0x9c4a61d, 0xef6cdff, 0xb2fb896, 0x036194b, + 0x9528cd9, 0x3865953, 0x6936a52, 0x0723c59 } + }, + { + { 0xe17719d, 0x1f84cd5, 0xc73b394, 0x545939b, 0x83e84e7, 0xefbf3c5, + 0xf77fd66, 0x6cc46f1, 0x1383ab8, 0xa629f59, 0xcd35cd2, 0x9177ffa, + 0x9dd411b, 0x039187f, 0x7b7eea8, 0xa9cf1cf }, + { 0xac47e5d, 0xa3b105a, 0xd0a9da4, 0xa755bea, 0x73da15e, 0x50cfbae, + 0x60b628c, 0x9456cbc, 0x9b7a910, 0x7ffc362, 0xcd6d6a4, 0x30b5924, + 0x0b04ab6, 0x198629f, 0x624dea9, 0xc74609c } + }, + { + { 0xaf12fa6, 0x27d4d77, 0x690aeb2, 0xdd8a216, 0xfe24417, 0xe48fc02, + 0x720e17e, 0x1970403, 0xce37b42, 0x95013fd, 0xde4bd9b, 0x06817d2, + 0x63d0ba2, 0xc5863e7, 0xa556f5d, 0xa1bafc0 }, + { 0x410a78a, 0xf28ec7b, 0x0a01a63, 0x0dcac42, 0xb5bce11, 0xfcd3fa4, + 0xd278b89, 0x054d7e5, 0x5ce49e3, 0x5195db8, 0x2c73d96, 0x4c0b167, + 0x20a1bdb, 0xd943077, 0x59c77a7, 0x66fa8b3 } + }, + { + { 0xd7462fe, 0xb9e93ae, 0x18dde4f, 0xbfe54b2, 0x3dbb08e, 0xaabb528, + 0x0e5fc45, 0x8c36702, 0x8e69be3, 0x3502888, 0xc12a11d, 0x6d2efc1, + 0xf265e30, 0xfce5ceb, 0x5742c7e, 0x58c8bb3 }, + { 0xccf7fa0, 0x32e89dc, 0xdd020a4, 0xa811f33, 0x5129fe5, 0xa10d620, + 0xe4ed29b, 0x3841c88, 0xd8b1ea6, 0xf3303a9, 0x1781f58, 0xa9a0cad, + 0x8f3ef0b, 0x4502b38, 0x74c6d35, 0x2b7587e } + 
}, +}, +{ + { + { 0x23ae7cd, 0xc6eaea1, 0x73c0caa, 0xa1884d4, 0xef1ea88, 0x901e76f, + 0xa14269d, 0xdb9935c, 0x947f1de, 0xe8b2486, 0xa657588, 0x4ad56f4, + 0x2913fb1, 0xe768054, 0x37600da, 0x2abff5d }, + { 0xa81a797, 0xa814813, 0x46acb69, 0x63e76a4, 0x4ab8277, 0xb103839, + 0x9d8e759, 0x587de34, 0xddf62df, 0xdfaeb8d, 0x9239d49, 0x24fe1cf, + 0xe130d1c, 0x7de7409, 0x581d070, 0x3ecfef9 } + }, + { + { 0xf87c72d, 0x8d177a0, 0x8c6d1de, 0xae7e581, 0x8cece85, 0x0077b5f, + 0x32d2187, 0x3824838, 0x6db2bd2, 0x49d8b15, 0xc8d85b9, 0xe9e5513, + 0xe05c53f, 0x63c410c, 0xd86f752, 0xceaf2fb }, + { 0x93806c5, 0x0b432fe, 0x3d06c75, 0x18eb15d, 0x12cfc02, 0xcaad826, + 0x1e2d045, 0x581e040, 0x95edcfd, 0xd573cb5, 0xdbc66e3, 0xce71948, + 0xacc14ea, 0xcf68721, 0x6cac4dc, 0xf68bea2 } + }, + { + { 0xcb74da2, 0xd8576af, 0xc433f46, 0x8771c29, 0xe2f5b8e, 0x7315af6, + 0xba33928, 0xc195481, 0x2fb1f94, 0xb77dcc2, 0xa610f75, 0xcb3e57c, + 0x53907df, 0xeb2a927, 0x23eff95, 0x916f149 }, + { 0xb6cd291, 0xbb378e4, 0x2f13ce1, 0xa2a5e2b, 0xbcd00b0, 0xa8a0e60, + 0x682b75a, 0x5902741, 0x3f65a77, 0xa0882c9, 0xc93cfff, 0x2069f75, + 0x70c0cb9, 0x1ede405, 0x0d526c4, 0x13840c9 } + }, + { + { 0x03ced48, 0xdc2caaa, 0xa0315be, 0x2079219, 0x3b1f642, 0xca49356, + 0xb0665f2, 0x0202dc7, 0xb7a5238, 0xe5d6bbd, 0x26eab32, 0x36fbd5e, + 0xf5819b4, 0xb3988f1, 0x4aa4d69, 0x5b15dc8 }, + { 0x54e5c24, 0xa52feed, 0xe91a797, 0x927471b, 0xd57f677, 0xd119bfd, + 0x78e4c4f, 0xde38f7b, 0xb150bc3, 0xa7af516, 0x26b76c2, 0x403b21e, + 0x92300dc, 0x589067d, 0x066802a, 0x04e406a } + }, + { + { 0xa9ca9bb, 0x28e7d09, 0xfccf4a0, 0xaa84fd5, 0x635b7ed, 0xdbe9fb8, + 0xd56fc7c, 0x9ede3f5, 0xb01cb29, 0xa4b5031, 0x7f93703, 0x584299d, + 0xb6fe825, 0xbd28868, 0x8b9c2d9, 0x1d385d4 }, + { 0x822be80, 0x6606f4a, 0x626d0fd, 0xb5a0165, 0x14568ad, 0x9920a20, + 0x1c6d174, 0x7d430f4, 0xe02e9e9, 0xc243e16, 0xa6bd649, 0x367f1d2, + 0x71b8c36, 0x6939100, 0x4de2984, 0x2ede131 } + }, + { + { 0x5beec32, 0xdc78187, 0xa525ff4, 0x1fff0cc, 0x676df34, 0x6e86425, + 0x3f638e1, 0x2b4e8a6, 0x9b1e59f, 0xc4991d2, 0x1589717, 0x399d001, + 0xbe041cd, 0x406464e, 0x9e65bb0, 0x901cb3d }, + { 0xfb42307, 0xf5f4572, 0xf1b7307, 0xf81b3b0, 0xf2094d1, 0x8fb695c, + 0xdb56f7b, 0x7db4792, 0x5a794e0, 0x36836d5, 0x09bc879, 0x2da477b, + 0x1887c40, 0x1cdfadb, 0xf2699b6, 0x65dc6c2 } + }, + { + { 0x4737972, 0x36f9f21, 0x7a387b0, 0x48f0c8b, 0x39a1d24, 0xa156ed3, + 0x0fed268, 0x375293a, 0x7ff75cb, 0xf679f48, 0x1cc9e62, 0xd15a00f, + 0x22c3877, 0x92a7dc7, 0x6fb0ed4, 0xe987063 }, + { 0x16f5f3c, 0xfd8e59c, 0xaeeb48e, 0x375732e, 0xca1ab42, 0x2dd9213, + 0x9ffccea, 0xcb06209, 0xb23edfd, 0xfc611f6, 0x99b060e, 0x2716349, + 0x820de8a, 0xb938b5d, 0xeb49a32, 0x138f6e7 } + }, + { + { 0xe485f70, 0x7feda63, 0xeb27b2c, 0x646380a, 0xc4511c7, 0xcf8fe32, + 0xff9406a, 0x2c68e1e, 0x20b6020, 0xa9f2fd9, 0x3b3e465, 0x1c98fc6, + 0x93e53aa, 0xb8dac35, 0xa750e96, 0x2fb47b6 }, + { 0x1950bb3, 0xea373ef, 0x4ac7aec, 0x8156694, 0xb55b931, 0x8d6b3c2, + 0xb62ef7d, 0x5d13f2d, 0xab9182b, 0x4647f2a, 0x33bf07c, 0x8f56c5a, + 0xb35a221, 0xc5ab284, 0x5a46a6b, 0x0747ab7 } + }, +}, +{ + { + { 0x86b85c5, 0x5b9236c, 0xc482448, 0x5967a0d, 0x7df6ae0, 0x397c955, + 0x5378f2b, 0xf83ee1c, 0x6e05dd1, 0xf82df65, 0x19d7c8b, 0x4c424f6, + 0xa6d5f2a, 0xa612550, 0x63c3ebf, 0xfe8482a }, + { 0x0142c82, 0xcb8d403, 0x3679e6c, 0x08b0662, 0x3eca5ee, 0x3ea5146, + 0x1370500, 0x089eb3b, 0x5a0d306, 0xcbfb19c, 0x42a65bb, 0x2f68588, + 0xe51e119, 0xe3e1db5, 0x110895e, 0x2c150e7 } + }, + { + { 0xf6d4c4c, 0xf323488, 0x63b87e2, 0x5fc931f, 0x35c759f, 0x8867da0, + 0x9746d4c, 0xb6f1eff, 0x990be0a, 
0x8a8172d, 0x5c407b4, 0x1113eee, + 0x378ed8a, 0xd80dacf, 0x3fa7fd1, 0x99b57cf }, + { 0x5176405, 0xf5bb6d9, 0x92e83b5, 0x6b8963a, 0x8a7ef8d, 0xac55b6b, + 0x6c1fbf0, 0xe73fa12, 0x60148df, 0xdb37560, 0xf3f1fba, 0x72f1a98, + 0xea550f2, 0x1f71d0a, 0x9544a87, 0xc3ea4f0 } + }, + { + { 0x4322bf3, 0x5b09da2, 0x61264e1, 0x2a573d5, 0x803acc4, 0x93cb2e1, + 0xe502fc6, 0x397b4fb, 0x39e0ebc, 0xddfb212, 0xbbcbc57, 0xeccd8f5, + 0x4663788, 0x49d3bed, 0x1218df9, 0x37192aa }, + { 0x2ffa3c6, 0x8a05bc9, 0x23ebf4d, 0xc38c281, 0xfe343a8, 0xc80d547, + 0x6c63516, 0xa8d5a5b, 0x8d8fa6b, 0xc5d8ce1, 0x24a87c0, 0xeb5e872, + 0x75bfa23, 0x9806e9e, 0x689469a, 0x11f0889 } + }, + { + { 0x8e75666, 0x81005f6, 0xd349505, 0xb84d861, 0x9f321ea, 0xe083282, + 0xcfa33a1, 0xb751d7a, 0x067c550, 0x793cf6f, 0x1027e56, 0x073a6b2, + 0x66a6012, 0x53f40ee, 0xc210fa9, 0x70bfaa8 }, + { 0xe4b5998, 0x1518e39, 0x24b8d9c, 0x8f0b530, 0xafdf923, 0xd91c281, + 0x24e3f69, 0xc5cfb28, 0x870871f, 0x63a529a, 0x2128dad, 0x3d3e887, + 0xcb30cce, 0xed658dc, 0xafb7bae, 0xf9373b9 } + }, + { + { 0xde58ed2, 0x22d4dbe, 0x03f8789, 0x4fefc1d, 0x344817f, 0x6b0a1fe, + 0xa56b0b2, 0x96bef40, 0xda249fa, 0x32684ee, 0x524a91b, 0x8298864, + 0x0c736a1, 0xa958baf, 0xef2f3e5, 0xd033a7d }, + { 0x43f4d6a, 0x5be3edc, 0x9c89abb, 0x326a39d, 0x55d997a, 0x90c44f7, + 0x6e966c2, 0x2058106, 0x6548038, 0xdbae490, 0xd473fc1, 0xac7bc97, + 0x4b2603a, 0xb34488b, 0x5e9bb98, 0x27aea27 } + }, + { + { 0x1b88773, 0xa59e728, 0x0c241f6, 0xe2f05d4, 0x4e75749, 0xa56229e, + 0x1b10705, 0x8f00c0b, 0x19394d3, 0x8559946, 0xaaf5e32, 0x0d7e352, + 0x787b8ea, 0x526c462, 0xa179d48, 0x89297d9 }, + { 0xef43892, 0xeff17e6, 0x221f841, 0x17091eb, 0x4a4b848, 0x82f5eb3, + 0x8eb7b76, 0x6bea477, 0x76c536c, 0x21f2271, 0x96c81bb, 0xd9ef2c8, + 0x54bf4d3, 0x7c27546, 0xd7c28c8, 0x9dd4662 } + }, + { + { 0x20e1a6b, 0xe7fff00, 0xa08d467, 0x26a35c6, 0x3248c91, 0xb3c773d, + 0xba7d935, 0xa646615, 0xb0d26fa, 0xa91f453, 0x60c6d32, 0xdcf9c34, + 0x9e3e3dc, 0x6366861, 0xf30f3e2, 0x3012813 }, + { 0xc2fc61a, 0xac6623d, 0x2bfd2ff, 0x108dc25, 0x231d6ea, 0xd7f5c0d, + 0xad1107e, 0xa904f9a, 0x0d1e9c8, 0x46941c2, 0xc810cf2, 0xe5b6451, + 0x4f511d1, 0xaba8e67, 0x08373fe, 0x5b4b94f } + }, + { + { 0x849c230, 0x002d4e2, 0xd8ba391, 0x9bed0ef, 0x828e319, 0x745e0c0, + 0xca58de2, 0xcd40907, 0x1abaa4a, 0x2c87ab1, 0xdb64391, 0x3c17a97, + 0x86c72d2, 0x36b184e, 0x485f7aa, 0xb03d202 }, + { 0xde24aba, 0x2b6b79b, 0x2325fb2, 0xdcb7854, 0x66ebae2, 0xf5d1db9, + 0x903840a, 0x35a4d5b, 0x190e9da, 0x7afeb09, 0x35c1792, 0x1818f6a, + 0x3faa269, 0x90091fa, 0x2570235, 0xc4ccff6 } + }, +}, +{ + { + { 0xec85940, 0xa177619, 0x7ef7eee, 0xfca24db, 0x7a90c11, 0xb2450f3, + 0xdbf4f85, 0x29d256d, 0x51316c3, 0x920c8d0, 0x04474da, 0x2f7f7ba, + 0x2ec9a0b, 0x308117f, 0xd0d2085, 0xd0a231a }, + { 0x7ab641d, 0xf3288fc, 0x9f4fa32, 0xc68bade, 0xbbf8253, 0x768f014, + 0xc0a33f0, 0x5eff260, 0x6bb93ce, 0xc71b453, 0x680697f, 0xa71d045, + 0xce72bc3, 0xb62444c, 0xd1379f3, 0x11f03e8 } + }, + { + { 0xc16df92, 0x1f54789, 0xe3ed142, 0x874c642, 0xfa2a9f1, 0x6699f60, + 0x3fecfc1, 0xbd1b8d3, 0x8a3d953, 0x59682d5, 0x4a36b81, 0xf17c021, + 0x181a666, 0xeb9621d, 0x3cf1ad8, 0x7c2c3ab }, + { 0xe529f7c, 0xe6888c3, 0xb355315, 0x197b66a, 0x83e31ac, 0x63b558a, + 0x891c68e, 0x4aa7bc5, 0x592e360, 0xc17d989, 0x1363666, 0xc750a29, + 0x4909ac0, 0x0d53470, 0x4594a10, 0xd6d0272 } + }, + { + { 0x3fbb635, 0x35c541b, 0x5982afa, 0x50016d0, 0x96b0ca0, 0x58ebce4, + 0x577ea56, 0xb940027, 0xe38480f, 0xf29d305, 0xebd6a2c, 0x43705b0, + 0xe90c639, 0x0e4acda, 0xf56e05e, 0xbe94a29 }, + { 0x30659ad, 0xc61f4a0, 0xc402211, 
0x39074ad, 0x51b621d, 0xfe0d8d5, + 0xd1d5222, 0x2d02e8d, 0x46c2683, 0x05ece3c, 0xc689d41, 0xf70705a, + 0x4d837bf, 0xe3caf44, 0x75ba6d0, 0xfda0584 } + }, + { + { 0xcb7d458, 0x1098163, 0xf5ba834, 0x12b645f, 0x28af72c, 0x70a3181, + 0xf32e5dd, 0x5f4727e, 0x10a21b4, 0x7cbae15, 0x6785389, 0xa80bf80, + 0xb8f93b7, 0x9827402, 0x08349da, 0xe385f82 }, + { 0x9589f6e, 0x2d05461, 0xe7c0191, 0x6aa5b26, 0xbd5574d, 0xe79ae12, + 0x4148e61, 0x5d13f91, 0x13716ff, 0x7b2be0f, 0x80bb81f, 0x82b0fe6, + 0x3e2569c, 0x697633c, 0x873f8b3, 0x6c1f083 } + }, + { + { 0x0be1674, 0x6e26d85, 0xab8044f, 0xe4e47f6, 0x82fc434, 0xfdf46e8, + 0xc89cadc, 0x639ae2c, 0x4b85bdc, 0x2244a52, 0xb7cf4ea, 0xb1e4790, + 0x7e0bb8f, 0x51dce03, 0x2716cee, 0xdd14335 }, + { 0x8e8841d, 0x1c049b4, 0xb97c621, 0x6bf26dc, 0xba01178, 0x21d6255, + 0x8e4f0e4, 0x477258a, 0x68f8ef1, 0xf5e437e, 0x8b03e1e, 0xd118fbc, + 0xe1c91b3, 0x3d6bc51, 0xd5b6907, 0xa259486 } + }, + { + { 0x7b6f5dc, 0x4159cfc, 0x493694a, 0x05a52b3, 0x83b8883, 0xeeb511c, + 0x2b06400, 0x19d79e4, 0x738f37e, 0x8e503a2, 0x5a94ad9, 0xa30e579, + 0x262618d, 0x3981c75, 0x2dcba19, 0x06b6c69 }, + { 0x4d1b051, 0xd7242ee, 0x3b350c4, 0x6274ccb, 0xf540019, 0x66df0bb, + 0x5ae12d5, 0x4d66be6, 0x1049cba, 0xcea2960, 0x8df84b3, 0x4047339, + 0x75a31c8, 0x7d6c96b, 0x874174c, 0xbb80159 } + }, + { + { 0x59f1aa4, 0xf0f7be0, 0xdcff451, 0x798f39a, 0x8014e1e, 0x96763ff, + 0x09cc5ec, 0x03987a8, 0x893650a, 0x4919656, 0x75e24df, 0x92e8eef, + 0xe89d639, 0x54e97cd, 0x7682cc0, 0x8081d06 }, + { 0xa8ceb71, 0xb9ef41a, 0xa4d7aaa, 0xb8173a4, 0xc54ee10, 0x93d81b1, + 0x70a445a, 0xabe1805, 0x64d569d, 0xac0ff97, 0x3e570be, 0x86946b2, + 0x4180641, 0x8e11dd2, 0x99f67dc, 0x3d0b33c } + }, + { + { 0x48bf5a4, 0x2c9637e, 0xccaf112, 0x9fdec19, 0x5c42023, 0xe5cde9d, + 0x878f0cc, 0x9869620, 0x1fe6eba, 0xcf970a2, 0x54e678b, 0x1df5ec8, + 0x28d00dd, 0x4667f01, 0xb0b3fa8, 0xfa7260d }, + { 0xb34239b, 0x6bd2895, 0x2d2a50d, 0x04c8bc5, 0x6cb23e2, 0x14e55ef, + 0x3a278d5, 0x6440c27, 0x2193046, 0xf4b12e3, 0x5dd4c08, 0x46adf64, + 0x4656e8c, 0x70e2998, 0xe4acd44, 0xe7b36ea } + }, +}, +{ + { + { 0x16cf664, 0xea64a57, 0x26fd357, 0x8497ee4, 0x814e851, 0x44d94b4, + 0x5a6a2cf, 0xf4aac22, 0x80c301f, 0x947b309, 0x7865383, 0xf390ba1, + 0xd1773d3, 0x16c4fc6, 0x6227220, 0x61b9814 }, + { 0x1dd0270, 0x07dd03a, 0x0f160df, 0x290ca82, 0x44ba955, 0x8f22054, + 0x0b6f1b3, 0x4e85e45, 0xad78089, 0xfd73ce9, 0x2f2cb0e, 0x67c1270, + 0xee33a61, 0xa7de0d7, 0x6553261, 0x6a811cc } + }, + { + { 0x2d0a427, 0x5ef0574, 0x220a341, 0xe8d2e95, 0x8044886, 0xdd28cbf, + 0xa1aa58b, 0xdad7b4b, 0x8ec901b, 0xb28f373, 0x5bbe3db, 0x1841a93, + 0xa075fee, 0x8fd7cd1, 0xc0d3cdd, 0x93b603f }, + { 0x5edd859, 0xca54fd5, 0x64ed687, 0xa4cb05f, 0xed1a3d7, 0x3138668, + 0xee32be5, 0x1224fda, 0xc80aeb3, 0xf1f532b, 0xe8d4d69, 0xa4f65d0, + 0x5905fe5, 0xc697a01, 0x6690ce4, 0x514da7a } + }, + { + { 0x3de4a55, 0xc7b9af8, 0xb318d93, 0xc79bad7, 0xf5b1c83, 0x1808071, + 0xb965b16, 0x92112ef, 0x7bb740a, 0x655ab38, 0x384ff87, 0x53dbc8b, + 0x72dc6f2, 0xd153c28, 0x99c7819, 0x2ec20e1 }, + { 0x3b854b5, 0x65e46ea, 0xc711db5, 0x272d5ae, 0x26e19e8, 0xfd1bb53, + 0x3dc0665, 0x33280b8, 0xb8f1c4a, 0x95b986e, 0xa685c4a, 0xa671fc4, + 0x83bdbbf, 0xa03cbd5, 0xab77544, 0xd329402 } + }, + { + { 0x8e62b35, 0x40fa651, 0xf9e55a6, 0x3913b11, 0x5270a41, 0x4e8089b, + 0x80d1886, 0x565f52a, 0x512749b, 0x93b5f05, 0x141c547, 0x35c869c, + 0xf86717f, 0x9a44a1a, 0x9c2b2cb, 0x2b9984b }, + { 0x4952322, 0x61fb607, 0x7af1464, 0x2d4072f, 0x600eb30, 0x9b2fa8c, + 0xf10668e, 0x6071fb7, 0x90634ca, 0x27cc24d, 0x471d32b, 0x3875bc2, + 0xa11210c, 0x678590b, 
0xfcc5a9a, 0x352b447 } + }, + { + { 0x5fa3200, 0x795d541, 0xa92949f, 0xadaa557, 0x3cc88c4, 0x42fff06, + 0x71b68a5, 0x26d6831, 0xe67ad8c, 0x3286549, 0x86396b2, 0x5bf6363, + 0xe12c8ea, 0x41229b6, 0x748952e, 0x05320c9 }, + { 0x900b460, 0xae36b63, 0xf2b6aff, 0x9354ff2, 0x065ee0c, 0x10b810b, + 0xcc8bb38, 0x4d6925f, 0x7a22f14, 0x31c03fd, 0x57544e8, 0x76b7f44, + 0xc0eed26, 0x3a9123c, 0xe0cd1cc, 0x77acd67 } + }, + { + { 0x07ec527, 0x2e90530, 0x62937cf, 0x32388ef, 0xe229188, 0xa445389, + 0x33bcebe, 0xa44b68e, 0x4c4e701, 0x5a8722e, 0xcf07e41, 0xfd066e8, + 0x95fab62, 0xa3c1a4f, 0xe542f24, 0xb4d6a1b }, + { 0xaf6c9b5, 0xe6a92e4, 0xc83d61d, 0x9452484, 0x0062276, 0x422b55b, + 0x5279688, 0x261973a, 0x3999fb2, 0xde8be26, 0x7b029ca, 0x64e9628, + 0x06897d4, 0xd8edfaa, 0x6955511, 0x408319c } + }, + { + { 0x50a5632, 0xff6baed, 0x5c5885a, 0x922b7d0, 0x1b45864, 0xdf0f3b3, + 0xc04340e, 0x27e49c0, 0x122c447, 0x618c566, 0xeafee7e, 0x7863a38, + 0xb828cb0, 0x7143aff, 0xf9d054e, 0x51fcf4c }, + { 0x27f5e09, 0xc4a4b31, 0x90be2bd, 0x021f47a, 0x7ab956d, 0x1a06019, + 0x86ea86b, 0xe77fa15, 0xd550ef3, 0x9ccde87, 0x6532654, 0x7dee53a, + 0xe826387, 0x8b4f060, 0xad077b5, 0xda38637 } + }, + { + { 0x0e9fac8, 0xbc901b3, 0x6fb2a2a, 0xfa08204, 0x5e04efc, 0x92f68ab, + 0x9ac12d0, 0x184a30a, 0xb25d479, 0x1aa11aa, 0x0f03161, 0x8bc5f4c, + 0xcfc8817, 0x7e3a083, 0x597f93f, 0x84d9355 }, + { 0x239abc6, 0xc014478, 0x8d37b04, 0xb226b09, 0xf575789, 0xb056942, + 0xba745eb, 0x816b95a, 0xb98ddb6, 0x2a49d39, 0x291af81, 0xc41ca26, + 0xab26347, 0xb3afe99, 0x604b638, 0x59c31bc } + }, +}, +{ + { + { 0xc42befd, 0xa16a8b9, 0x2052f00, 0x731c9c9, 0x1f5dfa0, 0x1ad49b4, + 0xbffce36, 0x7a289e3, 0x0c79cf1, 0x868fac0, 0x86721ab, 0x6d6d284, + 0xe726c94, 0x590f928, 0x51f3841, 0x0e802cb }, + { 0x0b694bc, 0x6a6a57a, 0x8120fb8, 0xb9bb0cd, 0x9c05826, 0xad96ac7, + 0x7768df0, 0x294da8c, 0xb56c6c6, 0xfe32311, 0xae8d050, 0x291c2c6, + 0xe7db4c9, 0x1c765e7, 0xd65f9f7, 0xe058298 } + }, + { + { 0x7e8d345, 0x4bfa85b, 0xde1dfc8, 0xa04ef95, 0x324ace3, 0xb5f7f21, + 0x574b14a, 0x4b350a1, 0xf8e5c8d, 0x11436bf, 0x7642369, 0x1c789f9, + 0xfb623ce, 0xeb5e335, 0x442d562, 0x9deacd2 }, + { 0x531ee71, 0x4ff989f, 0xaacb52a, 0x43e2c49, 0x85bfadc, 0xa763198, + 0xd0161a0, 0x08b6d5c, 0x541f197, 0x010e3fa, 0x3279a16, 0x83a589e, + 0x6309f9b, 0xf099137, 0xf1cea10, 0x07c093b } + }, + { + { 0x33d2192, 0x1ce3f0f, 0xc37ce73, 0x07b559a, 0x207be27, 0xaa2ad38, + 0x7ed93de, 0x84f053b, 0x3b98a4b, 0xbc5c797, 0x63aa9b9, 0xc923461, + 0x231a10c, 0x807cc16, 0xa061209, 0x8ffdf57 }, + { 0x497070f, 0xa9ca741, 0xd113b3a, 0xf608ec9, 0x8d0384d, 0x5132726, + 0xf5ec307, 0x96686ac, 0x71c4665, 0x437bbbd, 0x7c379ca, 0xdef09d5, + 0x621747c, 0xf8be033, 0x8ae8047, 0x2775b37 } + }, + { + { 0xb2c4fc2, 0x4009798, 0x203772e, 0x148d7d1, 0xf8423fb, 0x9d9392d, + 0xaf8cef4, 0xa5bd72e, 0x4380b53, 0x579d58d, 0x8c39d24, 0x2ff88f1, + 0x5706466, 0x9ca2fbc, 0x1e56af2, 0xb42987d }, + { 0x5d94ea8, 0xcc2556e, 0x5369d76, 0x4e5c2b3, 0x2a94f9c, 0x5de3574, + 0x5cb4145, 0x8d068c9, 0x51bfcbf, 0x4d553ff, 0x8a23fce, 0x3ab7164, + 0xd0fa7f3, 0xc9cb3a9, 0xed9ced1, 0xf81209b } + }, + { + { 0xe5b66f5, 0xde7356e, 0xe8a25e0, 0x7b2bf1a, 0x2c9b725, 0x09a444a, + 0x4906c55, 0xfd8a2f4, 0x82514f3, 0x409cc80, 0x28999a9, 0x47e0099, + 0x6a312f4, 0x0a582a6, 0xf6723de, 0xf7946f8 }, + { 0x92d8aff, 0xa55f6ba, 0xa544b1c, 0xb62c3c8, 0x5c16a94, 0xa1d1411, + 0x2ad5e71, 0xc378319, 0x06b1dd6, 0x13d7847, 0xee7ff55, 0x99005f8, + 0x8a1e7d8, 0xfb5ea3f, 0xb4cac39, 0xdc7f53c } + }, + { + { 0x36e3794, 0x482abaf, 0xc74684f, 0xc23e9e5, 0xf1629be, 0x4544cf6, + 0x2f40374, 0xd8a8ee5, 
0xf433bdb, 0x2eea87f, 0xae9990e, 0x489a99c, + 0x54b23b6, 0xefc131e, 0x8600270, 0x25fe699 }, + { 0xc059a7e, 0x03d2d9e, 0x6979c3c, 0xa6445b5, 0x9bfbcea, 0x491a10c, + 0xe937af1, 0x15b5974, 0x797c7fc, 0x4be8002, 0xfedcfee, 0xbed8a49, + 0xa9e0691, 0x35751ce, 0x9ef5982, 0xe9a9fa3 } + }, + { + { 0x3065de7, 0xeffeaca, 0xac4d4e2, 0x841d544, 0xcaf199f, 0x8144679, + 0x443967a, 0x98cf4f9, 0xf33183c, 0x8cd57f4, 0xc1b15eb, 0x390832a, + 0xa53b500, 0xc4b1fea, 0xdff24b5, 0xd762a10 }, + { 0xb0ee2a9, 0xccd3eed, 0x362d485, 0xa6dd4a9, 0xf1d047a, 0xeb4ff26, + 0x23860fc, 0xc0771fd, 0x4b64114, 0xdbb4e39, 0x4d29b29, 0x2ff3f24, + 0x387b365, 0x9cac005, 0xde5994a, 0x05b7aa6 } + }, + { + { 0xc03dd63, 0x5e71752, 0xbc74687, 0xad10fe9, 0x54c76ab, 0x51a5b0c, + 0x1f586d4, 0x763fd50, 0x816048b, 0xc7bd5ce, 0x3f744dc, 0x8fc83d2, + 0x109df9a, 0x0561802, 0xccf0e43, 0x18fb01f }, + { 0x038ab23, 0xe4606fc, 0xa664c98, 0x5878f1f, 0x5da7356, 0x3aedbbd, + 0x516746a, 0x3c578f5, 0x1a17210, 0x259477f, 0x028248f, 0xc7a869d, + 0x48cbf95, 0x6517a61, 0x3d04d47, 0xbc5f91d } + }, +}, +{ + { + { 0x083ca53, 0x15fd9a9, 0x2697ca6, 0x1161da0, 0x56b676c, 0xf516af3, + 0x75eec13, 0x8a420d5, 0x1a9526b, 0x72d6742, 0x76b463f, 0x8d8c29e, + 0x8815627, 0x38a4f58, 0xe0650f9, 0xf7e528b }, + { 0x382edca, 0x2cfa78e, 0xc4ad83c, 0x638d183, 0xe4a0119, 0x96d3b9d, + 0xa7c1101, 0x5769ccb, 0x2b8d04a, 0xc3b3b79, 0x4951bde, 0x96212f6, + 0x481161e, 0xad7905a, 0x41c5edf, 0x8fd6762 } + }, + { + { 0x39d6cde, 0xf7b0635, 0x115a84a, 0x69d0549, 0xcbd9fe4, 0x4a976c6, + 0x950ff96, 0xc92953f, 0x654d127, 0x1d7f0fe, 0xda0f75d, 0x7293870, + 0xcf2277f, 0x7bb3652, 0x834484f, 0x64798c9 }, + { 0xac3a76c, 0xb94d8bf, 0x7ff776b, 0xf5721a9, 0x2722e31, 0x23a6e9f, + 0x9a5c034, 0xe9da996, 0x456ebc3, 0xb9bbf83, 0x96956a4, 0x239f58a, + 0x18b7f00, 0x8b75beb, 0xa51cb97, 0x6c2b5b8 } + }, + { + { 0x7eb41f3, 0x78b1c62, 0x17c4352, 0x0638fcf, 0x0c5709c, 0x939edd8, + 0xedc906c, 0x0a8dfc3, 0xefb01ed, 0x3942f47, 0x49986fe, 0x4c82757, + 0x4dffa57, 0x792545c, 0x6c3ff26, 0xeee6883 }, + { 0x12b1218, 0x824d08e, 0x902457f, 0x515a478, 0xbae55b3, 0xc70cc9c, + 0xbcef9d4, 0x1240737, 0x2f9db7f, 0xf22e616, 0x91f8da2, 0x98c4f02, + 0xafaaa67, 0xa89219c, 0xe7d27e2, 0xf35fd87 } + }, + { + { 0x01b80d0, 0x19b0cd7, 0xf9aebd1, 0x3d7e29d, 0x0477cbc, 0xd39c9ca, + 0x5ff0d3d, 0xac0f615, 0x520fd01, 0x8a51993, 0xb22d6fb, 0x508ff54, + 0x318d3ab, 0x8786c47, 0x4a683f8, 0x4312c46 }, + { 0x95359f6, 0x73b1d39, 0x963011e, 0x0d94fa5, 0x9bfe83e, 0x5723af2, + 0x6841df3, 0xafa9001, 0xb7c498a, 0x791e92a, 0x7ea4253, 0xbc931ad, + 0xb783c06, 0x438e016, 0x2ca662b, 0x1347db2 } + }, + { + { 0xfbaa861, 0x41df37d, 0x329e4de, 0x98ecb23, 0x507e018, 0xdaf1560, + 0xb088e32, 0xa902269, 0xe4cab2f, 0xad898a5, 0x02c1e1b, 0xd84e9ed, + 0x8488af3, 0xc20a5d5, 0x6cc77c6, 0xc7165af }, + { 0xdeb7461, 0x8526f3a, 0x4a2d332, 0x03577b1, 0xe4760b5, 0x28e469d, + 0xb276266, 0x442c7f9, 0xf9c90fa, 0x90d5c77, 0x3e211bd, 0x7aa8716, + 0x5decfd6, 0x56d8ff0, 0xee23e6e, 0xa204b56 } + }, + { + { 0x4aceafc, 0x2e4374e, 0x6fcd5e5, 0x978743b, 0xc4855ca, 0xa0f6345, + 0xe98074b, 0x9bc7e4f, 0xc33d08a, 0x3835d57, 0x6f00566, 0xeec7c8b, + 0x1acf55c, 0x71628a2, 0x97fb19e, 0x5da3750 }, + { 0x01a7125, 0x6904a8e, 0xe6e3780, 0xad33c85, 0xc19f94a, 0x1702928, + 0x7c04b3d, 0xb424ff2, 0x19e2ba3, 0xb212e39, 0xc9af4c9, 0x4cca8e8, + 0xfd9bf0e, 0x98ab7ae, 0x9799db5, 0x21d245d } + }, + { + { 0xec08806, 0x6b034dc, 0xb40f2d9, 0xfd763f2, 0x29cb906, 0x5e16de0, + 0x8a0e16a, 0x02b7014, 0xe071e12, 0x463c8ee, 0x25ad509, 0x6447281, + 0xdc0e07a, 0x9ee6f2d, 0x68d4d97, 0x188895c }, + { 0xb27f971, 0x092fff3, 
0xc9b7722, 0xb3c159f, 0x3cae42d, 0xe27d8ff, + 0xe87071d, 0xf8a5ed6, 0x607ebd2, 0x318388f, 0x53486f1, 0x924967b, + 0x7c46e1f, 0x7730494, 0xf21d196, 0xf279c60 } + }, + { + { 0x84f3201, 0xef2bc03, 0x1f94c51, 0xf8750c7, 0x986ec65, 0xbaa4f5a, + 0x2732a33, 0x6f8a5de, 0x299e365, 0x0f13d80, 0xe85261f, 0x2709530, + 0xf527d56, 0x097d922, 0xbe1f3f8, 0x4969687 }, + { 0x3e1708d, 0x9f3f504, 0x4aa4be4, 0xac67b87, 0x320a87e, 0x75fb042, + 0x6e2cad6, 0xa361ad3, 0x203e9f6, 0xcb01470, 0xc9b76c6, 0xe3807b7, + 0xb907c09, 0xf086833, 0x7e85a01, 0xe9bed3c } + }, +}, +{ + { + { 0x91780c7, 0xa7ea989, 0xd2476b6, 0x04e4ecc, 0xc494b68, 0x0af9f58, + 0xdee64fd, 0xe0f269f, 0x021bd26, 0x85a61f6, 0xb5d284b, 0xc265c35, + 0x3775afd, 0x58755ea, 0x2ecf2c6, 0x617f174 }, + { 0x5ec556a, 0x50109e2, 0xfd57e39, 0x235366b, 0x44b6b2e, 0x7b3c976, + 0xb2b7b9c, 0xf7f9e82, 0x0ec6409, 0xb6196ab, 0x0a20d9e, 0x88f1d16, + 0x586f761, 0xe3be3b4, 0xe26395d, 0x9983c26 } + }, + { + { 0x6909ee2, 0x1d7605c, 0x995ec8a, 0xfc4d970, 0xcf2b361, 0x2d82e9d, + 0x1225f55, 0x07f0ef6, 0xaee9c55, 0xa240c13, 0x5627b54, 0xd449d1e, + 0x3a44575, 0x07164a7, 0xbd4bd71, 0x61a15fd }, + { 0xd3a9fe4, 0x30696b9, 0x7e7e326, 0x68308c7, 0xce0b8c8, 0x3ac222b, + 0x304db8e, 0x83ee319, 0x5e5db0b, 0xeca503b, 0xb1c6539, 0x78a8dce, + 0x2d256bc, 0x4a8b05e, 0xbd9fd57, 0xa1c3cb8 } + }, + { + { 0xd95aa96, 0x5685531, 0x6bd51ff, 0xc6f1174, 0xc9c2343, 0xb38308a, + 0x2921841, 0x52ee64a, 0x78f3b01, 0x60809c4, 0xae403ac, 0xe297a99, + 0xcb09a5b, 0x7edc18f, 0x81ac92a, 0x4808bcb }, + { 0x34dc89a, 0x3ec1bb2, 0x4e39da5, 0x1e8b42e, 0xe526486, 0xde67d5e, + 0x76f0684, 0x2376548, 0x285a3dd, 0x0a583bd, 0xfe9b009, 0x3d8b87d, + 0x0413979, 0x45bd736, 0x38a727f, 0xb5d5f90 } + }, + { + { 0x4bde3ee, 0x7b8820f, 0x24d5170, 0xea712ef, 0xdf6ec7b, 0x517f88c, + 0x983ea9a, 0xb15cecf, 0x31a4592, 0x9eeee44, 0xebb013e, 0x786c784, + 0x1f4e15d, 0x2f06cb3, 0x4f4fda1, 0x5603fd8 }, + { 0x9e1321f, 0xf6790e9, 0x74a4c09, 0x274c66a, 0x9a41a4e, 0xa4b70b4, + 0xada5157, 0x7700bdd, 0x51be8dc, 0xe54a60d, 0x1a477e0, 0xfaf9276, + 0xb027eac, 0x6661c72, 0x280b917, 0x50e2340 } + }, + { + { 0x96ec123, 0x635f40f, 0x7a766a4, 0x4a33133, 0xb935587, 0x9ce4416, + 0x95d97e4, 0xbb6e1f5, 0x9d4197d, 0x2614723, 0x490e896, 0xabd4478, + 0x8bba895, 0xf6a1b2a, 0x5e27a45, 0x401fa40 }, + { 0x0620900, 0x7354ba5, 0x385678b, 0xc443a29, 0x53cf5fa, 0x48aba10, + 0xbbe152d, 0xd67e723, 0x2a63d68, 0x4b858e0, 0x72be4ee, 0x174e1ee, + 0x9ab8d46, 0xad0fbb3, 0xce17dd7, 0xa0fdffb } + }, + { + { 0x9c46fd8, 0xa1ea325, 0x9fb96ef, 0xeca122e, 0x6767acd, 0xf9074a2, + 0x2787082, 0x9b004a2, 0x7f3ba8e, 0x389f807, 0x0d5aabe, 0x6463de9, + 0xb090585, 0xf30ceaa, 0x5634ab8, 0x71b31e8 }, + { 0xaf02aed, 0x0dee65c, 0x20ac252, 0x506886e, 0x86b8a59, 0x0665f78, + 0xf2bb328, 0xb9b784d, 0xdc6b089, 0x46e443a, 0x66c27fd, 0x3d5de19, + 0xf0fde70, 0x0419265, 0x2b5c034, 0xed94612 } + }, + { + { 0x13b0056, 0x5a52ad2, 0xb909ee3, 0x9fbeb92, 0xbdaab08, 0xb42ba18, + 0xffc8a77, 0xec127c4, 0xfda906a, 0xc6d2985, 0x994bbe7, 0x5355547, + 0x9cdfd62, 0xa7470c0, 0xd2e675a, 0x31a3971 }, + { 0xcc8b356, 0x8d8311c, 0x01b4372, 0xabb0bf8, 0x0294566, 0x33c1cad, + 0xe07b672, 0xe2e649c, 0x2ae3284, 0x9084d88, 0x1835ce2, 0x7a90d4c, + 0x809d44c, 0xb4d1cd5, 0x9f0528f, 0x7822714 } + }, + { + { 0xbf5844b, 0xca884cf, 0x8524cf9, 0x9dd05c4, 0x36ba889, 0xdbffa19, + 0x29e7666, 0xef94fdd, 0x3eaf48f, 0x358f81b, 0x1530d56, 0x96734d5, + 0x4adf9e5, 0x378b2d1, 0x4731f61, 0x2f85046 }, + { 0x99dcb83, 0xd6ae905, 0x6199239, 0xa4f89e0, 0x8f0f958, 0x6405249, + 0xcc27707, 0x2866d99, 0xf551c0f, 0x64681a2, 0x4c37080, 0x2c7b0d0, + 0x00ac301, 
0x218925b, 0x54df895, 0x8d57fb3 } + }, +}, +{ + { + { 0x809c8d7, 0xdaebde0, 0x0e95ea1, 0x58c761c, 0x00ae5e2, 0xbd99650, + 0xcd51acd, 0x6117a85, 0x7c55d56, 0xc4424d8, 0xdfbeeaf, 0xe9b1dde, + 0x0db4791, 0xda98bb5, 0x3fca108, 0xff3a5a6 }, + { 0x5ccbea1, 0x172fb8e, 0xa9f6cc9, 0x9fe12a7, 0x8967ce2, 0x1de4b0b, + 0x671dbc6, 0xc1ab60f, 0x5dedcda, 0x338385a, 0x3a043fe, 0x647a420, + 0x28ebc89, 0xe9abc64, 0x03ba3c8, 0xc357ff0 } + }, + { + { 0xde39ebd, 0x37061e7, 0x2be567a, 0xebb9135, 0xd6bb80a, 0xa9a6f6b, + 0x99f0ba2, 0x039345d, 0x98bbf47, 0x215494e, 0xa2a1ccb, 0xf2cb7a4, + 0x37f67c9, 0xf51aa10, 0x17fff71, 0xd29c85c }, + { 0x4d30b87, 0x8d4e4f2, 0x93a8309, 0x20fdf55, 0x757075c, 0x9b9f9cf, + 0xcd70101, 0x09142ad, 0x766ca55, 0x901d0ee, 0x32e418b, 0x6a5d86a, + 0xd7fcaec, 0x550ad92, 0xd91b26e, 0x64e8818 } + }, + { + { 0x47e5ee5, 0x5cea0f7, 0xbe99699, 0x8ca1d31, 0x5c136c7, 0x52db846, + 0x90e0d74, 0x8cecb38, 0xede2ad8, 0xb8efe9d, 0xf17ade8, 0x18d6ff8, + 0x2d66c20, 0xd222735, 0xf2005fd, 0xc46593e }, + { 0xf7141e1, 0xe5ebe6f, 0xe0126f2, 0xc968315, 0x1cb91b6, 0x95adc73, + 0x38a6003, 0x753b54c, 0x4230a61, 0xa614125, 0x559fece, 0x23ac6eb, + 0x3865c23, 0x9816b60, 0x543a570, 0x567014e } + }, + { + { 0xdd2b71f, 0xd46091d, 0x97d24ff, 0x3999a5d, 0x1ecff3c, 0xce2a4f1, + 0x581c6f0, 0xab2687c, 0xcba70b4, 0xa9fb2eb, 0x42093e1, 0x6fde356, + 0xaee724a, 0x00253ec, 0x2b81bdd, 0xa08ce3c }, + { 0x935a2b3, 0xa251238, 0x584f750, 0x8cae1d4, 0x988a219, 0x011469e, + 0x5a6a50e, 0x61f7ed3, 0x01fcebd, 0xe13ebaa, 0x31d8867, 0x794b976, + 0xcda32e7, 0xf25755c, 0x4564cd1, 0x368a97b } + }, + { + { 0xaa3397b, 0x0d22224, 0x38066db, 0x1dbb3e6, 0x0ce8e32, 0xfe0b5ee, + 0x7bab4dc, 0x09c17c8, 0xf188b64, 0x5cc65dd, 0x211b5fa, 0x74c4abf, + 0xab0ba86, 0xdcc17b7, 0xa535501, 0xfbdf46f }, + { 0xaca569e, 0x4775087, 0x06a1718, 0x6575f90, 0xb94de93, 0xb5c45a9, + 0x8497171, 0x0fc8006, 0x489f7ab, 0x775d965, 0xf5c0c89, 0x8775b58, + 0x1a06254, 0x05d4e20, 0xb6d73a5, 0x8cab349 } + }, + { + { 0x39465b0, 0xca78163, 0x14498fd, 0x3ef9148, 0x6255c11, 0x9ca1f34, + 0xb7f38f1, 0x389fd15, 0x354b8f3, 0xdac2089, 0xa840a70, 0x82d07fc, + 0x1dd483a, 0xf53fd73, 0x1590578, 0xa6e4eae }, + { 0x3c01b77, 0x7bf65af, 0xa75c982, 0x27542f3, 0x716cfce, 0xc5bd947, + 0x884b9e7, 0xba5fe76, 0xd55725d, 0x39bae14, 0xfae0eab, 0x982f64e, + 0x7a5293a, 0xcfae662, 0xd60f464, 0x22a25a1 } + }, + { + { 0x7dd5e16, 0x74caecc, 0xce7bca3, 0x23678a2, 0x57f1ba1, 0x4673932, + 0xa4c1697, 0x4eb9948, 0xeaba18d, 0x5d400e8, 0x9807871, 0x128d1c8, + 0xbff38a6, 0x78f9627, 0xa39d4cc, 0xf80b813 }, + { 0x31d3aad, 0x8aeefa0, 0x27db664, 0x5042199, 0x4cb6383, 0x244fc69, + 0x72192a3, 0x3190477, 0xbbfb57b, 0xcc86075, 0x4451511, 0xbae3a13, + 0xf6174f0, 0x16cf416, 0xd376813, 0xb343cc0 } + }, + { + { 0xd1824b7, 0x31ac9b9, 0xec8f61a, 0x6282260, 0xc781765, 0xbbeb9f8, + 0x2d110da, 0x06ab5c0, 0x47146b8, 0xd583e22, 0x4100d05, 0x79a1608, + 0xf0a5c95, 0x16dbbb4, 0xe331667, 0xfe2af1d }, + { 0xaf8710e, 0x26f0364, 0xeec08fe, 0x1cb8c91, 0x1d95e9f, 0x436bce6, + 0x57944a0, 0xfe9050c, 0x07b626b, 0x5f45acf, 0x9cf1276, 0x48dc93f, + 0xa05bfb7, 0x4491371, 0x4bcf785, 0x5106304 } + }, +}, +{ + { + { 0xed0b3b6, 0xac2e294, 0x671637b, 0x5c5ade6, 0x1140677, 0x2f289ce, + 0x754eb53, 0xaf446e2, 0x20421ad, 0x70911b7, 0xe0b7556, 0x4b73836, + 0x2a97827, 0xcadf104, 0x8005bc6, 0x4824e49 }, + { 0x937c28a, 0xb0eeccd, 0x0c3ee97, 0x1ce061d, 0x9f33faa, 0xcb07631, + 0xaea66dc, 0x9980bf4, 0xd111d98, 0x2bd0755, 0x7fe4de0, 0x43feaf6, + 0xb077b2f, 0xe76fb80, 0x5793b04, 0x227dc9f } + }, + { + { 0x14f49ba, 0xea24ae5, 0x11436e7, 0xbc39ea6, 0x78485d8, 0x9d7fed2, + 
0xdf8b131, 0xb6ef00c, 0xfdbc7af, 0x0237b4b, 0x64ccd27, 0x08745b5, + 0xafc5a76, 0xaf8595d, 0x29f5500, 0x43657af }, + { 0x48470f8, 0x3007183, 0x640fd53, 0x51f91fd, 0xbe15512, 0x859c807, + 0xab3e9c5, 0x7d1a474, 0x81553e5, 0x5d714d9, 0x6f62310, 0x0757343, + 0x6b02a62, 0xedc5be0, 0xea47832, 0x5a4b9b7 } + }, + { + { 0xe93dbb3, 0x03e0a24, 0xcadc884, 0x25841dc, 0x8d10ad5, 0xabc1a81, + 0x2042ddd, 0x207e38a, 0xfeba8d8, 0x7fffbdb, 0xa3ec9b5, 0x74efebb, + 0x0b40a9f, 0x0bc39ca, 0x0267feb, 0x69ee9c9 }, + { 0xbc62919, 0xd402fac, 0x1cf53c6, 0xe9f8fc1, 0x7cc7d81, 0xe76fa5a, + 0x96bb19d, 0x4f2d876, 0xadc67c7, 0xd4fb7f9, 0x96702dc, 0x40621d5, + 0x438f6c5, 0x5b6a98e, 0xf1a1036, 0xa7c64de } + }, + { + { 0x9a092c7, 0x84c5e80, 0x11c22b7, 0x9e40e0a, 0xd06c99b, 0x820a091, + 0xeecca8f, 0x45fdc77, 0x5794f16, 0xfe1b8a3, 0x4ce3d6d, 0x31f7e5b, + 0x82c74c8, 0xfd5e010, 0xc1f6f7d, 0xfdabf30 }, + { 0xb9248a0, 0xbfa6017, 0x546b941, 0xe898d30, 0x207ff65, 0x878c492, + 0xb874e64, 0xbf22e8d, 0x53a547e, 0x43fdb1b, 0x5fbd464, 0xb66deda, + 0xc7ae1b5, 0x59127a6, 0x6a7515a, 0xa463646 } + }, + { + { 0xde9ab2e, 0x22c4e66, 0x0203c58, 0xfaf60c2, 0x0d5c5ed, 0xed2d7bf, + 0x4ca0f19, 0xdbc16fe, 0x465b979, 0x54e8ef6, 0xa310ef9, 0xe2d64b1, + 0x3778636, 0xa0f2c95, 0x281883b, 0xf3b4aa4 }, + { 0x9be6629, 0x4ac9af0, 0x1ca90c5, 0xba455e1, 0x856f492, 0x0147538, + 0xabd7840, 0xc80db7e, 0x6beb9cd, 0xb3526d9, 0x9d81503, 0x37657fb, + 0x193cec3, 0x8729a16, 0xd69952a, 0xd9a93fb } + }, + { + { 0x94f47c6, 0xfce0175, 0xe366d05, 0x228da21, 0xdc8baf3, 0x27ce0b2, + 0x6b4a951, 0x8cc660b, 0x384bb01, 0xf678947, 0x44d980c, 0xc629d7d, + 0xe85e81f, 0x47980e4, 0x1cd723e, 0xa2e636a }, + { 0x77fb207, 0x6b6ebae, 0x4c92891, 0x7017961, 0xb4d279c, 0x5569541, + 0x41758cb, 0xbb6b36a, 0x27a8e30, 0xecaa222, 0xb470ad9, 0x8b6746a, + 0x63e2d3d, 0x4c46017, 0xd3edaec, 0xe19c4ed } + }, + { + { 0x34718c8, 0x0b43fec, 0xf33499f, 0x553c407, 0x970d1db, 0x8272efb, + 0xa8e8d1c, 0x008c62c, 0x63eec45, 0xe4b79d7, 0xf2d71a3, 0x1fd4230, + 0xa368c36, 0x090fdaf, 0xfca7baa, 0xf62c101 }, + { 0xd2395b3, 0x1c9e6c8, 0x04c5513, 0x671ed63, 0x299a465, 0x577d933, + 0x63f9986, 0x286890e, 0xbfc979c, 0xd92a95d, 0x2b51019, 0xcebd79d, + 0x3d07251, 0xe74d88b, 0x906f9ad, 0x8b6db73 } + }, + { + { 0x7b3d90c, 0xc0c43db, 0x4304a06, 0x85d154e, 0xaf2f38e, 0xe8aceef, + 0x83d9459, 0x5e04293, 0x431afd1, 0x65e5e32, 0xa900a65, 0x9e5f050, + 0x8a26671, 0xcbaa171, 0x9c93de7, 0x33d0b24 }, + { 0xd5b6680, 0x3dcbf92, 0x20006f9, 0xc47e5ec, 0x9a51924, 0xc971129, + 0xcd0ed46, 0x665d9b8, 0xa5fcab6, 0xed2d63f, 0xcfbfc5a, 0xa817eb6, + 0xb76eb76, 0xb38169f, 0xf11160b, 0x8b93544 } + }, +}, +{ + { + { 0x693bdcd, 0x02eca52, 0x2ae01d6, 0xbbf0923, 0x8b44b3e, 0x0b0a2de, + 0xb250dff, 0xdb82449, 0x6e1c530, 0x0c42b86, 0xa64c2c4, 0xcd226dc, + 0xf046b5f, 0xcfb2bb1, 0x3fccb0d, 0x97e2fae }, + { 0x45ed156, 0xdf92907, 0xf641229, 0x224dcb9, 0x5f1f67e, 0x2126abc, + 0xe9c8a6b, 0xa7eed5a, 0x9857d9b, 0x40abedc, 0xde941c6, 0x3f9c7f6, + 0xd725ddf, 0x2158d42, 0x8c69543, 0xbdd1015 } + }, + { + { 0x8df2fbc, 0xa7dd24e, 0x13d1aee, 0x3adbcfd, 0x13b2177, 0xf6a32d1, + 0x7a9a14c, 0x89a7232, 0xdc65df9, 0xe3aef43, 0xa64d74c, 0xeaec3e3, + 0x4fec33b, 0x4d387d8, 0x21a2128, 0xaba2a05 }, + { 0x6b85e30, 0x2382c22, 0xcd2aad3, 0x4352d85, 0xd9772c4, 0xb0c6001, + 0x5f3653f, 0x7ed8263, 0x0300f47, 0x3626a6f, 0x6ca7e4e, 0x23909de, + 0xc154141, 0xb43dd81, 0x7e4bc68, 0x9a49fad } + }, + { + { 0x2428f88, 0xa3661df, 0x56e0db2, 0xbe48b02, 0xce79aa9, 0x3cd1871, + 0x23dddac, 0x90ab871, 0x71871a6, 0x9c58fb9, 0xa34910e, 0xf031f7f, + 0x81060e4, 0xb501eea, 0x791224e, 0xdb668ba }, + { 
0x6a705bc, 0x240bbcb, 0x2d1865e, 0x7e76fbd, 0x2513641, 0x6e2cd02, + 0x46365c9, 0xe6c5225, 0xa5a01fb, 0xe46a8b8, 0xb67618b, 0x696fa7b, + 0x0db6792, 0x418b3b9, 0x7108b9c, 0x7204acd } + }, + { + { 0x8456b45, 0xb5a143b, 0xf53b4d9, 0x8a3ab25, 0xe13a570, 0xb112a58, + 0x81487d2, 0x613ca32, 0x3b1e7c9, 0x837d823, 0xd41e9d5, 0x592bade, + 0x5cd02f2, 0xdc1893a, 0x8972e23, 0x0879502 }, + { 0xcb76261, 0x7003c08, 0x332a5e0, 0x14bde9e, 0xcbbd78e, 0x14b2872, + 0xde238e8, 0x5594061, 0x067466c, 0xad12645, 0xf5e4952, 0xa8d0e64, + 0xc7f8d06, 0x5b44b82, 0xfb1b828, 0xb51bea8 } + }, + { + { 0x3f0dacc, 0xebad685, 0x1cbebbc, 0x5c31b8b, 0xfa5a2dc, 0x6746975, + 0x31d9faa, 0x2d95965, 0x00fc0e4, 0x343797d, 0x55fe01b, 0x38d821c, + 0x7323aa0, 0x0bfdb24, 0xf962a8e, 0x42613c4 }, + { 0xe134bc0, 0x599a211, 0x47a7084, 0x75fa4a1, 0x7f734b5, 0x6e71948, + 0x6dfca2b, 0xd5ced2d, 0x8aeabd2, 0x9fa0fdc, 0x12361da, 0x5e6b03f, + 0x5859fcf, 0xad23d31, 0x25a5fc8, 0x3120ef1 } + }, + { + { 0x8e9f638, 0x990ef62, 0x626a60c, 0xfdaa240, 0x2abddab, 0x4a3de20, + 0xd8872b2, 0xd5d10b7, 0x1ea5880, 0xa01b730, 0xa81b9d8, 0x481697f, + 0x3471ed8, 0x2984153, 0x292d37c, 0xefd73f8 }, + { 0x9994beb, 0xdda7626, 0x6a4f865, 0xa037703, 0xe5b47d5, 0xda992ec, + 0xe53edba, 0x912a427, 0x9264e45, 0x6467598, 0xaf71222, 0xd3b68c3, + 0x6dedc5f, 0x9d3436c, 0x076b2ad, 0x1e027af } + }, + { + { 0x4382f4a, 0xd56fca1, 0x8966b7b, 0x83712a4, 0xa4c9ddb, 0xd6b2cf5, + 0xf602875, 0xa66be29, 0x894f3d0, 0x70e4266, 0xb3195ca, 0x007d220, + 0x82c74d4, 0xba38d8f, 0xd975cbd, 0xdccc5fc }, + { 0xc88b38b, 0x03e1610, 0x52e0d8d, 0xeb9f9a1, 0xb646eb7, 0x6a57eca, + 0xc76b6c1, 0x161641f, 0xbd2e12b, 0xf9025ad, 0x5c0e26d, 0x87c74db, + 0xbfeca74, 0xed5cb51, 0xe34a08c, 0x603dfb6 } + }, + { + { 0xcb03307, 0xc4be728, 0xc2741cc, 0xde34c0e, 0xa74eb17, 0xe01db05, + 0x8905e4b, 0x1bfce0c, 0xd1b1826, 0xb18830a, 0xe87bbfb, 0xcacbb41, + 0xd2f1a79, 0x8696842, 0x08c83ea, 0xa80e5fb }, + { 0x3f1439c, 0xe48f163, 0xcd6987b, 0xc1d4108, 0xb751814, 0x05705c4, + 0xc1c622d, 0xa9bffd0, 0x46cd053, 0x23de4af, 0x39457c3, 0xf782f5e, + 0x5e5d243, 0x815276b, 0x6161ae3, 0x3132041 } + }, +}, +{ + { + { 0x77f2542, 0x2459661, 0x8372b25, 0x203be7e, 0xee2007b, 0xc7c9426, + 0x0621799, 0xc564138, 0xc28c3ce, 0xda56589, 0x7afc1e3, 0x13e8a7c, + 0xe352082, 0xdba81e9, 0x04435c7, 0xf430549 }, + { 0x691de4a, 0x4d26533, 0xfb777ab, 0x364408c, 0xeae7f88, 0xccdfb43, + 0xa525b11, 0xbc40f44, 0x3c60627, 0x8e112a5, 0xe17e696, 0x7f7c581, + 0x1ea774a, 0x0fd7878, 0x0b1f582, 0xd09e632 } + }, + { + { 0x70aab15, 0x44390bd, 0x889c3f2, 0x41112bc, 0xd685349, 0x6b02894, + 0x5584dfe, 0x7103001, 0x1ba7887, 0x373cb1b, 0x2a017c7, 0x53d286c, + 0x3c81fdc, 0x2ed0388, 0xfbcc6fc, 0x3bfc5e3 }, + { 0xfd6418d, 0xd38ac6f, 0xbfad89e, 0xc667e96, 0xeab4d66, 0x46f4f77, + 0x0911293, 0x194c04f, 0x68c48d5, 0x0fd09cf, 0x63cf7f4, 0x6f5b055, + 0xacd562f, 0x0c0a8c4, 0x36d965d, 0x94c1d83 } + }, + { + { 0xcaa127a, 0x94fc8f0, 0xd803690, 0xc762d5d, 0x1ebf0d3, 0x8bfdfd1, + 0x48eac50, 0xa98cdf2, 0x8b5ff10, 0x3d7365d, 0xc65b4de, 0x20dc29b, + 0x8ec7c68, 0x62ac28e, 0x90372d2, 0x7f5a132 }, + { 0x3246658, 0xf3d8a25, 0x9ac202a, 0xa4bebd3, 0x5cc1697, 0x078ede7, + 0xc8fc022, 0x5525800, 0x5fae77b, 0x302a802, 0x57917b6, 0x0180139, + 0x864bf55, 0x7c8806d, 0x12f06f1, 0x4e2d878 } + }, + { + { 0x3d66e88, 0x8d35118, 0xa91d02a, 0xfb861a1, 0x7850e5f, 0x8c27c2a, + 0xa5496f6, 0x9fd6399, 0x8080049, 0x52152ae, 0xfd1c2dc, 0x600e2ff, + 0xffe8b2e, 0xc75902a, 0xe03b175, 0x5c4d2cc }, + { 0x4f57e78, 0x8ad7c42, 0x1736f87, 0x77cf606, 0xf85038a, 0x2876012, + 0x1b97b95, 0xff32845, 0x392dfc8, 0x3cc6dd5, 0xa6f5075, 
0x72f1363, + 0x71de894, 0x028ec44, 0x6f45a86, 0x7030f2f } + }, + { + { 0x9695817, 0x66400f5, 0xf20ea36, 0xeda0a7d, 0xd394992, 0x855be51, + 0x8336f62, 0x2d082c1, 0xf28c868, 0x30944dd, 0x0dc86d0, 0xfb5f853, + 0x564a0bd, 0x9562ae5, 0xb6b9b51, 0x1f7ea12 }, + { 0xd0a7148, 0x5bd74e0, 0xb91e572, 0x6c8247f, 0x47da498, 0x699aba5, + 0x1f7c814, 0xed82581, 0x62057b9, 0x434674b, 0x15c15b4, 0x8b4df5e, + 0xb110081, 0x2a97da1, 0x4c417fe, 0x2a96b0c } + }, + { + { 0x237639d, 0x4f75dfc, 0x1db7029, 0xe5ad6bc, 0xb3d28f7, 0xd43e06e, + 0xe447989, 0x89f3bb5, 0x01a1a6e, 0xc426a2c, 0x315878f, 0x33ea71c, + 0xb1b5705, 0x8a7784a, 0x77ca811, 0xa59e86e }, + { 0x36ae155, 0xddb133c, 0x0d51b42, 0x49f1d4c, 0x9d05519, 0x5508082, + 0x5291816, 0x20e23be, 0x67181ec, 0x35047ec, 0x7aad091, 0x6237dc4, + 0xe2e25a2, 0xa1d3ce1, 0x0d3db4c, 0x1de0522 } + }, + { + { 0xd9fd423, 0xe9a5e19, 0x9801e43, 0x0c2c3d0, 0x28df2da, 0x043c2dd, + 0xe1ad12a, 0x4eecab4, 0x9615aa5, 0x97e1797, 0xca7bb5e, 0xe57b879, + 0xcc92619, 0xa2a903c, 0xaa56e93, 0x5cef370 }, + { 0x7f3232c, 0xbef29fa, 0x2b7ad5c, 0x1cf35ed, 0x3b6077a, 0x35c4893, + 0x7a1d47d, 0xe065148, 0xce14572, 0xedb4673, 0x0b17629, 0xdc9e98c, + 0x9a02a5c, 0xef98ebe, 0x11d03c0, 0x1f772e3 } + }, + { + { 0x4608f72, 0xcbdbdcd, 0x5a13c6f, 0xb435223, 0x4bb3c21, 0xa6497f6, + 0x12c15c9, 0x3af2383, 0x6322d11, 0xfbbf4b3, 0xc641775, 0x520a5c6, + 0xe81e0e1, 0x18cd967, 0x3de3871, 0x980b2c6 }, + { 0x9ae44a2, 0xfa9db61, 0x176bc56, 0x0281dd2, 0x8a7f817, 0xfd03711, + 0x4129b30, 0x9c48545, 0x039626d, 0xb439648, 0xe4ada6b, 0x355050e, + 0x7f5d98c, 0xc9c16d6, 0x18c4d5e, 0xf53ccc3 } + }, +}, +{ + { + { 0x3ffb20b, 0x50ae942, 0x6865eb4, 0xa6c0b42, 0x09930f1, 0x4677f7d, + 0x4a16427, 0x742e0b6, 0xf976f9a, 0x521d18e, 0xa454749, 0x43ac9cf, + 0xc51f50d, 0xda3a91d, 0xad6f954, 0xf657029 }, + { 0x6b4f99a, 0xfe5f064, 0x63ad4ce, 0xd92a5d9, 0x2e0e081, 0xfcb5509, + 0x8d8a858, 0xadc85ab, 0x0632f0f, 0x8e9b966, 0x8d7216d, 0xe7a4f16, + 0x59c3b99, 0x00a4cc5, 0xba09dc1, 0xed6d0bd } + }, + { + { 0x1621beb, 0x7236d14, 0xbc7ca95, 0x1751fd4, 0x2f5319c, 0xaa619d1, + 0x4e9316f, 0xfc2b15b, 0x9fd4d33, 0x2d1a906, 0x8ced829, 0x28c3bac, + 0x1dd998f, 0xf2efab5, 0x3b149ed, 0x2c13330 }, + { 0xf601ac6, 0x65237c9, 0x07d6a45, 0xb54dd65, 0xfb1a4cf, 0xa1ce391, + 0x115f67e, 0x2957533, 0x465279b, 0x6456da8, 0xa993e02, 0x02890aa, + 0xb7175e4, 0x6891853, 0x0f3e59b, 0x3fda203 } + }, + { + { 0xd8c6e0b, 0xe99fe12, 0x5341c56, 0x7cb07ff, 0xdf77b24, 0xc292c7b, + 0xca29906, 0xf52dfd0, 0x772f02c, 0x4a6aa26, 0xe1bbd09, 0x26f7684, + 0xee7c2a8, 0xec56b2b, 0xad4a312, 0x67709e6 }, + { 0xc570263, 0x99c57b2, 0x2faafae, 0xeb0100b, 0xff25eca, 0x980d5d1, + 0x82cf936, 0xace35e6, 0x44679ed, 0x5a82ce5, 0x074b81e, 0x5c76a41, + 0xa00abb1, 0xf36fa43, 0x04ffb2d, 0x0642819 } + }, + { + { 0x04bdd28, 0x68f6bc8, 0xb5dc7ad, 0xc311d96, 0xed32e45, 0xff0d646, + 0xe0f712d, 0xaf3cdc6, 0xd483861, 0xd4508e9, 0x0e1c277, 0xb624be5, + 0xc5dd841, 0xc510275, 0x298dc02, 0x451c5c3 }, + { 0xdd34d6b, 0xf87d479, 0xdd06a38, 0xda7f293, 0xb699e9f, 0x575e129, + 0x215b2cc, 0x79e5fb2, 0x657e690, 0xd280028, 0xe702a71, 0x7fecd09, + 0xfa13677, 0x85160ab, 0xce65f64, 0x5de3427 } + }, + { + { 0xe8fff38, 0x84e4bf6, 0xb358b1c, 0x16f3725, 0x3b472a5, 0x360371c, + 0x52f217a, 0xe64c061, 0x0501241, 0x8e67379, 0xab2dd96, 0x88e81d6, + 0x1385604, 0xf3e218a, 0xe84184d, 0x9736caf }, + { 0xdbb93a3, 0xb55a043, 0x9301088, 0x335088f, 0xb2a4959, 0xcea7a2d, + 0xb882c33, 0x48e5d4a, 0xad46179, 0x114f09b, 0xb446576, 0x4416467, + 0x34c6c2f, 0x01cb23e, 0xa02db8a, 0xddebf04 } + }, + { + { 0x9bde8a1, 0x36d60cc, 0x676e4ad, 0x20fd2f2, 0x8936581, 
0xebdcfb7, + 0xdbfc2c3, 0x245d0d5, 0xa9f82e5, 0x104c62c, 0xd654d9b, 0x7387457, + 0xae7f10e, 0xe966777, 0x1d8e582, 0xefeb16f }, + { 0x70364b5, 0x4faf4f1, 0xd612472, 0x0e1ab58, 0xfed6085, 0x11bbfe7, + 0xa59a09a, 0xb360a14, 0x722fdb6, 0x61d96e9, 0x94068bd, 0x16a12f1, + 0xf73c2be, 0x225bf07, 0xc8bd24e, 0x1e64665 } + }, + { + { 0x3698c75, 0x27a478a, 0x6202aa2, 0x778ccd3, 0x8d87f1f, 0x0149c63, + 0x784edae, 0xa660e5f, 0x82adfa8, 0xe0d4d2f, 0x1ba1f9d, 0xf512dd6, + 0x6245c58, 0x90cfed9, 0x18b53dd, 0x6c3a548 }, + { 0xbdc094f, 0x833f70c, 0xb1514e7, 0xa5f26f5, 0x1c8cf13, 0x93e7cf5, + 0x186ec43, 0x1436601, 0xe78170a, 0x81924ac, 0x8694368, 0xcc880a0, + 0x0b62cbb, 0x2dfa955, 0x96b4a2c, 0x0bc6aa4 } + }, + { + { 0x3561aa2, 0x5157a7e, 0x8645c1e, 0x525c500, 0xce7cbb3, 0x22feb4e, + 0xc89a58b, 0x36d0d25, 0xc9bde9c, 0x43131f7, 0x881f731, 0x74afdda, + 0x7c8e36a, 0x99ab87c, 0xc1d4fb2, 0xf07a476 }, + { 0xbebc606, 0x1b82056, 0xfcf089f, 0x95a1e5a, 0x2b55d5c, 0xc5bccfa, + 0x00eb0b1, 0x8fbc18e, 0x9efb483, 0x93a06fe, 0x2d74c57, 0xcafd725, + 0x3de4350, 0xc7518f0, 0xc6fd762, 0x9a719bf } + }, +}, +{ + { + { 0x2362087, 0x5ee0d83, 0x0b167e8, 0x7f2c0d7, 0x5e0e865, 0xb732789, + 0x98c4e65, 0xef5b2e8, 0x8fe9cc1, 0x222797d, 0x82d1e15, 0xfe6d73e, + 0xf62dc4b, 0xc7c0e9c, 0x937ceda, 0x962acfe }, + { 0xc1e85c7, 0xd763711, 0x2836978, 0x8f2dbbc, 0x8c44e98, 0xbadc055, + 0xa3e93f8, 0xed63eab, 0x41b55c7, 0x807e857, 0x6d1207b, 0xd51ae5e, + 0x39d541b, 0xa0ef9a6, 0xa0c56a5, 0x58855f9 } + }, + { + { 0x213091d, 0x7d88eaa, 0x45b6a0d, 0xcbdfee7, 0x4f5e077, 0x826a012, + 0x90f1e4c, 0xb04fc13, 0xaea69aa, 0x1961ac3, 0xd5bb63e, 0x3afb719, + 0x4ac7e5c, 0x2a37837, 0xc50ca45, 0x78efcc1 }, + { 0xb8abdef, 0x346e8f0, 0x88095d0, 0x27e3dbd, 0xffc6c22, 0x56d3379, + 0xfa4b291, 0x67d416c, 0x3b1b373, 0xc3baaf6, 0xdf73bae, 0x0184e1f, + 0x9167528, 0x38ae8f7, 0x35d6297, 0x7329d4c } + }, + { + { 0xf568c52, 0x45d2ac9, 0x9808593, 0x5134814, 0x31b7ed8, 0x0c92d83, + 0x0876ecd, 0x921327a, 0x052736a, 0xf752d75, 0xbc6b837, 0x7b56487, + 0xa23b4cc, 0x6b1a320, 0xec0d665, 0x1983937 }, + { 0x08554ab, 0x2c3017c, 0x366e87f, 0x40ad955, 0x8ed7f02, 0x88c4edf, + 0x3cc5e6d, 0x64a7db1, 0x2dc978b, 0x5ac91fa, 0x925d2a2, 0x016a20d, + 0xabb57b4, 0x3604dfe, 0xd7e2e85, 0xc3683ec } + }, + { + { 0x4c0c6d0, 0xc47150a, 0xe22adcf, 0x30af45e, 0x022ea4b, 0x39b5acb, + 0x77203b5, 0xfbe3185, 0x6fd9b59, 0xe5aaa34, 0xdd1c8dc, 0x0062c90, + 0x54049ac, 0xcf113f3, 0x63a31b5, 0xd8fba4d }, + { 0x1056a69, 0x73b5488, 0xd780bda, 0x3be6cbc, 0x30ba2b9, 0x5776ec2, + 0x8e8d6f7, 0xbe883cf, 0x5c2be6f, 0x64efe94, 0xf1ade8d, 0x064f704, + 0x743110e, 0x41cfd17, 0x4c20abe, 0xaac9411 } + }, + { + { 0xf1c1468, 0x91f9192, 0x4563e13, 0x8176e74, 0x0bda15d, 0xa48b5f9, + 0xda42af6, 0x2a085ae, 0x425c018, 0xfd38ab2, 0x08abafb, 0x2884ba4, + 0xcbd091d, 0x356f318, 0x817871b, 0x454e450 }, + { 0x8ada531, 0xe080e81, 0x3152ba8, 0xa40f1eb, 0x0c38eb1, 0x051049f, + 0xbd45003, 0x37e4bb3, 0x54a01e5, 0x6d09804, 0xeeb824a, 0x6de932f, + 0xdc93481, 0xccdef37, 0x93a05e8, 0x8633e07 } + }, + { + { 0x034675c, 0xbe94256, 0x08db789, 0x376c01d, 0x9af1b6b, 0x8707ee7, + 0x11bfbac, 0x633b3ef, 0xd06db60, 0x694f33f, 0xbb13407, 0x2a68bfc, + 0xda27c3a, 0x1c860c9, 0xd701ac3, 0xbca16de }, + { 0xc59ffd0, 0x2b76cfa, 0x54d718d, 0xf9a1165, 0x67f0878, 0xf86a1db, + 0xaf34e85, 0xe313e05, 0x3343159, 0xa188811, 0x0bb7ed1, 0xdbe4c3f, + 0x0c732bc, 0x73b67e8, 0xe74110e, 0xa4e1c87 } + }, + { + { 0x5c6770c, 0xce1106b, 0x5c0bcb7, 0x422c70b, 0x8195e7f, 0x32a3990, + 0x1ccd4aa, 0xa24968d, 0x720e557, 0x8f08ecf, 0x54bcc81, 0x5da10a4, + 0x6cd846e, 0x9d3c73b, 0x368d065, 0xaeb12c7 }, 
+ { 0xcf9fd1b, 0x2110859, 0xee2bd6d, 0xd2a4801, 0xe9466ac, 0x376e556, + 0x3b5aa35, 0x767803b, 0xb8a89ba, 0x343f842, 0x6726bbf, 0x3263cc1, + 0x25871b0, 0x26caf17, 0x41b8578, 0xef66ad6 } + }, + { + { 0x638068c, 0xc9f2249, 0x1ccf9af, 0x96d282c, 0x69b435a, 0x71df30c, + 0xcb9d5c9, 0x88c943a, 0x2a8f378, 0xbf98ef1, 0x114c6ff, 0xffc1824, + 0xd52e8c7, 0xda3ad2c, 0x1afcb59, 0xf1222bc }, + { 0x0ee334a, 0x459e94b, 0x421933a, 0xd4477b8, 0xa1e401e, 0x60fb7b0, + 0x0d1e330, 0xfde6e82, 0x3233fde, 0xcecfe9b, 0x2e93523, 0x09ec466, + 0x30775b9, 0xa5ba649, 0xadf80f2, 0xcc397e5 } + }, +}, +{ + { + { 0x4ddc8a8, 0x2fe182d, 0xac056bf, 0x88d6e79, 0x0e41e4e, 0xc3ff2d1, + 0x2c3679f, 0x32ec7f9, 0x4e61051, 0x3561f09, 0x6c6250a, 0x4553f5a, + 0xdd25c5b, 0x2b765ef, 0x6a1cd7f, 0xe3a40a2 }, + { 0x5d821dd, 0xb27309b, 0xc2c17ca, 0x950fb8d, 0x8fb0d4c, 0xfeed015, + 0xf550179, 0x762c479, 0xe095840, 0x306cf44, 0xd379e66, 0x84b413a, + 0xbb2e4f1, 0xd6e5d5a, 0x94b085d, 0x8bc12b7 } + }, + { + { 0x04b5532, 0xc0d4cb8, 0xb9940a6, 0x7a31525, 0x68c69d1, 0x010e7dd, + 0x2a18c35, 0xd81f29d, 0x3f11e73, 0x08ae770, 0x6e55106, 0x5358f87, + 0xc960ef5, 0x299e8ca, 0xacfc8dc, 0x89a6fb4 }, + { 0x6dc7d4a, 0x5996a40, 0xe51b96e, 0x21e5112, 0x09a202b, 0x95b8c3d, + 0xd441f1f, 0x306ab0f, 0x98d4245, 0x2834fed, 0xd0abbde, 0xc29c387, + 0xb805c15, 0xf6a9bf1, 0xc4e458d, 0x602f4f8 } + }, + { + { 0xe5a893a, 0xf041486, 0x8934327, 0x53b891d, 0x4000758, 0x11e000d, + 0x662bad9, 0xa4ccde8, 0xb9a1b64, 0xe34d3ed, 0x84e7a6d, 0x72d9675, + 0x6627be4, 0x773da2f, 0xe835ae3, 0xa11c946 }, + { 0x650bc15, 0x02e8203, 0xe58b78d, 0x2d35936, 0xf21a3cc, 0xe9cfbe8, + 0x1049222, 0x55ad831, 0x38fff47, 0xbf99de4, 0x3831db5, 0xebbfd80, + 0xaf2af42, 0xe990636, 0xb7f5a0e, 0xc26ae52 } + }, + { + { 0xfa8f846, 0xb5d85b1, 0xb3b1455, 0x4166489, 0xd36a305, 0x768260d, + 0x4ff5645, 0xc6a8235, 0xd6e93e5, 0xd241cd8, 0xa406e74, 0xeed9aa1, + 0x5f600d9, 0x9e96ab0, 0x6eca2a1, 0xa26b8b5 }, + { 0xd705aef, 0x78321cf, 0xc0161ec, 0xc4fb6b3, 0x5199cf1, 0xdc32441, + 0xd0a5067, 0x33627d0, 0x15143ee, 0x13490cb, 0x85b4f44, 0x77e0ede, + 0x394b165, 0x904f12e, 0xefab32d, 0x90f50f5 } + }, + { + { 0xbc2de96, 0x4aa0a16, 0xaa9c12b, 0x172596a, 0x60e8a29, 0xd512e1e, + 0xf637e83, 0x77d35c1, 0xd2aae0b, 0xbb0d141, 0x8c03738, 0x8a878a5, + 0xab0e525, 0x6d24c01, 0xf760887, 0xb7d3136 }, + { 0x3f91b7c, 0xdbc3f8f, 0xa8722c0, 0xe7b4bca, 0xda0ae65, 0x3286a91, + 0x225b084, 0x8372274, 0xae1886c, 0x5884cd5, 0x3a23cf7, 0xb4e63ef, + 0xf2dd0da, 0xfe5f202, 0x653916c, 0x951fac9 } + }, + { + { 0x854fa4e, 0x05e2e8f, 0x1edaf10, 0xf411f94, 0xa0a928d, 0x26cc562, + 0x4abce65, 0x78fd34e, 0x98a32e2, 0x1d87609, 0x4c37518, 0x85dc76f, + 0x00e8021, 0xdcaeef5, 0x4e9b2a5, 0x7fcb2f8 }, + { 0xf382c06, 0x9eba91e, 0x24cae53, 0x2052e85, 0xf5c1519, 0x617336e, + 0xb4e632b, 0xf1546d5, 0xd7b8ffd, 0xa9edc81, 0x29ab68c, 0xdb2914f, + 0xdebbaba, 0xe805070, 0xc3b719e, 0x775e53b } + }, + { + { 0x065256a, 0xa40e294, 0x8fb031a, 0x9f11386, 0x059667c, 0xac03af8, + 0x0475f58, 0x432eb3a, 0x01faad0, 0x22332bf, 0xbc57a11, 0xc8132e9, + 0x3bc3f8b, 0x27d5a17, 0x930bf3e, 0x5471fc6 }, + { 0xe6bff40, 0xba28bc0, 0x555e564, 0x198d57e, 0x9c65b8f, 0x13ce831, + 0x5681b51, 0xb0a5c9d, 0xdeb9e11, 0x467588b, 0xbb4250b, 0xf1891a7, + 0xd12b433, 0x10b938b, 0x24dcda4, 0x0b8c802 } + }, + { + { 0xcf332d3, 0xc428703, 0xf2a5b98, 0x9d0053c, 0x7838a15, 0x4e4c620, + 0xfbf8a43, 0x2e92919, 0x21cd9a5, 0x39ad524, 0x1561588, 0x584ed6c, + 0x17a95c8, 0x20af305, 0xb70e1c8, 0xa223077 }, + { 0x2fa4871, 0x679cfea, 0xac633c7, 0x54f2a46, 0x4cdc5f1, 0x6030651, + 0x75a1dc7, 0xc4facda, 0x2d07d19, 0x710a288, 0x6b44992, 
0xd55864e, + 0x454c5b2, 0x44d4b6c, 0x72f9981, 0x2855d28 } + }, +}, +{ + { + { 0xc7b0674, 0x4071b3e, 0xf8794d5, 0x800eb14, 0xbe6783e, 0x70573af, + 0x7785901, 0xafaa440, 0x405f32c, 0x112d2a1, 0x169b3e2, 0x3761a52, + 0x842a366, 0xe168b31, 0x9bf4734, 0x5bc322f }, + { 0x976c4a0, 0x36ef240, 0xfea4e64, 0x066f3d6, 0xa989e57, 0x0e954bd, + 0xf9466e4, 0xe36ef5e, 0xbeb9226, 0x6bb615a, 0x3d5a2ca, 0x5571e5f, + 0x4897a86, 0xa86efe2, 0x28a9f77, 0xed7e9cf } + }, + { + { 0x1f82c68, 0xdf10c97, 0x3b597e6, 0x796ba1e, 0xe718cbf, 0x1ac77ec, + 0x410eac8, 0xc8175bb, 0xbc555ef, 0x0cdf9a1, 0x7524e05, 0x6b889f1, + 0xae26d82, 0x6bf1e61, 0xd2e97d9, 0xb3f6ad5 }, + { 0xf226487, 0x94dcff9, 0xbe03dde, 0x60e6356, 0x6a3dd7d, 0xda1f93b, + 0x79ca90c, 0xf1be721, 0x1e6bce5, 0x05ed313, 0xd48af3e, 0xcf50908, + 0x61e554f, 0x3b0e85c, 0xa2778d3, 0xfe7e35b } + }, + { + { 0x75ac5a9, 0x42c5032, 0xda062c2, 0xa66a66d, 0xcaa7023, 0xa4f4f82, + 0x64b4f86, 0x489d476, 0x97311ad, 0x10b1088, 0x177b2ec, 0x55dd637, + 0x9a267b1, 0xa5ccff0, 0xff327b0, 0xf07690b }, + { 0x2250cd2, 0x39162ed, 0x8b255f1, 0x1426de0, 0x1bdd731, 0xf227afd, + 0xfa4c844, 0x78f8a36, 0x157379c, 0x267a211, 0xcc04acb, 0x3f05f92, + 0xfc69cae, 0x374496c, 0x16ebfec, 0xbf2c5d0 } + }, + { + { 0xd0518d1, 0x605418b, 0x9e1cbc6, 0x3237f80, 0x286c019, 0x37a7005, + 0xb15af0b, 0xf1fb0e0, 0xaa853c0, 0xfc3b97c, 0xe6beba2, 0x1f48bd0, + 0xe6a72f1, 0x8e5d7c5, 0x26ebf0c, 0x575e66d }, + { 0x62eae3d, 0x0994776, 0x96c9c65, 0x53f074f, 0xb81bade, 0x6cfbfdb, + 0x3fed7d1, 0x98b4efe, 0x38c3382, 0xdaa1123, 0x47b8ec6, 0xdf88b73, + 0x9504a4f, 0x9b0fe4b, 0xf30c1c3, 0x2e7df4c } + }, + { + { 0x2fc1833, 0x25380cb, 0x18d62de, 0xb8e248c, 0xd82f9db, 0x91c8f59, + 0x2444750, 0x5ec2b20, 0x66b6f74, 0x3f3a1f7, 0xdd7d14d, 0x0180aa9, + 0x2956b9c, 0xd0a342d, 0x7139873, 0x26e910e }, + { 0x139e23d, 0x2261dc4, 0xb8343dd, 0x7edb181, 0xb4038dd, 0xfcf1073, + 0xa3bfea3, 0x88870ef, 0x64a263e, 0x4e98ba9, 0x70811f5, 0x3c6e5dc, + 0xf86055d, 0x17d28f5, 0x66e4199, 0xca9c276 } + }, + { + { 0x964ef8c, 0x0b2d8bd, 0x88e2ba6, 0x5a99b85, 0x04498ce, 0x9e927b2, + 0x756eb25, 0x9ff20c5, 0x3f27736, 0x97cc27b, 0x4729583, 0xf32dd6d, + 0x0381a94, 0xbdc2658, 0xef2c06f, 0x70fef15 }, + { 0x49252cc, 0x50a6191, 0x236b4b9, 0x9eb4a14, 0x8e00f78, 0x9b1b215, + 0x6ea9c23, 0x27add36, 0xc3a8e79, 0xef61763, 0xd82ce56, 0xed4542f, + 0x0caed75, 0xa8737e7, 0xd452d76, 0xeca0ac2 } + }, + { + { 0x3d082d0, 0x20c0779, 0xc9e9f3b, 0x6e3ce64, 0x75a195f, 0xb3a4dce, + 0xbdd9f24, 0x3a3c305, 0x8688942, 0xe2545c8, 0x080f32b, 0xa463c82, + 0x42686b8, 0x4429748, 0x7213866, 0xf50e20d }, + { 0x3826e74, 0x265ac52, 0x228e8ec, 0x26fba57, 0xe6b3ed8, 0x8a1e1db, + 0xf0fe65a, 0x7c7b278, 0xc395234, 0x9a6df23, 0x0b0f114, 0x9956206, + 0xef90837, 0x440c8c4, 0x3645f65, 0x21ad22a } + }, + { + { 0xedd31b2, 0x1e023a6, 0x9ff8668, 0xf76d145, 0x17b45c8, 0x9707056, + 0x1e88e37, 0x0612078, 0x922faac, 0x85c51c8, 0x22756d9, 0x4df392e, + 0xa03c98e, 0x8907fd0, 0x52ea51c, 0x626f46a }, + { 0x486c8a2, 0xf8f766a, 0x88ed18c, 0x8c499a2, 0x3c4f0de, 0x44d2dc6, + 0x6f2a0b6, 0x47dde68, 0x4a973fd, 0x9a655f8, 0x786ac80, 0x3e7124e, + 0xe8a0574, 0x699e61c, 0x31cdd0d, 0xdf0ba9a } + }, +}, +{ + { + { 0xd73e69b, 0x76270ad, 0xc67d38a, 0x991120f, 0x9469f0c, 0x7be5830, + 0x7db40ac, 0x93aba59, 0x822fc08, 0x2b707bc, 0x69551cd, 0x4199fc0, + 0xf367324, 0x38deed4, 0x2228787, 0xca518e1 }, + { 0xd9a9277, 0x72f1bef, 0xe49ae90, 0x57d4aab, 0xdb23478, 0x13810d5, + 0x9b4b77f, 0x2a8b780, 0x1b4e004, 0xb542f4e, 0x3ec77f0, 0x4080fd0, + 0xcec6596, 0xb49e9fe, 0x3f16037, 0x20338d3 } + }, + { + { 0x53554b0, 0x4adcdae, 0xe04c4db, 0xfea4906, 
0x7748233, 0x0808bec, + 0x47148d7, 0xde7477c, 0x03da38c, 0xdd9124c, 0x25ee8e9, 0x6b25031, + 0xb0d6161, 0xae67399, 0x82203b6, 0x70c4acd }, + { 0xd31dae8, 0x9683916, 0x1ac7f69, 0x3477503, 0x988e4ad, 0x9553153, + 0x53a15e1, 0xb58f411, 0x92ba2dd, 0xb65a2d4, 0xa90169c, 0x7c3efb1, + 0x6b1747d, 0x210f45e, 0xcff488d, 0x16e8d1b } + }, + { + { 0x9d703db, 0x252adf8, 0xfdfeb39, 0x259ac1d, 0x115e806, 0x7faf6af, + 0xc1aff21, 0x7aaefd6, 0x7c0113d, 0x8054210, 0xe19b4b1, 0x481f1a5, + 0xfcc8c61, 0x7c17d43, 0xbb0bbbe, 0x8b04452 }, + { 0x4cebae1, 0xe51e5f5, 0x56a414c, 0x05341ba, 0x7fb8a30, 0x0083a2c, + 0x77f4952, 0xb4663f2, 0x4bb0074, 0xce72eec, 0xa3584d1, 0x74fdd66, + 0xb02e076, 0x6b9e58e, 0x3b961f4, 0x5be45d5 } + }, + { + { 0x1ab2e0b, 0xc7474f3, 0xf4bf454, 0x2838ccb, 0xf3c3eac, 0x634392e, + 0x137602b, 0x440e40a, 0xd1ae8e3, 0xeea67e9, 0x77e221e, 0xafdf93a, + 0x2719a10, 0x3c9f3da, 0x32c8256, 0x466ecef }, + { 0xf9c432f, 0x1061c19, 0xb1c7d98, 0xa1332d9, 0xa425c2c, 0xbc735f2, + 0x4b1bccb, 0x1429cdf, 0x6bbb5f9, 0x77b42a1, 0x5955ae4, 0x30078e3, + 0x21cc315, 0x8acd777, 0xe86fa99, 0xaa90d5f } + }, + { + { 0x721115a, 0xfcfd460, 0x08269b8, 0x6a7de3e, 0x96dd47e, 0xe5964a6, + 0x8dca975, 0x6717cd5, 0x98b149e, 0x7ea4ebe, 0xb7b8057, 0x6f894d5, + 0x7f30e31, 0xbd6f960, 0x23df092, 0x61ca453 }, + { 0x9d782f3, 0x32241f9, 0x2abfae2, 0x55173b0, 0xd15bbbd, 0x0abe0ed, + 0xb438abb, 0xb6d3c0a, 0x9ffa20b, 0x62fb467, 0xd31560a, 0x30926b5, + 0x2a0aa6d, 0x44bf27c, 0x1a4cb97, 0xf747313 } + }, + { + { 0xb0535de, 0xa2f6c0d, 0xc855166, 0xcb02ae1, 0xb3422f0, 0xc699e6b, + 0x281ba8a, 0x774febe, 0xffabcc7, 0x1d9d24f, 0xfe12ba5, 0x0b31ba1, + 0x13d0af7, 0x4c86803, 0x2f47160, 0x90640d3 }, + { 0x5876603, 0xa0c4bf4, 0x950ab08, 0x717f6fa, 0xa710de8, 0xf12bb53, + 0x6a88f50, 0xc500c61, 0x2645351, 0x0070f99, 0x2446893, 0x57aab5d, + 0xb68f657, 0xd553fa8, 0x693c55d, 0xe8537c1 } + }, + { + { 0x7fc7684, 0x58e86eb, 0xbfc73a9, 0xdf330f7, 0xcc11936, 0x41e337d, + 0x6e35759, 0x36d9200, 0x3500d8b, 0x0132703, 0x9483354, 0xfa68405, + 0x667851b, 0xc8f2980, 0x18296b0, 0x538ec89 }, + { 0xcff55f9, 0xa2a2c4f, 0x60d20bd, 0xb260d4d, 0xd9cc59f, 0x3ed576f, + 0xd514fcc, 0x4ed8c64, 0xc22b315, 0x37ebfb2, 0x94c212c, 0xca67a36, + 0x3a1795e, 0x4f8e08c, 0x4e7261f, 0x498f926 } + }, + { + { 0xc59b3d4, 0xfea7382, 0x3f2925f, 0xb9942ed, 0x8ea77e8, 0xe4b00dc, + 0x3cab02e, 0x74a18ec, 0xef16d0b, 0xbbbb752, 0xffab032, 0x639da4f, + 0x3aa30f0, 0xc371a4a, 0xcaa175b, 0x8e26b22 }, + { 0x7e2b62e, 0x94e4156, 0x25a794c, 0x7cceea6, 0x479f015, 0x931d2f4, + 0x90b25b2, 0x946183d, 0x68a2807, 0x1504e97, 0xfa49ddd, 0xa7577d3, + 0xdd48699, 0x24fc87e, 0x3d7d99c, 0x9edefd6 } + }, +}, +{ + { + { 0x0f0b450, 0x0508b34, 0xc36f7f4, 0xe0069a5, 0x2a5a761, 0x2655664, + 0x848e04d, 0x0193fd8, 0x73fe2e7, 0xc108cf5, 0xfd787d4, 0x05eb0ec, + 0xff28985, 0x1555ccb, 0x651b995, 0xb5af09f }, + { 0xe1134be, 0x167d72c, 0x57c669a, 0xd6d98bf, 0x6dd76fa, 0x40fb716, + 0x2a41b31, 0xeabbf20, 0x09b75b0, 0x300ff0e, 0xd9a0c1e, 0x32b6fad, + 0x65a80e0, 0x8051883, 0x32110fe, 0x8bef693 } + }, + { + { 0xbef47d4, 0x637802f, 0x2d16eaa, 0xfac114b, 0x0415644, 0x7b3f3ab, + 0x2dd895b, 0x17ab8d1, 0x87195f3, 0x271b7fe, 0xa71f65f, 0xa3f867e, + 0xc80583a, 0x39ba40c, 0x56e1fcc, 0x6db0672 }, + { 0x06662a8, 0x4feab4e, 0xc74bd46, 0xc857415, 0x732b126, 0x18032ed, + 0x7a099ea, 0x87c8aea, 0x36fe0a8, 0xb4a7535, 0x27673f6, 0x33a98da, + 0x2b8e549, 0x3e40c02, 0x9a4c587, 0x2def1af } + }, + { + { 0xa8c9ad9, 0x9618b68, 0x49defda, 0xd70b4aa, 0x5f788ef, 0xae8b138, + 0xdd523f4, 0x87c3542, 0x5c5b004, 0xe42c705, 0xfa7df57, 0x6303360, + 0x5f6d068, 0x33e27a7, 0x8ff331a, 
0x9b3268e }, + { 0x23ee0c3, 0x845cc96, 0xac80084, 0x003af70, 0x530c41d, 0x6a9f931, + 0xbb127f0, 0xa1d7051, 0xca36245, 0x642ce05, 0x0323ee9, 0xc34205b, + 0xb7b3513, 0x7cc8912, 0x076cbdb, 0x6252cc8 } + }, + { + { 0x7089522, 0x10e68a0, 0x58fc658, 0x36c1361, 0x74723a4, 0x490397d, + 0x519d56c, 0x42692c0, 0xf1ff235, 0x69d251b, 0xc2cbf37, 0xe689d03, + 0x825b7f4, 0xf04ceba, 0x2281c2e, 0xd6b9bee }, + { 0xe0043ab, 0xc52ef3f, 0xd1d1be8, 0x351bf28, 0x0f18a5a, 0x277615f, + 0x5d6800f, 0x31f717f, 0xab922e2, 0xf5fb82d, 0x2d6ae43, 0x99aee2f, + 0xc63b982, 0x42477fe, 0xa594a01, 0x904aeb1 } + }, + { + { 0xeb39974, 0xaa82174, 0x95e6aa0, 0xbc38e61, 0x25c0675, 0x6a3df8a, + 0xffbe739, 0xf324203, 0xa3f0649, 0xfa5a0b4, 0x7a7a6b8, 0x79c8732, + 0x40ad3f5, 0xeb65ecd, 0xe4e45c5, 0x718d416 }, + { 0xe2326fd, 0x029dbf4, 0xe7942f0, 0x0c63416, 0x6f4e678, 0x6d0c728, + 0xa138601, 0x59f0b10, 0x8d92ea9, 0x8a1d978, 0xc22eca5, 0x9f8d712, + 0x7b6b96b, 0x7397044, 0xe6fb955, 0xa2d49ee } + }, + { + { 0xbf14a19, 0x249f900, 0x63a8cd2, 0xd3522da, 0x86964d2, 0x28a32f3, + 0xc1fa743, 0xacf712b, 0x0bb94d3, 0x98a9bfc, 0xbc06824, 0x318ece1, + 0x4fce7f0, 0xfc47675, 0xe4135b7, 0x19caec9 }, + { 0xc6817bb, 0x6de68a8, 0xf3b6d89, 0x7121960, 0xf5a818e, 0xa7d4261, + 0x9157455, 0x0c0ba51, 0x450d5ff, 0x78b6acf, 0x4e8649a, 0x198b493, + 0xfd05da3, 0x0941a3c, 0xdb55951, 0x264ea4a } + }, + { + { 0x46e5a31, 0xcfee91c, 0xfff7366, 0x47b6806, 0x5df849d, 0xdb14be4, + 0xac66cc7, 0x3c5e22b, 0xa5f4769, 0x7f3f284, 0x383be36, 0x4e00815, + 0x8072b0b, 0x39a9f0b, 0xc7eadd6, 0x9887cd5 }, + { 0xb659511, 0x7dd8f05, 0xd2e1cb9, 0x15c796d, 0x0d31345, 0xe5edb0c, + 0x6939c60, 0x2025df0, 0xbf15de1, 0x6314c08, 0x04c7fb5, 0x03c1548, + 0xbb5d3ed, 0x413337f, 0x477e983, 0xfc20b40 } + }, + { + { 0x5db0ef9, 0x7f96880, 0xe9c2a70, 0x05562de, 0x7dae133, 0x071e5bc, + 0x237fc4a, 0xa8cdd12, 0x4ea492b, 0x6d565e7, 0x381ee52, 0xa17cf94, + 0x9f5c546, 0x6ab8a4e, 0x40288ef, 0xbb642f3 }, + { 0x5df5c2d, 0x64e5921, 0xbb906f4, 0x43696e3, 0x74ae46c, 0x73a841a, + 0xc506b8a, 0xe264883, 0xa1be548, 0x9542e1a, 0x5e81b4a, 0x8938539, + 0xeaca6ce, 0x5642cfa, 0x806e0f9, 0xed8077b } + }, +}, +{ + { + { 0x7e13597, 0x1c776c4, 0x9e584fd, 0x0ec8b28, 0xb8b61e8, 0x0bb6043, + 0x9cd835b, 0xdcc1748, 0x39fef9a, 0x493e6ac, 0xd133e17, 0xb44eb34, + 0x71cb6f9, 0xfebcd00, 0xd20eff2, 0xe6cf543 }, + { 0x0a004c7, 0xf265cad, 0xd35cc12, 0x9b06c9d, 0xcb4ea53, 0x769f985, + 0x0993434, 0x29160a2, 0x8d939c4, 0xdf8dd10, 0x6711e2f, 0xefa177c, + 0xcd7a2cd, 0x1695790, 0x77f6642, 0x38da3d7 } + }, + { + { 0x6307b74, 0x9bfcfd9, 0xbfdabc3, 0xc26a36d, 0x4abe28e, 0x9341be0, + 0x73d1387, 0xdb20b52, 0x3d1949c, 0xf8d229c, 0xb8b3a41, 0xf1e0afe, + 0xed565d0, 0x29c60df, 0x8b43b2c, 0x6930bb5 }, + { 0xfc0718f, 0x1d76527, 0x1f67189, 0xdb98143, 0x51f32cc, 0x0c62f64, + 0x8bd35e5, 0x70a6626, 0xc1cece7, 0x1725641, 0xf96f4a4, 0x7f130a8, + 0xf06ee98, 0x72319e9, 0x67bf9b2, 0x215b738 } + }, + { + { 0x0aaddd7, 0x8d1bec2, 0xb8be4f9, 0xfb8b95b, 0xfde1026, 0xeac193e, + 0x9d5860c, 0xa5edea7, 0x44280d3, 0x4adbaea, 0x38f4798, 0xce8b670, + 0xec30dea, 0x914c107, 0x000776b, 0xbdc5cf7 }, + { 0xa206a13, 0xb6fd7d1, 0xdae986e, 0x9941eba, 0x1f1caaa, 0x76c27a8, + 0x3f108b4, 0x6967c12, 0x4aea2d0, 0x6f11528, 0x144ddac, 0x9bb4319, + 0xc8ec6fc, 0x1a4d3ea, 0xbf37420, 0xfe4b0b8 } + }, + { + { 0xec0ac6f, 0x5d9a4a1, 0xfc7c80d, 0x84b79f2, 0xc14fac3, 0x64222f7, + 0xc23b3f2, 0xdd9e039, 0xea956bb, 0x4a84abd, 0xebe09dc, 0x370dcba, + 0xe0eaf82, 0x79a9ea8, 0xaee375f, 0x4cfb60a }, + { 0x9106827, 0x6a10dbf, 0x43f305b, 0xa3ba5cf, 0xc1bb083, 0x481b885, + 0xb3117b1, 0x2f52380, 0xddd6791, 
0x0066122, 0x63bace3, 0x4f8923e, + 0xecb88d4, 0x5c5f499, 0x3bac146, 0xfdc780a } + }, + { + { 0x7ba1f71, 0x34b70ae, 0x45bd184, 0x9091829, 0xe707313, 0x3b39778, + 0x6164e91, 0xdeefc5e, 0x4971f39, 0xbb55bed, 0x8dafc8b, 0x7d52339, + 0xa6adf0f, 0x82391bf, 0xe319522, 0xfd6f90a }, + { 0xf29bbc9, 0x60fdf77, 0xaaa4030, 0xeff9ed8, 0xf8c0d3f, 0x978e045, + 0xeed65cd, 0xe0502c3, 0x3cfd4c8, 0x3104d8f, 0xa639005, 0xab1be44, + 0x9eeab3f, 0xe83f431, 0x451d797, 0x01970e8 } + }, + { + { 0x3180f4b, 0xbc972f8, 0x617779d, 0xac053c0, 0x7fa149f, 0x89392c5, + 0xbcb6263, 0xdc4699b, 0xce12882, 0x0ae8b28, 0xaf1a4dc, 0xdca19a7, + 0x64e1a74, 0xd3d719f, 0xaffdd5d, 0xbb50201 }, + { 0x7ac30e9, 0x56f7310, 0x1878900, 0x65cc9c7, 0x27338a3, 0x83f5866, + 0xac5bb13, 0x122adef, 0x1bcd4d5, 0x97de200, 0xb8aa3a0, 0x6ed3985, + 0x6821f9b, 0x8680f1d, 0xdda9f98, 0xcb42028 } + }, + { + { 0x0ec2db3, 0xcdb0708, 0x3dad1a1, 0xe28c833, 0xde2da07, 0x2093e32, + 0x83b8987, 0x7317073, 0xf552b8d, 0xad17871, 0x51cf70a, 0x846da98, + 0x5c4f5e1, 0xf94a16e, 0x0f8348a, 0x8429996 }, + { 0x98db78a, 0x4bf3f68, 0x3d19b52, 0xad77fa8, 0x8b972dc, 0x6976772, + 0x5321be0, 0x7dfa35a, 0xdd344a6, 0x9881846, 0xad4e2a8, 0xe550292, + 0xbc68bf1, 0x8075217, 0x893be15, 0xdd837c4 } + }, + { + { 0xd4fab5b, 0x09c931e, 0xb77a0f1, 0xb2dcf08, 0xe0d38a6, 0x7dac5c0, + 0x0ae73af, 0xa5570b0, 0xf5aed28, 0xc7c19d3, 0x5251e92, 0x575fa6f, + 0xcdf7275, 0xb843cd6, 0x9a01287, 0xd9d3d8e }, + { 0xb3c370b, 0xf94e356, 0xfe464b0, 0xc62b99f, 0xa986057, 0x7792650, + 0xc4b1874, 0xeaa67d5, 0x0b07078, 0xba1ba4d, 0x7a03699, 0xdbf636d, + 0xedd32a3, 0x1a16c34, 0xa45cb5d, 0x6ce2495 } + }, +}, +{ + { + { 0xa684441, 0xd7c4d9a, 0x30cd42a, 0xce62af6, 0x43014c4, 0xcd2669b, + 0x6f65b24, 0xce7e711, 0x576fa19, 0x1847ce9, 0x9dd8ca6, 0x82585ac, + 0xb42e1db, 0x3009096, 0x384ab8b, 0x2b2c83e }, + { 0xb4e9a6e, 0xe171ffc, 0x7374b40, 0x9de4218, 0xdb1d616, 0x5701f9f, + 0xa3e8cbc, 0x211e122, 0x1e400bf, 0x04e8c1a, 0x0f37159, 0x0297470, + 0x3df8c28, 0x41775d1, 0x61ac2db, 0xcfaad4a } + }, + { + { 0x7dc0f49, 0x6341b4d, 0xf471a53, 0xaff6c2d, 0xfb8e91e, 0x20ec795, + 0xc3b7b62, 0x4c7a4df, 0xd374938, 0x9f33ff2, 0x3a60f2e, 0x38f8c65, + 0x2efef73, 0xc1168ac, 0xce408ee, 0x046146f }, + { 0x308b0c3, 0x9b39ac0, 0x36b8570, 0xe032d61, 0xfc4aacf, 0xee07d8d, + 0xd5a41dd, 0x0a82acb, 0x7c3d726, 0xbe0ded2, 0xb926ce9, 0xce51d60, + 0x5806c1e, 0xfa2f7f4, 0x1dec59c, 0xe367c6d } + }, + { + { 0xda2547b, 0x64511b6, 0x0761405, 0x76a349c, 0x01223ab, 0x37d6626, + 0xf4d7c48, 0x0e243c1, 0xda756a0, 0xdc9c8b4, 0xd72e7e9, 0xc7430df, + 0x27b4210, 0x0eb1308, 0xcf11cbd, 0x7a9c044 }, + { 0xe8dd150, 0x2c08ff6, 0x2932fc6, 0x18b738c, 0x04513e8, 0x07d5651, + 0xaa40a17, 0x0ca5cff, 0x01baa8f, 0xd486341, 0xb72b79e, 0xfb20faf, + 0x654020f, 0x1a051e5, 0x4e17f23, 0xe3b3317 } + }, + { + { 0x4de9428, 0x0591048, 0x5abdf97, 0x620542a, 0xa16a4d1, 0xaa0eded, + 0x6d65bb9, 0xa93f71c, 0xb8dfaf9, 0x88be135, 0x57ca8ee, 0x1d9f4e5, + 0x26781ad, 0x4c896aa, 0x6c6c49f, 0xd3fbe31 }, + { 0x2c34c3d, 0x088d852, 0xbadff1e, 0xbb6d645, 0x385450d, 0xe3080b8, + 0x50ab1f3, 0x5ccc54c, 0xac0657d, 0x4e07e6e, 0xb7ef2c0, 0xa7ba596, + 0x73a81e9, 0xcceca8a, 0x8284c35, 0xa0b804c } + }, + { + { 0xf17a6a2, 0x7c55956, 0x789cfa8, 0xb451d81, 0x2506eaa, 0xdf414e8, + 0xae96562, 0x6ef40fb, 0x0e0297e, 0x63ea283, 0x73c46fa, 0xf5df26e, + 0xaac8bce, 0xe00641c, 0x64371f3, 0xc89ed8f }, + { 0x793202e, 0xd22b08e, 0x875cb50, 0x39a9033, 0xf85ddb4, 0xe64eec0, + 0x7acf7b5, 0xdce45a7, 0xb9b802d, 0x39d1e71, 0xbd559ac, 0xafdfe7c, + 0x809eeb5, 0x17ec1f8, 0x4889b8c, 0x8c0e38a } + }, + { + { 0x17089da, 0x47eabfe, 0xec90c50, 
0x2d18466, 0x5861531, 0xa511aa4, + 0x8c39b39, 0xebb3d34, 0xf1b5282, 0xa0ac4da, 0xa9dadba, 0xea26be7, + 0x554d86e, 0x8992ba8, 0xd5f2ef5, 0x7fcbdb6 }, + { 0x56863e7, 0x320e79b, 0xa7dce2d, 0xeb9d0c0, 0x784cbc6, 0xb9f4031, + 0x7ac1f81, 0x68823ee, 0x9d87497, 0xa6b6f4f, 0x57f9b6e, 0x83c67b6, + 0x0fef2a7, 0x3735747, 0x59596e2, 0xf38028f } + }, + { + { 0x7e82886, 0x9ea57ab, 0x48c44d5, 0x18221c5, 0x314a24f, 0xbf8e6cf, + 0xfd025e5, 0x70ff18e, 0x5334468, 0x08d03de, 0x7404fb7, 0x2b206d5, + 0x55e36b0, 0xb923271, 0xb88ddd9, 0xcc7604a }, + { 0x4a746f0, 0x3df5152, 0x168e3fc, 0x8fdebd8, 0x7f8c32c, 0xffc550c, + 0x148743e, 0x1dbbc17, 0xb88e18b, 0xd48af29, 0x750027c, 0x8dca11c, + 0x1832be3, 0x717f9db, 0x2b06019, 0x22923e0 } + }, + { + { 0xc1cc4d3, 0xd4e06f5, 0x2b4f03a, 0x0fa32e3, 0xc4628d0, 0x956b9af, + 0x939dad1, 0x95c39ce, 0x8a00416, 0x39d41e0, 0x6fb01aa, 0xfd7ff26, + 0x45af340, 0xc6033d5, 0x8e36584, 0x2f65542 }, + { 0x8dff960, 0x14cfb1f, 0xda81474, 0x7236ffc, 0xd452d0f, 0xc6a6788, + 0x77f6094, 0x2ad4a52, 0x07eea74, 0x369d65a, 0xd6229aa, 0x27c6c38, + 0x8863976, 0xe590e09, 0xb38b142, 0x361ca6e } + }, +}, +{ + { + { 0xdfeb7ef, 0x6803413, 0xd3f4fad, 0xb669d71, 0xc941606, 0x5df402a, + 0x8e6c5b7, 0xe5d1776, 0x92ab236, 0x131bcb3, 0xce2e0e0, 0x7f1fb31, + 0x9e98c35, 0xa2c020d, 0xf28657b, 0x33b23c0 }, + { 0x9cf7879, 0xed14e73, 0xb4357b3, 0x10d4867, 0x31e4e04, 0x127cea3, + 0xaa5f8a7, 0xc60d25f, 0x025b987, 0xfef840a, 0x66f2a0a, 0x78081d6, + 0xac36198, 0x0fa0b97, 0x134dc9f, 0xe0bb919 } + }, + { + { 0xcc32eae, 0xc1d2461, 0x0f79a37, 0x0fdbfdf, 0x1c95f02, 0x70f2bc2, + 0x372cddf, 0x7d68bec, 0x8439342, 0x44f7817, 0x4843a6c, 0xa3d5678, + 0x07f8959, 0xbadf77a, 0x73db4ca, 0xf458198 }, + { 0xd54f805, 0xe8eaaf3, 0xb84c1e7, 0x2f529d1, 0x21e535c, 0x404e32e, + 0x159b5f5, 0xabac85c, 0xb00466f, 0x4e8e594, 0xc941873, 0x40fcaab, + 0xbe407c6, 0x3b4e370, 0x5b2e58d, 0xccd5788 } + }, + { + { 0x88b74a8, 0x3ee615e, 0xeab4e69, 0xd7d6608, 0xe4ace36, 0x27cf9f1, + 0x7aebabb, 0x282359e, 0xf6d162f, 0x96e509b, 0xf1a290a, 0xad906f3, + 0x1314a58, 0xe7d6c4f, 0x218431d, 0xeecffe4 }, + { 0xe2cfed9, 0xa66e0e9, 0x71f0544, 0xb0887ec, 0xa04c5d7, 0xd34e36b, + 0xed4392d, 0x094daa5, 0xc8aa925, 0xcda83ad, 0xb979786, 0x1adef91, + 0xfddc5d6, 0x3124dcb, 0x0b70c14, 0x5cc27ed } + }, + { + { 0x0eac2d8, 0x386dbc0, 0xc50ca30, 0xa716ecb, 0x80d9f04, 0x9e3fc05, + 0xcfeaceb, 0x37dde44, 0xa3522d5, 0xd88d74d, 0x2cf239a, 0x6bb9e9f, + 0xa7cbfec, 0x9e7fb49, 0x0a5c0ef, 0xe1a75f0 }, + { 0xfb9229d, 0x6e434e7, 0xc8a79b3, 0x0ec6df5, 0xd3fb311, 0x7046380, + 0x52e20fa, 0xe957ef0, 0x9ef4614, 0x0f4fe9a, 0x54d8f2b, 0x1b37d9c, + 0x39d84a2, 0x23b2dc1, 0x724e713, 0xf62c4f6 } + }, + { + { 0x747e219, 0xbd6922c, 0x3869b7b, 0x34d1438, 0x96f2272, 0x8c875a5, + 0x3fe361e, 0xd9602c0, 0x744839f, 0x081348f, 0x61ac1f1, 0x61bd16c, + 0xd8da4e1, 0x993b727, 0x7741271, 0xbb40ba8 }, + { 0x81dcfff, 0xe6dcc98, 0x93ce616, 0x9f513f5, 0x618cd8f, 0xdc09683, + 0x26639be, 0xc3b1d10, 0xc762ee2, 0xe8f149f, 0xb244aae, 0x59f26ef, + 0x693dd96, 0x3f2de27, 0x9c3a7de, 0xd8b68f7 } + }, + { + { 0x970bd5b, 0x6fa20b9, 0x75f6179, 0x87242d7, 0x72d9308, 0xa95a6c6, + 0x37a8a58, 0x6eb2518, 0xc59562c, 0xfdea12a, 0x20f1fc3, 0x4419c1e, + 0x9d66788, 0x0c1bd99, 0x32c0547, 0x4b74288 }, + { 0xdf479ab, 0x4f38acc, 0xc52a942, 0x01f6271, 0x02ca9a7, 0xe3298f4, + 0xb718fc8, 0x533daca, 0xb093ca8, 0x133602a, 0x8f98104, 0xc04da80, + 0xaf08620, 0xd0f2e23, 0x178b164, 0x882c817 } + }, + { + { 0xec30a71, 0x28e6678, 0xf78aca1, 0xe646879, 0x88fa078, 0x868a64b, + 0xfee3433, 0x671030a, 0x87c0211, 0xb2a06bb, 0x46c406a, 0x202eca9, + 0xe4f0f59, 0x64d6284, 
0x3c9f907, 0x56ae4a2 }, + { 0x1dcc100, 0x5abbb56, 0x07c7784, 0x6fef6cf, 0xdb7302d, 0xb6e25cd, + 0x42980e8, 0xa26785b, 0xfb96801, 0xe7d4043, 0x8e4282b, 0x46df55d, + 0xc602d6e, 0x9c0a5f5, 0x75dfe29, 0xf065604 } + }, + { + { 0x3dcbc90, 0x0e82a1a, 0x656feac, 0xb1ee285, 0x0d3d3b2, 0xfa4353b, + 0xdd5c5df, 0xc2e7a6e, 0x416ce53, 0x13707e1, 0x87ebc07, 0xc84ce07, + 0x8a9a834, 0xdd273ce, 0x5e8e1e7, 0x432a617 }, + { 0xbd0064a, 0xa359670, 0x6534516, 0xc899dd5, 0xdb27169, 0x666560e, + 0xa19a068, 0x1537b22, 0xeac7527, 0x3420507, 0x6fc13a7, 0x479f25e, + 0x1bc19b3, 0xc847acc, 0x0b20d45, 0xecdecf0 } + }, +}, +{ + { + { 0x4acea57, 0x6f24100, 0xda68597, 0xdace1c6, 0x50ce77f, 0xea7dd41, + 0x1585884, 0x1aecb84, 0xea4a85c, 0x92ff208, 0x88eebd2, 0xde9433c, + 0x3f4d289, 0x53cd318, 0x26539af, 0x3970858 }, + { 0xb827d87, 0x4b57599, 0x3d77638, 0xdc82ac0, 0x52f6e61, 0x6943366, + 0xad5e8a6, 0xb8fc4b0, 0xf388642, 0x1b6f7dc, 0xa74dd57, 0x6f24533, + 0x41750cf, 0xc669378, 0x28a37af, 0x06757eb } + }, + { + { 0xc133995, 0x0e70d53, 0x7c8c97d, 0x88a5e0c, 0x85f3be3, 0x4e59dbf, + 0x0e92698, 0x0f364ac, 0xef6940f, 0x3a1e79b, 0xd85d23a, 0xc8a3941, + 0x9a00e58, 0x143bb99, 0xc6f2f10, 0x61cf7d6 }, + { 0x85150fe, 0x979c994, 0x59d773f, 0xcfd0df2, 0xaab7bcd, 0xce97b9d, + 0x6afd8fc, 0xc9fff8e, 0x89a4628, 0x246befd, 0x1567090, 0xf630282, + 0x6749c58, 0x1539342, 0xa0f3fd3, 0xff47d0e } + }, + { + { 0x35f6706, 0x09b0bfd, 0x2c82e69, 0x7464581, 0x50d5fe9, 0xb60729f, + 0x95c74f1, 0xf133245, 0xbb76c89, 0x33647e3, 0x5a9afcc, 0x0126404, + 0x0f154ab, 0x46d57ee, 0x25680a4, 0x2efa555 }, + { 0x5329d90, 0x12ebfc6, 0x79800af, 0xcb37ae5, 0x6f8e310, 0x5bb5349, + 0xf1bb936, 0x9b59c63, 0xf4610e9, 0x5b49baa, 0x4f2d6ac, 0x2bbeeef, + 0x0badc67, 0x87ee21e, 0xf1ddfa0, 0x12e2aad } + }, + { + { 0xa9109ee, 0x5b4668f, 0x8a6cea2, 0xfa95133, 0x4068e16, 0xe45e6fc, + 0x0205ed8, 0x8ae9a0c, 0x679b79b, 0x2993b96, 0xed604d3, 0xc6b878f, + 0x32c77f3, 0x01d0208, 0x495a1ab, 0xd45d890 }, + { 0x29d2030, 0x99348fa, 0x61f8f7a, 0x961f9a6, 0x674f74b, 0xfd53212, + 0xb3e72bc, 0x45cee23, 0xb77e2d5, 0x3fccb86, 0x4219cb7, 0xdff0310, + 0xc056871, 0x233771d, 0x7d2c521, 0x1214e32 } + }, + { + { 0xff2a8e1, 0x9f51e15, 0x138bc70, 0x86571c5, 0x0c09d46, 0xbfc4caf, + 0xc2a0c18, 0x65e33fe, 0x426867d, 0x8214392, 0x80ae4ed, 0x51ce6c0, + 0xb110de6, 0x6cbe8d7, 0xfd22ea4, 0x7f6e947 }, + { 0xcadefc4, 0x7373a75, 0xb0c682f, 0x6fca1d2, 0xf3c7c1e, 0xcd2140d, + 0x558b7a5, 0x8653a37, 0x55eb321, 0x653e74e, 0xc31af73, 0xbe0c6b3, + 0xf4fc365, 0x3376379, 0x71add4d, 0x3570b37 } + }, + { + { 0x83c3494, 0x9061ec1, 0x677bc95, 0xaf2f28d, 0x3bf8768, 0x6fe7279, + 0x0fa86d8, 0xc5f50e3, 0xa3293ce, 0x6c03060, 0xe2355a6, 0x4d53357, + 0xe4df931, 0x43a59ea, 0x13b79c6, 0x6f48f5d }, + { 0xddc5192, 0xa4d073d, 0xa65773f, 0x6d0e318, 0x765de9e, 0x1008792, + 0x39a0375, 0xa724ed2, 0x97d7c9e, 0x510ff14, 0x5baa863, 0x251f622, + 0x648a351, 0x86464fe, 0xd50fd91, 0xf85e98f } + }, + { + { 0x86ee987, 0x29c9634, 0x10dcc9f, 0x93e8e52, 0xc910b1f, 0xa1fc4d1, + 0xfeb603e, 0x015acac, 0x0844a5f, 0xc9f25f8, 0x73f4dac, 0x50de93c, + 0x310a4aa, 0x1758783, 0x358f106, 0x544d570 }, + { 0x1dc68ca, 0x4eeec7b, 0xe00fbcb, 0x6238e6f, 0xb4e83c9, 0x34d394c, + 0x2292656, 0x764ffa2, 0xf641f2e, 0x5614cd1, 0x9e07234, 0x4252eb6, + 0x68d2ba4, 0xcbaef45, 0x8a98b17, 0x8c9c550 } + }, + { + { 0x4106140, 0xf235d9d, 0x9eb601e, 0x1bf2fc3, 0x375e0c3, 0x6fb6ca9, + 0xc0024d2, 0x4bf5492, 0xeb54cc6, 0x3d97093, 0x5c90cb5, 0xc60931f, + 0xfbe0f1a, 0xfa88808, 0xd33e7d4, 0xc22b83d }, + { 0xc0abbf5, 0x9cfec53, 0x93723df, 0x52c3f0a, 0x39b96b6, 0x0622b7e, + 0x1667270, 0x300de28, 
0x9ef426a, 0x50b66c7, 0xc6eb295, 0x8849189, + 0x8914a7e, 0xeaec3a9, 0xc4c99e0, 0x7ed56b0 } + }, +}, +{ + { + { 0x687e557, 0x7926403, 0x5310017, 0xa349816, 0xd43a8fd, 0x1b06e91, + 0x6ac23cb, 0xf201db4, 0x4f48750, 0x6f172ad, 0xe74bd3e, 0x5ed8c8c, + 0xdaba648, 0x492a654, 0xa9b64ff, 0x123010b }, + { 0x6e89f93, 0xa83125b, 0x398378a, 0x3a3b0b0, 0x0aebe7c, 0x9622e0b, + 0x49512a4, 0xb9cbfdc, 0x6aaf12a, 0x13edffd, 0x9f5eafd, 0x555dff5, + 0x1212efa, 0x3cba6fe, 0xd9bb0f8, 0xd07b744 } + }, + { + { 0x9a48920, 0x45732b0, 0x13ff36d, 0xf3080fc, 0xde8f950, 0x9347395, + 0x382b897, 0x14d025a, 0x04d72ad, 0x60c5a74, 0x11a9c71, 0x30be7e5, + 0x31ac33a, 0x43ffabd, 0x35cbb14, 0x97b06f3 }, + { 0x7740de9, 0xe4ff5c5, 0xaacf81e, 0x5fed090, 0xe8b7c9d, 0x97196ee, + 0x045910b, 0x316dcd1, 0x5ad8c63, 0x7a2b2f5, 0xc5b03bb, 0x674fffd, + 0xe65953c, 0xc1cd133, 0x0a83556, 0x3c06052 } + }, + { + { 0x091c23d, 0x797c3f6, 0x39c9c05, 0x2ea2de3, 0xa31f67c, 0x5d958b4, + 0xd5f088c, 0xf97afe5, 0x0b37243, 0xbcfbd2a, 0xeca630c, 0xc43ad3e, + 0x42845e0, 0xb92a337, 0xa9a0f16, 0x970bff7 }, + { 0x5970a79, 0x8635511, 0xf205928, 0xcee332e, 0xc04c208, 0x2c58d70, + 0x3f5e5bf, 0xdbfe19a, 0x8e51c56, 0x8f8f2c8, 0x8e2da75, 0xb61f58e, + 0x624d93f, 0x4046a19, 0xe1f9538, 0x7de64db } + }, + { + { 0xc2d850e, 0xd018e1c, 0x63a723c, 0x8cdb643, 0x90a42af, 0x9a65abe, + 0x16f20cc, 0xfeece96, 0xd5cff56, 0xc906800, 0x3f0deed, 0x0acf23a, + 0x728dd3a, 0x2143061, 0xb8ce34c, 0x66276e2 }, + { 0x73cc9c7, 0x23700dc, 0x5b1778b, 0xdb44851, 0x4aab669, 0x330f41e, + 0xf5282a4, 0x2f5aabc, 0x30f9e01, 0xff837a9, 0x901cc98, 0x1a1eb2f, + 0xe69bd7f, 0xd3f4ed9, 0x8a72a7d, 0xa6b1141 } + }, + { + { 0x9ea3b43, 0x34bde80, 0x5ced6ae, 0x5ddcb70, 0x95a6cb8, 0x8257f5b, + 0xc77dcb8, 0xaac205d, 0x035b397, 0x77d740d, 0xcf7e0a6, 0xca7847f, + 0x085601b, 0x9404dd6, 0x457e4f9, 0x0a5046c }, + { 0xbc11470, 0xcaee868, 0x005c5f6, 0xb118796, 0xec79173, 0xcc04976, + 0x21f6827, 0x7f51ba7, 0x486ff7e, 0xa8e3f0c, 0xf87838c, 0x327163a, + 0x6d039fd, 0xcf2883e, 0xdb8b0e2, 0x6fb7ab6 } + }, + { + { 0x620d669, 0x8ca5bac, 0xed7caa9, 0xff707c8, 0x927909b, 0xdaefa2b, + 0x7029da3, 0x1d2f955, 0x6d131a0, 0x52a3ba4, 0x3ab1041, 0xe5a94fd, + 0x99bc0ae, 0x5089177, 0xfa1bd16, 0xf750354 }, + { 0x6cd31fd, 0xdd4e83a, 0x92fac84, 0xd335053, 0x1691382, 0xf914cbc, + 0xda6ade6, 0x669683f, 0x8878513, 0x6944643, 0x4b1a72d, 0x429d3cc, + 0x61eec36, 0x655c46a, 0x4bc4970, 0x881eded } + }, + { + { 0x7ca647f, 0x5b39d37, 0xe917b34, 0x41533c1, 0x7daf734, 0xea2aeb5, + 0x1286560, 0xf1ef1eb, 0x08e0473, 0x582f2e0, 0x5edc74a, 0x5913d7d, + 0x3c1e754, 0x588c7ec, 0x7146fe1, 0xbd6db05 }, + { 0x7634907, 0x3b0d49e, 0xe43b9cc, 0x4c65ce4, 0x2d92d5b, 0xb87e958, + 0x7ab1519, 0x0513572, 0x8c3aed0, 0x03ec084, 0x561a641, 0x4d7aa21, + 0x99e92ad, 0xe5f8211, 0x48a457c, 0x379b55f } + }, + { + { 0xd6a8442, 0x8317c34, 0xae499da, 0xb0ab4a5, 0x720e8eb, 0xebcb16e, + 0x9a96908, 0xfd5c563, 0xad23acf, 0xcab4d67, 0xbcdf748, 0xa600a79, + 0xa2a6a51, 0x18a6340, 0x3aabd69, 0xf2f415c }, + { 0x747258a, 0xdb38a4f, 0x2e24415, 0xb6ea560, 0xf1f7655, 0xfad1ea9, + 0xc957684, 0x4e27eb5, 0xb2e1cfc, 0xf8283e1, 0xaa6291c, 0x8f83bd6, + 0x5619e84, 0x28d23b5, 0x93770a4, 0xb9f34e8 } + }, +}, +{ + { + { 0x7515fb1, 0x1bb8437, 0x7b860a6, 0xac73f2a, 0x22b390f, 0x78afdfa, + 0x66048aa, 0x815502b, 0x85bf620, 0xf513b97, 0x3fc5d7c, 0x2524e65, + 0x178c969, 0xa10adc0, 0x5391c8d, 0xa1d5396 }, + { 0xa8bcc45, 0x09fccc5, 0x7710e1e, 0xa1f97d6, 0x897d0a1, 0xd694442, + 0x5f42400, 0x7030beb, 0x7127908, 0xdebe08c, 0x2187637, 0x96b715c, + 0xb528129, 0xc598250, 0xa1ccb07, 0x0f62f45 } + }, + { + { 0xb765479, 
0x8404941, 0x5837dc4, 0xfdecff4, 0xadbd465, 0x1796372, + 0x3159806, 0x5f84c79, 0x6aaad34, 0x6d2e46b, 0x384b375, 0xd303b4a, + 0xb392002, 0x440acd5, 0xc475e87, 0x4f2a4a7 }, + { 0x5606fc2, 0x038e1da, 0x9c2f050, 0x2d821c2, 0xf139db4, 0xc074cb3, + 0x4ec59be, 0xde2fee7, 0xa84ed59, 0x5a819ee, 0x3e98711, 0xd65c62c, + 0xb9723c1, 0x72eb440, 0x01be611, 0xb927754 } + }, + { + { 0xab9e9fc, 0x929fe64, 0x0bf1e85, 0x04379fd, 0xbc28ee3, 0xb322093, + 0xe4555e1, 0x78ac4e2, 0xabc5588, 0xdb42b58, 0x77c8b12, 0x1c1b5e1, + 0x40366c4, 0xf6d78dd, 0xbdae22e, 0xc21ff75 }, + { 0xa211df2, 0x1e3d28e, 0x3617c0a, 0xc5a65a1, 0x58140d5, 0x3fa02c0, + 0xb62d10c, 0x155c346, 0xe48268f, 0xc9cf142, 0x1993bc3, 0xdc14083, + 0x0ee69dc, 0x07c44d4, 0x5e2ac46, 0x6169950 } + }, + { + { 0xd0fb585, 0x44e4a51, 0xf1f3ce8, 0x00846be, 0x8e2de1e, 0xedef39a, + 0x33b3934, 0x430afe3, 0x4337188, 0xac78b05, 0xc9a3f24, 0x0f39de4, + 0xc9ae6a4, 0x039eddd, 0x8eacd51, 0xf470157 }, + { 0x9a2f31a, 0x1e39694, 0xb19a8b1, 0xc8a40f4, 0x9d239d8, 0xdddd10c, + 0x887e066, 0xf974245, 0x3ea28c6, 0xfdb5111, 0xe1122a9, 0xb5af0fb, + 0x36e0267, 0xd30c89f, 0x74f024c, 0x7b1c0f7 } + }, + { + { 0x07a39bf, 0x1ec9956, 0x3a68d15, 0x1c3ecf2, 0x4f59fe9, 0xd8a5c4e, + 0x271abc3, 0xacb2032, 0x71ef239, 0xbc6bdf0, 0xb39b391, 0x660d7ab, + 0xb627a0e, 0x2e73bb2, 0x248fc7e, 0x3464d7e }, + { 0x1666760, 0xaa49249, 0x8582659, 0xa257b6a, 0x5593089, 0xf572cef, + 0x73ca6bf, 0x2f51bde, 0x764cff5, 0x234b63f, 0xd411a35, 0x29f48ea, + 0xafe1db1, 0xd837840, 0xd9f4c4b, 0x58ec0b1 } + }, + { + { 0x5e6f3dc, 0x8e1deba, 0x06a5ff7, 0xc636cf4, 0xc80ca0f, 0xe172b06, + 0x5ffb90a, 0x56dc098, 0x9a05e83, 0x895c218, 0x7561ac2, 0x6ddfaec, + 0x96283a0, 0xaa35749, 0x7e7cd43, 0x6dfb262 }, + { 0x2c8ca27, 0x6576de5, 0x49018eb, 0x6a4a872, 0x5c34342, 0x00c275c, + 0xd2d90c4, 0xe34805a, 0xd8743c4, 0x651b161, 0x7312bf3, 0xb3b9d9b, + 0x0bf7e00, 0x5d4b8e2, 0x78d3d7e, 0x8899bdf } + }, + { + { 0xfaa9cd1, 0x9644ad8, 0x6e0e58e, 0x34c98bf, 0x404c637, 0x6022aad, + 0x7ac013b, 0x2a11a73, 0x5540899, 0x5bdd103, 0x1e022a4, 0x2e67572, + 0xb834c33, 0xe32045d, 0x2f2d01c, 0x74a260c }, + { 0xc48841c, 0x20d59e9, 0xe560359, 0x05045dd, 0xac998ac, 0xeba779c, + 0x00a6218, 0x5bed10c, 0x5327ef4, 0x25d4f8e, 0x4597794, 0xa278474, + 0x831d11e, 0xefd68ca, 0x934446a, 0x9ad370d } + }, + { + { 0x73c92ac, 0x3089b3e, 0x957a75c, 0x0ff3f27, 0xd676f50, 0x843d3d9, + 0xd496d43, 0xe547a19, 0x8e924a4, 0x68911c9, 0x85b5522, 0xfab38f8, + 0x83e0ac5, 0x1048811, 0xdc788c4, 0xcaccea9 }, + { 0xe3c6aad, 0xfbe2e95, 0xb3a6cf1, 0xa7b3992, 0x87d78b1, 0x5302ec5, + 0x1826100, 0xf589a0e, 0x8610632, 0x2acdb97, 0x9232b26, 0x1e4ea8f, + 0x9c09a15, 0xb21194e, 0x849b909, 0xab13645 } + }, +}, +{ + { + { 0xf3a71c1, 0x92e5d6d, 0x297d661, 0x349ed29, 0x1713fc9, 0xe58bd52, + 0xb9ddfb5, 0xad999a7, 0x3c28ce0, 0x271c30f, 0x2a9d460, 0xf6cd7dc, + 0x207dec7, 0xaf728e9, 0xfcb8bf0, 0x9c2a532 }, + { 0x68bf486, 0xd702184, 0x7ab8ea8, 0x73b45be, 0x1795c93, 0xddfc658, + 0x6bb8da2, 0x7941660, 0x88e07a2, 0x658f197, 0x26d3d12, 0xa9d5b08, + 0x9535b52, 0x4d7c95f, 0x268ef8a, 0xad55e25 } + }, + { + { 0xa2bc326, 0x94a9b0b, 0x167e5f3, 0x485ecc5, 0xc97fc74, 0x8340bc7, + 0x07aaa5c, 0x06f882b, 0x849698a, 0x4b57455, 0xb36a0ba, 0xd9281eb, + 0x8b8108f, 0x8918c6c, 0x5b50d1d, 0xedd1eea }, + { 0x2a25f50, 0x94d737d, 0x2446ad0, 0x0e5a823, 0x7ced3e2, 0x02a5435, + 0x4af8ced, 0xb09a92a, 0xeeecef2, 0x85fc498, 0xe71e3d4, 0x06a02b9, + 0x84bb49a, 0x00ad307, 0x64a5b4a, 0xf61585e } + }, + { + { 0xb86a4c9, 0x915f6d8, 0xa861e1f, 0x944bc6b, 0x54465ef, 0x3091ca7, + 0xeb53a38, 0x11df859, 0x0144679, 0xd44dde5, 0x0994edd, 0x6c8da9a, + 
0x91241ef, 0xeebcebf, 0xc2f6859, 0xc419354 }, + { 0x49581b6, 0x1f49693, 0xbb26cb4, 0x5712b10, 0xb09fd59, 0x8fcaa41, + 0x72e22e3, 0xbd39aad, 0xb1199b0, 0xf70e794, 0xc6f863d, 0xdf63c0c, + 0xee9df4f, 0xd58166f, 0xc45e70b, 0xb9224ea } + }, + { + { 0xce525f4, 0x80072fa, 0x66a5502, 0x8597bd6, 0xdbc9725, 0xf65e203, + 0xf2222a4, 0xeccfbe3, 0x2339834, 0x490aa42, 0x62489e8, 0x1348891, + 0xa735084, 0xaff3f80, 0xf3f1bd6, 0x69d53d2 }, + { 0x813341a, 0xb123ffc, 0x1173848, 0x359084c, 0xd29b08d, 0x751425e, + 0x3890ad4, 0x1edda52, 0x607cf20, 0xb64974c, 0xb42ac7c, 0xa8c8cb8, + 0xedd42e5, 0xd5cb305, 0x44c090a, 0xf3034dc } + }, + { + { 0xbb18e19, 0x428921d, 0xfed2127, 0x4cfd680, 0x92ac8c3, 0x671144d, + 0x132c894, 0x2121901, 0x7604cd9, 0x25d0e56, 0xafbc2a0, 0xa372223, + 0x56c16f7, 0xcf98a52, 0xb5459e1, 0x71f129a }, + { 0xb668b2e, 0xf4afdc5, 0x0c2d410, 0xc5d937a, 0x285d54a, 0xe2cc4af, + 0x8c53e18, 0x1c82777, 0x69a92f6, 0x270f2c3, 0x616327a, 0x799f9ac, + 0xd4246f2, 0xce658d9, 0xfb12e36, 0x0fb681f } + }, + { + { 0xe0690fe, 0xc5ab11e, 0x3f74249, 0x80261e3, 0x58c1cf2, 0x8eb4b47, + 0x184ae9b, 0x4895a80, 0xd3e27eb, 0x4a4bdb6, 0xbfd251c, 0xa7a1638, + 0x417a7e3, 0x29ec144, 0x3f1b960, 0xd073609 }, + { 0x49c73d1, 0xcb1ed83, 0x8d1945a, 0x33fc84a, 0xe965118, 0x9f668db, + 0xa82811f, 0x3331743, 0x28ba540, 0xf394dec, 0x654a454, 0x44ce601, + 0x3623645, 0x240dbb6, 0x2e61048, 0xf07e7f2 } + }, + { + { 0x3d45213, 0x7c9f176, 0x9c1f77f, 0x3eefa70, 0x1b48350, 0xde3c3c5, + 0x9d481a7, 0x4a2bc64, 0x7874f3d, 0xfd4a58a, 0x037b302, 0x96655d4, + 0x68bf5ab, 0x9452528, 0x75177f6, 0x1b6d46a }, + { 0xefb8d00, 0x7de6763, 0xa741b7b, 0xb2c1ba7, 0x7bae6ed, 0xcca6af4, + 0x5b68b3f, 0xe4378ca, 0xaf71948, 0xfb757de, 0xbc6ac99, 0x7f07b5e, + 0x27d636d, 0x752a568, 0x4b8a34f, 0xc8b7d1d } + }, + { + { 0x325331b, 0x76cb78e, 0xadd2eed, 0x41f41c9, 0x5c5f623, 0x03db238, + 0x7102fa2, 0xbbc1d17, 0x60182ec, 0x80f137a, 0x55adf15, 0xfdd8569, + 0xe3373dc, 0x4f53f5e, 0x21b669b, 0xec6faf0 }, + { 0x0b86081, 0x7d4e983, 0xf2d979c, 0x10d3cd9, 0x24a22c8, 0x0f48f58, + 0x02f99ee, 0x86c540c, 0x5e6c5fc, 0xf4c6654, 0xbc404c8, 0xaf0c588, + 0x423118a, 0x2e6edbd, 0x0690eab, 0x86e32e9 } + }, +}, +{ + { + { 0xdfbfa6f, 0x1d12656, 0x7646018, 0xa498095, 0xc3597d0, 0x2f1071b, + 0x1dda80a, 0x3df83f9, 0xf3ae449, 0x5853e28, 0x9e19aad, 0xb853d31, + 0xa0d8a46, 0x863f01b, 0x2fef108, 0xa84fca6 }, + { 0xfb84de9, 0xbe4c0b7, 0xc0727bf, 0x40a03dc, 0xb18575c, 0x781f841, + 0x466cddb, 0x6a63045, 0x05dc7a2, 0x6be7582, 0x07ae811, 0x420f87f, + 0x3bf96c8, 0x2808242, 0x51c6821, 0x723998c } + }, + { + { 0x81f5863, 0x38ab641, 0x05ff9e1, 0xd82ecbd, 0xa065856, 0x339c94e, + 0xa45156d, 0x143054a, 0x065628c, 0xe6d64bf, 0xa938589, 0xe530086, + 0x385d79b, 0x22d3a49, 0x0ab8245, 0x0b10790 }, + { 0xca387b5, 0xb0d80fb, 0x35551d7, 0x698206e, 0xa10bb73, 0x199685d, + 0x9107378, 0xa8e5fa8, 0xd99dbbf, 0x36e5724, 0xd581b03, 0xd67f476, + 0x88dd1e6, 0x7a15be7, 0xe5baa31, 0x8dac8e4 } + }, + { + { 0xe170ef8, 0x4d5d88f, 0x1e9e600, 0xb6ba5de, 0xedeabc5, 0x4a89d41, + 0x8fac936, 0x737c66b, 0x65c3125, 0x8d05b23, 0xb61b68e, 0x85a5cbc, + 0x20a6af9, 0x8fea626, 0xd8b50ec, 0x85115de }, + { 0x6a6f30b, 0x5430c8d, 0x8474295, 0x8bef9cf, 0xbe77f38, 0x0648f5b, + 0x9e47bd7, 0xfe2b72f, 0x93106e2, 0xad6c5da, 0xfa7a6c3, 0x4fa6f3d, + 0xb396650, 0xdcd2ed8, 0x1157ef9, 0x7de1cce } + }, + { + { 0x1f241d1, 0x70a5f6c, 0x798cd5c, 0x6c354d8, 0x1a729fb, 0x23c7838, + 0x523cbda, 0xcff8f15, 0x3493697, 0x5683ff4, 0x7534f53, 0xef7dbab, + 0x2243d53, 0xd7bd08e, 0xf8072a9, 0x6f644cb }, + { 0xb22db63, 0xac960f9, 0x23af04d, 0xa97f417, 0xd9798af, 0x692b652, + 
0xfedb156, 0x0e35967, 0xdfe6ee8, 0x14b5e50, 0xb411070, 0x7597ede, + 0x442b3f9, 0x116f3ce, 0x1b2b6db, 0xe9b5ae8 } + }, + { + { 0x2315930, 0xf4385ee, 0x27a8740, 0xc8d0298, 0xd934a43, 0x7907a8d, + 0xc582191, 0x20bc946, 0x6a405e7, 0xa4acb3e, 0x43df2f5, 0x8c1d6c8, + 0x991f0b5, 0x9df1593, 0x4d9be9d, 0xbb9df98 }, + { 0x8e4b190, 0x6362008, 0xada3a88, 0xee1421e, 0xf93b027, 0xb84f0cc, + 0x8e95091, 0x7a5d667, 0xf3e3704, 0x3974462, 0xc593e98, 0xfa6fb5e, + 0xa6477d2, 0x44b6cf7, 0xb09a562, 0xe885b57 } + }, + { + { 0x09a0c02, 0x6e339e9, 0x0e75f29, 0x57afff0, 0xfb7db03, 0x797d8d6, + 0xd25a236, 0xc6e11a3, 0x0107260, 0x643ce1c, 0x62eae1c, 0xe644ec4, + 0x3f5a3f5, 0x821d5b8, 0xc0579d6, 0xa8ad453 }, + { 0x17d43a4, 0x6518ed4, 0x3f87ccd, 0x46e76a5, 0xf9bef95, 0xd6cbaab, + 0x4f7cbcf, 0x2568832, 0x08476b4, 0x367159a, 0xbe6d324, 0x1d1b401, + 0xa605026, 0x348cb98, 0x43b6b1e, 0x144f3fe } + }, + { + { 0x7b1822c, 0xbabbd78, 0x2aa51f8, 0xd34ba7e, 0x41fbea4, 0x086f1cc, + 0x746f3d9, 0x96f7eac, 0x281ecaf, 0xad97f26, 0xa14ee2c, 0x751a905, + 0x0d7335f, 0xb4e7fe9, 0x4892ff0, 0x0d97b8f }, + { 0x5a5c40e, 0xdb8a315, 0x7ba567b, 0x64e5de7, 0x1eefe88, 0x4f155f7, + 0xfb6fbf4, 0xe2297e9, 0x6c16be5, 0xfe24bf9, 0xcdd83e2, 0x2251847, + 0x5eda444, 0x13ac2c8, 0x283275f, 0x49d1b85 } + }, + { + { 0x423e08f, 0xca08731, 0x87d2f14, 0x7046bb0, 0x3bc846c, 0x876f10c, + 0x358fbe3, 0x2202b76, 0x0e26ac6, 0x0d4fc1c, 0xb986881, 0x1fc748b, + 0x8384a18, 0x609e61c, 0x0d88e00, 0x28a72d6 }, + { 0x78c6e2f, 0x1332a31, 0xb3526a4, 0x0367919, 0x698fe3e, 0x53989e4, + 0xb16a99b, 0x14b1145, 0xddbb75f, 0xef9ec80, 0x0e53955, 0x7625624, + 0x8744ae1, 0x54e087a, 0x672b875, 0xce50e8a } + }, +}, +{ + { + { 0xa29629c, 0x4c88b2b, 0x7b2642f, 0x946559c, 0xf7ebe4c, 0x933d432, + 0x63632c9, 0x97109b6, 0xe53184d, 0x799b3fb, 0x0f069a6, 0xd462871, + 0x3a68351, 0x0c182a1, 0x9a2437a, 0x974a839 }, + { 0x2a70278, 0x29f1997, 0xd9c424b, 0x01b98b6, 0x08f4c37, 0xd85a60b, + 0x2b1da15, 0xcc3523f, 0xddffb0f, 0xf922115, 0xde84ae2, 0xee0fe4d, + 0x55365be, 0x810440c, 0x1a457e8, 0xd2f6639 } + }, + { + { 0xe2ddd05, 0x5e6879f, 0xabdfc61, 0x92a7545, 0xa5cede8, 0x7dedd63, + 0x70df4bd, 0x8a03b3f, 0x91f6cbb, 0xa5d1f65, 0x10f3fb2, 0x372fde6, + 0xa9dee05, 0x4537f9e, 0xdf7aa50, 0x7eb85bb }, + { 0xe8c504d, 0x963edf8, 0xe7bdb6b, 0x53c8dca, 0x6fedf2d, 0xa246e4c, + 0x0c55bde, 0x7553340, 0x0270a54, 0x2aa748d, 0x05860dd, 0xadb6cf0, + 0x9b84763, 0x8d31450, 0xeb405ef, 0x626720d } + }, + { + { 0x6601328, 0xa3709ae, 0x2ac2478, 0x68e94fd, 0x9d5d247, 0x3879343, + 0x392c198, 0xfa467af, 0x15df607, 0x49e7b0d, 0x61792a8, 0x8c58122, + 0x1d3762f, 0x79f7658, 0x244a39d, 0xaa38895 }, + { 0xc5cd0bc, 0xef60af9, 0xa33b3bb, 0x2b0db53, 0x251015d, 0xe3e0b1f, + 0xe64489e, 0xc608afc, 0x03651aa, 0xe52b057, 0x1c6f7b9, 0x1dda8b9, + 0xff41893, 0x833f022, 0x192818c, 0x58eb0a0 } + }, + { + { 0xfc7b5a7, 0x6c1300c, 0xa83ab33, 0x6d2ffe1, 0x9c02eef, 0x7b3cd01, + 0xba60d55, 0x6c64559, 0x19e2f73, 0x2e9c16c, 0xdbe47b1, 0x11b24ae, + 0x1b8153b, 0xc10a2ee, 0x1e02e1a, 0x35c0e08 }, + { 0x1dd6f16, 0xa9f470c, 0xf41a290, 0x4ea93b6, 0x25ee03f, 0xac240f8, + 0xb85aabd, 0x6cd88ad, 0x1be2f8f, 0x378a64a, 0x417bac1, 0xbf254da, + 0x9231142, 0x7e4e5a5, 0x3b8c057, 0x057aadc } + }, + { + { 0x80af479, 0x607c77a, 0x5ccdf74, 0xd3e01ff, 0x101b4c7, 0x9680aaf, + 0x2fc50a6, 0xd2a7be1, 0xb72d782, 0x92a788d, 0x4640b52, 0x35daf2e, + 0x39e601c, 0xc170d69, 0x7b25c2f, 0x16e05f5 }, + { 0x6fe37f8, 0x47a42a6, 0xbeca298, 0xeb74271, 0x179da16, 0x401e11e, + 0xaa53873, 0xfb8da82, 0x5bb4783, 0xd657d63, 0xfcea0b1, 0x6847758, + 0x0993154, 0x2f261fb, 0x592853a, 0x868abe3 } + }, + { + { 
0x35766ab, 0x1a4c543, 0x6f4e4ea, 0xa1c84d6, 0x60ba199, 0x5d737a6, + 0x98b15a2, 0x4a7b1e2, 0xfd967d3, 0x207877f, 0xc262b4d, 0xcaec82d, + 0x4f2a37d, 0x0b27849, 0x6ac1711, 0x3478141 }, + { 0x8fc6856, 0x28e3df1, 0x16d003f, 0xbec03f8, 0xff39ebd, 0x2bd705b, + 0x2d776d3, 0x1dcb53b, 0x5c0e7ce, 0xabafa7d, 0x4a53332, 0x5b9c8c2, + 0x9d90214, 0xe9f90d9, 0xc129690, 0x789747e } + }, + { + { 0x54e2dfa, 0x94d3c39, 0xafb2a8f, 0x919f406, 0x34e3927, 0x159ef05, + 0xa165c37, 0xcdb4d14, 0x288f337, 0xa23e5e8, 0x0f90242, 0x95867c0, + 0xe34e781, 0x2528150, 0x6657b95, 0x104e501 }, + { 0xbcdda24, 0x695a6c9, 0x23eb5fa, 0x609b995, 0x16a60f8, 0xcbce4f5, + 0xf084a29, 0xec63f7d, 0x20c811f, 0x3075ada, 0x8c716a1, 0x129a192, + 0xcd4cd4a, 0xd65f4d4, 0x62188be, 0xe18fa9c } + }, + { + { 0xbac60e3, 0x1672757, 0x577144b, 0x525b3b9, 0x887055b, 0x38fc997, + 0x31e4408, 0x7a77126, 0xcba2fcf, 0x884f173, 0x5962ac0, 0x783cbdc, + 0x22287dc, 0x4f3ed0a, 0x50e20e6, 0x8a73e34 }, + { 0xd764583, 0xe7a1cd0, 0x0d58ee6, 0x8997d8d, 0xaa13ed6, 0x0ea08e9, + 0xcf363cb, 0xed478d0, 0x5b37bf4, 0x068523d, 0x783f13c, 0x8b5a9e8, + 0x87528a9, 0xde47bbd, 0xcaec313, 0xd6499cc } + }, +}, +{ + { + { 0xe09859d, 0x54781bb, 0x7f5e648, 0x89b6e06, 0x7075824, 0xb006dfe, + 0x0717f68, 0x1731660, 0x0b4efe2, 0x9c86554, 0x5e30d8e, 0xdbdb257, + 0x3b4d50f, 0xa6a5db1, 0xfa47beb, 0x3b5662c }, + { 0x89d4a59, 0x9d4091f, 0x550a7dc, 0x790517b, 0xc52965e, 0x19eae96, + 0xb5ed7a4, 0x1a7b3c5, 0xeb16541, 0x19e9ac6, 0xef66852, 0x5f6262f, + 0xc4cda27, 0x1b83091, 0x3bf742b, 0xa4adf6f } + }, + { + { 0xa5100e7, 0x8cc2365, 0x8592422, 0x3026f50, 0x3d714d0, 0xa4de79a, + 0x90fcb30, 0xefa0d3f, 0x474ada0, 0x126d559, 0xc94350a, 0xd68fa77, + 0x0c7cb45, 0xfa80e57, 0x3985fbf, 0xe042bb8 }, + { 0xfe13dba, 0x51c80f1, 0xcf055d7, 0xeace234, 0x73f95f7, 0x6b8197b, + 0xdcdbe89, 0x9ca5a89, 0xdfd9896, 0x2124d5f, 0x9e7ca37, 0x7c69556, + 0x8babb37, 0x58e806a, 0xbaf99ce, 0x91b4cc7 } + }, + { + { 0x197e968, 0x874e253, 0x3160668, 0x36277f5, 0x8b95dbe, 0x0b65dda, + 0xf0872a1, 0x477a792, 0x314268d, 0x03a7e3a, 0x0c805c7, 0xa96c842, + 0xb7bc4a8, 0xb941968, 0x75db390, 0x79dce30 }, + { 0x6f4cc14, 0x577d4ef, 0xb5d1107, 0x5b0d205, 0x9f93624, 0x64ff20f, + 0x5034a2f, 0x0b15e31, 0x8b6f35c, 0x3a0f6bb, 0xe0d0ec5, 0x0399a84, + 0x0d5d521, 0xd0e5823, 0xcb1dd54, 0xdeb3da1 } + }, + { + { 0x182401a, 0x24684ae, 0x21a706f, 0x0b79c1c, 0xd8998af, 0xe1d81f8, + 0x4bb069f, 0xadf870f, 0xf3dd7aa, 0xd57f85c, 0xe4a40f8, 0x62d8e06, + 0x8b55aa1, 0x0c5228c, 0xa9c0a1a, 0xc34244a }, + { 0x68f544e, 0xb5c6cf9, 0xde23ab7, 0xa560533, 0x47c690c, 0xaa55120, + 0x12aaaa6, 0x20eda5b, 0x751a6a0, 0xea0a49a, 0x2baa272, 0x6d6cfff, + 0xbf4c28a, 0x95b756e, 0xe6178a4, 0xd747074 } + }, + { + { 0x221a94b, 0xa27b453, 0xe635f20, 0xd56ad13, 0x8c95117, 0x03574b0, + 0xed30b70, 0xf0ee953, 0x957796f, 0xb48d733, 0x58c336b, 0xf5d9583, + 0x82db529, 0x6170cd8, 0xec9d1ea, 0xcd3ef00 }, + { 0xe4d105f, 0xd1bea0d, 0xad6a559, 0xd2d670f, 0x52f9690, 0x652d012, + 0xc2529b0, 0x5f51fb2, 0xe89df2a, 0x5e88bf0, 0xcd686e4, 0x9a90684, + 0x882c7a1, 0xf519ccd, 0xc2f4d37, 0x933a0df } + }, + { + { 0x3f66938, 0x0720a9f, 0xd8149df, 0x99356b6, 0xa3d7f61, 0xb89c419, + 0x4ba6e31, 0xe658134, 0xab936c8, 0xd130561, 0x40dbef1, 0x0625f6c, + 0xb6bb847, 0x7b2d6a2, 0x84d506b, 0x3ca8b29 }, + { 0xfb011b0, 0x6bf729a, 0x33448c9, 0x01c3078, 0x0837420, 0x6ae9508, + 0xa207fb8, 0xf781a8d, 0x57562a9, 0xcc54d58, 0x858c5ab, 0xc9b7364, + 0x359908f, 0xdfb5035, 0x9631138, 0x8bf77fd } + }, + { + { 0xc13fbb1, 0xf523365, 0x9993ed5, 0x88532ea, 0x5a73492, 0x5318b02, + 0xe5a8f3c, 0x94bff5c, 0x306c2a0, 0x73f9e61, 0xf2668a3, 
0x00abbac, + 0x076237d, 0x23ce332, 0x34c0f9b, 0xc867f17 }, + { 0xcfd2136, 0x1e50995, 0xb2b70f8, 0x0026a6e, 0x5077a7d, 0x66cb184, + 0xa3b498e, 0xc31b2b8, 0x260ec86, 0xc12035b, 0xe1b3df0, 0x1cbee81, + 0x8d55a42, 0xfd7b804, 0xf47a8c8, 0x912a41c } + }, + { + { 0x9e157e3, 0xab9ffe7, 0x44dc158, 0x9cfe46d, 0x8a4a3ef, 0x435551c, + 0x3b7e3a8, 0x638acc0, 0x49954a7, 0x08a4ebd, 0x13194f7, 0x295390c, + 0x253892a, 0x3a2b68b, 0x25d5b11, 0xc1662c2 }, + { 0x3a5d2bb, 0xcfba072, 0xcc327c9, 0xffaf6d3, 0xc67e254, 0x6c6314b, + 0x2f32208, 0x6661631, 0xbea72e1, 0xf780f97, 0x002122f, 0x495af40, + 0x7578a99, 0x3562f24, 0x77ce51e, 0x5f479a3 } + }, +}, +{ + { + { 0x1a82a12, 0x91a5884, 0x80f3a62, 0xa754175, 0xf73417a, 0x399009f, + 0x0a8c5cd, 0x2db1fb9, 0xc046d51, 0x82c8912, 0x8f18274, 0x0a3f577, + 0x26ccae2, 0x2ad0ede, 0x8a4e9c2, 0x7d6bd8b }, + { 0x4b3de44, 0xaa0d797, 0x96ac9bb, 0xf8658b9, 0x5f6c334, 0x31e7be2, + 0x4df12c9, 0x23836ce, 0x59eb5c9, 0x029027b, 0x5b8649d, 0x2f22531, + 0xd907162, 0xa0fdf03, 0x9e80226, 0x101d9df } + }, + { + { 0x9a90835, 0xf12037a, 0xf0222a7, 0xd2d0882, 0xc3814e2, 0xeaf8d40, + 0x8b8146b, 0xa986dc6, 0x8504653, 0x147a331, 0x2feaf67, 0x734e003, + 0x602bec5, 0x6f27bbf, 0x6a688f3, 0xa1e21f1 }, + { 0x73c4ae5, 0x5a8eeab, 0xe70b412, 0x4dbaddb, 0xcfd2af1, 0x871ceba, + 0x7d7a286, 0x1860382, 0xb5bb401, 0x024059d, 0x3c39b73, 0x2557c09, + 0x6681697, 0xfc5a711, 0x891b57c, 0xf881c0f } + }, + { + { 0x8ea191a, 0x3c443f1, 0xd700ad0, 0x76faa58, 0xbe7fcbf, 0x6fe6cfa, + 0x8990ef7, 0xaefc528, 0x80004cc, 0x44e30fa, 0x6d8ef85, 0xc744adc, + 0x912df70, 0xafcd931, 0x572a6d8, 0xf62a9d1 }, + { 0x3219f27, 0x47158a0, 0xad73136, 0x76fb27e, 0xcc2d614, 0x41bb2ad, + 0xde1ec21, 0x8858cb9, 0x5f15866, 0xab402c4, 0xbc82bbf, 0x6675d5b, + 0xf1b28d3, 0x4ee9dd6, 0xe373c17, 0x875884f } + }, + { + { 0x2a67d36, 0x17806dd, 0x32c9ec1, 0xaa23a86, 0xfc1ee55, 0xd914126, + 0x653701b, 0xbf8f7bd, 0xea71367, 0x9b0111a, 0xa98e417, 0x61fd4ab, + 0x561c5a5, 0xeb45298, 0xe7af394, 0x2187b0a }, + { 0x1616dde, 0x71f12db, 0x07da7b4, 0x0617609, 0x02ddb04, 0x414d376, + 0x286fb58, 0x1100be7, 0x6f0d95b, 0xd7cf88d, 0x746d703, 0x8539d23, + 0x4e23d73, 0xdccc9d6, 0xec89680, 0xaeef1d2 } + }, + { + { 0x336508d, 0x82ccf1a, 0x5bad150, 0xa128c1f, 0x29a188d, 0x551d8c0, + 0x771404f, 0xef13dd4, 0xc37b993, 0xdd67696, 0x0dddad2, 0x428c0e2, + 0x038c94c, 0x222278d, 0x078e3f2, 0x1a24a51 }, + { 0xedb0db9, 0xd297fe6, 0x8251a87, 0x00988d2, 0xbfaa0d7, 0xbb946f8, + 0xdf45ea0, 0x380f7b9, 0xafccf5e, 0x8526415, 0xe9ec7bc, 0x909bfbf, + 0x124755c, 0x2ed7093, 0x89404e2, 0x4368028 } + }, + { + { 0x36d9ef1, 0x21b9fa0, 0xe433526, 0xfd64b7c, 0x6544849, 0xd9d7eb7, + 0xd5b54b3, 0x201620c, 0xbb61159, 0x25fab3d, 0xc53e0d3, 0x90d4eb0, + 0x9e74772, 0xba09831, 0xec1681c, 0x8749658 }, + { 0xfec316b, 0xa354349, 0xa743ea2, 0x639a9b1, 0x37c50e6, 0x2e514ca, + 0xdbaf6c5, 0x9f4a4fd, 0x6f511c9, 0x0df87ef, 0x0c00d95, 0xadd4cef, + 0xaa1433f, 0x401c0eb, 0xbb38af9, 0x3c3a59e } + }, + { + { 0xf0e7dca, 0x8706245, 0x3fb29ca, 0xad238cd, 0x9b7d8f0, 0x0330443, + 0x154f495, 0xfdcd6e6, 0x7d4ad09, 0xc67e24a, 0x5438390, 0x1b209e8, + 0xb0c211e, 0xf893b81, 0x7e11e36, 0x1aa86f0 }, + { 0xedea8b1, 0x2cca3ff, 0x3b306cd, 0x7eedd07, 0x12ee222, 0x78e37bc, + 0xbc42a1d, 0x257870b, 0x1fbd397, 0x5fb2bb9, 0x09d6c60, 0x4702470, + 0x20bdc36, 0x11748a3, 0x04280e8, 0x3ff24dc } + }, + { + { 0x9839b52, 0x0eb1c67, 0xacfbd32, 0x5bcca27, 0x74898e3, 0xb506c16, + 0x2489e5e, 0x37d662e, 0xf694887, 0x8dc0731, 0xf43f1dc, 0x571149e, + 0x66d63dc, 0x6430a37, 0xb50dd70, 0x0d2640e }, + { 0x3b2675b, 0x2b56149, 0x88c604f, 0x1b48065, 0xaafbabc, 
0x55c86a8, + 0x608aaba, 0xa7b9447, 0x04cad8c, 0xa42f635, 0xcee7788, 0x0f72b1d, + 0x755d99a, 0x1d68374, 0x5be2531, 0xd7cdd8f } + }, +}, +{ + { + { 0xbcdfee1, 0x67873bd, 0xfcd0a3f, 0xa5a0c0a, 0x3cfa3d4, 0x59389f9, + 0xe1c865c, 0x14e945c, 0x1d588cc, 0x62d2f8e, 0x8e228b4, 0xfd02f8a, + 0xb42b649, 0x208f791, 0xab397ad, 0x0e0dff1 }, + { 0x0bc6eb1, 0x30ac3d9, 0x5f313bb, 0xf14f16a, 0xe2a0ad2, 0x70fa447, + 0x5a0db84, 0x6e40685, 0xe32e1e7, 0xd52282b, 0x15ca330, 0x315a02a, + 0x867c2fe, 0x9a57a70, 0x0054923, 0x55f0765 } + }, + { + { 0xc0cf08f, 0x2d729f6, 0xebaf57f, 0x6b80138, 0x0200c25, 0x6285bcc, + 0x2cd2ac7, 0xee84519, 0x922778a, 0x28fce4d, 0xcd1011c, 0x761325c, + 0x5100e47, 0xd01f247, 0xc60d8e1, 0xc7a1665 }, + { 0x7ceb064, 0x950966d, 0x78420db, 0x0a88e85, 0xe096f29, 0x44f2cfc, + 0x640f1d2, 0x9d9325f, 0xd2426f1, 0x6a4a81f, 0x9c905ac, 0x3ed6b18, + 0x008854d, 0xba3c0e2, 0xa0d321b, 0x1df0bd6 } + }, + { + { 0x3feb1e7, 0x0117ad6, 0xf1ae02f, 0xa058ba2, 0x31b3f06, 0x5eee5aa, + 0xafacd4d, 0x540d9d4, 0x1571d91, 0x38992f4, 0xbf2c7de, 0xef2738e, + 0x92a798d, 0x28bfcab, 0x2286733, 0x37c7c5d }, + { 0x6470df0, 0xb99936e, 0x8af6a42, 0x3d762d5, 0xc74eec5, 0xa8c357a, + 0xf13afbc, 0x9917beb, 0xf2dc073, 0x28f0941, 0x6ce7df7, 0x306abf3, + 0xd6973c8, 0xa3c5f6f, 0x3677632, 0x640209b } + }, + { + { 0xe23aef7, 0xee872a2, 0xeb9b08e, 0xb497b6f, 0x3f33c63, 0xfb94d97, + 0x2b32315, 0x9ea1ff4, 0x49a4166, 0x537b492, 0xab4f8be, 0x89c7fe6, + 0xdad8f0f, 0xf68007f, 0x71b8474, 0xe56ef0b }, + { 0x3f333f9, 0x478b2e8, 0xb2607f5, 0x144e718, 0xa4c7ab5, 0x13aa605, + 0x1d0730d, 0xfc1fc99, 0x5ab3ea1, 0xe7a0437, 0x306d8d3, 0xc59986a, + 0x702a8b1, 0x24f6111, 0xe040ad2, 0x7741394 } + }, + { + { 0x60723a7, 0x34c6a25, 0xf4ea691, 0x8aabd0d, 0x5d7497f, 0x9d676a5, + 0x7d91fa4, 0x12c0957, 0x6479284, 0x581c7a8, 0xf4fd449, 0xa54f3da, + 0x4ef44cf, 0x2f89f3c, 0xc9ec97c, 0xfc266b5 }, + { 0x88b142a, 0xfcd3fbe, 0x4bd69c1, 0x9f3109f, 0xb5f5a6a, 0x08839c0, + 0x2e68303, 0x63ca850, 0xbba0a74, 0x2f0628d, 0x5d56b54, 0x743cccf, + 0x13e09fd, 0xbd4b066, 0xde2ba3e, 0x7a8415b } + }, + { + { 0xc076ab2, 0x2234a3b, 0x4977a98, 0xd6953e5, 0x31ebe2e, 0xc122158, + 0xbad78e2, 0x632145f, 0xa5c4b08, 0xd7ba78a, 0x998e32a, 0x6f4ea71, + 0x3485a63, 0x25900d2, 0x6a5176f, 0x97ac628 }, + { 0x1093f7b, 0x5df9118, 0xc844563, 0x2bf9829, 0x6272449, 0x525d99d, + 0xb5c8a18, 0x4281cb5, 0x0544a08, 0x35df278, 0xbaeb8f4, 0xf4c3d2d, + 0x5230447, 0xc7ff317, 0x5d2fbff, 0x6b4d764 } + }, + { + { 0x2b0c9cb, 0x4837f80, 0x8ce8418, 0xb65f816, 0x9fc1428, 0xdf66ea9, + 0x04ea7e8, 0x9788ee8, 0x8334e3c, 0x9eae900, 0xd6ba1b6, 0xbc91058, + 0xd7064b6, 0x634aba1, 0x397b368, 0x12d9bb3 }, + { 0xc413aa8, 0x0645c85, 0xac6b5e3, 0xb09dea6, 0x289a50b, 0x29a620d, + 0xbbcceb1, 0x104db3b, 0x87b3309, 0x42e4792, 0xec97f01, 0xdfc373e, + 0xb93f84e, 0xe953f94, 0x052dfbf, 0x3274b7f } + }, + { + { 0x1bd6fa9, 0x9d5670a, 0xdb6c4d4, 0xec42fc9, 0x1b42845, 0xaecd4ed, + 0x1b03549, 0x4eed90e, 0xbbab1fa, 0xeb3225c, 0x28a2816, 0x5345e1d, + 0x0b77d2a, 0x3741cfa, 0x7ea8caa, 0x712b19f }, + { 0x661853e, 0x42e6844, 0xe4a6e5d, 0x4cf4126, 0xc3649f6, 0x196a9cf, + 0xf21b6b1, 0x06621bc, 0x32e29ea, 0x887021c, 0x8c5680f, 0x5703aeb, + 0x660f6d7, 0x974be24, 0xc71864e, 0xaf09bad } + }, +}, +{ + { + { 0xa81b6d3, 0x3483535, 0xca037dc, 0x19e7301, 0x63ddfeb, 0x748cab7, + 0x6f01a38, 0xe5d87f6, 0x2795cd6, 0xbba4a5c, 0x615c36c, 0x411c5d4, + 0x706f412, 0xff48efc, 0x4b519df, 0x205bafc }, + { 0x5227110, 0xfcaa5be, 0x3ad0af0, 0x7832f46, 0x2642b1b, 0x34ef2c4, + 0x072f822, 0x7bbef7b, 0x923a616, 0x93cb0a8, 0x6d91ba7, 0x5df0236, + 0x42f7d21, 0x5da94f1, 0xa14e891, 
0x3478298 } + }, + { + { 0xc831d39, 0xad79a0f, 0x4803c44, 0x24d1948, 0x86aeeb2, 0x4f8a864, + 0x926f6b9, 0x0ca284b, 0x1acd7cd, 0x501829c, 0x3d12c52, 0x9f6038b, + 0xf371ef5, 0x77223ab, 0x13bf4de, 0x2e03516 }, + { 0xb4468cc, 0x7a5a4f2, 0x470ae46, 0xdcea921, 0x11be696, 0xf23b7e8, + 0x720d6fb, 0xe59ad0d, 0x2983469, 0x9eacac2, 0xc4397ee, 0x4dd4110, + 0xcbe2675, 0x4ef85bd, 0xaa7c74b, 0xe4999f7 } + }, + { + { 0x8ea1e98, 0x031838c, 0x04d96a2, 0x539b383, 0x163956e, 0x5fbdef0, + 0xce3f52a, 0x6bd4d35, 0x55e897f, 0xe538c23, 0x472dd3f, 0x6078d3a, + 0xca9f452, 0x590241e, 0xfd7fc07, 0x2bc8495 }, + { 0xead4c8c, 0x23d0c89, 0x601c66e, 0x1ea55a9, 0x4f5b833, 0x41493c9, + 0xaa5a978, 0xc49a300, 0x0c69594, 0xc98bdc9, 0xccbdc8c, 0x4e44ced, + 0x6adccbf, 0xb0d4e91, 0x32c37ae, 0xd56e36b } + }, + { + { 0x5b93152, 0x052bd40, 0x4f1dbfa, 0x688b1d4, 0xbe5cc5f, 0xe77ba1a, + 0xa6ac543, 0x11f8a38, 0xe4bb988, 0x3355fd6, 0xf8dffb4, 0xdf29c5a, + 0x81f20ee, 0x751f589, 0xda9b7fb, 0x22a0f74 }, + { 0x6397b49, 0xec8f2bc, 0x3639201, 0xff59fc9, 0xa048264, 0xb7f130a, + 0xafdc4cc, 0xe156a63, 0xb13acaf, 0x0fd7c34, 0x0cb4999, 0x87698d4, + 0x7f26f24, 0x6d6ecae, 0x0f296e2, 0xae51fad } + }, + { + { 0xdd0f58d, 0xd0ad5eb, 0x5c67880, 0x6ec6a2c, 0x9af1e0f, 0xe1ce034, + 0x3996d32, 0x0801485, 0x5e69d20, 0x59af51e, 0xaa48ecf, 0x0ef743a, + 0x7dafcb0, 0x8d3d2ea, 0x89189b6, 0x4ac4fad }, + { 0xeae97f1, 0x92d91c2, 0x62b4662, 0xef5eca2, 0xb38b10a, 0x440b213, + 0xfc661da, 0xec90187, 0xf64cf8d, 0x85f3f25, 0x457ad1b, 0xcee53ca, + 0xf517672, 0x8deed4b, 0x4761828, 0x7706fb3 } + }, + { + { 0x17494fe, 0x1577d91, 0x2fd7239, 0x52d29be, 0x0186d37, 0x9a0eef0, + 0x27fe108, 0x241d0f5, 0xe6fb59f, 0x42824ba, 0x0d48c25, 0xb8d33df, + 0x47af4b0, 0xfffdb0a, 0x073b0b6, 0x534c601 }, + { 0x51c033b, 0xe6df359, 0x86c0f94, 0x3e1002b, 0x48fb9b6, 0xa7cb555, + 0xa7bbff8, 0x999818b, 0x84d8bf2, 0xe4ba3d6, 0x6358f0a, 0x53dbb32, + 0xf2568e8, 0xeebc1e2, 0xb3e0f68, 0xc6917eb } + }, + { + { 0x19f8d13, 0xbe1bbfc, 0x2d4795c, 0xc3951b6, 0xed535a9, 0x9371c49, + 0x68cebea, 0x77c389f, 0xa141d0e, 0xfc1a947, 0xde44f8b, 0x4b48d7a, + 0x8580a26, 0x3db1f05, 0x258b5fc, 0xeed1466 }, + { 0x9854b21, 0x5daa4a1, 0x1ab1ead, 0x5bfa46f, 0x59957eb, 0xc152e35, + 0xea48ada, 0xdc84277, 0xfc169b5, 0x68709cf, 0x720e617, 0xde50ce3, + 0xdd9a832, 0xe42f262, 0x2d6ce29, 0xddffd4d } + }, + { + { 0x8fa0a56, 0xd5ba557, 0xfafaf4c, 0x0d7d0f1, 0x38b63ed, 0x7666e41, + 0x5d87f02, 0x04e6513, 0xc958f32, 0xdca8866, 0x3ce2686, 0xaa8486d, + 0xf1cbcd3, 0xe3785ca, 0x03c8335, 0x8a9b114 }, + { 0x2e0ef60, 0x5c1dca2, 0x7d3fb20, 0x775af5b, 0x2b373a8, 0xe690ffc, + 0x28330e6, 0x30fe15d, 0xdd0f393, 0x8a1022b, 0x966a828, 0x6bd7364, + 0x949208a, 0x8d4b154, 0xb9d9828, 0xfb38c6b } + }, +}, +{ + { + { 0x0340ac2, 0x6d19764, 0xecab5ff, 0x969f473, 0xc458e42, 0xead46f7, + 0x1d00eed, 0x168646a, 0xe0ce0cf, 0xf70c878, 0x8d8d15a, 0xa7291d3, + 0xfdd10cc, 0x92cf916, 0x24f86d5, 0x6d36134 }, + { 0x2d5c4b4, 0xba50d17, 0x4626f15, 0xe0af502, 0xd76098a, 0x76f3809, + 0xd6caaa8, 0x433dc27, 0x70d97a7, 0x72dc67a, 0xf5c7355, 0x935b360, + 0x179bb31, 0xdbaac93, 0x7ed1a33, 0x7673848 } + }, + { + { 0x8f9fa0d, 0x8d1ca66, 0xa02f2bf, 0x4ed95d8, 0xf630d7b, 0xd19fc79, + 0xf46fa51, 0x0448ec4, 0x623bf3f, 0xb371dd8, 0xd650e94, 0xe94fabc, + 0xcd90a70, 0x3af3fca, 0x03ce3b7, 0x0f720c4 }, + { 0xd636c3b, 0x590814c, 0x4469945, 0xcf6928d, 0x484a4c6, 0x5843aaf, + 0xf9b4722, 0xb5a4c1a, 0x6cfb2f9, 0x25116b3, 0x32c2640, 0xf248cf0, + 0x27412a1, 0x8cd059e, 0x862fc5d, 0x866d536 } + }, + { + { 0x6de4a2e, 0x156e62f, 0xaafcc78, 0x0365af7, 0x19e925e, 0x65c8618, + 0xf8b2191, 0x4db5c01, 0xad564fa, 
0x1fd26d1, 0x19c8610, 0x16bbc53, + 0x815f262, 0x0718eef, 0x27f83d1, 0x8684f47 }, + { 0xb0f48db, 0xa30fd28, 0x6ab8278, 0x6fef506, 0x1a652df, 0xd164e77, + 0xc6ebc8c, 0x5a486f3, 0xdc3132b, 0xb68b498, 0xd73323f, 0x264b6ef, + 0x69b2262, 0xc261eb6, 0x2a35748, 0xd17015f } + }, + { + { 0x7c4bb1d, 0x4241f65, 0xf5187c4, 0x5671702, 0x3973753, 0x8a9449f, + 0xcc0c0cd, 0x272f772, 0x58e280c, 0x1b7efee, 0x4b5ee9c, 0x7b32349, + 0x31142a5, 0xf23af47, 0xd62cc9e, 0x80c0e1d }, + { 0x675ffe3, 0xcbc05bf, 0x258ce3c, 0x66215cf, 0x28c9110, 0xc5d2239, + 0x2a69bc2, 0x30e12a3, 0x76a9f48, 0x5ef5e80, 0x2329d5f, 0x77964ed, + 0x8a72cf2, 0xdf81ba5, 0x6e1b365, 0x38ea70d } + }, + { + { 0x2f75c80, 0x1b18680, 0x698665a, 0x0c153a0, 0x522e8dd, 0x6f5a7fe, + 0x8ddfc27, 0x9673866, 0x0d3bdce, 0x7e421d5, 0x25001b2, 0x2d737cf, + 0x0e8490c, 0x568840f, 0xe30c8da, 0xea2610b }, + { 0x9561fd4, 0xe7b1bc0, 0x26decb0, 0xeda786c, 0x6a76160, 0x2236990, + 0x78a3da3, 0x371c714, 0x2a2d9bf, 0x1db8fce, 0x3292f92, 0x59d7b84, + 0x5a665f9, 0x8097af9, 0x542b7a9, 0x7cb4662 } + }, + { + { 0xc6b0c2f, 0xa5c53ae, 0x7312d84, 0xc4b8732, 0xc732736, 0xfc374cb, + 0x9310cc0, 0xa8d78fe, 0x65d1752, 0xd980e86, 0x6004727, 0xa62692d, + 0x0146220, 0x5d07928, 0x860fea5, 0xbd1fedb }, + { 0xb35d111, 0xcbc4f8a, 0x3e32f77, 0x5ba8cdf, 0xb614b93, 0xd5b71ad, + 0x2f8808d, 0x7b3a2df, 0x6ef2721, 0x09b89c2, 0x47c3030, 0x55a5054, + 0x2986ae6, 0x2104431, 0x2367d4c, 0x427a011 } + }, + { + { 0xc1942d8, 0xe9fe256, 0x96e3546, 0x9e7377d, 0xb0c1744, 0x43e734c, + 0x211fbca, 0x5f46821, 0x32b6203, 0x44f83dc, 0x6ad1d96, 0x8451308, + 0x2fbb455, 0x54dd519, 0x2f10089, 0xc2a1822 }, + { 0x1855bfa, 0x01055a2, 0x77078b4, 0x9e6d7b4, 0x30cea0e, 0x3f8df6d, + 0x32973f7, 0x81c2150, 0xc0b3d40, 0x17dd761, 0x50d0abe, 0x040424c, + 0x783deab, 0x5599413, 0x8f3146f, 0xde9271e } + }, + { + { 0xaf4a11d, 0x5edfd25, 0x7846783, 0x3a3c530, 0x73edd31, 0xb200868, + 0xfe0eef8, 0x74e00ec, 0x3dd78c7, 0xba65d2f, 0x71999f1, 0xab13643, + 0xde9a7e8, 0xfa9be5d, 0x87a8609, 0xeb146ce }, + { 0x65353e9, 0x76afd65, 0xd51ba1c, 0xfa7023d, 0x37ede4f, 0x7a09f22, + 0x0ba7a1b, 0xca08576, 0xb99950a, 0xd973882, 0xea5057a, 0xe894266, + 0x7f55e49, 0xd01c421, 0x5555679, 0x69cfb9c } + }, +}, +{ + { + { 0xc5d631a, 0x67867e7, 0x5bcf47b, 0x1de88c5, 0xafd1352, 0x8366d06, + 0x6e20337, 0xd7dbdef, 0x1253ec7, 0xb0f9e2f, 0x10ad240, 0x1be9845, + 0xf4a6118, 0x63ec533, 0x96ce633, 0xd5e4c5b }, + { 0x4df4a25, 0x1d0b6c3, 0x5a1b554, 0xef9486a, 0x47b6ef3, 0x2f0e59e, + 0x2ff84d7, 0x4d8042f, 0xda359c9, 0x3e74aa3, 0xd21c160, 0x1baa16f, + 0x0191cba, 0xb4cff21, 0xebc6472, 0x50032d8 } + }, + { + { 0x1fc1b13, 0xb6833e0, 0x1a5ad8f, 0x8a8b7ba, 0x622b820, 0xc0cafa2, + 0x738ed20, 0xc6663af, 0x8b18f97, 0xd894486, 0x774fbe4, 0xcf0c1f9, + 0x5be814f, 0xeedd435, 0xb57e543, 0xd81c02d }, + { 0x310bad8, 0x5e32afc, 0x9b813d1, 0x065bc81, 0x3142795, 0x8efc5fc, + 0x732d59c, 0x5006514, 0x2b5a3ce, 0x91e39df, 0xfaf4204, 0x2ad4477, + 0x4d9bd4f, 0x1a96b18, 0xa4d9c07, 0xc3fee95 } + }, + { + { 0x6b4ba61, 0xfac7df0, 0x061aaef, 0xa6ed551, 0x133f609, 0x35aa2d6, + 0x20ed13d, 0x420cfba, 0xea03d0c, 0x861c63e, 0xf936d6e, 0x75f0c56, + 0x3d9a3d5, 0xa25f68f, 0xcd9f66e, 0xba0b7fe }, + { 0x4680772, 0x292e135, 0xa73f405, 0x6f6a2db, 0x24ea9e4, 0xca6add9, + 0x268daaa, 0x81cfd61, 0xe6f147a, 0x7a4cb6c, 0xbded8f5, 0x8ec3454, + 0x11d61cb, 0xc8a893b, 0x7656022, 0x2256ffc } + }, + { + { 0x575cb78, 0x6b33271, 0xadcd23e, 0x560d305, 0xd6d834b, 0xeedbd3a, + 0x5a31e27, 0x614a64a, 0x47ee0c8, 0xe40b476, 0x8bd7c2c, 0x8ef4ff6, + 0x0b77727, 0xa5297fc, 0xbaf88ad, 0x8759208 }, + { 0x918df68, 0x86cfe64, 0xcdd882e, 
0x9d60a73, 0xb953014, 0x546b642, + 0x8bbef55, 0xbaceae3, 0xf1c3467, 0xdf58e43, 0xe9f9bab, 0x99a83fe, + 0x57a4a8b, 0xcd52cbf, 0x8ae36ec, 0xf744e96 } + }, + { + { 0xa607124, 0xb945869, 0x440e6f6, 0x810dbe9, 0x738e381, 0x9911e60, + 0x343b80b, 0x51df68c, 0xf7a3f39, 0xe424336, 0x989015c, 0x2d32acb, + 0x31019e8, 0xa69b149, 0xec12f93, 0x8a31a38 }, + { 0x97c916a, 0x0d0d369, 0x8885372, 0xdc95f3b, 0x3549040, 0xcf1a261, + 0xabe95a2, 0x60f6f5e, 0xe141325, 0xa909e9f, 0x355c865, 0x7d598f2, + 0x931a9c9, 0x70c6442, 0xb423850, 0x2354a85 } + }, + { + { 0x97f9619, 0x4cdd224, 0xc22162e, 0x4776fff, 0x0cd31c2, 0xee5ec33, + 0xf209bb8, 0x7c04c10, 0x579e211, 0x35bbfde, 0x15cdfc2, 0x0e38325, + 0xe26ffa7, 0x657e6d3, 0xc65c604, 0xc66a7c3 }, + { 0xb45e567, 0x322acd7, 0x296db9b, 0x1589cf0, 0xba1db73, 0x1fd0bd3, + 0x9337a40, 0xe882610, 0xb3035c7, 0xf505a50, 0x6ed08d7, 0x4d5af06, + 0x5eda400, 0xb3c376b, 0x1944748, 0x9c7b700 } + }, + { + { 0x70c3716, 0xd768325, 0xdd540e0, 0xda62af0, 0x6580fea, 0x76b155d, + 0x32b5464, 0x4f42acc, 0x3f5b72b, 0x881bb60, 0xe68b9ba, 0x09c130e, + 0x5c50342, 0x37ede3b, 0xfd15e7d, 0xce61a9c }, + { 0x72605d0, 0xfff1d85, 0x062abc2, 0x62ac2d3, 0xfbe43dd, 0xa85e02e, + 0xa947020, 0x859d2ba, 0x111c20b, 0x2ebc8a9, 0xa656f66, 0x7f590a7, + 0x16b21a6, 0x0e13843, 0x00c7db6, 0x29b30c5 } + }, + { + { 0x906b8de, 0x61e55e2, 0x949974d, 0x6a97e96, 0x26eef67, 0x24b52b5, + 0x1aa595a, 0x512f536, 0x3c48fcb, 0x81cc7b8, 0x28115ad, 0xa64af23, + 0x3d44b8e, 0x9edf6f9, 0x1fe22e3, 0x68d7f7c }, + { 0x520d151, 0x2b2116a, 0x6aa3efb, 0x66a0b7d, 0x9b0f791, 0x48ae70a, + 0x037db88, 0xcf12174, 0x317d9f3, 0x36868cd, 0x22fc344, 0xb573059, + 0x46a5d23, 0xbaa8526, 0x37fc10d, 0xad65691 } + }, +}, +{ + { + { 0x12c78d5, 0xcf8e5f5, 0x805cdbd, 0xeb94d98, 0x2ab50b5, 0xad1dcdf, + 0xf33cd31, 0xf33c136, 0x10aeff5, 0x0d6226b, 0xf2f8fc5, 0xf7ff493, + 0xdf57165, 0x7e520d4, 0x05271a7, 0x41fbae5 }, + { 0x76480ba, 0x72c8987, 0x25f4523, 0x2608359, 0x49f5f01, 0xed36b8d, + 0xf3d49eb, 0x3bc1dce, 0x4940322, 0x30c1c1a, 0x7e0f731, 0x78c1cda, + 0x6d05a31, 0x51f2dc8, 0x07f3522, 0x57b0aa8 } + }, + { + { 0x71f88bc, 0x7ab628e, 0x8018f21, 0xcf585f3, 0x13d64f6, 0xdbbe3a4, + 0xec493a5, 0x0f86df1, 0x7725de9, 0x8355e6c, 0xe00fe1e, 0x3954ffe, + 0x9924e32, 0xbb8978f, 0x7812714, 0x1c19298 }, + { 0xaabca8b, 0x7c4ce3e, 0x9bf7019, 0xf861eb5, 0x682e541, 0x31a84fc, + 0xacd1b92, 0x2307ca9, 0x4bf2842, 0x6f8b6ce, 0xcb9f9a9, 0xde252ac, + 0x93c46d1, 0x7f0611d, 0x751dc98, 0x8e2bd80 } + }, + { + { 0xe27d54b, 0xf2fd8fb, 0xc248071, 0x2a1e37e, 0xab8f49a, 0x2fcc888, + 0xc18a9e5, 0x42c62a3, 0x70b2446, 0xe302908, 0xc5ac55d, 0x90277fa, + 0xd6dde41, 0x8d97d56, 0x5db04fe, 0xf4cf8a9 }, + { 0xd30d077, 0x3e280f5, 0x3cb3293, 0x2c90307, 0x24eb0dd, 0xe0be2ac, + 0x8bcb4f0, 0xa2d1a49, 0xcd0cd45, 0x16db466, 0x9a80232, 0x3b28aa7, + 0x17b008e, 0xdd7e52f, 0x868e4da, 0x20685f2 } + }, + { + { 0x7c7a486, 0x0a68c14, 0xc429633, 0xd8ef234, 0xffe7506, 0x470667b, + 0x8828d51, 0x55a13c8, 0x2e44bef, 0x5f32741, 0x5929f92, 0x537d92a, + 0x31c5cd5, 0x0a01d5b, 0x67eb3d7, 0xb77aa78 }, + { 0x8b82e4d, 0x36ec45f, 0xb37b199, 0x6821da0, 0xd7fa94e, 0x8af37aa, + 0x1085010, 0xf020642, 0x7e56851, 0x9b88678, 0x52948ce, 0x35f3944, + 0xafc1361, 0x125c2ba, 0x453e332, 0x8a57d0e } + }, + { + { 0x8043664, 0xefe9948, 0xdb1aa55, 0xb8b8509, 0x332523f, 0x1a2e5a9, + 0x1045c0f, 0x5e255dd, 0x7ae7180, 0xe68dd8a, 0x45bf532, 0x55f1cf3, + 0xe63a716, 0xe00722e, 0x6116bac, 0xd1c2138 }, + { 0x1c6d1f4, 0x626221f, 0x3773278, 0x240b830, 0x88def16, 0xe393a0d, + 0xca0495c, 0x229266e, 0xd3e4608, 0x7b5c6c9, 0x7927190, 0xdc559cb, + 0xc7b3c57, 0x06afe42, 
0xb439c9b, 0x8a2ad0b } + }, + { + { 0xffc3e2f, 0xd7360fb, 0xfbd2e95, 0xf721317, 0x5748e69, 0x8cacbab, + 0x9054bb9, 0x7c89f27, 0xaa86881, 0xcbe50fa, 0x75206e4, 0x7aa05d3, + 0xc752c66, 0x1ea01bc, 0x1f2c2bc, 0x5968cde }, + { 0x09a853e, 0x487c55f, 0xe09204b, 0x82cbef1, 0xabd8670, 0xad5c492, + 0xf12dcb3, 0x7175963, 0xbf6aa06, 0x7a85762, 0xf8d5237, 0x02e5697, + 0x37c6157, 0xccf7d19, 0xc2fd59c, 0x3b14ca6 } + }, + { + { 0x1b9f77f, 0x5e610d8, 0x051b02f, 0x85876d0, 0xb8020dd, 0x5d81c63, + 0xd6ce614, 0xd0b4116, 0xaa8bf0c, 0x91810e5, 0xcbf8c66, 0xf27f91f, + 0x38480ae, 0x2e5dc5f, 0xbec7633, 0x0a13ffe }, + { 0x2bf6af8, 0x61ff649, 0x641f827, 0xe6aef2d, 0x5de5f04, 0xad5708a, + 0xcdfee20, 0xe5c3a80, 0x68fcfa2, 0x88466e2, 0xd6e1d7b, 0x8e5bb3a, + 0xed236b8, 0xa514f06, 0xa5f5274, 0x51c9c7b } + }, + { + { 0xf9bc3d8, 0xa19d228, 0x3381069, 0xf89c3f0, 0x5c3f379, 0xfee890e, + 0x32fb857, 0x3d3ef3d, 0x5b418dd, 0x3998849, 0xc46e89a, 0x6786f73, + 0x9e0f12f, 0x79691a5, 0x3bc022b, 0x76916bf }, + { 0x2cd8a0a, 0xea073b6, 0x102fdbc, 0x1fbedd4, 0xcb9d015, 0x1888b14, + 0x76655f7, 0x98f2cfd, 0x59f0494, 0xb9b5910, 0xe6986a3, 0xa3dbbe1, + 0xeaf2b04, 0xef016a5, 0xcd2d876, 0xf671ba7 } + }, +}, +{ + { + { 0x1ae05e9, 0x1dae3bf, 0x1f21fef, 0x6a02996, 0x7aec3c6, 0x95df2b9, + 0xd83189b, 0x9abbc5a, 0x2d13140, 0xaf994af, 0x86aa406, 0xc3f8846, + 0x75284c5, 0xcd77e50, 0x2a9a4d7, 0x1c1e13d }, + { 0x744b89d, 0x7f8815d, 0x2ba673e, 0xb189133, 0xd594570, 0x55ea93c, + 0xd61b041, 0x19c8a18, 0x8d2c580, 0x938ebaa, 0x05ba078, 0x9b4344d, + 0x8eaf9b7, 0x622da43, 0x9fea368, 0x809b807 } + }, + { + { 0xc33b7a2, 0x3780e51, 0x387b1c8, 0xd7a205c, 0x4be60e4, 0x79515f8, + 0x1e18277, 0xde02a8b, 0xf0d9150, 0x4645c96, 0xe0b3fd1, 0x45f8acb, + 0x9b53ac3, 0x5d532ba, 0xb0557c9, 0x7984dcd }, + { 0x8a92f01, 0x5ae5ca6, 0x9d569ca, 0xd2fbb3c, 0x0c297c1, 0x668cc57, + 0x6295e89, 0xa482943, 0xa33ad40, 0xf646bc1, 0xc3f425d, 0x066aaa4, + 0xd005de2, 0x23434cd, 0xdb35af4, 0x5aca9e9 } + }, + { + { 0x6877c56, 0x2bca35c, 0xf0ddd7d, 0xab864b4, 0x404f46c, 0x5f6aa74, + 0x539c279, 0x72be164, 0xe0283cf, 0x1b1d73e, 0xad583d9, 0xe550f46, + 0xe739ad1, 0x4ac6518, 0x8d42100, 0x6b6def7 }, + { 0xfa8468d, 0x4d36b8c, 0x5a3d7b8, 0x2cb3773, 0x5016281, 0x577f86f, + 0x9124733, 0xdb6fe5f, 0xe29e039, 0xacb6d2a, 0x580b8a1, 0x2ab8330, + 0x643b2d0, 0x130a4ac, 0x5e6884e, 0xa7996e3 } + }, + { + { 0x60a0aa8, 0x6fb6277, 0xcbe04f0, 0xe046843, 0xe6ad443, 0xc01d120, + 0xabef2fc, 0xa42a05c, 0x12ff09c, 0x6b793f1, 0xa3e5854, 0x5734ea8, + 0x775f0ad, 0xe482b36, 0xf864a34, 0x2f4f60d }, + { 0x84f2449, 0xf521c58, 0x9186a71, 0x58734a9, 0xac5eacc, 0x157f5d5, + 0x248ee61, 0x858d9a4, 0x48149c3, 0x0727e6d, 0xac9ec50, 0xd5c3eaa, + 0x20ee9b5, 0xa63a64a, 0x87be9de, 0x3f0dfc4 } + }, + { + { 0xb13e3f4, 0x836349d, 0x3e9316d, 0xebdd026, 0x324fd6c, 0x3fd61e8, + 0x0964f41, 0x85dddfa, 0x52add1b, 0x06e72de, 0x8c4a9e2, 0xb752cff, + 0xfdf09f7, 0x53b0894, 0x0bc24fd, 0xd5220ab }, + { 0xfb1981a, 0x8442b35, 0x3edd701, 0xa733a37, 0xd0ef089, 0x42b60c3, + 0x46e7bca, 0xa1b16ec, 0xa09aaf4, 0xc0df179, 0x638f3a1, 0xcd4f187, + 0x9eab1c2, 0x9af64f7, 0xd1d78e3, 0x86fed79 } + }, + { + { 0xfe29980, 0x42c8d86, 0x6575660, 0x6657b81, 0x80f92ca, 0x82d52c6, + 0x02d42be, 0x8587af1, 0x6e8bdf0, 0xb515131, 0xc333495, 0x706e2d9, + 0x9673064, 0xd53601a, 0x8219099, 0x27b1fbb }, + { 0x705f7c8, 0x3f0929d, 0xf3d6e6f, 0xff40b10, 0x026af5c, 0x673c703, + 0xe25a422, 0x2c1dce4, 0x3dad8b6, 0x5348bd7, 0xbe2c329, 0xc39b6b6, + 0xb921084, 0x47854ff, 0xb391f20, 0xb347b8b } + }, + { + { 0xeb9b774, 0x79fc841, 0xb4b6c1d, 0xf32da25, 0xfe492cb, 0xcbba76b, + 0xd623903, 0x76c51fc, 
0xcf0705a, 0x114cf6f, 0x7815daf, 0x6b72049, + 0x473382e, 0x630b362, 0x9704db5, 0xbf40c3a }, + { 0xc5456eb, 0xa8a9ddc, 0x72f2dc1, 0x2b4472a, 0xd6d6ef3, 0x9874444, + 0xa0ba5ed, 0x27e8d85, 0x194849f, 0x5d225b4, 0xebaa40d, 0xe852cd6, + 0x8d4bf3f, 0xb669c24, 0x2343991, 0xa8601eb } + }, + { + { 0x59502d3, 0x8a04854, 0xe269a7b, 0xcab27ee, 0x4875ada, 0x4179307, + 0xe2405f9, 0x179e685, 0x7b28963, 0x0d7b698, 0x422a43e, 0x80c9db8, + 0xa0f43ee, 0xf5ff318, 0x4ba7aa7, 0x7a92805 }, + { 0x0c0834e, 0xa5c79fe, 0x1f849ec, 0x837ca0d, 0x628ab7b, 0xfe0d7fa, + 0x6edd19a, 0x94bcb95, 0x2226fbf, 0xa18bc93, 0xaad54a3, 0x2795379, + 0x371129e, 0xceeacf8, 0xa588be5, 0x65ca57f } + }, +}, +{ + { + { 0x2caa330, 0x7a578b5, 0xd8ca34a, 0x7c21944, 0x6447282, 0x6c0fbbb, + 0xf90b2e5, 0xa8a9957, 0x6586b71, 0xbbe1066, 0x49138a2, 0x716a902, + 0xe7ed66d, 0x2fa6034, 0x2b9916a, 0x56f77ed }, + { 0xbddefb3, 0x69f1e26, 0x8c08420, 0xa497809, 0x09bc184, 0xc3377eb, + 0xbe6dade, 0x796ce0c, 0xd103bbb, 0x3be0625, 0x992685c, 0x01be27c, + 0x7755f9f, 0xc0e2559, 0x1c0dbfa, 0x165c40d } + }, + { + { 0x659c761, 0xc63a397, 0x630fbad, 0x10a0e5b, 0x655ac56, 0xf21e8a6, + 0xc1181e2, 0xe8580fa, 0x0a84b5c, 0xbfc2d9c, 0x7afd5d1, 0x2cdbaff, + 0xf61e85a, 0x95f1182, 0x719eaf4, 0x1173e96 }, + { 0xc6de8b9, 0xc06d55e, 0xafcbcaa, 0x1b4c8eb, 0xbc2bbcd, 0x52af5cb, + 0x77bcd10, 0x564fab8, 0xae85a6e, 0xfd53a18, 0x94c712f, 0x2257859, + 0x1352121, 0x29b11d7, 0xc40491a, 0xab1cb76 } + }, + { + { 0xce32eb4, 0xb4e8ca8, 0xb250b49, 0x7e484ac, 0xa3e31a2, 0x062c6f7, + 0x625d1fc, 0x497fd83, 0x362dda7, 0x98f821c, 0x6be3111, 0xcae1f8f, + 0x5d4fa42, 0x9077e95, 0xa65855a, 0xa589971 }, + { 0x28832a9, 0xda6321d, 0x3936e9e, 0xf9ef5dc, 0xc9797ef, 0xa37f117, + 0xdb581be, 0x0eb3c80, 0xbaa0002, 0x207c5c4, 0xf38faa0, 0xc0401b5, + 0xd0f1e6e, 0xceee523, 0xd1f0045, 0x8d27a5f } + }, + { + { 0xcf0af29, 0x9411063, 0x89a6693, 0x3043857, 0x640145e, 0x9a9fb8f, + 0x54832eb, 0x7d82fe9, 0x898c520, 0xf2789e1, 0xf948dc0, 0x448b402, + 0x68996dd, 0xeca8fdf, 0xa149b2f, 0x22227e9 }, + { 0x8e62d6a, 0x63509ff, 0x8c9c57f, 0xe98d81c, 0x1fe3bed, 0xd387407, + 0x539538f, 0xf1db013, 0x48418ce, 0xb04092e, 0xd6d9d4d, 0xbbf8e76, + 0x2cec5ae, 0x2ea9cda, 0x5078fa9, 0x8414b3e } + }, + { + { 0xd68a073, 0x5ad1cdb, 0xc18b591, 0xd4cedaf, 0x8e4c1c9, 0x7826707, + 0x9ca302a, 0x9b8d920, 0x326115b, 0x3101bd2, 0x4c2717a, 0x6f154b5, + 0x263e84b, 0x618c31b, 0xbbd6942, 0x12c4138 }, + { 0x80da426, 0xf9ead25, 0x47d9680, 0xe748e99, 0x8a4210e, 0x9b396a3, + 0xf4b8f72, 0xfaf03dd, 0x66159e7, 0xbd94a52, 0x1d4c7cb, 0x5e73049, + 0x7910f38, 0x31d1f9a, 0x08d6dd1, 0x4fd10ca } + }, + { + { 0x9f2331e, 0x4f510ac, 0x7e3dcc2, 0xee872dc, 0xa0a0c73, 0x4a11a32, + 0xaa5a630, 0x27e5803, 0x7af4a8a, 0xe5ae503, 0x9fffeb0, 0x2dcdeba, + 0x719d91f, 0x8c27748, 0xb9cc61c, 0xd3b5b62 }, + { 0xcca7939, 0x998ac90, 0x64514e5, 0xc22b598, 0xb35738a, 0x950aaa1, + 0xdab0264, 0x4b208bb, 0xa557d2e, 0x6677931, 0xf7c17d3, 0x2c696d8, + 0x3e15c51, 0x1672d4a, 0x3db0e82, 0x95fab66 } + }, + { + { 0x6ff205e, 0x3d42734, 0x0ea9fbe, 0x7f187d9, 0x466b2af, 0xbd9367f, + 0x03daf2f, 0x188e532, 0x27b54d8, 0xefe1329, 0xef70435, 0x14faf85, + 0x1ec95c4, 0xa506128, 0xc22cba7, 0xad01705 }, + { 0x6197333, 0x7d2dfa6, 0x8b4f6ed, 0xedd7f07, 0x75df105, 0xe0cb685, + 0x80f76bc, 0x47c9ddb, 0x9073c54, 0x49ab531, 0xe607f44, 0x845255a, + 0xcc74b7c, 0x0b4ed9f, 0x0f5c3a6, 0xcfb52d5 } + }, + { + { 0xc278776, 0x545c7c6, 0x98c30f0, 0x92a39ae, 0xd2f4680, 0x8aa8c01, + 0x6b7f840, 0xa5409ed, 0xdcb24e7, 0x0c450ac, 0xc5770d9, 0x5da6fb2, + 0x8658333, 0x5b8e8be, 0x67ea4ad, 0xb26bf4a }, + { 0xc7d91fa, 0x2e30c81, 
0x0eeb69f, 0x6e50a49, 0xee4bc26, 0x9458c2b, + 0x33be250, 0x419acf2, 0x87881ab, 0x79d6f81, 0x403b1be, 0x694565d, + 0x234fe1d, 0x34b3990, 0x2132b38, 0x60997d7 } + }, +}, +{ + { + { 0x26975dc, 0x00a9741, 0x6cf94e7, 0x42161c4, 0xc64ed99, 0xcc9fe4b, + 0x4680570, 0x020019a, 0x698da0d, 0x885595a, 0x77dd962, 0x008444b, + 0xa4fea0e, 0xbf3c22d, 0x2c81245, 0xc463048 }, + { 0x793ab18, 0xcb248c5, 0xeb4320b, 0x4dc7a20, 0x1572b7d, 0x9a0906f, + 0xf9ac20f, 0xd5b3019, 0x34520a3, 0x79b1bf5, 0x69b5322, 0x788dfe8, + 0x455b7e2, 0x9a05298, 0x016bca9, 0x2f4aecb } + }, + { + { 0x8745618, 0x414d379, 0xb7c983c, 0x64ba22e, 0x9f9d532, 0x9a5d19f, + 0x44a80c8, 0x81a00d8, 0xcae98d6, 0xb9e24f5, 0xaca965a, 0x6c3769c, + 0xf6e4e6d, 0x50d6081, 0x54422a6, 0x0d96980 }, + { 0x5cdd790, 0xbd7e792, 0x6a35219, 0xcff65da, 0x8b60ebe, 0x40dc363, + 0x92a50dc, 0x84bee74, 0x15ad65e, 0x57d4be4, 0x1a6d1d3, 0xc54256b, + 0x45717cc, 0x141c649, 0xcd1c736, 0x05eb609 } + }, + { + { 0x1e3c7ec, 0xfd52eab, 0x9f24895, 0xa4a5eca, 0x79fdb83, 0xaaa2a8d, + 0x72bdfda, 0xd105e60, 0x681d97e, 0x59e6ae2, 0x8e8077f, 0xfedf8e0, + 0x629e462, 0xb06d0ad, 0x96fa863, 0x8c7c2d0 }, + { 0xee8fc91, 0x5eecc4c, 0x9e61174, 0x5e83ab2, 0xb28c02d, 0x1fd8925, + 0x2072864, 0x93be538, 0x24c984e, 0xda0c886, 0xa008286, 0xdcf9f0c, + 0xa58ba75, 0x1ecb5a6, 0xc2e3c83, 0x1d9b890 } + }, + { + { 0xeeee062, 0x19e866e, 0x4f7b387, 0x31c1c7f, 0x1c06652, 0x9be6018, + 0x2b68bbb, 0xc00a93a, 0x9d52b2b, 0x54c65d6, 0xe8b744a, 0x4591416, + 0x9a64ab6, 0x641bcca, 0xab08098, 0xf22bcb1 }, + { 0xf1f726c, 0x3c0db8f, 0x9d2e6a6, 0x4f5739e, 0x45c9530, 0x5cb669b, + 0x7b472d0, 0x861b04e, 0x894da77, 0x3e30515, 0xc9ac39b, 0x3344685, + 0x73bdd29, 0x9e17305, 0x808dc85, 0x9cac12c } + }, + { + { 0x5e27087, 0xf152b86, 0x90a580e, 0x267bd85, 0x8baafc1, 0xba79cec, + 0x9442686, 0x6140ab1, 0x5b31693, 0xa67090c, 0x28b4117, 0x50a103a, + 0x0ddc08f, 0x7722e61, 0xe6569b2, 0x5d19d43 }, + { 0x5962bf6, 0x70e0c52, 0xfb5fb02, 0x808e316, 0x5b667be, 0x3fb80da, + 0xfcfacec, 0x8aa366e, 0x134280e, 0xcb0b3e7, 0xcd7d944, 0x0bf1de4, + 0xd092df5, 0x0cd23be, 0xa153a0c, 0xc9a6a79 } + }, + { + { 0x2d5a4b7, 0x1c69ad0, 0xd9e6f4a, 0x4bb28d0, 0xa984fc6, 0x815308c, + 0x9037ca5, 0x40929c7, 0x1bd0357, 0x0ea2b49, 0x42aad4e, 0xec17e5b, + 0x18e7235, 0x1f32ade, 0xa96a9d3, 0xbc60b05 }, + { 0xe20f707, 0x3b0229a, 0x56bdfad, 0xd635050, 0xd8b2e1e, 0xac2d922, + 0x235c748, 0x92b2998, 0xd766f97, 0x6002c3a, 0x1a2a862, 0x9919800, + 0xb58b684, 0x2af7567, 0xaaafce5, 0xd8fe707 } + }, + { + { 0x5df7a4b, 0x54487ab, 0xc57ccc2, 0x51cccde, 0x7510b53, 0x2394327, + 0xf555de3, 0x3a09f02, 0x1be484d, 0xa696aec, 0x37817a2, 0x56f459f, + 0x623dcb4, 0x8d8f61c, 0x5335656, 0xc52223c }, + { 0xb49914a, 0xf634111, 0x8e4f9bb, 0xbf8e1ab, 0xf4dba02, 0x2f59578, + 0xe004319, 0x2a94199, 0x654d005, 0x87931f0, 0x6fa0814, 0x7df57d9, + 0xa154031, 0xc8da316, 0x41f658b, 0x2a44ac0 } + }, + { + { 0x9e34ac6, 0xfb5f4f8, 0x97790f2, 0x0a1b10b, 0x4b8a06c, 0x58fe4e7, + 0x955f27c, 0x10c1710, 0xd5ebe19, 0x77b798a, 0x1f1c2dc, 0xaf1c35b, + 0xa1f8d69, 0xc25b8e6, 0xf76bf23, 0x49cf751 }, + { 0x436f7b7, 0x15cb2db, 0x7e74d1a, 0x186d7c2, 0xc00a415, 0x60731de, + 0x15f0772, 0xea1e156, 0x714463f, 0xf02d591, 0x51adeb1, 0x26a0c64, + 0xcc5229e, 0x20174cd, 0xefd512a, 0xb817e50 } + }, +}, +}; + +static const ge448_precomp base_i[16] = { + { + { 0x70cc05e, 0x26a82bc, 0x0938e26, 0x80e18b0, 0x511433b, 0xf72ab66, + 0x412ae1a, 0xa3d3a46, 0xa6de324, 0x0f1767e, 0x4657047, 0x36da9e1, + 0x5a622bf, 0xed221d1, 0x66bed0d, 0x4f1970c }, + { 0x230fa14, 0x08795bf, 0x7c8ad98, 0x132c4ed, 0x9c4fdbd, 0x1ce67c3, + 0x73ad3ff, 0x05a0c2d, 
0x7789c1e, 0xa398408, 0xa73736c, 0xc7624be, + 0x03756c9, 0x2488762, 0x16eb6bc, 0x693f467 } + }, + { + { 0x6ff2f8f, 0x2817328, 0xda85757, 0xb769465, 0xfd6e862, 0xf7f6271, + 0x8daa9cb, 0x4a3fcfe, 0x2ba077a, 0xda82c7e, 0x41b8b8c, 0x9433322, + 0x4316cb6, 0x6455bd6, 0xb9108af, 0x0865886 }, + { 0x88ed6fc, 0x22ac135, 0x02dafb8, 0x9a68fed, 0x7f0bffa, 0x1bdb676, + 0x8bb3a33, 0xec4e1d5, 0xce43c82, 0x56c3b9f, 0xa8d9523, 0xa6449a4, + 0xa7ad43a, 0xf706cbd, 0xbd5125c, 0xe005a8d } + }, + { + { 0x2030034, 0xa99d109, 0x6f950d0, 0x2d8cefc, 0xc96f07b, 0x7a920c3, + 0x08bc0d5, 0x9588128, 0x6d761e8, 0x62ada75, 0xbcf7285, 0x0def80c, + 0x01eedb5, 0x0e2ba76, 0x5a48dcb, 0x7a9f933 }, + { 0x2f435eb, 0xb473147, 0xf225443, 0x5512881, 0x33c5840, 0xee59d2b, + 0x127d7a4, 0xb698017, 0x86551f7, 0xb18fced, 0xca1823a, 0x0ade260, + 0xce4fd58, 0xd3b9109, 0xa2517ed, 0xadfd751 } + }, + { + { 0xeb5eaf7, 0xdf9567c, 0x78ac7d7, 0x110a6b4, 0x4706e0b, 0x2d33501, + 0x0b5a209, 0x0df9c7b, 0x568e684, 0xba4223d, 0x8c3719b, 0xd78af2d, + 0xa5291b6, 0x77467b9, 0x5c89bef, 0x079748e }, + { 0xdac377f, 0xe20d3fa, 0x72b5c09, 0x34e8669, 0xc40bbb7, 0xd8687a3, + 0xd2f84c9, 0x7b3946f, 0xa78f50e, 0xd00e40c, 0x17e7179, 0xb875944, + 0xcb23583, 0x9c7373b, 0xc90fd69, 0x7ddeda3 } + }, + { + { 0x6ab686b, 0x3d0def7, 0x49f7c79, 0x1a467ec, 0xc8989ed, 0x3e53f4f, + 0x430a0d9, 0x101e344, 0x8ad44ee, 0xa3ae731, 0xae1d134, 0xaefa6cd, + 0x824ad4d, 0xaa8cd7d, 0xed584fc, 0xef1650c }, + { 0x4f4754f, 0xa74df67, 0xef3fb8b, 0xf52cea8, 0x2971140, 0x47c32d4, + 0xa256fbb, 0x391c15d, 0xa605671, 0xc165fab, 0x87993b9, 0xf2518c6, + 0xbd5a84d, 0x2daf7ac, 0x98f12ae, 0x1560b62 } + }, + { + { 0x54dc10a, 0xef4da02, 0x5940db8, 0x6311865, 0x82f2948, 0xe20b149, + 0x5581dba, 0x67b9377, 0x04f5029, 0x422ee71, 0x5122d34, 0x5d440db, + 0x1a4c640, 0xb1e56d7, 0xc2408ee, 0xbf12abb }, + { 0x016af01, 0x0cc9f86, 0xf3d8cab, 0x88366ab, 0xa2efe12, 0x85dda13, + 0x5d00674, 0x390df60, 0x6d187f7, 0xf18f580, 0xf0c5d20, 0x28c900f, + 0x3e01733, 0xad30812, 0x54bf2fd, 0x42d35b5 } + }, + { + { 0x2ffb1f1, 0x009135f, 0x8f9c605, 0x099fc7e, 0x26bfa5a, 0xcc67da6, + 0x344552b, 0xc186d12, 0x1b339e1, 0xb523250, 0xc9708c5, 0x70a544f, + 0x1e928e7, 0x06baaec, 0xef0f50f, 0x0baedd2 }, + { 0xbf479e5, 0x535d6d8, 0xe4ec3e9, 0x156e536, 0xddb9be2, 0x3165741, + 0x59fd736, 0x988af71, 0x2e33ddd, 0x13d8a78, 0x4e69002, 0x5460421, + 0x804a268, 0x34d56e0, 0x0e52a4c, 0xc59b84f } + }, + { + { 0x24729d9, 0x525d45f, 0x8712327, 0x5768aba, 0x43035db, 0xa25e43b, + 0x927ef21, 0x15a1ee8, 0x6056112, 0xa785d21, 0xd508af9, 0x45e2fbf, + 0x37ba969, 0xb6f721a, 0x216d8d3, 0x30d6d8c }, + { 0x52074c3, 0x3065e08, 0x2a0684e, 0xfa40b4a, 0x763f955, 0x851325a, + 0x9f25900, 0xd4ef19c, 0xf665756, 0x799c869, 0x3312990, 0x7b05222, + 0x28db802, 0xc986c2b, 0x28ade0a, 0xf48fb8f } + }, + { + { 0x1649b68, 0x1e46173, 0x5beb9dc, 0xa96e5d6, 0x481935d, 0x765ddff, + 0x9f3bf2a, 0x6cf132c, 0x7c35658, 0x9f6c5c9, 0x4696e60, 0x99cd139, + 0x9c0d5e4, 0x99fa924, 0x8845a95, 0x1acd063 }, + { 0x3636087, 0x0b06541, 0xea17b7f, 0xea20e78, 0x6161967, 0x20afc5f, + 0xdc81028, 0xfd6c8a2, 0xe32c8fd, 0x4ef1357, 0x00e4a88, 0x8aa4004, + 0x48cb82f, 0xd6fcaef, 0xb3cd4fa, 0x7ba7c6d } + }, + { + { 0xd19c7ab, 0xf843473, 0xc655c4d, 0x968e76d, 0xc4b9c2f, 0x52c87d9, + 0xe4aa082, 0x65f641a, 0x33c3603, 0x491a397, 0x5810098, 0xa606ffe, + 0x8bf8ad4, 0x09920e6, 0x6db7882, 0x691a0c8 }, + { 0xa4d3ef5, 0x5205883, 0xacf2efe, 0xee839b7, 0xc00ca66, 0x4b78e2a, + 0xf9fcb91, 0xbe3f071, 0xbf6943a, 0x61e66c9, 0x061b79d, 0xe9b4e57, + 0x56c06bd, 0x8d1b01b, 0xdf76ae5, 0x0dfa315 } + }, + { + { 0xf1fd093, 0x803df65, 
0x489b77e, 0x1cd6523, 0xc20e295, 0x2cd2e15, + 0x9b912d1, 0xcd490be, 0x2e886d2, 0xdd9a2ff, 0xfe9d72a, 0xa3c836d, + 0x298e0c1, 0xfcad5f2, 0x4bcf067, 0xed126e2 }, + { 0x3dc81bc, 0x1e33953, 0xece6a08, 0xbea4d76, 0x991b252, 0x1d15de3, + 0xe6daf97, 0x74cc5cf, 0x0826493, 0x5ad343f, 0x1064049, 0x2d38a47, + 0xffcfa4d, 0xf7f47b9, 0x418066c, 0xef14490 } + }, + { + { 0x9bb55ab, 0x4e7f86b, 0x3f496a3, 0x310d785, 0x0dec42c, 0xbd682fc, + 0x411d32a, 0xbde047a, 0xc5a5ea2, 0xea639b4, 0xba08fa1, 0x5052078, + 0x07729f2, 0xc968b23, 0x23d3e28, 0x567b5a6 }, + { 0x977fbf7, 0x171e825, 0xbe990aa, 0x0319c70, 0xe12cd69, 0x8f65023, + 0xf5015e6, 0x1fb9b19, 0x3568a7c, 0x0083f60, 0x1f3c5ac, 0xba3d30b, + 0x3d7a988, 0xe7b509d, 0xcd0f6b6, 0x2318b99 } + }, + { + { 0x93ab2cf, 0x54d3b87, 0xd2d8306, 0x366abea, 0xd7a4977, 0x66e8eb6, + 0xae0072e, 0xa61888c, 0xdbc3315, 0x9eeeef5, 0x163e7f5, 0x93f09db, + 0x59ade9a, 0xee90959, 0xce59be0, 0xaf7f578 }, + { 0x5ece59e, 0x24bfd8d, 0x3689523, 0x8aa698b, 0x2de92cf, 0xa9a65de, + 0xa6ad300, 0xec11dbc, 0x09f88ca, 0x217f3fa, 0xb4d6af7, 0xf6c33e3, + 0x1d86d2d, 0xcd3bfa2, 0x5f13f25, 0x1497f83 } + }, + { + { 0xcd03d1d, 0xa579568, 0xe158af6, 0xd717cda, 0x389a19f, 0x59eda97, + 0x099e99c, 0xb32c370, 0xdabb591, 0xa2dba91, 0x77c2c97, 0x6d697d5, + 0xd43fa6d, 0x5423fc2, 0x0b382bf, 0x56ea8a5 }, + { 0xd80c11a, 0x4a987ba, 0x7d590a5, 0xe4cde21, 0xf97e559, 0x3dd8860, + 0x43b593c, 0xff45e25, 0x5343cb5, 0x00eb453, 0x7bbfbdd, 0x06b9b99, + 0x16aea24, 0x4da36b7, 0x57a624e, 0x2476517 } + }, + { + { 0x3474e0d, 0x32207d0, 0xb41cc73, 0x3ffbf04, 0x319eb39, 0x5c4dc45, + 0x758b463, 0xfee29be, 0xc30c7a7, 0xcc8a381, 0x9fe0e53, 0x147f4e4, + 0xe35a2de, 0x05b2e26, 0x92f3666, 0x4362f02 }, + { 0x8474b85, 0x0476d0c, 0xccaf108, 0x9d8c65f, 0x1d54b6a, 0xf58d404, + 0xf38e4b0, 0x3ee6862, 0x3b44f54, 0x7c7c9d5, 0x0fb0db5, 0x36a3fd8, + 0x18a8ac8, 0xfcd94ba, 0x8f35c05, 0xc1b1d56 } + }, + { + { 0x1bdd30d, 0x16539fc, 0x8df4afb, 0x1356e53, 0x5a1aedb, 0xc0545d8, + 0x489396b, 0xeb2037a, 0x5660894, 0x897fcbd, 0xb7d104a, 0x02a58a9, + 0xc96b980, 0x57fa24c, 0x5bd8946, 0xf6448e3 }, + { 0x8805c83, 0xee72741, 0x992cfc6, 0x10fa274, 0x9e66b21, 0x9514193, + 0xbd08009, 0xe0ffa44, 0x20da22b, 0x1743322, 0x59e6831, 0x4891ff3, + 0xa7d687b, 0x407ed73, 0x51d99cf, 0x2fb4e07 } + }, +}; +#endif + +/* Set the 0 point. + * + * p [in] Point to set to 0. + */ +static WC_INLINE void ge448_0(ge448_p2 *p) +{ + fe448_0(p->X); + fe448_1(p->Y); + fe448_1(p->Z); +} + +/* Set the precompute point to 0. + * + * p [in] Precompute point to set. + */ +static void ge448_precomp_0(ge448_precomp *p) +{ + fe448_0(p->x); + fe448_1(p->y); +} + +/* Double the point on the Twisted Edwards curve. r = 2.p + * + * r [in] Point to hold result. + * p [in] Point to double. 
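+ *
+ * A doubling-specific note (matching the EFD "dbl-2008" style formulas):
+ * with B = (X1+Y1)^2, C = X1^2, D = Y1^2, E = C+D and J = E-2*Z1^2, the
+ * body below computes X3 = (B-E)*J, Y3 = E*(C-D) and Z3 = E*J, i.e. only
+ * 4 squarings and 3 multiplications, cheaper than a general addition.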
+ */
+static WC_INLINE void ge448_dbl(ge448_p2 *r, const ge448_p2 *p)
+{
+    ge448 t0[GE448_WORDS];
+    ge448 t1[GE448_WORDS];
+
+    fe448_add(t0, p->X, p->Y);    /* t0 = B1 = X1+Y1        */
+    fe448_reduce(t0);
+    fe448_sqr(t0, t0);            /* t0 = B  = (X1+Y1)^2    */
+    fe448_sqr(r->X, p->X);        /* r->X = C = X1^2        */
+    fe448_sqr(r->Y, p->Y);        /* r->Y = D = Y1^2        */
+    fe448_add(t1, r->X, r->Y);    /* t1 = E  = C+D          */
+    fe448_reduce(t1);
+    fe448_sub(r->Y, r->X, r->Y);  /* r->Y = Y31 = C-D       */
+    fe448_sqr(r->Z, p->Z);        /* r->Z = H  = Z1^2       */
+    fe448_add(r->Z, r->Z, r->Z);  /* r->Z = J1 = 2*H        */
+    fe448_sub(r->Z, t1, r->Z);    /* r->Z = J  = E-2*H      */
+    fe448_reduce(r->Z);
+    fe448_sub(r->X, t0, t1);      /* r->X = X31 = B-E       */
+    fe448_mul(r->X, r->X, r->Z);  /* r->X = X3 = (B-E)*J    */
+    fe448_mul(r->Y, r->Y, t1);    /* r->Y = Y3 = E*(C-D)    */
+    fe448_mul(r->Z, t1, r->Z);    /* r->Z = Z3 = E*J        */
+}
+
+/* Add two points on the Twisted Edwards curve. r = p + q
+ * Second point has z-ordinate of 1.
+ *
+ * r  [in]  Point to hold result.
+ * p  [in]  Point to add.
+ * q  [in]  Point to add.
+ */
+static WC_INLINE void ge448_madd(ge448_p2 *r, const ge448_p2 *p,
+                                 const ge448_precomp *q)
+{
+    ge448 t0[GE448_WORDS];
+    ge448 t1[GE448_WORDS];
+    ge448 t2[GE448_WORDS];
+    ge448 t3[GE448_WORDS];
+    ge448 t4[GE448_WORDS];
+
+    /* p->Z = A */
+    fe448_mul(t1, p->X, q->x);    /* t1 = C = X1*X2             */
+    fe448_mul(t2, p->Y, q->y);    /* t2 = D = Y1*Y2             */
+    fe448_mul(t3, t1, t2);        /* t3 = E1 = C*D              */
+    fe448_mul39081(t3, t3);       /* t3 = E = d*C*D             */
+    fe448_sqr(t0, p->Z);          /* t0 = B = A^2               */
+    fe448_add(t4, t0, t3);        /* t4 = F = B-(-E)            */
+    fe448_sub(t0, t0, t3);        /* t0 = G = B+(-E)            */
+    fe448_reduce(t0);
+    fe448_add(r->X, p->X, p->Y);  /* r->X = H1 = X1+Y1          */
+    fe448_reduce(r->X);
+    fe448_add(r->Y, q->x, q->y);  /* r->Y = H2 = X2+Y2          */
+    fe448_reduce(r->Y);
+    fe448_mul(r->X, r->X, r->Y);  /* r->X = H = (X1+Y1)*(X2+Y2) */
+    fe448_sub(r->X, r->X, t1);    /* r->X = X31 = H-C           */
+    fe448_sub(r->X, r->X, t2);    /* r->X = X32 = H-C-D         */
+    fe448_reduce(r->X);
+    fe448_mul(r->X, r->X, t4);    /* r->X = X33 = F*(H-C-D)     */
+    fe448_mul(r->X, r->X, p->Z);  /* r->X = X3 = A*F*(H-C-D)    */
+    fe448_sub(r->Y, t2, t1);      /* r->Y = Y31 = D-C           */
+    fe448_reduce(r->Y);
+    fe448_mul(r->Y, r->Y, t0);    /* r->Y = Y32 = G*(D-C)       */
+    fe448_mul(r->Y, r->Y, p->Z);  /* r->Y = Y3 = A*G*(D-C)      */
+    fe448_mul(r->Z, t4, t0);      /* r->Z = Z3 = F*G            */
+}
+
+/* Subtract one point from another on the Twisted Edwards curve. r = p - q
+ * Second point has z-ordinate of 1.
+ *
+ * r  [in]  Point to hold result.
+ * p  [in]  Point to subtract from.
+ * q  [in]  Point to subtract.
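+ *
+ * Mirror of ge448_madd with q negated: on an Edwards curve -q = (-x, y),
+ * so C flips sign and H2 becomes Y2-X2, which is why the add/sub pairs on
+ * F/G and on the X/Y numerators below are swapped relative to ge448_madd.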
+ */
+static WC_INLINE void ge448_msub(ge448_p2 *r, const ge448_p2 *p,
+                                 const ge448_precomp *q)
+{
+    ge448 t0[GE448_WORDS];
+    ge448 t1[GE448_WORDS];
+    ge448 t2[GE448_WORDS];
+    ge448 t3[GE448_WORDS];
+    ge448 t4[GE448_WORDS];
+
+    /* p->Z = A */
+    fe448_sqr(t0, p->Z);          /* t0 = B = A^2               */
+    fe448_mul(t1, p->X, q->x);    /* t1 = C = X1*X2             */
+    fe448_mul(t2, p->Y, q->y);    /* t2 = D = Y1*Y2             */
+    fe448_mul(t3, t1, t2);        /* t3 = E1 = C*D              */
+    fe448_mul39081(t3, t3);       /* t3 = E = d*C*D             */
+    fe448_sub(t4, t0, t3);        /* t4 = F = B-(--E)           */
+    fe448_add(t0, t0, t3);        /* t0 = G = B+(--E)           */
+    fe448_reduce(t0);
+    fe448_add(r->X, p->X, p->Y);  /* r->X = H1 = X1+Y1          */
+    fe448_reduce(r->X);
+    fe448_sub(r->Y, q->y, q->x);  /* r->Y = H2 = Y2+(-X2)       */
+    fe448_reduce(r->Y);
+    fe448_mul(r->X, r->X, r->Y);  /* r->X = H = (X1+Y1)*(X2+Y2) */
+    fe448_add(r->X, r->X, t1);    /* r->X = X31 = H-(-C)        */
+    fe448_sub(r->X, r->X, t2);    /* r->X = X32 = H-(-C)-D      */
+    fe448_reduce(r->X);
+    fe448_mul(r->X, r->X, t4);    /* r->X = X33 = F*(H-C-D)     */
+    fe448_mul(r->X, r->X, p->Z);  /* r->X = X3 = A*F*(H-C-D)    */
+    fe448_add(r->Y, t2, t1);      /* r->Y = Y31 = D-C           */
+    fe448_reduce(r->Y);
+    fe448_mul(r->Y, r->Y, t0);    /* r->Y = Y32 = G*(D-C)       */
+    fe448_mul(r->Y, r->Y, p->Z);  /* r->Y = Y3 = A*G*(D-C)      */
+    fe448_mul(r->Z, t4, t0);      /* r->Z = Z3 = F*G            */
+}
+
+/* Add two points on the Twisted Edwards curve. r = p + q
+ *
+ * r  [in]  Point to hold result.
+ * p  [in]  Point to add.
+ * q  [in]  Point to add.
+ */
+static WC_INLINE void ge448_add(ge448_p2* r, const ge448_p2* p,
+                                const ge448_p2* q)
+{
+    ge448 t0[GE448_WORDS];
+    ge448 t1[GE448_WORDS];
+    ge448 t2[GE448_WORDS];
+    ge448 t3[GE448_WORDS];
+    ge448 t4[GE448_WORDS];
+
+    fe448_mul(t1, p->X, q->X);    /* t1 = C = X1*X2             */
+    fe448_mul(t2, p->Y, q->Y);    /* t2 = D = Y1*Y2             */
+    fe448_mul(t3, t1, t2);        /* t3 = E1 = C*D              */
+    fe448_mul39081(t3, t3);       /* t3 = E = d*C*D             */
+    fe448_mul(r->Z, p->Z, q->Z);  /* r->Z = A = Z1*Z2           */
+    fe448_sqr(t0, r->Z);          /* t0 = B = A^2               */
+    fe448_add(t4, t0, t3);        /* t4 = F = B-(-E)            */
+    fe448_sub(t0, t0, t3);        /* t0 = G = B+(-E)            */
+    fe448_reduce(t0);
+    fe448_add(r->X, p->X, p->Y);  /* r->X = H1 = X1+Y1          */
+    fe448_reduce(r->X);
+    fe448_add(r->Y, q->X, q->Y);  /* r->Y = H2 = X2+Y2          */
+    fe448_reduce(r->Y);
+    fe448_mul(r->X, r->X, r->Y);  /* r->X = H = (X1+Y1)*(X2+Y2) */
+    fe448_sub(r->X, r->X, t1);    /* r->X = X31 = H-C           */
+    fe448_sub(r->X, r->X, t2);    /* r->X = X32 = H-C-D         */
+    fe448_reduce(r->X);
+    fe448_mul(r->X, r->X, t4);    /* r->X = X33 = F*(H-C-D)     */
+    fe448_mul(r->X, r->X, r->Z);  /* r->X = X3 = A*F*(H-C-D)    */
+    fe448_sub(r->Y, t2, t1);      /* r->Y = Y31 = D-C           */
+    fe448_reduce(r->Y);
+    fe448_mul(r->Y, r->Y, t0);    /* r->Y = Y32 = G*(D-C)       */
+    fe448_mul(r->Y, r->Y, r->Z);  /* r->Y = Y3 = A*G*(D-C)      */
+    fe448_mul(r->Z, t4, t0);      /* r->Z = Z3 = F*G            */
+}
+
+/* Subtract one point from another on the Twisted Edwards curve. r = p - q
+ *
+ * r  [in]  Point to hold result.
+ * p  [in]  Point to subtract from.
+ * q  [in]  Point to subtract.
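+ *
+ * General-Z variant of ge448_msub: q's z-ordinate is no longer 1, so
+ * A = Z1*Z2 costs an extra multiplication and B is taken as the square of
+ * that product.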
+ */
+static WC_INLINE void ge448_sub(ge448_p2 *r, const ge448_p2 *p,
+                                const ge448_p2 *q)
+{
+    ge448 t0[GE448_WORDS];
+    ge448 t1[GE448_WORDS];
+    ge448 t2[GE448_WORDS];
+    ge448 t3[GE448_WORDS];
+    ge448 t4[GE448_WORDS];
+
+    fe448_mul(t1, p->X, q->X);    /* t1 = C = X1*X2             */
+    fe448_mul(t2, p->Y, q->Y);    /* t2 = D = Y1*Y2             */
+    fe448_mul(t3, t1, t2);        /* t3 = E1 = C*D              */
+    fe448_mul39081(t3, t3);       /* t3 = E = d*C*D             */
+    fe448_mul(r->Z, p->Z, q->Z);  /* r->Z = A = Z1*Z2           */
+    fe448_sqr(t0, r->Z);          /* t0 = B = A^2               */
+    fe448_sub(t4, t0, t3);        /* t4 = F = B-(--E)           */
+    fe448_add(t0, t0, t3);        /* t0 = G = B+(--E)           */
+    fe448_reduce(t0);
+    fe448_add(r->X, p->X, p->Y);  /* r->X = H1 = X1+Y1          */
+    fe448_reduce(r->X);
+    fe448_sub(r->Y, q->Y, q->X);  /* r->Y = H2 = Y2+(-X2)       */
+    fe448_reduce(r->Y);
+    fe448_mul(r->X, r->X, r->Y);  /* r->X = H = (X1+Y1)*(X2+Y2) */
+    fe448_add(r->X, r->X, t1);    /* r->X = X31 = H-(-C)        */
+    fe448_sub(r->X, r->X, t2);    /* r->X = X32 = H-(-C)-D      */
+    fe448_reduce(r->X);
+    fe448_mul(r->X, r->X, t4);    /* r->X = X33 = F*(H-C-D)     */
+    fe448_mul(r->X, r->X, r->Z);  /* r->X = X3 = A*F*(H-C-D)    */
+    fe448_add(r->Y, t2, t1);      /* r->Y = Y31 = D-C           */
+    fe448_reduce(r->Y);
+    fe448_mul(r->Y, r->Y, t0);    /* r->Y = Y32 = G*(D-C)       */
+    fe448_mul(r->Y, r->Y, r->Z);  /* r->Y = Y3 = A*G*(D-C)      */
+    fe448_mul(r->Z, t4, t0);      /* r->Z = Z3 = F*G            */
+}
+
+/* Convert point to byte array assuming projective ordinates.
+ *
+ * b  [in]  Array of bytes to hold compressed point.
+ * p  [in]  Point to convert.
+ */
+void ge448_to_bytes(uint8_t *b, const ge448_p2 *p)
+{
+    ge448 recip[GE448_WORDS];
+    ge448 x[GE448_WORDS];
+    ge448 y[GE448_WORDS];
+
+    fe448_invert(recip, p->Z);
+    fe448_mul(x, p->X, recip);
+    fe448_mul(y, p->Y, recip);
+    fe448_to_bytes(b, y);
+    b[56] = fe448_isnegative(x) << 7;
+}
+
+/* Convert point to byte array assuming z is 1.
+ *
+ * b  [in]  Array of bytes to hold compressed point.
+ * p  [in]  Point to convert.
+ */
+static void ge448_p2z1_to_bytes(uint8_t *b, const ge448_p2 *p)
+{
+    fe448_to_bytes(b, p->Y);
+    b[56] = fe448_isnegative(p->X) << 7;
+}
+
+/* Compress the point to y-ordinate and negative bit.
+ *
+ * out  [in]  Array of bytes to hold compressed key.
+ * xIn  [in]  The x-ordinate.
+ * yIn  [in]  The y-ordinate.
+ */
+int ge448_compress_key(uint8_t* out, const uint8_t* xIn, const uint8_t* yIn)
+{
+    ge448_p2 g;
+    uint8_t bArray[ED448_KEY_SIZE];
+    uint32_t i;
+
+    fe448_from_bytes(g.X, xIn);
+    fe448_from_bytes(g.Y, yIn);
+    fe448_1(g.Z);
+
+    ge448_p2z1_to_bytes(bArray, &g);
+
+    for (i = 0; i < 57; i++) {
+        out[57 - 1 - i] = bArray[i];
+    }
+
+    return 0;
+}
+
+/* Determine whether the value is negative.
+ *
+ * b  [in]  An 8-bit signed value.
+ * returns 1 when negative and 0 otherwise.
+ */
+static uint8_t negative(int8_t b)
+{
+    return ((uint8_t)b) >> 7;
+}
+
+/* Determine whether two values are equal. a == b
+ * Constant time implementation.
+ *
+ * a  [in]  An 8-bit unsigned value.
+ * b  [in]  An 8-bit unsigned value.
+ * returns 1 when equal and 0 otherwise.
+ */
+static uint8_t equal(uint8_t a, uint8_t b)
+{
+    return (uint8_t)(((uint32_t)(a ^ b) - 1) >> 31);
+}
+
+/* Conditionally move the point into the result point when the two values are
+ * equal. Constant time implementation.
+ *
+ * r  [in]  Point to conditionally overwrite.
+ * p  [in]  Point to conditionally copy.
+ * b  [in]  An 8-bit unsigned value.
+ * n  [in]  An 8-bit unsigned value.
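+ *
+ * A sketch of the intended call pattern (as used by ge448_select below):
+ *
+ *     cmov(r, &base[pos][0], babs, 1);    copies entry 0 iff babs == 1
+ *     cmov(r, &base[pos][1], babs, 2);    copies entry 1 iff babs == 2
+ *
+ * Every table entry is read and equal() is branch-free, so the selected
+ * index is not revealed through branches or memory access patterns.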
+ */
+static WC_INLINE void cmov(ge448_precomp* r, const ge448_precomp* p, uint8_t b,
+                           uint8_t n)
+{
+    b = equal(b, n);
+    fe448_cmov(r->x, p->x, b);
+    fe448_cmov(r->y, p->y, b);
+}
+
+/* Select one of the entries from the precomputed table and negate if required.
+ * Constant time implementation.
+ *
+ * r    [in]  Point to hold chosen point.
+ * pos  [in]  Position of array of entries to choose from.
+ * b    [in]  Index of point to select. -ve value means negate the point.
+ */
+static void ge448_select(ge448_precomp* r, int pos, int8_t b)
+{
+    ge448 minusx[16];
+    uint8_t bnegative = negative(b);
+    uint8_t babs = b - (((-bnegative) & b) << 1);
+
+    ge448_precomp_0(r);
+    cmov(r, &base[pos][0], babs, 1);
+    cmov(r, &base[pos][1], babs, 2);
+    cmov(r, &base[pos][2], babs, 3);
+    cmov(r, &base[pos][3], babs, 4);
+    cmov(r, &base[pos][4], babs, 5);
+    cmov(r, &base[pos][5], babs, 6);
+    cmov(r, &base[pos][6], babs, 7);
+    cmov(r, &base[pos][7], babs, 8);
+    fe448_neg(minusx, r->x);
+    fe448_cmov(r->x, minusx, bnegative);
+}
+
+/* Perform a scalar multiplication of the base point. r = a * base
+ *
+ * r  [in]  Point to hold result.
+ * a  [in]  Scalar to multiply by.
+ */
+void ge448_scalarmult_base(ge448_p2* r, const uint8_t* a)
+{
+    int8_t carry;
+    ge448_precomp t;
+    int i;
+    int8_t e[113];
+
+    carry = 0;
+    for (i = 0; i < 56; ++i) {
+        e[2 * i + 0] = ((a[i] >> 0) & 0xf) + carry;
+        carry = e[2 * i + 0] + 8;
+        carry >>= 4;
+        e[2 * i + 0] -= carry << 4;
+
+        e[2 * i + 1] = ((a[i] >> 4) & 0xf) + carry;
+        carry = e[2 * i + 1] + 8;
+        carry >>= 4;
+        e[2 * i + 1] -= carry << 4;
+    }
+    e[112] = carry;
+    /* each e[i] is between -8 and 8 */
+
+    /* Odd indices first - their sum is multiplied by 16 (four doublings)
+     * before the even indices are added. */
+    ge448_select(&t, 0, e[1]);
+    fe448_copy(r->X, t.x);
+    fe448_copy(r->Y, t.y);
+    fe448_1(r->Z);
+    for (i = 3; i < 112; i += 2) {
+        ge448_select(&t, i / 2, e[i]);
+        ge448_madd(r, r, &t);
+    }
+
+    ge448_dbl(r, r);
+    ge448_dbl(r, r);
+    ge448_dbl(r, r);
+    ge448_dbl(r, r);
+
+    /* Add even indices */
+    for (i = 0; i <= 112; i += 2) {
+        ge448_select(&t, i / 2, e[i]);
+        ge448_madd(r, r, &t);
+    }
+}
+
+/* Create a sliding window for the scalar multiplication.
+ *
+ * r  [in]  Array of indices.
+ * a  [in]  Scalar to break up.
+ */
+static void slide(int8_t *r, const uint8_t *a)
+{
+    int i;
+    int b;
+    int k;
+
+    for (i = 0; i < 448; ++i) {
+        r[i] = (a[i >> 3] >> (i & 7)) & 1;
+    }
+
+    for (i = 0; i < 448; ++i) {
+        if (r[i] == 0) {
+            continue;
+        }
+
+        for (b = 1; b <= 7 && i + b < 448; ++b) {
+            if (r[i + b] == 0) {
+                continue;
+            }
+
+            if (r[i] + (r[i + b] << b) <= 31) {
+                r[i] += r[i + b] << b; r[i + b] = 0;
+            }
+            else if (r[i] - (r[i + b] << b) >= -31) {
+                r[i] -= r[i + b] << b;
+                for (k = i + b; k < 448; ++k) {
+                    if (!r[k]) {
+                        r[k] = 1;
+                        break;
+                    }
+                    r[k] = 0;
+                }
+            }
+            else {
+                break;
+            }
+        }
+    }
+}
+
+/* Perform a scalar multiplication of the base point and public point.
+ *   r = a * p + b * base
+ * Uses a sliding window of 5 bits.
+ * Not constant time.
+ *
+ * r  [in]  Point to hold result.
+ * a  [in]  Scalar to multiply p by.
+ * p  [in]  Point to multiply by.
+ * b  [in]  Scalar to multiply base point by.
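+ *
+ * Sketch of the interleaved (Shamir's trick) loop over the recoded digits
+ * produced by slide():
+ *
+ *     for (i = highest non-zero index; i >= 0; i--) {
+ *         ge448_dbl(r, r);
+ *         if (aslide[i] != 0) add or subtract pi[|aslide[i]|/2];
+ *         if (bslide[i] != 0) madd or msub base_i[|bslide[i]|/2];
+ *     }
+ *
+ * Only the odd multiples p, 3p, .., 31p are tabled, so a digit d selects
+ * entry |d|/2.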
+ */
+int ge448_double_scalarmult_vartime(ge448_p2 *r, const uint8_t *a,
+                                    const ge448_p2 *p, const uint8_t *b)
+{
+    int8_t aslide[448];
+    int8_t bslide[448];
+    ge448_p2 pi[16]; /* p,3p,..,31p */
+    ge448_p2 p2;
+    int i;
+
+    slide(aslide, a);
+    slide(bslide, b);
+
+    fe448_copy(pi[0].X, p->X);
+    fe448_copy(pi[0].Y, p->Y);
+    fe448_copy(pi[0].Z, p->Z);
+    ge448_dbl(&p2, p);
+    ge448_add(&pi[1], &p2, &pi[0]);
+    ge448_add(&pi[2], &p2, &pi[1]);
+    ge448_add(&pi[3], &p2, &pi[2]);
+    ge448_add(&pi[4], &p2, &pi[3]);
+    ge448_add(&pi[5], &p2, &pi[4]);
+    ge448_add(&pi[6], &p2, &pi[5]);
+    ge448_add(&pi[7], &p2, &pi[6]);
+    ge448_add(&pi[8], &p2, &pi[7]);
+    ge448_add(&pi[9], &p2, &pi[8]);
+    ge448_add(&pi[10], &p2, &pi[9]);
+    ge448_add(&pi[11], &p2, &pi[10]);
+    ge448_add(&pi[12], &p2, &pi[11]);
+    ge448_add(&pi[13], &p2, &pi[12]);
+    ge448_add(&pi[14], &p2, &pi[13]);
+    ge448_add(&pi[15], &p2, &pi[14]);
+
+    ge448_0(r);
+
+    /* Find first index that is not 0. */
+    for (i = 447; i >= 0; --i) {
+        if (aslide[i] || bslide[i]) {
+            break;
+        }
+    }
+
+    for (; i >= 0; --i) {
+        ge448_dbl(r, r);
+
+        if (aslide[i] > 0)
+            ge448_add(r, r, &pi[aslide[i]/2]);
+        else if (aslide[i] < 0)
+            ge448_sub(r, r, &pi[(-aslide[i])/2]);
+
+        if (bslide[i] > 0)
+            ge448_madd(r, r, &base_i[bslide[i]/2]);
+        else if (bslide[i] < 0)
+            ge448_msub(r, r, &base_i[(-bslide[i])/2]);
+    }
+
+    return 0;
+}
+
+/* Convert compressed point to negative of affine point.
+ * Calculates x from the y and the negative bit.
+ * Not constant time.
+ *
+ * r  [in]  Uncompressed point.
+ * b  [in]  Array of bytes representing point.
+ * returns 0 on success and -1 on failure.
+ */
+int ge448_from_bytes_negate_vartime(ge448_p2 *r, const uint8_t *b)
+{
+    int ret = 0;
+    ge448 u[GE448_WORDS];
+    ge448 v[GE448_WORDS];
+    ge448 u3[GE448_WORDS];
+    ge448 vxx[GE448_WORDS];
+    ge448 check[GE448_WORDS];
+
+    fe448_from_bytes(r->Y, b);
+    fe448_1(r->Z);
+    fe448_sqr(u, r->Y);                /* u = y^2                      */
+    fe448_mul39081(v, u);              /* v = 39081.y^2                */
+    fe448_sub(u, u, r->Z);             /* u = y^2-1                    */
+    fe448_reduce(u);
+    fe448_add(v, v, r->Z);             /* v = 39081.y^2+1              */
+    fe448_reduce(v);
+    fe448_neg(v, v);                   /* v = -39081.y^2-1 = d.y^2-1   */
+
+    fe448_sqr(r->X, v);                /* x = v^2                      */
+    fe448_mul(r->X, r->X, v);          /* x = v^3                      */
+    fe448_sqr(u3, u);                  /* u3 = u^2                     */
+    fe448_mul(r->X, r->X, u3);         /* x = u^2.v^3                  */
+    fe448_mul(u3, u3, u);              /* u3 = u^3                     */
+    fe448_mul(r->X, r->X, u3);         /* x = u^5.v^3                  */
+
+    fe448_pow_2_446_222_1(r->X, r->X); /* x = (u^5.v^3)^((q-3)/4)      */
+    fe448_mul(r->X, r->X, u3);         /* x = u^3(u^5.v^3)^((q-3)/4)   */
+    fe448_mul(r->X, r->X, v);          /* x = u^3.v(u^5.v^3)^((q-3)/4) */
+
+    fe448_sqr(vxx, r->X);
+    fe448_mul(vxx, vxx, v);
+    fe448_sub(check, vxx, u);          /* check = v.x^2-u              */
+    fe448_reduce(check);
+    /* Note: v.x^2+u is NOT correct. */
+    if (fe448_isnonzero(check)) {
+        ret = -1;
+    }
+
+    /* Calculating negative of point in bytes - negate only if X is correct. */
+    if (fe448_isnegative(r->X) == (b[56] >> 7)) {
+        fe448_neg(r->X, r->X);
+    }
+
+    return ret;
+}
+
+#endif /* ED448_SMALL */
+#endif /* HAVE_CURVE448 || HAVE_ED448 */
diff --git a/client/wolfssl/wolfcrypt/src/ge_low_mem.c b/client/wolfssl/wolfcrypt/src/ge_low_mem.c
new file mode 100644
index 0000000..3b72b96
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/ge_low_mem.c
@@ -0,0 +1,563 @@
+/* ge_low_mem.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + /* Based from Daniel Beer's public domain work. */ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef HAVE_ED25519 +#ifdef ED25519_SMALL /* use slower code that takes less memory */ + +#include +#include +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +void ed25519_smult(ge_p3 *r, const ge_p3 *a, const byte *e); +void ed25519_add(ge_p3 *r, const ge_p3 *a, const ge_p3 *b); +void ed25519_double(ge_p3 *r, const ge_p3 *a); + + +static const byte ed25519_order[F25519_SIZE] = { + 0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58, + 0xd6, 0x9c, 0xf7, 0xa2, 0xde, 0xf9, 0xde, 0x14, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10 +}; + +/*Arithmetic modulo the group order m = 2^252 + + 27742317777372353535851937790883648493 = + 7237005577332262213973186563042994240857116359379907606001950938285454250989 */ + +static const word32 m[32] = { + 0xED,0xD3,0xF5,0x5C,0x1A,0x63,0x12,0x58,0xD6,0x9C,0xF7,0xA2,0xDE,0xF9, + 0xDE,0x14,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x10 +}; + +static const word32 mu[33] = { + 0x1B,0x13,0x2C,0x0A,0xA3,0xE5,0x9C,0xED,0xA7,0x29,0x63,0x08,0x5D,0x21, + 0x06,0x21,0xEB,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0x0F +}; + + +int ge_compress_key(byte* out, const byte* xIn, const byte* yIn, + word32 keySz) +{ + byte tmp[F25519_SIZE]; + byte parity; + byte pt[32]; + int i; + + lm_copy(tmp, xIn); + parity = (tmp[0] & 1) << 7; + + lm_copy(pt, yIn); + pt[31] |= parity; + + for(i = 0; i < 32; i++) { + out[32-i-1] = pt[i]; + } + (void)keySz; + return 0; +} + + +static word32 lt(word32 a,word32 b) /* 16-bit inputs */ +{ + word32 x = a; + x -= (unsigned int) b; /* 0..65535: no; 4294901761..4294967295: yes */ + x >>= 31; /* 0: no; 1: yes */ + return x; +} + + +/* Reduce coefficients of r before calling reduce_add_sub */ +static void reduce_add_sub(word32 *r) +{ + word32 pb = 0; + word32 b; + word32 mask; + int i; + unsigned char t[32]; + + for(i=0;i<32;i++) + { + pb += m[i]; + b = lt(r[i],pb); + t[i] = r[i]-pb+(b<<8); + pb = b; + } + mask = b - 1; + for(i=0;i<32;i++) + r[i] ^= mask & (r[i] ^ t[i]); +} + + +/* Reduce coefficients of x before calling barrett_reduce */ +static void barrett_reduce(word32* r, word32 x[64]) +{ + /* See HAC, Alg. 
14.42 */ + int i,j; + word32 q2[66]; + word32 *q3 = q2 + 33; + word32 r1[33]; + word32 r2[33]; + word32 carry; + word32 pb = 0; + word32 b; + + for (i = 0;i < 66;++i) q2[i] = 0; + for (i = 0;i < 33;++i) r2[i] = 0; + + for(i=0;i<33;i++) + for(j=0;j<33;j++) + if(i+j >= 31) q2[i+j] += mu[i]*x[j+31]; + carry = q2[31] >> 8; + q2[32] += carry; + carry = q2[32] >> 8; + q2[33] += carry; + + for(i=0;i<33;i++)r1[i] = x[i]; + for(i=0;i<32;i++) + for(j=0;j<33;j++) + if(i+j < 33) r2[i+j] += m[i]*q3[j]; + + for(i=0;i<32;i++) + { + carry = r2[i] >> 8; + r2[i+1] += carry; + r2[i] &= 0xff; + } + + for(i=0;i<32;i++) + { + pb += r2[i]; + b = lt(r1[i],pb); + r[i] = r1[i]-pb+(b<<8); + pb = b; + } + + /* XXX: Can it really happen that r<0?, See HAC, Alg 14.42, Step 3 + * r is an unsigned type. + * If so: Handle it here! + */ + + reduce_add_sub(r); + reduce_add_sub(r); +} + + +void sc_reduce(unsigned char x[64]) +{ + int i; + word32 t[64]; + word32 r[32]; + for(i=0;i<64;i++) t[i] = x[i]; + barrett_reduce(r, t); + for(i=0;i<32;i++) x[i] = (r[i] & 0xFF); +} + + +void sc_muladd(byte* out, const byte* a, const byte* b, const byte* c) +{ + + byte s[32]; + byte e[64]; + + XMEMSET(e, 0, sizeof(e)); + XMEMCPY(e, b, 32); + + /* Obtain e */ + sc_reduce(e); + + /* Compute s = ze + k */ + fprime_mul(s, a, e, ed25519_order); + fprime_add(s, c, ed25519_order); + + XMEMCPY(out, s, 32); +} + + +/* Base point is (numbers wrapped): + * + * x = 151122213495354007725011514095885315114 + * 54012693041857206046113283949847762202 + * y = 463168356949264781694283940034751631413 + * 07993866256225615783033603165251855960 + * + * y is derived by transforming the original Montgomery base (u=9). x + * is the corresponding positive coordinate for the new curve equation. + * t is x*y. + */ +const ge_p3 ed25519_base = { + { + 0x1a, 0xd5, 0x25, 0x8f, 0x60, 0x2d, 0x56, 0xc9, + 0xb2, 0xa7, 0x25, 0x95, 0x60, 0xc7, 0x2c, 0x69, + 0x5c, 0xdc, 0xd6, 0xfd, 0x31, 0xe2, 0xa4, 0xc0, + 0xfe, 0x53, 0x6e, 0xcd, 0xd3, 0x36, 0x69, 0x21 + }, + { + 0x58, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 + }, + {1, 0}, + { + 0xa3, 0xdd, 0xb7, 0xa5, 0xb3, 0x8a, 0xde, 0x6d, + 0xf5, 0x52, 0x51, 0x77, 0x80, 0x9f, 0xf0, 0x20, + 0x7d, 0xe3, 0xab, 0x64, 0x8e, 0x4e, 0xea, 0x66, + 0x65, 0x76, 0x8b, 0xd7, 0x0f, 0x5f, 0x87, 0x67 + }, + +}; + + +const ge_p3 ed25519_neutral = { + {0}, + {1, 0}, + {1, 0}, + {0}, + +}; + + +static const byte ed25519_d[F25519_SIZE] = { + 0xa3, 0x78, 0x59, 0x13, 0xca, 0x4d, 0xeb, 0x75, + 0xab, 0xd8, 0x41, 0x41, 0x4d, 0x0a, 0x70, 0x00, + 0x98, 0xe8, 0x79, 0x77, 0x79, 0x40, 0xc7, 0x8c, + 0x73, 0xfe, 0x6f, 0x2b, 0xee, 0x6c, 0x03, 0x52 +}; + + +/* k = 2d */ +static const byte ed25519_k[F25519_SIZE] = { + 0x59, 0xf1, 0xb2, 0x26, 0x94, 0x9b, 0xd6, 0xeb, + 0x56, 0xb1, 0x83, 0x82, 0x9a, 0x14, 0xe0, 0x00, + 0x30, 0xd1, 0xf3, 0xee, 0xf2, 0x80, 0x8e, 0x19, + 0xe7, 0xfc, 0xdf, 0x56, 0xdc, 0xd9, 0x06, 0x24 +}; + + +void ed25519_add(ge_p3 *r, + const ge_p3 *p1, const ge_p3 *p2) +{ + /* Explicit formulas database: add-2008-hwcd-3 + * + * source 2008 Hisil--Wong--Carter--Dawson, + * http://eprint.iacr.org/2008/522, Section 3.1 + * appliesto extended-1 + * parameter k + * assume k = 2 d + * compute A = (Y1-X1)(Y2-X2) + * compute B = (Y1+X1)(Y2+X2) + * compute C = T1 k T2 + * compute D = Z1 2 Z2 + * compute E = B - A + * compute F = D - C + * compute G = D + C + * compute H = B + A + * compute X3 = E F + * compute Y3 = G H + 
* compute T3 = E H + * compute Z3 = F G + */ + byte a[F25519_SIZE]; + byte b[F25519_SIZE]; + byte c[F25519_SIZE]; + byte d[F25519_SIZE]; + byte e[F25519_SIZE]; + byte f[F25519_SIZE]; + byte g[F25519_SIZE]; + byte h[F25519_SIZE]; + + /* A = (Y1-X1)(Y2-X2) */ + lm_sub(c, p1->Y, p1->X); + lm_sub(d, p2->Y, p2->X); + fe_mul__distinct(a, c, d); + + /* B = (Y1+X1)(Y2+X2) */ + lm_add(c, p1->Y, p1->X); + lm_add(d, p2->Y, p2->X); + fe_mul__distinct(b, c, d); + + /* C = T1 k T2 */ + fe_mul__distinct(d, p1->T, p2->T); + fe_mul__distinct(c, d, ed25519_k); + + /* D = Z1 2 Z2 */ + fe_mul__distinct(d, p1->Z, p2->Z); + lm_add(d, d, d); + + /* E = B - A */ + lm_sub(e, b, a); + + /* F = D - C */ + lm_sub(f, d, c); + + /* G = D + C */ + lm_add(g, d, c); + + /* H = B + A */ + lm_add(h, b, a); + + /* X3 = E F */ + fe_mul__distinct(r->X, e, f); + + /* Y3 = G H */ + fe_mul__distinct(r->Y, g, h); + + /* T3 = E H */ + fe_mul__distinct(r->T, e, h); + + /* Z3 = F G */ + fe_mul__distinct(r->Z, f, g); +} + + +void ed25519_double(ge_p3 *r, const ge_p3 *p) +{ + /* Explicit formulas database: dbl-2008-hwcd + * + * source 2008 Hisil--Wong--Carter--Dawson, + * http://eprint.iacr.org/2008/522, Section 3.3 + * compute A = X1^2 + * compute B = Y1^2 + * compute C = 2 Z1^2 + * compute D = a A + * compute E = (X1+Y1)^2-A-B + * compute G = D + B + * compute F = G - C + * compute H = D - B + * compute X3 = E F + * compute Y3 = G H + * compute T3 = E H + * compute Z3 = F G + */ + byte a[F25519_SIZE]; + byte b[F25519_SIZE]; + byte c[F25519_SIZE]; + byte e[F25519_SIZE]; + byte f[F25519_SIZE]; + byte g[F25519_SIZE]; + byte h[F25519_SIZE]; + + /* A = X1^2 */ + fe_mul__distinct(a, p->X, p->X); + + /* B = Y1^2 */ + fe_mul__distinct(b, p->Y, p->Y); + + /* C = 2 Z1^2 */ + fe_mul__distinct(c, p->Z, p->Z); + lm_add(c, c, c); + + /* D = a A (alter sign) */ + /* E = (X1+Y1)^2-A-B */ + lm_add(f, p->X, p->Y); + fe_mul__distinct(e, f, f); + lm_sub(e, e, a); + lm_sub(e, e, b); + + /* G = D + B */ + lm_sub(g, b, a); + + /* F = G - C */ + lm_sub(f, g, c); + + /* H = D - B */ + lm_neg(h, b); + lm_sub(h, h, a); + + /* X3 = E F */ + fe_mul__distinct(r->X, e, f); + + /* Y3 = G H */ + fe_mul__distinct(r->Y, g, h); + + /* T3 = E H */ + fe_mul__distinct(r->T, e, h); + + /* Z3 = F G */ + fe_mul__distinct(r->Z, f, g); +} + + +void ed25519_smult(ge_p3 *r_out, const ge_p3 *p, const byte *e) +{ + ge_p3 r; + int i; + + XMEMCPY(&r, &ed25519_neutral, sizeof(r)); + + for (i = 255; i >= 0; i--) { + const byte bit = (e[i >> 3] >> (i & 7)) & 1; + ge_p3 s; + + ed25519_double(&r, &r); + ed25519_add(&s, &r, p); + + fe_select(r.X, r.X, s.X, bit); + fe_select(r.Y, r.Y, s.Y, bit); + fe_select(r.Z, r.Z, s.Z, bit); + fe_select(r.T, r.T, s.T, bit); + } + XMEMCPY(r_out, &r, sizeof(r)); +} + + +void ge_scalarmult_base(ge_p3 *R,const unsigned char *nonce) +{ + ed25519_smult(R, &ed25519_base, nonce); +} + + +/* pack the point h into array s */ +void ge_p3_tobytes(unsigned char *s,const ge_p3 *h) +{ + byte x[F25519_SIZE]; + byte y[F25519_SIZE]; + byte z1[F25519_SIZE]; + byte parity; + + fe_inv__distinct(z1, h->Z); + fe_mul__distinct(x, h->X, z1); + fe_mul__distinct(y, h->Y, z1); + + fe_normalize(x); + fe_normalize(y); + + parity = (x[0] & 1) << 7; + lm_copy(s, y); + fe_normalize(s); + s[31] |= parity; +} + + +/* pack the point h into array s */ +void ge_tobytes(unsigned char *s,const ge_p2 *h) +{ + byte x[F25519_SIZE]; + byte y[F25519_SIZE]; + byte z1[F25519_SIZE]; + byte parity; + + fe_inv__distinct(z1, h->Z); + fe_mul__distinct(x, h->X, z1); + fe_mul__distinct(y, h->Y, z1); + + 
fe_normalize(x); + fe_normalize(y); + + parity = (x[0] & 1) << 7; + lm_copy(s, y); + fe_normalize(s); + s[31] |= parity; +} + + +/* + Test if the public key can be uncompressed and negate it (-X,Y,Z,-T) + return 0 on success + */ +int ge_frombytes_negate_vartime(ge_p3 *p,const unsigned char *s) +{ + + byte parity; + byte x[F25519_SIZE]; + byte y[F25519_SIZE]; + byte a[F25519_SIZE]; + byte b[F25519_SIZE]; + byte c[F25519_SIZE]; + int ret = 0; + + /* unpack the key s */ + parity = s[31] >> 7; + lm_copy(y, s); + y[31] &= 127; + + fe_mul__distinct(c, y, y); + fe_mul__distinct(b, c, ed25519_d); + lm_add(a, b, f25519_one); + fe_inv__distinct(b, a); + lm_sub(a, c, f25519_one); + fe_mul__distinct(c, a, b); + fe_sqrt(a, c); + lm_neg(b, a); + fe_select(x, a, b, (a[0] ^ parity) & 1); + + /* test that x^2 is equal to c */ + fe_mul__distinct(a, x, x); + fe_normalize(a); + fe_normalize(c); + ret |= ConstantCompare(a, c, F25519_SIZE); + + /* project the key s onto p */ + lm_copy(p->X, x); + lm_copy(p->Y, y); + fe_load(p->Z, 1); + fe_mul__distinct(p->T, x, y); + + /* negate, the point becomes (-X,Y,Z,-T) */ + lm_neg(p->X,p->X); + lm_neg(p->T,p->T); + + return ret; +} + + +int ge_double_scalarmult_vartime(ge_p2* R, const unsigned char *h, + const ge_p3 *inA,const unsigned char *sig) +{ + ge_p3 p, A; + int ret = 0; + + XMEMCPY(&A, inA, sizeof(ge_p3)); + + /* find SB */ + ed25519_smult(&p, &ed25519_base, sig); + + /* find H(R,A,M) * -A */ + ed25519_smult(&A, &A, h); + + /* SB + -H(R,A,M)A */ + ed25519_add(&A, &p, &A); + + lm_copy(R->X, A.X); + lm_copy(R->Y, A.Y); + lm_copy(R->Z, A.Z); + + return ret; +} + +#endif /* ED25519_SMALL */ +#endif /* HAVE_ED25519 */ diff --git a/client/wolfssl/wolfcrypt/src/ge_operations.c b/client/wolfssl/wolfcrypt/src/ge_operations.c new file mode 100644 index 0000000..73fa06e --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/ge_operations.c @@ -0,0 +1,9803 @@ +/* ge_operations.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + /* Based On Daniel J Bernstein's ed25519 Public Domain ref10 work. 
*/ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef HAVE_ED25519 +#ifndef ED25519_SMALL /* run when not defined to use small memory math */ + +#include +#include +#include +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +#if defined(CURVED25519_X64) + #define CURVED25519_ASM_64BIT + #define CURVED25519_ASM +#endif +#if defined(WOLFSSL_ARMASM) + #if defined(__aarch64__) + #define CURVED25519_ASM_64BIT + #else + #define CURVED25519_ASM_32BIT + #endif + #define CURVED25519_ASM +#endif + + +static void ge_p2_0(ge_p2 *); +#ifndef CURVED25519_ASM +static void ge_precomp_0(ge_precomp *); +#endif +static void ge_p3_to_p2(ge_p2 *,const ge_p3 *); +static void ge_p3_to_cached(ge_cached *,const ge_p3 *); +static void ge_p1p1_to_p2(ge_p2 *,const ge_p1p1 *); +static void ge_p1p1_to_p3(ge_p3 *,const ge_p1p1 *); +static void ge_p2_dbl(ge_p1p1 *,const ge_p2 *); +static void ge_p3_dbl(ge_p1p1 *,const ge_p3 *); + +static void ge_madd(ge_p1p1 *,const ge_p3 *,const ge_precomp *); +static void ge_msub(ge_p1p1 *,const ge_p3 *,const ge_precomp *); +static void ge_add(ge_p1p1 *,const ge_p3 *,const ge_cached *); +static void ge_sub(ge_p1p1 *,const ge_p3 *,const ge_cached *); + +/* +ge means group element. + +Here the group is the set of pairs (x,y) of field elements (see ge_operations.h) +satisfying -x^2 + y^2 = 1 + d x^2y^2 +where d = -121665/121666. + +Representations: + ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z + ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT + ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T + ge_precomp (Duif): (y+x,y-x,2dxy) +*/ + +#if !defined(HAVE___UINT128_T) || defined(NO_CURVED25519_128BIT) +#define MASK_21 0x1fffff +#define ORDER_0 0x15d3ed +#define ORDER_1 0x18d2e7 +#define ORDER_2 0x160498 +#define ORDER_3 0xf39ac +#define ORDER_4 0x1dea2f +#define ORDER_5 0xa6f7c + +#ifdef CURVED25519_ASM_32BIT +uint64_t load_3(const unsigned char *in) +{ + uint64_t result; + result = (uint64_t) in[0]; + result |= ((uint64_t) in[1]) << 8; + result |= ((uint64_t) in[2]) << 16; + return result; +} + + +uint64_t load_4(const unsigned char *in) +{ + uint64_t result; + result = (uint64_t) in[0]; + result |= ((uint64_t) in[1]) << 8; + result |= ((uint64_t) in[2]) << 16; + result |= ((uint64_t) in[3]) << 24; + return result; +} +#endif + +/* +Input: + s[0]+256*s[1]+...+256^63*s[63] = s + +Output: + s[0]+256*s[1]+...+256^31*s[31] = s mod l + where l = 2^252 + 27742317777372353535851937790883648493. + Overwrites s in place. 
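+
+The reduction splits s into 24 21-bit limbs t[0..23]; ORDER_0..ORDER_5 are
+the 21-bit digits of l - 2^252, so each limb weighted at or above 2^252 is
+folded down via 2^252 = -(l - 2^252) (mod l) and the carries are then
+propagated. A typical caller reduces a 64-byte SHA-512 digest in place,
+e.g. (buffer name illustrative):
+
+    byte h[64];    (SHA-512 output)
+    sc_reduce(h);  (h[0..31] now holds the value mod l)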
+*/ +void sc_reduce(byte* s) +{ + int64_t t[24]; + int64_t carry; + + t[ 0] = MASK_21 & (load_3(s + 0) >> 0); + t[ 1] = MASK_21 & (load_4(s + 2) >> 5); + t[ 2] = MASK_21 & (load_3(s + 5) >> 2); + t[ 3] = MASK_21 & (load_4(s + 7) >> 7); + t[ 4] = MASK_21 & (load_4(s + 10) >> 4); + t[ 5] = MASK_21 & (load_3(s + 13) >> 1); + t[ 6] = MASK_21 & (load_4(s + 15) >> 6); + t[ 7] = MASK_21 & (load_3(s + 18) >> 3); + t[ 8] = MASK_21 & (load_3(s + 21) >> 0); + t[ 9] = MASK_21 & (load_4(s + 23) >> 5); + t[10] = MASK_21 & (load_3(s + 26) >> 2); + t[11] = MASK_21 & (load_4(s + 28) >> 7); + t[12] = MASK_21 & (load_4(s + 31) >> 4); + t[13] = MASK_21 & (load_3(s + 34) >> 1); + t[14] = MASK_21 & (load_4(s + 36) >> 6); + t[15] = MASK_21 & (load_3(s + 39) >> 3); + t[16] = MASK_21 & (load_3(s + 42) >> 0); + t[17] = MASK_21 & (load_4(s + 44) >> 5); + t[18] = MASK_21 & (load_3(s + 47) >> 2); + t[19] = MASK_21 & (load_4(s + 49) >> 7); + t[20] = MASK_21 & (load_4(s + 52) >> 4); + t[21] = MASK_21 & (load_3(s + 55) >> 1); + t[22] = MASK_21 & (load_4(s + 57) >> 6); + t[23] = (load_4(s + 60) >> 3); + + t[11] -= t[23] * ORDER_0; + t[12] -= t[23] * ORDER_1; + t[13] -= t[23] * ORDER_2; + t[14] -= t[23] * ORDER_3; + t[15] -= t[23] * ORDER_4; + t[16] -= t[23] * ORDER_5; + + t[10] -= t[22] * ORDER_0; + t[11] -= t[22] * ORDER_1; + t[12] -= t[22] * ORDER_2; + t[13] -= t[22] * ORDER_3; + t[14] -= t[22] * ORDER_4; + t[15] -= t[22] * ORDER_5; + + t[ 9] -= t[21] * ORDER_0; + t[10] -= t[21] * ORDER_1; + t[11] -= t[21] * ORDER_2; + t[12] -= t[21] * ORDER_3; + t[13] -= t[21] * ORDER_4; + t[14] -= t[21] * ORDER_5; + + t[ 8] -= t[20] * ORDER_0; + t[ 9] -= t[20] * ORDER_1; + t[10] -= t[20] * ORDER_2; + t[11] -= t[20] * ORDER_3; + t[12] -= t[20] * ORDER_4; + t[13] -= t[20] * ORDER_5; + + t[ 7] -= t[19] * ORDER_0; + t[ 8] -= t[19] * ORDER_1; + t[ 9] -= t[19] * ORDER_2; + t[10] -= t[19] * ORDER_3; + t[11] -= t[19] * ORDER_4; + t[12] -= t[19] * ORDER_5; + + t[ 6] -= t[18] * ORDER_0; + t[ 7] -= t[18] * ORDER_1; + t[ 8] -= t[18] * ORDER_2; + t[ 9] -= t[18] * ORDER_3; + t[10] -= t[18] * ORDER_4; + t[11] -= t[18] * ORDER_5; + + carry = t[ 6] >> 21; t[ 7] += carry; t[ 6] &= MASK_21; + carry = t[ 8] >> 21; t[ 9] += carry; t[ 8] &= MASK_21; + carry = t[10] >> 21; t[11] += carry; t[10] &= MASK_21; + carry = t[12] >> 21; t[13] += carry; t[12] &= MASK_21; + carry = t[14] >> 21; t[15] += carry; t[14] &= MASK_21; + carry = t[16] >> 21; t[17] += carry; t[16] &= MASK_21; + carry = t[ 7] >> 21; t[ 8] += carry; t[ 7] &= MASK_21; + carry = t[ 9] >> 21; t[10] += carry; t[ 9] &= MASK_21; + carry = t[11] >> 21; t[12] += carry; t[11] &= MASK_21; + carry = t[13] >> 21; t[14] += carry; t[13] &= MASK_21; + carry = t[15] >> 21; t[16] += carry; t[15] &= MASK_21; + + t[ 5] -= t[17] * ORDER_0; + t[ 6] -= t[17] * ORDER_1; + t[ 7] -= t[17] * ORDER_2; + t[ 8] -= t[17] * ORDER_3; + t[ 9] -= t[17] * ORDER_4; + t[10] -= t[17] * ORDER_5; + + t[ 4] -= t[16] * ORDER_0; + t[ 5] -= t[16] * ORDER_1; + t[ 6] -= t[16] * ORDER_2; + t[ 7] -= t[16] * ORDER_3; + t[ 8] -= t[16] * ORDER_4; + t[ 9] -= t[16] * ORDER_5; + + t[ 3] -= t[15] * ORDER_0; + t[ 4] -= t[15] * ORDER_1; + t[ 5] -= t[15] * ORDER_2; + t[ 6] -= t[15] * ORDER_3; + t[ 7] -= t[15] * ORDER_4; + t[ 8] -= t[15] * ORDER_5; + + t[ 2] -= t[14] * ORDER_0; + t[ 3] -= t[14] * ORDER_1; + t[ 4] -= t[14] * ORDER_2; + t[ 5] -= t[14] * ORDER_3; + t[ 6] -= t[14] * ORDER_4; + t[ 7] -= t[14] * ORDER_5; + + t[ 1] -= t[13] * ORDER_0; + t[ 2] -= t[13] * ORDER_1; + t[ 3] -= t[13] * ORDER_2; + t[ 4] -= t[13] * ORDER_3; + t[ 5] -= t[13] * 
ORDER_4; + t[ 6] -= t[13] * ORDER_5; + + t[ 0] -= t[12] * ORDER_0; + t[ 1] -= t[12] * ORDER_1; + t[ 2] -= t[12] * ORDER_2; + t[ 3] -= t[12] * ORDER_3; + t[ 4] -= t[12] * ORDER_4; + t[ 5] -= t[12] * ORDER_5; + t[12] = 0; + + carry = t[ 0] >> 21; t[ 1] += carry; t[ 0] &= MASK_21; + carry = t[ 1] >> 21; t[ 2] += carry; t[ 1] &= MASK_21; + carry = t[ 2] >> 21; t[ 3] += carry; t[ 2] &= MASK_21; + carry = t[ 3] >> 21; t[ 4] += carry; t[ 3] &= MASK_21; + carry = t[ 4] >> 21; t[ 5] += carry; t[ 4] &= MASK_21; + carry = t[ 5] >> 21; t[ 6] += carry; t[ 5] &= MASK_21; + carry = t[ 6] >> 21; t[ 7] += carry; t[ 6] &= MASK_21; + carry = t[ 7] >> 21; t[ 8] += carry; t[ 7] &= MASK_21; + carry = t[ 8] >> 21; t[ 9] += carry; t[ 8] &= MASK_21; + carry = t[ 9] >> 21; t[10] += carry; t[ 9] &= MASK_21; + carry = t[10] >> 21; t[11] += carry; t[10] &= MASK_21; + carry = t[11] >> 21; t[12] += carry; t[11] &= MASK_21; + + t[ 0] -= t[12] * ORDER_0; + t[ 1] -= t[12] * ORDER_1; + t[ 2] -= t[12] * ORDER_2; + t[ 3] -= t[12] * ORDER_3; + t[ 4] -= t[12] * ORDER_4; + t[ 5] -= t[12] * ORDER_5; + + carry = t[ 0] >> 21; t[ 1] += carry; t[ 0] &= MASK_21; + carry = t[ 1] >> 21; t[ 2] += carry; t[ 1] &= MASK_21; + carry = t[ 2] >> 21; t[ 3] += carry; t[ 2] &= MASK_21; + carry = t[ 3] >> 21; t[ 4] += carry; t[ 3] &= MASK_21; + carry = t[ 4] >> 21; t[ 5] += carry; t[ 4] &= MASK_21; + carry = t[ 5] >> 21; t[ 6] += carry; t[ 5] &= MASK_21; + carry = t[ 6] >> 21; t[ 7] += carry; t[ 6] &= MASK_21; + carry = t[ 7] >> 21; t[ 8] += carry; t[ 7] &= MASK_21; + carry = t[ 8] >> 21; t[ 9] += carry; t[ 8] &= MASK_21; + carry = t[ 9] >> 21; t[10] += carry; t[ 9] &= MASK_21; + carry = t[10] >> 21; t[11] += carry; t[10] &= MASK_21; + + s[ 0] = (byte)(t[ 0] >> 0); + s[ 1] = (byte)(t[ 0] >> 8); + s[ 2] = (byte)((t[ 0] >> 16) | (t[ 1] << 5)); + s[ 3] = (byte)(t[ 1] >> 3); + s[ 4] = (byte)(t[ 1] >> 11); + s[ 5] = (byte)((t[ 1] >> 19) | (t[ 2] << 2)); + s[ 6] = (byte)(t[ 2] >> 6); + s[ 7] = (byte)((t[ 2] >> 14) | (t[ 3] << 7)); + s[ 8] = (byte)(t[ 3] >> 1); + s[ 9] = (byte)(t[ 3] >> 9); + s[10] = (byte)((t[ 3] >> 17) | (t[ 4] << 4)); + s[11] = (byte)(t[ 4] >> 4); + s[12] = (byte)(t[ 4] >> 12); + s[13] = (byte)((t[ 4] >> 20) | (t[ 5] << 1)); + s[14] = (byte)(t[ 5] >> 7); + s[15] = (byte)((t[ 5] >> 15) | (t[ 6] << 6)); + s[16] = (byte)(t[ 6] >> 2); + s[17] = (byte)(t[ 6] >> 10); + s[18] = (byte)((t[ 6] >> 18) | (t[ 7] << 3)); + s[19] = (byte)(t[ 7] >> 5); + s[20] = (byte)(t[ 7] >> 13); + s[21] = (byte)(t[ 8] >> 0); + s[22] = (byte)(t[ 8] >> 8); + s[23] = (byte)((t[ 8] >> 16) | (t[ 9] << 5)); + s[24] = (byte)(t[ 9] >> 3); + s[25] = (byte)(t[ 9] >> 11); + s[26] = (byte)((t[ 9] >> 19) | (t[10] << 2)); + s[27] = (byte)(t[10] >> 6); + s[28] = (byte)((t[10] >> 14) | (t[11] << 7)); + s[29] = (byte)(t[11] >> 1); + s[30] = (byte)(t[11] >> 9); + s[31] = (byte)(t[11] >> 17); +} + +/* +Input: + a[0]+256*a[1]+...+256^31*a[31] = a + b[0]+256*b[1]+...+256^31*b[31] = b + c[0]+256*c[1]+...+256^31*c[31] = c + +Output: + s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l + where l = 2^252 + 27742317777372353535851937790883648493. 
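+
+This is the scalar step of an Ed25519 signature, S = (H(R,A,M)*a + r) mod l;
+in those terms a caller would do roughly (names illustrative, not
+identifiers from this file):
+
+    sc_muladd(sig + 32, hram, secret_scalar, nonce);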
+*/ +void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) +{ + uint32_t ad[12], bd[12], cd[12]; + int64_t t[24]; + int64_t carry; + + ad[ 0] = MASK_21 & (load_3(a + 0) >> 0); + ad[ 1] = MASK_21 & (load_4(a + 2) >> 5); + ad[ 2] = MASK_21 & (load_3(a + 5) >> 2); + ad[ 3] = MASK_21 & (load_4(a + 7) >> 7); + ad[ 4] = MASK_21 & (load_4(a + 10) >> 4); + ad[ 5] = MASK_21 & (load_3(a + 13) >> 1); + ad[ 6] = MASK_21 & (load_4(a + 15) >> 6); + ad[ 7] = MASK_21 & (load_3(a + 18) >> 3); + ad[ 8] = MASK_21 & (load_3(a + 21) >> 0); + ad[ 9] = MASK_21 & (load_4(a + 23) >> 5); + ad[10] = MASK_21 & (load_3(a + 26) >> 2); + ad[11] = (uint32_t)(load_4(a + 28) >> 7); + bd[ 0] = MASK_21 & (load_3(b + 0) >> 0); + bd[ 1] = MASK_21 & (load_4(b + 2) >> 5); + bd[ 2] = MASK_21 & (load_3(b + 5) >> 2); + bd[ 3] = MASK_21 & (load_4(b + 7) >> 7); + bd[ 4] = MASK_21 & (load_4(b + 10) >> 4); + bd[ 5] = MASK_21 & (load_3(b + 13) >> 1); + bd[ 6] = MASK_21 & (load_4(b + 15) >> 6); + bd[ 7] = MASK_21 & (load_3(b + 18) >> 3); + bd[ 8] = MASK_21 & (load_3(b + 21) >> 0); + bd[ 9] = MASK_21 & (load_4(b + 23) >> 5); + bd[10] = MASK_21 & (load_3(b + 26) >> 2); + bd[11] = (uint32_t)(load_4(b + 28) >> 7); + cd[ 0] = MASK_21 & (load_3(c + 0) >> 0); + cd[ 1] = MASK_21 & (load_4(c + 2) >> 5); + cd[ 2] = MASK_21 & (load_3(c + 5) >> 2); + cd[ 3] = MASK_21 & (load_4(c + 7) >> 7); + cd[ 4] = MASK_21 & (load_4(c + 10) >> 4); + cd[ 5] = MASK_21 & (load_3(c + 13) >> 1); + cd[ 6] = MASK_21 & (load_4(c + 15) >> 6); + cd[ 7] = MASK_21 & (load_3(c + 18) >> 3); + cd[ 8] = MASK_21 & (load_3(c + 21) >> 0); + cd[ 9] = MASK_21 & (load_4(c + 23) >> 5); + cd[10] = MASK_21 & (load_3(c + 26) >> 2); + cd[11] = (uint32_t)(load_4(c + 28) >> 7); + + t[ 0] = cd[ 0] + (int64_t)ad[ 0] * bd[ 0]; + t[ 1] = cd[ 1] + (int64_t)ad[ 0] * bd[ 1] + (int64_t)ad[ 1] * bd[ 0]; + t[ 2] = cd[ 2] + (int64_t)ad[ 0] * bd[ 2] + (int64_t)ad[ 1] * bd[ 1] + + (int64_t)ad[ 2] * bd[ 0]; + t[ 3] = cd[ 3] + (int64_t)ad[ 0] * bd[ 3] + (int64_t)ad[ 1] * bd[ 2] + + (int64_t)ad[ 2] * bd[ 1] + (int64_t)ad[ 3] * bd[ 0]; + t[ 4] = cd[ 4] + (int64_t)ad[ 0] * bd[ 4] + (int64_t)ad[ 1] * bd[ 3] + + (int64_t)ad[ 2] * bd[ 2] + (int64_t)ad[ 3] * bd[ 1] + + (int64_t)ad[ 4] * bd[ 0]; + t[ 5] = cd[ 5] + (int64_t)ad[ 0] * bd[ 5] + (int64_t)ad[ 1] * bd[ 4] + + (int64_t)ad[ 2] * bd[ 3] + (int64_t)ad[ 3] * bd[ 2] + + (int64_t)ad[ 4] * bd[ 1] + (int64_t)ad[ 5] * bd[ 0]; + t[ 6] = cd[ 6] + (int64_t)ad[ 0] * bd[ 6] + (int64_t)ad[ 1] * bd[ 5] + + (int64_t)ad[ 2] * bd[ 4] + (int64_t)ad[ 3] * bd[ 3] + + (int64_t)ad[ 4] * bd[ 2] + (int64_t)ad[ 5] * bd[ 1] + + (int64_t)ad[ 6] * bd[ 0]; + t[ 7] = cd[ 7] + (int64_t)ad[ 0] * bd[ 7] + (int64_t)ad[ 1] * bd[ 6] + + (int64_t)ad[ 2] * bd[ 5] + (int64_t)ad[ 3] * bd[ 4] + + (int64_t)ad[ 4] * bd[ 3] + (int64_t)ad[ 5] * bd[ 2] + + (int64_t)ad[ 6] * bd[ 1] + (int64_t)ad[ 7] * bd[ 0]; + t[ 8] = cd[ 8] + (int64_t)ad[ 0] * bd[ 8] + (int64_t)ad[ 1] * bd[ 7] + + (int64_t)ad[ 2] * bd[ 6] + (int64_t)ad[ 3] * bd[ 5] + + (int64_t)ad[ 4] * bd[ 4] + (int64_t)ad[ 5] * bd[ 3] + + (int64_t)ad[ 6] * bd[ 2] + (int64_t)ad[ 7] * bd[ 1] + + (int64_t)ad[ 8] * bd[ 0]; + t[ 9] = cd[ 9] + (int64_t)ad[ 0] * bd[ 9] + (int64_t)ad[ 1] * bd[ 8] + + (int64_t)ad[ 2] * bd[ 7] + (int64_t)ad[ 3] * bd[ 6] + + (int64_t)ad[ 4] * bd[ 5] + (int64_t)ad[ 5] * bd[ 4] + + (int64_t)ad[ 6] * bd[ 3] + (int64_t)ad[ 7] * bd[ 2] + + (int64_t)ad[ 8] * bd[ 1] + (int64_t)ad[ 9] * bd[ 0]; + t[10] = cd[10] + (int64_t)ad[ 0] * bd[10] + (int64_t)ad[ 1] * bd[ 9] + + (int64_t)ad[ 2] * bd[ 8] + (int64_t)ad[ 3] * bd[ 7] 
+ + (int64_t)ad[ 4] * bd[ 6] + (int64_t)ad[ 5] * bd[ 5] + + (int64_t)ad[ 6] * bd[ 4] + (int64_t)ad[ 7] * bd[ 3] + + (int64_t)ad[ 8] * bd[ 2] + (int64_t)ad[ 9] * bd[ 1] + + (int64_t)ad[10] * bd[ 0]; + t[11] = cd[11] + (int64_t)ad[ 0] * bd[11] + (int64_t)ad[ 1] * bd[10] + + (int64_t)ad[ 2] * bd[ 9] + (int64_t)ad[ 3] * bd[ 8] + + (int64_t)ad[ 4] * bd[ 7] + (int64_t)ad[ 5] * bd[ 6] + + (int64_t)ad[ 6] * bd[ 5] + (int64_t)ad[ 7] * bd[ 4] + + (int64_t)ad[ 8] * bd[ 3] + (int64_t)ad[ 9] * bd[ 2] + + (int64_t)ad[10] * bd[ 1] + (int64_t)ad[11] * bd[ 0]; + t[12] = (int64_t)ad[ 1] * bd[11] + (int64_t)ad[ 2] * bd[10] + + (int64_t)ad[ 3] * bd[ 9] + (int64_t)ad[ 4] * bd[ 8] + + (int64_t)ad[ 5] * bd[ 7] + (int64_t)ad[ 6] * bd[ 6] + + (int64_t)ad[ 7] * bd[ 5] + (int64_t)ad[ 8] * bd[ 4] + + (int64_t)ad[ 9] * bd[ 3] + (int64_t)ad[10] * bd[ 2] + + (int64_t)ad[11] * bd[ 1]; + t[13] = (int64_t)ad[ 2] * bd[11] + (int64_t)ad[ 3] * bd[10] + + (int64_t)ad[ 4] * bd[ 9] + (int64_t)ad[ 5] * bd[ 8] + + (int64_t)ad[ 6] * bd[ 7] + (int64_t)ad[ 7] * bd[ 6] + + (int64_t)ad[ 8] * bd[ 5] + (int64_t)ad[ 9] * bd[ 4] + + (int64_t)ad[10] * bd[ 3] + (int64_t)ad[11] * bd[ 2]; + t[14] = (int64_t)ad[ 3] * bd[11] + (int64_t)ad[ 4] * bd[10] + + (int64_t)ad[ 5] * bd[ 9] + (int64_t)ad[ 6] * bd[ 8] + + (int64_t)ad[ 7] * bd[ 7] + (int64_t)ad[ 8] * bd[ 6] + + (int64_t)ad[ 9] * bd[ 5] + (int64_t)ad[10] * bd[ 4] + + (int64_t)ad[11] * bd[ 3]; + t[15] = (int64_t)ad[ 4] * bd[11] + (int64_t)ad[ 5] * bd[10] + + (int64_t)ad[ 6] * bd[ 9] + (int64_t)ad[ 7] * bd[ 8] + + (int64_t)ad[ 8] * bd[ 7] + (int64_t)ad[ 9] * bd[ 6] + + (int64_t)ad[10] * bd[ 5] + (int64_t)ad[11] * bd[ 4]; + t[16] = (int64_t)ad[ 5] * bd[11] + (int64_t)ad[ 6] * bd[10] + + (int64_t)ad[ 7] * bd[ 9] + (int64_t)ad[ 8] * bd[ 8] + + (int64_t)ad[ 9] * bd[ 7] + (int64_t)ad[10] * bd[ 6] + + (int64_t)ad[11] * bd[ 5]; + t[17] = (int64_t)ad[ 6] * bd[11] + (int64_t)ad[ 7] * bd[10] + + (int64_t)ad[ 8] * bd[ 9] + (int64_t)ad[ 9] * bd[ 8] + + (int64_t)ad[10] * bd[ 7] + (int64_t)ad[11] * bd[ 6]; + t[18] = (int64_t)ad[ 7] * bd[11] + (int64_t)ad[ 8] * bd[10] + + (int64_t)ad[ 9] * bd[ 9] + (int64_t)ad[10] * bd[ 8] + + (int64_t)ad[11] * bd[ 7]; + t[19] = (int64_t)ad[ 8] * bd[11] + (int64_t)ad[ 9] * bd[10] + + (int64_t)ad[10] * bd[ 9] + (int64_t)ad[11] * bd[ 8]; + t[20] = (int64_t)ad[ 9] * bd[11] + (int64_t)ad[10] * bd[10] + + (int64_t)ad[11] * bd[ 9]; + t[21] = (int64_t)ad[10] * bd[11] + (int64_t)ad[11] * bd[10]; + t[22] = (int64_t)ad[11] * bd[11]; + t[23] = 0; + + carry = t[ 0] >> 21; t[ 1] += carry; t[ 0] &= MASK_21; + carry = t[ 2] >> 21; t[ 3] += carry; t[ 2] &= MASK_21; + carry = t[ 4] >> 21; t[ 5] += carry; t[ 4] &= MASK_21; + carry = t[ 6] >> 21; t[ 7] += carry; t[ 6] &= MASK_21; + carry = t[ 8] >> 21; t[ 9] += carry; t[ 8] &= MASK_21; + carry = t[10] >> 21; t[11] += carry; t[10] &= MASK_21; + carry = t[12] >> 21; t[13] += carry; t[12] &= MASK_21; + carry = t[14] >> 21; t[15] += carry; t[14] &= MASK_21; + carry = t[16] >> 21; t[17] += carry; t[16] &= MASK_21; + carry = t[18] >> 21; t[19] += carry; t[18] &= MASK_21; + carry = t[20] >> 21; t[21] += carry; t[20] &= MASK_21; + carry = t[22] >> 21; t[23] += carry; t[22] &= MASK_21; + carry = t[ 1] >> 21; t[ 2] += carry; t[ 1] &= MASK_21; + carry = t[ 3] >> 21; t[ 4] += carry; t[ 3] &= MASK_21; + carry = t[ 5] >> 21; t[ 6] += carry; t[ 5] &= MASK_21; + carry = t[ 7] >> 21; t[ 8] += carry; t[ 7] &= MASK_21; + carry = t[ 9] >> 21; t[10] += carry; t[ 9] &= MASK_21; + carry = t[11] >> 21; t[12] += carry; t[11] &= MASK_21; + carry = t[13] >> 21; t[14] += 
carry; t[13] &= MASK_21; + carry = t[15] >> 21; t[16] += carry; t[15] &= MASK_21; + carry = t[17] >> 21; t[18] += carry; t[17] &= MASK_21; + carry = t[19] >> 21; t[20] += carry; t[19] &= MASK_21; + carry = t[21] >> 21; t[22] += carry; t[21] &= MASK_21; + + t[11] -= t[23] * ORDER_0; + t[12] -= t[23] * ORDER_1; + t[13] -= t[23] * ORDER_2; + t[14] -= t[23] * ORDER_3; + t[15] -= t[23] * ORDER_4; + t[16] -= t[23] * ORDER_5; + + t[10] -= t[22] * ORDER_0; + t[11] -= t[22] * ORDER_1; + t[12] -= t[22] * ORDER_2; + t[13] -= t[22] * ORDER_3; + t[14] -= t[22] * ORDER_4; + t[15] -= t[22] * ORDER_5; + + t[ 9] -= t[21] * ORDER_0; + t[10] -= t[21] * ORDER_1; + t[11] -= t[21] * ORDER_2; + t[12] -= t[21] * ORDER_3; + t[13] -= t[21] * ORDER_4; + t[14] -= t[21] * ORDER_5; + + t[ 8] -= t[20] * ORDER_0; + t[ 9] -= t[20] * ORDER_1; + t[10] -= t[20] * ORDER_2; + t[11] -= t[20] * ORDER_3; + t[12] -= t[20] * ORDER_4; + t[13] -= t[20] * ORDER_5; + + t[ 7] -= t[19] * ORDER_0; + t[ 8] -= t[19] * ORDER_1; + t[ 9] -= t[19] * ORDER_2; + t[10] -= t[19] * ORDER_3; + t[11] -= t[19] * ORDER_4; + t[12] -= t[19] * ORDER_5; + + t[ 6] -= t[18] * ORDER_0; + t[ 7] -= t[18] * ORDER_1; + t[ 8] -= t[18] * ORDER_2; + t[ 9] -= t[18] * ORDER_3; + t[10] -= t[18] * ORDER_4; + t[11] -= t[18] * ORDER_5; + + carry = t[ 6] >> 21; t[ 7] += carry; t[ 6] &= MASK_21; + carry = t[ 8] >> 21; t[ 9] += carry; t[ 8] &= MASK_21; + carry = t[10] >> 21; t[11] += carry; t[10] &= MASK_21; + carry = t[12] >> 21; t[13] += carry; t[12] &= MASK_21; + carry = t[14] >> 21; t[15] += carry; t[14] &= MASK_21; + carry = t[16] >> 21; t[17] += carry; t[16] &= MASK_21; + carry = t[ 7] >> 21; t[ 8] += carry; t[ 7] &= MASK_21; + carry = t[ 9] >> 21; t[10] += carry; t[ 9] &= MASK_21; + carry = t[11] >> 21; t[12] += carry; t[11] &= MASK_21; + carry = t[13] >> 21; t[14] += carry; t[13] &= MASK_21; + carry = t[15] >> 21; t[16] += carry; t[15] &= MASK_21; + + t[ 5] -= t[17] * ORDER_0; + t[ 6] -= t[17] * ORDER_1; + t[ 7] -= t[17] * ORDER_2; + t[ 8] -= t[17] * ORDER_3; + t[ 9] -= t[17] * ORDER_4; + t[10] -= t[17] * ORDER_5; + + t[ 4] -= t[16] * ORDER_0; + t[ 5] -= t[16] * ORDER_1; + t[ 6] -= t[16] * ORDER_2; + t[ 7] -= t[16] * ORDER_3; + t[ 8] -= t[16] * ORDER_4; + t[ 9] -= t[16] * ORDER_5; + + t[ 3] -= t[15] * ORDER_0; + t[ 4] -= t[15] * ORDER_1; + t[ 5] -= t[15] * ORDER_2; + t[ 6] -= t[15] * ORDER_3; + t[ 7] -= t[15] * ORDER_4; + t[ 8] -= t[15] * ORDER_5; + + t[ 2] -= t[14] * ORDER_0; + t[ 3] -= t[14] * ORDER_1; + t[ 4] -= t[14] * ORDER_2; + t[ 5] -= t[14] * ORDER_3; + t[ 6] -= t[14] * ORDER_4; + t[ 7] -= t[14] * ORDER_5; + + t[ 1] -= t[13] * ORDER_0; + t[ 2] -= t[13] * ORDER_1; + t[ 3] -= t[13] * ORDER_2; + t[ 4] -= t[13] * ORDER_3; + t[ 5] -= t[13] * ORDER_4; + t[ 6] -= t[13] * ORDER_5; + + t[ 0] -= t[12] * ORDER_0; + t[ 1] -= t[12] * ORDER_1; + t[ 2] -= t[12] * ORDER_2; + t[ 3] -= t[12] * ORDER_3; + t[ 4] -= t[12] * ORDER_4; + t[ 5] -= t[12] * ORDER_5; + t[12] = 0; + + carry = t[ 0] >> 21; t[ 1] += carry; t[ 0] &= MASK_21; + carry = t[ 1] >> 21; t[ 2] += carry; t[ 1] &= MASK_21; + carry = t[ 2] >> 21; t[ 3] += carry; t[ 2] &= MASK_21; + carry = t[ 3] >> 21; t[ 4] += carry; t[ 3] &= MASK_21; + carry = t[ 4] >> 21; t[ 5] += carry; t[ 4] &= MASK_21; + carry = t[ 5] >> 21; t[ 6] += carry; t[ 5] &= MASK_21; + carry = t[ 6] >> 21; t[ 7] += carry; t[ 6] &= MASK_21; + carry = t[ 7] >> 21; t[ 8] += carry; t[ 7] &= MASK_21; + carry = t[ 8] >> 21; t[ 9] += carry; t[ 8] &= MASK_21; + carry = t[ 9] >> 21; t[10] += carry; t[ 9] &= MASK_21; + carry = t[10] >> 21; t[11] += carry; t[10] &= 
MASK_21; + carry = t[11] >> 21; t[12] += carry; t[11] &= MASK_21; + + t[ 0] -= t[12] * ORDER_0; + t[ 1] -= t[12] * ORDER_1; + t[ 2] -= t[12] * ORDER_2; + t[ 3] -= t[12] * ORDER_3; + t[ 4] -= t[12] * ORDER_4; + t[ 5] -= t[12] * ORDER_5; + + carry = t[ 0] >> 21; t[ 1] += carry; t[ 0] &= MASK_21; + carry = t[ 1] >> 21; t[ 2] += carry; t[ 1] &= MASK_21; + carry = t[ 2] >> 21; t[ 3] += carry; t[ 2] &= MASK_21; + carry = t[ 3] >> 21; t[ 4] += carry; t[ 3] &= MASK_21; + carry = t[ 4] >> 21; t[ 5] += carry; t[ 4] &= MASK_21; + carry = t[ 5] >> 21; t[ 6] += carry; t[ 5] &= MASK_21; + carry = t[ 6] >> 21; t[ 7] += carry; t[ 6] &= MASK_21; + carry = t[ 7] >> 21; t[ 8] += carry; t[ 7] &= MASK_21; + carry = t[ 8] >> 21; t[ 9] += carry; t[ 8] &= MASK_21; + carry = t[ 9] >> 21; t[10] += carry; t[ 9] &= MASK_21; + carry = t[10] >> 21; t[11] += carry; t[10] &= MASK_21; + + s[ 0] = (byte)(t[ 0] >> 0); + s[ 1] = (byte)(t[ 0] >> 8); + s[ 2] = (byte)((t[ 0] >> 16) | (t[ 1] << 5)); + s[ 3] = (byte)(t[ 1] >> 3); + s[ 4] = (byte)(t[ 1] >> 11); + s[ 5] = (byte)((t[ 1] >> 19) | (t[ 2] << 2)); + s[ 6] = (byte)(t[ 2] >> 6); + s[ 7] = (byte)((t[ 2] >> 14) | (t[ 3] << 7)); + s[ 8] = (byte)(t[ 3] >> 1); + s[ 9] = (byte)(t[ 3] >> 9); + s[10] = (byte)((t[ 3] >> 17) | (t[ 4] << 4)); + s[11] = (byte)(t[ 4] >> 4); + s[12] = (byte)(t[ 4] >> 12); + s[13] = (byte)((t[ 4] >> 20) | (t[ 5] << 1)); + s[14] = (byte)(t[ 5] >> 7); + s[15] = (byte)((t[ 5] >> 15) | (t[ 6] << 6)); + s[16] = (byte)(t[ 6] >> 2); + s[17] = (byte)(t[ 6] >> 10); + s[18] = (byte)((t[ 6] >> 18) | (t[ 7] << 3)); + s[19] = (byte)(t[ 7] >> 5); + s[20] = (byte)(t[ 7] >> 13); + s[21] = (byte)(t[ 8] >> 0); + s[22] = (byte)(t[ 8] >> 8); + s[23] = (byte)((t[ 8] >> 16) | (t[ 9] << 5)); + s[24] = (byte)(t[ 9] >> 3); + s[25] = (byte)(t[ 9] >> 11); + s[26] = (byte)((t[ 9] >> 19) | (t[10] << 2)); + s[27] = (byte)(t[10] >> 6); + s[28] = (byte)((t[10] >> 14) | (t[11] << 7)); + s[29] = (byte)(t[11] >> 1); + s[30] = (byte)(t[11] >> 9); + s[31] = (byte)(t[11] >> 17); +} +#else +static uint64_t load_6(const byte* a) +{ + uint64_t n; + n = ((uint64_t)a[0] << 0) | + ((uint64_t)a[1] << 8) | + ((uint64_t)a[2] << 16) | + ((uint64_t)a[3] << 24) | + ((uint64_t)a[4] << 32) | + ((uint64_t)a[5] << 40); + return n; +} + +static uint64_t load_7(const byte* a) +{ + uint64_t n; + n = ((uint64_t)a[0] << 0) | + ((uint64_t)a[1] << 8) | + ((uint64_t)a[2] << 16) | + ((uint64_t)a[3] << 24) | + ((uint64_t)a[4] << 32) | + ((uint64_t)a[5] << 40) | + ((uint64_t)a[6] << 48); + return n; +} + +#define MASK_42 0x3ffffffffffl +#define ORDER_0 0x31a5cf5d3edl +#define ORDER_1 0x1e735960498l +#define ORDER_2 0x14def9dea2fl + +/* +Input: + s[0]+256*s[1]+...+256^63*s[63] = s + +Output: + s[0]+256*s[1]+...+256^31*s[31] = s mod l + where l = 2^252 + 27742317777372353535851937790883648493. + Overwrites s in place. 
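+
+Same algorithm as the 32-bit path above, but carried out in __int128 with
+42-bit limbs: the 512-bit input becomes 12 limbs instead of 24 and
+l - 2^252 collapses to the three digits ORDER_0..ORDER_2.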
+/*
+Input:
+  s[0]+256*s[1]+...+256^63*s[63] = s
+
+Output:
+  s[0]+256*s[1]+...+256^31*s[31] = s mod l
+  where l = 2^252 + 27742317777372353535851937790883648493.
+  Overwrites s in place.
+*/
+void sc_reduce(byte* s)
+{
+    __int128_t t[12];
+    __int128_t carry;
+
+    t[ 0] = MASK_42 & (load_6(s + 0) >> 0);
+    t[ 1] = MASK_42 & (load_6(s + 5) >> 2);
+    t[ 2] = MASK_42 & (load_6(s + 10) >> 4);
+    t[ 3] = MASK_42 & (load_6(s + 15) >> 6);
+    t[ 4] = MASK_42 & (load_6(s + 21) >> 0);
+    t[ 5] = MASK_42 & (load_6(s + 26) >> 2);
+    t[ 6] = MASK_42 & (load_6(s + 31) >> 4);
+    t[ 7] = MASK_42 & (load_6(s + 36) >> 6);
+    t[ 8] = MASK_42 & (load_6(s + 42) >> 0);
+    t[ 9] = MASK_42 & (load_6(s + 47) >> 2);
+    t[10] = MASK_42 & (load_6(s + 52) >> 4);
+    t[11] = (load_7(s + 57) >> 6);
+
+    t[ 5] -= t[11] * ORDER_0;
+    t[ 6] -= t[11] * ORDER_1;
+    t[ 7] -= t[11] * ORDER_2;
+
+    t[ 4] -= t[10] * ORDER_0;
+    t[ 5] -= t[10] * ORDER_1;
+    t[ 6] -= t[10] * ORDER_2;
+
+    t[ 3] -= t[ 9] * ORDER_0;
+    t[ 4] -= t[ 9] * ORDER_1;
+    t[ 5] -= t[ 9] * ORDER_2;
+
+    carry = t[ 3] >> 42; t[ 4] += carry; t[ 3] &= MASK_42;
+    carry = t[ 5] >> 42; t[ 6] += carry; t[ 5] &= MASK_42;
+    carry = t[ 7] >> 42; t[ 8] += carry; t[ 7] &= MASK_42;
+    carry = t[ 4] >> 42; t[ 5] += carry; t[ 4] &= MASK_42;
+    carry = t[ 6] >> 42; t[ 7] += carry; t[ 6] &= MASK_42;
+
+    t[ 2] -= t[ 8] * ORDER_0;
+    t[ 3] -= t[ 8] * ORDER_1;
+    t[ 4] -= t[ 8] * ORDER_2;
+
+    t[ 1] -= t[ 7] * ORDER_0;
+    t[ 2] -= t[ 7] * ORDER_1;
+    t[ 3] -= t[ 7] * ORDER_2;
+
+    t[ 0] -= t[ 6] * ORDER_0;
+    t[ 1] -= t[ 6] * ORDER_1;
+    t[ 2] -= t[ 6] * ORDER_2;
+    t[ 6] = 0;
+
+    carry = t[ 0] >> 42; t[ 1] += carry; t[ 0] &= MASK_42;
+    carry = t[ 1] >> 42; t[ 2] += carry; t[ 1] &= MASK_42;
+    carry = t[ 2] >> 42; t[ 3] += carry; t[ 2] &= MASK_42;
+    carry = t[ 3] >> 42; t[ 4] += carry; t[ 3] &= MASK_42;
+    carry = t[ 4] >> 42; t[ 5] += carry; t[ 4] &= MASK_42;
+    carry = t[ 5] >> 42; t[ 6] += carry; t[ 5] &= MASK_42;
+
+    t[ 0] -= t[ 6] * ORDER_0;
+    t[ 1] -= t[ 6] * ORDER_1;
+    t[ 2] -= t[ 6] * ORDER_2;
+
+    carry = t[ 0] >> 42; t[ 1] += carry; t[ 0] &= MASK_42;
+    carry = t[ 1] >> 42; t[ 2] += carry; t[ 1] &= MASK_42;
+    carry = t[ 2] >> 42; t[ 3] += carry; t[ 2] &= MASK_42;
+    carry = t[ 3] >> 42; t[ 4] += carry; t[ 3] &= MASK_42;
+    carry = t[ 4] >> 42; t[ 5] += carry; t[ 4] &= MASK_42;
+
+    s[ 0] = (t[ 0] >> 0);
+    s[ 1] = (t[ 0] >> 8);
+    s[ 2] = (t[ 0] >> 16);
+    s[ 3] = (t[ 0] >> 24);
+    s[ 4] = (t[ 0] >> 32);
+    s[ 5] = (t[ 0] >> 40) | (t[ 1] << 2);
+    s[ 6] = (t[ 1] >> 6);
+    s[ 7] = (t[ 1] >> 14);
+    s[ 8] = (t[ 1] >> 22);
+    s[ 9] = (t[ 1] >> 30);
+    s[10] = (t[ 1] >> 38) | (t[ 2] << 4);
+    s[11] = (t[ 2] >> 4);
+    s[12] = (t[ 2] >> 12);
+    s[13] = (t[ 2] >> 20);
+    s[14] = (t[ 2] >> 28);
+    s[15] = (t[ 2] >> 36) | (t[ 3] << 6);
+    s[16] = (t[ 3] >> 2);
+    s[17] = (t[ 3] >> 10);
+    s[18] = (t[ 3] >> 18);
+    s[19] = (t[ 3] >> 26);
+    s[20] = (t[ 3] >> 34);
+    s[21] = (t[ 4] >> 0);
+    s[22] = (t[ 4] >> 8);
+    s[23] = (t[ 4] >> 16);
+    s[24] = (t[ 4] >> 24);
+    s[25] = (t[ 4] >> 32);
+    s[26] = (t[ 4] >> 40) | (t[ 5] << 2);
+    s[27] = (t[ 5] >> 6);
+    s[28] = (t[ 5] >> 14);
+    s[29] = (t[ 5] >> 22);
+    s[30] = (t[ 5] >> 30);
+    s[31] = (t[ 5] >> 38);
+}
+
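In Ed25519 this reduction is what turns 64-byte SHA-512 digests into canonical 32-byte scalars. A minimal usage sketch (editorial; digest_to_scalar is an illustrative name, not a wolfSSL function, and XMEMCPY is wolfSSL's memcpy macro):

    /* Illustrative only: reduce a 64-byte little-endian value to a scalar. */
    static void digest_to_scalar(byte r[32], byte h[64])
    {
        sc_reduce(h);       /* in place: h[0..31] <- h mod l           */
        XMEMCPY(r, h, 32);  /* h[32..63] is left untouched by sc_reduce */
    }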
+/*
+Input:
+  a[0]+256*a[1]+...+256^31*a[31] = a
+  b[0]+256*b[1]+...+256^31*b[31] = b
+  c[0]+256*c[1]+...+256^31*c[31] = c
+
+Output:
+  s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
+  where l = 2^252 + 27742317777372353535851937790883648493.
+*/
+void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
+{
+    uint64_t ad[6], bd[6], cd[6];
+    __int128_t t[12];
+    __int128_t carry;
+
+    ad[ 0] = MASK_42 & (load_6(a + 0) >> 0);
+    ad[ 1] = MASK_42 & (load_6(a + 5) >> 2);
+    ad[ 2] = MASK_42 & (load_6(a + 10) >> 4);
+    ad[ 3] = MASK_42 & (load_6(a + 15) >> 6);
+    ad[ 4] = MASK_42 & (load_6(a + 21) >> 0);
+    ad[ 5] = (load_6(a + 26) >> 2);
+    bd[ 0] = MASK_42 & (load_6(b + 0) >> 0);
+    bd[ 1] = MASK_42 & (load_6(b + 5) >> 2);
+    bd[ 2] = MASK_42 & (load_6(b + 10) >> 4);
+    bd[ 3] = MASK_42 & (load_6(b + 15) >> 6);
+    bd[ 4] = MASK_42 & (load_6(b + 21) >> 0);
+    bd[ 5] = (load_6(b + 26) >> 2);
+    cd[ 0] = MASK_42 & (load_6(c + 0) >> 0);
+    cd[ 1] = MASK_42 & (load_6(c + 5) >> 2);
+    cd[ 2] = MASK_42 & (load_6(c + 10) >> 4);
+    cd[ 3] = MASK_42 & (load_6(c + 15) >> 6);
+    cd[ 4] = MASK_42 & (load_6(c + 21) >> 0);
+    cd[ 5] = (load_6(c + 26) >> 2);
+
+    t[ 0] = cd[ 0] + (__int128_t)ad[ 0] * bd[ 0];
+    t[ 1] = cd[ 1] + (__int128_t)ad[ 0] * bd[ 1] + (__int128_t)ad[ 1] * bd[ 0];
+    t[ 2] = cd[ 2] + (__int128_t)ad[ 0] * bd[ 2] + (__int128_t)ad[ 1] * bd[ 1] +
+            (__int128_t)ad[ 2] * bd[ 0];
+    t[ 3] = cd[ 3] + (__int128_t)ad[ 0] * bd[ 3] + (__int128_t)ad[ 1] * bd[ 2] +
+            (__int128_t)ad[ 2] * bd[ 1] + (__int128_t)ad[ 3] * bd[ 0];
+    t[ 4] = cd[ 4] + (__int128_t)ad[ 0] * bd[ 4] + (__int128_t)ad[ 1] * bd[ 3] +
+            (__int128_t)ad[ 2] * bd[ 2] + (__int128_t)ad[ 3] * bd[ 1] +
+            (__int128_t)ad[ 4] * bd[ 0];
+    t[ 5] = cd[ 5] + (__int128_t)ad[ 0] * bd[ 5] + (__int128_t)ad[ 1] * bd[ 4] +
+            (__int128_t)ad[ 2] * bd[ 3] + (__int128_t)ad[ 3] * bd[ 2] +
+            (__int128_t)ad[ 4] * bd[ 1] + (__int128_t)ad[ 5] * bd[ 0];
+    t[ 6] = (__int128_t)ad[ 1] * bd[ 5] + (__int128_t)ad[ 2] * bd[ 4] +
+            (__int128_t)ad[ 3] * bd[ 3] + (__int128_t)ad[ 4] * bd[ 2] +
+            (__int128_t)ad[ 5] * bd[ 1];
+    t[ 7] = (__int128_t)ad[ 2] * bd[ 5] + (__int128_t)ad[ 3] * bd[ 4] +
+            (__int128_t)ad[ 4] * bd[ 3] + (__int128_t)ad[ 5] * bd[ 2];
+    t[ 8] = (__int128_t)ad[ 3] * bd[ 5] + (__int128_t)ad[ 4] * bd[ 4] +
+            (__int128_t)ad[ 5] * bd[ 3];
+    t[ 9] = (__int128_t)ad[ 4] * bd[ 5] + (__int128_t)ad[ 5] * bd[ 4];
+    t[10] = (__int128_t)ad[ 5] * bd[ 5];
+    t[11] = 0;
+
+    carry = t[ 0] >> 42; t[ 1] += carry; t[ 0] &= MASK_42;
+    carry = t[ 2] >> 42; t[ 3] += carry; t[ 2] &= MASK_42;
+    carry = t[ 4] >> 42; t[ 5] += carry; t[ 4] &= MASK_42;
+    carry = t[ 6] >> 42; t[ 7] += carry; t[ 6] &= MASK_42;
+    carry = t[ 8] >> 42; t[ 9] += carry; t[ 8] &= MASK_42;
+    carry = t[10] >> 42; t[11] += carry; t[10] &= MASK_42;
+    carry = t[ 1] >> 42; t[ 2] += carry; t[ 1] &= MASK_42;
+    carry = t[ 3] >> 42; t[ 4] += carry; t[ 3] &= MASK_42;
+    carry = t[ 5] >> 42; t[ 6] += carry; t[ 5] &= MASK_42;
+    carry = t[ 7] >> 42; t[ 8] += carry; t[ 7] &= MASK_42;
+    carry = t[ 9] >> 42; t[10] += carry; t[ 9] &= MASK_42;
+
+    t[ 5] -= t[11] * ORDER_0;
+    t[ 6] -= t[11] * ORDER_1;
+    t[ 7] -= t[11] * ORDER_2;
+
+    t[ 4] -= t[10] * ORDER_0;
+    t[ 5] -= t[10] * ORDER_1;
+    t[ 6] -= t[10] * ORDER_2;
+
+    t[ 3] -= t[ 9] * ORDER_0;
+    t[ 4] -= t[ 9] * ORDER_1;
+    t[ 5] -= t[ 9] * ORDER_2;
+
+    carry = t[ 3] >> 42; t[ 4] += carry; t[ 3] &= MASK_42;
+    carry = t[ 5] >> 42; t[ 6] += carry; t[ 5] &= MASK_42;
+    carry = t[ 7] >> 42; t[ 8] += carry; t[ 7] &= MASK_42;
+    carry = t[ 4] >> 42; t[ 5] += carry; t[ 4] &= MASK_42;
+    carry = t[ 6] >> 42; t[ 7] += carry; t[ 6] &= MASK_42;
+
+    t[ 2] -= t[ 8] * ORDER_0;
+    t[ 3] -= t[ 8] * ORDER_1;
+    t[ 4] -= t[ 8] * ORDER_2;
+
+    t[ 1] -= t[ 7] * ORDER_0;
+    t[ 2] -= t[ 7] * ORDER_1;
+    t[ 3] -= t[ 7] * ORDER_2;
+
+    t[ 0] -= t[ 6] * ORDER_0;
+    t[ 1] -= t[ 6] * ORDER_1;
+    t[ 2] -= t[ 6] * ORDER_2;
+    t[ 6] = 0;
+
+    carry = t[ 0] >> 42; t[ 1] += carry; t[ 0] &= MASK_42;
+    carry = t[ 1] >> 42; t[ 2] += carry; t[ 1] &= MASK_42;
+    carry = t[ 2] >> 42; t[ 3] += carry; t[ 2] &= MASK_42;
+    carry = t[ 3] >> 42; t[ 4] += carry; t[ 3] &= MASK_42;
+    carry = t[ 4] >> 42; t[ 5] += carry; t[ 4] &= MASK_42;
+    carry = t[ 5] >> 42; t[ 6] += carry; t[ 5] &= MASK_42;
+
+    t[ 0] -= t[ 6] * ORDER_0;
+    t[ 1] -= t[ 6] * ORDER_1;
+    t[ 2] -= t[ 6] * ORDER_2;
+
+    carry = t[ 0] >> 42; t[ 1] += carry; t[ 0] &= MASK_42;
+    carry = t[ 1] >> 42; t[ 2] += carry; t[ 1] &= MASK_42;
+    carry = t[ 2] >> 42; t[ 3] += carry; t[ 2] &= MASK_42;
+    carry = t[ 3] >> 42; t[ 4] += carry; t[ 3] &= MASK_42;
+    carry = t[ 4] >> 42; t[ 5] += carry; t[ 4] &= MASK_42;
+
+    s[ 0] = (t[ 0] >> 0);
+    s[ 1] = (t[ 0] >> 8);
+    s[ 2] = (t[ 0] >> 16);
+    s[ 3] = (t[ 0] >> 24);
+    s[ 4] = (t[ 0] >> 32);
+    s[ 5] = (t[ 0] >> 40) | (t[ 1] << 2);
+    s[ 6] = (t[ 1] >> 6);
+    s[ 7] = (t[ 1] >> 14);
+    s[ 8] = (t[ 1] >> 22);
+    s[ 9] = (t[ 1] >> 30);
+    s[10] = (t[ 1] >> 38) | (t[ 2] << 4);
+    s[11] = (t[ 2] >> 4);
+    s[12] = (t[ 2] >> 12);
+    s[13] = (t[ 2] >> 20);
+    s[14] = (t[ 2] >> 28);
+    s[15] = (t[ 2] >> 36) | (t[ 3] << 6);
+    s[16] = (t[ 3] >> 2);
+    s[17] = (t[ 3] >> 10);
+    s[18] = (t[ 3] >> 18);
+    s[19] = (t[ 3] >> 26);
+    s[20] = (t[ 3] >> 34);
+    s[21] = (t[ 4] >> 0);
+    s[22] = (t[ 4] >> 8);
+    s[23] = (t[ 4] >> 16);
+    s[24] = (t[ 4] >> 24);
+    s[25] = (t[ 4] >> 32);
+    s[26] = (t[ 4] >> 40) | (t[ 5] << 2);
+    s[27] = (t[ 5] >> 6);
+    s[28] = (t[ 5] >> 14);
+    s[29] = (t[ 5] >> 22);
+    s[30] = (t[ 5] >> 30);
+    s[31] = (t[ 5] >> 38);
+}
+#endif /* !HAVE___UINT128_T || NO_CURVED25519_128BIT */
+
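sc_muladd is the scalar primitive the Ed25519 signing path is built on: with e the reduced challenge H(R,A,M), a the secret scalar and r the nonce scalar, the second half of a signature is S = (e*a + r) mod l. A sketch of that final step (editorial; the names are illustrative, and the argument order follows the comment above, s = (a*b + c) mod l):

    /* Illustrative only: compute the signature scalar S = (e*a + r) mod l. */
    static void sign_final(byte S[32], const byte e[32], const byte a[32],
                           const byte r[32])
    {
        sc_muladd(S, e, a, r);
    }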
+/* Compress the affine point (xIn, yIn) into out: load (x, y, 1) into a
+ * ge_p3, serialize it with ge_p3_tobytes() and reverse the byte order. */
+int ge_compress_key(byte* out, const byte* xIn, const byte* yIn, word32 keySz)
+{
+    ge x,y,z;
+    ge_p3 g;
+    byte bArray[ED25519_KEY_SIZE];
+    word32 i;
+
+    fe_0(x);
+    fe_0(y);
+    fe_1(z);
+    fe_frombytes(x, xIn);
+    fe_frombytes(y, yIn);
+
+    fe_copy(g.X, x);
+    fe_copy(g.Y, y);
+    fe_copy(g.Z, z);
+
+    ge_p3_tobytes(bArray, &g);
+
+    for (i = 0; i < keySz; i++) {
+        out[keySz - 1 - i] = bArray[i];
+    }
+
+    return 0;
+}
+
+
+/*
+r = p + q
+*/
+static WC_INLINE void ge_add(ge_p1p1 *r,const ge_p3 *p,const ge_cached *q)
+{
+#ifndef CURVED25519_ASM
+    ge t0;
+    fe_add(r->X,p->Y,p->X);
+    fe_sub(r->Y,p->Y,p->X);
+    fe_mul(r->Z,r->X,q->YplusX);
+    fe_mul(r->Y,r->Y,q->YminusX);
+    fe_mul(r->T,q->T2d,p->T);
+    fe_mul(r->X,p->Z,q->Z);
+    fe_add(t0,r->X,r->X);
+    fe_sub(r->X,r->Z,r->Y);
+    fe_add(r->Y,r->Z,r->Y);
+    fe_add(r->Z,t0,r->T);
+    fe_sub(r->T,t0,r->T);
+#else
+    fe_ge_add(r->X, r->Y, r->Z, r->T, p->X, p->Y, p->Z, p->T, q->Z, q->T2d,
+              q->YplusX, q->YminusX);
+#endif
+}
+
+
+#ifndef CURVED25519_ASM
+/* ge_scalar mult base */
+static unsigned char equal(signed char b,signed char c)
+{
+    unsigned char ub = b;
+    unsigned char uc = c;
+    unsigned char x = ub ^ uc; /* 0: yes; 1..255: no */
+    uint32_t y = x; /* 0: yes; 1..255: no */
+    y -= 1; /* 4294967295: yes; 0..254: no */
+    y >>= 31; /* 1: yes; 0: no */
+    return (unsigned char)y;
+}
+
+
+static unsigned char negative(signed char b)
+{
+    return ((unsigned char)b) >> 7;
+}
+
+
+/* Constant-time select: copy u into t only when b == n, via fe_cmov, so the
+ * copy happens without a secret-dependent branch. */
+static WC_INLINE void cmov(ge_precomp *t,const ge_precomp *u,unsigned char b,
+                           unsigned char n)
+{
+    b = equal(b,n);
+    fe_cmov(t->yplusx,u->yplusx,b);
+    fe_cmov(t->yminusx,u->yminusx,b);
+    fe_cmov(t->xy2d,u->xy2d,b);
+}
+#endif
+
+#ifdef CURVED25519_ASM_64BIT
+/* Precomputed table of small multiples of the Ed25519 base point, used for
+ * fixed-base scalar multiplication. */
+static const ge_precomp base[64][8] = {
+{
+    {
+        { 0x2fbc93c6f58c3b85, -0x306cd2390473f1e7, 0x270b4898643d42c2, 0x07cf9d3a33d4ba65 },
+        { -0x62efc6fa28bf6ec2,
-0x02c660fa2ebf414d, -0x5a3e7bcb977075f7, 0x44fd2f9298f81267 }, + { -0x2442ea98b49044a7, 0x41e13f00eea2a5ea, -0x322b62e336a83906, 0x4f0ebe1faf16ecca } + }, + { + { -0x6ddb18036cc38e29, -0x60b9626985f00a4b, 0x5aa69a65e1d60702, 0x590c063fa87d2e2e }, + { -0x75665a9fbd4b2a58, -0x70d47ef3b19f530a, -0x1f61dc944e91c856, 0x6bb595a669c92555 }, + { 0x6e347eaadad36802, -0x450ca66c7c11b7fb, 0x3bcabe10e6076826, 0x49314f0a165ed1b8 } + }, + { + { -0x50da4f57b31168d0, 0x025a8430e8864b8a, -0x3ee4affd60fe98ce, 0x7a164e1b9a80f8f4 }, + { 0x56611fe8a4fcd265, 0x3bd353fde5c1ba7d, -0x7ece0ce5deb42943, 0x2ab91587555bda62 }, + { -0x640dee0b0e98b7cc, -0x47b194e809d2076b, -0x282190f8a48dd5b2, 0x549a04b963bb2a21 } + }, + { + { 0x287351b98efc099f, 0x6765c6f47dfd2538, -0x35cb72c204f56d9b, 0x680e910321e58727 }, + { -0x6a01faf5fa97e741, 0x327e89715660faa9, -0x3c171c32f95faf8d, 0x27933f4c7445a49a }, + { -0x40e1ba131aebd950, -0x1cd439c29245f06c, -0x1bd68b2a7307ad40, 0x44f079b1b0e64c18 } + }, + { + { -0x5ded43bbf75a44cd, -0x72afb73c38a112fe, -0x22e414f3a54013bc, 0x2945ccf146e206eb }, + { 0x7f9182c3a447d6ba, -0x2affeb2eb4d8d649, -0x1cc30ee3479b5f79, 0x154a7e73eb1b55f3 }, + { -0x37cd5e86182ffc4d, 0x5f729d0a00124d7e, 0x62c1d4a10e6d8ff3, 0x68b8ac5938b27a98 } + }, + { + { 0x3a0ceeeb77157131, -0x64d8ea76ff375078, -0x7f9a499725a658ca, 0x51e57bb6a2cc38bd }, + { 0x499806b67b7d8ca4, 0x575be28427d22739, -0x44f7a318dfbaac47, 0x38b64c41ae417884 }, + { -0x7062526e97621c5c, 0x175f2428f8fb9137, 0x050ab5329fcfb988, 0x7865dfa21354c09f } + }, + { + { 0x6b1a5cd0944ea3bf, 0x7470353ab39dc0d2, 0x71b2528228542e49, 0x461bea69283c927e }, + { -0x4590d36555cdde4f, 0x6ca021533bba23a7, -0x621589b06de6d3c6, 0x1d6edd5d2e5317e0 }, + { 0x217a8aacab0fda36, -0x5ad739abc2cab638, 0x37d05b8b13ab7568, 0x233cef623a2cbc37 } + }, + { + { 0x59b7596604dd3e8f, 0x6cb30377e288702c, -0x4ecc6399a1263cdd, 0x0915e76061bce52f }, + { -0x1d58a2120c6dcb27, -0x69c2897f1e4aa707, 0x2c2741ac6e3c23fb, 0x3a9024a1320e01c3 }, + { -0x208217ca57cb5c82, -0x741e63259767a816, 0x2c1185367167b326, 0x589eb3d9dbefd5c2 } + }, +}, +{ + { + { 0x322d04a52d9021f6, -0x463e60cc8a394064, 0x587a3a4342d20b09, 0x143b1cf8aa64fe61 }, + { 0x7ec851ca553e2df3, -0x58ed7b3459b7874d, -0x194a1be6cd772e19, 0x4cf210ec5a9a8883 }, + { -0x6079838269753555, 0x5f54258e27092729, -0x2f582cb415e7f68b, 0x21b546a3374126e1 } + }, + { + { 0x490a7a45d185218f, -0x65eac887b9fb6ccb, 0x0060ea09cc31e1f6, 0x7e041577f86ee965 }, + { -0x56b007a75d777cbd, -0x31f12ba9acec12c4, -0x0aa3c2304a40cb06, 0x0a653ca5c9eab371 }, + { 0x66b2a496ce5b67f3, -0x00ab6d2742a9686a, 0x503cec294a592cd0, 0x566943650813acb2 } + }, + { + { 0x5672f9eb1dabb69d, -0x458f4aca5017ac04, 0x47ac0f752796d66d, 0x32a5351794117275 }, + { -0x47e724f3d99df868, 0x5d5c31d9606e354a, 0x0982fa4f00a8cdc7, 0x17e12bcd4653e2d4 }, + { -0x2c59bb59209b7bc9, 0x703b6559880fbfdd, -0x347adabf52c5e55b, 0x0900b3f78e4c6468 } + }, + { + { -0x12d7f04137e952cf, 0x52d9595bd8e6efe3, 0x0fe71772f6c623f5, 0x4314030b051e293c }, + { 0x0a851b9f679d651b, -0x1ef7349efcccbd0e, -0x29fe0a801774cf5d, 0x371f3acaed2dd714 }, + { -0x2a9fffa1040f4353, -0x7148f0d12e78f3a2, 0x201f9033d084e6a0, 0x4c3a5ae1ce7b6670 } + }, + { + { -0x45078a1b36c25f23, -0x46cd7d588e46d6b3, -0x7f29c0480b393ba0, 0x6de9c73dea66c181 }, + { 0x4138a434dcb8fa95, -0x78f3098293697bf5, -0x21c77a8bd68417d4, 0x7c814db27262a55a }, + { 0x478904d5a04df8f2, -0x050451b54efebd2d, -0x0937539caaa2f668, 0x5aac4a412f90b104 } + }, + { + { 0x603a0d0abd7f5134, -0x7f7636cd1e2c51ba, -0x20da6ec67867429d, 0x1c145cd274ba0235 }, + { -0x39b0cd94c536d6f8, 0x5551b282e663e1e0, 
0x476b35f54a1a4b83, 0x1b9da3fe189f68c2 }, + { 0x32e8386475f3d743, 0x365b8baf6ae5d9ef, -0x7dadc749c7a497e2, 0x234929c1167d65e1 } + }, + { + { 0x48145cc21d099fcf, 0x4535c192cc28d7e5, -0x7f183e1ab7db81ff, 0x4a5f28743b2973ee }, + { -0x67b213545f885218, 0x383f77ad19eb389d, -0x38139481d6ab286c, 0x59c77b3aeb7c3a7a }, + { -0x2c5228dadda3309e, -0x6ee5cc7e4dead3a3, -0x274c6052a4f70783, 0x6f05606b4799fe3b } + }, + { + { 0x5b433149f91b6483, -0x524a239aa5d3409e, -0x78057bed9cd7d84d, 0x60895e91ab49f8d8 }, + { -0x6001616de884569e, -0x675118e2f21a351f, 0x3ff4ae942d831044, 0x714de12e58533ac8 }, + { -0x16130d12f30793e8, -0x4b92f9edf8ca202c, -0x43625f67fb469419, 0x73e2e62fd96dc26b } + }, +}, +{ + { + { 0x2eccdd0e632f9c1d, 0x51d0b69676893115, 0x52dfb76ba8637a58, 0x6dd37d49a00eef39 }, + { -0x12a49cabb655aea2, -0x579a3b60f4397dc6, -0x7af3e016a4bd2e3c, 0x30d76d6f03d315b9 }, + { 0x6c4444172106e4c7, -0x04ac297f6d728097, -0x4b8c615b96b2c0da, 0x10c697112e864bb0 } + }, + { + { 0x0ca62aa08358c805, 0x6a3d4ae37a204247, 0x7464d3a63b11eddc, 0x03bf9baf550806ef }, + { 0x6493c4277dbe5fde, 0x265d4fad19ad7ea2, 0x0e00dfc846304590, 0x25e61cabed66fe09 }, + { 0x3f13e128cc586604, 0x6f5873ecb459747e, -0x5f49c21233ed970b, 0x566d78634586e22c } + }, + { + { -0x5efabd7a39a5d030, 0x6c64112af31667c3, 0x680ae240731aee58, 0x14fba5f34793b22a }, + { 0x1637a49f9cc10834, -0x4371a92a57643baf, 0x1cb5ec0f7f7fd2db, 0x33975bca5ecc35d9 }, + { 0x3cd746166985f7d4, 0x593e5e84c9c80057, 0x2fc3f2b67b61131e, 0x14829cea83fc526c } + }, + { + { 0x21e70b2f4e71ecb8, -0x19a92246bf5b881d, -0x409aa93131e2b080, 0x05fc3bc4535d7b7e }, + { -0x00bc847b68226a3e, 0x6c744e30aa4eb5a7, -0x61f3a29ec37a1775, 0x2fd9c71e5f758173 }, + { 0x24b8b3ae52afdedd, 0x3495638ced3b30cf, 0x33a4bc83a9be8195, 0x373767475c651f04 } + }, + { + { 0x634095cb14246590, -0x10edebbfe93eaacb, -0x61c7ebf376ef43a0, 0x6bf5905730907c8c }, + { 0x2fba99fd40d1add9, -0x4cf8e990690b2fd9, 0x4363f05215f03bae, 0x1fbea56c3b18f999 }, + { 0x0fa778f1e1415b8a, 0x06409ff7bac3a77e, 0x6f52d7b89aa29a50, 0x02521cf67a635a56 } + }, + { + { -0x4eeb98df88d0a11c, -0x17076b4e69f86532, 0x4af8224d00ac824a, 0x001753d9f7cd6cc4 }, + { 0x513fee0b0a9d5294, -0x706718a3f020a59a, -0x2b9e7977401ef832, 0x3fa00a7e71382ced }, + { 0x3c69232d963ddb34, 0x1dde87dab4973858, -0x55282e065f6e0d7b, 0x12b5fe2fa048edb6 } + }, + { + { -0x20d483d95290e16e, 0x4b66d323504b8913, -0x73bf623f8ae3743d, 0x6f7e93c20796c7b8 }, + { 0x71f0fbc496fce34d, 0x73b9826badf35bed, -0x2dfb8d9e00d73a9f, 0x749b76f96fb1206f }, + { 0x1f5af604aea6ae05, -0x3edcae0e411b6367, 0x61a808b5eeff6b66, 0x0fcec10f01e02151 } + }, + { + { 0x3df2d29dc4244e45, 0x2b020e7493d8de0a, 0x6cc8067e820c214d, 0x413779166feab90a }, + { 0x644d58a649fe1e44, 0x21fcaea231ad777e, 0x02441c5a887fd0d2, 0x4901aa7183c511f3 }, + { 0x08b1b7548c1af8f0, -0x31f08583db9d664c, -0x089f4f06e1f926c7, 0x41bb887b726d1213 } + }, +}, +{ + { + { -0x68267f1f55c6082e, 0x35d0384252c6b51c, 0x7d43f49307cd55aa, 0x56bd36cfb78ac362 }, + { -0x6d987f93a983b628, 0x066d04ccca791e6a, -0x5960a9ba1c33c6b5, 0x5c95b686a0788cd2 }, + { 0x2ac519c10d14a954, -0x150b8b4b6b4a0570, -0x19507c7d560785a6, 0x0dea6db1879be094 } + }, + { + { 0x15baeb74d6a8797a, 0x7ef55cf1fac41732, 0x29001f5a3c8b05c5, 0x0ad7cc8752eaccfb }, + { -0x559940ab8cbb1a55, -0x25eda77770e4bcf7, 0x5e87d2b3fd564b2f, 0x5b2c78885483b1dd }, + { 0x52151362793408cf, -0x14f0e8fce669c26c, -0x57cc4d0577c26b9a, 0x093a7fa775003c78 } + }, + { + { -0x47169fbb9f56ed7a, 0x7f3fd8047778d3de, 0x67d01e31bf8a5e2d, 0x7b038a06c27b653e }, + { -0x1aef8219c5e92842, -0x5c880023650ccd31, 0x70d5bf18440b677f, 
0x6a252b19a4a31403 }, + { -0x6126e62a2c966f0d, 0x5213aebbdb4eb9f2, -0x38f715fab3466ecb, 0x58ded57f72260e56 } + }, + { + { -0x2592acd9a4f02b75, -0x769f7dce6c405678, -0x287536cd9e2a81d8, 0x79f2942d3a5c8143 }, + { 0x78e79dade9413d77, -0x0da8062a68d61983, 0x59db910ee37aa7e6, 0x6aa11b5bbb9e039c }, + { -0x6825d0da49377217, 0x251ba7eaacf20169, 0x09b44f87ef4eb4e4, 0x7d90ab1bbc6a7da5 } + }, + { + { 0x1a07a3f496b3c397, 0x11ceaa188f4e2532, 0x7d9498d5a7751bf0, 0x19ed161f508dd8a0 }, + { -0x6533597c58fe9402, -0x6fafa0b20d3af493, 0x6b610d5fcce435aa, 0x19a10d446198ff96 }, + { 0x560a2cd687dce6ca, 0x7f3568c48664cf4d, -0x78be16addd7fc5c8, 0x483bdab1595653fc } + }, + { + { -0x2930b2f54b257f0a, -0x7db7c1ba07cf8020, 0x05005269ae6f9da4, 0x1c7052909cf7877a }, + { -0x0587f0eb78cb05b7, 0x106f0b70360534e0, 0x2210776fe3e307bd, 0x3286c109dde6a0fe }, + { 0x32ee7de2874e98d4, 0x14c362e9b97e0c60, 0x5781dcde6a60a38a, 0x217dd5eaaa7aa840 } + }, + { + { -0x7420e0464173f138, 0x00bae7f8e30a0282, 0x4963991dad6c4f6c, 0x07058a6e5df6f60a }, + { -0x62483b2fdb71e150, -0x1f89681eb28b40ae, 0x1e6a9b173c562354, 0x7fa7c21f795a4965 }, + { -0x1614fd3b24ce0981, -0x12da0276ef4304d5, 0x46c8131f5c5cddb4, 0x33b21c13a0cb9bce } + }, + { + { -0x6550464fa11c73a5, -0x4062d2b1f8e5ec39, -0x7111919216ccd6f6, 0x1c3bab17ae109717 }, + { 0x360692f8087d8e31, -0x0b2339c82d8e9c09, 0x25a4e62065ea5963, 0x659bf72e5ac160d9 }, + { 0x1c9ab216c7cab7b0, 0x7d65d37407bbc3cc, 0x52744750504a58d5, 0x09f2606b131a2990 } + }, +}, +{ + { + { 0x7e234c597c6691ae, 0x64889d3d0a85b4c8, -0x251d36f3cab50519, 0x0a871e070c6a9e1d }, + { 0x40e87d44744346be, 0x1d48dad415b52b25, 0x7c3a8a18a13b603e, 0x4eb728c12fcdbdf7 }, + { 0x3301b5994bbc8989, 0x736bae3a5bdd4260, 0x0d61ade219d59e3c, 0x3ee7300f2685d464 } + }, + { + { 0x43fa7947841e7518, -0x1a3905a69c63b929, -0x5ef9a1e21cfad48c, 0x7d47c6a2cfb89030 }, + { -0x0a2daa1b61822949, -0x7fe9eea39ef4e154, 0x3c99975d92e187ca, 0x13815762979125c2 }, + { 0x3fdad0148ef0d6e0, -0x62c18b656eab90c4, 0x71ec621026bb8157, 0x148cf58d34c9ec80 } + }, + { + { -0x1da8d082651b8a93, 0x56c345bb88f3487f, -0x602ef492969f5773, 0x278febad4eaea1b9 }, + { 0x46a492f67934f027, 0x469984bef6840aa9, 0x5ca1bc2a89611854, 0x3ff2fa1ebd5dbbd4 }, + { -0x4e5597e0736cc69a, -0x73de6b63dfd6f368, 0x39115291219d3c52, 0x4104dd02fe9c677b } + }, + { + { -0x7edeb1f924f69548, 0x21a8b6c90ce44f35, 0x6524c12a409e2af5, 0x0165b5a48efca481 }, + { 0x72b2bf5e1124422a, -0x5e05f3cc675cc54b, -0x6b349efe05ad499a, 0x2c863b00afaf53d5 }, + { -0x0e6f5b8b5f7b958a, 0x12eff984cd2f7cc0, 0x695e290658aa2b8f, 0x591b67d9bffec8b8 } + }, + { + { -0x66464c8e60e74aa3, -0x1b9a1a055e739be2, 0x61081136c29f05ed, 0x489b4f867030128b }, + { 0x312f0d1c80b49bfa, 0x5979515eabf3ec8a, 0x727033c09ef01c88, 0x3de02ec7ca8f7bcb }, + { -0x2dcdefd2c5146d11, -0x1e9dac4b9ee9579f, 0x3d7eabe7190baa24, 0x49f5fbba496cbebf } + }, + { + { 0x155d628c1e9c572e, -0x75b279533a77b8bf, -0x6e5cad09aea89c15, 0x06a1a6c28867515b }, + { 0x30949a108a5bcfd4, -0x23bf228f439b8c15, -0x6d3d6b3ecf83f2e4, 0x5604a86dcbfa6e74 }, + { 0x7288d1d47c1764b6, 0x72541140e0418b51, -0x60fce59fe753092f, 0x20989e89fe2742c6 } + }, + { + { 0x1674278b85eaec2e, 0x5621dc077acb2bdf, 0x640a4c1661cbf45a, 0x730b9950f70595d3 }, + { 0x499777fd3a2dcc7f, 0x32857c2ca54fd892, -0x5d86279b2df81c60, 0x0403ed1d0ca67e29 }, + { -0x36b4d2ca78b13aae, -0x3a19373067db9073, -0x0834b905e93fca32, 0x5bd7454308303dcc } + }, + { + { -0x7a3b6cdeea1886d6, -0x39b3765d42322237, -0x62e1c257525c289e, 0x5bb7db123067f82c }, + { 0x7f9ad19528b24cc2, 0x7f6b54656335c181, 0x66b8b66e4fc07236, 0x133a78007380ad83 }, + { 
0x0961f467c6ca62be, 0x04ec21d6211952ee, 0x182360779bd54770, 0x740dca6d58f0e0d2 } + }, +}, +{ + { + { 0x3906c72aed261ae5, -0x65497026771eff09, -0x0a16fa650cc9fe69, 0x0e53dc78bf2b6d47 }, + { 0x50b70bf5d3f0af0b, 0x4feaf48ae32e71f7, 0x60e84ed3a55bbd34, 0x00ed489b3f50d1ed }, + { -0x46f7d640868e7886, 0x5e4444636d17e631, 0x4d05c52e18276893, 0x27632d9a5a4a4af5 } + }, + { + { -0x567d7a2e78150025, -0x5a4b0444272f579c, -0x49a70d80fdd99c09, 0x3bbc2b22d99ce282 }, + { -0x2ee00faeab4d9f32, -0x27923c718d06ad90, 0x601fcd0d267cc138, 0x2b67916429e90ccd }, + { -0x46e836ada7c3f5a8, 0x653ff9b80fe4c6f3, -0x64f258284320c3f4, 0x43a0eeb6ab54d60e } + }, + { + { 0x3ac6322357875fe8, -0x262b0b130a043471, -0x72117b6cc7d449e0, 0x50c5eaa14c799fdc }, + { 0x396966a46d4a5487, -0x07ee5e7553d44c46, 0x66e4685b5628b26b, 0x70a477029d929b92 }, + { -0x22f12374290d04c4, 0x54c63aa79cc7b7a0, -0x51f4fcd4d37260e6, 0x6f9ce107602967fb } + }, + { + { 0x139693063520e0b5, 0x437fcf7c88ea03fe, -0x082b3bf42c36a644, 0x699154d1f893ded9 }, + { -0x52efab4e321e3dd6, -0x3b5716fdb714cd21, 0x5f3e7b33accdc0ea, 0x72364713fc79963e }, + { 0x315d5c75b4b27526, -0x33347bd2fdc9255b, 0x22f0c8a3345fee8e, 0x73975a617d39dbed } + }, + { + { 0x6f37f392f4433e46, 0x0e19b9a11f566b18, 0x220fb78a1fd1d662, 0x362a4258a381c94d }, + { -0x1bfdb2069c8a25f0, 0x78d3251a1830c870, -0x6fd4e6b79a7326e4, 0x7e18b10b29b7438a }, + { -0x6f8e26ecd49414d1, 0x0f26e9ad28418247, -0x1546e13642136da3, 0x4be65bc8f48af2de } + }, + { + { 0x1d50fba257c26234, 0x7bd4823adeb0678b, -0x3d4f239159ac750b, 0x5665eec6351da73e }, + { 0x78487feba36e7028, 0x5f3f13001dd8ce34, -0x6cb04ed2b4cf3b77, 0x056c244d397f0a2b }, + { -0x24c11ff6bc404df0, 0x4972018720800ac2, 0x26ab5d6173bd8667, 0x20b209c2ab204938 } + }, + { + { 0x1fcca94516bd3289, 0x448d65aa41420428, 0x59c3b7b216a55d62, 0x49992cc64e612cd8 }, + { 0x549e342ac07fb34b, 0x02d8220821373d93, -0x43d9d28f532e0a99, 0x7a92c9fdfbcac784 }, + { 0x65bd1bea70f801de, 0x1befb7c0fe49e28a, -0x579cf9324e4d51b6, 0x3b7ac0cd265c2a09 } + }, + { + { -0x0f2ab1b0dd12c659, -0x5d5516e1a9f7eaf6, -0x0bde4d161225178b, 0x31bc531d6b7de992 }, + { -0x7dd411bc73fe4314, 0x530cb525c0fbc73b, 0x48519034c1953fe9, 0x265cc261e09a0f5b }, + { -0x20c2ecb2567f068f, 0x7a4fb8d1221a22a7, 0x3df7d42035aad6d8, 0x2a14edcc6a1a125e } + }, +}, +{ + { + { 0x231a8c570478433c, -0x484ad8f13d7ebc63, -0x245566151c26f861, 0x2c03f5256c2b03d9 }, + { -0x20b711f8ad3031b2, -0x3c00050cf913f749, 0x05710b2ab95459c4, 0x161d25fa963ea38d }, + { 0x790f18757b53a47d, 0x307b0130cf0c5879, 0x31903d77257ef7f9, 0x699468bdbd96bbaf } + }, + { + { -0x2722c2199556e6b8, 0x485064c22fc0d2cc, -0x64b7db99cb0215d1, 0x293e1c4e6c4a2e3a }, + { -0x42e0d0b90b250131, 0x7cef0114a47fd6f7, -0x2ce00225b5b84c81, 0x525219a473905785 }, + { 0x376e134b925112e1, 0x703778b5dca15da0, -0x4fba7650b9e3ceef, 0x5b605c447f032823 } + }, + { + { 0x3be9fec6f0e7f04c, -0x7995a8618a1cb69e, 0x5542ef161e1de61a, 0x2f12fef4cc5abdd5 }, + { -0x469a7fa6df3b8377, -0x180feff36dc47034, 0x0001256502e2ef77, 0x24a76dcea8aeb3ee }, + { 0x0a4522b2dfc0c740, 0x10d06e7f40c9a407, -0x3930ebbe87300998, 0x5e607b2518a43790 } + }, + { + { -0x5fd3bce35a6930ec, -0x1c3bd2bf512c1c00, -0x2dbad97fd1f0d925, 0x201f33139e457068 }, + { 0x58b31d8f6cdf1818, 0x35cfa74fc36258a2, -0x1e4c00b09919e292, 0x5067acab6ccdd5f7 }, + { -0x02ad8094f7fc62af, 0x18b14964017c0006, -0x2addf14fd1da5b58, 0x397cba8862460375 } + }, + { + { 0x7815c3fbc81379e7, -0x599e6bdf221ed50f, -0x00563f077a57022b, 0x771b4022c1e1c252 }, + { 0x30c13093f05959b2, -0x1dc55e721656868a, 0x222fd491721d5e26, 0x2339d320766e6c3a }, + { -0x27822679aec5d059, 
-0x0a53648e062b30f8, -0x2f943ce4e15d7c4d, 0x331a189219971a76 } + }, + { + { 0x26512f3a9d7572af, 0x5bcbe28868074a9e, -0x7b123e3eee7f083c, 0x1ac9619ff649a67b }, + { -0x0ae990ba04b07f3a, -0x63c938219e388a31, -0x1c2b17e46fbe26e4, 0x31167c6b83bdfe21 }, + { -0x0dd4c7bdadb4ef98, 0x5068343bee9ce987, -0x03628e7bb59daf38, 0x612436341f08b111 } + }, + { + { -0x749cb61ce5d2d9c8, -0x622048ff642c02cb, 0x7f8bf1b8a3a06ba4, 0x1522aa3178d90445 }, + { -0x2662be2478b17673, 0x09fea5f16c07dc20, 0x793d2c67d00f9bbc, 0x46ebe2309e5eff40 }, + { 0x2c382f5369614938, -0x2501bf6548d292f0, -0x1737cc6e49b90dd9, 0x45fe70f50524306c } + }, + { + { 0x62f24920c8951491, 0x05f007c83f630ca2, 0x6fbb45d2f5c9d4b8, 0x16619f6db57a2245 }, + { -0x25b78a5969f3f474, 0x5b68d076ef0e2f20, 0x07fb51cf3d0b8fd4, 0x428d1623a0e392d4 }, + { 0x084f4a4401a308fd, -0x57dde63c895a3554, -0x214721b9bc2e4383, 0x1d81592d60bd38c6 } + }, +}, +{ + { + { 0x3a4a369a2f89c8a1, 0x63137a1d7c8de80d, -0x4353ff7587125feb, 0x2cb8b3a5b483b03f }, + { -0x27cc284113d5b3c8, 0x2c9162830acc20ed, -0x16c5b8556d208a7f, 0x702d67a3333c4a81 }, + { 0x36e417cbcb1b90a1, 0x33b3ddaa7f11794e, 0x3f510808885bc607, 0x24141dc0e6a8020d } + }, + { + { -0x6e6da233427cea83, 0x3ca1205322cc8094, 0x28e57f183f90d6e4, 0x1a4714cede2e767b }, + { 0x59f73c773fefee9d, -0x4c0e10763e306763, -0x1ca204bd1fd1aba1, 0x5766120b47a1b47c }, + { -0x24df45f047494801, -0x48cd3c4988aee05f, -0x56d4ae3f660fd277, 0x4f3875ad489ca5f1 } + }, + { + { 0x79ed13f6ee73eec0, -0x5a39ad9296eef44f, -0x1b76d73c79fc79f4, 0x722a1446fd7059f5 }, + { -0x380389d0b6cd54de, 0x7ac0edf72f4c3c1b, 0x5f6b55aa9aa895e8, 0x3680274dad0a0081 }, + { -0x2f6a6016573077e7, -0x2f566aaf7b8a5664, 0x6eac173320b09cc5, 0x628ecf04331b1095 } + }, + { + { -0x64be5307a38b330f, -0x498cce7ef7d9adaf, -0x6636d512ee524eb9, 0x7a47d70d34ecb40f }, + { -0x67434ee7562f2244, -0x11bb61cbf74b7fd5, -0x78f76dd947594efc, 0x685f349a45c7915d }, + { 0x60a0c4cbcc43a4f5, 0x775c66ca3677bea9, -0x5e855e8ad0070a13, 0x11ded9020e01fdc0 } + }, + { + { 0x471f95b03bea93b7, 0x0552d7d43313abd3, -0x426c8f1d1e81c085, 0x7b120f1db20e5bec }, + { -0x76f187f6351018fc, -0x78d7d6921cf17394, 0x4c5cd2a392aeb1c9, 0x194263d15771531f }, + { 0x17d2fb3d86502d7a, -0x4a9b27bbaf596cae, 0x7da962c8a60ed75d, 0x00d0f85b318736aa } + }, + { + { -0x598ac3e10289de3f, 0x69c0b4a7445671f5, -0x68e0ad8bfa4dc3ef, 0x387bc74851a8c7cd }, + { -0x6874ebd188837b03, -0x0bfd9bb8fa573f9e, -0x59852ae41819ed39, 0x2f7b459698dd6a33 }, + { -0x7e76b4b2b5ad5658, -0x5226c1ed09477cd1, 0x184d8548b61bd638, 0x3f1c62dbd6c9f6cd } + }, + { + { 0x3fad3e40148f693d, 0x052656e194eb9a72, 0x2f4dcbfd184f4e2f, 0x406f8db1c482e18b }, + { 0x2e8f1f0091910c1f, -0x5b20b01f400d1ed4, 0x60c6560aee927438, 0x6338283facefc8fa }, + { -0x619cf2d380e6e11c, 0x4fbf8301bc3ff670, 0x787d8e4e7afb73c4, 0x50d83d5be8f58fa5 } + }, + { + { -0x3f53306f4b2c4993, -0x58fa621a9e8cd1a0, 0x033d1f7870c6b0ba, 0x584161cd26d946e4 }, + { -0x7a97c6e93ee5e769, 0x2d69a4efe506d008, 0x39af1378f664bd01, 0x65942131361517c6 }, + { -0x440d4e5f8d2d835e, -0x40c6c3a6042138fc, -0x167244311d9d47e2, 0x02eebd0b3029b589 } + }, +}, +{ + { + { -0x789a4960847a3a18, 0x6ff0678bd168bab2, 0x3a70e77c1d330f9b, 0x3a5f6d51b0af8e7c }, + { 0x61368756a60dac5f, 0x17e02f6aebabdc57, 0x7f193f2d4cce0f7d, 0x20234a7789ecdcf0 }, + { 0x76d20db67178b252, 0x071c34f9d51ed160, -0x09d5b5df4c1bee90, 0x7cd682353cffe366 } + }, + { + { -0x599a329f97530b0d, 0x42d92d183cd7e3d3, 0x5759389d336025d9, 0x3ef0253b2b2cd8ff }, + { 0x0be1a45bd887fab6, 0x2a846a32ba403b6e, -0x266defed1691a000, 0x2838c8863bdc0943 }, + { -0x2e944f30b5b9afd0, -0x05b694beea3a8855, 
-0x7d3051750b54be63, 0x21dcb8a606a82812 } + }, + { + { -0x6572ff054188ce46, -0x7dfc9f819d61e777, -0x4d33fdc8bc0c2681, 0x5d840dbf6c6f678b }, + { 0x5c6004468c9d9fc8, 0x2540096ed42aa3cb, 0x125b4d4c12ee2f9c, 0x0bc3d08194a31dab }, + { 0x706e380d309fe18b, 0x6eb02da6b9e165c7, 0x57bbba997dae20ab, 0x3a4276232ac196dd } + }, + { + { 0x3bf8c172db447ecb, 0x5fcfc41fc6282dbd, -0x7f53003f8a55ea02, 0x0770c9e824e1a9f9 }, + { 0x4b42432c8a7084fa, -0x7675e61c20461abb, -0x4160ffde63a71ba3, 0x1ff177cea16debd1 }, + { -0x309e2665ba4a4a03, -0x79f67b16e4c586dc, -0x18cff6e6cfc1c177, 0x39f264fd41500b1e } + }, + { + { -0x2e64b55401f6841f, -0x5b92031e201fe6d7, -0x3c36f76bd3590e01, 0x65c621272c35f14e }, + { -0x5852cbe824181d64, -0x426bc895d463ec64, -0x5f16e4716ca68457, 0x1712d73468889840 }, + { -0x18d4760731ce6c23, 0x4d103356a125c0bb, 0x0419a93d2e1cfe83, 0x22f9800ab19ce272 } + }, + { + { 0x42029fdd9a6efdac, -0x46ed3141cb5ab6bf, 0x640f64b987bdf37b, 0x4171a4d38598cab4 }, + { 0x605a368a3e9ef8cb, -0x1c163fdd5aafb8eb, 0x553d48b05f24248f, 0x13f416cd647626e5 }, + { -0x05d8a7556636b374, 0x23006f6fb000b807, -0x042d6e225225ac6e, 0x508214fa574bd1ab } + }, + { + { 0x461a15bb53d003d6, -0x4defd777430c369b, 0x27c576756c683a5a, 0x3a7758a4c86cb447 }, + { -0x3dfd96eac12901b5, -0x59a598c6aee2883c, -0x3421d9b9d3eb506c, 0x22f960ec6faba74b }, + { 0x548111f693ae5076, 0x1dae21df1dfd54a6, 0x12248c90f3115e65, 0x5d9fd15f8de7f494 } + }, + { + { 0x3f244d2aeed7521e, -0x71c56fd7bcd169eb, -0x1e9b4588d163e92c, 0x3bc187fa47eb98d8 }, + { 0x031408d36d63727f, 0x6a379aefd7c7b533, -0x561e703a33511db5, 0x332f35914f8fbed3 }, + { 0x6d470115ea86c20c, -0x6675483493b92edb, -0x2887cd4ac599fe78, 0x450d81ce906fba03 } + }, +}, +{ + { + { 0x23264d66b2cae0b5, 0x7dbaed33ebca6576, 0x030ebed6f0d24ac8, 0x2a887f78f7635510 }, + { -0x0751b2d527bac6fe, 0x7018058ee8db2d1d, -0x554c66a0382d3ee2, 0x53b16d2324ccca79 }, + { 0x2a23b9e75c012d4f, 0x0c974651cae1f2ea, 0x2fb63273675d70ca, 0x0ba7250b864403f5 } + }, + { + { -0x229ca76c79079264, 0x61699176e13a85a4, 0x2e5111954eaa7d57, 0x32c21b57fb60bdfb }, + { -0x44f2e702fd639bdf, -0x43d2ebde76d670fe, -0x7cb8071974daf16a, 0x7b9f2fe8032d71c9 }, + { -0x2787dc32ce61f880, -0x103b303e76888a3b, 0x4854fb129a0ab3f7, 0x12c49d417238c371 } + }, + { + { 0x09b3a01783799542, 0x626dd08faad5ee3f, -0x45ff4311148feb61, 0x1421b246a0a444c9 }, + { 0x0950b533ffe83769, 0x21861c1d8e1d6bd1, -0x0fdd27c7ecfd1af0, 0x2509200c6391cab4 }, + { 0x4aa43a8e8c24a7c7, 0x04c1f540d8f05ef5, -0x5245a1f3f4c14624, 0x2ab5504448a49ce3 } + }, + { + { -0x23f8539ce3a2c506, 0x58615171f9df8c6c, 0x72a079d89d73e2b0, 0x7301f4ceb4eae15d }, + { 0x2ed227266f0f5dec, -0x67db11bea12af7dc, -0x7f8413836b972beb, 0x7093bae1b521e23f }, + { 0x6409e759d6722c41, -0x598b1e308d408d65, -0x43f5db14c3de1a97, 0x390167d24ebacb23 } + }, + { + { -0x2844fab45d0dedf5, -0x1d4631514efa7649, 0x3fe8bac8f3c0edbe, 0x4cbd40767112cb69 }, + { 0x27f58e3bba353f1c, 0x4c47764dbf6a4361, -0x50443b1a91a9d9b0, 0x07db2ee6aae1a45d }, + { 0x0b603cc029c58176, 0x5988e3825cb15d61, 0x2bb61413dcf0ad8d, 0x7b8eec6c74183287 } + }, + { + { 0x32fee570fc386b73, -0x2574febe25c57339, -0x68a002f537697ca7, 0x6ee809a1b132a855 }, + { -0x1b35bf87d32d8350, -0x25063cdc04169843, -0x4d642cb5752be162, 0x72810497626ede4d }, + { -0x6bbb44ce030279c6, 0x2fe3690a3e4e48c5, -0x23d637982f7705db, 0x13bd1e38d173292e } + }, + { + { 0x223fb5cf1dfac521, 0x325c25316f554450, 0x030b98d7659177ac, 0x1ed018b64f88a4bd }, + { -0x2cd4b327969eb64b, -0x1aa6c8287e275549, 0x0bcb2127ae122b94, 0x41e86fcfb14099b0 }, + { 0x3630dfa1b802a6b0, -0x77f078b8bd52c42b, 0x0af90d6ceec5a4d4, 
0x746a247a37cdc5d9 } + }, + { + { 0x6eccd85278d941ed, 0x2254ae83d22f7843, -0x3add2fd184403249, 0x681e3351bff0e4e2 }, + { -0x2ace4742d484650a, 0x5005093537fc5b51, 0x232fcf25c593546d, 0x20a365142bb40f49 }, + { -0x749b4a627cfcb0bb, 0x2f8b71f21fa20efb, 0x69249495ba6550e4, 0x539ef98e45d5472b } + }, +}, +{ + { + { -0x2f8b2769e3518bc1, -0x0792e70a11e39c13, -0x68423aa4180b12d7, 0x4cbad279663ab108 }, + { 0x6e7bb6a1a6205275, -0x55b0de28bec3717d, 0x6f56d155e88f5cb2, 0x2de25d4ba6345be1 }, + { -0x7f2e6fdb5f28e033, -0x3ada3df504d77508, -0x4e5c68b4a0c59be7, 0x7d7fbcefe2007233 } + }, + { + { -0x3283a23a0c3d6f6c, -0x387e5d65d56efa55, -0x7f39e2c9bde3cfa8, 0x4f9cd196dcd8d4d7 }, + { -0x0510e195d994d7ff, -0x7993973b2a8c60ea, -0x0975d043e4fc89d4, 0x5975435e87b75a8d }, + { 0x199297d86a7b3768, -0x2f2fa7dbe52e859d, -0x45fd6352a3e3f3e9, 0x7ccdd084387a0307 } + }, + { + { -0x64f37be7989f336d, -0x3251ff85e54cd567, -0x577213799df425e8, 0x3593ca848190ca44 }, + { -0x2359bdd392d9fbe9, -0x51eac2af6b7dbf43, -0x563f3e4b04973989, 0x428bd0ed61d0cf53 }, + { -0x6dece765a17b6559, -0x2b273cca9a270533, -0x73adaba4ac02442f, 0x27398308da2d63e6 } + }, + { + { -0x465ef1b3f58fdbad, 0x0fa25866d57d1bde, -0x0046264a32d82509, 0x572c2945492c33fd }, + { 0x42c38d28435ed413, -0x42af0c9fcd873337, -0x44f854e58625fc11, 0x269597aebe8c3355 }, + { -0x388038ba2932cf42, -0x1b20172c1c455105, -0x5dd377cf55a225f4, 0x7f985498c05bca80 } + }, + { + { -0x2ca9eaadf0409c9d, 0x08045a45cf4dfba6, -0x113db04378c05f3e, 0x30f2653cd69b12e7 }, + { 0x3849ce889f0be117, -0x7ffa52e484ab5d78, 0x3da3c39f23fc921c, 0x76c2ec470a31f304 }, + { -0x75f736c7553ef37b, 0x46179b60db276bcb, -0x56df3fe1f1905390, 0x2f1273f1596473da } + }, + { + { 0x30488bd755a70bc0, 0x06d6b5a4f1d442e7, -0x152e596143a69e9e, 0x38ac1997edc5f784 }, + { 0x4739fc7c8ae01e11, -0x02ad8b6fb5955461, 0x41d98a8287728f2e, 0x5d9e572ad85b69f2 }, + { 0x0666b517a751b13b, 0x747d06867e9b858c, -0x53533feebab221b7, 0x22dfcd9cbfe9e69c } + }, + { + { 0x56ec59b4103be0a1, 0x2ee3baecd259f969, 0x797cb29413f5cd32, 0x0fe9877824cde472 }, + { -0x72242d1f3cf2f327, -0x527199a05344bccd, -0x7094da73cdd569e1, 0x6b2916c05448c1c7 }, + { 0x7edb34d10aba913b, 0x4ea3cd822e6dac0e, 0x66083dff6578f815, 0x4c303f307ff00a17 } + }, + { + { 0x29fc03580dd94500, -0x132d855b9044136d, 0x130a155fc2e2a7f8, 0x416b151ab706a1d5 }, + { -0x2cf5c429e84d737b, -0x3a2c8848c688c416, -0x39391873e195a341, 0x0d61b8f78b2ab7c4 }, + { 0x56a8d7efe9c136b0, -0x42f81a32a71bb4e0, -0x5019d025e4a81f55, 0x191a2af74277e8d2 } + }, +}, +{ + { + { 0x09d4b60b2fe09a14, -0x3c7b0f50244e8b82, 0x58e2ea8978b5fd6e, 0x519ef577b5e09b0a }, + { -0x2aaff6a45490b67b, 0x04f4cd5b4fbfaf1a, -0x6271d12ed5f38ac0, 0x2bc24e04b2212286 }, + { 0x1863d7d91124cca9, 0x7ac08145b88a708e, 0x2bcd7309857031f5, 0x62337a6e8ab8fae5 } + }, + { + { -0x2e54cdb1e4c5ed8d, 0x18947cf181055340, 0x3b5d9567a98c196e, 0x7fa00425802e1e68 }, + { 0x4bcef17f06ffca16, -0x21f91e2496d51e96, 0x0753702d614f42b0, 0x5f6041b45b9212d0 }, + { 0x7d531574028c2705, -0x7fce829624f28a02, 0x30fface8ef8c8ddd, 0x7e9de97bb6c3e998 } + }, + { + { -0x0ffb419d5db2bf23, -0x45f9a66efbad2be1, -0x7e3ba11e9d5bbdcc, 0x4cb829d8a22266ef }, + { 0x1558967b9e6585a3, -0x6836631f6716746e, 0x10af149b6eb3adad, 0x42181fe8f4d38cfa }, + { 0x1dbcaa8407b86681, 0x081f001e8b26753b, 0x3cd7ce6a84048e81, 0x78af11633f25f22c } + }, + { + { 0x3241c00e7d65318c, -0x19411a232f179219, 0x118b2dc2fbc08c26, 0x680d04a7fc603dc3 }, + { -0x7be9142bf4af4544, 0x1508722628208bee, -0x5ceb7050463e3c93, 0x0d07daacd32d7d5d }, + { -0x063dbeb596a55c15, -0x255bd3b3fa5970df, 0x7c6c23987f93963e, 
0x210e8cd30c3954e3 } + }, + { + { 0x2b50f16137fe6c26, -0x1efd4327a91bfb28, 0x12b0f1414c561f6b, 0x51b17bc8d028ec91 }, + { -0x53bdfe0def58e3fa, 0x6a65e0aef3bfb021, -0x43bd3ca3c6c9cd09, 0x56ea8db1865f0742 }, + { -0x000a04b430acaee7, -0x0b67628620eef760, -0x4203159a65c45cdb, 0x18a11f1174d1a6f2 } + }, + { + { -0x0429c3252d85a0d4, -0x0ff03b43755ef929, 0x53fb5c1a8e64a430, 0x04eaabe50c1a2e85 }, + { 0x407375ab3f6bba29, -0x613c492766e1b7d2, -0x6637f17d1aa06d17, 0x307c13b6fb0c0ae1 }, + { 0x24751021cb8ab5e7, -0x03dcbbb6a3afef15, 0x5f1e717b4e5610a1, 0x44da5f18c2710cd5 } + }, + { + { -0x6ea9019476271534, -0x19486bae1dced95f, -0x428b9c26c6bb14b2, 0x726373f6767203ae }, + { 0x033cc55ff1b82eb5, -0x4ea51c92bee351ae, -0x45bf49e67004532d, 0x768edce1532e861f }, + { -0x1cfa358d14810976, 0x662cf31f70eadb23, 0x18f026fdb4c45b68, 0x513b5384b5d2ecbd } + }, + { + { 0x5e2702878af34ceb, -0x6ff4fbf646b92952, 0x6512ebf7dabd8512, 0x61d9b76988258f81 }, + { 0x46d46280c729989e, 0x4b93fbd05368a5dd, 0x63df3f81d1765a89, 0x34cebd64b9a0a223 }, + { -0x593a58ecb64826b5, -0x5c0c2ea7dc146bba, 0x0416fbd277484834, 0x69d45e6f2c70812f } + }, +}, +{ + { + { -0x6019d4bcb0b9f105, -0x212cfc2b59c9f82a, -0x0faddef1485f25dc, 0x237e7dbe00545b93 }, + { -0x31e908b43ac3ebcf, 0x2b9725ce2072edde, -0x47463c904a4dc119, 0x7e2e0e450b5cc908 }, + { 0x013575ed6701b430, 0x231094e69f0bfd10, 0x75320f1583e47f22, 0x71afa699b11155e3 } + }, + { + { -0x15bdc3e3b8c4af2a, 0x51e87a1f3b38ef10, -0x647b40a04d36416b, 0x00731fbc78f89a1c }, + { 0x65ce6f9b3953b61d, -0x39a7c615505ebe1a, 0x0f435ffda9f759fe, 0x021142e9c2b1c28e }, + { -0x1bcf38e7b707e780, -0x4069f3dda1313ee7, -0x49251f7c9445ea1d, 0x4c4d6f3347e15808 } + }, + { + { 0x2f0cddfc988f1970, 0x6b916227b0b9f51b, 0x6ec7b6c4779176be, 0x38bf9500a88f9fa8 }, + { 0x18f7eccfc17d1fc9, 0x6c75f5a651403c14, -0x24218ed40811f321, 0x193fddaaa7e47a22 }, + { 0x1fd2c93c37e8876f, -0x5d09e1a5e72eb9d4, 0x5080f58239241276, 0x6a6fb99ebf0d4969 } + }, + { + { -0x114edd4a491bdc3a, -0x6c628fef0d790072, -0x6f56d57ce230a274, 0x136fda9f42c5eb10 }, + { 0x6a46c1bb560855eb, 0x2416bb38f893f09d, -0x28e2eec8708e533f, 0x75f76914a31896ea }, + { -0x06b3204e5cfa422f, 0x0f364b9d9ff82c08, 0x2a87d8a5c3bb588a, 0x022183510be8dcba } + }, + { + { -0x62a58efebccf8581, -0x4f9c21613b825ba1, 0x22bbfe52be927ad3, 0x1387c441fd40426c }, + { 0x4af766385ead2d14, -0x5f71277f3583a7d0, 0x0d13a6e610211e3d, 0x6a071ce17b806c03 }, + { -0x4a2c3c2e78687508, 0x722b5a3d7f0e4413, 0x0d7b4848bb477ca0, 0x3171b26aaf1edc92 } + }, + { + { -0x59f248274d75b82f, -0x5940eb29e88f5b0f, -0x2b5e076cac2242a8, 0x6c514a63344243e9 }, + { -0x56d0ce6f68a9b358, -0x008447b3dd8a1ee7, 0x4f55fe37a4875150, 0x221fd4873cf0835a }, + { 0x2322204f3a156341, -0x048c1f1645f5fcd3, -0x031f22b3bef0fcf2, 0x48daa596fb924aaa } + }, + { + { 0x14f61d5dc84c9793, -0x66be061c10be7dfa, -0x320a4770cb9d8854, 0x58c837fa0e8a79a9 }, + { 0x6eca8e665ca59cc7, -0x57b8dab4d1c75360, 0x31afc708d21e17ce, 0x676dd6fccad84af7 }, + { 0x0cf9688596fc9058, 0x1ddcbbf37b56a01b, -0x233d1882b6ca2996, 0x1c4f73f2c6a57f0a } + }, + { + { -0x4c918f910383cb7c, 0x73dfc9b4c3c1cf61, -0x14e2863687e3381b, 0x70459adb7daf675c }, + { 0x0e7a4fbd305fa0bb, -0x7d62b31fab399c53, -0x0bde3c7cd01cc7b8, 0x795ac80d1bf64c42 }, + { 0x1b91db4991b42bb3, 0x572696234b02dcca, -0x6020611ae0738724, 0x5fe162848ce21fd3 } + }, +}, +{ + { + { 0x315c29c795115389, -0x281f1af879d08b32, 0x0c4a762185927432, 0x72de6c984a25a1e4 }, + { -0x1d86f551b2f883bf, -0x746c7d8f248b965d, 0x6eb632dc8abd16a2, 0x720814ecaa064b72 }, + { -0x51654aac40955cf0, 0x050a50a9806d6e1b, -0x6d448bfc5200aec7, 0x0394d27645be618b 
} + }, + { + { -0x0ac69bda4dcaba5c, 0x15a7a27e98fbb296, -0x5493ad439c90227a, 0x79d995a8419334ee }, + { 0x4d572251857eedf4, -0x1c8db1221e616c3b, -0x758ebdf1f4868fcb, 0x3b3c833687abe743 }, + { -0x32757159ee6a228b, -0x5afb2757e22657d1, 0x540dca81a35879b6, 0x60dd16a379c86a8a } + }, + { + { 0x3501d6f8153e47b8, -0x485698abeb5d09f4, 0x112ee8b6455d9523, 0x4e62a3c18112ea8a }, + { 0x35a2c8487381e559, 0x596ffea6d78082cb, -0x34688e14245849ad, 0x5a08b5019b4da685 }, + { -0x372b53fbae95487a, 0x595af3215295b23d, -0x29122dcb24fdcf3f, 0x0929efe8825b41cc } + }, + { + { -0x74ce8d4852a99ae3, 0x01581b7a3fabd717, 0x2dc94df6424df6e4, 0x30376e5d2c29284f }, + { 0x5f0601d1cbd0f2d3, 0x736e412f6132bb7f, -0x7c9fbbcddc722179, 0x1e3a5272f5c0753c }, + { -0x2d6e72587ea65a64, 0x6bdc1cd93f0713f3, 0x565f7a934acd6590, 0x53daacec4cb4c128 } + }, + { + { -0x667ad43c7ad30250, 0x2cc12e9559d6ed0b, 0x70f9e2bf9b5ac27b, 0x4f3b8c117959ae99 }, + { 0x4ca73bd79cc8a7d6, 0x4d4a738f47e9a9b2, -0x0b340ed6bd0a0200, 0x01a13ff9bdbf0752 }, + { 0x55b6c9c82ff26412, 0x1ac4a8c91fb667a8, -0x2ad840301488740e, 0x303337da7012a3be } + }, + { + { -0x6892c334052d022f, -0x34777c68c859bf58, 0x2ff00c1d6734cb25, 0x269ff4dc789c2d2b }, + { -0x6aabdddd73e36284, 0x01fac1371a9b340f, 0x7e8d9177925b48d7, 0x53f8ad5661b3e31b }, + { 0x0c003fbdc08d678d, 0x4d982fa37ead2b17, -0x3f8194324d1a7d0f, 0x296c7291df412a44 } + }, + { + { -0x204dcdfa25474a62, 0x465aeaa0c8092250, -0x2ecc3ee7658da2e8, 0x2327370261f117d1 }, + { 0x7903de2b33daf397, -0x2f00f9e63659db4d, -0x75e2dad4aaa4c1e8, 0x2b6d581c52e0b7c0 }, + { 0x3d0543d3623e7986, 0x679414c2c278a354, -0x51bc0f338d9e690a, 0x7836c41f8245eaba } + }, + { + { -0x359ae17b7fee6c84, -0x394f3b91910be5d8, -0x48fde458a0c072ae, 0x119dff99ead7b9fd }, + { -0x185dab24b616a57f, 0x5192d5d008b0ad73, 0x4d20e5b1d00afc07, 0x5d55f8012cf25f38 }, + { 0x43eadfcbf4b31d4d, -0x39afc08beeeb776e, -0x0111973af9f2c4e9, 0x329293b3dd4a0ac8 } + }, +}, +{ + { + { 0x2879852d5d7cb208, -0x4721228f97820d19, -0x23f40054de97876f, 0x2b44c043677daa35 }, + { 0x4e59214fe194961a, 0x49be7dc70d71cd4f, -0x6cff302dc4af0dd3, 0x4789d446fc917232 }, + { 0x1a1c87ab074eb78e, -0x05392e7166250b99, 0x3eacbbcd484f9067, 0x60c52eef2bb9a4e4 } + }, + { + { 0x702bc5c27cae6d11, 0x44c7699b54a48cab, -0x1043bfa945b6d14e, 0x70d77248d9b6676d }, + { 0x0b5d89bc3bfd8bf1, -0x4f946dc8360caae6, 0x0e4c16b0d53028f5, 0x10bc9c312ccfcaab }, + { -0x557517b4c13d5fa5, -0x6796610b12e87e20, 0x794513e4708e85d1, 0x63755bd3a976f413 } + }, + { + { 0x3dc7101897f1acb7, 0x5dda7d5ec165bbd8, 0x508e5b9c0fa1020f, 0x2763751737c52a56 }, + { -0x4aa05fc1d52ef7ad, 0x356f75909ee63569, -0x60060e0241964770, 0x0d8cc1c48bc16f84 }, + { 0x029402d36eb419a9, -0x0f4bb181884b9f5b, -0x30579dcf2bc3b6aa, 0x70c2dd8a7ad166e7 } + }, + { + { -0x6e2b6982471281ed, 0x74252f0ad776817a, -0x1bf67d1ff27ada9c, 0x32b8613816a53ce5 }, + { 0x656194509f6fec0e, -0x11d18156b939ae73, -0x68cc3e0c981f64a4, 0x2e0fac6363948495 }, + { 0x79e7f7bee448cd64, 0x6ac83a67087886d0, -0x07602b265f1b24d2, 0x4179215c735a4f41 } + }, + { + { -0x1b51cc46d79432cc, -0x48108149aa622924, 0x278b141fb3d38e1f, 0x31fa85662241c286 }, + { -0x738f6b18282312d6, -0x6804753cb82c6390, -0x1ec41fcc56f926fe, 0x700344a30cd99d76 }, + { -0x507d93bdd1c9dd0c, -0x3edfd67867ccafd3, -0x643e481ed4c76edd, 0x24bb2312a9952489 } + }, + { + { 0x41f80c2af5f85c6b, 0x687284c304fa6794, -0x76ba20665c45e453, 0x0d1d2af9ffeb5d16 }, + { -0x4e5712e8cd21983d, 0x3cb49418461b4948, -0x7142bcbc8930432e, 0x0fee3e871e188008 }, + { -0x5625755ecd9de121, 0x30b822a159226579, 0x4004197ba79ac193, 0x16acd79718531d76 } + }, + { + { 
-0x36a6393a87784953, -0x6b1e6152a06f0146, 0x16e24e62a342f504, 0x164ed34b18161700 }, + { 0x72df72af2d9b1d3d, 0x63462a36a432245a, 0x3ecea07916b39637, 0x123e0ef6b9302309 }, + { 0x487ed94c192fe69a, 0x61ae2cea3a911513, -0x7884092c465b21d9, 0x78da0fc61073f3eb } + }, + { + { -0x5d607f0e97f3c56c, 0x71f77e151ae9e7e6, 0x1100f15848017973, 0x054aa4b316b38ddd }, + { 0x5bf15d28e52bc66a, 0x2c47e31870f01a8e, 0x2419afbc06c28bdd, 0x2d25deeb256b173a }, + { -0x2037b972e6d98348, 0x0b28789c66e54daf, 0x2aeb1d2a666eec17, 0x134610a6ab7da760 } + }, +}, +{ + { + { -0x26ebcf1f23fd73c4, 0x0eb955a85217c771, 0x4b09e1ed2c99a1fa, 0x42881af2bd6a743c }, + { -0x350aa13d83a64dc1, -0x665112c1eab2fb0e, 0x68441d72e14141f4, 0x140345133932a0a2 }, + { 0x7bfec69aab5cad3d, -0x3dc1732cb34d3053, 0x685dd14bfb37d6a2, 0x0ad6d64415677a18 } + }, + { + { 0x7914892847927e9f, 0x33dad6ef370aa877, 0x1f8f24fa11122703, 0x5265ac2f2adf9592 }, + { 0x781a439e417becb5, 0x4ac5938cd10e0266, 0x5da385110692ac24, 0x11b065a2ade31233 }, + { 0x405fdd309afcb346, -0x268dc2bbd719c0ac, -0x6b3fe20fa09a5552, 0x43e4dc3ae14c0809 } + }, + { + { -0x1590853c523d395d, -0x2f16d709168e836c, -0x1d2c861529ba150b, 0x46dd8785c51ffbbe }, + { -0x43ed380e56c75ae9, 0x473028ab3180b2e1, 0x3f78571efbcd254a, 0x74e534426ff6f90f }, + { 0x709801be375c8898, 0x4b06dab5e3fd8348, 0x75880ced27230714, 0x2b09468fdd2f4c42 } + }, + { + { 0x5b97946582ffa02a, -0x25f695ae01570ab7, -0x5f9caec8a0885065, 0x1bcfde61201d1e76 }, + { -0x6838b61148fe346a, -0x7c0bc72b495c963d, 0x62962b8b9a402cd9, 0x6976c7509888df7b }, + { 0x4a4a5490246a59a2, -0x29c1422117802270, -0x26bc8398f2dc8e06, 0x69e87308d30f8ed6 } + }, + { + { 0x0f80bf028bc80303, 0x6aae16b37a18cefb, -0x22b815b828d3295d, 0x61943588f4ed39aa }, + { 0x435a8bb15656beb0, -0x07053645b0b2a436, -0x464d873beab73f8b, 0x3eb0ef76e892b622 }, + { -0x2d91a3c16efc607b, -0x3f161882090cc557, -0x176973aa8ff9956d, 0x3c34d1881faaaddd } + }, + { + { -0x42a4f470d0001f27, 0x6aa254103ed24fb9, 0x2ac7d7bcb26821c4, 0x605b394b60dca36a }, + { 0x3f9d2b5ea09f9ec0, 0x1dab3b6fb623a890, -0x5f645c158d26d93c, 0x374193513fd8b36d }, + { -0x4b17a91ba562e12e, -0x1017b7899368565e, -0x4efb309be1a11183, 0x2f50b81c88a71c8f } + }, + { + { 0x2b552ca0a7da522a, 0x3230b336449b0250, -0x0d3b3a435b466047, 0x7b2c674958074a22 }, + { 0x31723c61fc6811bb, -0x634bafb79dee7ff1, 0x768933d347995753, 0x3491a53502752fcd }, + { -0x2aae9a77c12d7321, 0x12d84fd2d362de39, 0x0a874ad3e3378e4f, 0x000d2b1f7c763e74 } + }, + { + { -0x69db8873c16b5755, 0x0ad6f3cee9a78bec, -0x6b75387ef28bc3b1, 0x76627935aaecfccc }, + { 0x3d420811d06d4a67, -0x4103fb7a6f1f001d, -0x078f394842b78422, 0x6e2a7316319afa28 }, + { 0x56a8ac24d6d59a9f, -0x37248ac1cf690ffa, 0x477f41e68f4c5299, 0x588d851cf6c86114 } + }, +}, +{ + { + { -0x32d59a18882e0aeb, 0x548991878faa60f1, -0x4e48c4432543f91b, 0x654878cba97cc9fb }, + { 0x51138ec78df6b0fe, 0x5397da89e575f51b, 0x09207a1d717af1b9, 0x2102fdba2b20d650 }, + { -0x69611bfafaa3195f, 0x36bca7681251ad29, 0x3a1af517aa7da415, 0x0ad725db29ecb2ba } + }, + { + { -0x013843f364fa907b, 0x537d5268e7f5ffd7, 0x77afc6624312aefa, 0x4f675f5302399fd9 }, + { -0x23bd984e7cb1dba9, -0x498abb4a8f31e43b, 0x1af07a0bf7d15ed7, 0x4aefcffb71a03650 }, + { -0x3cd2c9c9fbeae8e2, -0x32d410ee7667b7c5, -0x78f591522f6baef0, 0x0bccbb72a2a86561 } + }, + { + { 0x186d5e4c50fe1296, -0x1fc6847d01176082, 0x3bc7f6c5507031b0, 0x6678fd69108f37c2 }, + { 0x185e962feab1a9c8, -0x791819ca9aeb8233, -0x4f6d1fce44a4920e, 0x4024f0ab59d6b73e }, + { 0x1586fa31636863c2, 0x07f68c48572d33f2, 0x4f73cc9f789eaefc, 0x2d42e2108ead4701 } + }, + { + { 0x21717b0d0f537593, 
-0x6eb196f4ece1f9b4, 0x1bb687ae752ae09f, 0x420bf3a79b423c6e }, + { -0x680aecea6b202d65, 0x6155985d313f4c6a, -0x145ec0f8f7baaff0, 0x676b2608b8d2d322 }, + { -0x7ec7459ae3a4d4b9, -0x798e4913cee4e480, 0x7bff0cb1bc3135b0, 0x745d2ffa9c0cf1e0 } + }, + { + { 0x6036df5721d34e6a, -0x4e2477d866844c30, -0x2c3df63c378a9506, 0x06e15be54c1dc839 }, + { -0x40ada5e1d4363743, -0x15a4d9f7d9b8627f, -0x2aee38f120feaa25, 0x1ae23ceb960cf5d0 }, + { 0x5b725d871932994a, 0x32351cb5ceb1dab0, 0x7dc41549dab7ca05, 0x58ded861278ec1f7 } + }, + { + { 0x2dfb5ba8b6c2c9a8, 0x48eeef8ef52c598c, 0x33809107f12d1573, 0x08ba696b531d5bd8 }, + { -0x27e8c86c0d993aa4, -0x3736893a33bab1b7, 0x5ce382f8bc26c3a8, 0x2ff39de85485f6f9 }, + { 0x77ed3eeec3efc57a, 0x04e05517d4ff4811, -0x15c285c00e598e35, 0x120633b4947cfe54 } + }, + { + { -0x7d42ceb8b6edeff6, -0x21dc8492819041fa, -0x1ee189e6ee15863a, 0x07433be3cb393bde }, + { 0x0b94987891610042, 0x4ee7b13cecebfae8, 0x70be739594f0a4c0, 0x35d30a99b4d59185 }, + { -0x0086bb3fa316680c, 0x575d3de4b05c51a3, 0x583381fd5a76847c, 0x2d873ede7af6da9f } + }, + { + { -0x559dfd1eb1a2067f, -0x5df2a6e8afea1e0b, 0x18a275d3bae21d6c, 0x0543618a01600253 }, + { 0x157a316443373409, -0x054748110b557e27, -0x4f6c01190a59b7fa, 0x2e773654707fa7b6 }, + { 0x0deabdf4974c23c1, -0x5590f5da6231b96d, 0x04202cb8a29aba2c, 0x4b1443362d07960d } + }, +}, +{ + { + { 0x299b1c3f57c5715e, -0x69346d6194979270, 0x3004806447235ab3, 0x2c435c24a44d9fe1 }, + { 0x47b837f753242cec, 0x256dc48cc04212f2, -0x1ddd04041e26d73b, 0x48ea295bad8a2c07 }, + { 0x0607c97c80f8833f, 0x0e851578ca25ec5b, 0x54f7450b161ebb6f, 0x7bcb4792a0def80e } + }, + { + { 0x1cecd0a0045224c2, 0x757f1b1b69e53952, 0x775b7a925289f681, 0x1b6cc62016736148 }, + { -0x7b781c2fd438c9a7, 0x4baf8445059979df, -0x2e8368a523529041, 0x57369f0bdefc96b6 }, + { -0x0e5666fe8a9c7968, 0x353dd1beeeaa60d3, -0x7b6b8eccb3645b78, 0x63fa6e6843ade311 } + }, + { + { 0x2195becdd24b5eb7, 0x5e41f18cc0cd44f9, -0x20d7f8bbbe356122, 0x07073b98f35b7d67 }, + { -0x2ea3dfac9a683e98, -0x608c8bff672d7877, 0x18aee7f13257ba1f, 0x3418bfda07346f14 }, + { -0x2fc39893b31acf2c, 0x0b64c0473b5df9f4, 0x065cef8b19b3a31e, 0x3084d661533102c9 } + }, + { + { -0x6593178989fcde03, 0x7fe2b5109eb63ad8, 0x00e7d4ae8ac80592, 0x73d86b7abb6f723a }, + { -0x1e094861407b9653, 0x15801004e2663135, -0x65b67ccf508be7e5, 0x3ba2504f049b673c }, + { 0x0b52b5606dba5ab6, -0x56ecb0f0444e1255, 0x30a9520d9b04a635, 0x6813b8f37973e5db } + }, + { + { -0x0e6b35a90cea81d7, 0x136d35705ef528a5, -0x22b3108874fa6644, 0x7d5472af24f833ed }, + { -0x67ab4fabccbed83f, 0x105d047882fbff25, -0x24b60806bbe790b1, 0x1768e838bed0b900 }, + { -0x2f1078b250cc25b9, 0x00d3be5db6e339f9, 0x3f2a8a2f9c9ceece, 0x5d1aeb792352435a } + }, + { + { 0x12c7bfaeb61ba775, -0x47b19de01d9c4003, 0x0b47a5c35c840dcf, 0x7e83be0bccaf8634 }, + { -0x0a61944ce6329c36, 0x670c159221d06839, -0x4f92a9a4deaf354a, 0x20fb199d104f12a3 }, + { 0x61943dee6d99c120, -0x79efe0d1b9f46020, 0x6bb2f1518ee8598d, 0x76b76289fcc475cc } + }, + { + { 0x4245f1a1522ec0b3, 0x558785b22a75656d, 0x1d485a2548a1b3c0, 0x60959eccd58fe09f }, + { 0x791b4cc1756286fa, -0x24312ce828b5ea84, 0x7e732421ea72bde6, 0x01fe18491131c8e9 }, + { 0x3ebfeb7ba8ed7a09, 0x49fdc2bbe502789c, 0x44ebce5d3c119428, 0x35e1eb55be947f4a } + }, + { + { 0x14fd6dfa726ccc74, 0x3b084cfe2f53b965, -0x0cc51b0aad5d374c, 0x59aab07a0d40166a }, + { -0x242518fe3a8c722d, -0x063909ca4d90e412, 0x61e96a8042f15ef4, 0x3aa1d11faf60a4d8 }, + { 0x77bcec4c925eac25, 0x1848718460137738, 0x5b374337fea9f451, 0x1865e78ec8e6aa46 } + }, +}, +{ + { + { -0x6983ab16e3ad6335, 0x30f6269264c635fb, 
0x2747aff478121965, 0x17038418eaf66f5c }, + { -0x333b48384991e086, 0x44157e25f50c2f7e, 0x3ef06dfc713eaf1c, 0x582f446752da63f7 }, + { -0x39ce842cdfcdb31c, -0x57efbd175bb7743c, -0x4de10e74b1a5ec9c, 0x0c2a1c4bcda28dc9 } + }, + { + { -0x123b7eb7964296bb, 0x0d6d907dbe1c8d22, -0x39c42ded2aa33a55, 0x5a6a9b30a314dc83 }, + { -0x2db2382f90e0fbb9, -0x4dd961c124783fa7, -0x2ea4fd8d044d2d71, 0x7c558bd1c6f64877 }, + { -0x2f13eadb2c69b9c3, 0x12bb628ac35a24f0, -0x5af3c586e343a05c, 0x0404a5ca0afbafc3 } + }, + { + { 0x62bc9e1b2a416fd1, -0x4a3908d71cafa675, 0x04343fd83d5d6967, 0x39527516e7f8ee98 }, + { -0x73e0bff8f558bc2a, -0x33452f34a4d9a118, 0x574b046b668fd2de, 0x46395bfdcadd9633 }, + { 0x117fdb2d1a5d9a9c, -0x6388ba432effa3d6, -0x102b410eab2a9016, 0x76579a29e822d016 } + }, + { + { 0x333cb51352b434f2, -0x27cdd7b66c217f1f, -0x4aaed7788af2ca32, 0x02c514bb2a2777c1 }, + { 0x45b68e7e49c02a17, 0x23cd51a2bca9a37f, 0x3ed65f11ec224c1b, 0x43a384dc9e05bdb1 }, + { 0x684bd5da8bf1b645, -0x04742c81094ab4ad, 0x313916d7a9b0d253, 0x1160920961548059 } + }, + { + { 0x7a385616369b4dcd, 0x75c02ca7655c3563, 0x7dc21bf9d4f18021, 0x2f637d7491e6e042 }, + { -0x4bb2e996d6253056, -0x25ad60b37beca671, -0x16109c35bac2aaa7, 0x351e125bc5698e0b }, + { -0x2b4b64b9e5098442, -0x29fcfc853754769f, 0x71dee19ff9a699fb, 0x7f182d06e7ce2a9a } + }, + { + { 0x09454b728e217522, -0x55a7170b2b7b4728, -0x2ca7dab280b96fc4, 0x44acc043241c5217 }, + { 0x7a7c8e64ab0168ec, -0x34a5b5aaea123abd, 0x095519d347cd0eda, 0x67d4ac8c343e93b0 }, + { 0x1c7d6bbb4f7a5777, -0x74ca012b6e7cec1f, 0x4adca1c6c96b4684, 0x556d1c8312ad71bd } + }, + { + { -0x7e0f98a94ee417df, 0x0faff82310a3f3dd, -0x074d2faa9566b9a3, 0x097abe38cc8c7f05 }, + { 0x17ef40e30c8d3982, 0x31f7073e15a3fa34, 0x4f21f3cb0773646e, 0x746c6c6d1d824eff }, + { 0x0c49c9877ea52da4, 0x4c4369559bdc1d43, 0x022c3809f7ccebd2, 0x577e14a34bee84bd } + }, + { + { -0x6b01314142b228d5, -0x0b95b025f9f0ddef, 0x124a5977c0c8d1ff, 0x705304b8fb009295 }, + { -0x0f1d97539e58c4f6, -0x0d0505efc86e5a0b, -0x3e1ec17d9492ff17, 0x60fa7ee96fd78f42 }, + { -0x49c2e2cab2d6913a, -0x0c3cfac1a052ce28, 0x670b958cb4bd42ec, 0x21398e0ca16353fd } + }, +}, +{ + { + { -0x793a03e979e48166, -0x095ccfb895d83baf, 0x01667267a1e93597, 0x05ffb9cd6082dfeb }, + { 0x216ab2ca8da7d2ef, 0x366ad9dd99f42827, -0x519b46ffb022c38b, 0x403a395b53909e62 }, + { -0x59e805600ac09ec7, 0x60f2b5e513e66cb6, -0x285741104cbb755c, 0x7a2932856f5ea192 } + }, + { + { -0x4763bbb7869c6cfe, 0x4ae4f19350c67f2c, -0x0f4ca25737e5063a, 0x39d0003546871017 }, + { 0x0b39d761b02de888, 0x5f550e7ed2414e1f, -0x59405ba7dd1e56c0, 0x050a2f7dfd447b99 }, + { 0x437c3b33a650db77, 0x6bafe81dbac52bb2, -0x0166bfd2d2482ce8, 0x2b5b7eec372ba6ce } + }, + { + { -0x596bbfb29ec5370c, 0x500c3c2bfa97e72c, -0x78befb2de0313df0, 0x1b205fb38604a8ee }, + { -0x4c43b4427c0af111, 0x508f0c998c927866, 0x43e76587c8b7e66e, 0x0f7655a3a47f98d9 }, + { 0x55ecad37d24b133c, 0x441e147d6038c90b, 0x656683a1d62c6fee, 0x0157d5dc87e0ecae } + }, + { + { -0x6ad9aaeb28e14adc, -0x19fc277ea20eba6d, 0x147cdf410d4de6b7, 0x5293b1730437c850 }, + { -0x0d5850aefcab3ec3, -0x285f4eba55c8d4a0, 0x2869b96a05a3d470, 0x6528e42d82460173 }, + { 0x23d0e0814bccf226, -0x6d38ba327e69046d, -0x749e8693a6abe1a5, 0x40a44df0c021f978 } + }, + { + { -0x793691aeb43a2f6b, -0x0df2bf6703597fb6, 0x27363d89c826ea5d, 0x39ca36565719cacf }, + { -0x25579676b0df1596, -0x15eb5c2eb39df9e8, 0x6001fccb090bf8be, 0x35f4e822947e9cf0 }, + { -0x68af90d0907848a4, -0x39db515ffcb51f90, 0x1ec856e3aad34dd6, 0x055b0be0e440e58f } + }, + { + { 0x4d12a04b6ea33da2, 0x57cf4c15e36126dd, -0x6f13698a11bb2699, 
0x64ca348d2a985aac }, + { 0x6469a17d89735d12, -0x2490d82a199d460f, -0x60345cd795c6a97f, 0x363b8004d269af25 }, + { -0x66a771e61b3b6ed3, -0x1033c4b1e35a3195, 0x4522ea60fa5b98d5, 0x7064bbab1de4a819 } + }, + { + { -0x5d6f3f9ebdabded7, -0x0d1d3d514172a470, -0x30dba724895401e5, 0x02157ade83d626bf }, + { -0x46e61eaea588f9bf, -0x565d1d38b1807fc7, 0x7527250b3df23109, 0x756a7330ac27b78b }, + { 0x3e46972a1b9a038b, 0x2e4ee66a7ee03fb4, -0x7e5db78891244b36, 0x1a944ee88ecd0563 } + }, + { + { -0x44bf57a6e7dc9d2a, -0x4660aa8875b2e545, -0x72e74bd88a7aa60a, 0x26c20fe74d26235a }, + { -0x2a56e2eeaefc6c8e, 0x2ed377b799ca26de, -0x5e8dfd5302c99495, 0x0730291bd6901995 }, + { 0x648d1d9fe9cc22f5, 0x66bc561928dd577c, 0x47d3ed21652439d1, 0x49d271acedaf8b49 } + }, +}, +{ + { + { 0x2798aaf9b4b75601, 0x5eac72135c8dad72, -0x2d31559e9e485fdd, 0x1bbfb284e98f7d4e }, + { -0x760afa75c7d4cc0d, 0x5ae2ba0bad48c0b4, -0x706c4afc5ac24c92, 0x5aa3ed9d95a232e6 }, + { 0x656777e9c7d96561, -0x34d4edab8d387fca, 0x65053299d9506eee, 0x4a07e14e5e8957cc } + }, + { + { 0x240b58cdc477a49b, -0x02c725219bb80fe9, 0x19928d32a7c86aad, 0x50af7aed84afa081 }, + { 0x4ee412cb980df999, -0x5cea2890c391388f, -0x445a12216da38803, 0x3f0bac391d313402 }, + { 0x6e4fde0115f65be5, 0x29982621216109b2, 0x780205810badd6d9, 0x1921a316baebd006 } + }, + { + { -0x28a55265260c3e75, 0x566a0eef60b1c19c, 0x3e9a0bac255c0ed9, 0x7b049deca062c7f5 }, + { -0x76bdd08120478f04, 0x2c296beb4f76b3bd, 0x0738f1d436c24df7, 0x6458df41e273aeb0 }, + { -0x23341c85cabbbb7d, 0x758879330fedbe93, 0x786004c312c5dd87, 0x6093dccbc2950e64 } + }, + { + { 0x6bdeeebe6084034b, 0x3199c2b6780fb854, -0x68cc895449d2f96b, 0x6e3180c98b647d90 }, + { 0x1ff39a8585e0706d, 0x36d0a5d8b3e73933, 0x43b9f2e1718f453b, 0x57d1ea084827a97c }, + { -0x118549185ed74f8f, -0x5b3ea6926c577456, -0x084b217d4dde9ed0, 0x363e999ddd97bd18 } + }, + { + { 0x2f1848dce24baec6, 0x769b7255babcaf60, -0x6f34c391c31016cf, 0x231f979bc6f9b355 }, + { -0x6957bc3eca11e03c, -0x68914caaf71b3731, -0x4bd097fe4a732cd0, 0x48ee9b78693a052b }, + { 0x5c31de4bcc2af3c6, -0x4fb44fcf01df72e1, -0x48728ff63eb04b9a, 0x079bfa9b08792413 } + }, + { + { -0x0c36127f5d2abdbb, 0x0aa08b7877f63952, -0x2892539c2ef7ab8b, 0x1ef4fb159470636b }, + { -0x1c6fc5ae25cff20c, -0x7bc69bdcc256a550, -0x12c30ed2f4ca9b80, 0x038c77f684817194 }, + { -0x7ab1a119a4e98414, 0x59590a4296d0cdc2, 0x72b2df3498102199, 0x575ee92a4a0bff56 } + }, + { + { 0x5d46bc450aa4d801, -0x3c50edd85acc4628, 0x389e3b262b8906c2, 0x200a1e7e382f581b }, + { -0x2b3f7f6f75e7d031, 0x30e170c299489dbd, 0x05babd5752f733de, 0x43d4e7112cd3fd00 }, + { 0x518db967eaf93ac5, 0x71bc989b056652c0, -0x01d47a26a98e680b, 0x050eca52651e4e38 } + }, + { + { -0x6853c6899f199716, -0x64e64401eac54b69, 0x4cb179b534eca79f, 0x6151c09fa131ae57 }, + { -0x3cbce521bac0f364, -0x160afba1008fc465, -0x03268536127b84c3, 0x4b0ee6c21c58f4c6 }, + { 0x3af55c0dfdf05d96, -0x22d9d11fd54b1186, 0x11b2bb8712171709, 0x1fef24fa800f030b } + }, +}, +{ + { + { -0x006e59956fe99de0, -0x0ddaad51a40e1ff7, 0x7dff85d87f90df7c, 0x4f620ffe0c736fb9 }, + { -0x4b69edc5949399f7, -0x58af017a7f54a6c8, -0x0b8e40c6483d85a1, 0x507903ce77ac193c }, + { 0x62f90d65dfde3e34, -0x30d73a6d4605a053, -0x6637910639e9baf0, 0x25d448044a256c84 } + }, + { + { 0x2c7c4415c9022b55, 0x56a0d241812eb1fe, -0x0fd15e362849a1f3, 0x4180512fd5323b26 }, + { -0x4297dcf138164e91, 0x0eb1b9c1c1c5795d, 0x7943c8c495b6b1ff, 0x2f9faf620bbacf5e }, + { -0x5b00c19675b75a25, -0x4595c7f9426abfc5, -0x60831e50b82a49a3, 0x15e087e55939d2fb } + }, + { + { -0x776be7910469c0c8, 0x48a00e80dc639bd5, -0x5b17f6d41693e367, 0x5a097d54ca573661 
}, + { 0x12207543745c1496, -0x2500c30225c79ef4, -0x1b1868d8d38e3cb1, 0x39c07b1934bdede9 }, + { 0x2d45892b17c9e755, -0x2fcc028d76cf7208, 0x6c2fe9d9525b8bd9, 0x2edbecf1c11cc079 } + }, + { + { -0x11f0f0222f785da1, -0x638aceaaa3c1cb12, 0x660c572e8fab3ab5, 0x0854fc44544cd3b2 }, + { 0x1616a4e3c715a0d2, 0x53623cb0f8341d4d, -0x6910acd638176635, 0x3d4e8dbba668baa6 }, + { 0x61eba0c555edad19, 0x24b533fef0a83de6, 0x3b77042883baa5f8, 0x678f82b898a47e8d } + }, + { + { 0x1e09d94057775696, -0x112ed9a3c326ae25, -0x056253d4df431e91, 0x0f7f76e0e8d089f4 }, + { -0x4eb6e2f4296ff3ac, 0x3539722c9d132636, 0x4db928920b362bc9, 0x4d7cd1fea68b69df }, + { 0x36d9ebc5d485b00c, -0x5da69b6d1b524c9b, -0x3e9a6b7f3dee6333, 0x45306349186e0d5f } + }, + { + { -0x695beb13d4f8db6f, 0x1bb2218127a7b65b, 0x6d2849596e8a4af0, 0x65f3b08ccd27765f }, + { -0x6b222f3e593200e3, 0x55f6f115e84213ae, 0x6c935f85992fcf6a, 0x067ee0f54a37f16f }, + { -0x134d6000e667fe09, -0x62c9e2e05d5f08d1, 0x25f11d2375fd2f49, 0x124cefe80fe10fe2 } + }, + { + { 0x1518e85b31b16489, -0x70552348248ef405, 0x39b0bdf4a14ae239, 0x05f4cbea503d20c1 }, + { 0x4c126cf9d18df255, -0x3e2b8e16eb859c4a, 0x2c6d3c73f3c93b5f, 0x6be3a6a2e3ff86a2 }, + { -0x31fbf1613fbeba44, -0x38e00b1df7097cb4, -0x42ab91725477b85d, 0x64666aa0a4d2aba5 } + }, + { + { -0x4f3ac408ccc816b4, 0x7cb5697e11e14f15, 0x4b84abac1930c750, 0x28dd4abfe0640468 }, + { 0x6841435a7c06d912, -0x35edc3de44c07cf5, -0x2b4c84d84e341d88, 0x1d753b84c76f5046 }, + { 0x7dc0b64c44cb9f44, 0x18a3e1ace3925dbf, 0x7a3034862d0457c4, 0x4c498bf78a0c892e } + }, +}, +{ + { + { 0x22d2aff530976b86, -0x726f47f93d2db9fc, -0x235e7693b21a451b, 0x28005fe6c8340c17 }, + { 0x37d653fb1aa73196, 0x0f9495303fd76418, -0x52dff4f604c5e84e, 0x544d49292fc8613e }, + { 0x6aefba9f34528688, 0x5c1bff9425107da1, -0x08a444329926b4ca, 0x72e472930f316dfa } + }, + { + { 0x07f3f635d32a7627, 0x7aaa4d865f6566f0, 0x3c85e79728d04450, 0x1fee7f000fe06438 }, + { 0x2695208c9781084f, -0x4eafd5f4dcbaf11f, -0x02625159fc1021fe, 0x5a9d2e8c2733a34c }, + { 0x765305da03dbf7e5, -0x5b250db6ebcb3243, 0x7b4ad5cdd24a88ec, 0x00f94051ee040543 } + }, + { + { -0x28106c44f85068ad, 0x583ed0cf3db766a7, -0x3196674091f4e13b, 0x47b7ffd25dd40452 }, + { -0x72ca94dc3c2ccf4e, -0x0de374644fb8e4fa, -0x4c93ce9391bd47c4, 0x07d79c7e8beab10d }, + { -0x7804046343f722ee, -0x75f994c51e113d65, 0x0d57242bdb1fc1bf, 0x1c3520a35ea64bb6 } + }, + { + { -0x325790bfde943fa7, 0x1fbb231d12bcd87e, -0x4b6a9561e838f670, 0x38750c3b66d12e55 }, + { -0x7f2dac5943345cb6, 0x3e61c3a13838219b, -0x6f3c49fe677d1c6a, 0x1c3d05775d0ee66f }, + { 0x692ef1409422e51a, -0x343f38c3d4a2098f, 0x21014fe7744ce029, 0x0621e2c7d330487c } + }, + { + { -0x4851e8694f240f0d, 0x54dfafb9e17ce196, 0x25923071e9aaa3b4, 0x5d8e589ca1002e9d }, + { -0x50679f337da67c73, -0x6f15b73e39606524, 0x6526483765581e30, 0x0007d6097bd3a5bc }, + { -0x3f40e26af7bd56b5, -0x4d2c3c9ca770d1c2, 0x0a961438bb51e2ef, 0x1583d7783c1cbf86 } + }, + { + { -0x6ffcb8fb3362d739, 0x1d1b679ef72cc58f, 0x16e12b5fbe5b8726, 0x4958064e83c5580a }, + { -0x13115d10a25d851f, 0x597c3a1455670174, -0x3659d5ed99f6e986, 0x252a5f2e81ed8f70 }, + { 0x0d2894265066e80d, -0x033c087acf837395, 0x1b53da780c1112fd, 0x079c170bd843b388 } + }, + { + { -0x322932af3f2a2faa, -0x6508979244fca8c5, 0x3ca6723ff3c3ef48, 0x6768c0d7317b8acc }, + { 0x0506ece464fa6fff, -0x411cbce19dfa1add, 0x3579422451b8ea42, 0x6dec05e34ac9fb00 }, + { -0x6b49da1a0eaa3e4d, 0x417bf3a7997b7b91, -0x3dd342239294da00, 0x51445e14ddcd52f4 } + }, + { + { -0x76ceb854d4415bab, -0x73ac5db06df86ed7, 0x4b49f948be30f7a7, 0x12e990086e4fd43d }, + { 0x57502b4b3b144951, 
-0x71980094bbb4434d, -0x474296d8e99c7a25, 0x13186f31e39295c8 }, + { -0x0ef3694c802044d2, -0x60656ca1ede31507, -0x20eec93bc5a467c1, 0x77b2e3f05d3e99af } + }, +}, +{ + { + { -0x6acd0b7033a32d65, 0x2ba851bea3ce3671, 0x32dacaa051122941, 0x478d99d9350004f2 }, + { -0x02f28a78630ed9a9, -0x17d0106b1ac5f1d7, -0x33cb580fa444b419, 0x0b251172a50c38a2 }, + { 0x1d5ad94890bb02c0, 0x50e208b10ec25115, -0x5d95dd76b10de8fe, 0x4dc923343b524805 } + }, + { + { 0x3ad3e3ebf36c4975, -0x28a2da5ac879dedb, -0x178c6bc25fda5aea, 0x6bbc7cb4c411c847 }, + { -0x1c7d73bff07f794a, 0x3f77e6f7979f0dc8, 0x7ef6de304df42cb4, 0x5265797cb6abd784 }, + { 0x3c6f9cd1d4a50d56, -0x49dbbf8839015482, 0x6ff9bf483580972e, 0x00375883b332acfb } + }, + { + { -0x3674137a938a3664, -0x1bbe7b3fff1cc30c, 0x0a676b9bba907634, 0x669e2cb571f379d7 }, + { 0x0001b2cd28cb0940, 0x63fb51a06f1c24c9, -0x4a52796e232a35cf, 0x67238dbd8c450660 }, + { -0x34ee948c5b642cf8, 0x025aad6b2392729e, -0x4b86c105c0aa264f, 0x72a1056140678bb9 } + }, + { + { 0x0d8d2909e2e505b6, -0x673587543fd6edd0, 0x77ef5569a9b12327, 0x7c77897b81439b47 }, + { -0x5d497ed4e336db63, 0x62866eee21211f58, 0x2cb5c5b85df10ece, 0x03a6b259e263ae00 }, + { -0x0e3e4a1d21cce34b, 0x5a9f5d8e15fca420, -0x605bc70e8426cd4f, 0x2a381bf01c6146e7 } + }, + { + { -0x083f41cd4acbe991, 0x27e6ca6419cf70d4, -0x6cb2082856a858a7, 0x5701461dabdec2aa }, + { -0x536467863037ee3f, -0x7482d67ec8a91a99, 0x50da4e607c70edfc, 0x5dbca62f884400b6 }, + { 0x2c6747402c915c25, 0x1bdcd1a80b0d340a, 0x5e5601bd07b43f5f, 0x2555b4e05539a242 } + }, + { + { 0x78409b1d87e463d4, -0x52b256a532049c63, -0x13d788c8aada6464, 0x69c806e9c31230ab }, + { 0x6fc09f5266ddd216, -0x231a9f58371c8fb8, -0x139a6c625d209d03, 0x7a869ae7e52ed192 }, + { 0x7b48f57414bb3f22, 0x68c7cee4aedccc88, -0x12d06c9e86127f42, 0x25d70b885f77bc4b } + }, + { + { -0x67ba62d644e51b2c, 0x56b9c4c739f954ec, -0x7cd8bc093d64b4c2, 0x21ea8e2798b6878a }, + { 0x4151c3d9762bf4de, 0x083f435f2745d82b, 0x29775a2e0d23ddd5, 0x138e3a6269a5db24 }, + { -0x78410b4b95a58464, -0x2dd662e4a03e2f9e, -0x7dbf67e722cde9b8, 0x5c5abeb1e5a2e03d } + }, + { + { 0x02cde6de1306a233, 0x7b5a52a2116f8ec7, -0x1e397e0b3ee9c4a5, 0x241d350660d32643 }, + { 0x14722af4b73c2ddb, -0x43b8f3a0a5faf9f3, 0x00943eac2581b02e, 0x0e434b3b1f499c8f }, + { 0x6be4404d0ebc52c7, -0x51b9dcc44e586e0b, 0x2aec170ed25db42b, 0x1d8dfd966645d694 } + }, +}, +{ + { + { -0x2a679c63ed224f5c, -0x5a2e60cf3fdb7995, -0x2e83d0fca7031ba0, 0x07a195152e095e8a }, + { 0x296fa9c59c2ec4de, -0x43749e40b07b0c35, 0x1c7706d917a8f908, 0x63b795fc7ad3255d }, + { -0x57c970fdc761a038, -0x6fbcc4fd30721bc5, -0x505e02a23abed9bd, 0x3e8fe83d032f0137 } + }, + { + { 0x08704c8de8efd13c, -0x203ae571cc1fc8cf, -0x5a62a25aed9f321d, 0x22d60899a6258c86 }, + { 0x2f8b15b90570a294, -0x6b0dbd8f98f7bab7, -0x21e3a51e9e44027c, 0x75ba3b797fac4007 }, + { 0x6239dbc070cdd196, 0x60fe8a8b6c7d8a9a, -0x4c77b84314bfeda0, 0x0904d07b87779e5e } + }, + { + { -0x0bcdd299b706bf47, 0x06952f0cbd2d0c39, 0x167697ada081f931, 0x6240aacebaf72a6c }, + { -0x4b31e02b22456e64, -0x30ce24c138b37256, 0x2c63cc63ad86cc51, 0x43e2143fbc1dde07 }, + { -0x07cb8b63a45d6a60, -0x296b83a435c82da6, 0x66f13ba7e7c9316a, 0x56bdaf238db40cac } + }, + { + { 0x1310d36cc19d3bb2, 0x062a6bb7622386b9, 0x7c9b8591d7a14f5c, 0x03aa31507e1e5754 }, + { 0x362ab9e3f53533eb, 0x338568d56eb93d40, -0x61f1ebade2a5aa8e, 0x1d24a86d83741318 }, + { -0x0b1389b7002b31e1, -0x1fba150fab5373e4, -0x772dda7de2f6ca84, 0x43b261dc9aeb4859 } + }, + { + { 0x19513d8b6c951364, -0x6b018ed9fff40b85, 0x028d10ddd54f9567, 0x02b4d5e242940964 }, + { -0x1aa4e1e677448645, 
-0x5f612f823e85ca63, -0x4fd3d11d9fc215cd, 0x326055cf5b276bc2 }, + { -0x4b5eaa34d72e720e, -0x1533b9b9e7931af8, -0x3b630b6c937dbc77, 0x27a6c809ae5d3410 } + }, + { + { -0x32d3d8f53bc296ac, -0x22b5c1a89599354e, 0x79fa592469d7036c, 0x221503603d8c2599 }, + { -0x74591432e0f24e78, 0x37d3d73a675a5be8, -0x0dd1205cea0aa7a6, 0x2cb67174ff60a17e }, + { 0x59eecdf9390be1d0, -0x56bddfbb8d731c0f, -0x7d76e399856b0f0c, 0x7b1df4b73890f436 } + }, + { + { 0x5f2e221807f8f58c, -0x1caaa3602b6bf62c, -0x4d555772e04959d0, 0x68698245d352e03d }, + { -0x1b6d0d1f4c4d5ddc, 0x7c6c9e062b551160, 0x15eb8fe20d7f7b0e, 0x61fcef2658fc5992 }, + { -0x244ea27ad5e7e786, -0x0c1b552c79225329, 0x44bae2810ff6c482, 0x46cf4c473daf01cf } + }, + { + { 0x213c6ea7f1498140, 0x7c1e7ef8392b4854, 0x2488c38c5629ceba, 0x1065aae50d8cc5bb }, + { 0x426525ed9ec4e5f9, 0x0e5eda0116903303, 0x72b1a7f2cbe5cadc, 0x29387bcd14eb5f40 }, + { 0x1c2c4525df200d57, 0x5c3b2dd6bfca674a, 0x0a07e7b1e1834030, 0x69a198e64f1ce716 } + }, +}, +{ + { + { 0x7b26e56b9e2d4734, -0x3b38ecd47e39e98b, -0x10a36ada13632181, 0x39c80b16e71743ad }, + { 0x7afcd613efa9d697, 0x0cc45aa41c067959, -0x5a901efb3e05256a, 0x3a73b70472e40365 }, + { 0x0f196e0d1b826c68, -0x08e00f1db69f1c25, 0x6113167023b7436c, 0x0cf0ea5877da7282 } + }, + { + { -0x1ccd312bc4596ba6, -0x21f4ec9e177e3fa3, 0x1ad40f095e67ed3b, 0x5da8acdab8c63d5d }, + { 0x196c80a4ddd4ccbd, 0x22e6f55d95f2dd9d, -0x38a1cc38bf2938e5, 0x7bb51279cb3c042f }, + { -0x3b4999b5c58fea61, 0x76194f0f0a904e14, -0x5a9eb3c65bf693ed, 0x6cd0ff50979feced } + }, + { + { 0x7fecfabdb04ba18e, -0x2f038403c4224309, -0x5be2b791fa85ece4, 0x641a4391f2223a61 }, + { -0x3f1f981870bbd754, 0x14835ab0a61135e3, -0x0de2eb0cc7f9d6cb, 0x6390a4c8df04849c }, + { -0x3a3946a559f95725, -0x6eb480614f97da0f, 0x2a731f6b44fc9eff, 0x30ddf38562705cfc } + }, + { + { 0x33bef2bd68bcd52c, -0x39b6244f96b7d10e, -0x4a4911f3be34e512, 0x5c294d270212a7e5 }, + { 0x4e3dcbdad1bff7f9, -0x36ee717ddf9ba8e9, -0x45333143f0e762aa, 0x1b4822e9d4467668 }, + { -0x54c9f580daa9c87f, 0x2512228a480f7958, -0x38a2fad89eeb4b1d, 0x222d9625d976fe2a } + }, + { + { 0x0f94be7e0a344f85, -0x14d05573780dd3c8, -0x631e18a1b11e90f1, 0x43e64e5418a08dea }, + { 0x1c717f85b372ace1, -0x7e6cf196b9c740e8, 0x239cad056bc08b58, 0x0b34271c87f8fff4 }, + { -0x7eaa1dade5ca319d, -0x41eff2b206edfd72, -0x4007f4075a822314, 0x57342dc96d6bc6e4 } + }, + { + { -0x0c3c4348e18f840a, 0x351d9b8c7291a762, 0x00502e6edad69a33, 0x522f521f1ec8807f }, + { -0x10110f9a3731a668, -0x40fd6aef4a34155e, -0x739b5ef9df483ba8, 0x35134fb231c24855 }, + { 0x272c1f46f9a3902b, -0x36e45c48669a8434, -0x519eb4cfb075e3f2, 0x7afcaad70b99017b } + }, + { + { -0x577ebe13107bd495, 0x55e7b14797abe6c5, -0x738b7068fc87b002, 0x5b50a1f7afcd00b7 }, + { -0x3da212ab5b4741bf, -0x6fd2ec1ee44f1d23, 0x41f43233cde82ab2, 0x1085faa5c3aae7cb }, + { -0x647bf0990ec9eceb, 0x18462242701003e9, 0x65ed45fae4a25080, 0x0a2862393fda7320 } + }, + { + { -0x69f18c84913462e9, -0x050db6b72983151f, 0x37e7a9b4d55e1b89, 0x5cb7173cb46c59eb }, + { 0x46ab13c8347cbc9d, 0x3849e8d499c12383, 0x4cea314087d64ac9, 0x1f354134b1a29ee7 }, + { 0x4a89e68b82b7abf0, -0x0be326d864594847, 0x16e6c210e18d876f, 0x7cacdb0f7f1b09c6 } + }, +}, +{ + { + { -0x1efebbcb233a3513, 0x47ed5d963c84fb33, 0x70019576ed86a0e7, 0x25b2697bd267f9e4 }, + { -0x6f9d4d1f26e58744, 0x47c9889cc8509667, -0x620ab599bfaf8f48, 0x7369e6a92493a1bf }, + { -0x6298c004ec67979c, 0x3ca5fbd9415dc7b8, -0x1fb133c420d8c4a2, 0x1420683db54e4cd2 } + }, + { + { 0x34eebb6fc1cc5ad0, 0x6a1b0ce99646ac8b, -0x2c4f25b6599421ad, 0x31e83b4161d081c1 }, + { -0x4b8742e1db622e69, 
0x620c35005e58c102, -0x04fd2cd0334553a4, 0x60b63bebf508a72d }, + { -0x681738ed61f9d4b1, 0x49e48f4f29320ad8, 0x5bece14b6f18683f, 0x55cf1eb62d550317 } + }, + { + { 0x3076b5e37df58c52, -0x28c54622186633ca, -0x427ce31cb6ec11e0, 0x1a56fbaa62ba0133 }, + { 0x5879101065c23d58, -0x7462f792af6b7e64, -0x1dbfd056ed3aa059, 0x669a6564570891d4 }, + { -0x6bc194afa3623614, 0x302557bba77c371a, -0x678c51a9becb89af, 0x13c4836799c58a5c } + }, + { + { -0x3b230495a2742f80, -0x21143b13a8e5b7be, -0x2b4d177c471aac9b, 0x50bdc87dc8e5b827 }, + { 0x423a5d465ab3e1b9, -0x03ec3e78380ec09f, 0x19f83664ecb5b9b6, 0x66f80c93a637b607 }, + { 0x606d37836edfe111, 0x32353e15f011abd9, 0x64b03ac325b73b96, 0x1dd56444725fd5ae } + }, + { + { -0x3d6819fff7453766, 0x7d4cea11eae1c3e0, -0x0c1c741e60186884, 0x3a3a450f63a305cd }, + { -0x705b8007cc9ded83, -0x4360953b8e3283eb, 0x6e71454349220c8b, 0x0e645912219f732e }, + { 0x078f2f31d8394627, 0x389d3183de94a510, -0x2e1c9392e8669080, 0x318c8d9393a9a87b } + }, + { + { 0x5d669e29ab1dd398, -0x036de9a7cbd261c5, 0x55851dfdf35973cd, 0x509a41c325950af6 }, + { -0x0d8ba2fcd50001e7, 0x0c9f3c497f24db66, -0x43672c1c457a6711, 0x224c7c679a1d5314 }, + { -0x423f91235906da17, 0x793ef3f4641b1f33, -0x7d13ed7f627cc177, 0x05bff02328a11389 } + }, + { + { 0x6881a0dd0dc512e4, 0x4fe70dc844a5fafe, 0x1f748e6b8f4a5240, 0x576277cdee01a3ea }, + { 0x3632137023cae00b, 0x544acf0ad1accf59, -0x698befb62de5e378, 0x780b8cc3fa2a44a7 }, + { 0x1ef38abc234f305f, -0x65a88042ebfa21f8, 0x5e82a51434e62a0d, 0x5ff418726271b7a1 } + }, + { + { -0x1a24b817ec496ac0, -0x0ca2d5c4bcd9ef1f, -0x53e0d916c787ed8a, 0x29d4db8ca0a0cb69 }, + { 0x398e080c1789db9d, -0x589fdfda0c18870b, -0x056776b3f942fca3, 0x106a03dc25a966be }, + { -0x2652f550ccccac30, 0x38669da5acd309e5, 0x3c57658ac888f7f0, 0x4ab38a51052cbefa } + }, +}, +{ + { + { -0x09701d177f621fac, -0x1c43f695637d452f, 0x076353d40aadbf45, 0x7b9b1fb5dea1959e }, + { -0x20253411bcdb3f17, 0x054442883f955bb7, -0x2108555715ce9f61, 0x68aee70642287cff }, + { -0x0fe3370e8b8e33f4, -0x6adbd1c8a86f7d45, 0x27776093d3e46b5f, 0x2d13d55a28bd85fb } + }, + { + { -0x40fe6331851185ae, -0x57212d491bab152d, 0x3c619f0b87a8bb19, 0x3619b5d7560916d8 }, + { -0x053a2df9a4ca4726, -0x572575657a9db449, -0x332d356ec2de32f1, 0x6b8341ee8bf90d58 }, + { 0x3579f26b0282c4b2, 0x64d592f24fafefae, -0x48321284d7373840, 0x6a927b6b7173a8d7 } + }, + { + { -0x728fbf79c1317715, -0x0f1cf8567f113f74, -0x53ddaf9ef2877026, 0x056d92a43a0d478d }, + { 0x1f6db24f986e4656, 0x1021c02ed1e9105b, -0x0700c000d33f5c8b, 0x1d2a6bf8c6c82592 }, + { 0x1b05a196fc3da5a1, 0x77d7a8c243b59ed0, 0x06da3d6297d17918, 0x66fbb494f12353f7 } + }, + { + { -0x2928f6690edcf62a, -0x2404dc7a163c2ac7, 0x46d602b0f7552411, 0x270a0b0557843e0c }, + { 0x751a50b9d85c0fb8, -0x2e5023da7430f685, 0x2f16a6a38309a969, 0x14ddff9ee5b00659 }, + { 0x61ff0640a7862bcc, -0x7e353f65a0ee5402, -0x6fb87cfbaa2ed545, 0x19a4bde1945ae873 } + }, + { + { 0x40c709dec076c49f, 0x657bfaf27f3e53f6, 0x40662331eca042c4, 0x14b375487eb4df04 }, + { -0x6460d90adf59dff6, 0x64804443cf13eaf8, -0x759c98c079ce122d, 0x72bbbce11ed39dc1 }, + { -0x517ac36b549923b9, -0x149dcbc12089d292, -0x0f71f1e7904d082f, 0x4f0b1c02700ab37a } + }, + { + { 0x79fd21ccc1b2e23f, 0x4ae7c281453df52a, -0x37e8d1362eaeb795, 0x68abe9443e0a7534 }, + { -0x1e8f987827e6ae06, -0x5ef5d3714d6f3885, -0x18c7d05fc129988d, 0x0a4d84710bcc4b54 }, + { -0x25ed393bf87ce235, 0x0da230d74d5c510d, 0x4ab1531e6bd404e1, 0x4106b166bcf440ef } + }, + { + { -0x5b7a332ac61b130e, 0x5aa3f3ad0555bab5, 0x145e3439937df82d, 0x1238b51e1214283f }, + { 0x02e57a421cd23668, 0x4ad9fb5d0eaef6fd, 
-0x6ab198d84edbbb80, 0x7f792f9d2699f331 }, + { 0x0b886b925fd4d924, 0x60906f7a3626a80d, -0x132c984b467542ee, 0x2876beb1def344cf } + }, + { + { -0x2a6b4cccc5757a08, 0x4ea37689e78d7d58, 0x73bf9f455e8e351f, 0x5507d7d2bc41ebb4 }, + { -0x237b16ca9cebb96f, 0x632fe8a0d61f23f4, 0x4caa800612a9a8d5, 0x48f9dbfa0e9918d3 }, + { 0x1ceb2903299572fc, 0x7c8ccaa29502d0ee, -0x6e405bcbee331985, 0x5784481964a831e7 } + }, +}, +{ + { + { -0x29302e10a0223f64, -0x17d4c10208a8a232, 0x25d56b5d201634c2, 0x3041c6bb04ed2b9b }, + { -0x2583d4da98972a6d, -0x673e3fa8bbdd35ed, -0x0e57f42a35f531e3, 0x29cdd1adc088a690 }, + { 0x0ff2f2f9d956e148, -0x5218688a60ca94d2, 0x1a4698bb5f6c025c, 0x104bbd6814049a7b } + }, + { + { -0x56a265a029800e9d, -0x16d41962b338a97f, -0x4807fdb321df0da9, 0x204f2a20fb072df5 }, + { 0x51f0fd3168f1ed67, 0x2c811dcdd86f3bc2, 0x44dc5c4304d2f2de, 0x5be8cc57092a7149 }, + { -0x37ebc4c2cf144f87, 0x7589155abd652e30, 0x653c3c318f6d5c31, 0x2570fb17c279161f } + }, + { + { 0x192ea9550bb8245a, -0x37190457706faf2f, 0x7986ea2d88a4c935, 0x241c5f91de018668 }, + { 0x3efa367f2cb61575, -0x0a069089e329fd94, -0x1738ebd59a4ada9e, 0x3dcb65ea53030acd }, + { 0x28d8172940de6caa, -0x7040d30fdd268cc6, 0x16d7fcdd235b01d1, 0x08420edd5fcdf0e5 } + }, + { + { 0x0358c34e04f410ce, -0x49eca4a5d891f97b, 0x5d9670c7ebb91521, 0x04d654f321db889c }, + { -0x3200df547c9d05b6, 0x57e118d4e21a3e6e, -0x1ce869e803c619d5, 0x0d9a53efbc1769fd }, + { 0x5e7dc116ddbdb5d5, 0x2954deb68da5dd2d, 0x1cb608173334a292, 0x4a7a4f2618991ad7 } + }, + { + { 0x24c3b291af372a4b, -0x6c257d8f8e7eb80e, -0x227b7a9b7976610e, 0x4a96314223e0ee33 }, + { -0x0b58e7fda04ea06b, 0x3df65f346b5c1b8f, -0x32030f7aff1feeee, 0x11b50c4cddd31848 }, + { -0x5917d8bbf75b002a, 0x738e177e9c1576d9, 0x773348b63d02b3f2, 0x4f4bce4dce6bcc51 } + }, + { + { 0x30e2616ec49d0b6f, -0x1ba98e703513dce9, 0x48eb409bf26b4fa6, 0x3042cee561595f37 }, + { -0x58e031a51ddbda7c, 0x26ea725692f58a9e, -0x2de5f628e315c30c, 0x73fcdd14b71c01e6 }, + { 0x427e7079449bac41, -0x7aa51c92431dcef6, 0x4cae76215f841a7c, 0x389e740c9a9ce1d6 } + }, + { + { -0x36428709a8f153d8, -0x1aa4f4cdd86e631f, 0x65fc3eaba19b91ed, 0x25c425e5d6263690 }, + { 0x64fcb3ae34dcb9ce, -0x68affcdc1cb72f53, 0x45b3f07d62c6381b, 0x61545379465a6788 }, + { 0x3f3e06a6f1d7de6e, 0x3ef976278e062308, -0x73eb09d9b1759389, 0x6539a08915484759 } + }, + { + { -0x223b242beb44b5e7, 0x19b2bc3c98424f8e, 0x48a89fd736ca7169, 0x0f65320ef019bd90 }, + { -0x162de08b3c2d088d, -0x3eafabbeda3b97bb, 0x624e5ce8f9b99e33, 0x11c5e4aac5cd186c }, + { -0x2b792e4e35021f3a, 0x4f3fe6e3163b5181, 0x59a8af0dfaf2939a, 0x4cabc7bdec33072a } + }, +}, +{ + { + { -0x083f5e63e5ab5fbc, 0x4a1c5e2477bd9fbb, -0x591c35eea50dd68e, 0x1819bb953f2e9e0d }, + { 0x16faa8fb532f7428, -0x242bd15fb95b1d8e, 0x5337653b8b9ea480, 0x4065947223973f03 }, + { 0x498fbb795e042e84, 0x7d0dd89a7698b714, -0x7404f45bd8019d6b, 0x36ba82e721200524 } + }, + { + { -0x372962f5a8d8b12b, 0x45ba803260804b17, -0x20c325efddaa2054, 0x77d221232709b339 }, + { -0x29f13448bdba13bf, -0x02641761cbcb78ea, -0x36dbf5011bdd7b22, 0x4472f648d0531db4 }, + { 0x498a6d7064ad94d8, -0x5a4a37026509dd9d, -0x735712faba3ebe0c, 0x2c63bec3662d358c } + }, + { + { -0x65ae74c57a790741, -0x6118e509344e6910, -0x55f9da195dc7a30e, 0x1deb2176ddd7c8d1 }, + { 0x7fe60d8bea787955, -0x4623ee814a0bfe49, -0x6e383f65e6caa332, 0x22692ef59442bedf }, + { -0x7a9c2e65df993094, 0x401bfd8c4dcc7cd7, -0x2689594132f2709e, 0x67cfd773a278b05e } + }, + { + { 0x2d5fa9855a4e586a, 0x65f8f7a449beab7e, -0x55f8b2220de2cc2d, 0x185cba721bcb9dee }, + { -0x7213ce0510c11b8b, -0x6624007561dd026e, 0x512d11594e26cab1, 
0x0cde561eec4310b9 }, + { -0x6c79625c0b1c34bf, -0x40fc6d0abf086882, 0x026204fcd0463b83, 0x3ec91a769eec6eed } + }, + { + { 0x0fad2fb7b0a3402f, 0x46615ecbfb69f4a8, -0x08ba43373a07155a, 0x7a5fa8794a94e896 }, + { 0x1e9df75bf78166ad, 0x4dfda838eb0cd7af, -0x45ffd1273e150678, 0x13fedb3e11f33cfc }, + { 0x52958faa13cd67a1, -0x69a11f7e74244ae9, 0x16e58daa2e8845b3, 0x357d397d5499da8f } + }, + { + { 0x481dacb4194bfbf8, 0x4d77e3f1bae58299, 0x1ef4612e7d1372a0, 0x3a8d867e70ff69e1 }, + { 0x1ebfa05fb0bace6c, -0x36cb9df3e35065e2, -0x3388e33be27d49e6, 0x2d94a16aa5f74fec }, + { 0x6f58cd5d55aff958, -0x45c155a38aa988df, 0x75c123999165227d, 0x69be1343c2f2b35e } + }, + { + { -0x7d44425397b4721d, -0x5d0b382fc035f8e8, 0x337f92fbe096aaa8, 0x200d4d8c63587376 }, + { 0x0e091d5ee197c92a, 0x4f51019f2945119f, 0x143679b9f034e99c, 0x7d88112e4d24c696 }, + { 0x208aed4b4893b32b, 0x3efbf23ebe59b964, -0x289d214f245a1af9, 0x69607bd681bd9d94 } + }, + { + { 0x3b7f3bd49323a902, 0x7c21b5566b2c6e53, -0x1a45700ac587ad59, 0x28bc77a5838ece00 }, + { -0x0941fdef9721e31f, -0x172ae718f12343e1, -0x1c10022fe4aafa5b, 0x35f63353d3ec3fd0 }, + { 0x63ba78a8e25d8036, 0x63651e0094333490, 0x48d82f20288ce532, 0x3a31abfa36b57524 } + }, +}, +{ + { + { -0x3f708770c0872d77, -0x01cf58d35ebfb261, -0x0d887403309a3363, 0x7ee498165acb2021 }, + { 0x239e9624089c0a2e, -0x38b73b3fc501b8c8, 0x17dbed2a764fa12a, 0x639b93f0321c8582 }, + { 0x7bd508e39111a1c3, 0x2b2b90d480907489, -0x182d513d518d02e7, 0x0edf493c85b602a6 } + }, + { + { 0x6767c4d284764113, -0x5f6fbfc0080a07cb, 0x1c8fcffacae6bede, 0x04c00c54d1dfa369 }, + { -0x51337ea7a664a598, -0x15a8b0f014521df2, 0x4fe41d7422b67f07, 0x403b92e3019d4fb4 }, + { 0x4dc22f818b465cf8, 0x71a0f35a1480eff8, -0x51174052fb3829a9, 0x355bb12ab26176f4 } + }, + { + { -0x5cfe2538a5738ce8, -0x126ffc624c3155ef, 0x6f077cbf3bae3f2d, 0x7518eaf8e052ad8e }, + { -0x58e19b338b6c440c, -0x1a427b26135c4f3d, 0x0a6bc50cfa05e785, 0x0f9b8132182ec312 }, + { -0x5b77a63be48093ce, 0x0f2d60bcf4383298, 0x1815a929c9b1d1d9, 0x47c3871bbb1755c4 } + }, + { + { -0x0419a2af37af9950, 0x62ecc4b0b3a299b0, -0x1ac8ab15bbe51720, 0x08fea02ce8d48d5f }, + { 0x5144539771ec4f48, -0x07fa4e823673a292, -0x089d3ee5b83c3995, 0x00b89b85764699dc }, + { -0x7db2228997211530, -0x379bbadfb497a2dd, -0x4aeb3032a276299b, 0x473829a74f75d537 } + }, + { + { 0x23d9533aad3902c9, 0x64c2ddceef03588f, 0x15257390cfe12fb4, 0x6c668b4d44e4d390 }, + { -0x7d2d258ab9863be8, -0x19c428274d9e7210, 0x355eef24ac47eb0a, 0x2078684c4833c6b4 }, + { 0x3b48cf217a78820c, -0x0895f54d7ed8c169, -0x56939a5873711285, 0x7411a6054f8a433f } + }, + { + { 0x579ae53d18b175b4, 0x68713159f392a102, -0x7baa1345e110ca0b, 0x1ec9a872458c398f }, + { 0x4d659d32b99dc86d, 0x044cdc75603af115, -0x4cb38ed3233d1b78, 0x7c136574fb8134ff }, + { -0x47195b2bff5daf65, -0x647e28fdf4377d4c, 0x57e7cc9bf1957561, 0x3add88a5c7cd6460 } + }, + { + { -0x7a3d672ba6c6cfba, -0x7081ca67a009a614, 0x1d2ca22af2f66e3a, 0x61ba1131a406a720 }, + { -0x5476a88f49ca230e, 0x02dfef6cf66c1fbc, -0x7aacfd9741492e79, 0x249929fccc879e74 }, + { -0x5c2f5f0ee96a6fd7, 0x023b6b6cba7ebd89, 0x7bf15a3e26783307, 0x5620310cbbd8ece7 } + }, + { + { 0x6646b5f477e285d6, 0x40e8ff676c8f6193, -0x59138cee544a6b23, 0x7ec846f3658cec4d }, + { 0x528993434934d643, -0x462407f95aeddd0b, -0x709278703c0be3de, 0x37676a2a4d9d9730 }, + { -0x64a170c0e25dd139, 0x130f1d776c01cd13, 0x214c8fcfa2989fb8, 0x6daaf723399b9dd5 } + }, +}, +{ + { + { -0x7e514422d32ecf90, -0x69d1bcda07a5f162, -0x216c6e5535200135, 0x53177fda52c230e6 }, + { 0x591e4a5610628564, 0x2a4bb87ca8b4df34, -0x21d5da8d185c71bd, 0x3cbdabd9fee5046e }, + { 
-0x584368f9af462187, 0x3d12a7fbc301b59b, 0x02652e68d36ae38c, 0x79d739835a6199dc } + }, + { + { 0x21c9d9920d591737, -0x6415be2d164b932a, -0x1df17bdff2764036, 0x79d99f946eae5ff8 }, + { -0x26cab209bece3e43, 0x758094a186ec5822, 0x4464ee12e459f3c2, 0x6c11fce4cb133282 }, + { -0x0e84b7ca9798cdfb, 0x387deae83caad96c, 0x61b471fd56ffe386, 0x31741195b745a599 } + }, + { + { 0x17f8ba683b02a047, 0x50212096feefb6c8, 0x70139be21556cbe2, 0x203e44a11d98915b }, + { -0x172efe6f4885c9f5, -0x66467cdf666a18fe, -0x42b0200705fdb856, 0x2772e344e0d36a87 }, + { -0x2979c145c8461c61, 0x105bc169723b5a23, 0x104f6459a65c0762, 0x567951295b4d38d4 } + }, + { + { 0x07242eb30d4b497f, 0x1ef96306b9bccc87, 0x37950934d8116f45, 0x05468d6201405b04 }, + { 0x535fd60613037524, -0x1def52094f043d96, -0x5372f564dc166f52, 0x47204d08d72fdbf9 }, + { 0x00f565a9f93267de, -0x313028723f2a7176, -0x5dea1d230ce71d72, 0x4599ee919b633352 } + }, + { + { -0x538b929479e51a87, 0x31ab0650f6aea9dc, 0x241d661140256d4c, 0x2f485e853d21a5de }, + { -0x2c3ddf358f1f1895, -0x4ed415a71560cf6c, 0x294ddec8c3271282, 0x0c3539e1a1d1d028 }, + { 0x329744839c0833f3, 0x6fe6257fd2abc484, 0x5327d1814b358817, 0x65712585893fe9bc } + }, + { + { -0x7e3d60e428f711c1, -0x2234a5fa519bf830, -0x68513e282d5c1459, 0x1590521a91d50831 }, + { -0x63efd048cd59ee9f, -0x1b71ef22cb2adf58, 0x365c63546f9a9176, 0x32f6fe4c046f6006 }, + { 0x40a3a11ec7910acc, -0x6fec20070e92d852, 0x1a9720d8abb195d4, 0x1bb9fe452ea98463 } + }, + { + { -0x30a1936a33c98b84, 0x294201536b0bc30d, 0x453ac67cee797af0, 0x5eae6ab32a8bb3c9 }, + { -0x162e26af4c2ab062, 0x2d5f9cbee00d33c1, 0x51c2c656a04fc6ac, 0x65c091ee3c1cbcc9 }, + { 0x7083661114f118ea, 0x2b37b87b94349cad, 0x7273f51cb4e99f40, 0x78a2a95823d75698 } + }, + { + { -0x4b0dc3bda107cdf9, -0x54076b2c3656cb4b, -0x2f8f73ecc6027809, 0x1876789117166130 }, + { -0x5d4f8d16a373d532, 0x69cffc96651e9c4b, 0x44328ef842e7b42b, 0x5dd996c122aadeb3 }, + { -0x6da4a10f98f3af84, -0x7e6437bd46c3cc41, 0x10792e9a70dd003f, 0x59ad4b7a6e28dc74 } + }, +}, +{ + { + { 0x583b04bfacad8ea2, 0x29b743e8148be884, 0x2b1e583b0810c5db, 0x2b5449e58eb3bbaa }, + { 0x5f3a7562eb3dbe47, -0x0815c7ab71425f48, 0x00c3e53145747299, 0x1304e9e71627d551 }, + { 0x789814d26adc9cfe, 0x3c1bab3f8b48dd0b, -0x25f01e00068639f6, 0x4468de2d7c2dd693 } + }, + { + { 0x4b9ad8c6f86307ce, 0x21113531435d0c28, -0x2b57993a9a8588d4, 0x5da6427e63247352 }, + { 0x51bb355e9419469e, 0x33e6dc4c23ddc754, -0x6c5a4929bb80669e, 0x6cce7c6ffb44bd63 }, + { 0x1a94c688deac22ca, -0x46f991084451e008, -0x775273c772a6a7f1, 0x58f29abfe79f2ca8 } + }, + { + { 0x4b5a64bf710ecdf6, -0x4eb31ac7b9d3d6c4, 0x3643d056d50b3ab9, 0x6af93724185b4870 }, + { -0x16f130547218c198, 0x54036f9f377e76a5, -0x0fb6a4f441fea67e, 0x577629c4a7f41e36 }, + { 0x3220024509c6a888, -0x2d1fc9ecb4aa768d, -0x7c1dc9dcc3ccd761, 0x701f25bb0caec18f } + }, + { + { -0x62e7092683413eed, -0x7bb5f9198b40241c, 0x20f5b522ac4e60d6, 0x720a5bc050955e51 }, + { -0x3c574f071b9e9313, -0x08ff99f161da5783, 0x61e3061ff4bca59c, 0x2e0c92bfbdc40be9 }, + { 0x0c3f09439b805a35, -0x17b174c89dbd5404, 0x691417f35c229346, 0x0e9b9cbb144ef0ec } + }, + { + { -0x7211642aa24e4112, -0x363c54c8f58dc047, 0x44a8f1bf1c68d791, 0x366d44191cfd3cde }, + { -0x04452b7004a8df53, -0x117e6e942406f2f2, -0x2b7ecead9caabc41, 0x221104eb3f337bd8 }, + { -0x61c3e8bc0d4373ec, 0x2eda26fcb5856c3b, -0x3347d0f197580469, 0x4167a4e6bc593244 } + }, + { + { -0x3d41d99a07317012, -0x169800eb177f29d4, -0x0ed19181d0c9b112, 0x34b33370cb7ed2f6 }, + { 0x643b9d2876f62700, 0x5d1d9d400e7668eb, 0x1b4b430321fc0684, 0x7938bb7e2255246a }, + { -0x323a6e11797e2934, 
-0x31fdef63127a58ad, -0x128b7a3ea77f777d, 0x1176fc6e2dfe65e4 } + }, + { + { -0x246f1d76b688f148, -0x670433d5530bbf5d, 0x21354ffeded7879b, 0x1f6a3e54f26906b6 }, + { -0x4b50932fa4639e65, 0x2ddfc9f4b2a58480, 0x3d4fa502ebe94dc4, 0x08fc3a4c677d5f34 }, + { 0x60a4c199d30734ea, 0x40c085b631165cd6, -0x1dccc1dc08a67d6b, 0x4f2fad0116b900d1 } + }, + { + { -0x69d326e248c449c8, -0x19fa885503ed63f8, 0x6f619b39f3b61689, 0x3451995f2944ee81 }, + { 0x44beb24194ae4e54, 0x5f541c511857ef6c, -0x59e194d2c972fb68, 0x445484a4972ef7ab }, + { -0x6ead032f60158284, 0x4a816c94b0935cf6, 0x258e9aaa47285c40, 0x10b89ca6042893b7 } + }, +}, +{ + { + { -0x29832129862cb560, -0x33f4613f33b24c61, -0x5aca5ba91ca2e6f1, 0x2e05d9eaf61f6fef }, + { -0x64d5bd91c49b9fdb, 0x32127190385ce4cf, -0x5da3003d229215bb, 0x06409010bea8de75 }, + { -0x3bb86fe529e414a7, 0x661f19bce5dc880a, 0x24685482b7ca6827, 0x293c778cefe07f26 } + }, + { + { 0x16c795d6a11ff200, -0x348f2f1d4ea7ea37, -0x760d6cdf64ac6a4b, 0x50b8c2d031e47b4f }, + { -0x797f618ff8f96f6a, -0x5528a4ea1b1afe77, 0x07f35715a21a0147, 0x0487f3f112815d5e }, + { 0x48350c08068a4962, 0x6ffdd05351092c9a, 0x17af4f4aaf6fc8dd, 0x4b0553b53cdba58b } + }, + { + { -0x40fadee4d83ead2c, 0x5ec26849bd1af639, 0x5e0b2caa8e6fab98, 0x054c8bdd50bd0840 }, + { -0x639a0341e4cd0087, -0x148a1560fc4af065, -0x0312d59393f819fa, 0x35106cd551717908 }, + { 0x38a0b12f1dcf073d, 0x4b60a8a3b7f6a276, -0x012a53da2cbfb066, 0x72e82d5e5505c229 } + }, + { + { 0x00d9cdfd69771d02, 0x410276cd6cfbf17e, 0x4c45306c1cb12ec7, 0x2857bf1627500861 }, + { 0x6b0b697ff0d844c8, -0x44ed07a3268634b7, -0x2d5abe393e25f0e1, 0x7b7c242958ce7211 }, + { -0x60de6fc0fefe9762, -0x2886202c4079effb, -0x5edd11a0c214f0e5, 0x510df84b485a00d4 } + }, + { + { 0x24b3c887c70ac15e, -0x4f0c5aa8047e48ce, -0x64d321d01a8733e5, 0x4cf7ed0703b54f8e }, + { -0x5abecc446d885e06, 0x74ec3b6263991237, 0x1a3c54dc35d2f15a, 0x2d347144e482ba3a }, + { 0x6bd47c6598fbee0f, -0x61b8cc1d54aa41d3, 0x1093f624127610c5, 0x4e05e26ad0a1eaa4 } + }, + { + { 0x1833c773e18fe6c0, -0x1c3b8ee52c378d9b, 0x3bfd3c4f0116b283, 0x1955875eb4cd4db8 }, + { -0x2564949db4ace0e0, 0x429a760e77509abb, -0x24160add17dc3480, 0x618f1856880c8f82 }, + { 0x6da6de8f0e399799, 0x7ad61aa440fda178, -0x4cd327efa1ca9c23, 0x15f6beae2ae340ae } + }, + { + { -0x4565f0846dba1deb, -0x0c979ed22673f245, 0x2e84e4cbf220b020, 0x6ba92fe962d90eda }, + { -0x79d434f3ce13c59e, -0x7ef1d4baeec70c3e, 0x788ec4b839dac2a4, 0x28f76867ae2a9281 }, + { 0x3e4df9655884e2aa, -0x429d0424242b9a5b, -0x28a69355f2161adc, 0x6e8042ccb2b1b3d7 } + }, + { + { 0x1530653616521f7e, 0x660d06b896203dba, 0x2d3989bc545f0879, 0x4b5303af78ebd7b0 }, + { -0x0ef2c3d631d73592, -0x452cbabf0349f6c3, -0x18bd91285d15d2c1, 0x08af9d4e4ff298b9 }, + { 0x72f8a6c3bebcbde8, 0x4f0fca4adc3a8e89, 0x6fa9d4e8c7bfdf7a, 0x0dcf2d679b624eb7 } + }, +}, +{ + { + { 0x753941be5a45f06e, -0x2f8351129263a09b, 0x11776b9c72ff51b6, 0x17d2d1d9ef0d4da9 }, + { 0x3d5947499718289c, 0x12ebf8c524533f26, 0x0262bfcb14c3ef15, 0x20b878d577b7518e }, + { 0x27f2af18073f3e6a, -0x02c01ae628adef97, 0x22e3b72c3ca60022, 0x72214f63cc65c6a7 } + }, + { + { 0x1d9db7b9f43b29c9, -0x29fa7db5b0ae708b, -0x0d3f8d42ced0623c, 0x1f24ac855a1545b0 }, + { -0x4b1c80bfacf8596d, -0x5458eb28d0cc986b, -0x29042f588c89ef67, 0x5fdf48c58171cbc9 }, + { 0x24d608328e9505aa, 0x4748c1d10c1420ee, -0x38001ba3f904da5e, 0x00ba739e2ae395e6 } + }, + { + { -0x51bbd90a157744da, 0x360679d984973bfb, 0x5c9f030c26694e50, 0x72297de7d518d226 }, + { 0x592e98de5c8790d6, -0x1a40482cba3d5d21, 0x115a3b60f9b49922, 0x03283a3e67ad78f3 }, + { 0x48241dc7be0cb939, 0x32f19b4d8b633080, 
-0x2c2036f2fdd76cf8, 0x05e1296846271945 } + }, + { + { -0x52404437dbd3bab0, -0x4337f3132fcf7e27, -0x7bca99590a37206e, 0x78cf25d38258ce4c }, + { -0x457d114cd263b6a6, -0x311037030ed44684, -0x4fd254516c4a2e20, 0x39c00c9c13698d9b }, + { 0x15ae6b8e31489d68, -0x557ae35463d40f79, -0x3658a5680fb105fb, 0x006b52076b3ff832 } + }, + { + { -0x0a3481e94631f7d3, 0x3407f14c417abc29, -0x2b4c9431d40b5855, 0x7de2e9561a9f75ce }, + { 0x29e0cfe19d95781c, -0x497e20e7699cef1e, 0x57df39d370516b39, 0x4d57e3443bc76122 }, + { -0x218f2b0b495aa135, 0x4801527f5d85db99, -0x24363bbf2c11657f, 0x6b2a90af1a6029ed } + }, + { + { 0x77ebf3245bb2d80a, -0x27cfe4b8d046f865, -0x39b8190db3118ccd, 0x465812c8276c2109 }, + { 0x6923f4fc9ae61e97, 0x5735281de03f5fd1, -0x589b51bc19122ed3, 0x5fd8f4e9d12d3e4a }, + { 0x4d43beb22a1062d9, 0x7065fb753831dc16, 0x180d4a7bde2968d7, 0x05b32c2b1cb16790 } + }, + { + { -0x08035bd3852a7e6b, 0x3214286e4333f3cc, -0x493d62f2cbf46863, 0x31771a48567307e1 }, + { -0x373fa1332db25703, -0x5e30e553fa20107d, -0x2441100d8206329f, 0x3b5556a37b471e99 }, + { 0x32b0c524e14dd482, -0x124caeabe5d45b4a, -0x5c2e9fb7d7d4a50d, 0x4fc079d27a7336eb } + }, + { + { -0x23cb74bbf3793af3, 0x1337cbc9cc94e651, 0x6422f74d643e3cb9, 0x241170c2bae3cd08 }, + { 0x51c938b089bf2f7f, 0x2497bd6502dfe9a7, -0x00003f63877f1bad, 0x124567cecaf98e92 }, + { 0x3ff9ab860ac473b4, -0x0f6ee211feec1bcb, 0x4ae75060ebc6c4af, 0x3f8612966c87000d } + }, +}, +{ + { + { 0x529fdffe638c7bf3, -0x20d4619fc774b66b, -0x1fd84cb0e452fdb7, 0x7bc92fc9b9fa74ed }, + { 0x0c9c5303f7957be4, -0x5c3ce5df1f7a3ebb, -0x4f8de28e2f7affb0, 0x0aba390eab0bf2da }, + { -0x606810d17fe52607, -0x7c9682ab865025c6, -0x16f94c0042a694b0, 0x02672b37dd3fb8e0 } + }, + { + { -0x116458d6c673580b, -0x146359da85b7b625, 0x29eb29ce7ec544e1, 0x232ca21ef736e2c8 }, + { 0x48b2ca8b260885e4, -0x5bd794137d4cb3e4, -0x6c81e5d9e80a708c, 0x741d1fcbab2ca2a5 }, + { -0x409ebdc2dac034e9, 0x08803ceafa39eb14, -0x0e79fd2067ae3851, 0x0400f3a049e3414b } + }, + { + { 0x2efba412a06e7b06, 0x146785452c8d2560, -0x2068ec1429856e39, 0x32830ac7157eadf3 }, + { -0x5431fb89459e3aa5, 0x36a3d6d7c4d39716, 0x6eb259d5e8d82d09, 0x0c9176e984d756fb }, + { 0x0e782a7ab73769e8, 0x04a05d7875b18e2c, 0x29525226ebcceae1, 0x0d794f8383eba820 } + }, + { + { 0x7be44ce7a7a2e1ac, 0x411fd93efad1b8b7, 0x1734a1d70d5f7c9b, 0x0d6592233127db16 }, + { -0x00ca0a3461eae90c, -0x117fa4309b7551bb, -0x0f28c3d446c5610d, 0x097b0bf22092a6c2 }, + { -0x3b7454eade5628cd, -0x593d151529e544db, 0x625c6c1cc6cb4305, 0x7fc90fea93eb3a67 } + }, + { + { -0x3ad8214a63834dc3, -0x6aac6e96acd7bfb2, -0x29bc6d7e8330d386, 0x6ce97dabf7d8fa11 }, + { 0x0408f1fe1f5c5926, 0x1a8f2f5e3b258bf4, 0x40a951a2fdc71669, 0x6598ee93c98b577e }, + { 0x25b5a8e50ef7c48f, -0x149fcbee90d31ace, -0x3a18ae8c1ac21ac9, 0x73119fa08c12bb03 } + }, + { + { 0x7845b94d21f4774d, -0x409d0e93876848d9, 0x671857c03c56522b, 0x3cd6a85295621212 }, + { -0x12cfed6bac0e5b35, -0x4319de36370ac879, -0x0534d4ecc7411847, 0x3025798a9ea8428c }, + { 0x3fecde923aeca999, -0x4255a4ff9d173ed1, 0x67b99dfc96988ade, 0x3f52c02852661036 } + }, + { + { -0x6da74066113be93a, -0x5375afe8562d098f, 0x629549ab16dea4ab, 0x05d0e85c99091569 }, + { -0x00155b71d5ecae3a, 0x28624754fa7f53d7, 0x0b5ba9e57582ddf1, 0x60c0104ba696ac59 }, + { 0x051de020de9cbe97, -0x05f803a94af4308c, 0x378cec9f0f11df65, 0x36853c69ab96de4d } + }, + { + { 0x4433c0b0fac5e7be, 0x724bae854c08dcbe, -0x0e0db33bb9687065, 0x4a0aff6d62825fc8 }, + { 0x36d9b8de78f39b2d, 0x7f42ed71a847b9ec, 0x241cd1d679bd3fde, 0x6a704fec92fbce6b }, + { -0x16e804619ef6acff, -0x3efd206bfd5f6d08, -0x40f61d0a0599e6f5, 
0x681109bee0dcfe37 } + }, +}, +{ + { + { -0x63e70305c9fb72ed, 0x29159db373899ddd, -0x2360caf4606d2f56, 0x26f57eee878a19d4 }, + { 0x559a0cc9782a0dde, 0x551dcdb2ea718385, 0x7f62865b31ef238c, 0x504aa7767973613d }, + { 0x0cab2cd55687efb1, 0x5180d162247af17b, -0x7a3ea5cbb0a5db99, 0x4041943d9dba3069 } + }, + { + { 0x4b217743a26caadd, 0x47a6b424648ab7ce, -0x34e2b085fc04361d, 0x12d931429800d019 }, + { -0x3c3f1145bc14336a, -0x728b6363d9156351, -0x26056a11e388333a, 0x1420a1d97684340f }, + { 0x00c67799d337594f, 0x5e3c5140b23aa47b, 0x44182854e35ff395, 0x1b4f92314359a012 } + }, + { + { 0x33cf3030a49866b1, 0x251f73d2215f4859, -0x547d55bfae210b0a, 0x5ff191d56f9a23f6 }, + { 0x3e5c109d89150951, 0x39cefa912de9696a, 0x20eae43f975f3020, 0x239b572a7f132dae }, + { -0x7e612bcc53d26f98, 0x2883ab795fc98523, -0x10ba8d7faa6c14c3, 0x020c526a758f36cb } + }, + { + { -0x16ce10a60fbd3377, 0x2c589c9d8e124bb6, -0x52371e755138a669, 0x452cfe0a5602c50c }, + { 0x779834f89ed8dbbc, -0x370d550623835b94, -0x56adb3235c1e4f8c, 0x02aacc4615313877 }, + { -0x795f085f9b878821, -0x443b9bd8f19f8361, -0x54e815da0e04ee37, 0x4cfb7d7b304b877b } + }, + { + { -0x1d79663d687610ee, 0x2b6ecd71df57190d, -0x3cbc37a813368f30, 0x5b1d4cbc434d3ac5 }, + { 0x72b43d6cb89b75fe, 0x54c694d99c6adc80, -0x473c55c8c11cb361, 0x14b4622b39075364 }, + { -0x4904d9ea33f560da, 0x3a4f0e2bb88dcce5, 0x1301498b3369a705, 0x2f98f71258592dd1 } + }, + { + { 0x2e12ae444f54a701, -0x0301c10f56342822, -0x314076f28a7ca220, 0x1d8062e9e7614554 }, + { 0x0c94a74cb50f9e56, 0x5b1ff4a98e8e1320, -0x65d533de7dcff099, 0x3a6ae249d806aaf9 }, + { 0x657ada85a9907c5a, 0x1a0ea8b591b90f62, -0x72f1e20420cb4b17, 0x298b8ce8aef25ff3 } + }, + { + { -0x7c858d15f5de9a22, 0x3fab07b40bcf79f6, 0x521636c77738ae70, 0x6ba6271803a7d7dc }, + { 0x2a927953eff70cb2, 0x4b89c92a79157076, -0x6be7ba85cf583096, 0x34b8a8404d5ce485 }, + { -0x3d91134a7c96cccb, -0x2a57ec209c4a0103, -0x5d6c55655b4dda8d, 0x71d62bdd465e1c6a } + }, + { + { -0x32d24a254e08a10b, -0x28806a30e94f9a0b, 0x14571fea3f49f085, 0x1c333621262b2b3d }, + { 0x6533cc28d378df80, -0x0924bc86f5f05b4c, -0x1c9ba00608fe25a6, 0x74d5f317f3172ba4 }, + { -0x57901aab9826357f, 0x398b7c752b298c37, -0x2592f76d1c539dc5, 0x4aebcc4547e9d98c } + }, +}, +{ + { + { 0x0de9b204a059a445, -0x1ea34b55b4e852f1, -0x1e4413ade0863aa9, 0x2633f1b9d071081b }, + { 0x53175a7205d21a77, -0x4f3fbbdd2c46cb2c, -0x52260db422a21524, 0x074f46e69f10ff8c }, + { -0x3e04be88fe7466f0, -0x5915df2393f01ec0, -0x299e0c18bcab3901, 0x5ecb72e6f1a3407a } + }, + { + { -0x01151ef917179669, -0x679ccc80672f6c7d, -0x6b8fb7f155f91411, 0x038b6898d4c5c2d0 }, + { -0x5aea5ce4dda604b2, 0x0960f3972bcac52f, -0x124ad01372cbab35, 0x382e2720c476c019 }, + { -0x0c6e3ae27531af5a, 0x3142d0b9ae2d2948, -0x24b2a5e580db3580, 0x21aeba8b59250ea8 } + }, + { + { 0x53853600f0087f23, 0x4c461879da7d5784, 0x6af303deb41f6860, 0x0a3c16c5c27c18ed }, + { 0x24f13b34cf405530, 0x3c44ea4a43088af7, 0x5dd5c5170006a482, 0x118eb8f8890b086d }, + { 0x17e49c17cc947f3d, -0x33391259553e2d85, -0x209f6d314f0f71aa, 0x4909b3e22c67c36b } + }, + { + { 0x59a16676706ff64e, 0x10b953dd0d86a53d, 0x5848e1e6ce5c0b96, 0x2d8b78e712780c68 }, + { -0x63637a159c01d177, -0x41e4506ef16bed14, -0x7084557579040185, 0x0fb17f9fef968b6c }, + { 0x79d5c62eafc3902b, 0x773a215289e80728, -0x3c7519bf1efedf47, 0x09ae23717b2b1a6d } + }, + { + { 0x10ab8fa1ad32b1d0, -0x165312e41d8874dc, -0x577a943fc8c216f1, 0x66f35ddddda53996 }, + { -0x4495e6d5b1b2f7c4, 0x34ace0630029e192, -0x67dba5a655054515, 0x6d9c8a9ada97faac }, + { -0x2d826504db668cdd, 0x1bb7e07ef6f01d2e, 0x2ba7472df52ecc7f, 0x03019b4f646f9dc8 } 
+ }, + { + { -0x50f64deb194c2395, 0x3f7573b5ad7d2f65, -0x2fe62677eff5dc50, 0x392b63a58b5c35f7 }, + { 0x04a186b5565345cd, -0x111899ef433bee96, 0x689c73b478fb2a45, 0x387dcbff65697512 }, + { 0x4093addc9c07c205, -0x3a9a41ea0acd3c82, 0x63dbecfd1583402a, 0x61722b4aef2e032e } + }, + { + { -0x294f85aa7e34f1c4, 0x290ff006d9444969, 0x08680b6a16dcda1f, 0x5568d2b75a06de59 }, + { 0x0012aafeecbd47af, 0x55a266fb1cd46309, -0x0dfc1497f69838d4, 0x39633944ca3c1429 }, + { -0x72f34773e4c8301f, 0x05b6a5a3053818f3, -0x0d1643fb487826a7, 0x6beba1249add7f64 } + }, + { + { 0x5c3cecb943f5a53b, -0x633659e2f93f720e, -0x30459c657a76abb9, 0x5a845ae80df09fd5 }, + { 0x1d06005ca5b1b143, 0x6d4c6bb87fd1cda2, 0x6ef5967653fcffe7, 0x097c29e8c1ce1ea5 }, + { 0x4ce97dbe5deb94ca, 0x38d0a4388c709c48, -0x3bc1312b5e962f69, 0x0a1249fff7e587c3 } + }, +}, +{ + { + { 0x0b408d9e7354b610, -0x7f94cdaca457a492, -0x2419c5fcb5a75df9, 0x173bd9ddc9a1df2c }, + { 0x12f0071b276d01c9, -0x1847453a793b7390, 0x5308129b71d6fba9, 0x5d88fbf95a3db792 }, + { 0x2b500f1efe5872df, 0x58d6582ed43918c1, -0x1912d8713698c520, 0x06e1cd13b19ea319 } + }, + { + { 0x472baf629e5b0353, 0x3baa0b90278d0447, 0x0c785f469643bf27, 0x7f3a6a1a8d837b13 }, + { 0x40d0ad516f166f23, 0x118e32931fab6abe, 0x3fe35e14a04d088e, 0x3080603526e16266 }, + { -0x0819bbc6a2c27ff5, -0x6a572aaa36fe120a, 0x68cd7830592c6339, 0x30d0fded2e51307e } + }, + { + { -0x634b68e19747b8b0, -0x5f6a8dd6999b4431, 0x5c8de72672fa412b, 0x4615084351c589d9 }, + { -0x1fa6b2e50dedcc4d, 0x1bdbe78ef0cc4d9c, 0x6965187f8f499a77, 0x0a9214202c099868 }, + { -0x436fe63f51465fd2, 0x55c7110d16034cae, 0x0e6df501659932ec, 0x3bca0d2895ca5dfe } + }, + { + { -0x639771496133fe41, -0x0f437c5259bb7691, -0x35d26aa0a085601e, 0x4ea8b4038df28241 }, + { 0x40f031bc3c5d62a4, 0x19fc8b3ecff07a60, -0x67e7c25decf04abb, 0x5631deddae8f13cd }, + { 0x2aed460af1cad202, 0x46305305a48cee83, -0x6ede88bab60ee5a1, 0x24ce0930542ca463 } + }, + { + { 0x3fcfa155fdf30b85, -0x2d08e971c9c8d15c, -0x4d1f9b219b6d07bc, 0x549928a7324f4280 }, + { 0x1fe890f5fd06c106, -0x4a3b97caa277ef0e, -0x7d87f701917350c2, 0x41d4e3c28a06d74b }, + { -0x0d91cd589c11e5d2, -0x516e1b482da00216, -0x43c42cc42e80b297, 0x491b66dec0dcff6a } + }, + { + { 0x75f04a8ed0da64a1, -0x12ddd350981dd7b5, -0x7dcb5c86e084845c, 0x4cf6b8b0b7018b67 }, + { -0x670a4ec23815cd59, -0x1c2a073381e92468, -0x53f540ad340726b9, 0x08f338d0c85ee4ac }, + { -0x3c7c57de66e58c43, -0x54d843fe20cdf386, -0x3ec2cce47b888f9d, 0x530d4a82eb078a99 } + }, + { + { 0x6d6973456c9abf9e, 0x257fb2fc4900a880, 0x2bacf412c8cfb850, 0x0db3e7e00cbfbd5b }, + { 0x004c3630e1f94825, 0x7e2d78268cab535a, -0x38b7dcdc337b0075, 0x65ea753f101770b9 }, + { 0x3d66fc3ee2096363, -0x7e29d3809e4a3495, 0x0fbe044213443b1a, 0x02a4ec1921e1a1db } + }, + { + { -0x0a379e9d0e3086a1, 0x118c861926ee57f2, 0x172124851c063578, 0x36d12b5dec067fcf }, + { 0x5ce6259a3b24b8a2, -0x47a88533ba505f48, -0x33341917745f8fc9, 0x3d143c51127809bf }, + { 0x126d279179154557, -0x2a1b70a30387c5f6, 0x36bdb6e8df179bac, 0x2ef517885ba82859 } + }, +}, +{ + { + { 0x1ea436837c6da1e9, -0x063e7650e0464242, 0x303001fcce5dd155, 0x28a7c99ebc57be52 }, + { -0x7742bc732ee1f2b6, 0x30cb610d43ccf308, -0x1f65f1c86e6c8434, 0x4559135b25b1720c }, + { -0x47026c66172e6163, -0x6f7e6e3469dbdc01, -0x4d46b728b838bd5d, 0x37f33226d7fb44c4 } + }, + { + { 0x33912553c821b11d, 0x66ed42c241e301df, 0x066fcc11104222fd, 0x307a3b41c192168f }, + { 0x0dae8767b55f6e08, 0x4a43b3b35b203a02, -0x1c8da5917f507387, 0x0f7a7fd1705fa7a3 }, + { -0x7114a2f8914aa320, 0x2fc536bfaa0d925a, -0x417e7cf023493918, 0x556c7045827baf52 } + }, + { + { 
-0x46b46ffdd40bbbfa, -0x542bdc81006f4acc, 0x7600a960faf86d3a, 0x2f45abdac2322ee3 }, + { -0x71d4ae8cfd162749, -0x1c1add96db78eb18, -0x42b04288b3569f4b, 0x6f4b4199c5ecada9 }, + { 0x61af4912c8ef8a6a, -0x1a705b01bc0491a2, -0x4a5033a2902bd831, 0x6a5393281e1e11eb } + }, + { + { 0x0fff04fe149443cf, 0x53cac6d9865cddd7, 0x31385b03531ed1b7, 0x5846a27cacd1039d }, + { -0x0c25aec65a2e1177, -0x7ebaba83006c9678, 0x3f622fed00e188c4, 0x0f513815db8b5a3d }, + { 0x4ff5cdac1eb08717, 0x67e8b29590f2e9bc, 0x44093b5e237afa99, 0x0d414bed8708b8b2 } + }, + { + { -0x7e77956dd6b53618, 0x23162b45d55547be, -0x6b3043bbfc8ea67d, 0x50eb8fdb134bc401 }, + { -0x30497d9a02f18a0a, -0x1ba4c1d7446f18f9, 0x7242a8de9ff92c7a, 0x685b3201933202dd }, + { -0x3f48c139294ccf33, -0x7b1bb7f8ecd0500f, 0x732b7352c4a5dee1, 0x5d7c7cf1aa7cd2d2 } + }, + { + { 0x33d1013e9b73a562, -0x6da310a8b713d91f, -0x580319eb22b97fa8, 0x78b0fad41e9aa438 }, + { -0x50c4b94085b5505e, -0x4878fa13b2bf2bef, 0x114f0c6aca7c15e3, 0x3f364faaa9489d4d }, + { -0x40a95bce12fa4b78, -0x5acc199363b6a382, -0x179ad450780c9ae6, 0x0241800059d66c33 } + }, + { + { 0x28350c7dcf38ea01, 0x7c6cdbc0b2917ab6, -0x531830417a8f7d09, 0x4d2845aba2d9a1e0 }, + { -0x314f88015c85a41c, -0x249bd0fd1a5a1149, -0x3d192f3ab8ed8f48, 0x4771b65538e4529c }, + { -0x44ac801fbb8f8f22, -0x3458bbbc922aa821, -0x2c4a5cb8c9ff2435, 0x4aeabbe6f9ffd7f8 } + }, + { + { 0x6a2134bcc4a9c8f2, -0x040702e37531d1c9, 0x000ae3049911a0ba, 0x046e3a616bc89b9e }, + { 0x4630119e40d8f78c, -0x5fe5643ac38ef1ef, 0x486d2b258910dd79, 0x1e6c47b3db0324e5 }, + { 0x14e65442f03906be, 0x4a019d54e362be2a, 0x68ccdfec8dc230c7, 0x7cfb7e3faf6b861c } + }, +}, +{ + { + { -0x69114004cfa4d0af, -0x2c06c752776a6948, -0x0f0ad238b92a22db, 0x57968290bb3a0095 }, + { 0x4637974e8c58aedc, -0x4610dd04540fbe5c, -0x1e7a26a9167f8e76, 0x2f1b78fab143a8a6 }, + { -0x08e547bcf5df1eff, -0x0c6c9a72db0f13b9, -0x308af657911d112f, 0x7dc43e35dc2aa3e1 } + }, + { + { 0x5a782a5c273e9718, 0x3576c6995e4efd94, 0x0f2ed8051f237d3e, 0x044fb81d82d50a99 }, + { -0x7a69999a7782263d, -0x36f064ceb44facab, -0x391f720710df864f, 0x7ef72016758cc12f }, + { -0x3e20e73a56f81c27, 0x57b3371dce4c6359, -0x358fbacb4dfe44b7, 0x7f79823f9c30dd2e } + }, + { + { 0x6a9c1ff068f587ba, 0x0827894e0050c8de, 0x3cbf99557ded5be7, 0x64a9b0431c06d6f0 }, + { -0x7ccb2dc65c4aec18, -0x3ec98f2b46e05728, 0x12b54136f590bd33, 0x0a4e0373d784d9b4 }, + { 0x2eb3d6a15b7d2919, -0x4f4b095f2ac57dcb, 0x7156ce4389a45d47, 0x071a7d0ace18346c } + }, + { + { -0x33f3caaddf1ebbcf, 0x0d65950709b15141, -0x650a9de4df62a0ca, 0x7c69bcf7617755d3 }, + { -0x2cf8d255377845f5, 0x01262905bfa562ee, -0x30abcffd3f108975, 0x2c3bcc7146ea7e9c }, + { 0x07f0d7eb04e8295f, 0x10db18252f50f37d, -0x16ae565ce8e86729, 0x6f5a9a7322aca51d } + }, + { + { -0x18d62b145c26bb42, -0x7261f6bf7f875062, 0x4525567a47869c03, 0x02ab9680ee8d3b24 }, + { -0x745efff3d0be393b, -0x3b60863ef3010465, 0x4efa47703cc51c9f, 0x494e21a2e147afca }, + { -0x105b757a221af266, 0x219a224e0fb9a249, -0x05f6e0e226e10927, 0x6b5d76cbea46bb34 } + }, + { + { -0x1f06bee8e187dade, -0x0e19518bfc96c92d, 0x408b3ea2d0fcc746, 0x16fb869c03dd313e }, + { -0x77a8aa9313f3266c, 0x6472dc6f5cd01dba, -0x50fe96eb70bd4b89, 0x0ae333f685277354 }, + { 0x288e199733b60962, 0x24fc72b4d8abe133, 0x4811f7ed0991d03e, 0x3f81e38b8f70d075 } + }, + { + { 0x0adb7f355f17c824, 0x74b923c3d74299a4, -0x2a83c17434071509, 0x0ad3e2d34cdedc3d }, + { 0x7f910fcc7ed9affe, 0x545cb8a12465874b, -0x57c6812db4f3b8fc, 0x50510fc104f50993 }, + { 0x6f0c0fc5336e249d, 0x745ede19c331cfd9, -0x0d2902fff61101e4, 0x127c158bf0fa1ebe } + }, + { + { -0x215d703b51ae468c, 
0x1d9973d3744dfe96, 0x6240680b873848a8, 0x4ed82479d167df95 }, + { -0x09e683bdd167865e, -0x5bb5222bad35c9b9, -0x64bec03eb4b15335, 0x354ef87d07ef4f68 }, + { -0x011c4add9f3a268b, 0x50352efceb41b0b8, -0x77f753cf56099ac4, 0x302d92d20539236d } + }, +}, +{ + { + { -0x6a847474f20ac3d0, 0x2a1c770a8e60f098, -0x4438598fcba86922, 0x22a48f9a90c99bc9 }, + { 0x4c59023fcb3efb7c, 0x6c2fcb99c63c2a94, -0x45be6f1d3c381f7c, 0x0e545daea51874d9 }, + { 0x6b7dc0dc8d3fac58, 0x5497cd6ce6e42bfd, 0x542f7d1bf400d305, 0x4159f47f048d9136 } + }, + { + { 0x748515a8bbd24839, 0x77128347afb02b55, 0x50ba2ac649a2a17f, 0x060525513ad730f1 }, + { 0x20ad660839e31e32, -0x07e1e42a7bfa41b0, -0x07f9bfa90b254397, 0x14d23dd4ce71b975 }, + { -0x0dc671f6755d807e, 0x6d7982bb89a1b024, -0x0596bf7bdeb22db4, 0x71ab966fa32301c3 } + }, + { + { -0x4ef775f8fd7f66ab, 0x43b273ea0b43c391, -0x3564985101f97913, 0x605eecbf8335f4ed }, + { 0x2dcbd8e34ded02fc, 0x1151f3ec596f22aa, -0x435daabcb1fcd726, 0x35768fbe92411b22 }, + { -0x7cdff59a93cbfbcf, -0x60328e98711a63d1, 0x75d4613f71300f8a, 0x7a912faf60f542f9 } + }, + { + { 0x253f4f8dfa2d5597, 0x25e49c405477130c, 0x00c052e5996b1102, 0x33cb966e33bb6c4a }, + { -0x4dfba7a1a123e5bd, -0x60f1e911a76838c4, 0x5b82c0ae4e70483c, 0x624a170e2bddf9be }, + { 0x597028047f116909, -0x7d753be3e1a9bb99, 0x70417dbde6217387, 0x721627aefbac4384 } + }, + { + { -0x02cf6843bef4d0de, -0x0e5fa2584a3057bc, 0x61289a1def57ca74, 0x245ea199bb821902 }, + { -0x682fc43c78c9522b, 0x2f1422afc532b130, 0x3aa68a057101bbc4, 0x4c946cf7e74f9fa7 }, + { -0x51235996872b8808, 0x1898ba3c29117fe1, -0x308c067c8df342a8, 0x67da12e6b8b56351 } + }, + { + { 0x2b7ef3d38ec8308c, -0x7d7028138e146b55, -0x7f83c4c93af9d543, 0x0cb64cb831a94141 }, + { 0x7067e187b4bd6e07, 0x6e8f0203c7d1fe74, -0x6c3955d0c737a5d0, 0x76297d1f3d75a78a }, + { 0x3030fc33534c6378, -0x469ca3a31abe179f, 0x15d9a9bed9b2c728, 0x49233ea3f3775dcb } + }, + { + { 0x7b3985fe1c9f249b, 0x4fd6b2d5a1233293, -0x314cba6be520b29e, 0x6987ff6f542de50c }, + { 0x629398fa8dbffc3a, -0x1ed01ad22ab24bab, -0x0c41ee20250dad6b, 0x628b140dce5e7b51 }, + { 0x47e241428f83753c, 0x6317bebc866af997, -0x2544a4bcc2e567d7, 0x074d8d245287fb2d } + }, + { + { 0x481875c6c0e31488, 0x219429b2e22034b4, 0x7223c98a31283b65, 0x3420d60b342277f9 }, + { -0x7cc82632bbf403cf, 0x729d2ca1af318fd7, -0x5fbf5b5b88d3df90, 0x46002ef03a7349be }, + { -0x055dc52150019a09, 0x78261ed45be0764c, 0x441c0a1e2f164403, 0x5aea8e567a87d395 } + }, +}, +{ + { + { 0x2dbc6fb6e4e0f177, 0x04e1bf29a4bd6a93, 0x5e1966d4787af6e8, 0x0edc5f5eb426d060 }, + { 0x7813c1a2bca4283d, -0x129d0f6e5e79c227, -0x513843473d97057a, 0x10e5d3b76f1cae4c }, + { 0x5453bfd653da8e67, -0x1623e113db5609bf, -0x4078d9c4fca875dd, 0x45b46c51361cba72 } + }, + { + { -0x3162b22275801c1c, -0x54ec9ba9899df1d0, 0x4b594f7bb30e9958, 0x5c1c0aef321229df }, + { -0x56bfd540ceb0805f, -0x1da80e2371730bb0, 0x1dbbd54b23a8be84, 0x2177bfa36dcb713b }, + { 0x37081bbcfa79db8f, 0x6048811ec25f59b3, 0x087a76659c832487, 0x4ae619387d8ab5bb } + }, + { + { 0x61117e44985bfb83, -0x031fb9d58e69ceca, -0x7c53cbb72bda6fb5, 0x75685abe5ba43d64 }, + { -0x72240955acbb5cd2, 0x7d88eab4b41b4078, 0x5eb0eb974a130d60, 0x1a00d91b17bf3e03 }, + { 0x6e960933eb61f2b2, 0x543d0fa8c9ff4952, -0x208d8aef85099a97, 0x135529b623b0e6aa } + }, + { + { -0x0a38e9431dd17c02, -0x4bd414e617f67a3f, -0x136259c8ebdab552, 0x5972ea051590a613 }, + { 0x18f0dbd7add1d518, -0x68608777303ee0ef, -0x78cd1e0f8eeb8a65, 0x79b5b81a65ca3a01 }, + { 0x0fd4ac20dc8f7811, -0x65652d6b53b2b058, -0x3fe4d29b4cc9fbcc, 0x4f7e9c95905f3bdb } + }, + { + { 0x71c8443d355299fe, -0x7432c4e324141529, 
-0x7f6db6610e5b6b9a, 0x1942eec4a144adc8 }, + { 0x62674bbc5781302e, -0x27adf0c6765223f1, -0x73d66651ac04263a, 0x31993ad92e638e4c }, + { 0x7dac5319ae234992, 0x2c1b3d910cea3e92, 0x553ce494253c1122, 0x2a0a65314ef9ca75 } + }, + { + { -0x30c9e532c3e386c6, 0x2f9ebcac5a35bc3b, 0x60e860e9a8cda6ab, 0x055dc39b6dea1a13 }, + { 0x2db7937ff7f927c2, -0x248be0f9e82f59cb, 0x5982f3a21155af76, 0x4cf6e218647c2ded }, + { -0x4ee6dd833d72a44a, 0x07e24ebc774dffab, -0x57c387311b5cd377, 0x121a307710aa24b6 } + }, + { + { -0x29a68ec1388b7c37, -0x77401f8847d46951, 0x289e28231097bcd3, 0x527bb94a6ced3a9b }, + { -0x1b24a2a160fcb569, -0x1eac03f6cfcb43d3, 0x460546919551d3b1, 0x333fc76c7a40e52d }, + { 0x563d992a995b482e, 0x3405d07c6e383801, 0x485035de2f64d8e5, 0x6b89069b20a7a9f7 } + }, + { + { 0x4082fa8cb5c7db77, 0x068686f8c734c155, 0x29e6c8d9f6e7a57e, 0x0473d308a7639bcf }, + { -0x7ed55fbe9d8fddf3, -0x66a5760506dba4b2, -0x00523b31af8d10fb, 0x23bc2103aa73eb73 }, + { -0x351186d9fca761fb, 0x2b4b421246dcc492, 0x02a1ef74e601a94f, 0x102f73bfde04341a } + }, +}, +{ + { + { 0x358ecba293a36247, -0x5070679d4d97029b, 0x412f7e9968a01c89, 0x5786f312cd754524 }, + { -0x4a5d2af3813df2c2, -0x39b422915f368d9d, 0x56e89052c1ff734d, 0x4929c6f72b2ffaba }, + { 0x337788ffca14032c, -0x0c6defd7bb80e11d, -0x74ebf8e0dce43353, 0x4c817b4bf2344783 } + }, + { + { 0x413ba057a40b4484, -0x45b3d1e5b0a095bd, 0x614ba0a5aee1d61c, 0x78a1531a8b05dc53 }, + { 0x0ff853852871b96e, -0x1ec160549f3c0e45, -0x1102a6acdacbbbfe, 0x0a37c37075b7744b }, + { 0x6cbdf1703ad0562b, -0x7130b7cf36dade5d, -0x25142cfc027bdb19, 0x72ad82a42e5ec56f } + }, + { + { -0x3c976c6e98fdb43d, -0x71962e92b6afd026, -0x030d13c31ba0b4d7, 0x065f669ea3b4cbc4 }, + { 0x3f9e8e35bafb65f6, 0x39d69ec8f27293a1, 0x6cb8cd958cf6a3d0, 0x1734778173adae6d }, + { -0x75ff5138aacd24b3, -0x47965b1bbc1ce44f, 0x4a0f8552d3a7f515, 0x19adeb7c303d7c08 } + }, + { + { -0x62fa4582bc3ce86c, 0x2470c8ff93322526, -0x7cdc2137e9e68bc8, 0x2852709881569b53 }, + { -0x38df349eac15265d, 0x55b2c97f512b636e, -0x4e1ca4a02bfd6f4f, 0x2fd9ccf13b530ee2 }, + { 0x07bd475b47f796b8, -0x2d384fecabd370ac, 0x2dbd23f43b24f87e, 0x6551afd77b0901d6 } + }, + { + { 0x68a24ce3a1d5c9ac, -0x44885cc2ef009b9f, 0x0f86ce4425d3166e, 0x56507c0950b9623b }, + { 0x4546baaf54aac27f, -0x090990134d5ba5d8, 0x582d1b5b562bcfe8, 0x44b123f3920f785f }, + { 0x1206f0b7d1713e63, 0x353fe3d915bafc74, 0x194ceb970ad9d94d, 0x62fadd7cf9d03ad3 } + }, + { + { 0x3cd7bc61e7ce4594, -0x3294ca564822d982, -0x5f7f5437bc9910d9, 0x6ec7c46f59c79711 }, + { -0x394a6984aa675f8c, 0x5efe91ce8e493e25, -0x2b48d3bab6d7f778, 0x20ef1149a26740c2 }, + { 0x2f07ad636f09a8a2, -0x79681931dbdfa183, -0x3f5103fa11ca5ec7, 0x15e80958b5f9d897 } + }, + { + { 0x4dd1ed355bb061c4, 0x42dc0cef941c0700, 0x61305dc1fd86340e, 0x56b2cc930e55a443 }, + { 0x25a5ef7d0c3e235b, 0x6c39c17fbe134ee7, -0x388b1ecbd23a3cd9, 0x021354b892021f39 }, + { 0x1df79da6a6bfc5a2, 0x02f3a2749fde4369, -0x4cdc260d325c6f59, 0x7be0847b8774d363 } + }, + { + { 0x1466f5af5307fa11, -0x7e8033821293f50e, 0x0a6de44ec3a4a3fb, 0x74071475bc927d0b }, + { -0x736633a574c0aa3d, 0x0611d7253fded2a0, -0x12d66a00c948f5ca, 0x1f699a54d78a2619 }, + { -0x188d6d0c8c181576, 0x296537d2cb045a31, 0x1bd0653ed3274fde, 0x2f9a2c4476bd2966 } + }, +}, +{ + { + { -0x5d4b251f4aaee366, 0x7ac860292bffff06, -0x67e0c8a20aafbdcc, 0x3f6bd725da4ea12d }, + { -0x14e7465480a8ba3a, 0x023a8aee5787c690, -0x48d8ed25d2085057, 0x36597d25ea5c013d }, + { 0x734d8d7b106058ac, -0x26bfa86190396fa1, 0x6466f8f99202932d, 0x7b7ecc19da60d6d0 } + }, + { + { 0x6dae4a51a77cfa9b, -0x7dd9c9ab185c79b0, 0x09bbffcd8f2d82db, 
0x03bedc661bf5caba }, + { 0x78c2373c695c690d, -0x22dad199f9bd6f92, -0x6ae2bbbbb51ed42e, 0x4235ad7601743956 }, + { 0x6258cb0d078975f5, 0x492942549189f298, -0x5f354bdc1d1c911c, 0x0e7ce2b0cdf066a1 } + }, + { + { -0x0159012026b48f07, -0x0ecf3fae3e0345d3, 0x4882d47e7f2fab89, 0x615256138aeceeb5 }, + { -0x3b6b9bc53b737a5d, -0x02c9e20bc39ec653, 0x09db17dd3ae94d48, 0x666e0a5d8fb4674a }, + { 0x2abbf64e4870cb0d, -0x329a430f55ba7495, -0x6541b1458a1767a3, 0x7f0bc810d514dee4 } + }, + { + { -0x7c5362528c8dec60, -0x60090745d108d168, 0x311e2edd43ec6957, 0x1d3a907ddec5ab75 }, + { -0x46ff945bd90bec91, -0x7298c961a81fcfcb, -0x34372026b0b9c3d8, 0x0d1f8dbcf8eedbf5 }, + { -0x45e96ccec12f7e24, 0x29329fad851b3480, 0x0128013c030321cb, 0x00011b44a31bfde3 } + }, + { + { 0x16561f696a0aa75c, -0x3e408da3a7ad4296, 0x11a8dd7f9a7966ad, 0x63d988a2d2851026 }, + { 0x3fdfa06c3fc66c0c, 0x5d40e38e4dd60dd2, 0x7ae38b38268e4d71, 0x3ac48d916e8357e1 }, + { 0x00120753afbd232e, -0x16d431470227097d, -0x07e9964c7b18d46f, 0x33fad52b2368a066 } + }, + { + { -0x72d3372f3bdd3018, 0x072b4f7b05a13acb, -0x5c01491913095a91, 0x3cc355ccb90a71e2 }, + { 0x540649c6c5e41e16, 0x0af86430333f7735, -0x4d53032d0cfa18ba, 0x16c0f429a256dca7 }, + { -0x16496bbc6fc16ecf, -0x475b6b3485a9c832, -0x37832e5b45456dbc, 0x631eaf426bae7568 } + }, + { + { 0x47d975b9a3700de8, 0x7280c5fbe2f80552, 0x53658f2732e45de1, 0x431f2c7f665f80b5 }, + { -0x4c16fbef25990161, -0x7a22b4ad93e91a5a, -0x43c2689ee106407d, 0x5599648b1ea919b5 }, + { -0x29fd9cbb7a7084e7, 0x14ab352fa1ea514a, -0x76ffbbe5df6f5629, 0x7b04715f91253b26 } + }, + { + { -0x4c893d7f3b19453a, -0x68f12c2292e264f5, -0x4f656aa7baf406bc, 0x48d0acfa57cde223 }, + { -0x7c1242d7530951bd, -0x79ca837482a3854c, -0x3fbfb8964814d3bc, 0x59b37bf5c2f6583f }, + { -0x49f0d91b8254198f, -0x0e2e5e689dd0c5c9, 0x4208ce7ee9960394, 0x16234191336d3bdb } + }, +}, +{ + { + { -0x7ad22e02c2a87442, 0x2b65ce72c3286108, 0x658c07f4eace2273, 0x0933f804ec38ab40 }, + { -0x0e651538cc59c511, 0x2c7fba5d4442454e, 0x5da87aa04795e441, 0x413051e1a4e0b0f5 }, + { -0x5854968672b69b8a, -0x7ede5521034a5438, -0x5a23ed1084ac6b8e, 0x07fd47065e45351a } + }, + { + { 0x304211559ae8e7c3, -0x0d7e4dd66bb77d5b, -0x75ec53d1c87daf1c, 0x014afa0954ba48f4 }, + { -0x37a7c3c2da72d433, 0x17029a4daf60b73f, -0x05f03629be95c87f, 0x1c1e5fba38b3fb23 }, + { -0x34ce68ffe44c9994, 0x330060524bffecb9, 0x293711991a88233c, 0x291884363d4ed364 } + }, + { + { -0x0462c83c43e54915, 0x02be14534d57a240, -0x0b28cbea075a1e0a, 0x5964f4300ccc8188 }, + { 0x033c6805dc4babfa, 0x2c15bf5e5596ecc1, 0x1bc70624b59b1d3b, 0x3ede9850a19f0ec5 }, + { -0x1bb5dcead2f69800, 0x5c08c55970866996, -0x20d249f5b9500492, 0x579155c1f856fd89 } + }, + { + { -0x4a0e949cf7e8185a, -0x7f7396dcc3caefda, 0x324a983b54cef201, 0x53c092084a485345 }, + { -0x69cdb122ed1f3611, 0x468b878df2420297, 0x199a3776a4f573be, 0x1e7fbcf18e91e92a }, + { -0x2d2beb7e0e345041, 0x231d2db6716174e5, 0x0b7d7656e2a55c98, 0x3e955cd82aa495f6 } + }, + { + { -0x54c60c109e44c5c1, -0x714bff9ad146e6c2, -0x4a219133c73ee08c, 0x654d7e9626f3c49f }, + { -0x1b70aca1c12eabcd, -0x2f8a96d5f28d8f5d, 0x40fbd21daade6387, 0x14264887cf4495f5 }, + { -0x1a9b3022a382d315, -0x7d11502128c83347, 0x6107db62d1f9b0ab, 0x0b6baac3b4358dbb } + }, + { + { 0x204abad63700a93b, -0x41ffdc2c25886c8d, -0x27a0fcb99cc548f7, 0x00496dc490820412 }, + { 0x7ae62bcb8622fe98, 0x47762256ceb891af, 0x1a5a92bcf2e406b4, 0x7d29401784e41501 }, + { 0x1c74b88dc27e6360, 0x074854268d14850c, -0x5eba0484c1f234d0, 0x10843f1b43803b23 } + }, + { + { -0x2a9098d21cdb9765, -0x2e2575124c6b567f, -0x2284a7016e973013, 0x7ce246cd4d56c1e8 }, 
+ { -0x3a06fbaac89d8923, -0x31a6ea72289ba327, -0x6d09a2aee2c994c7, 0x11574b6e526996c4 }, + { -0x470bcf71807f41ad, 0x5f3cb8cb34a9d397, 0x18a961bd33cc2b2c, 0x710045fb3a9af671 } + }, + { + { -0x5fc0379dfa629662, 0x2370cfa19a619e69, -0x3b01c4edd07dc215, 0x1d1b056fa7f0844e }, + { 0x73f93d36101b95eb, -0x0510cc86b090bb7a, 0x5651735f8f15e562, 0x7fa3f19058b40da1 }, + { 0x1bc64631e56bf61f, -0x2c8654ef91ac7d5d, 0x4d58c57e0540168d, 0x566256628442d8e4 } + }, +}, +{ + { + { -0x22b66329e00c79c0, 0x29cd9bc3063625a0, 0x51e2d8023dd73dc3, 0x4a25707a203b9231 }, + { -0x461b662109d9800a, 0x7772ca7b742c0843, 0x23a0153fe9a4f2b1, 0x2cdfdfecd5d05006 }, + { 0x2ab7668a53f6ed6a, 0x304242581dd170a1, 0x4000144c3ae20161, 0x5721896d248e49fc } + }, + { + { 0x285d5091a1d0da4e, 0x4baa6fa7b5fe3e08, 0x63e5177ce19393b3, 0x03c935afc4b030fd }, + { 0x0b6e5517fd181bae, -0x6fdd9d60d4469c4c, 0x5509bce932064625, 0x578edd74f63c13da }, + { -0x668d8939b6d4f3c3, 0x47ccc2c4dfe205fc, -0x232d647b229dc5c4, 0x3ec2ab590288c7a2 } + }, + { + { -0x58dec5f651cd2e35, 0x0f2b87df40f5c2d5, 0x0baea4c6e81eab29, 0x0e1bf66c6adbac5e }, + { -0x5e5f2d841b278447, -0x5674b2149ec6e513, -0x665f222f8c34647d, 0x2dd5c25a200fcace }, + { -0x1d542a1686d37782, 0x1a020018cb926d5d, -0x404596324551a0e2, 0x730548b35ae88f5f } + }, + { + { -0x7fa4f6b45e291ccc, -0x40c10e88f6cac0e7, 0x423f06cb0622702b, 0x585a2277d87845dd }, + { -0x3bcaae5c34574712, 0x65a26f1db2115f16, 0x760f4f52ab8c3850, 0x3043443b411db8ca }, + { -0x5e75a07dcc2b769e, 0x6698c4b5ec78257f, -0x5871905ac8c1be01, 0x7656278950ef981f } + }, + { + { -0x1e8f8c5c15793063, 0x3a8cfbb707155fdc, 0x4853e7fc31838a8e, 0x28bbf484b613f616 }, + { 0x38c3cf59d51fc8c0, -0x64122d02faf9490e, 0x26bf109fab570e8f, 0x3f4160a8c1b846a6 }, + { -0x0d9ed0a390ec9384, -0x50152ef80922ee42, 0x527e9ad213de6f33, 0x1e79cb358188f75d } + }, + { + { 0x77e953d8f5e08181, -0x7b5af3bbd6622127, -0x2393d2f379bada1b, 0x478ab52d39d1f2f4 }, + { 0x013436c3eef7e3f1, -0x7d7495800161ef08, 0x7ff908e5bcf9defc, 0x65d7951b3a3b3831 }, + { 0x66a6a4d39252d159, -0x1a221e4378e537f9, -0x47d394bf593e3691, 0x16d87a411a212214 } + }, + { + { -0x045b2a1d2ab1fa7d, -0x1de05028d1426606, 0x497ac2736ee9778f, 0x1f990b577a5a6dde }, + { -0x4c4281a5bdf99deb, -0x78641c32f3a5db3f, 0x57c05db1d6f994b7, 0x28f87c8165f38ca6 }, + { -0x5ccbb152e417082a, 0x7d1e50ebacea798f, 0x77c6569e520de052, 0x45882fe1534d6d3e } + }, + { + { -0x275366d66bc3901c, -0x4a060e9e5c7c6d5e, 0x2699db13bec89af3, 0x7dcf843ce405f074 }, + { 0x6669345d757983d6, 0x62b6ed1117aa11a6, 0x7ddd1857985e128f, 0x688fe5b8f626f6dd }, + { 0x6c90d6484a4732c0, -0x2adebc0235a9cd67, -0x4c41d73c6ea2391f, 0x6739687e7327191b } + }, +}, +{ + { + { -0x731a552f363468e1, 0x1156aaa99fd54a29, 0x41f7247015af9b78, 0x1fe8cca8420f49aa }, + { -0x609a3a15dff7eb31, -0x7bfac91e965ce8c0, -0x74f12ec6da374b53, 0x0080dbafe936361d }, + { 0x72a1848f3c0cc82a, 0x38c560c2877c9e54, 0x5004e228ce554140, 0x042418a103429d71 } + }, + { + { 0x58e84c6f20816247, -0x724d4d491c90286d, -0x688e7da9e2b7b27b, 0x0822024f8632abd7 }, + { -0x766215ae540c00a1, -0x646c5798d03d2746, 0x2c38cb97be6ebd5c, 0x114d578497263b5d }, + { -0x4cfe448394e4135d, 0x55393f6dc6eb1375, -0x6ef2d7ef68491b15, 0x1ad4548d9d479ea3 } + }, + { + { -0x5f901992f016012d, -0x578cc5bfe3a786f7, 0x30d14d800df98953, 0x41ce5876c7b30258 }, + { -0x32a5825fc765b703, -0x4c705b556587c8e2, -0x392689e4d3247194, 0x35cf51dbc97e1443 }, + { 0x59ac3bc5d670c022, -0x151983ef64ee6bfa, -0x6867420f4c87d026, 0x651e3201fd074092 } + }, + { + { -0x5a845b5fe1035162, 0x769f4beedc308a94, -0x2e0ef114c9fc34d2, 0x4099ce5e7e441278 }, + { 
-0x29c27b7c10cf3a31, 0x4cd4b4962361cc0c, -0x116f1aff5b7bd954, 0x0af51d7d18c14eeb }, + { 0x1ac98e4f8a5121e9, 0x7dae9544dbfa2fe0, -0x7cdf55f229bcf207, 0x667282652c4a2fb5 } + }, + { + { -0x5257491fd6b924dd, 0x1c0ce51a7b253ab7, -0x7bb737a59922b7a5, 0x7f1fc025d0675adf }, + { -0x78b9de0b27943655, -0x4ab38441a9019016, 0x077a24257fadc22c, 0x1ab53be419b90d39 }, + { -0x2711e4e7ce615956, 0x004d88083a21f0da, 0x3bd6aa1d883a4f4b, 0x4db9a3a6dfd9fd14 } + }, + { + { -0x26a4ff4434488398, -0x22437b956e0e87b7, 0x7cf700aebe28d9b3, 0x5ce1285c85d31f3e }, + { -0x73184dc44663f8ab, 0x35c5d6edc4f50f7a, 0x7e1e2ed2ed9b50c3, 0x36305f16e8934da1 }, + { 0x31b6972d98b0bde8, 0x7d920706aca6de5b, -0x198cef076f759a61, 0x50fac2a6efdf0235 } + }, + { + { 0x295b1c86f6f449bc, 0x51b2e84a1f0ab4dd, -0x3ffe34cf5571aae3, 0x6a28d35944f43662 }, + { -0x0c2c560ca477f0a6, -0x1213faf324fc183e, -0x576967e0060f4e5e, 0x49a4ae2bac5e34a4 }, + { 0x28bb12ee04a740e0, 0x14313bbd9bce8174, 0x72f5b5e4e8c10c40, 0x7cbfb19936adcd5b } + }, + { + { -0x7186c58533c91920, -0x0605485c82a79113, 0x3a4f9692bae1f4e4, 0x1c14b03eff5f447e }, + { -0x5cee223d947686d3, 0x1b30b4c6da512664, 0x0ca77b4ccf150859, 0x1de443df1b009408 }, + { 0x19647bd114a85291, 0x57b76cb21034d3af, 0x6329db440f9d6dfa, 0x5ef43e586a571493 } + }, +}, +{ + { + { -0x5992336237f3e540, -0x685fa30be4c75bca, -0x58140c416a24283a, 0x7da0b8f68d7e7dab }, + { -0x1087dfebc7a98a5a, -0x5d9b60cf55025618, 0x4cd1eb505cdfa8cb, 0x46115aba1d4dc0b3 }, + { -0x2bf0e6ac3c4a258a, 0x1dac6f7321119e9b, 0x03cc6021feb25960, 0x5a5f887e83674b4b } + }, + { + { -0x6169d72c5f59bc47, -0x4a3c34ff193cdf9c, -0x64acfd7683d213ce, 0x43e37ae2d5d1c70c }, + { -0x709cfe308f5ec2ef, -0x303147eacaf22f3c, -0x08fd682b5b435b82, 0x3669b656e44d1434 }, + { 0x387e3f06eda6e133, 0x67301d5199a13ac0, -0x42a52707c9d9c7ef, 0x6a21e6cd4fd5e9be } + }, + { + { -0x10bed6ed99664d1d, 0x71d30847708d1301, 0x325432d01182b0bd, 0x45371b07001e8b36 }, + { -0x0e39e8f5cfb919a1, 0x58712a2a00d23524, 0x69dbbd3c8c82b755, 0x586bf9f1a195ff57 }, + { -0x5924f772a10786f5, 0x5278f0dc610937e5, -0x53fcb62d9e5e9148, 0x0eafb03790e52179 } + }, + { + { 0x5140805e0f75ae1d, -0x13fd041cd99d33d0, 0x2cebdf1eea92396d, 0x44ae3344c5435bb3 }, + { -0x69faaa3ec8b7fbd1, 0x219a41e6820baa11, 0x1c81f73873486d0c, 0x309acc675a02c661 }, + { -0x630d7646445abc12, -0x0c89f162a5368ebe, 0x1d82e5c64f9360aa, 0x62d5221b7f94678f } + }, + { + { 0x7585d4263af77a3c, -0x205184ee0116ebb3, -0x5af98f7fa608e6c3, 0x14f29a5383922037 }, + { 0x524c299c18d0936d, -0x37944a9375f3e5f4, -0x5c8afad124b579cf, 0x5c0efde4bc754562 }, + { -0x208e8123da4d280b, 0x21f970db99b53040, -0x256dcb483c12b39e, 0x5e72365c7bee093e } + }, + { + { 0x7d9339062f08b33e, 0x5b9659e5df9f32be, -0x5300c252e0614203, 0x70b20555cb7349b7 }, + { 0x575bfc074571217f, 0x3779675d0694d95b, -0x65f5c8440be6e1cd, 0x77f1104c47b4eabc }, + { -0x41aeec3aaaeed3b4, 0x6688423a9a881fcd, 0x446677855e503b47, 0x0e34398f4a06404a } + }, + { + { 0x18930b093e4b1928, 0x7de3e10e73f3f640, -0x0bcde8258cc6a291, 0x6f8aded6ca379c3e }, + { -0x4982dd26c1314218, 0x09b3e84127822f07, 0x743fa61fb05b6d8d, 0x5e5405368a362372 }, + { -0x1cbfedc202484d66, 0x487b97e1a21ab291, -0x066982fd02196b62, 0x780de72ec8d3de97 } + }, + { + { 0x671feaf300f42772, -0x708d14d5d573be56, 0x29a17fd797373292, 0x1defc6ad32b587a6 }, + { 0x0ae28545089ae7bc, 0x388ddecf1c7f4d06, 0x38ac15510a4811b8, 0x0eb28bf671928ce4 }, + { -0x50a441e510ae6a59, 0x148c1277917b15ed, 0x2991f7fb7ae5da2e, 0x467d201bf8dd2867 } + }, +}, +{ + { + { 0x745f9d56296bc318, -0x66ca7f2b27ead19b, -0x4f1a4ec0a7c61632, 0x51fc2b28d43921c0 }, + { 0x7906ee72f7bd2e6b, 
0x05d270d6109abf4e, -0x72a301ba46be575c, 0x44c218671c974287 }, + { 0x1b8fd11795e2a98c, 0x1c4e5ee12b6b6291, 0x5b30e7107424b572, 0x6e6b9de84c4f4ac6 } + }, + { + { 0x6b7c5f10f80cb088, 0x736b54dc56e42151, -0x3d49df5a3910663c, 0x5f4c802cc3a06f42 }, + { -0x200da031b4e21eaf, -0x27be3f381ee3bfdb, 0x2554b3c854749c87, 0x2d292459908e0df9 }, + { -0x649a370e82f8ad26, -0x77e31cc738811800, -0x3c4aeb0fa49d061d, 0x66ed5dd5bec10d48 } + }, + { + { -0x0f520c363435fb83, -0x7e3c4d340baad095, -0x3025eed2bb8ca06d, 0x1f23a0c77e20048c }, + { 0x7d38a1c20bb2089d, -0x7f7ccb1e69332bee, -0x3b58f47393682ced, 0x2eacf8bc03007f20 }, + { -0x0dcab9841a43ea90, 0x03d2d9020dbab38c, 0x27529aa2fcf9e09e, 0x0840bef29d34bc50 } + }, + { + { -0x32ab1f9480c81b15, -0x733ea0780a169336, -0x47db744f2ca68232, 0x246affa06074400c }, + { 0x796dfb35dc10b287, 0x27176bcd5c7ff29d, 0x7f3d43e8c7b24905, 0x0304f5a191c54276 }, + { 0x37d88e68fbe45321, -0x79f68ab73f28afce, 0x4e9b13ef894a0d35, 0x25a83cac5753d325 } + }, + { + { -0x60f099d6c6ad491e, 0x33db5e0e0934267b, -0x00badad429f60124, 0x06be10f5c506e0c9 }, + { 0x10222f48eed8165e, 0x623fc1234b8bcf3a, 0x1e145c09c221e8f0, 0x7ccfa59fca782630 }, + { 0x1a9615a9b62a345f, 0x22050c564a52fecc, -0x585d877ad743f202, 0x5e82770a1a1ee71d } + }, + { + { -0x17fd17f5bdcc638c, 0x34175166a7fffae5, 0x34865d1f1c408cae, 0x2cca982c605bc5ee }, + { 0x35425183ad896a5c, -0x1798c5041872ad0a, 0x2c66f25f92a35f64, 0x09d04f3b3b86b102 }, + { -0x02d2a2cae6824192, 0x207c2eea8be4ffa3, 0x2613d8db325ae918, 0x7a325d1727741d3e } + }, + { + { -0x132d82fe81d5f896, -0x28779760e9c9b6a2, 0x52a61af0919233e5, 0x2a479df17bb1ae64 }, + { -0x2fc946442e92021e, -0x5dfaa8a83b6857d7, -0x71933699580ed999, 0x4d3b1a791239c180 }, + { -0x61a11171cc24d8f0, 0x189854ded6c43ca5, -0x5be3dd3a6d8e7ec8, 0x27ad5538a43a5e9b } + }, + { + { -0x34a5829c71b8f884, -0x7248ac9edf5e3fa7, 0x549e1e4d8bedfdcc, 0x080153b7503b179d }, + { 0x2746dd4b15350d61, -0x2fc03437116ade49, -0x1791c9a5ec798d36, 0x510e987f7e7d89e2 }, + { -0x2259626cf5c12c1d, 0x3d386ef1cd60a722, -0x37e852a74255b11a, 0x23be8d554fe7372a } + }, +}, +{ + { + { -0x43e10b42a9851857, 0x3f624cb2d64498bd, -0x1bef9b2dd3e0b138, 0x2ef9c5a5ba384001 }, + { -0x6a016e658b10b053, 0x3a827becf6a308a2, -0x69b1fe2cf65b84ff, 0x71c43c4f5ba3c797 }, + { -0x4902920905618b33, -0x0e7d87431b50d986, -0x7daa4c2f0e1066f2, 0x5a758ca390c5f293 } + }, + { + { -0x731f6e74e29e236c, -0x7212c9b9657ecf9a, -0x2b1957d65017552d, 0x0a738027f639d43f }, + { -0x5d48d8ef26b9db6b, 0x3aa8c6d2d57d5003, -0x1c2bff405f4b7836, 0x2dbae244b3eb72ec }, + { -0x67f0b5d0a8001e34, 0x00670d0de1839843, 0x105c3f4a49fb15fd, 0x2698ca635126a69c } + }, + { + { 0x2e3d702f5e3dd90e, -0x61c0f6e71b2dac7a, 0x5e773ef6024da96a, 0x3c004b0c4afa3332 }, + { -0x189ace77cd4f4588, 0x381831f7925cff8b, 0x08a81b91a0291fcc, 0x1fb43dcc49caeb07 }, + { -0x6556b953f90b47d5, 0x1ca284a5a806c4f3, 0x3ed3265fc6cd4787, 0x6b43fd01cd1fd217 } + }, + { + { -0x4a38bda7c189f10d, 0x75dc52b9ee0ab990, -0x40ebd83df8d46dc1, 0x73420b2d6ff0d9f0 }, + { -0x3858a2b4b9683abc, 0x15fdf848df0fffbf, 0x2868b9ebaa46785a, 0x5a68d7105b52f714 }, + { -0x50d30934617ae1fa, -0x70a6c6ec39ddc73c, -0x2575476966040c8d, 0x3db5632fea34bc9e } + }, + { + { 0x2e4990b1829825d5, -0x12151478c165766f, -0x110fc2c6b38fb508, 0x59197ea495df2b0e }, + { -0x0b9111d408a22628, 0x0d17b1f6396759a5, 0x1bf2d131499e7273, 0x04321adf49d75f13 }, + { 0x04e16019e4e55aae, -0x1884bc8581d06d17, -0x3831d23e90ea655c, 0x45eafdc1f4d70cc0 } + }, + { + { -0x49f1b9db30334e13, 0x59dbc292bd5c0395, 0x31a09d1ddc0481c9, 0x3f73ceea5d56d940 }, + { 0x698401858045d72b, 0x4c22faa2cf2f0651, 
-0x6be5c99a94ddd23a, 0x5a5eebc80362dade }, + { -0x4858402ef5b1723a, -0x41a8ff81bb364cc7, 0x60c1207f1557aefa, 0x26058891266218db } + }, + { + { 0x4c818e3cc676e542, 0x5e422c9303ceccad, -0x13f833354bed60f8, 0x0dedfa10b24443b8 }, + { 0x59f704a68360ff04, -0x3c26c021899e190c, -0x7ce4d58ced78caaf, 0x54ad0c2e4e615d57 }, + { -0x11c4982a47d4add6, 0x36f163469fa5c1eb, -0x5a4b2d0d913e602d, 0x62ecb2baa77a9408 } + }, + { + { -0x6df8d7c95049d78c, 0x5fcd5e8579e104a5, 0x5aad01adc630a14a, 0x61913d5075663f98 }, + { -0x1a1286ad9eead4c3, 0x4962357d0eddd7d1, 0x7482c8d0b96b4c71, 0x2e59f919a966d8be }, + { 0x0dc62d361a3231da, -0x05b8a7cd6bdffd90, 0x02d801513f9594ce, 0x3ddbc2a131c05d5c } + }, +}, +{ + { + { -0x048ca53dffb5ca2f, 0x31de0f433a6607c3, 0x7b8591bfc528d599, 0x55be9a25f5bb050c }, + { 0x3f50a50a4ffb81ef, -0x4e1fcaf6c40bdf41, -0x645571e33955d330, 0x32239861fa237a40 }, + { 0x0d005acd33db3dbf, 0x0111b37c80ac35e2, 0x4892d66c6f88ebeb, 0x770eadb16508fbcd } + }, + { + { -0x0e2c497e5faf8e47, 0x2207659a3592ff3a, 0x5f0169297881e40e, 0x16bedd0e86ba374e }, + { -0x7bae061fa1b17623, -0x3f9cfd004386c6c9, 0x5d22749556a6495c, 0x09a6755ca05603fb }, + { 0x5ecccc4f2c2737b5, 0x43b79e0c2dccb703, 0x33e008bc4ec43df3, 0x06c1b840f07566c0 } + }, + { + { 0x69ee9e7f9b02805c, -0x34007d75ab82e9c0, 0x3d93a869b2430968, 0x46b7b8cd3fe26972 }, + { 0x7688a5c6a388f877, 0x02a96c14deb2b6ac, 0x64c9f3431b8c2af8, 0x3628435554a1eed6 }, + { -0x167edf7901811420, 0x4cba6be72f515437, 0x1d04168b516efae9, 0x5ea1391043982cb9 } + }, + { + { 0x6f2b3be4d5d3b002, -0x5013cc2695f63780, 0x035f73a4a8bcc4cc, 0x22c5b9284662198b }, + { 0x49125c9cf4702ee1, 0x4520b71f8b25b32d, 0x33193026501fef7e, 0x656d8997c8d2eb2b }, + { -0x34a73701bcc276c7, -0x765f34d1957281b0, 0x79ca955309fbbe5a, 0x0c626616cd7fc106 } + }, + { + { -0x70203c86040bab4f, 0x45a5a970f1a4b771, -0x536de108452ca6eb, 0x42d088dca81c2192 }, + { 0x1ffeb80a4879b61f, 0x6396726e4ada21ed, 0x33c7b093368025ba, 0x471aa0c6f3c31788 }, + { -0x7025f0c85fe9ae67, 0x0adadb77c8a0e343, 0x20fbfdfcc875e820, 0x1cf2bea80c2206e7 } + }, + { + { -0x67d291e5fd3fbed1, -0x6f05b37c24a71702, 0x01c2f5bcdcb18bc0, 0x686e0c90216abc66 }, + { -0x3d220e214c9dfd54, -0x6d5a01f62d1d855b, 0x7d1648f6fc09f1d3, 0x74c2cc0513bc4959 }, + { 0x1fadbadba54395a7, -0x4be5fd5f51f25996, -0x40e60a67445c83f9, 0x6a12b8acde48430d } + }, + { + { 0x793bdd801aaeeb5f, 0x00a2a0aac1518871, -0x175c8c5ce0dec94c, 0x48aab888fc91ef19 }, + { -0x072515e0c62b6a27, 0x592c190e525f1dfc, -0x247342fb3666e2e5, 0x11f7fda3d88f0cb7 }, + { 0x041f7e925830f40e, 0x002d6ca979661c06, -0x79236006d4fb95d2, 0x760360928b0493d1 } + }, + { + { -0x4bcef71a96a5f4fb, 0x6cb00ee8ad37a38b, 0x5edad6eea3537381, 0x3f2602d4b6dc3224 }, + { 0x21bb41c6120cf9c6, -0x154d55ed21325a65, -0x3e58d2fdf55b74cc, 0x215d4d27e87d3b68 }, + { -0x374db849a4350e64, 0x49779dc3b1b2c652, -0x765e7f442a131d1e, 0x13f098a3cec8e039 } + }, +}, +{ + { + { -0x0c55a85dd86944ec, -0x77c5454864f825df, -0x1ab41de7ce5fc6e4, 0x5ee7fb38d83205f9 }, + { -0x6523f00631a13ab5, 0x039c2a6b8c2f130d, 0x028007c7f0f89515, 0x78968314ac04b36b }, + { 0x538dfdcb41446a8e, -0x5a530256bcb6c807, 0x46af908d263c8c78, 0x61d0633c9bca0d09 } + }, + { + { -0x525cd74307038c21, -0x117b96a2590fc804, 0x637fb4db38c2a909, 0x5b23ac2df8067bdc }, + { 0x63744935ffdb2566, -0x3a42947687f49745, 0x6f1b3280553eec03, 0x6e965fd847aed7f5 }, + { -0x652d46ac117fad85, -0x1770e65505219273, 0x0e711704150e82cf, 0x79b9bbb9dd95dedc } + }, + { + { -0x2e66825171608c8c, -0x5fcd5d073044f7ea, -0x329345ed92bba0f6, 0x1ba811460accb834 }, + { -0x144caabf95ced93e, -0x2d9c7c5797373c6d, 0x6c0c6429e5b97a82, 
0x5065f158c9fd2147 }, + { 0x708169fb0c429954, -0x1eb9ff5328913099, 0x2eaab98a70e645ba, 0x3981f39e58a4faf2 } + }, + { + { -0x37ba205a92199022, -0x1ead5affd3bfb7c6, -0x162d1e9c384b09ce, 0x30f4452edcbc1b65 }, + { 0x18fb8a7559230a93, 0x1d168f6960e6f45d, 0x3a85a94514a93cb5, 0x38dc083705acd0fd }, + { -0x7a92d87d3a8a68c0, -0x05ecba9606634134, -0x77bb038c3f15b18f, 0x632d9a1a593f2469 } + }, + { + { -0x40f602ee12f37b59, 0x63f071810d9f693a, 0x21908c2d57cf8779, 0x3a5a7df28af64ba2 }, + { -0x094494ea47f8345a, 0x1823c7dfbc54f0d7, -0x44e268fc91d698f5, 0x0b24f48847ed4a57 }, + { -0x23252b41aee41539, -0x5bac7f8a12d9330e, -0x1e630060ffa0659b, 0x34fcf74475481f63 } + }, + { + { -0x5a44e25487305568, 0x5ceda267190b72f2, -0x6cf636eef56d9f72, 0x0119a3042fb374b0 }, + { -0x3e681fb387689836, -0x478eb234c726b983, 0x55de888283f95fa8, 0x3d3bdc164dfa63f7 }, + { 0x67a2d89ce8c2177d, 0x669da5f66895d0c1, -0x0a9a671a4d7d5d50, 0x56c088f1ede20a73 } + }, + { + { 0x581b5fac24f38f02, -0x56f41601451cf343, -0x65de96fd75306d10, 0x038b7ea48359038f }, + { 0x336d3d1110a86e17, -0x280c77cdf48a4d06, -0x06eacc89daf8d678, 0x09674c6b99108b87 }, + { -0x60b107de66ce9008, 0x2f49d282eaa78d4f, 0x0971a5ab5aef3174, 0x6e5e31025969eb65 } + }, + { + { 0x3304fb0e63066222, -0x04caf976785345c1, -0x42e6db8873ef9e5d, 0x3058ad43d1838620 }, + { -0x4e939d0a781a6c05, 0x4999eddeca5d3e71, -0x4b6e3e1feb33c193, 0x08f5114789a8dba8 }, + { 0x323c0ffde57663d0, 0x05c3df38a22ea610, -0x423875425366b066, 0x26549fa4efe3dc99 } + }, +}, +{ + { + { 0x04dbbc17f75396b9, 0x69e6a2d7d2f86746, -0x39bf62660ac1543a, 0x606175f6332e25d2 }, + { 0x738b38d787ce8f89, -0x49d9a71dbe865773, 0x30738c9cf151316d, 0x49128c7f727275c9 }, + { 0x4021370ef540e7dd, 0x0910d6f5a1f1d0a5, 0x4634aacd5b06b807, 0x6a39e6356944f235 } + }, + { + { 0x1da1965774049e9d, -0x0432915e6701cad5, -0x4e3432af33adc95a, 0x1f5ec83d3f9846e2 }, + { -0x6932a9bf206f0c19, 0x6c3a760edbfa25ea, 0x24f3ef0959e33cc4, 0x42889e7e530d2e58 }, + { -0x7104dc3ccd73348b, -0x50bd5df822789117, 0x20fbdadc5dfae796, 0x241e246b06bf9f51 } + }, + { + { 0x7eaafc9a6280bbb8, 0x22a70f12f403d809, 0x31ce40bb1bfc8d20, 0x2bc65635e8bd53ee }, + { 0x29e68e57ad6e98f6, 0x4c9260c80b462065, 0x3f00862ea51ebb4b, 0x5bc2c77fb38d9097 }, + { -0x172a23605694526d, -0x1a704e8221e6b824, 0x681532ea65185fa3, 0x1fdd6c3b034a7830 } + }, + { + { -0x63ec595ad2270857, 0x2dbb1f8c3efdcabf, -0x69e1cdbfa1f7084b, 0x48c8a121bbe6c9e5 }, + { 0x0a64e28c55dc18fe, -0x1c206166cc661423, 0x79ac432370e2e652, 0x35ff7fc33ae4cc0e }, + { -0x03bea583a69b9bbb, -0x2ddb4d283ed749eb, 0x6035c9c905fbb912, 0x42d7a91274429fab } + }, + { + { -0x565b76b86cc25a44, 0x4a58920ec2e979ec, -0x69277fffec1a53b4, 0x453692d74b48b147 }, + { 0x4e6213e3eaf72ed3, 0x6794981a43acd4e7, -0x00ab8321914af735, 0x6fed19dd10fcb532 }, + { -0x2288a26657aa6391, -0x0bd5debf20ffc1dc, 0x5223e229da928a66, 0x063f46ba6d38f22c } + }, + { + { 0x39843cb737346921, -0x58b804f8c7376bb9, -0x34727fce5dbacf82, 0x67810f8e6d82f068 }, + { -0x2d2dbd76a0ac996c, -0x35cc5d3abd6c64d4, -0x67905259382246a4, 0x5a152c042f712d5d }, + { 0x3eeb8fbcd2287db4, 0x72c7d3a301a03e93, 0x5473e88cbd98265a, 0x7324aa515921b403 } + }, + { + { -0x52dc092517dcab35, 0x6962502ab6571a6d, -0x649ae9c91c71c82f, 0x5cac5005d1a3312f }, + { -0x7a86bd0b93c34172, -0x5e2c9b4eb8cf3fba, 0x1c8ed914d23c41bf, 0x0838e161eef6d5d2 }, + { -0x733eab33161c66fc, 0x5b3a040b84de6846, -0x3b2759e34e41a292, 0x40fb897bd8861f02 } + }, + { + { -0x1a8127b8a54ef89f, 0x71435e206fd13746, 0x342f824ecd025632, 0x4b16281ea8791e7b }, + { -0x7b3a556f9d21c85f, 0x421da5000d1d96e1, 0x788286306a9242d9, 0x3c5e464a690d10da }, + { 
-0x2e3efe2af47ecc7f, -0x2119f0ee891197d8, 0x0cb68893383f6409, 0x6183c565f6ff484a } + }, +}, +{ + { + { -0x24b97ab650c09992, -0x288030fb0eb5f15b, 0x3df23ff7a4ba0c47, 0x3a10dfe132ce3c85 }, + { 0x741d5a461e6bf9d6, 0x2305b3fc7777a581, -0x2baa8b5d9b8b2c27, 0x1926e1dc6401e0ff }, + { -0x1f80b17515e83160, 0x2fd515463a1fc1fd, 0x175322fd31f2c0f1, 0x1fa1d01d861e5d15 } + }, + { + { 0x38dcac00d1df94ab, 0x2e712bddd1080de9, 0x7f13e93efdd5e262, 0x73fced18ee9a01e5 }, + { -0x337faa6b82a667ce, 0x1e4656da37f15520, -0x6609088bb1fa6ce0, 0x773563bc6a75cf33 }, + { 0x06b1e90863139cb3, -0x5b6c25983a5fc133, -0x72883137529c76ce, 0x1f426b701b864f44 } + }, + { + { -0x0e81ca376e5edaae, -0x48947eaca8a1638a, -0x057cbf90f2648dc2, 0x0b76bb1b3fa7e438 }, + { -0x1036d9b3be6ee3ff, -0x0e5c4847e85dd3db, 0x5875da6bf30f1447, 0x4e1af5271d31b090 }, + { 0x08b8c1f97f92939b, -0x41988e342bbb5492, 0x22e5646399bb8017, 0x7b6dd61eb772a955 } + }, + { + { 0x5730abf9ab01d2c7, 0x16fb76dc40143b18, -0x7993419a5f344d7f, 0x53fa9b659bff6afe }, + { -0x48523e17af0cc26e, 0x7998fa4f608cd5cf, -0x5269d2427203a425, 0x703e9bceaf1d2f4f }, + { 0x6c14c8e994885455, -0x7bc5a2999a512b1b, 0x181bb73ebcd65af1, 0x398d93e5c4c61f50 } + }, + { + { -0x3c78839f2d181c0e, 0x3b34aaa030828bb1, 0x283e26e7739ef138, 0x699c9c9002c30577 }, + { 0x1c4bd16733e248f3, -0x4261ed78ea40f5a1, -0x2bc0730f5ef4fc8a, 0x53b09b5ddf191b13 }, + { -0x0cf958dca6b90e34, -0x6de8e74a331a2683, 0x28cdd24781b4e975, 0x51caf30c6fcdd907 } + }, + { + { 0x737af99a18ac54c7, -0x6fcc87233ae34cf1, 0x2b89bc334ce10cc7, 0x12ae29c189f8e99a }, + { -0x59f458bd898b1ff6, 0x630e8570a17a7bf3, 0x3758563dcf3324cc, 0x5504aa292383fdaa }, + { -0x56613f34e0f2fe31, 0x0dd1efcc3a34f7ae, 0x55ca7521d09c4e22, 0x5fd14fe958eba5ea } + }, + { + { 0x3c42fe5ebf93cb8e, -0x412057aec92ba9a1, -0x1f0f7a6177bddf18, 0x7dd73f960725d128 }, + { -0x4a23d220d7ba54d4, 0x069491b10a7fe993, 0x4daaf3d64002e346, 0x093ff26e586474d1 }, + { -0x4ef2db0197fa67d7, 0x75730672dbaf23e5, 0x1367253ab457ac29, 0x2f59bcbc86b470a4 } + }, + { + { 0x7041d560b691c301, -0x7adfe4c0522818e2, 0x16c2e16311335585, 0x2aa55e3d010828b1 }, + { -0x7c7b82bd66e8eca1, -0x52e46ee0a982fc29, 0x7e7748d9be77aad1, 0x5458b42e2e51af4a }, + { -0x12ae6d19f3f8bbb1, 0x42c54e2d74421d10, 0x352b4c82fdb5c864, 0x13e9004a8a768664 } + }, +}, +{ + { + { 0x1e6284c5806b467c, -0x3a09668418a29f85, -0x749826a74c872d9e, 0x3d88d66a81cd8b70 }, + { -0x344a4aaa93fcd401, -0x208e6e48d6d685c6, -0x3e008cd952127e45, 0x71ade8bb68be03f5 }, + { -0x7489856cdfb12877, 0x762fcacb9fa0ae2a, 0x771febcc6dce4887, 0x343062158ff05fb3 } + }, + { + { -0x031de6f8d584ce4c, 0x4d7adc75aa578016, 0x0ec276a687479324, 0x6d6d9d5d1fda4beb }, + { -0x1fa25e581e0a40b7, 0x26457d6dd4736092, 0x77dcb07773cc32f6, 0x0a5d94969cdd5fcd }, + { 0x22b1a58ae9b08183, -0x026a2f8e3ea3c775, -0x567edc897af5fae9, 0x33384cbabb7f335e } + }, + { + { 0x33bc627a26218b8d, -0x157f4de03857f39f, -0x6ba74ed4e8c1611a, 0x076247be0e2f3059 }, + { 0x3c6fa2680ca2c7b5, 0x1b5082046fb64fda, -0x14accb63abce2922, 0x5278b38f6b879c89 }, + { 0x52e105f61416375a, -0x136850c97a54145c, 0x26e6b50623a67c36, 0x5cf0e856f3d4fb01 } + }, + { + { -0x415131cec24cbd58, -0x345c9ca47bd24812, -0x177399df7e80ec11, 0x1b9438aa4e76d5c6 }, + { -0x0936978ce517354c, 0x5e20741ecb4f92c5, 0x2da53be58ccdbc3e, 0x2dddfea269970df7 }, + { -0x75af8881e990fce6, 0x067b39f10fb7a328, 0x1925c9a6010fbd76, 0x6df9b575cc740905 } + }, + { + { -0x13203ca4b73521bf, 0x6a88471fb2328270, 0x740a4a2440a01b6a, 0x471e5796003b5f29 }, + { 0x42c1192927f6bdcf, -0x706e6e85bfc29e36, -0x23e3a5997461e09f, 0x1596047804ec0f8d }, + { -0x2569444c5312c854, 
0x7a2423b5e9208cea, 0x24cc5c3038aebae2, 0x50c356afdc5dae2f } + }, + { + { -0x30126320e4ce469c, -0x0b79567a735ae50d, 0x14897265ea8c1f84, 0x784a53dd932acc00 }, + { 0x09dcbf4341c30318, -0x1145f9ee7ce7e232, -0x3e863f3123e1d65f, 0x1dbf7b89073f35b0 }, + { 0x2d99f9df14fc4920, 0x76ccb60cc4499fe5, -0x5becd3441a30fffd, 0x3f93d82354f000ea } + }, + { + { -0x1553ed2e861eb688, -0x006dc00c441400a2, 0x4af663e40663ce27, 0x0fd381a811a5f5ff }, + { -0x7e7c189761fb317b, 0x678fb71e04465341, -0x526dfa7099771254, 0x5da350d3532b099a }, + { -0x0da953135bc920ac, 0x108b6168ae69d6e8, 0x20d986cb6b5d036c, 0x655957b9fee2af50 } + }, + { + { -0x423ebf642ffd2f54, 0x66660245b5ccd9a6, -0x7dce823b05217a14, 0x02fe934b6ad7df0d }, + { -0x51574f8056fdfcf1, -0x077389950b9c2ebd, 0x15b083663c787a60, 0x08eab1148267a4a8 }, + { -0x10a30eff3048158c, 0x22897633a1cb42ac, -0x2b31f3ab310d7a1e, 0x30408c048a146a55 } + }, +}, +{ + { + { -0x44d1ff36e6c47881, -0x131c576f1f23af95, -0x130c483fc9219b61, 0x5f46040898de9e1a }, + { 0x739d8845832fcedb, -0x05c729365194079d, 0x32bc0dcab74ffef7, 0x73937e8814bce45e }, + { -0x46fc8ee9d6840b73, -0x562ec4dd2b0f97cc, -0x1e68eaa8b969423a, 0x2cf8a4e891d5e835 } + }, + { + { 0x2cb5487e17d06ba2, 0x24d2381c3950196b, -0x289a637e7a6875d0, 0x7a6f7f2891d6a4f6 }, + { 0x6d93fd8707110f67, -0x22b3f62c83c74ab7, 0x7cb16a4cc2736a86, 0x2049bd6e58252a09 }, + { 0x7d09fd8d6a9aef49, -0x0f119f41a4c246f5, 0x4c21b52c519ebfd4, 0x6011aadfc545941d } + }, + { + { 0x63ded0c802cbf890, -0x042f6735f2009556, 0x624d0afdb9b6ed99, 0x69ce18b779340b1e }, + { 0x5f67926dcf95f83c, 0x7c7e856171289071, -0x295e180c667085a5, 0x6fc5cc1b0b62f9e0 }, + { -0x2e10aad74d678635, -0x22e551c32b816f6e, 0x127e0442189f2352, 0x15596b3ae57101f1 } + }, + { + { 0x09ff31167e5124ca, 0x0be4158bd9c745df, 0x292b7d227ef556e5, 0x3aa4e241afb6d138 }, + { 0x462739d23f9179a2, -0x007cedce68292231, 0x1307deb553f2148a, 0x0d2237687b5f4dda }, + { 0x2cc138bf2a3305f5, 0x48583f8fa2e926c3, 0x083ab1a25549d2eb, 0x32fcaa6e4687a36c } + }, + { + { 0x3207a4732787ccdf, 0x17e31908f213e3f8, -0x2a4d132809f269b2, 0x746f6336c2600be9 }, + { 0x7bc56e8dc57d9af5, 0x3e0bd2ed9df0bdf2, -0x553feb21dd101b5d, 0x4627e9cefebd6a5c }, + { 0x3f4af345ab6c971c, -0x1d77148d66bc8ce1, 0x33596a8a0344186d, 0x7b4917007ed66293 } + }, + { + { 0x54341b28dd53a2dd, -0x55e86fa420bd03c1, 0x0ff592d94dd2f8f4, 0x1d03620fe08cd37d }, + { 0x2d85fb5cab84b064, 0x497810d289f3bc14, 0x476adc447b15ce0c, 0x122ba376f844fd7b }, + { -0x3dfdcd325d4b1aac, -0x612f02bdeea2e781, 0x2eabb4be7dd479d9, 0x02c70bf52b68ec4c } + }, + { + { -0x531acd40ba728d1f, 0x5be768e07cb73cb5, 0x56cf7d94ee8bbde7, 0x6b0697e3feb43a03 }, + { -0x5d7813b4a2f4d045, 0x415c5790074882ca, -0x1fbb59e13e2f7ea4, 0x26334f0a409ef5e0 }, + { -0x49370fb5209d5c40, 0x3ef000ef076da45d, -0x636346a7b60f2d57, 0x1cc37f43441b2fae } + }, + { + { -0x2899a90e36315147, 0x1c5b15f818e5656a, 0x26e72832844c2334, 0x3a346f772f196838 }, + { 0x508f565a5cc7324f, -0x2f9e3b3f1af956de, -0x04e75424a3ba53e7, 0x6c6809c10380314a }, + { -0x2d2aaeed1d259538, -0x1642fcce4e17ae13, -0x69f8b92271398d9e, 0x05911b9f6ef7c5d0 } + }, +}, +{ + { + { 0x01c18980c5fe9f94, -0x329a98968e902a38, -0x7e9fba3c2e6a5f7a, 0x6e2b7f3266cc7982 }, + { -0x162328a949c800d3, -0x13b3cb7036780f3c, -0x312a6d7a0c043849, 0x3305354793e1ea87 }, + { -0x337fdb97083ca971, -0x6216457de668b34d, -0x5448dd634a47eca0, 0x44e2017a6fbeba62 } + }, + { + { -0x7807d30c49359133, 0x580f893e18f4a0c2, 0x058930072604e557, 0x6cab6ac256d19c1d }, + { -0x3b3d58bcab25488c, -0x71a2b3c3b150fce6, -0x4893dc2dbd7c70e9, 0x749a098f68dce4ea }, + { -0x23201f5fd33e21a0, 0x032665ff51c5575b, 
0x2c0c32f1073abeeb, 0x6a882014cd7b8606 } + }, + { + { -0x2eee2e8350b01492, 0x050bba42b33aa4a3, 0x17514c3ceeb46c30, 0x54bedb8b1bc27d75 }, + { -0x5ad56d015b8b804b, -0x23ed5bb6e05a5477, -0x27d256b447b85b32, 0x4d77edce9512cc4e }, + { 0x77c8e14577e2189c, -0x5c1b909500663bbb, 0x3144dfc86d335343, 0x3a96559e7c4216a9 } + }, + { + { 0x4493896880baaa52, 0x4c98afc4f285940e, -0x10b558645babb74a, 0x5278c510a57aae7f }, + { 0x12550d37f42ad2ee, -0x74871ffb675e040b, 0x5d53078233894cb2, 0x02c84e4e3e498d0c }, + { -0x5ab22f8bd6b3f46c, -0x0aa2b94720e7004a, -0x0f90133a72517c9a, 0x588657668190d165 } + }, + { + { -0x40a7cb0fc21da33d, -0x47783751297eab6a, 0x5105221a9481e892, 0x6760ed19f7723f93 }, + { -0x2b88edcee5108ee9, 0x50343101229e92c7, 0x7a95e1849d159b97, 0x2449959b8b5d29c9 }, + { 0x669ba3b7ac35e160, 0x2eccf73fba842056, 0x1aec1f17c0804f07, 0x0d96bc031856f4e7 } + }, + { + { -0x4e2acb4f338afa1f, 0x32cd003416c35288, -0x34c95a7ff89d3d63, 0x5bfe69b9237a0bf8 }, + { 0x3318be7775c52d82, 0x4cb764b554d0aab9, -0x5430c2d83388c26f, 0x3bf4d1848123288a }, + { 0x183eab7e78a151ab, -0x44166f3666f6c89d, -0x008e8291b5381ccb, 0x4c5cddb325f39f88 } + }, + { + { 0x57750967e7a9f902, 0x2c37fdfc4f5b467e, -0x4d9e99c5ce8845ba, 0x3a375e78dc2d532b }, + { -0x3f0948b29e6f5915, 0x20ea81a42db8f4e4, -0x5742908268cea8a0, 0x33b1d60262ac7c21 }, + { -0x7ebe18d0d2b22216, -0x191501679d39f838, 0x23c28458573cafd0, 0x46b9476f4ff97346 } + }, + { + { 0x1215505c0d58359f, 0x2a2013c7fc28c46b, 0x24a0a1af89ea664e, 0x4400b638a1130e1f }, + { 0x0c1ffea44f901e5c, 0x2b0b6fb72184b782, -0x1a78006efeeb2478, 0x37130f364785a142 }, + { 0x3a01b76496ed19c3, 0x31e00ab0ed327230, 0x520a885783ca15b1, 0x06aab9875accbec7 } + }, +}, +{ + { + { 0x5349acf3512eeaef, 0x20c141d31cc1cb49, 0x24180c07a99a688d, 0x555ef9d1c64b2d17 }, + { -0x3ecc667c0a20f145, -0x3f0c8a70aed3b354, 0x2cf1130a0bb398e1, 0x6b3cecf9aa270c62 }, + { 0x36a770ba3b73bd08, 0x624aef08a3afbf0c, 0x5737ff98b40946f2, 0x675f4de13381749d } + }, + { + { -0x5ed00926c4254ce3, 0x0725d80f9d652dfe, 0x019c4ff39abe9487, 0x60f450b882cd3c43 }, + { 0x0e2c52036b1782fc, 0x64816c816cad83b4, -0x2f234226969bf8c2, 0x13d99df70164c520 }, + { 0x014b5ec321e5c0ca, 0x4fcb69c9d719bfa2, 0x4e5f1c18750023a0, 0x1c06de9e55edac80 } + }, + { + { -0x002ad4bf00929656, 0x34530b18dc4049bb, 0x5e4a5c2fa34d9897, 0x78096f8e7d32ba2d }, + { -0x66f085295cc13b1e, 0x6608f938be2ee08e, -0x635ebc3a9cd7baeb, 0x4cf38a1fec2db60d }, + { -0x5f55559af205a319, -0x063b61d5b74ab874, 0x4f09cc7d7003725b, 0x373cad3a26091abe } + }, + { + { -0x0e41570476224453, 0x3bcb2cbc61aeaecb, -0x70a75844e0647263, 0x21547eda5112a686 }, + { -0x4d6b9cb27d360a84, 0x1fcbfde124934536, -0x6163b24cbe7324a6, 0x0040f3d9454419fc }, + { -0x210216c602a6792d, -0x0bd8d376aef5c7f4, -0x48d45bf844cee647, 0x63550a334a254df4 } + }, + { + { -0x6445a7ba8dab84b7, -0x0cfa39051d3bf720, 0x60e8fa69c734f18d, 0x39a92bafaa7d767a }, + { 0x6507d6edb569cf37, 0x178429b00ca52ee1, -0x1583ff6f149429a3, 0x3eea62c7daf78f51 }, + { -0x62db38ec196cd8b2, 0x5f63857768dbd375, 0x70525560eb8ab39a, 0x68436a0665c9c4cd } + }, + { + { 0x1e56d317e820107c, -0x3ad997bb7bf5169b, -0x3e1f5e39cdf00386, 0x5373669c91611472 }, + { -0x43fdca17dfd0c0d9, -0x38a3ff1d9b068a50, -0x6e5b162a5c73dbea, 0x17b6e7f68ab789f9 }, + { 0x5d2814ab9a0e5257, -0x6f70df7b36354c04, -0x50350a77a4d2e136, 0x1cb4b5a678f87d11 } + }, + { + { 0x6b74aa62a2a007e7, -0x0cee1f4f0f8e384f, 0x5707e438000be223, 0x2dc0fd2d82ef6eac }, + { -0x499b3f94c6b50394, 0x0c88de2498da5fb1, 0x4f8d03164bcad834, 0x330bca78de7434a2 }, + { -0x67d1007beee68bb2, -0x0696a169d4f8b8dc, -0x3a753eb04036ac05, 
0x3c31be1b369f1cf5 } + }, + { + { -0x3e97436c0634bd8e, -0x51478ee038312468, 0x7f0e52aa34ac8d7a, 0x41cec1097e7d55bb }, + { -0x4f0b79b2f76b7512, 0x07dc19ee91ba1c6f, 0x7975cdaea6aca158, 0x330b61134262d4bb }, + { -0x0869e6285d927f76, -0x44e02b61e261ea93, 0x73d7c36cdba1df27, 0x26b44cd91f28777d } + }, +}, +{ + { + { -0x50bb7bd24fd7a0c9, -0x78ace76fb8103721, -0x6a8b1f6e07df6866, 0x0e378d6069615579 }, + { 0x300a9035393aa6d8, 0x2b501131a12bb1cd, 0x7b1ff677f093c222, 0x4309c1f8cab82bad }, + { -0x26056e8e7cf8a5ab, 0x4bdb5ad26b009fdc, 0x7829ad2cd63def0e, 0x078fc54975fd3877 } + }, + { + { -0x1dffb4a447cc5676, 0x44775dec2d4c3330, 0x3aa244067eace913, 0x272630e3d58e00a9 }, + { -0x782042ebd77870d3, 0x134636dd1e9421a1, 0x4f17c951257341a3, 0x5df98d4bad296cb8 }, + { -0x0c98702f1336f4ac, -0x0ffeba64edfbca67, 0x26725fbc3758b89b, 0x4325e4aa73a719ae } + }, + { + { -0x12db9d6530960a63, 0x2a4a1ccedd5abbf4, 0x3535ca1f56b2d67b, 0x5d8c68d043b1b42d }, + { 0x657dc6ef433c3493, 0x65375e9f80dbf8c3, 0x47fd2d465b372dae, 0x4966ab79796e7947 }, + { -0x11ccd2b21c4bd4f6, -0x27b1a5d4e95b9fe4, 0x78243877078ba3e4, 0x77ed1eb4184ee437 } + }, + { + { 0x185d43f89e92ed1a, -0x4fb5e11501b8e63a, 0x499fbe88a6f03f4f, 0x5d8b0d2f3c859bdd }, + { -0x402b1ec0dfe7c660, -0x5110001dc1c20e9f, -0x49a4fb0f94a2e01d, 0x52e085fb2b62fbc0 }, + { 0x124079eaa54cf2ba, -0x28db9a14ffe4d919, 0x6843bcfdc97af7fd, 0x0524b42b55eacd02 } + }, + { + { -0x43e72352647d6154, 0x23ae7d28b5f579d0, -0x3cb9edd596c7bdcd, 0x1a6110b2e7d4ac89 }, + { -0x02f2a2411babb850, 0x6cec351a092005ee, -0x665b87bba98a8635, 0x59d242a216e7fa45 }, + { 0x4f833f6ae66997ac, 0x6849762a361839a4, 0x6985dec1970ab525, 0x53045e89dcb1f546 } + }, + { + { -0x7b25c32172ba01ee, -0x42bd3de71bbb1d2e, -0x57ae6987e081ca68, 0x7642c93f5616e2b2 }, + { -0x34744cb928acac25, -0x03034db451aee1de, -0x345b72bf2af51911, 0x26e3bae5f4f7cb5d }, + { 0x2323daa74595f8e4, -0x219773747a85414c, 0x3fc48e961c59326e, 0x0b2e73ca15c9b8ba } + }, + { + { 0x0e3fbfaf79c03a55, 0x3077af054cbb5acf, -0x2a3aadba24c21c61, 0x015e68c1476a4af7 }, + { -0x2944bbd73e80afda, -0x614d8ddc04a56359, -0x1c845afce6e639bc, 0x21ce380db59a6602 }, + { -0x3e2ad7addff995c8, -0x6a9fc1adca8f510d, -0x7cd9a658dd9475b3, 0x5dd689091f8eedc9 } + }, + { + { 0x1d022591a5313084, -0x35d2b55129d8f78e, -0x795ed47ad0f402e0, 0x56e6c439ad7da748 }, + { -0x34537b21402c37aa, 0x1624c348b35ff244, -0x48077235a26352f9, 0x3b0e574da2c2ebe8 }, + { -0x38fb00b6bd42451a, 0x5e21ade2b2de1f79, -0x16a24c0ca9ad0528, 0x0822b5378f08ebc1 } + }, +}, +{ + { + { -0x1e480d6c9d8cfc7d, 0x4b5279ffebca8a2c, -0x25038875402becec, 0x7deb10149c72610f }, + { 0x51f048478f387475, -0x4da2430b634134c4, -0x6554edbb2660dfab, 0x2c709e6c1c10a5d6 }, + { -0x349d509578991186, 0x66cbec045553cd0e, 0x588001380f0be4b5, 0x08e68e9ff62ce2ea } + }, + { + { 0x2f2d09d50ab8f2f9, -0x5346de723aa6dc21, 0x4a8f342673766cb9, 0x4cb13bd738f719f5 }, + { 0x34ad500a4bc130ad, -0x72c724b6c2f42b64, -0x5da3c267aff57642, 0x2f1f3f87eeba3b09 }, + { -0x087b738a1aea49b6, -0x5a6afe4524b56fc8, -0x3df2cec0c08ae4b0, 0x19a1e353c0ae2ee8 } + }, + { + { -0x4bde8d322a694243, -0x6c1fbabc671103c0, -0x604eacb84bbef64b, 0x736bd3990266ae34 }, + { 0x7d1c7560bafa05c3, -0x4c1e5f5f391aa19f, -0x1cad68e73f299b8d, 0x41546b11c20c3486 }, + { -0x7aacd2af6ccb4c4c, 0x46fd114b60816573, -0x33a0a0cfbda37c8b, 0x412295a2b87fab5c } + }, + { + { 0x2e655261e293eac6, -0x7ba56dfcdecc5325, 0x460975cb7900996b, 0x0760bb8d195add80 }, + { 0x19c99b88f57ed6e9, 0x5393cb266df8c825, 0x5cee3213b30ad273, 0x14e153ebb52d2e34 }, + { 0x413e1a17cde6818a, 0x57156da9ed69a084, 0x2cbf268f46caccb1, 0x6b34be9bc33ac5f2 
} + }, + { + { 0x11fc69656571f2d3, -0x393617baacf18c86, -0x1cc5185d2b01afcb, 0x01b9c7b62e6dd30b }, + { -0x0c20d09bc5873f4e, 0x4c3e971ef22e027c, -0x1382e3a1b63e4a5d, 0x2012c18f0922dd2d }, + { -0x77f4aa1aa53762d7, 0x1483241f45a0a763, 0x3d36efdfc2e76c1f, 0x08af5b784e4bade8 } + }, + { + { -0x1d8ceb2d7633d3b5, 0x4be4bd11a287178d, 0x18d528d6fa3364ce, 0x6423c1d5afd9826e }, + { 0x283499dc881f2533, -0x62fada25886cdc4a, -0x7685220498cbbe0c, 0x32b79d71163a168d }, + { -0x337a072612034c96, 0x22bcc28f3746e5f9, -0x1b621cc7061a2c33, 0x480a5efbc13e2dcc } + }, + { + { -0x499eb31bbd31dde1, 0x6e199dcc4c053928, 0x663fb4a4dc1cbe03, 0x24b31d47691c8e06 }, + { 0x0b51e70b01622071, 0x06b505cf8b1dafc5, 0x2c6bb061ef5aabcd, 0x47aa27600cb7bf31 }, + { 0x2a541eedc015f8c3, 0x11a4fe7e7c693f7c, -0x0f5099ecb15d872a, 0x545b585d14dda094 } + }, + { + { 0x6204e4d0e3b321e1, 0x3baa637a28ff1e95, 0x0b0ccffd5b99bd9e, 0x4d22dc3e64c8d071 }, + { 0x67bf275ea0d43a0f, -0x521971cbf7641142, 0x4289134cd479e72e, 0x0f62f9c332ba5454 }, + { -0x034b9a7629c4a0c7, 0x5cae6a3f57cbcf61, -0x01453d2d6ac505fb, 0x1c0fa01a36371436 } + }, +}, +{ + { + { -0x3ee11a17ab3ac052, 0x6a0b06c12b4f3ff4, 0x33540f80e0b67a72, 0x15f18fc3cd07e3ef }, + { -0x18ab8bb64383296e, 0x0f9abeaae6f73ddf, 0x4af01ca700837e29, 0x63ab1b5d3f1bc183 }, + { 0x32750763b028f48c, 0x06020740556a065f, -0x2ac427ed3cb6a4a8, 0x08706c9b865f508d } + }, + { + { -0x3366e4bec74bedba, 0x243b9c526f9ac26b, -0x4610b6b248345443, 0x5fba433dd082ed00 }, + { -0x0c835d54c2cbc201, 0x1a8c6a2d80abc617, -0x71b61fca2b330036, 0x48b46beebaa1d1b9 }, + { -0x63b61caa366be530, -0x468cb5218bb6707c, 0x41c3fed066663e5c, 0x0ecfedf8e8e710b3 } + }, + { + { 0x744f7463e9403762, -0x0865721172033637, 0x163a649655e4cde3, 0x3b61788db284f435 }, + { 0x76430f9f9cd470d9, -0x49d533645bd09ff8, 0x1898297c59adad5e, 0x7789dd2db78c5080 }, + { -0x4dddd7e6f291094e, -0x56b5994db931b406, 0x46c1a77a4f0b6cc7, 0x4236ccffeb7338cf } + }, + { + { 0x3bd82dbfda777df6, 0x71b177cc0b98369e, 0x1d0e8463850c3699, 0x5a71945b48e2d1f1 }, + { -0x7b68bfb2f2aa1d8c, 0x6c6663d9c4ad2b53, -0x13d04f265256a8cc, 0x2617e120cdb8f73c }, + { 0x6f203dd5405b4b42, 0x327ec60410b24509, -0x63cb8dcf53d577ba, 0x77de29fc11ffeb6a } + }, + { + { -0x7ca1ec7013312d36, -0x736150ec1569c466, -0x36a0403f4de9f15a, 0x575e66f3ad877892 }, + { -0x4f53a8367c488758, 0x53cdcca9d7fe912c, 0x61c2b854ff1f59dc, 0x3a1a2cf0f0de7dac }, + { -0x667fc5d8377034c6, 0x345a6789275ec0b0, 0x459789d0ff6c2be5, 0x62f882651e70a8b2 } + }, + { + { 0x6d822986698a19e0, -0x2367de1e8b28758f, 0x41a85f31f6cb1f47, 0x352721c2bcda9c51 }, + { 0x085ae2c759ff1be4, 0x149145c93b0e40b7, -0x3b981805800d8c87, 0x4eeecf0ad5c73a95 }, + { 0x48329952213fc985, 0x1087cf0d368a1746, -0x71ad9e4e993ea55b, 0x2d5b2d842ed24c21 } + }, + { + { 0x5eb7d13d196ac533, 0x377234ecdb80be2b, -0x1ebb3003830a51dc, 0x5226bcf9c441acec }, + { 0x02cfebd9ebd3ded1, -0x2ba4de88c6fde68c, 0x7576f813fe30a1b7, 0x5691b6f9a34ef6c2 }, + { 0x79ee6c7223e5b547, 0x6f5f50768330d679, -0x128c1e1692752317, 0x27c3da1e1d8ccc03 } + }, + { + { 0x28302e71630ef9f6, -0x3d2b5dfcd49b3120, 0x090820304b6292be, 0x5fca747aa82adf18 }, + { 0x7eb9efb23fe24c74, 0x3e50f49f1651be01, 0x3ea732dc21858dea, 0x17377bd75bb810f9 }, + { 0x232a03c35c258ea5, -0x790dc5d39434f30f, 0x3dad8d0d2e442166, 0x04a8933cab76862b } + }, +}, +{ + { + { 0x69082b0e8c936a50, -0x06365fca3e253a4a, 0x6fb73e54c4dfb634, 0x4005419b1d2bc140 }, + { -0x2d39fb49dd6bc201, -0x43734131bb304c60, 0x5d254ff397808678, 0x0fa3614f3b1ca6bf }, + { -0x5ffc014246417d10, 0x2089c1af3a44ac90, -0x07b6606ee6ab0572, 0x1fba218aef40ab42 } + }, + { + { 
0x4f3e57043e7b0194, -0x57e2c111f7255081, -0x37c639546623210f, 0x6c535d13ff7761d5 }, + { -0x54ab6bb705370ac2, -0x7e0917658459c8bf, 0x74fd6c7d6c2b5e01, 0x392e3acaa8c86e42 }, + { 0x4cbd34e93e8a35af, 0x2e0781445887e816, 0x19319c76f29ab0ab, 0x25e17fe4d50ac13b } + }, + { + { -0x6ea0800a890ede59, -0x3cb5cdd8d032781d, -0x3345d021b2e41ada, 0x6bba828f8969899b }, + { 0x0a289bd71e04f676, 0x208e1c52d6420f95, 0x5186d8b034691fab, 0x255751442a9fb351 }, + { -0x1d2e43996f01c6ff, 0x4cb54a18a0997ad5, -0x68e296eb507b9f2c, 0x559d504f7f6b7be4 } + }, + { + { -0x63b76e18092d9903, 0x0744a19b0307781b, -0x77c770e29f9e1dc5, 0x123ea6a3354bd50e }, + { -0x588c7c874c14ab2b, 0x1d69d366a5553c7c, 0x0a26cf62f92800ba, 0x01ab12d5807e3217 }, + { 0x118d189041e32d96, -0x46121c3d27cea7b8, 0x1eab4271d83245d9, 0x4a3961e2c918a154 } + }, + { + { 0x0327d644f3233f1e, 0x499a260e34fcf016, -0x7c4a58e90d254687, 0x68aceead9bd4111f }, + { 0x71dc3be0f8e6bba0, -0x293107cb81001cf6, -0x566dbda01ec5b896, 0x2cd6bce3fb1db763 }, + { 0x38b4c90ef3d7c210, 0x308e6e24b7ad040c, 0x3860d9f1b7e73e23, 0x595760d5b508f597 } + }, + { + { -0x77d5341402fdd870, -0x7650ccfa3beea8a0, 0x65f492e37d3473f4, 0x2cb2c5df54515a2b }, + { 0x6129bfe104aa6397, -0x7069fff75b580335, 0x3f8bc0897d909458, 0x709fa43edcb291a9 }, + { -0x14f5a2739c02d536, -0x2dd43e99d196b101, 0x2723f36ef8cbb03a, 0x70f029ecf0c8131f } + }, + { + { 0x2a6aafaa5e10b0b9, 0x78f0a370ef041aa9, 0x773efb77aa3ad61f, 0x44eca5a2a74bd9e1 }, + { 0x461307b32eed3e33, -0x51fbd0cc5baa7e19, -0x36bbb62ce6a0fc9a, 0x0b7d5d8a6c314858 }, + { 0x25d448327b95d543, 0x70d38300a3340f1d, -0x21e3ace39f1e3ad5, 0x272224512c7de9e4 } + }, + { + { -0x40844475bd568a04, -0x73a3c68869525ca8, -0x1d803890321255b8, 0x19735fd7f6bc20a6 }, + { 0x1abc92af49c5342e, -0x001127ee4d190530, -0x105d73720337b1d7, 0x11b5df18a44cc543 }, + { -0x1c546f2fbd37bd9a, -0x147b71f080e6ab82, 0x2503a1d065a497b9, 0x0fef911191df895f } + }, +}, +{ + { + { 0x6ab5dcb85b1c16b7, -0x6b3f0317c384d85b, -0x5b4ee3e58caae842, 0x499238d0ba0eafaa }, + { -0x4eaf835e54e39147, -0x42bb70c1e949784d, 0x3455fb7f2c7a91ab, 0x7579229e2f2adec1 }, + { -0x130b91ad854574a9, 0x15a08c478bd1647b, 0x7af1c6a65f706fef, 0x6345fa78f03a30d5 } + }, + { + { -0x6c2c341642270f5c, -0x24ead3e402e88cfe, 0x7dbddc6d7f17a875, 0x3e1a71cc8f426efe }, + { -0x20fd06a0efea185f, 0x790ec41da9b40263, 0x4d3a0ea133ea1107, 0x54f70be7e33af8c9 }, + { -0x37c35c1c6f45429e, -0x7f121c98fd6e37cd, -0x377fc7332c86ff3c, 0x2c5fc0231ec31fa1 } + }, + { + { -0x3bdd1b2efdba919b, -0x78beb53e352b846f, 0x1592e2bba2b6ffdd, 0x75d9d2bff5c2100f }, + { -0x01456ee8e8fc74b1, -0x1aedc8de3621107f, 0x1c97e4e75d0d8834, 0x68afae7a23dc3bc6 }, + { 0x5bd9b4763626e81c, -0x766996c9435fd123, 0x0a41193d61f077b3, 0x3097a24200ce5471 } + }, + { + { -0x5e9d18db996a3b7a, 0x131d633435a89607, 0x30521561a0d12a37, 0x56704bada6afb363 }, + { 0x57427734c7f8b84c, -0x0ebe5ec1fe4d8f17, 0x02d1adfeb4e564a6, 0x4bb23d92ce83bd48 }, + { -0x5093b558ad06ed47, 0x5e665f6cd86770c8, 0x4c35ac83a3c8cd58, 0x2b7a29c010a58a7e } + }, + { + { 0x33810a23bf00086e, -0x50316da118c90084, 0x3d60e670e24922d4, 0x11ce9e714f96061b }, + { -0x3bff80882f3e313d, -0x72efdf49453b6d08, 0x32ec29d57e69daaf, 0x599408759d95fce0 }, + { 0x219ef713d815bac1, -0x0ebeb9a2b7a41da4, 0x6d5447cc4e513c51, 0x174926be5ef44393 } + }, + { + { 0x3ef5d41593ea022e, 0x5cbcc1a20ed0eed6, -0x702db130f8c7d374, 0x6fa42ead06d8e1ad }, + { -0x4a214d0603a42a45, -0x6d2558d51e27ef1f, -0x503b302348d5e3a7, 0x497d78813fc22a24 }, + { -0x1d897db5e08cc8e1, 0x7f7cf01c4f5b6736, 0x7e201fe304fa46e7, 0x785a36a357808c96 } + }, + { + { 0x070442985d517bc3, 
0x6acd56c7ae653678, 0x00a27983985a7763, 0x5167effae512662b }, + { -0x7da042029cfeb2d5, -0x37adc9639358a875, 0x5b2fcd285c0b5df0, 0x12ab214c58048c8f }, + { -0x42b1561ef0ac3b4a, 0x1673dc5f8ac91a14, -0x5707e5b1d533e546, 0x33a92a7924332a25 } + }, + { + { 0x7ba95ba0218f2ada, -0x300bdd78ccf04636, -0x2525b692a93926f9, 0x5380c296f4beee54 }, + { -0x622e0b66d86693fe, 0x0cb3b058e04d1752, 0x1f7e88967fd02c3e, 0x2f964268cb8b3eb1 }, + { -0x62b0d8fb997672f6, 0x3d0987990aff3f7a, -0x2f610c9d982545bb, 0x7761455e7b1c669c } + }, +}, +}; +#elif defined(CURVED25519_ASM_32BIT) +static const ge_precomp base[64][8] = { +{ + { + { -0x0a73c47b, 0x2fbc93c6, -0x0473f1e7, -0x306cd23a, 0x643d42c2, 0x270b4898, 0x33d4ba65, 0x07cf9d3a }, + { -0x28bf6ec2, -0x62efc6fb, -0x2ebf414d, -0x02c660fb, 0x688f8a09, -0x5a3e7bcc, -0x6707ed99, 0x44fd2f92 }, + { 0x4b6fbb59, -0x2442ea99, -0x115d5a16, 0x41e13f00, -0x36a83906, -0x322b62e4, -0x50e91336, 0x4f0ebe1f } + }, + { + { -0x6cc38e29, -0x6ddb1804, 0x7a0ff5b5, -0x60b9626a, -0x1e29f8fe, 0x5aa69a65, -0x5782d1d2, 0x590c063f }, + { 0x42b4d5a8, -0x75665aa0, 0x4e60acf6, -0x70d47ef4, -0x4e91c856, -0x1f61dc95, 0x69c92555, 0x6bb595a6 }, + { -0x252c97fe, 0x6e347eaa, -0x7c11b7fb, -0x450ca66d, -0x19f897da, 0x3bcabe10, 0x165ed1b8, 0x49314f0a } + }, + { + { 0x4cee9730, -0x50da4f58, -0x1779b476, 0x025a8430, -0x60fe98ce, -0x3ee4affe, -0x657f070c, 0x7a164e1b }, + { -0x5b032d9b, 0x56611fe8, -0x1a3e4583, 0x3bd353fd, 0x214bd6bd, -0x7ece0ce6, 0x555bda62, 0x2ab91587 }, + { -0x0e98b7cc, -0x640dee0c, -0x09d2076b, -0x47b194e9, 0x5b722a4e, -0x282190f9, 0x63bb2a21, 0x549a04b9 } + }, + { + { -0x7103f661, 0x287351b9, 0x7dfd2538, 0x6765c6f4, -0x04f56d9b, -0x35cb72c3, 0x21e58727, 0x680e9103 }, + { 0x056818bf, -0x6a01faf6, 0x5660faa9, 0x327e8971, 0x06a05073, -0x3c171c33, 0x7445a49a, 0x27933f4c }, + { -0x1aebd950, -0x40e1ba14, 0x6dba0f94, -0x1cd439c3, -0x7307ad40, -0x1bd68b2b, -0x4f19b3e8, 0x44f079b1 } + }, + { + { 0x08a5bb33, -0x5ded43bc, -0x38a112fe, -0x72afb73d, 0x5abfec44, -0x22e414f4, 0x46e206eb, 0x2945ccf1 }, + { -0x5bb82946, 0x7f9182c3, 0x4b2729b7, -0x2affeb2f, -0x479b5f79, -0x1cc30ee4, -0x14e4aa0d, 0x154a7e73 }, + { -0x182ffc4d, -0x37cd5e87, 0x00124d7e, 0x5f729d0a, 0x0e6d8ff3, 0x62c1d4a1, 0x38b27a98, 0x68b8ac59 } + }, + { + { 0x77157131, 0x3a0ceeeb, 0x00c8af88, -0x64d8ea77, -0x25a658ca, -0x7f9a4998, -0x5d33c743, 0x51e57bb6 }, + { 0x7b7d8ca4, 0x499806b6, 0x27d22739, 0x575be284, 0x204553b9, -0x44f7a319, -0x51be877c, 0x38b64c41 }, + { 0x689de3a4, -0x7062526f, -0x07046ec9, 0x175f2428, -0x60304678, 0x050ab532, 0x1354c09f, 0x7865dfa2 } + }, + { + { -0x6bb15c41, 0x6b1a5cd0, -0x4c623f2e, 0x7470353a, 0x28542e49, 0x71b25282, 0x283c927e, 0x461bea69 }, + { -0x55cdde4f, -0x4590d366, 0x3bba23a7, 0x6ca02153, -0x6de6d3c6, -0x621589b1, 0x2e5317e0, 0x1d6edd5d }, + { -0x54f025ca, 0x217a8aac, 0x3d3549c8, -0x5ad739ac, 0x13ab7568, 0x37d05b8b, 0x3a2cbc37, 0x233cef62 } + }, + { + { 0x04dd3e8f, 0x59b75966, -0x1d778fd4, 0x6cb30377, 0x5ed9c323, -0x4ecc639a, 0x61bce52f, 0x0915e760 }, + { -0x0c6dcb27, -0x1d58a213, -0x1e4aa707, -0x69c28980, 0x6e3c23fb, 0x2c2741ac, 0x320e01c3, 0x3a9024a1 }, + { -0x57cb5c82, -0x208217cb, 0x689857ea, -0x741e6326, 0x7167b326, 0x2c118536, -0x24102a3e, 0x589eb3d9 } + }, +}, +{ + { + { 0x2d9021f6, 0x322d04a5, 0x75c6bf9c, -0x463e60cd, 0x42d20b09, 0x587a3a43, -0x559b019f, 0x143b1cf8 }, + { 0x553e2df3, 0x7ec851ca, -0x59b7874d, -0x58ed7b35, 0x3288d1e7, -0x194a1be7, 0x5a9a8883, 0x4cf210ec }, + { -0x69753555, -0x60798383, 0x27092729, 0x5f54258e, -0x15e7f68b, -0x2f582cb5, 0x374126e1, 0x21b546a3 } + }, + { + { -0x2e7ade71, 
0x490a7a45, 0x46049335, -0x65eac888, -0x33ce1e0a, 0x0060ea09, -0x0791169b, 0x7e041577 }, + { -0x5d777cbd, -0x56b007a8, 0x5313ed3c, -0x31f12baa, -0x4a40cb06, -0x0aa3c231, -0x36154c8f, 0x0a653ca5 }, + { -0x31a4980d, 0x66b2a496, -0x42a9686a, -0x00ab6d28, 0x4a592cd0, 0x503cec29, 0x0813acb2, 0x56694365 } + }, + { + { 0x1dabb69d, 0x5672f9eb, -0x5017ac04, -0x458f4acb, 0x2796d66d, 0x47ac0f75, -0x6bee8d8b, 0x32a53517 }, + { 0x26620798, -0x47e724f4, 0x606e354a, 0x5d5c31d9, 0x00a8cdc7, 0x0982fa4f, 0x4653e2d4, 0x17e12bcd }, + { -0x209b7bc9, -0x2c59bb5a, -0x77f04023, 0x703b6559, -0x52c5e55b, -0x347adac0, -0x71b39b98, 0x0900b3f7 } + }, + { + { -0x37e952cf, -0x12d7f042, -0x2719101d, 0x52d9595b, -0x0939dc0b, 0x0fe71772, 0x051e293c, 0x4314030b }, + { 0x679d651b, 0x0a851b9f, 0x033342f2, -0x1ef7349f, -0x1774cf5d, -0x29fe0a81, -0x12d228ec, 0x371f3aca }, + { -0x040f4353, -0x2a9fffa2, -0x2e78f3a2, -0x7148f0d2, -0x2f7b1960, 0x201f9033, -0x31849990, 0x4c3a5ae1 } + }, + { + { -0x36c25f23, -0x45078a1c, 0x71b9294d, -0x46cd7d59, -0x0b393ba0, -0x7f29c049, -0x15993e7f, 0x6de9c73d }, + { -0x2347056b, 0x4138a434, 0x6c96840b, -0x78f30983, 0x297be82c, -0x21c77a8c, 0x7262a55a, 0x7c814db2 }, + { -0x5fb2070e, 0x478904d5, -0x4efebd2d, -0x050451b6, 0x555d0998, -0x0937539d, 0x2f90b104, 0x5aac4a41 } + }, + { + { -0x4280aecc, 0x603a0d0a, -0x1e2c51ba, -0x7f7636ce, -0x7867429d, -0x20da6ec7, 0x74ba0235, 0x1c145cd2 }, + { 0x3ac92908, -0x39b0cd95, -0x199c1e20, 0x5551b282, 0x4a1a4b83, 0x476b35f5, 0x189f68c2, 0x1b9da3fe }, + { 0x75f3d743, 0x32e83864, 0x6ae5d9ef, 0x365b8baf, 0x385b681e, -0x7dadc74a, 0x167d65e1, 0x234929c1 } + }, + { + { 0x1d099fcf, 0x48145cc2, -0x33d7281b, 0x4535c192, 0x48247e01, -0x7f183e1b, 0x3b2973ee, 0x4a5f2874 }, + { -0x5f885218, -0x67b21355, 0x19eb389d, 0x383f77ad, 0x2954d794, -0x38139482, -0x1483c586, 0x59c77b3a }, + { 0x225ccf62, -0x2c5228db, -0x4dead3a3, -0x6ee5cc7f, 0x5b08f87d, -0x274c6053, 0x4799fe3b, 0x6f05606b } + }, + { + { -0x06e49b7d, 0x5b433149, 0x5a2cbf62, -0x524a239b, 0x632827b3, -0x78057bee, -0x54b60728, 0x60895e91 }, + { 0x177ba962, -0x6001616e, 0x0de5cae1, -0x675118e3, 0x2d831044, 0x3ff4ae94, 0x58533ac8, 0x714de12e }, + { 0x0cf86c18, -0x16130d13, 0x0735dfd4, -0x4b92f9ee, 0x04b96be7, -0x43625f68, -0x26923d95, 0x73e2e62f } + }, +}, +{ + { + { 0x632f9c1d, 0x2eccdd0e, 0x76893115, 0x51d0b696, -0x579c85a8, 0x52dfb76b, -0x5ff110c7, 0x6dd37d49 }, + { 0x49aa515e, -0x12a49cac, 0x0bc6823a, -0x579a3b61, 0x5b42d1c4, -0x7af3e017, 0x03d315b9, 0x30d76d6f }, + { 0x2106e4c7, 0x6c444417, -0x6d728097, -0x04ac2980, 0x694d3f26, -0x4b8c615c, 0x2e864bb0, 0x10c69711 } + }, + { + { -0x7ca737fb, 0x0ca62aa0, 0x7a204247, 0x6a3d4ae3, 0x3b11eddc, 0x7464d3a6, 0x550806ef, 0x03bf9baf }, + { 0x7dbe5fde, 0x6493c427, 0x19ad7ea2, 0x265d4fad, 0x46304590, 0x0e00dfc8, -0x129901f7, 0x25e61cab }, + { -0x33a799fc, 0x3f13e128, -0x4ba68b82, 0x6f5873ec, -0x33ed970b, -0x5f49c213, 0x4586e22c, 0x566d7863 } + }, + { + { -0x39a5d030, -0x5efabd7b, -0x0ce9983d, 0x6c64112a, 0x731aee58, 0x680ae240, 0x4793b22a, 0x14fba5f3 }, + { -0x633ef7cc, 0x1637a49f, -0x57643baf, -0x4371a92b, 0x7f7fd2db, 0x1cb5ec0f, 0x5ecc35d9, 0x33975bca }, + { 0x6985f7d4, 0x3cd74616, -0x3637ffa9, 0x593e5e84, 0x7b61131e, 0x2fc3f2b6, -0x7c03ad94, 0x14829cea } + }, + { + { 0x4e71ecb8, 0x21e70b2f, 0x40a477e3, -0x19a92247, -0x31e2b080, -0x409aa932, 0x535d7b7e, 0x05fc3bc4 }, + { -0x68226a3e, -0x00bc847c, -0x55b14a59, 0x6c744e30, 0x3c85e88b, -0x61f3a29f, 0x5f758173, 0x2fd9c71e }, + { 0x52afdedd, 0x24b8b3ae, -0x12c4cf31, 0x3495638c, -0x56417e6b, 0x33a4bc83, 0x5c651f04, 0x37376747 } + }, + { + 
{ 0x14246590, 0x634095cb, 0x16c15535, -0x10edebc0, -0x76ef43a0, -0x61c7ebf4, 0x30907c8c, 0x6bf59057 }, + { 0x40d1add9, 0x2fba99fd, -0x690b2fd9, -0x4cf8e991, 0x15f03bae, 0x4363f052, 0x3b18f999, 0x1fbea56c }, + { -0x1ebea476, 0x0fa778f1, -0x453c5882, 0x06409ff7, -0x655d65b0, 0x6f52d7b8, 0x7a635a56, 0x02521cf6 } + }, + { + { 0x772f5ee4, -0x4eeb98e0, -0x69f86532, -0x17076b4f, 0x00ac824a, 0x4af8224d, -0x0832933c, 0x001753d9 }, + { 0x0a9d5294, 0x513fee0b, 0x0fdf5a66, -0x706718a4, -0x401ef832, -0x2b9e7978, 0x71382ced, 0x3fa00a7e }, + { -0x69c224cc, 0x3c69232d, -0x4b68c7a8, 0x1dde87da, -0x5f6e0d7b, -0x55282e07, -0x5fb7124a, 0x12b5fe2f } + }, + { + { -0x5290e16e, -0x20d483da, 0x504b8913, 0x4b66d323, 0x751c8bc3, -0x73bf6240, 0x0796c7b8, 0x6f7e93c2 }, + { -0x69031cb3, 0x71f0fbc4, -0x520ca413, 0x73b9826b, -0x00d73a9f, -0x2dfb8d9f, 0x6fb1206f, 0x749b76f9 }, + { -0x515951fb, 0x1f5af604, -0x411b6367, -0x3edcae0f, -0x1100949a, 0x61a808b5, 0x01e02151, 0x0fcec10f } + }, + { + { -0x3bdbb1bb, 0x3df2d29d, -0x6c2721f6, 0x2b020e74, -0x7df3deb3, 0x6cc8067e, 0x6feab90a, 0x41377916 }, + { 0x49fe1e44, 0x644d58a6, 0x31ad777e, 0x21fcaea2, -0x77802f2e, 0x02441c5a, -0x7c3aee0d, 0x4901aa71 }, + { -0x73e50710, 0x08b1b754, 0x246299b4, -0x31f08584, 0x1e06d939, -0x089f4f07, 0x726d1213, 0x41bb887b } + }, +}, +{ + { + { -0x55c6082e, -0x68267f20, 0x52c6b51c, 0x35d03842, 0x07cd55aa, 0x7d43f493, -0x48753c9e, 0x56bd36cf }, + { 0x567c49d8, -0x6d987f94, -0x3586e196, 0x066d04cc, -0x1c33c6b5, -0x5960a9bb, -0x5f87732e, 0x5c95b686 }, + { 0x0d14a954, 0x2ac519c1, -0x6b4a0570, -0x150b8b4c, -0x560785a6, -0x19507c7e, -0x78641f6c, 0x0dea6db1 } + }, + { + { -0x29578686, 0x15baeb74, -0x053be8ce, 0x7ef55cf1, 0x3c8b05c5, 0x29001f5a, 0x52eaccfb, 0x0ad7cc87 }, + { 0x7344e5ab, -0x559940ac, -0x70e4bcf7, -0x25eda778, -0x02a9b4d1, 0x5e87d2b3, 0x5483b1dd, 0x5b2c7888 }, + { 0x793408cf, 0x52151362, 0x19963d94, -0x14f0e8fd, -0x77c26b9a, -0x57cc4d06, 0x75003c78, 0x093a7fa7 } + }, + { + { 0x60a91286, -0x47169fbc, 0x7778d3de, 0x7f3fd804, -0x4075a1d3, 0x67d01e31, -0x3d849ac2, 0x7b038a06 }, + { 0x3a16d7be, -0x1aef821a, -0x650ccd31, -0x5c880024, 0x440b677f, 0x70d5bf18, -0x5b5cebfd, 0x6a252b19 }, + { -0x2c966f0d, -0x6126e62b, -0x24b1460e, 0x5213aebb, 0x4cb99135, -0x38f715fb, 0x72260e56, 0x58ded57f } + }, + { + { 0x5b0fd48b, -0x2592acda, -0x6c405678, -0x769f7dcf, 0x61d57e28, -0x287536ce, 0x3a5c8143, 0x79f2942d }, + { -0x16bec289, 0x78e79dad, -0x68d61983, -0x0da8062b, -0x1c85581a, 0x59db910e, -0x4461fc64, 0x6aa11b5b }, + { -0x49377217, -0x6825d0db, -0x530dfe97, 0x251ba7ea, -0x10b14b1c, 0x09b44f87, -0x4395825b, 0x7d90ab1b } + }, + { + { -0x694c3c69, 0x1a07a3f4, -0x70b1dace, 0x11ceaa18, -0x588ae410, 0x7d9498d5, 0x508dd8a0, 0x19ed161f }, + { -0x58fe9402, -0x6533597d, -0x0d3af493, -0x6fafa0b3, -0x331bca56, 0x6b610d5f, 0x6198ff96, 0x19a10d44 }, + { -0x78231936, 0x560a2cd6, -0x799b30b3, 0x7f3568c4, 0x22803a38, -0x78be16ae, 0x595653fc, 0x483bdab1 } + }, + { + { -0x4b257f0a, -0x2930b2f6, -0x07cf8020, -0x7db7c1bb, -0x5190625c, 0x05005269, -0x63087886, 0x1c705290 }, + { -0x78cb05b7, -0x0587f0ec, 0x360534e0, 0x106f0b70, -0x1c1cf843, 0x2210776f, -0x22195f02, 0x3286c109 }, + { -0x78b1672c, 0x32ee7de2, -0x4681f3a0, 0x14c362e9, 0x6a60a38a, 0x5781dcde, -0x558557c0, 0x217dd5ea } + }, + { + { -0x4173f138, -0x7420e047, -0x1cf5fd7e, 0x00bae7f8, -0x5293b094, 0x4963991d, 0x5df6f60a, 0x07058a6e }, + { 0x248e1eb0, -0x62483b30, 0x4d74bf52, -0x1f89681f, 0x3c562354, 0x1e6a9b17, 0x795a4965, 0x7fa7c21f }, + { -0x24ce0981, -0x1614fd3c, 0x10bcfb2b, -0x12da0277, 0x5c5cddb4, 0x46c8131f, 
-0x5f346432, 0x33b21c13 } + }, + { + { 0x5ee38c5b, -0x65504650, 0x071a13c7, -0x4062d2b2, -0x16ccd6f6, -0x71119193, -0x51ef68e9, 0x1c3bab17 }, + { 0x087d8e31, 0x360692f8, -0x2d8e9c09, -0x0b2339c9, 0x65ea5963, 0x25a4e620, 0x5ac160d9, 0x659bf72e }, + { -0x38354850, 0x1c9ab216, 0x07bbc3cc, 0x7d65d374, 0x504a58d5, 0x52744750, 0x131a2990, 0x09f2606b } + }, +}, +{ + { + { 0x7c6691ae, 0x7e234c59, 0x0a85b4c8, 0x64889d3d, 0x354afae7, -0x251d36f4, 0x0c6a9e1d, 0x0a871e07 }, + { 0x744346be, 0x40e87d44, 0x15b52b25, 0x1d48dad4, -0x5ec49fc2, 0x7c3a8a18, 0x2fcdbdf7, 0x4eb728c1 }, + { 0x4bbc8989, 0x3301b599, 0x5bdd4260, 0x736bae3a, 0x19d59e3c, 0x0d61ade2, 0x2685d464, 0x3ee7300f } + }, + { + { -0x7be18ae8, 0x43fa7947, 0x639c46d7, -0x1a3905a7, -0x1cfad48c, -0x5ef9a1e3, -0x30476fd0, 0x7d47c6a2 }, + { -0x61822949, -0x0a2daa1c, 0x610b1eac, -0x7fe9eea4, -0x6d1e7836, 0x3c99975d, -0x686eda3e, 0x13815762 }, + { -0x710f2920, 0x3fdad014, -0x6eab90c4, -0x62c18b66, 0x26bb8157, 0x71ec6210, 0x34c9ec80, 0x148cf58d } + }, + { + { -0x651b8a93, -0x1da8d083, -0x770cb781, 0x56c345bb, 0x6960a88d, -0x602ef493, 0x4eaea1b9, 0x278febad }, + { 0x7934f027, 0x46a492f6, -0x097bf557, 0x469984be, -0x769ee7ac, 0x5ca1bc2a, -0x42a2442c, 0x3ff2fa1e }, + { -0x736cc69a, -0x4e5597e1, 0x20290c98, -0x73de6b64, 0x219d3c52, 0x39115291, -0x01639885, 0x4104dd02 } + }, + { + { -0x24f69548, -0x7edeb1fa, 0x0ce44f35, 0x21a8b6c9, 0x409e2af5, 0x6524c12a, -0x71035b7f, 0x0165b5a4 }, + { 0x1124422a, 0x72b2bf5e, -0x675cc54b, -0x5e05f3cd, -0x05ad499a, -0x6b349eff, -0x5050ac2b, 0x2c863b00 }, + { -0x5f7b958a, -0x0e6f5b8c, -0x32d08340, 0x12eff984, 0x58aa2b8f, 0x695e2906, -0x40013748, 0x591b67d9 } + }, + { + { -0x60e74aa3, -0x66464c8f, -0x5e739be2, -0x1b9a1a06, -0x3d60fa13, 0x61081136, 0x7030128b, 0x489b4f86 }, + { -0x7f4b6406, 0x312f0d1c, -0x540c1376, 0x5979515e, -0x610fe378, 0x727033c0, -0x35708435, 0x3de02ec7 }, + { 0x3aeb92ef, -0x2dcdefd3, 0x6116a861, -0x1e9dac4c, 0x190baa24, 0x3d7eabe7, 0x496cbebf, 0x49f5fbba } + }, + { + { 0x1e9c572e, 0x155d628c, -0x3a77b8bf, -0x75b27954, 0x515763eb, -0x6e5cad0a, -0x7798aea5, 0x06a1a6c2 }, + { -0x75a4302c, 0x30949a10, -0x439b8c15, -0x23bf2290, 0x307c0d1c, -0x6d3d6b3f, -0x3405918c, 0x5604a86d }, + { 0x7c1764b6, 0x7288d1d4, -0x1fbe74af, 0x72541140, 0x18acf6d1, -0x60fce5a0, -0x01d8bd3a, 0x20989e89 } + }, + { + { -0x7a1513d2, 0x1674278b, 0x7acb2bdf, 0x5621dc07, 0x61cbf45a, 0x640a4c16, -0x08fa6a2d, 0x730b9950 }, + { 0x3a2dcc7f, 0x499777fd, -0x5ab0276e, 0x32857c2c, -0x2df81c60, -0x5d86279c, 0x0ca67e29, 0x0403ed1d }, + { -0x78b13aae, -0x36b4d2cb, -0x67db9073, -0x3a193731, 0x16c035ce, -0x0834b906, 0x08303dcc, 0x5bd74543 } + }, + { + { 0x15e7792a, -0x7a3b6cdf, -0x42322237, -0x39b3765e, -0x525c289e, -0x62e1c258, 0x3067f82c, 0x5bb7db12 }, + { 0x28b24cc2, 0x7f9ad195, 0x6335c181, 0x7f6b5465, 0x4fc07236, 0x66b8b66e, 0x7380ad83, 0x133a7800 }, + { -0x39359d42, 0x0961f467, 0x211952ee, 0x04ec21d6, -0x642ab890, 0x18236077, 0x58f0e0d2, 0x740dca6d } + }, +}, +{ + { + { -0x12d9e51b, 0x3906c72a, -0x771eff09, -0x65497027, -0x0cc9fe69, -0x0a16fa66, -0x40d492b9, 0x0e53dc78 }, + { -0x2c0f50f5, 0x50b70bf5, -0x1cd18e09, 0x4feaf48a, -0x5aa442cc, 0x60e84ed3, 0x3f50d1ed, 0x00ed489b }, + { 0x7971877a, -0x46f7d641, 0x6d17e631, 0x5e444463, 0x18276893, 0x4d05c52e, 0x5a4a4af5, 0x27632d9a } + }, + { + { -0x78150025, -0x567d7a2f, -0x272f579c, -0x5a4b0445, 0x022663f7, -0x49a70d81, -0x26631d7e, 0x3bbc2b22 }, + { 0x54b260ce, -0x2ee00faf, 0x72f95270, -0x27923c72, 0x267cc138, 0x601fcd0d, 0x29e90ccd, 0x2b679164 }, + { 0x583c0a58, -0x46e836ae, 0x0fe4c6f3, 0x653ff9b8, 
-0x4320c3f4, -0x64f25829, -0x54ab29f2, 0x43a0eeb6 } + }, + { + { 0x57875fe8, 0x3ac63223, -0x0a043471, -0x262b0b14, 0x382bb620, -0x72117b6d, 0x4c799fdc, 0x50c5eaa1 }, + { 0x6d4a5487, 0x396966a4, -0x53d44c46, -0x07ee5e76, 0x5628b26b, 0x66e4685b, -0x626d646e, 0x70a47702 }, + { -0x290d04c4, -0x22f12375, -0x63384860, 0x54c63aa7, 0x2c8d9f1a, -0x51f4fcd5, 0x602967fb, 0x6f9ce107 } + }, + { + { 0x3520e0b5, 0x13969306, -0x7715fc02, 0x437fcf7c, -0x2c36a644, -0x082b3bf5, -0x076c2127, 0x699154d1 }, + { -0x321e3dd6, -0x52efab4f, 0x48eb32df, -0x3b5716fe, -0x53323f16, 0x5f3e7b33, -0x038669c2, 0x72364713 }, + { -0x4b4d8ada, 0x315d5c75, 0x0236daa5, -0x33347bd3, 0x345fee8e, 0x22f0c8a3, 0x7d39dbed, 0x73975a61 } + }, + { + { -0x0bbcc1ba, 0x6f37f392, 0x1f566b18, 0x0e19b9a1, 0x1fd1d662, 0x220fb78a, -0x5c7e36b3, 0x362a4258 }, + { 0x6375da10, -0x1bfdb207, 0x1830c870, 0x78d3251a, 0x658cd91c, -0x6fd4e6b8, 0x29b7438a, 0x7e18b10b }, + { 0x2b6beb2f, -0x6f8e26ed, 0x28418247, 0x0f26e9ad, -0x42136da3, -0x1546e137, -0x0b750d22, 0x4be65bc8 } + }, + { + { 0x57c26234, 0x1d50fba2, -0x214f9875, 0x7bd4823a, -0x59ac750b, -0x3d4f2392, 0x351da73e, 0x5665eec6 }, + { -0x5c918fd8, 0x78487feb, 0x1dd8ce34, 0x5f3f1300, 0x4b30c489, -0x6cb04ed3, 0x397f0a2b, 0x056c244d }, + { 0x43bfb210, -0x24c11ff7, 0x20800ac2, 0x49720187, 0x73bd8667, 0x26ab5d61, -0x54dfb6c8, 0x20b209c2 } + }, + { + { 0x16bd3289, 0x1fcca945, 0x41420428, 0x448d65aa, 0x16a55d62, 0x59c3b7b2, 0x4e612cd8, 0x49992cc6 }, + { -0x3f804cb5, 0x549e342a, 0x21373d93, 0x02d82208, -0x532e0a99, -0x43d9d290, -0x0435387c, 0x7a92c9fd }, + { 0x70f801de, 0x65bd1bea, -0x01b61d76, 0x1befb7c0, -0x4e4d51b6, -0x579cf933, 0x265c2a09, 0x3b7ac0cd } + }, + { + { 0x22ed39a7, -0x0f2ab1b1, 0x5608150a, -0x5d5516e2, -0x1225178b, -0x0bde4d17, 0x6b7de992, 0x31bc531d }, + { -0x73fe4314, -0x7dd411bd, -0x3f0438c5, 0x530cb525, -0x3e6ac017, 0x48519034, -0x1f65f0a5, 0x265cc261 }, + { -0x567f068f, -0x20c2ecb3, 0x221a22a7, 0x7a4fb8d1, 0x35aad6d8, 0x3df7d420, 0x6a1a125e, 0x2a14edcc } + }, +}, +{ + { + { 0x0478433c, 0x231a8c57, -0x3d7ebc63, -0x484ad8f2, -0x1c26f861, -0x24556616, 0x6c2b03d9, 0x2c03f525 }, + { 0x52cfce4e, -0x20b711f9, 0x06ec08b7, -0x3c00050d, -0x46aba63c, 0x05710b2a, -0x69c15c73, 0x161d25fa }, + { 0x7b53a47d, 0x790f1875, -0x30f3a787, 0x307b0130, 0x257ef7f9, 0x31903d77, -0x42694451, 0x699468bd } + }, + { + { 0x6aa91948, -0x2722c21a, 0x2fc0d2cc, 0x485064c2, 0x34fdea2f, -0x64b7db9a, 0x6c4a2e3a, 0x293e1c4e }, + { -0x0b250131, -0x42e0d0ba, -0x5b802909, 0x7cef0114, 0x4a47b37f, -0x2ce00226, 0x73905785, 0x525219a4 }, + { -0x6daeed1f, 0x376e134b, -0x235ea260, 0x703778b5, 0x461c3111, -0x4fba7651, 0x7f032823, 0x5b605c44 } + }, + { + { -0x0f180fb4, 0x3be9fec6, 0x75e34962, -0x7995a862, 0x1e1de61a, 0x5542ef16, -0x33a5422b, 0x2f12fef4 }, + { 0x20c47c89, -0x469a7fa7, -0x6dc47034, -0x180feff4, 0x02e2ef77, 0x00012565, -0x57514c12, 0x24a76dce }, + { -0x203f38c0, 0x0a4522b2, 0x40c9a407, 0x10d06e7f, 0x78cff668, -0x3930ebbf, 0x18a43790, 0x5e607b25 } + }, + { + { -0x5a6930ec, -0x5fd3bce4, -0x512c1c00, -0x1c3bd2c0, 0x2e0f26db, -0x2dbad980, -0x61ba8f98, 0x201f3313 }, + { 0x6cdf1818, 0x58b31d8f, -0x3c9da75e, 0x35cfa74f, 0x66e61d6e, -0x1e4c00b1, 0x6ccdd5f7, 0x5067acab }, + { 0x08039d51, -0x02ad8095, 0x017c0006, 0x18b14964, 0x2e25a4a8, -0x2addf150, 0x62460375, 0x397cba88 } + }, + { + { -0x37ec8619, 0x7815c3fb, -0x221ed50f, -0x599e6be0, -0x7a57022b, -0x00563f08, -0x3e1e3dae, 0x771b4022 }, + { -0x0fa6a64e, 0x30c13093, -0x1656868a, -0x1dc55e73, 0x721d5e26, 0x222fd491, 0x766e6c3a, 0x2339d320 }, + { 0x513a2fa7, -0x2782267a, -0x062b30f8, 
-0x0a53648f, 0x1ea283b3, -0x2f943ce5, 0x19971a76, 0x331a1892 } + }, + { + { -0x628a8d51, 0x26512f3a, 0x68074a9e, 0x5bcbe288, 0x1180f7c4, -0x7b123e3f, -0x09b65985, 0x1ac9619f }, + { -0x04b07f3a, -0x0ae990bb, 0x61c775cf, -0x63c93822, -0x6fbe26e4, -0x1c2b17e5, -0x7c4201df, 0x31167c6b }, + { 0x524b1068, -0x0dd4c7be, -0x11631679, 0x5068343b, 0x4a6250c8, -0x03628e7c, 0x1f08b111, 0x61243634 } + }, + { + { 0x1a2d2638, -0x749cb61d, -0x642c02cb, -0x62204900, -0x5c5f945c, 0x7f8bf1b8, 0x78d90445, 0x1522aa31 }, + { -0x78b17673, -0x2662be25, 0x6c07dc20, 0x09fea5f1, -0x2ff06444, 0x793d2c67, -0x61a100c0, 0x46ebe230 }, + { 0x69614938, 0x2c382f53, -0x48d292f0, -0x2501bf66, -0x49b90dd9, -0x1737cc6f, 0x0524306c, 0x45fe70f5 } + }, + { + { -0x376aeb6f, 0x62f24920, 0x3f630ca2, 0x05f007c8, -0x0a362b48, 0x6fbb45d2, -0x4a85ddbb, 0x16619f6d }, + { -0x69f3f474, -0x25b78a5a, -0x10f1d0e0, 0x5b68d076, 0x3d0b8fd4, 0x07fb51cf, -0x5f1c6d2c, 0x428d1623 }, + { 0x01a308fd, 0x084f4a44, 0x76a5caac, -0x57dde63d, 0x43d1bc7d, -0x214721ba, 0x60bd38c6, 0x1d81592d } + }, +}, +{ + { + { 0x2f89c8a1, 0x3a4a369a, 0x7c8de80d, 0x63137a1d, 0x78eda015, -0x4353ff76, -0x4b7c4fc1, 0x2cb8b3a5 }, + { -0x13d5b3c8, -0x27cc2842, 0x0acc20ed, 0x2c916283, -0x6d208a7f, -0x16c5b856, 0x333c4a81, 0x702d67a3 }, + { -0x34e46f5f, 0x36e417cb, 0x7f11794e, 0x33b3ddaa, -0x77a439f9, 0x3f510808, -0x1957fdf3, 0x24141dc0 } + }, + { + { -0x427cea83, -0x6e6da234, 0x22cc8094, 0x3ca12053, 0x3f90d6e4, 0x28e57f18, -0x21d18985, 0x1a4714ce }, + { 0x3fefee9d, 0x59f73c77, -0x3e306763, -0x4c0e1077, -0x1fd1aba1, -0x1ca204be, 0x47a1b47c, 0x5766120b }, + { -0x47494801, -0x24df45f1, 0x77511fa1, -0x48cd3c4a, -0x660fd277, -0x56d4ae40, 0x489ca5f1, 0x4f3875ad } + }, + { + { -0x118c1140, 0x79ed13f6, 0x69110bb1, -0x5a39ad93, -0x79fc79f4, -0x1b76d73d, -0x028fa60b, 0x722a1446 }, + { 0x4932ab22, -0x380389d1, 0x2f4c3c1b, 0x7ac0edf7, -0x65576a18, 0x5f6b55aa, -0x52f5ff7f, 0x3680274d }, + { -0x573077e7, -0x2f6a6017, -0x7b8a5664, -0x2f566ab0, 0x20b09cc5, 0x6eac1733, 0x331b1095, 0x628ecf04 } + }, + { + { 0x5c74ccf1, -0x64be5308, 0x08265251, -0x498cce7f, 0x11adb147, -0x6636d513, 0x34ecb40f, 0x7a47d70d }, + { -0x562f2244, -0x67434ee8, 0x08b4802b, -0x11bb61cc, -0x47594efc, -0x78f76dda, 0x45c7915d, 0x685f349a }, + { -0x33bc5b0b, 0x60a0c4cb, 0x3677bea9, 0x775c66ca, 0x2ff8f5ed, -0x5e855e8b, 0x0e01fdc0, 0x11ded902 } + }, + { + { 0x3bea93b7, 0x471f95b0, 0x3313abd3, 0x0552d7d4, -0x1e81c085, -0x426c8f1e, -0x4df1a414, 0x7b120f1d }, + { -0x351018fc, -0x76f187f7, -0x1cf17394, -0x78d7d693, -0x6d514e37, 0x4c5cd2a3, 0x5771531f, 0x194263d1 }, + { -0x79afd286, 0x17d2fb3d, 0x50a69352, -0x4a9b27bc, -0x59f128a3, 0x7da962c8, 0x318736aa, 0x00d0f85b } + }, + { + { -0x0289de3f, -0x598ac3e2, 0x445671f5, 0x69c0b4a7, 0x05b23c11, -0x68e0ad8c, 0x51a8c7cd, 0x387bc748 }, + { 0x777c84fd, -0x6874ebd2, 0x05a8c062, -0x0bfd9bb9, -0x1819ed39, -0x59852ae5, -0x672295cd, 0x2f7b4596 }, + { 0x4a52a9a8, -0x7e76b4b3, -0x09477cd1, -0x5226c1ee, -0x49e429c8, 0x184d8548, -0x29360933, 0x3f1c62db } + }, + { + { 0x148f693d, 0x3fad3e40, -0x6b14658e, 0x052656e1, 0x184f4e2f, 0x2f4dcbfd, -0x3b7d1e75, 0x406f8db1 }, + { -0x6e6ef3e1, 0x2e8f1f00, -0x400d1ed4, -0x5b20b020, -0x116d8bc8, 0x60c6560a, -0x53103706, 0x6338283f }, + { 0x7f191ee4, -0x619cf2d4, -0x43c00990, 0x4fbf8301, 0x7afb73c4, 0x787d8e4e, -0x170a705b, 0x50d83d5b } + }, + { + { -0x4b2c4993, -0x3f533070, 0x61732e60, -0x58fa621b, 0x70c6b0ba, 0x033d1f78, 0x26d946e4, 0x584161cd }, + { -0x3ee5e769, -0x7a97c6ea, -0x1af92ff8, 0x2d69a4ef, -0x099b42ff, 0x39af1378, 0x361517c6, 0x65942131 }, + { 
0x72d27ca2, -0x440d4e60, -0x042138fc, -0x40c6c3a7, -0x1d9d47e2, -0x16724432, 0x3029b589, 0x02eebd0b } + }, +}, +{ + { + { 0x7b85c5e8, -0x789a4961, -0x2e97454e, 0x6ff0678b, 0x1d330f9b, 0x3a70e77c, -0x4f507184, 0x3a5f6d51 }, + { -0x59f253a1, 0x61368756, -0x145423a9, 0x17e02f6a, 0x4cce0f7d, 0x7f193f2d, -0x76132310, 0x20234a77 }, + { 0x7178b252, 0x76d20db6, -0x2ae12ea0, 0x071c34f9, -0x4c1bee90, -0x09d5b5e0, 0x3cffe366, 0x7cd68235 } + }, + { + { 0x68acf4f3, -0x599a32a0, 0x3cd7e3d3, 0x42d92d18, 0x336025d9, 0x5759389d, 0x2b2cd8ff, 0x3ef0253b }, + { -0x2778054a, 0x0be1a45b, -0x45bfc492, 0x2a846a32, -0x1691a000, -0x266defee, 0x3bdc0943, 0x2838c886 }, + { 0x4a465030, -0x2e944f31, 0x15c577ab, -0x05b694bf, -0x0b54be63, -0x7d305176, 0x06a82812, 0x21dcb8a6 } + }, + { + { -0x4188ce46, -0x6572ff06, 0x629e1889, -0x7dfc9f82, 0x43f3d97f, -0x4d33fdc9, 0x6c6f678b, 0x5d840dbf }, + { -0x73626038, 0x5c600446, -0x2bd55c35, 0x2540096e, 0x12ee2f9c, 0x125b4d4c, -0x6b5ce255, 0x0bc3d081 }, + { 0x309fe18b, 0x706e380d, -0x461e9a39, 0x6eb02da6, 0x7dae20ab, 0x57bbba99, 0x2ac196dd, 0x3a427623 } + }, + { + { -0x24bb8135, 0x3bf8c172, -0x39d7d243, 0x5fcfc41f, 0x75aa15fe, -0x7f530040, 0x24e1a9f9, 0x0770c9e8 }, + { -0x758f7b06, 0x4b42432c, -0x20461abb, -0x7675e61d, -0x63a71ba3, -0x4160ffdf, -0x5e92142f, 0x1ff177ce }, + { 0x45b5b5fd, -0x309e2666, 0x1b3a7924, -0x79f67b17, 0x303e3e89, -0x18cff6e7, 0x41500b1e, 0x39f264fd } + }, + { + { -0x01f6841f, -0x2e64b555, -0x201fe6d7, -0x5b92031f, 0x2ca6f1ff, -0x3c36f76c, 0x2c35f14e, 0x65c62127 }, + { -0x24181d64, -0x5852cbe9, 0x2b9c139c, -0x426bc896, -0x6ca68457, -0x5f16e472, 0x68889840, 0x1712d734 }, + { -0x31ce6c23, -0x18d47608, -0x5eda3f45, 0x4d103356, 0x2e1cfe83, 0x0419a93d, -0x4e631d8e, 0x22f9800a } + }, + { + { -0x65910254, 0x42029fdd, 0x34a54941, -0x46ed3142, -0x78420c85, 0x640f64b9, -0x7a67354c, 0x4171a4d3 }, + { 0x3e9ef8cb, 0x605a368a, -0x5aafb8eb, -0x1c163fde, 0x5f24248f, 0x553d48b0, 0x647626e5, 0x13f416cd }, + { -0x6636b374, -0x05d8a756, -0x4fff47f9, 0x23006f6f, -0x5225ac6e, -0x042d6e23, 0x574bd1ab, 0x508214fa } + }, + { + { 0x53d003d6, 0x461a15bb, -0x430c369b, -0x4defd778, 0x6c683a5a, 0x27c57675, -0x37934bb9, 0x3a7758a4 }, + { 0x3ed6fe4b, -0x3dfd96eb, 0x511d77c4, -0x59a598c7, 0x2c14af94, -0x3421d9ba, 0x6faba74b, 0x22f960ec }, + { -0x6c51af8a, 0x548111f6, 0x1dfd54a6, 0x1dae21df, -0x0ceea19b, 0x12248c90, -0x72180b6c, 0x5d9fd15f } + }, + { + { -0x1128ade2, 0x3f244d2a, 0x432e9615, -0x71c56fd8, 0x2e9c16d4, -0x1e9b4589, 0x47eb98d8, 0x3bc187fa }, + { 0x6d63727f, 0x031408d3, -0x28384acd, 0x6a379aef, -0x33511db5, -0x561e703b, 0x4f8fbed3, 0x332f3591 }, + { -0x15793df4, 0x6d470115, 0x6c46d125, -0x66754835, 0x3a660188, -0x2887cd4b, -0x6f9045fd, 0x450d81ce } + }, +}, +{ + { + { -0x4d351f4b, 0x23264d66, -0x14359a8a, 0x7dbaed33, -0x0f2db538, 0x030ebed6, -0x089caaf0, 0x2a887f78 }, + { -0x27bac6fe, -0x0751b2d6, -0x1724d2e3, 0x7018058e, -0x382d3ee2, -0x554c66a1, 0x24ccca79, 0x53b16d23 }, + { 0x5c012d4f, 0x2a23b9e7, -0x351e0d16, 0x0c974651, 0x675d70ca, 0x2fb63273, -0x79bbfc0b, 0x0ba7250b } + }, + { + { -0x79079264, -0x229ca76d, -0x1ec57a5c, 0x61699176, 0x4eaa7d57, 0x2e511195, -0x049f4205, 0x32c21b57 }, + { 0x029c6421, -0x44f2e703, -0x76d670fe, -0x43d2ebdf, -0x74daf16a, -0x7cb8071a, 0x032d71c9, 0x7b9f2fe8 }, + { 0x319e0780, -0x2787dc33, -0x76888a3b, -0x103b303f, -0x65f54c09, 0x4854fb12, 0x7238c371, 0x12c49d41 } + }, + { + { -0x7c866abe, 0x09b3a017, -0x552a11c1, 0x626dd08f, -0x148feb61, -0x45ff4312, -0x5f5bbb37, 0x1421b246 }, + { -0x0017c897, 0x0950b533, -0x71e2942f, 0x21861c1d, 0x1302e510, 
-0x0fdd27c8, 0x6391cab4, 0x2509200c }, + { -0x73db5839, 0x4aa43a8e, -0x270fa10b, 0x04c1f540, 0x0b3eb9dc, -0x5245a1f4, 0x48a49ce3, 0x2ab55044 } + }, + { + { 0x1c5d3afa, -0x23f8539d, -0x06207394, 0x58615171, -0x628c1d50, 0x72a079d8, -0x4b151ea3, 0x7301f4ce }, + { 0x6f0f5dec, 0x2ed22726, 0x5ed50824, -0x67db11bf, -0x6b972beb, -0x7f841384, -0x4ade1dc1, 0x7093bae1 }, + { -0x298dd3bf, 0x6409e759, 0x72bf729b, -0x598b1e31, 0x3c21e569, -0x43f5db15, 0x4ebacb23, 0x390167d2 } + }, + { + { -0x5d0dedf5, -0x2844fab5, -0x4efa7649, -0x1d463152, -0x0c3f1242, 0x3fe8bac8, 0x7112cb69, 0x4cbd4076 }, + { -0x45cac0e4, 0x27f58e3b, -0x4095bc9f, 0x4c47764d, 0x6e562650, -0x50443b1b, -0x551e5ba3, 0x07db2ee6 }, + { 0x29c58176, 0x0b603cc0, 0x5cb15d61, 0x5988e382, -0x230f5273, 0x2bb61413, 0x74183287, 0x7b8eec6c } + }, + { + { -0x03c7948d, 0x32fee570, -0x25c57339, -0x2574febf, -0x37697ca7, -0x68a002f6, -0x4ecd57ab, 0x6ee809a1 }, + { 0x2cd27cb0, -0x1b35bf88, -0x04169843, -0x25063cdd, -0x752be162, -0x4d642cb6, 0x626ede4d, 0x72810497 }, + { -0x030279c6, -0x6bbb44cf, 0x3e4e48c5, 0x2fe3690a, -0x2f7705db, -0x23d63799, -0x2e8cd6d2, 0x13bd1e38 } + }, + { + { 0x1dfac521, 0x223fb5cf, 0x6f554450, 0x325c2531, 0x659177ac, 0x030b98d7, 0x4f88a4bd, 0x1ed018b6 }, + { 0x696149b5, -0x2cd4b328, -0x7e275549, -0x1aa6c829, -0x51edd46c, 0x0bcb2127, -0x4ebf6650, 0x41e86fcf }, + { -0x47fd5950, 0x3630dfa1, 0x42ad3bd5, -0x77f078b9, -0x113a5b2c, 0x0af90d6c, 0x37cdc5d9, 0x746a247a } + }, + { + { 0x78d941ed, 0x6eccd852, -0x2dd087bd, 0x2254ae83, 0x7bbfcdb7, -0x3add2fd2, -0x400f1b1e, 0x681e3351 }, + { 0x2b7b9af6, -0x2ace4743, 0x37fc5b51, 0x50050935, -0x3a6cab93, 0x232fcf25, 0x2bb40f49, 0x20a36514 }, + { -0x7cfcb0bb, -0x749b4a63, 0x1fa20efb, 0x2f8b71f2, -0x459aaf1c, 0x69249495, 0x45d5472b, 0x539ef98e } + }, +}, +{ + { + { 0x1cae743f, -0x2f8b276a, -0x11e39c13, -0x0792e70b, -0x180b12d7, -0x68423aa5, 0x663ab108, 0x4cbad279 }, + { -0x59dfad8b, 0x6e7bb6a1, 0x413c8e83, -0x55b0de29, -0x1770a34e, 0x6f56d155, -0x59cba41f, 0x2de25d4b }, + { -0x5f28e033, -0x7f2e6fdc, -0x04d77508, -0x3ada3df6, 0x5f3a6419, -0x4e5c68b5, -0x1dff8dcd, 0x7d7fbcef } + }, + { + { -0x0c3d6f6c, -0x3283a23b, 0x2a9105ab, -0x387e5d66, 0x421c3058, -0x7f39e2ca, -0x23272b29, 0x4f9cd196 }, + { 0x266b2801, -0x0510e196, -0x2a8c60ea, -0x7993973c, 0x1b03762c, -0x0975d044, -0x7848a573, 0x5975435e }, + { 0x6a7b3768, 0x199297d8, 0x1ad17a63, -0x2f2fa7dc, 0x5c1c0c17, -0x45fd6353, 0x387a0307, 0x7ccdd084 } + }, + { + { 0x6760cc93, -0x64f37be8, 0x1ab32a99, -0x3251ff86, 0x620bda18, -0x5772137a, -0x7e6f35bc, 0x3593ca84 }, + { 0x6d260417, -0x2359bdd4, -0x6b7dbf43, -0x51eac2b0, -0x04973989, -0x563f3e4c, 0x61d0cf53, 0x428bd0ed }, + { 0x5e849aa7, -0x6dece766, 0x65d8facd, -0x2b273ccb, 0x53fdbbd1, -0x73adaba5, -0x25d29c1a, 0x27398308 } + }, + { + { 0x0a702453, -0x465ef1b4, -0x2a82e422, 0x0fa25866, -0x32d82509, -0x0046264b, 0x492c33fd, 0x572c2945 }, + { 0x435ed413, 0x42c38d28, 0x3278ccc9, -0x42af0ca0, 0x79da03ef, -0x44f854e6, -0x4173ccab, 0x269597ae }, + { -0x2932cf42, -0x388038bb, -0x1c455105, -0x1b20172d, -0x55a225f4, -0x5dd377d0, -0x3fa43580, 0x7f985498 } + }, + { + { 0x0fbf6363, -0x2ca9eaae, -0x30b2045a, 0x08045a45, -0x78c05f3e, -0x113db044, -0x2964ed19, 0x30f2653c }, + { -0x60f41ee9, 0x3849ce88, 0x7b54a288, -0x7ffa52e5, 0x23fc921c, 0x3da3c39f, 0x0a31f304, 0x76c2ec47 }, + { -0x553ef37b, -0x75f736c8, -0x24d89435, 0x46179b60, 0x0e6fac70, -0x56df3fe2, 0x596473da, 0x2f1273f1 } + }, + { + { 0x55a70bc0, 0x30488bd7, -0x0e2bbd19, 0x06d6b5a4, -0x43a69e9e, -0x152e5962, -0x123a087c, 0x38ac1997 }, + { -0x751fe1ef, 0x4739fc7c, 
0x4a6aab9f, -0x02ad8b70, -0x788d70d2, 0x41d98a82, -0x27a4960e, 0x5d9e572a }, + { -0x58ae4ec5, 0x0666b517, 0x7e9b858c, 0x747d0686, 0x454dde49, -0x53533fef, -0x40161964, 0x22dfcd9c } + }, + { + { 0x103be0a1, 0x56ec59b4, -0x2da60697, 0x2ee3baec, 0x13f5cd32, 0x797cb294, 0x24cde472, 0x0fe98778 }, + { -0x3cf2f327, -0x72242d20, -0x5344bccd, -0x527199a1, 0x322a961f, -0x7094da74, 0x5448c1c7, 0x6b2916c0 }, + { 0x0aba913b, 0x7edb34d1, 0x2e6dac0e, 0x4ea3cd82, 0x6578f815, 0x66083dff, 0x7ff00a17, 0x4c303f30 } + }, + { + { 0x0dd94500, 0x29fc0358, 0x6fbbec93, -0x132d855c, -0x3d1d5808, 0x130a155f, -0x48f95e2b, 0x416b151a }, + { 0x17b28c85, -0x2cf5c42a, 0x39773bea, -0x3a2c8849, 0x1e6a5cbf, -0x39391874, -0x74d5483c, 0x0d61b8f7 }, + { -0x163ec950, 0x56a8d7ef, 0x58e44b20, -0x42f81a33, 0x1b57e0ab, -0x5019d026, 0x4277e8d2, 0x191a2af7 } + }, +}, +{ + { + { 0x2fe09a14, 0x09d4b60b, -0x244e8b82, -0x3c7b0f51, 0x78b5fd6e, 0x58e2ea89, -0x4a1f64f6, 0x519ef577 }, + { -0x5490b67b, -0x2aaff6a5, 0x4fbfaf1a, 0x04f4cd5b, 0x2a0c7540, -0x6271d12f, -0x4ddedd7a, 0x2bc24e04 }, + { 0x1124cca9, 0x1863d7d9, -0x47758f72, 0x7ac08145, -0x7a8fce0b, 0x2bcd7309, -0x7547051b, 0x62337a6e } + }, + { + { 0x1b3a1273, -0x2e54cdb2, -0x7efaacc0, 0x18947cf1, -0x5673e692, 0x3b5d9567, -0x7fd1e198, 0x7fa00425 }, + { 0x06ffca16, 0x4bcef17f, 0x692ae16a, -0x21f91e25, 0x614f42b0, 0x0753702d, 0x5b9212d0, 0x5f6041b4 }, + { 0x028c2705, 0x7d531574, -0x24f28a02, -0x7fce8297, -0x10737223, 0x30fface8, -0x493c1668, 0x7e9de97b } + }, + { + { -0x5db2bf23, -0x0ffb419e, 0x0452d41f, -0x45f9a66f, 0x62a44234, -0x7e3ba11f, -0x5ddd9911, 0x4cb829d8 }, + { -0x619a7a5d, 0x1558967b, -0x6716746e, -0x68366320, 0x6eb3adad, 0x10af149b, -0x0b2c7306, 0x42181fe8 }, + { 0x07b86681, 0x1dbcaa84, -0x74d98ac5, 0x081f001e, -0x7bfb717f, 0x3cd7ce6a, 0x3f25f22c, 0x78af1163 } + }, + { + { 0x7d65318c, 0x3241c00e, -0x2f179219, -0x19411a24, -0x043f73da, 0x118b2dc2, -0x039fc23d, 0x680d04a7 }, + { 0x0b50babc, -0x7be9142c, 0x28208bee, 0x15087226, -0x463e3c93, -0x5ceb7051, -0x2cd282a3, 0x0d07daac }, + { 0x695aa3eb, -0x063dbeb6, 0x05a68f21, -0x255bd3b4, 0x7f93963e, 0x7c6c2398, 0x0c3954e3, 0x210e8cd3 } + }, + { + { 0x37fe6c26, 0x2b50f161, 0x56e404d8, -0x1efd4328, 0x4c561f6b, 0x12b0f141, -0x2fd7136f, 0x51b17bc8 }, + { 0x10a71c06, -0x53bdfe0e, -0x0c404fdf, 0x6a65e0ae, 0x393632f7, -0x43bd3ca4, -0x79a0f8be, 0x56ea8db1 }, + { -0x30acaee7, -0x000a04b5, -0x20eef760, -0x0b676287, -0x65c45cdb, -0x4203159b, 0x74d1a6f2, 0x18a11f11 } + }, + { + { -0x2d85a0d4, -0x0429c326, -0x755ef929, -0x0ff03b44, -0x719b5bd0, 0x53fb5c1a, 0x0c1a2e85, 0x04eaabe5 }, + { 0x3f6bba29, 0x407375ab, -0x66e1b7d2, -0x613c4928, -0x1aa06d17, -0x6637f17e, -0x04f3f51f, 0x307c13b6 }, + { -0x34754a19, 0x24751021, 0x5c5010eb, -0x03dcbbb7, 0x4e5610a1, 0x5f1e717b, -0x3d8ef32b, 0x44da5f18 } + }, + { + { -0x76271534, -0x6ea90195, -0x1dced95f, -0x19486baf, 0x3944eb4e, -0x428b9c27, 0x767203ae, 0x726373f6 }, + { -0x0e47d14b, 0x033cc55f, 0x411cae52, -0x4ea51c93, -0x7004532d, -0x45bf49e7, 0x532e861f, 0x768edce1 }, + { -0x14810976, -0x1cfa358e, 0x70eadb23, 0x662cf31f, -0x4b3ba498, 0x18f026fd, -0x4a2d1343, 0x513b5384 } + }, + { + { -0x750cb315, 0x5e270287, -0x46b92952, -0x6ff4fbf7, -0x25427aee, 0x6512ebf7, -0x77da707f, 0x61d9b769 }, + { -0x38d66762, 0x46d46280, 0x5368a5dd, 0x4b93fbd0, -0x2e89a577, 0x63df3f81, -0x465f5ddd, 0x34cebd64 }, + { 0x49b7d94b, -0x593a58ed, 0x23eb9446, -0x5c0c2ea8, 0x77484834, 0x0416fbd2, 0x2c70812f, 0x69d45e6f } + }, +}, +{ + { + { 0x4f460efb, -0x6019d4bd, -0x59c9f82a, -0x212cfc2c, -0x485f25dc, -0x0faddef2, 0x00545b93, 0x237e7dbe }, 
+ { -0x3ac3ebcf, -0x31e908b5, 0x2072edde, 0x2b9725ce, -0x4a4dc119, -0x47463c91, 0x0b5cc908, 0x7e2e0e45 }, + { 0x6701b430, 0x013575ed, -0x60f402f0, 0x231094e6, -0x7c1b80de, 0x75320f15, -0x4eeeaa1d, 0x71afa699 } + }, + { + { 0x473b50d6, -0x15bdc3e4, 0x3b38ef10, 0x51e87a1f, -0x4d36416b, -0x647b40a1, 0x78f89a1c, 0x00731fbc }, + { 0x3953b61d, 0x65ce6f9b, -0x505ebe1a, -0x39a7c616, -0x5608a602, 0x0f435ffd, -0x3d4e3d72, 0x021142e9 }, + { 0x48f81880, -0x1bcf38e8, 0x5ecec119, -0x4069f3de, 0x6bba15e3, -0x49251f7d, 0x47e15808, 0x4c4d6f33 } + }, + { + { -0x6770e690, 0x2f0cddfc, -0x4f460ae5, 0x6b916227, 0x779176be, 0x6ec7b6c4, -0x57706058, 0x38bf9500 }, + { -0x3e82e037, 0x18f7eccf, 0x51403c14, 0x6c75f5a6, -0x0811f321, -0x24218ed5, -0x581b85de, 0x193fddaa }, + { 0x37e8876f, 0x1fd2c93c, 0x18d1462c, -0x5d09e1a6, 0x39241276, 0x5080f582, -0x40f2b697, 0x6a6fb99e } + }, + { + { -0x491bdc3a, -0x114edd4b, -0x0d790072, -0x6c628ff0, 0x1dcf5d8c, -0x6f56d57d, 0x42c5eb10, 0x136fda9f }, + { 0x560855eb, 0x6a46c1bb, -0x076c0f63, 0x2416bb38, -0x708e533f, -0x28e2eec9, -0x5ce76916, 0x75f76914 }, + { -0x5cfa422f, -0x06b3204f, -0x6007d3f8, 0x0f364b9d, -0x3c44a776, 0x2a87d8a5, 0x0be8dcba, 0x02218351 } + }, + { + { 0x43307a7f, -0x62a58eff, -0x3b825ba1, -0x4f9c2162, -0x416d852d, 0x22bbfe52, -0x02bfbd94, 0x1387c441 }, + { 0x5ead2d14, 0x4af76638, -0x3583a7d0, -0x5f712780, 0x10211e3d, 0x0d13a6e6, 0x7b806c03, 0x6a071ce1 }, + { -0x78687508, -0x4a2c3c2f, 0x7f0e4413, 0x722b5a3d, -0x44b88360, 0x0d7b4848, -0x50e1236e, 0x3171b26a } + }, + { + { -0x4d75b82f, -0x59f24828, 0x1770a4f1, -0x5940eb2a, 0x53ddbd58, -0x2b5e076d, 0x344243e9, 0x6c514a63 }, + { -0x68a9b358, -0x56d0ce70, 0x2275e119, -0x008447b4, -0x5b78aeb0, 0x4f55fe37, 0x3cf0835a, 0x221fd487 }, + { 0x3a156341, 0x2322204f, -0x45f5fcd3, -0x048c1f17, 0x410f030e, -0x031f22b4, -0x046db556, 0x48daa596 } + }, + { + { -0x37b3686d, 0x14f61d5d, -0x10be7dfa, -0x66be061d, 0x346277ac, -0x320a4771, 0x0e8a79a9, 0x58c837fa }, + { 0x5ca59cc7, 0x6eca8e66, 0x2e38aca0, -0x57b8dab5, -0x2de1e832, 0x31afc708, -0x3527b509, 0x676dd6fc }, + { -0x69036fa8, 0x0cf96885, 0x7b56a01b, 0x1ddcbbf3, 0x4935d66a, -0x233d1883, -0x395a80f6, 0x1c4f73f2 } + }, + { + { -0x0383cb7c, -0x4c918f92, -0x3c3e309f, 0x73dfc9b4, 0x781cc7e5, -0x14e28637, 0x7daf675c, 0x70459adb }, + { 0x305fa0bb, 0x0e7a4fbd, 0x54c663ad, -0x7d62b320, 0x2fe33848, -0x0bde3c7d, 0x1bf64c42, 0x795ac80d }, + { -0x6e4bd44d, 0x1b91db49, 0x4b02dcca, 0x57269623, 0x1f8c78dc, -0x6020611b, -0x731de02d, 0x5fe16284 } + }, +}, +{ + { + { -0x6aeeac77, 0x315c29c7, -0x79d08b32, -0x281f1af9, -0x7a6d8bce, 0x0c4a7621, 0x4a25a1e4, 0x72de6c98 }, + { 0x4d077c41, -0x1d86f552, -0x248b965d, -0x746c7d90, -0x7542e95e, 0x6eb632dc, -0x55f9b48e, 0x720814ec }, + { -0x40955cf0, -0x51654aad, -0x7f9291e5, 0x050a50a9, -0x5200aec7, -0x6d448bfd, 0x45be618b, 0x0394d276 } + }, + { + { -0x4dcaba5c, -0x0ac69bdb, -0x67044d6a, 0x15a7a27e, 0x636fdd86, -0x5493ad44, 0x419334ee, 0x79d995a8 }, + { -0x7a81120c, 0x4d572251, -0x1e616c3b, -0x1c8db123, 0x0b797035, -0x758ebdf2, -0x785418bd, 0x3b3c8336 }, + { 0x1195dd75, -0x3275715a, 0x1dd9a82f, -0x5afb2758, -0x5ca7864a, 0x540dca81, 0x79c86a8a, 0x60dd16a3 } + }, + { + { 0x153e47b8, 0x3501d6f8, 0x14a2f60c, -0x485698ac, 0x455d9523, 0x112ee8b6, -0x7eed1576, 0x4e62a3c1 }, + { 0x7381e559, 0x35a2c848, -0x287f7d35, 0x596ffea6, -0x245849ad, -0x34688e15, -0x64b2597b, 0x5a08b501 }, + { 0x516ab786, -0x372b53fc, 0x5295b23d, 0x595af321, -0x24fdcf3f, -0x29122dcc, -0x7da4be34, 0x0929efe8 } + }, + { + { -0x52a99ae3, -0x74ce8d49, 0x3fabd717, 0x01581b7a, 0x424df6e4, 0x2dc94df6, 
0x2c29284f, 0x30376e5d }, + { -0x342f0d2d, 0x5f0601d1, 0x6132bb7f, 0x736e412f, 0x238dde87, -0x7c9fbbce, -0x0a3f8ac4, 0x1e3a5272 }, + { -0x7ea65a64, -0x2d6e7259, 0x3f0713f3, 0x6bdc1cd9, 0x4acd6590, 0x565f7a93, 0x4cb4c128, 0x53daacec } + }, + { + { -0x7ad30250, -0x667ad43d, 0x59d6ed0b, 0x2cc12e95, -0x64a53d85, 0x70f9e2bf, 0x7959ae99, 0x4f3b8c11 }, + { -0x6337582a, 0x4ca73bd7, 0x47e9a9b2, 0x4d4a738f, 0x42f5fe00, -0x0b340ed7, -0x4240f8ae, 0x01a13ff9 }, + { 0x2ff26412, 0x55b6c9c8, 0x1fb667a8, 0x1ac4a8c9, -0x1488740e, -0x2ad84031, 0x7012a3be, 0x303337da } + }, + { + { -0x052d022f, -0x6892c335, 0x37a640a8, -0x34777c69, 0x6734cb25, 0x2ff00c1d, 0x789c2d2b, 0x269ff4dc }, + { -0x73e36284, -0x6aabddde, 0x1a9b340f, 0x01fac137, -0x6da4b729, 0x7e8d9177, 0x61b3e31b, 0x53f8ad56 }, + { -0x3f729873, 0x0c003fbd, 0x7ead2b17, 0x4d982fa3, -0x4d1a7d0f, -0x3f819433, -0x20bed5bc, 0x296c7291 } + }, + { + { -0x25474a62, -0x204dcdfb, -0x37f6ddb0, 0x465aeaa0, -0x658da2e8, -0x2ecc3ee8, 0x61f117d1, 0x23273702 }, + { 0x33daf397, 0x7903de2b, -0x3659db4d, -0x2f00f9e7, 0x555b3e18, -0x75e2dad5, 0x52e0b7c0, 0x2b6d581c }, + { 0x623e7986, 0x3d0543d3, -0x3d875cac, 0x679414c2, 0x726196f6, -0x51bc0f34, -0x7dba1546, 0x7836c41f } + }, + { + { -0x7fee6c84, -0x359ae17c, 0x6ef41a28, -0x394f3b92, 0x5f3f8d52, -0x48fde459, -0x15284603, 0x119dff99 }, + { 0x49e95a81, -0x185dab25, 0x08b0ad73, 0x5192d5d0, -0x2ff503f9, 0x4d20e5b1, 0x2cf25f38, 0x5d55f801 }, + { -0x0b4ce2b3, 0x43eadfcb, 0x11148892, -0x39afc08c, 0x060d3b17, -0x0111973b, -0x22b5f538, 0x329293b3 } + }, +}, +{ + { + { 0x5d7cb208, 0x2879852d, 0x687df2e7, -0x47212290, 0x21687891, -0x23f40055, 0x677daa35, 0x2b44c043 }, + { -0x1e6b69e6, 0x4e59214f, 0x0d71cd4f, 0x49be7dc7, 0x3b50f22d, -0x6cff302e, -0x036e8dce, 0x4789d446 }, + { 0x074eb78e, 0x1a1c87ab, -0x66250b99, -0x05392e72, 0x484f9067, 0x3eacbbcd, 0x2bb9a4e4, 0x60c52eef } + }, + { + { 0x7cae6d11, 0x702bc5c2, 0x54a48cab, 0x44c7699b, -0x45b6d14e, -0x1043bfaa, -0x26499893, 0x70d77248 }, + { 0x3bfd8bf1, 0x0b5d89bc, -0x360caae6, -0x4f946dc9, -0x2acfd70b, 0x0e4c16b0, 0x2ccfcaab, 0x10bc9c31 }, + { 0x3ec2a05b, -0x557517b5, -0x12e87e20, -0x6796610c, 0x708e85d1, 0x794513e4, -0x56890bed, 0x63755bd3 } + }, + { + { -0x680e5349, 0x3dc71018, -0x3e9a4428, 0x5dda7d5e, 0x0fa1020f, 0x508e5b9c, 0x37c52a56, 0x27637517 }, + { 0x2ad10853, -0x4aa05fc2, -0x6119ca97, 0x356f7590, -0x41964770, -0x60060e03, -0x743e907c, 0x0d8cc1c4 }, + { 0x6eb419a9, 0x029402d3, 0x77b460a5, -0x0f4bb182, -0x2bc3b6aa, -0x30579dd0, 0x7ad166e7, 0x70c2dd8a } + }, + { + { -0x471281ed, -0x6e2b6983, -0x28897e86, 0x74252f0a, 0x0d852564, -0x1bf67d20, 0x16a53ce5, 0x32b86138 }, + { -0x609013f2, 0x65619450, 0x46c6518d, -0x11d18157, 0x67e09b5c, -0x68cc3e0d, 0x63948495, 0x2e0fac63 }, + { -0x1bb7329c, 0x79e7f7be, 0x087886d0, 0x6ac83a67, -0x5f1b24d2, -0x07602b27, 0x735a4f41, 0x4179215c } + }, + { + { 0x286bcd34, -0x1b51cc47, 0x559dd6dc, -0x4810814a, -0x4c2c71e1, 0x278b141f, 0x2241c286, 0x31fa8566 }, + { -0x282312d6, -0x738f6b19, 0x47d39c70, -0x6804753d, -0x56f926fe, -0x1ec41fcd, 0x0cd99d76, 0x700344a3 }, + { 0x2e3622f4, -0x507d93be, -0x67ccafd3, -0x3edfd679, 0x2b389123, -0x643e481f, -0x566adb77, 0x24bb2312 } + }, + { + { -0x0a07a395, 0x41f80c2a, 0x04fa6794, 0x687284c3, -0x5c45e453, -0x76ba2067, -0x0014a2ea, 0x0d1d2af9 }, + { 0x32de67c3, -0x4e5712e9, 0x461b4948, 0x3cb49418, 0x76cfbcd2, -0x7142bcbd, 0x1e188008, 0x0fee3e87 }, + { 0x32621edf, -0x5625755f, 0x59226579, 0x30b822a1, -0x58653e6d, 0x4004197b, 0x18531d76, 0x16acd797 } + }, + { + { 0x7887b6ad, -0x36a6393b, 0x5f90feba, -0x6b1e6153, -0x5cbd0afc, 
0x16e24e62, 0x18161700, 0x164ed34b }, + { 0x2d9b1d3d, 0x72df72af, -0x5bcddba6, 0x63462a36, 0x16b39637, 0x3ecea079, -0x46cfdcf7, 0x123e0ef6 }, + { 0x192fe69a, 0x487ed94c, 0x3a911513, 0x61ae2cea, -0x465b21d9, -0x7884092d, 0x1073f3eb, 0x78da0fc6 } + }, + { + { 0x680c3a94, -0x5d607f0f, 0x1ae9e7e6, 0x71f77e15, 0x48017973, 0x1100f158, 0x16b38ddd, 0x054aa4b3 }, + { -0x1ad43996, 0x5bf15d28, 0x70f01a8e, 0x2c47e318, 0x06c28bdd, 0x2419afbc, 0x256b173a, 0x2d25deeb }, + { 0x19267cb8, -0x2037b973, 0x66e54daf, 0x0b28789c, 0x666eec17, 0x2aeb1d2a, -0x548258a0, 0x134610a6 } + }, +}, +{ + { + { -0x23fd73c4, -0x26ebcf20, 0x5217c771, 0x0eb955a8, 0x2c99a1fa, 0x4b09e1ed, -0x42958bc4, 0x42881af2 }, + { 0x7c59b23f, -0x350aa13e, 0x154d04f2, -0x665112c2, -0x1ebebe0c, 0x68441d72, 0x3932a0a2, 0x14034513 }, + { -0x54a352c3, 0x7bfec69a, 0x4cb2cfad, -0x3dc1732d, -0x04c8295e, 0x685dd14b, 0x15677a18, 0x0ad6d644 } + }, + { + { 0x47927e9f, 0x79148928, 0x370aa877, 0x33dad6ef, 0x11122703, 0x1f8f24fa, 0x2adf9592, 0x5265ac2f }, + { 0x417becb5, 0x781a439e, -0x2ef1fd9a, 0x4ac5938c, 0x0692ac24, 0x5da38511, -0x521cedcd, 0x11b065a2 }, + { -0x65034cba, 0x405fdd30, 0x28e63f54, -0x268dc2bc, 0x5f65aaae, -0x6b3fe210, -0x1eb3f7f7, 0x43e4dc3a } + }, + { + { -0x523d395d, -0x1590853d, -0x168e836c, -0x2f16d70a, -0x29ba150b, -0x1d2c8616, -0x3ae00442, 0x46dd8785 }, + { -0x56c75ae9, -0x43ed380f, 0x3180b2e1, 0x473028ab, -0x0432dab6, 0x3f78571e, 0x6ff6f90f, 0x74e53442 }, + { 0x375c8898, 0x709801be, -0x1c027cb8, 0x4b06dab5, 0x27230714, 0x75880ced, -0x22d0b3be, 0x2b09468f } + }, + { + { -0x7d005fd6, 0x5b979465, -0x01570ab7, -0x25f695af, 0x5f77af9b, -0x5f9caec9, 0x201d1e76, 0x1bcfde61 }, + { -0x48fe346a, -0x6838b612, -0x495c963d, -0x7c0bc72c, -0x65bfd327, 0x62962b8b, -0x67772085, 0x6976c750 }, + { 0x246a59a2, 0x4a4a5490, -0x17802270, -0x29c14222, 0x0d2371fa, -0x26bc8399, -0x2cf0712a, 0x69e87308 } + }, + { + { -0x7437fcfd, 0x0f80bf02, 0x7a18cefb, 0x6aae16b3, -0x28d3295d, -0x22b815b9, -0x0b12c656, 0x61943588 }, + { 0x5656beb0, 0x435a8bb1, 0x4f4d5bca, -0x07053646, 0x1548c075, -0x464d873c, -0x176d49de, 0x3eb0ef76 }, + { -0x6efc607b, -0x2d91a3c2, -0x090cc557, -0x3f161883, 0x70066a93, -0x176973ab, 0x1faaaddd, 0x3c34d188 } + }, + { + { 0x2fffe0d9, -0x42a4f471, 0x3ed24fb9, 0x6aa25410, -0x4d97de3c, 0x2ac7d7bc, 0x60dca36a, 0x605b394b }, + { -0x5f606140, 0x3f9d2b5e, -0x49dc5770, 0x1dab3b6f, 0x72d926c4, -0x5f645c16, 0x3fd8b36d, 0x37419351 }, + { 0x5a9d1ed2, -0x4b17a91c, 0x6c97a9a2, -0x1017b78a, 0x1e5eee7d, -0x4efb309c, -0x7758e371, 0x2f50b81c } + }, + { + { -0x5825add6, 0x2b552ca0, 0x449b0250, 0x3230b336, -0x5b466047, -0x0d3b3a44, 0x58074a22, 0x7b2c6749 }, + { -0x0397ee45, 0x31723c61, 0x6211800f, -0x634bafb8, 0x47995753, 0x768933d3, 0x02752fcd, 0x3491a535 }, + { 0x3ed28cdf, -0x2aae9a78, -0x2c9d21c7, 0x12d84fd2, -0x1cc871b1, 0x0a874ad3, 0x7c763e74, 0x000d2b1f } + }, + { + { 0x3e94a8ab, -0x69db8874, -0x16587414, 0x0ad6f3ce, 0x0d743c4f, -0x6b75387f, -0x55130334, 0x76627935 }, + { -0x2f92b599, 0x3d420811, -0x6f1f001d, -0x4103fb7b, -0x42b78422, -0x078f3949, 0x319afa28, 0x6e2a7316 }, + { -0x292a6561, 0x56a8ac24, 0x3096f006, -0x37248ac2, -0x70b3ad67, 0x477f41e6, -0x09379eec, 0x588d851c } + }, +}, +{ + { + { 0x77d1f515, -0x32d59a19, -0x70559f0f, 0x54899187, -0x2543f91b, -0x4e48c444, -0x56833605, 0x654878cb }, + { -0x72094f02, 0x51138ec7, -0x1a8a0ae5, 0x5397da89, 0x717af1b9, 0x09207a1d, 0x2b20d650, 0x2102fdba }, + { 0x055ce6a1, -0x69611bfb, 0x1251ad29, 0x36bca768, -0x55825beb, 0x3a1af517, 0x29ecb2ba, 0x0ad725db } + }, + { + { -0x64fa907b, -0x013843f4, -0x180a0029, 
0x537d5268, 0x4312aefa, 0x77afc662, 0x02399fd9, 0x4f675f53 }, + { -0x7cb1dba9, -0x23bd984f, 0x70ce1bc5, -0x498abb4b, -0x082ea129, 0x1af07a0b, 0x71a03650, 0x4aefcffb }, + { 0x0415171e, -0x3cd2c9ca, -0x7667b7c5, -0x32d410ef, -0x2f6baef0, -0x78f59153, -0x5d579a9f, 0x0bccbb72 } + }, + { + { 0x50fe1296, 0x186d5e4c, -0x01176082, -0x1fc6847e, 0x507031b0, 0x3bc7f6c5, 0x108f37c2, 0x6678fd69 }, + { -0x154e5638, 0x185e962f, 0x65147dcd, -0x791819cb, -0x44a4920e, -0x4f6d1fcf, 0x59d6b73e, 0x4024f0ab }, + { 0x636863c2, 0x1586fa31, 0x572d33f2, 0x07f68c48, 0x789eaefc, 0x4f73cc9f, -0x7152b8ff, 0x2d42e210 } + }, + { + { 0x0f537593, 0x21717b0d, 0x131e064c, -0x6eb196f5, 0x752ae09f, 0x1bb687ae, -0x64bdc392, 0x420bf3a7 }, + { -0x6b202d65, -0x680aeceb, 0x313f4c6a, 0x6155985d, 0x08455010, -0x145ec0f9, -0x472d2cde, 0x676b2608 }, + { 0x1c5b2b47, -0x7ec7459b, 0x311b1b80, -0x798e4914, -0x43ceca50, 0x7bff0cb1, -0x63f30e20, 0x745d2ffa } + }, + { + { 0x21d34e6a, 0x6036df57, -0x66844c30, -0x4e2477d9, -0x378a9506, -0x2c3df63d, 0x4c1dc839, 0x06e15be5 }, + { 0x2bc9c8bd, -0x40ada5e2, 0x26479d81, -0x15a4d9f8, -0x20feaa25, -0x2aee38f2, -0x69f30a30, 0x1ae23ceb }, + { 0x1932994a, 0x5b725d87, -0x314e2550, 0x32351cb5, -0x254835fb, 0x7dc41549, 0x278ec1f7, 0x58ded861 } + }, + { + { -0x493d3658, 0x2dfb5ba8, -0x0ad3a674, 0x48eeef8e, -0x0ed2ea8d, 0x33809107, 0x531d5bd8, 0x08ba696b }, + { -0x0d993aa4, -0x27e8c86d, -0x33bab1b7, -0x3736893b, -0x43d93c58, 0x5ce382f8, 0x5485f6f9, 0x2ff39de8 }, + { -0x3c103a86, 0x77ed3eee, -0x2b00b7ef, 0x04e05517, -0x0e598e35, -0x15c285c1, -0x6b8301ac, 0x120633b4 } + }, + { + { 0x4912100a, -0x7d42ceb9, 0x7e6fbe06, -0x21dc8493, 0x11ea79c6, -0x1ee189e7, -0x34c6c422, 0x07433be3 }, + { -0x6e9effbe, 0x0b949878, -0x13140518, 0x4ee7b13c, -0x6b0f5b40, 0x70be7395, -0x4b2a6e7b, 0x35d30a99 }, + { 0x5ce997f4, -0x0086bb40, -0x4fa3ae5d, 0x575d3de4, 0x5a76847c, 0x583381fd, 0x7af6da9f, 0x2d873ede } + }, + { + { 0x4e5df981, -0x559dfd1f, 0x5015e1f5, -0x5df2a6e9, -0x451de294, 0x18a275d3, 0x01600253, 0x0543618a }, + { 0x43373409, 0x157a3164, -0x0b557e27, -0x05474812, -0x0a59b7fa, -0x4f6c011a, 0x707fa7b6, 0x2e773654 }, + { -0x68b3dc3f, 0x0deabdf4, -0x6231b96d, -0x5590f5db, -0x5d6545d4, 0x04202cb8, 0x2d07960d, 0x4b144336 } + }, +}, +{ + { + { 0x57c5715e, 0x299b1c3f, 0x6b686d90, -0x69346d62, 0x47235ab3, 0x30048064, -0x5bb2601f, 0x2c435c24 }, + { 0x53242cec, 0x47b837f7, -0x3fbded0e, 0x256dc48c, -0x1e26d73b, -0x1ddd0405, -0x5275d3f9, 0x48ea295b }, + { -0x7f077cc1, 0x0607c97c, -0x35da13a5, 0x0e851578, 0x161ebb6f, 0x54f7450b, -0x5f2107f2, 0x7bcb4792 } + }, + { + { 0x045224c2, 0x1cecd0a0, 0x69e53952, 0x757f1b1b, 0x5289f681, 0x775b7a92, 0x16736148, 0x1b6cc620 }, + { 0x2bc73659, -0x7b781c30, 0x059979df, 0x4baf8445, -0x23529041, -0x2e8368a6, -0x2103694a, 0x57369f0b }, + { 0x75638698, -0x0e5666ff, -0x11559f2d, 0x353dd1be, 0x4c9ba488, -0x7b6b8ecd, 0x43ade311, 0x63fa6e68 } + }, + { + { -0x2db4a149, 0x2195becd, -0x3f32bb07, 0x5e41f18c, 0x41ca9ede, -0x20d7f8bc, -0x0ca48299, 0x07073b98 }, + { 0x6597c168, -0x2ea3dfad, -0x672d7877, -0x608c8c00, 0x3257ba1f, 0x18aee7f1, 0x07346f14, 0x3418bfda }, + { 0x4ce530d4, -0x2fc39894, 0x3b5df9f4, 0x0b64c047, 0x19b3a31e, 0x065cef8b, 0x533102c9, 0x3084d661 } + }, + { + { 0x760321fd, -0x6593178a, -0x6149c528, 0x7fe2b510, -0x7537fa6e, 0x00e7d4ae, -0x44908dc6, 0x73d86b7a }, + { -0x407b9653, -0x1e094862, -0x1d99cecb, 0x15801004, -0x508be7e5, -0x65b67cd0, 0x049b673c, 0x3ba2504f }, + { 0x6dba5ab6, 0x0b52b560, -0x444e1255, -0x56ecb0f1, -0x64fb59cb, 0x30a9520d, 0x7973e5db, 0x6813b8f3 } + }, + { + { -0x0cea81d7, 
-0x0e6b35aa, 0x5ef528a5, 0x136d3570, -0x74fa6644, -0x22b31089, 0x24f833ed, 0x7d5472af }, + { 0x334127c1, -0x67ab4fac, -0x7d0400db, 0x105d0478, 0x44186f4f, -0x24b60807, -0x412f4700, 0x1768e838 }, + { -0x50cc25b9, -0x2f1078b3, -0x491cc607, 0x00d3be5d, -0x63631132, 0x3f2a8a2f, 0x2352435a, 0x5d1aeb79 } + }, + { + { -0x49e4588b, 0x12c7bfae, -0x1d9c4003, -0x47b19de1, 0x5c840dcf, 0x0b47a5c3, -0x335079cc, 0x7e83be0b }, + { 0x19cd63ca, -0x0a61944d, 0x21d06839, 0x670c1592, 0x2150cab6, -0x4f92a9a5, 0x104f12a3, 0x20fb199d }, + { 0x6d99c120, 0x61943dee, 0x460b9fe0, -0x79efe0d2, -0x7117a673, 0x6bb2f151, -0x033b8a34, 0x76b76289 } + }, + { + { 0x522ec0b3, 0x4245f1a1, 0x2a75656d, 0x558785b2, 0x48a1b3c0, 0x1d485a25, -0x2a701f61, 0x60959ecc }, + { 0x756286fa, 0x791b4cc1, -0x28b5ea84, -0x24312ce9, -0x158d421a, 0x7e732421, 0x1131c8e9, 0x01fe1849 }, + { -0x571285f7, 0x3ebfeb7b, -0x1afd8764, 0x49fdc2bb, 0x3c119428, 0x44ebce5d, -0x416b80b6, 0x35e1eb55 } + }, + { + { 0x726ccc74, 0x14fd6dfa, 0x2f53b965, 0x3b084cfe, 0x52a2c8b4, -0x0cc51b0b, 0x0d40166a, 0x59aab07a }, + { -0x3a8c722d, -0x242518ff, -0x4d90e412, -0x063909cb, 0x42f15ef4, 0x61e96a80, -0x509f5b28, 0x3aa1d11f }, + { -0x6da153db, 0x77bcec4c, 0x60137738, 0x18487184, -0x01560baf, 0x5b374337, -0x371955ba, 0x1865e78e } + }, +}, +{ + { + { 0x1c529ccb, -0x6983ab17, 0x64c635fb, 0x30f62692, 0x78121965, 0x2747aff4, -0x150990a4, 0x17038418 }, + { -0x4991e086, -0x333b4839, -0x0af3d082, 0x44157e25, 0x713eaf1c, 0x3ef06dfc, 0x52da63f7, 0x582f4467 }, + { 0x20324ce4, -0x39ce842d, -0x5bb7743c, -0x57efbd18, 0x4e5a1364, -0x4de10e75, -0x325d7237, 0x0c2a1c4b } + }, + { + { 0x69bd6945, -0x123b7eb8, -0x41e372de, 0x0d6d907d, -0x2aa33a55, -0x39c42dee, -0x5ceb237d, 0x5a6a9b30 }, + { 0x6f1f0447, -0x2db23830, -0x24783fa7, -0x4dd961c2, -0x044d2d71, -0x2ea4fd8e, -0x3909b789, 0x7c558bd1 }, + { -0x2c69b9c3, -0x2f13eadc, -0x3ca5db10, 0x12bb628a, 0x1cbc5fa4, -0x5af3c587, 0x0afbafc3, 0x0404a5ca } + }, + { + { 0x2a416fd1, 0x62bc9e1b, -0x1cafa675, -0x4a3908d8, 0x3d5d6967, 0x04343fd8, -0x18071168, 0x39527516 }, + { 0x0aa743d6, -0x73e0bff9, 0x5b265ee8, -0x33452f35, 0x668fd2de, 0x574b046b, -0x352269cd, 0x46395bfd }, + { 0x1a5d9a9c, 0x117fdb2d, -0x2effa3d6, -0x6388ba44, 0x54d56fea, -0x102b410f, -0x17dd2fea, 0x76579a29 } + }, + { + { 0x52b434f2, 0x333cb513, -0x6c217f1f, -0x27cdd7b7, 0x750d35ce, -0x4aaed779, 0x2a2777c1, 0x02c514bb }, + { 0x49c02a17, 0x45b68e7e, -0x43565c81, 0x23cd51a2, -0x13ddb3e5, 0x3ed65f11, -0x61fa424f, 0x43a384dc }, + { -0x740e49bb, 0x684bd5da, -0x094ab4ad, -0x04742c82, -0x564f2dad, 0x313916d7, 0x61548059, 0x11609209 } + }, + { + { 0x369b4dcd, 0x7a385616, 0x655c3563, 0x75c02ca7, -0x2b0e7fdf, 0x7dc21bf9, -0x6e191fbe, 0x2f637d74 }, + { 0x29dacfaa, -0x4bb2e997, -0x7beca671, -0x25ad60b4, 0x453d5559, -0x16109c36, -0x3a9671f5, 0x351e125b }, + { 0x1af67bbe, -0x2b4b64ba, -0x3754769f, -0x29fcfc86, -0x06596605, 0x71dee19f, -0x1831d566, 0x7f182d06 } + }, + { + { -0x71de8ade, 0x09454b72, -0x2b7b4728, -0x55a7170c, 0x7f46903c, -0x2ca7dab3, 0x241c5217, 0x44acc043 }, + { -0x54fe9714, 0x7a7c8e64, 0x15edc543, -0x34a5b5ab, 0x47cd0eda, 0x095519d3, 0x343e93b0, 0x67d4ac8c }, + { 0x4f7a5777, 0x1c7d6bbb, -0x6e7cec1f, -0x74ca012c, -0x3694b97c, 0x4adca1c6, 0x12ad71bd, 0x556d1c83 } + }, + { + { -0x4ee417df, -0x7e0f98aa, 0x10a3f3dd, 0x0faff823, 0x6a99465d, -0x074d2fab, -0x337380fb, 0x097abe38 }, + { 0x0c8d3982, 0x17ef40e3, 0x15a3fa34, 0x31f7073e, 0x0773646e, 0x4f21f3cb, 0x1d824eff, 0x746c6c6d }, + { 0x7ea52da4, 0x0c49c987, -0x6423e2bd, 0x4c436955, -0x0833142e, 0x022c3809, 0x4bee84bd, 0x577e14a3 } + }, + { + 
{ -0x42b228d5, -0x6b013142, 0x060f2211, -0x0b95b026, -0x3f372e01, 0x124a5977, -0x04ff6d6b, 0x705304b8 }, + { 0x61a73b0a, -0x0f1d9754, 0x3791a5f5, -0x0d0505f0, 0x6b6d00e9, -0x3e1ec17e, 0x6fd78f42, 0x60fa7ee9 }, + { 0x4d296ec6, -0x49c2e2cb, 0x5fad31d8, -0x0c3cfac2, -0x4b42bd14, 0x670b958c, -0x5e9cac03, 0x21398e0c } + }, +}, +{ + { + { -0x79e48166, -0x793a03ea, 0x6a27c451, -0x095ccfb9, -0x5e16ca69, 0x01667267, 0x6082dfeb, 0x05ffb9cd }, + { -0x72582d11, 0x216ab2ca, -0x660bd7d9, 0x366ad9dd, 0x4fdd3c75, -0x519b4700, 0x53909e62, 0x403a395b }, + { -0x0ac09ec7, -0x59e80561, 0x13e66cb6, 0x60f2b5e5, -0x4cbb755c, -0x28574111, 0x6f5ea192, 0x7a293285 } + }, + { + { 0x79639302, -0x4763bbb8, 0x50c67f2c, 0x4ae4f193, -0x37e5063a, -0x0f4ca258, 0x46871017, 0x39d00035 }, + { -0x4fd21778, 0x0b39d761, -0x2dbeb1e1, 0x5f550e7e, 0x22e1a940, -0x59405ba8, -0x02bb8467, 0x050a2f7d }, + { -0x59af2489, 0x437c3b33, -0x453ad44e, 0x6bafe81d, 0x2db7d318, -0x0166bfd3, 0x372ba6ce, 0x2b5b7eec } + }, + { + { 0x613ac8f4, -0x596bbfb3, -0x056818d4, 0x500c3c2b, 0x1fcec210, -0x78befb2e, -0x79fb5712, 0x1b205fb3 }, + { -0x7c0af111, -0x4c43b443, -0x736d879a, 0x508f0c99, -0x37481992, 0x43e76587, -0x5b806727, 0x0f7655a3 }, + { -0x2db4ecc4, 0x55ecad37, 0x6038c90b, 0x441e147d, -0x29d39012, 0x656683a1, -0x781f1352, 0x0157d5dc } + }, + { + { -0x28e14adc, -0x6ad9aaec, 0x5df14593, -0x19fc277f, 0x0d4de6b7, 0x147cdf41, 0x0437c850, 0x5293b173 }, + { 0x0354c13d, -0x0d5850af, -0x55c8d4a0, -0x285f4ebb, 0x05a3d470, 0x2869b96a, -0x7db9fe8d, 0x6528e42d }, + { 0x4bccf226, 0x23d0e081, -0x7e69046d, -0x6d38ba33, 0x59541e5b, -0x749e8694, -0x3fde0688, 0x40a44df0 } + }, + { + { 0x4bc5d095, -0x793691af, -0x03597fb6, -0x0df2bf68, -0x37d915a3, 0x27363d89, 0x5719cacf, 0x39ca3656 }, + { 0x4f20ea6a, -0x25579677, 0x4c620618, -0x15eb5c2f, 0x090bf8be, 0x6001fccb, -0x6b816310, 0x35f4e822 }, + { 0x6f87b75c, -0x68af90d1, 0x034ae070, -0x39db5160, -0x552cb22a, 0x1ec856e3, -0x1bbf1a71, 0x055b0be0 } + }, + { + { 0x6ea33da2, 0x4d12a04b, -0x1c9ed923, 0x57cf4c15, -0x11bb2699, -0x6f13698b, 0x2a985aac, 0x64ca348d }, + { -0x768ca2ee, 0x6469a17d, -0x199d460f, -0x2490d82b, 0x6a395681, -0x60345cd8, -0x2d9650db, 0x363b8004 }, + { -0x1b3b6ed3, -0x66a771e7, 0x1ca5ce6b, -0x1033c4b2, -0x05a4672b, 0x4522ea60, 0x1de4a819, 0x7064bbab } + }, + { + { 0x42542129, -0x5d6f3f9f, -0x4172a470, -0x0d1d3d52, 0x76abfe1b, -0x30dba725, -0x7c29d941, 0x02157ade }, + { 0x5a770641, -0x46e61eaf, 0x4e7f8039, -0x565d1d39, 0x3df23109, 0x7527250b, -0x53d84875, 0x756a7330 }, + { 0x1b9a038b, 0x3e46972a, 0x7ee03fb4, 0x2e4ee66a, 0x6edbb4ca, -0x7e5db789, -0x7132fa9d, 0x1a944ee8 } + }, + { + { 0x182362d6, -0x44bf57a7, -0x75b2e545, -0x4660aa89, 0x758559f6, -0x72e74bd9, 0x4d26235a, 0x26c20fe7 }, + { 0x51039372, -0x2a56e2ef, -0x6635d922, 0x2ed377b7, -0x02c99495, -0x5e8dfd54, -0x296fe66b, 0x0730291b }, + { -0x1633dd0b, 0x648d1d9f, 0x28dd577c, 0x66bc5619, 0x652439d1, 0x47d3ed21, -0x125074b7, 0x49d271ac } + }, +}, +{ + { + { -0x4b48a9ff, 0x2798aaf9, 0x5c8dad72, 0x5eac7213, 0x61b7a023, -0x2d31559f, -0x167082b2, 0x1bbfb284 }, + { 0x382b33f3, -0x760afa76, -0x52b73f4c, 0x5ae2ba0b, -0x5ac24c92, -0x706c4afd, -0x6a5dcd1a, 0x5aa3ed9d }, + { -0x38269a9f, 0x656777e9, 0x72c78036, -0x34d4edac, -0x26af9112, 0x65053299, 0x5e8957cc, 0x4a07e14e } + }, + { + { -0x3b885b65, 0x240b58cd, 0x6447f017, -0x02c72522, -0x58379553, 0x19928d32, -0x7b505f7f, 0x50af7aed }, + { -0x67f20667, 0x4ee412cb, 0x3c6ec771, -0x5cea2891, -0x6da38803, -0x445a1222, 0x1d313402, 0x3f0bac39 }, + { 0x15f65be5, 0x6e4fde01, 0x216109b2, 0x29982621, 0x0badd6d9, 0x78020581, 
-0x45142ffa, 0x1921a316 } + }, + { + { -0x260c3e75, -0x28a55266, 0x60b1c19c, 0x566a0eef, 0x255c0ed9, 0x3e9a0bac, -0x5f9d380b, 0x7b049dec }, + { -0x20478f04, -0x76bdd082, 0x4f76b3bd, 0x2c296beb, 0x36c24df7, 0x0738f1d4, -0x1d8c5150, 0x6458df41 }, + { 0x35444483, -0x23341c86, 0x0fedbe93, 0x75887933, 0x12c5dd87, 0x786004c3, -0x3d6af19c, 0x6093dccb } + }, + { + { 0x6084034b, 0x6bdeeebe, 0x780fb854, 0x3199c2b6, -0x49d2f96b, -0x68cc8955, -0x749b8270, 0x6e3180c9 }, + { -0x7a1f8f93, 0x1ff39a85, -0x4c18c6cd, 0x36d0a5d8, 0x718f453b, 0x43b9f2e1, 0x4827a97c, 0x57d1ea08 }, + { -0x5ed74f8f, -0x11854919, -0x6c577456, -0x5b3ea693, -0x4dde9ed0, -0x084b217e, -0x226842e8, 0x363e999d } + }, + { + { -0x1db4513a, 0x2f1848dc, -0x454350a0, 0x769b7255, 0x3cefe931, -0x6f34c392, -0x39064cab, 0x231f979b }, + { 0x35ee1fc4, -0x6957bc3f, 0x08e4c8cf, -0x68914cab, -0x4a732cd0, -0x4bd097ff, 0x693a052b, 0x48ee9b78 }, + { -0x33d50c3a, 0x5c31de4b, -0x01df72e1, -0x4fb44fd0, -0x3eb04b9a, -0x48728ff7, 0x08792413, 0x079bfa9b } + }, + { + { -0x5d2abdbb, -0x0c361280, 0x77f63952, 0x0aa08b78, -0x2ef7ab8b, -0x2892539d, -0x6b8f9c95, 0x1ef4fb15 }, + { -0x25cff20c, -0x1c6fc5af, 0x3da95ab0, -0x7bc69bdd, 0x0b356480, -0x12c30ed3, -0x7b7e8e6c, 0x038c77f6 }, + { 0x5b167bec, -0x7ab1a11a, -0x692f323e, 0x59590a42, -0x67efde67, 0x72b2df34, 0x4a0bff56, 0x575ee92a } + }, + { + { 0x0aa4d801, 0x5d46bc45, -0x5acc4628, -0x3c50edd9, 0x2b8906c2, 0x389e3b26, 0x382f581b, 0x200a1e7e }, + { -0x75e7d031, -0x2b3f7f70, -0x66b76243, 0x30e170c2, 0x52f733de, 0x05babd57, 0x2cd3fd00, 0x43d4e711 }, + { -0x1506c53b, 0x518db967, 0x056652c0, 0x71bc989b, 0x567197f5, -0x01d47a27, 0x651e4e38, 0x050eca52 } + }, + { + { 0x60e668ea, -0x6853c68a, 0x153ab497, -0x64e64402, 0x34eca79f, 0x4cb179b5, -0x5ece51a9, 0x6151c09f }, + { 0x453f0c9c, -0x3cbce522, -0x008fc465, -0x160afba2, -0x127b84c3, -0x03268537, 0x1c58f4c6, 0x4b0ee6c2 }, + { -0x020fa26a, 0x3af55c0d, 0x2ab4ee7a, -0x22d9d120, 0x12171709, 0x11b2bb87, -0x7ff0fcf5, 0x1fef24fa } + }, +}, +{ + { + { -0x6fe99de0, -0x006e5996, 0x5bf1e009, -0x0ddaad52, 0x7f90df7c, 0x7dff85d8, 0x0c736fb9, 0x4f620ffe }, + { 0x6b6c6609, -0x4b69edc6, -0x7f54a6c8, -0x58af017b, -0x483d85a1, -0x0b8e40c7, 0x77ac193c, 0x507903ce }, + { -0x2021c1cc, 0x62f90d65, -0x4605a053, -0x30d73a6e, -0x39e9baf0, -0x66379107, 0x4a256c84, 0x25d44804 } + }, + { + { -0x36fdd4ab, 0x2c7c4415, -0x7ed14e02, 0x56a0d241, -0x2849a1f3, -0x0fd15e37, -0x2acdc4da, 0x4180512f }, + { -0x38164e91, -0x4297dcf2, -0x3e3a86a3, 0x0eb1b9c1, -0x6a494e01, 0x7943c8c4, 0x0bbacf5e, 0x2f9faf62 }, + { -0x75b75a25, -0x5b00c197, -0x426abfc5, -0x4595c7fa, 0x47d5b65d, -0x60831e51, 0x5939d2fb, 0x15e087e5 } + }, + { + { -0x0469c0c8, -0x776be792, -0x239c642b, 0x48a00e80, -0x1693e367, -0x5b17f6d5, -0x35a8c99f, 0x5a097d54 }, + { 0x745c1496, 0x12207543, -0x25c79ef4, -0x2500c303, 0x2c71c34f, -0x1b1868d9, 0x34bdede9, 0x39c07b19 }, + { 0x17c9e755, 0x2d45892b, -0x76cf7208, -0x2fcc028e, 0x525b8bd9, 0x6c2fe9d9, -0x3ee33f87, 0x2edbecf1 } + }, + { + { -0x2f785da1, -0x11f0f023, 0x5c3e34ee, -0x638aceab, -0x7054c54b, 0x660c572e, 0x544cd3b2, 0x0854fc44 }, + { -0x38ea5f2e, 0x1616a4e3, -0x07cbe2b3, 0x53623cb0, -0x38176635, -0x6910acd7, -0x5997455a, 0x3d4e8dbb }, + { 0x55edad19, 0x61eba0c5, -0x0f57c21a, 0x24b533fe, -0x7c455a08, 0x3b770428, -0x675b8173, 0x678f82b8 } + }, + { + { 0x57775696, 0x1e09d940, 0x3cd951db, -0x112ed9a4, 0x20bce16f, -0x056253d5, -0x172f760c, 0x0f7f76e0 }, + { -0x296ff3ac, -0x4eb6e2f5, -0x62ecd9ca, 0x3539722c, 0x0b362bc9, 0x4db92892, -0x59749621, 0x4d7cd1fe }, + { -0x2b7a4ff4, 0x36d9ebc5, -0x1b524c9b, 
-0x5da69b6e, -0x3dee6333, -0x3e9a6b80, 0x186e0d5f, 0x45306349 } + }, + { + { 0x2b072491, -0x695beb14, 0x27a7b65b, 0x1bb22181, 0x6e8a4af0, 0x6d284959, -0x32d889a1, 0x65f3b08c }, + { -0x593200e3, -0x6b222f3f, -0x17bdec52, 0x55f6f115, -0x66d03096, 0x6c935f85, 0x4a37f16f, 0x067ee0f5 }, + { 0x199801f7, -0x134d6001, -0x5d5f08d1, -0x62c9e2e1, 0x75fd2f49, 0x25f11d23, 0x0fe10fe2, 0x124cefe8 } + }, + { + { 0x31b16489, 0x1518e85b, -0x248ef405, -0x70552349, -0x5eb51dc7, 0x39b0bdf4, 0x503d20c1, 0x05f4cbea }, + { -0x2e720dab, 0x4c126cf9, 0x147a63b6, -0x3e2b8e17, -0x0c36c4a1, 0x2c6d3c73, -0x1c00795e, 0x6be3a6a2 }, + { -0x3fbeba44, -0x31fbf162, 0x08f6834c, -0x38e00b1e, -0x5477b85d, -0x42ab9173, -0x5b2d545b, 0x64666aa0 } + }, + { + { 0x3337e94c, -0x4f3ac409, 0x11e14f15, 0x7cb5697e, 0x1930c750, 0x4b84abac, -0x1f9bfb98, 0x28dd4abf }, + { 0x7c06d912, 0x6841435a, -0x44c07cf5, -0x35edc3df, -0x4e341d88, -0x2b4c84d9, -0x3890afba, 0x1d753b84 }, + { 0x44cb9f44, 0x7dc0b64c, -0x1c6da241, 0x18a3e1ac, 0x2d0457c4, 0x7a303486, -0x75f376d2, 0x4c498bf7 } + }, +}, +{ + { + { 0x30976b86, 0x22d2aff5, -0x3d2db9fc, -0x726f47fa, 0x4de5bae5, -0x235e7694, -0x37cbf3e9, 0x28005fe6 }, + { 0x1aa73196, 0x37d653fb, 0x3fd76418, 0x0f949530, -0x04c5e84e, -0x52dff4f7, 0x2fc8613e, 0x544d4929 }, + { 0x34528688, 0x6aefba9f, 0x25107da1, 0x5c1bff94, 0x66d94b36, -0x08a44433, 0x0f316dfa, 0x72e47293 } + }, + { + { -0x2cd589d9, 0x07f3f635, 0x5f6566f0, 0x7aaa4d86, 0x28d04450, 0x3c85e797, 0x0fe06438, 0x1fee7f00 }, + { -0x687ef7b1, 0x2695208c, 0x23450ee1, -0x4eafd5f5, 0x03efde02, -0x0262515a, 0x2733a34c, 0x5a9d2e8c }, + { 0x03dbf7e5, 0x765305da, 0x1434cdbd, -0x5b250db7, -0x2db57714, 0x7b4ad5cd, -0x11fbfabd, 0x00f94051 } + }, + { + { 0x07af9753, -0x28106c45, 0x3db766a7, 0x583ed0cf, 0x6e0b1ec5, -0x31966741, 0x5dd40452, 0x47b7ffd2 }, + { -0x3c2ccf4e, -0x72ca94dd, -0x4fb8e4fa, -0x0de37465, 0x6e42b83c, -0x4c93ce94, -0x74154ef3, 0x07d79c7e }, + { -0x43f722ee, -0x78040464, -0x1e113d65, -0x75f994c6, -0x24e03e41, 0x0d57242b, 0x5ea64bb6, 0x1c3520a3 } + }, + { + { 0x216bc059, -0x325790c0, 0x12bcd87e, 0x1fbb231d, 0x17c70990, -0x4b6a9562, 0x66d12e55, 0x38750c3b }, + { -0x43345cb6, -0x7f2dac5a, 0x3838219b, 0x3e61c3a1, -0x677d1c6a, -0x6f3c49ff, 0x5d0ee66f, 0x1c3d0577 }, + { -0x6bdd1ae6, 0x692ef140, 0x2b5df671, -0x343f38c4, 0x744ce029, 0x21014fe7, -0x2ccfb784, 0x0621e2c7 } + }, + { + { -0x4f240f0d, -0x4851e86a, -0x1e831e6a, 0x54dfafb9, -0x16555c4c, 0x25923071, -0x5effd163, 0x5d8e589c }, + { -0x7da67c73, -0x50679f34, -0x39606524, -0x6f15b73f, 0x65581e30, 0x65264837, 0x7bd3a5bc, 0x0007d609 }, + { 0x0842a94b, -0x3f40e26b, 0x588f2e3e, -0x4d2c3c9d, -0x44ae1d11, 0x0a961438, 0x3c1cbf86, 0x1583d778 } + }, + { + { -0x3362d739, -0x6ffcb8fc, -0x08d33a71, 0x1d1b679e, -0x41a478da, 0x16e12b5f, -0x7c3aa7f6, 0x4958064e }, + { 0x5da27ae1, -0x13115d11, 0x55670174, 0x597c3a14, 0x6609167a, -0x3659d5ee, -0x7e127090, 0x252a5f2e }, + { 0x5066e80d, 0x0d289426, 0x307c8c6b, -0x033c087b, 0x0c1112fd, 0x1b53da78, -0x27bc4c78, 0x079c170b } + }, + { + { -0x3f2a2faa, -0x322932b0, -0x44fca8c5, -0x65089793, -0x0c3c10b8, 0x3ca6723f, 0x317b8acc, 0x6768c0d7 }, + { 0x64fa6fff, 0x0506ece4, 0x6205e523, -0x411cbce2, 0x51b8ea42, 0x35794224, 0x4ac9fb00, 0x6dec05e3 }, + { -0x0eaa3e4d, -0x6b49da1b, -0x6684846f, 0x417bf3a7, 0x6d6b2600, -0x3dd34224, -0x2232ad0c, 0x51445e14 } + }, + { + { 0x2bbea455, -0x76ceb855, -0x6df86ed7, -0x73ac5db1, -0x41cf0859, 0x4b49f948, 0x6e4fd43d, 0x12e99008 }, + { 0x3b144951, 0x57502b4b, 0x444bbcb3, -0x71980095, 0x166385db, -0x474296d9, -0x1c6d6a38, 0x13186f31 }, + { 0x7fdfbb2e, 
-0x0ef3694d, 0x121ceaf9, -0x60656ca2, 0x3a5b983f, -0x20eec93c, 0x5d3e99af, 0x77b2e3f0 } + }, +}, +{ + { + { -0x33a32d65, -0x6acd0b71, -0x5c31c98f, 0x2ba851be, 0x51122941, 0x32dacaa0, 0x350004f2, 0x478d99d9 }, + { -0x630ed9a9, -0x02f28a79, -0x1ac5f1d7, -0x17d0106c, 0x5bbb4be7, -0x33cb5810, -0x5af3c75e, 0x0b251172 }, + { -0x6f44fd40, 0x1d5ad948, 0x0ec25115, 0x50e208b1, 0x4ef21702, -0x5d95dd77, 0x3b524805, 0x4dc92334 } + }, + { + { -0x0c93b68b, 0x3ad3e3eb, 0x37862125, -0x28a2da5b, -0x5fda5aea, -0x178c6bc3, -0x3bee37b9, 0x6bbc7cb4 }, + { 0x0f8086b6, -0x1c7d73c0, -0x6860f238, 0x3f77e6f7, 0x4df42cb4, 0x7ef6de30, -0x4954287c, 0x5265797c }, + { -0x2b5af2aa, 0x3c6f9cd1, -0x39015482, -0x49dbbf89, 0x3580972e, 0x6ff9bf48, -0x4ccd5305, 0x00375883 } + }, + { + { 0x6c75c99c, -0x3674137b, 0x00e33cf4, -0x1bbe7b40, -0x456f89cc, 0x0a676b9b, 0x71f379d7, 0x669e2cb5 }, + { 0x28cb0940, 0x0001b2cd, 0x6f1c24c9, 0x63fb51a0, -0x232a35cf, -0x4a52796f, -0x73baf9a0, 0x67238dbd }, + { -0x5b642cf8, -0x34ee948d, 0x2392729e, 0x025aad6b, 0x3f55d9b1, -0x4b86c106, 0x40678bb9, 0x72a10561 } + }, + { + { -0x1d1afa4a, 0x0d8d2909, -0x3fd6edd0, -0x67358755, -0x564edcd9, 0x77ef5569, -0x7ebc64b9, 0x7c77897b }, + { 0x1cc9249d, -0x5d497ed5, 0x21211f58, 0x62866eee, 0x5df10ece, 0x2cb5c5b8, -0x1d9c5200, 0x03a6b259 }, + { -0x21cce34b, -0x0e3e4a1e, 0x15fca420, 0x5a9f5d8e, 0x7bd932b1, -0x605bc70f, 0x1c6146e7, 0x2a381bf0 } + }, + { + { -0x4acbe991, -0x083f41ce, 0x19cf70d4, 0x27e6ca64, -0x56a858a7, -0x6cb20829, -0x54213d56, 0x5701461d }, + { -0x3037ee3f, -0x53646787, 0x3756e567, -0x7482d67f, 0x7c70edfc, 0x50da4e60, -0x77bbff4a, 0x5dbca62f }, + { 0x2c915c25, 0x2c674740, 0x0b0d340a, 0x1bdcd1a8, 0x07b43f5f, 0x5e5601bd, 0x5539a242, 0x2555b4e0 } + }, + { + { -0x781b9c2c, 0x78409b1d, -0x32049c63, -0x52b256a6, 0x55259b9c, -0x13d788c9, -0x3cedcf55, 0x69c806e9 }, + { 0x66ddd216, 0x6fc09f52, -0x371c8fb8, -0x231a9f59, -0x5d209d03, -0x139a6c63, -0x1ad12e6e, 0x7a869ae7 }, + { 0x14bb3f22, 0x7b48f574, -0x51233378, 0x68c7cee4, 0x79ed80be, -0x12d06c9f, 0x5f77bc4b, 0x25d70b88 } + }, + { + { -0x44e51b2c, -0x67ba62d7, 0x39f954ec, 0x56b9c4c7, -0x3d64b4c2, -0x7cd8bc0a, -0x67497876, 0x21ea8e27 }, + { 0x762bf4de, 0x4151c3d9, 0x2745d82b, 0x083f435f, 0x0d23ddd5, 0x29775a2e, 0x69a5db24, 0x138e3a62 }, + { 0x6a5a7b9c, -0x78410b4c, 0x5fc1d062, -0x2dd662e5, -0x22cde9b8, -0x7dbf67e8, -0x1a5d1fc3, 0x5c5abeb1 } + }, + { + { 0x1306a233, 0x02cde6de, 0x116f8ec7, 0x7b5a52a2, -0x3ee9c4a5, -0x1e397e0c, 0x60d32643, 0x241d3506 }, + { -0x48c3d225, 0x14722af4, 0x5a05060d, -0x43b8f3a1, 0x2581b02e, 0x00943eac, 0x1f499c8f, 0x0e434b3b }, + { 0x0ebc52c7, 0x6be4404d, -0x4e586e0b, -0x51b9dcc5, -0x2da24bd5, 0x2aec170e, 0x6645d694, 0x1d8dfd96 } + }, +}, +{ + { + { 0x12ddb0a4, -0x2a679c64, -0x3fdb7995, -0x5a2e60d0, 0x58fce460, -0x2e83d0fd, 0x2e095e8a, 0x07a19515 }, + { -0x63d13b22, 0x296fa9c5, 0x4f84f3cb, -0x43749e41, 0x17a8f908, 0x1c7706d9, 0x7ad3255d, 0x63b795fc }, + { 0x389e5fc8, -0x57c970fe, -0x30721bc5, -0x6fbcc4fe, -0x3abed9bd, -0x505e02a3, 0x032f0137, 0x3e8fe83d } + }, + { + { -0x17102ec4, 0x08704c8d, 0x33e03731, -0x203ae572, 0x1260cde3, -0x5a62a25b, -0x59da737a, 0x22d60899 }, + { 0x0570a294, 0x2f8b15b9, 0x67084549, -0x6b0dbd90, 0x61bbfd84, -0x21e3a51f, 0x7fac4007, 0x75ba3b79 }, + { 0x70cdd196, 0x6239dbc0, 0x6c7d8a9a, 0x60fe8a8b, -0x14bfeda0, -0x4c77b844, -0x788861a2, 0x0904d07b } + }, + { + { 0x48f940b9, -0x0bcdd29a, -0x42d2f3c7, 0x06952f0c, -0x5f7e06cf, 0x167697ad, -0x4508d594, 0x6240aace }, + { -0x22456e64, -0x4b31e02c, -0x38b37256, -0x30ce24c2, -0x527933af, 0x2c63cc63, -0x43e221f9, 
0x43e2143f }, + { 0x5ba295a0, -0x07cb8b64, -0x35c82da6, -0x296b83a5, -0x1836ce96, 0x66f13ba7, -0x724bf354, 0x56bdaf23 } + }, + { + { -0x3e62c44e, 0x1310d36c, 0x622386b9, 0x062a6bb7, -0x285eb0a4, 0x7c9b8591, 0x7e1e5754, 0x03aa3150 }, + { -0x0acacc15, 0x362ab9e3, 0x6eb93d40, 0x338568d5, 0x1d5a5572, -0x61f1ebae, -0x7c8bece8, 0x1d24a86d }, + { -0x002b31e1, -0x0b1389b8, 0x54ac8c1c, -0x1fba1510, 0x1d09357c, -0x772dda7e, -0x6514b7a7, 0x43b261dc } + }, + { + { 0x6c951364, 0x19513d8b, 0x000bf47b, -0x6b018eda, -0x2ab06a99, 0x028d10dd, 0x42940964, 0x02b4d5e2 }, + { -0x77448645, -0x1aa4e1e7, -0x3e85ca63, -0x5f612f83, 0x603dea33, -0x4fd3d11e, 0x5b276bc2, 0x326055cf }, + { 0x28d18df2, -0x4b5eaa35, 0x186ce508, -0x1533b9ba, 0x6c824389, -0x3b630b6d, -0x51a2cbf0, 0x27a6c809 } + }, + { + { -0x3bc296ac, -0x32d3d8f6, 0x6a66cab2, -0x22b5c1a9, 0x69d7036c, 0x79fa5924, 0x3d8c2599, 0x22150360 }, + { 0x1f0db188, -0x74591433, 0x675a5be8, 0x37d3d73a, 0x15f5585a, -0x0dd1205d, -0x009f5e82, 0x2cb67174 }, + { 0x390be1d0, 0x59eecdf9, 0x728ce3f1, -0x56bddfbc, 0x7a94f0f4, -0x7d76e39a, 0x3890f436, 0x7b1df4b7 } + }, + { + { 0x07f8f58c, 0x5f2e2218, -0x2b6bf62c, -0x1caaa361, 0x1fb6a630, -0x4d555773, -0x2cad1fc3, 0x68698245 }, + { -0x4c4d5ddc, -0x1b6d0d20, 0x2b551160, 0x7c6c9e06, 0x0d7f7b0e, 0x15eb8fe2, 0x58fc5992, 0x61fcef26 }, + { 0x2a18187a, -0x244ea27b, -0x79225329, -0x0c1b552d, 0x0ff6c482, 0x44bae281, 0x3daf01cf, 0x46cf4c47 } + }, + { + { -0x0eb67ec0, 0x213c6ea7, 0x392b4854, 0x7c1e7ef8, 0x5629ceba, 0x2488c38c, 0x0d8cc5bb, 0x1065aae5 }, + { -0x613b1a07, 0x426525ed, 0x16903303, 0x0e5eda01, -0x341a3524, 0x72b1a7f2, 0x14eb5f40, 0x29387bcd }, + { -0x20dff2a9, 0x1c2c4525, -0x403598b6, 0x5c3b2dd6, -0x1e7cbfd0, 0x0a07e7b1, 0x4f1ce716, 0x69a198e6 } + }, +}, +{ + { + { -0x61d2b8cc, 0x7b26e56b, -0x7e39e98b, -0x3b38ecd5, -0x13632181, -0x10a36adb, -0x18e8bc53, 0x39c80b16 }, + { -0x10562969, 0x7afcd613, 0x1c067959, 0x0cc45aa4, -0x3e05256a, -0x5a901efc, 0x72e40365, 0x3a73b704 }, + { 0x1b826c68, 0x0f196e0d, 0x4960e3db, -0x08e00f1e, 0x23b7436c, 0x61131670, 0x77da7282, 0x0cf0ea58 } + }, + { + { 0x3ba6945a, -0x1ccd312c, -0x177e3fa3, -0x21f4ec9f, 0x5e67ed3b, 0x1ad40f09, -0x4739c2a3, 0x5da8acda }, + { -0x222b3343, 0x196c80a4, -0x6a0d2263, 0x22e6f55d, 0x40d6c71b, -0x38a1cc39, -0x34c3fbd1, 0x7bb51279 }, + { 0x3a70159f, -0x3b4999b6, 0x0a904e14, 0x76194f0f, -0x5bf693ed, -0x5a9eb3c7, -0x68601313, 0x6cd0ff50 } + }, + { + { -0x4fb45e72, 0x7fecfabd, 0x3bddbcf7, -0x2f038404, 0x057a131c, -0x5be2b792, -0x0dddc59f, 0x641a4391 }, + { -0x70bbd754, -0x3f1f9819, -0x59eeca1d, 0x14835ab0, 0x38062935, -0x0de2eb0d, -0x20fb7b64, 0x6390a4c8 }, + { -0x59f95725, -0x3a3946a6, -0x4f97da0f, -0x6eb48062, 0x44fc9eff, 0x2a731f6b, 0x62705cfc, 0x30ddf385 } + }, + { + { 0x68bcd52c, 0x33bef2bd, 0x69482ef2, -0x39b62450, 0x41cb1aee, -0x4a4911f4, 0x0212a7e5, 0x5c294d27 }, + { -0x2e400807, 0x4e3dcbda, 0x20645717, -0x36ee717e, 0x0f189d56, -0x45333144, -0x2bb98998, 0x1b4822e9 }, + { 0x25563781, -0x54c9f581, 0x480f7958, 0x2512228a, 0x6114b4e3, -0x38a2fad9, -0x268901d6, 0x222d9625 } + }, + { + { 0x0a344f85, 0x0f94be7e, -0x780dd3c8, -0x14d05574, 0x4ee16f0f, -0x631e18a2, 0x18a08dea, 0x43e64e54 }, + { -0x4c8d531f, 0x1c717f85, 0x4638bf18, -0x7e6cf197, 0x6bc08b58, 0x239cad05, -0x7807000c, 0x0b34271c }, + { 0x1a35ce63, -0x7eaa1dae, -0x06edfd72, -0x41eff2b3, -0x5a822314, -0x4007f408, 0x6d6bc6e4, 0x57342dc9 } + }, + { + { 0x1e707bf6, -0x0c3c4349, 0x7291a762, 0x351d9b8c, -0x252965cd, 0x00502e6e, 0x1ec8807f, 0x522f521f }, + { -0x3731a668, -0x10110f9b, -0x4a34155e, -0x40fd6af0, 0x20b7c458, 
-0x739b5efa, 0x31c24855, 0x35134fb2 }, + { -0x065c6fd5, 0x272c1f46, -0x669a8434, -0x36e45c49, 0x4f8a1c0e, -0x519eb4d0, 0x0b99017b, 0x7afcaad7 } + }, + { + { -0x107bd495, -0x577ebe14, -0x6854193b, 0x55e7b147, 0x03784ffe, -0x738b7069, -0x5032ff49, 0x5b50a1f7 }, + { -0x5b4741bf, -0x3da212ac, 0x1bb0e2dd, -0x6fd2ec1f, -0x3217d54e, 0x41f43233, -0x3c551835, 0x1085faa5 }, + { -0x0ec9eceb, -0x647bf09a, 0x701003e9, 0x18462242, -0x1b5daf80, 0x65ed45fa, 0x3fda7320, 0x0a286239 } + }, + { + { 0x6ecb9d17, -0x69f18c85, -0x2983151f, -0x050db6b8, -0x2aa1e477, 0x37e7a9b4, -0x4b93a615, 0x5cb7173c }, + { 0x347cbc9d, 0x46ab13c8, -0x663edc7d, 0x3849e8d4, -0x7829b537, 0x4cea3140, -0x4e5d6119, 0x1f354134 }, + { -0x7d485410, 0x4a89e68b, -0x64594847, -0x0be326d9, -0x1e727891, 0x16e6c210, 0x7f1b09c6, 0x7cacdb0f } + }, +}, +{ + { + { -0x233a3513, -0x1efebbcc, 0x3c84fb33, 0x47ed5d96, -0x12795f19, 0x70019576, -0x2d98061c, 0x25b2697b }, + { -0x26e58744, -0x6f9d4d20, -0x37af6999, 0x47c9889c, 0x405070b8, -0x620ab59a, 0x2493a1bf, 0x7369e6a9 }, + { 0x13986864, -0x6298c005, 0x415dc7b8, 0x3ca5fbd9, -0x20d8c4a2, -0x1fb133c5, -0x4ab1b32e, 0x1420683d } + }, + { + { -0x3e33a530, 0x34eebb6f, -0x69b95375, 0x6a1b0ce9, -0x599421ad, -0x2c4f25b7, 0x61d081c1, 0x31e83b41 }, + { 0x249dd197, -0x4b8742e2, 0x5e58c102, 0x620c3500, -0x334553a4, -0x04fd2cd1, -0x0af758d3, 0x60b63beb }, + { -0x61f9d4b1, -0x681738ee, 0x29320ad8, 0x49e48f4f, 0x6f18683f, 0x5bece14b, 0x2d550317, 0x55cf1eb6 } + }, + { + { 0x7df58c52, 0x3076b5e3, -0x186633ca, -0x28c54623, 0x4913ee20, -0x427ce31d, 0x62ba0133, 0x1a56fbaa }, + { 0x65c23d58, 0x58791010, 0x5094819c, -0x7462f793, 0x12c55fa7, -0x1dbfd057, 0x570891d4, 0x669a6564 }, + { 0x5c9dc9ec, -0x6bc194b0, -0x5883c8e6, 0x302557bb, 0x41347651, -0x678c51aa, -0x663a75a4, 0x13c48367 } + }, + { + { 0x5d8bd080, -0x3b230496, 0x571a4842, -0x21143b14, -0x471aac9b, -0x2b4d177d, -0x371a47d9, 0x50bdc87d }, + { 0x5ab3e1b9, 0x423a5d46, -0x380ec09f, -0x03ec3e79, -0x134a464a, 0x19f83664, -0x59c849f9, 0x66f80c93 }, + { 0x6edfe111, 0x606d3783, -0x0fee5427, 0x32353e15, 0x25b73b96, 0x64b03ac3, 0x725fd5ae, 0x1dd56444 } + }, + { + { 0x08bac89a, -0x3d681a00, -0x151e3c20, 0x7d4cea11, -0x60186884, -0x0c1c741f, 0x63a305cd, 0x3a3a450f }, + { 0x3362127d, -0x705b8008, 0x71cd7c15, -0x4360953c, 0x49220c8b, 0x6e714543, 0x219f732e, 0x0e645912 }, + { -0x27c6b9d9, 0x078f2f31, -0x216b5af0, 0x389d3183, 0x17996f80, -0x2e1c9393, -0x6c565785, 0x318c8d93 } + }, + { + { -0x54e22c68, 0x5d669e29, 0x342d9e3b, -0x036de9a8, -0x0ca68c33, 0x55851dfd, 0x25950af6, 0x509a41c3 }, + { 0x2afffe19, -0x0d8ba2fd, 0x7f24db66, 0x0c9f3c49, -0x457a6711, -0x43672c1d, -0x65e2acec, 0x224c7c67 }, + { -0x5906da17, -0x423f9124, 0x641b1f33, 0x793ef3f4, -0x627cc177, -0x7d13ed80, 0x28a11389, 0x05bff023 } + }, + { + { 0x0dc512e4, 0x6881a0dd, 0x44a5fafe, 0x4fe70dc8, -0x70b5adc0, 0x1f748e6b, -0x11fe5c16, 0x576277cd }, + { 0x23cae00b, 0x36321370, -0x2e5330a7, 0x544acf0a, -0x2de5e378, -0x698befb7, -0x05d5bb59, 0x780b8cc3 }, + { 0x234f305f, 0x1ef38abc, 0x1405de08, -0x65a88043, 0x34e62a0d, 0x5e82a514, 0x6271b7a1, 0x5ff41872 } + }, + { + { 0x13b69540, -0x1a24b818, 0x432610e1, -0x0ca2d5c5, 0x38781276, -0x53e0d917, -0x5f5f3497, 0x29d4db8c }, + { 0x1789db9d, 0x398e080c, -0x0c18870b, -0x589fdfdb, 0x06bd035d, -0x056776b4, 0x25a966be, 0x106a03dc }, + { 0x333353d0, -0x2652f551, -0x532cf61b, 0x38669da5, -0x37770810, 0x3c57658a, 0x052cbefa, 0x4ab38a51 } + }, +}, +{ + { + { -0x7f621fac, -0x09701d18, -0x637d452f, -0x1c43f696, 0x0aadbf45, 0x076353d4, -0x215e6a62, 0x7b9b1fb5 }, + { 0x4324c0e9, -0x20253412, 
0x3f955bb7, 0x05444288, -0x15ce9f61, -0x21085558, 0x42287cff, 0x68aee706 }, + { 0x7471cc0c, -0x0fe3370f, 0x579082bb, -0x6adbd1c9, -0x2c1b94a1, 0x27776093, 0x28bd85fb, 0x2d13d55a } + }, + { + { 0x7aee7a52, -0x40fe6332, -0x1bab152d, -0x57212d4a, -0x785744e7, 0x3c619f0b, 0x560916d8, 0x3619b5d7 }, + { 0x5b35b8da, -0x053a2dfa, -0x7a9db449, -0x57257566, 0x3d21cd0f, -0x332d356f, -0x7406f2a8, 0x6b8341ee }, + { 0x0282c4b2, 0x3579f26b, 0x4fafefae, 0x64d592f2, 0x28c8c7c0, -0x48321285, 0x7173a8d7, 0x6a927b6b } + }, + { + { 0x3ece88eb, -0x728fbf7a, -0x7f113f74, -0x0f1cf857, 0x0d788fda, -0x53ddaf9f, 0x3a0d478d, 0x056d92a4 }, + { -0x6791b9aa, 0x1f6db24f, -0x2e16efa5, 0x1021c02e, 0x2cc0a375, -0x0700c001, -0x3937da6e, 0x1d2a6bf8 }, + { -0x03c25a5f, 0x1b05a196, 0x43b59ed0, 0x77d7a8c2, -0x682e86e8, 0x06da3d62, -0x0edcac09, 0x66fbb494 } + }, + { + { -0x0edcf62a, -0x2928f66a, -0x163c2ac7, -0x2404dc7b, -0x08aadbef, 0x46d602b0, 0x57843e0c, 0x270a0b05 }, + { -0x27a3f048, 0x751a50b9, -0x7430f685, -0x2e5023db, -0x7cf65697, 0x2f16a6a3, -0x1a4ff9a7, 0x14ddff9e }, + { -0x5879d434, 0x61ff0640, 0x5f11abfe, -0x7e353f66, 0x55d12abb, -0x6fb87cfc, -0x6ba5178d, 0x19a4bde1 } + }, + { + { -0x3f893b61, 0x40c709de, 0x7f3e53f6, 0x657bfaf2, -0x135fbd3c, 0x40662331, 0x7eb4df04, 0x14b37548 }, + { 0x20a6200a, -0x6460d90b, -0x30ec1508, 0x64804443, -0x79ce122d, -0x759c98c1, 0x1ed39dc1, 0x72bbbce1 }, + { -0x549923b9, -0x517ac36c, -0x2089d292, -0x149dcbc2, 0x6fb2f7d1, -0x0f71f1e8, 0x700ab37a, 0x4f0b1c02 } + }, + { + { -0x3e4d1dc1, 0x79fd21cc, 0x453df52a, 0x4ae7c281, -0x2eaeb795, -0x37e8d137, 0x3e0a7534, 0x68abe944 }, + { -0x27e6ae06, -0x1e8f9879, -0x4d6f3885, -0x5ef5d372, 0x3ed66773, -0x18c7d060, 0x0bcc4b54, 0x0a4d8471 }, + { 0x07831dcb, -0x25ed393c, 0x4d5c510d, 0x0da230d7, 0x6bd404e1, 0x4ab1531e, -0x430bbf11, 0x4106b166 } + }, + { + { 0x39e4ecf2, -0x5b7a332b, 0x0555bab5, 0x5aa3f3ad, -0x6c8207d3, 0x145e3439, 0x1214283f, 0x1238b51e }, + { 0x1cd23668, 0x02e57a42, 0x0eaef6fd, 0x4ad9fb5d, -0x4edbbb80, -0x6ab198d9, 0x2699f331, 0x7f792f9d }, + { 0x5fd4d924, 0x0b886b92, 0x3626a80d, 0x60906f7a, -0x467542ee, -0x132c984c, -0x210cbb31, 0x2876beb1 } + }, + { + { 0x3a8a85f8, -0x2a6b4ccd, -0x187282a8, 0x4ea37689, 0x5e8e351f, 0x73bf9f45, -0x43be144c, 0x5507d7d2 }, + { 0x63144691, -0x237b16cb, -0x29e0dc0c, 0x632fe8a0, 0x12a9a8d5, 0x4caa8006, 0x0e9918d3, 0x48f9dbfa }, + { 0x299572fc, 0x1ceb2903, -0x6afd2f12, 0x7c8ccaa2, 0x11cce67b, -0x6e405bcc, 0x64a831e7, 0x57844819 } + }, +}, +{ + { + { 0x5fddc09c, -0x29302e11, -0x08a8a232, -0x17d4c103, 0x201634c2, 0x25d56b5d, 0x04ed2b9b, 0x3041c6bb }, + { 0x6768d593, -0x2583d4db, 0x4422ca13, -0x673e3fa9, -0x35f531e3, -0x0e57f42b, -0x3f775970, 0x29cdd1ad }, + { -0x26a91eb8, 0x0ff2f2f9, -0x60ca94d2, -0x5218688b, 0x5f6c025c, 0x1a4698bb, 0x14049a7b, 0x104bbd68 } + }, + { + { -0x29800e9d, -0x56a265a1, 0x4cc75681, -0x16d41963, -0x21df0da9, -0x4807fdb4, -0x04f8d20b, 0x204f2a20 }, + { 0x68f1ed67, 0x51f0fd31, -0x2790c43e, 0x2c811dcd, 0x04d2f2de, 0x44dc5c43, 0x092a7149, 0x5be8cc57 }, + { 0x30ebb079, -0x37ebc4c3, -0x429ad1d0, 0x7589155a, -0x7092a3cf, 0x653c3c31, -0x3d86e9e1, 0x2570fb17 } + }, + { + { 0x0bb8245a, 0x192ea955, -0x706faf2f, -0x37190458, -0x775b36cb, 0x7986ea2d, -0x21fe7998, 0x241c5f91 }, + { 0x2cb61575, 0x3efa367f, 0x1cd6026c, -0x0a06908a, 0x65b52562, -0x1738ebd6, 0x53030acd, 0x3dcb65ea }, + { 0x40de6caa, 0x28d81729, 0x22d9733a, -0x7040d310, 0x235b01d1, 0x16d7fcdd, 0x5fcdf0e5, 0x08420edd } + }, + { + { 0x04f410ce, 0x0358c34e, 0x276e0685, -0x49eca4a6, -0x1446eadf, 0x5d9670c7, 0x21db889c, 0x04d654f3 }, + { 
-0x7c9d05b6, -0x3200df55, -0x1de5c192, 0x57e118d4, -0x03c619d5, -0x1ce869e9, -0x43e89603, 0x0d9a53ef }, + { -0x22424a2b, 0x5e7dc116, -0x725a22d3, 0x2954deb6, 0x3334a292, 0x1cb60817, 0x18991ad7, 0x4a7a4f26 } + }, + { + { -0x50c8d5b5, 0x24c3b291, 0x718147f2, -0x6c257d90, -0x7976610e, -0x227b7a9c, 0x23e0ee33, 0x4a963142 }, + { 0x5fb15f95, -0x0b58e7fe, 0x6b5c1b8f, 0x3df65f34, 0x00e01112, -0x32030f7b, -0x222ce7b8, 0x11b50c4c }, + { 0x08a4ffd6, -0x5917d8bc, -0x63ea8927, 0x738e177e, 0x3d02b3f2, 0x773348b6, -0x319433af, 0x4f4bce4d } + }, + { + { -0x3b62f491, 0x30e2616e, -0x3513dce9, -0x1ba98e71, -0x0d94b05a, 0x48eb409b, 0x61595f37, 0x3042cee5 }, + { -0x1ddbda7c, -0x58e031a6, -0x6d0a7562, 0x26ea7256, 0x1cea3cf4, -0x2de5f629, -0x48e3fe1a, 0x73fcdd14 }, + { 0x449bac41, 0x427e7079, -0x431dcef6, -0x7aa51c93, 0x5f841a7c, 0x4cae7621, -0x65631e2a, 0x389e740c } + }, + { + { 0x570eac28, -0x3642870a, 0x27919ce1, -0x1aa4f4ce, -0x5e646e13, 0x65fc3eab, -0x29d9c970, 0x25c425e5 }, + { 0x34dcb9ce, 0x64fcb3ae, -0x1cb72f53, -0x68affcdd, 0x62c6381b, 0x45b3f07d, 0x465a6788, 0x61545379 }, + { -0x0e282192, 0x3f3e06a6, -0x71f9dcf8, 0x3ef97627, 0x4e8a6c77, -0x73eb09da, 0x15484759, 0x6539a089 } + }, + { + { 0x14bb4a19, -0x223b242c, -0x67bdb072, 0x19b2bc3c, 0x36ca7169, 0x48a89fd7, -0x0fe64270, 0x0f65320e }, + { -0x3c2d088d, -0x162de08c, 0x25c46845, -0x3eafabbf, -0x064661cd, 0x624e5ce8, -0x3a32e794, 0x11c5e4aa }, + { -0x35021f3a, -0x2b792e4f, 0x163b5181, 0x4f3fe6e3, -0x050d6c66, 0x59a8af0d, -0x13ccf8d6, 0x4cabc7bd } + }, +}, +{ + { + { 0x1a54a044, -0x083f5e64, 0x77bd9fbb, 0x4a1c5e24, 0x5af22972, -0x591c35ef, 0x3f2e9e0d, 0x1819bb95 }, + { 0x532f7428, 0x16faa8fb, 0x46a4e272, -0x242bd160, -0x74615b80, 0x5337653b, 0x23973f03, 0x40659472 }, + { 0x5e042e84, 0x498fbb79, 0x7698b714, 0x7d0dd89a, 0x27fe6295, -0x7404f45c, 0x21200524, 0x36ba82e7 } + }, + { + { 0x57274ed5, -0x372962f6, 0x60804b17, 0x45ba8032, 0x2255dfac, -0x20c325f0, 0x2709b339, 0x77d22123 }, + { 0x4245ec41, -0x29f13449, 0x34348716, -0x02641762, -0x1bdd7b22, -0x36dbf502, -0x2face24c, 0x4472f648 }, + { 0x64ad94d8, 0x498a6d70, -0x6509dd9d, -0x5a4a3703, 0x45c141f4, -0x735712fb, 0x662d358c, 0x2c63bec3 } + }, + { + { -0x7a790741, -0x65ae74c6, -0x344e6910, -0x6118e50a, -0x5dc7a30e, -0x55f9da1a, -0x2228372f, 0x1deb2176 }, + { -0x158786ab, 0x7fe60d8b, -0x4a0bfe49, -0x4623ee82, 0x19355cce, -0x6e383f66, -0x6bbd4121, 0x22692ef5 }, + { 0x2066cf6c, -0x7a9c2e66, 0x4dcc7cd7, 0x401bfd8c, -0x32f2709e, -0x26895942, -0x5d874fa2, 0x67cfd773 } + }, + { + { 0x5a4e586a, 0x2d5fa985, 0x49beab7e, 0x65f8f7a4, -0x0de2cc2d, -0x55f8b223, 0x1bcb9dee, 0x185cba72 }, + { -0x10c11b8b, -0x7213ce06, -0x61dd026e, -0x66240076, 0x4e26cab1, 0x512d1159, -0x13bcef47, 0x0cde561e }, + { -0x0b1c34bf, -0x6c79625d, 0x40f7977e, -0x40fc6d0b, -0x2fb9c47d, 0x026204fc, -0x61139113, 0x3ec91a76 } + }, + { + { -0x4f5cbfd1, 0x0fad2fb7, -0x04960b58, 0x46615ecb, -0x3a07155a, -0x08ba4338, 0x4a94e896, 0x7a5fa879 }, + { -0x087e9953, 0x1e9df75b, -0x14f32851, 0x4dfda838, -0x3e150678, -0x45ffd128, 0x11f33cfc, 0x13fedb3e }, + { 0x13cd67a1, 0x52958faa, -0x74244ae9, -0x69a11f7f, 0x2e8845b3, 0x16e58daa, 0x5499da8f, 0x357d397d } + }, + { + { 0x194bfbf8, 0x481dacb4, -0x451a7d67, 0x4d77e3f1, 0x7d1372a0, 0x1ef4612e, 0x70ff69e1, 0x3a8d867e }, + { -0x4f453194, 0x1ebfa05f, 0x1caf9a1e, -0x36cb9df4, 0x1d82b61a, -0x3388e33c, -0x5a08b014, 0x2d94a16a }, + { 0x55aff958, 0x6f58cd5d, 0x75567721, -0x45c155a4, -0x6e9add83, 0x75c12399, -0x3d0d4ca2, 0x69be1343 } + }, + { + { 0x684b8de3, -0x7d444254, 0x3fca0718, -0x5d0b3830, -0x1f695558, 0x337f92fb, 
0x63587376, 0x200d4d8c }, + { -0x1e6836d6, 0x0e091d5e, 0x2945119f, 0x4f51019f, -0x0fcb1664, 0x143679b9, 0x4d24c696, 0x7d88112e }, + { 0x4893b32b, 0x208aed4b, -0x41a6469c, 0x3efbf23e, -0x245a1af9, -0x289d2150, -0x7e42626c, 0x69607bd6 } + }, + { + { -0x6cdc56fe, 0x3b7f3bd4, 0x6b2c6e53, 0x7c21b556, 0x3a7852a7, -0x1a45700b, -0x7c713200, 0x28bc77a5 }, + { 0x68de1ce1, -0x0941fdf0, 0x0edcbc1f, -0x172ae719, 0x1b5505a5, -0x1c100230, -0x2c13c030, 0x35f63353 }, + { -0x1da27fca, 0x63ba78a8, -0x6bcccb70, 0x63651e00, 0x288ce532, 0x48d82f20, 0x36b57524, 0x3a31abfa } + }, +}, +{ + { + { 0x3f78d289, -0x3f708771, -0x5ebfb261, -0x01cf58d4, -0x309a3363, -0x0d887404, 0x5acb2021, 0x7ee49816 }, + { 0x089c0a2e, 0x239e9624, 0x3afe4738, -0x38b73b40, 0x764fa12a, 0x17dbed2a, 0x321c8582, 0x639b93f0 }, + { -0x6eee5e3d, 0x7bd508e3, -0x7f6f8b77, 0x2b2b90d4, -0x518d02e7, -0x182d513e, -0x7a49fd5a, 0x0edf493c } + }, + { + { -0x7b89beed, 0x6767c4d2, -0x080a07cb, -0x5f6fbfc1, -0x35194122, 0x1c8fcffa, -0x2e205c97, 0x04c00c54 }, + { 0x599b5a68, -0x51337ea8, -0x14521df2, -0x15a8b0f1, 0x22b67f07, 0x4fe41d74, 0x019d4fb4, 0x403b92e3 }, + { -0x74b9a308, 0x4dc22f81, 0x1480eff8, 0x71a0f35a, 0x04c7d657, -0x51174053, -0x4d9e890c, 0x355bb12a } + }, + { + { 0x5a8c7318, -0x5cfe2539, -0x4c3155ef, -0x126ffc63, 0x3bae3f2d, 0x6f077cbf, -0x1fad5272, 0x7518eaf8 }, + { 0x7493bbf4, -0x58e19b34, -0x135c4f3d, -0x1a427b27, -0x05fa187b, 0x0a6bc50c, 0x182ec312, 0x0f9b8132 }, + { 0x1b7f6c32, -0x5b77a63c, -0x0bc7cd68, 0x0f2d60bc, -0x364e2e27, 0x1815a929, -0x44e8aa3c, 0x47c3871b } + }, + { + { -0x37af9950, -0x0419a2b0, -0x4c5d6650, 0x62ecc4b0, 0x441ae8e0, -0x1ac8ab16, -0x172b72a1, 0x08fea02c }, + { 0x71ec4f48, 0x51445397, -0x3673a292, -0x07fa4e83, 0x47c3c66b, -0x089d3ee6, 0x764699dc, 0x00b89b85 }, + { 0x68deead0, -0x7db2228a, 0x4b685d23, -0x379bbae0, 0x5d89d665, -0x4aeb3033, 0x4f75d537, 0x473829a7 } + }, + { + { -0x52c6fd37, 0x23d9533a, -0x10fca771, 0x64c2ddce, -0x301ed04c, 0x15257390, 0x44e4d390, 0x6c668b4d }, + { 0x4679c418, -0x7d2d258b, -0x4d9e7210, -0x19c42828, -0x53b814f6, 0x355eef24, 0x4833c6b4, 0x2078684c }, + { 0x7a78820c, 0x3b48cf21, -0x7ed8c169, -0x0895f54e, -0x73711285, -0x56939a59, 0x4f8a433f, 0x7411a605 } + }, + { + { 0x18b175b4, 0x579ae53d, -0x0c6d5efe, 0x68713159, 0x1eef35f5, -0x7baa1346, 0x458c398f, 0x1ec9a872 }, + { -0x46623793, 0x4d659d32, 0x603af115, 0x044cdc75, -0x233d1b78, -0x4cb38ed4, -0x047ecb01, 0x7c136574 }, + { 0x00a2509b, -0x47195b2c, 0x0bc882b4, -0x647e28fe, -0x0e6a8a9f, 0x57e7cc9b, -0x38329ba0, 0x3add88a5 } + }, + { + { 0x59393046, -0x7a3d672c, 0x5ff659ec, -0x7081ca68, -0x0d0991c6, 0x1d2ca22a, -0x5bf958e0, 0x61ba1131 }, + { -0x49ca230e, -0x5476a890, -0x0993e044, 0x02dfef6c, -0x41492e79, -0x7aacfd98, -0x3378618c, 0x249929fc }, + { 0x16959029, -0x5c2f5f0f, -0x45814277, 0x023b6b6c, 0x26783307, 0x7bf15a3e, -0x44271319, 0x5620310c } + }, + { + { 0x77e285d6, 0x6646b5f4, 0x6c8f6193, 0x40e8ff67, -0x544a6b23, -0x59138cef, 0x658cec4d, 0x7ec846f3 }, + { 0x4934d643, 0x52899343, -0x5aeddd0b, -0x462407fa, -0x3c0be3de, -0x70927871, 0x4d9d9730, 0x37676a2a }, + { 0x1da22ec7, -0x64a170c1, 0x6c01cd13, 0x130f1d77, -0x5d676048, 0x214c8fcf, 0x399b9dd5, 0x6daaf723 } + }, +}, +{ + { + { 0x2cd13070, -0x7e514423, -0x07a5f162, -0x69d1bcdb, -0x35200135, -0x216c6e56, 0x52c230e6, 0x53177fda }, + { 0x10628564, 0x591e4a56, -0x574b20cc, 0x2a4bb87c, -0x185c71bd, -0x21d5da8e, -0x011afb92, 0x3cbdabd9 }, + { 0x50b9de79, -0x584368fa, -0x3cfe4a65, 0x3d12a7fb, -0x2c951c74, 0x02652e68, 0x5a6199dc, 0x79d73983 } + }, + { + { 0x0d591737, 0x21c9d992, -0x164b932a, 
-0x6415be2e, 0x0d89bfca, -0x1df17be0, 0x6eae5ff8, 0x79d99f94 }, + { 0x4131c1bd, -0x26cab20a, -0x7913a7de, 0x758094a1, -0x1ba60c3e, 0x4464ee12, -0x34eccd7e, 0x6c11fce4 }, + { 0x68673205, -0x0e84b7cb, 0x3caad96c, 0x387deae8, 0x56ffe386, 0x61b471fd, -0x48ba5a67, 0x31741195 } + }, + { + { 0x3b02a047, 0x17f8ba68, -0x01104938, 0x50212096, 0x1556cbe2, 0x70139be2, 0x1d98915b, 0x203e44a1 }, + { -0x4885c9f5, -0x172efe70, -0x666a18fe, -0x66467ce0, -0x05fdb856, -0x42b02008, -0x1f2c9579, 0x2772e344 }, + { 0x37b9e39f, -0x2979c146, 0x723b5a23, 0x105bc169, -0x59a3f89e, 0x104f6459, 0x5b4d38d4, 0x56795129 } + }, + { + { 0x0d4b497f, 0x07242eb3, -0x46433379, 0x1ef96306, -0x27ee90bb, 0x37950934, 0x01405b04, 0x05468d62 }, + { 0x13037524, 0x535fd606, -0x4f043d96, -0x1def520a, 0x23e990ae, -0x5372f565, -0x28d02407, 0x47204d08 }, + { -0x06cd9822, 0x00f565a9, -0x3f2a7176, -0x31302873, -0x0ce71d72, -0x5dea1d24, -0x649cccae, 0x4599ee91 } + }, + { + { -0x79e51a87, -0x538b9295, -0x09515624, 0x31ab0650, 0x40256d4c, 0x241d6611, 0x3d21a5de, 0x2f485e85 }, + { 0x70e0e76b, -0x2c3ddf36, -0x1560cf6c, -0x4ed415a8, -0x3cd8ed7e, 0x294ddec8, -0x5e2e2fd8, 0x0c3539e1 }, + { -0x63f7cc0d, 0x32974483, -0x2d543b7c, 0x6fe6257f, 0x4b358817, 0x5327d181, -0x76c01644, 0x65712585 } + }, + { + { -0x28f711c1, -0x7e3d60e5, -0x519bf830, -0x2234a5fb, -0x2d5c1459, -0x68513e29, -0x6e2af7cf, 0x1590521a }, + { 0x32a61161, -0x63efd049, 0x34d520a8, -0x1b71ef23, 0x6f9a9176, 0x365c6354, 0x046f6006, 0x32f6fe4c }, + { -0x386ef534, 0x40a3a11e, -0x0e92d852, -0x6fec2008, -0x544e6a2c, 0x1a9720d8, 0x2ea98463, 0x1bb9fe45 } + }, + { + { -0x33c98b84, -0x30a1936b, 0x6b0bc30d, 0x29420153, -0x11868510, 0x453ac67c, 0x2a8bb3c9, 0x5eae6ab3 }, + { -0x4c2ab062, -0x162e26b0, -0x1ff2cc3f, 0x2d5f9cbe, -0x5fb03954, 0x51c2c656, 0x3c1cbcc9, 0x65c091ee }, + { 0x14f118ea, 0x70836611, -0x6bcb6353, 0x2b37b87b, -0x4b1660c0, 0x7273f51c, 0x23d75698, 0x78a2a958 } + }, + { + { 0x5ef83207, -0x4b0dc3be, -0x3656cb4b, -0x54076b2d, 0x39fd87f7, -0x2f8f73ed, 0x17166130, 0x18767891 }, + { 0x5c8c2ace, -0x5d4f8d17, 0x651e9c4b, 0x69cffc96, 0x42e7b42b, 0x44328ef8, 0x22aadeb3, 0x5dd996c1 }, + { 0x670c507c, -0x6da4a110, -0x46c3cc41, -0x7e6437be, 0x70dd003f, 0x10792e9a, 0x6e28dc74, 0x59ad4b7a } + }, +}, +{ + { + { -0x5352715e, 0x583b04bf, 0x148be884, 0x29b743e8, 0x0810c5db, 0x2b1e583b, -0x714c4456, 0x2b5449e5 }, + { -0x14c241b9, 0x5f3a7562, -0x71425f48, -0x0815c7ac, 0x45747299, 0x00c3e531, 0x1627d551, 0x1304e9e7 }, + { 0x6adc9cfe, 0x789814d2, -0x74b722f5, 0x3c1bab3f, -0x068639f6, -0x25f01e01, 0x7c2dd693, 0x4468de2d } + }, + { + { -0x079cf832, 0x4b9ad8c6, 0x435d0c28, 0x21113531, 0x657a772c, -0x2b57993b, 0x63247352, 0x5da6427e }, + { -0x6be6b962, 0x51bb355e, 0x23ddc754, 0x33e6dc4c, 0x447f9962, -0x6c5a492a, -0x04bb429d, 0x6cce7c6f }, + { -0x2153dd36, 0x1a94c688, -0x4451e008, -0x46f99109, -0x72a6a7f1, -0x775273c8, -0x1860d358, 0x58f29abf } + }, + { + { 0x710ecdf6, 0x4b5a64bf, 0x462c293c, -0x4eb31ac8, -0x2af4c547, 0x3643d056, 0x185b4870, 0x6af93724 }, + { -0x7218c198, -0x16f13055, 0x377e76a5, 0x54036f9f, -0x41fea67e, -0x0fb6a4f5, -0x580be1ca, 0x577629c4 }, + { 0x09c6a888, 0x32200245, 0x4b558973, -0x2d1fc9ed, 0x3c33289f, -0x7c1dc9dd, 0x0caec18f, 0x701f25bb } + }, + { + { 0x7cbec113, -0x62e70927, 0x74bfdbe4, -0x7bb5f91a, -0x53b19f2a, 0x20f5b522, 0x50955e51, 0x720a5bc0 }, + { -0x1b9e9313, -0x3c574f08, -0x61da5783, -0x08ff99f2, -0x0b435a64, 0x61e3061f, -0x423bf417, 0x2e0c92bf }, + { -0x647fa5cb, 0x0c3f0943, 0x6242abfc, -0x17b174c9, 0x5c229346, 0x691417f3, 0x144ef0ec, 0x0e9b9cbb } + }, + { + { 0x5db1beee, 
-0x7211642b, 0x0a723fb9, -0x363c54c9, 0x1c68d791, 0x44a8f1bf, 0x1cfd3cde, 0x366d4419 }, + { -0x04a8df53, -0x04452b71, -0x2406f2f2, -0x117e6e95, 0x635543bf, -0x2b7eceae, 0x3f337bd8, 0x221104eb }, + { -0x0d4373ec, -0x61c3e8bd, -0x4a7a93c5, 0x2eda26fc, 0x68a7fb97, -0x3347d0f2, -0x43a6cdbc, 0x4167a4e6 } + }, + { + { -0x07317012, -0x3d41d99b, -0x177f29d4, -0x169800ec, 0x2f364eee, -0x0ed19182, -0x34812d0a, 0x34b33370 }, + { 0x76f62700, 0x643b9d28, 0x0e7668eb, 0x5d1d9d40, 0x21fc0684, 0x1b4b4303, 0x2255246a, 0x7938bb7e }, + { -0x797e2934, -0x323a6e12, -0x127a58ad, -0x31fdef64, 0x58808883, -0x128b7a3f, 0x2dfe65e4, 0x1176fc6e } + }, + { + { 0x49770eb8, -0x246f1d77, -0x530bbf5d, -0x670433d6, -0x21287865, 0x21354ffe, -0x0d96f94a, 0x1f6a3e54 }, + { 0x5b9c619b, -0x4b509330, -0x4d5a7b80, 0x2ddfc9f4, -0x1416b23c, 0x3d4fa502, 0x677d5f34, 0x08fc3a4c }, + { -0x2cf8cb16, 0x60a4c199, 0x31165cd6, 0x40c085b6, -0x08a67d6b, -0x1dccc1dd, 0x16b900d1, 0x4f2fad01 } + }, + { + { -0x48c449c8, -0x69d326e3, -0x03ed63f8, -0x19fa8856, -0x0c49e977, 0x6f619b39, 0x2944ee81, 0x3451995f }, + { -0x6b51b1ac, 0x44beb241, 0x1857ef6c, 0x5f541c51, 0x368d0498, -0x59e194d3, -0x68d10855, 0x445484a4 }, + { -0x60158284, -0x6ead0330, -0x4f6ca30a, 0x4a816c94, 0x47285c40, 0x258e9aaa, 0x042893b7, 0x10b89ca6 } + }, +}, +{ + { + { 0x79d34aa0, -0x2983212a, -0x33b24c61, -0x33f46140, -0x1ca2e6f1, -0x5aca5baa, -0x09e09011, 0x2e05d9ea }, + { 0x3b646025, -0x64d5bd92, 0x385ce4cf, 0x32127190, -0x229215bb, -0x5da3003e, -0x4157218b, 0x06409010 }, + { -0x29e414a7, -0x3bb86fe6, -0x1a2377f6, 0x661f19bc, -0x483597d9, 0x24685482, -0x101f80da, 0x293c778c } + }, + { + { -0x5ee00e00, 0x16c795d6, -0x4ea7ea37, -0x348f2f1e, -0x64ac6a4b, -0x760d6ce0, 0x31e47b4f, 0x50b8c2d0 }, + { 0x07069096, -0x797f6190, -0x1b1afe77, -0x5528a4eb, -0x5de5feb9, 0x07f35715, 0x12815d5e, 0x0487f3f1 }, + { 0x068a4962, 0x48350c08, 0x51092c9a, 0x6ffdd053, -0x50903723, 0x17af4f4a, 0x3cdba58b, 0x4b0553b5 } + }, + { + { 0x27c152d4, -0x40fadee5, -0x42e509c7, 0x5ec26849, -0x71905468, 0x5e0b2caa, 0x50bd0840, 0x054c8bdd }, + { 0x1b32ff79, -0x639a0342, 0x03b50f9b, -0x148a1561, 0x6c07e606, -0x0312d594, 0x51717908, 0x35106cd5 }, + { 0x1dcf073d, 0x38a0b12f, -0x48095d8a, 0x4b60a8a3, -0x2cbfb066, -0x012a53db, 0x5505c229, 0x72e82d5e } + }, + { + { 0x69771d02, 0x00d9cdfd, 0x6cfbf17e, 0x410276cd, 0x1cb12ec7, 0x4c45306c, 0x27500861, 0x2857bf16 }, + { -0x0f27bb38, 0x6b0b697f, -0x268634b7, -0x44ed07a4, -0x3e25f0e1, -0x2d5abe3a, 0x58ce7211, 0x7b7c2429 }, + { 0x0101689e, -0x60de6fc1, -0x4079effb, -0x2886202d, 0x3deb0f1b, -0x5edd11a1, 0x485a00d4, 0x510df84b } + }, + { + { -0x38f53ea2, 0x24b3c887, -0x047e48ce, -0x4f0c5aa9, -0x1a8733e5, -0x64d321d1, 0x03b54f8e, 0x4cf7ed07 }, + { -0x6d885e06, -0x5abecc45, 0x63991237, 0x74ec3b62, 0x35d2f15a, 0x1a3c54dc, -0x1b7d45c6, 0x2d347144 }, + { -0x670411f1, 0x6bd47c65, -0x54aa41d3, -0x61b8cc1e, 0x127610c5, 0x1093f624, -0x2f5e155c, 0x4e05e26a } + }, + { + { -0x1e701940, 0x1833c773, -0x2c378d9b, -0x1c3b8ee6, 0x0116b283, 0x3bfd3c4f, -0x4b32b248, 0x1955875e }, + { 0x4b531f20, -0x2564949e, 0x77509abb, 0x429a760e, -0x17dc3480, -0x24160ade, -0x77f3707e, 0x618f1856 }, + { 0x0e399799, 0x6da6de8f, 0x40fda178, 0x7ad61aa4, 0x5e3563dd, -0x4cd327f0, 0x2ae340ae, 0x15f6beae } + }, + { + { -0x6dba1deb, -0x4565f085, -0x2673f245, -0x0c979ed3, -0x0ddf4fe0, 0x2e84e4cb, 0x62d90eda, 0x6ba92fe9 }, + { 0x31ec3a62, -0x79d434f4, 0x1138f3c2, -0x7ef1d4bb, 0x39dac2a4, 0x788ec4b8, -0x51d56d7f, 0x28f76867 }, + { 0x5884e2aa, 0x3e4df965, -0x242b9a5b, -0x429d0425, 0x0de9e524, -0x28a69356, -0x4d4e4c29, 0x6e8042cc 
} + }, + { + { 0x16521f7e, 0x15306536, -0x69dfc246, 0x660d06b8, 0x545f0879, 0x2d3989bc, 0x78ebd7b0, 0x4b5303af }, + { -0x31d73592, -0x0ef2c3d7, -0x0349f6c3, -0x452cbac0, -0x5d15d2c1, -0x18bd9129, 0x4ff298b9, 0x08af9d4e }, + { -0x41434218, 0x72f8a6c3, -0x23c57177, 0x4f0fca4a, -0x38402086, 0x6fa9d4e8, -0x649db149, 0x0dcf2d67 } + }, +}, +{ + { + { 0x5a45f06e, 0x753941be, 0x6d9c5f65, -0x2f835113, 0x72ff51b6, 0x11776b9c, -0x10f2b257, 0x17d2d1d9 }, + { -0x68e7d764, 0x3d594749, 0x24533f26, 0x12ebf8c5, 0x14c3ef15, 0x0262bfcb, 0x77b7518e, 0x20b878d5 }, + { 0x073f3e6a, 0x27f2af18, -0x28adef97, -0x02c01ae7, 0x3ca60022, 0x22e3b72c, -0x339a3959, 0x72214f63 } + }, + { + { -0x0bc4d637, 0x1d9db7b9, 0x4f518f75, -0x29fa7db6, 0x312f9dc4, -0x0d3f8d43, 0x5a1545b0, 0x1f24ac85 }, + { 0x5307a693, -0x4b1c80c0, 0x2f336795, -0x5458eb29, 0x73761099, -0x29042f59, -0x7e8e3437, 0x5fdf48c5 }, + { -0x716afa56, 0x24d60832, 0x0c1420ee, 0x4748c1d1, 0x06fb25a2, -0x38001ba4, 0x2ae395e6, 0x00ba739e } + }, + { + { -0x157744da, -0x51bbd90b, -0x7b68c405, 0x360679d9, 0x26694e50, 0x5c9f030c, -0x2ae72dda, 0x72297de7 }, + { 0x5c8790d6, 0x592e98de, 0x45c2a2df, -0x1a40482d, -0x064b66de, 0x115a3b60, 0x67ad78f3, 0x03283a3e }, + { -0x41f346c7, 0x48241dc7, -0x749ccf80, 0x32f19b4d, 0x02289308, -0x2c2036f3, 0x46271945, 0x05e12968 } + }, + { + { 0x242c4550, -0x52404438, -0x2fcf7e27, -0x4337f314, -0x0a37206e, -0x7bca995a, -0x7da731b4, 0x78cf25d3 }, + { 0x2d9c495a, -0x457d114d, -0x0ed44684, -0x31103704, -0x6c4a2e20, -0x4fd25452, 0x13698d9b, 0x39c00c9c }, + { 0x31489d68, 0x15ae6b8e, -0x63d40f79, -0x557ae355, -0x0fb105fb, -0x3658a569, 0x6b3ff832, 0x006b5207 } + }, + { + { -0x4631f7d3, -0x0a3481ea, 0x417abc29, 0x3407f14c, 0x2bf4a7ab, -0x2b4c9432, 0x1a9f75ce, 0x7de2e956 }, + { -0x626a87e4, 0x29e0cfe1, -0x699cef1e, -0x497e20e8, 0x70516b39, 0x57df39d3, 0x3bc76122, 0x4d57e344 }, + { -0x495aa135, -0x218f2b0c, 0x5d85db99, 0x4801527f, -0x2c11657f, -0x24363bc0, 0x1a6029ed, 0x6b2a90af } + }, + { + { 0x5bb2d80a, 0x77ebf324, 0x2fb9079b, -0x27cfe4b9, 0x4cee7333, -0x39b8190e, 0x276c2109, 0x465812c8 }, + { -0x6519e169, 0x6923f4fc, -0x1fc0a02f, 0x5735281d, -0x19122ed3, -0x589b51bd, -0x2ed2c1b6, 0x5fd8f4e9 }, + { 0x2a1062d9, 0x4d43beb2, 0x3831dc16, 0x7065fb75, -0x21d69729, 0x180d4a7b, 0x1cb16790, 0x05b32c2b } + }, + { + { 0x7ad58195, -0x08035bd4, 0x4333f3cc, 0x3214286e, 0x340b979d, -0x493d62f3, 0x567307e1, 0x31771a48 }, + { -0x2db25703, -0x373fa134, 0x05dfef83, -0x5e30e554, 0x7df9cd61, -0x2441100e, 0x7b471e99, 0x3b5556a3 }, + { -0x1eb22b7e, 0x32b0c524, 0x1a2ba4b6, -0x124caeac, 0x282b5af3, -0x5c2e9fb8, 0x7a7336eb, 0x4fc079d2 } + }, + { + { 0x0c86c50d, -0x23cb74bc, -0x336b19af, 0x1337cbc9, 0x643e3cb9, 0x6422f74d, -0x451c32f8, 0x241170c2 }, + { -0x7640d081, 0x51c938b0, 0x02dfe9a7, 0x2497bd65, 0x7880e453, -0x00003f64, -0x3506716e, 0x124567ce }, + { 0x0ac473b4, 0x3ff9ab86, 0x0113e435, -0x0f6ee212, -0x14393b51, 0x4ae75060, 0x6c87000d, 0x3f861296 } + }, +}, +{ + { + { 0x638c7bf3, 0x529fdffe, 0x388b4995, -0x20d461a0, 0x1bad0249, -0x1fd84cb1, -0x46058b13, 0x7bc92fc9 }, + { -0x086a841c, 0x0c9c5303, -0x1f7a3ebb, -0x5c3ce5e0, -0x2f7affb0, -0x4f8de28f, -0x54f40d26, 0x0aba390e }, + { -0x7fe52607, -0x606810d2, 0x79afda3a, -0x7c9682ac, -0x42a694b0, -0x16f94c01, -0x22c04720, 0x02672b37 } + }, + { + { 0x398ca7f5, -0x116458d7, 0x7a4849db, -0x146359db, 0x7ec544e1, 0x29eb29ce, -0x08c91d38, 0x232ca21e }, + { 0x260885e4, 0x48b2ca8b, -0x7d4cb3e4, -0x5bd79414, 0x17f58f74, -0x6c81e5da, -0x54d35d5b, 0x741d1fcb }, + { 0x253fcb17, -0x409ebdc3, -0x05c614ec, 0x08803cea, -0x67ae3851, 
-0x0e79fd21, 0x49e3414b, 0x0400f3a0 } + }, + { + { -0x5f9184fa, 0x2efba412, 0x2c8d2560, 0x14678545, -0x29856e39, -0x2068ec15, 0x157eadf3, 0x32830ac7 }, + { -0x459e3aa5, -0x5431fb8a, -0x3b2c68ea, 0x36a3d6d7, -0x1727d2f7, 0x6eb259d5, -0x7b28a905, 0x0c9176e9 }, + { -0x48c89618, 0x0e782a7a, 0x75b18e2c, 0x04a05d78, -0x1433151f, 0x29525226, -0x7c1457e0, 0x0d794f83 } + }, + { + { -0x585d1e54, 0x7be44ce7, -0x052e4749, 0x411fd93e, 0x0d5f7c9b, 0x1734a1d7, 0x3127db16, 0x0d659223 }, + { -0x61eae90c, -0x00ca0a35, 0x648aae45, -0x117fa431, -0x46c5610d, -0x0f28c3d5, 0x2092a6c2, 0x097b0bf2 }, + { 0x21a9d733, -0x3b7454eb, -0x29e544db, -0x593d1516, -0x3934bcfb, 0x625c6c1c, -0x6c14c599, 0x7fc90fea } + }, + { + { -0x63834dc3, -0x3ad8214b, 0x5328404e, -0x6aac6e97, 0x7ccf2c7a, -0x29bc6d7f, -0x082705ef, 0x6ce97dab }, + { 0x1f5c5926, 0x0408f1fe, 0x3b258bf4, 0x1a8f2f5e, -0x0238e997, 0x40a951a2, -0x3674a882, 0x6598ee93 }, + { 0x0ef7c48f, 0x25b5a8e5, 0x6f2ce532, -0x149fcbef, -0x1ac21ac9, -0x3a18ae8d, -0x73ed44fd, 0x73119fa0 } + }, + { + { 0x21f4774d, 0x7845b94d, 0x7897b727, -0x409d0e94, 0x3c56522b, 0x671857c0, -0x6a9dedee, 0x3cd6a852 }, + { 0x53f1a4cb, -0x12cfed6c, -0x370ac879, -0x4319de37, 0x38bee7b9, -0x0534d4ed, -0x6157bd74, 0x3025798a }, + { 0x3aeca999, 0x3fecde92, 0x62e8c12f, -0x4255a500, -0x69677522, 0x67b99dfc, 0x52661036, 0x3f52c028 } + }, + { + { -0x113be93a, -0x6da74067, -0x562d098f, -0x5375afe9, 0x16dea4ab, 0x629549ab, -0x66f6ea97, 0x05d0e85c }, + { 0x2a1351c6, -0x00155b72, -0x0580ac29, 0x28624754, 0x7582ddf1, 0x0b5ba9e5, -0x596953a7, 0x60c0104b }, + { -0x21634169, 0x051de020, -0x4af4308c, -0x05f803aa, 0x0f11df65, 0x378cec9f, -0x546921b3, 0x36853c69 } + }, + { + { -0x053a1842, 0x4433c0b0, 0x4c08dcbe, 0x724bae85, 0x46978f9b, -0x0e0db33c, 0x62825fc8, 0x4a0aff6d }, + { 0x78f39b2d, 0x36d9b8de, -0x57b84614, 0x7f42ed71, 0x79bd3fde, 0x241cd1d6, -0x6d043195, 0x6a704fec }, + { 0x61095301, -0x16e80462, 0x02a092f8, -0x3efd206c, -0x0599e6f5, -0x40f61d0b, -0x1f2301c9, 0x681109be } + }, +}, +{ + { + { 0x36048d13, -0x63e70306, 0x73899ddd, 0x29159db3, -0x606d2f56, -0x2360caf5, -0x7875e62c, 0x26f57eee }, + { 0x782a0dde, 0x559a0cc9, -0x158e7c7b, 0x551dcdb2, 0x31ef238c, 0x7f62865b, 0x7973613d, 0x504aa776 }, + { 0x5687efb1, 0x0cab2cd5, 0x247af17b, 0x5180d162, 0x4f5a2467, -0x7a3ea5cc, -0x6245cf97, 0x4041943d } + }, + { + { -0x5d935523, 0x4b217743, 0x648ab7ce, 0x47a6b424, 0x03fbc9e3, -0x34e2b086, -0x67ff2fe7, 0x12d93142 }, + { 0x43ebcc96, -0x3c3f1146, 0x26ea9caf, -0x728b6364, 0x1c77ccc6, -0x26056a12, 0x7684340f, 0x1420a1d9 }, + { -0x2cc8a6b1, 0x00c67799, -0x4dc55b85, 0x5e3c5140, -0x1ca00c6b, 0x44182854, 0x4359a012, 0x1b4f9231 } + }, + { + { -0x5b67994f, 0x33cf3030, 0x215f4859, 0x251f73d2, 0x51def4f6, -0x547d55c0, 0x6f9a23f6, 0x5ff191d5 }, + { -0x76eaf6af, 0x3e5c109d, 0x2de9696a, 0x39cefa91, -0x68a0cfe0, 0x20eae43f, 0x7f132dae, 0x239b572a }, + { -0x53d26f98, -0x7e612bcd, 0x5fc98523, 0x2883ab79, 0x5593eb3d, -0x10ba8d80, 0x758f36cb, 0x020c526a } + }, + { + { -0x0fbd3377, -0x16ce10a7, -0x71edb44a, 0x2c589c9d, -0x5138a669, -0x52371e76, 0x5602c50c, 0x452cfe0a }, + { -0x61272444, 0x779834f8, -0x23835b94, -0x370d5507, -0x5c1e4f8c, -0x56adb324, 0x15313877, 0x02aacc46 }, + { 0x647877df, -0x795f0860, 0x0e607c9f, -0x443b9bd9, -0x0e04ee37, -0x54e815db, 0x304b877b, 0x4cfb7d7b } + }, + { + { -0x687610ee, -0x1d79663e, -0x20a8e6f3, 0x2b6ecd71, -0x13368f30, -0x3cbc37a9, 0x434d3ac5, 0x5b1d4cbc }, + { -0x47648a02, 0x72b43d6c, -0x63952380, 0x54c694d9, 0x3ee34c9f, -0x473c55c9, 0x39075364, 0x14b4622b }, + { -0x33f560da, -0x4904d9eb, -0x4772331b, 
0x3a4f0e2b, 0x3369a705, 0x1301498b, 0x58592dd1, 0x2f98f712 } + }, + { + { 0x4f54a701, 0x2e12ae44, -0x56342822, -0x0301c110, 0x75835de0, -0x314076f3, -0x189ebaac, 0x1d8062e9 }, + { -0x4af061aa, 0x0c94a74c, -0x7171ece0, 0x5b1ff4a9, -0x7dcff099, -0x65d533df, -0x27f95507, 0x3a6ae249 }, + { -0x566f83a6, 0x657ada85, -0x6e46f09e, 0x1a0ea8b5, -0x20cb4b17, -0x72f1e205, -0x510da00d, 0x298b8ce8 } + }, + { + { 0x0a2165de, -0x7c858d16, 0x0bcf79f6, 0x3fab07b4, 0x7738ae70, 0x521636c7, 0x03a7d7dc, 0x6ba62718 }, + { -0x1008f34e, 0x2a927953, 0x79157076, 0x4b89c92a, 0x30a7cf6a, -0x6be7ba86, 0x4d5ce485, 0x34b8a840 }, + { -0x7c96cccb, -0x3d91134b, 0x63b5fefd, -0x2a57ec21, -0x5b4dda8d, -0x5d6c5566, 0x465e1c6a, 0x71d62bdd } + }, + { + { -0x4e08a10b, -0x32d24a26, 0x16b065f5, -0x28806a31, 0x3f49f085, 0x14571fea, 0x262b2b3d, 0x1c333621 }, + { -0x2c872080, 0x6533cc28, 0x0a0fa4b4, -0x0924bc87, -0x08fe25a6, -0x1c9ba007, -0x0ce8d45c, 0x74d5f317 }, + { 0x67d9ca81, -0x57901aac, 0x2b298c37, 0x398b7c75, -0x1c539dc5, -0x2592f76e, 0x47e9d98c, 0x4aebcc45 } + }, +}, +{ + { + { -0x5fa65bbb, 0x0de9b204, 0x4b17ad0f, -0x1ea34b56, 0x1f79c557, -0x1e4413ae, -0x2f8ef7e5, 0x2633f1b9 }, + { 0x05d21a77, 0x53175a72, -0x2c46cb2c, -0x4f3fbbde, -0x22a21524, -0x52260db5, -0x60ef0074, 0x074f46e6 }, + { 0x018b9910, -0x3e04be89, 0x6c0fe140, -0x5915df24, 0x4354c6ff, -0x299e0c19, -0x0e5cbf86, 0x5ecb72e6 } + }, + { + { -0x17179669, -0x01151efa, -0x672f6c7d, -0x679ccc81, -0x55f91411, -0x6b8fb7f2, -0x2b3a3d30, 0x038b6898 }, + { 0x2259fb4e, -0x5aea5ce5, 0x2bcac52f, 0x0960f397, -0x72cbab35, -0x124ad014, -0x3b893fe7, 0x382e2720 }, + { -0x7531af5a, -0x0c6e3ae3, -0x51d2d6b8, 0x3142d0b9, 0x7f24ca80, -0x24b2a5e6, 0x59250ea8, 0x21aeba8b } + }, + { + { -0x0ff780dd, 0x53853600, -0x2582a87c, 0x4c461879, -0x4be097a0, 0x6af303de, -0x3d83e713, 0x0a3c16c5 }, + { -0x30bfaad0, 0x24f13b34, 0x43088af7, 0x3c44ea4a, 0x0006a482, 0x5dd5c517, -0x76f4f793, 0x118eb8f8 }, + { -0x336b80c3, 0x17e49c17, -0x553e2d85, -0x3339125a, -0x4f0f71aa, -0x209f6d32, 0x2c67c36b, 0x4909b3e2 } + }, + { + { 0x706ff64e, 0x59a16676, 0x0d86a53d, 0x10b953dd, -0x31a3f46a, 0x5848e1e6, 0x12780c68, 0x2d8b78e7 }, + { 0x63fe2e89, -0x63637a16, 0x0e9412ec, -0x41e4506f, -0x79040185, -0x70845576, -0x10697494, 0x0fb17f9f }, + { -0x503c6fd5, 0x79d5c62e, -0x7617f8d8, 0x773a2152, -0x1efedf47, -0x3c7519c0, 0x7b2b1a6d, 0x09ae2371 } + }, + { + { -0x52cd4e30, 0x10ab8fa1, -0x1d8874dc, -0x165312e5, 0x373de90f, -0x577a9440, -0x225ac66a, 0x66f35ddd }, + { 0x4e4d083c, -0x4495e6d6, 0x0029e192, 0x34ace063, -0x55054515, -0x67dba5a7, -0x25680554, 0x6d9c8a9a }, + { 0x24997323, -0x2d826505, -0x090fe2d2, 0x1bb7e07e, -0x0ad13381, 0x2ba7472d, 0x646f9dc8, 0x03019b4f } + }, + { + { -0x194c2395, -0x50f64dec, -0x5282d09b, 0x3f7573b5, 0x100a23b0, -0x2fe62678, -0x74a3ca09, 0x392b63a5 }, + { 0x565345cd, 0x04a186b5, -0x433bee96, -0x111899f0, 0x78fb2a45, 0x689c73b4, 0x65697512, 0x387dcbff }, + { -0x63f83dfb, 0x4093addc, -0x0acd3c82, -0x3a9a41eb, 0x1583402a, 0x63dbecfd, -0x10d1fcd2, 0x61722b4a } + }, + { + { -0x7e34f1c4, -0x294f85ab, -0x26bbb697, 0x290ff006, 0x16dcda1f, 0x08680b6a, 0x5a06de59, 0x5568d2b7 }, + { -0x1342b851, 0x0012aafe, 0x1cd46309, 0x55a266fb, 0x0967c72c, -0x0dfc1498, -0x35c3ebd7, 0x39633944 }, + { 0x1b37cfe1, -0x72f34774, 0x053818f3, 0x05b6a5a3, -0x487826a7, -0x0d1643fc, -0x6522809c, 0x6beba124 } + }, + { + { 0x43f5a53b, 0x5c3cecb9, 0x06c08df2, -0x633659e3, -0x7a76abb9, -0x30459c66, 0x0df09fd5, 0x5a845ae8 }, + { -0x5a4e4ebd, 0x1d06005c, 0x7fd1cda2, 0x6d4c6bb8, 0x53fcffe7, 0x6ef59676, -0x3e31e15b, 0x097c29e8 }, + { 
0x5deb94ca, 0x4ce97dbe, -0x738f63b8, 0x38d0a438, -0x5e962f69, -0x3bc1312c, -0x081a783d, 0x0a1249ff } + }, +}, +{ + { + { 0x7354b610, 0x0b408d9e, 0x5ba85b6e, -0x7f94cdad, 0x4a58a207, -0x2419c5fd, -0x365e20d4, 0x173bd9dd }, + { 0x276d01c9, 0x12f0071b, -0x793b7390, -0x1847453b, 0x71d6fba9, 0x5308129b, 0x5a3db792, 0x5d88fbf9 }, + { -0x01a78d21, 0x2b500f1e, -0x2bc6e73f, 0x58d6582e, -0x3698c520, -0x1912d872, -0x4e615ce7, 0x06e1cd13 } + }, + { + { -0x61a4fcad, 0x472baf62, 0x278d0447, 0x3baa0b90, -0x69bc40d9, 0x0c785f46, -0x727c84ed, 0x7f3a6a1a }, + { 0x6f166f23, 0x40d0ad51, 0x1fab6abe, 0x118e3293, -0x5fb2f772, 0x3fe35e14, 0x26e16266, 0x30806035 }, + { 0x5d3d800b, -0x0819bbc7, -0x36fe120a, -0x6a572aab, 0x592c6339, 0x68cd7830, 0x2e51307e, 0x30d0fded } + }, + { + { 0x68b84750, -0x634b68e2, 0x6664bbcf, -0x5f6a8dd7, 0x72fa412b, 0x5c8de726, 0x51c589d9, 0x46150843 }, + { -0x0dedcc4d, -0x1fa6b2e6, -0x0f33b264, 0x1bdbe78e, -0x70b66589, 0x6965187f, 0x2c099868, 0x0a921420 }, + { -0x51465fd2, -0x436fe640, 0x16034cae, 0x55c7110d, 0x659932ec, 0x0e6df501, -0x6a35a202, 0x3bca0d28 } + }, + { + { -0x6133fe41, -0x6397714a, -0x59bb7691, -0x0f437c53, 0x5f7a9fe2, -0x35d26aa1, -0x720d7dbf, 0x4ea8b403 }, + { 0x3c5d62a4, 0x40f031bc, -0x300f85a0, 0x19fc8b3e, 0x130fb545, -0x67e7c25e, -0x5170ec33, 0x5631dedd }, + { -0x0e352dfe, 0x2aed460a, -0x5b73117d, 0x46305305, 0x49f11a5f, -0x6ede88bb, 0x542ca463, 0x24ce0930 } + }, + { + { -0x020cf47b, 0x3fcfa155, 0x36372ea4, -0x2d08e972, 0x6492f844, -0x4d1f9b22, 0x324f4280, 0x549928a7 }, + { -0x02f93efa, 0x1fe890f5, 0x5d8810f2, -0x4a3b97cb, 0x6e8caf3e, -0x7d87f702, -0x75f928b5, 0x41d4e3c2 }, + { 0x63ee1a2e, -0x0d91cd59, -0x2da00216, -0x516e1b49, -0x2e80b297, -0x43c42cc5, -0x3f230096, 0x491b66de } + }, + { + { -0x2f259b5f, 0x75f04a8e, 0x67e2284b, -0x12ddd351, 0x1f7b7ba4, -0x7dcb5c87, -0x48fe7499, 0x4cf6b8b0 }, + { -0x3815cd59, -0x670a4ec3, 0x7e16db98, -0x1c2a0734, -0x340726b9, -0x53f540ae, -0x37a11b54, 0x08f338d0 }, + { -0x66e58c43, -0x3c7c57df, -0x20cdf386, -0x54d843ff, -0x7b888f9d, -0x3ec2cce5, -0x14f87567, 0x530d4a82 } + }, + { + { 0x6c9abf9e, 0x6d697345, 0x4900a880, 0x257fb2fc, -0x373047b0, 0x2bacf412, 0x0cbfbd5b, 0x0db3e7e0 }, + { -0x1e06b7db, 0x004c3630, -0x7354aca6, 0x7e2d7826, -0x337b0075, -0x38b7dcdd, 0x101770b9, 0x65ea753f }, + { -0x1df69c9d, 0x3d66fc3e, 0x61b5cb6b, -0x7e29d381, 0x13443b1a, 0x0fbe0442, 0x21e1a1db, 0x02a4ec19 } + }, + { + { -0x0e3086a1, -0x0a379e9e, 0x26ee57f2, 0x118c8619, 0x1c063578, 0x17212485, -0x13f98031, 0x36d12b5d }, + { 0x3b24b8a2, 0x5ce6259a, 0x45afa0b8, -0x47a88534, -0x745f8fc9, -0x33341918, 0x127809bf, 0x3d143c51 }, + { 0x79154557, 0x126d2791, -0x0387c5f6, -0x2a1b70a4, -0x20e86454, 0x36bdb6e8, 0x5ba82859, 0x2ef51788 } + }, +}, +{ + { + { 0x7c6da1e9, 0x1ea43683, 0x1fb9bdbe, -0x063e7651, -0x31a22eab, 0x303001fc, -0x43a841ae, 0x28a7c99e }, + { -0x2ee1f2b6, -0x7742bc74, 0x43ccf308, 0x30cb610d, -0x6e6c8434, -0x1f65f1c9, 0x25b1720c, 0x4559135b }, + { -0x172e6163, -0x47026c67, -0x69dbdc01, -0x6f7e6e35, 0x47c742a3, -0x4d46b729, -0x2804bb3c, 0x37f33226 } + }, + { + { -0x37de4ee3, 0x33912553, 0x41e301df, 0x66ed42c2, 0x104222fd, 0x066fcc11, -0x3e6de971, 0x307a3b41 }, + { -0x4aa091f8, 0x0dae8767, 0x5b203a02, 0x4a43b3b3, -0x7f507387, -0x1c8da592, 0x705fa7a3, 0x0f7a7fd1 }, + { 0x6eb55ce0, -0x7114a2f9, -0x55f26da6, 0x2fc536bf, -0x23493918, -0x417e7cf1, -0x7d8450ae, 0x556c7045 } + }, + { + { 0x2bf44406, -0x46b46ffe, -0x006f4acc, -0x542bdc82, -0x050792c6, 0x7600a960, -0x3dcdd11d, 0x2f45abda }, + { 0x02e9d8b7, -0x71d4ae8d, 0x248714e8, -0x1c1add97, 0x4ca960b5, 
-0x42b04289, -0x3a135257, 0x6f4b4199 }, + { -0x37107596, 0x61af4912, 0x43fb6e5e, -0x1a705b02, 0x6fd427cf, -0x4a5033a3, 0x1e1e11eb, 0x6a539328 } + }, + { + { 0x149443cf, 0x0fff04fe, -0x79a32229, 0x53cac6d9, 0x531ed1b7, 0x31385b03, -0x532efc63, 0x5846a27c }, + { -0x5a2e1177, -0x0c25aec7, -0x006c9678, -0x7ebaba84, 0x00e188c4, 0x3f622fed, -0x2474a5c3, 0x0f513815 }, + { 0x1eb08717, 0x4ff5cdac, -0x6f0d1644, 0x67e8b295, 0x237afa99, 0x44093b5e, -0x78f7474e, 0x0d414bed } + }, + { + { 0x294ac9e8, -0x7e77956e, -0x2aaab842, 0x23162b45, 0x03715983, -0x6b3043bc, 0x134bc401, 0x50eb8fdb }, + { -0x02f18a0a, -0x30497d9b, -0x446f18f9, -0x1ba4c1d8, -0x6006d386, 0x7242a8de, -0x6ccdfd23, 0x685b3201 }, + { -0x294ccf33, -0x3f48c13a, 0x132faff1, -0x7b1bb7f9, -0x3b5a211f, 0x732b7352, -0x55832d2e, 0x5d7c7cf1 } + }, + { + { -0x648c5a9e, 0x33d1013e, 0x48ec26e1, -0x6da310a9, -0x22b97fa8, -0x580319ec, 0x1e9aa438, 0x78b0fad4 }, + { 0x7a4aafa2, -0x50c4b941, 0x4d40d411, -0x4878fa14, -0x3583ea1d, 0x114f0c6a, -0x56b762b3, 0x3f364faa }, + { -0x12fa4b78, -0x40a95bcf, -0x63b6a382, -0x5acc1994, -0x780c9ae6, -0x179ad451, 0x59d66c33, 0x02418000 } + }, + { + { -0x30c715ff, 0x28350c7d, -0x4d6e854a, 0x7c6cdbc0, -0x7a8f7d09, -0x53183042, -0x5d265e20, 0x4d2845ab }, + { -0x5c85a41c, -0x314f8802, -0x1a5a1149, -0x249bd0fe, 0x471270b8, -0x3d192f3b, 0x38e4529c, 0x4771b655 }, + { 0x447070de, -0x44ac8020, 0x6dd557df, -0x3458bbbd, 0x3600dbcb, -0x2c4a5cb9, -0x06002808, 0x4aeabbe6 } + }, + { + { -0x3b56370e, 0x6a2134bc, -0x7531d1c9, -0x040702e4, -0x66ee5f46, 0x000ae304, 0x6bc89b9e, 0x046e3a61 }, + { 0x40d8f78c, 0x4630119e, 0x3c710e11, -0x5fe5643b, -0x76ef2287, 0x486d2b25, -0x24fcdb1b, 0x1e6c47b3 }, + { -0x0fc6f942, 0x14e65442, -0x1c9d41d6, 0x4a019d54, -0x723dcf39, 0x68ccdfec, -0x509479e4, 0x7cfb7e3f } + }, +}, +{ + { + { 0x305b2f51, -0x69114005, -0x776a6948, -0x2c06c753, 0x46d5dd25, -0x0f0ad239, -0x44c5ff6b, 0x57968290 }, + { -0x73a75124, 0x4637974e, -0x540fbe5c, -0x4610dd05, -0x167f8e76, -0x1e7a26aa, -0x4ebc575a, 0x2f1b78fa }, + { 0x0a20e101, -0x08e547bd, 0x24f0ec47, -0x0c6c9a73, 0x6ee2eed1, -0x308af658, -0x23d55c1f, 0x7dc43e35 } + }, + { + { 0x273e9718, 0x5a782a5c, 0x5e4efd94, 0x3576c699, 0x1f237d3e, 0x0f2ed805, -0x7d2af567, 0x044fb81d }, + { -0x7782263d, -0x7a69999b, 0x4bb05355, -0x36f064cf, -0x10df864f, -0x391f7208, 0x758cc12f, 0x7ef72016 }, + { -0x56f81c27, -0x3e20e73b, -0x31b39ca7, 0x57b3371d, -0x4dfe44b7, -0x358fbacc, -0x63cf22d2, 0x7f79823f } + }, + { + { 0x68f587ba, 0x6a9c1ff0, 0x0050c8de, 0x0827894e, 0x7ded5be7, 0x3cbf9955, 0x1c06d6f0, 0x64a9b043 }, + { -0x5c4aec18, -0x7ccb2dc7, -0x46e05728, -0x3ec98f2c, -0x0a6f42cd, 0x12b54136, -0x287b264c, 0x0a4e0373 }, + { 0x5b7d2919, 0x2eb3d6a1, -0x2ac57dcb, -0x4f4b0960, -0x765ba2b9, 0x7156ce43, -0x31e7cb94, 0x071a7d0a } + }, + { + { 0x20e14431, -0x33f3caae, 0x09b15141, 0x0d659507, 0x209d5f36, -0x650a9de5, 0x617755d3, 0x7c69bcf7 }, + { -0x377845f5, -0x2cf8d256, -0x405a9d12, 0x01262905, -0x3f108975, -0x30abcffe, 0x46ea7e9c, 0x2c3bcc71 }, + { 0x04e8295f, 0x07f0d7eb, 0x2f50f37d, 0x10db1825, 0x171798d7, -0x16ae565d, 0x22aca51d, 0x6f5a9a73 } + }, + { + { -0x5c26bb42, -0x18d62b15, -0x7f875062, -0x7261f6c0, 0x47869c03, 0x4525567a, -0x1172c4dc, 0x02ab9680 }, + { 0x2f41c6c5, -0x745efff4, 0x0cfefb9b, -0x3b60863f, 0x3cc51c9f, 0x4efa4770, -0x1eb85036, 0x494e21a2 }, + { -0x221af266, -0x105b757b, 0x0fb9a249, 0x219a224e, -0x26e10927, -0x05f6e0e3, -0x15b944cc, 0x6b5d76cb } + }, + { + { 0x1e782522, -0x1f06bee9, 0x036936d3, -0x0e19518c, -0x2f0338ba, 0x408b3ea2, 0x03dd313e, 0x16fb869c }, + { -0x13f3266c, -0x77a8aa94, 
0x5cd01dba, 0x6472dc6f, -0x70bd4b89, -0x50fe96ec, -0x7ad88cac, 0x0ae333f6 }, + { 0x33b60962, 0x288e1997, -0x27541ecd, 0x24fc72b4, 0x0991d03e, 0x4811f7ed, -0x708f2f8b, 0x3f81e38b } + }, + { + { 0x5f17c824, 0x0adb7f35, -0x28bd665c, 0x74b923c3, -0x34071509, -0x2a83c175, 0x4cdedc3d, 0x0ad3e2d3 }, + { 0x7ed9affe, 0x7f910fcc, 0x2465874b, 0x545cb8a1, 0x4b0c4704, -0x57c6812e, 0x04f50993, 0x50510fc1 }, + { 0x336e249d, 0x6f0c0fc5, -0x3cce3027, 0x745ede19, 0x09eefe1c, -0x0d290300, -0x0f05e142, 0x127c158b } + }, + { + { -0x51ae468c, -0x215d703c, 0x744dfe96, 0x1d9973d3, -0x78c7b758, 0x6240680b, -0x2e98206b, 0x4ed82479 }, + { 0x2e9879a2, -0x09e683be, 0x52ca3647, -0x5bb5222c, 0x4b4eaccb, -0x64bec03f, 0x07ef4f68, 0x354ef87d }, + { 0x60c5d975, -0x011c4ade, -0x14be4f48, 0x50352efc, -0x56099ac4, -0x77f753d0, 0x0539236d, 0x302d92d2 } + }, +}, +{ + { + { 0x0df53c30, -0x6a847475, -0x719f0f68, 0x2a1c770a, 0x345796de, -0x44385990, -0x6f366437, 0x22a48f9a }, + { -0x34c10484, 0x4c59023f, -0x39c3d56c, 0x6c2fcb99, -0x3c381f7c, -0x45be6f1e, -0x5ae78b27, 0x0e545dae }, + { -0x72c053a8, 0x6b7dc0dc, -0x191bd403, 0x5497cd6c, -0x0bff2cfb, 0x542f7d1b, 0x048d9136, 0x4159f47f } + }, + { + { -0x442db7c7, 0x748515a8, -0x504fd4ab, 0x77128347, 0x49a2a17f, 0x50ba2ac6, 0x3ad730f1, 0x06052551 }, + { 0x39e31e32, 0x20ad6608, -0x7bfa41b0, -0x07e1e42b, -0x0b254397, -0x07f9bfaa, -0x318e468b, 0x14d23dd4 }, + { -0x755d807e, -0x0dc671f7, -0x765e4fdc, 0x6d7982bb, 0x214dd24c, -0x0596bf7c, -0x5cdcfe3d, 0x71ab966f } + }, + { + { 0x02809955, -0x4ef775f9, 0x0b43c391, 0x43b273ea, -0x01f97913, -0x35649852, -0x7cca0b13, 0x605eecbf }, + { 0x4ded02fc, 0x2dcbd8e3, 0x596f22aa, 0x1151f3ec, 0x4e0328da, -0x435daabd, -0x6dbee4de, 0x35768fbe }, + { 0x6c340431, -0x7cdff59b, -0x711a63d1, -0x60328e99, 0x71300f8a, 0x75d4613f, 0x60f542f9, 0x7a912faf } + }, + { + { -0x05d2aa69, 0x253f4f8d, 0x5477130c, 0x25e49c40, -0x6694eefe, 0x00c052e5, 0x33bb6c4a, 0x33cb966e }, + { 0x5edc1a43, -0x4dfba7a2, 0x5897c73c, -0x60f1e912, 0x4e70483c, 0x5b82c0ae, 0x2bddf9be, 0x624a170e }, + { 0x7f116909, 0x59702804, 0x1e564467, -0x7d753be4, -0x19de8c79, 0x70417dbd, -0x0453bc7c, 0x721627ae } + }, + { + { 0x410b2f22, -0x02cf6844, -0x4a3057bc, -0x0e5fa259, -0x10a8358c, 0x61289a1d, -0x447de6fe, 0x245ea199 }, + { -0x78c9522b, -0x682fc43d, -0x3acd4ed0, 0x2f1422af, 0x7101bbc4, 0x3aa68a05, -0x18b06059, 0x4c946cf7 }, + { 0x78d477f8, -0x51235997, 0x29117fe1, 0x1898ba3c, 0x720cbd58, -0x308c067d, -0x474a9caf, 0x67da12e6 } + }, + { + { -0x7137cf74, 0x2b7ef3d3, 0x71eb94ab, -0x7d702814, -0x3af9d543, -0x7f83c4ca, 0x31a94141, 0x0cb64cb8 }, + { -0x4b4291f9, 0x7067e187, -0x382e018c, 0x6e8f0203, 0x38c85a30, -0x6c3955d1, 0x3d75a78a, 0x76297d1f }, + { 0x534c6378, 0x3030fc33, -0x1abe179f, -0x469ca3a4, -0x264d38d8, 0x15d9a9be, -0x0c88a235, 0x49233ea3 } + }, + { + { 0x1c9f249b, 0x7b3985fe, -0x5edccd6d, 0x4fd6b2d5, 0x1adf4d62, -0x314cba6c, 0x542de50c, 0x6987ff6f }, + { -0x724003c6, 0x629398fa, -0x2ab24bab, -0x1ed01ad3, -0x250dad6b, -0x0c41ee21, -0x31a184af, 0x628b140d }, + { -0x707c8ac4, 0x47e24142, -0x79950669, 0x6317bebc, 0x3d1a9829, -0x2544a4bd, 0x5287fb2d, 0x074d8d24 } + }, + { + { -0x3f1ceb78, 0x481875c6, -0x1ddfcb4c, 0x219429b2, 0x31283b65, 0x7223c98a, 0x342277f9, 0x3420d60b }, + { 0x440bfc31, -0x7cc82633, -0x50ce7029, 0x729d2ca1, 0x772c2070, -0x5fbf5b5c, 0x3a7349be, 0x46002ef0 }, + { -0x50019a09, -0x055dc522, 0x5be0764c, 0x78261ed4, 0x2f164403, 0x441c0a1e, 0x7a87d395, 0x5aea8e56 } + }, +}, +{ + { + { -0x1b1f0e89, 0x2dbc6fb6, -0x5b42956d, 0x04e1bf29, 0x787af6e8, 0x5e1966d4, -0x4bd92fa0, 0x0edc5f5e }, + { 
-0x435bd7c3, 0x7813c1a2, -0x5e79c227, -0x129d0f6f, -0x3d97057a, -0x51384348, 0x6f1cae4c, 0x10e5d3b7 }, + { 0x53da8e67, 0x5453bfd6, 0x24a9f641, -0x1623e114, 0x03578a23, -0x4078d9c5, 0x361cba72, 0x45b46c51 } + }, + { + { -0x75801c1c, -0x3162b223, 0x76620e30, -0x54ec9baa, -0x4cf166a8, 0x4b594f7b, 0x321229df, 0x5c1c0aef }, + { 0x314f7fa1, -0x56bfd541, -0x71730bb0, -0x1da80e24, 0x23a8be84, 0x1dbbd54b, 0x6dcb713b, 0x2177bfa3 }, + { -0x05862471, 0x37081bbc, -0x3da0a64d, 0x6048811e, -0x637cdb79, 0x087a7665, 0x7d8ab5bb, 0x4ae61938 } + }, + { + { -0x67a4047d, 0x61117e44, 0x71963136, -0x031fb9d6, -0x2bda6fb5, -0x7c53cbb8, 0x5ba43d64, 0x75685abe }, + { 0x5344a32e, -0x72240956, -0x4be4bf88, 0x7d88eab4, 0x4a130d60, 0x5eb0eb97, 0x17bf3e03, 0x1a00d91b }, + { -0x149e0d4e, 0x6e960933, -0x3600b6ae, 0x543d0fa8, 0x7af66569, -0x208d8af0, 0x23b0e6aa, 0x135529b6 } + }, + { + { -0x1dd17c02, -0x0a38e944, -0x17f67a3f, -0x4bd414e7, 0x14254aae, -0x136259c9, 0x1590a613, 0x5972ea05 }, + { -0x522e2ae8, 0x18f0dbd7, -0x303ee0ef, -0x68608778, 0x7114759b, -0x78cd1e10, 0x65ca3a01, 0x79b5b81a }, + { -0x237087ef, 0x0fd4ac20, -0x53b2b058, -0x65652d6c, -0x4cc9fbcc, -0x3fe4d29c, -0x6fa0c425, 0x4f7e9c95 } + }, + { + { 0x355299fe, 0x71c8443d, -0x24141529, -0x7432c4e4, -0x0e5b6b9a, -0x7f6db662, -0x5ebb5238, 0x1942eec4 }, + { 0x5781302e, 0x62674bbc, -0x765223f1, -0x27adf0c7, 0x53fbd9c6, -0x73d66652, 0x2e638e4c, 0x31993ad9 }, + { -0x51dcb66e, 0x7dac5319, 0x0cea3e92, 0x2c1b3d91, 0x253c1122, 0x553ce494, 0x4ef9ca75, 0x2a0a6531 } + }, + { + { 0x3c1c793a, -0x30c9e533, 0x5a35bc3b, 0x2f9ebcac, -0x57325955, 0x60e860e9, 0x6dea1a13, 0x055dc39b }, + { -0x0806d83e, 0x2db7937f, 0x17d0a635, -0x248be0fa, 0x1155af76, 0x5982f3a2, 0x647c2ded, 0x4cf6e218 }, + { -0x3d72a44a, -0x4ee6dd84, 0x774dffab, 0x07e24ebc, -0x1b5cd377, -0x57c38732, 0x10aa24b6, 0x121a3077 } + }, + { + { -0x388b7c37, -0x29a68ec2, -0x47d46951, -0x77401f89, 0x1097bcd3, 0x289e2823, 0x6ced3a9b, 0x527bb94a }, + { -0x60fcb569, -0x1b24a2a2, 0x3034bc2d, -0x1eac03f7, -0x6aae2c4f, 0x46054691, 0x7a40e52d, 0x333fc76c }, + { -0x66a4b7d2, 0x563d992a, 0x6e383801, 0x3405d07c, 0x2f64d8e5, 0x485035de, 0x20a7a9f7, 0x6b89069b } + }, + { + { -0x4a382489, 0x4082fa8c, -0x38cb3eab, 0x068686f8, -0x09185a82, 0x29e6c8d9, -0x589c6431, 0x0473d308 }, + { 0x6270220d, -0x7ed55fbf, -0x06dba4b2, -0x66a57606, 0x5072ef05, -0x00523b32, -0x558c148d, 0x23bc2103 }, + { 0x03589e05, -0x351186da, 0x46dcc492, 0x2b4b4212, -0x19fe56b1, 0x02a1ef74, -0x21fbcbe6, 0x102f73bf } + }, +}, +{ + { + { -0x6c5c9db9, 0x358ecba2, -0x4d97029b, -0x5070679e, 0x68a01c89, 0x412f7e99, -0x328abadc, 0x5786f312 }, + { 0x7ec20d3e, -0x4a5d2af4, -0x5f368d9d, -0x39b42292, -0x3e008cb3, 0x56e89052, 0x2b2ffaba, 0x4929c6f7 }, + { -0x35ebfcd4, 0x337788ff, 0x447f1ee3, -0x0c6defd8, 0x231bccad, -0x74ebf8e1, -0x0dcbb87d, 0x4c817b4b } + }, + { + { -0x5bf4bb7c, 0x413ba057, 0x4f5f6a43, -0x45b3d1e6, -0x511e29e4, 0x614ba0a5, -0x74fa23ad, 0x78a1531a }, + { 0x2871b96e, 0x0ff85385, 0x60c3f1bb, -0x1ec16055, 0x25344402, -0x1102a6ad, 0x75b7744b, 0x0a37c370 }, + { 0x3ad0562b, 0x6cbdf170, -0x36dade5d, -0x7130b7d0, -0x027bdb19, -0x25142cfd, 0x2e5ec56f, 0x72ad82a4 } + }, + { + { 0x67024bc3, -0x3c976c6f, 0x49502fda, -0x71962e93, -0x1ba0b4d7, -0x030d13c4, -0x5c4b343c, 0x065f669e }, + { -0x45049a0a, 0x3f9e8e35, -0x0d8d6c5f, 0x39d69ec8, -0x73095c30, 0x6cb8cd95, 0x73adae6d, 0x17347781 }, + { 0x5532db4d, -0x75ff5139, 0x43e31bb1, -0x47965b1c, -0x2c580aeb, 0x4a0f8552, 0x303d7c08, 0x19adeb7c } + }, + { + { 0x43c31794, -0x62fa4583, -0x6ccddada, 0x2470c8ff, 0x16197438, -0x7cdc2138, 
-0x7ea964ad, 0x28527098 }, + { 0x53ead9a3, -0x38df349f, 0x512b636e, 0x55b2c97f, -0x2bfd6f4f, -0x4e1ca4a1, 0x3b530ee2, 0x2fd9ccf1 }, + { 0x47f796b8, 0x07bd475b, 0x542c8f54, -0x2d384fed, 0x3b24f87e, 0x2dbd23f4, 0x7b0901d6, 0x6551afd7 } + }, + { + { -0x5e2a3654, 0x68a24ce3, 0x10ff6461, -0x44885cc3, 0x25d3166e, 0x0f86ce44, 0x50b9623b, 0x56507c09 }, + { 0x54aac27f, 0x4546baaf, -0x4d5ba5d8, -0x09099014, 0x562bcfe8, 0x582d1b5b, -0x6df087a1, 0x44b123f3 }, + { -0x2e8ec19d, 0x1206f0b7, 0x15bafc74, 0x353fe3d9, 0x0ad9d94d, 0x194ceb97, -0x062fc52d, 0x62fadd7c } + }, + { + { -0x1831ba6c, 0x3cd7bc61, -0x4822d982, -0x3294ca57, 0x4366ef27, -0x5f7f5438, 0x59c79711, 0x6ec7c46f }, + { 0x5598a074, -0x394a6985, -0x71b6c1db, 0x5efe91ce, 0x49280888, -0x2b48d3bb, -0x5d98bf3e, 0x20ef1149 }, + { 0x6f09a8a2, 0x2f07ad63, 0x24205e7d, -0x79681932, -0x11ca5ec7, -0x3f5103fb, -0x4a062769, 0x15e80958 } + }, + { + { 0x5bb061c4, 0x4dd1ed35, -0x6be3f900, 0x42dc0cef, -0x0279cbf2, 0x61305dc1, 0x0e55a443, 0x56b2cc93 }, + { 0x0c3e235b, 0x25a5ef7d, -0x41ecb119, 0x6c39c17f, 0x2dc5c327, -0x388b1ecc, -0x6dfde0c7, 0x021354b8 }, + { -0x59403a5e, 0x1df79da6, -0x6021bc97, 0x02f3a274, -0x325c6f59, -0x4cdc260e, -0x788b2c9d, 0x7be0847b } + }, + { + { 0x5307fa11, 0x1466f5af, -0x1293f50e, -0x7e803383, -0x3c5b5c05, 0x0a6de44e, -0x436d82f5, 0x74071475 }, + { -0x74c0aa3d, -0x736633a6, 0x3fded2a0, 0x0611d725, 0x36b70a36, -0x12d66a01, -0x2875d9e7, 0x1f699a54 }, + { 0x73e7ea8a, -0x188d6d0d, -0x34fba5cf, 0x296537d2, -0x2cd8b022, 0x1bd0653e, 0x76bd2966, 0x2f9a2c44 } + }, +}, +{ + { + { -0x4aaee366, -0x5d4b2520, 0x2bffff06, 0x7ac86029, -0x0aafbdcc, -0x67e0c8a3, -0x25b15ed3, 0x3f6bd725 }, + { 0x7f5745c6, -0x14e74655, 0x5787c690, 0x023a8aee, 0x2df7afa9, -0x48d8ed26, -0x15a3fec3, 0x36597d25 }, + { 0x106058ac, 0x734d8d7b, 0x6fc6905f, -0x26bfa862, -0x6dfd6cd3, 0x6466f8f9, -0x259f2930, 0x7b7ecc19 } + }, + { + { -0x58830565, 0x6dae4a51, -0x185c79b0, -0x7dd9c9ac, -0x70d27d25, 0x09bbffcd, 0x1bf5caba, 0x03bedc66 }, + { 0x695c690d, 0x78c2373c, 0x0642906e, -0x22dad19a, 0x4ae12bd2, -0x6ae2bbbc, 0x01743956, 0x4235ad76 }, + { 0x078975f5, 0x6258cb0d, -0x6e760d68, 0x49294254, -0x1d1c911c, -0x5f354bdd, -0x320f995f, 0x0e7ce2b0 } + }, + { + { -0x26b48f07, -0x01590121, -0x3e0345d3, -0x0ecf3faf, 0x7f2fab89, 0x4882d47e, -0x7513114b, 0x61525613 }, + { -0x3b737a5d, -0x3b6b9bc6, 0x3c6139ad, -0x02c9e20c, 0x3ae94d48, 0x09db17dd, -0x704b98b6, 0x666e0a5d }, + { 0x4870cb0d, 0x2abbf64e, -0x55ba7495, -0x329a4310, 0x75e8985d, -0x6541b146, -0x2aeb211c, 0x7f0bc810 } + }, + { + { 0x737213a0, -0x7c536253, 0x2ef72e98, -0x60090746, 0x43ec6957, 0x311e2edd, -0x213a548b, 0x1d3a907d }, + { 0x26f4136f, -0x46ff945c, 0x57e03035, -0x7298c962, 0x4f463c28, -0x34372027, -0x0711240b, 0x0d1f8dbc }, + { 0x3ed081dc, -0x45e96ccf, -0x7ae4cb80, 0x29329fad, 0x030321cb, 0x0128013c, -0x5ce4021d, 0x00011b44 } + }, + { + { 0x6a0aa75c, 0x16561f69, 0x5852bd6a, -0x3e408da4, -0x65869953, 0x11a8dd7f, -0x2d7aefda, 0x63d988a2 }, + { 0x3fc66c0c, 0x3fdfa06c, 0x4dd60dd2, 0x5d40e38e, 0x268e4d71, 0x7ae38b38, 0x6e8357e1, 0x3ac48d91 }, + { -0x5042dcd2, 0x00120753, -0x0227097d, -0x16d43148, -0x7b18d46f, -0x07e9964d, 0x2368a066, 0x33fad52b } + }, + { + { -0x3bdd3018, -0x72d33730, 0x05a13acb, 0x072b4f7b, -0x13095a91, -0x5c01491a, -0x46f58e1e, 0x3cc355cc }, + { -0x3a1be1ea, 0x540649c6, 0x333f7735, 0x0af86430, -0x0cfa18ba, -0x4d53032e, -0x5da92359, 0x16c0f429 }, + { -0x6fc16ecf, -0x16496bbd, 0x7a5637ce, -0x475b6b35, -0x45456dbc, -0x37832e5c, 0x6bae7568, 0x631eaf42 } + }, + { + { -0x5c8ff218, 0x47d975b9, -0x1d07faae, 0x7280c5fb, 
0x32e45de1, 0x53658f27, 0x665f80b5, 0x431f2c7f }, + { -0x25990161, -0x4c16fbf0, 0x6c16e5a6, -0x7a22b4ae, 0x1ef9bf83, -0x43c2689f, 0x1ea919b5, 0x5599648b }, + { -0x7a7084e7, -0x29fd9cbc, -0x5e15aeb6, 0x14ab352f, 0x2090a9d7, -0x76ffbbe6, -0x6edac4da, 0x7b04715f } + }, + { + { -0x3b19453a, -0x4c893d80, 0x6d1d9b0b, -0x68f12c23, 0x450bf944, -0x4f656aa8, 0x57cde223, 0x48d0acfa }, + { -0x530951bd, -0x7c1242d8, 0x7d5c7ab4, -0x79ca8375, -0x4814d3bc, -0x3fbfb897, -0x3d09a7c1, 0x59b37bf5 }, + { 0x7dabe671, -0x49f0d91c, 0x622f3a37, -0x0e2e5e69, -0x1669fc6c, 0x4208ce7e, 0x336d3bdb, 0x16234191 } + }, +}, +{ + { + { 0x3d578bbe, -0x7ad22e03, -0x3cd79ef8, 0x2b65ce72, -0x1531dd8d, 0x658c07f4, -0x13c754c0, 0x0933f804 }, + { 0x33a63aef, -0x0e651539, 0x4442454e, 0x2c7fba5d, 0x4795e441, 0x5da87aa0, -0x5b1f4f0b, 0x413051e1 }, + { -0x72b69b8a, -0x58549687, -0x034a5438, -0x7ede5522, 0x7b539472, -0x5a23ed11, 0x5e45351a, 0x07fd4706 } + }, + { + { -0x6517183d, 0x30421155, -0x6bb77d5b, -0x0d7e4dd7, 0x378250e4, -0x75ec53d2, 0x54ba48f4, 0x014afa09 }, + { 0x258d2bcd, -0x37a7c3c3, -0x509f48c1, 0x17029a4d, 0x416a3781, -0x05f0362a, 0x38b3fb23, 0x1c1e5fba }, + { 0x1bb3666c, -0x34ce6900, 0x4bffecb9, 0x33006052, 0x1a88233c, 0x29371199, 0x3d4ed364, 0x29188436 } + }, + { + { -0x43e54915, -0x0462c83d, 0x4d57a240, 0x02be1453, -0x075a1e0a, -0x0b28cbeb, 0x0ccc8188, 0x5964f430 }, + { -0x23b45406, 0x033c6805, 0x5596ecc1, 0x2c15bf5e, -0x4a64e2c5, 0x1bc70624, -0x5e60f13b, 0x3ede9850 }, + { 0x2d096800, -0x1bb5dceb, 0x70866996, 0x5c08c559, 0x46affb6e, -0x20d249f6, -0x07a90277, 0x579155c1 } + }, + { + { 0x0817e7a6, -0x4a0e949d, 0x3c351026, -0x7f7396dd, 0x54cef201, 0x324a983b, 0x4a485345, 0x53c09208 }, + { 0x12e0c9ef, -0x69cdb123, -0x0dbdfd69, 0x468b878d, -0x5b0a8c42, 0x199a3776, -0x716e16d6, 0x1e7fbcf1 }, + { -0x0e345041, -0x2d2beb7f, 0x716174e5, 0x231d2db6, -0x1d5aa368, 0x0b7d7656, 0x2aa495f6, 0x3e955cd8 } + }, + { + { 0x61bb3a3f, -0x54c60c11, 0x2eb9193e, -0x714bff9b, 0x38c11f74, -0x4a219134, 0x26f3c49f, 0x654d7e96 }, + { 0x3ed15433, -0x1b70aca2, 0x0d7270a3, -0x2f8a96d6, -0x55219c79, 0x40fbd21d, -0x30bb6a0b, 0x14264887 }, + { 0x5c7d2ceb, -0x1a9b3023, -0x28c83347, -0x7d115022, -0x2e064f55, 0x6107db62, -0x4bca7245, 0x0b6baac3 } + }, + { + { 0x3700a93b, 0x204abad6, -0x25886c8d, -0x41ffdc2d, 0x633ab709, -0x27a0fcba, -0x6f7dfbee, 0x00496dc4 }, + { -0x79dd0168, 0x7ae62bcb, -0x31476e51, 0x47762256, -0x0d1bf94c, 0x1a5a92bc, -0x7b1beaff, 0x7d294017 }, + { -0x3d819ca0, 0x1c74b88d, -0x72eb7af4, 0x07485426, 0x3e0dcb30, -0x5eba0485, 0x43803b23, 0x10843f1b } + }, + { + { -0x1cdb9765, -0x2a9098d3, -0x4c6b567f, -0x2e257513, -0x6e973013, -0x2284a702, 0x4d56c1e8, 0x7ce246cd }, + { 0x376276dd, -0x3a06fbab, -0x289ba327, -0x31a6ea73, 0x1d366b39, -0x6d09a2af, 0x526996c4, 0x11574b6e }, + { 0x7f80be53, -0x470bcf72, 0x34a9d397, 0x5f3cb8cb, 0x33cc2b2c, 0x18a961bd, 0x3a9af671, 0x710045fb } + }, + { + { 0x059d699e, -0x5fc0379e, -0x659e6197, 0x2370cfa1, 0x2f823deb, -0x3b01c4ee, -0x580f7bb2, 0x1d1b056f }, + { 0x101b95eb, 0x73f93d36, 0x4f6f4486, -0x0510cc87, -0x70ea1a9e, 0x5651735f, 0x58b40da1, 0x7fa3f190 }, + { -0x1a9409e1, 0x1bc64631, 0x6e5382a3, -0x2c8654f0, 0x0540168d, 0x4d58c57e, -0x7bbd271c, 0x56625662 } + }, +}, +{ + { + { 0x1ff38640, -0x22b6632a, 0x063625a0, 0x29cd9bc3, 0x3dd73dc3, 0x51e2d802, 0x203b9231, 0x4a25707a }, + { -0x09d9800a, -0x461b6622, 0x742c0843, 0x7772ca7b, -0x165b0d4f, 0x23a0153f, -0x2a2faffa, 0x2cdfdfec }, + { 0x53f6ed6a, 0x2ab7668a, 0x1dd170a1, 0x30424258, 0x3ae20161, 0x4000144c, 0x248e49fc, 0x5721896d } + }, + { + { -0x5e2f25b2, 0x285d5091, 
-0x4a01c1f8, 0x4baa6fa7, -0x1e6c6c4d, 0x63e5177c, -0x3b4fcf03, 0x03c935af }, + { -0x02e7e452, 0x0b6e5517, 0x2bb963b4, -0x6fdd9d61, 0x32064625, 0x5509bce9, -0x09c3ec26, 0x578edd74 }, + { 0x492b0c3d, -0x668d893a, -0x201dfa04, 0x47ccc2c4, -0x229dc5c4, -0x232d647c, 0x0288c7a2, 0x3ec2ab59 } + }, + { + { -0x51cd2e35, -0x58dec5f7, 0x40f5c2d5, 0x0f2b87df, -0x17e154d7, 0x0baea4c6, 0x6adbac5e, 0x0e1bf66c }, + { -0x1b278447, -0x5e5f2d85, 0x61391aed, -0x5674b215, 0x73cb9b83, -0x665f2230, 0x200fcace, 0x2dd5c25a }, + { 0x792c887e, -0x1d542a17, -0x346d92a3, 0x1a020018, -0x4551a0e2, -0x40459633, 0x5ae88f5f, 0x730548b3 } + }, + { + { -0x5e291ccc, -0x7fa4f6b5, 0x09353f19, -0x40c10e89, 0x0622702b, 0x423f06cb, -0x2787ba23, 0x585a2277 }, + { -0x34574712, -0x3bcaae5d, -0x4deea0ea, 0x65a26f1d, -0x5473c7b0, 0x760f4f52, 0x411db8ca, 0x3043443b }, + { 0x33d48962, -0x5e75a07e, -0x1387da81, 0x6698c4b5, 0x373e41ff, -0x5871905b, 0x50ef981f, 0x76562789 } + }, + { + { -0x15793063, -0x1e8f8c5d, 0x07155fdc, 0x3a8cfbb7, 0x31838a8e, 0x4853e7fc, -0x49ec09ea, 0x28bbf484 }, + { -0x2ae03740, 0x38c3cf59, 0x0506b6f2, -0x64122d03, -0x54a8f171, 0x26bf109f, -0x3e47b95a, 0x3f4160a8 }, + { 0x6f136c7c, -0x0d9ed0a4, -0x0922ee42, -0x50152ef9, 0x13de6f33, 0x527e9ad2, -0x7e7708a3, 0x1e79cb35 } + }, + { + { -0x0a1f7e7f, 0x77e953d8, 0x299dded9, -0x7b5af3bc, -0x79bada1b, -0x2393d2f4, 0x39d1f2f4, 0x478ab52d }, + { -0x11081c0f, 0x013436c3, -0x0161ef08, -0x7d749581, -0x43062104, 0x7ff908e5, 0x3a3b3831, 0x65d7951b }, + { -0x6dad2ea7, 0x66a6a4d3, -0x78e537f9, -0x1a221e44, -0x593e3691, -0x47d394c0, 0x1a212214, 0x16d87a41 } + }, + { + { -0x2ab1fa7d, -0x045b2a1e, 0x2ebd99fa, -0x1de05029, 0x6ee9778f, 0x497ac273, 0x7a5a6dde, 0x1f990b57 }, + { 0x42066215, -0x4c4281a6, 0x0c5a24c1, -0x78641c33, -0x29066b49, 0x57c05db1, 0x65f38ca6, 0x28f87c81 }, + { 0x1be8f7d6, -0x5ccbb153, -0x53158671, 0x7d1e50eb, 0x520de052, 0x77c6569e, 0x534d6d3e, 0x45882fe1 } + }, + { + { -0x6bc3901c, -0x275366d7, -0x5c7c6d5e, -0x4a060e9f, -0x4137650d, 0x2699db13, -0x1bfa0f8c, 0x7dcf843c }, + { 0x757983d6, 0x6669345d, 0x17aa11a6, 0x62b6ed11, -0x67a1ed71, 0x7ddd1857, -0x09d90923, 0x688fe5b8 }, + { 0x4a4732c0, 0x6c90d648, -0x35a9cd67, -0x2adebc03, -0x6ea2391f, -0x4c41d73d, 0x7327191b, 0x6739687e } + }, +}, +{ + { + { -0x363468e1, -0x731a5530, -0x602ab5d7, 0x1156aaa9, 0x15af9b78, 0x41f72470, 0x420f49aa, 0x1fe8cca8 }, + { 0x200814cf, -0x609a3a16, 0x69a31740, -0x7bfac91f, 0x25c8b4ad, -0x74f12ec7, -0x16c9c9e3, 0x0080dbaf }, + { 0x3c0cc82a, 0x72a1848f, -0x788361ac, 0x38c560c2, -0x31aabec0, 0x5004e228, 0x03429d71, 0x042418a1 } + }, + { + { 0x20816247, 0x58e84c6f, -0x1c90286d, -0x724d4d4a, 0x1d484d85, -0x688e7daa, -0x79cd5429, 0x0822024f }, + { -0x540c00a1, -0x766215af, 0x2fc2d8ba, -0x646c5799, -0x419142a4, 0x2c38cb97, -0x68d9c4a3, 0x114d5784 }, + { 0x6b1beca3, -0x4cfe4484, -0x3914ec8b, 0x55393f6d, -0x68491b15, -0x6ef2d7f0, -0x62b8615d, 0x1ad4548d } + }, + { + { 0x0fe9fed3, -0x5f901993, 0x1c587909, -0x578cc5c0, 0x0df98953, 0x30d14d80, -0x384cfda8, 0x41ce5876 }, + { 0x389a48fd, -0x32a58260, -0x6587c8e2, -0x4c705b56, 0x2cdb8e6c, -0x392689e5, -0x3681ebbd, 0x35cf51db }, + { -0x298f3fde, 0x59ac3bc5, -0x64ee6bfa, -0x151983f0, -0x4c87d026, -0x68674210, -0x02f8bf6e, 0x651e3201 } + }, + { + { 0x1efcae9e, -0x5a845b60, -0x23cf756c, 0x769f4bee, 0x3603cb2e, -0x2e0ef115, 0x7e441278, 0x4099ce5e }, + { -0x10cf3a31, -0x29c27b7d, 0x2361cc0c, 0x4cd4b496, -0x5b7bd954, -0x116f1b00, 0x18c14eeb, 0x0af51d7d }, + { -0x75aede17, 0x1ac98e4f, -0x2405d020, 0x7dae9544, -0x29bcf207, -0x7cdf55f3, 0x2c4a2fb5, 0x66728265 } + }, + 
{ + { 0x2946db23, -0x52574920, 0x7b253ab7, 0x1c0ce51a, 0x66dd485b, -0x7bb737a6, -0x2f98a521, 0x7f1fc025 }, + { -0x27943655, -0x78b9de0c, 0x56fe6fea, -0x4ab38442, 0x7fadc22c, 0x077a2425, 0x19b90d39, 0x1ab53be4 }, + { 0x319ea6aa, -0x2711e4e8, 0x3a21f0da, 0x004d8808, -0x77c5b0b5, 0x3bd6aa1d, -0x202602ec, 0x4db9a3a6 } + }, + { + { -0x34488398, -0x26a4ff45, -0x6e0e87b7, -0x22437b96, -0x41d7264d, 0x7cf700ae, -0x7a2ce0c2, 0x5ce1285c }, + { -0x4663f8ab, -0x73184dc5, -0x3b0af086, 0x35c5d6ed, -0x1264af3d, 0x7e1e2ed2, -0x176cb25f, 0x36305f16 }, + { -0x674f4218, 0x31b6972d, -0x535921a5, 0x7d920706, -0x6f759a61, -0x198cef08, -0x1020fdcb, 0x50fac2a6 } + }, + { + { -0x090bb644, 0x295b1c86, 0x1f0ab4dd, 0x51b2e84a, -0x5571aae3, -0x3ffe34d0, 0x44f43662, 0x6a28d359 }, + { 0x5b880f5a, -0x0c2c560d, -0x24fc183e, -0x1213faf4, -0x060f4e5e, -0x576967e1, -0x53a1cb5c, 0x49a4ae2b }, + { 0x04a740e0, 0x28bb12ee, -0x64317e8c, 0x14313bbd, -0x173ef3c0, 0x72f5b5e4, 0x36adcd5b, 0x7cbfb199 } + }, + { + { -0x33c91920, -0x7186c586, 0x7d586eed, -0x0605485d, -0x451e0b1c, 0x3a4f9692, -0x00a0bb82, 0x1c14b03e }, + { 0x6b89792d, -0x5cee223e, -0x25aed99c, 0x1b30b4c6, -0x30eaf7a7, 0x0ca77b4c, 0x1b009408, 0x1de443df }, + { 0x14a85291, 0x19647bd1, 0x1034d3af, 0x57b76cb2, 0x0f9d6dfa, 0x6329db44, 0x6a571493, 0x5ef43e58 } + }, +}, +{ + { + { -0x37f3e540, -0x59923363, 0x1b38a436, -0x685fa30c, -0x6a24283a, -0x58140c42, -0x72818255, 0x7da0b8f6 }, + { 0x385675a6, -0x1087dfec, -0x55025618, -0x5d9b60d0, 0x5cdfa8cb, 0x4cd1eb50, 0x1d4dc0b3, 0x46115aba }, + { -0x3c4a258a, -0x2bf0e6ad, 0x21119e9b, 0x1dac6f73, -0x014da6a0, 0x03cc6021, -0x7c98b4b5, 0x5a5f887e } + }, + { + { -0x5f59bc47, -0x6169d72d, -0x193cdf9c, -0x4a3c3500, 0x7c2dec32, -0x64acfd77, -0x2a2e38f4, 0x43e37ae2 }, + { 0x70a13d11, -0x709cfe31, 0x350dd0c4, -0x303147eb, -0x5b435b82, -0x08fd682c, -0x1bb2ebcc, 0x3669b656 }, + { -0x12591ecd, 0x387e3f06, -0x665ec540, 0x67301d51, 0x36263811, -0x42a52708, 0x4fd5e9be, 0x6a21e6cd } + }, + { + { 0x6699b2e3, -0x10bed6ee, 0x708d1301, 0x71d30847, 0x1182b0bd, 0x325432d0, 0x001e8b36, 0x45371b07 }, + { 0x3046e65f, -0x0e39e8f6, 0x00d23524, 0x58712a2a, -0x737d48ab, 0x69dbbd3c, -0x5e6a00a9, 0x586bf9f1 }, + { 0x5ef8790b, -0x5924f773, 0x610937e5, 0x5278f0dc, 0x61a16eb8, -0x53fcb62e, -0x6f1ade87, 0x0eafb037 } + }, + { + { 0x0f75ae1d, 0x5140805e, 0x2662cc30, -0x13fd041d, -0x156dc693, 0x2cebdf1e, -0x3abca44d, 0x44ae3344 }, + { 0x3748042f, -0x69faaa3f, -0x7df455ef, 0x219a41e6, 0x73486d0c, 0x1c81f738, 0x5a02c661, 0x309acc67 }, + { -0x445abc12, -0x630d7647, 0x5ac97142, -0x0c89f163, 0x4f9360aa, 0x1d82e5c6, 0x7f94678f, 0x62d5221b } + }, + { + { 0x3af77a3c, 0x7585d426, -0x0116ebb3, -0x205184ef, 0x59f7193d, -0x5af98f80, -0x7c6ddfc9, 0x14f29a53 }, + { 0x18d0936d, 0x524c299c, -0x75f3e5f4, -0x37944a94, -0x24b579cf, -0x5c8afad2, -0x438aba9e, 0x5c0efde4 }, + { 0x25b2d7f5, -0x208e8124, -0x664acfc0, 0x21f970db, -0x3c12b39e, -0x256dcb49, 0x7bee093e, 0x5e72365c } + }, + { + { 0x2f08b33e, 0x7d933906, -0x2060cd42, 0x5b9659e5, 0x1f9ebdfd, -0x5300c253, -0x348cb649, 0x70b20555 }, + { 0x4571217f, 0x575bfc07, 0x0694d95b, 0x3779675d, -0x0be6e1cd, -0x65f5c845, 0x47b4eabc, 0x77f1104c }, + { 0x55112c4c, -0x41aeec3b, -0x6577e033, 0x6688423a, 0x5e503b47, 0x44667785, 0x4a06404a, 0x0e34398f } + }, + { + { 0x3e4b1928, 0x18930b09, 0x73f3f640, 0x7de3e10e, 0x73395d6f, -0x0bcde826, -0x35c863c2, 0x6f8aded6 }, + { 0x3ecebde8, -0x4982dd27, 0x27822f07, 0x09b3e841, -0x4fa49273, 0x743fa61f, -0x75c9dc8e, 0x5e540536 }, + { -0x02484d66, -0x1cbfedc3, -0x5de54d6f, 0x487b97e1, -0x02196b62, -0x066982fe, 
-0x372c2169, 0x780de72e } + }, + { + { 0x00f42772, 0x671feaf3, 0x2a8c41aa, -0x708d14d6, -0x68c8cd6e, 0x29a17fd7, 0x32b587a6, 0x1defc6ad }, + { 0x089ae7bc, 0x0ae28545, 0x1c7f4d06, 0x388ddecf, 0x0a4811b8, 0x38ac1551, 0x71928ce4, 0x0eb28bf6 }, + { -0x10ae6a59, -0x50a441e6, -0x6e84ea13, 0x148c1277, 0x7ae5da2e, 0x2991f7fb, -0x0722d799, 0x467d201b } + }, +}, +{ + { + { 0x296bc318, 0x745f9d56, -0x27ead19b, -0x66ca7f2c, 0x5839e9ce, -0x4f1a4ec1, -0x2bc6de40, 0x51fc2b28 }, + { -0x0842d195, 0x7906ee72, 0x109abf4e, 0x05d270d6, -0x46be575c, -0x72a301bb, 0x1c974287, 0x44c21867 }, + { -0x6a1d5674, 0x1b8fd117, 0x2b6b6291, 0x1c4e5ee1, 0x7424b572, 0x5b30e710, 0x4c4f4ac6, 0x6e6b9de8 } + }, + { + { -0x07f34f78, 0x6b7c5f10, 0x56e42151, 0x736b54dc, -0x3910663c, -0x3d49df5b, -0x3c5f90be, 0x5f4c802c }, + { 0x4b1de151, -0x200da032, -0x1ee3bfdb, -0x27be3f39, 0x54749c87, 0x2554b3c8, -0x6f71f207, 0x2d292459 }, + { 0x7d0752da, -0x649a370f, -0x38811800, -0x77e31cc8, 0x5b62f9e3, -0x3c4aeb10, -0x413ef2b8, 0x66ed5dd5 } + }, + { + { -0x3435fb83, -0x0f520c37, -0x0baad095, -0x7e3c4d35, 0x44735f93, -0x3025eed3, 0x7e20048c, 0x1f23a0c7 }, + { 0x0bb2089d, 0x7d38a1c2, -0x69332bee, -0x7f7ccb1f, 0x6c97d313, -0x3b58f474, 0x03007f20, 0x2eacf8bc }, + { -0x1a43ea90, -0x0dcab985, 0x0dbab38c, 0x03d2d902, -0x03061f62, 0x27529aa2, -0x62cb43b0, 0x0840bef2 } + }, + { + { 0x7f37e4eb, -0x32ab1f95, -0x0a169336, -0x733ea079, -0x2ca68232, -0x47db7450, 0x6074400c, 0x246affa0 }, + { -0x23ef4d79, 0x796dfb35, 0x5c7ff29d, 0x27176bcd, -0x384db6fb, 0x7f3d43e8, -0x6e3abd8a, 0x0304f5a1 }, + { -0x041bacdf, 0x37d88e68, -0x3f28afce, -0x79f68ab8, -0x76b5f2cb, 0x4e9b13ef, 0x5753d325, 0x25a83cac } + }, + { + { 0x3952b6e2, -0x60f099d7, 0x0934267b, 0x33db5e0e, -0x29f60124, -0x00badad5, -0x3af91f37, 0x06be10f5 }, + { -0x1127e9a2, 0x10222f48, 0x4b8bcf3a, 0x623fc123, -0x3dde1710, 0x1e145c09, -0x3587d9d0, 0x7ccfa59f }, + { -0x49d5cba1, 0x1a9615a9, 0x4a52fecc, 0x22050c56, 0x28bc0dfe, -0x585d877b, 0x1a1ee71d, 0x5e82770a } + }, + { + { 0x42339c74, -0x17fd17f6, -0x5800051b, 0x34175166, 0x1c408cae, 0x34865d1f, 0x605bc5ee, 0x2cca982c }, + { -0x527695a4, 0x35425183, -0x1872ad0a, -0x1798c505, -0x6d5ca09c, 0x2c66f25f, 0x3b86b102, 0x09d04f3b }, + { 0x197dbe6e, -0x02d2a2cb, -0x741b005d, 0x207c2eea, 0x325ae918, 0x2613d8db, 0x27741d3e, 0x7a325d17 } + }, + { + { 0x7e2a076a, -0x132d82ff, 0x1636495e, -0x28779761, -0x6e6dcc1b, 0x52a61af0, 0x7bb1ae64, 0x2a479df1 }, + { -0x2e92021e, -0x2fc94645, -0x3b6857d7, -0x5dfaa8a9, -0x580ed999, -0x7193369a, 0x1239c180, 0x4d3b1a79 }, + { 0x33db2710, -0x61a11172, -0x293bc35b, 0x189854de, -0x6d8e7ec8, -0x5be3dd3b, -0x5bc5a165, 0x27ad5538 } + }, + { + { -0x71b8f884, -0x34a5829d, 0x20a1c059, -0x7248ac9f, -0x74120234, 0x549e1e4d, 0x503b179d, 0x080153b7 }, + { 0x15350d61, 0x2746dd4b, -0x116ade49, -0x2fc03438, 0x138672ca, -0x1791c9a6, 0x7e7d89e2, 0x510e987f }, + { 0x0a3ed3e3, -0x2259626d, -0x329f58de, 0x3d386ef1, -0x4255b11a, -0x37e852a8, 0x4fe7372a, 0x23be8d55 } + }, +}, +{ + { + { 0x567ae7a9, -0x43e10b43, -0x29bb6743, 0x3f624cb2, 0x2c1f4ec8, -0x1bef9b2e, -0x45c7bfff, 0x2ef9c5a5 }, + { 0x74ef4fad, -0x6a016e66, -0x095cf75e, 0x3a827bec, 0x09a47b01, -0x69b1fe2d, 0x5ba3c797, 0x71c43c4f }, + { -0x05618b33, -0x4902920a, -0x1b50d986, -0x0e7d8744, -0x0e1066f2, -0x7daa4c30, -0x6f3a0d6d, 0x5a758ca3 } + }, + { + { 0x1d61dc94, -0x731f6e75, -0x657ecf9a, -0x7212c9ba, -0x5017552d, -0x2b1957d7, -0x09c62bc1, 0x0a738027 }, + { -0x26b9db6b, -0x5d48d8f0, -0x2a82affd, 0x3aa8c6d2, -0x5f4b7836, -0x1c2bff41, -0x4c148d14, 0x2dbae244 }, + { 0x57ffe1cc, -0x67f0b5d1, -0x1e7c67bd, 
0x00670d0d, 0x49fb15fd, 0x105c3f4a, 0x5126a69c, 0x2698ca63 } + }, + { + { 0x5e3dd90e, 0x2e3d702f, -0x1b2dac7a, -0x61c0f6e8, 0x024da96a, 0x5e773ef6, 0x4afa3332, 0x3c004b0c }, + { 0x32b0ba78, -0x189ace78, -0x6da30075, 0x381831f7, -0x5fd6e034, 0x08a81b91, 0x49caeb07, 0x1fb43dcc }, + { 0x06f4b82b, -0x6556b954, -0x57f93b0d, 0x1ca284a5, -0x3932b879, 0x3ed3265f, -0x32e02de9, 0x6b43fd01 } + }, + { + { 0x3e760ef3, -0x4a38bda8, -0x11f54670, 0x75dc52b9, 0x072b923f, -0x40ebd83e, 0x6ff0d9f0, 0x73420b2d }, + { 0x4697c544, -0x3858a2b5, -0x20f00041, 0x15fdf848, -0x55b987a6, 0x2868b9eb, 0x5b52f714, 0x5a68d710 }, + { -0x617ae1fa, -0x50d30935, -0x39ddc73c, -0x70a6c6ed, -0x66040c8d, -0x2575476a, -0x15cb4362, 0x3db5632f } + }, + { + { -0x7d67da2b, 0x2e4990b1, 0x3e9a8991, -0x12151479, 0x4c704af8, -0x110fc2c7, -0x6a20d4f2, 0x59197ea4 }, + { -0x08a22628, -0x0b9111d5, 0x396759a5, 0x0d17b1f6, 0x499e7273, 0x1bf2d131, 0x49d75f13, 0x04321adf }, + { -0x1b1aa552, 0x04e16019, 0x7e2f92e9, -0x1884bc86, 0x6f159aa4, -0x3831d23f, -0x0b28f340, 0x45eafdc1 } + }, + { + { -0x30334e13, -0x49f1b9dc, -0x42a3fc6b, 0x59dbc292, -0x23fb7e37, 0x31a09d1d, 0x5d56d940, 0x3f73ceea }, + { -0x7fba28d5, 0x69840185, -0x30d0f9af, 0x4c22faa2, 0x6b222dc6, -0x6be5c99b, 0x0362dade, 0x5a5eebc8 }, + { 0x0a4e8dc6, -0x4858402f, 0x44c9b339, -0x41a8ff82, 0x1557aefa, 0x60c1207f, 0x266218db, 0x26058891 } + }, + { + { -0x39891abe, 0x4c818e3c, 0x03ceccad, 0x5e422c93, -0x4bed60f8, -0x13f83336, -0x4dbbbc48, 0x0dedfa10 }, + { -0x7c9f00fc, 0x59f704a6, 0x7661e6f4, -0x3c26c022, 0x12873551, -0x7ce4d58d, 0x4e615d57, 0x54ad0c2e }, + { -0x47d4add6, -0x11c4982b, -0x605a3e15, 0x36f16346, 0x6ec19fd3, -0x5a4b2d0e, -0x58856bf8, 0x62ecb2ba } + }, + { + { -0x5049d78c, -0x6df8d7ca, 0x79e104a5, 0x5fcd5e85, -0x39cf5eb6, 0x5aad01ad, 0x75663f98, 0x61913d50 }, + { 0x61152b3d, -0x1a1286ae, 0x0eddd7d1, 0x4962357d, -0x4694b38f, 0x7482c8d0, -0x56992742, 0x2e59f919 }, + { 0x1a3231da, 0x0dc62d36, -0x6bdffd90, -0x05b8a7ce, 0x3f9594ce, 0x02d80151, 0x31c05d5c, 0x3ddbc2a1 } + }, +}, +{ + { + { 0x004a35d1, -0x048ca53e, 0x3a6607c3, 0x31de0f43, -0x3ad72a67, 0x7b8591bf, -0x0a44faf4, 0x55be9a25 }, + { 0x4ffb81ef, 0x3f50a50a, 0x3bf420bf, -0x4e1fcaf7, -0x3955d330, -0x645571e4, -0x05dc85c0, 0x32239861 }, + { 0x33db3dbf, 0x0d005acd, -0x7f53ca1e, 0x0111b37c, 0x6f88ebeb, 0x4892d66c, 0x6508fbcd, 0x770eadb1 } + }, + { + { -0x5faf8e47, -0x0e2c497f, 0x3592ff3a, 0x2207659a, 0x7881e40e, 0x5f016929, -0x7945c8b2, 0x16bedd0e }, + { 0x5e4e89dd, -0x7bae0620, -0x4386c6c9, -0x3f9cfd01, 0x56a6495c, 0x5d227495, -0x5fa9fc05, 0x09a6755c }, + { 0x2c2737b5, 0x5ecccc4f, 0x2dccb703, 0x43b79e0c, 0x4ec43df3, 0x33e008bc, -0x0f8a9940, 0x06c1b840 } + }, + { + { -0x64fd7fa4, 0x69ee9e7f, 0x547d1640, -0x34007d76, -0x4dbcf698, 0x3d93a869, 0x3fe26972, 0x46b7b8cd }, + { -0x5c770789, 0x7688a5c6, -0x214d4954, 0x02a96c14, 0x1b8c2af8, 0x64c9f343, 0x54a1eed6, 0x36284355 }, + { -0x01811420, -0x167edf7a, 0x2f515437, 0x4cba6be7, 0x516efae9, 0x1d04168b, 0x43982cb9, 0x5ea13910 } + }, + { + { -0x2a2c4ffe, 0x6f2b3be4, 0x6a09c880, -0x5013cc27, -0x57433b34, 0x035f73a4, 0x4662198b, 0x22c5b928 }, + { -0x0b8fd11f, 0x49125c9c, -0x74da4cd3, 0x4520b71f, 0x501fef7e, 0x33193026, -0x372d14d5, 0x656d8997 }, + { 0x433d8939, -0x34a73702, 0x6a8d7e50, -0x765f34d2, 0x09fbbe5a, 0x79ca9553, -0x32803efa, 0x0c626616 } + }, + { + { -0x040bab4f, -0x70203c87, -0x0e5b488f, 0x45a5a970, -0x452ca6eb, -0x536de109, -0x57e3de6e, 0x42d088dc }, + { 0x4879b61f, 0x1ffeb80a, 0x4ada21ed, 0x6396726e, 0x368025ba, 0x33c7b093, -0x0c3ce878, 0x471aa0c6 }, + { -0x5fe9ae67, -0x7025f0c9, 
-0x375f1cbd, 0x0adadb77, -0x378a17e0, 0x20fbfdfc, 0x0c2206e7, 0x1cf2bea8 } + }, + { + { 0x02c0412f, -0x67d291e6, -0x24a71702, -0x6f05b37d, -0x234e7440, 0x01c2f5bc, 0x216abc66, 0x686e0c90 }, + { -0x4c9dfd54, -0x3d220e22, -0x2d1d855b, -0x6d5a01f7, -0x03f60e2d, 0x7d1648f6, 0x13bc4959, 0x74c2cc05 }, + { -0x5abc6a59, 0x1fadbadb, -0x51f25996, -0x4be5fd60, -0x445c83f9, -0x40e60a68, -0x21b7bcf3, 0x6a12b8ac } + }, + { + { 0x1aaeeb5f, 0x793bdd80, -0x3eae778f, 0x00a2a0aa, 0x1f2136b4, -0x175c8c5d, -0x036e10e7, 0x48aab888 }, + { 0x39d495d9, -0x072515e1, 0x525f1dfc, 0x592c190e, -0x3666e2e5, -0x247342fc, -0x2770f349, 0x11f7fda3 }, + { 0x5830f40e, 0x041f7e92, 0x79661c06, 0x002d6ca9, 0x2b046a2e, -0x79236007, -0x74fb6c2f, 0x76036092 } + }, + { + { 0x695a0b05, -0x4bcef71b, -0x52c85c75, 0x6cb00ee8, -0x5cac8c7f, 0x5edad6ee, -0x4923cddc, 0x3f2602d4 }, + { 0x120cf9c6, 0x21bb41c6, -0x21325a65, -0x154d55ee, 0x0aa48b34, -0x3e58d2fe, -0x1782c498, 0x215d4d27 }, + { 0x5bcaf19c, -0x374db84a, -0x4e4d39ae, 0x49779dc3, -0x2a131d1e, -0x765e7f45, -0x31371fc7, 0x13f098a3 } + }, +}, +{ + { + { 0x2796bb14, -0x0c55a85e, -0x64f825df, -0x77c54549, 0x31a0391c, -0x1ab41de8, -0x27cdfa07, 0x5ee7fb38 }, + { -0x31a13ab5, -0x6523f007, -0x73d0ecf3, 0x039c2a6b, -0x0f076aeb, 0x028007c7, -0x53fb4c95, 0x78968314 }, + { 0x41446a8e, 0x538dfdcb, 0x434937f9, -0x5a530257, 0x263c8c78, 0x46af908d, -0x6435f2f7, 0x61d0633c } + }, + { + { -0x07038c21, -0x525cd744, -0x590fc804, -0x117b96a3, 0x38c2a909, 0x637fb4db, -0x07f98424, 0x5b23ac2d }, + { -0x0024da9a, 0x63744935, 0x780b68bb, -0x3a429477, 0x553eec03, 0x6f1b3280, 0x47aed7f5, 0x6e965fd8 }, + { -0x117fad85, -0x652d46ad, -0x05219273, -0x1770e656, 0x150e82cf, 0x0e711704, -0x226a2124, 0x79b9bbb9 } + }, + { + { -0x71608c8c, -0x2e668252, -0x3044f7ea, -0x5fcd5d08, 0x6d445f0a, -0x329345ee, 0x0accb834, 0x1ba81146 }, + { 0x6a3126c2, -0x144caac0, 0x68c8c393, -0x2d9c7c58, -0x1a46857e, 0x6c0c6429, -0x3602deb9, 0x5065f158 }, + { 0x0c429954, 0x708169fb, -0x28913099, -0x1eb9ff54, 0x70e645ba, 0x2eaab98a, 0x58a4faf2, 0x3981f39e } + }, + { + { 0x6de66fde, -0x37ba205b, 0x2c40483a, -0x1ead5b00, -0x384b09ce, -0x162d1e9d, -0x2343e49b, 0x30f4452e }, + { 0x59230a93, 0x18fb8a75, 0x60e6f45d, 0x1d168f69, 0x14a93cb5, 0x3a85a945, 0x05acd0fd, 0x38dc0837 }, + { -0x3a8a68c0, -0x7a92d87e, -0x06634134, -0x05ecba97, -0x3f15b18f, -0x77bb038d, 0x593f2469, 0x632d9a1a } + }, + { + { -0x12f37b59, -0x40f602ef, 0x0d9f693a, 0x63f07181, 0x57cf8779, 0x21908c2d, -0x7509b45e, 0x3a5a7df2 }, + { -0x47f8345a, -0x094494eb, -0x43ab0f29, 0x1823c7df, 0x6e29670b, -0x44e268fd, 0x47ed4a57, 0x0b24f488 }, + { 0x511beac7, -0x23252b42, -0x12d9330e, -0x5bac7f8b, 0x005f9a65, -0x1e630061, 0x75481f63, 0x34fcf744 } + }, + { + { 0x78cfaa98, -0x5a44e255, 0x190b72f2, 0x5ceda267, 0x0a92608e, -0x6cf636ef, 0x2fb374b0, 0x0119a304 }, + { 0x789767ca, -0x3e681fb4, 0x38d9467d, -0x478eb235, -0x7c06a058, 0x55de8882, 0x4dfa63f7, 0x3d3bdc16 }, + { -0x173de883, 0x67a2d89c, 0x6895d0c1, 0x669da5f6, -0x4d7d5d50, -0x0a9a671b, -0x121df58d, 0x56c088f1 } + }, + { + { 0x24f38f02, 0x581b5fac, -0x451cf343, -0x56f41602, -0x75306d10, -0x65de96fe, -0x7ca6fc71, 0x038b7ea4 }, + { 0x10a86e17, 0x336d3d11, 0x0b75b2fa, -0x280c77ce, 0x25072988, -0x06eacc8a, -0x66ef7479, 0x09674c6b }, + { -0x66ce9008, -0x60b107df, -0x155872b1, 0x2f49d282, 0x5aef3174, 0x0971a5ab, 0x5969eb65, 0x6e5e3102 } + }, + { + { 0x63066222, 0x3304fb0e, -0x785345c1, -0x04caf977, -0x73ef9e5d, -0x42e6db89, -0x2e7c79e0, 0x3058ad43 }, + { -0x781a6c05, -0x4e939d0b, -0x35a2c18f, 0x4999edde, 0x14cc3e6d, -0x4b6e3e20, -0x76572458, 
0x08f51147 }, + { -0x1a899c30, 0x323c0ffd, -0x5dd159f0, 0x05c3df38, -0x5366b066, -0x42387543, -0x101c2367, 0x26549fa4 } + }, +}, +{ + { + { -0x08ac6947, 0x04dbbc17, -0x2d0798ba, 0x69e6a2d7, -0x0ac1543a, -0x39bf6267, 0x332e25d2, 0x606175f6 }, + { -0x78317077, 0x738b38d7, 0x4179a88d, -0x49d9a71e, -0x0eaece93, 0x30738c9c, 0x727275c9, 0x49128c7f }, + { -0x0abf1823, 0x4021370e, -0x5e0e2f5b, 0x0910d6f5, 0x5b06b807, 0x4634aacd, 0x6944f235, 0x6a39e635 } + }, + { + { 0x74049e9d, 0x1da19657, -0x6701cad5, -0x0432915f, -0x33adc95a, -0x4e3432b0, 0x3f9846e2, 0x1f5ec83d }, + { -0x206f0c19, -0x6932a9c0, -0x2405da16, 0x6c3a760e, 0x59e33cc4, 0x24f3ef09, 0x530d2e58, 0x42889e7e }, + { 0x328ccb75, -0x7104dc3d, -0x22789117, -0x50bd5df9, 0x5dfae796, 0x20fbdadc, 0x06bf9f51, 0x241e246b } + }, + { + { 0x6280bbb8, 0x7eaafc9a, -0x0bfc27f7, 0x22a70f12, 0x1bfc8d20, 0x31ce40bb, -0x1742ac12, 0x2bc65635 }, + { -0x5291670a, 0x29e68e57, 0x0b462065, 0x4c9260c8, -0x5ae144b5, 0x3f00862e, -0x4c726f69, 0x5bc2c77f }, + { -0x5694526d, -0x172a2361, -0x21e6b824, -0x1a704e83, 0x65185fa3, 0x681532ea, 0x034a7830, 0x1fdd6c3b } + }, + { + { 0x2dd8f7a9, -0x63ec595b, 0x3efdcabf, 0x2dbb1f8c, 0x5e08f7b5, -0x69e1cdc0, -0x4419361b, 0x48c8a121 }, + { 0x55dc18fe, 0x0a64e28c, 0x3399ebdd, -0x1c206167, 0x70e2e652, 0x79ac4323, 0x3ae4cc0e, 0x35ff7fc3 }, + { 0x59646445, -0x03bea584, -0x3ed749eb, -0x2ddb4d29, 0x05fbb912, 0x6035c9c9, 0x74429fab, 0x42d7a912 } + }, + { + { -0x6cc25a44, -0x565b76b9, -0x3d168614, 0x4a58920e, 0x13e5ac4c, -0x69278000, 0x4b48b147, 0x453692d7 }, + { -0x1508d12d, 0x4e6213e3, 0x43acd4e7, 0x6794981a, 0x6eb508cb, -0x00ab8322, 0x10fcb532, 0x6fed19dd }, + { -0x57aa6391, -0x2288a267, -0x20ffc1dc, -0x0bd5dec0, -0x256d759a, 0x5223e229, 0x6d38f22c, 0x063f46ba } + }, + { + { 0x37346921, 0x39843cb7, 0x38c89447, -0x58b804f9, -0x5dbacf82, -0x34727fcf, 0x6d82f068, 0x67810f8e }, + { 0x5f536694, -0x2d2dbd77, 0x42939b2c, -0x35cc5d3b, -0x382246a4, -0x6790525a, 0x2f712d5d, 0x5a152c04 }, + { -0x2dd7824c, 0x3eeb8fbc, 0x01a03e93, 0x72c7d3a3, -0x4267d9a6, 0x5473e88c, 0x5921b403, 0x7324aa51 } + }, + { + { -0x17dcab35, -0x52dc0926, -0x49a8e593, 0x6962502a, -0x1c71c82f, -0x649ae9ca, -0x2e5cced1, 0x5cac5005 }, + { 0x6c3cbe8e, -0x7a86bd0c, 0x4730c046, -0x5e2c9b4f, -0x2dc3be41, 0x1c8ed914, -0x11092a2e, 0x0838e161 }, + { -0x161c66fc, -0x733eab34, -0x7b2197ba, 0x5b3a040b, -0x4e41a292, -0x3b2759e4, -0x2779e0fe, 0x40fb897b } + }, + { + { 0x5ab10761, -0x1a8127b9, 0x6fd13746, 0x71435e20, -0x32fda9ce, 0x342f824e, -0x5786e185, 0x4b16281e }, + { 0x62de37a1, -0x7b3a5570, 0x0d1d96e1, 0x421da500, 0x6a9242d9, 0x78828630, 0x690d10da, 0x3c5e464a }, + { 0x0b813381, -0x2e3efe2b, 0x76ee6828, -0x2119f0ef, 0x383f6409, 0x0cb68893, -0x0900b7b6, 0x6183c565 } + }, +}, +{ + { + { -0x50c09992, -0x24b97ab7, -0x0eb5f15b, -0x288030fc, -0x5b45f3b9, 0x3df23ff7, 0x32ce3c85, 0x3a10dfe1 }, + { 0x1e6bf9d6, 0x741d5a46, 0x7777a581, 0x2305b3fc, 0x6474d3d9, -0x2baa8b5e, 0x6401e0ff, 0x1926e1dc }, + { -0x15e83160, -0x1f80b176, 0x3a1fc1fd, 0x2fd51546, 0x31f2c0f1, 0x175322fd, -0x79e1a2eb, 0x1fa1d01d } + }, + { + { -0x2e206b55, 0x38dcac00, -0x2ef7f217, 0x2e712bdd, -0x022a1d9e, 0x7f13e93e, -0x1165fe1b, 0x73fced18 }, + { 0x7d599832, -0x337faa6c, 0x37f15520, 0x1e4656da, 0x4e059320, -0x6609088c, 0x6a75cf33, 0x773563bc }, + { 0x63139cb3, 0x06b1e908, -0x3a5fc133, -0x5b6c2599, -0x529c76ce, -0x72883138, 0x1b864f44, 0x1f426b70 } + }, + { + { -0x6e5edaae, -0x0e81ca38, 0x575e9c76, -0x48947ead, 0x0d9b723e, -0x057cbf91, 0x3fa7e438, 0x0b76bb1b }, + { 0x41911c01, -0x1036d9b4, 0x17a22c25, -0x0e5c4848, -0x0cf0ebb9, 
0x5875da6b, 0x1d31b090, 0x4e1af527 }, + { 0x7f92939b, 0x08b8c1f9, -0x2bbb5492, -0x41988e35, -0x66447fe9, 0x22e56463, -0x488d56ab, 0x7b6dd61e } + }, + { + { -0x54fe2d39, 0x5730abf9, 0x40143b18, 0x16fb76dc, -0x5f344d7f, -0x7993419b, -0x64009502, 0x53fa9b65 }, + { 0x50f33d92, -0x48523e18, 0x608cd5cf, 0x7998fa4f, -0x7203a425, -0x5269d243, -0x50e2d0b1, 0x703e9bce }, + { -0x6b77abab, 0x6c14c8e9, 0x65aed4e5, -0x7bc5a29a, -0x4329a50f, 0x181bb73e, -0x3b39e0b0, 0x398d93e5 } + }, + { + { -0x2d181c0e, -0x3c7883a0, 0x30828bb1, 0x3b34aaa0, 0x739ef138, 0x283e26e7, 0x02c30577, 0x699c9c90 }, + { 0x33e248f3, 0x1c4bd167, 0x15bf0a5f, -0x4261ed79, -0x5ef4fc8a, -0x2bc07310, -0x20e6e4ed, 0x53b09b5d }, + { 0x5946f1cc, -0x0cf958dd, -0x331a2683, -0x6de8e74b, -0x7e4b168b, 0x28cdd247, 0x6fcdd907, 0x51caf30c } + }, + { + { 0x18ac54c7, 0x737af99a, -0x3ae34cf1, -0x6fcc8724, 0x4ce10cc7, 0x2b89bc33, -0x76071666, 0x12ae29c1 }, + { 0x7674e00a, -0x59f458be, -0x5e85840d, 0x630e8570, -0x30ccdb34, 0x3758563d, 0x2383fdaa, 0x5504aa29 }, + { 0x1f0d01cf, -0x56613f35, 0x3a34f7ae, 0x0dd1efcc, -0x2f63b1de, 0x55ca7521, 0x58eba5ea, 0x5fd14fe9 } + }, + { + { -0x406c3472, 0x3c42fe5e, 0x36d4565f, -0x412057af, -0x77bddf18, -0x1f0f7a62, 0x0725d128, 0x7dd73f96 }, + { 0x2845ab2c, -0x4a23d221, 0x0a7fe993, 0x069491b1, 0x4002e346, 0x4daaf3d6, 0x586474d1, 0x093ff26e }, + { 0x68059829, -0x4ef2db02, -0x2450dc1b, 0x75730672, -0x4ba853d7, 0x1367253a, -0x794b8f5c, 0x2f59bcbc } + }, + { + { -0x496e3cff, 0x7041d560, -0x522818e2, -0x7adfe4c1, 0x11335585, 0x16c2e163, 0x010828b1, 0x2aa55e3d }, + { -0x66e8eca1, -0x7c7b82be, 0x567d03d7, -0x52e46ee1, -0x4188552f, 0x7e7748d9, 0x2e51af4a, 0x5458b42e }, + { 0x0c07444f, -0x12ae6d1a, 0x74421d10, 0x42c54e2d, -0x024a379c, 0x352b4c82, -0x7589799c, 0x13e9004a } + }, +}, +{ + { + { -0x7f94b984, 0x1e6284c5, -0x18a29f85, -0x3a096685, -0x4c872d9e, -0x749826a8, -0x7e327490, 0x3d88d66a }, + { 0x6c032bff, -0x344a4aab, 0x29297a3a, -0x208e6e49, -0x52127e45, -0x3e008cda, 0x68be03f5, 0x71ade8bb }, + { 0x204ed789, -0x7489856d, -0x605f51d6, 0x762fcacb, 0x6dce4887, 0x771febcc, -0x700fa04d, 0x34306215 } + }, + { + { 0x2a7b31b4, -0x031de6f9, -0x55a87fea, 0x4d7adc75, -0x78b86cdc, 0x0ec276a6, 0x1fda4beb, 0x6d6d9d5d }, + { -0x1e0a40b7, -0x1fa25e59, -0x2b8c9f6e, 0x26457d6d, 0x73cc32f6, 0x77dcb077, -0x6322a033, 0x0a5d9496 }, + { -0x164f7e7d, 0x22b1a58a, -0x3ea3c775, -0x026a2f8f, -0x7af5fae9, -0x567edc8a, -0x4480cca2, 0x33384cba } + }, + { + { 0x26218b8d, 0x33bc627a, -0x3857f39f, -0x157f4de1, 0x173e9ee6, -0x6ba74ed5, 0x0e2f3059, 0x076247be }, + { 0x0ca2c7b5, 0x3c6fa268, 0x6fb64fda, 0x1b508204, 0x5431d6de, -0x14accb64, 0x6b879c89, 0x5278b38f }, + { 0x1416375a, 0x52e105f6, -0x7a54145c, -0x136850ca, 0x23a67c36, 0x26e6b506, -0x0c2b04ff, 0x5cf0e856 } + }, + { + { 0x3db342a8, -0x415131cf, -0x7bd24812, -0x345c9ca5, -0x7e80ec11, -0x177399e0, 0x4e76d5c6, 0x1b9438aa }, + { 0x1ae8cab4, -0x0936978d, -0x34b06d3b, 0x5e20741e, -0x733243c2, 0x2da53be5, 0x69970df7, 0x2dddfea2 }, + { 0x166f031a, -0x75af8882, 0x0fb7a328, 0x067b39f1, 0x010fbd76, 0x1925c9a6, -0x338bf6fb, 0x6df9b575 } + }, + { + { 0x48cade41, -0x13203ca5, -0x4dcd7d90, 0x6a88471f, 0x40a01b6a, 0x740a4a24, 0x003b5f29, 0x471e5796 }, + { 0x27f6bdcf, 0x42c11929, 0x403d61ca, -0x706e6e86, -0x7461e09f, -0x23e3a59a, 0x04ec0f8d, 0x15960478 }, + { -0x5312c854, -0x2569444d, -0x16df7316, 0x7a2423b5, 0x38aebae2, 0x24cc5c30, -0x23a251d1, 0x50c356af } + }, + { + { 0x1b31b964, -0x30126321, -0x735ae50d, -0x0b79567b, -0x1573e07c, 0x14897265, -0x6cd53400, 0x784a53dd }, + { 0x41c30318, 0x09dcbf43, -0x7ce7e232, 
-0x1145f9ef, -0x23e1d65f, -0x3e863f32, 0x073f35b0, 0x1dbf7b89 }, + { 0x14fc4920, 0x2d99f9df, -0x3bb6601b, 0x76ccb60c, -0x1a30fffd, -0x5becd345, 0x54f000ea, 0x3f93d823 } + }, + { + { 0x79e14978, -0x1553ed2f, -0x441400a2, -0x006dc00d, 0x0663ce27, 0x4af663e4, 0x11a5f5ff, 0x0fd381a8 }, + { -0x61fb317b, -0x7e7c1898, 0x04465341, 0x678fb71e, 0x6688edac, -0x526dfa71, 0x532b099a, 0x5da350d3 }, + { -0x5bc920ac, -0x0da95314, -0x51962918, 0x108b6168, 0x6b5d036c, 0x20d986cb, -0x011d50b0, 0x655957b9 } + }, + { + { -0x2ffd2f54, -0x423ebf65, -0x4a33265a, 0x66660245, -0x05217a14, -0x7dce823c, 0x6ad7df0d, 0x02fe934b }, + { -0x56fdfcf1, -0x51574f81, -0x0b9c2ebd, -0x07738996, 0x3c787a60, 0x15b08366, -0x7d985b58, 0x08eab114 }, + { -0x3048158c, -0x10a30f00, -0x5e34bd54, 0x22897633, -0x310d7a1e, -0x2b31f3ac, -0x75eb95ab, 0x30408c04 } + }, +}, +{ + { + { 0x193b877f, -0x44d1ff37, -0x1f23af95, -0x131c5770, 0x36de649f, -0x130c4840, -0x672161e6, 0x5f460408 }, + { -0x7cd03125, 0x739d8845, -0x5194079d, -0x05c72937, -0x48b00109, 0x32bc0dca, 0x14bce45e, 0x73937e88 }, + { 0x297bf48d, -0x46fc8eea, -0x2b0f97cc, -0x562ec4de, 0x4696bdc6, -0x1e68eaa9, -0x6e2a17cb, 0x2cf8a4e8 } + }, + { + { 0x17d06ba2, 0x2cb5487e, 0x3950196b, 0x24d2381c, -0x7a6875d0, -0x289a637f, -0x6e295b0a, 0x7a6f7f28 }, + { 0x07110f67, 0x6d93fd87, 0x7c38b549, -0x22b3f62d, -0x3d8c957a, 0x7cb16a4c, 0x58252a09, 0x2049bd6e }, + { 0x6a9aef49, 0x7d09fd8d, 0x5b3db90b, -0x0f119f42, 0x519ebfd4, 0x4c21b52c, -0x3aba6be3, 0x6011aadf } + }, + { + { 0x02cbf890, 0x63ded0c8, 0x0dff6aaa, -0x042f6736, -0x46491267, 0x624d0afd, 0x79340b1e, 0x69ce18b7 }, + { -0x306a07c4, 0x5f67926d, 0x71289071, 0x7c7e8561, -0x667085a5, -0x295e180d, 0x0b62f9e0, 0x6fc5cc1b }, + { -0x4d678635, -0x2e10aad8, -0x2b816f6e, -0x22e551c4, 0x189f2352, 0x127e0442, -0x1a8efe0f, 0x15596b3a } + }, + { + { 0x7e5124ca, 0x09ff3116, -0x2638ba21, 0x0be4158b, 0x7ef556e5, 0x292b7d22, -0x50492ec8, 0x3aa4e241 }, + { 0x3f9179a2, 0x462739d2, -0x68292231, -0x007cedcf, 0x53f2148a, 0x1307deb5, 0x7b5f4dda, 0x0d223768 }, + { 0x2a3305f5, 0x2cc138bf, -0x5d16d93d, 0x48583f8f, 0x5549d2eb, 0x083ab1a2, 0x4687a36c, 0x32fcaa6e } + }, + { + { 0x2787ccdf, 0x3207a473, -0x0dec1c08, 0x17e31908, -0x09f269b2, -0x2a4d1329, -0x3d9ff417, 0x746f6336 }, + { -0x3a82650b, 0x7bc56e8d, -0x620f420e, 0x3e0bd2ed, 0x22efe4a3, -0x553feb22, -0x014295a4, 0x4627e9ce }, + { -0x549368e4, 0x3f4af345, -0x66bc8ce1, -0x1d77148e, 0x0344186d, 0x33596a8a, 0x7ed66293, 0x7b491700 } + }, + { + { -0x22ac5d23, 0x54341b28, -0x20bd03c1, -0x55e86fa5, 0x4dd2f8f4, 0x0ff592d9, -0x1f732c83, 0x1d03620f }, + { -0x547b4f9c, 0x2d85fb5c, -0x760c43ec, 0x497810d2, 0x7b15ce0c, 0x476adc44, -0x07bb0285, 0x122ba376 }, + { -0x5d4b1aac, -0x3dfdcd33, 0x115d187f, -0x612f02be, 0x7dd479d9, 0x2eabb4be, 0x2b68ec4c, 0x02c70bf5 } + }, + { + { 0x458d72e1, -0x531acd41, 0x7cb73cb5, 0x5be768e0, -0x11744219, 0x56cf7d94, -0x014bc5fd, 0x6b0697e3 }, + { 0x5d0b2fbb, -0x5d7813b5, 0x074882ca, 0x415c5790, -0x3e2f7ea4, -0x1fbb59e2, 0x409ef5e0, 0x26334f0a }, + { -0x209d5c40, -0x49370fb6, 0x076da45d, 0x3ef000ef, 0x49f0d2a9, -0x636346a8, 0x441b2fae, 0x1cc37f43 } + }, + { + { -0x36315147, -0x2899a90f, 0x18e5656a, 0x1c5b15f8, -0x7bb3dccc, 0x26e72832, 0x2f196838, 0x3a346f77 }, + { 0x5cc7324f, 0x508f565a, -0x1af956de, -0x2f9e3b40, 0x5c45ac19, -0x04e75425, 0x0380314a, 0x6c6809c1 }, + { -0x1d259538, -0x2d2aaeee, -0x4e17ae13, -0x1642fccf, -0x71398d9e, -0x69f8b923, 0x6ef7c5d0, 0x05911b9f } + }, +}, +{ + { + { -0x3a01606c, 0x01c18980, 0x716fd5c8, -0x329a9897, -0x2e6a5f7a, -0x7e9fba3d, 0x66cc7982, 0x6e2b7f32 }, + { 
-0x49c800d3, -0x162328aa, -0x36780f3c, -0x13b3cb71, -0x0c043849, -0x312a6d7b, -0x6c1e1579, 0x33053547 }, + { -0x083ca971, -0x337fdb98, 0x19974cb3, -0x6216457e, -0x4a47eca0, -0x5448dd64, 0x6fbeba62, 0x44e2017a } + }, + { + { -0x49359133, -0x7807d30d, 0x18f4a0c2, 0x580f893e, 0x2604e557, 0x05893007, 0x56d19c1d, 0x6cab6ac2 }, + { 0x54dab774, -0x3b3d58bd, 0x4eaf031a, -0x71a2b3c4, 0x42838f17, -0x4893dc2e, 0x68dce4ea, 0x749a098f }, + { 0x2cc1de60, -0x23201f60, 0x51c5575b, 0x032665ff, 0x073abeeb, 0x2c0c32f1, -0x328479fa, 0x6a882014 } + }, + { + { -0x50b01492, -0x2eee2e84, -0x4cc55b5d, 0x050bba42, -0x114b93d0, 0x17514c3c, 0x1bc27d75, 0x54bedb8b }, + { -0x5b8b804b, -0x5ad56d02, 0x1fa5ab89, -0x23ed5bb7, -0x47b85b32, -0x27d256b5, -0x6aed33b2, 0x4d77edce }, + { 0x77e2189c, 0x77c8e145, -0x00663bbb, -0x5c1b9096, 0x6d335343, 0x3144dfc8, 0x7c4216a9, 0x3a96559e } + }, + { + { -0x7f4555ae, 0x44938968, -0x0d7a6bf2, 0x4c98afc4, -0x5babb74a, -0x10b55865, -0x5a855181, 0x5278c510 }, + { -0x0bd52d12, 0x12550d37, -0x675e040b, -0x74871ffc, 0x33894cb2, 0x5d530782, 0x3e498d0c, 0x02c84e4e }, + { 0x294c0b94, -0x5ab22f8c, -0x20e7004a, -0x0aa2b948, -0x72517c9a, -0x0f90133b, -0x7e6f2e9b, 0x58865766 } + }, + { + { 0x3de25cc3, -0x40a7cb10, -0x297eab6a, -0x47783752, -0x6b7e176e, 0x5105221a, -0x088dc06d, 0x6760ed19 }, + { 0x1aef7117, -0x2b88edcf, 0x229e92c7, 0x50343101, -0x62ea6469, 0x7a95e184, -0x74a2d637, 0x2449959b }, + { -0x53ca1ea0, 0x669ba3b7, -0x457bdfaa, 0x2eccf73f, -0x3f7fb0f9, 0x1aec1f17, 0x1856f4e7, 0x0d96bc03 } + }, + { + { -0x338afa1f, -0x4e2acb50, 0x16c35288, 0x32cd0034, 0x0762c29d, -0x34c95a80, 0x237a0bf8, 0x5bfe69b9 }, + { 0x75c52d82, 0x3318be77, 0x54d0aab9, 0x4cb764b5, -0x3388c26f, -0x5430c2d9, -0x7edcd776, 0x3bf4d184 }, + { 0x78a151ab, 0x183eab7e, -0x66f6c89d, -0x44166f37, 0x4ac7e335, -0x008e8292, 0x25f39f88, 0x4c5cddb3 } + }, + { + { -0x185606fe, 0x57750967, 0x4f5b467e, 0x2c37fdfc, 0x3177ba46, -0x4d9e99c6, -0x23d2acd5, 0x3a375e78 }, + { 0x6190a6eb, -0x3f0948b3, 0x2db8f4e4, 0x20ea81a4, -0x68cea8a0, -0x57429083, 0x62ac7c21, 0x33b1d602 }, + { 0x2d4dddea, -0x7ebe18d1, 0x62c607c8, -0x19150168, 0x573cafd0, 0x23c28458, 0x4ff97346, 0x46b9476f } + }, + { + { 0x0d58359f, 0x1215505c, -0x03d73b95, 0x2a2013c7, -0x761599b2, 0x24a0a1af, -0x5eecf1e1, 0x4400b638 }, + { 0x4f901e5c, 0x0c1ffea4, 0x2184b782, 0x2b0b6fb7, 0x0114db88, -0x1a78006f, 0x4785a142, 0x37130f36 }, + { -0x6912e63d, 0x3a01b764, -0x12cd8dd0, 0x31e00ab0, -0x7c35ea4f, 0x520a8857, 0x5accbec7, 0x06aab987 } + }, +}, +{ + { + { 0x512eeaef, 0x5349acf3, 0x1cc1cb49, 0x20c141d3, -0x56659773, 0x24180c07, -0x39b4d2e9, 0x555ef9d1 }, + { -0x0a20f145, -0x3ecc667d, 0x512c4cac, -0x3f0c8a71, 0x0bb398e1, 0x2cf1130a, -0x55d8f39e, 0x6b3cecf9 }, + { 0x3b73bd08, 0x36a770ba, -0x5c5040f4, 0x624aef08, -0x4bf6b90e, 0x5737ff98, 0x3381749d, 0x675f4de1 } + }, + { + { 0x3bdab31d, -0x5ed00927, -0x629ad202, 0x0725d80f, -0x65416b79, 0x019c4ff3, -0x7d32c3bd, 0x60f450b8 }, + { 0x6b1782fc, 0x0e2c5203, 0x6cad83b4, 0x64816c81, 0x6964073e, -0x2f234227, 0x0164c520, 0x13d99df7 }, + { 0x21e5c0ca, 0x014b5ec3, -0x28e6405e, 0x4fcb69c9, 0x750023a0, 0x4e5f1c18, 0x55edac80, 0x1c06de9e } + }, + { + { -0x00929656, -0x002ad4c0, -0x23bfb645, 0x34530b18, -0x5cb26769, 0x5e4a5c2f, 0x7d32ba2d, 0x78096f8e }, + { -0x5cc13b1e, -0x66f0852a, -0x41d11f72, 0x6608f938, 0x63284515, -0x635ebc3b, -0x13d249f3, 0x4cf38a1f }, + { 0x0dfa5ce7, -0x5f55559b, 0x48b5478c, -0x063b61d6, 0x7003725b, 0x4f09cc7d, 0x26091abe, 0x373cad3a } + }, + { + { -0x76224453, -0x0e415705, 0x61aeaecb, 0x3bcb2cbc, 0x1f9b8d9d, -0x70a75845, 0x5112a686, 
0x21547eda }, + { -0x7d360a84, -0x4d6b9cb3, 0x24934536, 0x1fcbfde1, 0x418cdb5a, -0x6163b24d, 0x454419fc, 0x0040f3d9 }, + { -0x02a6792d, -0x210216c7, 0x510a380c, -0x0bd8d377, -0x44cee647, -0x48d45bf9, 0x4a254df4, 0x63550a33 } + }, + { + { 0x72547b49, -0x6445a7bb, -0x1d3bf720, -0x0cfa3906, -0x38cb0e73, 0x60e8fa69, -0x55828986, 0x39a92baf }, + { -0x4a9630c9, 0x6507d6ed, 0x0ca52ee1, 0x178429b0, -0x149429a3, -0x1583ff70, -0x250870af, 0x3eea62c7 }, + { -0x196cd8b2, -0x62db38ed, 0x68dbd375, 0x5f638577, -0x14754c66, 0x70525560, 0x65c9c4cd, 0x68436a06 } + }, + { + { -0x17dfef84, 0x1e56d317, -0x7bf5169b, -0x3ad997bc, 0x320ffc7a, -0x3e1f5e3a, -0x6e9eeb8e, 0x5373669c }, + { 0x202f3f27, -0x43fdca18, 0x64f975b0, -0x38a3ff1e, -0x5c73dbea, -0x6e5b162b, -0x75487607, 0x17b6e7f6 }, + { -0x65f1ada9, 0x5d2814ab, -0x36354c04, -0x6f70df7c, 0x5b2d1eca, -0x50350a78, 0x78f87d11, 0x1cb4b5a6 } + }, + { + { -0x5d5ff819, 0x6b74aa62, -0x0f8e384f, -0x0cee1f50, 0x000be223, 0x5707e438, -0x7d109154, 0x2dc0fd2d }, + { 0x394afc6c, -0x499b3f95, -0x6725a04f, 0x0c88de24, 0x4bcad834, 0x4f8d0316, -0x218bcb5e, 0x330bca78 }, + { 0x1119744e, -0x67d1007c, 0x2b074724, -0x0696a16a, -0x4036ac05, -0x3a753eb1, 0x369f1cf5, 0x3c31be1b } + }, + { + { -0x0634bd8e, -0x3e97436d, -0x38312468, -0x51478ee1, 0x34ac8d7a, 0x7f0e52aa, 0x7e7d55bb, 0x41cec109 }, + { 0x08948aee, -0x4f0b79b3, -0x6e45e391, 0x07dc19ee, -0x59535ea8, 0x7975cdae, 0x4262d4bb, 0x330b6113 }, + { -0x5d927f76, -0x0869e629, 0x1d9e156d, -0x44e02b62, -0x245e20d9, 0x73d7c36c, 0x1f28777d, 0x26b44cd9 } + }, +}, +{ + { + { -0x4fd7a0c9, -0x50bb7bd3, 0x47efc8df, -0x78ace770, -0x07df6866, -0x6a8b1f6f, 0x69615579, 0x0e378d60 }, + { 0x393aa6d8, 0x300a9035, -0x5ed44e33, 0x2b501131, -0x0f6c3dde, 0x7b1ff677, -0x3547d453, 0x4309c1f8 }, + { -0x7cf8a5ab, -0x26056e8f, 0x6b009fdc, 0x4bdb5ad2, -0x29c210f2, 0x7829ad2c, 0x75fd3877, 0x078fc549 } + }, + { + { -0x47cc5676, -0x1dffb4a5, 0x2d4c3330, 0x44775dec, 0x7eace913, 0x3aa24406, -0x2a71ff57, 0x272630e3 }, + { 0x28878f2d, -0x782042ec, 0x1e9421a1, 0x134636dd, 0x257341a3, 0x4f17c951, -0x52d69348, 0x5df98d4b }, + { -0x1336f4ac, -0x0c987030, 0x12043599, -0x0ffeba65, 0x3758b89b, 0x26725fbc, 0x73a719ae, 0x4325e4aa } + }, + { + { -0x30960a63, -0x12db9d66, -0x22a5440c, 0x2a4a1cce, 0x56b2d67b, 0x3535ca1f, 0x43b1b42d, 0x5d8c68d0 }, + { 0x433c3493, 0x657dc6ef, -0x7f24073d, 0x65375e9f, 0x5b372dae, 0x47fd2d46, 0x796e7947, 0x4966ab79 }, + { -0x1c4bd4f6, -0x11ccd2b3, 0x16a4601c, -0x27b1a5d5, 0x078ba3e4, 0x78243877, 0x184ee437, 0x77ed1eb4 } + }, + { + { -0x616d12e6, 0x185d43f8, -0x01b8e63a, -0x4fb5e116, -0x590fc0b1, 0x499fbe88, 0x3c859bdd, 0x5d8b0d2f }, + { 0x201839a0, -0x402b1ec1, 0x3e3df161, -0x5110001e, 0x6b5d1fe3, -0x49a4fb10, 0x2b62fbc0, 0x52e085fb }, + { -0x5ab30d46, 0x124079ea, 0x001b26e7, -0x28db9a15, -0x36850803, 0x6843bcfd, 0x55eacd02, 0x0524b42b } + }, + { + { -0x647d6154, -0x43e72353, -0x4a0a8630, 0x23ae7d28, 0x69384233, -0x3cb9edd6, -0x182b5377, 0x1a6110b2 }, + { -0x1babb850, -0x02f2a242, 0x092005ee, 0x6cec351a, 0x567579cb, -0x665b87bc, 0x16e7fa45, 0x59d242a2 }, + { -0x19966854, 0x4f833f6a, 0x361839a4, 0x6849762a, -0x68f54adb, 0x6985dec1, -0x234e0aba, 0x53045e89 } + }, + { + { -0x72ba01ee, -0x7b25c322, -0x1bbb1d2e, -0x42bd3de8, 0x1f7e3598, -0x57ae6988, 0x5616e2b2, 0x7642c93f }, + { -0x28acac25, -0x34744cba, -0x51aee1de, -0x03034db5, -0x2af51911, -0x345b72c0, -0x0b0834a3, 0x26e3bae5 }, + { 0x4595f8e4, 0x2323daa7, -0x7a85414c, -0x21977375, 0x1c59326e, 0x3fc48e96, 0x15c9b8ba, 0x0b2e73ca } + }, + { + { 0x79c03a55, 0x0e3fbfaf, 0x4cbb5acf, 0x3077af05, -0x24c21c61, 
-0x2a3aadbb, 0x476a4af7, 0x015e68c1 }, + { -0x3e80afda, -0x2944bbd8, -0x04a56359, -0x614d8ddd, 0x1919c644, -0x1c845afd, -0x4a6599fe, 0x21ce380d }, + { 0x20066a38, -0x3e2ad7ae, 0x3570aef3, -0x6a9fc1ae, 0x226b8a4d, -0x7cd9a659, 0x1f8eedc9, 0x5dd68909 } + }, + { + { -0x5acecf7c, 0x1d022591, -0x29d8f78e, -0x35d2b552, 0x2f0bfd20, -0x795ed47b, -0x528258b8, 0x56e6c439 }, + { -0x402c37aa, -0x34537b22, -0x4ca00dbc, 0x1624c348, 0x5d9cad07, -0x48077236, -0x5d3d1418, 0x3b0e574d }, + { 0x42bdbae6, -0x38fb00b7, -0x4d21e087, 0x5e21ade2, 0x5652fad8, -0x16a24c0d, -0x70f7143f, 0x0822b537 } + }, +}, +{ + { + { 0x62730383, -0x1e480d6d, -0x143575d4, 0x4b5279ff, -0x402becec, -0x25038876, -0x638d9ef1, 0x7deb1014 }, + { -0x70c78b8b, 0x51f04847, -0x634134c4, -0x4da2430c, -0x2660dfab, -0x6554edbc, 0x1c10a5d6, 0x2c709e6c }, + { -0x78991186, -0x349d5096, 0x5553cd0e, 0x66cbec04, 0x0f0be4b5, 0x58800138, -0x09d31d16, 0x08e68e9f } + }, + { + { 0x0ab8f2f9, 0x2f2d09d5, -0x3aa6dc21, -0x5346de73, 0x73766cb9, 0x4a8f3426, 0x38f719f5, 0x4cb13bd7 }, + { 0x4bc130ad, 0x34ad500a, 0x3d0bd49c, -0x72c724b7, 0x500a89be, -0x5da3c268, -0x1145c4f7, 0x2f1f3f87 }, + { -0x1aea49b6, -0x087b738b, -0x24b56fc8, -0x5a6afe46, 0x3f751b50, -0x3df2cec1, -0x3f51d118, 0x19a1e353 } + }, + { + { -0x2a694243, -0x4bde8d33, -0x671103c0, -0x6c1fbabd, -0x4bbef64b, -0x604eacb9, 0x0266ae34, 0x736bd399 }, + { -0x4505fa3d, 0x7d1c7560, -0x391aa19f, -0x4c1e5f60, -0x3f299b8d, -0x1cad68e8, -0x3df3cb7a, 0x41546b11 }, + { -0x6ccb4c4c, -0x7aacd2b0, 0x60816573, 0x46fd114b, 0x425c8375, -0x33a0a0d0, -0x478054a4, 0x412295a2 } + }, + { + { -0x1d6c153a, 0x2e655261, 0x2133acdb, -0x7ba56dfd, 0x7900996b, 0x460975cb, 0x195add80, 0x0760bb8d }, + { -0x0a812917, 0x19c99b88, 0x6df8c825, 0x5393cb26, -0x4cf52d8d, 0x5cee3213, -0x4ad2d1cc, 0x14e153eb }, + { -0x32197e76, 0x413e1a17, -0x12965f7c, 0x57156da9, 0x46caccb1, 0x2cbf268f, -0x3cc53a0e, 0x6b34be9b } + }, + { + { 0x6571f2d3, 0x11fc6965, 0x530e737a, -0x393617bb, -0x2b01afcb, -0x1cc5185e, 0x2e6dd30b, 0x01b9c7b6 }, + { 0x3a78c0b2, -0x0c20d09c, -0x0dd1fd84, 0x4c3e971e, 0x49c1b5a3, -0x1382e3a2, 0x0922dd2d, 0x2012c18f }, + { 0x5ac89d29, -0x77f4aa1b, 0x45a0a763, 0x1483241f, -0x3d1893e1, 0x3d36efdf, 0x4e4bade8, 0x08af5b78 } + }, + { + { -0x7633d3b5, -0x1d8ceb2e, -0x5d78e873, 0x4be4bd11, -0x05cc9b32, 0x18d528d6, -0x50267d92, 0x6423c1d5 }, + { -0x77e0dacd, 0x283499dc, 0x779323b6, -0x62fada26, 0x673441f4, -0x76852205, 0x163a168d, 0x32b79d71 }, + { -0x12034c96, -0x337a0727, 0x3746e5f9, 0x22bcc28f, -0x061a2c33, -0x1b621cc8, -0x3ec1d234, 0x480a5efb } + }, + { + { 0x42ce221f, -0x499eb31c, 0x4c053928, 0x6e199dcc, -0x23e341fd, 0x663fb4a4, 0x691c8e06, 0x24b31d47 }, + { 0x01622071, 0x0b51e70b, -0x74e2503b, 0x06b505cf, -0x10a55433, 0x2c6bb061, 0x0cb7bf31, 0x47aa2760 }, + { -0x3fea073d, 0x2a541eed, 0x7c693f7c, 0x11a4fe7e, 0x4ea278d6, -0x0f5099ed, 0x14dda094, 0x545b585d } + }, + { + { -0x1c4cde1f, 0x6204e4d0, 0x28ff1e95, 0x3baa637a, 0x5b99bd9e, 0x0b0ccffd, 0x64c8d071, 0x4d22dc3e }, + { -0x5f2bc5f1, 0x67bf275e, 0x089beebe, -0x521971cc, -0x2b8618d2, 0x4289134c, 0x32ba5454, 0x0f62f9c3 }, + { -0x29c4a0c7, -0x034b9a77, 0x57cbcf61, 0x5cae6a3f, -0x6ac505fb, -0x01453d2e, 0x36371436, 0x1c0fa01a } + }, +}, +{ + { + { 0x54c53fae, -0x3ee11a18, 0x2b4f3ff4, 0x6a0b06c1, -0x1f49858e, 0x33540f80, -0x32f81c11, 0x15f18fc3 }, + { -0x4383296e, -0x18ab8bb7, -0x1908c221, 0x0f9abeaa, 0x00837e29, 0x4af01ca7, 0x3f1bc183, 0x63ab1b5d }, + { -0x4fd70b74, 0x32750763, 0x556a065f, 0x06020740, -0x3cb6a4a8, -0x2ac427ee, -0x79a0af73, 0x08706c9b } + }, + { + { 0x38b41246, -0x3366e4bf, 
0x6f9ac26b, 0x243b9c52, -0x48345443, -0x4610b6b3, -0x2f7d1300, 0x5fba433d }, + { 0x3d343dff, -0x0c835d55, -0x7f5439e9, 0x1a8c6a2d, -0x2b330036, -0x71b61fcb, -0x455e2e47, 0x48b46bee }, + { -0x366be530, -0x63b61cab, 0x74498f84, -0x468cb522, 0x66663e5c, 0x41c3fed0, -0x1718ef4d, 0x0ecfedf8 } + }, + { + { -0x16bfc89e, 0x744f7463, -0x72033637, -0x08657212, 0x55e4cde3, 0x163a6496, -0x4d7b0bcb, 0x3b61788d }, + { -0x632b8f27, 0x76430f9f, -0x5bd09ff8, -0x49d53365, 0x59adad5e, 0x1898297c, -0x4873af80, 0x7789dd2d }, + { 0x0d6ef6b2, -0x4dddd7e7, 0x46ce4bfa, -0x56b5994e, 0x4f0b6cc7, 0x46c1a77a, -0x148cc731, 0x4236ccff } + }, + { + { -0x2588820a, 0x3bd82dbf, 0x0b98369e, 0x71b177cc, -0x7af3c967, 0x1d0e8463, 0x48e2d1f1, 0x5a71945b }, + { 0x0d55e274, -0x7b68bfb3, -0x3b52d4ad, 0x6c6663d9, -0x5256a8cc, -0x13d04f27, -0x324708c4, 0x2617e120 }, + { 0x405b4b42, 0x6f203dd5, 0x10b24509, 0x327ec604, -0x53d577ba, -0x63cb8dd0, 0x11ffeb6a, 0x77de29fc } + }, + { + { -0x13312d36, -0x7ca1ec71, -0x1569c466, -0x736150ed, -0x4de9f15a, -0x36a04040, -0x5278876e, 0x575e66f3 }, + { -0x7c488758, -0x4f53a837, -0x28016ed4, 0x53cdcca9, -0x00e0a624, 0x61c2b854, -0x0f218254, 0x3a1a2cf0 }, + { -0x377034c6, -0x667fc5d9, 0x275ec0b0, 0x345a6789, -0x0093d41b, 0x459789d0, 0x1e70a8b2, 0x62f88265 } + }, + { + { 0x698a19e0, 0x6d822986, 0x74d78a71, -0x2367de1f, -0x0934e0b9, 0x41a85f31, -0x432563af, 0x352721c2 }, + { 0x59ff1be4, 0x085ae2c7, 0x3b0e40b7, 0x149145c9, 0x7ff27379, -0x3b981806, -0x2a38c56b, 0x4eeecf0a }, + { 0x213fc985, 0x48329952, 0x368a1746, 0x1087cf0d, 0x66c15aa5, -0x71ad9e4f, 0x2ed24c21, 0x2d5b2d84 } + }, + { + { 0x196ac533, 0x5eb7d13d, -0x247f41d5, 0x377234ec, 0x7cf5ae24, -0x1ebb3004, -0x3bbe5314, 0x5226bcf9 }, + { -0x142c212f, 0x02cfebd9, 0x39021974, -0x2ba4de89, -0x01cf5e49, 0x7576f813, -0x5cb1093e, 0x5691b6f9 }, + { 0x23e5b547, 0x79ee6c72, -0x7ccf2987, 0x6f5f5076, 0x6d8adce9, -0x128c1e17, 0x1d8ccc03, 0x27c3da1e } + }, + { + { 0x630ef9f6, 0x28302e71, 0x2b64cee0, -0x3d2b5dfd, 0x4b6292be, 0x09082030, -0x57d520e8, 0x5fca747a }, + { 0x3fe24c74, 0x7eb9efb2, 0x1651be01, 0x3e50f49f, 0x21858dea, 0x3ea732dc, 0x5bb810f9, 0x17377bd7 }, + { 0x5c258ea5, 0x232a03c3, 0x6bcb0cf1, -0x790dc5d4, 0x2e442166, 0x3dad8d0d, -0x548979d5, 0x04a8933c } + }, +}, +{ + { + { -0x736c95b0, 0x69082b0e, -0x3e253a4a, -0x06365fcb, -0x3b2049cc, 0x6fb73e54, 0x1d2bc140, 0x4005419b }, + { 0x22943dff, -0x2d39fb4a, 0x44cfb3a0, -0x43734132, -0x687f7988, 0x5d254ff3, 0x3b1ca6bf, 0x0fa3614f }, + { -0x46417d10, -0x5ffc0143, 0x3a44ac90, 0x2089c1af, 0x1954fa8e, -0x07b6606f, -0x10bf54be, 0x1fba218a } + }, + { + { 0x3e7b0194, 0x4f3e5704, 0x08daaf7f, -0x57e2c112, -0x6623210f, -0x37c63955, -0x00889e2b, 0x6c535d13 }, + { -0x05370ac2, -0x54ab6bb8, 0x7ba63741, -0x7e091766, 0x6c2b5e01, 0x74fd6c7d, -0x573791be, 0x392e3aca }, + { 0x3e8a35af, 0x4cbd34e9, 0x5887e816, 0x2e078144, -0x0d654f55, 0x19319c76, -0x2af53ec5, 0x25e17fe4 } + }, + { + { 0x76f121a7, -0x6ea0800b, 0x2fcd87e3, -0x3cb5cdd9, 0x4d1be526, -0x3345d022, -0x76967665, 0x6bba828f }, + { 0x1e04f676, 0x0a289bd7, -0x29bdf06b, 0x208e1c52, 0x34691fab, 0x5186d8b0, 0x2a9fb351, 0x25575144 }, + { -0x6f01c6ff, -0x1d2e439a, -0x5f66852b, 0x4cb54a18, -0x507b9f2c, -0x68e296ec, 0x7f6b7be4, 0x559d504f } + }, + { + { -0x092d9903, -0x63b76e19, 0x0307781b, 0x0744a19b, 0x6061e23b, -0x77c770e3, 0x354bd50e, 0x123ea6a3 }, + { -0x4c14ab2b, -0x588c7c88, -0x5aaac384, 0x1d69d366, -0x06d7ff46, 0x0a26cf62, -0x7f81cde9, 0x01ab12d5 }, + { 0x41e32d96, 0x118d1890, -0x27cea7b8, -0x46121c3e, -0x27cdba27, 0x1eab4271, -0x36e75eac, 0x4a3961e2 } + }, + { + { 
-0x0cdcc0e2, 0x0327d644, 0x34fcf016, 0x499a260e, -0x0d254687, -0x7c4a58ea, -0x642beee1, 0x68aceead }, + { -0x07194460, 0x71dc3be0, 0x7effe30a, -0x293107cc, -0x1ec5b896, -0x566dbda1, -0x04e2489d, 0x2cd6bce3 }, + { -0x0c283df0, 0x38b4c90e, -0x4852fbf4, 0x308e6e24, -0x4818c1dd, 0x3860d9f1, -0x4af70a69, 0x595760d5 } + }, + { + { -0x02fdd870, -0x77d53415, -0x3beea8a0, -0x7650ccfb, 0x7d3473f4, 0x65f492e3, 0x54515a2b, 0x2cb2c5df }, + { 0x04aa6397, 0x6129bfe1, -0x5b580335, -0x7069fff8, 0x7d909458, 0x3f8bc089, -0x234d6e57, 0x709fa43e }, + { 0x63fd2aca, -0x14f5a274, 0x2e694eff, -0x2dd43e9a, -0x07344fc6, 0x2723f36e, -0x0f37ece1, 0x70f029ec } + }, + { + { 0x5e10b0b9, 0x2a6aafaa, -0x10fbe557, 0x78f0a370, -0x55c529e1, 0x773efb77, -0x58b4261f, 0x44eca5a2 }, + { 0x2eed3e33, 0x461307b3, -0x5baa7e19, -0x51fbd0cd, 0x195f0366, -0x36bbb62d, 0x6c314858, 0x0b7d5d8a }, + { 0x7b95d543, 0x25d44832, -0x5ccbf0e3, 0x70d38300, 0x60e1c52b, -0x21e3ace4, 0x2c7de9e4, 0x27222451 } + }, + { + { 0x42a975fc, -0x40844476, -0x69525ca8, -0x73a3c689, -0x321255b8, -0x1d803891, -0x0943df5a, 0x19735fd7 }, + { 0x49c5342e, 0x1abc92af, -0x4d190530, -0x001127ef, -0x0337b1d7, -0x105d7373, -0x5bb33abd, 0x11b5df18 }, + { 0x42c84266, -0x1c546f30, 0x7f19547e, -0x147b71f1, 0x65a497b9, 0x2503a1d0, -0x6e2076a1, 0x0fef9111 } + }, +}, +{ + { + { 0x5b1c16b7, 0x6ab5dcb8, 0x3c7b27a5, -0x6b3f0318, 0x735517be, -0x5b4ee3e6, -0x45f15056, 0x499238d0 }, + { -0x54e39147, -0x4eaf835f, 0x16b687b3, -0x42bb70c2, 0x2c7a91ab, 0x3455fb7f, 0x2f2adec1, 0x7579229e }, + { 0x7aba8b57, -0x130b91ae, -0x742e9b85, 0x15a08c47, 0x5f706fef, 0x7af1c6a6, -0x0fc5cf2b, 0x6345fa78 } + }, + { + { -0x42270f5c, -0x6c2c3417, -0x02e88cfe, -0x24ead3e5, 0x7f17a875, 0x7dbddc6d, -0x70bd9102, 0x3e1a71cc }, + { 0x1015e7a1, -0x20fd06a1, -0x564bfd9d, 0x790ec41d, 0x33ea1107, 0x4d3a0ea1, -0x1cc50737, 0x54f70be7 }, + { -0x6f45429e, -0x37c35c1d, 0x0291c833, -0x7f121c99, -0x2c86ff3c, -0x377fc734, 0x1ec31fa1, 0x2c5fc023 } + }, + { + { 0x02456e65, -0x3bdd1b2f, -0x352b846f, -0x78beb53f, -0x5d490023, 0x1592e2bb, -0x0a3deff1, 0x75d9d2bf }, + { 0x17038b4f, -0x01456ee9, -0x3621107f, -0x1aedc8df, 0x5d0d8834, 0x1c97e4e7, 0x23dc3bc6, 0x68afae7a }, + { 0x3626e81c, 0x5bd9b476, -0x435fd123, -0x766996ca, 0x61f077b3, 0x0a41193d, 0x00ce5471, 0x3097a242 } + }, + { + { 0x6695c486, -0x5e9d18dc, 0x35a89607, 0x131d6334, -0x5f2ed5c9, 0x30521561, -0x59504c9d, 0x56704bad }, + { -0x380747b4, 0x57427734, 0x01b270e9, -0x0ebe5ec2, -0x4b1a9b5a, 0x02d1adfe, -0x317c42b8, 0x4bb23d92 }, + { 0x52f912b9, -0x5093b559, -0x27988f38, 0x5e665f6c, -0x5c3732a8, 0x4c35ac83, 0x10a58a7e, 0x2b7a29c0 } + }, + { + { -0x40fff792, 0x33810a23, -0x18c90084, -0x50316da2, -0x1db6dd2c, 0x3d60e670, 0x4f96061b, 0x11ce9e71 }, + { -0x2f3e313d, -0x3bff8089, -0x453b6d08, -0x72efdf4a, 0x7e69daaf, 0x32ec29d5, -0x626a0320, 0x59940875 }, + { -0x27ea453f, 0x219ef713, 0x485be25c, -0x0ebeb9a3, 0x4e513c51, 0x6d5447cc, 0x5ef44393, 0x174926be } + }, + { + { -0x6c15fdd2, 0x3ef5d415, 0x0ed0eed6, 0x5cbcc1a2, 0x07382c8c, -0x702db131, 0x06d8e1ad, 0x6fa42ead }, + { -0x03a42a45, -0x4a214d07, -0x1e27ef1f, -0x6d2558d6, -0x48d5e3a7, -0x503b3024, 0x3fc22a24, 0x497d7881 }, + { 0x1f73371f, -0x1d897db6, 0x4f5b6736, 0x7f7cf01c, 0x04fa46e7, 0x7e201fe3, 0x57808c96, 0x785a36a3 } + }, + { + { 0x5d517bc3, 0x07044298, -0x519ac988, 0x6acd56c7, -0x67a5889d, 0x00a27983, -0x1aed99d5, 0x5167effa }, + { 0x63014d2b, -0x7da04203, 0x6ca7578b, -0x37adc964, 0x5c0b5df0, 0x5b2fcd28, 0x58048c8f, 0x12ab214c }, + { 0x0f53c4b6, -0x42b1561f, -0x7536e5ec, 0x1673dc5f, 0x2acc1aba, -0x5707e5b2, 0x24332a25, 
0x33a92a79 } + }, + { + { 0x218f2ada, 0x7ba95ba0, 0x330fb9ca, -0x300bdd79, 0x56c6d907, -0x2525b693, -0x0b4111ac, 0x5380c296 }, + { 0x27996c02, -0x622e0b67, -0x1fb2e8ae, 0x0cb3b058, 0x7fd02c3e, 0x1f7e8896, -0x3474c14f, 0x2f964268 }, + { 0x66898d0a, -0x62b0d8fc, 0x0aff3f7a, 0x3d098799, 0x67daba45, -0x2f610c9e, 0x7b1c669c, 0x7761455e } + }, +}, +}; +#elif defined(CURVED25519_128BIT) +static const ge_precomp base[32][8] = { +{ + { + { 0x493c6f58c3b85, 0x0df7181c325f7, 0x0f50b0b3e4cb7, 0x5329385a44c32, 0x07cf9d3a33d4b }, + { 0x03905d740913e, 0x0ba2817d673a2, 0x23e2827f4e67c, 0x133d2e0c21a34, 0x44fd2f9298f81 }, + { 0x11205877aaa68, 0x479955893d579, 0x50d66309b67a0, 0x2d42d0dbee5ee, 0x6f117b689f0c6 }, + }, + { + { 0x4e7fc933c71d7, 0x2cf41feb6b244, 0x7581c0a7d1a76, 0x7172d534d32f0, 0x590c063fa87d2 }, + { 0x1a56042b4d5a8, 0x189cc159ed153, 0x5b8deaa3cae04, 0x2aaf04f11b5d8, 0x6bb595a669c92 }, + { 0x2a8b3a59b7a5f, 0x3abb359ef087f, 0x4f5a8c4db05af, 0x5b9a807d04205, 0x701af5b13ea50 }, + }, + { + { 0x5b0a84cee9730, 0x61d10c97155e4, 0x4059cc8096a10, 0x47a608da8014f, 0x7a164e1b9a80f }, + { 0x11fe8a4fcd265, 0x7bcb8374faacc, 0x52f5af4ef4d4f, 0x5314098f98d10, 0x2ab91587555bd }, + { 0x6933f0dd0d889, 0x44386bb4c4295, 0x3cb6d3162508c, 0x26368b872a2c6, 0x5a2826af12b9b }, + }, + { + { 0x351b98efc099f, 0x68fbfa4a7050e, 0x42a49959d971b, 0x393e51a469efd, 0x680e910321e58 }, + { 0x6050a056818bf, 0x62acc1f5532bf, 0x28141ccc9fa25, 0x24d61f471e683, 0x27933f4c7445a }, + { 0x3fbe9c476ff09, 0x0af6b982e4b42, 0x0ad1251ba78e5, 0x715aeedee7c88, 0x7f9d0cbf63553 }, + }, + { + { 0x2bc4408a5bb33, 0x078ebdda05442, 0x2ffb112354123, 0x375ee8df5862d, 0x2945ccf146e20 }, + { 0x182c3a447d6ba, 0x22964e536eff2, 0x192821f540053, 0x2f9f19e788e5c, 0x154a7e73eb1b5 }, + { 0x3dbf1812a8285, 0x0fa17ba3f9797, 0x6f69cb49c3820, 0x34d5a0db3858d, 0x43aabe696b3bb }, + }, + { + { 0x4eeeb77157131, 0x1201915f10741, 0x1669cda6c9c56, 0x45ec032db346d, 0x51e57bb6a2cc3 }, + { 0x006b67b7d8ca4, 0x084fa44e72933, 0x1154ee55d6f8a, 0x4425d842e7390, 0x38b64c41ae417 }, + { 0x4326702ea4b71, 0x06834376030b5, 0x0ef0512f9c380, 0x0f1a9f2512584, 0x10b8e91a9f0d6 }, + }, + { + { 0x25cd0944ea3bf, 0x75673b81a4d63, 0x150b925d1c0d4, 0x13f38d9294114, 0x461bea69283c9 }, + { 0x72c9aaa3221b1, 0x267774474f74d, 0x064b0e9b28085, 0x3f04ef53b27c9, 0x1d6edd5d2e531 }, + { 0x36dc801b8b3a2, 0x0e0a7d4935e30, 0x1deb7cecc0d7d, 0x053a94e20dd2c, 0x7a9fbb1c6a0f9 }, + }, + { + { 0x7596604dd3e8f, 0x6fc510e058b36, 0x3670c8db2cc0d, 0x297d899ce332f, 0x0915e76061bce }, + { 0x75dedf39234d9, 0x01c36ab1f3c54, 0x0f08fee58f5da, 0x0e19613a0d637, 0x3a9024a1320e0 }, + { 0x1f5d9c9a2911a, 0x7117994fafcf8, 0x2d8a8cae28dc5, 0x74ab1b2090c87, 0x26907c5c2ecc4 }, + }, +}, +{ + { + { 0x4dd0e632f9c1d, 0x2ced12622a5d9, 0x18de9614742da, 0x79ca96fdbb5d4, 0x6dd37d49a00ee }, + { 0x3635449aa515e, 0x3e178d0475dab, 0x50b4712a19712, 0x2dcc2860ff4ad, 0x30d76d6f03d31 }, + { 0x444172106e4c7, 0x01251afed2d88, 0x534fc9bed4f5a, 0x5d85a39cf5234, 0x10c697112e864 }, + }, + { + { 0x62aa08358c805, 0x46f440848e194, 0x447b771a8f52b, 0x377ba3269d31d, 0x03bf9baf55080 }, + { 0x3c4277dbe5fde, 0x5a335afd44c92, 0x0c1164099753e, 0x70487006fe423, 0x25e61cabed66f }, + { 0x3e128cc586604, 0x5968b2e8fc7e2, 0x049a3d5bd61cf, 0x116505b1ef6e6, 0x566d78634586e }, + }, + { + { 0x54285c65a2fd0, 0x55e62ccf87420, 0x46bb961b19044, 0x1153405712039, 0x14fba5f34793b }, + { 0x7a49f9cc10834, 0x2b513788a22c6, 0x5ff4b6ef2395b, 0x2ec8e5af607bf, 0x33975bca5ecc3 }, + { 0x746166985f7d4, 0x09939000ae79a, 0x5844c7964f97a, 0x13617e1f95b3d, 0x14829cea83fc5 }, + }, + { + { 
0x70b2f4e71ecb8, 0x728148efc643c, 0x0753e03995b76, 0x5bf5fb2ab6767, 0x05fc3bc4535d7 }, + { 0x37b8497dd95c2, 0x61549d6b4ffe8, 0x217a22db1d138, 0x0b9cf062eb09e, 0x2fd9c71e5f758 }, + { 0x0b3ae52afdedd, 0x19da76619e497, 0x6fa0654d2558e, 0x78219d25e41d4, 0x373767475c651 }, + }, + { + { 0x095cb14246590, 0x002d82aa6ac68, 0x442f183bc4851, 0x6464f1c0a0644, 0x6bf5905730907 }, + { 0x299fd40d1add9, 0x5f2de9a04e5f7, 0x7c0eebacc1c59, 0x4cca1b1f8290a, 0x1fbea56c3b18f }, + { 0x778f1e1415b8a, 0x6f75874efc1f4, 0x28a694019027f, 0x52b37a96bdc4d, 0x02521cf67a635 }, + }, + { + { 0x46720772f5ee4, 0x632c0f359d622, 0x2b2092ba3e252, 0x662257c112680, 0x001753d9f7cd6 }, + { 0x7ee0b0a9d5294, 0x381fbeb4cca27, 0x7841f3a3e639d, 0x676ea30c3445f, 0x3fa00a7e71382 }, + { 0x1232d963ddb34, 0x35692e70b078d, 0x247ca14777a1f, 0x6db556be8fcd0, 0x12b5fe2fa048e }, + }, + { + { 0x37c26ad6f1e92, 0x46a0971227be5, 0x4722f0d2d9b4c, 0x3dc46204ee03a, 0x6f7e93c20796c }, + { 0x0fbc496fce34d, 0x575be6b7dae3e, 0x4a31585cee609, 0x037e9023930ff, 0x749b76f96fb12 }, + { 0x2f604aea6ae05, 0x637dc939323eb, 0x3fdad9b048d47, 0x0a8b0d4045af7, 0x0fcec10f01e02 }, + }, + { + { 0x2d29dc4244e45, 0x6927b1bc147be, 0x0308534ac0839, 0x4853664033f41, 0x413779166feab }, + { 0x558a649fe1e44, 0x44635aeefcc89, 0x1ff434887f2ba, 0x0f981220e2d44, 0x4901aa7183c51 }, + { 0x1b7548c1af8f0, 0x7848c53368116, 0x01b64e7383de9, 0x109fbb0587c8f, 0x41bb887b726d1 }, + }, +}, +{ + { + { 0x34c597c6691ae, 0x7a150b6990fc4, 0x52beb9d922274, 0x70eed7164861a, 0x0a871e070c6a9 }, + { 0x07d44744346be, 0x282b6a564a81d, 0x4ed80f875236b, 0x6fbbe1d450c50, 0x4eb728c12fcdb }, + { 0x1b5994bbc8989, 0x74b7ba84c0660, 0x75678f1cdaeb8, 0x23206b0d6f10c, 0x3ee7300f2685d }, + }, + { + { 0x27947841e7518, 0x32c7388dae87f, 0x414add3971be9, 0x01850832f0ef1, 0x7d47c6a2cfb89 }, + { 0x255e49e7dd6b7, 0x38c2163d59eba, 0x3861f2a005845, 0x2e11e4ccbaec9, 0x1381576297912 }, + { 0x2d0148ef0d6e0, 0x3522a8de787fb, 0x2ee055e74f9d2, 0x64038f6310813, 0x148cf58d34c9e }, + }, + { + { 0x72f7d9ae4756d, 0x7711e690ffc4a, 0x582a2355b0d16, 0x0dccfe885b6b4, 0x278febad4eaea }, + { 0x492f67934f027, 0x7ded0815528d4, 0x58461511a6612, 0x5ea2e50de1544, 0x3ff2fa1ebd5db }, + { 0x2681f8c933966, 0x3840521931635, 0x674f14a308652, 0x3bd9c88a94890, 0x4104dd02fe9c6 }, + }, + { + { 0x14e06db096ab8, 0x1219c89e6b024, 0x278abd486a2db, 0x240b292609520, 0x0165b5a48efca }, + { 0x2bf5e1124422a, 0x673146756ae56, 0x14ad99a87e830, 0x1eaca65b080fd, 0x2c863b00afaf5 }, + { 0x0a474a0846a76, 0x099a5ef981e32, 0x2a8ae3c4bbfe6, 0x45c34af14832c, 0x591b67d9bffec }, + }, + { + { 0x1b3719f18b55d, 0x754318c83d337, 0x27c17b7919797, 0x145b084089b61, 0x489b4f8670301 }, + { 0x70d1c80b49bfa, 0x3d57e7d914625, 0x3c0722165e545, 0x5e5b93819e04f, 0x3de02ec7ca8f7 }, + { 0x2102d3aeb92ef, 0x68c22d50c3a46, 0x42ea89385894e, 0x75f9ebf55f38c, 0x49f5fbba496cb }, + }, + { + { 0x5628c1e9c572e, 0x598b108e822ab, 0x55d8fae29361a, 0x0adc8d1a97b28, 0x06a1a6c288675 }, + { 0x49a108a5bcfd4, 0x6178c8e7d6612, 0x1f03473710375, 0x73a49614a6098, 0x5604a86dcbfa6 }, + { 0x0d1d47c1764b6, 0x01c08316a2e51, 0x2b3db45c95045, 0x1634f818d300c, 0x20989e89fe274 }, + }, + { + { 0x4278b85eaec2e, 0x0ef59657be2ce, 0x72fd169588770, 0x2e9b205260b30, 0x730b9950f7059 }, + { 0x777fd3a2dcc7f, 0x594a9fb124932, 0x01f8e80ca15f0, 0x714d13cec3269, 0x0403ed1d0ca67 }, + { 0x32d35874ec552, 0x1f3048df1b929, 0x300d73b179b23, 0x6e67be5a37d0b, 0x5bd7454308303 }, + }, + { + { 0x4932115e7792a, 0x457b9bbb930b8, 0x68f5d8b193226, 0x4164e8f1ed456, 0x5bb7db123067f }, + { 0x2d19528b24cc2, 0x4ac66b8302ff3, 0x701c8d9fdad51, 0x6c1b35c5b3727, 
0x133a78007380a }, + { 0x1f467c6ca62be, 0x2c4232a5dc12c, 0x7551dc013b087, 0x0690c11b03bcd, 0x740dca6d58f0e }, + }, +}, +{ + { + { 0x28c570478433c, 0x1d8502873a463, 0x7641e7eded49c, 0x1ecedd54cf571, 0x2c03f5256c2b0 }, + { 0x0ee0752cfce4e, 0x660dd8116fbe9, 0x55167130fffeb, 0x1c682b885955c, 0x161d25fa963ea }, + { 0x718757b53a47d, 0x619e18b0f2f21, 0x5fbdfe4c1ec04, 0x5d798c81ebb92, 0x699468bdbd96b }, + }, + { + { 0x53de66aa91948, 0x045f81a599b1b, 0x3f7a8bd214193, 0x71d4da412331a, 0x293e1c4e6c4a2 }, + { 0x72f46f4dafecf, 0x2948ffadef7a3, 0x11ecdfdf3bc04, 0x3c2e98ffeed25, 0x525219a473905 }, + { 0x6134b925112e1, 0x6bb942bb406ed, 0x070c445c0dde2, 0x411d822c4d7a3, 0x5b605c447f032 }, + }, + { + { 0x1fec6f0e7f04c, 0x3cebc692c477d, 0x077986a19a95e, 0x6eaaaa1778b0f, 0x2f12fef4cc5ab }, + { 0x5805920c47c89, 0x1924771f9972c, 0x38bbddf9fc040, 0x1f7000092b281, 0x24a76dcea8aeb }, + { 0x522b2dfc0c740, 0x7e8193480e148, 0x33fd9a04341b9, 0x3c863678a20bc, 0x5e607b2518a43 }, + }, + { + { 0x4431ca596cf14, 0x015da7c801405, 0x03c9b6f8f10b5, 0x0346922934017, 0x201f33139e457 }, + { 0x31d8f6cdf1818, 0x1f86c4b144b16, 0x39875b8d73e9d, 0x2fbf0d9ffa7b3, 0x5067acab6ccdd }, + { 0x27f6b08039d51, 0x4802f8000dfaa, 0x09692a062c525, 0x1baea91075817, 0x397cba8862460 }, + }, + { + { 0x5c3fbc81379e7, 0x41bbc255e2f02, 0x6a3f756998650, 0x1297fd4e07c42, 0x771b4022c1e1c }, + { 0x13093f05959b2, 0x1bd352f2ec618, 0x075789b88ea86, 0x61d1117ea48b9, 0x2339d320766e6 }, + { 0x5d986513a2fa7, 0x63f3a99e11b0f, 0x28a0ecfd6b26d, 0x53b6835e18d8f, 0x331a189219971 }, + }, + { + { 0x12f3a9d7572af, 0x10d00e953c4ca, 0x603df116f2f8a, 0x33dc276e0e088, 0x1ac9619ff649a }, + { 0x66f45fb4f80c6, 0x3cc38eeb9fea2, 0x107647270db1f, 0x710f1ea740dc8, 0x31167c6b83bdf }, + { 0x33842524b1068, 0x77dd39d30fe45, 0x189432141a0d0, 0x088fe4eb8c225, 0x612436341f08b }, + }, + { + { 0x349e31a2d2638, 0x0137a7fa6b16c, 0x681ae92777edc, 0x222bfc5f8dc51, 0x1522aa3178d90 }, + { 0x541db874e898d, 0x62d80fb841b33, 0x03e6ef027fa97, 0x7a03c9e9633e8, 0x46ebe2309e5ef }, + { 0x02f5369614938, 0x356e5ada20587, 0x11bc89f6bf902, 0x036746419c8db, 0x45fe70f505243 }, + }, + { + { 0x24920c8951491, 0x107ec61944c5e, 0x72752e017c01f, 0x122b7dda2e97a, 0x16619f6db57a2 }, + { 0x075a6960c0b8c, 0x6dde1c5e41b49, 0x42e3f516da341, 0x16a03fda8e79e, 0x428d1623a0e39 }, + { 0x74a4401a308fd, 0x06ed4b9558109, 0x746f1f6a08867, 0x4636f5c6f2321, 0x1d81592d60bd3 }, + }, +}, +{ + { + { 0x5b69f7b85c5e8, 0x17a2d175650ec, 0x4cc3e6dbfc19e, 0x73e1d3873be0e, 0x3a5f6d51b0af8 }, + { 0x68756a60dac5f, 0x55d757b8aec26, 0x3383df45f80bd, 0x6783f8c9f96a6, 0x20234a7789ecd }, + { 0x20db67178b252, 0x73aa3da2c0eda, 0x79045c01c70d3, 0x1b37b15251059, 0x7cd682353cffe }, + }, + { + { 0x5cd6068acf4f3, 0x3079afc7a74cc, 0x58097650b64b4, 0x47fabac9c4e99, 0x3ef0253b2b2cd }, + { 0x1a45bd887fab6, 0x65748076dc17c, 0x5b98000aa11a8, 0x4a1ecc9080974, 0x2838c8863bdc0 }, + { 0x3b0cf4a465030, 0x022b8aef57a2d, 0x2ad0677e925ad, 0x4094167d7457a, 0x21dcb8a606a82 }, + }, + { + { 0x500fabe7731ba, 0x7cc53c3113351, 0x7cf65fe080d81, 0x3c5d966011ba1, 0x5d840dbf6c6f6 }, + { 0x004468c9d9fc8, 0x5da8554796b8c, 0x3b8be70950025, 0x6d5892da6a609, 0x0bc3d08194a31 }, + { 0x6380d309fe18b, 0x4d73c2cb8ee0d, 0x6b882adbac0b6, 0x36eabdddd4cbe, 0x3a4276232ac19 }, + }, + { + { 0x0c172db447ecb, 0x3f8c505b7a77f, 0x6a857f97f3f10, 0x4fcc0567fe03a, 0x0770c9e824e1a }, + { 0x2432c8a7084fa, 0x47bf73ca8a968, 0x1639176262867, 0x5e8df4f8010ce, 0x1ff177cea16de }, + { 0x1d99a45b5b5fd, 0x523674f2499ec, 0x0f8fa26182613, 0x58f7398048c98, 0x39f264fd41500 }, + }, + { + { 0x34aabfe097be1, 0x43bfc03253a33, 
0x29bc7fe91b7f3, 0x0a761e4844a16, 0x65c621272c35f }, + { 0x53417dbe7e29c, 0x54573827394f5, 0x565eea6f650dd, 0x42050748dc749, 0x1712d73468889 }, + { 0x389f8ce3193dd, 0x2d424b8177ce5, 0x073fa0d3440cd, 0x139020cd49e97, 0x22f9800ab19ce }, + }, + { + { 0x29fdd9a6efdac, 0x7c694a9282840, 0x6f7cdeee44b3a, 0x55a3207b25cc3, 0x4171a4d38598c }, + { 0x2368a3e9ef8cb, 0x454aa08e2ac0b, 0x490923f8fa700, 0x372aa9ea4582f, 0x13f416cd64762 }, + { 0x758aa99c94c8c, 0x5f6001700ff44, 0x7694e488c01bd, 0x0d5fde948eed6, 0x508214fa574bd }, + }, + { + { 0x215bb53d003d6, 0x1179e792ca8c3, 0x1a0e96ac840a2, 0x22393e2bb3ab6, 0x3a7758a4c86cb }, + { 0x269153ed6fe4b, 0x72a23aef89840, 0x052be5299699c, 0x3a5e5ef132316, 0x22f960ec6faba }, + { 0x111f693ae5076, 0x3e3bfaa94ca90, 0x445799476b887, 0x24a0912464879, 0x5d9fd15f8de7f }, + }, + { + { 0x44d2aeed7521e, 0x50865d2c2a7e4, 0x2705b5238ea40, 0x46c70b25d3b97, 0x3bc187fa47eb9 }, + { 0x408d36d63727f, 0x5faf8f6a66062, 0x2bb892da8de6b, 0x769d4f0c7e2e6, 0x332f35914f8fb }, + { 0x70115ea86c20c, 0x16d88da24ada8, 0x1980622662adf, 0x501ebbc195a9d, 0x450d81ce906fb }, + }, +}, +{ + { + { 0x4d8961cae743f, 0x6bdc38c7dba0e, 0x7d3b4a7e1b463, 0x0844bdee2adf3, 0x4cbad279663ab }, + { 0x3b6a1a6205275, 0x2e82791d06dcf, 0x23d72caa93c87, 0x5f0b7ab68aaf4, 0x2de25d4ba6345 }, + { 0x19024a0d71fcd, 0x15f65115f101a, 0x4e99067149708, 0x119d8d1cba5af, 0x7d7fbcefe2007 }, + }, + { + { 0x45dc5f3c29094, 0x3455220b579af, 0x070c1631e068a, 0x26bc0630e9b21, 0x4f9cd196dcd8d }, + { 0x71e6a266b2801, 0x09aae73e2df5d, 0x40dd8b219b1a3, 0x546fb4517de0d, 0x5975435e87b75 }, + { 0x297d86a7b3768, 0x4835a2f4c6332, 0x070305f434160, 0x183dd014e56ae, 0x7ccdd084387a0 }, + }, + { + { 0x484186760cc93, 0x7435665533361, 0x02f686336b801, 0x5225446f64331, 0x3593ca848190c }, + { 0x6422c6d260417, 0x212904817bb94, 0x5a319deb854f5, 0x7a9d4e060da7d, 0x428bd0ed61d0c }, + { 0x3189a5e849aa7, 0x6acbb1f59b242, 0x7f6ef4753630c, 0x1f346292a2da9, 0x27398308da2d6 }, + }, + { + { 0x10e4c0a702453, 0x4daafa37bd734, 0x49f6bdc3e8961, 0x1feffdcecdae6, 0x572c2945492c3 }, + { 0x38d28435ed413, 0x4064f19992858, 0x7680fbef543cd, 0x1aadd83d58d3c, 0x269597aebe8c3 }, + { 0x7c745d6cd30be, 0x27c7755df78ef, 0x1776833937fa3, 0x5405116441855, 0x7f985498c05bc }, + }, + { + { 0x615520fbf6363, 0x0b9e9bf74da6a, 0x4fe8308201169, 0x173f76127de43, 0x30f2653cd69b1 }, + { 0x1ce889f0be117, 0x36f6a94510709, 0x7f248720016b4, 0x1821ed1e1cf91, 0x76c2ec470a31f }, + { 0x0c938aac10c85, 0x41b64ed797141, 0x1beb1c1185e6d, 0x1ed5490600f07, 0x2f1273f159647 }, + }, + { + { 0x08bd755a70bc0, 0x49e3a885ce609, 0x16585881b5ad6, 0x3c27568d34f5e, 0x38ac1997edc5f }, + { 0x1fc7c8ae01e11, 0x2094d5573e8e7, 0x5ca3cbbf549d2, 0x4f920ecc54143, 0x5d9e572ad85b6 }, + { 0x6b517a751b13b, 0x0cfd370b180cc, 0x5377925d1f41a, 0x34e56566008a2, 0x22dfcd9cbfe9e }, + }, + { + { 0x459b4103be0a1, 0x59a4b3f2d2add, 0x7d734c8bb8eeb, 0x2393cbe594a09, 0x0fe9877824cde }, + { 0x3d2e0c30d0cd9, 0x3f597686671bb, 0x0aa587eb63999, 0x0e3c7b592c619, 0x6b2916c05448c }, + { 0x334d10aba913b, 0x045cdb581cfdb, 0x5e3e0553a8f36, 0x50bb3041effb2, 0x4c303f307ff00 }, + }, + { + { 0x403580dd94500, 0x48df77d92653f, 0x38a9fe3b349ea, 0x0ea89850aafe1, 0x416b151ab706a }, + { 0x23bd617b28c85, 0x6e72ee77d5a61, 0x1a972ff174dde, 0x3e2636373c60f, 0x0d61b8f78b2ab }, + { 0x0d7efe9c136b0, 0x1ab1c89640ad5, 0x55f82aef41f97, 0x46957f317ed0d, 0x191a2af74277e }, + }, +}, +{ + { + { 0x62b434f460efb, 0x294c6c0fad3fc, 0x68368937b4c0f, 0x5c9f82910875b, 0x237e7dbe00545 }, + { 0x6f74bc53c1431, 0x1c40e5dbbd9c2, 0x6c8fb9cae5c97, 0x4845c5ce1b7da, 0x7e2e0e450b5cc }, + { 
0x575ed6701b430, 0x4d3e17fa20026, 0x791fc888c4253, 0x2f1ba99078ac1, 0x71afa699b1115 }, + }, + { + { 0x23c1c473b50d6, 0x3e7671de21d48, 0x326fa5547a1e8, 0x50e4dc25fafd9, 0x00731fbc78f89 }, + { 0x66f9b3953b61d, 0x555f4283cccb9, 0x7dd67fb1960e7, 0x14707a1affed4, 0x021142e9c2b1c }, + { 0x0c71848f81880, 0x44bd9d8233c86, 0x6e8578efe5830, 0x4045b6d7041b5, 0x4c4d6f3347e15 }, + }, + { + { 0x4ddfc988f1970, 0x4f6173ea365e1, 0x645daf9ae4588, 0x7d43763db623b, 0x38bf9500a88f9 }, + { 0x7eccfc17d1fc9, 0x4ca280782831e, 0x7b8337db1d7d6, 0x5116def3895fb, 0x193fddaaa7e47 }, + { 0x2c93c37e8876f, 0x3431a28c583fa, 0x49049da8bd879, 0x4b4a8407ac11c, 0x6a6fb99ebf0d4 }, + }, + { + { 0x122b5b6e423c6, 0x21e50dff1ddd6, 0x73d76324e75c0, 0x588485495418e, 0x136fda9f42c5e }, + { 0x6c1bb560855eb, 0x71f127e13ad48, 0x5c6b304905aec, 0x3756b8e889bc7, 0x75f76914a3189 }, + { 0x4dfb1a305bdd1, 0x3b3ff05811f29, 0x6ed62283cd92e, 0x65d1543ec52e1, 0x022183510be8d }, + }, + { + { 0x2710143307a7f, 0x3d88fb48bf3ab, 0x249eb4ec18f7a, 0x136115dff295f, 0x1387c441fd404 }, + { 0x766385ead2d14, 0x0194f8b06095e, 0x08478f6823b62, 0x6018689d37308, 0x6a071ce17b806 }, + { 0x3c3d187978af8, 0x7afe1c88276ba, 0x51df281c8ad68, 0x64906bda4245d, 0x3171b26aaf1ed }, + }, + { + { 0x5b7d8b28a47d1, 0x2c2ee149e34c1, 0x776f5629afc53, 0x1f4ea50fc49a9, 0x6c514a6334424 }, + { 0x7319097564ca8, 0x1844ebc233525, 0x21d4543fdeee1, 0x1ad27aaff1bd2, 0x221fd4873cf08 }, + { 0x2204f3a156341, 0x537414065a464, 0x43c0c3bedcf83, 0x5557e706ea620, 0x48daa596fb924 }, + }, + { + { 0x61d5dc84c9793, 0x47de83040c29e, 0x189deb26507e7, 0x4d4e6fadc479a, 0x58c837fa0e8a7 }, + { 0x28e665ca59cc7, 0x165c715940dd9, 0x0785f3aa11c95, 0x57b98d7e38469, 0x676dd6fccad84 }, + { 0x1688596fc9058, 0x66f6ad403619f, 0x4d759a87772ef, 0x7856e6173bea4, 0x1c4f73f2c6a57 }, + }, + { + { 0x6706efc7c3484, 0x6987839ec366d, 0x0731f95cf7f26, 0x3ae758ebce4bc, 0x70459adb7daf6 }, + { 0x24fbd305fa0bb, 0x40a98cc75a1cf, 0x78ce1220a7533, 0x6217a10e1c197, 0x795ac80d1bf64 }, + { 0x1db4991b42bb3, 0x469605b994372, 0x631e3715c9a58, 0x7e9cfefcf728f, 0x5fe162848ce21 }, + }, +}, +{ + { + { 0x1852d5d7cb208, 0x60d0fbe5ce50f, 0x5a1e246e37b75, 0x51aee05ffd590, 0x2b44c043677da }, + { 0x1214fe194961a, 0x0e1ae39a9e9cb, 0x543c8b526f9f7, 0x119498067e91d, 0x4789d446fc917 }, + { 0x487ab074eb78e, 0x1d33b5e8ce343, 0x13e419feb1b46, 0x2721f565de6a4, 0x60c52eef2bb9a }, + }, + { + { 0x3c5c27cae6d11, 0x36a9491956e05, 0x124bac9131da6, 0x3b6f7de202b5d, 0x70d77248d9b66 }, + { 0x589bc3bfd8bf1, 0x6f93e6aa3416b, 0x4c0a3d6c1ae48, 0x55587260b586a, 0x10bc9c312ccfc }, + { 0x2e84b3ec2a05b, 0x69da2f03c1551, 0x23a174661a67b, 0x209bca289f238, 0x63755bd3a976f }, + }, + { + { 0x7101897f1acb7, 0x3d82cb77b07b8, 0x684083d7769f5, 0x52b28472dce07, 0x2763751737c52 }, + { 0x7a03e2ad10853, 0x213dcc6ad36ab, 0x1a6e240d5bdd6, 0x7c24ffcf8fedf, 0x0d8cc1c48bc16 }, + { 0x402d36eb419a9, 0x7cef68c14a052, 0x0f1255bc2d139, 0x373e7d431186a, 0x70c2dd8a7ad16 }, + }, + { + { 0x4967db8ed7e13, 0x15aeed02f523a, 0x6149591d094bc, 0x672f204c17006, 0x32b8613816a53 }, + { 0x194509f6fec0e, 0x528d8ca31acac, 0x7826d73b8b9fa, 0x24acb99e0f9b3, 0x2e0fac6363948 }, + { 0x7f7bee448cd64, 0x4e10f10da0f3c, 0x3936cb9ab20e9, 0x7a0fc4fea6cd0, 0x4179215c735a4 }, + }, + { + { 0x633b9286bcd34, 0x6cab3badb9c95, 0x74e387edfbdfa, 0x14313c58a0fd9, 0x31fa85662241c }, + { 0x094e7d7dced2a, 0x068fa738e118e, 0x41b640a5fee2b, 0x6bb709df019d4, 0x700344a30cd99 }, + { 0x26c422e3622f4, 0x0f3066a05b5f0, 0x4e2448f0480a6, 0x244cde0dbf095, 0x24bb2312a9952 }, + }, + { + { 0x00c2af5f85c6b, 0x0609f4cf2883f, 0x6e86eb5a1ca13, 
0x68b44a2efccd1, 0x0d1d2af9ffeb5 }, + { 0x0ed1732de67c3, 0x308c369291635, 0x33ef348f2d250, 0x004475ea1a1bb, 0x0fee3e871e188 }, + { 0x28aa132621edf, 0x42b244caf353b, 0x66b064cc2e08a, 0x6bb20020cbdd3, 0x16acd79718531 }, + }, + { + { 0x1c6c57887b6ad, 0x5abf21fd7592b, 0x50bd41253867a, 0x3800b71273151, 0x164ed34b18161 }, + { 0x772af2d9b1d3d, 0x6d486448b4e5b, 0x2ce58dd8d18a8, 0x1849f67503c8b, 0x123e0ef6b9302 }, + { 0x6d94c192fe69a, 0x5475222a2690f, 0x693789d86b8b3, 0x1f5c3bdfb69dc, 0x78da0fc61073f }, + }, + { + { 0x780f1680c3a94, 0x2a35d3cfcd453, 0x005e5cdc7ddf8, 0x6ee888078ac24, 0x054aa4b316b38 }, + { 0x15d28e52bc66a, 0x30e1e0351cb7e, 0x30a2f74b11f8c, 0x39d120cd7de03, 0x2d25deeb256b1 }, + { 0x0468d19267cb8, 0x38cdca9b5fbf9, 0x1bbb05c2ca1e2, 0x3b015758e9533, 0x134610a6ab7da }, + }, +}, +{ + { + { 0x265e777d1f515, 0x0f1f54c1e39a5, 0x2f01b95522646, 0x4fdd8db9dde6d, 0x654878cba97cc }, + { 0x38ec78df6b0fe, 0x13caebea36a22, 0x5ebc6e54e5f6a, 0x32804903d0eb8, 0x2102fdba2b20d }, + { 0x6e405055ce6a1, 0x5024a35a532d3, 0x1f69054daf29d, 0x15d1d0d7a8bd5, 0x0ad725db29ecb }, + }, + { + { 0x7bc0c9b056f85, 0x51cfebffaffd8, 0x44abbe94df549, 0x7ecbbd7e33121, 0x4f675f5302399 }, + { 0x267b1834e2457, 0x6ae19c378bb88, 0x7457b5ed9d512, 0x3280d783d05fb, 0x4aefcffb71a03 }, + { 0x536360415171e, 0x2313309077865, 0x251444334afbc, 0x2b0c3853756e8, 0x0bccbb72a2a86 }, + }, + { + { 0x55e4c50fe1296, 0x05fdd13efc30d, 0x1c0c6c380e5ee, 0x3e11de3fb62a8, 0x6678fd69108f3 }, + { 0x6962feab1a9c8, 0x6aca28fb9a30b, 0x56db7ca1b9f98, 0x39f58497018dd, 0x4024f0ab59d6b }, + { 0x6fa31636863c2, 0x10ae5a67e42b0, 0x27abbf01fda31, 0x380a7b9e64fbc, 0x2d42e2108ead4 }, + }, + { + { 0x17b0d0f537593, 0x16263c0c9842e, 0x4ab827e4539a4, 0x6370ddb43d73a, 0x420bf3a79b423 }, + { 0x5131594dfd29b, 0x3a627e98d52fe, 0x1154041855661, 0x19175d09f8384, 0x676b2608b8d2d }, + { 0x0ba651c5b2b47, 0x5862363701027, 0x0c4d6c219c6db, 0x0f03dff8658de, 0x745d2ffa9c0cf }, + }, + { + { 0x6df5721d34e6a, 0x4f32f767a0c06, 0x1d5abeac76e20, 0x41ce9e104e1e4, 0x06e15be54c1dc }, + { 0x25a1e2bc9c8bd, 0x104c8f3b037ea, 0x405576fa96c98, 0x2e86a88e3876f, 0x1ae23ceb960cf }, + { 0x25d871932994a, 0x6b9d63b560b6e, 0x2df2814c8d472, 0x0fbbee20aa4ed, 0x58ded861278ec }, + }, + { + { 0x35ba8b6c2c9a8, 0x1dea58b3185bf, 0x4b455cd23bbbe, 0x5ec19c04883f8, 0x08ba696b531d5 }, + { 0x73793f266c55c, 0x0b988a9c93b02, 0x09b0ea32325db, 0x37cae71c17c5e, 0x2ff39de85485f }, + { 0x53eeec3efc57a, 0x2fa9fe9022efd, 0x699c72c138154, 0x72a751ebd1ff8, 0x120633b4947cf }, + }, + { + { 0x531474912100a, 0x5afcdf7c0d057, 0x7a9e71b788ded, 0x5ef708f3b0c88, 0x07433be3cb393 }, + { 0x4987891610042, 0x79d9d7f5d0172, 0x3c293013b9ec4, 0x0c2b85f39caca, 0x35d30a99b4d59 }, + { 0x144c05ce997f4, 0x4960b8a347fef, 0x1da11f15d74f7, 0x54fac19c0fead, 0x2d873ede7af6d }, + }, + { + { 0x202e14e5df981, 0x2ea02bc3eb54c, 0x38875b2883564, 0x1298c513ae9dd, 0x0543618a01600 }, + { 0x2316443373409, 0x5de95503b22af, 0x699201beae2df, 0x3db5849ff737a, 0x2e773654707fa }, + { 0x2bdf4974c23c1, 0x4b3b9c8d261bd, 0x26ae8b2a9bc28, 0x3068210165c51, 0x4b1443362d079 }, + }, +}, +{ + { + { 0x454e91c529ccb, 0x24c98c6bf72cf, 0x0486594c3d89a, 0x7ae13a3d7fa3c, 0x17038418eaf66 }, + { 0x4b7c7b66e1f7a, 0x4bea185efd998, 0x4fabc711055f8, 0x1fb9f7836fe38, 0x582f446752da6 }, + { 0x17bd320324ce4, 0x51489117898c6, 0x1684d92a0410b, 0x6e4d90f78c5a7, 0x0c2a1c4bcda28 }, + }, + { + { 0x4814869bd6945, 0x7b7c391a45db8, 0x57316ac35b641, 0x641e31de9096a, 0x5a6a9b30a314d }, + { 0x5c7d06f1f0447, 0x7db70f80b3a49, 0x6cb4a3ec89a78, 0x43be8ad81397d, 0x7c558bd1c6f64 }, + { 0x41524d396463d, 
0x1586b449e1a1d, 0x2f17e904aed8a, 0x7e1d2861d3c8e, 0x0404a5ca0afba }, + }, + { + { 0x49e1b2a416fd1, 0x51c6a0b316c57, 0x575a59ed71bdc, 0x74c021a1fec1e, 0x39527516e7f8e }, + { 0x740070aa743d6, 0x16b64cbdd1183, 0x23f4b7b32eb43, 0x319aba58235b3, 0x46395bfdcadd9 }, + { 0x7db2d1a5d9a9c, 0x79a200b85422f, 0x355bfaa71dd16, 0x00b77ea5f78aa, 0x76579a29e822d }, + }, + { + { 0x4b51352b434f2, 0x1327bd01c2667, 0x434d73b60c8a1, 0x3e0daa89443ba, 0x02c514bb2a277 }, + { 0x68e7e49c02a17, 0x45795346fe8b6, 0x089306c8f3546, 0x6d89f6b2f88f6, 0x43a384dc9e05b }, + { 0x3d5da8bf1b645, 0x7ded6a96a6d09, 0x6c3494fee2f4d, 0x02c989c8b6bd4, 0x1160920961548 }, + }, + { + { 0x05616369b4dcd, 0x4ecab86ac6f47, 0x3c60085d700b2, 0x0213ee10dfcea, 0x2f637d7491e6e }, + { 0x5166929dacfaa, 0x190826b31f689, 0x4f55567694a7d, 0x705f4f7b1e522, 0x351e125bc5698 }, + { 0x49b461af67bbe, 0x75915712c3a96, 0x69a67ef580c0d, 0x54d38ef70cffc, 0x7f182d06e7ce2 }, + }, + { + { 0x54b728e217522, 0x69a90971b0128, 0x51a40f2a963a3, 0x10be9ac12a6bf, 0x44acc043241c5 }, + { 0x48e64ab0168ec, 0x2a2bdb8a86f4f, 0x7343b6b2d6929, 0x1d804aa8ce9a3, 0x67d4ac8c343e9 }, + { 0x56bbb4f7a5777, 0x29230627c238f, 0x5ad1a122cd7fb, 0x0dea56e50e364, 0x556d1c8312ad7 }, + }, + { + { 0x06756b11be821, 0x462147e7bb03e, 0x26519743ebfe0, 0x782fc59682ab5, 0x097abe38cc8c7 }, + { 0x740e30c8d3982, 0x7c2b47f4682fd, 0x5cd91b8c7dc1c, 0x77fa790f9e583, 0x746c6c6d1d824 }, + { 0x1c9877ea52da4, 0x2b37b83a86189, 0x733af49310da5, 0x25e81161c04fb, 0x577e14a34bee8 }, + }, + { + { 0x6cebebd4dd72b, 0x340c1e442329f, 0x32347ffd1a93f, 0x14a89252cbbe0, 0x705304b8fb009 }, + { 0x268ac61a73b0a, 0x206f234bebe1c, 0x5b403a7cbebe8, 0x7a160f09f4135, 0x60fa7ee96fd78 }, + { 0x51d354d296ec6, 0x7cbf5a63b16c7, 0x2f50bb3cf0c14, 0x1feb385cac65a, 0x21398e0ca1635 }, + }, +}, +{ + { + { 0x0aaf9b4b75601, 0x26b91b5ae44f3, 0x6de808d7ab1c8, 0x6a769675530b0, 0x1bbfb284e98f7 }, + { 0x5058a382b33f3, 0x175a91816913e, 0x4f6cdb96b8ae8, 0x17347c9da81d2, 0x5aa3ed9d95a23 }, + { 0x777e9c7d96561, 0x28e58f006ccac, 0x541bbbb2cac49, 0x3e63282994cec, 0x4a07e14e5e895 }, + }, + { + { 0x358cdc477a49b, 0x3cc88fe02e481, 0x721aab7f4e36b, 0x0408cc9469953, 0x50af7aed84afa }, + { 0x412cb980df999, 0x5e78dd8ee29dc, 0x171dff68c575d, 0x2015dd2f6ef49, 0x3f0bac391d313 }, + { 0x7de0115f65be5, 0x4242c21364dc9, 0x6b75b64a66098, 0x0033c0102c085, 0x1921a316baebd }, + }, + { + { 0x2ad9ad9f3c18b, 0x5ec1638339aeb, 0x5703b6559a83b, 0x3fa9f4d05d612, 0x7b049deca062c }, + { 0x22f7edfb870fc, 0x569eed677b128, 0x30937dcb0a5af, 0x758039c78ea1b, 0x6458df41e273a }, + { 0x3e37a35444483, 0x661fdb7d27b99, 0x317761dd621e4, 0x7323c30026189, 0x6093dccbc2950 }, + }, + { + { 0x6eebe6084034b, 0x6cf01f70a8d7b, 0x0b41a54c6670a, 0x6c84b99bb55db, 0x6e3180c98b647 }, + { 0x39a8585e0706d, 0x3167ce72663fe, 0x63d14ecdb4297, 0x4be21dcf970b8, 0x57d1ea084827a }, + { 0x2b6e7a128b071, 0x5b27511755dcf, 0x08584c2930565, 0x68c7bda6f4159, 0x363e999ddd97b }, + }, + { + { 0x048dce24baec6, 0x2b75795ec05e3, 0x3bfa4c5da6dc9, 0x1aac8659e371e, 0x231f979bc6f9b }, + { 0x043c135ee1fc4, 0x2a11c9919f2d5, 0x6334cc25dbacd, 0x295da17b400da, 0x48ee9b78693a0 }, + { 0x1de4bcc2af3c6, 0x61fc411a3eb86, 0x53ed19ac12ec0, 0x209dbc6b804e0, 0x079bfa9b08792 }, + }, + { + { 0x1ed80a2d54245, 0x70efec72a5e79, 0x42151d42a822d, 0x1b5ebb6d631e8, 0x1ef4fb1594706 }, + { 0x03a51da300df4, 0x467b52b561c72, 0x4d5920210e590, 0x0ca769e789685, 0x038c77f684817 }, + { 0x65ee65b167bec, 0x052da19b850a9, 0x0408665656429, 0x7ab39596f9a4c, 0x575ee92a4a0bf }, + }, + { + { 0x6bc450aa4d801, 0x4f4a6773b0ba8, 0x6241b0b0ebc48, 0x40d9c4f1d9315, 
0x200a1e7e382f5 }, + { 0x080908a182fcf, 0x0532913b7ba98, 0x3dccf78c385c3, 0x68002dd5eaba9, 0x43d4e7112cd3f }, + { 0x5b967eaf93ac5, 0x360acca580a31, 0x1c65fd5c6f262, 0x71c7f15c2ecab, 0x050eca52651e4 }, + }, + { + { 0x4397660e668ea, 0x7c2a75692f2f5, 0x3b29e7e6c66ef, 0x72ba658bcda9a, 0x6151c09fa131a }, + { 0x31ade453f0c9c, 0x3dfee07737868, 0x611ecf7a7d411, 0x2637e6cbd64f6, 0x4b0ee6c21c58f }, + { 0x55c0dfdf05d96, 0x405569dcf475e, 0x05c5c277498bb, 0x18588d95dc389, 0x1fef24fa800f0 }, + }, +}, +{ + { + { 0x2aff530976b86, 0x0d85a48c0845a, 0x796eb963642e0, 0x60bee50c4b626, 0x28005fe6c8340 }, + { 0x653fb1aa73196, 0x607faec8306fa, 0x4e85ec83e5254, 0x09f56900584fd, 0x544d49292fc86 }, + { 0x7ba9f34528688, 0x284a20fb42d5d, 0x3652cd9706ffe, 0x6fd7baddde6b3, 0x72e472930f316 }, + }, + { + { 0x3f635d32a7627, 0x0cbecacde00fe, 0x3411141eaa936, 0x21c1e42f3cb94, 0x1fee7f000fe06 }, + { 0x5208c9781084f, 0x16468a1dc24d2, 0x7bf780ac540a8, 0x1a67eced75301, 0x5a9d2e8c2733a }, + { 0x305da03dbf7e5, 0x1228699b7aeca, 0x12a23b2936bc9, 0x2a1bda56ae6e9, 0x00f94051ee040 }, + }, + { + { 0x793bb07af9753, 0x1e7b6ecd4fafd, 0x02c7b1560fb43, 0x2296734cc5fb7, 0x47b7ffd25dd40 }, + { 0x56b23c3d330b2, 0x37608e360d1a6, 0x10ae0f3c8722e, 0x086d9b618b637, 0x07d79c7e8beab }, + { 0x3fb9cbc08dd12, 0x75c3dd85370ff, 0x47f06fe2819ac, 0x5db06ab9215ed, 0x1c3520a35ea64 }, + }, + { + { 0x06f40216bc059, 0x3a2579b0fd9b5, 0x71c26407eec8c, 0x72ada4ab54f0b, 0x38750c3b66d12 }, + { 0x253a6bccba34a, 0x427070433701a, 0x20b8e58f9870e, 0x337c861db00cc, 0x1c3d05775d0ee }, + { 0x6f1409422e51a, 0x7856bbece2d25, 0x13380a72f031c, 0x43e1080a7f3ba, 0x0621e2c7d3304 }, + }, + { + { 0x61796b0dbf0f3, 0x73c2f9c32d6f5, 0x6aa8ed1537ebe, 0x74e92c91838f4, 0x5d8e589ca1002 }, + { 0x060cc8259838d, 0x038d3f35b95f3, 0x56078c243a923, 0x2de3293241bb2, 0x0007d6097bd3a }, + { 0x71d950842a94b, 0x46b11e5c7d817, 0x5478bbecb4f0d, 0x7c3054b0a1c5d, 0x1583d7783c1cb }, + }, + { + { 0x34704cc9d28c7, 0x3dee598b1f200, 0x16e1c98746d9e, 0x4050b7095afdf, 0x4958064e83c55 }, + { 0x6a2ef5da27ae1, 0x28aace02e9d9d, 0x02459e965f0e8, 0x7b864d3150933, 0x252a5f2e81ed8 }, + { 0x094265066e80d, 0x0a60f918d61a5, 0x0444bf7f30fde, 0x1c40da9ed3c06, 0x079c170bd843b }, + }, + { + { 0x6cd50c0d5d056, 0x5b7606ae779ba, 0x70fbd226bdda1, 0x5661e53391ff9, 0x6768c0d7317b8 }, + { 0x6ece464fa6fff, 0x3cc40bca460a0, 0x6e3a90afb8d0c, 0x5801abca11228, 0x6dec05e34ac9f }, + { 0x625e5f155c1b3, 0x4f32f6f723296, 0x5ac980105efce, 0x17a61165eee36, 0x51445e14ddcd5 }, + }, + { + { 0x147ab2bbea455, 0x1f240f2253126, 0x0c3de9e314e89, 0x21ea5a4fca45f, 0x12e990086e4fd }, + { 0x02b4b3b144951, 0x5688977966aea, 0x18e176e399ffd, 0x2e45c5eb4938b, 0x13186f31e3929 }, + { 0x496b37fdfbb2e, 0x3c2439d5f3e21, 0x16e60fe7e6a4d, 0x4d7ef889b621d, 0x77b2e3f05d3e9 }, + }, +}, +{ + { + { 0x0639c12ddb0a4, 0x6180490cd7ab3, 0x3f3918297467c, 0x74568be1781ac, 0x07a195152e095 }, + { 0x7a9c59c2ec4de, 0x7e9f09e79652d, 0x6a3e422f22d86, 0x2ae8e3b836c8b, 0x63b795fc7ad32 }, + { 0x68f02389e5fc8, 0x059f1bc877506, 0x504990e410cec, 0x09bd7d0feaee2, 0x3e8fe83d032f0 }, + }, + { + { 0x04c8de8efd13c, 0x1c67c06e6210e, 0x183378f7f146a, 0x64352ceaed289, 0x22d60899a6258 }, + { 0x315b90570a294, 0x60ce108a925f1, 0x6eff61253c909, 0x003ef0e2d70b0, 0x75ba3b797fac4 }, + { 0x1dbc070cdd196, 0x16d8fb1534c47, 0x500498183fa2a, 0x72f59c423de75, 0x0904d07b87779 }, + }, + { + { 0x22d6648f940b9, 0x197a5a1873e86, 0x207e4c41a54bc, 0x5360b3b4bd6d0, 0x6240aacebaf72 }, + { 0x61fd4ddba919c, 0x7d8e991b55699, 0x61b31473cc76c, 0x7039631e631d6, 0x43e2143fbc1dd }, + { 0x4749c5ba295a0, 0x37946fa4b5f06, 
0x724c5ab5a51f1, 0x65633789dd3f3, 0x56bdaf238db40 }, + }, + { + { 0x0d36cc19d3bb2, 0x6ec4470d72262, 0x6853d7018a9ae, 0x3aa3e4dc2c8eb, 0x03aa31507e1e5 }, + { 0x2b9e3f53533eb, 0x2add727a806c5, 0x56955c8ce15a3, 0x18c4f070a290e, 0x1d24a86d83741 }, + { 0x47648ffd4ce1f, 0x60a9591839e9d, 0x424d5f38117ab, 0x42cc46912c10e, 0x43b261dc9aeb4 }, + }, + { + { 0x13d8b6c951364, 0x4c0017e8f632a, 0x53e559e53f9c4, 0x4b20146886eea, 0x02b4d5e242940 }, + { 0x31e1988bb79bb, 0x7b82f46b3bcab, 0x0f7a8ce827b41, 0x5e15816177130, 0x326055cf5b276 }, + { 0x155cb28d18df2, 0x0c30d9ca11694, 0x2090e27ab3119, 0x208624e7a49b6, 0x27a6c809ae5d3 }, + }, + { + { 0x4270ac43d6954, 0x2ed4cd95659a5, 0x75c0db37528f9, 0x2ccbcfd2c9234, 0x221503603d8c2 }, + { 0x6ebcd1f0db188, 0x74ceb4b7d1174, 0x7d56168df4f5c, 0x0bf79176fd18a, 0x2cb67174ff60a }, + { 0x6cdf9390be1d0, 0x08e519c7e2b3d, 0x253c3d2a50881, 0x21b41448e333d, 0x7b1df4b73890f }, + }, + { + { 0x6221807f8f58c, 0x3fa92813a8be5, 0x6da98c38d5572, 0x01ed95554468f, 0x68698245d352e }, + { 0x2f2e0b3b2a224, 0x0c56aa22c1c92, 0x5fdec39f1b278, 0x4c90af5c7f106, 0x61fcef2658fc5 }, + { 0x15d852a18187a, 0x270dbb59afb76, 0x7db120bcf92ab, 0x0e7a25d714087, 0x46cf4c473daf0 }, + }, + { + { 0x46ea7f1498140, 0x70725690a8427, 0x0a73ae9f079fb, 0x2dd924461c62b, 0x1065aae50d8cc }, + { 0x525ed9ec4e5f9, 0x022d20660684c, 0x7972b70397b68, 0x7a03958d3f965, 0x29387bcd14eb5 }, + { 0x44525df200d57, 0x2d7f94ce94385, 0x60d00c170ecb7, 0x38b0503f3d8f0, 0x69a198e64f1ce }, + }, +}, +{ + { + { 0x14434dcc5caed, 0x2c7909f667c20, 0x61a839d1fb576, 0x4f23800cabb76, 0x25b2697bd267f }, + { 0x2b2e0d91a78bc, 0x3990a12ccf20c, 0x141c2e11f2622, 0x0dfcefaa53320, 0x7369e6a92493a }, + { 0x73ffb13986864, 0x3282bb8f713ac, 0x49ced78f297ef, 0x6697027661def, 0x1420683db54e4 }, + }, + { + { 0x6bb6fc1cc5ad0, 0x532c8d591669d, 0x1af794da86c33, 0x0e0e9d86d24d3, 0x31e83b4161d08 }, + { 0x0bd1e249dd197, 0x00bcb1820568f, 0x2eab1718830d4, 0x396fd816997e6, 0x60b63bebf508a }, + { 0x0c7129e062b4f, 0x1e526415b12fd, 0x461a0fd27923d, 0x18badf670a5b7, 0x55cf1eb62d550 }, + }, + { + { 0x6b5e37df58c52, 0x3bcf33986c60e, 0x44fb8835ceae7, 0x099dec18e71a4, 0x1a56fbaa62ba0 }, + { 0x1101065c23d58, 0x5aa1290338b0f, 0x3157e9e2e7421, 0x0ea712017d489, 0x669a656457089 }, + { 0x66b505c9dc9ec, 0x774ef86e35287, 0x4d1d944c0955e, 0x52e4c39d72b20, 0x13c4836799c58 }, + }, + { + { 0x4fb6a5d8bd080, 0x58ae34908589b, 0x3954d977baf13, 0x413ea597441dc, 0x50bdc87dc8e5b }, + { 0x25d465ab3e1b9, 0x0f8fe27ec2847, 0x2d6e6dbf04f06, 0x3038cfc1b3276, 0x66f80c93a637b }, + { 0x537836edfe111, 0x2be02357b2c0d, 0x6dcee58c8d4f8, 0x2d732581d6192, 0x1dd56444725fd }, + }, + { + { 0x7e60008bac89a, 0x23d5c387c1852, 0x79e5df1f533a8, 0x2e6f9f1c5f0cf, 0x3a3a450f63a30 }, + { 0x47ff83362127d, 0x08e39af82b1f4, 0x488322ef27dab, 0x1973738a2a1a4, 0x0e645912219f7 }, + { 0x72f31d8394627, 0x07bd294a200f1, 0x665be00e274c6, 0x43de8f1b6368b, 0x318c8d9393a9a }, + }, + { + { 0x69e29ab1dd398, 0x30685b3c76bac, 0x565cf37f24859, 0x57b2ac28efef9, 0x509a41c325950 }, + { 0x45d032afffe19, 0x12fe49b6cde4e, 0x21663bc327cf1, 0x18a5e4c69f1dd, 0x224c7c679a1d5 }, + { 0x06edca6f925e9, 0x68c8363e677b8, 0x60cfa25e4fbcf, 0x1c4c17609404e, 0x05bff02328a11 }, + }, + { + { 0x1a0dd0dc512e4, 0x10894bf5fcd10, 0x52949013f9c37, 0x1f50fba4735c7, 0x576277cdee01a }, + { 0x2137023cae00b, 0x15a3599eb26c6, 0x0687221512b3c, 0x253cb3a0824e9, 0x780b8cc3fa2a4 }, + { 0x38abc234f305f, 0x7a280bbc103de, 0x398a836695dfe, 0x3d0af41528a1a, 0x5ff418726271b }, + }, + { + { 0x347e813b69540, 0x76864c21c3cbb, 0x1e049dbcd74a8, 0x5b4d60f93749c, 0x29d4db8ca0a0c }, + { 
0x6080c1789db9d, 0x4be7cef1ea731, 0x2f40d769d8080, 0x35f7d4c44a603, 0x106a03dc25a96 }, + { 0x50aaf333353d0, 0x4b59a613cbb35, 0x223dfc0e19a76, 0x77d1e2bb2c564, 0x4ab38a51052cb }, + }, +}, +{ + { + { 0x7d1ef5fddc09c, 0x7beeaebb9dad9, 0x058d30ba0acfb, 0x5cd92eab5ae90, 0x3041c6bb04ed2 }, + { 0x42b256768d593, 0x2e88459427b4f, 0x02b3876630701, 0x34878d405eae5, 0x29cdd1adc088a }, + { 0x2f2f9d956e148, 0x6b3e6ad65c1fe, 0x5b00972b79e5d, 0x53d8d234c5daf, 0x104bbd6814049 }, + }, + { + { 0x59a5fd67ff163, 0x3a998ead0352b, 0x083c95fa4af9a, 0x6fadbfc01266f, 0x204f2a20fb072 }, + { 0x0fd3168f1ed67, 0x1bb0de7784a3e, 0x34bcb78b20477, 0x0a4a26e2e2182, 0x5be8cc57092a7 }, + { 0x43b3d30ebb079, 0x357aca5c61902, 0x5b570c5d62455, 0x30fb29e1e18c7, 0x2570fb17c2791 }, + }, + { + { 0x6a9550bb8245a, 0x511f20a1a2325, 0x29324d7239bee, 0x3343cc37516c4, 0x241c5f91de018 }, + { 0x2367f2cb61575, 0x6c39ac04d87df, 0x6d4958bd7e5bd, 0x566f4638a1532, 0x3dcb65ea53030 }, + { 0x0172940de6caa, 0x6045b2e67451b, 0x56c07463efcb3, 0x0728b6bfe6e91, 0x08420edd5fcdf }, + }, + { + { 0x0c34e04f410ce, 0x344edc0d0a06b, 0x6e45486d84d6d, 0x44e2ecb3863f5, 0x04d654f321db8 }, + { 0x720ab8362fa4a, 0x29c4347cdd9bf, 0x0e798ad5f8463, 0x4fef18bcb0bfe, 0x0d9a53efbc176 }, + { 0x5c116ddbdb5d5, 0x6d1b4bba5abcf, 0x4d28a48a5537a, 0x56b8e5b040b99, 0x4a7a4f2618991 }, + }, + { + { 0x3b291af372a4b, 0x60e3028fe4498, 0x2267bca4f6a09, 0x719eec242b243, 0x4a96314223e0e }, + { 0x718025fb15f95, 0x68d6b8371fe94, 0x3804448f7d97c, 0x42466fe784280, 0x11b50c4cddd31 }, + { 0x0274408a4ffd6, 0x7d382aedb34dd, 0x40acfc9ce385d, 0x628bb99a45b1e, 0x4f4bce4dce6bc }, + }, + { + { 0x2616ec49d0b6f, 0x1f95d8462e61c, 0x1ad3e9b9159c6, 0x79ba475a04df9, 0x3042cee561595 }, + { 0x7ce5ae2242584, 0x2d25eb153d4e3, 0x3a8f3d09ba9c9, 0x0f3690d04eb8e, 0x73fcdd14b71c0 }, + { 0x67079449bac41, 0x5b79c4621484f, 0x61069f2156b8d, 0x0eb26573b10af, 0x389e740c9a9ce }, + }, + { + { 0x578f6570eac28, 0x644f2339c3937, 0x66e47b7956c2c, 0x34832fe1f55d0, 0x25c425e5d6263 }, + { 0x4b3ae34dcb9ce, 0x47c691a15ac9f, 0x318e06e5d400c, 0x3c422d9f83eb1, 0x61545379465a6 }, + { 0x606a6f1d7de6e, 0x4f1c0c46107e7, 0x229b1dcfbe5d8, 0x3acc60a7b1327, 0x6539a08915484 }, + }, + { + { 0x4dbd414bb4a19, 0x7930849f1dbb8, 0x329c5a466caf0, 0x6c824544feb9b, 0x0f65320ef019b }, + { 0x21f74c3d2f773, 0x024b88d08bd3a, 0x6e678cf054151, 0x43631272e747c, 0x11c5e4aac5cd1 }, + { 0x6d1b1cafde0c6, 0x462c76a303a90, 0x3ca4e693cff9b, 0x3952cd45786fd, 0x4cabc7bdec330 }, + }, +}, +{ + { + { 0x7788f3f78d289, 0x5942809b3f811, 0x5973277f8c29c, 0x010f93bc5fe67, 0x7ee498165acb2 }, + { 0x69624089c0a2e, 0x0075fc8e70473, 0x13e84ab1d2313, 0x2c10bedf6953b, 0x639b93f0321c8 }, + { 0x508e39111a1c3, 0x290120e912f7a, 0x1cbf464acae43, 0x15373e9576157, 0x0edf493c85b60 }, + }, + { + { 0x7c4d284764113, 0x7fefebf06acec, 0x39afb7a824100, 0x1b48e47e7fd65, 0x04c00c54d1dfa }, + { 0x48158599b5a68, 0x1fd75bc41d5d9, 0x2d9fc1fa95d3c, 0x7da27f20eba11, 0x403b92e3019d4 }, + { 0x22f818b465cf8, 0x342901dff09b8, 0x31f595dc683cd, 0x37a57745fd682, 0x355bb12ab2617 }, + }, + { + { 0x1dac75a8c7318, 0x3b679d5423460, 0x6b8fcb7b6400e, 0x6c73783be5f9d, 0x7518eaf8e052a }, + { 0x664cc7493bbf4, 0x33d94761874e3, 0x0179e1796f613, 0x1890535e2867d, 0x0f9b8132182ec }, + { 0x059c41b7f6c32, 0x79e8706531491, 0x6c747643cb582, 0x2e20c0ad494e4, 0x47c3871bbb175 }, + }, + { + { 0x65d50c85066b0, 0x6167453361f7c, 0x06ba3818bb312, 0x6aff29baa7522, 0x08fea02ce8d48 }, + { 0x4539771ec4f48, 0x7b9318badca28, 0x70f19afe016c5, 0x4ee7bb1608d23, 0x00b89b8576469 }, + { 0x5dd7668deead0, 0x4096d0ba47049, 0x6275997219114, 
0x29bda8a67e6ae, 0x473829a74f75d }, + }, + { + { 0x1533aad3902c9, 0x1dde06b11e47b, 0x784bed1930b77, 0x1c80a92b9c867, 0x6c668b4d44e4d }, + { 0x2da754679c418, 0x3164c31be105a, 0x11fac2b98ef5f, 0x35a1aaf779256, 0x2078684c4833c }, + { 0x0cf217a78820c, 0x65024e7d2e769, 0x23bb5efdda82a, 0x19fd4b632d3c6, 0x7411a6054f8a4 }, + }, + { + { 0x2e53d18b175b4, 0x33e7254204af3, 0x3bcd7d5a1c4c5, 0x4c7c22af65d0f, 0x1ec9a872458c3 }, + { 0x59d32b99dc86d, 0x6ac075e22a9ac, 0x30b9220113371, 0x27fd9a638966e, 0x7c136574fb813 }, + { 0x6a4d400a2509b, 0x041791056971c, 0x655d5866e075c, 0x2302bf3e64df8, 0x3add88a5c7cd6 }, + }, + { + { 0x298d459393046, 0x30bfecb3d90b8, 0x3d9b8ea3df8d6, 0x3900e96511579, 0x61ba1131a406a }, + { 0x15770b635dcf2, 0x59ecd83f79571, 0x2db461c0b7fbd, 0x73a42a981345f, 0x249929fccc879 }, + { 0x0a0f116959029, 0x5974fd7b1347a, 0x1e0cc1c08edad, 0x673bdf8ad1f13, 0x5620310cbbd8e }, + }, + { + { 0x6b5f477e285d6, 0x4ed91ec326cc8, 0x6d6537503a3fd, 0x626d3763988d5, 0x7ec846f3658ce }, + { 0x193434934d643, 0x0d4a2445eaa51, 0x7d0708ae76fe0, 0x39847b6c3c7e1, 0x37676a2a4d9d9 }, + { 0x68f3f1da22ec7, 0x6ed8039a2736b, 0x2627ee04c3c75, 0x6ea90a647e7d1, 0x6daaf723399b9 }, + }, +}, +{ + { + { 0x304bfacad8ea2, 0x502917d108b07, 0x043176ca6dd0f, 0x5d5158f2c1d84, 0x2b5449e58eb3b }, + { 0x27562eb3dbe47, 0x291d7b4170be7, 0x5d1ca67dfa8e1, 0x2a88061f298a2, 0x1304e9e71627d }, + { 0x014d26adc9cfe, 0x7f1691ba16f13, 0x5e71828f06eac, 0x349ed07f0fffc, 0x4468de2d7c2dd }, + }, + { + { 0x2d8c6f86307ce, 0x6286ba1850973, 0x5e9dcb08444d4, 0x1a96a543362b2, 0x5da6427e63247 }, + { 0x3355e9419469e, 0x1847bb8ea8a37, 0x1fe6588cf9b71, 0x6b1c9d2db6b22, 0x6cce7c6ffb44b }, + { 0x4c688deac22ca, 0x6f775c3ff0352, 0x565603ee419bb, 0x6544456c61c46, 0x58f29abfe79f2 }, + }, + { + { 0x264bf710ecdf6, 0x708c58527896b, 0x42ceae6c53394, 0x4381b21e82b6a, 0x6af93724185b4 }, + { 0x6cfab8de73e68, 0x3e6efced4bd21, 0x0056609500dbe, 0x71b7824ad85df, 0x577629c4a7f41 }, + { 0x0024509c6a888, 0x2696ab12e6644, 0x0cca27f4b80d8, 0x0c7c1f11b119e, 0x701f25bb0caec }, + }, + { + { 0x0f6d97cbec113, 0x4ce97fb7c93a3, 0x139835a11281b, 0x728907ada9156, 0x720a5bc050955 }, + { 0x0b0f8e4616ced, 0x1d3c4b50fb875, 0x2f29673dc0198, 0x5f4b0f1830ffa, 0x2e0c92bfbdc40 }, + { 0x709439b805a35, 0x6ec48557f8187, 0x08a4d1ba13a2c, 0x076348a0bf9ae, 0x0e9b9cbb144ef }, + }, + { + { 0x69bd55db1beee, 0x6e14e47f731bd, 0x1a35e47270eac, 0x66f225478df8e, 0x366d44191cfd3 }, + { 0x2d48ffb5720ad, 0x57b7f21a1df77, 0x5550effba0645, 0x5ec6a4098a931, 0x221104eb3f337 }, + { 0x41743f2bc8c14, 0x796b0ad8773c7, 0x29fee5cbb689b, 0x122665c178734, 0x4167a4e6bc593 }, + }, + { + { 0x62665f8ce8fee, 0x29d101ac59857, 0x4d93bbba59ffc, 0x17b7897373f17, 0x34b33370cb7ed }, + { 0x39d2876f62700, 0x001cecd1d6c87, 0x7f01a11747675, 0x2350da5a18190, 0x7938bb7e22552 }, + { 0x591ee8681d6cc, 0x39db0b4ea79b8, 0x202220f380842, 0x2f276ba42e0ac, 0x1176fc6e2dfe6 }, + }, + { + { 0x0e28949770eb8, 0x5559e88147b72, 0x35e1e6e63ef30, 0x35b109aa7ff6f, 0x1f6a3e54f2690 }, + { 0x76cd05b9c619b, 0x69654b0901695, 0x7a53710b77f27, 0x79a1ea7d28175, 0x08fc3a4c677d5 }, + { 0x4c199d30734ea, 0x6c622cb9acc14, 0x5660a55030216, 0x068f1199f11fb, 0x4f2fad0116b90 }, + }, + { + { 0x4d91db73bb638, 0x55f82538112c5, 0x6d85a279815de, 0x740b7b0cd9cf9, 0x3451995f2944e }, + { 0x6b24194ae4e54, 0x2230afded8897, 0x23412617d5071, 0x3d5d30f35969b, 0x445484a4972ef }, + { 0x2fcd09fea7d7c, 0x296126b9ed22a, 0x4a171012a05b2, 0x1db92c74d5523, 0x10b89ca604289 }, + }, +}, +{ + { + { 0x141be5a45f06e, 0x5adb38becaea7, 0x3fd46db41f2bb, 0x6d488bbb5ce39, 0x17d2d1d9ef0d4 }, + { 0x147499718289c, 
0x0a48a67e4c7ab, 0x30fbc544bafe3, 0x0c701315fe58a, 0x20b878d577b75 }, + { 0x2af18073f3e6a, 0x33aea420d24fe, 0x298008bf4ff94, 0x3539171db961e, 0x72214f63cc65c }, + }, + { + { 0x5b7b9f43b29c9, 0x149ea31eea3b3, 0x4be7713581609, 0x2d87960395e98, 0x1f24ac855a154 }, + { 0x37f405307a693, 0x2e5e66cf2b69c, 0x5d84266ae9c53, 0x5e4eb7de853b9, 0x5fdf48c58171c }, + { 0x608328e9505aa, 0x22182841dc49a, 0x3ec96891d2307, 0x2f363fff22e03, 0x00ba739e2ae39 }, + }, + { + { 0x426f5ea88bb26, 0x33092e77f75c8, 0x1a53940d819e7, 0x1132e4f818613, 0x72297de7d518d }, + { 0x698de5c8790d6, 0x268b8545beb25, 0x6d2648b96fedf, 0x47988ad1db07c, 0x03283a3e67ad7 }, + { 0x41dc7be0cb939, 0x1b16c66100904, 0x0a24c20cbc66d, 0x4a2e9efe48681, 0x05e1296846271 }, + }, + { + { 0x7bbc8242c4550, 0x59a06103b35b7, 0x7237e4af32033, 0x726421ab3537a, 0x78cf25d38258c }, + { 0x2eeb32d9c495a, 0x79e25772f9750, 0x6d747833bbf23, 0x6cdd816d5d749, 0x39c00c9c13698 }, + { 0x66b8e31489d68, 0x573857e10e2b5, 0x13be816aa1472, 0x41964d3ad4bf8, 0x006b52076b3ff }, + }, + { + { 0x37e16b9ce082d, 0x1882f57853eb9, 0x7d29eacd01fc5, 0x2e76a59b5e715, 0x7de2e9561a9f7 }, + { 0x0cfe19d95781c, 0x312cc621c453c, 0x145ace6da077c, 0x0912bef9ce9b8, 0x4d57e3443bc76 }, + { 0x0d4f4b6a55ecb, 0x7ebb0bb733bce, 0x7ba6a05200549, 0x4f6ede4e22069, 0x6b2a90af1a602 }, + }, + { + { 0x3f3245bb2d80a, 0x0e5f720f36efd, 0x3b9cccf60c06d, 0x084e323f37926, 0x465812c8276c2 }, + { 0x3f4fc9ae61e97, 0x3bc07ebfa2d24, 0x3b744b55cd4a0, 0x72553b25721f3, 0x5fd8f4e9d12d3 }, + { 0x3beb22a1062d9, 0x6a7063b82c9a8, 0x0a5a35dc197ed, 0x3c80c06a53def, 0x05b32c2b1cb16 }, + }, + { + { 0x4a42c7ad58195, 0x5c8667e799eff, 0x02e5e74c850a1, 0x3f0db614e869a, 0x31771a4856730 }, + { 0x05eccd24da8fd, 0x580bbfdf07918, 0x7e73586873c6a, 0x74ceddf77f93e, 0x3b5556a37b471 }, + { 0x0c524e14dd482, 0x283457496c656, 0x0ad6bcfb6cd45, 0x375d1e8b02414, 0x4fc079d27a733 }, + }, + { + { 0x48b440c86c50d, 0x139929cca3b86, 0x0f8f2e44cdf2f, 0x68432117ba6b2, 0x241170c2bae3c }, + { 0x138b089bf2f7f, 0x4a05bfd34ea39, 0x203914c925ef5, 0x7497fffe04e3c, 0x124567cecaf98 }, + { 0x1ab860ac473b4, 0x5c0227c86a7ff, 0x71b12bfc24477, 0x006a573a83075, 0x3f8612966c870 }, + }, +}, +{ + { + { 0x0fcfa36048d13, 0x66e7133bbb383, 0x64b42a8a45676, 0x4ea6e4f9a85cf, 0x26f57eee878a1 }, + { 0x20cc9782a0dde, 0x65d4e3070aab3, 0x7bc8e31547736, 0x09ebfb1432d98, 0x504aa77679736 }, + { 0x32cd55687efb1, 0x4448f5e2f6195, 0x568919d460345, 0x034c2e0ad1a27, 0x4041943d9dba3 }, + }, + { + { 0x17743a26caadd, 0x48c9156f9c964, 0x7ef278d1e9ad0, 0x00ce58ea7bd01, 0x12d931429800d }, + { 0x0eeba43ebcc96, 0x384dd5395f878, 0x1df331a35d272, 0x207ecfd4af70e, 0x1420a1d976843 }, + { 0x67799d337594f, 0x01647548f6018, 0x57fce5578f145, 0x009220c142a71, 0x1b4f92314359a }, + }, + { + { 0x73030a49866b1, 0x2442be90b2679, 0x77bd3d8947dcf, 0x1fb55c1552028, 0x5ff191d56f9a2 }, + { 0x4109d89150951, 0x225bd2d2d47cb, 0x57cc080e73bea, 0x6d71075721fcb, 0x239b572a7f132 }, + { 0x6d433ac2d9068, 0x72bf930a47033, 0x64facf4a20ead, 0x365f7a2b9402a, 0x020c526a758f3 }, + }, + { + { 0x1ef59f042cc89, 0x3b1c24976dd26, 0x31d665cb16272, 0x28656e470c557, 0x452cfe0a5602c }, + { 0x034f89ed8dbbc, 0x73b8f948d8ef3, 0x786c1d323caab, 0x43bd4a9266e51, 0x02aacc4615313 }, + { 0x0f7a0647877df, 0x4e1cc0f93f0d4, 0x7ec4726ef1190, 0x3bdd58bf512f8, 0x4cfb7d7b304b8 }, + }, + { + { 0x699c29789ef12, 0x63beae321bc50, 0x325c340adbb35, 0x562e1a1e42bf6, 0x5b1d4cbc434d3 }, + { 0x43d6cb89b75fe, 0x3338d5b900e56, 0x38d327d531a53, 0x1b25c61d51b9f, 0x14b4622b39075 }, + { 0x32615cc0a9f26, 0x57711b99cb6df, 0x5a69c14e93c38, 0x6e88980a4c599, 0x2f98f71258592 }, + 
}, + { + { 0x2ae444f54a701, 0x615397afbc5c2, 0x60d7783f3f8fb, 0x2aa675fc486ba, 0x1d8062e9e7614 }, + { 0x4a74cb50f9e56, 0x531d1c2640192, 0x0c03d9d6c7fd2, 0x57ccd156610c1, 0x3a6ae249d806a }, + { 0x2da85a9907c5a, 0x6b23721ec4caf, 0x4d2d3a4683aa2, 0x7f9c6870efdef, 0x298b8ce8aef25 }, + }, + { + { 0x272ea0a2165de, 0x68179ef3ed06f, 0x4e2b9c0feac1e, 0x3ee290b1b63bb, 0x6ba6271803a7d }, + { 0x27953eff70cb2, 0x54f22ae0ec552, 0x29f3da92e2724, 0x242ca0c22bd18, 0x34b8a8404d5ce }, + { 0x6ecb583693335, 0x3ec76bfdfb84d, 0x2c895cf56a04f, 0x6355149d54d52, 0x71d62bdd465e1 }, + }, + { + { 0x5b5dab1f75ef5, 0x1e2d60cbeb9a5, 0x527c2175dfe57, 0x59e8a2b8ff51f, 0x1c333621262b2 }, + { 0x3cc28d378df80, 0x72141f4968ca6, 0x407696bdb6d0d, 0x5d271b22ffcfb, 0x74d5f317f3172 }, + { 0x7e55467d9ca81, 0x6a5653186f50d, 0x6b188ece62df1, 0x4c66d36844971, 0x4aebcc4547e9d }, + }, +}, +{ + { + { 0x08d9e7354b610, 0x26b750b6dc168, 0x162881e01acc9, 0x7966df31d01a5, 0x173bd9ddc9a1d }, + { 0x0071b276d01c9, 0x0b0d8918e025e, 0x75beea79ee2eb, 0x3c92984094db8, 0x5d88fbf95a3db }, + { 0x00f1efe5872df, 0x5da872318256a, 0x59ceb81635960, 0x18cf37693c764, 0x06e1cd13b19ea }, + }, + { + { 0x3af629e5b0353, 0x204f1a088e8e5, 0x10efc9ceea82e, 0x589863c2fa34b, 0x7f3a6a1a8d837 }, + { 0x0ad516f166f23, 0x263f56d57c81a, 0x13422384638ca, 0x1331ff1af0a50, 0x3080603526e16 }, + { 0x644395d3d800b, 0x2b9203dbedefc, 0x4b18ce656a355, 0x03f3466bc182c, 0x30d0fded2e513 }, + }, + { + { 0x4971e68b84750, 0x52ccc9779f396, 0x3e904ae8255c8, 0x4ecae46f39339, 0x4615084351c58 }, + { 0x14d1af21233b3, 0x1de1989b39c0b, 0x52669dc6f6f9e, 0x43434b28c3fc7, 0x0a9214202c099 }, + { 0x019c0aeb9a02e, 0x1a2c06995d792, 0x664cbb1571c44, 0x6ff0736fa80b2, 0x3bca0d2895ca5 }, + }, + { + { 0x08eb69ecc01bf, 0x5b4c8912df38d, 0x5ea7f8bc2f20e, 0x120e516caafaf, 0x4ea8b4038df28 }, + { 0x031bc3c5d62a4, 0x7d9fe0f4c081e, 0x43ed51467f22c, 0x1e6cc0c1ed109, 0x5631deddae8f1 }, + { 0x5460af1cad202, 0x0b4919dd0655d, 0x7c4697d18c14c, 0x231c890bba2a4, 0x24ce0930542ca }, + }, + { + { 0x7a155fdf30b85, 0x1c6c6e5d487f9, 0x24be1134bdc5a, 0x1405970326f32, 0x549928a7324f4 }, + { 0x090f5fd06c106, 0x6abb1021e43fd, 0x232bcfad711a0, 0x3a5c13c047f37, 0x41d4e3c28a06d }, + { 0x632a763ee1a2e, 0x6fa4bffbd5e4d, 0x5fd35a6ba4792, 0x7b55e1de99de8, 0x491b66dec0dcf }, + }, + { + { 0x04a8ed0da64a1, 0x5ecfc45096ebe, 0x5edee93b488b2, 0x5b3c11a51bc8f, 0x4cf6b8b0b7018 }, + { 0x5b13dc7ea32a7, 0x18fc2db73131e, 0x7e3651f8f57e3, 0x25656055fa965, 0x08f338d0c85ee }, + { 0x3a821991a73bd, 0x03be6418f5870, 0x1ddc18eac9ef0, 0x54ce09e998dc2, 0x530d4a82eb078 }, + }, + { + { 0x173456c9abf9e, 0x7892015100dad, 0x33ee14095fecb, 0x6ad95d67a0964, 0x0db3e7e00cbfb }, + { 0x43630e1f94825, 0x4d1956a6b4009, 0x213fe2df8b5e0, 0x05ce3a41191e6, 0x65ea753f10177 }, + { 0x6fc3ee2096363, 0x7ec36b96d67ac, 0x510ec6a0758b1, 0x0ed87df022109, 0x02a4ec1921e1a }, + }, + { + { 0x06162f1cf795f, 0x324ddcafe5eb9, 0x018d5e0463218, 0x7e78b9092428e, 0x36d12b5dec067 }, + { 0x6259a3b24b8a2, 0x188b5f4170b9c, 0x681c0dee15deb, 0x4dfe665f37445, 0x3d143c5112780 }, + { 0x5279179154557, 0x39f8f0741424d, 0x45e6eb357923d, 0x42c9b5edb746f, 0x2ef517885ba82 }, + }, +}, +{ + { + { 0x6bffb305b2f51, 0x5b112b2d712dd, 0x35774974fe4e2, 0x04af87a96e3a3, 0x57968290bb3a0 }, + { 0x7974e8c58aedc, 0x7757e083488c6, 0x601c62ae7bc8b, 0x45370c2ecab74, 0x2f1b78fab143a }, + { 0x2b8430a20e101, 0x1a49e1d88fee3, 0x38bbb47ce4d96, 0x1f0e7ba84d437, 0x7dc43e35dc2aa }, + }, + { + { 0x02a5c273e9718, 0x32bc9dfb28b4f, 0x48df4f8d5db1a, 0x54c87976c028f, 0x044fb81d82d50 }, + { 0x66665887dd9c3, 0x629760a6ab0b2, 0x481e6c7243e6c, 
0x097e37046fc77, 0x7ef72016758cc }, + { 0x718c5a907e3d9, 0x3b9c98c6b383b, 0x006ed255eccdc, 0x6976538229a59, 0x7f79823f9c30d }, + }, + { + { 0x41ff068f587ba, 0x1c00a191bcd53, 0x7b56f9c209e25, 0x3781e5fccaabe, 0x64a9b0431c06d }, + { 0x4d239a3b513e8, 0x29723f51b1066, 0x642f4cf04d9c3, 0x4da095aa09b7a, 0x0a4e0373d784d }, + { 0x3d6a15b7d2919, 0x41aa75046a5d6, 0x691751ec2d3da, 0x23638ab6721c4, 0x071a7d0ace183 }, + }, + { + { 0x4355220e14431, 0x0e1362a283981, 0x2757cd8359654, 0x2e9cd7ab10d90, 0x7c69bcf761775 }, + { 0x72daac887ba0b, 0x0b7f4ac5dda60, 0x3bdda2c0498a4, 0x74e67aa180160, 0x2c3bcc7146ea7 }, + { 0x0d7eb04e8295f, 0x4a5ea1e6fa0fe, 0x45e635c436c60, 0x28ef4a8d4d18b, 0x6f5a9a7322aca }, + }, + { + { 0x1d4eba3d944be, 0x0100f15f3dce5, 0x61a700e367825, 0x5922292ab3d23, 0x02ab9680ee8d3 }, + { 0x1000c2f41c6c5, 0x0219fdf737174, 0x314727f127de7, 0x7e5277d23b81e, 0x494e21a2e147a }, + { 0x48a85dde50d9a, 0x1c1f734493df4, 0x47bdb64866889, 0x59a7d048f8eec, 0x6b5d76cbea46b }, + }, + { + { 0x141171e782522, 0x6806d26da7c1f, 0x3f31d1bc79ab9, 0x09f20459f5168, 0x16fb869c03dd3 }, + { 0x7556cec0cd994, 0x5eb9a03b7510a, 0x50ad1dd91cb71, 0x1aa5780b48a47, 0x0ae333f685277 }, + { 0x6199733b60962, 0x69b157c266511, 0x64740f893f1ca, 0x03aa408fbf684, 0x3f81e38b8f70d }, + }, + { + { 0x37f355f17c824, 0x07ae85334815b, 0x7e3abddd2e48f, 0x61eeabe1f45e5, 0x0ad3e2d34cded }, + { 0x10fcc7ed9affe, 0x4248cb0e96ff2, 0x4311c115172e2, 0x4c9d41cbf6925, 0x50510fc104f50 }, + { 0x40fc5336e249d, 0x3386639fb2de1, 0x7bbf871d17b78, 0x75f796b7e8004, 0x127c158bf0fa1 }, + }, + { + { 0x28fc4ae51b974, 0x26e89bfd2dbd4, 0x4e122a07665cf, 0x7cab1203405c3, 0x4ed82479d167d }, + { 0x17c422e9879a2, 0x28a5946c8fec3, 0x53ab32e912b77, 0x7b44da09fe0a5, 0x354ef87d07ef4 }, + { 0x3b52260c5d975, 0x79d6836171fdc, 0x7d994f140d4bb, 0x1b6c404561854, 0x302d92d205392 }, + }, +}, +{ + { + { 0x46fb6e4e0f177, 0x53497ad5265b7, 0x1ebdba01386fc, 0x0302f0cb36a3c, 0x0edc5f5eb426d }, + { 0x3c1a2bca4283d, 0x23430c7bb2f02, 0x1a3ea1bb58bc2, 0x7265763de5c61, 0x10e5d3b76f1ca }, + { 0x3bfd653da8e67, 0x584953ec82a8a, 0x55e288fa7707b, 0x5395fc3931d81, 0x45b46c51361cb }, + }, + { + { 0x54ddd8a7fe3e4, 0x2cecc41c619d3, 0x43a6562ac4d91, 0x4efa5aca7bdd9, 0x5c1c0aef32122 }, + { 0x02abf314f7fa1, 0x391d19e8a1528, 0x6a2fa13895fc7, 0x09d8eddeaa591, 0x2177bfa36dcb7 }, + { 0x01bbcfa79db8f, 0x3d84beb3666e1, 0x20c921d812204, 0x2dd843d3b32ce, 0x4ae619387d8ab }, + }, + { + { 0x17e44985bfb83, 0x54e32c626cc22, 0x096412ff38118, 0x6b241d61a246a, 0x75685abe5ba43 }, + { 0x3f6aa5344a32e, 0x69683680f11bb, 0x04c3581f623aa, 0x701af5875cba5, 0x1a00d91b17bf3 }, + { 0x60933eb61f2b2, 0x5193fe92a4dd2, 0x3d995a550f43e, 0x3556fb93a883d, 0x135529b623b0e }, + }, + { + { 0x716bce22e83fe, 0x33d0130b83eb8, 0x0952abad0afac, 0x309f64ed31b8a, 0x5972ea051590a }, + { 0x0dbd7add1d518, 0x119f823e2231e, 0x451d66e5e7de2, 0x500c39970f838, 0x79b5b81a65ca3 }, + { 0x4ac20dc8f7811, 0x29589a9f501fa, 0x4d810d26a6b4a, 0x5ede00d96b259, 0x4f7e9c95905f3 }, + }, + { + { 0x0443d355299fe, 0x39b7d7d5aee39, 0x692519a2f34ec, 0x6e4404924cf78, 0x1942eec4a144a }, + { 0x74bbc5781302e, 0x73135bb81ec4c, 0x7ef671b61483c, 0x7264614ccd729, 0x31993ad92e638 }, + { 0x45319ae234992, 0x2219d47d24fb5, 0x4f04488b06cf6, 0x53aaa9e724a12, 0x2a0a65314ef9c }, + }, + { + { 0x61acd3c1c793a, 0x58b46b78779e6, 0x3369aacbe7af2, 0x509b0743074d4, 0x055dc39b6dea1 }, + { 0x7937ff7f927c2, 0x0c2fa14c6a5b6, 0x556bddb6dd07c, 0x6f6acc179d108, 0x4cf6e218647c2 }, + { 0x1227cc28d5bb6, 0x78ee9bff57623, 0x28cb2241f893a, 0x25b541e3c6772, 0x121a307710aa2 }, + }, + { + { 0x1713ec77483c9, 
0x6f70572d5facb, 0x25ef34e22ff81, 0x54d944f141188, 0x527bb94a6ced3 }, + { 0x35d5e9f034a97, 0x126069785bc9b, 0x5474ec7854ff0, 0x296a302a348ca, 0x333fc76c7a40e }, + { 0x5992a995b482e, 0x78dc707002ac7, 0x5936394d01741, 0x4fba4281aef17, 0x6b89069b20a7a }, + }, + { + { 0x2fa8cb5c7db77, 0x718e6982aa810, 0x39e95f81a1a1b, 0x5e794f3646cfb, 0x0473d308a7639 }, + { 0x2a0416270220d, 0x75f248b69d025, 0x1cbbc16656a27, 0x5b9ffd6e26728, 0x23bc2103aa73e }, + { 0x6792603589e05, 0x248db9892595d, 0x006a53cad2d08, 0x20d0150f7ba73, 0x102f73bfde043 }, + }, +}, +{ + { + { 0x4dae0b5511c9a, 0x5257fffe0d456, 0x54108d1eb2180, 0x096cc0f9baefa, 0x3f6bd725da4ea }, + { 0x0b9ab7f5745c6, 0x5caf0f8d21d63, 0x7debea408ea2b, 0x09edb93896d16, 0x36597d25ea5c0 }, + { 0x58d7b106058ac, 0x3cdf8d20bee69, 0x00a4cb765015e, 0x36832337c7cc9, 0x7b7ecc19da60d }, + }, + { + { 0x64a51a77cfa9b, 0x29cf470ca0db5, 0x4b60b6e0898d9, 0x55d04ddffe6c7, 0x03bedc661bf5c }, + { 0x2373c695c690d, 0x4c0c8520dcf18, 0x384af4b7494b9, 0x4ab4a8ea22225, 0x4235ad7601743 }, + { 0x0cb0d078975f5, 0x292313e530c4b, 0x38dbb9124a509, 0x350d0655a11f1, 0x0e7ce2b0cdf06 }, + }, + { + { 0x6fedfd94b70f9, 0x2383f9745bfd4, 0x4beae27c4c301, 0x75aa4416a3f3f, 0x615256138aece }, + { 0x4643ac48c85a3, 0x6878c2735b892, 0x3a53523f4d877, 0x3a504ed8bee9d, 0x666e0a5d8fb46 }, + { 0x3f64e4870cb0d, 0x61548b16d6557, 0x7a261773596f3, 0x7724d5f275d3a, 0x7f0bc810d514d }, + }, + { + { 0x49dad737213a0, 0x745dee5d31075, 0x7b1a55e7fdbe2, 0x5ba988f176ea1, 0x1d3a907ddec5a }, + { 0x06ba426f4136f, 0x3cafc0606b720, 0x518f0a2359cda, 0x5fae5e46feca7, 0x0d1f8dbcf8eed }, + { 0x693313ed081dc, 0x5b0a366901742, 0x40c872ca4ca7e, 0x6f18094009e01, 0x00011b44a31bf }, + }, + { + { 0x61f696a0aa75c, 0x38b0a57ad42ca, 0x1e59ab706fdc9, 0x01308d46ebfcd, 0x63d988a2d2851 }, + { 0x7a06c3fc66c0c, 0x1c9bac1ba47fb, 0x23935c575038e, 0x3f0bd71c59c13, 0x3ac48d916e835 }, + { 0x20753afbd232e, 0x71fbb1ed06002, 0x39cae47a4af3a, 0x0337c0b34d9c2, 0x33fad52b2368a }, + }, + { + { 0x4c8d0c422cfe8, 0x760b4275971a5, 0x3da95bc1cad3d, 0x0f151ff5b7376, 0x3cc355ccb90a7 }, + { 0x649c6c5e41e16, 0x60667eee6aa80, 0x4179d182be190, 0x653d9567e6979, 0x16c0f429a256d }, + { 0x69443903e9131, 0x16f4ac6f9dd36, 0x2ea4912e29253, 0x2b4643e68d25d, 0x631eaf426bae7 }, + }, + { + { 0x175b9a3700de8, 0x77c5f00aa48fb, 0x3917785ca0317, 0x05aa9b2c79399, 0x431f2c7f665f8 }, + { 0x10410da66fe9f, 0x24d82dcb4d67d, 0x3e6fe0e17752d, 0x4dade1ecbb08f, 0x5599648b1ea91 }, + { 0x26344858f7b19, 0x5f43d4a295ac0, 0x242a75c52acd4, 0x5934480220d10, 0x7b04715f91253 }, + }, + { + { 0x6c280c4e6bac6, 0x3ada3b361766e, 0x42fe5125c3b4f, 0x111d84d4aac22, 0x48d0acfa57cde }, + { 0x5bd28acf6ae43, 0x16fab8f56907d, 0x7acb11218d5f2, 0x41fe02023b4db, 0x59b37bf5c2f65 }, + { 0x726e47dabe671, 0x2ec45e746f6c1, 0x6580e53c74686, 0x5eda104673f74, 0x16234191336d3 }, + }, +}, +{ + { + { 0x19cd61ff38640, 0x060c6c4b41ba9, 0x75cf70ca7366f, 0x118a8f16c011e, 0x4a25707a203b9 }, + { 0x499def6267ff6, 0x76e858108773c, 0x693cac5ddcb29, 0x00311d00a9ff4, 0x2cdfdfecd5d05 }, + { 0x7668a53f6ed6a, 0x303ba2e142556, 0x3880584c10909, 0x4fe20000a261d, 0x5721896d248e4 }, + }, + { + { 0x55091a1d0da4e, 0x4f6bfc7c1050b, 0x64e4ecd2ea9be, 0x07eb1f28bbe70, 0x03c935afc4b03 }, + { 0x65517fd181bae, 0x3e5772c76816d, 0x019189640898a, 0x1ed2a84de7499, 0x578edd74f63c1 }, + { 0x276c6492b0c3d, 0x09bfc40bf932e, 0x588e8f11f330b, 0x3d16e694dc26e, 0x3ec2ab590288c }, + }, + { + { 0x13a09ae32d1cb, 0x3e81eb85ab4e4, 0x07aaca43cae1f, 0x62f05d7526374, 0x0e1bf66c6adba }, + { 0x0d27be4d87bb9, 0x56c27235db434, 0x72e6e0ea62d37, 0x5674cd06ee839, 
0x2dd5c25a200fc }, + { 0x3d5e9792c887e, 0x319724dabbc55, 0x2b97c78680800, 0x7afdfdd34e6dd, 0x730548b35ae88 }, + }, + { + { 0x3094ba1d6e334, 0x6e126a7e3300b, 0x089c0aefcfbc5, 0x2eea11f836583, 0x585a2277d8784 }, + { 0x551a3cba8b8ee, 0x3b6422be2d886, 0x630e1419689bc, 0x4653b07a7a955, 0x3043443b411db }, + { 0x25f8233d48962, 0x6bd8f04aff431, 0x4f907fd9a6312, 0x40fd3c737d29b, 0x7656278950ef9 }, + }, + { + { 0x073a3ea86cf9d, 0x6e0e2abfb9c2e, 0x60e2a38ea33ee, 0x30b2429f3fe18, 0x28bbf484b613f }, + { 0x3cf59d51fc8c0, 0x7a0a0d6de4718, 0x55c3a3e6fb74b, 0x353135f884fd5, 0x3f4160a8c1b84 }, + { 0x12f5c6f136c7c, 0x0fedba237de4c, 0x779bccebfab44, 0x3aea93f4d6909, 0x1e79cb358188f }, + }, + { + { 0x153d8f5e08181, 0x08533bbdb2efd, 0x1149796129431, 0x17a6e36168643, 0x478ab52d39d1f }, + { 0x436c3eef7e3f1, 0x7ffd3c21f0026, 0x3e77bf20a2da9, 0x418bffc8472de, 0x65d7951b3a3b3 }, + { 0x6a4d39252d159, 0x790e35900ecd4, 0x30725bf977786, 0x10a5c1635a053, 0x16d87a411a212 }, + }, + { + { 0x4d5e2d54e0583, 0x2e5d7b33f5f74, 0x3a5de3f887ebf, 0x6ef24bd6139b7, 0x1f990b577a5a6 }, + { 0x57e5a42066215, 0x1a18b44983677, 0x3e652de1e6f8f, 0x6532be02ed8eb, 0x28f87c8165f38 }, + { 0x44ead1be8f7d6, 0x5759d4f31f466, 0x0378149f47943, 0x69f3be32b4f29, 0x45882fe1534d6 }, + }, + { + { 0x49929943c6fe4, 0x4347072545b15, 0x3226bced7e7c5, 0x03a134ced89df, 0x7dcf843ce405f }, + { 0x1345d757983d6, 0x222f54234cccd, 0x1784a3d8adbb4, 0x36ebeee8c2bcc, 0x688fe5b8f626f }, + { 0x0d6484a4732c0, 0x7b94ac6532d92, 0x5771b8754850f, 0x48dd9df1461c8, 0x6739687e73271 }, + }, +}, +{ + { + { 0x5cc9dc80c1ac0, 0x683671486d4cd, 0x76f5f1a5e8173, 0x6d5d3f5f9df4a, 0x7da0b8f68d7e7 }, + { 0x02014385675a6, 0x6155fb53d1def, 0x37ea32e89927c, 0x059a668f5a82e, 0x46115aba1d4dc }, + { 0x71953c3b5da76, 0x6642233d37a81, 0x2c9658076b1bd, 0x5a581e63010ff, 0x5a5f887e83674 }, + }, + { + { 0x628d3a0a643b9, 0x01cd8640c93d2, 0x0b7b0cad70f2c, 0x3864da98144be, 0x43e37ae2d5d1c }, + { 0x301cf70a13d11, 0x2a6a1ba1891ec, 0x2f291fb3f3ae0, 0x21a7b814bea52, 0x3669b656e44d1 }, + { 0x63f06eda6e133, 0x233342758070f, 0x098e0459cc075, 0x4df5ead6c7c1b, 0x6a21e6cd4fd5e }, + }, + { + { 0x129126699b2e3, 0x0ee11a2603de8, 0x60ac2f5c74c21, 0x59b192a196808, 0x45371b07001e8 }, + { 0x6170a3046e65f, 0x5401a46a49e38, 0x20add5561c4a8, 0x7abb4edde9e46, 0x586bf9f1a195f }, + { 0x3088d5ef8790b, 0x38c2126fcb4db, 0x685bae149e3c3, 0x0bcd601a4e930, 0x0eafb03790e52 }, + }, + { + { 0x0805e0f75ae1d, 0x464cc59860a28, 0x248e5b7b00bef, 0x5d99675ef8f75, 0x44ae3344c5435 }, + { 0x555c13748042f, 0x4d041754232c0, 0x521b430866907, 0x3308e40fb9c39, 0x309acc675a02c }, + { 0x289b9bba543ee, 0x3ab592e28539e, 0x64d82abcdd83a, 0x3c78ec172e327, 0x62d5221b7f946 }, + }, + { + { 0x5d4263af77a3c, 0x23fdd2289aeb0, 0x7dc64f77eb9ec, 0x01bd28338402c, 0x14f29a5383922 }, + { 0x4299c18d0936d, 0x5914183418a49, 0x52a18c721aed5, 0x2b151ba82976d, 0x5c0efde4bc754 }, + { 0x17edc25b2d7f5, 0x37336a6081bee, 0x7b5318887e5c3, 0x49f6d491a5be1, 0x5e72365c7bee0 }, + }, + { + { 0x339062f08b33e, 0x4bbf3e657cfb2, 0x67af7f56e5967, 0x4dbd67f9ed68f, 0x70b20555cb734 }, + { 0x3fc074571217f, 0x3a0d29b2b6aeb, 0x06478ccdde59d, 0x55e4d051bddfa, 0x77f1104c47b4e }, + { 0x113c555112c4c, 0x7535103f9b7ca, 0x140ed1d9a2108, 0x02522333bc2af, 0x0e34398f4a064 }, + }, + { + { 0x30b093e4b1928, 0x1ce7e7ec80312, 0x4e575bdf78f84, 0x61f7a190bed39, 0x6f8aded6ca379 }, + { 0x522d93ecebde8, 0x024f045e0f6cf, 0x16db63426cfa1, 0x1b93a1fd30fd8, 0x5e5405368a362 }, + { 0x0123dfdb7b29a, 0x4344356523c68, 0x79a527921ee5f, 0x74bfccb3e817e, 0x780de72ec8d3d }, + }, + { + { 0x7eaf300f42772, 0x5455188354ce3, 
0x4dcca4a3dcbac, 0x3d314d0bfebcb, 0x1defc6ad32b58 }, + { 0x28545089ae7bc, 0x1e38fe9a0c15c, 0x12046e0e2377b, 0x6721c560aa885, 0x0eb28bf671928 }, + { 0x3be1aef5195a7, 0x6f22f62bdb5eb, 0x39768b8523049, 0x43394c8fbfdbd, 0x467d201bf8dd2 }, + }, +}, +{ + { + { 0x6f4bd567ae7a9, 0x65ac89317b783, 0x07d3b20fd8932, 0x000f208326916, 0x2ef9c5a5ba384 }, + { 0x6919a74ef4fad, 0x59ed4611452bf, 0x691ec04ea09ef, 0x3cbcb2700e984, 0x71c43c4f5ba3c }, + { 0x56df6fa9e74cd, 0x79c95e4cf56df, 0x7be643bc609e2, 0x149c12ad9e878, 0x5a758ca390c5f }, + }, + { + { 0x0918b1d61dc94, 0x0d350260cd19c, 0x7a2ab4e37b4d9, 0x21fea735414d7, 0x0a738027f639d }, + { 0x72710d9462495, 0x25aafaa007456, 0x2d21f28eaa31b, 0x17671ea005fd0, 0x2dbae244b3eb7 }, + { 0x74a2f57ffe1cc, 0x1bc3073087301, 0x7ec57f4019c34, 0x34e082e1fa524, 0x2698ca635126a }, + }, + { + { 0x5702f5e3dd90e, 0x31c9a4a70c5c7, 0x136a5aa78fc24, 0x1992f3b9f7b01, 0x3c004b0c4afa3 }, + { 0x5318832b0ba78, 0x6f24b9ff17cec, 0x0a47f30e060c7, 0x58384540dc8d0, 0x1fb43dcc49cae }, + { 0x146ac06f4b82b, 0x4b500d89e7355, 0x3351e1c728a12, 0x10b9f69932fe3, 0x6b43fd01cd1fd }, + }, + { + { 0x742583e760ef3, 0x73dc1573216b8, 0x4ae48fdd7714a, 0x4f85f8a13e103, 0x73420b2d6ff0d }, + { 0x75d4b4697c544, 0x11be1fff7f8f4, 0x119e16857f7e1, 0x38a14345cf5d5, 0x5a68d7105b52f }, + { 0x4f6cb9e851e06, 0x278c4471895e5, 0x7efcdce3d64e4, 0x64f6d455c4b4c, 0x3db5632fea34b }, + }, + { + { 0x190b1829825d5, 0x0e7d3513225c9, 0x1c12be3b7abae, 0x58777781e9ca6, 0x59197ea495df2 }, + { 0x6ee2bf75dd9d8, 0x6c72ceb34be8d, 0x679c9cc345ec7, 0x7898df96898a4, 0x04321adf49d75 }, + { 0x16019e4e55aae, 0x74fc5f25d209c, 0x4566a939ded0d, 0x66063e716e0b7, 0x45eafdc1f4d70 }, + }, + { + { 0x64624cfccb1ed, 0x257ab8072b6c1, 0x0120725676f0a, 0x4a018d04e8eee, 0x3f73ceea5d56d }, + { 0x401858045d72b, 0x459e5e0ca2d30, 0x488b719308bea, 0x56f4a0d1b32b5, 0x5a5eebc80362d }, + { 0x7bfd10a4e8dc6, 0x7c899366736f4, 0x55ebbeaf95c01, 0x46db060903f8a, 0x2605889126621 }, + }, + { + { 0x18e3cc676e542, 0x26079d995a990, 0x04a7c217908b2, 0x1dc7603e6655a, 0x0dedfa10b2444 }, + { 0x704a68360ff04, 0x3cecc3cde8b3e, 0x21cd5470f64ff, 0x6abc18d953989, 0x54ad0c2e4e615 }, + { 0x367d5b82b522a, 0x0d3f4b83d7dc7, 0x3067f4cdbc58d, 0x20452da697937, 0x62ecb2baa77a9 }, + }, + { + { 0x72836afb62874, 0x0af3c2094b240, 0x0c285297f357a, 0x7cc2d5680d6e3, 0x61913d5075663 }, + { 0x5795261152b3d, 0x7a1dbbafa3cbd, 0x5ad31c52588d5, 0x45f3a4164685c, 0x2e59f919a966d }, + { 0x62d361a3231da, 0x65284004e01b8, 0x656533be91d60, 0x6ae016c00a89f, 0x3ddbc2a131c05 }, + }, +}, +{ + { + { 0x257a22796bb14, 0x6f360fb443e75, 0x680e47220eaea, 0x2fcf2a5f10c18, 0x5ee7fb38d8320 }, + { 0x40ff9ce5ec54b, 0x57185e261b35b, 0x3e254540e70a9, 0x1b5814003e3f8, 0x78968314ac04b }, + { 0x5fdcb41446a8e, 0x5286926ff2a71, 0x0f231e296b3f6, 0x684a357c84693, 0x61d0633c9bca0 }, + }, + { + { 0x328bcf8fc73df, 0x3b4de06ff95b4, 0x30aa427ba11a5, 0x5ee31bfda6d9c, 0x5b23ac2df8067 }, + { 0x44935ffdb2566, 0x12f016d176c6e, 0x4fbb00f16f5ae, 0x3fab78d99402a, 0x6e965fd847aed }, + { 0x2b953ee80527b, 0x55f5bcdb1b35a, 0x43a0b3fa23c66, 0x76e07388b820a, 0x79b9bbb9dd95d }, + }, + { + { 0x17dae8e9f7374, 0x719f76102da33, 0x5117c2a80ca8b, 0x41a66b65d0936, 0x1ba811460accb }, + { 0x355406a3126c2, 0x50d1918727d76, 0x6e5ea0b498e0e, 0x0a3b6063214f2, 0x5065f158c9fd2 }, + { 0x169fb0c429954, 0x59aedd9ecee10, 0x39916eb851802, 0x57917555cc538, 0x3981f39e58a4f }, + }, + { + { 0x5dfa56de66fde, 0x0058809075908, 0x6d3d8cb854a94, 0x5b2f4e970b1e3, 0x30f4452edcbc1 }, + { 0x38a7559230a93, 0x52c1cde8ba31f, 0x2a4f2d4745a3d, 0x07e9d42d4a28a, 0x38dc083705acd }, + { 
0x52782c5759740, 0x53f3397d990ad, 0x3a939c7e84d15, 0x234c4227e39e0, 0x632d9a1a593f2 }, + }, + { + { 0x1fd11ed0c84a7, 0x021b3ed2757e1, 0x73e1de58fc1c6, 0x5d110c84616ab, 0x3a5a7df28af64 }, + { 0x36b15b807cba6, 0x3f78a9e1afed7, 0x0a59c2c608f1f, 0x52bdd8ecb81b7, 0x0b24f48847ed4 }, + { 0x2d4be511beac7, 0x6bda4d99e5b9b, 0x17e6996914e01, 0x7b1f0ce7fcf80, 0x34fcf74475481 }, + }, + { + { 0x31dab78cfaa98, 0x4e3216e5e54b7, 0x249823973b689, 0x2584984e48885, 0x0119a3042fb37 }, + { 0x7e04c789767ca, 0x1671b28cfb832, 0x7e57ea2e1c537, 0x1fbaaef444141, 0x3d3bdc164dfa6 }, + { 0x2d89ce8c2177d, 0x6cd12ba182cf4, 0x20a8ac19a7697, 0x539fab2cc72d9, 0x56c088f1ede20 }, + }, + { + { 0x35fac24f38f02, 0x7d75c6197ab03, 0x33e4bc2a42fa7, 0x1c7cd10b48145, 0x038b7ea483590 }, + { 0x53d1110a86e17, 0x6416eb65f466d, 0x41ca6235fce20, 0x5c3fc8a99bb12, 0x09674c6b99108 }, + { 0x6f82199316ff8, 0x05d54f1a9f3e9, 0x3bcc5d0bd274a, 0x5b284b8d2d5ad, 0x6e5e31025969e }, + }, + { + { 0x4fb0e63066222, 0x130f59747e660, 0x041868fecd41a, 0x3105e8c923bc6, 0x3058ad43d1838 }, + { 0x462f587e593fb, 0x3d94ba7ce362d, 0x330f9b52667b7, 0x5d45a48e0f00a, 0x08f5114789a8d }, + { 0x40ffde57663d0, 0x71445d4c20647, 0x2653e68170f7c, 0x64cdee3c55ed6, 0x26549fa4efe3d }, + }, +}, +{ + { + { 0x68549af3f666e, 0x09e2941d4bb68, 0x2e8311f5dff3c, 0x6429ef91ffbd2, 0x3a10dfe132ce3 }, + { 0x55a461e6bf9d6, 0x78eeef4b02e83, 0x1d34f648c16cf, 0x07fea2aba5132, 0x1926e1dc6401e }, + { 0x74e8aea17cea0, 0x0c743f83fbc0f, 0x7cb03c4bf5455, 0x68a8ba9917e98, 0x1fa1d01d861e5 }, + }, + { + { 0x4ac00d1df94ab, 0x3ba2101bd271b, 0x7578988b9c4af, 0x0f2bf89f49f7e, 0x73fced18ee9a0 }, + { 0x055947d599832, 0x346fe2aa41990, 0x0164c8079195b, 0x799ccfb7bba27, 0x773563bc6a75c }, + { 0x1e90863139cb3, 0x4f8b407d9a0d6, 0x58e24ca924f69, 0x7a246bbe76456, 0x1f426b701b864 }, + }, + { + { 0x635c891a12552, 0x26aebd38ede2f, 0x66dc8faddae05, 0x21c7d41a03786, 0x0b76bb1b3fa7e }, + { 0x1264c41911c01, 0x702f44584bdf9, 0x43c511fc68ede, 0x0482c3aed35f9, 0x4e1af5271d31b }, + { 0x0c1f97f92939b, 0x17a88956dc117, 0x6ee005ef99dc7, 0x4aa9172b231cc, 0x7b6dd61eb772a }, + }, + { + { 0x0abf9ab01d2c7, 0x3880287630ae6, 0x32eca045beddb, 0x57f43365f32d0, 0x53fa9b659bff6 }, + { 0x5c1e850f33d92, 0x1ec119ab9f6f5, 0x7f16f6de663e9, 0x7a7d6cb16dec6, 0x703e9bceaf1d2 }, + { 0x4c8e994885455, 0x4ccb5da9cad82, 0x3596bc610e975, 0x7a80c0ddb9f5e, 0x398d93e5c4c61 }, + }, + { + { 0x77c60d2e7e3f2, 0x4061051763870, 0x67bc4e0ecd2aa, 0x2bb941f1373b9, 0x699c9c9002c30 }, + { 0x3d16733e248f3, 0x0e2b7e14be389, 0x42c0ddaf6784a, 0x589ea1fc67850, 0x53b09b5ddf191 }, + { 0x6a7235946f1cc, 0x6b99cbb2fbe60, 0x6d3a5d6485c62, 0x4839466e923c0, 0x51caf30c6fcdd }, + }, + { + { 0x2f99a18ac54c7, 0x398a39661ee6f, 0x384331e40cde3, 0x4cd15c4de19a6, 0x12ae29c189f8e }, + { 0x3a7427674e00a, 0x6142f4f7e74c1, 0x4cc93318c3a15, 0x6d51bac2b1ee7, 0x5504aa292383f }, + { 0x6c0cb1f0d01cf, 0x187469ef5d533, 0x27138883747bf, 0x2f52ae53a90e8, 0x5fd14fe958eba }, + }, + { + { 0x2fe5ebf93cb8e, 0x226da8acbe788, 0x10883a2fb7ea1, 0x094707842cf44, 0x7dd73f960725d }, + { 0x42ddf2845ab2c, 0x6214ffd3276bb, 0x00b8d181a5246, 0x268a6d579eb20, 0x093ff26e58647 }, + { 0x524fe68059829, 0x65b75e47cb621, 0x15eb0a5d5cc19, 0x05209b3929d5a, 0x2f59bcbc86b47 }, + }, + { + { 0x1d560b691c301, 0x7f5bafce3ce08, 0x4cd561614806c, 0x4588b6170b188, 0x2aa55e3d01082 }, + { 0x47d429917135f, 0x3eacfa07af070, 0x1deab46b46e44, 0x7a53f3ba46cdf, 0x5458b42e2e51a }, + { 0x192e60c07444f, 0x5ae8843a21daa, 0x6d721910b1538, 0x3321a95a6417e, 0x13e9004a8a768 }, + }, +}, +{ + { + { 0x600c9193b877f, 0x21c1b8a0d7765, 0x379927fb38ea2, 
0x70d7679dbe01b, 0x5f46040898de9 }, + { 0x58845832fcedb, 0x135cd7f0c6e73, 0x53ffbdfe8e35b, 0x22f195e06e55b, 0x73937e8814bce }, + { 0x37116297bf48d, 0x45a9e0d069720, 0x25af71aa744ec, 0x41af0cb8aaba3, 0x2cf8a4e891d5e }, + }, + { + { 0x5487e17d06ba2, 0x3872a032d6596, 0x65e28c09348e0, 0x27b6bb2ce40c2, 0x7a6f7f2891d6a }, + { 0x3fd8707110f67, 0x26f8716a92db2, 0x1cdaa1b753027, 0x504be58b52661, 0x2049bd6e58252 }, + { 0x1fd8d6a9aef49, 0x7cb67b7216fa1, 0x67aff53c3b982, 0x20ea610da9628, 0x6011aadfc5459 }, + }, + { + { 0x6d0c802cbf890, 0x141bfed554c7b, 0x6dbb667ef4263, 0x58f3126857edc, 0x69ce18b779340 }, + { 0x7926dcf95f83c, 0x42e25120e2bec, 0x63de96df1fa15, 0x4f06b50f3f9cc, 0x6fc5cc1b0b62f }, + { 0x75528b29879cb, 0x79a8fd2125a3d, 0x27c8d4b746ab8, 0x0f8893f02210c, 0x15596b3ae5710 }, + }, + { + { 0x731167e5124ca, 0x17b38e8bbe13f, 0x3d55b942f9056, 0x09c1495be913f, 0x3aa4e241afb6d }, + { 0x739d23f9179a2, 0x632fadbb9e8c4, 0x7c8522bfe0c48, 0x6ed0983ef5aa9, 0x0d2237687b5f4 }, + { 0x138bf2a3305f5, 0x1f45d24d86598, 0x5274bad2160fe, 0x1b6041d58d12a, 0x32fcaa6e4687a }, + }, + { + { 0x7a4732787ccdf, 0x11e427c7f0640, 0x03659385f8c64, 0x5f4ead9766bfb, 0x746f6336c2600 }, + { 0x56e8dc57d9af5, 0x5b3be17be4f78, 0x3bf928cf82f4b, 0x52e55600a6f11, 0x4627e9cefebd6 }, + { 0x2f345ab6c971c, 0x653286e63e7e9, 0x51061b78a23ad, 0x14999acb54501, 0x7b4917007ed66 }, + }, + { + { 0x41b28dd53a2dd, 0x37be85f87ea86, 0x74be3d2a85e41, 0x1be87fac96ca6, 0x1d03620fe08cd }, + { 0x5fb5cab84b064, 0x2513e778285b0, 0x457383125e043, 0x6bda3b56e223d, 0x122ba376f844f }, + { 0x232cda2b4e554, 0x0422ba30ff840, 0x751e7667b43f5, 0x6261755da5f3e, 0x02c70bf52b68e }, + }, + { + { 0x532bf458d72e1, 0x40f96e796b59c, 0x22ef79d6f9da3, 0x501ab67beca77, 0x6b0697e3feb43 }, + { 0x7ec4b5d0b2fbb, 0x200e910595450, 0x742057105715e, 0x2f07022530f60, 0x26334f0a409ef }, + { 0x0f04adf62a3c0, 0x5e0edb48bb6d9, 0x7c34aa4fbc003, 0x7d74e4e5cac24, 0x1cc37f43441b2 }, + }, + { + { 0x656f1c9ceaeb9, 0x7031cacad5aec, 0x1308cd0716c57, 0x41c1373941942, 0x3a346f772f196 }, + { 0x7565a5cc7324f, 0x01ca0d5244a11, 0x116b067418713, 0x0a57d8c55edae, 0x6c6809c103803 }, + { 0x55112e2da6ac8, 0x6363d0a3dba5a, 0x319c98ba6f40c, 0x2e84b03a36ec7, 0x05911b9f6ef7c }, + }, +}, +{ + { + { 0x1acf3512eeaef, 0x2639839692a69, 0x669a234830507, 0x68b920c0603d4, 0x555ef9d1c64b2 }, + { 0x39983f5df0ebb, 0x1ea2589959826, 0x6ce638703cdd6, 0x6311678898505, 0x6b3cecf9aa270 }, + { 0x770ba3b73bd08, 0x11475f7e186d4, 0x0251bc9892bbc, 0x24eab9bffcc5a, 0x675f4de133817 }, + }, + { + { 0x7f6d93bdab31d, 0x1f3aca5bfd425, 0x2fa521c1c9760, 0x62180ce27f9cd, 0x60f450b882cd3 }, + { 0x452036b1782fc, 0x02d95b07681c5, 0x5901cf99205b2, 0x290686e5eecb4, 0x13d99df70164c }, + { 0x35ec321e5c0ca, 0x13ae337f44029, 0x4008e813f2da7, 0x640272f8e0c3a, 0x1c06de9e55eda }, + }, + { + { 0x52b40ff6d69aa, 0x31b8809377ffa, 0x536625cd14c2c, 0x516af252e17d1, 0x78096f8e7d32b }, + { 0x77ad6a33ec4e2, 0x717c5dc11d321, 0x4a114559823e4, 0x306ce50a1e2b1, 0x4cf38a1fec2db }, + { 0x2aa650dfa5ce7, 0x54916a8f19415, 0x00dc96fe71278, 0x55f2784e63eb8, 0x373cad3a26091 }, + }, + { + { 0x6a8fb89ddbbad, 0x78c35d5d97e37, 0x66e3674ef2cb2, 0x34347ac53dd8f, 0x21547eda5112a }, + { 0x4634d82c9f57c, 0x4249268a6d652, 0x6336d687f2ff7, 0x4fe4f4e26d9a0, 0x0040f3d945441 }, + { 0x5e939fd5986d3, 0x12a2147019bdf, 0x4c466e7d09cb2, 0x6fa5b95d203dd, 0x63550a334a254 }, + }, + { + { 0x2584572547b49, 0x75c58811c1377, 0x4d3c637cc171b, 0x33d30747d34e3, 0x39a92bafaa7d7 }, + { 0x7d6edb569cf37, 0x60194a5dc2ca0, 0x5af59745e10a6, 0x7a8f53e004875, 0x3eea62c7daf78 }, + { 0x4c713e693274e, 
0x6ed1b7a6eb3a4, 0x62ace697d8e15, 0x266b8292ab075, 0x68436a0665c9c }, + }, + { + { 0x6d317e820107c, 0x090815d2ca3ca, 0x03ff1eb1499a1, 0x23960f050e319, 0x5373669c91611 }, + { 0x235e8202f3f27, 0x44c9f2eb61780, 0x630905b1d7003, 0x4fcc8d274ead1, 0x17b6e7f68ab78 }, + { 0x014ab9a0e5257, 0x09939567f8ba5, 0x4b47b2a423c82, 0x688d7e57ac42d, 0x1cb4b5a678f87 }, + }, + { + { 0x4aa62a2a007e7, 0x61e0e38f62d6e, 0x02f888fcc4782, 0x7562b83f21c00, 0x2dc0fd2d82ef6 }, + { 0x4c06b394afc6c, 0x4931b4bf636cc, 0x72b60d0322378, 0x25127c6818b25, 0x330bca78de743 }, + { 0x6ff841119744e, 0x2c560e8e49305, 0x7254fefe5a57a, 0x67ae2c560a7df, 0x3c31be1b369f1 }, + }, + { + { 0x0bc93f9cb4272, 0x3f8f9db73182d, 0x2b235eabae1c4, 0x2ddbf8729551a, 0x41cec1097e7d5 }, + { 0x4864d08948aee, 0x5d237438df61e, 0x2b285601f7067, 0x25dbcbae6d753, 0x330b61134262d }, + { 0x619d7a26d808a, 0x3c3b3c2adbef2, 0x6877c9eec7f52, 0x3beb9ebe1b66d, 0x26b44cd91f287 }, + }, +}, +{ + { + { 0x7f29362730383, 0x7fd7951459c36, 0x7504c512d49e7, 0x087ed7e3bc55f, 0x7deb10149c726 }, + { 0x048478f387475, 0x69397d9678a3e, 0x67c8156c976f3, 0x2eb4d5589226c, 0x2c709e6c1c10a }, + { 0x2af6a8766ee7a, 0x08aaa79a1d96c, 0x42f92d59b2fb0, 0x1752c40009c07, 0x08e68e9ff62ce }, + }, + { + { 0x509d50ab8f2f9, 0x1b8ab247be5e5, 0x5d9b2e6b2e486, 0x4faa5479a1339, 0x4cb13bd738f71 }, + { 0x5500a4bc130ad, 0x127a17a938695, 0x02a26fa34e36d, 0x584d12e1ecc28, 0x2f1f3f87eeba3 }, + { 0x48c75e515b64a, 0x75b6952071ef0, 0x5d46d42965406, 0x7746106989f9f, 0x19a1e353c0ae2 }, + }, + { + { 0x172cdd596bdbd, 0x0731ddf881684, 0x10426d64f8115, 0x71a4fd8a9a3da, 0x736bd3990266a }, + { 0x47560bafa05c3, 0x418dcabcc2fa3, 0x35991cecf8682, 0x24371a94b8c60, 0x41546b11c20c3 }, + { 0x32d509334b3b4, 0x16c102cae70aa, 0x1720dd51bf445, 0x5ae662faf9821, 0x412295a2b87fa }, + }, + { + { 0x55261e293eac6, 0x06426759b65cc, 0x40265ae116a48, 0x6c02304bae5bc, 0x0760bb8d195ad }, + { 0x19b88f57ed6e9, 0x4cdbf1904a339, 0x42b49cd4e4f2c, 0x71a2e771909d9, 0x14e153ebb52d2 }, + { 0x61a17cde6818a, 0x53dad34108827, 0x32b32c55c55b6, 0x2f9165f9347a3, 0x6b34be9bc33ac }, + }, + { + { 0x469656571f2d3, 0x0aa61ce6f423f, 0x3f940d71b27a1, 0x185f19d73d16a, 0x01b9c7b62e6dd }, + { 0x72f643a78c0b2, 0x3de45c04f9e7b, 0x706d68d30fa5c, 0x696f63e8e2f24, 0x2012c18f0922d }, + { 0x355e55ac89d29, 0x3e8b414ec7101, 0x39db07c520c90, 0x6f41e9b77efe1, 0x08af5b784e4ba }, + }, + { + { 0x314d289cc2c4b, 0x23450e2f1bc4e, 0x0cd93392f92f4, 0x1370c6a946b7d, 0x6423c1d5afd98 }, + { 0x499dc881f2533, 0x34ef26476c506, 0x4d107d2741497, 0x346c4bd6efdb3, 0x32b79d71163a1 }, + { 0x5f8d9edfcb36a, 0x1e6e8dcbf3990, 0x7974f348af30a, 0x6e6724ef19c7c, 0x480a5efbc13e2 }, + }, + { + { 0x14ce442ce221f, 0x18980a72516cc, 0x072f80db86677, 0x703331fda526e, 0x24b31d47691c8 }, + { 0x1e70b01622071, 0x1f163b5f8a16a, 0x56aaf341ad417, 0x7989635d830f7, 0x47aa27600cb7b }, + { 0x41eedc015f8c3, 0x7cf8d27ef854a, 0x289e3584693f9, 0x04a7857b309a7, 0x545b585d14dda }, + }, + { + { 0x4e4d0e3b321e1, 0x7451fe3d2ac40, 0x666f678eea98d, 0x038858667fead, 0x4d22dc3e64c8d }, + { 0x7275ea0d43a0f, 0x681137dd7ccf7, 0x1e79cbab79a38, 0x22a214489a66a, 0x0f62f9c332ba5 }, + { 0x46589d63b5f39, 0x7eaf979ec3f96, 0x4ebe81572b9a8, 0x21b7f5d61694a, 0x1c0fa01a36371 }, + }, +}, +{ + { + { 0x02b0e8c936a50, 0x6b83b58b6cd21, 0x37ed8d3e72680, 0x0a037db9f2a62, 0x4005419b1d2bc }, + { 0x604b622943dff, 0x1c899f6741a58, 0x60219e2f232fb, 0x35fae92a7f9cb, 0x0fa3614f3b1ca }, + { 0x3febdb9be82f0, 0x5e74895921400, 0x553ea38822706, 0x5a17c24cfc88c, 0x1fba218aef40a }, + }, + { + { 0x657043e7b0194, 0x5c11b55efe9e7, 0x7737bc6a074fb, 0x0eae41ce355cc, 
0x6c535d13ff776 }, + { 0x49448fac8f53e, 0x34f74c6e8356a, 0x0ad780607dba2, 0x7213a7eb63eb6, 0x392e3acaa8c86 }, + { 0x534e93e8a35af, 0x08b10fd02c997, 0x26ac2acb81e05, 0x09d8c98ce3b79, 0x25e17fe4d50ac }, + }, + { + { 0x77ff576f121a7, 0x4e5f9b0fc722b, 0x46f949b0d28c8, 0x4cde65d17ef26, 0x6bba828f89698 }, + { 0x09bd71e04f676, 0x25ac841f2a145, 0x1a47eac823871, 0x1a8a8c36c581a, 0x255751442a9fb }, + { 0x1bc6690fe3901, 0x314132f5abc5a, 0x611835132d528, 0x5f24b8eb48a57, 0x559d504f7f6b7 }, + }, + { + { 0x091e7f6d266fd, 0x36060ef037389, 0x18788ec1d1286, 0x287441c478eb0, 0x123ea6a3354bd }, + { 0x38378b3eb54d5, 0x4d4aaa78f94ee, 0x4a002e875a74d, 0x10b851367b17c, 0x01ab12d5807e3 }, + { 0x5189041e32d96, 0x05b062b090231, 0x0c91766e7b78f, 0x0aa0f55a138ec, 0x4a3961e2c918a }, + }, + { + { 0x7d644f3233f1e, 0x1c69f9e02c064, 0x36ae5e5266898, 0x08fc1dad38b79, 0x68aceead9bd41 }, + { 0x43be0f8e6bba0, 0x68fdffc614e3b, 0x4e91dab5b3be0, 0x3b1d4c9212ff0, 0x2cd6bce3fb1db }, + { 0x4c90ef3d7c210, 0x496f5a0818716, 0x79cf88cc239b8, 0x2cb9c306cf8db, 0x595760d5b508f }, + }, + { + { 0x2cbebfd022790, 0x0b8822aec1105, 0x4d1cfd226bccc, 0x515b2fa4971be, 0x2cb2c5df54515 }, + { 0x1bfe104aa6397, 0x11494ff996c25, 0x64251623e5800, 0x0d49fc5e044be, 0x709fa43edcb29 }, + { 0x25d8c63fd2aca, 0x4c5cd29dffd61, 0x32ec0eb48af05, 0x18f9391f9b77c, 0x70f029ecf0c81 }, + }, + { + { 0x2afaa5e10b0b9, 0x61de08355254d, 0x0eb587de3c28d, 0x4f0bb9f7dbbd5, 0x44eca5a2a74bd }, + { 0x307b32eed3e33, 0x6748ab03ce8c2, 0x57c0d9ab810bc, 0x42c64a224e98c, 0x0b7d5d8a6c314 }, + { 0x448327b95d543, 0x0146681e3a4ba, 0x38714adc34e0c, 0x4f26f0e298e30, 0x272224512c7de }, + }, + { + { 0x3bb8a42a975fc, 0x6f2d5b46b17ef, 0x7b6a9223170e5, 0x053713fe3b7e6, 0x19735fd7f6bc2 }, + { 0x492af49c5342e, 0x2365cdf5a0357, 0x32138a7ffbb60, 0x2a1f7d14646fe, 0x11b5df18a44cc }, + { 0x390d042c84266, 0x1efe32a8fdc75, 0x6925ee7ae1238, 0x4af9281d0e832, 0x0fef911191df8 }, + }, +}, +}; +#else +/* base[i][j] = (j+1)*256^i*B */ +static const ge_precomp base[32][8] = { +{ + { + { 25967493,-14356035,29566456,3660896,-12694345,4014787,27544626,-11754271,-6079156,2047605 }, + { -12545711,934262,-2722910,3049990,-727428,9406986,12720692,5043384,19500929,-15469378 }, + { -8738181,4489570,9688441,-14785194,10184609,-12363380,29287919,11864899,-24514362,-4438546 }, + }, + { + { -12815894,-12976347,-21581243,11784320,-25355658,-2750717,-11717903,-3814571,-358445,-10211303 }, + { -21703237,6903825,27185491,6451973,-29577724,-9554005,-15616551,11189268,-26829678,-5319081 }, + { 26966642,11152617,32442495,15396054,14353839,-12752335,-3128826,-9541118,-15472047,-4166697 }, + }, + { + { 15636291,-9688557,24204773,-7912398,616977,-16685262,27787600,-14772189,28944400,-1550024 }, + { 16568933,4717097,-11556148,-1102322,15682896,-11807043,16354577,-11775962,7689662,11199574 }, + { 30464156,-5976125,-11779434,-15670865,23220365,15915852,7512774,10017326,-17749093,-9920357 }, + }, + { + { -17036878,13921892,10945806,-6033431,27105052,-16084379,-28926210,15006023,3284568,-6276540 }, + { 23599295,-8306047,-11193664,-7687416,13236774,10506355,7464579,9656445,13059162,10374397 }, + { 7798556,16710257,3033922,2874086,28997861,2835604,32406664,-3839045,-641708,-101325 }, + }, + { + { 10861363,11473154,27284546,1981175,-30064349,12577861,32867885,14515107,-15438304,10819380 }, + { 4708026,6336745,20377586,9066809,-11272109,6594696,-25653668,12483688,-12668491,5581306 }, + { 19563160,16186464,-29386857,4097519,10237984,-4348115,28542350,13850243,-23678021,-15815942 }, + }, + { + { 
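+/* Illustrative sketch (not upstream code): with base[i][j] = (j+1)*256^i*B,
+ * a ref10-style ge_scalarmult_base() computes h = a*B by splitting the
+ * 32-byte scalar a into 64 signed radix-16 digits e[0..63], each in [-8,8],
+ * so that a = sum(e[i]*16^i).  The ge_select()/ge_madd() names below follow
+ * the surrounding ref10-derived code and are assumed here:
+ *
+ *     h = identity;
+ *     for (i = 1; i < 64; i += 2) {        odd digits: 16^i = 16*256^(i/2)
+ *         ge_select(&t, i / 2, e[i]);      t = e[i]*256^(i/2)*B
+ *         ge_madd(..., &h, &t);            h += t
+ *     }
+ *     h = 16*h;                            four point doublings
+ *     for (i = 0; i < 64; i += 2) {        even digits: 16^i = 256^(i/2)
+ *         ge_select(&t, i / 2, e[i]);
+ *         ge_madd(..., &h, &t);
+ *     }
+ *
+ * ge_select(t, pos, b) scans all eight entries of base[pos] in constant time,
+ * keeps base[pos][|b|-1], and conditionally negates it when b < 0 (b == 0
+ * yields the identity), which is why each row stores only the eight positive
+ * multiples 1*256^i*B .. 8*256^i*B. */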
-15371964,-12862754,32573250,4720197,-26436522,5875511,-19188627,-15224819,-9818940,-12085777 }, + { -8549212,109983,15149363,2178705,22900618,4543417,3044240,-15689887,1762328,14866737 }, + { -18199695,-15951423,-10473290,1707278,-17185920,3916101,-28236412,3959421,27914454,4383652 }, + }, + { + { 5153746,9909285,1723747,-2777874,30523605,5516873,19480852,5230134,-23952439,-15175766 }, + { -30269007,-3463509,7665486,10083793,28475525,1649722,20654025,16520125,30598449,7715701 }, + { 28881845,14381568,9657904,3680757,-20181635,7843316,-31400660,1370708,29794553,-1409300 }, + }, + { + { 14499471,-2729599,-33191113,-4254652,28494862,14271267,30290735,10876454,-33154098,2381726 }, + { -7195431,-2655363,-14730155,462251,-27724326,3941372,-6236617,3696005,-32300832,15351955 }, + { 27431194,8222322,16448760,-3907995,-18707002,11938355,-32961401,-2970515,29551813,10109425 }, + }, +}, +{ + { + { -13657040,-13155431,-31283750,11777098,21447386,6519384,-2378284,-1627556,10092783,-4764171 }, + { 27939166,14210322,4677035,16277044,-22964462,-12398139,-32508754,12005538,-17810127,12803510 }, + { 17228999,-15661624,-1233527,300140,-1224870,-11714777,30364213,-9038194,18016357,4397660 }, + }, + { + { -10958843,-7690207,4776341,-14954238,27850028,-15602212,-26619106,14544525,-17477504,982639 }, + { 29253598,15796703,-2863982,-9908884,10057023,3163536,7332899,-4120128,-21047696,9934963 }, + { 5793303,16271923,-24131614,-10116404,29188560,1206517,-14747930,4559895,-30123922,-10897950 }, + }, + { + { -27643952,-11493006,16282657,-11036493,28414021,-15012264,24191034,4541697,-13338309,5500568 }, + { 12650548,-1497113,9052871,11355358,-17680037,-8400164,-17430592,12264343,10874051,13524335 }, + { 25556948,-3045990,714651,2510400,23394682,-10415330,33119038,5080568,-22528059,5376628 }, + }, + { + { -26088264,-4011052,-17013699,-3537628,-6726793,1920897,-22321305,-9447443,4535768,1569007 }, + { -2255422,14606630,-21692440,-8039818,28430649,8775819,-30494562,3044290,31848280,12543772 }, + { -22028579,2943893,-31857513,6777306,13784462,-4292203,-27377195,-2062731,7718482,14474653 }, + }, + { + { 2385315,2454213,-22631320,46603,-4437935,-15680415,656965,-7236665,24316168,-5253567 }, + { 13741529,10911568,-33233417,-8603737,-20177830,-1033297,33040651,-13424532,-20729456,8321686 }, + { 21060490,-2212744,15712757,-4336099,1639040,10656336,23845965,-11874838,-9984458,608372 }, + }, + { + { -13672732,-15087586,-10889693,-7557059,-6036909,11305547,1123968,-6780577,27229399,23887 }, + { -23244140,-294205,-11744728,14712571,-29465699,-2029617,12797024,-6440308,-1633405,16678954 }, + { -29500620,4770662,-16054387,14001338,7830047,9564805,-1508144,-4795045,-17169265,4904953 }, + }, + { + { 24059557,14617003,19037157,-15039908,19766093,-14906429,5169211,16191880,2128236,-4326833 }, + { -16981152,4124966,-8540610,-10653797,30336522,-14105247,-29806336,916033,-6882542,-2986532 }, + { -22630907,12419372,-7134229,-7473371,-16478904,16739175,285431,2763829,15736322,4143876 }, + }, + { + { 2379352,11839345,-4110402,-5988665,11274298,794957,212801,-14594663,23527084,-16458268 }, + { 33431127,-11130478,-17838966,-15626900,8909499,8376530,-32625340,4087881,-15188911,-14416214 }, + { 1767683,7197987,-13205226,-2022635,-13091350,448826,5799055,4357868,-4774191,-16323038 }, + }, +}, +{ + { + { 6721966,13833823,-23523388,-1551314,26354293,-11863321,23365147,-3949732,7390890,2759800 }, + { 4409041,2052381,23373853,10530217,7676779,-12885954,21302353,-4264057,1244380,-12919645 }, + { 
-4421239,7169619,4982368,-2957590,30256825,-2777540,14086413,9208236,15886429,16489664 }, + }, + { + { 1996075,10375649,14346367,13311202,-6874135,-16438411,-13693198,398369,-30606455,-712933 }, + { -25307465,9795880,-2777414,14878809,-33531835,14780363,13348553,12076947,-30836462,5113182 }, + { -17770784,11797796,31950843,13929123,-25888302,12288344,-30341101,-7336386,13847711,5387222 }, + }, + { + { -18582163,-3416217,17824843,-2340966,22744343,-10442611,8763061,3617786,-19600662,10370991 }, + { 20246567,-14369378,22358229,-543712,18507283,-10413996,14554437,-8746092,32232924,16763880 }, + { 9648505,10094563,26416693,14745928,-30374318,-6472621,11094161,15689506,3140038,-16510092 }, + }, + { + { -16160072,5472695,31895588,4744994,8823515,10365685,-27224800,9448613,-28774454,366295 }, + { 19153450,11523972,-11096490,-6503142,-24647631,5420647,28344573,8041113,719605,11671788 }, + { 8678025,2694440,-6808014,2517372,4964326,11152271,-15432916,-15266516,27000813,-10195553 }, + }, + { + { -15157904,7134312,8639287,-2814877,-7235688,10421742,564065,5336097,6750977,-14521026 }, + { 11836410,-3979488,26297894,16080799,23455045,15735944,1695823,-8819122,8169720,16220347 }, + { -18115838,8653647,17578566,-6092619,-8025777,-16012763,-11144307,-2627664,-5990708,-14166033 }, + }, + { + { -23308498,-10968312,15213228,-10081214,-30853605,-11050004,27884329,2847284,2655861,1738395 }, + { -27537433,-14253021,-25336301,-8002780,-9370762,8129821,21651608,-3239336,-19087449,-11005278 }, + { 1533110,3437855,23735889,459276,29970501,11335377,26030092,5821408,10478196,8544890 }, + }, + { + { 32173121,-16129311,24896207,3921497,22579056,-3410854,19270449,12217473,17789017,-3395995 }, + { -30552961,-2228401,-15578829,-10147201,13243889,517024,15479401,-3853233,30460520,1052596 }, + { -11614875,13323618,32618793,8175907,-15230173,12596687,27491595,-4612359,3179268,-9478891 }, + }, + { + { 31947069,-14366651,-4640583,-15339921,-15125977,-6039709,-14756777,-16411740,19072640,-9511060 }, + { 11685058,11822410,3158003,-13952594,33402194,-4165066,5977896,-5215017,473099,5040608 }, + { -20290863,8198642,-27410132,11602123,1290375,-2799760,28326862,1721092,-19558642,-3131606 }, + }, +}, +{ + { + { 7881532,10687937,7578723,7738378,-18951012,-2553952,21820786,8076149,-27868496,11538389 }, + { -19935666,3899861,18283497,-6801568,-15728660,-11249211,8754525,7446702,-5676054,5797016 }, + { -11295600,-3793569,-15782110,-7964573,12708869,-8456199,2014099,-9050574,-2369172,-5877341 }, + }, + { + { -22472376,-11568741,-27682020,1146375,18956691,16640559,1192730,-3714199,15123619,10811505 }, + { 14352098,-3419715,-18942044,10822655,32750596,4699007,-70363,15776356,-28886779,-11974553 }, + { -28241164,-8072475,-4978962,-5315317,29416931,1847569,-20654173,-16484855,4714547,-9600655 }, + }, + { + { 15200332,8368572,19679101,15970074,-31872674,1959451,24611599,-4543832,-11745876,12340220 }, + { 12876937,-10480056,33134381,6590940,-6307776,14872440,9613953,8241152,15370987,9608631 }, + { -4143277,-12014408,8446281,-391603,4407738,13629032,-7724868,15866074,-28210621,-8814099 }, + }, + { + { 26660628,-15677655,8393734,358047,-7401291,992988,-23904233,858697,20571223,8420556 }, + { 14620715,13067227,-15447274,8264467,14106269,15080814,33531827,12516406,-21574435,-12476749 }, + { 236881,10476226,57258,-14677024,6472998,2466984,17258519,7256740,8791136,15069930 }, + }, + { + { 1276410,-9371918,22949635,-16322807,-23493039,-5702186,14711875,4874229,-30663140,-2331391 }, + { 
5855666,4990204,-13711848,7294284,-7804282,1924647,-1423175,-7912378,-33069337,9234253 }, + { 20590503,-9018988,31529744,-7352666,-2706834,10650548,31559055,-11609587,18979186,13396066 }, + }, + { + { 24474287,4968103,22267082,4407354,24063882,-8325180,-18816887,13594782,33514650,7021958 }, + { -11566906,-6565505,-21365085,15928892,-26158305,4315421,-25948728,-3916677,-21480480,12868082 }, + { -28635013,13504661,19988037,-2132761,21078225,6443208,-21446107,2244500,-12455797,-8089383 }, + }, + { + { -30595528,13793479,-5852820,319136,-25723172,-6263899,33086546,8957937,-15233648,5540521 }, + { -11630176,-11503902,-8119500,-7643073,2620056,1022908,-23710744,-1568984,-16128528,-14962807 }, + { 23152971,775386,27395463,14006635,-9701118,4649512,1689819,892185,-11513277,-15205948 }, + }, + { + { 9770129,9586738,26496094,4324120,1556511,-3550024,27453819,4763127,-19179614,5867134 }, + { -32765025,1927590,31726409,-4753295,23962434,-16019500,27846559,5931263,-29749703,-16108455 }, + { 27461885,-2977536,22380810,1815854,-23033753,-3031938,7283490,-15148073,-19526700,7734629 }, + }, +}, +{ + { + { -8010264,-9590817,-11120403,6196038,29344158,-13430885,7585295,-3176626,18549497,15302069 }, + { -32658337,-6171222,-7672793,-11051681,6258878,13504381,10458790,-6418461,-8872242,8424746 }, + { 24687205,8613276,-30667046,-3233545,1863892,-1830544,19206234,7134917,-11284482,-828919 }, + }, + { + { 11334899,-9218022,8025293,12707519,17523892,-10476071,10243738,-14685461,-5066034,16498837 }, + { 8911542,6887158,-9584260,-6958590,11145641,-9543680,17303925,-14124238,6536641,10543906 }, + { -28946384,15479763,-17466835,568876,-1497683,11223454,-2669190,-16625574,-27235709,8876771 }, + }, + { + { -25742899,-12566864,-15649966,-846607,-33026686,-796288,-33481822,15824474,-604426,-9039817 }, + { 10330056,70051,7957388,-9002667,9764902,15609756,27698697,-4890037,1657394,3084098 }, + { 10477963,-7470260,12119566,-13250805,29016247,-5365589,31280319,14396151,-30233575,15272409 }, + }, + { + { -12288309,3169463,28813183,16658753,25116432,-5630466,-25173957,-12636138,-25014757,1950504 }, + { -26180358,9489187,11053416,-14746161,-31053720,5825630,-8384306,-8767532,15341279,8373727 }, + { 28685821,7759505,-14378516,-12002860,-31971820,4079242,298136,-10232602,-2878207,15190420 }, + }, + { + { -32932876,13806336,-14337485,-15794431,-24004620,10940928,8669718,2742393,-26033313,-6875003 }, + { -1580388,-11729417,-25979658,-11445023,-17411874,-10912854,9291594,-16247779,-12154742,6048605 }, + { -30305315,14843444,1539301,11864366,20201677,1900163,13934231,5128323,11213262,9168384 }, + }, + { + { -26280513,11007847,19408960,-940758,-18592965,-4328580,-5088060,-11105150,20470157,-16398701 }, + { -23136053,9282192,14855179,-15390078,-7362815,-14408560,-22783952,14461608,14042978,5230683 }, + { 29969567,-2741594,-16711867,-8552442,9175486,-2468974,21556951,3506042,-5933891,-12449708 }, + }, + { + { -3144746,8744661,19704003,4581278,-20430686,6830683,-21284170,8971513,-28539189,15326563 }, + { -19464629,10110288,-17262528,-3503892,-23500387,1355669,-15523050,15300988,-20514118,9168260 }, + { -5353335,4488613,-23803248,16314347,7780487,-15638939,-28948358,9601605,33087103,-9011387 }, + }, + { + { -19443170,-15512900,-20797467,-12445323,-29824447,10229461,-27444329,-15000531,-5996870,15664672 }, + { 23294591,-16632613,-22650781,-8470978,27844204,11461195,13099750,-2460356,18151676,13417686 }, + { -24722913,-4176517,-31150679,5988919,-26858785,6685065,1661597,-12551441,15271676,-15452665 }, + }, +}, +{ + { + { 
11433042,-13228665,8239631,-5279517,-1985436,-725718,-18698764,2167544,-6921301,-13440182 }, + { -31436171,15575146,30436815,12192228,-22463353,9395379,-9917708,-8638997,12215110,12028277 }, + { 14098400,6555944,23007258,5757252,-15427832,-12950502,30123440,4617780,-16900089,-655628 }, + }, + { + { -4026201,-15240835,11893168,13718664,-14809462,1847385,-15819999,10154009,23973261,-12684474 }, + { -26531820,-3695990,-1908898,2534301,-31870557,-16550355,18341390,-11419951,32013174,-10103539 }, + { -25479301,10876443,-11771086,-14625140,-12369567,1838104,21911214,6354752,4425632,-837822 }, + }, + { + { -10433389,-14612966,22229858,-3091047,-13191166,776729,-17415375,-12020462,4725005,14044970 }, + { 19268650,-7304421,1555349,8692754,-21474059,-9910664,6347390,-1411784,-19522291,-16109756 }, + { -24864089,12986008,-10898878,-5558584,-11312371,-148526,19541418,8180106,9282262,10282508 }, + }, + { + { -26205082,4428547,-8661196,-13194263,4098402,-14165257,15522535,8372215,5542595,-10702683 }, + { -10562541,14895633,26814552,-16673850,-17480754,-2489360,-2781891,6993761,-18093885,10114655 }, + { -20107055,-929418,31422704,10427861,-7110749,6150669,-29091755,-11529146,25953725,-106158 }, + }, + { + { -4234397,-8039292,-9119125,3046000,2101609,-12607294,19390020,6094296,-3315279,12831125 }, + { -15998678,7578152,5310217,14408357,-33548620,-224739,31575954,6326196,7381791,-2421839 }, + { -20902779,3296811,24736065,-16328389,18374254,7318640,6295303,8082724,-15362489,12339664 }, + }, + { + { 27724736,2291157,6088201,-14184798,1792727,5857634,13848414,15768922,25091167,14856294 }, + { -18866652,8331043,24373479,8541013,-701998,-9269457,12927300,-12695493,-22182473,-9012899 }, + { -11423429,-5421590,11632845,3405020,30536730,-11674039,-27260765,13866390,30146206,9142070 }, + }, + { + { 3924129,-15307516,-13817122,-10054960,12291820,-668366,-27702774,9326384,-8237858,4171294 }, + { -15921940,16037937,6713787,16606682,-21612135,2790944,26396185,3731949,345228,-5462949 }, + { -21327538,13448259,25284571,1143661,20614966,-8849387,2031539,-12391231,-16253183,-13582083 }, + }, + { + { 31016211,-16722429,26371392,-14451233,-5027349,14854137,17477601,3842657,28012650,-16405420 }, + { -5075835,9368966,-8562079,-4600902,-15249953,6970560,-9189873,16292057,-8867157,3507940 }, + { 29439664,3537914,23333589,6997794,-17555561,-11018068,-15209202,-15051267,-9164929,6580396 }, + }, +}, +{ + { + { -12185861,-7679788,16438269,10826160,-8696817,-6235611,17860444,-9273846,-2095802,9304567 }, + { 20714564,-4336911,29088195,7406487,11426967,-5095705,14792667,-14608617,5289421,-477127 }, + { -16665533,-10650790,-6160345,-13305760,9192020,-1802462,17271490,12349094,26939669,-3752294 }, + }, + { + { -12889898,9373458,31595848,16374215,21471720,13221525,-27283495,-12348559,-3698806,117887 }, + { 22263325,-6560050,3984570,-11174646,-15114008,-566785,28311253,5358056,-23319780,541964 }, + { 16259219,3261970,2309254,-15534474,-16885711,-4581916,24134070,-16705829,-13337066,-13552195 }, + }, + { + { 9378160,-13140186,-22845982,-12745264,28198281,-7244098,-2399684,-717351,690426,14876244 }, + { 24977353,-314384,-8223969,-13465086,28432343,-1176353,-13068804,-12297348,-22380984,6618999 }, + { -1538174,11685646,12944378,13682314,-24389511,-14413193,8044829,-13817328,32239829,-5652762 }, + }, + { + { -18603066,4762990,-926250,8885304,-28412480,-3187315,9781647,-10350059,32779359,5095274 }, + { -33008130,-5214506,-32264887,-3685216,9460461,-9327423,-24601656,14506724,21639561,-2630236 }, + { 
-16400943,-13112215,25239338,15531969,3987758,-4499318,-1289502,-6863535,17874574,558605 }, + }, + { + { -13600129,10240081,9171883,16131053,-20869254,9599700,33499487,5080151,2085892,5119761 }, + { -22205145,-2519528,-16381601,414691,-25019550,2170430,30634760,-8363614,-31999993,-5759884 }, + { -6845704,15791202,8550074,-1312654,29928809,-12092256,27534430,-7192145,-22351378,12961482 }, + }, + { + { -24492060,-9570771,10368194,11582341,-23397293,-2245287,16533930,8206996,-30194652,-5159638 }, + { -11121496,-3382234,2307366,6362031,-135455,8868177,-16835630,7031275,7589640,8945490 }, + { -32152748,8917967,6661220,-11677616,-1192060,-15793393,7251489,-11182180,24099109,-14456170 }, + }, + { + { 5019558,-7907470,4244127,-14714356,-26933272,6453165,-19118182,-13289025,-6231896,-10280736 }, + { 10853594,10721687,26480089,5861829,-22995819,1972175,-1866647,-10557898,-3363451,-6441124 }, + { -17002408,5906790,221599,-6563147,7828208,-13248918,24362661,-2008168,-13866408,7421392 }, + }, + { + { 8139927,-6546497,32257646,-5890546,30375719,1886181,-21175108,15441252,28826358,-4123029 }, + { 6267086,9695052,7709135,-16603597,-32869068,-1886135,14795160,-7840124,13746021,-1742048 }, + { 28584902,7787108,-6732942,-15050729,22846041,-7571236,-3181936,-363524,4771362,-8419958 }, + }, +}, +{ + { + { 24949256,6376279,-27466481,-8174608,-18646154,-9930606,33543569,-12141695,3569627,11342593 }, + { 26514989,4740088,27912651,3697550,19331575,-11472339,6809886,4608608,7325975,-14801071 }, + { -11618399,-14554430,-24321212,7655128,-1369274,5214312,-27400540,10258390,-17646694,-8186692 }, + }, + { + { 11431204,15823007,26570245,14329124,18029990,4796082,-31446179,15580664,9280358,-3973687 }, + { -160783,-10326257,-22855316,-4304997,-20861367,-13621002,-32810901,-11181622,-15545091,4387441 }, + { -20799378,12194512,3937617,-5805892,-27154820,9340370,-24513992,8548137,20617071,-7482001 }, + }, + { + { -938825,-3930586,-8714311,16124718,24603125,-6225393,-13775352,-11875822,24345683,10325460 }, + { -19855277,-1568885,-22202708,8714034,14007766,6928528,16318175,-1010689,4766743,3552007 }, + { -21751364,-16730916,1351763,-803421,-4009670,3950935,3217514,14481909,10988822,-3994762 }, + }, + { + { 15564307,-14311570,3101243,5684148,30446780,-8051356,12677127,-6505343,-8295852,13296005 }, + { -9442290,6624296,-30298964,-11913677,-4670981,-2057379,31521204,9614054,-30000824,12074674 }, + { 4771191,-135239,14290749,-13089852,27992298,14998318,-1413936,-1556716,29832613,-16391035 }, + }, + { + { 7064884,-7541174,-19161962,-5067537,-18891269,-2912736,25825242,5293297,-27122660,13101590 }, + { -2298563,2439670,-7466610,1719965,-27267541,-16328445,32512469,-5317593,-30356070,-4190957 }, + { -30006540,10162316,-33180176,3981723,-16482138,-13070044,14413974,9515896,19568978,9628812 }, + }, + { + { 33053803,199357,15894591,1583059,27380243,-4580435,-17838894,-6106839,-6291786,3437740 }, + { -18978877,3884493,19469877,12726490,15913552,13614290,-22961733,70104,7463304,4176122 }, + { -27124001,10659917,11482427,-16070381,12771467,-6635117,-32719404,-5322751,24216882,5944158 }, + }, + { + { 8894125,7450974,-2664149,-9765752,-28080517,-12389115,19345746,14680796,11632993,5847885 }, + { 26942781,-2315317,9129564,-4906607,26024105,11769399,-11518837,6367194,-9727230,4782140 }, + { 19916461,-4828410,-22910704,-11414391,25606324,-5972441,33253853,8220911,6358847,-1873857 }, + }, + { + { 801428,-2081702,16569428,11065167,29875704,96627,7908388,-4480480,-13538503,1387155 }, + { 
19646058,5720633,-11416706,12814209,11607948,12749789,14147075,15156355,-21866831,11835260 }, + { 19299512,1155910,28703737,14890794,2925026,7269399,26121523,15467869,-26560550,5052483 }, + }, +}, +{ + { + { -3017432,10058206,1980837,3964243,22160966,12322533,-6431123,-12618185,12228557,-7003677 }, + { 32944382,14922211,-22844894,5188528,21913450,-8719943,4001465,13238564,-6114803,8653815 }, + { 22865569,-4652735,27603668,-12545395,14348958,8234005,24808405,5719875,28483275,2841751 }, + }, + { + { -16420968,-1113305,-327719,-12107856,21886282,-15552774,-1887966,-315658,19932058,-12739203 }, + { -11656086,10087521,-8864888,-5536143,-19278573,-3055912,3999228,13239134,-4777469,-13910208 }, + { 1382174,-11694719,17266790,9194690,-13324356,9720081,20403944,11284705,-14013818,3093230 }, + }, + { + { 16650921,-11037932,-1064178,1570629,-8329746,7352753,-302424,16271225,-24049421,-6691850 }, + { -21911077,-5927941,-4611316,-5560156,-31744103,-10785293,24123614,15193618,-21652117,-16739389 }, + { -9935934,-4289447,-25279823,4372842,2087473,10399484,31870908,14690798,17361620,11864968 }, + }, + { + { -11307610,6210372,13206574,5806320,-29017692,-13967200,-12331205,-7486601,-25578460,-16240689 }, + { 14668462,-12270235,26039039,15305210,25515617,4542480,10453892,6577524,9145645,-6443880 }, + { 5974874,3053895,-9433049,-10385191,-31865124,3225009,-7972642,3936128,-5652273,-3050304 }, + }, + { + { 30625386,-4729400,-25555961,-12792866,-20484575,7695099,17097188,-16303496,-27999779,1803632 }, + { -3553091,9865099,-5228566,4272701,-5673832,-16689700,14911344,12196514,-21405489,7047412 }, + { 20093277,9920966,-11138194,-5343857,13161587,12044805,-32856851,4124601,-32343828,-10257566 }, + }, + { + { -20788824,14084654,-13531713,7842147,19119038,-13822605,4752377,-8714640,-21679658,2288038 }, + { -26819236,-3283715,29965059,3039786,-14473765,2540457,29457502,14625692,-24819617,12570232 }, + { -1063558,-11551823,16920318,12494842,1278292,-5869109,-21159943,-3498680,-11974704,4724943 }, + }, + { + { 17960970,-11775534,-4140968,-9702530,-8876562,-1410617,-12907383,-8659932,-29576300,1903856 }, + { 23134274,-14279132,-10681997,-1611936,20684485,15770816,-12989750,3190296,26955097,14109738 }, + { 15308788,5320727,-30113809,-14318877,22902008,7767164,29425325,-11277562,31960942,11934971 }, + }, + { + { -27395711,8435796,4109644,12222639,-24627868,14818669,20638173,4875028,10491392,1379718 }, + { -13159415,9197841,3875503,-8936108,-1383712,-5879801,33518459,16176658,21432314,12180697 }, + { -11787308,11500838,13787581,-13832590,-22430679,10140205,1465425,12689540,-10301319,-13872883 }, + }, +}, +{ + { + { 5414091,-15386041,-21007664,9643570,12834970,1186149,-2622916,-1342231,26128231,6032912 }, + { -26337395,-13766162,32496025,-13653919,17847801,-12669156,3604025,8316894,-25875034,-10437358 }, + { 3296484,6223048,24680646,-12246460,-23052020,5903205,-8862297,-4639164,12376617,3188849 }, + }, + { + { 29190488,-14659046,27549113,-1183516,3520066,-10697301,32049515,-7309113,-16109234,-9852307 }, + { -14744486,-9309156,735818,-598978,-20407687,-5057904,25246078,-15795669,18640741,-960977 }, + { -6928835,-16430795,10361374,5642961,4910474,12345252,-31638386,-494430,10530747,1053335 }, + }, + { + { -29265967,-14186805,-13538216,-12117373,-19457059,-10655384,-31462369,-2948985,24018831,15026644 }, + { -22592535,-3145277,-2289276,5953843,-13440189,9425631,25310643,13003497,-2314791,-15145616 }, + { -27419985,-603321,-8043984,-1669117,-26092265,13987819,-27297622,187899,-23166419,-2531735 }, + }, + { + { 
-21744398,-13810475,1844840,5021428,-10434399,-15911473,9716667,16266922,-5070217,726099 }, + { 29370922,-6053998,7334071,-15342259,9385287,2247707,-13661962,-4839461,30007388,-15823341 }, + { -936379,16086691,23751945,-543318,-1167538,-5189036,9137109,730663,9835848,4555336 }, + }, + { + { -23376435,1410446,-22253753,-12899614,30867635,15826977,17693930,544696,-11985298,12422646 }, + { 31117226,-12215734,-13502838,6561947,-9876867,-12757670,-5118685,-4096706,29120153,13924425 }, + { -17400879,-14233209,19675799,-2734756,-11006962,-5858820,-9383939,-11317700,7240931,-237388 }, + }, + { + { -31361739,-11346780,-15007447,-5856218,-22453340,-12152771,1222336,4389483,3293637,-15551743 }, + { -16684801,-14444245,11038544,11054958,-13801175,-3338533,-24319580,7733547,12796905,-6335822 }, + { -8759414,-10817836,-25418864,10783769,-30615557,-9746811,-28253339,3647836,3222231,-11160462 }, + }, + { + { 18606113,1693100,-25448386,-15170272,4112353,10045021,23603893,-2048234,-7550776,2484985 }, + { 9255317,-3131197,-12156162,-1004256,13098013,-9214866,16377220,-2102812,-19802075,-3034702 }, + { -22729289,7496160,-5742199,11329249,19991973,-3347502,-31718148,9936966,-30097688,-10618797 }, + }, + { + { 21878590,-5001297,4338336,13643897,-3036865,13160960,19708896,5415497,-7360503,-4109293 }, + { 27736861,10103576,12500508,8502413,-3413016,-9633558,10436918,-1550276,-23659143,-8132100 }, + { 19492550,-12104365,-29681976,-852630,-3208171,12403437,30066266,8367329,13243957,8709688 }, + }, +}, +{ + { + { 12015105,2801261,28198131,10151021,24818120,-4743133,-11194191,-5645734,5150968,7274186 }, + { 2831366,-12492146,1478975,6122054,23825128,-12733586,31097299,6083058,31021603,-9793610 }, + { -2529932,-2229646,445613,10720828,-13849527,-11505937,-23507731,16354465,15067285,-14147707 }, + }, + { + { 7840942,14037873,-33364863,15934016,-728213,-3642706,21403988,1057586,-19379462,-12403220 }, + { 915865,-16469274,15608285,-8789130,-24357026,6060030,-17371319,8410997,-7220461,16527025 }, + { 32922597,-556987,20336074,-16184568,10903705,-5384487,16957574,52992,23834301,6588044 }, + }, + { + { 32752030,11232950,3381995,-8714866,22652988,-10744103,17159699,16689107,-20314580,-1305992 }, + { -4689649,9166776,-25710296,-10847306,11576752,12733943,7924251,-2752281,1976123,-7249027 }, + { 21251222,16309901,-2983015,-6783122,30810597,12967303,156041,-3371252,12331345,-8237197 }, + }, + { + { 8651614,-4477032,-16085636,-4996994,13002507,2950805,29054427,-5106970,10008136,-4667901 }, + { 31486080,15114593,-14261250,12951354,14369431,-7387845,16347321,-13662089,8684155,-10532952 }, + { 19443825,11385320,24468943,-9659068,-23919258,2187569,-26263207,-6086921,31316348,14219878 }, + }, + { + { -28594490,1193785,32245219,11392485,31092169,15722801,27146014,6992409,29126555,9207390 }, + { 32382935,1110093,18477781,11028262,-27411763,-7548111,-4980517,10843782,-7957600,-14435730 }, + { 2814918,7836403,27519878,-7868156,-20894015,-11553689,-21494559,8550130,28346258,1994730 }, + }, + { + { -19578299,8085545,-14000519,-3948622,2785838,-16231307,-19516951,7174894,22628102,8115180 }, + { -30405132,955511,-11133838,-15078069,-32447087,-13278079,-25651578,3317160,-9943017,930272 }, + { -15303681,-6833769,28856490,1357446,23421993,1057177,24091212,-1388970,-22765376,-10650715 }, + }, + { + { -22751231,-5303997,-12907607,-12768866,-15811511,-7797053,-14839018,-16554220,-1867018,8398970 }, + { -31969310,2106403,-4736360,1362501,12813763,16200670,22981545,-6291273,18009408,-15772772 }, + { 
-17220923,-9545221,-27784654,14166835,29815394,7444469,29551787,-3727419,19288549,1325865 }, + }, + { + { 15100157,-15835752,-23923978,-1005098,-26450192,15509408,12376730,-3479146,33166107,-8042750 }, + { 20909231,13023121,-9209752,16251778,-5778415,-8094914,12412151,10018715,2213263,-13878373 }, + { 32529814,-11074689,30361439,-16689753,-9135940,1513226,22922121,6382134,-5766928,8371348 }, + }, +}, +{ + { + { 9923462,11271500,12616794,3544722,-29998368,-1721626,12891687,-8193132,-26442943,10486144 }, + { -22597207,-7012665,8587003,-8257861,4084309,-12970062,361726,2610596,-23921530,-11455195 }, + { 5408411,-1136691,-4969122,10561668,24145918,14240566,31319731,-4235541,19985175,-3436086 }, + }, + { + { -13994457,16616821,14549246,3341099,32155958,13648976,-17577068,8849297,65030,8370684 }, + { -8320926,-12049626,31204563,5839400,-20627288,-1057277,-19442942,6922164,12743482,-9800518 }, + { -2361371,12678785,28815050,4759974,-23893047,4884717,23783145,11038569,18800704,255233 }, + }, + { + { -5269658,-1773886,13957886,7990715,23132995,728773,13393847,9066957,19258688,-14753793 }, + { -2936654,-10827535,-10432089,14516793,-3640786,4372541,-31934921,2209390,-1524053,2055794 }, + { 580882,16705327,5468415,-2683018,-30926419,-14696000,-7203346,-8994389,-30021019,7394435 }, + }, + { + { 23838809,1822728,-15738443,15242727,8318092,-3733104,-21672180,-3492205,-4821741,14799921 }, + { 13345610,9759151,3371034,-16137791,16353039,8577942,31129804,13496856,-9056018,7402518 }, + { 2286874,-4435931,-20042458,-2008336,-13696227,5038122,11006906,-15760352,8205061,1607563 }, + }, + { + { 14414086,-8002132,3331830,-3208217,22249151,-5594188,18364661,-2906958,30019587,-9029278 }, + { -27688051,1585953,-10775053,931069,-29120221,-11002319,-14410829,12029093,9944378,8024 }, + { 4368715,-3709630,29874200,-15022983,-20230386,-11410704,-16114594,-999085,-8142388,5640030 }, + }, + { + { 10299610,13746483,11661824,16234854,7630238,5998374,9809887,-16694564,15219798,-14327783 }, + { 27425505,-5719081,3055006,10660664,23458024,595578,-15398605,-1173195,-18342183,9742717 }, + { 6744077,2427284,26042789,2720740,-847906,1118974,32324614,7406442,12420155,1994844 }, + }, + { + { 14012521,-5024720,-18384453,-9578469,-26485342,-3936439,-13033478,-10909803,24319929,-6446333 }, + { 16412690,-4507367,10772641,15929391,-17068788,-4658621,10555945,-10484049,-30102368,-4739048 }, + { 22397382,-7767684,-9293161,-12792868,17166287,-9755136,-27333065,6199366,21880021,-12250760 }, + }, + { + { -4283307,5368523,-31117018,8163389,-30323063,3209128,16557151,8890729,8840445,4957760 }, + { -15447727,709327,-6919446,-10870178,-29777922,6522332,-21720181,12130072,-14796503,5005757 }, + { -2114751,-14308128,23019042,15765735,-25269683,6002752,10183197,-13239326,-16395286,-2176112 }, + }, +}, +{ + { + { -19025756,1632005,13466291,-7995100,-23640451,16573537,-32013908,-3057104,22208662,2000468 }, + { 3065073,-1412761,-25598674,-361432,-17683065,-5703415,-8164212,11248527,-3691214,-7414184 }, + { 10379208,-6045554,8877319,1473647,-29291284,-12507580,16690915,2553332,-3132688,16400289 }, + }, + { + { 15716668,1254266,-18472690,7446274,-8448918,6344164,-22097271,-7285580,26894937,9132066 }, + { 24158887,12938817,11085297,-8177598,-28063478,-4457083,-30576463,64452,-6817084,-2692882 }, + { 13488534,7794716,22236231,5989356,25426474,-12578208,2350710,-3418511,-4688006,2364226 }, + }, + { + { 16335052,9132434,25640582,6678888,1725628,8517937,-11807024,-11697457,15445875,-7798101 }, + { 
29004207,-7867081,28661402,-640412,-12794003,-7943086,31863255,-4135540,-278050,-15759279 }, + { -6122061,-14866665,-28614905,14569919,-10857999,-3591829,10343412,-6976290,-29828287,-10815811 }, + }, + { + { 27081650,3463984,14099042,-4517604,1616303,-6205604,29542636,15372179,17293797,960709 }, + { 20263915,11434237,-5765435,11236810,13505955,-10857102,-16111345,6493122,-19384511,7639714 }, + { -2830798,-14839232,25403038,-8215196,-8317012,-16173699,18006287,-16043750,29994677,-15808121 }, + }, + { + { 9769828,5202651,-24157398,-13631392,-28051003,-11561624,-24613141,-13860782,-31184575,709464 }, + { 12286395,13076066,-21775189,-1176622,-25003198,4057652,-32018128,-8890874,16102007,13205847 }, + { 13733362,5599946,10557076,3195751,-5557991,8536970,-25540170,8525972,10151379,10394400 }, + }, + { + { 4024660,-16137551,22436262,12276534,-9099015,-2686099,19698229,11743039,-33302334,8934414 }, + { -15879800,-4525240,-8580747,-2934061,14634845,-698278,-9449077,3137094,-11536886,11721158 }, + { 17555939,-5013938,8268606,2331751,-22738815,9761013,9319229,8835153,-9205489,-1280045 }, + }, + { + { -461409,-7830014,20614118,16688288,-7514766,-4807119,22300304,505429,6108462,-6183415 }, + { -5070281,12367917,-30663534,3234473,32617080,-8422642,29880583,-13483331,-26898490,-7867459 }, + { -31975283,5726539,26934134,10237677,-3173717,-605053,24199304,3795095,7592688,-14992079 }, + }, + { + { 21594432,-14964228,17466408,-4077222,32537084,2739898,6407723,12018833,-28256052,4298412 }, + { -20650503,-11961496,-27236275,570498,3767144,-1717540,13891942,-1569194,13717174,10805743 }, + { -14676630,-15644296,15287174,11927123,24177847,-8175568,-796431,14860609,-26938930,-5863836 }, + }, +}, +{ + { + { 12962541,5311799,-10060768,11658280,18855286,-7954201,13286263,-12808704,-4381056,9882022 }, + { 18512079,11319350,-20123124,15090309,18818594,5271736,-22727904,3666879,-23967430,-3299429 }, + { -6789020,-3146043,16192429,13241070,15898607,-14206114,-10084880,-6661110,-2403099,5276065 }, + }, + { + { 30169808,-5317648,26306206,-11750859,27814964,7069267,7152851,3684982,1449224,13082861 }, + { 10342826,3098505,2119311,193222,25702612,12233820,23697382,15056736,-21016438,-8202000 }, + { -33150110,3261608,22745853,7948688,19370557,-15177665,-26171976,6482814,-10300080,-11060101 }, + }, + { + { 32869458,-5408545,25609743,15678670,-10687769,-15471071,26112421,2521008,-22664288,6904815 }, + { 29506923,4457497,3377935,-9796444,-30510046,12935080,1561737,3841096,-29003639,-6657642 }, + { 10340844,-6630377,-18656632,-2278430,12621151,-13339055,30878497,-11824370,-25584551,5181966 }, + }, + { + { 25940115,-12658025,17324188,-10307374,-8671468,15029094,24396252,-16450922,-2322852,-12388574 }, + { -21765684,9916823,-1300409,4079498,-1028346,11909559,1782390,12641087,20603771,-6561742 }, + { -18882287,-11673380,24849422,11501709,13161720,-4768874,1925523,11914390,4662781,7820689 }, + }, + { + { 12241050,-425982,8132691,9393934,32846760,-1599620,29749456,12172924,16136752,15264020 }, + { -10349955,-14680563,-8211979,2330220,-17662549,-14545780,10658213,6671822,19012087,3772772 }, + { 3753511,-3421066,10617074,2028709,14841030,-6721664,28718732,-15762884,20527771,12988982 }, + }, + { + { -14822485,-5797269,-3707987,12689773,-898983,-10914866,-24183046,-10564943,3299665,-12424953 }, + { -16777703,-15253301,-9642417,4978983,3308785,8755439,6943197,6461331,-25583147,8991218 }, + { -17226263,1816362,-1673288,-6086439,31783888,-8175991,-32948145,7417950,-30242287,1507265 }, + }, + { + { 
29692663,6829891,-10498800,4334896,20945975,-11906496,-28887608,8209391,14606362,-10647073 }, + { -3481570,8707081,32188102,5672294,22096700,1711240,-33020695,9761487,4170404,-2085325 }, + { -11587470,14855945,-4127778,-1531857,-26649089,15084046,22186522,16002000,-14276837,-8400798 }, + }, + { + { -4811456,13761029,-31703877,-2483919,-3312471,7869047,-7113572,-9620092,13240845,10965870 }, + { -7742563,-8256762,-14768334,-13656260,-23232383,12387166,4498947,14147411,29514390,4302863 }, + { -13413405,-12407859,20757302,-13801832,14785143,8976368,-5061276,-2144373,17846988,-13971927 }, + }, +}, +{ + { + { -2244452,-754728,-4597030,-1066309,-6247172,1455299,-21647728,-9214789,-5222701,12650267 }, + { -9906797,-16070310,21134160,12198166,-27064575,708126,387813,13770293,-19134326,10958663 }, + { 22470984,12369526,23446014,-5441109,-21520802,-9698723,-11772496,-11574455,-25083830,4271862 }, + }, + { + { -25169565,-10053642,-19909332,15361595,-5984358,2159192,75375,-4278529,-32526221,8469673 }, + { 15854970,4148314,-8893890,7259002,11666551,13824734,-30531198,2697372,24154791,-9460943 }, + { 15446137,-15806644,29759747,14019369,30811221,-9610191,-31582008,12840104,24913809,9815020 }, + }, + { + { -4709286,-5614269,-31841498,-12288893,-14443537,10799414,-9103676,13438769,18735128,9466238 }, + { 11933045,9281483,5081055,-5183824,-2628162,-4905629,-7727821,-10896103,-22728655,16199064 }, + { 14576810,379472,-26786533,-8317236,-29426508,-10812974,-102766,1876699,30801119,2164795 }, + }, + { + { 15995086,3199873,13672555,13712240,-19378835,-4647646,-13081610,-15496269,-13492807,1268052 }, + { -10290614,-3659039,-3286592,10948818,23037027,3794475,-3470338,-12600221,-17055369,3565904 }, + { 29210088,-9419337,-5919792,-4952785,10834811,-13327726,-16512102,-10820713,-27162222,-14030531 }, + }, + { + { -13161890,15508588,16663704,-8156150,-28349942,9019123,-29183421,-3769423,2244111,-14001979 }, + { -5152875,-3800936,-9306475,-6071583,16243069,14684434,-25673088,-16180800,13491506,4641841 }, + { 10813417,643330,-19188515,-728916,30292062,-16600078,27548447,-7721242,14476989,-12767431 }, + }, + { + { 10292079,9984945,6481436,8279905,-7251514,7032743,27282937,-1644259,-27912810,12651324 }, + { -31185513,-813383,22271204,11835308,10201545,15351028,17099662,3988035,21721536,-3148940 }, + { 10202177,-6545839,-31373232,-9574638,-32150642,-8119683,-12906320,3852694,13216206,14842320 }, + }, + { + { -15815640,-10601066,-6538952,-7258995,-6984659,-6581778,-31500847,13765824,-27434397,9900184 }, + { 14465505,-13833331,-32133984,-14738873,-27443187,12990492,33046193,15796406,-7051866,-8040114 }, + { 30924417,-8279620,6359016,-12816335,16508377,9071735,-25488601,15413635,9524356,-7018878 }, + }, + { + { 12274201,-13175547,32627641,-1785326,6736625,13267305,5237659,-5109483,15663516,4035784 }, + { -2951309,8903985,17349946,601635,-16432815,-4612556,-13732739,-15889334,-22258478,4659091 }, + { -16916263,-4952973,-30393711,-15158821,20774812,15897498,5736189,15026997,-2178256,-13455585 }, + }, +}, +{ + { + { -8858980,-2219056,28571666,-10155518,-474467,-10105698,-3801496,278095,23440562,-290208 }, + { 10226241,-5928702,15139956,120818,-14867693,5218603,32937275,11551483,-16571960,-7442864 }, + { 17932739,-12437276,-24039557,10749060,11316803,7535897,22503767,5561594,-3646624,3898661 }, + }, + { + { 7749907,-969567,-16339731,-16464,-25018111,15122143,-1573531,7152530,21831162,1245233 }, + { 26958459,-14658026,4314586,8346991,-5677764,11960072,-32589295,-620035,-30402091,-16716212 }, + { 
-12165896,9166947,33491384,13673479,29787085,13096535,6280834,14587357,-22338025,13987525 }, + }, + { + { -24349909,7778775,21116000,15572597,-4833266,-5357778,-4300898,-5124639,-7469781,-2858068 }, + { 9681908,-6737123,-31951644,13591838,-6883821,386950,31622781,6439245,-14581012,4091397 }, + { -8426427,1470727,-28109679,-1596990,3978627,-5123623,-19622683,12092163,29077877,-14741988 }, + }, + { + { 5269168,-6859726,-13230211,-8020715,25932563,1763552,-5606110,-5505881,-20017847,2357889 }, + { 32264008,-15407652,-5387735,-1160093,-2091322,-3946900,23104804,-12869908,5727338,189038 }, + { 14609123,-8954470,-6000566,-16622781,-14577387,-7743898,-26745169,10942115,-25888931,-14884697 }, + }, + { + { 20513500,5557931,-15604613,7829531,26413943,-2019404,-21378968,7471781,13913677,-5137875 }, + { -25574376,11967826,29233242,12948236,-6754465,4713227,-8940970,14059180,12878652,8511905 }, + { -25656801,3393631,-2955415,-7075526,-2250709,9366908,-30223418,6812974,5568676,-3127656 }, + }, + { + { 11630004,12144454,2116339,13606037,27378885,15676917,-17408753,-13504373,-14395196,8070818 }, + { 27117696,-10007378,-31282771,-5570088,1127282,12772488,-29845906,10483306,-11552749,-1028714 }, + { 10637467,-5688064,5674781,1072708,-26343588,-6982302,-1683975,9177853,-27493162,15431203 }, + }, + { + { 20525145,10892566,-12742472,12779443,-29493034,16150075,-28240519,14943142,-15056790,-7935931 }, + { -30024462,5626926,-551567,-9981087,753598,11981191,25244767,-3239766,-3356550,9594024 }, + { -23752644,2636870,-5163910,-10103818,585134,7877383,11345683,-6492290,13352335,-10977084 }, + }, + { + { -1931799,-5407458,3304649,-12884869,17015806,-4877091,-29783850,-7752482,-13215537,-319204 }, + { 20239939,6607058,6203985,3483793,-18386976,-779229,-20723742,15077870,-22750759,14523817 }, + { 27406042,-6041657,27423596,-4497394,4996214,10002360,-28842031,-4545494,-30172742,-4805667 }, + }, +}, +{ + { + { 11374242,12660715,17861383,-12540833,10935568,1099227,-13886076,-9091740,-27727044,11358504 }, + { -12730809,10311867,1510375,10778093,-2119455,-9145702,32676003,11149336,-26123651,4985768 }, + { -19096303,341147,-6197485,-239033,15756973,-8796662,-983043,13794114,-19414307,-15621255 }, + }, + { + { 6490081,11940286,25495923,-7726360,8668373,-8751316,3367603,6970005,-1691065,-9004790 }, + { 1656497,13457317,15370807,6364910,13605745,8362338,-19174622,-5475723,-16796596,-5031438 }, + { -22273315,-13524424,-64685,-4334223,-18605636,-10921968,-20571065,-7007978,-99853,-10237333 }, + }, + { + { 17747465,10039260,19368299,-4050591,-20630635,-16041286,31992683,-15857976,-29260363,-5511971 }, + { 31932027,-4986141,-19612382,16366580,22023614,88450,11371999,-3744247,4882242,-10626905 }, + { 29796507,37186,19818052,10115756,-11829032,3352736,18551198,3272828,-5190932,-4162409 }, + }, + { + { 12501286,4044383,-8612957,-13392385,-32430052,5136599,-19230378,-3529697,330070,-3659409 }, + { 6384877,2899513,17807477,7663917,-2358888,12363165,25366522,-8573892,-271295,12071499 }, + { -8365515,-4042521,25133448,-4517355,-6211027,2265927,-32769618,1936675,-5159697,3829363 }, + }, + { + { 28425966,-5835433,-577090,-4697198,-14217555,6870930,7921550,-6567787,26333140,14267664 }, + { -11067219,11871231,27385719,-10559544,-4585914,-11189312,10004786,-8709488,-21761224,8930324 }, + { -21197785,-16396035,25654216,-1725397,12282012,11008919,1541940,4757911,-26491501,-16408940 }, + }, + { + { 13537262,-7759490,-20604840,10961927,-5922820,-13218065,-13156584,6217254,-15943699,13814990 }, + { 
-17422573,15157790,18705543,29619,24409717,-260476,27361681,9257833,-1956526,-1776914 }, + { -25045300,-10191966,15366585,15166509,-13105086,8423556,-29171540,12361135,-18685978,4578290 }, + }, + { + { 24579768,3711570,1342322,-11180126,-27005135,14124956,-22544529,14074919,21964432,8235257 }, + { -6528613,-2411497,9442966,-5925588,12025640,-1487420,-2981514,-1669206,13006806,2355433 }, + { -16304899,-13605259,-6632427,-5142349,16974359,-10911083,27202044,1719366,1141648,-12796236 }, + }, + { + { -12863944,-13219986,-8318266,-11018091,-6810145,-4843894,13475066,-3133972,32674895,13715045 }, + { 11423335,-5468059,32344216,8962751,24989809,9241752,-13265253,16086212,-28740881,-15642093 }, + { -1409668,12530728,-6368726,10847387,19531186,-14132160,-11709148,7791794,-27245943,4383347 }, + }, +}, +{ + { + { -28970898,5271447,-1266009,-9736989,-12455236,16732599,-4862407,-4906449,27193557,6245191 }, + { -15193956,5362278,-1783893,2695834,4960227,12840725,23061898,3260492,22510453,8577507 }, + { -12632451,11257346,-32692994,13548177,-721004,10879011,31168030,13952092,-29571492,-3635906 }, + }, + { + { 3877321,-9572739,32416692,5405324,-11004407,-13656635,3759769,11935320,5611860,8164018 }, + { -16275802,14667797,15906460,12155291,-22111149,-9039718,32003002,-8832289,5773085,-8422109 }, + { -23788118,-8254300,1950875,8937633,18686727,16459170,-905725,12376320,31632953,190926 }, + }, + { + { -24593607,-16138885,-8423991,13378746,14162407,6901328,-8288749,4508564,-25341555,-3627528 }, + { 8884438,-5884009,6023974,10104341,-6881569,-4941533,18722941,-14786005,-1672488,827625 }, + { -32720583,-16289296,-32503547,7101210,13354605,2659080,-1800575,-14108036,-24878478,1541286 }, + }, + { + { 2901347,-1117687,3880376,-10059388,-17620940,-3612781,-21802117,-3567481,20456845,-1885033 }, + { 27019610,12299467,-13658288,-1603234,-12861660,-4861471,-19540150,-5016058,29439641,15138866 }, + { 21536104,-6626420,-32447818,-10690208,-22408077,5175814,-5420040,-16361163,7779328,109896 }, + }, + { + { 30279744,14648750,-8044871,6425558,13639621,-743509,28698390,12180118,23177719,-554075 }, + { 26572847,3405927,-31701700,12890905,-19265668,5335866,-6493768,2378492,4439158,-13279347 }, + { -22716706,3489070,-9225266,-332753,18875722,-1140095,14819434,-12731527,-17717757,-5461437 }, + }, + { + { -5056483,16566551,15953661,3767752,-10436499,15627060,-820954,2177225,8550082,-15114165 }, + { -18473302,16596775,-381660,15663611,22860960,15585581,-27844109,-3582739,-23260460,-8428588 }, + { -32480551,15707275,-8205912,-5652081,29464558,2713815,-22725137,15860482,-21902570,1494193 }, + }, + { + { -19562091,-14087393,-25583872,-9299552,13127842,759709,21923482,16529112,8742704,12967017 }, + { -28464899,1553205,32536856,-10473729,-24691605,-406174,-8914625,-2933896,-29903758,15553883 }, + { 21877909,3230008,9881174,10539357,-4797115,2841332,11543572,14513274,19375923,-12647961 }, + }, + { + { 8832269,-14495485,13253511,5137575,5037871,4078777,24880818,-6222716,2862653,9455043 }, + { 29306751,5123106,20245049,-14149889,9592566,8447059,-2077124,-2990080,15511449,4789663 }, + { -20679756,7004547,8824831,-9434977,-4045704,-3750736,-5754762,108893,23513200,16652362 }, + }, +}, +{ + { + { -33256173,4144782,-4476029,-6579123,10770039,-7155542,-6650416,-12936300,-18319198,10212860 }, + { 2756081,8598110,7383731,-6859892,22312759,-1105012,21179801,2600940,-9988298,-12506466 }, + { -24645692,13317462,-30449259,-15653928,21365574,-10869657,11344424,864440,-2499677,-16710063 }, + }, + { + { 
-26432803,6148329,-17184412,-14474154,18782929,-275997,-22561534,211300,2719757,4940997 }, + { -1323882,3911313,-6948744,14759765,-30027150,7851207,21690126,8518463,26699843,5276295 }, + { -13149873,-6429067,9396249,365013,24703301,-10488939,1321586,149635,-15452774,7159369 }, + }, + { + { 9987780,-3404759,17507962,9505530,9731535,-2165514,22356009,8312176,22477218,-8403385 }, + { 18155857,-16504990,19744716,9006923,15154154,-10538976,24256460,-4864995,-22548173,9334109 }, + { 2986088,-4911893,10776628,-3473844,10620590,-7083203,-21413845,14253545,-22587149,536906 }, + }, + { + { 4377756,8115836,24567078,15495314,11625074,13064599,7390551,10589625,10838060,-15420424 }, + { -19342404,867880,9277171,-3218459,-14431572,-1986443,19295826,-15796950,6378260,699185 }, + { 7895026,4057113,-7081772,-13077756,-17886831,-323126,-716039,15693155,-5045064,-13373962 }, + }, + { + { -7737563,-5869402,-14566319,-7406919,11385654,13201616,31730678,-10962840,-3918636,-9669325 }, + { 10188286,-15770834,-7336361,13427543,22223443,14896287,30743455,7116568,-21786507,5427593 }, + { 696102,13206899,27047647,-10632082,15285305,-9853179,10798490,-4578720,19236243,12477404 }, + }, + { + { -11229439,11243796,-17054270,-8040865,-788228,-8167967,-3897669,11180504,-23169516,7733644 }, + { 17800790,-14036179,-27000429,-11766671,23887827,3149671,23466177,-10538171,10322027,15313801 }, + { 26246234,11968874,32263343,-5468728,6830755,-13323031,-15794704,-101982,-24449242,10890804 }, + }, + { + { -31365647,10271363,-12660625,-6267268,16690207,-13062544,-14982212,16484931,25180797,-5334884 }, + { -586574,10376444,-32586414,-11286356,19801893,10997610,2276632,9482883,316878,13820577 }, + { -9882808,-4510367,-2115506,16457136,-11100081,11674996,30756178,-7515054,30696930,-3712849 }, + }, + { + { 32988917,-9603412,12499366,7910787,-10617257,-11931514,-7342816,-9985397,-32349517,7392473 }, + { -8855661,15927861,9866406,-3649411,-2396914,-16655781,-30409476,-9134995,25112947,-2926644 }, + { -2504044,-436966,25621774,-5678772,15085042,-5479877,-24884878,-13526194,5537438,-13914319 }, + }, +}, +{ + { + { -11225584,2320285,-9584280,10149187,-33444663,5808648,-14876251,-1729667,31234590,6090599 }, + { -9633316,116426,26083934,2897444,-6364437,-2688086,609721,15878753,-6970405,-9034768 }, + { -27757857,247744,-15194774,-9002551,23288161,-10011936,-23869595,6503646,20650474,1804084 }, + }, + { + { -27589786,15456424,8972517,8469608,15640622,4439847,3121995,-10329713,27842616,-202328 }, + { -15306973,2839644,22530074,10026331,4602058,5048462,28248656,5031932,-11375082,12714369 }, + { 20807691,-7270825,29286141,11421711,-27876523,-13868230,-21227475,1035546,-19733229,12796920 }, + }, + { + { 12076899,-14301286,-8785001,-11848922,-25012791,16400684,-17591495,-12899438,3480665,-15182815 }, + { -32361549,5457597,28548107,7833186,7303070,-11953545,-24363064,-15921875,-33374054,2771025 }, + { -21389266,421932,26597266,6860826,22486084,-6737172,-17137485,-4210226,-24552282,15673397 }, + }, + { + { -20184622,2338216,19788685,-9620956,-4001265,-8740893,-20271184,4733254,3727144,-12934448 }, + { 6120119,814863,-11794402,-622716,6812205,-15747771,2019594,7975683,31123697,-10958981 }, + { 30069250,-11435332,30434654,2958439,18399564,-976289,12296869,9204260,-16432438,9648165 }, + }, + { + { 32705432,-1550977,30705658,7451065,-11805606,9631813,3305266,5248604,-26008332,-11377501 }, + { 17219865,2375039,-31570947,-5575615,-19459679,9219903,294711,15298639,2662509,-16297073 }, + { 
-1172927,-7558695,-4366770,-4287744,-21346413,-8434326,32087529,-1222777,32247248,-14389861 }, + }, + { + { 14312628,1221556,17395390,-8700143,-4945741,-8684635,-28197744,-9637817,-16027623,-13378845 }, + { -1428825,-9678990,-9235681,6549687,-7383069,-468664,23046502,9803137,17597934,2346211 }, + { 18510800,15337574,26171504,981392,-22241552,7827556,-23491134,-11323352,3059833,-11782870 }, + }, + { + { 10141598,6082907,17829293,-1947643,9830092,13613136,-25556636,-5544586,-33502212,3592096 }, + { 33114168,-15889352,-26525686,-13343397,33076705,8716171,1151462,1521897,-982665,-6837803 }, + { -32939165,-4255815,23947181,-324178,-33072974,-12305637,-16637686,3891704,26353178,693168 }, + }, + { + { 30374239,1595580,-16884039,13186931,4600344,406904,9585294,-400668,31375464,14369965 }, + { -14370654,-7772529,1510301,6434173,-18784789,-6262728,32732230,-13108839,17901441,16011505 }, + { 18171223,-11934626,-12500402,15197122,-11038147,-15230035,-19172240,-16046376,8764035,12309598 }, + }, +}, +{ + { + { 5975908,-5243188,-19459362,-9681747,-11541277,14015782,-23665757,1228319,17544096,-10593782 }, + { 5811932,-1715293,3442887,-2269310,-18367348,-8359541,-18044043,-15410127,-5565381,12348900 }, + { -31399660,11407555,25755363,6891399,-3256938,14872274,-24849353,8141295,-10632534,-585479 }, + }, + { + { -12675304,694026,-5076145,13300344,14015258,-14451394,-9698672,-11329050,30944593,1130208 }, + { 8247766,-6710942,-26562381,-7709309,-14401939,-14648910,4652152,2488540,23550156,-271232 }, + { 17294316,-3788438,7026748,15626851,22990044,113481,2267737,-5908146,-408818,-137719 }, + }, + { + { 16091085,-16253926,18599252,7340678,2137637,-1221657,-3364161,14550936,3260525,-7166271 }, + { -4910104,-13332887,18550887,10864893,-16459325,-7291596,-23028869,-13204905,-12748722,2701326 }, + { -8574695,16099415,4629974,-16340524,-20786213,-6005432,-10018363,9276971,11329923,1862132 }, + }, + { + { 14763076,-15903608,-30918270,3689867,3511892,10313526,-21951088,12219231,-9037963,-940300 }, + { 8894987,-3446094,6150753,3013931,301220,15693451,-31981216,-2909717,-15438168,11595570 }, + { 15214962,3537601,-26238722,-14058872,4418657,-15230761,13947276,10730794,-13489462,-4363670 }, + }, + { + { -2538306,7682793,32759013,263109,-29984731,-7955452,-22332124,-10188635,977108,699994 }, + { -12466472,4195084,-9211532,550904,-15565337,12917920,19118110,-439841,-30534533,-14337913 }, + { 31788461,-14507657,4799989,7372237,8808585,-14747943,9408237,-10051775,12493932,-5409317 }, + }, + { + { -25680606,5260744,-19235809,-6284470,-3695942,16566087,27218280,2607121,29375955,6024730 }, + { 842132,-2794693,-4763381,-8722815,26332018,-12405641,11831880,6985184,-9940361,2854096 }, + { -4847262,-7969331,2516242,-5847713,9695691,-7221186,16512645,960770,12121869,16648078 }, + }, + { + { -15218652,14667096,-13336229,2013717,30598287,-464137,-31504922,-7882064,20237806,2838411 }, + { -19288047,4453152,15298546,-16178388,22115043,-15972604,12544294,-13470457,1068881,-12499905 }, + { -9558883,-16518835,33238498,13506958,30505848,-1114596,-8486907,-2630053,12521378,4845654 }, + }, + { + { -28198521,10744108,-2958380,10199664,7759311,-13088600,3409348,-873400,-6482306,-12885870 }, + { -23561822,6230156,-20382013,10655314,-24040585,-11621172,10477734,-1240216,-3113227,13974498 }, + { 12966261,15550616,-32038948,-1615346,21025980,-629444,5642325,7188737,18895762,12629579 }, + }, +}, +{ + { + { 14741879,-14946887,22177208,-11721237,1279741,8058600,11758140,789443,32195181,3895677 }, + { 
10758205,15755439,-4509950,9243698,-4879422,6879879,-2204575,-3566119,-8982069,4429647 }, + { -2453894,15725973,-20436342,-10410672,-5803908,-11040220,-7135870,-11642895,18047436,-15281743 }, + }, + { + { -25173001,-11307165,29759956,11776784,-22262383,-15820455,10993114,-12850837,-17620701,-9408468 }, + { 21987233,700364,-24505048,14972008,-7774265,-5718395,32155026,2581431,-29958985,8773375 }, + { -25568350,454463,-13211935,16126715,25240068,8594567,20656846,12017935,-7874389,-13920155 }, + }, + { + { 6028182,6263078,-31011806,-11301710,-818919,2461772,-31841174,-5468042,-1721788,-2776725 }, + { -12278994,16624277,987579,-5922598,32908203,1248608,7719845,-4166698,28408820,6816612 }, + { -10358094,-8237829,19549651,-12169222,22082623,16147817,20613181,13982702,-10339570,5067943 }, + }, + { + { -30505967,-3821767,12074681,13582412,-19877972,2443951,-19719286,12746132,5331210,-10105944 }, + { 30528811,3601899,-1957090,4619785,-27361822,-15436388,24180793,-12570394,27679908,-1648928 }, + { 9402404,-13957065,32834043,10838634,-26580150,-13237195,26653274,-8685565,22611444,-12715406 }, + }, + { + { 22190590,1118029,22736441,15130463,-30460692,-5991321,19189625,-4648942,4854859,6622139 }, + { -8310738,-2953450,-8262579,-3388049,-10401731,-271929,13424426,-3567227,26404409,13001963 }, + { -31241838,-15415700,-2994250,8939346,11562230,-12840670,-26064365,-11621720,-15405155,11020693 }, + }, + { + { 1866042,-7949489,-7898649,-10301010,12483315,13477547,3175636,-12424163,28761762,1406734 }, + { -448555,-1777666,13018551,3194501,-9580420,-11161737,24760585,-4347088,25577411,-13378680 }, + { -24290378,4759345,-690653,-1852816,2066747,10693769,-29595790,9884936,-9368926,4745410 }, + }, + { + { -9141284,6049714,-19531061,-4341411,-31260798,9944276,-15462008,-11311852,10931924,-11931931 }, + { -16561513,14112680,-8012645,4817318,-8040464,-11414606,-22853429,10856641,-20470770,13434654 }, + { 22759489,-10073434,-16766264,-1871422,13637442,-10168091,1765144,-12654326,28445307,-5364710 }, + }, + { + { 29875063,12493613,2795536,-3786330,1710620,15181182,-10195717,-8788675,9074234,1167180 }, + { -26205683,11014233,-9842651,-2635485,-26908120,7532294,-18716888,-9535498,3843903,9367684 }, + { -10969595,-6403711,9591134,9582310,11349256,108879,16235123,8601684,-139197,4242895 }, + }, +}, +{ + { + { 22092954,-13191123,-2042793,-11968512,32186753,-11517388,-6574341,2470660,-27417366,16625501 }, + { -11057722,3042016,13770083,-9257922,584236,-544855,-7770857,2602725,-27351616,14247413 }, + { 6314175,-10264892,-32772502,15957557,-10157730,168750,-8618807,14290061,27108877,-1180880 }, + }, + { + { -8586597,-7170966,13241782,10960156,-32991015,-13794596,33547976,-11058889,-27148451,981874 }, + { 22833440,9293594,-32649448,-13618667,-9136966,14756819,-22928859,-13970780,-10479804,-16197962 }, + { -7768587,3326786,-28111797,10783824,19178761,14905060,22680049,13906969,-15933690,3797899 }, + }, + { + { 21721356,-4212746,-12206123,9310182,-3882239,-13653110,23740224,-2709232,20491983,-8042152 }, + { 9209270,-15135055,-13256557,-6167798,-731016,15289673,25947805,15286587,30997318,-6703063 }, + { 7392032,16618386,23946583,-8039892,-13265164,-1533858,-14197445,-2321576,17649998,-250080 }, + }, + { + { -9301088,-14193827,30609526,-3049543,-25175069,-1283752,-15241566,-9525724,-2233253,7662146 }, + { -17558673,1763594,-33114336,15908610,-30040870,-12174295,7335080,-8472199,-3174674,3440183 }, + { -19889700,-5977008,-24111293,-9688870,10799743,-16571957,40450,-4431835,4862400,1133 }, + }, + { + { 
-32856209,-7873957,-5422389,14860950,-16319031,7956142,7258061,311861,-30594991,-7379421 }, + { -3773428,-1565936,28985340,7499440,24445838,9325937,29727763,16527196,18278453,15405622 }, + { -4381906,8508652,-19898366,-3674424,-5984453,15149970,-13313598,843523,-21875062,13626197 }, + }, + { + { 2281448,-13487055,-10915418,-2609910,1879358,16164207,-10783882,3953792,13340839,15928663 }, + { 31727126,-7179855,-18437503,-8283652,2875793,-16390330,-25269894,-7014826,-23452306,5964753 }, + { 4100420,-5959452,-17179337,6017714,-18705837,12227141,-26684835,11344144,2538215,-7570755 }, + }, + { + { -9433605,6123113,11159803,-2156608,30016280,14966241,-20474983,1485421,-629256,-15958862 }, + { -26804558,4260919,11851389,9658551,-32017107,16367492,-20205425,-13191288,11659922,-11115118 }, + { 26180396,10015009,-30844224,-8581293,5418197,9480663,2231568,-10170080,33100372,-1306171 }, + }, + { + { 15121113,-5201871,-10389905,15427821,-27509937,-15992507,21670947,4486675,-5931810,-14466380 }, + { 16166486,-9483733,-11104130,6023908,-31926798,-1364923,2340060,-16254968,-10735770,-10039824 }, + { 28042865,-3557089,-12126526,12259706,-3717498,-6945899,6766453,-8689599,18036436,5803270 }, + }, +}, +{ + { + { -817581,6763912,11803561,1585585,10958447,-2671165,23855391,4598332,-6159431,-14117438 }, + { -31031306,-14256194,17332029,-2383520,31312682,-5967183,696309,50292,-20095739,11763584 }, + { -594563,-2514283,-32234153,12643980,12650761,14811489,665117,-12613632,-19773211,-10713562 }, + }, + { + { 30464590,-11262872,-4127476,-12734478,19835327,-7105613,-24396175,2075773,-17020157,992471 }, + { 18357185,-6994433,7766382,16342475,-29324918,411174,14578841,8080033,-11574335,-10601610 }, + { 19598397,10334610,12555054,2555664,18821899,-10339780,21873263,16014234,26224780,16452269 }, + }, + { + { -30223925,5145196,5944548,16385966,3976735,2009897,-11377804,-7618186,-20533829,3698650 }, + { 14187449,3448569,-10636236,-10810935,-22663880,-3433596,7268410,-10890444,27394301,12015369 }, + { 19695761,16087646,28032085,12999827,6817792,11427614,20244189,-1312777,-13259127,-3402461 }, + }, + { + { 30860103,12735208,-1888245,-4699734,-16974906,2256940,-8166013,12298312,-8550524,-10393462 }, + { -5719826,-11245325,-1910649,15569035,26642876,-7587760,-5789354,-15118654,-4976164,12651793 }, + { -2848395,9953421,11531313,-5282879,26895123,-12697089,-13118820,-16517902,9768698,-2533218 }, + }, + { + { -24719459,1894651,-287698,-4704085,15348719,-8156530,32767513,12765450,4940095,10678226 }, + { 18860224,15980149,-18987240,-1562570,-26233012,-11071856,-7843882,13944024,-24372348,16582019 }, + { -15504260,4970268,-29893044,4175593,-20993212,-2199756,-11704054,15444560,-11003761,7989037 }, + }, + { + { 31490452,5568061,-2412803,2182383,-32336847,4531686,-32078269,6200206,-19686113,-14800171 }, + { -17308668,-15879940,-31522777,-2831,-32887382,16375549,8680158,-16371713,28550068,-6857132 }, + { -28126887,-5688091,16837845,-1820458,-6850681,12700016,-30039981,4364038,1155602,5988841 }, + }, + { + { 21890435,-13272907,-12624011,12154349,-7831873,15300496,23148983,-4470481,24618407,8283181 }, + { -33136107,-10512751,9975416,6841041,-31559793,16356536,3070187,-7025928,1466169,10740210 }, + { -1509399,-15488185,-13503385,-10655916,32799044,909394,-13938903,-5779719,-32164649,-15327040 }, + }, + { + { 3960823,-14267803,-28026090,-15918051,-19404858,13146868,15567327,951507,-3260321,-573935 }, + { 24740841,5052253,-30094131,8961361,25877428,6165135,-24368180,14397372,-7380369,-6144105 }, + { 
-28888365,3510803,-28103278,-1158478,-11238128,-10631454,-15441463,-14453128,-1625486,-6494814 }, + }, +}, +{ + { + { 793299,-9230478,8836302,-6235707,-27360908,-2369593,33152843,-4885251,-9906200,-621852 }, + { 5666233,525582,20782575,-8038419,-24538499,14657740,16099374,1468826,-6171428,-15186581 }, + { -4859255,-3779343,-2917758,-6748019,7778750,11688288,-30404353,-9871238,-1558923,-9863646 }, + }, + { + { 10896332,-7719704,824275,472601,-19460308,3009587,25248958,14783338,-30581476,-15757844 }, + { 10566929,12612572,-31944212,11118703,-12633376,12362879,21752402,8822496,24003793,14264025 }, + { 27713862,-7355973,-11008240,9227530,27050101,2504721,23886875,-13117525,13958495,-5732453 }, + }, + { + { -23481610,4867226,-27247128,3900521,29838369,-8212291,-31889399,-10041781,7340521,-15410068 }, + { 4646514,-8011124,-22766023,-11532654,23184553,8566613,31366726,-1381061,-15066784,-10375192 }, + { -17270517,12723032,-16993061,14878794,21619651,-6197576,27584817,3093888,-8843694,3849921 }, + }, + { + { -9064912,2103172,25561640,-15125738,-5239824,9582958,32477045,-9017955,5002294,-15550259 }, + { -12057553,-11177906,21115585,-13365155,8808712,-12030708,16489530,13378448,-25845716,12741426 }, + { -5946367,10645103,-30911586,15390284,-3286982,-7118677,24306472,15852464,28834118,-7646072 }, + }, + { + { -17335748,-9107057,-24531279,9434953,-8472084,-583362,-13090771,455841,20461858,5491305 }, + { 13669248,-16095482,-12481974,-10203039,-14569770,-11893198,-24995986,11293807,-28588204,-9421832 }, + { 28497928,6272777,-33022994,14470570,8906179,-1225630,18504674,-14165166,29867745,-8795943 }, + }, + { + { -16207023,13517196,-27799630,-13697798,24009064,-6373891,-6367600,-13175392,22853429,-4012011 }, + { 24191378,16712145,-13931797,15217831,14542237,1646131,18603514,-11037887,12876623,-2112447 }, + { 17902668,4518229,-411702,-2829247,26878217,5258055,-12860753,608397,16031844,3723494 }, + }, + { + { -28632773,12763728,-20446446,7577504,33001348,-13017745,17558842,-7872890,23896954,-4314245 }, + { -20005381,-12011952,31520464,605201,2543521,5991821,-2945064,7229064,-9919646,-8826859 }, + { 28816045,298879,-28165016,-15920938,19000928,-1665890,-12680833,-2949325,-18051778,-2082915 }, + }, + { + { 16000882,-344896,3493092,-11447198,-29504595,-13159789,12577740,16041268,-19715240,7847707 }, + { 10151868,10572098,27312476,7922682,14825339,4723128,-32855931,-6519018,-10020567,3852848 }, + { -11430470,15697596,-21121557,-4420647,5386314,15063598,16514493,-15932110,29330899,-15076224 }, + }, +}, +{ + { + { -25499735,-4378794,-15222908,-6901211,16615731,2051784,3303702,15490,-27548796,12314391 }, + { 15683520,-6003043,18109120,-9980648,15337968,-5997823,-16717435,15921866,16103996,-3731215 }, + { -23169824,-10781249,13588192,-1628807,-3798557,-1074929,-19273607,5402699,-29815713,-9841101 }, + }, + { + { 23190676,2384583,-32714340,3462154,-29903655,-1529132,-11266856,8911517,-25205859,2739713 }, + { 21374101,-3554250,-33524649,9874411,15377179,11831242,-33529904,6134907,4931255,11987849 }, + { -7732,-2978858,-16223486,7277597,105524,-322051,-31480539,13861388,-30076310,10117930 }, + }, + { + { -29501170,-10744872,-26163768,13051539,-25625564,5089643,-6325503,6704079,12890019,15728940 }, + { -21972360,-11771379,-951059,-4418840,14704840,2695116,903376,-10428139,12885167,8311031 }, + { -17516482,5352194,10384213,-13811658,7506451,13453191,26423267,4384730,1888765,-5435404 }, + }, + { + { -25817338,-3107312,-13494599,-3182506,30896459,-13921729,-32251644,-12707869,-19464434,-3340243 }, + { 
-23607977,-2665774,-526091,4651136,5765089,4618330,6092245,14845197,17151279,-9854116 }, + { -24830458,-12733720,-15165978,10367250,-29530908,-265356,22825805,-7087279,-16866484,16176525 }, + }, + { + { -23583256,6564961,20063689,3798228,-4740178,7359225,2006182,-10363426,-28746253,-10197509 }, + { -10626600,-4486402,-13320562,-5125317,3432136,-6393229,23632037,-1940610,32808310,1099883 }, + { 15030977,5768825,-27451236,-2887299,-6427378,-15361371,-15277896,-6809350,2051441,-15225865 }, + }, + { + { -3362323,-7239372,7517890,9824992,23555850,295369,5148398,-14154188,-22686354,16633660 }, + { 4577086,-16752288,13249841,-15304328,19958763,-14537274,18559670,-10759549,8402478,-9864273 }, + { -28406330,-1051581,-26790155,-907698,-17212414,-11030789,9453451,-14980072,17983010,9967138 }, + }, + { + { -25762494,6524722,26585488,9969270,24709298,1220360,-1677990,7806337,17507396,3651560 }, + { -10420457,-4118111,14584639,15971087,-15768321,8861010,26556809,-5574557,-18553322,-11357135 }, + { 2839101,14284142,4029895,3472686,14402957,12689363,-26642121,8459447,-5605463,-7621941 }, + }, + { + { -4839289,-3535444,9744961,2871048,25113978,3187018,-25110813,-849066,17258084,-7977739 }, + { 18164541,-10595176,-17154882,-1542417,19237078,-9745295,23357533,-15217008,26908270,12150756 }, + { -30264870,-7647865,5112249,-7036672,-1499807,-6974257,43168,-5537701,-32302074,16215819 }, + }, +}, +{ + { + { -6898905,9824394,-12304779,-4401089,-31397141,-6276835,32574489,12532905,-7503072,-8675347 }, + { -27343522,-16515468,-27151524,-10722951,946346,16291093,254968,7168080,21676107,-1943028 }, + { 21260961,-8424752,-16831886,-11920822,-23677961,3968121,-3651949,-6215466,-3556191,-7913075 }, + }, + { + { 16544754,13250366,-16804428,15546242,-4583003,12757258,-2462308,-8680336,-18907032,-9662799 }, + { -2415239,-15577728,18312303,4964443,-15272530,-12653564,26820651,16690659,25459437,-4564609 }, + { -25144690,11425020,28423002,-11020557,-6144921,-15826224,9142795,-2391602,-6432418,-1644817 }, + }, + { + { -23104652,6253476,16964147,-3768872,-25113972,-12296437,-27457225,-16344658,6335692,7249989 }, + { -30333227,13979675,7503222,-12368314,-11956721,-4621693,-30272269,2682242,25993170,-12478523 }, + { 4364628,5930691,32304656,-10044554,-8054781,15091131,22857016,-10598955,31820368,15075278 }, + }, + { + { 31879134,-8918693,17258761,90626,-8041836,-4917709,24162788,-9650886,-17970238,12833045 }, + { 19073683,14851414,-24403169,-11860168,7625278,11091125,-19619190,2074449,-9413939,14905377 }, + { 24483667,-11935567,-2518866,-11547418,-1553130,15355506,-25282080,9253129,27628530,-7555480 }, + }, + { + { 17597607,8340603,19355617,552187,26198470,-3176583,4593324,-9157582,-14110875,15297016 }, + { 510886,14337390,-31785257,16638632,6328095,2713355,-20217417,-11864220,8683221,2921426 }, + { 18606791,11874196,27155355,-5281482,-24031742,6265446,-25178240,-1278924,4674690,13890525 }, + }, + { + { 13609624,13069022,-27372361,-13055908,24360586,9592974,14977157,9835105,4389687,288396 }, + { 9922506,-519394,13613107,5883594,-18758345,-434263,-12304062,8317628,23388070,16052080 }, + { 12720016,11937594,-31970060,-5028689,26900120,8561328,-20155687,-11632979,-14754271,-10812892 }, + }, + { + { 15961858,14150409,26716931,-665832,-22794328,13603569,11829573,7467844,-28822128,929275 }, + { 11038231,-11582396,-27310482,-7316562,-10498527,-16307831,-23479533,-9371869,-21393143,2465074 }, + { 20017163,-4323226,27915242,1529148,12396362,15675764,13817261,-9658066,2463391,-4622140 }, + }, + { + { 
-16358878,-12663911,-12065183,4996454,-1256422,1073572,9583558,12851107,4003896,12673717 }, + { -1731589,-15155870,-3262930,16143082,19294135,13385325,14741514,-9103726,7903886,2348101 }, + { 24536016,-16515207,12715592,-3862155,1511293,10047386,-3842346,-7129159,-28377538,10048127 }, + }, +}, +{ + { + { -12622226,-6204820,30718825,2591312,-10617028,12192840,18873298,-7297090,-32297756,15221632 }, + { -26478122,-11103864,11546244,-1852483,9180880,7656409,-21343950,2095755,29769758,6593415 }, + { -31994208,-2907461,4176912,3264766,12538965,-868111,26312345,-6118678,30958054,8292160 }, + }, + { + { 31429822,-13959116,29173532,15632448,12174511,-2760094,32808831,3977186,26143136,-3148876 }, + { 22648901,1402143,-22799984,13746059,7936347,365344,-8668633,-1674433,-3758243,-2304625 }, + { -15491917,8012313,-2514730,-12702462,-23965846,-10254029,-1612713,-1535569,-16664475,8194478 }, + }, + { + { 27338066,-7507420,-7414224,10140405,-19026427,-6589889,27277191,8855376,28572286,3005164 }, + { 26287124,4821776,25476601,-4145903,-3764513,-15788984,-18008582,1182479,-26094821,-13079595 }, + { -7171154,3178080,23970071,6201893,-17195577,-4489192,-21876275,-13982627,32208683,-1198248 }, + }, + { + { -16657702,2817643,-10286362,14811298,6024667,13349505,-27315504,-10497842,-27672585,-11539858 }, + { 15941029,-9405932,-21367050,8062055,31876073,-238629,-15278393,-1444429,15397331,-4130193 }, + { 8934485,-13485467,-23286397,-13423241,-32446090,14047986,31170398,-1441021,-27505566,15087184 }, + }, + { + { -18357243,-2156491,24524913,-16677868,15520427,-6360776,-15502406,11461896,16788528,-5868942 }, + { -1947386,16013773,21750665,3714552,-17401782,-16055433,-3770287,-10323320,31322514,-11615635 }, + { 21426655,-5650218,-13648287,-5347537,-28812189,-4920970,-18275391,-14621414,13040862,-12112948 }, + }, + { + { 11293895,12478086,-27136401,15083750,-29307421,14748872,14555558,-13417103,1613711,4896935 }, + { -25894883,15323294,-8489791,-8057900,25967126,-13425460,2825960,-4897045,-23971776,-11267415 }, + { -15924766,-5229880,-17443532,6410664,3622847,10243618,20615400,12405433,-23753030,-8436416 }, + }, + { + { -7091295,12556208,-20191352,9025187,-17072479,4333801,4378436,2432030,23097949,-566018 }, + { 4565804,-16025654,20084412,-7842817,1724999,189254,24767264,10103221,-18512313,2424778 }, + { 366633,-11976806,8173090,-6890119,30788634,5745705,-7168678,1344109,-3642553,12412659 }, + }, + { + { -24001791,7690286,14929416,-168257,-32210835,-13412986,24162697,-15326504,-3141501,11179385 }, + { 18289522,-14724954,8056945,16430056,-21729724,7842514,-6001441,-1486897,-18684645,-11443503 }, + { 476239,6601091,-6152790,-9723375,17503545,-4863900,27672959,13403813,11052904,5219329 }, + }, +}, +{ + { + { 20678546,-8375738,-32671898,8849123,-5009758,14574752,31186971,-3973730,9014762,-8579056 }, + { -13644050,-10350239,-15962508,5075808,-1514661,-11534600,-33102500,9160280,8473550,-3256838 }, + { 24900749,14435722,17209120,-15292541,-22592275,9878983,-7689309,-16335821,-24568481,11788948 }, + }, + { + { -3118155,-11395194,-13802089,14797441,9652448,-6845904,-20037437,10410733,-24568470,-1458691 }, + { -15659161,16736706,-22467150,10215878,-9097177,7563911,11871841,-12505194,-18513325,8464118 }, + { -23400612,8348507,-14585951,-861714,-3950205,-6373419,14325289,8628612,33313881,-8370517 }, + }, + { + { -20186973,-4967935,22367356,5271547,-1097117,-4788838,-24805667,-10236854,-8940735,-5818269 }, + { -6948785,-1795212,-32625683,-16021179,32635414,-7374245,15989197,-12838188,28358192,-4253904 }, + { 
-23561781,-2799059,-32351682,-1661963,-9147719,10429267,-16637684,4072016,-5351664,5596589 }, + }, + { + { -28236598,-3390048,12312896,6213178,3117142,16078565,29266239,2557221,1768301,15373193 }, + { -7243358,-3246960,-4593467,-7553353,-127927,-912245,-1090902,-4504991,-24660491,3442910 }, + { -30210571,5124043,14181784,8197961,18964734,-11939093,22597931,7176455,-18585478,13365930 }, + }, + { + { -7877390,-1499958,8324673,4690079,6261860,890446,24538107,-8570186,-9689599,-3031667 }, + { 25008904,-10771599,-4305031,-9638010,16265036,15721635,683793,-11823784,15723479,-15163481 }, + { -9660625,12374379,-27006999,-7026148,-7724114,-12314514,11879682,5400171,519526,-1235876 }, + }, + { + { 22258397,-16332233,-7869817,14613016,-22520255,-2950923,-20353881,7315967,16648397,7605640 }, + { -8081308,-8464597,-8223311,9719710,19259459,-15348212,23994942,-5281555,-9468848,4763278 }, + { -21699244,9220969,-15730624,1084137,-25476107,-2852390,31088447,-7764523,-11356529,728112 }, + }, + { + { 26047220,-11751471,-6900323,-16521798,24092068,9158119,-4273545,-12555558,-29365436,-5498272 }, + { 17510331,-322857,5854289,8403524,17133918,-3112612,-28111007,12327945,10750447,10014012 }, + { -10312768,3936952,9156313,-8897683,16498692,-994647,-27481051,-666732,3424691,7540221 }, + }, + { + { 30322361,-6964110,11361005,-4143317,7433304,4989748,-7071422,-16317219,-9244265,15258046 }, + { 13054562,-2779497,19155474,469045,-12482797,4566042,5631406,2711395,1062915,-5136345 }, + { -19240248,-11254599,-29509029,-7499965,-5835763,13005411,-6066489,12194497,32960380,1459310 }, + }, +}, +{ + { + { 19852034,7027924,23669353,10020366,8586503,-6657907,394197,-6101885,18638003,-11174937 }, + { 31395534,15098109,26581030,8030562,-16527914,-5007134,9012486,-7584354,-6643087,-5442636 }, + { -9192165,-2347377,-1997099,4529534,25766844,607986,-13222,9677543,-32294889,-6456008 }, + }, + { + { -2444496,-149937,29348902,8186665,1873760,12489863,-30934579,-7839692,-7852844,-8138429 }, + { -15236356,-15433509,7766470,746860,26346930,-10221762,-27333451,10754588,-9431476,5203576 }, + { 31834314,14135496,-770007,5159118,20917671,-16768096,-7467973,-7337524,31809243,7347066 }, + }, + { + { -9606723,-11874240,20414459,13033986,13716524,-11691881,19797970,-12211255,15192876,-2087490 }, + { -12663563,-2181719,1168162,-3804809,26747877,-14138091,10609330,12694420,33473243,-13382104 }, + { 33184999,11180355,15832085,-11385430,-1633671,225884,15089336,-11023903,-6135662,14480053 }, + }, + { + { 31308717,-5619998,31030840,-1897099,15674547,-6582883,5496208,13685227,27595050,8737275 }, + { -20318852,-15150239,10933843,-16178022,8335352,-7546022,-31008351,-12610604,26498114,66511 }, + { 22644454,-8761729,-16671776,4884562,-3105614,-13559366,30540766,-4286747,-13327787,-7515095 }, + }, + { + { -28017847,9834845,18617207,-2681312,-3401956,-13307506,8205540,13585437,-17127465,15115439 }, + { 23711543,-672915,31206561,-8362711,6164647,-9709987,-33535882,-1426096,8236921,16492939 }, + { -23910559,-13515526,-26299483,-4503841,25005590,-7687270,19574902,10071562,6708380,-6222424 }, + }, + { + { 2101391,-4930054,19702731,2367575,-15427167,1047675,5301017,9328700,29955601,-11678310 }, + { 3096359,9271816,-21620864,-15521844,-14847996,-7592937,-25892142,-12635595,-9917575,6216608 }, + { -32615849,338663,-25195611,2510422,-29213566,-13820213,24822830,-6146567,-26767480,7525079 }, + }, + { + { -23066649,-13985623,16133487,-7896178,-3389565,778788,-910336,-2782495,-19386633,11994101 }, + { 
21691500,-13624626,-641331,-14367021,3285881,-3483596,-25064666,9718258,-7477437,13381418 }, + { 18445390,-4202236,14979846,11622458,-1727110,-3582980,23111648,-6375247,28535282,15779576 }, + }, + { + { 30098053,3089662,-9234387,16662135,-21306940,11308411,-14068454,12021730,9955285,-16303356 }, + { 9734894,-14576830,-7473633,-9138735,2060392,11313496,-18426029,9924399,20194861,13380996 }, + { -26378102,-7965207,-22167821,15789297,-18055342,-6168792,-1984914,15707771,26342023,10146099 }, + }, +}, +{ + { + { -26016874,-219943,21339191,-41388,19745256,-2878700,-29637280,2227040,21612326,-545728 }, + { -13077387,1184228,23562814,-5970442,-20351244,-6348714,25764461,12243797,-20856566,11649658 }, + { -10031494,11262626,27384172,2271902,26947504,-15997771,39944,6114064,33514190,2333242 }, + }, + { + { -21433588,-12421821,8119782,7219913,-21830522,-9016134,-6679750,-12670638,24350578,-13450001 }, + { -4116307,-11271533,-23886186,4843615,-30088339,690623,-31536088,-10406836,8317860,12352766 }, + { 18200138,-14475911,-33087759,-2696619,-23702521,-9102511,-23552096,-2287550,20712163,6719373 }, + }, + { + { 26656208,6075253,-7858556,1886072,-28344043,4262326,11117530,-3763210,26224235,-3297458 }, + { -17168938,-14854097,-3395676,-16369877,-19954045,14050420,21728352,9493610,18620611,-16428628 }, + { -13323321,13325349,11432106,5964811,18609221,6062965,-5269471,-9725556,-30701573,-16479657 }, + }, + { + { -23860538,-11233159,26961357,1640861,-32413112,-16737940,12248509,-5240639,13735342,1934062 }, + { 25089769,6742589,17081145,-13406266,21909293,-16067981,-15136294,-3765346,-21277997,5473616 }, + { 31883677,-7961101,1083432,-11572403,22828471,13290673,-7125085,12469656,29111212,-5451014 }, + }, + { + { 24244947,-15050407,-26262976,2791540,-14997599,16666678,24367466,6388839,-10295587,452383 }, + { -25640782,-3417841,5217916,16224624,19987036,-4082269,-24236251,-5915248,15766062,8407814 }, + { -20406999,13990231,15495425,16395525,5377168,15166495,-8917023,-4388953,-8067909,2276718 }, + }, + { + { 30157918,12924066,-17712050,9245753,19895028,3368142,-23827587,5096219,22740376,-7303417 }, + { 2041139,-14256350,7783687,13876377,-25946985,-13352459,24051124,13742383,-15637599,13295222 }, + { 33338237,-8505733,12532113,7977527,9106186,-1715251,-17720195,-4612972,-4451357,-14669444 }, + }, + { + { -20045281,5454097,-14346548,6447146,28862071,1883651,-2469266,-4141880,7770569,9620597 }, + { 23208068,7979712,33071466,8149229,1758231,-10834995,30945528,-1694323,-33502340,-14767970 }, + { 1439958,-16270480,-1079989,-793782,4625402,10647766,-5043801,1220118,30494170,-11440799 }, + }, + { + { -5037580,-13028295,-2970559,-3061767,15640974,-6701666,-26739026,926050,-1684339,-13333647 }, + { 13908495,-3549272,30919928,-6273825,-21521863,7989039,9021034,9078865,3353509,4033511 }, + { -29663431,-15113610,32259991,-344482,24295849,-12912123,23161163,8839127,27485041,7356032 }, + }, +}, +{ + { + { 9661027,705443,11980065,-5370154,-1628543,14661173,-6346142,2625015,28431036,-16771834 }, + { -23839233,-8311415,-25945511,7480958,-17681669,-8354183,-22545972,14150565,15970762,4099461 }, + { 29262576,16756590,26350592,-8793563,8529671,-11208050,13617293,-9937143,11465739,8317062 }, + }, + { + { -25493081,-6962928,32500200,-9419051,-23038724,-2302222,14898637,3848455,20969334,-5157516 }, + { -20384450,-14347713,-18336405,13884722,-33039454,2842114,-21610826,-3649888,11177095,14989547 }, + { -24496721,-11716016,16959896,2278463,12066309,10137771,13515641,2581286,-28487508,9930240 }, + }, + { + { 
-17751622,-2097826,16544300,-13009300,-15914807,-14949081,18345767,-13403753,16291481,-5314038 }, + { -33229194,2553288,32678213,9875984,8534129,6889387,-9676774,6957617,4368891,9788741 }, + { 16660756,7281060,-10830758,12911820,20108584,-8101676,-21722536,-8613148,16250552,-11111103 }, + }, + { + { -19765507,2390526,-16551031,14161980,1905286,6414907,4689584,10604807,-30190403,4782747 }, + { -1354539,14736941,-7367442,-13292886,7710542,-14155590,-9981571,4383045,22546403,437323 }, + { 31665577,-12180464,-16186830,1491339,-18368625,3294682,27343084,2786261,-30633590,-14097016 }, + }, + { + { -14467279,-683715,-33374107,7448552,19294360,14334329,-19690631,2355319,-19284671,-6114373 }, + { 15121312,-15796162,6377020,-6031361,-10798111,-12957845,18952177,15496498,-29380133,11754228 }, + { -2637277,-13483075,8488727,-14303896,12728761,-1622493,7141596,11724556,22761615,-10134141 }, + }, + { + { 16918416,11729663,-18083579,3022987,-31015732,-13339659,-28741185,-12227393,32851222,11717399 }, + { 11166634,7338049,-6722523,4531520,-29468672,-7302055,31474879,3483633,-1193175,-4030831 }, + { -185635,9921305,31456609,-13536438,-12013818,13348923,33142652,6546660,-19985279,-3948376 }, + }, + { + { -32460596,11266712,-11197107,-7899103,31703694,3855903,-8537131,-12833048,-30772034,-15486313 }, + { -18006477,12709068,3991746,-6479188,-21491523,-10550425,-31135347,-16049879,10928917,3011958 }, + { -6957757,-15594337,31696059,334240,29576716,14796075,-30831056,-12805180,18008031,10258577 }, + }, + { + { -22448644,15655569,7018479,-4410003,-30314266,-1201591,-1853465,1367120,25127874,6671743 }, + { 29701166,-14373934,-10878120,9279288,-17568,13127210,21382910,11042292,25838796,4642684 }, + { -20430234,14955537,-24126347,8124619,-5369288,-5990470,30468147,-13900640,18423289,4177476 }, + }, +}, +} ; +#endif + + +static void ge_select(ge_precomp *t,int pos,signed char b) +{ +#ifndef CURVED25519_ASM + ge_precomp minust; + unsigned char bnegative = negative(b); + unsigned char babs = b - (((-bnegative) & b) << 1); + + ge_precomp_0(t); + cmov(t,&base[pos][0],babs,1); + cmov(t,&base[pos][1],babs,2); + cmov(t,&base[pos][2],babs,3); + cmov(t,&base[pos][3],babs,4); + cmov(t,&base[pos][4],babs,5); + cmov(t,&base[pos][5],babs,6); + cmov(t,&base[pos][6],babs,7); + cmov(t,&base[pos][7],babs,8); + fe_cswap(t->yminusx, t->yplusx, bnegative); + fe_neg(minust.xy2d,t->xy2d); + fe_cmov(t->xy2d,minust.xy2d,bnegative); +#else + fe_cmov_table((fe*)t, (fe*)base[pos], b); +#endif +} + +/* +h = a * B +where a = a[0]+256*a[1]+...+256^31 a[31] +B is the Ed25519 base point (x,4/5) with x positive. 
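+
+(Sketch of the plain-C path below: the scalar is first recoded into 64
+signed radix-16 digits e[0..63], each in [-8,8], so that
+a = e[0] + 16*e[1] + ... + 16^63*e[63].  The table base[pos][j] above
+stores (j+1)*256^pos*B; the odd digits are accumulated first, scaled by
+16 with four doublings, and the even digits are then added, with every
+table lookup going through the constant-time cmov()s in ge_select().)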
+ +Preconditions: + a[31] <= 127 +*/ +void ge_scalarmult_base(ge_p3 *h,const unsigned char *a) +{ + signed char e[64]; + signed char carry; + ge_p1p1 r; +#ifndef CURVED25519_ASM + ge_p2 s; +#endif + ge_precomp t; + int i; + + for (i = 0;i < 32;++i) { + e[2 * i + 0] = (a[i] >> 0) & 15; + e[2 * i + 1] = (a[i] >> 4) & 15; + } + /* each e[i] is between 0 and 15 */ + /* e[63] is between 0 and 7 */ + + carry = 0; + for (i = 0;i < 63;++i) { + e[i] += carry; + carry = e[i] + 8; + carry >>= 4; + e[i] -= carry << 4; + } + e[63] += carry; + /* each e[i] is between -8 and 8 */ + +#ifndef CURVED25519_ASM + ge_select(&t,0,e[1]); + fe_sub(h->X, t.yplusx, t.yminusx); + fe_add(h->Y, t.yplusx, t.yminusx); + fe_0(h->Z); + h->Z[0] = 4; + fe_mul(h->T,h->X,h->Y); + fe_add(h->X, h->X, h->X); + fe_add(h->Y, h->Y, h->Y); + + for (i = 3;i < 64;i += 2) { + ge_select(&t,i / 2,e[i]); + ge_madd(&r,h,&t); ge_p1p1_to_p3(h,&r); + } + + ge_p3_dbl(&r,h); ge_p1p1_to_p2(&s,&r); + ge_p2_dbl(&r,&s); ge_p1p1_to_p2(&s,&r); + ge_p2_dbl(&r,&s); ge_p1p1_to_p2(&s,&r); + ge_p2_dbl(&r,&s); ge_p1p1_to_p3(h,&r); + + for (i = 0;i < 64;i += 2) { + ge_select(&t,i / 2,e[i]); + ge_madd(&r,h,&t); ge_p1p1_to_p3(h,&r); + } +#else + ge_select(&t, 0, e[0]); + fe_sub(h->X, t.yplusx, t.yminusx); + fe_add(h->Y, t.yplusx, t.yminusx); + fe_0(h->Z); + h->Z[0] = 2; + fe_copy(h->T, t.xy2d); + for (i = 1; i < 64; i++) { + ge_select(&t, i, e[i]); + ge_madd(&r,h,&t); ge_p1p1_to_p3(h,&r); + } +#endif +} + + +/* ge double scalar mult */ +static void slide(signed char *r,const unsigned char *a) +{ + int i; + int b; + int k; + + for (i = 0;i < 256;++i) + r[i] = 1 & (a[i >> 3] >> (i & 7)); + + for (i = 0;i < 256;++i) + if (r[i]) { + for (b = 1;b <= 6 && i + b < 256;++b) { + if (r[i + b]) { + if (r[i] + (r[i + b] << b) <= 15) { + r[i] += r[i + b] << b; r[i + b] = 0; + } else if (r[i] - (r[i + b] << b) >= -15) { + r[i] -= r[i + b] << b; + for (k = i + b;k < 256;++k) { + if (!r[k]) { + r[k] = 1; + break; + } + r[k] = 0; + } + } else + break; + } + } + } +} + +#ifdef CURVED25519_ASM_64BIT +static const ge_precomp Bi[8] = { + { + { 0x2fbc93c6f58c3b85, -0x306cd2390473f1e7, 0x270b4898643d42c2, 0x07cf9d3a33d4ba65, }, + { -0x62efc6fa28bf6ec2, -0x02c660fa2ebf414d, -0x5a3e7bcb977075f7, 0x44fd2f9298f81267, }, + { -0x5436edfa78855598, 0x26d9e823ccaac49e, 0x5a1b7dcbdd43598c, 0x6f117b689f0c65a8, }, + }, + { + { -0x50da4f57b31168d0, 0x025a8430e8864b8a, -0x3ee4affd60fe98ce, 0x7a164e1b9a80f8f4, }, + { 0x56611fe8a4fcd265, 0x3bd353fde5c1ba7d, -0x7ece0ce5deb42943, 0x2ab91587555bda62, }, + { 0x14ae933f0dd0d889, 0x589423221c35da62, -0x2e8f1aba730d24b4, 0x5a2826af12b9b4c6, }, + }, + { + { -0x5ded43bbf75a44cd, -0x72afb73c38a112fe, -0x22e414f3a54013bc, 0x2945ccf146e206eb, }, + { 0x7f9182c3a447d6ba, -0x2affeb2eb4d8d649, -0x1cc30ee3479b5f79, 0x154a7e73eb1b55f3, }, + { -0x4344240e7ed57d7b, 0x270e0807d0bdd1fc, -0x4be498f4e44258d3, 0x43aabe696b3bb69a, }, + }, + { + { 0x6b1a5cd0944ea3bf, 0x7470353ab39dc0d2, 0x71b2528228542e49, 0x461bea69283c927e, }, + { -0x4590d36555cdde4f, 0x6ca021533bba23a7, -0x621589b06de6d3c6, 0x1d6edd5d2e5317e0, }, + { -0x0e7c9237fe474c5e, -0x4cfca0b8fac15b66, 0x529c41ba5877adf3, 0x7a9fbb1c6a0f90a7, }, + }, + { + { -0x64d1987559579cd1, -0x59af6190ae43b93b, -0x314dcc3639790a4b, 0x34b9ed338add7f59, }, + { -0x0c91de81fc627f9c, -0x675f7e490adfbe65, -0x693439f718a14fbc, 0x49c05a51fadc9c8f, }, + { 0x06b4e8bf9045af1b, -0x1d007c1758e62dd1, -0x550903d66c2b30ea, 0x73c172021b008b06, }, + }, + { + { 0x2fbf00848a802ade, -0x1a260130fdcfd1d9, 0x113e847117703406, 0x4275aae2546d8faf, }, + 
{ 0x315f5b0249864348, 0x3ed6b36977088381, -0x5c5f8aaa9572146b, 0x18ab598029d5c77f, }, + { -0x27d4d33a029f7617, 0x031eb4a13282e4a4, 0x44311199b51a8622, 0x3dc65522b53df948, }, + }, + { + { -0x408f3ddd5dff8093, -0x407b4c654a432125, 0x537a0e12fb07ba07, 0x234fd7eec346f241, }, + { 0x506f013b327fbf93, -0x5103143664889095, -0x62ed4dcd5552a698, 0x0267882d176024a7, }, + { 0x5360a119732ea378, 0x2437e6b1df8dd471, -0x5d10c8076e581acd, 0x497ba6fdaa097863, }, + }, + { + { 0x24cecc0313cfeaa0, -0x79b73d72e763db93, 0x2dbdbdfac1f2d4d0, 0x61e22917f12de72b, }, + { 0x040bcd86468ccf0b, -0x2c7d645bd566ef2a, 0x7508300807b25192, 0x43b5cd4218d05ebf, }, + { 0x5d9a762f9bd0b516, -0x14c750b1c8c02112, 0x032e5a7d93d64270, 0x511d61210ae4d842, }, + }, +}; +#elif defined(CURVED25519_ASM_32BIT) +static const ge_precomp Bi[8] = { + { + { -0x0a73c47b, 0x2fbc93c6, -0x0473f1e7, -0x306cd23a, 0x643d42c2, 0x270b4898, 0x33d4ba65, 0x07cf9d3a, }, + { -0x28bf6ec2, -0x62efc6fb, -0x2ebf414d, -0x02c660fb, 0x688f8a09, -0x5a3e7bcc, -0x6707ed99, 0x44fd2f92, }, + { -0x78855598, -0x5436edfb, -0x33553b62, 0x26d9e823, -0x22bca674, 0x5a1b7dcb, -0x60f39a58, 0x6f117b68, }, + }, + { + { 0x4cee9730, -0x50da4f58, -0x1779b476, 0x025a8430, -0x60fe98ce, -0x3ee4affe, -0x657f070c, 0x7a164e1b, }, + { -0x5b032d9b, 0x56611fe8, -0x1a3e4583, 0x3bd353fd, 0x214bd6bd, -0x7ece0ce6, 0x555bda62, 0x2ab91587, }, + { 0x0dd0d889, 0x14ae933f, 0x1c35da62, 0x58942322, -0x730d24b4, -0x2e8f1abb, 0x12b9b4c6, 0x5a2826af, }, + }, + { + { 0x08a5bb33, -0x5ded43bc, -0x38a112fe, -0x72afb73d, 0x5abfec44, -0x22e414f4, 0x46e206eb, 0x2945ccf1, }, + { -0x5bb82946, 0x7f9182c3, 0x4b2729b7, -0x2affeb2f, -0x479b5f79, -0x1cc30ee4, -0x14e4aa0d, 0x154a7e73, }, + { -0x7ed57d7b, -0x4344240f, -0x2f422e04, 0x270e0807, 0x1bbda72d, -0x4be498f5, 0x6b3bb69a, 0x43aabe69, }, + }, + { + { -0x6bb15c41, 0x6b1a5cd0, -0x4c623f2e, 0x7470353a, 0x28542e49, 0x71b25282, 0x283c927e, 0x461bea69, }, + { -0x55cdde4f, -0x4590d366, 0x3bba23a7, 0x6ca02153, -0x6de6d3c6, -0x621589b1, 0x2e5317e0, 0x1d6edd5d, }, + { 0x01b8b3a2, -0x0e7c9238, 0x053ea49a, -0x4cfca0b9, 0x5877adf3, 0x529c41ba, 0x6a0f90a7, 0x7a9fbb1c, }, + }, + { + { -0x59579cd1, -0x64d19876, 0x51bc46c5, -0x59af6191, -0x39790a4b, -0x314dcc37, -0x752280a7, 0x34b9ed33, }, + { 0x039d8064, -0x0c91de82, -0x0adfbe65, -0x675f7e4a, -0x18a14fbc, -0x693439f8, -0x05236371, 0x49c05a51, }, + { -0x6fba50e5, 0x06b4e8bf, -0x58e62dd1, -0x1d007c18, -0x6c2b30ea, -0x550903d7, 0x1b008b06, 0x73c17202, }, + }, + { + { -0x757fd522, 0x2fbf0084, 0x02302e27, -0x1a260131, 0x17703406, 0x113e8471, 0x546d8faf, 0x4275aae2, }, + { 0x49864348, 0x315f5b02, 0x77088381, 0x3ed6b369, 0x6a8deb95, -0x5c5f8aab, 0x29d5c77f, 0x18ab5980, }, + { -0x029f7617, -0x27d4d33b, 0x3282e4a4, 0x031eb4a1, -0x4ae579de, 0x44311199, -0x4ac206b8, 0x3dc65522, }, + }, + { + { -0x5dff8093, -0x408f3dde, -0x4a432125, -0x407b4c66, -0x04f845f9, 0x537a0e12, -0x3cb90dbf, 0x234fd7ee, }, + { 0x327fbf93, 0x506f013b, -0x64889095, -0x51031437, -0x5552a698, -0x62ed4dce, 0x176024a7, 0x0267882d, }, + { 0x732ea378, 0x5360a119, -0x20722b8f, 0x2437e6b1, -0x6e581acd, -0x5d10c808, -0x55f6879d, 0x497ba6fd, }, + }, + { + { 0x13cfeaa0, 0x24cecc03, 0x189c246d, -0x79b73d73, -0x3e0d2b30, 0x2dbdbdfa, -0x0ed218d5, 0x61e22917, }, + { 0x468ccf0b, 0x040bcd86, 0x2a9910d6, -0x2c7d645c, 0x07b25192, 0x75083008, 0x18d05ebf, 0x43b5cd42, }, + { -0x642f4aea, 0x5d9a762f, 0x373fdeee, -0x14c750b2, -0x6c29bd90, 0x032e5a7d, 0x0ae4d842, 0x511d6121, }, + }, +}; +#elif defined(CURVED25519_128BIT) +static const ge_precomp Bi[8] = { + { + { 0x493c6f58c3b85, 0x0df7181c325f7, 
0x0f50b0b3e4cb7, 0x5329385a44c32, 0x07cf9d3a33d4b }, + { 0x03905d740913e, 0x0ba2817d673a2, 0x23e2827f4e67c, 0x133d2e0c21a34, 0x44fd2f9298f81 }, + { 0x11205877aaa68, 0x479955893d579, 0x50d66309b67a0, 0x2d42d0dbee5ee, 0x6f117b689f0c6 }, + }, + { + { 0x5b0a84cee9730, 0x61d10c97155e4, 0x4059cc8096a10, 0x47a608da8014f, 0x7a164e1b9a80f }, + { 0x11fe8a4fcd265, 0x7bcb8374faacc, 0x52f5af4ef4d4f, 0x5314098f98d10, 0x2ab91587555bd }, + { 0x6933f0dd0d889, 0x44386bb4c4295, 0x3cb6d3162508c, 0x26368b872a2c6, 0x5a2826af12b9b }, + }, + { + { 0x2bc4408a5bb33, 0x078ebdda05442, 0x2ffb112354123, 0x375ee8df5862d, 0x2945ccf146e20 }, + { 0x182c3a447d6ba, 0x22964e536eff2, 0x192821f540053, 0x2f9f19e788e5c, 0x154a7e73eb1b5 }, + { 0x3dbf1812a8285, 0x0fa17ba3f9797, 0x6f69cb49c3820, 0x34d5a0db3858d, 0x43aabe696b3bb }, + }, + { + { 0x25cd0944ea3bf, 0x75673b81a4d63, 0x150b925d1c0d4, 0x13f38d9294114, 0x461bea69283c9 }, + { 0x72c9aaa3221b1, 0x267774474f74d, 0x064b0e9b28085, 0x3f04ef53b27c9, 0x1d6edd5d2e531 }, + { 0x36dc801b8b3a2, 0x0e0a7d4935e30, 0x1deb7cecc0d7d, 0x053a94e20dd2c, 0x7a9fbb1c6a0f9 }, + }, + { + { 0x6678aa6a8632f, 0x5ea3788d8b365, 0x21bd6d6994279, 0x7ace75919e4e3, 0x34b9ed338add7 }, + { 0x6217e039d8064, 0x6dea408337e6d, 0x57ac112628206, 0x647cb65e30473, 0x49c05a51fadc9 }, + { 0x4e8bf9045af1b, 0x514e33a45e0d6, 0x7533c5b8bfe0f, 0x583557b7e14c9, 0x73c172021b008 }, + }, + { + { 0x700848a802ade, 0x1e04605c4e5f7, 0x5c0d01b9767fb, 0x7d7889f42388b, 0x4275aae2546d8 }, + { 0x75b0249864348, 0x52ee11070262b, 0x237ae54fb5acd, 0x3bfd1d03aaab5, 0x18ab598029d5c }, + { 0x32cc5fd6089e9, 0x426505c949b05, 0x46a18880c7ad2, 0x4a4221888ccda, 0x3dc65522b53df }, + }, + { + { 0x0c222a2007f6d, 0x356b79bdb77ee, 0x41ee81efe12ce, 0x120a9bd07097d, 0x234fd7eec346f }, + { 0x7013b327fbf93, 0x1336eeded6a0d, 0x2b565a2bbf3af, 0x253ce89591955, 0x0267882d17602 }, + { 0x0a119732ea378, 0x63bf1ba8e2a6c, 0x69f94cc90df9a, 0x431d1779bfc48, 0x497ba6fdaa097 }, + }, + { + { 0x6cc0313cfeaa0, 0x1a313848da499, 0x7cb534219230a, 0x39596dedefd60, 0x61e22917f12de }, + { 0x3cd86468ccf0b, 0x48553221ac081, 0x6c9464b4e0a6e, 0x75fba84180403, 0x43b5cd4218d05 }, + { 0x2762f9bd0b516, 0x1c6e7fbddcbb3, 0x75909c3ace2bd, 0x42101972d3ec9, 0x511d61210ae4d }, + }, +}; +#else +static const ge_precomp Bi[8] = { + { + { 25967493,-14356035,29566456,3660896,-12694345,4014787,27544626,-11754271,-6079156,2047605 }, + { -12545711,934262,-2722910,3049990,-727428,9406986,12720692,5043384,19500929,-15469378 }, + { -8738181,4489570,9688441,-14785194,10184609,-12363380,29287919,11864899,-24514362,-4438546 }, + }, + { + { 15636291,-9688557,24204773,-7912398,616977,-16685262,27787600,-14772189,28944400,-1550024 }, + { 16568933,4717097,-11556148,-1102322,15682896,-11807043,16354577,-11775962,7689662,11199574 }, + { 30464156,-5976125,-11779434,-15670865,23220365,15915852,7512774,10017326,-17749093,-9920357 }, + }, + { + { 10861363,11473154,27284546,1981175,-30064349,12577861,32867885,14515107,-15438304,10819380 }, + { 4708026,6336745,20377586,9066809,-11272109,6594696,-25653668,12483688,-12668491,5581306 }, + { 19563160,16186464,-29386857,4097519,10237984,-4348115,28542350,13850243,-23678021,-15815942 }, + }, + { + { 5153746,9909285,1723747,-2777874,30523605,5516873,19480852,5230134,-23952439,-15175766 }, + { -30269007,-3463509,7665486,10083793,28475525,1649722,20654025,16520125,30598449,7715701 }, + { 28881845,14381568,9657904,3680757,-20181635,7843316,-31400660,1370708,29794553,-1409300 }, + }, + { + { -22518993,-6692182,14201702,-8745502,-23510406,8844726,18474211,-1361450,-13062696,13821877 
}, + { -6455177,-7839871,3374702,-4740862,-27098617,-10571707,31655028,-7212327,18853322,-14220951 }, + { 4566830,-12963868,-28974889,-12240689,-7602672,-2830569,-8514358,-10431137,2207753,-3209784 }, + }, + { + { -25154831,-4185821,29681144,7868801,-6854661,-9423865,-12437364,-663000,-31111463,-16132436 }, + { 25576264,-2703214,7349804,-11814844,16472782,9300885,3844789,15725684,171356,6466918 }, + { 23103977,13316479,9739013,-16149481,817875,-15038942,8965339,-14088058,-30714912,16193877 }, + }, + { + { -33521811,3180713,-2394130,14003687,-16903474,-16270840,17238398,4729455,-18074513,9256800 }, + { -25182317,-4174131,32336398,5036987,-21236817,11360617,22616405,9761698,-19827198,630305 }, + { -13720693,2639453,-24237460,-7406481,9494427,-5774029,-6554551,-15960994,-2449256,-14291300 }, + }, + { + { -3151181,-5046075,9282714,6866145,-31907062,-863023,-18940575,15033784,25105118,-7894876 }, + { -24326370,15950226,-31801215,-14592823,-11662737,-5090925,1573892,-2625887,2198790,-15804619 }, + { -3099351,10324967,-2241613,7453183,-5446979,-2735503,-13812022,-16236442,-32461234,-12290683 }, + }, +} ; +#endif + + +/* +r = a * A + b * B +where a = a[0]+256*a[1]+...+256^31 a[31]. +and b = b[0]+256*b[1]+...+256^31 b[31]. +B is the Ed25519 base point (x,4/5) with x positive. +*/ +int ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, + const ge_p3 *A, const unsigned char *b) +{ + signed char aslide[256]; + signed char bslide[256]; + ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */ + ge_p1p1 t; + ge_p3 u; + ge_p3 A2; + int i; + + slide(aslide,a); + slide(bslide,b); + + ge_p3_to_cached(&Ai[0],A); + ge_p3_dbl(&t,A); ge_p1p1_to_p3(&A2,&t); + ge_add(&t,&A2,&Ai[0]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[1],&u); + ge_add(&t,&A2,&Ai[1]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[2],&u); + ge_add(&t,&A2,&Ai[2]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[3],&u); + ge_add(&t,&A2,&Ai[3]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[4],&u); + ge_add(&t,&A2,&Ai[4]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[5],&u); + ge_add(&t,&A2,&Ai[5]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[6],&u); + ge_add(&t,&A2,&Ai[6]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[7],&u); + + ge_p2_0(r); + + for (i = 255;i >= 0;--i) { + if (aslide[i] || bslide[i]) break; + } + + for (;i >= 0;--i) { + ge_p2_dbl(&t,r); + + if (aslide[i] > 0) { + ge_p1p1_to_p3(&u,&t); + ge_add(&t,&u,&Ai[aslide[i]/2]); + } else if (aslide[i] < 0) { + ge_p1p1_to_p3(&u,&t); + ge_sub(&t,&u,&Ai[(-aslide[i])/2]); + } + + if (bslide[i] > 0) { + ge_p1p1_to_p3(&u,&t); + ge_madd(&t,&u,&Bi[bslide[i]/2]); + } else if (bslide[i] < 0) { + ge_p1p1_to_p3(&u,&t); + ge_msub(&t,&u,&Bi[(-bslide[i])/2]); + } + + ge_p1p1_to_p2(r,&t); + } + + return 0; +} + +#ifdef CURVED25519_ASM_64BIT +static const ge d = { + 0x75eb4dca135978a3, 0x00700a4d4141d8ab, -0x7338bf8688861768, 0x52036cee2b6ffe73, +}; +#elif defined(CURVED25519_ASM_32BIT) +static const ge d = { + 0x135978a3, 0x75eb4dca, 0x4141d8ab, 0x00700a4d, 0x7779e898, -0x7338bf87, 0x2b6ffe73, 0x52036cee, +}; +#elif defined(CURVED25519_128BIT) +static const ge d = { + 0x34dca135978a3, 0x1a8283b156ebd, 0x5e7a26001c029, 0x739c663a03cbb, + 0x52036cee2b6ff +}; +#else +static const ge d = { +-10913610,13857413,-15372611,6949391,114729, +-8787816,-6275908,-3247719,-18696448,-12055116 +}; +#endif + + +#ifdef CURVED25519_ASM_64BIT +static const ge sqrtm1 = { + -0x3b11e4d8b5f15f50, 0x2f431806ad2fe478, 0x2b4d00993dfbd7a7, 0x2b8324804fc1df0b, +}; +#elif defined(CURVED25519_ASM_32BIT) +static const ge sqrtm1 = { + 0x4a0ea0b0, 
-0x3b11e4d9, -0x52d01b88, 0x2f431806, 0x3dfbd7a7, 0x2b4d0099, 0x4fc1df0b, 0x2b832480, +}; +#elif defined(CURVED25519_128BIT) +static const ge sqrtm1 = { + 0x61b274a0ea0b0, 0x0d5a5fc8f189d, 0x7ef5e9cbd0c60, 0x78595a6804c9e, + 0x2b8324804fc1d +}; +#else +static const ge sqrtm1 = { +-32595792,-7943725,9377950,3500415,12389472, +-272473,-25146209,-2005654,326686,11406482 +}; +#endif + + +int ge_frombytes_negate_vartime(ge_p3 *h,const unsigned char *s) +{ + ge u; + ge v; + ge v3; + ge vxx; + ge check; + + fe_frombytes(h->Y,s); + fe_1(h->Z); + fe_sq(u,h->Y); + fe_mul(v,u,d); + fe_sub(u,u,h->Z); /* u = y^2-1 */ + fe_add(v,v,h->Z); /* v = dy^2+1 */ + + + fe_sq(v3,v); + fe_mul(v3,v3,v); /* v3 = v^3 */ + fe_sq(h->X,v3); + fe_mul(h->X,h->X,v); + fe_mul(h->X,h->X,u); /* x = uv^7 */ + + fe_pow22523(h->X,h->X); /* x = (uv^7)^((q-5)/8) */ + fe_mul(h->X,h->X,v3); + fe_mul(h->X,h->X,u); /* x = uv^3(uv^7)^((q-5)/8) */ + + fe_sq(vxx,h->X); + fe_mul(vxx,vxx,v); + fe_sub(check,vxx,u); /* vx^2-u */ + if (fe_isnonzero(check)) { + fe_add(check,vxx,u); /* vx^2+u */ + if (fe_isnonzero(check)) return -1; + fe_mul(h->X,h->X,sqrtm1); + } + + if (fe_isnegative(h->X) == (s[31] >> 7)) + fe_neg(h->X,h->X); + + fe_mul(h->T,h->X,h->Y); + return 0; +} + + +/* ge madd */ +/* +r = p + q +*/ + +static WC_INLINE void ge_madd(ge_p1p1 *r,const ge_p3 *p,const ge_precomp *q) +{ +#ifndef CURVED25519_ASM + ge t0; + fe_add(r->X,p->Y,p->X); + fe_sub(r->Y,p->Y,p->X); + fe_mul(r->Z,r->X,q->yplusx); + fe_mul(r->Y,r->Y,q->yminusx); + fe_mul(r->T,q->xy2d,p->T); + fe_add(t0,p->Z,p->Z); + fe_sub(r->X,r->Z,r->Y); + fe_add(r->Y,r->Z,r->Y); + fe_add(r->Z,t0,r->T); + fe_sub(r->T,t0,r->T); +#else + fe_ge_madd(r->X, r->Y, r->Z, r->T, p->X, p->Y, p->Z, p->T, q->xy2d, + q->yplusx, q->yminusx); +#endif +} + + +/* ge msub */ + +/* +r = p - q +*/ + +static WC_INLINE void ge_msub(ge_p1p1 *r,const ge_p3 *p,const ge_precomp *q) +{ +#ifndef CURVED25519_ASM + ge t0; + fe_add(r->X,p->Y,p->X); + fe_sub(r->Y,p->Y,p->X); + fe_mul(r->Z,r->X,q->yminusx); + fe_mul(r->Y,r->Y,q->yplusx); + fe_mul(r->T,q->xy2d,p->T); + fe_add(t0,p->Z,p->Z); + fe_sub(r->X,r->Z,r->Y); + fe_add(r->Y,r->Z,r->Y); + fe_sub(r->Z,t0,r->T); + fe_add(r->T,t0,r->T); +#else + fe_ge_msub(r->X, r->Y, r->Z, r->T, p->X, p->Y, p->Z, p->T, q->xy2d, + q->yplusx, q->yminusx); +#endif +} + + +/* ge p1p1 to p2 */ +/* +r = p +*/ + +static void ge_p1p1_to_p2(ge_p2 *r,const ge_p1p1 *p) +{ +#ifndef CURVED25519_ASM + fe_mul(r->X,p->X,p->T); + fe_mul(r->Y,p->Y,p->Z); + fe_mul(r->Z,p->Z,p->T); +#else + fe_ge_to_p2(r->X, r->Y, r->Z, p->X, p->Y, p->Z, p->T); +#endif +} + + +/* ge p1p1 to p3 */ + +/* +r = p +*/ + +static WC_INLINE void ge_p1p1_to_p3(ge_p3 *r,const ge_p1p1 *p) +{ +#ifndef CURVED25519_ASM + fe_mul(r->X,p->X,p->T); + fe_mul(r->Y,p->Y,p->Z); + fe_mul(r->Z,p->Z,p->T); + fe_mul(r->T,p->X,p->Y); +#else + fe_ge_to_p3(r->X, r->Y, r->Z, r->T, p->X, p->Y, p->Z, p->T); +#endif +} + + +/* ge p2 0 */ + +static void ge_p2_0(ge_p2 *h) +{ + fe_0(h->X); + fe_1(h->Y); + fe_1(h->Z); +} + + +/* ge p2 dbl */ + +/* +r = 2 * p +*/ + +static WC_INLINE void ge_p2_dbl(ge_p1p1 *r,const ge_p2 *p) +{ +#ifndef CURVED25519_ASM + ge t0; + fe_sq(r->X,p->X); + fe_sq(r->Z,p->Y); + fe_sq2(r->T,p->Z); + fe_add(r->Y,p->X,p->Y); + fe_sq(t0,r->Y); + fe_add(r->Y,r->Z,r->X); + fe_sub(r->Z,r->Z,r->X); + fe_sub(r->X,t0,r->Y); + fe_sub(r->T,r->T,r->Z); +#else + fe_ge_dbl(r->X, r->Y, r->Z, r->T, p->X, p->Y, p->Z); +#endif +} + + +/* ge p3 dble */ + +/* +r = 2 * p +*/ + +static void ge_p3_dbl(ge_p1p1 *r,const ge_p3 *p) +{ + ge_p2 q; + 
ge_p3_to_p2(&q,p); + ge_p2_dbl(r,&q); +} + + +/* ge p3 to cached */ + +/* +r = p +*/ + +#ifdef CURVED25519_ASM_64BIT +static const ge d2 = { + -0x1429646bd94d0ea7, 0x00e0149a8283b156, 0x198e80f2eef3d130, 0x2406d9dc56dffce7, +}; +#elif defined(CURVED25519_ASM_32BIT) +static const ge d2 = { + 0x26b2f159, -0x1429646c, -0x7d7c4eaa, 0x00e0149a, -0x110c2ed0, 0x198e80f2, 0x56dffce7, 0x2406d9dc, +}; +#elif defined(CURVED25519_128BIT) +static const ge d2 = { + 0x69b9426b2f159, 0x35050762add7a, 0x3cf44c0038052, 0x6738cc7407977, + 0x2406d9dc56dff +}; +#else +static const ge d2 = { +-21827239,-5839606,-30745221,13898782,229458, +15978800,-12551817,-6495438,29715968,9444199 +} ; +#endif + + +static WC_INLINE void ge_p3_to_cached(ge_cached *r,const ge_p3 *p) +{ + fe_add(r->YplusX,p->Y,p->X); + fe_sub(r->YminusX,p->Y,p->X); + fe_copy(r->Z,p->Z); + fe_mul(r->T2d,p->T,d2); +} + + +/* ge p3 to p2 */ +/* +r = p +*/ + +static void ge_p3_to_p2(ge_p2 *r,const ge_p3 *p) +{ + fe_copy(r->X,p->X); + fe_copy(r->Y,p->Y); + fe_copy(r->Z,p->Z); +} + + +/* ge p3 tobytes */ +void ge_p3_tobytes(unsigned char *s,const ge_p3 *h) +{ + ge recip; + ge x; + ge y; + + fe_invert(recip,h->Z); + fe_mul(x,h->X,recip); + fe_mul(y,h->Y,recip); + fe_tobytes(s,y); + s[31] ^= fe_isnegative(x) << 7; +} + + +#ifndef CURVED25519_ASM +/* ge_precomp_0 */ +static void ge_precomp_0(ge_precomp *h) +{ + fe_1(h->yplusx); + fe_1(h->yminusx); + fe_0(h->xy2d); +} +#endif + + +/* ge_sub */ +/* +r = p - q +*/ + +static WC_INLINE void ge_sub(ge_p1p1 *r,const ge_p3 *p,const ge_cached *q) +{ +#ifndef CURVED25519_ASM + ge t0; + fe_add(r->X,p->Y,p->X); + fe_sub(r->Y,p->Y,p->X); + fe_mul(r->Z,r->X,q->YminusX); + fe_mul(r->Y,r->Y,q->YplusX); + fe_mul(r->T,q->T2d,p->T); + fe_mul(r->X,p->Z,q->Z); + fe_add(t0,r->X,r->X); + fe_sub(r->X,r->Z,r->Y); + fe_add(r->Y,r->Z,r->Y); + fe_sub(r->Z,t0,r->T); + fe_add(r->T,t0,r->T); +#else + fe_ge_sub(r->X, r->Y, r->Z, r->T, p->X, p->Y, p->Z, p->T, q->Z, q->T2d, + q->YplusX, q->YminusX); +#endif +} + + +/* ge tobytes */ +void ge_tobytes(unsigned char *s,const ge_p2 *h) +{ + ge recip; + ge x; + ge y; + + fe_invert(recip,h->Z); + fe_mul(x,h->X,recip); + fe_mul(y,h->Y,recip); + fe_tobytes(s,y); + s[31] ^= fe_isnegative(x) << 7; +} + +#endif /* !ED25519_SMALL */ +#endif /* HAVE_ED25519 */ diff --git a/client/wolfssl/wolfcrypt/src/hash.c b/client/wolfssl/wolfcrypt/src/hash.c new file mode 100644 index 0000000..c53f5e6 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/hash.c @@ -0,0 +1,1677 @@ +/* hash.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#ifndef NO_ASN
+#include <wolfssl/wolfcrypt/asn.h>
+#endif
+
+#include <wolfssl/wolfcrypt/hash.h>
+#include <wolfssl/wolfcrypt/hmac.h>
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+
+#ifdef NO_ASN
+enum Hash_Sum {
+    MD2h      = 646,
+    MD5h      = 649,
+    SHAh      =  88,
+    SHA224h   = 417,
+    SHA256h   = 414,
+    SHA384h   = 415,
+    SHA512h   = 416,
+    SHA3_224h = 420,
+    SHA3_256h = 421,
+    SHA3_384h = 422,
+    SHA3_512h = 423
+};
+#endif /* NO_ASN */
+
+#if !defined(NO_PWDBASED) || !defined(NO_ASN)
+/* function converts int hash type to enum */
+enum wc_HashType wc_HashTypeConvert(int hashType)
+{
+    /* Default to hash type none as error */
+    enum wc_HashType eHashType = WC_HASH_TYPE_NONE;
+#if defined(HAVE_FIPS) || defined(HAVE_SELFTEST)
+    /* original FIPSv1 and CAVP selftest require a mapping for unique hash
+       type to wc_HashType */
+    switch (hashType) {
+    #ifndef NO_MD5
+        case WC_MD5:
+            eHashType = WC_HASH_TYPE_MD5;
+            break;
+    #endif /* !NO_MD5 */
+    #ifndef NO_SHA
+        case WC_SHA:
+            eHashType = WC_HASH_TYPE_SHA;
+            break;
+    #endif /* !NO_SHA */
+
+    #ifdef WOLFSSL_SHA224
+        case WC_SHA224:
+            eHashType = WC_HASH_TYPE_SHA224;
+            break;
+    #endif /* WOLFSSL_SHA224 */
+
+    #ifndef NO_SHA256
+        case WC_SHA256:
+            eHashType = WC_HASH_TYPE_SHA256;
+            break;
+    #endif /* !NO_SHA256 */
+
+    #ifdef WOLFSSL_SHA384
+        case WC_SHA384:
+            eHashType = WC_HASH_TYPE_SHA384;
+            break;
+    #endif /* WOLFSSL_SHA384 */
+    #ifdef WOLFSSL_SHA512
+        case WC_SHA512:
+            eHashType = WC_HASH_TYPE_SHA512;
+            break;
+    #endif /* WOLFSSL_SHA512 */
+    #ifdef WOLFSSL_SHA3
+        case WC_SHA3_224:
+            eHashType = WC_HASH_TYPE_SHA3_224;
+            break;
+        case WC_SHA3_256:
+            eHashType = WC_HASH_TYPE_SHA3_256;
+            break;
+        case WC_SHA3_384:
+            eHashType = WC_HASH_TYPE_SHA3_384;
+            break;
+        case WC_SHA3_512:
+            eHashType = WC_HASH_TYPE_SHA3_512;
+            break;
+    #endif /* WOLFSSL_SHA3 */
+        default:
+            eHashType = WC_HASH_TYPE_NONE;
+            break;
+    }
+#else
+    /* current master uses same unique types as wc_HashType */
+    if (hashType > 0 && hashType <= WC_HASH_TYPE_MAX) {
+        eHashType = (enum wc_HashType)hashType;
+    }
+#endif
+    return eHashType;
+}
+#endif /* !NO_PWDBASED || !NO_ASN */
+
+#if !defined(NO_ASN) || !defined(NO_DH) || defined(HAVE_ECC)
+
+int wc_HashGetOID(enum wc_HashType hash_type)
+{
+    int oid = HASH_TYPE_E; /* Default to hash type error */
+    switch(hash_type)
+    {
+        case WC_HASH_TYPE_MD2:
+        #ifdef WOLFSSL_MD2
+            oid = MD2h;
+        #endif
+            break;
+        case WC_HASH_TYPE_MD5_SHA:
+        case WC_HASH_TYPE_MD5:
+        #ifndef NO_MD5
+            oid = MD5h;
+        #endif
+            break;
+        case WC_HASH_TYPE_SHA:
+        #ifndef NO_SHA
+            oid = SHAh;
+        #endif
+            break;
+        case WC_HASH_TYPE_SHA224:
+        #ifdef WOLFSSL_SHA224
+            oid = SHA224h;
+        #endif
+            break;
+        case WC_HASH_TYPE_SHA256:
+        #ifndef NO_SHA256
+            oid = SHA256h;
+        #endif
+            break;
+        case WC_HASH_TYPE_SHA384:
+        #ifdef WOLFSSL_SHA384
+            oid = SHA384h;
+        #endif
+            break;
+        case WC_HASH_TYPE_SHA512:
+        #ifdef WOLFSSL_SHA512
+            oid = SHA512h;
+        #endif
+            break;
+        case WC_HASH_TYPE_SHA3_224:
+        #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
+            oid = SHA3_224h;
+        #endif
+            break;
+        case WC_HASH_TYPE_SHA3_256:
+        #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
+            oid = SHA3_256h;
+        #endif
+            break;
+        case WC_HASH_TYPE_SHA3_384:
+        #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
+            oid = SHA3_384h;
+        #endif
+            break;
+        case
WC_HASH_TYPE_SHA3_512: + #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512) + oid = SHA3_512h; + #endif + break; + + /* Not Supported */ + case WC_HASH_TYPE_MD4: + case WC_HASH_TYPE_BLAKE2B: + case WC_HASH_TYPE_BLAKE2S: + case WC_HASH_TYPE_NONE: + default: + oid = BAD_FUNC_ARG; + break; + } + return oid; +} + +enum wc_HashType wc_OidGetHash(int oid) +{ + enum wc_HashType hash_type = WC_HASH_TYPE_NONE; + switch (oid) + { + #ifdef WOLFSSL_MD2 + case MD2h: + hash_type = WC_HASH_TYPE_MD2; + break; + #endif + case MD5h: + #ifndef NO_MD5 + hash_type = WC_HASH_TYPE_MD5; + #endif + break; + case SHAh: + #ifndef NO_SHA + hash_type = WC_HASH_TYPE_SHA; + #endif + break; + case SHA224h: + #ifdef WOLFSSL_SHA224 + hash_type = WC_HASH_TYPE_SHA224; + #endif + break; + case SHA256h: + #ifndef NO_SHA256 + hash_type = WC_HASH_TYPE_SHA256; + #endif + break; + case SHA384h: + #ifdef WOLFSSL_SHA384 + hash_type = WC_HASH_TYPE_SHA384; + #endif + break; + case SHA512h: + #ifdef WOLFSSL_SHA512 + hash_type = WC_HASH_TYPE_SHA512; + #endif + break; + #ifdef WOLFSSL_SHA3 + case SHA3_224h: + hash_type = WC_HASH_TYPE_SHA3_224; + break; + case SHA3_256h: + hash_type = WC_HASH_TYPE_SHA3_256; + break; + case SHA3_384h: + hash_type = WC_HASH_TYPE_SHA3_384; + break; + case SHA3_512h: + hash_type = WC_HASH_TYPE_SHA3_512; + break; + #endif /* WOLFSSL_SHA3 */ + default: + break; + } + return hash_type; +} +#endif /* !NO_ASN || !NO_DH || HAVE_ECC */ + +#ifndef NO_HASH_WRAPPER + +/* Get Hash digest size */ +int wc_HashGetDigestSize(enum wc_HashType hash_type) +{ + int dig_size = HASH_TYPE_E; /* Default to hash type error */ + switch(hash_type) + { + case WC_HASH_TYPE_MD2: + #ifdef WOLFSSL_MD2 + dig_size = MD2_DIGEST_SIZE; + #endif + break; + case WC_HASH_TYPE_MD4: + #ifndef NO_MD4 + dig_size = MD4_DIGEST_SIZE; + #endif + break; + case WC_HASH_TYPE_MD5: + #ifndef NO_MD5 + dig_size = WC_MD5_DIGEST_SIZE; + #endif + break; + case WC_HASH_TYPE_SHA: + #ifndef NO_SHA + dig_size = WC_SHA_DIGEST_SIZE; + #endif + break; + case WC_HASH_TYPE_SHA224: + #ifdef WOLFSSL_SHA224 + dig_size = WC_SHA224_DIGEST_SIZE; + #endif + break; + case WC_HASH_TYPE_SHA256: + #ifndef NO_SHA256 + dig_size = WC_SHA256_DIGEST_SIZE; + #endif + break; + case WC_HASH_TYPE_SHA384: + #ifdef WOLFSSL_SHA384 + dig_size = WC_SHA384_DIGEST_SIZE; + #endif + break; + case WC_HASH_TYPE_SHA512: + #ifdef WOLFSSL_SHA512 + dig_size = WC_SHA512_DIGEST_SIZE; + #endif + break; + case WC_HASH_TYPE_MD5_SHA: /* Old TLS Specific */ + #if !defined(NO_MD5) && !defined(NO_SHA) + dig_size = (int)WC_MD5_DIGEST_SIZE + (int)WC_SHA_DIGEST_SIZE; + #endif + break; + + case WC_HASH_TYPE_SHA3_224: + #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224) + dig_size = WC_SHA3_224_DIGEST_SIZE; + #endif + break; + case WC_HASH_TYPE_SHA3_256: + #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256) + dig_size = WC_SHA3_256_DIGEST_SIZE; + #endif + break; + case WC_HASH_TYPE_SHA3_384: + #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384) + dig_size = WC_SHA3_384_DIGEST_SIZE; + #endif + break; + case WC_HASH_TYPE_SHA3_512: + #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512) + dig_size = WC_SHA3_512_DIGEST_SIZE; + #endif + break; + case WC_HASH_TYPE_BLAKE2B: + case WC_HASH_TYPE_BLAKE2S: + #if defined(HAVE_BLAKE2) || defined(HAVE_BLAKE2S) + dig_size = BLAKE2S_OUTBYTES; + #endif + break; + + /* Not Supported */ + case WC_HASH_TYPE_NONE: + default: + dig_size = BAD_FUNC_ARG; + break; + } + return dig_size; +} + + +/* Get Hash block size */ +int wc_HashGetBlockSize(enum wc_HashType 
hash_type) +{ + int block_size = HASH_TYPE_E; /* Default to hash type error */ + switch (hash_type) + { + case WC_HASH_TYPE_MD2: + #ifdef WOLFSSL_MD2 + block_size = MD2_BLOCK_SIZE; + #endif + break; + case WC_HASH_TYPE_MD4: + #ifndef NO_MD4 + block_size = MD4_BLOCK_SIZE; + #endif + break; + case WC_HASH_TYPE_MD5: + #ifndef NO_MD5 + block_size = WC_MD5_BLOCK_SIZE; + #endif + break; + case WC_HASH_TYPE_SHA: + #ifndef NO_SHA + block_size = WC_SHA_BLOCK_SIZE; + #endif + break; + case WC_HASH_TYPE_SHA224: + #ifdef WOLFSSL_SHA224 + block_size = WC_SHA224_BLOCK_SIZE; + #endif + break; + case WC_HASH_TYPE_SHA256: + #ifndef NO_SHA256 + block_size = WC_SHA256_BLOCK_SIZE; + #endif + break; + case WC_HASH_TYPE_SHA384: + #ifdef WOLFSSL_SHA384 + block_size = WC_SHA384_BLOCK_SIZE; + #endif + break; + case WC_HASH_TYPE_SHA512: + #ifdef WOLFSSL_SHA512 + block_size = WC_SHA512_BLOCK_SIZE; + #endif + break; + case WC_HASH_TYPE_MD5_SHA: /* Old TLS Specific */ + #if !defined(NO_MD5) && !defined(NO_SHA) + block_size = (int)WC_MD5_BLOCK_SIZE + (int)WC_SHA_BLOCK_SIZE; + #endif + break; + + case WC_HASH_TYPE_SHA3_224: + #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224) + block_size = WC_SHA3_224_BLOCK_SIZE; + #endif + break; + case WC_HASH_TYPE_SHA3_256: + #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256) + block_size = WC_SHA3_256_BLOCK_SIZE; + #endif + break; + case WC_HASH_TYPE_SHA3_384: + #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384) + block_size = WC_SHA3_384_BLOCK_SIZE; + #endif + break; + case WC_HASH_TYPE_SHA3_512: + #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512) + block_size = WC_SHA3_512_BLOCK_SIZE; + #endif + break; + case WC_HASH_TYPE_BLAKE2B: + case WC_HASH_TYPE_BLAKE2S: + #if defined(HAVE_BLAKE2) || defined(HAVE_BLAKE2S) + block_size = BLAKE2S_BLOCKBYTES; + #endif + break; + + /* Not Supported */ + case WC_HASH_TYPE_NONE: + default: + block_size = BAD_FUNC_ARG; + break; + } + return block_size; +} + +/* Generic Hashing Wrapper */ +int wc_Hash(enum wc_HashType hash_type, const byte* data, + word32 data_len, byte* hash, word32 hash_len) +{ + int ret = HASH_TYPE_E; /* Default to hash type error */ + word32 dig_size; + + /* Validate hash buffer size */ + dig_size = wc_HashGetDigestSize(hash_type); + if (hash_len < dig_size) { + return BUFFER_E; + } + + /* Suppress possible unused arg if all hashing is disabled */ + (void)data; + (void)data_len; + (void)hash; + (void)hash_len; + + switch(hash_type) + { + case WC_HASH_TYPE_MD5: +#ifndef NO_MD5 + ret = wc_Md5Hash(data, data_len, hash); +#endif + break; + case WC_HASH_TYPE_SHA: +#ifndef NO_SHA + ret = wc_ShaHash(data, data_len, hash); +#endif + break; + case WC_HASH_TYPE_SHA224: +#ifdef WOLFSSL_SHA224 + ret = wc_Sha224Hash(data, data_len, hash); +#endif + break; + case WC_HASH_TYPE_SHA256: +#ifndef NO_SHA256 + ret = wc_Sha256Hash(data, data_len, hash); +#endif + break; + case WC_HASH_TYPE_SHA384: +#ifdef WOLFSSL_SHA384 + ret = wc_Sha384Hash(data, data_len, hash); +#endif + break; + case WC_HASH_TYPE_SHA512: +#ifdef WOLFSSL_SHA512 + ret = wc_Sha512Hash(data, data_len, hash); +#endif + break; + case WC_HASH_TYPE_MD5_SHA: +#if !defined(NO_MD5) && !defined(NO_SHA) + ret = wc_Md5Hash(data, data_len, hash); + if (ret == 0) { + ret = wc_ShaHash(data, data_len, &hash[WC_MD5_DIGEST_SIZE]); + } +#endif + break; + + case WC_HASH_TYPE_SHA3_224: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224) + ret = wc_Sha3_224Hash(data, data_len, hash); +#endif + break; + case WC_HASH_TYPE_SHA3_256: +#if defined(WOLFSSL_SHA3) && 
!defined(WOLFSSL_NOSHA3_256) + ret = wc_Sha3_256Hash(data, data_len, hash); +#endif + break; + case WC_HASH_TYPE_SHA3_384: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384) + ret = wc_Sha3_384Hash(data, data_len, hash); +#endif + break; + case WC_HASH_TYPE_SHA3_512: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512) + ret = wc_Sha3_512Hash(data, data_len, hash); +#endif + break; + + /* Not Supported */ + case WC_HASH_TYPE_MD2: + case WC_HASH_TYPE_MD4: + case WC_HASH_TYPE_BLAKE2B: + case WC_HASH_TYPE_BLAKE2S: + case WC_HASH_TYPE_NONE: + default: + ret = BAD_FUNC_ARG; + break; + } + return ret; +} + +int wc_HashInit_ex(wc_HashAlg* hash, enum wc_HashType type, void* heap, + int devId) +{ + int ret = HASH_TYPE_E; /* Default to hash type error */ + + if (hash == NULL) + return BAD_FUNC_ARG; + + switch (type) { + case WC_HASH_TYPE_MD5: +#ifndef NO_MD5 + ret = wc_InitMd5_ex(&hash->md5, heap, devId); +#endif + break; + case WC_HASH_TYPE_SHA: +#ifndef NO_SHA + ret = wc_InitSha_ex(&hash->sha, heap, devId); +#endif + break; + case WC_HASH_TYPE_SHA224: +#ifdef WOLFSSL_SHA224 + ret = wc_InitSha224_ex(&hash->sha224, heap, devId); +#endif + break; + case WC_HASH_TYPE_SHA256: +#ifndef NO_SHA256 + ret = wc_InitSha256_ex(&hash->sha256, heap, devId); +#endif + break; + case WC_HASH_TYPE_SHA384: +#ifdef WOLFSSL_SHA384 + ret = wc_InitSha384_ex(&hash->sha384, heap, devId); +#endif + break; + case WC_HASH_TYPE_SHA512: +#ifdef WOLFSSL_SHA512 + ret = wc_InitSha512_ex(&hash->sha512, heap, devId); +#endif + break; + + case WC_HASH_TYPE_SHA3_224: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224) + ret = wc_InitSha3_224(&hash->sha3, heap, devId); +#endif + break; + case WC_HASH_TYPE_SHA3_256: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256) + ret = wc_InitSha3_256(&hash->sha3, heap, devId); +#endif + break; + case WC_HASH_TYPE_SHA3_384: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384) + ret = wc_InitSha3_384(&hash->sha3, heap, devId); +#endif + break; + case WC_HASH_TYPE_SHA3_512: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512) + ret = wc_InitSha3_512(&hash->sha3, heap, devId); +#endif + break; + + /* not supported */ + case WC_HASH_TYPE_MD5_SHA: + case WC_HASH_TYPE_MD2: + case WC_HASH_TYPE_MD4: + case WC_HASH_TYPE_BLAKE2B: + case WC_HASH_TYPE_BLAKE2S: + case WC_HASH_TYPE_NONE: + default: + ret = BAD_FUNC_ARG; + }; + + return ret; +} + +int wc_HashInit(wc_HashAlg* hash, enum wc_HashType type) +{ + return wc_HashInit_ex(hash, type, NULL, INVALID_DEVID); +} + +int wc_HashUpdate(wc_HashAlg* hash, enum wc_HashType type, const byte* data, + word32 dataSz) +{ + int ret = HASH_TYPE_E; /* Default to hash type error */ + + if (hash == NULL || data == NULL) + return BAD_FUNC_ARG; + + switch (type) { + case WC_HASH_TYPE_MD5: +#ifndef NO_MD5 + ret = wc_Md5Update(&hash->md5, data, dataSz); +#endif + break; + case WC_HASH_TYPE_SHA: +#ifndef NO_SHA + ret = wc_ShaUpdate(&hash->sha, data, dataSz); +#endif + break; + case WC_HASH_TYPE_SHA224: +#ifdef WOLFSSL_SHA224 + ret = wc_Sha224Update(&hash->sha224, data, dataSz); +#endif + break; + case WC_HASH_TYPE_SHA256: +#ifndef NO_SHA256 + ret = wc_Sha256Update(&hash->sha256, data, dataSz); +#endif + break; + case WC_HASH_TYPE_SHA384: +#ifdef WOLFSSL_SHA384 + ret = wc_Sha384Update(&hash->sha384, data, dataSz); +#endif + break; + case WC_HASH_TYPE_SHA512: +#ifdef WOLFSSL_SHA512 + ret = wc_Sha512Update(&hash->sha512, data, dataSz); +#endif + break; + + case WC_HASH_TYPE_SHA3_224: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224) 
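+            /* Illustrative sketch only (not from the wolfSSL sources): the
+             * wrappers in this file are meant to be driven as a streaming
+             * sequence. Assuming caller-supplied 'data'/'dataSz' buffers, a
+             * SHA-256 digest is computed as:
+             *
+             *     wc_HashAlg h;
+             *     byte digest[WC_SHA256_DIGEST_SIZE];
+             *     if (wc_HashInit(&h, WC_HASH_TYPE_SHA256) == 0) {
+             *         wc_HashUpdate(&h, WC_HASH_TYPE_SHA256, data, dataSz);
+             *         wc_HashFinal(&h, WC_HASH_TYPE_SHA256, digest);
+             *         wc_HashFree(&h, WC_HASH_TYPE_SHA256);
+             *     }
+             */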
+ ret = wc_Sha3_224_Update(&hash->sha3, data, dataSz); +#endif + break; + case WC_HASH_TYPE_SHA3_256: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256) + ret = wc_Sha3_256_Update(&hash->sha3, data, dataSz); +#endif + break; + case WC_HASH_TYPE_SHA3_384: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384) + ret = wc_Sha3_384_Update(&hash->sha3, data, dataSz); +#endif + break; + case WC_HASH_TYPE_SHA3_512: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512) + ret = wc_Sha3_512_Update(&hash->sha3, data, dataSz); +#endif + break; + + /* not supported */ + case WC_HASH_TYPE_MD5_SHA: + case WC_HASH_TYPE_MD2: + case WC_HASH_TYPE_MD4: + case WC_HASH_TYPE_BLAKE2B: + case WC_HASH_TYPE_BLAKE2S: + case WC_HASH_TYPE_NONE: + default: + ret = BAD_FUNC_ARG; + }; + + return ret; +} + +int wc_HashFinal(wc_HashAlg* hash, enum wc_HashType type, byte* out) +{ + int ret = HASH_TYPE_E; /* Default to hash type error */ + + if (hash == NULL || out == NULL) + return BAD_FUNC_ARG; + + switch (type) { + case WC_HASH_TYPE_MD5: +#ifndef NO_MD5 + ret = wc_Md5Final(&hash->md5, out); +#endif + break; + case WC_HASH_TYPE_SHA: +#ifndef NO_SHA + ret = wc_ShaFinal(&hash->sha, out); +#endif + break; + case WC_HASH_TYPE_SHA224: +#ifdef WOLFSSL_SHA224 + ret = wc_Sha224Final(&hash->sha224, out); +#endif + break; + case WC_HASH_TYPE_SHA256: +#ifndef NO_SHA256 + ret = wc_Sha256Final(&hash->sha256, out); +#endif + break; + case WC_HASH_TYPE_SHA384: +#ifdef WOLFSSL_SHA384 + ret = wc_Sha384Final(&hash->sha384, out); +#endif + break; + case WC_HASH_TYPE_SHA512: +#ifdef WOLFSSL_SHA512 + ret = wc_Sha512Final(&hash->sha512, out); +#endif + break; + + case WC_HASH_TYPE_SHA3_224: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224) + ret = wc_Sha3_224_Final(&hash->sha3, out); +#endif + break; + case WC_HASH_TYPE_SHA3_256: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256) + ret = wc_Sha3_256_Final(&hash->sha3, out); +#endif + break; + case WC_HASH_TYPE_SHA3_384: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384) + ret = wc_Sha3_384_Final(&hash->sha3, out); +#endif + break; + case WC_HASH_TYPE_SHA3_512: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512) + ret = wc_Sha3_512_Final(&hash->sha3, out); +#endif + break; + + /* not supported */ + case WC_HASH_TYPE_MD5_SHA: + case WC_HASH_TYPE_MD2: + case WC_HASH_TYPE_MD4: + case WC_HASH_TYPE_BLAKE2B: + case WC_HASH_TYPE_BLAKE2S: + case WC_HASH_TYPE_NONE: + default: + ret = BAD_FUNC_ARG; + }; + + return ret; +} + +int wc_HashFree(wc_HashAlg* hash, enum wc_HashType type) +{ + int ret = HASH_TYPE_E; /* Default to hash type error */ + + if (hash == NULL) + return BAD_FUNC_ARG; + + switch (type) { + case WC_HASH_TYPE_MD5: +#ifndef NO_MD5 + wc_Md5Free(&hash->md5); + ret = 0; +#endif + break; + case WC_HASH_TYPE_SHA: +#ifndef NO_SHA + wc_ShaFree(&hash->sha); + ret = 0; +#endif + break; + case WC_HASH_TYPE_SHA224: +#ifdef WOLFSSL_SHA224 + wc_Sha224Free(&hash->sha224); + ret = 0; +#endif + break; + case WC_HASH_TYPE_SHA256: +#ifndef NO_SHA256 + wc_Sha256Free(&hash->sha256); + ret = 0; +#endif + break; + case WC_HASH_TYPE_SHA384: +#ifdef WOLFSSL_SHA384 + wc_Sha384Free(&hash->sha384); + ret = 0; +#endif + break; + case WC_HASH_TYPE_SHA512: +#ifdef WOLFSSL_SHA512 + wc_Sha512Free(&hash->sha512); + ret = 0; +#endif + break; + + case WC_HASH_TYPE_SHA3_224: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224) + wc_Sha3_224_Free(&hash->sha3); + ret = 0; +#endif + break; + case WC_HASH_TYPE_SHA3_256: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256) 
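+            /* When support for the selected digest is compiled out, the #if
+             * guard leaves ret at its HASH_TYPE_E default; the explicitly
+             * unsupported types listed below return BAD_FUNC_ARG instead. */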
+ wc_Sha3_256_Free(&hash->sha3); + ret = 0; +#endif + break; + case WC_HASH_TYPE_SHA3_384: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384) + wc_Sha3_384_Free(&hash->sha3); + ret = 0; +#endif + break; + case WC_HASH_TYPE_SHA3_512: +#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512) + wc_Sha3_512_Free(&hash->sha3); + ret = 0; +#endif + break; + + /* not supported */ + case WC_HASH_TYPE_MD5_SHA: + case WC_HASH_TYPE_MD2: + case WC_HASH_TYPE_MD4: + case WC_HASH_TYPE_BLAKE2B: + case WC_HASH_TYPE_BLAKE2S: + case WC_HASH_TYPE_NONE: + default: + ret = BAD_FUNC_ARG; + }; + + return ret; +} + +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) +int wc_HashSetFlags(wc_HashAlg* hash, enum wc_HashType type, word32 flags) +{ + int ret = HASH_TYPE_E; /* Default to hash type error */ + + if (hash == NULL) + return BAD_FUNC_ARG; + + switch (type) { + case WC_HASH_TYPE_MD5: +#ifndef NO_MD5 + ret = wc_Md5SetFlags(&hash->md5, flags); +#endif + break; + case WC_HASH_TYPE_SHA: +#ifndef NO_SHA + ret = wc_ShaSetFlags(&hash->sha, flags); +#endif + break; + case WC_HASH_TYPE_SHA224: +#ifdef WOLFSSL_SHA224 + ret = wc_Sha224SetFlags(&hash->sha224, flags); +#endif + break; + case WC_HASH_TYPE_SHA256: +#ifndef NO_SHA256 + ret = wc_Sha256SetFlags(&hash->sha256, flags); +#endif + break; + case WC_HASH_TYPE_SHA384: +#ifdef WOLFSSL_SHA384 + ret = wc_Sha384SetFlags(&hash->sha384, flags); +#endif + break; + case WC_HASH_TYPE_SHA512: +#ifdef WOLFSSL_SHA512 + ret = wc_Sha512SetFlags(&hash->sha512, flags); +#endif + break; + + case WC_HASH_TYPE_SHA3_224: + case WC_HASH_TYPE_SHA3_256: + case WC_HASH_TYPE_SHA3_384: + case WC_HASH_TYPE_SHA3_512: +#ifdef WOLFSSL_SHA3 + ret = wc_Sha3_SetFlags(&hash->sha3, flags); +#endif + break; + + /* not supported */ + case WC_HASH_TYPE_MD5_SHA: + case WC_HASH_TYPE_MD2: + case WC_HASH_TYPE_MD4: + case WC_HASH_TYPE_BLAKE2B: + case WC_HASH_TYPE_BLAKE2S: + case WC_HASH_TYPE_NONE: + default: + ret = BAD_FUNC_ARG; + }; + + return ret; +} +int wc_HashGetFlags(wc_HashAlg* hash, enum wc_HashType type, word32* flags) +{ + int ret = HASH_TYPE_E; /* Default to hash type error */ + + if (hash == NULL) + return BAD_FUNC_ARG; + + switch (type) { + case WC_HASH_TYPE_MD5: +#ifndef NO_MD5 + ret = wc_Md5GetFlags(&hash->md5, flags); +#endif + break; + case WC_HASH_TYPE_SHA: +#ifndef NO_SHA + ret = wc_ShaGetFlags(&hash->sha, flags); +#endif + break; + case WC_HASH_TYPE_SHA224: +#ifdef WOLFSSL_SHA224 + ret = wc_Sha224GetFlags(&hash->sha224, flags); +#endif + break; + case WC_HASH_TYPE_SHA256: +#ifndef NO_SHA256 + ret = wc_Sha256GetFlags(&hash->sha256, flags); +#endif + break; + case WC_HASH_TYPE_SHA384: +#ifdef WOLFSSL_SHA384 + ret = wc_Sha384GetFlags(&hash->sha384, flags); +#endif + break; + case WC_HASH_TYPE_SHA512: +#ifdef WOLFSSL_SHA512 + ret = wc_Sha512GetFlags(&hash->sha512, flags); +#endif + break; + + case WC_HASH_TYPE_SHA3_224: + case WC_HASH_TYPE_SHA3_256: + case WC_HASH_TYPE_SHA3_384: + case WC_HASH_TYPE_SHA3_512: +#ifdef WOLFSSL_SHA3 + ret = wc_Sha3_GetFlags(&hash->sha3, flags); +#endif + break; + + /* not supported */ + case WC_HASH_TYPE_MD5_SHA: + case WC_HASH_TYPE_MD2: + case WC_HASH_TYPE_MD4: + case WC_HASH_TYPE_BLAKE2B: + case WC_HASH_TYPE_BLAKE2S: + case WC_HASH_TYPE_NONE: + default: + ret = BAD_FUNC_ARG; + }; + + return ret; +} +#endif + + +#if !defined(WOLFSSL_TI_HASH) + +#if !defined(NO_MD5) + int wc_Md5Hash(const byte* data, word32 len, byte* hash) + { + int ret; + #ifdef WOLFSSL_SMALL_STACK + wc_Md5* md5; + #else + wc_Md5 md5[1]; + #endif + + #ifdef WOLFSSL_SMALL_STACK 
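+    /* WOLFSSL_SMALL_STACK builds allocate the digest context from the heap
+     * to keep the stack frame small on constrained targets; the context is
+     * freed again below once hashing completes. */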
+ md5 = (wc_Md5*)XMALLOC(sizeof(wc_Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (md5 == NULL) + return MEMORY_E; + #endif + + if ((ret = wc_InitMd5(md5)) != 0) { + WOLFSSL_MSG("InitMd5 failed"); + } + else { + if ((ret = wc_Md5Update(md5, data, len)) != 0) { + WOLFSSL_MSG("Md5Update failed"); + } + else if ((ret = wc_Md5Final(md5, hash)) != 0) { + WOLFSSL_MSG("Md5Final failed"); + } + wc_Md5Free(md5); + } + + #ifdef WOLFSSL_SMALL_STACK + XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + + return ret; + } +#endif /* !NO_MD5 */ + +#if !defined(NO_SHA) + int wc_ShaHash(const byte* data, word32 len, byte* hash) + { + int ret = 0; + #ifdef WOLFSSL_SMALL_STACK + wc_Sha* sha; + #else + wc_Sha sha[1]; + #endif + + #ifdef WOLFSSL_SMALL_STACK + sha = (wc_Sha*)XMALLOC(sizeof(wc_Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (sha == NULL) + return MEMORY_E; + #endif + + if ((ret = wc_InitSha(sha)) != 0) { + WOLFSSL_MSG("InitSha failed"); + } + else { + if ((ret = wc_ShaUpdate(sha, data, len)) != 0) { + WOLFSSL_MSG("ShaUpdate failed"); + } + else if ((ret = wc_ShaFinal(sha, hash)) != 0) { + WOLFSSL_MSG("ShaFinal failed"); + } + wc_ShaFree(sha); + } + + #ifdef WOLFSSL_SMALL_STACK + XFREE(sha, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + + return ret; + } +#endif /* !NO_SHA */ + +#if defined(WOLFSSL_SHA224) + int wc_Sha224Hash(const byte* data, word32 len, byte* hash) + { + int ret = 0; + #ifdef WOLFSSL_SMALL_STACK + wc_Sha224* sha224; + #else + wc_Sha224 sha224[1]; + #endif + + #ifdef WOLFSSL_SMALL_STACK + sha224 = (wc_Sha224*)XMALLOC(sizeof(wc_Sha224), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (sha224 == NULL) + return MEMORY_E; + #endif + + if ((ret = wc_InitSha224(sha224)) != 0) { + WOLFSSL_MSG("InitSha224 failed"); + } + else { + if ((ret = wc_Sha224Update(sha224, data, len)) != 0) { + WOLFSSL_MSG("Sha224Update failed"); + } + else if ((ret = wc_Sha224Final(sha224, hash)) != 0) { + WOLFSSL_MSG("Sha224Final failed"); + } + wc_Sha224Free(sha224); + } + + #ifdef WOLFSSL_SMALL_STACK + XFREE(sha224, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + + return ret; +} +#endif /* WOLFSSL_SHA224 */ + +#if !defined(NO_SHA256) + int wc_Sha256Hash(const byte* data, word32 len, byte* hash) + { + int ret = 0; + #ifdef WOLFSSL_SMALL_STACK + wc_Sha256* sha256; + #else + wc_Sha256 sha256[1]; + #endif + + #ifdef WOLFSSL_SMALL_STACK + sha256 = (wc_Sha256*)XMALLOC(sizeof(wc_Sha256), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (sha256 == NULL) + return MEMORY_E; + #endif + + if ((ret = wc_InitSha256(sha256)) != 0) { + WOLFSSL_MSG("InitSha256 failed"); + } + else { + if ((ret = wc_Sha256Update(sha256, data, len)) != 0) { + WOLFSSL_MSG("Sha256Update failed"); + } + else if ((ret = wc_Sha256Final(sha256, hash)) != 0) { + WOLFSSL_MSG("Sha256Final failed"); + } + wc_Sha256Free(sha256); + } + + + #ifdef WOLFSSL_SMALL_STACK + XFREE(sha256, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + + return ret; + } +#endif /* !NO_SHA256 */ + +#endif /* !defined(WOLFSSL_TI_HASH) */ + + +#if defined(WOLFSSL_SHA512) + int wc_Sha512Hash(const byte* data, word32 len, byte* hash) + { + int ret = 0; + #ifdef WOLFSSL_SMALL_STACK + wc_Sha512* sha512; + #else + wc_Sha512 sha512[1]; + #endif + + #ifdef WOLFSSL_SMALL_STACK + sha512 = (wc_Sha512*)XMALLOC(sizeof(wc_Sha512), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (sha512 == NULL) + return MEMORY_E; + #endif + + if ((ret = wc_InitSha512(sha512)) != 0) { + WOLFSSL_MSG("InitSha512 failed"); + } + else { + if ((ret = wc_Sha512Update(sha512, data, len)) != 0) { + WOLFSSL_MSG("Sha512Update failed"); + } + else if ((ret = 
wc_Sha512Final(sha512, hash)) != 0) { + WOLFSSL_MSG("Sha512Final failed"); + } + wc_Sha512Free(sha512); + } + + #ifdef WOLFSSL_SMALL_STACK + XFREE(sha512, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + + return ret; + } +#endif /* WOLFSSL_SHA512 */ + +#if defined(WOLFSSL_SHA384) + int wc_Sha384Hash(const byte* data, word32 len, byte* hash) + { + int ret = 0; + #ifdef WOLFSSL_SMALL_STACK + wc_Sha384* sha384; + #else + wc_Sha384 sha384[1]; + #endif + + #ifdef WOLFSSL_SMALL_STACK + sha384 = (wc_Sha384*)XMALLOC(sizeof(wc_Sha384), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (sha384 == NULL) + return MEMORY_E; + #endif + + if ((ret = wc_InitSha384(sha384)) != 0) { + WOLFSSL_MSG("InitSha384 failed"); + } + else { + if ((ret = wc_Sha384Update(sha384, data, len)) != 0) { + WOLFSSL_MSG("Sha384Update failed"); + } + else if ((ret = wc_Sha384Final(sha384, hash)) != 0) { + WOLFSSL_MSG("Sha384Final failed"); + } + wc_Sha384Free(sha384); + } + + #ifdef WOLFSSL_SMALL_STACK + XFREE(sha384, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + + return ret; + } +#endif /* WOLFSSL_SHA384 */ + +#if defined(WOLFSSL_SHA3) +#if !defined(WOLFSSL_NOSHA3_224) + int wc_Sha3_224Hash(const byte* data, word32 len, byte* hash) + { + int ret = 0; + #ifdef WOLFSSL_SMALL_STACK + wc_Sha3* sha3; + #else + wc_Sha3 sha3[1]; + #endif + + #ifdef WOLFSSL_SMALL_STACK + sha3 = (wc_Sha3*)XMALLOC(sizeof(wc_Sha3), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (sha3 == NULL) + return MEMORY_E; + #endif + + if ((ret = wc_InitSha3_224(sha3, NULL, INVALID_DEVID)) != 0) { + WOLFSSL_MSG("InitSha3_224 failed"); + } + else { + if ((ret = wc_Sha3_224_Update(sha3, data, len)) != 0) { + WOLFSSL_MSG("Sha3_224_Update failed"); + } + else if ((ret = wc_Sha3_224_Final(sha3, hash)) != 0) { + WOLFSSL_MSG("Sha3_224_Final failed"); + } + wc_Sha3_224_Free(sha3); + } + + #ifdef WOLFSSL_SMALL_STACK + XFREE(sha3, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + + return ret; + } +#endif /* !WOLFSSL_NOSHA3_224 */ + +#if !defined(WOLFSSL_NOSHA3_256) + int wc_Sha3_256Hash(const byte* data, word32 len, byte* hash) + { + int ret = 0; + #ifdef WOLFSSL_SMALL_STACK + wc_Sha3* sha3; + #else + wc_Sha3 sha3[1]; + #endif + + #ifdef WOLFSSL_SMALL_STACK + sha3 = (wc_Sha3*)XMALLOC(sizeof(wc_Sha3), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (sha3 == NULL) + return MEMORY_E; + #endif + + if ((ret = wc_InitSha3_256(sha3, NULL, INVALID_DEVID)) != 0) { + WOLFSSL_MSG("InitSha3_256 failed"); + } + else { + if ((ret = wc_Sha3_256_Update(sha3, data, len)) != 0) { + WOLFSSL_MSG("Sha3_256_Update failed"); + } + else if ((ret = wc_Sha3_256_Final(sha3, hash)) != 0) { + WOLFSSL_MSG("Sha3_256_Final failed"); + } + wc_Sha3_256_Free(sha3); + } + + #ifdef WOLFSSL_SMALL_STACK + XFREE(sha3, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + + return ret; + } +#endif /* !WOLFSSL_NOSHA3_256 */ + +#if !defined(WOLFSSL_NOSHA3_384) + int wc_Sha3_384Hash(const byte* data, word32 len, byte* hash) + { + int ret = 0; + #ifdef WOLFSSL_SMALL_STACK + wc_Sha3* sha3; + #else + wc_Sha3 sha3[1]; + #endif + + #ifdef WOLFSSL_SMALL_STACK + sha3 = (wc_Sha3*)XMALLOC(sizeof(wc_Sha3), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (sha3 == NULL) + return MEMORY_E; + #endif + + if ((ret = wc_InitSha3_384(sha3, NULL, INVALID_DEVID)) != 0) { + WOLFSSL_MSG("InitSha3_384 failed"); + } + else { + if ((ret = wc_Sha3_384_Update(sha3, data, len)) != 0) { + WOLFSSL_MSG("Sha3_384_Update failed"); + } + else if ((ret = wc_Sha3_384_Final(sha3, hash)) != 0) { + WOLFSSL_MSG("Sha3_384_Final failed"); + } + wc_Sha3_384_Free(sha3); + } + + #ifdef WOLFSSL_SMALL_STACK + XFREE(sha3, 
NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + + return ret; + } +#endif /* !WOLFSSL_NOSHA3_384 */ + +#if !defined(WOLFSSL_NOSHA3_512) + int wc_Sha3_512Hash(const byte* data, word32 len, byte* hash) + { + int ret = 0; + #ifdef WOLFSSL_SMALL_STACK + wc_Sha3* sha3; + #else + wc_Sha3 sha3[1]; + #endif + + #ifdef WOLFSSL_SMALL_STACK + sha3 = (wc_Sha3*)XMALLOC(sizeof(wc_Sha3), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (sha3 == NULL) + return MEMORY_E; + #endif + + if ((ret = wc_InitSha3_512(sha3, NULL, INVALID_DEVID)) != 0) { + WOLFSSL_MSG("InitSha3_512 failed"); + } + else { + if ((ret = wc_Sha3_512_Update(sha3, data, len)) != 0) { + WOLFSSL_MSG("Sha3_512_Update failed"); + } + else if ((ret = wc_Sha3_512_Final(sha3, hash)) != 0) { + WOLFSSL_MSG("Sha3_512_Final failed"); + } + wc_Sha3_512_Free(sha3); + } + + #ifdef WOLFSSL_SMALL_STACK + XFREE(sha3, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + + return ret; + } +#endif /* !WOLFSSL_NOSHA3_512 */ + +#if defined(WOLFSSL_SHAKE256) && !defined(WOLFSSL_NO_SHAKE256) + int wc_Shake256Hash(const byte* data, word32 len, byte* hash, + word32 hashLen) + { + int ret = 0; + #ifdef WOLFSSL_SMALL_STACK + wc_Shake* shake; + #else + wc_Shake shake[1]; + #endif + + #ifdef WOLFSSL_SMALL_STACK + shake = (wc_Shake*)XMALLOC(sizeof(wc_Shake), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (shake == NULL) + return MEMORY_E; + #endif + + if ((ret = wc_InitShake256(shake, NULL, INVALID_DEVID)) != 0) { + WOLFSSL_MSG("InitShake256 failed"); + } + else { + if ((ret = wc_Shake256_Update(shake, data, len)) != 0) { + WOLFSSL_MSG("Shake256_Update failed"); + } + else if ((ret = wc_Shake256_Final(shake, hash, hashLen)) != 0) { + WOLFSSL_MSG("Shake256_Final failed"); + } + wc_Shake256_Free(shake); + } + + #ifdef WOLFSSL_SMALL_STACK + XFREE(shake, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + + return ret; + } +#endif /* WOLFSSL_SHAKE_256 && !WOLFSSL_NO_SHAKE256 */ +#endif /* WOLFSSL_SHA3 */ + +#endif /* !NO_HASH_WRAPPER */ + +#ifdef WOLFSSL_HAVE_PRF + +#ifdef WOLFSSL_SHA384 + #define P_HASH_MAX_SIZE WC_SHA384_DIGEST_SIZE +#else + #define P_HASH_MAX_SIZE WC_SHA256_DIGEST_SIZE +#endif + +/* Pseudo Random Function for MD5, SHA-1, SHA-256, or SHA-384 */ +int wc_PRF(byte* result, word32 resLen, const byte* secret, + word32 secLen, const byte* seed, word32 seedLen, int hash, + void* heap, int devId) +{ + word32 len = P_HASH_MAX_SIZE; + word32 times; + word32 lastLen; + word32 lastTime; + word32 i; + word32 idx = 0; + int ret = 0; +#ifdef WOLFSSL_SMALL_STACK + byte* previous; + byte* current; + Hmac* hmac; +#else + byte previous[P_HASH_MAX_SIZE]; /* max size */ + byte current[P_HASH_MAX_SIZE]; /* max size */ + Hmac hmac[1]; +#endif + +#ifdef WOLFSSL_SMALL_STACK + previous = (byte*)XMALLOC(P_HASH_MAX_SIZE, heap, DYNAMIC_TYPE_DIGEST); + current = (byte*)XMALLOC(P_HASH_MAX_SIZE, heap, DYNAMIC_TYPE_DIGEST); + hmac = (Hmac*)XMALLOC(sizeof(Hmac), heap, DYNAMIC_TYPE_HMAC); + + if (previous == NULL || current == NULL || hmac == NULL) { + if (previous) XFREE(previous, heap, DYNAMIC_TYPE_DIGEST); + if (current) XFREE(current, heap, DYNAMIC_TYPE_DIGEST); + if (hmac) XFREE(hmac, heap, DYNAMIC_TYPE_HMAC); + + return MEMORY_E; + } +#endif + + switch (hash) { + #ifndef NO_MD5 + case md5_mac: + hash = WC_MD5; + len = WC_MD5_DIGEST_SIZE; + break; + #endif + + #ifndef NO_SHA256 + case sha256_mac: + hash = WC_SHA256; + len = WC_SHA256_DIGEST_SIZE; + break; + #endif + + #ifdef WOLFSSL_SHA384 + case sha384_mac: + hash = WC_SHA384; + len = WC_SHA384_DIGEST_SIZE; + break; + #endif + + #ifndef NO_SHA + case sha_mac: + default: + 
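+    /* After this mapping, the loop below implements the TLS P_hash expansion
+     * (RFC 2246 / RFC 5246):
+     *     A(0) = seed, A(i) = HMAC(secret, A(i-1)),
+     *     output = HMAC(secret, A(1) + seed) | HMAC(secret, A(2) + seed) | ...
+     * truncated to resLen bytes; 'previous' carries A(i) and 'current' holds
+     * the block emitted in round i. */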
hash = WC_SHA; + len = WC_SHA_DIGEST_SIZE; + break; + #endif + } + + times = resLen / len; + lastLen = resLen % len; + + if (lastLen) + times += 1; + + lastTime = times - 1; + + ret = wc_HmacInit(hmac, heap, devId); + if (ret == 0) { + ret = wc_HmacSetKey(hmac, hash, secret, secLen); + if (ret == 0) + ret = wc_HmacUpdate(hmac, seed, seedLen); /* A0 = seed */ + if (ret == 0) + ret = wc_HmacFinal(hmac, previous); /* A1 */ + if (ret == 0) { + for (i = 0; i < times; i++) { + ret = wc_HmacUpdate(hmac, previous, len); + if (ret != 0) + break; + ret = wc_HmacUpdate(hmac, seed, seedLen); + if (ret != 0) + break; + ret = wc_HmacFinal(hmac, current); + if (ret != 0) + break; + + if ((i == lastTime) && lastLen) + XMEMCPY(&result[idx], current, + min(lastLen, P_HASH_MAX_SIZE)); + else { + XMEMCPY(&result[idx], current, len); + idx += len; + ret = wc_HmacUpdate(hmac, previous, len); + if (ret != 0) + break; + ret = wc_HmacFinal(hmac, previous); + if (ret != 0) + break; + } + } + } + wc_HmacFree(hmac); + } + + ForceZero(previous, P_HASH_MAX_SIZE); + ForceZero(current, P_HASH_MAX_SIZE); + ForceZero(hmac, sizeof(Hmac)); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(previous, heap, DYNAMIC_TYPE_DIGEST); + XFREE(current, heap, DYNAMIC_TYPE_DIGEST); + XFREE(hmac, heap, DYNAMIC_TYPE_HMAC); +#endif + + return ret; +} +#undef P_HASH_MAX_SIZE + +/* compute PRF (pseudo random function) using SHA1 and MD5 for TLSv1 */ +int wc_PRF_TLSv1(byte* digest, word32 digLen, const byte* secret, + word32 secLen, const byte* label, word32 labLen, + const byte* seed, word32 seedLen, void* heap, int devId) +{ + int ret = 0; + word32 half = (secLen + 1) / 2; + +#ifdef WOLFSSL_SMALL_STACK + byte* md5_half; + byte* sha_half; + byte* md5_result; + byte* sha_result; +#else + byte md5_half[MAX_PRF_HALF]; /* half is real size */ + byte sha_half[MAX_PRF_HALF]; /* half is real size */ + byte md5_result[MAX_PRF_DIG]; /* digLen is real size */ + byte sha_result[MAX_PRF_DIG]; /* digLen is real size */ +#endif +#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH) + DECLARE_VAR(labelSeed, byte, MAX_PRF_LABSEED, heap); + if (labelSeed == NULL) + return MEMORY_E; +#else + byte labelSeed[MAX_PRF_LABSEED]; +#endif + + if (half > MAX_PRF_HALF || + labLen + seedLen > MAX_PRF_LABSEED || + digLen > MAX_PRF_DIG) + { + #if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH) + FREE_VAR(labelSeed, heap); + #endif + return BUFFER_E; + } + +#ifdef WOLFSSL_SMALL_STACK + md5_half = (byte*)XMALLOC(MAX_PRF_HALF, heap, DYNAMIC_TYPE_DIGEST); + sha_half = (byte*)XMALLOC(MAX_PRF_HALF, heap, DYNAMIC_TYPE_DIGEST); + md5_result = (byte*)XMALLOC(MAX_PRF_DIG, heap, DYNAMIC_TYPE_DIGEST); + sha_result = (byte*)XMALLOC(MAX_PRF_DIG, heap, DYNAMIC_TYPE_DIGEST); + + if (md5_half == NULL || sha_half == NULL || md5_result == NULL || + sha_result == NULL) { + if (md5_half) XFREE(md5_half, heap, DYNAMIC_TYPE_DIGEST); + if (sha_half) XFREE(sha_half, heap, DYNAMIC_TYPE_DIGEST); + if (md5_result) XFREE(md5_result, heap, DYNAMIC_TYPE_DIGEST); + if (sha_result) XFREE(sha_result, heap, DYNAMIC_TYPE_DIGEST); + #if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH) + FREE_VAR(labelSeed, heap); + #endif + + return MEMORY_E; + } +#endif + + XMEMSET(md5_result, 0, digLen); + XMEMSET(sha_result, 0, digLen); + + XMEMCPY(md5_half, secret, half); + XMEMCPY(sha_half, secret + half - secLen % 2, half); + + XMEMCPY(labelSeed, label, labLen); + XMEMCPY(labelSeed + labLen, seed, seedLen); + + if ((ret = wc_PRF(md5_result, digLen, md5_half, half, labelSeed, + labLen + seedLen, 
md5_mac, heap, devId)) == 0) { + if ((ret = wc_PRF(sha_result, digLen, sha_half, half, labelSeed, + labLen + seedLen, sha_mac, heap, devId)) == 0) { + /* calculate XOR for TLSv1 PRF */ + XMEMCPY(digest, md5_result, digLen); + xorbuf(digest, sha_result, digLen); + } + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(md5_half, heap, DYNAMIC_TYPE_DIGEST); + XFREE(sha_half, heap, DYNAMIC_TYPE_DIGEST); + XFREE(md5_result, heap, DYNAMIC_TYPE_DIGEST); + XFREE(sha_result, heap, DYNAMIC_TYPE_DIGEST); +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH) + FREE_VAR(labelSeed, heap); +#endif + + return ret; +} + +/* Wrapper for TLS 1.2 and TLSv1 cases to calculate PRF */ +/* In TLS 1.2 case call straight thru to wc_PRF */ +int wc_PRF_TLS(byte* digest, word32 digLen, const byte* secret, word32 secLen, + const byte* label, word32 labLen, const byte* seed, word32 seedLen, + int useAtLeastSha256, int hash_type, void* heap, int devId) +{ + int ret = 0; + + if (useAtLeastSha256) { + #if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH) + DECLARE_VAR(labelSeed, byte, MAX_PRF_LABSEED, heap); + if (labelSeed == NULL) + return MEMORY_E; + #else + byte labelSeed[MAX_PRF_LABSEED]; + #endif + + if (labLen + seedLen > MAX_PRF_LABSEED) + return BUFFER_E; + + XMEMCPY(labelSeed, label, labLen); + XMEMCPY(labelSeed + labLen, seed, seedLen); + + /* If a cipher suite wants an algorithm better than sha256, it + * should use better. */ + if (hash_type < sha256_mac || hash_type == blake2b_mac) + hash_type = sha256_mac; + /* compute PRF for MD5, SHA-1, SHA-256, or SHA-384 for TLSv1.2 PRF */ + ret = wc_PRF(digest, digLen, secret, secLen, labelSeed, + labLen + seedLen, hash_type, heap, devId); + + #if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH) + FREE_VAR(labelSeed, heap); + #endif + } +#ifndef NO_OLD_TLS + else { + /* compute TLSv1 PRF (pseudo random function using HMAC) */ + ret = wc_PRF_TLSv1(digest, digLen, secret, secLen, label, labLen, seed, + seedLen, heap, devId); + } +#endif + + return ret; +} +#endif /* WOLFSSL_HAVE_PRF */ diff --git a/client/wolfssl/wolfcrypt/src/hc128.c b/client/wolfssl/wolfcrypt/src/hc128.c new file mode 100644 index 0000000..96f02d1 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/hc128.c @@ -0,0 +1,430 @@ +/* hc128.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_HC128
+
+#include <wolfssl/wolfcrypt/hc128.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+
+#ifdef BIG_ENDIAN_ORDER
+    #define LITTLE32(x) ByteReverseWord32(x)
+#else
+    #define LITTLE32(x) (x)
+#endif
+
+
+/*h1 function*/
+#define h1(ctx, x, y) { \
+    byte a,c; \
+    a = (byte) (x); \
+    c = (byte) ((x) >> 16); \
+    y = (ctx->T[512+a])+(ctx->T[512+256+c]); \
+}
+
+/*h2 function*/
+#define h2(ctx, x, y) { \
+    byte a,c; \
+    a = (byte) (x); \
+    c = (byte) ((x) >> 16); \
+    y = (ctx->T[a])+(ctx->T[256+c]); \
+}
+
+/*one step of HC-128, update P and generate 32 bits keystream*/
+#define step_P(ctx,u,v,a,b,c,d,n){ \
+    word32 tem0,tem1,tem2,tem3; \
+    h1((ctx),(ctx->X[(d)]),tem3); \
+    tem0 = rotrFixed((ctx->T[(v)]),23); \
+    tem1 = rotrFixed((ctx->X[(c)]),10); \
+    tem2 = rotrFixed((ctx->X[(b)]),8); \
+    (ctx->T[(u)]) += tem2+(tem0 ^ tem1); \
+    (ctx->X[(a)]) = (ctx->T[(u)]); \
+    (n) = tem3 ^ (ctx->T[(u)]) ; \
+}
+
+/*one step of HC-128, update Q and generate 32 bits keystream*/
+#define step_Q(ctx,u,v,a,b,c,d,n){ \
+    word32 tem0,tem1,tem2,tem3; \
+    h2((ctx),(ctx->Y[(d)]),tem3); \
+    tem0 = rotrFixed((ctx->T[(v)]),(32-23)); \
+    tem1 = rotrFixed((ctx->Y[(c)]),(32-10)); \
+    tem2 = rotrFixed((ctx->Y[(b)]),(32-8)); \
+    (ctx->T[(u)]) += tem2 + (tem0 ^ tem1); \
+    (ctx->Y[(a)]) = (ctx->T[(u)]); \
+    (n) = tem3 ^ (ctx->T[(u)]) ; \
+}
+
+/*16 steps of HC-128, generate 512 bits keystream*/
+static void generate_keystream(HC128* ctx, word32* keystream)
+{
+    word32 cc,dd;
+    cc = ctx->counter1024 & 0x1ff;
+    dd = (cc+16)&0x1ff;
+
+    if (ctx->counter1024 < 512)
+    {
+        ctx->counter1024 = (ctx->counter1024 + 16) & 0x3ff;
+        step_P(ctx, cc+0, cc+1,  0, 6, 13,4,  keystream[0]);
+        step_P(ctx, cc+1, cc+2,  1, 7, 14,5,  keystream[1]);
+        step_P(ctx, cc+2, cc+3,  2, 8, 15,6,  keystream[2]);
+        step_P(ctx, cc+3, cc+4,  3, 9, 0, 7,  keystream[3]);
+        step_P(ctx, cc+4, cc+5,  4, 10,1, 8,  keystream[4]);
+        step_P(ctx, cc+5, cc+6,  5, 11,2, 9,  keystream[5]);
+        step_P(ctx, cc+6, cc+7,  6, 12,3, 10, keystream[6]);
+        step_P(ctx, cc+7, cc+8,  7, 13,4, 11, keystream[7]);
+        step_P(ctx, cc+8, cc+9,  8, 14,5, 12, keystream[8]);
+        step_P(ctx, cc+9, cc+10, 9, 15,6, 13, keystream[9]);
+        step_P(ctx, cc+10,cc+11, 10,0, 7, 14, keystream[10]);
+        step_P(ctx, cc+11,cc+12, 11,1, 8, 15, keystream[11]);
+        step_P(ctx, cc+12,cc+13, 12,2, 9, 0,  keystream[12]);
+        step_P(ctx, cc+13,cc+14, 13,3, 10,1,  keystream[13]);
+        step_P(ctx, cc+14,cc+15, 14,4, 11,2,  keystream[14]);
+        step_P(ctx, cc+15,dd+0,  15,5, 12,3,  keystream[15]);
+    }
+    else
+    {
+        ctx->counter1024 = (ctx->counter1024 + 16) & 0x3ff;
+        step_Q(ctx, 512+cc+0,  512+cc+1,  0, 6, 13,4,  keystream[0]);
+        step_Q(ctx, 512+cc+1,  512+cc+2,  1, 7, 14,5,  keystream[1]);
+        step_Q(ctx, 512+cc+2,  512+cc+3,  2, 8, 15,6,  keystream[2]);
+        step_Q(ctx, 512+cc+3,  512+cc+4,  3, 9, 0, 7,  keystream[3]);
+        step_Q(ctx, 512+cc+4,  512+cc+5,  4, 10,1, 8,  keystream[4]);
+        step_Q(ctx, 512+cc+5,  512+cc+6,  5, 11,2, 9,  keystream[5]);
+        step_Q(ctx, 512+cc+6,  512+cc+7,  6, 12,3, 10, keystream[6]);
+        step_Q(ctx, 512+cc+7,  512+cc+8,  7, 13,4, 11, keystream[7]);
+        step_Q(ctx, 512+cc+8,  512+cc+9,  8, 14,5, 12, keystream[8]);
+        step_Q(ctx, 512+cc+9,  512+cc+10, 9, 15,6, 13, keystream[9]);
+        step_Q(ctx, 512+cc+10, 512+cc+11, 10,0, 7, 14, keystream[10]);
+
step_Q(ctx, 512+cc+11,512+cc+12,11,1, 8, 15,keystream[11]); + step_Q(ctx, 512+cc+12,512+cc+13,12,2, 9, 0, keystream[12]); + step_Q(ctx, 512+cc+13,512+cc+14,13,3, 10,1, keystream[13]); + step_Q(ctx, 512+cc+14,512+cc+15,14,4, 11,2, keystream[14]); + step_Q(ctx, 512+cc+15,512+dd+0, 15,5, 12,3, keystream[15]); + } +} + + +/* The following defines the initialization functions */ +#define f1(x) (rotrFixed((x),7) ^ rotrFixed((x),18) ^ ((x) >> 3)) +#define f2(x) (rotrFixed((x),17) ^ rotrFixed((x),19) ^ ((x) >> 10)) + +/*update table P*/ +#define update_P(ctx,u,v,a,b,c,d){ \ + word32 tem0,tem1,tem2,tem3; \ + tem0 = rotrFixed((ctx->T[(v)]),23); \ + tem1 = rotrFixed((ctx->X[(c)]),10); \ + tem2 = rotrFixed((ctx->X[(b)]),8); \ + h1((ctx),(ctx->X[(d)]),tem3); \ + (ctx->T[(u)]) = ((ctx->T[(u)]) + tem2+(tem0^tem1)) ^ tem3; \ + (ctx->X[(a)]) = (ctx->T[(u)]); \ +} + +/*update table Q*/ +#define update_Q(ctx,u,v,a,b,c,d){ \ + word32 tem0,tem1,tem2,tem3; \ + tem0 = rotrFixed((ctx->T[(v)]),(32-23)); \ + tem1 = rotrFixed((ctx->Y[(c)]),(32-10)); \ + tem2 = rotrFixed((ctx->Y[(b)]),(32-8)); \ + h2((ctx),(ctx->Y[(d)]),tem3); \ + (ctx->T[(u)]) = ((ctx->T[(u)]) + tem2+(tem0^tem1)) ^ tem3; \ + (ctx->Y[(a)]) = (ctx->T[(u)]); \ +} + +/*16 steps of HC-128, without generating keystream, */ +/*but use the outputs to update P and Q*/ +static void setup_update(HC128* ctx) /*each time 16 steps*/ +{ + word32 cc,dd; + cc = ctx->counter1024 & 0x1ff; + dd = (cc+16)&0x1ff; + + if (ctx->counter1024 < 512) + { + ctx->counter1024 = (ctx->counter1024 + 16) & 0x3ff; + update_P(ctx, cc+0, cc+1, 0, 6, 13, 4); + update_P(ctx, cc+1, cc+2, 1, 7, 14, 5); + update_P(ctx, cc+2, cc+3, 2, 8, 15, 6); + update_P(ctx, cc+3, cc+4, 3, 9, 0, 7); + update_P(ctx, cc+4, cc+5, 4, 10,1, 8); + update_P(ctx, cc+5, cc+6, 5, 11,2, 9); + update_P(ctx, cc+6, cc+7, 6, 12,3, 10); + update_P(ctx, cc+7, cc+8, 7, 13,4, 11); + update_P(ctx, cc+8, cc+9, 8, 14,5, 12); + update_P(ctx, cc+9, cc+10,9, 15,6, 13); + update_P(ctx, cc+10,cc+11,10,0, 7, 14); + update_P(ctx, cc+11,cc+12,11,1, 8, 15); + update_P(ctx, cc+12,cc+13,12,2, 9, 0); + update_P(ctx, cc+13,cc+14,13,3, 10, 1); + update_P(ctx, cc+14,cc+15,14,4, 11, 2); + update_P(ctx, cc+15,dd+0, 15,5, 12, 3); + } + else + { + ctx->counter1024 = (ctx->counter1024 + 16) & 0x3ff; + update_Q(ctx, 512+cc+0, 512+cc+1, 0, 6, 13, 4); + update_Q(ctx, 512+cc+1, 512+cc+2, 1, 7, 14, 5); + update_Q(ctx, 512+cc+2, 512+cc+3, 2, 8, 15, 6); + update_Q(ctx, 512+cc+3, 512+cc+4, 3, 9, 0, 7); + update_Q(ctx, 512+cc+4, 512+cc+5, 4, 10,1, 8); + update_Q(ctx, 512+cc+5, 512+cc+6, 5, 11,2, 9); + update_Q(ctx, 512+cc+6, 512+cc+7, 6, 12,3, 10); + update_Q(ctx, 512+cc+7, 512+cc+8, 7, 13,4, 11); + update_Q(ctx, 512+cc+8, 512+cc+9, 8, 14,5, 12); + update_Q(ctx, 512+cc+9, 512+cc+10,9, 15,6, 13); + update_Q(ctx, 512+cc+10,512+cc+11,10,0, 7, 14); + update_Q(ctx, 512+cc+11,512+cc+12,11,1, 8, 15); + update_Q(ctx, 512+cc+12,512+cc+13,12,2, 9, 0); + update_Q(ctx, 512+cc+13,512+cc+14,13,3, 10, 1); + update_Q(ctx, 512+cc+14,512+cc+15,14,4, 11, 2); + update_Q(ctx, 512+cc+15,512+dd+0, 15,5, 12, 3); + } +} + + +/* for the 128-bit key: key[0]...key[15] +* key[0] is the least significant byte of ctx->key[0] (K_0); +* key[3] is the most significant byte of ctx->key[0] (K_0); +* ... +* key[12] is the least significant byte of ctx->key[3] (K_3) +* key[15] is the most significant byte of ctx->key[3] (K_3) +* +* for the 128-bit iv: iv[0]...iv[15] +* iv[0] is the least significant byte of ctx->iv[0] (IV_0); +* iv[3] is the most significant byte of ctx->iv[0] (IV_0); +* ... 
+* iv[12] is the least significant byte of ctx->iv[3] (IV_3) +* iv[15] is the most significant byte of ctx->iv[3] (IV_3) +*/ + + + +static void Hc128_SetIV(HC128* ctx, const byte* inIv) +{ + word32 i; + word32 iv[4]; + + if (inIv) + XMEMCPY(iv, inIv, sizeof(iv)); + else + XMEMSET(iv, 0, sizeof(iv)); + + for (i = 0; i < (128 >> 5); i++) + ctx->iv[i] = LITTLE32(iv[i]); + + for (; i < 8; i++) ctx->iv[i] = ctx->iv[i-4]; + + /* expand the key and IV into the table T */ + /* (expand the key and IV into the table P and Q) */ + + for (i = 0; i < 8; i++) ctx->T[i] = ctx->key[i]; + for (i = 8; i < 16; i++) ctx->T[i] = ctx->iv[i-8]; + + for (i = 16; i < (256+16); i++) + ctx->T[i] = f2(ctx->T[i-2]) + ctx->T[i-7] + f1(ctx->T[i-15]) + + ctx->T[i-16]+i; + + for (i = 0; i < 16; i++) ctx->T[i] = ctx->T[256+i]; + + for (i = 16; i < 1024; i++) + ctx->T[i] = f2(ctx->T[i-2]) + ctx->T[i-7] + f1(ctx->T[i-15]) + + ctx->T[i-16]+256+i; + + /* initialize counter1024, X and Y */ + ctx->counter1024 = 0; + for (i = 0; i < 16; i++) ctx->X[i] = ctx->T[512-16+i]; + for (i = 0; i < 16; i++) ctx->Y[i] = ctx->T[512+512-16+i]; + + /* run the cipher 1024 steps before generating the output */ + for (i = 0; i < 64; i++) setup_update(ctx); +} + + +static WC_INLINE int DoKey(HC128* ctx, const byte* key, const byte* iv) +{ + word32 i; + + /* Key size in bits 128 */ + for (i = 0; i < (128 >> 5); i++) + ctx->key[i] = LITTLE32(((word32*)key)[i]); + + for ( ; i < 8 ; i++) ctx->key[i] = ctx->key[i-4]; + + Hc128_SetIV(ctx, iv); + + return 0; +} + + +int wc_Hc128_SetHeap(HC128* ctx, void* heap) +{ + if (ctx == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef XSTREAM_ALIGN + ctx->heap = heap; +#endif + + (void)heap; + return 0; +} + +/* Key setup */ +int wc_Hc128_SetKey(HC128* ctx, const byte* key, const byte* iv) +{ + if (ctx == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef XSTREAM_ALIGN + /* default heap to NULL or heap test value */ + #ifdef WOLFSSL_HEAP_TEST + ctx->heap = (void*)WOLFSSL_HEAP_TEST; + #else + ctx->heap = NULL; + #endif /* WOLFSSL_HEAP_TEST */ + + if ((wolfssl_word)key % 4) { + int alignKey[4]; + + /* iv gets aligned in SetIV */ + WOLFSSL_MSG("Hc128SetKey unaligned key"); + + XMEMCPY(alignKey, key, sizeof(alignKey)); + + return DoKey(ctx, (const byte*)alignKey, iv); + } +#endif /* XSTREAM_ALIGN */ + + return DoKey(ctx, key, iv); +} + + + +/* The following defines the encryption of data stream */ +static WC_INLINE int DoProcess(HC128* ctx, byte* output, const byte* input, + word32 msglen) +{ + word32 i, keystream[16]; + + for ( ; msglen >= 64; msglen -= 64, input += 64, output += 64) + { + generate_keystream(ctx, keystream); + + /* unroll loop */ + ((word32*)output)[0] = ((word32*)input)[0] ^ LITTLE32(keystream[0]); + ((word32*)output)[1] = ((word32*)input)[1] ^ LITTLE32(keystream[1]); + ((word32*)output)[2] = ((word32*)input)[2] ^ LITTLE32(keystream[2]); + ((word32*)output)[3] = ((word32*)input)[3] ^ LITTLE32(keystream[3]); + ((word32*)output)[4] = ((word32*)input)[4] ^ LITTLE32(keystream[4]); + ((word32*)output)[5] = ((word32*)input)[5] ^ LITTLE32(keystream[5]); + ((word32*)output)[6] = ((word32*)input)[6] ^ LITTLE32(keystream[6]); + ((word32*)output)[7] = ((word32*)input)[7] ^ LITTLE32(keystream[7]); + ((word32*)output)[8] = ((word32*)input)[8] ^ LITTLE32(keystream[8]); + ((word32*)output)[9] = ((word32*)input)[9] ^ LITTLE32(keystream[9]); + ((word32*)output)[10] = ((word32*)input)[10] ^ LITTLE32(keystream[10]); + ((word32*)output)[11] = ((word32*)input)[11] ^ LITTLE32(keystream[11]); + ((word32*)output)[12] = 
((word32*)input)[12] ^ LITTLE32(keystream[12]);
+        ((word32*)output)[13] = ((word32*)input)[13] ^ LITTLE32(keystream[13]);
+        ((word32*)output)[14] = ((word32*)input)[14] ^ LITTLE32(keystream[14]);
+        ((word32*)output)[15] = ((word32*)input)[15] ^ LITTLE32(keystream[15]);
+    }
+
+    if (msglen > 0)
+    {
+        XMEMSET(keystream, 0, sizeof(keystream)); /* hush the static analysis */
+        generate_keystream(ctx, keystream);
+
+#ifdef BIG_ENDIAN_ORDER
+        {
+            word32 wordsLeft = msglen / sizeof(word32);
+            if (msglen % sizeof(word32)) wordsLeft++;
+
+            ByteReverseWords(keystream, keystream, wordsLeft * sizeof(word32));
+        }
+#endif
+
+        for (i = 0; i < msglen; i++)
+            output[i] = input[i] ^ ((byte*)keystream)[i];
+    }
+
+    return 0;
+}
+
+
+/* Encrypt/decrypt a message of any size */
+int wc_Hc128_Process(HC128* ctx, byte* output, const byte* input, word32 msglen)
+{
+    if (ctx == NULL || output == NULL || input == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+#ifdef XSTREAM_ALIGN
+    if ((wolfssl_word)input % 4 || (wolfssl_word)output % 4) {
+    #ifndef NO_WOLFSSL_ALLOC_ALIGN
+        byte* tmp;
+        WOLFSSL_MSG("Hc128Process unaligned");
+
+        tmp = (byte*)XMALLOC(msglen, ctx->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (tmp == NULL) return MEMORY_E;
+
+        XMEMCPY(tmp, input, msglen);
+        DoProcess(ctx, tmp, tmp, msglen);
+        XMEMCPY(output, tmp, msglen);
+
+        XFREE(tmp, ctx->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+        return 0;
+    #else
+        return BAD_ALIGN_E;
+    #endif
+    }
+#endif /* XSTREAM_ALIGN */
+
+    return DoProcess(ctx, output, input, msglen);
+}
+
+
+#else /* HAVE_HC128 */
+
+
+#ifdef _MSC_VER
+    /* 4206 warning for blank file */
+    #pragma warning(disable: 4206)
+#endif
+
+
+#endif /* HAVE_HC128 */
diff --git a/client/wolfssl/wolfcrypt/src/hmac.c b/client/wolfssl/wolfcrypt/src/hmac.c
new file mode 100644
index 0000000..bcebc1c
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/hmac.c
@@ -0,0 +1,1290 @@
+/* hmac.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+
+#ifndef NO_HMAC
+
+#if defined(HAVE_FIPS) && \
+    defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+
+    /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+    #define FIPS_NO_WRAPPERS
+
+    #ifdef USE_WINDOWS_API
+        #pragma code_seg(".fipsA$b")
+        #pragma const_seg(".fipsB$b")
+    #endif
+#endif
+
+#include <wolfssl/wolfcrypt/hmac.h>
+
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+
+/* fips wrapper calls, user can call direct */
+/* If building for old FIPS. 
*/ +#if defined(HAVE_FIPS) && \ + (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2)) + + /* does init */ + int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 keySz) + { + if (hmac == NULL || (key == NULL && keySz != 0) || + !(type == WC_MD5 || type == WC_SHA || type == WC_SHA256 || + type == WC_SHA384 || type == WC_SHA512)) { + return BAD_FUNC_ARG; + } + + return HmacSetKey_fips(hmac, type, key, keySz); + } + int wc_HmacUpdate(Hmac* hmac, const byte* in, word32 sz) + { + if (hmac == NULL || (in == NULL && sz > 0)) { + return BAD_FUNC_ARG; + } + + return HmacUpdate_fips(hmac, in, sz); + } + int wc_HmacFinal(Hmac* hmac, byte* out) + { + if (hmac == NULL) { + return BAD_FUNC_ARG; + } + + return HmacFinal_fips(hmac, out); + } + int wolfSSL_GetHmacMaxSize(void) + { + return CyaSSL_GetHmacMaxSize(); + } + + int wc_HmacInit(Hmac* hmac, void* heap, int devId) + { + (void)hmac; + (void)heap; + (void)devId; + /* FIPS doesn't support: + return HmacInit(hmac, heap, devId); */ + return 0; + } + void wc_HmacFree(Hmac* hmac) + { + (void)hmac; + /* FIPS doesn't support: + HmacFree(hmac); */ + } + + #ifdef HAVE_HKDF + int wc_HKDF(int type, const byte* inKey, word32 inKeySz, + const byte* salt, word32 saltSz, + const byte* info, word32 infoSz, + byte* out, word32 outSz) + { + return HKDF(type, inKey, inKeySz, salt, saltSz, + info, infoSz, out, outSz); + } + #endif /* HAVE_HKDF */ + +#else /* else build without fips, or for new fips */ + + +int wc_HmacSizeByType(int type) +{ + int ret; + + if (!(type == WC_MD5 || type == WC_SHA || + type == WC_SHA224 || type == WC_SHA256 || + type == WC_SHA384 || type == WC_SHA512 || + type == WC_SHA3_224 || type == WC_SHA3_256 || + type == WC_SHA3_384 || type == WC_SHA3_512)) { + return BAD_FUNC_ARG; + } + + switch (type) { + #ifndef NO_MD5 + case WC_MD5: + ret = WC_MD5_DIGEST_SIZE; + break; + #endif /* !NO_MD5 */ + + #ifndef NO_SHA + case WC_SHA: + ret = WC_SHA_DIGEST_SIZE; + break; + #endif /* !NO_SHA */ + + #ifdef WOLFSSL_SHA224 + case WC_SHA224: + ret = WC_SHA224_DIGEST_SIZE; + break; + #endif /* WOLFSSL_SHA224 */ + + #ifndef NO_SHA256 + case WC_SHA256: + ret = WC_SHA256_DIGEST_SIZE; + break; + #endif /* !NO_SHA256 */ + + #ifdef WOLFSSL_SHA384 + case WC_SHA384: + ret = WC_SHA384_DIGEST_SIZE; + break; + #endif /* WOLFSSL_SHA384 */ + #ifdef WOLFSSL_SHA512 + case WC_SHA512: + ret = WC_SHA512_DIGEST_SIZE; + break; + #endif /* WOLFSSL_SHA512 */ + + #ifdef WOLFSSL_SHA3 + case WC_SHA3_224: + ret = WC_SHA3_224_DIGEST_SIZE; + break; + + case WC_SHA3_256: + ret = WC_SHA3_256_DIGEST_SIZE; + break; + + case WC_SHA3_384: + ret = WC_SHA3_384_DIGEST_SIZE; + break; + + case WC_SHA3_512: + ret = WC_SHA3_512_DIGEST_SIZE; + break; + + #endif + + default: + ret = BAD_FUNC_ARG; + break; + } + + return ret; +} + +int _InitHmac(Hmac* hmac, int type, void* heap) +{ + int ret = 0; + + switch (type) { + #ifndef NO_MD5 + case WC_MD5: + ret = wc_InitMd5(&hmac->hash.md5); + break; + #endif /* !NO_MD5 */ + + #ifndef NO_SHA + case WC_SHA: + ret = wc_InitSha(&hmac->hash.sha); + break; + #endif /* !NO_SHA */ + + #ifdef WOLFSSL_SHA224 + case WC_SHA224: + ret = wc_InitSha224(&hmac->hash.sha224); + break; + #endif /* WOLFSSL_SHA224 */ + + #ifndef NO_SHA256 + case WC_SHA256: + ret = wc_InitSha256(&hmac->hash.sha256); + break; + #endif /* !NO_SHA256 */ + + #ifdef WOLFSSL_SHA384 + case WC_SHA384: + ret = wc_InitSha384(&hmac->hash.sha384); + break; + #endif /* WOLFSSL_SHA384 */ + #ifdef WOLFSSL_SHA512 + case WC_SHA512: + ret = wc_InitSha512(&hmac->hash.sha512); + break; + #endif /* 
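+
+/* Sketch of the digest-size query above (illustrative only, compiled out):
+ * callers size output buffers from wc_HmacSizeByType() instead of
+ * hard-coding digest lengths. */
+#if 0
+static int hmac_digest_len(void)
+{
+    int len = wc_HmacSizeByType(WC_SHA256);
+    /* len == WC_SHA256_DIGEST_SIZE (32) when SHA-256 is compiled in,
+     * or BAD_FUNC_ARG for a type this build does not support */
+    return len;
+}
+#endif
+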
WOLFSSL_SHA512 */ + + #ifdef WOLFSSL_SHA3 + #ifndef WOLFSSL_NOSHA3_224 + case WC_SHA3_224: + ret = wc_InitSha3_224(&hmac->hash.sha3, heap, INVALID_DEVID); + break; + #endif + #ifndef WOLFSSL_NOSHA3_256 + case WC_SHA3_256: + ret = wc_InitSha3_256(&hmac->hash.sha3, heap, INVALID_DEVID); + break; + #endif + #ifndef WOLFSSL_NOSHA3_384 + case WC_SHA3_384: + ret = wc_InitSha3_384(&hmac->hash.sha3, heap, INVALID_DEVID); + break; + #endif + #ifndef WOLFSSL_NOSHA3_512 + case WC_SHA3_512: + ret = wc_InitSha3_512(&hmac->hash.sha3, heap, INVALID_DEVID); + break; + #endif + #endif + + default: + ret = BAD_FUNC_ARG; + break; + } + + /* default to NULL heap hint or test value */ +#ifdef WOLFSSL_HEAP_TEST + hmac->heap = (void)WOLFSSL_HEAP_TEST; +#else + hmac->heap = heap; +#endif /* WOLFSSL_HEAP_TEST */ + + return ret; +} + + +int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length) +{ + byte* ip; + byte* op; + word32 i, hmac_block_size = 0; + int ret = 0; + void* heap = NULL; + + if (hmac == NULL || (key == NULL && length != 0) || + !(type == WC_MD5 || type == WC_SHA || + type == WC_SHA224 || type == WC_SHA256 || + type == WC_SHA384 || type == WC_SHA512 || + type == WC_SHA3_224 || type == WC_SHA3_256 || + type == WC_SHA3_384 || type == WC_SHA3_512)) { + return BAD_FUNC_ARG; + } + +#ifndef HAVE_FIPS + /* if set key has already been run then make sure and free existing */ + /* This is for async and PIC32MZ situations, and just normally OK, + provided the user calls wc_HmacInit() first. That function is not + available in FIPS builds. In current FIPS builds, the hashes are + not allocating resources. */ + if (hmac->macType != WC_HASH_TYPE_NONE) { + wc_HmacFree(hmac); + } +#endif + + hmac->innerHashKeyed = 0; + hmac->macType = (byte)type; + + ret = _InitHmac(hmac, type, heap); + if (ret != 0) + return ret; + +#ifdef HAVE_FIPS + if (length < HMAC_FIPS_MIN_KEY) + return HMAC_MIN_KEYLEN_E; +#endif + +#ifdef WOLF_CRYPTO_CB + hmac->keyRaw = key; /* use buffer directly */ + hmac->keyLen = length; +#endif + + ip = (byte*)hmac->ipad; + op = (byte*)hmac->opad; + + switch (hmac->macType) { + #ifndef NO_MD5 + case WC_MD5: + hmac_block_size = WC_MD5_BLOCK_SIZE; + if (length <= WC_MD5_BLOCK_SIZE) { + if (key != NULL) { + XMEMCPY(ip, key, length); + } + } + else { + ret = wc_Md5Update(&hmac->hash.md5, key, length); + if (ret != 0) + break; + ret = wc_Md5Final(&hmac->hash.md5, ip); + if (ret != 0) + break; + length = WC_MD5_DIGEST_SIZE; + } + break; + #endif /* !NO_MD5 */ + + #ifndef NO_SHA + case WC_SHA: + hmac_block_size = WC_SHA_BLOCK_SIZE; + if (length <= WC_SHA_BLOCK_SIZE) { + if (key != NULL) { + XMEMCPY(ip, key, length); + } + } + else { + ret = wc_ShaUpdate(&hmac->hash.sha, key, length); + if (ret != 0) + break; + ret = wc_ShaFinal(&hmac->hash.sha, ip); + if (ret != 0) + break; + + length = WC_SHA_DIGEST_SIZE; + } + break; + #endif /* !NO_SHA */ + + #ifdef WOLFSSL_SHA224 + case WC_SHA224: + hmac_block_size = WC_SHA224_BLOCK_SIZE; + if (length <= WC_SHA224_BLOCK_SIZE) { + if (key != NULL) { + XMEMCPY(ip, key, length); + } + } + else { + ret = wc_Sha224Update(&hmac->hash.sha224, key, length); + if (ret != 0) + break; + ret = wc_Sha224Final(&hmac->hash.sha224, ip); + if (ret != 0) + break; + + length = WC_SHA224_DIGEST_SIZE; + } + break; + #endif /* WOLFSSL_SHA224 */ + #ifndef NO_SHA256 + case WC_SHA256: + hmac_block_size = WC_SHA256_BLOCK_SIZE; + if (length <= WC_SHA256_BLOCK_SIZE) { + if (key != NULL) { + XMEMCPY(ip, key, length); + } + } + else { + ret = wc_Sha256Update(&hmac->hash.sha256, key, 
length); + if (ret != 0) + break; + ret = wc_Sha256Final(&hmac->hash.sha256, ip); + if (ret != 0) + break; + + length = WC_SHA256_DIGEST_SIZE; + } + break; + #endif /* !NO_SHA256 */ + + #ifdef WOLFSSL_SHA384 + case WC_SHA384: + hmac_block_size = WC_SHA384_BLOCK_SIZE; + if (length <= WC_SHA384_BLOCK_SIZE) { + if (key != NULL) { + XMEMCPY(ip, key, length); + } + } + else { + ret = wc_Sha384Update(&hmac->hash.sha384, key, length); + if (ret != 0) + break; + ret = wc_Sha384Final(&hmac->hash.sha384, ip); + if (ret != 0) + break; + + length = WC_SHA384_DIGEST_SIZE; + } + break; + #endif /* WOLFSSL_SHA384 */ + #ifdef WOLFSSL_SHA512 + case WC_SHA512: + hmac_block_size = WC_SHA512_BLOCK_SIZE; + if (length <= WC_SHA512_BLOCK_SIZE) { + if (key != NULL) { + XMEMCPY(ip, key, length); + } + } + else { + ret = wc_Sha512Update(&hmac->hash.sha512, key, length); + if (ret != 0) + break; + ret = wc_Sha512Final(&hmac->hash.sha512, ip); + if (ret != 0) + break; + + length = WC_SHA512_DIGEST_SIZE; + } + break; + #endif /* WOLFSSL_SHA512 */ + + #ifdef WOLFSSL_SHA3 + #ifndef WOLFSSL_NOSHA3_224 + case WC_SHA3_224: + hmac_block_size = WC_SHA3_224_BLOCK_SIZE; + if (length <= WC_SHA3_224_BLOCK_SIZE) { + if (key != NULL) { + XMEMCPY(ip, key, length); + } + } + else { + ret = wc_Sha3_224_Update(&hmac->hash.sha3, key, length); + if (ret != 0) + break; + ret = wc_Sha3_224_Final(&hmac->hash.sha3, ip); + if (ret != 0) + break; + + length = WC_SHA3_224_DIGEST_SIZE; + } + break; + #endif + #ifndef WOLFSSL_NOSHA3_256 + case WC_SHA3_256: + hmac_block_size = WC_SHA3_256_BLOCK_SIZE; + if (length <= WC_SHA3_256_BLOCK_SIZE) { + if (key != NULL) { + XMEMCPY(ip, key, length); + } + } + else { + ret = wc_Sha3_256_Update(&hmac->hash.sha3, key, length); + if (ret != 0) + break; + ret = wc_Sha3_256_Final(&hmac->hash.sha3, ip); + if (ret != 0) + break; + + length = WC_SHA3_256_DIGEST_SIZE; + } + break; + #endif + #ifndef WOLFSSL_NOSHA3_384 + case WC_SHA3_384: + hmac_block_size = WC_SHA3_384_BLOCK_SIZE; + if (length <= WC_SHA3_384_BLOCK_SIZE) { + if (key != NULL) { + XMEMCPY(ip, key, length); + } + } + else { + ret = wc_Sha3_384_Update(&hmac->hash.sha3, key, length); + if (ret != 0) + break; + ret = wc_Sha3_384_Final(&hmac->hash.sha3, ip); + if (ret != 0) + break; + + length = WC_SHA3_384_DIGEST_SIZE; + } + break; + #endif + #ifndef WOLFSSL_NOSHA3_512 + case WC_SHA3_512: + hmac_block_size = WC_SHA3_512_BLOCK_SIZE; + if (length <= WC_SHA3_512_BLOCK_SIZE) { + if (key != NULL) { + XMEMCPY(ip, key, length); + } + } + else { + ret = wc_Sha3_512_Update(&hmac->hash.sha3, key, length); + if (ret != 0) + break; + ret = wc_Sha3_512_Final(&hmac->hash.sha3, ip); + if (ret != 0) + break; + + length = WC_SHA3_512_DIGEST_SIZE; + } + break; + #endif + #endif /* WOLFSSL_SHA3 */ + + default: + return BAD_FUNC_ARG; + } + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC) + if (hmac->asyncDev.marker == WOLFSSL_ASYNC_MARKER_HMAC) { + #if defined(HAVE_INTEL_QA) || defined(HAVE_CAVIUM) + #ifdef HAVE_INTEL_QA + if (IntelQaHmacGetType(hmac->macType, NULL) == 0) + #endif + { + if (length > hmac_block_size) + length = hmac_block_size; + /* update key length */ + hmac->keyLen = (word16)length; + + return ret; + } + /* no need to pad below */ + #endif + } +#endif + + if (ret == 0) { + if (length < hmac_block_size) + XMEMSET(ip + length, 0, hmac_block_size - length); + + for(i = 0; i < hmac_block_size; i++) { + op[i] = ip[i] ^ OPAD; + ip[i] ^= IPAD; + } + } + + return ret; +} + + +static int HmacKeyInnerHash(Hmac* hmac) +{ + int ret = 0; + + switch 
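+
+/* The pad construction wc_HmacSetKey() performs above, in isolation
+ * (illustrative only, compiled out; derive_pads is a hypothetical helper):
+ * a key longer than the hash block is first digested, a shorter one is
+ * zero-padded, then the RFC 2104 constants are XORed in. */
+#if 0
+static void derive_pads(byte* ip, byte* op, word32 blockSz)
+{
+    word32 i;
+    for (i = 0; i < blockSz; i++) {
+        op[i] = ip[i] ^ 0x5c;   /* OPAD */
+        ip[i] = ip[i] ^ 0x36;   /* IPAD */
+    }
+}
+#endif
+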
(hmac->macType) { + #ifndef NO_MD5 + case WC_MD5: + ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->ipad, + WC_MD5_BLOCK_SIZE); + break; + #endif /* !NO_MD5 */ + + #ifndef NO_SHA + case WC_SHA: + ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->ipad, + WC_SHA_BLOCK_SIZE); + break; + #endif /* !NO_SHA */ + + #ifdef WOLFSSL_SHA224 + case WC_SHA224: + ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->ipad, + WC_SHA224_BLOCK_SIZE); + break; + #endif /* WOLFSSL_SHA224 */ + #ifndef NO_SHA256 + case WC_SHA256: + ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->ipad, + WC_SHA256_BLOCK_SIZE); + break; + #endif /* !NO_SHA256 */ + + #ifdef WOLFSSL_SHA384 + case WC_SHA384: + ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->ipad, + WC_SHA384_BLOCK_SIZE); + break; + #endif /* WOLFSSL_SHA384 */ + #ifdef WOLFSSL_SHA512 + case WC_SHA512: + ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->ipad, + WC_SHA512_BLOCK_SIZE); + break; + #endif /* WOLFSSL_SHA512 */ + + #ifdef WOLFSSL_SHA3 + #ifndef WOLFSSL_NOSHA3_224 + case WC_SHA3_224: + ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->ipad, + WC_SHA3_224_BLOCK_SIZE); + break; + #endif + #ifndef WOLFSSL_NOSHA3_256 + case WC_SHA3_256: + ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->ipad, + WC_SHA3_256_BLOCK_SIZE); + break; + #endif + #ifndef WOLFSSL_NOSHA3_384 + case WC_SHA3_384: + ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->ipad, + WC_SHA3_384_BLOCK_SIZE); + break; + #endif + #ifndef WOLFSSL_NOSHA3_512 + case WC_SHA3_512: + ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->ipad, + WC_SHA3_512_BLOCK_SIZE); + break; + #endif + #endif /* WOLFSSL_SHA3 */ + + default: + break; + } + + if (ret == 0) + hmac->innerHashKeyed = WC_HMAC_INNER_HASH_KEYED_SW; + + return ret; +} + + +int wc_HmacUpdate(Hmac* hmac, const byte* msg, word32 length) +{ + int ret = 0; + + if (hmac == NULL || (msg == NULL && length > 0)) { + return BAD_FUNC_ARG; + } + +#ifdef WOLF_CRYPTO_CB + if (hmac->devId != INVALID_DEVID) { + ret = wc_CryptoCb_Hmac(hmac, hmac->macType, msg, length, NULL); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + ret = 0; /* reset error code */ + } +#endif +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC) + if (hmac->asyncDev.marker == WOLFSSL_ASYNC_MARKER_HMAC) { + #if defined(HAVE_CAVIUM) + return NitroxHmacUpdate(hmac, msg, length); + #elif defined(HAVE_INTEL_QA) + if (IntelQaHmacGetType(hmac->macType, NULL) == 0) { + return IntelQaHmac(&hmac->asyncDev, hmac->macType, + (byte*)hmac->ipad, hmac->keyLen, NULL, msg, length); + } + #endif + } +#endif /* WOLFSSL_ASYNC_CRYPT */ + + if (!hmac->innerHashKeyed) { + ret = HmacKeyInnerHash(hmac); + if (ret != 0) + return ret; + } + + switch (hmac->macType) { + #ifndef NO_MD5 + case WC_MD5: + ret = wc_Md5Update(&hmac->hash.md5, msg, length); + break; + #endif /* !NO_MD5 */ + + #ifndef NO_SHA + case WC_SHA: + ret = wc_ShaUpdate(&hmac->hash.sha, msg, length); + break; + #endif /* !NO_SHA */ + + #ifdef WOLFSSL_SHA224 + case WC_SHA224: + ret = wc_Sha224Update(&hmac->hash.sha224, msg, length); + break; + #endif /* WOLFSSL_SHA224 */ + + #ifndef NO_SHA256 + case WC_SHA256: + ret = wc_Sha256Update(&hmac->hash.sha256, msg, length); + break; + #endif /* !NO_SHA256 */ + + #ifdef WOLFSSL_SHA384 + case WC_SHA384: + ret = wc_Sha384Update(&hmac->hash.sha384, msg, length); + break; + #endif /* WOLFSSL_SHA384 */ + #ifdef WOLFSSL_SHA512 + case WC_SHA512: + ret = wc_Sha512Update(&hmac->hash.sha512, msg, length); + break; + #endif /* 
WOLFSSL_SHA512 */ + + #ifdef WOLFSSL_SHA3 + #ifndef WOLFSSL_NOSHA3_224 + case WC_SHA3_224: + ret = wc_Sha3_224_Update(&hmac->hash.sha3, msg, length); + break; + #endif + #ifndef WOLFSSL_NOSHA3_256 + case WC_SHA3_256: + ret = wc_Sha3_256_Update(&hmac->hash.sha3, msg, length); + break; + #endif + #ifndef WOLFSSL_NOSHA3_384 + case WC_SHA3_384: + ret = wc_Sha3_384_Update(&hmac->hash.sha3, msg, length); + break; + #endif + #ifndef WOLFSSL_NOSHA3_512 + case WC_SHA3_512: + ret = wc_Sha3_512_Update(&hmac->hash.sha3, msg, length); + break; + #endif + #endif /* WOLFSSL_SHA3 */ + + default: + break; + } + + return ret; +} + + +int wc_HmacFinal(Hmac* hmac, byte* hash) +{ + int ret; + + if (hmac == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef WOLF_CRYPTO_CB + if (hmac->devId != INVALID_DEVID) { + ret = wc_CryptoCb_Hmac(hmac, hmac->macType, NULL, 0, hash); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + } +#endif +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC) + if (hmac->asyncDev.marker == WOLFSSL_ASYNC_MARKER_HMAC) { + int hashLen = wc_HmacSizeByType(hmac->macType); + if (hashLen <= 0) + return hashLen; + + #if defined(HAVE_CAVIUM) + return NitroxHmacFinal(hmac, hash, hashLen); + #elif defined(HAVE_INTEL_QA) + if (IntelQaHmacGetType(hmac->macType, NULL) == 0) { + return IntelQaHmac(&hmac->asyncDev, hmac->macType, + (byte*)hmac->ipad, hmac->keyLen, hash, NULL, hashLen); + } + #endif + } +#endif /* WOLFSSL_ASYNC_CRYPT */ + + if (!hmac->innerHashKeyed) { + ret = HmacKeyInnerHash(hmac); + if (ret != 0) + return ret; + } + + switch (hmac->macType) { + #ifndef NO_MD5 + case WC_MD5: + ret = wc_Md5Final(&hmac->hash.md5, (byte*)hmac->innerHash); + if (ret != 0) + break; + ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->opad, + WC_MD5_BLOCK_SIZE); + if (ret != 0) + break; + ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->innerHash, + WC_MD5_DIGEST_SIZE); + if (ret != 0) + break; + ret = wc_Md5Final(&hmac->hash.md5, hash); + break; + #endif /* !NO_MD5 */ + + #ifndef NO_SHA + case WC_SHA: + ret = wc_ShaFinal(&hmac->hash.sha, (byte*)hmac->innerHash); + if (ret != 0) + break; + ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->opad, + WC_SHA_BLOCK_SIZE); + if (ret != 0) + break; + ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->innerHash, + WC_SHA_DIGEST_SIZE); + if (ret != 0) + break; + ret = wc_ShaFinal(&hmac->hash.sha, hash); + break; + #endif /* !NO_SHA */ + + #ifdef WOLFSSL_SHA224 + case WC_SHA224: + ret = wc_Sha224Final(&hmac->hash.sha224, (byte*)hmac->innerHash); + if (ret != 0) + break; + ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->opad, + WC_SHA224_BLOCK_SIZE); + if (ret != 0) + break; + ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->innerHash, + WC_SHA224_DIGEST_SIZE); + if (ret != 0) + break; + ret = wc_Sha224Final(&hmac->hash.sha224, hash); + if (ret != 0) + break; + break; + #endif /* WOLFSSL_SHA224 */ + #ifndef NO_SHA256 + case WC_SHA256: + ret = wc_Sha256Final(&hmac->hash.sha256, (byte*)hmac->innerHash); + if (ret != 0) + break; + ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->opad, + WC_SHA256_BLOCK_SIZE); + if (ret != 0) + break; + ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->innerHash, + WC_SHA256_DIGEST_SIZE); + if (ret != 0) + break; + ret = wc_Sha256Final(&hmac->hash.sha256, hash); + break; + #endif /* !NO_SHA256 */ + + #ifdef WOLFSSL_SHA384 + case WC_SHA384: + ret = wc_Sha384Final(&hmac->hash.sha384, (byte*)hmac->innerHash); + if (ret != 0) + break; + ret = 
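+
+/* wc_HmacUpdate() above may be called any number of times before
+ * wc_HmacFinal(); feeding the message in chunks produces the same tag as a
+ * single call (illustrative only, compiled out): */
+#if 0
+static int hmac_update_in_chunks(Hmac* hmac, const byte* msg, word32 len)
+{
+    word32 half = len / 2;
+    int ret = wc_HmacUpdate(hmac, msg, half);              /* first chunk */
+    if (ret == 0)
+        ret = wc_HmacUpdate(hmac, msg + half, len - half); /* the rest */
+    return ret;  /* equivalent to wc_HmacUpdate(hmac, msg, len) */
+}
+#endif
+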
wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->opad, + WC_SHA384_BLOCK_SIZE); + if (ret != 0) + break; + ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->innerHash, + WC_SHA384_DIGEST_SIZE); + if (ret != 0) + break; + ret = wc_Sha384Final(&hmac->hash.sha384, hash); + break; + #endif /* WOLFSSL_SHA384 */ + #ifdef WOLFSSL_SHA512 + case WC_SHA512: + ret = wc_Sha512Final(&hmac->hash.sha512, (byte*)hmac->innerHash); + if (ret != 0) + break; + ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->opad, + WC_SHA512_BLOCK_SIZE); + if (ret != 0) + break; + ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->innerHash, + WC_SHA512_DIGEST_SIZE); + if (ret != 0) + break; + ret = wc_Sha512Final(&hmac->hash.sha512, hash); + break; + #endif /* WOLFSSL_SHA512 */ + + #ifdef WOLFSSL_SHA3 + #ifndef WOLFSSL_NOSHA3_224 + case WC_SHA3_224: + ret = wc_Sha3_224_Final(&hmac->hash.sha3, (byte*)hmac->innerHash); + if (ret != 0) + break; + ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->opad, + WC_SHA3_224_BLOCK_SIZE); + if (ret != 0) + break; + ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->innerHash, + WC_SHA3_224_DIGEST_SIZE); + if (ret != 0) + break; + ret = wc_Sha3_224_Final(&hmac->hash.sha3, hash); + break; + #endif + #ifndef WOLFSSL_NOSHA3_256 + case WC_SHA3_256: + ret = wc_Sha3_256_Final(&hmac->hash.sha3, (byte*)hmac->innerHash); + if (ret != 0) + break; + ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->opad, + WC_SHA3_256_BLOCK_SIZE); + if (ret != 0) + break; + ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->innerHash, + WC_SHA3_256_DIGEST_SIZE); + if (ret != 0) + break; + ret = wc_Sha3_256_Final(&hmac->hash.sha3, hash); + break; + #endif + #ifndef WOLFSSL_NOSHA3_384 + case WC_SHA3_384: + ret = wc_Sha3_384_Final(&hmac->hash.sha3, (byte*)hmac->innerHash); + if (ret != 0) + break; + ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->opad, + WC_SHA3_384_BLOCK_SIZE); + if (ret != 0) + break; + ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->innerHash, + WC_SHA3_384_DIGEST_SIZE); + if (ret != 0) + break; + ret = wc_Sha3_384_Final(&hmac->hash.sha3, hash); + break; + #endif + #ifndef WOLFSSL_NOSHA3_512 + case WC_SHA3_512: + ret = wc_Sha3_512_Final(&hmac->hash.sha3, (byte*)hmac->innerHash); + if (ret != 0) + break; + ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->opad, + WC_SHA3_512_BLOCK_SIZE); + if (ret != 0) + break; + ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->innerHash, + WC_SHA3_512_DIGEST_SIZE); + if (ret != 0) + break; + ret = wc_Sha3_512_Final(&hmac->hash.sha3, hash); + break; + #endif + #endif /* WOLFSSL_SHA3 */ + + default: + ret = BAD_FUNC_ARG; + break; + } + + if (ret == 0) { + hmac->innerHashKeyed = 0; + } + + return ret; +} + + +/* Initialize Hmac for use with async device */ +int wc_HmacInit(Hmac* hmac, void* heap, int devId) +{ + int ret = 0; + + if (hmac == NULL) + return BAD_FUNC_ARG; + + XMEMSET(hmac, 0, sizeof(Hmac)); + hmac->macType = WC_HASH_TYPE_NONE; + hmac->heap = heap; +#ifdef WOLF_CRYPTO_CB + hmac->devId = devId; + hmac->devCtx = NULL; +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC) + ret = wolfAsync_DevCtxInit(&hmac->asyncDev, WOLFSSL_ASYNC_MARKER_HMAC, + hmac->heap, devId); +#else + (void)devId; +#endif /* WOLFSSL_ASYNC_CRYPT */ + + return ret; +} + +#ifdef HAVE_PKCS11 +int wc_HmacInit_Id(Hmac* hmac, unsigned char* id, int len, void* heap, + int devId) +{ + int ret = 0; + + if (hmac == NULL) + ret = BAD_FUNC_ARG; + if (ret == 0 && (len < 0 || len > HMAC_MAX_ID_LEN)) + ret = 
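+
+/* End-to-end sketch of the HMAC API in this file (illustrative only,
+ * compiled out): init, key, update, final, free. */
+#if 0
+#include <wolfssl/wolfcrypt/hmac.h>
+
+static int hmac_sha256_tag(const byte* key, word32 keySz,
+                           const byte* msg, word32 msgSz,
+                           byte tag[WC_SHA256_DIGEST_SIZE])
+{
+    Hmac hmac;
+    int  ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+    if (ret == 0)
+        ret = wc_HmacSetKey(&hmac, WC_SHA256, key, keySz);
+    if (ret == 0)
+        ret = wc_HmacUpdate(&hmac, msg, msgSz);
+    if (ret == 0)
+        ret = wc_HmacFinal(&hmac, tag);
+    wc_HmacFree(&hmac);
+    return ret;
+}
+#endif
+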
BUFFER_E; + + if (ret == 0) + ret = wc_HmacInit(hmac, heap, devId); + if (ret == 0) { + XMEMCPY(hmac->id, id, len); + hmac->idLen = len; + } + + return ret; +} +#endif + +/* Free Hmac from use with async device */ +void wc_HmacFree(Hmac* hmac) +{ + if (hmac == NULL) + return; + +#ifdef WOLF_CRYPTO_CB + /* handle cleanup case where final is not called */ + if (hmac->devId != INVALID_DEVID && hmac->devCtx != NULL) { + int ret; + byte finalHash[WC_HMAC_BLOCK_SIZE]; + ret = wc_CryptoCb_Hmac(hmac, hmac->macType, NULL, 0, finalHash); + (void)ret; /* must ignore return code here */ + (void)finalHash; + } +#endif + + switch (hmac->macType) { + #ifndef NO_MD5 + case WC_MD5: + wc_Md5Free(&hmac->hash.md5); + break; + #endif /* !NO_MD5 */ + + #ifndef NO_SHA + case WC_SHA: + wc_ShaFree(&hmac->hash.sha); + break; + #endif /* !NO_SHA */ + + #ifdef WOLFSSL_SHA224 + case WC_SHA224: + wc_Sha224Free(&hmac->hash.sha224); + break; + #endif /* WOLFSSL_SHA224 */ + #ifndef NO_SHA256 + case WC_SHA256: + wc_Sha256Free(&hmac->hash.sha256); + break; + #endif /* !NO_SHA256 */ + + #ifdef WOLFSSL_SHA384 + case WC_SHA384: + wc_Sha384Free(&hmac->hash.sha384); + break; + #endif /* WOLFSSL_SHA384 */ + #ifdef WOLFSSL_SHA512 + case WC_SHA512: + wc_Sha512Free(&hmac->hash.sha512); + break; + #endif /* WOLFSSL_SHA512 */ + + #ifdef WOLFSSL_SHA3 + #ifndef WOLFSSL_NOSHA3_224 + case WC_SHA3_224: + wc_Sha3_224_Free(&hmac->hash.sha3); + break; + #endif + #ifndef WOLFSSL_NOSHA3_256 + case WC_SHA3_256: + wc_Sha3_256_Free(&hmac->hash.sha3); + break; + #endif + #ifndef WOLFSSL_NOSHA3_384 + case WC_SHA3_384: + wc_Sha3_384_Free(&hmac->hash.sha3); + break; + #endif + #ifndef WOLFSSL_NOSHA3_512 + case WC_SHA3_512: + wc_Sha3_512_Free(&hmac->hash.sha3); + break; + #endif + #endif /* WOLFSSL_SHA3 */ + + default: + break; + } + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC) + wolfAsync_DevCtxFree(&hmac->asyncDev, WOLFSSL_ASYNC_MARKER_HMAC); +#endif /* WOLFSSL_ASYNC_CRYPT */ + + switch (hmac->macType) { + #ifndef NO_MD5 + case WC_MD5: + wc_Md5Free(&hmac->hash.md5); + break; + #endif /* !NO_MD5 */ + + #ifndef NO_SHA + case WC_SHA: + wc_ShaFree(&hmac->hash.sha); + break; + #endif /* !NO_SHA */ + + #ifdef WOLFSSL_SHA224 + case WC_SHA224: + wc_Sha224Free(&hmac->hash.sha224); + break; + #endif /* WOLFSSL_SHA224 */ + #ifndef NO_SHA256 + case WC_SHA256: + wc_Sha256Free(&hmac->hash.sha256); + break; + #endif /* !NO_SHA256 */ + + #ifdef WOLFSSL_SHA512 + #ifdef WOLFSSL_SHA384 + case WC_SHA384: + wc_Sha384Free(&hmac->hash.sha384); + break; + #endif /* WOLFSSL_SHA384 */ + case WC_SHA512: + wc_Sha512Free(&hmac->hash.sha512); + break; + #endif /* WOLFSSL_SHA512 */ + } +} + +int wolfSSL_GetHmacMaxSize(void) +{ + return WC_MAX_DIGEST_SIZE; +} + +#ifdef HAVE_HKDF + /* HMAC-KDF-Extract. + * RFC 5869 - HMAC-based Extract-and-Expand Key Derivation Function (HKDF). + * + * type The hash algorithm type. + * salt The optional salt value. + * saltSz The size of the salt. + * inKey The input keying material. + * inKeySz The size of the input keying material. + * out The pseudorandom key with the length that of the hash. + * returns 0 on success, otherwise failure. 
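+
+/* RFC 5869 usage sketch for the three HKDF entry points that follow
+ * (illustrative only, compiled out): a single wc_HKDF() call runs Extract
+ * and then Expand; per RFC 5869 the output may be at most 255 times the
+ * hash length. */
+#if 0
+static int hkdf_derive_sha256(const byte* ikm, word32 ikmSz,
+                              const byte* salt, word32 saltSz,
+                              const byte* info, word32 infoSz,
+                              byte* okm, word32 okmSz)
+{
+    return wc_HKDF(WC_SHA256, ikm, ikmSz, salt, saltSz,
+                   info, infoSz, okm, okmSz);
+}
+#endif
+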
+ */ + int wc_HKDF_Extract(int type, const byte* salt, word32 saltSz, + const byte* inKey, word32 inKeySz, byte* out) + { + byte tmp[WC_MAX_DIGEST_SIZE]; /* localSalt helper */ + Hmac myHmac; + int ret; + const byte* localSalt; /* either points to user input or tmp */ + int hashSz; + + ret = wc_HmacSizeByType(type); + if (ret < 0) + return ret; + + hashSz = ret; + localSalt = salt; + if (localSalt == NULL) { + XMEMSET(tmp, 0, hashSz); + localSalt = tmp; + saltSz = hashSz; + } + + ret = wc_HmacInit(&myHmac, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_HmacSetKey(&myHmac, type, localSalt, saltSz); + if (ret == 0) + ret = wc_HmacUpdate(&myHmac, inKey, inKeySz); + if (ret == 0) + ret = wc_HmacFinal(&myHmac, out); + wc_HmacFree(&myHmac); + } + + return ret; + } + + /* HMAC-KDF-Expand. + * RFC 5869 - HMAC-based Extract-and-Expand Key Derivation Function (HKDF). + * + * type The hash algorithm type. + * inKey The input key. + * inKeySz The size of the input key. + * info The application specific information. + * infoSz The size of the application specific information. + * out The output keying material. + * returns 0 on success, otherwise failure. + */ + int wc_HKDF_Expand(int type, const byte* inKey, word32 inKeySz, + const byte* info, word32 infoSz, byte* out, word32 outSz) + { + byte tmp[WC_MAX_DIGEST_SIZE]; + Hmac myHmac; + int ret = 0; + word32 outIdx = 0; + word32 hashSz = wc_HmacSizeByType(type); + byte n = 0x1; + + ret = wc_HmacInit(&myHmac, NULL, INVALID_DEVID); + if (ret != 0) + return ret; + + while (outIdx < outSz) { + int tmpSz = (n == 1) ? 0 : hashSz; + word32 left = outSz - outIdx; + + ret = wc_HmacSetKey(&myHmac, type, inKey, inKeySz); + if (ret != 0) + break; + ret = wc_HmacUpdate(&myHmac, tmp, tmpSz); + if (ret != 0) + break; + ret = wc_HmacUpdate(&myHmac, info, infoSz); + if (ret != 0) + break; + ret = wc_HmacUpdate(&myHmac, &n, 1); + if (ret != 0) + break; + ret = wc_HmacFinal(&myHmac, tmp); + if (ret != 0) + break; + + left = min(left, hashSz); + XMEMCPY(out+outIdx, tmp, left); + + outIdx += hashSz; + n++; + } + + wc_HmacFree(&myHmac); + + return ret; + } + + /* HMAC-KDF. + * RFC 5869 - HMAC-based Extract-and-Expand Key Derivation Function (HKDF). + * + * type The hash algorithm type. + * inKey The input keying material. + * inKeySz The size of the input keying material. + * salt The optional salt value. + * saltSz The size of the salt. + * info The application specific information. + * infoSz The size of the application specific information. + * out The output keying material. + * returns 0 on success, otherwise failure. + */ + int wc_HKDF(int type, const byte* inKey, word32 inKeySz, + const byte* salt, word32 saltSz, + const byte* info, word32 infoSz, + byte* out, word32 outSz) + { + byte prk[WC_MAX_DIGEST_SIZE]; + int hashSz = wc_HmacSizeByType(type); + int ret; + + if (hashSz < 0) + return BAD_FUNC_ARG; + + ret = wc_HKDF_Extract(type, salt, saltSz, inKey, inKeySz, prk); + if (ret != 0) + return ret; + + return wc_HKDF_Expand(type, prk, hashSz, info, infoSz, out, outSz); + } + +#endif /* HAVE_HKDF */ + +#endif /* HAVE_FIPS */ +#endif /* NO_HMAC */ diff --git a/client/wolfssl/wolfcrypt/src/idea.c b/client/wolfssl/wolfcrypt/src/idea.c new file mode 100644 index 0000000..600c906 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/idea.c @@ -0,0 +1,303 @@ +/* idea.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. 
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_IDEA
+
+#include <wolfssl/wolfcrypt/idea.h>
+
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+/* multiplication of x and y modulo 2^16+1
+ * IDEA specifies a special case when an entry value (x or y) is 0:
+ * it must then be replaced by 2^16
+ */
+static WC_INLINE word16 idea_mult(word16 x, word16 y)
+{
+    long mul, res;
+
+    mul = (long)x * (long)y;
+    if (mul) {
+        res = (mul & IDEA_MASK) - ((word32)mul >> 16);
+        if (res <= 0)
+            res += IDEA_MODULO;
+
+        return (word16) (res & IDEA_MASK);
+    }
+
+    if (!x)
+        return ((IDEA_MODULO - y) & IDEA_MASK);
+
+    /* !y */
+    return ((IDEA_MODULO - x) & IDEA_MASK);
+}
+
+/* compute 1/a modulo 2^16+1 using the extended Euclidean algorithm
+ * adapted from fp_invmod */
+static WC_INLINE word16 idea_invmod(word16 x)
+{
+    int u, v, b, d;
+
+    if (x <= 1)
+        return x;
+
+    u = IDEA_MODULO;
+    v = x;
+    d = 1;
+    b = 0;
+
+    do {
+        while (!(u & 1)) {
+            u >>= 1;
+            if (b & 1)
+                b -= IDEA_MODULO;
+            b >>= 1;
+        }
+
+        while (!(v & 1)) {
+            v >>= 1;
+            if (d & 1) {
+                d -= IDEA_MODULO;
+            }
+            d >>= 1;
+        }
+
+        if (u >= v) {
+            u -= v;
+            b -= d;
+        } else {
+            v -= u;
+            d -= b;
+        }
+    } while (u != 0);
+
+    /* d is now the inverse, make it positive if required */
+    while (d < 0)
+        d += IDEA_MODULO;
+
+    /* d must be < IDEA_MODULO */
+    while (d >= (int)IDEA_MODULO)
+        d -= IDEA_MODULO;
+
+    return (word16)(d & IDEA_MASK);
+}
+
+/* generate the 52 16-bit key sub-blocks from the 128-bit key */
+int wc_IdeaSetKey(Idea *idea, const byte* key, word16 keySz,
+                  const byte *iv, int dir)
+{
+    word16 idx = 0;
+    word32 t;
+    short  i;
+
+    if (idea == NULL || key == NULL || keySz != IDEA_KEY_SIZE ||
+        (dir != IDEA_ENCRYPTION && dir != IDEA_DECRYPTION))
+        return BAD_FUNC_ARG;
+
+    /* initial key schedule for 0 -> 7 */
+    for (i = 0; i < IDEA_ROUNDS; i++) {
+        idea->skey[i]  = (word16)key[idx++] << 8;
+        idea->skey[i] |= (word16)key[idx++];
+    }
+
+    /* shift phase key schedule for 8 -> 51 */
+    for (i = IDEA_ROUNDS; i < IDEA_SK_NUM; i++) {
+        t = (word32)idea->skey[((i+1) & 7) ? i-7 : i-15] << 9;
+        t |= (word32)idea->skey[((i+2) & 7) < 2 ? 
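+
+/* Worked instance of the mod-(2^16+1) arithmetic above (illustrative only,
+ * compiled out): operands are 16-bit residues in which the value 0 encodes
+ * 2^16. */
+#if 0
+static int idea_mult_selftest(void)
+{
+    /* 0 encodes 2^16: 65536 * 1 mod 65537 = 65536, which encodes back to 0 */
+    if (idea_mult(0, 1) != 0)
+        return -1;
+    /* idea_invmod() really inverts: 5 * 26215 = 131075 = 2*65537 + 1 */
+    if (idea_mult(5, idea_invmod(5)) != 1)
+        return -1;
+    return 0;
+}
+#endif
+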
i-14 : i-6] >> 7; + idea->skey[i] = (word16)(t & IDEA_MASK); + } + + /* compute decryption key from encryption key */ + if (dir == IDEA_DECRYPTION) { + word16 enckey[IDEA_SK_NUM]; + + /* put encryption key in tmp buffer */ + XMEMCPY(enckey, idea->skey, sizeof(idea->skey)); + + idx = 0; + + idea->skey[6*IDEA_ROUNDS] = idea_invmod(enckey[idx++]); + idea->skey[6*IDEA_ROUNDS+1] = (IDEA_2EXP16 - enckey[idx++]) & IDEA_MASK; + idea->skey[6*IDEA_ROUNDS+2] = (IDEA_2EXP16 - enckey[idx++]) & IDEA_MASK; + idea->skey[6*IDEA_ROUNDS+3] = idea_invmod(enckey[idx++]); + + for (i = 6*(IDEA_ROUNDS-1); i >= 0; i -= 6) { + idea->skey[i+4] = enckey[idx++]; + idea->skey[i+5] = enckey[idx++]; + + idea->skey[i] = idea_invmod(enckey[idx++]); + if (i) { + idea->skey[i+2] = (IDEA_2EXP16 - enckey[idx++]) & IDEA_MASK; + idea->skey[i+1] = (IDEA_2EXP16 - enckey[idx++]) & IDEA_MASK; + } + else { + idea->skey[1] = (IDEA_2EXP16 - enckey[idx++]) & IDEA_MASK; + idea->skey[2] = (IDEA_2EXP16 - enckey[idx++]) & IDEA_MASK; + } + + idea->skey[i+3] = idea_invmod(enckey[idx++]); + } + + /* erase temporary buffer */ + ForceZero(enckey, sizeof(enckey)); + } + + /* set the iv */ + return wc_IdeaSetIV(idea, iv); +} + +/* set the IV in the Idea key structure */ +int wc_IdeaSetIV(Idea *idea, const byte* iv) +{ + if (idea == NULL) + return BAD_FUNC_ARG; + + if (iv != NULL) + XMEMCPY(idea->reg, iv, IDEA_BLOCK_SIZE); + else + XMEMSET(idea->reg, 0, IDEA_BLOCK_SIZE); + + return 0; +} + +/* encryption/decryption for a block (64 bits) + */ +int wc_IdeaCipher(Idea *idea, byte* out, const byte* in) +{ + word32 t1, t2; + word16 i, skey_idx = 0, idx = 0; + word16 x[4]; + + if (idea == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + /* put input byte block in word16 */ + for (i = 0; i < IDEA_BLOCK_SIZE/2; i++) { + x[i] = (word16)in[idx++] << 8; + x[i] |= (word16)in[idx++]; + } + + for (i = 0; i < IDEA_ROUNDS; i++) { + x[0] = idea_mult(x[0], idea->skey[skey_idx++]); + x[1] = ((word32)x[1] + (word32)idea->skey[skey_idx++]) & IDEA_MASK; + x[2] = ((word32)x[2] + (word32)idea->skey[skey_idx++]) & IDEA_MASK; + x[3] = idea_mult(x[3], idea->skey[skey_idx++]); + + t2 = x[0] ^ x[2]; + t2 = idea_mult((word16)t2, idea->skey[skey_idx++]); + t1 = (t2 + (x[1] ^ x[3])) & IDEA_MASK; + t1 = idea_mult((word16)t1, idea->skey[skey_idx++]); + t2 = (t1 + t2) & IDEA_MASK; + + x[0] ^= t1; + x[3] ^= t2; + + t2 ^= x[1]; + x[1] = x[2] ^ (word16)t1; + x[2] = (word16)t2; + } + + x[0] = idea_mult(x[0], idea->skey[skey_idx++]); + out[0] = (x[0] >> 8) & 0xFF; + out[1] = x[0] & 0xFF; + + x[2] = ((word32)x[2] + (word32)idea->skey[skey_idx++]) & IDEA_MASK; + out[2] = (x[2] >> 8) & 0xFF; + out[3] = x[2] & 0xFF; + + x[1] = ((word32)x[1] + (word32)idea->skey[skey_idx++]) & IDEA_MASK; + out[4] = (x[1] >> 8) & 0xFF; + out[5] = x[1] & 0xFF; + + x[3] = idea_mult(x[3], idea->skey[skey_idx++]); + out[6] = (x[3] >> 8) & 0xFF; + out[7] = x[3] & 0xFF; + + return 0; +} + +int wc_IdeaCbcEncrypt(Idea *idea, byte* out, const byte* in, word32 len) +{ + int blocks; + int ret; + + if (idea == NULL || out == NULL || in == NULL) + return BAD_FUNC_ARG; + + blocks = len / IDEA_BLOCK_SIZE; + while (blocks--) { + xorbuf((byte*)idea->reg, in, IDEA_BLOCK_SIZE); + ret = wc_IdeaCipher(idea, (byte*)idea->reg, (byte*)idea->reg); + if (ret != 0) { + return ret; + } + + XMEMCPY(out, idea->reg, IDEA_BLOCK_SIZE); + + out += IDEA_BLOCK_SIZE; + in += IDEA_BLOCK_SIZE; + } + + return 0; +} + +int wc_IdeaCbcDecrypt(Idea *idea, byte* out, const byte* in, word32 len) +{ + int blocks; + int ret; + + if (idea 
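+
+/* CBC usage sketch for the IDEA primitives above (illustrative only,
+ * compiled out; assumes <wolfssl/wolfcrypt/idea.h>): the 8-byte IV is loaded
+ * by wc_IdeaSetKey(), and lengths must be a multiple of IDEA_BLOCK_SIZE. */
+#if 0
+#include <wolfssl/wolfcrypt/idea.h>
+
+static int idea_cbc_roundtrip(const byte key[IDEA_KEY_SIZE],
+                              const byte iv[IDEA_BLOCK_SIZE],
+                              byte* buf, word32 len /* multiple of 8 */)
+{
+    Idea enc, dec;
+    int  ret = wc_IdeaSetKey(&enc, key, IDEA_KEY_SIZE, iv, IDEA_ENCRYPTION);
+    if (ret == 0)
+        ret = wc_IdeaSetKey(&dec, key, IDEA_KEY_SIZE, iv, IDEA_DECRYPTION);
+    if (ret == 0)
+        ret = wc_IdeaCbcEncrypt(&enc, buf, buf, len);   /* in place */
+    if (ret == 0)
+        ret = wc_IdeaCbcDecrypt(&dec, buf, buf, len);   /* back to plaintext */
+    return ret;
+}
+#endif
+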
== NULL || out == NULL || in == NULL) + return BAD_FUNC_ARG; + + blocks = len / IDEA_BLOCK_SIZE; + while (blocks--) { + XMEMCPY((byte*)idea->tmp, in, IDEA_BLOCK_SIZE); + ret = wc_IdeaCipher(idea, out, (byte*)idea->tmp); + if (ret != 0) { + return ret; + } + + xorbuf(out, (byte*)idea->reg, IDEA_BLOCK_SIZE); + XMEMCPY(idea->reg, idea->tmp, IDEA_BLOCK_SIZE); + + out += IDEA_BLOCK_SIZE; + in += IDEA_BLOCK_SIZE; + } + + return 0; +} + +#endif /* HAVE_IDEA */ diff --git a/client/wolfssl/wolfcrypt/src/include.am b/client/wolfssl/wolfcrypt/src/include.am new file mode 100644 index 0000000..bba761b --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/include.am @@ -0,0 +1,127 @@ +# vim:ft=automake +# All paths should be given relative to the root + +EXTRA_DIST += src/bio.c +EXTRA_DIST += wolfcrypt/src/misc.c +EXTRA_DIST += wolfcrypt/src/evp.c +EXTRA_DIST += wolfcrypt/src/asm.c +EXTRA_DIST += wolfcrypt/src/aes_asm.asm +EXTRA_DIST += wolfcrypt/src/wc_dsp.c +EXTRA_DIST += wolfcrypt/src/sp_dsp32.c + +EXTRA_DIST += \ + wolfcrypt/src/ecc_fp.c \ + wolfcrypt/src/fp_mont_small.i \ + wolfcrypt/src/fp_mul_comba_12.i \ + wolfcrypt/src/fp_mul_comba_17.i \ + wolfcrypt/src/fp_mul_comba_20.i \ + wolfcrypt/src/fp_mul_comba_24.i \ + wolfcrypt/src/fp_mul_comba_28.i \ + wolfcrypt/src/fp_mul_comba_32.i \ + wolfcrypt/src/fp_mul_comba_3.i \ + wolfcrypt/src/fp_mul_comba_48.i \ + wolfcrypt/src/fp_mul_comba_4.i \ + wolfcrypt/src/fp_mul_comba_64.i \ + wolfcrypt/src/fp_mul_comba_6.i \ + wolfcrypt/src/fp_mul_comba_7.i \ + wolfcrypt/src/fp_mul_comba_8.i \ + wolfcrypt/src/fp_mul_comba_9.i \ + wolfcrypt/src/fp_mul_comba_small_set.i \ + wolfcrypt/src/fp_sqr_comba_12.i \ + wolfcrypt/src/fp_sqr_comba_17.i \ + wolfcrypt/src/fp_sqr_comba_20.i \ + wolfcrypt/src/fp_sqr_comba_24.i \ + wolfcrypt/src/fp_sqr_comba_28.i \ + wolfcrypt/src/fp_sqr_comba_32.i \ + wolfcrypt/src/fp_sqr_comba_3.i \ + wolfcrypt/src/fp_sqr_comba_48.i \ + wolfcrypt/src/fp_sqr_comba_4.i \ + wolfcrypt/src/fp_sqr_comba_64.i \ + wolfcrypt/src/fp_sqr_comba_6.i \ + wolfcrypt/src/fp_sqr_comba_7.i \ + wolfcrypt/src/fp_sqr_comba_8.i \ + wolfcrypt/src/fp_sqr_comba_9.i \ + wolfcrypt/src/fp_sqr_comba_small_set.i \ + wolfcrypt/src/fe_x25519_128.i + +EXTRA_DIST += wolfcrypt/src/port/ti/ti-aes.c \ + wolfcrypt/src/port/ti/ti-des3.c \ + wolfcrypt/src/port/ti/ti-hash.c \ + wolfcrypt/src/port/ti/ti-ccm.c \ + wolfcrypt/src/port/pic32/pic32mz-crypt.c \ + wolfcrypt/src/port/nrf51.c \ + wolfcrypt/src/port/arm/armv8-aes.c \ + wolfcrypt/src/port/arm/armv8-sha256.c \ + wolfcrypt/src/port/arm/armv8-chacha.c \ + wolfcrypt/src/port/arm/armv8-curve25519.c \ + wolfcrypt/src/port/arm/armv8-32-curve25519.c \ + wolfcrypt/src/port/arm/armv8-sha512-asm.c \ + wolfcrypt/src/port/arm/armv8-32-sha512-asm.c \ + wolfcrypt/src/port/nxp/ksdk_port.c \ + wolfcrypt/src/port/atmel/README.md \ + wolfcrypt/src/port/xilinx/xil-sha3.c \ + wolfcrypt/src/port/xilinx/xil-aesgcm.c \ + wolfcrypt/src/port/caam/caam_aes.c \ + wolfcrypt/src/port/caam/caam_driver.c \ + wolfcrypt/src/port/caam/caam_init.c \ + wolfcrypt/src/port/caam/caam_sha.c \ + wolfcrypt/src/port/caam/caam_doc.pdf \ + wolfcrypt/src/port/st/stm32.c \ + wolfcrypt/src/port/st/stsafe.c \ + wolfcrypt/src/port/st/README.md \ + wolfcrypt/src/port/af_alg/afalg_aes.c \ + wolfcrypt/src/port/af_alg/afalg_hash.c \ + wolfcrypt/src/port/devcrypto/devcrypto_hash.c \ + wolfcrypt/src/port/devcrypto/wc_devcrypto.c \ + wolfcrypt/src/port/devcrypto/README.md \ + wolfcrypt/src/port/mynewt/mynewt_port.c \ + wolfcrypt/src/port/Espressif/esp32_aes.c \ + 
wolfcrypt/src/port/Espressif/esp32_sha.c \ + wolfcrypt/src/port/Espressif/esp32_util.c \ + wolfcrypt/src/port/Espressif/esp32_mp.c \ + wolfcrypt/src/port/Espressif/README.md \ + wolfcrypt/src/port/arm/cryptoCell.c \ + wolfcrypt/src/port/arm/cryptoCellHash.c \ + wolfcrypt/src/port/Renesas/renesas_tsip_aes.c \ + wolfcrypt/src/port/Renesas/renesas_tsip_sha.c \ + wolfcrypt/src/port/Renesas/renesas_tsip_util.c \ + wolfcrypt/src/port/Renesas/README.md + + +if BUILD_CRYPTOCB +src_libwolfssl_la_SOURCES += wolfcrypt/src/cryptocb.c +endif + +if BUILD_PKCS11 +src_libwolfssl_la_SOURCES += wolfcrypt/src/wc_pkcs11.c +endif + +if BUILD_DEVCRYPTO +src_libwolfssl_la_SOURCES += wolfcrypt/src/port/devcrypto/devcrypto_hash.c +src_libwolfssl_la_SOURCES += wolfcrypt/src/port/devcrypto/devcrypto_aes.c +src_libwolfssl_la_SOURCES += wolfcrypt/src/port/devcrypto/wc_devcrypto.c +endif + +if BUILD_CAVIUM +src_libwolfssl_la_SOURCES += wolfcrypt/src/port/cavium/cavium_nitrox.c +endif +EXTRA_DIST += wolfcrypt/src/port/cavium/README.md + +if BUILD_OCTEON_SYNC +src_libwolfssl_la_SOURCES += wolfcrypt/src/port/cavium/cavium_octeon_sync.c +endif +EXTRA_DIST += wolfcrypt/src/port/cavium/README_Octeon.md + +if BUILD_INTEL_QA +src_libwolfssl_la_SOURCES += wolfcrypt/src/port/intel/quickassist.c +src_libwolfssl_la_SOURCES += wolfcrypt/src/port/intel/quickassist_mem.c +endif +EXTRA_DIST += wolfcrypt/src/port/intel/README.md + +if BUILD_INTEL_QA_SYNC +src_libwolfssl_la_SOURCES += wolfcrypt/src/port/intel/quickassist_sync.c +endif + +if BUILD_CRYPTOAUTHLIB +src_libwolfssl_la_SOURCES += wolfcrypt/src/port/atmel/atmel.c +endif diff --git a/client/wolfssl/wolfcrypt/src/integer.c b/client/wolfssl/wolfcrypt/src/integer.c new file mode 100644 index 0000000..56d684b --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/integer.c @@ -0,0 +1,5320 @@ +/* integer.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +/* + * Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca, + * http://math.libtomcrypt.com + */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +/* in case user set USE_FAST_MATH there */ +#include + +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +#ifndef NO_BIG_INT + +#ifndef USE_FAST_MATH + +#ifndef WOLFSSL_SP_MATH + +#include + +#if defined(FREESCALE_LTC_TFM) + #include +#endif +#ifdef WOLFSSL_DEBUG_MATH + #include +#endif + +#ifdef SHOW_GEN + #ifndef NO_STDIO_FILESYSTEM + #include + #endif +#endif + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +#ifdef __cplusplus + extern "C" { +#endif +WOLFSSL_LOCAL int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, + mp_int* res); +WOLFSSL_LOCAL int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, + mp_int* res); +WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, + mp_int* res); +WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, + mp_int* res); +WOLFSSL_LOCAL int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, + mp_int* res); +#ifdef __cplusplus + } /* extern "C" */ +#endif +#endif + +/* reverse an array, used for radix code */ +static void +bn_reverse (unsigned char *s, int len) +{ + int ix, iy; + unsigned char t; + + ix = 0; + iy = len - 1; + while (ix < iy) { + t = s[ix]; + s[ix] = s[iy]; + s[iy] = t; + ++ix; + --iy; + } +} + +/* math settings check */ +word32 CheckRunTimeSettings(void) +{ + return CTC_SETTINGS; +} + + +/* handle up to 6 inits */ +int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e, + mp_int* f) +{ + int res = MP_OKAY; + + if (a) XMEMSET(a, 0, sizeof(mp_int)); + if (b) XMEMSET(b, 0, sizeof(mp_int)); + if (c) XMEMSET(c, 0, sizeof(mp_int)); + if (d) XMEMSET(d, 0, sizeof(mp_int)); + if (e) XMEMSET(e, 0, sizeof(mp_int)); + if (f) XMEMSET(f, 0, sizeof(mp_int)); + + if (a && ((res = mp_init(a)) != MP_OKAY)) + return res; + + if (b && ((res = mp_init(b)) != MP_OKAY)) { + mp_clear(a); + return res; + } + + if (c && ((res = mp_init(c)) != MP_OKAY)) { + mp_clear(a); mp_clear(b); + return res; + } + + if (d && ((res = mp_init(d)) != MP_OKAY)) { + mp_clear(a); mp_clear(b); mp_clear(c); + return res; + } + + if (e && ((res = mp_init(e)) != MP_OKAY)) { + mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); + return res; + } + + if (f && ((res = mp_init(f)) != MP_OKAY)) { + mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e); + return res; + } + + return res; +} + + +/* init a new mp_int */ +int mp_init (mp_int * a) +{ + /* Safeguard against passing in a null pointer */ + if (a == NULL) + return MP_VAL; + + /* defer allocation until mp_grow */ + a->dp = NULL; + + /* set the used to zero, allocated digits to the default precision + * and sign to positive */ + a->used = 0; + a->alloc = 0; + a->sign = MP_ZPOS; +#ifdef HAVE_WOLF_BIGINT + wc_bigint_init(&a->raw); +#endif + + return MP_OKAY; +} + + +/* clear one (frees) */ +void mp_clear (mp_int * a) +{ + int i; + + if (a == NULL) + return; + + /* only do anything if a hasn't been freed previously */ + if (a->dp != NULL) { + /* first zero the digits */ + for (i = 0; i < a->used; i++) { + a->dp[i] = 0; + } + + /* free ram */ + mp_free(a); + + /* reset members to make debugging easier */ + 
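+
+/* Lifecycle sketch for the mp_int helpers above (illustrative only, compiled
+ * out): mp_init() defers allocation until first use, mp_init_multi() unwinds
+ * earlier inits when a later one fails, and every successful init is paired
+ * with an mp_clear(). */
+#if 0
+static int mp_lifecycle_example(void)
+{
+    mp_int a, b;
+    int res = mp_init_multi(&a, &b, NULL, NULL, NULL, NULL);
+    if (res != MP_OKAY)
+        return res;          /* nothing to free; failed inits were unwound */
+    /* ... use a and b ... */
+    mp_clear(&a);
+    mp_clear(&b);
+    return MP_OKAY;
+}
+#endif
+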
a->alloc = a->used = 0; + a->sign = MP_ZPOS; + } +} + +void mp_free (mp_int * a) +{ + /* only do anything if a hasn't been freed previously */ + if (a->dp != NULL) { + /* free ram */ + XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT); + a->dp = NULL; + } + +#ifdef HAVE_WOLF_BIGINT + wc_bigint_free(&a->raw); +#endif +} + +void mp_forcezero(mp_int * a) +{ + if (a == NULL) + return; + + /* only do anything if a hasn't been freed previously */ + if (a->dp != NULL) { + /* force zero the used digits */ + ForceZero(a->dp, a->used * sizeof(mp_digit)); +#ifdef HAVE_WOLF_BIGINT + wc_bigint_zero(&a->raw); +#endif + /* free ram */ + mp_free(a); + + /* reset members to make debugging easier */ + a->alloc = a->used = 0; + a->sign = MP_ZPOS; + } + + a->sign = MP_ZPOS; + a->used = 0; +} + + +/* get the size for an unsigned equivalent */ +int mp_unsigned_bin_size (mp_int * a) +{ + int size = mp_count_bits (a); + return (size / 8 + ((size & 7) != 0 ? 1 : 0)); +} + + +/* returns the number of bits in an int */ +int mp_count_bits (mp_int * a) +{ + int r; + mp_digit q; + + /* shortcut */ + if (a->used == 0) { + return 0; + } + + /* get number of digits and add that */ + r = (a->used - 1) * DIGIT_BIT; + + /* take the last digit and count the bits in it */ + q = a->dp[a->used - 1]; + while (q > ((mp_digit) 0)) { + ++r; + q >>= ((mp_digit) 1); + } + return r; +} + + +int mp_leading_bit (mp_int * a) +{ + int bit = 0; + mp_int t; + + if (mp_init_copy(&t, a) != MP_OKAY) + return 0; + + while (mp_iszero(&t) == MP_NO) { +#ifndef MP_8BIT + bit = (t.dp[0] & 0x80) != 0; +#else + bit = ((t.dp[0] | ((t.dp[1] & 0x01) << 7)) & 0x80) != 0; +#endif + if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY) + break; + } + mp_clear(&t); + return bit; +} + +int mp_to_unsigned_bin_at_pos(int x, mp_int *t, unsigned char *b) +{ + int res = 0; + while (mp_iszero(t) == MP_NO) { +#ifndef MP_8BIT + b[x++] = (unsigned char) (t->dp[0] & 255); +#else + b[x++] = (unsigned char) (t->dp[0] | ((t->dp[1] & 0x01) << 7)); +#endif + if ((res = mp_div_2d (t, 8, t, NULL)) != MP_OKAY) { + return res; + } + res = x; + } + return res; +} + +/* store in unsigned [big endian] format */ +int mp_to_unsigned_bin (mp_int * a, unsigned char *b) +{ + int x, res; + mp_int t; + + if ((res = mp_init_copy (&t, a)) != MP_OKAY) { + return res; + } + + x = mp_to_unsigned_bin_at_pos(0, &t, b); + if (x < 0) { + mp_clear(&t); + return x; + } + + bn_reverse (b, x); + mp_clear (&t); + return res; +} + +int mp_to_unsigned_bin_len(mp_int * a, unsigned char *b, int c) +{ + int i, len; + + len = mp_unsigned_bin_size(a); + + /* pad front w/ zeros to match length */ + for (i = 0; i < c - len; i++) + b[i] = 0x00; + return mp_to_unsigned_bin(a, b + i); +} + +/* creates "a" then copies b into it */ +int mp_init_copy (mp_int * a, mp_int * b) +{ + int res; + + if ((res = mp_init_size (a, b->used)) != MP_OKAY) { + return res; + } + + if((res = mp_copy (b, a)) != MP_OKAY) { + mp_clear(a); + } + + return res; +} + + +/* copy, b = a */ +int mp_copy (mp_int * a, mp_int * b) +{ + int res, n; + + /* Safeguard against passing in a null pointer */ + if (a == NULL || b == NULL) + return MP_VAL; + + /* if dst == src do nothing */ + if (a == b) { + return MP_OKAY; + } + + /* grow dest */ + if (b->alloc < a->used || b->alloc == 0) { + if ((res = mp_grow (b, a->used)) != MP_OKAY) { + return res; + } + } + + /* zero b and copy the parameters over */ + { + mp_digit *tmpa, *tmpb; + + /* pointer aliases */ + + /* source */ + tmpa = a->dp; + + /* destination */ + tmpb = b->dp; + + /* copy all the digits */ + for (n = 0; n < 
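+
+/* Export sketch (illustrative only, compiled out): mp_unsigned_bin_size() is
+ * ceil(bits / 8), so a buffer sized from it always holds the big-endian
+ * output of mp_to_unsigned_bin(). */
+#if 0
+static int mp_export_example(mp_int* a, unsigned char* out, int outSz)
+{
+    int need = mp_unsigned_bin_size(a);  /* == (mp_count_bits(a) + 7) / 8 */
+    if (need > outSz)
+        return MP_VAL;
+    return mp_to_unsigned_bin(a, out);   /* most significant byte first */
+}
+#endif
+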
a->used; n++) { + *tmpb++ = *tmpa++; + } + + /* clear high digits */ + for (; n < b->used && b->dp; n++) { + *tmpb++ = 0; + } + } + + /* copy used count and sign */ + b->used = a->used; + b->sign = a->sign; + return MP_OKAY; +} + + +/* grow as required */ +int mp_grow (mp_int * a, int size) +{ + int i; + mp_digit *tmp; + + /* if the alloc size is smaller alloc more ram */ + if (a->alloc < size || size == 0) { + /* ensure there are always at least MP_PREC digits extra on top */ + size += (MP_PREC * 2) - (size % MP_PREC); + + /* reallocate the array a->dp + * + * We store the return in a temporary variable + * in case the operation failed we don't want + * to overwrite the dp member of a. + */ + tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, NULL, + DYNAMIC_TYPE_BIGINT); + if (tmp == NULL) { + /* reallocation failed but "a" is still valid [can be freed] */ + return MP_MEM; + } + + /* reallocation succeeded so set a->dp */ + a->dp = tmp; + + /* zero excess digits */ + i = a->alloc; + a->alloc = size; + for (; i < a->alloc; i++) { + a->dp[i] = 0; + } + } + return MP_OKAY; +} + + +/* shift right by a certain bit count (store quotient in c, optional + remainder in d) */ +int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d) +{ + int D, res; + mp_int t; + + + /* if the shift count is <= 0 then we do no work */ + if (b <= 0) { + res = mp_copy (a, c); + if (d != NULL) { + mp_zero (d); + } + return res; + } + + if ((res = mp_init (&t)) != MP_OKAY) { + return res; + } + + /* get the remainder */ + if (d != NULL) { + if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) { + mp_clear (&t); + return res; + } + } + + /* copy */ + if ((res = mp_copy (a, c)) != MP_OKAY) { + mp_clear (&t); + return res; + } + + /* shift by as many digits in the bit count */ + if (b >= (int)DIGIT_BIT) { + mp_rshd (c, b / DIGIT_BIT); + } + + /* shift any bit count < DIGIT_BIT */ + D = (b % DIGIT_BIT); + if (D != 0) { + mp_rshb(c, D); + } + mp_clamp (c); + if (d != NULL) { + mp_exch (&t, d); + } + mp_clear (&t); + return MP_OKAY; +} + + +/* set to zero */ +void mp_zero (mp_int * a) +{ + int n; + mp_digit *tmp; + + if (a == NULL) + return; + + a->sign = MP_ZPOS; + a->used = 0; + + tmp = a->dp; + for (n = 0; n < a->alloc; n++) { + *tmp++ = 0; + } +} + + +/* trim unused digits + * + * This is used to ensure that leading zero digits are + * trimmed and the leading "used" digit will be non-zero + * Typically very fast. Also fixes the sign if there + * are no more leading digits + */ +void mp_clamp (mp_int * a) +{ + /* decrease used while the most significant digit is + * zero. 
+ */ + while (a->used > 0 && a->dp[a->used - 1] == 0) { + --(a->used); + } + + /* reset the sign flag if used == 0 */ + if (a->used == 0) { + a->sign = MP_ZPOS; + } +} + + +/* swap the elements of two integers, for cases where you can't simply swap the + * mp_int pointers around + */ +void mp_exch (mp_int * a, mp_int * b) +{ + mp_int t; + + t = *a; + *a = *b; + *b = t; +} + + +/* shift right a certain number of bits */ +void mp_rshb (mp_int *c, int x) +{ + mp_digit *tmpc, mask, shift; + mp_digit r, rr; + mp_digit D = x; + + if (mp_iszero(c)) return; + + /* mask */ + mask = (((mp_digit)1) << D) - 1; + + /* shift for lsb */ + shift = DIGIT_BIT - D; + + /* alias */ + tmpc = c->dp + (c->used - 1); + + /* carry */ + r = 0; + for (x = c->used - 1; x >= 0; x--) { + /* get the lower bits of this word in a temp */ + rr = *tmpc & mask; + + /* shift the current word and mix in the carry bits from previous word */ + *tmpc = (*tmpc >> D) | (r << shift); + --tmpc; + + /* set the carry to the carry bits of the current word found above */ + r = rr; + } + mp_clamp(c); +} + + +/* shift right a certain amount of digits */ +void mp_rshd (mp_int * a, int b) +{ + int x; + + /* if b <= 0 then ignore it */ + if (b <= 0) { + return; + } + + /* if b > used then simply zero it and return */ + if (a->used <= b) { + mp_zero (a); + return; + } + + { + mp_digit *bottom, *top; + + /* shift the digits down */ + + /* bottom */ + bottom = a->dp; + + /* top [offset into digits] */ + top = a->dp + b; + + /* this is implemented as a sliding window where + * the window is b-digits long and digits from + * the top of the window are copied to the bottom + * + * e.g. + + b-2 | b-1 | b0 | b1 | b2 | ... | bb | ----> + /\ | ----> + \-------------------/ ----> + */ + for (x = 0; x < (a->used - b); x++) { + *bottom++ = *top++; + } + + /* zero the top digits */ + for (; x < a->used; x++) { + *bottom++ = 0; + } + } + + /* remove excess digits */ + a->used -= b; +} + + +/* calc a value mod 2**b */ +int mp_mod_2d (mp_int * a, int b, mp_int * c) +{ + int x, res; + + /* if b is <= 0 then zero the int */ + if (b <= 0) { + mp_zero (c); + return MP_OKAY; + } + + /* if the modulus is larger than the value than return */ + if (b >= (int) (a->used * DIGIT_BIT)) { + res = mp_copy (a, c); + return res; + } + + /* copy */ + if ((res = mp_copy (a, c)) != MP_OKAY) { + return res; + } + + /* zero digits above the last digit of the modulus */ + for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 
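+
+/* The identity behind mp_div_2d() above and mp_mod_2d() here (illustrative
+ * only, compiled out): for b >= 0, a == (a >> b) * 2^b + (a mod 2^b), and
+ * mp_div_2d() returns both pieces in one call. */
+#if 0
+static int mp_split_example(mp_int* a, int b)
+{
+    mp_int q, r;
+    int res = mp_init_multi(&q, &r, NULL, NULL, NULL, NULL);
+    if (res == MP_OKAY)
+        res = mp_div_2d(a, b, &q, &r);  /* q = a >> b, r = a & (2^b - 1) */
+    mp_clear(&q);
+    mp_clear(&r);
+    return res;
+}
+#endif
+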
0 : 1); x < c->used; x++) { + c->dp[x] = 0; + } + /* clear the digit that is not completely outside/inside the modulus */ + c->dp[b / DIGIT_BIT] &= (mp_digit) ((((mp_digit) 1) << + (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1)); + mp_clamp (c); + return MP_OKAY; +} + + +/* reads a unsigned char array, assumes the msb is stored first [big endian] */ +int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c) +{ + int res; + + /* make sure there are at least two digits */ + if (a->alloc < 2) { + if ((res = mp_grow(a, 2)) != MP_OKAY) { + return res; + } + } + + /* zero the int */ + mp_zero (a); + + /* read the bytes in */ + while (c-- > 0) { + if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) { + return res; + } + +#ifndef MP_8BIT + a->dp[0] |= *b++; + a->used += 1; +#else + a->dp[0] = (*b & MP_MASK); + a->dp[1] |= ((*b++ >> 7U) & 1); + a->used += 2; +#endif + } + mp_clamp (a); + return MP_OKAY; +} + + +/* shift left by a certain bit count */ +int mp_mul_2d (mp_int * a, int b, mp_int * c) +{ + mp_digit d; + int res; + + /* copy */ + if (a != c) { + if ((res = mp_copy (a, c)) != MP_OKAY) { + return res; + } + } + + if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) { + if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) { + return res; + } + } + + /* shift by as many digits in the bit count */ + if (b >= (int)DIGIT_BIT) { + if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) { + return res; + } + } + + /* shift any bit count < DIGIT_BIT */ + d = (mp_digit) (b % DIGIT_BIT); + if (d != 0) { + mp_digit *tmpc, shift, mask, r, rr; + int x; + + /* bitmask for carries */ + mask = (((mp_digit)1) << d) - 1; + + /* shift for msbs */ + shift = DIGIT_BIT - d; + + /* alias */ + tmpc = c->dp; + + /* carry */ + r = 0; + for (x = 0; x < c->used; x++) { + /* get the higher bits of the current word */ + rr = (*tmpc >> shift) & mask; + + /* shift the current word and OR in the carry */ + *tmpc = (mp_digit)(((*tmpc << d) | r) & MP_MASK); + ++tmpc; + + /* set the carry to the carry bits of the current word */ + r = rr; + } + + /* set final carry */ + if (r != 0) { + c->dp[(c->used)++] = r; + } + } + mp_clamp (c); + return MP_OKAY; +} + + +/* shift left a certain amount of digits */ +int mp_lshd (mp_int * a, int b) +{ + int x, res; + + /* if its less than zero return */ + if (b <= 0) { + return MP_OKAY; + } + + /* grow to fit the new digits */ + if (a->alloc < a->used + b) { + if ((res = mp_grow (a, a->used + b)) != MP_OKAY) { + return res; + } + } + + { + mp_digit *top, *bottom; + + /* increment the used by the shift amount then copy upwards */ + a->used += b; + + /* top */ + top = a->dp + a->used - 1; + + /* base */ + bottom = a->dp + a->used - 1 - b; + + /* much like mp_rshd this is implemented using a sliding window + * except the window goes the other way around. Copying from + * the bottom to the top. see bn_mp_rshd.c for more info. + */ + for (x = a->used - 1; x >= b; x--) { + *top-- = *bottom--; + } + + /* zero the lower digits */ + top = a->dp; + for (x = 0; x < b; x++) { + *top++ = 0; + } + } + return MP_OKAY; +} + + +/* this is a shell function that calls either the normal or Montgomery + * exptmod functions. 
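+
+/* mp_read_unsigned_bin() above is the loop a = a * 256 + b[i] over the input
+ * bytes, most significant first; exporting again reproduces the bytes minus
+ * any leading zeros (illustrative only, compiled out): */
+#if 0
+static int mp_import_roundtrip(const unsigned char* b, int c)
+{
+    mp_int a;
+    unsigned char out[64];
+    int res = mp_init(&a);
+    if (res == MP_OKAY)
+        res = mp_read_unsigned_bin(&a, b, c);
+    if (res == MP_OKAY && mp_unsigned_bin_size(&a) <= (int)sizeof(out))
+        res = mp_to_unsigned_bin(&a, out);
+    mp_clear(&a);
+    return res;
+}
+#endif
+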
Originally the call to the montgomery code was + * embedded in the normal function but that wasted a lot of stack space + * for nothing (since 99% of the time the Montgomery code would be called) + */ +#if defined(FREESCALE_LTC_TFM) +int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y) +#else +int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y) +#endif +{ + int dr; + + /* modulus P must be positive */ + if (mp_iszero(P) || P->sign == MP_NEG) { + return MP_VAL; + } + if (mp_isone(P)) { + mp_set(Y, 0); + return MP_OKAY; + } + if (mp_iszero(X)) { + mp_set(Y, 1); + return MP_OKAY; + } + if (mp_iszero(G)) { + mp_set(Y, 0); + return MP_OKAY; + } + + /* if exponent X is negative we have to recurse */ + if (X->sign == MP_NEG) { +#ifdef BN_MP_INVMOD_C + mp_int tmpG, tmpX; + int err; + + /* first compute 1/G mod P */ + if ((err = mp_init(&tmpG)) != MP_OKAY) { + return err; + } + if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) { + mp_clear(&tmpG); + return err; + } + + /* now get |X| */ + if ((err = mp_init(&tmpX)) != MP_OKAY) { + mp_clear(&tmpG); + return err; + } + if ((err = mp_abs(X, &tmpX)) != MP_OKAY) { + mp_clear(&tmpG); + mp_clear(&tmpX); + return err; + } + + /* and now compute (1/G)**|X| instead of G**X [X < 0] */ + err = mp_exptmod(&tmpG, &tmpX, P, Y); + mp_clear(&tmpG); + mp_clear(&tmpX); + return err; +#else + /* no invmod */ + return MP_VAL; +#endif + } + +#ifdef BN_MP_EXPTMOD_BASE_2 + if (G->used == 1 && G->dp[0] == 2) { + return mp_exptmod_base_2(X, P, Y); + } +#endif + +/* modified diminished radix reduction */ +#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \ + defined(BN_S_MP_EXPTMOD_C) + if (mp_reduce_is_2k_l(P) == MP_YES) { + return s_mp_exptmod(G, X, P, Y, 1); + } +#endif + +#ifdef BN_MP_DR_IS_MODULUS_C + /* is it a DR modulus? */ + dr = mp_dr_is_modulus(P); +#else + /* default to no */ + dr = 0; +#endif + + (void)dr; + +#ifdef BN_MP_REDUCE_IS_2K_C + /* if not, is it a unrestricted DR modulus? 
*/ + if (dr == 0) { + dr = mp_reduce_is_2k(P) << 1; + } +#endif + + /* if the modulus is odd or dr != 0 use the montgomery method */ +#ifdef BN_MP_EXPTMOD_FAST_C + if (mp_isodd (P) == MP_YES || dr != 0) { + return mp_exptmod_fast (G, X, P, Y, dr); + } else { +#endif +#ifdef BN_S_MP_EXPTMOD_C + /* otherwise use the generic Barrett reduction technique */ + return s_mp_exptmod (G, X, P, Y, 0); +#else + /* no exptmod for evens */ + return MP_VAL; +#endif +#ifdef BN_MP_EXPTMOD_FAST_C + } +#endif +} + +int mp_exptmod_ex (mp_int * G, mp_int * X, int digits, mp_int * P, mp_int * Y) +{ + (void)digits; + return mp_exptmod(G, X, P, Y); +} + +/* b = |a| + * + * Simple function copies the input and fixes the sign to positive + */ +int mp_abs (mp_int * a, mp_int * b) +{ + int res; + + /* copy a to b */ + if (a != b) { + if ((res = mp_copy (a, b)) != MP_OKAY) { + return res; + } + } + + /* force the sign of b to positive */ + b->sign = MP_ZPOS; + + return MP_OKAY; +} + + +/* hac 14.61, pp608 */ +#if defined(FREESCALE_LTC_TFM) +int wolfcrypt_mp_invmod(mp_int * a, mp_int * b, mp_int * c) +#else +int mp_invmod (mp_int * a, mp_int * b, mp_int * c) +#endif +{ + /* b cannot be negative or zero, and can not divide by 0 (1/a mod b) */ + if (b->sign == MP_NEG || mp_iszero(b) == MP_YES || mp_iszero(a) == MP_YES) { + return MP_VAL; + } + +#ifdef BN_FAST_MP_INVMOD_C + /* if the modulus is odd we can use a faster routine instead */ + if ((mp_isodd(b) == MP_YES) && (mp_cmp_d(b, 1) != MP_EQ)) { + return fast_mp_invmod (a, b, c); + } +#endif + +#ifdef BN_MP_INVMOD_SLOW_C + return mp_invmod_slow(a, b, c); +#else + return MP_VAL; +#endif +} + + +/* computes the modular inverse via binary extended euclidean algorithm, + * that is c = 1/a mod b + * + * Based on slow invmod except this is optimized for the case where b is + * odd as per HAC Note 14.64 on pp. 610 + */ +int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c) +{ + mp_int x, y, u, v, B, D; + int res, neg, loop_check = 0; + + /* 2. [modified] b must be odd */ + if (mp_iseven (b) == MP_YES) { + return MP_VAL; + } + + /* init all our temps */ + if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) { + return res; + } + + /* x == modulus, y == value to invert */ + if ((res = mp_copy (b, &x)) != MP_OKAY) { + goto LBL_ERR; + } + + /* we need y = |a| */ + if ((res = mp_mod (a, b, &y)) != MP_OKAY) { + goto LBL_ERR; + } + + /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ + if ((res = mp_copy (&x, &u)) != MP_OKAY) { + goto LBL_ERR; + } + if ((res = mp_copy (&y, &v)) != MP_OKAY) { + goto LBL_ERR; + } + if ((res = mp_set (&D, 1)) != MP_OKAY) { + goto LBL_ERR; + } + +top: + /* 4. while u is even do */ + while (mp_iseven (&u) == MP_YES) { + /* 4.1 u = u/2 */ + if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { + goto LBL_ERR; + } + /* 4.2 if B is odd then */ + if (mp_isodd (&B) == MP_YES) { + if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) { + goto LBL_ERR; + } + } + /* B = B/2 */ + if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { + goto LBL_ERR; + } + } + + /* 5. while v is even do */ + while (mp_iseven (&v) == MP_YES) { + /* 5.1 v = v/2 */ + if ((res = mp_div_2 (&v, &v)) != MP_OKAY) { + goto LBL_ERR; + } + /* 5.2 if D is odd then */ + if (mp_isodd (&D) == MP_YES) { + /* D = (D-x)/2 */ + if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { + goto LBL_ERR; + } + } + /* D = D/2 */ + if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { + goto LBL_ERR; + } + } + + /* 6. 
if u >= v then */
+  if (mp_cmp (&u, &v) != MP_LT) {
+    /* u = u - v, B = B - D */
+    if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+
+    if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+  } else {
+    /* v = v - u, D = D - B */
+    if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+
+    if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+  }
+
+  /* if not zero goto step 4 */
+  if (mp_iszero (&u) == MP_NO) {
+    if (++loop_check > MAX_INVMOD_SZ) {
+      res = MP_VAL;
+      goto LBL_ERR;
+    }
+    goto top;
+  }
+
+  /* now a = C, b = D, gcd == g*v */
+
+  /* if v != 1 then there is no inverse */
+  if (mp_cmp_d (&v, 1) != MP_EQ) {
+    res = MP_VAL;
+    goto LBL_ERR;
+  }
+
+  /* D is now the inverse; bring it into the range [0, b) */
+  neg = a->sign;
+  while (D.sign == MP_NEG) {
+    if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+  }
+  /* too big */
+  while (mp_cmp_mag(&D, b) != MP_LT) {
+    if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+  }
+  mp_exch (&D, c);
+  c->sign = neg;
+  res = MP_OKAY;
+
+LBL_ERR:mp_clear(&x);
+  mp_clear(&y);
+  mp_clear(&u);
+  mp_clear(&v);
+  mp_clear(&B);
+  mp_clear(&D);
+  return res;
+}
+
+
+/* hac 14.61, pp608 */
+int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
+{
+  mp_int x, y, u, v, A, B, C, D;
+  int res;
+
+  /* b cannot be negative */
+  if (b->sign == MP_NEG || mp_iszero(b) == MP_YES) {
+    return MP_VAL;
+  }
+
+  /* init temps */
+  if ((res = mp_init_multi(&x, &y, &u, &v,
+                           &A, &B)) != MP_OKAY) {
+    return res;
+  }
+
+  /* init the rest of the temps */
+  if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
+    mp_clear(&x);
+    mp_clear(&y);
+    mp_clear(&u);
+    mp_clear(&v);
+    mp_clear(&A);
+    mp_clear(&B);
+    return res;
+  }
+
+  /* x = a, y = b */
+  if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
+    goto LBL_ERR;
+  }
+  if (mp_isone(&x)) {
+    mp_set(c, 1);
+    res = MP_OKAY;
+    goto LBL_ERR;
+  }
+  if ((res = mp_copy (b, &y)) != MP_OKAY) {
+    goto LBL_ERR;
+  }
+
+  /* 2. [modified] if x,y are both even then return an error! */
+  if (mp_iseven (&x) == MP_YES && mp_iseven (&y) == MP_YES) {
+    res = MP_VAL;
+    goto LBL_ERR;
+  }
+
+  /* 3. u=x, v=y, A=1, B=0, C=0, D=1 */
+  if ((res = mp_copy (&x, &u)) != MP_OKAY) {
+    goto LBL_ERR;
+  }
+  if ((res = mp_copy (&y, &v)) != MP_OKAY) {
+    goto LBL_ERR;
+  }
+  if ((res = mp_set (&A, 1)) != MP_OKAY) {
+    goto LBL_ERR;
+  }
+  if ((res = mp_set (&D, 1)) != MP_OKAY) {
+    goto LBL_ERR;
+  }
+
+top:
+  /* 4. while u is even do */
+  while (mp_iseven (&u) == MP_YES) {
+    /* 4.1 u = u/2 */
+    if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+    /* 4.2 if A or B is odd then */
+    if (mp_isodd (&A) == MP_YES || mp_isodd (&B) == MP_YES) {
+      /* A = (A+y)/2, B = (B-x)/2 */
+      if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
+        goto LBL_ERR;
+      }
+      if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
+        goto LBL_ERR;
+      }
+    }
+    /* A = A/2, B = B/2 */
+    if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+    if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+  }
+
+  /* 5.
while v is even do */
+  while (mp_iseven (&v) == MP_YES) {
+    /* 5.1 v = v/2 */
+    if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+    /* 5.2 if C or D is odd then */
+    if (mp_isodd (&C) == MP_YES || mp_isodd (&D) == MP_YES) {
+      /* C = (C+y)/2, D = (D-x)/2 */
+      if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
+        goto LBL_ERR;
+      }
+      if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
+        goto LBL_ERR;
+      }
+    }
+    /* C = C/2, D = D/2 */
+    if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+    if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+  }
+
+  /* 6. if u >= v then */
+  if (mp_cmp (&u, &v) != MP_LT) {
+    /* u = u - v, A = A - C, B = B - D */
+    if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+
+    if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+
+    if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+  } else {
+    /* v = v - u, C = C - A, D = D - B */
+    if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+
+    if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+
+    if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+  }
+
+  /* if not zero goto step 4 */
+  if (mp_iszero (&u) == MP_NO)
+    goto top;
+
+  /* now a = C, b = D, gcd == g*v */
+
+  /* if v != 1 then there is no inverse */
+  if (mp_cmp_d (&v, 1) != MP_EQ) {
+    res = MP_VAL;
+    goto LBL_ERR;
+  }
+
+  /* if it's too low, add b until it is non-negative */
+  while (mp_cmp_d(&C, 0) == MP_LT) {
+    if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+  }
+
+  /* too big */
+  while (mp_cmp_mag(&C, b) != MP_LT) {
+    if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
+      goto LBL_ERR;
+    }
+  }
+
+  /* C is now the inverse */
+  mp_exch (&C, c);
+  res = MP_OKAY;
+LBL_ERR:mp_clear(&x);
+  mp_clear(&y);
+  mp_clear(&u);
+  mp_clear(&v);
+  mp_clear(&A);
+  mp_clear(&B);
+  mp_clear(&C);
+  mp_clear(&D);
+  return res;
+}
+
+
+/* compare magnitude of two ints (unsigned) */
+int mp_cmp_mag (mp_int * a, mp_int * b)
+{
+  int n;
+  mp_digit *tmpa, *tmpb;
+
+  /* compare based on # of non-zero digits */
+  if (a->used > b->used) {
+    return MP_GT;
+  }
+
+  if (a->used < b->used) {
+    return MP_LT;
+  }
+
+  /* alias for a */
+  tmpa = a->dp + (a->used - 1);
+
+  /* alias for b */
+  tmpb = b->dp + (a->used - 1);
+
+  /* compare based on digits */
+  for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
+    if (*tmpa > *tmpb) {
+      return MP_GT;
+    }
+
+    if (*tmpa < *tmpb) {
+      return MP_LT;
+    }
+  }
+  return MP_EQ;
+}
+
+
+/* compare two ints (signed) */
+int mp_cmp (mp_int * a, mp_int * b)
+{
+  /* compare based on sign */
+  if (a->sign != b->sign) {
+    if (a->sign == MP_NEG) {
+      return MP_LT;
+    } else {
+      return MP_GT;
+    }
+  }
+
+  /* compare digits */
+  if (a->sign == MP_NEG) {
+    /* if negative compare in the opposite direction */
+    return mp_cmp_mag(b, a);
+  } else {
+    return mp_cmp_mag(a, b);
+  }
+}
+
+
+/* compare a digit */
+int mp_cmp_d(mp_int * a, mp_digit b)
+{
+  /* special case for zero */
+  if (a->used == 0 && b == 0)
+    return MP_EQ;
+
+  /* compare based on sign */
+  if ((b && a->used == 0) || a->sign == MP_NEG) {
+    return MP_LT;
+  }
+
+  /* compare based on magnitude */
+  if (a->used > 1) {
+    return MP_GT;
+  }
+
+  /* compare the only digit of a to b */
+  if (a->dp[0] > b) {
+    return MP_GT;
+  } else if (a->dp[0] < b) {
+    return MP_LT;
+  } else {
+    return MP_EQ;
+  }
+}
+
+
+/* set to a digit */
+int mp_set (mp_int * a, mp_digit b)
+{
+  int res;
+  mp_zero (a);
+  res = mp_grow (a, 1);
+  if (res == MP_OKAY) {
+    a->dp[0] = (mp_digit)(b & MP_MASK);
+    a->used  = (a->dp[0] != 0) ?
1 : 0; + } + return res; +} + +/* check if a bit is set */ +int mp_is_bit_set (mp_int *a, mp_digit b) +{ + if ((mp_digit)a->used < b/DIGIT_BIT) + return 0; + + return (int)((a->dp[b/DIGIT_BIT] >> b%DIGIT_BIT) & (mp_digit)1); +} + +/* c = a mod b, 0 <= c < b */ +#if defined(FREESCALE_LTC_TFM) +int wolfcrypt_mp_mod(mp_int * a, mp_int * b, mp_int * c) +#else +int mp_mod (mp_int * a, mp_int * b, mp_int * c) +#endif +{ + mp_int t; + int res; + + if ((res = mp_init_size (&t, b->used)) != MP_OKAY) { + return res; + } + + if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) { + mp_clear (&t); + return res; + } + + if ((mp_iszero(&t) != MP_NO) || (t.sign == b->sign)) { + res = MP_OKAY; + mp_exch (&t, c); + } else { + res = mp_add (b, &t, c); + } + + mp_clear (&t); + return res; +} + + +/* slower bit-bang division... also smaller */ +int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d) +{ + mp_int ta, tb, tq, q; + int res, n, n2; + + /* is divisor zero ? */ + if (mp_iszero (b) == MP_YES) { + return MP_VAL; + } + + /* if a < b then q=0, r = a */ + if (mp_cmp_mag (a, b) == MP_LT) { + if (d != NULL) { + res = mp_copy (a, d); + } else { + res = MP_OKAY; + } + if (c != NULL) { + mp_zero (c); + } + return res; + } + + /* init our temps */ + if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0)) != MP_OKAY) { + return res; + } + + if ((res = mp_set(&tq, 1)) != MP_OKAY) { + return res; + } + n = mp_count_bits(a) - mp_count_bits(b); + if (((res = mp_abs(a, &ta)) != MP_OKAY) || + ((res = mp_abs(b, &tb)) != MP_OKAY) || + ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) || + ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) { + goto LBL_ERR; + } + + while (n-- >= 0) { + if (mp_cmp(&tb, &ta) != MP_GT) { + if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) || + ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) { + goto LBL_ERR; + } + } + if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) || + ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) { + goto LBL_ERR; + } + } + + /* now q == quotient and ta == remainder */ + n = a->sign; + n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG); + if (c != NULL) { + mp_exch(c, &q); + c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2; + } + if (d != NULL) { + mp_exch(d, &ta); + d->sign = (mp_iszero(d) == MP_YES) ? 
MP_ZPOS : n; + } +LBL_ERR: + mp_clear(&ta); + mp_clear(&tb); + mp_clear(&tq); + mp_clear(&q); + return res; +} + + +/* b = a/2 */ +int mp_div_2(mp_int * a, mp_int * b) +{ + int x, res, oldused; + + /* copy */ + if (b->alloc < a->used) { + if ((res = mp_grow (b, a->used)) != MP_OKAY) { + return res; + } + } + + oldused = b->used; + b->used = a->used; + { + mp_digit r, rr, *tmpa, *tmpb; + + /* source alias */ + tmpa = a->dp + b->used - 1; + + /* dest alias */ + tmpb = b->dp + b->used - 1; + + /* carry */ + r = 0; + for (x = b->used - 1; x >= 0; x--) { + /* get the carry for the next iteration */ + rr = *tmpa & 1; + + /* shift the current digit, add in carry and store */ + *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); + + /* forward carry to next iteration */ + r = rr; + } + + /* zero excess digits */ + tmpb = b->dp + b->used; + for (x = b->used; x < oldused; x++) { + *tmpb++ = 0; + } + } + b->sign = a->sign; + mp_clamp (b); + return MP_OKAY; +} + + +/* high level addition (handles signs) */ +int mp_add (mp_int * a, mp_int * b, mp_int * c) +{ + int sa, sb, res; + + /* get sign of both inputs */ + sa = a->sign; + sb = b->sign; + + /* handle two cases, not four */ + if (sa == sb) { + /* both positive or both negative */ + /* add their magnitudes, copy the sign */ + c->sign = sa; + res = s_mp_add (a, b, c); + } else { + /* one positive, the other negative */ + /* subtract the one with the greater magnitude from */ + /* the one of the lesser magnitude. The result gets */ + /* the sign of the one with the greater magnitude. */ + if (mp_cmp_mag (a, b) == MP_LT) { + c->sign = sb; + res = s_mp_sub (b, a, c); + } else { + c->sign = sa; + res = s_mp_sub (a, b, c); + } + } + return res; +} + + +/* low level addition, based on HAC pp.594, Algorithm 14.7 */ +int s_mp_add (mp_int * a, mp_int * b, mp_int * c) +{ + mp_int *x; + int olduse, res, min_ab, max_ab; + + /* find sizes, we let |a| <= |b| which means we have to sort + * them. 
"x" will point to the input with the most digits + */ + if (a->used > b->used) { + min_ab = b->used; + max_ab = a->used; + x = a; + } else { + min_ab = a->used; + max_ab = b->used; + x = b; + } + + /* init result */ + if (c->alloc < max_ab + 1) { + if ((res = mp_grow (c, max_ab + 1)) != MP_OKAY) { + return res; + } + } + + /* get old used digit count and set new one */ + olduse = c->used; + c->used = max_ab + 1; + + { + mp_digit u, *tmpa, *tmpb, *tmpc; + int i; + + /* alias for digit pointers */ + + /* first input */ + tmpa = a->dp; + + /* second input */ + tmpb = b->dp; + + /* destination */ + tmpc = c->dp; + + /* zero the carry */ + u = 0; + for (i = 0; i < min_ab; i++) { + /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */ + *tmpc = *tmpa++ + *tmpb++ + u; + + /* U = carry bit of T[i] */ + u = *tmpc >> ((mp_digit)DIGIT_BIT); + + /* take away carry bit from T[i] */ + *tmpc++ &= MP_MASK; + } + + /* now copy higher words if any, that is in A+B + * if A or B has more digits add those in + */ + if (min_ab != max_ab) { + for (; i < max_ab; i++) { + /* T[i] = X[i] + U */ + *tmpc = x->dp[i] + u; + + /* U = carry bit of T[i] */ + u = *tmpc >> ((mp_digit)DIGIT_BIT); + + /* take away carry bit from T[i] */ + *tmpc++ &= MP_MASK; + } + } + + /* add carry */ + *tmpc++ = u; + + /* clear digits above olduse */ + for (i = c->used; i < olduse; i++) { + *tmpc++ = 0; + } + } + + mp_clamp (c); + return MP_OKAY; +} + + +/* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */ +int s_mp_sub (mp_int * a, mp_int * b, mp_int * c) +{ + int olduse, res, min_b, max_a; + + /* find sizes */ + min_b = b->used; + max_a = a->used; + + /* init result */ + if (c->alloc < max_a) { + if ((res = mp_grow (c, max_a)) != MP_OKAY) { + return res; + } + } + + /* sanity check on destination */ + if (c->dp == NULL) + return MP_VAL; + + olduse = c->used; + c->used = max_a; + + { + mp_digit u, *tmpa, *tmpb, *tmpc; + int i; + + /* alias for digit pointers */ + tmpa = a->dp; + tmpb = b->dp; + tmpc = c->dp; + + /* set carry to zero */ + u = 0; + for (i = 0; i < min_b; i++) { + /* T[i] = A[i] - B[i] - U */ + *tmpc = *tmpa++ - *tmpb++ - u; + + /* U = carry bit of T[i] + * Note this saves performing an AND operation since + * if a carry does occur it will propagate all the way to the + * MSB. As a result a single shift is enough to get the carry + */ + u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); + + /* Clear carry from T[i] */ + *tmpc++ &= MP_MASK; + } + + /* now copy higher words if any, e.g. if A has more digits than B */ + for (; i < max_a; i++) { + /* T[i] = A[i] - U */ + *tmpc = *tmpa++ - u; + + /* U = carry bit of T[i] */ + u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); + + /* Clear carry from T[i] */ + *tmpc++ &= MP_MASK; + } + + /* clear digits above used (since we may not have grown result above) */ + for (i = c->used; i < olduse; i++) { + *tmpc++ = 0; + } + } + + mp_clamp (c); + return MP_OKAY; +} + + +/* high level subtraction (handles signs) */ +int mp_sub (mp_int * a, mp_int * b, mp_int * c) +{ + int sa, sb, res; + + sa = a->sign; + sb = b->sign; + + if (sa != sb) { + /* subtract a negative from a positive, OR */ + /* subtract a positive from a negative. */ + /* In either case, ADD their magnitudes, */ + /* and use the sign of the first number. */ + c->sign = sa; + res = s_mp_add (a, b, c); + } else { + /* subtract a positive from a positive, OR */ + /* subtract a negative from a negative. */ + /* First, take the difference between their */ + /* magnitudes, then... 
*/ + if (mp_cmp_mag (a, b) != MP_LT) { + /* Copy the sign from the first */ + c->sign = sa; + /* The first has a larger or equal magnitude */ + res = s_mp_sub (a, b, c); + } else { + /* The result has the *opposite* sign from */ + /* the first number. */ + c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS; + /* The second has a larger magnitude */ + res = s_mp_sub (b, a, c); + } + } + return res; +} + + +/* determines if reduce_2k_l can be used */ +int mp_reduce_is_2k_l(mp_int *a) +{ + int ix, iy; + + if (a->used == 0) { + return MP_NO; + } else if (a->used == 1) { + return MP_YES; + } else if (a->used > 1) { + /* if more than half of the digits are -1 we're sold */ + for (iy = ix = 0; ix < a->used; ix++) { + if (a->dp[ix] == MP_MASK) { + ++iy; + } + } + return (iy >= (a->used/2)) ? MP_YES : MP_NO; + + } + return MP_NO; +} + + +/* determines if mp_reduce_2k can be used */ +int mp_reduce_is_2k(mp_int *a) +{ + int ix, iy, iw; + mp_digit iz; + + if (a->used == 0) { + return MP_NO; + } else if (a->used == 1) { + return MP_YES; + } else if (a->used > 1) { + iy = mp_count_bits(a); + iz = 1; + iw = 1; + + /* Test every bit from the second digit up, must be 1 */ + for (ix = DIGIT_BIT; ix < iy; ix++) { + if ((a->dp[iw] & iz) == 0) { + return MP_NO; + } + iz <<= 1; + if (iz > (mp_digit)MP_MASK) { + ++iw; + iz = 1; + } + } + } + return MP_YES; +} + + +/* determines if a number is a valid DR modulus */ +int mp_dr_is_modulus(mp_int *a) +{ + int ix; + + /* must be at least two digits */ + if (a->used < 2) { + return 0; + } + + /* must be of the form b**k - a [a <= b] so all + * but the first digit must be equal to -1 (mod b). + */ + for (ix = 1; ix < a->used; ix++) { + if (a->dp[ix] != MP_MASK) { + return 0; + } + } + return 1; +} + + +/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 + * + * Uses a left-to-right k-ary sliding window to compute the modular + * exponentiation. + * The value of k changes based on the size of the exponent. + * + * Uses Montgomery or Diminished Radix reduction [whichever appropriate] + */ + +#ifdef MP_LOW_MEM + #define TAB_SIZE 32 +#else + #define TAB_SIZE 256 +#endif + +int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, + int redmode) +{ + mp_int res; + mp_digit buf, mp; + int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; +#ifdef WOLFSSL_SMALL_STACK + mp_int* M; +#else + mp_int M[TAB_SIZE]; +#endif + /* use a pointer to the reduction algorithm. This allows us to use + * one of many reduction algorithms without modding the guts of + * the code with if statements everywhere. 
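+ * (All three candidates, fast/baseline Montgomery, diminished radix and
+ *  2k reduction, share the signature int (*)(mp_int*, mp_int*, mp_digit),
+ *  which is why the window loop below never needs to know which one is
+ *  active.)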
+ */
+  int (*redux)(mp_int*,mp_int*,mp_digit) = NULL;
+
+#ifdef WOLFSSL_SMALL_STACK
+  M = (mp_int*) XMALLOC(sizeof(mp_int) * TAB_SIZE, NULL,
+                        DYNAMIC_TYPE_BIGINT);
+  if (M == NULL)
+    return MP_MEM;
+#endif
+
+  /* find window size */
+  x = mp_count_bits (X);
+  if (x <= 7) {
+    winsize = 2;
+  } else if (x <= 36) {
+    winsize = 3;
+  } else if (x <= 140) {
+    winsize = 4;
+  } else if (x <= 450) {
+    winsize = 5;
+  } else if (x <= 1303) {
+    winsize = 6;
+  } else if (x <= 3529) {
+    winsize = 7;
+  } else {
+    winsize = 8;
+  }
+
+#ifdef MP_LOW_MEM
+  if (winsize > 5) {
+    winsize = 5;
+  }
+#endif
+
+  /* init M array */
+  /* init first cell */
+  if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+    return err;
+  }
+
+  /* now init the second half of the array */
+  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
+    if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
+      for (y = 1<<(winsize-1); y < x; y++) {
+        mp_clear (&M[y]);
+      }
+      mp_clear(&M[1]);
+
+#ifdef WOLFSSL_SMALL_STACK
+      XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+      return err;
+    }
+  }
+
+  /* determine and setup reduction code */
+  if (redmode == 0) {
+#ifdef BN_MP_MONTGOMERY_SETUP_C
+    /* now setup montgomery */
+    if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
+      goto LBL_M;
+    }
+#else
+    err = MP_VAL;
+    goto LBL_M;
+#endif
+
+    /* automatically pick the comba one if available (saves quite a few
+       calls/ifs) */
+#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
+    if (((P->used * 2 + 1) < (int)MP_WARRAY) &&
+         P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
+      redux = fast_mp_montgomery_reduce;
+    } else
+#endif
+    {
+#ifdef BN_MP_MONTGOMERY_REDUCE_C
+      /* use slower baseline Montgomery method */
+      redux = mp_montgomery_reduce;
+#endif
+    }
+  } else if (redmode == 1) {
+#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
+    /* setup DR reduction for moduli of the form B**k - b */
+    mp_dr_setup(P, &mp);
+    redux = mp_dr_reduce;
+#endif
+  } else {
+#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
+    /* setup DR reduction for moduli of the form 2**k - b */
+    if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
+      goto LBL_M;
+    }
+    redux = mp_reduce_2k;
+#endif
+  }
+
+  if (redux == NULL) {
+    err = MP_VAL;
+    goto LBL_M;
+  }
+
+  /* setup result */
+  if ((err = mp_init_size (&res, P->alloc)) != MP_OKAY) {
+    goto LBL_M;
+  }
+
+  /* create M table
+   *
+   * The first half of the table is not computed, except for M[0] and M[1]
+   */
+
+  if (redmode == 0) {
+#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
+    /* now we need R mod m */
+    if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
+      goto LBL_RES;
+    }
+
+    /* now set M[1] to G * R mod m */
+    if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
+      goto LBL_RES;
+    }
+#else
+    err = MP_VAL;
+    goto LBL_RES;
+#endif
+  } else {
+    if ((err = mp_set(&res, 1)) != MP_OKAY) {
+      goto LBL_RES;
+    }
+    if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
+      goto LBL_RES;
+    }
+  }
+
+  /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
+  if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
+    goto LBL_RES;
+  }
+
+  for (x = 0; x < (winsize - 1); x++) {
+    if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
+                       &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
+      goto LBL_RES;
+    }
+    if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, mp)) != MP_OKAY) {
+      goto LBL_RES;
+    }
+  }
+
+  /* create upper table */
+  for (x = (1 << (winsize - 1))
+ 1; x < (1 << winsize); x++) { + if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&M[x], P, mp)) != MP_OKAY) { + goto LBL_RES; + } + } + + /* set initial mode and bit cnt */ + mode = 0; + bitcnt = 1; + buf = 0; + digidx = X->used - 1; + bitcpy = 0; + bitbuf = 0; + + for (;;) { + /* grab next digit as required */ + if (--bitcnt == 0) { + /* if digidx == -1 we are out of digits so break */ + if (digidx == -1) { + break; + } + /* read next digit and reset bitcnt */ + buf = X->dp[digidx--]; + bitcnt = (int)DIGIT_BIT; + } + + /* grab the next msb from the exponent */ + y = (int)(buf >> (DIGIT_BIT - 1)) & 1; + buf <<= (mp_digit)1; + + /* if the bit is zero and mode == 0 then we ignore it + * These represent the leading zero bits before the first 1 bit + * in the exponent. Technically this opt is not required but it + * does lower the # of trivial squaring/reductions used + */ + if (mode == 0 && y == 0) { + continue; + } + + /* if the bit is zero and mode == 1 then we square */ + if (mode == 1 && y == 0) { + if ((err = mp_sqr (&res, &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, mp)) != MP_OKAY) { + goto LBL_RES; + } + continue; + } + + /* else we add it to the window */ + bitbuf |= (y << (winsize - ++bitcpy)); + mode = 2; + + if (bitcpy == winsize) { + /* ok window is filled so square as required and multiply */ + /* square first */ + for (x = 0; x < winsize; x++) { + if ((err = mp_sqr (&res, &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, mp)) != MP_OKAY) { + goto LBL_RES; + } + } + + /* then multiply */ + if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, mp)) != MP_OKAY) { + goto LBL_RES; + } + + /* empty window and reset */ + bitcpy = 0; + bitbuf = 0; + mode = 1; + } + } + + /* if bits remain then square/multiply */ + if (mode == 2 && bitcpy > 0) { + /* square then multiply if the bit is set */ + for (x = 0; x < bitcpy; x++) { + if ((err = mp_sqr (&res, &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, mp)) != MP_OKAY) { + goto LBL_RES; + } + + /* get next bit of the window */ + bitbuf <<= 1; + if ((bitbuf & (1 << winsize)) != 0) { + /* then multiply */ + if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, mp)) != MP_OKAY) { + goto LBL_RES; + } + } + } + } + + if (redmode == 0) { + /* fixup result if Montgomery reduction is used + * recall that any value in a Montgomery system is + * actually multiplied by R mod n. So we have + * to reduce one more time to cancel out the factor + * of R. 
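+ *
+ * In symbols: the loop above ends with res = (G**X) * R mod P, so the
+ * extra reduction below multiplies by R**-1 and leaves exactly G**X mod P.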
+ */ + if ((err = redux(&res, P, mp)) != MP_OKAY) { + goto LBL_RES; + } + } + + /* swap res with Y */ + mp_exch (&res, Y); + err = MP_OKAY; +LBL_RES:mp_clear (&res); +LBL_M: + mp_clear(&M[1]); + for (x = 1<<(winsize-1); x < (1 << winsize); x++) { + mp_clear (&M[x]); + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(M, NULL, DYNAMIC_TYPE_BIGINT); +#endif + + return err; +} + +#ifdef BN_MP_EXPTMOD_BASE_2 +#if DIGIT_BIT < 16 + #define WINSIZE 3 +#elif DIGIT_BIT < 32 + #define WINSIZE 4 +#elif DIGIT_BIT < 64 + #define WINSIZE 5 +#elif DIGIT_BIT < 128 + #define WINSIZE 6 +#endif +int mp_exptmod_base_2(mp_int * X, mp_int * P, mp_int * Y) +{ + mp_digit buf, mp; + int err = MP_OKAY, bitbuf, bitcpy, bitcnt, digidx, x, y; +#ifdef WOLFSSL_SMALL_STACK + mp_int *res = NULL; +#else + mp_int res[1]; +#endif + int (*redux)(mp_int*,mp_int*,mp_digit) = NULL; + + /* automatically pick the comba one if available (saves quite a few + calls/ifs) */ +#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C + if (((P->used * 2 + 1) < (int)MP_WARRAY) && + P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { + redux = fast_mp_montgomery_reduce; + } else +#endif + { +#ifdef BN_MP_MONTGOMERY_REDUCE_C + /* use slower baseline Montgomery method */ + redux = mp_montgomery_reduce; +#else + return MP_VAL; +#endif + } + +#ifdef WOLFSSL_SMALL_STACK + res = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (res == NULL) { + return MP_MEM; + } +#endif + + /* now setup montgomery */ + if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) { + goto LBL_M; + } + + /* setup result */ + if ((err = mp_init(res)) != MP_OKAY) { + goto LBL_M; + } + + /* now we need R mod m */ + if ((err = mp_montgomery_calc_normalization(res, P)) != MP_OKAY) { + goto LBL_RES; + } + + /* Get the top bits left over after taking WINSIZE bits starting at the + * least-significant. + */ + digidx = X->used - 1; + bitcpy = (X->used * DIGIT_BIT) % WINSIZE; + if (bitcpy > 0) { + bitcnt = (int)DIGIT_BIT - bitcpy; + buf = X->dp[digidx--]; + bitbuf = (int)(buf >> bitcnt); + /* Multiply montgomery representation of 1 by 2 ^ top */ + err = mp_mul_2d(res, bitbuf, res); + if (err != MP_OKAY) { + goto LBL_RES; + } + err = mp_mod(res, P, res); + if (err != MP_OKAY) { + goto LBL_RES; + } + /* Move out bits used */ + buf <<= bitcpy; + bitcnt++; + } + else { + bitcnt = 1; + buf = 0; + } + + /* empty window and reset */ + bitbuf = 0; + bitcpy = 0; + + for (;;) { + /* grab next digit as required */ + if (--bitcnt == 0) { + /* if digidx == -1 we are out of digits so break */ + if (digidx == -1) { + break; + } + /* read next digit and reset bitcnt */ + buf = X->dp[digidx--]; + bitcnt = (int)DIGIT_BIT; + } + + /* grab the next msb from the exponent */ + y = (int)(buf >> (DIGIT_BIT - 1)) & 1; + buf <<= (mp_digit)1; + /* add bit to the window */ + bitbuf |= (y << (WINSIZE - ++bitcpy)); + + if (bitcpy == WINSIZE) { + /* ok window is filled so square as required and multiply */ + /* square first */ + for (x = 0; x < WINSIZE; x++) { + err = mp_sqr(res, res); + if (err != MP_OKAY) { + goto LBL_RES; + } + err = (*redux)(res, P, mp); + if (err != MP_OKAY) { + goto LBL_RES; + } + } + + /* then multiply by 2^bitbuf */ + err = mp_mul_2d(res, bitbuf, res); + if (err != MP_OKAY) { + goto LBL_RES; + } + err = mp_mod(res, P, res); + if (err != MP_OKAY) { + goto LBL_RES; + } + + /* empty window and reset */ + bitcpy = 0; + bitbuf = 0; + } + } + + /* fixup result if Montgomery reduction is used + * recall that any value in a Montgomery system is + * actually multiplied by R mod n. 
So we have + * to reduce one more time to cancel out the factor + * of R. + */ + err = (*redux)(res, P, mp); + if (err != MP_OKAY) { + goto LBL_RES; + } + + /* swap res with Y */ + mp_copy(res, Y); + +LBL_RES:mp_clear (res); +LBL_M: +#ifdef WOLFSSL_SMALL_STACK + XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return err; +} + +#undef WINSIZE +#endif /* BN_MP_EXPTMOD_BASE_2 */ + + +/* setups the montgomery reduction stuff */ +int mp_montgomery_setup (mp_int * n, mp_digit * rho) +{ + mp_digit x, b; + +/* fast inversion mod 2**k + * + * Based on the fact that + * + * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) + * => 2*X*A - X*X*A*A = 1 + * => 2*(1) - (1) = 1 + */ + b = n->dp[0]; + + if ((b & 1) == 0) { + return MP_VAL; + } + + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ +#if !defined(MP_8BIT) + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ +#endif +#if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT)) + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ +#endif +#ifdef MP_64BIT + x *= 2 - b * x; /* here x*a==1 mod 2**64 */ +#endif + + /* rho = -1/m mod b */ + /* TAO, switched mp_word casts to mp_digit to shut up compiler */ + *rho = (mp_digit)((((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK); + + return MP_OKAY; +} + + +/* computes xR**-1 == x (mod N) via Montgomery Reduction + * + * This is an optimized implementation of montgomery_reduce + * which uses the comba method to quickly calculate the columns of the + * reduction. + * + * Based on Algorithm 14.32 on pp.601 of HAC. +*/ +int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) +{ + int ix, res, olduse; +#ifdef WOLFSSL_SMALL_STACK + mp_word* W; /* uses dynamic memory and slower */ +#else + mp_word W[MP_WARRAY]; +#endif + + /* get old used count */ + olduse = x->used; + + /* grow a as required */ + if (x->alloc < n->used + 1) { + if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) { + return res; + } + } + +#ifdef WOLFSSL_SMALL_STACK + W = (mp_word*)XMALLOC(sizeof(mp_word) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT); + if (W == NULL) + return MP_MEM; +#endif + + /* first we have to get the digits of the input into + * an array of double precision words W[...] + */ + { + mp_word *_W; + mp_digit *tmpx; + + /* alias for the W[] array */ + _W = W; + + /* alias for the digits of x*/ + tmpx = x->dp; + + /* copy the digits of a into W[0..a->used-1] */ + for (ix = 0; ix < x->used; ix++) { + *_W++ = *tmpx++; + } + + /* zero the high words of W[a->used..m->used*2] */ + for (; ix < n->used * 2 + 1; ix++) { + *_W++ = 0; + } + } + + /* now we proceed to zero successive digits + * from the least significant upwards + */ + for (ix = 0; ix < n->used; ix++) { + /* mu = ai * m' mod b + * + * We avoid a double precision multiplication (which isn't required) + * by casting the value down to a mp_digit. Note this requires + * that W[ix-1] have the carry cleared (see after the inner loop) + */ + mp_digit mu; + mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK); + + /* a = a + mu * m * b**i + * + * This is computed in place and on the fly. The multiplication + * by b**i is handled by offsetting which columns the results + * are added to. + * + * Note the comba method normally doesn't handle carries in the + * inner loop In this case we fix the carry from the previous + * column since the Montgomery reduction requires digits of the + * result (so far) [see above] to work. This is + * handled by fixing up one carry after the inner loop. 
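+ * (Why the column cancels: rho was computed as -1/n mod b, so
+ *  W[ix] + mu * n0 = W[ix] - W[ix] = 0 (mod b).)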
The
+ * carry fixups are done in order so after these loops the
+ * first m->used words of W[] have the carries fixed
+ */
+  {
+    int iy;
+    mp_digit *tmpn;
+    mp_word *_W;
+
+    /* alias for the digits of the modulus */
+    tmpn = n->dp;
+
+    /* Alias for the columns set by an offset of ix */
+    _W = W + ix;
+
+    /* inner loop */
+    for (iy = 0; iy < n->used; iy++) {
+      *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
+    }
+  }
+
+  /* now fix carry for next digit, W[ix+1] */
+  W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
+  }
+
+  /* now we have to propagate the carries and
+   * shift the words downward [all those least
+   * significant digits we zeroed].
+   */
+  {
+    mp_digit *tmpx;
+    mp_word *_W, *_W1;
+
+    /* now fix the rest of the carries */
+
+    /* alias for current word */
+    _W1 = W + ix;
+
+    /* alias for next word, where the carry goes */
+    _W = W + ++ix;
+
+    for (; ix <= n->used * 2 + 1; ix++) {
+      *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
+    }
+
+    /* copy out, A = A/b**n
+     *
+     * The result is A/b**n but instead of converting from an
+     * array of mp_word to mp_digit and then calling mp_rshd
+     * we just copy them in the right order
+     */
+
+    /* alias for destination word */
+    tmpx = x->dp;
+
+    /* alias for shifted double precision result */
+    _W = W + n->used;
+
+    for (ix = 0; ix < n->used + 1; ix++) {
+      *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
+    }
+
+    /* zero olduse digits, if the input a was larger than
+     * m->used+1 we'll have to clear the digits
+     */
+    for (; ix < olduse; ix++) {
+      *tmpx++ = 0;
+    }
+  }
+
+  /* set the max used and clamp */
+  x->used = n->used + 1;
+  mp_clamp (x);
+
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+  /* if A >= m then A = A - m */
+  if (mp_cmp_mag (x, n) != MP_LT) {
+    return s_mp_sub (x, n, x);
+  }
+  return MP_OKAY;
+}
+
+
+/* computes xR**-1 == x (mod N) via Montgomery Reduction */
+int mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
+{
+  int ix, res, digs;
+  mp_digit mu;
+
+  /* can the fast reduction [comba] method be used?
+   *
+   * Note that unlike in mul you're safely allowed *less*
+   * than the available columns [255 by default] since carries
+   * are fixed up in the inner loop.
+ */ + digs = n->used * 2 + 1; + if ((digs < (int)MP_WARRAY) && + n->used < + (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { + return fast_mp_montgomery_reduce (x, n, rho); + } + + /* grow the input as required */ + if (x->alloc < digs) { + if ((res = mp_grow (x, digs)) != MP_OKAY) { + return res; + } + } + x->used = digs; + + for (ix = 0; ix < n->used; ix++) { + /* mu = ai * rho mod b + * + * The value of rho must be precalculated via + * montgomery_setup() such that + * it equals -1/n0 mod b this allows the + * following inner loop to reduce the + * input one digit at a time + */ + mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK); + + /* a = a + mu * m * b**i */ + { + int iy; + mp_digit *tmpn, *tmpx, u; + mp_word r; + + /* alias for digits of the modulus */ + tmpn = n->dp; + + /* alias for the digits of x [the input] */ + tmpx = x->dp + ix; + + /* set the carry to zero */ + u = 0; + + /* Multiply and add in place */ + for (iy = 0; iy < n->used; iy++) { + /* compute product and sum */ + r = ((mp_word)mu) * ((mp_word)*tmpn++) + + ((mp_word) u) + ((mp_word) * tmpx); + + /* get carry */ + u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); + + /* fix digit */ + *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK)); + } + /* At this point the ix'th digit of x should be zero */ + + + /* propagate carries upwards as required*/ + while (u) { + *tmpx += u; + u = *tmpx >> DIGIT_BIT; + *tmpx++ &= MP_MASK; + } + } + } + + /* at this point the n.used'th least + * significant digits of x are all zero + * which means we can shift x to the + * right by n.used digits and the + * residue is unchanged. + */ + + /* x = x/b**n.used */ + mp_clamp(x); + mp_rshd (x, n->used); + + /* if x >= n then x = x - n */ + if (mp_cmp_mag (x, n) != MP_LT) { + return s_mp_sub (x, n, x); + } + + return MP_OKAY; +} + + +/* determines the setup value */ +void mp_dr_setup(mp_int *a, mp_digit *d) +{ + /* the casts are required if DIGIT_BIT is one less than + * the number of bits in a mp_digit [e.g. DIGIT_BIT==31] + */ + *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - + ((mp_word)a->dp[0])); +} + + +/* reduce "x" in place modulo "n" using the Diminished Radix algorithm. + * + * Based on algorithm from the paper + * + * "Generating Efficient Primes for Discrete Log Cryptosystems" + * Chae Hoon Lim, Pil Joong Lee, + * POSTECH Information Research Laboratories + * + * The modulus must be of a special format [see manual] + * + * Has been modified to use algorithm 7.10 from the LTM book instead + * + * Input x must be in the range 0 <= x <= (n-1)**2 + */ +int mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k) +{ + int err, i, m; + mp_word r; + mp_digit mu, *tmpx1, *tmpx2; + + /* m = digits in modulus */ + m = n->used; + + /* ensure that "x" has at least 2m digits */ + if (x->alloc < m + m) { + if ((err = mp_grow (x, m + m)) != MP_OKAY) { + return err; + } + } + +/* top of loop, this is where the code resumes if + * another reduction pass is required. 
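+ *
+ * The identity at work: the modulus has the special shape n = B**m - k
+ * (B being the digit radix), so B**m = k (mod n) and therefore
+ * x = xHi * B**m + xLo = xHi * k + xLo (mod n), which is exactly what
+ * one pass of the loop below computes.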
+ */ +top: + /* aliases for digits */ + /* alias for lower half of x */ + tmpx1 = x->dp; + + /* alias for upper half of x, or x/B**m */ + tmpx2 = x->dp + m; + + /* set carry to zero */ + mu = 0; + + /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ + for (i = 0; i < m; i++) { + r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu; + *tmpx1++ = (mp_digit)(r & MP_MASK); + mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT)); + } + + /* set final carry */ + *tmpx1++ = mu; + + /* zero words above m */ + for (i = m + 1; i < x->used; i++) { + *tmpx1++ = 0; + } + + /* clamp, sub and return */ + mp_clamp (x); + + /* if x >= n then subtract and reduce again + * Each successive "recursion" makes the input smaller and smaller. + */ + if (mp_cmp_mag (x, n) != MP_LT) { + if ((err = s_mp_sub(x, n, x)) != MP_OKAY) { + return err; + } + goto top; + } + return MP_OKAY; +} + + +/* reduces a modulo n where n is of the form 2**p - d */ +int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d) +{ + mp_int q; + int p, res; + + if ((res = mp_init(&q)) != MP_OKAY) { + return res; + } + + p = mp_count_bits(n); +top: + /* q = a/2**p, a = a mod 2**p */ + if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { + goto ERR; + } + + if (d != 1) { + /* q = q * d */ + if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) { + goto ERR; + } + } + + /* a = a + q */ + if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { + goto ERR; + } + + if (mp_cmp_mag(a, n) != MP_LT) { + if ((res = s_mp_sub(a, n, a)) != MP_OKAY) { + goto ERR; + } + goto top; + } + +ERR: + mp_clear(&q); + return res; +} + + +/* determines the setup value */ +int mp_reduce_2k_setup(mp_int *a, mp_digit *d) +{ + int res, p; + mp_int tmp; + + if ((res = mp_init(&tmp)) != MP_OKAY) { + return res; + } + + p = mp_count_bits(a); + if ((res = mp_2expt(&tmp, p)) != MP_OKAY) { + mp_clear(&tmp); + return res; + } + + if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) { + mp_clear(&tmp); + return res; + } + + *d = tmp.dp[0]; + mp_clear(&tmp); + return MP_OKAY; +} + + +/* set the b bit of a */ +int mp_set_bit (mp_int * a, int b) +{ + int i = b / DIGIT_BIT, res; + + /* + * Require: + * bit index b >= 0 + * a->alloc == a->used == 0 if a->dp == NULL + */ + if (b < 0 || (a->dp == NULL && (a->alloc != 0 || a->used != 0))) + return MP_VAL; + + if (a->dp == NULL || a->used < (int)(i + 1)) { + /* grow a to accommodate the single bit */ + if ((res = mp_grow (a, i + 1)) != MP_OKAY) { + return res; + } + + /* set the used count of where the bit will go */ + a->used = (int)(i + 1); + } + + /* put the single bit in its place */ + a->dp[i] |= ((mp_digit)1) << (b % DIGIT_BIT); + + return MP_OKAY; +} + +/* computes a = 2**b + * + * Simple algorithm which zeros the int, set the required bit + */ +int mp_2expt (mp_int * a, int b) +{ + /* zero a as per default */ + mp_zero (a); + + return mp_set_bit(a, b); +} + +/* multiply by a digit */ +int mp_mul_d (mp_int * a, mp_digit b, mp_int * c) +{ + mp_digit u, *tmpa, *tmpc; + mp_word r; + int ix, res, olduse; + + /* make sure c is big enough to hold a*b */ + if (c->alloc < a->used + 1) { + if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) { + return res; + } + } + + /* get the original destinations used count */ + olduse = c->used; + + /* set the sign */ + c->sign = a->sign; + + /* alias for a->dp [source] */ + tmpa = a->dp; + + /* alias for c->dp [dest] */ + tmpc = c->dp; + + /* zero carry */ + u = 0; + + /* compute columns */ + for (ix = 0; ix < a->used; ix++) { + /* compute product and carry sum for this term */ + r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b); + + /* 
mask off higher bits to get a single digit */ + *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK)); + + /* send carry into next iteration */ + u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); + } + + /* store final carry [if any] and increment ix offset */ + *tmpc++ = u; + ++ix; + + /* now zero digits above the top */ + while (ix++ < olduse) { + *tmpc++ = 0; + } + + /* set used count */ + c->used = a->used + 1; + mp_clamp(c); + + return MP_OKAY; +} + + +/* d = a * b (mod c) */ +#if defined(FREESCALE_LTC_TFM) +int wolfcrypt_mp_mulmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d) +#else +int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) +#endif +{ + int res; + mp_int t; + + if ((res = mp_init_size (&t, c->used)) != MP_OKAY) { + return res; + } + + res = mp_mul (a, b, &t); + if (res == MP_OKAY) { + res = mp_mod (&t, c, d); + } + + mp_clear (&t); + return res; +} + + +/* d = a - b (mod c) */ +int mp_submod(mp_int* a, mp_int* b, mp_int* c, mp_int* d) +{ + int res; + mp_int t; + + if ((res = mp_init (&t)) != MP_OKAY) { + return res; + } + + res = mp_sub (a, b, &t); + if (res == MP_OKAY) { + res = mp_mod (&t, c, d); + } + + mp_clear (&t); + + return res; +} + +/* d = a + b (mod c) */ +int mp_addmod(mp_int* a, mp_int* b, mp_int* c, mp_int* d) +{ + int res; + mp_int t; + + if ((res = mp_init (&t)) != MP_OKAY) { + return res; + } + + res = mp_add (a, b, &t); + if (res == MP_OKAY) { + res = mp_mod (&t, c, d); + } + + mp_clear (&t); + + return res; +} + +/* computes b = a*a */ +int mp_sqr (mp_int * a, mp_int * b) +{ + int res; + + { +#ifdef BN_FAST_S_MP_SQR_C + /* can we use the fast comba multiplier? */ + if ((a->used * 2 + 1) < (int)MP_WARRAY && + a->used < + (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) { + res = fast_s_mp_sqr (a, b); + } else +#endif +#ifdef BN_S_MP_SQR_C + res = s_mp_sqr (a, b); +#else + res = MP_VAL; +#endif + } + b->sign = MP_ZPOS; + return res; +} + + +/* high level multiplication (handles sign) */ +#if defined(FREESCALE_LTC_TFM) +int wolfcrypt_mp_mul(mp_int *a, mp_int *b, mp_int *c) +#else +int mp_mul (mp_int * a, mp_int * b, mp_int * c) +#endif +{ + int res, neg; + neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; + + { +#ifdef BN_FAST_S_MP_MUL_DIGS_C + /* can we use the fast multiplier? + * + * The fast multiplier can be used if the output will + * have less than MP_WARRAY digits and the number of + * digits won't affect carry propagation + */ + int digs = a->used + b->used + 1; + + if ((digs < (int)MP_WARRAY) && + MIN(a->used, b->used) <= + (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { + res = fast_s_mp_mul_digs (a, b, c, digs); + } else +#endif +#ifdef BN_S_MP_MUL_DIGS_C + res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */ +#else + res = MP_VAL; +#endif + + } + c->sign = (c->used > 0) ? 
neg : MP_ZPOS;
+  return res;
+}
+
+
+/* b = a*2 */
+int mp_mul_2(mp_int * a, mp_int * b)
+{
+  int x, res, oldused;
+
+  /* grow to accommodate result */
+  if (b->alloc < a->used + 1) {
+    if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
+      return res;
+    }
+  }
+
+  oldused = b->used;
+  b->used = a->used;
+
+  {
+    mp_digit r, rr, *tmpa, *tmpb;
+
+    /* alias for source */
+    tmpa = a->dp;
+
+    /* alias for dest */
+    tmpb = b->dp;
+
+    /* carry */
+    r = 0;
+    for (x = 0; x < a->used; x++) {
+
+      /* get what will be the *next* carry bit from the
+       * MSB of the current digit
+       */
+      rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
+
+      /* now shift up this digit, add in the carry [from the previous] */
+      *tmpb++ = (mp_digit)(((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK);
+
+      /* copy the carry that would be from the source
+       * digit into the next iteration
+       */
+      r = rr;
+    }
+
+    /* new leading digit? */
+    if (r != 0) {
+      /* add a MSB which is always 1 at this point */
+      *tmpb = 1;
+      ++(b->used);
+    }
+
+    /* now zero any excess digits on the destination
+     * that we didn't write to
+     */
+    tmpb = b->dp + b->used;
+    for (x = b->used; x < oldused; x++) {
+      *tmpb++ = 0;
+    }
+  }
+  b->sign = a->sign;
+  return MP_OKAY;
+}
+
+
+/* divide by three (based on routine from MPI and the GMP manual) */
+int mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
+{
+  mp_int   q;
+  mp_word  w, t;
+  mp_digit b;
+  int      res, ix;
+
+  /* b = 2**DIGIT_BIT / 3 */
+  b = (mp_digit) ( (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3) );
+
+  if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
+    return res;
+  }
+
+  q.used = a->used;
+  q.sign = a->sign;
+  w = 0;
+  for (ix = a->used - 1; ix >= 0; ix--) {
+    w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
+
+    if (w >= 3) {
+      /* multiply w by [1/3] */
+      t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
+
+      /* now subtract 3 * [w/3] from w, to get the remainder */
+      w -= t+t+t;
+
+      /* fixup the remainder as required since
+       * the optimization is not exact.
+       */
+      while (w >= 3) {
+        t += 1;
+        w -= 3;
+      }
+    } else {
+      t = 0;
+    }
+    q.dp[ix] = (mp_digit)t;
+  }
+
+  /* [optional] store the remainder */
+  if (d != NULL) {
+    *d = (mp_digit)w;
+  }
+
+  /* [optional] store the quotient */
+  if (c != NULL) {
+    mp_clamp(&q);
+    mp_exch(&q, c);
+  }
+  mp_clear(&q);
+
+  return res;
+}
+
+
+/* init an mp_int for a given size */
+int mp_init_size (mp_int * a, int size)
+{
+  int x;
+
+  /* pad size so there are always extra digits */
+  size += (MP_PREC * 2) - (size % MP_PREC);
+
+  /* alloc mem */
+  a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, NULL,
+                                      DYNAMIC_TYPE_BIGINT);
+  if (a->dp == NULL) {
+    return MP_MEM;
+  }
+
+  /* set the members */
+  a->used  = 0;
+  a->alloc = size;
+  a->sign  = MP_ZPOS;
+#ifdef HAVE_WOLF_BIGINT
+  wc_bigint_init(&a->raw);
+#endif
+
+  /* zero the digits */
+  for (x = 0; x < size; x++) {
+    a->dp[x] = 0;
+  }
+
+  return MP_OKAY;
+}
+
+
+/* the gist of squaring...
+ * it works like multiplication except the offset of tmpx [the one that
+ * starts closer to zero] can't equal the offset of tmpy.
+ * So basically you set up iy like before, then you min it with
+ * (ty-tx) so that it never happens.  You double everything
+ * added in the inner loop.
+
+After that loop you do the squares and add them in.
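+
+(An editor's self-check sketch, not part of the upstream file, follows;
+ the comment then ends.)
+*/
+
+#if 0
+/* Editor's sketch only: a squaring result must agree with a general
+ * multiply of the value by itself, which gives a cheap cross-check of
+ * the two code paths. The helper name example_sqr_check() is ours. */
+static int example_sqr_check(mp_int* a)
+{
+    mp_int s, m;
+    int err = mp_init_multi(&s, &m, 0, 0, 0, 0);
+    if (err == MP_OKAY) err = mp_sqr(a, &s);    /* specialized squaring path */
+    if (err == MP_OKAY) err = mp_mul(a, a, &m); /* generic multiply path */
+    if (err == MP_OKAY && mp_cmp(&s, &m) != MP_EQ)
+        err = MP_VAL;                           /* the two paths disagree */
+    mp_clear(&s); mp_clear(&m);
+    return err;
+}
+#endif
+
+/* (end of squaring note)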
+*/ + +int fast_s_mp_sqr (mp_int * a, mp_int * b) +{ + int olduse, res, pa, ix, iz; +#ifdef WOLFSSL_SMALL_STACK + mp_digit* W; /* uses dynamic memory and slower */ +#else + mp_digit W[MP_WARRAY]; +#endif + mp_digit *tmpx; + mp_word W1; + + /* grow the destination as required */ + pa = a->used + a->used; + if (b->alloc < pa) { + if ((res = mp_grow (b, pa)) != MP_OKAY) { + return res; + } + } + + if (pa > (int)MP_WARRAY) + return MP_RANGE; /* TAO range check */ + +#ifdef WOLFSSL_SMALL_STACK + W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT); + if (W == NULL) + return MP_MEM; +#endif + + /* number of output digits to produce */ + W1 = 0; + for (ix = 0; ix < pa; ix++) { + int tx, ty, iy; + mp_word _W; + mp_digit *tmpy; + + /* clear counter */ + _W = 0; + + /* get offsets into the two bignums */ + ty = MIN(a->used-1, ix); + tx = ix - ty; + + /* setup temp aliases */ + tmpx = a->dp + tx; + tmpy = a->dp + ty; + + /* this is the number of times the loop will iterate, essentially + while (tx++ < a->used && ty-- >= 0) { ... } + */ + iy = MIN(a->used-tx, ty+1); + + /* now for squaring tx can never equal ty + * we halve the distance since they approach at a rate of 2x + * and we have to round because odd cases need to be executed + */ + iy = MIN(iy, (ty-tx+1)>>1); + + /* execute loop */ + for (iz = 0; iz < iy; iz++) { + _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); + } + + /* double the inner product and add carry */ + _W = _W + _W + W1; + + /* even columns have the square term in them */ + if ((ix&1) == 0) { + _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]); + } + + /* store it */ + W[ix] = (mp_digit)(_W & MP_MASK); + + /* make next carry */ + W1 = _W >> ((mp_word)DIGIT_BIT); + } + + /* setup dest */ + olduse = b->used; + b->used = a->used+a->used; + + { + mp_digit *tmpb; + tmpb = b->dp; + for (ix = 0; ix < pa; ix++) { + *tmpb++ = (mp_digit)(W[ix] & MP_MASK); + } + + /* clear unused digits [that existed in the old copy of c] */ + for (; ix < olduse; ix++) { + *tmpb++ = 0; + } + } + mp_clamp (b); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(W, NULL, DYNAMIC_TYPE_BIGINT); +#endif + + return MP_OKAY; +} + + +/* Fast (comba) multiplier + * + * This is the fast column-array [comba] multiplier. It is + * designed to compute the columns of the product first + * then handle the carries afterwards. This has the effect + * of making the nested loops that compute the columns very + * simple and schedulable on super-scalar processors. + * + * This has been modified to produce a variable number of + * digits of output so if say only a half-product is required + * you don't have to compute the upper half (a feature + * required for fast Barrett reduction). + * + * Based on Algorithm 14.12 on pp.595 of HAC. 
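+ *
+ * A worked instance of the column pass, in radix 10 for brevity:
+ * 23 * 47 first collects raw columns with no carrying,
+ *   col 0: 3*7       = 21
+ *   col 1: 2*7 + 3*4 = 26
+ *   col 2: 2*4       =  8
+ * and the single carry pass afterwards yields the digits 1, 8, 0, 1
+ * (least significant first), i.e. 1081 = 23 * 47.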
+ * + */ +int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) +{ + int olduse, res, pa, ix, iz; +#ifdef WOLFSSL_SMALL_STACK + mp_digit* W; /* uses dynamic memory and slower */ +#else + mp_digit W[MP_WARRAY]; +#endif + mp_word _W; + + /* grow the destination as required */ + if (c->alloc < digs) { + if ((res = mp_grow (c, digs)) != MP_OKAY) { + return res; + } + } + + /* number of output digits to produce */ + pa = MIN(digs, a->used + b->used); + if (pa > (int)MP_WARRAY) + return MP_RANGE; /* TAO range check */ + +#ifdef WOLFSSL_SMALL_STACK + W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT); + if (W == NULL) + return MP_MEM; +#endif + + /* clear the carry */ + _W = 0; + for (ix = 0; ix < pa; ix++) { + int tx, ty; + int iy; + mp_digit *tmpx, *tmpy; + + /* get offsets into the two bignums */ + ty = MIN(b->used-1, ix); + tx = ix - ty; + + /* setup temp aliases */ + tmpx = a->dp + tx; + tmpy = b->dp + ty; + + /* this is the number of times the loop will iterate, essentially + while (tx++ < a->used && ty-- >= 0) { ... } + */ + iy = MIN(a->used-tx, ty+1); + + /* execute loop */ + for (iz = 0; iz < iy; ++iz) { + _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); + + } + + /* store term */ + W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK); + + /* make next carry */ + _W = _W >> ((mp_word)DIGIT_BIT); + } + + /* setup dest */ + olduse = c->used; + c->used = pa; + + { + mp_digit *tmpc; + tmpc = c->dp; + for (ix = 0; ix < pa; ix++) { /* JRB, +1 could read uninitialized data */ + /* now extract the previous digit [below the carry] */ + *tmpc++ = W[ix]; + } + + /* clear unused digits [that existed in the old copy of c] */ + for (; ix < olduse; ix++) { + *tmpc++ = 0; + } + } + mp_clamp (c); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(W, NULL, DYNAMIC_TYPE_BIGINT); +#endif + + return MP_OKAY; +} + + +/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */ +int s_mp_sqr (mp_int * a, mp_int * b) +{ + mp_int t; + int res, ix, iy, pa; + mp_word r; + mp_digit u, tmpx, *tmpt; + + pa = a->used; + if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) { + return res; + } + + /* default used is maximum possible size */ + t.used = 2*pa + 1; + + for (ix = 0; ix < pa; ix++) { + /* first calculate the digit at 2*ix */ + /* calculate double precision result */ + r = ((mp_word) t.dp[2*ix]) + + ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]); + + /* store lower part in result */ + t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK)); + + /* get the carry */ + u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); + + /* left hand side of A[ix] * A[iy] */ + tmpx = a->dp[ix]; + + /* alias for where to store the results */ + tmpt = t.dp + (2*ix + 1); + + for (iy = ix + 1; iy < pa; iy++) { + /* first calculate the product */ + r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]); + + /* now calculate the double precision result, note we use + * addition instead of *2 since it's easier to optimize + */ + r = ((mp_word) *tmpt) + r + r + ((mp_word) u); + + /* store lower part */ + *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); + + /* get carry */ + u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); + } + /* propagate upwards */ + while (u != ((mp_digit) 0)) { + r = ((mp_word) *tmpt) + ((mp_word) u); + *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); + u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); + } + } + + mp_clamp (&t); + mp_exch (&t, b); + mp_clear (&t); + return MP_OKAY; +} + + +/* multiplies |a| * |b| and only computes up to digs digits of result + * HAC pp. 
595, Algorithm 14.12 Modified so you can control how + * many digits of output are created. + */ +int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) +{ + mp_int t; + int res, pa, pb, ix, iy; + mp_digit u; + mp_word r; + mp_digit tmpx, *tmpt, *tmpy; + + /* can we use the fast multiplier? */ + if ((digs < (int)MP_WARRAY) && + MIN (a->used, b->used) < + (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { + return fast_s_mp_mul_digs (a, b, c, digs); + } + + if ((res = mp_init_size (&t, digs)) != MP_OKAY) { + return res; + } + t.used = digs; + + /* compute the digits of the product directly */ + pa = a->used; + for (ix = 0; ix < pa; ix++) { + /* set the carry to zero */ + u = 0; + + /* limit ourselves to making digs digits of output */ + pb = MIN (b->used, digs - ix); + + /* setup some aliases */ + /* copy of the digit from a used within the nested loop */ + tmpx = a->dp[ix]; + + /* an alias for the destination shifted ix places */ + tmpt = t.dp + ix; + + /* an alias for the digits of b */ + tmpy = b->dp; + + /* compute the columns of the output and propagate the carry */ + for (iy = 0; iy < pb; iy++) { + /* compute the column as a mp_word */ + r = ((mp_word)*tmpt) + + ((mp_word)tmpx) * ((mp_word)*tmpy++) + + ((mp_word) u); + + /* the new column is the lower part of the result */ + *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); + + /* get the carry word from the result */ + u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); + } + /* set carry if it is placed below digs */ + if (ix + iy < digs) { + *tmpt = u; + } + } + + mp_clamp (&t); + mp_exch (&t, c); + + mp_clear (&t); + return MP_OKAY; +} + + +/* + * shifts with subtractions when the result is greater than b. + * + * The method is slightly modified to shift B unconditionally up to just under + * the leading bit of b. This saves a lot of multiple precision shifting. 
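+ *
+ * (An editor's usage sketch, not part of the upstream file, follows;
+ *  the header comment then ends.)
+ */
+
+#if 0
+/* Editor's sketch only: the usual Montgomery round trip assembled from
+ * the primitives in this file. The helper name is ours and error paths
+ * are abbreviated. On success x is replaced by x^2 mod n. */
+static int example_montgomery_sqrmod(mp_int* x, mp_int* n)
+{
+    mp_int R;
+    mp_digit rho;
+    int err = mp_init(&R);
+    if (err == MP_OKAY) err = mp_montgomery_setup(n, &rho);            /* rho = -1/n mod b */
+    if (err == MP_OKAY) err = mp_montgomery_calc_normalization(&R, n); /* R mod n */
+    if (err == MP_OKAY) err = mp_mulmod(x, &R, n, x);                  /* to Montgomery form: xR mod n */
+    if (err == MP_OKAY) err = mp_sqr(x, x);                            /* now x^2 * R^2 */
+    if (err == MP_OKAY) err = mp_montgomery_reduce(x, n, rho);         /* strip one R: x^2 * R mod n */
+    if (err == MP_OKAY) err = mp_montgomery_reduce(x, n, rho);         /* strip the other: x^2 mod n */
+    mp_clear(&R);
+    return err;
+}
+#endif
+
+/* (normalization routine follows)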
+ */ +int mp_montgomery_calc_normalization (mp_int * a, mp_int * b) +{ + int x, bits, res; + + /* how many bits of last digit does b use */ + bits = mp_count_bits (b) % DIGIT_BIT; + + if (b->used > 1) { + if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) + != MP_OKAY) { + return res; + } + } else { + if ((res = mp_set(a, 1)) != MP_OKAY) { + return res; + } + bits = 1; + } + + /* now compute C = A * B mod b */ + for (x = bits - 1; x < (int)DIGIT_BIT; x++) { + if ((res = mp_mul_2 (a, a)) != MP_OKAY) { + return res; + } + if (mp_cmp_mag (a, b) != MP_LT) { + if ((res = s_mp_sub (a, b, a)) != MP_OKAY) { + return res; + } + } + } + + return MP_OKAY; +} + + +#ifdef MP_LOW_MEM + #define TAB_SIZE 32 +#else + #define TAB_SIZE 256 +#endif + +int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) +{ + mp_int M[TAB_SIZE], res, mu; + mp_digit buf; + int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; + int (*redux)(mp_int*,mp_int*,mp_int*); + + /* find window size */ + x = mp_count_bits (X); + if (x <= 7) { + winsize = 2; + } else if (x <= 36) { + winsize = 3; + } else if (x <= 140) { + winsize = 4; + } else if (x <= 450) { + winsize = 5; + } else if (x <= 1303) { + winsize = 6; + } else if (x <= 3529) { + winsize = 7; + } else { + winsize = 8; + } + +#ifdef MP_LOW_MEM + if (winsize > 5) { + winsize = 5; + } +#endif + + /* init M array */ + /* init first cell */ + if ((err = mp_init(&M[1])) != MP_OKAY) { + return err; + } + + /* now init the second half of the array */ + for (x = 1<<(winsize-1); x < (1 << winsize); x++) { + if ((err = mp_init(&M[x])) != MP_OKAY) { + for (y = 1<<(winsize-1); y < x; y++) { + mp_clear (&M[y]); + } + mp_clear(&M[1]); + return err; + } + } + + /* create mu, used for Barrett reduction */ + if ((err = mp_init (&mu)) != MP_OKAY) { + goto LBL_M; + } + + if (redmode == 0) { + if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) { + goto LBL_MU; + } + redux = mp_reduce; + } else { + if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) { + goto LBL_MU; + } + redux = mp_reduce_2k_l; + } + + /* create M table + * + * The M table contains powers of the base, + * e.g. 
M[x] = G**x mod P
+   *
+   * The first half of the table is not
+   * computed, except for M[0] and M[1]
+   */
+  if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
+    goto LBL_MU;
+  }
+
+  /* compute the value at M[1<<(winsize-1)] by squaring
+   * M[1] (winsize-1) times
+   */
+  if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
+    goto LBL_MU;
+  }
+
+  for (x = 0; x < (winsize - 1); x++) {
+    /* square it */
+    if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
+                       &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
+      goto LBL_MU;
+    }
+
+    /* reduce modulo P */
+    if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, &mu)) != MP_OKAY) {
+      goto LBL_MU;
+    }
+  }
+
+  /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
+   * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
+   */
+  for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
+    if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
+      goto LBL_MU;
+    }
+    if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
+      goto LBL_MU;
+    }
+  }
+
+  /* setup result */
+  if ((err = mp_init (&res)) != MP_OKAY) {
+    goto LBL_MU;
+  }
+  if ((err = mp_set (&res, 1)) != MP_OKAY) {
+    goto LBL_MU;
+  }
+
+  /* set initial mode and bit cnt */
+  mode   = 0;
+  bitcnt = 1;
+  buf    = 0;
+  digidx = X->used - 1;
+  bitcpy = 0;
+  bitbuf = 0;
+
+  for (;;) {
+    /* grab next digit as required */
+    if (--bitcnt == 0) {
+      /* if digidx == -1 we are out of digits */
+      if (digidx == -1) {
+        break;
+      }
+      /* read next digit and reset the bitcnt */
+      buf    = X->dp[digidx--];
+      bitcnt = (int) DIGIT_BIT;
+    }
+
+    /* grab the next msb from the exponent */
+    y = (int)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
+    buf <<= (mp_digit)1;
+
+    /* if the bit is zero and mode == 0 then we ignore it
+     * These represent the leading zero bits before the first 1 bit
+     * in the exponent.
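+     * For example, an exponent whose top digit is 5 (101 in binary) in a
+     * 28-bit digit carries 25 leading zero bits, and every one of them
+     * falls through here without touching res.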
Technically this opt is not required but it + * does lower the # of trivial squaring/reductions used + */ + if (mode == 0 && y == 0) { + continue; + } + + /* if the bit is zero and mode == 1 then we square */ + if (mode == 1 && y == 0) { + if ((err = mp_sqr (&res, &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, &mu)) != MP_OKAY) { + goto LBL_RES; + } + continue; + } + + /* else we add it to the window */ + bitbuf |= (y << (winsize - ++bitcpy)); + mode = 2; + + if (bitcpy == winsize) { + /* ok window is filled so square as required and multiply */ + /* square first */ + for (x = 0; x < winsize; x++) { + if ((err = mp_sqr (&res, &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, &mu)) != MP_OKAY) { + goto LBL_RES; + } + } + + /* then multiply */ + if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, &mu)) != MP_OKAY) { + goto LBL_RES; + } + + /* empty window and reset */ + bitcpy = 0; + bitbuf = 0; + mode = 1; + } + } + + /* if bits remain then square/multiply */ + if (mode == 2 && bitcpy > 0) { + /* square then multiply if the bit is set */ + for (x = 0; x < bitcpy; x++) { + if ((err = mp_sqr (&res, &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, &mu)) != MP_OKAY) { + goto LBL_RES; + } + + bitbuf <<= 1; + if ((bitbuf & (1 << winsize)) != 0) { + /* then multiply */ + if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { + goto LBL_RES; + } + if ((err = redux (&res, P, &mu)) != MP_OKAY) { + goto LBL_RES; + } + } + } + } + + mp_exch (&res, Y); + err = MP_OKAY; +LBL_RES:mp_clear (&res); +LBL_MU:mp_clear (&mu); +LBL_M: + mp_clear(&M[1]); + for (x = 1<<(winsize-1); x < (1 << winsize); x++) { + mp_clear (&M[x]); + } + return err; +} + + +/* pre-calculate the value required for Barrett reduction + * For a given modulus "b" it calculates the value required in "a" + */ +int mp_reduce_setup (mp_int * a, mp_int * b) +{ + int res; + + if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) { + return res; + } + return mp_div (a, b, a, NULL); +} + + +/* reduces x mod m, assumes 0 < x < m**2, mu is + * precomputed via mp_reduce_setup. 
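+ * With k = m->used and base b = 2**DIGIT_BIT, mu = b**(2k) / m, so the
+ * quotient estimate floor(floor(x / b**(k-1)) * mu / b**(k+1)) is at most
+ * two below the true quotient and the trailing correction loop runs at
+ * most twice.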
+ * From HAC pp.604 Algorithm 14.42 + */ +int mp_reduce (mp_int * x, mp_int * m, mp_int * mu) +{ + mp_int q; + int res, um = m->used; + + /* q = x */ + if ((res = mp_init_copy (&q, x)) != MP_OKAY) { + return res; + } + + /* q1 = x / b**(k-1) */ + mp_rshd (&q, um - 1); + + /* according to HAC this optimization is ok */ + if (((mp_word) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) { + if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) { + goto CLEANUP; + } + } else { +#ifdef BN_S_MP_MUL_HIGH_DIGS_C + if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { + goto CLEANUP; + } +#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C) + if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { + goto CLEANUP; + } +#else + { + res = MP_VAL; + goto CLEANUP; + } +#endif + } + + /* q3 = q2 / b**(k+1) */ + mp_rshd (&q, um + 1); + + /* x = x mod b**(k+1), quick (no division) */ + if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) { + goto CLEANUP; + } + + /* q = q * m mod b**(k+1), quick (no division) */ + if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) { + goto CLEANUP; + } + + /* x = x - q */ + if ((res = mp_sub (x, &q, x)) != MP_OKAY) { + goto CLEANUP; + } + + /* If x < 0, add b**(k+1) to it */ + if (mp_cmp_d (x, 0) == MP_LT) { + if ((res = mp_set (&q, 1)) != MP_OKAY) + goto CLEANUP; + if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) + goto CLEANUP; + if ((res = mp_add (x, &q, x)) != MP_OKAY) + goto CLEANUP; + } + + /* Back off if it's too big */ + while (mp_cmp (x, m) != MP_LT) { + if ((res = s_mp_sub (x, m, x)) != MP_OKAY) { + goto CLEANUP; + } + } + +CLEANUP: + mp_clear (&q); + + return res; +} + + +/* reduces a modulo n where n is of the form 2**p - d + This differs from reduce_2k since "d" can be larger + than a single digit. +*/ +int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d) +{ + mp_int q; + int p, res; + + if ((res = mp_init(&q)) != MP_OKAY) { + return res; + } + + p = mp_count_bits(n); +top: + /* q = a/2**p, a = a mod 2**p */ + if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { + goto ERR; + } + + /* q = q * d */ + if ((res = mp_mul(&q, d, &q)) != MP_OKAY) { + goto ERR; + } + + /* a = a + q */ + if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { + goto ERR; + } + + if (mp_cmp_mag(a, n) != MP_LT) { + if ((res = s_mp_sub(a, n, a)) != MP_OKAY) { + goto ERR; + } + goto top; + } + +ERR: + mp_clear(&q); + return res; +} + + +/* determines the setup value */ +int mp_reduce_2k_setup_l(mp_int *a, mp_int *d) +{ + int res; + mp_int tmp; + + if ((res = mp_init(&tmp)) != MP_OKAY) { + return res; + } + + if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) { + goto ERR; + } + + if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) { + goto ERR; + } + +ERR: + mp_clear(&tmp); + return res; +} + + +/* multiplies |a| * |b| and does not compute the lower digs digits + * [meant to get the higher part of the product] + */ +int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) +{ + mp_int t; + int res, pa, pb, ix, iy; + mp_digit u; + mp_word r; + mp_digit tmpx, *tmpt, *tmpy; + + /* can we use the fast multiplier? 
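+ * The comb-based fast_s_mp_mul_high_digs() is only usable when the whole
+ * product fits in the MP_WARRAY work area and MIN(a->used, b->used) is
+ * below 2**(CHAR_BIT*sizeof(mp_word) - 2*DIGIT_BIT), which keeps the
+ * mp_word column accumulator from overflowing.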
*/ +#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C + if (((a->used + b->used + 1) < (int)MP_WARRAY) + && MIN (a->used, b->used) < + (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { + return fast_s_mp_mul_high_digs (a, b, c, digs); + } +#endif + + if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) { + return res; + } + t.used = a->used + b->used + 1; + + pa = a->used; + pb = b->used; + for (ix = 0; ix < pa && a->dp; ix++) { + /* clear the carry */ + u = 0; + + /* left hand side of A[ix] * B[iy] */ + tmpx = a->dp[ix]; + + /* alias to the address of where the digits will be stored */ + tmpt = &(t.dp[digs]); + + /* alias for where to read the right hand side from */ + tmpy = b->dp + (digs - ix); + + for (iy = digs - ix; iy < pb; iy++) { + /* calculate the double precision result */ + r = ((mp_word)*tmpt) + + ((mp_word)tmpx) * ((mp_word)*tmpy++) + + ((mp_word) u); + + /* get the lower part */ + *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); + + /* carry the carry */ + u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); + } + *tmpt = u; + } + mp_clamp (&t); + mp_exch (&t, c); + mp_clear (&t); + return MP_OKAY; +} + + +/* this is a modified version of fast_s_mul_digs that only produces + * output digits *above* digs. See the comments for fast_s_mul_digs + * to see how it works. + * + * This is used in the Barrett reduction since for one of the multiplications + * only the higher digits were needed. This essentially halves the work. + * + * Based on Algorithm 14.12 on pp.595 of HAC. + */ +int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) +{ + int olduse, res, pa, ix, iz; +#ifdef WOLFSSL_SMALL_STACK + mp_digit* W; /* uses dynamic memory and slower */ +#else + mp_digit W[MP_WARRAY]; +#endif + mp_word _W; + + if (a->dp == NULL) { /* JRB, avoid reading uninitialized values */ + return MP_VAL; + } + + /* grow the destination as required */ + pa = a->used + b->used; + if (c->alloc < pa) { + if ((res = mp_grow (c, pa)) != MP_OKAY) { + return res; + } + } + + if (pa > (int)MP_WARRAY) + return MP_RANGE; /* TAO range check */ + +#ifdef WOLFSSL_SMALL_STACK + W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT); + if (W == NULL) + return MP_MEM; +#endif + + /* number of output digits to produce */ + pa = a->used + b->used; + _W = 0; + for (ix = digs; ix < pa; ix++) { /* JRB, have a->dp check at top of function*/ + int tx, ty, iy; + mp_digit *tmpx, *tmpy; + + /* get offsets into the two bignums */ + ty = MIN(b->used-1, ix); + tx = ix - ty; + + /* setup temp aliases */ + tmpx = a->dp + tx; + tmpy = b->dp + ty; + + /* this is the number of times the loop will iterate, essentially its + while (tx++ < a->used && ty-- >= 0) { ... 
} + */ + iy = MIN(a->used-tx, ty+1); + + /* execute loop */ + for (iz = 0; iz < iy; iz++) { + _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); + } + + /* store term */ + W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK); + + /* make next carry */ + _W = _W >> ((mp_word)DIGIT_BIT); + } + + /* setup dest */ + olduse = c->used; + c->used = pa; + + { + mp_digit *tmpc; + + tmpc = c->dp + digs; + for (ix = digs; ix < pa; ix++) { /* TAO, <= could potentially overwrite */ + /* now extract the previous digit [below the carry] */ + *tmpc++ = W[ix]; + } + + /* clear unused digits [that existed in the old copy of c] */ + for (; ix < olduse; ix++) { + *tmpc++ = 0; + } + } + mp_clamp (c); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(W, NULL, DYNAMIC_TYPE_BIGINT); +#endif + + return MP_OKAY; +} + + +#ifndef MP_SET_CHUNK_BITS + #define MP_SET_CHUNK_BITS 4 +#endif +int mp_set_int (mp_int * a, unsigned long b) +{ + int x, res; + + /* use direct mp_set if b is less than mp_digit max */ + if (b < MP_DIGIT_MAX) { + return mp_set (a, (mp_digit)b); + } + + mp_zero (a); + + /* set chunk bits at a time */ + for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) { + /* shift the number up chunk bits */ + if ((res = mp_mul_2d (a, MP_SET_CHUNK_BITS, a)) != MP_OKAY) { + return res; + } + + /* OR in the top bits of the source */ + a->dp[0] |= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) & + ((1 << MP_SET_CHUNK_BITS) - 1); + + /* shift the source up to the next chunk bits */ + b <<= MP_SET_CHUNK_BITS; + + /* ensure that digits are not clamped off */ + a->used += 1; + } + mp_clamp (a); + return MP_OKAY; +} + + +#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_ECC) || !defined(NO_RSA) || \ + !defined(NO_DSA) | !defined(NO_DH) + +/* c = a * a (mod b) */ +int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c) +{ + int res; + mp_int t; + + if ((res = mp_init (&t)) != MP_OKAY) { + return res; + } + + if ((res = mp_sqr (a, &t)) != MP_OKAY) { + mp_clear (&t); + return res; + } + res = mp_mod (&t, b, c); + mp_clear (&t); + return res; +} + +#endif + + +#if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(WOLFSSL_SNIFFER) || \ + defined(WOLFSSL_HAVE_WOLFSCEP) || defined(WOLFSSL_KEY_GEN) || \ + defined(OPENSSL_EXTRA) || defined(WC_RSA_BLINDING) || \ + (!defined(NO_RSA) && !defined(NO_RSA_BOUNDS_CHECK)) + +/* single digit addition */ +int mp_add_d (mp_int* a, mp_digit b, mp_int* c) +{ + int res, ix, oldused; + mp_digit *tmpa, *tmpc, mu; + + /* grow c as required */ + if (c->alloc < a->used + 1) { + if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { + return res; + } + } + + /* if a is negative and |a| >= b, call c = |a| - b */ + if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) { + /* temporarily fix sign of a */ + a->sign = MP_ZPOS; + + /* c = |a| - b */ + res = mp_sub_d(a, b, c); + + /* fix sign */ + a->sign = c->sign = MP_NEG; + + /* clamp */ + mp_clamp(c); + + return res; + } + + /* old number of used digits in c */ + oldused = c->used; + + /* sign always positive */ + c->sign = MP_ZPOS; + + /* source alias */ + tmpa = a->dp; + + /* destination alias */ + tmpc = c->dp; + + /* if a is positive */ + if (a->sign == MP_ZPOS) { + /* add digit, after this we're propagating + * the carry. 
+   */
+    *tmpc = *tmpa++ + b;
+    mu = *tmpc >> DIGIT_BIT;
+    *tmpc++ &= MP_MASK;
+
+    /* now handle rest of the digits */
+    for (ix = 1; ix < a->used; ix++) {
+      *tmpc = *tmpa++ + mu;
+      mu = *tmpc >> DIGIT_BIT;
+      *tmpc++ &= MP_MASK;
+    }
+    /* set final carry */
+    if (ix < c->alloc) {
+      ix++;
+      *tmpc++ = mu;
+    }
+
+    /* setup size */
+    c->used = a->used + 1;
+  } else {
+    /* a was negative and |a| < b */
+    c->used = 1;
+
+    /* the result is a single digit */
+    if (a->used == 1) {
+      *tmpc++ = b - a->dp[0];
+    } else {
+      *tmpc++ = b;
+    }
+
+    /* setup count so the clearing of oldused
+     * can fall through correctly
+     */
+    ix = 1;
+  }
+
+  /* now zero to oldused */
+  while (ix++ < oldused) {
+    *tmpc++ = 0;
+  }
+  mp_clamp(c);
+
+  return MP_OKAY;
+}
+
+
+/* single digit subtraction */
+int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
+{
+  mp_digit *tmpa, *tmpc, mu;
+  int res, ix, oldused;
+
+  /* grow c as required */
+  if (c->alloc < a->used + 1) {
+    if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
+      return res;
+    }
+  }
+
+  /* if a is negative just do an unsigned
+   * addition [with fudged signs]
+   */
+  if (a->sign == MP_NEG) {
+    a->sign = MP_ZPOS;
+    res = mp_add_d(a, b, c);
+    a->sign = c->sign = MP_NEG;
+
+    /* clamp */
+    mp_clamp(c);
+
+    return res;
+  }
+
+  /* setup regs */
+  oldused = c->used;
+  tmpa = a->dp;
+  tmpc = c->dp;
+
+  /* if a <= b simply fix the single digit */
+  if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
+    if (a->used == 1) {
+      *tmpc++ = b - *tmpa;
+    } else {
+      *tmpc++ = b;
+    }
+    ix = 1;
+
+    /* negative/1digit */
+    c->sign = MP_NEG;
+    c->used = 1;
+  } else {
+    /* positive/size */
+    c->sign = MP_ZPOS;
+    c->used = a->used;
+
+    /* subtract first digit */
+    *tmpc = *tmpa++ - b;
+    mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
+    *tmpc++ &= MP_MASK;
+
+    /* handle rest of the digits */
+    for (ix = 1; ix < a->used; ix++) {
+      *tmpc = *tmpa++ - mu;
+      mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
+      *tmpc++ &= MP_MASK;
+    }
+  }
+
+  /* zero excess digits */
+  while (ix++ < oldused) {
+    *tmpc++ = 0;
+  }
+  mp_clamp(c);
+  return MP_OKAY;
+}
+
+#endif /* defined(HAVE_ECC) || !defined(NO_PWDBASED) */
+
+
+#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(HAVE_ECC) || \
+    defined(DEBUG_WOLFSSL) || !defined(NO_RSA) || !defined(NO_DSA) || \
+    !defined(NO_DH)
+
+static const int lnz[16] = {
+   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+
+/* Counts the number of lsbs which are zero before the first one bit */
+int mp_cnt_lsb(mp_int *a)
+{
+  int x;
+  mp_digit q = 0, qq;
+
+  /* easy out */
+  if (mp_iszero(a) == MP_YES) {
+    return 0;
+  }
+
+  /* scan lower digits until non-zero */
+  for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
+  if (a->dp)
+    q = a->dp[x];
+  x *= DIGIT_BIT;
+
+  /* now scan this digit until a 1 is found */
+  if ((q & 1) == 0) {
+    do {
+      qq = q & 15;
+      x += lnz[qq];
+      q >>= 4;
+    } while (qq == 0);
+  }
+  return x;
+}
+
+
+
+
+static int s_is_power_of_two(mp_digit b, int *p)
+{
+  int x;
+
+  /* fast return if no power of two */
+  if ((b==0) || (b & (b-1))) {
+    return 0;
+  }
+
+  for (x = 0; x < DIGIT_BIT; x++) {
+    if (b == (((mp_digit)1)<<x)) {
+      *p = x;
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/* single digit division (based on routine from MPI) */
+int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
+{
+  mp_int q;
+  mp_word w;
+  mp_digit t;
+  int res = MP_OKAY, ix;
+
+  /* cannot divide by zero */
+  if (b == 0) {
+    return MP_VAL;
+  }
+
+  /* quick outs */
+  if (b == 1 || mp_iszero(a) == MP_YES) {
+    if (d != NULL) {
+      *d = 0;
+    }
+    if (c != NULL) {
+      return mp_copy (a, c);
+    }
+    return MP_OKAY;
+  }
+
+  /* power of two ? */
+  if (s_is_power_of_two(b, &ix) == 1) {
+    if (d != NULL) {
+      *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
+    }
+    if (c == NULL) {
+      return MP_OKAY;
+    }
+    return mp_div_2d(a, ix, c, NULL);
+  }
+
+  /* no easy answer [c'est la vie].  Just division */
+  if (c != NULL) {
+    if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
+      return res;
+    }
+
+    q.used = a->used;
+    q.sign = a->sign;
+  }
+  else {
+    if ((res = mp_init(&q)) != MP_OKAY) {
+      return res;
+    }
+  }
+
+
+  w = 0;
+  for (ix = a->used - 1; ix >= 0; ix--) {
+    w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
+
+    if (w >= b) {
+      t = (mp_digit)(w / b);
+      w -= ((mp_word)t) * ((mp_word)b);
+    } else {
+      t = 0;
+    }
+    if (c != NULL)
+      q.dp[ix] =
(mp_digit)t; + } + + if (d != NULL) { + *d = (mp_digit)w; + } + + if (c != NULL) { + mp_clamp(&q); + mp_exch(&q, c); + } + mp_clear(&q); + + return res; +} + + +int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c) +{ + return mp_div_d(a, b, NULL, c); +} + +#endif /* WOLFSSL_KEY_GEN || HAVE_COMP_KEY || HAVE_ECC || DEBUG_WOLFSSL */ + +#if defined(WOLFSSL_KEY_GEN) || !defined(NO_DH) || !defined(NO_DSA) || !defined(NO_RSA) + +const mp_digit ltm_prime_tab[PRIME_SIZE] = { + 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, + 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, + 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, + 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, +#ifndef MP_8BIT + 0x0083, + 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, + 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, + 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, + 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, + + 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, + 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, + 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, + 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, + 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, + 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, + 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, + 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, + + 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, + 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, + 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, + 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, + 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, + 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, + 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, + 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, + + 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, + 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, + 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, + 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, + 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, + 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, + 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, + 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653 +#endif +}; + + +/* Miller-Rabin test of "a" to the base of "b" as described in + * HAC pp. 139 Algorithm 4.24 + * + * Sets result to 0 if definitely composite or 1 if probably prime. + * Randomly the chance of error is no more than 1/4 and often + * very much lower. 
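+ * Running t independent rounds therefore leaves at most a (1/4)**t chance
+ * of declaring a composite "probably prime"; t = 8 already bounds the
+ * error by 2**-16.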
+ */ +static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result) +{ + mp_int n1, y, r; + int s, j, err; + + /* default */ + *result = MP_NO; + + /* ensure b > 1 */ + if (mp_cmp_d(b, 1) != MP_GT) { + return MP_VAL; + } + + /* get n1 = a - 1 */ + if ((err = mp_init_copy (&n1, a)) != MP_OKAY) { + return err; + } + if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) { + goto LBL_N1; + } + + /* set 2**s * r = n1 */ + if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) { + goto LBL_N1; + } + + /* count the number of least significant bits + * which are zero + */ + s = mp_cnt_lsb(&r); + + /* now divide n - 1 by 2**s */ + if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) { + goto LBL_R; + } + + /* compute y = b**r mod a */ + if ((err = mp_init (&y)) != MP_OKAY) { + goto LBL_R; + } +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +#ifndef WOLFSSL_SP_NO_2048 + if (mp_count_bits(a) == 1024) + err = sp_ModExp_1024(b, &r, a, &y); + else if (mp_count_bits(a) == 2048) + err = sp_ModExp_2048(b, &r, a, &y); + else +#endif +#ifndef WOLFSSL_SP_NO_3072 + if (mp_count_bits(a) == 1536) + err = sp_ModExp_1536(b, &r, a, &y); + else if (mp_count_bits(a) == 3072) + err = sp_ModExp_3072(b, &r, a, &y); + else +#endif +#ifdef WOLFSSL_SP_4096 + if (mp_count_bits(a) == 4096) + err = sp_ModExp_4096(b, &r, a, &y); + else +#endif +#endif + err = mp_exptmod (b, &r, a, &y); + if (err != MP_OKAY) + goto LBL_Y; + + /* if y != 1 and y != n1 do */ + if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) { + j = 1; + /* while j <= s-1 and y != n1 */ + while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) { + if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) { + goto LBL_Y; + } + + /* if y == 1 then composite */ + if (mp_cmp_d (&y, 1) == MP_EQ) { + goto LBL_Y; + } + + ++j; + } + + /* if y != n1 then composite */ + if (mp_cmp (&y, &n1) != MP_EQ) { + goto LBL_Y; + } + } + + /* probably prime now */ + *result = MP_YES; +LBL_Y:mp_clear (&y); +LBL_R:mp_clear (&r); +LBL_N1:mp_clear (&n1); + return err; +} + + +/* determines if an integers is divisible by one + * of the first PRIME_SIZE primes or not + * + * sets result to 0 if not, 1 if yes + */ +static int mp_prime_is_divisible (mp_int * a, int *result) +{ + int err, ix; + mp_digit res; + + /* default to not */ + *result = MP_NO; + + for (ix = 0; ix < PRIME_SIZE; ix++) { + /* what is a mod LBL_prime_tab[ix] */ + if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) { + return err; + } + + /* is the residue zero? */ + if (res == 0) { + *result = MP_YES; + return MP_OKAY; + } + } + + return MP_OKAY; +} + +/* + * Sets result to 1 if probably prime, 0 otherwise + */ +int mp_prime_is_prime (mp_int * a, int t, int *result) +{ + mp_int b; + int ix, err, res; + + /* default to no */ + *result = MP_NO; + + /* valid value of t? */ + if (t <= 0 || t > PRIME_SIZE) { + return MP_VAL; + } + + if (mp_isone(a)) { + *result = MP_NO; + return MP_OKAY; + } + + /* is the input equal to one of the primes in the table? 
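+ * (an exact match means a is itself prime, so we can answer immediately)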
*/ + for (ix = 0; ix < PRIME_SIZE; ix++) { + if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) { + *result = MP_YES; + return MP_OKAY; + } + } + + /* first perform trial division */ + if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) { + return err; + } + + /* return if it was trivially divisible */ + if (res == MP_YES) { + return MP_OKAY; + } + + /* now perform the miller-rabin rounds */ + if ((err = mp_init (&b)) != MP_OKAY) { + return err; + } + + for (ix = 0; ix < t; ix++) { + /* set the prime */ + if ((err = mp_set (&b, ltm_prime_tab[ix])) != MP_OKAY) { + goto LBL_B; + } + + if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) { + goto LBL_B; + } + + if (res == MP_NO) { + goto LBL_B; + } + } + + /* passed the test */ + *result = MP_YES; +LBL_B:mp_clear (&b); + return err; +} + + +/* + * Sets result to 1 if probably prime, 0 otherwise + */ +int mp_prime_is_prime_ex (mp_int * a, int t, int *result, WC_RNG *rng) +{ + mp_int b, c; + int ix, err, res; + byte* base = NULL; + word32 baseSz = 0; + + /* default to no */ + *result = MP_NO; + + /* valid value of t? */ + if (t <= 0 || t > PRIME_SIZE) { + return MP_VAL; + } + + if (mp_isone(a)) { + *result = MP_NO; + return MP_OKAY; + } + + /* is the input equal to one of the primes in the table? */ + for (ix = 0; ix < PRIME_SIZE; ix++) { + if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) { + *result = MP_YES; + return MP_OKAY; + } + } + + /* first perform trial division */ + if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) { + return err; + } + + /* return if it was trivially divisible */ + if (res == MP_YES) { + return MP_OKAY; + } + + /* now perform the miller-rabin rounds */ + if ((err = mp_init (&b)) != MP_OKAY) { + return err; + } + if ((err = mp_init (&c)) != MP_OKAY) { + mp_clear(&b); + return err; + } + + baseSz = mp_count_bits(a); + baseSz = (baseSz / 8) + ((baseSz % 8) ? 1 : 0); + + base = (byte*)XMALLOC(baseSz, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (base == NULL) { + err = MP_MEM; + goto LBL_B; + } + + if ((err = mp_sub_d(a, 2, &c)) != MP_OKAY) { + goto LBL_B; + } + + /* now do a miller rabin with up to t random numbers, this should + * give a (1/4)^t chance of a false prime. */ + for (ix = 0; ix < t; ix++) { + /* Set a test candidate. 
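+ * Each base b comes straight from the RNG and must satisfy 2 < b < a - 2;
+ * the check below rejects out-of-range draws and retries them (ix--).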
*/ + if ((err = wc_RNG_GenerateBlock(rng, base, baseSz)) != 0) { + goto LBL_B; + } + + if ((err = mp_read_unsigned_bin(&b, base, baseSz)) != MP_OKAY) { + goto LBL_B; + } + + if (mp_cmp_d(&b, 2) != MP_GT || mp_cmp(&b, &c) != MP_LT) { + ix--; + continue; + } + + if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) { + goto LBL_B; + } + + if (res == MP_NO) { + goto LBL_B; + } + } + + /* passed the test */ + *result = MP_YES; +LBL_B:mp_clear (&b); + mp_clear (&c); + XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER); + return err; +} + +#endif /* WOLFSSL_KEY_GEN NO_DH NO_DSA NO_RSA */ + +#ifdef WOLFSSL_KEY_GEN + +static const int USE_BBS = 1; + +int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap) +{ + int err, res, type; + byte* buf; + + if (N == NULL || rng == NULL) + return MP_VAL; + + /* get type */ + if (len < 0) { + type = USE_BBS; + len = -len; + } else { + type = 0; + } + + /* allow sizes between 2 and 512 bytes for a prime size */ + if (len < 2 || len > 512) { + return MP_VAL; + } + + /* allocate buffer to work with */ + buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_RSA); + if (buf == NULL) { + return MP_MEM; + } + XMEMSET(buf, 0, len); + + do { +#ifdef SHOW_GEN + printf("."); + fflush(stdout); +#endif + /* generate value */ + err = wc_RNG_GenerateBlock(rng, buf, len); + if (err != 0) { + XFREE(buf, heap, DYNAMIC_TYPE_RSA); + return err; + } + + /* munge bits */ + buf[0] |= 0x80 | 0x40; + buf[len-1] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00); + + /* load value */ + if ((err = mp_read_unsigned_bin(N, buf, len)) != MP_OKAY) { + XFREE(buf, heap, DYNAMIC_TYPE_RSA); + return err; + } + + /* test */ + /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance + * of a 1024-bit candidate being a false positive, when it is our + * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.) + * Using 8 because we've always used 8. 
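+ * (Note 4.49 is an average-case bound for random candidates, which is why
+ * far fewer rounds are needed than the worst-case (1/4)**t bound suggests.)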
*/ + if ((err = mp_prime_is_prime_ex(N, 8, &res, rng)) != MP_OKAY) { + XFREE(buf, heap, DYNAMIC_TYPE_RSA); + return err; + } + } while (res == MP_NO); + + XMEMSET(buf, 0, len); + XFREE(buf, heap, DYNAMIC_TYPE_RSA); + + return MP_OKAY; +} + + +/* computes least common multiple as |a*b|/(a, b) */ +int mp_lcm (mp_int * a, mp_int * b, mp_int * c) +{ + int res; + mp_int t1, t2; + + + if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) { + return res; + } + + /* t1 = get the GCD of the two inputs */ + if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) { + goto LBL_T; + } + + /* divide the smallest by the GCD */ + if (mp_cmp_mag(a, b) == MP_LT) { + /* store quotient in t2 such that t2 * b is the LCM */ + if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) { + goto LBL_T; + } + res = mp_mul(b, &t2, c); + } else { + /* store quotient in t2 such that t2 * a is the LCM */ + if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) { + goto LBL_T; + } + res = mp_mul(a, &t2, c); + } + + /* fix the sign to positive */ + c->sign = MP_ZPOS; + +LBL_T: + mp_clear(&t1); + mp_clear(&t2); + return res; +} + + + +/* Greatest Common Divisor using the binary method */ +int mp_gcd (mp_int * a, mp_int * b, mp_int * c) +{ + mp_int u, v; + int k, u_lsb, v_lsb, res; + + /* either zero than gcd is the largest */ + if (mp_iszero (a) == MP_YES) { + return mp_abs (b, c); + } + if (mp_iszero (b) == MP_YES) { + return mp_abs (a, c); + } + + /* get copies of a and b we can modify */ + if ((res = mp_init_copy (&u, a)) != MP_OKAY) { + return res; + } + + if ((res = mp_init_copy (&v, b)) != MP_OKAY) { + goto LBL_U; + } + + /* must be positive for the remainder of the algorithm */ + u.sign = v.sign = MP_ZPOS; + + /* B1. Find the common power of two for u and v */ + u_lsb = mp_cnt_lsb(&u); + v_lsb = mp_cnt_lsb(&v); + k = MIN(u_lsb, v_lsb); + + if (k > 0) { + /* divide the power of two out */ + if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) { + goto LBL_V; + } + + if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) { + goto LBL_V; + } + } + + /* divide any remaining factors of two out */ + if (u_lsb != k) { + if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) { + goto LBL_V; + } + } + + if (v_lsb != k) { + if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) { + goto LBL_V; + } + } + + while (mp_iszero(&v) == MP_NO) { + /* make sure v is the largest */ + if (mp_cmp_mag(&u, &v) == MP_GT) { + /* swap u and v to make sure v is >= u */ + mp_exch(&u, &v); + } + + /* subtract smallest from largest */ + if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) { + goto LBL_V; + } + + /* Divide out all factors of two */ + if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) { + goto LBL_V; + } + } + + /* multiply by 2**k which we divided out at the beginning */ + if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) { + goto LBL_V; + } + c->sign = MP_ZPOS; + res = MP_OKAY; +LBL_V:mp_clear (&v); +LBL_U:mp_clear (&u); + return res; +} + +#endif /* WOLFSSL_KEY_GEN */ + + +#if !defined(NO_DSA) || defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || \ + defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) || \ + defined(DEBUG_WOLFSSL) || defined(OPENSSL_EXTRA) + +/* chars used in radix conversions */ +const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\ + abcdefghijklmnopqrstuvwxyz+/"; +#endif + +#if !defined(NO_DSA) || defined(HAVE_ECC) +/* read a string [ASCII] in a given radix */ +int mp_read_radix (mp_int * a, const char *str, int radix) +{ + int y, res, neg; + char ch; + + /* zero the digit bignum */ + mp_zero(a); + + /* 
make sure the radix is ok */ + if (radix < MP_RADIX_BIN || radix > MP_RADIX_MAX) { + return MP_VAL; + } + + /* if the leading digit is a + * minus set the sign to negative. + */ + if (*str == '-') { + ++str; + neg = MP_NEG; + } else { + neg = MP_ZPOS; + } + + /* set the integer to the default of zero */ + mp_zero (a); + + /* process each digit of the string */ + while (*str != '\0') { + /* if the radix <= 36 the conversion is case insensitive + * this allows numbers like 1AB and 1ab to represent the same value + * [e.g. in hex] + */ + ch = (radix <= 36) ? (char)XTOUPPER((unsigned char)*str) : *str; + for (y = 0; y < 64; y++) { + if (ch == mp_s_rmap[y]) { + break; + } + } + + /* if the char was found in the map + * and is less than the given radix add it + * to the number, otherwise exit the loop. + */ + if (y < radix) { + if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) { + return res; + } + if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) { + return res; + } + } else { + break; + } + ++str; + } + + /* if digit in isn't null term, then invalid character was found */ + if (*str != '\0') { + mp_zero (a); + return MP_VAL; + } + + /* set the sign only if a != 0 */ + if (mp_iszero(a) != MP_YES) { + a->sign = neg; + } + return MP_OKAY; +} +#endif /* !defined(NO_DSA) || defined(HAVE_ECC) */ + +#ifdef WC_MP_TO_RADIX + +/* returns size of ASCII representation */ +int mp_radix_size (mp_int *a, int radix, int *size) +{ + int res, digs; + mp_int t; + mp_digit d; + + *size = 0; + + /* special case for binary */ + if (radix == MP_RADIX_BIN) { + *size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1; + return MP_OKAY; + } + + /* make sure the radix is in range */ + if (radix < MP_RADIX_BIN || radix > MP_RADIX_MAX) { + return MP_VAL; + } + + if (mp_iszero(a) == MP_YES) { + *size = 2; + return MP_OKAY; + } + + /* digs is the digit count */ + digs = 0; + + /* if it's negative add one for the sign */ + if (a->sign == MP_NEG) { + ++digs; + } + + /* init a copy of the input */ + if ((res = mp_init_copy (&t, a)) != MP_OKAY) { + return res; + } + + /* force temp to positive */ + t.sign = MP_ZPOS; + + /* fetch out all of the digits */ + while (mp_iszero (&t) == MP_NO) { + if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { + mp_clear (&t); + return res; + } + ++digs; + } + mp_clear (&t); + + /* return digs + 1, the 1 is for the NULL byte that would be required. */ + *size = digs + 1; + return MP_OKAY; +} + +/* stores a bignum as a ASCII string in a given radix (2..64) */ +int mp_toradix (mp_int *a, char *str, int radix) +{ + int res, digs; + mp_int t; + mp_digit d; + char *_s = str; + + /* check range of the radix */ + if (radix < MP_RADIX_BIN || radix > MP_RADIX_MAX) { + return MP_VAL; + } + + /* quick out if its zero */ + if (mp_iszero(a) == MP_YES) { + *str++ = '0'; + *str = '\0'; + return MP_OKAY; + } + + if ((res = mp_init_copy (&t, a)) != MP_OKAY) { + return res; + } + + /* if it is negative output a - */ + if (t.sign == MP_NEG) { + ++_s; + *str++ = '-'; + t.sign = MP_ZPOS; + } + + digs = 0; + while (mp_iszero (&t) == MP_NO) { + if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { + mp_clear (&t); + return res; + } + *str++ = mp_s_rmap[d]; + ++digs; + } +#ifndef WC_DISABLE_RADIX_ZERO_PAD + /* For hexadecimal output, add zero padding when number of digits is odd */ + if ((digs & 1) && (radix == 16)) { + *str++ = mp_s_rmap[0]; + ++digs; + } +#endif + /* reverse the digits of the string. 
In this case _s points
+   * to the first digit [excluding the sign] of the number
+   */
+  bn_reverse ((unsigned char *)_s, digs);
+
+  /* append a NULL so the string is properly terminated */
+  *str = '\0';
+
+  mp_clear (&t);
+  return MP_OKAY;
+}
+
+#ifdef WOLFSSL_DEBUG_MATH
+void mp_dump(const char* desc, mp_int* a, byte verbose)
+{
+  char *buffer;
+  int size = a->alloc;
+
+  buffer = (char*)XMALLOC(size * sizeof(mp_digit) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+  if (buffer == NULL) {
+    return;
+  }
+
+  printf("%s: ptr=%p, used=%d, sign=%d, size=%d, mpd=%d\n",
+    desc, a, a->used, a->sign, size, (int)sizeof(mp_digit));
+
+  mp_tohex(a, buffer);
+  printf(" %s\n ", buffer);
+
+  if (verbose) {
+    int i;
+    for (i = 0; i < a->alloc * (int)sizeof(mp_digit); i++) {
+      printf("%02x ", *(((byte*)a->dp) + i));
+    }
+    printf("\n");
+  }
+
+  XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+}
+#endif /* WOLFSSL_DEBUG_MATH */
+
+#endif /* WC_MP_TO_RADIX */
+
+#endif /* WOLFSSL_SP_MATH */
+
+#endif /* USE_FAST_MATH */
+
+#endif /* NO_BIG_INT */
diff --git a/client/wolfssl/wolfcrypt/src/logging.c b/client/wolfssl/wolfcrypt/src/logging.c
new file mode 100644
index 0000000..0c818aa
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/logging.c
@@ -0,0 +1,843 @@
+/* logging.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#if defined(OPENSSL_EXTRA) && !defined(WOLFCRYPT_ONLY)
+/* avoid adding WANT_READ and WANT_WRITE to error queue */
+#include <wolfssl/error-ssl.h>
+#endif
+
+#if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
+static wolfSSL_Mutex debug_mutex; /* mutex for access to debug structure */
+
+/* accessing any node from the queue should be wrapped in a lock of
+ * debug_mutex */
+static void* wc_error_heap;
+struct wc_error_queue {
+    void* heap; /* the heap hint used with nodes creation */
+    struct wc_error_queue* next;
+    struct wc_error_queue* prev;
+    char error[WOLFSSL_MAX_ERROR_SZ];
+    char file[WOLFSSL_MAX_ERROR_SZ];
+    int value;
+    int line;
+};
+volatile struct wc_error_queue* wc_errors;
+static struct wc_error_queue* wc_current_node;
+static struct wc_error_queue* wc_last_node;
+/* pointer to last node in queue to make insertion O(1) */
+#endif
+
+#ifdef WOLFSSL_FUNC_TIME
+/* WARNING: This code is only to be used for debugging performance.
+ * The code is not thread-safe.
+ * Do not use WOLFSSL_FUNC_TIME in production code.
+ */
+static double wc_func_start[WC_FUNC_COUNT];
+static double wc_func_time[WC_FUNC_COUNT] = { 0, };
+static const char* wc_func_name[WC_FUNC_COUNT] = {
+    "SendHelloRequest",
+    "DoHelloRequest",
+    "SendClientHello",
+    "DoClientHello",
+    "SendServerHello",
+    "DoServerHello",
+    "SendEncryptedExtensions",
+    "DoEncryptedExtensions",
+    "SendCertificateRequest",
+    "DoCertificateRequest",
+    "SendCertificate",
+    "DoCertificate",
+    "SendCertificateVerify",
+    "DoCertificateVerify",
+    "SendFinished",
+    "DoFinished",
+    "SendKeyUpdate",
+    "DoKeyUpdate",
+    "SendEarlyData",
+    "DoEarlyData",
+    "SendNewSessionTicket",
+    "DoNewSessionTicket",
+    "SendServerHelloDone",
+    "DoServerHelloDone",
+    "SendTicket",
+    "DoTicket",
+    "SendClientKeyExchange",
+    "DoClientKeyExchange",
+    "SendCertificateStatus",
+    "DoCertificateStatus",
+    "SendServerKeyExchange",
+    "DoServerKeyExchange",
+    "SendEarlyData",
+    "DoEarlyData",
+};
+
+#include <sys/time.h>
+
+/* WARNING: This function is not portable. */
+static WC_INLINE double current_time(int reset)
+{
+    struct timeval tv;
+    gettimeofday(&tv, 0);
+    (void)reset;
+
+    return (double)tv.tv_sec + (double)tv.tv_usec / 1000000;
+}
+#endif /* WOLFSSL_FUNC_TIME */
+
+#ifdef DEBUG_WOLFSSL
+
+/* Set these to default values initially. */
+static wolfSSL_Logging_cb log_function = NULL;
+static int loggingEnabled = 0;
+
+#if defined(WOLFSSL_APACHE_MYNEWT)
+#include "log/log.h"
+static struct log mynewt_log;
+#endif /* WOLFSSL_APACHE_MYNEWT */
+
+#endif /* DEBUG_WOLFSSL */
+
+
+/* allow this to be set to NULL, so logs can be redirected to default output */
+int wolfSSL_SetLoggingCb(wolfSSL_Logging_cb f)
+{
+#ifdef DEBUG_WOLFSSL
+    log_function = f;
+    return 0;
+#else
+    (void)f;
+    return NOT_COMPILED_IN;
+#endif
+}
+
+/* allow this to be set to NULL, so logs can be redirected to default output */
+wolfSSL_Logging_cb wolfSSL_GetLoggingCb(void)
+{
+#ifdef DEBUG_WOLFSSL
+    return log_function;
+#else
+    return NULL;
+#endif
+}
+
+
+int wolfSSL_Debugging_ON(void)
+{
+#ifdef DEBUG_WOLFSSL
+    loggingEnabled = 1;
+#if defined(WOLFSSL_APACHE_MYNEWT)
+    log_register("wolfcrypt", &mynewt_log, &log_console_handler, NULL, LOG_SYSLEVEL);
+#endif /* WOLFSSL_APACHE_MYNEWT */
+    return 0;
+#else
+    return NOT_COMPILED_IN;
+#endif
+}
+
+
+void wolfSSL_Debugging_OFF(void)
+{
+#ifdef DEBUG_WOLFSSL
+    loggingEnabled = 0;
+#endif
+}
+
+#ifdef WOLFSSL_FUNC_TIME
+/* WARNING: This code is only to be used for debugging performance.
+ * The code is not thread-safe.
+ * Do not use WOLFSSL_FUNC_TIME in production code.
+ */
+void WOLFSSL_START(int funcNum)
+{
+    double now = current_time(0) * 1000.0;
+#ifdef WOLFSSL_FUNC_TIME_LOG
+    fprintf(stderr, "%17.3f: START - %s\n", now, wc_func_name[funcNum]);
+#endif
+    wc_func_start[funcNum] = now;
+}
+
+void WOLFSSL_END(int funcNum)
+{
+    double now = current_time(0) * 1000.0;
+    wc_func_time[funcNum] += now - wc_func_start[funcNum];
+#ifdef WOLFSSL_FUNC_TIME_LOG
+    fprintf(stderr, "%17.3f: END - %s\n", now, wc_func_name[funcNum]);
+#endif
+}
+
+void WOLFSSL_TIME(int count)
+{
+    int i;
+    double avg, total = 0;
+
+    for (i = 0; i < WC_FUNC_COUNT; i++) {
+        if (wc_func_time[i] > 0) {
+            avg = wc_func_time[i] / count;
+            fprintf(stderr, "%8.3f ms: %s\n", avg, wc_func_name[i]);
+            total += avg;
+        }
+    }
+    fprintf(stderr, "%8.3f ms\n", total);
+}
+#endif
+
+#ifdef DEBUG_WOLFSSL
+
+#if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
+    /* see wc_port.h for fio.h and nio.h includes */
+#elif defined(WOLFSSL_SGX)
+    /* Declare sprintf for ocall */
+    int sprintf(char* buf, const char *fmt, ...);
+#elif defined(WOLFSSL_DEOS)
+#elif defined(MICRIUM)
+    #if (BSP_SER_COMM_EN == DEF_ENABLED)
+        #include <bsp_ser.h>
+    #endif
+#elif defined(WOLFSSL_USER_LOG)
+    /* user includes their own headers */
+#elif defined(WOLFSSL_ESPIDF)
+    #include "esp_types.h"
+    #include "esp_log.h"
+#elif defined(WOLFSSL_TELIT_M2MB)
+    #include <stdio.h>
+    #include "m2m_log.h"
+#elif defined(WOLFSSL_ANDROID_DEBUG)
+    #include <android/log.h>
+#else
+    #include <stdio.h>   /* for default printf stuff */
+#endif
+
+#if defined(THREADX) && !defined(THREADX_NO_DC_PRINTF)
+    int dc_log_printf(char*, ...);
+#endif
+
+static void wolfssl_log(const int logLevel, const char *const logMessage)
+{
+    if (log_function)
+        log_function(logLevel, logMessage);
+    else {
+#if defined(WOLFSSL_USER_LOG)
+        WOLFSSL_USER_LOG(logMessage);
+#elif defined(WOLFSSL_LOG_PRINTF)
+        printf("%s\n", logMessage);
+
+#elif defined(THREADX) && !defined(THREADX_NO_DC_PRINTF)
+        dc_log_printf("%s\n", logMessage);
+#elif defined(WOLFSSL_DEOS)
+        printf("%s\r\n", logMessage);
+#elif defined(MICRIUM)
+        BSP_Ser_Printf("%s\r\n", logMessage);
+#elif defined(WOLFSSL_MDK_ARM)
+        fflush(stdout) ;
+        printf("%s\n", logMessage);
+        fflush(stdout) ;
+#elif defined(WOLFSSL_UTASKER)
+        fnDebugMsg((char*)logMessage);
+        fnDebugMsg("\r\n");
+#elif defined(MQX_USE_IO_OLD)
+        fprintf(_mqxio_stderr, "%s\n", logMessage);
+
+#elif defined(WOLFSSL_APACHE_MYNEWT)
+        LOG_DEBUG(&mynewt_log, LOG_MODULE_DEFAULT, "%s\n", logMessage);
+#elif defined(WOLFSSL_ESPIDF)
+        ESP_LOGI("wolfssl", "%s", logMessage);
+#elif defined(WOLFSSL_ZEPHYR)
+        printk("%s\n", logMessage);
+#elif defined(WOLFSSL_TELIT_M2MB)
+        M2M_LOG_INFO("%s\n", logMessage);
+#elif defined(WOLFSSL_ANDROID_DEBUG)
+        __android_log_print(ANDROID_LOG_VERBOSE, "[wolfSSL]", "%s", logMessage);
+#else
+        fprintf(stderr, "%s\n", logMessage);
+#endif
+    }
+}
+
+#ifndef WOLFSSL_DEBUG_ERRORS_ONLY
+void WOLFSSL_MSG(const char* msg)
+{
+    if (loggingEnabled)
+        wolfssl_log(INFO_LOG , msg);
+}
+
+#ifndef LINE_LEN
+#define LINE_LEN 16
+#endif
+void WOLFSSL_BUFFER(const byte* buffer, word32 length)
+{
+    int i, buflen = (int)length, bufidx;
+    char line[(LINE_LEN * 4) + 3]; /* \t00..0F | chars...chars\0 */
+
+    if (!loggingEnabled) {
+        return;
+    }
+
+    if (!buffer) {
+        wolfssl_log(INFO_LOG, "\tNULL");
+        return;
+    }
+
+    while (buflen > 0) {
+        bufidx = 0;
+        XSNPRINTF(&line[bufidx], sizeof(line)-bufidx, "\t");
+        bufidx++;
+
+        for (i = 0; i < LINE_LEN; i++) {
+            if (i < buflen) {
+                XSNPRINTF(&line[bufidx], sizeof(line)-bufidx, "%02x ", buffer[i]);
+            }
+            else {
+                XSNPRINTF(&line[bufidx], sizeof(line)-bufidx, " 
"); + } + bufidx += 3; + } + + XSNPRINTF(&line[bufidx], sizeof(line)-bufidx, "| "); + bufidx++; + + for (i = 0; i < LINE_LEN; i++) { + if (i < buflen) { + XSNPRINTF(&line[bufidx], sizeof(line)-bufidx, + "%c", 31 < buffer[i] && buffer[i] < 127 ? buffer[i] : '.'); + bufidx++; + } + } + + wolfssl_log(INFO_LOG, line); + buffer += LINE_LEN; + buflen -= LINE_LEN; + } +} + + +void WOLFSSL_ENTER(const char* msg) +{ + if (loggingEnabled) { + char buffer[WOLFSSL_MAX_ERROR_SZ]; + XSNPRINTF(buffer, sizeof(buffer), "wolfSSL Entering %s", msg); + wolfssl_log(ENTER_LOG , buffer); + } +} + + +void WOLFSSL_LEAVE(const char* msg, int ret) +{ + if (loggingEnabled) { + char buffer[WOLFSSL_MAX_ERROR_SZ]; + XSNPRINTF(buffer, sizeof(buffer), "wolfSSL Leaving %s, return %d", + msg, ret); + wolfssl_log(LEAVE_LOG , buffer); + } +} + +WOLFSSL_API int WOLFSSL_IS_DEBUG_ON(void) +{ + return loggingEnabled; +} +#endif /* !WOLFSSL_DEBUG_ERRORS_ONLY */ +#endif /* DEBUG_WOLFSSL */ + +/* + * When using OPENSSL_EXTRA or DEBUG_WOLFSSL_VERBOSE macro then WOLFSSL_ERROR is + * mapped to new function WOLFSSL_ERROR_LINE which gets the line # and function + * name where WOLFSSL_ERROR is called at. + */ +#if defined(DEBUG_WOLFSSL) || defined(OPENSSL_ALL) || \ + defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY) || \ + defined(OPENSSL_EXTRA) + +#if (defined(OPENSSL_EXTRA) && !defined(_WIN32) && !defined(NO_ERROR_QUEUE)) \ + || defined(DEBUG_WOLFSSL_VERBOSE) +void WOLFSSL_ERROR_LINE(int error, const char* func, unsigned int line, + const char* file, void* usrCtx) +#else +void WOLFSSL_ERROR(int error) +#endif +{ +#ifdef WOLFSSL_ASYNC_CRYPT + if (error != WC_PENDING_E) +#endif + { + char buffer[WOLFSSL_MAX_ERROR_SZ]; + + #if (defined(OPENSSL_EXTRA) && !defined(_WIN32) && \ + !defined(NO_ERROR_QUEUE)) || defined(DEBUG_WOLFSSL_VERBOSE) + (void)usrCtx; /* a user ctx for future flexibility */ + (void)func; + + if (wc_LockMutex(&debug_mutex) != 0) { + WOLFSSL_MSG("Lock debug mutex failed"); + XSNPRINTF(buffer, sizeof(buffer), + "wolfSSL error occurred, error = %d", error); + } + else { + #if defined(OPENSSL_EXTRA) && !defined(WOLFCRYPT_ONLY) + /* If running in compatibility mode do not add want read and + want right to error queue */ + if (error != WANT_READ && error != WANT_WRITE) { + #endif + if (error < 0) + error = error - (2 * error); /* get absolute value */ + XSNPRINTF(buffer, sizeof(buffer), + "wolfSSL error occurred, error = %d line:%d file:%s", + error, line, file); + if (wc_AddErrorNode(error, line, buffer, (char*)file) != 0) { + WOLFSSL_MSG("Error creating logging node"); + /* with void function there is no return here, continue on + * to unlock mutex and log what buffer was created. 
*/ + } + #if defined(OPENSSL_EXTRA) && !defined(WOLFCRYPT_ONLY) + } + else { + XSNPRINTF(buffer, sizeof(buffer), + "wolfSSL error occurred, error = %d", error); + + } + #endif + + wc_UnLockMutex(&debug_mutex); + } + #else + XSNPRINTF(buffer, sizeof(buffer), + "wolfSSL error occurred, error = %d", error); + #endif + + #ifdef DEBUG_WOLFSSL + if (loggingEnabled) + wolfssl_log(ERROR_LOG , buffer); + #endif + } +} + +void WOLFSSL_ERROR_MSG(const char* msg) +{ +#ifdef DEBUG_WOLFSSL + if (loggingEnabled) + wolfssl_log(ERROR_LOG , msg); +#else + (void)msg; +#endif +} + +#endif /* DEBUG_WOLFSSL || WOLFSSL_NGINX || WOLFSSL_HAPROXY */ + +#if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE) +/* Internal function that is called by wolfCrypt_Init() */ +int wc_LoggingInit(void) +{ + if (wc_InitMutex(&debug_mutex) != 0) { + WOLFSSL_MSG("Bad Init Mutex"); + return BAD_MUTEX_E; + } + wc_errors = NULL; + wc_current_node = NULL; + wc_last_node = NULL; + + return 0; +} + + +/* internal function that is called by wolfCrypt_Cleanup */ +int wc_LoggingCleanup(void) +{ + /* clear logging entries */ + wc_ClearErrorNodes(); + + /* free mutex */ + if (wc_FreeMutex(&debug_mutex) != 0) { + WOLFSSL_MSG("Bad Mutex free"); + return BAD_MUTEX_E; + } + return 0; +} + + +/* peek at an error node + * + * idx : if -1 then the most recent node is looked at, otherwise search + * through queue for node at the given index + * file : pointer to internal file string + * reason : pointer to internal error reason + * line : line number that error happened at + * + * Returns a negative value in error case, on success returns the nodes error + * value which is positive (absolute value) + */ +int wc_PeekErrorNode(int idx, const char **file, const char **reason, + int *line) +{ + struct wc_error_queue* err; + + if (wc_LockMutex(&debug_mutex) != 0) { + WOLFSSL_MSG("Lock debug mutex failed"); + return BAD_MUTEX_E; + } + + if (idx < 0) { + err = wc_last_node; + } + else { + int i; + + err = (struct wc_error_queue*)wc_errors; + for (i = 0; i < idx; i++) { + if (err == NULL) { + WOLFSSL_MSG("Error node not found. Bad index?"); + wc_UnLockMutex(&debug_mutex); + return BAD_FUNC_ARG; + } + err = err->next; + } + } + + if (err == NULL) { + WOLFSSL_MSG("No Errors in queue"); + wc_UnLockMutex(&debug_mutex); + return BAD_STATE_E; + } + + if (file != NULL) { + *file = err->file; + } + + if (reason != NULL) { + *reason = err->error; + } + + if (line != NULL) { + *line = err->line; + } + + wc_UnLockMutex(&debug_mutex); + + return err->value; +} + + +/* Pulls the current node from error queue and increments current state. + * Note: this does not delete nodes because input arguments are pointing to + * node buffers. + * + * file pointer to file that error was in. Can be NULL to return no file. + * reason error string giving reason for error. Can be NULL to return no reason. + * line return line number of where error happened. 
+ * + * returns the error value on success and BAD_MUTEX_E or BAD_STATE_E on failure + */ +int wc_PullErrorNode(const char **file, const char **reason, int *line) +{ + struct wc_error_queue* err; + int value; + + if (wc_LockMutex(&debug_mutex) != 0) { + WOLFSSL_MSG("Lock debug mutex failed"); + return BAD_MUTEX_E; + } + + err = wc_current_node; + if (err == NULL) { + WOLFSSL_MSG("No Errors in queue"); + wc_UnLockMutex(&debug_mutex); + return BAD_STATE_E; + } + + if (file != NULL) { + *file = err->file; + } + + if (reason != NULL) { + *reason = err->error; + } + + if (line != NULL) { + *line = err->line; + } + + value = err->value; + wc_current_node = err->next; + wc_UnLockMutex(&debug_mutex); + + return value; +} + + +/* create new error node and add it to the queue + * buffers are assumed to be of size WOLFSSL_MAX_ERROR_SZ for this internal + * function. debug_mutex should be locked before a call to this function. */ +int wc_AddErrorNode(int error, int line, char* buf, char* file) +{ +#if defined(NO_ERROR_QUEUE) + (void)error; + (void)line; + (void)buf; + (void)file; + WOLFSSL_MSG("Error queue turned off, can not add nodes"); +#else + struct wc_error_queue* err; + err = (struct wc_error_queue*)XMALLOC( + sizeof(struct wc_error_queue), wc_error_heap, DYNAMIC_TYPE_LOG); + if (err == NULL) { + WOLFSSL_MSG("Unable to create error node for log"); + return MEMORY_E; + } + else { + int sz; + + XMEMSET(err, 0, sizeof(struct wc_error_queue)); + err->heap = wc_error_heap; + sz = (int)XSTRLEN(buf); + if (sz > WOLFSSL_MAX_ERROR_SZ - 1) { + sz = WOLFSSL_MAX_ERROR_SZ - 1; + } + if (sz > 0) { + XMEMCPY(err->error, buf, sz); + } + + sz = (int)XSTRLEN(file); + if (sz > WOLFSSL_MAX_ERROR_SZ - 1) { + sz = WOLFSSL_MAX_ERROR_SZ - 1; + } + if (sz > 0) { + XMEMCPY(err->file, file, sz); + } + + err->value = error; + err->line = line; + + /* make sure is terminated */ + err->error[WOLFSSL_MAX_ERROR_SZ - 1] = '\0'; + err->file[WOLFSSL_MAX_ERROR_SZ - 1] = '\0'; + + + /* since is queue place new node at last of the list */ + if (wc_last_node == NULL) { + /* case of first node added to queue */ + if (wc_errors != NULL) { + /* check for unexpected case before over writing wc_errors */ + WOLFSSL_MSG("ERROR in adding new node to logging queue!!\n"); + /* In the event both wc_last_node and wc_errors are NULL, err + * goes unassigned to external wc_errors, wc_last_node. Free + * err in this instance since wc_ClearErrorNodes will not + */ + XFREE(err, wc_error_heap, DYNAMIC_TYPE_LOG); + } + else { + wc_errors = err; + wc_last_node = err; + wc_current_node = err; + } + } + else { + wc_last_node->next = err; + err->prev = wc_last_node; + wc_last_node = err; + + /* check the case where have read to the end of the queue and the + * current node to read needs updated */ + if (wc_current_node == NULL) { + wc_current_node = err; + } + } + } +#endif + return 0; +} + +/* Removes the error node at the specified index. 
+ * idx : if -1 then the most recent node is looked at, otherwise search + * through queue for node at the given index + */ +void wc_RemoveErrorNode(int idx) +{ + struct wc_error_queue* current; + + if (wc_LockMutex(&debug_mutex) != 0) { + WOLFSSL_MSG("Lock debug mutex failed"); + return; + } + + if (idx == -1) + current = wc_last_node; + else { + current = (struct wc_error_queue*)wc_errors; + for (; current != NULL && idx > 0; idx--) + current = current->next; + } + if (current != NULL) { + if (current->prev != NULL) + current->prev->next = current->next; + if (current->next != NULL) + current->next->prev = current->prev; + if (wc_last_node == current) + wc_last_node = current->prev; + if (wc_errors == current) + wc_errors = current->next; + if (wc_current_node == current) + wc_current_node = current->next; + XFREE(current, current->heap, DYNAMIC_TYPE_LOG); + } + + wc_UnLockMutex(&debug_mutex); +} + + +/* Clears out the list of error nodes. + */ +void wc_ClearErrorNodes(void) +{ +#if defined(DEBUG_WOLFSSL) || defined(WOLFSSL_NGINX) || \ + defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE) + + if (wc_LockMutex(&debug_mutex) != 0) { + WOLFSSL_MSG("Lock debug mutex failed"); + return; + } + + /* free all nodes from error queue */ + { + struct wc_error_queue* current; + struct wc_error_queue* next; + + current = (struct wc_error_queue*)wc_errors; + while (current != NULL) { + next = current->next; + XFREE(current, current->heap, DYNAMIC_TYPE_LOG); + current = next; + } + } + + wc_errors = NULL; + wc_last_node = NULL; + wc_current_node = NULL; + wc_UnLockMutex(&debug_mutex); +#endif /* DEBUG_WOLFSSL || WOLFSSL_NGINX */ +} + +int wc_SetLoggingHeap(void* h) +{ + if (wc_LockMutex(&debug_mutex) != 0) { + WOLFSSL_MSG("Lock debug mutex failed"); + return BAD_MUTEX_E; + } + wc_error_heap = h; + wc_UnLockMutex(&debug_mutex); + return 0; +} + + +/* frees all nodes in the queue + * + * id this is the thread id + */ +int wc_ERR_remove_state(void) +{ + struct wc_error_queue* current; + struct wc_error_queue* next; + + if (wc_LockMutex(&debug_mutex) != 0) { + WOLFSSL_MSG("Lock debug mutex failed"); + return BAD_MUTEX_E; + } + + /* free all nodes from error queue */ + current = (struct wc_error_queue*)wc_errors; + while (current != NULL) { + next = current->next; + XFREE(current, current->heap, DYNAMIC_TYPE_LOG); + current = next; + } + + wc_errors = NULL; + wc_last_node = NULL; + + wc_UnLockMutex(&debug_mutex); + + return 0; +} + +#if !defined(NO_FILESYSTEM) && !defined(NO_STDIO_FILESYSTEM) +/* empties out the error queue into the file */ +static int wc_ERR_dump_to_file (const char *str, size_t len, void *u) +{ + XFILE fp = (XFILE ) u; + fprintf(fp, "%-*.*s\n", (int)len, (int)len, str); + return 0; +} + +/* This callback allows the application to provide a custom error printing + * function. 
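+ * Each queued node is handed to cb as (error string, string length, u) and
+ * then freed, so the queue is empty when this function returns.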
*/ +void wc_ERR_print_errors_cb(int (*cb)(const char *str, size_t len, void *u), + void *u) +{ + WOLFSSL_ENTER("wc_ERR_print_errors_cb"); + + if (cb == NULL) { + /* Invalid param */ + return; + } + + if (wc_LockMutex(&debug_mutex) != 0) + { + WOLFSSL_MSG("Lock debug mutex failed"); + } + else + { + /* free all nodes from error queue and print them to file */ + struct wc_error_queue *current; + struct wc_error_queue *next; + + current = (struct wc_error_queue *)wc_errors; + while (current != NULL) + { + next = current->next; + cb(current->error, strlen(current->error), u); + XFREE(current, current->heap, DYNAMIC_TYPE_LOG); + current = next; + } + + /* set global pointers to match having been freed */ + wc_errors = NULL; + wc_last_node = NULL; + + wc_UnLockMutex(&debug_mutex); + } +} + +void wc_ERR_print_errors_fp(XFILE fp) +{ + WOLFSSL_ENTER("wc_ERR_print_errors_fp"); + + /* Send all errors to the wc_ERR_dump_to_file function */ + wc_ERR_print_errors_cb(wc_ERR_dump_to_file, fp); +} + +#endif /* !defined(NO_FILESYSTEM) && !defined(NO_STDIO_FILESYSTEM) */ + +#endif /* defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE) */ diff --git a/client/wolfssl/wolfcrypt/src/md2.c b/client/wolfssl/wolfcrypt/src/md2.c new file mode 100644 index 0000000..c2f3420 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/md2.c @@ -0,0 +1,162 @@ +/* md2.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef WOLFSSL_MD2
+
+#include <wolfssl/wolfcrypt/md2.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+
+void wc_InitMd2(Md2* md2)
+{
+    XMEMSET(md2->X, 0, MD2_X_SIZE);
+    XMEMSET(md2->C, 0, MD2_BLOCK_SIZE);
+    XMEMSET(md2->buffer, 0, MD2_BLOCK_SIZE);
+    md2->count = 0;
+}
+
+
+void wc_Md2Update(Md2* md2, const byte* data, word32 len)
+{
+    static const byte S[256] =
+    {
+        41, 46, 67, 201, 162, 216, 124, 1, 61, 54, 84, 161, 236, 240, 6,
+        19, 98, 167, 5, 243, 192, 199, 115, 140, 152, 147, 43, 217, 188,
+        76, 130, 202, 30, 155, 87, 60, 253, 212, 224, 22, 103, 66, 111, 24,
+        138, 23, 229, 18, 190, 78, 196, 214, 218, 158, 222, 73, 160, 251,
+        245, 142, 187, 47, 238, 122, 169, 104, 121, 145, 21, 178, 7, 63,
+        148, 194, 16, 137, 11, 34, 95, 33, 128, 127, 93, 154, 90, 144, 50,
+        39, 53, 62, 204, 231, 191, 247, 151, 3, 255, 25, 48, 179, 72, 165,
+        181, 209, 215, 94, 146, 42, 172, 86, 170, 198, 79, 184, 56, 210,
+        150, 164, 125, 182, 118, 252, 107, 226, 156, 116, 4, 241, 69, 157,
+        112, 89, 100, 113, 135, 32, 134, 91, 207, 101, 230, 45, 168, 2, 27,
+        96, 37, 173, 174, 176, 185, 246, 28, 70, 97, 105, 52, 64, 126, 15,
+        85, 71, 163, 35, 221, 81, 175, 58, 195, 92, 249, 206, 186, 197,
+        234, 38, 44, 83, 13, 110, 133, 40, 132, 9, 211, 223, 205, 244, 65,
+        129, 77, 82, 106, 220, 55, 200, 108, 193, 171, 250, 36, 225, 123,
+        8, 12, 189, 177, 74, 120, 136, 149, 139, 227, 99, 232, 109, 233,
+        203, 213, 254, 59, 0, 29, 57, 242, 239, 183, 14, 102, 88, 208, 228,
+        166, 119, 114, 248, 235, 117, 75, 10, 49, 68, 80, 180, 143, 237,
+        31, 26, 219, 153, 141, 51, 159, 17, 131, 20
+    };
+
+    while (len) {
+        word32 L = (MD2_PAD_SIZE - md2->count) < len ?
+ (MD2_PAD_SIZE - md2->count) : len; + XMEMCPY(md2->buffer + md2->count, data, L); + md2->count += L; + data += L; + len -= L; + + if (md2->count == MD2_PAD_SIZE) { + int i; + byte t; + + md2->count = 0; + XMEMCPY(md2->X + MD2_PAD_SIZE, md2->buffer, MD2_PAD_SIZE); + t = md2->C[15]; + + for(i = 0; i < MD2_PAD_SIZE; i++) { + md2->X[32 + i] = md2->X[MD2_PAD_SIZE + i] ^ md2->X[i]; + t = md2->C[i] ^= S[md2->buffer[i] ^ t]; + } + + t=0; + for(i = 0; i < 18; i++) { + int j; + for(j = 0; j < MD2_X_SIZE; j += 8) { + t = md2->X[j+0] ^= S[t]; + t = md2->X[j+1] ^= S[t]; + t = md2->X[j+2] ^= S[t]; + t = md2->X[j+3] ^= S[t]; + t = md2->X[j+4] ^= S[t]; + t = md2->X[j+5] ^= S[t]; + t = md2->X[j+6] ^= S[t]; + t = md2->X[j+7] ^= S[t]; + } + t = (t + i) & 0xFF; + } + } + } +} + + +void wc_Md2Final(Md2* md2, byte* hash) +{ + byte padding[MD2_BLOCK_SIZE]; + word32 padLen = MD2_PAD_SIZE - md2->count; + word32 i; + + for (i = 0; i < padLen; i++) + padding[i] = (byte)padLen; + + wc_Md2Update(md2, padding, padLen); + wc_Md2Update(md2, md2->C, MD2_BLOCK_SIZE); + + XMEMCPY(hash, md2->X, MD2_DIGEST_SIZE); + + wc_InitMd2(md2); +} + + +int wc_Md2Hash(const byte* data, word32 len, byte* hash) +{ +#ifdef WOLFSSL_SMALL_STACK + Md2* md2; +#else + Md2 md2[1]; +#endif + +#ifdef WOLFSSL_SMALL_STACK + md2 = (Md2*)XMALLOC(sizeof(Md2), NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (md2 == NULL) + return MEMORY_E; +#endif + + wc_InitMd2(md2); + wc_Md2Update(md2, data, len); + wc_Md2Final(md2, hash); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(md2, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return 0; +} + + +#endif /* WOLFSSL_MD2 */ + diff --git a/client/wolfssl/wolfcrypt/src/md4.c b/client/wolfssl/wolfcrypt/src/md4.c new file mode 100644 index 0000000..f6f6745 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/md4.c @@ -0,0 +1,211 @@ +/* md4.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifndef NO_MD4
+
+#include <wolfssl/wolfcrypt/md4.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+
+void wc_InitMd4(Md4* md4)
+{
+    md4->digest[0] = 0x67452301L;
+    md4->digest[1] = 0xefcdab89L;
+    md4->digest[2] = 0x98badcfeL;
+    md4->digest[3] = 0x10325476L;
+
+    md4->buffLen = 0;
+    md4->loLen   = 0;
+    md4->hiLen   = 0;
+}
+
+
+static void Transform(Md4* md4)
+{
+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z)))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+
+    /* Copy context->state[] to working vars */
+    word32 A = md4->digest[0];
+    word32 B = md4->digest[1];
+    word32 C = md4->digest[2];
+    word32 D = md4->digest[3];
+
+#define function(a,b,c,d,k,s) a=rotlFixed(a+F(b,c,d)+md4->buffer[k],s);
+    function(A,B,C,D, 0, 3);
+    function(D,A,B,C, 1, 7);
+    function(C,D,A,B, 2,11);
+    function(B,C,D,A, 3,19);
+    function(A,B,C,D, 4, 3);
+    function(D,A,B,C, 5, 7);
+    function(C,D,A,B, 6,11);
+    function(B,C,D,A, 7,19);
+    function(A,B,C,D, 8, 3);
+    function(D,A,B,C, 9, 7);
+    function(C,D,A,B,10,11);
+    function(B,C,D,A,11,19);
+    function(A,B,C,D,12, 3);
+    function(D,A,B,C,13, 7);
+    function(C,D,A,B,14,11);
+    function(B,C,D,A,15,19);
+
+#undef function
+#define function(a,b,c,d,k,s) \
+    a=rotlFixed(a+G(b,c,d)+md4->buffer[k]+0x5a827999,s);
+
+    function(A,B,C,D, 0, 3);
+    function(D,A,B,C, 4, 5);
+    function(C,D,A,B, 8, 9);
+    function(B,C,D,A,12,13);
+    function(A,B,C,D, 1, 3);
+    function(D,A,B,C, 5, 5);
+    function(C,D,A,B, 9, 9);
+    function(B,C,D,A,13,13);
+    function(A,B,C,D, 2, 3);
+    function(D,A,B,C, 6, 5);
+    function(C,D,A,B,10, 9);
+    function(B,C,D,A,14,13);
+    function(A,B,C,D, 3, 3);
+    function(D,A,B,C, 7, 5);
+    function(C,D,A,B,11, 9);
+    function(B,C,D,A,15,13);
+
+#undef function
+#define function(a,b,c,d,k,s) \
+    a=rotlFixed(a+H(b,c,d)+md4->buffer[k]+0x6ed9eba1,s);
+
+    function(A,B,C,D, 0, 3);
+    function(D,A,B,C, 8, 9);
+    function(C,D,A,B, 4,11);
+    function(B,C,D,A,12,15);
+    function(A,B,C,D, 2, 3);
+    function(D,A,B,C,10, 9);
+    function(C,D,A,B, 6,11);
+    function(B,C,D,A,14,15);
+    function(A,B,C,D, 1, 3);
+    function(D,A,B,C, 9, 9);
+    function(C,D,A,B, 5,11);
+    function(B,C,D,A,13,15);
+    function(A,B,C,D, 3, 3);
+    function(D,A,B,C,11, 9);
+    function(C,D,A,B, 7,11);
+    function(B,C,D,A,15,15);
+
+    /* Add the working vars back into digest state[] */
+    md4->digest[0] += A;
+    md4->digest[1] += B;
+    md4->digest[2] += C;
+    md4->digest[3] += D;
+}
+
+
+static WC_INLINE void AddLength(Md4* md4, word32 len)
+{
+    word32 tmp = md4->loLen;
+    if ( (md4->loLen += len) < tmp)
+        md4->hiLen++;                       /* carry low to high */
+}
+
+
+void wc_Md4Update(Md4* md4, const byte* data, word32 len)
+{
+    /* do block size increments */
+    byte* local = (byte*)md4->buffer;
+
+    while (len) {
+        word32 add = min(len, MD4_BLOCK_SIZE - md4->buffLen);
+        XMEMCPY(&local[md4->buffLen], data, add);
+
+        md4->buffLen += add;
+        data         += add;
+        len          -= add;
+
+        if (md4->buffLen == MD4_BLOCK_SIZE) {
+        #ifdef BIG_ENDIAN_ORDER
+            ByteReverseWords(md4->buffer, md4->buffer, MD4_BLOCK_SIZE);
+        #endif
+            Transform(md4);
+            AddLength(md4, MD4_BLOCK_SIZE);
+            md4->buffLen = 0;
+        }
+    }
+}
+
+
+void wc_Md4Final(Md4* md4, byte* hash)
+{
+    byte* local = (byte*)md4->buffer;
+
+    AddLength(md4, md4->buffLen);  /* before adding pads */
+
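+    /* Illustrative example -- not part of the original file. For the 3-byte
+     * message "abc" the padding added below produces the single block that
+     * the final Transform() consumes: 0x80 directly after the data, zeros
+     * up to byte 56, then the 64-bit message length in bits (24 = 0x18)
+     * stored little-endian:
+     *
+     *     61 62 63 80 00 00 ... 00 | 18 00 00 00 00 00 00 00
+     *      a  b  c  pad  (zeros)   |  loLen           hiLen
+     */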
local[md4->buffLen++] = 0x80; /* add 1 */ + + /* pad with zeros */ + if (md4->buffLen > MD4_PAD_SIZE) { + XMEMSET(&local[md4->buffLen], 0, MD4_BLOCK_SIZE - md4->buffLen); + md4->buffLen += MD4_BLOCK_SIZE - md4->buffLen; + + #ifdef BIG_ENDIAN_ORDER + ByteReverseWords(md4->buffer, md4->buffer, MD4_BLOCK_SIZE); + #endif + Transform(md4); + md4->buffLen = 0; + } + XMEMSET(&local[md4->buffLen], 0, MD4_PAD_SIZE - md4->buffLen); + + /* put lengths in bits */ + md4->hiLen = (md4->loLen >> (8*sizeof(md4->loLen) - 3)) + + (md4->hiLen << 3); + md4->loLen = md4->loLen << 3; + + /* store lengths */ + #ifdef BIG_ENDIAN_ORDER + ByteReverseWords(md4->buffer, md4->buffer, MD4_BLOCK_SIZE); + #endif + /* ! length ordering dependent on digest endian type ! */ + XMEMCPY(&local[MD4_PAD_SIZE], &md4->loLen, sizeof(word32)); + XMEMCPY(&local[MD4_PAD_SIZE + sizeof(word32)], &md4->hiLen, sizeof(word32)); + + Transform(md4); + #ifdef BIG_ENDIAN_ORDER + ByteReverseWords(md4->digest, md4->digest, MD4_DIGEST_SIZE); + #endif + XMEMCPY(hash, md4->digest, MD4_DIGEST_SIZE); + + wc_InitMd4(md4); /* reset state */ +} + + +#endif /* NO_MD4 */ + diff --git a/client/wolfssl/wolfcrypt/src/md5.c b/client/wolfssl/wolfcrypt/src/md5.c new file mode 100644 index 0000000..7eb2a51 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/md5.c @@ -0,0 +1,572 @@ +/* md5.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if !defined(NO_MD5)
+
+#if defined(WOLFSSL_TI_HASH)
+/* #include <wolfcrypt/src/port/ti/ti-hash.c> included by wc_port.c */
+
+#else
+
+#include <wolfssl/wolfcrypt/md5.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/hash.h>
+
+#ifdef NO_INLINE
+#include <wolfssl/wolfcrypt/misc.h>
+#else
+#define WOLFSSL_MISC_INCLUDED
+#include <wolfcrypt/src/misc.c>
+#endif
+
+
+/* Hardware Acceleration */
+#if defined(STM32_HASH)
+
+/* Supports CubeMX HAL or Standard Peripheral Library */
+#define HAVE_MD5_CUST_API
+
+int wc_InitMd5_ex(wc_Md5* md5, void* heap, int devId)
+{
+    if (md5 == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    (void)devId;
+    (void)heap;
+
+    wc_Stm32_Hash_Init(&md5->stmCtx);
+
+    return 0;
+}
+
+int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len)
+{
+    int ret;
+
+    if (md5 == NULL || (data == NULL && len > 0)) {
+        return BAD_FUNC_ARG;
+    }
+
+    ret = wolfSSL_CryptHwMutexLock();
+    if (ret == 0) {
+        ret = wc_Stm32_Hash_Update(&md5->stmCtx, HASH_AlgoSelection_MD5,
+            data, len);
+        wolfSSL_CryptHwMutexUnLock();
+    }
+    return ret;
+}
+
+int wc_Md5Final(wc_Md5* md5, byte* hash)
+{
+    int ret;
+
+    if (md5 == NULL || hash == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    ret = wolfSSL_CryptHwMutexLock();
+    if (ret == 0) {
+        ret = wc_Stm32_Hash_Final(&md5->stmCtx, HASH_AlgoSelection_MD5,
+            hash, WC_MD5_DIGEST_SIZE);
+        wolfSSL_CryptHwMutexUnLock();
+    }
+
+    (void)wc_InitMd5(md5);  /* reset state */
+
+    return ret;
+}
+
+#elif defined(FREESCALE_MMCAU_SHA)
+
+#ifdef FREESCALE_MMCAU_CLASSIC_SHA
+    #include "cau_api.h"
+#else
+    #include "fsl_mmcau.h"
+#endif
+
+#define XTRANSFORM(S,B)       Transform((S), (B))
+#define XTRANSFORM_LEN(S,B,L) Transform_Len((S), (B), (L))
+
+#ifndef WC_HASH_DATA_ALIGNMENT
+    /* these hardware API's require 4 byte (word32) alignment */
+    #define WC_HASH_DATA_ALIGNMENT 4
+#endif
+
+static int Transform(wc_Md5* md5, const byte* data)
+{
+    int ret = wolfSSL_CryptHwMutexLock();
+    if (ret == 0) {
+#ifdef FREESCALE_MMCAU_CLASSIC_SHA
+        cau_md5_hash_n((byte*)data, 1, (unsigned char*)md5->digest);
+#else
+        MMCAU_MD5_HashN((byte*)data, 1, (uint32_t*)md5->digest);
+#endif
+        wolfSSL_CryptHwMutexUnLock();
+    }
+    return ret;
+}
+
+static int Transform_Len(wc_Md5* md5, const byte* data, word32 len)
+{
+    int ret = wolfSSL_CryptHwMutexLock();
+    if (ret == 0) {
+    #if defined(WC_HASH_DATA_ALIGNMENT) && WC_HASH_DATA_ALIGNMENT > 0
+        if ((size_t)data % WC_HASH_DATA_ALIGNMENT) {
+            /* data pointer is NOT aligned,
+             * so copy and perform one block at a time */
+            byte* local = (byte*)md5->buffer;
+            while (len >= WC_MD5_BLOCK_SIZE) {
+                XMEMCPY(local, data, WC_MD5_BLOCK_SIZE);
+            #ifdef FREESCALE_MMCAU_CLASSIC_SHA
+                cau_md5_hash_n(local, 1, (unsigned char*)md5->digest);
+            #else
+                MMCAU_MD5_HashN(local, 1, (uint32_t*)md5->digest);
+            #endif
+                data += WC_MD5_BLOCK_SIZE;
+                len  -= WC_MD5_BLOCK_SIZE;
+            }
+        }
+        else
+    #endif
+        {
+#ifdef FREESCALE_MMCAU_CLASSIC_SHA
+            cau_md5_hash_n((byte*)data, len / WC_MD5_BLOCK_SIZE,
+                (unsigned char*)md5->digest);
+#else
+            MMCAU_MD5_HashN((byte*)data, len / WC_MD5_BLOCK_SIZE,
+                (uint32_t*)md5->digest);
+#endif
+        }
+        wolfSSL_CryptHwMutexUnLock();
+    }
+    return ret;
+}
+
+#elif defined(WOLFSSL_PIC32MZ_HASH)
+#include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
+#define HAVE_MD5_CUST_API
+
+#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
+/* functions implemented in wolfcrypt/src/port/caam/caam_sha.c */
+#define HAVE_MD5_CUST_API
+#else
+#define NEED_SOFT_MD5
+#endif /* End
Hardware Acceleration */ + +#ifdef NEED_SOFT_MD5 + +#define XTRANSFORM(S,B) Transform((S),(B)) + +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) + +#define MD5STEP(f, w, x, y, z, data, s) \ + w = rotlFixed(w + f(x, y, z) + data, s) + x + +static int Transform(wc_Md5* md5, const byte* data) +{ + word32* buffer = (word32*)data; + /* Copy context->state[] to working vars */ + word32 a = md5->digest[0]; + word32 b = md5->digest[1]; + word32 c = md5->digest[2]; + word32 d = md5->digest[3]; + + MD5STEP(F1, a, b, c, d, buffer[0] + 0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, buffer[1] + 0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, buffer[2] + 0x242070db, 17); + MD5STEP(F1, b, c, d, a, buffer[3] + 0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, buffer[4] + 0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, buffer[5] + 0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, buffer[6] + 0xa8304613, 17); + MD5STEP(F1, b, c, d, a, buffer[7] + 0xfd469501, 22); + MD5STEP(F1, a, b, c, d, buffer[8] + 0x698098d8, 7); + MD5STEP(F1, d, a, b, c, buffer[9] + 0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, buffer[10] + 0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, buffer[11] + 0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, buffer[12] + 0x6b901122, 7); + MD5STEP(F1, d, a, b, c, buffer[13] + 0xfd987193, 12); + MD5STEP(F1, c, d, a, b, buffer[14] + 0xa679438e, 17); + MD5STEP(F1, b, c, d, a, buffer[15] + 0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, buffer[1] + 0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, buffer[6] + 0xc040b340, 9); + MD5STEP(F2, c, d, a, b, buffer[11] + 0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, buffer[0] + 0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, buffer[5] + 0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, buffer[10] + 0x02441453, 9); + MD5STEP(F2, c, d, a, b, buffer[15] + 0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, buffer[4] + 0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, buffer[9] + 0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, buffer[14] + 0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, buffer[3] + 0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, buffer[8] + 0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, buffer[13] + 0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, buffer[2] + 0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, buffer[7] + 0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, buffer[12] + 0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, buffer[5] + 0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, buffer[8] + 0x8771f681, 11); + MD5STEP(F3, c, d, a, b, buffer[11] + 0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, buffer[14] + 0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, buffer[1] + 0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, buffer[4] + 0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, buffer[7] + 0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, buffer[10] + 0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, buffer[13] + 0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, buffer[0] + 0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, buffer[3] + 0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, buffer[6] + 0x04881d05, 23); + MD5STEP(F3, a, b, c, d, buffer[9] + 0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, buffer[12] + 0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, buffer[15] + 0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, buffer[2] + 0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, buffer[0] + 0xf4292244, 6); + MD5STEP(F4, d, a, b, c, buffer[7] + 0x432aff97, 10); + MD5STEP(F4, c, d, a, b, buffer[14] + 0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, buffer[5] + 0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, buffer[12] + 0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, buffer[3] + 
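+              /* Illustrative note -- not part of the original file. Every
+               * MD5STEP invocation in this function computes, per RFC 1321,
+               *
+               *     w = x + rotl32(w + f(x, y, z) + M[k] + T, s)
+               *
+               * where f is one of F1..F4, M[k] is the k-th word of the
+               * message block, T is the additive constant folded into each
+               * call, and s is the per-step rotation count. */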
0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, buffer[10] + 0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, buffer[1] + 0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, buffer[8] + 0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, buffer[15] + 0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, buffer[6] + 0xa3014314, 15); + MD5STEP(F4, b, c, d, a, buffer[13] + 0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, buffer[4] + 0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, buffer[11] + 0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, buffer[2] + 0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, buffer[9] + 0xeb86d391, 21); + + /* Add the working vars back into digest state[] */ + md5->digest[0] += a; + md5->digest[1] += b; + md5->digest[2] += c; + md5->digest[3] += d; + + return 0; +} +#endif /* NEED_SOFT_MD5 */ + +#ifndef HAVE_MD5_CUST_API + +static WC_INLINE void AddLength(wc_Md5* md5, word32 len) +{ + word32 tmp = md5->loLen; + if ((md5->loLen += len) < tmp) { + md5->hiLen++; /* carry low to high */ + } +} + +static int _InitMd5(wc_Md5* md5) +{ + int ret = 0; + + md5->digest[0] = 0x67452301L; + md5->digest[1] = 0xefcdab89L; + md5->digest[2] = 0x98badcfeL; + md5->digest[3] = 0x10325476L; + + md5->buffLen = 0; + md5->loLen = 0; + md5->hiLen = 0; +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + md5->flags = 0; +#endif + + return ret; +} + +int wc_InitMd5_ex(wc_Md5* md5, void* heap, int devId) +{ + int ret = 0; + + if (md5 == NULL) + return BAD_FUNC_ARG; + + md5->heap = heap; + + ret = _InitMd5(md5); + if (ret != 0) + return ret; + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5) + ret = wolfAsync_DevCtxInit(&md5->asyncDev, WOLFSSL_ASYNC_MARKER_MD5, + md5->heap, devId); +#else + (void)devId; +#endif + return ret; +} + +/* do block size increments/updates */ +int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len) +{ + int ret = 0; + word32 blocksLen; + byte* local; + + if (md5 == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5) + if (md5->asyncDev.marker == WOLFSSL_ASYNC_MARKER_MD5) { +#if defined(HAVE_INTEL_QA) + return IntelQaSymMd5(&md5->asyncDev, NULL, data, len); +#endif + } +#endif /* WOLFSSL_ASYNC_CRYPT */ + + /* check that internal buffLen is valid */ + if (md5->buffLen >= WC_MD5_BLOCK_SIZE) + return BUFFER_E; + + if (data == NULL && len == 0) { + /* valid, but do nothing */ + return 0; + } + + /* add length for final */ + AddLength(md5, len); + + local = (byte*)md5->buffer; + + /* process any remainder from previous operation */ + if (md5->buffLen > 0) { + blocksLen = min(len, WC_MD5_BLOCK_SIZE - md5->buffLen); + XMEMCPY(&local[md5->buffLen], data, blocksLen); + + md5->buffLen += blocksLen; + data += blocksLen; + len -= blocksLen; + + if (md5->buffLen == WC_MD5_BLOCK_SIZE) { + #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + ByteReverseWords(md5->buffer, md5->buffer, WC_MD5_BLOCK_SIZE); + #endif + + ret = XTRANSFORM(md5, (const byte*)local); + if (ret != 0) + return ret; + + md5->buffLen = 0; + } + } + + /* process blocks */ +#ifdef XTRANSFORM_LEN + /* get number of blocks */ + /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */ + /* len (masked by 0xFFFFFFC0) returns block aligned length */ + blocksLen = len & ~(WC_MD5_BLOCK_SIZE-1); + if (blocksLen > 0) { + /* Byte reversal performed in function if required. 
*/ + XTRANSFORM_LEN(md5, data, blocksLen); + data += blocksLen; + len -= blocksLen; + } +#else + while (len >= WC_MD5_BLOCK_SIZE) { + word32* local32 = md5->buffer; + /* optimization to avoid memcpy if data pointer is properly aligned */ + /* Big Endian requires byte swap, so can't use data directly */ + #if defined(WC_HASH_DATA_ALIGNMENT) && !defined(BIG_ENDIAN_ORDER) + if (((size_t)data % WC_HASH_DATA_ALIGNMENT) == 0) { + local32 = (word32*)data; + } + else + #endif + { + XMEMCPY(local32, data, WC_MD5_BLOCK_SIZE); + } + + data += WC_MD5_BLOCK_SIZE; + len -= WC_MD5_BLOCK_SIZE; + + #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + ByteReverseWords(local32, local32, WC_MD5_BLOCK_SIZE); + #endif + + ret = XTRANSFORM(md5, (const byte*)local32); + } +#endif /* XTRANSFORM_LEN */ + + /* save remainder */ + if (len > 0) { + XMEMCPY(local, data, len); + md5->buffLen = len; + } + + return ret; +} + +int wc_Md5Final(wc_Md5* md5, byte* hash) +{ + byte* local; + + if (md5 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5) + if (md5->asyncDev.marker == WOLFSSL_ASYNC_MARKER_MD5) { +#if defined(HAVE_INTEL_QA) + return IntelQaSymMd5(&md5->asyncDev, hash, NULL, WC_MD5_DIGEST_SIZE); +#endif + } +#endif /* WOLFSSL_ASYNC_CRYPT */ + + local = (byte*)md5->buffer; + + local[md5->buffLen++] = 0x80; /* add 1 */ + + /* pad with zeros */ + if (md5->buffLen > WC_MD5_PAD_SIZE) { + XMEMSET(&local[md5->buffLen], 0, WC_MD5_BLOCK_SIZE - md5->buffLen); + md5->buffLen += WC_MD5_BLOCK_SIZE - md5->buffLen; + +#if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + ByteReverseWords(md5->buffer, md5->buffer, WC_MD5_BLOCK_SIZE); +#endif + XTRANSFORM(md5, local); + md5->buffLen = 0; + } + XMEMSET(&local[md5->buffLen], 0, WC_MD5_PAD_SIZE - md5->buffLen); + +#if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + ByteReverseWords(md5->buffer, md5->buffer, WC_MD5_BLOCK_SIZE); +#endif + + /* put lengths in bits */ + md5->hiLen = (md5->loLen >> (8 * sizeof(md5->loLen) - 3)) + + (md5->hiLen << 3); + md5->loLen = md5->loLen << 3; + + /* store lengths */ + /* ! length ordering dependent on digest endian type ! 
 */
+    XMEMCPY(&local[WC_MD5_PAD_SIZE], &md5->loLen, sizeof(word32));
+    XMEMCPY(&local[WC_MD5_PAD_SIZE + sizeof(word32)], &md5->hiLen, sizeof(word32));
+
+    /* final transform and result to hash */
+    XTRANSFORM(md5, local);
+#ifdef BIG_ENDIAN_ORDER
+    ByteReverseWords(md5->digest, md5->digest, WC_MD5_DIGEST_SIZE);
+#endif
+    XMEMCPY(hash, md5->digest, WC_MD5_DIGEST_SIZE);
+
+    return _InitMd5(md5);  /* reset state */
+}
+#endif /* !HAVE_MD5_CUST_API */
+
+
+int wc_InitMd5(wc_Md5* md5)
+{
+    if (md5 == NULL) {
+        return BAD_FUNC_ARG;
+    }
+    return wc_InitMd5_ex(md5, NULL, INVALID_DEVID);
+}
+
+void wc_Md5Free(wc_Md5* md5)
+{
+    if (md5 == NULL)
+        return;
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
+    wolfAsync_DevCtxFree(&md5->asyncDev, WOLFSSL_ASYNC_MARKER_MD5);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+#ifdef WOLFSSL_PIC32MZ_HASH
+    wc_Md5Pic32Free(md5);
+#endif
+}
+
+int wc_Md5GetHash(wc_Md5* md5, byte* hash)
+{
+    int ret;
+    wc_Md5 tmpMd5;
+
+    if (md5 == NULL || hash == NULL)
+        return BAD_FUNC_ARG;
+
+    ret = wc_Md5Copy(md5, &tmpMd5);
+    if (ret == 0) {
+        ret = wc_Md5Final(&tmpMd5, hash);
+    }
+
+    return ret;
+}
+
+int wc_Md5Copy(wc_Md5* src, wc_Md5* dst)
+{
+    int ret = 0;
+
+    if (src == NULL || dst == NULL)
+        return BAD_FUNC_ARG;
+
+    XMEMCPY(dst, src, sizeof(wc_Md5));
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+#endif
+#ifdef WOLFSSL_PIC32MZ_HASH
+    ret = wc_Pic32HashCopy(&src->cache, &dst->cache);
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    dst->flags |= WC_HASH_FLAG_ISCOPY;
+#endif
+
+    return ret;
+}
+
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+int wc_Md5SetFlags(wc_Md5* md5, word32 flags)
+{
+    if (md5) {
+        md5->flags = flags;
+    }
+    return 0;
+}
+int wc_Md5GetFlags(wc_Md5* md5, word32* flags)
+{
+    if (md5 && flags) {
+        *flags = md5->flags;
+    }
+    return 0;
+}
+#endif
+
+#endif /* WOLFSSL_TI_HASH */
+#endif /* NO_MD5 */
diff --git a/client/wolfssl/wolfcrypt/src/memory.c b/client/wolfssl/wolfcrypt/src/memory.c
new file mode 100644
index 0000000..3bc8e21
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/memory.c
@@ -0,0 +1,1126 @@
+/* memory.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+/* check old macros @wc_fips */
+#if defined(USE_CYASSL_MEMORY) && !defined(USE_WOLFSSL_MEMORY)
+    #define USE_WOLFSSL_MEMORY
+#endif
+#if defined(CYASSL_MALLOC_CHECK) && !defined(WOLFSSL_MALLOC_CHECK)
+    #define WOLFSSL_MALLOC_CHECK
+#endif
+
+
+/*
+Possible memory options:
+ * NO_WOLFSSL_MEMORY:               Disables wolf memory callback support. When not defined settings.h defines USE_WOLFSSL_MEMORY.
+ * WOLFSSL_STATIC_MEMORY:           Turns on the use of static memory buffers and functions.
+                                    This allows for using static memory instead of dynamic.
+ * WOLFSSL_STATIC_ALIGN:            Defaults to 16; sets the alignment used for static memory.
+ * HAVE_IO_POOL:                    Enables use of a static, thread-safe memory pool for input/output buffers.
+ * XMALLOC_OVERRIDE:                Allows override of the XMALLOC, XFREE and XREALLOC macros.
+ * XMALLOC_USER:                    Allows custom XMALLOC, XFREE and XREALLOC functions to be defined.
+ * WOLFSSL_NO_MALLOC:               Disables the fall-back case that uses stdlib malloc/free when no callbacks are set.
+ * WOLFSSL_TRACK_MEMORY:            Enables memory tracking for total stats and a list of allocated memory.
+ * WOLFSSL_DEBUG_MEMORY:            Enables extra function and line number args for memory callbacks.
+ * WOLFSSL_DEBUG_MEMORY_PRINT:      Enables printing of each malloc/free.
+ * WOLFSSL_MALLOC_CHECK:            Reports malloc or alignment failure using WOLFSSL_STATIC_ALIGN.
+ * WOLFSSL_FORCE_MALLOC_FAIL_TEST:  Used for internal testing to induce random malloc failures.
+ * WOLFSSL_HEAP_TEST:               Used for internal testing of the heap hint.
+ */
+
+#ifdef WOLFSSL_ZEPHYR
+#undef realloc
+void *z_realloc(void *ptr, size_t size)
+{
+    if (ptr == NULL)
+        ptr = malloc(size);
+    else
+        ptr = realloc(ptr, size);
+
+    return ptr;
+}
+#define realloc z_realloc
+#endif
+
+#ifdef USE_WOLFSSL_MEMORY
+
+#include <wolfssl/wolfcrypt/memory.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+#if defined(WOLFSSL_DEBUG_MEMORY) && defined(WOLFSSL_DEBUG_MEMORY_PRINT)
+#include <stdio.h>
+#endif
+
+#ifdef WOLFSSL_FORCE_MALLOC_FAIL_TEST
+    static int gMemFailCountSeed;
+    static int gMemFailCount;
+    void wolfSSL_SetMemFailCount(int memFailCount)
+    {
+        if (gMemFailCountSeed == 0) {
+            gMemFailCountSeed = memFailCount;
+            gMemFailCount = memFailCount;
+        }
+    }
+#endif
+#if defined(WOLFSSL_MALLOC_CHECK) || defined(WOLFSSL_TRACK_MEMORY_FULL) || \
+        defined(WOLFSSL_MEMORY_LOG)
+    #include <stdio.h>
+#endif
+
+
+/* Set these to default values initially.
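+   Illustrative sketch -- not part of the original file: an application can
+   route all wolfSSL allocations through its own heap by registering
+   callbacks early, e.g.
+
+       static void* myMalloc(size_t n)           { return malloc(n);      }
+       static void  myFree(void* p)              { free(p);               }
+       static void* myRealloc(void* p, size_t n) { return realloc(p, n);  }
+
+       wolfSSL_SetAllocators(myMalloc, myFree, myRealloc);
+
+   The signatures match wolfSSL_Malloc_cb / wolfSSL_Free_cb /
+   wolfSSL_Realloc_cb in a default (non-WOLFSSL_DEBUG_MEMORY) build.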
*/ +static wolfSSL_Malloc_cb malloc_function = NULL; +static wolfSSL_Free_cb free_function = NULL; +static wolfSSL_Realloc_cb realloc_function = NULL; + +int wolfSSL_SetAllocators(wolfSSL_Malloc_cb mf, + wolfSSL_Free_cb ff, + wolfSSL_Realloc_cb rf) +{ + malloc_function = mf; + free_function = ff; + realloc_function = rf; + return 0; +} + +int wolfSSL_GetAllocators(wolfSSL_Malloc_cb* mf, + wolfSSL_Free_cb* ff, + wolfSSL_Realloc_cb* rf) +{ + if (mf) *mf = malloc_function; + if (ff) *ff = free_function; + if (rf) *rf = realloc_function; + return 0; +} + +#ifndef WOLFSSL_STATIC_MEMORY +#ifdef WOLFSSL_DEBUG_MEMORY +void* wolfSSL_Malloc(size_t size, const char* func, unsigned int line) +#else +void* wolfSSL_Malloc(size_t size) +#endif +{ + void* res = 0; + + if (malloc_function) { + #ifdef WOLFSSL_DEBUG_MEMORY + res = malloc_function(size, func, line); + #else + res = malloc_function(size); + #endif + } + else { + #ifndef WOLFSSL_NO_MALLOC + res = malloc(size); + #else + WOLFSSL_MSG("No malloc available"); + #endif + } + +#ifdef WOLFSSL_DEBUG_MEMORY +#if defined(WOLFSSL_DEBUG_MEMORY_PRINT) && !defined(WOLFSSL_TRACK_MEMORY) + printf("Alloc: %p -> %u at %s:%d\n", res, (word32)size, func, line); +#else + (void)func; + (void)line; +#endif +#endif + +#ifdef WOLFSSL_MALLOC_CHECK + if (res == NULL) + WOLFSSL_MSG("wolfSSL_malloc failed"); +#endif + +#ifdef WOLFSSL_FORCE_MALLOC_FAIL_TEST + if (res && --gMemFailCount == 0) { + printf("\n---FORCED MEM FAIL TEST---\n"); + if (free_function) { + #ifdef WOLFSSL_DEBUG_MEMORY + free_function(res, func, line); + #else + free_function(res); + #endif + } + else { + free(res); /* clear */ + } + gMemFailCount = gMemFailCountSeed; /* reset */ + return NULL; + } +#endif + + return res; +} + +#ifdef WOLFSSL_DEBUG_MEMORY +void wolfSSL_Free(void *ptr, const char* func, unsigned int line) +#else +void wolfSSL_Free(void *ptr) +#endif +{ +#ifdef WOLFSSL_DEBUG_MEMORY +#if defined(WOLFSSL_DEBUG_MEMORY_PRINT) && !defined(WOLFSSL_TRACK_MEMORY) + printf("Free: %p at %s:%d\n", ptr, func, line); +#else + (void)func; + (void)line; +#endif +#endif + + if (free_function) { + #ifdef WOLFSSL_DEBUG_MEMORY + free_function(ptr, func, line); + #else + free_function(ptr); + #endif + } + else { + #ifndef WOLFSSL_NO_MALLOC + free(ptr); + #else + WOLFSSL_MSG("No free available"); + #endif + } +} + +#ifdef WOLFSSL_DEBUG_MEMORY +void* wolfSSL_Realloc(void *ptr, size_t size, const char* func, unsigned int line) +#else +void* wolfSSL_Realloc(void *ptr, size_t size) +#endif +{ + void* res = 0; + + if (realloc_function) { + #ifdef WOLFSSL_DEBUG_MEMORY + res = realloc_function(ptr, size, func, line); + #else + res = realloc_function(ptr, size); + #endif + } + else { + #ifndef WOLFSSL_NO_MALLOC + res = realloc(ptr, size); + #else + WOLFSSL_MSG("No realloc available"); + #endif + } + + return res; +} +#endif /* WOLFSSL_STATIC_MEMORY */ + +#ifdef WOLFSSL_STATIC_MEMORY + +struct wc_Memory { + byte* buffer; + struct wc_Memory* next; + word32 sz; +}; + + +/* returns amount of memory used on success. 
On error returns negative value + wc_Memory** list is the list that new buckets are prepended to + */ +static int create_memory_buckets(byte* buffer, word32 bufSz, + word32 buckSz, word32 buckNum, wc_Memory** list) { + word32 i; + byte* pt = buffer; + int ret = 0; + word32 memSz = (word32)sizeof(wc_Memory); + word32 padSz = -(int)memSz & (WOLFSSL_STATIC_ALIGN - 1); + + /* if not enough space available for bucket size then do not try */ + if (buckSz + memSz + padSz > bufSz) { + return ret; + } + + for (i = 0; i < buckNum; i++) { + if ((buckSz + memSz + padSz) <= (bufSz - ret)) { + /* create a new struct and set its values */ + wc_Memory* mem = (struct wc_Memory*)(pt); + mem->sz = buckSz; + mem->buffer = (byte*)pt + padSz + memSz; + mem->next = NULL; + + /* add the newly created struct to front of list */ + if (*list == NULL) { + *list = mem; + } else { + mem->next = *list; + *list = mem; + } + + /* advance pointer and keep track of memory used */ + ret += buckSz + padSz + memSz; + pt += buckSz + padSz + memSz; + } + else { + break; /* not enough space left for more buckets of this size */ + } + } + + return ret; +} + +int wolfSSL_init_memory_heap(WOLFSSL_HEAP* heap) +{ + word32 wc_MemSz[WOLFMEM_DEF_BUCKETS] = { WOLFMEM_BUCKETS }; + word32 wc_Dist[WOLFMEM_DEF_BUCKETS] = { WOLFMEM_DIST }; + + if (heap == NULL) { + return BAD_FUNC_ARG; + } + + XMEMSET(heap, 0, sizeof(WOLFSSL_HEAP)); + + XMEMCPY(heap->sizeList, wc_MemSz, sizeof(wc_MemSz)); + XMEMCPY(heap->distList, wc_Dist, sizeof(wc_Dist)); + + if (wc_InitMutex(&(heap->memory_mutex)) != 0) { + WOLFSSL_MSG("Error creating heap memory mutex"); + return BAD_MUTEX_E; + } + + return 0; +} + +int wc_LoadStaticMemory(WOLFSSL_HEAP_HINT** pHint, + unsigned char* buf, unsigned int sz, int flag, int max) +{ + int ret; + WOLFSSL_HEAP* heap; + WOLFSSL_HEAP_HINT* hint; + word32 idx = 0; + + if (pHint == NULL || buf == NULL) { + return BAD_FUNC_ARG; + } + + if ((sizeof(WOLFSSL_HEAP) + sizeof(WOLFSSL_HEAP_HINT)) > sz - idx) { + return BUFFER_E; /* not enough memory for structures */ + } + + /* check if hint has already been assigned */ + if (*pHint == NULL) { + heap = (WOLFSSL_HEAP*)buf; + idx += sizeof(WOLFSSL_HEAP); + hint = (WOLFSSL_HEAP_HINT*)(buf + idx); + idx += sizeof(WOLFSSL_HEAP_HINT); + + ret = wolfSSL_init_memory_heap(heap); + if (ret != 0) { + return ret; + } + + XMEMSET(hint, 0, sizeof(WOLFSSL_HEAP_HINT)); + hint->memory = heap; + } + else { + #ifdef WOLFSSL_HEAP_TEST + /* do not load in memory if test has been set */ + if (heap == (void*)WOLFSSL_HEAP_TEST) { + return 0; + } + #endif + + hint = (WOLFSSL_HEAP_HINT*)(*pHint); + heap = hint->memory; + } + + ret = wolfSSL_load_static_memory(buf + idx, sz - idx, flag, heap); + if (ret != 1) { + WOLFSSL_MSG("Error partitioning memory"); + return -1; + } + + /* determine what max applies too */ + if ((flag & WOLFMEM_IO_POOL) || (flag & WOLFMEM_IO_POOL_FIXED)) { + heap->maxIO = max; + } + else { /* general memory used in handshakes */ + heap->maxHa = max; + } + + heap->flag |= flag; + *pHint = hint; + + (void)max; + + return 0; +} + +int wolfSSL_load_static_memory(byte* buffer, word32 sz, int flag, + WOLFSSL_HEAP* heap) +{ + word32 ava = sz; + byte* pt = buffer; + int ret = 0; + word32 memSz = (word32)sizeof(wc_Memory); + word32 padSz = -(int)memSz & (WOLFSSL_STATIC_ALIGN - 1); + + WOLFSSL_ENTER("wolfSSL_load_static_memory"); + + if (buffer == NULL) { + return BAD_FUNC_ARG; + } + + /* align pt */ + while ((wolfssl_word)pt % WOLFSSL_STATIC_ALIGN && pt < (buffer + sz)) { + *pt = 0x00; + pt++; + ava--; + } 
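+    /* Illustrative example -- not part of the original file. Each bucket
+     * that create_memory_buckets() carves out of the raw buffer is laid
+     * out as
+     *
+     *     [ wc_Memory header | pad | caller-visible buffer of buckSz bytes ]
+     *
+     * where padSz = -(int)memSz & (WOLFSSL_STATIC_ALIGN - 1) rounds the
+     * header up to the next alignment boundary. For example, if memSz were
+     * 24 with the default 16-byte alignment, padSz would be 8, so one
+     * 128-byte bucket would consume 24 + 8 + 128 = 160 bytes. */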
+ +#ifdef WOLFSSL_DEBUG_MEMORY + printf("Allocated %d bytes for static memory @ %p\n", ava, pt); +#endif + + /* divide into chunks of memory and add them to available list */ + while (ava >= (heap->sizeList[0] + padSz + memSz)) { + int i; + /* creating only IO buffers from memory passed in, max TLS is 16k */ + if (flag & WOLFMEM_IO_POOL || flag & WOLFMEM_IO_POOL_FIXED) { + if ((ret = create_memory_buckets(pt, ava, + WOLFMEM_IO_SZ, 1, &(heap->io))) < 0) { + WOLFSSL_LEAVE("wolfSSL_load_static_memory", ret); + return ret; + } + + /* check if no more room left for creating IO buffers */ + if (ret == 0) { + break; + } + + /* advance pointer in buffer for next buckets and keep track + of how much memory is left available */ + pt += ret; + ava -= ret; + } + else { + /* start at largest and move to smaller buckets */ + for (i = (WOLFMEM_MAX_BUCKETS - 1); i >= 0; i--) { + if ((heap->sizeList[i] + padSz + memSz) <= ava) { + if ((ret = create_memory_buckets(pt, ava, heap->sizeList[i], + heap->distList[i], &(heap->ava[i]))) < 0) { + WOLFSSL_LEAVE("wolfSSL_load_static_memory", ret); + return ret; + } + + /* advance pointer in buffer for next buckets and keep track + of how much memory is left available */ + pt += ret; + ava -= ret; + } + } + } + } + + return 1; +} + + +/* returns the size of management memory needed for each bucket. + * This is memory that is used to keep track of and align memory buckets. */ +int wolfSSL_MemoryPaddingSz(void) +{ + word32 memSz = (word32)sizeof(wc_Memory); + word32 padSz = -(int)memSz & (WOLFSSL_STATIC_ALIGN - 1); + return memSz + padSz; +} + + +/* Used to calculate memory size for optimum use with buckets. + returns the suggested size rounded down to the nearest bucket. */ +int wolfSSL_StaticBufferSz(byte* buffer, word32 sz, int flag) +{ + word32 bucketSz[WOLFMEM_MAX_BUCKETS] = {WOLFMEM_BUCKETS}; + word32 distList[WOLFMEM_MAX_BUCKETS] = {WOLFMEM_DIST}; + + word32 ava = sz; + byte* pt = buffer; + word32 memSz = (word32)sizeof(wc_Memory); + word32 padSz = -(int)memSz & (WOLFSSL_STATIC_ALIGN - 1); + + WOLFSSL_ENTER("wolfSSL_static_size"); + + if (buffer == NULL) { + return BAD_FUNC_ARG; + } + + /* align pt */ + while ((wolfssl_word)pt % WOLFSSL_STATIC_ALIGN && pt < (buffer + sz)) { + pt++; + ava--; + } + + /* creating only IO buffers from memory passed in, max TLS is 16k */ + if (flag & WOLFMEM_IO_POOL || flag & WOLFMEM_IO_POOL_FIXED) { + if (ava < (memSz + padSz + WOLFMEM_IO_SZ)) { + return 0; /* not enough room for even one bucket */ + } + + ava = ava % (memSz + padSz + WOLFMEM_IO_SZ); + } + else { + int i, k; + + if (ava < (bucketSz[0] + padSz + memSz)) { + return 0; /* not enough room for even one bucket */ + } + + while ((ava >= (bucketSz[0] + padSz + memSz)) && (ava > 0)) { + /* start at largest and move to smaller buckets */ + for (i = (WOLFMEM_MAX_BUCKETS - 1); i >= 0; i--) { + for (k = distList[i]; k > 0; k--) { + if ((bucketSz[i] + padSz + memSz) <= ava) { + ava -= bucketSz[i] + padSz + memSz; + } + } + } + } + } + + return sz - ava; /* round down */ +} + + +int FreeFixedIO(WOLFSSL_HEAP* heap, wc_Memory** io) +{ + WOLFSSL_MSG("Freeing fixed IO buffer"); + + /* check if fixed buffer was set */ + if (*io == NULL) { + return 1; + } + + if (heap == NULL) { + WOLFSSL_MSG("No heap to return fixed IO too"); + } + else { + /* put IO buffer back into IO pool */ + (*io)->next = heap->io; + heap->io = *io; + *io = NULL; + } + + return 1; +} + + +int SetFixedIO(WOLFSSL_HEAP* heap, wc_Memory** io) +{ + WOLFSSL_MSG("Setting fixed IO for SSL"); + if (heap == NULL) { + return 
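+    /* Illustrative note -- not part of the original file. SetFixedIO() pins
+     * one buffer to a connection by popping it off the heap's IO free list
+     * (heap->io); FreeFixedIO() above pushes it back when the connection is
+     * done, so over a connection's lifetime the two calls must stay
+     * symmetric. */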
MEMORY_E; + } + + *io = heap->io; + + if (*io != NULL) { + heap->io = (*io)->next; + (*io)->next = NULL; + } + else { /* failed to grab an IO buffer */ + return 0; + } + + return 1; +} + + +int wolfSSL_GetMemStats(WOLFSSL_HEAP* heap, WOLFSSL_MEM_STATS* stats) +{ + word32 i; + wc_Memory* pt; + + XMEMSET(stats, 0, sizeof(WOLFSSL_MEM_STATS)); + + stats->totalAlloc = heap->alloc; + stats->totalFr = heap->frAlc; + stats->curAlloc = stats->totalAlloc - stats->totalFr; + stats->maxHa = heap->maxHa; + stats->maxIO = heap->maxIO; + for (i = 0; i < WOLFMEM_MAX_BUCKETS; i++) { + stats->blockSz[i] = heap->sizeList[i]; + for (pt = heap->ava[i]; pt != NULL; pt = pt->next) { + stats->avaBlock[i] += 1; + } + } + + for (pt = heap->io; pt != NULL; pt = pt->next) { + stats->avaIO++; + } + + stats->flag = heap->flag; /* flag used */ + + return 1; +} + + +#ifdef WOLFSSL_DEBUG_MEMORY +void* wolfSSL_Malloc(size_t size, void* heap, int type, const char* func, unsigned int line) +#else +void* wolfSSL_Malloc(size_t size, void* heap, int type) +#endif +{ + void* res = 0; + wc_Memory* pt = NULL; + int i; + + /* check for testing heap hint was set */ +#ifdef WOLFSSL_HEAP_TEST + if (heap == (void*)WOLFSSL_HEAP_TEST) { + return malloc(size); + } +#endif + + /* if no heap hint then use dynamic memory*/ + if (heap == NULL) { + #ifdef WOLFSSL_HEAP_TEST + /* allow using malloc for creating ctx and method */ + if (type == DYNAMIC_TYPE_CTX || type == DYNAMIC_TYPE_METHOD || + type == DYNAMIC_TYPE_CERT_MANAGER) { + WOLFSSL_MSG("ERROR allowing null heap hint for ctx/method\n"); + res = malloc(size); + } + else { + WOLFSSL_MSG("ERROR null heap hint passed into XMALLOC\n"); + res = NULL; + } + #else + #ifndef WOLFSSL_NO_MALLOC + #ifdef FREERTOS + res = pvPortMalloc(size); + #else + res = malloc(size); + #endif + #else + WOLFSSL_MSG("No heap hint found to use and no malloc"); + #ifdef WOLFSSL_DEBUG_MEMORY + printf("ERROR: at %s:%d\n", func, line); + #endif + #endif /* WOLFSSL_NO_MALLOC */ + #endif /* WOLFSSL_HEAP_TEST */ + } + else { + WOLFSSL_HEAP_HINT* hint = (WOLFSSL_HEAP_HINT*)heap; + WOLFSSL_HEAP* mem = hint->memory; + + if (wc_LockMutex(&(mem->memory_mutex)) != 0) { + WOLFSSL_MSG("Bad memory_mutex lock"); + return NULL; + } + + /* case of using fixed IO buffers */ + if (mem->flag & WOLFMEM_IO_POOL_FIXED && + (type == DYNAMIC_TYPE_OUT_BUFFER || + type == DYNAMIC_TYPE_IN_BUFFER)) { + if (type == DYNAMIC_TYPE_OUT_BUFFER) { + pt = hint->outBuf; + } + if (type == DYNAMIC_TYPE_IN_BUFFER) { + pt = hint->inBuf; + } + } + else { + /* check if using IO pool flag */ + if (mem->flag & WOLFMEM_IO_POOL && + (type == DYNAMIC_TYPE_OUT_BUFFER || + type == DYNAMIC_TYPE_IN_BUFFER)) { + if (mem->io != NULL) { + pt = mem->io; + mem->io = pt->next; + } + } + + /* general static memory */ + if (pt == NULL) { + for (i = 0; i < WOLFMEM_MAX_BUCKETS; i++) { + if ((word32)size < mem->sizeList[i]) { + if (mem->ava[i] != NULL) { + pt = mem->ava[i]; + mem->ava[i] = pt->next; + break; + } + #ifdef WOLFSSL_DEBUG_STATIC_MEMORY + else { + printf("Size: %ld, Empty: %d\n", size, + mem->sizeList[i]); + } + #endif + } + } + } + } + + if (pt != NULL) { + mem->inUse += pt->sz; + mem->alloc += 1; + res = pt->buffer; + + #ifdef WOLFSSL_DEBUG_MEMORY + printf("Alloc: %p -> %u at %s:%d\n", pt->buffer, pt->sz, func, line); + #endif + + /* keep track of connection statistics if flag is set */ + if (mem->flag & WOLFMEM_TRACK_STATS) { + WOLFSSL_MEM_CONN_STATS* stats = hint->stats; + if (stats != NULL) { + stats->curMem += pt->sz; + if (stats->peakMem < stats->curMem) { + 
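+                    /* Illustrative sketch -- not part of the original file.
+                     * The heap hint consumed by this allocator is typically
+                     * produced with wc_LoadStaticMemory(), roughly:
+                     *
+                     *     static byte mem[80*1024];
+                     *     WOLFSSL_HEAP_HINT* hint = NULL;
+                     *     wc_LoadStaticMemory(&hint, mem, sizeof(mem),
+                     *                         WOLFMEM_GENERAL, 0);
+                     *     byte* p = (byte*)XMALLOC(64, hint,
+                     *                              DYNAMIC_TYPE_TMP_BUFFER);
+                     *
+                     * The 80k size, WOLFMEM_GENERAL flag, and max of 0 are
+                     * illustrative values only. */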
stats->peakMem = stats->curMem; + } + stats->curAlloc++; + if (stats->peakAlloc < stats->curAlloc) { + stats->peakAlloc = stats->curAlloc; + } + stats->totalAlloc++; + } + } + } + else { + WOLFSSL_MSG("ERROR ran out of static memory"); + #ifdef WOLFSSL_DEBUG_MEMORY + printf("Looking for %lu bytes at %s:%d\n", size, func, line); + #endif + } + + wc_UnLockMutex(&(mem->memory_mutex)); + } + + #ifdef WOLFSSL_MALLOC_CHECK + if ((wolfssl_word)res % WOLFSSL_STATIC_ALIGN) { + WOLFSSL_MSG("ERROR memory is not aligned"); + res = NULL; + } + #endif + + + (void)i; + (void)pt; + (void)type; + + return res; +} + + +#ifdef WOLFSSL_DEBUG_MEMORY +void wolfSSL_Free(void *ptr, void* heap, int type, const char* func, unsigned int line) +#else +void wolfSSL_Free(void *ptr, void* heap, int type) +#endif +{ + int i; + wc_Memory* pt; + + if (ptr) { + /* check for testing heap hint was set */ + #ifdef WOLFSSL_HEAP_TEST + if (heap == (void*)WOLFSSL_HEAP_TEST) { + return free(ptr); + } + #endif + + if (heap == NULL) { + #ifdef WOLFSSL_HEAP_TEST + /* allow using malloc for creating ctx and method */ + if (type == DYNAMIC_TYPE_CTX || type == DYNAMIC_TYPE_METHOD || + type == DYNAMIC_TYPE_CERT_MANAGER) { + WOLFSSL_MSG("ERROR allowing null heap hint for ctx/method\n"); + } + else { + WOLFSSL_MSG("ERROR null heap hint passed into XFREE\n"); + } + #endif + #ifndef WOLFSSL_NO_MALLOC + #ifdef FREERTOS + vPortFree(ptr); + #else + free(ptr); + #endif + #else + WOLFSSL_MSG("Error trying to call free when turned off"); + #endif /* WOLFSSL_NO_MALLOC */ + } + else { + WOLFSSL_HEAP_HINT* hint = (WOLFSSL_HEAP_HINT*)heap; + WOLFSSL_HEAP* mem = hint->memory; + word32 padSz = -(int)sizeof(wc_Memory) & (WOLFSSL_STATIC_ALIGN - 1); + + /* get memory struct and add it to available list */ + pt = (wc_Memory*)((byte*)ptr - sizeof(wc_Memory) - padSz); + if (wc_LockMutex(&(mem->memory_mutex)) != 0) { + WOLFSSL_MSG("Bad memory_mutex lock"); + return; + } + + /* case of using fixed IO buffers */ + if (mem->flag & WOLFMEM_IO_POOL_FIXED && + (type == DYNAMIC_TYPE_OUT_BUFFER || + type == DYNAMIC_TYPE_IN_BUFFER)) { + /* fixed IO pools are free'd at the end of SSL lifetime + using FreeFixedIO(WOLFSSL_HEAP* heap, wc_Memory** io) */ + } + else if (mem->flag & WOLFMEM_IO_POOL && pt->sz == WOLFMEM_IO_SZ && + (type == DYNAMIC_TYPE_OUT_BUFFER || + type == DYNAMIC_TYPE_IN_BUFFER)) { + pt->next = mem->io; + mem->io = pt; + } + else { /* general memory free */ + for (i = 0; i < WOLFMEM_MAX_BUCKETS; i++) { + if (pt->sz == mem->sizeList[i]) { + pt->next = mem->ava[i]; + mem->ava[i] = pt; + break; + } + } + } + mem->inUse -= pt->sz; + mem->frAlc += 1; + + #ifdef WOLFSSL_DEBUG_MEMORY + printf("Free: %p -> %u at %s:%d\n", pt->buffer, pt->sz, func, line); + #endif + + /* keep track of connection statistics if flag is set */ + if (mem->flag & WOLFMEM_TRACK_STATS) { + WOLFSSL_MEM_CONN_STATS* stats = hint->stats; + if (stats != NULL) { + /* avoid under flow */ + if (stats->curMem > pt->sz) { + stats->curMem -= pt->sz; + } + else { + stats->curMem = 0; + } + + if (stats->curAlloc > 0) { + stats->curAlloc--; + } + stats->totalFr++; + } + } + wc_UnLockMutex(&(mem->memory_mutex)); + } + } + + (void)i; + (void)pt; + (void)type; +} + +#ifdef WOLFSSL_DEBUG_MEMORY +void* wolfSSL_Realloc(void *ptr, size_t size, void* heap, int type, const char* func, unsigned int line) +#else +void* wolfSSL_Realloc(void *ptr, size_t size, void* heap, int type) +#endif +{ + void* res = 0; + wc_Memory* pt = NULL; + word32 prvSz; + int i; + + /* check for testing heap hint was set */ +#ifdef 
WOLFSSL_HEAP_TEST
+    if (heap == (void*)WOLFSSL_HEAP_TEST) {
+        return realloc(ptr, size);
+    }
+#endif
+
+    if (heap == NULL) {
+        #ifdef WOLFSSL_HEAP_TEST
+            WOLFSSL_MSG("ERROR null heap hint passed in to XREALLOC\n");
+        #endif
+        #ifndef WOLFSSL_NO_MALLOC
+            res = realloc(ptr, size);
+        #else
+            WOLFSSL_MSG("NO heap found to use for realloc");
+        #endif /* WOLFSSL_NO_MALLOC */
+    }
+    else {
+        WOLFSSL_HEAP_HINT* hint = (WOLFSSL_HEAP_HINT*)heap;
+        WOLFSSL_HEAP*      mem  = hint->memory;
+        word32 padSz = -(int)sizeof(wc_Memory) & (WOLFSSL_STATIC_ALIGN - 1);
+
+        if (ptr == NULL) {
+        #ifdef WOLFSSL_DEBUG_MEMORY
+            return wolfSSL_Malloc(size, heap, type, func, line);
+        #else
+            return wolfSSL_Malloc(size, heap, type);
+        #endif
+        }
+
+        if (wc_LockMutex(&(mem->memory_mutex)) != 0) {
+            WOLFSSL_MSG("Bad memory_mutex lock");
+            return NULL;
+        }
+
+        /* case of using fixed IO buffers or IO pool */
+        if (((mem->flag & WOLFMEM_IO_POOL)||(mem->flag & WOLFMEM_IO_POOL_FIXED))
+                         && (type == DYNAMIC_TYPE_OUT_BUFFER ||
+                             type == DYNAMIC_TYPE_IN_BUFFER)) {
+            /* no realloc, is fixed size */
+            pt = (wc_Memory*)((byte*)ptr - padSz - sizeof(wc_Memory));
+            if (pt->sz < size) {
+                WOLFSSL_MSG("Error IO memory was not large enough");
+                res = NULL; /* return NULL in error case */
+            }
+            else {
+                res = pt->buffer;
+            }
+        }
+        else {
+            /* general memory */
+            for (i = 0; i < WOLFMEM_MAX_BUCKETS; i++) {
+                if ((word32)size < mem->sizeList[i]) {
+                    if (mem->ava[i] != NULL) {
+                        pt = mem->ava[i];
+                        mem->ava[i] = pt->next;
+                        break;
+                    }
+                }
+            }
+
+            if (pt != NULL && res == NULL) {
+                res = pt->buffer;
+
+                /* copy over original information and free ptr */
+                prvSz = ((wc_Memory*)((byte*)ptr - padSz -
+                                       sizeof(wc_Memory)))->sz;
+                prvSz = (prvSz > pt->sz)? pt->sz: prvSz;
+                XMEMCPY(pt->buffer, ptr, prvSz);
+                mem->inUse += pt->sz;
+                mem->alloc += 1;
+
+                /* free memory that was previously being used */
+                wc_UnLockMutex(&(mem->memory_mutex));
+                wolfSSL_Free(ptr, heap, type
+            #ifdef WOLFSSL_DEBUG_MEMORY
+                    , func, line
+            #endif
+                );
+                if (wc_LockMutex(&(mem->memory_mutex)) != 0) {
+                    WOLFSSL_MSG("Bad memory_mutex lock");
+                    return NULL;
+                }
+            }
+        }
+        wc_UnLockMutex(&(mem->memory_mutex));
+    }
+
+    #ifdef WOLFSSL_MALLOC_CHECK
+        if ((wolfssl_word)res % WOLFSSL_STATIC_ALIGN) {
+            WOLFSSL_MSG("ERROR memory is not aligned");
+            res = NULL;
+        }
+    #endif
+
+    (void)i;
+    (void)pt;
+    (void)type;
+
+    return res;
+}
+#endif /* WOLFSSL_STATIC_MEMORY */
+
+#endif /* USE_WOLFSSL_MEMORY */
+
+
+#ifdef HAVE_IO_POOL
+
+/* Example for user io pool, shared build may need definitions in lib proper */
+
+#include <wolfssl/wolfcrypt/types.h>
+#include <stdlib.h>
+
+#ifndef HAVE_THREAD_LS
+    #error "Oops, simple I/O pool example needs thread local storage"
+#endif
+
+
+/* allow simple per thread in and out pools */
+/* use 17k size since max record size is 16k plus overhead */
+static THREAD_LS_T byte pool_in[17*1024];
+static THREAD_LS_T byte pool_out[17*1024];
+
+
+void* XMALLOC(size_t n, void* heap, int type)
+{
+    (void)heap;
+
+    if (type == DYNAMIC_TYPE_IN_BUFFER) {
+        if (n < sizeof(pool_in))
+            return pool_in;
+        else
+            return NULL;
+    }
+
+    if (type == DYNAMIC_TYPE_OUT_BUFFER) {
+        if (n < sizeof(pool_out))
+            return pool_out;
+        else
+            return NULL;
+    }
+
+    return malloc(n);
+}
+
+void* XREALLOC(void *p, size_t n, void* heap, int type)
+{
+    (void)heap;
+
+    if (type == DYNAMIC_TYPE_IN_BUFFER) {
+        if (n < sizeof(pool_in))
+            return pool_in;
+        else
+            return NULL;
+    }
+
+    if (type == DYNAMIC_TYPE_OUT_BUFFER) {
+        if (n < sizeof(pool_out))
+            return pool_out;
+        else
+            return NULL;
+    }
+
+    return realloc(p, n);
+}
+
+void XFREE(void *p, void* heap, int
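+          /* Illustrative note -- not part of the original file. Because
+           * pool_in/pool_out above are single per-thread buffers, this
+           * example allocator supports exactly one outstanding input and
+           * one outstanding output buffer per thread, and the 17k size
+           * leaves room for a maximum 16k TLS record plus framing
+           * overhead. */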
type) +{ + (void)heap; + + if (type == DYNAMIC_TYPE_IN_BUFFER) + return; /* do nothing, static pool */ + + if (type == DYNAMIC_TYPE_OUT_BUFFER) + return; /* do nothing, static pool */ + + free(p); +} + +#endif /* HAVE_IO_POOL */ + +#ifdef WOLFSSL_MEMORY_LOG +void *xmalloc(size_t n, void* heap, int type, const char* func, + const char* file, unsigned int line) +{ + void* p; + word32* p32; + + if (malloc_function) + p32 = malloc_function(n + sizeof(word32) * 4); + else + p32 = malloc(n + sizeof(word32) * 4); + + p32[0] = (word32)n; + p = (void*)(p32 + 4); + + fprintf(stderr, "Alloc: %p -> %u (%d) at %s:%s:%u\n", p, (word32)n, type, + func, file, line); + + (void)heap; + + return p; +} +void *xrealloc(void *p, size_t n, void* heap, int type, const char* func, + const char* file, unsigned int line) +{ + void* newp = NULL; + word32* p32; + word32* oldp32 = NULL; + word32 oldLen; + + if (p != NULL) { + oldp32 = (word32*)p; + oldp32 -= 4; + oldLen = oldp32[0]; + } + + if (realloc_function) + p32 = realloc_function(oldp32, n + sizeof(word32) * 4); + else + p32 = realloc(oldp32, n + sizeof(word32) * 4); + + if (p32 != NULL) { + p32[0] = (word32)n; + newp = (void*)(p32 + 4); + + fprintf(stderr, "Alloc: %p -> %u (%d) at %s:%s:%u\n", newp, (word32)n, + type, func, file, line); + if (p != NULL) { + fprintf(stderr, "Free: %p -> %u (%d) at %s:%s:%u\n", p, oldLen, + type, func, file, line); + } + } + + (void)heap; + + return newp; +} +void xfree(void *p, void* heap, int type, const char* func, const char* file, + unsigned int line) +{ + word32* p32 = (word32*)p; + + if (p != NULL) { + p32 -= 4; + + fprintf(stderr, "Free: %p -> %u (%d) at %s:%s:%u\n", p, p32[0], type, + func, file, line); + + if (free_function) + free_function(p32); + else + free(p32); + } + + (void)heap; +} +#endif /* WOLFSSL_MEMORY_LOG */ + +#ifdef WOLFSSL_STACK_LOG +/* Note: this code only works with GCC using -finstrument-functions. */ +void __attribute__((no_instrument_function)) + __cyg_profile_func_enter(void *func, void *caller) +{ + register void* sp asm("sp"); + fprintf(stderr, "ENTER: %016lx %p\n", (unsigned long)(size_t)func, sp); + (void)caller; +} + +void __attribute__((no_instrument_function)) + __cyg_profile_func_exit(void *func, void *caller) +{ + register void* sp asm("sp"); + fprintf(stderr, "EXIT: %016lx %p\n", (unsigned long)(size_t)func, sp); + (void)caller; +} +#endif + diff --git a/client/wolfssl/wolfcrypt/src/misc.c b/client/wolfssl/wolfcrypt/src/misc.c new file mode 100644 index 0000000..7f779dc --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/misc.c @@ -0,0 +1,405 @@ +/* misc.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifndef WOLF_CRYPT_MISC_C
+#define WOLF_CRYPT_MISC_C
+
+#include <wolfssl/wolfcrypt/misc.h>
+
+/* inlining these functions is a huge speed increase and a small size decrease,
+   because the functions are smaller than function call setup/cleanup, e.g.,
+   the md5 benchmark is twice as fast with inline. If you don't want inlining,
+   define NO_INLINE and compile this file into wolfssl; otherwise it is used
+   as a source header.
+ */
+
+#ifdef NO_INLINE
+    #define WC_STATIC
+#else
+    #define WC_STATIC static
+#endif
+
+/* Check for if compiling misc.c when not needed. */
+#if !defined(WOLFSSL_MISC_INCLUDED) && !defined(NO_INLINE)
+    #ifndef WOLFSSL_IGNORE_FILE_WARN
+        #warning misc.c does not need to be compiled when using inline (NO_INLINE not defined)
+    #endif
+
+#else
+
+
+#if defined(__ICCARM__)
+    #include <intrinsics.h>
+#endif
+
+
+#ifdef INTEL_INTRINSICS
+
+    #include <stdlib.h>      /* get intrinsic definitions */
+
+    /* for non visual studio probably need no long version, 32 bit only
+     * i.e., _rotl and _rotr */
+    #pragma intrinsic(_lrotl, _lrotr)
+
+    WC_STATIC WC_INLINE word32 rotlFixed(word32 x, word32 y)
+    {
+        return y ? _lrotl(x, y) : x;
+    }
+
+    WC_STATIC WC_INLINE word32 rotrFixed(word32 x, word32 y)
+    {
+        return y ? _lrotr(x, y) : x;
+    }
+
+#else /* generic */
+
+    WC_STATIC WC_INLINE word32 rotlFixed(word32 x, word32 y)
+    {
+        return (x << y) | (x >> (sizeof(y) * 8 - y));
+    }
+
+
+    WC_STATIC WC_INLINE word32 rotrFixed(word32 x, word32 y)
+    {
+        return (x >> y) | (x << (sizeof(y) * 8 - y));
+    }
+
+#endif
+
+
+WC_STATIC WC_INLINE word32 ByteReverseWord32(word32 value)
+{
+#ifdef PPC_INTRINSICS
+    /* PPC: load reverse indexed instruction */
+    return (word32)__lwbrx(&value, 0);
+#elif defined(__ICCARM__)
+    return (word32)__REV(value);
+#elif defined(KEIL_INTRINSICS)
+    return (word32)__rev(value);
+#elif defined(WOLF_ALLOW_BUILTIN) && \
+        defined(__GNUC_PREREQ) && __GNUC_PREREQ(4, 3)
+    return (word32)__builtin_bswap32(value);
+#elif defined(FAST_ROTATE)
+    /* 5 instructions with rotate instruction, 9 without */
+    return (rotrFixed(value, 8U) & 0xff00ff00) |
+           (rotlFixed(value, 8U) & 0x00ff00ff);
+#else
+    /* 6 instructions with rotate instruction, 8 without */
+    value = ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8);
+    return rotlFixed(value, 16U);
+#endif
+}
+
+
+WC_STATIC WC_INLINE void ByteReverseWords(word32* out, const word32* in,
+                                          word32 byteCount)
+{
+    word32 count = byteCount/(word32)sizeof(word32), i;
+
+    for (i = 0; i < count; i++)
+        out[i] = ByteReverseWord32(in[i]);
+
+}
+
+
+#if defined(WORD64_AVAILABLE) && !defined(WOLFSSL_NO_WORD64_OPS)
+
+
+WC_STATIC WC_INLINE word64 rotlFixed64(word64 x, word64 y)
+{
+    return (x << y) | (x >> (sizeof(y) * 8 - y));
+}
+
+
+WC_STATIC WC_INLINE word64 rotrFixed64(word64 x, word64 y)
+{
+    return (x >> y) | (x << (sizeof(y) * 8 - y));
+}
+
+
+WC_STATIC WC_INLINE word64 ByteReverseWord64(word64 value)
+{
+#if defined(WOLF_ALLOW_BUILTIN) && defined(__GNUC_PREREQ) && __GNUC_PREREQ(4, 3)
+    return (word64)__builtin_bswap64(value);
+#elif defined(WOLFCRYPT_SLOW_WORD64)
+    return (word64)((word64)ByteReverseWord32((word32) value)) << 32 |
+                    (word64)ByteReverseWord32((word32)(value   >> 32));
+#else
+    value = ((value & W64LIT(0xFF00FF00FF00FF00)) >> 8) |
+            ((value & W64LIT(0x00FF00FF00FF00FF)) << 8);
+    value = ((value &
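+            /* Illustrative example -- not part of the original file. The
+             * 32-bit helpers above behave as:
+             *
+             *     rotlFixed(0x12345678, 8)      == 0x34567812
+             *     ByteReverseWord32(0x12345678) == 0x78563412
+             *
+             * This 64-bit fallback applies the same swap idea in three
+             * steps: adjacent bytes, then 16-bit pairs, then one final
+             * 32-bit rotate. */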
W64LIT(0xFFFF0000FFFF0000)) >> 16) | + ((value & W64LIT(0x0000FFFF0000FFFF)) << 16); + return rotlFixed64(value, 32U); +#endif +} + + +WC_STATIC WC_INLINE void ByteReverseWords64(word64* out, const word64* in, + word32 byteCount) +{ + word32 count = byteCount/(word32)sizeof(word64), i; + + for (i = 0; i < count; i++) + out[i] = ByteReverseWord64(in[i]); + +} + +#endif /* WORD64_AVAILABLE && !WOLFSSL_NO_WORD64_OPS */ + +#ifndef WOLFSSL_NO_XOR_OPS +WC_STATIC WC_INLINE void XorWords(wolfssl_word* r, const wolfssl_word* a, word32 n) +{ + word32 i; + + for (i = 0; i < n; i++) r[i] ^= a[i]; +} + + +WC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count) +{ + if (((wolfssl_word)buf | (wolfssl_word)mask | count) % WOLFSSL_WORD_SIZE == 0) + XorWords( (wolfssl_word*)buf, + (const wolfssl_word*)mask, count / WOLFSSL_WORD_SIZE); + else { + word32 i; + byte* b = (byte*)buf; + const byte* m = (const byte*)mask; + + for (i = 0; i < count; i++) b[i] ^= m[i]; + } +} +#endif + +#ifndef WOLFSSL_NO_FORCE_ZERO +/* Make sure compiler doesn't skip */ +WC_STATIC WC_INLINE void ForceZero(const void* mem, word32 len) +{ + volatile byte* z = (volatile byte*)mem; + +#if defined(WOLFSSL_X86_64_BUILD) && defined(WORD64_AVAILABLE) + volatile word64* w; + #ifndef WOLFSSL_UNALIGNED_64BIT_ACCESS + word32 l = (sizeof(word64) - ((size_t)z & (sizeof(word64)-1))) & + (sizeof(word64)-1); + + if (len < l) l = len; + len -= l; + while (l--) *z++ = 0; + #endif + for (w = (volatile word64*)z; len >= sizeof(*w); len -= sizeof(*w)) + *w++ = 0; + z = (volatile byte*)w; +#endif + + while (len--) *z++ = 0; +} +#endif + + +#ifndef WOLFSSL_NO_CONST_CMP +/* check all length bytes for equality, return 0 on success */ +WC_STATIC WC_INLINE int ConstantCompare(const byte* a, const byte* b, int length) +{ + int i; + int compareSum = 0; + + for (i = 0; i < length; i++) { + compareSum |= a[i] ^ b[i]; + } + + return compareSum; +} +#endif + + +#ifndef WOLFSSL_HAVE_MIN + #define WOLFSSL_HAVE_MIN + #if defined(HAVE_FIPS) && !defined(min) /* so ifdef check passes */ + #define min min + #endif + WC_STATIC WC_INLINE word32 min(word32 a, word32 b) + { + return a > b ? b : a; + } +#endif /* !WOLFSSL_HAVE_MIN */ + +#ifndef WOLFSSL_HAVE_MAX + #define WOLFSSL_HAVE_MAX + #if defined(HAVE_FIPS) && !defined(max) /* so ifdef check passes */ + #define max max + #endif + WC_STATIC WC_INLINE word32 max(word32 a, word32 b) + { + return a > b ? 
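+        /* Illustrative example -- not part of the original file.
+         * ConstantCompare() above ORs together the XOR of every byte pair,
+         * so ConstantCompare((const byte*)"abc", (const byte*)"abd", 3)
+         * returns 0x07 after touching all three bytes; unlike memcmp() it
+         * never exits early, so its timing does not reveal the position of
+         * the first mismatch. */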
a : b;
+    }
+#endif /* !WOLFSSL_HAVE_MAX */
+
+#ifndef WOLFSSL_NO_INT_ENCODE
+/* converts a 32 bit integer to 24 bit */
+WC_STATIC WC_INLINE void c32to24(word32 in, word24 out)
+{
+    out[0] = (in >> 16) & 0xff;
+    out[1] = (in >>  8) & 0xff;
+    out[2] =  in        & 0xff;
+}
+
+/* convert 16 bit integer to opaque */
+WC_STATIC WC_INLINE void c16toa(word16 wc_u16, byte* c)
+{
+    c[0] = (wc_u16 >> 8) & 0xff;
+    c[1] =  wc_u16       & 0xff;
+}
+
+/* convert 32 bit integer to opaque */
+WC_STATIC WC_INLINE void c32toa(word32 wc_u32, byte* c)
+{
+    c[0] = (wc_u32 >> 24) & 0xff;
+    c[1] = (wc_u32 >> 16) & 0xff;
+    c[2] = (wc_u32 >>  8) & 0xff;
+    c[3] =  wc_u32        & 0xff;
+}
+#endif
+
+#ifndef WOLFSSL_NO_INT_DECODE
+/* convert a 24 bit integer into a 32 bit one */
+WC_STATIC WC_INLINE void c24to32(const word24 wc_u24, word32* wc_u32)
+{
+    *wc_u32 = ((word32)wc_u24[0] << 16) | (wc_u24[1] << 8) | wc_u24[2];
+}
+
+
+/* convert opaque to 24 bit integer */
+WC_STATIC WC_INLINE void ato24(const byte* c, word32* wc_u24)
+{
+    *wc_u24 = ((word32)c[0] << 16) | (c[1] << 8) | c[2];
+}
+
+/* convert opaque to 16 bit integer */
+WC_STATIC WC_INLINE void ato16(const byte* c, word16* wc_u16)
+{
+    *wc_u16 = (word16) ((c[0] << 8) | (c[1]));
+}
+
+/* convert opaque to 32 bit integer */
+WC_STATIC WC_INLINE void ato32(const byte* c, word32* wc_u32)
+{
+    *wc_u32 = ((word32)c[0] << 24) | ((word32)c[1] << 16) | (c[2] << 8) | c[3];
+}
+
+
+/* convert the ASCII digit in b to its numeric value */
+WC_STATIC WC_INLINE word32 btoi(byte b)
+{
+    return (word32)(b - 0x30);
+}
+#endif
+
+
+#ifndef WOLFSSL_NO_CT_OPS
+/* Constant time - mask set when a > b. */
+WC_STATIC WC_INLINE byte ctMaskGT(int a, int b)
+{
+    return (((word32)a - b - 1) >> 31) - 1;
+}
+
+/* Constant time - mask set when a >= b. */
+WC_STATIC WC_INLINE byte ctMaskGTE(int a, int b)
+{
+    return (((word32)a - b    ) >> 31) - 1;
+}
+
+/* Constant time - mask set when a >= b. */
+WC_STATIC WC_INLINE int ctMaskIntGTE(int a, int b)
+{
+    return (((word32)a - b    ) >> 31) - 1;
+}
+
+/* Constant time - mask set when a < b. */
+WC_STATIC WC_INLINE byte ctMaskLT(int a, int b)
+{
+    return (((word32)b - a - 1) >> 31) - 1;
+}
+
+/* Constant time - mask set when a <= b. */
+WC_STATIC WC_INLINE byte ctMaskLTE(int a, int b)
+{
+    return (((word32)b - a    ) >> 31) - 1;
+}
+
+/* Constant time - mask set when a == b. */
+WC_STATIC WC_INLINE byte ctMaskEq(int a, int b)
+{
+    return (~ctMaskGT(a, b)) & (~ctMaskLT(a, b));
+}
+
+/* Constant time - mask set when a > b. */
+WC_STATIC WC_INLINE word16 ctMask16GT(int a, int b)
+{
+    return (((word32)a - b - 1) >> 31) - 1;
+}
+
+/* Constant time - mask set when a < b. */
+WC_STATIC WC_INLINE word16 ctMask16LT(int a, int b)
+{
+    return (((word32)b - a - 1) >> 31) - 1;
+}
+
+/* Constant time - mask set when a == b. */
+WC_STATIC WC_INLINE word16 ctMask16Eq(int a, int b)
+{
+    return (~ctMask16GT(a, b)) & (~ctMask16LT(a, b));
+}
+
+/* Constant time - mask set when a != b. */
+WC_STATIC WC_INLINE byte ctMaskNotEq(int a, int b)
+{
+    return ctMaskGT(a, b) | ctMaskLT(a, b);
+}
+
+/* Constant time - select a when mask is set and b otherwise. */
+WC_STATIC WC_INLINE byte ctMaskSel(byte m, byte a, byte b)
+{
+    return (b & ((byte)~(word32)m)) | (a & m);
+}
+
+/* Constant time - select integer a when mask is set and integer b otherwise. */
+WC_STATIC WC_INLINE int ctMaskSelInt(byte m, int a, int b)
+{
+    return (b & (~(signed int)(signed char)m)) |
+           (a & ( (signed int)(signed char)m));
+}
+
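+/* Illustrative example -- not part of the original file. The masks above
+ * enable branch-free selection; e.g. the smaller of two bytes can be picked
+ * without a data-dependent branch:
+ *
+ *     byte m  = ctMaskLT(a, b);       all ones when a < b, else zero
+ *     byte lo = ctMaskSel(m, a, b);   (b & ~m) | (a & m)
+ */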
*/ +WC_STATIC WC_INLINE byte ctSetLTE(int a, int b) +{ + return ((word32)a - b - 1) >> 31; +} +#endif + + +#undef WC_STATIC + +#endif /* !WOLFSSL_MISC_INCLUDED && !NO_INLINE */ + +#endif /* WOLF_CRYPT_MISC_C */ diff --git a/client/wolfssl/wolfcrypt/src/pkcs12.c b/client/wolfssl/wolfcrypt/src/pkcs12.c new file mode 100644 index 0000000..8ae5004 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/pkcs12.c @@ -0,0 +1,2403 @@ +/* pkcs12.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#if !defined(NO_ASN) && !defined(NO_PWDBASED) && defined(HAVE_PKCS12) + +#include +#include +#include +#include +#include +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif +#include +#include +#include + + +#define ERROR_OUT(err, eLabel) { ret = (err); goto eLabel; } + +enum { + WC_PKCS12_KeyBag = 667, + WC_PKCS12_ShroudedKeyBag = 668, + WC_PKCS12_CertBag = 669, + WC_PKCS12_CertBag_Type1 = 675, + WC_PKCS12_CrlBag = 670, + WC_PKCS12_SecretBag = 671, + WC_PKCS12_SafeContentsBag = 672, + WC_PKCS12_DATA = 651, + WC_PKCS12_ENCRYPTED_DATA = 656, + + WC_PKCS12_DATA_OBJ_SZ = 11, +}; + +static const byte WC_PKCS12_ENCRYPTED_OID[] = + {0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07, 0x06}; +static const byte WC_PKCS12_DATA_OID[] = + {0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07, 0x01}; +static const byte WC_PKCS12_CertBag_Type1_OID[] = + {0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x16, 0x01}; +static const byte WC_PKCS12_CertBag_OID[] = + {0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x0c, 0x0a, 0x01, 0x03}; +static const byte WC_PKCS12_KeyBag_OID[] = + {0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x0c, 0x0a, 0x01, 0x01}; +static const byte WC_PKCS12_ShroudedKeyBag_OID[] = + {0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x0c, 0x0a, 0x01, 0x02}; + + +typedef struct ContentInfo { + byte* data; + struct ContentInfo* next; + word32 encC; /* encryptedContent */ + word32 dataSz; + int type; /* DATA / encrypted / enveloped */ +} ContentInfo; + + +typedef struct AuthenticatedSafe { + ContentInfo* CI; + byte* data; /* T contents.... */ + word32 oid; /* encrypted or not */ + word32 numCI; /* number of Content Info structs */ + word32 dataSz; +} AuthenticatedSafe; + + +typedef struct MacData { + byte* digest; + byte* salt; + word32 oid; + word32 digestSz; + word32 saltSz; + int itt; /* number of iterations when creating HMAC key */ +} MacData; + + +struct WC_PKCS12 { + void* heap; + AuthenticatedSafe* safe; + MacData* signData; + word32 oid; /* DATA / Enveloped DATA ... */ +}; + + +/* for friendlyName, localKeyId .... 
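+ * bag attributes defined by RFC 7292; note that wc_PKCS12_parse below
+ * currently skips bag attributes rather than collecting them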
*/ +typedef struct WC_PKCS12_ATTRIBUTE { + byte* data; + word32 oid; + word32 dataSz; +} WC_PKCS12_ATTRIBUTE; + + +WC_PKCS12* wc_PKCS12_new(void) +{ + WC_PKCS12* pkcs12 = (WC_PKCS12*)XMALLOC(sizeof(WC_PKCS12), + NULL, DYNAMIC_TYPE_PKCS); + if (pkcs12 == NULL) { + WOLFSSL_MSG("Memory issue when creating WC_PKCS12 struct"); + return NULL; + } + + XMEMSET(pkcs12, 0, sizeof(WC_PKCS12)); + + return pkcs12; +} + + +static void freeSafe(AuthenticatedSafe* safe, void* heap) +{ + int i; + + if (safe == NULL) { + return; + } + + /* free content info structs */ + for (i = safe->numCI; i > 0; i--) { + ContentInfo* ci = safe->CI; + safe->CI = ci->next; + XFREE(ci, heap, DYNAMIC_TYPE_PKCS); + } + if (safe->data != NULL) { + XFREE(safe->data, heap, DYNAMIC_TYPE_PKCS); + } + XFREE(safe, heap, DYNAMIC_TYPE_PKCS); + + (void)heap; +} + + +void wc_PKCS12_free(WC_PKCS12* pkcs12) +{ + void* heap; + + /* if null pointer is passed in do nothing */ + if (pkcs12 == NULL) { + WOLFSSL_MSG("Trying to free null WC_PKCS12 object"); + return; + } + + heap = pkcs12->heap; + if (pkcs12->safe != NULL) { + freeSafe(pkcs12->safe, heap); + } + + /* free mac data */ + if (pkcs12->signData != NULL) { + if (pkcs12->signData->digest != NULL) { + XFREE(pkcs12->signData->digest, heap, DYNAMIC_TYPE_DIGEST); + pkcs12->signData->digest = NULL; + } + if (pkcs12->signData->salt != NULL) { + XFREE(pkcs12->signData->salt, heap, DYNAMIC_TYPE_SALT); + pkcs12->signData->salt = NULL; + } + XFREE(pkcs12->signData, heap, DYNAMIC_TYPE_PKCS); + pkcs12->signData = NULL; + } + + XFREE(pkcs12, NULL, DYNAMIC_TYPE_PKCS); + pkcs12 = NULL; +} + + +static int GetSafeContent(WC_PKCS12* pkcs12, const byte* input, + word32* idx, int maxIdx) +{ + AuthenticatedSafe* safe; + word32 oid; + word32 localIdx = *idx; + int ret; + int size = 0; + byte tag; + + safe = (AuthenticatedSafe*)XMALLOC(sizeof(AuthenticatedSafe), pkcs12->heap, + DYNAMIC_TYPE_PKCS); + if (safe == NULL) { + return MEMORY_E; + } + XMEMSET(safe, 0, sizeof(AuthenticatedSafe)); + + ret = GetObjectId(input, &localIdx, &oid, oidIgnoreType, maxIdx); + if (ret < 0) { + WOLFSSL_LEAVE("Get object id failed", ret); + freeSafe(safe, pkcs12->heap); + return ASN_PARSE_E; + } + + safe->oid = oid; + /* check tag, length */ + if (GetASNTag(input, &localIdx, &tag, maxIdx) < 0) { + freeSafe(safe, pkcs12->heap); + return ASN_PARSE_E; + } + + if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) { + WOLFSSL_MSG("Unexpected tag in PKCS12 DER"); + freeSafe(safe, pkcs12->heap); + return ASN_PARSE_E; + } + if ((ret = GetLength(input, &localIdx, &size, maxIdx)) <= 0) { + freeSafe(safe, pkcs12->heap); + return ret; + } + + switch (oid) { + case WC_PKCS12_ENCRYPTED_DATA: + WOLFSSL_MSG("Found PKCS12 OBJECT: ENCRYPTED DATA\n"); + break; + + case WC_PKCS12_DATA: + WOLFSSL_MSG("Found PKCS12 OBJECT: DATA"); + /* get octets holding contents */ + if (GetASNTag(input, &localIdx, &tag, maxIdx) < 0) { + freeSafe(safe, pkcs12->heap); + return ASN_PARSE_E; + } + + if (tag != ASN_OCTET_STRING) { + WOLFSSL_MSG("Wrong tag with content PKCS12 type DATA"); + freeSafe(safe, pkcs12->heap); + return ASN_PARSE_E; + } + if ((ret = GetLength(input, &localIdx, &size, maxIdx)) <= 0) { + freeSafe(safe, pkcs12->heap); + return ret; + } + + break; + } + + safe->dataSz = size; + safe->data = (byte*)XMALLOC(size, pkcs12->heap, DYNAMIC_TYPE_PKCS); + if (safe->data == NULL) { + freeSafe(safe, pkcs12->heap); + return MEMORY_E; + } + XMEMCPY(safe->data, input + localIdx, size); + *idx = localIdx; + + /* an instance of AuthenticatedSafe is created from + * 
ContentInfo's strung together in a SEQUENCE. Here we iterate + * through the ContentInfo's and add them to our + * AuthenticatedSafe struct */ + localIdx = 0; + input = safe->data; + { + int CISz; + ret = GetSequence(input, &localIdx, &CISz, safe->dataSz); + if (ret < 0) { + freeSafe(safe, pkcs12->heap); + return ASN_PARSE_E; + } + CISz += localIdx; + while ((int)localIdx < CISz) { + int curSz = 0; + word32 curIdx; + ContentInfo* ci = NULL; + + #ifdef WOLFSSL_DEBUG_PKCS12 + printf("\t\tlooking for Content Info.... "); + #endif + + if ((ret = GetSequence(input, &localIdx, &curSz, safe->dataSz)) + < 0) { + freeSafe(safe, pkcs12->heap); + return ret; + } + + if (curSz > CISz) { + /* subset should not be larger than universe */ + freeSafe(safe, pkcs12->heap); + return ASN_PARSE_E; + } + + curIdx = localIdx; + if ((ret = GetObjectId(input, &localIdx, &oid, oidIgnoreType, + safe->dataSz)) < 0) { + WOLFSSL_LEAVE("Get object id failed", ret); + freeSafe(safe, pkcs12->heap); + return ret; + } + + /* create new content info struct ... possible OID sanity check? */ + ci = (ContentInfo*)XMALLOC(sizeof(ContentInfo), pkcs12->heap, + DYNAMIC_TYPE_PKCS); + if (ci == NULL) { + freeSafe(safe, pkcs12->heap); + return MEMORY_E; + } + + ci->type = oid; + ci->dataSz = curSz - (localIdx-curIdx); + ci->data = (byte*)input + localIdx; + localIdx += ci->dataSz; + + #ifdef WOLFSSL_DEBUG_PKCS12 + switch (oid) { + case WC_PKCS12_ENCRYPTED_DATA: + printf("CONTENT INFO: ENCRYPTED DATA, size = %d\n", ci->dataSz); + break; + + case WC_PKCS12_DATA: + printf("CONTENT INFO: DATA, size = %d\n", ci->dataSz); + break; + default: + printf("CONTENT INFO: UNKNOWN, size = %d\n", ci->dataSz); + } + #endif + + /* insert to head of list */ + ci->next = safe->CI; + safe->CI = ci; + safe->numCI += 1; + } + } + + pkcs12->safe = safe; + *idx += localIdx; + + return ret; +} + + +/* optional mac data */ +static int GetSignData(WC_PKCS12* pkcs12, const byte* mem, word32* idx, + word32 totalSz) +{ + MacData* mac; + word32 curIdx = *idx; + word32 oid = 0; + int size, ret; + byte tag; + + /* Digest Info : Sequence + * DigestAlgorithmIdentifier + * Digest + */ + if ((ret = GetSequence(mem, &curIdx, &size, totalSz)) <= 0) { + WOLFSSL_MSG("Failed to get PKCS12 sequence"); + return ret; + } + +#ifdef WOLFSSL_DEBUG_PKCS12 + printf("\t\tSEQUENCE: DigestInfo size = %d\n", size); +#endif + + mac = (MacData*)XMALLOC(sizeof(MacData), pkcs12->heap, DYNAMIC_TYPE_PKCS); + if (mac == NULL) { + return MEMORY_E; + } + XMEMSET(mac, 0, sizeof(MacData)); + + /* DigestAlgorithmIdentifier */ + if ((ret = GetAlgoId(mem, &curIdx, &oid, oidIgnoreType, totalSz)) < 0) { + WOLFSSL_MSG("Failed to get PKCS12 sequence"); + XFREE(mac, pkcs12->heap, DYNAMIC_TYPE_PKCS); + return ret; + } + mac->oid = oid; + +#ifdef WOLFSSL_DEBUG_PKCS12 + printf("\t\tALGO ID = %d\n", oid); +#endif + + /* Digest: should be octet type holding digest */ + if (GetASNTag(mem, &curIdx, &tag, totalSz) < 0) { + XFREE(mac, pkcs12->heap, DYNAMIC_TYPE_PKCS); + return ASN_PARSE_E; + } + + if (tag != ASN_OCTET_STRING) { + WOLFSSL_MSG("Failed to get digest"); + XFREE(mac, pkcs12->heap, DYNAMIC_TYPE_PKCS); + return ASN_PARSE_E; + } + + if ((ret = GetLength(mem, &curIdx, &size, totalSz)) <= 0) { + XFREE(mac, pkcs12->heap, DYNAMIC_TYPE_PKCS); + return ret; + } + mac->digestSz = size; + mac->digest = (byte*)XMALLOC(mac->digestSz, pkcs12->heap, + DYNAMIC_TYPE_DIGEST); + if (mac->digest == NULL || mac->digestSz + curIdx > totalSz) { + ERROR_OUT(MEMORY_E, exit_gsd); + } + XMEMCPY(mac->digest, mem + curIdx, 
mac->digestSz); + +#ifdef WOLFSSL_DEBUG_PKCS12 + { + byte* p; + for (printf("\t\tDigest = "), p = (byte*)mem+curIdx; + p < (byte*)mem + curIdx + mac->digestSz; + printf("%02X", *p), p++); + printf(" : size = %d\n", mac->digestSz); + } +#endif + + curIdx += mac->digestSz; + + /* get salt, should be octet string */ + if (GetASNTag(mem, &curIdx, &tag, totalSz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_gsd); + } + + if (tag != ASN_OCTET_STRING) { + WOLFSSL_MSG("Failed to get salt"); + ERROR_OUT(ASN_PARSE_E, exit_gsd); + } + + if ((ret = GetLength(mem, &curIdx, &size, totalSz)) < 0) { + goto exit_gsd; + } + mac->saltSz = size; + mac->salt = (byte*)XMALLOC(mac->saltSz, pkcs12->heap, DYNAMIC_TYPE_SALT); + if (mac->salt == NULL || mac->saltSz + curIdx > totalSz) { + ERROR_OUT(MEMORY_E, exit_gsd); + } + XMEMCPY(mac->salt, mem + curIdx, mac->saltSz); + +#ifdef WOLFSSL_DEBUG_PKCS12 + { + byte* p; + for (printf("\t\tSalt = "), p = (byte*)mem + curIdx; + p < (byte*)mem + curIdx + mac->saltSz; + printf("%02X", *p), p++); + printf(" : size = %d\n", mac->saltSz); + } +#endif + + curIdx += mac->saltSz; + + /* check for MAC iterations, default to 1 */ + mac->itt = WC_PKCS12_MAC_DEFAULT; + if (curIdx < totalSz) { + int number = 0; + if ((ret = GetShortInt(mem, &curIdx, &number, totalSz)) >= 0) { + /* found a iteration value */ + mac->itt = number; + } + } + +#ifdef WOLFSSL_DEBUG_PKCS12 + printf("\t\tITERATIONS : %d\n", mac->itt); +#endif + + *idx = curIdx; + pkcs12->signData = mac; + ret = 0; /* success */ + +exit_gsd: + + /* failure cleanup */ + if (ret != 0) { + if (mac) { + if (mac->digest) + XFREE(mac->digest, pkcs12->heap, DYNAMIC_TYPE_DIGEST); + XFREE(mac, pkcs12->heap, DYNAMIC_TYPE_PKCS); + } + } + + return ret; +} + + +/* expects PKCS12 signData to be set up with OID + * + * returns the size of mac created on success. A negative value will be returned + * in the case that an error happened. 
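+ *
+ * In outline (a sketch of the body below; the key derivation follows
+ * RFC 7292 appendix B, with ID byte 3 marking a MAC key):
+ *   key = PKCS12_PBKDF(id = 3, UTF16(password), salt, iterations, hash)
+ *   out = HMAC-hash(key, data)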
+ */ +static int wc_PKCS12_create_mac(WC_PKCS12* pkcs12, byte* data, word32 dataSz, + const byte* psw, word32 pswSz, byte* out, word32 outSz) +{ + Hmac hmac; + MacData* mac; + int ret, kLen; + enum wc_HashType hashT; + int idx = 0; + int id = 3; /* value from RFC 7292 indicating key is used for MAC */ + word32 i; + byte unicodePasswd[MAX_UNICODE_SZ]; + byte key[MAX_KEY_SIZE]; + + if (pkcs12 == NULL || pkcs12->signData == NULL || data == NULL || + out == NULL) { + return BAD_FUNC_ARG; + } + + mac = pkcs12->signData; + + /* unicode set up from asn.c */ + if ((pswSz * 2 + 2) > (int)sizeof(unicodePasswd)) { + WOLFSSL_MSG("PKCS12 max unicode size too small"); + return UNICODE_SIZE_E; + } + + for (i = 0; i < pswSz; i++) { + unicodePasswd[idx++] = 0x00; + unicodePasswd[idx++] = (byte)psw[i]; + } + /* add trailing NULL */ + unicodePasswd[idx++] = 0x00; + unicodePasswd[idx++] = 0x00; + + /* get hash type used and resulting size of HMAC key */ + hashT = wc_OidGetHash(mac->oid); + if (hashT == WC_HASH_TYPE_NONE) { + WOLFSSL_MSG("Unsupported hash used"); + return BAD_FUNC_ARG; + } + kLen = wc_HashGetDigestSize(hashT); + + /* check out buffer is large enough */ + if (kLen < 0 || outSz < (word32)kLen) { + return BAD_FUNC_ARG; + } + + /* idx contains size of unicodePasswd */ + if ((ret = wc_PKCS12_PBKDF_ex(key, unicodePasswd, idx, mac->salt, + mac->saltSz, mac->itt, kLen, (int)hashT, id, pkcs12->heap)) < 0) { + return ret; + } + + /* now that key has been created use it to get HMAC hash on data */ + if ((ret = wc_HmacInit(&hmac, pkcs12->heap, INVALID_DEVID)) != 0) { + return ret; + } + ret = wc_HmacSetKey(&hmac, (int)hashT, key, kLen); + if (ret == 0) + ret = wc_HmacUpdate(&hmac, data, dataSz); + if (ret == 0) + ret = wc_HmacFinal(&hmac, out); + wc_HmacFree(&hmac); + + if (ret != 0) + return ret; + + return kLen; /* same as digest size */ +} + + +/* check mac on pkcs12, pkcs12->mac has been sanity checked before entering * + * returns the result of comparison, success is 0 */ +static int wc_PKCS12_verify(WC_PKCS12* pkcs12, byte* data, word32 dataSz, + const byte* psw, word32 pswSz) +{ + MacData* mac; + int ret; + byte digest[WC_MAX_DIGEST_SIZE]; + + if (pkcs12 == NULL || pkcs12->signData == NULL || data == NULL) { + return BAD_FUNC_ARG; + } + + mac = pkcs12->signData; + +#ifdef WOLFSSL_DEBUG_PKCS12 + printf("Verifying MAC with OID = %d\n", mac->oid); +#endif + + /* check if this builds digest size is too small */ + if (mac->digestSz > WC_MAX_DIGEST_SIZE) { + WOLFSSL_MSG("PKCS12 max digest size too small"); + return BAD_FUNC_ARG; + } + + if ((ret = wc_PKCS12_create_mac(pkcs12, data, dataSz, psw, pswSz, + digest, WC_MAX_DIGEST_SIZE)) < 0) { + return ret; + } + +#ifdef WOLFSSL_DEBUG_PKCS12 + { + byte* p; + for (printf("\t\tHash = "), p = (byte*)digest; + p < (byte*)digest + mac->digestSz; + printf("%02X", *p), p++); + printf(" : size = %d\n", mac->digestSz); + } +#endif + + return XMEMCMP(digest, mac->digest, mac->digestSz); +} + + +/* Convert DER format stored in der buffer to WC_PKCS12 struct + * Puts the raw contents of Content Info into structure without completely + * parsing or decoding. + * der : pointer to der buffer holding PKCS12 + * derSz : size of der buffer + * pkcs12 : non-null pkcs12 pointer + * return 0 on success and negative on failure. 
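+ *
+ * Usage sketch (illustrative; der/derSz are assumed to hold a DER encoded
+ * PKCS12 blob, e.g. a .p12 file read into memory by the caller):
+ *   WC_PKCS12* p12 = wc_PKCS12_new();
+ *   if (p12 != NULL && wc_d2i_PKCS12(der, derSz, p12) == 0) {
+ *       ... p12 now holds the raw safes and any MAC data ...
+ *   }
+ *   wc_PKCS12_free(p12);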
+ */
+int wc_d2i_PKCS12(const byte* der, word32 derSz, WC_PKCS12* pkcs12)
+{
+    word32 idx = 0;
+    word32 totalSz = 0;
+    int ret;
+    int size = 0;
+    int version = 0;
+
+    WOLFSSL_ENTER("wc_d2i_PKCS12");
+
+    if (der == NULL || pkcs12 == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    totalSz = derSz;
+    if ((ret = GetSequence(der, &idx, &size, totalSz)) <= 0) {
+        WOLFSSL_MSG("Failed to get PKCS12 sequence");
+        return ret;
+    }
+
+    /* get version */
+    if ((ret = GetMyVersion(der, &idx, &version, totalSz)) < 0) {
+        return ret;
+    }
+
+#ifdef WOLFSSL_DEBUG_PKCS12
+    printf("\nBEGIN: PKCS12 size = %u\n", totalSz);
+    printf("version = %d\n", version);
+#endif
+
+    if (version != WC_PKCS12_VERSION_DEFAULT) {
+        WOLFSSL_MSG("PKCS12 unsupported version!");
+        return ASN_VERSION_E;
+    }
+
+    if ((ret = GetSequence(der, &idx, &size, totalSz)) < 0) {
+        return ret;
+    }
+
+#ifdef WOLFSSL_DEBUG_PKCS12
+    printf("\tSEQUENCE: AuthenticatedSafe size = %d\n", size);
+#endif
+
+    if ((ret = GetSafeContent(pkcs12, der, &idx, size + idx)) < 0) {
+        WOLFSSL_MSG("GetSafeContent error");
+        return ret;
+    }
+
+    /* if more buffer left check for MAC data */
+    if (idx < totalSz) {
+        if ((ret = GetSequence(der, &idx, &size, totalSz)) < 0) {
+            WOLFSSL_MSG("Ignoring unknown data at end of PKCS12 DER buffer");
+            ret = 0; /* not fatal, trailing data really is ignored */
+        }
+        else {
+        #ifdef WOLFSSL_DEBUG_PKCS12
+            printf("\tSEQUENCE: Signature size = %d\n", size);
+        #endif
+
+            if ((ret = GetSignData(pkcs12, der, &idx, totalSz)) < 0) {
+                return ASN_PARSE_E;
+            }
+        }
+    }
+
+#ifdef WOLFSSL_DEBUG_PKCS12
+    printf("END: PKCS12\n");
+#endif
+
+    return ret;
+}
+
+/* Convert WC_PKCS12 struct to allocated DER buffer.
+ * pkcs12 : non-null pkcs12 pointer
+ * der    : pointer-pointer to der buffer. If NULL space will be
+ *          allocated for der, which must be freed by application.
+ * derSz  : size of buffer passed in when der is not NULL. A NULL arg disables
+ *          sanity checks on buffer read/writes. When der is NULL, *derSz is
+ *          set to the required size and LENGTH_ONLY_E is returned.
+ * return size of DER on success and negative on failure.
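+ *
+ * Two-pass usage sketch (length query first, then encode; illustrative):
+ *   int sz = 0;
+ *   if (wc_i2d_PKCS12(pkcs12, NULL, &sz) == LENGTH_ONLY_E) {
+ *       byte* buf = (byte*)XMALLOC(sz, NULL, DYNAMIC_TYPE_PKCS);
+ *       byte* p   = buf;
+ *       if (buf != NULL && wc_i2d_PKCS12(pkcs12, &p, &sz) > 0) {
+ *           ... buf holds sz bytes of DER, p points one byte past it ...
+ *       }
+ *   }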
+ */ +int wc_i2d_PKCS12(WC_PKCS12* pkcs12, byte** der, int* derSz) +{ + int ret = 0; + word32 seqSz = 0, verSz = 0, totalSz = 0, idx = 0, sdBufSz = 0; + byte *buf = NULL; + byte ver[MAX_VERSION_SZ]; + byte seq[MAX_SEQ_SZ]; + byte *sdBuf = NULL; + + if ((pkcs12 == NULL) || (pkcs12->safe == NULL) || + (der == NULL && derSz == NULL)) { + return BAD_FUNC_ARG; + } + + /* Create the MAC portion */ + if (pkcs12->signData != NULL) { + MacData *mac = (MacData*)pkcs12->signData; + word32 innerSz = 0; + word32 outerSz = 0; + + /* get exact size */ + { + byte ASNLENGTH[MAX_LENGTH_SZ]; + byte ASNSHORT[MAX_SHORT_SZ]; + byte ASNALGO[MAX_ALGO_SZ]; + word32 tmpIdx = 0; + + /* algo id */ + innerSz += SetAlgoID(mac->oid, ASNALGO, oidHashType, 0); + + /* Octet string holding digest */ + innerSz += ASN_TAG_SZ; + innerSz += SetLength(mac->digestSz, ASNLENGTH); + innerSz += mac->digestSz; + + /* salt */ + outerSz += ASN_TAG_SZ; + outerSz += SetLength(mac->saltSz, ASNLENGTH); + outerSz += mac->saltSz; + + /* MAC iterations */ + outerSz += SetShortInt(ASNSHORT, &tmpIdx, mac->itt, MAX_SHORT_SZ); + + /* sequence of inner data */ + outerSz += SetSequence(innerSz, seq); + outerSz += innerSz; + } + sdBufSz = outerSz + SetSequence(outerSz, seq); + sdBuf = (byte*)XMALLOC(sdBufSz, pkcs12->heap, DYNAMIC_TYPE_PKCS); + if (sdBuf == NULL) { + ret = MEMORY_E; + } + + if (ret == 0) { + idx += SetSequence(outerSz, sdBuf); + idx += SetSequence(innerSz, &sdBuf[idx]); + + /* Set Algorithm Identifier */ + { + word32 algoIdSz; + + algoIdSz = SetAlgoID(mac->oid, &sdBuf[idx], oidHashType, 0); + if (algoIdSz == 0) { + ret = ALGO_ID_E; + } + else { + idx += algoIdSz; + } + } + } + + if (ret == 0) { + + + /* Octet string holding digest */ + idx += SetOctetString(mac->digestSz, &sdBuf[idx]); + XMEMCPY(&sdBuf[idx], mac->digest, mac->digestSz); + idx += mac->digestSz; + + /* Set salt */ + idx += SetOctetString(mac->saltSz, &sdBuf[idx]); + XMEMCPY(&sdBuf[idx], mac->salt, mac->saltSz); + idx += mac->saltSz; + + /* MAC iterations */ + { + int tmpSz; + word32 tmpIdx = 0; + byte ar[MAX_SHORT_SZ]; + tmpSz = SetShortInt(ar, &tmpIdx, mac->itt, MAX_SHORT_SZ); + if (tmpSz < 0) { + ret = tmpSz; + } + else { + XMEMCPY(&sdBuf[idx], ar, tmpSz); + } + } + totalSz += sdBufSz; + } + } + + /* Calculate size of der */ + if (ret == 0) { + totalSz += pkcs12->safe->dataSz; + + totalSz += 4; /* Octet string */ + + totalSz += 4; /* Element */ + + totalSz += 2 + sizeof(WC_PKCS12_DATA_OID); + + totalSz += 4; /* Seq */ + + ret = SetMyVersion(WC_PKCS12_VERSION_DEFAULT, ver, FALSE); + if (ret > 0) { + verSz = (word32)ret; + ret = 0; /* value larger than 0 is success */ + totalSz += verSz; + + seqSz = SetSequence(totalSz, seq); + totalSz += seqSz; + + /* check if getting length only */ + if (der == NULL && derSz != NULL) { + *derSz = totalSz; + XFREE(sdBuf, pkcs12->heap, DYNAMIC_TYPE_PKCS); + return LENGTH_ONLY_E; + } + + if (*der == NULL) { + /* Allocate if requested */ + buf = (byte*)XMALLOC(totalSz, NULL, DYNAMIC_TYPE_PKCS); + } + else { + buf = *der; + + /* sanity check on buffer size if passed in */ + if (derSz != NULL) { + if (*derSz < (int)totalSz) { + WOLFSSL_MSG("Buffer passed in is too small"); + ret = BUFFER_E; + } + } + } + } + } + + if (buf == NULL) { + ret = MEMORY_E; + } + + if (ret == 0) { + idx = 0; + + /* Copy parts to buf */ + XMEMCPY(&buf[idx], seq, seqSz); + idx += seqSz; + + XMEMCPY(&buf[idx], ver, verSz); + idx += verSz; + + seqSz = SetSequence(totalSz - sdBufSz - idx - 4, seq); + XMEMCPY(&buf[idx], seq, seqSz); + idx += seqSz; + + /* OID */ + idx 
+= SetObjectId(sizeof(WC_PKCS12_DATA_OID), &buf[idx]); + XMEMCPY(&buf[idx], WC_PKCS12_DATA_OID, sizeof(WC_PKCS12_DATA_OID)); + idx += sizeof(WC_PKCS12_DATA_OID); + + /* Element */ + buf[idx++] = ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC; + idx += SetLength(totalSz - sdBufSz - idx - 3, &buf[idx]); + + /* Octet string */ + idx += SetOctetString(totalSz - sdBufSz - idx - 4, &buf[idx]); + + XMEMCPY(&buf[idx], pkcs12->safe->data, pkcs12->safe->dataSz); + idx += pkcs12->safe->dataSz; + + if (pkcs12->signData != NULL) { + XMEMCPY(&buf[idx], sdBuf, sdBufSz); + } + + if (*der == NULL) { + /* Point to start of data allocated for DER */ + *der = buf; + } + else { + /* Increment pointer to byte past DER */ + *der = &buf[totalSz]; + } + + /* Return size of der */ + ret = totalSz; + } + + XFREE(sdBuf, pkcs12->heap, DYNAMIC_TYPE_PKCS); + /* Allocation of buf was the last time ret could be a failure, + * so no need to free here */ + + return ret; +} + + +/* helper function to free WC_DerCertList */ +void wc_FreeCertList(WC_DerCertList* list, void* heap) +{ + WC_DerCertList* current = list; + WC_DerCertList* next; + + if (list == NULL) { + return; + } + + while (current != NULL) { + next = current->next; + if (current->buffer != NULL) { + XFREE(current->buffer, heap, DYNAMIC_TYPE_PKCS); + } + XFREE(current, heap, DYNAMIC_TYPE_PKCS); + current = next; + } + + (void)heap; +} + +static void freeDecCertList(WC_DerCertList** list, byte** pkey, word32* pkeySz, + byte** cert, word32* certSz, void* heap) +{ + WC_DerCertList* current = *list; + WC_DerCertList* previous = NULL; + DecodedCert DeCert; + + while (current != NULL) { + + InitDecodedCert(&DeCert, current->buffer, current->bufferSz, heap); + if (ParseCertRelative(&DeCert, CERT_TYPE, NO_VERIFY, NULL) == 0) { + if (wc_CheckPrivateKey(*pkey, *pkeySz, &DeCert) == 1) { + WOLFSSL_MSG("Key Pair found"); + *cert = current->buffer; + *certSz = current->bufferSz; + + if (previous == NULL) { + *list = current->next; + } + else { + previous->next = current->next; + } + FreeDecodedCert(&DeCert); + XFREE(current, heap, DYNAMIC_TYPE_PKCS); + break; + } + } + FreeDecodedCert(&DeCert); + + previous = current; + current = current->next; + } +} + + +/* return 0 on success and negative on failure. + * By side effect returns private key, cert, and optionally ca. + * Parses and decodes the parts of PKCS12 + * + * NOTE: can parse with USER RSA enabled but may return cert that is not the + * pair for the key when using RSA key pairs. 
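+ *
+ * Call sketch (illustrative; pass ca as NULL when the extra CA list is
+ * not wanted):
+ *   byte *pkey = NULL, *cert = NULL;
+ *   word32 pkeySz = 0, certSz = 0;
+ *   WC_DerCertList* caList = NULL;
+ *   if (wc_PKCS12_parse(p12, "password", &pkey, &pkeySz,
+ *                       &cert, &certSz, &caList) == 0) {
+ *       ... caller owns pkey/cert (free with XFREE) and caList
+ *           (free with wc_FreeCertList) ...
+ *   }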
+ * + * pkcs12 : non-null WC_PKCS12 struct + * psw : password to use for PKCS12 decode + * pkey : Private key returned + * cert : x509 cert returned + * ca : optional ca returned + */ +int wc_PKCS12_parse(WC_PKCS12* pkcs12, const char* psw, + byte** pkey, word32* pkeySz, byte** cert, word32* certSz, + WC_DerCertList** ca) +{ + ContentInfo* ci = NULL; + WC_DerCertList* certList = NULL; + WC_DerCertList* tailList = NULL; + byte* buf = NULL; + word32 i, oid; + int ret, pswSz; + word32 algId; + + WOLFSSL_ENTER("wc_PKCS12_parse"); + + if (pkcs12 == NULL || psw == NULL || cert == NULL || certSz == NULL || + pkey == NULL || pkeySz == NULL) { + return BAD_FUNC_ARG; + } + + pswSz = (int)XSTRLEN(psw); + *cert = NULL; + *pkey = NULL; + if (ca != NULL) + *ca = NULL; + + /* if there is sign data then verify the MAC */ + if (pkcs12->signData != NULL ) { + if ((ret = wc_PKCS12_verify(pkcs12, pkcs12->safe->data, + pkcs12->safe->dataSz, (byte*)psw, pswSz)) != 0) { + WOLFSSL_MSG("PKCS12 Bad MAC on verify"); + WOLFSSL_LEAVE("wc_PKCS12_parse verify ", ret); + return MAC_CMP_FAILED_E; + } + } + + if (pkcs12->safe == NULL) { + WOLFSSL_MSG("No PKCS12 safes to parse"); + return BAD_FUNC_ARG; + } + + /* Decode content infos */ + ci = pkcs12->safe->CI; + for (i = 0; i < pkcs12->safe->numCI; i++) { + byte* data; + word32 idx = 0; + int size, totalSz; + byte tag; + + if (ci->type == WC_PKCS12_ENCRYPTED_DATA) { + int number; + + WOLFSSL_MSG("Decrypting PKCS12 Content Info Container"); + data = ci->data; + if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + + if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + if ((ret = GetLength(data, &idx, &size, ci->dataSz)) < 0) { + goto exit_pk12par; + } + + if ((ret = GetSequence(data, &idx, &size, ci->dataSz)) < 0) { + goto exit_pk12par; + } + + if ((ret = GetShortInt(data, &idx, &number, ci->dataSz)) < 0) { + goto exit_pk12par; + } + + if (number != 0) { + WOLFSSL_MSG("Expecting 0 for Integer with Encrypted PKCS12"); + } + + if ((ret = GetSequence(data, &idx, &size, ci->dataSz)) < 0) { + goto exit_pk12par; + } + + ret = GetObjectId(data, &idx, &oid, oidIgnoreType, ci->dataSz); + if (ret < 0 || oid != WC_PKCS12_DATA) { + WOLFSSL_MSG("Not PKCS12 DATA object or get object parse error"); + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + + /* decrypted content overwrites input buffer */ + size = ci->dataSz - idx; + buf = (byte*)XMALLOC(size, pkcs12->heap, DYNAMIC_TYPE_PKCS); + if (buf == NULL) { + ERROR_OUT(MEMORY_E, exit_pk12par); + } + XMEMCPY(buf, data + idx, size); + + if ((ret = DecryptContent(buf, size, psw, pswSz)) < 0) { + WOLFSSL_MSG("Decryption failed, algorithm not compiled in?"); + goto exit_pk12par; + } + + data = buf; + idx = 0; + + #ifdef WOLFSSL_DEBUG_PKCS12 + { + byte* p; + for (printf("\tData = "), p = (byte*)buf; + p < (byte*)buf + size; + printf("%02X", *p), p++); + printf("\n"); + } + #endif + } + else { /* type DATA */ + WOLFSSL_MSG("Parsing PKCS12 DATA Content Info Container"); + data = ci->data; + if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + + if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + if ((ret = GetLength(data, &idx, &size, ci->dataSz)) <= 0) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + + if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + if (tag != ASN_OCTET_STRING) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } 
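+            /* length of the OCTET STRING wrapping the bag sequence */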
+ if ((ret = GetLength(data, &idx, &size, ci->dataSz)) < 0) { + goto exit_pk12par; + } + + } + + /* parse through bags in ContentInfo */ + if ((ret = GetSequence(data, &idx, &totalSz, ci->dataSz)) < 0) { + goto exit_pk12par; + } + totalSz += idx; + + while ((int)idx < totalSz) { + int bagSz; + if ((ret = GetSequence(data, &idx, &bagSz, ci->dataSz)) < 0) { + goto exit_pk12par; + } + bagSz += idx; + + if ((ret = GetObjectId(data, &idx, &oid, oidIgnoreType, + ci->dataSz)) < 0) { + goto exit_pk12par; + } + + switch (oid) { + case WC_PKCS12_KeyBag: /* 667 */ + WOLFSSL_MSG("PKCS12 Key Bag found"); + if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + if ((ret = GetLength(data, &idx, &size, ci->dataSz)) <= 0) { + if (ret == 0) + ret = ASN_PARSE_E; + goto exit_pk12par; + } + if (*pkey == NULL) { + *pkey = (byte*)XMALLOC(size, pkcs12->heap, + DYNAMIC_TYPE_PUBLIC_KEY); + if (*pkey == NULL) { + ERROR_OUT(MEMORY_E, exit_pk12par); + } + XMEMCPY(*pkey, data + idx, size); + *pkeySz = ToTraditional_ex(*pkey, size, &algId); + } + + #ifdef WOLFSSL_DEBUG_PKCS12 + { + byte* p; + for (printf("\tKey = "), p = (byte*)*pkey; + p < (byte*)*pkey + size; + printf("%02X", *p), p++); + printf("\n"); + } + #endif + idx += size; + break; + + case WC_PKCS12_ShroudedKeyBag: /* 668 */ + { + byte* k; + + WOLFSSL_MSG("PKCS12 Shrouded Key Bag found"); + if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + if ((ret = GetLength(data, &idx, &size, + ci->dataSz)) < 0) { + goto exit_pk12par; + } + + k = (byte*)XMALLOC(size, pkcs12->heap, + DYNAMIC_TYPE_PUBLIC_KEY); + if (k == NULL) { + ERROR_OUT(MEMORY_E, exit_pk12par); + } + XMEMCPY(k, data + idx, size); + + /* overwrites input, be warned */ + if ((ret = ToTraditionalEnc(k, size, psw, pswSz, + &algId)) < 0) { + XFREE(k, pkcs12->heap, DYNAMIC_TYPE_PUBLIC_KEY); + goto exit_pk12par; + } + + if (ret < size) { + /* shrink key buffer */ + byte* tmp = (byte*)XMALLOC(ret, pkcs12->heap, + DYNAMIC_TYPE_PUBLIC_KEY); + if (tmp == NULL) { + XFREE(k, pkcs12->heap, DYNAMIC_TYPE_PUBLIC_KEY); + ERROR_OUT(MEMORY_E, exit_pk12par); + } + XMEMCPY(tmp, k, ret); + XFREE(k, pkcs12->heap, DYNAMIC_TYPE_PUBLIC_KEY); + k = tmp; + } + size = ret; + + if (*pkey == NULL) { + *pkey = k; + *pkeySz = size; + } + else { /* only expecting one key */ + XFREE(k, pkcs12->heap, DYNAMIC_TYPE_PUBLIC_KEY); + } + idx += size; + + #ifdef WOLFSSL_DEBUG_PKCS12 + { + byte* p; + for (printf("\tKey = "), p = (byte*)k; + p < (byte*)k + ret; + printf("%02X", *p), p++); + printf("\n"); + } + #endif + } + break; + + case WC_PKCS12_CertBag: /* 669 */ + { + WC_DerCertList* node; + WOLFSSL_MSG("PKCS12 Cert Bag found"); + if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + if ((ret = GetLength(data, &idx, &size, ci->dataSz)) < 0) { + goto exit_pk12par; + } + + /* get cert bag type */ + if ((ret = GetSequence(data, &idx, &size, ci->dataSz)) <0) { + goto exit_pk12par; + } + + if ((ret = GetObjectId(data, &idx, &oid, oidIgnoreType, + ci->dataSz)) < 0) { + goto exit_pk12par; + } + + switch (oid) { + case WC_PKCS12_CertBag_Type1: /* 675 */ + /* type 1 */ + WOLFSSL_MSG("PKCS12 cert bag type 1"); + if 
(GetASNTag(data, &idx, &tag, ci->dataSz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + if (tag != (ASN_CONSTRUCTED | + ASN_CONTEXT_SPECIFIC)) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + if ((ret = GetLength(data, &idx, &size, ci->dataSz)) + <= 0) { + if (ret == 0) + ret = ASN_PARSE_E; + goto exit_pk12par; + } + if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + if (tag != ASN_OCTET_STRING) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + + } + if ((ret = GetLength(data, &idx, &size, ci->dataSz)) + < 0) { + goto exit_pk12par; + } + break; + default: + WOLFSSL_MSG("Unknown PKCS12 cert bag type"); + } + + if (size + idx > (word32)bagSz) { + ERROR_OUT(ASN_PARSE_E, exit_pk12par); + } + + /* list to hold all certs found */ + node = (WC_DerCertList*)XMALLOC(sizeof(WC_DerCertList), + pkcs12->heap, DYNAMIC_TYPE_PKCS); + if (node == NULL) { + ERROR_OUT(MEMORY_E, exit_pk12par); + } + XMEMSET(node, 0, sizeof(WC_DerCertList)); + + node->buffer = (byte*)XMALLOC(size, pkcs12->heap, + DYNAMIC_TYPE_PKCS); + if (node->buffer == NULL) { + XFREE(node, pkcs12->heap, DYNAMIC_TYPE_PKCS); + ERROR_OUT(MEMORY_E, exit_pk12par); + } + XMEMCPY(node->buffer, data + idx, size); + node->bufferSz = size; + + /* put the new node into the list */ + if (certList != NULL) { + WOLFSSL_MSG("Pushing new cert onto queue"); + tailList->next = node; + tailList = node; + } + else { + certList = node; + tailList = node; + } + + /* on to next */ + idx += size; + } + break; + + case WC_PKCS12_CrlBag: /* 670 */ + WOLFSSL_MSG("PKCS12 CRL BAG not yet supported"); + break; + + case WC_PKCS12_SecretBag: /* 671 */ + WOLFSSL_MSG("PKCS12 Secret BAG not yet supported"); + break; + + case WC_PKCS12_SafeContentsBag: /* 672 */ + WOLFSSL_MSG("PKCS12 Safe Contents BAG not yet supported"); + break; + + default: + WOLFSSL_MSG("Unknown PKCS12 BAG type found"); + } + + /* Attribute, unknown bag or unsupported */ + if ((int)idx < bagSz) { + idx = bagSz; /* skip for now */ + } + } + + /* free temporary buffer */ + if (buf != NULL) { + XFREE(buf, pkcs12->heap, DYNAMIC_TYPE_PKCS); + buf = NULL; + } + + ci = ci->next; + WOLFSSL_MSG("Done Parsing PKCS12 Content Info Container"); + } + + /* check if key pair, remove from list */ + if (*pkey != NULL) { + freeDecCertList(&certList, pkey, pkeySz, cert, certSz, pkcs12->heap); + } + + /* if ca arg provided return certList, otherwise free it */ + if (ca != NULL) { + *ca = certList; + } + else { + /* free list, not wanted */ + wc_FreeCertList(certList, pkcs12->heap); + } + (void)tailList; /* not used */ + + ret = 0; /* success */ + +exit_pk12par: + + if (ret != 0) { + /* failure cleanup */ + if (*pkey) { + XFREE(*pkey, pkcs12->heap, DYNAMIC_TYPE_PUBLIC_KEY); + *pkey = NULL; + } + if (buf) { + XFREE(buf, pkcs12->heap, DYNAMIC_TYPE_PKCS); + buf = NULL; + } + + wc_FreeCertList(certList, pkcs12->heap); + } + + return ret; +} + + +/* Helper function to shroud keys. 
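+ * "Shrouding" wraps the key as PKCS#8: a negative vAlgo produces a plain
+ * PKCS#8 blob for a KeyBag, otherwise the key is encrypted into a
+ * shrouded PKCS#8 blob for a ShroudedKeyBag.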
+ * + * pkcs12 structure to use with shrouding key + * rng random number generator used + * out buffer to hold results + * outSz size of out buffer + * key key that is going to be shrouded + * keySz size of key buffer + * vAlgo algorithm version + * pass password to use + * passSz size of pass buffer + * itt number of iterations + * + * returns the size of the shrouded key on success + */ +static int wc_PKCS12_shroud_key(WC_PKCS12* pkcs12, WC_RNG* rng, + byte* out, word32* outSz, byte* key, word32 keySz, int vAlgo, + const char* pass, int passSz, int itt) +{ + void* heap; + word32 tmpIdx = 0; + int vPKCS = 1; /* PKCS#12 default set to 1 */ + word32 sz; + word32 totalSz = 0; + int ret; + + + if (outSz == NULL || pkcs12 == NULL || rng == NULL || key == NULL || + pass == NULL) { + return BAD_FUNC_ARG; + } + + heap = wc_PKCS12_GetHeap(pkcs12); + + /* check if trying to get size */ + if (out != NULL) { + tmpIdx += MAX_LENGTH_SZ + 1; /* save room for length and tag (+1) */ + sz = *outSz - tmpIdx; + } + + /* case of no encryption */ + if (vAlgo < 0) { + const byte* curveOID = NULL; + word32 oidSz = 0; + int algoID; + + WOLFSSL_MSG("creating PKCS12 Key Bag"); + + /* check key type and get OID if ECC */ + if ((ret = wc_GetKeyOID(key, keySz, &curveOID, &oidSz, &algoID, heap)) + < 0) { + return ret; + } + + /* PKCS#8 wrapping around key */ + ret = wc_CreatePKCS8Key(out + tmpIdx, &sz, key, keySz, algoID, + curveOID, oidSz); + } + else { + WOLFSSL_MSG("creating PKCS12 Shrouded Key Bag"); + + if (vAlgo == PBE_SHA1_DES) { + vPKCS = PKCS5; + vAlgo = 10; + } + + ret = UnTraditionalEnc(key, keySz, out + tmpIdx, &sz, pass, passSz, + vPKCS, vAlgo, NULL, 0, itt, rng, heap); + } + if (ret == LENGTH_ONLY_E) { + *outSz = sz + MAX_LENGTH_SZ + 1; + return LENGTH_ONLY_E; + } + if (ret < 0) { + return ret; + } + + totalSz += ret; + + /* out should not be null at this point but check before writing */ + if (out == NULL) { + return BAD_FUNC_ARG; + } + + /* rewind index and set tag and length */ + tmpIdx -= MAX_LENGTH_SZ + 1; + sz = SetExplicit(0, ret, out + tmpIdx); + tmpIdx += sz; totalSz += sz; + XMEMMOVE(out + tmpIdx, out + MAX_LENGTH_SZ + 1, ret); + + return totalSz; +} + + +/* Helper function to create key bag. 
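+ * The bag is laid out as SEQUENCE { bag OID, [0] EXPLICIT key blob },
+ * using the KeyBag OID when algo < 0 and the ShroudedKeyBag OID
+ * otherwise; the key blob itself comes from wc_PKCS12_shroud_key above.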
+ * + * pkcs12 structure to use with key bag + * rng random number generator used + * out buffer to hold results + * outSz size of out buffer + * key key that is going into key bag + * keySz size of key buffer + * algo algorithm version + * iter number of iterations + * pass password to use + * passSz size of pass buffer + * + * returns the size of the key bag on success + */ +static int wc_PKCS12_create_key_bag(WC_PKCS12* pkcs12, WC_RNG* rng, + byte* out, word32* outSz, byte* key, word32 keySz, int algo, int iter, + char* pass, int passSz) +{ + void* heap; + byte* tmp; + word32 length = 0; + word32 idx = 0; + word32 totalSz = 0; + word32 sz; + word32 i; + word32 tmpSz; + int ret; + + /* get max size for shrouded key */ + ret = wc_PKCS12_shroud_key(pkcs12, rng, NULL, &length, key, keySz, + algo, pass, passSz, iter); + if (ret != LENGTH_ONLY_E && ret < 0) { + return ret; + } + + if (out == NULL) { + *outSz = MAX_SEQ_SZ + WC_PKCS12_DATA_OBJ_SZ + 1 + MAX_LENGTH_SZ + + length; + return LENGTH_ONLY_E; + } + + heap = wc_PKCS12_GetHeap(pkcs12); + + /* leave room for sequence */ + idx += MAX_SEQ_SZ; + + if (algo < 0) { /* not encrypted */ + out[idx++] = ASN_OBJECT_ID; totalSz++; + sz = SetLength(sizeof(WC_PKCS12_KeyBag_OID), out + idx); + idx += sz; totalSz += sz; + for (i = 0; i < sizeof(WC_PKCS12_KeyBag_OID); i++) { + out[idx++] = WC_PKCS12_KeyBag_OID[i]; totalSz++; + } + } + else { /* encrypted */ + out[idx++] = ASN_OBJECT_ID; totalSz++; + sz = SetLength(sizeof(WC_PKCS12_ShroudedKeyBag_OID), out + idx); + idx += sz; totalSz += sz; + for (i = 0; i < sizeof(WC_PKCS12_ShroudedKeyBag_OID); i++) { + out[idx++] = WC_PKCS12_ShroudedKeyBag_OID[i]; totalSz++; + } + } + + /* shroud key */ + tmp = (byte*)XMALLOC(length, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (tmp == NULL) { + return MEMORY_E; + } + + ret = wc_PKCS12_shroud_key(pkcs12, rng, tmp, &length, key, keySz, + algo, pass, passSz, iter); + if (ret < 0) { + XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + return ret; + } + length = ret; + XMEMCPY(out + idx, tmp, length); + XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + totalSz += length; + + /* set beginning sequence */ + tmpSz = SetSequence(totalSz, out); + XMEMMOVE(out + tmpSz, out + MAX_SEQ_SZ, totalSz); + + (void)heap; + return totalSz + tmpSz; +} + + +/* Helper function to create cert bag. 
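+ * The layout produced is SEQUENCE { CertBag OID, [0] { SEQUENCE {
+ * certType OID, [0] OCTET STRING holding the DER cert } } }; only the
+ * x509Certificate ("type 1") cert type is emitted here.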
+ * + * pkcs12 structure to use with cert bag + * out buffer to hold results + * outSz size of out buffer + * cert cert that is going into cert bag + * certSz size of cert buffer + * + * returns the size of the cert bag on success + */ +static int wc_PKCS12_create_cert_bag(WC_PKCS12* pkcs12, + byte* out, word32* outSz, byte* cert, word32 certSz) +{ + word32 length = 0; + word32 idx = 0; + word32 totalSz = 0; + word32 sz; + int WC_CERTBAG_OBJECT_ID = 13; + int WC_CERTBAG1_OBJECT_ID = 12; + word32 i; + word32 tmpSz; + + if (out == NULL) { + *outSz = MAX_SEQ_SZ + WC_CERTBAG_OBJECT_ID + 1 + MAX_LENGTH_SZ + + MAX_SEQ_SZ + WC_CERTBAG1_OBJECT_ID + 1 + MAX_LENGTH_SZ + 1 + + MAX_LENGTH_SZ + certSz; + return LENGTH_ONLY_E; + } + + /* check buffer size able to handle max size */ + if (*outSz < (MAX_SEQ_SZ + WC_CERTBAG_OBJECT_ID + 1 + MAX_LENGTH_SZ + + MAX_SEQ_SZ + WC_CERTBAG1_OBJECT_ID + 1 + MAX_LENGTH_SZ + 1 + + MAX_LENGTH_SZ + certSz)) { + return BUFFER_E; + } + + /* save room for sequence */ + idx += MAX_SEQ_SZ; + + /* objectId WC_PKCS12_CertBag */ + out[idx++] = ASN_OBJECT_ID; totalSz++; + sz = SetLength(sizeof(WC_PKCS12_CertBag_OID), out + idx); + idx += sz; totalSz += sz; + for (i = 0; i < sizeof(WC_PKCS12_CertBag_OID); i++) { + out[idx++] = WC_PKCS12_CertBag_OID[i]; totalSz++; + } + + /**** Cert Bag type 1 ****/ + out[idx++] = (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC); totalSz++; + + /* save room for length and sequence */ + idx += MAX_LENGTH_SZ; + idx += MAX_SEQ_SZ; + + /* object id WC_PKCS12_CertBag_Type1 */ + out[idx++] = ASN_OBJECT_ID; length++; + sz = SetLength(sizeof(WC_PKCS12_CertBag_Type1_OID), out + idx); + idx += sz; length += sz; + for (i = 0; i < sizeof(WC_PKCS12_CertBag_Type1_OID); i++) { + out[idx++] = WC_PKCS12_CertBag_Type1_OID[i]; length++; + } + + out[idx++] = (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC); length++; + sz = 0; + idx += MAX_LENGTH_SZ; /* save room for length */ + + /* place the cert in the buffer */ + out[idx++] = ASN_OCTET_STRING; sz++; + tmpSz = SetLength(certSz, out + idx); + idx += tmpSz; sz += tmpSz; + XMEMCPY(out + idx, cert, certSz); + idx += certSz; sz += certSz; + + /* rewind idx and place length */ + idx -= (sz + MAX_LENGTH_SZ); + tmpSz = SetLength(sz, out + idx); + XMEMMOVE(out + idx + tmpSz, out + idx + MAX_LENGTH_SZ, sz); + idx += tmpSz + sz; length += tmpSz + sz; + + /* rewind idx and set sequence */ + idx -= (length + MAX_SEQ_SZ); + tmpSz = SetSequence(length, out + idx); + XMEMMOVE(out + idx + tmpSz, out + idx + MAX_SEQ_SZ, length); + length += tmpSz; + + /* place final length */ + idx -= MAX_LENGTH_SZ; + tmpSz = SetLength(length, out + idx); + XMEMMOVE(out + idx + tmpSz, out + idx + MAX_LENGTH_SZ, length); + length += tmpSz; + + /* place final sequence */ + totalSz += length; + tmpSz = SetSequence(totalSz, out); + XMEMMOVE(out + tmpSz, out + MAX_SEQ_SZ, totalSz); + + (void)pkcs12; + + return totalSz + tmpSz; +} + + +/* Helper function to encrypt content. 
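+ * With type WC_PKCS12_ENCRYPTED_DATA the content is run through
+ * EncryptContent and wrapped as an EncryptedData content info; with
+ * WC_PKCS12_DATA it is wrapped, unencrypted, in an OCTET STRING.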
+ * + * pkcs12 structure to use with key bag + * rng random number generator used + * out buffer to hold results + * outSz size of out buffer + * content content to encrypt + * contentSz size of content buffer + * vAlgo algorithm version + * pass password to use + * passSz size of pass buffer + * iter number of iterations + * type content type i.e WC_PKCS12_ENCRYPTED_DATA or WC_PKCS12_DATA + * + * returns the size of result on success + */ +static int wc_PKCS12_encrypt_content(WC_PKCS12* pkcs12, WC_RNG* rng, + byte* out, word32* outSz, byte* content, word32 contentSz, int vAlgo, + const char* pass, int passSz, int iter, int type) +{ + void* heap; + int vPKCS = 1; /* PKCS#12 is always set to 1 */ + int ret; + byte* tmp; + word32 idx = 0; + word32 totalSz = 0; + word32 length = 0; + word32 tmpSz; + word32 encSz; + + byte seq[MAX_SEQ_SZ]; + + WOLFSSL_MSG("encrypting PKCS12 content"); + + heap = wc_PKCS12_GetHeap(pkcs12); + + /* ENCRYPTED DATA + * ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC + * length + * sequence + * short int + * sequence + * get object id */ + if (type == WC_PKCS12_ENCRYPTED_DATA) { + word32 outerSz = 0; + + encSz = contentSz; + if ((ret = EncryptContent(NULL, contentSz, NULL, &encSz, + pass, passSz, vPKCS, vAlgo, NULL, 0, iter, rng, heap)) < 0) { + if (ret != LENGTH_ONLY_E) { + return ret; + } + } + + /* calculate size */ + totalSz = SetObjectId(sizeof(WC_PKCS12_ENCRYPTED_OID), seq); + totalSz += sizeof(WC_PKCS12_ENCRYPTED_OID); + totalSz += ASN_TAG_SZ; + + length = SetMyVersion(0, seq, 0); + tmpSz = SetObjectId(sizeof(WC_PKCS12_DATA_OID), seq); + tmpSz += sizeof(WC_PKCS12_DATA_OID); + tmpSz += encSz; + length += SetSequence(tmpSz, seq) + tmpSz; + outerSz = SetSequence(length, seq) + length; + + totalSz += SetLength(outerSz, seq) + outerSz; + if (out == NULL) { + *outSz = totalSz + SetSequence(totalSz, seq); + return LENGTH_ONLY_E; + } + + if (*outSz < totalSz + SetSequence(totalSz, seq)) { + return BUFFER_E; + } + + idx = 0; + idx += SetSequence(totalSz, out + idx); + idx += SetObjectId(sizeof(WC_PKCS12_ENCRYPTED_OID), out + idx); + if (idx + sizeof(WC_PKCS12_ENCRYPTED_OID) > *outSz){ + return BUFFER_E; + } + XMEMCPY(out + idx, WC_PKCS12_ENCRYPTED_OID, + sizeof(WC_PKCS12_ENCRYPTED_OID)); + idx += sizeof(WC_PKCS12_ENCRYPTED_OID); + + if (idx + 1 > *outSz){ + return BUFFER_E; + } + out[idx++] = (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC); + idx += SetLength(outerSz, out + idx); + + idx += SetSequence(length, out + idx); + idx += SetMyVersion(0, out + idx, 0); + tmp = (byte*)XMALLOC(encSz, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (tmp == NULL) { + return MEMORY_E; + } + + if ((ret = EncryptContent(content, contentSz, tmp, &encSz, + pass, passSz, vPKCS, vAlgo, NULL, 0, iter, rng, heap)) < 0) { + XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER); + return ret; + } + encSz = ret; + + #ifdef WOLFSSL_DEBUG_PKCS12 + { + byte* p; + for (printf("(size %u) Encrypted Content = ", encSz), + p = (byte*)tmp; + p < (byte*)tmp + encSz; + printf("%02X", *p), p++); + printf("\n"); + } + #endif + + idx += SetSequence(WC_PKCS12_DATA_OBJ_SZ + encSz, out + idx); + idx += SetObjectId(sizeof(WC_PKCS12_DATA_OID), out + idx); + if (idx + sizeof(WC_PKCS12_DATA_OID) > *outSz){ + WOLFSSL_MSG("Buffer not large enough for DATA OID"); + return BUFFER_E; + } + XMEMCPY(out + idx, WC_PKCS12_DATA_OID, sizeof(WC_PKCS12_DATA_OID)); + idx += sizeof(WC_PKCS12_DATA_OID); + + /* copy over encrypted data */ + if (idx + encSz > *outSz){ + return BUFFER_E; + } + XMEMCPY(out + idx, tmp, encSz); + XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER); 
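+        /* account for the encrypted payload; idx is the total written */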
+ idx += encSz; + return idx; + } + + /* DATA + * ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC + * length + * ASN_OCTET_STRING + * length + * sequence containing all bags */ + if (type == WC_PKCS12_DATA) { + /* calculate size */ + totalSz = SetObjectId(sizeof(WC_PKCS12_DATA_OID), seq); + totalSz += sizeof(WC_PKCS12_DATA_OID); + totalSz += ASN_TAG_SZ; + + length = SetOctetString(contentSz, seq); + length += contentSz; + totalSz += SetLength(length, seq); + totalSz += length; + + if (out == NULL) { + *outSz = totalSz + SetSequence(totalSz, seq); + return LENGTH_ONLY_E; + } + + if (*outSz < (totalSz + SetSequence(totalSz, seq))) { + return BUFFER_E; + } + + /* place data in output buffer */ + idx = 0; + idx += SetSequence(totalSz, out); + idx += SetObjectId(sizeof(WC_PKCS12_DATA_OID), out + idx); + if (idx + sizeof(WC_PKCS12_DATA_OID) > *outSz){ + WOLFSSL_MSG("Buffer not large enough for DATA OID"); + return BUFFER_E; + } + XMEMCPY(out + idx, WC_PKCS12_DATA_OID, sizeof(WC_PKCS12_DATA_OID)); + idx += sizeof(WC_PKCS12_DATA_OID); + + if (idx + 1 > *outSz){ + return BUFFER_E; + } + out[idx++] = (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC); + idx += SetLength(length, out + idx); + idx += SetOctetString(contentSz, out + idx); + + if (idx + contentSz > *outSz){ + return BUFFER_E; + } + XMEMCPY(out + idx, content, contentSz); + idx += contentSz; + + return idx; + } + + WOLFSSL_MSG("Unknown/Unsupported content type"); + return BAD_FUNC_ARG; +} + + +/* helper function to create the PKCS12 key content + * keyCiSz is output buffer size + * returns a pointer to be free'd by caller on success and NULL on failure */ +static byte* PKCS12_create_key_content(WC_PKCS12* pkcs12, int nidKey, + word32* keyCiSz, WC_RNG* rng, char* pass, word32 passSz, + byte* key, word32 keySz, int iter) +{ + byte* keyBuf; + word32 keyBufSz = 0; + byte* keyCi = NULL; + word32 tmpSz; + int ret; + int algo; + void* heap; + + heap = wc_PKCS12_GetHeap(pkcs12); + *keyCiSz = 0; + switch (nidKey) { + case PBE_SHA1_RC4_128: + algo = 1; + break; + + case PBE_SHA1_DES: + algo = 2; + break; + + case PBE_SHA1_DES3: + algo = 3; + break; + + /* no encryption */ + case -1: + algo = -1; + break; + + default: + WOLFSSL_MSG("Unknown/Unsupported key encryption"); + return NULL; + } + + /* get max size for key bag */ + ret = wc_PKCS12_create_key_bag(pkcs12, rng, NULL, &keyBufSz, key, keySz, + algo, iter, pass, passSz); + if (ret != LENGTH_ONLY_E && ret < 0) { + WOLFSSL_MSG("Error getting key bag size"); + return NULL; + } + + /* account for sequence around bag */ + keyBufSz += MAX_SEQ_SZ; + keyBuf = (byte*)XMALLOC(keyBufSz, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (keyBuf == NULL) { + WOLFSSL_MSG("Memory error creating keyBuf buffer"); + return NULL; + } + + ret = wc_PKCS12_create_key_bag(pkcs12, rng, keyBuf + MAX_SEQ_SZ, &keyBufSz, + key, keySz, algo, iter, pass, passSz); + if (ret < 0) { + XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER); + WOLFSSL_MSG("Error creating key bag"); + return NULL; + } + keyBufSz = ret; + + tmpSz = SetSequence(keyBufSz, keyBuf); + XMEMMOVE(keyBuf + tmpSz, keyBuf + MAX_SEQ_SZ, keyBufSz); + keyBufSz += tmpSz; + + #ifdef WOLFSSL_DEBUG_PKCS12 + { + word32 i; + printf("(size %u) Key Bag = ", keyBufSz); + for (i = 0; i < keyBufSz; i++) + printf("%02X", keyBuf[i]); + printf("\n"); + } + #endif + ret = wc_PKCS12_encrypt_content(pkcs12, rng, NULL, keyCiSz, + NULL, keyBufSz, algo, pass, passSz, iter, WC_PKCS12_DATA); + if (ret != LENGTH_ONLY_E) { + XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER); + WOLFSSL_MSG("Error getting key encrypt content 
size"); + return NULL; + } + keyCi = (byte*)XMALLOC(*keyCiSz, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (keyCi == NULL) { + XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER); + return NULL; + } + + ret = wc_PKCS12_encrypt_content(pkcs12, rng, keyCi, keyCiSz, + keyBuf, keyBufSz, algo, pass, passSz, iter, WC_PKCS12_DATA); + XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (ret < 0 ) { + XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER); + WOLFSSL_MSG("Error creating key encrypt content"); + return NULL; + } + *keyCiSz = ret; + + #ifdef WOLFSSL_DEBUG_PKCS12 + { + word32 i; + printf("(size %u) Key Content Info = ", *keyCiSz); + for (i = 0; i < *keyCiSz; i++) + printf("%02X", keyCi[i]); + printf("\n"); + } + #endif + + (void)heap; + return keyCi; +} + + +/* helper function to create the PKCS12 certificate content + * certCiSz is output buffer size + * returns a pointer to be free'd by caller on success and NULL on failure */ +static byte* PKCS12_create_cert_content(WC_PKCS12* pkcs12, int nidCert, + WC_DerCertList* ca, byte* cert, word32 certSz, word32* certCiSz, + WC_RNG* rng, char* pass, word32 passSz, int iter) +{ + int algo; + int ret; + int type; + + byte* certBuf = NULL; + word32 certBufSz; + word32 idx; + word32 sz; + word32 tmpSz; + + byte* certCi; + void* heap; + + heap = wc_PKCS12_GetHeap(pkcs12); + switch (nidCert) { + case PBE_SHA1_RC4_128: + type = WC_PKCS12_ENCRYPTED_DATA; + algo = 1; + break; + + case PBE_SHA1_DES: + type = WC_PKCS12_ENCRYPTED_DATA; + algo = 2; + break; + + case PBE_SHA1_DES3: + type = WC_PKCS12_ENCRYPTED_DATA; + algo = 3; + break; + + case -1: + type = WC_PKCS12_DATA; + algo = -1; + break; + + default: + WOLFSSL_MSG("Unknown/Unsupported certificate encryption"); + return NULL; + } + + /* get max size of buffer needed */ + ret = wc_PKCS12_create_cert_bag(pkcs12, NULL, &certBufSz, cert, certSz); + if (ret != LENGTH_ONLY_E) { + return NULL; + } + + if (ca != NULL) { + WC_DerCertList* current = ca; + word32 curBufSz = 0; + + /* get max buffer size */ + while (current != NULL) { + ret = wc_PKCS12_create_cert_bag(pkcs12, NULL, &curBufSz, + current->buffer, current->bufferSz); + if (ret != LENGTH_ONLY_E) { + return NULL; + } + certBufSz += curBufSz; + current = current->next; + } + } + + /* account for Sequence that holds all certificate bags */ + certBufSz += MAX_SEQ_SZ; + + /* completed getting max size, now create buffer and start adding bags */ + certBuf = (byte*)XMALLOC(certBufSz, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (certBuf == NULL) { + WOLFSSL_MSG("Memory error creating certificate bags"); + return NULL; + } + + idx = 0; + idx += MAX_SEQ_SZ; + + sz = certBufSz - idx; + if ((ret = wc_PKCS12_create_cert_bag(pkcs12, certBuf + idx, &sz, + cert, certSz)) < 0) { + XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER); + return NULL; + } + idx += ret; + + if (ca != NULL) { + WC_DerCertList* current = ca; + + while (current != NULL) { + sz = certBufSz - idx; + if ((ret = wc_PKCS12_create_cert_bag(pkcs12, certBuf + idx, &sz, + current->buffer, current->bufferSz)) < 0) { + XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER); + return NULL; + } + idx += ret; + current = current->next; + } + } + + /* set sequence and create encrypted content with all certificate bags */ + tmpSz = SetSequence(idx - MAX_SEQ_SZ, certBuf); + XMEMMOVE(certBuf + tmpSz, certBuf + MAX_SEQ_SZ, idx - MAX_SEQ_SZ); + certBufSz = tmpSz + (idx - MAX_SEQ_SZ); + + /* get buffer size needed for content info */ + ret = wc_PKCS12_encrypt_content(pkcs12, rng, NULL, certCiSz, + NULL, certBufSz, algo, pass, passSz, iter, type); + if (ret 
!= LENGTH_ONLY_E) { + XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER); + WOLFSSL_LEAVE("wc_PKCS12_create()", ret); + return NULL; + } + certCi = (byte*)XMALLOC(*certCiSz, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (certCi == NULL) { + XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER); + return NULL; + } + + ret = wc_PKCS12_encrypt_content(pkcs12, rng, certCi, certCiSz, + certBuf, certBufSz, algo, pass, passSz, iter, type); + XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (ret < 0) { + WOLFSSL_LEAVE("wc_PKCS12_create()", ret); + return NULL; + } + *certCiSz = ret; + + #ifdef WOLFSSL_DEBUG_PKCS12 + { + word32 i; + printf("(size %u) Encrypted Certificate Content Info = ", *certCiSz); + for (i = 0; i < *certCiSz; i++) + printf("%02X", certCi[i]); + printf("\n"); + } + #endif + + (void)heap; + return certCi; +} + + +/* helper function to create the PKCS12 safe + * returns 0 on success */ +static int PKCS12_create_safe(WC_PKCS12* pkcs12, byte* certCi, word32 certCiSz, + byte* keyCi, word32 keyCiSz, WC_RNG* rng, char* pass, word32 passSz, + int iter) +{ + int length; + int ret; + byte seq[MAX_SEQ_SZ]; + word32 safeDataSz; + word32 innerDataSz; + byte *innerData = NULL; + byte *safeData = NULL; + word32 idx; + + innerDataSz = certCiSz + keyCiSz+SetSequence(certCiSz + keyCiSz, seq); + + /* add Content Info structs to safe, key first then cert */ + ret = wc_PKCS12_encrypt_content(pkcs12, rng, NULL, &safeDataSz, + NULL, innerDataSz, 0, NULL, 0, 0, WC_PKCS12_DATA); + if (ret != LENGTH_ONLY_E) { + return ret; + } + + safeData = (byte*)XMALLOC(safeDataSz, pkcs12->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (safeData == NULL) { + WOLFSSL_MSG("Error malloc'ing safe data buffer"); + return MEMORY_E; + } + + /* create sequence of inner data */ + innerData = (byte*)XMALLOC(innerDataSz, pkcs12->heap, DYNAMIC_TYPE_PKCS); + if (innerData == NULL) { + WOLFSSL_MSG("Error malloc'ing inner data buffer"); + XFREE(safeData, pkcs12->heap, DYNAMIC_TYPE_TMP_BUFFER); + return MEMORY_E; + } + idx = 0; + idx += SetSequence(certCiSz + keyCiSz, innerData); + XMEMCPY(innerData + idx, certCi, certCiSz); + XMEMCPY(innerData + idx + certCiSz, keyCi, keyCiSz); + + ret = wc_PKCS12_encrypt_content(pkcs12, rng, safeData, &safeDataSz, + innerData, innerDataSz, 0, pass, passSz, iter, WC_PKCS12_DATA); + XFREE(innerData, pkcs12->heap, DYNAMIC_TYPE_PKCS); + if (ret < 0 ) { + WOLFSSL_MSG("Error setting data type for safe contents"); + XFREE(safeData, pkcs12->heap, DYNAMIC_TYPE_TMP_BUFFER); + return ret; + } + idx = 0; + + ret = GetSequence(safeData, &idx, &length, safeDataSz); + if (ret < 0) { + WOLFSSL_MSG("Error getting first sequence of safe"); + XFREE(safeData, pkcs12->heap, DYNAMIC_TYPE_TMP_BUFFER); + return ret; + } + + ret = GetSafeContent(pkcs12, safeData, &idx, safeDataSz); + XFREE(safeData, pkcs12->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (ret < 0) { + WOLFSSL_MSG("Unable to create safe contents"); + return ret; + } + return 0; +} + + +/* + * pass : password to use with encryption + * passSz : size of the password buffer + * name : friendlyName to use + * key : DER format of key + * keySz : size of key buffer + * cert : DER format of certificate + * certSz : size of the certificate buffer + * ca : a list of extra certificates + * nidKey : type of encryption to use on the key (-1 means no encryption) + * nidCert : type of encryption to use on the certificate + * (-1 means no encryption) + * iter : number of iterations with encryption + * macIter : number of iterations when creating MAC + * keyType : flag for signature and/or encryption key + * 
heap : pointer to allocate from memory + * + * returns a pointer to a new WC_PKCS12 structure on success and NULL if failed + */ +WC_PKCS12* wc_PKCS12_create(char* pass, word32 passSz, char* name, + byte* key, word32 keySz, byte* cert, word32 certSz, WC_DerCertList* ca, + int nidKey, int nidCert, int iter, int macIter, int keyType, void* heap) +{ + WC_PKCS12* pkcs12; + WC_RNG rng; + int ret; + + byte* certCi = NULL; + byte* keyCi = NULL; + word32 certCiSz; + word32 keyCiSz; + + WOLFSSL_ENTER("wc_PKCS12_create()"); + + if ((ret = wc_InitRng_ex(&rng, heap, INVALID_DEVID)) != 0) { + return NULL; + } + + if ((pkcs12 = wc_PKCS12_new()) == NULL) { + wc_FreeRng(&rng); + WOLFSSL_LEAVE("wc_PKCS12_create", MEMORY_E); + return NULL; + } + + if ((ret = wc_PKCS12_SetHeap(pkcs12, heap)) != 0) { + wc_PKCS12_free(pkcs12); + wc_FreeRng(&rng); + WOLFSSL_LEAVE("wc_PKCS12_create", ret); + return NULL; + } + + if (iter <= 0) { + iter = WC_PKCS12_ITT_DEFAULT; + } + + /**** add private key bag ****/ + keyCi = PKCS12_create_key_content(pkcs12, nidKey, &keyCiSz, &rng, + pass, passSz, key, keySz, iter); + if (keyCi == NULL) { + wc_PKCS12_free(pkcs12); + wc_FreeRng(&rng); + return NULL; + } + + /**** add main certificate bag and extras ****/ + certCi = PKCS12_create_cert_content(pkcs12, nidCert, ca, cert, certSz, + &certCiSz, &rng, pass, passSz, iter); + if (certCi == NULL) { + XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER); + wc_PKCS12_free(pkcs12); + wc_FreeRng(&rng); + return NULL; + } + + /**** create safe and Content Info ****/ + ret = PKCS12_create_safe(pkcs12, certCi, certCiSz, keyCi, keyCiSz, &rng, + pass, passSz, iter); + XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(certCi, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (ret != 0) { + WOLFSSL_MSG("Unable to create PKCS12 safe"); + wc_PKCS12_free(pkcs12); + wc_FreeRng(&rng); + return NULL; + } + + /* create MAC */ + if (macIter > 0) { + MacData* mac; + byte digest[WC_MAX_DIGEST_SIZE]; /* for MAC */ + + mac = (MacData*)XMALLOC(sizeof(MacData), heap, DYNAMIC_TYPE_PKCS); + if (mac == NULL) { + wc_PKCS12_free(pkcs12); + wc_FreeRng(&rng); + WOLFSSL_MSG("Error malloc'ing mac data buffer"); + return NULL; + } + XMEMSET(mac, 0, sizeof(MacData)); + pkcs12->signData = mac; /* now wc_PKCS12_free will free all mac too */ + + #ifndef NO_SHA256 + mac->oid = SHA256h; + #elif !defined(NO_SHA) + mac->oid = SHA; + #elif defined(WOLFSSL_SHA384) + mac->oid = SHA384; + #elif defined(WOLFSSL_SHA512) + mac->oid = SHA512; + #else + WOLFSSL_MSG("No supported hash algorithm compiled in!"); + wc_PKCS12_free(pkcs12); + wc_FreeRng(&rng); + return NULL; + #endif + + /* store number of iterations */ + mac->itt = macIter; + + /* set mac salt */ + mac->saltSz = 8; + mac->salt = (byte*)XMALLOC(mac->saltSz, heap, DYNAMIC_TYPE_PKCS); + if (mac->salt == NULL) { + wc_PKCS12_free(pkcs12); + wc_FreeRng(&rng); + WOLFSSL_MSG("Error malloc'ing salt data buffer"); + return NULL; + } + + if ((ret = wc_RNG_GenerateBlock(&rng, mac->salt, mac->saltSz)) != 0) { + WOLFSSL_MSG("Error generating random salt"); + wc_PKCS12_free(pkcs12); + wc_FreeRng(&rng); + return NULL; + } + ret = wc_PKCS12_create_mac(pkcs12, pkcs12->safe->data, + pkcs12->safe->dataSz, (const byte*)pass, passSz, digest, + WC_MAX_DIGEST_SIZE); + if (ret < 0) { + wc_PKCS12_free(pkcs12); + wc_FreeRng(&rng); + WOLFSSL_MSG("Error creating mac"); + WOLFSSL_LEAVE("wc_PKCS12_create", ret); + return NULL; + } + + mac->digestSz = ret; + mac->digest = (byte*)XMALLOC(ret, heap, DYNAMIC_TYPE_PKCS); + if (mac->digest == NULL) { + WOLFSSL_MSG("Error malloc'ing 
mac digest buffer"); + wc_PKCS12_free(pkcs12); + wc_FreeRng(&rng); + return NULL; + } + XMEMCPY(mac->digest, digest, mac->digestSz); + } + else { + pkcs12->signData = NULL; + } + + wc_FreeRng(&rng); + (void)name; + (void)keyType; + + return pkcs12; +} + + +/* if using a specific memory heap */ +int wc_PKCS12_SetHeap(WC_PKCS12* pkcs12, void* heap) +{ + if (pkcs12 == NULL) { + return BAD_FUNC_ARG; + } + pkcs12->heap = heap; + + return 0; +} + + +/* getter for heap */ +void* wc_PKCS12_GetHeap(WC_PKCS12* pkcs12) +{ + if (pkcs12 == NULL) { + return NULL; + } + + return pkcs12->heap; +} + +#undef ERROR_OUT + +#endif /* !NO_ASN && !NO_PWDBASED && HAVE_PKCS12 */ diff --git a/client/wolfssl/wolfcrypt/src/pkcs7.c b/client/wolfssl/wolfcrypt/src/pkcs7.c new file mode 100644 index 0000000..e420cad --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/pkcs7.c @@ -0,0 +1,12523 @@ +/* pkcs7.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef HAVE_PKCS7 + +#include +#include +#include +#include +#ifndef NO_RSA + #include +#endif +#ifdef HAVE_ECC + #include +#endif +#ifdef HAVE_LIBZ + #include +#endif +#ifndef NO_PWDBASED + #include +#endif +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +/* direction for processing, encoding or decoding */ +typedef enum { + WC_PKCS7_ENCODE, + WC_PKCS7_DECODE +} pkcs7Direction; + +#define NO_USER_CHECK 0 + +/* holds information about the signers */ +struct PKCS7SignerInfo { + int version; + byte *sid; + word32 sidSz; +}; + + +#ifndef NO_PKCS7_STREAM + +#define MAX_PKCS7_STREAM_BUFFER 256 +struct PKCS7State { + byte* tmpCert; + byte* bufferPt; + byte* key; + byte* nonce; /* stored nonce */ + byte* aad; /* additional data for AEAD algos */ + byte* tag; /* tag data for AEAD algos */ + byte* content; + byte* buffer; /* main internal read buffer */ + + /* stack variables to store for when returning */ + word32 varOne; + int varTwo; + int varThree; + + word32 vers; + word32 idx; /* index read into current input buffer */ + word32 maxLen; /* sanity cap on maximum amount of data to allow + * needed for GetSequence and other calls */ + word32 length; /* amount of data stored */ + word32 bufferSz; /* size of internal buffer */ + word32 expected; /* next amount of data expected, if needed */ + word32 totalRd; /* total amount of bytes read */ + word32 nonceSz; /* size of nonce stored */ + word32 aadSz; /* size of additional AEAD data */ + word32 tagSz; /* size of tag for AEAD */ + word32 contentSz; + byte tmpIv[MAX_CONTENT_IV_SIZE]; /* store IV if needed */ +#ifdef WC_PKCS7_STREAM_DEBUG + word32 peakUsed; /* most bytes used for struct at any one time */ + word32 peakRead; /* most bytes used by read buffer */ +#endif + byte multi:1; /* 
flag for if content is in multiple parts */ + byte flagOne:1; + byte detached:1; /* flag to indicate detached signature is present */ +}; + + +enum PKCS7_MaxLen { + PKCS7_DEFAULT_PEEK = 0, + PKCS7_SEQ_PEEK +}; + +/* creates a PKCS7State structure and returns 0 on success */ +static int wc_PKCS7_CreateStream(PKCS7* pkcs7) +{ + WOLFSSL_MSG("creating PKCS7 stream structure"); + pkcs7->stream = (PKCS7State*)XMALLOC(sizeof(PKCS7State), pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (pkcs7->stream == NULL) { + return MEMORY_E; + } + XMEMSET(pkcs7->stream, 0, sizeof(PKCS7State)); +#ifdef WC_PKCS7_STREAM_DEBUG + printf("\nCreating new PKCS#7 stream %p\n", pkcs7->stream); +#endif + return 0; +} + + +static void wc_PKCS7_ResetStream(PKCS7* pkcs7) +{ + if (pkcs7 != NULL && pkcs7->stream != NULL) { +#ifdef WC_PKCS7_STREAM_DEBUG + /* collect final data point in case more was read right before reset */ + if (pkcs7->stream->length > pkcs7->stream->peakRead) { + pkcs7->stream->peakRead = pkcs7->stream->length; + } + if (pkcs7->stream->bufferSz + pkcs7->stream->aadSz + + pkcs7->stream->nonceSz + pkcs7->stream->tagSz > + pkcs7->stream->peakUsed) { + pkcs7->stream->peakUsed = pkcs7->stream->bufferSz + + pkcs7->stream->aadSz + pkcs7->stream->nonceSz + + pkcs7->stream->tagSz; + } + + /* print out debugging statistics */ + if (pkcs7->stream->peakUsed > 0 || pkcs7->stream->peakRead > 0) { + printf("PKCS#7 STREAM:\n\tPeak heap used by struct = %d" + "\n\tPeak read buffer bytes = %d" + "\n\tTotal bytes read = %d" + "\n", + pkcs7->stream->peakUsed, pkcs7->stream->peakRead, + pkcs7->stream->totalRd); + } + printf("PKCS#7 stream reset : Address [%p]\n", pkcs7->stream); + #endif + + /* free any buffers that may be allocated */ + XFREE(pkcs7->stream->aad, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(pkcs7->stream->tag, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(pkcs7->stream->nonce, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(pkcs7->stream->buffer, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(pkcs7->stream->key, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + pkcs7->stream->aad = NULL; + pkcs7->stream->tag = NULL; + pkcs7->stream->nonce = NULL; + pkcs7->stream->buffer = NULL; + pkcs7->stream->key = NULL; + + /* reset values, note that content and tmpCert are saved */ + pkcs7->stream->maxLen = 0; + pkcs7->stream->length = 0; + pkcs7->stream->idx = 0; + pkcs7->stream->expected = 0; + pkcs7->stream->totalRd = 0; + pkcs7->stream->bufferSz = 0; + + pkcs7->stream->multi = 0; + pkcs7->stream->flagOne = 0; + pkcs7->stream->detached = 0; + pkcs7->stream->varOne = 0; + pkcs7->stream->varTwo = 0; + pkcs7->stream->varThree = 0; + } +} + + +static void wc_PKCS7_FreeStream(PKCS7* pkcs7) +{ + if (pkcs7 != NULL && pkcs7->stream != NULL) { + wc_PKCS7_ResetStream(pkcs7); + + XFREE(pkcs7->stream->content, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(pkcs7->stream->tmpCert, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + pkcs7->stream->content = NULL; + pkcs7->stream->tmpCert = NULL; + + XFREE(pkcs7->stream, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + pkcs7->stream = NULL; + } +} + + +/* used to increase the max size for internal buffer + * returns 0 on success */ +static int wc_PKCS7_GrowStream(PKCS7* pkcs7, word32 newSz) +{ + byte* pt; + + pt = (byte*)XMALLOC(newSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (pt == NULL) { + return MEMORY_E; + } + XMEMCPY(pt, pkcs7->stream->buffer, pkcs7->stream->bufferSz); + +#ifdef WC_PKCS7_STREAM_DEBUG + printf("PKCS7 increasing internal stream buffer %d -> %d\n", + pkcs7->stream->bufferSz, newSz); +#endif + pkcs7->stream->bufferSz = newSz; + 
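/* (Editor's note) The reallocation here and just below follows the usual
 * allocate-copy-swap pattern: the new block is filled before the old one is
 * released, so the stream still owns a valid buffer if XMALLOC fails. A
 * minimal standalone sketch of the same idea; GrowBuf is an illustrative
 * name, not a wolfSSL function:
 *
 *     static int GrowBuf(byte** buf, word32* sz, word32 newSz, void* heap)
 *     {
 *         byte* pt = (byte*)XMALLOC(newSz, heap, DYNAMIC_TYPE_PKCS7);
 *         if (pt == NULL)
 *             return MEMORY_E;            (old buffer still intact)
 *         XMEMCPY(pt, *buf, *sz);         (preserve existing contents)
 *         XFREE(*buf, heap, DYNAMIC_TYPE_PKCS7);
 *         *buf = pt;
 *         *sz  = newSz;
 *         return 0;
 *     }
 */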
XFREE(pkcs7->stream->buffer, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + pkcs7->stream->buffer = pt; + return 0; +} + + +/* pt gets set to the buffer that is holding data in the case that stream struct + * is used. + * + * Sets idx to be the current offset into "pt" buffer + * returns 0 on success + */ +static int wc_PKCS7_AddDataToStream(PKCS7* pkcs7, byte* in, word32 inSz, + word32 expected, byte** pt, word32* idx) +{ + word32 rdSz = pkcs7->stream->idx; + + /* If the input size minus current index into input buffer is greater than + * the expected size then use the input buffer. If data is already stored + * in stream buffer or if there is not enough input data available then use + * the stream buffer. */ + if (inSz - rdSz >= expected && pkcs7->stream->length == 0) { + /* storing input buffer is not needed */ + *pt = in; /* reset in case previously used internal buffer */ + *idx = rdSz; + return 0; + } + + /* is there enough stored in buffer already? */ + if (pkcs7->stream->length >= expected) { + *idx = 0; /* start reading from beginning of stream buffer */ + *pt = pkcs7->stream->buffer; + return 0; + } + + /* check if all data has been read from input */ + if (rdSz >= inSz) { + /* no more input to read, reset input index and request more data */ + pkcs7->stream->idx = 0; + return WC_PKCS7_WANT_READ_E; + } + + /* try to store input data into stream buffer */ + if (inSz - rdSz > 0 && pkcs7->stream->length < expected) { + int len = min(inSz - rdSz, expected - pkcs7->stream->length); + + /* sanity check that the input buffer is not internal buffer */ + if (in == pkcs7->stream->buffer) { + return WC_PKCS7_WANT_READ_E; + } + + /* check if internal buffer size needs to be increased */ + if (len + pkcs7->stream->length > pkcs7->stream->bufferSz) { + int ret = wc_PKCS7_GrowStream(pkcs7, expected); + if (ret < 0) { + return ret; + } + } + XMEMCPY(pkcs7->stream->buffer + pkcs7->stream->length, in + rdSz, len); + pkcs7->stream->length += len; + pkcs7->stream->idx += len; + pkcs7->stream->totalRd += len; + } + +#ifdef WC_PKCS7_STREAM_DEBUG + /* collects memory usage for debugging */ + if (pkcs7->stream->length > pkcs7->stream->peakRead) { + pkcs7->stream->peakRead = pkcs7->stream->length; + } + if (pkcs7->stream->bufferSz + pkcs7->stream->aadSz + pkcs7->stream->nonceSz + + pkcs7->stream->tagSz > pkcs7->stream->peakUsed) { + pkcs7->stream->peakUsed = pkcs7->stream->bufferSz + + pkcs7->stream->aadSz + pkcs7->stream->nonceSz + pkcs7->stream->tagSz; + } +#endif + + /* if not enough data was read in then request more */ + if (pkcs7->stream->length < expected) { + pkcs7->stream->idx = 0; + return WC_PKCS7_WANT_READ_E; + } + + /* adjust pointer to read from stored buffer */ + *idx = 0; + *pt = pkcs7->stream->buffer; + return 0; +} + + +/* Does two things + * 1) Tries to get the length from current buffer and set it as max length + * 2) Retrieves the set max length + * + * if no flag value is set then the stored max length is returned. 
+ * returns length found on success and defSz if no stored data is found + */ +static long wc_PKCS7_GetMaxStream(PKCS7* pkcs7, byte flag, byte* in, + word32 defSz) +{ + /* check there is a buffer to read from */ + if (pkcs7) { + int length = 0, ret; + word32 idx = 0, maxIdx; + byte* pt; + + if (flag != PKCS7_DEFAULT_PEEK) { + if (pkcs7->stream->length > 0) { + length = pkcs7->stream->length; + pt = pkcs7->stream->buffer; + } + else { + length = defSz; + pt = in; + } + maxIdx = (word32)length; + + if (length < MAX_SEQ_SZ) { + WOLFSSL_MSG("PKCS7 Error not enough data for SEQ peek\n"); + return 0; + } + if (flag == PKCS7_SEQ_PEEK) { + if ((ret = GetSequence_ex(pt, &idx, &length, maxIdx, + NO_USER_CHECK)) < 0) { + return ret; + } + + #ifdef ASN_BER_TO_DER + if (length == 0 && ret == 0) { + idx = 0; + if ((ret = wc_BerToDer(pt, defSz, NULL, + (word32*)&length)) != LENGTH_ONLY_E) { + return ret; + } + } + #endif /* ASN_BER_TO_DER */ + pkcs7->stream->maxLen = length + idx; + } + } + + if (pkcs7->stream->maxLen == 0) { + pkcs7->stream->maxLen = defSz; + } + + return pkcs7->stream->maxLen; + } + + return defSz; +} + + +/* setter function for stored variables */ +static void wc_PKCS7_StreamStoreVar(PKCS7* pkcs7, word32 var1, int var2, + int var3) +{ + if (pkcs7 != NULL && pkcs7->stream != NULL) { + pkcs7->stream->varOne = var1; + pkcs7->stream->varTwo = var2; + pkcs7->stream->varThree = var3; + } +} + +/* getter function for stored variables */ +static void wc_PKCS7_StreamGetVar(PKCS7* pkcs7, word32* var1, int* var2, + int* var3) +{ + if (pkcs7 != NULL && pkcs7->stream != NULL) { + if (var1 != NULL) *var1 = pkcs7->stream->varOne; + if (var2 != NULL) *var2 = pkcs7->stream->varTwo; + if (var3 != NULL) *var3 = pkcs7->stream->varThree; + } +} + + +/* common update of index and total read after section complete + * returns 0 on success */ +static int wc_PKCS7_StreamEndCase(PKCS7* pkcs7, word32* tmpIdx, word32* idx) +{ + int ret = 0; + + if (pkcs7->stream->length > 0) { + if (pkcs7->stream->length < *idx) { + WOLFSSL_MSG("PKCS7 read too much data from internal buffer"); + ret = BUFFER_E; + } + else { + XMEMMOVE(pkcs7->stream->buffer, pkcs7->stream->buffer + *idx, + pkcs7->stream->length - *idx); + pkcs7->stream->length -= *idx; + } + } + else { + pkcs7->stream->totalRd += *idx - *tmpIdx; + pkcs7->stream->idx = *idx; /* adjust index into input buffer */ + *tmpIdx = *idx; + } + + return ret; +} +#endif /* NO_PKCS7_STREAM */ + +#ifdef WC_PKCS7_STREAM_DEBUG +/* used to print out human readable state for debugging */ +static const char* wc_PKCS7_GetStateName(int in) +{ + switch (in) { + case WC_PKCS7_START: return "WC_PKCS7_START"; + + case WC_PKCS7_STAGE2: return "WC_PKCS7_STAGE2"; + case WC_PKCS7_STAGE3: return "WC_PKCS7_STAGE3"; + case WC_PKCS7_STAGE4: return "WC_PKCS7_STAGE4"; + case WC_PKCS7_STAGE5: return "WC_PKCS7_STAGE5"; + case WC_PKCS7_STAGE6: return "WC_PKCS7_STAGE6"; + + /* parse info set */ + case WC_PKCS7_INFOSET_START: return "WC_PKCS7_INFOSET_START"; + case WC_PKCS7_INFOSET_BER: return "WC_PKCS7_INFOSET_BER"; + case WC_PKCS7_INFOSET_STAGE1: return "WC_PKCS7_INFOSET_STAGE1"; + case WC_PKCS7_INFOSET_STAGE2: return "WC_PKCS7_INFOSET_STAGE2"; + case WC_PKCS7_INFOSET_END: return "WC_PKCS7_INFOSET_END"; + + /* decode enveloped data */ + case WC_PKCS7_ENV_2: return "WC_PKCS7_ENV_2"; + case WC_PKCS7_ENV_3: return "WC_PKCS7_ENV_3"; + case WC_PKCS7_ENV_4: return "WC_PKCS7_ENV_4"; + case WC_PKCS7_ENV_5: return "WC_PKCS7_ENV_5"; + + /* decode auth enveloped */ + case WC_PKCS7_AUTHENV_2: return 
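/* (Editor's note) These states exist because, when streaming support is
 * compiled in, the decode entry points return WC_PKCS7_WANT_READ_E once the
 * supplied input is exhausted and resume where they left off on the next
 * call. A rough caller loop might look like the sketch below; the chunked
 * file reading is illustrative, not a wolfSSL API:
 *
 *     byte chunk[1024];
 *     int  ret = WC_PKCS7_WANT_READ_E;
 *     while (ret == WC_PKCS7_WANT_READ_E) {
 *         size_t sz = fread(chunk, 1, sizeof(chunk), fp);
 *         if (sz == 0)
 *             break;                      (input exhausted: give up)
 *         ret = wc_PKCS7_VerifySignedData(pkcs7, chunk, (word32)sz);
 *     }
 */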
"WC_PKCS7_AUTHENV_2"; + case WC_PKCS7_AUTHENV_3: return "WC_PKCS7_AUTHENV_3"; + case WC_PKCS7_AUTHENV_4: return "WC_PKCS7_AUTHENV_4"; + case WC_PKCS7_AUTHENV_5: return "WC_PKCS7_AUTHENV_5"; + case WC_PKCS7_AUTHENV_6: return "WC_PKCS7_AUTHENV_6"; + case WC_PKCS7_AUTHENV_ATRB: return "WC_PKCS7_AUTHENV_ATRB"; + case WC_PKCS7_AUTHENV_ATRBEND: return "WC_PKCS7_AUTHENV_ATRBEND"; + case WC_PKCS7_AUTHENV_7: return "WC_PKCS7_AUTHENV_7"; + + /* decryption state types */ + case WC_PKCS7_DECRYPT_KTRI: return "WC_PKCS7_DECRYPT_KTRI"; + case WC_PKCS7_DECRYPT_KTRI_2: return "WC_PKCS7_DECRYPT_KTRI_2"; + case WC_PKCS7_DECRYPT_KTRI_3: return "WC_PKCS7_DECRYPT_KTRI_3"; + + case WC_PKCS7_DECRYPT_KARI: return "WC_PKCS7_DECRYPT_KARI"; + case WC_PKCS7_DECRYPT_KEKRI: return "WC_PKCS7_DECRYPT_KEKRI"; + case WC_PKCS7_DECRYPT_PWRI: return "WC_PKCS7_DECRYPT_PWRI"; + case WC_PKCS7_DECRYPT_ORI: return "WC_PKCS7_DECRYPT_ORI"; + case WC_PKCS7_DECRYPT_DONE: return "WC_PKCS7_DECRYPT_DONE"; + + case WC_PKCS7_VERIFY_STAGE2: return "WC_PKCS7_VERIFY_STAGE2"; + case WC_PKCS7_VERIFY_STAGE3: return "WC_PKCS7_VERIFY_STAGE3"; + case WC_PKCS7_VERIFY_STAGE4: return "WC_PKCS7_VERIFY_STAGE4"; + case WC_PKCS7_VERIFY_STAGE5: return "WC_PKCS7_VERIFY_STAGE5"; + case WC_PKCS7_VERIFY_STAGE6: return "WC_PKCS7_VERIFY_STAGE6"; + + default: + return "Unknown state"; + } +} +#endif + +/* Used to change the PKCS7 state. Having state change as a function allows + * for easier debugging */ +static void wc_PKCS7_ChangeState(PKCS7* pkcs7, int newState) +{ +#ifdef WC_PKCS7_STREAM_DEBUG + printf("\tChanging from state [%02d] %s to [%02d] %s\n", + pkcs7->state, wc_PKCS7_GetStateName(pkcs7->state), + newState, wc_PKCS7_GetStateName(newState)); +#endif + pkcs7->state = newState; +} + +#define MAX_PKCS7_DIGEST_SZ (MAX_SEQ_SZ + MAX_ALGO_SZ + \ + MAX_OCTET_STR_SZ + WC_MAX_DIGEST_SIZE) + + +/* placed ASN.1 contentType OID into *output, return idx on success, + * 0 upon failure */ +static int wc_SetContentType(int pkcs7TypeOID, byte* output, word32 outputSz) +{ + /* PKCS#7 content types, RFC 2315, section 14 */ + const byte pkcs7[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7, + 0x0D, 0x01, 0x07 }; + const byte data[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7, + 0x0D, 0x01, 0x07, 0x01 }; + const byte signedData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7, + 0x0D, 0x01, 0x07, 0x02}; + const byte envelopedData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7, + 0x0D, 0x01, 0x07, 0x03 }; + const byte authEnvelopedData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7, + 0x0D, 0x01, 0x09, 0x10, 0x01, 0x17}; + const byte signedAndEnveloped[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7, + 0x0D, 0x01, 0x07, 0x04 }; + const byte digestedData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7, + 0x0D, 0x01, 0x07, 0x05 }; +#ifndef NO_PKCS7_ENCRYPTED_DATA + const byte encryptedData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7, + 0x0D, 0x01, 0x07, 0x06 }; +#endif + /* FirmwarePkgData (1.2.840.113549.1.9.16.1.16), RFC 4108 */ + const byte firmwarePkgData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, + 0x01, 0x09, 0x10, 0x01, 0x10 }; +#if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA) + /* id-ct-compressedData (1.2.840.113549.1.9.16.1.9), RFC 3274 */ + const byte compressedData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, + 0x01, 0x09, 0x10, 0x01, 0x09 }; +#endif + +#if !defined(NO_PWDBASED) && !defined(NO_SHA) + const byte pwriKek[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, + 0x01, 0x09, 0x10, 0x03, 0x09 }; + const byte pbkdf2[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, + 0x01, 0x05, 0x0C }; +#endif + + int idSz, idx = 0; + word32 typeSz = 0; + const byte* typeName = 
0; + byte ID_Length[MAX_LENGTH_SZ]; + + switch (pkcs7TypeOID) { + case PKCS7_MSG: + typeSz = sizeof(pkcs7); + typeName = pkcs7; + break; + + case DATA: + typeSz = sizeof(data); + typeName = data; + break; + + case SIGNED_DATA: + typeSz = sizeof(signedData); + typeName = signedData; + break; + + case ENVELOPED_DATA: + typeSz = sizeof(envelopedData); + typeName = envelopedData; + break; + + case AUTH_ENVELOPED_DATA: + typeSz = sizeof(authEnvelopedData); + typeName = authEnvelopedData; + break; + + case SIGNED_AND_ENVELOPED_DATA: + typeSz = sizeof(signedAndEnveloped); + typeName = signedAndEnveloped; + break; + + case DIGESTED_DATA: + typeSz = sizeof(digestedData); + typeName = digestedData; + break; + +#ifndef NO_PKCS7_ENCRYPTED_DATA + case ENCRYPTED_DATA: + typeSz = sizeof(encryptedData); + typeName = encryptedData; + break; +#endif +#if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA) + case COMPRESSED_DATA: + typeSz = sizeof(compressedData); + typeName = compressedData; + break; +#endif + case FIRMWARE_PKG_DATA: + typeSz = sizeof(firmwarePkgData); + typeName = firmwarePkgData; + break; + +#if !defined(NO_PWDBASED) && !defined(NO_SHA) + case PWRI_KEK_WRAP: + typeSz = sizeof(pwriKek); + typeName = pwriKek; + break; + + case PBKDF2_OID: + typeSz = sizeof(pbkdf2); + typeName = pbkdf2; + break; +#endif + + default: + WOLFSSL_MSG("Unknown PKCS#7 Type"); + return 0; + }; + + if (outputSz < (MAX_LENGTH_SZ + 1 + typeSz)) { + WOLFSSL_MSG("CMS content type buffer too small"); + return BAD_FUNC_ARG; + } + + idSz = SetLength(typeSz, ID_Length); + output[idx++] = ASN_OBJECT_ID; + XMEMCPY(output + idx, ID_Length, idSz); + idx += idSz; + XMEMCPY(output + idx, typeName, typeSz); + idx += typeSz; + + return idx; +} + + +/* get ASN.1 contentType OID sum, return 0 on success, <0 on failure */ +static int wc_GetContentType(const byte* input, word32* inOutIdx, word32* oid, + word32 maxIdx) +{ + WOLFSSL_ENTER("wc_GetContentType"); + if (GetObjectId(input, inOutIdx, oid, oidIgnoreType, maxIdx) < 0) + return ASN_PARSE_E; + + return 0; +} + + +/* return block size for algorithm represented by oid, or <0 on error */ +static int wc_PKCS7_GetOIDBlockSize(int oid) +{ + int blockSz; + + switch (oid) { +#ifndef NO_AES + #ifdef WOLFSSL_AES_128 + case AES128CBCb: + case AES128GCMb: + case AES128CCMb: + #endif + #ifdef WOLFSSL_AES_192 + case AES192CBCb: + case AES192GCMb: + case AES192CCMb: + #endif + #ifdef WOLFSSL_AES_256 + case AES256CBCb: + case AES256GCMb: + case AES256CCMb: + #endif + blockSz = AES_BLOCK_SIZE; + break; +#endif +#ifndef NO_DES3 + case DESb: + case DES3b: + blockSz = DES_BLOCK_SIZE; + break; +#endif + default: + WOLFSSL_MSG("Unsupported content cipher type"); + return ALGO_ID_E; + }; + + return blockSz; +} + + +/* get key size for algorithm represented by oid, or <0 on error */ +static int wc_PKCS7_GetOIDKeySize(int oid) +{ + int blockKeySz; + + switch (oid) { +#ifndef NO_AES + #ifdef WOLFSSL_AES_128 + case AES128CBCb: + case AES128GCMb: + case AES128CCMb: + case AES128_WRAP: + blockKeySz = 16; + break; + #endif + #ifdef WOLFSSL_AES_192 + case AES192CBCb: + case AES192GCMb: + case AES192CCMb: + case AES192_WRAP: + blockKeySz = 24; + break; + #endif + #ifdef WOLFSSL_AES_256 + case AES256CBCb: + case AES256GCMb: + case AES256CCMb: + case AES256_WRAP: + blockKeySz = 32; + break; + #endif +#endif +#ifndef NO_DES3 + case DESb: + blockKeySz = DES_KEYLEN; + break; + + case DES3b: + blockKeySz = DES3_KEYLEN; + break; +#endif + default: + WOLFSSL_MSG("Unsupported content cipher type"); + return 
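/* (Editor's note) These two lookups feed the content-encryption paths later
 * in this file: e.g. AES256CBCb maps to a 32-byte key and a 16-byte block
 * (AES_BLOCK_SIZE). For the CBC modes the plaintext is padded up to a full
 * block before encryption; a sketch of the usual PKCS#7-style padding
 * arithmetic, which always adds between 1 and blockSz bytes:
 *
 *     word32 padSz        = blockSz - (contentSz % blockSz);
 *     word32 encContentSz = contentSz + padSz;
 */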
ALGO_ID_E; + }; + + return blockKeySz; +} + + +PKCS7* wc_PKCS7_New(void* heap, int devId) +{ + PKCS7* pkcs7 = (PKCS7*)XMALLOC(sizeof(PKCS7), heap, DYNAMIC_TYPE_PKCS7); + if (pkcs7) { + XMEMSET(pkcs7, 0, sizeof(PKCS7)); + if (wc_PKCS7_Init(pkcs7, heap, devId) == 0) { + pkcs7->isDynamic = 1; + } + else { + XFREE(pkcs7, heap, DYNAMIC_TYPE_PKCS7); + pkcs7 = NULL; + } + } + return pkcs7; +} + +/* This is to initialize a PKCS7 structure. It sets all values to 0 and can be + * used to set the heap hint. + * + * pkcs7 PKCS7 structure to initialize + * heap memory heap hint for PKCS7 structure to use + * devId currently not used but a place holder for async operations + * + * returns 0 on success or a negative value for failure + */ +int wc_PKCS7_Init(PKCS7* pkcs7, void* heap, int devId) +{ + word16 isDynamic; + + WOLFSSL_ENTER("wc_PKCS7_Init"); + + if (pkcs7 == NULL) { + return BAD_FUNC_ARG; + } + + isDynamic = pkcs7->isDynamic; + XMEMSET(pkcs7, 0, sizeof(PKCS7)); + pkcs7->isDynamic = isDynamic; +#ifdef WOLFSSL_HEAP_TEST + pkcs7->heap = (void*)WOLFSSL_HEAP_TEST; +#else + pkcs7->heap = heap; +#endif + pkcs7->devId = devId; + + return 0; +} + + +/* Certificate structure holding der pointer, size, and pointer to next + * Pkcs7Cert struct. Used when creating SignedData types with multiple + * certificates. */ +struct Pkcs7Cert { + byte* der; + word32 derSz; + Pkcs7Cert* next; +}; + + +/* Linked list of ASN.1 encoded RecipientInfos */ +struct Pkcs7EncodedRecip { + byte recip[MAX_RECIP_SZ]; + word32 recipSz; + int recipType; + int recipVersion; + Pkcs7EncodedRecip* next; +}; + + +/* free all members of Pkcs7Cert linked list */ +static void wc_PKCS7_FreeCertSet(PKCS7* pkcs7) +{ + Pkcs7Cert* curr = NULL; + Pkcs7Cert* next = NULL; + + if (pkcs7 == NULL) + return; + + curr = pkcs7->certList; + pkcs7->certList = NULL; + + while (curr != NULL) { + next = curr->next; + curr->next = NULL; + XFREE(curr, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + curr = next; + } + + return; +} + + +/* Get total size of all recipients in recipient list. + * + * Returns total size of recipients, or negative upon error */ +static int wc_PKCS7_GetRecipientListSize(PKCS7* pkcs7) +{ + int totalSz = 0; + Pkcs7EncodedRecip* tmp = NULL; + + if (pkcs7 == NULL) + return BAD_FUNC_ARG; + + tmp = pkcs7->recipList; + + while (tmp != NULL) { + totalSz += tmp->recipSz; + tmp = tmp->next; + } + + return totalSz; +} + + +/* free all members of Pkcs7EncodedRecip linked list */ +static void wc_PKCS7_FreeEncodedRecipientSet(PKCS7* pkcs7) +{ + Pkcs7EncodedRecip* curr = NULL; + Pkcs7EncodedRecip* next = NULL; + + if (pkcs7 == NULL) + return; + + curr = pkcs7->recipList; + pkcs7->recipList = NULL; + + while (curr != NULL) { + next = curr->next; + curr->next = NULL; + XFREE(curr, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + curr = next; + } + + return; +} + + +/* search through RecipientInfo list for specific type. 
+ * return 1 if ANY recipient of type specified is present, otherwise + * return 0 */ +static int wc_PKCS7_RecipientListIncludesType(PKCS7* pkcs7, int type) +{ + Pkcs7EncodedRecip* tmp = NULL; + + if (pkcs7 == NULL) + return BAD_FUNC_ARG; + + tmp = pkcs7->recipList; + + while (tmp != NULL) { + if (tmp->recipType == type) + return 1; + + tmp = tmp->next; + } + + return 0; +} + + +/* searches through RecipientInfo list, returns 1 if all structure + * versions are set to 0, otherwise returns 0 */ +static int wc_PKCS7_RecipientListVersionsAllZero(PKCS7* pkcs7) +{ + Pkcs7EncodedRecip* tmp = NULL; + + if (pkcs7 == NULL) + return BAD_FUNC_ARG; + + tmp = pkcs7->recipList; + + while (tmp != NULL) { + if (tmp->recipVersion != 0) + return 0; + + tmp = tmp->next; + } + + return 1; +} + + +/* Init PKCS7 struct with recipient cert, decode into DecodedCert + * NOTE: keeps previously set pkcs7 heap hint, devId and isDynamic */ +int wc_PKCS7_InitWithCert(PKCS7* pkcs7, byte* derCert, word32 derCertSz) +{ + int ret = 0; + void* heap; + int devId; + Pkcs7Cert* cert; + Pkcs7Cert* lastCert; + + if (pkcs7 == NULL || (derCert == NULL && derCertSz != 0)) { + return BAD_FUNC_ARG; + } + + heap = pkcs7->heap; + devId = pkcs7->devId; + cert = pkcs7->certList; + ret = wc_PKCS7_Init(pkcs7, heap, devId); + if (ret != 0) + return ret; + pkcs7->certList = cert; + + if (derCert != NULL && derCertSz > 0) { +#ifdef WOLFSSL_SMALL_STACK + DecodedCert* dCert; + + dCert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), pkcs7->heap, + DYNAMIC_TYPE_DCERT); + if (dCert == NULL) + return MEMORY_E; +#else + DecodedCert dCert[1]; +#endif + + pkcs7->singleCert = derCert; + pkcs7->singleCertSz = derCertSz; + pkcs7->cert[0] = derCert; + pkcs7->certSz[0] = derCertSz; + + /* create new Pkcs7Cert for recipient, freed during cleanup */ + cert = (Pkcs7Cert*)XMALLOC(sizeof(Pkcs7Cert), pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + XMEMSET(cert, 0, sizeof(Pkcs7Cert)); + cert->der = derCert; + cert->derSz = derCertSz; + cert->next = NULL; + + /* free existing cert list if existing */ + wc_PKCS7_FreeCertSet(pkcs7); + + /* add cert to list */ + if (pkcs7->certList == NULL) { + pkcs7->certList = cert; + } else { + lastCert = pkcs7->certList; + while (lastCert->next != NULL) { + lastCert = lastCert->next; + } + lastCert->next = cert; + } + + InitDecodedCert(dCert, derCert, derCertSz, pkcs7->heap); + ret = ParseCert(dCert, CA_TYPE, NO_VERIFY, 0); + if (ret < 0) { + FreeDecodedCert(dCert); +#ifdef WOLFSSL_SMALL_STACK + XFREE(dCert, pkcs7->heap, DYNAMIC_TYPE_DCERT); +#endif + return ret; + } + + XMEMCPY(pkcs7->publicKey, dCert->publicKey, dCert->pubKeySize); + pkcs7->publicKeySz = dCert->pubKeySize; + pkcs7->publicKeyOID = dCert->keyOID; + XMEMCPY(pkcs7->issuerHash, dCert->issuerHash, KEYID_SIZE); + pkcs7->issuer = dCert->issuerRaw; + pkcs7->issuerSz = dCert->issuerRawLen; + XMEMCPY(pkcs7->issuerSn, dCert->serial, dCert->serialSz); + pkcs7->issuerSnSz = dCert->serialSz; + XMEMCPY(pkcs7->issuerSubjKeyId, dCert->extSubjKeyId, KEYID_SIZE); + + /* default to IssuerAndSerialNumber for SignerIdentifier */ + pkcs7->sidType = CMS_ISSUER_AND_SERIAL_NUMBER; + + /* free existing recipient list if existing */ + wc_PKCS7_FreeEncodedRecipientSet(pkcs7); + + FreeDecodedCert(dCert); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(dCert, pkcs7->heap, DYNAMIC_TYPE_DCERT); +#endif + } + + return ret; +} + + +/* Adds one DER-formatted certificate to the internal PKCS7/CMS certificate + * list, to be added as part of the certificates CertificateSet. Currently + * used in SignedData content type. 
+ * + * Must be called after wc_PKCS7_Init() or wc_PKCS7_InitWithCert(). + * + * Does not represent the recipient/signer certificate, only certificates that + * are part of the certificate chain used to build and verify signer + * certificates. + * + * This API does not currently validate certificates. + * + * Returns 0 on success, negative upon error */ +int wc_PKCS7_AddCertificate(PKCS7* pkcs7, byte* derCert, word32 derCertSz) +{ + Pkcs7Cert* cert; + + if (pkcs7 == NULL || derCert == NULL || derCertSz == 0) + return BAD_FUNC_ARG; + + cert = (Pkcs7Cert*)XMALLOC(sizeof(Pkcs7Cert), pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (cert == NULL) + return MEMORY_E; + + cert->der = derCert; + cert->derSz = derCertSz; + + if (pkcs7->certList == NULL) { + pkcs7->certList = cert; + } else { + cert->next = pkcs7->certList; + pkcs7->certList = cert; + } + + return 0; +} + + +/* free linked list of PKCS7DecodedAttrib structs */ +static void wc_PKCS7_FreeDecodedAttrib(PKCS7DecodedAttrib* attrib, void* heap) +{ + PKCS7DecodedAttrib* current; + + if (attrib == NULL) { + return; + } + + current = attrib; + while (current != NULL) { + PKCS7DecodedAttrib* next = current->next; + if (current->oid != NULL) { + XFREE(current->oid, heap, DYNAMIC_TYPE_PKCS7); + } + if (current->value != NULL) { + XFREE(current->value, heap, DYNAMIC_TYPE_PKCS7); + } + XFREE(current, heap, DYNAMIC_TYPE_PKCS7); + current = next; + } + + (void)heap; +} + + +/* return 0 on success */ +static int wc_PKCS7_SignerInfoNew(PKCS7* pkcs7) +{ + if (pkcs7->signerInfo != NULL) { + XFREE(pkcs7->signerInfo, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + pkcs7->signerInfo = NULL; + } + + pkcs7->signerInfo = (PKCS7SignerInfo*)XMALLOC(sizeof(PKCS7SignerInfo), + pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (pkcs7->signerInfo == NULL) { + WOLFSSL_MSG("Unable to malloc memory for signer info"); + return MEMORY_E; + } + XMEMSET(pkcs7->signerInfo, 0, sizeof(PKCS7SignerInfo)); + return 0; +} + + +static void wc_PKCS7_SignerInfoFree(PKCS7* pkcs7) +{ + if (pkcs7->signerInfo != NULL) { + if (pkcs7->signerInfo->sid != NULL) { + XFREE(pkcs7->signerInfo->sid, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + pkcs7->signerInfo->sid = NULL; + } + XFREE(pkcs7->signerInfo, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + pkcs7->signerInfo = NULL; + } +} + + +/* free's any current SID and sets it to "in" + * returns 0 on success + */ +static int wc_PKCS7_SignerInfoSetSID(PKCS7* pkcs7, byte* in, int inSz) +{ + if (pkcs7 == NULL || in == NULL || inSz < 0) { + return BAD_FUNC_ARG; + } + + if (pkcs7->signerInfo->sid != NULL) { + XFREE(pkcs7->signerInfo->sid, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + pkcs7->signerInfo->sid = NULL; + } + pkcs7->signerInfo->sid = (byte*)XMALLOC(inSz, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (pkcs7->signerInfo->sid == NULL) { + return MEMORY_E; + } + XMEMCPY(pkcs7->signerInfo->sid, in, inSz); + pkcs7->signerInfo->sidSz = inSz; + return 0; +} + + +/* releases any memory allocated by a PKCS7 initializer */ +void wc_PKCS7_Free(PKCS7* pkcs7) +{ + if (pkcs7 == NULL) + return; + +#ifndef NO_PKCS7_STREAM + wc_PKCS7_FreeStream(pkcs7); +#endif + + wc_PKCS7_SignerInfoFree(pkcs7); + wc_PKCS7_FreeDecodedAttrib(pkcs7->decodedAttrib, pkcs7->heap); + wc_PKCS7_FreeCertSet(pkcs7); + +#ifdef ASN_BER_TO_DER + if (pkcs7->der != NULL) + XFREE(pkcs7->der, pkcs7->heap, DYNAMIC_TYPE_PKCS7); +#endif + if (pkcs7->contentDynamic != NULL) + XFREE(pkcs7->contentDynamic, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + + if (pkcs7->cek != NULL) { + ForceZero(pkcs7->cek, pkcs7->cekSz); + XFREE(pkcs7->cek, pkcs7->heap, 
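/* (Editor's note) wc_PKCS7_Free() pairs with wc_PKCS7_New()/wc_PKCS7_Init()
 * above. A typical heap-based lifecycle sketch; signerDer/caDer are
 * hypothetical caller buffers, and note that wc_PKCS7_InitWithCert() and
 * wc_PKCS7_AddCertificate() store the DER pointers rather than copying
 * them, so the buffers must outlive the PKCS7 object:
 *
 *     PKCS7* p7 = wc_PKCS7_New(NULL, INVALID_DEVID);
 *     if (p7 != NULL &&
 *             wc_PKCS7_InitWithCert(p7, signerDer, signerDerSz) == 0) {
 *         wc_PKCS7_AddCertificate(p7, caDer, caDerSz);   (optional chain)
 *         ... encode or decode ...
 *     }
 *     wc_PKCS7_Free(p7);
 */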
DYNAMIC_TYPE_PKCS7); + } + + pkcs7->contentTypeSz = 0; + + if (pkcs7->signature) { + XFREE(pkcs7->signature, pkcs7->heap, DYNAMIC_TYPE_SIGNATURE); + pkcs7->signature = NULL; + pkcs7->signatureSz = 0; + } + if (pkcs7->plainDigest) { + XFREE(pkcs7->plainDigest, pkcs7->heap, DYNAMIC_TYPE_DIGEST); + pkcs7->plainDigest = NULL; + pkcs7->plainDigestSz = 0; + } + if (pkcs7->pkcs7Digest) { + XFREE(pkcs7->pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_DIGEST); + pkcs7->pkcs7Digest = NULL; + pkcs7->pkcs7DigestSz = 0; + } + if (pkcs7->cachedEncryptedContent != NULL) { + XFREE(pkcs7->cachedEncryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + pkcs7->cachedEncryptedContent = NULL; + pkcs7->cachedEncryptedContentSz = 0; + } + + if (pkcs7->isDynamic) { + pkcs7->isDynamic = 0; + XFREE(pkcs7, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + } +} + + +/* helper function for parsing through attributes and finding a specific one. + * returns PKCS7DecodedAttrib pointer on success */ +static PKCS7DecodedAttrib* findAttrib(PKCS7* pkcs7, const byte* oid, word32 oidSz) +{ + PKCS7DecodedAttrib* list; + + if (pkcs7 == NULL || oid == NULL) { + return NULL; + } + + /* search attributes for pkiStatus */ + list = pkcs7->decodedAttrib; + while (list != NULL) { + word32 sz = oidSz; + word32 idx = 0; + int length = 0; + byte tag; + + if (GetASNTag(list->oid, &idx, &tag, list->oidSz) < 0) { + return NULL; + } + if (tag != ASN_OBJECT_ID) { + WOLFSSL_MSG("Bad attribute ASN1 syntax"); + return NULL; + } + + if (GetLength(list->oid, &idx, &length, list->oidSz) < 0) { + WOLFSSL_MSG("Bad attribute length"); + return NULL; + } + + sz = (sz < (word32)length)? sz : (word32)length; + if (XMEMCMP(oid, list->oid + idx, sz) == 0) { + return list; + } + list = list->next; + } + return NULL; +} + + +/* Searches through decoded attributes and returns the value for the first one + * matching the oid passed in. Note that this value includes the leading ASN1 + * syntax. 
So for a printable string of "3" this would be something like + * + * 0x13, 0x01, 0x33 + * ID SIZE "3" + * + * pkcs7 structure to get value from + * oid OID value to search for with attributes + * oidSz size of oid buffer + * out buffer to hold result + * outSz size of out buffer (if out is NULL this is set to needed size and + LENGTH_ONLY_E is returned) + * + * returns size of value on success + */ +int wc_PKCS7_GetAttributeValue(PKCS7* pkcs7, const byte* oid, word32 oidSz, + byte* out, word32* outSz) +{ + PKCS7DecodedAttrib* attrib; + + if (pkcs7 == NULL || oid == NULL || outSz == NULL) { + return BAD_FUNC_ARG; + } + + attrib = findAttrib(pkcs7, oid, oidSz); + if (attrib == NULL) { + return ASN_PARSE_E; + } + + if (out == NULL) { + *outSz = attrib->valueSz; + return LENGTH_ONLY_E; + } + + if (*outSz < attrib->valueSz) { + return BUFFER_E; + } + + XMEMCPY(out, attrib->value, attrib->valueSz); + return attrib->valueSz; +} + + +/* build PKCS#7 data content type */ +int wc_PKCS7_EncodeData(PKCS7* pkcs7, byte* output, word32 outputSz) +{ + static const byte oid[] = + { ASN_OBJECT_ID, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, + 0x07, 0x01 }; + byte seq[MAX_SEQ_SZ]; + byte octetStr[MAX_OCTET_STR_SZ]; + word32 seqSz; + word32 octetStrSz; + word32 oidSz = (word32)sizeof(oid); + int idx = 0; + + if (pkcs7 == NULL || output == NULL) { + return BAD_FUNC_ARG; + } + + octetStrSz = SetOctetString(pkcs7->contentSz, octetStr); + seqSz = SetSequence(pkcs7->contentSz + octetStrSz + oidSz, seq); + + if (outputSz < pkcs7->contentSz + octetStrSz + oidSz + seqSz) + return BUFFER_E; + + XMEMCPY(output, seq, seqSz); + idx += seqSz; + XMEMCPY(output + idx, oid, oidSz); + idx += oidSz; + XMEMCPY(output + idx, octetStr, octetStrSz); + idx += octetStrSz; + XMEMCPY(output + idx, pkcs7->content, pkcs7->contentSz); + idx += pkcs7->contentSz; + + return idx; +} + + +typedef struct EncodedAttrib { + byte valueSeq[MAX_SEQ_SZ]; + const byte* oid; + byte valueSet[MAX_SET_SZ]; + const byte* value; + word32 valueSeqSz, oidSz, idSz, valueSetSz, valueSz, totalSz; +} EncodedAttrib; + + +typedef struct ESD { + wc_HashAlg hash; + enum wc_HashType hashType; + byte contentDigest[WC_MAX_DIGEST_SIZE + 2]; /* content only + ASN.1 heading */ + byte contentAttribsDigest[WC_MAX_DIGEST_SIZE]; + byte encContentDigest[MAX_ENCRYPTED_KEY_SZ]; + + byte outerSeq[MAX_SEQ_SZ]; + byte outerContent[MAX_EXP_SZ]; + byte innerSeq[MAX_SEQ_SZ]; + byte version[MAX_VERSION_SZ]; + byte digAlgoIdSet[MAX_SET_SZ]; + byte singleDigAlgoId[MAX_ALGO_SZ]; + + byte contentInfoSeq[MAX_SEQ_SZ]; + byte innerContSeq[MAX_EXP_SZ]; + byte innerOctets[MAX_OCTET_STR_SZ]; + + byte certsSet[MAX_SET_SZ]; + + byte signerInfoSet[MAX_SET_SZ]; + byte signerInfoSeq[MAX_SEQ_SZ]; + byte signerVersion[MAX_VERSION_SZ]; + /* issuerAndSerialNumber ...*/ + byte issuerSnSeq[MAX_SEQ_SZ]; + byte issuerName[MAX_SEQ_SZ]; + byte issuerSn[MAX_SN_SZ]; + /* OR subjectKeyIdentifier */ + byte issuerSKIDSeq[MAX_SEQ_SZ]; + byte issuerSKID[MAX_OCTET_STR_SZ]; + byte signerDigAlgoId[MAX_ALGO_SZ]; + byte digEncAlgoId[MAX_ALGO_SZ]; + byte signedAttribSet[MAX_SET_SZ]; + EncodedAttrib signedAttribs[7]; + byte signerDigest[MAX_OCTET_STR_SZ]; + word32 innerOctetsSz, innerContSeqSz, contentInfoSeqSz; + word32 outerSeqSz, outerContentSz, innerSeqSz, versionSz, digAlgoIdSetSz, + singleDigAlgoIdSz, certsSetSz; + word32 signerInfoSetSz, signerInfoSeqSz, signerVersionSz, + issuerSnSeqSz, issuerNameSz, issuerSnSz, issuerSKIDSz, + issuerSKIDSeqSz, signerDigAlgoIdSz, digEncAlgoIdSz, signerDigestSz; + word32 
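/* (Editor's note) The LENGTH_ONLY_E convention documented for
 * wc_PKCS7_GetAttributeValue() above leads to the usual two-call pattern;
 * a sketch, with oid/oidSz supplied by the caller and a NULL heap hint for
 * brevity:
 *
 *     word32 valSz = 0;
 *     byte*  val   = NULL;
 *     if (wc_PKCS7_GetAttributeValue(p7, oid, oidSz, NULL, &valSz)
 *             == LENGTH_ONLY_E) {
 *         val = (byte*)XMALLOC(valSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
 *         if (val != NULL && wc_PKCS7_GetAttributeValue(p7, oid, oidSz,
 *                 val, &valSz) > 0) {
 *             ... val holds the ASN.1-encoded attribute value ...
 *         }
 *     }
 */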
encContentDigestSz, signedAttribsSz, signedAttribsCount, + signedAttribSetSz; +} ESD; + + +static int EncodeAttributes(EncodedAttrib* ea, int eaSz, + PKCS7Attrib* attribs, int attribsSz) +{ + int i; + int maxSz = min(eaSz, attribsSz); + int allAttribsSz = 0; + + for (i = 0; i < maxSz; i++) + { + int attribSz = 0; + + ea[i].value = attribs[i].value; + ea[i].valueSz = attribs[i].valueSz; + attribSz += ea[i].valueSz; + ea[i].valueSetSz = SetSet(attribSz, ea[i].valueSet); + attribSz += ea[i].valueSetSz; + ea[i].oid = attribs[i].oid; + ea[i].oidSz = attribs[i].oidSz; + attribSz += ea[i].oidSz; + ea[i].valueSeqSz = SetSequence(attribSz, ea[i].valueSeq); + attribSz += ea[i].valueSeqSz; + ea[i].totalSz = attribSz; + + allAttribsSz += attribSz; + } + return allAttribsSz; +} + + +typedef struct FlatAttrib { + byte* data; + word32 dataSz; +} FlatAttrib; + +/* Returns a pointer to FlatAttrib whose members are initialized to 0. +* Caller is expected to free. +*/ +static FlatAttrib* NewAttrib(void* heap) +{ + FlatAttrib* fb = (FlatAttrib*) XMALLOC(sizeof(FlatAttrib), heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (fb != NULL) { + ForceZero(fb, sizeof(FlatAttrib)); + } + (void)heap; + return fb; +} + +/* Free FlatAttrib array and memory allocated to internal struct members */ +static void FreeAttribArray(PKCS7* pkcs7, FlatAttrib** arr, int rows) +{ + int i; + + if (arr) { + for (i = 0; i < rows; i++) { + if (arr[i]) { + if (arr[i]->data) { + ForceZero(arr[i]->data, arr[i]->dataSz); + XFREE(arr[i]->data, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + ForceZero(arr[i], sizeof(FlatAttrib)); + XFREE(arr[i], pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + } + ForceZero(arr, rows); + XFREE(arr, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + (void)pkcs7; +} + + +/* Sort FlatAttrib array in ascending order */ +static int SortAttribArray(FlatAttrib** arr, int rows) +{ + int i, j; + word32 minSz, minIdx; + FlatAttrib* a = NULL; + FlatAttrib* b = NULL; + FlatAttrib* tmp = NULL; + + if (arr == NULL) { + return BAD_FUNC_ARG; + } + + for (i = 0; i < rows; i++) { + a = arr[i]; + minSz = a->dataSz; + minIdx = i; + for (j = i+1; j < rows; j++) { + b = arr[j]; + if (b->dataSz < minSz) { + minSz = b->dataSz; + minIdx = j; + } + } + if (minSz < a->dataSz) { + /* swap array positions */ + tmp = arr[i]; + arr[i] = arr[minIdx]; + arr[minIdx] = tmp; + } + } + + return 0; +} + + +/* Build up array of FlatAttrib structs from EncodedAttrib ones. 
FlatAttrib + * holds flattened DER encoding of each attribute */ +static int FlattenEncodedAttribs(PKCS7* pkcs7, FlatAttrib** derArr, int rows, + EncodedAttrib* ea, int eaSz) +{ + int i, idx, sz; + byte* output = NULL; + FlatAttrib* fa = NULL; + + if (pkcs7 == NULL || derArr == NULL || ea == NULL) { + WOLFSSL_MSG("Invalid arguments to FlattenEncodedAttribs"); + return BAD_FUNC_ARG; + } + + if (rows != eaSz) { + WOLFSSL_MSG("DER array not large enough to hold attribute count"); + return BAD_FUNC_ARG; + } + + for (i = 0; i < eaSz; i++) { + sz = ea[i].valueSeqSz + ea[i].oidSz + ea[i].valueSetSz + ea[i].valueSz; + + output = (byte*)XMALLOC(sz, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (output == NULL) { + return MEMORY_E; + } + + idx = 0; + XMEMCPY(output + idx, ea[i].valueSeq, ea[i].valueSeqSz); + idx += ea[i].valueSeqSz; + XMEMCPY(output + idx, ea[i].oid, ea[i].oidSz); + idx += ea[i].oidSz; + XMEMCPY(output + idx, ea[i].valueSet, ea[i].valueSetSz); + idx += ea[i].valueSetSz; + XMEMCPY(output + idx, ea[i].value, ea[i].valueSz); + + fa = derArr[i]; + fa->data = output; + fa->dataSz = sz; + } + + return 0; +} + + +/* Sort and Flatten EncodedAttrib attributes into output buffer */ +static int FlattenAttributes(PKCS7* pkcs7, byte* output, EncodedAttrib* ea, + int eaSz) +{ + int i, idx, ret; + FlatAttrib** derArr = NULL; + FlatAttrib* fa = NULL; + + if (pkcs7 == NULL || output == NULL || ea == NULL) { + return BAD_FUNC_ARG; + } + + /* create array of FlatAttrib struct pointers to hold DER attribs */ + derArr = (FlatAttrib**) XMALLOC(eaSz * sizeof(FlatAttrib*), pkcs7->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (derArr == NULL) { + return MEMORY_E; + } + XMEMSET(derArr, 0, eaSz * sizeof(FlatAttrib*)); + + for (i = 0; i < eaSz; i++) { + derArr[i] = NewAttrib(pkcs7->heap); + if (derArr[i] == NULL) { + FreeAttribArray(pkcs7, derArr, eaSz); + return MEMORY_E; + } + ForceZero(derArr[i], sizeof(FlatAttrib)); + } + + /* flatten EncodedAttrib into DER byte arrays */ + ret = FlattenEncodedAttribs(pkcs7, derArr, eaSz, ea, eaSz); + if (ret != 0) { + FreeAttribArray(pkcs7, derArr, eaSz); + return ret; + } + + /* SET OF DER signed attributes must be sorted in ascending order */ + ret = SortAttribArray(derArr, eaSz); + if (ret != 0) { + FreeAttribArray(pkcs7, derArr, eaSz); + return ret; + } + + /* copy sorted DER attribute arrays into output buffer */ + idx = 0; + for (i = 0; i < eaSz; i++) { + fa = derArr[i]; + XMEMCPY(output + idx, fa->data, fa->dataSz); + idx += fa->dataSz; + } + + FreeAttribArray(pkcs7, derArr, eaSz); + + return 0; +} + + +#ifndef NO_RSA + +/* returns size of signature put into out, negative on error */ +static int wc_PKCS7_RsaSign(PKCS7* pkcs7, byte* in, word32 inSz, ESD* esd) +{ + int ret; + word32 idx; +#ifdef WOLFSSL_SMALL_STACK + RsaKey* privKey; +#else + RsaKey privKey[1]; +#endif + + if (pkcs7 == NULL || pkcs7->rng == NULL || in == NULL || esd == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_SMALL_STACK + privKey = (RsaKey*)XMALLOC(sizeof(RsaKey), pkcs7->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (privKey == NULL) + return MEMORY_E; +#endif + + ret = wc_InitRsaKey_ex(privKey, pkcs7->heap, pkcs7->devId); + if (ret == 0) { + if (pkcs7->privateKey != NULL && pkcs7->privateKeySz > 0) { + idx = 0; + ret = wc_RsaPrivateKeyDecode(pkcs7->privateKey, &idx, privKey, + pkcs7->privateKeySz); + } + else if (pkcs7->devId == INVALID_DEVID) { + ret = BAD_FUNC_ARG; + } + } + if (ret == 0) { + #ifdef WOLFSSL_ASYNC_CRYPT + do { + ret = wc_AsyncWait(ret, &privKey->asyncDev, + WC_ASYNC_FLAG_CALL_AGAIN); 
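/* (Editor's note) This do/while is the standard wolfSSL asynchronous-crypto
 * retry pattern: with WOLFSSL_ASYNC_CRYPT defined, an operation may return
 * WC_PENDING_E, wc_AsyncWait() services the pending event, and the call is
 * re-issued until it completes; in synchronous builds the loop compiles
 * away and the operation runs exactly once. Generic skeleton:
 *
 *     do {
 *         ret = wc_AsyncWait(ret, &key->asyncDev,
 *                            WC_ASYNC_FLAG_CALL_AGAIN);
 *         if (ret >= 0)
 *             ret = (the crypto operation);
 *     } while (ret == WC_PENDING_E);
 */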
+ if (ret >= 0) + #endif + { + ret = wc_RsaSSL_Sign(in, inSz, esd->encContentDigest, + sizeof(esd->encContentDigest), + privKey, pkcs7->rng); + } + #ifdef WOLFSSL_ASYNC_CRYPT + } while (ret == WC_PENDING_E); + #endif + } + + wc_FreeRsaKey(privKey); +#ifdef WOLFSSL_SMALL_STACK + XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return ret; +} + +#endif /* NO_RSA */ + + +#ifdef HAVE_ECC + +/* returns size of signature put into out, negative on error */ +static int wc_PKCS7_EcdsaSign(PKCS7* pkcs7, byte* in, word32 inSz, ESD* esd) +{ + int ret; + word32 outSz, idx; +#ifdef WOLFSSL_SMALL_STACK + ecc_key* privKey; +#else + ecc_key privKey[1]; +#endif + + if (pkcs7 == NULL || pkcs7->rng == NULL || in == NULL || esd == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_SMALL_STACK + privKey = (ecc_key*)XMALLOC(sizeof(ecc_key), pkcs7->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (privKey == NULL) + return MEMORY_E; +#endif + + ret = wc_ecc_init_ex(privKey, pkcs7->heap, pkcs7->devId); + if (ret == 0) { + if (pkcs7->privateKey != NULL && pkcs7->privateKeySz > 0) { + idx = 0; + ret = wc_EccPrivateKeyDecode(pkcs7->privateKey, &idx, privKey, + pkcs7->privateKeySz); + } + else if (pkcs7->devId == INVALID_DEVID) { + ret = BAD_FUNC_ARG; + } + } + if (ret == 0) { + outSz = sizeof(esd->encContentDigest); + #ifdef WOLFSSL_ASYNC_CRYPT + do { + ret = wc_AsyncWait(ret, &privKey->asyncDev, + WC_ASYNC_FLAG_CALL_AGAIN); + if (ret >= 0) + #endif + { + ret = wc_ecc_sign_hash(in, inSz, esd->encContentDigest, + &outSz, pkcs7->rng, privKey); + } + #ifdef WOLFSSL_ASYNC_CRYPT + } while (ret == WC_PENDING_E); + #endif + if (ret == 0) + ret = (int)outSz; + } + + wc_ecc_free(privKey); +#ifdef WOLFSSL_SMALL_STACK + XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return ret; +} + +#endif /* HAVE_ECC */ + + +/* builds up SignedData signed attributes, including default ones. 
+ * + * pkcs7 - pointer to initialized PKCS7 structure + * esd - pointer to initialized ESD structure, used for output + * + * return 0 on success, negative on error */ +static int wc_PKCS7_BuildSignedAttributes(PKCS7* pkcs7, ESD* esd, + const byte* contentType, word32 contentTypeSz, + const byte* contentTypeOid, word32 contentTypeOidSz, + const byte* messageDigestOid, word32 messageDigestOidSz, + const byte* signingTimeOid, word32 signingTimeOidSz, + byte* signingTime, word32 signingTimeSz) +{ + int hashSz; +#ifdef NO_ASN_TIME + PKCS7Attrib cannedAttribs[2]; +#else + time_t tm; + int timeSz; + PKCS7Attrib cannedAttribs[3]; +#endif + word32 idx = 0; + word32 cannedAttribsCount; + + if (pkcs7 == NULL || esd == NULL || contentType == NULL || + contentTypeOid == NULL || messageDigestOid == NULL || + signingTimeOid == NULL) { + return BAD_FUNC_ARG; + } + + if (pkcs7->skipDefaultSignedAttribs == 0) { + hashSz = wc_HashGetDigestSize(esd->hashType); + if (hashSz < 0) + return hashSz; + + #ifndef NO_ASN_TIME + if (signingTime == NULL || signingTimeSz == 0) + return BAD_FUNC_ARG; + + tm = XTIME(0); + timeSz = GetAsnTimeString(&tm, signingTime, signingTimeSz); + if (timeSz < 0) + return timeSz; + #endif + + cannedAttribsCount = sizeof(cannedAttribs)/sizeof(PKCS7Attrib); + + cannedAttribs[idx].oid = contentTypeOid; + cannedAttribs[idx].oidSz = contentTypeOidSz; + cannedAttribs[idx].value = contentType; + cannedAttribs[idx].valueSz = contentTypeSz; + idx++; + #ifndef NO_ASN_TIME + cannedAttribs[idx].oid = signingTimeOid; + cannedAttribs[idx].oidSz = signingTimeOidSz; + cannedAttribs[idx].value = signingTime; + cannedAttribs[idx].valueSz = timeSz; + idx++; + #endif + cannedAttribs[idx].oid = messageDigestOid; + cannedAttribs[idx].oidSz = messageDigestOidSz; + cannedAttribs[idx].value = esd->contentDigest; + cannedAttribs[idx].valueSz = hashSz + 2; /* ASN.1 heading */ + + esd->signedAttribsCount += cannedAttribsCount; + esd->signedAttribsSz += EncodeAttributes(&esd->signedAttribs[0], 3, + cannedAttribs, cannedAttribsCount); + } else { + esd->signedAttribsCount = 0; + esd->signedAttribsSz = 0; + } + + /* add custom signed attributes if set */ + if (pkcs7->signedAttribsSz > 0 && pkcs7->signedAttribs != NULL) { + esd->signedAttribsCount += pkcs7->signedAttribsSz; + #ifdef NO_ASN_TIME + esd->signedAttribsSz += EncodeAttributes(&esd->signedAttribs[2], 4, + pkcs7->signedAttribs, pkcs7->signedAttribsSz); + #else + esd->signedAttribsSz += EncodeAttributes(&esd->signedAttribs[3], 4, + pkcs7->signedAttribs, pkcs7->signedAttribsSz); + #endif + } + +#ifdef NO_ASN_TIME + (void)signingTimeOidSz; + (void)signingTime; + (void)signingTimeSz; +#endif + + return 0; +} + + +/* gets correct encryption algo ID for SignedData, either CTC_wRSA or + * CTC_wECDSA, from pkcs7->publicKeyOID and pkcs7->hashOID. 
+ * + * pkcs7 - pointer to PKCS7 structure + * digEncAlgoId - [OUT] output int to store correct algo ID in + * digEncAlgoType - [OUT] output for algo ID type + * + * return 0 on success, negative on error */ +static int wc_PKCS7_SignedDataGetEncAlgoId(PKCS7* pkcs7, int* digEncAlgoId, + int* digEncAlgoType) +{ + int algoId = 0; + int algoType = 0; + + if (pkcs7 == NULL || digEncAlgoId == NULL || digEncAlgoType == NULL) + return BAD_FUNC_ARG; + + if (pkcs7->publicKeyOID == RSAk) { + + algoType = oidSigType; + + switch (pkcs7->hashOID) { + #ifndef NO_SHA + case SHAh: + algoId = CTC_SHAwRSA; + break; + #endif + #ifdef WOLFSSL_SHA224 + case SHA224h: + algoId = CTC_SHA224wRSA; + break; + #endif + #ifndef NO_SHA256 + case SHA256h: + algoId = CTC_SHA256wRSA; + break; + #endif + #ifdef WOLFSSL_SHA384 + case SHA384h: + algoId = CTC_SHA384wRSA; + break; + #endif + #ifdef WOLFSSL_SHA512 + case SHA512h: + algoId = CTC_SHA512wRSA; + break; + #endif + } + + } +#ifdef HAVE_ECC + else if (pkcs7->publicKeyOID == ECDSAk) { + + algoType = oidSigType; + + switch (pkcs7->hashOID) { + #ifndef NO_SHA + case SHAh: + algoId = CTC_SHAwECDSA; + break; + #endif + #ifdef WOLFSSL_SHA224 + case SHA224h: + algoId = CTC_SHA224wECDSA; + break; + #endif + #ifndef NO_SHA256 + case SHA256h: + algoId = CTC_SHA256wECDSA; + break; + #endif + #ifdef WOLFSSL_SHA384 + case SHA384h: + algoId = CTC_SHA384wECDSA; + break; + #endif + #ifdef WOLFSSL_SHA512 + case SHA512h: + algoId = CTC_SHA512wECDSA; + break; + #endif + } + } +#endif /* HAVE_ECC */ + + if (algoId == 0) { + WOLFSSL_MSG("Invalid signature algorithm type"); + return BAD_FUNC_ARG; + } + + *digEncAlgoId = algoId; + *digEncAlgoType = algoType; + + return 0; +} + + +/* build SignedData DigestInfo for use with PKCS#7/RSA + * + * pkcs7 - pointer to initialized PKCS7 struct + * flatSignedAttribs - flattened, signed attributes + * flatSignedAttrbsSz - size of flatSignedAttribs, octets + * esd - pointer to initialized ESD struct + * digestInfo - [OUT] output array for DigestInfo + * digestInfoSz - [IN/OUT] - input size of array, size of digestInfo + * + * return 0 on success, negative on error */ +static int wc_PKCS7_BuildDigestInfo(PKCS7* pkcs7, byte* flatSignedAttribs, + word32 flatSignedAttribsSz, ESD* esd, + byte* digestInfo, word32* digestInfoSz) +{ + int ret, hashSz, digIdx = 0; + byte digestInfoSeq[MAX_SEQ_SZ]; + byte digestStr[MAX_OCTET_STR_SZ]; + byte attribSet[MAX_SET_SZ]; + byte algoId[MAX_ALGO_SZ]; + word32 digestInfoSeqSz, digestStrSz, algoIdSz; + word32 attribSetSz; + + if (pkcs7 == NULL || esd == NULL || digestInfo == NULL || + digestInfoSz == NULL) { + return BAD_FUNC_ARG; + } + + hashSz = wc_HashGetDigestSize(esd->hashType); + if (hashSz < 0) + return hashSz; + + if (flatSignedAttribsSz != 0) { + + if (flatSignedAttribs == NULL) + return BAD_FUNC_ARG; + + attribSetSz = SetSet(flatSignedAttribsSz, attribSet); + + ret = wc_HashInit(&esd->hash, esd->hashType); + if (ret < 0) + return ret; + + ret = wc_HashUpdate(&esd->hash, esd->hashType, + attribSet, attribSetSz); + if (ret == 0) + ret = wc_HashUpdate(&esd->hash, esd->hashType, + flatSignedAttribs, flatSignedAttribsSz); + if (ret == 0) + ret = wc_HashFinal(&esd->hash, esd->hashType, + esd->contentAttribsDigest); + wc_HashFree(&esd->hash, esd->hashType); + + if (ret < 0) + return ret; + + } else { + /* when no attrs, digest is contentDigest without tag and length */ + XMEMCPY(esd->contentAttribsDigest, esd->contentDigest + 2, hashSz); + } + + /* set algoID, with NULL attributes */ + algoIdSz = SetAlgoID(pkcs7->hashOID, 
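/* (Editor's note) The buffer assembled in this function is the DigestInfo
 * of PKCS#1 v1.5 (RFC 8017):
 *
 *     DigestInfo ::= SEQUENCE {
 *         digestAlgorithm  AlgorithmIdentifier,
 *         digest           OCTET STRING }
 *
 * For SHA-256 this is the familiar constant prefix followed by the 32-byte
 * hash:
 *
 *     30 31 30 0d 06 09 60 86 48 01 65 03 04 02 01 05 00 04 20 || digest
 */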
algoId, oidHashType, 0); + + digestStrSz = SetOctetString(hashSz, digestStr); + digestInfoSeqSz = SetSequence(algoIdSz + digestStrSz + hashSz, + digestInfoSeq); + + if (*digestInfoSz < (digestInfoSeqSz + algoIdSz + digestStrSz + hashSz)) { + return BUFFER_E; + } + + XMEMCPY(digestInfo + digIdx, digestInfoSeq, digestInfoSeqSz); + digIdx += digestInfoSeqSz; + XMEMCPY(digestInfo + digIdx, algoId, algoIdSz); + digIdx += algoIdSz; + XMEMCPY(digestInfo + digIdx, digestStr, digestStrSz); + digIdx += digestStrSz; + XMEMCPY(digestInfo + digIdx, esd->contentAttribsDigest, hashSz); + digIdx += hashSz; + + *digestInfoSz = digIdx; + + return 0; +} + + +/* build SignedData signature over DigestInfo or content digest + * + * pkcs7 - pointer to initialized PKCS7 struct + * flatSignedAttribs - flattened, signed attributes + * flatSignedAttribsSz - size of flatSignedAttribs, octets + * esd - pointer to initialized ESD struct + * + * returns length of signature on success, negative on error */ +static int wc_PKCS7_SignedDataBuildSignature(PKCS7* pkcs7, + byte* flatSignedAttribs, + word32 flatSignedAttribsSz, + ESD* esd) +{ + int ret = 0; +#if defined(HAVE_ECC) || \ + (defined(HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK) && !defined(NO_RSA)) + int hashSz = 0; +#endif +#if defined(HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK) && !defined(NO_RSA) + int hashOID; +#endif + word32 digestInfoSz = MAX_PKCS7_DIGEST_SZ; +#ifdef WOLFSSL_SMALL_STACK + byte* digestInfo; +#else + byte digestInfo[MAX_PKCS7_DIGEST_SZ]; +#endif + + if (pkcs7 == NULL || esd == NULL) + return BAD_FUNC_ARG; + +#ifdef WOLFSSL_SMALL_STACK + digestInfo = (byte*)XMALLOC(digestInfoSz, pkcs7->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (digestInfo == NULL) { + return MEMORY_E; + } +#endif + XMEMSET(digestInfo, 0, digestInfoSz); + + ret = wc_PKCS7_BuildDigestInfo(pkcs7, flatSignedAttribs, + flatSignedAttribsSz, esd, digestInfo, + &digestInfoSz); + if (ret < 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; + } + +#if defined(HAVE_ECC) || \ + (defined(HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK) && !defined(NO_RSA)) + /* get digest size from hash type */ + hashSz = wc_HashGetDigestSize(esd->hashType); + if (hashSz < 0) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return hashSz; + } +#endif + + /* sign digestInfo */ + switch (pkcs7->publicKeyOID) { + +#ifndef NO_RSA + case RSAk: + #ifdef HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK + if (pkcs7->rsaSignRawDigestCb != NULL) { + /* get hash OID */ + hashOID = wc_HashGetOID(esd->hashType); + + /* user signing plain digest, build DigestInfo themselves */ + ret = pkcs7->rsaSignRawDigestCb(pkcs7, + esd->contentAttribsDigest, hashSz, + esd->encContentDigest, sizeof(esd->encContentDigest), + pkcs7->privateKey, pkcs7->privateKeySz, pkcs7->devId, + hashOID); + break; + } + #endif + ret = wc_PKCS7_RsaSign(pkcs7, digestInfo, digestInfoSz, esd); + break; +#endif + +#ifdef HAVE_ECC + case ECDSAk: + /* CMS with ECDSA does not sign DigestInfo structure + * like PKCS#7 with RSA does */ + ret = wc_PKCS7_EcdsaSign(pkcs7, esd->contentAttribsDigest, + hashSz, esd); + break; +#endif + + default: + WOLFSSL_MSG("Unsupported public key type"); + ret = BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + if (ret >= 0) { + esd->encContentDigestSz = (word32)ret; + } + + return ret; +} + + +/* build PKCS#7 signedData content type */ +static int PKCS7_EncodeSigned(PKCS7* pkcs7, ESD* esd, + const byte* 
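/* (Editor's note) hashBuf/hashSz arrive here precomputed; the public
 * wrappers hash the content before calling in. When the caller already
 * holds the digest (e.g. for a detached signature split into header and
 * footer), it can be fed to the _ex variant directly; a sketch, with
 * head/foot and their sizes as hypothetical caller buffers:
 *
 *     byte hash[WC_SHA256_DIGEST_SIZE];
 *     ret = wc_Hash(WC_HASH_TYPE_SHA256, content, contentSz,
 *                   hash, sizeof(hash));
 *     if (ret == 0)
 *         ret = wc_PKCS7_EncodeSignedData_ex(pkcs7, hash, sizeof(hash),
 *                   head, &headSz, foot, &footSz);
 */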
hashBuf, word32 hashSz, byte* output, word32* outputSz, + byte* output2, word32* output2Sz) +{ + /* contentType OID (1.2.840.113549.1.9.3) */ + const byte contentTypeOid[] = + { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xF7, 0x0d, 0x01, + 0x09, 0x03 }; + + /* messageDigest OID (1.2.840.113549.1.9.4) */ + const byte messageDigestOid[] = + { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, + 0x09, 0x04 }; + + /* signingTime OID () */ + byte signingTimeOid[] = + { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, + 0x09, 0x05}; + + Pkcs7Cert* certPtr = NULL; + word32 certSetSz = 0; + + word32 signerInfoSz = 0; + word32 totalSz, total2Sz; + int idx = 0, ret = 0; + int digEncAlgoId, digEncAlgoType; + byte* flatSignedAttribs = NULL; + word32 flatSignedAttribsSz = 0; + + byte signedDataOid[MAX_OID_SZ]; + word32 signedDataOidSz; + + byte signingTime[MAX_TIME_STRING_SZ]; + + if (pkcs7 == NULL || pkcs7->contentSz == 0 || + pkcs7->encryptOID == 0 || pkcs7->hashOID == 0 || pkcs7->rng == 0 || + output == NULL || outputSz == NULL || *outputSz == 0 || hashSz == 0 || + hashBuf == NULL) { + return BAD_FUNC_ARG; + } + + /* verify the hash size matches */ +#ifdef WOLFSSL_SMALL_STACK + esd = (ESD*)XMALLOC(sizeof(ESD), pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (esd == NULL) + return MEMORY_E; +#endif + + XMEMSET(esd, 0, sizeof(ESD)); + + /* set content type based on contentOID, unless user has set custom one + with wc_PKCS7_SetContentType() */ + if (pkcs7->contentTypeSz == 0) { + + /* default to DATA content type if user has not set */ + if (pkcs7->contentOID == 0) { + pkcs7->contentOID = DATA; + } + + ret = wc_SetContentType(pkcs7->contentOID, pkcs7->contentType, + sizeof(pkcs7->contentType)); + if (ret < 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; + } + pkcs7->contentTypeSz = ret; + } + + /* set signedData outer content type */ + ret = wc_SetContentType(SIGNED_DATA, signedDataOid, sizeof(signedDataOid)); + if (ret < 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; + } + signedDataOidSz = ret; + + if (pkcs7->sidType != DEGENERATE_SID) { + esd->hashType = wc_OidGetHash(pkcs7->hashOID); + if (wc_HashGetDigestSize(esd->hashType) != (int)hashSz) { + WOLFSSL_MSG("hashSz did not match hashOID"); + #ifdef WOLFSSL_SMALL_STACK + XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return BUFFER_E; + } + + /* include hash */ + esd->contentDigest[0] = ASN_OCTET_STRING; + esd->contentDigest[1] = (byte)hashSz; + XMEMCPY(&esd->contentDigest[2], hashBuf, hashSz); + } + + if (pkcs7->detached == 1) { + /* do not include content if generating detached signature */ + esd->innerOctetsSz = 0; + esd->innerContSeqSz = 0; + esd->contentInfoSeqSz = SetSequence(pkcs7->contentTypeSz, + esd->contentInfoSeq); + } else { + esd->innerOctetsSz = SetOctetString(pkcs7->contentSz, esd->innerOctets); + esd->innerContSeqSz = SetExplicit(0, esd->innerOctetsSz + + pkcs7->contentSz, esd->innerContSeq); + esd->contentInfoSeqSz = SetSequence(pkcs7->contentSz + + esd->innerOctetsSz + pkcs7->contentTypeSz + + esd->innerContSeqSz, esd->contentInfoSeq); + } + + /* SignerIdentifier */ + if (pkcs7->sidType == CMS_ISSUER_AND_SERIAL_NUMBER) { + /* IssuerAndSerialNumber */ + esd->issuerSnSz = SetSerialNumber(pkcs7->issuerSn, pkcs7->issuerSnSz, + esd->issuerSn, MAX_SN_SZ, MAX_SN_SZ); + signerInfoSz += esd->issuerSnSz; + esd->issuerNameSz = SetSequence(pkcs7->issuerSz, esd->issuerName); + signerInfoSz 
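/* (Editor's note) This block encodes the first of the two CMS
 * SignerIdentifier CHOICEs (IssuerAndSerialNumber); the CMS_SKID branch
 * below encodes the SubjectKeyIdentifier alternative instead, which forces
 * version 3 as noted there. The choice is made before encoding, e.g.:
 *
 *     ret = wc_PKCS7_SetSignerIdentifierType(pkcs7, CMS_SKID);
 */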
+= esd->issuerNameSz + pkcs7->issuerSz; + esd->issuerSnSeqSz = SetSequence(signerInfoSz, esd->issuerSnSeq); + signerInfoSz += esd->issuerSnSeqSz; + + if (pkcs7->version == 3) { + /* RFC 4108 version MUST be 3 for firmware package signer */ + esd->signerVersionSz = SetMyVersion(3, esd->signerVersion, 0); + } + else { + /* version MUST be 1 otherwise*/ + esd->signerVersionSz = SetMyVersion(1, esd->signerVersion, 0); + } + + } else if (pkcs7->sidType == CMS_SKID) { + /* SubjectKeyIdentifier */ + esd->issuerSKIDSz = SetOctetString(KEYID_SIZE, esd->issuerSKID); + esd->issuerSKIDSeqSz = SetExplicit(0, esd->issuerSKIDSz + KEYID_SIZE, + esd->issuerSKIDSeq); + signerInfoSz += (esd->issuerSKIDSz + esd->issuerSKIDSeqSz + + KEYID_SIZE); + + /* version MUST be 3 */ + esd->signerVersionSz = SetMyVersion(3, esd->signerVersion, 0); + } else if (pkcs7->sidType == DEGENERATE_SID) { + /* no signer info added */ + } else { + #ifdef WOLFSSL_SMALL_STACK + XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return SKID_E; + } + + if (pkcs7->sidType != DEGENERATE_SID) { + signerInfoSz += esd->signerVersionSz; + esd->signerDigAlgoIdSz = SetAlgoID(pkcs7->hashOID, esd->signerDigAlgoId, + oidHashType, 0); + signerInfoSz += esd->signerDigAlgoIdSz; + + /* set signatureAlgorithm */ + ret = wc_PKCS7_SignedDataGetEncAlgoId(pkcs7, &digEncAlgoId, + &digEncAlgoType); + if (ret < 0) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return ret; + } + esd->digEncAlgoIdSz = SetAlgoID(digEncAlgoId, esd->digEncAlgoId, + digEncAlgoType, 0); + signerInfoSz += esd->digEncAlgoIdSz; + + /* build up signed attributes, include contentType, signingTime, and + messageDigest by default */ + ret = wc_PKCS7_BuildSignedAttributes(pkcs7, esd, pkcs7->contentType, + pkcs7->contentTypeSz, + contentTypeOid, sizeof(contentTypeOid), + messageDigestOid, sizeof(messageDigestOid), + signingTimeOid, sizeof(signingTimeOid), + signingTime, sizeof(signingTime)); + if (ret < 0) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return ret; + } + + if (esd->signedAttribsSz > 0) { + flatSignedAttribs = (byte*)XMALLOC(esd->signedAttribsSz, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + flatSignedAttribsSz = esd->signedAttribsSz; + if (flatSignedAttribs == NULL) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return MEMORY_E; + } + + FlattenAttributes(pkcs7, flatSignedAttribs, + esd->signedAttribs, esd->signedAttribsCount); + esd->signedAttribSetSz = SetImplicit(ASN_SET, 0, esd->signedAttribsSz, + esd->signedAttribSet); + } else { + esd->signedAttribSetSz = 0; + } + + /* Calculate the final hash and encrypt it. 
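+ * When signed attributes are present, the signature is computed over the
+ * complete DER encoding of the SignedAttributes SET (RFC 5652, Section 5.4,
+ * with the IMPLICIT [0] tag replaced by an explicit SET OF tag), not over
+ * the content octets directly; with no signed attributes the signature is
+ * made over the content digest.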
*/ + ret = wc_PKCS7_SignedDataBuildSignature(pkcs7, flatSignedAttribs, + flatSignedAttribsSz, esd); + if (ret < 0) { + if (pkcs7->signedAttribsSz != 0) + XFREE(flatSignedAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #ifdef WOLFSSL_SMALL_STACK + XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return ret; + } + + signerInfoSz += flatSignedAttribsSz + esd->signedAttribSetSz; + + esd->signerDigestSz = SetOctetString(esd->encContentDigestSz, + esd->signerDigest); + signerInfoSz += esd->signerDigestSz + esd->encContentDigestSz; + + esd->signerInfoSeqSz = SetSequence(signerInfoSz, esd->signerInfoSeq); + signerInfoSz += esd->signerInfoSeqSz; + } + esd->signerInfoSetSz = SetSet(signerInfoSz, esd->signerInfoSet); + signerInfoSz += esd->signerInfoSetSz; + + /* certificates [0] IMPLICIT CertificateSet */ + /* get total certificates size */ + certPtr = pkcs7->certList; + while (certPtr != NULL) { + certSetSz += certPtr->derSz; + certPtr = certPtr->next; + } + certPtr = NULL; + + if (certSetSz > 0) + esd->certsSetSz = SetImplicit(ASN_SET, 0, certSetSz, esd->certsSet); + + if (pkcs7->sidType != DEGENERATE_SID) { + esd->singleDigAlgoIdSz = SetAlgoID(pkcs7->hashOID, esd->singleDigAlgoId, + oidHashType, 0); + } + esd->digAlgoIdSetSz = SetSet(esd->singleDigAlgoIdSz, esd->digAlgoIdSet); + + if (pkcs7->version == 3) { + /* RFC 4108 version MUST be 3 for firmware package signer */ + esd->versionSz = SetMyVersion(3, esd->version, 0); + } + else { + esd->versionSz = SetMyVersion(1, esd->version, 0); + } + + totalSz = esd->versionSz + esd->singleDigAlgoIdSz + esd->digAlgoIdSetSz + + esd->contentInfoSeqSz + pkcs7->contentTypeSz + + esd->innerContSeqSz + esd->innerOctetsSz + pkcs7->contentSz; + total2Sz = esd->certsSetSz + certSetSz + signerInfoSz; + + if (pkcs7->detached) { + totalSz -= pkcs7->contentSz; + } + + esd->innerSeqSz = SetSequence(totalSz + total2Sz, esd->innerSeq); + totalSz += esd->innerSeqSz; + esd->outerContentSz = SetExplicit(0, totalSz + total2Sz, esd->outerContent); + totalSz += esd->outerContentSz + signedDataOidSz; + esd->outerSeqSz = SetSequence(totalSz + total2Sz, esd->outerSeq); + totalSz += esd->outerSeqSz; + + /* if using header/footer, we are not returning the content */ + if (output2 && output2Sz) { + if (total2Sz > *output2Sz) { + if (pkcs7->signedAttribsSz != 0) + XFREE(flatSignedAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #ifdef WOLFSSL_SMALL_STACK + XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return BUFFER_E; + } + + if (!pkcs7->detached) { + totalSz -= pkcs7->contentSz; + } + } + else { + /* if using single output buffer include content and footer */ + totalSz += total2Sz; + } + + if (totalSz > *outputSz) { + if (pkcs7->signedAttribsSz != 0) + XFREE(flatSignedAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #ifdef WOLFSSL_SMALL_STACK + XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return BUFFER_E; + } + + idx = 0; + XMEMCPY(output + idx, esd->outerSeq, esd->outerSeqSz); + idx += esd->outerSeqSz; + XMEMCPY(output + idx, signedDataOid, signedDataOidSz); + idx += signedDataOidSz; + XMEMCPY(output + idx, esd->outerContent, esd->outerContentSz); + idx += esd->outerContentSz; + XMEMCPY(output + idx, esd->innerSeq, esd->innerSeqSz); + idx += esd->innerSeqSz; + XMEMCPY(output + idx, esd->version, esd->versionSz); + idx += esd->versionSz; + XMEMCPY(output + idx, esd->digAlgoIdSet, esd->digAlgoIdSetSz); + idx += esd->digAlgoIdSetSz; + XMEMCPY(output + idx, esd->singleDigAlgoId, esd->singleDigAlgoIdSz); + idx += esd->singleDigAlgoIdSz; + XMEMCPY(output + 
idx, esd->contentInfoSeq, esd->contentInfoSeqSz); + idx += esd->contentInfoSeqSz; + XMEMCPY(output + idx, pkcs7->contentType, pkcs7->contentTypeSz); + idx += pkcs7->contentTypeSz; + XMEMCPY(output + idx, esd->innerContSeq, esd->innerContSeqSz); + idx += esd->innerContSeqSz; + XMEMCPY(output + idx, esd->innerOctets, esd->innerOctetsSz); + idx += esd->innerOctetsSz; + + /* support returning header and footer without content */ + if (output2 && output2Sz) { + *outputSz = idx; + idx = 0; + } + else { + if (!pkcs7->detached) { + XMEMCPY(output + idx, pkcs7->content, pkcs7->contentSz); + idx += pkcs7->contentSz; + } + output2 = output; + } + + /* certificates */ + XMEMCPY(output2 + idx, esd->certsSet, esd->certsSetSz); + idx += esd->certsSetSz; + certPtr = pkcs7->certList; + while (certPtr != NULL) { + XMEMCPY(output2 + idx, certPtr->der, certPtr->derSz); + idx += certPtr->derSz; + certPtr = certPtr->next; + } + wc_PKCS7_FreeCertSet(pkcs7); + + XMEMCPY(output2 + idx, esd->signerInfoSet, esd->signerInfoSetSz); + idx += esd->signerInfoSetSz; + XMEMCPY(output2 + idx, esd->signerInfoSeq, esd->signerInfoSeqSz); + idx += esd->signerInfoSeqSz; + XMEMCPY(output2 + idx, esd->signerVersion, esd->signerVersionSz); + idx += esd->signerVersionSz; + /* SignerIdentifier */ + if (pkcs7->sidType == CMS_ISSUER_AND_SERIAL_NUMBER) { + /* IssuerAndSerialNumber */ + XMEMCPY(output2 + idx, esd->issuerSnSeq, esd->issuerSnSeqSz); + idx += esd->issuerSnSeqSz; + XMEMCPY(output2 + idx, esd->issuerName, esd->issuerNameSz); + idx += esd->issuerNameSz; + XMEMCPY(output2 + idx, pkcs7->issuer, pkcs7->issuerSz); + idx += pkcs7->issuerSz; + XMEMCPY(output2 + idx, esd->issuerSn, esd->issuerSnSz); + idx += esd->issuerSnSz; + } else if (pkcs7->sidType == CMS_SKID) { + /* SubjectKeyIdentifier */ + XMEMCPY(output2 + idx, esd->issuerSKIDSeq, esd->issuerSKIDSeqSz); + idx += esd->issuerSKIDSeqSz; + XMEMCPY(output2 + idx, esd->issuerSKID, esd->issuerSKIDSz); + idx += esd->issuerSKIDSz; + XMEMCPY(output2 + idx, pkcs7->issuerSubjKeyId, KEYID_SIZE); + idx += KEYID_SIZE; + } else if (pkcs7->sidType == DEGENERATE_SID) { + /* no signer infos in degenerate case */ + } else { + #ifdef WOLFSSL_SMALL_STACK + XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return SKID_E; + } + XMEMCPY(output2 + idx, esd->signerDigAlgoId, esd->signerDigAlgoIdSz); + idx += esd->signerDigAlgoIdSz; + + /* SignerInfo:Attributes */ + if (flatSignedAttribsSz > 0) { + XMEMCPY(output2 + idx, esd->signedAttribSet, esd->signedAttribSetSz); + idx += esd->signedAttribSetSz; + XMEMCPY(output2 + idx, flatSignedAttribs, flatSignedAttribsSz); + idx += flatSignedAttribsSz; + XFREE(flatSignedAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + } + + XMEMCPY(output2 + idx, esd->digEncAlgoId, esd->digEncAlgoIdSz); + idx += esd->digEncAlgoIdSz; + XMEMCPY(output2 + idx, esd->signerDigest, esd->signerDigestSz); + idx += esd->signerDigestSz; + XMEMCPY(output2 + idx, esd->encContentDigest, esd->encContentDigestSz); + idx += esd->encContentDigestSz; + + if (output2 && output2Sz) { + *output2Sz = idx; + idx = 0; /* success */ + } + else { + *outputSz = idx; + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return idx; +} + +/* hashBuf: The computed digest for the pkcs7->content + * hashSz: The size of computed digest for the pkcs7->content based on hashOID + * outputHead: The PKCS7 header that goes on top of the raw data signed. + * outputFoot: The PKCS7 footer that goes at the end of the raw data signed. 
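+ * outputHeadSz: [IN/OUT] size of outputHead buffer on input, length of
+ *               header written on return
+ * outputFootSz: [IN/OUT] size of outputFoot buffer on input, length of
+ *               footer written on return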
+ * pkcs7->content: Not used
+ * pkcs7->contentSz: Must be provided as actual size of raw data
+ * return codes: 0=success, negative=error
+ */
+int wc_PKCS7_EncodeSignedData_ex(PKCS7* pkcs7, const byte* hashBuf,
+ word32 hashSz, byte* outputHead, word32* outputHeadSz, byte* outputFoot,
+ word32* outputFootSz)
+{
+ int ret;
+#ifdef WOLFSSL_SMALL_STACK
+ ESD* esd;
+#else
+ ESD esd[1];
+#endif
+
+ /* other args checked in wc_PKCS7_EncodeSigned_ex */
+ if (pkcs7 == NULL || outputFoot == NULL || outputFootSz == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ esd = (ESD*)XMALLOC(sizeof(ESD), pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (esd == NULL)
+ return MEMORY_E;
+#endif
+
+ XMEMSET(esd, 0, sizeof(ESD));
+
+ ret = PKCS7_EncodeSigned(pkcs7, esd, hashBuf, hashSz,
+ outputHead, outputHeadSz, outputFoot, outputFootSz);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+
+/* Toggle detached signature mode on/off for PKCS#7/CMS SignedData content type.
+ * By default wolfCrypt includes the data to be signed in the SignedData
+ * bundle. This data can be omitted in the case when a detached signature is
+ * being created. To enable generation of detached signatures, set flag to "1",
+ * otherwise set to "0":
+ *
+ * flag 1 turns on support
+ * flag 0 turns off support
+ *
+ * pkcs7 - pointer to initialized PKCS7 structure
+ * flag - turn on/off detached signature generation (1 or 0)
+ *
+ * Returns 0 on success, negative upon error. */
+int wc_PKCS7_SetDetached(PKCS7* pkcs7, word16 flag)
+{
+ if (pkcs7 == NULL || (flag != 0 && flag != 1))
+ return BAD_FUNC_ARG;
+
+ pkcs7->detached = flag;
+
+ return 0;
+}
+
+/* By default, SignedData bundles have the following signed attributes attached:
+ * contentType (1.2.840.113549.1.9.3)
+ * signingTime (1.2.840.113549.1.9.5)
+ * messageDigest (1.2.840.113549.1.9.4)
+ *
+ * Calling this API before wc_PKCS7_EncodeSignedData() will disable the
+ * inclusion of those attributes.
+ *
+ * pkcs7 - pointer to initialized PKCS7 structure
+ *
+ * Returns 0 on success, negative upon error.
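+ *
+ * Illustrative call order (sketch only; signerCertDer and out are
+ * placeholder buffers, key setup and error checking omitted):
+ *
+ *     wc_PKCS7_InitWithCert(pkcs7, signerCertDer, signerCertDerSz);
+ *     wc_PKCS7_NoDefaultSignedAttribs(pkcs7);
+ *     ret = wc_PKCS7_EncodeSignedData(pkcs7, out, outSz);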
*/ +int wc_PKCS7_NoDefaultSignedAttribs(PKCS7* pkcs7) +{ + if (pkcs7 == NULL) + return BAD_FUNC_ARG; + + pkcs7->skipDefaultSignedAttribs = 1; + + return 0; +} + +/* return codes: >0: Size of signed PKCS7 output buffer, negative: error */ +int wc_PKCS7_EncodeSignedData(PKCS7* pkcs7, byte* output, word32 outputSz) +{ + int ret; + int hashSz; + enum wc_HashType hashType; + byte hashBuf[WC_MAX_DIGEST_SIZE]; +#ifdef WOLFSSL_SMALL_STACK + ESD* esd; +#else + ESD esd[1]; +#endif + + /* other args checked in wc_PKCS7_EncodeSigned_ex */ + if (pkcs7 == NULL || pkcs7->contentSz == 0 || pkcs7->content == NULL) { + return BAD_FUNC_ARG; + } + + /* get hash type and size, validate hashOID */ + hashType = wc_OidGetHash(pkcs7->hashOID); + hashSz = wc_HashGetDigestSize(hashType); + if (hashSz < 0) + return hashSz; + +#ifdef WOLFSSL_SMALL_STACK + esd = (ESD*)XMALLOC(sizeof(ESD), pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (esd == NULL) + return MEMORY_E; +#endif + + XMEMSET(esd, 0, sizeof(ESD)); + esd->hashType = hashType; + + /* calculate hash for content */ + ret = wc_HashInit(&esd->hash, esd->hashType); + if (ret == 0) { + ret = wc_HashUpdate(&esd->hash, esd->hashType, + pkcs7->content, pkcs7->contentSz); + if (ret == 0) { + ret = wc_HashFinal(&esd->hash, esd->hashType, hashBuf); + } + wc_HashFree(&esd->hash, esd->hashType); + } + + if (ret == 0) { + ret = PKCS7_EncodeSigned(pkcs7, esd, hashBuf, hashSz, + output, &outputSz, NULL, NULL); + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return ret; +} + + +/* Single-shot API to generate a CMS SignedData bundle that encapsulates a + * content of type FirmwarePkgData. Any recipient certificates should be + * loaded into the PKCS7 structure prior to calling this function, using + * wc_PKCS7_InitWithCert() and/or wc_PKCS7_AddCertificate(). + * + * pkcs7 - pointer to initialized PKCS7 struct + * privateKey - private RSA/ECC key, used for signing SignedData + * privateKeySz - size of privateKey, octets + * signOID - public key algorithm OID, used for sign operation + * hashOID - hash algorithm OID, used for signature generation + * content - content to be encapsulated, of type FirmwarePkgData + * contentSz - size of content, octets + * signedAttribs - optional signed attributes + * signedAttribsSz - number of PKCS7Attrib members in signedAttribs + * output - output buffer for final bundle + * outputSz - size of output buffer, octets + * + * Returns length of generated bundle on success, negative upon error. 
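+ *
+ * Usage sketch (illustrative only; signerCertDer, rsaKeyDer and fw are
+ * placeholder buffers and all error handling is omitted):
+ *
+ *     byte out[2048];
+ *     int  ret;
+ *
+ *     wc_PKCS7_InitWithCert(pkcs7, signerCertDer, signerCertDerSz);
+ *     ret = wc_PKCS7_EncodeSignedFPD(pkcs7, rsaKeyDer, rsaKeyDerSz,
+ *                                    RSAk, SHA256h, fw, fwSz, NULL, 0,
+ *                                    out, sizeof(out));
+ *
+ * On success ret is the bundle length written to out (> 0), otherwise
+ * a negative error code.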
*/ +int wc_PKCS7_EncodeSignedFPD(PKCS7* pkcs7, byte* privateKey, + word32 privateKeySz, int signOID, int hashOID, + byte* content, word32 contentSz, + PKCS7Attrib* signedAttribs, word32 signedAttribsSz, + byte* output, word32 outputSz) +{ + int ret = 0; + WC_RNG rng; + + if (pkcs7 == NULL || privateKey == NULL || privateKeySz == 0 || + content == NULL || contentSz == 0 || output == NULL || outputSz == 0) + return BAD_FUNC_ARG; + + ret = wc_InitRng(&rng); + if (ret != 0) + return ret; + + pkcs7->rng = &rng; + pkcs7->content = content; + pkcs7->contentSz = contentSz; + pkcs7->contentOID = FIRMWARE_PKG_DATA; + pkcs7->hashOID = hashOID; + pkcs7->encryptOID = signOID; + pkcs7->privateKey = privateKey; + pkcs7->privateKeySz = privateKeySz; + pkcs7->signedAttribs = signedAttribs; + pkcs7->signedAttribsSz = signedAttribsSz; + pkcs7->version = 3; + + ret = wc_PKCS7_EncodeSignedData(pkcs7, output, outputSz); + if (ret <= 0) { + WOLFSSL_MSG("Error encoding CMS SignedData content type"); + } + + pkcs7->rng = NULL; + wc_FreeRng(&rng); + + return ret; +} + +#ifndef NO_PKCS7_ENCRYPTED_DATA + +/* Single-shot API to generate a CMS SignedData bundle that encapsulates a + * CMS EncryptedData bundle. Content of inner EncryptedData is set to that + * of FirmwarePkgData. Any recipient certificates should be loaded into the + * PKCS7 structure prior to calling this function, using wc_PKCS7_InitWithCert() + * and/or wc_PKCS7_AddCertificate(). + * + * pkcs7 - pointer to initialized PKCS7 struct + * encryptKey - encryption key used for encrypting EncryptedData + * encryptKeySz - size of encryptKey, octets + * privateKey - private RSA/ECC key, used for signing SignedData + * privateKeySz - size of privateKey, octets + * encryptOID - encryption algorithm OID, to be used as encryption + * algorithm for EncryptedData + * signOID - public key algorithm OID, to be used for sign + * operation in SignedData generation + * hashOID - hash algorithm OID, to be used for signature in + * SignedData generation + * content - content to be encapsulated + * contentSz - size of content, octets + * unprotectedAttribs - optional unprotected attributes, for EncryptedData + * unprotectedAttribsSz - number of PKCS7Attrib members in unprotectedAttribs + * signedAttribs - optional signed attributes, for SignedData + * signedAttribsSz - number of PKCS7Attrib members in signedAttribs + * output - output buffer for final bundle + * outputSz - size of output buffer, octets + * + * Returns length of generated bundle on success, negative upon error. 
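+ *
+ * A receiver typically unwraps the result in two steps (sketch, not a fixed
+ * API contract): verify the outer bundle with wc_PKCS7_VerifySignedData(),
+ * then, with the same encryptKey set on the structure, decrypt the verified
+ * content with wc_PKCS7_DecodeEncryptedData().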
*/ +int wc_PKCS7_EncodeSignedEncryptedFPD(PKCS7* pkcs7, byte* encryptKey, + word32 encryptKeySz, byte* privateKey, + word32 privateKeySz, int encryptOID, + int signOID, int hashOID, + byte* content, word32 contentSz, + PKCS7Attrib* unprotectedAttribs, + word32 unprotectedAttribsSz, + PKCS7Attrib* signedAttribs, + word32 signedAttribsSz, + byte* output, word32 outputSz) +{ + int ret = 0, encryptedSz = 0; + byte* encrypted = NULL; + WC_RNG rng; + + if (pkcs7 == NULL || encryptKey == NULL || encryptKeySz == 0 || + privateKey == NULL || privateKeySz == 0 || content == NULL || + contentSz == 0 || output == NULL || outputSz == 0) { + return BAD_FUNC_ARG; + } + + /* 1: build up EncryptedData using FirmwarePkgData type, use output + * buffer as tmp for storage and to get size */ + + /* set struct elements, inner content type is FirmwarePkgData */ + pkcs7->content = content; + pkcs7->contentSz = contentSz; + pkcs7->contentOID = FIRMWARE_PKG_DATA; + pkcs7->encryptOID = encryptOID; + pkcs7->encryptionKey = encryptKey; + pkcs7->encryptionKeySz = encryptKeySz; + pkcs7->unprotectedAttribs = unprotectedAttribs; + pkcs7->unprotectedAttribsSz = unprotectedAttribsSz; + pkcs7->version = 3; + + encryptedSz = wc_PKCS7_EncodeEncryptedData(pkcs7, output, outputSz); + if (encryptedSz < 0) { + WOLFSSL_MSG("Error encoding CMS EncryptedData content type"); + return encryptedSz; + } + + /* save encryptedData, reset output buffer and struct */ + encrypted = (byte*)XMALLOC(encryptedSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (encrypted == NULL) { + ForceZero(output, outputSz); + return MEMORY_E; + } + + XMEMCPY(encrypted, output, encryptedSz); + ForceZero(output, outputSz); + + ret = wc_InitRng(&rng); + if (ret != 0) { + ForceZero(encrypted, encryptedSz); + XFREE(encrypted, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + /* 2: build up SignedData, encapsulating EncryptedData */ + pkcs7->rng = &rng; + pkcs7->content = encrypted; + pkcs7->contentSz = encryptedSz; + pkcs7->contentOID = ENCRYPTED_DATA; + pkcs7->hashOID = hashOID; + pkcs7->encryptOID = signOID; + pkcs7->privateKey = privateKey; + pkcs7->privateKeySz = privateKeySz; + pkcs7->signedAttribs = signedAttribs; + pkcs7->signedAttribsSz = signedAttribsSz; + + ret = wc_PKCS7_EncodeSignedData(pkcs7, output, outputSz); + if (ret <= 0) { + WOLFSSL_MSG("Error encoding CMS SignedData content type"); + } + + ForceZero(encrypted, encryptedSz); + XFREE(encrypted, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + pkcs7->rng = NULL; + wc_FreeRng(&rng); + + return ret; +} + +#endif /* NO_PKCS7_ENCRYPTED_DATA */ + +#if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA) +/* Single-shot API to generate a CMS SignedData bundle that encapsulates a + * CMS CompressedData bundle. Content of inner CompressedData is set to that + * of FirmwarePkgData. Any recipient certificates should be loaded into the + * PKCS7 structure prior to calling this function, using wc_PKCS7_InitWithCert() + * and/or wc_PKCS7_AddCertificate(). 
+ *
+ * pkcs7 - pointer to initialized PKCS7 struct
+ * privateKey - private RSA/ECC key, used for signing SignedData
+ * privateKeySz - size of privateKey, octets
+ * signOID - public key algorithm OID, to be used for sign
+ * operation in SignedData generation
+ * hashOID - hash algorithm OID, to be used for signature in
+ * SignedData generation
+ * content - content to be encapsulated
+ * contentSz - size of content, octets
+ * signedAttribs - optional signed attributes, for SignedData
+ * signedAttribsSz - number of PKCS7Attrib members in signedAttribs
+ * output - output buffer for final bundle
+ * outputSz - size of output buffer, octets
+ *
+ * Returns length of generated bundle on success, negative upon error. */
+int wc_PKCS7_EncodeSignedCompressedFPD(PKCS7* pkcs7, byte* privateKey,
+ word32 privateKeySz, int signOID,
+ int hashOID, byte* content,
+ word32 contentSz,
+ PKCS7Attrib* signedAttribs,
+ word32 signedAttribsSz, byte* output,
+ word32 outputSz)
+{
+ int ret = 0, compressedSz = 0;
+ byte* compressed = NULL;
+ WC_RNG rng;
+
+ if (pkcs7 == NULL || privateKey == NULL || privateKeySz == 0 ||
+ content == NULL || contentSz == 0 || output == NULL || outputSz == 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* 1: build up CompressedData using FirmwarePkgData type, use output
+ * buffer as tmp for storage and to get size */
+
+ /* set struct elements, inner content type is FirmwarePkgData */
+ pkcs7->content = content;
+ pkcs7->contentSz = contentSz;
+ pkcs7->contentOID = FIRMWARE_PKG_DATA;
+ pkcs7->version = 3;
+
+ compressedSz = wc_PKCS7_EncodeCompressedData(pkcs7, output, outputSz);
+ if (compressedSz < 0) {
+ WOLFSSL_MSG("Error encoding CMS CompressedData content type");
+ return compressedSz;
+ }
+
+ /* save compressedData, reset output buffer and struct */
+ compressed = (byte*)XMALLOC(compressedSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (compressed == NULL) {
+ ForceZero(output, outputSz);
+ return MEMORY_E;
+ }
+
+ XMEMCPY(compressed, output, compressedSz);
+ ForceZero(output, outputSz);
+
+ ret = wc_InitRng(&rng);
+ if (ret != 0) {
+ ForceZero(compressed, compressedSz);
+ XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ /* 2: build up SignedData, encapsulating CompressedData */
+ pkcs7->rng = &rng;
+ pkcs7->content = compressed;
+ pkcs7->contentSz = compressedSz;
+ pkcs7->contentOID = COMPRESSED_DATA;
+ pkcs7->hashOID = hashOID;
+ pkcs7->encryptOID = signOID;
+ pkcs7->privateKey = privateKey;
+ pkcs7->privateKeySz = privateKeySz;
+ pkcs7->signedAttribs = signedAttribs;
+ pkcs7->signedAttribsSz = signedAttribsSz;
+
+ ret = wc_PKCS7_EncodeSignedData(pkcs7, output, outputSz);
+ if (ret <= 0) {
+ WOLFSSL_MSG("Error encoding CMS SignedData content type");
+ }
+
+ ForceZero(compressed, compressedSz);
+ XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->rng = NULL;
+ wc_FreeRng(&rng);
+
+ return ret;
+}
+
+#ifndef NO_PKCS7_ENCRYPTED_DATA
+
+/* Single-shot API to generate a CMS SignedData bundle that encapsulates a
+ * CMS EncryptedData bundle, which then encapsulates a CMS CompressedData
+ * bundle. Content of inner CompressedData is set to that of FirmwarePkgData.
+ * Any recipient certificates should be loaded into the PKCS7 structure prior
+ * to calling this function, using wc_PKCS7_InitWithCert() and/or
+ * wc_PKCS7_AddCertificate().
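+ *
+ * The resulting layering, from the outside in, is:
+ *
+ *     SignedData
+ *       EncryptedData        (encryptKey, encryptOID)
+ *         CompressedData     (zlib)
+ *           FirmwarePkgData  (content)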
+ * + * pkcs7 - pointer to initialized PKCS7 struct + * encryptKey - encryption key used for encrypting EncryptedData + * encryptKeySz - size of encryptKey, octets + * privateKey - private RSA/ECC key, used for signing SignedData + * privateKeySz - size of privateKey, octets + * encryptOID - encryption algorithm OID, to be used as encryption + * algorithm for EncryptedData + * signOID - public key algorithm OID, to be used for sign + * operation in SignedData generation + * hashOID - hash algorithm OID, to be used for signature in + * SignedData generation + * content - content to be encapsulated + * contentSz - size of content, octets + * unprotectedAttribs - optional unprotected attributes, for EncryptedData + * unprotectedAttribsSz - number of PKCS7Attrib members in unprotectedAttribs + * signedAttribs - optional signed attributes, for SignedData + * signedAttribsSz - number of PKCS7Attrib members in signedAttribs + * output - output buffer for final bundle + * outputSz - size of output buffer, octets + * + * Returns length of generated bundle on success, negative upon error. */ +int wc_PKCS7_EncodeSignedEncryptedCompressedFPD(PKCS7* pkcs7, byte* encryptKey, + word32 encryptKeySz, byte* privateKey, + word32 privateKeySz, int encryptOID, + int signOID, int hashOID, byte* content, + word32 contentSz, + PKCS7Attrib* unprotectedAttribs, + word32 unprotectedAttribsSz, + PKCS7Attrib* signedAttribs, + word32 signedAttribsSz, + byte* output, word32 outputSz) +{ + int ret = 0, compressedSz = 0, encryptedSz = 0; + byte* compressed = NULL; + byte* encrypted = NULL; + WC_RNG rng; + + if (pkcs7 == NULL || encryptKey == NULL || encryptKeySz == 0 || + privateKey == NULL || privateKeySz == 0 || content == NULL || + contentSz == 0 || output == NULL || outputSz == 0) { + return BAD_FUNC_ARG; + } + + /* 1: build up CompressedData using FirmwarePkgData type, use output + * buffer as tmp for storage and to get size */ + pkcs7->content = content; + pkcs7->contentSz = contentSz; + pkcs7->contentOID = FIRMWARE_PKG_DATA; + pkcs7->version = 3; + + compressedSz = wc_PKCS7_EncodeCompressedData(pkcs7, output, outputSz); + if (compressedSz < 0) { + WOLFSSL_MSG("Error encoding CMS CompressedData content type"); + return compressedSz; + } + + /* save compressedData, reset output buffer and struct */ + compressed = (byte*)XMALLOC(compressedSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (compressed == NULL) + return MEMORY_E; + + XMEMCPY(compressed, output, compressedSz); + ForceZero(output, outputSz); + + /* 2: build up EncryptedData using CompressedData, use output + * buffer as tmp for storage and to get size */ + pkcs7->content = compressed; + pkcs7->contentSz = compressedSz; + pkcs7->contentOID = COMPRESSED_DATA; + pkcs7->encryptOID = encryptOID; + pkcs7->encryptionKey = encryptKey; + pkcs7->encryptionKeySz = encryptKeySz; + pkcs7->unprotectedAttribs = unprotectedAttribs; + pkcs7->unprotectedAttribsSz = unprotectedAttribsSz; + + encryptedSz = wc_PKCS7_EncodeEncryptedData(pkcs7, output, outputSz); + if (encryptedSz < 0) { + WOLFSSL_MSG("Error encoding CMS EncryptedData content type"); + XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return encryptedSz; + } + + /* save encryptedData, reset output buffer and struct */ + encrypted = (byte*)XMALLOC(encryptedSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (encrypted == NULL) { + ForceZero(compressed, compressedSz); + XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } + + XMEMCPY(encrypted, output, encryptedSz); + ForceZero(compressed, compressedSz); 
+ XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ ForceZero(output, outputSz);
+
+ ret = wc_InitRng(&rng);
+ if (ret != 0) {
+ ForceZero(encrypted, encryptedSz);
+ XFREE(encrypted, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ /* 3: build up SignedData, encapsulating EncryptedData */
+ pkcs7->rng = &rng;
+ pkcs7->content = encrypted;
+ pkcs7->contentSz = encryptedSz;
+ pkcs7->contentOID = ENCRYPTED_DATA;
+ pkcs7->hashOID = hashOID;
+ pkcs7->encryptOID = signOID;
+ pkcs7->privateKey = privateKey;
+ pkcs7->privateKeySz = privateKeySz;
+ pkcs7->signedAttribs = signedAttribs;
+ pkcs7->signedAttribsSz = signedAttribsSz;
+
+ ret = wc_PKCS7_EncodeSignedData(pkcs7, output, outputSz);
+ if (ret <= 0) {
+ WOLFSSL_MSG("Error encoding CMS SignedData content type");
+ }
+
+ ForceZero(encrypted, encryptedSz);
+ XFREE(encrypted, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->rng = NULL;
+ wc_FreeRng(&rng);
+
+ return ret;
+}
+
+#endif /* !NO_PKCS7_ENCRYPTED_DATA */
+#endif /* HAVE_LIBZ && !NO_PKCS7_COMPRESSED_DATA */
+
+
+#ifndef NO_RSA
+
+#ifdef HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK
+/* register raw RSA sign digest callback */
+int wc_PKCS7_SetRsaSignRawDigestCb(PKCS7* pkcs7, CallbackRsaSignRawDigest cb)
+{
+ if (pkcs7 == NULL || cb == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ pkcs7->rsaSignRawDigestCb = cb;
+
+ return 0;
+}
+#endif
+
+/* verifies RSA signature against expected hash using certs from the
+ * certificates set, returns size of recovered digest on success,
+ * negative on error */
+static int wc_PKCS7_RsaVerify(PKCS7* pkcs7, byte* sig, int sigSz,
+ byte* hash, word32 hashSz)
+{
+ int ret = 0, i;
+ word32 scratch = 0, verified = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ byte* digest;
+ RsaKey* key;
+ DecodedCert* dCert;
+#else
+ byte digest[MAX_PKCS7_DIGEST_SZ];
+ RsaKey key[1];
+ DecodedCert stack_dCert;
+ DecodedCert* dCert = &stack_dCert;
+#endif
+
+ if (pkcs7 == NULL || sig == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ digest = (byte*)XMALLOC(MAX_PKCS7_DIGEST_SZ, pkcs7->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (digest == NULL)
+ return MEMORY_E;
+
+ key = (RsaKey*)XMALLOC(sizeof(RsaKey), pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (key == NULL) {
+ XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MEMORY_E;
+ }
+
+ dCert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), pkcs7->heap,
+ DYNAMIC_TYPE_DCERT);
+ if (dCert == NULL) {
+ XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(key, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MEMORY_E;
+ }
+#endif
+
+ XMEMSET(digest, 0, MAX_PKCS7_DIGEST_SZ);
+
+ /* loop over certs received in certificates set, try to find one
+ * that will validate signature */
+ for (i = 0; i < MAX_PKCS7_CERTS; i++) {
+
+ verified = 0;
+ scratch = 0;
+
+ if (pkcs7->certSz[i] == 0)
+ continue;
+
+ ret = wc_InitRsaKey_ex(key, pkcs7->heap, pkcs7->devId);
+ if (ret != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(key, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(dCert, pkcs7->heap, DYNAMIC_TYPE_DCERT);
+#endif
+ return ret;
+ }
+
+ InitDecodedCert(dCert, pkcs7->cert[i], pkcs7->certSz[i], pkcs7->heap);
+ /* not verifying, only using this to extract public key */
+ ret = ParseCert(dCert, CA_TYPE, NO_VERIFY, 0);
+ if (ret < 0) {
+ WOLFSSL_MSG("ASN RSA cert parse error");
+ FreeDecodedCert(dCert);
+ wc_FreeRsaKey(key);
+ continue;
+ }
+
+ if (wc_RsaPublicKeyDecode(dCert->publicKey, &scratch, key,
+ dCert->pubKeySize) < 0) {
+ WOLFSSL_MSG("ASN RSA key decode error");
+ FreeDecodedCert(dCert);
+ wc_FreeRsaKey(key);
+ continue;
+ }
+
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ do {
+ ret = wc_AsyncWait(ret, &key->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaSSL_Verify(sig, sigSz, digest, MAX_PKCS7_DIGEST_SZ,
+ key);
+ }
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ } while (ret == WC_PENDING_E);
+ #endif
+ FreeDecodedCert(dCert);
+ wc_FreeRsaKey(key);
+
+ if ((ret > 0) && (hashSz == (word32)ret)) {
+ if (XMEMCMP(digest, hash, hashSz) == 0) {
+ /* found signer that successfully verified signature */
+ verified = 1;
+ break;
+ }
+ }
+ }
+
+ if (verified == 0) {
+ ret = SIG_VERIFY_E;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(key, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(dCert, pkcs7->heap, DYNAMIC_TYPE_DCERT);
+#endif
+
+ return ret;
+}
+
+#endif /* NO_RSA */
+
+
+#ifdef HAVE_ECC
+
+/* verifies ECDSA signature against expected hash using certs from the
+ * certificates set, returns 0 on successful verification, negative on
+ * error */
+static int wc_PKCS7_EcdsaVerify(PKCS7* pkcs7, byte* sig, int sigSz,
+ byte* hash, word32 hashSz)
+{
+ int ret = 0, i;
+ int res = 0;
+ int verified = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ byte* digest;
+ ecc_key* key;
+ DecodedCert* dCert;
+#else
+ byte digest[MAX_PKCS7_DIGEST_SZ];
+ ecc_key key[1];
+ DecodedCert stack_dCert;
+ DecodedCert* dCert = &stack_dCert;
+#endif
+ word32 idx = 0;
+
+ if (pkcs7 == NULL || sig == NULL)
+ return BAD_FUNC_ARG;
+
+#ifdef WOLFSSL_SMALL_STACK
+ digest = (byte*)XMALLOC(MAX_PKCS7_DIGEST_SZ, pkcs7->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (digest == NULL)
+ return MEMORY_E;
+
+ key = (ecc_key*)XMALLOC(sizeof(ecc_key), pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (key == NULL) {
+ XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MEMORY_E;
+ }
+
+ dCert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), pkcs7->heap,
+ DYNAMIC_TYPE_DCERT);
+ if (dCert == NULL) {
+ XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(key, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MEMORY_E;
+ }
+#endif
+
+ XMEMSET(digest, 0, MAX_PKCS7_DIGEST_SZ);
+
+ /* loop over certs received in certificates set, try to find one
+ * that will validate signature */
+ for (i = 0; i < MAX_PKCS7_CERTS; i++) {
+
+ verified = 0;
+
+ if (pkcs7->certSz[i] == 0)
+ continue;
+
+ ret = wc_ecc_init_ex(key, pkcs7->heap, pkcs7->devId);
+ if (ret != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(key, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(dCert, pkcs7->heap, DYNAMIC_TYPE_DCERT);
+#endif
+ return ret;
+ }
+
+ InitDecodedCert(dCert, pkcs7->cert[i], pkcs7->certSz[i], pkcs7->heap);
+ /* not verifying, only using this to extract public key */
+ ret = ParseCert(dCert, CA_TYPE, NO_VERIFY, 0);
+ if (ret < 0) {
+ WOLFSSL_MSG("ASN ECC cert parse error");
+ FreeDecodedCert(dCert);
+ wc_ecc_free(key);
+ continue;
+ }
+
+ if (wc_EccPublicKeyDecode(pkcs7->publicKey, &idx, key,
+ pkcs7->publicKeySz) < 0) {
+ WOLFSSL_MSG("ASN ECC key decode error");
+ FreeDecodedCert(dCert);
+ wc_ecc_free(key);
+ continue;
+ }
+
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ do {
+ ret = wc_AsyncWait(ret, &key->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_ecc_verify_hash(sig, sigSz, hash, hashSz, &res, key);
+ }
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ } while (ret == WC_PENDING_E);
+ #endif
+
+ FreeDecodedCert(dCert);
+ wc_ecc_free(key);
+
+ if (ret == 0 && res == 1) {
+ /* found signer that successfully verified signature */
+ verified = 1;
+ break;
+ }
+ }
+
+ if (verified == 0) {
+ ret = SIG_VERIFY_E;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(key, pkcs7->heap,
DYNAMIC_TYPE_TMP_BUFFER); + XFREE(dCert, pkcs7->heap, DYNAMIC_TYPE_DCERT); +#endif + + return ret; +} + +#endif /* HAVE_ECC */ + + +/* build SignedData digest, both in PKCS#7 DigestInfo format and + * as plain digest for CMS. + * + * pkcs7 - pointer to initialized PKCS7 struct + * signedAttrib - signed attributes + * signedAttribSz - size of signedAttrib, octets + * pkcs7Digest - [OUT] PKCS#7 DigestInfo + * pkcs7DigestSz - [IN/OUT] size of pkcs7Digest + * plainDigest - [OUT] pointer to plain digest, offset into pkcs7Digest + * plainDigestSz - [OUT] size of digest at plainDigest + * + * returns 0 on success, negative on error */ +static int wc_PKCS7_BuildSignedDataDigest(PKCS7* pkcs7, byte* signedAttrib, + word32 signedAttribSz, byte* pkcs7Digest, + word32* pkcs7DigestSz, byte** plainDigest, + word32* plainDigestSz, + const byte* hashBuf, word32 hashBufSz) +{ + int ret = 0, digIdx = 0; + word32 attribSetSz = 0, hashSz = 0; + byte attribSet[MAX_SET_SZ]; + byte digest[WC_MAX_DIGEST_SIZE]; + byte digestInfoSeq[MAX_SEQ_SZ]; + byte digestStr[MAX_OCTET_STR_SZ]; + byte algoId[MAX_ALGO_SZ]; + word32 digestInfoSeqSz, digestStrSz, algoIdSz; +#ifdef WOLFSSL_SMALL_STACK + byte* digestInfo; +#else + byte digestInfo[MAX_PKCS7_DIGEST_SZ]; +#endif + + wc_HashAlg hash; + enum wc_HashType hashType; + + /* check arguments */ + if (pkcs7 == NULL || pkcs7Digest == NULL || + pkcs7DigestSz == NULL || plainDigest == NULL) { + return BAD_FUNC_ARG; + } + + hashType = wc_OidGetHash(pkcs7->hashOID); + ret = wc_HashGetDigestSize(hashType); + if (ret < 0) + return ret; + hashSz = ret; + + if (signedAttribSz > 0) { + if (signedAttrib == NULL) + return BAD_FUNC_ARG; + } + else { + if (hashBuf && hashBufSz > 0) { + if (hashSz != hashBufSz) + return BAD_FUNC_ARG; + } + else if (pkcs7->content == NULL) + return BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_SMALL_STACK + digestInfo = (byte*)XMALLOC(MAX_PKCS7_DIGEST_SZ, pkcs7->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (digestInfo == NULL) + return MEMORY_E; +#endif + + XMEMSET(pkcs7Digest, 0, *pkcs7DigestSz); + XMEMSET(digest, 0, WC_MAX_DIGEST_SIZE); + XMEMSET(digestInfo, 0, MAX_PKCS7_DIGEST_SZ); + + + /* calculate digest */ + if (hashBuf && hashBufSz > 0 && signedAttribSz == 0) { + XMEMCPY(digest, hashBuf, hashBufSz); + } + else { + ret = wc_HashInit(&hash, hashType); + if (ret < 0) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return ret; + } + + if (signedAttribSz > 0) { + attribSetSz = SetSet(signedAttribSz, attribSet); + + /* calculate digest */ + ret = wc_HashUpdate(&hash, hashType, attribSet, attribSetSz); + if (ret == 0) + ret = wc_HashUpdate(&hash, hashType, signedAttrib, signedAttribSz); + if (ret == 0) + ret = wc_HashFinal(&hash, hashType, digest); + } else { + ret = wc_HashUpdate(&hash, hashType, pkcs7->content, pkcs7->contentSz); + if (ret == 0) + ret = wc_HashFinal(&hash, hashType, digest); + } + + wc_HashFree(&hash, hashType); + if (ret < 0) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return ret; + } + } + + /* Set algoID, with NULL attributes */ + algoIdSz = SetAlgoID(pkcs7->hashOID, algoId, oidHashType, 0); + + digestStrSz = SetOctetString(hashSz, digestStr); + digestInfoSeqSz = SetSequence(algoIdSz + digestStrSz + hashSz, + digestInfoSeq); + + XMEMCPY(digestInfo + digIdx, digestInfoSeq, digestInfoSeqSz); + digIdx += digestInfoSeqSz; + XMEMCPY(digestInfo + digIdx, algoId, algoIdSz); + digIdx += algoIdSz; + XMEMCPY(digestInfo + digIdx, digestStr, digestStrSz); + 
digIdx += digestStrSz; + XMEMCPY(digestInfo + digIdx, digest, hashSz); + digIdx += hashSz; + + XMEMCPY(pkcs7Digest, digestInfo, digIdx); + *pkcs7DigestSz = digIdx; + + /* set plain digest pointer */ + *plainDigest = pkcs7Digest + digIdx - hashSz; + *plainDigestSz = hashSz; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return 0; +} + + +/* Verifies CMS/PKCS7 SignedData content digest matches that which is + * included in the messageDigest signed attribute. Only called when + * signed attributes are present, otherwise original signature verification + * is done over content. + * + * pkcs7 - pointer to initialized PKCS7 struct + * hashBuf - pointer to user-provided hash buffer, used with + * wc_PKCS7_VerifySignedData_ex() + * hashBufSz - size of hashBuf, octets + * + * return 0 on success, negative on error */ +static int wc_PKCS7_VerifyContentMessageDigest(PKCS7* pkcs7, + const byte* hashBuf, + word32 hashSz) +{ + int ret = 0, digestSz = 0, innerAttribSz = 0; + word32 idx = 0; + byte* digestBuf = NULL; +#ifdef WOLFSSL_SMALL_STACK + byte* digest = NULL; +#else + byte digest[MAX_PKCS7_DIGEST_SZ]; +#endif + PKCS7DecodedAttrib* attrib; + enum wc_HashType hashType; + + /* messageDigest OID (1.2.840.113549.1.9.4) */ + const byte mdOid[] = + { 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x09, 0x04 }; + + if (pkcs7 == NULL) + return BAD_FUNC_ARG; + + if ((pkcs7->content == NULL || pkcs7->contentSz == 0) && + (hashBuf == NULL || hashSz == 0)) { + WOLFSSL_MSG("SignedData bundle has no content or hash to verify"); + return BAD_FUNC_ARG; + } + + /* lookup messageDigest attribute */ + attrib = findAttrib(pkcs7, mdOid, sizeof(mdOid)); + if (attrib == NULL) { + WOLFSSL_MSG("messageDigest attribute not in bundle, must be when " + "signed attribs are present"); + return ASN_PARSE_E; + } + + /* advance past attrib->value ASN.1 header and length */ + if (attrib->value == NULL || attrib->valueSz == 0) + return ASN_PARSE_E; + + if (attrib->value[idx++] != ASN_OCTET_STRING) + return ASN_PARSE_E; + + if (GetLength(attrib->value, &idx, &innerAttribSz, attrib->valueSz) < 0) + return ASN_PARSE_E; + + /* get hash type and size */ + hashType = wc_OidGetHash(pkcs7->hashOID); + if (hashType == WC_HASH_TYPE_NONE) { + WOLFSSL_MSG("Error getting hash type for PKCS7 content verification"); + return BAD_FUNC_ARG; + } + + /* build content hash if needed, or use existing hash value */ + if (hashBuf == NULL) { + +#ifdef WOLFSSL_SMALL_STACK + digest = (byte*)XMALLOC(MAX_PKCS7_DIGEST_SZ, pkcs7->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (digest == NULL) + return MEMORY_E; +#endif + XMEMSET(digest, 0, MAX_PKCS7_DIGEST_SZ); + + ret = wc_Hash(hashType, pkcs7->content, pkcs7->contentSz, digest, + MAX_PKCS7_DIGEST_SZ); + if (ret < 0) { + WOLFSSL_MSG("Error hashing PKCS7 content for verification"); +#ifdef WOLFSSL_SMALL_STACK + XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; + } + + digestBuf = digest; + digestSz = wc_HashGetDigestSize(hashType); + if (digestSz < 0) { + WOLFSSL_MSG("Invalid hash type"); +#ifdef WOLFSSL_SMALL_STACK + XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return digestSz; + } + } else { + + /* user passed in pre-computed hash */ + digestBuf = (byte*)hashBuf; + digestSz = (int)hashSz; + } + + /* compare generated to hash in messageDigest attribute */ + if ((innerAttribSz != digestSz) || + (XMEMCMP(attrib->value + idx, digestBuf, (word32)digestSz) != 0)) { + WOLFSSL_MSG("Content digest does not match messageDigest attrib 
value"); +#ifdef WOLFSSL_SMALL_STACK + XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return SIG_VERIFY_E; + } + + if (hashBuf == NULL) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + } + + return 0; +} + + +/* verifies SignedData signature, over either PKCS#7 DigestInfo or + * content digest. + * + * pkcs7 - pointer to initialized PKCS7 struct + * sig - signature to verify + * sigSz - size of sig + * signedAttrib - signed attributes, or null if empty + * signedAttribSz - size of signedAttributes + * + * return 0 on success, negative on error */ +static int wc_PKCS7_SignedDataVerifySignature(PKCS7* pkcs7, byte* sig, + word32 sigSz, byte* signedAttrib, + word32 signedAttribSz, + const byte* hashBuf, word32 hashSz) +{ + int ret = 0; + word32 plainDigestSz = 0, pkcs7DigestSz; + byte* plainDigest = NULL; /* offset into pkcs7Digest */ +#ifdef WOLFSSL_SMALL_STACK + byte* pkcs7Digest; +#else + byte pkcs7Digest[MAX_PKCS7_DIGEST_SZ]; +#endif + + if (pkcs7 == NULL) + return BAD_FUNC_ARG; + + /* allocate space to build hash */ + pkcs7DigestSz = MAX_PKCS7_DIGEST_SZ; +#ifdef WOLFSSL_SMALL_STACK + pkcs7Digest = (byte*)XMALLOC(pkcs7DigestSz, pkcs7->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (pkcs7Digest == NULL) + return MEMORY_E; +#endif + + XMEMSET(pkcs7Digest, 0, pkcs7DigestSz); + + /* verify signed attrib digest matches that of content */ + if (signedAttrib != NULL) { + ret = wc_PKCS7_VerifyContentMessageDigest(pkcs7, hashBuf, hashSz); + if (ret != 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; + } + } + + /* build hash to verify against */ + ret = wc_PKCS7_BuildSignedDataDigest(pkcs7, signedAttrib, + signedAttribSz, pkcs7Digest, + &pkcs7DigestSz, &plainDigest, + &plainDigestSz, hashBuf, hashSz); + if (ret < 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; + } + + /* If no certificates are available then store the signature and hash for + * user to verify. Make sure that different return value than success is + * returned because the signature was not verified here. 
*/ + if (ret == 0) { + byte haveCert = 0; + int i; + + for (i = 0; i < MAX_PKCS7_CERTS; i++) { + if (pkcs7->certSz[i] == 0) + continue; + haveCert = 1; + } + + if (!haveCert) { + WOLFSSL_MSG("No certificates in bundle to verify signature"); + + /* store signature */ + XFREE(pkcs7->signature, pkcs7->heap, DYNAMIC_TYPE_SIGNATURE); + pkcs7->signature = NULL; + pkcs7->signatureSz = 0; + pkcs7->signature = (byte*)XMALLOC(sigSz, pkcs7->heap, + DYNAMIC_TYPE_SIGNATURE); + if (pkcs7->signature == NULL) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return MEMORY_E; + } + XMEMCPY(pkcs7->signature, sig, sigSz); + pkcs7->signatureSz = sigSz; + + /* store plain digest (CMS and ECC) */ + XFREE(pkcs7->plainDigest, pkcs7->heap, DYNAMIC_TYPE_DIGEST); + pkcs7->plainDigest = NULL; + pkcs7->plainDigestSz = 0; + pkcs7->plainDigest = (byte*)XMALLOC(plainDigestSz, pkcs7->heap, + DYNAMIC_TYPE_DIGEST); + if (pkcs7->plainDigest == NULL) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return MEMORY_E; + } + XMEMCPY(pkcs7->plainDigest, plainDigest, plainDigestSz); + pkcs7->plainDigestSz = plainDigestSz; + + /* store pkcs7 digest (default RSA) */ + XFREE(pkcs7->pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_DIGEST); + pkcs7->pkcs7Digest = NULL; + pkcs7->pkcs7DigestSz = 0; + pkcs7->pkcs7Digest = (byte*)XMALLOC(pkcs7DigestSz, pkcs7->heap, + DYNAMIC_TYPE_DIGEST); + if (pkcs7->pkcs7Digest == NULL) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return MEMORY_E; + } + XMEMCPY(pkcs7->pkcs7Digest, pkcs7Digest, pkcs7DigestSz); + pkcs7->pkcs7DigestSz = pkcs7DigestSz; + + #ifdef WOLFSSL_SMALL_STACK + XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return PKCS7_SIGNEEDS_CHECK; + } + } + + + + switch (pkcs7->publicKeyOID) { + +#ifndef NO_RSA + case RSAk: + ret = wc_PKCS7_RsaVerify(pkcs7, sig, sigSz, pkcs7Digest, + pkcs7DigestSz); + if (ret < 0) { + WOLFSSL_MSG("PKCS#7 verification failed, trying CMS"); + ret = wc_PKCS7_RsaVerify(pkcs7, sig, sigSz, plainDigest, + plainDigestSz); + } + break; +#endif + +#ifdef HAVE_ECC + case ECDSAk: + ret = wc_PKCS7_EcdsaVerify(pkcs7, sig, sigSz, plainDigest, + plainDigestSz); + break; +#endif + + default: + WOLFSSL_MSG("Unsupported public key type"); + ret = BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; +} + + +/* set correct public key OID based on signature OID, stores in + * pkcs7->publicKeyOID and returns same value */ +static int wc_PKCS7_SetPublicKeyOID(PKCS7* pkcs7, int sigOID) +{ + if (pkcs7 == NULL) + return BAD_FUNC_ARG; + + pkcs7->publicKeyOID = 0; + + switch (sigOID) { + + #ifndef NO_RSA + /* RSA signature types */ + case CTC_MD2wRSA: + case CTC_MD5wRSA: + case CTC_SHAwRSA: + case CTC_SHA224wRSA: + case CTC_SHA256wRSA: + case CTC_SHA384wRSA: + case CTC_SHA512wRSA: + pkcs7->publicKeyOID = RSAk; + break; + + /* if sigOID is already RSAk */ + case RSAk: + pkcs7->publicKeyOID = sigOID; + break; + #endif + + #ifndef NO_DSA + /* DSA signature types */ + case CTC_SHAwDSA: + pkcs7->publicKeyOID = DSAk; + break; + + /* if sigOID is already DSAk */ + case DSAk: + pkcs7->publicKeyOID = sigOID; + break; + #endif + + #ifdef HAVE_ECC + /* ECDSA signature types */ + case CTC_SHAwECDSA: + case CTC_SHA224wECDSA: + case CTC_SHA256wECDSA: + case CTC_SHA384wECDSA: + case CTC_SHA512wECDSA: + pkcs7->publicKeyOID = ECDSAk; + break; + + /* if sigOID is 
already ECDSAk */
+ case ECDSAk:
+ pkcs7->publicKeyOID = sigOID;
+ break;
+ #endif
+
+ default:
+ WOLFSSL_MSG("Unsupported public key algorithm");
+ return ASN_SIG_KEY_E;
+ }
+
+ return pkcs7->publicKeyOID;
+}
+
+
+/* Parses through the attributes and adds them to the PKCS7 structure
+ * Creates dynamic attribute structures that are freed by calling
+ * wc_PKCS7_Free()
+ *
+ * NOTE: An attribute has the ASN.1 format of
+ ** Sequence
+ ****** Object ID
+ ****** Set
+ ********** {PrintableString, UTCTime, OCTET STRING ...}
+ *
+ * pkcs7 the PKCS7 structure to put the parsed attributes into
+ * in buffer holding all attributes
+ * inSz size of in buffer
+ *
+ * returns the number of attributes parsed on success
+ */
+static int wc_PKCS7_ParseAttribs(PKCS7* pkcs7, byte* in, int inSz)
+{
+ int found = 0;
+ word32 idx = 0;
+ word32 oid;
+
+ if (pkcs7 == NULL || in == NULL || inSz < 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ while (idx < (word32)inSz) {
+ int length = 0;
+ int oidIdx;
+ PKCS7DecodedAttrib* attrib;
+
+ if (GetSequence(in, &idx, &length, inSz) < 0)
+ return ASN_PARSE_E;
+
+ attrib = (PKCS7DecodedAttrib*)XMALLOC(sizeof(PKCS7DecodedAttrib),
+ pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (attrib == NULL) {
+ return MEMORY_E;
+ }
+ XMEMSET(attrib, 0, sizeof(PKCS7DecodedAttrib));
+
+ oidIdx = idx;
+ if (GetObjectId(in, &idx, &oid, oidIgnoreType, inSz)
+ < 0) {
+ XFREE(attrib, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ASN_PARSE_E;
+ }
+ attrib->oidSz = idx - oidIdx;
+ attrib->oid = (byte*)XMALLOC(attrib->oidSz, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (attrib->oid == NULL) {
+ XFREE(attrib, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
+ }
+ XMEMCPY(attrib->oid, in + oidIdx, attrib->oidSz);
+
+ /* Get Set that contains the printable string value */
+ if (GetSet(in, &idx, &length, inSz) < 0) {
+ XFREE(attrib->oid, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(attrib, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ASN_PARSE_E;
+ }
+
+ if ((inSz - idx) < (word32)length) {
+ XFREE(attrib->oid, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(attrib, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ASN_PARSE_E;
+ }
+
+ attrib->valueSz = (word32)length;
+ attrib->value = (byte*)XMALLOC(attrib->valueSz, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (attrib->value == NULL) {
+ XFREE(attrib->oid, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(attrib, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
+ }
+ XMEMCPY(attrib->value, in + idx, attrib->valueSz);
+ idx += length;
+
+ /* store attribute in linked list */
+ if (pkcs7->decodedAttrib != NULL) {
+ attrib->next = pkcs7->decodedAttrib;
+ pkcs7->decodedAttrib = attrib;
+ } else {
+ pkcs7->decodedAttrib = attrib;
+ }
+ found++;
+ }
+
+ return found;
+}
+
+
+/* option to turn off support for degenerate cases
+ * flag 0 turns off support
+ * flag 1 turns on support
+ *
+ * by default support for SignedData degenerate cases is on
+ */
+void wc_PKCS7_AllowDegenerate(PKCS7* pkcs7, word16 flag)
+{
+ if (pkcs7) {
+ if (flag) { /* flag of 1 turns on support for degenerate */
+ pkcs7->noDegenerate = 0;
+ }
+ else { /* flag of 0 turns off support */
+ pkcs7->noDegenerate = 1;
+ }
+ }
+}
+
+/* Parses through a signerInfo set. Reads buffer "in" from "idxIn" to "idxIn" +
+ * length treating the current "idxIn" plus the length of set as max possible
+ * index.
+ *
+ * In the case that signed attributes are found "signedAttrib" gets set to point
+ * at their location in the buffer "in". Also in this case signedAttribSz gets
+ * set to the size of the signedAttrib buffer.
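+ *
+ * For reference, the SignerInfo being parsed has the following shape
+ * (RFC 5652, Section 5.3):
+ *
+ *     SignerInfo ::= SEQUENCE {
+ *         version            CMSVersion,  -- 1 or 3 here
+ *         sid                SignerIdentifier,
+ *         digestAlgorithm    DigestAlgorithmIdentifier,
+ *         signedAttrs    [0] IMPLICIT SignedAttributes OPTIONAL,
+ *         signatureAlgorithm SignatureAlgorithmIdentifier,
+ *         signature          SignatureValue,
+ *         unsignedAttrs  [1] IMPLICIT UnsignedAttributes OPTIONAL }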
+ *
+ * returns 0 on success
+ */
+static int wc_PKCS7_ParseSignerInfo(PKCS7* pkcs7, byte* in, word32 inSz,
+ word32* idxIn, int degenerate, byte** signedAttrib, int* signedAttribSz)
+{
+ int ret = 0;
+ int length;
+ int version;
+ word32 sigOID = 0, hashOID = 0;
+ word32 idx = *idxIn, localIdx;
+ byte tag;
+
+ WOLFSSL_ENTER("wc_PKCS7_ParseSignerInfo");
+ /* require a signer if degenerate case not allowed */
+ if (inSz == 0 && pkcs7->noDegenerate == 1) {
+ WOLFSSL_MSG("Set to not allow degenerate cases");
+ return PKCS7_NO_SIGNER_E;
+ }
+
+ if (inSz == 0 && degenerate == 0) {
+ WOLFSSL_MSG("PKCS7 signers expected");
+ return PKCS7_NO_SIGNER_E;
+ }
+
+ /* not a degenerate case and there are elements in the set */
+ if (inSz > 0 && degenerate == 0) {
+ ret = wc_PKCS7_SignerInfoNew(pkcs7);
+
+ /* Get the sequence of the first signerInfo */
+ if (ret == 0 && GetSequence(in, &idx, &length, inSz) < 0)
+ ret = ASN_PARSE_E;
+
+ /* Get the version */
+ if (ret == 0 && GetMyVersion(in, &idx, &version, inSz) < 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0) {
+ pkcs7->signerInfo->version = version;
+ }
+
+ if (ret == 0 && version == 1) {
+ /* Get the sequence of IssuerAndSerialNumber */
+ if (GetSequence(in, &idx, &length, inSz) < 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0) {
+ ret = wc_PKCS7_SignerInfoSetSID(pkcs7, in + idx, length);
+ idx += length;
+ }
+
+ } else if (ret == 0 && version == 3) {
+ /* Get the sequence of SubjectKeyIdentifier */
+ if (idx + 1 > inSz)
+ ret = BUFFER_E;
+
+ localIdx = idx;
+ if (ret == 0 && GetASNTag(in, &localIdx, &tag, inSz) == 0 &&
+ tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
+ idx++;
+
+ if (GetLength(in, &idx, &length, inSz) <= 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && idx + 1 > inSz)
+ ret = BUFFER_E;
+
+ if (ret == 0 && GetASNTag(in, &idx, &tag, inSz) < 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && tag != ASN_OCTET_STRING)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && GetLength(in, &idx, &length, inSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+ else {
+ /* check for SKID with ASN_CONTEXT_SPECIFIC, otherwise in version
+ * 3 try to get issuerAndSerial */
+ localIdx = idx;
+ if (GetASNTag(in, &localIdx, &tag, inSz) == 0 &&
+ tag == ASN_CONTEXT_SPECIFIC) {
+ idx++;
+ if (ret == 0 && GetLength(in, &idx, &length, inSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+ else {
+ if (pkcs7->version != 3) {
+ WOLFSSL_MSG("Unexpected signer info found with version");
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0 && GetSequence(in, &idx, &length, inSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+ }
+
+ if (ret == 0) {
+ ret = wc_PKCS7_SignerInfoSetSID(pkcs7, in + idx, length);
+ idx += length;
+ }
+
+ } else {
+ WOLFSSL_MSG("PKCS#7 signerInfo version must be 1 or 3");
+ ret = ASN_VERSION_E;
+ }
+
+ /* Get the sequence of digestAlgorithm */
+ if (ret == 0 && GetAlgoId(in, &idx, &hashOID, oidHashType, inSz) < 0) {
+ ret = ASN_PARSE_E;
+ }
+ pkcs7->hashOID = (int)hashOID;
+
+ /* Get the IMPLICIT[0] SET OF signedAttributes */
+ localIdx = idx;
+ if (ret == 0 && GetASNTag(in, &localIdx, &tag, inSz) == 0 &&
+ tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
+ idx++;
+
+ if (GetLength(in, &idx, &length, inSz) < 0)
+ ret = ASN_PARSE_E;
+
+ /* save pointer and length */
+ *signedAttrib = &in[idx];
+ *signedAttribSz = length;
+
+ if (ret == 0 && wc_PKCS7_ParseAttribs(pkcs7, *signedAttrib,
+ *signedAttribSz) < 0) {
+ WOLFSSL_MSG("Error parsing signed attributes");
+ ret = ASN_PARSE_E;
+ }
+
+ idx += length;
+ }
+
+ /* Get digestEncryptionAlgorithm */
+ if (ret == 0 && GetAlgoId(in, &idx, &sigOID, oidSigType, inSz) < 0) {
+ ret
= ASN_PARSE_E; + } + + /* store public key type based on digestEncryptionAlgorithm */ + if (ret == 0) { + ret = wc_PKCS7_SetPublicKeyOID(pkcs7, sigOID); + if (ret < 0) { + WOLFSSL_MSG("Failed to set public key OID from signature"); + } + else { + /* if previous return was positive then was success */ + ret = 0; + } + } + } + + /* update index on success */ + if (ret == 0) { + *idxIn = idx; + } + + return ret; +} + + +/* Finds the certificates in the message and saves it. By default allows + * degenerate cases which can have no signer. + * + * By default expects type SIGNED_DATA (SignedData) which can have any number of + * elements in signerInfos collection, including zero. (RFC2315 section 9.1) + * When adding support for the case of SignedAndEnvelopedData content types a + * signer is required. In this case the PKCS7 flag noDegenerate could be set. + */ +static int PKCS7_VerifySignedData(PKCS7* pkcs7, const byte* hashBuf, + word32 hashSz, byte* in, word32 inSz, + byte* in2, word32 in2Sz) +{ + word32 idx, maxIdx = inSz, outerContentType, contentTypeSz = 0, totalSz = 0; + int length = 0, version = 0, ret = 0; + byte* content = NULL; + byte* contentDynamic = NULL; + byte* sig = NULL; + byte* cert = NULL; + byte* signedAttrib = NULL; + byte* contentType = NULL; + int contentSz = 0, sigSz = 0, certSz = 0, signedAttribSz = 0; + word32 localIdx, start; + byte degenerate = 0; + byte detached = 0; + byte tag = 0; +#ifdef ASN_BER_TO_DER + byte* der; +#endif + int multiPart = 0, keepContent; + int contentLen = 0; + + byte* pkiMsg = in; + word32 pkiMsgSz = inSz; +#ifndef NO_PKCS7_STREAM + word32 stateIdx = 0; + long rc; +#endif + + byte* pkiMsg2 = in2; + word32 pkiMsg2Sz = in2Sz; + + if (pkcs7 == NULL) + return BAD_FUNC_ARG; + +#ifndef NO_PKCS7_STREAM + /* allow for 0 size inputs with stream mode */ + if (pkiMsg == NULL && pkiMsgSz > 0) + return BAD_FUNC_ARG; + +#else + if (pkiMsg == NULL || pkiMsgSz == 0) + return BAD_FUNC_ARG; +#endif + + if ((hashSz > 0 && hashBuf == NULL) || (pkiMsg2Sz > 0 && pkiMsg2 == NULL)) { + return BAD_FUNC_ARG; + } + idx = 0; + +#ifdef ASN_BER_TO_DER + if (pkcs7->derSz > 0 && pkcs7->der) { + pkiMsg = in = pkcs7->der; + } +#endif + +#ifndef NO_PKCS7_STREAM + if (pkcs7->stream == NULL) { + if ((ret = wc_PKCS7_CreateStream(pkcs7)) != 0) { + return ret; + } + } +#endif + + switch (pkcs7->state) { + case WC_PKCS7_START: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_SEQ_SZ + + MAX_VERSION_SZ + MAX_SEQ_SZ + MAX_LENGTH_SZ + + ASN_TAG_SZ + MAX_OID_SZ + MAX_SEQ_SZ, + &pkiMsg, &idx)) != 0) { + break; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_SEQ_PEEK, in, inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (pkcs7->stream->length > 0)? 
pkcs7->stream->length :inSz; + #endif + + /* determine total message size */ + totalSz = pkiMsgSz; + if (pkiMsg2 && pkiMsg2Sz > 0) { + totalSz += pkiMsg2Sz + pkcs7->contentSz; + } + + /* Get the contentInfo sequence */ + if (ret == 0 && GetSequence_ex(pkiMsg, &idx, &length, totalSz, + NO_USER_CHECK) < 0) + ret = ASN_PARSE_E; + + if (ret == 0 && length == 0 && pkiMsg[idx-1] == 0x80) { + #ifdef ASN_BER_TO_DER + word32 len = 0; + + ret = wc_BerToDer(pkiMsg, pkiMsgSz, NULL, &len); + if (ret != LENGTH_ONLY_E) + return ret; + pkcs7->der = (byte*)XMALLOC(len, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (pkcs7->der == NULL) + return MEMORY_E; + ret = wc_BerToDer(pkiMsg, pkiMsgSz, pkcs7->der, &len); + if (ret < 0) + return ret; + + pkiMsg = in = pkcs7->der; + pkiMsgSz = pkcs7->derSz = len; + idx = 0; + if (GetSequence_ex(pkiMsg, &idx, &length, pkiMsgSz, + NO_USER_CHECK) < 0) + return ASN_PARSE_E; + + #ifndef NO_PKCS7_STREAM + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_SEQ_PEEK, + pkiMsg, pkiMsgSz); + if (rc < 0) { + ret = (int)rc; + break; + } + #endif + #else + ret = BER_INDEF_E; + #endif + } + + /* Get the contentInfo contentType */ + if (ret == 0 && wc_GetContentType(pkiMsg, &idx, &outerContentType, + pkiMsgSz) < 0) + ret = ASN_PARSE_E; + + if (ret == 0 && outerContentType != SIGNED_DATA) { + WOLFSSL_MSG("PKCS#7 input not of type SignedData"); + ret = PKCS7_OID_E; + } + + /* get the ContentInfo content */ + if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, totalSz) != 0) + ret = ASN_PARSE_E; + + if (ret == 0 && tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) + ret = ASN_PARSE_E; + + if (ret == 0 && GetLength_ex(pkiMsg, &idx, &length, totalSz, + NO_USER_CHECK) < 0) + ret = ASN_PARSE_E; + + /* Get the signedData sequence */ + if (ret == 0 && GetSequence_ex(pkiMsg, &idx, &length, totalSz, + NO_USER_CHECK) < 0) + ret = ASN_PARSE_E; + + /* Get the version */ + if (ret == 0 && GetMyVersion(pkiMsg, &idx, &version, pkiMsgSz) < 0) + ret = ASN_PARSE_E; + + + /* version 1 follows RFC 2315 */ + /* version 3 follows RFC 4108 */ + if (ret == 0 && (version != 1 && version != 3)) { + WOLFSSL_MSG("PKCS#7 signedData needs to be version 1 or 3"); + ret = ASN_VERSION_E; + } + pkcs7->version = version; + + /* Get the set of DigestAlgorithmIdentifiers */ + if (ret == 0 && GetSet(pkiMsg, &idx, &length, pkiMsgSz) < 0) + ret = ASN_PARSE_E; + + /* Skip the set. */ + idx += length; + degenerate = (length == 0)? 
1 : 0; + if (pkcs7->noDegenerate == 1 && degenerate == 1) { + ret = PKCS7_NO_SIGNER_E; + } + + if (ret != 0) + break; + + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, &idx)) != 0) { + break; + } + if (pkiMsg2 && pkiMsg2Sz > 0) { + pkcs7->stream->maxLen += pkiMsg2Sz + pkcs7->contentSz; + } + wc_PKCS7_StreamStoreVar(pkcs7, totalSz, 0, 0); + #endif + + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_VERIFY_STAGE2); + FALL_THROUGH; + + case WC_PKCS7_VERIFY_STAGE2: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz + in2Sz, + MAX_SEQ_SZ + MAX_OID_SZ + ASN_TAG_SZ + MAX_LENGTH_SZ + + ASN_TAG_SZ + MAX_LENGTH_SZ, &pkiMsg, &idx)) != 0) { + break; + } + + wc_PKCS7_StreamGetVar(pkcs7, &totalSz, 0, 0); + if (pkcs7->stream->length > 0) + pkiMsgSz = pkcs7->stream->length; + #ifdef ASN_BER_TO_DER + else if (pkcs7->der) + pkiMsgSz = pkcs7->derSz; + #endif + else + pkiMsgSz = inSz; + + #endif + /* Get the inner ContentInfo sequence */ + if (GetSequence_ex(pkiMsg, &idx, &length, pkiMsgSz, + NO_USER_CHECK) < 0) + ret = ASN_PARSE_E; + + /* Get the inner ContentInfo contentType */ + if (ret == 0) { + word32 tmpIdx = idx; + + if (GetASNObjectId(pkiMsg, &idx, &length, pkiMsgSz) != 0) + ret = ASN_PARSE_E; + + contentType = pkiMsg + tmpIdx; + contentTypeSz = length + (idx - tmpIdx); + + idx += length; + } + + if (ret != 0) + break; + + /* Check for content info, it could be omitted when degenerate */ + localIdx = idx; + ret = 0; + if (localIdx + 1 > pkiMsgSz) { + ret = BUFFER_E; + break; + } + + if (ret == 0 && GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) != 0) + ret = ASN_PARSE_E; + + if (ret == 0 && tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) + ret = ASN_PARSE_E; + + if (ret == 0 && GetLength_ex(pkiMsg, &localIdx, &length, pkiMsgSz, + NO_USER_CHECK) <= 0) + ret = ASN_PARSE_E; + + if (localIdx >= pkiMsgSz) { + ret = BUFFER_E; + } + + /* get length of content in the case that there is multiple parts */ + if (ret == 0 && GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) < 0) + ret = ASN_PARSE_E; + + if (ret == 0 && tag == (ASN_OCTET_STRING | ASN_CONSTRUCTED)) { + multiPart = 1; + + /* Get length of all OCTET_STRINGs. */ + if (GetLength_ex(pkiMsg, &localIdx, &contentLen, pkiMsgSz, + NO_USER_CHECK) < 0) + ret = ASN_PARSE_E; + + /* Check whether there is one OCTET_STRING inside. */ + start = localIdx; + if (localIdx >= pkiMsgSz) { + ret = BUFFER_E; + } + + if (ret == 0 && GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) + != 0) + ret = ASN_PARSE_E; + + if (ret == 0 && tag != ASN_OCTET_STRING) + ret = ASN_PARSE_E; + + if (ret == 0 && GetLength_ex(pkiMsg, &localIdx, &length, pkiMsgSz, + NO_USER_CHECK) < 0) + ret = ASN_PARSE_E; + + if (ret == 0) { + /* Use single OCTET_STRING directly. 
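+                 * As an illustration (hypothetical bytes), a multi-part
+                 * content octet string looks like:
+                 *   24 0A             -- constructed OCTET STRING, 10 bytes
+                 *      04 03 aa bb cc -- part one
+                 *      04 03 dd ee ff -- part two
+                 * while a lone inner OCTET STRING spanning all of
+                 * contentLen is used in place, with no reassembly.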
*/ + if (localIdx - start + length == (word32)contentLen) + multiPart = 0; + localIdx = start; + } + } + + /* get length of content in case of single part */ + if (ret == 0 && !multiPart) { + if (tag != ASN_OCTET_STRING) + ret = ASN_PARSE_E; + + if (ret == 0 && GetLength_ex(pkiMsg, &localIdx, + &length, pkiMsgSz, NO_USER_CHECK) < 0) + ret = ASN_PARSE_E; + } + + /* update idx if successful */ + if (ret == 0) { + /* support using header and footer without content */ + if (pkiMsg2 && pkiMsg2Sz > 0 && hashBuf && hashSz > 0) { + localIdx = 0; + } + idx = localIdx; + } + else { + + /* if pkcs7->content and pkcs7->contentSz are set, try to + process as a detached signature */ + if (!degenerate && + (pkcs7->content != NULL && pkcs7->contentSz != 0)) { + detached = 1; + } + + if (!degenerate && !detached && ret != 0) + break; + + length = 0; /* no content to read */ + pkiMsg2 = pkiMsg; + pkiMsg2Sz = pkiMsgSz; + } + + #ifndef NO_PKCS7_STREAM + /* save detached flag value */ + pkcs7->stream->detached = detached; + + /* save contentType */ + pkcs7->stream->nonce = (byte*)XMALLOC(contentTypeSz, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (pkcs7->stream->nonce == NULL) { + ret = MEMORY_E; + break; + } + else { + pkcs7->stream->nonceSz = contentTypeSz; + XMEMCPY(pkcs7->stream->nonce, contentType, contentTypeSz); + } + + /* content expected? */ + if ((ret == 0 && length > 0) && + !(pkiMsg2 && pkiMsg2Sz > 0 && hashBuf && hashSz > 0)) { + pkcs7->stream->expected = length + ASN_TAG_SZ + MAX_LENGTH_SZ; + } + else { + pkcs7->stream->expected = ASN_TAG_SZ + MAX_LENGTH_SZ; + } + + if (pkcs7->stream->expected > (pkcs7->stream->maxLen - idx)) { + pkcs7->stream->expected = pkcs7->stream->maxLen - idx; + } + + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, &idx)) != 0) { + break; + } + wc_PKCS7_StreamStoreVar(pkcs7, pkiMsg2Sz, localIdx, length); + + /* content length is in multiple parts */ + if (multiPart) { + pkcs7->stream->expected = contentLen + ASN_TAG_SZ; + } + pkcs7->stream->multi = multiPart; + + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_VERIFY_STAGE3); + FALL_THROUGH; + + case WC_PKCS7_VERIFY_STAGE3: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz + in2Sz, + pkcs7->stream->expected, &pkiMsg, &idx)) != 0) { + break; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, + pkiMsg, pkiMsgSz); + if (rc < 0) { + ret = (int)rc; + break; + } + #ifdef ASN_BER_TO_DER + if (pkcs7->derSz != 0) + pkiMsgSz = pkcs7->derSz; + else + #endif + pkiMsgSz = (word32)rc; + wc_PKCS7_StreamGetVar(pkcs7, &pkiMsg2Sz, (int*)&localIdx, &length); + + if (pkcs7->stream->length > 0) { + localIdx = 0; + } + multiPart = pkcs7->stream->multi; + detached = pkcs7->stream->detached; + maxIdx = idx + pkcs7->stream->expected; + #endif + + /* Break out before content because it can be optional in degenerate + * cases. */ + if (ret != 0 && !degenerate) + break; + + /* get parts of content */ + if (ret == 0 && multiPart) { + int i = 0; + keepContent = !(pkiMsg2 && pkiMsg2Sz > 0 && hashBuf && hashSz > 0); + + if (keepContent) { + /* Create a buffer to hold content of OCTET_STRINGs. */ + pkcs7->contentDynamic = (byte*)XMALLOC(contentLen, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (pkcs7->contentDynamic == NULL) + ret = MEMORY_E; + } + + start = localIdx; + /* Use the data from each OCTET_STRING. 
*/ + while (ret == 0 && localIdx < start + contentLen) { + if (GetASNTag(pkiMsg, &localIdx, &tag, totalSz) < 0) + ret = ASN_PARSE_E; + if (ret == 0 && tag != ASN_OCTET_STRING) + ret = ASN_PARSE_E; + + if (ret == 0 && GetLength(pkiMsg, &localIdx, &length, totalSz) < 0) + ret = ASN_PARSE_E; + if (ret == 0 && length + localIdx > start + contentLen) + ret = ASN_PARSE_E; + + if (ret == 0) { + if (keepContent) { + XMEMCPY(pkcs7->contentDynamic + i, pkiMsg + localIdx, + length); + } + i += length; + localIdx += length; + } + } + localIdx = start; /* reset for sanity check, increment later */ + length = i; + } + + /* Save the inner data as the content. */ + if (ret == 0 && length > 0) { + contentSz = length; + + /* support using header and footer without content */ + if (pkiMsg2 && pkiMsg2Sz > 0 && hashBuf && hashSz > 0) { + /* Content not provided, use provided pkiMsg2 footer */ + content = NULL; + localIdx = 0; + if (contentSz != (int)pkcs7->contentSz) { + WOLFSSL_MSG("Data signed does not match contentSz provided"); + ret = BUFFER_E; + } + } + else { + if ((word32)length > pkiMsgSz - localIdx) { + ret = BUFFER_E; + } + + /* Content pointer for calculating hashes later */ + if (ret == 0 && !multiPart) { + content = &pkiMsg[localIdx]; + } + if (ret == 0 && multiPart) { + content = pkcs7->contentDynamic; + } + + if (ret == 0) { + idx += length; + + pkiMsg2 = pkiMsg; + pkiMsg2Sz = pkiMsgSz; + #ifndef NO_PKCS7_STREAM + pkcs7->stream->varOne = pkiMsg2Sz; + pkcs7->stream->flagOne = 1; + #endif + } + } + } + else { + pkiMsg2 = pkiMsg; + pkiMsg2Sz = pkiMsgSz; + #ifndef NO_PKCS7_STREAM + pkcs7->stream->varOne = pkiMsg2Sz; + pkcs7->stream->flagOne = 1; + #endif + } + + /* If getting the content info failed with non degenerate then return the + * error case. Otherwise with a degenerate it is ok if the content + * info was omitted */ + if (!degenerate && !detached && (ret != 0)) { + break; + } + else { + ret = 0; /* reset ret state on degenerate case */ + } + + #ifndef NO_PKCS7_STREAM + /* save content */ + if (detached == 1) { + /* if detached, use content from user in pkcs7 struct */ + content = pkcs7->content; + contentSz = pkcs7->contentSz; + } + + if (content != NULL) { + XFREE(pkcs7->stream->content, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + pkcs7->stream->content = (byte*)XMALLOC(contentSz, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (pkcs7->stream->content == NULL) { + ret = MEMORY_E; + break; + } + else { + XMEMCPY(pkcs7->stream->content, content, contentSz); + pkcs7->stream->contentSz = contentSz; + } + } + #endif /* !NO_PKCS7_STREAM */ + + /* Get the implicit[0] set of certificates */ + if (ret == 0 && idx >= pkiMsg2Sz) + ret = BUFFER_E; + + length = 0; /* set length to 0 to check if reading in any certs */ + localIdx = idx; + if (ret == 0 && GetASNTag(pkiMsg2, &localIdx, &tag, pkiMsg2Sz) == 0 + && tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) { + idx++; + if (GetLength_ex(pkiMsg2, &idx, &length, maxIdx, NO_USER_CHECK) + < 0) + ret = ASN_PARSE_E; + } + + if (ret != 0) { + break; + } + #ifndef NO_PKCS7_STREAM + if (in2 && in2Sz > 0 && hashBuf && hashSz > 0) { + stateIdx = idx; /* case where all data was read from in2 */ + } + + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, &idx)) != 0) { + break; + } + wc_PKCS7_StreamStoreVar(pkcs7, pkiMsg2Sz, 0, length); + if (length > 0) { + pkcs7->stream->expected = length; + } + else { + pkcs7->stream->expected = MAX_SEQ_SZ; + if (pkcs7->stream->expected > (pkcs7->stream->maxLen - + pkcs7->stream->totalRd) + pkcs7->stream->length) { + 
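+                    /* clamp the next expected read so it cannot pass the
+                     * bytes still outstanding in the stream */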
pkcs7->stream->expected = (pkcs7->stream->maxLen - + pkcs7->stream->totalRd) + pkcs7->stream->length; + } + } + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_VERIFY_STAGE4); + FALL_THROUGH; + + case WC_PKCS7_VERIFY_STAGE4: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz + in2Sz, + pkcs7->stream->expected, &pkiMsg, &idx)) != 0) { + break; + } + + wc_PKCS7_StreamGetVar(pkcs7, &pkiMsg2Sz, 0, &length); + if (pkcs7->stream->flagOne) { + pkiMsg2 = pkiMsg; + } + + /* restore content */ + content = pkcs7->stream->content; + contentSz = pkcs7->stream->contentSz; + + /* restore detached flag */ + detached = pkcs7->stream->detached; + + /* store certificate if needed */ + if (length > 0 && in2Sz == 0) { + /* free tmpCert if not NULL */ + XFREE(pkcs7->stream->tmpCert, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + pkcs7->stream->tmpCert = (byte*)XMALLOC(length, + pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if ((pkiMsg2 == NULL) || (pkcs7->stream->tmpCert == NULL)) { + ret = MEMORY_E; + break; + } + XMEMCPY(pkcs7->stream->tmpCert, pkiMsg2 + idx, length); + pkiMsg2 = pkcs7->stream->tmpCert; + pkiMsg2Sz = length; + idx = 0; + } + #endif + + if (length > 0) { + /* At this point, idx is at the first certificate in + * a set of certificates. There may be more than one, + * or none, or they may be a PKCS 6 extended + * certificate. We want to save the first cert if it + * is X.509. */ + + word32 certIdx = idx; + + if (length < MAX_LENGTH_SZ + ASN_TAG_SZ) + ret = BUFFER_E; + + if (ret == 0) + ret = GetASNTag(pkiMsg2, &certIdx, &tag, pkiMsg2Sz); + + if (ret == 0 && tag == (ASN_CONSTRUCTED | ASN_SEQUENCE)) { + if (GetLength(pkiMsg2, &certIdx, &certSz, pkiMsg2Sz) < 0) + ret = ASN_PARSE_E; + + cert = &pkiMsg2[idx]; + certSz += (certIdx - idx); + if (certSz > length) { + ret = BUFFER_E; + break; + } + } + #ifdef ASN_BER_TO_DER + der = pkcs7->der; + #endif + contentDynamic = pkcs7->contentDynamic; + version = pkcs7->version; + + + if (ret == 0) { + #ifndef NO_PKCS7_STREAM + PKCS7State* stream = pkcs7->stream; + #endif + /* This will reset PKCS7 structure and then set the + * certificate */ + ret = wc_PKCS7_InitWithCert(pkcs7, cert, certSz); + #ifndef NO_PKCS7_STREAM + pkcs7->stream = stream; + #endif + } + pkcs7->contentDynamic = contentDynamic; + pkcs7->version = version; + #ifdef ASN_BER_TO_DER + pkcs7->der = der; + #endif + if (ret != 0) + break; + + /* iterate through any additional certificates */ + if (ret == 0 && MAX_PKCS7_CERTS > 0) { + int sz = 0; + int i; + + pkcs7->cert[0] = cert; + pkcs7->certSz[0] = certSz; + certIdx = idx + certSz; + + for (i = 1; i < MAX_PKCS7_CERTS && + certIdx + 1 < pkiMsg2Sz && + certIdx + 1 < (word32)length; i++) { + localIdx = certIdx; + + if (ret == 0 && GetASNTag(pkiMsg2, &certIdx, &tag, + pkiMsg2Sz) < 0) { + ret = ASN_PARSE_E; + break; + } + + if (ret == 0 && + tag == (ASN_CONSTRUCTED | ASN_SEQUENCE)) { + if (GetLength(pkiMsg2, &certIdx, &sz, + pkiMsg2Sz) < 0) { + ret = ASN_PARSE_E; + break; + } + + pkcs7->cert[i] = &pkiMsg2[localIdx]; + pkcs7->certSz[i] = sz + (certIdx - localIdx); + certIdx += sz; + } + } + } + } + idx += length; + + if (!detached) { + /* set content and size after init of PKCS7 structure */ + pkcs7->content = content; + pkcs7->contentSz = contentSz; + } + #ifndef NO_PKCS7_STREAM + else { + /* save content if detached and using streaming API */ + if (pkcs7->content != NULL) { + XFREE(pkcs7->stream->content, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + pkcs7->stream->content = (byte*)XMALLOC(pkcs7->contentSz, + pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if 
(pkcs7->stream->content == NULL) { + ret = MEMORY_E; + break; + } + else { + XMEMCPY(pkcs7->stream->content, pkcs7->content, + contentSz); + pkcs7->stream->contentSz = pkcs7->contentSz; + } + } + } + #endif + + if (ret != 0) { + break; + } + #ifndef NO_PKCS7_STREAM + /* factor in that recent idx was in cert buffer. If in2 buffer was + * used then don't advance idx. */ + if (length > 0 && pkcs7->stream->flagOne && + pkcs7->stream->length == 0) { + idx = stateIdx + idx; + if (idx > inSz) { + /* index is more than input size */ + ret = BUFFER_E; + break; + } + } + else { + stateIdx = idx; /* didn't read any from internal buffer */ + } + + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, &idx)) != 0) { + break; + } + if (pkcs7->stream->flagOne && pkcs7->stream->length > 0) { + idx = stateIdx + idx; + } + + pkcs7->stream->expected = MAX_OID_SZ + ASN_TAG_SZ + MAX_LENGTH_SZ + + MAX_SET_SZ; + + if (pkcs7->stream->expected > (pkcs7->stream->maxLen - + pkcs7->stream->totalRd) + pkcs7->stream->length) + pkcs7->stream->expected = (pkcs7->stream->maxLen - + pkcs7->stream->totalRd) + pkcs7->stream->length; + + wc_PKCS7_StreamGetVar(pkcs7, &pkiMsg2Sz, 0, 0); + wc_PKCS7_StreamStoreVar(pkcs7, pkiMsg2Sz, 0, length); + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_VERIFY_STAGE5); + FALL_THROUGH; + + case WC_PKCS7_VERIFY_STAGE5: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz + in2Sz, + pkcs7->stream->expected, &pkiMsg, &idx)) != 0) { + break; + } + wc_PKCS7_StreamGetVar(pkcs7, &pkiMsg2Sz, 0, &length); + if (pkcs7->stream->flagOne) { + pkiMsg2 = pkiMsg; + } + + /* restore content type */ + contentType = pkcs7->stream->nonce; + contentTypeSz = pkcs7->stream->nonceSz; + + maxIdx = idx + pkcs7->stream->expected; + if (maxIdx > pkiMsg2Sz) { + ret = BUFFER_E; + break; + } + stateIdx = idx; + #endif + + /* set contentType and size after init of PKCS7 structure */ + if (ret == 0 && wc_PKCS7_SetContentType(pkcs7, contentType, + contentTypeSz) < 0) + ret = ASN_PARSE_E; + + /* Get the implicit[1] set of crls */ + if (ret == 0 && idx >= maxIdx) + ret = BUFFER_E; + + localIdx = idx; + if (ret == 0 && GetASNTag(pkiMsg2, &localIdx, &tag, pkiMsg2Sz) == 0 + && tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) { + idx++; + if (GetLength(pkiMsg2, &idx, &length, pkiMsg2Sz) < 0) + ret = ASN_PARSE_E; + + /* Skip the set */ + idx += length; + } + + /* Get the set of signerInfos */ + if (ret == 0 && GetSet_ex(pkiMsg2, &idx, &length, maxIdx, + NO_USER_CHECK) < 0) + ret = ASN_PARSE_E; + + if (ret != 0) + break; + #ifndef NO_PKCS7_STREAM + if (!pkcs7->stream->flagOne) { + stateIdx = idx; /* didn't read any from internal buffer */ + } + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, &idx)) != 0) { + break; + } + wc_PKCS7_StreamStoreVar(pkcs7, pkiMsg2Sz, 0, length); + + if (in2 && in2Sz > 0 && hashBuf && hashSz > 0) { + if (length > 0) { + pkcs7->stream->expected = length; + } + else { + pkcs7->stream->expected = 0; + } + } + else { + /* last state expect the reset of the buffer */ + pkcs7->stream->expected = (pkcs7->stream->maxLen - + pkcs7->stream->totalRd) + pkcs7->stream->length; + } + + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_VERIFY_STAGE6); + FALL_THROUGH; + + case WC_PKCS7_VERIFY_STAGE6: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz + in2Sz, + pkcs7->stream->expected, &pkiMsg, &idx)) != 0) { + break; + } + + wc_PKCS7_StreamGetVar(pkcs7, &pkiMsg2Sz, 0, &length); + if (pkcs7->stream->flagOne) { + pkiMsg2 = pkiMsg; + } + + /* restore content */ + 
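+            /* the copy saved into pkcs7->stream->content during STAGE3 is
+             * brought back so the signerInfo signature can be checked over
+             * the original data */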
+            content = pkcs7->stream->content;
+            contentSz = pkcs7->stream->contentSz;
+        #endif
+
+            ret = wc_PKCS7_ParseSignerInfo(pkcs7, pkiMsg2, pkiMsg2Sz, &idx,
+                degenerate, &signedAttrib, &signedAttribSz);
+
+            /* parse out the signature if present and verify it */
+            if (ret == 0 && length > 0 && degenerate == 0) {
+                WOLFSSL_MSG("Parsing signature and verifying");
+                if (idx >= pkiMsg2Sz)
+                    ret = BUFFER_E;
+
+                /* Get the signature */
+                localIdx = idx;
+                if (ret == 0 && GetASNTag(pkiMsg2, &localIdx, &tag,
+                        pkiMsg2Sz) == 0 && tag == ASN_OCTET_STRING) {
+                    idx++;
+
+                    if (GetLength(pkiMsg2, &idx, &length, pkiMsg2Sz) < 0)
+                        ret = ASN_PARSE_E;
+
+                    /* save pointer and length */
+                    sig = &pkiMsg2[idx];
+                    sigSz = length;
+
+                    idx += length;
+                }
+
+                pkcs7->content = content;
+                pkcs7->contentSz = contentSz;
+
+                if (ret == 0) {
+                    ret = wc_PKCS7_SignedDataVerifySignature(pkcs7, sig, sigSz,
+                                                   signedAttrib, signedAttribSz,
+                                                   hashBuf, hashSz);
+                }
+            }
+
+            if (ret < 0)
+                break;
+
+            ret = 0; /* success */
+        #ifndef NO_PKCS7_STREAM
+            wc_PKCS7_ResetStream(pkcs7);
+        #endif
+            wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START);
+            break;
+
+        default:
+            WOLFSSL_MSG("PKCS7 Unknown verify state");
+            ret = BAD_FUNC_ARG;
+    }
+
+    if (ret != 0 && ret != WC_PKCS7_WANT_READ_E) {
+    #ifndef NO_PKCS7_STREAM
+        wc_PKCS7_ResetStream(pkcs7);
+    #endif
+        wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START);
+    }
+    return ret;
+}
+
+
+/* Gets a copy of the SID parsed from signerInfo. This can be called after
+ * wc_PKCS7_VerifySignedData has been called. The SID is the SKID in the
+ * version 3 case, or issuerAndSerialNumber otherwise.
+ *
+ * Returns 0 on success, or LENGTH_ONLY_E when only setting "outSz" to the
+ * buffer length needed.
+ */
+int wc_PKCS7_GetSignerSID(PKCS7* pkcs7, byte* out, word32* outSz)
+{
+    if (outSz == NULL || pkcs7 == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (pkcs7->signerInfo == NULL) {
+        WOLFSSL_MSG("Either the bundle had no signers or "
+                    "wc_PKCS7_VerifySignedData has not been called yet");
+        return PKCS7_NO_SIGNER_E;
+    }
+
+    if (pkcs7->signerInfo->sidSz == 0) {
+        WOLFSSL_MSG("Bundle had no signer SID set");
+        return PKCS7_NO_SIGNER_E;
+    }
+
+    if (out == NULL) {
+        *outSz = pkcs7->signerInfo->sidSz;
+        return LENGTH_ONLY_E;
+    }
+
+    if (*outSz < pkcs7->signerInfo->sidSz) {
+        WOLFSSL_MSG("Buffer being passed in is not large enough for SKID");
+        return BUFFER_E;
+    }
+    XMEMCPY(out, pkcs7->signerInfo->sid, pkcs7->signerInfo->sidSz);
+    *outSz = pkcs7->signerInfo->sidSz;
+    return 0;
+}
+
+
+/* variant that allows a precomputed data hash and header/footer,
+ * which is useful for large data signing */
+int wc_PKCS7_VerifySignedData_ex(PKCS7* pkcs7, const byte* hashBuf,
+    word32 hashSz, byte* pkiMsgHead, word32 pkiMsgHeadSz, byte* pkiMsgFoot,
+    word32 pkiMsgFootSz)
+{
+    return PKCS7_VerifySignedData(pkcs7, hashBuf, hashSz,
+        pkiMsgHead, pkiMsgHeadSz, pkiMsgFoot, pkiMsgFootSz);
+}
+
+int wc_PKCS7_VerifySignedData(PKCS7* pkcs7, byte* pkiMsg, word32 pkiMsgSz)
+{
+    return PKCS7_VerifySignedData(pkcs7, NULL, 0, pkiMsg, pkiMsgSz, NULL, 0);
+}
+
+
+/* Generate random content encryption key, store into pkcs7->cek and
+ * pkcs7->cekSz.
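+ *
+ * One CEK is shared by every recipient added to a single EnvelopedData
+ * message; a later call asking for a different key length fails with
+ * WC_KEY_SIZE_E instead of regenerating the key.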
+ * + * pkcs7 - pointer to initialized PKCS7 structure + * len - length of key to be generated + * + * Returns 0 on success, negative upon error */ +static int PKCS7_GenerateContentEncryptionKey(PKCS7* pkcs7, word32 len) +{ + int ret; + WC_RNG rng; + byte* tmpKey; + + if (pkcs7 == NULL || len == 0) + return BAD_FUNC_ARG; + + /* if key already exists, don't need to re-generate */ + if (pkcs7->cek != NULL && pkcs7->cekSz != 0) { + + /* if key exists, but is different size, return error */ + if (pkcs7->cekSz != len) { + WOLFSSL_MSG("Random content-encryption key size is inconsistent " + "between CMS recipients"); + return WC_KEY_SIZE_E; + } + + return 0; + } + + /* allocate space for cek */ + tmpKey = (byte*)XMALLOC(len, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (tmpKey == NULL) + return MEMORY_E; + + XMEMSET(tmpKey, 0, len); + + ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId); + if (ret != 0) { + XFREE(tmpKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + ret = wc_RNG_GenerateBlock(&rng, tmpKey, len); + if (ret != 0) { + wc_FreeRng(&rng); + XFREE(tmpKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + /* store into PKCS7, memory freed during final cleanup */ + pkcs7->cek = tmpKey; + pkcs7->cekSz = len; + + wc_FreeRng(&rng); + + return 0; +} + + +/* wrap CEK (content encryption key) with KEK, 0 on success, < 0 on error */ +static int wc_PKCS7_KeyWrap(byte* cek, word32 cekSz, byte* kek, + word32 kekSz, byte* out, word32 outSz, + int keyWrapAlgo, int direction) +{ + int ret = 0; + + if (cek == NULL || kek == NULL || out == NULL) + return BAD_FUNC_ARG; + + switch (keyWrapAlgo) { +#ifndef NO_AES + #ifdef WOLFSSL_AES_128 + case AES128_WRAP: + #endif + #ifdef WOLFSSL_AES_192 + case AES192_WRAP: + #endif + #ifdef WOLFSSL_AES_256 + case AES256_WRAP: + #endif + + if (direction == AES_ENCRYPTION) { + + ret = wc_AesKeyWrap(kek, kekSz, cek, cekSz, + out, outSz, NULL); + + } else if (direction == AES_DECRYPTION) { + + ret = wc_AesKeyUnWrap(kek, kekSz, cek, cekSz, + out, outSz, NULL); + } else { + WOLFSSL_MSG("Bad key un/wrap direction"); + return BAD_FUNC_ARG; + } + + if (ret <= 0) + return ret; + break; +#endif /* NO_AES */ + + default: + WOLFSSL_MSG("Unsupported key wrap algorithm"); + return BAD_KEYWRAP_ALG_E; + }; + + (void)cekSz; + (void)kekSz; + (void)outSz; + (void)direction; + return ret; +} + + +#ifdef HAVE_ECC + +/* KARI == KeyAgreeRecipientInfo (key agreement) */ +typedef struct WC_PKCS7_KARI { + DecodedCert* decoded; /* decoded recip cert */ + void* heap; /* user heap, points to PKCS7->heap */ + int devId; /* device ID for HW based private key */ + ecc_key* recipKey; /* recip key (pub | priv) */ + ecc_key* senderKey; /* sender key (pub | priv) */ + byte* senderKeyExport; /* sender ephemeral key DER */ + byte* kek; /* key encryption key */ + byte* ukm; /* OPTIONAL user keying material */ + byte* sharedInfo; /* ECC-CMS-SharedInfo ASN.1 encoded blob */ + word32 senderKeyExportSz; /* size of sender ephemeral key DER */ + word32 kekSz; /* size of key encryption key */ + word32 ukmSz; /* size of user keying material */ + word32 sharedInfoSz; /* size of ECC-CMS-SharedInfo encoded */ + byte ukmOwner; /* do we own ukm buffer? 
1:yes, 0:no */ + byte direction; /* WC_PKCS7_ENCODE | WC_PKCS7_DECODE */ + byte decodedInit : 1; /* indicates decoded was initialized */ + byte recipKeyInit : 1; /* indicates recipKey was initialized */ + byte senderKeyInit : 1; /* indicates senderKey was initialized */ +} WC_PKCS7_KARI; + + +/* allocate and create new WC_PKCS7_KARI struct, + * returns struct pointer on success, NULL on failure */ +static WC_PKCS7_KARI* wc_PKCS7_KariNew(PKCS7* pkcs7, byte direction) +{ + WC_PKCS7_KARI* kari = NULL; + + if (pkcs7 == NULL) + return NULL; + + kari = (WC_PKCS7_KARI*)XMALLOC(sizeof(WC_PKCS7_KARI), pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (kari == NULL) { + WOLFSSL_MSG("Failed to allocate WC_PKCS7_KARI"); + return NULL; + } + + kari->decoded = (DecodedCert*)XMALLOC(sizeof(DecodedCert), pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (kari->decoded == NULL) { + WOLFSSL_MSG("Failed to allocate DecodedCert"); + XFREE(kari, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return NULL; + } + + kari->recipKey = (ecc_key*)XMALLOC(sizeof(ecc_key), pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (kari->recipKey == NULL) { + WOLFSSL_MSG("Failed to allocate recipient ecc_key"); + XFREE(kari->decoded, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(kari, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return NULL; + } + + kari->senderKey = (ecc_key*)XMALLOC(sizeof(ecc_key), pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (kari->senderKey == NULL) { + WOLFSSL_MSG("Failed to allocate sender ecc_key"); + XFREE(kari->recipKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(kari->decoded, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(kari, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return NULL; + } + + kari->senderKeyExport = NULL; + kari->senderKeyExportSz = 0; + kari->kek = NULL; + kari->kekSz = 0; + kari->ukm = NULL; + kari->ukmSz = 0; + kari->ukmOwner = 0; + kari->sharedInfo = NULL; + kari->sharedInfoSz = 0; + kari->direction = direction; + kari->decodedInit = 0; + kari->recipKeyInit = 0; + kari->senderKeyInit = 0; + + kari->heap = pkcs7->heap; + kari->devId = pkcs7->devId; + + return kari; +} + + +/* free WC_PKCS7_KARI struct, return 0 on success */ +static int wc_PKCS7_KariFree(WC_PKCS7_KARI* kari) +{ + void* heap; + + if (kari) { + heap = kari->heap; + + if (kari->decoded) { + if (kari->decodedInit) + FreeDecodedCert(kari->decoded); + XFREE(kari->decoded, heap, DYNAMIC_TYPE_PKCS7); + } + if (kari->senderKey) { + if (kari->senderKeyInit) + wc_ecc_free(kari->senderKey); + XFREE(kari->senderKey, heap, DYNAMIC_TYPE_PKCS7); + } + if (kari->recipKey) { + if (kari->recipKeyInit) + wc_ecc_free(kari->recipKey); + XFREE(kari->recipKey, heap, DYNAMIC_TYPE_PKCS7); + } + if (kari->senderKeyExport) { + ForceZero(kari->senderKeyExport, kari->senderKeyExportSz); + XFREE(kari->senderKeyExport, heap, DYNAMIC_TYPE_PKCS7); + kari->senderKeyExportSz = 0; + } + if (kari->kek) { + ForceZero(kari->kek, kari->kekSz); + XFREE(kari->kek, heap, DYNAMIC_TYPE_PKCS7); + kari->kekSz = 0; + } + if (kari->ukm) { + if (kari->ukmOwner == 1) { + XFREE(kari->ukm, heap, DYNAMIC_TYPE_PKCS7); + } + kari->ukmSz = 0; + } + if (kari->sharedInfo) { + ForceZero(kari->sharedInfo, kari->sharedInfoSz); + XFREE(kari->sharedInfo, heap, DYNAMIC_TYPE_PKCS7); + kari->sharedInfoSz = 0; + } + XFREE(kari, heap, DYNAMIC_TYPE_PKCS7); + } + + (void)heap; + + return 0; +} + + +/* parse recipient cert/key, return 0 on success, negative on error + * key/keySz only needed during decoding (WC_PKCS7_DECODE) */ +static int wc_PKCS7_KariParseRecipCert(WC_PKCS7_KARI* kari, const byte* cert, + word32 certSz, const byte* key, + word32 keySz) 
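+/* Note: when decoding, key may be NULL if a non-default devId was set on the
+ * PKCS7 structure; the private key is then expected to be resolved through
+ * that device */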
+{ + int ret; + word32 idx; + + if (kari == NULL || kari->decoded == NULL || + cert == NULL || certSz == 0) + return BAD_FUNC_ARG; + + /* decode certificate */ + InitDecodedCert(kari->decoded, (byte*)cert, certSz, kari->heap); + kari->decodedInit = 1; + ret = ParseCert(kari->decoded, CA_TYPE, NO_VERIFY, 0); + if (ret < 0) + return ret; + + /* only supports ECDSA for now */ + if (kari->decoded->keyOID != ECDSAk) { + WOLFSSL_MSG("CMS KARI only supports ECDSA key types"); + return BAD_FUNC_ARG; + } + + /* make sure subject key id was read from cert */ + if (kari->decoded->extSubjKeyIdSet == 0) { + WOLFSSL_MSG("Failed to read subject key ID from recipient cert"); + return BAD_FUNC_ARG; + } + + ret = wc_ecc_init_ex(kari->recipKey, kari->heap, kari->devId); + if (ret != 0) + return ret; + + kari->recipKeyInit = 1; + + /* get recip public key */ + if (kari->direction == WC_PKCS7_ENCODE) { + + idx = 0; + ret = wc_EccPublicKeyDecode(kari->decoded->publicKey, &idx, + kari->recipKey, kari->decoded->pubKeySize); + if (ret != 0) + return ret; + } + /* get recip private key */ + else if (kari->direction == WC_PKCS7_DECODE) { + if (key != NULL && keySz > 0) { + idx = 0; + ret = wc_EccPrivateKeyDecode(key, &idx, kari->recipKey, keySz); + } + else if (kari->devId == INVALID_DEVID) { + ret = BAD_FUNC_ARG; + } + if (ret != 0) + return ret; + + } else { + /* bad direction */ + return BAD_FUNC_ARG; + } + + (void)idx; + + return 0; +} + + +/* create ephemeral ECC key, places ecc_key in kari->senderKey, + * DER encoded in kari->senderKeyExport. return 0 on success, + * negative on error */ +static int wc_PKCS7_KariGenerateEphemeralKey(WC_PKCS7_KARI* kari) +{ + int ret; + WC_RNG rng; + + if (kari == NULL || kari->decoded == NULL || + kari->recipKey == NULL || kari->recipKey->dp == NULL) + return BAD_FUNC_ARG; + + kari->senderKeyExport = (byte*)XMALLOC(kari->decoded->pubKeySize, + kari->heap, DYNAMIC_TYPE_PKCS7); + if (kari->senderKeyExport == NULL) + return MEMORY_E; + + kari->senderKeyExportSz = kari->decoded->pubKeySize; + + ret = wc_ecc_init_ex(kari->senderKey, kari->heap, kari->devId); + if (ret != 0) { + XFREE(kari->senderKeyExport, kari->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + kari->senderKeyInit = 1; + + ret = wc_InitRng_ex(&rng, kari->heap, kari->devId); + if (ret != 0) { + XFREE(kari->senderKeyExport, kari->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + ret = wc_ecc_make_key_ex(&rng, kari->recipKey->dp->size, + kari->senderKey, kari->recipKey->dp->id); + if (ret != 0) { + XFREE(kari->senderKeyExport, kari->heap, DYNAMIC_TYPE_PKCS7); + wc_FreeRng(&rng); + return ret; + } + + wc_FreeRng(&rng); + + /* dump generated key to X.963 DER for output in CMS bundle */ + ret = wc_ecc_export_x963(kari->senderKey, kari->senderKeyExport, + &kari->senderKeyExportSz); + if (ret != 0) { + XFREE(kari->senderKeyExport, kari->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + return 0; +} + + +/* create ASN.1 encoded ECC-CMS-SharedInfo using specified key wrap algorithm, + * place in kari->sharedInfo. 
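+ * The encoding follows the ECC-CMS-SharedInfo type from RFC 5753:
+ *
+ *   ECC-CMS-SharedInfo ::= SEQUENCE {
+ *       keyInfo     AlgorithmIdentifier,                 -- key wrap algo
+ *       entityUInfo [0] EXPLICIT OCTET STRING OPTIONAL,  -- the ukm
+ *       suppPubInfo [2] EXPLICIT OCTET STRING            -- KEK length in
+ *   }                                                    -- bits, 4-byte BE
+ *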
returns 0 on success, negative on error */ +static int wc_PKCS7_KariGenerateSharedInfo(WC_PKCS7_KARI* kari, int keyWrapOID) +{ + int idx = 0; + int sharedInfoSeqSz = 0; + int keyInfoSz = 0; + int suppPubInfoSeqSz = 0; + int entityUInfoOctetSz = 0; + int entityUInfoExplicitSz = 0; + int kekOctetSz = 0; + int sharedInfoSz = 0; + + word32 kekBitSz = 0; + + byte sharedInfoSeq[MAX_SEQ_SZ]; + byte keyInfo[MAX_ALGO_SZ]; + byte suppPubInfoSeq[MAX_SEQ_SZ]; + byte entityUInfoOctet[MAX_OCTET_STR_SZ]; + byte entityUInfoExplicitSeq[MAX_SEQ_SZ]; + byte kekOctet[MAX_OCTET_STR_SZ]; + + if (kari == NULL) + return BAD_FUNC_ARG; + + if ((kari->ukmSz > 0) && (kari->ukm == NULL)) + return BAD_FUNC_ARG; + + /* kekOctet */ + kekOctetSz = SetOctetString(sizeof(word32), kekOctet); + sharedInfoSz += (kekOctetSz + sizeof(word32)); + + /* suppPubInfo */ + suppPubInfoSeqSz = SetImplicit(ASN_SEQUENCE, 2, + kekOctetSz + sizeof(word32), + suppPubInfoSeq); + sharedInfoSz += suppPubInfoSeqSz; + + /* optional ukm/entityInfo */ + if (kari->ukmSz > 0) { + entityUInfoOctetSz = SetOctetString(kari->ukmSz, entityUInfoOctet); + sharedInfoSz += (entityUInfoOctetSz + kari->ukmSz); + + entityUInfoExplicitSz = SetExplicit(0, entityUInfoOctetSz + + kari->ukmSz, + entityUInfoExplicitSeq); + sharedInfoSz += entityUInfoExplicitSz; + } + + /* keyInfo */ + keyInfoSz = SetAlgoID(keyWrapOID, keyInfo, oidKeyWrapType, 0); + sharedInfoSz += keyInfoSz; + + /* sharedInfo */ + sharedInfoSeqSz = SetSequence(sharedInfoSz, sharedInfoSeq); + sharedInfoSz += sharedInfoSeqSz; + + kari->sharedInfo = (byte*)XMALLOC(sharedInfoSz, kari->heap, + DYNAMIC_TYPE_PKCS7); + if (kari->sharedInfo == NULL) + return MEMORY_E; + + kari->sharedInfoSz = sharedInfoSz; + + XMEMCPY(kari->sharedInfo + idx, sharedInfoSeq, sharedInfoSeqSz); + idx += sharedInfoSeqSz; + XMEMCPY(kari->sharedInfo + idx, keyInfo, keyInfoSz); + idx += keyInfoSz; + if (kari->ukmSz > 0) { + XMEMCPY(kari->sharedInfo + idx, entityUInfoExplicitSeq, + entityUInfoExplicitSz); + idx += entityUInfoExplicitSz; + XMEMCPY(kari->sharedInfo + idx, entityUInfoOctet, entityUInfoOctetSz); + idx += entityUInfoOctetSz; + XMEMCPY(kari->sharedInfo + idx, kari->ukm, kari->ukmSz); + idx += kari->ukmSz; + } + XMEMCPY(kari->sharedInfo + idx, suppPubInfoSeq, suppPubInfoSeqSz); + idx += suppPubInfoSeqSz; + XMEMCPY(kari->sharedInfo + idx, kekOctet, kekOctetSz); + idx += kekOctetSz; + + kekBitSz = (kari->kekSz) * 8; /* convert to bits */ +#ifdef LITTLE_ENDIAN_ORDER + kekBitSz = ByteReverseWord32(kekBitSz); /* network byte order */ +#endif + XMEMCPY(kari->sharedInfo + idx, &kekBitSz, sizeof(kekBitSz)); + + return 0; +} + + +/* create key encryption key (KEK) using key wrap algorithm and key encryption + * algorithm, place in kari->kek. return 0 on success, <0 on error. 
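+ *
+ * In CMS terms the derivation below is:
+ *   KEK = X9.63-KDF(hash, ECDH(ourKey, theirKey), ECC-CMS-SharedInfo)
+ * with the hash selected by keyEncOID, one of the
+ * dhSinglePass_stdDH_*kdf_scheme identifiers.
+ *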
+ */
+static int wc_PKCS7_KariGenerateKEK(WC_PKCS7_KARI* kari,
+                                    int keyWrapOID, int keyEncOID)
+{
+    int ret;
+    int kSz;
+    enum wc_HashType kdfType;
+    byte*  secret;
+    word32 secretSz;
+
+    if (kari == NULL || kari->recipKey == NULL ||
+        kari->senderKey == NULL || kari->senderKey->dp == NULL)
+        return BAD_FUNC_ARG;
+
+    /* get KEK size, allocate buff */
+    kSz = wc_PKCS7_GetOIDKeySize(keyWrapOID);
+    if (kSz < 0)
+        return kSz;
+
+    kari->kek = (byte*)XMALLOC(kSz, kari->heap, DYNAMIC_TYPE_PKCS7);
+    if (kari->kek == NULL)
+        return MEMORY_E;
+
+    kari->kekSz = (word32)kSz;
+
+    /* generate ECC-CMS-SharedInfo */
+    ret = wc_PKCS7_KariGenerateSharedInfo(kari, keyWrapOID);
+    if (ret != 0)
+        return ret;
+
+    /* generate shared secret */
+    secretSz = kari->senderKey->dp->size;
+    secret = (byte*)XMALLOC(secretSz, kari->heap, DYNAMIC_TYPE_PKCS7);
+    if (secret == NULL)
+        return MEMORY_E;
+
+    if (kari->direction == WC_PKCS7_ENCODE) {
+
+        ret = wc_ecc_shared_secret(kari->senderKey, kari->recipKey,
+                                   secret, &secretSz);
+
+    } else if (kari->direction == WC_PKCS7_DECODE) {
+
+        ret = wc_ecc_shared_secret(kari->recipKey, kari->senderKey,
+                                   secret, &secretSz);
+
+    } else {
+        /* bad direction */
+        XFREE(secret, kari->heap, DYNAMIC_TYPE_PKCS7);
+        return BAD_FUNC_ARG;
+    }
+
+    if (ret != 0) {
+        XFREE(secret, kari->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+
+    /* run through KDF */
+    switch (keyEncOID) {
+
+    #ifndef NO_SHA
+        case dhSinglePass_stdDH_sha1kdf_scheme:
+            kdfType = WC_HASH_TYPE_SHA;
+            break;
+    #endif
+    #ifdef WOLFSSL_SHA224
+        case dhSinglePass_stdDH_sha224kdf_scheme:
+            kdfType = WC_HASH_TYPE_SHA224;
+            break;
+    #endif
+    #ifndef NO_SHA256
+        case dhSinglePass_stdDH_sha256kdf_scheme:
+            kdfType = WC_HASH_TYPE_SHA256;
+            break;
+    #endif
+    #ifdef WOLFSSL_SHA384
+        case dhSinglePass_stdDH_sha384kdf_scheme:
+            kdfType = WC_HASH_TYPE_SHA384;
+            break;
+    #endif
+    #ifdef WOLFSSL_SHA512
+        case dhSinglePass_stdDH_sha512kdf_scheme:
+            kdfType = WC_HASH_TYPE_SHA512;
+            break;
+    #endif
+        default:
+            WOLFSSL_MSG("Unsupported key agreement algorithm");
+            XFREE(secret, kari->heap, DYNAMIC_TYPE_PKCS7);
+            return BAD_FUNC_ARG;
+    }
+
+    ret = wc_X963_KDF(kdfType, secret, secretSz, kari->sharedInfo,
+                      kari->sharedInfoSz, kari->kek, kari->kekSz);
+    if (ret != 0) {
+        XFREE(secret, kari->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+
+    XFREE(secret, kari->heap, DYNAMIC_TYPE_PKCS7);
+
+    return 0;
+}
+
+
+/* Encode and add CMS EnvelopedData KARI (KeyAgreeRecipientInfo) RecipientInfo
+ * to CMS/PKCS#7 EnvelopedData structure.
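+ *
+ * Minimal usage sketch (recipCert/recipCertSz are assumed to hold the
+ * recipient's DER certificate; the OIDs shown are one valid pairing):
+ *
+ *   pkcs7->encryptOID = AES256CBCb;
+ *   ret = wc_PKCS7_AddRecipient_KARI(pkcs7, recipCert, recipCertSz,
+ *                                    AES256_WRAP,
+ *                                    dhSinglePass_stdDH_sha256kdf_scheme,
+ *                                    NULL, 0, 0);
+ *   if (ret < 0)
+ *       return ret;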
+ * + * Returns 0 on success, negative upon error */ +int wc_PKCS7_AddRecipient_KARI(PKCS7* pkcs7, const byte* cert, word32 certSz, + int keyWrapOID, int keyAgreeOID, byte* ukm, + word32 ukmSz, int options) +{ + Pkcs7EncodedRecip* recip; + Pkcs7EncodedRecip* lastRecip = NULL; + WC_PKCS7_KARI* kari = NULL; + + word32 idx = 0; + word32 encryptedKeySz = MAX_ENCRYPTED_KEY_SZ; + + int ret = 0; + int keySz, direction = 0; + int blockKeySz = 0; + + /* ASN.1 layout */ + int totalSz = 0; + int kariSeqSz = 0; + byte kariSeq[MAX_SEQ_SZ]; /* IMPLICIT [1] */ + int verSz = 0; + byte ver[MAX_VERSION_SZ]; + + int origIdOrKeySeqSz = 0; + byte origIdOrKeySeq[MAX_SEQ_SZ]; /* IMPLICIT [0] */ + int origPubKeySeqSz = 0; + byte origPubKeySeq[MAX_SEQ_SZ]; /* IMPLICIT [1] */ + int origAlgIdSz = 0; + byte origAlgId[MAX_ALGO_SZ]; + int origPubKeyStrSz = 0; + byte origPubKeyStr[MAX_OCTET_STR_SZ]; + + /* optional user keying material */ + int ukmOctetSz = 0; + byte ukmOctetStr[MAX_OCTET_STR_SZ]; + int ukmExplicitSz = 0; + byte ukmExplicitSeq[MAX_SEQ_SZ]; + + int keyEncryptAlgoIdSz = 0; + byte keyEncryptAlgoId[MAX_ALGO_SZ]; + int keyWrapAlgSz = 0; + byte keyWrapAlg[MAX_ALGO_SZ]; + + int recipEncKeysSeqSz = 0; + byte recipEncKeysSeq[MAX_SEQ_SZ]; + int recipEncKeySeqSz = 0; + byte recipEncKeySeq[MAX_SEQ_SZ]; + int recipKeyIdSeqSz = 0; + byte recipKeyIdSeq[MAX_SEQ_SZ]; /* IMPLICIT [0] */ + int subjKeyIdOctetSz = 0; + byte subjKeyIdOctet[MAX_OCTET_STR_SZ]; + int encryptedKeyOctetSz = 0; + byte encryptedKeyOctet[MAX_OCTET_STR_SZ]; + +#ifdef WOLFSSL_SMALL_STACK + byte* encryptedKey; + + encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (encryptedKey == NULL) { + return MEMORY_E; + } +#else + byte encryptedKey[MAX_ENCRYPTED_KEY_SZ]; +#endif + + /* allocate and init memory for recipient */ + recip = (Pkcs7EncodedRecip*)XMALLOC(sizeof(Pkcs7EncodedRecip), pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (recip == NULL) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return MEMORY_E; + } + XMEMSET(recip, 0, sizeof(Pkcs7EncodedRecip)); + + /* get key size for content-encryption key based on algorithm */ + blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID); + if (blockKeySz < 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return blockKeySz; + } + + /* generate random content encryption key, if needed */ + ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz); + if (ret < 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + /* set direction based on keyWrapAlgo */ + switch (keyWrapOID) { +#ifndef NO_AES + #ifdef WOLFSSL_AES_128 + case AES128_WRAP: + #endif + #ifdef WOLFSSL_AES_192 + case AES192_WRAP: + #endif + #ifdef WOLFSSL_AES_256 + case AES256_WRAP: + #endif + direction = AES_ENCRYPTION; + break; +#endif + default: + WOLFSSL_MSG("Unsupported key wrap algorithm"); +#ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return BAD_KEYWRAP_ALG_E; + } + + kari = wc_PKCS7_KariNew(pkcs7, WC_PKCS7_ENCODE); + if (kari == NULL) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } + + /* set user keying 
material if available */ + if (ukmSz > 0 && ukm != NULL) { + kari->ukm = ukm; + kari->ukmSz = ukmSz; + kari->ukmOwner = 0; + } + + /* parse recipient cert, get public key */ + ret = wc_PKCS7_KariParseRecipCert(kari, cert, certSz, NULL, 0); + if (ret != 0) { + wc_PKCS7_KariFree(kari); +#ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + /* generate sender ephemeral ECC key */ + ret = wc_PKCS7_KariGenerateEphemeralKey(kari); + if (ret != 0) { + wc_PKCS7_KariFree(kari); +#ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + /* generate KEK (key encryption key) */ + ret = wc_PKCS7_KariGenerateKEK(kari, keyWrapOID, keyAgreeOID); + if (ret != 0) { + wc_PKCS7_KariFree(kari); +#ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + /* encrypt CEK with KEK */ + keySz = wc_PKCS7_KeyWrap(pkcs7->cek, pkcs7->cekSz, kari->kek, + kari->kekSz, encryptedKey, encryptedKeySz, + keyWrapOID, direction); + if (keySz <= 0) { + wc_PKCS7_KariFree(kari); +#ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return keySz; + } + encryptedKeySz = (word32)keySz; + + /* Start of RecipientEncryptedKeys */ + + /* EncryptedKey */ + encryptedKeyOctetSz = SetOctetString(encryptedKeySz, encryptedKeyOctet); + totalSz += (encryptedKeyOctetSz + encryptedKeySz); + + /* SubjectKeyIdentifier */ + subjKeyIdOctetSz = SetOctetString(KEYID_SIZE, subjKeyIdOctet); + totalSz += (subjKeyIdOctetSz + KEYID_SIZE); + + /* RecipientKeyIdentifier IMPLICIT [0] */ + recipKeyIdSeqSz = SetImplicit(ASN_SEQUENCE, 0, subjKeyIdOctetSz + + KEYID_SIZE, recipKeyIdSeq); + totalSz += recipKeyIdSeqSz; + + /* RecipientEncryptedKey */ + recipEncKeySeqSz = SetSequence(totalSz, recipEncKeySeq); + totalSz += recipEncKeySeqSz; + + /* RecipientEncryptedKeys */ + recipEncKeysSeqSz = SetSequence(totalSz, recipEncKeysSeq); + totalSz += recipEncKeysSeqSz; + + /* Start of optional UserKeyingMaterial */ + + if (kari->ukmSz > 0) { + ukmOctetSz = SetOctetString(kari->ukmSz, ukmOctetStr); + totalSz += (ukmOctetSz + kari->ukmSz); + + ukmExplicitSz = SetExplicit(1, ukmOctetSz + kari->ukmSz, + ukmExplicitSeq); + totalSz += ukmExplicitSz; + } + + /* Start of KeyEncryptionAlgorithmIdentifier */ + + /* KeyWrapAlgorithm */ + keyWrapAlgSz = SetAlgoID(keyWrapOID, keyWrapAlg, oidKeyWrapType, 0); + totalSz += keyWrapAlgSz; + + /* KeyEncryptionAlgorithmIdentifier */ + keyEncryptAlgoIdSz = SetAlgoID(keyAgreeOID, keyEncryptAlgoId, + oidCmsKeyAgreeType, keyWrapAlgSz); + totalSz += keyEncryptAlgoIdSz; + + /* Start of OriginatorIdentifierOrKey */ + + /* recipient ECPoint, public key */ + XMEMSET(origPubKeyStr, 0, sizeof(origPubKeyStr)); /* no unused bits */ + origPubKeyStr[0] = ASN_BIT_STRING; + origPubKeyStrSz = SetLength(kari->senderKeyExportSz + 1, + origPubKeyStr + 1) + 2; + totalSz += (origPubKeyStrSz + kari->senderKeyExportSz); + + /* Originator AlgorithmIdentifier, params set to NULL for interop + compatibility */ + origAlgIdSz = SetAlgoID(ECDSAk, origAlgId, oidKeyType, 2); + origAlgId[origAlgIdSz++] = ASN_TAG_NULL; + origAlgId[origAlgIdSz++] = 0; + totalSz += origAlgIdSz; + + /* outer OriginatorPublicKey IMPLICIT [1] */ + origPubKeySeqSz = 
SetImplicit(ASN_SEQUENCE, 1, + origAlgIdSz + origPubKeyStrSz + + kari->senderKeyExportSz, origPubKeySeq); + totalSz += origPubKeySeqSz; + + /* outer OriginatorIdentiferOrKey IMPLICIT [0] */ + origIdOrKeySeqSz = SetImplicit(ASN_SEQUENCE, 0, + origPubKeySeqSz + origAlgIdSz + + origPubKeyStrSz + kari->senderKeyExportSz, + origIdOrKeySeq); + totalSz += origIdOrKeySeqSz; + + /* version, always 3 */ + verSz = SetMyVersion(3, ver, 0); + totalSz += verSz; + recip->recipVersion = 3; + + /* outer IMPLICIT [1] kari */ + kariSeqSz = SetImplicit(ASN_SEQUENCE, 1, totalSz, kariSeq); + totalSz += kariSeqSz; + + if (totalSz > MAX_RECIP_SZ) { + WOLFSSL_MSG("KeyAgreeRecipientInfo output buffer too small"); + wc_PKCS7_KariFree(kari); +#ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return BUFFER_E; + } + + XMEMCPY(recip->recip + idx, kariSeq, kariSeqSz); + idx += kariSeqSz; + XMEMCPY(recip->recip + idx, ver, verSz); + idx += verSz; + + XMEMCPY(recip->recip + idx, origIdOrKeySeq, origIdOrKeySeqSz); + idx += origIdOrKeySeqSz; + XMEMCPY(recip->recip + idx, origPubKeySeq, origPubKeySeqSz); + idx += origPubKeySeqSz; + + /* AlgorithmIdentifier with NULL parameter */ + XMEMCPY(recip->recip + idx, origAlgId, origAlgIdSz); + idx += origAlgIdSz; + + XMEMCPY(recip->recip + idx, origPubKeyStr, origPubKeyStrSz); + idx += origPubKeyStrSz; + /* ephemeral public key */ + XMEMCPY(recip->recip + idx, kari->senderKeyExport, kari->senderKeyExportSz); + idx += kari->senderKeyExportSz; + + if (kari->ukmSz > 0) { + XMEMCPY(recip->recip + idx, ukmExplicitSeq, ukmExplicitSz); + idx += ukmExplicitSz; + XMEMCPY(recip->recip + idx, ukmOctetStr, ukmOctetSz); + idx += ukmOctetSz; + XMEMCPY(recip->recip + idx, kari->ukm, kari->ukmSz); + idx += kari->ukmSz; + } + + XMEMCPY(recip->recip + idx, keyEncryptAlgoId, keyEncryptAlgoIdSz); + idx += keyEncryptAlgoIdSz; + XMEMCPY(recip->recip + idx, keyWrapAlg, keyWrapAlgSz); + idx += keyWrapAlgSz; + + XMEMCPY(recip->recip + idx, recipEncKeysSeq, recipEncKeysSeqSz); + idx += recipEncKeysSeqSz; + XMEMCPY(recip->recip + idx, recipEncKeySeq, recipEncKeySeqSz); + idx += recipEncKeySeqSz; + XMEMCPY(recip->recip + idx, recipKeyIdSeq, recipKeyIdSeqSz); + idx += recipKeyIdSeqSz; + XMEMCPY(recip->recip + idx, subjKeyIdOctet, subjKeyIdOctetSz); + idx += subjKeyIdOctetSz; + /* subject key id */ + XMEMCPY(recip->recip + idx, kari->decoded->extSubjKeyId, KEYID_SIZE); + idx += KEYID_SIZE; + XMEMCPY(recip->recip + idx, encryptedKeyOctet, encryptedKeyOctetSz); + idx += encryptedKeyOctetSz; + /* encrypted CEK */ + XMEMCPY(recip->recip + idx, encryptedKey, encryptedKeySz); + idx += encryptedKeySz; + + wc_PKCS7_KariFree(kari); +#ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + /* store recipient size */ + recip->recipSz = idx; + recip->recipType = PKCS7_KARI; + + /* add recipient to recip list */ + if (pkcs7->recipList == NULL) { + pkcs7->recipList = recip; + } else { + lastRecip = pkcs7->recipList; + while (lastRecip->next != NULL) { + lastRecip = lastRecip->next; + } + lastRecip->next = recip; + } + + (void)options; + + return idx; +} + +#endif /* HAVE_ECC */ + +#ifndef NO_RSA + +/* Encode and add CMS EnvelopedData KTRI (KeyTransRecipientInfo) RecipientInfo + * to CMS/PKCS#7 EnvelopedData structure. 
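+ *
+ * Minimal usage sketch for an RSA recipient (recipCert/recipCertSz are
+ * placeholders for the recipient's DER certificate):
+ *
+ *   ret = wc_PKCS7_AddRecipient_KTRI(pkcs7, recipCert, recipCertSz,
+ *                                    CMS_ISSUER_AND_SERIAL_NUMBER);
+ *   if (ret < 0)
+ *       return ret;
+ *
+ * Passing CMS_SKID instead selects the SubjectKeyIdentifier form
+ * (RecipientInfo version 2), read from pkcs7->issuerSubjKeyId.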
+ * + * Returns 0 on success, negative upon error */ +int wc_PKCS7_AddRecipient_KTRI(PKCS7* pkcs7, const byte* cert, word32 certSz, + int options) +{ + Pkcs7EncodedRecip* recip = NULL; + Pkcs7EncodedRecip* lastRecip = NULL; + + WC_RNG rng; + word32 idx = 0; + word32 encryptedKeySz = 0; + + int ret = 0, blockKeySz; + int verSz = 0, issuerSz = 0, snSz = 0, keyEncAlgSz = 0; + int issuerSeqSz = 0, recipSeqSz = 0, issuerSerialSeqSz = 0; + int encKeyOctetStrSz; + int sidType; + + byte ver[MAX_VERSION_SZ]; + byte issuerSerialSeq[MAX_SEQ_SZ]; + byte recipSeq[MAX_SEQ_SZ]; + byte issuerSeq[MAX_SEQ_SZ]; + byte encKeyOctetStr[MAX_OCTET_STR_SZ]; + + byte issuerSKIDSeq[MAX_SEQ_SZ]; + byte issuerSKID[MAX_OCTET_STR_SZ]; + word32 issuerSKIDSeqSz = 0, issuerSKIDSz = 0; + +#ifdef WOLFSSL_SMALL_STACK + byte* serial; + byte* keyAlgArray; + byte* encryptedKey; + RsaKey* pubKey; + DecodedCert* decoded; + + serial = (byte*)XMALLOC(MAX_SN_SZ, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + keyAlgArray = (byte*)XMALLOC(MAX_SN_SZ, pkcs7->heap, + DYNAMIC_TYPE_TMP_BUFFER); + encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap, + DYNAMIC_TYPE_TMP_BUFFER); + decoded = (DecodedCert*)XMALLOC(sizeof(DecodedCert), pkcs7->heap, + DYNAMIC_TYPE_TMP_BUFFER); + + if (decoded == NULL || serial == NULL || + encryptedKey == NULL || keyAlgArray == NULL) { + if (serial) + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (keyAlgArray) + XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (encryptedKey) + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (decoded) + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + return MEMORY_E; + } +#else + byte serial[MAX_SN_SZ]; + byte keyAlgArray[MAX_ALGO_SZ]; + byte encryptedKey[MAX_ENCRYPTED_KEY_SZ]; + + RsaKey pubKey[1]; + DecodedCert decoded[1]; +#endif + + encryptedKeySz = MAX_ENCRYPTED_KEY_SZ; + XMEMSET(encryptedKey, 0, encryptedKeySz); + + /* default to IssuerAndSerialNumber if not set */ + if (pkcs7->sidType != 0) { + sidType = pkcs7->sidType; + } else { + sidType = CMS_ISSUER_AND_SERIAL_NUMBER; + } + + /* allow options to override SubjectIdentifier type if set */ + if (options & CMS_SKID) { + sidType = CMS_SKID; + } else if (options & CMS_ISSUER_AND_SERIAL_NUMBER) { + sidType = CMS_ISSUER_AND_SERIAL_NUMBER; + } + + /* allocate recipient struct */ + recip = (Pkcs7EncodedRecip*)XMALLOC(sizeof(Pkcs7EncodedRecip), pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (recip == NULL) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return MEMORY_E; + } + XMEMSET(recip, 0, sizeof(Pkcs7EncodedRecip)); + + /* get key size for content-encryption key based on algorithm */ + blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID); + if (blockKeySz < 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return blockKeySz; + } + + /* generate random content encryption key, if needed */ + ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz); + if (ret < 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, pkcs7->heap, 
DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + InitDecodedCert(decoded, (byte*)cert, certSz, pkcs7->heap); + ret = ParseCert(decoded, CA_TYPE, NO_VERIFY, 0); + if (ret < 0) { + FreeDecodedCert(decoded); +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + if (sidType == CMS_ISSUER_AND_SERIAL_NUMBER) { + + /* version, must be 0 for IssuerAndSerialNumber */ + verSz = SetMyVersion(0, ver, 0); + recip->recipVersion = 0; + + /* IssuerAndSerialNumber */ + if (decoded->issuerRaw == NULL || decoded->issuerRawLen == 0) { + WOLFSSL_MSG("DecodedCert lacks raw issuer pointer and length"); + FreeDecodedCert(decoded); +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return -1; + } + issuerSz = decoded->issuerRawLen; + issuerSeqSz = SetSequence(issuerSz, issuerSeq); + + if (decoded->serialSz == 0) { + WOLFSSL_MSG("DecodedCert missing serial number"); + FreeDecodedCert(decoded); +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return -1; + } + snSz = SetSerialNumber(decoded->serial, decoded->serialSz, serial, + MAX_SN_SZ, MAX_SN_SZ); + + issuerSerialSeqSz = SetSequence(issuerSeqSz + issuerSz + snSz, + issuerSerialSeq); + + } else if (sidType == CMS_SKID) { + + /* version, must be 2 for SubjectKeyIdentifier */ + verSz = SetMyVersion(2, ver, 0); + recip->recipVersion = 2; + + issuerSKIDSz = SetOctetString(KEYID_SIZE, issuerSKID); + issuerSKIDSeqSz = SetExplicit(0, issuerSKIDSz + KEYID_SIZE, + issuerSKIDSeq); + } else { + FreeDecodedCert(decoded); +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return PKCS7_RECIP_E; + } + + pkcs7->publicKeyOID = decoded->keyOID; + + /* KeyEncryptionAlgorithmIdentifier, only support RSA now */ + if (pkcs7->publicKeyOID != RSAk) { + FreeDecodedCert(decoded); +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ALGO_ID_E; + } + + keyEncAlgSz = SetAlgoID(pkcs7->publicKeyOID, keyAlgArray, oidKeyType, 0); + if (keyEncAlgSz == 0) { + FreeDecodedCert(decoded); +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, 
pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_SMALL_STACK + pubKey = (RsaKey*)XMALLOC(sizeof(RsaKey), pkcs7->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (pubKey == NULL) { + FreeDecodedCert(decoded); + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } +#endif + + /* EncryptedKey */ + ret = wc_InitRsaKey_ex(pubKey, pkcs7->heap, INVALID_DEVID); + if (ret != 0) { + FreeDecodedCert(decoded); +#ifdef WOLFSSL_SMALL_STACK + XFREE(pubKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + if (wc_RsaPublicKeyDecode(decoded->publicKey, &idx, pubKey, + decoded->pubKeySize) < 0) { + WOLFSSL_MSG("ASN RSA key decode error"); + wc_FreeRsaKey(pubKey); + FreeDecodedCert(decoded); +#ifdef WOLFSSL_SMALL_STACK + XFREE(pubKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return PUBLIC_KEY_E; + } + + ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId); + if (ret != 0) { + wc_FreeRsaKey(pubKey); + FreeDecodedCert(decoded); +#ifdef WOLFSSL_SMALL_STACK + XFREE(pubKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } + + + ret = wc_RsaPublicEncrypt(pkcs7->cek, pkcs7->cekSz, encryptedKey, + encryptedKeySz, pubKey, &rng); + wc_FreeRsaKey(pubKey); + wc_FreeRng(&rng); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(pubKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + if (ret < 0) { + WOLFSSL_MSG("RSA Public Encrypt failed"); + FreeDecodedCert(decoded); +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + encryptedKeySz = ret; + + encKeyOctetStrSz = SetOctetString(encryptedKeySz, encKeyOctetStr); + + /* RecipientInfo */ + if (sidType == CMS_ISSUER_AND_SERIAL_NUMBER) { + recipSeqSz = SetSequence(verSz + issuerSerialSeqSz + issuerSeqSz + + issuerSz + snSz + keyEncAlgSz + + encKeyOctetStrSz + encryptedKeySz, recipSeq); + + if (recipSeqSz + verSz + issuerSerialSeqSz + issuerSeqSz + snSz + + keyEncAlgSz + encKeyOctetStrSz + encryptedKeySz > MAX_RECIP_SZ) { + WOLFSSL_MSG("RecipientInfo output buffer too small"); + 
FreeDecodedCert(decoded); +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return BUFFER_E; + } + + } else { + recipSeqSz = SetSequence(verSz + issuerSKIDSeqSz + issuerSKIDSz + + KEYID_SIZE + keyEncAlgSz + encKeyOctetStrSz + + encryptedKeySz, recipSeq); + + if (recipSeqSz + verSz + issuerSKIDSeqSz + issuerSKIDSz + KEYID_SIZE + + keyEncAlgSz + encKeyOctetStrSz + encryptedKeySz > MAX_RECIP_SZ) { + WOLFSSL_MSG("RecipientInfo output buffer too small"); + FreeDecodedCert(decoded); +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return BUFFER_E; + } + } + + idx = 0; + XMEMCPY(recip->recip + idx, recipSeq, recipSeqSz); + idx += recipSeqSz; + XMEMCPY(recip->recip + idx, ver, verSz); + idx += verSz; + if (sidType == CMS_ISSUER_AND_SERIAL_NUMBER) { + XMEMCPY(recip->recip + idx, issuerSerialSeq, issuerSerialSeqSz); + idx += issuerSerialSeqSz; + XMEMCPY(recip->recip + idx, issuerSeq, issuerSeqSz); + idx += issuerSeqSz; + XMEMCPY(recip->recip + idx, decoded->issuerRaw, issuerSz); + idx += issuerSz; + XMEMCPY(recip->recip + idx, serial, snSz); + idx += snSz; + } else { + XMEMCPY(recip->recip + idx, issuerSKIDSeq, issuerSKIDSeqSz); + idx += issuerSKIDSeqSz; + XMEMCPY(recip->recip + idx, issuerSKID, issuerSKIDSz); + idx += issuerSKIDSz; + XMEMCPY(recip->recip + idx, pkcs7->issuerSubjKeyId, KEYID_SIZE); + idx += KEYID_SIZE; + } + XMEMCPY(recip->recip + idx, keyAlgArray, keyEncAlgSz); + idx += keyEncAlgSz; + XMEMCPY(recip->recip + idx, encKeyOctetStr, encKeyOctetStrSz); + idx += encKeyOctetStrSz; + XMEMCPY(recip->recip + idx, encryptedKey, encryptedKeySz); + idx += encryptedKeySz; + + FreeDecodedCert(decoded); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + /* store recipient size */ + recip->recipSz = idx; + recip->recipType = PKCS7_KTRI; + + /* add recipient to recip list */ + if (pkcs7->recipList == NULL) { + pkcs7->recipList = recip; + } else { + lastRecip = pkcs7->recipList; + while (lastRecip->next != NULL) { + lastRecip = lastRecip->next; + } + lastRecip->next = recip; + } + + return idx; +} + +#endif /* !NO_RSA */ + + +/* encrypt content using encryptOID algo */ +static int wc_PKCS7_EncryptContent(int encryptOID, byte* key, int keySz, + byte* iv, int ivSz, byte* aad, word32 aadSz, + byte* authTag, word32 authTagSz, byte* in, + int inSz, byte* out) +{ + int ret; +#ifndef NO_AES + Aes aes; +#endif +#ifndef NO_DES3 + Des des; + Des3 des3; +#endif + + if (key == NULL || iv == NULL || in == NULL || out == NULL) + return BAD_FUNC_ARG; + + switch (encryptOID) { +#ifndef NO_AES + #ifdef WOLFSSL_AES_128 + case AES128CBCb: + #endif + #ifdef WOLFSSL_AES_192 + case AES192CBCb: + #endif + #ifdef WOLFSSL_AES_256 + case AES256CBCb: + #endif + if ( + #ifdef WOLFSSL_AES_128 + (encryptOID == AES128CBCb && keySz != 16 ) || + #endif + #ifdef WOLFSSL_AES_192 + 
(encryptOID == AES192CBCb && keySz != 24 ) || + #endif + #ifdef WOLFSSL_AES_256 + (encryptOID == AES256CBCb && keySz != 32 ) || + #endif + (ivSz != AES_BLOCK_SIZE) ) + return BAD_FUNC_ARG; + + ret = wc_AesInit(&aes, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_AesSetKey(&aes, key, keySz, iv, AES_ENCRYPTION); + if (ret == 0) + ret = wc_AesCbcEncrypt(&aes, out, in, inSz); + wc_AesFree(&aes); + } + break; + #ifdef HAVE_AESGCM + #ifdef WOLFSSL_AES_128 + case AES128GCMb: + #endif + #ifdef WOLFSSL_AES_192 + case AES192GCMb: + #endif + #ifdef WOLFSSL_AES_256 + case AES256GCMb: + #endif + #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \ + defined(WOLFSSL_AES_256) + if (authTag == NULL) + return BAD_FUNC_ARG; + + ret = wc_AesInit(&aes, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_AesGcmSetKey(&aes, key, keySz); + if (ret == 0) + ret = wc_AesGcmEncrypt(&aes, out, in, inSz, iv, ivSz, + authTag, authTagSz, aad, aadSz); + wc_AesFree(&aes); + } + break; + #endif + #endif /* HAVE_AESGCM */ + #ifdef HAVE_AESCCM + #ifdef WOLFSSL_AES_128 + case AES128CCMb: + #endif + #ifdef WOLFSSL_AES_192 + case AES192CCMb: + #endif + #ifdef WOLFSSL_AES_256 + case AES256CCMb: + #endif + #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \ + defined(WOLFSSL_AES_256) + if (authTag == NULL) + return BAD_FUNC_ARG; + + ret = wc_AesInit(&aes, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_AesCcmSetKey(&aes, key, keySz); + if (ret == 0) + ret = wc_AesCcmEncrypt(&aes, out, in, inSz, iv, ivSz, + authTag, authTagSz, aad, aadSz); + wc_AesFree(&aes); + } + break; + #endif + #endif /* HAVE_AESCCM */ +#endif /* NO_AES */ +#ifndef NO_DES3 + case DESb: + if (keySz != DES_KEYLEN || ivSz != DES_BLOCK_SIZE) + return BAD_FUNC_ARG; + + ret = wc_Des_SetKey(&des, key, iv, DES_ENCRYPTION); + if (ret == 0) + ret = wc_Des_CbcEncrypt(&des, out, in, inSz); + + break; + + case DES3b: + if (keySz != DES3_KEYLEN || ivSz != DES_BLOCK_SIZE) + return BAD_FUNC_ARG; + + ret = wc_Des3Init(&des3, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_Des3_SetKey(&des3, key, iv, DES_ENCRYPTION); + if (ret == 0) + ret = wc_Des3_CbcEncrypt(&des3, out, in, inSz); + wc_Des3Free(&des3); + } + break; +#endif + default: + WOLFSSL_MSG("Unsupported content cipher type"); + return ALGO_ID_E; + }; + +#if defined(NO_AES) || (!defined(HAVE_AESGCM) && !defined(HAVE_AESCCM)) + (void)authTag; + (void)authTagSz; + (void)aad; + (void)aadSz; +#endif + return ret; +} + + +/* decrypt content using encryptOID algo + * returns 0 on success */ +static int wc_PKCS7_DecryptContent(PKCS7* pkcs7, int encryptOID, byte* key, + int keySz, byte* iv, int ivSz, byte* aad, word32 aadSz, byte* authTag, + word32 authTagSz, byte* in, int inSz, byte* out) +{ + int ret; +#ifndef NO_AES + Aes aes; +#endif +#ifndef NO_DES3 + Des des; + Des3 des3; +#endif + + if (iv == NULL || in == NULL || out == NULL) + return BAD_FUNC_ARG; + + if (pkcs7->decryptionCb != NULL) { + return pkcs7->decryptionCb(pkcs7, encryptOID, iv, ivSz, + aad, aadSz, authTag, authTagSz, in, + inSz, out, pkcs7->decryptionCtx); + } + + if (key == NULL) + return BAD_FUNC_ARG; + + switch (encryptOID) { +#ifndef NO_AES + #ifdef WOLFSSL_AES_128 + case AES128CBCb: + #endif + #ifdef WOLFSSL_AES_192 + case AES192CBCb: + #endif + #ifdef WOLFSSL_AES_256 + case AES256CBCb: + #endif + if ( + #ifdef WOLFSSL_AES_128 + (encryptOID == AES128CBCb && keySz != 16 ) || + #endif + #ifdef WOLFSSL_AES_192 + (encryptOID == AES192CBCb && keySz != 24 ) || + #endif + #ifdef WOLFSSL_AES_256 + (encryptOID == AES256CBCb && keySz != 
32 ) || + #endif + (ivSz != AES_BLOCK_SIZE) ) + return BAD_FUNC_ARG; + ret = wc_AesInit(&aes, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_AesSetKey(&aes, key, keySz, iv, AES_DECRYPTION); + if (ret == 0) + ret = wc_AesCbcDecrypt(&aes, out, in, inSz); + wc_AesFree(&aes); + } + break; + #ifdef HAVE_AESGCM + #ifdef WOLFSSL_AES_128 + case AES128GCMb: + #endif + #ifdef WOLFSSL_AES_192 + case AES192GCMb: + #endif + #ifdef WOLFSSL_AES_256 + case AES256GCMb: + #endif + #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \ + defined(WOLFSSL_AES_256) + if (authTag == NULL) + return BAD_FUNC_ARG; + + ret = wc_AesInit(&aes, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_AesGcmSetKey(&aes, key, keySz); + if (ret == 0) + ret = wc_AesGcmDecrypt(&aes, out, in, inSz, iv, ivSz, + authTag, authTagSz, aad, aadSz); + wc_AesFree(&aes); + } + break; + #endif + #endif /* HAVE_AESGCM */ + #ifdef HAVE_AESCCM + #ifdef WOLFSSL_AES_128 + case AES128CCMb: + #endif + #ifdef WOLFSSL_AES_192 + case AES192CCMb: + #endif + #ifdef WOLFSSL_AES_256 + case AES256CCMb: + #endif + #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \ + defined(WOLFSSL_AES_256) + if (authTag == NULL) + return BAD_FUNC_ARG; + + ret = wc_AesInit(&aes, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_AesCcmSetKey(&aes, key, keySz); + if (ret == 0) + ret = wc_AesCcmDecrypt(&aes, out, in, inSz, iv, ivSz, + authTag, authTagSz, aad, aadSz); + wc_AesFree(&aes); + } + break; + #endif + #endif /* HAVE_AESCCM */ +#endif /* NO_AES */ +#ifndef NO_DES3 + case DESb: + if (keySz != DES_KEYLEN || ivSz != DES_BLOCK_SIZE) + return BAD_FUNC_ARG; + + ret = wc_Des_SetKey(&des, key, iv, DES_DECRYPTION); + if (ret == 0) + ret = wc_Des_CbcDecrypt(&des, out, in, inSz); + + break; + case DES3b: + if (keySz != DES3_KEYLEN || ivSz != DES_BLOCK_SIZE) + return BAD_FUNC_ARG; + + ret = wc_Des3Init(&des3, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_Des3_SetKey(&des3, key, iv, DES_DECRYPTION); + if (ret == 0) + ret = wc_Des3_CbcDecrypt(&des3, out, in, inSz); + wc_Des3Free(&des3); + } + + break; +#endif + default: + WOLFSSL_MSG("Unsupported content cipher type"); + return ALGO_ID_E; + }; + +#if defined(NO_AES) || (!defined(HAVE_AESGCM) && !defined(HAVE_AESCCM)) + (void)authTag; + (void)authTagSz; + (void)aad; + (void)aadSz; +#endif + + return ret; +} + + +/* Generate random block, place in out, return 0 on success negative on error. + * Used for generation of IV, nonce, etc */ +static int wc_PKCS7_GenerateBlock(PKCS7* pkcs7, WC_RNG* rng, byte* out, + word32 outSz) +{ + int ret; + WC_RNG* rnd = NULL; + + if (out == NULL || outSz == 0) + return BAD_FUNC_ARG; + + /* input RNG is optional, init local one if input rng is NULL */ + if (rng == NULL) { + rnd = (WC_RNG*)XMALLOC(sizeof(WC_RNG), pkcs7->heap, DYNAMIC_TYPE_RNG); + if (rnd == NULL) + return MEMORY_E; + + ret = wc_InitRng_ex(rnd, pkcs7->heap, pkcs7->devId); + if (ret != 0) { + XFREE(rnd, pkcs7->heap, DYNAMIC_TYPE_RNG); + return ret; + } + + } else { + rnd = rng; + } + + ret = wc_RNG_GenerateBlock(rnd, out, outSz); + + if (rng == NULL) { + wc_FreeRng(rnd); + XFREE(rnd, pkcs7->heap, DYNAMIC_TYPE_RNG); + } + + return ret; +} + + +/* Set default SignerIdentifier type to be used. Is either + * IssuerAndSerialNumber or SubjectKeyIdentifier. Encoding defaults to using + * IssuerAndSerialNumber unless set with this function or explicitly + * overridden via options when adding RecipientInfo type. + * + * Using the type DEGENERATE_SID skips over signer information. 
In degenerate + * cases there are no signers. + * + * pkcs7 - pointer to initialized PKCS7 structure + * type - either CMS_ISSUER_AND_SERIAL_NUMBER, CMS_SKID or DEGENERATE_SID + * + * return 0 on success, negative upon error */ +int wc_PKCS7_SetSignerIdentifierType(PKCS7* pkcs7, int type) +{ + if (pkcs7 == NULL) + return BAD_FUNC_ARG; + + if (type != CMS_ISSUER_AND_SERIAL_NUMBER && + type != CMS_SKID && + type != DEGENERATE_SID) { + return BAD_FUNC_ARG; + } + + pkcs7->sidType = type; + + return 0; +} + + +/* Set custom contentType, currently supported with SignedData type + * + * pkcs7 - pointer to initialized PKCS7 structure + * contentType - pointer to array with ASN.1 encoded OID value + * sz - length of contentType array, octets + * + * return 0 on success, negative upon error */ +int wc_PKCS7_SetContentType(PKCS7* pkcs7, byte* contentType, word32 sz) +{ + if (pkcs7 == NULL || contentType == NULL || sz == 0) + return BAD_FUNC_ARG; + + if (sz > MAX_OID_SZ) { + WOLFSSL_MSG("input array too large, bounded by MAX_OID_SZ"); + return BAD_FUNC_ARG; + } + + XMEMCPY(pkcs7->contentType, contentType, sz); + pkcs7->contentTypeSz = sz; + + return 0; +} + + +/* return size of padded data, padded to blockSz chunks, or negative on error */ +int wc_PKCS7_GetPadSize(word32 inputSz, word32 blockSz) +{ + int padSz; + + if (blockSz == 0) + return BAD_FUNC_ARG; + + padSz = blockSz - (inputSz % blockSz); + + return padSz; +} + + +/* pad input data to blockSz chunk, place in outSz. out must be big enough + * for input + pad bytes. See wc_PKCS7_GetPadSize() helper. */ +int wc_PKCS7_PadData(byte* in, word32 inSz, byte* out, word32 outSz, + word32 blockSz) +{ + int i, padSz; + + if (in == NULL || inSz == 0 || + out == NULL || outSz == 0) + return BAD_FUNC_ARG; + + padSz = wc_PKCS7_GetPadSize(inSz, blockSz); + + if (outSz < (inSz + padSz)) + return BAD_FUNC_ARG; + + XMEMCPY(out, in, inSz); + + for (i = 0; i < padSz; i++) { + out[inSz + i] = (byte)padSz; + } + + return inSz + padSz; +} + + +/* Encode and add CMS EnvelopedData ORI (OtherRecipientInfo) RecipientInfo + * to CMS/PKCS#7 EnvelopedData structure. 
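+ *
+ * The CEK itself is not encrypted here; it is handed to the user callback,
+ * which returns the oriType OID and an already ASN.1-encoded oriValue. A
+ * minimal caller sketch (myOriEncryptCb and myCtx are hypothetical user
+ * values matching the CallbackOriEncrypt signature):
+ *
+ *     wc_PKCS7_SetOriEncryptCtx(pkcs7, myCtx);
+ *     ret = wc_PKCS7_AddRecipient_ORI(pkcs7, myOriEncryptCb, 0);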
+ * + * Return 0 on success, negative upon error */ +int wc_PKCS7_AddRecipient_ORI(PKCS7* pkcs7, CallbackOriEncrypt oriEncryptCb, + int options) +{ + int oriTypeLenSz, blockKeySz, ret; + word32 idx, recipSeqSz; + + Pkcs7EncodedRecip* recip = NULL; + Pkcs7EncodedRecip* lastRecip = NULL; + + byte recipSeq[MAX_SEQ_SZ]; + byte oriTypeLen[MAX_LENGTH_SZ]; + + byte oriType[MAX_ORI_TYPE_SZ]; + byte oriValue[MAX_ORI_VALUE_SZ]; + word32 oriTypeSz = MAX_ORI_TYPE_SZ; + word32 oriValueSz = MAX_ORI_VALUE_SZ; + + if (pkcs7 == NULL || oriEncryptCb == NULL) { + return BAD_FUNC_ARG; + } + + /* allocate memory for RecipientInfo, KEK, encrypted key */ + recip = (Pkcs7EncodedRecip*)XMALLOC(sizeof(Pkcs7EncodedRecip), + pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (recip == NULL) + return MEMORY_E; + XMEMSET(recip, 0, sizeof(Pkcs7EncodedRecip)); + + /* get key size for content-encryption key based on algorithm */ + blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID); + if (blockKeySz < 0) { + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return blockKeySz; + } + + /* generate random content encryption key, if needed */ + ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz); + if (ret < 0) { + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + /* call user callback to encrypt CEK and get oriType and oriValue + values back */ + ret = oriEncryptCb(pkcs7, pkcs7->cek, pkcs7->cekSz, oriType, &oriTypeSz, + oriValue, &oriValueSz, pkcs7->oriEncryptCtx); + if (ret != 0) { + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + oriTypeLenSz = SetLength(oriTypeSz, oriTypeLen); + + recipSeqSz = SetImplicit(ASN_SEQUENCE, 4, 1 + oriTypeLenSz + oriTypeSz + + oriValueSz, recipSeq); + + idx = 0; + XMEMCPY(recip->recip + idx, recipSeq, recipSeqSz); + idx += recipSeqSz; + /* oriType */ + recip->recip[idx] = ASN_OBJECT_ID; + idx += 1; + XMEMCPY(recip->recip + idx, oriTypeLen, oriTypeLenSz); + idx += oriTypeLenSz; + XMEMCPY(recip->recip + idx, oriType, oriTypeSz); + idx += oriTypeSz; + /* oriValue, input MUST already be ASN.1 encoded */ + XMEMCPY(recip->recip + idx, oriValue, oriValueSz); + idx += oriValueSz; + + /* store recipient size */ + recip->recipSz = idx; + recip->recipType = PKCS7_ORI; + recip->recipVersion = 4; + + /* add recipient to recip list */ + if (pkcs7->recipList == NULL) { + pkcs7->recipList = recip; + } else { + lastRecip = pkcs7->recipList; + while (lastRecip->next != NULL) { + lastRecip = lastRecip->next; + } + lastRecip->next = recip; + } + + (void)options; + + return idx; +} + +#if !defined(NO_PWDBASED) && !defined(NO_SHA) + + +static int wc_PKCS7_GenerateKEK_PWRI(PKCS7* pkcs7, byte* passwd, word32 pLen, + byte* salt, word32 saltSz, int kdfOID, + int prfOID, int iterations, byte* out, + word32 outSz) +{ + int ret; + + if (pkcs7 == NULL || passwd == NULL || salt == NULL || out == NULL) + return BAD_FUNC_ARG; + + switch (kdfOID) { + + case PBKDF2_OID: + + ret = wc_PBKDF2(out, passwd, pLen, salt, saltSz, iterations, + outSz, prfOID); + if (ret != 0) { + return ret; + } + + break; + + default: + WOLFSSL_MSG("Unsupported KDF OID"); + return PKCS7_OID_E; + } + + return 0; +} + + +/* RFC3211 (Section 2.3.1) key wrap algorithm (id-alg-PWRI-KEK). 
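+ *
+ * The block that gets encrypted is laid out as (mirroring the code below):
+ *     out[0]            CEK length, one byte
+ *     out[1..3]         one's complement of the first three CEK bytes
+ *     out[4..4+cekSz)   the CEK itself
+ *     remainder         random padding out to a cipher block boundary
+ * and is then encrypted twice, the second pass using the last ciphertext
+ * block of the first pass as its IV.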
+ *
+ * Returns output size on success, negative upon error */
+static int wc_PKCS7_PwriKek_KeyWrap(PKCS7* pkcs7, const byte* kek, word32 kekSz,
+                                    const byte* cek, word32 cekSz,
+                                    byte* out, word32 *outSz,
+                                    const byte* iv, word32 ivSz, int algID)
+{
+    WC_RNG rng;
+    int blockSz, outLen, ret;
+    word32 padSz;
+    byte* lastBlock;
+
+    if (kek == NULL || cek == NULL || iv == NULL || outSz == NULL)
+        return BAD_FUNC_ARG;
+
+    /* get encryption algorithm block size */
+    blockSz = wc_PKCS7_GetOIDBlockSize(algID);
+    if (blockSz < 0)
+        return blockSz;
+
+    /* get pad bytes needed to block boundary */
+    padSz = blockSz - ((4 + cekSz) % blockSz);
+    outLen = 4 + cekSz + padSz;
+
+    /* must be at least two blocks long */
+    if (outLen < 2 * blockSz)
+        padSz += blockSz;
+
+    /* if user set out to NULL, give back required length */
+    if (out == NULL) {
+        *outSz = outLen;
+        return LENGTH_ONLY_E;
+    }
+
+    /* verify output buffer is large enough */
+    if (*outSz < (word32)outLen)
+        return BUFFER_E;
+
+    out[0] = cekSz;
+    out[1] = ~cek[0];
+    out[2] = ~cek[1];
+    out[3] = ~cek[2];
+    XMEMCPY(out + 4, cek, cekSz);
+
+    /* random padding of size padSz */
+    ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId);
+    if (ret != 0)
+        return ret;
+
+    ret = wc_RNG_GenerateBlock(&rng, out + 4 + cekSz, padSz);
+
+    if (ret == 0) {
+        /* encrypt, normal */
+        ret = wc_PKCS7_EncryptContent(algID, (byte*)kek, kekSz, (byte*)iv,
+                                      ivSz, NULL, 0, NULL, 0, out, outLen, out);
+    }
+
+    if (ret == 0) {
+        /* encrypt again, using last ciphertext block as IV */
+        lastBlock = out + (((outLen / blockSz) - 1) * blockSz);
+        ret = wc_PKCS7_EncryptContent(algID, (byte*)kek, kekSz, lastBlock,
+                                      blockSz, NULL, 0, NULL, 0, out,
+                                      outLen, out);
+    }
+
+    if (ret == 0) {
+        *outSz = outLen;
+    } else {
+        outLen = ret;
+    }
+
+    wc_FreeRng(&rng);
+
+    return outLen;
+}
+
+
+/* RFC3211 (Section 2.3.2) key unwrap algorithm (id-alg-PWRI-KEK).
+ *
+ * Returns cek size on success, negative upon error */
+static int wc_PKCS7_PwriKek_KeyUnWrap(PKCS7* pkcs7, const byte* kek,
+                                      word32 kekSz, const byte* in, word32 inSz,
+                                      byte* out, word32 outSz, const byte* iv,
+                                      word32 ivSz, int algID)
+{
+    int blockSz, cekLen, ret;
+    byte* tmpIv = NULL;
+    byte* lastBlock = NULL;
+    byte* outTmp = NULL;
+
+    if (pkcs7 == NULL || kek == NULL || in == NULL ||
+        out == NULL || iv == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    outTmp = (byte*)XMALLOC(inSz, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (outTmp == NULL)
+        return MEMORY_E;
+
+    /* get encryption algorithm block size */
+    blockSz = wc_PKCS7_GetOIDBlockSize(algID);
+    if (blockSz < 0) {
+        XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return blockSz;
+    }
+
+    /* input needs to be blockSz multiple and at least 2 * blockSz */
+    if (((inSz % blockSz) != 0) || (inSz < (2 * (word32)blockSz))) {
+        WOLFSSL_MSG("PWRI-KEK unwrap input must be a multiple of block size "
+                    "and at least 2 times block size");
+        XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return BAD_FUNC_ARG;
+    }
+
+    /* use block out[n-1] as IV to decrypt block out[n] */
+    lastBlock = (byte*)in + inSz - blockSz;
+    tmpIv = lastBlock - blockSz;
+
+    /* decrypt last block */
+    ret = wc_PKCS7_DecryptContent(pkcs7, algID, (byte*)kek, kekSz, tmpIv,
+                                  blockSz, NULL, 0, NULL, 0, lastBlock, blockSz,
+                                  outTmp + inSz - blockSz);
+
+    if (ret == 0) {
+        /* using last decrypted block as IV, decrypt [0 ... n-1] blocks */
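+        /* note the order: the wrap's second encryption pass is undone
+         * first, so the plaintext of the last block can serve as the IV
+         * for the remaining blocks here */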
+        lastBlock = outTmp + inSz - blockSz;
+        ret = wc_PKCS7_DecryptContent(pkcs7, algID, (byte*)kek, kekSz,
+            lastBlock, blockSz, NULL, 0, NULL, 0, (byte*)in, inSz - blockSz,
+            outTmp);
+    }
+
+    if (ret == 0) {
+        /* decrypt using original kek and iv */
+        ret = wc_PKCS7_DecryptContent(pkcs7, algID, (byte*)kek, kekSz,
+            (byte*)iv, ivSz, NULL, 0, NULL, 0, outTmp, inSz, outTmp);
+    }
+
+    if (ret != 0) {
+        ForceZero(outTmp, inSz);
+        XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return ret;
+    }
+
+    cekLen = outTmp[0];
+
+    /* verify length */
+    if ((word32)cekLen > inSz) {
+        ForceZero(outTmp, inSz);
+        XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return BAD_FUNC_ARG;
+    }
+
+    /* verify check bytes */
+    if ((outTmp[1] ^ outTmp[4]) != 0xFF ||
+        (outTmp[2] ^ outTmp[5]) != 0xFF ||
+        (outTmp[3] ^ outTmp[6]) != 0xFF) {
+        ForceZero(outTmp, inSz);
+        XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return BAD_FUNC_ARG;
+    }
+
+    if (outSz < (word32)cekLen) {
+        ForceZero(outTmp, inSz);
+        XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return BUFFER_E;
+    }
+
+    XMEMCPY(out, outTmp + 4, outTmp[0]);
+    ForceZero(outTmp, inSz);
+    XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+    return cekLen;
+}
+
+
+/* Encode and add CMS EnvelopedData PWRI (PasswordRecipientInfo) RecipientInfo
+ * to CMS/PKCS#7 EnvelopedData structure.
+ *
+ * Return 0 on success, negative upon error */
+int wc_PKCS7_AddRecipient_PWRI(PKCS7* pkcs7, byte* passwd, word32 pLen,
+                               byte* salt, word32 saltSz, int kdfOID,
+                               int hashOID, int iterations, int kekEncryptOID,
+                               int options)
+{
+    Pkcs7EncodedRecip* recip = NULL;
+    Pkcs7EncodedRecip* lastRecip = NULL;
+
+    /* PasswordRecipientInfo */
+    byte recipSeq[MAX_SEQ_SZ];
+    byte ver[MAX_VERSION_SZ];
+    word32 recipSeqSz, verSz;
+
+    /* KeyDerivationAlgorithmIdentifier */
+    byte kdfAlgoIdSeq[MAX_SEQ_SZ];
+    byte kdfAlgoId[MAX_OID_SZ];
+    byte kdfParamsSeq[MAX_SEQ_SZ];          /* PBKDF2-params */
+    byte kdfSaltOctetStr[MAX_OCTET_STR_SZ]; /* salt OCTET STRING */
+    byte kdfIterations[MAX_VERSION_SZ];
+    word32 kdfAlgoIdSeqSz, kdfAlgoIdSz;
+    word32 kdfParamsSeqSz, kdfSaltOctetStrSz, kdfIterationsSz;
+    /* OPTIONAL: keyLength, not supported yet */
+    /* OPTIONAL: prf AlgorithmIdentifier, not supported yet */
+
+    /* KeyEncryptionAlgorithmIdentifier */
+    byte keyEncAlgoIdSeq[MAX_SEQ_SZ];
+    byte keyEncAlgoId[MAX_OID_SZ];          /* id-alg-PWRI-KEK */
+    byte pwriEncAlgoId[MAX_ALGO_SZ];
+    byte ivOctetString[MAX_OCTET_STR_SZ];
+    word32 keyEncAlgoIdSeqSz, keyEncAlgoIdSz;
+    word32 pwriEncAlgoIdSz, ivOctetStringSz;
+
+    /* EncryptedKey */
+    byte encKeyOctetStr[MAX_OCTET_STR_SZ];
+    word32 encKeyOctetStrSz;
+
+    byte tmpIv[MAX_CONTENT_IV_SIZE];
+    byte* encryptedKey = NULL;
+    byte* kek = NULL;
+
+    int cekKeySz = 0, kekKeySz = 0, kekBlockSz = 0, ret = 0;
+    int encryptOID;
+    word32 idx, totalSz = 0, encryptedKeySz;
+
+    if (pkcs7 == NULL || passwd == NULL || pLen == 0 ||
+        salt == NULL || saltSz == 0) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* allow user to use different KEK encryption algorithm than used for
+     * main content encryption algorithm, if passed in */
+    if (kekEncryptOID != 0) {
+        encryptOID = kekEncryptOID;
+    } else {
+        encryptOID = pkcs7->encryptOID;
+    }
+
+    /* get content-encryption key size, based on algorithm */
+    cekKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID);
+    if (cekKeySz < 0)
+        return cekKeySz;
+
+    /* get KEK encryption key size, based on algorithm */
+    if (encryptOID != pkcs7->encryptOID) {
+        kekKeySz = wc_PKCS7_GetOIDKeySize(encryptOID);
+    } else {
+        kekKeySz = cekKeySz;
+    }
+
+    /*
get KEK encryption block size */ + kekBlockSz = wc_PKCS7_GetOIDBlockSize(encryptOID); + if (kekBlockSz < 0) + return kekBlockSz; + + /* generate random CEK */ + ret = PKCS7_GenerateContentEncryptionKey(pkcs7, cekKeySz); + if (ret < 0) + return ret; + + /* generate random IV */ + ret = wc_PKCS7_GenerateBlock(pkcs7, NULL, tmpIv, kekBlockSz); + if (ret != 0) + return ret; + + /* allocate memory for RecipientInfo, KEK, encrypted key */ + recip = (Pkcs7EncodedRecip*)XMALLOC(sizeof(Pkcs7EncodedRecip), + pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (recip == NULL) + return MEMORY_E; + + kek = (byte*)XMALLOC(kekKeySz, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (kek == NULL) { + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } + + encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, + pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (encryptedKey == NULL) { + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } + + encryptedKeySz = MAX_ENCRYPTED_KEY_SZ; + XMEMSET(recip, 0, sizeof(Pkcs7EncodedRecip)); + XMEMSET(kek, 0, kekKeySz); + XMEMSET(encryptedKey, 0, encryptedKeySz); + + /* generate KEK: expand password into KEK */ + ret = wc_PKCS7_GenerateKEK_PWRI(pkcs7, passwd, pLen, salt, saltSz, + kdfOID, hashOID, iterations, kek, + kekKeySz); + if (ret < 0) { + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + /* generate encrypted key: encrypt CEK with KEK */ + ret = wc_PKCS7_PwriKek_KeyWrap(pkcs7, kek, kekKeySz, pkcs7->cek, + pkcs7->cekSz, encryptedKey, &encryptedKeySz, + tmpIv, kekBlockSz, encryptOID); + if (ret < 0) { + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + encryptedKeySz = ret; + + /* put together encrypted key OCTET STRING */ + encKeyOctetStrSz = SetOctetString(encryptedKeySz, encKeyOctetStr); + totalSz += (encKeyOctetStrSz + encryptedKeySz); + + /* put together IV OCTET STRING */ + ivOctetStringSz = SetOctetString(kekBlockSz, ivOctetString); + totalSz += (ivOctetStringSz + kekBlockSz); + + /* set PWRIAlgorithms AlgorithmIdentifier, adding (ivOctetStringSz + + blockKeySz) for IV OCTET STRING */ + pwriEncAlgoIdSz = SetAlgoID(encryptOID, pwriEncAlgoId, + oidBlkType, ivOctetStringSz + kekBlockSz); + totalSz += pwriEncAlgoIdSz; + + /* set KeyEncryptionAlgorithms OID */ + ret = wc_SetContentType(PWRI_KEK_WRAP, keyEncAlgoId, sizeof(keyEncAlgoId)); + if (ret <= 0) { + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + keyEncAlgoIdSz = ret; + totalSz += keyEncAlgoIdSz; + + /* KeyEncryptionAlgorithm SEQ */ + keyEncAlgoIdSeqSz = SetSequence(keyEncAlgoIdSz + pwriEncAlgoIdSz + + ivOctetStringSz + kekBlockSz, + keyEncAlgoIdSeq); + totalSz += keyEncAlgoIdSeqSz; + + /* set KDF salt */ + kdfSaltOctetStrSz = SetOctetString(saltSz, kdfSaltOctetStr); + totalSz += (kdfSaltOctetStrSz + saltSz); + + /* set KDF iteration count */ + kdfIterationsSz = SetMyVersion(iterations, kdfIterations, 0); + totalSz += kdfIterationsSz; + + /* set KDF params SEQ */ + kdfParamsSeqSz = SetSequence(kdfSaltOctetStrSz + saltSz + kdfIterationsSz, + kdfParamsSeq); + totalSz += kdfParamsSeqSz; + + /* set KDF algo OID */ + ret = wc_SetContentType(kdfOID, kdfAlgoId, sizeof(kdfAlgoId)); + if (ret <= 0) { + XFREE(recip, 
pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return ret;
+    }
+    kdfAlgoIdSz = ret;
+    totalSz += kdfAlgoIdSz;
+
+    /* set KeyDerivationAlgorithmIdentifier EXPLICIT [0] SEQ */
+    kdfAlgoIdSeqSz = SetExplicit(0, kdfAlgoIdSz + kdfParamsSeqSz +
+                                 kdfSaltOctetStrSz + saltSz + kdfIterationsSz,
+                                 kdfAlgoIdSeq);
+    totalSz += kdfAlgoIdSeqSz;
+
+    /* set PasswordRecipientInfo CMSVersion, MUST be 0 */
+    verSz = SetMyVersion(0, ver, 0);
+    totalSz += verSz;
+    recip->recipVersion = 0;
+
+    /* set PasswordRecipientInfo SEQ */
+    recipSeqSz = SetImplicit(ASN_SEQUENCE, 3, totalSz, recipSeq);
+    totalSz += recipSeqSz;
+
+    if (totalSz > MAX_RECIP_SZ) {
+        WOLFSSL_MSG("CMS Recipient output buffer too small");
+        XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+        return BUFFER_E;
+    }
+
+    idx = 0;
+    XMEMCPY(recip->recip + idx, recipSeq, recipSeqSz);
+    idx += recipSeqSz;
+    XMEMCPY(recip->recip + idx, ver, verSz);
+    idx += verSz;
+    XMEMCPY(recip->recip + idx, kdfAlgoIdSeq, kdfAlgoIdSeqSz);
+    idx += kdfAlgoIdSeqSz;
+    XMEMCPY(recip->recip + idx, kdfAlgoId, kdfAlgoIdSz);
+    idx += kdfAlgoIdSz;
+    XMEMCPY(recip->recip + idx, kdfParamsSeq, kdfParamsSeqSz);
+    idx += kdfParamsSeqSz;
+    XMEMCPY(recip->recip + idx, kdfSaltOctetStr, kdfSaltOctetStrSz);
+    idx += kdfSaltOctetStrSz;
+    XMEMCPY(recip->recip + idx, salt, saltSz);
+    idx += saltSz;
+    XMEMCPY(recip->recip + idx, kdfIterations, kdfIterationsSz);
+    idx += kdfIterationsSz;
+    XMEMCPY(recip->recip + idx, keyEncAlgoIdSeq, keyEncAlgoIdSeqSz);
+    idx += keyEncAlgoIdSeqSz;
+    XMEMCPY(recip->recip + idx, keyEncAlgoId, keyEncAlgoIdSz);
+    idx += keyEncAlgoIdSz;
+    XMEMCPY(recip->recip + idx, pwriEncAlgoId, pwriEncAlgoIdSz);
+    idx += pwriEncAlgoIdSz;
+    XMEMCPY(recip->recip + idx, ivOctetString, ivOctetStringSz);
+    idx += ivOctetStringSz;
+    XMEMCPY(recip->recip + idx, tmpIv, kekBlockSz);
+    idx += kekBlockSz;
+    XMEMCPY(recip->recip + idx, encKeyOctetStr, encKeyOctetStrSz);
+    idx += encKeyOctetStrSz;
+    XMEMCPY(recip->recip + idx, encryptedKey, encryptedKeySz);
+    idx += encryptedKeySz;
+
+    ForceZero(kek, kekBlockSz);
+    ForceZero(encryptedKey, encryptedKeySz);
+    XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+    XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
+    /* store recipient size */
+    recip->recipSz = idx;
+    recip->recipType = PKCS7_PWRI;
+
+    /* add recipient to recip list */
+    if (pkcs7->recipList == NULL) {
+        pkcs7->recipList = recip;
+    } else {
+        lastRecip = pkcs7->recipList;
+        while (lastRecip->next != NULL) {
+            lastRecip = lastRecip->next;
+        }
+        lastRecip->next = recip;
+    }
+
+    (void)options;
+
+    return idx;
+}
+
+/* Import password and KDF settings into a PKCS7 structure. Used for setting
+ * the password info for decrypting an EnvelopedData PWRI RecipientInfo.
+ *
+ * Returns 0 on success, negative upon error */
+int wc_PKCS7_SetPassword(PKCS7* pkcs7, byte* passwd, word32 pLen)
+{
+    if (pkcs7 == NULL || passwd == NULL || pLen == 0)
+        return BAD_FUNC_ARG;
+
+    pkcs7->pass = passwd;
+    pkcs7->passSz = pLen;
+
+    return 0;
+}
+
+#endif /* !NO_PWDBASED && !NO_SHA */
+
+
+/* Encode and add CMS EnvelopedData KEKRI (KEKRecipientInfo) RecipientInfo
+ * to CMS/PKCS#7 EnvelopedData structure.
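+ *
+ * A minimal call sketch, assuming a pre-shared KEK and the AES256_WRAP
+ * key wrap OID from asn.h (kek, kekSz, keyId and keyIdSz are supplied by
+ * the caller):
+ *
+ *     ret = wc_PKCS7_AddRecipient_KEKRI(pkcs7, AES256_WRAP, kek, kekSz,
+ *                                       keyId, keyIdSz, NULL, NULL, 0,
+ *                                       NULL, 0, 0);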
+ * + * pkcs7 - pointer to initialized PKCS7 structure + * keyWrapOID - OID sum of key wrap algorithm identifier + * kek - key encryption key + * kekSz - size of kek, bytes + * keyID - key-encryption key identifier, pre-distributed to endpoints + * keyIDSz - size of keyID, bytes + * timePtr - pointer to "time_t", which is typically "long" (OPTIONAL) + * otherOID - ASN.1 encoded OID of other attribute (OPTIONAL) + * otherOIDSz - size of otherOID, bytes (OPTIONAL) + * other - other attribute (OPTIONAL) + * otherSz - size of other (OPTIONAL) + * + * Returns 0 on success, negative upon error */ +int wc_PKCS7_AddRecipient_KEKRI(PKCS7* pkcs7, int keyWrapOID, byte* kek, + word32 kekSz, byte* keyId, word32 keyIdSz, + void* timePtr, byte* otherOID, + word32 otherOIDSz, byte* other, word32 otherSz, + int options) +{ + Pkcs7EncodedRecip* recip = NULL; + Pkcs7EncodedRecip* lastRecip = NULL; + + byte recipSeq[MAX_SEQ_SZ]; + byte ver[MAX_VERSION_SZ]; + byte kekIdSeq[MAX_SEQ_SZ]; + byte kekIdOctetStr[MAX_OCTET_STR_SZ]; + byte genTime[ASN_GENERALIZED_TIME_SIZE]; + byte otherAttSeq[MAX_SEQ_SZ]; + byte encAlgoId[MAX_ALGO_SZ]; + byte encKeyOctetStr[MAX_OCTET_STR_SZ]; +#ifdef WOLFSSL_SMALL_STACK + byte* encryptedKey; +#else + byte encryptedKey[MAX_ENCRYPTED_KEY_SZ]; +#endif + + int blockKeySz = 0, ret = 0, direction; + word32 idx = 0; + word32 totalSz = 0; + word32 recipSeqSz = 0, verSz = 0; + word32 kekIdSeqSz = 0, kekIdOctetStrSz = 0; + word32 otherAttSeqSz = 0, encAlgoIdSz = 0, encKeyOctetStrSz = 0; + int encryptedKeySz; + + int timeSz = 0; +#ifndef NO_ASN_TIME + time_t* tm = NULL; +#endif + + if (pkcs7 == NULL || kek == NULL || keyId == NULL) + return BAD_FUNC_ARG; + + recip = (Pkcs7EncodedRecip*)XMALLOC(sizeof(Pkcs7EncodedRecip), pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (recip == NULL) + return MEMORY_E; + + XMEMSET(recip, 0, sizeof(Pkcs7EncodedRecip)); + + /* get key size for content-encryption key based on algorithm */ + blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID); + if (blockKeySz < 0) { + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return blockKeySz; + } + + /* generate random content encryption key, if needed */ + ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz); + if (ret < 0) { + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + /* EncryptedKey */ +#ifdef WOLFSSL_SMALL_STACK + encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (encryptedKey == NULL) { + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } +#endif + encryptedKeySz = MAX_ENCRYPTED_KEY_SZ; + XMEMSET(encryptedKey, 0, encryptedKeySz); + + #ifndef NO_AES + direction = AES_ENCRYPTION; + #else + direction = DES_ENCRYPTION; + #endif + + encryptedKeySz = wc_PKCS7_KeyWrap(pkcs7->cek, pkcs7->cekSz, kek, kekSz, + encryptedKey, encryptedKeySz, keyWrapOID, + direction); + if (encryptedKeySz < 0) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return encryptedKeySz; + } + /* handle a zero size encKey case as WC_KEY_SIZE_E */ + if (encryptedKeySz == 0 || encryptedKeySz > MAX_ENCRYPTED_KEY_SZ) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #endif + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return WC_KEY_SIZE_E; + } + + encKeyOctetStrSz = SetOctetString(encryptedKeySz, encKeyOctetStr); + totalSz += (encKeyOctetStrSz + encryptedKeySz); + + /* KeyEncryptionAlgorithmIdentifier */ + encAlgoIdSz = 
SetAlgoID(keyWrapOID, encAlgoId, oidKeyWrapType, 0); + totalSz += encAlgoIdSz; + + /* KEKIdentifier: keyIdentifier */ + kekIdOctetStrSz = SetOctetString(keyIdSz, kekIdOctetStr); + totalSz += (kekIdOctetStrSz + keyIdSz); + + /* KEKIdentifier: GeneralizedTime (OPTIONAL) */ +#ifndef NO_ASN_TIME + if (timePtr != NULL) { + tm = (time_t*)timePtr; + timeSz = GetAsnTimeString(tm, genTime, sizeof(genTime)); + if (timeSz < 0) { + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #endif + return timeSz; + } + totalSz += timeSz; + } +#endif + + /* KEKIdentifier: OtherKeyAttribute SEQ (OPTIONAL) */ + if (other != NULL && otherSz > 0) { + otherAttSeqSz = SetSequence(otherOIDSz + otherSz, otherAttSeq); + totalSz += otherAttSeqSz + otherOIDSz + otherSz; + } + + /* KEKIdentifier SEQ */ + kekIdSeqSz = SetSequence(kekIdOctetStrSz + keyIdSz + timeSz + + otherAttSeqSz + otherOIDSz + otherSz, kekIdSeq); + totalSz += kekIdSeqSz; + + /* version */ + verSz = SetMyVersion(4, ver, 0); + totalSz += verSz; + recip->recipVersion = 4; + + /* KEKRecipientInfo SEQ */ + recipSeqSz = SetImplicit(ASN_SEQUENCE, 2, totalSz, recipSeq); + totalSz += recipSeqSz; + + if (totalSz > MAX_RECIP_SZ) { + WOLFSSL_MSG("CMS Recipient output buffer too small"); + XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #endif + return BUFFER_E; + } + + XMEMCPY(recip->recip + idx, recipSeq, recipSeqSz); + idx += recipSeqSz; + XMEMCPY(recip->recip + idx, ver, verSz); + idx += verSz; + XMEMCPY(recip->recip + idx, kekIdSeq, kekIdSeqSz); + idx += kekIdSeqSz; + XMEMCPY(recip->recip + idx, kekIdOctetStr, kekIdOctetStrSz); + idx += kekIdOctetStrSz; + XMEMCPY(recip->recip + idx, keyId, keyIdSz); + idx += keyIdSz; + if (timePtr != NULL) { + XMEMCPY(recip->recip + idx, genTime, timeSz); + idx += timeSz; + } + if (other != NULL && otherSz > 0) { + XMEMCPY(recip->recip + idx, otherAttSeq, otherAttSeqSz); + idx += otherAttSeqSz; + XMEMCPY(recip->recip + idx, otherOID, otherOIDSz); + idx += otherOIDSz; + XMEMCPY(recip->recip + idx, other, otherSz); + idx += otherSz; + } + XMEMCPY(recip->recip + idx, encAlgoId, encAlgoIdSz); + idx += encAlgoIdSz; + XMEMCPY(recip->recip + idx, encKeyOctetStr, encKeyOctetStrSz); + idx += encKeyOctetStrSz; + XMEMCPY(recip->recip + idx, encryptedKey, encryptedKeySz); + idx += encryptedKeySz; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); +#endif + + /* store recipient size */ + recip->recipSz = idx; + recip->recipType = PKCS7_KEKRI; + + /* add recipient to recip list */ + if (pkcs7->recipList == NULL) { + pkcs7->recipList = recip; + } else { + lastRecip = pkcs7->recipList; + while(lastRecip->next != NULL) { + lastRecip = lastRecip->next; + } + lastRecip->next = recip; + } + + (void)options; + + return idx; +} + + +static int wc_PKCS7_GetCMSVersion(PKCS7* pkcs7, int cmsContentType) +{ + int version = -1; + + if (pkcs7 == NULL) + return BAD_FUNC_ARG; + + switch (cmsContentType) { + case ENVELOPED_DATA: + + /* NOTE: EnvelopedData does not currently support + originatorInfo or unprotectedAttributes. 
When either of these
+               are added, version checking below needs to be updated to match
+               Section 6.1 of RFC 5652 */
+
+            /* if RecipientInfos include pwri or ori, version is 3 */
+            if (wc_PKCS7_RecipientListIncludesType(pkcs7, PKCS7_PWRI) ||
+                wc_PKCS7_RecipientListIncludesType(pkcs7, PKCS7_ORI)) {
+                version = 3;
+                break;
+            }
+
+            /* if unprotectedAttrs is absent AND all RecipientInfo structs
+               are version 0, version is 0 */
+            if (wc_PKCS7_RecipientListVersionsAllZero(pkcs7)) {
+                version = 0;
+                break;
+            }
+
+            /* otherwise, version is 2 */
+            version = 2;
+            break;
+
+        default:
+            break;
+    }
+
+    return version;
+}
+
+
+/* build PKCS#7 envelopedData content type, return enveloped size */
+int wc_PKCS7_EncodeEnvelopedData(PKCS7* pkcs7, byte* output, word32 outputSz)
+{
+    int ret, idx = 0;
+    int totalSz, padSz, encryptedOutSz;
+
+    int contentInfoSeqSz = 0, outerContentTypeSz = 0, outerContentSz;
+    byte contentInfoSeq[MAX_SEQ_SZ];
+    byte outerContentType[MAX_ALGO_SZ];
+    byte outerContent[MAX_SEQ_SZ];
+
+    int kariVersion;
+    int envDataSeqSz, verSz;
+    byte envDataSeq[MAX_SEQ_SZ];
+    byte ver[MAX_VERSION_SZ];
+
+    WC_RNG rng;
+    int blockSz, blockKeySz;
+    byte* plain;
+    byte* encryptedContent;
+
+    Pkcs7EncodedRecip* tmpRecip = NULL;
+    int recipSz, recipSetSz;
+    byte recipSet[MAX_SET_SZ];
+
+    int encContentOctetSz, encContentSeqSz, contentTypeSz;
+    int contentEncAlgoSz, ivOctetStringSz;
+    byte encContentSeq[MAX_SEQ_SZ];
+    byte contentType[MAX_ALGO_SZ];
+    byte contentEncAlgo[MAX_ALGO_SZ];
+    byte tmpIv[MAX_CONTENT_IV_SIZE];
+    byte ivOctetString[MAX_OCTET_STR_SZ];
+    byte encContentOctet[MAX_OCTET_STR_SZ];
+
+    if (pkcs7 == NULL || pkcs7->content == NULL || pkcs7->contentSz == 0)
+        return BAD_FUNC_ARG;
+
+    if (output == NULL || outputSz == 0)
+        return BAD_FUNC_ARG;
+
+    blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID);
+    if (blockKeySz < 0)
+        return blockKeySz;
+
+    blockSz = wc_PKCS7_GetOIDBlockSize(pkcs7->encryptOID);
+    if (blockSz < 0)
+        return blockSz;
+
+    if (pkcs7->contentOID != FIRMWARE_PKG_DATA) {
+        /* outer content type */
+        ret = wc_SetContentType(ENVELOPED_DATA, outerContentType,
+                                sizeof(outerContentType));
+        if (ret < 0)
+            return ret;
+
+        outerContentTypeSz = ret;
+    }
+
+    /* generate random content encryption key */
+    ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz);
+    if (ret != 0) {
+        return ret;
+    }
+
+    /* build RecipientInfo, only if user manually set singleCert and size */
+    if (pkcs7->singleCert != NULL && pkcs7->singleCertSz > 0) {
+        switch (pkcs7->publicKeyOID) {
+    #ifndef NO_RSA
+            case RSAk:
+                ret = wc_PKCS7_AddRecipient_KTRI(pkcs7, pkcs7->singleCert,
+                                                 pkcs7->singleCertSz, 0);
+                break;
+    #endif
+    #ifdef HAVE_ECC
+            case ECDSAk:
+                ret = wc_PKCS7_AddRecipient_KARI(pkcs7, pkcs7->singleCert,
+                                                 pkcs7->singleCertSz,
+                                                 pkcs7->keyWrapOID,
+                                                 pkcs7->keyAgreeOID, pkcs7->ukm,
+                                                 pkcs7->ukmSz, 0);
+                break;
+    #endif
+
+            default:
+                WOLFSSL_MSG("Unsupported RecipientInfo public key type");
+                return BAD_FUNC_ARG;
+        };
+
+        if (ret < 0) {
+            WOLFSSL_MSG("Failed to create RecipientInfo");
+            return ret;
+        }
+    }
+
+    recipSz = wc_PKCS7_GetRecipientListSize(pkcs7);
+    if (recipSz < 0) {
+        /* propagate the error code from GetRecipientListSize */
+        return recipSz;
+
+    } else if (recipSz == 0) {
+        WOLFSSL_MSG("You must add at least one CMS recipient");
+        return PKCS7_RECIP_E;
+    }
+    recipSetSz = SetSet(recipSz, recipSet);
+
+    /* version, defined in Section 6.1 of RFC 5652 */
+    kariVersion = wc_PKCS7_GetCMSVersion(pkcs7, ENVELOPED_DATA);
+    if (kariVersion < 0) {
+        WOLFSSL_MSG("Failed to set CMS EnvelopedData version");
+        return PKCS7_RECIP_E;
+    }
+
+    verSz =
SetMyVersion(kariVersion, ver, 0); + + ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId); + if (ret != 0) + return ret; + + /* generate IV for block cipher */ + ret = wc_PKCS7_GenerateBlock(pkcs7, &rng, tmpIv, blockSz); + wc_FreeRng(&rng); + if (ret != 0) + return ret; + + /* EncryptedContentInfo */ + ret = wc_SetContentType(pkcs7->contentOID, contentType, + sizeof(contentType)); + if (ret < 0) + return ret; + + contentTypeSz = ret; + + /* allocate encrypted content buffer and PKCS#7 padding */ + padSz = wc_PKCS7_GetPadSize(pkcs7->contentSz, blockSz); + if (padSz < 0) + return padSz; + + encryptedOutSz = pkcs7->contentSz + padSz; + + plain = (byte*)XMALLOC(encryptedOutSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (plain == NULL) + return MEMORY_E; + + ret = wc_PKCS7_PadData(pkcs7->content, pkcs7->contentSz, plain, + encryptedOutSz, blockSz); + if (ret < 0) { + XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + encryptedContent = (byte*)XMALLOC(encryptedOutSz, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (encryptedContent == NULL) { + XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } + + /* put together IV OCTET STRING */ + ivOctetStringSz = SetOctetString(blockSz, ivOctetString); + + /* build up our ContentEncryptionAlgorithmIdentifier sequence, + * adding (ivOctetStringSz + blockSz) for IV OCTET STRING */ + contentEncAlgoSz = SetAlgoID(pkcs7->encryptOID, contentEncAlgo, + oidBlkType, ivOctetStringSz + blockSz); + + if (contentEncAlgoSz == 0) { + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return BAD_FUNC_ARG; + } + + /* encrypt content */ + ret = wc_PKCS7_EncryptContent(pkcs7->encryptOID, pkcs7->cek, + pkcs7->cekSz, tmpIv, blockSz, NULL, 0, NULL, 0, plain, + encryptedOutSz, encryptedContent); + + if (ret != 0) { + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + encContentOctetSz = SetImplicit(ASN_OCTET_STRING, 0, encryptedOutSz, + encContentOctet); + + encContentSeqSz = SetSequence(contentTypeSz + contentEncAlgoSz + + ivOctetStringSz + blockSz + + encContentOctetSz + encryptedOutSz, + encContentSeq); + + /* keep track of sizes for outer wrapper layering */ + totalSz = verSz + recipSetSz + recipSz + encContentSeqSz + contentTypeSz + + contentEncAlgoSz + ivOctetStringSz + blockSz + + encContentOctetSz + encryptedOutSz; + + /* EnvelopedData */ + envDataSeqSz = SetSequence(totalSz, envDataSeq); + totalSz += envDataSeqSz; + + /* outer content */ + outerContentSz = SetExplicit(0, totalSz, outerContent); + totalSz += outerContentTypeSz; + totalSz += outerContentSz; + + if (pkcs7->contentOID != FIRMWARE_PKG_DATA) { + /* ContentInfo */ + contentInfoSeqSz = SetSequence(totalSz, contentInfoSeq); + totalSz += contentInfoSeqSz; + } + + if (totalSz > (int)outputSz) { + WOLFSSL_MSG("Pkcs7_encrypt output buffer too small"); + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + return BUFFER_E; + } + + if (pkcs7->contentOID != FIRMWARE_PKG_DATA) { + XMEMCPY(output + idx, contentInfoSeq, contentInfoSeqSz); + idx += contentInfoSeqSz; + XMEMCPY(output + idx, outerContentType, outerContentTypeSz); + idx += outerContentTypeSz; + XMEMCPY(output + idx, outerContent, outerContentSz); + idx += outerContentSz; + } + XMEMCPY(output + idx, envDataSeq, envDataSeqSz); + idx += envDataSeqSz; + XMEMCPY(output + idx, ver, verSz); + idx += verSz; + XMEMCPY(output + idx, recipSet, 
recipSetSz); + idx += recipSetSz; + /* copy in recipients from list */ + tmpRecip = pkcs7->recipList; + while (tmpRecip != NULL) { + XMEMCPY(output + idx, tmpRecip->recip, tmpRecip->recipSz); + idx += tmpRecip->recipSz; + tmpRecip = tmpRecip->next; + } + wc_PKCS7_FreeEncodedRecipientSet(pkcs7); + XMEMCPY(output + idx, encContentSeq, encContentSeqSz); + idx += encContentSeqSz; + XMEMCPY(output + idx, contentType, contentTypeSz); + idx += contentTypeSz; + XMEMCPY(output + idx, contentEncAlgo, contentEncAlgoSz); + idx += contentEncAlgoSz; + XMEMCPY(output + idx, ivOctetString, ivOctetStringSz); + idx += ivOctetStringSz; + XMEMCPY(output + idx, tmpIv, blockSz); + idx += blockSz; + XMEMCPY(output + idx, encContentOctet, encContentOctetSz); + idx += encContentOctetSz; + XMEMCPY(output + idx, encryptedContent, encryptedOutSz); + idx += encryptedOutSz; + + XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + + return idx; +} + +#ifndef NO_RSA +/* decode KeyTransRecipientInfo (ktri), return 0 on success, <0 on error */ +static int wc_PKCS7_DecryptKtri(PKCS7* pkcs7, byte* in, word32 inSz, + word32* idx, byte* decryptedKey, + word32* decryptedKeySz, int* recipFound) +{ + int length, encryptedKeySz = 0, ret = 0; + int keySz, version, sidType = 0; + word32 encOID; + word32 keyIdx; + byte issuerHash[KEYID_SIZE]; + byte* outKey = NULL; + byte* pkiMsg = in; + word32 pkiMsgSz = inSz; + byte tag; + + +#ifndef NO_PKCS7_STREAM + word32 tmpIdx = *idx; + long rc; +#endif +#ifdef WC_RSA_BLINDING + WC_RNG rng; +#endif + +#ifdef WOLFSSL_SMALL_STACK + mp_int* serialNum = NULL; + byte* encryptedKey = NULL; + RsaKey* privKey = NULL; +#else + mp_int serialNum[1]; + byte encryptedKey[MAX_ENCRYPTED_KEY_SZ]; + RsaKey privKey[1]; +#endif + + switch (pkcs7->state) { + case WC_PKCS7_DECRYPT_KTRI: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_VERSION_SZ, + &pkiMsg, idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, + in, inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + + #endif + if (GetMyVersion(pkiMsg, idx, &version, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (version == 0) { + sidType = CMS_ISSUER_AND_SERIAL_NUMBER; + } else if (version == 2) { + sidType = CMS_SKID; + } else { + return ASN_VERSION_E; + } + + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) { + break; + } + wc_PKCS7_StreamStoreVar(pkcs7, 0, sidType, version); + + /* @TODO getting total amount left because of GetInt call later on + * this could be optimized to stream better */ + pkcs7->stream->expected = (pkcs7->stream->maxLen - + pkcs7->stream->totalRd) + pkcs7->stream->length; + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_KTRI_2); + FALL_THROUGH; + + case WC_PKCS7_DECRYPT_KTRI_2: + #ifndef NO_PKCS7_STREAM + + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, pkcs7->stream->expected, + &pkiMsg, idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, + in, inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + + wc_PKCS7_StreamGetVar(pkcs7, NULL, &sidType, &version); + + /* @TODO get expected size for next part, does not account for + * GetInt call well */ + if (pkcs7->stream->expected == MAX_SEQ_SZ) { + int sz; + word32 lidx; + + if (sidType == CMS_ISSUER_AND_SERIAL_NUMBER) { + lidx = *idx; + ret = GetSequence(pkiMsg, &lidx, &sz, pkiMsgSz); + if (ret < 0) + return ret; + } + else { + 
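+                    /* CMS_SKID: step over the [0] IMPLICIT context tag,
+                     * then read the SubjectKeyIdentifier length */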
lidx = *idx + ASN_TAG_SZ; + ret = GetLength(pkiMsg, &lidx, &sz, pkiMsgSz); + if (ret < 0) + return ret; + } + + pkcs7->stream->expected = sz + MAX_ALGO_SZ + ASN_TAG_SZ + + MAX_LENGTH_SZ; + if (pkcs7->stream->length > 0 && + pkcs7->stream->length < pkcs7->stream->expected) { + return WC_PKCS7_WANT_READ_E; + } + } + #endif /* !NO_PKCS7_STREAM */ + + if (sidType == CMS_ISSUER_AND_SERIAL_NUMBER) { + + /* remove IssuerAndSerialNumber */ + if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (GetNameHash(pkiMsg, idx, issuerHash, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* if we found correct recipient, issuer hashes will match */ + if (XMEMCMP(issuerHash, pkcs7->issuerHash, KEYID_SIZE) == 0) { + *recipFound = 1; + } + + #ifdef WOLFSSL_SMALL_STACK + serialNum = (mp_int*)XMALLOC(sizeof(mp_int), pkcs7->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (serialNum == NULL) + return MEMORY_E; + #endif + + if (GetInt(serialNum, pkiMsg, idx, pkiMsgSz) < 0) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(serialNum, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return ASN_PARSE_E; + } + + mp_clear(serialNum); + + #ifdef WOLFSSL_SMALL_STACK + XFREE(serialNum, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + + } else { + /* remove SubjectKeyIdentifier */ + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) + return ASN_PARSE_E; + + if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (tag != ASN_OCTET_STRING) + return ASN_PARSE_E; + + if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* if we found correct recipient, SKID will match */ + if (XMEMCMP(pkiMsg + (*idx), pkcs7->issuerSubjKeyId, + KEYID_SIZE) == 0) { + *recipFound = 1; + } + (*idx) += KEYID_SIZE; + } + + if (GetAlgoId(pkiMsg, idx, &encOID, oidKeyType, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* key encryption algorithm must be RSA for now */ + if (encOID != RSAk) + return ALGO_ID_E; + + /* read encryptedKey */ + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (tag != ASN_OCTET_STRING) + return ASN_PARSE_E; + + if (GetLength(pkiMsg, idx, &encryptedKeySz, pkiMsgSz) < 0) { + return ASN_PARSE_E; + } + if (encryptedKeySz > MAX_ENCRYPTED_KEY_SZ) { + return BUFFER_E; + } + + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) { + break; + } + wc_PKCS7_StreamStoreVar(pkcs7, encryptedKeySz, sidType, version); + pkcs7->stream->expected = encryptedKeySz; + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_KTRI_3); + FALL_THROUGH; + + case WC_PKCS7_DECRYPT_KTRI_3: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, + pkcs7->stream->expected, &pkiMsg, idx)) != 0) { + return ret; + } + encryptedKeySz = pkcs7->stream->expected; + #endif + + #ifdef WOLFSSL_SMALL_STACK + encryptedKey = (byte*)XMALLOC(encryptedKeySz, pkcs7->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (encryptedKey == NULL) + return MEMORY_E; + #endif + + if (*recipFound == 1) + XMEMCPY(encryptedKey, &pkiMsg[*idx], encryptedKeySz); + *idx += encryptedKeySz; + + /* load private key */ + #ifdef WOLFSSL_SMALL_STACK + privKey = (RsaKey*)XMALLOC(sizeof(RsaKey), pkcs7->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (privKey == NULL) { + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + return MEMORY_E; + } + #endif + + ret = wc_InitRsaKey_ex(privKey, pkcs7->heap, INVALID_DEVID); + if (ret != 0) { 
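+            /* RSA key init failed; under WOLFSSL_SMALL_STACK free the
+             * heap scratch buffers allocated above before returning */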
+ #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return ret; + } + + if (pkcs7->privateKey != NULL && pkcs7->privateKeySz > 0) { + keyIdx = 0; + ret = wc_RsaPrivateKeyDecode(pkcs7->privateKey, &keyIdx, + privKey, pkcs7->privateKeySz); + } + else if (pkcs7->devId == INVALID_DEVID) { + ret = BAD_FUNC_ARG; + } + if (ret != 0) { + WOLFSSL_MSG("Failed to decode RSA private key"); + wc_FreeRsaKey(privKey); + #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return ret; + } + + /* decrypt encryptedKey */ + #ifdef WC_RSA_BLINDING + ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId); + if (ret == 0) { + ret = wc_RsaSetRNG(privKey, &rng); + } + #endif + if (ret == 0) { + keySz = wc_RsaPrivateDecryptInline(encryptedKey, encryptedKeySz, + &outKey, privKey); + #ifdef WC_RSA_BLINDING + wc_FreeRng(&rng); + #endif + } else { + keySz = ret; + } + wc_FreeRsaKey(privKey); + + if (keySz <= 0 || outKey == NULL) { + ForceZero(encryptedKey, encryptedKeySz); + #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return keySz; + } else { + *decryptedKeySz = keySz; + XMEMCPY(decryptedKey, outKey, keySz); + ForceZero(encryptedKey, encryptedKeySz); + } + + #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) { + break; + } + #endif + ret = 0; /* success */ + break; + + default: + WOLFSSL_MSG("PKCS7 Unknown KTRI decrypt state"); + ret = BAD_FUNC_ARG; + } + + return ret; +} +#endif /* !NO_RSA */ + +#ifdef HAVE_ECC + +/* remove ASN.1 OriginatorIdentifierOrKey, return 0 on success, <0 on error */ +static int wc_PKCS7_KariGetOriginatorIdentifierOrKey(WC_PKCS7_KARI* kari, + byte* pkiMsg, word32 pkiMsgSz, word32* idx) +{ + int ret, length; + word32 keyOID, oidSum = 0; + int curve_id = ECC_CURVE_DEF; + byte tag; + + if (kari == NULL || pkiMsg == NULL || idx == NULL) + return BAD_FUNC_ARG; + + /* remove OriginatorIdentifierOrKey */ + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) == 0 && + tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) { + if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + } else { + return ASN_PARSE_E; + } + + /* remove OriginatorPublicKey */ + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) == 0 && + tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) { + if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + } else { + return ASN_PARSE_E; + } + + /* remove AlgorithmIdentifier */ + if (GetAlgoId(pkiMsg, idx, &keyOID, oidKeyType, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (keyOID != ECDSAk) + return ASN_PARSE_E; + + /* optional algorithm parameters */ + ret = GetObjectId(pkiMsg, idx, &oidSum, oidIgnoreType, pkiMsgSz); + if (ret == 0) { + /* get curve id */ + curve_id = wc_ecc_get_oid(oidSum, NULL, 0); + if (curve_id < 0) + return ECC_CURVE_OID_E; + } + + /* remove ECPoint BIT STRING */ + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (tag != ASN_BIT_STRING) + return ASN_PARSE_E; + + if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) + return ASN_EXPECT_0_E; + + 
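+    /* the octet just read is the BIT STRING unused-bits count; an ECPoint
+     * has no unused bits, so anything but zero is rejected here (and the
+     * length-1 adjustment below skips over it) */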
if (tag != ASN_OTHER_TYPE) + return ASN_EXPECT_0_E; + + /* get sender ephemeral public ECDSA key */ + ret = wc_ecc_init_ex(kari->senderKey, kari->heap, kari->devId); + if (ret != 0) + return ret; + + kari->senderKeyInit = 1; + + /* length-1 for unused bits counter */ + ret = wc_ecc_import_x963_ex(pkiMsg + (*idx), length - 1, kari->senderKey, + curve_id); + if (ret != 0) { + ret = wc_EccPublicKeyDecode(pkiMsg, idx, kari->senderKey, *idx + length - 1); + if (ret != 0) + return ret; + } + else { + (*idx) += length - 1; + } + + return 0; +} + + +/* remove optional UserKeyingMaterial if available, return 0 on success, + * < 0 on error */ +static int wc_PKCS7_KariGetUserKeyingMaterial(WC_PKCS7_KARI* kari, + byte* pkiMsg, word32 pkiMsgSz, word32* idx) +{ + int length; + word32 savedIdx; + byte tag; + + if (kari == NULL || pkiMsg == NULL || idx == NULL) + return BAD_FUNC_ARG; + + savedIdx = *idx; + + /* starts with EXPLICIT [1] */ + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) { + *idx = savedIdx; + return 0; + } + if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) { + *idx = savedIdx; + return 0; + } + + if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) { + *idx = savedIdx; + return 0; + } + + /* get OCTET STRING */ + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) { + *idx = savedIdx; + return 0; + } + if (tag != ASN_OCTET_STRING) { + *idx = savedIdx; + return 0; + } + + if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) { + *idx = savedIdx; + return 0; + } + + kari->ukm = NULL; + if (length > 0) { + kari->ukm = (byte*)XMALLOC(length, kari->heap, DYNAMIC_TYPE_PKCS7); + if (kari->ukm == NULL) + return MEMORY_E; + + XMEMCPY(kari->ukm, pkiMsg + (*idx), length); + kari->ukmOwner = 1; + } + + (*idx) += length; + kari->ukmSz = length; + + return 0; +} + + +/* remove ASN.1 KeyEncryptionAlgorithmIdentifier, return 0 on success, + * < 0 on error */ +static int wc_PKCS7_KariGetKeyEncryptionAlgorithmId(WC_PKCS7_KARI* kari, + byte* pkiMsg, word32 pkiMsgSz, word32* idx, + word32* keyAgreeOID, word32* keyWrapOID) +{ + int length = 0; + word32 localIdx; + + if (kari == NULL || pkiMsg == NULL || idx == NULL || + keyAgreeOID == NULL || keyWrapOID == NULL) + return BAD_FUNC_ARG; + + localIdx = *idx; + + /* remove KeyEncryptionAlgorithmIdentifier */ + if (GetSequence(pkiMsg, &localIdx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + localIdx = *idx; + if (GetAlgoId(pkiMsg, &localIdx, keyAgreeOID, oidCmsKeyAgreeType, + pkiMsgSz) < 0) { + return ASN_PARSE_E; + } + + if (localIdx < *idx + length) { + *idx = localIdx; + } + /* remove KeyWrapAlgorithm, stored in parameter of KeyEncAlgoId */ + if (GetAlgoId(pkiMsg, idx, keyWrapOID, oidKeyWrapType, pkiMsgSz) < 0) + return ASN_PARSE_E; + + return 0; +} + + +/* remove ASN.1 SubjectKeyIdentifier, return 0 on success, < 0 on error + * if subject key ID matches, recipFound is set to 1 */ +static int wc_PKCS7_KariGetSubjectKeyIdentifier(WC_PKCS7_KARI* kari, + byte* pkiMsg, word32 pkiMsgSz, word32* idx, + int* recipFound, byte* rid) +{ + int length; + byte tag; + + if (kari == NULL || pkiMsg == NULL || idx == NULL || recipFound == NULL || + rid == NULL) + return BAD_FUNC_ARG; + + /* remove RecipientKeyIdentifier IMPLICIT [0] */ + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) { + return ASN_PARSE_E; + } + + if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) { + if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + } else { + return ASN_PARSE_E; + } + + /* remove SubjectKeyIdentifier */ + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) { 
+ return ASN_PARSE_E; + } + + if (tag != ASN_OCTET_STRING) + return ASN_PARSE_E; + + if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (length != KEYID_SIZE) + return ASN_PARSE_E; + + XMEMCPY(rid, pkiMsg + (*idx), KEYID_SIZE); + (*idx) += length; + + /* subject key id should match if recipient found */ + if (XMEMCMP(rid, kari->decoded->extSubjKeyId, KEYID_SIZE) == 0) { + *recipFound = 1; + } + + return 0; +} + + +/* remove ASN.1 IssuerAndSerialNumber, return 0 on success, < 0 on error + * if issuer and serial number match, recipFound is set to 1 */ +static int wc_PKCS7_KariGetIssuerAndSerialNumber(WC_PKCS7_KARI* kari, + byte* pkiMsg, word32 pkiMsgSz, word32* idx, + int* recipFound, byte* rid) +{ + int length, ret; +#ifdef WOLFSSL_SMALL_STACK + mp_int* serial; + mp_int* recipSerial; +#else + mp_int serial[1]; + mp_int recipSerial[1]; +#endif + + if (rid == NULL) { + return BAD_FUNC_ARG; + } + + /* remove IssuerAndSerialNumber */ + if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (GetNameHash(pkiMsg, idx, rid, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* if we found correct recipient, issuer hashes will match */ + if (XMEMCMP(rid, kari->decoded->issuerHash, KEYID_SIZE) == 0) { + *recipFound = 1; + } + +#ifdef WOLFSSL_SMALL_STACK + serial = (mp_int*)XMALLOC(sizeof(mp_int), kari->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (serial == NULL) + return MEMORY_E; + + recipSerial = (mp_int*)XMALLOC(sizeof(mp_int), kari->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (recipSerial == NULL) { + XFREE(serial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER); + return MEMORY_E; + } +#endif + + if (GetInt(serial, pkiMsg, idx, pkiMsgSz) < 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(recipSerial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ASN_PARSE_E; + } + + ret = mp_read_unsigned_bin(recipSerial, kari->decoded->serial, + kari->decoded->serialSz); + if (ret != MP_OKAY) { + mp_clear(serial); + WOLFSSL_MSG("Failed to parse CMS recipient serial number"); +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(recipSerial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; + } + + if (mp_cmp(recipSerial, serial) != MP_EQ) { + mp_clear(serial); + mp_clear(recipSerial); + WOLFSSL_MSG("CMS serial number does not match recipient"); +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(recipSerial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return PKCS7_RECIP_E; + } + + mp_clear(serial); + mp_clear(recipSerial); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(serial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(recipSerial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return 0; +} + + +/* remove ASN.1 RecipientEncryptedKeys, return 0 on success, < 0 on error */ +static int wc_PKCS7_KariGetRecipientEncryptedKeys(WC_PKCS7_KARI* kari, + byte* pkiMsg, word32 pkiMsgSz, word32* idx, + int* recipFound, byte* encryptedKey, + int* encryptedKeySz, byte* rid) +{ + int length; + int ret = 0; + byte tag; + word32 localIdx; + + if (kari == NULL || pkiMsg == NULL || idx == NULL || + recipFound == NULL || encryptedKey == NULL) + return BAD_FUNC_ARG; + + /* remove RecipientEncryptedKeys */ + if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* remove RecipientEncryptedKeys */ + if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* KeyAgreeRecipientIdentifier is CHOICE of IssuerAndSerialNumber + * or 
+ *       [0] IMPLICIT RecipientKeyIdentifier */
+    localIdx = *idx;
+    if (GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) < 0)
+        return ASN_PARSE_E;
+
+    if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
+        /* try to get RecipientKeyIdentifier */
+        ret = wc_PKCS7_KariGetSubjectKeyIdentifier(kari, pkiMsg, pkiMsgSz,
+                                                   idx, recipFound, rid);
+    } else {
+        /* try to get IssuerAndSerialNumber */
+        ret = wc_PKCS7_KariGetIssuerAndSerialNumber(kari, pkiMsg, pkiMsgSz,
+                                                    idx, recipFound, rid);
+    }
+
+    /* if we don't have either option, malformed CMS */
+    if (ret != 0)
+        return ret;
+
+    /* remove EncryptedKey */
+    if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+        return ASN_PARSE_E;
+
+    if (tag != ASN_OCTET_STRING)
+        return ASN_PARSE_E;
+
+    if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+        return ASN_PARSE_E;
+
+    /* put encrypted CEK in decryptedKey buffer for now, decrypt later */
+    if (length > *encryptedKeySz)
+        return BUFFER_E;
+
+    XMEMCPY(encryptedKey, pkiMsg + (*idx), length);
+    *encryptedKeySz = length;
+    (*idx) += length;
+
+    return 0;
+}
+
+#endif /* HAVE_ECC */
+
+
+int wc_PKCS7_SetOriEncryptCtx(PKCS7* pkcs7, void* ctx)
+{
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+    pkcs7->oriEncryptCtx = ctx;
+
+    return 0;
+}
+
+
+int wc_PKCS7_SetOriDecryptCtx(PKCS7* pkcs7, void* ctx)
+{
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+    pkcs7->oriDecryptCtx = ctx;
+
+    return 0;
+}
+
+
+int wc_PKCS7_SetOriDecryptCb(PKCS7* pkcs7, CallbackOriDecrypt cb)
+{
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+    pkcs7->oriDecryptCb = cb;
+
+    return 0;
+}
+
+
+/* return 0 on success */
+int wc_PKCS7_SetWrapCEKCb(PKCS7* pkcs7, CallbackWrapCEK cb)
+{
+    if (pkcs7 == NULL)
+        return BAD_FUNC_ARG;
+
+    pkcs7->wrapCEKCb = cb;
+
+    return 0;
+}
+
+/* Decrypt ASN.1 OtherRecipientInfo (ori), as defined by:
+ *
+ *   OtherRecipientInfo ::= SEQUENCE {
+ *       oriType OBJECT IDENTIFIER,
+ *       oriValue ANY DEFINED BY oriType }
+ *
+ * pkcs7          - pointer to initialized PKCS7 structure
+ * pkiMsg         - pointer to encoded CMS bundle
+ * pkiMsgSz       - size of pkiMsg, bytes
+ * idx            - [IN/OUT] pointer to index into pkiMsg
+ * decryptedKey   - [OUT] output buf for decrypted content encryption key
+ * decryptedKeySz - [IN/OUT] size of buffer, size of decrypted key
+ * recipFound     - [OUT] 1 if recipient has been found, 0 if not
+ *
+ * Return 0 on success, negative upon error.
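+ *
+ * Illustrative use (a hedged sketch, not part of this file: the callback
+ * name myOriDecrypt, myCtx, and the key lookup are hypothetical; the
+ * parameter list mirrors how oriDecryptCb is invoked below):
+ *
+ *   static int myOriDecrypt(PKCS7* pkcs7, byte* oriType, word32 oriTypeSz,
+ *                           byte* oriValue, word32 oriValueSz,
+ *                           byte* key, word32* keySz, void* ctx)
+ *   {
+ *       (void)pkcs7; (void)ctx;
+ *       -- find the CEK matching oriType/oriValue, copy it into key,
+ *       -- set *keySz to its length, return 0 (non-zero on failure)
+ *   }
+ *
+ *   wc_PKCS7_SetOriDecryptCb(pkcs7, myOriDecrypt);
+ *   wc_PKCS7_SetOriDecryptCtx(pkcs7, myCtx);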
+ */ +static int wc_PKCS7_DecryptOri(PKCS7* pkcs7, byte* in, word32 inSz, + word32* idx, byte* decryptedKey, + word32* decryptedKeySz, int* recipFound) +{ + int ret, seqSz, oriOIDSz; + word32 oriValueSz, tmpIdx; + byte* oriValue; + byte oriOID[MAX_OID_SZ]; + + byte* pkiMsg = in; + word32 pkiMsgSz = inSz; +#ifndef NO_PKCS7_STREAM + word32 stateIdx = *idx; + long rc; +#endif + + if (pkcs7->oriDecryptCb == NULL) { + WOLFSSL_MSG("You must register an ORI Decrypt callback"); + return BAD_FUNC_ARG; + } + + switch (pkcs7->state) { + + case WC_PKCS7_DECRYPT_ORI: + #ifndef NO_PKCS7_STREAM + /* @TODO for now just get full buffer, needs divided up */ + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, + (pkcs7->stream->maxLen - pkcs7->stream->totalRd) + + pkcs7->stream->length, &pkiMsg, idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, + inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + #endif + /* get OtherRecipientInfo sequence length */ + if (GetLength(pkiMsg, idx, &seqSz, pkiMsgSz) < 0) + return ASN_PARSE_E; + + tmpIdx = *idx; + + /* remove and store oriType OBJECT IDENTIFIER */ + if (GetASNObjectId(pkiMsg, idx, &oriOIDSz, pkiMsgSz) != 0) + return ASN_PARSE_E; + + XMEMCPY(oriOID, pkiMsg + *idx, oriOIDSz); + *idx += oriOIDSz; + + /* get oriValue, increment idx */ + oriValue = pkiMsg + *idx; + oriValueSz = seqSz - (*idx - tmpIdx); + *idx += oriValueSz; + + /* pass oriOID and oriValue to user callback, expect back + decryptedKey and size */ + ret = pkcs7->oriDecryptCb(pkcs7, oriOID, (word32)oriOIDSz, oriValue, + oriValueSz, decryptedKey, decryptedKeySz, + pkcs7->oriDecryptCtx); + + if (ret != 0 || decryptedKey == NULL || *decryptedKeySz == 0) { + /* decrypt operation failed */ + *recipFound = 0; + return PKCS7_RECIP_E; + } + + /* mark recipFound, since we only support one RecipientInfo for now */ + *recipFound = 1; + + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, idx)) != 0) { + break; + } + #endif + ret = 0; /* success */ + break; + + default: + WOLFSSL_MSG("PKCS7 ORI unknown state"); + ret = BAD_FUNC_ARG; + + } + + return ret; +} + +#if !defined(NO_PWDBASED) && !defined(NO_SHA) + +/* decode ASN.1 PasswordRecipientInfo (pwri), return 0 on success, + * < 0 on error */ +static int wc_PKCS7_DecryptPwri(PKCS7* pkcs7, byte* in, word32 inSz, + word32* idx, byte* decryptedKey, + word32* decryptedKeySz, int* recipFound) +{ + byte* salt; + byte* cek; + byte* kek; + + byte tmpIv[MAX_CONTENT_IV_SIZE]; + + int ret = 0, length, saltSz, iterations, blockSz, kekKeySz; + int hashOID = WC_SHA; /* default to SHA1 */ + word32 kdfAlgoId, pwriEncAlgoId, keyEncAlgoId, cekSz; + byte* pkiMsg = in; + word32 pkiMsgSz = inSz; + byte tag; +#ifndef NO_PKCS7_STREAM + word32 tmpIdx = *idx; + long rc; +#endif + + switch (pkcs7->state) { + case WC_PKCS7_DECRYPT_PWRI: + #ifndef NO_PKCS7_STREAM + /*@TODO for now just get full buffer, needs divided up */ + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, + (pkcs7->stream->maxLen - pkcs7->stream->totalRd) + + pkcs7->stream->length, &pkiMsg, idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, + inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + #endif + /* remove KeyDerivationAlgorithmIdentifier */ + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) + return ASN_PARSE_E; + + if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) 
+ return ASN_PARSE_E; + + /* get KeyDerivationAlgorithmIdentifier */ + if (wc_GetContentType(pkiMsg, idx, &kdfAlgoId, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* get KDF params SEQ */ + if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* get KDF salt OCTET STRING */ + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (tag != ASN_OCTET_STRING) + return ASN_PARSE_E; + + if (GetLength(pkiMsg, idx, &saltSz, pkiMsgSz) < 0) + return ASN_PARSE_E; + + salt = (byte*)XMALLOC(saltSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (salt == NULL) + return MEMORY_E; + + XMEMCPY(salt, pkiMsg + (*idx), saltSz); + *idx += saltSz; + + /* get KDF iterations */ + if (GetMyVersion(pkiMsg, idx, &iterations, pkiMsgSz) < 0) { + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ASN_PARSE_E; + } + + /* get KeyEncAlgoId SEQ */ + if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0) { + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ASN_PARSE_E; + } + + /* get KeyEncAlgoId */ + if (wc_GetContentType(pkiMsg, idx, &keyEncAlgoId, pkiMsgSz) < 0) { + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ASN_PARSE_E; + } + + /* get pwriEncAlgoId */ + if (GetAlgoId(pkiMsg, idx, &pwriEncAlgoId, oidBlkType, pkiMsgSz) < 0) { + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ASN_PARSE_E; + } + + blockSz = wc_PKCS7_GetOIDBlockSize(pwriEncAlgoId); + if (blockSz < 0) { + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return blockSz; + } + + /* get content-encryption key size, based on algorithm */ + kekKeySz = wc_PKCS7_GetOIDKeySize(pwriEncAlgoId); + if (kekKeySz < 0) { + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return kekKeySz; + } + + /* get block cipher IV, stored in OPTIONAL parameter of AlgoID */ + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) { + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ASN_PARSE_E; + } + + if (tag != ASN_OCTET_STRING) { + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ASN_PARSE_E; + } + + if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) { + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ASN_PARSE_E; + } + + if (length != blockSz) { + WOLFSSL_MSG("Incorrect IV length, must be of content alg block size"); + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ASN_PARSE_E; + } + + XMEMCPY(tmpIv, pkiMsg + (*idx), length); + *idx += length; + + /* get EncryptedKey */ + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) { + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ASN_PARSE_E; + } + + if (tag != ASN_OCTET_STRING) { + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ASN_PARSE_E; + } + + if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) { + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ASN_PARSE_E; + } + + /* allocate temporary space for decrypted key */ + cekSz = length; + cek = (byte*)XMALLOC(cekSz, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (cek == NULL) { + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } + + /* generate KEK */ + kek = (byte*)XMALLOC(kekKeySz, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (kek == NULL) { + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } + + ret = wc_PKCS7_GenerateKEK_PWRI(pkcs7, pkcs7->pass, pkcs7->passSz, + salt, saltSz, kdfAlgoId, hashOID, + iterations, kek, kekKeySz); + if (ret < 0) { + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ASN_PARSE_E; + } + + /* 
decrypt CEK with KEK */ + ret = wc_PKCS7_PwriKek_KeyUnWrap(pkcs7, kek, kekKeySz, + pkiMsg + (*idx), length, cek, + cekSz, tmpIv, blockSz, + pwriEncAlgoId); + if (ret < 0) { + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + cekSz = ret; + + if (*decryptedKeySz < cekSz) { + WOLFSSL_MSG("Decrypted key buffer too small for CEK"); + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return BUFFER_E; + } + + XMEMCPY(decryptedKey, cek, cekSz); + *decryptedKeySz = cekSz; + + XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + + /* mark recipFound, since we only support one RecipientInfo for now */ + *recipFound = 1; + *idx += length; + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) { + break; + } + #endif + ret = 0; /* success */ + break; + + default: + WOLFSSL_MSG("PKCS7 PWRI unknown state"); + ret = BAD_FUNC_ARG; + } + + return ret; +} + +#endif /* NO_PWDBASED | NO_SHA */ + +/* decode ASN.1 KEKRecipientInfo (kekri), return 0 on success, + * < 0 on error */ +static int wc_PKCS7_DecryptKekri(PKCS7* pkcs7, byte* in, word32 inSz, + word32* idx, byte* decryptedKey, + word32* decryptedKeySz, int* recipFound) +{ + int length, keySz, dateLen, direction; + byte* keyId = NULL; + const byte* datePtr = NULL; + byte dateFormat, tag; + word32 keyIdSz, kekIdSz, keyWrapOID, localIdx; + + int ret = 0; + byte* pkiMsg = in; + word32 pkiMsgSz = inSz; +#ifndef NO_PKCS7_STREAM + word32 tmpIdx = *idx; + long rc; +#endif + + WOLFSSL_ENTER("wc_PKCS7_DecryptKekri"); + switch (pkcs7->state) { + case WC_PKCS7_DECRYPT_KEKRI: + #ifndef NO_PKCS7_STREAM + /* @TODO for now just get full buffer, needs divided up */ + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, + (pkcs7->stream->maxLen - pkcs7->stream->totalRd) + + pkcs7->stream->length, &pkiMsg, idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, + inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + #endif + /* remove KEKIdentifier */ + if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + kekIdSz = length; + + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (tag != ASN_OCTET_STRING) + return ASN_PARSE_E; + + if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* save keyIdentifier and length */ + keyId = pkiMsg + *idx; + keyIdSz = length; + *idx += keyIdSz; + + /* may have OPTIONAL GeneralizedTime */ + localIdx = *idx; + if ((*idx < kekIdSz) && GetASNTag(pkiMsg, &localIdx, &tag, + pkiMsgSz) == 0 && tag == ASN_GENERALIZED_TIME) { + if (wc_GetDateInfo(pkiMsg + *idx, pkiMsgSz, &datePtr, &dateFormat, + &dateLen) != 0) { + return ASN_PARSE_E; + } + *idx += (dateLen + 1); + } + + /* may have OPTIONAL OtherKeyAttribute */ + localIdx = *idx; + if ((*idx < kekIdSz) && GetASNTag(pkiMsg, &localIdx, &tag, + pkiMsgSz) == 0 && tag == (ASN_SEQUENCE | + ASN_CONSTRUCTED)) { + if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* skip it */ + *idx += length; + } + + /* get KeyEncryptionAlgorithmIdentifier */ + if (GetAlgoId(pkiMsg, idx, &keyWrapOID, oidKeyWrapType, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* get EncryptedKey */ + if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) + return 
ASN_PARSE_E; + + if (tag != ASN_OCTET_STRING) + return ASN_PARSE_E; + + if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + #ifndef NO_AES + direction = AES_DECRYPTION; + #else + direction = DES_DECRYPTION; + #endif + + /* decrypt CEK with KEK */ + if (pkcs7->wrapCEKCb) { + keySz = pkcs7->wrapCEKCb(pkcs7, pkiMsg + *idx, length, keyId, + keyIdSz, NULL, 0, decryptedKey, + *decryptedKeySz, keyWrapOID, + (int)PKCS7_KEKRI, direction); + } + else { + keySz = wc_PKCS7_KeyWrap(pkiMsg + *idx, length, pkcs7->privateKey, + pkcs7->privateKeySz, decryptedKey, *decryptedKeySz, + keyWrapOID, direction); + } + if (keySz <= 0) + return keySz; + + *decryptedKeySz = (word32)keySz; + + /* mark recipFound, since we only support one RecipientInfo for now */ + *recipFound = 1; + *idx += length; + + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) { + break; + } + #endif + ret = 0; /* success */ + break; + + default: + WOLFSSL_MSG("PKCS7 KEKRI unknown state"); + ret = BAD_FUNC_ARG; + + } + + (void)keyId; + return ret; +} + + +/* decode ASN.1 KeyAgreeRecipientInfo (kari), return 0 on success, + * < 0 on error */ +static int wc_PKCS7_DecryptKari(PKCS7* pkcs7, byte* in, word32 inSz, + word32* idx, byte* decryptedKey, + word32* decryptedKeySz, int* recipFound) +{ +#ifdef HAVE_ECC + int ret, keySz; + int encryptedKeySz; + int direction = 0; + word32 keyAgreeOID, keyWrapOID; + byte rid[KEYID_SIZE]; + +#ifdef WOLFSSL_SMALL_STACK + byte* encryptedKey; +#else + byte encryptedKey[MAX_ENCRYPTED_KEY_SZ]; +#endif + + byte* pkiMsg = in; + word32 pkiMsgSz = inSz; +#ifndef NO_PKCS7_STREAM + word32 tmpIdx = (idx) ? *idx : 0; + long rc; +#endif + + WOLFSSL_ENTER("wc_PKCS7_DecryptKari"); + if (pkcs7 == NULL || pkiMsg == NULL || + ((pkcs7->singleCert == NULL || pkcs7->singleCertSz == 0) && + pkcs7->wrapCEKCb == NULL) || + idx == NULL || decryptedKey == NULL || decryptedKeySz == NULL) { + return BAD_FUNC_ARG; + } + + switch (pkcs7->state) { + case WC_PKCS7_DECRYPT_KARI: { + #ifndef NO_PKCS7_STREAM + /* @TODO for now just get full buffer, needs divided up */ + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, + (pkcs7->stream->maxLen - pkcs7->stream->totalRd) + + pkcs7->stream->length, &pkiMsg, idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, + inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + #endif + WC_PKCS7_KARI* kari; + + kari = wc_PKCS7_KariNew(pkcs7, WC_PKCS7_DECODE); + if (kari == NULL) + return MEMORY_E; + + #ifdef WOLFSSL_SMALL_STACK + encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (encryptedKey == NULL) { + wc_PKCS7_KariFree(kari); + return MEMORY_E; + } + #endif + encryptedKeySz = MAX_ENCRYPTED_KEY_SZ; + + /* parse cert and key */ + if (pkcs7->singleCert != NULL) { + ret = wc_PKCS7_KariParseRecipCert(kari, (byte*)pkcs7->singleCert, + pkcs7->singleCertSz, pkcs7->privateKey, + pkcs7->privateKeySz); + if (ret != 0) { + wc_PKCS7_KariFree(kari); + #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #endif + return ret; + } + } + + /* remove OriginatorIdentifierOrKey */ + ret = wc_PKCS7_KariGetOriginatorIdentifierOrKey(kari, pkiMsg, + pkiMsgSz, idx); + if (ret != 0) { + wc_PKCS7_KariFree(kari); + #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #endif + return ret; + } + + /* try and remove optional UserKeyingMaterial */ + ret = wc_PKCS7_KariGetUserKeyingMaterial(kari, pkiMsg, 
pkiMsgSz, idx); + if (ret != 0) { + wc_PKCS7_KariFree(kari); + #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #endif + return ret; + } + + /* remove KeyEncryptionAlgorithmIdentifier */ + ret = wc_PKCS7_KariGetKeyEncryptionAlgorithmId(kari, pkiMsg, + pkiMsgSz, idx, &keyAgreeOID, &keyWrapOID); + if (ret != 0) { + wc_PKCS7_KariFree(kari); + #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #endif + return ret; + } + + /* if user has not explicitly set keyAgreeOID, set from one in bundle */ + if (pkcs7->keyAgreeOID == 0) + pkcs7->keyAgreeOID = keyAgreeOID; + + /* set direction based on key wrap algorithm */ + switch (keyWrapOID) { + #ifndef NO_AES + #ifdef WOLFSSL_AES_128 + case AES128_WRAP: + #endif + #ifdef WOLFSSL_AES_192 + case AES192_WRAP: + #endif + #ifdef WOLFSSL_AES_256 + case AES256_WRAP: + #endif + direction = AES_DECRYPTION; + break; + #endif + default: + WOLFSSL_MSG("AES key wrap algorithm unsupported"); + if (pkcs7->wrapCEKCb) { + WOLFSSL_MSG("Direction not set!"); + break; /* if unwrapping callback is set then do not + * force restriction of supported wrap + * algorithms */ + } + + wc_PKCS7_KariFree(kari); + #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #endif + return BAD_KEYWRAP_ALG_E; + } + + /* remove RecipientEncryptedKeys */ + ret = wc_PKCS7_KariGetRecipientEncryptedKeys(kari, pkiMsg, pkiMsgSz, + idx, recipFound, encryptedKey, &encryptedKeySz, rid); + if (ret != 0) { + wc_PKCS7_KariFree(kari); + #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #endif + return ret; + } + + /* decrypt CEK with KEK */ + if (pkcs7->wrapCEKCb) { + word32 tmpKeySz = 0; + byte* tmpKeyDer = NULL; + + ret = wc_ecc_export_x963(kari->senderKey, NULL, &tmpKeySz); + if (ret != LENGTH_ONLY_E) { + return ret; + } + + /* buffer space for algorithm/curve */ + tmpKeySz += MAX_SEQ_SZ; + tmpKeySz += 2 * MAX_ALGO_SZ; + + /* buffer space for public key sequence */ + tmpKeySz += MAX_SEQ_SZ; + tmpKeySz += TRAILING_ZERO; + + tmpKeyDer = (byte*)XMALLOC(tmpKeySz, pkcs7->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (tmpKeyDer == NULL) { + return MEMORY_E; + } + + ret = wc_EccPublicKeyToDer(kari->senderKey, tmpKeyDer, + tmpKeySz, 1); + if (ret < 0) { + XFREE(tmpKeyDer, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + return ret; + } + tmpKeySz = (word32)ret; + + keySz = pkcs7->wrapCEKCb(pkcs7, encryptedKey, encryptedKeySz, + rid, KEYID_SIZE, tmpKeyDer, tmpKeySz, + decryptedKey, *decryptedKeySz, + keyWrapOID, (int)PKCS7_KARI, direction); + XFREE(tmpKeyDer, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + + if (keySz > 0) { + /* If unwrapping was successful then consider recipient + * found. 
Checking for NULL singleCert to confirm previous + * SID check was not done */ + if (pkcs7->singleCert == NULL) + *recipFound = 1; + } + } + else { + /* create KEK */ + ret = wc_PKCS7_KariGenerateKEK(kari, keyWrapOID, pkcs7->keyAgreeOID); + if (ret != 0) { + wc_PKCS7_KariFree(kari); + #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #endif + return ret; + } + + /* decrypt CEK with KEK */ + keySz = wc_PKCS7_KeyWrap(encryptedKey, encryptedKeySz, kari->kek, + kari->kekSz, decryptedKey, *decryptedKeySz, + keyWrapOID, direction); + } + if (keySz <= 0) { + wc_PKCS7_KariFree(kari); + #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #endif + return keySz; + } + *decryptedKeySz = (word32)keySz; + + wc_PKCS7_KariFree(kari); + #ifdef WOLFSSL_SMALL_STACK + XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + #endif + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) { + break; + } + #endif + ret = 0; /* success */ + } + break; + + default: + WOLFSSL_MSG("PKCS7 kari unknown state"); + ret = BAD_FUNC_ARG; + + } + + (void)pkiMsg; + (void)pkiMsgSz; + + return ret; +#else + (void)in; + (void)inSz; + (void)pkcs7; + (void)idx; + (void)decryptedKey; + (void)decryptedKeySz; + (void)recipFound; + + return NOT_COMPILED_IN; +#endif /* HAVE_ECC */ +} + + +/* decode ASN.1 RecipientInfos SET, return 0 on success, < 0 on error */ +static int wc_PKCS7_DecryptRecipientInfos(PKCS7* pkcs7, byte* in, + word32 inSz, word32* idx, byte* decryptedKey, + word32* decryptedKeySz, int* recipFound) +{ + word32 savedIdx; + int version, ret = 0, length; + byte* pkiMsg = in; + word32 pkiMsgSz = inSz; + byte tag; +#ifndef NO_PKCS7_STREAM + word32 tmpIdx; + long rc; +#endif + + if (pkcs7 == NULL || pkiMsg == NULL || idx == NULL || + decryptedKey == NULL || decryptedKeySz == NULL || + recipFound == NULL) { + return BAD_FUNC_ARG; + } + + WOLFSSL_ENTER("wc_PKCS7_DecryptRecipientInfos"); +#ifndef NO_PKCS7_STREAM + tmpIdx = *idx; +#endif + + /* check if in the process of decrypting */ + switch (pkcs7->state) { + case WC_PKCS7_DECRYPT_KTRI: + case WC_PKCS7_DECRYPT_KTRI_2: + case WC_PKCS7_DECRYPT_KTRI_3: + #ifndef NO_RSA + ret = wc_PKCS7_DecryptKtri(pkcs7, in, inSz, idx, + decryptedKey, decryptedKeySz, recipFound); + #else + return NOT_COMPILED_IN; + #endif + break; + + case WC_PKCS7_DECRYPT_KARI: + ret = wc_PKCS7_DecryptKari(pkcs7, in, inSz, idx, + decryptedKey, decryptedKeySz, recipFound); + break; + + case WC_PKCS7_DECRYPT_KEKRI: + ret = wc_PKCS7_DecryptKekri(pkcs7, in, inSz, idx, + decryptedKey, decryptedKeySz, recipFound); + break; + + case WC_PKCS7_DECRYPT_PWRI: + #if !defined(NO_PWDBASED) && !defined(NO_SHA) + ret = wc_PKCS7_DecryptPwri(pkcs7, in, inSz, idx, + decryptedKey, decryptedKeySz, recipFound); + #else + return NOT_COMPILED_IN; + #endif + break; + + case WC_PKCS7_DECRYPT_ORI: + ret = wc_PKCS7_DecryptOri(pkcs7, in, inSz, idx, + decryptedKey, decryptedKeySz, recipFound); + break; + + default: + /* not in decrypting state */ + break; + } + + if (ret < 0) { + return ret; + } + + savedIdx = *idx; +#ifndef NO_PKCS7_STREAM + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, inSz); + if (rc < 0) { + return (int)rc; + } + pkiMsgSz = (word32)rc; + if (pkcs7->stream->length > 0) + pkiMsg = pkcs7->stream->buffer; +#endif + + /* when looking for next recipient, use first sequence and version to + * indicate there is another, if not, move on */ + while(*recipFound == 0) { + + /* remove RecipientInfo, if we don't have a 
SEQUENCE, back up idx to + * last good saved one */ + if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) > 0) { + + #ifndef NO_RSA + /* found ktri */ + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) { + break; + } + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_KTRI); + ret = wc_PKCS7_DecryptKtri(pkcs7, in, inSz, idx, + decryptedKey, decryptedKeySz, + recipFound); + if (ret != 0) + return ret; + #else + return NOT_COMPILED_IN; + #endif + } + else { + word32 localIdx; + /* kari is IMPLICIT[1] */ + *idx = savedIdx; + localIdx = *idx; + + if (GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) != 0) { + /* no room for recipient info */ + break; + } + + if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) { + (*idx)++; + if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (GetMyVersion(pkiMsg, idx, &version, pkiMsgSz) < 0) { + *idx = savedIdx; + break; + } + + if (version != 3) + return ASN_VERSION_E; + + /* found kari */ + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) { + break; + } + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_KARI); + ret = wc_PKCS7_DecryptKari(pkcs7, in, inSz, idx, + decryptedKey, decryptedKeySz, + recipFound); + if (ret != 0) + return ret; + + /* kekri is IMPLICIT[2] */ + } else if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 2)) { + (*idx)++; + + if (GetLength(pkiMsg, idx, &version, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (GetMyVersion(pkiMsg, idx, &version, pkiMsgSz) < 0) { + *idx = savedIdx; + break; + } + + if (version != 4) + return ASN_VERSION_E; + + /* found kekri */ + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) { + break; + } + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_KEKRI); + ret = wc_PKCS7_DecryptKekri(pkcs7, in, inSz, idx, + decryptedKey, decryptedKeySz, + recipFound); + if (ret != 0) + return ret; + + /* pwri is IMPLICIT[3] */ + } else if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 3)) { + #if !defined(NO_PWDBASED) && !defined(NO_SHA) + (*idx)++; + + if (GetLength(pkiMsg, idx, &version, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (GetMyVersion(pkiMsg, idx, &version, pkiMsgSz) < 0) { + *idx = savedIdx; + break; + } + + if (version != 0) + return ASN_VERSION_E; + + /* found pwri */ + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) { + break; + } + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_PWRI); + ret = wc_PKCS7_DecryptPwri(pkcs7, in, inSz, idx, + decryptedKey, decryptedKeySz, + recipFound); + if (ret != 0) + return ret; + #else + return NOT_COMPILED_IN; + #endif + + /* ori is IMPLICIT[4] */ + } else if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 4)) { + (*idx)++; + + /* found ori */ + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) { + break; + } + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_ORI); + ret = wc_PKCS7_DecryptOri(pkcs7, in, inSz, idx, + decryptedKey, decryptedKeySz, + recipFound); + if (ret != 0) + return ret; + + } else { + /* failed to find RecipientInfo, restore idx and continue */ + *idx = savedIdx; + break; + } + } + + /* update good idx */ + savedIdx = *idx; + } + + return ret; +} + + +/* Parse encoded EnvelopedData bundle up to RecipientInfo set. 
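+ *
+ * For reference, the layout walked here (EnvelopedData per RFC 5652;
+ * AuthEnvelopedData, RFC 5083, differs after the recipientInfos SET):
+ *
+ *   EnvelopedData ::= SEQUENCE {
+ *       version CMSVersion,
+ *       originatorInfo [0] IMPLICIT OriginatorInfo OPTIONAL,
+ *       recipientInfos RecipientInfos,  -- SET OF RecipientInfo
+ *       encryptedContentInfo EncryptedContentInfo,
+ *       unprotectedAttrs [1] IMPLICIT UnprotectedAttributes OPTIONAL }
+ *
+ * Parsing stops once the header of the recipientInfos SET is read.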
+ * + * return size of RecipientInfo SET on success, negative upon error */ +static int wc_PKCS7_ParseToRecipientInfoSet(PKCS7* pkcs7, byte* in, + word32 inSz, word32* idx, + int type) +{ + int version = 0, length, ret = 0; + word32 contentType; + byte* pkiMsg = in; + word32 pkiMsgSz = inSz; + byte tag; +#ifndef NO_PKCS7_STREAM + word32 tmpIdx = 0; + long rc; +#endif + + if (pkcs7 == NULL || pkiMsg == NULL || pkiMsgSz == 0 || idx == NULL) + return BAD_FUNC_ARG; + + if ((type != ENVELOPED_DATA) && (type != AUTH_ENVELOPED_DATA) && + pkcs7->contentOID != FIRMWARE_PKG_DATA) + return BAD_FUNC_ARG; + +#ifndef NO_PKCS7_STREAM + if (pkcs7->stream == NULL) { + if ((ret = wc_PKCS7_CreateStream(pkcs7)) != 0) { + return ret; + } + } +#endif + + switch (pkcs7->state) { + case WC_PKCS7_INFOSET_START: + case WC_PKCS7_INFOSET_BER: + case WC_PKCS7_INFOSET_STAGE1: + case WC_PKCS7_INFOSET_STAGE2: + case WC_PKCS7_INFOSET_END: + break; + + default: + WOLFSSL_MSG("Warning, setting PKCS7 info state to start"); + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_INFOSET_START); + } + + switch (pkcs7->state) { + case WC_PKCS7_INFOSET_START: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_SEQ_SZ + + ASN_TAG_SZ, &pkiMsg, idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_SEQ_PEEK, in, inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + #endif + /* read past ContentInfo, verify type is envelopedData */ + if (ret == 0 && GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0) + { + ret = ASN_PARSE_E; + } + + if (ret == 0 && length == 0 && pkiMsg[(*idx)-1] == 0x80) { + #ifdef ASN_BER_TO_DER + word32 len; + + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_INFOSET_BER); + FALL_THROUGH; + + /* full buffer is needed for conversion */ + case WC_PKCS7_INFOSET_BER: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, + pkcs7->stream->maxLen - pkcs7->stream->length, + &pkiMsg, idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, + in, inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + #endif + + len = 0; + + ret = wc_BerToDer(pkiMsg, pkiMsgSz, NULL, &len); + if (ret != LENGTH_ONLY_E) + return ret; + pkcs7->der = (byte*)XMALLOC(len, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (pkcs7->der == NULL) + return MEMORY_E; + ret = wc_BerToDer(pkiMsg, pkiMsgSz, pkcs7->der, &len); + if (ret < 0) + return ret; + + pkiMsg = in = pkcs7->der; + pkiMsgSz = pkcs7->derSz = len; + *idx = 0; + + if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + #else + return BER_INDEF_E; + #endif + } + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) { + break; + } + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_INFOSET_STAGE1); + FALL_THROUGH; + + case WC_PKCS7_INFOSET_STAGE1: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_OID_SZ + + MAX_LENGTH_SZ + ASN_TAG_SZ, &pkiMsg, idx)) != 0) { + return ret; + } + + pkiMsgSz = (pkcs7->stream->length > 0)? 
pkcs7->stream->length :inSz; + #endif + if (pkcs7->contentOID != FIRMWARE_PKG_DATA || + type == AUTH_ENVELOPED_DATA) { + if (ret == 0 && wc_GetContentType(pkiMsg, idx, &contentType, + pkiMsgSz) < 0) + ret = ASN_PARSE_E; + + if (ret == 0) { + if (type == ENVELOPED_DATA && contentType != ENVELOPED_DATA) { + WOLFSSL_MSG("PKCS#7 input not of type EnvelopedData"); + ret = PKCS7_OID_E; + } else if (type == AUTH_ENVELOPED_DATA && + contentType != AUTH_ENVELOPED_DATA) { + WOLFSSL_MSG("PKCS#7 input not of type AuthEnvelopedData"); + ret = PKCS7_OID_E; + } + } + + if (ret == 0 && GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) != 0) + ret = ASN_PARSE_E; + + if (ret == 0 && tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC + | 0)) + ret = ASN_PARSE_E; + + if (ret == 0 && GetLength_ex(pkiMsg, idx, &length, pkiMsgSz, + NO_USER_CHECK) < 0) + ret = ASN_PARSE_E; + } + + if (ret < 0) + break; + + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) { + break; + } + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_INFOSET_STAGE2); + FALL_THROUGH; + + case WC_PKCS7_INFOSET_STAGE2: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_SEQ_SZ + + MAX_VERSION_SZ, &pkiMsg, idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, + inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + #endif + /* remove EnvelopedData and version */ + if (pkcs7->contentOID != FIRMWARE_PKG_DATA || + type == AUTH_ENVELOPED_DATA) { + if (ret == 0 && GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0) + ret = ASN_PARSE_E; + } + + if (ret == 0 && GetMyVersion(pkiMsg, idx, &version, pkiMsgSz) < 0) + ret = ASN_PARSE_E; + + if (ret < 0) + break; + + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) { + break; + } + + pkcs7->stream->varOne = version; + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_INFOSET_END); + FALL_THROUGH; + + case WC_PKCS7_INFOSET_END: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, + MAX_SET_SZ, &pkiMsg, idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, + inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + version = pkcs7->stream->varOne; + #endif + + if (type == ENVELOPED_DATA) { + /* TODO :: make this more accurate */ + if ((pkcs7->publicKeyOID == RSAk && + (version != 0 && version != 2)) + #ifdef HAVE_ECC + || (pkcs7->publicKeyOID == ECDSAk && + (version != 0 && version != 2 && version != 3)) + #endif + ) { + WOLFSSL_MSG("PKCS#7 envelopedData version incorrect"); + ret = ASN_VERSION_E; + } + } else { + /* AuthEnvelopedData version MUST be 0 */ + if (version != 0) { + WOLFSSL_MSG("PKCS#7 AuthEnvelopedData needs to be of version 0"); + ret = ASN_VERSION_E; + } + } + + /* remove RecipientInfo set, get length of set */ + if (ret == 0 && GetSet(pkiMsg, idx, &length, pkiMsgSz) < 0) + ret = ASN_PARSE_E; + + if (ret < 0) + break; + + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) { + break; + } + #endif + + if (ret == 0) + ret = length; + + break; + + default: + WOLFSSL_MSG("Bad PKCS7 info set state"); + ret = BAD_FUNC_ARG; + break; + } + + return ret; +} + + +/* Import secret/private key into a PKCS7 structure. Used for setting + * the secret key for decryption a EnvelopedData KEKRI RecipientInfo. 
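+ *
+ * A minimal sketch of KEKRI decryption (buffer names and the 256-bit
+ * key size are illustrative, not mandated):
+ *
+ *   byte kek[32];  -- pre-shared key-encryption key, filled elsewhere
+ *   wc_PKCS7_SetKey(pkcs7, kek, sizeof(kek));
+ *   ret = wc_PKCS7_DecodeEnvelopedData(pkcs7, bundle, bundleSz,
+ *                                      out, outSz);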
+ * + * Returns 0 on success, negative upon error */ +WOLFSSL_API int wc_PKCS7_SetKey(PKCS7* pkcs7, byte* key, word32 keySz) +{ + if (pkcs7 == NULL || key == NULL || keySz == 0) + return BAD_FUNC_ARG; + + pkcs7->privateKey = key; + pkcs7->privateKeySz = keySz; + + return 0; +} + + +/* append data to encrypted content cache in PKCS7 structure + * return 0 on success, negative on error */ +static int PKCS7_CacheEncryptedContent(PKCS7* pkcs7, byte* in, word32 inSz) +{ + byte* oldCache; + word32 oldCacheSz; + + if (pkcs7 == NULL || in == NULL) + return BAD_FUNC_ARG; + + /* save pointer to old cache */ + oldCache = pkcs7->cachedEncryptedContent; + oldCacheSz = pkcs7->cachedEncryptedContentSz; + + /* re-allocate new buffer to fit appended data */ + pkcs7->cachedEncryptedContent = (byte*)XMALLOC(oldCacheSz + inSz, + pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (pkcs7->cachedEncryptedContent == NULL) { + pkcs7->cachedEncryptedContentSz = 0; + XFREE(oldCache, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } + + if (oldCache != NULL) { + XMEMCPY(pkcs7->cachedEncryptedContent, oldCache, oldCacheSz); + } + XMEMCPY(pkcs7->cachedEncryptedContent + oldCacheSz, in, inSz); + pkcs7->cachedEncryptedContentSz += inSz; + + XFREE(oldCache, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + + return 0; +} + + +/* unwrap and decrypt PKCS#7 envelopedData object, return decoded size */ +WOLFSSL_API int wc_PKCS7_DecodeEnvelopedData(PKCS7* pkcs7, byte* in, + word32 inSz, byte* output, + word32 outputSz) +{ + int recipFound = 0; + int ret, length = 0; + word32 idx = 0; +#ifndef NO_PKCS7_STREAM + word32 tmpIdx = 0; + long rc; +#endif + word32 contentType, encOID = 0; + word32 decryptedKeySz = MAX_ENCRYPTED_KEY_SZ; + + int expBlockSz = 0, blockKeySz = 0; + byte tmpIvBuf[MAX_CONTENT_IV_SIZE]; + byte* tmpIv = tmpIvBuf; + + byte* pkiMsg = in; + word32 pkiMsgSz = inSz; + byte* decryptedKey = NULL; + int encryptedContentTotalSz = 0; + int encryptedContentSz = 0; + byte padLen; + byte* encryptedContent = NULL; + int explicitOctet = 0; + word32 localIdx; + byte tag; + + if (pkcs7 == NULL) + return BAD_FUNC_ARG; + + if (pkiMsg == NULL || pkiMsgSz == 0 || + output == NULL || outputSz == 0) + return BAD_FUNC_ARG; + +#ifndef NO_PKCS7_STREAM + (void)tmpIv; /* help out static analysis */ + if (pkcs7->stream == NULL) { + if ((ret = wc_PKCS7_CreateStream(pkcs7)) != 0) { + return ret; + } + } +#endif + + switch (pkcs7->state) { + case WC_PKCS7_START: + case WC_PKCS7_INFOSET_START: + case WC_PKCS7_INFOSET_BER: + case WC_PKCS7_INFOSET_STAGE1: + case WC_PKCS7_INFOSET_STAGE2: + case WC_PKCS7_INFOSET_END: + ret = wc_PKCS7_ParseToRecipientInfoSet(pkcs7, pkiMsg, pkiMsgSz, + &idx, ENVELOPED_DATA); + if (ret < 0) { + break; + } + + #ifdef ASN_BER_TO_DER + /* check if content was BER and has been converted to DER */ + if (pkcs7->derSz > 0) + pkiMsg = in = pkcs7->der; + #endif + + decryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (decryptedKey == NULL) + return MEMORY_E; + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_ENV_2); + #ifndef NO_PKCS7_STREAM + tmpIdx = idx; + pkcs7->stream->aad = decryptedKey; + #endif + FALL_THROUGH; + + case WC_PKCS7_ENV_2: + #ifndef NO_PKCS7_STREAM + /* store up enough buffer for initial info set decode */ + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ + + MAX_VERSION_SZ + ASN_TAG_SZ, &pkiMsg, &idx)) != 0) { + return ret; + } + #endif + FALL_THROUGH; + + case WC_PKCS7_DECRYPT_KTRI: + case WC_PKCS7_DECRYPT_KTRI_2: + case WC_PKCS7_DECRYPT_KTRI_3: + case WC_PKCS7_DECRYPT_KARI: 
+ case WC_PKCS7_DECRYPT_KEKRI: + case WC_PKCS7_DECRYPT_PWRI: + case WC_PKCS7_DECRYPT_ORI: + #ifndef NO_PKCS7_STREAM + decryptedKey = pkcs7->stream->aad; + decryptedKeySz = MAX_ENCRYPTED_KEY_SZ; + #endif + + ret = wc_PKCS7_DecryptRecipientInfos(pkcs7, in, inSz, &idx, + decryptedKey, &decryptedKeySz, + &recipFound); + if (ret == 0 && recipFound == 0) { + WOLFSSL_MSG("No recipient found in envelopedData that matches input"); + ret = PKCS7_RECIP_E; + } + + if (ret != 0) + break; + #ifndef NO_PKCS7_STREAM + tmpIdx = idx; + pkcs7->stream->aadSz = decryptedKeySz; + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_ENV_3); + FALL_THROUGH; + + case WC_PKCS7_ENV_3: + + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ + + MAX_VERSION_SZ + ASN_TAG_SZ + + MAX_LENGTH_SZ, &pkiMsg, &idx)) + != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, + inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + #else + ret = 0; + #endif + + /* remove EncryptedContentInfo */ + if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0) { + ret = ASN_PARSE_E; + } + + if (ret == 0 && wc_GetContentType(pkiMsg, &idx, &contentType, + pkiMsgSz) < 0) { + ret = ASN_PARSE_E; + } + + if (ret == 0 && GetAlgoId(pkiMsg, &idx, &encOID, oidBlkType, + pkiMsgSz) < 0) { + ret = ASN_PARSE_E; + } + + blockKeySz = wc_PKCS7_GetOIDKeySize(encOID); + if (ret == 0 && blockKeySz < 0) { + ret = blockKeySz; + } + + expBlockSz = wc_PKCS7_GetOIDBlockSize(encOID); + if (ret == 0 && expBlockSz < 0) { + ret = expBlockSz; + } + + /* get block cipher IV, stored in OPTIONAL parameter of AlgoID */ + if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) != 0) { + ret = ASN_PARSE_E; + } + + if (ret == 0 && tag != ASN_OCTET_STRING) { + ret = ASN_PARSE_E; + } + + if (ret == 0 && GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0) { + ret = ASN_PARSE_E; + } + + if (ret == 0 && length != expBlockSz) { + WOLFSSL_MSG("Incorrect IV length, must be of content alg block size"); + ret = ASN_PARSE_E; + } + + if (ret != 0) + break; + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) { + break; + } + wc_PKCS7_StreamStoreVar(pkcs7, encOID, expBlockSz, length); + pkcs7->stream->contentSz = blockKeySz; + pkcs7->stream->expected = length + MAX_LENGTH_SZ + MAX_LENGTH_SZ + + ASN_TAG_SZ + ASN_TAG_SZ; + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_ENV_4); + FALL_THROUGH; + + case WC_PKCS7_ENV_4: + + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, + pkcs7->stream->expected, &pkiMsg, &idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, + inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + + wc_PKCS7_StreamGetVar(pkcs7, 0, 0, &length); + tmpIv = pkcs7->stream->tmpIv; + if (tmpIv == NULL) { + /* check added to help out static analysis tool */ + ret = MEMORY_E; + break; + } + #else + ret = 0; + #endif + + XMEMCPY(tmpIv, &pkiMsg[idx], length); + idx += length; + + explicitOctet = 0; + localIdx = idx; + if (GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) == 0 && + tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 0)) { + explicitOctet = 1; + } + + /* read encryptedContent, cont[0] */ + if (tag != (ASN_CONTEXT_SPECIFIC | 0) && + tag != (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 0)) { + ret = ASN_PARSE_E; + } + idx++; + + if (ret == 0 && GetLength(pkiMsg, &idx, &encryptedContentTotalSz, + pkiMsgSz) <= 0) { + ret = ASN_PARSE_E; + } + + if (ret != 0) + break; + + 
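+            /* IV and encryptedContent header are parsed at this point;
+             * the streaming code below stashes the cipher info and the
+             * expected ciphertext size so WC_PKCS7_ENV_5 can collect
+             * and decrypt the content, possibly across several calls */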
#ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) { + break; + } + pkcs7->stream->expected = encryptedContentTotalSz; + wc_PKCS7_StreamGetVar(pkcs7, &encOID, &expBlockSz, 0); + wc_PKCS7_StreamStoreVar(pkcs7, encOID, expBlockSz, explicitOctet); + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_ENV_5); + FALL_THROUGH; + + case WC_PKCS7_ENV_5: + + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, + pkcs7->stream->expected, &pkiMsg, &idx)) != 0) { + return ret; + } + + wc_PKCS7_StreamGetVar(pkcs7, &encOID, &expBlockSz, &explicitOctet); + tmpIv = pkcs7->stream->tmpIv; + encryptedContentTotalSz = pkcs7->stream->expected; + + /* restore decrypted key */ + decryptedKey = pkcs7->stream->aad; + decryptedKeySz = pkcs7->stream->aadSz; + blockKeySz = pkcs7->stream->contentSz; + #else + ret = 0; + #endif + + if (explicitOctet) { + /* encrypted content may be fragmented into multiple + * consecutive OCTET STRINGs, if so loop through + * collecting and caching encrypted content bytes */ + localIdx = idx; + while (idx < (localIdx + encryptedContentTotalSz)) { + + if (GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) { + ret = ASN_PARSE_E; + } + + if (ret == 0 && (tag != ASN_OCTET_STRING)) { + ret = ASN_PARSE_E; + } + + if (ret == 0 && GetLength(pkiMsg, &idx, + &encryptedContentSz, pkiMsgSz) <= 0) { + ret = ASN_PARSE_E; + } + + if (ret == 0) { + ret = PKCS7_CacheEncryptedContent(pkcs7, &pkiMsg[idx], + encryptedContentSz); + } + + if (ret != 0) { + break; + } + + /* advance idx past encrypted content */ + idx += encryptedContentSz; + } + + if (ret != 0) { + break; + } + + } else { + /* cache encrypted content, no OCTET STRING */ + ret = PKCS7_CacheEncryptedContent(pkcs7, &pkiMsg[idx], + encryptedContentTotalSz); + if (ret != 0) { + break; + } + idx += encryptedContentTotalSz; + } + + /* use cached content */ + encryptedContent = pkcs7->cachedEncryptedContent; + encryptedContentSz = pkcs7->cachedEncryptedContentSz; + + /* decrypt encryptedContent */ + ret = wc_PKCS7_DecryptContent(pkcs7, encOID, decryptedKey, + blockKeySz, tmpIv, expBlockSz, NULL, 0, NULL, 0, + encryptedContent, encryptedContentSz, encryptedContent); + if (ret != 0) { + break; + } + + padLen = encryptedContent[encryptedContentSz-1]; + + /* copy plaintext to output */ + if (padLen > encryptedContentSz || + (word32)(encryptedContentSz - padLen) > outputSz) { + ret = BUFFER_E; + break; + } + XMEMCPY(output, encryptedContent, encryptedContentSz - padLen); + + /* free memory, zero out keys */ + ForceZero(decryptedKey, MAX_ENCRYPTED_KEY_SZ); + XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (pkcs7->cachedEncryptedContent != NULL) { + XFREE(pkcs7->cachedEncryptedContent, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + pkcs7->cachedEncryptedContent = NULL; + pkcs7->cachedEncryptedContentSz = 0; + } + + ret = encryptedContentSz - padLen; + #ifndef NO_PKCS7_STREAM + pkcs7->stream->aad = NULL; + pkcs7->stream->aadSz = 0; + wc_PKCS7_ResetStream(pkcs7); + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START); + break; + + default: + WOLFSSL_MSG("PKCS#7 unknown decode enveloped state"); + ret = BAD_FUNC_ARG; + } + +#ifndef NO_PKCS7_STREAM + if (ret < 0 && ret != WC_PKCS7_WANT_READ_E) { + wc_PKCS7_ResetStream(pkcs7); + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START); + if (pkcs7->cachedEncryptedContent != NULL) { + XFREE(pkcs7->cachedEncryptedContent, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + pkcs7->cachedEncryptedContent = NULL; + pkcs7->cachedEncryptedContentSz = 0; + } + } +#else + if (decryptedKey != 
NULL && ret < 0) { + ForceZero(decryptedKey, MAX_ENCRYPTED_KEY_SZ); + XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + } + if (pkcs7->cachedEncryptedContent != NULL && ret < 0) { + XFREE(pkcs7->cachedEncryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + pkcs7->cachedEncryptedContent = NULL; + pkcs7->cachedEncryptedContentSz = 0; + } +#endif + return ret; +} + + +/* build PKCS#7 authEnvelopedData content type, return enveloped size */ +int wc_PKCS7_EncodeAuthEnvelopedData(PKCS7* pkcs7, byte* output, + word32 outputSz) +{ +#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM) + int ret, idx = 0; + int totalSz, encryptedOutSz; + + int contentInfoSeqSz, outerContentTypeSz, outerContentSz; + byte contentInfoSeq[MAX_SEQ_SZ]; + byte outerContentType[MAX_ALGO_SZ]; + byte outerContent[MAX_SEQ_SZ]; + + int envDataSeqSz, verSz; + byte envDataSeq[MAX_SEQ_SZ]; + byte ver[MAX_VERSION_SZ]; + + WC_RNG rng; + int blockSz, blockKeySz; + byte* encryptedContent; + + Pkcs7EncodedRecip* tmpRecip = NULL; + int recipSz, recipSetSz; + byte recipSet[MAX_SET_SZ]; + + int encContentOctetSz, encContentSeqSz, contentTypeSz; + int contentEncAlgoSz, nonceOctetStringSz, macOctetStringSz; + byte encContentSeq[MAX_SEQ_SZ]; + byte contentType[MAX_ALGO_SZ]; + byte contentEncAlgo[MAX_ALGO_SZ]; + byte nonceOctetString[MAX_OCTET_STR_SZ]; + byte encContentOctet[MAX_OCTET_STR_SZ]; + byte macOctetString[MAX_OCTET_STR_SZ]; + + byte authTag[AES_BLOCK_SIZE]; + byte nonce[GCM_NONCE_MID_SZ]; /* GCM nonce is larger than CCM */ + byte macInt[MAX_VERSION_SZ]; + word32 nonceSz = 0, macIntSz = 0; + + /* authAttribs */ + byte* flatAuthAttribs = NULL; + byte authAttribSet[MAX_SET_SZ]; + EncodedAttrib authAttribs[MAX_AUTH_ATTRIBS_SZ]; + word32 authAttribsSz = 0, authAttribsCount = 0; + word32 authAttribsSetSz = 0; + + byte* aadBuffer = NULL; + word32 aadBufferSz = 0; + byte authAttribAadSet[MAX_SET_SZ]; + word32 authAttribsAadSetSz = 0; + + /* unauthAttribs */ + byte* flatUnauthAttribs = NULL; + byte unauthAttribSet[MAX_SET_SZ]; + EncodedAttrib unauthAttribs[MAX_UNAUTH_ATTRIBS_SZ]; + word32 unauthAttribsSz = 0, unauthAttribsCount = 0; + word32 unauthAttribsSetSz = 0; + + + PKCS7Attrib contentTypeAttrib; + byte contentTypeValue[MAX_OID_SZ]; + /* contentType OID (1.2.840.113549.1.9.3) */ + const byte contentTypeOid[] = + { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xF7, 0x0d, 0x01, + 0x09, 0x03 }; + + if (pkcs7 == NULL || pkcs7->content == NULL || pkcs7->contentSz == 0) + return BAD_FUNC_ARG; + + if (output == NULL || outputSz == 0) + return BAD_FUNC_ARG; + + switch (pkcs7->encryptOID) { +#ifdef HAVE_AESGCM + #ifdef WOLFSSL_AES_128 + case AES128GCMb: + break; + #endif + #ifdef WOLFSSL_AES_192 + case AES192GCMb: + break; + #endif + #ifdef WOLFSSL_AES_256 + case AES256GCMb: + break; + #endif +#endif +#ifdef HAVE_AESCCM + #ifdef WOLFSSL_AES_128 + case AES128CCMb: + break; + #endif + #ifdef WOLFSSL_AES_192 + case AES192CCMb: + break; + #endif + #ifdef WOLFSSL_AES_256 + case AES256CCMb: + break; + #endif +#endif + default: + WOLFSSL_MSG("CMS AuthEnvelopedData must use AES-GCM or AES-CCM"); + return BAD_FUNC_ARG; + } + + blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID); + if (blockKeySz < 0) + return blockKeySz; + + blockSz = wc_PKCS7_GetOIDBlockSize(pkcs7->encryptOID); + if (blockSz < 0) + return blockSz; + + /* outer content type */ + ret = wc_SetContentType(AUTH_ENVELOPED_DATA, outerContentType, + sizeof(outerContentType)); + if (ret < 0) + return ret; + + outerContentTypeSz = ret; + + /* version, defined as 0 in RFC 5083 */ + verSz = 
SetMyVersion(0, ver, 0); + + /* generate random content encryption key */ + ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz); + if (ret != 0) { + return ret; + } + + /* build RecipientInfo, only if user manually set singleCert and size */ + if (pkcs7->singleCert != NULL && pkcs7->singleCertSz > 0) { + switch (pkcs7->publicKeyOID) { + #ifndef NO_RSA + case RSAk: + ret = wc_PKCS7_AddRecipient_KTRI(pkcs7, pkcs7->singleCert, + pkcs7->singleCertSz, 0); + break; + #endif + #ifdef HAVE_ECC + case ECDSAk: + ret = wc_PKCS7_AddRecipient_KARI(pkcs7, pkcs7->singleCert, + pkcs7->singleCertSz, + pkcs7->keyWrapOID, + pkcs7->keyAgreeOID, pkcs7->ukm, + pkcs7->ukmSz, 0); + break; + #endif + + default: + WOLFSSL_MSG("Unsupported RecipientInfo public key type"); + return BAD_FUNC_ARG; + }; + + if (ret < 0) { + WOLFSSL_MSG("Failed to create RecipientInfo"); + return ret; + } + } + + recipSz = wc_PKCS7_GetRecipientListSize(pkcs7); + if (recipSz < 0) { + return ret; + + } else if (recipSz == 0) { + WOLFSSL_MSG("You must add at least one CMS recipient"); + return PKCS7_RECIP_E; + } + recipSetSz = SetSet(recipSz, recipSet); + + /* generate random nonce and IV for encryption */ + switch (pkcs7->encryptOID) { +#ifdef HAVE_AESGCM + #ifdef WOLFSSL_AES_128 + case AES128GCMb: + FALL_THROUGH; + #endif + #ifdef WOLFSSL_AES_192 + case AES192GCMb: + FALL_THROUGH; + #endif + #ifdef WOLFSSL_AES_256 + case AES256GCMb: + #endif + #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \ + defined(WOLFSSL_AES_256) + /* GCM nonce is GCM_NONCE_MID_SZ (12) */ + nonceSz = GCM_NONCE_MID_SZ; + break; + #endif +#endif /* HAVE_AESGCM */ +#ifdef HAVE_AESCCM + #ifdef WOLFSSL_AES_128 + case AES128CCMb: + FALL_THROUGH; + #endif + #ifdef WOLFSSL_AES_192 + case AES192CCMb: + FALL_THROUGH; + #endif + #ifdef WOLFSSL_AES_256 + case AES256CCMb: + #endif + #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \ + defined(WOLFSSL_AES_256) + /* CCM nonce is CCM_NONCE_MIN_SZ (7) */ + nonceSz = CCM_NONCE_MIN_SZ; + break; + #endif +#endif /* HAVE_AESCCM */ + } + + ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId); + if (ret != 0) + return ret; + + ret = wc_PKCS7_GenerateBlock(pkcs7, &rng, nonce, nonceSz); + wc_FreeRng(&rng); + if (ret != 0) { + return ret; + } + + + /* authAttribs: add contentType attrib if needed */ + if (pkcs7->contentOID != DATA) { + + /* if type is not id-data, contentType attribute MUST be added */ + contentTypeAttrib.oid = contentTypeOid; + contentTypeAttrib.oidSz = sizeof(contentTypeOid); + + /* try to set from contentOID first, known types */ + ret = wc_SetContentType(pkcs7->contentOID, contentTypeValue, + sizeof(contentTypeValue)); + if (ret > 0) { + contentTypeAttrib.value = contentTypeValue; + contentTypeAttrib.valueSz = ret; + + /* otherwise, try to set from custom content type */ + } else { + if (pkcs7->contentTypeSz == 0) { + WOLFSSL_MSG("CMS pkcs7->contentType must be set if " + "contentOID is not"); + return BAD_FUNC_ARG; + } + contentTypeAttrib.value = pkcs7->contentType; + contentTypeAttrib.valueSz = pkcs7->contentTypeSz; + } + + authAttribsSz += EncodeAttributes(authAttribs, 1, + &contentTypeAttrib, 1); + authAttribsCount += 1; + } + + /* authAttribs: add in user authenticated attributes */ + if (pkcs7->authAttribs != NULL && pkcs7->authAttribsSz > 0) { + authAttribsSz += EncodeAttributes(authAttribs + authAttribsCount, + MAX_AUTH_ATTRIBS_SZ - authAttribsCount, + pkcs7->authAttribs, + pkcs7->authAttribsSz); + authAttribsCount += pkcs7->authAttribsSz; + } + + /* authAttribs: flatten authAttribs 
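+     * into one contiguous DER blob; re-tagged with a universal SET OF
+     * header (see the RFC 5083 note below) it also serves as the AEAD AAD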
*/ + if (authAttribsSz > 0 && authAttribsCount > 0) { + flatAuthAttribs = (byte*)XMALLOC(authAttribsSz, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (flatAuthAttribs == NULL) { + return MEMORY_E; + } + + FlattenAttributes(pkcs7, flatAuthAttribs, authAttribs, + authAttribsCount); + + authAttribsSetSz = SetImplicit(ASN_SET, 1, authAttribsSz, + authAttribSet); + + /* From RFC5083, "For the purpose of constructing the AAD, the + * IMPLICIT [1] tag in the authAttrs field is not used for the + * DER encoding: rather a universal SET OF tag is used. */ + authAttribsAadSetSz = SetSet(authAttribsSz, authAttribAadSet); + + /* allocate temp buffer to hold alternate attrib encoding for aad */ + aadBuffer = (byte*)XMALLOC(authAttribsSz + authAttribsAadSetSz, + pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (aadBuffer == NULL) { + XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } + + /* build up alternate attrib encoding for aad */ + aadBufferSz = 0; + XMEMCPY(aadBuffer + aadBufferSz, authAttribAadSet, authAttribsAadSetSz); + aadBufferSz += authAttribsAadSetSz; + XMEMCPY(aadBuffer + aadBufferSz, flatAuthAttribs, authAttribsSz); + aadBufferSz += authAttribsSz; + } + + /* build up unauthenticated attributes (unauthAttrs) */ + if (pkcs7->unauthAttribsSz > 0) { + unauthAttribsSz = EncodeAttributes(unauthAttribs + unauthAttribsCount, + MAX_UNAUTH_ATTRIBS_SZ - unauthAttribsCount, + pkcs7->unauthAttribs, + pkcs7->unauthAttribsSz); + unauthAttribsCount = pkcs7->unauthAttribsSz; + + flatUnauthAttribs = (byte*)XMALLOC(unauthAttribsSz, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (flatUnauthAttribs == NULL) { + if (aadBuffer) + XFREE(aadBuffer, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (flatAuthAttribs) + XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } + + FlattenAttributes(pkcs7, flatUnauthAttribs, unauthAttribs, + unauthAttribsCount); + unauthAttribsSetSz = SetImplicit(ASN_SET, 2, unauthAttribsSz, + unauthAttribSet); + } + + /* allocate encrypted content buffer */ + encryptedOutSz = pkcs7->contentSz; + encryptedContent = (byte*)XMALLOC(encryptedOutSz, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (encryptedContent == NULL) { + if (aadBuffer) + XFREE(aadBuffer, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (flatUnauthAttribs) + XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (flatAuthAttribs) + XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } + + /* encrypt content */ + ret = wc_PKCS7_EncryptContent(pkcs7->encryptOID, pkcs7->cek, + pkcs7->cekSz, nonce, nonceSz, aadBuffer, aadBufferSz, authTag, + sizeof(authTag), pkcs7->content, encryptedOutSz, encryptedContent); + + if (aadBuffer) { + XFREE(aadBuffer, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER); + aadBuffer = NULL; + } + + if (ret != 0) { + if (flatUnauthAttribs) + XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (flatAuthAttribs) + XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + /* EncryptedContentInfo */ + ret = wc_SetContentType(pkcs7->contentOID, contentType, + sizeof(contentType)); + if (ret < 0) { + if (flatUnauthAttribs) + XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (flatAuthAttribs) + XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + contentTypeSz = ret; + + /* put together nonce OCTET STRING */ + nonceOctetStringSz = SetOctetString(nonceSz, nonceOctetString); 
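+
+    /* per RFC 5084, the AES-GCM/CCM AlgorithmIdentifier parameters carry
+     * the nonce (aes-nonce OCTET STRING) and the ICV/tag length
+     * (aes-ICVlen INTEGER, DEFAULT 12); both are appended after the
+     * algorithm OID below */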
+ + /* put together aes-ICVlen INTEGER */ + macIntSz = SetMyVersion(sizeof(authTag), macInt, 0); + + /* build up our ContentEncryptionAlgorithmIdentifier sequence, + * adding (nonceOctetStringSz + blockSz + macIntSz) for nonce OCTET STRING + * and tag size */ + contentEncAlgoSz = SetAlgoID(pkcs7->encryptOID, contentEncAlgo, + oidBlkType, nonceOctetStringSz + nonceSz + + macIntSz); + + if (contentEncAlgoSz == 0) { + if (flatUnauthAttribs) + XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (flatAuthAttribs) + XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return BAD_FUNC_ARG; + } + + encContentOctetSz = SetImplicit(ASN_OCTET_STRING, 0, encryptedOutSz, + encContentOctet); + + encContentSeqSz = SetSequence(contentTypeSz + contentEncAlgoSz + + nonceOctetStringSz + nonceSz + macIntSz + + encContentOctetSz + encryptedOutSz, + encContentSeq); + + macOctetStringSz = SetOctetString(sizeof(authTag), macOctetString); + + /* keep track of sizes for outer wrapper layering */ + totalSz = verSz + recipSetSz + recipSz + encContentSeqSz + contentTypeSz + + contentEncAlgoSz + nonceOctetStringSz + nonceSz + macIntSz + + encContentOctetSz + encryptedOutSz + authAttribsSz + + authAttribsSetSz + macOctetStringSz + sizeof(authTag) + + unauthAttribsSz + unauthAttribsSetSz; + + /* EnvelopedData */ + envDataSeqSz = SetSequence(totalSz, envDataSeq); + totalSz += envDataSeqSz; + + /* outer content */ + outerContentSz = SetExplicit(0, totalSz, outerContent); + totalSz += outerContentTypeSz; + totalSz += outerContentSz; + + /* ContentInfo */ + contentInfoSeqSz = SetSequence(totalSz, contentInfoSeq); + totalSz += contentInfoSeqSz; + + if (totalSz > (int)outputSz) { + WOLFSSL_MSG("Pkcs7_encrypt output buffer too small"); + if (flatUnauthAttribs) + XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (flatAuthAttribs) + XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return BUFFER_E; + } + + XMEMCPY(output + idx, contentInfoSeq, contentInfoSeqSz); + idx += contentInfoSeqSz; + XMEMCPY(output + idx, outerContentType, outerContentTypeSz); + idx += outerContentTypeSz; + XMEMCPY(output + idx, outerContent, outerContentSz); + idx += outerContentSz; + XMEMCPY(output + idx, envDataSeq, envDataSeqSz); + idx += envDataSeqSz; + XMEMCPY(output + idx, ver, verSz); + idx += verSz; + XMEMCPY(output + idx, recipSet, recipSetSz); + idx += recipSetSz; + /* copy in recipients from list */ + tmpRecip = pkcs7->recipList; + while (tmpRecip != NULL) { + XMEMCPY(output + idx, tmpRecip->recip, tmpRecip->recipSz); + idx += tmpRecip->recipSz; + tmpRecip = tmpRecip->next; + } + wc_PKCS7_FreeEncodedRecipientSet(pkcs7); + XMEMCPY(output + idx, encContentSeq, encContentSeqSz); + idx += encContentSeqSz; + XMEMCPY(output + idx, contentType, contentTypeSz); + idx += contentTypeSz; + XMEMCPY(output + idx, contentEncAlgo, contentEncAlgoSz); + idx += contentEncAlgoSz; + XMEMCPY(output + idx, nonceOctetString, nonceOctetStringSz); + idx += nonceOctetStringSz; + XMEMCPY(output + idx, nonce, nonceSz); + idx += nonceSz; + XMEMCPY(output + idx, macInt, macIntSz); + idx += macIntSz; + XMEMCPY(output + idx, encContentOctet, encContentOctetSz); + idx += encContentOctetSz; + XMEMCPY(output + idx, encryptedContent, encryptedOutSz); + idx += encryptedOutSz; + + /* authenticated attributes */ + if (flatAuthAttribs && authAttribsSz > 0) { + XMEMCPY(output + idx, authAttribSet, authAttribsSetSz); + idx += 
authAttribsSetSz; + XMEMCPY(output + idx, flatAuthAttribs, authAttribsSz); + idx += authAttribsSz; + XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + } + + XMEMCPY(output + idx, macOctetString, macOctetStringSz); + idx += macOctetStringSz; + XMEMCPY(output + idx, authTag, sizeof(authTag)); + idx += sizeof(authTag); + + /* unauthenticated attributes */ + if (unauthAttribsSz > 0) { + XMEMCPY(output + idx, unauthAttribSet, unauthAttribsSetSz); + idx += unauthAttribsSetSz; + XMEMCPY(output + idx, flatUnauthAttribs, unauthAttribsSz); + idx += unauthAttribsSz; + } + + if (flatUnauthAttribs != NULL) { + XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + } + + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + + return idx; + +#else + WOLFSSL_MSG("AuthEnvelopedData requires AES-GCM or AES-CCM to be enabled"); + (void)pkcs7; + (void)output; + (void)outputSz; + + return NOT_COMPILED_IN; +#endif /* HAVE_AESGCM | HAVE_AESCCM */ +} + + +/* unwrap and decrypt PKCS#7 AuthEnvelopedData object, return decoded size */ +WOLFSSL_API int wc_PKCS7_DecodeAuthEnvelopedData(PKCS7* pkcs7, byte* in, + word32 inSz, byte* output, + word32 outputSz) +{ +#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM) + int recipFound = 0; + int ret = 0, length; + word32 idx = 0; +#ifndef NO_PKCS7_STREAM + word32 tmpIdx = 0; + long rc; +#endif + word32 contentType, encOID = 0; + word32 decryptedKeySz = 0; + byte* pkiMsg = in; + word32 pkiMsgSz = inSz; + + int expBlockSz = 0, blockKeySz = 0; + byte authTag[AES_BLOCK_SIZE]; + byte nonce[GCM_NONCE_MID_SZ]; /* GCM nonce is larger than CCM */ + int nonceSz = 0, authTagSz = 0, macSz = 0; + +#ifdef WOLFSSL_SMALL_STACK + byte* decryptedKey = NULL; +#else + byte decryptedKey[MAX_ENCRYPTED_KEY_SZ]; +#endif + int encryptedContentSz = 0; + byte* encryptedContent = NULL; + int explicitOctet = 0; + + byte authAttribSetByte = 0; + byte* encodedAttribs = NULL; + word32 encodedAttribIdx = 0, encodedAttribSz = 0; + byte* authAttrib = NULL; + int authAttribSz = 0; + word32 localIdx; + byte tag; + + if (pkcs7 == NULL) + return BAD_FUNC_ARG; + + if (pkiMsg == NULL || pkiMsgSz == 0 || + output == NULL || outputSz == 0) + return BAD_FUNC_ARG; +#ifndef NO_PKCS7_STREAM + if (pkcs7->stream == NULL) { + if ((ret = wc_PKCS7_CreateStream(pkcs7)) != 0) { + return ret; + } + } +#endif + + switch (pkcs7->state) { + case WC_PKCS7_START: + case WC_PKCS7_INFOSET_START: + case WC_PKCS7_INFOSET_STAGE1: + case WC_PKCS7_INFOSET_STAGE2: + case WC_PKCS7_INFOSET_END: + ret = wc_PKCS7_ParseToRecipientInfoSet(pkcs7, pkiMsg, pkiMsgSz, + &idx, AUTH_ENVELOPED_DATA); + if (ret < 0) + break; + + #ifndef NO_PKCS7_STREAM + tmpIdx = idx; + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_2); + FALL_THROUGH; + + case WC_PKCS7_AUTHENV_2: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ + + MAX_VERSION_SZ + ASN_TAG_SZ, &pkiMsg, &idx)) != 0) { + break; + } + #endif + #ifdef WOLFSSL_SMALL_STACK + decryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (decryptedKey == NULL) { + ret = MEMORY_E; + break; + } + #ifndef NO_PKCS7_STREAM + pkcs7->stream->key = decryptedKey; + #endif + #endif + FALL_THROUGH; + + case WC_PKCS7_DECRYPT_KTRI: + case WC_PKCS7_DECRYPT_KTRI_2: + case WC_PKCS7_DECRYPT_KTRI_3: + case WC_PKCS7_DECRYPT_KARI: + case WC_PKCS7_DECRYPT_KEKRI: + case WC_PKCS7_DECRYPT_PWRI: + case WC_PKCS7_DECRYPT_ORI: + + decryptedKeySz = MAX_ENCRYPTED_KEY_SZ; + #ifdef WOLFSSL_SMALL_STACK + #ifndef NO_PKCS7_STREAM + decryptedKey = 
pkcs7->stream->key; + #endif + #endif + + ret = wc_PKCS7_DecryptRecipientInfos(pkcs7, in, inSz, &idx, + decryptedKey, &decryptedKeySz, + &recipFound); + if (ret != 0) { + break; + } + + if (recipFound == 0) { + WOLFSSL_MSG("No recipient found in envelopedData that matches input"); + ret = PKCS7_RECIP_E; + break; + } + + #ifndef NO_PKCS7_STREAM + tmpIdx = idx; + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_3); + FALL_THROUGH; + + case WC_PKCS7_AUTHENV_3: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_SEQ_SZ + + MAX_ALGO_SZ + MAX_ALGO_SZ + ASN_TAG_SZ, + &pkiMsg, &idx)) != 0) { + break; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, + in, inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + #endif + + /* remove EncryptedContentInfo */ + if (ret == 0 && GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0) { + ret = ASN_PARSE_E; + } + + if (ret == 0 && wc_GetContentType(pkiMsg, &idx, &contentType, + pkiMsgSz) < 0) { + ret = ASN_PARSE_E; + } + + if (ret == 0 && GetAlgoId(pkiMsg, &idx, &encOID, oidBlkType, + pkiMsgSz) < 0) { + ret = ASN_PARSE_E; + } + + blockKeySz = wc_PKCS7_GetOIDKeySize(encOID); + if (ret == 0 && blockKeySz < 0) { + ret = blockKeySz; + } + + expBlockSz = wc_PKCS7_GetOIDBlockSize(encOID); + if (ret == 0 && expBlockSz < 0) { + ret = expBlockSz; + } + + /* get nonce, stored in OPTIONAL parameter of AlgoID */ + if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) { + ret = ASN_PARSE_E; + } + + if (ret == 0 && tag != ASN_OCTET_STRING) { + ret = ASN_PARSE_E; + } + + if (ret < 0) + break; + + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) { + break; + } + wc_PKCS7_StreamStoreVar(pkcs7, encOID, blockKeySz, 0); + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_4); + FALL_THROUGH; + + case WC_PKCS7_AUTHENV_4: + + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ + + MAX_VERSION_SZ + ASN_TAG_SZ + MAX_LENGTH_SZ, + &pkiMsg, &idx)) != 0) { + break; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, + inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + #endif + if (ret == 0 && GetLength(pkiMsg, &idx, &nonceSz, pkiMsgSz) < 0) { + ret = ASN_PARSE_E; + } + + if (ret == 0 && nonceSz > (int)sizeof(nonce)) { + WOLFSSL_MSG("AuthEnvelopedData nonce too large for buffer"); + ret = ASN_PARSE_E; + } + + if (ret == 0) { + XMEMCPY(nonce, &pkiMsg[idx], nonceSz); + idx += nonceSz; + } + + /* get mac size, also stored in OPTIONAL parameter of AlgoID */ + if (ret == 0 && GetMyVersion(pkiMsg, &idx, &macSz, pkiMsgSz) < 0) { + ret = ASN_PARSE_E; + } + + if (ret == 0) { + explicitOctet = 0; + localIdx = idx; + if (GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) == 0 && + tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 0)) + explicitOctet = 1; + + /* read encryptedContent, cont[0] */ + ret = GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz); + } + + if (ret == 0 && + tag != (ASN_CONTEXT_SPECIFIC | 0) && + tag != (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 0)) { + ret = ASN_PARSE_E; + } + + if (ret == 0 && GetLength(pkiMsg, &idx, &encryptedContentSz, + pkiMsgSz) <= 0) { + ret = ASN_PARSE_E; + } + + if (explicitOctet) { + if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) { + ret = ASN_PARSE_E; + } + if (ret == 0 && tag != ASN_OCTET_STRING) { + ret = ASN_PARSE_E; + } + + if (ret == 0 && GetLength(pkiMsg, &idx, &encryptedContentSz, + pkiMsgSz) <= 0) { + ret = ASN_PARSE_E; + } + } + + if (ret < 0) 
+ break; + + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) { + break; + } + + /* store nonce for later */ + if (nonceSz > 0) { + pkcs7->stream->nonceSz = nonceSz; + pkcs7->stream->nonce = (byte*)XMALLOC(nonceSz, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (pkcs7->stream->nonce == NULL) { + ret = MEMORY_E; + break; + } + else { + XMEMCPY(pkcs7->stream->nonce, nonce, nonceSz); + } + } + + pkcs7->stream->expected = encryptedContentSz; + wc_PKCS7_StreamStoreVar(pkcs7, encOID, blockKeySz, + encryptedContentSz); + #endif + + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_5); + FALL_THROUGH; + + case WC_PKCS7_AUTHENV_5: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ + + ASN_TAG_SZ + ASN_TAG_SZ + pkcs7->stream->expected, + &pkiMsg, &idx)) != 0) { + break; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, + inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + + encryptedContentSz = pkcs7->stream->expected; + #endif + + encryptedContent = (byte*)XMALLOC(encryptedContentSz, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (ret == 0 && encryptedContent == NULL) { + ret = MEMORY_E; + } + + if (ret == 0) { + XMEMCPY(encryptedContent, &pkiMsg[idx], encryptedContentSz); + idx += encryptedContentSz; + } + #ifndef NO_PKCS7_STREAM + pkcs7->stream->bufferPt = encryptedContent; + #endif + + /* may have IMPLICIT [1] authenticatedAttributes */ + localIdx = idx; + if (ret == 0 && GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) == 0 && + tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) { + encodedAttribIdx = idx; + encodedAttribs = pkiMsg + idx; + idx++; + + if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0) + ret = ASN_PARSE_E; + #ifndef NO_PKCS7_STREAM + pkcs7->stream->expected = length; + #endif + encodedAttribSz = length + (idx - encodedAttribIdx); + + if (ret != 0) + break; + + #ifndef NO_PKCS7_STREAM + if (encodedAttribSz > 0) { + pkcs7->stream->aadSz = encodedAttribSz; + pkcs7->stream->aad = (byte*)XMALLOC(encodedAttribSz, + pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (pkcs7->stream->aad == NULL) { + ret = MEMORY_E; + break; + } + else { + XMEMCPY(pkcs7->stream->aad, encodedAttribs, + (idx - encodedAttribIdx)); + } + } + + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) { + break; + } + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_ATRB); + } + else { + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) { + break; + } + #endif + goto authenv_atrbend; /* jump over attribute cases */ + } + FALL_THROUGH; + + case WC_PKCS7_AUTHENV_ATRB: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, + pkcs7->stream->expected, &pkiMsg, &idx)) != 0) { + return ret; + } + + length = pkcs7->stream->expected; + encodedAttribs = pkcs7->stream->aad; + #else + length = 0; + #endif + + /* save pointer and length */ + authAttrib = &pkiMsg[idx]; + authAttribSz = length; + + if (ret == 0 && wc_PKCS7_ParseAttribs(pkcs7, authAttrib, authAttribSz) < 0) { + WOLFSSL_MSG("Error parsing authenticated attributes"); + ret = ASN_PARSE_E; + break; + } + + idx += length; + + #ifndef NO_PKCS7_STREAM + if (encodedAttribSz > 0) { + XMEMCPY(pkcs7->stream->aad + (encodedAttribSz - length), + authAttrib, authAttribSz); + } + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) { + break; + } + + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_ATRBEND); + FALL_THROUGH; + +authenv_atrbend: + case WC_PKCS7_AUTHENV_ATRBEND: + #ifndef 
NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ + + ASN_TAG_SZ, &pkiMsg, &idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, + in, inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + + if (pkcs7->stream->aadSz > 0) { + encodedAttribSz = pkcs7->stream->aadSz; + encodedAttribs = pkcs7->stream->aad; + } + #endif + + + /* get authTag OCTET STRING */ + if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) { + ret = ASN_PARSE_E; + } + if (ret == 0 && tag != ASN_OCTET_STRING) { + ret = ASN_PARSE_E; + } + + if (ret == 0 && GetLength(pkiMsg, &idx, &authTagSz, pkiMsgSz) < 0) { + ret = ASN_PARSE_E; + } + + if (ret == 0 && authTagSz > (int)sizeof(authTag)) { + WOLFSSL_MSG("AuthEnvelopedData authTag too large for buffer"); + ret = ASN_PARSE_E; + } + + if (ret == 0) { + XMEMCPY(authTag, &pkiMsg[idx], authTagSz); + idx += authTagSz; + } + + if (ret == 0 && authAttrib != NULL) { + /* temporarily swap authAttribs byte[0] to SET OF instead of + * IMPLICIT [1], for aad calculation */ + authAttribSetByte = encodedAttribs[0]; + + encodedAttribs[0] = ASN_SET | ASN_CONSTRUCTED; + } + + if (ret < 0) + break; + + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) { + break; + } + pkcs7->stream->expected = (pkcs7->stream->maxLen - + pkcs7->stream->totalRd) + pkcs7->stream->length; + + + /* store tag for later */ + if (authTagSz > 0) { + pkcs7->stream->tagSz = authTagSz; + pkcs7->stream->tag = (byte*)XMALLOC(authTagSz, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (pkcs7->stream->tag == NULL) { + ret = MEMORY_E; + break; + } + else { + XMEMCPY(pkcs7->stream->tag, authTag, authTagSz); + } + } + + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_6); + FALL_THROUGH; + + case WC_PKCS7_AUTHENV_6: + #ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, + pkcs7->stream->expected, &pkiMsg, &idx)) != 0) { + break; + } + + /* restore all variables needed */ + if (pkcs7->stream->nonceSz > 0) { + nonceSz = pkcs7->stream->nonceSz; + if (nonceSz > GCM_NONCE_MID_SZ) { + WOLFSSL_MSG("PKCS7 saved nonce is too large"); + ret = BUFFER_E; + break; + } + else { + XMEMCPY(nonce, pkcs7->stream->nonce, nonceSz); + } + } + + if (pkcs7->stream->tagSz > 0) { + authTagSz = pkcs7->stream->tagSz; + if (authTagSz > AES_BLOCK_SIZE) { + WOLFSSL_MSG("PKCS7 saved tag is too large"); + ret = BUFFER_E; + break; + } + else { + XMEMCPY(authTag, pkcs7->stream->tag, authTagSz); + } + } + + if (pkcs7->stream->aadSz > 0) { + encodedAttribSz = pkcs7->stream->aadSz; + encodedAttribs = pkcs7->stream->aad; + } + + wc_PKCS7_StreamGetVar(pkcs7, &encOID, &blockKeySz, + &encryptedContentSz); + encryptedContent = pkcs7->stream->bufferPt; + #ifdef WOLFSSL_SMALL_STACK + decryptedKey = pkcs7->stream->key; + #endif + #endif + + /* decrypt encryptedContent */ + ret = wc_PKCS7_DecryptContent(pkcs7, encOID, decryptedKey, + blockKeySz, nonce, nonceSz, encodedAttribs, encodedAttribSz, + authTag, authTagSz, encryptedContent, encryptedContentSz, + encryptedContent); + if (ret != 0) { + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + if (authAttrib != NULL) { + /* restore authAttrib IMPLICIT [1] */ + encodedAttribs[0] = authAttribSetByte; + } + + /* copy plaintext to output */ + XMEMCPY(output, encryptedContent, encryptedContentSz); + + /* free memory, zero out keys */ + ForceZero(encryptedContent, encryptedContentSz); + XFREE(encryptedContent, pkcs7->heap, 
DYNAMIC_TYPE_PKCS7); + ForceZero(decryptedKey, MAX_ENCRYPTED_KEY_SZ); + #ifdef WOLFSSL_SMALL_STACK + XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + decryptedKey = NULL; + #ifdef WOLFSSL_SMALL_STACK + #ifndef NO_PKCS7_STREAM + pkcs7->stream->key = NULL; + #endif + #endif + #endif + ret = encryptedContentSz; + #ifndef NO_PKCS7_STREAM + wc_PKCS7_ResetStream(pkcs7); + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START); + break; + default: + WOLFSSL_MSG("Unknown PKCS7 state"); + ret = BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_SMALL_STACK + if (ret != 0 && ret != WC_PKCS7_WANT_READ_E) { + if (decryptedKey != NULL) { + ForceZero(decryptedKey, MAX_ENCRYPTED_KEY_SZ); + } + XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + } +#endif +#ifndef NO_PKCS7_STREAM + if (ret != 0 && ret != WC_PKCS7_WANT_READ_E) { + wc_PKCS7_ResetStream(pkcs7); + } +#endif + + return ret; + +#else + WOLFSSL_MSG("AuthEnvelopedData requires AES-GCM or AES-CCM to be enabled"); + (void)pkcs7; + (void)in; + (void)inSz; + (void)output; + (void)outputSz; + + return NOT_COMPILED_IN; +#endif /* HAVE_AESGCM | HAVE_AESCCM */ +} + + +#ifndef NO_PKCS7_ENCRYPTED_DATA + +/* build PKCS#7 encryptedData content type, return encrypted size */ +int wc_PKCS7_EncodeEncryptedData(PKCS7* pkcs7, byte* output, word32 outputSz) +{ + int ret, idx = 0; + int totalSz, padSz, encryptedOutSz; + + int contentInfoSeqSz, outerContentTypeSz, outerContentSz; + byte contentInfoSeq[MAX_SEQ_SZ]; + byte outerContentType[MAX_ALGO_SZ]; + byte outerContent[MAX_SEQ_SZ]; + + int encDataSeqSz, verSz, blockSz; + byte encDataSeq[MAX_SEQ_SZ]; + byte ver[MAX_VERSION_SZ]; + + byte* plain = NULL; + byte* encryptedContent = NULL; + + int encContentOctetSz, encContentSeqSz, contentTypeSz; + int contentEncAlgoSz, ivOctetStringSz; + byte encContentSeq[MAX_SEQ_SZ]; + byte contentType[MAX_OID_SZ]; + byte contentEncAlgo[MAX_ALGO_SZ]; + byte tmpIv[MAX_CONTENT_IV_SIZE]; + byte ivOctetString[MAX_OCTET_STR_SZ]; + byte encContentOctet[MAX_OCTET_STR_SZ]; + + byte attribSet[MAX_SET_SZ]; + EncodedAttrib* attribs = NULL; + word32 attribsSz; + word32 attribsCount; + word32 attribsSetSz; + + byte* flatAttribs = NULL; + + if (pkcs7 == NULL || pkcs7->content == NULL || pkcs7->contentSz == 0 || + pkcs7->encryptOID == 0 || pkcs7->encryptionKey == NULL || + pkcs7->encryptionKeySz == 0) + return BAD_FUNC_ARG; + + if (output == NULL || outputSz == 0) + return BAD_FUNC_ARG; + + if (pkcs7->version == 3) { + verSz = SetMyVersion(0, ver, 0); + outerContentTypeSz = 0; + } + else { + /* outer content type */ + ret = wc_SetContentType(ENCRYPTED_DATA, outerContentType, + sizeof(outerContentType)); + if (ret < 0) + return ret; + + outerContentTypeSz = ret; + + /* version, 2 if unprotectedAttrs present, 0 if absent */ + if (pkcs7->unprotectedAttribsSz > 0) { + verSz = SetMyVersion(2, ver, 0); + } else { + verSz = SetMyVersion(0, ver, 0); + } + } + + /* EncryptedContentInfo */ + ret = wc_SetContentType(pkcs7->contentOID, contentType, + sizeof(contentType)); + if (ret < 0) + return ret; + + contentTypeSz = ret; + + /* allocate encrypted content buffer, do PKCS#7 padding */ + blockSz = wc_PKCS7_GetOIDBlockSize(pkcs7->encryptOID); + if (blockSz < 0) + return blockSz; + + padSz = wc_PKCS7_GetPadSize(pkcs7->contentSz, blockSz); + if (padSz < 0) + return padSz; + + encryptedOutSz = pkcs7->contentSz + padSz; + + plain = (byte*)XMALLOC(encryptedOutSz, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (plain == NULL) + return MEMORY_E; + + ret = wc_PKCS7_PadData(pkcs7->content, pkcs7->contentSz, plain, + 
encryptedOutSz, blockSz); + if (ret < 0) { + XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + encryptedContent = (byte*)XMALLOC(encryptedOutSz, pkcs7->heap, + DYNAMIC_TYPE_PKCS7); + if (encryptedContent == NULL) { + XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } + + /* put together IV OCTET STRING */ + ivOctetStringSz = SetOctetString(blockSz, ivOctetString); + + /* build up ContentEncryptionAlgorithmIdentifier sequence, + adding (ivOctetStringSz + blockSz) for IV OCTET STRING */ + contentEncAlgoSz = SetAlgoID(pkcs7->encryptOID, contentEncAlgo, + oidBlkType, ivOctetStringSz + blockSz); + if (contentEncAlgoSz == 0) { + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return BAD_FUNC_ARG; + } + + /* encrypt content */ + WOLFSSL_MSG("Encrypting the content"); + ret = wc_PKCS7_GenerateBlock(pkcs7, NULL, tmpIv, blockSz); + if (ret != 0) { + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + ret = wc_PKCS7_EncryptContent(pkcs7->encryptOID, pkcs7->encryptionKey, + pkcs7->encryptionKeySz, tmpIv, blockSz, NULL, 0, NULL, 0, + plain, encryptedOutSz, encryptedContent); + if (ret != 0) { + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + encContentOctetSz = SetImplicit(ASN_OCTET_STRING, 0, + encryptedOutSz, encContentOctet); + + encContentSeqSz = SetSequence(contentTypeSz + contentEncAlgoSz + + ivOctetStringSz + blockSz + + encContentOctetSz + encryptedOutSz, + encContentSeq); + + /* optional UnprotectedAttributes */ + if (pkcs7->unprotectedAttribsSz != 0) { + + if (pkcs7->unprotectedAttribs == NULL) { + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return BAD_FUNC_ARG; + } + + attribs = (EncodedAttrib*)XMALLOC( + sizeof(EncodedAttrib) * pkcs7->unprotectedAttribsSz, + pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (attribs == NULL) { + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } + + attribsCount = pkcs7->unprotectedAttribsSz; + attribsSz = EncodeAttributes(attribs, pkcs7->unprotectedAttribsSz, + pkcs7->unprotectedAttribs, + pkcs7->unprotectedAttribsSz); + + flatAttribs = (byte*)XMALLOC(attribsSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (flatAttribs == NULL) { + XFREE(attribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return MEMORY_E; + } + + FlattenAttributes(pkcs7, flatAttribs, attribs, attribsCount); + attribsSetSz = SetImplicit(ASN_SET, 1, attribsSz, attribSet); + + } else { + attribsSz = 0; + attribsSetSz = 0; + } + + /* keep track of sizes for outer wrapper layering */ + totalSz = verSz + encContentSeqSz + contentTypeSz + contentEncAlgoSz + + ivOctetStringSz + blockSz + encContentOctetSz + encryptedOutSz + + attribsSz + attribsSetSz; + + /* EncryptedData */ + encDataSeqSz = SetSequence(totalSz, encDataSeq); + totalSz += encDataSeqSz; + + if (pkcs7->version != 3) { + /* outer content */ + outerContentSz = SetExplicit(0, totalSz, outerContent); + totalSz += outerContentTypeSz; + totalSz += outerContentSz; + /* ContentInfo */ + contentInfoSeqSz = SetSequence(totalSz, contentInfoSeq); + totalSz += contentInfoSeqSz; + } else { + contentInfoSeqSz = 0; + outerContentSz = 0; + } + + if (totalSz > (int)outputSz) 
{ + WOLFSSL_MSG("PKCS#7 output buffer too small"); + if (pkcs7->unprotectedAttribsSz != 0) { + XFREE(attribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(flatAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + } + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return BUFFER_E; + } + + XMEMCPY(output + idx, contentInfoSeq, contentInfoSeqSz); + idx += contentInfoSeqSz; + XMEMCPY(output + idx, outerContentType, outerContentTypeSz); + idx += outerContentTypeSz; + XMEMCPY(output + idx, outerContent, outerContentSz); + idx += outerContentSz; + XMEMCPY(output + idx, encDataSeq, encDataSeqSz); + idx += encDataSeqSz; + XMEMCPY(output + idx, ver, verSz); + idx += verSz; + XMEMCPY(output + idx, encContentSeq, encContentSeqSz); + idx += encContentSeqSz; + XMEMCPY(output + idx, contentType, contentTypeSz); + idx += contentTypeSz; + XMEMCPY(output + idx, contentEncAlgo, contentEncAlgoSz); + idx += contentEncAlgoSz; + XMEMCPY(output + idx, ivOctetString, ivOctetStringSz); + idx += ivOctetStringSz; + XMEMCPY(output + idx, tmpIv, blockSz); + idx += blockSz; + XMEMCPY(output + idx, encContentOctet, encContentOctetSz); + idx += encContentOctetSz; + XMEMCPY(output + idx, encryptedContent, encryptedOutSz); + idx += encryptedOutSz; + + if (pkcs7->unprotectedAttribsSz != 0) { + XMEMCPY(output + idx, attribSet, attribsSetSz); + idx += attribsSetSz; + XMEMCPY(output + idx, flatAttribs, attribsSz); + idx += attribsSz; + XFREE(attribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(flatAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + } + + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + + return idx; +} + + +/* decode and store unprotected attributes in PKCS7->decodedAttrib. Return + * 0 on success, negative on error. User must call wc_PKCS7_Free(). 
*/ +static int wc_PKCS7_DecodeUnprotectedAttributes(PKCS7* pkcs7, byte* pkiMsg, + word32 pkiMsgSz, word32* inOutIdx) +{ + int ret, attribLen; + word32 idx; + byte tag; + + if (pkcs7 == NULL || pkiMsg == NULL || + pkiMsgSz == 0 || inOutIdx == NULL) + return BAD_FUNC_ARG; + + idx = *inOutIdx; + + if (GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) + return ASN_PARSE_E; + + if (GetLength(pkiMsg, &idx, &attribLen, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* loop through attributes */ + if ((ret = wc_PKCS7_ParseAttribs(pkcs7, pkiMsg + idx, attribLen)) < 0) { + return ret; + } + + *inOutIdx = idx; + + return 0; +} + + +/* unwrap and decrypt PKCS#7/CMS encrypted-data object, returned decoded size */ +int wc_PKCS7_DecodeEncryptedData(PKCS7* pkcs7, byte* in, word32 inSz, + byte* output, word32 outputSz) +{ + int ret = 0, version, length = 0, haveAttribs = 0; + word32 idx = 0; + +#ifndef NO_PKCS7_STREAM + word32 tmpIdx = 0; + long rc; +#endif + word32 contentType, encOID; + + int expBlockSz = 0; + byte tmpIvBuf[MAX_CONTENT_IV_SIZE]; + byte *tmpIv = tmpIvBuf; + + int encryptedContentSz = 0; + byte padLen; + byte* encryptedContent = NULL; + + byte* pkiMsg = in; + word32 pkiMsgSz = inSz; + byte tag; + + if (pkcs7 == NULL || + ((pkcs7->encryptionKey == NULL || pkcs7->encryptionKeySz == 0) && + pkcs7->decryptionCb == NULL)) + return BAD_FUNC_ARG; + + if (pkiMsg == NULL || pkiMsgSz == 0 || + output == NULL || outputSz == 0) + return BAD_FUNC_ARG; + +#ifndef NO_PKCS7_STREAM + (void)tmpIv; /* help out static analysis */ + if (pkcs7->stream == NULL) { + if ((ret = wc_PKCS7_CreateStream(pkcs7)) != 0) { + return ret; + } + } +#endif + + switch (pkcs7->state) { + case WC_PKCS7_START: +#ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_SEQ_SZ + + MAX_ALGO_SZ, &pkiMsg, &idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_SEQ_PEEK, in, inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; +#endif + + if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0) + ret = ASN_PARSE_E; + + if (pkcs7->version != 3) { /* ContentInfo not in firmware bundles */ + /* read past ContentInfo, verify type is encrypted-data */ + if (ret == 0 && wc_GetContentType(pkiMsg, &idx, &contentType, + pkiMsgSz) < 0) + ret = ASN_PARSE_E; + + if (ret == 0 && contentType != ENCRYPTED_DATA) { + WOLFSSL_MSG("PKCS#7 input not of type EncryptedData"); + ret = PKCS7_OID_E; + } + } + if (ret != 0) break; +#ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) { + break; + } +#endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_STAGE2); + FALL_THROUGH; + /* end of stage 1 */ + + case WC_PKCS7_STAGE2: +#ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, + MAX_LENGTH_SZ + MAX_SEQ_SZ + ASN_TAG_SZ, &pkiMsg, + &idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, + inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; +#endif + if (pkcs7->version != 3) { + if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) + ret = ASN_PARSE_E; + if (ret == 0 && tag != + (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) + ret = ASN_PARSE_E; + + if (ret == 0 && GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0) + ret = ASN_PARSE_E; + + /* remove EncryptedData and version */ + if (ret == 0 && GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0) + ret = ASN_PARSE_E; + } + + if (ret != 0) break; +#ifndef 
NO_PKCS7_STREAM + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) { + break; + } +#endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_STAGE3); + FALL_THROUGH; + /* end of stage 2 */ + + case WC_PKCS7_STAGE3: +#ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, + MAX_VERSION_SZ + MAX_SEQ_SZ + MAX_ALGO_SZ * 2, + &pkiMsg, &idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, + inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; +#endif + /* get version, check later */ + haveAttribs = 0; + if (ret == 0 && GetMyVersion(pkiMsg, &idx, &version, pkiMsgSz) < 0) + ret = ASN_PARSE_E; + + /* remove EncryptedContentInfo */ + if (ret == 0 && GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0) + ret = ASN_PARSE_E; + + if (ret == 0 && wc_GetContentType(pkiMsg, &idx, &contentType, + pkiMsgSz) < 0) + ret = ASN_PARSE_E; + + if (ret == 0 && (ret = GetAlgoId(pkiMsg, &idx, &encOID, oidBlkType, + pkiMsgSz)) < 0) + ret = ASN_PARSE_E; + if (ret == 0 && (expBlockSz = wc_PKCS7_GetOIDBlockSize(encOID)) < 0) + ret = expBlockSz; + + if (ret != 0) break; +#ifndef NO_PKCS7_STREAM + /* store expBlockSz for later */ + pkcs7->stream->varOne = expBlockSz; + pkcs7->stream->varTwo = encOID; + + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) { + break; + } + + /* store version for later */ + pkcs7->stream->vers = version; +#endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_STAGE4); + FALL_THROUGH; + /* end of stage 3 */ + + /* get block cipher IV, stored in OPTIONAL parameter of AlgoID */ + case WC_PKCS7_STAGE4: +#ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, + ASN_TAG_SZ + MAX_LENGTH_SZ, &pkiMsg, &idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, + inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + + /* restore saved variables */ + expBlockSz = pkcs7->stream->varOne; +#endif + if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) + ret = ASN_PARSE_E; + if (ret == 0 && tag != ASN_OCTET_STRING) + ret = ASN_PARSE_E; + + if (ret == 0 && GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0) + ret = ASN_PARSE_E; + + if (ret == 0 && length != expBlockSz) { + WOLFSSL_MSG("Incorrect IV length, must be of content alg block size"); + ret = ASN_PARSE_E; + } + + if (ret != 0) break; +#ifndef NO_PKCS7_STREAM + /* next chunk of data expected should have the IV */ + pkcs7->stream->expected = length; + + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) { + break; + } +#endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_STAGE5); + FALL_THROUGH; + /* end of stage 4 */ + + case WC_PKCS7_STAGE5: +#ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, + pkcs7->stream->expected + ASN_TAG_SZ + + MAX_LENGTH_SZ, &pkiMsg, &idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, + inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + + /* use IV buffer from stream structure */ + tmpIv = pkcs7->stream->tmpIv; + length = pkcs7->stream->expected; +#endif + XMEMCPY(tmpIv, &pkiMsg[idx], length); + idx += length; + /* read encryptedContent, cont[0] */ + if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) + ret = ASN_PARSE_E; + if (ret == 0 && tag != (ASN_CONTEXT_SPECIFIC | 0)) + ret = ASN_PARSE_E; + + if (ret == 0 && GetLength(pkiMsg, &idx, &encryptedContentSz, + pkiMsgSz) <= 0) + ret = ASN_PARSE_E; + + if (ret < 0) + break; +#ifndef 
NO_PKCS7_STREAM + /* next chunk of data should contain encrypted content */ + pkcs7->stream->varThree = encryptedContentSz; + if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) { + break; + } + + if (pkcs7->stream->totalRd + encryptedContentSz < pkiMsgSz) { + pkcs7->stream->flagOne = 1; + } + + pkcs7->stream->expected = (pkcs7->stream->maxLen - + pkcs7->stream->totalRd) + pkcs7->stream->length; + +#endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_STAGE6); + FALL_THROUGH; + /* end of stage 5 */ + + case WC_PKCS7_STAGE6: +#ifndef NO_PKCS7_STREAM + if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, + pkcs7->stream->expected, &pkiMsg, &idx)) != 0) { + return ret; + } + + rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, + inSz); + if (rc < 0) { + ret = (int)rc; + break; + } + pkiMsgSz = (word32)rc; + + /* restore saved variables */ + expBlockSz = pkcs7->stream->varOne; + encOID = pkcs7->stream->varTwo; + encryptedContentSz = pkcs7->stream->varThree; + version = pkcs7->stream->vers; + tmpIv = pkcs7->stream->tmpIv; +#else + encOID = 0; +#endif + if (ret == 0 && (encryptedContent = (byte*)XMALLOC( + encryptedContentSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7)) == NULL) { + ret = MEMORY_E; + break; + } + + if (ret == 0) { + XMEMCPY(encryptedContent, &pkiMsg[idx], encryptedContentSz); + idx += encryptedContentSz; + + /* decrypt encryptedContent */ + ret = wc_PKCS7_DecryptContent(pkcs7, encOID, + pkcs7->encryptionKey, pkcs7->encryptionKeySz, tmpIv, + expBlockSz, NULL, 0, NULL, 0, encryptedContent, + encryptedContentSz, encryptedContent); + if (ret != 0) { + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + } + } + + if (ret == 0) { + padLen = encryptedContent[encryptedContentSz-1]; + + /* a valid PKCS#7 pad byte is 1..blockSz, so zero is rejected too */ + if (padLen == 0 || padLen > encryptedContentSz) { + WOLFSSL_MSG("Bad padding size found"); + ret = BUFFER_E; + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + break; + } + + /* copy plaintext to output */ + XMEMCPY(output, encryptedContent, encryptedContentSz - padLen); + + /* get implicit[1] unprotected attributes, optional */ + wc_PKCS7_FreeDecodedAttrib(pkcs7->decodedAttrib, pkcs7->heap); + pkcs7->decodedAttrib = NULL; + #ifndef NO_PKCS7_STREAM + if (pkcs7->stream->flagOne) + #else + if (idx < pkiMsgSz) + #endif + { + haveAttribs = 1; + + ret = wc_PKCS7_DecodeUnprotectedAttributes(pkcs7, pkiMsg, + pkiMsgSz, &idx); + if (ret != 0) { + ForceZero(encryptedContent, encryptedContentSz); + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + ret = ASN_PARSE_E; + } + } + } + + if (ret == 0) { + ForceZero(encryptedContent, encryptedContentSz); + XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + + /* go back and check the version now that attribs have been processed */ + if (pkcs7->version == 3 && version != 0) { + WOLFSSL_MSG("Wrong PKCS#7 FirmwareEncryptedData version"); + return ASN_VERSION_E; + } + + if (pkcs7->version != 3 && + ((haveAttribs == 0 && version != 0) || + (haveAttribs == 1 && version != 2))) { + WOLFSSL_MSG("Wrong PKCS#7 EncryptedData version"); + return ASN_VERSION_E; + } + ret = encryptedContentSz - padLen; + } + + if (ret != 0) break; + #ifndef NO_PKCS7_STREAM + wc_PKCS7_ResetStream(pkcs7); + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START); + break; + + default: + WOLFSSL_MSG("Error in unknown PKCS#7 Decode Encrypted Data state"); + return BAD_STATE_E; + } + + if (ret != 0) { + #ifndef NO_PKCS7_STREAM + /* restart in error case */ + wc_PKCS7_ResetStream(pkcs7); + #endif + wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START); + } + return ret; +} + + +/* Function to set
callback during decryption; this overrides the default + * decryption function and can be used for choosing a key at run time based + * on the parsed bundle so far. + * returns 0 on success + */ +int wc_PKCS7_SetDecodeEncryptedCb(PKCS7* pkcs7, + CallbackDecryptContent decryptionCb) +{ + if (pkcs7 != NULL) { + pkcs7->decryptionCb = decryptionCb; + } + return 0; +} + + +/* Set an optional user context that gets passed to callback + * returns 0 on success + */ +int wc_PKCS7_SetDecodeEncryptedCtx(PKCS7* pkcs7, void* ctx) +{ + if (pkcs7 != NULL) { + pkcs7->decryptionCtx = ctx; + } + return 0; +} +#endif /* NO_PKCS7_ENCRYPTED_DATA */ + +#if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA) + +/* build PKCS#7 compressedData content type, return encoded size */ +int wc_PKCS7_EncodeCompressedData(PKCS7* pkcs7, byte* output, word32 outputSz) +{ + byte contentInfoSeq[MAX_SEQ_SZ]; + byte contentInfoTypeOid[MAX_OID_SZ]; + byte contentInfoContentSeq[MAX_SEQ_SZ]; /* EXPLICIT [0] */ + byte compressedDataSeq[MAX_SEQ_SZ]; + byte cmsVersion[MAX_VERSION_SZ]; + byte compressAlgId[MAX_ALGO_SZ]; + byte encapContentInfoSeq[MAX_SEQ_SZ]; + byte contentTypeOid[MAX_OID_SZ]; + byte contentSeq[MAX_SEQ_SZ]; /* EXPLICIT [0] */ + byte contentOctetStr[MAX_OCTET_STR_SZ]; + + int ret; + word32 totalSz, idx; + word32 contentInfoSeqSz, contentInfoContentSeqSz, contentInfoTypeOidSz; + word32 compressedDataSeqSz, cmsVersionSz, compressAlgIdSz; + word32 encapContentInfoSeqSz, contentTypeOidSz, contentSeqSz; + word32 contentOctetStrSz; + + byte* compressed; + word32 compressedSz; + + if (pkcs7 == NULL || pkcs7->content == NULL || pkcs7->contentSz == 0 || + output == NULL || outputSz == 0) { + return BAD_FUNC_ARG; + } + + /* allocate space for compressed content. The libz code says the compressed + * buffer should be srcSz + 0.1% + 12.
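+ * As a worked example (ours, not from the libz docs): a 100000 byte
+ * payload gets a bound of 100000 + 100000/1000 + 12 = 100112 bytes.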
*/ + compressedSz = (pkcs7->contentSz + (word32)(pkcs7->contentSz * 0.001) + 12); + compressed = (byte*)XMALLOC(compressedSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (compressed == NULL) { + WOLFSSL_MSG("Error allocating memory for CMS compressed content"); + return MEMORY_E; + } + + /* compress content */ + ret = wc_Compress(compressed, compressedSz, pkcs7->content, + pkcs7->contentSz, 0); + if (ret < 0) { + XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + compressedSz = (word32)ret; + + /* eContent OCTET STRING, working backwards */ + contentOctetStrSz = SetOctetString(compressedSz, contentOctetStr); + totalSz = contentOctetStrSz + compressedSz; + + /* EXPLICIT [0] eContentType */ + contentSeqSz = SetExplicit(0, totalSz, contentSeq); + totalSz += contentSeqSz; + + /* eContentType OBJECT IDENTIFIER */ + ret = wc_SetContentType(pkcs7->contentOID, contentTypeOid, + sizeof(contentTypeOid)); + if (ret < 0) { + XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + contentTypeOidSz = ret; + totalSz += contentTypeOidSz; + + /* EncapsulatedContentInfo SEQUENCE */ + encapContentInfoSeqSz = SetSequence(totalSz, encapContentInfoSeq); + totalSz += encapContentInfoSeqSz; + + /* compressionAlgorithm AlgorithmIdentifier */ + /* Only supports zlib for compression currently: + * id-alg-zlibCompress (1.2.840.113549.1.9.16.3.8) */ + compressAlgIdSz = SetAlgoID(ZLIBc, compressAlgId, oidCompressType, 0); + totalSz += compressAlgIdSz; + + /* version */ + cmsVersionSz = SetMyVersion(0, cmsVersion, 0); + totalSz += cmsVersionSz; + + /* CompressedData SEQUENCE */ + compressedDataSeqSz = SetSequence(totalSz, compressedDataSeq); + totalSz += compressedDataSeqSz; + + /* ContentInfo content EXPLICIT SEQUENCE */ + contentInfoContentSeqSz = SetExplicit(0, totalSz, contentInfoContentSeq); + totalSz += contentInfoContentSeqSz; + + /* ContentInfo ContentType (compressedData) */ + if (pkcs7->version == 3) { + contentInfoTypeOidSz = 0; + } + else { + ret = wc_SetContentType(COMPRESSED_DATA, contentInfoTypeOid, + sizeof(contentInfoTypeOid)); + if (ret < 0) { + XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + + contentInfoTypeOidSz = ret; + totalSz += contentInfoTypeOidSz; + } + + /* ContentInfo SEQUENCE */ + contentInfoSeqSz = SetSequence(totalSz, contentInfoSeq); + totalSz += contentInfoSeqSz; + + if (outputSz < totalSz) { + XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return BUFFER_E; + } + + idx = 0; + XMEMCPY(output + idx, contentInfoSeq, contentInfoSeqSz); + idx += contentInfoSeqSz; + XMEMCPY(output + idx, contentInfoTypeOid, contentInfoTypeOidSz); + idx += contentInfoTypeOidSz; + XMEMCPY(output + idx, contentInfoContentSeq, contentInfoContentSeqSz); + idx += contentInfoContentSeqSz; + XMEMCPY(output + idx, compressedDataSeq, compressedDataSeqSz); + idx += compressedDataSeqSz; + XMEMCPY(output + idx, cmsVersion, cmsVersionSz); + idx += cmsVersionSz; + XMEMCPY(output + idx, compressAlgId, compressAlgIdSz); + idx += compressAlgIdSz; + XMEMCPY(output + idx, encapContentInfoSeq, encapContentInfoSeqSz); + idx += encapContentInfoSeqSz; + XMEMCPY(output + idx, contentTypeOid, contentTypeOidSz); + idx += contentTypeOidSz; + XMEMCPY(output + idx, contentSeq, contentSeqSz); + idx += contentSeqSz; + XMEMCPY(output + idx, contentOctetStr, contentOctetStrSz); + idx += contentOctetStrSz; + XMEMCPY(output + idx, compressed, compressedSz); + idx += compressedSz; + + XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + + return idx; +} + +/* unwrap and decompress 
PKCS#7/CMS compressedData object, + * returned decoded size */ +int wc_PKCS7_DecodeCompressedData(PKCS7* pkcs7, byte* pkiMsg, word32 pkiMsgSz, + byte* output, word32 outputSz) +{ + int length, version, ret; + word32 idx = 0, algOID, contentType; + byte tag; + + byte* decompressed; + word32 decompressedSz; + + if (pkcs7 == NULL || pkiMsg == NULL || pkiMsgSz == 0 || + output == NULL || outputSz == 0) { + return BAD_FUNC_ARG; + } + + /* get ContentInfo SEQUENCE */ + if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (pkcs7->version != 3) { + /* get ContentInfo contentType */ + if (wc_GetContentType(pkiMsg, &idx, &contentType, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (contentType != COMPRESSED_DATA) + return ASN_PARSE_E; + } + + /* get ContentInfo content EXPLICIT SEQUENCE */ + if (GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) + return ASN_PARSE_E; + + if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* get CompressedData SEQUENCE */ + if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* get version */ + if (GetMyVersion(pkiMsg, &idx, &version, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (version != 0) { + WOLFSSL_MSG("CMS CompressedData version MUST be 0, but is not"); + return ASN_PARSE_E; + } + + /* get CompressionAlgorithmIdentifier */ + if (GetAlgoId(pkiMsg, &idx, &algOID, oidIgnoreType, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* Only supports zlib for compression currently: + * id-alg-zlibCompress (1.2.840.113549.1.9.16.3.8) */ + if (algOID != ZLIBc) { + WOLFSSL_MSG("CMS CompressedData only supports zlib algorithm"); + return ASN_PARSE_E; + } + + /* get EncapsulatedContentInfo SEQUENCE */ + if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* get ContentType OID */ + if (wc_GetContentType(pkiMsg, &idx, &contentType, pkiMsgSz) < 0) + return ASN_PARSE_E; + + pkcs7->contentOID = contentType; + + /* get eContent EXPLICIT SEQUENCE */ + if (GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) + return ASN_PARSE_E; + + if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* get content OCTET STRING */ + if (GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) + return ASN_PARSE_E; + + if (tag != ASN_OCTET_STRING) + return ASN_PARSE_E; + + if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0) + return ASN_PARSE_E; + + /* allocate space for decompressed data */ + decompressed = (byte*)XMALLOC(length, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + if (decompressed == NULL) { + WOLFSSL_MSG("Error allocating memory for CMS decompression buffer"); + return MEMORY_E; + } + + /* decompress content */ + ret = wc_DeCompress(decompressed, length, &pkiMsg[idx], length); + if (ret < 0) { + XFREE(decompressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return ret; + } + decompressedSz = (word32)ret; + + /* get content */ + if (outputSz < decompressedSz) { + WOLFSSL_MSG("CMS output buffer too small to hold decompressed data"); + XFREE(decompressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + return BUFFER_E; + } + + XMEMCPY(output, decompressed, decompressedSz); + XFREE(decompressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7); + + return decompressedSz; +} + +#endif /* HAVE_LIBZ && !NO_PKCS7_COMPRESSED_DATA */ + +#else /* HAVE_PKCS7 */ + + +#ifdef _MSC_VER + /* 4206 warning for blank file */ + #pragma warning(disable: 4206) +#endif + + +#endif /* 
HAVE_PKCS7 */ + diff --git a/client/wolfssl/wolfcrypt/src/poly1305.c b/client/wolfssl/wolfcrypt/src/poly1305.c new file mode 100644 index 0000000..6516648 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/poly1305.c @@ -0,0 +1,868 @@ +/* poly1305.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* + * Based off the public domain implementations by Andrew Moon + * and Daniel J. Bernstein + */ + + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef HAVE_POLY1305 +#include <wolfssl/wolfcrypt/poly1305.h> +#include <wolfssl/wolfcrypt/error-crypt.h> +#include <wolfssl/wolfcrypt/logging.h> +#include <wolfssl/wolfcrypt/cpuid.h> +#ifdef NO_INLINE + #include <wolfssl/wolfcrypt/misc.h> +#else + #define WOLFSSL_MISC_INCLUDED + #include <wolfcrypt/src/misc.c> +#endif +#ifdef CHACHA_AEAD_TEST + #include <stdio.h> +#endif + +#ifdef _MSC_VER + /* 4127 warning constant while(1) */ + #pragma warning(disable: 4127) +#endif + +#ifdef USE_INTEL_SPEEDUP + #include <emmintrin.h> + #include <immintrin.h> + + #if defined(__GNUC__) && ((__GNUC__ < 4) || \ + (__GNUC__ == 4 && __GNUC_MINOR__ <= 8)) + #undef NO_AVX2_SUPPORT + #define NO_AVX2_SUPPORT + #endif + #if defined(__clang__) && ((__clang_major__ < 3) || \ + (__clang_major__ == 3 && __clang_minor__ <= 5)) + #define NO_AVX2_SUPPORT + #elif defined(__clang__) && defined(NO_AVX2_SUPPORT) + #undef NO_AVX2_SUPPORT + #endif + + #define HAVE_INTEL_AVX1 + #ifndef NO_AVX2_SUPPORT + #define HAVE_INTEL_AVX2 + #endif +#endif + +#ifdef USE_INTEL_SPEEDUP +static word32 intel_flags = 0; +static word32 cpu_flags_set = 0; +#endif + +#if defined(USE_INTEL_SPEEDUP) || defined(POLY130564) + #if defined(_MSC_VER) + #define POLY1305_NOINLINE __declspec(noinline) + #elif defined(__GNUC__) + #define POLY1305_NOINLINE __attribute__((noinline)) + #else + #define POLY1305_NOINLINE + #endif + + #if defined(_MSC_VER) + #include <intrin.h> + + typedef struct word128 { + word64 lo; + word64 hi; + } word128; + + #define MUL(out, x, y) out.lo = _umul128((x), (y), &out.hi) + #define ADD(out, in) { word64 t = out.lo; out.lo += in.lo; \ + out.hi += (out.lo < t) + in.hi; } + #define ADDLO(out, in) { word64 t = out.lo; out.lo += in; \ + out.hi += (out.lo < t); } + #define SHR(in, shift) (__shiftright128(in.lo, in.hi, (shift))) + #define LO(in) (in.lo) + + #elif defined(__GNUC__) + #if defined(__SIZEOF_INT128__) + typedef unsigned __int128 word128; + #else + typedef unsigned word128 __attribute__((mode(TI))); + #endif + + #define MUL(out, x, y) out = ((word128)x * y) + #define ADD(out, in) out += in + #define ADDLO(out, in) out += in + #define SHR(in, shift) (word64)(in >> (shift)) + #define LO(in) (word64)(in) + #endif +#endif + +#ifdef USE_INTEL_SPEEDUP +#ifdef __cplusplus + extern "C" { +#endif + +#ifdef HAVE_INTEL_AVX1 +/* Process one block (16 bytes) of data. + * + * ctx Poly1305 context. + * m One block of message data. + */ +extern void poly1305_block_avx(Poly1305* ctx, const unsigned char *m); +/* Process multiple blocks (n * 16 bytes) of data.
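+ * (Callers in this file always pass a byte count that is a multiple of the
+ * 16-byte block size; wc_Poly1305Update masks the length down first.)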
+ * + * ctx Poly1305 context. + * m Blocks of message data. + * bytes The number of bytes to process. + */ +extern void poly1305_blocks_avx(Poly1305* ctx, const unsigned char* m, + size_t bytes); +/* Set the key to use when processing data. + * Initialize the context. + * + * ctx Poly1305 context. + * key The key data (16 bytes). + */ +extern void poly1305_setkey_avx(Poly1305* ctx, const byte* key); +/* Calculate the final result - authentication data. + * Zeros out the private data in the context. + * + * ctx Poly1305 context. + * mac Buffer to hold 16 bytes. + */ +extern void poly1305_final_avx(Poly1305* ctx, byte* mac); +#endif + +#ifdef HAVE_INTEL_AVX2 +/* Process multiple blocks (n * 16 bytes) of data. + * + * ctx Poly1305 context. + * m Blocks of message data. + * bytes The number of bytes to process. + */ +extern void poly1305_blocks_avx2(Poly1305* ctx, const unsigned char* m, + size_t bytes); +/* Calculate R^1, R^2, R^3 and R^4 and store them in the context. + * + * ctx Poly1305 context. + */ +extern void poly1305_calc_powers_avx2(Poly1305* ctx); +/* Set the key to use when processing data. + * Initialize the context. + * Calls AVX set key function as final function calls AVX code. + * + * ctx Poly1305 context. + * key The key data (16 bytes). + */ +extern void poly1305_setkey_avx2(Poly1305* ctx, const byte* key); +/* Calculate the final result - authentication data. + * Zeros out the private data in the context. + * Calls AVX final function to quickly process last blocks. + * + * ctx Poly1305 context. + * mac Buffer to hold 16 bytes - authentication data. + */ +extern void poly1305_final_avx2(Poly1305* ctx, byte* mac); +#endif + +#ifdef __cplusplus + } /* extern "C" */ +#endif + +#elif defined(POLY130564) +#ifndef WOLFSSL_ARMASM + static word64 U8TO64(const byte* p) + { + return + (((word64)(p[0] & 0xff) ) | + ((word64)(p[1] & 0xff) << 8) | + ((word64)(p[2] & 0xff) << 16) | + ((word64)(p[3] & 0xff) << 24) | + ((word64)(p[4] & 0xff) << 32) | + ((word64)(p[5] & 0xff) << 40) | + ((word64)(p[6] & 0xff) << 48) | + ((word64)(p[7] & 0xff) << 56)); + } + + static void U64TO8(byte* p, word64 v) { + p[0] = (v ) & 0xff; + p[1] = (v >> 8) & 0xff; + p[2] = (v >> 16) & 0xff; + p[3] = (v >> 24) & 0xff; + p[4] = (v >> 32) & 0xff; + p[5] = (v >> 40) & 0xff; + p[6] = (v >> 48) & 0xff; + p[7] = (v >> 56) & 0xff; + } +#endif/* WOLFSSL_ARMASM */ +#else /* if not 64 bit then use 32 bit */ + + static word32 U8TO32(const byte *p) + { + return + (((word32)(p[0] & 0xff) ) | + ((word32)(p[1] & 0xff) << 8) | + ((word32)(p[2] & 0xff) << 16) | + ((word32)(p[3] & 0xff) << 24)); + } + + static void U32TO8(byte *p, word32 v) { + p[0] = (v ) & 0xff; + p[1] = (v >> 8) & 0xff; + p[2] = (v >> 16) & 0xff; + p[3] = (v >> 24) & 0xff; + } +#endif + +/* convert 32-bit unsigned to little endian 64 bit type as byte array */ +static WC_INLINE void u32tole64(const word32 inLe32, byte outLe64[8]) +{ +#ifndef WOLFSSL_X86_64_BUILD + outLe64[0] = (byte)(inLe32 & 0x000000FF); + outLe64[1] = (byte)((inLe32 & 0x0000FF00) >> 8); + outLe64[2] = (byte)((inLe32 & 0x00FF0000) >> 16); + outLe64[3] = (byte)((inLe32 & 0xFF000000) >> 24); + outLe64[4] = 0; + outLe64[5] = 0; + outLe64[6] = 0; + outLe64[7] = 0; +#else + *(word64*)outLe64 = inLe32; +#endif +} + + +#if !defined(WOLFSSL_ARMASM) || !defined(__aarch64__) +void poly1305_blocks(Poly1305* ctx, const unsigned char *m, + size_t bytes) +{ +#ifdef USE_INTEL_SPEEDUP + /* AVX2 is handled in wc_Poly1305Update. 
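+     * When AVX2 is in use, wc_Poly1305Update dispatches straight to
+     * poly1305_blocks_avx2(), so only the AVX1 routine is needed here.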
*/ + poly1305_blocks_avx(ctx, m, bytes); +#elif defined(POLY130564) + const word64 hibit = (ctx->finished) ? 0 : ((word64)1 << 40); /* 1 << 128 */ + word64 r0,r1,r2; + word64 s1,s2; + word64 h0,h1,h2; + word64 c; + word128 d0,d1,d2,d; + + r0 = ctx->r[0]; + r1 = ctx->r[1]; + r2 = ctx->r[2]; + + h0 = ctx->h[0]; + h1 = ctx->h[1]; + h2 = ctx->h[2]; + + s1 = r1 * (5 << 2); + s2 = r2 * (5 << 2); + + while (bytes >= POLY1305_BLOCK_SIZE) { + word64 t0,t1; + + /* h += m[i] */ + t0 = U8TO64(&m[0]); + t1 = U8TO64(&m[8]); + + h0 += (( t0 ) & 0xfffffffffff); + h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff); + h2 += (((t1 >> 24) ) & 0x3ffffffffff) | hibit; + + /* h *= r */ + MUL(d0, h0, r0); MUL(d, h1, s2); ADD(d0, d); MUL(d, h2, s1); ADD(d0, d); + MUL(d1, h0, r1); MUL(d, h1, r0); ADD(d1, d); MUL(d, h2, s2); ADD(d1, d); + MUL(d2, h0, r2); MUL(d, h1, r1); ADD(d2, d); MUL(d, h2, r0); ADD(d2, d); + + /* (partial) h %= p */ + c = SHR(d0, 44); h0 = LO(d0) & 0xfffffffffff; + ADDLO(d1, c); c = SHR(d1, 44); h1 = LO(d1) & 0xfffffffffff; + ADDLO(d2, c); c = SHR(d2, 42); h2 = LO(d2) & 0x3ffffffffff; + h0 += c * 5; c = (h0 >> 44); h0 = h0 & 0xfffffffffff; + h1 += c; + + m += POLY1305_BLOCK_SIZE; + bytes -= POLY1305_BLOCK_SIZE; + } + + ctx->h[0] = h0; + ctx->h[1] = h1; + ctx->h[2] = h2; + +#else /* if not 64 bit then use 32 bit */ + const word32 hibit = (ctx->finished) ? 0 : ((word32)1 << 24); /* 1 << 128 */ + word32 r0,r1,r2,r3,r4; + word32 s1,s2,s3,s4; + word32 h0,h1,h2,h3,h4; + word64 d0,d1,d2,d3,d4; + word32 c; + + + r0 = ctx->r[0]; + r1 = ctx->r[1]; + r2 = ctx->r[2]; + r3 = ctx->r[3]; + r4 = ctx->r[4]; + + s1 = r1 * 5; + s2 = r2 * 5; + s3 = r3 * 5; + s4 = r4 * 5; + + h0 = ctx->h[0]; + h1 = ctx->h[1]; + h2 = ctx->h[2]; + h3 = ctx->h[3]; + h4 = ctx->h[4]; + + while (bytes >= POLY1305_BLOCK_SIZE) { + /* h += m[i] */ + h0 += (U8TO32(m+ 0) ) & 0x3ffffff; + h1 += (U8TO32(m+ 3) >> 2) & 0x3ffffff; + h2 += (U8TO32(m+ 6) >> 4) & 0x3ffffff; + h3 += (U8TO32(m+ 9) >> 6) & 0x3ffffff; + h4 += (U8TO32(m+12) >> 8) | hibit; + + /* h *= r */ + d0 = ((word64)h0 * r0) + ((word64)h1 * s4) + ((word64)h2 * s3) + + ((word64)h3 * s2) + ((word64)h4 * s1); + d1 = ((word64)h0 * r1) + ((word64)h1 * r0) + ((word64)h2 * s4) + + ((word64)h3 * s3) + ((word64)h4 * s2); + d2 = ((word64)h0 * r2) + ((word64)h1 * r1) + ((word64)h2 * r0) + + ((word64)h3 * s4) + ((word64)h4 * s3); + d3 = ((word64)h0 * r3) + ((word64)h1 * r2) + ((word64)h2 * r1) + + ((word64)h3 * r0) + ((word64)h4 * s4); + d4 = ((word64)h0 * r4) + ((word64)h1 * r3) + ((word64)h2 * r2) + + ((word64)h3 * r1) + ((word64)h4 * r0); + + /* (partial) h %= p */ + c = (word32)(d0 >> 26); h0 = (word32)d0 & 0x3ffffff; + d1 += c; c = (word32)(d1 >> 26); h1 = (word32)d1 & 0x3ffffff; + d2 += c; c = (word32)(d2 >> 26); h2 = (word32)d2 & 0x3ffffff; + d3 += c; c = (word32)(d3 >> 26); h3 = (word32)d3 & 0x3ffffff; + d4 += c; c = (word32)(d4 >> 26); h4 = (word32)d4 & 0x3ffffff; + h0 += c * 5; c = (h0 >> 26); h0 = h0 & 0x3ffffff; + h1 += c; + + m += POLY1305_BLOCK_SIZE; + bytes -= POLY1305_BLOCK_SIZE; + } + + ctx->h[0] = h0; + ctx->h[1] = h1; + ctx->h[2] = h2; + ctx->h[3] = h3; + ctx->h[4] = h4; + +#endif /* end of 64 bit cpu blocks or 32 bit cpu */ +} + +void poly1305_block(Poly1305* ctx, const unsigned char *m) +{ +#ifdef USE_INTEL_SPEEDUP + /* No call to poly1305_block when AVX2, AVX2 does 4 blocks at a time. 
*/ + poly1305_block_avx(ctx, m); +#else + poly1305_blocks(ctx, m, POLY1305_BLOCK_SIZE); +#endif +} +#endif /* !defined(WOLFSSL_ARMASM) || !defined(__aarch64__) */ + +#if !defined(WOLFSSL_ARMASM) || !defined(__aarch64__) +int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz) +{ +#if defined(POLY130564) && !defined(USE_INTEL_SPEEDUP) + word64 t0,t1; +#endif + + if (key == NULL) + return BAD_FUNC_ARG; + +#ifdef CHACHA_AEAD_TEST + word32 k; + printf("Poly key used:\n"); + for (k = 0; k < keySz; k++) { + printf("%02x", key[k]); + if ((k+1) % 8 == 0) + printf("\n"); + } + printf("\n"); +#endif + + if (keySz != 32 || ctx == NULL) + return BAD_FUNC_ARG; + +#ifdef USE_INTEL_SPEEDUP + if (!cpu_flags_set) { + intel_flags = cpuid_get_flags(); + cpu_flags_set = 1; + } + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) + poly1305_setkey_avx2(ctx, key); + else + #endif + poly1305_setkey_avx(ctx, key); +#elif defined(POLY130564) + + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ + t0 = U8TO64(key + 0); + t1 = U8TO64(key + 8); + + ctx->r[0] = ( t0 ) & 0xffc0fffffff; + ctx->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff; + ctx->r[2] = ((t1 >> 24) ) & 0x00ffffffc0f; + + /* h (accumulator) = 0 */ + ctx->h[0] = 0; + ctx->h[1] = 0; + ctx->h[2] = 0; + + /* save pad for later */ + ctx->pad[0] = U8TO64(key + 16); + ctx->pad[1] = U8TO64(key + 24); + + ctx->leftover = 0; + ctx->finished = 0; + +#else /* if not 64 bit then use 32 bit */ + + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ + ctx->r[0] = (U8TO32(key + 0) ) & 0x3ffffff; + ctx->r[1] = (U8TO32(key + 3) >> 2) & 0x3ffff03; + ctx->r[2] = (U8TO32(key + 6) >> 4) & 0x3ffc0ff; + ctx->r[3] = (U8TO32(key + 9) >> 6) & 0x3f03fff; + ctx->r[4] = (U8TO32(key + 12) >> 8) & 0x00fffff; + + /* h = 0 */ + ctx->h[0] = 0; + ctx->h[1] = 0; + ctx->h[2] = 0; + ctx->h[3] = 0; + ctx->h[4] = 0; + + /* save pad for later */ + ctx->pad[0] = U8TO32(key + 16); + ctx->pad[1] = U8TO32(key + 20); + ctx->pad[2] = U8TO32(key + 24); + ctx->pad[3] = U8TO32(key + 28); + + ctx->leftover = 0; + ctx->finished = 0; + +#endif + + return 0; +} + +int wc_Poly1305Final(Poly1305* ctx, byte* mac) +{ +#ifdef USE_INTEL_SPEEDUP +#elif defined(POLY130564) + + word64 h0,h1,h2,c; + word64 g0,g1,g2; + word64 t0,t1; + +#else + + word32 h0,h1,h2,h3,h4,c; + word32 g0,g1,g2,g3,g4; + word64 f; + word32 mask; + +#endif + + if (ctx == NULL) + return BAD_FUNC_ARG; + +#ifdef USE_INTEL_SPEEDUP + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) + poly1305_final_avx2(ctx, mac); + else + #endif + poly1305_final_avx(ctx, mac); +#elif defined(POLY130564) + + /* process the remaining block */ + if (ctx->leftover) { + size_t i = ctx->leftover; + ctx->buffer[i] = 1; + for (i = i + 1; i < POLY1305_BLOCK_SIZE; i++) + ctx->buffer[i] = 0; + ctx->finished = 1; + poly1305_block(ctx, ctx->buffer); + } + + /* fully carry h */ + h0 = ctx->h[0]; + h1 = ctx->h[1]; + h2 = ctx->h[2]; + + c = (h1 >> 44); h1 &= 0xfffffffffff; + h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff; + h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff; + h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff; + h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff; + h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff; + h1 += c; + + /* compute h + -p */ + g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff; + g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff; + g2 = h2 + c - ((word64)1 << 42); + + /* select h if h < p, or h + -p if h >= p */ + c = (g2 >> ((sizeof(word64) * 8) - 1)) - 1; + g0 &= c; + g1 &= c; + g2 &= c; + c = ~c; + h0 = (h0 & c) | g0; + h1 = (h1 & c) | 
g1; + h2 = (h2 & c) | g2; + + /* h = (h + pad) */ + t0 = ctx->pad[0]; + t1 = ctx->pad[1]; + + h0 += (( t0 ) & 0xfffffffffff) ; + c = (h0 >> 44); h0 &= 0xfffffffffff; + h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c; + c = (h1 >> 44); h1 &= 0xfffffffffff; + h2 += (((t1 >> 24) ) & 0x3ffffffffff) + c; + h2 &= 0x3ffffffffff; + + /* mac = h % (2^128) */ + h0 = ((h0 ) | (h1 << 44)); + h1 = ((h1 >> 20) | (h2 << 24)); + + U64TO8(mac + 0, h0); + U64TO8(mac + 8, h1); + + /* zero out the state */ + ctx->h[0] = 0; + ctx->h[1] = 0; + ctx->h[2] = 0; + ctx->r[0] = 0; + ctx->r[1] = 0; + ctx->r[2] = 0; + ctx->pad[0] = 0; + ctx->pad[1] = 0; + +#else /* if not 64 bit then use 32 bit */ + + /* process the remaining block */ + if (ctx->leftover) { + size_t i = ctx->leftover; + ctx->buffer[i++] = 1; + for (; i < POLY1305_BLOCK_SIZE; i++) + ctx->buffer[i] = 0; + ctx->finished = 1; + poly1305_block(ctx, ctx->buffer); + } + + /* fully carry h */ + h0 = ctx->h[0]; + h1 = ctx->h[1]; + h2 = ctx->h[2]; + h3 = ctx->h[3]; + h4 = ctx->h[4]; + + c = h1 >> 26; h1 = h1 & 0x3ffffff; + h2 += c; c = h2 >> 26; h2 = h2 & 0x3ffffff; + h3 += c; c = h3 >> 26; h3 = h3 & 0x3ffffff; + h4 += c; c = h4 >> 26; h4 = h4 & 0x3ffffff; + h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff; + h1 += c; + + /* compute h + -p */ + g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff; + g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff; + g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff; + g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff; + g4 = h4 + c - ((word32)1 << 26); + + /* select h if h < p, or h + -p if h >= p */ + mask = ((word32)g4 >> ((sizeof(word32) * 8) - 1)) - 1; + g0 &= mask; + g1 &= mask; + g2 &= mask; + g3 &= mask; + g4 &= mask; + mask = ~mask; + h0 = (h0 & mask) | g0; + h1 = (h1 & mask) | g1; + h2 = (h2 & mask) | g2; + h3 = (h3 & mask) | g3; + h4 = (h4 & mask) | g4; + + /* h = h % (2^128) */ + h0 = ((h0 ) | (h1 << 26)) & 0xffffffff; + h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; + h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; + h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; + + /* mac = (h + pad) % (2^128) */ + f = (word64)h0 + ctx->pad[0] ; h0 = (word32)f; + f = (word64)h1 + ctx->pad[1] + (f >> 32); h1 = (word32)f; + f = (word64)h2 + ctx->pad[2] + (f >> 32); h2 = (word32)f; + f = (word64)h3 + ctx->pad[3] + (f >> 32); h3 = (word32)f; + + U32TO8(mac + 0, h0); + U32TO8(mac + 4, h1); + U32TO8(mac + 8, h2); + U32TO8(mac + 12, h3); + + /* zero out the state */ + ctx->h[0] = 0; + ctx->h[1] = 0; + ctx->h[2] = 0; + ctx->h[3] = 0; + ctx->h[4] = 0; + ctx->r[0] = 0; + ctx->r[1] = 0; + ctx->r[2] = 0; + ctx->r[3] = 0; + ctx->r[4] = 0; + ctx->pad[0] = 0; + ctx->pad[1] = 0; + ctx->pad[2] = 0; + ctx->pad[3] = 0; + +#endif + + return 0; +} +#endif /* !defined(WOLFSSL_ARMASM) || !defined(__aarch64__) */ + + +int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes) +{ + size_t i; + +#ifdef CHACHA_AEAD_TEST + word32 k; + printf("Raw input to poly:\n"); + for (k = 0; k < bytes; k++) { + printf("%02x", m[k]); + if ((k+1) % 16 == 0) + printf("\n"); + } + printf("\n"); +#endif + + if (ctx == NULL) + return BAD_FUNC_ARG; + +#ifdef USE_INTEL_SPEEDUP + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + /* handle leftover */ + if (ctx->leftover) { + size_t want = sizeof(ctx->buffer) - ctx->leftover; + if (want > bytes) + want = bytes; + + for (i = 0; i < want; i++) + ctx->buffer[ctx->leftover + i] = m[i]; + bytes -= (word32)want; + m += want; + ctx->leftover += want; + if (ctx->leftover < sizeof(ctx->buffer)) + return 0; + + if (!ctx->started) + 
poly1305_calc_powers_avx2(ctx); + poly1305_blocks_avx2(ctx, ctx->buffer, sizeof(ctx->buffer)); + ctx->leftover = 0; + } + + /* process full blocks */ + if (bytes >= sizeof(ctx->buffer)) { + size_t want = bytes & ~(sizeof(ctx->buffer) - 1); + + if (!ctx->started) + poly1305_calc_powers_avx2(ctx); + poly1305_blocks_avx2(ctx, m, want); + m += want; + bytes -= (word32)want; + } + + /* store leftover */ + if (bytes) { + for (i = 0; i < bytes; i++) + ctx->buffer[ctx->leftover + i] = m[i]; + ctx->leftover += bytes; + } + } + else + #endif +#endif + { + /* handle leftover */ + if (ctx->leftover) { + size_t want = (POLY1305_BLOCK_SIZE - ctx->leftover); + if (want > bytes) + want = bytes; + for (i = 0; i < want; i++) + ctx->buffer[ctx->leftover + i] = m[i]; + bytes -= (word32)want; + m += want; + ctx->leftover += want; + if (ctx->leftover < POLY1305_BLOCK_SIZE) + return 0; + poly1305_block(ctx, ctx->buffer); + ctx->leftover = 0; + } + + /* process full blocks */ + if (bytes >= POLY1305_BLOCK_SIZE) { + size_t want = (bytes & ~(POLY1305_BLOCK_SIZE - 1)); + poly1305_blocks(ctx, m, want); + m += want; + bytes -= (word32)want; + } + + /* store leftover */ + if (bytes) { + for (i = 0; i < bytes; i++) + ctx->buffer[ctx->leftover + i] = m[i]; + ctx->leftover += bytes; + } + } + + return 0; +} + +/* Takes a Poly1305 struct that has a key loaded and pads the provided length + ctx : Initialized Poly1305 struct to use + lenToPad : Current number of bytes updated that needs padding to 16 + */ +int wc_Poly1305_Pad(Poly1305* ctx, word32 lenToPad) +{ + int ret = 0; + word32 paddingLen; + byte padding[WC_POLY1305_PAD_SZ - 1]; + + if (ctx == NULL) { + return BAD_FUNC_ARG; + } + if (lenToPad == 0) { + return 0; /* nothing needs to be done */ + } + + XMEMSET(padding, 0, sizeof(padding)); + + /* Pad length to 16 bytes */ + paddingLen = -(int)lenToPad & (WC_POLY1305_PAD_SZ - 1); + if (paddingLen > 0) { + ret = wc_Poly1305Update(ctx, padding, paddingLen); + } + return ret; +} + +/* Takes a Poly1305 struct that has a key loaded and adds the AEAD length + encoding in 64-bit little endian + aadSz : Size of the additional authentication data + dataSz : Size of the plaintext or ciphertext + */ +int wc_Poly1305_EncodeSizes(Poly1305* ctx, word32 aadSz, word32 dataSz) +{ + int ret; + byte little64[16]; /* sizeof(word64) * 2 */ + + if (ctx == NULL) { + return BAD_FUNC_ARG; + } + + XMEMSET(little64, 0, sizeof(little64)); + + /* size of additional data and input data as little endian 64 bit types */ + u32tole64(aadSz, little64); + u32tole64(dataSz, little64 + 8); + ret = wc_Poly1305Update(ctx, little64, sizeof(little64)); + + return ret; +} + +/* Takes in an initialized Poly1305 struct that has a key loaded and creates + a MAC (tag) using recent TLS AEAD padding scheme. 
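+   (The AAD and the input are each zero-padded up to a 16-byte boundary,
+   then both lengths are appended as little-endian 64-bit words before the
+   tag is produced; see wc_Poly1305_Pad and wc_Poly1305_EncodeSizes above.)
+   Hypothetical usage sketch (names illustrative, error checks omitted):
+       wc_Poly1305SetKey(&ctx, otk, 32);
+       wc_Poly1305_MAC(&ctx, aad, aadSz, cipher, cipherSz, tag, 16);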
+ ctx : Initialized Poly1305 struct to use + additional : Additional data to use + addSz : Size of additional buffer + input : Input buffer to create tag from + sz : Size of input buffer + tag : Buffer to hold created tag + tagSz : Size of input tag buffer (must be at least + WC_POLY1305_MAC_SZ(16)) + */ +int wc_Poly1305_MAC(Poly1305* ctx, byte* additional, word32 addSz, + byte* input, word32 sz, byte* tag, word32 tagSz) +{ + int ret; + + /* sanity check on arguments */ + if (ctx == NULL || input == NULL || tag == NULL || + tagSz < WC_POLY1305_MAC_SZ) { + return BAD_FUNC_ARG; + } + + /* additional allowed to be 0 */ + if (addSz > 0) { + if (additional == NULL) + return BAD_FUNC_ARG; + + /* additional data plus padding */ + if ((ret = wc_Poly1305Update(ctx, additional, addSz)) != 0) { + return ret; + } + /* pad additional data */ + if ((ret = wc_Poly1305_Pad(ctx, addSz)) != 0) { + return ret; + } + } + + /* input plus padding */ + if ((ret = wc_Poly1305Update(ctx, input, sz)) != 0) { + return ret; + } + /* pad input data */ + if ((ret = wc_Poly1305_Pad(ctx, sz)) != 0) { + return ret; + } + + /* encode size of AAD and input data as little endian 64 bit types */ + if ((ret = wc_Poly1305_EncodeSizes(ctx, addSz, sz)) != 0) { + return ret; + } + + /* Finalize the auth tag */ + ret = wc_Poly1305Final(ctx, tag); + + return ret; + +} +#endif /* HAVE_POLY1305 */ diff --git a/client/wolfssl/wolfcrypt/src/poly1305_asm.S b/client/wolfssl/wolfcrypt/src/poly1305_asm.S new file mode 100644 index 0000000..9571107 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/poly1305_asm.S @@ -0,0 +1,1105 @@ +/* poly1305_asm + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifndef HAVE_INTEL_AVX1 +#define HAVE_INTEL_AVX1 +#endif /* HAVE_INTEL_AVX1 */ +#ifndef NO_AVX2_SUPPORT +#define HAVE_INTEL_AVX2 +#endif /* NO_AVX2_SUPPORT */ + +#ifdef HAVE_INTEL_AVX1 +#ifndef __APPLE__ +.text +.globl poly1305_setkey_avx +.type poly1305_setkey_avx,@function +.align 4 +poly1305_setkey_avx: +#else +.section __TEXT,__text +.globl _poly1305_setkey_avx +.p2align 2 +_poly1305_setkey_avx: +#endif /* __APPLE__ */ + movabsq $0xffffffc0fffffff, %r10 + movabsq $0xffffffc0ffffffc, %r11 + movq (%rsi), %rdx + movq 8(%rsi), %rax + movq 16(%rsi), %rcx + movq 24(%rsi), %r8 + andq %r10, %rdx + andq %r11, %rax + movq %rdx, %r10 + movq %rax, %r11 + xorq %r9, %r9 + movq %rdx, (%rdi) + movq %rax, 8(%rdi) + movq %r9, 24(%rdi) + movq %r9, 32(%rdi) + movq %r9, 40(%rdi) + movq %rcx, 48(%rdi) + movq %r8, 56(%rdi) + movq %r9, 352(%rdi) + movq %r9, 408(%rdi) + movq %rdx, 360(%rdi) + movq %rax, 416(%rdi) + addq %rdx, %r10 + addq %rax, %r11 + movq %r10, 368(%rdi) + movq %r11, 424(%rdi) + addq %rdx, %r10 + addq %rax, %r11 + movq %r10, 376(%rdi) + movq %r11, 432(%rdi) + addq %rdx, %r10 + addq %rax, %r11 + movq %r10, 384(%rdi) + movq %r11, 440(%rdi) + addq %rdx, %r10 + addq %rax, %r11 + movq %r10, 392(%rdi) + movq %r11, 448(%rdi) + addq %rdx, %r10 + addq %rax, %r11 + movq %r10, 400(%rdi) + movq %r11, 456(%rdi) + movq %r9, 608(%rdi) + movb $0x01, 616(%rdi) + repz retq +#ifndef __APPLE__ +.size poly1305_setkey_avx,.-poly1305_setkey_avx +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl poly1305_block_avx +.type poly1305_block_avx,@function +.align 4 +poly1305_block_avx: +#else +.section __TEXT,__text +.globl _poly1305_block_avx +.p2align 2 +_poly1305_block_avx: +#endif /* __APPLE__ */ + pushq %r15 + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + movq (%rdi), %r15 + movq 8(%rdi), %rbx + movq 24(%rdi), %r8 + movq 32(%rdi), %r9 + movq 40(%rdi), %r10 + xorq %r14, %r14 + movb 616(%rdi), %r14b + # h += m + movq (%rsi), %r11 + movq 8(%rsi), %r12 + addq %r11, %r8 + adcq %r12, %r9 + movq %rbx, %rax + adcq %r14, %r10 + # r[1] * h[0] => rdx, rax ==> t2, t1 + mulq %r8 + movq %rax, %r12 + movq %rdx, %r13 + # r[0] * h[1] => rdx, rax ++> t2, t1 + movq %r15, %rax + mulq %r9 + addq %rax, %r12 + movq %r15, %rax + adcq %rdx, %r13 + # r[0] * h[0] => rdx, rax ==> t4, t0 + mulq %r8 + movq %rax, %r11 + movq %rdx, %r8 + # r[1] * h[1] => rdx, rax =+> t3, t2 + movq %rbx, %rax + mulq %r9 + # r[0] * h[2] +> t2 + addq 352(%rdi,%r10,8), %r13 + movq %rdx, %r14 + addq %r8, %r12 + adcq %rax, %r13 + # r[1] * h[2] +> t3 + adcq 408(%rdi,%r10,8), %r14 + # r * h in r14, r13, r12, r11 + # h = (r * h) mod 2^130 - 5 + movq %r13, %r10 + andq $-4, %r13 + andq $3, %r10 + addq %r13, %r11 + movq %r13, %r8 + adcq %r14, %r12 + adcq $0x00, %r10 + shrdq $2, %r14, %r8 + shrq $2, %r14 + addq %r11, %r8 + adcq %r14, %r12 + movq %r12, %r9 + adcq $0x00, %r10 + # h in r10, r9, r8 + # Store h to ctx + movq %r8, 24(%rdi) + movq %r9, 32(%rdi) + movq %r10, 40(%rdi) + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %r15 + repz retq +#ifndef __APPLE__ +.size poly1305_block_avx,.-poly1305_block_avx +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl poly1305_blocks_avx +.type poly1305_blocks_avx,@function +.align 4 +poly1305_blocks_avx: +#else +.section __TEXT,__text +.globl _poly1305_blocks_avx +.p2align 2 
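+# poly1305_blocks_avx(ctx=rdi, msg=rsi, len=rdx): processes len/16 full blocks;
+# len is assumed to be a non-zero multiple of 16 bytes.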
+_poly1305_blocks_avx: +#endif /* __APPLE__ */ + pushq %r15 + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + movq %rdx, %rcx + movq (%rdi), %r15 + movq 8(%rdi), %rbx + movq 24(%rdi), %r8 + movq 32(%rdi), %r9 + movq 40(%rdi), %r10 +L_poly1305_avx_blocks_start: + # h += m + movq (%rsi), %r11 + movq 8(%rsi), %r12 + addq %r11, %r8 + adcq %r12, %r9 + movq %rbx, %rax + adcq $0x00, %r10 + # r[1] * h[0] => rdx, rax ==> t2, t1 + mulq %r8 + movq %rax, %r12 + movq %rdx, %r13 + # r[0] * h[1] => rdx, rax ++> t2, t1 + movq %r15, %rax + mulq %r9 + addq %rax, %r12 + movq %r15, %rax + adcq %rdx, %r13 + # r[0] * h[0] => rdx, rax ==> t4, t0 + mulq %r8 + movq %rax, %r11 + movq %rdx, %r8 + # r[1] * h[1] => rdx, rax =+> t3, t2 + movq %rbx, %rax + mulq %r9 + # r[0] * h[2] +> t2 + addq 360(%rdi,%r10,8), %r13 + movq %rdx, %r14 + addq %r8, %r12 + adcq %rax, %r13 + # r[1] * h[2] +> t3 + adcq 416(%rdi,%r10,8), %r14 + # r * h in r14, r13, r12, r11 + # h = (r * h) mod 2^130 - 5 + movq %r13, %r10 + andq $-4, %r13 + andq $3, %r10 + addq %r13, %r11 + movq %r13, %r8 + adcq %r14, %r12 + adcq $0x00, %r10 + shrdq $2, %r14, %r8 + shrq $2, %r14 + addq %r11, %r8 + adcq %r14, %r12 + movq %r12, %r9 + adcq $0x00, %r10 + # h in r10, r9, r8 + # Next block from message + addq $16, %rsi + subq $16, %rcx + jg L_poly1305_avx_blocks_start + # Store h to ctx + movq %r8, 24(%rdi) + movq %r9, 32(%rdi) + movq %r10, 40(%rdi) + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %r15 + repz retq +#ifndef __APPLE__ +.size poly1305_blocks_avx,.-poly1305_blocks_avx +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl poly1305_final_avx +.type poly1305_final_avx,@function +.align 4 +poly1305_final_avx: +#else +.section __TEXT,__text +.globl _poly1305_final_avx +.p2align 2 +_poly1305_final_avx: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + movq %rsi, %rbx + movq 608(%rdi), %rax + testq %rax, %rax + je L_poly1305_avx_final_no_more + movb $0x01, 480(%rdi,%rax,1) + jmp L_poly1305_avx_final_cmp_rem +L_poly1305_avx_final_zero_rem: + movb $0x00, 480(%rdi,%rax,1) +L_poly1305_avx_final_cmp_rem: + incb %al + cmpq $16, %rax + jl L_poly1305_avx_final_zero_rem + movb $0x00, 616(%rdi) + leaq 480(%rdi), %rsi +#ifndef __APPLE__ + callq poly1305_block_avx@plt +#else + callq _poly1305_block_avx +#endif /* __APPLE__ */ +L_poly1305_avx_final_no_more: + movq 24(%rdi), %rax + movq 32(%rdi), %rdx + movq 40(%rdi), %rcx + movq 48(%rdi), %r11 + movq 56(%rdi), %r12 + # h %= p + # h = (h + pad) + # mod 2^130 - 5 + movq %rcx, %r8 + andq $3, %rcx + shrq $2, %r8 + # Multily by 5 + leaq 0(%r8,%r8,4), %r8 + addq %r8, %rax + adcq $0x00, %rdx + adcq $0x00, %rcx + # Fixup when between (1 << 130) - 1 and (1 << 130) - 5 + movq %rax, %r8 + movq %rdx, %r9 + movq %rcx, %r10 + addq $5, %r8 + adcq $0x00, %r9 + adcq $0x00, %r10 + cmpq $4, %r10 + cmoveq %r8, %rax + cmoveq %r9, %rdx + # h += pad + addq %r11, %rax + adcq %r12, %rdx + movq %rax, (%rbx) + movq %rdx, 8(%rbx) + # Zero out r + movq $0x00, (%rdi) + movq $0x00, 8(%rdi) + # Zero out h + movq $0x00, 24(%rdi) + movq $0x00, 32(%rdi) + movq $0x00, 40(%rdi) + # Zero out pad + movq $0x00, 48(%rdi) + movq $0x00, 56(%rdi) + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size poly1305_final_avx,.-poly1305_final_avx +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX1 */ +#ifdef HAVE_INTEL_AVX2 +#ifndef __APPLE__ +.text +.globl poly1305_calc_powers_avx2 +.type poly1305_calc_powers_avx2,@function +.align 4 +poly1305_calc_powers_avx2: +#else +.section __TEXT,__text +.globl _poly1305_calc_powers_avx2 +.p2align 2 
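+# poly1305_calc_powers_avx2(ctx=rdi): expands r into r^1..r^4, stored as
+# 26-bit limbs, for the four-block-at-a-time AVX2 kernel.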
+_poly1305_calc_powers_avx2: +#endif /* __APPLE__ */ + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbx + pushq %rbp + movq (%rdi), %rcx + movq 8(%rdi), %r8 + xorq %r9, %r9 + # Convert to 26 bits in 32 + movq %rcx, %rax + movq %rcx, %rdx + movq %rcx, %rsi + movq %r8, %rbx + movq %r8, %rbp + shrq $26, %rdx + shrdq $52, %r8, %rsi + shrq $14, %rbx + shrdq $40, %r9, %rbp + andq $0x3ffffff, %rax + andq $0x3ffffff, %rdx + andq $0x3ffffff, %rsi + andq $0x3ffffff, %rbx + andq $0x3ffffff, %rbp + movl %eax, 224(%rdi) + movl %edx, 228(%rdi) + movl %esi, 232(%rdi) + movl %ebx, 236(%rdi) + movl %ebp, 240(%rdi) + movl $0x00, 244(%rdi) + # Square 128-bit + movq %r8, %rax + mulq %rcx + xorq %r13, %r13 + movq %rax, %r11 + movq %rdx, %r12 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x00, %r13 + movq %rcx, %rax + mulq %rax + movq %rax, %r10 + movq %rdx, %r15 + movq %r8, %rax + mulq %rax + addq %r15, %r11 + adcq %rax, %r12 + adcq %rdx, %r13 + # Reduce 256-bit to 130-bit + movq %r12, %rax + movq %r13, %rdx + andq $-4, %rax + andq $3, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + shrdq $2, %rdx, %rax + shrq $2, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x00, %r12 + movq %r12, %rax + shrq $2, %rax + leaq 0(%rax,%rax,4), %rax + andq $3, %r12 + addq %rax, %r10 + adcq $0x00, %r11 + adcq $0x00, %r12 + # Convert to 26 bits in 32 + movq %r10, %rax + movq %r10, %rdx + movq %r10, %rsi + movq %r11, %rbx + movq %r11, %rbp + shrq $26, %rdx + shrdq $52, %r11, %rsi + shrq $14, %rbx + shrdq $40, %r12, %rbp + andq $0x3ffffff, %rax + andq $0x3ffffff, %rdx + andq $0x3ffffff, %rsi + andq $0x3ffffff, %rbx + andq $0x3ffffff, %rbp + movl %eax, 256(%rdi) + movl %edx, 260(%rdi) + movl %esi, 264(%rdi) + movl %ebx, 268(%rdi) + movl %ebp, 272(%rdi) + movl $0x00, 276(%rdi) + # Multiply 128-bit by 130-bit + # r1[0] * r2[0] + movq %rcx, %rax + mulq %r10 + movq %rax, %r13 + movq %rdx, %r14 + # r1[0] * r2[1] + movq %rcx, %rax + mulq %r11 + movq $0x00, %r15 + addq %rax, %r14 + adcq %rdx, %r15 + # r1[1] * r2[0] + movq %r8, %rax + mulq %r10 + movq $0x00, %rsi + addq %rax, %r14 + adcq %rdx, %r15 + adcq $0x00, %rsi + # r1[0] * r2[2] + movq %rcx, %rax + mulq %r12 + addq %rax, %r15 + adcq %rdx, %rsi + # r1[1] * r2[1] + movq %r8, %rax + mulq %r11 + movq $0x00, %rbx + addq %rax, %r15 + adcq %rdx, %rsi + adcq $0x00, %rbx + # r1[1] * r2[2] + movq %r8, %rax + mulq %r12 + addq %rax, %rsi + adcq %rdx, %rbx + # Reduce 260-bit to 130-bit + movq %r15, %rax + movq %rsi, %rdx + movq %rbx, %rbx + andq $-4, %rax + andq $3, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq %rbx, %r15 + shrdq $2, %rdx, %rax + shrdq $2, %rbx, %rdx + shrq $2, %rbx + addq %rax, %r13 + adcq %rdx, %r14 + adcq %rbx, %r15 + movq %r15, %rax + andq $3, %r15 + shrq $2, %rax + leaq 0(%rax,%rax,4), %rax + addq %rax, %r13 + adcq $0x00, %r14 + adcq $0x00, %r15 + # Convert to 26 bits in 32 + movq %r13, %rax + movq %r13, %rdx + movq %r13, %rsi + movq %r14, %rbx + movq %r14, %rbp + shrq $26, %rdx + shrdq $52, %r14, %rsi + shrq $14, %rbx + shrdq $40, %r15, %rbp + andq $0x3ffffff, %rax + andq $0x3ffffff, %rdx + andq $0x3ffffff, %rsi + andq $0x3ffffff, %rbx + andq $0x3ffffff, %rbp + movl %eax, 288(%rdi) + movl %edx, 292(%rdi) + movl %esi, 296(%rdi) + movl %ebx, 300(%rdi) + movl %ebp, 304(%rdi) + movl $0x00, 308(%rdi) + # Square 130-bit + movq %r11, %rax + mulq %r10 + xorq %r13, %r13 + movq %rax, %r8 + movq %rdx, %r9 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0x00, %r13 + movq %r10, %rax + mulq %rax + movq %rax, %rcx + movq %rdx, %r15 + movq %r11, %rax + mulq %rax + 
addq %r15, %r8 + adcq %rax, %r9 + adcq %rdx, %r13 + movq %r12, %rax + mulq %rax + movq %rax, %r14 + movq %r12, %rax + mulq %r10 + addq %rax, %r9 + adcq %rdx, %r13 + adcq $0x00, %r14 + addq %rax, %r9 + adcq %rdx, %r13 + adcq $0x00, %r14 + movq %r12, %rax + mulq %r11 + addq %rax, %r13 + adcq %rdx, %r14 + addq %rax, %r13 + adcq %rdx, %r14 + # Reduce 260-bit to 130-bit + movq %r9, %rax + movq %r13, %rdx + movq %r14, %r15 + andq $-4, %rax + andq $3, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq %r15, %r9 + shrdq $2, %rdx, %rax + shrdq $2, %r15, %rdx + shrq $2, %r15 + addq %rax, %rcx + adcq %rdx, %r8 + adcq %r15, %r9 + movq %r9, %rax + andq $3, %r9 + shrq $2, %rax + leaq 0(%rax,%rax,4), %rax + addq %rax, %rcx + adcq $0x00, %r8 + adcq $0x00, %r9 + # Convert to 26 bits in 32 + movq %rcx, %rax + movq %rcx, %rdx + movq %rcx, %rsi + movq %r8, %rbx + movq %r8, %rbp + shrq $26, %rdx + shrdq $52, %r8, %rsi + shrq $14, %rbx + shrdq $40, %r9, %rbp + andq $0x3ffffff, %rax + andq $0x3ffffff, %rdx + andq $0x3ffffff, %rsi + andq $0x3ffffff, %rbx + andq $0x3ffffff, %rbp + movl %eax, 320(%rdi) + movl %edx, 324(%rdi) + movl %esi, 328(%rdi) + movl %ebx, 332(%rdi) + movl %ebp, 336(%rdi) + movl $0x00, 340(%rdi) + popq %rbp + popq %rbx + popq %r15 + popq %r14 + popq %r13 + popq %r12 + repz retq +#ifndef __APPLE__ +.size poly1305_calc_powers_avx2,.-poly1305_calc_powers_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl poly1305_setkey_avx2 +.type poly1305_setkey_avx2,@function +.align 4 +poly1305_setkey_avx2: +#else +.section __TEXT,__text +.globl _poly1305_setkey_avx2 +.p2align 2 +_poly1305_setkey_avx2: +#endif /* __APPLE__ */ +#ifndef __APPLE__ + callq poly1305_setkey_avx@plt +#else + callq _poly1305_setkey_avx +#endif /* __APPLE__ */ + vpxor %ymm0, %ymm0, %ymm0 + vmovdqu %ymm0, 64(%rdi) + vmovdqu %ymm0, 96(%rdi) + vmovdqu %ymm0, 128(%rdi) + vmovdqu %ymm0, 160(%rdi) + vmovdqu %ymm0, 192(%rdi) + movq $0x00, 608(%rdi) + movw $0x00, 616(%rdi) + repz retq +#ifndef __APPLE__ +.size poly1305_setkey_avx2,.-poly1305_setkey_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_poly1305_avx2_blocks_mask: +.quad 0x3ffffff, 0x3ffffff +.quad 0x3ffffff, 0x3ffffff +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_poly1305_avx2_blocks_hibit: +.quad 0x1000000, 0x1000000 +.quad 0x1000000, 0x1000000 +#ifndef __APPLE__ +.text +.globl poly1305_blocks_avx2 +.type poly1305_blocks_avx2,@function +.align 4 +poly1305_blocks_avx2: +#else +.section __TEXT,__text +.globl _poly1305_blocks_avx2 +.p2align 2 +_poly1305_blocks_avx2: +#endif /* __APPLE__ */ + pushq %r12 + pushq %rbx + subq $0x140, %rsp + movq %rsp, %rcx + andq $-32, %rcx + addq $32, %rcx + vpxor %ymm15, %ymm15, %ymm15 + movq %rcx, %rbx + leaq 64(%rdi), %rax + addq $0xa0, %rbx + cmpw $0x00, 616(%rdi) + jne L_poly1305_avx2_blocks_begin_h + # Load the message data + vmovdqu (%rsi), %ymm0 + vmovdqu 32(%rsi), %ymm1 + vperm2i128 $32, %ymm1, %ymm0, %ymm2 + vperm2i128 $49, %ymm1, %ymm0, %ymm0 + vpunpckldq %ymm0, %ymm2, %ymm1 + vpunpckhdq %ymm0, %ymm2, %ymm3 + vpunpckldq %ymm15, %ymm1, %ymm0 + vpunpckhdq %ymm15, %ymm1, %ymm1 + vpunpckldq %ymm15, %ymm3, %ymm2 + vpunpckhdq %ymm15, %ymm3, %ymm3 + vmovdqu L_poly1305_avx2_blocks_hibit(%rip), %ymm4 + vpsllq $6, %ymm1, %ymm1 + vpsllq $12, %ymm2, %ymm2 + vpsllq $18, %ymm3, %ymm3 + vmovdqu 
L_poly1305_avx2_blocks_mask(%rip), %ymm14 + # Reduce, in place, the message data + vpsrlq $26, %ymm0, %ymm10 + vpsrlq $26, %ymm3, %ymm11 + vpand %ymm14, %ymm0, %ymm0 + vpand %ymm14, %ymm3, %ymm3 + vpaddq %ymm1, %ymm10, %ymm1 + vpaddq %ymm4, %ymm11, %ymm4 + vpsrlq $26, %ymm1, %ymm10 + vpsrlq $26, %ymm4, %ymm11 + vpand %ymm14, %ymm1, %ymm1 + vpand %ymm14, %ymm4, %ymm4 + vpaddq %ymm2, %ymm10, %ymm2 + vpslld $2, %ymm11, %ymm12 + vpaddd %ymm12, %ymm11, %ymm12 + vpsrlq $26, %ymm2, %ymm10 + vpaddq %ymm0, %ymm12, %ymm0 + vpsrlq $26, %ymm0, %ymm11 + vpand %ymm14, %ymm2, %ymm2 + vpand %ymm14, %ymm0, %ymm0 + vpaddq %ymm3, %ymm10, %ymm3 + vpaddq %ymm1, %ymm11, %ymm1 + vpsrlq $26, %ymm3, %ymm10 + vpand %ymm14, %ymm3, %ymm3 + vpaddq %ymm4, %ymm10, %ymm4 + addq $0x40, %rsi + subq $0x40, %rdx + jz L_poly1305_avx2_blocks_store + jmp L_poly1305_avx2_blocks_load_r4 +L_poly1305_avx2_blocks_begin_h: + # Load the H values. + vmovdqu (%rax), %ymm0 + vmovdqu 32(%rax), %ymm1 + vmovdqu 64(%rax), %ymm2 + vmovdqu 96(%rax), %ymm3 + vmovdqu 128(%rax), %ymm4 + # Check if there is a power of r to load - otherwise use r^4. + cmpb $0x00, 616(%rdi) + je L_poly1305_avx2_blocks_load_r4 + # Load the 4 powers of r - r^4, r^3, r^2, r^1. + vmovdqu 224(%rdi), %ymm8 + vmovdqu 256(%rdi), %ymm7 + vmovdqu 288(%rdi), %ymm6 + vmovdqu 320(%rdi), %ymm5 + vpermq $0xd8, %ymm5, %ymm5 + vpermq $0xd8, %ymm6, %ymm6 + vpermq $0xd8, %ymm7, %ymm7 + vpermq $0xd8, %ymm8, %ymm8 + vpunpcklqdq %ymm6, %ymm5, %ymm10 + vpunpckhqdq %ymm6, %ymm5, %ymm11 + vpunpcklqdq %ymm8, %ymm7, %ymm12 + vpunpckhqdq %ymm8, %ymm7, %ymm13 + vperm2i128 $32, %ymm12, %ymm10, %ymm5 + vperm2i128 $49, %ymm12, %ymm10, %ymm7 + vperm2i128 $32, %ymm13, %ymm11, %ymm9 + vpsrlq $32, %ymm5, %ymm6 + vpsrlq $32, %ymm7, %ymm8 + jmp L_poly1305_avx2_blocks_mul_5 +L_poly1305_avx2_blocks_load_r4: + # Load r^4 into all four positions. + vmovdqu 320(%rdi), %ymm13 + vpermq $0x00, %ymm13, %ymm5 + vpsrlq $32, %ymm13, %ymm14 + vpermq $0x55, %ymm13, %ymm7 + vpermq $0xaa, %ymm13, %ymm9 + vpermq $0x00, %ymm14, %ymm6 + vpermq $0x55, %ymm14, %ymm8 +L_poly1305_avx2_blocks_mul_5: + # Multiply top 4 26-bit values of all four H by 5 + vpslld $2, %ymm6, %ymm10 + vpslld $2, %ymm7, %ymm11 + vpslld $2, %ymm8, %ymm12 + vpslld $2, %ymm9, %ymm13 + vpaddq %ymm10, %ymm6, %ymm10 + vpaddq %ymm11, %ymm7, %ymm11 + vpaddq %ymm12, %ymm8, %ymm12 + vpaddq %ymm13, %ymm9, %ymm13 + # Store powers of r and multiple of 5 for use in multiply. 
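+        # rcx -> aligned stack table holding the r^1..r^4 limb vectors,
+        # rbx -> the matching 5x multiples of limbs 1-4 used in reduction.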
+ vmovdqa %ymm10, (%rbx) + vmovdqa %ymm11, 32(%rbx) + vmovdqa %ymm12, 64(%rbx) + vmovdqa %ymm13, 96(%rbx) + vmovdqa %ymm5, (%rcx) + vmovdqa %ymm6, 32(%rcx) + vmovdqa %ymm7, 64(%rcx) + vmovdqa %ymm8, 96(%rcx) + vmovdqa %ymm9, 128(%rcx) + vmovdqu L_poly1305_avx2_blocks_mask(%rip), %ymm14 + # If not finished then loop over data + cmpb $0x01, 616(%rdi) + jne L_poly1305_avx2_blocks_start + # Do last multiply, reduce, add the four H together and move to + # 32-bit registers + vpmuludq (%rbx), %ymm4, %ymm5 + vpmuludq 32(%rbx), %ymm3, %ymm10 + vpmuludq 32(%rbx), %ymm4, %ymm6 + vpmuludq 64(%rbx), %ymm3, %ymm11 + vpmuludq 64(%rbx), %ymm4, %ymm7 + vpaddq %ymm5, %ymm10, %ymm5 + vpmuludq 64(%rbx), %ymm2, %ymm12 + vpmuludq 96(%rbx), %ymm4, %ymm8 + vpaddq %ymm6, %ymm11, %ymm6 + vpmuludq 96(%rbx), %ymm1, %ymm13 + vpmuludq 96(%rbx), %ymm2, %ymm10 + vpaddq %ymm5, %ymm12, %ymm5 + vpmuludq 96(%rbx), %ymm3, %ymm11 + vpmuludq (%rcx), %ymm3, %ymm12 + vpaddq %ymm5, %ymm13, %ymm5 + vpmuludq (%rcx), %ymm4, %ymm9 + vpaddq %ymm6, %ymm10, %ymm6 + vpmuludq (%rcx), %ymm0, %ymm13 + vpaddq %ymm7, %ymm11, %ymm7 + vpmuludq (%rcx), %ymm1, %ymm10 + vpaddq %ymm8, %ymm12, %ymm8 + vpmuludq (%rcx), %ymm2, %ymm11 + vpmuludq 32(%rcx), %ymm2, %ymm12 + vpaddq %ymm5, %ymm13, %ymm5 + vpmuludq 32(%rcx), %ymm3, %ymm13 + vpaddq %ymm6, %ymm10, %ymm6 + vpmuludq 32(%rcx), %ymm0, %ymm10 + vpaddq %ymm7, %ymm11, %ymm7 + vpmuludq 32(%rcx), %ymm1, %ymm11 + vpaddq %ymm8, %ymm12, %ymm8 + vpmuludq 64(%rcx), %ymm1, %ymm12 + vpaddq %ymm9, %ymm13, %ymm9 + vpmuludq 64(%rcx), %ymm2, %ymm13 + vpaddq %ymm6, %ymm10, %ymm6 + vpmuludq 64(%rcx), %ymm0, %ymm10 + vpaddq %ymm7, %ymm11, %ymm7 + vpmuludq 96(%rcx), %ymm0, %ymm11 + vpaddq %ymm8, %ymm12, %ymm8 + vpmuludq 96(%rcx), %ymm1, %ymm12 + vpaddq %ymm9, %ymm13, %ymm9 + vpaddq %ymm7, %ymm10, %ymm7 + vpmuludq 128(%rcx), %ymm0, %ymm13 + vpaddq %ymm8, %ymm11, %ymm8 + vpaddq %ymm9, %ymm12, %ymm9 + vpaddq %ymm9, %ymm13, %ymm9 + vpsrlq $26, %ymm5, %ymm10 + vpsrlq $26, %ymm8, %ymm11 + vpand %ymm14, %ymm5, %ymm5 + vpand %ymm14, %ymm8, %ymm8 + vpaddq %ymm6, %ymm10, %ymm6 + vpaddq %ymm9, %ymm11, %ymm9 + vpsrlq $26, %ymm6, %ymm10 + vpsrlq $26, %ymm9, %ymm11 + vpand %ymm14, %ymm6, %ymm1 + vpand %ymm14, %ymm9, %ymm4 + vpaddq %ymm7, %ymm10, %ymm7 + vpslld $2, %ymm11, %ymm12 + vpaddd %ymm12, %ymm11, %ymm12 + vpsrlq $26, %ymm7, %ymm10 + vpaddq %ymm5, %ymm12, %ymm5 + vpsrlq $26, %ymm5, %ymm11 + vpand %ymm14, %ymm7, %ymm2 + vpand %ymm14, %ymm5, %ymm0 + vpaddq %ymm8, %ymm10, %ymm8 + vpaddq %ymm1, %ymm11, %ymm1 + vpsrlq $26, %ymm8, %ymm10 + vpand %ymm14, %ymm8, %ymm3 + vpaddq %ymm4, %ymm10, %ymm4 + vpsrldq $8, %ymm0, %ymm5 + vpsrldq $8, %ymm1, %ymm6 + vpsrldq $8, %ymm2, %ymm7 + vpsrldq $8, %ymm3, %ymm8 + vpsrldq $8, %ymm4, %ymm9 + vpaddq %ymm0, %ymm5, %ymm0 + vpaddq %ymm1, %ymm6, %ymm1 + vpaddq %ymm2, %ymm7, %ymm2 + vpaddq %ymm3, %ymm8, %ymm3 + vpaddq %ymm4, %ymm9, %ymm4 + vpermq $2, %ymm0, %ymm5 + vpermq $2, %ymm1, %ymm6 + vpermq $2, %ymm2, %ymm7 + vpermq $2, %ymm3, %ymm8 + vpermq $2, %ymm4, %ymm9 + vpaddq %ymm0, %ymm5, %ymm0 + vpaddq %ymm1, %ymm6, %ymm1 + vpaddq %ymm2, %ymm7, %ymm2 + vpaddq %ymm3, %ymm8, %ymm3 + vpaddq %ymm4, %ymm9, %ymm4 + vmovd %xmm0, %r8d + vmovd %xmm1, %r9d + vmovd %xmm2, %r10d + vmovd %xmm3, %r11d + vmovd %xmm4, %r12d + jmp L_poly1305_avx2_blocks_end_calc +L_poly1305_avx2_blocks_start: + vmovdqu (%rsi), %ymm5 + vmovdqu 32(%rsi), %ymm6 + vperm2i128 $32, %ymm6, %ymm5, %ymm7 + vperm2i128 $49, %ymm6, %ymm5, %ymm5 + vpunpckldq %ymm5, %ymm7, %ymm6 + vpunpckhdq %ymm5, %ymm7, %ymm8 + vpunpckldq %ymm15, %ymm6, %ymm5 
+ vpunpckhdq %ymm15, %ymm6, %ymm6 + vpunpckldq %ymm15, %ymm8, %ymm7 + vpunpckhdq %ymm15, %ymm8, %ymm8 + vmovdqu L_poly1305_avx2_blocks_hibit(%rip), %ymm9 + vpsllq $6, %ymm6, %ymm6 + vpsllq $12, %ymm7, %ymm7 + vpsllq $18, %ymm8, %ymm8 + vpmuludq (%rbx), %ymm4, %ymm10 + vpaddq %ymm5, %ymm10, %ymm5 + vpmuludq 32(%rbx), %ymm3, %ymm10 + vpmuludq 32(%rbx), %ymm4, %ymm11 + vpaddq %ymm6, %ymm11, %ymm6 + vpmuludq 64(%rbx), %ymm3, %ymm11 + vpmuludq 64(%rbx), %ymm4, %ymm12 + vpaddq %ymm7, %ymm12, %ymm7 + vpaddq %ymm5, %ymm10, %ymm5 + vpmuludq 64(%rbx), %ymm2, %ymm12 + vpmuludq 96(%rbx), %ymm4, %ymm13 + vpaddq %ymm8, %ymm13, %ymm8 + vpaddq %ymm6, %ymm11, %ymm6 + vpmuludq 96(%rbx), %ymm1, %ymm13 + vpmuludq 96(%rbx), %ymm2, %ymm10 + vpaddq %ymm5, %ymm12, %ymm5 + vpmuludq 96(%rbx), %ymm3, %ymm11 + vpmuludq (%rcx), %ymm3, %ymm12 + vpaddq %ymm5, %ymm13, %ymm5 + vpmuludq (%rcx), %ymm4, %ymm13 + vpaddq %ymm9, %ymm13, %ymm9 + vpaddq %ymm6, %ymm10, %ymm6 + vpmuludq (%rcx), %ymm0, %ymm13 + vpaddq %ymm7, %ymm11, %ymm7 + vpmuludq (%rcx), %ymm1, %ymm10 + vpaddq %ymm8, %ymm12, %ymm8 + vpmuludq (%rcx), %ymm2, %ymm11 + vpmuludq 32(%rcx), %ymm2, %ymm12 + vpaddq %ymm5, %ymm13, %ymm5 + vpmuludq 32(%rcx), %ymm3, %ymm13 + vpaddq %ymm6, %ymm10, %ymm6 + vpmuludq 32(%rcx), %ymm0, %ymm10 + vpaddq %ymm7, %ymm11, %ymm7 + vpmuludq 32(%rcx), %ymm1, %ymm11 + vpaddq %ymm8, %ymm12, %ymm8 + vpmuludq 64(%rcx), %ymm1, %ymm12 + vpaddq %ymm9, %ymm13, %ymm9 + vpmuludq 64(%rcx), %ymm2, %ymm13 + vpaddq %ymm6, %ymm10, %ymm6 + vpmuludq 64(%rcx), %ymm0, %ymm10 + vpaddq %ymm7, %ymm11, %ymm7 + vpmuludq 96(%rcx), %ymm0, %ymm11 + vpaddq %ymm8, %ymm12, %ymm8 + vpmuludq 96(%rcx), %ymm1, %ymm12 + vpaddq %ymm9, %ymm13, %ymm9 + vpaddq %ymm7, %ymm10, %ymm7 + vpmuludq 128(%rcx), %ymm0, %ymm13 + vpaddq %ymm8, %ymm11, %ymm8 + vpaddq %ymm9, %ymm12, %ymm9 + vpaddq %ymm9, %ymm13, %ymm9 + vpsrlq $26, %ymm5, %ymm10 + vpsrlq $26, %ymm8, %ymm11 + vpand %ymm14, %ymm5, %ymm5 + vpand %ymm14, %ymm8, %ymm8 + vpaddq %ymm6, %ymm10, %ymm6 + vpaddq %ymm9, %ymm11, %ymm9 + vpsrlq $26, %ymm6, %ymm10 + vpsrlq $26, %ymm9, %ymm11 + vpand %ymm14, %ymm6, %ymm1 + vpand %ymm14, %ymm9, %ymm4 + vpaddq %ymm7, %ymm10, %ymm7 + vpslld $2, %ymm11, %ymm12 + vpaddd %ymm12, %ymm11, %ymm12 + vpsrlq $26, %ymm7, %ymm10 + vpaddq %ymm5, %ymm12, %ymm5 + vpsrlq $26, %ymm5, %ymm11 + vpand %ymm14, %ymm7, %ymm2 + vpand %ymm14, %ymm5, %ymm0 + vpaddq %ymm8, %ymm10, %ymm8 + vpaddq %ymm1, %ymm11, %ymm1 + vpsrlq $26, %ymm8, %ymm10 + vpand %ymm14, %ymm8, %ymm3 + vpaddq %ymm4, %ymm10, %ymm4 + addq $0x40, %rsi + subq $0x40, %rdx + jnz L_poly1305_avx2_blocks_start +L_poly1305_avx2_blocks_store: + # Store four H values - state + vmovdqu %ymm0, (%rax) + vmovdqu %ymm1, 32(%rax) + vmovdqu %ymm2, 64(%rax) + vmovdqu %ymm3, 96(%rax) + vmovdqu %ymm4, 128(%rax) +L_poly1305_avx2_blocks_end_calc: + cmpb $0x00, 616(%rdi) + je L_poly1305_avx2_blocks_complete + movq %r8, %rax + movq %r10, %rdx + movq %r12, %rcx + shrq $12, %rdx + shrq $24, %rcx + shlq $26, %r9 + shlq $52, %r10 + shlq $14, %r11 + shlq $40, %r12 + addq %r9, %rax + adcq %r10, %rax + adcq %r11, %rdx + adcq %r12, %rdx + adcq $0x00, %rcx + movq %rcx, %r8 + andq $3, %rcx + shrq $2, %r8 + leaq 0(%r8,%r8,4), %r8 + addq %r8, %rax + adcq $0x00, %rdx + adcq $0x00, %rcx + movq %rax, 24(%rdi) + movq %rdx, 32(%rdi) + movq %rcx, 40(%rdi) +L_poly1305_avx2_blocks_complete: + movb $0x01, 617(%rdi) + addq $0x140, %rsp + popq %rbx + popq %r12 + repz retq +#ifndef __APPLE__ +.size poly1305_blocks_avx2,.-poly1305_blocks_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text 
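+# poly1305_final_avx2(ctx=rdi, mac=rsi): flushes any buffered message data,
+# then calls poly1305_final_avx to emit the 16-byte tag and clears the state.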
+.globl poly1305_final_avx2 +.type poly1305_final_avx2,@function +.align 4 +poly1305_final_avx2: +#else +.section __TEXT,__text +.globl _poly1305_final_avx2 +.p2align 2 +_poly1305_final_avx2: +#endif /* __APPLE__ */ + movb $0x01, 616(%rdi) + movb 617(%rdi), %cl + cmpb $0x00, %cl + je L_poly1305_avx2_final_done_blocks_X4 + pushq %rsi + movq $0x40, %rdx + xorq %rsi, %rsi +#ifndef __APPLE__ + callq poly1305_blocks_avx2@plt +#else + callq _poly1305_blocks_avx2 +#endif /* __APPLE__ */ + popq %rsi +L_poly1305_avx2_final_done_blocks_X4: + movq 608(%rdi), %rax + movq %rax, %rcx + andq $-16, %rcx + cmpb $0x00, %cl + je L_poly1305_avx2_final_done_blocks + pushq %rcx + pushq %rax + pushq %rsi + movq %rcx, %rdx + leaq 480(%rdi), %rsi +#ifndef __APPLE__ + callq poly1305_blocks_avx@plt +#else + callq _poly1305_blocks_avx +#endif /* __APPLE__ */ + popq %rsi + popq %rax + popq %rcx +L_poly1305_avx2_final_done_blocks: + subq %rcx, 608(%rdi) + xorq %rdx, %rdx + jmp L_poly1305_avx2_final_cmp_copy +L_poly1305_avx2_final_start_copy: + movb 480(%rdi,%rcx,1), %r8b + movb %r8b, 480(%rdi,%rdx,1) + incb %cl + incb %dl +L_poly1305_avx2_final_cmp_copy: + cmp %rcx, %rax + jne L_poly1305_avx2_final_start_copy +#ifndef __APPLE__ + callq poly1305_final_avx@plt +#else + callq _poly1305_final_avx +#endif /* __APPLE__ */ + vpxor %ymm0, %ymm0, %ymm0 + vmovdqu %ymm0, 64(%rdi) + vmovdqu %ymm0, 96(%rdi) + vmovdqu %ymm0, 128(%rdi) + vmovdqu %ymm0, 160(%rdi) + vmovdqu %ymm0, 192(%rdi) + vmovdqu %ymm0, 224(%rdi) + vmovdqu %ymm0, 256(%rdi) + vmovdqu %ymm0, 288(%rdi) + vmovdqu %ymm0, 320(%rdi) + movq $0x00, 608(%rdi) + movw $0x00, 616(%rdi) + repz retq +#ifndef __APPLE__ +.size poly1305_final_avx2,.-poly1305_final_avx2 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ diff --git a/client/wolfssl/wolfcrypt/src/port/Espressif/README.md b/client/wolfssl/wolfcrypt/src/port/Espressif/README.md new file mode 100644 index 0000000..4f0d0b5 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/Espressif/README.md @@ -0,0 +1,109 @@ +# ESP32 Port + +Support for the ESP32-WROOM-32 on-board crypto hardware acceleration for symmetric AES, SHA1/SHA256/SHA384/SHA512 and RSA primitive including mul, mulmod and exptmod. + +## ESP32 Acceleration + +For detail about ESP32 HW Acceleration, you can find in [Technical Reference Manual](https://espressif.com/sites/default/files/documentation/esp32_technical_reference_manual_en.pdf) + +### Building + +To enable hw acceleration : + +Uncomment out #define WOLFSSL_ESPIDF in /path/to/wolfssl/wolfssl/wolfcrypt/settings.h +Uncomment out #define WOLFSSL_ESPWROOM32 in /path/to/wolfssl/wolfssl/wolfcrypt/settings.h + +To disable portions of the hardware acceleration you can optionally define: + +``` +/* Disabled SHA, AES and RSA acceleration */ +#define NO_ESP32WROOM32_CRYPT +/* Disabled AES acceleration */ +#define NO_WOLFSSL_ESP32WROOM32_CRYPT_AES +/* Disabled SHA acceleration */ +#define NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH +/* Disabled RSA Primitive acceleration */ +#define NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI +``` + +### Coding + +In your application you must include before any other wolfSSL headers. If building the sources directly we recommend defining `WOLFSSL_USER_SETTINGS` and adding your own `user_settings.h` file. You can find a good reference for this in `IDE/GCC-ARM/Header/user_settings.h`. 
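+
+As a minimal sketch (assuming the `WOLFSSL_USER_SETTINGS` route above; the
+reference file contains many more options, and the exact contents depend on
+your project), a `user_settings.h` enabling the ESP32 acceleration could look
+like:
+
+```
+/* user_settings.h - minimal hypothetical example */
+#define WOLFSSL_ESPIDF     /* target the ESP-IDF environment */
+#define WOLFSSL_ESPWROOM32 /* enable ESP32-WROOM-32 hardware crypto */
+```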
+
+
+### Benchmarks
+
+Benchmarks were taken with the USE_FAST_MATH and WOLFSSL_SMALL_STACK options enabled.
+
+Software-only implementation:
+
+```
+AES-128-CBC-enc    1 MB took 1.001 seconds, 1.146 MB/s
+AES-128-CBC-dec    1 MB took 1.017 seconds, 1.104 MB/s
+AES-192-CBC-enc    1 MB took 1.018 seconds, 1.055 MB/s
+AES-192-CBC-dec    1 MB took 1.006 seconds, 1.019 MB/s
+AES-256-CBC-enc    1000 KB took 1.000 seconds, 1000.000 KB/s
+AES-256-CBC-dec    975 KB took 1.007 seconds, 968.222 KB/s
+AES-128-GCM-enc    350 KB took 1.055 seconds, 331.754 KB/s
+AES-128-GCM-dec    350 KB took 1.054 seconds, 332.068 KB/s
+AES-192-GCM-enc    325 KB took 1.013 seconds, 320.829 KB/s
+AES-192-GCM-dec    325 KB took 1.013 seconds, 320.829 KB/s
+AES-256-GCM-enc    325 KB took 1.041 seconds, 312.200 KB/s
+AES-256-GCM-dec    325 KB took 1.041 seconds, 312.200 KB/s
+SHA                6 MB took 1.004 seconds, 5.714 MB/s
+SHA-256            2 MB took 1.006 seconds, 1.747 MB/s
+SHA-384            1 MB took 1.011 seconds, 1.159 MB/s
+SHA-512            1 MB took 1.009 seconds, 1.161 MB/s
+HMAC-SHA           6 MB took 1.001 seconds, 5.634 MB/s
+HMAC-SHA256        2 MB took 1.000 seconds, 1.733 MB/s
+HMAC-SHA384        1 MB took 1.004 seconds, 1.046 MB/s
+HMAC-SHA512        1 MB took 1.002 seconds, 1.048 MB/s
+RSA 2048 public    16 ops took 1.056 sec, avg 66.000 ms, 15.152 ops/sec
+RSA 2048 private   2 ops took 2.488 sec, avg 1244.000 ms, 0.804 ops/sec
+ECC 256 key gen    4 ops took 1.101 sec, avg 275.250 ms, 3.633 ops/sec
+ECDHE 256 agree    4 ops took 1.098 sec, avg 274.500 ms, 3.643 ops/sec
+ECDSA 256 sign     4 ops took 1.111 sec, avg 277.750 ms, 3.600 ops/sec
+ECDSA 256 verify   2 ops took 1.099 sec, avg 549.500 ms, 1.820 ops/sec
+```
+
+Hardware acceleration:
+
+```
+AES-128-CBC-enc    6 MB took 1.004 seconds, 5.958 MB/s
+AES-128-CBC-dec    5 MB took 1.002 seconds, 5.287 MB/s
+AES-192-CBC-enc    6 MB took 1.004 seconds, 5.958 MB/s
+AES-192-CBC-dec    5 MB took 1.002 seconds, 5.287 MB/s
+AES-256-CBC-enc    6 MB took 1.001 seconds, 5.951 MB/s
+AES-256-CBC-dec    5 MB took 1.004 seconds, 5.277 MB/s
+AES-128-GCM-enc    375 KB took 1.067 seconds, 351.453 KB/s
+AES-128-GCM-dec    375 KB took 1.067 seconds, 351.453 KB/s
+AES-192-GCM-enc    350 KB took 1.010 seconds, 346.535 KB/s
+AES-192-GCM-dec    350 KB took 1.009 seconds, 346.878 KB/s
+AES-256-GCM-enc    350 KB took 1.016 seconds, 344.488 KB/s
+AES-256-GCM-dec    350 KB took 1.016 seconds, 344.488 KB/s
+SHA                14 MB took 1.000 seconds, 14.062 MB/s
+SHA-256            15 MB took 1.000 seconds, 15.234 MB/s
+SHA-384            17 MB took 1.000 seconds, 17.383 MB/s
+SHA-512            18 MB took 1.001 seconds, 17.512 MB/s
+HMAC-SHA           14 MB took 1.000 seconds, 13.818 MB/s
+HMAC-SHA256        15 MB took 1.001 seconds, 14.951 MB/s
+HMAC-SHA384        17 MB took 1.001 seconds, 16.683 MB/s
+HMAC-SHA512        17 MB took 1.000 seconds, 16.943 MB/s
+RSA 2048 public    20 ops took 1.017 sec, avg 50.850 ms, 19.666 ops/sec
+RSA 2048 private   4 ops took 1.059 sec, avg 264.750 ms, 3.777 ops/sec
+ECC 256 key gen    4 ops took 1.092 sec, avg 273.000 ms, 3.663 ops/sec
+ECDHE 256 agree    4 ops took 1.089 sec, avg 272.250 ms, 3.673 ops/sec
+ECDSA 256 sign     4 ops took 1.101 sec, avg 275.250 ms, 3.633 ops/sec
+ECDSA 256 verify   2 ops took 1.092 sec, avg 546.000 ms, 1.832 ops/sec
+```
+
+Test conditions:
+- Model: ESP32-WROOM-32
+- CPU Speed: 240MHz
+- ESP-IDF: v3.3-beta1-39-g6cb37ecc5 (commit hash: 6cb37ecc5)
+- OS: Ubuntu 18.04.1 LTS (Bionic Beaver)
+
+## Support
+
+Email us at [support@wolfssl.com](mailto:support@wolfssl.com).
diff --git a/client/wolfssl/wolfcrypt/src/port/Espressif/esp32_aes.c b/client/wolfssl/wolfcrypt/src/port/Espressif/esp32_aes.c
new file mode 100644
index 0000000..f2fb8a5
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/Espressif/esp32_aes.c
@@ -0,0 +1,299 @@
+/* esp32_aes.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#include <string.h>
+#include <stdio.h>
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifndef NO_AES
+
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES)
+
+#include <wolfssl/wolfcrypt/aes.h>
+#include "wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h"
+
+static const char* TAG = "wolf_hw_aes";
+/* mutex */
+static wolfSSL_Mutex aes_mutex;
+static int espaes_CryptHwMutexInit = 0;
+
+/*
+* lock hw engine.
+* this should be called before using the engine.
+*/
+static int esp_aes_hw_InUse()
+{
+    int ret = 0;
+
+    ESP_LOGV(TAG, "enter esp_aes_hw_InUse");
+
+    if(espaes_CryptHwMutexInit == 0) {
+        ret = esp_CryptHwMutexInit(&aes_mutex);
+        if(ret == 0){
+            espaes_CryptHwMutexInit = 1;
+        } else {
+            ESP_LOGE(TAG, "aes mutex initialization failed.");
+            return -1;
+        }
+    }
+    /* lock hardware */
+    ret = esp_CryptHwMutexLock(&aes_mutex, portMAX_DELAY);
+    if(ret != 0) {
+        ESP_LOGE(TAG, "aes engine lock failed.");
+        return -1;
+    }
+    /* Enable AES hardware */
+    periph_module_enable(PERIPH_AES_MODULE);
+
+    ESP_LOGV(TAG, "leave esp_aes_hw_InUse");
+    return ret;
+}
+
+/*
+* release hw engine
+*/
+static void esp_aes_hw_Leave( void )
+{
+    ESP_LOGV(TAG, "enter esp_aes_hw_Leave");
+    /* Disable AES hardware */
+    periph_module_disable(PERIPH_AES_MODULE);
+
+    /* unlock */
+    esp_CryptHwMutexUnLock(&aes_mutex);
+
+    ESP_LOGV(TAG, "leave esp_aes_hw_Leave");
+}
+
+/*
+ * set key to hardware key registers.
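+ * Writes ctx->key word-by-word into AES_KEY_BASE and the combined
+ * key-length/direction selector into AES_MODE_REG; the caller must
+ * already hold the hardware lock.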
+ */ +static void esp_aes_hw_Set_KeyMode(Aes *ctx, ESP32_AESPROCESS mode) +{ + int i; + word32 mode_ = 0; + + ESP_LOGV(TAG, "enter esp_aes_hw_Set_KeyMode"); + + /* check mode */ + if(mode == ESP32_AES_UPDATEKEY_ENCRYPT) { + mode_ = 0; + } else if(mode == ESP32_AES_UPDATEKEY_DECRYPT){ + mode_ = 4; + } else { + ESP_LOGE(TAG, "unexpected error."); + return; + } + + /* update key */ + for(i=0;i<(ctx->keylen)/sizeof(word32);i++){ + DPORT_REG_WRITE(AES_KEY_BASE + (i*4), *(((word32*)ctx->key) + i)); + } + + /* mode + * 0 AES-128 Encryption + * 1 AES-192 Encryption + * 2 AES-256 Encryption + * 4 AES-128 Decryption + * 5 AES-192 Decryption + * 6 AES-256 Decryption + */ + switch(ctx->keylen){ + case 24: mode_ += 1; break; + case 32: mode_ += 2; break; + default: break; + } + + DPORT_REG_WRITE(AES_MODE_REG, mode_); + ESP_LOGV(TAG, "leave esp_aes_hw_Setkey"); +} + +/* + * Process a one block of AES + */ +static void esp_aes_bk(const byte* in, byte* out) +{ + const word32 *inwords = (const word32 *)in; + word32 *outwords = (word32 *)out; + + ESP_LOGV(TAG, "enter esp_aes_bk"); + + /* copy text for encrypting/decrypting blocks */ + DPORT_REG_WRITE(AES_TEXT_BASE, inwords[0]); + DPORT_REG_WRITE(AES_TEXT_BASE + 4, inwords[1]); + DPORT_REG_WRITE(AES_TEXT_BASE + 8, inwords[2]); + DPORT_REG_WRITE(AES_TEXT_BASE + 12, inwords[3]); + + /* start engine */ + DPORT_REG_WRITE(AES_START_REG, 1); + + /* wait until finishing the process */ + while(1) { + if(DPORT_REG_READ(AES_IDLE_REG) == 1) + break; + } + + /* read-out blocks */ + esp_dport_access_read_buffer(outwords, AES_TEXT_BASE, 4); + ESP_LOGV(TAG, "leave esp_aes_bk"); +} + +/* +* wc_esp32AesEncrypt +* @brief: a one block encrypt of the input block, into the output block +* @param aes: a pointer of the AES object used to encrypt data +* @param in : a pointer of the input buffer containing plain text to be encrypted +* @param out: a pointer of the output buffer in which to store the cipher text of +* the encrypted message +*/ +int wc_esp32AesEncrypt(Aes *aes, const byte* in, byte* out) +{ + ESP_LOGV(TAG, "enter wc_esp32AesEncrypt"); + /* lock the hw engine */ + esp_aes_hw_InUse(); + /* load the key into the register */ + esp_aes_hw_Set_KeyMode(aes, ESP32_AES_UPDATEKEY_ENCRYPT); + /* process a one block of AES */ + esp_aes_bk(in, out); + /* release hw */ + esp_aes_hw_Leave(); + return 0; +} +/* +* wc_esp32AesDecrypt +* @brief: a one block decrypt of the input block, into the output block +* @param aes: a pointer of the AES object used to decrypt data +* @param in : a pointer of the input buffer containing plain text to be decrypted +* @param out: a pointer of the output buffer in which to store the cipher text of +* the decrypted message +*/ +int wc_esp32AesDecrypt(Aes *aes, const byte* in, byte* out) +{ + ESP_LOGV(TAG, "enter wc_esp32AesDecrypt"); + /* lock the hw engine */ + esp_aes_hw_InUse(); + /* load the key into the register */ + esp_aes_hw_Set_KeyMode(aes, ESP32_AES_UPDATEKEY_DECRYPT); + /* process a one block of AES */ + esp_aes_bk(in, out); + /* release hw engine */ + esp_aes_hw_Leave(); + return 0; +} +/* +* wc_esp32AesCbcEncrypt +* @brief: Encrypts a plain text message from the input buffer, and places the +* resulting cipher text into the output buffer using cipher block chaining +* with AES. 
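+* Note: sz is processed in whole AES_BLOCK_SIZE (16 byte) blocks; any
+* trailing partial block is ignored. The running IV is kept in aes->reg.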
+* @param aes: a pointer of the AES object used to encrypt data
+* @param out: a pointer of the output buffer in which to store the cipher text
+*             of the encrypted message
+* @param in : a pointer of the input buffer containing plain text to be encrypted
+* @param sz : size of input message
+*/
+int wc_esp32AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    int i;
+    int offset = 0;
+    word32 blocks = (sz / AES_BLOCK_SIZE);
+    byte *iv;
+    byte temp_block[AES_BLOCK_SIZE];
+
+    ESP_LOGV(TAG, "enter wc_esp32AesCbcEncrypt");
+
+    iv = (byte*)aes->reg;
+
+    esp_aes_hw_InUse();
+
+    esp_aes_hw_Set_KeyMode(aes, ESP32_AES_UPDATEKEY_ENCRYPT);
+
+    while (blocks--) {
+        XMEMCPY(temp_block, in + offset, AES_BLOCK_SIZE);
+
+        /* XOR block with IV for CBC */
+        for (i = 0; i < AES_BLOCK_SIZE; i++)
+            temp_block[i] ^= iv[i];
+
+        esp_aes_bk(temp_block, (out + offset));
+
+        offset += AES_BLOCK_SIZE;
+
+        /* store IV for next block */
+        XMEMCPY(iv, out + offset - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+    }
+
+    esp_aes_hw_Leave();
+    ESP_LOGV(TAG, "leave wc_esp32AesCbcEncrypt");
+    return 0;
+}
+/*
+* wc_esp32AesCbcDecrypt
+* @brief: Decrypts a cipher text message from the input buffer, and places the
+*         resulting plain text into the output buffer using cipher block
+*         chaining with AES.
+* @param aes: a pointer of the AES object used to decrypt data
+* @param out: a pointer of the output buffer in which to store the plain text
+*             of the decrypted message
+* @param in : a pointer of the input buffer containing cipher text to be decrypted
+* @param sz : size of input message
+*/
+int wc_esp32AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    int i;
+    int offset = 0;
+    word32 blocks = (sz / AES_BLOCK_SIZE);
+    byte* iv;
+    byte temp_block[AES_BLOCK_SIZE];
+
+    ESP_LOGV(TAG, "enter wc_esp32AesCbcDecrypt");
+
+    iv = (byte*)aes->reg;
+
+    esp_aes_hw_InUse();
+
+    esp_aes_hw_Set_KeyMode(aes, ESP32_AES_UPDATEKEY_DECRYPT);
+
+    while (blocks--) {
+        XMEMCPY(temp_block, in + offset, AES_BLOCK_SIZE);
+
+        esp_aes_bk((in + offset), (out + offset));
+
+        /* XOR block with IV for CBC */
+        for (i = 0; i < AES_BLOCK_SIZE; i++)
+            (out + offset)[i] ^= iv[i];
+
+        /* store IV for next block */
+        XMEMCPY(iv, temp_block, AES_BLOCK_SIZE);
+
+        offset += AES_BLOCK_SIZE;
+    }
+
+    esp_aes_hw_Leave();
+    ESP_LOGV(TAG, "leave wc_esp32AesCbcDecrypt");
+    return 0;
+}
+
+#endif /* WOLFSSL_ESP32WROOM32_CRYPT */
+#endif /* NO_AES */
diff --git a/client/wolfssl/wolfcrypt/src/port/Espressif/esp32_mp.c b/client/wolfssl/wolfcrypt/src/port/Espressif/esp32_mp.c
new file mode 100644
index 0000000..2174089
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/Espressif/esp32_mp.c
@@ -0,0 +1,514 @@
+/* esp32_mp.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+#include <string.h>
+#include <stdio.h>
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+#include <wolfssl/wolfcrypt/settings.h>
+
+#include "wolfssl/wolfcrypt/logging.h"
+
+#if !defined(NO_RSA) || defined(HAVE_ECC)
+
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+#include <wolfssl/wolfcrypt/wolfmath.h>
+
+static const char* const TAG = "wolfssl_mp";
+
+#define ESP_HW_RSAMAX_BIT           4096
+#define ESP_HW_MULTI_RSAMAX_BITS    2048
+#define ESP_HW_RSAMIN_BIT           512
+#define BYTE_TO_WORDS(s)    (((s+3)>>2))      /* (s+(4-1))/ 4     */
+#define BITS_TO_WORDS(s)    (((s+31)>>3)>>2)  /* (s+(32-1))/ 8/ 4 */
+
+#define MP_NG   -1
+
+/* mutex */
+static wolfSSL_Mutex mp_mutex;
+static int espmp_CryptHwMutexInit = 0;
+/*
+* check that the hw is ready before accessing it
+*/
+static int esp_mp_hw_wait_clean()
+{
+    int timeout = 0;
+    while(++timeout < ESP_RSA_TIMEOUT && DPORT_REG_READ(RSA_CLEAN_REG) != 1){}
+
+    if(timeout >= ESP_RSA_TIMEOUT) {
+        ESP_LOGE(TAG, "timed out waiting for hw to become ready.");
+        return MP_NG;
+    }
+    return MP_OKAY;
+}
+/*
+* lock hw engine.
+* this should be called before using the engine.
+*/
+static int esp_mp_hw_lock()
+{
+    int ret = 0;
+
+    if(espmp_CryptHwMutexInit == 0) {
+        ret = esp_CryptHwMutexInit(&mp_mutex);
+        if(ret == 0){
+            espmp_CryptHwMutexInit = 1;
+        } else {
+            ESP_LOGE(TAG, "mp mutex initialization failed.");
+            return MP_NG;
+        }
+    }
+    /* lock hardware */
+    ret = esp_CryptHwMutexLock(&mp_mutex, portMAX_DELAY);
+    if(ret != 0) {
+        ESP_LOGE(TAG, "mp engine lock failed.");
+        return MP_NG;
+    }
+    /* Enable RSA hardware */
+    periph_module_enable(PERIPH_RSA_MODULE);
+
+    return ret;
+}
+/*
+* Release hw engine
+*/
+static void esp_mp_hw_unlock( void )
+{
+    /* Disable RSA hardware */
+    periph_module_disable(PERIPH_RSA_MODULE);
+
+    /* unlock */
+    esp_CryptHwMutexUnLock(&mp_mutex);
+}
+/* This is based on an article by Cetin Kaya Koc,
+ * "A New Algorithm for Inversion mod p^k", June 28 2017.
+ * It computes md = -(M^-1) mod 2^k, the Montgomery factor M' that the
+ * RSA accelerator expects in its M_PRIME register.
+ */
+static int esp_calc_Mdash(mp_int *M, word32 k, mp_digit* md)
+{
+    int i;
+    int xi;
+    int b0 = 1;
+    int bi;
+    word32 N = 0;
+    word32 x;
+
+    N = M->dp[0];
+    bi = b0;
+    x = 0;
+
+    for(i = 0; i < k; i++) {
+        xi = bi % 2;
+        if(xi < 0){
+            xi *= -1;
+        }
+        bi = (bi - N * xi) / 2;
+        x |= (xi << i);
+    }
+    /* 2's complement */
+    *md = ~x + 1;
+    return MP_OKAY;
+}
+/* start hw process */
+static void process_start(word32 reg)
+{
+    /* clear interrupt */
+    DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1);
+    /* start process */
+    DPORT_REG_WRITE(reg, 1);
+}
+/* wait until done */
+static int wait_uitil_done(word32 reg)
+{
+    int timeout = 0;
+    /* wait until done or timed out */
+    while(1) {
+        if(++timeout > ESP_RSA_TIMEOUT ||
+           DPORT_REG_READ(reg) == 1) {
+            break;
+        }
+    }
+
+    /* clear interrupt */
+    DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1);
+
+    if(timeout >= ESP_RSA_TIMEOUT) {
+        ESP_LOGE(TAG, "rsa operation timed out.");
+        return MP_NG;
+    }
+
+    return MP_OKAY;
+}
+/* read data from memory into an mp_int */
+static void esp_memblock_to_mpint(word32 mem_address, mp_int* mp, word32 numwords)
+{
+    esp_dport_access_read_buffer((uint32_t*)mp->dp, mem_address, numwords);
+    mp->used = numwords;
+}
+
+/* write an mp_int into a memory block */
+static void esp_mpint_to_memblock(word32 mem_address, const mp_int* mp,
+                                  const word32 bits,
+                                  const word32 hwords)
+{
+    word32 i;
+    word32 len = (bits / 8 + ((bits & 7) != 0 ? 1 : 0));
+
+    len = (len+sizeof(word32)-1)/sizeof(word32);
+
+    for(i=0;i < hwords; i++) {
+        if(i < len) {
+            DPORT_REG_WRITE(mem_address + (i * sizeof(word32)), mp->dp[i]);
+        } else {
+            DPORT_REG_WRITE(mem_address + (i * sizeof(word32)), 0);
+        }
+    }
+}
+/* return needed hw words. */
+/* supported words length */
+/* words : {16,  32,   48,   64,   80,   96,   112,  128}  */
+/* bits  : {512, 1024, 1536, 2048, 2560, 3072, 3584, 4096} */
+static word32 words2hwords(word32 wd)
+{
+    const word32 shift_ = 4;
+
+    return (((wd + 0xf)>>shift_)<<shift_);
+}
+/* count the number of 32-bit words needed to hold "bits" bits */
+static word32 bits2words(word32 bits)
+{
+    /* 32 bits */
+    const word32 d = sizeof(word32) * WOLFSSL_BIT_SIZE;
+
+    return ((bits + (d - 1)) / d);
+}
+/* get r_inv = R^2 mod M, where R = 2^exp */
+static int esp_get_rinv(mp_int *rinv, mp_int *M, word32 exp)
+{
+    int ret = 0;
+
+    /* 2^(exp) */
+    if((ret = mp_2expt(rinv, exp)) != MP_OKAY) {
+        ESP_LOGE(TAG, "failed to calculate mp_2expt()");
+        return ret;
+    }
+    /* r_inv = R^2 mod M(=P) */
+    if((ret = mp_mod(rinv, M, rinv)) != MP_OKAY) {
+        ESP_LOGE(TAG, "failed to calculate mp_mod()");
+        return ret;
+    }
+
+    return ret;
+}
+/* Z = X * Y */
+int esp_mp_mul(fp_int* X, fp_int* Y, fp_int* Z)
+{
+    int ret = 0;
+    int neg = (X->sign == Y->sign)? MP_ZPOS : MP_NEG;
+
+    word32 Xs;
+    word32 Ys;
+    word32 Zs;
+    word32 maxWords_sz;
+    word32 hwWords_sz;
+
+    /* ask bits number */
+    Xs = mp_count_bits(X);
+    Ys = mp_count_bits(Y);
+    Zs = Xs + Ys;
+
+    /* maximum bits and words for writing to hw */
+    maxWords_sz = bits2words(max(Xs, Ys));
+    hwWords_sz  = words2hwords(maxWords_sz);
+
+    /* sanity check */
+    if((hwWords_sz<<5) > ESP_HW_MULTI_RSAMAX_BITS) {
+        ESP_LOGW(TAG, "exceeds max bit length(2048)");
+        return -2;
+    }
+
+    /* Steps to use hw in the following order:
+    * 1. wait until clean hw engine
+    * 2. Write(2*N/512bits - 1 + 8) to MULT_MODE_REG
+    * 3. Write X and Y to memory blocks
+    *    need to write data to each memory block only according to the length
+    *    of the number.
+    * 4. Write 1 to MUL_START_REG
+    * 5. Wait for the first operation to be done. Poll INTERRUPT_REG until it reads 1.
+    *    (Or until the INTER interrupt is generated.)
+    * 6. Write 1 to RSA_INTERRUPT_REG to clear the interrupt.
+    * 7. Read the Z from RSA_Z_MEM
+    * 8. Write 1 to RSA_INTERRUPT_REG to clear the interrupt.
+    * 9. Release the hw engine
+    */
+    /* lock hw for use */
+    if((ret = esp_mp_hw_lock()) != MP_OKAY)
+        return ret;
+
+    if((ret = esp_mp_hw_wait_clean()) != MP_OKAY){
+        return ret;
+    }
+
+    /* step.1 (2*N/512) => N/256.
512 bits => 16 words */ + DPORT_REG_WRITE(RSA_MULT_MODE_REG, (hwWords_sz >> 3) - 1 + 8); + /* step.2 write X, M and r_inv into memory */ + esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, X, Xs, hwWords_sz); + /* Y(let-extend) */ + esp_mpint_to_memblock(RSA_MEM_Z_BLOCK_BASE + (hwWords_sz<<2), Y, Ys, hwWords_sz); + /* step.3 start process */ + process_start(RSA_MULT_START_REG); + + /* step.4,5 wait until done */ + wait_uitil_done(RSA_INTERRUPT_REG); + /* step.6 read the result form MEM_Z */ + esp_memblock_to_mpint(RSA_MEM_Z_BLOCK_BASE, Z, BITS_TO_WORDS(Zs)); + + /* step.7 clear and release hw */ + esp_mp_hw_unlock(); + + Z->sign = (Z->used > 0)? neg : MP_ZPOS; + + return ret; +} +/* Z = X * Y (mod M) */ +int esp_mp_mulmod(fp_int* X, fp_int* Y, fp_int* M, fp_int* Z) +{ + int ret = 0; + int negcheck = 0; + word32 Xs; + word32 Ys; + word32 Ms; + word32 maxWords_sz; + word32 hwWords_sz; + word32 zwords; + + mp_int r_inv; + mp_int tmpZ; + mp_digit mp; + + /* neg check */ + if(X->sign != Y->sign) { + /* X*Y becomes negative */ + negcheck = 1; + } + /* ask bits number */ + Xs = mp_count_bits(X); + Ys = mp_count_bits(Y); + Ms = mp_count_bits(M); + + /* maximum bits and words for writing to hw */ + maxWords_sz = bits2words(max(Xs, max(Ys, Ms))); + zwords = bits2words(min(Ms, Xs + Ys)); + hwWords_sz = words2hwords(maxWords_sz); + + if((hwWords_sz<<5) > ESP_HW_RSAMAX_BIT) { + ESP_LOGE(TAG, "exceeds hw maximum bits"); + return -2; + } + /* calculate r_inv = R^2 mode M + * where: R = b^n, and b = 2^32 + * accordingly R^2 = 2^(n*32*2) + */ + ret = mp_init_multi(&tmpZ, &r_inv, NULL, NULL, NULL, NULL); + if(ret == 0 && (ret = esp_get_rinv(&r_inv, M, (hwWords_sz<<6))) != MP_OKAY) { + ESP_LOGE(TAG, "calculate r_inv failed."); + mp_clear(&tmpZ); + mp_clear(&r_inv); + return ret; + } + /* lock hw for use */ + if((ret = esp_mp_hw_lock()) != MP_OKAY){ + mp_clear(&tmpZ); + mp_clear(&r_inv); + return ret; + } + /* Calculate M' */ + if((ret = esp_calc_Mdash(M, 32/* bits */, &mp)) != MP_OKAY) { + ESP_LOGE(TAG, "failed to calculate M dash"); + mp_clear(&tmpZ); + mp_clear(&r_inv); + return -1; + } + /*Steps to use hw in the following order: + * 1. wait until clean hw engine + * 2. Write(N/512bits - 1) to MULT_MODE_REG + * 3. Write X,M(=G, X, P) to memory blocks + * need to write data to each memory block only according to the length + * of the number. + * 4. Write M' to M_PRIME_REG + * 5. Write 1 to MODEXP_START_REG + * 6. Wait for the first operation to be done. Poll INTERRUPT_REG until it reads 1. + * (Or until the INTER interrupt is generated.) + * 7. Write 1 to RSA_INTERRUPT_REG to clear the interrupt. + * 8. Write Y to RSA_X_MEM + * 9. Write 1 to RSA_MULT_START_REG + * 10. Wait for the second operation to be completed. Poll INTERRUPT_REG until it reads 1. + * 11. Read the Z from RSA_Z_MEM + * 12. Write 1 to RSA_INTERUPT_REG to clear the interrupt. + * 13. 
Release the hw engine + */ + + if((ret = esp_mp_hw_wait_clean()) != MP_OKAY){ + return ret; + } + /* step.1 512 bits => 16 words */ + DPORT_REG_WRITE(RSA_MULT_MODE_REG, (hwWords_sz >> 4) - 1); + + /* step.2 write X, M and r_inv into memory */ + esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, X, Xs, hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_M_BLOCK_BASE, M, Ms, hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_Z_BLOCK_BASE, &r_inv, mp_count_bits(&r_inv), + hwWords_sz); + /* step.3 write M' into memory */ + DPORT_REG_WRITE(RSA_M_DASH_REG, mp); + /* step.4 start process */ + process_start(RSA_MULT_START_REG); + + /* step.5,6 wait until done */ + wait_uitil_done(RSA_INTERRUPT_REG); + /* step.7 Y to MEM_X */ + esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, Y, Ys, hwWords_sz); + + /* step.8 start process */ + process_start(RSA_MULT_START_REG); + + /* step.9,11 wait until done */ + wait_uitil_done(RSA_INTERRUPT_REG); + + /* step.12 read the result from MEM_Z */ + esp_memblock_to_mpint(RSA_MEM_Z_BLOCK_BASE, &tmpZ, zwords); + + /* step.13 clear and release hw */ + esp_mp_hw_unlock(); + + /* additional steps */ + /* this needs for known issue when Z is greater than M */ + if(mp_cmp(&tmpZ, M)==FP_GT) { + /* Z -= M */ + mp_sub(&tmpZ, M, &tmpZ); + } + if(negcheck) { + mp_sub(M, &tmpZ, &tmpZ); + } + + mp_copy(&tmpZ, Z); + + mp_clear(&tmpZ); + mp_clear(&r_inv); + + return ret; +} +/* Z = X^Y mod M */ +int esp_mp_exptmod(fp_int* X, fp_int* Y, word32 Ys, fp_int* M, fp_int* Z) +{ + int ret = 0; + + word32 Xs; + word32 Ms; + word32 maxWords_sz; + word32 hwWords_sz; + + mp_int r_inv; + mp_digit mp; + + /* ask bits number */ + Xs = mp_count_bits(X); + Ms = mp_count_bits(M); + /* maximum bits and words for writing to hw */ + maxWords_sz = bits2words(max(Xs, max(Ys, Ms))); + hwWords_sz = words2hwords(maxWords_sz); + + if((hwWords_sz<<5) > ESP_HW_RSAMAX_BIT) { + ESP_LOGE(TAG, "exceeds hw maximum bits"); + return -2; + } + /* calculate r_inv = R^2 mode M + * where: R = b^n, and b = 2^32 + * accordingly R^2 = 2^(n*32*2) + */ + ret = mp_init(&r_inv); + if(ret == 0 && (ret = esp_get_rinv(&r_inv, M, (hwWords_sz<<6))) != MP_OKAY) { + ESP_LOGE(TAG, "calculate r_inv failed."); + mp_clear(&r_inv); + return ret; + } + /* lock and init the hw */ + if((ret = esp_mp_hw_lock()) != MP_OKAY) { + mp_clear(&r_inv); + return ret; + } + /* calc M' */ + /* if Pm is odd, uses mp_montgomery_setup() */ + if((ret = esp_calc_Mdash(M, 32/* bits */, &mp)) != MP_OKAY) { + ESP_LOGE(TAG, "failed to calculate M dash"); + mp_clear(&r_inv); + return -1; + } + + /*Steps to use hw in the following order: + * 1. Write(N/512bits - 1) to MODEXP_MODE_REG + * 2. Write X, Y, M and r_inv to memory blocks + * need to write data to each memory block only according to the length + * of the number. + * 3. Write M' to M_PRIME_REG + * 4. Write 1 to MODEXP_START_REG + * 5. Wait for the operation to be done. Poll INTERRUPT_REG until it reads 1. + * (Or until the INTER interrupt is generated.) + * 6. Read the result Z(=Y) from Z_MEM + * 7. Write 1 to INTERRUPT_REG to clear the interrupt. 
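+ * 8. Release the hw engine.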
+ */ + if((ret = esp_mp_hw_wait_clean()) != MP_OKAY){ + return ret; + } + + /* step.1 */ + DPORT_REG_WRITE(RSA_MODEXP_MODE_REG, (hwWords_sz >> 4) - 1); + /* step.2 write G, X, P, r_inv and M' into memory */ + esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, X, Xs, hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_Y_BLOCK_BASE, Y, Ys, hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_M_BLOCK_BASE, M, Ms, hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_Z_BLOCK_BASE, &r_inv, mp_count_bits(&r_inv), + hwWords_sz); + /* step.3 write M' into memory */ + DPORT_REG_WRITE(RSA_M_DASH_REG, mp); + /* step.4 start process */ + process_start(RSA_START_MODEXP_REG); + + /* step.5 wait until done */ + wait_uitil_done(RSA_INTERRUPT_REG); + /* step.6 read a result form memory */ + esp_memblock_to_mpint(RSA_MEM_Z_BLOCK_BASE, Z, BITS_TO_WORDS(Ms)); + /* step.7 clear and release hw */ + esp_mp_hw_unlock(); + + mp_clear(&r_inv); + + return ret; +} +#endif /* !NO_RSA || HAVE_ECC */ +#endif /* (WOLFSS_ESP32WROOM32_CRYPT) && (NO_WOLFSSL_ESP32WROOM32_CRYPT_RES_PRI)*/ diff --git a/client/wolfssl/wolfcrypt/src/port/Espressif/esp32_sha.c b/client/wolfssl/wolfcrypt/src/port/Espressif/esp32_sha.c new file mode 100644 index 0000000..94789cd --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/Espressif/esp32_sha.c @@ -0,0 +1,434 @@ +/* esp32_sha.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ +#include +#include + +#ifdef HAVE_CONFIG_H + #include +#endif +#include + +#if !defined(NO_SHA) || !defined(NO_SHA256) || defined(WC_SHA384) || \ + defined(WC_SHA512) + +#include "wolfssl/wolfcrypt/logging.h" + + +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + +#include +#include +#include + +#include "wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h" +#include "wolfssl/wolfcrypt/error-crypt.h" + +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +static const char* TAG = "wolf_hw_sha"; +/* continue register offset */ +#define CONTINUE_REG_OFFSET (0x04) /* start_reg + 0x04 */ + +#ifdef NO_SHA + #define WC_SHA_DIGEST_SIZE 20 +#endif +/* mutex */ +#if defined(SINGLE_THREADED) +static int InUse = 0; +#else +static wolfSSL_Mutex sha_mutex; +static int espsha_CryptHwMutexInit = 0; +#endif +/* + enum SHA_TYPE { + SHA1 = 0, + SHA2_256, + SHA2_384, + SHA2_512, + SHA_INVALID = -1, + }; +*/ +static word32 esp_sha_digest_size(enum SHA_TYPE type) +{ + ESP_LOGV(TAG, "enter esp_sha_digest_size"); + + switch(type){ +#ifndef NO_SHA + case SHA1: + return WC_SHA_DIGEST_SIZE; +#endif +#ifndef NO_SHA256 + case SHA2_256: + return WC_SHA256_DIGEST_SIZE; +#endif +#ifdef WOLFSSL_SHA384 + case SHA2_384: + return WC_SHA384_DIGEST_SIZE; +#endif +#ifdef WOLFSSL_SHA512 + case SHA2_512: + return WC_SHA512_DIGEST_SIZE; +#endif + default: + ESP_LOGE(TAG, "Bad sha type"); + return WC_SHA_DIGEST_SIZE; + } + ESP_LOGV(TAG, "leave esp_sha_digest_size"); +} +/* +* wait until engines becomes idle +*/ +static void esp_wait_until_idle() +{ + while((DPORT_REG_READ(SHA_1_BUSY_REG) !=0) || + (DPORT_REG_READ(SHA_256_BUSY_REG)!=0) || + (DPORT_REG_READ(SHA_384_BUSY_REG)!=0) || + (DPORT_REG_READ(SHA_512_BUSY_REG)!=0)){ } +} +/* +* lock hw engine. +* this should be called before using engine. +*/ +int esp_sha_try_hw_lock(WC_ESP32SHA* ctx) +{ + int ret = 0; + + ESP_LOGV(TAG, "enter esp_sha_hw_lock"); + + /* Init mutex */ +#if defined(SINGLE_THREADED) + if(ctx->mode == ESP32_SHA_INIT) { + if(!InUse) { + ctx->mode = ESP32_SHA_HW; + InUse = 1; + } else { + ctx->mode = ESP32_SHA_SW; + } + } else { + /* this should not happens */ + ESP_LOGE(TAG, "unexpected error in esp_sha_try_hw_lock."); + return -1; + } +#else + if(espsha_CryptHwMutexInit == 0){ + ret = esp_CryptHwMutexInit(&sha_mutex); + if(ret == 0) { + espsha_CryptHwMutexInit = 1; + } else { + ESP_LOGE(TAG, " mutex initialization failed."); + ctx->mode = ESP32_SHA_SW; + return 0; + } + } + /* check if this sha has been operated as sw or hw, or not yet init */ + if(ctx->mode == ESP32_SHA_INIT){ + /* try to lock the hw engine */ + if(esp_CryptHwMutexLock(&sha_mutex, (TickType_t)0) == 0) { + ctx->mode = ESP32_SHA_HW; + } else { + ESP_LOGI(TAG, "someone used. 
hw is locked....."); + ESP_LOGI(TAG, "the rest of operation will use sw implementation for this sha"); + ctx->mode = ESP32_SHA_SW; + return 0; + } + } else { + /* this should not happens */ + ESP_LOGE(TAG, "unexpected error in esp_sha_try_hw_lock."); + return -1; + } +#endif + /* Enable SHA hardware */ + periph_module_enable(PERIPH_SHA_MODULE); + + ESP_LOGV(TAG, "leave esp_sha_hw_lock"); + return ret; +} +/* +* release hw engine +*/ +void esp_sha_hw_unlock( void ) +{ + ESP_LOGV(TAG, "enter esp_sha_hw_unlock"); + + /* Disable AES hardware */ + periph_module_disable(PERIPH_SHA_MODULE); +#if defined(SINGLE_THREADED) + InUse = 0; +#else + /* unlock hw engine for next use */ + esp_CryptHwMutexUnLock(&sha_mutex); +#endif + ESP_LOGV(TAG, "leave esp_sha_hw_unlock"); +} +/* +* start sha process by using hw engine +*/ +static void esp_sha_start_process(WC_ESP32SHA* sha, word32 address) +{ + ESP_LOGV(TAG, "enter esp_sha_start_process"); + + if(sha->isfirstblock){ + /* start first message block */ + DPORT_REG_WRITE(address, 1); + sha->isfirstblock = 0; + } else { + /* CONTINU_REG */ + DPORT_REG_WRITE(address + CONTINUE_REG_OFFSET , 1); + } + + ESP_LOGV(TAG, "leave esp_sha_start_process"); +} +/* +* process message block +*/ +static void esp_process_block(WC_ESP32SHA* ctx, word32 address, + const word32* data, word32 len) +{ + int i; + + ESP_LOGV(TAG, "enter esp_process_block"); + + /* check if there are any busy engine */ + esp_wait_until_idle(); + /* load message data into hw */ + for(i=0;i<((len)/(sizeof(word32)));++i){ + DPORT_REG_WRITE(SHA_TEXT_BASE+(i*sizeof(word32)),*(data+i)); + } + /* notify hw to start process */ + esp_sha_start_process(ctx, address); + + ESP_LOGV(TAG, "leave esp_process_block"); +} +/* +* retrieve sha digest from memory +*/ +static void esp_digest_state(WC_ESP32SHA* ctx, byte* hash, enum SHA_TYPE sha_type) +{ + /* registers */ + word32 SHA_LOAD_REG = SHA_1_LOAD_REG; + word32 SHA_BUSY_REG = SHA_1_BUSY_REG; + + ESP_LOGV(TAG, "enter esp_digest_state"); + + /* sanity check */ + if(sha_type == SHA_INVALID) { + ESP_LOGE(TAG, "unexpected error. sha_type is invalid."); + return; + } + + SHA_LOAD_REG += (sha_type << 4); + SHA_BUSY_REG += (sha_type << 4); + + if(ctx->isfirstblock == 1){ + /* no hardware use yet. 
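+         * isfirstblock is only cleared once esp_process_block() has
+         * pushed a block to the engine, so the digest registers are
+         * still empty here.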
Nothing to do yet */
+        return;
+    }
+
+    /* wait until idle */
+    esp_wait_until_idle();
+
+    /* LOAD final digest */
+    DPORT_REG_WRITE(SHA_LOAD_REG, 1);
+    /* wait until done */
+    while(DPORT_REG_READ(SHA_BUSY_REG) == 1){ }
+
+    esp_dport_access_read_buffer((word32*)(hash), SHA_TEXT_BASE,
+                            esp_sha_digest_size(sha_type)/sizeof(word32));
+
+#if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384)
+    if(sha_type == SHA2_384 || sha_type == SHA2_512) {
+        word32 i;
+        word32 tmp;
+        word32* pwrd1 = (word32*)(hash);
+        /* swap the two 32-bit halves of each 64-bit digest word */
+        for(i = 0; i < esp_sha_digest_size(sha_type)/sizeof(word32); i += 2) {
+            tmp        = pwrd1[i];
+            pwrd1[i]   = pwrd1[i+1];
+            pwrd1[i+1] = tmp;
+        }
+    }
+#endif
+
+    ESP_LOGV(TAG, "leave esp_digest_state");
+}
+
+#ifndef NO_SHA
+/*
+* sha1 process
+*/
+int esp_sha_process(struct wc_Sha* sha, const byte* data)
+{
+    int ret = 0;
+    word32 SHA_START_REG = SHA_1_START_REG;
+
+    ESP_LOGV(TAG, "enter esp_sha_process");
+
+    esp_process_block(&sha->ctx, SHA_START_REG, (const word32*)data,
+                        WC_SHA_BLOCK_SIZE);
+
+    ESP_LOGV(TAG, "leave esp_sha_process");
+    return ret;
+}
+/*
+* retrieve sha1 digest
+*/
+int esp_sha_digest_process(struct wc_Sha* sha, byte blockproc)
+{
+    int ret = 0;
+
+    ESP_LOGV(TAG, "enter esp_sha_digest_process");
+
+    if(blockproc) {
+        word32 SHA_START_REG = SHA_1_START_REG;
+
+        esp_process_block(&sha->ctx, SHA_START_REG, sha->buffer,
+                            WC_SHA_BLOCK_SIZE);
+    }
+
+    esp_digest_state(&sha->ctx, (byte*)sha->digest, SHA1);
+
+    ESP_LOGV(TAG, "leave esp_sha_digest_process");
+
+    return ret;
+}
+#endif /* NO_SHA */
+
+
+#ifndef NO_SHA256
+/*
+* sha256 process
+*/
+int esp_sha256_process(struct wc_Sha256* sha, const byte* data)
+{
+    int ret = 0;
+    word32 SHA_START_REG = SHA_1_START_REG;
+
+    ESP_LOGV(TAG, "enter esp_sha256_process");
+
+    /* start register offset */
+    SHA_START_REG += (SHA2_256 << 4);
+
+    esp_process_block(&sha->ctx, SHA_START_REG, (const word32*)data,
+                        WC_SHA256_BLOCK_SIZE);
+
+    ESP_LOGV(TAG, "leave esp_sha256_process");
+
+    return ret;
+}
+/*
+* retrieve sha256 digest
+*/
+int esp_sha256_digest_process(struct wc_Sha256* sha, byte blockproc)
+{
+    int ret = 0;
+
+    ESP_LOGV(TAG, "enter esp_sha256_digest_process");
+
+    if(blockproc) {
+        word32 SHA_START_REG = SHA_1_START_REG + (SHA2_256 << 4);
+
+        esp_process_block(&sha->ctx, SHA_START_REG, sha->buffer,
+                            WC_SHA256_BLOCK_SIZE);
+    }
+
+    esp_digest_state(&sha->ctx, (byte*)sha->digest, SHA2_256);
+
+    ESP_LOGV(TAG, "leave esp_sha256_digest_process");
+    return ret;
+}
+#endif /* NO_SHA256 */
+
+#if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384)
+/*
+* sha512 block processing. this is used for sha384 too.
+*/
+void esp_sha512_block(struct wc_Sha512* sha, const word32* data, byte isfinal)
+{
+    enum SHA_TYPE sha_type = sha->ctx.sha_type;
+    word32 SHA_START_REG = SHA_1_START_REG;
+
+    ESP_LOGV(TAG, "enter esp_sha512_block");
+    /* start register offset */
+    SHA_START_REG += (sha_type << 4);
+
+    if(sha->ctx.mode == ESP32_SHA_SW){
+        ByteReverseWords64(sha->buffer, sha->buffer,
+                           WC_SHA512_BLOCK_SIZE);
+        if(isfinal){
+            sha->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2] = sha->hiLen;
+            sha->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 1] = sha->loLen;
+        }
+
+    } else {
+        ByteReverseWords((word32*)sha->buffer, (word32*)sha->buffer,
+                         WC_SHA512_BLOCK_SIZE);
+        if(isfinal){
+            sha->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2] =
+                rotlFixed64(sha->hiLen, 32U);
+            sha->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 1] =
+                rotlFixed64(sha->loLen, 32U);
+        }
+
+        esp_process_block(&sha->ctx, SHA_START_REG, data, WC_SHA512_BLOCK_SIZE);
+    }
+    ESP_LOGV(TAG, "leave esp_sha512_block");
+}
+/*
+* sha512 process. this is used for sha384 too.
+*/
+int esp_sha512_process(struct wc_Sha512* sha)
+{
+    word32 *data = (word32*)sha->buffer;
+
+    ESP_LOGV(TAG, "enter esp_sha512_process");
+
+    esp_sha512_block(sha, data, 0);
+
+    ESP_LOGV(TAG, "leave esp_sha512_process");
+    return 0;
+}
+/*
+* retrieve sha512 digest. this is used for sha384 too.
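+* note: when ctx.mode == ESP32_SHA_SW the blocks were hashed in
+* software, so no hardware digest is read back (see the mode check
+* in the function below).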
+*/
+int esp_sha512_digest_process(struct wc_Sha512* sha, byte blockproc)
+{
+    ESP_LOGV(TAG, "enter esp_sha512_digest_process");
+
+    if(blockproc) {
+        word32* data = (word32*)sha->buffer;
+
+        esp_sha512_block(sha, data, 1);
+    }
+    if(sha->ctx.mode != ESP32_SHA_SW)
+        esp_digest_state(&sha->ctx, (byte*)sha->digest, sha->ctx.sha_type);
+
+    ESP_LOGV(TAG, "leave esp_sha512_digest_process");
+    return 0;
+}
+#endif /* WOLFSSL_SHA512 || WOLFSSL_SHA384 */
+#endif /* WOLFSSL_ESP32WROOM32_CRYPT */
+#endif /* !defined(NO_SHA) ||... */
diff --git a/client/wolfssl/wolfcrypt/src/port/Espressif/esp32_util.c b/client/wolfssl/wolfcrypt/src/port/Espressif/esp32_util.c
new file mode 100644
index 0000000..b501b5e
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/Espressif/esp32_util.c
@@ -0,0 +1,67 @@
+/* esp32_util.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+    (!defined(NO_AES) || !defined(NO_SHA) || !defined(NO_SHA256) ||\
+     defined(WOLFSSL_SHA384) || defined(WOLFSSL_SHA512))
+
+#include <wolfssl/wolfcrypt/wc_port.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+
+int esp_CryptHwMutexInit(wolfSSL_Mutex* mutex) {
+    return wc_InitMutex(mutex);
+}
+
+int esp_CryptHwMutexLock(wolfSSL_Mutex* mutex, TickType_t xBlockTime) {
+#ifdef SINGLE_THREADED
+    return wc_LockMutex(mutex);
+#else
+    return ((xSemaphoreTake( *mutex, xBlockTime ) == pdTRUE) ? 0 : BAD_MUTEX_E);
+#endif
+}
+
+int esp_CryptHwMutexUnLock(wolfSSL_Mutex* mutex) {
+    return wc_UnLockMutex(mutex);
+}
+
+#endif
+
+#ifdef WOLFSSL_ESP32WROOM32_CRYPT_DEBUG
+
+#include "esp_timer.h"
+#include "esp_log.h"
+
+static uint64_t startTime = 0;
+
+
+void wc_esp32TimerStart()
+{
+    startTime = esp_timer_get_time();
+}
+
+uint64_t wc_esp32elapsedTime()
+{
+    /* return elapsed time since wc_esp32TimerStart() was called, in us */
+    return esp_timer_get_time() - startTime;
+}
+
+#endif /*WOLFSSL_ESP32WROOM32_CRYPT_DEBUG */
diff --git a/client/wolfssl/wolfcrypt/src/port/Renesas/README.md b/client/wolfssl/wolfcrypt/src/port/Renesas/README.md
new file mode 100644
index 0000000..ca60bc5
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/Renesas/README.md
@@ -0,0 +1,176 @@
+# TSIP FIT Module port
+Support for the Renesas TSIP FIT driver: symmetric AES and SHA1/SHA256 hardware acceleration, plus a TLS-linked capability covering Root CA, server certificate, and intermediate certificate verification.
+
+## Overview
+The Renesas TSIP FIT module is used with wolfSSL by setting the *WOLFSSL_RENESAS_TSIP* definition.
+
+The port includes the following examples:
+
+* simple tls_client/tls_server
+* crypt test
+* crypt benchmark
+
+The *user_settings.h* file enables some of the hardened settings.
+
+## Requirements
+### 1. [Renesas TSIP FIT module](https://www.renesas.com/us/en/products/software-tools/software-os-middleware-driver/security-crypto/trusted-secure-ip-driver.html)
+[FIT module](https://www.renesas.com/us/en/products/software-tools/software-os-middleware-driver/software-package/fit.html)
+Note : The included example program is tested with TSIP FIT version **1.06**.
+
+### 2. [e2studio](https://www.renesas.com/us/en/products/software-tools/tools/ide/e2studio.html)
+
+### 3. Evaluation Board that supports TSIP
+Note : The included example program is tested with [GR-ROSE](http://gadget.renesas.com/en/product/rose.html), which is classified as an RX65N device.
+
+## Setup and Build wolfSSL library
+ 1. Uncomment #define WOLFSSL_RENESAS_TSIP in /path/to/wolfssl/wolfssl/wolfcrypt/settings.h
+    Uncomment #define WOLFSSL_RENESAS_RX65N in /path/to/wolfssl/wolfssl/wolfcrypt/settings.h
+ 2. Open the project file at /path/to/wolfssl/IDE/Renesas/e2studio/Projects/wolfssl/ with e2studio and build it to create the wolfssl library
+Note : The FIT module source files must be generated in advance to compile wolfSSL when WOLFSSL_RENESAS_TSIP and WOLFSSL_RENESAS_RX65N are enabled. See "Setup and Build an example program" below for how to create the FIT module files.
+
+To disable portions of the hardware acceleration you can optionally define:
+
+```
+/* Disable SHA acceleration */
+#define NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH
+/* Disable TLS-linked acceleration */
+#define NO_WOLFSSL_RENESAS_TSIP_TLS_SESSION
+```
+### Benchmarks
+**Software only implementation:**
+*block cipher*
+```
+RNG 200 KB took 1.099 seconds, 182.000 KB/s
+SHA 1 MB took 1.005 seconds, 1.166 MB/s
+SHA-256 425 KB took 1.038 seconds, 409.520 KB/s
+```
+
+*TLS establishment time*
+```
+TLS_RSA_WITH_AES_128_CBC_SHA : 0.651 (s)
+TLS_RSA_WITH_AES_128_CBC_SHA256 : 0.651 (s)
+TLS_RSA_WITH_AES_256_CBC_SHA : 0.642 (s)
+TLS_RSA_WITH_AES_256_CBC_SHA256 : 0.662 (s)
+TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 : 2.050 (s)
+```
+**Hardware acceleration:**
+*block cipher*
+```
+RNG 1 MB took 1.011 seconds, 1.038 MB/s
+SHA 12 MB took 1.001 seconds, 11.515 MB/s
+SHA-256 13 MB took 1.001 seconds, 12.900 MB/s
+```
+*TLS establishment time with TLS-linked capability*
+*Performing the full TLS-linked capability*
+```
+TLS_RSA_WITH_AES_128_CBC_SHA : 0.141 (s)
+TLS_RSA_WITH_AES_128_CBC_SHA256 : 0.141 (s)
+TLS_RSA_WITH_AES_256_CBC_SHA : 0.141 (s)
+TLS_RSA_WITH_AES_256_CBC_SHA256 : 0.144 (s)
+```
+*Performing certificate verification via the TSIP TLS-linked API*
+```
+TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 : 1.721 (s)
+```
+Conditions:
+IDE : Renesas e2studio v7.4.0
+ToolChain : Renesas CCRX version 3.00.00
+TSIP FIT : version 1.0.6
+Board : [GR-ROSE](http://gadget.renesas.com/en/product/rose.html)
+wolfSSL : 4.1.0
+
+
+## Setup and Build an example program
+An example program expects the following FIT modules:
+
+* r_bsp
+* r_cmt_rx
+* r_config
+* r_ether_rx
+* r_sys_time_rx
+* r_t4_driver_rx
+* r_t4_rx
+* r_tsip_rx
+
+The needed source files can be generated by creating a dummy project that includes the Renesas Smart Configurator, following the steps below:
+
+ 1. Create a dummy project including the Renesas Smart Configurator for your evaluation board type
+ 2. Open the Smart Configurator and add the FIT modules above
+    You may need to increase the *User Stack Size* and *Heap Size* properties of r_bsp.
+    Change the IP ADDRESS and PORT NUMBER in r_t4_rx_config.h
+    `#define T4_CFG_FIXED_IP_ADDRESS_CH0 192,168,1,33`
+    `#define T4_CFG_TCP_REPID1_PORT_NUMBER 11111`
+    Note: other configuration may need to be modified depending on the evaluation board.
+
+    When using GR-ROSE, you can choose "GR-ROSE" from the "board" tab and "board" drop-down list and then apply the settings below:
+
+    Go to the component tab and open the r_ether_rx properties:
+    Ethernet interface : RMII
+    The register bus of PHY0 for ETHER0/1 : Use ETHER0
+    Resource, ETHERC : Check ETHERC0_RMII
+
+    Go to the component tab and open the r_t4_rx properties:
+    Enable/Disable DHCP function : 0
+    IP address for ch0, when DHCP disable : 192,168,1,33
+    TCP REPID1 port number : 11111
+
+    Go to the pins tab and select the ethernet controller
+    Check to use pins
+
+ 3. Generate the source code
+    The FIT modules can now be copied into an example project.
+ 4. Create an "smc_gen" folder under /path/to/wolfssl/IDE/Renesas/e2studio/Projects/test/src/
+ 5. Copy the FIT modules into the folder created in step 4.
+ 6. Open the example project file at /path/to/wolfssl/IDE/Renesas/e2studio/Projects/test/ with e2studio
+ 7. Enable a macro definition in /path/to/wolfssl/IDE/Renesas/e2studio/Projects/test/src/wolfssl_demo.h for the application type
+    `#define CRYPT_TEST // enable crypt test`
+    `#define BENCHMARK // enable benchmark application`
+    `#define TLS_CLIENT // enable simple tls client application`
+    `#define TLS_SERVER // enable simple tls server application`
+    `#define USE_TSIP_TLS // to inform user key and flash keying, when using TSIP`
+    Note: CRYPT_TEST and BENCHMARK can be enabled at the same time. TLS_CLIENT and TLS_SERVER cannot be enabled together with the other definitions.
+ 8. Set up the debug configuration based on your debug hardware
+
+## Run client/server program on the device
+When testing the embedded client or server on the device, it is recommended to test against one of the standard wolfSSL example applications running on a desktop machine.
+
+
+For the embedded client, an example server command for a desktop machine with IP address 192.168.1.45 is as follows:
+`$./examples/server/server -b -d -i`
+
+
+For the embedded server, an example client command for a desktop machine is as follows:
+`$./examples/client/client -h 192.168.1.33 -p 11111`
+
+## Modify an example program
+To use your own TSIP keys with the TSIP TLS-linked APIs, you need your own flash keyring, a PSS-signed signature, and an RSA key.
+
+### Create flash keyring and use it in an example program
+ 1. Follow the instructions in the TSIP manual, chapter 7, "Key Data Operations".
+ 2. Copy and paste the s_flash[] data into the s_flash[] data in example-program/key_data.c
+`const uint32_t s_flash[] =`
+
+### Create RSA key pair for signing Root CA verification and use them in an example program
+ To use the TSIP TLS-linked APIs, an RSA key pair and a Root CA certificate bundle signature made with RSA-2048 PSS and SHA256 are needed.
+ The shell and Perl scripts in /path/to/wolfssl/IDE/Renesas/e2studio/Projects/tools/ can be used for this purpose.
+
+ * generate_rsa_keypair.sh : generates an RSA 2048-bit key pair; shows the modulus and public exponent when the "-s" option is specified
+ * rsa_pss_sign.sh : signs the specified file with the specified private key
+ * genhexbuf.pl : generates a C header file containing a byte array built from the file specified in the script
+
+ The modulus and public exponent shown by `generate_rsa_keypair.sh` can be used as input data to the Renesas Secure Flash Programmer to generate encrypted RSA keys for TSIP TLS-linked API use, as sketched below.
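+For orientation, the sketch below shows one way an application can hand this key material to the port before any TLS connection is made, using the helpers defined in renesas_tsip_util.c (`tsip_inform_user_keys()`, `tsip_inform_cert_sign()`, `tsip_Open()`). The array names are illustrative placeholders for the data produced by the tools above; check the exact wiring against key_data.c in the example program.
+
+```
+/* Illustrative only: the array names below are placeholders for the
+ * data produced by the Renesas tools and genhexbuf.pl. */
+extern byte g_encrypted_user_tls_key[]; /* from Renesas Secure Flash Programmer */
+extern byte g_encrypted_session_key[];
+extern byte g_session_key_iv[];
+extern const byte g_ca_cert_der_sign[]; /* RSA-2048 PSS/SHA256 bundle signature */
+
+static int register_tsip_key_material(void)
+{
+    /* hand the encrypted user TLS key material to the port */
+    tsip_inform_user_keys(g_encrypted_session_key, g_session_key_iv,
+                          g_encrypted_user_tls_key);
+
+    /* register the Root CA bundle signature for the TLS-linked API */
+    tsip_inform_cert_sign(g_ca_cert_der_sign);
+
+    /* open the TSIP driver; with a user key set this also generates
+     * the TLS RSA public key index used for certificate verification */
+    return tsip_Open();
+}
+```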
Please follow the instructions in the TSIP manual for how to generate the RSA keys.
+
+
+ The byte array of the signed signature generated by genhexbuf.pl can replace the signature data in key_data.c of an example program.
+
+
+ The encrypted RSA key and the generated byte array of the signed signature need to be passed to the wolfSSL library before loading the CA certificate. Please see the SetTsipTlskey() function in an example program for how to pass them.
+
+### Coding
+
+In your application you must include `wolfssl/wolfcrypt/settings.h` before any other wolfSSL headers. If building the sources directly we recommend defining `WOLFSSL_USER_SETTINGS` and adding your own `user_settings.h` file. You can find a good reference for this in `/path/to/Renesas/e2studio/Projects/common/user_settings.h`.
+
+## Support
+For questions please email [support@wolfssl.com]
+
+
diff --git a/client/wolfssl/wolfcrypt/src/port/Renesas/renesas_tsip_aes.c b/client/wolfssl/wolfcrypt/src/port/Renesas/renesas_tsip_aes.c
new file mode 100644
index 0000000..ce04ff5
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/Renesas/renesas_tsip_aes.c
@@ -0,0 +1,156 @@
+/* renesas_tsip_aes.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#include <string.h>
+#include <stdio.h>
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/types.h>
+
+#ifndef NO_AES
+
+#if defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \
+    !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_AES)
+
+#include <wolfssl/wolfcrypt/wc_port.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+
+#include <wolfssl/wolfcrypt/aes.h>
+#include "wolfssl/wolfcrypt/port/Renesas/renesas-tsip-crypt.h"
+
+struct Aes;
+
+int wc_tsip_AesCbcEncrypt(struct Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    tsip_aes_handle_t _handle;
+    word32 ret;
+    word32 blocks = (sz / AES_BLOCK_SIZE);
+    uint32_t dataLength;
+    byte *iv;
+
+    if ((in == NULL) || (out == NULL) || (aes == NULL))
+        return BAD_FUNC_ARG;
+
+    /* While doing the TLS handshake, the TSIP driver keeps the true
+     * key and IV on the device.
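+     * R_TSIP_AesXXXCbcEncryptInit() still requires an IV argument, so
+     * aes->reg is passed below purely to satisfy the API; the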
iv is dummy */ + iv = (uint8_t*)aes->reg; + + if((ret = tsip_hw_lock()) != 0){ + WOLFSSL_MSG("Failed to lock"); + return ret; + } + + if (aes->ctx.keySize == 16) { + ret = R_TSIP_Aes128CbcEncryptInit(&_handle, &aes->ctx.tsip_keyIdx, iv); + } else if (aes->ctx.keySize == 32) { + ret = R_TSIP_Aes256CbcEncryptInit(&_handle, &aes->ctx.tsip_keyIdx, iv); + } else { + tsip_hw_unlock(); + return -1; + } + + while (ret == TSIP_SUCCESS && blocks--) { + + if (aes->ctx.keySize == 16) + ret = R_TSIP_Aes128CbcEncryptUpdate(&_handle, (uint8_t*)in, + (uint8_t*)out, (uint32_t)AES_BLOCK_SIZE); + else + ret = R_TSIP_Aes256CbcEncryptUpdate(&_handle, (uint8_t*)in, + (uint8_t*)out, (uint32_t)AES_BLOCK_SIZE); + + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + + if (ret == TSIP_SUCCESS) { + if (aes->ctx.keySize == 16) { + ret = R_TSIP_Aes128CbcEncryptFinal(&_handle, out, &dataLength); + } else { + ret = R_TSIP_Aes256CbcEncryptFinal(&_handle, out, &dataLength); + } + } else { + WOLFSSL_MSG("TSIP AES CBC encryption failed"); + ret = -1; + } + + tsip_hw_unlock(); + return ret; +} + +int wc_tsip_AesCbcDecrypt(struct Aes* aes, byte* out, const byte* in, word32 sz) +{ + tsip_aes_handle_t _handle; + word32 ret; + word32 blocks = (sz / AES_BLOCK_SIZE); + uint32_t dataLength; + byte *iv; + + if ((in == NULL) || (out == NULL) || (aes == NULL)) + return BAD_FUNC_ARG; + + iv = (uint8_t*)aes->reg; + + if((ret = tsip_hw_lock()) != 0){ + WOLFSSL_MSG("Failed to lock"); + return ret; + } + + if (aes->ctx.keySize == 16) { + ret = R_TSIP_Aes128CbcDecryptInit(&_handle, &aes->ctx.tsip_keyIdx, iv); + } else if (aes->ctx.keySize == 32) { + ret = R_TSIP_Aes256CbcDecryptInit(&_handle, &aes->ctx.tsip_keyIdx, iv); + } else { + tsip_hw_unlock(); + return -1; + } + + while (ret == TSIP_SUCCESS && blocks--) { + + if (aes->ctx.keySize == 16) + ret = R_TSIP_Aes128CbcDecryptUpdate(&_handle, (uint8_t*)in, + (uint8_t*)out, (uint32_t)AES_BLOCK_SIZE); + else + ret = R_TSIP_Aes256CbcDecryptUpdate(&_handle, (uint8_t*)in, + (uint8_t*)out, (uint32_t)AES_BLOCK_SIZE); + + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + + if (ret == TSIP_SUCCESS) { + if (aes->ctx.keySize == 16) + ret = R_TSIP_Aes128CbcDecryptFinal(&_handle, out, &dataLength); + else + ret = R_TSIP_Aes256CbcDecryptFinal(&_handle, out, &dataLength); + } else { + WOLFSSL_MSG("TSIP AES CBC decryption failed"); + ret = -1; + } + + tsip_hw_unlock(); + return ret; +} + +#endif /* WOLFSSL_RENESAS_TSIP_CRYPT */ +#endif /* NO_AES */ diff --git a/client/wolfssl/wolfcrypt/src/port/Renesas/renesas_tsip_sha.c b/client/wolfssl/wolfcrypt/src/port/Renesas/renesas_tsip_sha.c new file mode 100644 index 0000000..b12d8ee --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/Renesas/renesas_tsip_sha.c @@ -0,0 +1,274 @@ +/* renesas_tsip_sha.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ +#include +#include + +#ifdef HAVE_CONFIG_H + #include +#endif +#include + +#if !defined(NO_SHA) || !defined(NO_SHA256) + +#include + +#if defined(WOLFSSL_RENESAS_TSIP_CRYPT) + +#include +#include + +#if !defined(NO_SHA) +#include + +static void TSIPHashFree(wolfssl_TSIP_Hash* hash) +{ + if (hash == NULL) + return; + + if (hash->msg != NULL) { + XFREE(hash->msg, hash->heap, DYNAMIC_TYPE_TMP_BUFFER); + hash->msg = NULL; + } +} + +static int TSIPHashInit(wolfssl_TSIP_Hash* hash, void* heap, int devId, + word32 sha_type) +{ + if (hash == NULL) { + return BAD_FUNC_ARG; + } + + (void)devId; + XMEMSET(hash, 0, sizeof(wolfssl_TSIP_Hash)); + + hash->heap = heap; + hash->len = 0; + hash->used = 0; + hash->msg = NULL; + hash->sha_type = sha_type; + + return 0; +} + +static int TSIPHashUpdate(wolfssl_TSIP_Hash* hash, const byte* data, word32 sz) +{ + if (hash == NULL || (sz > 0 && data == NULL)) { + return BAD_FUNC_ARG; + } + + if (hash->len < hash->used + sz) { + if (hash->msg == NULL) { + hash->msg = (byte*)XMALLOC(hash->used + sz, hash->heap, + DYNAMIC_TYPE_TMP_BUFFER); + } else { +#ifdef FREERTOS + byte* pt = (byte*)XMALLOC(hash->used + sz, hash->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (pt == NULL) { + return MEMORY_E; + } + XMEMCPY(pt, hash->msg, hash->used); + XFREE(hash->msg, hash->heap, DYNAMIC_TYPE_TMP_BUFFER); + hash->msg = NULL; + hash->msg = pt; +#else + byte* pt = (byte*)XREALLOC(hash->msg, hash->used + sz, hash->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (pt == NULL) { + return MEMORY_E; + } + hash->msg = pt; +#endif + } + if (hash->msg == NULL) { + return MEMORY_E; + } + hash->len = hash->used + sz; + } + XMEMCPY(hash->msg + hash->used, data , sz); + hash->used += sz; + + return 0; +} + +static int TSIPHashFinal(wolfssl_TSIP_Hash* hash, byte* out, word32 outSz) +{ + int ret; + void* heap; + tsip_sha_md5_handle_t handle; + uint32_t sz; + + e_tsip_err_t (*Init)(tsip_sha_md5_handle_t*); + e_tsip_err_t (*Update)(tsip_sha_md5_handle_t*, uint8_t*, uint32_t); + e_tsip_err_t (*Final )(tsip_sha_md5_handle_t*, uint8_t*, uint32_t*); + + if (hash == NULL || out == NULL) { + return BAD_FUNC_ARG; + } + + if (hash->sha_type == TSIP_SHA1) { + Init = R_TSIP_Sha1Init; + Update = R_TSIP_Sha1Update; + Final = R_TSIP_Sha1Final; + } else if (hash->sha_type == TSIP_SHA256) { + Init = R_TSIP_Sha256Init; + Update = R_TSIP_Sha256Update; + Final = R_TSIP_Sha256Final; + } else + return BAD_FUNC_ARG; + + heap = hash->heap; + + tsip_hw_lock(); + + if (Init(&handle) == TSIP_SUCCESS) { + ret = Update(&handle, (uint8_t*)hash->msg, hash->used); + if (ret == TSIP_SUCCESS) { + ret = Final(&handle, out, (uint32_t*)&sz); + if (ret != TSIP_SUCCESS || sz != outSz) { + return ret; + } + } + } + tsip_hw_unlock(); + + TSIPHashFree(hash); + return TSIPHashInit(hash, heap, 0, hash->sha_type); +} + +static int TSIPHashGet(wolfssl_TSIP_Hash* hash, byte* out, word32 outSz) +{ + int ret; + tsip_sha_md5_handle_t handle; + uint32_t sz; + + e_tsip_err_t (*Init)(tsip_sha_md5_handle_t*); + e_tsip_err_t (*Update)(tsip_sha_md5_handle_t*, uint8_t*, uint32_t); + e_tsip_err_t (*Final )(tsip_sha_md5_handle_t*, uint8_t*, uint32_t*); + + if (hash == NULL || out == NULL) { + return BAD_FUNC_ARG; + } + + if (hash->sha_type == TSIP_SHA1) { + Init = R_TSIP_Sha1Init; + Update = R_TSIP_Sha1Update; + Final = R_TSIP_Sha1Final; + } else if 
(hash->sha_type == TSIP_SHA256) { + Init = R_TSIP_Sha256Init; + Update = R_TSIP_Sha256Update; + Final = R_TSIP_Sha256Final; + } else + return BAD_FUNC_ARG; + + tsip_hw_lock(); + + if (Init(&handle) == TSIP_SUCCESS) { + ret = Update(&handle, (uint8_t*)hash->msg, hash->used); + if (ret == TSIP_SUCCESS) { + ret = Final(&handle, out, &sz); + if (ret != TSIP_SUCCESS || sz != outSz) { + return ret; + } + } + } + + tsip_hw_unlock(); + + return 0; +} + +static int TSIPHashCopy(wolfssl_TSIP_Hash* src, wolfssl_TSIP_Hash* dst) +{ + if (src == NULL || dst == NULL) { + return BAD_FUNC_ARG; + } + + XMEMCPY(dst, src, sizeof(wolfssl_TSIP_Hash)); + + if (src->len > 0 && src->msg != NULL) { + dst->msg = (byte*)XMALLOC(src->len, dst->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (dst->msg == NULL) { + return MEMORY_E; + } + XMEMCPY(dst->msg, src->msg, src->len); + } + + return 0; +} + /* */ +int wc_InitSha_ex(wc_Sha* sha, void* heap, int devId) +{ + return TSIPHashInit(sha, heap, devId, TSIP_SHA1); +} + +int wc_ShaUpdate(wc_Sha* sha, const byte* in, word32 sz) +{ + return TSIPHashUpdate(sha, in, sz); +} + +int wc_ShaFinal(wc_Sha* sha, byte* hash) +{ + return TSIPHashFinal(sha, hash, WC_SHA_DIGEST_SIZE); +} + +int wc_ShaGetHash(wc_Sha* sha, byte* hash) +{ + return TSIPHashGet(sha, hash, WC_SHA_DIGEST_SIZE); +} + +int wc_ShaCopy(wc_Sha256* src, wc_Sha256* dst) +{ + return TSIPHashCopy(src, dst); +} +#endif /* !NO_SHA */ + +#if !defined(NO_SHA256) +#include + +/* */ +int wc_InitSha256_ex(wc_Sha256* sha, void* heap, int devId) +{ + return TSIPHashInit(sha, heap, devId, TSIP_SHA256); +} + +int wc_Sha256Update(wc_Sha256* sha, const byte* in, word32 sz) +{ + return TSIPHashUpdate(sha, in, sz); +} + +int wc_Sha256Final(wc_Sha256* sha, byte* hash) +{ + return TSIPHashFinal(sha, hash, WC_SHA256_DIGEST_SIZE); +} + +int wc_Sha256GetHash(wc_Sha256* sha, byte* hash) +{ + return TSIPHashGet(sha, hash, WC_SHA256_DIGEST_SIZE); +} + +int wc_Sha256Copy(wc_Sha256* src, wc_Sha256* dst) +{ + return TSIPHashCopy(src, dst); +} +#endif /* !NO_SHA256 */ +#endif /* WOLFSSL_RENESAS_TSIP_CRYPT */ +#endif /* #if !defined(NO_SHA) || !defined(NO_SHA256) */ diff --git a/client/wolfssl/wolfcrypt/src/port/Renesas/renesas_tsip_util.c b/client/wolfssl/wolfcrypt/src/port/Renesas/renesas_tsip_util.c new file mode 100644 index 0000000..e3cd7ad --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/Renesas/renesas_tsip_util.c @@ -0,0 +1,719 @@ +/* renesas_tsip_util.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ +#include + +#if defined(WOLFSSL_RENESAS_TSIP) + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +/* mutex */ +wolfSSL_Mutex tsip_mutex; +static int tsip_CryptHwMutexInit_ = 0; + +/* ./ca-cert.der.sign, */ +/* expect to have these variables defined at user application */ +extern uint32_t s_flash[]; +extern uint32_t s_inst1[R_TSIP_SINST_WORD_SIZE]; +extern uint32_t s_inst2[R_TSIP_SINST2_WORD_SIZE]; +static const byte *ca_cert_sig; + +/* user key */ +static tsip_key_data g_user_key_info; +/* tsip only keep one encrypted ca public key */ +#if defined(WOLFSSL_RENESAS_TSIP_TLS) +static uint32_t g_encrypted_publicCA_key[R_TSIP_SINST_WORD_SIZE]; +static uint32_t g_CAscm_Idx; /* index of CM table */ +#endif + +static int tsip_CryptHwMutexInit(wolfSSL_Mutex* mutex) { + return wc_InitMutex(mutex); +} + +static int tsip_CryptHwMutexLock(wolfSSL_Mutex* mutex) { + return wc_LockMutex(mutex); +} + +static int tsip_CryptHwMutexUnLock(wolfSSL_Mutex* mutex) { + return wc_UnLockMutex(mutex); +} + +/* +* lock hw engine. +* this should be called before using engine. +*/ +int tsip_hw_lock() +{ + int ret = 0; + + WOLFSSL_MSG("enter esp_sha_hw_lock"); + + if(tsip_CryptHwMutexInit_ == 0){ + ret = tsip_CryptHwMutexInit(&tsip_mutex); + if(ret == 0) { + tsip_CryptHwMutexInit_ = 1; + } else { + WOLFSSL_MSG(" mutex initialization failed."); + return -1; + } + } + if(tsip_CryptHwMutexLock(&tsip_mutex) != 0) { + /* this should not happens */ + return -1; + } + + WOLFSSL_MSG("leave tsip_sha_try_hw_lock"); + return ret; +} + +/* +* release hw engine +*/ +void tsip_hw_unlock( void ) +{ + WOLFSSL_MSG("enter tsip_hw_unlock"); + /* unlock hw engine for next use */ + tsip_CryptHwMutexUnLock(&tsip_mutex); + WOLFSSL_MSG("leave tsip_hw_unlock"); +} +/* check if tsip tls functions can be used for the cipher */ +/* cipher0 : in the some cipher suite, */ +/* first byte becomes greater than 0, otherwise 0x00 */ +/* side : CLIENT END or SEVER END */ +int tsip_useable(const struct WOLFSSL *ssl) +{ + byte cipher0; + byte cipher; + byte side; + + /* sanity check */ + if (ssl == NULL) + return BAD_FUNC_ARG; + + /* when rsa key index == NULL, tsip isn't used for cert verification. */ + /* in the case, we cannot use TSIP. */ + if (!ssl->peerTsipEncRsaKeyIndex) + return 0; + + /* when enabled Extended Master Secret, we cannot use TSIP. */ + if (ssl->options.haveEMS) + return 0; + + cipher0 = ssl->options.cipherSuite0; + cipher = ssl->options.cipherSuite; + side = ssl->options.side; + + if (cipher0 > 0x00) + return 0; + + if ((cipher == l_TLS_RSA_WITH_AES_128_CBC_SHA || + cipher == l_TLS_RSA_WITH_AES_128_CBC_SHA256 || + cipher == l_TLS_RSA_WITH_AES_256_CBC_SHA || + cipher == l_TLS_RSA_WITH_AES_256_CBC_SHA256) && + side == WOLFSSL_CLIENT_END) + return 1; + else + return 0; +} + +/* check if the g_alreadyVerified CA's key can be used for * + * peer's certification */ +byte tsip_checkCA(word32 cmIdx) +{ + return (cmIdx == g_CAscm_Idx? 1:0); +} + +/* check if the root CA has been verified by TSIP, * + * and it exists in the CM table. */ +byte tsip_rootCAverified( ) +{ + return (g_CAscm_Idx != (uint32_t)-1 ? 
1:0); +} + +/* open TSIP driver for use */ +int tsip_Open( ) { + + int ret; + + if ((ret = tsip_hw_lock()) == 0) { + /* open the TSIP */ + ret = R_TSIP_Open((uint32_t*)s_flash, s_inst1, s_inst2); + if( ret != TSIP_SUCCESS ) { + WOLFSSL_MSG("RENESAS TSIP Open failed"); + } + +#if defined(WOLFSSL_RENESAS_TSIP_TLS) + /* generate TLS Rsa public key for Certificate verification */ + if (ret == TSIP_SUCCESS && g_user_key_info.encrypted_user_tls_key) { + ret = R_TSIP_GenerateTlsRsaPublicKeyIndex( + g_user_key_info.encrypted_session_key, + g_user_key_info.iv, + g_user_key_info.encrypted_user_tls_key, + &g_user_key_info.user_rsa2048_tls_pubindex); + + if (ret != TSIP_SUCCESS) { + WOLFSSL_MSG("R_TSIP_GenerateTlsRsaPublicKeyIndex failed"); + } else { + /* close once */ + tsip_Close( ); + /* open again with s_inst[] */ + XMEMCPY(s_inst1, + g_user_key_info.user_rsa2048_tls_pubindex.value, + sizeof(s_inst1)); + ret = R_TSIP_Open((uint32_t*)s_flash, s_inst1, s_inst2); + if (ret != TSIP_SUCCESS) { + WOLFSSL_MSG("R_TSIP_(Re)Open failed"); + } + /* init vars */ + g_CAscm_Idx = (uint32_t)-1; + } + } +#endif + /* unlock hw */ + tsip_hw_unlock(); + } else + WOLFSSL_MSG("Failed to lock tsip hw \n"); + + return ret; +} + +/* close TSIP driver */ +void tsip_Close( ) { + int ret; + + if ((ret = tsip_hw_lock()) == 0) { + /* close TSIP */ + ret = R_TSIP_Close(); +#if defined(WOLFSSL_RENESAS_TSIP_TLS) + g_CAscm_Idx = (uint32_t)-1; +#endif + /* unlock hw */ + tsip_hw_unlock(); + if( ret != TSIP_SUCCESS ) { + WOLFSSL_MSG("RENESAS TSIP Close failed"); + } + } else + WOLFSSL_MSG("Failed to unlock tsip hw \n"); +} + +/* Support functions for TSIP TLS Capability */ +#if defined(WOLFSSL_RENESAS_TSIP_TLS) + +/* to inform ca certificate sign */ +/* signature format expects RSA 2048 PSS with SHA256 */ +void tsip_inform_cert_sign(const byte *sign) +{ + if(sign) + ca_cert_sig = sign; +} + +/* inform user key */ +/* the function expects to be called from user application */ +/* user has to create these key information by Renesas tool in advance.*/ +void tsip_inform_user_keys( + byte *encrypted_session_key, + byte *iv, + byte *encrypted_user_tls_key +) +{ + g_user_key_info.encrypted_session_key = NULL; + g_user_key_info.iv = NULL; + g_user_key_info.encrypted_user_tls_key = NULL; + + if ( encrypted_session_key ) { + g_user_key_info.encrypted_session_key = encrypted_session_key; + } + if ( iv ) { + g_user_key_info.iv = iv; + } + if ( encrypted_user_tls_key ) { + g_user_key_info.encrypted_user_tls_key = encrypted_user_tls_key; + } +} + +#ifndef NO_WOLFSSL_RENESAS_TSIP_TLS_SESSION +/* convert def to tsip define */ +static byte _tls2tsipdef(byte cipher) +{ + byte def = R_TSIP_TLS_RSA_WITH_AES_128_CBC_SHA; + switch(cipher){ + case l_TLS_RSA_WITH_AES_128_CBC_SHA: + break; + case l_TLS_RSA_WITH_AES_128_CBC_SHA256: + def = R_TSIP_TLS_RSA_WITH_AES_128_CBC_SHA256; + break; + case l_TLS_RSA_WITH_AES_256_CBC_SHA: + def = R_TSIP_TLS_RSA_WITH_AES_256_CBC_SHA; + break; + case l_TLS_RSA_WITH_AES_256_CBC_SHA256: + def = R_TSIP_TLS_RSA_WITH_AES_256_CBC_SHA256; + break; + default:break; + } + return def; +} + +/* Sha1Hmac */ +int tsip_Sha1Hmac(const struct WOLFSSL *ssl, const byte *myInner, + word32 innerSz, const byte *in, word32 sz, byte *digest, + word32 verify) +{ + tsip_hmac_sha_handle_t _handle; + tsip_hmac_sha_key_index_t key_index; + int ret; + + if ((ssl == NULL) || (myInner == NULL) || (in == NULL) || + (digest == NULL)) + return BAD_FUNC_ARG; + + if ((ret = tsip_hw_lock()) != 0) { + WOLFSSL_MSG("hw lock failed\n"); + return ret; + } + + if 
( (ssl->options.side == WOLFSSL_CLIENT_END && !verify) || + (ssl->options.side == WOLFSSL_SERVER_END && verify) ) + XMEMCPY(key_index.value, ssl->keys.tsip_client_write_MAC_secret, + sizeof(key_index.value)); + else + XMEMCPY(key_index.value, ssl->keys.tsip_server_write_MAC_secret, + sizeof(key_index.value)); + + ret = R_TSIP_Sha1HmacGenerateInit(&_handle, &key_index); + + if (ret == TSIP_SUCCESS) + ret = R_TSIP_Sha1HmacGenerateUpdate(&_handle, (uint8_t*)myInner, + (uint32_t)innerSz); + + if (ret == TSIP_SUCCESS) + ret = R_TSIP_Sha1HmacGenerateUpdate(&_handle, (uint8_t*)in, sz); + + if (ret == TSIP_SUCCESS) + ret = R_TSIP_Sha1HmacGenerateFinal(&_handle, digest); + + /* unlock hw */ + tsip_hw_unlock(); + + return ret; +} + +/* Sha256Hmac */ +int tsip_Sha256Hmac(const struct WOLFSSL *ssl, const byte *myInner, + word32 innerSz, const byte *in, word32 sz, byte *digest, + word32 verify) +{ + tsip_hmac_sha_handle_t _handle; + tsip_hmac_sha_key_index_t key_index; + int ret; + + if ((ssl == NULL) || (myInner == NULL) || (in == NULL) || + (digest == NULL)) + return BAD_FUNC_ARG; + + if ( (ssl->options.side == WOLFSSL_CLIENT_END && !verify) || + (ssl->options.side == WOLFSSL_SERVER_END && verify) ) + XMEMCPY(key_index.value, ssl->keys.tsip_client_write_MAC_secret, + sizeof(key_index.value)); + else + XMEMCPY(key_index.value, ssl->keys.tsip_server_write_MAC_secret, + sizeof(key_index.value)); + + if ((ret = tsip_hw_lock()) != 0) { + WOLFSSL_MSG("hw lock failed\n"); + return ret; + } + + ret = R_TSIP_Sha256HmacGenerateInit(&_handle, &key_index); + + if (ret == TSIP_SUCCESS) + ret = R_TSIP_Sha256HmacGenerateUpdate(&_handle, (uint8_t*)myInner, + innerSz); + + if (ret == TSIP_SUCCESS) + ret = R_TSIP_Sha256HmacGenerateUpdate(&_handle, (uint8_t*)in, sz); + + if (ret == TSIP_SUCCESS) + ret = R_TSIP_Sha256HmacGenerateFinal(&_handle, digest); + + /* unlock hw */ + tsip_hw_unlock(); + + return ret; +} + +/* generate Verify Data based on master secret */ +int tsip_generateVerifyData(const byte *ms, /* master secret */ + const byte *side, const byte *handshake_hash, + byte *hashes /* out */) +{ + int ret ; + uint32_t l_side = R_TSIP_TLS_GENERATE_CLIENT_VERIFY; + + if ((ms == NULL) || (side == NULL) || (handshake_hash == NULL) || + (hashes == NULL)) + return BAD_FUNC_ARG; + + if (XSTRNCMP((const char*)side, (const char*)tls_server, FINISHED_LABEL_SZ) + == 0) + { + l_side = R_TSIP_TLS_GENERATE_SERVER_VERIFY; + } + + if ((ret = tsip_hw_lock()) == 0) { + ret = R_TSIP_TlsGenerateVerifyData(l_side, (uint32_t*)ms, + (uint8_t*)handshake_hash, hashes/* out */); + if (ret != TSIP_SUCCESS) { + WOLFSSL_MSG("R_TSIP_TlsGenerateSessionKey failed\n"); + } + } + /* unlock hw */ + tsip_hw_unlock(); + + return ret; +} + +/* generate keys for TLS communication */ +int tsip_generateSeesionKey(struct WOLFSSL *ssl) +{ + int ret; + Ciphers *enc; + Ciphers *dec; + tsip_hmac_sha_key_index_t key_client_mac; + tsip_hmac_sha_key_index_t key_server_mac; + tsip_aes_key_index_t key_client_aes; + tsip_aes_key_index_t key_server_aes; + + if (ssl== NULL) + return BAD_FUNC_ARG; + + if ((ret = tsip_hw_lock()) == 0) { + ret = R_TSIP_TlsGenerateSessionKey( + _tls2tsipdef(ssl->options.cipherSuite), + (uint32_t*)ssl->arrays->tsip_masterSecret, + (uint8_t*)ssl->arrays->clientRandom, + (uint8_t*)ssl->arrays->serverRandom, &key_client_mac, + &key_server_mac, &key_client_aes, &key_server_aes, + NULL, NULL); + if (ret != TSIP_SUCCESS) { + WOLFSSL_MSG("R_TSIP_TlsGenerateSessionKey failed\n"); + } else { + /* succeeded creating session keys */ + /* alloc aes 
instance for both enc and dec */ + enc = &ssl->encrypt; + dec = &ssl->decrypt; + + if (enc) { + if (enc->aes == NULL) { + enc->aes = (Aes*)XMALLOC(sizeof(Aes), ssl->heap, + DYNAMIC_TYPE_CIPHER); + if (enc->aes == NULL) + return MEMORY_E; + } + + XMEMSET(enc->aes, 0, sizeof(Aes)); + } + if (dec) { + if (dec->aes == NULL) { + dec->aes = (Aes*)XMALLOC(sizeof(Aes), ssl->heap, + DYNAMIC_TYPE_CIPHER); + if (dec->aes == NULL) { + if (enc) { + XFREE(enc->aes, NULL, DYNAMIC_TYPE_CIPHER); + } + return MEMORY_E; + } + } + + XMEMSET(dec->aes, 0, sizeof(Aes)); + } + /* copy key index into aes */ + if (ssl->options.side == PROVISION_CLIENT) { + XMEMCPY(&enc->aes->ctx.tsip_keyIdx, &key_client_aes, + sizeof(key_client_aes)); + XMEMCPY(&dec->aes->ctx.tsip_keyIdx, &key_server_aes, + sizeof(key_server_aes)); + } else { + XMEMCPY(&enc->aes->ctx.tsip_keyIdx, &key_server_aes, + sizeof(key_server_aes)); + XMEMCPY(&dec->aes->ctx.tsip_keyIdx, &key_client_aes, + sizeof(key_client_aes)); + } + /* copy hac key index into keys */ + XMEMCPY(ssl->keys.tsip_client_write_MAC_secret, key_client_mac.value, + sizeof(key_client_mac.value)); + XMEMCPY(ssl->keys.tsip_server_write_MAC_secret, key_server_mac.value, + sizeof(key_client_mac.value)); + /* set up key size and marked readly */ + if (enc){ + enc->aes->ctx.keySize = ssl->specs.key_size; + /* ready for use */ + enc->setup = 1; + } + /* set up key size and marked readly */ + if (dec) { + dec->aes->ctx.keySize = ssl->specs.key_size; + /* ready for use */ + dec->setup = 1; + } + } + /* unlock hw */ + tsip_hw_unlock(); + } else + WOLFSSL_MSG("hw lock failed\n"); + + return ret; +} +/* generate Master secrete by TSIP */ +int tsip_generateMasterSecret(const byte *pr, /* pre-master */ + const byte *cr, /* client random */ + const byte *sr, /* server random */ + byte *ms) +{ + int ret; + + if ((pr == NULL) || (cr == NULL) || (sr == NULL) || + (ms == NULL)) + return BAD_FUNC_ARG; + + if ((ret = tsip_hw_lock()) == 0) { + ret = R_TSIP_TlsGenerateMasterSecret( (uint32_t*)pr, + (uint8_t*)cr, (uint8_t*)sr, (uint32_t*)ms); + if (ret != TSIP_SUCCESS) { + WOLFSSL_MSG("R_TSIP_TlsGenerateMasterSecret failed\n"); + } + /* unlock hw */ + tsip_hw_unlock(); + } else { + WOLFSSL_MSG(" hw lock failed "); + } + + return ret; +} +/* generate pre-Master secrete by TSIP */ +int tsip_generatePremasterSecret(byte *premaster, word32 preSz ) +{ + int ret; + + if (premaster == NULL) + return BAD_FUNC_ARG; + + if ((ret = tsip_hw_lock()) == 0 && preSz >= + (R_TSIP_TLS_MASTER_SECRET_WORD_SIZE*4)) { + /* generate pre-master, 80 bytes */ + ret = R_TSIP_TlsGeneratePreMasterSecret( (uint32_t*)premaster ); + if (ret != TSIP_SUCCESS) { + WOLFSSL_MSG(" R_TSIP_TlsGeneratePreMasterSecret failed\n"); + } + /* unlock hw */ + tsip_hw_unlock(); + } else { + WOLFSSL_MSG(" hw lock failed or preSz is smaller than 80"); + } + + return ret; +} +/* generate encrypted pre-Master secrete by TSIP */ +int tsip_generateEncryptPreMasterSecret(WOLFSSL *ssl, byte *out, word32 *outSz) +{ + int ret; + + if ((ssl == NULL) || (out == NULL) || (outSz == NULL)) + return BAD_FUNC_ARG; + + if ((ret = tsip_hw_lock()) == 0) { + if (*outSz >= 256) + ret = R_TSIP_TlsEncryptPreMasterSecret( + (uint32_t*)ssl->peerTsipEncRsaKeyIndex, + (uint32_t*)&ssl->arrays->preMasterSecret[VERSION_SZ], + (uint8_t*)out); + else + ret = -1; + + if (ret != TSIP_SUCCESS) { + WOLFSSL_MSG(" R_TSIP_TlsEncryptPreMasterSecret failed\n"); + } else { + *outSz = 256; /* TSIP can only handles 2048 RSA */ + } + /* unlock hw */ + tsip_hw_unlock(); + } else { + WOLFSSL_MSG(" hw 
lock failed "); + } + + return ret; +} +#endif /* NO_WOLFSSL_RENESAS_TSIP_TLS_SESSION */ + +/* Certificate verification by TSIP */ +int tsip_tls_CertVerify(const byte *cert, word32 certSz, + const byte *signature, word32 sigSz, + word32 key_n_start, word32 key_n_len, + word32 key_e_start, word32 key_e_len, + byte *tsip_encRsaKeyIndex) +{ + int ret; + + if (cert == NULL) + return BAD_FUNC_ARG; + + if (!signature) { + WOLFSSL_MSG(" signature for ca verification is not set\n"); + return -1; + } + if (!tsip_encRsaKeyIndex) { + WOLFSSL_MSG(" tsip_encRsaKeyIndex is NULL.\n"); + return -1; + } + + if ((ret = tsip_hw_lock()) == 0) { + ret = R_TSIP_TlsCertificateVerification( + (uint32_t*)g_encrypted_publicCA_key,/* encrypted public key */ + (uint8_t*)cert, /* certificate der */ + certSz, /* length of der */ + (uint8_t*)signature, /* sign data by RSA PSS */ + key_n_start, /* start position of public key n in bytes */ + (key_n_start + key_n_len), /* length of the public key n */ + key_e_start, /* start pos, key e in bytes */ + (key_e_start + key_e_len), /* length of the public key e */ + (uint32_t*)tsip_encRsaKeyIndex /* returned encrypted key */ + ); + + if (ret != TSIP_SUCCESS) { + WOLFSSL_MSG(" R_TSIP_TlsCertificateVerification() failed"); + } + tsip_hw_unlock(); + } else { + WOLFSSL_MSG(" hw lock failed "); + } + + return ret; +} +/* Root Certificate verification */ +int tsip_tls_RootCertVerify(const byte *cert, word32 cert_len, + word32 key_n_start, word32 key_n_len, + word32 key_e_start, word32 key_e_len, + word32 cm_row) +{ + int ret; + /* call to generate encrypted public key for certificate verification */ + uint8_t *signature = (uint8_t*)ca_cert_sig; + + if (cert == NULL) + return BAD_FUNC_ARG; + + if (!signature) { + WOLFSSL_MSG(" signature for ca verification is not set\n"); + return -1; + } + + if ((ret = tsip_hw_lock()) == 0) { + ret = R_TSIP_TlsRootCertificateVerification( + /* CA cert */ + (uint8_t*)cert, + /* length of CA cert */ + (uint32_t)cert_len, + /* Byte position of public key */ + key_n_start, + (key_n_start + key_n_len), + key_e_start, + (key_e_start + key_e_len), + /* signature by "RSA 2048 PSS with SHA256" */ + (uint8_t*)ca_cert_sig, + /* RSA-2048 public key used by + RSA-2048 PSS with SHA256. 560 Bytes*/ + g_encrypted_publicCA_key + ); + + if (ret != TSIP_SUCCESS) { + WOLFSSL_MSG(" R_TSIP_TlsRootCertVerify() failed"); + } else { + g_CAscm_Idx = cm_row; + } + + tsip_hw_unlock(); + } else { + WOLFSSL_MSG(" hw lock failed "); + } + + return ret; +} +#endif /* WOLFSSL_RENESAS_TSIP_TLS */ + +#ifdef WOLFSSL_RENESAS_TSIP_CRYPT_DEBUG + +/* err + * e_tsip_err + TSIP_SUCCESS = 0, + TSIP_ERR_SELF_CHECK1, // Self-check 1 fail or TSIP function internal err. + TSIP_ERR_RESOURCE_CONFLICT, // A resource conflict occurred. + TSIP_ERR_SELF_CHECK2, // Self-check 2 fail. + TSIP_ERR_KEY_SET, // setting the invalid key. + TSIP_ERR_AUTHENTICATION, // Authentication failed. + TSIP_ERR_CALLBACK_UNREGIST, // Callback function is not registered. + TSIP_ERR_PARAMETER, // Illegal Input data. + TSIP_ERR_PROHIBIT_FUNCTION, // An invalid function call occurred. + * TSIP_RESUME_FIRMWARE_GENERATE_MAC, + // There is a continuation of R_TSIP_GenerateFirmwareMAC. 
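+   * These driver status codes map onto the short diagnostic strings
+   * returned by ret2err() below.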
+*/
+static void hexdump(const uint8_t* in, uint32_t len)
+{
+    uint32_t i;
+
+    if (in == NULL)
+        return;
+
+    /* use i < len: looping while i <= len would read one byte past
+     * the end of the buffer */
+    for (i = 0; i < len; i++, in++){
+        printf("%02x:", *in);
+        if (((i+1)%16)==0){
+            printf("\n");
+        }
+    }
+    printf("\n");
+}
+
+byte *ret2err(word32 ret)
+{
+    switch(ret){
+        case TSIP_SUCCESS: return "success";
+        case TSIP_ERR_SELF_CHECK1: return "selfcheck1";
+        case TSIP_ERR_RESOURCE_CONFLICT: return "rsconflict";
+        case TSIP_ERR_SELF_CHECK2: return "selfcheck2";
+        case TSIP_ERR_KEY_SET: return "keyset";
+        case TSIP_ERR_AUTHENTICATION: return "authentication";
+        case TSIP_ERR_CALLBACK_UNREGIST: return "callback unreg";
+        case TSIP_ERR_PARAMETER: return "badarg";
+        case TSIP_ERR_PROHIBIT_FUNCTION: return "prohibitfunc";
+        case TSIP_RESUME_FIRMWARE_GENERATE_MAC: return "conti-generate-mac";
+        default: return "unknown";
+    }
+}
+
+#endif /* WOLFSSL_RENESAS_TSIP_CRYPT_DEBUG */
+#endif /* WOLFSSL_RENESAS_TSIP */
diff --git a/client/wolfssl/wolfcrypt/src/port/af_alg/afalg_aes.c b/client/wolfssl/wolfcrypt/src/port/af_alg/afalg_aes.c
new file mode 100644
index 0000000..2d1d41a
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/af_alg/afalg_aes.c
@@ -0,0 +1,900 @@
+/* afalg_aes.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include +#include + +#if !defined(NO_AES) && (defined(WOLFSSL_AFALG) || \ + defined(WOLFSSL_AFALG_XILINX_AES)) + +#include +#include +#include + +#include /* for readv */ + +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +#ifdef WOLFSSL_AFALG_XILINX_AES + #define WOLFSSL_XILINX_ALIGN sizeof(wolfssl_word) +#endif + +static const char WC_TYPE_SYMKEY[] = "skcipher"; + +static int wc_AesSetup(Aes* aes, const char* type, const char* name, int ivSz, int aadSz) +{ +#ifdef WOLFSSL_AFALG_XILINX_AES + byte* key = (byte*)aes->msgBuf; +#else + byte* key = (byte*)aes->key; +#endif + + aes->rdFd = wc_Afalg_CreateRead(aes->alFd, type, name); + if (aes->rdFd < 0) { + WOLFSSL_MSG("Unable to accept and get AF_ALG read socket"); + aes->rdFd = WC_SOCK_NOTSET; + return aes->rdFd; + } + + if (setsockopt(aes->alFd, SOL_ALG, ALG_SET_KEY, key, aes->keylen) != 0) { + WOLFSSL_MSG("Unable to set AF_ALG key"); + aes->rdFd = WC_SOCK_NOTSET; + return WC_AFALG_SOCK_E; + } + ForceZero(key, sizeof(aes->key)); + + /* set up CMSG headers */ + XMEMSET((byte*)&(aes->msg), 0, sizeof(struct msghdr)); + + aes->msg.msg_control = key; /* use existing key buffer for + * control buffer */ +#ifdef WOLFSSL_AFALG_XILINX_AES + aes->msg.msg_controllen = CMSG_SPACE(4) + + CMSG_SPACE(sizeof(struct af_alg_iv) + ivSz); + (void)aadSz; +#else + aes->msg.msg_controllen = CMSG_SPACE(4); + if (aadSz > 0) { + aes->msg.msg_controllen += CMSG_SPACE(4); + } + if (ivSz > 0) { + aes->msg.msg_controllen += CMSG_SPACE((sizeof(struct af_alg_iv) + ivSz)); + } +#endif + + if (wc_Afalg_SetOp(CMSG_FIRSTHDR(&(aes->msg)), aes->dir) < 0) { + WOLFSSL_MSG("Error with setting AF_ALG operation"); + aes->rdFd = WC_SOCK_NOTSET; + return -1; + } + + return 0; +} + + +#ifdef WOLFSSL_AFALG +int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) +{ +#if defined(AES_MAX_KEY_SIZE) + const word32 max_key_len = (AES_MAX_KEY_SIZE / 8); +#endif + + if (aes == NULL || + !((keylen == 16) || (keylen == 24) || (keylen == 32))) { + return BAD_FUNC_ARG; + } + +#if defined(AES_MAX_KEY_SIZE) + /* Check key length */ + if (keylen > max_key_len) { + return BAD_FUNC_ARG; + } +#endif + aes->keylen = keylen; + aes->rounds = keylen/4 + 6; + +#ifdef WOLFSSL_AES_COUNTER + aes->left = 0; +#endif + + aes->rdFd = WC_SOCK_NOTSET; + aes->alFd = wc_Afalg_Socket(); + if (aes->alFd < 0) { + WOLFSSL_MSG("Unable to open an AF_ALG socket"); + return WC_AFALG_SOCK_E; + } + + /* save key until type is known i.e. CBC, ECB, ... 
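+     * (wc_AesSetup() cannot create and bind the AF_ALG read socket
+     * until the algorithm name such as "cbc(aes)" or "ctr(aes)" is
+     * chosen, so the raw key is cached in aes->key and only pushed to
+     * the kernel via ALG_SET_KEY on first use)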
*/ + XMEMCPY((byte*)(aes->key), userKey, keylen); + aes->dir = dir; + + return wc_AesSetIV(aes, iv); +} +#endif + +/* AES-CBC */ +#if defined(HAVE_AES_CBC) && defined(WOLFSSL_AFALG) + static const char WC_NAME_AESCBC[] = "cbc(aes)"; + + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + struct cmsghdr* cmsg; + struct iovec iov; + int ret; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (aes->rdFd == WC_SOCK_NOTSET) { + if ((ret = wc_AesSetup(aes, WC_TYPE_SYMKEY, WC_NAME_AESCBC, + AES_IV_SIZE, 0)) != 0) { + WOLFSSL_MSG("Error with first time setup of AF_ALG socket"); + return ret; + } + } + + sz = sz - (sz % AES_BLOCK_SIZE); + if ((sz / AES_BLOCK_SIZE) > 0) { + /* update IV */ + cmsg = CMSG_FIRSTHDR(&(aes->msg)); + ret = wc_Afalg_SetIv(CMSG_NXTHDR(&(aes->msg), cmsg), + (byte*)(aes->reg), AES_IV_SIZE); + if (ret < 0) { + WOLFSSL_MSG("Error setting IV"); + return ret; + } + + /* set data to be encrypted */ + iov.iov_base = (byte*)in; + iov.iov_len = sz; + + aes->msg.msg_iov = &iov; + aes->msg.msg_iovlen = 1; /* # of iov structures */ + + ret = (int)sendmsg(aes->rdFd, &(aes->msg), 0); + if (ret < 0) { + return ret; + } + ret = (int)read(aes->rdFd, out, sz); + if (ret < 0) { + return ret; + } + + /* set IV for next CBC call */ + XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + } + + return 0; + } + + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + struct cmsghdr* cmsg; + struct iovec iov; + int ret; + + if (aes == NULL || out == NULL || in == NULL + || sz % AES_BLOCK_SIZE != 0) { + return BAD_FUNC_ARG; + } + + if (aes->rdFd == WC_SOCK_NOTSET) { + if ((ret = wc_AesSetup(aes, WC_TYPE_SYMKEY, WC_NAME_AESCBC, + AES_IV_SIZE, 0)) != 0) { + return ret; + } + } + + if ((sz / AES_BLOCK_SIZE) > 0) { + /* update IV */ + cmsg = CMSG_FIRSTHDR(&(aes->msg)); + ret = wc_Afalg_SetIv(CMSG_NXTHDR(&(aes->msg), cmsg), + (byte*)(aes->reg), AES_IV_SIZE); + if (ret != 0) { + return ret; + } + + /* set data to be decrypted */ + iov.iov_base = (byte*)in; + iov.iov_len = sz; + + aes->msg.msg_iov = &iov; + aes->msg.msg_iovlen = 1; /* # of iov structures */ + + /* set IV for next CBC call */ + XMEMCPY(aes->reg, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + + ret = (int)sendmsg(aes->rdFd, &(aes->msg), 0); + if (ret < 0) { + return ret; + } + ret = (int)read(aes->rdFd, out, sz); + if (ret < 0) { + return ret; + } + + } + + return 0; + } + #endif + +#endif /* HAVE_AES_CBC */ + + +/* AES-DIRECT */ +#if (defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AES_ECB)) && \ + defined(WOLFSSL_AFALG) + +static const char WC_NAME_AESECB[] = "ecb(aes)"; + +/* common code between ECB encrypt and decrypt + * returns 0 on success */ +static int wc_Afalg_AesDirect(Aes* aes, byte* out, const byte* in, word32 sz) +{ + struct iovec iov; + int ret; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (aes->rdFd == WC_SOCK_NOTSET) { + if ((ret = wc_AesSetup(aes, WC_TYPE_SYMKEY, WC_NAME_AESECB, + 0, 0)) != 0) { + WOLFSSL_MSG("Error with first time setup of AF_ALG socket"); + return ret; + } + } + + /* set data to be encrypted */ + iov.iov_base = (byte*)in; + iov.iov_len = sz; + + aes->msg.msg_iov = &iov; + aes->msg.msg_iovlen = 1; /* # of iov structures */ + + ret = (int)sendmsg(aes->rdFd, &(aes->msg), 0); + if (ret < 0) { + return ret; + } + ret = (int)read(aes->rdFd, out, sz); + if (ret < 0) { + return ret; + } + + return 0; +} +#endif + + +#if defined(WOLFSSL_AES_DIRECT) && 
defined(WOLFSSL_AFALG) +void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) +{ + if (wc_Afalg_AesDirect(aes, out, in, AES_BLOCK_SIZE) != 0) { + WOLFSSL_MSG("Error with AES encrypt direct call"); + } +} + + +void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) +{ + if (wc_Afalg_AesDirect(aes, out, in, AES_BLOCK_SIZE) != 0) { + WOLFSSL_MSG("Error with AES decrypt direct call"); + } +} + + +int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) +{ + return wc_AesSetKey(aes, userKey, keylen, iv, dir); +} +#endif + + +/* AES-CTR */ +#if defined(WOLFSSL_AES_COUNTER) && defined(WOLFSSL_AFALG) + static const char WC_NAME_AESCTR[] = "ctr(aes)"; + + /* Increment AES counter */ + static WC_INLINE void IncrementAesCounter(byte* inOutCtr) + { + /* in network byte order so start at end and work back */ + int i; + for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) { + if (++inOutCtr[i]) /* we're done unless we overflow */ + return; + } + } + + int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + struct cmsghdr* cmsg; + struct iovec iov[2]; + int ret; + byte* tmp; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + /* consume any unused bytes left in aes->tmp */ + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; + while (aes->left && sz) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + sz--; + } + + if (aes->rdFd == WC_SOCK_NOTSET) { + if ((ret = wc_AesSetup(aes, WC_TYPE_SYMKEY, WC_NAME_AESCTR, + AES_IV_SIZE, 0)) != 0) { + WOLFSSL_MSG("Error with first time setup of AF_ALG socket"); + return ret; + } + } + + if (sz > 0) { + aes->left = sz % AES_BLOCK_SIZE; + + /* clear previously leftover data */ + tmp = (byte*)aes->tmp; + XMEMSET(tmp, 0, AES_BLOCK_SIZE); + + /* update IV */ + cmsg = CMSG_FIRSTHDR(&(aes->msg)); + ret = wc_Afalg_SetIv(CMSG_NXTHDR(&(aes->msg), cmsg), + (byte*)(aes->reg), AES_IV_SIZE); + if (ret < 0) { + WOLFSSL_MSG("Error setting IV"); + return ret; + } + + /* set data to be encrypted */ + iov[0].iov_base = (byte*)in; + iov[0].iov_len = sz - aes->left; + + iov[1].iov_base = tmp; + if (aes->left > 0) { + XMEMCPY(tmp, in + sz - aes->left, aes->left); + iov[1].iov_len = AES_BLOCK_SIZE; + } + else { + iov[1].iov_len = 0; + } + + aes->msg.msg_iov = iov; + aes->msg.msg_iovlen = 2; /* # of iov structures */ + + ret = (int)sendmsg(aes->rdFd, &(aes->msg), 0); + if (ret < 0) { + return ret; + } + + + /* set buffers to hold result and left over stream */ + iov[0].iov_base = (byte*)out; + iov[0].iov_len = sz - aes->left; + + iov[1].iov_base = tmp; + if (aes->left > 0) { + iov[1].iov_len = AES_BLOCK_SIZE; + } + else { + iov[1].iov_len = 0; + } + + ret = (int)readv(aes->rdFd, iov, 2); + if (ret < 0) { + return ret; + } + + if (aes->left > 0) { + XMEMCPY(out + sz - aes->left, tmp, aes->left); + aes->left = AES_BLOCK_SIZE - aes->left; + } + } + + /* adjust counter after call to hardware */ + while (sz >= AES_BLOCK_SIZE) { + IncrementAesCounter((byte*)aes->reg); + sz -= AES_BLOCK_SIZE; + } + + if (aes->left > 0) { + IncrementAesCounter((byte*)aes->reg); + } + + return 0; + } +#endif /* WOLFSSL_AES_COUNTER */ + + +#ifdef HAVE_AESGCM + + +#ifdef WOLFSSL_AFALG_XILINX_AES + static const char WC_NAME_AESGCM[] = "xilinx-zynqmp-aes"; + static const char* WC_TYPE_AEAD = WC_TYPE_SYMKEY; +#else + static const char WC_NAME_AESGCM[] = "gcm(aes)"; + static const char WC_TYPE_AEAD[] = "aead"; +#endif + +#ifndef WC_SYSTEM_AESGCM_IV +/* size of IV allowed on system for AES-GCM */ +#define WC_SYSTEM_AESGCM_IV 12 
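+/* The CTR code above steps the 16-byte counter in network (big-endian) order,
+ * e.g. ... 00 FF FF + 1 carries through to ... 01 00 00. For AES-GCM the
+ * 12-byte IV is the one length that needs no GHASH preprocessing: the kernel,
+ * like the GCM specification, uses the initial counter block
+ * J0 = IV || 0x00000001. */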
+#endif
+
+#ifndef WOLFSSL_MAX_AUTH_TAG_SZ
+/* size of tag is restricted by system for AES-GCM
+ * check 'cat /proc/crypto' to see restricted size */
+#define WOLFSSL_MAX_AUTH_TAG_SZ 16
+#endif
+
+#ifdef WOLFSSL_AFALG_XILINX_AES
+/* Xilinx uses a slightly different function because the default AES key is
+ * also needed when creating/validating the TAG for additional data.
+ *
+ * returns 0 on success
+ */
+int wc_AesGcmSetKey_ex(Aes* aes, const byte* key, word32 len, word32 kup)
+#else
+int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
+#endif
+{
+#if defined(AES_MAX_KEY_SIZE)
+    const word32 max_key_len = (AES_MAX_KEY_SIZE / 8);
+#endif
+
+    if (aes == NULL ||
+            !((len == 16) || (len == 24) || (len == 32))) {
+        return BAD_FUNC_ARG;
+    }
+
+#if defined(AES_MAX_KEY_SIZE)
+    /* Check key length */
+    if (len > max_key_len) {
+        return BAD_FUNC_ARG;
+    }
+#endif
+    aes->keylen = len;
+    aes->rounds = len/4 + 6;
+
+    aes->rdFd = WC_SOCK_NOTSET;
+    aes->alFd = wc_Afalg_Socket();
+    if (aes->alFd < 0) {
+        WOLFSSL_MSG("Unable to open an AF_ALG socket");
+        return WC_AFALG_SOCK_E;
+    }
+
+    /* save key until direction is known i.e. encrypt or decrypt */
+#ifdef WOLFSSL_AFALG_XILINX_AES
+    (void)kup; /* using alternate buffer because software key is needed */
+    XMEMCPY((byte*)(aes->msgBuf), key, len);
+#else
+    XMEMCPY((byte*)(aes->key), key, len);
+#endif
+
+    return 0;
+}
+
+
+
+/* Performs AES-GCM encryption and returns 0 on success
+ *
+ * Warning: If using Xilinx hardware acceleration it is assumed that the out
+ *          buffer is large enough to hold both cipher text and tag, that is
+ *          sz + 16 bytes. The input and output buffers are expected to be
+ *          64-bit aligned.
+ */
+int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
+                     const byte* iv, word32 ivSz,
+                     byte* authTag, word32 authTagSz,
+                     const byte* authIn, word32 authInSz)
+{
+    struct cmsghdr* cmsg;
+    struct iovec iov[3];
+    int ret;
+    struct msghdr* msg;
+    byte scratch[AES_BLOCK_SIZE];
+
+    /* argument checks */
+    if (aes == NULL || authTagSz > AES_BLOCK_SIZE) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (ivSz != WC_SYSTEM_AESGCM_IV || authTagSz > WOLFSSL_MAX_AUTH_TAG_SZ) {
+        WOLFSSL_MSG("IV/AAD size not supported on system");
+        return BAD_FUNC_ARG;
+    }
+
+    if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) {
+        WOLFSSL_MSG("GcmEncrypt authTagSz too small error");
+        return BAD_FUNC_ARG;
+    }
+
+    if (aes->rdFd == WC_SOCK_NOTSET) {
+        aes->dir = AES_ENCRYPTION;
+        if ((ret = wc_AesSetup(aes, WC_TYPE_AEAD, WC_NAME_AESGCM, ivSz,
+                        authInSz)) != 0) {
+            WOLFSSL_MSG("Error with first time setup of AF_ALG socket");
+            return ret;
+        }
+
+        /* note that if ivSz were to change, msg_controllen would need to be
+           reset */
+
+#ifndef WOLFSSL_AFALG_XILINX_AES
+        /* set auth tag
+         * @TODO case where tag size changes between calls?
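+         * (The tag length is fixed the first time the Aes object is used.
+         * A minimal caller-side sketch, with hypothetical key, iv, aad,
+         * plain and cipher buffers and error checks omitted:
+         *
+         *     byte tag[16];
+         *     wc_AesGcmSetKey(&aes, key, 16);
+         *     wc_AesGcmEncrypt(&aes, cipher, plain, plainSz, iv, 12,
+         *                      tag, sizeof(tag), aad, aadSz);
+         *
+         * reusing the same Aes object keeps the 16-byte tag size.)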
*/ + ret = setsockopt(aes->alFd, SOL_ALG, ALG_SET_AEAD_AUTHSIZE, NULL, + authTagSz); + if (ret != 0) { + perror("set tag"); + WOLFSSL_MSG("Unable to set AF_ALG tag size "); + return WC_AFALG_SOCK_E; + } +#endif + } + + + msg = &(aes->msg); + cmsg = CMSG_FIRSTHDR(msg); + cmsg = CMSG_NXTHDR(msg, cmsg); + + /* set IV and AAD size */ + ret = wc_Afalg_SetIv(cmsg, (byte*)iv, ivSz); + if (ret < 0) { + WOLFSSL_MSG("Error setting IV"); + return ret; + + } +#ifdef WOLFSSL_AFALG_XILINX_AES + if (sz > 0) { + #ifndef NO_WOLFSSL_ALLOC_ALIGN + byte* tmp = NULL; + #endif + if ((wolfssl_word)in % WOLFSSL_XILINX_ALIGN) { + #ifndef NO_WOLFSSL_ALLOC_ALIGN + byte* tmp_align; + tmp = (byte*)XMALLOC(sz + WOLFSSL_XILINX_ALIGN + + AES_BLOCK_SIZE, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (tmp == NULL) { + return MEMORY_E; + } + tmp_align = tmp + (WOLFSSL_XILINX_ALIGN - + ((size_t)tmp % WOLFSSL_XILINX_ALIGN)); + XMEMCPY(tmp_align, in, sz); + iov[0].iov_base = tmp_align; + #else + WOLFSSL_MSG("Buffer expected to be word aligned"); + return BAD_ALIGN_E; + #endif + } + else { + iov[0].iov_base = (byte*)in; + } + iov[0].iov_len = sz + AES_BLOCK_SIZE; + + msg->msg_iov = iov; + msg->msg_iovlen = 1; /* # of iov structures */ + + ret = (int)sendmsg(aes->rdFd, msg, 0); + #ifndef NO_WOLFSSL_ALLOC_ALIGN + XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + #endif + if (ret < 0) { + return ret; + } + + ret = read(aes->rdFd, out, sz + AES_BLOCK_SIZE); + if (ret < 0) { + return ret; + } + XMEMCPY(authTag, out + sz, authTagSz); + } + + /* handle completing tag with using software if additional data added */ + if (authIn != NULL && authInSz > 0) { + byte initalCounter[AES_BLOCK_SIZE]; + XMEMSET(initalCounter, 0, AES_BLOCK_SIZE); + XMEMCPY(initalCounter, iv, ivSz); + initalCounter[AES_BLOCK_SIZE - 1] = 1; + GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz); + wc_AesEncryptDirect(aes, scratch, initalCounter); + xorbuf(authTag, scratch, authTagSz); + } +#else + if (authInSz > 0) { + cmsg = CMSG_NXTHDR(msg, cmsg); + ret = wc_Afalg_SetAad(cmsg, authInSz); + if (ret < 0) { + WOLFSSL_MSG("Unable to set AAD size"); + return ret; + } + } + + /* set data to be encrypted*/ + iov[0].iov_base = (byte*)authIn; + iov[0].iov_len = authInSz; + + iov[1].iov_base = (byte*)in; + iov[1].iov_len = sz; + + msg->msg_iov = iov; + msg->msg_iovlen = 2; /* # of iov structures */ + + ret = (int)sendmsg(aes->rdFd, msg, 0); + if (ret < 0) { + return ret; + } + + { + byte* tmp = (byte*)XMALLOC(authInSz, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (tmp == NULL) { + return MEMORY_E; + } + /* first 16 bytes was all 0's */ + iov[0].iov_base = tmp; + (void)scratch; + iov[0].iov_len = authInSz; + + iov[1].iov_base = out; + iov[1].iov_len = sz; + + iov[2].iov_base = authTag; + iov[2].iov_len = authTagSz; + + ret = (int)readv(aes->rdFd, iov, 3); + XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + if (ret < 0) { + return ret; + } +#endif + + + return 0; +} + +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AESGCM_DECRYPT) +/* Performs AES-GCM decryption and returns 0 on success + * + * Warning: If using Xilinx hardware acceleration it is assumed that the in + * buffer is large enough to hold both cipher text and tag. 
That is + * sz | 16 bytes + */ +int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + struct cmsghdr* cmsg; + struct msghdr* msg; + struct iovec iov[3]; + byte scratch[AES_BLOCK_SIZE]; + int ret; +#ifdef WOLFSSL_AFALG_XILINX_AES + byte* tag = (byte*)authTag; + byte buf[AES_BLOCK_SIZE]; + byte initalCounter[AES_BLOCK_SIZE]; +#ifndef NO_WOLFSSL_ALLOC_ALIGN + byte* tmp = NULL; +#endif +#endif + + /* argument checks */ + if (aes == NULL || authTagSz > AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + if (ivSz != WC_SYSTEM_AESGCM_IV || authTagSz > WOLFSSL_MAX_AUTH_TAG_SZ) { + WOLFSSL_MSG("IV/AAD size not supported on system"); + return BAD_FUNC_ARG; + } + + if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) { + WOLFSSL_MSG("GcmEncrypt authTagSz too small error"); + return BAD_FUNC_ARG; + } + + if (aes->rdFd == WC_SOCK_NOTSET) { + aes->dir = AES_DECRYPTION; + if ((ret = wc_AesSetup(aes, WC_TYPE_AEAD, WC_NAME_AESGCM, ivSz, + authInSz)) != 0) { + WOLFSSL_MSG("Error with first time setup of AF_ALG socket"); + return ret; + } + +#ifndef WOLFSSL_AFALG_XILINX_AES + /* set auth tag + * @TODO case where tag size changes between calls? */ + ret = setsockopt(aes->alFd, SOL_ALG, ALG_SET_AEAD_AUTHSIZE, NULL, + authTagSz); + if (ret != 0) { + WOLFSSL_MSG("Unable to set AF_ALG tag size "); + return WC_AFALG_SOCK_E; + } +#endif + } + + /* set IV and AAD size */ + msg = &aes->msg; + if ((cmsg = CMSG_FIRSTHDR(msg)) == NULL) { + return WC_AFALG_SOCK_E; + } + if (wc_Afalg_SetOp(cmsg, aes->dir) < 0) { + WOLFSSL_MSG("Error with setting AF_ALG operation"); + return WC_AFALG_SOCK_E; + } + if ((cmsg = CMSG_NXTHDR(msg, cmsg)) == NULL) { + return WC_AFALG_SOCK_E; + } + ret = wc_Afalg_SetIv(cmsg, (byte*)iv, ivSz); + if (ret < 0) { + return ret; + } + +#ifdef WOLFSSL_AFALG_XILINX_AES + /* check for and handle additional data */ + if (authIn != NULL && authInSz > 0) { + + XMEMSET(initalCounter, 0, AES_BLOCK_SIZE); + XMEMCPY(initalCounter, iv, ivSz); + initalCounter[AES_BLOCK_SIZE - 1] = 1; + tag = buf; + GHASH(aes, NULL, 0, in, sz, tag, AES_BLOCK_SIZE); + wc_AesEncryptDirect(aes, scratch, initalCounter); + xorbuf(tag, scratch, AES_BLOCK_SIZE); + if (ret != 0) { + return AES_GCM_AUTH_E; + } + } + + /* it is assumed that in buffer size is large enough to hold TAG */ + XMEMCPY((byte*)in + sz, tag, AES_BLOCK_SIZE); + if ((wolfssl_word)in % WOLFSSL_XILINX_ALIGN) { + #ifndef NO_WOLFSSL_ALLOC_ALIGN + byte* tmp_align; + tmp = (byte*)XMALLOC(sz + WOLFSSL_XILINX_ALIGN + + AES_BLOCK_SIZE, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (tmp == NULL) { + return MEMORY_E; + } + tmp_align = tmp + (WOLFSSL_XILINX_ALIGN - + ((size_t)tmp % WOLFSSL_XILINX_ALIGN)); + XMEMCPY(tmp_align, in, sz + AES_BLOCK_SIZE); + iov[0].iov_base = tmp_align; + #else + WOLFSSL_MSG("Buffer expected to be word aligned"); + return BAD_ALIGN_E; + #endif + } + else { + iov[0].iov_base = (byte*)in; + } + iov[0].iov_len = sz + AES_BLOCK_SIZE; + + msg->msg_iov = iov; + msg->msg_iovlen = 1; + + ret = sendmsg(aes->rdFd, msg, 0); +#ifndef NO_WOLFSSL_ALLOC_ALIGN + XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + if (ret < 0) { + return ret; + } + + ret = read(aes->rdFd, out, sz + AES_BLOCK_SIZE); + if (ret < 0) { + return AES_GCM_AUTH_E; + } + + /* check on tag */ + if (authIn != NULL && authInSz > 0) { + GHASH(aes, authIn, authInSz, in, sz, tag, AES_BLOCK_SIZE); + wc_AesEncryptDirect(aes, scratch, initalCounter); + xorbuf(tag, scratch, 
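+        /* software computation of the standard GCM tag equation, since the
+         * Xilinx engine's tag does not cover the extra AAD:
+         *   T = GHASH_H(A, C) XOR E_K(J0), with J0 = IV || 0x00000001 */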
AES_BLOCK_SIZE); + if (ConstantCompare(tag, authTag, authTagSz) != 0) { + return AES_GCM_AUTH_E; + } + } + +#else + if (authInSz > 0) { + cmsg = CMSG_NXTHDR(msg, cmsg); + ret = wc_Afalg_SetAad(cmsg, authInSz); + if (ret < 0) { + return ret; + } + } + + /* set data to be decrypted*/ + iov[0].iov_base = (byte*)authIn; + iov[0].iov_len = authInSz; + iov[1].iov_base = (byte*)in; + iov[1].iov_len = sz; + iov[2].iov_base = (byte*)authTag; + iov[2].iov_len = authTagSz; + + msg->msg_iov = iov; + msg->msg_iovlen = 3; /* # of iov structures */ + ret = (int)sendmsg(aes->rdFd, &(aes->msg), 0); + if (ret < 0) { + return ret; + } + + { + byte* tmp = (byte*)XMALLOC(authInSz, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (tmp == NULL) { + return MEMORY_E; + } + iov[0].iov_base = tmp; + iov[0].iov_len = authInSz; + iov[1].iov_base = out; + iov[1].iov_len = sz; + ret = (int)readv(aes->rdFd, iov, 2); + XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + if (ret < 0) { + return AES_GCM_AUTH_E; + } + (void)scratch; +#endif + + return 0; +} +#endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */ +#endif /* HAVE_AESGCM */ + + +#ifdef HAVE_AES_ECB +int wc_AesEcbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_Afalg_AesDirect(aes, out, in, sz); +} + + +int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_Afalg_AesDirect(aes, out, in, sz); +} +#endif /* HAVE_AES_ECB */ +#endif /* !NO_AES && WOLFSSL_AFALG */ + diff --git a/client/wolfssl/wolfcrypt/src/port/af_alg/afalg_hash.c b/client/wolfssl/wolfcrypt/src/port/af_alg/afalg_hash.c new file mode 100644 index 0000000..41e57bc --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/af_alg/afalg_hash.c @@ -0,0 +1,339 @@ +/* afalg_hash.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#if defined(WOLFSSL_AFALG_HASH) || (defined(WOLFSSL_AFALG_XILINX_SHA3) \ + && defined(WOLFSSL_SHA3)) + +#include +#include +#include +#include + +static const char WC_TYPE_HASH[] = "hash"; + + +/* generic AF_ALG hash free */ +static void AfalgHashFree(wolfssl_AFALG_Hash* hash) +{ + if (hash == NULL) + return; + + if (hash->alFd > 0) { + close(hash->alFd); + hash->alFd = -1; /* avoid possible double close on socket */ + } + if (hash->rdFd > 0) { + close(hash->rdFd); + hash->rdFd = -1; /* avoid possible double close on socket */ + } + + #if defined(WOLFSSL_AFALG_HASH_KEEP) + if (hash->msg != NULL) { + XFREE(hash->msg, hash->heap, DYNAMIC_TYPE_TMP_BUFFER); + hash->msg = NULL; + } + #endif +} + + +/* generic hash init for AF_ALG, returns 0 on success */ +static int AfalgHashInit(wolfssl_AFALG_Hash* hash, void* heap, int devId, + const char* type) +{ + if (hash == NULL) { + return BAD_FUNC_ARG; + } + + (void)devId; /* no async for now */ + XMEMSET(hash, 0, sizeof(wolfssl_AFALG_Hash)); + hash->heap = heap; + + hash->len = 0; + hash->used = 0; + hash->msg = NULL; + hash->alFd = -1; + hash->rdFd = -1; + + hash->alFd = wc_Afalg_Socket(); + if (hash->alFd < 0) { + return WC_AFALG_SOCK_E; + } + + hash->rdFd = wc_Afalg_CreateRead(hash->alFd, WC_TYPE_HASH, type); + if (hash->rdFd < 0) { + close(hash->alFd); + return WC_AFALG_SOCK_E; + } + + return 0; + +} + + +/* generic hash update for AF_ALG, returns 0 on success */ +static int AfalgHashUpdate(wolfssl_AFALG_Hash* hash, const byte* in, word32 sz) +{ + if (hash == NULL || (sz > 0 && in == NULL)) { + return BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_AFALG_HASH_KEEP + /* keep full message to hash at end instead of incremental updates */ + if (hash->len < hash->used + sz) { + if (hash->msg == NULL) { + hash->msg = (byte*)XMALLOC(hash->used + sz, hash->heap, + DYNAMIC_TYPE_TMP_BUFFER); + } else { + byte* pt = (byte*)XREALLOC(hash->msg, hash->used + sz, hash->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (pt == NULL) { + return MEMORY_E; + } + hash->msg = pt; + } + if (hash->msg == NULL) { + return MEMORY_E; + } + hash->len = hash->used + sz; + } + XMEMCPY(hash->msg + hash->used, in, sz); + hash->used += sz; +#else + int ret; + + if ((ret = (int)send(hash->rdFd, in, sz, MSG_MORE)) < 0) { + return ret; + } +#endif + return 0; +} + + +/* generic hash final for AF_ALG, return 0 on success */ +static int AfalgHashFinal(wolfssl_AFALG_Hash* hash, byte* out, word32 outSz, + const char* type) +{ + int ret; + void* heap; + + if (hash == NULL || out == NULL) { + return BAD_FUNC_ARG; + } + + heap = hash->heap; /* keep because AfalgHashInit clears the pointer */ +#ifdef WOLFSSL_AFALG_HASH_KEEP + /* keep full message to out at end instead of incremental updates */ + if ((ret = (int)send(hash->rdFd, hash->msg, hash->used, 0)) < 0) { + return ret; + } + XFREE(hash->msg, heap, DYNAMIC_TYPE_TMP_BUFFER); + hash->msg = NULL; +#else + if ((ret = (int)send(hash->rdFd, NULL, 0, 0)) < 0) { + return ret; + } +#endif + + if ((ret = (int)read(hash->rdFd, out, outSz)) != (int)outSz) { + return ret; + } + + AfalgHashFree(hash); + return AfalgHashInit(hash, heap, 0, type); +} + + +/* generic function to get intermediate hash */ +static int AfalgHashGet(wolfssl_AFALG_Hash* hash, byte* out, word32 outSz) +{ + int ret; + + 
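+    /* An intermediate digest is only available in WOLFSSL_AFALG_HASH_KEEP
+     * mode, where the whole message is buffered and replayed (see the
+     * NOT_COMPILED_IN branch below). Typical use through the wrappers, with
+     * hypothetical data/dataSz and error checks omitted:
+     *
+     *     wc_Sha256 sha;
+     *     byte digest[WC_SHA256_DIGEST_SIZE];
+     *     wc_InitSha256_ex(&sha, NULL, INVALID_DEVID);
+     *     wc_Sha256Update(&sha, data, dataSz);
+     *     wc_Sha256Final(&sha, digest);
+     */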
    if (hash == NULL || out == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    (void)ret;
+#ifdef WOLFSSL_AFALG_HASH_KEEP
+    if ((ret = (int)send(hash->rdFd, hash->msg, hash->used, 0)) < 0) {
+        return ret;
+    }
+
+    if ((ret = (int)read(hash->rdFd, out, outSz)) != (int)outSz) {
+        return ret;
+    }
+    return 0;
+#else
+    (void)hash;
+    (void)out;
+    (void)outSz;
+
+    WOLFSSL_MSG("Compile with WOLFSSL_AFALG_HASH_KEEP for this feature");
+    return NOT_COMPILED_IN;
+#endif
+}
+
+
+/* generic struct copy for AF_ALG, returns 0 on success */
+static int AfalgHashCopy(wolfssl_AFALG_Hash* src, wolfssl_AFALG_Hash* dst)
+{
+    if (src == NULL || dst == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    XMEMCPY(dst, src, sizeof(wolfssl_AFALG_Hash));
+
+#ifdef WOLFSSL_AFALG_HASH_KEEP
+    dst->msg = (byte*)XMALLOC(src->len, dst->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (dst->msg == NULL) {
+        return MEMORY_E;
+    }
+    XMEMCPY(dst->msg, src->msg, src->len);
+#endif
+
+    dst->rdFd = accept(src->rdFd, NULL, 0);
+    dst->alFd = accept(src->alFd, NULL, 0);
+
+    if (dst->rdFd == -1 || dst->alFd == -1) {
+        AfalgHashFree(dst);
+        return -1;
+    }
+
+    return 0;
+}
+
+
+#if !defined(NO_SHA256) && defined(WOLFSSL_AFALG_HASH)
+#include <wolfssl/wolfcrypt/sha256.h>
+
+static const char WC_NAME_SHA256[] = "sha256";
+
+
+/* create AF_ALG sockets for SHA256 operation */
+int wc_InitSha256_ex(wc_Sha256* sha, void* heap, int devId)
+{
+    return AfalgHashInit(sha, heap, devId, WC_NAME_SHA256);
+}
+
+
+int wc_Sha256Update(wc_Sha256* sha, const byte* in, word32 sz)
+{
+    return AfalgHashUpdate(sha, in, sz);
+}
+
+
+int wc_Sha256Final(wc_Sha256* sha, byte* hash)
+{
+    return AfalgHashFinal(sha, hash, WC_SHA256_DIGEST_SIZE, WC_NAME_SHA256);
+}
+
+
+int wc_Sha256GetHash(wc_Sha256* sha, byte* hash)
+{
+    return AfalgHashGet(sha, hash, WC_SHA256_DIGEST_SIZE);
+}
+
+
+int wc_Sha256Copy(wc_Sha256* src, wc_Sha256* dst)
+{
+    return AfalgHashCopy(src, dst);
+}
+#endif /* !NO_SHA256 */
+
+
+
+#if defined(WOLFSSL_SHA3) && defined(WOLFSSL_AFALG_XILINX_SHA3)
+#include <wolfssl/wolfcrypt/sha3.h>
+
+static const char WC_NAME_SHA3[] = "xilinx-keccak-384";
+
+void wc_Sha3_384_Free(wc_Sha3* sha)
+{
+    AfalgHashFree(sha);
+}
+
+
+/* create AF_ALG sockets for SHA3-384 operation */
+int wc_InitSha3_384(wc_Sha3* sha, void* heap, int devId)
+{
+    return AfalgHashInit(sha, heap, devId, WC_NAME_SHA3);
+}
+
+
+int wc_Sha3_384_Update(wc_Sha3* sha, const byte* in, word32 sz)
+{
+#ifndef WOLFSSL_AFALG_HASH_KEEP
+    if (sz % 4) {
+        WOLFSSL_MSG("Alignment issue. Message size needs to be divisible by 4");
+        return BAD_FUNC_ARG;
+    }
+#endif
+
+    return AfalgHashUpdate(sha, in, sz);
+}
+
+
+int wc_Sha3_384_Final(wc_Sha3* sha, byte* hash)
+{
+    if (sha == NULL || hash == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+#ifdef WOLFSSL_AFALG_HASH_KEEP
+    if (sha->used % 4) {
+        WOLFSSL_MSG("Alignment issue. Message size needs to be divisible by 4");
+        return BAD_FUNC_ARG;
+    }
+#endif
+
+    return AfalgHashFinal(sha, hash, WC_SHA3_384_DIGEST_SIZE, WC_NAME_SHA3);
+}
+
+
+int wc_Sha3_384_GetHash(wc_Sha3* sha, byte* hash)
+{
+    if (sha == NULL || hash == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+#ifdef WOLFSSL_AFALG_HASH_KEEP
+    if (sha->used % 4) {
+        WOLFSSL_MSG("Alignment issue. 
Message size needs to be divisible by 4"); + return BAD_FUNC_ARG; + } +#endif + + return AfalgHashGet(sha, hash, WC_SHA3_384_DIGEST_SIZE); +} + +int wc_Sha3_384_Copy(wc_Sha3* src, wc_Sha3* dst) +{ + return AfalgHashCopy(src, dst); +} +#endif /* WOLFSSL_SHA3 && WOLFSSL_AFALG_XILINX_SHA3 */ + +#endif /* WOLFSSL_AFALG */ diff --git a/client/wolfssl/wolfcrypt/src/port/af_alg/wc_afalg.c b/client/wolfssl/wolfcrypt/src/port/af_alg/wc_afalg.c new file mode 100644 index 0000000..0a91b51 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/af_alg/wc_afalg.c @@ -0,0 +1,141 @@ +/* wc_afalg.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include +#include +#include + +#if defined(WOLFSSL_AFALG) || defined(WOLFSSL_AFALG_XILINX) + +#include +#include + + +/* Sets the type of socket address to use */ +void wc_Afalg_SockAddr(struct sockaddr_alg* in, const char* type, const char* name) +{ + in->salg_family = AF_ALG; + XSTRNCPY((char*)in->salg_type, type, XSTRLEN(type)); + in->salg_type[XSTRLEN(type)] = '\0'; + XSTRNCPY((char*)in->salg_name, name, XSTRLEN(name)); + in->salg_name[XSTRLEN(name)] = '\0'; +} + + +/* returns the socket accepting on with success + * negative values are returned in fail cases */ +int wc_Afalg_Accept(struct sockaddr_alg* in, int inSz, int sock) +{ + if (bind(sock, (const struct sockaddr*)in, inSz) < 0) { + WOLFSSL_MSG("Failed to bind with AF_ALG"); + return WC_AFALG_SOCK_E; + } + + return accept(sock, NULL, 0); +} + + +/* creates a new AF_ALG socket and returns it + * negative values are returned in fail cases */ +int wc_Afalg_Socket(void) +{ + int sock; + + if ((sock = socket(AF_ALG, SOCK_SEQPACKET, 0)) < 0) { + WOLFSSL_MSG("Failed to get AF_ALG socket"); + return WC_AFALG_SOCK_E; + } + + return sock; +} + + +/* binds and creates the read fd */ +int wc_Afalg_CreateRead(int sock, const char* type, const char* name) +{ + struct sockaddr_alg sa = {0}; + wc_Afalg_SockAddr(&sa, type, name); + return wc_Afalg_Accept(&sa, sizeof(sa), sock); +} + + +/* sets the IV in CMSG structure, returns 0 on success */ +int wc_Afalg_SetIv(struct cmsghdr* cmsg, byte* iv, word32 ivSz) +{ + struct af_alg_iv* afIv; + + if (cmsg == NULL || iv == NULL) { + WOLFSSL_MSG("Null cmsg or iv passed in"); + return BAD_FUNC_ARG; + } + + cmsg->cmsg_level = SOL_ALG; + cmsg->cmsg_type = ALG_SET_IV; + cmsg->cmsg_len = CMSG_LEN(sizeof(struct af_alg_iv) + ivSz); + afIv = (void*)CMSG_DATA(cmsg); + afIv->ivlen = ivSz; + XMEMCPY(afIv->iv, iv, ivSz); + + return 0; +} + + +/* sets the AAD size in CMSG structure, returns 0 on success */ +int wc_Afalg_SetAad(struct cmsghdr* cmsg, word32 sz) +{ + if (cmsg == NULL) { + WOLFSSL_MSG("Null cmsg passed in"); + return BAD_FUNC_ARG; + } + + cmsg->cmsg_level = SOL_ALG; + cmsg->cmsg_type = 
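+    /* end-to-end, the AF_ALG bootstrap these helpers implement is
+     * ("hash" and "sha256" are example strings):
+     *
+     *     int alFd = wc_Afalg_Socket();              (socket(AF_ALG, ...))
+     *     int rdFd = wc_Afalg_CreateRead(alFd, "hash", "sha256");
+     *                                                (bind() then accept())
+     *     followed by send()/read() on rdFd to drive the transform
+     */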
ALG_SET_AEAD_ASSOCLEN; + cmsg->cmsg_len = CMSG_LEN(sizeof(word32)); + *((word32*)CMSG_DATA(cmsg)) = sz; + + return 0; +} + + +/* sets the operation type in CMSG structure, returns 0 on success + * + * dir 0 is encryption 1 is decryption + */ +int wc_Afalg_SetOp(struct cmsghdr* cmsg, int dir) +{ + if (cmsg == NULL) { + return BAD_FUNC_ARG; + } + + cmsg->cmsg_level = SOL_ALG; + cmsg->cmsg_type = ALG_SET_OP; + cmsg->cmsg_len = CMSG_LEN(4); + *((word32*)CMSG_DATA(cmsg)) = (dir == 1)? ALG_OP_DECRYPT : ALG_OP_ENCRYPT; + + return 0; +} + +#endif /* !NO_AES && WOLFSSL_AFALG */ + diff --git a/client/wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.S b/client/wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.S new file mode 100644 index 0000000..6fd1ed3 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.S @@ -0,0 +1,6012 @@ +/* armv8-32-curve25519 + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./x25519/x25519.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.S + */ + +#ifdef WOLFSSL_ARMASM +#ifndef __aarch64__ + .text + .align 2 + .globl fe_init + .type fe_init, %function +fe_init: + bx lr + .size fe_init,.-fe_init + .text + .align 2 + .globl fe_frombytes + .type fe_frombytes, %function +fe_frombytes: + push {r4, r5, r6, r7, lr} + ldrd r2, r3, [r1] + ldr r12, [r1, #8] + ldr lr, [r1, #12] + ldrd r4, r5, [r1, #16] + ldrd r6, r7, [r1, #24] + and r7, r7, #0x7fffffff + strd r2, r3, [r0] + str r12, [r0, #8] + str lr, [r0, #12] + strd r4, r5, [r0, #16] + strd r6, r7, [r0, #24] + pop {r4, r5, r6, r7, pc} + .size fe_frombytes,.-fe_frombytes + .text + .align 2 + .globl fe_tobytes + .type fe_tobytes, %function +fe_tobytes: + push {r4, r5, r6, r7, r8, lr} + ldrd r2, r3, [r1] + ldr r12, [r1, #8] + ldr lr, [r1, #12] + ldrd r4, r5, [r1, #16] + ldrd r6, r7, [r1, #24] + adds r8, r2, #19 + adcs r8, r3, #0 + adcs r8, r12, #0 + adcs r8, lr, #0 + adcs r8, r4, #0 + adcs r8, r5, #0 + adcs r8, r6, #0 + adc r8, r7, #0 + asr r8, r8, #31 + and r8, r8, #19 + adds r2, r2, r8 + adcs r3, r3, #0 + adcs r12, r12, #0 + adcs lr, lr, #0 + adcs r4, r4, #0 + adcs r5, r5, #0 + adcs r6, r6, #0 + adc r7, r7, #0 + and r7, r7, #0x7fffffff + strd r2, r3, [r0] + str r12, [r0, #8] + str lr, [r0, #12] + strd r4, r5, [r0, #16] + strd r6, r7, [r0, #24] + pop {r4, r5, r6, r7, r8, pc} + .size fe_tobytes,.-fe_tobytes + .text + .align 2 + .globl fe_1 + .type fe_1, %function +fe_1: + # Set one + mov r2, #1 + mov r1, #0 + str r2, [r0] + str r1, [r0, #4] + str r1, [r0, #8] + str r1, [r0, #12] + str r1, [r0, #16] + str r1, [r0, #20] + str r1, [r0, #24] + str r1, [r0, #28] + bx lr + .size fe_1,.-fe_1 + .text + .align 2 + .globl fe_0 + .type fe_0, %function +fe_0: + # Set zero + mov r1, #0 + str r1, 
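+/* A field element is eight 32-bit little-endian limbs holding a value
+ * mod p = 2^255 - 19. fe_tobytes above canonicalizes the encoding: it
+ * computes a + 19 and, if bit 255 of the sum is set (meaning a >= p),
+ * adds 19 to a and clears bit 255, i.e. subtracts p, so the stored
+ * value always lands in [0, p). */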
[r0] + str r1, [r0, #4] + str r1, [r0, #8] + str r1, [r0, #12] + str r1, [r0, #16] + str r1, [r0, #20] + str r1, [r0, #24] + str r1, [r0, #28] + bx lr + .size fe_0,.-fe_0 + .text + .align 2 + .globl fe_copy + .type fe_copy, %function +fe_copy: + push {lr} + # Copy + ldrd r2, r3, [r1] + ldr r12, [r1, #8] + ldr lr, [r1, #12] + strd r2, r3, [r0] + str r12, [r0, #8] + str lr, [r0, #12] + ldrd r2, r3, [r1, #16] + ldr r12, [r1, #24] + ldr lr, [r1, #28] + strd r2, r3, [r0, #16] + str r12, [r0, #24] + str lr, [r0, #28] + pop {pc} + .size fe_copy,.-fe_copy + .text + .align 2 + .globl fe_sub + .type fe_sub, %function +fe_sub: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + # Sub + ldr r12, [r1] + ldr lr, [r1, #4] + ldrd r4, r5, [r1, #8] + ldrd r6, r7, [r2] + ldrd r8, r9, [r2, #8] + subs r6, r12, r6 + sbcs r7, lr, r7 + sbcs r8, r4, r8 + sbcs r9, r5, r9 + strd r6, r7, [r0] + strd r8, r9, [r0, #8] + ldr r12, [r1, #16] + ldr lr, [r1, #20] + ldrd r4, r5, [r1, #24] + ldrd r6, r7, [r2, #16] + ldrd r8, r9, [r2, #24] + sbcs r6, r12, r6 + sbcs r7, lr, r7 + sbcs r8, r4, r8 + sbc r9, r5, r9 + mov r10, #-19 + asr r3, r9, #31 + # Mask the modulus + and r10, r3, r10 + and r11, r3, #0x7fffffff + # Add modulus (if underflow) + ldr r12, [r0] + ldr lr, [r0, #4] + ldrd r4, r5, [r0, #8] + adds r12, r12, r10 + adcs lr, lr, r3 + adcs r4, r4, r3 + adcs r5, r5, r3 + adcs r6, r6, r3 + adcs r7, r7, r3 + adcs r8, r8, r3 + adc r9, r9, r11 + str r12, [r0] + str lr, [r0, #4] + strd r4, r5, [r0, #8] + strd r6, r7, [r0, #16] + strd r8, r9, [r0, #24] + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size fe_sub,.-fe_sub + .text + .align 2 + .globl fe_add + .type fe_add, %function +fe_add: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + # Add + ldr r12, [r1] + ldr lr, [r1, #4] + ldrd r4, r5, [r1, #8] + ldrd r6, r7, [r2] + ldrd r8, r9, [r2, #8] + adds r6, r12, r6 + adcs r7, lr, r7 + adcs r8, r4, r8 + adcs r9, r5, r9 + strd r6, r7, [r0] + strd r8, r9, [r0, #8] + ldr r12, [r1, #16] + ldr lr, [r1, #20] + ldrd r4, r5, [r1, #24] + ldrd r6, r7, [r2, #16] + ldrd r8, r9, [r2, #24] + adcs r6, r12, r6 + adcs r7, lr, r7 + adcs r8, r4, r8 + adc r9, r5, r9 + mov r10, #-19 + asr r3, r9, #31 + # Mask the modulus + and r10, r3, r10 + and r11, r3, #0x7fffffff + # Sub modulus (if overflow) + ldr r12, [r0] + ldr lr, [r0, #4] + ldrd r4, r5, [r0, #8] + subs r12, r12, r10 + sbcs lr, lr, r3 + sbcs r4, r4, r3 + sbcs r5, r5, r3 + sbcs r6, r6, r3 + sbcs r7, r7, r3 + sbcs r8, r8, r3 + sbc r9, r9, r11 + str r12, [r0] + str lr, [r0, #4] + strd r4, r5, [r0, #8] + strd r6, r7, [r0, #16] + strd r8, r9, [r0, #24] + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size fe_add,.-fe_add + .text + .align 2 + .globl fe_neg + .type fe_neg, %function +fe_neg: + push {r4, r5, lr} + mov r5, #-1 + mov r4, #-19 + ldrd r2, r3, [r1] + ldr r12, [r1, #8] + ldr lr, [r1, #12] + subs r2, r4, r2 + sbcs r3, r5, r3 + sbcs r12, r5, r12 + sbcs lr, r5, lr + strd r2, r3, [r0] + str r12, [r0, #8] + str lr, [r0, #12] + mov r4, #0x7fffffff + ldrd r2, r3, [r1, #16] + ldr r12, [r1, #24] + ldr lr, [r1, #28] + sbcs r2, r5, r2 + sbcs r3, r5, r3 + sbcs r12, r5, r12 + sbc lr, r4, lr + strd r2, r3, [r0, #16] + str r12, [r0, #24] + str lr, [r0, #28] + pop {r4, r5, pc} + .size fe_neg,.-fe_neg + .text + .align 2 + .globl fe_isnonzero + .type fe_isnonzero, %function +fe_isnonzero: + push {r4, r5, r6, r7, r8, lr} + ldrd r2, r3, [r0] + ldr r12, [r0, #8] + ldr lr, [r0, #12] + ldrd r4, r5, [r0, #16] + ldrd r6, r7, [r0, #24] + adds r1, r2, #19 + adcs r1, r3, #0 + adcs r1, r12, #0 + adcs r1, lr, #0 + adcs r1, r4, #0 
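+/* fe_add/fe_sub above stay reduced without branching: the sign bit of the
+ * top limb is smeared into a full-width mask (asr #31), and the mask decides
+ * whether p = 2^255 - 19 is added back after a borrow (fe_sub) or subtracted
+ * after a carry into bit 255 (fe_add); a constant-time equivalent of
+ * "if (r < 0) r += p;" and "if (r >= 2^255) r -= p;". */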
+ adcs r1, r5, #0 + adcs r1, r6, #0 + adc r1, r7, #0 + asr r1, r1, #31 + and r1, r1, #19 + adds r2, r2, r1 + adcs r3, r3, #0 + adcs r12, r12, #0 + adcs lr, lr, #0 + adcs r4, r4, #0 + adcs r5, r5, #0 + adcs r6, r6, #0 + adc r7, r7, #0 + and r7, r7, #0x7fffffff + orr r2, r2, r3 + orr r12, r12, lr + orr r4, r4, r5 + orr r6, r6, r7 + orr r12, r12, r4 + orr r2, r2, r6 + orr r0, r2, r12 + pop {r4, r5, r6, r7, r8, pc} + .size fe_isnonzero,.-fe_isnonzero + .text + .align 2 + .globl fe_isnegative + .type fe_isnegative, %function +fe_isnegative: + push {lr} + ldrd r2, r3, [r0] + ldr r12, [r0, #8] + ldr lr, [r0, #12] + adds r1, r2, #19 + adcs r1, r3, #0 + adcs r1, r12, #0 + adcs r1, lr, #0 + ldrd r2, r3, [r0, #16] + ldr r12, [r0, #24] + ldr lr, [r0, #28] + adcs r1, r2, #0 + adcs r1, r3, #0 + adcs r1, r12, #0 + ldr r2, [r0] + adc r1, lr, #0 + and r0, r2, #1 + lsr r1, r1, #31 + eor r0, r0, r1 + pop {pc} + .size fe_isnegative,.-fe_isnegative + .text + .align 2 + .globl fe_cmov_table + .type fe_cmov_table, %function +fe_cmov_table: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + sxtb r2, r2 + sbfx r7, r2, #7, #1 + eor r10, r2, r7 + sub r10, r10, r7 + mov r3, #1 + mov r12, #0 + mov lr, #1 + mov r4, #0 + mov r5, #0 + mov r6, #0 + mov r7, #0x80000000 + ror r7, r7, #31 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #32] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #64] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #30 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #32] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #64] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #29 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #32] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #64] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #28 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #32] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #64] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #27 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #32] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #64] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + 
eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #26 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #32] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #64] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #25 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #32] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #64] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #24 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #32] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #64] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + sub r1, r1, #0x2a0 + mov r8, #-19 + mov r9, #-1 + subs r8, r8, r5 + sbcs r9, r9, r6 + sbc r11, r11, r11 + asr r10, r2, #31 + eor r7, r3, lr + and r7, r7, r10 + eor r3, r3, r7 + eor lr, lr, r7 + eor r7, r12, r4 + and r7, r7, r10 + eor r12, r12, r7 + eor r4, r4, r7 + eor r8, r8, r5 + and r8, r8, r10 + eor r5, r5, r8 + eor r9, r9, r6 + and r9, r9, r10 + eor r6, r6, r9 + str r3, [r0] + str r12, [r0, #4] + str lr, [r0, #32] + str r4, [r0, #36] + str r5, [r0, #64] + str r6, [r0, #68] + sbfx r7, r2, #7, #1 + eor r10, r2, r7 + sub r10, r10, r7 + mov r3, #0 + mov r12, #0 + mov lr, #0 + mov r4, #0 + mov r5, #0 + mov r6, #0 + mov r7, #0x80000000 + ror r7, r7, #31 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #8] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #40] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #72] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #30 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #8] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #40] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #72] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #29 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #8] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #40] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #72] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, 
#28 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #8] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #40] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #72] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #27 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #8] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #40] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #72] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #26 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #8] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #40] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #72] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #25 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #8] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #40] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #72] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #24 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #8] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #40] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #72] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + sub r1, r1, #0x2a0 + mov r8, #-1 + mov r9, #-1 + rsbs r11, r11, #0 + sbcs r8, r8, r5 + sbcs r9, r9, r6 + sbc r11, r11, r11 + asr r10, r2, #31 + eor r7, r3, lr + and r7, r7, r10 + eor r3, r3, r7 + eor lr, lr, r7 + eor r7, r12, r4 + and r7, r7, r10 + eor r12, r12, r7 + eor r4, r4, r7 + eor r8, r8, r5 + and r8, r8, r10 + eor r5, r5, r8 + eor r9, r9, r6 + and r9, r9, r10 + eor r6, r6, r9 + str r3, [r0, #8] + str r12, [r0, #12] + str lr, [r0, #40] + str r4, [r0, #44] + str r5, [r0, #72] + str r6, [r0, #76] + sbfx r7, r2, #7, #1 + eor r10, r2, r7 + sub r10, r10, r7 + mov r3, #0 + mov r12, #0 + mov lr, #0 + mov r4, #0 + mov r5, #0 + mov r6, #0 + mov r7, #0x80000000 + ror r7, r7, #31 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #16] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #48] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #80] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #30 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, 
r9, [r1, #16] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #48] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #80] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #29 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #16] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #48] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #80] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #28 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #16] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #48] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #80] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #27 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #16] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #48] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #80] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #26 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #16] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #48] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #80] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #25 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #16] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #48] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #80] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #24 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #16] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #48] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #80] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + sub r1, r1, #0x2a0 + mov r8, #-1 + mov r9, #-1 + rsbs r11, r11, #0 + sbcs r8, r8, r5 + sbcs r9, r9, r6 + sbc r11, r11, r11 + asr r10, r2, #31 + eor r7, r3, lr + and r7, r7, r10 + eor r3, r3, r7 + eor lr, lr, r7 + eor r7, r12, r4 + and r7, r7, r10 + eor r12, r12, r7 + eor r4, r4, r7 + eor r8, r8, r5 
+ and r8, r8, r10 + eor r5, r5, r8 + eor r9, r9, r6 + and r9, r9, r10 + eor r6, r6, r9 + str r3, [r0, #16] + str r12, [r0, #20] + str lr, [r0, #48] + str r4, [r0, #52] + str r5, [r0, #80] + str r6, [r0, #84] + sbfx r7, r2, #7, #1 + eor r10, r2, r7 + sub r10, r10, r7 + mov r3, #0 + mov r12, #0 + mov lr, #0 + mov r4, #0 + mov r5, #0 + mov r6, #0 + mov r7, #0x80000000 + ror r7, r7, #31 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #24] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #56] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #88] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #30 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #24] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #56] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #88] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #29 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #24] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #56] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #88] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #28 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #24] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #56] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #88] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #27 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #24] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #56] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #88] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #26 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #24] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #56] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #88] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #25 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #24] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #56] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, 
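+/* fe_cmov_table reads all eight table entries to select one in constant
+ * time: for entry k a bit is rotated into position so that, only when k
+ * equals the secret index, asr #31 expands it into an all-ones mask, and
+ * each entry is folded in as acc ^= (acc ^ table[k]) & mask. The access
+ * pattern and timing are therefore independent of the index, and the
+ * trailing code swaps/negates coordinates when the signed index is
+ * negative. */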
r4, r9 + ldrd r8, r9, [r1, #88] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + add r1, r1, #0x60 + mov r7, #0x80000000 + ror r7, r7, #24 + ror r7, r7, r10 + asr r7, r7, #31 + ldrd r8, r9, [r1, #24] + eor r8, r8, r3 + eor r9, r9, r12 + and r8, r8, r7 + and r9, r9, r7 + eor r3, r3, r8 + eor r12, r12, r9 + ldrd r8, r9, [r1, #56] + eor r8, r8, lr + eor r9, r9, r4 + and r8, r8, r7 + and r9, r9, r7 + eor lr, lr, r8 + eor r4, r4, r9 + ldrd r8, r9, [r1, #88] + eor r8, r8, r5 + eor r9, r9, r6 + and r8, r8, r7 + and r9, r9, r7 + eor r5, r5, r8 + eor r6, r6, r9 + sub r1, r1, #0x2a0 + mov r8, #-1 + mov r9, #0x7fffffff + rsbs r11, r11, #0 + sbcs r8, r8, r5 + sbc r9, r9, r6 + asr r10, r2, #31 + eor r7, r3, lr + and r7, r7, r10 + eor r3, r3, r7 + eor lr, lr, r7 + eor r7, r12, r4 + and r7, r7, r10 + eor r12, r12, r7 + eor r4, r4, r7 + eor r8, r8, r5 + and r8, r8, r10 + eor r5, r5, r8 + eor r9, r9, r6 + and r9, r9, r10 + eor r6, r6, r9 + str r3, [r0, #24] + str r12, [r0, #28] + str lr, [r0, #56] + str r4, [r0, #60] + str r5, [r0, #88] + str r6, [r0, #92] + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size fe_cmov_table,.-fe_cmov_table + .text + .align 2 + .globl fe_mul + .type fe_mul, %function +fe_mul: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + sub sp, sp, #0x40 + # Multiply + ldr r7, [r1] + ldr r8, [r1, #4] + ldr r9, [r2] + ldr lr, [r2, #4] + # A[0] * B[0] = 0 + umull r4, r5, r7, r9 + str r4, [sp] + # A[0] * B[1] = 1 + umull r3, r6, r7, lr + adds r5, r5, r3 + adc r6, r6, #0 + # A[1] * B[0] = 1 + umull r3, r12, r8, r9 + adds r5, r5, r3 + mov r4, #0 + adcs r6, r6, r12 + adc r4, r4, #0 + str r5, [sp, #4] + # A[2] * B[0] = 2 + ldr r10, [r1, #8] + umull r3, r12, r10, r9 + adds r6, r6, r3 + adc r4, r4, r12 + # A[1] * B[1] = 2 + umull r3, r12, r8, lr + adds r6, r6, r3 + mov r5, #0 + adcs r4, r4, r12 + adc r5, r5, #0 + # A[0] * B[2] = 2 + ldr r11, [r2, #8] + umull r3, r12, r7, r11 + adds r6, r6, r3 + adcs r4, r4, r12 + adc r5, r5, #0 + str r6, [sp, #8] + # A[0] * B[3] = 3 + ldr r11, [r2, #12] + umull r3, r12, r7, r11 + adds r4, r4, r3 + mov r6, #0 + adcs r5, r5, r12 + adc r6, r6, #0 + # A[1] * B[2] = 3 + ldr r11, [r2, #8] + umull r3, r12, r8, r11 + adds r4, r4, r3 + adcs r5, r5, r12 + adc r6, r6, #0 + # A[2] * B[1] = 3 + umull r3, r12, r10, lr + adds r4, r4, r3 + adcs r5, r5, r12 + adc r6, r6, #0 + # A[3] * B[0] = 3 + ldr r10, [r1, #12] + umull r3, r12, r10, r9 + adds r4, r4, r3 + adcs r5, r5, r12 + adc r6, r6, #0 + str r4, [sp, #12] + # A[4] * B[0] = 4 + ldr r10, [r1, #16] + umull r3, r12, r10, r9 + adds r5, r5, r3 + mov r4, #0 + adcs r6, r6, r12 + adc r4, r4, #0 + # A[3] * B[1] = 4 + ldr r10, [r1, #12] + umull r3, r12, r10, lr + adds r5, r5, r3 + adcs r6, r6, r12 + adc r4, r4, #0 + # A[2] * B[2] = 4 + ldr r10, [r1, #8] + umull r3, r12, r10, r11 + adds r5, r5, r3 + adcs r6, r6, r12 + adc r4, r4, #0 + # A[1] * B[3] = 4 + ldr r11, [r2, #12] + umull r3, r12, r8, r11 + adds r5, r5, r3 + adcs r6, r6, r12 + adc r4, r4, #0 + # A[0] * B[4] = 4 + ldr r11, [r2, #16] + umull r3, r12, r7, r11 + adds r5, r5, r3 + adcs r6, r6, r12 + adc r4, r4, #0 + str r5, [sp, #16] + # A[0] * B[5] = 5 + ldr r11, [r2, #20] + umull r3, r12, r7, r11 + adds r6, r6, r3 + mov r5, #0 + adcs r4, r4, r12 + adc r5, r5, #0 + # A[1] * B[4] = 5 + ldr r11, [r2, #16] + umull r3, r12, r8, r11 + adds r6, r6, r3 + adcs r4, r4, r12 + adc r5, r5, #0 + # A[2] * B[3] = 5 + ldr r11, [r2, #12] + umull r3, r12, r10, r11 + adds r6, r6, r3 + adcs r4, r4, r12 + adc r5, r5, #0 + # A[3] * B[2] = 5 + ldr 
r10, [r1, #12] + ldr r11, [r2, #8] + umull r3, r12, r10, r11 + adds r6, r6, r3 + adcs r4, r4, r12 + adc r5, r5, #0 + # A[4] * B[1] = 5 + ldr r10, [r1, #16] + umull r3, r12, r10, lr + adds r6, r6, r3 + adcs r4, r4, r12 + adc r5, r5, #0 + # A[5] * B[0] = 5 + ldr r10, [r1, #20] + umull r3, r12, r10, r9 + adds r6, r6, r3 + adcs r4, r4, r12 + adc r5, r5, #0 + str r6, [sp, #20] + # A[6] * B[0] = 6 + ldr r10, [r1, #24] + umull r3, r12, r10, r9 + adds r4, r4, r3 + mov r6, #0 + adcs r5, r5, r12 + adc r6, r6, #0 + # A[5] * B[1] = 6 + ldr r10, [r1, #20] + umull r3, r12, r10, lr + adds r4, r4, r3 + adcs r5, r5, r12 + adc r6, r6, #0 + # A[4] * B[2] = 6 + ldr r10, [r1, #16] + umull r3, r12, r10, r11 + adds r4, r4, r3 + adcs r5, r5, r12 + adc r6, r6, #0 + # A[3] * B[3] = 6 + ldr r10, [r1, #12] + ldr r11, [r2, #12] + umull r3, r12, r10, r11 + adds r4, r4, r3 + adcs r5, r5, r12 + adc r6, r6, #0 + # A[2] * B[4] = 6 + ldr r10, [r1, #8] + ldr r11, [r2, #16] + umull r3, r12, r10, r11 + adds r4, r4, r3 + adcs r5, r5, r12 + adc r6, r6, #0 + # A[1] * B[5] = 6 + ldr r11, [r2, #20] + umull r3, r12, r8, r11 + adds r4, r4, r3 + adcs r5, r5, r12 + adc r6, r6, #0 + # A[0] * B[6] = 6 + ldr r11, [r2, #24] + umull r3, r12, r7, r11 + adds r4, r4, r3 + adcs r5, r5, r12 + adc r6, r6, #0 + str r4, [sp, #24] + # A[0] * B[7] = 7 + ldr r11, [r2, #28] + umull r3, r12, r7, r11 + adds r5, r5, r3 + mov r4, #0 + adcs r6, r6, r12 + adc r4, r4, #0 + # A[1] * B[6] = 7 + ldr r11, [r2, #24] + umull r3, r12, r8, r11 + adds r5, r5, r3 + adcs r6, r6, r12 + adc r4, r4, #0 + # A[2] * B[5] = 7 + ldr r11, [r2, #20] + umull r3, r12, r10, r11 + adds r5, r5, r3 + adcs r6, r6, r12 + adc r4, r4, #0 + # A[3] * B[4] = 7 + ldr r10, [r1, #12] + ldr r11, [r2, #16] + umull r3, r12, r10, r11 + adds r5, r5, r3 + adcs r6, r6, r12 + adc r4, r4, #0 + # A[4] * B[3] = 7 + ldr r10, [r1, #16] + ldr r11, [r2, #12] + umull r3, r12, r10, r11 + adds r5, r5, r3 + adcs r6, r6, r12 + adc r4, r4, #0 + # A[5] * B[2] = 7 + ldr r10, [r1, #20] + ldr r11, [r2, #8] + umull r3, r12, r10, r11 + adds r5, r5, r3 + adcs r6, r6, r12 + adc r4, r4, #0 + # A[6] * B[1] = 7 + ldr r10, [r1, #24] + umull r3, r12, r10, lr + adds r5, r5, r3 + adcs r6, r6, r12 + adc r4, r4, #0 + # A[7] * B[0] = 7 + ldr r10, [r1, #28] + umull r3, r12, r10, r9 + adds r5, r5, r3 + adcs r6, r6, r12 + adc r4, r4, #0 + str r5, [sp, #28] + ldr r7, [r1, #24] + ldr r9, [r2, #24] + # A[7] * B[1] = 8 + umull r3, r12, r10, lr + adds r6, r6, r3 + mov r5, #0 + adcs r4, r4, r12 + adc r5, r5, #0 + # A[6] * B[2] = 8 + umull r3, r12, r7, r11 + adds r6, r6, r3 + adcs r4, r4, r12 + adc r5, r5, #0 + # A[5] * B[3] = 8 + ldr r10, [r1, #20] + ldr r11, [r2, #12] + umull r3, r12, r10, r11 + adds r6, r6, r3 + adcs r4, r4, r12 + adc r5, r5, #0 + # A[4] * B[4] = 8 + ldr r10, [r1, #16] + ldr r11, [r2, #16] + umull r3, r12, r10, r11 + adds r6, r6, r3 + adcs r4, r4, r12 + adc r5, r5, #0 + # A[3] * B[5] = 8 + ldr r10, [r1, #12] + ldr r11, [r2, #20] + umull r3, r12, r10, r11 + adds r6, r6, r3 + adcs r4, r4, r12 + adc r5, r5, #0 + # A[2] * B[6] = 8 + ldr r10, [r1, #8] + umull r3, r12, r10, r9 + adds r6, r6, r3 + adcs r4, r4, r12 + adc r5, r5, #0 + # A[1] * B[7] = 8 + ldr r11, [r2, #28] + umull r3, r12, r8, r11 + adds r6, r6, r3 + adcs r4, r4, r12 + adc r5, r5, #0 + str r6, [sp, #32] + ldr r8, [r1, #28] + mov lr, r11 + # A[2] * B[7] = 9 + umull r3, r12, r10, lr + adds r4, r4, r3 + mov r6, #0 + adcs r5, r5, r12 + adc r6, r6, #0 + # A[3] * B[6] = 9 + ldr r10, [r1, #12] + umull r3, r12, r10, r9 + adds r4, r4, r3 + adcs r5, r5, r12 + adc r6, r6, #0 + 
# A[4] * B[5] = 9 + ldr r10, [r1, #16] + ldr r11, [r2, #20] + umull r3, r12, r10, r11 + adds r4, r4, r3 + adcs r5, r5, r12 + adc r6, r6, #0 + # A[5] * B[4] = 9 + ldr r10, [r1, #20] + ldr r11, [r2, #16] + umull r3, r12, r10, r11 + adds r4, r4, r3 + adcs r5, r5, r12 + adc r6, r6, #0 + # A[6] * B[3] = 9 + ldr r11, [r2, #12] + umull r3, r12, r7, r11 + adds r4, r4, r3 + adcs r5, r5, r12 + adc r6, r6, #0 + # A[7] * B[2] = 9 + ldr r11, [r2, #8] + umull r3, r12, r8, r11 + adds r4, r4, r3 + adcs r5, r5, r12 + adc r6, r6, #0 + str r4, [sp, #36] + # A[7] * B[3] = 10 + ldr r11, [r2, #12] + umull r3, r12, r8, r11 + adds r5, r5, r3 + mov r4, #0 + adcs r6, r6, r12 + adc r4, r4, #0 + # A[6] * B[4] = 10 + ldr r11, [r2, #16] + umull r3, r12, r7, r11 + adds r5, r5, r3 + adcs r6, r6, r12 + adc r4, r4, #0 + # A[5] * B[5] = 10 + ldr r11, [r2, #20] + umull r3, r12, r10, r11 + adds r5, r5, r3 + adcs r6, r6, r12 + adc r4, r4, #0 + # A[4] * B[6] = 10 + ldr r10, [r1, #16] + umull r3, r12, r10, r9 + adds r5, r5, r3 + adcs r6, r6, r12 + adc r4, r4, #0 + # A[3] * B[7] = 10 + ldr r10, [r1, #12] + umull r3, r12, r10, lr + adds r5, r5, r3 + adcs r6, r6, r12 + adc r4, r4, #0 + str r5, [sp, #40] + # A[4] * B[7] = 11 + ldr r10, [r1, #16] + umull r3, r12, r10, lr + adds r6, r6, r3 + mov r5, #0 + adcs r4, r4, r12 + adc r5, r5, #0 + # A[5] * B[6] = 11 + ldr r10, [r1, #20] + umull r3, r12, r10, r9 + adds r6, r6, r3 + adcs r4, r4, r12 + adc r5, r5, #0 + # A[6] * B[5] = 11 + umull r3, r12, r7, r11 + adds r6, r6, r3 + adcs r4, r4, r12 + adc r5, r5, #0 + # A[7] * B[4] = 11 + ldr r11, [r2, #16] + umull r3, r12, r8, r11 + adds r6, r6, r3 + adcs r4, r4, r12 + adc r5, r5, #0 + str r6, [sp, #44] + # A[7] * B[5] = 12 + ldr r11, [r2, #20] + umull r3, r12, r8, r11 + adds r4, r4, r3 + mov r6, #0 + adcs r5, r5, r12 + adc r6, r6, #0 + # A[6] * B[6] = 12 + umull r3, r12, r7, r9 + adds r4, r4, r3 + adcs r5, r5, r12 + adc r6, r6, #0 + # A[5] * B[7] = 12 + umull r3, r12, r10, lr + adds r4, r4, r3 + adcs r5, r5, r12 + adc r6, r6, #0 + str r4, [sp, #48] + # A[6] * B[7] = 13 + umull r3, r12, r7, lr + adds r5, r5, r3 + mov r4, #0 + adcs r6, r6, r12 + adc r4, r4, #0 + # A[7] * B[6] = 13 + umull r3, r12, r8, r9 + adds r5, r5, r3 + adcs r6, r6, r12 + adc r4, r4, #0 + str r5, [sp, #52] + # A[7] * B[7] = 14 + umull r3, r12, r8, lr + adds r6, r6, r3 + adc r4, r4, r12 + str r6, [sp, #56] + str r4, [sp, #60] + # Reduce + # Load bottom half + ldrd r4, r5, [sp] + ldrd r6, r7, [sp, #8] + ldrd r8, r9, [sp, #16] + ldrd r10, r11, [sp, #24] + lsr r3, r11, #31 + and r11, r11, #0x7fffffff + mov lr, #19 + ldr r1, [sp, #32] + orr r3, r3, r1, lsl #1 + umull r3, r12, lr, r3 + adds r4, r4, r3 + mov r2, #0 + adcs r5, r5, r12 + adc r2, r2, #0 + lsr r3, r1, #31 + ldr r1, [sp, #36] + orr r3, r3, r1, lsl #1 + umull r3, r12, lr, r3 + add r12, r12, r2 + adds r5, r5, r3 + mov r2, #0 + adcs r6, r6, r12 + adc r2, r2, #0 + lsr r3, r1, #31 + ldr r1, [sp, #40] + orr r3, r3, r1, lsl #1 + umull r3, r12, lr, r3 + add r12, r12, r2 + adds r6, r6, r3 + mov r2, #0 + adcs r7, r7, r12 + adc r2, r2, #0 + lsr r3, r1, #31 + ldr r1, [sp, #44] + orr r3, r3, r1, lsl #1 + umull r3, r12, lr, r3 + add r12, r12, r2 + adds r7, r7, r3 + mov r2, #0 + adcs r8, r8, r12 + adc r2, r2, #0 + lsr r3, r1, #31 + ldr r1, [sp, #48] + orr r3, r3, r1, lsl #1 + umull r3, r12, lr, r3 + add r12, r12, r2 + adds r8, r8, r3 + mov r2, #0 + adcs r9, r9, r12 + adc r2, r2, #0 + lsr r3, r1, #31 + ldr r1, [sp, #52] + orr r3, r3, r1, lsl #1 + umull r3, r12, lr, r3 + add r12, r12, r2 + adds r9, r9, r3 + mov r2, #0 + adcs r10, r10, 
r12 + adc r2, r2, #0 + lsr r3, r1, #31 + ldr r1, [sp, #56] + orr r3, r3, r1, lsl #1 + umull r3, r12, lr, r3 + add r12, r12, r2 + adds r10, r10, r3 + mov r2, #0 + adcs r11, r11, r12 + adc r2, r2, #0 + lsr r3, r1, #31 + ldr r1, [sp, #60] + orr r3, r3, r1, lsl #1 + umull r3, r12, lr, r3 + adds r11, r11, r3 + adc r3, r12, r2 + # Overflow + lsl r3, r3, #1 + orr r3, r3, r11, lsr #31 + mul r3, r3, lr + and r11, r11, #0x7fffffff + adds r4, r4, r3 + adcs r5, r5, #0 + adcs r6, r6, #0 + adcs r7, r7, #0 + adcs r8, r8, #0 + adcs r9, r9, #0 + adcs r10, r10, #0 + adc r11, r11, #0 + # Reduce if top bit set + asr r3, r11, #31 + and r3, r3, lr + and r11, r11, #0x7fffffff + adds r4, r4, r3 + adcs r5, r5, #0 + adcs r6, r6, #0 + adcs r7, r7, #0 + adcs r8, r8, #0 + adcs r9, r9, #0 + adcs r10, r10, #0 + adc r11, r11, #0 + # Store + strd r4, r5, [r0] + strd r6, r7, [r0, #8] + strd r8, r9, [r0, #16] + strd r10, r11, [r0, #24] + add sp, sp, #0x40 + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size fe_mul,.-fe_mul + .text + .align 2 + .globl fe_sq + .type fe_sq, %function +fe_sq: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + sub sp, sp, #0x40 + # Square + ldr r7, [r1] + ldr r8, [r1, #4] + ldr r9, [r1, #8] + ldr r10, [r1, #12] + ldr r12, [r1, #16] + # A[0] * A[0] = 0 + umull r4, r5, r7, r7 + str r4, [sp] + # A[0] * A[1] = 1 + umull r2, r3, r7, r8 + mov r6, #0 + adds r5, r5, r2 + adc r6, r6, r3 + adds r5, r5, r2 + mov r4, #0 + adcs r6, r6, r3 + adc r4, r4, #0 + str r5, [sp, #4] + # A[1] * A[1] = 2 + umull r2, r3, r8, r8 + adds r6, r6, r2 + adc r4, r4, r3 + # A[0] * A[2] = 2 + umull r2, r3, r7, r9 + adds r6, r6, r2 + mov r5, #0 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + str r6, [sp, #8] + # A[0] * A[3] = 3 + umull r2, r3, r7, r10 + adds r4, r4, r2 + adc r5, r5, r3 + adds r4, r4, r2 + mov r6, #0 + adcs r5, r5, r3 + adc r6, r6, #0 + # A[1] * A[2] = 3 + umull r2, r3, r8, r9 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + str r4, [sp, #12] + # A[2] * A[2] = 4 + umull r2, r3, r9, r9 + adds r5, r5, r2 + mov r4, #0 + adcs r6, r6, r3 + adc r4, r4, #0 + # A[1] * A[3] = 4 + umull r2, r3, r8, r10 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + # A[0] * A[4] = 4 + umull r2, r3, r7, r12 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + str r5, [sp, #16] + # A[0] * A[5] = 5 + ldr r11, [r1, #20] + umull r2, r3, r7, r11 + adds r6, r6, r2 + mov r5, #0 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + # A[1] * A[4] = 5 + umull r2, r3, r8, r12 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + # A[2] * A[3] = 5 + umull r2, r3, r9, r10 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + str r6, [sp, #20] + # A[3] * A[3] = 6 + umull r2, r3, r10, r10 + adds r4, r4, r2 + mov r6, #0 + adcs r5, r5, r3 + adc r6, r6, #0 + # A[2] * A[4] = 6 + umull r2, r3, r9, r12 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + # A[1] * A[5] = 6 + umull r2, r3, r8, r11 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + # A[0] * A[6] = 6 + ldr r11, [r1, #24] + umull r2, r3, r7, r11 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + adds r4, r4, r2 + adcs r5, r5, r3 + adc 
r6, r6, #0 + str r4, [sp, #24] + # A[0] * A[7] = 7 + ldr r11, [r1, #28] + umull r2, r3, r7, r11 + adds r5, r5, r2 + mov r4, #0 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + # A[1] * A[6] = 7 + ldr r11, [r1, #24] + umull r2, r3, r8, r11 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + # A[2] * A[5] = 7 + ldr r11, [r1, #20] + umull r2, r3, r9, r11 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + # A[3] * A[4] = 7 + umull r2, r3, r10, r12 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + str r5, [sp, #28] + # A[4] * A[4] = 8 + umull r2, r3, r12, r12 + adds r6, r6, r2 + mov r5, #0 + adcs r4, r4, r3 + adc r5, r5, #0 + # A[3] * A[5] = 8 + umull r2, r3, r10, r11 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + # A[2] * A[6] = 8 + ldr r11, [r1, #24] + umull r2, r3, r9, r11 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + # A[1] * A[7] = 8 + ldr r11, [r1, #28] + umull r2, r3, r8, r11 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + str r6, [sp, #32] + ldr r7, [r1, #20] + # A[2] * A[7] = 9 + umull r2, r3, r9, r11 + adds r4, r4, r2 + mov r6, #0 + adcs r5, r5, r3 + adc r6, r6, #0 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + # A[3] * A[6] = 9 + ldr r11, [r1, #24] + umull r2, r3, r10, r11 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + # A[4] * A[5] = 9 + umull r2, r3, r12, r7 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + str r4, [sp, #36] + mov r8, r11 + # A[5] * A[5] = 10 + umull r2, r3, r7, r7 + adds r5, r5, r2 + mov r4, #0 + adcs r6, r6, r3 + adc r4, r4, #0 + # A[4] * A[6] = 10 + umull r2, r3, r12, r8 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + # A[3] * A[7] = 10 + ldr r11, [r1, #28] + umull r2, r3, r10, r11 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + str r5, [sp, #40] + mov r9, r11 + # A[4] * A[7] = 11 + umull r2, r3, r12, r9 + adds r6, r6, r2 + mov r5, #0 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + # A[5] * A[6] = 11 + umull r2, r3, r7, r8 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + str r6, [sp, #44] + # A[6] * A[6] = 12 + umull r2, r3, r8, r8 + adds r4, r4, r2 + mov r6, #0 + adcs r5, r5, r3 + adc r6, r6, #0 + # A[5] * A[7] = 12 + umull r2, r3, r7, r9 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + str r4, [sp, #48] + # A[6] * A[7] = 13 + umull r2, r3, r8, r9 + adds r5, r5, r2 + mov r4, #0 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + str r5, [sp, #52] + # A[7] * A[7] = 14 + umull r2, r3, r9, r9 + adds r6, r6, r2 + adc r4, r4, r3 + str r6, [sp, #56] + str r4, [sp, #60] + # Reduce + # Load bottom half + ldrd r4, r5, [sp] + ldrd r6, r7, [sp, #8] + ldrd r8, r9, [sp, #16] + ldrd r10, r11, [sp, #24] + lsr r2, r11, #31 + and r11, r11, #0x7fffffff + mov r12, #19 + ldr r1, [sp, #32] + orr r2, r2, r1, lsl #1 + umull r2, r3, r12, r2 + adds r4, r4, r2 + 
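
fe_sq follows the same column scheme as fe_mul, with one saving visible in the instruction stream: each off-diagonal product A[i]*A[j] (i < j) belongs to column i + j twice, so the assembly issues a single umull and adds its result two times, while diagonal products A[i]*A[i] are added once. A sketch of that structure, under the same assumptions as the multiply sketch above (sqr256_sketch is an illustrative name):

    #include <stdint.h>

    /* Comba squaring: off-diagonal products a[i]*a[j] (i < j) are
     * accumulated twice into column i + j, diagonal ones once. */
    static void sqr256_sketch(uint32_t t[16], const uint32_t a[8])
    {
        uint32_t c0 = 0, c1 = 0, c2 = 0;
        for (int k = 0; k < 15; k++) {
            int i0 = (k < 8) ? 0 : k - 7;
            for (int i = i0; i <= k - i; i++) {      /* only i <= j = k - i */
                uint64_t p = (uint64_t)a[i] * a[k - i];   /* one umull */
                int reps = (i == k - i) ? 1 : 2;     /* cross terms twice */
                while (reps--) {
                    uint64_t s0 = (uint64_t)c0 + (uint32_t)p;
                    uint64_t s1 = (uint64_t)c1 + (uint32_t)(p >> 32)
                                + (uint32_t)(s0 >> 32);
                    c0 = (uint32_t)s0;
                    c1 = (uint32_t)s1;
                    c2 += (uint32_t)(s1 >> 32);
                }
            }
            t[k] = c0;
            c0 = c1; c1 = c2; c2 = 0;
        }
        t[15] = c0;
    }
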
mov lr, #0 + adcs r5, r5, r3 + adc lr, lr, #0 + lsr r2, r1, #31 + ldr r1, [sp, #36] + orr r2, r2, r1, lsl #1 + umull r2, r3, r12, r2 + add r3, r3, lr + adds r5, r5, r2 + mov lr, #0 + adcs r6, r6, r3 + adc lr, lr, #0 + lsr r2, r1, #31 + ldr r1, [sp, #40] + orr r2, r2, r1, lsl #1 + umull r2, r3, r12, r2 + add r3, r3, lr + adds r6, r6, r2 + mov lr, #0 + adcs r7, r7, r3 + adc lr, lr, #0 + lsr r2, r1, #31 + ldr r1, [sp, #44] + orr r2, r2, r1, lsl #1 + umull r2, r3, r12, r2 + add r3, r3, lr + adds r7, r7, r2 + mov lr, #0 + adcs r8, r8, r3 + adc lr, lr, #0 + lsr r2, r1, #31 + ldr r1, [sp, #48] + orr r2, r2, r1, lsl #1 + umull r2, r3, r12, r2 + add r3, r3, lr + adds r8, r8, r2 + mov lr, #0 + adcs r9, r9, r3 + adc lr, lr, #0 + lsr r2, r1, #31 + ldr r1, [sp, #52] + orr r2, r2, r1, lsl #1 + umull r2, r3, r12, r2 + add r3, r3, lr + adds r9, r9, r2 + mov lr, #0 + adcs r10, r10, r3 + adc lr, lr, #0 + lsr r2, r1, #31 + ldr r1, [sp, #56] + orr r2, r2, r1, lsl #1 + umull r2, r3, r12, r2 + add r3, r3, lr + adds r10, r10, r2 + mov lr, #0 + adcs r11, r11, r3 + adc lr, lr, #0 + lsr r2, r1, #31 + ldr r1, [sp, #60] + orr r2, r2, r1, lsl #1 + umull r2, r3, r12, r2 + adds r11, r11, r2 + adc r2, r3, lr + # Overflow + lsl r2, r2, #1 + orr r2, r2, r11, lsr #31 + mul r2, r2, r12 + and r11, r11, #0x7fffffff + adds r4, r4, r2 + adcs r5, r5, #0 + adcs r6, r6, #0 + adcs r7, r7, #0 + adcs r8, r8, #0 + adcs r9, r9, #0 + adcs r10, r10, #0 + adc r11, r11, #0 + # Reduce if top bit set + asr r2, r11, #31 + and r2, r2, r12 + and r11, r11, #0x7fffffff + adds r4, r4, r2 + adcs r5, r5, #0 + adcs r6, r6, #0 + adcs r7, r7, #0 + adcs r8, r8, #0 + adcs r9, r9, #0 + adcs r10, r10, #0 + adc r11, r11, #0 + # Store + strd r4, r5, [r0] + strd r6, r7, [r0, #8] + strd r8, r9, [r0, #16] + strd r10, r11, [r0, #24] + add sp, sp, #0x40 + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size fe_sq,.-fe_sq + .text + .align 2 + .globl fe_mul121666 + .type fe_mul121666, %function +fe_mul121666: + push {r4, r5, r6, r7, r8, r9, r10, lr} + # Multiply by 121666 + ldrd r2, r3, [r1] + ldrd r4, r5, [r1, #8] + ldrd r6, r7, [r1, #16] + ldrd r8, r9, [r1, #24] + movw lr, #0xdb42 + movt lr, #1 + umull r2, r10, r2, lr + umull r3, r12, r3, lr + adds r3, r3, r10 + adc r10, r12, #0 + umull r4, r12, r4, lr + adds r4, r4, r10 + adc r10, r12, #0 + umull r5, r12, r5, lr + adds r5, r5, r10 + adc r10, r12, #0 + umull r6, r12, r6, lr + adds r6, r6, r10 + adc r10, r12, #0 + umull r7, r12, r7, lr + adds r7, r7, r10 + adc r10, r12, #0 + umull r8, r12, r8, lr + adds r8, r8, r10 + adc r10, r12, #0 + umull r9, r12, r9, lr + adds r9, r9, r10 + adc r10, r12, #0 + mov lr, #19 + lsl r10, r10, #1 + orr r10, r10, r9, lsr #31 + mul r10, r10, lr + and r9, r9, #0x7fffffff + adds r2, r2, r10 + adcs r3, r3, #0 + adcs r4, r4, #0 + adcs r5, r5, #0 + adcs r6, r6, #0 + adcs r7, r7, #0 + adcs r8, r8, #0 + adc r9, r9, #0 + strd r2, r3, [r0] + strd r4, r5, [r0, #8] + strd r6, r7, [r0, #16] + strd r8, r9, [r0, #24] + pop {r4, r5, r6, r7, r8, r9, r10, pc} + .size fe_mul121666,.-fe_mul121666 + .text + .align 2 + .globl fe_sq2 + .type fe_sq2, %function +fe_sq2: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + sub sp, sp, #0x40 + # Square * 2 + ldr r7, [r1] + ldr r8, [r1, #4] + ldr r9, [r1, #8] + ldr r10, [r1, #12] + ldr r12, [r1, #16] + # A[0] * A[0] = 0 + umull r4, r5, r7, r7 + str r4, [sp] + # A[0] * A[1] = 1 + umull r2, r3, r7, r8 + mov r6, #0 + adds r5, r5, r2 + adc r6, r6, r3 + adds r5, r5, r2 + mov r4, #0 + adcs r6, r6, r3 + adc r4, r4, #0 + str r5, [sp, #4] + # A[1] * A[1] = 2 + umull r2, r3, r8, 
r8 + adds r6, r6, r2 + adc r4, r4, r3 + # A[0] * A[2] = 2 + umull r2, r3, r7, r9 + adds r6, r6, r2 + mov r5, #0 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + str r6, [sp, #8] + # A[0] * A[3] = 3 + umull r2, r3, r7, r10 + adds r4, r4, r2 + adc r5, r5, r3 + adds r4, r4, r2 + mov r6, #0 + adcs r5, r5, r3 + adc r6, r6, #0 + # A[1] * A[2] = 3 + umull r2, r3, r8, r9 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + str r4, [sp, #12] + # A[2] * A[2] = 4 + umull r2, r3, r9, r9 + adds r5, r5, r2 + mov r4, #0 + adcs r6, r6, r3 + adc r4, r4, #0 + # A[1] * A[3] = 4 + umull r2, r3, r8, r10 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + # A[0] * A[4] = 4 + umull r2, r3, r7, r12 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + str r5, [sp, #16] + # A[0] * A[5] = 5 + ldr r11, [r1, #20] + umull r2, r3, r7, r11 + adds r6, r6, r2 + mov r5, #0 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + # A[1] * A[4] = 5 + umull r2, r3, r8, r12 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + # A[2] * A[3] = 5 + umull r2, r3, r9, r10 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + str r6, [sp, #20] + # A[3] * A[3] = 6 + umull r2, r3, r10, r10 + adds r4, r4, r2 + mov r6, #0 + adcs r5, r5, r3 + adc r6, r6, #0 + # A[2] * A[4] = 6 + umull r2, r3, r9, r12 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + # A[1] * A[5] = 6 + umull r2, r3, r8, r11 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + # A[0] * A[6] = 6 + ldr r11, [r1, #24] + umull r2, r3, r7, r11 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + str r4, [sp, #24] + # A[0] * A[7] = 7 + ldr r11, [r1, #28] + umull r2, r3, r7, r11 + adds r5, r5, r2 + mov r4, #0 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + # A[1] * A[6] = 7 + ldr r11, [r1, #24] + umull r2, r3, r8, r11 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + # A[2] * A[5] = 7 + ldr r11, [r1, #20] + umull r2, r3, r9, r11 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + # A[3] * A[4] = 7 + umull r2, r3, r10, r12 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + str r5, [sp, #28] + # A[4] * A[4] = 8 + umull r2, r3, r12, r12 + adds r6, r6, r2 + mov r5, #0 + adcs r4, r4, r3 + adc r5, r5, #0 + # A[3] * A[5] = 8 + umull r2, r3, r10, r11 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + # A[2] * A[6] = 8 + ldr r11, [r1, #24] + umull r2, r3, r9, r11 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + # A[1] * A[7] = 8 + ldr r11, [r1, #28] + umull r2, r3, r8, r11 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + str r6, [sp, #32] + ldr r7, [r1, #20] + # A[2] * A[7] = 9 + umull r2, r3, r9, r11 + adds r4, r4, r2 + mov r6, #0 + adcs r5, r5, r3 + adc r6, r6, #0 + adds r4, r4, r2 + adcs r5, r5, r3 
+ adc r6, r6, #0 + # A[3] * A[6] = 9 + ldr r11, [r1, #24] + umull r2, r3, r10, r11 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + # A[4] * A[5] = 9 + umull r2, r3, r12, r7 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + str r4, [sp, #36] + mov r8, r11 + # A[5] * A[5] = 10 + umull r2, r3, r7, r7 + adds r5, r5, r2 + mov r4, #0 + adcs r6, r6, r3 + adc r4, r4, #0 + # A[4] * A[6] = 10 + umull r2, r3, r12, r8 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + # A[3] * A[7] = 10 + ldr r11, [r1, #28] + umull r2, r3, r10, r11 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + str r5, [sp, #40] + mov r9, r11 + # A[4] * A[7] = 11 + umull r2, r3, r12, r9 + adds r6, r6, r2 + mov r5, #0 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + # A[5] * A[6] = 11 + umull r2, r3, r7, r8 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + adds r6, r6, r2 + adcs r4, r4, r3 + adc r5, r5, #0 + str r6, [sp, #44] + # A[6] * A[6] = 12 + umull r2, r3, r8, r8 + adds r4, r4, r2 + mov r6, #0 + adcs r5, r5, r3 + adc r6, r6, #0 + # A[5] * A[7] = 12 + umull r2, r3, r7, r9 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + adds r4, r4, r2 + adcs r5, r5, r3 + adc r6, r6, #0 + str r4, [sp, #48] + # A[6] * A[7] = 13 + umull r2, r3, r8, r9 + adds r5, r5, r2 + mov r4, #0 + adcs r6, r6, r3 + adc r4, r4, #0 + adds r5, r5, r2 + adcs r6, r6, r3 + adc r4, r4, #0 + str r5, [sp, #52] + # A[7] * A[7] = 14 + umull r2, r3, r9, r9 + adds r6, r6, r2 + adc r4, r4, r3 + str r6, [sp, #56] + str r4, [sp, #60] + # Double and Reduce + # Load bottom half + ldrd r4, r5, [sp] + ldrd r6, r7, [sp, #8] + ldrd r8, r9, [sp, #16] + ldrd r10, r11, [sp, #24] + lsr r2, r11, #30 + lsl r11, r11, #1 + orr r11, r11, r10, lsr #31 + lsl r10, r10, #1 + orr r10, r10, r9, lsr #31 + lsl r9, r9, #1 + orr r9, r9, r8, lsr #31 + lsl r8, r8, #1 + orr r8, r8, r7, lsr #31 + lsl r7, r7, #1 + orr r7, r7, r6, lsr #31 + lsl r6, r6, #1 + orr r6, r6, r5, lsr #31 + lsl r5, r5, #1 + orr r5, r5, r4, lsr #31 + lsl r4, r4, #1 + and r11, r11, #0x7fffffff + mov r12, #19 + ldr r1, [sp, #32] + orr r2, r2, r1, lsl #2 + umull r2, r3, r12, r2 + adds r4, r4, r2 + mov lr, #0 + adcs r5, r5, r3 + adc lr, lr, #0 + lsr r2, r1, #30 + ldr r1, [sp, #36] + orr r2, r2, r1, lsl #2 + umull r2, r3, r12, r2 + add r3, r3, lr + adds r5, r5, r2 + mov lr, #0 + adcs r6, r6, r3 + adc lr, lr, #0 + lsr r2, r1, #30 + ldr r1, [sp, #40] + orr r2, r2, r1, lsl #2 + umull r2, r3, r12, r2 + add r3, r3, lr + adds r6, r6, r2 + mov lr, #0 + adcs r7, r7, r3 + adc lr, lr, #0 + lsr r2, r1, #30 + ldr r1, [sp, #44] + orr r2, r2, r1, lsl #2 + umull r2, r3, r12, r2 + add r3, r3, lr + adds r7, r7, r2 + mov lr, #0 + adcs r8, r8, r3 + adc lr, lr, #0 + lsr r2, r1, #30 + ldr r1, [sp, #48] + orr r2, r2, r1, lsl #2 + umull r2, r3, r12, r2 + add r3, r3, lr + adds r8, r8, r2 + mov lr, #0 + adcs r9, r9, r3 + adc lr, lr, #0 + lsr r2, r1, #30 + ldr r1, [sp, #52] + orr r2, r2, r1, lsl #2 + umull r2, r3, r12, r2 + add r3, r3, lr + adds r9, r9, r2 + mov lr, #0 + adcs r10, r10, r3 + adc lr, lr, #0 + lsr r2, r1, #30 + ldr r1, [sp, #56] + orr r2, r2, r1, lsl #2 + umull r2, r3, r12, r2 + add r3, r3, lr + adds r10, r10, r2 + mov lr, #0 + adcs r11, r11, r3 + adc lr, lr, #0 + lsr r2, r1, #30 + ldr r1, [sp, #60] + orr r2, r2, r1, lsl #2 + umull r2, r3, r12, r2 + adds r11, r11, r2 + 
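
Multiplication and squaring both end in the `# Reduce` block seen around here: with p = 2^255 - 19 we have 2^255 ≡ 19 (mod p), so the 512-bit result t is folded as (t mod 2^255) + 19 * (t >> 255). The lsr #31 / orr ..., lsl #1 pairs build each 32-bit window of t >> 255 and lr/r12 holds the constant 19 (fe_sq2 doubles the whole square first, which is why its windows use lsl #2 / lsr #30 instead). A C sketch of the fold, assuming bit 511 of t is clear — which holds for products of partially reduced 255-bit inputs — with reduce_sketch an illustrative name:

    #include <stdint.h>

    /* Fold a 512-bit value t modulo p = 2^255 - 19.  The result, like
     * the assembly's, is only partially reduced: it fits in 255 bits
     * but may still be >= p. */
    static void reduce_sketch(uint32_t r[8], const uint32_t t[16])
    {
        uint64_t c = 0, top;
        /* r = (t mod 2^255) + 19 * (t >> 255); window i of the high
         * part is (t[7+i] >> 31) | (t[8+i] << 1), as in the assembly */
        for (int i = 0; i < 8; i++) {
            uint32_t hw = (t[7 + i] >> 31) | (t[8 + i] << 1);
            c += (uint64_t)19 * hw + ((i == 7) ? (t[7] & 0x7fffffff) : t[i]);
            r[i] = (uint32_t)c;
            c >>= 32;
        }
        /* "# Overflow": fold anything at or above bit 255 once more */
        top = (c << 1) | (r[7] >> 31);
        r[7] &= 0x7fffffff;
        c = 19 * top;
        for (int i = 0; i < 8; i++) { c += r[i]; r[i] = (uint32_t)c; c >>= 32; }
        /* "# Reduce if top bit set": one last conditional fold */
        c = 19 * (uint64_t)(r[7] >> 31);
        r[7] &= 0x7fffffff;
        for (int i = 0; i < 8; i++) { c += r[i]; r[i] = (uint32_t)c; c >>= 32; }
    }
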
adc r2, r3, lr + # Overflow + lsl r2, r2, #1 + orr r2, r2, r11, lsr #31 + mul r2, r2, r12 + and r11, r11, #0x7fffffff + adds r4, r4, r2 + adcs r5, r5, #0 + adcs r6, r6, #0 + adcs r7, r7, #0 + adcs r8, r8, #0 + adcs r9, r9, #0 + adcs r10, r10, #0 + adc r11, r11, #0 + # Reduce if top bit set + asr r2, r11, #31 + and r2, r2, r12 + and r11, r11, #0x7fffffff + adds r4, r4, r2 + adcs r5, r5, #0 + adcs r6, r6, #0 + adcs r7, r7, #0 + adcs r8, r8, #0 + adcs r9, r9, #0 + adcs r10, r10, #0 + adc r11, r11, #0 + # Store + strd r4, r5, [r0] + strd r6, r7, [r0, #8] + strd r8, r9, [r0, #16] + strd r10, r11, [r0, #24] + add sp, sp, #0x40 + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size fe_sq2,.-fe_sq2 + .text + .align 2 + .globl fe_invert + .type fe_invert, %function +fe_invert: + push {r4, lr} + sub sp, sp, #0x88 + # Invert + str r0, [sp, #128] + str r1, [sp, #132] + mov r0, sp + ldr r1, [sp, #132] + bl fe_sq + add r0, sp, #32 + mov r1, sp + bl fe_sq + add r0, sp, #32 + add r1, sp, #32 + bl fe_sq + add r0, sp, #32 + ldr r1, [sp, #132] + add r2, sp, #32 + bl fe_mul + mov r0, sp + mov r1, sp + add r2, sp, #32 + bl fe_mul + add r0, sp, #0x40 + mov r1, sp + bl fe_sq + add r0, sp, #32 + add r1, sp, #32 + add r2, sp, #0x40 + bl fe_mul + add r0, sp, #0x40 + add r1, sp, #32 + bl fe_sq + mov r4, #4 +L_fe_invert1: + add r0, sp, #0x40 + add r1, sp, #0x40 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_fe_invert1 + add r0, sp, #32 + add r1, sp, #0x40 + add r2, sp, #32 + bl fe_mul + add r0, sp, #0x40 + add r1, sp, #32 + bl fe_sq + mov r4, #9 +L_fe_invert2: + add r0, sp, #0x40 + add r1, sp, #0x40 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_fe_invert2 + add r0, sp, #0x40 + add r1, sp, #0x40 + add r2, sp, #32 + bl fe_mul + add r0, sp, #0x60 + add r1, sp, #0x40 + bl fe_sq + mov r4, #19 +L_fe_invert3: + add r0, sp, #0x60 + add r1, sp, #0x60 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_fe_invert3 + add r0, sp, #0x40 + add r1, sp, #0x60 + add r2, sp, #0x40 + bl fe_mul + mov r4, #10 +L_fe_invert4: + add r0, sp, #0x40 + add r1, sp, #0x40 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_fe_invert4 + add r0, sp, #32 + add r1, sp, #0x40 + add r2, sp, #32 + bl fe_mul + add r0, sp, #0x40 + add r1, sp, #32 + bl fe_sq + mov r4, #49 +L_fe_invert5: + add r0, sp, #0x40 + add r1, sp, #0x40 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_fe_invert5 + add r0, sp, #0x40 + add r1, sp, #0x40 + add r2, sp, #32 + bl fe_mul + add r0, sp, #0x60 + add r1, sp, #0x40 + bl fe_sq + mov r4, #0x63 +L_fe_invert6: + add r0, sp, #0x60 + add r1, sp, #0x60 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_fe_invert6 + add r0, sp, #0x40 + add r1, sp, #0x60 + add r2, sp, #0x40 + bl fe_mul + mov r4, #50 +L_fe_invert7: + add r0, sp, #0x40 + add r1, sp, #0x40 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_fe_invert7 + add r0, sp, #32 + add r1, sp, #0x40 + add r2, sp, #32 + bl fe_mul + mov r4, #5 +L_fe_invert8: + add r0, sp, #32 + add r1, sp, #32 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_fe_invert8 + ldr r0, [sp, #128] + add r1, sp, #32 + mov r2, sp + bl fe_mul + ldr r1, [sp, #132] + ldr r0, [sp, #128] + add sp, sp, #0x88 + pop {r4, pc} + .size fe_invert,.-fe_invert + .text + .align 2 + .globl curve25519 + .type curve25519, %function +curve25519: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + sub sp, sp, #0xbc + str r0, [sp, #160] + str r1, [sp, #164] + str r2, [sp, #168] + mov r1, #0 + str r1, [sp, #172] + # Set one + mov r11, #1 + mov r10, #0 + str r11, [r0] + str r10, [r0, #4] + str r10, [r0, #8] + str r10, [r0, #12] + str r10, [r0, #16] 
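
The curve25519 routine beginning here runs a Montgomery ladder: after initialising (x2, z2) = (1, 0) and (x3, z3) = (x1, 1) as above, it walks the clamped scalar from bit 254 down to 0 (the word and bit counters at sp+176/sp+180), conditionally swapping the two working points and doing one differential add-and-double per bit, then recovers the affine x-coordinate via the inversion chain at the end. A sketch of that control flow in the style of the ref10 reference code — reusing this file's fe_mul/fe_sq/fe_mul121666/fe_invert and assuming fe_add, fe_sub and fe_cswap helpers (names illustrative):

    #include <stdint.h>

    typedef uint32_t fe[8];               /* packed field element, as above */
    extern void fe_mul(fe r, const fe a, const fe b);   /* this file */
    extern void fe_sq(fe r, const fe a);                /* this file */
    extern void fe_mul121666(fe r, const fe a);         /* this file */
    extern void fe_invert(fe r, const fe a);            /* this file */
    extern void fe_add(fe r, const fe a, const fe b);   /* assumed helper */
    extern void fe_sub(fe r, const fe a, const fe b);   /* assumed helper */
    extern void fe_cswap(fe a, fe b, int swap);         /* assumed helper */

    /* Montgomery ladder over the clamped scalar n; x2 ends up holding
     * the affine x-coordinate of n * (x1, y). */
    static void ladder_sketch(fe x2, const uint8_t n[32], const fe x1)
    {
        fe z2, x3, z3, tmp0, tmp1;
        int pos, b, swap = 0;
        for (int i = 0; i < 8; i++) {
            x2[i] = 0; z2[i] = 0; x3[i] = x1[i]; z3[i] = 0;
        }
        x2[0] = 1; z3[0] = 1;
        for (pos = 254; pos >= 0; --pos) {
            b = (n[pos >> 3] >> (pos & 7)) & 1;  /* scalar bit, high to low */
            swap ^= b;
            fe_cswap(x2, x3, swap);              /* constant-time select */
            fe_cswap(z2, z3, swap);
            swap = b;
            /* one differential add-and-double step */
            fe_sub(tmp0, x3, z3);
            fe_sub(tmp1, x2, z2);
            fe_add(x2, x2, z2);
            fe_add(z2, x3, z3);
            fe_mul(z3, tmp0, x2);
            fe_mul(z2, z2, tmp1);
            fe_sq(tmp0, tmp1);
            fe_sq(tmp1, x2);
            fe_add(x3, z3, z2);
            fe_sub(z2, z3, z2);
            fe_mul(x2, tmp1, tmp0);
            fe_sub(tmp1, tmp1, tmp0);
            fe_sq(z2, z2);
            fe_mul121666(z3, tmp1);              /* the (A+2)/4 constant */
            fe_sq(x3, x3);
            fe_add(tmp0, tmp0, z3);
            fe_mul(z3, x1, z2);
            fe_mul(z2, tmp1, tmp0);
        }
        fe_cswap(x2, x3, swap);
        fe_cswap(z2, z3, swap);
        fe_invert(z2, z2);                       /* back to affine: x = X/Z */
        fe_mul(x2, x2, z2);
    }

fe_mul121666 exists as a dedicated routine above precisely because the x-only doubling formula multiplies by the fixed constant (A+2)/4 = 121666 (the movw #0xdb42 / movt #1 pair).
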
+ str r10, [r0, #20] + str r10, [r0, #24] + str r10, [r0, #28] + # Set zero + mov r10, #0 + str r10, [sp] + str r10, [sp, #4] + str r10, [sp, #8] + str r10, [sp, #12] + str r10, [sp, #16] + str r10, [sp, #20] + str r10, [sp, #24] + str r10, [sp, #28] + # Set one + mov r11, #1 + mov r10, #0 + str r11, [sp, #32] + str r10, [sp, #36] + str r10, [sp, #40] + str r10, [sp, #44] + str r10, [sp, #48] + str r10, [sp, #52] + str r10, [sp, #56] + str r10, [sp, #60] + # Copy + ldrd r4, r5, [r2] + ldrd r6, r7, [r2, #8] + strd r4, r5, [sp, #64] + strd r6, r7, [sp, #72] + ldrd r4, r5, [r2, #16] + ldrd r6, r7, [r2, #24] + strd r4, r5, [sp, #80] + strd r6, r7, [sp, #88] + mov r1, #30 + str r1, [sp, #180] + mov r2, #28 + str r2, [sp, #176] +L_curve25519_words: +L_curve25519_bits: + ldr r1, [sp, #164] + ldr r2, [r1, r2] + ldr r1, [sp, #180] + lsr r2, r2, r1 + and r2, r2, #1 + str r2, [sp, #184] + ldr r1, [sp, #172] + eor r1, r1, r2 + str r1, [sp, #172] + ldr r0, [sp, #160] + # Conditional Swap + neg r1, r1 + ldrd r4, r5, [r0] + ldrd r6, r7, [sp, #64] + eor r8, r4, r6 + eor r9, r5, r7 + and r8, r8, r1 + and r9, r9, r1 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r8 + eor r7, r7, r9 + strd r4, r5, [r0] + strd r6, r7, [sp, #64] + ldrd r4, r5, [r0, #8] + ldrd r6, r7, [sp, #72] + eor r8, r4, r6 + eor r9, r5, r7 + and r8, r8, r1 + and r9, r9, r1 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r8 + eor r7, r7, r9 + strd r4, r5, [r0, #8] + strd r6, r7, [sp, #72] + ldrd r4, r5, [r0, #16] + ldrd r6, r7, [sp, #80] + eor r8, r4, r6 + eor r9, r5, r7 + and r8, r8, r1 + and r9, r9, r1 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r8 + eor r7, r7, r9 + strd r4, r5, [r0, #16] + strd r6, r7, [sp, #80] + ldrd r4, r5, [r0, #24] + ldrd r6, r7, [sp, #88] + eor r8, r4, r6 + eor r9, r5, r7 + and r8, r8, r1 + and r9, r9, r1 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r8 + eor r7, r7, r9 + strd r4, r5, [r0, #24] + strd r6, r7, [sp, #88] + ldr r1, [sp, #172] + # Conditional Swap + neg r1, r1 + ldrd r4, r5, [sp] + ldrd r6, r7, [sp, #32] + eor r8, r4, r6 + eor r9, r5, r7 + and r8, r8, r1 + and r9, r9, r1 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r8 + eor r7, r7, r9 + strd r4, r5, [sp] + strd r6, r7, [sp, #32] + ldrd r4, r5, [sp, #8] + ldrd r6, r7, [sp, #40] + eor r8, r4, r6 + eor r9, r5, r7 + and r8, r8, r1 + and r9, r9, r1 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r8 + eor r7, r7, r9 + strd r4, r5, [sp, #8] + strd r6, r7, [sp, #40] + ldrd r4, r5, [sp, #16] + ldrd r6, r7, [sp, #48] + eor r8, r4, r6 + eor r9, r5, r7 + and r8, r8, r1 + and r9, r9, r1 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r8 + eor r7, r7, r9 + strd r4, r5, [sp, #16] + strd r6, r7, [sp, #48] + ldrd r4, r5, [sp, #24] + ldrd r6, r7, [sp, #56] + eor r8, r4, r6 + eor r9, r5, r7 + and r8, r8, r1 + and r9, r9, r1 + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r8 + eor r7, r7, r9 + strd r4, r5, [sp, #24] + strd r6, r7, [sp, #56] + ldr r1, [sp, #184] + str r1, [sp, #172] + # Add-Sub + # Add + ldrd r4, r5, [r0] + ldrd r6, r7, [sp] + adds r8, r4, r6 + mov r3, #0 + adcs r9, r5, r7 + adc r3, r3, #0 + strd r8, r9, [r0] + # Sub + subs r10, r4, r6 + mov r12, #0 + sbcs r11, r5, r7 + adc r12, r12, #0 + strd r10, r11, [sp, #128] + # Add + ldrd r4, r5, [r0, #8] + ldrd r6, r7, [sp, #8] + adds r3, r3, #-1 + adcs r8, r4, r6 + mov r3, #0 + adcs r9, r5, r7 + adc r3, r3, #0 + strd r8, r9, [r0, #8] + # Sub + adds r12, r12, #-1 + sbcs r10, r4, r6 + mov r12, #0 + sbcs r11, r5, r7 + adc r12, r12, #0 + strd r10, r11, [sp, #136] + # Add + ldrd r4, r5, [r0, #16] + ldrd r6, 
r7, [sp, #16] + adds r3, r3, #-1 + adcs r8, r4, r6 + mov r3, #0 + adcs r9, r5, r7 + adc r3, r3, #0 + strd r8, r9, [r0, #16] + # Sub + adds r12, r12, #-1 + sbcs r10, r4, r6 + mov r12, #0 + sbcs r11, r5, r7 + adc r12, r12, #0 + strd r10, r11, [sp, #144] + # Add + ldrd r4, r5, [r0, #24] + ldrd r6, r7, [sp, #24] + adds r3, r3, #-1 + adcs r8, r4, r6 + adc r9, r5, r7 + # Sub + adds r12, r12, #-1 + sbcs r10, r4, r6 + sbc r11, r5, r7 + mov r3, #-19 + asr r2, r9, #31 + # Mask the modulus + and r3, r2, r3 + and r12, r2, #0x7fffffff + # Sub modulus (if overflow) + ldrd r4, r5, [r0] + subs r4, r4, r3 + sbcs r5, r5, r2 + strd r4, r5, [r0] + ldrd r4, r5, [r0, #8] + sbcs r4, r4, r2 + sbcs r5, r5, r2 + strd r4, r5, [r0, #8] + ldrd r4, r5, [r0, #16] + sbcs r4, r4, r2 + sbcs r5, r5, r2 + strd r4, r5, [r0, #16] + sbcs r8, r8, r2 + sbc r9, r9, r12 + strd r8, r9, [r0, #24] + mov r3, #-19 + asr r2, r11, #31 + # Mask the modulus + and r3, r2, r3 + and r12, r2, #0x7fffffff + # Add modulus (if underflow) + ldrd r4, r5, [sp, #128] + adds r4, r4, r3 + adcs r5, r5, r2 + strd r4, r5, [sp, #128] + ldrd r4, r5, [sp, #136] + adcs r4, r4, r2 + adcs r5, r5, r2 + strd r4, r5, [sp, #136] + ldrd r4, r5, [sp, #144] + adcs r4, r4, r2 + adcs r5, r5, r2 + strd r4, r5, [sp, #144] + adcs r10, r10, r2 + adc r11, r11, r12 + strd r10, r11, [sp, #152] + # Add-Sub + # Add + ldrd r4, r5, [sp, #64] + ldrd r6, r7, [sp, #32] + adds r8, r4, r6 + mov r3, #0 + adcs r9, r5, r7 + adc r3, r3, #0 + strd r8, r9, [sp] + # Sub + subs r10, r4, r6 + mov r12, #0 + sbcs r11, r5, r7 + adc r12, r12, #0 + strd r10, r11, [sp, #96] + # Add + ldrd r4, r5, [sp, #72] + ldrd r6, r7, [sp, #40] + adds r3, r3, #-1 + adcs r8, r4, r6 + mov r3, #0 + adcs r9, r5, r7 + adc r3, r3, #0 + strd r8, r9, [sp, #8] + # Sub + adds r12, r12, #-1 + sbcs r10, r4, r6 + mov r12, #0 + sbcs r11, r5, r7 + adc r12, r12, #0 + strd r10, r11, [sp, #104] + # Add + ldrd r4, r5, [sp, #80] + ldrd r6, r7, [sp, #48] + adds r3, r3, #-1 + adcs r8, r4, r6 + mov r3, #0 + adcs r9, r5, r7 + adc r3, r3, #0 + strd r8, r9, [sp, #16] + # Sub + adds r12, r12, #-1 + sbcs r10, r4, r6 + mov r12, #0 + sbcs r11, r5, r7 + adc r12, r12, #0 + strd r10, r11, [sp, #112] + # Add + ldrd r4, r5, [sp, #88] + ldrd r6, r7, [sp, #56] + adds r3, r3, #-1 + adcs r8, r4, r6 + adc r9, r5, r7 + # Sub + adds r12, r12, #-1 + sbcs r10, r4, r6 + sbc r11, r5, r7 + mov r3, #-19 + asr r2, r9, #31 + # Mask the modulus + and r3, r2, r3 + and r12, r2, #0x7fffffff + # Sub modulus (if overflow) + ldrd r4, r5, [sp] + subs r4, r4, r3 + sbcs r5, r5, r2 + strd r4, r5, [sp] + ldrd r4, r5, [sp, #8] + sbcs r4, r4, r2 + sbcs r5, r5, r2 + strd r4, r5, [sp, #8] + ldrd r4, r5, [sp, #16] + sbcs r4, r4, r2 + sbcs r5, r5, r2 + strd r4, r5, [sp, #16] + sbcs r8, r8, r2 + sbc r9, r9, r12 + strd r8, r9, [sp, #24] + mov r3, #-19 + asr r2, r11, #31 + # Mask the modulus + and r3, r2, r3 + and r12, r2, #0x7fffffff + # Add modulus (if underflow) + ldrd r4, r5, [sp, #96] + adds r4, r4, r3 + adcs r5, r5, r2 + strd r4, r5, [sp, #96] + ldrd r4, r5, [sp, #104] + adcs r4, r4, r2 + adcs r5, r5, r2 + strd r4, r5, [sp, #104] + ldrd r4, r5, [sp, #112] + adcs r4, r4, r2 + adcs r5, r5, r2 + strd r4, r5, [sp, #112] + adcs r10, r10, r2 + adc r11, r11, r12 + strd r10, r11, [sp, #120] + ldr r2, [sp, #160] + add r1, sp, #0x60 + add r0, sp, #32 + bl fe_mul + add r2, sp, #0x80 + add r1, sp, #0 + add r0, sp, #0 + bl fe_mul + add r1, sp, #0x80 + add r0, sp, #0x60 + bl fe_sq + ldr r1, [sp, #160] + add r0, sp, #0x80 + bl fe_sq + # Add-Sub + # Add + ldrd r4, r5, [sp, #32] + ldrd r6, r7, 
[sp] + adds r8, r4, r6 + mov r3, #0 + adcs r9, r5, r7 + adc r3, r3, #0 + strd r8, r9, [sp, #64] + # Sub + subs r10, r4, r6 + mov r12, #0 + sbcs r11, r5, r7 + adc r12, r12, #0 + strd r10, r11, [sp] + # Add + ldrd r4, r5, [sp, #40] + ldrd r6, r7, [sp, #8] + adds r3, r3, #-1 + adcs r8, r4, r6 + mov r3, #0 + adcs r9, r5, r7 + adc r3, r3, #0 + strd r8, r9, [sp, #72] + # Sub + adds r12, r12, #-1 + sbcs r10, r4, r6 + mov r12, #0 + sbcs r11, r5, r7 + adc r12, r12, #0 + strd r10, r11, [sp, #8] + # Add + ldrd r4, r5, [sp, #48] + ldrd r6, r7, [sp, #16] + adds r3, r3, #-1 + adcs r8, r4, r6 + mov r3, #0 + adcs r9, r5, r7 + adc r3, r3, #0 + strd r8, r9, [sp, #80] + # Sub + adds r12, r12, #-1 + sbcs r10, r4, r6 + mov r12, #0 + sbcs r11, r5, r7 + adc r12, r12, #0 + strd r10, r11, [sp, #16] + # Add + ldrd r4, r5, [sp, #56] + ldrd r6, r7, [sp, #24] + adds r3, r3, #-1 + adcs r8, r4, r6 + adc r9, r5, r7 + # Sub + adds r12, r12, #-1 + sbcs r10, r4, r6 + sbc r11, r5, r7 + mov r3, #-19 + asr r2, r9, #31 + # Mask the modulus + and r3, r2, r3 + and r12, r2, #0x7fffffff + # Sub modulus (if overflow) + ldrd r4, r5, [sp, #64] + subs r4, r4, r3 + sbcs r5, r5, r2 + strd r4, r5, [sp, #64] + ldrd r4, r5, [sp, #72] + sbcs r4, r4, r2 + sbcs r5, r5, r2 + strd r4, r5, [sp, #72] + ldrd r4, r5, [sp, #80] + sbcs r4, r4, r2 + sbcs r5, r5, r2 + strd r4, r5, [sp, #80] + sbcs r8, r8, r2 + sbc r9, r9, r12 + strd r8, r9, [sp, #88] + mov r3, #-19 + asr r2, r11, #31 + # Mask the modulus + and r3, r2, r3 + and r12, r2, #0x7fffffff + # Add modulus (if underflow) + ldrd r4, r5, [sp] + adds r4, r4, r3 + adcs r5, r5, r2 + strd r4, r5, [sp] + ldrd r4, r5, [sp, #8] + adcs r4, r4, r2 + adcs r5, r5, r2 + strd r4, r5, [sp, #8] + ldrd r4, r5, [sp, #16] + adcs r4, r4, r2 + adcs r5, r5, r2 + strd r4, r5, [sp, #16] + adcs r10, r10, r2 + adc r11, r11, r12 + strd r10, r11, [sp, #24] + add r2, sp, #0x60 + add r1, sp, #0x80 + ldr r0, [sp, #160] + bl fe_mul + # Sub + ldrd r4, r5, [sp, #128] + ldrd r6, r7, [sp, #136] + ldrd r8, r9, [sp, #96] + ldrd r10, r11, [sp, #104] + subs r8, r4, r8 + sbcs r9, r5, r9 + sbcs r10, r6, r10 + sbcs r11, r7, r11 + strd r8, r9, [sp, #128] + strd r10, r11, [sp, #136] + ldrd r4, r5, [sp, #144] + ldrd r6, r7, [sp, #152] + ldrd r8, r9, [sp, #112] + ldrd r10, r11, [sp, #120] + sbcs r8, r4, r8 + sbcs r9, r5, r9 + sbcs r10, r6, r10 + sbc r11, r7, r11 + mov r3, #-19 + asr r2, r11, #31 + # Mask the modulus + and r3, r2, r3 + and r12, r2, #0x7fffffff + # Add modulus (if underflow) + ldrd r4, r5, [sp, #128] + ldrd r6, r7, [sp, #136] + adds r4, r4, r3 + adcs r5, r5, r2 + adcs r6, r6, r2 + adcs r7, r7, r2 + adcs r8, r8, r2 + adcs r9, r9, r2 + adcs r10, r10, r2 + adc r11, r11, r12 + strd r4, r5, [sp, #128] + strd r6, r7, [sp, #136] + strd r8, r9, [sp, #144] + strd r10, r11, [sp, #152] + add r1, sp, #0 + add r0, sp, #0 + bl fe_sq + # Multiply by 121666 + ldrd r4, r5, [sp, #128] + ldrd r6, r7, [sp, #136] + ldrd r8, r9, [sp, #144] + ldrd r10, r11, [sp, #152] + movw r12, #0xdb42 + movt r12, #1 + umull r4, r2, r4, r12 + umull r5, r3, r5, r12 + adds r5, r5, r2 + adc r2, r3, #0 + umull r6, r3, r6, r12 + adds r6, r6, r2 + adc r2, r3, #0 + umull r7, r3, r7, r12 + adds r7, r7, r2 + adc r2, r3, #0 + umull r8, r3, r8, r12 + adds r8, r8, r2 + adc r2, r3, #0 + umull r9, r3, r9, r12 + adds r9, r9, r2 + adc r2, r3, #0 + umull r10, r3, r10, r12 + adds r10, r10, r2 + adc r2, r3, #0 + umull r11, r3, r11, r12 + adds r11, r11, r2 + adc r2, r3, #0 + mov r12, #19 + lsl r2, r2, #1 + orr r2, r2, r11, lsr #31 + mul r2, r2, r12 + and r11, r11, #0x7fffffff + adds 
r4, r4, r2 + adcs r5, r5, #0 + adcs r6, r6, #0 + adcs r7, r7, #0 + adcs r8, r8, #0 + adcs r9, r9, #0 + adcs r10, r10, #0 + adc r11, r11, #0 + strd r4, r5, [sp, #32] + strd r6, r7, [sp, #40] + strd r8, r9, [sp, #48] + strd r10, r11, [sp, #56] + add r1, sp, #0x40 + add r0, sp, #0x40 + bl fe_sq + # Add + ldrd r4, r5, [sp, #96] + ldrd r6, r7, [sp, #104] + ldrd r8, r9, [sp, #32] + ldrd r10, r11, [sp, #40] + adds r8, r4, r8 + adcs r9, r5, r9 + adcs r10, r6, r10 + adcs r11, r7, r11 + strd r8, r9, [sp, #96] + strd r10, r11, [sp, #104] + ldrd r4, r5, [sp, #112] + ldrd r6, r7, [sp, #120] + ldrd r8, r9, [sp, #48] + ldrd r10, r11, [sp, #56] + adcs r8, r4, r8 + adcs r9, r5, r9 + adcs r10, r6, r10 + adc r11, r7, r11 + mov r3, #-19 + asr r2, r11, #31 + # Mask the modulus + and r3, r2, r3 + and r12, r2, #0x7fffffff + # Sub modulus (if overflow) + ldrd r4, r5, [sp, #96] + ldrd r6, r7, [sp, #104] + subs r4, r4, r3 + sbcs r5, r5, r2 + sbcs r6, r6, r2 + sbcs r7, r7, r2 + sbcs r8, r8, r2 + sbcs r9, r9, r2 + sbcs r10, r10, r2 + sbc r11, r11, r12 + strd r4, r5, [sp, #96] + strd r6, r7, [sp, #104] + strd r8, r9, [sp, #112] + strd r10, r11, [sp, #120] + add r2, sp, #0 + ldr r1, [sp, #168] + add r0, sp, #32 + bl fe_mul + add r2, sp, #0x60 + add r1, sp, #0x80 + add r0, sp, #0 + bl fe_mul + ldr r2, [sp, #176] + ldr r1, [sp, #180] + subs r1, r1, #1 + str r1, [sp, #180] + bge L_curve25519_bits + mov r1, #31 + str r1, [sp, #180] + subs r2, r2, #4 + str r2, [sp, #176] + bge L_curve25519_words + # Invert + add r0, sp, #32 + add r1, sp, #0 + bl fe_sq + add r0, sp, #0x40 + add r1, sp, #32 + bl fe_sq + add r0, sp, #0x40 + add r1, sp, #0x40 + bl fe_sq + add r0, sp, #0x40 + add r1, sp, #0 + add r2, sp, #0x40 + bl fe_mul + add r0, sp, #32 + add r1, sp, #32 + add r2, sp, #0x40 + bl fe_mul + add r0, sp, #0x60 + add r1, sp, #32 + bl fe_sq + add r0, sp, #0x40 + add r1, sp, #0x40 + add r2, sp, #0x60 + bl fe_mul + add r0, sp, #0x60 + add r1, sp, #0x40 + bl fe_sq + mov r4, #4 +L_curve25519_inv_1: + add r0, sp, #0x60 + add r1, sp, #0x60 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_curve25519_inv_1 + add r0, sp, #0x40 + add r1, sp, #0x60 + add r2, sp, #0x40 + bl fe_mul + add r0, sp, #0x60 + add r1, sp, #0x40 + bl fe_sq + mov r4, #9 +L_curve25519_inv_2: + add r0, sp, #0x60 + add r1, sp, #0x60 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_curve25519_inv_2 + add r0, sp, #0x60 + add r1, sp, #0x60 + add r2, sp, #0x40 + bl fe_mul + add r0, sp, #0x80 + add r1, sp, #0x60 + bl fe_sq + mov r4, #19 +L_curve25519_inv_3: + add r0, sp, #0x80 + add r1, sp, #0x80 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_curve25519_inv_3 + add r0, sp, #0x60 + add r1, sp, #0x80 + add r2, sp, #0x60 + bl fe_mul + mov r4, #10 +L_curve25519_inv_4: + add r0, sp, #0x60 + add r1, sp, #0x60 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_curve25519_inv_4 + add r0, sp, #0x40 + add r1, sp, #0x60 + add r2, sp, #0x40 + bl fe_mul + add r0, sp, #0x60 + add r1, sp, #0x40 + bl fe_sq + mov r4, #49 +L_curve25519_inv_5: + add r0, sp, #0x60 + add r1, sp, #0x60 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_curve25519_inv_5 + add r0, sp, #0x60 + add r1, sp, #0x60 + add r2, sp, #0x40 + bl fe_mul + add r0, sp, #0x80 + add r1, sp, #0x60 + bl fe_sq + mov r4, #0x63 +L_curve25519_inv_6: + add r0, sp, #0x80 + add r1, sp, #0x80 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_curve25519_inv_6 + add r0, sp, #0x60 + add r1, sp, #0x80 + add r2, sp, #0x60 + bl fe_mul + mov r4, #50 +L_curve25519_inv_7: + add r0, sp, #0x60 + add r1, sp, #0x60 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + 
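
The L_curve25519_inv_* loops around this point repeat the same addition chain as fe_invert earlier: by Fermat's little theorem 1/z = z^(p-2) with p - 2 = 2^255 - 21, computed with 254 squarings (the loop counts 4, 9, 19, 10, 49, 0x63 = 99 and 50, plus the single squarings between them) and 11 multiplications. A C rendering of the chain using this file's fe_sq/fe_mul, with the fe typedef from the ladder sketch and invert_sketch an illustrative name:

    #include <stdint.h>

    typedef uint32_t fe[8];
    extern void fe_sq(fe r, const fe a);              /* this file */
    extern void fe_mul(fe r, const fe a, const fe b); /* this file */

    /* 1/z = z^(2^255 - 21); t0..t3 mirror fe_invert's stack slots. */
    static void invert_sketch(fe out, const fe z)
    {
        fe t0, t1, t2, t3;
        int i;
        fe_sq(t0, z);                                  /* z^2            */
        fe_sq(t1, t0);  fe_sq(t1, t1);                 /* z^8            */
        fe_mul(t1, z, t1);                             /* z^9            */
        fe_mul(t0, t0, t1);                            /* z^11           */
        fe_sq(t2, t0);                                 /* z^22           */
        fe_mul(t1, t1, t2);                            /* z^(2^5 - 1)    */
        fe_sq(t2, t1);  for (i = 0; i < 4;  i++) fe_sq(t2, t2);
        fe_mul(t1, t2, t1);                            /* z^(2^10 - 1)   */
        fe_sq(t2, t1);  for (i = 0; i < 9;  i++) fe_sq(t2, t2);
        fe_mul(t2, t2, t1);                            /* z^(2^20 - 1)   */
        fe_sq(t3, t2);  for (i = 0; i < 19; i++) fe_sq(t3, t3);
        fe_mul(t2, t3, t2);                            /* z^(2^40 - 1)   */
        for (i = 0; i < 10; i++) fe_sq(t2, t2);
        fe_mul(t1, t2, t1);                            /* z^(2^50 - 1)   */
        fe_sq(t2, t1);  for (i = 0; i < 49; i++) fe_sq(t2, t2);
        fe_mul(t2, t2, t1);                            /* z^(2^100 - 1)  */
        fe_sq(t3, t2);  for (i = 0; i < 99; i++) fe_sq(t3, t3);
        fe_mul(t2, t3, t2);                            /* z^(2^200 - 1)  */
        for (i = 0; i < 50; i++) fe_sq(t2, t2);
        fe_mul(t1, t2, t1);                            /* z^(2^250 - 1)  */
        for (i = 0; i < 5;  i++) fe_sq(t1, t1);        /* z^(2^255 - 2^5) */
        fe_mul(out, t1, t0);                           /* z^(2^255 - 21) */
    }

fe_pow22523 below walks the same chain but finishes with z^(2^252 - 3), the (p-5)/8 power used when taking square roots during ed25519 point decompression.
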
bne L_curve25519_inv_7 + add r0, sp, #0x40 + add r1, sp, #0x60 + add r2, sp, #0x40 + bl fe_mul + mov r4, #5 +L_curve25519_inv_8: + add r0, sp, #0x40 + add r1, sp, #0x40 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_curve25519_inv_8 + add r0, sp, #0 + add r1, sp, #0x40 + add r2, sp, #32 + bl fe_mul + add r2, sp, #0 + ldr r1, [sp, #160] + ldr r0, [sp, #160] + bl fe_mul + mov r0, #0 + add sp, sp, #0xbc + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size curve25519,.-curve25519 + .text + .align 2 + .globl fe_pow22523 + .type fe_pow22523, %function +fe_pow22523: + push {r4, lr} + sub sp, sp, #0x68 + # pow22523 + str r0, [sp, #96] + str r1, [sp, #100] + mov r0, sp + ldr r1, [sp, #100] + bl fe_sq + add r0, sp, #32 + mov r1, sp + bl fe_sq + add r0, sp, #32 + add r1, sp, #32 + bl fe_sq + add r0, sp, #32 + ldr r1, [sp, #100] + add r2, sp, #32 + bl fe_mul + mov r0, sp + mov r1, sp + add r2, sp, #32 + bl fe_mul + mov r0, sp + mov r1, sp + bl fe_sq + mov r0, sp + add r1, sp, #32 + mov r2, sp + bl fe_mul + add r0, sp, #32 + mov r1, sp + bl fe_sq + mov r4, #4 +L_fe_pow22523_1: + add r0, sp, #32 + add r1, sp, #32 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_fe_pow22523_1 + mov r0, sp + add r1, sp, #32 + mov r2, sp + bl fe_mul + add r0, sp, #32 + mov r1, sp + bl fe_sq + mov r4, #9 +L_fe_pow22523_2: + add r0, sp, #32 + add r1, sp, #32 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_fe_pow22523_2 + add r0, sp, #32 + add r1, sp, #32 + mov r2, sp + bl fe_mul + add r0, sp, #0x40 + add r1, sp, #32 + bl fe_sq + mov r4, #19 +L_fe_pow22523_3: + add r0, sp, #0x40 + add r1, sp, #0x40 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_fe_pow22523_3 + add r0, sp, #32 + add r1, sp, #0x40 + add r2, sp, #32 + bl fe_mul + mov r4, #10 +L_fe_pow22523_4: + add r0, sp, #32 + add r1, sp, #32 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_fe_pow22523_4 + mov r0, sp + add r1, sp, #32 + mov r2, sp + bl fe_mul + add r0, sp, #32 + mov r1, sp + bl fe_sq + mov r4, #49 +L_fe_pow22523_5: + add r0, sp, #32 + add r1, sp, #32 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_fe_pow22523_5 + add r0, sp, #32 + add r1, sp, #32 + mov r2, sp + bl fe_mul + add r0, sp, #0x40 + add r1, sp, #32 + bl fe_sq + mov r4, #0x63 +L_fe_pow22523_6: + add r0, sp, #0x40 + add r1, sp, #0x40 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_fe_pow22523_6 + add r0, sp, #32 + add r1, sp, #0x40 + add r2, sp, #32 + bl fe_mul + mov r4, #50 +L_fe_pow22523_7: + add r0, sp, #32 + add r1, sp, #32 + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_fe_pow22523_7 + mov r0, sp + add r1, sp, #32 + mov r2, sp + bl fe_mul + mov r4, #2 +L_fe_pow22523_8: + mov r0, sp + mov r1, sp + bl fe_sq + sub r4, r4, #1 + cmp r4, #0 + bne L_fe_pow22523_8 + ldr r0, [sp, #96] + mov r1, sp + ldr r2, [sp, #100] + bl fe_mul + ldr r1, [sp, #100] + ldr r0, [sp, #96] + add sp, sp, #0x68 + pop {r4, pc} + .size fe_pow22523,.-fe_pow22523 + .text + .align 2 + .globl fe_ge_to_p2 + .type fe_ge_to_p2, %function +fe_ge_to_p2: + push {lr} + sub sp, sp, #16 + str r0, [sp] + str r1, [sp, #4] + str r2, [sp, #8] + str r3, [sp, #12] + ldr r2, [sp, #28] + ldr r1, [sp, #12] + ldr r0, [sp] + bl fe_mul + ldr r2, [sp, #24] + ldr r1, [sp, #20] + ldr r0, [sp, #4] + bl fe_mul + ldr r2, [sp, #28] + ldr r1, [sp, #24] + ldr r0, [sp, #8] + bl fe_mul + add sp, sp, #16 + pop {pc} + .size fe_ge_to_p2,.-fe_ge_to_p2 + .text + .align 2 + .globl fe_ge_to_p3 + .type fe_ge_to_p3, %function +fe_ge_to_p3: + push {lr} + sub sp, sp, #16 + str r0, [sp] + str r1, [sp, #4] + str r2, [sp, #8] + str r3, [sp, #12] + ldr r2, [sp, #32] + ldr 
r1, [sp, #20] + ldr r0, [sp] + bl fe_mul + ldr r2, [sp, #28] + ldr r1, [sp, #24] + ldr r0, [sp, #4] + bl fe_mul + ldr r2, [sp, #32] + ldr r1, [sp, #28] + ldr r0, [sp, #8] + bl fe_mul + ldr r2, [sp, #24] + ldr r1, [sp, #20] + ldr r0, [sp, #12] + bl fe_mul + add sp, sp, #16 + pop {pc} + .size fe_ge_to_p3,.-fe_ge_to_p3 + .text + .align 2 + .globl fe_ge_dbl + .type fe_ge_dbl, %function +fe_ge_dbl: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + sub sp, sp, #16 + str r0, [sp] + str r1, [sp, #4] + str r2, [sp, #8] + str r3, [sp, #12] + ldr r1, [sp, #52] + ldr r0, [sp] + bl fe_sq + ldr r1, [sp, #56] + ldr r0, [sp, #8] + bl fe_sq + ldr r0, [sp, #4] + ldr r1, [sp, #52] + ldr r2, [sp, #56] + # Add + ldr r3, [r1] + ldr r4, [r1, #4] + ldr r5, [r1, #8] + ldr r6, [r1, #12] + ldr r7, [r2] + ldr r8, [r2, #4] + ldr r9, [r2, #8] + ldr r10, [r2, #12] + adds r7, r3, r7 + adcs r8, r4, r8 + adcs r9, r5, r9 + adcs r10, r6, r10 + str r7, [r0] + str r8, [r0, #4] + str r9, [r0, #8] + str r10, [r0, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + ldr r5, [r1, #24] + ldr r6, [r1, #28] + ldr r7, [r2, #16] + ldr r8, [r2, #20] + ldr r9, [r2, #24] + ldr r10, [r2, #28] + adcs r7, r3, r7 + adcs r8, r4, r8 + adcs r9, r5, r9 + adc r10, r6, r10 + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + ldr r3, [r0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + subs r3, r3, r12 + sbcs r4, r4, r11 + sbcs r5, r5, r11 + sbcs r6, r6, r11 + sbcs r7, r7, r11 + sbcs r8, r8, r11 + sbcs r9, r9, r11 + sbc r10, r10, lr + str r3, [r0] + str r4, [r0, #4] + str r5, [r0, #8] + str r6, [r0, #12] + str r7, [r0, #16] + str r8, [r0, #20] + str r9, [r0, #24] + str r10, [r0, #28] + ldr r1, [sp, #4] + ldr r0, [sp, #12] + bl fe_sq + ldr r0, [sp, #4] + ldr r1, [sp, #8] + ldr r2, [sp] + # Add-Sub + # Add + ldr r3, [r1] + ldr r4, [r1, #4] + ldr r5, [r2] + ldr r6, [r2, #4] + adds r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0] + str r8, [r0, #4] + # Sub + subs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1] + str r10, [r1, #4] + # Add + ldr r3, [r1, #8] + ldr r4, [r1, #12] + ldr r5, [r2, #8] + ldr r6, [r2, #12] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #8] + str r8, [r0, #12] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #8] + str r10, [r1, #12] + # Add + ldr r3, [r1, #16] + ldr r4, [r1, #20] + ldr r5, [r2, #16] + ldr r6, [r2, #20] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #16] + str r8, [r0, #20] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #16] + str r10, [r1, #20] + # Add + ldr r3, [r1, #24] + ldr r4, [r1, #28] + ldr r5, [r2, #24] + ldr r6, [r2, #28] + adds r12, r12, #-1 + adcs r7, r3, r5 + adc r8, r4, r6 + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + sbc r10, r4, r6 + mov r12, #-19 + asr r11, r8, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + ldr r3, [r0] + ldr r4, [r0, #4] + subs r3, r3, r12 + sbcs r4, r4, r11 + str r3, [r0] + str r4, [r0, #4] + ldr r3, [r0, #8] + ldr r4, [r0, #12] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #8] + str r4, [r0, #12] + ldr r3, [r0, #16] + ldr r4, [r0, #20] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #16] + str r4, [r0, #20] + sbcs r7, r7, r11 + sbc r8, 
r8, lr + str r7, [r0, #24] + str r8, [r0, #28] + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Add modulus (if underflow) + ldr r3, [r1] + ldr r4, [r1, #4] + adds r3, r3, r12 + adcs r4, r4, r11 + str r3, [r1] + str r4, [r1, #4] + ldr r3, [r1, #8] + ldr r4, [r1, #12] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #8] + str r4, [r1, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #16] + str r4, [r1, #20] + adcs r9, r9, r11 + adc r10, r10, lr + str r9, [r1, #24] + str r10, [r1, #28] + ldr r0, [sp] + ldr r1, [sp, #12] + ldr r2, [sp, #4] + # Sub + ldr r3, [r1] + ldr r4, [r1, #4] + ldr r5, [r1, #8] + ldr r6, [r1, #12] + ldr r7, [r2] + ldr r8, [r2, #4] + ldr r9, [r2, #8] + ldr r10, [r2, #12] + subs r7, r3, r7 + sbcs r8, r4, r8 + sbcs r9, r5, r9 + sbcs r10, r6, r10 + str r7, [r0] + str r8, [r0, #4] + str r9, [r0, #8] + str r10, [r0, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + ldr r5, [r1, #24] + ldr r6, [r1, #28] + ldr r7, [r2, #16] + ldr r8, [r2, #20] + ldr r9, [r2, #24] + ldr r10, [r2, #28] + sbcs r7, r3, r7 + sbcs r8, r4, r8 + sbcs r9, r5, r9 + sbc r10, r6, r10 + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Add modulus (if underflow) + ldr r3, [r0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + adds r3, r3, r12 + adcs r4, r4, r11 + adcs r5, r5, r11 + adcs r6, r6, r11 + adcs r7, r7, r11 + adcs r8, r8, r11 + adcs r9, r9, r11 + adc r10, r10, lr + str r3, [r0] + str r4, [r0, #4] + str r5, [r0, #8] + str r6, [r0, #12] + str r7, [r0, #16] + str r8, [r0, #20] + str r9, [r0, #24] + str r10, [r0, #28] + ldr r1, [sp, #60] + ldr r0, [sp, #12] + bl fe_sq2 + ldr r0, [sp, #12] + ldr r1, [sp, #8] + # Sub + ldr r3, [r0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r1] + ldr r8, [r1, #4] + ldr r9, [r1, #8] + ldr r10, [r1, #12] + subs r7, r3, r7 + sbcs r8, r4, r8 + sbcs r9, r5, r9 + sbcs r10, r6, r10 + str r7, [r0] + str r8, [r0, #4] + str r9, [r0, #8] + str r10, [r0, #12] + ldr r3, [r0, #16] + ldr r4, [r0, #20] + ldr r5, [r0, #24] + ldr r6, [r0, #28] + ldr r7, [r1, #16] + ldr r8, [r1, #20] + ldr r9, [r1, #24] + ldr r10, [r1, #28] + sbcs r7, r3, r7 + sbcs r8, r4, r8 + sbcs r9, r5, r9 + sbc r10, r6, r10 + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Add modulus (if underflow) + ldr r3, [r0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + adds r3, r3, r12 + adcs r4, r4, r11 + adcs r5, r5, r11 + adcs r6, r6, r11 + adcs r7, r7, r11 + adcs r8, r8, r11 + adcs r9, r9, r11 + adc r10, r10, lr + str r3, [r0] + str r4, [r0, #4] + str r5, [r0, #8] + str r6, [r0, #12] + str r7, [r0, #16] + str r8, [r0, #20] + str r9, [r0, #24] + str r10, [r0, #28] + add sp, sp, #16 + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size fe_ge_dbl,.-fe_ge_dbl + .text + .align 2 + .globl fe_ge_madd + .type fe_ge_madd, %function +fe_ge_madd: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + sub sp, sp, #32 + str r0, [sp] + str r1, [sp, #4] + str r2, [sp, #8] + str r3, [sp, #12] + ldr r0, [sp] + ldr r1, [sp, #72] + ldr r2, [sp, #68] + # Add + ldr r3, [r1] + ldr r4, [r1, #4] + ldr r5, [r1, #8] + ldr r6, [r1, #12] + ldr r7, [r2] + ldr r8, [r2, #4] + ldr r9, [r2, #8] + ldr r10, [r2, #12] + adds r7, r3, r7 + adcs r8, r4, r8 + adcs r9, r5, r9 + adcs r10, r6, r10 + str r7, [r0] + str r8, [r0, #4] + str r9, [r0, #8] + str r10, [r0, #12] + ldr r3, [r1, #16] + ldr r4, [r1, 
#20] + ldr r5, [r1, #24] + ldr r6, [r1, #28] + ldr r7, [r2, #16] + ldr r8, [r2, #20] + ldr r9, [r2, #24] + ldr r10, [r2, #28] + adcs r7, r3, r7 + adcs r8, r4, r8 + adcs r9, r5, r9 + adc r10, r6, r10 + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + ldr r3, [r0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + subs r3, r3, r12 + sbcs r4, r4, r11 + sbcs r5, r5, r11 + sbcs r6, r6, r11 + sbcs r7, r7, r11 + sbcs r8, r8, r11 + sbcs r9, r9, r11 + sbc r10, r10, lr + str r3, [r0] + str r4, [r0, #4] + str r5, [r0, #8] + str r6, [r0, #12] + str r7, [r0, #16] + str r8, [r0, #20] + str r9, [r0, #24] + str r10, [r0, #28] + ldr r0, [sp, #4] + ldr r1, [sp, #72] + ldr r2, [sp, #68] + # Sub + ldr r3, [r1] + ldr r4, [r1, #4] + ldr r5, [r1, #8] + ldr r6, [r1, #12] + ldr r7, [r2] + ldr r8, [r2, #4] + ldr r9, [r2, #8] + ldr r10, [r2, #12] + subs r7, r3, r7 + sbcs r8, r4, r8 + sbcs r9, r5, r9 + sbcs r10, r6, r10 + str r7, [r0] + str r8, [r0, #4] + str r9, [r0, #8] + str r10, [r0, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + ldr r5, [r1, #24] + ldr r6, [r1, #28] + ldr r7, [r2, #16] + ldr r8, [r2, #20] + ldr r9, [r2, #24] + ldr r10, [r2, #28] + sbcs r7, r3, r7 + sbcs r8, r4, r8 + sbcs r9, r5, r9 + sbc r10, r6, r10 + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Add modulus (if underflow) + ldr r3, [r0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + adds r3, r3, r12 + adcs r4, r4, r11 + adcs r5, r5, r11 + adcs r6, r6, r11 + adcs r7, r7, r11 + adcs r8, r8, r11 + adcs r9, r9, r11 + adc r10, r10, lr + str r3, [r0] + str r4, [r0, #4] + str r5, [r0, #8] + str r6, [r0, #12] + str r7, [r0, #16] + str r8, [r0, #20] + str r9, [r0, #24] + str r10, [r0, #28] + ldr r2, [sp, #88] + ldr r1, [sp] + ldr r0, [sp, #8] + bl fe_mul + ldr r2, [sp, #92] + ldr r1, [sp, #4] + ldr r0, [sp, #4] + bl fe_mul + ldr r2, [sp, #80] + ldr r1, [sp, #84] + ldr r0, [sp, #12] + bl fe_mul + ldr r0, [sp, #4] + ldr r1, [sp] + ldr r2, [sp, #8] + # Add-Sub + # Add + ldr r3, [r2] + ldr r4, [r2, #4] + ldr r5, [r0] + ldr r6, [r0, #4] + adds r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0] + str r8, [r0, #4] + # Sub + subs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1] + str r10, [r1, #4] + # Add + ldr r3, [r2, #8] + ldr r4, [r2, #12] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #8] + str r8, [r0, #12] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #8] + str r10, [r1, #12] + # Add + ldr r3, [r2, #16] + ldr r4, [r2, #20] + ldr r5, [r0, #16] + ldr r6, [r0, #20] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #16] + str r8, [r0, #20] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #16] + str r10, [r1, #20] + # Add + ldr r3, [r2, #24] + ldr r4, [r2, #28] + ldr r5, [r0, #24] + ldr r6, [r0, #28] + adds r12, r12, #-1 + adcs r7, r3, r5 + adc r8, r4, r6 + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + sbc r10, r4, r6 + mov r12, #-19 + asr r11, r8, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + ldr r3, [r0] + ldr r4, [r0, #4] + subs r3, r3, r12 + sbcs r4, r4, r11 + str r3, [r0] + str r4, [r0, #4] + ldr r3, [r0, #8] + 
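
fe_ge_madd, in progress here, is mixed point addition on the twisted Edwards form of the curve: it adds an extended-coordinate point (X1:Y1:Z1:T1) and a precomputed point stored as (y+x, y-x, 2dxy), writing a completed point through the four output pointers, which fe_ge_to_p2/fe_ge_to_p3 above convert back to projective or extended form (fe_ge_msub below is the same with the (y+x) and (y-x) multiplicands exchanged, i.e. subtraction of the precomputed point). The underlying formulas, sketched with ref10-style naming — all parameter names illustrative, fe_add/fe_sub assumed as in the ladder sketch:

    #include <stdint.h>

    typedef uint32_t fe[8];
    extern void fe_mul(fe r, const fe a, const fe b); /* this file */
    extern void fe_add(fe r, const fe a, const fe b); /* assumed helper */
    extern void fe_sub(fe r, const fe a, const fe b); /* assumed helper */

    /* r = p + q, with q precomputed as (y+x, y-x, 2dxy). */
    static void ge_madd_sketch(fe rx, fe ry, fe rz, fe rt,
                               const fe px, const fe py, const fe pz,
                               const fe pt, const fe qyplusx,
                               const fe qyminusx, const fe qxy2d)
    {
        fe a, b, c, d;
        fe_add(a, py, px);          /* Y1 + X1                  */
        fe_sub(b, py, px);          /* Y1 - X1                  */
        fe_mul(a, a, qyplusx);      /* A = (Y1+X1)(y2+x2)       */
        fe_mul(b, b, qyminusx);     /* B = (Y1-X1)(y2-x2)       */
        fe_mul(c, qxy2d, pt);       /* C = 2d x2 y2 T1          */
        fe_add(d, pz, pz);          /* D = 2 Z1 (the "Double")  */
        fe_sub(rx, a, b);           /* X3 = A - B               */
        fe_add(ry, a, b);           /* Y3 = A + B               */
        fe_add(rz, d, c);           /* Z3 = D + C               */
        fe_sub(rt, d, c);           /* T3 = D - C               */
    }
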
ldr r4, [r0, #12] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #8] + str r4, [r0, #12] + ldr r3, [r0, #16] + ldr r4, [r0, #20] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #16] + str r4, [r0, #20] + sbcs r7, r7, r11 + sbc r8, r8, lr + str r7, [r0, #24] + str r8, [r0, #28] + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Add modulus (if underflow) + ldr r3, [r1] + ldr r4, [r1, #4] + adds r3, r3, r12 + adcs r4, r4, r11 + str r3, [r1] + str r4, [r1, #4] + ldr r3, [r1, #8] + ldr r4, [r1, #12] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #8] + str r4, [r1, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #16] + str r4, [r1, #20] + adcs r9, r9, r11 + adc r10, r10, lr + str r9, [r1, #24] + str r10, [r1, #28] + ldr r0, [sp, #8] + ldr r1, [sp, #76] + # Double + ldr r3, [r1] + ldr r4, [r1, #4] + ldr r5, [r1, #8] + ldr r6, [r1, #12] + ldr r7, [r1, #16] + ldr r8, [r1, #20] + ldr r9, [r1, #24] + ldr r10, [r1, #28] + adds r3, r3, r3 + adcs r4, r4, r4 + adcs r5, r5, r5 + adcs r6, r6, r6 + adcs r7, r7, r7 + adcs r8, r8, r8 + adcs r9, r9, r9 + adc r10, r10, r10 + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + subs r3, r3, r12 + sbcs r4, r4, r11 + sbcs r5, r5, r11 + sbcs r6, r6, r11 + sbcs r7, r7, r11 + sbcs r8, r8, r11 + sbcs r9, r9, r11 + sbc r10, r10, lr + str r3, [r0] + str r4, [r0, #4] + str r5, [r0, #8] + str r6, [r0, #12] + str r7, [r0, #16] + str r8, [r0, #20] + str r9, [r0, #24] + str r10, [r0, #28] + ldr r0, [sp, #8] + ldr r1, [sp, #12] + # Add-Sub + # Add + ldr r3, [r0] + ldr r4, [r0, #4] + ldr r5, [r1] + ldr r6, [r1, #4] + adds r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0] + str r8, [r0, #4] + # Sub + subs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1] + str r10, [r1, #4] + # Add + ldr r3, [r0, #8] + ldr r4, [r0, #12] + ldr r5, [r1, #8] + ldr r6, [r1, #12] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #8] + str r8, [r0, #12] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #8] + str r10, [r1, #12] + # Add + ldr r3, [r0, #16] + ldr r4, [r0, #20] + ldr r5, [r1, #16] + ldr r6, [r1, #20] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #16] + str r8, [r0, #20] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #16] + str r10, [r1, #20] + # Add + ldr r3, [r0, #24] + ldr r4, [r0, #28] + ldr r5, [r1, #24] + ldr r6, [r1, #28] + adds r12, r12, #-1 + adcs r7, r3, r5 + adc r8, r4, r6 + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + sbc r10, r4, r6 + mov r12, #-19 + asr r11, r8, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + ldr r3, [r0] + ldr r4, [r0, #4] + subs r3, r3, r12 + sbcs r4, r4, r11 + str r3, [r0] + str r4, [r0, #4] + ldr r3, [r0, #8] + ldr r4, [r0, #12] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #8] + str r4, [r0, #12] + ldr r3, [r0, #16] + ldr r4, [r0, #20] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #16] + str r4, [r0, #20] + sbcs r7, r7, r11 + sbc r8, r8, lr + str r7, [r0, #24] + str r8, [r0, #28] + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Add modulus (if 
underflow) + ldr r3, [r1] + ldr r4, [r1, #4] + adds r3, r3, r12 + adcs r4, r4, r11 + str r3, [r1] + str r4, [r1, #4] + ldr r3, [r1, #8] + ldr r4, [r1, #12] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #8] + str r4, [r1, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #16] + str r4, [r1, #20] + adcs r9, r9, r11 + adc r10, r10, lr + str r9, [r1, #24] + str r10, [r1, #28] + add sp, sp, #32 + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size fe_ge_madd,.-fe_ge_madd + .text + .align 2 + .globl fe_ge_msub + .type fe_ge_msub, %function +fe_ge_msub: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + sub sp, sp, #32 + str r0, [sp] + str r1, [sp, #4] + str r2, [sp, #8] + str r3, [sp, #12] + ldr r0, [sp] + ldr r1, [sp, #72] + ldr r2, [sp, #68] + # Add + ldr r3, [r1] + ldr r4, [r1, #4] + ldr r5, [r1, #8] + ldr r6, [r1, #12] + ldr r7, [r2] + ldr r8, [r2, #4] + ldr r9, [r2, #8] + ldr r10, [r2, #12] + adds r7, r3, r7 + adcs r8, r4, r8 + adcs r9, r5, r9 + adcs r10, r6, r10 + str r7, [r0] + str r8, [r0, #4] + str r9, [r0, #8] + str r10, [r0, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + ldr r5, [r1, #24] + ldr r6, [r1, #28] + ldr r7, [r2, #16] + ldr r8, [r2, #20] + ldr r9, [r2, #24] + ldr r10, [r2, #28] + adcs r7, r3, r7 + adcs r8, r4, r8 + adcs r9, r5, r9 + adc r10, r6, r10 + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + ldr r3, [r0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + subs r3, r3, r12 + sbcs r4, r4, r11 + sbcs r5, r5, r11 + sbcs r6, r6, r11 + sbcs r7, r7, r11 + sbcs r8, r8, r11 + sbcs r9, r9, r11 + sbc r10, r10, lr + str r3, [r0] + str r4, [r0, #4] + str r5, [r0, #8] + str r6, [r0, #12] + str r7, [r0, #16] + str r8, [r0, #20] + str r9, [r0, #24] + str r10, [r0, #28] + ldr r0, [sp, #4] + ldr r1, [sp, #72] + ldr r2, [sp, #68] + # Sub + ldr r3, [r1] + ldr r4, [r1, #4] + ldr r5, [r1, #8] + ldr r6, [r1, #12] + ldr r7, [r2] + ldr r8, [r2, #4] + ldr r9, [r2, #8] + ldr r10, [r2, #12] + subs r7, r3, r7 + sbcs r8, r4, r8 + sbcs r9, r5, r9 + sbcs r10, r6, r10 + str r7, [r0] + str r8, [r0, #4] + str r9, [r0, #8] + str r10, [r0, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + ldr r5, [r1, #24] + ldr r6, [r1, #28] + ldr r7, [r2, #16] + ldr r8, [r2, #20] + ldr r9, [r2, #24] + ldr r10, [r2, #28] + sbcs r7, r3, r7 + sbcs r8, r4, r8 + sbcs r9, r5, r9 + sbc r10, r6, r10 + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Add modulus (if underflow) + ldr r3, [r0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + adds r3, r3, r12 + adcs r4, r4, r11 + adcs r5, r5, r11 + adcs r6, r6, r11 + adcs r7, r7, r11 + adcs r8, r8, r11 + adcs r9, r9, r11 + adc r10, r10, lr + str r3, [r0] + str r4, [r0, #4] + str r5, [r0, #8] + str r6, [r0, #12] + str r7, [r0, #16] + str r8, [r0, #20] + str r9, [r0, #24] + str r10, [r0, #28] + ldr r2, [sp, #92] + ldr r1, [sp] + ldr r0, [sp, #8] + bl fe_mul + ldr r2, [sp, #88] + ldr r1, [sp, #4] + ldr r0, [sp, #4] + bl fe_mul + ldr r2, [sp, #80] + ldr r1, [sp, #84] + ldr r0, [sp, #12] + bl fe_mul + ldr r0, [sp, #4] + ldr r1, [sp] + ldr r2, [sp, #8] + # Add-Sub + # Add + ldr r3, [r2] + ldr r4, [r2, #4] + ldr r5, [r0] + ldr r6, [r0, #4] + adds r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0] + str r8, [r0, #4] + # Sub + subs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1] + str r10, [r1, #4] + # Add + ldr r3, [r2, #8] + ldr 
r4, [r2, #12] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #8] + str r8, [r0, #12] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #8] + str r10, [r1, #12] + # Add + ldr r3, [r2, #16] + ldr r4, [r2, #20] + ldr r5, [r0, #16] + ldr r6, [r0, #20] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #16] + str r8, [r0, #20] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #16] + str r10, [r1, #20] + # Add + ldr r3, [r2, #24] + ldr r4, [r2, #28] + ldr r5, [r0, #24] + ldr r6, [r0, #28] + adds r12, r12, #-1 + adcs r7, r3, r5 + adc r8, r4, r6 + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + sbc r10, r4, r6 + mov r12, #-19 + asr r11, r8, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + ldr r3, [r0] + ldr r4, [r0, #4] + subs r3, r3, r12 + sbcs r4, r4, r11 + str r3, [r0] + str r4, [r0, #4] + ldr r3, [r0, #8] + ldr r4, [r0, #12] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #8] + str r4, [r0, #12] + ldr r3, [r0, #16] + ldr r4, [r0, #20] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #16] + str r4, [r0, #20] + sbcs r7, r7, r11 + sbc r8, r8, lr + str r7, [r0, #24] + str r8, [r0, #28] + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Add modulus (if underflow) + ldr r3, [r1] + ldr r4, [r1, #4] + adds r3, r3, r12 + adcs r4, r4, r11 + str r3, [r1] + str r4, [r1, #4] + ldr r3, [r1, #8] + ldr r4, [r1, #12] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #8] + str r4, [r1, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #16] + str r4, [r1, #20] + adcs r9, r9, r11 + adc r10, r10, lr + str r9, [r1, #24] + str r10, [r1, #28] + ldr r0, [sp, #8] + ldr r1, [sp, #76] + # Double + ldr r3, [r1] + ldr r4, [r1, #4] + ldr r5, [r1, #8] + ldr r6, [r1, #12] + ldr r7, [r1, #16] + ldr r8, [r1, #20] + ldr r9, [r1, #24] + ldr r10, [r1, #28] + adds r3, r3, r3 + adcs r4, r4, r4 + adcs r5, r5, r5 + adcs r6, r6, r6 + adcs r7, r7, r7 + adcs r8, r8, r8 + adcs r9, r9, r9 + adc r10, r10, r10 + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + subs r3, r3, r12 + sbcs r4, r4, r11 + sbcs r5, r5, r11 + sbcs r6, r6, r11 + sbcs r7, r7, r11 + sbcs r8, r8, r11 + sbcs r9, r9, r11 + sbc r10, r10, lr + str r3, [r0] + str r4, [r0, #4] + str r5, [r0, #8] + str r6, [r0, #12] + str r7, [r0, #16] + str r8, [r0, #20] + str r9, [r0, #24] + str r10, [r0, #28] + ldr r0, [sp, #12] + ldr r1, [sp, #8] + # Add-Sub + # Add + ldr r3, [r1] + ldr r4, [r1, #4] + ldr r5, [r0] + ldr r6, [r0, #4] + adds r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0] + str r8, [r0, #4] + # Sub + subs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1] + str r10, [r1, #4] + # Add + ldr r3, [r1, #8] + ldr r4, [r1, #12] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #8] + str r8, [r0, #12] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #8] + str r10, [r1, #12] + # Add + ldr r3, [r1, #16] + ldr r4, [r1, #20] + ldr r5, [r0, #16] + ldr r6, [r0, #20] + adds 
r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #16] + str r8, [r0, #20] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #16] + str r10, [r1, #20] + # Add + ldr r3, [r1, #24] + ldr r4, [r1, #28] + ldr r5, [r0, #24] + ldr r6, [r0, #28] + adds r12, r12, #-1 + adcs r7, r3, r5 + adc r8, r4, r6 + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + sbc r10, r4, r6 + mov r12, #-19 + asr r11, r8, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + ldr r3, [r0] + ldr r4, [r0, #4] + subs r3, r3, r12 + sbcs r4, r4, r11 + str r3, [r0] + str r4, [r0, #4] + ldr r3, [r0, #8] + ldr r4, [r0, #12] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #8] + str r4, [r0, #12] + ldr r3, [r0, #16] + ldr r4, [r0, #20] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #16] + str r4, [r0, #20] + sbcs r7, r7, r11 + sbc r8, r8, lr + str r7, [r0, #24] + str r8, [r0, #28] + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Add modulus (if underflow) + ldr r3, [r1] + ldr r4, [r1, #4] + adds r3, r3, r12 + adcs r4, r4, r11 + str r3, [r1] + str r4, [r1, #4] + ldr r3, [r1, #8] + ldr r4, [r1, #12] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #8] + str r4, [r1, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #16] + str r4, [r1, #20] + adcs r9, r9, r11 + adc r10, r10, lr + str r9, [r1, #24] + str r10, [r1, #28] + add sp, sp, #32 + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size fe_ge_msub,.-fe_ge_msub + .text + .align 2 + .globl fe_ge_add + .type fe_ge_add, %function +fe_ge_add: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + sub sp, sp, #0x60 + str r0, [sp] + str r1, [sp, #4] + str r2, [sp, #8] + str r3, [sp, #12] + ldr r0, [sp] + ldr r1, [sp, #136] + ldr r2, [sp, #132] + # Add + ldr r3, [r1] + ldr r4, [r1, #4] + ldr r5, [r1, #8] + ldr r6, [r1, #12] + ldr r7, [r2] + ldr r8, [r2, #4] + ldr r9, [r2, #8] + ldr r10, [r2, #12] + adds r7, r3, r7 + adcs r8, r4, r8 + adcs r9, r5, r9 + adcs r10, r6, r10 + str r7, [r0] + str r8, [r0, #4] + str r9, [r0, #8] + str r10, [r0, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + ldr r5, [r1, #24] + ldr r6, [r1, #28] + ldr r7, [r2, #16] + ldr r8, [r2, #20] + ldr r9, [r2, #24] + ldr r10, [r2, #28] + adcs r7, r3, r7 + adcs r8, r4, r8 + adcs r9, r5, r9 + adc r10, r6, r10 + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + ldr r3, [r0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + subs r3, r3, r12 + sbcs r4, r4, r11 + sbcs r5, r5, r11 + sbcs r6, r6, r11 + sbcs r7, r7, r11 + sbcs r8, r8, r11 + sbcs r9, r9, r11 + sbc r10, r10, lr + str r3, [r0] + str r4, [r0, #4] + str r5, [r0, #8] + str r6, [r0, #12] + str r7, [r0, #16] + str r8, [r0, #20] + str r9, [r0, #24] + str r10, [r0, #28] + ldr r0, [sp, #4] + ldr r1, [sp, #136] + ldr r2, [sp, #132] + # Sub + ldr r3, [r1] + ldr r4, [r1, #4] + ldr r5, [r1, #8] + ldr r6, [r1, #12] + ldr r7, [r2] + ldr r8, [r2, #4] + ldr r9, [r2, #8] + ldr r10, [r2, #12] + subs r7, r3, r7 + sbcs r8, r4, r8 + sbcs r9, r5, r9 + sbcs r10, r6, r10 + str r7, [r0] + str r8, [r0, #4] + str r9, [r0, #8] + str r10, [r0, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + ldr r5, [r1, #24] + ldr r6, [r1, #28] + ldr r7, [r2, #16] + ldr r8, [r2, #20] + ldr r9, [r2, #24] + ldr r10, [r2, #28] + sbcs r7, r3, r7 + sbcs r8, r4, r8 + 
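# (operands stay below 2^255, so after the final sbc the sign bit of r10 is set exactly when the subtraction underflowed; the asr below turns that bit into the correction mask) +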
sbcs r9, r5, r9 + sbc r10, r6, r10 + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Add modulus (if underflow) + ldr r3, [r0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + adds r3, r3, r12 + adcs r4, r4, r11 + adcs r5, r5, r11 + adcs r6, r6, r11 + adcs r7, r7, r11 + adcs r8, r8, r11 + adcs r9, r9, r11 + adc r10, r10, lr + str r3, [r0] + str r4, [r0, #4] + str r5, [r0, #8] + str r6, [r0, #12] + str r7, [r0, #16] + str r8, [r0, #20] + str r9, [r0, #24] + str r10, [r0, #28] + ldr r2, [sp, #156] + ldr r1, [sp] + ldr r0, [sp, #8] + bl fe_mul + ldr r2, [sp, #160] + ldr r1, [sp, #4] + ldr r0, [sp, #4] + bl fe_mul + ldr r2, [sp, #144] + ldr r1, [sp, #152] + ldr r0, [sp, #12] + bl fe_mul + ldr r2, [sp, #148] + ldr r1, [sp, #140] + ldr r0, [sp] + bl fe_mul + add r0, sp, #16 + ldr r1, [sp] + # Double + ldr r3, [r1] + ldr r4, [r1, #4] + ldr r5, [r1, #8] + ldr r6, [r1, #12] + ldr r7, [r1, #16] + ldr r8, [r1, #20] + ldr r9, [r1, #24] + ldr r10, [r1, #28] + adds r3, r3, r3 + adcs r4, r4, r4 + adcs r5, r5, r5 + adcs r6, r6, r6 + adcs r7, r7, r7 + adcs r8, r8, r8 + adcs r9, r9, r9 + adc r10, r10, r10 + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + subs r3, r3, r12 + sbcs r4, r4, r11 + sbcs r5, r5, r11 + sbcs r6, r6, r11 + sbcs r7, r7, r11 + sbcs r8, r8, r11 + sbcs r9, r9, r11 + sbc r10, r10, lr + str r3, [r0] + str r4, [r0, #4] + str r5, [r0, #8] + str r6, [r0, #12] + str r7, [r0, #16] + str r8, [r0, #20] + str r9, [r0, #24] + str r10, [r0, #28] + ldr r0, [sp, #4] + ldr r1, [sp] + ldr r2, [sp, #8] + # Add-Sub + # Add + ldr r3, [r2] + ldr r4, [r2, #4] + ldr r5, [r0] + ldr r6, [r0, #4] + adds r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0] + str r8, [r0, #4] + # Sub + subs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1] + str r10, [r1, #4] + # Add + ldr r3, [r2, #8] + ldr r4, [r2, #12] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #8] + str r8, [r0, #12] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #8] + str r10, [r1, #12] + # Add + ldr r3, [r2, #16] + ldr r4, [r2, #20] + ldr r5, [r0, #16] + ldr r6, [r0, #20] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #16] + str r8, [r0, #20] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #16] + str r10, [r1, #20] + # Add + ldr r3, [r2, #24] + ldr r4, [r2, #28] + ldr r5, [r0, #24] + ldr r6, [r0, #28] + adds r12, r12, #-1 + adcs r7, r3, r5 + adc r8, r4, r6 + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + sbc r10, r4, r6 + mov r12, #-19 + asr r11, r8, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + ldr r3, [r0] + ldr r4, [r0, #4] + subs r3, r3, r12 + sbcs r4, r4, r11 + str r3, [r0] + str r4, [r0, #4] + ldr r3, [r0, #8] + ldr r4, [r0, #12] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #8] + str r4, [r0, #12] + ldr r3, [r0, #16] + ldr r4, [r0, #20] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #16] + str r4, [r0, #20] + sbcs r7, r7, r11 + sbc r8, r8, lr + str r7, [r0, #24] + str r8, [r0, #28] + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Add modulus (if 
underflow) + ldr r3, [r1] + ldr r4, [r1, #4] + adds r3, r3, r12 + adcs r4, r4, r11 + str r3, [r1] + str r4, [r1, #4] + ldr r3, [r1, #8] + ldr r4, [r1, #12] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #8] + str r4, [r1, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #16] + str r4, [r1, #20] + adcs r9, r9, r11 + adc r10, r10, lr + str r9, [r1, #24] + str r10, [r1, #28] + ldr r0, [sp, #8] + ldr r1, [sp, #12] + add r2, sp, #16 + # Add-Sub + # Add + ldr r3, [r2] + ldr r4, [r2, #4] + ldr r5, [r1] + ldr r6, [r1, #4] + adds r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0] + str r8, [r0, #4] + # Sub + subs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1] + str r10, [r1, #4] + # Add + ldr r3, [r2, #8] + ldr r4, [r2, #12] + ldr r5, [r1, #8] + ldr r6, [r1, #12] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #8] + str r8, [r0, #12] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #8] + str r10, [r1, #12] + # Add + ldr r3, [r2, #16] + ldr r4, [r2, #20] + ldr r5, [r1, #16] + ldr r6, [r1, #20] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #16] + str r8, [r0, #20] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #16] + str r10, [r1, #20] + # Add + ldr r3, [r2, #24] + ldr r4, [r2, #28] + ldr r5, [r1, #24] + ldr r6, [r1, #28] + adds r12, r12, #-1 + adcs r7, r3, r5 + adc r8, r4, r6 + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + sbc r10, r4, r6 + mov r12, #-19 + asr r11, r8, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + ldr r3, [r0] + ldr r4, [r0, #4] + subs r3, r3, r12 + sbcs r4, r4, r11 + str r3, [r0] + str r4, [r0, #4] + ldr r3, [r0, #8] + ldr r4, [r0, #12] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #8] + str r4, [r0, #12] + ldr r3, [r0, #16] + ldr r4, [r0, #20] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #16] + str r4, [r0, #20] + sbcs r7, r7, r11 + sbc r8, r8, lr + str r7, [r0, #24] + str r8, [r0, #28] + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Add modulus (if underflow) + ldr r3, [r1] + ldr r4, [r1, #4] + adds r3, r3, r12 + adcs r4, r4, r11 + str r3, [r1] + str r4, [r1, #4] + ldr r3, [r1, #8] + ldr r4, [r1, #12] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #8] + str r4, [r1, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #16] + str r4, [r1, #20] + adcs r9, r9, r11 + adc r10, r10, lr + str r9, [r1, #24] + str r10, [r1, #28] + add sp, sp, #0x60 + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size fe_ge_add,.-fe_ge_add + .text + .align 2 + .globl fe_ge_sub + .type fe_ge_sub, %function +fe_ge_sub: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + sub sp, sp, #0x60 + str r0, [sp] + str r1, [sp, #4] + str r2, [sp, #8] + str r3, [sp, #12] + ldr r0, [sp] + ldr r1, [sp, #136] + ldr r2, [sp, #132] + # Add + ldr r3, [r1] + ldr r4, [r1, #4] + ldr r5, [r1, #8] + ldr r6, [r1, #12] + ldr r7, [r2] + ldr r8, [r2, #4] + ldr r9, [r2, #8] + ldr r10, [r2, #12] + adds r7, r3, r7 + adcs r8, r4, r8 + adcs r9, r5, r9 + adcs r10, r6, r10 + str r7, [r0] + str r8, [r0, #4] + str r9, [r0, #8] + str r10, [r0, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + ldr r5, [r1, #24] + ldr r6, [r1, #28] + 
ldr r7, [r2, #16] + ldr r8, [r2, #20] + ldr r9, [r2, #24] + ldr r10, [r2, #28] + adcs r7, r3, r7 + adcs r8, r4, r8 + adcs r9, r5, r9 + adc r10, r6, r10 + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + ldr r3, [r0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + subs r3, r3, r12 + sbcs r4, r4, r11 + sbcs r5, r5, r11 + sbcs r6, r6, r11 + sbcs r7, r7, r11 + sbcs r8, r8, r11 + sbcs r9, r9, r11 + sbc r10, r10, lr + str r3, [r0] + str r4, [r0, #4] + str r5, [r0, #8] + str r6, [r0, #12] + str r7, [r0, #16] + str r8, [r0, #20] + str r9, [r0, #24] + str r10, [r0, #28] + ldr r0, [sp, #4] + ldr r1, [sp, #136] + ldr r2, [sp, #132] + # Sub + ldr r3, [r1] + ldr r4, [r1, #4] + ldr r5, [r1, #8] + ldr r6, [r1, #12] + ldr r7, [r2] + ldr r8, [r2, #4] + ldr r9, [r2, #8] + ldr r10, [r2, #12] + subs r7, r3, r7 + sbcs r8, r4, r8 + sbcs r9, r5, r9 + sbcs r10, r6, r10 + str r7, [r0] + str r8, [r0, #4] + str r9, [r0, #8] + str r10, [r0, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + ldr r5, [r1, #24] + ldr r6, [r1, #28] + ldr r7, [r2, #16] + ldr r8, [r2, #20] + ldr r9, [r2, #24] + ldr r10, [r2, #28] + sbcs r7, r3, r7 + sbcs r8, r4, r8 + sbcs r9, r5, r9 + sbc r10, r6, r10 + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Add modulus (if underflow) + ldr r3, [r0] + ldr r4, [r0, #4] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + adds r3, r3, r12 + adcs r4, r4, r11 + adcs r5, r5, r11 + adcs r6, r6, r11 + adcs r7, r7, r11 + adcs r8, r8, r11 + adcs r9, r9, r11 + adc r10, r10, lr + str r3, [r0] + str r4, [r0, #4] + str r5, [r0, #8] + str r6, [r0, #12] + str r7, [r0, #16] + str r8, [r0, #20] + str r9, [r0, #24] + str r10, [r0, #28] + ldr r2, [sp, #160] + ldr r1, [sp] + ldr r0, [sp, #8] + bl fe_mul + ldr r2, [sp, #156] + ldr r1, [sp, #4] + ldr r0, [sp, #4] + bl fe_mul + ldr r2, [sp, #144] + ldr r1, [sp, #152] + ldr r0, [sp, #12] + bl fe_mul + ldr r2, [sp, #148] + ldr r1, [sp, #140] + ldr r0, [sp] + bl fe_mul + add r0, sp, #16 + ldr r1, [sp] + # Double + ldr r3, [r1] + ldr r4, [r1, #4] + ldr r5, [r1, #8] + ldr r6, [r1, #12] + ldr r7, [r1, #16] + ldr r8, [r1, #20] + ldr r9, [r1, #24] + ldr r10, [r1, #28] + adds r3, r3, r3 + adcs r4, r4, r4 + adcs r5, r5, r5 + adcs r6, r6, r6 + adcs r7, r7, r7 + adcs r8, r8, r8 + adcs r9, r9, r9 + adc r10, r10, r10 + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + subs r3, r3, r12 + sbcs r4, r4, r11 + sbcs r5, r5, r11 + sbcs r6, r6, r11 + sbcs r7, r7, r11 + sbcs r8, r8, r11 + sbcs r9, r9, r11 + sbc r10, r10, lr + str r3, [r0] + str r4, [r0, #4] + str r5, [r0, #8] + str r6, [r0, #12] + str r7, [r0, #16] + str r8, [r0, #20] + str r9, [r0, #24] + str r10, [r0, #28] + ldr r0, [sp, #4] + ldr r1, [sp] + ldr r2, [sp, #8] + # Add-Sub + # Add + ldr r3, [r2] + ldr r4, [r2, #4] + ldr r5, [r0] + ldr r6, [r0, #4] + adds r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0] + str r8, [r0, #4] + # Sub + subs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1] + str r10, [r1, #4] + # Add + ldr r3, [r2, #8] + ldr r4, [r2, #12] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #8] + str r8, [r0, #12] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #8] + str r10, [r1, #12] + # Add + 
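# (r12 and lr carry the add's carry and the sub's borrow between rounds as 0/1 values; "adds rX, rX, #-1" regenerates the C flag from them so the two chains can interleave) +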
ldr r3, [r2, #16] + ldr r4, [r2, #20] + ldr r5, [r0, #16] + ldr r6, [r0, #20] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #16] + str r8, [r0, #20] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #16] + str r10, [r1, #20] + # Add + ldr r3, [r2, #24] + ldr r4, [r2, #28] + ldr r5, [r0, #24] + ldr r6, [r0, #28] + adds r12, r12, #-1 + adcs r7, r3, r5 + adc r8, r4, r6 + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + sbc r10, r4, r6 + mov r12, #-19 + asr r11, r8, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + ldr r3, [r0] + ldr r4, [r0, #4] + subs r3, r3, r12 + sbcs r4, r4, r11 + str r3, [r0] + str r4, [r0, #4] + ldr r3, [r0, #8] + ldr r4, [r0, #12] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #8] + str r4, [r0, #12] + ldr r3, [r0, #16] + ldr r4, [r0, #20] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #16] + str r4, [r0, #20] + sbcs r7, r7, r11 + sbc r8, r8, lr + str r7, [r0, #24] + str r8, [r0, #28] + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Add modulus (if underflow) + ldr r3, [r1] + ldr r4, [r1, #4] + adds r3, r3, r12 + adcs r4, r4, r11 + str r3, [r1] + str r4, [r1, #4] + ldr r3, [r1, #8] + ldr r4, [r1, #12] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #8] + str r4, [r1, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #16] + str r4, [r1, #20] + adcs r9, r9, r11 + adc r10, r10, lr + str r9, [r1, #24] + str r10, [r1, #28] + ldr r0, [sp, #12] + ldr r1, [sp, #8] + add r2, sp, #16 + # Add-Sub + # Add + ldr r3, [r2] + ldr r4, [r2, #4] + ldr r5, [r0] + ldr r6, [r0, #4] + adds r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0] + str r8, [r0, #4] + # Sub + subs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1] + str r10, [r1, #4] + # Add + ldr r3, [r2, #8] + ldr r4, [r2, #12] + ldr r5, [r0, #8] + ldr r6, [r0, #12] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #8] + str r8, [r0, #12] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #8] + str r10, [r1, #12] + # Add + ldr r3, [r2, #16] + ldr r4, [r2, #20] + ldr r5, [r0, #16] + ldr r6, [r0, #20] + adds r12, r12, #-1 + adcs r7, r3, r5 + mov r12, #0 + adcs r8, r4, r6 + adc r12, r12, #0 + str r7, [r0, #16] + str r8, [r0, #20] + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + mov lr, #0 + sbcs r10, r4, r6 + adc lr, lr, #0 + str r9, [r1, #16] + str r10, [r1, #20] + # Add + ldr r3, [r2, #24] + ldr r4, [r2, #28] + ldr r5, [r0, #24] + ldr r6, [r0, #28] + adds r12, r12, #-1 + adcs r7, r3, r5 + adc r8, r4, r6 + # Sub + adds lr, lr, #-1 + sbcs r9, r3, r5 + sbc r10, r4, r6 + mov r12, #-19 + asr r11, r8, #31 + # Mask the modulus + and r12, r11, r12 + and lr, r11, #0x7fffffff + # Sub modulus (if overflow) + ldr r3, [r0] + ldr r4, [r0, #4] + subs r3, r3, r12 + sbcs r4, r4, r11 + str r3, [r0] + str r4, [r0, #4] + ldr r3, [r0, #8] + ldr r4, [r0, #12] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #8] + str r4, [r0, #12] + ldr r3, [r0, #16] + ldr r4, [r0, #20] + sbcs r3, r3, r11 + sbcs r4, r4, r11 + str r3, [r0, #16] + str r4, [r0, #20] + sbcs r7, r7, r11 + sbc r8, r8, lr + str r7, [r0, #24] + str r8, [r0, #28] + mov r12, #-19 + asr r11, r10, #31 + # Mask the modulus + and r12, r11, r12 + and 
lr, r11, #0x7fffffff + # Add modulus (if underflow) + ldr r3, [r1] + ldr r4, [r1, #4] + adds r3, r3, r12 + adcs r4, r4, r11 + str r3, [r1] + str r4, [r1, #4] + ldr r3, [r1, #8] + ldr r4, [r1, #12] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #8] + str r4, [r1, #12] + ldr r3, [r1, #16] + ldr r4, [r1, #20] + adcs r3, r3, r11 + adcs r4, r4, r11 + str r3, [r1, #16] + str r4, [r1, #20] + adcs r9, r9, r11 + adc r10, r10, lr + str r9, [r1, #24] + str r10, [r1, #28] + add sp, sp, #0x60 + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size fe_ge_sub,.-fe_ge_sub +#endif /* !__aarch64__ */ +#endif /* WOLFSSL_ARMASM */
diff --git a/client/wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.c b/client/wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.c
new file mode 100644
index 0000000..f7ef379
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.c
@@ -0,0 +1,5581 @@
+/* armv8-32-curve25519 + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */
+ +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./x25519/x25519.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.c + */
+ +#ifndef __aarch64__ + +#include <stdint.h> +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFSSL_ARMASM +#include <wolfssl/wolfcrypt/fe_operations.h> +#include <wolfssl/wolfcrypt/ge_operations.h>
+ +void fe_init() +{ + __asm__ __volatile__ ( + "\n\t" + : + : + : "memory" + ); +}
+ +void fe_frombytes(fe out, const unsigned char* in) +{ + __asm__ __volatile__ ( + "ldrd r2, r3, [%[in]]\n\t" + "ldrd r12, lr, [%[in], #8]\n\t" + "ldrd r4, r5, [%[in], #16]\n\t" + "ldrd r6, r7, [%[in], #24]\n\t" + "and r7, r7, #0x7fffffff\n\t" + "strd r2, r3, [%[out]]\n\t" + "strd r12, lr, [%[out], #8]\n\t" + "strd r4, r5, [%[out], #16]\n\t" + "strd r6, r7, [%[out], #24]\n\t" + : [out] "+r" (out), [in] "+r" (in) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7" + ); +}
+ +void fe_tobytes(unsigned char* out, const fe n) +{ + __asm__ __volatile__ ( + "ldrd r2, r3, [%[n]]\n\t" + "ldrd r12, lr, [%[n], #8]\n\t" + "ldrd r4, r5, [%[n], #16]\n\t" + "ldrd r6, r7, [%[n], #24]\n\t" + "adds r8, r2, #19\n\t" + "adcs r8, r3, #0\n\t" + "adcs r8, r12, #0\n\t" + "adcs r8, lr, #0\n\t" + "adcs r8, r4, #0\n\t" + "adcs r8, r5, #0\n\t" + "adcs r8, r6, #0\n\t" + "adc r8, r7, #0\n\t" + "asr r8, r8, #31\n\t" + "and r8, r8, #19\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, #0\n\t" + "adcs r12, r12, #0\n\t" + "adcs lr, lr, #0\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "and r7, r7, #0x7fffffff\n\t" + "strd r2, r3, [%[out]]\n\t" + "strd r12, lr, [%[out], #8]\n\t" + "strd r4, r5, [%[out], #16]\n\t" + "strd r6, r7, [%[out], #24]\n\t" + : [out] "+r" (out), [n] "+r" (n) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" + ); +}
+ +void fe_1(fe n) +{ + __asm__
__volatile__ ( + /* Set one */ + "mov r2, #1\n\t" + "mov r1, #0\n\t" + "strd r2, r1, [%[n]]\n\t" + "strd r1, r1, [%[n], #8]\n\t" + "strd r1, r1, [%[n], #16]\n\t" + "strd r1, r1, [%[n], #24]\n\t" + : [n] "+r" (n) + : + : "memory", "r1", "r2" + ); +} + +void fe_0(fe n) +{ + __asm__ __volatile__ ( + /* Set zero */ + "mov r1, #0\n\t" + "strd r1, r1, [%[n]]\n\t" + "strd r1, r1, [%[n], #8]\n\t" + "strd r1, r1, [%[n], #16]\n\t" + "strd r1, r1, [%[n], #24]\n\t" + : [n] "+r" (n) + : + : "memory", "r1" + ); +} + +void fe_copy(fe r, const fe a) +{ + __asm__ __volatile__ ( + /* Copy */ + "ldrd r2, r3, [%[a]]\n\t" + "ldrd r12, lr, [%[a], #8]\n\t" + "strd r2, r3, [%[r]]\n\t" + "strd r12, lr, [%[r], #8]\n\t" + "ldrd r2, r3, [%[a], #16]\n\t" + "ldrd r12, lr, [%[a], #24]\n\t" + "strd r2, r3, [%[r], #16]\n\t" + "strd r12, lr, [%[r], #24]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r2", "r3", "r12", "lr" + ); +} + +void fe_sub(fe r, const fe a, const fe b) +{ + __asm__ __volatile__ ( + /* Sub */ + "ldrd r12, lr, [%[a]]\n\t" + "ldrd r4, r5, [%[a], #8]\n\t" + "ldrd r6, r7, [%[b]]\n\t" + "ldrd r8, r9, [%[b], #8]\n\t" + "subs r6, r12, r6\n\t" + "sbcs r7, lr, r7\n\t" + "sbcs r8, r4, r8\n\t" + "sbcs r9, r5, r9\n\t" + "strd r6, r7, [%[r]]\n\t" + "strd r8, r9, [%[r], #8]\n\t" + "ldrd r12, lr, [%[a], #16]\n\t" + "ldrd r4, r5, [%[a], #24]\n\t" + "ldrd r6, r7, [%[b], #16]\n\t" + "ldrd r8, r9, [%[b], #24]\n\t" + "sbcs r6, r12, r6\n\t" + "sbcs r7, lr, r7\n\t" + "sbcs r8, r4, r8\n\t" + "sbc r9, r5, r9\n\t" + "mov r10, #-19\n\t" + "asr r3, r9, #31\n\t" + /* Mask the modulus */ + "and r10, r3, r10\n\t" + "and r11, r3, #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd r12, lr, [%[r]]\n\t" + "ldrd r4, r5, [%[r], #8]\n\t" + "adds r12, r12, r10\n\t" + "adcs lr, lr, r3\n\t" + "adcs r4, r4, r3\n\t" + "adcs r5, r5, r3\n\t" + "adcs r6, r6, r3\n\t" + "adcs r7, r7, r3\n\t" + "adcs r8, r8, r3\n\t" + "adc r9, r9, r11\n\t" + "strd r12, lr, [%[r]]\n\t" + "strd r4, r5, [%[r], #8]\n\t" + "strd r6, r7, [%[r], #16]\n\t" + "strd r8, r9, [%[r], #24]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +void fe_add(fe r, const fe a, const fe b) +{ + __asm__ __volatile__ ( + /* Add */ + "ldrd r12, lr, [%[a]]\n\t" + "ldrd r4, r5, [%[a], #8]\n\t" + "ldrd r6, r7, [%[b]]\n\t" + "ldrd r8, r9, [%[b], #8]\n\t" + "adds r6, r12, r6\n\t" + "adcs r7, lr, r7\n\t" + "adcs r8, r4, r8\n\t" + "adcs r9, r5, r9\n\t" + "strd r6, r7, [%[r]]\n\t" + "strd r8, r9, [%[r], #8]\n\t" + "ldrd r12, lr, [%[a], #16]\n\t" + "ldrd r4, r5, [%[a], #24]\n\t" + "ldrd r6, r7, [%[b], #16]\n\t" + "ldrd r8, r9, [%[b], #24]\n\t" + "adcs r6, r12, r6\n\t" + "adcs r7, lr, r7\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "mov r10, #-19\n\t" + "asr r3, r9, #31\n\t" + /* Mask the modulus */ + "and r10, r3, r10\n\t" + "and r11, r3, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd r12, lr, [%[r]]\n\t" + "ldrd r4, r5, [%[r], #8]\n\t" + "subs r12, r12, r10\n\t" + "sbcs lr, lr, r3\n\t" + "sbcs r4, r4, r3\n\t" + "sbcs r5, r5, r3\n\t" + "sbcs r6, r6, r3\n\t" + "sbcs r7, r7, r3\n\t" + "sbcs r8, r8, r3\n\t" + "sbc r9, r9, r11\n\t" + "strd r12, lr, [%[r]]\n\t" + "strd r4, r5, [%[r], #8]\n\t" + "strd r6, r7, [%[r], #16]\n\t" + "strd r8, r9, [%[r], #24]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +void fe_neg(fe r, const fe a) +{ + __asm__ __volatile__ ( + "mov r5, #-1\n\t" 
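+ /* negate: r = p - a computed word by word, where p = 2^255-19 has little-endian words 0xffffffed, 0xffffffff (x6), 0x7fffffff */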
+ "mov r4, #-19\n\t" + "ldrd r2, r3, [%[a]]\n\t" + "ldrd r12, lr, [%[a], #8]\n\t" + "subs r2, r4, r2\n\t" + "sbcs r3, r5, r3\n\t" + "sbcs r12, r5, r12\n\t" + "sbcs lr, r5, lr\n\t" + "strd r2, r3, [%[r]]\n\t" + "strd r12, lr, [%[r], #8]\n\t" + "mov r4, #0x7fffffff\n\t" + "ldrd r2, r3, [%[a], #16]\n\t" + "ldrd r12, lr, [%[a], #24]\n\t" + "sbcs r2, r5, r2\n\t" + "sbcs r3, r5, r3\n\t" + "sbcs r12, r5, r12\n\t" + "sbc lr, r4, lr\n\t" + "strd r2, r3, [%[r], #16]\n\t" + "strd r12, lr, [%[r], #24]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5" + ); +} + +int fe_isnonzero(const fe a) +{ + __asm__ __volatile__ ( + "ldrd r2, r3, [%[a]]\n\t" + "ldrd r12, lr, [%[a], #8]\n\t" + "ldrd r4, r5, [%[a], #16]\n\t" + "ldrd r6, r7, [%[a], #24]\n\t" + "adds r1, r2, #19\n\t" + "adcs r1, r3, #0\n\t" + "adcs r1, r12, #0\n\t" + "adcs r1, lr, #0\n\t" + "adcs r1, r4, #0\n\t" + "adcs r1, r5, #0\n\t" + "adcs r1, r6, #0\n\t" + "adc r1, r7, #0\n\t" + "asr r1, r1, #31\n\t" + "and r1, r1, #19\n\t" + "adds r2, r2, r1\n\t" + "adcs r3, r3, #0\n\t" + "adcs r12, r12, #0\n\t" + "adcs lr, lr, #0\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "and r7, r7, #0x7fffffff\n\t" + "orr r2, r2, r3\n\t" + "orr r12, r12, lr\n\t" + "orr r4, r4, r5\n\t" + "orr r6, r6, r7\n\t" + "orr r12, r12, r4\n\t" + "orr r2, r2, r6\n\t" + "orr %[a], r2, r12\n\t" + : [a] "+r" (a) + : + : "memory", "r1", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" + ); + return (uint32_t)(size_t)a; +} + +int fe_isnegative(const fe a) +{ + __asm__ __volatile__ ( + "ldrd r2, r3, [%[a]]\n\t" + "ldrd r12, lr, [%[a], #8]\n\t" + "adds r1, r2, #19\n\t" + "adcs r1, r3, #0\n\t" + "adcs r1, r12, #0\n\t" + "adcs r1, lr, #0\n\t" + "ldrd r2, r3, [%[a], #16]\n\t" + "ldrd r12, lr, [%[a], #24]\n\t" + "adcs r1, r2, #0\n\t" + "adcs r1, r3, #0\n\t" + "adcs r1, r12, #0\n\t" + "ldr r2, [%[a]]\n\t" + "adc r1, lr, #0\n\t" + "and %[a], r2, #1\n\t" + "lsr r1, r1, #31\n\t" + "eor %[a], %[a], r1\n\t" + : [a] "+r" (a) + : + : "memory", "r1", "r2", "r3", "r12", "lr" + ); + return (uint32_t)(size_t)a; +} + +void fe_cmov_table(fe* r, fe* base, signed char b) +{ + __asm__ __volatile__ ( + "sxtb %[b], %[b]\n\t" + "sbfx r7, %[b], #7, #1\n\t" + "eor r10, %[b], r7\n\t" + "sub r10, r10, r7\n\t" + "mov r3, #1\n\t" + "mov r12, #0\n\t" + "mov lr, #1\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r6, #0\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #31\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base]]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #32]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #64]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #30\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base]]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #32]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, 
[%[base], #64]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #29\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base]]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #32]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #64]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #28\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base]]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #32]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #64]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #27\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base]]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #32]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #64]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #26\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base]]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #32]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #64]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #25\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base]]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #32]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #64]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #24\n\t" + "ror r7, r7, r10\n\t" + 
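/* the asr that follows smears the rotated bit across r7, giving an all-ones mask exactly when the requested index |b| is 8 */ +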
"asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base]]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #32]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #64]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "sub %[base], %[base], #0x2a0\n\t" + "mov r8, #-19\n\t" + "mov r9, #-1\n\t" + "subs r8, r8, r5\n\t" + "sbcs r9, r9, r6\n\t" + "sbc r11, r11, r11\n\t" + "asr r10, %[b], #31\n\t" + "eor r7, r3, lr\n\t" + "and r7, r7, r10\n\t" + "eor r3, r3, r7\n\t" + "eor lr, lr, r7\n\t" + "eor r7, r12, r4\n\t" + "and r7, r7, r10\n\t" + "eor r12, r12, r7\n\t" + "eor r4, r4, r7\n\t" + "eor r8, r8, r5\n\t" + "and r8, r8, r10\n\t" + "eor r5, r5, r8\n\t" + "eor r9, r9, r6\n\t" + "and r9, r9, r10\n\t" + "eor r6, r6, r9\n\t" + "strd r3, r12, [%[r]]\n\t" + "strd lr, r4, [%[r], #32]\n\t" + "strd r5, r6, [%[r], #64]\n\t" + "sbfx r7, %[b], #7, #1\n\t" + "eor r10, %[b], r7\n\t" + "sub r10, r10, r7\n\t" + "mov r3, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r6, #0\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #31\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #8]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #40]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #72]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #30\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #8]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #40]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #72]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #29\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #8]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #40]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #72]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #28\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #8]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + 
"eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #40]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #72]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #27\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #8]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #40]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #72]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #26\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #8]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #40]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #72]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #25\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #8]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #40]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #72]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #24\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #8]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #40]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #72]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "sub %[base], %[base], #0x2a0\n\t" + "mov r8, #-1\n\t" + "mov r9, #-1\n\t" + "rsbs r11, r11, #0\n\t" + "sbcs r8, r8, r5\n\t" + "sbcs r9, r9, r6\n\t" + "sbc r11, r11, r11\n\t" + "asr r10, %[b], #31\n\t" + "eor r7, r3, lr\n\t" + "and r7, r7, r10\n\t" + "eor r3, r3, r7\n\t" + "eor lr, lr, r7\n\t" + "eor r7, r12, r4\n\t" + "and r7, r7, r10\n\t" + "eor r12, r12, r7\n\t" + "eor r4, r4, r7\n\t" + "eor r8, r8, r5\n\t" + "and r8, r8, r10\n\t" + "eor r5, r5, r8\n\t" + "eor r9, r9, r6\n\t" + "and r9, r9, r10\n\t" + "eor r6, 
r6, r9\n\t" + "strd r3, r12, [%[r], #8]\n\t" + "strd lr, r4, [%[r], #40]\n\t" + "strd r5, r6, [%[r], #72]\n\t" + "sbfx r7, %[b], #7, #1\n\t" + "eor r10, %[b], r7\n\t" + "sub r10, r10, r7\n\t" + "mov r3, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r6, #0\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #31\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #16]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #48]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #80]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #30\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #16]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #48]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #80]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #29\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #16]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #48]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #80]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #28\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #16]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #48]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #80]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #27\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #16]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #48]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #80]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + 
"eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #26\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #16]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #48]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #80]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #25\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #16]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #48]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #80]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #24\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #16]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #48]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #80]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "sub %[base], %[base], #0x2a0\n\t" + "mov r8, #-1\n\t" + "mov r9, #-1\n\t" + "rsbs r11, r11, #0\n\t" + "sbcs r8, r8, r5\n\t" + "sbcs r9, r9, r6\n\t" + "sbc r11, r11, r11\n\t" + "asr r10, %[b], #31\n\t" + "eor r7, r3, lr\n\t" + "and r7, r7, r10\n\t" + "eor r3, r3, r7\n\t" + "eor lr, lr, r7\n\t" + "eor r7, r12, r4\n\t" + "and r7, r7, r10\n\t" + "eor r12, r12, r7\n\t" + "eor r4, r4, r7\n\t" + "eor r8, r8, r5\n\t" + "and r8, r8, r10\n\t" + "eor r5, r5, r8\n\t" + "eor r9, r9, r6\n\t" + "and r9, r9, r10\n\t" + "eor r6, r6, r9\n\t" + "strd r3, r12, [%[r], #16]\n\t" + "strd lr, r4, [%[r], #48]\n\t" + "strd r5, r6, [%[r], #80]\n\t" + "sbfx r7, %[b], #7, #1\n\t" + "eor r10, %[b], r7\n\t" + "sub r10, r10, r7\n\t" + "mov r3, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r6, #0\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #31\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #24]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #56]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #88]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #30\n\t" + "ror 
r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #24]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #56]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #88]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #29\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #24]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #56]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #88]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #28\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #24]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #56]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #88]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #27\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #24]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #56]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #88]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #26\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #24]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #56]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #88]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #25\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #24]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], 
#56]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #88]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "add %[base], %[base], #0x60\n\t" + "mov r7, #0x80000000\n\t" + "ror r7, r7, #24\n\t" + "ror r7, r7, r10\n\t" + "asr r7, r7, #31\n\t" + "ldrd r8, r9, [%[base], #24]\n\t" + "eor r8, r8, r3\n\t" + "eor r9, r9, r12\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r3, r3, r8\n\t" + "eor r12, r12, r9\n\t" + "ldrd r8, r9, [%[base], #56]\n\t" + "eor r8, r8, lr\n\t" + "eor r9, r9, r4\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor lr, lr, r8\n\t" + "eor r4, r4, r9\n\t" + "ldrd r8, r9, [%[base], #88]\n\t" + "eor r8, r8, r5\n\t" + "eor r9, r9, r6\n\t" + "and r8, r8, r7\n\t" + "and r9, r9, r7\n\t" + "eor r5, r5, r8\n\t" + "eor r6, r6, r9\n\t" + "sub %[base], %[base], #0x2a0\n\t" + "mov r8, #-1\n\t" + "mov r9, #0x7fffffff\n\t" + "rsbs r11, r11, #0\n\t" + "sbcs r8, r8, r5\n\t" + "sbc r9, r9, r6\n\t" + "asr r10, %[b], #31\n\t" + "eor r7, r3, lr\n\t" + "and r7, r7, r10\n\t" + "eor r3, r3, r7\n\t" + "eor lr, lr, r7\n\t" + "eor r7, r12, r4\n\t" + "and r7, r7, r10\n\t" + "eor r12, r12, r7\n\t" + "eor r4, r4, r7\n\t" + "eor r8, r8, r5\n\t" + "and r8, r8, r10\n\t" + "eor r5, r5, r8\n\t" + "eor r9, r9, r6\n\t" + "and r9, r9, r10\n\t" + "eor r6, r6, r9\n\t" + "strd r3, r12, [%[r], #24]\n\t" + "strd lr, r4, [%[r], #56]\n\t" + "strd r5, r6, [%[r], #88]\n\t" + : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +void fe_mul(fe r, const fe a, const fe b) +{ + __asm__ __volatile__ ( + "sub sp, sp, #0x40\n\t" + /* Multiply */ + "ldr r7, [%[a]]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b]]\n\t" + "ldr lr, [%[b], #4]\n\t" + /* A[0] * B[0] = 0 */ + "umull r4, r5, r7, r9\n\t" + "str r4, [sp]\n\t" + /* A[0] * B[1] = 1 */ + "umull r3, r6, r7, lr\n\t" + "adds r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + /* A[1] * B[0] = 1 */ + "umull r3, r12, r8, r9\n\t" + "adds r5, r5, r3\n\t" + "mov r4, #0\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #4]\n\t" + /* A[2] * B[0] = 2 */ + "ldr r10, [%[a], #8]\n\t" + "umull r3, r12, r10, r9\n\t" + "adds r6, r6, r3\n\t" + "adc r4, r4, r12\n\t" + /* A[1] * B[1] = 2 */ + "umull r3, r12, r8, lr\n\t" + "adds r6, r6, r3\n\t" + "mov r5, #0\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + /* A[0] * B[2] = 2 */ + "ldr r11, [%[b], #8]\n\t" + "umull r3, r12, r7, r11\n\t" + "adds r6, r6, r3\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + "str r6, [sp, #8]\n\t" + /* A[0] * B[3] = 3 */ + "ldr r11, [%[b], #12]\n\t" + "umull r3, r12, r7, r11\n\t" + "adds r4, r4, r3\n\t" + "mov r6, #0\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + /* A[1] * B[2] = 3 */ + "ldr r11, [%[b], #8]\n\t" + "umull r3, r12, r8, r11\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + /* A[2] * B[1] = 3 */ + "umull r3, r12, r10, lr\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + /* A[3] * B[0] = 3 */ + "ldr r10, [%[a], #12]\n\t" + "umull r3, r12, r10, r9\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + "str r4, [sp, #12]\n\t" + /* A[4] * B[0] = 4 */ + "ldr r10, [%[a], #16]\n\t" + "umull r3, r12, r10, r9\n\t" + "adds r5, r5, r3\n\t" + "mov r4, #0\n\t" + "adcs r6, r6, r12\n\t" + 
"adc r4, r4, #0\n\t" + /* A[3] * B[1] = 4 */ + "ldr r10, [%[a], #12]\n\t" + "umull r3, r12, r10, lr\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + /* A[2] * B[2] = 4 */ + "ldr r10, [%[a], #8]\n\t" + "umull r3, r12, r10, r11\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * B[3] = 4 */ + "ldr r11, [%[b], #12]\n\t" + "umull r3, r12, r8, r11\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + /* A[0] * B[4] = 4 */ + "ldr r11, [%[b], #16]\n\t" + "umull r3, r12, r7, r11\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #16]\n\t" + /* A[0] * B[5] = 5 */ + "ldr r11, [%[b], #20]\n\t" + "umull r3, r12, r7, r11\n\t" + "adds r6, r6, r3\n\t" + "mov r5, #0\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + /* A[1] * B[4] = 5 */ + "ldr r11, [%[b], #16]\n\t" + "umull r3, r12, r8, r11\n\t" + "adds r6, r6, r3\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + /* A[2] * B[3] = 5 */ + "ldr r11, [%[b], #12]\n\t" + "umull r3, r12, r10, r11\n\t" + "adds r6, r6, r3\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + /* A[3] * B[2] = 5 */ + "ldr r10, [%[a], #12]\n\t" + "ldr r11, [%[b], #8]\n\t" + "umull r3, r12, r10, r11\n\t" + "adds r6, r6, r3\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + /* A[4] * B[1] = 5 */ + "ldr r10, [%[a], #16]\n\t" + "umull r3, r12, r10, lr\n\t" + "adds r6, r6, r3\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + /* A[5] * B[0] = 5 */ + "ldr r10, [%[a], #20]\n\t" + "umull r3, r12, r10, r9\n\t" + "adds r6, r6, r3\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + "str r6, [sp, #20]\n\t" + /* A[6] * B[0] = 6 */ + "ldr r10, [%[a], #24]\n\t" + "umull r3, r12, r10, r9\n\t" + "adds r4, r4, r3\n\t" + "mov r6, #0\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + /* A[5] * B[1] = 6 */ + "ldr r10, [%[a], #20]\n\t" + "umull r3, r12, r10, lr\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + /* A[4] * B[2] = 6 */ + "ldr r10, [%[a], #16]\n\t" + "umull r3, r12, r10, r11\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + /* A[3] * B[3] = 6 */ + "ldr r10, [%[a], #12]\n\t" + "ldr r11, [%[b], #12]\n\t" + "umull r3, r12, r10, r11\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + /* A[2] * B[4] = 6 */ + "ldr r10, [%[a], #8]\n\t" + "ldr r11, [%[b], #16]\n\t" + "umull r3, r12, r10, r11\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + /* A[1] * B[5] = 6 */ + "ldr r11, [%[b], #20]\n\t" + "umull r3, r12, r8, r11\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + /* A[0] * B[6] = 6 */ + "ldr r11, [%[b], #24]\n\t" + "umull r3, r12, r7, r11\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + "str r4, [sp, #24]\n\t" + /* A[0] * B[7] = 7 */ + "ldr r11, [%[b], #28]\n\t" + "umull r3, r12, r7, r11\n\t" + "adds r5, r5, r3\n\t" + "mov r4, #0\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * B[6] = 7 */ + "ldr r11, [%[b], #24]\n\t" + "umull r3, r12, r8, r11\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + /* A[2] * B[5] = 7 */ + "ldr r11, [%[b], #20]\n\t" + "umull r3, r12, r10, r11\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + /* A[3] * B[4] = 7 */ + "ldr r10, [%[a], #12]\n\t" + "ldr r11, [%[b], #16]\n\t" + "umull r3, r12, r10, r11\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + 
/* A[4] * B[3] = 7 */ + "ldr r10, [%[a], #16]\n\t" + "ldr r11, [%[b], #12]\n\t" + "umull r3, r12, r10, r11\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + /* A[5] * B[2] = 7 */ + "ldr r10, [%[a], #20]\n\t" + "ldr r11, [%[b], #8]\n\t" + "umull r3, r12, r10, r11\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + /* A[6] * B[1] = 7 */ + "ldr r10, [%[a], #24]\n\t" + "umull r3, r12, r10, lr\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + /* A[7] * B[0] = 7 */ + "ldr r10, [%[a], #28]\n\t" + "umull r3, r12, r10, r9\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #28]\n\t" + "ldr r7, [%[a], #24]\n\t" + "ldr r9, [%[b], #24]\n\t" + /* A[7] * B[1] = 8 */ + "umull r3, r12, r10, lr\n\t" + "adds r6, r6, r3\n\t" + "mov r5, #0\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + /* A[6] * B[2] = 8 */ + "umull r3, r12, r7, r11\n\t" + "adds r6, r6, r3\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + /* A[5] * B[3] = 8 */ + "ldr r10, [%[a], #20]\n\t" + "ldr r11, [%[b], #12]\n\t" + "umull r3, r12, r10, r11\n\t" + "adds r6, r6, r3\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + /* A[4] * B[4] = 8 */ + "ldr r10, [%[a], #16]\n\t" + "ldr r11, [%[b], #16]\n\t" + "umull r3, r12, r10, r11\n\t" + "adds r6, r6, r3\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + /* A[3] * B[5] = 8 */ + "ldr r10, [%[a], #12]\n\t" + "ldr r11, [%[b], #20]\n\t" + "umull r3, r12, r10, r11\n\t" + "adds r6, r6, r3\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + /* A[2] * B[6] = 8 */ + "ldr r10, [%[a], #8]\n\t" + "umull r3, r12, r10, r9\n\t" + "adds r6, r6, r3\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + /* A[1] * B[7] = 8 */ + "ldr r11, [%[b], #28]\n\t" + "umull r3, r12, r8, r11\n\t" + "adds r6, r6, r3\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + "str r6, [sp, #32]\n\t" + "ldr r8, [%[a], #28]\n\t" + "mov lr, r11\n\t" + /* A[2] * B[7] = 9 */ + "umull r3, r12, r10, lr\n\t" + "adds r4, r4, r3\n\t" + "mov r6, #0\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + /* A[3] * B[6] = 9 */ + "ldr r10, [%[a], #12]\n\t" + "umull r3, r12, r10, r9\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + /* A[4] * B[5] = 9 */ + "ldr r10, [%[a], #16]\n\t" + "ldr r11, [%[b], #20]\n\t" + "umull r3, r12, r10, r11\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + /* A[5] * B[4] = 9 */ + "ldr r10, [%[a], #20]\n\t" + "ldr r11, [%[b], #16]\n\t" + "umull r3, r12, r10, r11\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + /* A[6] * B[3] = 9 */ + "ldr r11, [%[b], #12]\n\t" + "umull r3, r12, r7, r11\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + /* A[7] * B[2] = 9 */ + "ldr r11, [%[b], #8]\n\t" + "umull r3, r12, r8, r11\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + "str r4, [sp, #36]\n\t" + /* A[7] * B[3] = 10 */ + "ldr r11, [%[b], #12]\n\t" + "umull r3, r12, r8, r11\n\t" + "adds r5, r5, r3\n\t" + "mov r4, #0\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + /* A[6] * B[4] = 10 */ + "ldr r11, [%[b], #16]\n\t" + "umull r3, r12, r7, r11\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + /* A[5] * B[5] = 10 */ + "ldr r11, [%[b], #20]\n\t" + "umull r3, r12, r10, r11\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + /* A[4] * B[6] = 10 */ + "ldr r10, [%[a], #16]\n\t" + "umull r3, r12, 
r10, r9\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + /* A[3] * B[7] = 10 */ + "ldr r10, [%[a], #12]\n\t" + "umull r3, r12, r10, lr\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #40]\n\t" + /* A[4] * B[7] = 11 */ + "ldr r10, [%[a], #16]\n\t" + "umull r3, r12, r10, lr\n\t" + "adds r6, r6, r3\n\t" + "mov r5, #0\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + /* A[5] * B[6] = 11 */ + "ldr r10, [%[a], #20]\n\t" + "umull r3, r12, r10, r9\n\t" + "adds r6, r6, r3\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + /* A[6] * B[5] = 11 */ + "umull r3, r12, r7, r11\n\t" + "adds r6, r6, r3\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + /* A[7] * B[4] = 11 */ + "ldr r11, [%[b], #16]\n\t" + "umull r3, r12, r8, r11\n\t" + "adds r6, r6, r3\n\t" + "adcs r4, r4, r12\n\t" + "adc r5, r5, #0\n\t" + "str r6, [sp, #44]\n\t" + /* A[7] * B[5] = 12 */ + "ldr r11, [%[b], #20]\n\t" + "umull r3, r12, r8, r11\n\t" + "adds r4, r4, r3\n\t" + "mov r6, #0\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + /* A[6] * B[6] = 12 */ + "umull r3, r12, r7, r9\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + /* A[5] * B[7] = 12 */ + "umull r3, r12, r10, lr\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r12\n\t" + "adc r6, r6, #0\n\t" + "str r4, [sp, #48]\n\t" + /* A[6] * B[7] = 13 */ + "umull r3, r12, r7, lr\n\t" + "adds r5, r5, r3\n\t" + "mov r4, #0\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + /* A[7] * B[6] = 13 */ + "umull r3, r12, r8, r9\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r12\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #52]\n\t" + /* A[7] * B[7] = 14 */ + "umull r3, r12, r8, lr\n\t" + "adds r6, r6, r3\n\t" + "adc r4, r4, r12\n\t" + "str r6, [sp, #56]\n\t" + "str r4, [sp, #60]\n\t" + /* Reduce */ + /* Load bottom half */ + "ldrd r4, r5, [sp]\n\t" + "ldrd r6, r7, [sp, #8]\n\t" + "ldrd r8, r9, [sp, #16]\n\t" + "ldrd r10, r11, [sp, #24]\n\t" + "lsr r3, r11, #31\n\t" + "and r11, r11, #0x7fffffff\n\t" + "mov lr, #19\n\t" + "ldr %[a], [sp, #32]\n\t" + "orr r3, r3, %[a], lsl #1\n\t" + "umull r3, r12, lr, r3\n\t" + "adds r4, r4, r3\n\t" + "mov %[b], #0\n\t" + "adcs r5, r5, r12\n\t" + "adc %[b], %[b], #0\n\t" + "lsr r3, %[a], #31\n\t" + "ldr %[a], [sp, #36]\n\t" + "orr r3, r3, %[a], lsl #1\n\t" + "umull r3, r12, lr, r3\n\t" + "add r12, r12, %[b]\n\t" + "adds r5, r5, r3\n\t" + "mov %[b], #0\n\t" + "adcs r6, r6, r12\n\t" + "adc %[b], %[b], #0\n\t" + "lsr r3, %[a], #31\n\t" + "ldr %[a], [sp, #40]\n\t" + "orr r3, r3, %[a], lsl #1\n\t" + "umull r3, r12, lr, r3\n\t" + "add r12, r12, %[b]\n\t" + "adds r6, r6, r3\n\t" + "mov %[b], #0\n\t" + "adcs r7, r7, r12\n\t" + "adc %[b], %[b], #0\n\t" + "lsr r3, %[a], #31\n\t" + "ldr %[a], [sp, #44]\n\t" + "orr r3, r3, %[a], lsl #1\n\t" + "umull r3, r12, lr, r3\n\t" + "add r12, r12, %[b]\n\t" + "adds r7, r7, r3\n\t" + "mov %[b], #0\n\t" + "adcs r8, r8, r12\n\t" + "adc %[b], %[b], #0\n\t" + "lsr r3, %[a], #31\n\t" + "ldr %[a], [sp, #48]\n\t" + "orr r3, r3, %[a], lsl #1\n\t" + "umull r3, r12, lr, r3\n\t" + "add r12, r12, %[b]\n\t" + "adds r8, r8, r3\n\t" + "mov %[b], #0\n\t" + "adcs r9, r9, r12\n\t" + "adc %[b], %[b], #0\n\t" + "lsr r3, %[a], #31\n\t" + "ldr %[a], [sp, #52]\n\t" + "orr r3, r3, %[a], lsl #1\n\t" + "umull r3, r12, lr, r3\n\t" + "add r12, r12, %[b]\n\t" + "adds r9, r9, r3\n\t" + "mov %[b], #0\n\t" + "adcs r10, r10, r12\n\t" + "adc %[b], %[b], #0\n\t" + "lsr r3, %[a], #31\n\t" + "ldr %[a], [sp, #56]\n\t" + "orr r3, r3, %[a], lsl #1\n\t" + "umull 
r3, r12, lr, r3\n\t" + "add r12, r12, %[b]\n\t" + "adds r10, r10, r3\n\t" + "mov %[b], #0\n\t" + "adcs r11, r11, r12\n\t" + "adc %[b], %[b], #0\n\t" + "lsr r3, %[a], #31\n\t" + "ldr %[a], [sp, #60]\n\t" + "orr r3, r3, %[a], lsl #1\n\t" + "umull r3, r12, lr, r3\n\t" + "adds r11, r11, r3\n\t" + "adc r3, r12, %[b]\n\t" + /* Overflow */ + "lsl r3, r3, #1\n\t" + "orr r3, r3, r11, lsr #31\n\t" + "mul r3, r3, lr\n\t" + "and r11, r11, #0x7fffffff\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adc r11, r11, #0\n\t" + /* Reduce if top bit set */ + "asr r3, r11, #31\n\t" + "and r3, r3, lr\n\t" + "and r11, r11, #0x7fffffff\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adc r11, r11, #0\n\t" + /* Store */ + "strd r4, r5, [%[r]]\n\t" + "strd r6, r7, [%[r], #8]\n\t" + "strd r8, r9, [%[r], #16]\n\t" + "strd r10, r11, [%[r], #24]\n\t" + "add sp, sp, #0x40\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +void fe_sq(fe r, const fe a) +{ + __asm__ __volatile__ ( + "sub sp, sp, #0x40\n\t" + /* Square */ + "ldr r7, [%[a]]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[a], #8]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r12, [%[a], #16]\n\t" + /* A[0] * A[0] = 0 */ + "umull r4, r5, r7, r7\n\t" + "str r4, [sp]\n\t" + /* A[0] * A[1] = 1 */ + "umull r2, r3, r7, r8\n\t" + "mov r6, #0\n\t" + "adds r5, r5, r2\n\t" + "adc r6, r6, r3\n\t" + "adds r5, r5, r2\n\t" + "mov r4, #0\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #4]\n\t" + /* A[1] * A[1] = 2 */ + "umull r2, r3, r8, r8\n\t" + "adds r6, r6, r2\n\t" + "adc r4, r4, r3\n\t" + /* A[0] * A[2] = 2 */ + "umull r2, r3, r7, r9\n\t" + "adds r6, r6, r2\n\t" + "mov r5, #0\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "str r6, [sp, #8]\n\t" + /* A[0] * A[3] = 3 */ + "umull r2, r3, r7, r10\n\t" + "adds r4, r4, r2\n\t" + "adc r5, r5, r3\n\t" + "adds r4, r4, r2\n\t" + "mov r6, #0\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + /* A[1] * A[2] = 3 */ + "umull r2, r3, r8, r9\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "str r4, [sp, #12]\n\t" + /* A[2] * A[2] = 4 */ + "umull r2, r3, r9, r9\n\t" + "adds r5, r5, r2\n\t" + "mov r4, #0\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * A[3] = 4 */ + "umull r2, r3, r8, r10\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + /* A[0] * A[4] = 4 */ + "umull r2, r3, r7, r12\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #16]\n\t" + /* A[0] * A[5] = 5 */ + "ldr r11, [%[a], #20]\n\t" + "umull r2, r3, r7, r11\n\t" + "adds r6, r6, r2\n\t" + "mov r5, #0\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + /* A[1] * A[4] = 5 */ + "umull r2, r3, r8, r12\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + 
/* A[2] * A[3] = 5 */ + "umull r2, r3, r9, r10\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "str r6, [sp, #20]\n\t" + /* A[3] * A[3] = 6 */ + "umull r2, r3, r10, r10\n\t" + "adds r4, r4, r2\n\t" + "mov r6, #0\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + /* A[2] * A[4] = 6 */ + "umull r2, r3, r9, r12\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + /* A[1] * A[5] = 6 */ + "umull r2, r3, r8, r11\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + /* A[0] * A[6] = 6 */ + "ldr r11, [%[a], #24]\n\t" + "umull r2, r3, r7, r11\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "str r4, [sp, #24]\n\t" + /* A[0] * A[7] = 7 */ + "ldr r11, [%[a], #28]\n\t" + "umull r2, r3, r7, r11\n\t" + "adds r5, r5, r2\n\t" + "mov r4, #0\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * A[6] = 7 */ + "ldr r11, [%[a], #24]\n\t" + "umull r2, r3, r8, r11\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + /* A[2] * A[5] = 7 */ + "ldr r11, [%[a], #20]\n\t" + "umull r2, r3, r9, r11\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + /* A[3] * A[4] = 7 */ + "umull r2, r3, r10, r12\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #28]\n\t" + /* A[4] * A[4] = 8 */ + "umull r2, r3, r12, r12\n\t" + "adds r6, r6, r2\n\t" + "mov r5, #0\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + /* A[3] * A[5] = 8 */ + "umull r2, r3, r10, r11\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + /* A[2] * A[6] = 8 */ + "ldr r11, [%[a], #24]\n\t" + "umull r2, r3, r9, r11\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + /* A[1] * A[7] = 8 */ + "ldr r11, [%[a], #28]\n\t" + "umull r2, r3, r8, r11\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "str r6, [sp, #32]\n\t" + "ldr r7, [%[a], #20]\n\t" + /* A[2] * A[7] = 9 */ + "umull r2, r3, r9, r11\n\t" + "adds r4, r4, r2\n\t" + "mov r6, #0\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + /* A[3] * A[6] = 9 */ + "ldr r11, [%[a], #24]\n\t" + "umull r2, r3, r10, r11\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + /* A[4] * A[5] = 9 */ + "umull r2, r3, r12, r7\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "str r4, [sp, #36]\n\t" + "mov r8, r11\n\t" + /* A[5] * A[5] = 10 */ + "umull r2, r3, r7, r7\n\t" + "adds r5, r5, r2\n\t" + "mov r4, #0\n\t" + 
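/* Squaring sketch (annotation): a square needs only the products with +
 * i <= j, since A[i] * A[j] and A[j] * A[i] coincide. Diagonal terms are +
 * accumulated once, cross terms twice (hence the doubled adds/adcs runs +
 * above). In portable C: +
 *   t = (uint64_t)a[i] * a[j]; +
 *   acc += t; +
 *   if (i != j) acc += t;   double the cross product +
 * which cuts the umull count from 64 in fe_mul to 36 here. */ + 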
"adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + /* A[4] * A[6] = 10 */ + "umull r2, r3, r12, r8\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + /* A[3] * A[7] = 10 */ + "ldr r11, [%[a], #28]\n\t" + "umull r2, r3, r10, r11\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #40]\n\t" + "mov r9, r11\n\t" + /* A[4] * A[7] = 11 */ + "umull r2, r3, r12, r9\n\t" + "adds r6, r6, r2\n\t" + "mov r5, #0\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + /* A[5] * A[6] = 11 */ + "umull r2, r3, r7, r8\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "str r6, [sp, #44]\n\t" + /* A[6] * A[6] = 12 */ + "umull r2, r3, r8, r8\n\t" + "adds r4, r4, r2\n\t" + "mov r6, #0\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + /* A[5] * A[7] = 12 */ + "umull r2, r3, r7, r9\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "str r4, [sp, #48]\n\t" + /* A[6] * A[7] = 13 */ + "umull r2, r3, r8, r9\n\t" + "adds r5, r5, r2\n\t" + "mov r4, #0\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #52]\n\t" + /* A[7] * A[7] = 14 */ + "umull r2, r3, r9, r9\n\t" + "adds r6, r6, r2\n\t" + "adc r4, r4, r3\n\t" + "str r6, [sp, #56]\n\t" + "str r4, [sp, #60]\n\t" + /* Reduce */ + /* Load bottom half */ + "ldrd r4, r5, [sp]\n\t" + "ldrd r6, r7, [sp, #8]\n\t" + "ldrd r8, r9, [sp, #16]\n\t" + "ldrd r10, r11, [sp, #24]\n\t" + "lsr r2, r11, #31\n\t" + "and r11, r11, #0x7fffffff\n\t" + "mov r12, #19\n\t" + "ldr %[a], [sp, #32]\n\t" + "orr r2, r2, %[a], lsl #1\n\t" + "umull r2, r3, r12, r2\n\t" + "adds r4, r4, r2\n\t" + "mov lr, #0\n\t" + "adcs r5, r5, r3\n\t" + "adc lr, lr, #0\n\t" + "lsr r2, %[a], #31\n\t" + "ldr %[a], [sp, #36]\n\t" + "orr r2, r2, %[a], lsl #1\n\t" + "umull r2, r3, r12, r2\n\t" + "add r3, r3, lr\n\t" + "adds r5, r5, r2\n\t" + "mov lr, #0\n\t" + "adcs r6, r6, r3\n\t" + "adc lr, lr, #0\n\t" + "lsr r2, %[a], #31\n\t" + "ldr %[a], [sp, #40]\n\t" + "orr r2, r2, %[a], lsl #1\n\t" + "umull r2, r3, r12, r2\n\t" + "add r3, r3, lr\n\t" + "adds r6, r6, r2\n\t" + "mov lr, #0\n\t" + "adcs r7, r7, r3\n\t" + "adc lr, lr, #0\n\t" + "lsr r2, %[a], #31\n\t" + "ldr %[a], [sp, #44]\n\t" + "orr r2, r2, %[a], lsl #1\n\t" + "umull r2, r3, r12, r2\n\t" + "add r3, r3, lr\n\t" + "adds r7, r7, r2\n\t" + "mov lr, #0\n\t" + "adcs r8, r8, r3\n\t" + "adc lr, lr, #0\n\t" + "lsr r2, %[a], #31\n\t" + "ldr %[a], [sp, #48]\n\t" + "orr r2, r2, %[a], lsl #1\n\t" + "umull r2, r3, r12, r2\n\t" + "add r3, r3, lr\n\t" + "adds r8, r8, r2\n\t" + "mov lr, #0\n\t" + "adcs r9, r9, r3\n\t" + "adc lr, lr, #0\n\t" + "lsr r2, %[a], #31\n\t" + "ldr %[a], [sp, #52]\n\t" + "orr r2, r2, %[a], lsl #1\n\t" + "umull r2, r3, r12, r2\n\t" + "add r3, r3, lr\n\t" + "adds r9, r9, r2\n\t" + "mov lr, #0\n\t" + "adcs r10, r10, r3\n\t" + "adc lr, lr, #0\n\t" + "lsr r2, %[a], #31\n\t" + "ldr %[a], [sp, #56]\n\t" + "orr r2, r2, %[a], lsl #1\n\t" + "umull r2, r3, r12, r2\n\t" + "add r3, r3, lr\n\t" + "adds r10, r10, r2\n\t" + "mov lr, #0\n\t" + "adcs r11, r11, r3\n\t" + "adc lr, lr, #0\n\t" + "lsr r2, %[a], #31\n\t" + "ldr 
%[a], [sp, #60]\n\t" + "orr r2, r2, %[a], lsl #1\n\t" + "umull r2, r3, r12, r2\n\t" + "adds r11, r11, r2\n\t" + "adc r2, r3, lr\n\t" + /* Overflow */ + "lsl r2, r2, #1\n\t" + "orr r2, r2, r11, lsr #31\n\t" + "mul r2, r2, r12\n\t" + "and r11, r11, #0x7fffffff\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adc r11, r11, #0\n\t" + /* Reduce if top bit set */ + "asr r2, r11, #31\n\t" + "and r2, r2, r12\n\t" + "and r11, r11, #0x7fffffff\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adc r11, r11, #0\n\t" + /* Store */ + "strd r4, r5, [%[r]]\n\t" + "strd r6, r7, [%[r], #8]\n\t" + "strd r8, r9, [%[r], #16]\n\t" + "strd r10, r11, [%[r], #24]\n\t" + "add sp, sp, #0x40\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +void fe_mul121666(fe r, fe a) +{ + __asm__ __volatile__ ( + /* Multiply by 121666 */ + "ldrd r2, r3, [%[a]]\n\t" + "ldrd r4, r5, [%[a], #8]\n\t" + "ldrd r6, r7, [%[a], #16]\n\t" + "ldrd r8, r9, [%[a], #24]\n\t" + "movw lr, #0xdb42\n\t" + "movt lr, #1\n\t" + "umull r2, r10, r2, lr\n\t" + "umull r3, r12, r3, lr\n\t" + "adds r3, r3, r10\n\t" + "adc r10, r12, #0\n\t" + "umull r4, r12, r4, lr\n\t" + "adds r4, r4, r10\n\t" + "adc r10, r12, #0\n\t" + "umull r5, r12, r5, lr\n\t" + "adds r5, r5, r10\n\t" + "adc r10, r12, #0\n\t" + "umull r6, r12, r6, lr\n\t" + "adds r6, r6, r10\n\t" + "adc r10, r12, #0\n\t" + "umull r7, r12, r7, lr\n\t" + "adds r7, r7, r10\n\t" + "adc r10, r12, #0\n\t" + "umull r8, r12, r8, lr\n\t" + "adds r8, r8, r10\n\t" + "adc r10, r12, #0\n\t" + "umull r9, r12, r9, lr\n\t" + "adds r9, r9, r10\n\t" + "adc r10, r12, #0\n\t" + "mov lr, #19\n\t" + "lsl r10, r10, #1\n\t" + "orr r10, r10, r9, lsr #31\n\t" + "mul r10, r10, lr\n\t" + "and r9, r9, #0x7fffffff\n\t" + "adds r2, r2, r10\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r9, #0\n\t" + "strd r2, r3, [%[r]]\n\t" + "strd r4, r5, [%[r], #8]\n\t" + "strd r6, r7, [%[r], #16]\n\t" + "strd r8, r9, [%[r], #24]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +} + +void fe_sq2(fe r, const fe a) +{ + __asm__ __volatile__ ( + "sub sp, sp, #0x40\n\t" + /* Square * 2 */ + "ldr r7, [%[a]]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[a], #8]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r12, [%[a], #16]\n\t" + /* A[0] * A[0] = 0 */ + "umull r4, r5, r7, r7\n\t" + "str r4, [sp]\n\t" + /* A[0] * A[1] = 1 */ + "umull r2, r3, r7, r8\n\t" + "mov r6, #0\n\t" + "adds r5, r5, r2\n\t" + "adc r6, r6, r3\n\t" + "adds r5, r5, r2\n\t" + "mov r4, #0\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #4]\n\t" + /* A[1] * A[1] = 2 */ + "umull r2, r3, r8, r8\n\t" + "adds r6, r6, r2\n\t" + "adc r4, r4, r3\n\t" + /* A[0] * A[2] = 2 */ + "umull r2, r3, r7, r9\n\t" + "adds r6, r6, r2\n\t" + "mov r5, #0\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "str r6, [sp, #8]\n\t" + /* A[0] * A[3] = 3 */ + "umull r2, r3, r7, r10\n\t" + "adds r4, r4, r2\n\t" + "adc r5, r5, r3\n\t" + "adds r4, r4, r2\n\t" + "mov r6, #0\n\t" + "adcs r5, r5, r3\n\t" + 
"adc r6, r6, #0\n\t" + /* A[1] * A[2] = 3 */ + "umull r2, r3, r8, r9\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "str r4, [sp, #12]\n\t" + /* A[2] * A[2] = 4 */ + "umull r2, r3, r9, r9\n\t" + "adds r5, r5, r2\n\t" + "mov r4, #0\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * A[3] = 4 */ + "umull r2, r3, r8, r10\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + /* A[0] * A[4] = 4 */ + "umull r2, r3, r7, r12\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #16]\n\t" + /* A[0] * A[5] = 5 */ + "ldr r11, [%[a], #20]\n\t" + "umull r2, r3, r7, r11\n\t" + "adds r6, r6, r2\n\t" + "mov r5, #0\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + /* A[1] * A[4] = 5 */ + "umull r2, r3, r8, r12\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + /* A[2] * A[3] = 5 */ + "umull r2, r3, r9, r10\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "str r6, [sp, #20]\n\t" + /* A[3] * A[3] = 6 */ + "umull r2, r3, r10, r10\n\t" + "adds r4, r4, r2\n\t" + "mov r6, #0\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + /* A[2] * A[4] = 6 */ + "umull r2, r3, r9, r12\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + /* A[1] * A[5] = 6 */ + "umull r2, r3, r8, r11\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + /* A[0] * A[6] = 6 */ + "ldr r11, [%[a], #24]\n\t" + "umull r2, r3, r7, r11\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "str r4, [sp, #24]\n\t" + /* A[0] * A[7] = 7 */ + "ldr r11, [%[a], #28]\n\t" + "umull r2, r3, r7, r11\n\t" + "adds r5, r5, r2\n\t" + "mov r4, #0\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * A[6] = 7 */ + "ldr r11, [%[a], #24]\n\t" + "umull r2, r3, r8, r11\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + /* A[2] * A[5] = 7 */ + "ldr r11, [%[a], #20]\n\t" + "umull r2, r3, r9, r11\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + /* A[3] * A[4] = 7 */ + "umull r2, r3, r10, r12\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #28]\n\t" + /* A[4] * A[4] = 8 */ + "umull r2, r3, r12, r12\n\t" + "adds r6, r6, r2\n\t" + "mov r5, #0\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + /* A[3] * A[5] = 8 */ + "umull r2, r3, r10, r11\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + /* A[2] * A[6] = 8 */ + "ldr r11, [%[a], 
#24]\n\t" + "umull r2, r3, r9, r11\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + /* A[1] * A[7] = 8 */ + "ldr r11, [%[a], #28]\n\t" + "umull r2, r3, r8, r11\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "str r6, [sp, #32]\n\t" + "ldr r7, [%[a], #20]\n\t" + /* A[2] * A[7] = 9 */ + "umull r2, r3, r9, r11\n\t" + "adds r4, r4, r2\n\t" + "mov r6, #0\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + /* A[3] * A[6] = 9 */ + "ldr r11, [%[a], #24]\n\t" + "umull r2, r3, r10, r11\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + /* A[4] * A[5] = 9 */ + "umull r2, r3, r12, r7\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "str r4, [sp, #36]\n\t" + "mov r8, r11\n\t" + /* A[5] * A[5] = 10 */ + "umull r2, r3, r7, r7\n\t" + "adds r5, r5, r2\n\t" + "mov r4, #0\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + /* A[4] * A[6] = 10 */ + "umull r2, r3, r12, r8\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + /* A[3] * A[7] = 10 */ + "ldr r11, [%[a], #28]\n\t" + "umull r2, r3, r10, r11\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #40]\n\t" + "mov r9, r11\n\t" + /* A[4] * A[7] = 11 */ + "umull r2, r3, r12, r9\n\t" + "adds r6, r6, r2\n\t" + "mov r5, #0\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + /* A[5] * A[6] = 11 */ + "umull r2, r3, r7, r8\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "adds r6, r6, r2\n\t" + "adcs r4, r4, r3\n\t" + "adc r5, r5, #0\n\t" + "str r6, [sp, #44]\n\t" + /* A[6] * A[6] = 12 */ + "umull r2, r3, r8, r8\n\t" + "adds r4, r4, r2\n\t" + "mov r6, #0\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + /* A[5] * A[7] = 12 */ + "umull r2, r3, r7, r9\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "adc r6, r6, #0\n\t" + "str r4, [sp, #48]\n\t" + /* A[6] * A[7] = 13 */ + "umull r2, r3, r8, r9\n\t" + "adds r5, r5, r2\n\t" + "mov r4, #0\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r2\n\t" + "adcs r6, r6, r3\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #52]\n\t" + /* A[7] * A[7] = 14 */ + "umull r2, r3, r9, r9\n\t" + "adds r6, r6, r2\n\t" + "adc r4, r4, r3\n\t" + "str r6, [sp, #56]\n\t" + "str r4, [sp, #60]\n\t" + /* Double and Reduce */ + /* Load bottom half */ + "ldrd r4, r5, [sp]\n\t" + "ldrd r6, r7, [sp, #8]\n\t" + "ldrd r8, r9, [sp, #16]\n\t" + "ldrd r10, r11, [sp, #24]\n\t" + "lsr r2, r11, #30\n\t" + "lsl r11, r11, #1\n\t" + "orr r11, r11, r10, lsr #31\n\t" + "lsl r10, r10, #1\n\t" + "orr r10, r10, r9, lsr #31\n\t" + "lsl r9, r9, #1\n\t" + "orr r9, r9, r8, lsr #31\n\t" + "lsl r8, r8, #1\n\t" + "orr r8, r8, r7, lsr #31\n\t" + "lsl r7, r7, #1\n\t" + "orr r7, r7, r6, lsr #31\n\t" + "lsl r6, r6, #1\n\t" + "orr r6, r6, r5, lsr #31\n\t" + "lsl r5, r5, #1\n\t" + "orr r5, r5, r4, lsr #31\n\t" + "lsl 
r4, r4, #1\n\t" + "and r11, r11, #0x7fffffff\n\t" + "mov r12, #19\n\t" + "ldr %[a], [sp, #32]\n\t" + "orr r2, r2, %[a], lsl #2\n\t" + "umull r2, r3, r12, r2\n\t" + "adds r4, r4, r2\n\t" + "mov lr, #0\n\t" + "adcs r5, r5, r3\n\t" + "adc lr, lr, #0\n\t" + "lsr r2, %[a], #30\n\t" + "ldr %[a], [sp, #36]\n\t" + "orr r2, r2, %[a], lsl #2\n\t" + "umull r2, r3, r12, r2\n\t" + "add r3, r3, lr\n\t" + "adds r5, r5, r2\n\t" + "mov lr, #0\n\t" + "adcs r6, r6, r3\n\t" + "adc lr, lr, #0\n\t" + "lsr r2, %[a], #30\n\t" + "ldr %[a], [sp, #40]\n\t" + "orr r2, r2, %[a], lsl #2\n\t" + "umull r2, r3, r12, r2\n\t" + "add r3, r3, lr\n\t" + "adds r6, r6, r2\n\t" + "mov lr, #0\n\t" + "adcs r7, r7, r3\n\t" + "adc lr, lr, #0\n\t" + "lsr r2, %[a], #30\n\t" + "ldr %[a], [sp, #44]\n\t" + "orr r2, r2, %[a], lsl #2\n\t" + "umull r2, r3, r12, r2\n\t" + "add r3, r3, lr\n\t" + "adds r7, r7, r2\n\t" + "mov lr, #0\n\t" + "adcs r8, r8, r3\n\t" + "adc lr, lr, #0\n\t" + "lsr r2, %[a], #30\n\t" + "ldr %[a], [sp, #48]\n\t" + "orr r2, r2, %[a], lsl #2\n\t" + "umull r2, r3, r12, r2\n\t" + "add r3, r3, lr\n\t" + "adds r8, r8, r2\n\t" + "mov lr, #0\n\t" + "adcs r9, r9, r3\n\t" + "adc lr, lr, #0\n\t" + "lsr r2, %[a], #30\n\t" + "ldr %[a], [sp, #52]\n\t" + "orr r2, r2, %[a], lsl #2\n\t" + "umull r2, r3, r12, r2\n\t" + "add r3, r3, lr\n\t" + "adds r9, r9, r2\n\t" + "mov lr, #0\n\t" + "adcs r10, r10, r3\n\t" + "adc lr, lr, #0\n\t" + "lsr r2, %[a], #30\n\t" + "ldr %[a], [sp, #56]\n\t" + "orr r2, r2, %[a], lsl #2\n\t" + "umull r2, r3, r12, r2\n\t" + "add r3, r3, lr\n\t" + "adds r10, r10, r2\n\t" + "mov lr, #0\n\t" + "adcs r11, r11, r3\n\t" + "adc lr, lr, #0\n\t" + "lsr r2, %[a], #30\n\t" + "ldr %[a], [sp, #60]\n\t" + "orr r2, r2, %[a], lsl #2\n\t" + "umull r2, r3, r12, r2\n\t" + "adds r11, r11, r2\n\t" + "adc r2, r3, lr\n\t" + /* Overflow */ + "lsl r2, r2, #1\n\t" + "orr r2, r2, r11, lsr #31\n\t" + "mul r2, r2, r12\n\t" + "and r11, r11, #0x7fffffff\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adc r11, r11, #0\n\t" + /* Reduce if top bit set */ + "asr r2, r11, #31\n\t" + "and r2, r2, r12\n\t" + "and r11, r11, #0x7fffffff\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adc r11, r11, #0\n\t" + /* Store */ + "strd r4, r5, [%[r]]\n\t" + "strd r6, r7, [%[r], #8]\n\t" + "strd r8, r9, [%[r], #16]\n\t" + "strd r10, r11, [%[r], #24]\n\t" + "add sp, sp, #0x40\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +void fe_invert(fe r, const fe a) +{ + __asm__ __volatile__ ( + "sub sp, sp, #0x88\n\t" + /* Invert */ + "str %[r], [sp, #128]\n\t" + "str %[a], [sp, #132]\n\t" + "mov r0, sp\n\t" + "ldr r1, [sp, #132]\n\t" + "bl fe_sq\n\t" + "add r0, sp, #32\n\t" + "mov r1, sp\n\t" + "bl fe_sq\n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #32\n\t" + "bl fe_sq\n\t" + "add r0, sp, #32\n\t" + "ldr r1, [sp, #132]\n\t" + "add r2, sp, #32\n\t" + "bl fe_mul\n\t" + "mov r0, sp\n\t" + "mov r1, sp\n\t" + "add r2, sp, #32\n\t" + "bl fe_mul\n\t" + "add r0, sp, #0x40\n\t" + "mov r1, sp\n\t" + "bl fe_sq\n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #32\n\t" + "add r2, sp, #0x40\n\t" + "bl fe_mul\n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #32\n\t" + "bl fe_sq\n\t" + "mov r4, #4\n\t" + "\n" + "L_fe_invert1_%=: \n\t" + "add r0, sp, 
#0x40\n\t" + "add r1, sp, #0x40\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_fe_invert1_%=\n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #0x40\n\t" + "add r2, sp, #32\n\t" + "bl fe_mul\n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #32\n\t" + "bl fe_sq\n\t" + "mov r4, #9\n\t" + "\n" + "L_fe_invert2_%=: \n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0x40\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_fe_invert2_%=\n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0x40\n\t" + "add r2, sp, #32\n\t" + "bl fe_mul\n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #0x40\n\t" + "bl fe_sq\n\t" + "mov r4, #19\n\t" + "\n" + "L_fe_invert3_%=: \n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #0x60\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_fe_invert3_%=\n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0x60\n\t" + "add r2, sp, #0x40\n\t" + "bl fe_mul\n\t" + "mov r4, #10\n\t" + "\n" + "L_fe_invert4_%=: \n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0x40\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_fe_invert4_%=\n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #0x40\n\t" + "add r2, sp, #32\n\t" + "bl fe_mul\n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #32\n\t" + "bl fe_sq\n\t" + "mov r4, #49\n\t" + "\n" + "L_fe_invert5_%=: \n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0x40\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_fe_invert5_%=\n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0x40\n\t" + "add r2, sp, #32\n\t" + "bl fe_mul\n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #0x40\n\t" + "bl fe_sq\n\t" + "mov r4, #0x63\n\t" + "\n" + "L_fe_invert6_%=: \n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #0x60\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_fe_invert6_%=\n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0x60\n\t" + "add r2, sp, #0x40\n\t" + "bl fe_mul\n\t" + "mov r4, #50\n\t" + "\n" + "L_fe_invert7_%=: \n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0x40\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_fe_invert7_%=\n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #0x40\n\t" + "add r2, sp, #32\n\t" + "bl fe_mul\n\t" + "mov r4, #5\n\t" + "\n" + "L_fe_invert8_%=: \n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #32\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_fe_invert8_%=\n\t" + "ldr r0, [sp, #128]\n\t" + "add r1, sp, #32\n\t" + "mov r2, sp\n\t" + "bl fe_mul\n\t" + "ldr %[a], [sp, #132]\n\t" + "ldr %[r], [sp, #128]\n\t" + "add sp, sp, #0x88\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "lr", "r4" + ); +} + +int curve25519(byte* r, byte* n, byte* a) +{ + __asm__ __volatile__ ( + "sub sp, sp, #0xbc\n\t" + "str %[r], [sp, #160]\n\t" + "str %[n], [sp, #164]\n\t" + "str %[a], [sp, #168]\n\t" + "mov %[n], #0\n\t" + "str %[n], [sp, #172]\n\t" + /* Set one */ + "mov r11, #1\n\t" + "mov r10, #0\n\t" + "strd r11, r10, [%[r]]\n\t" + "strd r10, r10, [%[r], #8]\n\t" + "strd r10, r10, [%[r], #16]\n\t" + "strd r10, r10, [%[r], #24]\n\t" + /* Set zero */ + "mov r10, #0\n\t" + "strd r10, r10, [sp]\n\t" + "strd r10, r10, [sp, #8]\n\t" + "strd r10, r10, [sp, #16]\n\t" + "strd r10, r10, [sp, #24]\n\t" + /* Set one */ + "mov r11, #1\n\t" + "mov r10, #0\n\t" + "strd r11, r10, [sp, #32]\n\t" + "strd r10, r10, [sp, #40]\n\t" + "strd r10, r10, [sp, #48]\n\t" + "strd r10, r10, [sp, #56]\n\t" + /* Copy */ + "ldrd r4, r5, [%[a]]\n\t" + "ldrd r6, r7, [%[a], #8]\n\t" + "strd r4, r5, [sp, #64]\n\t" + "strd r6, r7, [sp, #72]\n\t" + "ldrd 
r4, r5, [%[a], #16]\n\t" + "ldrd r6, r7, [%[a], #24]\n\t" + "strd r4, r5, [sp, #80]\n\t" + "strd r6, r7, [sp, #88]\n\t" + "mov %[n], #30\n\t" + "str %[n], [sp, #180]\n\t" + "mov %[a], #28\n\t" + "str %[a], [sp, #176]\n\t" + "\n" + "L_curve25519_words_%=: \n\t" + "\n" + "L_curve25519_bits_%=: \n\t" + "ldr %[n], [sp, #164]\n\t" + "ldr %[a], [%[n], r2]\n\t" + "ldr %[n], [sp, #180]\n\t" + "lsr %[a], %[a], %[n]\n\t" + "and %[a], %[a], #1\n\t" + "str %[a], [sp, #184]\n\t" + "ldr %[n], [sp, #172]\n\t" + "eor %[n], %[n], %[a]\n\t" + "str %[n], [sp, #172]\n\t" + "ldr %[r], [sp, #160]\n\t" + /* Conditional Swap */ + "neg %[n], %[n]\n\t" + "ldrd r4, r5, [%[r]]\n\t" + "ldrd r6, r7, [sp, #64]\n\t" + "eor r8, r4, r6\n\t" + "eor r9, r5, r7\n\t" + "and r8, r8, %[n]\n\t" + "and r9, r9, %[n]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "strd r4, r5, [%[r]]\n\t" + "strd r6, r7, [sp, #64]\n\t" + "ldrd r4, r5, [%[r], #8]\n\t" + "ldrd r6, r7, [sp, #72]\n\t" + "eor r8, r4, r6\n\t" + "eor r9, r5, r7\n\t" + "and r8, r8, %[n]\n\t" + "and r9, r9, %[n]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "strd r4, r5, [%[r], #8]\n\t" + "strd r6, r7, [sp, #72]\n\t" + "ldrd r4, r5, [%[r], #16]\n\t" + "ldrd r6, r7, [sp, #80]\n\t" + "eor r8, r4, r6\n\t" + "eor r9, r5, r7\n\t" + "and r8, r8, %[n]\n\t" + "and r9, r9, %[n]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "strd r4, r5, [%[r], #16]\n\t" + "strd r6, r7, [sp, #80]\n\t" + "ldrd r4, r5, [%[r], #24]\n\t" + "ldrd r6, r7, [sp, #88]\n\t" + "eor r8, r4, r6\n\t" + "eor r9, r5, r7\n\t" + "and r8, r8, %[n]\n\t" + "and r9, r9, %[n]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "strd r4, r5, [%[r], #24]\n\t" + "strd r6, r7, [sp, #88]\n\t" + "ldr %[n], [sp, #172]\n\t" + /* Conditional Swap */ + "neg %[n], %[n]\n\t" + "ldrd r4, r5, [sp]\n\t" + "ldrd r6, r7, [sp, #32]\n\t" + "eor r8, r4, r6\n\t" + "eor r9, r5, r7\n\t" + "and r8, r8, %[n]\n\t" + "and r9, r9, %[n]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "strd r4, r5, [sp]\n\t" + "strd r6, r7, [sp, #32]\n\t" + "ldrd r4, r5, [sp, #8]\n\t" + "ldrd r6, r7, [sp, #40]\n\t" + "eor r8, r4, r6\n\t" + "eor r9, r5, r7\n\t" + "and r8, r8, %[n]\n\t" + "and r9, r9, %[n]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "strd r4, r5, [sp, #8]\n\t" + "strd r6, r7, [sp, #40]\n\t" + "ldrd r4, r5, [sp, #16]\n\t" + "ldrd r6, r7, [sp, #48]\n\t" + "eor r8, r4, r6\n\t" + "eor r9, r5, r7\n\t" + "and r8, r8, %[n]\n\t" + "and r9, r9, %[n]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "strd r4, r5, [sp, #16]\n\t" + "strd r6, r7, [sp, #48]\n\t" + "ldrd r4, r5, [sp, #24]\n\t" + "ldrd r6, r7, [sp, #56]\n\t" + "eor r8, r4, r6\n\t" + "eor r9, r5, r7\n\t" + "and r8, r8, %[n]\n\t" + "and r9, r9, %[n]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "strd r4, r5, [sp, #24]\n\t" + "strd r6, r7, [sp, #56]\n\t" + "ldr %[n], [sp, #184]\n\t" + "str %[n], [sp, #172]\n\t" + /* Add-Sub */ + /* Add */ + "ldrd r4, r5, [%[r]]\n\t" + "ldrd r6, r7, [sp]\n\t" + "adds r8, r4, r6\n\t" + "mov r3, #0\n\t" + "adcs r9, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "strd r8, r9, [%[r]]\n\t" + /* Sub */ + "subs r10, r4, r6\n\t" + "mov r12, #0\n\t" + "sbcs r11, r5, r7\n\t" + "adc r12, 
r12, #0\n\t" + "strd r10, r11, [sp, #128]\n\t" + /* Add */ + "ldrd r4, r5, [%[r], #8]\n\t" + "ldrd r6, r7, [sp, #8]\n\t" + "adds r3, r3, #-1\n\t" + "adcs r8, r4, r6\n\t" + "mov r3, #0\n\t" + "adcs r9, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "strd r8, r9, [%[r], #8]\n\t" + /* Sub */ + "adds r12, r12, #-1\n\t" + "sbcs r10, r4, r6\n\t" + "mov r12, #0\n\t" + "sbcs r11, r5, r7\n\t" + "adc r12, r12, #0\n\t" + "strd r10, r11, [sp, #136]\n\t" + /* Add */ + "ldrd r4, r5, [%[r], #16]\n\t" + "ldrd r6, r7, [sp, #16]\n\t" + "adds r3, r3, #-1\n\t" + "adcs r8, r4, r6\n\t" + "mov r3, #0\n\t" + "adcs r9, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "strd r8, r9, [%[r], #16]\n\t" + /* Sub */ + "adds r12, r12, #-1\n\t" + "sbcs r10, r4, r6\n\t" + "mov r12, #0\n\t" + "sbcs r11, r5, r7\n\t" + "adc r12, r12, #0\n\t" + "strd r10, r11, [sp, #144]\n\t" + /* Add */ + "ldrd r4, r5, [%[r], #24]\n\t" + "ldrd r6, r7, [sp, #24]\n\t" + "adds r3, r3, #-1\n\t" + "adcs r8, r4, r6\n\t" + "adc r9, r5, r7\n\t" + /* Sub */ + "adds r12, r12, #-1\n\t" + "sbcs r10, r4, r6\n\t" + "sbc r11, r5, r7\n\t" + "mov r3, #-19\n\t" + "asr %[a], r9, #31\n\t" + /* Mask the modulus */ + "and r3, %[a], r3\n\t" + "and r12, %[a], #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd r4, r5, [%[r]]\n\t" + "subs r4, r4, r3\n\t" + "sbcs r5, r5, %[a]\n\t" + "strd r4, r5, [%[r]]\n\t" + "ldrd r4, r5, [%[r], #8]\n\t" + "sbcs r4, r4, %[a]\n\t" + "sbcs r5, r5, %[a]\n\t" + "strd r4, r5, [%[r], #8]\n\t" + "ldrd r4, r5, [%[r], #16]\n\t" + "sbcs r4, r4, %[a]\n\t" + "sbcs r5, r5, %[a]\n\t" + "strd r4, r5, [%[r], #16]\n\t" + "sbcs r8, r8, %[a]\n\t" + "sbc r9, r9, r12\n\t" + "strd r8, r9, [%[r], #24]\n\t" + "mov r3, #-19\n\t" + "asr %[a], r11, #31\n\t" + /* Mask the modulus */ + "and r3, %[a], r3\n\t" + "and r12, %[a], #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd r4, r5, [sp, #128]\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, %[a]\n\t" + "strd r4, r5, [sp, #128]\n\t" + "ldrd r4, r5, [sp, #136]\n\t" + "adcs r4, r4, %[a]\n\t" + "adcs r5, r5, %[a]\n\t" + "strd r4, r5, [sp, #136]\n\t" + "ldrd r4, r5, [sp, #144]\n\t" + "adcs r4, r4, %[a]\n\t" + "adcs r5, r5, %[a]\n\t" + "strd r4, r5, [sp, #144]\n\t" + "adcs r10, r10, %[a]\n\t" + "adc r11, r11, r12\n\t" + "strd r10, r11, [sp, #152]\n\t" + /* Add-Sub */ + /* Add */ + "ldrd r4, r5, [sp, #64]\n\t" + "ldrd r6, r7, [sp, #32]\n\t" + "adds r8, r4, r6\n\t" + "mov r3, #0\n\t" + "adcs r9, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "strd r8, r9, [sp]\n\t" + /* Sub */ + "subs r10, r4, r6\n\t" + "mov r12, #0\n\t" + "sbcs r11, r5, r7\n\t" + "adc r12, r12, #0\n\t" + "strd r10, r11, [sp, #96]\n\t" + /* Add */ + "ldrd r4, r5, [sp, #72]\n\t" + "ldrd r6, r7, [sp, #40]\n\t" + "adds r3, r3, #-1\n\t" + "adcs r8, r4, r6\n\t" + "mov r3, #0\n\t" + "adcs r9, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "strd r8, r9, [sp, #8]\n\t" + /* Sub */ + "adds r12, r12, #-1\n\t" + "sbcs r10, r4, r6\n\t" + "mov r12, #0\n\t" + "sbcs r11, r5, r7\n\t" + "adc r12, r12, #0\n\t" + "strd r10, r11, [sp, #104]\n\t" + /* Add */ + "ldrd r4, r5, [sp, #80]\n\t" + "ldrd r6, r7, [sp, #48]\n\t" + "adds r3, r3, #-1\n\t" + "adcs r8, r4, r6\n\t" + "mov r3, #0\n\t" + "adcs r9, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "strd r8, r9, [sp, #16]\n\t" + /* Sub */ + "adds r12, r12, #-1\n\t" + "sbcs r10, r4, r6\n\t" + "mov r12, #0\n\t" + "sbcs r11, r5, r7\n\t" + "adc r12, r12, #0\n\t" + "strd r10, r11, [sp, #112]\n\t" + /* Add */ + "ldrd r4, r5, [sp, #88]\n\t" + "ldrd r6, r7, [sp, #56]\n\t" + "adds r3, r3, #-1\n\t" + "adcs r8, r4, r6\n\t" + "adc r9, r5, r7\n\t" + /* Sub */ + "adds r12, r12, 
#-1\n\t" + "sbcs r10, r4, r6\n\t" + "sbc r11, r5, r7\n\t" + "mov r3, #-19\n\t" + "asr %[a], r9, #31\n\t" + /* Mask the modulus */ + "and r3, %[a], r3\n\t" + "and r12, %[a], #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd r4, r5, [sp]\n\t" + "subs r4, r4, r3\n\t" + "sbcs r5, r5, %[a]\n\t" + "strd r4, r5, [sp]\n\t" + "ldrd r4, r5, [sp, #8]\n\t" + "sbcs r4, r4, %[a]\n\t" + "sbcs r5, r5, %[a]\n\t" + "strd r4, r5, [sp, #8]\n\t" + "ldrd r4, r5, [sp, #16]\n\t" + "sbcs r4, r4, %[a]\n\t" + "sbcs r5, r5, %[a]\n\t" + "strd r4, r5, [sp, #16]\n\t" + "sbcs r8, r8, %[a]\n\t" + "sbc r9, r9, r12\n\t" + "strd r8, r9, [sp, #24]\n\t" + "mov r3, #-19\n\t" + "asr %[a], r11, #31\n\t" + /* Mask the modulus */ + "and r3, %[a], r3\n\t" + "and r12, %[a], #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd r4, r5, [sp, #96]\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, %[a]\n\t" + "strd r4, r5, [sp, #96]\n\t" + "ldrd r4, r5, [sp, #104]\n\t" + "adcs r4, r4, %[a]\n\t" + "adcs r5, r5, %[a]\n\t" + "strd r4, r5, [sp, #104]\n\t" + "ldrd r4, r5, [sp, #112]\n\t" + "adcs r4, r4, %[a]\n\t" + "adcs r5, r5, %[a]\n\t" + "strd r4, r5, [sp, #112]\n\t" + "adcs r10, r10, %[a]\n\t" + "adc r11, r11, r12\n\t" + "strd r10, r11, [sp, #120]\n\t" + "ldr r2, [sp, #160]\n\t" + "add r1, sp, #0x60\n\t" + "add r0, sp, #32\n\t" + "bl fe_mul\n\t" + "add r2, sp, #0x80\n\t" + "add r1, sp, #0\n\t" + "add r0, sp, #0\n\t" + "bl fe_mul\n\t" + "add r1, sp, #0x80\n\t" + "add r0, sp, #0x60\n\t" + "bl fe_sq\n\t" + "ldr r1, [sp, #160]\n\t" + "add r0, sp, #0x80\n\t" + "bl fe_sq\n\t" + /* Add-Sub */ + /* Add */ + "ldrd r4, r5, [sp, #32]\n\t" + "ldrd r6, r7, [sp]\n\t" + "adds r8, r4, r6\n\t" + "mov r3, #0\n\t" + "adcs r9, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "strd r8, r9, [sp, #64]\n\t" + /* Sub */ + "subs r10, r4, r6\n\t" + "mov r12, #0\n\t" + "sbcs r11, r5, r7\n\t" + "adc r12, r12, #0\n\t" + "strd r10, r11, [sp]\n\t" + /* Add */ + "ldrd r4, r5, [sp, #40]\n\t" + "ldrd r6, r7, [sp, #8]\n\t" + "adds r3, r3, #-1\n\t" + "adcs r8, r4, r6\n\t" + "mov r3, #0\n\t" + "adcs r9, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "strd r8, r9, [sp, #72]\n\t" + /* Sub */ + "adds r12, r12, #-1\n\t" + "sbcs r10, r4, r6\n\t" + "mov r12, #0\n\t" + "sbcs r11, r5, r7\n\t" + "adc r12, r12, #0\n\t" + "strd r10, r11, [sp, #8]\n\t" + /* Add */ + "ldrd r4, r5, [sp, #48]\n\t" + "ldrd r6, r7, [sp, #16]\n\t" + "adds r3, r3, #-1\n\t" + "adcs r8, r4, r6\n\t" + "mov r3, #0\n\t" + "adcs r9, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "strd r8, r9, [sp, #80]\n\t" + /* Sub */ + "adds r12, r12, #-1\n\t" + "sbcs r10, r4, r6\n\t" + "mov r12, #0\n\t" + "sbcs r11, r5, r7\n\t" + "adc r12, r12, #0\n\t" + "strd r10, r11, [sp, #16]\n\t" + /* Add */ + "ldrd r4, r5, [sp, #56]\n\t" + "ldrd r6, r7, [sp, #24]\n\t" + "adds r3, r3, #-1\n\t" + "adcs r8, r4, r6\n\t" + "adc r9, r5, r7\n\t" + /* Sub */ + "adds r12, r12, #-1\n\t" + "sbcs r10, r4, r6\n\t" + "sbc r11, r5, r7\n\t" + "mov r3, #-19\n\t" + "asr %[a], r9, #31\n\t" + /* Mask the modulus */ + "and r3, %[a], r3\n\t" + "and r12, %[a], #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd r4, r5, [sp, #64]\n\t" + "subs r4, r4, r3\n\t" + "sbcs r5, r5, %[a]\n\t" + "strd r4, r5, [sp, #64]\n\t" + "ldrd r4, r5, [sp, #72]\n\t" + "sbcs r4, r4, %[a]\n\t" + "sbcs r5, r5, %[a]\n\t" + "strd r4, r5, [sp, #72]\n\t" + "ldrd r4, r5, [sp, #80]\n\t" + "sbcs r4, r4, %[a]\n\t" + "sbcs r5, r5, %[a]\n\t" + "strd r4, r5, [sp, #80]\n\t" + "sbcs r8, r8, %[a]\n\t" + "sbc r9, r9, r12\n\t" + "strd r8, r9, [sp, #88]\n\t" + "mov r3, #-19\n\t" + "asr %[a], r11, #31\n\t" + /* Mask the 
modulus */ + "and r3, %[a], r3\n\t" + "and r12, %[a], #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd r4, r5, [sp]\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, %[a]\n\t" + "strd r4, r5, [sp]\n\t" + "ldrd r4, r5, [sp, #8]\n\t" + "adcs r4, r4, %[a]\n\t" + "adcs r5, r5, %[a]\n\t" + "strd r4, r5, [sp, #8]\n\t" + "ldrd r4, r5, [sp, #16]\n\t" + "adcs r4, r4, %[a]\n\t" + "adcs r5, r5, %[a]\n\t" + "strd r4, r5, [sp, #16]\n\t" + "adcs r10, r10, %[a]\n\t" + "adc r11, r11, r12\n\t" + "strd r10, r11, [sp, #24]\n\t" + "add r2, sp, #0x60\n\t" + "add r1, sp, #0x80\n\t" + "ldr r0, [sp, #160]\n\t" + "bl fe_mul\n\t" + /* Sub */ + "ldrd r4, r5, [sp, #128]\n\t" + "ldrd r6, r7, [sp, #136]\n\t" + "ldrd r8, r9, [sp, #96]\n\t" + "ldrd r10, r11, [sp, #104]\n\t" + "subs r8, r4, r8\n\t" + "sbcs r9, r5, r9\n\t" + "sbcs r10, r6, r10\n\t" + "sbcs r11, r7, r11\n\t" + "strd r8, r9, [sp, #128]\n\t" + "strd r10, r11, [sp, #136]\n\t" + "ldrd r4, r5, [sp, #144]\n\t" + "ldrd r6, r7, [sp, #152]\n\t" + "ldrd r8, r9, [sp, #112]\n\t" + "ldrd r10, r11, [sp, #120]\n\t" + "sbcs r8, r4, r8\n\t" + "sbcs r9, r5, r9\n\t" + "sbcs r10, r6, r10\n\t" + "sbc r11, r7, r11\n\t" + "mov r3, #-19\n\t" + "asr %[a], r11, #31\n\t" + /* Mask the modulus */ + "and r3, %[a], r3\n\t" + "and r12, %[a], #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd r4, r5, [sp, #128]\n\t" + "ldrd r6, r7, [sp, #136]\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, %[a]\n\t" + "adcs r6, r6, %[a]\n\t" + "adcs r7, r7, %[a]\n\t" + "adcs r8, r8, %[a]\n\t" + "adcs r9, r9, %[a]\n\t" + "adcs r10, r10, %[a]\n\t" + "adc r11, r11, r12\n\t" + "strd r4, r5, [sp, #128]\n\t" + "strd r6, r7, [sp, #136]\n\t" + "strd r8, r9, [sp, #144]\n\t" + "strd r10, r11, [sp, #152]\n\t" + "add r1, sp, #0\n\t" + "add r0, sp, #0\n\t" + "bl fe_sq\n\t" + /* Multiply by 121666 */ + "ldrd r4, r5, [sp, #128]\n\t" + "ldrd r6, r7, [sp, #136]\n\t" + "ldrd r8, r9, [sp, #144]\n\t" + "ldrd r10, r11, [sp, #152]\n\t" + "movw r12, #0xdb42\n\t" + "movt r12, #1\n\t" + "umull r4, %[a], r4, r12\n\t" + "umull r5, r3, r5, r12\n\t" + "adds r5, r5, %[a]\n\t" + "adc %[a], r3, #0\n\t" + "umull r6, r3, r6, r12\n\t" + "adds r6, r6, %[a]\n\t" + "adc %[a], r3, #0\n\t" + "umull r7, r3, r7, r12\n\t" + "adds r7, r7, %[a]\n\t" + "adc %[a], r3, #0\n\t" + "umull r8, r3, r8, r12\n\t" + "adds r8, r8, %[a]\n\t" + "adc %[a], r3, #0\n\t" + "umull r9, r3, r9, r12\n\t" + "adds r9, r9, %[a]\n\t" + "adc %[a], r3, #0\n\t" + "umull r10, r3, r10, r12\n\t" + "adds r10, r10, %[a]\n\t" + "adc %[a], r3, #0\n\t" + "umull r11, r3, r11, r12\n\t" + "adds r11, r11, %[a]\n\t" + "adc %[a], r3, #0\n\t" + "mov r12, #19\n\t" + "lsl %[a], %[a], #1\n\t" + "orr %[a], %[a], r11, lsr #31\n\t" + "mul %[a], %[a], r12\n\t" + "and r11, r11, #0x7fffffff\n\t" + "adds r4, r4, %[a]\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adc r11, r11, #0\n\t" + "strd r4, r5, [sp, #32]\n\t" + "strd r6, r7, [sp, #40]\n\t" + "strd r8, r9, [sp, #48]\n\t" + "strd r10, r11, [sp, #56]\n\t" + "add r1, sp, #0x40\n\t" + "add r0, sp, #0x40\n\t" + "bl fe_sq\n\t" + /* Add */ + "ldrd r4, r5, [sp, #96]\n\t" + "ldrd r6, r7, [sp, #104]\n\t" + "ldrd r8, r9, [sp, #32]\n\t" + "ldrd r10, r11, [sp, #40]\n\t" + "adds r8, r4, r8\n\t" + "adcs r9, r5, r9\n\t" + "adcs r10, r6, r10\n\t" + "adcs r11, r7, r11\n\t" + "strd r8, r9, [sp, #96]\n\t" + "strd r10, r11, [sp, #104]\n\t" + "ldrd r4, r5, [sp, #112]\n\t" + "ldrd r6, r7, [sp, #120]\n\t" + "ldrd r8, r9, [sp, #48]\n\t" + "ldrd r10, r11, 
[sp, #56]\n\t" + "adcs r8, r4, r8\n\t" + "adcs r9, r5, r9\n\t" + "adcs r10, r6, r10\n\t" + "adc r11, r7, r11\n\t" + "mov r3, #-19\n\t" + "asr %[a], r11, #31\n\t" + /* Mask the modulus */ + "and r3, %[a], r3\n\t" + "and r12, %[a], #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd r4, r5, [sp, #96]\n\t" + "ldrd r6, r7, [sp, #104]\n\t" + "subs r4, r4, r3\n\t" + "sbcs r5, r5, %[a]\n\t" + "sbcs r6, r6, %[a]\n\t" + "sbcs r7, r7, %[a]\n\t" + "sbcs r8, r8, %[a]\n\t" + "sbcs r9, r9, %[a]\n\t" + "sbcs r10, r10, %[a]\n\t" + "sbc r11, r11, r12\n\t" + "strd r4, r5, [sp, #96]\n\t" + "strd r6, r7, [sp, #104]\n\t" + "strd r8, r9, [sp, #112]\n\t" + "strd r10, r11, [sp, #120]\n\t" + "add r2, sp, #0\n\t" + "ldr r1, [sp, #168]\n\t" + "add r0, sp, #32\n\t" + "bl fe_mul\n\t" + "add r2, sp, #0x60\n\t" + "add r1, sp, #0x80\n\t" + "add r0, sp, #0\n\t" + "bl fe_mul\n\t" + "ldr %[a], [sp, #176]\n\t" + "ldr %[n], [sp, #180]\n\t" + "subs %[n], %[n], #1\n\t" + "str %[n], [sp, #180]\n\t" + "bge L_curve25519_bits_%=\n\t" + "mov %[n], #31\n\t" + "str %[n], [sp, #180]\n\t" + "subs %[a], %[a], #4\n\t" + "str %[a], [sp, #176]\n\t" + "bge L_curve25519_words_%=\n\t" + /* Invert */ + "add r0, sp, #32\n\t" + "add r1, sp, #0\n\t" + "bl fe_sq\n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #32\n\t" + "bl fe_sq\n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0x40\n\t" + "bl fe_sq\n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0\n\t" + "add r2, sp, #0x40\n\t" + "bl fe_mul\n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #32\n\t" + "add r2, sp, #0x40\n\t" + "bl fe_mul\n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #32\n\t" + "bl fe_sq\n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0x40\n\t" + "add r2, sp, #0x60\n\t" + "bl fe_mul\n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #0x40\n\t" + "bl fe_sq\n\t" + "mov r4, #4\n\t" + "\n" + "L_curve25519_inv_1_%=: \n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #0x60\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_curve25519_inv_1_%=\n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0x60\n\t" + "add r2, sp, #0x40\n\t" + "bl fe_mul\n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #0x40\n\t" + "bl fe_sq\n\t" + "mov r4, #9\n\t" + "\n" + "L_curve25519_inv_2_%=: \n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #0x60\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_curve25519_inv_2_%=\n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #0x60\n\t" + "add r2, sp, #0x40\n\t" + "bl fe_mul\n\t" + "add r0, sp, #0x80\n\t" + "add r1, sp, #0x60\n\t" + "bl fe_sq\n\t" + "mov r4, #19\n\t" + "\n" + "L_curve25519_inv_3_%=: \n\t" + "add r0, sp, #0x80\n\t" + "add r1, sp, #0x80\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_curve25519_inv_3_%=\n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #0x80\n\t" + "add r2, sp, #0x60\n\t" + "bl fe_mul\n\t" + "mov r4, #10\n\t" + "\n" + "L_curve25519_inv_4_%=: \n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #0x60\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_curve25519_inv_4_%=\n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0x60\n\t" + "add r2, sp, #0x40\n\t" + "bl fe_mul\n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #0x40\n\t" + "bl fe_sq\n\t" + "mov r4, #49\n\t" + "\n" + "L_curve25519_inv_5_%=: \n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #0x60\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_curve25519_inv_5_%=\n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #0x60\n\t" + "add r2, sp, #0x40\n\t" + "bl fe_mul\n\t" + "add r0, sp, #0x80\n\t" + "add r1, sp, #0x60\n\t" + "bl 
fe_sq\n\t" + "mov r4, #0x63\n\t" + "\n" + "L_curve25519_inv_6_%=: \n\t" + "add r0, sp, #0x80\n\t" + "add r1, sp, #0x80\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_curve25519_inv_6_%=\n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #0x80\n\t" + "add r2, sp, #0x60\n\t" + "bl fe_mul\n\t" + "mov r4, #50\n\t" + "\n" + "L_curve25519_inv_7_%=: \n\t" + "add r0, sp, #0x60\n\t" + "add r1, sp, #0x60\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_curve25519_inv_7_%=\n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0x60\n\t" + "add r2, sp, #0x40\n\t" + "bl fe_mul\n\t" + "mov r4, #5\n\t" + "\n" + "L_curve25519_inv_8_%=: \n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0x40\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_curve25519_inv_8_%=\n\t" + "add r0, sp, #0\n\t" + "add r1, sp, #0x40\n\t" + "add r2, sp, #32\n\t" + "bl fe_mul\n\t" + "add r2, sp, #0\n\t" + "ldr r1, [sp, #160]\n\t" + "ldr r0, [sp, #160]\n\t" + "bl fe_mul\n\t" + "mov r0, #0\n\t" + "add sp, sp, #0xbc\n\t" + : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); + return (uint32_t)(size_t)r; +} + +void fe_pow22523(fe r, const fe a) +{ + __asm__ __volatile__ ( + "sub sp, sp, #0x68\n\t" + /* pow22523 */ + "str %[r], [sp, #96]\n\t" + "str %[a], [sp, #100]\n\t" + "mov r0, sp\n\t" + "ldr r1, [sp, #100]\n\t" + "bl fe_sq\n\t" + "add r0, sp, #32\n\t" + "mov r1, sp\n\t" + "bl fe_sq\n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #32\n\t" + "bl fe_sq\n\t" + "add r0, sp, #32\n\t" + "ldr r1, [sp, #100]\n\t" + "add r2, sp, #32\n\t" + "bl fe_mul\n\t" + "mov r0, sp\n\t" + "mov r1, sp\n\t" + "add r2, sp, #32\n\t" + "bl fe_mul\n\t" + "mov r0, sp\n\t" + "mov r1, sp\n\t" + "bl fe_sq\n\t" + "mov r0, sp\n\t" + "add r1, sp, #32\n\t" + "mov r2, sp\n\t" + "bl fe_mul\n\t" + "add r0, sp, #32\n\t" + "mov r1, sp\n\t" + "bl fe_sq\n\t" + "mov r4, #4\n\t" + "\n" + "L_fe_pow22523_1_%=: \n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #32\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_fe_pow22523_1_%=\n\t" + "mov r0, sp\n\t" + "add r1, sp, #32\n\t" + "mov r2, sp\n\t" + "bl fe_mul\n\t" + "add r0, sp, #32\n\t" + "mov r1, sp\n\t" + "bl fe_sq\n\t" + "mov r4, #9\n\t" + "\n" + "L_fe_pow22523_2_%=: \n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #32\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_fe_pow22523_2_%=\n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #32\n\t" + "mov r2, sp\n\t" + "bl fe_mul\n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #32\n\t" + "bl fe_sq\n\t" + "mov r4, #19\n\t" + "\n" + "L_fe_pow22523_3_%=: \n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0x40\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_fe_pow22523_3_%=\n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #0x40\n\t" + "add r2, sp, #32\n\t" + "bl fe_mul\n\t" + "mov r4, #10\n\t" + "\n" + "L_fe_pow22523_4_%=: \n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #32\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_fe_pow22523_4_%=\n\t" + "mov r0, sp\n\t" + "add r1, sp, #32\n\t" + "mov r2, sp\n\t" + "bl fe_mul\n\t" + "add r0, sp, #32\n\t" + "mov r1, sp\n\t" + "bl fe_sq\n\t" + "mov r4, #49\n\t" + "\n" + "L_fe_pow22523_5_%=: \n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #32\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_fe_pow22523_5_%=\n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #32\n\t" + "mov r2, sp\n\t" + "bl fe_mul\n\t" + "add r0, sp, #0x40\n\t" + "add 
r1, sp, #32\n\t" + "bl fe_sq\n\t" + "mov r4, #0x63\n\t" + "\n" + "L_fe_pow22523_6_%=: \n\t" + "add r0, sp, #0x40\n\t" + "add r1, sp, #0x40\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_fe_pow22523_6_%=\n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #0x40\n\t" + "add r2, sp, #32\n\t" + "bl fe_mul\n\t" + "mov r4, #50\n\t" + "\n" + "L_fe_pow22523_7_%=: \n\t" + "add r0, sp, #32\n\t" + "add r1, sp, #32\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_fe_pow22523_7_%=\n\t" + "mov r0, sp\n\t" + "add r1, sp, #32\n\t" + "mov r2, sp\n\t" + "bl fe_mul\n\t" + "mov r4, #2\n\t" + "\n" + "L_fe_pow22523_8_%=: \n\t" + "mov r0, sp\n\t" + "mov r1, sp\n\t" + "bl fe_sq\n\t" + "sub r4, r4, #1\n\t" + "cmp r4, #0\n\t" + "bne L_fe_pow22523_8_%=\n\t" + "ldr r0, [sp, #96]\n\t" + "mov r1, sp\n\t" + "ldr r2, [sp, #100]\n\t" + "bl fe_mul\n\t" + "ldr %[a], [sp, #100]\n\t" + "ldr %[r], [sp, #96]\n\t" + "add sp, sp, #0x68\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "lr", "r4" + ); +} + +void fe_ge_to_p2(fe rx, fe ry, fe rz, const fe px, const fe py, const fe pz, const fe pt) +{ + __asm__ __volatile__ ( + "sub sp, sp, #16\n\t" + "str %[rx], [sp]\n\t" + "str %[ry], [sp, #4]\n\t" + "str %[rz], [sp, #8]\n\t" + "str %[px], [sp, #12]\n\t" + "ldr r2, [sp, #32]\n\t" + "ldr r1, [sp, #12]\n\t" + "ldr r0, [sp]\n\t" + "bl fe_mul\n\t" + "ldr r2, [sp, #28]\n\t" + "ldr r1, [sp, #24]\n\t" + "ldr r0, [sp, #4]\n\t" + "bl fe_mul\n\t" + "ldr r2, [sp, #32]\n\t" + "ldr r1, [sp, #28]\n\t" + "ldr r0, [sp, #8]\n\t" + "bl fe_mul\n\t" + "add sp, sp, #16\n\t" + : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) + : + : "memory", "lr" + ); +} + +void fe_ge_to_p3(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz, const fe pt) +{ + __asm__ __volatile__ ( + "sub sp, sp, #16\n\t" + "str %[rx], [sp]\n\t" + "str %[ry], [sp, #4]\n\t" + "str %[rz], [sp, #8]\n\t" + "str %[rt], [sp, #12]\n\t" + "ldr r2, [sp, #36]\n\t" + "ldr r1, [sp, #24]\n\t" + "ldr r0, [sp]\n\t" + "bl fe_mul\n\t" + "ldr r2, [sp, #32]\n\t" + "ldr r1, [sp, #28]\n\t" + "ldr r0, [sp, #4]\n\t" + "bl fe_mul\n\t" + "ldr r2, [sp, #36]\n\t" + "ldr r1, [sp, #32]\n\t" + "ldr r0, [sp, #8]\n\t" + "bl fe_mul\n\t" + "ldr r2, [sp, #28]\n\t" + "ldr r1, [sp, #24]\n\t" + "ldr r0, [sp, #12]\n\t" + "bl fe_mul\n\t" + "add sp, sp, #16\n\t" + : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) + : + : "memory", "lr" + ); +} + +void fe_ge_dbl(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz) +{ + __asm__ __volatile__ ( + "sub sp, sp, #16\n\t" + "str %[rx], [sp]\n\t" + "str %[ry], [sp, #4]\n\t" + "str %[rz], [sp, #8]\n\t" + "str %[rt], [sp, #12]\n\t" + "ldr r1, [sp, #88]\n\t" + "ldr r0, [sp]\n\t" + "bl fe_sq\n\t" + "ldr r1, [sp, #92]\n\t" + "ldr r0, [sp, #8]\n\t" + "bl fe_sq\n\t" + "ldr r0, [sp, #4]\n\t" + "ldr r1, [sp, #88]\n\t" + "ldr r2, [sp, #92]\n\t" + /* Add */ + "ldrd %[rt], r4, [r1]\n\t" + "ldrd r5, r6, [r1, #8]\n\t" + "ldrd r7, r8, [r2]\n\t" + "ldrd r9, r10, [r2, #8]\n\t" + "adds r7, %[rt], r7\n\t" + "adcs r8, r4, r8\n\t" + "adcs r9, r5, r9\n\t" + "adcs r10, r6, r10\n\t" + "strd r7, r8, [r0]\n\t" + "strd r9, r10, [r0, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "ldrd r5, r6, [r1, #24]\n\t" + "ldrd r7, r8, [r2, #16]\n\t" + "ldrd r9, r10, [r2, #24]\n\t" + "adcs r7, %[rt], r7\n\t" + "adcs r8, r4, r8\n\t" + "adcs r9, r5, r9\n\t" + "adc r10, r6, r10\n\t" + "mov r12, #-19\n\t" + "asr 
r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "sbcs r5, r5, r11\n\t" + "sbcs r6, r6, r11\n\t" + "sbcs r7, r7, r11\n\t" + "sbcs r8, r8, r11\n\t" + "sbcs r9, r9, r11\n\t" + "sbc r10, r10, lr\n\t" + "strd %[rt], r4, [r0]\n\t" + "strd r5, r6, [r0, #8]\n\t" + "strd r7, r8, [r0, #16]\n\t" + "strd r9, r10, [r0, #24]\n\t" + "ldr r1, [sp, #4]\n\t" + "ldr r0, [sp, #12]\n\t" + "bl fe_sq\n\t" + "ldr r0, [sp, #4]\n\t" + "ldr r1, [sp, #8]\n\t" + "ldr r2, [sp]\n\t" + /* Add-Sub */ + /* Add */ + "ldrd %[rt], r4, [r1]\n\t" + "ldrd r5, r6, [r2]\n\t" + "adds r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0]\n\t" + /* Sub */ + "subs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1]\n\t" + /* Add */ + "ldrd %[rt], r4, [r1, #8]\n\t" + "ldrd r5, r6, [r2, #8]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #8]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #8]\n\t" + /* Add */ + "ldrd %[rt], r4, [r1, #16]\n\t" + "ldrd r5, r6, [r2, #16]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #16]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #16]\n\t" + /* Add */ + "ldrd %[rt], r4, [r1, #24]\n\t" + "ldrd r5, r6, [r2, #24]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "adc r8, r4, r6\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "sbc r10, r4, r6\n\t" + "mov r12, #-19\n\t" + "asr r11, r8, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0]\n\t" + "ldrd %[rt], r4, [r0, #8]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #8]\n\t" + "ldrd %[rt], r4, [r0, #16]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #16]\n\t" + "sbcs r7, r7, r11\n\t" + "sbc r8, r8, lr\n\t" + "strd r7, r8, [r0, #24]\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd %[rt], r4, [r1]\n\t" + "adds %[rt], %[rt], r12\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1]\n\t" + "ldrd %[rt], r4, [r1, #8]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #16]\n\t" + "adcs r9, r9, r11\n\t" + "adc r10, r10, lr\n\t" + "strd r9, r10, [r1, #24]\n\t" + "ldr r0, [sp]\n\t" + "ldr r1, [sp, #12]\n\t" + "ldr r2, [sp, #4]\n\t" + /* Sub */ + "ldrd %[rt], r4, [r1]\n\t" + "ldrd r5, r6, [r1, #8]\n\t" + "ldrd r7, r8, [r2]\n\t" + "ldrd r9, r10, [r2, #8]\n\t" + "subs r7, %[rt], r7\n\t" + "sbcs r8, r4, r8\n\t" + "sbcs r9, r5, r9\n\t" + "sbcs r10, r6, r10\n\t" + "strd r7, 
r8, [r0]\n\t" + "strd r9, r10, [r0, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "ldrd r5, r6, [r1, #24]\n\t" + "ldrd r7, r8, [r2, #16]\n\t" + "ldrd r9, r10, [r2, #24]\n\t" + "sbcs r7, %[rt], r7\n\t" + "sbcs r8, r4, r8\n\t" + "sbcs r9, r5, r9\n\t" + "sbc r10, r6, r10\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "adds %[rt], %[rt], r12\n\t" + "adcs r4, r4, r11\n\t" + "adcs r5, r5, r11\n\t" + "adcs r6, r6, r11\n\t" + "adcs r7, r7, r11\n\t" + "adcs r8, r8, r11\n\t" + "adcs r9, r9, r11\n\t" + "adc r10, r10, lr\n\t" + "strd %[rt], r4, [r0]\n\t" + "strd r5, r6, [r0, #8]\n\t" + "strd r7, r8, [r0, #16]\n\t" + "strd r9, r10, [r0, #24]\n\t" + "ldr r1, [sp, #96]\n\t" + "ldr r0, [sp, #12]\n\t" + "bl fe_sq2\n\t" + "ldr r0, [sp, #12]\n\t" + "ldr r1, [sp, #8]\n\t" + /* Sub */ + "ldrd %[rt], r4, [r0]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "ldrd r7, r8, [r1]\n\t" + "ldrd r9, r10, [r1, #8]\n\t" + "subs r7, %[rt], r7\n\t" + "sbcs r8, r4, r8\n\t" + "sbcs r9, r5, r9\n\t" + "sbcs r10, r6, r10\n\t" + "strd r7, r8, [r0]\n\t" + "strd r9, r10, [r0, #8]\n\t" + "ldrd %[rt], r4, [r0, #16]\n\t" + "ldrd r5, r6, [r0, #24]\n\t" + "ldrd r7, r8, [r1, #16]\n\t" + "ldrd r9, r10, [r1, #24]\n\t" + "sbcs r7, %[rt], r7\n\t" + "sbcs r8, r4, r8\n\t" + "sbcs r9, r5, r9\n\t" + "sbc r10, r6, r10\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "adds %[rt], %[rt], r12\n\t" + "adcs r4, r4, r11\n\t" + "adcs r5, r5, r11\n\t" + "adcs r6, r6, r11\n\t" + "adcs r7, r7, r11\n\t" + "adcs r8, r8, r11\n\t" + "adcs r9, r9, r11\n\t" + "adc r10, r10, lr\n\t" + "strd %[rt], r4, [r0]\n\t" + "strd r5, r6, [r0, #8]\n\t" + "strd r7, r8, [r0, #16]\n\t" + "strd r9, r10, [r0, #24]\n\t" + "add sp, sp, #16\n\t" + : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +void fe_ge_madd(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz, const fe pt, const fe qxy2d, const fe qyplusx, const fe qyminusx) +{ + __asm__ __volatile__ ( + "sub sp, sp, #32\n\t" + "str %[rx], [sp]\n\t" + "str %[ry], [sp, #4]\n\t" + "str %[rz], [sp, #8]\n\t" + "str %[rt], [sp, #12]\n\t" + "ldr r0, [sp]\n\t" + "ldr r1, [sp, #108]\n\t" + "ldr r2, [sp, #104]\n\t" + /* Add */ + "ldrd %[rt], r4, [r1]\n\t" + "ldrd r5, r6, [r1, #8]\n\t" + "ldrd r7, r8, [r2]\n\t" + "ldrd r9, r10, [r2, #8]\n\t" + "adds r7, %[rt], r7\n\t" + "adcs r8, r4, r8\n\t" + "adcs r9, r5, r9\n\t" + "adcs r10, r6, r10\n\t" + "strd r7, r8, [r0]\n\t" + "strd r9, r10, [r0, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "ldrd r5, r6, [r1, #24]\n\t" + "ldrd r7, r8, [r2, #16]\n\t" + "ldrd r9, r10, [r2, #24]\n\t" + "adcs r7, %[rt], r7\n\t" + "adcs r8, r4, r8\n\t" + "adcs r9, r5, r9\n\t" + "adc r10, r6, r10\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "sbcs r5, r5, r11\n\t" + "sbcs r6, r6, r11\n\t" + "sbcs r7, r7, r11\n\t" + "sbcs r8, r8, r11\n\t" + "sbcs r9, r9, r11\n\t" + 
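+        /* Note (descriptive, inferred from the surrounding code): r11 holds
+         * the all-ones/zero mask taken from the result's sign bit, r12 =
+         * mask & -19 was subtracted from the low limb, and lr = mask &
+         * 0x7fffffff is subtracted from the top limb below, so the borrow
+         * chain removes exactly 2^255 - 19 when the addition overflowed. */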
"sbc r10, r10, lr\n\t" + "strd %[rt], r4, [r0]\n\t" + "strd r5, r6, [r0, #8]\n\t" + "strd r7, r8, [r0, #16]\n\t" + "strd r9, r10, [r0, #24]\n\t" + "ldr r0, [sp, #4]\n\t" + "ldr r1, [sp, #108]\n\t" + "ldr r2, [sp, #104]\n\t" + /* Sub */ + "ldrd %[rt], r4, [r1]\n\t" + "ldrd r5, r6, [r1, #8]\n\t" + "ldrd r7, r8, [r2]\n\t" + "ldrd r9, r10, [r2, #8]\n\t" + "subs r7, %[rt], r7\n\t" + "sbcs r8, r4, r8\n\t" + "sbcs r9, r5, r9\n\t" + "sbcs r10, r6, r10\n\t" + "strd r7, r8, [r0]\n\t" + "strd r9, r10, [r0, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "ldrd r5, r6, [r1, #24]\n\t" + "ldrd r7, r8, [r2, #16]\n\t" + "ldrd r9, r10, [r2, #24]\n\t" + "sbcs r7, %[rt], r7\n\t" + "sbcs r8, r4, r8\n\t" + "sbcs r9, r5, r9\n\t" + "sbc r10, r6, r10\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "adds %[rt], %[rt], r12\n\t" + "adcs r4, r4, r11\n\t" + "adcs r5, r5, r11\n\t" + "adcs r6, r6, r11\n\t" + "adcs r7, r7, r11\n\t" + "adcs r8, r8, r11\n\t" + "adcs r9, r9, r11\n\t" + "adc r10, r10, lr\n\t" + "strd %[rt], r4, [r0]\n\t" + "strd r5, r6, [r0, #8]\n\t" + "strd r7, r8, [r0, #16]\n\t" + "strd r9, r10, [r0, #24]\n\t" + "ldr r2, [sp, #124]\n\t" + "ldr r1, [sp]\n\t" + "ldr r0, [sp, #8]\n\t" + "bl fe_mul\n\t" + "ldr r2, [sp, #128]\n\t" + "ldr r1, [sp, #4]\n\t" + "ldr r0, [sp, #4]\n\t" + "bl fe_mul\n\t" + "ldr r2, [sp, #116]\n\t" + "ldr r1, [sp, #120]\n\t" + "ldr r0, [sp, #12]\n\t" + "bl fe_mul\n\t" + "ldr r0, [sp, #4]\n\t" + "ldr r1, [sp]\n\t" + "ldr r2, [sp, #8]\n\t" + /* Add-Sub */ + /* Add */ + "ldrd %[rt], r4, [r2]\n\t" + "ldrd r5, r6, [r0]\n\t" + "adds r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0]\n\t" + /* Sub */ + "subs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #8]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #8]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #8]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #16]\n\t" + "ldrd r5, r6, [r0, #16]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #16]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #16]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #24]\n\t" + "ldrd r5, r6, [r0, #24]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "adc r8, r4, r6\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "sbc r10, r4, r6\n\t" + "mov r12, #-19\n\t" + "asr r11, r8, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0]\n\t" + "ldrd %[rt], r4, [r0, #8]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #8]\n\t" + "ldrd %[rt], r4, [r0, #16]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #16]\n\t" + "sbcs r7, r7, 
r11\n\t" + "sbc r8, r8, lr\n\t" + "strd r7, r8, [r0, #24]\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd %[rt], r4, [r1]\n\t" + "adds %[rt], %[rt], r12\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1]\n\t" + "ldrd %[rt], r4, [r1, #8]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #16]\n\t" + "adcs r9, r9, r11\n\t" + "adc r10, r10, lr\n\t" + "strd r9, r10, [r1, #24]\n\t" + "ldr r0, [sp, #8]\n\t" + "ldr r1, [sp, #112]\n\t" + /* Double */ + "ldrd %[rt], r4, [r1]\n\t" + "ldrd r5, r6, [r1, #8]\n\t" + "ldrd r7, r8, [r1, #16]\n\t" + "ldrd r9, r10, [r1, #24]\n\t" + "adds %[rt], %[rt], %[rt]\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "sbcs r5, r5, r11\n\t" + "sbcs r6, r6, r11\n\t" + "sbcs r7, r7, r11\n\t" + "sbcs r8, r8, r11\n\t" + "sbcs r9, r9, r11\n\t" + "sbc r10, r10, lr\n\t" + "strd %[rt], r4, [r0]\n\t" + "strd r5, r6, [r0, #8]\n\t" + "strd r7, r8, [r0, #16]\n\t" + "strd r9, r10, [r0, #24]\n\t" + "ldr r0, [sp, #8]\n\t" + "ldr r1, [sp, #12]\n\t" + /* Add-Sub */ + /* Add */ + "ldrd %[rt], r4, [r0]\n\t" + "ldrd r5, r6, [r1]\n\t" + "adds r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0]\n\t" + /* Sub */ + "subs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1]\n\t" + /* Add */ + "ldrd %[rt], r4, [r0, #8]\n\t" + "ldrd r5, r6, [r1, #8]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #8]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #8]\n\t" + /* Add */ + "ldrd %[rt], r4, [r0, #16]\n\t" + "ldrd r5, r6, [r1, #16]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #16]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #16]\n\t" + /* Add */ + "ldrd %[rt], r4, [r0, #24]\n\t" + "ldrd r5, r6, [r1, #24]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "adc r8, r4, r6\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "sbc r10, r4, r6\n\t" + "mov r12, #-19\n\t" + "asr r11, r8, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0]\n\t" + "ldrd %[rt], r4, [r0, #8]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #8]\n\t" + "ldrd %[rt], r4, [r0, #16]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #16]\n\t" + "sbcs r7, r7, r11\n\t" + "sbc r8, r8, lr\n\t" + "strd 
r7, r8, [r0, #24]\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd %[rt], r4, [r1]\n\t" + "adds %[rt], %[rt], r12\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1]\n\t" + "ldrd %[rt], r4, [r1, #8]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #16]\n\t" + "adcs r9, r9, r11\n\t" + "adc r10, r10, lr\n\t" + "strd r9, r10, [r1, #24]\n\t" + "add sp, sp, #32\n\t" + : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); + (void)qxy2d; + (void)qyplusx; + (void)qyminusx; +} + +void fe_ge_msub(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz, const fe pt, const fe qxy2d, const fe qyplusx, const fe qyminusx) +{ + __asm__ __volatile__ ( + "sub sp, sp, #32\n\t" + "str %[rx], [sp]\n\t" + "str %[ry], [sp, #4]\n\t" + "str %[rz], [sp, #8]\n\t" + "str %[rt], [sp, #12]\n\t" + "ldr r0, [sp]\n\t" + "ldr r1, [sp, #108]\n\t" + "ldr r2, [sp, #104]\n\t" + /* Add */ + "ldrd %[rt], r4, [r1]\n\t" + "ldrd r5, r6, [r1, #8]\n\t" + "ldrd r7, r8, [r2]\n\t" + "ldrd r9, r10, [r2, #8]\n\t" + "adds r7, %[rt], r7\n\t" + "adcs r8, r4, r8\n\t" + "adcs r9, r5, r9\n\t" + "adcs r10, r6, r10\n\t" + "strd r7, r8, [r0]\n\t" + "strd r9, r10, [r0, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "ldrd r5, r6, [r1, #24]\n\t" + "ldrd r7, r8, [r2, #16]\n\t" + "ldrd r9, r10, [r2, #24]\n\t" + "adcs r7, %[rt], r7\n\t" + "adcs r8, r4, r8\n\t" + "adcs r9, r5, r9\n\t" + "adc r10, r6, r10\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "sbcs r5, r5, r11\n\t" + "sbcs r6, r6, r11\n\t" + "sbcs r7, r7, r11\n\t" + "sbcs r8, r8, r11\n\t" + "sbcs r9, r9, r11\n\t" + "sbc r10, r10, lr\n\t" + "strd %[rt], r4, [r0]\n\t" + "strd r5, r6, [r0, #8]\n\t" + "strd r7, r8, [r0, #16]\n\t" + "strd r9, r10, [r0, #24]\n\t" + "ldr r0, [sp, #4]\n\t" + "ldr r1, [sp, #108]\n\t" + "ldr r2, [sp, #104]\n\t" + /* Sub */ + "ldrd %[rt], r4, [r1]\n\t" + "ldrd r5, r6, [r1, #8]\n\t" + "ldrd r7, r8, [r2]\n\t" + "ldrd r9, r10, [r2, #8]\n\t" + "subs r7, %[rt], r7\n\t" + "sbcs r8, r4, r8\n\t" + "sbcs r9, r5, r9\n\t" + "sbcs r10, r6, r10\n\t" + "strd r7, r8, [r0]\n\t" + "strd r9, r10, [r0, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "ldrd r5, r6, [r1, #24]\n\t" + "ldrd r7, r8, [r2, #16]\n\t" + "ldrd r9, r10, [r2, #24]\n\t" + "sbcs r7, %[rt], r7\n\t" + "sbcs r8, r4, r8\n\t" + "sbcs r9, r5, r9\n\t" + "sbc r10, r6, r10\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "adds %[rt], %[rt], r12\n\t" + "adcs r4, r4, r11\n\t" + "adcs r5, r5, r11\n\t" + "adcs r6, r6, r11\n\t" + "adcs r7, r7, r11\n\t" + "adcs r8, r8, r11\n\t" + "adcs r9, r9, r11\n\t" + "adc r10, r10, lr\n\t" + "strd %[rt], r4, [r0]\n\t" + "strd r5, r6, [r0, #8]\n\t" + "strd r7, r8, [r0, #16]\n\t" + "strd r9, r10, [r0, #24]\n\t" + "ldr r2, [sp, #128]\n\t" 
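+        /* Note (descriptive): relative to fe_ge_madd above, the two q-operand
+         * multiplications here are swapped -- (Y1+X1) is multiplied by
+         * qyminusx and (Y1-X1) by qyplusx -- which is what turns the mixed
+         * addition into a mixed subtraction of the precomputed point. */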
+ "ldr r1, [sp]\n\t" + "ldr r0, [sp, #8]\n\t" + "bl fe_mul\n\t" + "ldr r2, [sp, #124]\n\t" + "ldr r1, [sp, #4]\n\t" + "ldr r0, [sp, #4]\n\t" + "bl fe_mul\n\t" + "ldr r2, [sp, #116]\n\t" + "ldr r1, [sp, #120]\n\t" + "ldr r0, [sp, #12]\n\t" + "bl fe_mul\n\t" + "ldr r0, [sp, #4]\n\t" + "ldr r1, [sp]\n\t" + "ldr r2, [sp, #8]\n\t" + /* Add-Sub */ + /* Add */ + "ldrd %[rt], r4, [r2]\n\t" + "ldrd r5, r6, [r0]\n\t" + "adds r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0]\n\t" + /* Sub */ + "subs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #8]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #8]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #8]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #16]\n\t" + "ldrd r5, r6, [r0, #16]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #16]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #16]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #24]\n\t" + "ldrd r5, r6, [r0, #24]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "adc r8, r4, r6\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "sbc r10, r4, r6\n\t" + "mov r12, #-19\n\t" + "asr r11, r8, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0]\n\t" + "ldrd %[rt], r4, [r0, #8]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #8]\n\t" + "ldrd %[rt], r4, [r0, #16]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #16]\n\t" + "sbcs r7, r7, r11\n\t" + "sbc r8, r8, lr\n\t" + "strd r7, r8, [r0, #24]\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd %[rt], r4, [r1]\n\t" + "adds %[rt], %[rt], r12\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1]\n\t" + "ldrd %[rt], r4, [r1, #8]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #16]\n\t" + "adcs r9, r9, r11\n\t" + "adc r10, r10, lr\n\t" + "strd r9, r10, [r1, #24]\n\t" + "ldr r0, [sp, #8]\n\t" + "ldr r1, [sp, #112]\n\t" + /* Double */ + "ldrd %[rt], r4, [r1]\n\t" + "ldrd r5, r6, [r1, #8]\n\t" + "ldrd r7, r8, [r1, #16]\n\t" + "ldrd r9, r10, [r1, #24]\n\t" + "adds %[rt], %[rt], %[rt]\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "sbcs r5, r5, r11\n\t" + "sbcs 
r6, r6, r11\n\t" + "sbcs r7, r7, r11\n\t" + "sbcs r8, r8, r11\n\t" + "sbcs r9, r9, r11\n\t" + "sbc r10, r10, lr\n\t" + "strd %[rt], r4, [r0]\n\t" + "strd r5, r6, [r0, #8]\n\t" + "strd r7, r8, [r0, #16]\n\t" + "strd r9, r10, [r0, #24]\n\t" + "ldr r0, [sp, #12]\n\t" + "ldr r1, [sp, #8]\n\t" + /* Add-Sub */ + /* Add */ + "ldrd %[rt], r4, [r1]\n\t" + "ldrd r5, r6, [r0]\n\t" + "adds r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0]\n\t" + /* Sub */ + "subs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1]\n\t" + /* Add */ + "ldrd %[rt], r4, [r1, #8]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #8]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #8]\n\t" + /* Add */ + "ldrd %[rt], r4, [r1, #16]\n\t" + "ldrd r5, r6, [r0, #16]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #16]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #16]\n\t" + /* Add */ + "ldrd %[rt], r4, [r1, #24]\n\t" + "ldrd r5, r6, [r0, #24]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "adc r8, r4, r6\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "sbc r10, r4, r6\n\t" + "mov r12, #-19\n\t" + "asr r11, r8, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0]\n\t" + "ldrd %[rt], r4, [r0, #8]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #8]\n\t" + "ldrd %[rt], r4, [r0, #16]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #16]\n\t" + "sbcs r7, r7, r11\n\t" + "sbc r8, r8, lr\n\t" + "strd r7, r8, [r0, #24]\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd %[rt], r4, [r1]\n\t" + "adds %[rt], %[rt], r12\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1]\n\t" + "ldrd %[rt], r4, [r1, #8]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #16]\n\t" + "adcs r9, r9, r11\n\t" + "adc r10, r10, lr\n\t" + "strd r9, r10, [r1, #24]\n\t" + "add sp, sp, #32\n\t" + : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); + (void)qxy2d; + (void)qyplusx; + (void)qyminusx; +} + +void fe_ge_add(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz, const fe pt, const fe qz, const fe qt2d, const fe qyplusx, const fe qyminusx) +{ + __asm__ __volatile__ ( + "sub sp, sp, #0x60\n\t" + "str %[rx], [sp]\n\t" + "str %[ry], [sp, #4]\n\t" + "str %[rz], [sp, #8]\n\t" + "str %[rt], [sp, #12]\n\t" + "ldr r0, [sp]\n\t" + "ldr r1, [sp, #172]\n\t" + "ldr r2, [sp, 
#168]\n\t" + /* Add */ + "ldrd %[rt], r4, [r1]\n\t" + "ldrd r5, r6, [r1, #8]\n\t" + "ldrd r7, r8, [r2]\n\t" + "ldrd r9, r10, [r2, #8]\n\t" + "adds r7, %[rt], r7\n\t" + "adcs r8, r4, r8\n\t" + "adcs r9, r5, r9\n\t" + "adcs r10, r6, r10\n\t" + "strd r7, r8, [r0]\n\t" + "strd r9, r10, [r0, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "ldrd r5, r6, [r1, #24]\n\t" + "ldrd r7, r8, [r2, #16]\n\t" + "ldrd r9, r10, [r2, #24]\n\t" + "adcs r7, %[rt], r7\n\t" + "adcs r8, r4, r8\n\t" + "adcs r9, r5, r9\n\t" + "adc r10, r6, r10\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "sbcs r5, r5, r11\n\t" + "sbcs r6, r6, r11\n\t" + "sbcs r7, r7, r11\n\t" + "sbcs r8, r8, r11\n\t" + "sbcs r9, r9, r11\n\t" + "sbc r10, r10, lr\n\t" + "strd %[rt], r4, [r0]\n\t" + "strd r5, r6, [r0, #8]\n\t" + "strd r7, r8, [r0, #16]\n\t" + "strd r9, r10, [r0, #24]\n\t" + "ldr r0, [sp, #4]\n\t" + "ldr r1, [sp, #172]\n\t" + "ldr r2, [sp, #168]\n\t" + /* Sub */ + "ldrd %[rt], r4, [r1]\n\t" + "ldrd r5, r6, [r1, #8]\n\t" + "ldrd r7, r8, [r2]\n\t" + "ldrd r9, r10, [r2, #8]\n\t" + "subs r7, %[rt], r7\n\t" + "sbcs r8, r4, r8\n\t" + "sbcs r9, r5, r9\n\t" + "sbcs r10, r6, r10\n\t" + "strd r7, r8, [r0]\n\t" + "strd r9, r10, [r0, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "ldrd r5, r6, [r1, #24]\n\t" + "ldrd r7, r8, [r2, #16]\n\t" + "ldrd r9, r10, [r2, #24]\n\t" + "sbcs r7, %[rt], r7\n\t" + "sbcs r8, r4, r8\n\t" + "sbcs r9, r5, r9\n\t" + "sbc r10, r6, r10\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "adds %[rt], %[rt], r12\n\t" + "adcs r4, r4, r11\n\t" + "adcs r5, r5, r11\n\t" + "adcs r6, r6, r11\n\t" + "adcs r7, r7, r11\n\t" + "adcs r8, r8, r11\n\t" + "adcs r9, r9, r11\n\t" + "adc r10, r10, lr\n\t" + "strd %[rt], r4, [r0]\n\t" + "strd r5, r6, [r0, #8]\n\t" + "strd r7, r8, [r0, #16]\n\t" + "strd r9, r10, [r0, #24]\n\t" + "ldr r2, [sp, #192]\n\t" + "ldr r1, [sp]\n\t" + "ldr r0, [sp, #8]\n\t" + "bl fe_mul\n\t" + "ldr r2, [sp, #196]\n\t" + "ldr r1, [sp, #4]\n\t" + "ldr r0, [sp, #4]\n\t" + "bl fe_mul\n\t" + "ldr r2, [sp, #180]\n\t" + "ldr r1, [sp, #188]\n\t" + "ldr r0, [sp, #12]\n\t" + "bl fe_mul\n\t" + "ldr r2, [sp, #184]\n\t" + "ldr r1, [sp, #176]\n\t" + "ldr r0, [sp]\n\t" + "bl fe_mul\n\t" + "add r0, sp, #16\n\t" + "ldr r1, [sp]\n\t" + /* Double */ + "ldrd %[rt], r4, [r1]\n\t" + "ldrd r5, r6, [r1, #8]\n\t" + "ldrd r7, r8, [r1, #16]\n\t" + "ldrd r9, r10, [r1, #24]\n\t" + "adds %[rt], %[rt], %[rt]\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "sbcs r5, r5, r11\n\t" + "sbcs r6, r6, r11\n\t" + "sbcs r7, r7, r11\n\t" + "sbcs r8, r8, r11\n\t" + "sbcs r9, r9, r11\n\t" + "sbc r10, r10, lr\n\t" + "strd %[rt], r4, [r0]\n\t" + "strd r5, r6, [r0, #8]\n\t" + "strd r7, r8, [r0, #16]\n\t" + "strd r9, r10, [r0, #24]\n\t" + "ldr r0, [sp, #4]\n\t" + "ldr r1, [sp]\n\t" + "ldr r2, [sp, #8]\n\t" + 
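+        /* Note (descriptive): the fused Add-Sub blocks that follow compute
+         * (a + b) and (a - b) in a single pass over the limbs. ARM has one
+         * flags register, so the add carry is parked in r12 and the sub
+         * borrow in lr, each restored with an "adds #-1" before the next
+         * limb pair. */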
/* Add-Sub */ + /* Add */ + "ldrd %[rt], r4, [r2]\n\t" + "ldrd r5, r6, [r0]\n\t" + "adds r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0]\n\t" + /* Sub */ + "subs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #8]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #8]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #8]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #16]\n\t" + "ldrd r5, r6, [r0, #16]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #16]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #16]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #24]\n\t" + "ldrd r5, r6, [r0, #24]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "adc r8, r4, r6\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "sbc r10, r4, r6\n\t" + "mov r12, #-19\n\t" + "asr r11, r8, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0]\n\t" + "ldrd %[rt], r4, [r0, #8]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #8]\n\t" + "ldrd %[rt], r4, [r0, #16]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #16]\n\t" + "sbcs r7, r7, r11\n\t" + "sbc r8, r8, lr\n\t" + "strd r7, r8, [r0, #24]\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd %[rt], r4, [r1]\n\t" + "adds %[rt], %[rt], r12\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1]\n\t" + "ldrd %[rt], r4, [r1, #8]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #16]\n\t" + "adcs r9, r9, r11\n\t" + "adc r10, r10, lr\n\t" + "strd r9, r10, [r1, #24]\n\t" + "ldr r0, [sp, #8]\n\t" + "ldr r1, [sp, #12]\n\t" + "add r2, sp, #16\n\t" + /* Add-Sub */ + /* Add */ + "ldrd %[rt], r4, [r2]\n\t" + "ldrd r5, r6, [r1]\n\t" + "adds r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0]\n\t" + /* Sub */ + "subs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #8]\n\t" + "ldrd r5, r6, [r1, #8]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #8]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #8]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #16]\n\t" + "ldrd r5, r6, [r1, #16]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + 
"adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #16]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #16]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #24]\n\t" + "ldrd r5, r6, [r1, #24]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "adc r8, r4, r6\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "sbc r10, r4, r6\n\t" + "mov r12, #-19\n\t" + "asr r11, r8, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0]\n\t" + "ldrd %[rt], r4, [r0, #8]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #8]\n\t" + "ldrd %[rt], r4, [r0, #16]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #16]\n\t" + "sbcs r7, r7, r11\n\t" + "sbc r8, r8, lr\n\t" + "strd r7, r8, [r0, #24]\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd %[rt], r4, [r1]\n\t" + "adds %[rt], %[rt], r12\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1]\n\t" + "ldrd %[rt], r4, [r1, #8]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #16]\n\t" + "adcs r9, r9, r11\n\t" + "adc r10, r10, lr\n\t" + "strd r9, r10, [r1, #24]\n\t" + "add sp, sp, #0x60\n\t" + : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); + (void)qz; + (void)qt2d; + (void)qyplusx; + (void)qyminusx; +} + +void fe_ge_sub(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz, const fe pt, const fe qz, const fe qt2d, const fe qyplusx, const fe qyminusx) +{ + __asm__ __volatile__ ( + "sub sp, sp, #0x60\n\t" + "str %[rx], [sp]\n\t" + "str %[ry], [sp, #4]\n\t" + "str %[rz], [sp, #8]\n\t" + "str %[rt], [sp, #12]\n\t" + "ldr r0, [sp]\n\t" + "ldr r1, [sp, #172]\n\t" + "ldr r2, [sp, #168]\n\t" + /* Add */ + "ldrd %[rt], r4, [r1]\n\t" + "ldrd r5, r6, [r1, #8]\n\t" + "ldrd r7, r8, [r2]\n\t" + "ldrd r9, r10, [r2, #8]\n\t" + "adds r7, %[rt], r7\n\t" + "adcs r8, r4, r8\n\t" + "adcs r9, r5, r9\n\t" + "adcs r10, r6, r10\n\t" + "strd r7, r8, [r0]\n\t" + "strd r9, r10, [r0, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "ldrd r5, r6, [r1, #24]\n\t" + "ldrd r7, r8, [r2, #16]\n\t" + "ldrd r9, r10, [r2, #24]\n\t" + "adcs r7, %[rt], r7\n\t" + "adcs r8, r4, r8\n\t" + "adcs r9, r5, r9\n\t" + "adc r10, r6, r10\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "sbcs r5, r5, r11\n\t" + "sbcs r6, r6, r11\n\t" + "sbcs r7, r7, r11\n\t" + "sbcs r8, r8, r11\n\t" + "sbcs r9, r9, r11\n\t" + "sbc r10, r10, lr\n\t" + "strd %[rt], r4, [r0]\n\t" + "strd r5, r6, [r0, #8]\n\t" + "strd r7, r8, [r0, #16]\n\t" + "strd r9, r10, [r0, #24]\n\t" + "ldr r0, [sp, #4]\n\t" + "ldr r1, [sp, #172]\n\t" + "ldr r2, [sp, #168]\n\t" + /* Sub */ + "ldrd %[rt], 
r4, [r1]\n\t" + "ldrd r5, r6, [r1, #8]\n\t" + "ldrd r7, r8, [r2]\n\t" + "ldrd r9, r10, [r2, #8]\n\t" + "subs r7, %[rt], r7\n\t" + "sbcs r8, r4, r8\n\t" + "sbcs r9, r5, r9\n\t" + "sbcs r10, r6, r10\n\t" + "strd r7, r8, [r0]\n\t" + "strd r9, r10, [r0, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "ldrd r5, r6, [r1, #24]\n\t" + "ldrd r7, r8, [r2, #16]\n\t" + "ldrd r9, r10, [r2, #24]\n\t" + "sbcs r7, %[rt], r7\n\t" + "sbcs r8, r4, r8\n\t" + "sbcs r9, r5, r9\n\t" + "sbc r10, r6, r10\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "adds %[rt], %[rt], r12\n\t" + "adcs r4, r4, r11\n\t" + "adcs r5, r5, r11\n\t" + "adcs r6, r6, r11\n\t" + "adcs r7, r7, r11\n\t" + "adcs r8, r8, r11\n\t" + "adcs r9, r9, r11\n\t" + "adc r10, r10, lr\n\t" + "strd %[rt], r4, [r0]\n\t" + "strd r5, r6, [r0, #8]\n\t" + "strd r7, r8, [r0, #16]\n\t" + "strd r9, r10, [r0, #24]\n\t" + "ldr r2, [sp, #196]\n\t" + "ldr r1, [sp]\n\t" + "ldr r0, [sp, #8]\n\t" + "bl fe_mul\n\t" + "ldr r2, [sp, #192]\n\t" + "ldr r1, [sp, #4]\n\t" + "ldr r0, [sp, #4]\n\t" + "bl fe_mul\n\t" + "ldr r2, [sp, #180]\n\t" + "ldr r1, [sp, #188]\n\t" + "ldr r0, [sp, #12]\n\t" + "bl fe_mul\n\t" + "ldr r2, [sp, #184]\n\t" + "ldr r1, [sp, #176]\n\t" + "ldr r0, [sp]\n\t" + "bl fe_mul\n\t" + "add r0, sp, #16\n\t" + "ldr r1, [sp]\n\t" + /* Double */ + "ldrd %[rt], r4, [r1]\n\t" + "ldrd r5, r6, [r1, #8]\n\t" + "ldrd r7, r8, [r1, #16]\n\t" + "ldrd r9, r10, [r1, #24]\n\t" + "adds %[rt], %[rt], %[rt]\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "sbcs r5, r5, r11\n\t" + "sbcs r6, r6, r11\n\t" + "sbcs r7, r7, r11\n\t" + "sbcs r8, r8, r11\n\t" + "sbcs r9, r9, r11\n\t" + "sbc r10, r10, lr\n\t" + "strd %[rt], r4, [r0]\n\t" + "strd r5, r6, [r0, #8]\n\t" + "strd r7, r8, [r0, #16]\n\t" + "strd r9, r10, [r0, #24]\n\t" + "ldr r0, [sp, #4]\n\t" + "ldr r1, [sp]\n\t" + "ldr r2, [sp, #8]\n\t" + /* Add-Sub */ + /* Add */ + "ldrd %[rt], r4, [r2]\n\t" + "ldrd r5, r6, [r0]\n\t" + "adds r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0]\n\t" + /* Sub */ + "subs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #8]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #8]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #8]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #16]\n\t" + "ldrd r5, r6, [r0, #16]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #16]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #16]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #24]\n\t" + "ldrd r5, r6, 
[r0, #24]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "adc r8, r4, r6\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "sbc r10, r4, r6\n\t" + "mov r12, #-19\n\t" + "asr r11, r8, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0]\n\t" + "ldrd %[rt], r4, [r0, #8]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #8]\n\t" + "ldrd %[rt], r4, [r0, #16]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #16]\n\t" + "sbcs r7, r7, r11\n\t" + "sbc r8, r8, lr\n\t" + "strd r7, r8, [r0, #24]\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd %[rt], r4, [r1]\n\t" + "adds %[rt], %[rt], r12\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1]\n\t" + "ldrd %[rt], r4, [r1, #8]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #16]\n\t" + "adcs r9, r9, r11\n\t" + "adc r10, r10, lr\n\t" + "strd r9, r10, [r1, #24]\n\t" + "ldr r0, [sp, #12]\n\t" + "ldr r1, [sp, #8]\n\t" + "add r2, sp, #16\n\t" + /* Add-Sub */ + /* Add */ + "ldrd %[rt], r4, [r2]\n\t" + "ldrd r5, r6, [r0]\n\t" + "adds r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0]\n\t" + /* Sub */ + "subs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #8]\n\t" + "ldrd r5, r6, [r0, #8]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #8]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #8]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #16]\n\t" + "ldrd r5, r6, [r0, #16]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "mov r12, #0\n\t" + "adcs r8, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "strd r7, r8, [r0, #16]\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "mov lr, #0\n\t" + "sbcs r10, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "strd r9, r10, [r1, #16]\n\t" + /* Add */ + "ldrd %[rt], r4, [r2, #24]\n\t" + "ldrd r5, r6, [r0, #24]\n\t" + "adds r12, r12, #-1\n\t" + "adcs r7, %[rt], r5\n\t" + "adc r8, r4, r6\n\t" + /* Sub */ + "adds lr, lr, #-1\n\t" + "sbcs r9, %[rt], r5\n\t" + "sbc r10, r4, r6\n\t" + "mov r12, #-19\n\t" + "asr r11, r8, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, #0x7fffffff\n\t" + /* Sub modulus (if overflow) */ + "ldrd %[rt], r4, [r0]\n\t" + "subs %[rt], %[rt], r12\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0]\n\t" + "ldrd %[rt], r4, [r0, #8]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #8]\n\t" + "ldrd %[rt], r4, [r0, #16]\n\t" + "sbcs %[rt], %[rt], r11\n\t" + "sbcs r4, r4, r11\n\t" + "strd %[rt], r4, [r0, #16]\n\t" + "sbcs r7, r7, r11\n\t" + "sbc r8, r8, lr\n\t" + "strd r7, r8, [r0, #24]\n\t" + "mov r12, #-19\n\t" + "asr r11, r10, #31\n\t" + /* Mask the modulus */ + "and r12, r11, r12\n\t" + "and lr, r11, 
#0x7fffffff\n\t" + /* Add modulus (if underflow) */ + "ldrd %[rt], r4, [r1]\n\t" + "adds %[rt], %[rt], r12\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1]\n\t" + "ldrd %[rt], r4, [r1, #8]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #8]\n\t" + "ldrd %[rt], r4, [r1, #16]\n\t" + "adcs %[rt], %[rt], r11\n\t" + "adcs r4, r4, r11\n\t" + "strd %[rt], r4, [r1, #16]\n\t" + "adcs r9, r9, r11\n\t" + "adc r10, r10, lr\n\t" + "strd r9, r10, [r1, #24]\n\t" + "add sp, sp, #0x60\n\t" + : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); + (void)qz; + (void)qt2d; + (void)qyplusx; + (void)qyminusx; +} + +#endif /* WOLFSSL_ARMASM */ +#endif /* !__aarch64__ */ diff --git a/client/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S b/client/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S new file mode 100644 index 0000000..d2b899c --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S @@ -0,0 +1,5335 @@ +/* armv8-32-sha512-asm + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./sha2/sha512.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S + */ + +#ifdef WOLFSSL_ARMASM +#ifndef __aarch64__ +#ifdef WOLFSSL_ARMASM_NO_NEON + .text + .type L_SHA512_transform_len_k, %object + .size L_SHA512_transform_len_k, 640 + .align 3 +L_SHA512_transform_len_k: + .word 0xd728ae22 + .word 0x428a2f98 + .word 0x23ef65cd + .word 0x71374491 + .word 0xec4d3b2f + .word 0xb5c0fbcf + .word 0x8189dbbc + .word 0xe9b5dba5 + .word 0xf348b538 + .word 0x3956c25b + .word 0xb605d019 + .word 0x59f111f1 + .word 0xaf194f9b + .word 0x923f82a4 + .word 0xda6d8118 + .word 0xab1c5ed5 + .word 0xa3030242 + .word 0xd807aa98 + .word 0x45706fbe + .word 0x12835b01 + .word 0x4ee4b28c + .word 0x243185be + .word 0xd5ffb4e2 + .word 0x550c7dc3 + .word 0xf27b896f + .word 0x72be5d74 + .word 0x3b1696b1 + .word 0x80deb1fe + .word 0x25c71235 + .word 0x9bdc06a7 + .word 0xcf692694 + .word 0xc19bf174 + .word 0x9ef14ad2 + .word 0xe49b69c1 + .word 0x384f25e3 + .word 0xefbe4786 + .word 0x8b8cd5b5 + .word 0xfc19dc6 + .word 0x77ac9c65 + .word 0x240ca1cc + .word 0x592b0275 + .word 0x2de92c6f + .word 0x6ea6e483 + .word 0x4a7484aa + .word 0xbd41fbd4 + .word 0x5cb0a9dc + .word 0x831153b5 + .word 0x76f988da + .word 0xee66dfab + .word 0x983e5152 + .word 0x2db43210 + .word 0xa831c66d + .word 0x98fb213f + .word 0xb00327c8 + .word 0xbeef0ee4 + .word 0xbf597fc7 + .word 0x3da88fc2 + .word 0xc6e00bf3 + .word 0x930aa725 + .word 0xd5a79147 + .word 0xe003826f + .word 0x6ca6351 + .word 0xa0e6e70 + .word 0x14292967 + .word 0x46d22ffc 
+ .word 0x27b70a85 + .word 0x5c26c926 + .word 0x2e1b2138 + .word 0x5ac42aed + .word 0x4d2c6dfc + .word 0x9d95b3df + .word 0x53380d13 + .word 0x8baf63de + .word 0x650a7354 + .word 0x3c77b2a8 + .word 0x766a0abb + .word 0x47edaee6 + .word 0x81c2c92e + .word 0x1482353b + .word 0x92722c85 + .word 0x4cf10364 + .word 0xa2bfe8a1 + .word 0xbc423001 + .word 0xa81a664b + .word 0xd0f89791 + .word 0xc24b8b70 + .word 0x654be30 + .word 0xc76c51a3 + .word 0xd6ef5218 + .word 0xd192e819 + .word 0x5565a910 + .word 0xd6990624 + .word 0x5771202a + .word 0xf40e3585 + .word 0x32bbd1b8 + .word 0x106aa070 + .word 0xb8d2d0c8 + .word 0x19a4c116 + .word 0x5141ab53 + .word 0x1e376c08 + .word 0xdf8eeb99 + .word 0x2748774c + .word 0xe19b48a8 + .word 0x34b0bcb5 + .word 0xc5c95a63 + .word 0x391c0cb3 + .word 0xe3418acb + .word 0x4ed8aa4a + .word 0x7763e373 + .word 0x5b9cca4f + .word 0xd6b2b8a3 + .word 0x682e6ff3 + .word 0x5defb2fc + .word 0x748f82ee + .word 0x43172f60 + .word 0x78a5636f + .word 0xa1f0ab72 + .word 0x84c87814 + .word 0x1a6439ec + .word 0x8cc70208 + .word 0x23631e28 + .word 0x90befffa + .word 0xde82bde9 + .word 0xa4506ceb + .word 0xb2c67915 + .word 0xbef9a3f7 + .word 0xe372532b + .word 0xc67178f2 + .word 0xea26619c + .word 0xca273ece + .word 0x21c0c207 + .word 0xd186b8c7 + .word 0xcde0eb1e + .word 0xeada7dd6 + .word 0xee6ed178 + .word 0xf57d4f7f + .word 0x72176fba + .word 0x6f067aa + .word 0xa2c898a6 + .word 0xa637dc5 + .word 0xbef90dae + .word 0x113f9804 + .word 0x131c471b + .word 0x1b710b35 + .word 0x23047d84 + .word 0x28db77f5 + .word 0x40c72493 + .word 0x32caab7b + .word 0x15c9bebc + .word 0x3c9ebe0a + .word 0x9c100d4c + .word 0x431d67c4 + .word 0xcb3e42b6 + .word 0x4cc5d4be + .word 0xfc657e2a + .word 0x597f299c + .word 0x3ad6faec + .word 0x5fcb6fab + .word 0x4a475817 + .word 0x6c44198c + .text + .align 2 + .globl Transform_Sha512_Len + .type Transform_Sha512_Len, %function +Transform_Sha512_Len: + push {r4, r5, r6, r7, r8, r9, r10, lr} + sub sp, sp, #0xc0 + adr r3, L_SHA512_transform_len_k + # Copy digest to add in at end + ldr r12, [r0] + ldr lr, [r0, #4] + ldrd r4, r5, [r0, #8] + ldrd r6, r7, [r0, #16] + ldrd r8, r9, [r0, #24] + str r12, [sp, #128] + str lr, [sp, #132] + strd r4, r5, [sp, #136] + strd r6, r7, [sp, #144] + strd r8, r9, [sp, #152] + ldr r12, [r0, #32] + ldr lr, [r0, #36] + ldrd r4, r5, [r0, #40] + ldrd r6, r7, [r0, #48] + ldrd r8, r9, [r0, #56] + str r12, [sp, #160] + str lr, [sp, #164] + strd r4, r5, [sp, #168] + strd r6, r7, [sp, #176] + strd r8, r9, [sp, #184] + # Start of loop processing a block +L_sha512_len_neon_begin: + # Load, Reverse and Store W + ldr r12, [r1] + ldr lr, [r1, #4] + ldrd r4, r5, [r1, #8] + ldrd r6, r7, [r1, #16] + ldrd r8, r9, [r1, #24] + rev r12, r12 + rev lr, lr + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 + rev r8, r8 + rev r9, r9 + str lr, [sp] + str r12, [sp, #4] + str r5, [sp, #8] + str r4, [sp, #12] + str r7, [sp, #16] + str r6, [sp, #20] + str r9, [sp, #24] + str r8, [sp, #28] + ldr r12, [r1, #32] + ldr lr, [r1, #36] + ldrd r4, r5, [r1, #40] + ldrd r6, r7, [r1, #48] + ldrd r8, r9, [r1, #56] + rev r12, r12 + rev lr, lr + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 + rev r8, r8 + rev r9, r9 + str lr, [sp, #32] + str r12, [sp, #36] + str r5, [sp, #40] + str r4, [sp, #44] + str r7, [sp, #48] + str r6, [sp, #52] + str r9, [sp, #56] + str r8, [sp, #60] + ldr r12, [r1, #64] + ldr lr, [r1, #68] + ldrd r4, r5, [r1, #72] + ldrd r6, r7, [r1, #80] + ldrd r8, r9, [r1, #88] + rev r12, r12 + rev lr, lr + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 + 
rev r8, r8 + rev r9, r9 + str lr, [sp, #64] + str r12, [sp, #68] + str r5, [sp, #72] + str r4, [sp, #76] + str r7, [sp, #80] + str r6, [sp, #84] + str r9, [sp, #88] + str r8, [sp, #92] + ldr r12, [r1, #96] + ldr lr, [r1, #100] + ldrd r4, r5, [r1, #104] + ldrd r6, r7, [r1, #112] + ldrd r8, r9, [r1, #120] + rev r12, r12 + rev lr, lr + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 + rev r8, r8 + rev r9, r9 + str lr, [sp, #96] + str r12, [sp, #100] + str r5, [sp, #104] + str r4, [sp, #108] + str r7, [sp, #112] + str r6, [sp, #116] + str r9, [sp, #120] + str r8, [sp, #124] + # Pre-calc: b ^ c + ldrd r8, r9, [r0, #8] + ldr r12, [r0, #16] + ldr lr, [r0, #20] + eor r8, r8, r12 + eor r9, r9, lr + mov r10, #4 + # Start of 16 rounds +L_sha512_len_neon_start: + # Round 0 + ldr r12, [r0, #32] + ldr lr, [r0, #36] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #56] + ldr lr, [r0, #60] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #56] + str lr, [r0, #60] + ldr r12, [r0, #32] + ldr lr, [r0, #36] + ldrd r4, r5, [r0, #40] + ldrd r6, r7, [r0, #48] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #56] + ldr lr, [r0, #60] + ldrd r6, r7, [sp] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #24] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #56] + str lr, [r0, #60] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0] + ldr lr, [r0, #4] + strd r6, r7, [r0, #24] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #56] + ldr lr, [r0, #60] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0] + ldrd r4, r5, [r0, #8] + str r12, [r0, #56] + str lr, [r0, #60] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #56] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #56] + mov r8, r6 + mov r9, r7 + # Calc new W[0] + ldr r12, [sp, #112] + ldr lr, [sp, #116] + lsrs r4, r12, #19 + lsrs r5, lr, #19 + orr r5, r5, r12, lsl #13 + orr r4, r4, lr, lsl #13 + lsls r6, r12, #3 + lsls r7, lr, #3 + orr r7, r7, r12, lsr #29 + orr r6, r6, lr, lsr #29 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #6 + lsrs r7, lr, #6 + orr r6, r6, lr, lsl #26 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp] + ldr lr, [sp, #4] + ldrd r6, r7, [sp, #72] + adds r12, r12, r4 + adc lr, lr, r5 + adds r12, r12, r6 + adc lr, lr, r7 + str r12, [sp] + str lr, [sp, #4] + ldr r12, [sp, #8] + ldr lr, [sp, #12] + lsrs r4, r12, #1 + lsrs r5, lr, #1 + orr r5, r5, r12, lsl #31 + orr r4, r4, lr, lsl #31 + lsrs r6, r12, #8 + lsrs r7, lr, #8 + orr r7, r7, r12, lsl #24 + orr r6, r6, lr, lsl #24 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #7 + lsrs r7, lr, #7 + orr r6, r6, lr, lsl #25 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp] + ldr lr, [sp, #4] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [sp] + str lr, [sp, #4] + # Round 1 + 
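+    # Note (descriptive, from comparing rounds 0 and 1): instead of rotating
+    # the eight 64-bit working variables, each round steps its digest offsets
+    # down by 8 bytes (e is read from [r0, #24] here versus [r0, #32] in
+    # round 0), so the values never move in memory.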
ldr r12, [r0, #24] + ldr lr, [r0, #28] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #48] + ldr lr, [r0, #52] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #48] + str lr, [r0, #52] + ldr r12, [r0, #24] + ldr lr, [r0, #28] + ldrd r4, r5, [r0, #32] + ldrd r6, r7, [r0, #40] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #48] + ldr lr, [r0, #52] + ldrd r6, r7, [sp, #8] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #8] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #16] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #48] + str lr, [r0, #52] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #56] + ldr lr, [r0, #60] + strd r6, r7, [r0, #16] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #48] + ldr lr, [r0, #52] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #56] + ldrd r4, r5, [r0] + str r12, [r0, #48] + str lr, [r0, #52] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #48] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #48] + mov r8, r6 + mov r9, r7 + # Calc new W[1] + ldr r12, [sp, #120] + ldr lr, [sp, #124] + lsrs r4, r12, #19 + lsrs r5, lr, #19 + orr r5, r5, r12, lsl #13 + orr r4, r4, lr, lsl #13 + lsls r6, r12, #3 + lsls r7, lr, #3 + orr r7, r7, r12, lsr #29 + orr r6, r6, lr, lsr #29 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #6 + lsrs r7, lr, #6 + orr r6, r6, lr, lsl #26 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #8] + ldr lr, [sp, #12] + ldrd r6, r7, [sp, #80] + adds r12, r12, r4 + adc lr, lr, r5 + adds r12, r12, r6 + adc lr, lr, r7 + str r12, [sp, #8] + str lr, [sp, #12] + ldr r12, [sp, #16] + ldr lr, [sp, #20] + lsrs r4, r12, #1 + lsrs r5, lr, #1 + orr r5, r5, r12, lsl #31 + orr r4, r4, lr, lsl #31 + lsrs r6, r12, #8 + lsrs r7, lr, #8 + orr r7, r7, r12, lsl #24 + orr r6, r6, lr, lsl #24 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #7 + lsrs r7, lr, #7 + orr r6, r6, lr, lsl #25 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #8] + ldr lr, [sp, #12] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [sp, #8] + str lr, [sp, #12] + # Round 2 + ldr r12, [r0, #16] + ldr lr, [r0, #20] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #40] + ldr lr, [r0, #44] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #40] + str lr, [r0, #44] + ldr r12, [r0, #16] + ldr lr, [r0, #20] + ldrd r4, r5, [r0, #24] + ldrd r6, r7, [r0, #32] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #40] + ldr lr, [r0, #44] + ldrd 
r6, r7, [sp, #16] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #16] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #8] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #40] + str lr, [r0, #44] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #48] + ldr lr, [r0, #52] + strd r6, r7, [r0, #8] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #40] + ldr lr, [r0, #44] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #48] + ldrd r4, r5, [r0, #56] + str r12, [r0, #40] + str lr, [r0, #44] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #40] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #40] + mov r8, r6 + mov r9, r7 + # Calc new W[2] + ldr r12, [sp] + ldr lr, [sp, #4] + lsrs r4, r12, #19 + lsrs r5, lr, #19 + orr r5, r5, r12, lsl #13 + orr r4, r4, lr, lsl #13 + lsls r6, r12, #3 + lsls r7, lr, #3 + orr r7, r7, r12, lsr #29 + orr r6, r6, lr, lsr #29 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #6 + lsrs r7, lr, #6 + orr r6, r6, lr, lsl #26 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #16] + ldr lr, [sp, #20] + ldrd r6, r7, [sp, #88] + adds r12, r12, r4 + adc lr, lr, r5 + adds r12, r12, r6 + adc lr, lr, r7 + str r12, [sp, #16] + str lr, [sp, #20] + ldr r12, [sp, #24] + ldr lr, [sp, #28] + lsrs r4, r12, #1 + lsrs r5, lr, #1 + orr r5, r5, r12, lsl #31 + orr r4, r4, lr, lsl #31 + lsrs r6, r12, #8 + lsrs r7, lr, #8 + orr r7, r7, r12, lsl #24 + orr r6, r6, lr, lsl #24 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #7 + lsrs r7, lr, #7 + orr r6, r6, lr, lsl #25 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #16] + ldr lr, [sp, #20] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [sp, #16] + str lr, [sp, #20] + # Round 3 + ldr r12, [r0, #8] + ldr lr, [r0, #12] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #32] + ldr lr, [r0, #36] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #32] + str lr, [r0, #36] + ldr r12, [r0, #8] + ldr lr, [r0, #12] + ldrd r4, r5, [r0, #16] + ldrd r6, r7, [r0, #24] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #32] + ldr lr, [r0, #36] + ldrd r6, r7, [sp, #24] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #24] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #32] + str lr, [r0, #36] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #40] + ldr lr, [r0, #44] + strd r6, r7, [r0] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #32] + ldr lr, [r0, #36] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, 
[r0, #40] + ldrd r4, r5, [r0, #48] + str r12, [r0, #32] + str lr, [r0, #36] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #32] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #32] + mov r8, r6 + mov r9, r7 + # Calc new W[3] + ldr r12, [sp, #8] + ldr lr, [sp, #12] + lsrs r4, r12, #19 + lsrs r5, lr, #19 + orr r5, r5, r12, lsl #13 + orr r4, r4, lr, lsl #13 + lsls r6, r12, #3 + lsls r7, lr, #3 + orr r7, r7, r12, lsr #29 + orr r6, r6, lr, lsr #29 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #6 + lsrs r7, lr, #6 + orr r6, r6, lr, lsl #26 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #24] + ldr lr, [sp, #28] + ldrd r6, r7, [sp, #96] + adds r12, r12, r4 + adc lr, lr, r5 + adds r12, r12, r6 + adc lr, lr, r7 + str r12, [sp, #24] + str lr, [sp, #28] + ldr r12, [sp, #32] + ldr lr, [sp, #36] + lsrs r4, r12, #1 + lsrs r5, lr, #1 + orr r5, r5, r12, lsl #31 + orr r4, r4, lr, lsl #31 + lsrs r6, r12, #8 + lsrs r7, lr, #8 + orr r7, r7, r12, lsl #24 + orr r6, r6, lr, lsl #24 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #7 + lsrs r7, lr, #7 + orr r6, r6, lr, lsl #25 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #24] + ldr lr, [sp, #28] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [sp, #24] + str lr, [sp, #28] + # Round 4 + ldr r12, [r0] + ldr lr, [r0, #4] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #24] + ldr lr, [r0, #28] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #24] + str lr, [r0, #28] + ldr r12, [r0] + ldr lr, [r0, #4] + ldrd r4, r5, [r0, #8] + ldrd r6, r7, [r0, #16] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #24] + ldr lr, [r0, #28] + ldrd r6, r7, [sp, #32] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #32] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #56] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #24] + str lr, [r0, #28] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #32] + ldr lr, [r0, #36] + strd r6, r7, [r0, #56] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #24] + ldr lr, [r0, #28] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #32] + ldrd r4, r5, [r0, #40] + str r12, [r0, #24] + str lr, [r0, #28] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #24] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #24] + mov r8, r6 + mov r9, r7 + # Calc new W[4] + ldr r12, [sp, #16] + ldr lr, [sp, #20] + lsrs r4, r12, #19 + lsrs r5, lr, #19 + orr r5, r5, r12, lsl #13 + orr r4, r4, lr, lsl #13 + lsls r6, r12, #3 + lsls r7, lr, #3 + orr r7, r7, r12, lsr #29 + orr r6, r6, lr, lsr #29 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #6 + lsrs r7, lr, #6 + orr r6, r6, lr, lsl #26 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #32] + ldr lr, [sp, #36] + ldrd r6, r7, [sp, #104] + adds r12, r12, 
r4 + adc lr, lr, r5 + adds r12, r12, r6 + adc lr, lr, r7 + str r12, [sp, #32] + str lr, [sp, #36] + ldr r12, [sp, #40] + ldr lr, [sp, #44] + lsrs r4, r12, #1 + lsrs r5, lr, #1 + orr r5, r5, r12, lsl #31 + orr r4, r4, lr, lsl #31 + lsrs r6, r12, #8 + lsrs r7, lr, #8 + orr r7, r7, r12, lsl #24 + orr r6, r6, lr, lsl #24 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #7 + lsrs r7, lr, #7 + orr r6, r6, lr, lsl #25 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #32] + ldr lr, [sp, #36] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [sp, #32] + str lr, [sp, #36] + # Round 5 + ldr r12, [r0, #56] + ldr lr, [r0, #60] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #16] + ldr lr, [r0, #20] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #16] + str lr, [r0, #20] + ldr r12, [r0, #56] + ldr lr, [r0, #60] + ldrd r4, r5, [r0] + ldrd r6, r7, [r0, #8] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #16] + ldr lr, [r0, #20] + ldrd r6, r7, [sp, #40] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #40] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #48] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #16] + str lr, [r0, #20] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #24] + ldr lr, [r0, #28] + strd r6, r7, [r0, #48] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #16] + ldr lr, [r0, #20] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #24] + ldrd r4, r5, [r0, #32] + str r12, [r0, #16] + str lr, [r0, #20] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #16] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #16] + mov r8, r6 + mov r9, r7 + # Calc new W[5] + ldr r12, [sp, #24] + ldr lr, [sp, #28] + lsrs r4, r12, #19 + lsrs r5, lr, #19 + orr r5, r5, r12, lsl #13 + orr r4, r4, lr, lsl #13 + lsls r6, r12, #3 + lsls r7, lr, #3 + orr r7, r7, r12, lsr #29 + orr r6, r6, lr, lsr #29 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #6 + lsrs r7, lr, #6 + orr r6, r6, lr, lsl #26 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #40] + ldr lr, [sp, #44] + ldrd r6, r7, [sp, #112] + adds r12, r12, r4 + adc lr, lr, r5 + adds r12, r12, r6 + adc lr, lr, r7 + str r12, [sp, #40] + str lr, [sp, #44] + ldr r12, [sp, #48] + ldr lr, [sp, #52] + lsrs r4, r12, #1 + lsrs r5, lr, #1 + orr r5, r5, r12, lsl #31 + orr r4, r4, lr, lsl #31 + lsrs r6, r12, #8 + lsrs r7, lr, #8 + orr r7, r7, r12, lsl #24 + orr r6, r6, lr, lsl #24 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #7 + lsrs r7, lr, #7 + orr r6, r6, lr, lsl #25 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #40] + ldr lr, [sp, #44] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [sp, #40] + str lr, [sp, #44] + # Round 6 + ldr r12, [r0, #48] + ldr lr, [r0, #52] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + 
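+ # The interleaved "Calc new W[n]" blocks maintain a rolling 16-entry
+ # message schedule of 64-bit words on the stack. A minimal C sketch of
+ # one update step (ROTR64, s0, s1 and sha512_w_step are illustrative
+ # names, not taken from this file):
+ #
+ #   #include <stdint.h>
+ #   #define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
+ #   #define s0(x) (ROTR64(x, 1) ^ ROTR64(x, 8) ^ ((x) >> 7))
+ #   #define s1(x) (ROTR64(x, 19) ^ ROTR64(x, 61) ^ ((x) >> 6))
+ #   static void sha512_w_step(uint64_t W[16], int i)
+ #   {
+ #       /* W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16], done
+ #          in place over a 16-word ring, as the loads and stores to
+ #          [sp, #...] do here */
+ #       W[i & 15] += s1(W[(i + 14) & 15]) + W[(i + 9) & 15]
+ #                  + s0(W[(i + 1) & 15]);
+ #   }
+ #
+ # Each ROTR64 appears in the assembly as two shifts plus two orrs,
+ # since every 64-bit lane is held in a 32-bit register pair.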
lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #8] + ldr lr, [r0, #12] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #8] + str lr, [r0, #12] + ldr r12, [r0, #48] + ldr lr, [r0, #52] + ldrd r4, r5, [r0, #56] + ldrd r6, r7, [r0] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #8] + ldr lr, [r0, #12] + ldrd r6, r7, [sp, #48] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #48] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #40] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #8] + str lr, [r0, #12] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #16] + ldr lr, [r0, #20] + strd r6, r7, [r0, #40] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #8] + ldr lr, [r0, #12] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #16] + ldrd r4, r5, [r0, #24] + str r12, [r0, #8] + str lr, [r0, #12] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #8] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #8] + mov r8, r6 + mov r9, r7 + # Calc new W[6] + ldr r12, [sp, #32] + ldr lr, [sp, #36] + lsrs r4, r12, #19 + lsrs r5, lr, #19 + orr r5, r5, r12, lsl #13 + orr r4, r4, lr, lsl #13 + lsls r6, r12, #3 + lsls r7, lr, #3 + orr r7, r7, r12, lsr #29 + orr r6, r6, lr, lsr #29 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #6 + lsrs r7, lr, #6 + orr r6, r6, lr, lsl #26 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #48] + ldr lr, [sp, #52] + ldrd r6, r7, [sp, #120] + adds r12, r12, r4 + adc lr, lr, r5 + adds r12, r12, r6 + adc lr, lr, r7 + str r12, [sp, #48] + str lr, [sp, #52] + ldr r12, [sp, #56] + ldr lr, [sp, #60] + lsrs r4, r12, #1 + lsrs r5, lr, #1 + orr r5, r5, r12, lsl #31 + orr r4, r4, lr, lsl #31 + lsrs r6, r12, #8 + lsrs r7, lr, #8 + orr r7, r7, r12, lsl #24 + orr r6, r6, lr, lsl #24 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #7 + lsrs r7, lr, #7 + orr r6, r6, lr, lsl #25 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #48] + ldr lr, [sp, #52] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [sp, #48] + str lr, [sp, #52] + # Round 7 + ldr r12, [r0, #40] + ldr lr, [r0, #44] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0] + ldr lr, [r0, #4] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0] + str lr, [r0, #4] + ldr r12, [r0, #40] + ldr lr, [r0, #44] + ldrd r4, r5, [r0, #48] + ldrd r6, r7, [r0, #56] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0] + ldr lr, [r0, #4] + ldrd r6, r7, [sp, #56] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #56] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #32] + adds r12, r12, r4 + adc lr, 
lr, r5 + str r12, [r0] + str lr, [r0, #4] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #8] + ldr lr, [r0, #12] + strd r6, r7, [r0, #32] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0] + ldr lr, [r0, #4] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #8] + ldrd r4, r5, [r0, #16] + str r12, [r0] + str lr, [r0, #4] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0] + mov r8, r6 + mov r9, r7 + # Calc new W[7] + ldr r12, [sp, #40] + ldr lr, [sp, #44] + lsrs r4, r12, #19 + lsrs r5, lr, #19 + orr r5, r5, r12, lsl #13 + orr r4, r4, lr, lsl #13 + lsls r6, r12, #3 + lsls r7, lr, #3 + orr r7, r7, r12, lsr #29 + orr r6, r6, lr, lsr #29 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #6 + lsrs r7, lr, #6 + orr r6, r6, lr, lsl #26 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #56] + ldr lr, [sp, #60] + ldrd r6, r7, [sp] + adds r12, r12, r4 + adc lr, lr, r5 + adds r12, r12, r6 + adc lr, lr, r7 + str r12, [sp, #56] + str lr, [sp, #60] + ldr r12, [sp, #64] + ldr lr, [sp, #68] + lsrs r4, r12, #1 + lsrs r5, lr, #1 + orr r5, r5, r12, lsl #31 + orr r4, r4, lr, lsl #31 + lsrs r6, r12, #8 + lsrs r7, lr, #8 + orr r7, r7, r12, lsl #24 + orr r6, r6, lr, lsl #24 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #7 + lsrs r7, lr, #7 + orr r6, r6, lr, lsl #25 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #56] + ldr lr, [sp, #60] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [sp, #56] + str lr, [sp, #60] + # Round 8 + ldr r12, [r0, #32] + ldr lr, [r0, #36] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #56] + ldr lr, [r0, #60] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #56] + str lr, [r0, #60] + ldr r12, [r0, #32] + ldr lr, [r0, #36] + ldrd r4, r5, [r0, #40] + ldrd r6, r7, [r0, #48] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #56] + ldr lr, [r0, #60] + ldrd r6, r7, [sp, #64] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #64] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #24] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #56] + str lr, [r0, #60] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0] + ldr lr, [r0, #4] + strd r6, r7, [r0, #24] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #56] + ldr lr, [r0, #60] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0] + ldrd r4, r5, [r0, #8] + str r12, [r0, #56] + str lr, [r0, #60] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, 
#56] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #56] + mov r8, r6 + mov r9, r7 + # Calc new W[8] + ldr r12, [sp, #48] + ldr lr, [sp, #52] + lsrs r4, r12, #19 + lsrs r5, lr, #19 + orr r5, r5, r12, lsl #13 + orr r4, r4, lr, lsl #13 + lsls r6, r12, #3 + lsls r7, lr, #3 + orr r7, r7, r12, lsr #29 + orr r6, r6, lr, lsr #29 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #6 + lsrs r7, lr, #6 + orr r6, r6, lr, lsl #26 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #64] + ldr lr, [sp, #68] + ldrd r6, r7, [sp, #8] + adds r12, r12, r4 + adc lr, lr, r5 + adds r12, r12, r6 + adc lr, lr, r7 + str r12, [sp, #64] + str lr, [sp, #68] + ldr r12, [sp, #72] + ldr lr, [sp, #76] + lsrs r4, r12, #1 + lsrs r5, lr, #1 + orr r5, r5, r12, lsl #31 + orr r4, r4, lr, lsl #31 + lsrs r6, r12, #8 + lsrs r7, lr, #8 + orr r7, r7, r12, lsl #24 + orr r6, r6, lr, lsl #24 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #7 + lsrs r7, lr, #7 + orr r6, r6, lr, lsl #25 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #64] + ldr lr, [sp, #68] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [sp, #64] + str lr, [sp, #68] + # Round 9 + ldr r12, [r0, #24] + ldr lr, [r0, #28] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #48] + ldr lr, [r0, #52] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #48] + str lr, [r0, #52] + ldr r12, [r0, #24] + ldr lr, [r0, #28] + ldrd r4, r5, [r0, #32] + ldrd r6, r7, [r0, #40] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #48] + ldr lr, [r0, #52] + ldrd r6, r7, [sp, #72] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #72] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #16] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #48] + str lr, [r0, #52] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #56] + ldr lr, [r0, #60] + strd r6, r7, [r0, #16] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #48] + ldr lr, [r0, #52] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #56] + ldrd r4, r5, [r0] + str r12, [r0, #48] + str lr, [r0, #52] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #48] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #48] + mov r8, r6 + mov r9, r7 + # Calc new W[9] + ldr r12, [sp, #56] + ldr lr, [sp, #60] + lsrs r4, r12, #19 + lsrs r5, lr, #19 + orr r5, r5, r12, lsl #13 + orr r4, r4, lr, lsl #13 + lsls r6, r12, #3 + lsls r7, lr, #3 + orr r7, r7, r12, lsr #29 + orr r6, r6, lr, lsr #29 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #6 + lsrs r7, lr, #6 + orr r6, r6, lr, lsl #26 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #72] + ldr lr, [sp, #76] + ldrd r6, r7, [sp, #16] + adds r12, r12, r4 + adc lr, lr, r5 + adds r12, r12, r6 + adc lr, lr, r7 + str r12, [sp, #72] + str lr, [sp, #76] + ldr r12, [sp, #80] + ldr lr, [sp, #84] + lsrs r4, r12, #1 + lsrs r5, lr, #1 + orr r5, r5, 
r12, lsl #31 + orr r4, r4, lr, lsl #31 + lsrs r6, r12, #8 + lsrs r7, lr, #8 + orr r7, r7, r12, lsl #24 + orr r6, r6, lr, lsl #24 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #7 + lsrs r7, lr, #7 + orr r6, r6, lr, lsl #25 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #72] + ldr lr, [sp, #76] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [sp, #72] + str lr, [sp, #76] + # Round 10 + ldr r12, [r0, #16] + ldr lr, [r0, #20] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #40] + ldr lr, [r0, #44] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #40] + str lr, [r0, #44] + ldr r12, [r0, #16] + ldr lr, [r0, #20] + ldrd r4, r5, [r0, #24] + ldrd r6, r7, [r0, #32] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #40] + ldr lr, [r0, #44] + ldrd r6, r7, [sp, #80] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #80] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #8] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #40] + str lr, [r0, #44] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #48] + ldr lr, [r0, #52] + strd r6, r7, [r0, #8] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #40] + ldr lr, [r0, #44] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #48] + ldrd r4, r5, [r0, #56] + str r12, [r0, #40] + str lr, [r0, #44] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #40] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #40] + mov r8, r6 + mov r9, r7 + # Calc new W[10] + ldr r12, [sp, #64] + ldr lr, [sp, #68] + lsrs r4, r12, #19 + lsrs r5, lr, #19 + orr r5, r5, r12, lsl #13 + orr r4, r4, lr, lsl #13 + lsls r6, r12, #3 + lsls r7, lr, #3 + orr r7, r7, r12, lsr #29 + orr r6, r6, lr, lsr #29 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #6 + lsrs r7, lr, #6 + orr r6, r6, lr, lsl #26 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #80] + ldr lr, [sp, #84] + ldrd r6, r7, [sp, #24] + adds r12, r12, r4 + adc lr, lr, r5 + adds r12, r12, r6 + adc lr, lr, r7 + str r12, [sp, #80] + str lr, [sp, #84] + ldr r12, [sp, #88] + ldr lr, [sp, #92] + lsrs r4, r12, #1 + lsrs r5, lr, #1 + orr r5, r5, r12, lsl #31 + orr r4, r4, lr, lsl #31 + lsrs r6, r12, #8 + lsrs r7, lr, #8 + orr r7, r7, r12, lsl #24 + orr r6, r6, lr, lsl #24 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #7 + lsrs r7, lr, #7 + orr r6, r6, lr, lsl #25 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #80] + ldr lr, [sp, #84] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [sp, #80] + str lr, [sp, #84] + # Round 11 + ldr r12, [r0, #8] + ldr lr, [r0, #12] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, 
lsr #9 + ldr r12, [r0, #32] + ldr lr, [r0, #36] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #32] + str lr, [r0, #36] + ldr r12, [r0, #8] + ldr lr, [r0, #12] + ldrd r4, r5, [r0, #16] + ldrd r6, r7, [r0, #24] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #32] + ldr lr, [r0, #36] + ldrd r6, r7, [sp, #88] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #88] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #32] + str lr, [r0, #36] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #40] + ldr lr, [r0, #44] + strd r6, r7, [r0] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #32] + ldr lr, [r0, #36] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #40] + ldrd r4, r5, [r0, #48] + str r12, [r0, #32] + str lr, [r0, #36] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #32] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #32] + mov r8, r6 + mov r9, r7 + # Calc new W[11] + ldr r12, [sp, #72] + ldr lr, [sp, #76] + lsrs r4, r12, #19 + lsrs r5, lr, #19 + orr r5, r5, r12, lsl #13 + orr r4, r4, lr, lsl #13 + lsls r6, r12, #3 + lsls r7, lr, #3 + orr r7, r7, r12, lsr #29 + orr r6, r6, lr, lsr #29 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #6 + lsrs r7, lr, #6 + orr r6, r6, lr, lsl #26 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #88] + ldr lr, [sp, #92] + ldrd r6, r7, [sp, #32] + adds r12, r12, r4 + adc lr, lr, r5 + adds r12, r12, r6 + adc lr, lr, r7 + str r12, [sp, #88] + str lr, [sp, #92] + ldr r12, [sp, #96] + ldr lr, [sp, #100] + lsrs r4, r12, #1 + lsrs r5, lr, #1 + orr r5, r5, r12, lsl #31 + orr r4, r4, lr, lsl #31 + lsrs r6, r12, #8 + lsrs r7, lr, #8 + orr r7, r7, r12, lsl #24 + orr r6, r6, lr, lsl #24 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #7 + lsrs r7, lr, #7 + orr r6, r6, lr, lsl #25 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #88] + ldr lr, [sp, #92] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [sp, #88] + str lr, [sp, #92] + # Round 12 + ldr r12, [r0] + ldr lr, [r0, #4] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #24] + ldr lr, [r0, #28] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #24] + str lr, [r0, #28] + ldr r12, [r0] + ldr lr, [r0, #4] + ldrd r4, r5, [r0, #8] + ldrd r6, r7, [r0, #16] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #24] + ldr lr, [r0, #28] + ldrd r6, r7, [sp, #96] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #96] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #56] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #24] + str lr, [r0, #28] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #32] + ldr lr, [r0, #36] + strd r6, r7, [r0, #56] + lsrs r4, r12, #28 + lsrs 
r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #24] + ldr lr, [r0, #28] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #32] + ldrd r4, r5, [r0, #40] + str r12, [r0, #24] + str lr, [r0, #28] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #24] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #24] + mov r8, r6 + mov r9, r7 + # Calc new W[12] + ldr r12, [sp, #80] + ldr lr, [sp, #84] + lsrs r4, r12, #19 + lsrs r5, lr, #19 + orr r5, r5, r12, lsl #13 + orr r4, r4, lr, lsl #13 + lsls r6, r12, #3 + lsls r7, lr, #3 + orr r7, r7, r12, lsr #29 + orr r6, r6, lr, lsr #29 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #6 + lsrs r7, lr, #6 + orr r6, r6, lr, lsl #26 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #96] + ldr lr, [sp, #100] + ldrd r6, r7, [sp, #40] + adds r12, r12, r4 + adc lr, lr, r5 + adds r12, r12, r6 + adc lr, lr, r7 + str r12, [sp, #96] + str lr, [sp, #100] + ldr r12, [sp, #104] + ldr lr, [sp, #108] + lsrs r4, r12, #1 + lsrs r5, lr, #1 + orr r5, r5, r12, lsl #31 + orr r4, r4, lr, lsl #31 + lsrs r6, r12, #8 + lsrs r7, lr, #8 + orr r7, r7, r12, lsl #24 + orr r6, r6, lr, lsl #24 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #7 + lsrs r7, lr, #7 + orr r6, r6, lr, lsl #25 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #96] + ldr lr, [sp, #100] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [sp, #96] + str lr, [sp, #100] + # Round 13 + ldr r12, [r0, #56] + ldr lr, [r0, #60] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #16] + ldr lr, [r0, #20] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #16] + str lr, [r0, #20] + ldr r12, [r0, #56] + ldr lr, [r0, #60] + ldrd r4, r5, [r0] + ldrd r6, r7, [r0, #8] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #16] + ldr lr, [r0, #20] + ldrd r6, r7, [sp, #104] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #104] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #48] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #16] + str lr, [r0, #20] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #24] + ldr lr, [r0, #28] + strd r6, r7, [r0, #48] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #16] + ldr lr, [r0, #20] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #24] + ldrd r4, r5, [r0, #32] + str r12, [r0, #16] + str lr, [r0, #20] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #16] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #16] + mov r8, r6 + mov r9, r7 + # Calc new W[13] + ldr r12, [sp, 
#88] + ldr lr, [sp, #92] + lsrs r4, r12, #19 + lsrs r5, lr, #19 + orr r5, r5, r12, lsl #13 + orr r4, r4, lr, lsl #13 + lsls r6, r12, #3 + lsls r7, lr, #3 + orr r7, r7, r12, lsr #29 + orr r6, r6, lr, lsr #29 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #6 + lsrs r7, lr, #6 + orr r6, r6, lr, lsl #26 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #104] + ldr lr, [sp, #108] + ldrd r6, r7, [sp, #48] + adds r12, r12, r4 + adc lr, lr, r5 + adds r12, r12, r6 + adc lr, lr, r7 + str r12, [sp, #104] + str lr, [sp, #108] + ldr r12, [sp, #112] + ldr lr, [sp, #116] + lsrs r4, r12, #1 + lsrs r5, lr, #1 + orr r5, r5, r12, lsl #31 + orr r4, r4, lr, lsl #31 + lsrs r6, r12, #8 + lsrs r7, lr, #8 + orr r7, r7, r12, lsl #24 + orr r6, r6, lr, lsl #24 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #7 + lsrs r7, lr, #7 + orr r6, r6, lr, lsl #25 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #104] + ldr lr, [sp, #108] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [sp, #104] + str lr, [sp, #108] + # Round 14 + ldr r12, [r0, #48] + ldr lr, [r0, #52] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #8] + ldr lr, [r0, #12] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #8] + str lr, [r0, #12] + ldr r12, [r0, #48] + ldr lr, [r0, #52] + ldrd r4, r5, [r0, #56] + ldrd r6, r7, [r0] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #8] + ldr lr, [r0, #12] + ldrd r6, r7, [sp, #112] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #112] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #40] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #8] + str lr, [r0, #12] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #16] + ldr lr, [r0, #20] + strd r6, r7, [r0, #40] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #8] + ldr lr, [r0, #12] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #16] + ldrd r4, r5, [r0, #24] + str r12, [r0, #8] + str lr, [r0, #12] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #8] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #8] + mov r8, r6 + mov r9, r7 + # Calc new W[14] + ldr r12, [sp, #96] + ldr lr, [sp, #100] + lsrs r4, r12, #19 + lsrs r5, lr, #19 + orr r5, r5, r12, lsl #13 + orr r4, r4, lr, lsl #13 + lsls r6, r12, #3 + lsls r7, lr, #3 + orr r7, r7, r12, lsr #29 + orr r6, r6, lr, lsr #29 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #6 + lsrs r7, lr, #6 + orr r6, r6, lr, lsl #26 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #112] + ldr lr, [sp, #116] + ldrd r6, r7, [sp, #56] + adds r12, r12, r4 + adc lr, lr, r5 + adds r12, r12, r6 + adc lr, lr, r7 + str r12, [sp, #112] + str lr, [sp, #116] + ldr r12, [sp, #120] + ldr lr, [sp, #124] + lsrs r4, r12, #1 + lsrs r5, lr, #1 + orr r5, r5, r12, lsl #31 + orr r4, r4, lr, lsl #31 + lsrs r6, r12, #8 + lsrs r7, lr, #8 + orr r7, r7, r12, lsl #24 + orr r6, 
r6, lr, lsl #24 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #7 + lsrs r7, lr, #7 + orr r6, r6, lr, lsl #25 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #112] + ldr lr, [sp, #116] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [sp, #112] + str lr, [sp, #116] + # Round 15 + ldr r12, [r0, #40] + ldr lr, [r0, #44] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0] + ldr lr, [r0, #4] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0] + str lr, [r0, #4] + ldr r12, [r0, #40] + ldr lr, [r0, #44] + ldrd r4, r5, [r0, #48] + ldrd r6, r7, [r0, #56] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0] + ldr lr, [r0, #4] + ldrd r6, r7, [sp, #120] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #120] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #32] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0] + str lr, [r0, #4] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #8] + ldr lr, [r0, #12] + strd r6, r7, [r0, #32] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0] + ldr lr, [r0, #4] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #8] + ldrd r4, r5, [r0, #16] + str r12, [r0] + str lr, [r0, #4] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0] + mov r8, r6 + mov r9, r7 + # Calc new W[15] + ldr r12, [sp, #104] + ldr lr, [sp, #108] + lsrs r4, r12, #19 + lsrs r5, lr, #19 + orr r5, r5, r12, lsl #13 + orr r4, r4, lr, lsl #13 + lsls r6, r12, #3 + lsls r7, lr, #3 + orr r7, r7, r12, lsr #29 + orr r6, r6, lr, lsr #29 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #6 + lsrs r7, lr, #6 + orr r6, r6, lr, lsl #26 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #120] + ldr lr, [sp, #124] + ldrd r6, r7, [sp, #64] + adds r12, r12, r4 + adc lr, lr, r5 + adds r12, r12, r6 + adc lr, lr, r7 + str r12, [sp, #120] + str lr, [sp, #124] + ldr r12, [sp] + ldr lr, [sp, #4] + lsrs r4, r12, #1 + lsrs r5, lr, #1 + orr r5, r5, r12, lsl #31 + orr r4, r4, lr, lsl #31 + lsrs r6, r12, #8 + lsrs r7, lr, #8 + orr r7, r7, r12, lsl #24 + orr r6, r6, lr, lsl #24 + eor r5, r5, r7 + eor r4, r4, r6 + lsrs r6, r12, #7 + lsrs r7, lr, #7 + orr r6, r6, lr, lsl #25 + eor r5, r5, r7 + eor r4, r4, r6 + ldr r12, [sp, #120] + ldr lr, [sp, #124] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [sp, #120] + str lr, [sp, #124] + add r3, r3, #0x80 + subs r10, r10, #1 + bne L_sha512_len_neon_start + # Round 0 + ldr r12, [r0, #32] + ldr lr, [r0, #36] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #56] + ldr lr, [r0, #60] + eor r4, r4, r6 + eor r5, r5, 
r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #56] + str lr, [r0, #60] + ldr r12, [r0, #32] + ldr lr, [r0, #36] + ldrd r4, r5, [r0, #40] + ldrd r6, r7, [r0, #48] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #56] + ldr lr, [r0, #60] + ldrd r6, r7, [sp] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #24] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #56] + str lr, [r0, #60] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0] + ldr lr, [r0, #4] + strd r6, r7, [r0, #24] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #56] + ldr lr, [r0, #60] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0] + ldrd r4, r5, [r0, #8] + str r12, [r0, #56] + str lr, [r0, #60] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #56] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #56] + mov r8, r6 + mov r9, r7 + # Round 1 + ldr r12, [r0, #24] + ldr lr, [r0, #28] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #48] + ldr lr, [r0, #52] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #48] + str lr, [r0, #52] + ldr r12, [r0, #24] + ldr lr, [r0, #28] + ldrd r4, r5, [r0, #32] + ldrd r6, r7, [r0, #40] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #48] + ldr lr, [r0, #52] + ldrd r6, r7, [sp, #8] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #8] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #16] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #48] + str lr, [r0, #52] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #56] + ldr lr, [r0, #60] + strd r6, r7, [r0, #16] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #48] + ldr lr, [r0, #52] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #56] + ldrd r4, r5, [r0] + str r12, [r0, #48] + str lr, [r0, #52] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #48] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #48] + mov r8, r6 + mov r9, r7 + # Round 2 + ldr r12, [r0, #16] + ldr lr, [r0, #20] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #40] + ldr lr, [r0, #44] + eor r4, r4, r6 + 
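+ # After the schedule loop, these final sixteen rounds reuse the W
+ # values already on the stack. Each "# Round n" block is one SHA-512
+ # round; a minimal C sketch under the same role renaming (S holds
+ # a..h; ROTR64 as in the sketch above; Sigma0, Sigma1, Ch, Maj and
+ # sha512_round are illustrative names, not taken from this file):
+ #
+ #   #define Sigma1(x) (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41))
+ #   #define Sigma0(x) (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39))
+ #   #define Ch(e, f, g)  ((((f) ^ (g)) & (e)) ^ (g))
+ #   #define Maj(a, b, c) ((((a) ^ (b)) & ((b) ^ (c))) ^ (b))
+ #   static void sha512_round(uint64_t S[8], uint64_t k, uint64_t w)
+ #   {
+ #       uint64_t t1 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + k + w;
+ #       uint64_t t2 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]);
+ #       S[3] += t1;       /* d += t1: becomes the new e  */
+ #       S[7]  = t1 + t2;  /* h  = t1 + t2: the new a     */
+ #       /* instead of rotating a..h, the unrolled code renames which
+ #          [r0, #...] offsets play each role from round to round */
+ #   }
+ #
+ # Note the Maj form: this round's b^c equals last round's a^b, so the
+ # code carries a^b across rounds in r8:r9 and gets Maj from one and
+ # plus two eors per half.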
eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #40] + str lr, [r0, #44] + ldr r12, [r0, #16] + ldr lr, [r0, #20] + ldrd r4, r5, [r0, #24] + ldrd r6, r7, [r0, #32] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #40] + ldr lr, [r0, #44] + ldrd r6, r7, [sp, #16] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #16] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #8] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #40] + str lr, [r0, #44] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #48] + ldr lr, [r0, #52] + strd r6, r7, [r0, #8] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #40] + ldr lr, [r0, #44] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #48] + ldrd r4, r5, [r0, #56] + str r12, [r0, #40] + str lr, [r0, #44] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #40] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #40] + mov r8, r6 + mov r9, r7 + # Round 3 + ldr r12, [r0, #8] + ldr lr, [r0, #12] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #32] + ldr lr, [r0, #36] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #32] + str lr, [r0, #36] + ldr r12, [r0, #8] + ldr lr, [r0, #12] + ldrd r4, r5, [r0, #16] + ldrd r6, r7, [r0, #24] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #32] + ldr lr, [r0, #36] + ldrd r6, r7, [sp, #24] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #24] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #32] + str lr, [r0, #36] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #40] + ldr lr, [r0, #44] + strd r6, r7, [r0] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #32] + ldr lr, [r0, #36] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #40] + ldrd r4, r5, [r0, #48] + str r12, [r0, #32] + str lr, [r0, #36] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #32] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #32] + mov r8, r6 + mov r9, r7 + # Round 4 + ldr r12, [r0] + ldr lr, [r0, #4] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #24] + ldr lr, [r0, 
#28] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #24] + str lr, [r0, #28] + ldr r12, [r0] + ldr lr, [r0, #4] + ldrd r4, r5, [r0, #8] + ldrd r6, r7, [r0, #16] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #24] + ldr lr, [r0, #28] + ldrd r6, r7, [sp, #32] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #32] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #56] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #24] + str lr, [r0, #28] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #32] + ldr lr, [r0, #36] + strd r6, r7, [r0, #56] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #24] + ldr lr, [r0, #28] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #32] + ldrd r4, r5, [r0, #40] + str r12, [r0, #24] + str lr, [r0, #28] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #24] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #24] + mov r8, r6 + mov r9, r7 + # Round 5 + ldr r12, [r0, #56] + ldr lr, [r0, #60] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #16] + ldr lr, [r0, #20] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #16] + str lr, [r0, #20] + ldr r12, [r0, #56] + ldr lr, [r0, #60] + ldrd r4, r5, [r0] + ldrd r6, r7, [r0, #8] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #16] + ldr lr, [r0, #20] + ldrd r6, r7, [sp, #40] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #40] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #48] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #16] + str lr, [r0, #20] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #24] + ldr lr, [r0, #28] + strd r6, r7, [r0, #48] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #16] + ldr lr, [r0, #20] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #24] + ldrd r4, r5, [r0, #32] + str r12, [r0, #16] + str lr, [r0, #20] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #16] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #16] + mov r8, r6 + mov r9, r7 + # Round 6 + ldr r12, [r0, #48] + ldr lr, [r0, #52] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr 
r12, [r0, #8] + ldr lr, [r0, #12] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #8] + str lr, [r0, #12] + ldr r12, [r0, #48] + ldr lr, [r0, #52] + ldrd r4, r5, [r0, #56] + ldrd r6, r7, [r0] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #8] + ldr lr, [r0, #12] + ldrd r6, r7, [sp, #48] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #48] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #40] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #8] + str lr, [r0, #12] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #16] + ldr lr, [r0, #20] + strd r6, r7, [r0, #40] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #8] + ldr lr, [r0, #12] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #16] + ldrd r4, r5, [r0, #24] + str r12, [r0, #8] + str lr, [r0, #12] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #8] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #8] + mov r8, r6 + mov r9, r7 + # Round 7 + ldr r12, [r0, #40] + ldr lr, [r0, #44] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0] + ldr lr, [r0, #4] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0] + str lr, [r0, #4] + ldr r12, [r0, #40] + ldr lr, [r0, #44] + ldrd r4, r5, [r0, #48] + ldrd r6, r7, [r0, #56] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0] + ldr lr, [r0, #4] + ldrd r6, r7, [sp, #56] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #56] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #32] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0] + str lr, [r0, #4] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #8] + ldr lr, [r0, #12] + strd r6, r7, [r0, #32] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0] + ldr lr, [r0, #4] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #8] + ldrd r4, r5, [r0, #16] + str r12, [r0] + str lr, [r0, #4] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0] + mov r8, r6 + mov r9, r7 + # Round 8 + ldr r12, [r0, #32] + ldr lr, [r0, #36] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #56] + 
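+ # Every 64-bit rotation in this non-NEON path is synthesized from the
+ # 32-bit halves of a register pair (e.g. r12:lr). A minimal C sketch of
+ # the pattern for 0 < n < 32 (rotr64_halves is an illustrative name):
+ #
+ #   static void rotr64_halves(uint32_t *lo, uint32_t *hi, unsigned n)
+ #   {
+ #       uint32_t l = *lo, h = *hi;
+ #       *lo = (l >> n) | (h << (32 - n));   /* lsrs + orr ..., lsl */
+ #       *hi = (h >> n) | (l << (32 - n));
+ #   }
+ #
+ # Rotations by n > 32 are emitted as the equivalent left rotation by
+ # 64 - n, which is why e.g. ROTR64(x, 41) appears above as lsls #23
+ # paired with lsr #9.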
ldr lr, [r0, #60] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #56] + str lr, [r0, #60] + ldr r12, [r0, #32] + ldr lr, [r0, #36] + ldrd r4, r5, [r0, #40] + ldrd r6, r7, [r0, #48] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #56] + ldr lr, [r0, #60] + ldrd r6, r7, [sp, #64] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #64] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #24] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #56] + str lr, [r0, #60] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0] + ldr lr, [r0, #4] + strd r6, r7, [r0, #24] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #56] + ldr lr, [r0, #60] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0] + ldrd r4, r5, [r0, #8] + str r12, [r0, #56] + str lr, [r0, #60] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #56] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #56] + mov r8, r6 + mov r9, r7 + # Round 9 + ldr r12, [r0, #24] + ldr lr, [r0, #28] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #48] + ldr lr, [r0, #52] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #48] + str lr, [r0, #52] + ldr r12, [r0, #24] + ldr lr, [r0, #28] + ldrd r4, r5, [r0, #32] + ldrd r6, r7, [r0, #40] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #48] + ldr lr, [r0, #52] + ldrd r6, r7, [sp, #72] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #72] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #16] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #48] + str lr, [r0, #52] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #56] + ldr lr, [r0, #60] + strd r6, r7, [r0, #16] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #48] + ldr lr, [r0, #52] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #56] + ldrd r4, r5, [r0] + str r12, [r0, #48] + str lr, [r0, #52] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #48] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #48] + mov r8, r6 + mov r9, r7 + # Round 10 + ldr r12, [r0, #16] + ldr lr, [r0, #20] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr 
#9 + ldr r12, [r0, #40] + ldr lr, [r0, #44] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #40] + str lr, [r0, #44] + ldr r12, [r0, #16] + ldr lr, [r0, #20] + ldrd r4, r5, [r0, #24] + ldrd r6, r7, [r0, #32] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #40] + ldr lr, [r0, #44] + ldrd r6, r7, [sp, #80] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #80] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #8] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #40] + str lr, [r0, #44] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #48] + ldr lr, [r0, #52] + strd r6, r7, [r0, #8] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #40] + ldr lr, [r0, #44] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #48] + ldrd r4, r5, [r0, #56] + str r12, [r0, #40] + str lr, [r0, #44] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #40] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #40] + mov r8, r6 + mov r9, r7 + # Round 11 + ldr r12, [r0, #8] + ldr lr, [r0, #12] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #32] + ldr lr, [r0, #36] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #32] + str lr, [r0, #36] + ldr r12, [r0, #8] + ldr lr, [r0, #12] + ldrd r4, r5, [r0, #16] + ldrd r6, r7, [r0, #24] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #32] + ldr lr, [r0, #36] + ldrd r6, r7, [sp, #88] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #88] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #32] + str lr, [r0, #36] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #40] + ldr lr, [r0, #44] + strd r6, r7, [r0] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #32] + ldr lr, [r0, #36] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #40] + ldrd r4, r5, [r0, #48] + str r12, [r0, #32] + str lr, [r0, #36] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #32] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #32] + mov r8, r6 + mov r9, r7 + # Round 12 + ldr r12, [r0] + ldr lr, [r0, #4] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr 
#9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #24] + ldr lr, [r0, #28] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #24] + str lr, [r0, #28] + ldr r12, [r0] + ldr lr, [r0, #4] + ldrd r4, r5, [r0, #8] + ldrd r6, r7, [r0, #16] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #24] + ldr lr, [r0, #28] + ldrd r6, r7, [sp, #96] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #96] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #56] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #24] + str lr, [r0, #28] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #32] + ldr lr, [r0, #36] + strd r6, r7, [r0, #56] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #24] + ldr lr, [r0, #28] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #32] + ldrd r4, r5, [r0, #40] + str r12, [r0, #24] + str lr, [r0, #28] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #24] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #24] + mov r8, r6 + mov r9, r7 + # Round 13 + ldr r12, [r0, #56] + ldr lr, [r0, #60] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #16] + ldr lr, [r0, #20] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #16] + str lr, [r0, #20] + ldr r12, [r0, #56] + ldr lr, [r0, #60] + ldrd r4, r5, [r0] + ldrd r6, r7, [r0, #8] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #16] + ldr lr, [r0, #20] + ldrd r6, r7, [sp, #104] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #104] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #48] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #16] + str lr, [r0, #20] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #24] + ldr lr, [r0, #28] + strd r6, r7, [r0, #48] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #16] + ldr lr, [r0, #20] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #24] + ldrd r4, r5, [r0, #32] + str r12, [r0, #16] + str lr, [r0, #20] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #16] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #16] + mov r8, r6 + mov r9, r7 + # Round 14 + ldr r12, [r0, #48] + ldr lr, [r0, #52] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls 
r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0, #8] + ldr lr, [r0, #12] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #8] + str lr, [r0, #12] + ldr r12, [r0, #48] + ldr lr, [r0, #52] + ldrd r4, r5, [r0, #56] + ldrd r6, r7, [r0] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0, #8] + ldr lr, [r0, #12] + ldrd r6, r7, [sp, #112] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #112] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #40] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0, #8] + str lr, [r0, #12] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #16] + ldr lr, [r0, #20] + strd r6, r7, [r0, #40] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0, #8] + ldr lr, [r0, #12] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #16] + ldrd r4, r5, [r0, #24] + str r12, [r0, #8] + str lr, [r0, #12] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0, #8] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0, #8] + mov r8, r6 + mov r9, r7 + # Round 15 + ldr r12, [r0, #40] + ldr lr, [r0, #44] + lsrs r4, r12, #14 + lsrs r5, lr, #14 + orr r5, r5, r12, lsl #18 + orr r4, r4, lr, lsl #18 + lsrs r6, r12, #18 + lsrs r7, lr, #18 + orr r7, r7, r12, lsl #14 + orr r6, r6, lr, lsl #14 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #23 + lsls r7, lr, #23 + orr r7, r7, r12, lsr #9 + orr r6, r6, lr, lsr #9 + ldr r12, [r0] + ldr lr, [r0, #4] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0] + str lr, [r0, #4] + ldr r12, [r0, #40] + ldr lr, [r0, #44] + ldrd r4, r5, [r0, #48] + ldrd r6, r7, [r0, #56] + eor r4, r4, r6 + eor r5, r5, r7 + and r4, r4, r12 + and r5, r5, lr + eor r4, r4, r6 + eor r5, r5, r7 + ldr r12, [r0] + ldr lr, [r0, #4] + ldrd r6, r7, [sp, #120] + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r4, r5, [r3, #120] + adds r12, r12, r6 + adc lr, lr, r7 + ldrd r6, r7, [r0, #32] + adds r12, r12, r4 + adc lr, lr, r5 + str r12, [r0] + str lr, [r0, #4] + adds r6, r6, r12 + adc r7, r7, lr + ldr r12, [r0, #8] + ldr lr, [r0, #12] + strd r6, r7, [r0, #32] + lsrs r4, r12, #28 + lsrs r5, lr, #28 + orr r5, r5, r12, lsl #4 + orr r4, r4, lr, lsl #4 + lsls r6, r12, #30 + lsls r7, lr, #30 + orr r7, r7, r12, lsr #2 + orr r6, r6, lr, lsr #2 + eor r4, r4, r6 + eor r5, r5, r7 + lsls r6, r12, #25 + lsls r7, lr, #25 + orr r7, r7, r12, lsr #7 + orr r6, r6, lr, lsr #7 + ldr r12, [r0] + ldr lr, [r0, #4] + eor r4, r4, r6 + eor r5, r5, r7 + adds r12, r12, r4 + adc lr, lr, r5 + ldrd r6, r7, [r0, #8] + ldrd r4, r5, [r0, #16] + str r12, [r0] + str lr, [r0, #4] + eor r6, r6, r4 + eor r7, r7, r5 + and r8, r8, r6 + and r9, r9, r7 + eor r8, r8, r4 + eor r9, r9, r5 + ldrd r4, r5, [r0] + adds r4, r4, r8 + adc r5, r5, r9 + strd r4, r5, [r0] + mov r8, r6 + mov r9, r7 + # Add in digest from start + ldr r12, [r0] + ldr lr, [r0, #4] + ldrd r4, r5, [r0, #8] + ldrd r6, r7, [sp, #128] + ldrd r8, r9, [sp, #136] + adds r12, r12, r6 + adc lr, lr, r7 + adds r4, r4, r8 + adc r5, r5, r9 + str r12, [r0] + str lr, [r0, #4] + strd r4, r5, [r0, #8] + str r12, [sp, #128] + str lr, [sp, 
#132] + strd r4, r5, [sp, #136] + ldr r12, [r0, #16] + ldr lr, [r0, #20] + ldrd r4, r5, [r0, #24] + ldrd r6, r7, [sp, #144] + ldrd r8, r9, [sp, #152] + adds r12, r12, r6 + adc lr, lr, r7 + adds r4, r4, r8 + adc r5, r5, r9 + str r12, [r0, #16] + str lr, [r0, #20] + strd r4, r5, [r0, #24] + str r12, [sp, #144] + str lr, [sp, #148] + strd r4, r5, [sp, #152] + ldr r12, [r0, #32] + ldr lr, [r0, #36] + ldrd r4, r5, [r0, #40] + ldrd r6, r7, [sp, #160] + ldrd r8, r9, [sp, #168] + adds r12, r12, r6 + adc lr, lr, r7 + adds r4, r4, r8 + adc r5, r5, r9 + str r12, [r0, #32] + str lr, [r0, #36] + strd r4, r5, [r0, #40] + str r12, [sp, #160] + str lr, [sp, #164] + strd r4, r5, [sp, #168] + ldr r12, [r0, #48] + ldr lr, [r0, #52] + ldrd r4, r5, [r0, #56] + ldrd r6, r7, [sp, #176] + ldrd r8, r9, [sp, #184] + adds r12, r12, r6 + adc lr, lr, r7 + adds r4, r4, r8 + adc r5, r5, r9 + str r12, [r0, #48] + str lr, [r0, #52] + strd r4, r5, [r0, #56] + str r12, [sp, #176] + str lr, [sp, #180] + strd r4, r5, [sp, #184] + subs r2, r2, #0x80 + sub r3, r3, #0x200 + add r1, r1, #0x80 + bne L_sha512_len_neon_begin + eor r0, r0, r0 + add sp, sp, #0xc0 + pop {r4, r5, r6, r7, r8, r9, r10, pc} + .size Transform_Sha512_Len,.-Transform_Sha512_Len +#endif /* WOLFSSL_ARMASM_NO_NEON */ +#ifndef WOLFSSL_ARMASM_NO_NEON + .text + .type L_SHA512_transform_neon_len_k, %object + .size L_SHA512_transform_neon_len_k, 640 + .align 3 +L_SHA512_transform_neon_len_k: + .word 0xd728ae22 + .word 0x428a2f98 + .word 0x23ef65cd + .word 0x71374491 + .word 0xec4d3b2f + .word 0xb5c0fbcf + .word 0x8189dbbc + .word 0xe9b5dba5 + .word 0xf348b538 + .word 0x3956c25b + .word 0xb605d019 + .word 0x59f111f1 + .word 0xaf194f9b + .word 0x923f82a4 + .word 0xda6d8118 + .word 0xab1c5ed5 + .word 0xa3030242 + .word 0xd807aa98 + .word 0x45706fbe + .word 0x12835b01 + .word 0x4ee4b28c + .word 0x243185be + .word 0xd5ffb4e2 + .word 0x550c7dc3 + .word 0xf27b896f + .word 0x72be5d74 + .word 0x3b1696b1 + .word 0x80deb1fe + .word 0x25c71235 + .word 0x9bdc06a7 + .word 0xcf692694 + .word 0xc19bf174 + .word 0x9ef14ad2 + .word 0xe49b69c1 + .word 0x384f25e3 + .word 0xefbe4786 + .word 0x8b8cd5b5 + .word 0xfc19dc6 + .word 0x77ac9c65 + .word 0x240ca1cc + .word 0x592b0275 + .word 0x2de92c6f + .word 0x6ea6e483 + .word 0x4a7484aa + .word 0xbd41fbd4 + .word 0x5cb0a9dc + .word 0x831153b5 + .word 0x76f988da + .word 0xee66dfab + .word 0x983e5152 + .word 0x2db43210 + .word 0xa831c66d + .word 0x98fb213f + .word 0xb00327c8 + .word 0xbeef0ee4 + .word 0xbf597fc7 + .word 0x3da88fc2 + .word 0xc6e00bf3 + .word 0x930aa725 + .word 0xd5a79147 + .word 0xe003826f + .word 0x6ca6351 + .word 0xa0e6e70 + .word 0x14292967 + .word 0x46d22ffc + .word 0x27b70a85 + .word 0x5c26c926 + .word 0x2e1b2138 + .word 0x5ac42aed + .word 0x4d2c6dfc + .word 0x9d95b3df + .word 0x53380d13 + .word 0x8baf63de + .word 0x650a7354 + .word 0x3c77b2a8 + .word 0x766a0abb + .word 0x47edaee6 + .word 0x81c2c92e + .word 0x1482353b + .word 0x92722c85 + .word 0x4cf10364 + .word 0xa2bfe8a1 + .word 0xbc423001 + .word 0xa81a664b + .word 0xd0f89791 + .word 0xc24b8b70 + .word 0x654be30 + .word 0xc76c51a3 + .word 0xd6ef5218 + .word 0xd192e819 + .word 0x5565a910 + .word 0xd6990624 + .word 0x5771202a + .word 0xf40e3585 + .word 0x32bbd1b8 + .word 0x106aa070 + .word 0xb8d2d0c8 + .word 0x19a4c116 + .word 0x5141ab53 + .word 0x1e376c08 + .word 0xdf8eeb99 + .word 0x2748774c + .word 0xe19b48a8 + .word 0x34b0bcb5 + .word 0xc5c95a63 + .word 0x391c0cb3 + .word 0xe3418acb + .word 0x4ed8aa4a + .word 0x7763e373 + .word 0x5b9cca4f + .word 0xd6b2b8a3 + .word 
0x682e6ff3 + .word 0x5defb2fc + .word 0x748f82ee + .word 0x43172f60 + .word 0x78a5636f + .word 0xa1f0ab72 + .word 0x84c87814 + .word 0x1a6439ec + .word 0x8cc70208 + .word 0x23631e28 + .word 0x90befffa + .word 0xde82bde9 + .word 0xa4506ceb + .word 0xb2c67915 + .word 0xbef9a3f7 + .word 0xe372532b + .word 0xc67178f2 + .word 0xea26619c + .word 0xca273ece + .word 0x21c0c207 + .word 0xd186b8c7 + .word 0xcde0eb1e + .word 0xeada7dd6 + .word 0xee6ed178 + .word 0xf57d4f7f + .word 0x72176fba + .word 0x6f067aa + .word 0xa2c898a6 + .word 0xa637dc5 + .word 0xbef90dae + .word 0x113f9804 + .word 0x131c471b + .word 0x1b710b35 + .word 0x23047d84 + .word 0x28db77f5 + .word 0x40c72493 + .word 0x32caab7b + .word 0x15c9bebc + .word 0x3c9ebe0a + .word 0x9c100d4c + .word 0x431d67c4 + .word 0xcb3e42b6 + .word 0x4cc5d4be + .word 0xfc657e2a + .word 0x597f299c + .word 0x3ad6faec + .word 0x5fcb6fab + .word 0x4a475817 + .word 0x6c44198c + .text + .align 2 + .globl Transform_Sha512_Len + .type Transform_Sha512_Len, %function +Transform_Sha512_Len: + vpush {d8-d15} + # Load digest into working vars + vldm.64 r0, {d0-d7} + # Start of loop processing a block +L_sha512_len_neon_begin: + # Load W + vldm.64 r1!, {d16-d31} + vrev64.8 q8, q8 + vrev64.8 q9, q9 + vrev64.8 q10, q10 + vrev64.8 q11, q11 + vrev64.8 q12, q12 + vrev64.8 q13, q13 + vrev64.8 q14, q14 + vrev64.8 q15, q15 + adr r3, L_SHA512_transform_neon_len_k + mov r12, #4 + # Start of 16 rounds +L_sha512_len_neon_start: + # Round 0 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d4, #50 + vsri.u64 d8, d4, #14 + vshl.u64 d9, d0, #36 + vsri.u64 d9, d0, #28 + vshl.u64 d10, d4, #46 + vsri.u64 d10, d4, #18 + vshl.u64 d11, d0, #30 + vsri.u64 d11, d0, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d4, #23 + vsri.u64 d10, d4, #41 + vshl.u64 d11, d0, #25 + vsri.u64 d11, d0, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d7, d8 + vadd.i64 d12, d16 + vmov d8, d4 + veor d10, d1, d2 + vadd.i64 d7, d12 + vbsl d8, d5, d6 + vbsl d10, d0, d2 + vadd.i64 d7, d8 + vadd.i64 d10, d9 + vadd.i64 d3, d7 + vadd.i64 d7, d10 + # Round 1 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d3, #50 + vsri.u64 d8, d3, #14 + vshl.u64 d9, d7, #36 + vsri.u64 d9, d7, #28 + vshl.u64 d10, d3, #46 + vsri.u64 d10, d3, #18 + vshl.u64 d11, d7, #30 + vsri.u64 d11, d7, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d3, #23 + vsri.u64 d10, d3, #41 + vshl.u64 d11, d7, #25 + vsri.u64 d11, d7, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d6, d8 + vadd.i64 d12, d17 + vmov d8, d3 + veor d10, d0, d1 + vadd.i64 d6, d12 + vbsl d8, d4, d5 + vbsl d10, d7, d1 + vadd.i64 d6, d8 + vadd.i64 d10, d9 + vadd.i64 d2, d6 + vadd.i64 d6, d10 + # Calc new W[0]-W[1] + vext.8 q6, q8, q9, #8 + vshl.u64 q4, q15, #45 + vsri.u64 q4, q15, #19 + vshl.u64 q5, q15, #3 + vsri.u64 q5, q15, #61 + veor q5, q4 + vshr.u64 q4, q15, #6 + veor q5, q4 + vadd.i64 q8, q5 + vext.8 q7, q12, q13, #8 + vadd.i64 q8, q7 + vshl.u64 q4, q6, #63 + vsri.u64 q4, q6, #1 + vshl.u64 q5, q6, #56 + vsri.u64 q5, q6, #8 + veor q5, q4 + vshr.u64 q6, #7 + veor q5, q6 + vadd.i64 q8, q5 + # Round 2 + vld1.64 {d12}, [r3:64]! 
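+ # each 64-bit rotate below is built from a vshl by (64 - n) plus a vsri by n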
+ vshl.u64 d8, d2, #50 + vsri.u64 d8, d2, #14 + vshl.u64 d9, d6, #36 + vsri.u64 d9, d6, #28 + vshl.u64 d10, d2, #46 + vsri.u64 d10, d2, #18 + vshl.u64 d11, d6, #30 + vsri.u64 d11, d6, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d2, #23 + vsri.u64 d10, d2, #41 + vshl.u64 d11, d6, #25 + vsri.u64 d11, d6, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d5, d8 + vadd.i64 d12, d18 + vmov d8, d2 + veor d10, d7, d0 + vadd.i64 d5, d12 + vbsl d8, d3, d4 + vbsl d10, d6, d0 + vadd.i64 d5, d8 + vadd.i64 d10, d9 + vadd.i64 d1, d5 + vadd.i64 d5, d10 + # Round 3 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d1, #50 + vsri.u64 d8, d1, #14 + vshl.u64 d9, d5, #36 + vsri.u64 d9, d5, #28 + vshl.u64 d10, d1, #46 + vsri.u64 d10, d1, #18 + vshl.u64 d11, d5, #30 + vsri.u64 d11, d5, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d1, #23 + vsri.u64 d10, d1, #41 + vshl.u64 d11, d5, #25 + vsri.u64 d11, d5, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d4, d8 + vadd.i64 d12, d19 + vmov d8, d1 + veor d10, d6, d7 + vadd.i64 d4, d12 + vbsl d8, d2, d3 + vbsl d10, d5, d7 + vadd.i64 d4, d8 + vadd.i64 d10, d9 + vadd.i64 d0, d4 + vadd.i64 d4, d10 + # Calc new W[2]-W[3] + vext.8 q6, q9, q10, #8 + vshl.u64 q4, q8, #45 + vsri.u64 q4, q8, #19 + vshl.u64 q5, q8, #3 + vsri.u64 q5, q8, #61 + veor q5, q4 + vshr.u64 q4, q8, #6 + veor q5, q4 + vadd.i64 q9, q5 + vext.8 q7, q13, q14, #8 + vadd.i64 q9, q7 + vshl.u64 q4, q6, #63 + vsri.u64 q4, q6, #1 + vshl.u64 q5, q6, #56 + vsri.u64 q5, q6, #8 + veor q5, q4 + vshr.u64 q6, #7 + veor q5, q6 + vadd.i64 q9, q5 + # Round 4 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d0, #50 + vsri.u64 d8, d0, #14 + vshl.u64 d9, d4, #36 + vsri.u64 d9, d4, #28 + vshl.u64 d10, d0, #46 + vsri.u64 d10, d0, #18 + vshl.u64 d11, d4, #30 + vsri.u64 d11, d4, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d0, #23 + vsri.u64 d10, d0, #41 + vshl.u64 d11, d4, #25 + vsri.u64 d11, d4, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d3, d8 + vadd.i64 d12, d20 + vmov d8, d0 + veor d10, d5, d6 + vadd.i64 d3, d12 + vbsl d8, d1, d2 + vbsl d10, d4, d6 + vadd.i64 d3, d8 + vadd.i64 d10, d9 + vadd.i64 d7, d3 + vadd.i64 d3, d10 + # Round 5 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d7, #50 + vsri.u64 d8, d7, #14 + vshl.u64 d9, d3, #36 + vsri.u64 d9, d3, #28 + vshl.u64 d10, d7, #46 + vsri.u64 d10, d7, #18 + vshl.u64 d11, d3, #30 + vsri.u64 d11, d3, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d7, #23 + vsri.u64 d10, d7, #41 + vshl.u64 d11, d3, #25 + vsri.u64 d11, d3, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d2, d8 + vadd.i64 d12, d21 + vmov d8, d7 + veor d10, d4, d5 + vadd.i64 d2, d12 + vbsl d8, d0, d1 + vbsl d10, d3, d5 + vadd.i64 d2, d8 + vadd.i64 d10, d9 + vadd.i64 d6, d2 + vadd.i64 d2, d10 + # Calc new W[4]-W[5] + vext.8 q6, q10, q11, #8 + vshl.u64 q4, q9, #45 + vsri.u64 q4, q9, #19 + vshl.u64 q5, q9, #3 + vsri.u64 q5, q9, #61 + veor q5, q4 + vshr.u64 q4, q9, #6 + veor q5, q4 + vadd.i64 q10, q5 + vext.8 q7, q14, q15, #8 + vadd.i64 q10, q7 + vshl.u64 q4, q6, #63 + vsri.u64 q4, q6, #1 + vshl.u64 q5, q6, #56 + vsri.u64 q5, q6, #8 + veor q5, q4 + vshr.u64 q6, #7 + veor q5, q6 + vadd.i64 q10, q5 + # Round 6 + vld1.64 {d12}, [r3:64]! 
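+ # Ch(e,f,g) comes from vbsl with e as the mask; Maj(a,b,c) from vbsl with (b ^ c) as the mask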
+ vshl.u64 d8, d6, #50 + vsri.u64 d8, d6, #14 + vshl.u64 d9, d2, #36 + vsri.u64 d9, d2, #28 + vshl.u64 d10, d6, #46 + vsri.u64 d10, d6, #18 + vshl.u64 d11, d2, #30 + vsri.u64 d11, d2, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d6, #23 + vsri.u64 d10, d6, #41 + vshl.u64 d11, d2, #25 + vsri.u64 d11, d2, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d1, d8 + vadd.i64 d12, d22 + vmov d8, d6 + veor d10, d3, d4 + vadd.i64 d1, d12 + vbsl d8, d7, d0 + vbsl d10, d2, d4 + vadd.i64 d1, d8 + vadd.i64 d10, d9 + vadd.i64 d5, d1 + vadd.i64 d1, d10 + # Round 7 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d5, #50 + vsri.u64 d8, d5, #14 + vshl.u64 d9, d1, #36 + vsri.u64 d9, d1, #28 + vshl.u64 d10, d5, #46 + vsri.u64 d10, d5, #18 + vshl.u64 d11, d1, #30 + vsri.u64 d11, d1, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d5, #23 + vsri.u64 d10, d5, #41 + vshl.u64 d11, d1, #25 + vsri.u64 d11, d1, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d0, d8 + vadd.i64 d12, d23 + vmov d8, d5 + veor d10, d2, d3 + vadd.i64 d0, d12 + vbsl d8, d6, d7 + vbsl d10, d1, d3 + vadd.i64 d0, d8 + vadd.i64 d10, d9 + vadd.i64 d4, d0 + vadd.i64 d0, d10 + # Calc new W[6]-W[7] + vext.8 q6, q11, q12, #8 + vshl.u64 q4, q10, #45 + vsri.u64 q4, q10, #19 + vshl.u64 q5, q10, #3 + vsri.u64 q5, q10, #61 + veor q5, q4 + vshr.u64 q4, q10, #6 + veor q5, q4 + vadd.i64 q11, q5 + vext.8 q7, q15, q8, #8 + vadd.i64 q11, q7 + vshl.u64 q4, q6, #63 + vsri.u64 q4, q6, #1 + vshl.u64 q5, q6, #56 + vsri.u64 q5, q6, #8 + veor q5, q4 + vshr.u64 q6, #7 + veor q5, q6 + vadd.i64 q11, q5 + # Round 8 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d4, #50 + vsri.u64 d8, d4, #14 + vshl.u64 d9, d0, #36 + vsri.u64 d9, d0, #28 + vshl.u64 d10, d4, #46 + vsri.u64 d10, d4, #18 + vshl.u64 d11, d0, #30 + vsri.u64 d11, d0, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d4, #23 + vsri.u64 d10, d4, #41 + vshl.u64 d11, d0, #25 + vsri.u64 d11, d0, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d7, d8 + vadd.i64 d12, d24 + vmov d8, d4 + veor d10, d1, d2 + vadd.i64 d7, d12 + vbsl d8, d5, d6 + vbsl d10, d0, d2 + vadd.i64 d7, d8 + vadd.i64 d10, d9 + vadd.i64 d3, d7 + vadd.i64 d7, d10 + # Round 9 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d3, #50 + vsri.u64 d8, d3, #14 + vshl.u64 d9, d7, #36 + vsri.u64 d9, d7, #28 + vshl.u64 d10, d3, #46 + vsri.u64 d10, d3, #18 + vshl.u64 d11, d7, #30 + vsri.u64 d11, d7, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d3, #23 + vsri.u64 d10, d3, #41 + vshl.u64 d11, d7, #25 + vsri.u64 d11, d7, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d6, d8 + vadd.i64 d12, d25 + vmov d8, d3 + veor d10, d0, d1 + vadd.i64 d6, d12 + vbsl d8, d4, d5 + vbsl d10, d7, d1 + vadd.i64 d6, d8 + vadd.i64 d10, d9 + vadd.i64 d2, d6 + vadd.i64 d6, d10 + # Calc new W[8]-W[9] + vext.8 q6, q12, q13, #8 + vshl.u64 q4, q11, #45 + vsri.u64 q4, q11, #19 + vshl.u64 q5, q11, #3 + vsri.u64 q5, q11, #61 + veor q5, q4 + vshr.u64 q4, q11, #6 + veor q5, q4 + vadd.i64 q12, q5 + vext.8 q7, q8, q9, #8 + vadd.i64 q12, q7 + vshl.u64 q4, q6, #63 + vsri.u64 q4, q6, #1 + vshl.u64 q5, q6, #56 + vsri.u64 q5, q6, #8 + veor q5, q4 + vshr.u64 q6, #7 + veor q5, q6 + vadd.i64 q12, q5 + # Round 10 + vld1.64 {d12}, [r3:64]! 
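+ # d12 = K[t]: r3 steps through L_SHA512_transform_neon_len_k, one constant per round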
+ vshl.u64 d8, d2, #50 + vsri.u64 d8, d2, #14 + vshl.u64 d9, d6, #36 + vsri.u64 d9, d6, #28 + vshl.u64 d10, d2, #46 + vsri.u64 d10, d2, #18 + vshl.u64 d11, d6, #30 + vsri.u64 d11, d6, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d2, #23 + vsri.u64 d10, d2, #41 + vshl.u64 d11, d6, #25 + vsri.u64 d11, d6, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d5, d8 + vadd.i64 d12, d26 + vmov d8, d2 + veor d10, d7, d0 + vadd.i64 d5, d12 + vbsl d8, d3, d4 + vbsl d10, d6, d0 + vadd.i64 d5, d8 + vadd.i64 d10, d9 + vadd.i64 d1, d5 + vadd.i64 d5, d10 + # Round 11 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d1, #50 + vsri.u64 d8, d1, #14 + vshl.u64 d9, d5, #36 + vsri.u64 d9, d5, #28 + vshl.u64 d10, d1, #46 + vsri.u64 d10, d1, #18 + vshl.u64 d11, d5, #30 + vsri.u64 d11, d5, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d1, #23 + vsri.u64 d10, d1, #41 + vshl.u64 d11, d5, #25 + vsri.u64 d11, d5, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d4, d8 + vadd.i64 d12, d27 + vmov d8, d1 + veor d10, d6, d7 + vadd.i64 d4, d12 + vbsl d8, d2, d3 + vbsl d10, d5, d7 + vadd.i64 d4, d8 + vadd.i64 d10, d9 + vadd.i64 d0, d4 + vadd.i64 d4, d10 + # Calc new W[10]-W[11] + vext.8 q6, q13, q14, #8 + vshl.u64 q4, q12, #45 + vsri.u64 q4, q12, #19 + vshl.u64 q5, q12, #3 + vsri.u64 q5, q12, #61 + veor q5, q4 + vshr.u64 q4, q12, #6 + veor q5, q4 + vadd.i64 q13, q5 + vext.8 q7, q9, q10, #8 + vadd.i64 q13, q7 + vshl.u64 q4, q6, #63 + vsri.u64 q4, q6, #1 + vshl.u64 q5, q6, #56 + vsri.u64 q5, q6, #8 + veor q5, q4 + vshr.u64 q6, #7 + veor q5, q6 + vadd.i64 q13, q5 + # Round 12 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d0, #50 + vsri.u64 d8, d0, #14 + vshl.u64 d9, d4, #36 + vsri.u64 d9, d4, #28 + vshl.u64 d10, d0, #46 + vsri.u64 d10, d0, #18 + vshl.u64 d11, d4, #30 + vsri.u64 d11, d4, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d0, #23 + vsri.u64 d10, d0, #41 + vshl.u64 d11, d4, #25 + vsri.u64 d11, d4, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d3, d8 + vadd.i64 d12, d28 + vmov d8, d0 + veor d10, d5, d6 + vadd.i64 d3, d12 + vbsl d8, d1, d2 + vbsl d10, d4, d6 + vadd.i64 d3, d8 + vadd.i64 d10, d9 + vadd.i64 d7, d3 + vadd.i64 d3, d10 + # Round 13 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d7, #50 + vsri.u64 d8, d7, #14 + vshl.u64 d9, d3, #36 + vsri.u64 d9, d3, #28 + vshl.u64 d10, d7, #46 + vsri.u64 d10, d7, #18 + vshl.u64 d11, d3, #30 + vsri.u64 d11, d3, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d7, #23 + vsri.u64 d10, d7, #41 + vshl.u64 d11, d3, #25 + vsri.u64 d11, d3, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d2, d8 + vadd.i64 d12, d29 + vmov d8, d7 + veor d10, d4, d5 + vadd.i64 d2, d12 + vbsl d8, d0, d1 + vbsl d10, d3, d5 + vadd.i64 d2, d8 + vadd.i64 d10, d9 + vadd.i64 d6, d2 + vadd.i64 d2, d10 + # Calc new W[12]-W[13] + vext.8 q6, q14, q15, #8 + vshl.u64 q4, q13, #45 + vsri.u64 q4, q13, #19 + vshl.u64 q5, q13, #3 + vsri.u64 q5, q13, #61 + veor q5, q4 + vshr.u64 q4, q13, #6 + veor q5, q4 + vadd.i64 q14, q5 + vext.8 q7, q10, q11, #8 + vadd.i64 q14, q7 + vshl.u64 q4, q6, #63 + vsri.u64 q4, q6, #1 + vshl.u64 q5, q6, #56 + vsri.u64 q5, q6, #8 + veor q5, q4 + vshr.u64 q6, #7 + veor q5, q6 + vadd.i64 q14, q5 + # Round 14 + vld1.64 {d12}, [r3:64]! 
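+ # W lives in q8-q15; each "Calc new W" block rewrites one q register (two 64-bit words)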
+ vshl.u64 d8, d6, #50 + vsri.u64 d8, d6, #14 + vshl.u64 d9, d2, #36 + vsri.u64 d9, d2, #28 + vshl.u64 d10, d6, #46 + vsri.u64 d10, d6, #18 + vshl.u64 d11, d2, #30 + vsri.u64 d11, d2, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d6, #23 + vsri.u64 d10, d6, #41 + vshl.u64 d11, d2, #25 + vsri.u64 d11, d2, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d1, d8 + vadd.i64 d12, d30 + vmov d8, d6 + veor d10, d3, d4 + vadd.i64 d1, d12 + vbsl d8, d7, d0 + vbsl d10, d2, d4 + vadd.i64 d1, d8 + vadd.i64 d10, d9 + vadd.i64 d5, d1 + vadd.i64 d1, d10 + # Round 15 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d5, #50 + vsri.u64 d8, d5, #14 + vshl.u64 d9, d1, #36 + vsri.u64 d9, d1, #28 + vshl.u64 d10, d5, #46 + vsri.u64 d10, d5, #18 + vshl.u64 d11, d1, #30 + vsri.u64 d11, d1, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d5, #23 + vsri.u64 d10, d5, #41 + vshl.u64 d11, d1, #25 + vsri.u64 d11, d1, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d0, d8 + vadd.i64 d12, d31 + vmov d8, d5 + veor d10, d2, d3 + vadd.i64 d0, d12 + vbsl d8, d6, d7 + vbsl d10, d1, d3 + vadd.i64 d0, d8 + vadd.i64 d10, d9 + vadd.i64 d4, d0 + vadd.i64 d0, d10 + # Calc new W[14]-W[15] + vext.8 q6, q15, q8, #8 + vshl.u64 q4, q14, #45 + vsri.u64 q4, q14, #19 + vshl.u64 q5, q14, #3 + vsri.u64 q5, q14, #61 + veor q5, q4 + vshr.u64 q4, q14, #6 + veor q5, q4 + vadd.i64 q15, q5 + vext.8 q7, q11, q12, #8 + vadd.i64 q15, q7 + vshl.u64 q4, q6, #63 + vsri.u64 q4, q6, #1 + vshl.u64 q5, q6, #56 + vsri.u64 q5, q6, #8 + veor q5, q4 + vshr.u64 q6, #7 + veor q5, q6 + vadd.i64 q15, q5 + subs r12, r12, #1 + bne L_sha512_len_neon_start + # Round 0 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d4, #50 + vsri.u64 d8, d4, #14 + vshl.u64 d9, d0, #36 + vsri.u64 d9, d0, #28 + vshl.u64 d10, d4, #46 + vsri.u64 d10, d4, #18 + vshl.u64 d11, d0, #30 + vsri.u64 d11, d0, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d4, #23 + vsri.u64 d10, d4, #41 + vshl.u64 d11, d0, #25 + vsri.u64 d11, d0, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d7, d8 + vadd.i64 d12, d16 + vmov d8, d4 + veor d10, d1, d2 + vadd.i64 d7, d12 + vbsl d8, d5, d6 + vbsl d10, d0, d2 + vadd.i64 d7, d8 + vadd.i64 d10, d9 + vadd.i64 d3, d7 + vadd.i64 d7, d10 + # Round 1 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d3, #50 + vsri.u64 d8, d3, #14 + vshl.u64 d9, d7, #36 + vsri.u64 d9, d7, #28 + vshl.u64 d10, d3, #46 + vsri.u64 d10, d3, #18 + vshl.u64 d11, d7, #30 + vsri.u64 d11, d7, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d3, #23 + vsri.u64 d10, d3, #41 + vshl.u64 d11, d7, #25 + vsri.u64 d11, d7, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d6, d8 + vadd.i64 d12, d17 + vmov d8, d3 + veor d10, d0, d1 + vadd.i64 d6, d12 + vbsl d8, d4, d5 + vbsl d10, d7, d1 + vadd.i64 d6, d8 + vadd.i64 d10, d9 + vadd.i64 d2, d6 + vadd.i64 d6, d10 + # Round 2 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d2, #50 + vsri.u64 d8, d2, #14 + vshl.u64 d9, d6, #36 + vsri.u64 d9, d6, #28 + vshl.u64 d10, d2, #46 + vsri.u64 d10, d2, #18 + vshl.u64 d11, d6, #30 + vsri.u64 d11, d6, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d2, #23 + vsri.u64 d10, d2, #41 + vshl.u64 d11, d6, #25 + vsri.u64 d11, d6, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d5, d8 + vadd.i64 d12, d18 + vmov d8, d2 + veor d10, d7, d0 + vadd.i64 d5, d12 + vbsl d8, d3, d4 + vbsl d10, d6, d0 + vadd.i64 d5, d8 + vadd.i64 d10, d9 + vadd.i64 d1, d5 + vadd.i64 d5, d10 + # Round 3 + vld1.64 {d12}, [r3:64]! 
+ vshl.u64 d8, d1, #50 + vsri.u64 d8, d1, #14 + vshl.u64 d9, d5, #36 + vsri.u64 d9, d5, #28 + vshl.u64 d10, d1, #46 + vsri.u64 d10, d1, #18 + vshl.u64 d11, d5, #30 + vsri.u64 d11, d5, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d1, #23 + vsri.u64 d10, d1, #41 + vshl.u64 d11, d5, #25 + vsri.u64 d11, d5, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d4, d8 + vadd.i64 d12, d19 + vmov d8, d1 + veor d10, d6, d7 + vadd.i64 d4, d12 + vbsl d8, d2, d3 + vbsl d10, d5, d7 + vadd.i64 d4, d8 + vadd.i64 d10, d9 + vadd.i64 d0, d4 + vadd.i64 d4, d10 + # Round 4 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d0, #50 + vsri.u64 d8, d0, #14 + vshl.u64 d9, d4, #36 + vsri.u64 d9, d4, #28 + vshl.u64 d10, d0, #46 + vsri.u64 d10, d0, #18 + vshl.u64 d11, d4, #30 + vsri.u64 d11, d4, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d0, #23 + vsri.u64 d10, d0, #41 + vshl.u64 d11, d4, #25 + vsri.u64 d11, d4, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d3, d8 + vadd.i64 d12, d20 + vmov d8, d0 + veor d10, d5, d6 + vadd.i64 d3, d12 + vbsl d8, d1, d2 + vbsl d10, d4, d6 + vadd.i64 d3, d8 + vadd.i64 d10, d9 + vadd.i64 d7, d3 + vadd.i64 d3, d10 + # Round 5 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d7, #50 + vsri.u64 d8, d7, #14 + vshl.u64 d9, d3, #36 + vsri.u64 d9, d3, #28 + vshl.u64 d10, d7, #46 + vsri.u64 d10, d7, #18 + vshl.u64 d11, d3, #30 + vsri.u64 d11, d3, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d7, #23 + vsri.u64 d10, d7, #41 + vshl.u64 d11, d3, #25 + vsri.u64 d11, d3, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d2, d8 + vadd.i64 d12, d21 + vmov d8, d7 + veor d10, d4, d5 + vadd.i64 d2, d12 + vbsl d8, d0, d1 + vbsl d10, d3, d5 + vadd.i64 d2, d8 + vadd.i64 d10, d9 + vadd.i64 d6, d2 + vadd.i64 d2, d10 + # Round 6 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d6, #50 + vsri.u64 d8, d6, #14 + vshl.u64 d9, d2, #36 + vsri.u64 d9, d2, #28 + vshl.u64 d10, d6, #46 + vsri.u64 d10, d6, #18 + vshl.u64 d11, d2, #30 + vsri.u64 d11, d2, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d6, #23 + vsri.u64 d10, d6, #41 + vshl.u64 d11, d2, #25 + vsri.u64 d11, d2, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d1, d8 + vadd.i64 d12, d22 + vmov d8, d6 + veor d10, d3, d4 + vadd.i64 d1, d12 + vbsl d8, d7, d0 + vbsl d10, d2, d4 + vadd.i64 d1, d8 + vadd.i64 d10, d9 + vadd.i64 d5, d1 + vadd.i64 d1, d10 + # Round 7 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d5, #50 + vsri.u64 d8, d5, #14 + vshl.u64 d9, d1, #36 + vsri.u64 d9, d1, #28 + vshl.u64 d10, d5, #46 + vsri.u64 d10, d5, #18 + vshl.u64 d11, d1, #30 + vsri.u64 d11, d1, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d5, #23 + vsri.u64 d10, d5, #41 + vshl.u64 d11, d1, #25 + vsri.u64 d11, d1, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d0, d8 + vadd.i64 d12, d23 + vmov d8, d5 + veor d10, d2, d3 + vadd.i64 d0, d12 + vbsl d8, d6, d7 + vbsl d10, d1, d3 + vadd.i64 d0, d8 + vadd.i64 d10, d9 + vadd.i64 d4, d0 + vadd.i64 d0, d10 + # Round 8 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d4, #50 + vsri.u64 d8, d4, #14 + vshl.u64 d9, d0, #36 + vsri.u64 d9, d0, #28 + vshl.u64 d10, d4, #46 + vsri.u64 d10, d4, #18 + vshl.u64 d11, d0, #30 + vsri.u64 d11, d0, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d4, #23 + vsri.u64 d10, d4, #41 + vshl.u64 d11, d0, #25 + vsri.u64 d11, d0, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d7, d8 + vadd.i64 d12, d24 + vmov d8, d4 + veor d10, d1, d2 + vadd.i64 d7, d12 + vbsl d8, d5, d6 + vbsl d10, d0, d2 + vadd.i64 d7, d8 + vadd.i64 d10, d9 + vadd.i64 d3, d7 + vadd.i64 d7, d10 + # Round 9 + vld1.64 {d12}, [r3:64]! 
+ vshl.u64 d8, d3, #50 + vsri.u64 d8, d3, #14 + vshl.u64 d9, d7, #36 + vsri.u64 d9, d7, #28 + vshl.u64 d10, d3, #46 + vsri.u64 d10, d3, #18 + vshl.u64 d11, d7, #30 + vsri.u64 d11, d7, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d3, #23 + vsri.u64 d10, d3, #41 + vshl.u64 d11, d7, #25 + vsri.u64 d11, d7, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d6, d8 + vadd.i64 d12, d25 + vmov d8, d3 + veor d10, d0, d1 + vadd.i64 d6, d12 + vbsl d8, d4, d5 + vbsl d10, d7, d1 + vadd.i64 d6, d8 + vadd.i64 d10, d9 + vadd.i64 d2, d6 + vadd.i64 d6, d10 + # Round 10 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d2, #50 + vsri.u64 d8, d2, #14 + vshl.u64 d9, d6, #36 + vsri.u64 d9, d6, #28 + vshl.u64 d10, d2, #46 + vsri.u64 d10, d2, #18 + vshl.u64 d11, d6, #30 + vsri.u64 d11, d6, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d2, #23 + vsri.u64 d10, d2, #41 + vshl.u64 d11, d6, #25 + vsri.u64 d11, d6, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d5, d8 + vadd.i64 d12, d26 + vmov d8, d2 + veor d10, d7, d0 + vadd.i64 d5, d12 + vbsl d8, d3, d4 + vbsl d10, d6, d0 + vadd.i64 d5, d8 + vadd.i64 d10, d9 + vadd.i64 d1, d5 + vadd.i64 d5, d10 + # Round 11 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d1, #50 + vsri.u64 d8, d1, #14 + vshl.u64 d9, d5, #36 + vsri.u64 d9, d5, #28 + vshl.u64 d10, d1, #46 + vsri.u64 d10, d1, #18 + vshl.u64 d11, d5, #30 + vsri.u64 d11, d5, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d1, #23 + vsri.u64 d10, d1, #41 + vshl.u64 d11, d5, #25 + vsri.u64 d11, d5, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d4, d8 + vadd.i64 d12, d27 + vmov d8, d1 + veor d10, d6, d7 + vadd.i64 d4, d12 + vbsl d8, d2, d3 + vbsl d10, d5, d7 + vadd.i64 d4, d8 + vadd.i64 d10, d9 + vadd.i64 d0, d4 + vadd.i64 d4, d10 + # Round 12 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d0, #50 + vsri.u64 d8, d0, #14 + vshl.u64 d9, d4, #36 + vsri.u64 d9, d4, #28 + vshl.u64 d10, d0, #46 + vsri.u64 d10, d0, #18 + vshl.u64 d11, d4, #30 + vsri.u64 d11, d4, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d0, #23 + vsri.u64 d10, d0, #41 + vshl.u64 d11, d4, #25 + vsri.u64 d11, d4, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d3, d8 + vadd.i64 d12, d28 + vmov d8, d0 + veor d10, d5, d6 + vadd.i64 d3, d12 + vbsl d8, d1, d2 + vbsl d10, d4, d6 + vadd.i64 d3, d8 + vadd.i64 d10, d9 + vadd.i64 d7, d3 + vadd.i64 d3, d10 + # Round 13 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d7, #50 + vsri.u64 d8, d7, #14 + vshl.u64 d9, d3, #36 + vsri.u64 d9, d3, #28 + vshl.u64 d10, d7, #46 + vsri.u64 d10, d7, #18 + vshl.u64 d11, d3, #30 + vsri.u64 d11, d3, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d7, #23 + vsri.u64 d10, d7, #41 + vshl.u64 d11, d3, #25 + vsri.u64 d11, d3, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d2, d8 + vadd.i64 d12, d29 + vmov d8, d7 + veor d10, d4, d5 + vadd.i64 d2, d12 + vbsl d8, d0, d1 + vbsl d10, d3, d5 + vadd.i64 d2, d8 + vadd.i64 d10, d9 + vadd.i64 d6, d2 + vadd.i64 d2, d10 + # Round 14 + vld1.64 {d12}, [r3:64]! + vshl.u64 d8, d6, #50 + vsri.u64 d8, d6, #14 + vshl.u64 d9, d2, #36 + vsri.u64 d9, d2, #28 + vshl.u64 d10, d6, #46 + vsri.u64 d10, d6, #18 + vshl.u64 d11, d2, #30 + vsri.u64 d11, d2, #34 + veor d8, d10 + veor d9, d11 + vshl.u64 d10, d6, #23 + vsri.u64 d10, d6, #41 + vshl.u64 d11, d2, #25 + vsri.u64 d11, d2, #39 + veor d8, d10 + veor d9, d11 + vadd.i64 d1, d8 + vadd.i64 d12, d30 + vmov d8, d6 + veor d10, d3, d4 + vadd.i64 d1, d12 + vbsl d8, d7, d0 + vbsl d10, d2, d4 + vadd.i64 d1, d8 + vadd.i64 d10, d9 + vadd.i64 d5, d1 + vadd.i64 d1, d10 + # Round 15 + vld1.64 {d12}, [r3:64]! 
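+ # Round 15 of this final pass is round 79, the last of the 80 SHA-512 rounds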
+ vshl.u64 d8, d5, #50
+ vsri.u64 d8, d5, #14
+ vshl.u64 d9, d1, #36
+ vsri.u64 d9, d1, #28
+ vshl.u64 d10, d5, #46
+ vsri.u64 d10, d5, #18
+ vshl.u64 d11, d1, #30
+ vsri.u64 d11, d1, #34
+ veor d8, d10
+ veor d9, d11
+ vshl.u64 d10, d5, #23
+ vsri.u64 d10, d5, #41
+ vshl.u64 d11, d1, #25
+ vsri.u64 d11, d1, #39
+ veor d8, d10
+ veor d9, d11
+ vadd.i64 d0, d8
+ vadd.i64 d12, d31
+ vmov d8, d5
+ veor d10, d2, d3
+ vadd.i64 d0, d12
+ vbsl d8, d6, d7
+ vbsl d10, d1, d3
+ vadd.i64 d0, d8
+ vadd.i64 d10, d9
+ vadd.i64 d4, d0
+ vadd.i64 d0, d10
+ # Add in digest from start
+ vldm.64 r0, {d8-d15}
+ vadd.i64 q0, q0, q4
+ vadd.i64 q1, q1, q5
+ vadd.i64 q2, q2, q6
+ vadd.i64 q3, q3, q7
+ vstm.64 r0, {d0-d7}
+ subs r2, r2, #0x80
+ bne L_sha512_len_neon_begin
+ vpop {d8-d15}
+ bx lr
+ .size Transform_Sha512_Len,.-Transform_Sha512_Len
+#endif /* !WOLFSSL_ARMASM_NO_NEON */
+#endif /* !__aarch64__ */
+#endif /* WOLFSSL_ARMASM */
diff --git a/client/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm.c b/client/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm.c
new file mode 100644
index 0000000..c502a39
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm.c
@@ -0,0 +1,4783 @@
+/* armv8-32-sha512-asm
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Generated using (from wolfssl):
+ * cd ../scripts
+ * ruby ./sha2/sha512.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm.c
+ */
+
+#ifndef __aarch64__
+#include <stdint.h>
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef WOLFSSL_ARMASM
+#include <wolfssl/wolfcrypt/sha512.h>
+
+#ifdef WOLFSSL_ARMASM_NO_NEON
+static const uint64_t L_SHA512_transform_len_k[] = {
+ 0x428a2f98d728ae22UL,
+ 0x7137449123ef65cdUL,
+ 0xb5c0fbcfec4d3b2fUL,
+ 0xe9b5dba58189dbbcUL,
+ 0x3956c25bf348b538UL,
+ 0x59f111f1b605d019UL,
+ 0x923f82a4af194f9bUL,
+ 0xab1c5ed5da6d8118UL,
+ 0xd807aa98a3030242UL,
+ 0x12835b0145706fbeUL,
+ 0x243185be4ee4b28cUL,
+ 0x550c7dc3d5ffb4e2UL,
+ 0x72be5d74f27b896fUL,
+ 0x80deb1fe3b1696b1UL,
+ 0x9bdc06a725c71235UL,
+ 0xc19bf174cf692694UL,
+ 0xe49b69c19ef14ad2UL,
+ 0xefbe4786384f25e3UL,
+ 0xfc19dc68b8cd5b5UL,
+ 0x240ca1cc77ac9c65UL,
+ 0x2de92c6f592b0275UL,
+ 0x4a7484aa6ea6e483UL,
+ 0x5cb0a9dcbd41fbd4UL,
+ 0x76f988da831153b5UL,
+ 0x983e5152ee66dfabUL,
+ 0xa831c66d2db43210UL,
+ 0xb00327c898fb213fUL,
+ 0xbf597fc7beef0ee4UL,
+ 0xc6e00bf33da88fc2UL,
+ 0xd5a79147930aa725UL,
+ 0x6ca6351e003826fUL,
+ 0x142929670a0e6e70UL,
+ 0x27b70a8546d22ffcUL,
+ 0x2e1b21385c26c926UL,
+ 0x4d2c6dfc5ac42aedUL,
+ 0x53380d139d95b3dfUL,
+ 0x650a73548baf63deUL,
+ 0x766a0abb3c77b2a8UL,
+ 0x81c2c92e47edaee6UL,
+ 0x92722c851482353bUL,
+ 0xa2bfe8a14cf10364UL,
+ 0xa81a664bbc423001UL,
+ 0xc24b8b70d0f89791UL,
+ 0xc76c51a30654be30UL,
+ 0xd192e819d6ef5218UL,
+ 0xd69906245565a910UL,
+ 0xf40e35855771202aUL,
+ 0x106aa07032bbd1b8UL,
+ 0x19a4c116b8d2d0c8UL,
+ 0x1e376c085141ab53UL,
+ 0x2748774cdf8eeb99UL,
+ 0x34b0bcb5e19b48a8UL,
+ 0x391c0cb3c5c95a63UL,
+ 0x4ed8aa4ae3418acbUL,
+ 0x5b9cca4f7763e373UL,
+ 0x682e6ff3d6b2b8a3UL,
+ 0x748f82ee5defb2fcUL,
+ 0x78a5636f43172f60UL,
+ 0x84c87814a1f0ab72UL,
+ 0x8cc702081a6439ecUL,
+ 0x90befffa23631e28UL,
+ 0xa4506cebde82bde9UL,
+ 0xbef9a3f7b2c67915UL,
+ 0xc67178f2e372532bUL,
+ 0xca273eceea26619cUL,
+ 0xd186b8c721c0c207UL,
+ 0xeada7dd6cde0eb1eUL,
+ 0xf57d4f7fee6ed178UL,
+ 0x6f067aa72176fbaUL,
+ 0xa637dc5a2c898a6UL,
+ 0x113f9804bef90daeUL,
+ 0x1b710b35131c471bUL,
+ 0x28db77f523047d84UL,
+ 0x32caab7b40c72493UL,
+ 0x3c9ebe0a15c9bebcUL,
+ 0x431d67c49c100d4cUL,
+ 0x4cc5d4becb3e42b6UL,
+ 0x597f299cfc657e2aUL,
+ 0x5fcb6fab3ad6faecUL,
+ 0x6c44198c4a475817UL,
+};
+
+void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
+{
+ __asm__ __volatile__ (
+ "sub sp, sp, #0xc0\n\t"
+ "mov r3, %[L_SHA512_transform_len_k]\n\t"
+ /* Copy digest to add in at end */
+ "ldrd r12, lr, [%[sha512]]\n\t"
+ "ldrd r4, r5, [%[sha512], #8]\n\t"
+ "ldrd r6, r7, [%[sha512], #16]\n\t"
+ "ldrd r8, r9, [%[sha512], #24]\n\t"
+ "strd r12, lr, [sp, #128]\n\t"
+ "strd r4, r5, [sp, #136]\n\t"
+ "strd r6, r7, [sp, #144]\n\t"
+ "strd r8, r9, [sp, #152]\n\t"
+ "ldrd r12, lr, [%[sha512], #32]\n\t"
+ "ldrd r4, r5, [%[sha512], #40]\n\t"
+ "ldrd r6, r7, [%[sha512], #48]\n\t"
+ "ldrd r8, r9, [%[sha512], #56]\n\t"
+ "strd r12, lr, [sp, #160]\n\t"
+ "strd r4, r5, [sp, #168]\n\t"
+ "strd r6, r7, [sp, #176]\n\t"
+ "strd r8, r9, [sp, #184]\n\t"
+ /* Start of loop processing a block */
+ "\n"
+ "L_sha512_len_neon_begin_%=: \n\t"
+ /* Load, Reverse and Store W */
+ "ldrd r12, lr, [%[data]]\n\t"
+ "ldrd r4, r5, [%[data], #8]\n\t"
+ "ldrd r6, r7, [%[data], #16]\n\t"
+ "ldrd r8, r9, [%[data], #24]\n\t"
+ "rev r12, r12\n\t"
+ "rev lr, lr\n\t" + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" + "rev r8, r8\n\t" + "rev r9, r9\n\t" + "str lr, [sp]\n\t" + "str r12, [sp, #4]\n\t" + "str r5, [sp, #8]\n\t" + "str r4, [sp, #12]\n\t" + "str r7, [sp, #16]\n\t" + "str r6, [sp, #20]\n\t" + "str r9, [sp, #24]\n\t" + "str r8, [sp, #28]\n\t" + "ldrd r12, lr, [%[data], #32]\n\t" + "ldrd r4, r5, [%[data], #40]\n\t" + "ldrd r6, r7, [%[data], #48]\n\t" + "ldrd r8, r9, [%[data], #56]\n\t" + "rev r12, r12\n\t" + "rev lr, lr\n\t" + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" + "rev r8, r8\n\t" + "rev r9, r9\n\t" + "str lr, [sp, #32]\n\t" + "str r12, [sp, #36]\n\t" + "str r5, [sp, #40]\n\t" + "str r4, [sp, #44]\n\t" + "str r7, [sp, #48]\n\t" + "str r6, [sp, #52]\n\t" + "str r9, [sp, #56]\n\t" + "str r8, [sp, #60]\n\t" + "ldrd r12, lr, [%[data], #64]\n\t" + "ldrd r4, r5, [%[data], #72]\n\t" + "ldrd r6, r7, [%[data], #80]\n\t" + "ldrd r8, r9, [%[data], #88]\n\t" + "rev r12, r12\n\t" + "rev lr, lr\n\t" + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" + "rev r8, r8\n\t" + "rev r9, r9\n\t" + "str lr, [sp, #64]\n\t" + "str r12, [sp, #68]\n\t" + "str r5, [sp, #72]\n\t" + "str r4, [sp, #76]\n\t" + "str r7, [sp, #80]\n\t" + "str r6, [sp, #84]\n\t" + "str r9, [sp, #88]\n\t" + "str r8, [sp, #92]\n\t" + "ldrd r12, lr, [%[data], #96]\n\t" + "ldrd r4, r5, [%[data], #104]\n\t" + "ldrd r6, r7, [%[data], #112]\n\t" + "ldrd r8, r9, [%[data], #120]\n\t" + "rev r12, r12\n\t" + "rev lr, lr\n\t" + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" + "rev r8, r8\n\t" + "rev r9, r9\n\t" + "str lr, [sp, #96]\n\t" + "str r12, [sp, #100]\n\t" + "str r5, [sp, #104]\n\t" + "str r4, [sp, #108]\n\t" + "str r7, [sp, #112]\n\t" + "str r6, [sp, #116]\n\t" + "str r9, [sp, #120]\n\t" + "str r8, [sp, #124]\n\t" + /* Pre-calc: b ^ c */ + "ldrd r8, r9, [%[sha512], #8]\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "eor r8, r8, r12\n\t" + "eor r9, r9, lr\n\t" + "mov r10, #4\n\t" + /* Start of 16 rounds */ + "\n" + "L_sha512_len_neon_start_%=: \n\t" + /* Round 0 */ + "ldrd r12, lr, [%[sha512], #32]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #56]\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "ldrd r4, r5, [%[sha512], #40]\n\t" + "ldrd r6, r7, [%[sha512], #48]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "ldrd r6, r7, [sp]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #24]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #56]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "strd r6, r7, [%[sha512], #24]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + 
"lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512]]\n\t" + "ldrd r4, r5, [%[sha512], #8]\n\t" + "strd r12, lr, [%[sha512], #56]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #56]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #56]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Calc new W[0] */ + "ldrd r12, lr, [sp, #112]\n\t" + "lsrs r4, r12, #19\n\t" + "lsrs r5, lr, #19\n\t" + "orr r5, r5, r12, lsl 13\n\t" + "orr r4, r4, lr, lsl 13\n\t" + "lsls r6, r12, #3\n\t" + "lsls r7, lr, #3\n\t" + "orr r7, r7, r12, lsr 29\n\t" + "orr r6, r6, lr, lsr 29\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #6\n\t" + "lsrs r7, lr, #6\n\t" + "orr r6, r6, lr, lsl 26\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp]\n\t" + "ldrd r6, r7, [sp, #72]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "strd r12, lr, [sp]\n\t" + "ldrd r12, lr, [sp, #8]\n\t" + "lsrs r4, r12, #1\n\t" + "lsrs r5, lr, #1\n\t" + "orr r5, r5, r12, lsl 31\n\t" + "orr r4, r4, lr, lsl 31\n\t" + "lsrs r6, r12, #8\n\t" + "lsrs r7, lr, #8\n\t" + "orr r7, r7, r12, lsl 24\n\t" + "orr r6, r6, lr, lsl 24\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #7\n\t" + "lsrs r7, lr, #7\n\t" + "orr r6, r6, lr, lsl 25\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [sp]\n\t" + /* Round 1 */ + "ldrd r12, lr, [%[sha512], #24]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #48]\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "ldrd r4, r5, [%[sha512], #32]\n\t" + "ldrd r6, r7, [%[sha512], #40]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "ldrd r6, r7, [sp, #8]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #8]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #16]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #48]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "strd r6, r7, [%[sha512], #16]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, 
#25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #56]\n\t" + "ldrd r4, r5, [%[sha512]]\n\t" + "strd r12, lr, [%[sha512], #48]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #48]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #48]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Calc new W[1] */ + "ldrd r12, lr, [sp, #120]\n\t" + "lsrs r4, r12, #19\n\t" + "lsrs r5, lr, #19\n\t" + "orr r5, r5, r12, lsl 13\n\t" + "orr r4, r4, lr, lsl 13\n\t" + "lsls r6, r12, #3\n\t" + "lsls r7, lr, #3\n\t" + "orr r7, r7, r12, lsr 29\n\t" + "orr r6, r6, lr, lsr 29\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #6\n\t" + "lsrs r7, lr, #6\n\t" + "orr r6, r6, lr, lsl 26\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #8]\n\t" + "ldrd r6, r7, [sp, #80]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "strd r12, lr, [sp, #8]\n\t" + "ldrd r12, lr, [sp, #16]\n\t" + "lsrs r4, r12, #1\n\t" + "lsrs r5, lr, #1\n\t" + "orr r5, r5, r12, lsl 31\n\t" + "orr r4, r4, lr, lsl 31\n\t" + "lsrs r6, r12, #8\n\t" + "lsrs r7, lr, #8\n\t" + "orr r7, r7, r12, lsl 24\n\t" + "orr r6, r6, lr, lsl 24\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #7\n\t" + "lsrs r7, lr, #7\n\t" + "orr r6, r6, lr, lsl 25\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #8]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [sp, #8]\n\t" + /* Round 2 */ + "ldrd r12, lr, [%[sha512], #16]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #40]\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "ldrd r4, r5, [%[sha512], #24]\n\t" + "ldrd r6, r7, [%[sha512], #32]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "ldrd r6, r7, [sp, #16]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #16]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #8]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #40]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "strd r6, r7, [%[sha512], #8]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], 
#40]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #48]\n\t" + "ldrd r4, r5, [%[sha512], #56]\n\t" + "strd r12, lr, [%[sha512], #40]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #40]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #40]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Calc new W[2] */ + "ldrd r12, lr, [sp]\n\t" + "lsrs r4, r12, #19\n\t" + "lsrs r5, lr, #19\n\t" + "orr r5, r5, r12, lsl 13\n\t" + "orr r4, r4, lr, lsl 13\n\t" + "lsls r6, r12, #3\n\t" + "lsls r7, lr, #3\n\t" + "orr r7, r7, r12, lsr 29\n\t" + "orr r6, r6, lr, lsr 29\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #6\n\t" + "lsrs r7, lr, #6\n\t" + "orr r6, r6, lr, lsl 26\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #16]\n\t" + "ldrd r6, r7, [sp, #88]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "strd r12, lr, [sp, #16]\n\t" + "ldrd r12, lr, [sp, #24]\n\t" + "lsrs r4, r12, #1\n\t" + "lsrs r5, lr, #1\n\t" + "orr r5, r5, r12, lsl 31\n\t" + "orr r4, r4, lr, lsl 31\n\t" + "lsrs r6, r12, #8\n\t" + "lsrs r7, lr, #8\n\t" + "orr r7, r7, r12, lsl 24\n\t" + "orr r6, r6, lr, lsl 24\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #7\n\t" + "lsrs r7, lr, #7\n\t" + "orr r6, r6, lr, lsl 25\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #16]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [sp, #16]\n\t" + /* Round 3 */ + "ldrd r12, lr, [%[sha512], #8]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #32]\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "ldrd r4, r5, [%[sha512], #16]\n\t" + "ldrd r6, r7, [%[sha512], #24]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "ldrd r6, r7, [sp, #24]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #24]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512]]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #32]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "strd r6, r7, [%[sha512]]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], 
#40]\n\t" + "ldrd r4, r5, [%[sha512], #48]\n\t" + "strd r12, lr, [%[sha512], #32]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #32]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #32]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Calc new W[3] */ + "ldrd r12, lr, [sp, #8]\n\t" + "lsrs r4, r12, #19\n\t" + "lsrs r5, lr, #19\n\t" + "orr r5, r5, r12, lsl 13\n\t" + "orr r4, r4, lr, lsl 13\n\t" + "lsls r6, r12, #3\n\t" + "lsls r7, lr, #3\n\t" + "orr r7, r7, r12, lsr 29\n\t" + "orr r6, r6, lr, lsr 29\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #6\n\t" + "lsrs r7, lr, #6\n\t" + "orr r6, r6, lr, lsl 26\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #24]\n\t" + "ldrd r6, r7, [sp, #96]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "strd r12, lr, [sp, #24]\n\t" + "ldrd r12, lr, [sp, #32]\n\t" + "lsrs r4, r12, #1\n\t" + "lsrs r5, lr, #1\n\t" + "orr r5, r5, r12, lsl 31\n\t" + "orr r4, r4, lr, lsl 31\n\t" + "lsrs r6, r12, #8\n\t" + "lsrs r7, lr, #8\n\t" + "orr r7, r7, r12, lsl 24\n\t" + "orr r6, r6, lr, lsl 24\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #7\n\t" + "lsrs r7, lr, #7\n\t" + "orr r6, r6, lr, lsl 25\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #24]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [sp, #24]\n\t" + /* Round 4 */ + "ldrd r12, lr, [%[sha512]]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #24]\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "ldrd r4, r5, [%[sha512], #8]\n\t" + "ldrd r6, r7, [%[sha512], #16]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "ldrd r6, r7, [sp, #32]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #32]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #56]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #24]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "strd r6, r7, [%[sha512], #56]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #32]\n\t" + "ldrd r4, r5, [%[sha512], #40]\n\t" + "strd r12, lr, [%[sha512], #24]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" 
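+ /* Maj(a, b, c) = ((a ^ b) & (b ^ c)) ^ b; r8:r9 hold b ^ c and pick up a ^ b for the next round */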
+ "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #24]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #24]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Calc new W[4] */ + "ldrd r12, lr, [sp, #16]\n\t" + "lsrs r4, r12, #19\n\t" + "lsrs r5, lr, #19\n\t" + "orr r5, r5, r12, lsl 13\n\t" + "orr r4, r4, lr, lsl 13\n\t" + "lsls r6, r12, #3\n\t" + "lsls r7, lr, #3\n\t" + "orr r7, r7, r12, lsr 29\n\t" + "orr r6, r6, lr, lsr 29\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #6\n\t" + "lsrs r7, lr, #6\n\t" + "orr r6, r6, lr, lsl 26\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #32]\n\t" + "ldrd r6, r7, [sp, #104]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "strd r12, lr, [sp, #32]\n\t" + "ldrd r12, lr, [sp, #40]\n\t" + "lsrs r4, r12, #1\n\t" + "lsrs r5, lr, #1\n\t" + "orr r5, r5, r12, lsl 31\n\t" + "orr r4, r4, lr, lsl 31\n\t" + "lsrs r6, r12, #8\n\t" + "lsrs r7, lr, #8\n\t" + "orr r7, r7, r12, lsl 24\n\t" + "orr r6, r6, lr, lsl 24\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #7\n\t" + "lsrs r7, lr, #7\n\t" + "orr r6, r6, lr, lsl 25\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #32]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [sp, #32]\n\t" + /* Round 5 */ + "ldrd r12, lr, [%[sha512], #56]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #16]\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "ldrd r4, r5, [%[sha512]]\n\t" + "ldrd r6, r7, [%[sha512], #8]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "ldrd r6, r7, [sp, #40]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #40]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #48]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #16]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "strd r6, r7, [%[sha512], #48]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #24]\n\t" + "ldrd r4, r5, [%[sha512], #32]\n\t" + "strd r12, lr, [%[sha512], #16]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], 
#16]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #16]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Calc new W[5] */ + "ldrd r12, lr, [sp, #24]\n\t" + "lsrs r4, r12, #19\n\t" + "lsrs r5, lr, #19\n\t" + "orr r5, r5, r12, lsl 13\n\t" + "orr r4, r4, lr, lsl 13\n\t" + "lsls r6, r12, #3\n\t" + "lsls r7, lr, #3\n\t" + "orr r7, r7, r12, lsr 29\n\t" + "orr r6, r6, lr, lsr 29\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #6\n\t" + "lsrs r7, lr, #6\n\t" + "orr r6, r6, lr, lsl 26\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #40]\n\t" + "ldrd r6, r7, [sp, #112]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "strd r12, lr, [sp, #40]\n\t" + "ldrd r12, lr, [sp, #48]\n\t" + "lsrs r4, r12, #1\n\t" + "lsrs r5, lr, #1\n\t" + "orr r5, r5, r12, lsl 31\n\t" + "orr r4, r4, lr, lsl 31\n\t" + "lsrs r6, r12, #8\n\t" + "lsrs r7, lr, #8\n\t" + "orr r7, r7, r12, lsl 24\n\t" + "orr r6, r6, lr, lsl 24\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #7\n\t" + "lsrs r7, lr, #7\n\t" + "orr r6, r6, lr, lsl 25\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #40]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [sp, #40]\n\t" + /* Round 6 */ + "ldrd r12, lr, [%[sha512], #48]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #8]\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "ldrd r4, r5, [%[sha512], #56]\n\t" + "ldrd r6, r7, [%[sha512]]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "ldrd r6, r7, [sp, #48]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #48]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #40]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #8]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "strd r6, r7, [%[sha512], #40]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #16]\n\t" + "ldrd r4, r5, [%[sha512], #24]\n\t" + "strd r12, lr, [%[sha512], #8]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #8]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #8]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" 
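+ /* Message schedule: W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16] */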
+ /* Calc new W[6] */ + "ldrd r12, lr, [sp, #32]\n\t" + "lsrs r4, r12, #19\n\t" + "lsrs r5, lr, #19\n\t" + "orr r5, r5, r12, lsl 13\n\t" + "orr r4, r4, lr, lsl 13\n\t" + "lsls r6, r12, #3\n\t" + "lsls r7, lr, #3\n\t" + "orr r7, r7, r12, lsr 29\n\t" + "orr r6, r6, lr, lsr 29\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #6\n\t" + "lsrs r7, lr, #6\n\t" + "orr r6, r6, lr, lsl 26\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #48]\n\t" + "ldrd r6, r7, [sp, #120]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "strd r12, lr, [sp, #48]\n\t" + "ldrd r12, lr, [sp, #56]\n\t" + "lsrs r4, r12, #1\n\t" + "lsrs r5, lr, #1\n\t" + "orr r5, r5, r12, lsl 31\n\t" + "orr r4, r4, lr, lsl 31\n\t" + "lsrs r6, r12, #8\n\t" + "lsrs r7, lr, #8\n\t" + "orr r7, r7, r12, lsl 24\n\t" + "orr r6, r6, lr, lsl 24\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #7\n\t" + "lsrs r7, lr, #7\n\t" + "orr r6, r6, lr, lsl 25\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #48]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [sp, #48]\n\t" + /* Round 7 */ + "ldrd r12, lr, [%[sha512], #40]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512]]\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "ldrd r4, r5, [%[sha512], #48]\n\t" + "ldrd r6, r7, [%[sha512], #56]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "ldrd r6, r7, [sp, #56]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #56]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #32]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512]]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "strd r6, r7, [%[sha512], #32]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #8]\n\t" + "ldrd r4, r5, [%[sha512], #16]\n\t" + "strd r12, lr, [%[sha512]]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512]]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512]]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Calc new W[7] */ + "ldrd r12, lr, [sp, #40]\n\t" + "lsrs r4, r12, #19\n\t" + "lsrs r5, lr, #19\n\t" + "orr r5, r5, r12, lsl 13\n\t" + "orr r4, r4, lr, lsl 
13\n\t" + "lsls r6, r12, #3\n\t" + "lsls r7, lr, #3\n\t" + "orr r7, r7, r12, lsr 29\n\t" + "orr r6, r6, lr, lsr 29\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #6\n\t" + "lsrs r7, lr, #6\n\t" + "orr r6, r6, lr, lsl 26\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #56]\n\t" + "ldrd r6, r7, [sp]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "strd r12, lr, [sp, #56]\n\t" + "ldrd r12, lr, [sp, #64]\n\t" + "lsrs r4, r12, #1\n\t" + "lsrs r5, lr, #1\n\t" + "orr r5, r5, r12, lsl 31\n\t" + "orr r4, r4, lr, lsl 31\n\t" + "lsrs r6, r12, #8\n\t" + "lsrs r7, lr, #8\n\t" + "orr r7, r7, r12, lsl 24\n\t" + "orr r6, r6, lr, lsl 24\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #7\n\t" + "lsrs r7, lr, #7\n\t" + "orr r6, r6, lr, lsl 25\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #56]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [sp, #56]\n\t" + /* Round 8 */ + "ldrd r12, lr, [%[sha512], #32]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #56]\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "ldrd r4, r5, [%[sha512], #40]\n\t" + "ldrd r6, r7, [%[sha512], #48]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "ldrd r6, r7, [sp, #64]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #64]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #24]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #56]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "strd r6, r7, [%[sha512], #24]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512]]\n\t" + "ldrd r4, r5, [%[sha512], #8]\n\t" + "strd r12, lr, [%[sha512], #56]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #56]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #56]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Calc new W[8] */ + "ldrd r12, lr, [sp, #48]\n\t" + "lsrs r4, r12, #19\n\t" + "lsrs r5, lr, #19\n\t" + "orr r5, r5, r12, lsl 13\n\t" + "orr r4, r4, lr, lsl 13\n\t" + "lsls r6, r12, #3\n\t" + "lsls r7, lr, #3\n\t" + "orr r7, r7, r12, lsr 29\n\t" + "orr r6, r6, lr, lsr 29\n\t" + "eor r5, r5, 
r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #6\n\t" + "lsrs r7, lr, #6\n\t" + "orr r6, r6, lr, lsl 26\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #64]\n\t" + "ldrd r6, r7, [sp, #8]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "strd r12, lr, [sp, #64]\n\t" + "ldrd r12, lr, [sp, #72]\n\t" + "lsrs r4, r12, #1\n\t" + "lsrs r5, lr, #1\n\t" + "orr r5, r5, r12, lsl 31\n\t" + "orr r4, r4, lr, lsl 31\n\t" + "lsrs r6, r12, #8\n\t" + "lsrs r7, lr, #8\n\t" + "orr r7, r7, r12, lsl 24\n\t" + "orr r6, r6, lr, lsl 24\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #7\n\t" + "lsrs r7, lr, #7\n\t" + "orr r6, r6, lr, lsl 25\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #64]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [sp, #64]\n\t" + /* Round 9 */ + "ldrd r12, lr, [%[sha512], #24]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #48]\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "ldrd r4, r5, [%[sha512], #32]\n\t" + "ldrd r6, r7, [%[sha512], #40]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "ldrd r6, r7, [sp, #72]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #72]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #16]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #48]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "strd r6, r7, [%[sha512], #16]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #56]\n\t" + "ldrd r4, r5, [%[sha512]]\n\t" + "strd r12, lr, [%[sha512], #48]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #48]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #48]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Calc new W[9] */ + "ldrd r12, lr, [sp, #56]\n\t" + "lsrs r4, r12, #19\n\t" + "lsrs r5, lr, #19\n\t" + "orr r5, r5, r12, lsl 13\n\t" + "orr r4, r4, lr, lsl 13\n\t" + "lsls r6, r12, #3\n\t" + "lsls r7, lr, #3\n\t" + "orr r7, r7, r12, lsr 29\n\t" + "orr r6, r6, lr, lsr 29\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #6\n\t" + "lsrs r7, lr, #6\n\t" + "orr r6, r6, lr, lsl 26\n\t" + "eor r5, r5, 
r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #72]\n\t" + "ldrd r6, r7, [sp, #16]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "strd r12, lr, [sp, #72]\n\t" + "ldrd r12, lr, [sp, #80]\n\t" + "lsrs r4, r12, #1\n\t" + "lsrs r5, lr, #1\n\t" + "orr r5, r5, r12, lsl 31\n\t" + "orr r4, r4, lr, lsl 31\n\t" + "lsrs r6, r12, #8\n\t" + "lsrs r7, lr, #8\n\t" + "orr r7, r7, r12, lsl 24\n\t" + "orr r6, r6, lr, lsl 24\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #7\n\t" + "lsrs r7, lr, #7\n\t" + "orr r6, r6, lr, lsl 25\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #72]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [sp, #72]\n\t" + /* Round 10 */ + "ldrd r12, lr, [%[sha512], #16]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #40]\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "ldrd r4, r5, [%[sha512], #24]\n\t" + "ldrd r6, r7, [%[sha512], #32]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "ldrd r6, r7, [sp, #80]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #80]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #8]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #40]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "strd r6, r7, [%[sha512], #8]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #48]\n\t" + "ldrd r4, r5, [%[sha512], #56]\n\t" + "strd r12, lr, [%[sha512], #40]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #40]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #40]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Calc new W[10] */ + "ldrd r12, lr, [sp, #64]\n\t" + "lsrs r4, r12, #19\n\t" + "lsrs r5, lr, #19\n\t" + "orr r5, r5, r12, lsl 13\n\t" + "orr r4, r4, lr, lsl 13\n\t" + "lsls r6, r12, #3\n\t" + "lsls r7, lr, #3\n\t" + "orr r7, r7, r12, lsr 29\n\t" + "orr r6, r6, lr, lsr 29\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #6\n\t" + "lsrs r7, lr, #6\n\t" + "orr r6, r6, lr, lsl 26\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #80]\n\t" + "ldrd r6, r7, [sp, #24]\n\t" + "adds r12, r12, r4\n\t" 
+ "adc lr, lr, r5\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "strd r12, lr, [sp, #80]\n\t" + "ldrd r12, lr, [sp, #88]\n\t" + "lsrs r4, r12, #1\n\t" + "lsrs r5, lr, #1\n\t" + "orr r5, r5, r12, lsl 31\n\t" + "orr r4, r4, lr, lsl 31\n\t" + "lsrs r6, r12, #8\n\t" + "lsrs r7, lr, #8\n\t" + "orr r7, r7, r12, lsl 24\n\t" + "orr r6, r6, lr, lsl 24\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #7\n\t" + "lsrs r7, lr, #7\n\t" + "orr r6, r6, lr, lsl 25\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #80]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [sp, #80]\n\t" + /* Round 11 */ + "ldrd r12, lr, [%[sha512], #8]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #32]\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "ldrd r4, r5, [%[sha512], #16]\n\t" + "ldrd r6, r7, [%[sha512], #24]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "ldrd r6, r7, [sp, #88]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #88]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512]]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #32]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "strd r6, r7, [%[sha512]]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #40]\n\t" + "ldrd r4, r5, [%[sha512], #48]\n\t" + "strd r12, lr, [%[sha512], #32]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #32]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #32]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Calc new W[11] */ + "ldrd r12, lr, [sp, #72]\n\t" + "lsrs r4, r12, #19\n\t" + "lsrs r5, lr, #19\n\t" + "orr r5, r5, r12, lsl 13\n\t" + "orr r4, r4, lr, lsl 13\n\t" + "lsls r6, r12, #3\n\t" + "lsls r7, lr, #3\n\t" + "orr r7, r7, r12, lsr 29\n\t" + "orr r6, r6, lr, lsr 29\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #6\n\t" + "lsrs r7, lr, #6\n\t" + "orr r6, r6, lr, lsl 26\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #88]\n\t" + "ldrd r6, r7, [sp, #32]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "strd r12, lr, [sp, #88]\n\t" + "ldrd r12, lr, [sp, 
#96]\n\t" + "lsrs r4, r12, #1\n\t" + "lsrs r5, lr, #1\n\t" + "orr r5, r5, r12, lsl 31\n\t" + "orr r4, r4, lr, lsl 31\n\t" + "lsrs r6, r12, #8\n\t" + "lsrs r7, lr, #8\n\t" + "orr r7, r7, r12, lsl 24\n\t" + "orr r6, r6, lr, lsl 24\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #7\n\t" + "lsrs r7, lr, #7\n\t" + "orr r6, r6, lr, lsl 25\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #88]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [sp, #88]\n\t" + /* Round 12 */ + "ldrd r12, lr, [%[sha512]]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #24]\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "ldrd r4, r5, [%[sha512], #8]\n\t" + "ldrd r6, r7, [%[sha512], #16]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "ldrd r6, r7, [sp, #96]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #96]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #56]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #24]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "strd r6, r7, [%[sha512], #56]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #32]\n\t" + "ldrd r4, r5, [%[sha512], #40]\n\t" + "strd r12, lr, [%[sha512], #24]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #24]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #24]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Calc new W[12] */ + "ldrd r12, lr, [sp, #80]\n\t" + "lsrs r4, r12, #19\n\t" + "lsrs r5, lr, #19\n\t" + "orr r5, r5, r12, lsl 13\n\t" + "orr r4, r4, lr, lsl 13\n\t" + "lsls r6, r12, #3\n\t" + "lsls r7, lr, #3\n\t" + "orr r7, r7, r12, lsr 29\n\t" + "orr r6, r6, lr, lsr 29\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #6\n\t" + "lsrs r7, lr, #6\n\t" + "orr r6, r6, lr, lsl 26\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #96]\n\t" + "ldrd r6, r7, [sp, #40]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "strd r12, lr, [sp, #96]\n\t" + "ldrd r12, lr, [sp, #104]\n\t" + "lsrs r4, r12, #1\n\t" + "lsrs r5, lr, #1\n\t" + "orr r5, r5, r12, lsl 31\n\t" + "orr r4, r4, lr, lsl 31\n\t" + 
"lsrs r6, r12, #8\n\t" + "lsrs r7, lr, #8\n\t" + "orr r7, r7, r12, lsl 24\n\t" + "orr r6, r6, lr, lsl 24\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #7\n\t" + "lsrs r7, lr, #7\n\t" + "orr r6, r6, lr, lsl 25\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #96]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [sp, #96]\n\t" + /* Round 13 */ + "ldrd r12, lr, [%[sha512], #56]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #16]\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "ldrd r4, r5, [%[sha512]]\n\t" + "ldrd r6, r7, [%[sha512], #8]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "ldrd r6, r7, [sp, #104]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #104]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #48]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #16]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "strd r6, r7, [%[sha512], #48]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #24]\n\t" + "ldrd r4, r5, [%[sha512], #32]\n\t" + "strd r12, lr, [%[sha512], #16]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #16]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #16]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Calc new W[13] */ + "ldrd r12, lr, [sp, #88]\n\t" + "lsrs r4, r12, #19\n\t" + "lsrs r5, lr, #19\n\t" + "orr r5, r5, r12, lsl 13\n\t" + "orr r4, r4, lr, lsl 13\n\t" + "lsls r6, r12, #3\n\t" + "lsls r7, lr, #3\n\t" + "orr r7, r7, r12, lsr 29\n\t" + "orr r6, r6, lr, lsr 29\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #6\n\t" + "lsrs r7, lr, #6\n\t" + "orr r6, r6, lr, lsl 26\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #104]\n\t" + "ldrd r6, r7, [sp, #48]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "strd r12, lr, [sp, #104]\n\t" + "ldrd r12, lr, [sp, #112]\n\t" + "lsrs r4, r12, #1\n\t" + "lsrs r5, lr, #1\n\t" + "orr r5, r5, r12, lsl 31\n\t" + "orr r4, r4, lr, lsl 31\n\t" + "lsrs r6, r12, #8\n\t" + "lsrs r7, lr, #8\n\t" + "orr r7, r7, r12, lsl 24\n\t" + "orr r6, r6, lr, lsl 24\n\t" + "eor 
r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #7\n\t" + "lsrs r7, lr, #7\n\t" + "orr r6, r6, lr, lsl 25\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #104]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [sp, #104]\n\t" + /* Round 14 */ + "ldrd r12, lr, [%[sha512], #48]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #8]\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "ldrd r4, r5, [%[sha512], #56]\n\t" + "ldrd r6, r7, [%[sha512]]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "ldrd r6, r7, [sp, #112]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #112]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #40]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #8]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "strd r6, r7, [%[sha512], #40]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #16]\n\t" + "ldrd r4, r5, [%[sha512], #24]\n\t" + "strd r12, lr, [%[sha512], #8]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #8]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #8]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Calc new W[14] */ + "ldrd r12, lr, [sp, #96]\n\t" + "lsrs r4, r12, #19\n\t" + "lsrs r5, lr, #19\n\t" + "orr r5, r5, r12, lsl 13\n\t" + "orr r4, r4, lr, lsl 13\n\t" + "lsls r6, r12, #3\n\t" + "lsls r7, lr, #3\n\t" + "orr r7, r7, r12, lsr 29\n\t" + "orr r6, r6, lr, lsr 29\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #6\n\t" + "lsrs r7, lr, #6\n\t" + "orr r6, r6, lr, lsl 26\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #112]\n\t" + "ldrd r6, r7, [sp, #56]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "strd r12, lr, [sp, #112]\n\t" + "ldrd r12, lr, [sp, #120]\n\t" + "lsrs r4, r12, #1\n\t" + "lsrs r5, lr, #1\n\t" + "orr r5, r5, r12, lsl 31\n\t" + "orr r4, r4, lr, lsl 31\n\t" + "lsrs r6, r12, #8\n\t" + "lsrs r7, lr, #8\n\t" + "orr r7, r7, r12, lsl 24\n\t" + "orr r6, r6, lr, lsl 24\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #7\n\t" + "lsrs r7, lr, #7\n\t" + "orr r6, r6, lr, lsl 25\n\t" + 
"eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #112]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [sp, #112]\n\t" + /* Round 15 */ + "ldrd r12, lr, [%[sha512], #40]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512]]\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "ldrd r4, r5, [%[sha512], #48]\n\t" + "ldrd r6, r7, [%[sha512], #56]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "ldrd r6, r7, [sp, #120]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #120]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #32]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512]]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "strd r6, r7, [%[sha512], #32]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #8]\n\t" + "ldrd r4, r5, [%[sha512], #16]\n\t" + "strd r12, lr, [%[sha512]]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512]]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512]]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Calc new W[15] */ + "ldrd r12, lr, [sp, #104]\n\t" + "lsrs r4, r12, #19\n\t" + "lsrs r5, lr, #19\n\t" + "orr r5, r5, r12, lsl 13\n\t" + "orr r4, r4, lr, lsl 13\n\t" + "lsls r6, r12, #3\n\t" + "lsls r7, lr, #3\n\t" + "orr r7, r7, r12, lsr 29\n\t" + "orr r6, r6, lr, lsr 29\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #6\n\t" + "lsrs r7, lr, #6\n\t" + "orr r6, r6, lr, lsl 26\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #120]\n\t" + "ldrd r6, r7, [sp, #64]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "strd r12, lr, [sp, #120]\n\t" + "ldrd r12, lr, [sp]\n\t" + "lsrs r4, r12, #1\n\t" + "lsrs r5, lr, #1\n\t" + "orr r5, r5, r12, lsl 31\n\t" + "orr r4, r4, lr, lsl 31\n\t" + "lsrs r6, r12, #8\n\t" + "lsrs r7, lr, #8\n\t" + "orr r7, r7, r12, lsl 24\n\t" + "orr r6, r6, lr, lsl 24\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "lsrs r6, r12, #7\n\t" + "lsrs r7, lr, #7\n\t" + "orr r6, r6, lr, lsl 25\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r6\n\t" + "ldrd r12, lr, [sp, #120]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [sp, #120]\n\t" 
+ "add r3, r3, #0x80\n\t" + "subs r10, r10, #1\n\t" + "bne L_sha512_len_neon_start_%=\n\t" + /* Round 0 */ + "ldrd r12, lr, [%[sha512], #32]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #56]\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "ldrd r4, r5, [%[sha512], #40]\n\t" + "ldrd r6, r7, [%[sha512], #48]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "ldrd r6, r7, [sp]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #24]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #56]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "strd r6, r7, [%[sha512], #24]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512]]\n\t" + "ldrd r4, r5, [%[sha512], #8]\n\t" + "strd r12, lr, [%[sha512], #56]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #56]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #56]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Round 1 */ + "ldrd r12, lr, [%[sha512], #24]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #48]\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "ldrd r4, r5, [%[sha512], #32]\n\t" + "ldrd r6, r7, [%[sha512], #40]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "ldrd r6, r7, [sp, #8]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #8]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #16]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #48]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, 
lr\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "strd r6, r7, [%[sha512], #16]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #56]\n\t" + "ldrd r4, r5, [%[sha512]]\n\t" + "strd r12, lr, [%[sha512], #48]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #48]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #48]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Round 2 */ + "ldrd r12, lr, [%[sha512], #16]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #40]\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "ldrd r4, r5, [%[sha512], #24]\n\t" + "ldrd r6, r7, [%[sha512], #32]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "ldrd r6, r7, [sp, #16]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #16]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #8]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #40]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "strd r6, r7, [%[sha512], #8]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #48]\n\t" + "ldrd r4, r5, [%[sha512], #56]\n\t" + "strd r12, lr, [%[sha512], #40]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #40]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #40]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Round 3 */ + "ldrd r12, lr, [%[sha512], #8]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, 
r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #32]\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "ldrd r4, r5, [%[sha512], #16]\n\t" + "ldrd r6, r7, [%[sha512], #24]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "ldrd r6, r7, [sp, #24]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #24]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512]]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #32]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "strd r6, r7, [%[sha512]]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #40]\n\t" + "ldrd r4, r5, [%[sha512], #48]\n\t" + "strd r12, lr, [%[sha512], #32]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #32]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #32]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Round 4 */ + "ldrd r12, lr, [%[sha512]]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #24]\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "ldrd r4, r5, [%[sha512], #8]\n\t" + "ldrd r6, r7, [%[sha512], #16]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "ldrd r6, r7, [sp, #32]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #32]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #56]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #24]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "strd r6, r7, [%[sha512], #56]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, 
lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #32]\n\t" + "ldrd r4, r5, [%[sha512], #40]\n\t" + "strd r12, lr, [%[sha512], #24]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #24]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #24]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Round 5 */ + "ldrd r12, lr, [%[sha512], #56]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #16]\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "ldrd r4, r5, [%[sha512]]\n\t" + "ldrd r6, r7, [%[sha512], #8]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "ldrd r6, r7, [sp, #40]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #40]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #48]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #16]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "strd r6, r7, [%[sha512], #48]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #24]\n\t" + "ldrd r4, r5, [%[sha512], #32]\n\t" + "strd r12, lr, [%[sha512], #16]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #16]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #16]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Round 6 */ + "ldrd r12, lr, [%[sha512], #48]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #8]\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "ldrd r4, r5, [%[sha512], #56]\n\t" + 
"ldrd r6, r7, [%[sha512]]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "ldrd r6, r7, [sp, #48]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #48]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #40]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #8]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "strd r6, r7, [%[sha512], #40]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #16]\n\t" + "ldrd r4, r5, [%[sha512], #24]\n\t" + "strd r12, lr, [%[sha512], #8]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #8]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #8]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Round 7 */ + "ldrd r12, lr, [%[sha512], #40]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512]]\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "ldrd r4, r5, [%[sha512], #48]\n\t" + "ldrd r6, r7, [%[sha512], #56]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "ldrd r6, r7, [sp, #56]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #56]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #32]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512]]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "strd r6, r7, [%[sha512], #32]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #8]\n\t" + "ldrd r4, r5, [%[sha512], #16]\n\t" + "strd r12, lr, [%[sha512]]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + 
"eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512]]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512]]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Round 8 */ + "ldrd r12, lr, [%[sha512], #32]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #56]\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "ldrd r4, r5, [%[sha512], #40]\n\t" + "ldrd r6, r7, [%[sha512], #48]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "ldrd r6, r7, [sp, #64]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #64]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #24]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #56]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "strd r6, r7, [%[sha512], #24]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512]]\n\t" + "ldrd r4, r5, [%[sha512], #8]\n\t" + "strd r12, lr, [%[sha512], #56]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #56]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #56]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Round 9 */ + "ldrd r12, lr, [%[sha512], #24]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #48]\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "ldrd r4, r5, [%[sha512], #32]\n\t" + "ldrd r6, r7, [%[sha512], #40]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "ldrd r6, r7, [sp, #72]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #72]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #16]\n\t" + "adds r12, r12, 
r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #48]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "strd r6, r7, [%[sha512], #16]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #56]\n\t" + "ldrd r4, r5, [%[sha512]]\n\t" + "strd r12, lr, [%[sha512], #48]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #48]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #48]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Round 10 */ + "ldrd r12, lr, [%[sha512], #16]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #40]\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "ldrd r4, r5, [%[sha512], #24]\n\t" + "ldrd r6, r7, [%[sha512], #32]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "ldrd r6, r7, [sp, #80]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #80]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #8]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #40]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "strd r6, r7, [%[sha512], #8]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #48]\n\t" + "ldrd r4, r5, [%[sha512], #56]\n\t" + "strd r12, lr, [%[sha512], #40]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #40]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #40]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Round 11 */ + "ldrd r12, lr, [%[sha512], #8]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, 
#18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #32]\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "ldrd r4, r5, [%[sha512], #16]\n\t" + "ldrd r6, r7, [%[sha512], #24]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "ldrd r6, r7, [sp, #88]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #88]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512]]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #32]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "strd r6, r7, [%[sha512]]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #40]\n\t" + "ldrd r4, r5, [%[sha512], #48]\n\t" + "strd r12, lr, [%[sha512], #32]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #32]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #32]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Round 12 */ + "ldrd r12, lr, [%[sha512]]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #24]\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "ldrd r4, r5, [%[sha512], #8]\n\t" + "ldrd r6, r7, [%[sha512], #16]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "ldrd r6, r7, [sp, #96]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #96]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #56]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #24]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "strd r6, r7, [%[sha512], #56]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + 
"orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #32]\n\t" + "ldrd r4, r5, [%[sha512], #40]\n\t" + "strd r12, lr, [%[sha512], #24]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #24]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #24]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Round 13 */ + "ldrd r12, lr, [%[sha512], #56]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #16]\n\t" + "ldrd r12, lr, [%[sha512], #56]\n\t" + "ldrd r4, r5, [%[sha512]]\n\t" + "ldrd r6, r7, [%[sha512], #8]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "ldrd r6, r7, [sp, #104]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #104]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #48]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #16]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #24]\n\t" + "strd r6, r7, [%[sha512], #48]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #24]\n\t" + "ldrd r4, r5, [%[sha512], #32]\n\t" + "strd r12, lr, [%[sha512], #16]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #16]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #16]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Round 14 */ + "ldrd r12, lr, [%[sha512], #48]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" 
+ "strd r12, lr, [%[sha512], #8]\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "ldrd r4, r5, [%[sha512], #56]\n\t" + "ldrd r6, r7, [%[sha512]]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "ldrd r6, r7, [sp, #112]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #112]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #40]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512], #8]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "strd r6, r7, [%[sha512], #40]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #16]\n\t" + "ldrd r4, r5, [%[sha512], #24]\n\t" + "strd r12, lr, [%[sha512], #8]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512], #8]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512], #8]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Round 15 */ + "ldrd r12, lr, [%[sha512], #40]\n\t" + "lsrs r4, r12, #14\n\t" + "lsrs r5, lr, #14\n\t" + "orr r5, r5, r12, lsl 18\n\t" + "orr r4, r4, lr, lsl 18\n\t" + "lsrs r6, r12, #18\n\t" + "lsrs r7, lr, #18\n\t" + "orr r7, r7, r12, lsl 14\n\t" + "orr r6, r6, lr, lsl 14\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #23\n\t" + "lsls r7, lr, #23\n\t" + "orr r7, r7, r12, lsr 9\n\t" + "orr r6, r6, lr, lsr 9\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512]]\n\t" + "ldrd r12, lr, [%[sha512], #40]\n\t" + "ldrd r4, r5, [%[sha512], #48]\n\t" + "ldrd r6, r7, [%[sha512], #56]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "and r4, r4, r12\n\t" + "and r5, r5, lr\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "ldrd r6, r7, [sp, #120]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r4, r5, [r3, #120]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "ldrd r6, r7, [%[sha512], #32]\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "strd r12, lr, [%[sha512]]\n\t" + "adds r6, r6, r12\n\t" + "adc r7, r7, lr\n\t" + "ldrd r12, lr, [%[sha512], #8]\n\t" + "strd r6, r7, [%[sha512], #32]\n\t" + "lsrs r4, r12, #28\n\t" + "lsrs r5, lr, #28\n\t" + "orr r5, r5, r12, lsl 4\n\t" + "orr r4, r4, lr, lsl 4\n\t" + "lsls r6, r12, #30\n\t" + "lsls r7, lr, #30\n\t" + "orr r7, r7, r12, lsr 2\n\t" + "orr r6, r6, lr, lsr 2\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "lsls r6, r12, #25\n\t" + "lsls r7, lr, #25\n\t" + "orr r7, r7, r12, lsr 7\n\t" + "orr r6, r6, lr, lsr 7\n\t" + "ldrd r12, lr, [%[sha512]]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, r5\n\t" + "ldrd r6, r7, [%[sha512], #8]\n\t" + "ldrd r4, r5, [%[sha512], #16]\n\t" + 
"strd r12, lr, [%[sha512]]\n\t" + "eor r6, r6, r4\n\t" + "eor r7, r7, r5\n\t" + "and r8, r8, r6\n\t" + "and r9, r9, r7\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "ldrd r4, r5, [%[sha512]]\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r4, r5, [%[sha512]]\n\t" + "mov r8, r6\n\t" + "mov r9, r7\n\t" + /* Add in digest from start */ + "ldrd r12, lr, [%[sha512]]\n\t" + "ldrd r4, r5, [%[sha512], #8]\n\t" + "ldrd r6, r7, [sp, #128]\n\t" + "ldrd r8, r9, [sp, #136]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r12, lr, [%[sha512]]\n\t" + "strd r4, r5, [%[sha512], #8]\n\t" + "strd r12, lr, [sp, #128]\n\t" + "strd r4, r5, [sp, #136]\n\t" + "ldrd r12, lr, [%[sha512], #16]\n\t" + "ldrd r4, r5, [%[sha512], #24]\n\t" + "ldrd r6, r7, [sp, #144]\n\t" + "ldrd r8, r9, [sp, #152]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r12, lr, [%[sha512], #16]\n\t" + "strd r4, r5, [%[sha512], #24]\n\t" + "strd r12, lr, [sp, #144]\n\t" + "strd r4, r5, [sp, #152]\n\t" + "ldrd r12, lr, [%[sha512], #32]\n\t" + "ldrd r4, r5, [%[sha512], #40]\n\t" + "ldrd r6, r7, [sp, #160]\n\t" + "ldrd r8, r9, [sp, #168]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r12, lr, [%[sha512], #32]\n\t" + "strd r4, r5, [%[sha512], #40]\n\t" + "strd r12, lr, [sp, #160]\n\t" + "strd r4, r5, [sp, #168]\n\t" + "ldrd r12, lr, [%[sha512], #48]\n\t" + "ldrd r4, r5, [%[sha512], #56]\n\t" + "ldrd r6, r7, [sp, #176]\n\t" + "ldrd r8, r9, [sp, #184]\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "strd r12, lr, [%[sha512], #48]\n\t" + "strd r4, r5, [%[sha512], #56]\n\t" + "strd r12, lr, [sp, #176]\n\t" + "strd r4, r5, [sp, #184]\n\t" + "subs %[len], %[len], #0x80\n\t" + "sub r3, r3, #0x200\n\t" + "add %[data], %[data], #0x80\n\t" + "bne L_sha512_len_neon_begin_%=\n\t" + "eor r0, r0, r0\n\t" + "add sp, sp, #0xc0\n\t" + : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len) + : [L_SHA512_transform_len_k] "r" (L_SHA512_transform_len_k) + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +} + +#endif /* WOLFSSL_ARMASM_NO_NEON */ +#include + +#ifndef WOLFSSL_ARMASM_NO_NEON +static const uint64_t L_SHA512_transform_neon_len_k[] = { + 0x428a2f98d728ae22UL, + 0x7137449123ef65cdUL, + 0xb5c0fbcfec4d3b2fUL, + 0xe9b5dba58189dbbcUL, + 0x3956c25bf348b538UL, + 0x59f111f1b605d019UL, + 0x923f82a4af194f9bUL, + 0xab1c5ed5da6d8118UL, + 0xd807aa98a3030242UL, + 0x12835b0145706fbeUL, + 0x243185be4ee4b28cUL, + 0x550c7dc3d5ffb4e2UL, + 0x72be5d74f27b896fUL, + 0x80deb1fe3b1696b1UL, + 0x9bdc06a725c71235UL, + 0xc19bf174cf692694UL, + 0xe49b69c19ef14ad2UL, + 0xefbe4786384f25e3UL, + 0xfc19dc68b8cd5b5UL, + 0x240ca1cc77ac9c65UL, + 0x2de92c6f592b0275UL, + 0x4a7484aa6ea6e483UL, + 0x5cb0a9dcbd41fbd4UL, + 0x76f988da831153b5UL, + 0x983e5152ee66dfabUL, + 0xa831c66d2db43210UL, + 0xb00327c898fb213fUL, + 0xbf597fc7beef0ee4UL, + 0xc6e00bf33da88fc2UL, + 0xd5a79147930aa725UL, + 0x6ca6351e003826fUL, + 0x142929670a0e6e70UL, + 0x27b70a8546d22ffcUL, + 0x2e1b21385c26c926UL, + 0x4d2c6dfc5ac42aedUL, + 0x53380d139d95b3dfUL, + 0x650a73548baf63deUL, + 0x766a0abb3c77b2a8UL, + 0x81c2c92e47edaee6UL, + 0x92722c851482353bUL, + 0xa2bfe8a14cf10364UL, + 0xa81a664bbc423001UL, + 0xc24b8b70d0f89791UL, + 0xc76c51a30654be30UL, + 0xd192e819d6ef5218UL, + 0xd69906245565a910UL, + 0xf40e35855771202aUL, + 0x106aa07032bbd1b8UL, + 
0x19a4c116b8d2d0c8UL, + 0x1e376c085141ab53UL, + 0x2748774cdf8eeb99UL, + 0x34b0bcb5e19b48a8UL, + 0x391c0cb3c5c95a63UL, + 0x4ed8aa4ae3418acbUL, + 0x5b9cca4f7763e373UL, + 0x682e6ff3d6b2b8a3UL, + 0x748f82ee5defb2fcUL, + 0x78a5636f43172f60UL, + 0x84c87814a1f0ab72UL, + 0x8cc702081a6439ecUL, + 0x90befffa23631e28UL, + 0xa4506cebde82bde9UL, + 0xbef9a3f7b2c67915UL, + 0xc67178f2e372532bUL, + 0xca273eceea26619cUL, + 0xd186b8c721c0c207UL, + 0xeada7dd6cde0eb1eUL, + 0xf57d4f7fee6ed178UL, + 0x6f067aa72176fbaUL, + 0xa637dc5a2c898a6UL, + 0x113f9804bef90daeUL, + 0x1b710b35131c471bUL, + 0x28db77f523047d84UL, + 0x32caab7b40c72493UL, + 0x3c9ebe0a15c9bebcUL, + 0x431d67c49c100d4cUL, + 0x4cc5d4becb3e42b6UL, + 0x597f299cfc657e2aUL, + 0x5fcb6fab3ad6faecUL, + 0x6c44198c4a475817UL, +}; + +void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len) +{ + __asm__ __volatile__ ( + /* Load digest into working vars */ + "vldm.64 %[sha512], {d0-d7}\n\t" + /* Start of loop processing a block */ + "\n" + "L_sha512_len_neon_begin_%=: \n\t" + /* Load W */ + "vldm.64 %[data]!, {d16-d31}\n\t" + "vrev64.8 q8, q8\n\t" + "vrev64.8 q9, q9\n\t" + "vrev64.8 q10, q10\n\t" + "vrev64.8 q11, q11\n\t" + "vrev64.8 q12, q12\n\t" + "vrev64.8 q13, q13\n\t" + "vrev64.8 q14, q14\n\t" + "vrev64.8 q15, q15\n\t" + "mov r3, %[L_SHA512_transform_neon_len_k]\n\t" + "mov r12, #4\n\t" + /* Start of 16 rounds */ + "\n" + "L_sha512_len_neon_start_%=: \n\t" + /* Round 0 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d4, #50\n\t" + "vsri.u64 d8, d4, #14\n\t" + "vshl.u64 d9, d0, #36\n\t" + "vsri.u64 d9, d0, #28\n\t" + "vshl.u64 d10, d4, #46\n\t" + "vsri.u64 d10, d4, #18\n\t" + "vshl.u64 d11, d0, #30\n\t" + "vsri.u64 d11, d0, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d4, #23\n\t" + "vsri.u64 d10, d4, #41\n\t" + "vshl.u64 d11, d0, #25\n\t" + "vsri.u64 d11, d0, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d7, d8\n\t" + "vadd.i64 d12, d16\n\t" + "vmov d8, d4\n\t" + "veor d10, d1, d2\n\t" + "vadd.i64 d7, d12\n\t" + "vbsl d8, d5, d6\n\t" + "vbsl d10, d0, d2\n\t" + "vadd.i64 d7, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d3, d7\n\t" + "vadd.i64 d7, d10\n\t" + /* Round 1 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d3, #50\n\t" + "vsri.u64 d8, d3, #14\n\t" + "vshl.u64 d9, d7, #36\n\t" + "vsri.u64 d9, d7, #28\n\t" + "vshl.u64 d10, d3, #46\n\t" + "vsri.u64 d10, d3, #18\n\t" + "vshl.u64 d11, d7, #30\n\t" + "vsri.u64 d11, d7, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d3, #23\n\t" + "vsri.u64 d10, d3, #41\n\t" + "vshl.u64 d11, d7, #25\n\t" + "vsri.u64 d11, d7, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d6, d8\n\t" + "vadd.i64 d12, d17\n\t" + "vmov d8, d3\n\t" + "veor d10, d0, d1\n\t" + "vadd.i64 d6, d12\n\t" + "vbsl d8, d4, d5\n\t" + "vbsl d10, d7, d1\n\t" + "vadd.i64 d6, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d2, d6\n\t" + "vadd.i64 d6, d10\n\t" + /* Calc new W[0]-W[1] */ + "vext.8 q6, q8, q9, #8\n\t" + "vshl.u64 q4, q15, #45\n\t" + "vsri.u64 q4, q15, #19\n\t" + "vshl.u64 q5, q15, #3\n\t" + "vsri.u64 q5, q15, #61\n\t" + "veor q5, q4\n\t" + "vshr.u64 q4, q15, #6\n\t" + "veor q5, q4\n\t" + "vadd.i64 q8, q5\n\t" + "vext.8 q7, q12, q13, #8\n\t" + "vadd.i64 q8, q7\n\t" + "vshl.u64 q4, q6, #63\n\t" + "vsri.u64 q4, q6, #1\n\t" + "vshl.u64 q5, q6, #56\n\t" + "vsri.u64 q5, q6, #8\n\t" + "veor q5, q4\n\t" + "vshr.u64 q6, #7\n\t" + "veor q5, q6\n\t" + "vadd.i64 q8, q5\n\t" + /* Round 2 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d2, #50\n\t" + "vsri.u64 d8, d2, 
#14\n\t" + "vshl.u64 d9, d6, #36\n\t" + "vsri.u64 d9, d6, #28\n\t" + "vshl.u64 d10, d2, #46\n\t" + "vsri.u64 d10, d2, #18\n\t" + "vshl.u64 d11, d6, #30\n\t" + "vsri.u64 d11, d6, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d2, #23\n\t" + "vsri.u64 d10, d2, #41\n\t" + "vshl.u64 d11, d6, #25\n\t" + "vsri.u64 d11, d6, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d5, d8\n\t" + "vadd.i64 d12, d18\n\t" + "vmov d8, d2\n\t" + "veor d10, d7, d0\n\t" + "vadd.i64 d5, d12\n\t" + "vbsl d8, d3, d4\n\t" + "vbsl d10, d6, d0\n\t" + "vadd.i64 d5, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d1, d5\n\t" + "vadd.i64 d5, d10\n\t" + /* Round 3 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d1, #50\n\t" + "vsri.u64 d8, d1, #14\n\t" + "vshl.u64 d9, d5, #36\n\t" + "vsri.u64 d9, d5, #28\n\t" + "vshl.u64 d10, d1, #46\n\t" + "vsri.u64 d10, d1, #18\n\t" + "vshl.u64 d11, d5, #30\n\t" + "vsri.u64 d11, d5, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d1, #23\n\t" + "vsri.u64 d10, d1, #41\n\t" + "vshl.u64 d11, d5, #25\n\t" + "vsri.u64 d11, d5, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d4, d8\n\t" + "vadd.i64 d12, d19\n\t" + "vmov d8, d1\n\t" + "veor d10, d6, d7\n\t" + "vadd.i64 d4, d12\n\t" + "vbsl d8, d2, d3\n\t" + "vbsl d10, d5, d7\n\t" + "vadd.i64 d4, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d0, d4\n\t" + "vadd.i64 d4, d10\n\t" + /* Calc new W[2]-W[3] */ + "vext.8 q6, q9, q10, #8\n\t" + "vshl.u64 q4, q8, #45\n\t" + "vsri.u64 q4, q8, #19\n\t" + "vshl.u64 q5, q8, #3\n\t" + "vsri.u64 q5, q8, #61\n\t" + "veor q5, q4\n\t" + "vshr.u64 q4, q8, #6\n\t" + "veor q5, q4\n\t" + "vadd.i64 q9, q5\n\t" + "vext.8 q7, q13, q14, #8\n\t" + "vadd.i64 q9, q7\n\t" + "vshl.u64 q4, q6, #63\n\t" + "vsri.u64 q4, q6, #1\n\t" + "vshl.u64 q5, q6, #56\n\t" + "vsri.u64 q5, q6, #8\n\t" + "veor q5, q4\n\t" + "vshr.u64 q6, #7\n\t" + "veor q5, q6\n\t" + "vadd.i64 q9, q5\n\t" + /* Round 4 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d0, #50\n\t" + "vsri.u64 d8, d0, #14\n\t" + "vshl.u64 d9, d4, #36\n\t" + "vsri.u64 d9, d4, #28\n\t" + "vshl.u64 d10, d0, #46\n\t" + "vsri.u64 d10, d0, #18\n\t" + "vshl.u64 d11, d4, #30\n\t" + "vsri.u64 d11, d4, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d0, #23\n\t" + "vsri.u64 d10, d0, #41\n\t" + "vshl.u64 d11, d4, #25\n\t" + "vsri.u64 d11, d4, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d3, d8\n\t" + "vadd.i64 d12, d20\n\t" + "vmov d8, d0\n\t" + "veor d10, d5, d6\n\t" + "vadd.i64 d3, d12\n\t" + "vbsl d8, d1, d2\n\t" + "vbsl d10, d4, d6\n\t" + "vadd.i64 d3, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d7, d3\n\t" + "vadd.i64 d3, d10\n\t" + /* Round 5 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d7, #50\n\t" + "vsri.u64 d8, d7, #14\n\t" + "vshl.u64 d9, d3, #36\n\t" + "vsri.u64 d9, d3, #28\n\t" + "vshl.u64 d10, d7, #46\n\t" + "vsri.u64 d10, d7, #18\n\t" + "vshl.u64 d11, d3, #30\n\t" + "vsri.u64 d11, d3, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d7, #23\n\t" + "vsri.u64 d10, d7, #41\n\t" + "vshl.u64 d11, d3, #25\n\t" + "vsri.u64 d11, d3, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d2, d8\n\t" + "vadd.i64 d12, d21\n\t" + "vmov d8, d7\n\t" + "veor d10, d4, d5\n\t" + "vadd.i64 d2, d12\n\t" + "vbsl d8, d0, d1\n\t" + "vbsl d10, d3, d5\n\t" + "vadd.i64 d2, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d6, d2\n\t" + "vadd.i64 d2, d10\n\t" + /* Calc new W[4]-W[5] */ + "vext.8 q6, q10, q11, #8\n\t" + "vshl.u64 q4, q9, #45\n\t" + "vsri.u64 q4, q9, #19\n\t" 
+ "vshl.u64 q5, q9, #3\n\t" + "vsri.u64 q5, q9, #61\n\t" + "veor q5, q4\n\t" + "vshr.u64 q4, q9, #6\n\t" + "veor q5, q4\n\t" + "vadd.i64 q10, q5\n\t" + "vext.8 q7, q14, q15, #8\n\t" + "vadd.i64 q10, q7\n\t" + "vshl.u64 q4, q6, #63\n\t" + "vsri.u64 q4, q6, #1\n\t" + "vshl.u64 q5, q6, #56\n\t" + "vsri.u64 q5, q6, #8\n\t" + "veor q5, q4\n\t" + "vshr.u64 q6, #7\n\t" + "veor q5, q6\n\t" + "vadd.i64 q10, q5\n\t" + /* Round 6 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d6, #50\n\t" + "vsri.u64 d8, d6, #14\n\t" + "vshl.u64 d9, d2, #36\n\t" + "vsri.u64 d9, d2, #28\n\t" + "vshl.u64 d10, d6, #46\n\t" + "vsri.u64 d10, d6, #18\n\t" + "vshl.u64 d11, d2, #30\n\t" + "vsri.u64 d11, d2, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d6, #23\n\t" + "vsri.u64 d10, d6, #41\n\t" + "vshl.u64 d11, d2, #25\n\t" + "vsri.u64 d11, d2, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d1, d8\n\t" + "vadd.i64 d12, d22\n\t" + "vmov d8, d6\n\t" + "veor d10, d3, d4\n\t" + "vadd.i64 d1, d12\n\t" + "vbsl d8, d7, d0\n\t" + "vbsl d10, d2, d4\n\t" + "vadd.i64 d1, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d5, d1\n\t" + "vadd.i64 d1, d10\n\t" + /* Round 7 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d5, #50\n\t" + "vsri.u64 d8, d5, #14\n\t" + "vshl.u64 d9, d1, #36\n\t" + "vsri.u64 d9, d1, #28\n\t" + "vshl.u64 d10, d5, #46\n\t" + "vsri.u64 d10, d5, #18\n\t" + "vshl.u64 d11, d1, #30\n\t" + "vsri.u64 d11, d1, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d5, #23\n\t" + "vsri.u64 d10, d5, #41\n\t" + "vshl.u64 d11, d1, #25\n\t" + "vsri.u64 d11, d1, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d0, d8\n\t" + "vadd.i64 d12, d23\n\t" + "vmov d8, d5\n\t" + "veor d10, d2, d3\n\t" + "vadd.i64 d0, d12\n\t" + "vbsl d8, d6, d7\n\t" + "vbsl d10, d1, d3\n\t" + "vadd.i64 d0, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d4, d0\n\t" + "vadd.i64 d0, d10\n\t" + /* Calc new W[6]-W[7] */ + "vext.8 q6, q11, q12, #8\n\t" + "vshl.u64 q4, q10, #45\n\t" + "vsri.u64 q4, q10, #19\n\t" + "vshl.u64 q5, q10, #3\n\t" + "vsri.u64 q5, q10, #61\n\t" + "veor q5, q4\n\t" + "vshr.u64 q4, q10, #6\n\t" + "veor q5, q4\n\t" + "vadd.i64 q11, q5\n\t" + "vext.8 q7, q15, q8, #8\n\t" + "vadd.i64 q11, q7\n\t" + "vshl.u64 q4, q6, #63\n\t" + "vsri.u64 q4, q6, #1\n\t" + "vshl.u64 q5, q6, #56\n\t" + "vsri.u64 q5, q6, #8\n\t" + "veor q5, q4\n\t" + "vshr.u64 q6, #7\n\t" + "veor q5, q6\n\t" + "vadd.i64 q11, q5\n\t" + /* Round 8 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d4, #50\n\t" + "vsri.u64 d8, d4, #14\n\t" + "vshl.u64 d9, d0, #36\n\t" + "vsri.u64 d9, d0, #28\n\t" + "vshl.u64 d10, d4, #46\n\t" + "vsri.u64 d10, d4, #18\n\t" + "vshl.u64 d11, d0, #30\n\t" + "vsri.u64 d11, d0, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d4, #23\n\t" + "vsri.u64 d10, d4, #41\n\t" + "vshl.u64 d11, d0, #25\n\t" + "vsri.u64 d11, d0, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d7, d8\n\t" + "vadd.i64 d12, d24\n\t" + "vmov d8, d4\n\t" + "veor d10, d1, d2\n\t" + "vadd.i64 d7, d12\n\t" + "vbsl d8, d5, d6\n\t" + "vbsl d10, d0, d2\n\t" + "vadd.i64 d7, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d3, d7\n\t" + "vadd.i64 d7, d10\n\t" + /* Round 9 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d3, #50\n\t" + "vsri.u64 d8, d3, #14\n\t" + "vshl.u64 d9, d7, #36\n\t" + "vsri.u64 d9, d7, #28\n\t" + "vshl.u64 d10, d3, #46\n\t" + "vsri.u64 d10, d3, #18\n\t" + "vshl.u64 d11, d7, #30\n\t" + "vsri.u64 d11, d7, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d3, #23\n\t" + 
"vsri.u64 d10, d3, #41\n\t" + "vshl.u64 d11, d7, #25\n\t" + "vsri.u64 d11, d7, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d6, d8\n\t" + "vadd.i64 d12, d25\n\t" + "vmov d8, d3\n\t" + "veor d10, d0, d1\n\t" + "vadd.i64 d6, d12\n\t" + "vbsl d8, d4, d5\n\t" + "vbsl d10, d7, d1\n\t" + "vadd.i64 d6, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d2, d6\n\t" + "vadd.i64 d6, d10\n\t" + /* Calc new W[8]-W[9] */ + "vext.8 q6, q12, q13, #8\n\t" + "vshl.u64 q4, q11, #45\n\t" + "vsri.u64 q4, q11, #19\n\t" + "vshl.u64 q5, q11, #3\n\t" + "vsri.u64 q5, q11, #61\n\t" + "veor q5, q4\n\t" + "vshr.u64 q4, q11, #6\n\t" + "veor q5, q4\n\t" + "vadd.i64 q12, q5\n\t" + "vext.8 q7, q8, q9, #8\n\t" + "vadd.i64 q12, q7\n\t" + "vshl.u64 q4, q6, #63\n\t" + "vsri.u64 q4, q6, #1\n\t" + "vshl.u64 q5, q6, #56\n\t" + "vsri.u64 q5, q6, #8\n\t" + "veor q5, q4\n\t" + "vshr.u64 q6, #7\n\t" + "veor q5, q6\n\t" + "vadd.i64 q12, q5\n\t" + /* Round 10 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d2, #50\n\t" + "vsri.u64 d8, d2, #14\n\t" + "vshl.u64 d9, d6, #36\n\t" + "vsri.u64 d9, d6, #28\n\t" + "vshl.u64 d10, d2, #46\n\t" + "vsri.u64 d10, d2, #18\n\t" + "vshl.u64 d11, d6, #30\n\t" + "vsri.u64 d11, d6, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d2, #23\n\t" + "vsri.u64 d10, d2, #41\n\t" + "vshl.u64 d11, d6, #25\n\t" + "vsri.u64 d11, d6, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d5, d8\n\t" + "vadd.i64 d12, d26\n\t" + "vmov d8, d2\n\t" + "veor d10, d7, d0\n\t" + "vadd.i64 d5, d12\n\t" + "vbsl d8, d3, d4\n\t" + "vbsl d10, d6, d0\n\t" + "vadd.i64 d5, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d1, d5\n\t" + "vadd.i64 d5, d10\n\t" + /* Round 11 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d1, #50\n\t" + "vsri.u64 d8, d1, #14\n\t" + "vshl.u64 d9, d5, #36\n\t" + "vsri.u64 d9, d5, #28\n\t" + "vshl.u64 d10, d1, #46\n\t" + "vsri.u64 d10, d1, #18\n\t" + "vshl.u64 d11, d5, #30\n\t" + "vsri.u64 d11, d5, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d1, #23\n\t" + "vsri.u64 d10, d1, #41\n\t" + "vshl.u64 d11, d5, #25\n\t" + "vsri.u64 d11, d5, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d4, d8\n\t" + "vadd.i64 d12, d27\n\t" + "vmov d8, d1\n\t" + "veor d10, d6, d7\n\t" + "vadd.i64 d4, d12\n\t" + "vbsl d8, d2, d3\n\t" + "vbsl d10, d5, d7\n\t" + "vadd.i64 d4, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d0, d4\n\t" + "vadd.i64 d4, d10\n\t" + /* Calc new W[10]-W[11] */ + "vext.8 q6, q13, q14, #8\n\t" + "vshl.u64 q4, q12, #45\n\t" + "vsri.u64 q4, q12, #19\n\t" + "vshl.u64 q5, q12, #3\n\t" + "vsri.u64 q5, q12, #61\n\t" + "veor q5, q4\n\t" + "vshr.u64 q4, q12, #6\n\t" + "veor q5, q4\n\t" + "vadd.i64 q13, q5\n\t" + "vext.8 q7, q9, q10, #8\n\t" + "vadd.i64 q13, q7\n\t" + "vshl.u64 q4, q6, #63\n\t" + "vsri.u64 q4, q6, #1\n\t" + "vshl.u64 q5, q6, #56\n\t" + "vsri.u64 q5, q6, #8\n\t" + "veor q5, q4\n\t" + "vshr.u64 q6, #7\n\t" + "veor q5, q6\n\t" + "vadd.i64 q13, q5\n\t" + /* Round 12 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d0, #50\n\t" + "vsri.u64 d8, d0, #14\n\t" + "vshl.u64 d9, d4, #36\n\t" + "vsri.u64 d9, d4, #28\n\t" + "vshl.u64 d10, d0, #46\n\t" + "vsri.u64 d10, d0, #18\n\t" + "vshl.u64 d11, d4, #30\n\t" + "vsri.u64 d11, d4, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d0, #23\n\t" + "vsri.u64 d10, d0, #41\n\t" + "vshl.u64 d11, d4, #25\n\t" + "vsri.u64 d11, d4, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d3, d8\n\t" + "vadd.i64 d12, d28\n\t" + "vmov d8, d0\n\t" + "veor d10, d5, d6\n\t" + 
"vadd.i64 d3, d12\n\t" + "vbsl d8, d1, d2\n\t" + "vbsl d10, d4, d6\n\t" + "vadd.i64 d3, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d7, d3\n\t" + "vadd.i64 d3, d10\n\t" + /* Round 13 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d7, #50\n\t" + "vsri.u64 d8, d7, #14\n\t" + "vshl.u64 d9, d3, #36\n\t" + "vsri.u64 d9, d3, #28\n\t" + "vshl.u64 d10, d7, #46\n\t" + "vsri.u64 d10, d7, #18\n\t" + "vshl.u64 d11, d3, #30\n\t" + "vsri.u64 d11, d3, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d7, #23\n\t" + "vsri.u64 d10, d7, #41\n\t" + "vshl.u64 d11, d3, #25\n\t" + "vsri.u64 d11, d3, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d2, d8\n\t" + "vadd.i64 d12, d29\n\t" + "vmov d8, d7\n\t" + "veor d10, d4, d5\n\t" + "vadd.i64 d2, d12\n\t" + "vbsl d8, d0, d1\n\t" + "vbsl d10, d3, d5\n\t" + "vadd.i64 d2, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d6, d2\n\t" + "vadd.i64 d2, d10\n\t" + /* Calc new W[12]-W[13] */ + "vext.8 q6, q14, q15, #8\n\t" + "vshl.u64 q4, q13, #45\n\t" + "vsri.u64 q4, q13, #19\n\t" + "vshl.u64 q5, q13, #3\n\t" + "vsri.u64 q5, q13, #61\n\t" + "veor q5, q4\n\t" + "vshr.u64 q4, q13, #6\n\t" + "veor q5, q4\n\t" + "vadd.i64 q14, q5\n\t" + "vext.8 q7, q10, q11, #8\n\t" + "vadd.i64 q14, q7\n\t" + "vshl.u64 q4, q6, #63\n\t" + "vsri.u64 q4, q6, #1\n\t" + "vshl.u64 q5, q6, #56\n\t" + "vsri.u64 q5, q6, #8\n\t" + "veor q5, q4\n\t" + "vshr.u64 q6, #7\n\t" + "veor q5, q6\n\t" + "vadd.i64 q14, q5\n\t" + /* Round 14 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d6, #50\n\t" + "vsri.u64 d8, d6, #14\n\t" + "vshl.u64 d9, d2, #36\n\t" + "vsri.u64 d9, d2, #28\n\t" + "vshl.u64 d10, d6, #46\n\t" + "vsri.u64 d10, d6, #18\n\t" + "vshl.u64 d11, d2, #30\n\t" + "vsri.u64 d11, d2, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d6, #23\n\t" + "vsri.u64 d10, d6, #41\n\t" + "vshl.u64 d11, d2, #25\n\t" + "vsri.u64 d11, d2, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d1, d8\n\t" + "vadd.i64 d12, d30\n\t" + "vmov d8, d6\n\t" + "veor d10, d3, d4\n\t" + "vadd.i64 d1, d12\n\t" + "vbsl d8, d7, d0\n\t" + "vbsl d10, d2, d4\n\t" + "vadd.i64 d1, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d5, d1\n\t" + "vadd.i64 d1, d10\n\t" + /* Round 15 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d5, #50\n\t" + "vsri.u64 d8, d5, #14\n\t" + "vshl.u64 d9, d1, #36\n\t" + "vsri.u64 d9, d1, #28\n\t" + "vshl.u64 d10, d5, #46\n\t" + "vsri.u64 d10, d5, #18\n\t" + "vshl.u64 d11, d1, #30\n\t" + "vsri.u64 d11, d1, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d5, #23\n\t" + "vsri.u64 d10, d5, #41\n\t" + "vshl.u64 d11, d1, #25\n\t" + "vsri.u64 d11, d1, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d0, d8\n\t" + "vadd.i64 d12, d31\n\t" + "vmov d8, d5\n\t" + "veor d10, d2, d3\n\t" + "vadd.i64 d0, d12\n\t" + "vbsl d8, d6, d7\n\t" + "vbsl d10, d1, d3\n\t" + "vadd.i64 d0, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d4, d0\n\t" + "vadd.i64 d0, d10\n\t" + /* Calc new W[14]-W[15] */ + "vext.8 q6, q15, q8, #8\n\t" + "vshl.u64 q4, q14, #45\n\t" + "vsri.u64 q4, q14, #19\n\t" + "vshl.u64 q5, q14, #3\n\t" + "vsri.u64 q5, q14, #61\n\t" + "veor q5, q4\n\t" + "vshr.u64 q4, q14, #6\n\t" + "veor q5, q4\n\t" + "vadd.i64 q15, q5\n\t" + "vext.8 q7, q11, q12, #8\n\t" + "vadd.i64 q15, q7\n\t" + "vshl.u64 q4, q6, #63\n\t" + "vsri.u64 q4, q6, #1\n\t" + "vshl.u64 q5, q6, #56\n\t" + "vsri.u64 q5, q6, #8\n\t" + "veor q5, q4\n\t" + "vshr.u64 q6, #7\n\t" + "veor q5, q6\n\t" + "vadd.i64 q15, q5\n\t" + "subs r12, r12, #1\n\t" + "bne 
L_sha512_len_neon_start_%=\n\t" + /* Round 0 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d4, #50\n\t" + "vsri.u64 d8, d4, #14\n\t" + "vshl.u64 d9, d0, #36\n\t" + "vsri.u64 d9, d0, #28\n\t" + "vshl.u64 d10, d4, #46\n\t" + "vsri.u64 d10, d4, #18\n\t" + "vshl.u64 d11, d0, #30\n\t" + "vsri.u64 d11, d0, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d4, #23\n\t" + "vsri.u64 d10, d4, #41\n\t" + "vshl.u64 d11, d0, #25\n\t" + "vsri.u64 d11, d0, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d7, d8\n\t" + "vadd.i64 d12, d16\n\t" + "vmov d8, d4\n\t" + "veor d10, d1, d2\n\t" + "vadd.i64 d7, d12\n\t" + "vbsl d8, d5, d6\n\t" + "vbsl d10, d0, d2\n\t" + "vadd.i64 d7, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d3, d7\n\t" + "vadd.i64 d7, d10\n\t" + /* Round 1 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d3, #50\n\t" + "vsri.u64 d8, d3, #14\n\t" + "vshl.u64 d9, d7, #36\n\t" + "vsri.u64 d9, d7, #28\n\t" + "vshl.u64 d10, d3, #46\n\t" + "vsri.u64 d10, d3, #18\n\t" + "vshl.u64 d11, d7, #30\n\t" + "vsri.u64 d11, d7, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d3, #23\n\t" + "vsri.u64 d10, d3, #41\n\t" + "vshl.u64 d11, d7, #25\n\t" + "vsri.u64 d11, d7, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d6, d8\n\t" + "vadd.i64 d12, d17\n\t" + "vmov d8, d3\n\t" + "veor d10, d0, d1\n\t" + "vadd.i64 d6, d12\n\t" + "vbsl d8, d4, d5\n\t" + "vbsl d10, d7, d1\n\t" + "vadd.i64 d6, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d2, d6\n\t" + "vadd.i64 d6, d10\n\t" + /* Round 2 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d2, #50\n\t" + "vsri.u64 d8, d2, #14\n\t" + "vshl.u64 d9, d6, #36\n\t" + "vsri.u64 d9, d6, #28\n\t" + "vshl.u64 d10, d2, #46\n\t" + "vsri.u64 d10, d2, #18\n\t" + "vshl.u64 d11, d6, #30\n\t" + "vsri.u64 d11, d6, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d2, #23\n\t" + "vsri.u64 d10, d2, #41\n\t" + "vshl.u64 d11, d6, #25\n\t" + "vsri.u64 d11, d6, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d5, d8\n\t" + "vadd.i64 d12, d18\n\t" + "vmov d8, d2\n\t" + "veor d10, d7, d0\n\t" + "vadd.i64 d5, d12\n\t" + "vbsl d8, d3, d4\n\t" + "vbsl d10, d6, d0\n\t" + "vadd.i64 d5, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d1, d5\n\t" + "vadd.i64 d5, d10\n\t" + /* Round 3 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d1, #50\n\t" + "vsri.u64 d8, d1, #14\n\t" + "vshl.u64 d9, d5, #36\n\t" + "vsri.u64 d9, d5, #28\n\t" + "vshl.u64 d10, d1, #46\n\t" + "vsri.u64 d10, d1, #18\n\t" + "vshl.u64 d11, d5, #30\n\t" + "vsri.u64 d11, d5, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d1, #23\n\t" + "vsri.u64 d10, d1, #41\n\t" + "vshl.u64 d11, d5, #25\n\t" + "vsri.u64 d11, d5, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d4, d8\n\t" + "vadd.i64 d12, d19\n\t" + "vmov d8, d1\n\t" + "veor d10, d6, d7\n\t" + "vadd.i64 d4, d12\n\t" + "vbsl d8, d2, d3\n\t" + "vbsl d10, d5, d7\n\t" + "vadd.i64 d4, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d0, d4\n\t" + "vadd.i64 d4, d10\n\t" + /* Round 4 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d0, #50\n\t" + "vsri.u64 d8, d0, #14\n\t" + "vshl.u64 d9, d4, #36\n\t" + "vsri.u64 d9, d4, #28\n\t" + "vshl.u64 d10, d0, #46\n\t" + "vsri.u64 d10, d0, #18\n\t" + "vshl.u64 d11, d4, #30\n\t" + "vsri.u64 d11, d4, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d0, #23\n\t" + "vsri.u64 d10, d0, #41\n\t" + "vshl.u64 d11, d4, #25\n\t" + "vsri.u64 d11, d4, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d3, d8\n\t" + 
"vadd.i64 d12, d20\n\t" + "vmov d8, d0\n\t" + "veor d10, d5, d6\n\t" + "vadd.i64 d3, d12\n\t" + "vbsl d8, d1, d2\n\t" + "vbsl d10, d4, d6\n\t" + "vadd.i64 d3, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d7, d3\n\t" + "vadd.i64 d3, d10\n\t" + /* Round 5 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d7, #50\n\t" + "vsri.u64 d8, d7, #14\n\t" + "vshl.u64 d9, d3, #36\n\t" + "vsri.u64 d9, d3, #28\n\t" + "vshl.u64 d10, d7, #46\n\t" + "vsri.u64 d10, d7, #18\n\t" + "vshl.u64 d11, d3, #30\n\t" + "vsri.u64 d11, d3, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d7, #23\n\t" + "vsri.u64 d10, d7, #41\n\t" + "vshl.u64 d11, d3, #25\n\t" + "vsri.u64 d11, d3, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d2, d8\n\t" + "vadd.i64 d12, d21\n\t" + "vmov d8, d7\n\t" + "veor d10, d4, d5\n\t" + "vadd.i64 d2, d12\n\t" + "vbsl d8, d0, d1\n\t" + "vbsl d10, d3, d5\n\t" + "vadd.i64 d2, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d6, d2\n\t" + "vadd.i64 d2, d10\n\t" + /* Round 6 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d6, #50\n\t" + "vsri.u64 d8, d6, #14\n\t" + "vshl.u64 d9, d2, #36\n\t" + "vsri.u64 d9, d2, #28\n\t" + "vshl.u64 d10, d6, #46\n\t" + "vsri.u64 d10, d6, #18\n\t" + "vshl.u64 d11, d2, #30\n\t" + "vsri.u64 d11, d2, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d6, #23\n\t" + "vsri.u64 d10, d6, #41\n\t" + "vshl.u64 d11, d2, #25\n\t" + "vsri.u64 d11, d2, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d1, d8\n\t" + "vadd.i64 d12, d22\n\t" + "vmov d8, d6\n\t" + "veor d10, d3, d4\n\t" + "vadd.i64 d1, d12\n\t" + "vbsl d8, d7, d0\n\t" + "vbsl d10, d2, d4\n\t" + "vadd.i64 d1, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d5, d1\n\t" + "vadd.i64 d1, d10\n\t" + /* Round 7 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d5, #50\n\t" + "vsri.u64 d8, d5, #14\n\t" + "vshl.u64 d9, d1, #36\n\t" + "vsri.u64 d9, d1, #28\n\t" + "vshl.u64 d10, d5, #46\n\t" + "vsri.u64 d10, d5, #18\n\t" + "vshl.u64 d11, d1, #30\n\t" + "vsri.u64 d11, d1, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d5, #23\n\t" + "vsri.u64 d10, d5, #41\n\t" + "vshl.u64 d11, d1, #25\n\t" + "vsri.u64 d11, d1, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d0, d8\n\t" + "vadd.i64 d12, d23\n\t" + "vmov d8, d5\n\t" + "veor d10, d2, d3\n\t" + "vadd.i64 d0, d12\n\t" + "vbsl d8, d6, d7\n\t" + "vbsl d10, d1, d3\n\t" + "vadd.i64 d0, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d4, d0\n\t" + "vadd.i64 d0, d10\n\t" + /* Round 8 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d4, #50\n\t" + "vsri.u64 d8, d4, #14\n\t" + "vshl.u64 d9, d0, #36\n\t" + "vsri.u64 d9, d0, #28\n\t" + "vshl.u64 d10, d4, #46\n\t" + "vsri.u64 d10, d4, #18\n\t" + "vshl.u64 d11, d0, #30\n\t" + "vsri.u64 d11, d0, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d4, #23\n\t" + "vsri.u64 d10, d4, #41\n\t" + "vshl.u64 d11, d0, #25\n\t" + "vsri.u64 d11, d0, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d7, d8\n\t" + "vadd.i64 d12, d24\n\t" + "vmov d8, d4\n\t" + "veor d10, d1, d2\n\t" + "vadd.i64 d7, d12\n\t" + "vbsl d8, d5, d6\n\t" + "vbsl d10, d0, d2\n\t" + "vadd.i64 d7, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d3, d7\n\t" + "vadd.i64 d7, d10\n\t" + /* Round 9 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d3, #50\n\t" + "vsri.u64 d8, d3, #14\n\t" + "vshl.u64 d9, d7, #36\n\t" + "vsri.u64 d9, d7, #28\n\t" + "vshl.u64 d10, d3, #46\n\t" + "vsri.u64 d10, d3, #18\n\t" + "vshl.u64 d11, d7, #30\n\t" + "vsri.u64 d11, d7, #34\n\t" + "veor d8, d10\n\t" + 
"veor d9, d11\n\t" + "vshl.u64 d10, d3, #23\n\t" + "vsri.u64 d10, d3, #41\n\t" + "vshl.u64 d11, d7, #25\n\t" + "vsri.u64 d11, d7, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d6, d8\n\t" + "vadd.i64 d12, d25\n\t" + "vmov d8, d3\n\t" + "veor d10, d0, d1\n\t" + "vadd.i64 d6, d12\n\t" + "vbsl d8, d4, d5\n\t" + "vbsl d10, d7, d1\n\t" + "vadd.i64 d6, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d2, d6\n\t" + "vadd.i64 d6, d10\n\t" + /* Round 10 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d2, #50\n\t" + "vsri.u64 d8, d2, #14\n\t" + "vshl.u64 d9, d6, #36\n\t" + "vsri.u64 d9, d6, #28\n\t" + "vshl.u64 d10, d2, #46\n\t" + "vsri.u64 d10, d2, #18\n\t" + "vshl.u64 d11, d6, #30\n\t" + "vsri.u64 d11, d6, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d2, #23\n\t" + "vsri.u64 d10, d2, #41\n\t" + "vshl.u64 d11, d6, #25\n\t" + "vsri.u64 d11, d6, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d5, d8\n\t" + "vadd.i64 d12, d26\n\t" + "vmov d8, d2\n\t" + "veor d10, d7, d0\n\t" + "vadd.i64 d5, d12\n\t" + "vbsl d8, d3, d4\n\t" + "vbsl d10, d6, d0\n\t" + "vadd.i64 d5, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d1, d5\n\t" + "vadd.i64 d5, d10\n\t" + /* Round 11 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d1, #50\n\t" + "vsri.u64 d8, d1, #14\n\t" + "vshl.u64 d9, d5, #36\n\t" + "vsri.u64 d9, d5, #28\n\t" + "vshl.u64 d10, d1, #46\n\t" + "vsri.u64 d10, d1, #18\n\t" + "vshl.u64 d11, d5, #30\n\t" + "vsri.u64 d11, d5, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d1, #23\n\t" + "vsri.u64 d10, d1, #41\n\t" + "vshl.u64 d11, d5, #25\n\t" + "vsri.u64 d11, d5, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d4, d8\n\t" + "vadd.i64 d12, d27\n\t" + "vmov d8, d1\n\t" + "veor d10, d6, d7\n\t" + "vadd.i64 d4, d12\n\t" + "vbsl d8, d2, d3\n\t" + "vbsl d10, d5, d7\n\t" + "vadd.i64 d4, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d0, d4\n\t" + "vadd.i64 d4, d10\n\t" + /* Round 12 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d0, #50\n\t" + "vsri.u64 d8, d0, #14\n\t" + "vshl.u64 d9, d4, #36\n\t" + "vsri.u64 d9, d4, #28\n\t" + "vshl.u64 d10, d0, #46\n\t" + "vsri.u64 d10, d0, #18\n\t" + "vshl.u64 d11, d4, #30\n\t" + "vsri.u64 d11, d4, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d0, #23\n\t" + "vsri.u64 d10, d0, #41\n\t" + "vshl.u64 d11, d4, #25\n\t" + "vsri.u64 d11, d4, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d3, d8\n\t" + "vadd.i64 d12, d28\n\t" + "vmov d8, d0\n\t" + "veor d10, d5, d6\n\t" + "vadd.i64 d3, d12\n\t" + "vbsl d8, d1, d2\n\t" + "vbsl d10, d4, d6\n\t" + "vadd.i64 d3, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d7, d3\n\t" + "vadd.i64 d3, d10\n\t" + /* Round 13 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d7, #50\n\t" + "vsri.u64 d8, d7, #14\n\t" + "vshl.u64 d9, d3, #36\n\t" + "vsri.u64 d9, d3, #28\n\t" + "vshl.u64 d10, d7, #46\n\t" + "vsri.u64 d10, d7, #18\n\t" + "vshl.u64 d11, d3, #30\n\t" + "vsri.u64 d11, d3, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d7, #23\n\t" + "vsri.u64 d10, d7, #41\n\t" + "vshl.u64 d11, d3, #25\n\t" + "vsri.u64 d11, d3, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d2, d8\n\t" + "vadd.i64 d12, d29\n\t" + "vmov d8, d7\n\t" + "veor d10, d4, d5\n\t" + "vadd.i64 d2, d12\n\t" + "vbsl d8, d0, d1\n\t" + "vbsl d10, d3, d5\n\t" + "vadd.i64 d2, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d6, d2\n\t" + "vadd.i64 d2, d10\n\t" + /* Round 14 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d6, #50\n\t" + "vsri.u64 d8, d6, 
#14\n\t" + "vshl.u64 d9, d2, #36\n\t" + "vsri.u64 d9, d2, #28\n\t" + "vshl.u64 d10, d6, #46\n\t" + "vsri.u64 d10, d6, #18\n\t" + "vshl.u64 d11, d2, #30\n\t" + "vsri.u64 d11, d2, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d6, #23\n\t" + "vsri.u64 d10, d6, #41\n\t" + "vshl.u64 d11, d2, #25\n\t" + "vsri.u64 d11, d2, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d1, d8\n\t" + "vadd.i64 d12, d30\n\t" + "vmov d8, d6\n\t" + "veor d10, d3, d4\n\t" + "vadd.i64 d1, d12\n\t" + "vbsl d8, d7, d0\n\t" + "vbsl d10, d2, d4\n\t" + "vadd.i64 d1, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d5, d1\n\t" + "vadd.i64 d1, d10\n\t" + /* Round 15 */ + "vld1.64 {d12}, [r3]!\n\t" + "vshl.u64 d8, d5, #50\n\t" + "vsri.u64 d8, d5, #14\n\t" + "vshl.u64 d9, d1, #36\n\t" + "vsri.u64 d9, d1, #28\n\t" + "vshl.u64 d10, d5, #46\n\t" + "vsri.u64 d10, d5, #18\n\t" + "vshl.u64 d11, d1, #30\n\t" + "vsri.u64 d11, d1, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d5, #23\n\t" + "vsri.u64 d10, d5, #41\n\t" + "vshl.u64 d11, d1, #25\n\t" + "vsri.u64 d11, d1, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d0, d8\n\t" + "vadd.i64 d12, d31\n\t" + "vmov d8, d5\n\t" + "veor d10, d2, d3\n\t" + "vadd.i64 d0, d12\n\t" + "vbsl d8, d6, d7\n\t" + "vbsl d10, d1, d3\n\t" + "vadd.i64 d0, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d4, d0\n\t" + "vadd.i64 d0, d10\n\t" + /* Add in digest from start */ + "vldm.64 %[sha512], {d8-d15}\n\t" + "vadd.i64 q0, q0, q4\n\t" + "vadd.i64 q1, q1, q5\n\t" + "vadd.i64 q2, q2, q6\n\t" + "vadd.i64 q3, q3, q7\n\t" + "vstm.64 %[sha512], {d0-d7}\n\t" + "subs %[len], %[len], #0x80\n\t" + "bne L_sha512_len_neon_begin_%=\n\t" + : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len) + : [L_SHA512_transform_len_k] "r" (L_SHA512_transform_len_k), [L_SHA512_transform_neon_len_k] "r" (L_SHA512_transform_neon_len_k) + : "memory", "r3", "r12", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" + ); +} + +#endif /* !WOLFSSL_ARMASM_NO_NEON */ +#endif /* WOLFSSL_ARMASM */ +#endif /* !__aarch64__ */ diff --git a/client/wolfssl/wolfcrypt/src/port/arm/armv8-aes.c b/client/wolfssl/wolfcrypt/src/port/arm/armv8-aes.c new file mode 100644 index 0000000..d0f8a9c --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/arm/armv8-aes.c @@ -0,0 +1,4653 @@ +/* armv8-aes.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +/* + * There are two versions one for 64 (Aarch64) and one for 32 bit (Aarch32). + * If changing one check the other. 
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if !defined(NO_AES) && defined(WOLFSSL_ARMASM)
+
+#include <wolfssl/wolfcrypt/aes.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#ifdef _MSC_VER
+    /* 4127 warning constant while(1) */
+    #pragma warning(disable: 4127)
+#endif
+
+
+static const byte rcon[] = {
+    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
+    /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
+
+/* get table value from hardware */
+#ifdef __aarch64__
+    #define SBOX(x)                          \
+        do {                                 \
+            __asm__ volatile (               \
+                "DUP v1.4s, %w[in]     \n"   \
+                "MOVI v0.16b, #0       \n"   \
+                "AESE v0.16b, v1.16b   \n"   \
+                "UMOV %w[out], v0.s[0] \n"   \
+                : [out] "=r"((x))            \
+                : [in] "r" ((x))             \
+                : "cc", "memory", "v0", "v1" \
+            );                               \
+        } while(0)
+
+    #define IMIX(x)                          \
+        do {                                 \
+            __asm__ volatile (               \
+                "LD1 {v0.16b}, [%[in]] \n"   \
+                "AESIMC v0.16b, v0.16b \n"   \
+                "ST1 {v0.16b}, [%[out]]\n"   \
+                : [out] "=r" ((x))           \
+                : [in] "0" ((x))             \
+                : "cc", "memory", "v0"       \
+            );                               \
+        } while(0)
+#else /* if not defined __aarch64__ then use 32 bit version */
+    #define SBOX(x)                          \
+        do {                                 \
+            __asm__ volatile (               \
+                "VDUP.32 q1, %[in]     \n"   \
+                "VMOV.i32 q0, #0       \n"   \
+                "AESE.8 q0, q1         \n"   \
+                "VMOV.32 %[out], d0[0] \n"   \
+                : [out] "=r"((x))            \
+                : [in] "r" ((x))             \
+                : "cc", "memory", "q0", "q1" \
+            );                               \
+        } while(0)
+
+    #define IMIX(x)                          \
+        do {                                 \
+            __asm__ volatile (               \
+                "VLD1.32 {q0}, [%[in]]  \n"  \
+                "AESIMC.8 q0, q0        \n"  \
+                "VST1.32 {q0}, [%[out]] \n"  \
+                : [out] "=r" ((x))           \
+                : [in] "0" ((x))             \
+                : "cc", "memory", "q0"       \
+            );                               \
+        } while(0)
+#endif /* aarch64 */
+
+
+#ifdef HAVE_AESGCM
+
+static WC_INLINE void IncrementGcmCounter(byte* inOutCtr)
+{
+    int i;
+
+    /* in network byte order so start at end and work back */
+    for (i = AES_BLOCK_SIZE - 1; i >= AES_BLOCK_SIZE - CTR_SZ; i--) {
+        if (++inOutCtr[i])  /* we're done unless we overflow */
+            return;
+    }
+}
+
+
+static WC_INLINE void FlattenSzInBits(byte* buf, word32 sz)
+{
+    /* Multiply the sz by 8 */
+    word32 szHi = (sz >> (8*sizeof(sz) - 3));
+    sz <<= 3;
+
+    /* copy over the words of the sz into the destination buffer */
+    buf[0] = (szHi >> 24) & 0xff;
+    buf[1] = (szHi >> 16) & 0xff;
+    buf[2] = (szHi >> 8) & 0xff;
+    buf[3] = szHi & 0xff;
+    buf[4] = (sz >> 24) & 0xff;
+    buf[5] = (sz >> 16) & 0xff;
+    buf[6] = (sz >> 8) & 0xff;
+    buf[7] = sz & 0xff;
+}
+
+#endif /* HAVE_AESGCM */
+
+/* Similar to the wolfSSL software implementation of expanding the AES key.
+ * Changed the locations where table lookups were made so that they use a
+ * hardware instruction. Also altered the decryption key to match.
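+ *
+ * The SBOX() macro above works because AESE computes
+ * SubBytes(ShiftRows(state ^ key)): with a zero state and the input word
+ * duplicated into every lane, ShiftRows only moves identical bytes onto
+ * each other, so lane 0 comes back as a plain per-byte S-box lookup.
+ */
+
+/* Illustration only -- a minimal sketch (hypothetical helper, not part of
+ * this file's API) of the FIPS-197 key-schedule core that the loop below
+ * inlines. SubWord() and RotWord() commute because SubWord() is bytewise,
+ * which is why SBOX() can be applied before the rotate. */
+#if 0
+static word32 KeyScheduleCore(word32 prev, byte rconVal)
+{
+    word32 temp = prev;
+    SBOX(temp);                /* SubWord(): hardware S-box on all 4 bytes */
+    temp = rotrFixed(temp, 8); /* RotWord() */
+    return temp ^ rconVal;     /* add the round constant */
+}
+#endif
+
+/* Key expansion: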
*/ +int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) +{ + word32 temp; + word32 *rk; + unsigned int i = 0; + +#if defined(AES_MAX_KEY_SIZE) + const word32 max_key_len = (AES_MAX_KEY_SIZE / 8); +#endif + + if (!((keylen == 16) || (keylen == 24) || (keylen == 32)) || + aes == NULL || userKey == NULL) + return BAD_FUNC_ARG; + + rk = aes->key; +#if defined(AES_MAX_KEY_SIZE) + /* Check key length */ + if (keylen > max_key_len) { + return BAD_FUNC_ARG; + } +#endif + + #ifdef WOLFSSL_AES_COUNTER + aes->left = 0; + #endif /* WOLFSSL_AES_COUNTER */ + + aes->rounds = keylen/4 + 6; + XMEMCPY(rk, userKey, keylen); + + switch(keylen) + { +#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 128 && \ + defined(WOLFSSL_AES_128) + case 16: + while (1) + { + temp = rk[3]; + SBOX(temp); + temp = rotrFixed(temp, 8); + rk[4] = rk[0] ^ temp ^ rcon[i]; + rk[5] = rk[4] ^ rk[1]; + rk[6] = rk[5] ^ rk[2]; + rk[7] = rk[6] ^ rk[3]; + if (++i == 10) + break; + rk += 4; + } + break; +#endif /* 128 */ + +#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 192 && \ + defined(WOLFSSL_AES_192) + case 24: + /* for (;;) here triggers a bug in VC60 SP4 w/ Pro Pack */ + while (1) + { + temp = rk[5]; + SBOX(temp); + temp = rotrFixed(temp, 8); + rk[ 6] = rk[ 0] ^ temp ^ rcon[i]; + rk[ 7] = rk[ 1] ^ rk[ 6]; + rk[ 8] = rk[ 2] ^ rk[ 7]; + rk[ 9] = rk[ 3] ^ rk[ 8]; + if (++i == 8) + break; + rk[10] = rk[ 4] ^ rk[ 9]; + rk[11] = rk[ 5] ^ rk[10]; + rk += 6; + } + break; +#endif /* 192 */ + +#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 256 && \ + defined(WOLFSSL_AES_256) + case 32: + while (1) + { + temp = rk[7]; + SBOX(temp); + temp = rotrFixed(temp, 8); + rk[8] = rk[0] ^ temp ^ rcon[i]; + rk[ 9] = rk[ 1] ^ rk[ 8]; + rk[10] = rk[ 2] ^ rk[ 9]; + rk[11] = rk[ 3] ^ rk[10]; + if (++i == 7) + break; + temp = rk[11]; + SBOX(temp); + rk[12] = rk[ 4] ^ temp; + rk[13] = rk[ 5] ^ rk[12]; + rk[14] = rk[ 6] ^ rk[13]; + rk[15] = rk[ 7] ^ rk[14]; + + rk += 8; + } + break; +#endif /* 256 */ + + default: + return BAD_FUNC_ARG; + } + + if (dir == AES_DECRYPTION) + { +#ifdef HAVE_AES_DECRYPT + unsigned int j; + rk = aes->key; + + /* invert the order of the round keys: */ + for (i = 0, j = 4* aes->rounds; i < j; i += 4, j -= 4) { + temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; + temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; + temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; + temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; + } + /* apply the inverse MixColumn transform to all round keys but the + first and the last: */ + for (i = 1; i < aes->rounds; i++) { + rk += 4; + IMIX(rk); + } +#else + WOLFSSL_MSG("AES Decryption not compiled in"); + return BAD_FUNC_ARG; +#endif /* HAVE_AES_DECRYPT */ + } + + return wc_AesSetIV(aes, iv); +} + +#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + return wc_AesSetKey(aes, userKey, keylen, iv, dir); + } +#endif + +/* wc_AesSetIV is shared between software and hardware */ +int wc_AesSetIV(Aes* aes, const byte* iv) +{ + if (aes == NULL) + return BAD_FUNC_ARG; + + if (iv) + XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE); + else + XMEMSET(aes->reg, 0, AES_BLOCK_SIZE); + + return 0; +} + + +#ifdef __aarch64__ +/* AES CCM/GCM use encrypt direct but not decrypt */ +#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + static int wc_AesEncrypt(Aes* aes, const byte* inBlock, 
byte* outBlock) + { + word32* keyPt = aes->key; + + /* + AESE exor's input with round key + shift rows of exor'ed result + sub bytes for shifted rows + */ + + __asm__ __volatile__ ( + "LD1 {v0.16b}, [%[CtrIn]] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + + "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + + "#subtract rounds done so far and see if should continue\n" + "MOV w12, %w[R] \n" + "SUB w12, w12, #10 \n" + "CBZ w12, 1f \n" + "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + + "SUB w12, w12, #2 \n" + "CBZ w12, 1f \n" + "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + + "#Final AddRoundKey then store result \n" + "1: \n" + "LD1 {v1.2d}, [%[Key]], #16 \n" + "EOR v0.16b, v0.16b, v1.16b \n" + "ST1 {v0.16b}, [%[CtrOut]] \n" + + :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (aes->rounds), + "=r" (inBlock) + :"0" (outBlock), [Key] "1" (keyPt), [R] "2" (aes->rounds), + [CtrIn] "3" (inBlock) + : "cc", "memory", "w12", "v0", "v1", "v2", "v3", "v4" + ); + + return 0; + } +#endif /* AES_GCM, AES_CCM, DIRECT or COUNTER */ +#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + #ifdef HAVE_AES_DECRYPT + static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + word32* keyPt = aes->key; + + /* + AESE exor's input with round key + shift rows of exor'ed result + sub bytes for shifted rows + */ + + __asm__ __volatile__ ( + "LD1 {v0.16b}, [%[CtrIn]] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + + "AESD v0.16b, v1.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v2.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v3.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v4.16b \n" + "AESIMC v0.16b, v0.16b \n" + + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "AESD v0.16b, v1.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v2.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v3.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v4.16b \n" + "AESIMC v0.16b, v0.16b \n" + + "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n" + "AESD v0.16b, v1.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v2.16b \n" + + "#subtract rounds done so far and see if should continue\n" + "MOV w12, %w[R] \n" + "SUB w12, w12, #10 \n" + "CBZ w12, 1f \n" + "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v1.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v2.16b \n" + + "SUB w12, w12, #2 \n" + "CBZ w12, 1f \n" + "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v1.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v2.16b \n" + + "#Final AddRoundKey then store result \n" + "1: \n" + "LD1 {v1.2d}, [%[Key]], #16 \n" + "EOR v0.16b, v0.16b, v1.16b \n" + "ST1 {v0.4s}, [%[CtrOut]] \n" + + :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (aes->rounds), + "=r" (inBlock) + :[Key] "1" (aes->key), 
"0" (outBlock), [R] "2" (aes->rounds), + [CtrIn] "3" (inBlock) + : "cc", "memory", "w12", "v0", "v1", "v2", "v3", "v4" + ); + + return 0; +} + #endif /* HAVE_AES_DECRYPT */ +#endif /* DIRECT or COUNTER */ + +/* AES-CBC */ +#ifdef HAVE_AES_CBC + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + word32 numBlocks = sz / AES_BLOCK_SIZE; + + if (aes == NULL || out == NULL || (in == NULL && sz > 0)) { + return BAD_FUNC_ARG; + } + + /* do as many block size ops as possible */ + if (numBlocks > 0) { + word32* key = aes->key; + word32* reg = aes->reg; + /* + AESE exor's input with round key + shift rows of exor'ed result + sub bytes for shifted rows + + note: grouping AESE & AESMC together as pairs reduces latency + */ + switch(aes->rounds) { +#ifdef WOLFSSL_AES_128 + case 10: /* AES 128 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v11.2d},[%[Key]], #48 \n" + "LD1 {v0.2d}, [%[reg]] \n" + + "LD1 {v12.2d}, [%[input]], #16 \n" + "1:\n" + "#CBC operations, xorbuf in with current aes->reg \n" + "EOR v0.16b, v0.16b, v12.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "SUB w11, w11, #1 \n" + "EOR v0.16b, v0.16b, v11.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + + "CBZ w11, 2f \n" + "LD1 {v12.2d}, [%[input]], #16 \n" + "B 1b \n" + + "2:\n" + "#store current counter value at the end \n" + "ST1 {v0.2d}, [%[regOut]] \n" + + :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in) + :"0" (out), [Key] "r" (key), [input] "2" (in), + [blocks] "r" (numBlocks), [reg] "1" (reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13" + ); + break; +#endif /* WOLFSSL_AES_128 */ +#ifdef WOLFSSL_AES_192 + case 12: /* AES 192 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, %[Key], #64 \n" + "LD1 {v5.2d-v8.2d}, %[Key], #64 \n" + "LD1 {v9.2d-v12.2d},%[Key], #64 \n" + "LD1 {v13.2d}, %[Key], #16 \n" + "LD1 {v0.2d}, %[reg] \n" + + "LD1 {v14.2d}, [%[input]], #16 \n" + "1:\n" + "#CBC operations, xorbuf in with current aes->reg \n" + "EOR v0.16b, v0.16b, v14.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n" + "SUB w11, w11, #1 \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + + "CBZ w11, 2f \n" + "LD1 {v14.2d}, [%[input]], #16\n" + "B 1b \n" + + "2:\n" + "#store current counter value at the end \n" + "ST1 {v0.2d}, %[regOut] \n" + 
+ + :[out] "=r" (out), [regOut] "=m" (aes->reg), "=r" (in) + :"0" (out), [Key] "m" (aes->key), [input] "2" (in), + [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14" + ); + break; +#endif /* WOLFSSL_AES_192*/ +#ifdef WOLFSSL_AES_256 + case 14: /* AES 256 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, %[Key], #64 \n" + + "LD1 {v5.2d-v8.2d}, %[Key], #64 \n" + "LD1 {v9.2d-v12.2d}, %[Key], #64 \n" + "LD1 {v13.2d-v15.2d}, %[Key], #48 \n" + "LD1 {v0.2d}, %[reg] \n" + + "LD1 {v16.2d}, [%[input]], #16 \n" + "1: \n" + "#CBC operations, xorbuf in with current aes->reg \n" + "EOR v0.16b, v0.16b, v16.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "SUB w11, w11, #1 \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + + "CBZ w11, 2f \n" + "LD1 {v16.2d}, [%[input]], #16 \n" + "B 1b \n" + + "2: \n" + "#store current counter value at the end \n" + "ST1 {v0.2d}, %[regOut] \n" + + + :[out] "=r" (out), [regOut] "=m" (aes->reg), "=r" (in) + :"0" (out), [Key] "m" (aes->key), [input] "2" (in), + [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14","v15", + "v16" + ); + break; +#endif /* WOLFSSL_AES_256 */ + default: + WOLFSSL_MSG("Bad AES-CBC round value"); + return BAD_FUNC_ARG; + } + } + + return 0; + } + + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + word32 numBlocks = sz / AES_BLOCK_SIZE; + + if (aes == NULL || out == NULL || (in == NULL && sz > 0) + || sz % AES_BLOCK_SIZE != 0) { + return BAD_FUNC_ARG; + } + + /* do as many block size ops as possible */ + if (numBlocks > 0) { + word32* key = aes->key; + word32* reg = aes->reg; + + switch(aes->rounds) { +#ifdef WOLFSSL_AES_128 + case 10: /* AES 128 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v11.2d},[%[Key]], #48 \n" + "LD1 {v13.2d}, [%[reg]] \n" + + "1:\n" + "LD1 {v0.2d}, [%[input]], #16 \n" + "MOV v12.16b, v0.16b \n" + "AESD v0.16b, v1.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v2.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v3.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v4.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v5.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v6.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v7.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v8.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v9.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v10.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n" + + "EOR v0.16b, v0.16b, v13.16b 
\n" + "SUB w11, w11, #1 \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "MOV v13.16b, v12.16b \n" + + "CBZ w11, 2f \n" + "B 1b \n" + + "2: \n" + "#store current counter value at the end \n" + "ST1 {v13.2d}, [%[regOut]] \n" + + :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in) + :"0" (out), [Key] "r" (key), [input] "2" (in), + [blocks] "r" (numBlocks), [reg] "1" (reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13" + ); + break; +#endif /* WOLFSSL_AES_128 */ +#ifdef WOLFSSL_AES_192 + case 12: /* AES 192 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v12.2d},[%[Key]], #64 \n" + "LD1 {v13.16b}, [%[Key]], #16 \n" + "LD1 {v15.2d}, [%[reg]] \n" + + "LD1 {v0.2d}, [%[input]], #16 \n" + "1: \n" + "MOV v14.16b, v0.16b \n" + "AESD v0.16b, v1.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v2.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v3.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v4.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v5.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v6.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v7.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v8.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v9.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v10.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v11.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n" + + "EOR v0.16b, v0.16b, v15.16b \n" + "SUB w11, w11, #1 \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "MOV v15.16b, v14.16b \n" + + "CBZ w11, 2f \n" + "LD1 {v0.2d}, [%[input]], #16 \n" + "B 1b \n" + + "2:\n" + "#store current counter value at the end \n" + "ST1 {v15.2d}, [%[regOut]] \n" + + :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in) + :"0" (out), [Key] "r" (key), [input] "2" (in), + [blocks] "r" (numBlocks), [reg] "1" (reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15" + ); + break; +#endif /* WOLFSSL_AES_192 */ +#ifdef WOLFSSL_AES_256 + case 14: /* AES 256 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n" + "LD1 {v13.2d-v15.2d}, [%[Key]], #48 \n" + "LD1 {v17.2d}, [%[reg]] \n" + + "LD1 {v0.2d}, [%[input]], #16 \n" + "1: \n" + "MOV v16.16b, v0.16b \n" + "AESD v0.16b, v1.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v2.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v3.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v4.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v5.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v6.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v7.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v8.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v9.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v10.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v11.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v12.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v13.16b \n" + "AESIMC v0.16b, v0.16b \n" + "AESD v0.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + + "EOR v0.16b, v0.16b, v17.16b \n" + "SUB w11, w11, #1 \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "MOV v17.16b, v16.16b \n" + + "CBZ w11, 2f \n" + "LD1 {v0.2d}, [%[input]], #16 \n" + "B 1b \n" + + "2:\n" + 
"#store current counter value at the end \n" + "ST1 {v17.2d}, [%[regOut]] \n" + + :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in) + :"0" (out), [Key] "r" (key), [input] "2" (in), + [blocks] "r" (numBlocks), [reg] "1" (reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14","v15", + "v16", "v17" + ); + break; +#endif /* WOLFSSL_AES_256 */ + default: + WOLFSSL_MSG("Bad AES-CBC round value"); + return BAD_FUNC_ARG; + } + } + + return 0; + } + #endif + +#endif /* HAVE_AES_CBC */ + +/* AES-CTR */ +#ifdef WOLFSSL_AES_COUNTER + + /* Increment AES counter */ + static WC_INLINE void IncrementAesCounter(byte* inOutCtr) + { + int i; + + /* in network byte order so start at end and work back */ + for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) { + if (++inOutCtr[i]) /* we're done unless we overflow */ + return; + } + } + + int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + byte* tmp; + word32 numBlocks; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; + + /* consume any unused bytes left in aes->tmp */ + while (aes->left && sz) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + sz--; + } + + /* do as many block size ops as possible */ + numBlocks = sz/AES_BLOCK_SIZE; + if (numBlocks > 0) { + /* pointer needed because it is incremented when read, causing + * an issue with call to encrypt/decrypt leftovers */ + byte* keyPt = (byte*)aes->key; + sz -= numBlocks * AES_BLOCK_SIZE; + switch(aes->rounds) { +#ifdef WOLFSSL_AES_128 + case 10: /* AES 128 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + + "#Create vector with the value 1 \n" + "MOVI v15.16b, #1 \n" + "USHR v15.2d, v15.2d, #56 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "EOR v14.16b, v14.16b, v14.16b \n" + "EXT v14.16b, v15.16b, v14.16b, #8\n" + + "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" + "LD1 {v13.2d}, %[reg] \n" + + /* double block */ + "1: \n" + "CMP w11, #1 \n" + "BEQ 2f \n" + "CMP w11, #0 \n" + "BEQ 3f \n" + + "MOV v0.16b, v13.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v13.16b, v13.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "SUB w11, w11, #2 \n" + "ADD v15.2d, v13.2d, v14.2d \n" /* add 1 to counter */ + "ADD v13.2d, v15.2d, v14.2d \n" /* add 1 to counter */ + + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v15.16b, v15.16b \n" /* revert from network order */ + "REV64 v13.16b, v13.16b \n" /* revert from network order */ + + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v15.16b, v1.16b \n" + "AESMC v15.16b, v15.16b \n" + + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v15.16b, v2.16b \n" + "AESMC v15.16b, v15.16b \n" + + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v15.16b, v3.16b \n" + "AESMC v15.16b, v15.16b \n" + + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v15.16b, v4.16b \n" + "AESMC v15.16b, v15.16b \n" + + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v15.16b, v5.16b \n" + "AESMC v15.16b, v15.16b \n" + + "AESE v0.16b, v10.16b \n" + "AESE v15.16b, v6.16b \n" + "AESMC v15.16b, v15.16b \n" + + "EOR v0.16b, v0.16b, v11.16b \n" + "AESE v15.16b, v7.16b \n" + "AESMC v15.16b, 
v15.16b \n" + + "LD1 {v12.2d}, [%[input]], #16 \n" + "AESE v15.16b, v8.16b \n" + "AESMC v15.16b, v15.16b \n" + + "EOR v0.16b, v0.16b, v12.16b \n" + "AESE v15.16b, v9.16b \n" + "AESMC v15.16b, v15.16b \n" + + "LD1 {v12.2d}, [%[input]], #16 \n" + "AESE v15.16b, v10.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "EOR v15.16b, v15.16b, v11.16b \n" + "EOR v15.16b, v15.16b, v12.16b \n" + "ST1 {v15.2d}, [%[out]], #16 \n" + + "B 1b \n" + + /* single block */ + "2: \n" + "MOV v0.16b, v13.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v13.16b, v13.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #1 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v13.16b, v13.16b \n" /* revert from network order */ + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n" + "#CTR operations, increment counter and xorbuf \n" + "LD1 {v12.2d}, [%[input]], #16 \n" + "EOR v0.16b, v0.16b, v12.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + + "3: \n" + "#store current counter value at the end \n" + "ST1 {v13.2d}, %[regOut] \n" + + :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg), + "=r" (in) + :"0" (out), [Key] "1" (keyPt), [input] "3" (in), + [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15" + ); + break; +#endif /* WOLFSSL_AES_128 */ +#ifdef WOLFSSL_AES_192 + case 12: /* AES 192 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + + "#Create vector with the value 1 \n" + "MOVI v16.16b, #1 \n" + "USHR v16.2d, v16.2d, #56 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "EOR v14.16b, v14.16b, v14.16b \n" + "EXT v16.16b, v16.16b, v14.16b, #8\n" + + "LD1 {v9.2d-v12.2d}, [%[Key]], #64\n" + "LD1 {v15.2d}, %[reg] \n" + "LD1 {v13.16b}, [%[Key]], #16 \n" + + /* double block */ + "1: \n" + "CMP w11, #1 \n" + "BEQ 2f \n" + "CMP w11, #0 \n" + "BEQ 3f \n" + + "MOV v0.16b, v15.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v15.16b, v15.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "SUB w11, w11, #2 \n" + "ADD v17.2d, v15.2d, v16.2d \n" /* add 1 to counter */ + "ADD v15.2d, v17.2d, v16.2d \n" /* add 1 to counter */ + + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v17.16b, v17.16b, v17.16b, #8 \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v17.16b, v17.16b \n" /* revert from network order */ + "REV64 v15.16b, v15.16b \n" /* revert from network order */ + + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v1.16b \n" + "AESMC v17.16b, v17.16b \n" + + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v2.16b \n" + "AESMC v17.16b, v17.16b \n" + + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v3.16b \n" + "AESMC v17.16b, v17.16b \n" + + "AESE v0.16b, v8.16b \n" + "AESMC 
v0.16b, v0.16b \n" + "AESE v17.16b, v4.16b \n" + "AESMC v17.16b, v17.16b \n" + + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v5.16b \n" + "AESMC v17.16b, v17.16b \n" + + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v6.16b \n" + "AESMC v17.16b, v17.16b \n" + + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v7.16b \n" + "AESMC v17.16b, v17.16b \n" + + "AESE v0.16b, v12.16b \n" + "AESE v17.16b, v8.16b \n" + "AESMC v17.16b, v17.16b \n" + + "EOR v0.16b, v0.16b, v13.16b \n" + "AESE v17.16b, v9.16b \n" + "AESMC v17.16b, v17.16b \n" + + "LD1 {v14.2d}, [%[input]], #16 \n" + "AESE v17.16b, v10.16b \n" + "AESMC v17.16b, v17.16b \n" + + "EOR v0.16b, v0.16b, v14.16b \n" + "AESE v17.16b, v11.16b \n" + "AESMC v17.16b, v17.16b \n" + + "LD1 {v14.2d}, [%[input]], #16 \n" + "AESE v17.16b, v12.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "EOR v17.16b, v17.16b, v13.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "ST1 {v17.2d}, [%[out]], #16 \n" + + "B 1b \n" + + "2: \n" + "LD1 {v14.2d}, [%[input]], #16 \n" + "MOV v0.16b, v15.16b \n" + + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v15.16b, v15.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "ADD v15.2d, v15.2d, v16.2d \n" /* add 1 to counter */ + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #1 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v15.16b, v15.16b \n" /* revert from network order */ + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n" + "#CTR operations, increment counter and xorbuf \n" + "EOR v0.16b, v0.16b, v14.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + + "3: \n" + "#store current counter value at the end \n" + "ST1 {v15.2d}, %[regOut] \n" + + :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg), + "=r" (in) + :"0" (out), [Key] "1" (keyPt), [input] "3" (in), + [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15", + "v16", "v17" + ); + break; +#endif /* WOLFSSL_AES_192 */ +#ifdef WOLFSSL_AES_256 + case 14: /* AES 256 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + + "#Create vector with the value 1 \n" + "MOVI v18.16b, #1 \n" + "USHR v18.2d, v18.2d, #56 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "EOR v19.16b, v19.16b, v19.16b \n" + "EXT v18.16b, v18.16b, v19.16b, #8\n" + + "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n" + "LD1 {v13.2d-v15.2d}, [%[Key]], #48 \n" + "LD1 {v17.2d}, %[reg] \n" + + /* double block */ + "1: \n" + "CMP w11, #1 \n" + "BEQ 2f \n" + "CMP w11, #0 \n" + "BEQ 3f \n" + + "MOV v0.16b, v17.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v17.16b, v17.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v17.16b, v17.16b, v17.16b, #8 \n" + "SUB w11, w11, #2 \n" + "ADD v19.2d, v17.2d, v18.2d \n" /* add 1 to counter */ + "ADD v17.2d, v19.2d, v18.2d \n" /* add 1 to 
counter */ + + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "EXT v17.16b, v17.16b, v17.16b, #8 \n" + + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v19.16b, v19.16b \n" /* revert from network order */ + "REV64 v17.16b, v17.16b \n" /* revert from network order */ + + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v1.16b \n" + "AESMC v19.16b, v19.16b \n" + + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v2.16b \n" + "AESMC v19.16b, v19.16b \n" + + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v3.16b \n" + "AESMC v19.16b, v19.16b \n" + + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v4.16b \n" + "AESMC v19.16b, v19.16b \n" + + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v5.16b \n" + "AESMC v19.16b, v19.16b \n" + + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v6.16b \n" + "AESMC v19.16b, v19.16b \n" + + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v7.16b \n" + "AESMC v19.16b, v19.16b \n" + + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v8.16b \n" + "AESMC v19.16b, v19.16b \n" + + "AESE v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v9.16b \n" + "AESMC v19.16b, v19.16b \n" + + "AESE v0.16b, v14.16b \n" + "AESE v19.16b, v10.16b \n" + "AESMC v19.16b, v19.16b \n" + + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v19.16b, v11.16b \n" + "AESMC v19.16b, v19.16b \n" + + "LD1 {v16.2d}, [%[input]], #16 \n" + "AESE v19.16b, v12.16b \n" + "AESMC v19.16b, v19.16b \n" + + "EOR v0.16b, v0.16b, v16.16b \n" + "AESE v19.16b, v13.16b \n" + "AESMC v19.16b, v19.16b \n" + + "LD1 {v16.2d}, [%[input]], #16 \n" + "AESE v19.16b, v14.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v19.16b, v19.16b, v16.16b \n" + "ST1 {v19.2d}, [%[out]], #16 \n" + + "B 1b \n" + + "2: \n" + "LD1 {v16.2d}, [%[input]], #16 \n" + "MOV v0.16b, v17.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v17.16b, v17.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v17.16b, v17.16b, v17.16b, #8 \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "ADD v17.2d, v17.2d, v18.2d \n" /* add 1 to counter */ + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v17.16b, v17.16b, v17.16b, #8 \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v17.16b, v17.16b \n" /* revert from network order */ + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "#CTR operations, increment counter and xorbuf \n" + "EOR v0.16b, v0.16b, v16.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + + "3: \n" + "#store current counter value at the end \n" + "ST1 {v17.2d}, %[regOut] \n" + + + :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg), + "=r" (in) + :"0" (out), [Key] "1" (keyPt), [input] "3" (in), + [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", 
"v9", "v10","v11","v12","v13","v14","v15", + "v16", "v17", "v18", "v19" + ); + break; +#endif /* WOLFSSL_AES_256 */ + default: + WOLFSSL_MSG("Bad AES-CTR round value"); + return BAD_FUNC_ARG; + } + + aes->left = 0; + } + + /* handle non block size remaining */ + if (sz) { + wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->tmp); + IncrementAesCounter((byte*)aes->reg); + + aes->left = AES_BLOCK_SIZE; + tmp = (byte*)aes->tmp; + + while (sz--) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + } + } + return 0; + } + +#endif /* WOLFSSL_AES_COUNTER */ + +#ifdef HAVE_AESGCM + +/* + * Based from GCM implementation in wolfcrypt/src/aes.c + */ + +/* PMULL and RBIT only with AArch64 */ +/* Use ARM hardware for polynomial multiply */ +static void GMULT(byte* X, byte* Y) +{ + __asm__ volatile ( + "LD1 {v0.16b}, [%[inX]] \n" + "LD1 {v1.16b}, [%[inY]] \n" /* v1 already reflected from set key */ + "RBIT v0.16b, v0.16b \n" + + + /* Algorithm 1 from Intel GCM white paper. + "Carry-Less Multiplication and Its Usage for Computing the GCM Mode" + */ + "PMULL v3.1q, v0.1d, v1.1d \n" /* a0 * b0 = C */ + "PMULL2 v4.1q, v0.2d, v1.2d \n" /* a1 * b1 = D */ + "EXT v5.16b, v1.16b, v1.16b, #8 \n" /* b0b1 -> b1b0 */ + "PMULL v6.1q, v0.1d, v5.1d \n" /* a0 * b1 = E */ + "PMULL2 v5.1q, v0.2d, v5.2d \n" /* a1 * b0 = F */ + + "#Set a register to all 0s using EOR \n" + "EOR v7.16b, v7.16b, v7.16b \n" + "EOR v5.16b, v5.16b, v6.16b \n" /* F ^ E */ + "EXT v6.16b, v7.16b, v5.16b, #8 \n" /* get (F^E)[0] */ + "EOR v3.16b, v3.16b, v6.16b \n" /* low 128 bits in v3 */ + "EXT v6.16b, v5.16b, v7.16b, #8 \n" /* get (F^E)[1] */ + "EOR v4.16b, v4.16b, v6.16b \n" /* high 128 bits in v4 */ + + + /* Based from White Paper "Implementing GCM on ARMv8" + by Conrado P.L. Gouvea and Julio Lopez + reduction on 256bit value using Algorithm 5 */ + "MOVI v8.16b, #0x87 \n" + "USHR v8.2d, v8.2d, #56 \n" + /* v8 is now 0x00000000000000870000000000000087 reflected 0xe1....*/ + "PMULL2 v5.1q, v4.2d, v8.2d \n" + "EXT v6.16b, v5.16b, v7.16b, #8 \n" /* v7 is all 0's */ + "EOR v4.16b, v4.16b, v6.16b \n" + "EXT v6.16b, v7.16b, v5.16b, #8 \n" + "EOR v3.16b, v3.16b, v6.16b \n" + "PMULL v5.1q, v4.1d, v8.1d \n" + "EOR v4.16b, v3.16b, v5.16b \n" + + "RBIT v4.16b, v4.16b \n" + "STR q4, [%[out]] \n" + : [out] "=r" (X), "=r" (Y) + : [inX] "0" (X), [inY] "1" (Y) + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8" + ); +} + + +void GHASH(Aes* aes, const byte* a, word32 aSz, + const byte* c, word32 cSz, byte* s, word32 sSz) +{ + byte x[AES_BLOCK_SIZE]; + byte scratch[AES_BLOCK_SIZE]; + word32 blocks, partial; + byte* h = aes->H; + + XMEMSET(x, 0, AES_BLOCK_SIZE); + + /* Hash in A, the Additional Authentication Data */ + if (aSz != 0 && a != NULL) { + blocks = aSz / AES_BLOCK_SIZE; + partial = aSz % AES_BLOCK_SIZE; + /* do as many blocks as possible */ + while (blocks--) { + xorbuf(x, a, AES_BLOCK_SIZE); + GMULT(x, h); + a += AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, a, partial); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, h); + } + } + + /* Hash in C, the Ciphertext */ + if (cSz != 0 && c != NULL) { + blocks = cSz / AES_BLOCK_SIZE; + partial = cSz % AES_BLOCK_SIZE; + while (blocks--) { + xorbuf(x, c, AES_BLOCK_SIZE); + GMULT(x, h); + c += AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, c, partial); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, h); + } + } + + /* Hash in the lengths of A and C in bits */ + FlattenSzInBits(&scratch[0], 
aSz); + FlattenSzInBits(&scratch[8], cSz); + xorbuf(x, scratch, AES_BLOCK_SIZE); + + /* Copy the result (minus last GMULT) into s. */ + XMEMCPY(s, x, sSz); +} + + +#ifdef WOLFSSL_AES_128 +/* internal function : see wc_AesGcmEncrypt */ +static int Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + word32 blocks; + word32 partial; + byte counter[AES_BLOCK_SIZE]; + byte initialCounter[AES_BLOCK_SIZE]; + byte x[AES_BLOCK_SIZE]; + byte scratch[AES_BLOCK_SIZE]; + + /* Noticed different optimization levels treated head of array different. + Some cases was stack pointer plus offset others was a regester containing + address. To make uniform for passing in to inline assembly code am using + pointers to the head of each local array. + */ + byte* ctr = counter; + byte* iCtr = initialCounter; + byte* xPt = x; + byte* sPt = scratch; + byte* keyPt; /* pointer to handle pointer advencment */ + + XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(initialCounter, iv, ivSz); + initialCounter[AES_BLOCK_SIZE - 1] = 1; + } + else { + GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); + GMULT(initialCounter, aes->H); + } + XMEMCPY(counter, initialCounter, AES_BLOCK_SIZE); + + + /* Hash in the Additional Authentication Data */ + XMEMSET(x, 0, AES_BLOCK_SIZE); + if (authInSz != 0 && authIn != NULL) { + blocks = authInSz / AES_BLOCK_SIZE; + partial = authInSz % AES_BLOCK_SIZE; + /* do as many blocks as possible */ + while (blocks--) { + xorbuf(x, authIn, AES_BLOCK_SIZE); + GMULT(x, aes->H); + authIn += AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, authIn, partial); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, aes->H); + } + } + + /* do as many blocks as possible */ + blocks = sz / AES_BLOCK_SIZE; + partial = sz % AES_BLOCK_SIZE; + if (blocks > 0) { + keyPt = (byte*)aes->key; + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v13.2d}, [%[ctr]] \n" + + "#Create vector with the value 1 \n" + "MOVI v14.16b, #1 \n" + "USHR v14.2d, v14.2d, #56 \n" + "EOR v22.16b, v22.16b, v22.16b \n" + "EXT v14.16b, v14.16b, v22.16b, #8\n" + + + /*************************************************** + Get first out block for GHASH using AES encrypt + ***************************************************/ + "REV64 v13.16b, v13.16b \n" /* network order */ + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "REV64 v13.16b, v13.16b \n" /* revert from network order */ + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "MOV v0.16b, v13.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v16.2d}, %[inY] \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #1 \n" + "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOVI v23.16b, #0x87 \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v17.2d}, [%[inX]] \n" /* account for additional data */ + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "USHR v23.2d, v23.2d, #56 \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v12.2d}, [%[input]], #16 \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, 
v10.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n" + + "EOR v0.16b, v0.16b, v12.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "MOV v15.16b, v0.16b \n" + + "CBZ w11, 1f \n" /* only one block jump to final GHASH */ + + "LD1 {v12.2d}, [%[input]], #16 \n" + + /*************************************************** + Interweave GHASH and encrypt if more then 1 block + ***************************************************/ + "2: \n" + "REV64 v13.16b, v13.16b \n" /* network order */ + "EOR v15.16b, v17.16b, v15.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ + "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block (c) */ + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "REV64 v13.16b, v13.16b \n" /* revert from network order */ + "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = C */ + "MOV v0.16b, v13.16b \n" + "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */ + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */ + "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */ + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ + "SUB w11, w11, #1 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ + "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */ + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v19.16b, v19.16b, v21.16b \n" + "AESE v0.16b, v10.16b \n" + "EXT v21.16b, v22.16b, v20.16b, #8 \n" + "EOR v0.16b, v0.16b, v11.16b \n" + "EOR v18.16b, v18.16b, v21.16b \n" + + "EOR v0.16b, v0.16b, v12.16b \n" + "PMULL v20.1q, v19.1d, v23.1d \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "EOR v19.16b, v18.16b, v20.16b \n" + "MOV v15.16b, v0.16b \n" + "RBIT v17.16b, v19.16b \n" + + "CBZ w11, 1f \n" + "LD1 {v12.2d}, [%[input]], #16 \n" + "B 2b \n" + + /*************************************************** + GHASH on last block + ***************************************************/ + "1: \n" + "EOR v15.16b, v17.16b, v15.16b \n" + "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block */ + + "#store current AES counter value \n" + "ST1 {v13.2d}, [%[ctrOut]] \n" + "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = C */ + "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */ + "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */ + "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */ + "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */ + "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ + "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ + "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ + "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ + "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ + + "#Reduce product from multiplication \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */ + "EOR v19.16b, v19.16b, v21.16b \n" + "EXT v21.16b, v22.16b, v20.16b, #8 \n" + 
"EOR v18.16b, v18.16b, v21.16b \n" + "PMULL v20.1q, v19.1d, v23.1d \n" + "EOR v19.16b, v18.16b, v20.16b \n" + "RBIT v17.16b, v19.16b \n" + "STR q17, [%[xOut]] \n" /* GHASH x value for partial blocks */ + + :[out] "=r" (out), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (in) + ,[xOut] "=r" (xPt),"=m" (aes->H) + :"0" (out), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks), + [input] "3" (in) + ,[inX] "4" (xPt), [inY] "m" (aes->H) + : "cc", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14" + ,"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24" + ); + } + + /* take care of partial block sizes leftover */ + if (partial != 0) { + IncrementGcmCounter(counter); + wc_AesEncrypt(aes, counter, scratch); + xorbuf(scratch, in, partial); + XMEMCPY(out, scratch, partial); + + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, out, partial); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, aes->H); + } + + /* Hash in the lengths of A and C in bits */ + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + FlattenSzInBits(&scratch[0], authInSz); + FlattenSzInBits(&scratch[8], sz); + xorbuf(x, scratch, AES_BLOCK_SIZE); + XMEMCPY(scratch, x, AES_BLOCK_SIZE); + + keyPt = (byte*)aes->key; + __asm__ __volatile__ ( + + "LD1 {v16.16b}, [%[tag]] \n" + "LD1 {v17.16b}, %[h] \n" + "RBIT v16.16b, v16.16b \n" + + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "PMULL v18.1q, v16.1d, v17.1d \n" /* a0 * b0 = C */ + "PMULL2 v19.1q, v16.2d, v17.2d \n" /* a1 * b1 = D */ + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "EXT v20.16b, v17.16b, v17.16b, #8 \n" /* b0b1 -> b1b0 */ + "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" + "PMULL v21.1q, v16.1d, v20.1d \n" /* a0 * b1 = E */ + "PMULL2 v20.1q, v16.2d, v20.2d \n" /* a1 * b0 = F */ + "LD1 {v0.2d}, [%[ctr]] \n" + + "#Set a register to all 0s using EOR \n" + "EOR v22.16b, v22.16b, v22.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ + "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ + "MOVI v23.16b, #0x87 \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "USHR v23.2d, v23.2d, #56 \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v20.16b, v22.16b, #8 \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v19.16b, v19.16b, v21.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v22.16b, v20.16b, #8 \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v18.16b, v18.16b, v21.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v20.1q, v19.1d, v23.1d \n" + "EOR v19.16b, v18.16b, v20.16b \n" + "AESE v0.16b, v10.16b \n" + "RBIT v19.16b, v19.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n" + "EOR v19.16b, v19.16b, v0.16b \n" + "STR q19, [%[out]] \n" + + :[out] "=r" (sPt), "=r" (keyPt), "=r" (iCtr) + :[tag] "0" (sPt), [Key] "1" (keyPt), + [ctr] "2" (iCtr) , [h] "m" (aes->H) + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14", + "v15", "v16", "v17","v18", "v19", "v20","v21","v22","v23","v24" + ); + + + if (authTagSz > AES_BLOCK_SIZE) { + XMEMCPY(authTag, scratch, AES_BLOCK_SIZE); + } + else { + 
/* authTagSz can be smaller than AES_BLOCK_SIZE */ + XMEMCPY(authTag, scratch, authTagSz); + } + return 0; +} +#endif /* WOLFSSL_AES_128 */ + +#ifdef WOLFSSL_AES_192 +/* internal function : see wc_AesGcmEncrypt */ +static int Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + word32 blocks; + word32 partial; + byte counter[AES_BLOCK_SIZE]; + byte initialCounter[AES_BLOCK_SIZE]; + byte x[AES_BLOCK_SIZE]; + byte scratch[AES_BLOCK_SIZE]; + + /* Noticed different optimization levels treated head of array different. + Some cases was stack pointer plus offset others was a regester containing + address. To make uniform for passing in to inline assembly code am using + pointers to the head of each local array. + */ + byte* ctr = counter; + byte* iCtr = initialCounter; + byte* xPt = x; + byte* sPt = scratch; + byte* keyPt; /* pointer to handle pointer advencment */ + + XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(initialCounter, iv, ivSz); + initialCounter[AES_BLOCK_SIZE - 1] = 1; + } + else { + GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); + GMULT(initialCounter, aes->H); + } + XMEMCPY(counter, initialCounter, AES_BLOCK_SIZE); + + + /* Hash in the Additional Authentication Data */ + XMEMSET(x, 0, AES_BLOCK_SIZE); + if (authInSz != 0 && authIn != NULL) { + blocks = authInSz / AES_BLOCK_SIZE; + partial = authInSz % AES_BLOCK_SIZE; + /* do as many blocks as possible */ + while (blocks--) { + xorbuf(x, authIn, AES_BLOCK_SIZE); + GMULT(x, aes->H); + authIn += AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, authIn, partial); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, aes->H); + } + } + + /* do as many blocks as possible */ + blocks = sz / AES_BLOCK_SIZE; + partial = sz % AES_BLOCK_SIZE; + if (blocks > 0) { + keyPt = (byte*)aes->key; + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v13.2d}, [%[ctr]] \n" + + "#Create vector with the value 1 \n" + "MOVI v14.16b, #1 \n" + "USHR v14.2d, v14.2d, #56 \n" + "EOR v22.16b, v22.16b, v22.16b \n" + "EXT v14.16b, v14.16b, v22.16b, #8\n" + + + /*************************************************** + Get first out block for GHASH using AES encrypt + ***************************************************/ + "REV64 v13.16b, v13.16b \n" /* network order */ + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "REV64 v13.16b, v13.16b \n" /* revert from network order */ + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "MOV v0.16b, v13.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v16.2d}, %[inY] \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #1 \n" + "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" + "LD1 {v30.2d-v31.2d}, [%[Key]], #32\n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOVI v23.16b, #0x87 \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v17.2d}, [%[inX]] \n" /* account for additional data */ + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "USHR v23.2d, v23.2d, #56 \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v12.2d}, [%[input]], #16 \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b 
\n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v30.16b \n" + "EOR v0.16b, v0.16b, v31.16b \n" + + "EOR v0.16b, v0.16b, v12.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "MOV v15.16b, v0.16b \n" + + "CBZ w11, 1f \n" /* only one block jump to final GHASH */ + "LD1 {v12.2d}, [%[input]], #16 \n" + + /*************************************************** + Interweave GHASH and encrypt if more then 1 block + ***************************************************/ + "2: \n" + "REV64 v13.16b, v13.16b \n" /* network order */ + "EOR v15.16b, v17.16b, v15.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ + "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block (c) */ + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "REV64 v13.16b, v13.16b \n" /* revert from network order */ + "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = C */ + "MOV v0.16b, v13.16b \n" + "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */ + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */ + "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */ + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ + "SUB w11, w11, #1 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ + "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */ + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v19.16b, v19.16b, v21.16b \n" + "AESE v0.16b, v30.16b \n" + "EXT v21.16b, v22.16b, v20.16b, #8 \n" + "EOR v0.16b, v0.16b, v31.16b \n" + "EOR v18.16b, v18.16b, v21.16b \n" + + "EOR v0.16b, v0.16b, v12.16b \n" + "PMULL v20.1q, v19.1d, v23.1d \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "EOR v19.16b, v18.16b, v20.16b \n" + "MOV v15.16b, v0.16b \n" + "RBIT v17.16b, v19.16b \n" + + "CBZ w11, 1f \n" + "LD1 {v12.2d}, [%[input]], #16 \n" + "B 2b \n" + + /*************************************************** + GHASH on last block + ***************************************************/ + "1: \n" + "EOR v15.16b, v17.16b, v15.16b \n" + "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block */ + + "#store current AES counter value \n" + "ST1 {v13.2d}, [%[ctrOut]] \n" + "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = C */ + "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */ + "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */ + "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */ + "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */ + "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ + "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ + "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ + "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ + "EOR v19.16b, v19.16b, v21.16b \n" /* high 
128 bits in v4 */ + + "#Reduce product from multiplication \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */ + "EOR v19.16b, v19.16b, v21.16b \n" + "EXT v21.16b, v22.16b, v20.16b, #8 \n" + "EOR v18.16b, v18.16b, v21.16b \n" + "PMULL v20.1q, v19.1d, v23.1d \n" + "EOR v19.16b, v18.16b, v20.16b \n" + "RBIT v17.16b, v19.16b \n" + "STR q17, [%[xOut]] \n" /* GHASH x value for partial blocks */ + + :[out] "=r" (out), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (in) + ,[xOut] "=r" (xPt),"=m" (aes->H) + :"0" (out), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks), + [input] "3" (in) + ,[inX] "4" (xPt), [inY] "m" (aes->H) + : "cc", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14" + ,"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24","v25","v26","v27","v28","v29","v30","v31" + ); + } + + /* take care of partial block sizes leftover */ + if (partial != 0) { + IncrementGcmCounter(counter); + wc_AesEncrypt(aes, counter, scratch); + xorbuf(scratch, in, partial); + XMEMCPY(out, scratch, partial); + + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, out, partial); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, aes->H); + } + + /* Hash in the lengths of A and C in bits */ + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + FlattenSzInBits(&scratch[0], authInSz); + FlattenSzInBits(&scratch[8], sz); + xorbuf(x, scratch, AES_BLOCK_SIZE); + XMEMCPY(scratch, x, AES_BLOCK_SIZE); + + keyPt = (byte*)aes->key; + __asm__ __volatile__ ( + + "LD1 {v16.16b}, [%[tag]] \n" + "LD1 {v17.16b}, %[h] \n" + "RBIT v16.16b, v16.16b \n" + + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "PMULL v18.1q, v16.1d, v17.1d \n" /* a0 * b0 = C */ + "PMULL2 v19.1q, v16.2d, v17.2d \n" /* a1 * b1 = D */ + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "EXT v20.16b, v17.16b, v17.16b, #8 \n" /* b0b1 -> b1b0 */ + "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" + "LD1 {v30.2d-v31.2d}, [%[Key]], #32\n" + "PMULL v21.1q, v16.1d, v20.1d \n" /* a0 * b1 = E */ + "PMULL2 v20.1q, v16.2d, v20.2d \n" /* a1 * b0 = F */ + "LD1 {v0.2d}, [%[ctr]] \n" + + "#Set a register to all 0s using EOR \n" + "EOR v22.16b, v22.16b, v22.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ + "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ + "MOVI v23.16b, #0x87 \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "USHR v23.2d, v23.2d, #56 \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v20.16b, v22.16b, #8 \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v19.16b, v19.16b, v21.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v22.16b, v20.16b, #8 \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v18.16b, v18.16b, v21.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v20.1q, v19.1d, v23.1d \n" + "EOR v19.16b, v18.16b, v20.16b \n" + "AESE v0.16b, v30.16b \n" + "RBIT v19.16b, v19.16b \n" + "EOR v0.16b, v0.16b, v31.16b \n" + "EOR v19.16b, v19.16b, v0.16b 
\n" + "STR q19, [%[out]] \n" + + :[out] "=r" (sPt), "=r" (keyPt), "=r" (iCtr) + :[tag] "0" (sPt), [Key] "1" (keyPt), + [ctr] "2" (iCtr) , [h] "m" (aes->H) + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14", + "v15", "v16", "v17","v18", "v19", "v20","v21","v22","v23","v24" + ); + + + if (authTagSz > AES_BLOCK_SIZE) { + XMEMCPY(authTag, scratch, AES_BLOCK_SIZE); + } + else { + /* authTagSz can be smaller than AES_BLOCK_SIZE */ + XMEMCPY(authTag, scratch, authTagSz); + } + + return 0; +} +#endif /* WOLFSSL_AES_192 */ + +#ifdef WOLFSSL_AES_256 +/* internal function : see wc_AesGcmEncrypt */ +static int Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + word32 blocks; + word32 partial; + byte counter[AES_BLOCK_SIZE]; + byte initialCounter[AES_BLOCK_SIZE]; + byte x[AES_BLOCK_SIZE]; + byte scratch[AES_BLOCK_SIZE]; + + /* Noticed different optimization levels treated head of array different. + Some cases was stack pointer plus offset others was a regester containing + address. To make uniform for passing in to inline assembly code am using + pointers to the head of each local array. + */ + byte* ctr = counter; + byte* iCtr = initialCounter; + byte* xPt = x; + byte* sPt = scratch; + byte* keyPt; /* pointer to handle pointer advencment */ + + XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(initialCounter, iv, ivSz); + initialCounter[AES_BLOCK_SIZE - 1] = 1; + } + else { + GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); + GMULT(initialCounter, aes->H); + } + XMEMCPY(counter, initialCounter, AES_BLOCK_SIZE); + + + /* Hash in the Additional Authentication Data */ + XMEMSET(x, 0, AES_BLOCK_SIZE); + if (authInSz != 0 && authIn != NULL) { + blocks = authInSz / AES_BLOCK_SIZE; + partial = authInSz % AES_BLOCK_SIZE; + /* do as many blocks as possible */ + while (blocks--) { + xorbuf(x, authIn, AES_BLOCK_SIZE); + GMULT(x, aes->H); + authIn += AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, authIn, partial); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, aes->H); + } + } + + /* do as many blocks as possible */ + blocks = sz / AES_BLOCK_SIZE; + partial = sz % AES_BLOCK_SIZE; + if (blocks > 0) { + keyPt = (byte*)aes->key; + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v13.2d}, [%[ctr]] \n" + + "#Create vector with the value 1 \n" + "MOVI v14.16b, #1 \n" + "USHR v14.2d, v14.2d, #56 \n" + "EOR v22.16b, v22.16b, v22.16b \n" + "EXT v14.16b, v14.16b, v22.16b, #8\n" + + + /*************************************************** + Get first out block for GHASH using AES encrypt + ***************************************************/ + "REV64 v13.16b, v13.16b \n" /* network order */ + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "REV64 v13.16b, v13.16b \n" /* revert from network order */ + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "MOV v0.16b, v13.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v16.2d}, %[inY] \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #1 \n" + "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" + "LD1 {v28.2d-v31.2d}, 
[%[Key]], #64\n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOVI v23.16b, #0x87 \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v17.2d}, [%[inX]] \n" /* account for additional data */ + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "USHR v23.2d, v23.2d, #56 \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v12.2d}, [%[input]], #16 \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v28.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v29.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v30.16b \n" + "EOR v0.16b, v0.16b, v31.16b \n" + + "EOR v0.16b, v0.16b, v12.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "MOV v15.16b, v0.16b \n" + + "CBZ w11, 1f \n" /* only one block jump to final GHASH */ + "LD1 {v12.2d}, [%[input]], #16 \n" + + /*************************************************** + Interweave GHASH and encrypt if more then 1 block + ***************************************************/ + "2: \n" + "REV64 v13.16b, v13.16b \n" /* network order */ + "EOR v15.16b, v17.16b, v15.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ + "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block (c) */ + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "REV64 v13.16b, v13.16b \n" /* revert from network order */ + "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = C */ + "MOV v0.16b, v13.16b \n" + "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */ + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */ + "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */ + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ + "SUB w11, w11, #1 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ + "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */ + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v28.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v29.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v19.16b, v19.16b, v21.16b \n" + "AESE v0.16b, v30.16b \n" + "EXT v21.16b, v22.16b, v20.16b, #8 \n" + "EOR v0.16b, v0.16b, v31.16b \n" + "EOR v18.16b, v18.16b, v21.16b \n" + + "EOR v0.16b, v0.16b, v12.16b \n" + "PMULL v20.1q, v19.1d, v23.1d \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "EOR v19.16b, v18.16b, v20.16b \n" + "MOV v15.16b, v0.16b \n" + "RBIT v17.16b, v19.16b \n" + + "CBZ w11, 1f \n" + "LD1 {v12.2d}, [%[input]], #16 \n" + "B 2b \n" + + /*************************************************** + GHASH on last block + ***************************************************/ + "1: \n" + "EOR v15.16b, v17.16b, v15.16b \n" + "RBIT v15.16b, 
v15.16b \n" /* v15 is encrypted out block */ + + "#store current AES counter value \n" + "ST1 {v13.2d}, [%[ctrOut]] \n" + "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = C */ + "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */ + "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */ + "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */ + "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */ + "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ + "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ + "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ + "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ + "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ + + "#Reduce product from multiplication \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */ + "EOR v19.16b, v19.16b, v21.16b \n" + "EXT v21.16b, v22.16b, v20.16b, #8 \n" + "EOR v18.16b, v18.16b, v21.16b \n" + "PMULL v20.1q, v19.1d, v23.1d \n" + "EOR v19.16b, v18.16b, v20.16b \n" + "RBIT v17.16b, v19.16b \n" + "STR q17, [%[xOut]] \n" /* GHASH x value for partial blocks */ + + :[out] "=r" (out), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (in) + ,[xOut] "=r" (xPt),"=m" (aes->H) + :"0" (out), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks), + [input] "3" (in) + ,[inX] "4" (xPt), [inY] "m" (aes->H) + : "cc", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14" + ,"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24" + ); + } + + /* take care of partial block sizes leftover */ + if (partial != 0) { + IncrementGcmCounter(counter); + wc_AesEncrypt(aes, counter, scratch); + xorbuf(scratch, in, partial); + XMEMCPY(out, scratch, partial); + + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, out, partial); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, aes->H); + } + + /* Hash in the lengths of A and C in bits */ + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + FlattenSzInBits(&scratch[0], authInSz); + FlattenSzInBits(&scratch[8], sz); + xorbuf(x, scratch, AES_BLOCK_SIZE); + XMEMCPY(scratch, x, AES_BLOCK_SIZE); + + keyPt = (byte*)aes->key; + __asm__ __volatile__ ( + + "LD1 {v16.16b}, [%[tag]] \n" + "LD1 {v17.16b}, %[h] \n" + "RBIT v16.16b, v16.16b \n" + + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "PMULL v18.1q, v16.1d, v17.1d \n" /* a0 * b0 = C */ + "PMULL2 v19.1q, v16.2d, v17.2d \n" /* a1 * b1 = D */ + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "EXT v20.16b, v17.16b, v17.16b, #8 \n" /* b0b1 -> b1b0 */ + "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" + "LD1 {v28.2d-v31.2d}, [%[Key]], #64\n" + "PMULL v21.1q, v16.1d, v20.1d \n" /* a0 * b1 = E */ + "PMULL2 v20.1q, v16.2d, v20.2d \n" /* a1 * b0 = F */ + "LD1 {v0.2d}, [%[ctr]] \n" + + "#Set a register to all 0s using EOR \n" + "EOR v22.16b, v22.16b, v22.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ + "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ + "MOVI v23.16b, #0x87 \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "USHR v23.2d, v23.2d, #56 \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v20.16b, v22.16b, #8 \n" + "AESE v0.16b, 
v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v19.16b, v19.16b, v21.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v22.16b, v20.16b, #8 \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v18.16b, v18.16b, v21.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v28.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v29.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v20.1q, v19.1d, v23.1d \n" + "EOR v19.16b, v18.16b, v20.16b \n" + "AESE v0.16b, v30.16b \n" + "RBIT v19.16b, v19.16b \n" + "EOR v0.16b, v0.16b, v31.16b \n" + "EOR v19.16b, v19.16b, v0.16b \n" + "STR q19, [%[out]] \n" + + :[out] "=r" (sPt), "=r" (keyPt), "=r" (iCtr) + :[tag] "0" (sPt), [Key] "1" (keyPt), + [ctr] "2" (iCtr) , [h] "m" (aes->H) + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14", + "v15", "v16", "v17","v18", "v19", "v20","v21","v22","v23", + "v24","v25","v26","v27","v28","v29","v30","v31" + ); + + + if (authTagSz > AES_BLOCK_SIZE) { + XMEMCPY(authTag, scratch, AES_BLOCK_SIZE); + } + else { + /* authTagSz can be smaller than AES_BLOCK_SIZE */ + XMEMCPY(authTag, scratch, authTagSz); + } + + return 0; +} +#endif /* WOLFSSL_AES_256 */ + + +/* aarch64 with PMULL and PMULL2 + * Encrypt and tag data using AES with GCM mode. + * aes: Aes structure having already been set with set key function + * out: encrypted data output buffer + * in: plain text input buffer + * sz: size of plain text and out buffer + * iv: initialization vector + * ivSz: size of iv buffer + * authTag: buffer to hold tag + * authTagSz: size of tag buffer + * authIn: additional data buffer + * authInSz: size of additional data buffer + * + * Notes: + * GHASH multiplication based from Algorithm 1 from Intel GCM white paper. + * "Carry-Less Multiplication and Its Usage for Computing the GCM Mode" + * + * GHASH reduction Based from White Paper "Implementing GCM on ARMv8" + * by Conrado P.L. Gouvea and Julio Lopez reduction on 256bit value using + * Algorithm 5 + */ +int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + /* sanity checks */ + if (aes == NULL || (iv == NULL && ivSz > 0) || + (authTag == NULL) || + (authIn == NULL && authInSz > 0) || + (in == NULL && sz > 0) || + (out == NULL && sz > 0)) { + WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0"); + return BAD_FUNC_ARG; + } + + if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ || authTagSz > AES_BLOCK_SIZE) { + WOLFSSL_MSG("GcmEncrypt authTagSz error"); + return BAD_FUNC_ARG; + } + + switch (aes->rounds) { +#ifdef WOLFSSL_AES_128 + case 10: + return Aes128GcmEncrypt(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, authIn, authInSz); +#endif +#ifdef WOLFSSL_AES_192 + case 12: + return Aes192GcmEncrypt(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, authIn, authInSz); +#endif +#ifdef WOLFSSL_AES_256 + case 14: + return Aes256GcmEncrypt(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, authIn, authInSz); +#endif + default: + WOLFSSL_MSG("AES-GCM invalid round number"); + return BAD_FUNC_ARG; + } +} + + +#ifdef HAVE_AES_DECRYPT +/* + * Check tag and decrypt data using AES with GCM mode. 
+ * aes: Aes structure having already been set with set key function + * out: decrypted data output buffer + * in: cipher text buffer + * sz: size of plain text and out buffer + * iv: initialization vector + * ivSz: size of iv buffer + * authTag: buffer holding tag + * authTagSz: size of tag buffer + * authIn: additional data buffer + * authInSz: size of additional data buffer + */ +int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + word32 blocks = sz / AES_BLOCK_SIZE; + word32 partial = sz % AES_BLOCK_SIZE; + const byte* c = in; + byte* p = out; + byte counter[AES_BLOCK_SIZE]; + byte initialCounter[AES_BLOCK_SIZE]; + byte *ctr ; + byte scratch[AES_BLOCK_SIZE]; + + ctr = counter ; + + /* sanity checks */ + if (aes == NULL || (iv == NULL && ivSz > 0) || + (authTag == NULL) || + (authIn == NULL && authInSz > 0) || + (in == NULL && sz > 0) || + (out == NULL && sz > 0)) { + WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0"); + return BAD_FUNC_ARG; + } + + XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(initialCounter, iv, ivSz); + initialCounter[AES_BLOCK_SIZE - 1] = 1; + } + else { + GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); + GMULT(initialCounter, aes->H); + } + XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE); + + /* Calculate the authTag again using the received auth data and the + * cipher text. */ + { + byte Tprime[AES_BLOCK_SIZE]; + byte EKY0[AES_BLOCK_SIZE]; + + GHASH(aes, authIn, authInSz, in, sz, Tprime, sizeof(Tprime)); + GMULT(Tprime, aes->H); + wc_AesEncrypt(aes, ctr, EKY0); + xorbuf(Tprime, EKY0, sizeof(Tprime)); + + if (ConstantCompare(authTag, Tprime, authTagSz) != 0) { + return AES_GCM_AUTH_E; + } + } + + /* do as many blocks as possible */ + if (blocks > 0) { + /* pointer needed because it is incremented when read, causing + * an issue with call to encrypt/decrypt leftovers */ + byte* keyPt = (byte*)aes->key; + switch(aes->rounds) { +#ifdef WOLFSSL_AES_128 + case 10: /* AES 128 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + + "#Create vector with the value 1 \n" + "MOVI v14.16b, #1 \n" + "USHR v14.2d, v14.2d, #56 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "EOR v13.16b, v13.16b, v13.16b \n" + "EXT v14.16b, v14.16b, v13.16b, #8 \n" + + "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n" + "LD1 {v12.2d}, [%[ctr]] \n" + "LD1 {v13.2d}, [%[input]], #16 \n" + + "1: \n" + "REV64 v12.16b, v12.16b \n" /* network order */ + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "ADD v12.2d, v12.2d, v14.2d \n" /* add 1 to counter */ + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "REV64 v12.16b, v12.16b \n" /* revert from network order */ + "MOV v0.16b, v12.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #1 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n" + + "EOR v0.16b, v0.16b, v13.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + + "CBZ w11, 2f \n" + "LD1 {v13.2d}, [%[input]], 
#16 \n" + "B 1b \n" + + "2: \n" + "#store current counter value at the end \n" + "ST1 {v12.16b}, [%[ctrOut]] \n" + + :[out] "=r" (p), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (c) + :"0" (p), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks), + [input] "3" (c) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14" + ); + break; +#endif +#ifdef WOLFSSL_AES_192 + case 12: /* AES 192 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + + "#Create vector with the value 1 \n" + "MOVI v16.16b, #1 \n" + "USHR v16.2d, v16.2d, #56 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "EOR v14.16b, v14.16b, v14.16b \n" + "EXT v16.16b, v16.16b, v14.16b, #8 \n" + + "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n" + "LD1 {v13.2d}, [%[Key]], #16 \n" + "LD1 {v14.2d}, [%[ctr]] \n" + "LD1 {v15.2d}, [%[input]], #16 \n" + + "1: \n" + "REV64 v14.16b, v14.16b \n" /* network order */ + "EXT v14.16b, v14.16b, v14.16b, #8 \n" + "ADD v14.2d, v14.2d, v16.2d \n" /* add 1 to counter */ + "EXT v14.16b, v14.16b, v14.16b, #8 \n" + "REV64 v14.16b, v14.16b \n" /* revert from network order */ + "MOV v0.16b, v14.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #1 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n" + + "EOR v0.16b, v0.16b, v15.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + + "CBZ w11, 2f \n" + "LD1 {v15.2d}, [%[input]], #16 \n" + "B 1b \n" + + "2: \n" + "#store current counter value at the end \n" + "ST1 {v14.2d}, [%[ctrOut]] \n" + + :[out] "=r" (p), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (c) + :"0" (p), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks), + [input] "3" (c) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16" + ); + break; +#endif /* WOLFSSL_AES_192 */ +#ifdef WOLFSSL_AES_256 + case 14: /* AES 256 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + + "#Create vector with the value 1 \n" + "MOVI v18.16b, #1 \n" + "USHR v18.2d, v18.2d, #56 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "EOR v19.16b, v19.16b, v19.16b \n" + "EXT v18.16b, v18.16b, v19.16b, #8 \n" + + "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n" + "LD1 {v13.2d-v15.2d}, [%[Key]], #48 \n" + "LD1 {v17.2d}, [%[ctr]] \n" + "LD1 {v16.2d}, [%[input]], #16 \n" + + "1: \n" + "REV64 v17.16b, v17.16b \n" /* network order */ + "EXT v17.16b, v17.16b, v17.16b, #8 \n" + "ADD v17.2d, v17.2d, v18.2d \n" /* add 1 to counter */ + "EXT v17.16b, v17.16b, v17.16b, #8 \n" + "REV64 v17.16b, v17.16b \n" /* revert from network order */ + "MOV v0.16b, v17.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #1 \n" + "AESE v0.16b, v5.16b \n" + 
"AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + + "EOR v0.16b, v0.16b, v16.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + + "CBZ w11, 2f \n" + "LD1 {v16.2d}, [%[input]], #16 \n" + "B 1b \n" + + "2: \n" + "#store current counter value at the end \n" + "ST1 {v17.2d}, [%[ctrOut]] \n" + + :[out] "=r" (p), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (c) + :"0" (p), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks), + [input] "3" (c) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19" + ); + break; +#endif /* WOLFSSL_AES_256 */ + default: + WOLFSSL_MSG("Bad AES-GCM round value"); + return BAD_FUNC_ARG; + } + } + if (partial != 0) { + IncrementGcmCounter(ctr); + wc_AesEncrypt(aes, ctr, scratch); + + /* check if pointer is null after main AES-GCM blocks + * helps static analysis */ + if (p == NULL || c == NULL) { + return BAD_STATE_E; + } + xorbuf(scratch, c, partial); + XMEMCPY(p, scratch, partial); + } + return 0; +} + +#endif /* HAVE_AES_DECRYPT */ +#endif /* HAVE_AESGCM */ + + +/*************************************** + * not 64 bit so use 32 bit mode +****************************************/ +#else + +/* AES CCM/GCM use encrypt direct but not decrypt */ +#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + /* + AESE exor's input with round key + shift rows of exor'ed result + sub bytes for shifted rows + */ + + word32* keyPt = aes->key; + __asm__ __volatile__ ( + "VLD1.32 {q0}, [%[CtrIn]] \n" + "VLDM %[Key]!, {q1-q4} \n" + + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q2\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q1}, [%[Key]]! \n" + "AESE.8 q0, q3\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q2}, [%[Key]]! \n" + "AESE.8 q0, q4\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q3}, [%[Key]]! \n" + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q4}, [%[Key]]! \n" + "AESE.8 q0, q2\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q3\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q1}, [%[Key]]! \n" + "AESE.8 q0, q4\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q2}, [%[Key]]! \n" + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q2\n" + + "MOV r12, %[R] \n" + "CMP r12, #10 \n" + "BEQ 1f \n" + "VLD1.32 {q1}, [%[Key]]! \n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q2}, [%[Key]]! \n" + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q2\n" + + "CMP r12, #12 \n" + "BEQ 1f \n" + "VLD1.32 {q1}, [%[Key]]! \n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q2}, [%[Key]]! \n" + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q2\n" + + "#Final AddRoundKey then store result \n" + "1: \n" + "VLD1.32 {q1}, [%[Key]]! 
\n" + "VEOR.32 q0, q0, q1\n" + "VST1.32 {q0}, [%[CtrOut]] \n" + + :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (aes->rounds), + "=r" (inBlock) + :"0" (outBlock), [Key] "1" (keyPt), [R] "2" (aes->rounds), + [CtrIn] "3" (inBlock) + : "cc", "memory", "r12", "q0", "q1", "q2", "q3", "q4" + ); + + return 0; + } +#endif /* AES_GCM, AES_CCM, DIRECT or COUNTER */ +#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + #ifdef HAVE_AES_DECRYPT + static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + /* + AESE exor's input with round key + shift rows of exor'ed result + sub bytes for shifted rows + */ + + word32* keyPt = aes->key; + __asm__ __volatile__ ( + "VLD1.32 {q0}, [%[CtrIn]] \n" + "VLDM %[Key]!, {q1-q4} \n" + + "AESD.8 q0, q1\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q2\n" + "AESIMC.8 q0, q0\n" + "VLD1.32 {q1}, [%[Key]]! \n" + "AESD.8 q0, q3\n" + "AESIMC.8 q0, q0\n" + "VLD1.32 {q2}, [%[Key]]! \n" + "AESD.8 q0, q4\n" + "AESIMC.8 q0, q0\n" + "VLD1.32 {q3}, [%[Key]]! \n" + "AESD.8 q0, q1\n" + "AESIMC.8 q0, q0\n" + "VLD1.32 {q4}, [%[Key]]! \n" + "AESD.8 q0, q2\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q3\n" + "AESIMC.8 q0, q0\n" + "VLD1.32 {q1}, [%[Key]]! \n" + "AESD.8 q0, q4\n" + "AESIMC.8 q0, q0\n" + "VLD1.32 {q2}, [%[Key]]! \n" + "AESD.8 q0, q1\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q2\n" + + "MOV r12, %[R] \n" + "CMP r12, #10 \n" + "BEQ 1f \n" + "VLD1.32 {q1}, [%[Key]]! \n" + "AESIMC.8 q0, q0\n" + "VLD1.32 {q2}, [%[Key]]! \n" + "AESD.8 q0, q1\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q2\n" + + "CMP r12, #12 \n" + "BEQ 1f \n" + "VLD1.32 {q1}, [%[Key]]! \n" + "AESIMC.8 q0, q0\n" + "VLD1.32 {q2}, [%[Key]]! \n" + "AESD.8 q0, q1\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q2\n" + + "#Final AddRoundKey then store result \n" + "1: \n" + "VLD1.32 {q1}, [%[Key]]! \n" + "VEOR.32 q0, q0, q1\n" + "VST1.32 {q0}, [%[CtrOut]] \n" + + :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (aes->rounds), + "=r" (inBlock) + :"0" (outBlock), [Key] "1" (keyPt), [R] "2" (aes->rounds), + [CtrIn] "3" (inBlock) + : "cc", "memory", "r12", "q0", "q1", "q2", "q3", "q4" + ); + + return 0; +} + #endif /* HAVE_AES_DECRYPT */ +#endif /* DIRECT or COUNTER */ + +/* AES-CBC */ +#ifdef HAVE_AES_CBC + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + word32 numBlocks = sz / AES_BLOCK_SIZE; + + if (aes == NULL || out == NULL || (in == NULL && sz > 0)) { + return BAD_FUNC_ARG; + } + + /* do as many block size ops as possible */ + if (numBlocks > 0) { + word32* keyPt = aes->key; + word32* regPt = aes->reg; + /* + AESE exor's input with round key + shift rows of exor'ed result + sub bytes for shifted rows + + note: grouping AESE & AESMC together as pairs reduces latency + */ + switch(aes->rounds) { +#ifdef WOLFSSL_AES_128 + case 10: /* AES 128 BLOCK */ + __asm__ __volatile__ ( + "MOV r11, %[blocks] \n" + "VLD1.32 {q1}, [%[Key]]! \n" + "VLD1.32 {q2}, [%[Key]]! \n" + "VLD1.32 {q3}, [%[Key]]! \n" + "VLD1.32 {q4}, [%[Key]]! \n" + "VLD1.32 {q5}, [%[Key]]! \n" + "VLD1.32 {q6}, [%[Key]]! \n" + "VLD1.32 {q7}, [%[Key]]! \n" + "VLD1.32 {q8}, [%[Key]]! \n" + "VLD1.32 {q9}, [%[Key]]! \n" + "VLD1.32 {q10}, [%[Key]]! \n" + "VLD1.32 {q11}, [%[Key]]! 
\n" + "VLD1.32 {q0}, [%[reg]] \n" + "VLD1.32 {q12}, [%[input]]!\n" + + "1:\n" + "#CBC operations, xorbuf in with current aes->reg \n" + "VEOR.32 q0, q0, q12 \n" + "AESE.8 q0, q1 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q2 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q3 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q4 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q5 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q6 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q7 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q8 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q9 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q10\n" + "VEOR.32 q0, q0, q11 \n" + "SUB r11, r11, #1 \n" + "VST1.32 {q0}, [%[out]]! \n" + + "CMP r11, #0 \n" + "BEQ 2f \n" + "VLD1.32 {q12}, [%[input]]! \n" + "B 1b \n" + + "2:\n" + "#store current counter value at the end \n" + "VST1.32 {q0}, [%[regOut]] \n" + + :[out] "=r" (out), [regOut] "=r" (regPt) + :"0" (out), [Key] "r" (keyPt), [input] "r" (in), + [blocks] "r" (numBlocks), [reg] "1" (regPt) + : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5", + "q6", "q7", "q8", "q9", "q10", "q11", "q12" + ); + break; +#endif /* WOLFSSL_AES_128 */ +#ifdef WOLFSSL_AES_192 + case 12: /* AES 192 BLOCK */ + __asm__ __volatile__ ( + "MOV r11, %[blocks] \n" + "VLD1.32 {q1}, [%[Key]]! \n" + "VLD1.32 {q2}, [%[Key]]! \n" + "VLD1.32 {q3}, [%[Key]]! \n" + "VLD1.32 {q4}, [%[Key]]! \n" + "VLD1.32 {q5}, [%[Key]]! \n" + "VLD1.32 {q6}, [%[Key]]! \n" + "VLD1.32 {q7}, [%[Key]]! \n" + "VLD1.32 {q8}, [%[Key]]! \n" + "VLD1.32 {q9}, [%[Key]]! \n" + "VLD1.32 {q10}, [%[Key]]! \n" + "VLD1.32 {q11}, [%[Key]]! \n" + "VLD1.32 {q0}, [%[reg]] \n" + "VLD1.32 {q12}, [%[input]]!\n" + "VLD1.32 {q13}, [%[Key]]! \n" + "VLD1.32 {q14}, [%[Key]]! \n" + + "1:\n" + "#CBC operations, xorbuf in with current aes->reg \n" + "VEOR.32 q0, q0, q12 \n" + "AESE.8 q0, q1 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q2 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q3 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q4 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q5 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q6 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q7 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q8 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q9 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q10 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q11 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q13\n" + "VEOR.32 q0, q0, q14 \n" + "SUB r11, r11, #1 \n" + "VST1.32 {q0}, [%[out]]! \n" + + "CMP r11, #0 \n" + "BEQ 2f \n" + "VLD1.32 {q12}, [%[input]]! \n" + "B 1b \n" + + "2:\n" + "#store current counter qalue at the end \n" + "VST1.32 {q0}, [%[regOut]] \n" + + :[out] "=r" (out), [regOut] "=r" (regPt) + :"0" (out), [Key] "r" (keyPt), [input] "r" (in), + [blocks] "r" (numBlocks), [reg] "1" (regPt) + : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5", + "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14" + ); + break; +#endif /* WOLFSSL_AES_192 */ +#ifdef WOLFSSL_AES_256 + case 14: /* AES 256 BLOCK */ + __asm__ __volatile__ ( + "MOV r11, %[blocks] \n" + "VLD1.32 {q1}, [%[Key]]! \n" + "VLD1.32 {q2}, [%[Key]]! \n" + "VLD1.32 {q3}, [%[Key]]! \n" + "VLD1.32 {q4}, [%[Key]]! \n" + "VLD1.32 {q5}, [%[Key]]! \n" + "VLD1.32 {q6}, [%[Key]]! \n" + "VLD1.32 {q7}, [%[Key]]! \n" + "VLD1.32 {q8}, [%[Key]]! \n" + "VLD1.32 {q9}, [%[Key]]! \n" + "VLD1.32 {q10}, [%[Key]]! \n" + "VLD1.32 {q11}, [%[Key]]! \n" + "VLD1.32 {q0}, [%[reg]] \n" + "VLD1.32 {q12}, [%[input]]!\n" + "VLD1.32 {q13}, [%[Key]]! \n" + "VLD1.32 {q14}, [%[Key]]! 
\n" + + "1:\n" + "#CBC operations, xorbuf in with current aes->reg \n" + "VEOR.32 q0, q0, q12 \n" + "AESE.8 q0, q1 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q2 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q3 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q4 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q5 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q6 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q7 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q8 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q9 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q10 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q11 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q13 \n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q15}, [%[Key]]! \n" + "AESE.8 q0, q14 \n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q15\n" + "VLD1.32 {q15}, [%[Key]] \n" + "VEOR.32 q0, q0, q15 \n" + "SUB r11, r11, #1 \n" + "VST1.32 {q0}, [%[out]]! \n" + "SUB %[Key], %[Key], #16 \n" + + "CMP r11, #0 \n" + "BEQ 2f \n" + "VLD1.32 {q12}, [%[input]]! \n" + "B 1b \n" + + "2:\n" + "#store current counter qalue at the end \n" + "VST1.32 {q0}, [%[regOut]] \n" + + :[out] "=r" (out), [regOut] "=r" (regPt), "=r" (keyPt) + :"0" (out), [Key] "2" (keyPt), [input] "r" (in), + [blocks] "r" (numBlocks), [reg] "1" (regPt) + : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5", + "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" + ); + break; +#endif /* WOLFSSL_AES_256 */ + default: + WOLFSSL_MSG("Bad AES-CBC round value"); + return BAD_FUNC_ARG; + } + } + + return 0; + } + + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + word32 numBlocks = sz / AES_BLOCK_SIZE; + + if (aes == NULL || out == NULL || (in == NULL && sz > 0) + || sz % AES_BLOCK_SIZE != 0) { + return BAD_FUNC_ARG; + } + + /* do as many block size ops as possible */ + if (numBlocks > 0) { + word32* keyPt = aes->key; + word32* regPt = aes->reg; + switch(aes->rounds) { +#ifdef WOLFSSL_AES_128 + case 10: /* AES 128 BLOCK */ + __asm__ __volatile__ ( + "MOV r11, %[blocks] \n" + "VLD1.32 {q1}, [%[Key]]! \n" + "VLD1.32 {q2}, [%[Key]]! \n" + "VLD1.32 {q3}, [%[Key]]! \n" + "VLD1.32 {q4}, [%[Key]]! \n" + "VLD1.32 {q5}, [%[Key]]! \n" + "VLD1.32 {q6}, [%[Key]]! \n" + "VLD1.32 {q7}, [%[Key]]! \n" + "VLD1.32 {q8}, [%[Key]]! \n" + "VLD1.32 {q9}, [%[Key]]! \n" + "VLD1.32 {q10}, [%[Key]]! \n" + "VLD1.32 {q11}, [%[Key]]! \n" + "VLD1.32 {q13}, [%[reg]] \n" + "VLD1.32 {q0}, [%[input]]!\n" + + "1:\n" + "VMOV.32 q12, q0 \n" + "AESD.8 q0, q1\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q2\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q3\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q4\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q5\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q6\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q7\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q8\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q9\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q10\n" + "VEOR.32 q0, q0, q11\n" + + "VEOR.32 q0, q0, q13\n" + "SUB r11, r11, #1 \n" + "VST1.32 {q0}, [%[out]]! \n" + "VMOV.32 q13, q12 \n" + + "CMP r11, #0 \n" + "BEQ 2f \n" + "VLD1.32 {q0}, [%[input]]! \n" + "B 1b \n" + + "2: \n" + "#store current counter qalue at the end \n" + "VST1.32 {q13}, [%[regOut]] \n" + + :[out] "=r" (out), [regOut] "=r" (regPt) + :"0" (out), [Key] "r" (keyPt), [input] "r" (in), + [blocks] "r" (numBlocks), [reg] "1" (regPt) + : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5", + "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13" + ); + break; +#endif /* WOLFSSL_AES_128 */ +#ifdef WOLFSSL_AES_192 + case 12: /* AES 192 BLOCK */ + __asm__ __volatile__ ( + "MOV r11, %[blocks] \n" + "VLD1.32 {q1}, [%[Key]]! 
\n" + "VLD1.32 {q2}, [%[Key]]! \n" + "VLD1.32 {q3}, [%[Key]]! \n" + "VLD1.32 {q4}, [%[Key]]! \n" + "VLD1.32 {q5}, [%[Key]]! \n" + "VLD1.32 {q6}, [%[Key]]! \n" + "VLD1.32 {q7}, [%[Key]]! \n" + "VLD1.32 {q8}, [%[Key]]! \n" + "VLD1.32 {q9}, [%[Key]]! \n" + "VLD1.32 {q10}, [%[Key]]! \n" + "VLD1.32 {q11}, [%[Key]]! \n" + "VLD1.32 {q12}, [%[Key]]! \n" + "VLD1.32 {q13}, [%[Key]]! \n" + "VLD1.32 {q14}, [%[reg]] \n" + "VLD1.32 {q0}, [%[input]]!\n" + + "1: \n" + "VMOV.32 q15, q0 \n" + "AESD.8 q0, q1\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q2\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q3\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q4\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q5\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q6\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q7\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q8\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q9\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q10\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q11\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q12\n" + "VEOR.32 q0, q0, q13\n" + + "VEOR.32 q0, q0, q14\n" + "SUB r11, r11, #1 \n" + "VST1.32 {q0}, [%[out]]! \n" + "VMOV.32 q14, q15 \n" + + "CMP r11, #0 \n" + "BEQ 2f \n" + "VLD1.32 {q0}, [%[input]]! \n" + "B 1b \n" + + "2:\n" + "#store current counter value at the end \n" + "VST1.32 {q15}, [%[regOut]] \n" + + :[out] "=r" (out), [regOut] "=r" (regPt) + :"0" (out), [Key] "r" (keyPt), [input] "r" (in), + [blocks] "r" (numBlocks), [reg] "1" (regPt) + : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5", + "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" + ); + break; +#endif /* WOLFSSL_AES_192 */ +#ifdef WOLFSSL_AES_256 + case 14: /* AES 256 BLOCK */ + __asm__ __volatile__ ( + "MOV r11, %[blocks] \n" + "VLD1.32 {q1}, [%[Key]]! \n" + "VLD1.32 {q2}, [%[Key]]! \n" + "VLD1.32 {q3}, [%[Key]]! \n" + "VLD1.32 {q4}, [%[Key]]! \n" + "VLD1.32 {q5}, [%[Key]]! \n" + "VLD1.32 {q6}, [%[Key]]! \n" + "VLD1.32 {q7}, [%[Key]]! \n" + "VLD1.32 {q8}, [%[Key]]! \n" + "VLD1.32 {q9}, [%[Key]]! \n" + "VLD1.32 {q10}, [%[Key]]! \n" + "VLD1.32 {q11}, [%[Key]]! \n" + "VLD1.32 {q12}, [%[Key]]! \n" + "VLD1.32 {q14}, [%[reg]] \n" + "VLD1.32 {q0}, [%[input]]!\n" + + "1:\n" + "VMOV.32 q15, q0 \n" + "AESD.8 q0, q1\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q2\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q3\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q4\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q5\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q6\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q7\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q8\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q9\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q10\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q11\n" + "AESIMC.8 q0, q0\n" + "VLD1.32 {q13}, [%[Key]]! \n" + "AESD.8 q0, q12\n" + "AESIMC.8 q0, q0\n" + "AESD.8 q0, q13\n" + "AESIMC.8 q0, q0\n" + "VLD1.32 {q13}, [%[Key]]! \n" + "AESD.8 q0, q13\n" + "VLD1.32 {q13}, [%[Key]] \n" + "VEOR.32 q0, q0, q13\n" + "SUB %[Key], %[Key], #32 \n" + + "VEOR.32 q0, q0, q14\n" + "SUB r11, r11, #1 \n" + "VST1.32 {q0}, [%[out]]! \n" + "VMOV.32 q14, q15 \n" + + "CMP r11, #0 \n" + "BEQ 2f \n" + "VLD1.32 {q0}, [%[input]]! 
\n" + "B 1b \n" + + "2:\n" + "#store current counter value at the end \n" + "VST1.32 {q15}, [%[regOut]] \n" + + :[out] "=r" (out), [regOut] "=r" (regPt) + :"0" (out), [Key] "r" (keyPt), [input] "r" (in), + [blocks] "r" (numBlocks), [reg] "1" (regPt) + : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5", + "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" + ); + break; +#endif /* WOLFSSL_AES_256 */ + default: + WOLFSSL_MSG("Bad AES-CBC round value"); + return BAD_FUNC_ARG; + } + } + + return 0; + } + #endif + +#endif /* HAVE_AES_CBC */ + +/* AES-CTR */ +#ifdef WOLFSSL_AES_COUNTER + + /* Increment AES counter */ + static WC_INLINE void IncrementAesCounter(byte* inOutCtr) + { + int i; + + /* in network byte order so start at end and work back */ + for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) { + if (++inOutCtr[i]) /* we're done unless we overflow */ + return; + } + } + + int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + byte* tmp; + word32 numBlocks; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; + + /* consume any unused bytes left in aes->tmp */ + while (aes->left && sz) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + sz--; + } + + /* do as many block size ops as possible */ + numBlocks = sz/AES_BLOCK_SIZE; + if (numBlocks > 0) { + /* pointer needed because it is incremented when read, causing + * an issue with call to encrypt/decrypt leftovers */ + word32* keyPt = aes->key; + word32* regPt = aes->reg; + sz -= numBlocks * AES_BLOCK_SIZE; + switch(aes->rounds) { +#ifdef WOLFSSL_AES_128 + case 10: /* AES 128 BLOCK */ + __asm__ __volatile__ ( + "MOV r11, %[blocks] \n" + "VLDM %[Key]!, {q1-q4} \n" + + "#Create vector with the value 1 \n" + "VMOV.u32 q15, #1 \n" + "VSHR.u64 q15, q15, #32 \n" + "VLDM %[Key]!, {q5-q8} \n" + "VEOR.32 q14, q14, q14 \n" + "VLDM %[Key]!, {q9-q11} \n" + "VEXT.8 q14, q15, q14, #8\n" + + "VLD1.32 {q13}, [%[reg]]\n" + + /* double block */ + "1: \n" + "CMP r11, #1 \n" + "BEQ 2f \n" + "CMP r11, #0 \n" + "BEQ 3f \n" + + "VMOV.32 q0, q13 \n" + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13 \n" /* network order */ + "AESE.8 q0, q2\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "SUB r11, r11, #2 \n" + "VADD.i32 q15, q13, q14 \n" /* add 1 to counter */ + "VADD.i32 q13, q15, q14 \n" /* add 1 to counter */ + "AESE.8 q0, q3\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q15, q15, q15, #8 \n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q4\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q15, q15\n" /* revert from network order */ + "VREV64.8 q13, q13\n" /* revert from network order */ + "AESE.8 q0, q5\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q1\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q6\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q2\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q7\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q3\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q8\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q4\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q9\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q5\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q10\n" + "AESE.8 q15, q6\n" + "AESMC.8 q15, q15\n" + "VEOR.32 q0, q0, q11\n" + + "AESE.8 q15, q7\n" + "AESMC.8 q15, q15\n" + "VLD1.32 {q12}, [%[input]]! \n" + "AESE.8 q15, q8\n" + "AESMC.8 q15, q15\n" + + "VEOR.32 q0, q0, q12\n" + "AESE.8 q15, q9\n" + "AESMC.8 q15, q15\n" + + "VLD1.32 {q12}, [%[input]]! \n" + "AESE.8 q15, q10\n" + "VST1.32 {q0}, [%[out]]! 
\n" + "VEOR.32 q15, q15, q11\n" + "VEOR.32 q15, q15, q12\n" + "VST1.32 {q15}, [%[out]]! \n" + + "B 1b \n" + + /* single block */ + "2: \n" + "VMOV.32 q0, q13 \n" + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13 \n" /* network order */ + "AESE.8 q0, q2\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q3\n" + "AESMC.8 q0, q0\n" + "VADD.i32 q13, q13, q14 \n" /* add 1 to counter */ + "AESE.8 q0, q4\n" + "AESMC.8 q0, q0\n" + "SUB r11, r11, #1 \n" + "AESE.8 q0, q5\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q6\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13\n" /* revert from network order */ + "AESE.8 q0, q7\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q8\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q9\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q10\n" + "VLD1.32 {q12}, [%[input]]! \n" + "VEOR.32 q0, q0, q11\n" + "#CTR operations, increment counter and xorbuf \n" + "VEOR.32 q0, q0, q12\n" + "VST1.32 {q0}, [%[out]]! \n" + + "3: \n" + "#store current counter qalue at the end \n" + "VST1.32 {q13}, [%[regOut]] \n" + + :[out] "=r" (out), "=r" (keyPt), [regOut] "=r" (regPt), + "=r" (in) + :"0" (out), [Key] "1" (keyPt), [input] "3" (in), + [blocks] "r" (numBlocks), [reg] "2" (regPt) + : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5", + "q6", "q7", "q8", "q9", "q10","q11","q12","q13","q14", "q15" + ); + break; +#endif /* WOLFSSL_AES_128 */ +#ifdef WOLFSSL_AES_192 + case 12: /* AES 192 BLOCK */ + __asm__ __volatile__ ( + "MOV r11, %[blocks] \n" + "VLDM %[Key]!, {q1-q4} \n" + + "#Create vector with the value 1 \n" + "VMOV.u32 q15, #1 \n" + "VSHR.u64 q15, q15, #32 \n" + "VLDM %[Key]!, {q5-q8} \n" + "VEOR.32 q14, q14, q14 \n" + "VEXT.8 q14, q15, q14, #8\n" + + "VLDM %[Key]!, {q9-q10} \n" + "VLD1.32 {q13}, [%[reg]]\n" + + /* double block */ + "1: \n" + "CMP r11, #1 \n" + "BEQ 2f \n" + "CMP r11, #0 \n" + "BEQ 3f \n" + + "VMOV.32 q0, q13\n" + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13 \n" /* network order */ + "AESE.8 q0, q2\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "SUB r11, r11, #2 \n" + "VADD.i32 q15, q13, q14 \n" /* add 1 to counter */ + "VADD.i32 q13, q15, q14 \n" /* add 1 to counter */ + "AESE.8 q0, q3\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q15, q15, q15, #8 \n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q4\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q15, q15\n" /* revert from network order */ + "VREV64.8 q13, q13\n" /* revert from network order */ + "AESE.8 q0, q5\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q1\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q6\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q2\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q7\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q3\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q8\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q4\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q9\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q5\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q10\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q11}, [%[Key]]! \n" + "AESE.8 q15, q6\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q11\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q7\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q15, q8\n" + "AESMC.8 q15, q15\n" + + "VLD1.32 {q12}, [%[Key]]! \n" + "AESE.8 q15, q9\n" + "AESMC.8 q15, q15\n" + "AESE.8 q15, q10\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q15, q11\n" + "AESMC.8 q15, q15\n" + "VLD1.32 {q11}, [%[Key]] \n" + "AESE.8 q0, q12\n" + "AESE.8 q15, q12\n" + + "VLD1.32 {q12}, [%[input]]! \n" + "VEOR.32 q0, q0, q11\n" + "VEOR.32 q15, q15, q11\n" + "VEOR.32 q0, q0, q12\n" + + "VLD1.32 {q12}, [%[input]]! 
\n" + "VST1.32 {q0}, [%[out]]! \n" + "VEOR.32 q15, q15, q12\n" + "VST1.32 {q15}, [%[out]]! \n" + "SUB %[Key], %[Key], #32 \n" + + "B 1b \n" + + + /* single block */ + "2: \n" + "VLD1.32 {q11}, [%[Key]]! \n" + "VMOV.32 q0, q13 \n" + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13 \n" /* network order */ + "AESE.8 q0, q2\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q3\n" + "AESMC.8 q0, q0\n" + "VADD.i32 q13, q13, q14 \n" /* add 1 to counter */ + "AESE.8 q0, q4\n" + "AESMC.8 q0, q0\n" + "SUB r11, r11, #1 \n" + "AESE.8 q0, q5\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q6\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13\n" /* revert from network order */ + "AESE.8 q0, q7\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q8\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q9\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q10\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q12}, [%[Key]]! \n" + "AESE.8 q0, q11\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q11}, [%[Key]] \n" + "AESE.8 q0, q12\n" + "VLD1.32 {q12}, [%[input]]! \n" + "VEOR.32 q0, q0, q11\n" + "#CTR operations, increment counter and xorbuf \n" + "VEOR.32 q0, q0, q12\n" + "VST1.32 {q0}, [%[out]]! \n" + + "3: \n" + "#store current counter qalue at the end \n" + "VST1.32 {q13}, [%[regOut]] \n" + + :[out] "=r" (out), "=r" (keyPt), [regOut] "=r" (regPt), + "=r" (in) + :"0" (out), [Key] "1" (keyPt), [input] "3" (in), + [blocks] "r" (numBlocks), [reg] "2" (regPt) + : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5", + "q6", "q7", "q8", "q9", "q10","q11","q12","q13","q14" + ); + break; +#endif /* WOLFSSL_AES_192 */ +#ifdef WOLFSSL_AES_256 + case 14: /* AES 256 BLOCK */ + __asm__ __volatile__ ( + "MOV r11, %[blocks] \n" + "VLDM %[Key]!, {q1-q4} \n" + + "#Create vector with the value 1 \n" + "VMOV.u32 q15, #1 \n" + "VSHR.u64 q15, q15, #32 \n" + "VLDM %[Key]!, {q5-q8} \n" + "VEOR.32 q14, q14, q14 \n" + "VEXT.8 q14, q15, q14, #8\n" + + "VLDM %[Key]!, {q9-q10} \n" + "VLD1.32 {q13}, [%[reg]]\n" + + /* double block */ + "1: \n" + "CMP r11, #1 \n" + "BEQ 2f \n" + "CMP r11, #0 \n" + "BEQ 3f \n" + + "VMOV.32 q0, q13 \n" + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13 \n" /* network order */ + "AESE.8 q0, q2\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "SUB r11, r11, #2 \n" + "VADD.i32 q15, q13, q14 \n" /* add 1 to counter */ + "VADD.i32 q13, q15, q14 \n" /* add 1 to counter */ + "AESE.8 q0, q3\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q15, q15, q15, #8 \n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q4\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q15, q15\n" /* revert from network order */ + "AESE.8 q0, q5\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13\n" /* revert from network order */ + "AESE.8 q15, q1\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q6\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q2\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q7\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q3\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q8\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q4\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q9\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q5\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q10\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q11}, [%[Key]]! \n" + "AESE.8 q15, q6\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q0, q11\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q7\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q15, q8\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q15, q9\n" + "AESMC.8 q15, q15\n" + "VLD1.32 {q12}, [%[Key]]! \n" + "AESE.8 q15, q10\n" + "AESMC.8 q15, q15\n" + + "AESE.8 q15, q11\n" + "AESMC.8 q15, q15\n" + + "VLD1.32 {q11}, [%[Key]]! 
\n" + "AESE.8 q0, q12\n" /* rnd 12*/ + "AESMC.8 q0, q0\n" + "AESE.8 q15, q12\n" /* rnd 12 */ + "AESMC.8 q15, q15\n" + + "VLD1.32 {q12}, [%[Key]]! \n" + "AESE.8 q0, q11\n" /* rnd 13 */ + "AESMC.8 q0, q0\n" + "AESE.8 q15, q11\n" /* rnd 13 */ + "AESMC.8 q15, q15\n" + + "VLD1.32 {q11}, [%[Key]] \n" + "AESE.8 q0, q12\n" /* rnd 14 */ + "AESE.8 q15, q12\n" /* rnd 14 */ + + "VLD1.32 {q12}, [%[input]]! \n" + "VEOR.32 q0, q0, q11\n" /* rnd 15 */ + "VEOR.32 q15, q15, q11\n" /* rnd 15 */ + "VEOR.32 q0, q0, q12\n" + + "VLD1.32 {q12}, [%[input]]! \n" + "VST1.32 {q0}, [%[out]]! \n" + "VEOR.32 q15, q15, q12\n" + "VST1.32 {q15}, [%[out]]! \n" + "SUB %[Key], %[Key], #64 \n" + + /* single block */ + "B 1b \n" + + "2: \n" + "VLD1.32 {q11}, [%[Key]]! \n" + "VMOV.32 q0, q13 \n" + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13 \n" /* network order */ + "AESE.8 q0, q2\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q3\n" + "AESMC.8 q0, q0\n" + "VADD.i32 q13, q13, q14 \n" /* add 1 to counter */ + "AESE.8 q0, q4\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q5\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q6\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13\n" /* revert from network order */ + "AESE.8 q0, q7\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q8\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q9\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q10\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q12}, [%[Key]]! \n" + "AESE.8 q0, q11\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q11}, [%[Key]]! \n" + "AESE.8 q0, q12\n" /* rnd 12 */ + "AESMC.8 q0, q0\n" + "VLD1.32 {q12}, [%[Key]]! \n" + "AESE.8 q0, q11\n" /* rnd 13 */ + "AESMC.8 q0, q0\n" + "VLD1.32 {q11}, [%[Key]] \n" + "AESE.8 q0, q12\n" /* rnd 14 */ + "VLD1.32 {q12}, [%[input]]! \n" + "VEOR.32 q0, q0, q11\n" /* rnd 15 */ + "#CTR operations, increment counter and xorbuf \n" + "VEOR.32 q0, q0, q12\n" + "VST1.32 {q0}, [%[out]]! \n" + + "3: \n" + "#store current counter qalue at the end \n" + "VST1.32 {q13}, [%[regOut]] \n" + + :[out] "=r" (out), "=r" (keyPt), [regOut] "=r" (regPt), + "=r" (in) + :"0" (out), [Key] "1" (keyPt), [input] "3" (in), + [blocks] "r" (numBlocks), [reg] "2" (regPt) + : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5", + "q6", "q7", "q8", "q9", "q10","q11","q12","q13","q14" + ); + break; +#endif /* WOLFSSL_AES_256 */ + default: + WOLFSSL_MSG("Bad AES-CTR round qalue"); + return BAD_FUNC_ARG; + } + + aes->left = 0; + } + + /* handle non block size remaining */ + if (sz) { + wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->tmp); + IncrementAesCounter((byte*)aes->reg); + + aes->left = AES_BLOCK_SIZE; + tmp = (byte*)aes->tmp; + + while (sz--) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + } + } + + return 0; + } + +#endif /* WOLFSSL_AES_COUNTER */ + +#ifdef HAVE_AESGCM +/* + * Uses Karatsuba algorithm. Reduction algorithm is based on "Implementing GCM + * on ARMv8". Shifting left to account for bit reflection is based on + * "Carry-Less Multiplication and Its Usage for Computing the GCM mode" + */ +static void GMULT(byte* X, byte* Y) +{ + __asm__ __volatile__ ( + "VLD1.32 {q0}, [%[x]] \n" + + /* In GCM format bits are big endian, switch location of bytes to + * allow for logical shifts and carries. 
+ */ + "VREV64.8 q0, q0 \n" + "VLD1.32 {q1}, [%[y]] \n" /* converted on set key */ + "VSWP.8 d0, d1 \n" + + "VMULL.p64 q5, d0, d2 \n" + "VMULL.p64 q6, d1, d3 \n" + "VEOR d15, d2, d3 \n" + "VEOR d14, d0, d1 \n" + "VMULL.p64 q7, d15, d14 \n" + "VEOR q7, q5 \n" + "VEOR q7, q6 \n" + "VEOR d11, d14 \n" + "VEOR d12, d15\n" + + /* shift to left by 1 to account for reflection */ + "VMOV q7, q6 \n" + "VSHL.u64 q6, q6, #1 \n" + "VSHR.u64 q7, q7, #63 \n" + "VEOR d13, d14 \n" + "VMOV q8, q5 \n" + "VSHL.u64 q5, q5, #1 \n" + "VSHR.u64 q8, q8, #63 \n" + "VEOR d12, d17 \n" + "VEOR d11, d16 \n" + + /* create constant 0xc200000000000000 */ + "VMOV.i32 d16, 0xc2000000 \n" + "VSHL.u64 d16, d16, #32 \n" + + /* reduce product of multiplication */ + "VMULL.p64 q9, d10, d16 \n" + "VEOR d11, d18 \n" + "VEOR d12, d19 \n" + "VMULL.p64 q9, d11, d16 \n" + "VEOR q6, q9 \n" + "VEOR q10, q5, q6 \n" + + /* convert to GCM format */ + "VREV64.8 q10, q10 \n" + "VSWP.8 d20, d21 \n" + + "VST1.32 {q10}, [%[xOut]] \n" + + : [xOut] "=r" (X), [yOut] "=r" (Y) + : [x] "0" (X), [y] "1" (Y) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6" ,"q7", "q8", + "q9", "q10", "q11" ,"q12", "q13", "q14", "q15" + ); +} + + +void GHASH(Aes* aes, const byte* a, word32 aSz, + const byte* c, word32 cSz, byte* s, word32 sSz) +{ + byte x[AES_BLOCK_SIZE]; + byte scratch[AES_BLOCK_SIZE]; + word32 blocks, partial; + byte* h = aes->H; + + XMEMSET(x, 0, AES_BLOCK_SIZE); + + /* Hash in A, the Additional Authentication Data */ + if (aSz != 0 && a != NULL) { + blocks = aSz / AES_BLOCK_SIZE; + partial = aSz % AES_BLOCK_SIZE; + while (blocks--) { + xorbuf(x, a, AES_BLOCK_SIZE); + GMULT(x, h); + a += AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, a, partial); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, h); + } + } + + /* Hash in C, the Ciphertext */ + if (cSz != 0 && c != NULL) { + blocks = cSz / AES_BLOCK_SIZE; + partial = cSz % AES_BLOCK_SIZE; + while (blocks--) { + xorbuf(x, c, AES_BLOCK_SIZE); + GMULT(x, h); + c += AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, c, partial); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, h); + } + } + + /* Hash in the lengths of A and C in bits */ + FlattenSzInBits(&scratch[0], aSz); + FlattenSzInBits(&scratch[8], cSz); + xorbuf(x, scratch, AES_BLOCK_SIZE); + GMULT(x, h); + + /* Copy the result into s. */ + XMEMCPY(s, x, sSz); +} + + +/* Aarch32 + * Encrypt and tag data using AES with GCM mode. 
+ * aes: Aes structure having already been set with set key function + * out: encrypted data output buffer + * in: plain text input buffer + * sz: size of plain text and out buffer + * iv: initialization vector + * ivSz: size of iv buffer + * authTag: buffer to hold tag + * authTagSz: size of tag buffer + * authIn: additional data buffer + * authInSz: size of additional data buffer + */ +int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + word32 blocks = sz / AES_BLOCK_SIZE; + word32 partial = sz % AES_BLOCK_SIZE; + const byte* p = in; + byte* c = out; + byte counter[AES_BLOCK_SIZE]; + byte initialCounter[AES_BLOCK_SIZE]; + byte *ctr ; + byte scratch[AES_BLOCK_SIZE]; + ctr = counter ; + + /* sanity checks */ + if (aes == NULL || (iv == NULL && ivSz > 0) || + (authTag == NULL) || + (authIn == NULL && authInSz > 0) || + (in == NULL && sz > 0) || + (out == NULL && sz > 0)) { + WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0"); + return BAD_FUNC_ARG; + } + + if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ || authTagSz > AES_BLOCK_SIZE) { + WOLFSSL_MSG("GcmEncrypt authTagSz error"); + return BAD_FUNC_ARG; + } + + XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(initialCounter, iv, ivSz); + initialCounter[AES_BLOCK_SIZE - 1] = 1; + } + else { + GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); + } + XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE); + + while (blocks--) { + IncrementGcmCounter(ctr); + wc_AesEncrypt(aes, ctr, scratch); + xorbuf(scratch, p, AES_BLOCK_SIZE); + XMEMCPY(c, scratch, AES_BLOCK_SIZE); + p += AES_BLOCK_SIZE; + c += AES_BLOCK_SIZE; + } + + if (partial != 0) { + IncrementGcmCounter(ctr); + wc_AesEncrypt(aes, ctr, scratch); + xorbuf(scratch, p, partial); + XMEMCPY(c, scratch, partial); + + } + + GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz); + wc_AesEncrypt(aes, initialCounter, scratch); + if (authTagSz > AES_BLOCK_SIZE) { + xorbuf(authTag, scratch, AES_BLOCK_SIZE); + } + else { + xorbuf(authTag, scratch, authTagSz); + } + + return 0; +} + + +#ifdef HAVE_AES_DECRYPT +/* + * Check tag and decrypt data using AES with GCM mode. 
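+ * Returns AES_GCM_AUTH_E when the recomputed tag does not match the + * supplied authTag; the cipher text is only decrypted after the tag + * has been verified.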
+ * aes: Aes structure having already been set with set key function + * out: decrypted data output buffer + * in: cipher text buffer + * sz: size of plain text and out buffer + * iv: initialization vector + * ivSz: size of iv buffer + * authTag: buffer holding tag + * authTagSz: size of tag buffer + * authIn: additional data buffer + * authInSz: size of additional data buffer + */ +int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + word32 blocks = sz / AES_BLOCK_SIZE; + word32 partial = sz % AES_BLOCK_SIZE; + const byte* c = in; + byte* p = out; + byte counter[AES_BLOCK_SIZE]; + byte initialCounter[AES_BLOCK_SIZE]; + byte *ctr ; + byte scratch[AES_BLOCK_SIZE]; + ctr = counter ; + + /* sanity checks */ + if (aes == NULL || (iv == NULL && ivSz > 0) || + (authTag == NULL) || + (authIn == NULL && authInSz > 0) || + (in == NULL && sz > 0) || + (out == NULL && sz > 0)) { + WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0"); + return BAD_FUNC_ARG; + } + + XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(initialCounter, iv, ivSz); + initialCounter[AES_BLOCK_SIZE - 1] = 1; + } + else { + GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); + } + XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE); + + /* Calculate the authTag again using the received auth data and the + * cipher text. */ + { + byte Tprime[AES_BLOCK_SIZE]; + byte EKY0[AES_BLOCK_SIZE]; + + GHASH(aes, authIn, authInSz, in, sz, Tprime, sizeof(Tprime)); + wc_AesEncrypt(aes, ctr, EKY0); + xorbuf(Tprime, EKY0, sizeof(Tprime)); + + if (ConstantCompare(authTag, Tprime, authTagSz) != 0) { + return AES_GCM_AUTH_E; + } + } + + while (blocks--) { + IncrementGcmCounter(ctr); + wc_AesEncrypt(aes, ctr, scratch); + xorbuf(scratch, c, AES_BLOCK_SIZE); + XMEMCPY(p, scratch, AES_BLOCK_SIZE); + p += AES_BLOCK_SIZE; + c += AES_BLOCK_SIZE; + } + if (partial != 0) { + IncrementGcmCounter(ctr); + wc_AesEncrypt(aes, ctr, scratch); + + /* check if pointer is null after main AES-GCM blocks + * helps static analysis */ + if (p == NULL || c == NULL) { + return BAD_STATE_E; + } + xorbuf(scratch, c, partial); + XMEMCPY(p, scratch, partial); + } + return 0; +} +#endif /* HAVE_AES_DECRYPT */ +#endif /* HAVE_AESGCM */ + +#endif /* aarch64 */ + + +#ifdef HAVE_AESCCM +/* Software version of AES-CCM from wolfcrypt/src/aes.c + * Gets some speed up from hardware acceleration of wc_AesEncrypt */ + +static void roll_x(Aes* aes, const byte* in, word32 inSz, byte* out) +{ + /* process the bulk of the data */ + while (inSz >= AES_BLOCK_SIZE) { + xorbuf(out, in, AES_BLOCK_SIZE); + in += AES_BLOCK_SIZE; + inSz -= AES_BLOCK_SIZE; + + wc_AesEncrypt(aes, out, out); + } + + /* process remainder of the data */ + if (inSz > 0) { + xorbuf(out, in, inSz); + wc_AesEncrypt(aes, out, out); + } +} + + +static void roll_auth(Aes* aes, const byte* in, word32 inSz, byte* out) +{ + word32 authLenSz; + word32 remainder; + + /* encode the length in */ + if (inSz <= 0xFEFF) { + authLenSz = 2; + out[0] ^= ((inSz & 0xFF00) >> 8); + out[1] ^= (inSz & 0x00FF); + } + else if (inSz <= 0xFFFFFFFF) { + authLenSz = 6; + out[0] ^= 0xFF; out[1] ^= 0xFE; + out[2] ^= ((inSz & 0xFF000000) >> 24); + out[3] ^= ((inSz & 0x00FF0000) >> 16); + out[4] ^= ((inSz & 0x0000FF00) >> 8); + out[5] ^= (inSz & 0x000000FF); + } + /* Note, the protocol handles auth data up to 2^64, but we are + * using 32-bit sizes right now, so the 
bigger data isn't handled + * else if (inSz <= 0xFFFFFFFFFFFFFFFF) {} */ + else + return; + + /* start to fill out the rest of the first block */ + remainder = AES_BLOCK_SIZE - authLenSz; + if (inSz >= remainder) { + /* plenty of bulk data to fill the remainder of this block */ + xorbuf(out + authLenSz, in, remainder); + inSz -= remainder; + in += remainder; + } + else { + /* not enough bulk data, copy what is available, and pad zero */ + xorbuf(out + authLenSz, in, inSz); + inSz = 0; + } + wc_AesEncrypt(aes, out, out); + + if (inSz > 0) + roll_x(aes, in, inSz, out); +} + + +static WC_INLINE void AesCcmCtrInc(byte* B, word32 lenSz) +{ + word32 i; + + for (i = 0; i < lenSz; i++) { + if (++B[AES_BLOCK_SIZE - 1 - i] != 0) return; + } +} + + +/* return 0 on success */ +int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + byte A[AES_BLOCK_SIZE]; + byte B[AES_BLOCK_SIZE]; + byte lenSz; + word32 i; + byte mask = 0xFF; + word32 wordSz = (word32)sizeof(word32); + + /* sanity check on arguments */ + if (aes == NULL || out == NULL || in == NULL || nonce == NULL + || authTag == NULL || nonceSz < 7 || nonceSz > 13) + return BAD_FUNC_ARG; + + XMEMCPY(B+1, nonce, nonceSz); + lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz; + B[0] = (authInSz > 0 ? 64 : 0) + + (8 * (((byte)authTagSz - 2) / 2)) + + (lenSz - 1); + for (i = 0; i < lenSz; i++) { + if (mask && i >= wordSz) + mask = 0x00; + B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask; + } + + wc_AesEncrypt(aes, B, A); + + if (authInSz > 0) + roll_auth(aes, authIn, authInSz, A); + if (inSz > 0) + roll_x(aes, in, inSz, A); + XMEMCPY(authTag, A, authTagSz); + + B[0] = lenSz - 1; + for (i = 0; i < lenSz; i++) + B[AES_BLOCK_SIZE - 1 - i] = 0; + wc_AesEncrypt(aes, B, A); + xorbuf(authTag, A, authTagSz); + + B[15] = 1; + while (inSz >= AES_BLOCK_SIZE) { + wc_AesEncrypt(aes, B, A); + xorbuf(A, in, AES_BLOCK_SIZE); + XMEMCPY(out, A, AES_BLOCK_SIZE); + + AesCcmCtrInc(B, lenSz); + inSz -= AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + if (inSz > 0) { + wc_AesEncrypt(aes, B, A); + xorbuf(A, in, inSz); + XMEMCPY(out, A, inSz); + } + + ForceZero(A, AES_BLOCK_SIZE); + ForceZero(B, AES_BLOCK_SIZE); + + return 0; +} + +#ifdef HAVE_AES_DECRYPT +int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + byte A[AES_BLOCK_SIZE]; + byte B[AES_BLOCK_SIZE]; + byte* o; + byte lenSz; + word32 i, oSz; + int result = 0; + byte mask = 0xFF; + word32 wordSz = (word32)sizeof(word32); + + /* sanity check on arguments */ + if (aes == NULL || out == NULL || in == NULL || nonce == NULL + || authTag == NULL || nonceSz < 7 || nonceSz > 13) + return BAD_FUNC_ARG; + + o = out; + oSz = inSz; + XMEMCPY(B+1, nonce, nonceSz); + lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz; + + B[0] = lenSz - 1; + for (i = 0; i < lenSz; i++) + B[AES_BLOCK_SIZE - 1 - i] = 0; + B[15] = 1; + + while (oSz >= AES_BLOCK_SIZE) { + wc_AesEncrypt(aes, B, A); + xorbuf(A, in, AES_BLOCK_SIZE); + XMEMCPY(o, A, AES_BLOCK_SIZE); + + AesCcmCtrInc(B, lenSz); + oSz -= AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + o += AES_BLOCK_SIZE; + } + if (inSz > 0) { + wc_AesEncrypt(aes, B, A); + xorbuf(A, in, oSz); + XMEMCPY(o, A, oSz); + } + + for (i = 0; i < lenSz; i++) + B[AES_BLOCK_SIZE - 1 - i] = 0; + wc_AesEncrypt(aes, B, A); + + o = out; + oSz = inSz;
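+ /* rebuild the RFC 3610 B0 flags byte: bit 6 is set when auth data + * is present, bits 5-3 hold (M - 2)/2 for tag length M, and bits + * 2-0 hold L - 1 for the size of the message length field */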
+ + B[0] = (authInSz > 0 ? 64 : 0) + + (8 * (((byte)authTagSz - 2) / 2)) + + (lenSz - 1); + for (i = 0; i < lenSz; i++) { + if (mask && i >= wordSz) + mask = 0x00; + B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask; + } + + wc_AesEncrypt(aes, B, A); + + if (authInSz > 0) + roll_auth(aes, authIn, authInSz, A); + if (inSz > 0) + roll_x(aes, o, oSz, A); + + B[0] = lenSz - 1; + for (i = 0; i < lenSz; i++) + B[AES_BLOCK_SIZE - 1 - i] = 0; + wc_AesEncrypt(aes, B, B); + xorbuf(A, B, authTagSz); + + if (ConstantCompare(A, authTag, authTagSz) != 0) { + /* If the authTag check fails, don't keep the decrypted data. + * Unfortunately, you need the decrypted data to calculate the + * check value. */ + XMEMSET(out, 0, inSz); + result = AES_CCM_AUTH_E; + } + + ForceZero(A, AES_BLOCK_SIZE); + ForceZero(B, AES_BLOCK_SIZE); + o = NULL; + + return result; +} +#endif /* HAVE_AES_DECRYPT */ +#endif /* HAVE_AESCCM */ + + + +#ifdef HAVE_AESGCM /* common GCM functions 32 and 64 bit */ +int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) +{ + int ret; + byte iv[AES_BLOCK_SIZE]; + + if (!((len == 16) || (len == 24) || (len == 32))) + return BAD_FUNC_ARG; + + XMEMSET(iv, 0, AES_BLOCK_SIZE); + ret = wc_AesSetKey(aes, key, len, iv, AES_ENCRYPTION); + + if (ret == 0) { + wc_AesEncrypt(aes, iv, aes->H); + #if defined(__aarch64__) + { + word32* pt = (word32*)aes->H; + __asm__ volatile ( + "LD1 {v0.16b}, [%[h]] \n" + "RBIT v0.16b, v0.16b \n" + "ST1 {v0.16b}, [%[out]] \n" + : [out] "=r" (pt) + : [h] "0" (pt) + : "cc", "memory", "v0" + ); + } + #else + { + word32* pt = (word32*)aes->H; + __asm__ volatile ( + "VLD1.32 {q0}, [%[h]] \n" + "VREV64.8 q0, q0 \n" + "VSWP.8 d0, d1 \n" + "VST1.32 {q0}, [%[out]] \n" + : [out] "=r" (pt) + : [h] "0" (pt) + : "cc", "memory", "q0" + ); + } + #endif + } + + return ret; +} + +#endif /* HAVE_AESGCM */ + +/* AES-DIRECT */ +#if defined(WOLFSSL_AES_DIRECT) + /* Allow direct access to one block encrypt */ + void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) + { + if (aes == NULL || out == NULL || in == NULL) { + WOLFSSL_MSG("Invalid input to wc_AesEncryptDirect"); + return; + } + wc_AesEncrypt(aes, in, out); + } + #ifdef HAVE_AES_DECRYPT + /* Allow direct access to one block decrypt */ + void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) + { + if (aes == NULL || out == NULL || in == NULL) { + WOLFSSL_MSG("Invalid input to wc_AesDecryptDirect"); + return; + } + wc_AesDecrypt(aes, in, out); + } + #endif /* HAVE_AES_DECRYPT */ +#endif /* WOLFSSL_AES_DIRECT */ +#endif /* !NO_AES && WOLFSSL_ARMASM */ diff --git a/client/wolfssl/wolfcrypt/src/port/arm/armv8-chacha.c b/client/wolfssl/wolfcrypt/src/port/arm/armv8-chacha.c new file mode 100644 index 0000000..df76bec --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/arm/armv8-chacha.c @@ -0,0 +1,2857 @@ +/* armv8-chacha.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + * + */ + +/* The paper NEON crypto by Daniel J. Bernstein and Peter Schwabe was used to optimize for ARM + * https://cryptojedi.org/papers/neoncrypto-20120320.pdf + */ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef WOLFSSL_ARMASM +#ifdef HAVE_CHACHA + +#include +#include +#include +#include +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +#ifdef CHACHA_AEAD_TEST + #include +#endif + +#ifdef CHACHA_TEST + #include +#endif + +#ifdef BIG_ENDIAN_ORDER + #define LITTLE32(x) ByteReverseWord32(x) +#else + #define LITTLE32(x) (x) +#endif + +/* Number of rounds */ +#define ROUNDS 20 + +#define U32C(v) (v##U) +#define U32V(v) ((word32)(v) & U32C(0xFFFFFFFF)) +#define U8TO32_LITTLE(p) LITTLE32(((word32*)(p))[0]) + +#define PLUS(v,w) (U32V((v) + (w))) +#define PLUSONE(v) (PLUS((v),1)) + +#define ARM_SIMD_LEN_BYTES 16 + +/** + * Set up iv(nonce). Earlier versions used 64 bits instead of 96, this version + * uses the typical AEAD 96 bit nonce and can do record sizes of 256 GB. + */ +int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter) +{ + word32 temp[CHACHA_IV_WORDS];/* used for alignment of memory */ + +#ifdef CHACHA_AEAD_TEST + word32 i; + printf("NONCE : "); + for (i = 0; i < CHACHA_IV_BYTES; i++) { + printf("%02x", inIv[i]); + } + printf("\n\n"); +#endif + + if (ctx == NULL) + return BAD_FUNC_ARG; + + XMEMCPY(temp, inIv, CHACHA_IV_BYTES); + + ctx->X[CHACHA_IV_BYTES+0] = counter; /* block counter */ + ctx->X[CHACHA_IV_BYTES+1] = LITTLE32(temp[0]); /* fixed variable from nonce */ + ctx->X[CHACHA_IV_BYTES+2] = LITTLE32(temp[1]); /* counter from nonce */ + ctx->X[CHACHA_IV_BYTES+3] = LITTLE32(temp[2]); /* counter from nonce */ + + return 0; +} + +/* "expand 32-byte k" as unsigned 32 byte */ +static const word32 sigma[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574}; +/* "expand 16-byte k" as unsigned 16 byte */ +static const word32 tau[4] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574}; + +/** + * Key setup. 
8 word iv (nonce) + */ +int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz) +{ + const word32* constants; + const byte* k; + +#ifdef XSTREAM_ALIGN + word32 alignKey[8]; +#endif + + if (ctx == NULL) + return BAD_FUNC_ARG; + + if (keySz != (CHACHA_MAX_KEY_SZ/2) && keySz != CHACHA_MAX_KEY_SZ) + return BAD_FUNC_ARG; + +#ifdef XSTREAM_ALIGN + if ((wolfssl_word)key % 4) { + WOLFSSL_MSG("wc_ChachaSetKey unaligned key"); + XMEMCPY(alignKey, key, keySz); + k = (byte*)alignKey; + } + else { + k = key; + } +#else + k = key; +#endif /* XSTREAM_ALIGN */ + +#ifdef CHACHA_AEAD_TEST + word32 i; + printf("ChaCha key used :\n"); + for (i = 0; i < keySz; i++) { + printf("%02x", key[i]); + if ((i + 1) % 8 == 0) + printf("\n"); + } + printf("\n\n"); +#endif + + ctx->X[4] = U8TO32_LITTLE(k + 0); + ctx->X[5] = U8TO32_LITTLE(k + 4); + ctx->X[6] = U8TO32_LITTLE(k + 8); + ctx->X[7] = U8TO32_LITTLE(k + 12); + if (keySz == CHACHA_MAX_KEY_SZ) { + k += 16; + constants = sigma; + } + else { + constants = tau; + } + ctx->X[ 8] = U8TO32_LITTLE(k + 0); + ctx->X[ 9] = U8TO32_LITTLE(k + 4); + ctx->X[10] = U8TO32_LITTLE(k + 8); + ctx->X[11] = U8TO32_LITTLE(k + 12); + ctx->X[ 0] = constants[0]; + ctx->X[ 1] = constants[1]; + ctx->X[ 2] = constants[2]; + ctx->X[ 3] = constants[3]; + + return 0; +} + +static const word32 L_chacha20_neon_inc_first_word[] = { + 0x1, + 0x0, + 0x0, + 0x0, +}; + +#ifdef __aarch64__ + +static const word32 L_chacha20_neon_add_all_counters[] = { + 0x0, + 0x1, + 0x2, + 0x3, +}; + +static const word32 L_chacha20_neon_rol8[] = { + 0x2010003, + 0x6050407, + 0xa09080b, + 0xe0d0c0f, +}; + +static WC_INLINE void wc_Chacha_encrypt_320(const word32* input, const byte* m, byte* c, word32 bytes) +{ +#ifdef CHACHA_TEST + printf("Entering wc_Chacha_encrypt_320 with %d bytes\n", bytes); +#endif /*CHACHA_TEST */ + word64 bytes64 = (word64) bytes; + __asm__ __volatile__ ( + /* + * The layout of used registers is: + * ARM + * w4-w19: these registers hold the fifth Chacha block for calculation in regular ARM + * w20: loop counter for how many even-odd rounds need to be executed + * w21: the counter offset for the block in ARM registers + * NEON + * v0-v15: the vi'th register holds the i'th word of four blocks during the quarter rounds. + * these registers are later transposed make ADDing the input and XORing the message easier. 
+ * v16-v19: these are helper registers that are used as temporary location to store data + * v20-v23: load the next message block + * v24-v27: the 64 byte initial Chacha block + * v28: vector to increment the counter words of each block + * v29: vector of 5's to increment counters between L_chacha20_arm64_outer_%= loops + * v30: table lookup indices to rotate values by 8 + */ + + /* Load counter-add values for each block */ + "LD1 {v28.4s}, [%[L_chacha20_neon_add_all_counters]] \n\t" + /* Load index look-up for rotating left 8 bits */ + "LD1 {v30.16b}, [%[L_chacha20_neon_rol8]] \n\t" + /* For adding 5 to each counter-add for next 320-byte chunk */ + "MOVI v29.4s, #5 \n\t" + /* Counter for 5th block in regular registers */ + "MOV w21, #4 \n\t" + /* Load state to encrypt */ + "LD1 {v24.4s-v27.4s}, [%[input]] \n\t" + "\n" + "L_chacha20_arm64_outer_%=: \n\t" + /* Move state into regular registers */ + "MOV x4, v24.d[0] \n\t" + "MOV x6, v24.d[1] \n\t" + "MOV x8, v25.d[0] \n\t" + "MOV x10, v25.d[1] \n\t" + "MOV x12, v26.d[0] \n\t" + "MOV x14, v26.d[1] \n\t" + "MOV x16, v27.d[0] \n\t" + "MOV x22, v27.d[1] \n\t" + /* Move state into vector registers (x4) */ + "DUP v0.4s, v24.s[0] \n\t" + "DUP v1.4s, v24.s[1] \n\t" + "LSR x5, x4, #32 \n\t" + "DUP v2.4s, v24.s[2] \n\t" + "DUP v3.4s, v24.s[3] \n\t" + "LSR x7, x6, #32 \n\t" + "DUP v4.4s, v25.s[0] \n\t" + "DUP v5.4s, v25.s[1] \n\t" + "LSR x9, x8, #32 \n\t" + "DUP v6.4s, v25.s[2] \n\t" + "DUP v7.4s, v25.s[3] \n\t" + "LSR x11, x10, #32 \n\t" + "DUP v8.4s, v26.s[0] \n\t" + "DUP v9.4s, v26.s[1] \n\t" + "LSR x13, x12, #32 \n\t" + "DUP v10.4s, v26.s[2] \n\t" + "DUP v11.4s, v26.s[3] \n\t" + "LSR x15, x14, #32 \n\t" + "DUP v12.4s, v27.s[0] \n\t" + "DUP v13.4s, v27.s[1] \n\t" + "LSR x17, x16, #32 \n\t" + "DUP v14.4s, v27.s[2] \n\t" + "DUP v15.4s, v27.s[3] \n\t" + "LSR x19, x22, #32 \n\t" + /* Add to counter word */ + "ADD v12.4s, v12.4s, v28.4s \n\t" + "ADD w16, w16, w21 \n\t" + /* Set number of odd+even rounds to perform */ + "MOV w20, #10 \n\t" + "\n" + "L_chacha20_arm64_inner_%=: \n\t" + "SUBS w20, w20, #1 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4s, v0.4s, v4.4s \n\t" + "ADD w4, w4, w8 \n\t" + "ADD v1.4s, v1.4s, v5.4s \n\t" + "ADD w5, w5, w9 \n\t" + "ADD v2.4s, v2.4s, v6.4s \n\t" + "ADD w6, w6, w10 \n\t" + "ADD v3.4s, v3.4s, v7.4s \n\t" + "ADD w7, w7, w11 \n\t" + "EOR v12.16b, v12.16b, v0.16b \n\t" + "EOR w16, w16, w4 \n\t" + "EOR v13.16b, v13.16b, v1.16b \n\t" + "EOR w17, w17, w5 \n\t" + "EOR v14.16b, v14.16b, v2.16b \n\t" + "EOR w22, w22, w6 \n\t" + "EOR v15.16b, v15.16b, v3.16b \n\t" + "EOR w19, w19, w7 \n\t" + "REV32 v12.8h, v12.8h \n\t" + "ROR w16, w16, #16 \n\t" + "REV32 v13.8h, v13.8h \n\t" + "ROR w17, w17, #16 \n\t" + "REV32 v14.8h, v14.8h \n\t" + "ROR w22, w22, #16 \n\t" + "REV32 v15.8h, v15.8h \n\t" + "ROR w19, w19, #16 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v8.4s, v8.4s, v12.4s \n\t" + "ADD w12, w12, w16 \n\t" + "ADD v9.4s, v9.4s, v13.4s \n\t" + "ADD w13, w13, w17 \n\t" + "ADD v10.4s, v10.4s, v14.4s \n\t" + "ADD w14, w14, w22 \n\t" + "ADD v11.4s, v11.4s, v15.4s \n\t" + "ADD w15, w15, w19 \n\t" + "EOR v16.16b, v4.16b, v8.16b \n\t" + "EOR w8, w8, w12 \n\t" + "EOR v17.16b, v5.16b, v9.16b \n\t" + "EOR w9, w9, w13 \n\t" + "EOR v18.16b, v6.16b, v10.16b \n\t" + "EOR w10, w10, w14 \n\t" + "EOR v19.16b, v7.16b, v11.16b \n\t" + "EOR w11, w11, w15 \n\t" + "SHL v4.4s, v16.4s, #12 \n\t" + "ROR w8, w8, #20 \n\t" + "SHL v5.4s, v17.4s, #12 \n\t" + "ROR w9, w9, #20 \n\t" + "SHL v6.4s, v18.4s, #12 \n\t" + "ROR w10, w10, #20 
\n\t" + "SHL v7.4s, v19.4s, #12 \n\t" + "ROR w11, w11, #20 \n\t" + "SRI v4.4s, v16.4s, #20 \n\t" + "SRI v5.4s, v17.4s, #20 \n\t" + "SRI v6.4s, v18.4s, #20 \n\t" + "SRI v7.4s, v19.4s, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4s, v0.4s, v4.4s \n\t" + "ADD w4, w4, w8 \n\t" + "ADD v1.4s, v1.4s, v5.4s \n\t" + "ADD w5, w5, w9 \n\t" + "ADD v2.4s, v2.4s, v6.4s \n\t" + "ADD w6, w6, w10 \n\t" + "ADD v3.4s, v3.4s, v7.4s \n\t" + "ADD w7, w7, w11 \n\t" + "EOR v12.16b, v12.16b, v0.16b \n\t" + "EOR w16, w16, w4 \n\t" + "EOR v13.16b, v13.16b, v1.16b \n\t" + "EOR w17, w17, w5 \n\t" + "EOR v14.16b, v14.16b, v2.16b \n\t" + "EOR w22, w22, w6 \n\t" + "EOR v15.16b, v15.16b, v3.16b \n\t" + "EOR w19, w19, w7 \n\t" + "TBL v12.16b, { v12.16b }, v30.16b \n\t" + "ROR w16, w16, #24 \n\t" + "TBL v13.16b, { v13.16b }, v30.16b \n\t" + "ROR w17, w17, #24 \n\t" + "TBL v14.16b, { v14.16b }, v30.16b \n\t" + "ROR w22, w22, #24 \n\t" + "TBL v15.16b, { v15.16b }, v30.16b \n\t" + "ROR w19, w19, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v8.4s, v8.4s, v12.4s \n\t" + "ADD w12, w12, w16 \n\t" + "ADD v9.4s, v9.4s, v13.4s \n\t" + "ADD w13, w13, w17 \n\t" + "ADD v10.4s, v10.4s, v14.4s \n\t" + "ADD w14, w14, w22 \n\t" + "ADD v11.4s, v11.4s, v15.4s \n\t" + "ADD w15, w15, w19 \n\t" + "EOR v16.16b, v4.16b, v8.16b \n\t" + "EOR w8, w8, w12 \n\t" + "EOR v17.16b, v5.16b, v9.16b \n\t" + "EOR w9, w9, w13 \n\t" + "EOR v18.16b, v6.16b, v10.16b \n\t" + "EOR w10, w10, w14 \n\t" + "EOR v19.16b, v7.16b, v11.16b \n\t" + "EOR w11, w11, w15 \n\t" + "SHL v4.4s, v16.4s, #7 \n\t" + "ROR w8, w8, #25 \n\t" + "SHL v5.4s, v17.4s, #7 \n\t" + "ROR w9, w9, #25 \n\t" + "SHL v6.4s, v18.4s, #7 \n\t" + "ROR w10, w10, #25 \n\t" + "SHL v7.4s, v19.4s, #7 \n\t" + "ROR w11, w11, #25 \n\t" + "SRI v4.4s, v16.4s, #25 \n\t" + "SRI v5.4s, v17.4s, #25 \n\t" + "SRI v6.4s, v18.4s, #25 \n\t" + "SRI v7.4s, v19.4s, #25 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4s, v0.4s, v5.4s \n\t" + "ADD w4, w4, w9 \n\t" + "ADD v1.4s, v1.4s, v6.4s \n\t" + "ADD w5, w5, w10 \n\t" + "ADD v2.4s, v2.4s, v7.4s \n\t" + "ADD w6, w6, w11 \n\t" + "ADD v3.4s, v3.4s, v4.4s \n\t" + "ADD w7, w7, w8 \n\t" + "EOR v15.16b, v15.16b, v0.16b \n\t" + "EOR w19, w19, w4 \n\t" + "EOR v12.16b, v12.16b, v1.16b \n\t" + "EOR w16, w16, w5 \n\t" + "EOR v13.16b, v13.16b, v2.16b \n\t" + "EOR w17, w17, w6 \n\t" + "EOR v14.16b, v14.16b, v3.16b \n\t" + "EOR w22, w22, w7 \n\t" + "REV32 v15.8h, v15.8h \n\t" + "ROR w19, w19, #16 \n\t" + "REV32 v12.8h, v12.8h \n\t" + "ROR w16, w16, #16 \n\t" + "REV32 v13.8h, v13.8h \n\t" + "ROR w17, w17, #16 \n\t" + "REV32 v14.8h, v14.8h \n\t" + "ROR w22, w22, #16 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v10.4s, v10.4s, v15.4s \n\t" + "ADD w14, w14, w19 \n\t" + "ADD v11.4s, v11.4s, v12.4s \n\t" + "ADD w15, w15, w16 \n\t" + "ADD v8.4s, v8.4s, v13.4s \n\t" + "ADD w12, w12, w17 \n\t" + "ADD v9.4s, v9.4s, v14.4s \n\t" + "ADD w13, w13, w22 \n\t" + "EOR v16.16b, v5.16b, v10.16b \n\t" + "EOR w9, w9, w14 \n\t" + "EOR v17.16b, v6.16b, v11.16b \n\t" + "EOR w10, w10, w15 \n\t" + "EOR v18.16b, v7.16b, v8.16b \n\t" + "EOR w11, w11, w12 \n\t" + "EOR v19.16b, v4.16b, v9.16b \n\t" + "EOR w8, w8, w13 \n\t" + "SHL v5.4s, v16.4s, #12 \n\t" + "ROR w9, w9, #20 \n\t" + "SHL v6.4s, v17.4s, #12 \n\t" + "ROR w10, w10, #20 \n\t" + "SHL v7.4s, v18.4s, #12 \n\t" + "ROR w11, w11, #20 \n\t" + "SHL v4.4s, v19.4s, #12 \n\t" + "ROR w8, w8, #20 \n\t" + "SRI v5.4s, v16.4s, #20 \n\t" + "SRI v6.4s, v17.4s, #20 \n\t" + "SRI v7.4s, v18.4s, #20 \n\t" + "SRI v4.4s, v19.4s, #20 \n\t" + /* 
a += b; d ^= a; d <<<= 8; */ + "ADD v0.4s, v0.4s, v5.4s \n\t" + "ADD w4, w4, w9 \n\t" + "ADD v1.4s, v1.4s, v6.4s \n\t" + "ADD w5, w5, w10 \n\t" + "ADD v2.4s, v2.4s, v7.4s \n\t" + "ADD w6, w6, w11 \n\t" + "ADD v3.4s, v3.4s, v4.4s \n\t" + "ADD w7, w7, w8 \n\t" + "EOR v15.16b, v15.16b, v0.16b \n\t" + "EOR w19, w19, w4 \n\t" + "EOR v12.16b, v12.16b, v1.16b \n\t" + "EOR w16, w16, w5 \n\t" + "EOR v13.16b, v13.16b, v2.16b \n\t" + "EOR w17, w17, w6 \n\t" + "EOR v14.16b, v14.16b, v3.16b \n\t" + "EOR w22, w22, w7 \n\t" + "TBL v15.16b, { v15.16b }, v30.16b \n\t" + "ROR w19, w19, #24 \n\t" + "TBL v12.16b, { v12.16b }, v30.16b \n\t" + "ROR w16, w16, #24 \n\t" + "TBL v13.16b, { v13.16b }, v30.16b \n\t" + "ROR w17, w17, #24 \n\t" + "TBL v14.16b, { v14.16b }, v30.16b \n\t" + "ROR w22, w22, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v10.4s, v10.4s, v15.4s \n\t" + "ADD w14, w14, w19 \n\t" + "ADD v11.4s, v11.4s, v12.4s \n\t" + "ADD w15, w15, w16 \n\t" + "ADD v8.4s, v8.4s, v13.4s \n\t" + "ADD w12, w12, w17 \n\t" + "ADD v9.4s, v9.4s, v14.4s \n\t" + "ADD w13, w13, w22 \n\t" + "EOR v16.16b, v5.16b, v10.16b \n\t" + "EOR w9, w9, w14 \n\t" + "EOR v17.16b, v6.16b, v11.16b \n\t" + "EOR w10, w10, w15 \n\t" + "EOR v18.16b, v7.16b, v8.16b \n\t" + "EOR w11, w11, w12 \n\t" + "EOR v19.16b, v4.16b, v9.16b \n\t" + "EOR w8, w8, w13 \n\t" + "SHL v5.4s, v16.4s, #7 \n\t" + "ROR w9, w9, #25 \n\t" + "SHL v6.4s, v17.4s, #7 \n\t" + "ROR w10, w10, #25 \n\t" + "SHL v7.4s, v18.4s, #7 \n\t" + "ROR w11, w11, #25 \n\t" + "SHL v4.4s, v19.4s, #7 \n\t" + "ROR w8, w8, #25 \n\t" + "SRI v5.4s, v16.4s, #25 \n\t" + "SRI v6.4s, v17.4s, #25 \n\t" + "SRI v7.4s, v18.4s, #25 \n\t" + "SRI v4.4s, v19.4s, #25 \n\t" + "BNE L_chacha20_arm64_inner_%= \n\t" + /* Add counter now rather than after transposed */ + "ADD v12.4s, v12.4s, v28.4s \n\t" + "ADD w16, w16, w21 \n\t" + /* Load message */ + "LD1 {v20.4s-v23.4s}, [%[m]], #64 \n\t" + /* Transpose vectors */ + "TRN1 v16.4s, v0.4s, v1.4s \n\t" + "TRN1 v18.4s, v2.4s, v3.4s \n\t" + "TRN2 v17.4s, v0.4s, v1.4s \n\t" + "TRN2 v19.4s, v2.4s, v3.4s \n\t" + "TRN1 v0.2d, v16.2d, v18.2d \n\t" + "TRN1 v1.2d, v17.2d, v19.2d \n\t" + "TRN2 v2.2d, v16.2d, v18.2d \n\t" + "TRN2 v3.2d, v17.2d, v19.2d \n\t" + "TRN1 v16.4s, v4.4s, v5.4s \n\t" + "TRN1 v18.4s, v6.4s, v7.4s \n\t" + "TRN2 v17.4s, v4.4s, v5.4s \n\t" + "TRN2 v19.4s, v6.4s, v7.4s \n\t" + "TRN1 v4.2d, v16.2d, v18.2d \n\t" + "TRN1 v5.2d, v17.2d, v19.2d \n\t" + "TRN2 v6.2d, v16.2d, v18.2d \n\t" + "TRN2 v7.2d, v17.2d, v19.2d \n\t" + "TRN1 v16.4s, v8.4s, v9.4s \n\t" + "TRN1 v18.4s, v10.4s, v11.4s \n\t" + "TRN2 v17.4s, v8.4s, v9.4s \n\t" + "TRN2 v19.4s, v10.4s, v11.4s \n\t" + "TRN1 v8.2d, v16.2d, v18.2d \n\t" + "TRN1 v9.2d, v17.2d, v19.2d \n\t" + "TRN2 v10.2d, v16.2d, v18.2d \n\t" + "TRN2 v11.2d, v17.2d, v19.2d \n\t" + "TRN1 v16.4s, v12.4s, v13.4s \n\t" + "TRN1 v18.4s, v14.4s, v15.4s \n\t" + "TRN2 v17.4s, v12.4s, v13.4s \n\t" + "TRN2 v19.4s, v14.4s, v15.4s \n\t" + "TRN1 v12.2d, v16.2d, v18.2d \n\t" + "TRN1 v13.2d, v17.2d, v19.2d \n\t" + "TRN2 v14.2d, v16.2d, v18.2d \n\t" + "TRN2 v15.2d, v17.2d, v19.2d \n\t" + /* Add back state, XOR in message and store (load next block) */ + "ADD v16.4s, v0.4s, v24.4s \n\t" + "ADD v17.4s, v4.4s, v25.4s \n\t" + "ADD v18.4s, v8.4s, v26.4s \n\t" + "ADD v19.4s, v12.4s, v27.4s \n\t" + "EOR v16.16b, v16.16b, v20.16b \n\t" + "EOR v17.16b, v17.16b, v21.16b \n\t" + "EOR v18.16b, v18.16b, v22.16b \n\t" + "EOR v19.16b, v19.16b, v23.16b \n\t" + "LD1 {v20.4s-v23.4s}, [%[m]], #64 \n\t" + "ST1 {v16.4s-v19.4s}, [%[c]], #64 \n\t" + "ADD v16.4s, v1.4s, 
v24.4s \n\t" + "ADD v17.4s, v5.4s, v25.4s \n\t" + "ADD v18.4s, v9.4s, v26.4s \n\t" + "ADD v19.4s, v13.4s, v27.4s \n\t" + "EOR v16.16b, v16.16b, v20.16b \n\t" + "EOR v17.16b, v17.16b, v21.16b \n\t" + "EOR v18.16b, v18.16b, v22.16b \n\t" + "EOR v19.16b, v19.16b, v23.16b \n\t" + "LD1 {v20.4s-v23.4s}, [%[m]], #64 \n\t" + "ST1 {v16.4s-v19.4s}, [%[c]], #64 \n\t" + "ADD v16.4s, v2.4s, v24.4s \n\t" + "ADD v17.4s, v6.4s, v25.4s \n\t" + "ADD v18.4s, v10.4s, v26.4s \n\t" + "ADD v19.4s, v14.4s, v27.4s \n\t" + "EOR v16.16b, v16.16b, v20.16b \n\t" + "EOR v17.16b, v17.16b, v21.16b \n\t" + "EOR v18.16b, v18.16b, v22.16b \n\t" + "EOR v19.16b, v19.16b, v23.16b \n\t" + "LD1 {v20.4s-v23.4s}, [%[m]], #64 \n\t" + "ST1 {v16.4s-v19.4s}, [%[c]], #64 \n\t" + "ADD v16.4s, v3.4s, v24.4s \n\t" + "ADD v17.4s, v7.4s, v25.4s \n\t" + "ADD v18.4s, v11.4s, v26.4s \n\t" + "ADD v19.4s, v15.4s, v27.4s \n\t" + "EOR v16.16b, v16.16b, v20.16b \n\t" + "EOR v17.16b, v17.16b, v21.16b \n\t" + "EOR v18.16b, v18.16b, v22.16b \n\t" + "EOR v19.16b, v19.16b, v23.16b \n\t" + "LD1 {v20.4s-v23.4s}, [%[m]], #64 \n\t" + "ST1 {v16.4s-v19.4s}, [%[c]], #64 \n\t" + /* Move regular registers into vector registers for adding and xor */ + "ORR x4, x4, x5, LSL #32 \n\t" + "ORR x6, x6, x7, LSL #32 \n\t" + "ORR x8, x8, x9, LSL #32 \n\t" + "MOV v16.d[0], x4 \n\t" + "ORR x10, x10, x11, LSL #32 \n\t" + "MOV v16.d[1], x6 \n\t" + "ORR x12, x12, x13, LSL #32 \n\t" + "MOV v17.d[0], x8 \n\t" + "ORR x14, x14, x15, LSL #32 \n\t" + "MOV v17.d[1], x10 \n\t" + "ORR x16, x16, x17, LSL #32 \n\t" + "MOV v18.d[0], x12 \n\t" + "ORR x22, x22, x19, LSL #32 \n\t" + "MOV v18.d[1], x14 \n\t" + "MOV v19.d[0], x16 \n\t" + "MOV v19.d[1], x22 \n\t" + /* Add back state, XOR in message and store */ + "ADD v16.4s, v16.4s, v24.4s \n\t" + "ADD v17.4s, v17.4s, v25.4s \n\t" + "ADD v18.4s, v18.4s, v26.4s \n\t" + "ADD v19.4s, v19.4s, v27.4s \n\t" + "EOR v16.16b, v16.16b, v20.16b \n\t" + "EOR v17.16b, v17.16b, v21.16b \n\t" + "EOR v18.16b, v18.16b, v22.16b \n\t" + "EOR v19.16b, v19.16b, v23.16b \n\t" + "ADD w21, w21, #5 \n\t" + "ST1 {v16.4s-v19.4s}, [%[c]], #64 \n\t" + "SUBS %[bytes], %[bytes], #320 \n\t" + "ADD v28.4s, v28.4s, v29.4s \n\t" + "BNE L_chacha20_arm64_outer_%= \n\t" + : [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), + [bytes] "+r" (bytes64) + : [L_chacha20_neon_add_all_counters] "r" (L_chacha20_neon_add_all_counters), + [L_chacha20_neon_rol8] "r" (L_chacha20_neon_rol8) + : "memory", "cc", + "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", + "x13", "x14", "x15", "x16", "x17", "x22", "x19", "x20", "x21", + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", + "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27" + ); +} +#endif /* __aarch64__ */ + +/** + * Converts word into bytes with rotations having been done. 
+ */ +static WC_INLINE int wc_Chacha_encrypt_256(const word32 input[CHACHA_CHUNK_WORDS], const byte* m, byte* c) +{ +#ifdef CHACHA_TEST + printf("Entering wc_Chacha_encrypt_256\n"); +#endif /*CHACHA_TEST */ + +#ifdef __aarch64__ + __asm__ __volatile__ ( + // v0-v3 - first block + // v12 first block helper + // v4-v7 - second block + // v13 second block helper + // v8-v11 - third block + // v14 third block helper + // w4-w19 - fourth block + + // v0 0 1 2 3 + // v1 4 5 6 7 + // v2 8 9 10 11 + // v3 12 13 14 15 + // load CHACHA state with indices placed as shown above + /* Load state to encrypt */ + "LD1 {v20.4S-v23.4S}, [%[input]] \n\t" + /* Load index look-up for rotating left 8 bits */ + "LD1 {v24.16B}, [%[L_chacha20_neon_rol8]] \n\t" + /* Move state into regular registers */ + "MOV x4, v20.D[0] \n\t" + "MOV x6, v20.D[1] \n\t" + "MOV x8, v21.D[0] \n\t" + "MOV x10, v21.D[1] \n\t" + "MOV x12, v22.D[0] \n\t" + "MOV x14, v22.D[1] \n\t" + "MOV x16, v23.D[0] \n\t" + "MOV x22, v23.D[1] \n\t" + /* Move state into vector registers (x3) */ + "MOV v0.16B, v20.16B \n\t" + "MOV v1.16B, v21.16B \n\t" + "LSR x19, x22, #32 \n\t" + "MOV v2.16B, v22.16B \n\t" + "ADD w20, w16, #1 \n\t" + "MOV v3.16B, v23.16B \n\t" + "LSR x17, x16, #32 \n\t" + "MOV v4.16B, v20.16B \n\t" + "MOV v5.16B, v21.16B \n\t" + "LSR x15, x14, #32 \n\t" + "MOV v6.16B, v22.16B \n\t" + "ADD w21, w16, #2 \n\t" + "MOV v7.16B, v23.16B \n\t" + "LSR x13, x12, #32 \n\t" + "MOV v8.16B, v20.16B \n\t" + "MOV v9.16B, v21.16B \n\t" + "LSR x11, x10, #32 \n\t" + "MOV v10.16B, v22.16B \n\t" + "ADD w16, w16, #3 \n\t" + "MOV v11.16B, v23.16B \n\t" + "LSR x9, x8, #32 \n\t" + /* Set counter word */ + "MOV v7.S[0], w20 \n\t" + "LSR x7, x6, #32 \n\t" + "MOV v11.S[0], w21 \n\t" + "LSR x5, x4, #32 \n\t" + /* Set number of odd+even rounds to perform */ + "MOV w3, #10 \n\t" + "\n" + "L_chacha20_arm64_256_loop_%=: \n\t" + "SUBS w3, w3, #1 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD w4, w4, w8 \n\t" + "ADD v0.4S, v0.4S, v1.4S \n\t" + "ADD w5, w5, w9 \n\t" + "ADD v4.4S, v4.4S, v5.4S \n\t" + "ADD w6, w6, w10 \n\t" + "ADD v8.4S, v8.4S, v9.4S \n\t" + "ADD w7, w7, w11 \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "EOR w16, w16, w4 \n\t" + "EOR v7.16B, v7.16B, v4.16B \n\t" + "EOR w17, w17, w5 \n\t" + "EOR v11.16B, v11.16B, v8.16B \n\t" + "EOR w22, w22, w6 \n\t" + "REV32 v3.8H, v3.8H \n\t" + "EOR w19, w19, w7 \n\t" + "REV32 v7.8H, v7.8H \n\t" + "ROR w16, w16, #16 \n\t" + "REV32 v11.8H, v11.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ROR w17, w17, #16 \n\t" + "ADD v2.4S, v2.4S, v3.4S \n\t" + "ROR w22, w22, #16 \n\t" + "ADD v6.4S, v6.4S, v7.4S \n\t" + "ROR w19, w19, #16 \n\t" + "ADD v10.4S, v10.4S, v11.4S \n\t" + "ADD w12, w12, w16 \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "ADD w13, w13, w17 \n\t" + "EOR v13.16B, v5.16B, v6.16B \n\t" + "ADD w14, w14, w22 \n\t" + "EOR v14.16B, v9.16B, v10.16B \n\t" + "ADD w15, w15, w19 \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "EOR w8, w8, w12 \n\t" + "SHL v5.4S, v13.4S, #12 \n\t" + "EOR w9, w9, w13 \n\t" + "SHL v9.4S, v14.4S, #12 \n\t" + "EOR w10, w10, w14 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + "EOR w11, w11, w15 \n\t" + "SRI v5.4S, v13.4S, #20 \n\t" + "ROR w8, w8, #20 \n\t" + "SRI v9.4S, v14.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ROR w9, w9, #20 \n\t" + "ADD v0.4S, v0.4S, v1.4S \n\t" + "ROR w10, w10, #20 \n\t" + "ADD v4.4S, v4.4S, v5.4S \n\t" + "ROR w11, w11, #20 \n\t" + "ADD v8.4S, v8.4S, v9.4S \n\t" + "ADD w4, w4, w8 \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "ADD w5, w5, w9 \n\t" + "EOR v7.16B, 
v7.16B, v4.16B \n\t" + "ADD w6, w6, w10 \n\t" + "EOR v11.16B, v11.16B, v8.16B \n\t" + "ADD w7, w7, w11 \n\t" + "TBL v3.16B, { v3.16B }, v24.16B \n\t" + "EOR w16, w16, w4 \n\t" + "TBL v7.16B, { v7.16B }, v24.16B \n\t" + "EOR w17, w17, w5 \n\t" + "TBL v11.16B, { v11.16B }, v24.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "EOR w22, w22, w6 \n\t" + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR w19, w19, w7 \n\t" + "ADD v6.4S, v6.4S, v7.4S \n\t" + "ROR w16, w16, #24 \n\t" + "ADD v10.4S, v10.4S, v11.4S \n\t" + "ROR w17, w17, #24 \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "ROR w22, w22, #24 \n\t" + "EOR v13.16B, v5.16B, v6.16B \n\t" + "ROR w19, w19, #24 \n\t" + "EOR v14.16B, v9.16B, v10.16B \n\t" + "ADD w12, w12, w16 \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "ADD w13, w13, w17 \n\t" + "SHL v5.4S, v13.4S, #7 \n\t" + "ADD w14, w14, w22 \n\t" + "SHL v9.4S, v14.4S, #7 \n\t" + "ADD w15, w15, w19 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EOR w8, w8, w12 \n\t" + "SRI v5.4S, v13.4S, #25 \n\t" + "EOR w9, w9, w13 \n\t" + "SRI v9.4S, v14.4S, #25 \n\t" + "EOR w10, w10, w14 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #4 \n\t" + "EOR w11, w11, w15 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + "ROR w8, w8, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #12 \n\t" + "ROR w9, w9, #25 \n\t" + "EXT v5.16B, v5.16B, v5.16B, #4 \n\t" + "ROR w10, w10, #25 \n\t" + "EXT v6.16B, v6.16B, v6.16B, #8 \n\t" + "ROR w11, w11, #25 \n\t" + "EXT v7.16B, v7.16B, v7.16B, #12 \n\t" + "EXT v9.16B, v9.16B, v9.16B, #4 \n\t" + "EXT v10.16B, v10.16B, v10.16B, #8 \n\t" + "EXT v11.16B, v11.16B, v11.16B, #12 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD w4, w4, w9 \n\t" + "ADD v0.4S, v0.4S, v1.4S \n\t" + "ADD w5, w5, w10 \n\t" + "ADD v4.4S, v4.4S, v5.4S \n\t" + "ADD w6, w6, w11 \n\t" + "ADD v8.4S, v8.4S, v9.4S \n\t" + "ADD w7, w7, w8 \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "EOR w19, w19, w4 \n\t" + "EOR v7.16B, v7.16B, v4.16B \n\t" + "EOR w16, w16, w5 \n\t" + "EOR v11.16B, v11.16B, v8.16B \n\t" + "EOR w17, w17, w6 \n\t" + "REV32 v3.8H, v3.8H \n\t" + "EOR w22, w22, w7 \n\t" + "REV32 v7.8H, v7.8H \n\t" + "ROR w19, w19, #16 \n\t" + "REV32 v11.8H, v11.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ROR w16, w16, #16 \n\t" + "ADD v2.4S, v2.4S, v3.4S \n\t" + "ROR w17, w17, #16 \n\t" + "ADD v6.4S, v6.4S, v7.4S \n\t" + "ROR w22, w22, #16 \n\t" + "ADD v10.4S, v10.4S, v11.4S \n\t" + "ADD w14, w14, w19 \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "ADD w15, w15, w16 \n\t" + "EOR v13.16B, v5.16B, v6.16B \n\t" + "ADD w12, w12, w17 \n\t" + "EOR v14.16B, v9.16B, v10.16B \n\t" + "ADD w13, w13, w22 \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "EOR w9, w9, w14 \n\t" + "SHL v5.4S, v13.4S, #12 \n\t" + "EOR w10, w10, w15 \n\t" + "SHL v9.4S, v14.4S, #12 \n\t" + "EOR w11, w11, w12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + "EOR w8, w8, w13 \n\t" + "SRI v5.4S, v13.4S, #20 \n\t" + "ROR w9, w9, #20 \n\t" + "SRI v9.4S, v14.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ROR w10, w10, #20 \n\t" + "ADD v0.4S, v0.4S, v1.4S \n\t" + "ROR w11, w11, #20 \n\t" + "ADD v4.4S, v4.4S, v5.4S \n\t" + "ROR w8, w8, #20 \n\t" + "ADD v8.4S, v8.4S, v9.4S \n\t" + "ADD w4, w4, w9 \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "ADD w5, w5, w10 \n\t" + "EOR v7.16B, v7.16B, v4.16B \n\t" + "ADD w6, w6, w11 \n\t" + "EOR v11.16B, v11.16B, v8.16B \n\t" + "ADD w7, w7, w8 \n\t" + "TBL v3.16B, { v3.16B }, v24.16B \n\t" + "EOR w19, w19, w4 \n\t" + "TBL v7.16B, { v7.16B }, v24.16B \n\t" + "EOR w16, w16, w5 \n\t" + "TBL v11.16B, { v11.16B }, v24.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "EOR w17, w17, 
w6 \n\t" + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR w22, w22, w7 \n\t" + "ADD v6.4S, v6.4S, v7.4S \n\t" + "ROR w19, w19, #24 \n\t" + "ADD v10.4S, v10.4S, v11.4S \n\t" + "ROR w16, w16, #24 \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "ROR w17, w17, #24 \n\t" + "EOR v13.16B, v5.16B, v6.16B \n\t" + "ROR w22, w22, #24 \n\t" + "EOR v14.16B, v9.16B, v10.16B \n\t" + "ADD w14, w14, w19 \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "ADD w15, w15, w16 \n\t" + "SHL v5.4S, v13.4S, #7 \n\t" + "ADD w12, w12, w17 \n\t" + "SHL v9.4S, v14.4S, #7 \n\t" + "ADD w13, w13, w22 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EOR w9, w9, w14 \n\t" + "SRI v5.4S, v13.4S, #25 \n\t" + "EOR w10, w10, w15 \n\t" + "SRI v9.4S, v14.4S, #25 \n\t" + "EOR w11, w11, w12 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #12 \n\t" + "EOR w8, w8, w13 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + "ROR w9, w9, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #4 \n\t" + "ROR w10, w10, #25 \n\t" + "EXT v5.16B, v5.16B, v5.16B, #12 \n\t" + "ROR w11, w11, #25 \n\t" + "EXT v6.16B, v6.16B, v6.16B, #8 \n\t" + "ROR w8, w8, #25 \n\t" + "EXT v7.16B, v7.16B, v7.16B, #4 \n\t" + "EXT v9.16B, v9.16B, v9.16B, #12 \n\t" + "EXT v10.16B, v10.16B, v10.16B, #8 \n\t" + "EXT v11.16B, v11.16B, v11.16B, #4 \n\t" + "BNE L_chacha20_arm64_256_loop_%= \n\t" + /* Load message */ + "LD1 {v16.4S-v19.4S}, [%[m]], #64 \n\t" + /* Add one (2 added during calculating vector results) */ + "ADD w16, w16, #1 \n\t" + /* Add back state, XOR in message and store (load next block) */ + "ADD v0.4S, v0.4S, v20.4S \n\t" + "ADD v1.4S, v1.4S, v21.4S \n\t" + "ADD v2.4S, v2.4S, v22.4S \n\t" + "ADD v3.4S, v3.4S, v23.4S \n\t" + "EOR v0.16B, v0.16B, v16.16B \n\t" + "EOR v1.16B, v1.16B, v17.16B \n\t" + "EOR v2.16B, v2.16B, v18.16B \n\t" + "EOR v3.16B, v3.16B, v19.16B \n\t" + "LD1 {v16.4S-v19.4S}, [%[m]], #64 \n\t" + "ST1 {v0.4S-v3.4S}, [%[c]], #64 \n\t" + "MOV v23.S[0], w20 \n\t" + "ADD v4.4S, v4.4S, v20.4S \n\t" + "ADD v5.4S, v5.4S, v21.4S \n\t" + "ADD v6.4S, v6.4S, v22.4S \n\t" + "ADD v7.4S, v7.4S, v23.4S \n\t" + "EOR v4.16B, v4.16B, v16.16B \n\t" + "EOR v5.16B, v5.16B, v17.16B \n\t" + "EOR v6.16B, v6.16B, v18.16B \n\t" + "EOR v7.16B, v7.16B, v19.16B \n\t" + "LD1 {v16.4S-v19.4S}, [%[m]], #64 \n\t" + "ST1 {v4.4S-v7.4S}, [%[c]], #64 \n\t" + "MOV v23.S[0], w21 \n\t" + "ADD v8.4S, v8.4S, v20.4S \n\t" + "ADD v9.4S, v9.4S, v21.4S \n\t" + "ADD v10.4S, v10.4S, v22.4S \n\t" + "ADD v11.4S, v11.4S, v23.4S \n\t" + "EOR v8.16B, v8.16B, v16.16B \n\t" + "EOR v9.16B, v9.16B, v17.16B \n\t" + "EOR v10.16B, v10.16B, v18.16B \n\t" + "EOR v11.16B, v11.16B, v19.16B \n\t" + "LD1 {v16.4S-v19.4S}, [%[m]], #64 \n\t" + "ST1 {v8.4S-v11.4S}, [%[c]], #64 \n\t" + /* Move regular registers into vector registers for adding and xor */ + "ORR x4, x4, x5, lsl #32 \n\t" + "ORR x6, x6, x7, lsl #32 \n\t" + "ORR x8, x8, x9, lsl #32 \n\t" + "MOV v12.D[0], x4 \n\t" + "ORR x10, x10, x11, lsl #32 \n\t" + "MOV v12.D[1], x6 \n\t" + "ORR x12, x12, x13, lsl #32 \n\t" + "MOV v13.D[0], x8 \n\t" + "ORR x14, x14, x15, lsl #32 \n\t" + "MOV v13.D[1], x10 \n\t" + "ORR x16, x16, x17, lsl #32 \n\t" + "MOV v14.D[0], x12 \n\t" + "ORR x22, x22, x19, lsl #32 \n\t" + "MOV v14.D[1], x14 \n\t" + "MOV v15.D[0], x16 \n\t" + "MOV v15.D[1], x22 \n\t" + /* Add back state, XOR in message and store */ + "ADD v12.4S, v12.4S, v20.4S \n\t" + "ADD v13.4S, v13.4S, v21.4S \n\t" + "ADD v14.4S, v14.4S, v22.4S \n\t" + "ADD v15.4S, v15.4S, v23.4S \n\t" + "EOR v12.16B, v12.16B, v16.16B \n\t" + "EOR v13.16B, v13.16B, v17.16B \n\t" + "EOR v14.16B, v14.16B, v18.16B \n\t" + "EOR v15.16B, v15.16B, 
v19.16B \n\t" + "ST1 {v12.4S-v15.4S}, [%[c]], #64 \n\t" + : [input] "+r" (input), [m] "+r" (m), [c] "+r" (c) + : [L_chacha20_neon_rol8] "r" (L_chacha20_neon_rol8) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", + "x10", "x11", "x12", "x13", "x14", "x15", "x16", + "x17", "x22", "x19", "x20", "x21", "v0", "v1", + "v2", "v3", "v4", "v5", "v6", "v7", "v8", + "v9", "v10", "v11", "v12", "v13", "v14", + "v15", "v16", "v17", "v18", "v19", "v20", + "v21", "v22", "v23" + ); +#else + word32 x[CHACHA_CHUNK_WORDS]; + word32* x_addr = x; + __asm__ __volatile__ ( + // The paper NEON crypto by Daniel J. Bernstein and Peter Schwabe was used to optimize for ARM + // https://cryptojedi.org/papers/neoncrypto-20120320.pdf + + ".align 2 \n\t" + "LDR r14, %[input] \n\t" // load input address + + "LDM r14, { r0-r12 } \n\t" + // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 + // 0 1 2 3 4 5 6 7 8 9 10 11 12 + "VMOV d0, r0, r1 \n\t" + "VMOV d1, r2, r3 \n\t" + "VMOV d2, r4, r5 \n\t" + "VMOV d3, r6, r7 \n\t" + "VMOV d4, r8, r9 \n\t" + "STRD r10, r11, %[x_10] \n\t" + "VMOV d5, r10, r11 \n\t" + "LDRD r11, r10, [r14, #4*14] \n\t" + "VMOV q4, q0 \n\t" + "VMOV q5, q1 \n\t" + "VMOV q6, q2 \n\t" + "VMOV q8, q0 \n\t" + "VMOV q9, q1 \n\t" + "VMOV q10, q2 \n\t" + // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 + // 0 1 2 3 4 5 6 7 8 9 15 14 12 + "VMOV d7, r11, r10 \n\t" + "STR r10, %[x_15] \n\t" + "VMOV d15, r11, r10 \n\t" + "VMOV d23, r11, r10 \n\t" + "MOV r10, r12 \n\t" + "MOV r12, r11 \n\t" + "LDR r11, [r14, #4*13] \n\t" + // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 + // 0 1 2 3 4 5 6 7 8 9 12 13 14 + + "MOV r14, %[rounds] \n\t" + + "VMOV d6, r10, r11 \n\t" + "ADD r10, r10, #1 \n\t" + "VMOV d14, r10, r11 \n\t" + "ADD r10, r10, #1 \n\t" + "VMOV d22, r10, r11 \n\t" + "ADD r10, r10, #1 \n\t" // ARM calculates the fourth block (two was already added earlier) + "\n" + "L_chacha20_arm32_256_loop_%=: \n\t" + "SUBS r14, r14, #1 \n\t" + + // 0, 4, 8, 12 + // 1, 5, 9, 13 + + // ODD ROUND + "ADD r0, r0, r4 \n\t" // 0 0 4 + "VADD.I32 q0, q0, q1 \n\t" + "ADD r1, r1, r5 \n\t" // 1 1 5 + "VADD.I32 q4, q4, q5 \n\t" + "EOR r10, r10, r0 \n\t" // 12 12 0 + "VADD.I32 q8, q8, q9 \n\t" + "EOR r11, r11, r1 \n\t" // 13 13 1 + "VEOR q12, q3, q0 \n\t" + "ROR r10, r10, #16 \n\t" // 12 12 + "VEOR q13, q7, q4 \n\t" + "ROR r11, r11, #16 \n\t" // 13 13 + "VEOR q14, q11, q8 \n\t" + "ADD r8, r8, r10 \n\t" // 8 8 12 + // rotation by 16 bits may be done by reversing the 16 bit elements in 32 bit words + "VREV32.16 q3, q12 \n\t" + "ADD r9, r9, r11 \n\t" // 9 9 13 + "VREV32.16 q7, q13 \n\t" + "EOR r4, r4, r8 \n\t" // 4 4 8 + "VREV32.16 q11, q14 \n\t" + + "EOR r5, r5, r9 \n\t" // 5 5 9 + "VADD.I32 q2, q2, q3 \n\t" + "ROR r4, r4, #20 \n\t" // 4 4 + "VADD.I32 q6, q6, q7 \n\t" + "ROR r5, r5, #20 \n\t" // 5 5 + "VADD.I32 q10, q10, q11 \n\t" + "ADD r0, r0, r4 \n\t" // 0 0 4 + "VEOR q12, q1, q2 \n\t" + "ADD r1, r1, r5 \n\t" // 1 1 5 + "VEOR q13, q5, q6 \n\t" + "EOR r10, r10, r0 \n\t" // 12 12 0 + "VEOR q14, q9, q10 \n\t" + "EOR r11, r11, r1 \n\t" // 13 13 1 + // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + "VSHL.I32 q1, q12, #12 \n\t" + "ROR r10, r10, #24 \n\t" // 12 12 + "VSHL.I32 q5, q13, #12 \n\t" + "ROR r11, r11, #24 \n\t" // 13 13 + "VSHL.I32 q9, q14, #12 \n\t" + "ADD r8, r8, r10 \n\t" // 8 8 12 + "VSRI.I32 q1, q12, #20 \n\t" + "ADD r9, r9, r11 \n\t" // 9 9 13 + "VSRI.I32 q5, q13, #20 \n\t" + "STR r11, %[x_13] \n\t" + "VSRI.I32 q9, q14, #20 \n\t" + + "LDR r11, %[x_15] \n\t" + "VADD.I32 q0, q0, q1 \n\t" + "EOR 
r4, r4, r8 \n\t" // 4 4 8 + "VADD.I32 q4, q4, q5 \n\t" + "STR r8, %[x_8] \n\t" + "VADD.I32 q8, q8, q9 \n\t" + "LDR r8, %[x_10] \n\t" + "VEOR q12, q3, q0 \n\t" + "EOR r5, r5, r9 \n\t" // 5 5 9 + "VEOR q13, q7, q4 \n\t" + "STR r9, %[x_9] \n\t" + "VEOR q14, q11, q8 \n\t" + "LDR r9, %[x_11] \n\t" + // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + "VSHL.I32 q3, q12, #8 \n\t" + "ROR r4, r4, #25 \n\t" // 4 4 + "VSHL.I32 q7, q13, #8 \n\t" + "ROR r5, r5, #25 \n\t" // 5 5 + "VSHL.I32 q11, q14, #8 \n\t" + + // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 + // 0 1 2 3 4 5 6 7 10 11 12 15 14 + + // 2, 6, 10, 14 + // 3, 7, 11, 15 + + "ADD r2, r2, r6 \n\t" // 2 2 6 + "VSRI.I32 q3, q12, #24 \n\t" + "ADD r3, r3, r7 \n\t" // 3 3 7 + "VSRI.I32 q7, q13, #24 \n\t" + "EOR r12, r12, r2 \n\t" // 14 14 2 + "VSRI.I32 q11, q14, #24 \n\t" + + "EOR r11, r11, r3 \n\t" // 15 15 3 + "VADD.I32 q2, q2, q3 \n\t" + "ROR r12, r12, #16 \n\t" // 14 14 + "VADD.I32 q6, q6, q7 \n\t" + "ROR r11, r11, #16 \n\t" // 15 15 + "VADD.I32 q10, q10, q11 \n\t" + "ADD r8, r8, r12 \n\t" // 10 10 14 + "VEOR q12, q1, q2 \n\t" + "ADD r9, r9, r11 \n\t" // 11 11 15 + "VEOR q13, q5, q6 \n\t" + "EOR r6, r6, r8 \n\t" // 6 6 10 + "VEOR q14, q9, q10 \n\t" + "EOR r7, r7, r9 \n\t" // 7 7 11 + // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + "VSHL.I32 q1, q12, #7 \n\t" + "ROR r6, r6, #20 \n\t" // 6 6 + "VSHL.I32 q5, q13, #7 \n\t" + "ROR r7, r7, #20 \n\t" // 7 7 + "VSHL.I32 q9, q14, #7 \n\t" + "ADD r2, r2, r6 \n\t" // 2 2 6 + "VSRI.I32 q1, q12, #25 \n\t" + "ADD r3, r3, r7 \n\t" // 3 3 7 + "VSRI.I32 q5, q13, #25 \n\t" + "EOR r12, r12, r2 \n\t" // 14 14 2 + "VSRI.I32 q9, q14, #25 \n\t" + + // EVEN ROUND + + "EOR r11, r11, r3 \n\t" // 15 15 3 + "VEXT.8 q1, q1, q1, #4 \n\t" // permute elements left by one + "ROR r12, r12, #24 \n\t" // 14 14 + "VEXT.8 q2, q2, q2, #8 \n\t" // permute elements left by two + "ROR r11, r11, #24 \n\t" // 15 15 + "VEXT.8 q3, q3, q3, #12 \n\t" // permute elements left by three + + "ADD r8, r8, r12 \n\t" // 10 10 14 + "VEXT.8 q5, q5, q5, #4 \n\t" // permute elements left by one + "ADD r9, r9, r11 \n\t" // 11 11 15 + "VEXT.8 q6, q6, q6, #8 \n\t" // permute elements left by two + "EOR r6, r6, r8 \n\t" // 6 6 10 + "VEXT.8 q7, q7, q7, #12 \n\t" // permute elements left by three + + "EOR r7, r7, r9 \n\t" // 7 7 11 + "VEXT.8 q9, q9, q9, #4 \n\t" // permute elements left by one + "ROR r6, r6, #25 \n\t" // 6 6 + "VEXT.8 q10, q10, q10, #8 \n\t" // permute elements left by two + "ROR r7, r7, #25 \n\t" // 7 7 + "VEXT.8 q11, q11, q11, #12 \n\t" // permute elements left by three + + // 0, 5, 10, 15 + // 1, 6, 11, 12 + + "ADD r0, r0, r5 \n\t" // 0 0 5 + "VADD.I32 q0, q0, q1 \n\t" + "ADD r1, r1, r6 \n\t" // 1 1 6 + "VADD.I32 q4, q4, q5 \n\t" + "EOR r11, r11, r0 \n\t" // 15 15 0 + "VADD.I32 q8, q8, q9 \n\t" + "EOR r10, r10, r1 \n\t" // 12 12 1 + "VEOR q12, q3, q0 \n\t" + "ROR r11, r11, #16 \n\t" // 15 15 + "VEOR q13, q7, q4 \n\t" + "ROR r10, r10, #16 \n\t" // 12 12 + "VEOR q14, q11, q8 \n\t" + "ADD r8, r8, r11 \n\t" // 10 10 15 + // rotation by 16 bits may be done by reversing the 16 bit elements in 32 bit words + "VREV32.16 q3, q12 \n\t" + "ADD r9, r9, r10 \n\t" // 11 11 12 + "VREV32.16 q7, q13 \n\t" + "EOR r5, r5, r8 \n\t" // 5 5 10 + "VREV32.16 q11, q14 \n\t" + + "EOR r6, r6, r9 \n\t" // 6 6 11 + "VADD.I32 q2, q2, q3 \n\t" + "ROR r5, r5, #20 \n\t" // 5 5 + "VADD.I32 q6, q6, q7 \n\t" + "ROR r6, r6, #20 \n\t" // 6 6 + "VADD.I32 q10, q10, q11 \n\t" + "ADD 
r0, r0, r5 \n\t" // 0 0 5 + "VEOR q12, q1, q2 \n\t" + "ADD r1, r1, r6 \n\t" // 1 1 6 + "VEOR q13, q5, q6 \n\t" + "EOR r11, r11, r0 \n\t" // 15 15 0 + "VEOR q14, q9, q10 \n\t" + "EOR r10, r10, r1 \n\t" // 12 12 1 + // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + "VSHL.I32 q1, q12, #12 \n\t" + "ROR r11, r11, #24 \n\t" // 15 15 + "VSHL.I32 q5, q13, #12 \n\t" + "ROR r10, r10, #24 \n\t" // 12 12 + "VSHL.I32 q9, q14, #12 \n\t" + "ADD r8, r8, r11 \n\t" // 10 10 15 + "VSRI.I32 q1, q12, #20 \n\t" + "STR r11, %[x_15] \n\t" + "VSRI.I32 q5, q13, #20 \n\t" + "LDR r11, %[x_13] \n\t" + "VSRI.I32 q9, q14, #20 \n\t" + + "ADD r9, r9, r10 \n\t" // 11 11 12 + "VADD.I32 q0, q0, q1 \n\t" + "EOR r5, r5, r8 \n\t" // 5 5 10 + "VADD.I32 q4, q4, q5 \n\t" + "STR r8, %[x_10] \n\t" + "VADD.I32 q8, q8, q9 \n\t" + "LDR r8, %[x_8] \n\t" + "VEOR q12, q3, q0 \n\t" + "EOR r6, r6, r9 \n\t" // 6 6 11 + "VEOR q13, q7, q4 \n\t" + "STR r9, %[x_11] \n\t" + "VEOR q14, q11, q8 \n\t" + "LDR r9, %[x_9] \n\t" + // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + "VSHL.I32 q3, q12, #8 \n\t" + "ROR r5, r5, #25 \n\t" // 5 5 + "VSHL.I32 q7, q13, #8 \n\t" + "ROR r6, r6, #25 \n\t" // 6 6 + "VSHL.I32 q11, q14, #8 \n\t" + + // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 + // 0 1 2 3 4 5 6 7 8 9 12 13 14 + + // 2, 7, 8, 13 + // 3, 4, 9, 14 + + "ADD r2, r2, r7 \n\t" // 2 2 7 + "VSRI.I32 q3, q12, #24 \n\t" + "ADD r3, r3, r4 \n\t" // 3 3 4 + "VSRI.I32 q7, q13, #24 \n\t" + "EOR r11, r11, r2 \n\t" // 13 13 2 + "VSRI.I32 q11, q14, #24 \n\t" + + "EOR r12, r12, r3 \n\t" // 14 14 3 + "VADD.I32 q2, q2, q3 \n\t" + "ROR r11, r11, #16 \n\t" // 13 13 + "VADD.I32 q6, q6, q7 \n\t" + "ROR r12, r12, #16 \n\t" // 14 14 + "VADD.I32 q10, q10, q11 \n\t" + "ADD r8, r8, r11 \n\t" // 8 8 13 + "VEOR q12, q1, q2 \n\t" + "ADD r9, r9, r12 \n\t" // 9 9 14 + "VEOR q13, q5, q6 \n\t" + "EOR r7, r7, r8 \n\t" // 7 7 8 + "VEOR q14, q9, q10 \n\t" + "EOR r4, r4, r9 \n\t" // 4 4 9 + // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + "VSHL.I32 q1, q12, #7 \n\t" + "ROR r7, r7, #20 \n\t" // 7 7 + "VSHL.I32 q5, q13, #7 \n\t" + "ROR r4, r4, #20 \n\t" // 4 4 + "VSHL.I32 q9, q14, #7 \n\t" + "ADD r2, r2, r7 \n\t" // 2 2 7 + "VSRI.I32 q1, q12, #25 \n\t" + "ADD r3, r3, r4 \n\t" // 3 3 4 + "VSRI.I32 q5, q13, #25 \n\t" + "EOR r11, r11, r2 \n\t" // 13 13 2 + "VSRI.I32 q9, q14, #25 \n\t" + + "EOR r12, r12, r3 \n\t" // 14 14 3 + "VEXT.8 q1, q1, q1, #12 \n\t" // permute elements left by three + "ROR r11, r11, #24 \n\t" // 13 13 + "VEXT.8 q2, q2, q2, #8 \n\t" // permute elements left by two + "ROR r12, r12, #24 \n\t" // 14 14 + "VEXT.8 q3, q3, q3, #4 \n\t" // permute elements left by one + + "ADD r8, r8, r11 \n\t" // 8 8 13 + "VEXT.8 q5, q5, q5, #12 \n\t" // permute elements left by three + "ADD r9, r9, r12 \n\t" // 9 9 14 + "VEXT.8 q6, q6, q6, #8 \n\t" // permute elements left by two + "EOR r7, r7, r8 \n\t" // 7 7 8 + "VEXT.8 q7, q7, q7, #4 \n\t" // permute elements left by one + + "EOR r4, r4, r9 \n\t" // 4 4 9 + "VEXT.8 q9, q9, q9, #12 \n\t" // permute elements left by three + "ROR r7, r7, #25 \n\t" // 7 7 + "VEXT.8 q10, q10, q10, #8 \n\t" // permute elements left by two + "ROR r4, r4, #25 \n\t" // 4 4 + "VEXT.8 q11, q11, q11, #4 \n\t" // permute elements left by one + + "BNE L_chacha20_arm32_256_loop_%= \n\t" + + "LDR r14, %[x_addr] \n\t" // load address of x to r14 + // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 + // 0 1 2 3 4 5 6 7 8 9 12 13 14 + "ADD 
r10, r10, #3 \n\t" // add three here to make later NEON easier + "STM r14, { r0-r9 } \n\t" + "STRD r10, r11, [r14, #4*12] \n\t" + "LDR r9, %[input] \n\t" // load input address + "STR r12, [r14, #4*14] \n\t" + "LDR r10, %[c] \n\t" // load c address + + "VLDM r9, { q12-q15 } \n\t" + "LDR r12, %[m] \n\t" // load m address + + "VADD.I32 q0, q0, q12 \n\t" + "VADD.I32 q1, q1, q13 \n\t" + "VADD.I32 q2, q2, q14 \n\t" + "VADD.I32 q3, q3, q15 \n\t" + + "VADD.I32 q4, q4, q12 \n\t" + "VADD.I32 q5, q5, q13 \n\t" + "VADD.I32 q6, q6, q14 \n\t" + "VADD.I32 q7, q7, q15 \n\t" + + "MOV r11, #1 \n\t" + + "VADD.I32 q8, q8, q12 \n\t" + "VMOV.I32 q12, #0 \n\t" + "VADD.I32 q9, q9, q13 \n\t" + "VMOV.I32 d24[0], r11 \n\t" + "VADD.I32 q10, q10, q14 \n\t" + "VADD.I32 q11, q11, q15 \n\t" + + "VADD.I32 q11, q11, q12 \n\t" // add one to counter + "VADD.I32 q7, q7, q12 \n\t" // add one to counter + "VADD.I32 q11, q11, q12 \n\t" // add one to counter + + "VLDM r12!, { q12-q15 } \n\t" // load m + "VEOR q0, q0, q12 \n\t" + "VEOR q1, q1, q13 \n\t" + "VEOR q2, q2, q14 \n\t" + "VEOR q3, q3, q15 \n\t" + "VSTM r10!, { q0-q3 } \n\t" // store to c + + "VLDM r14, { q0-q3 } \n\t " // load final block from x + + "VLDM r12!, { q12-q15 } \n\t" // load m + "VEOR q4, q4, q12 \n\t" + "VEOR q5, q5, q13 \n\t" + "VEOR q6, q6, q14 \n\t" + "VEOR q7, q7, q15 \n\t" + "VSTM r10!, { q4-q7 } \n\t" // store to c + + "VLDM r9, { q4-q7 } \n\t" // load input + + "VLDM r12!, { q12-q15 } \n\t" // load m + "VEOR q8, q8, q12 \n\t" + "VEOR q9, q9, q13 \n\t" + "VEOR q10, q10, q14 \n\t" + "VEOR q11, q11, q15 \n\t" + "VSTM r10!, { q8-q11 } \n\t" // store to c + + "VLDM r12!, { q12-q15 } \n\t" // load m + "VADD.I32 q0, q0, q4 \n\t" + "VADD.I32 q1, q1, q5 \n\t" + "VADD.I32 q2, q2, q6 \n\t" + "VADD.I32 q3, q3, q7 \n\t" // three was added earlier + "VEOR q0, q0, q12 \n\t" + "VEOR q1, q1, q13 \n\t" + "VEOR q2, q2, q14 \n\t" + "VEOR q3, q3, q15 \n\t" + "VSTM r10!, { q0-q3 } \n\t" // store to c + + : [c] "+m" (c), + [x_0] "=m" (x), + [x_8] "=m" (x[8]), + [x_9] "=m" (x[9]), + [x_10] "=m" (x[10]), + [x_11] "=m" (x[11]), + [x_13] "=m" (x[13]), + [x_15] "=m" (x[15]) + : [rounds] "I" (ROUNDS/2), [input] "m" (input), + [chacha_chunk_bytes] "I" (CHACHA_CHUNK_BYTES), + [m] "m" (m), [x_addr] "m" (x_addr) + : "memory", "cc", + "r0", "r1", "r2", "r3", + "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r14", + "q0", "q1", "q2", "q3", "q4", + "q5", "q6", "q7", "q8", "q9", + "q10", "q11", "q12", "q13", "q14", "q15" + ); + +#endif /* __aarch64__ */ + return CHACHA_CHUNK_BYTES * 4; +} + + +static WC_INLINE int wc_Chacha_encrypt_128(const word32 input[CHACHA_CHUNK_WORDS], const byte* m, byte* c) +{ +#ifdef CHACHA_TEST + printf("Entering wc_Chacha_encrypt_128\n"); +#endif /*CHACHA_TEST */ + +#ifdef __aarch64__ + __asm__ __volatile__ ( + /* Load incrementer register to modify counter */ + "LD1 {v22.16B}, [%[L_chacha20_neon_inc_first_word]] \n\t" + /* Load index look-up for rotating left 8 bits */ + "LD1 {v23.16B}, [%[L_chacha20_neon_rol8]] \n\t" + /* Load state to encrypt */ + "LD1 {v18.4S-v21.4S}, [%[input]] \n\t" + /* Load message */ + "LD1 {v14.4S-v17.4S}, [%[m]], #64 \n\t" + /* Move state into vector registers (x3) */ + "MOV v0.16B, v18.16B \n\t" + "MOV v1.16B, v19.16B \n\t" + "MOV v2.16B, v20.16B \n\t" + "MOV v3.16B, v21.16B \n\t" + "MOV v4.16B, v18.16B \n\t" + "MOV v5.16B, v19.16B \n\t" + "MOV v6.16B, v20.16B \n\t" + "MOV v7.16B, v21.16B \n\t" + /* Add counter word */ + "ADD v7.4S, v7.4S, v22.4S \n\t" + /* Set number of odd+even rounds to perform */ + "MOV w3, #10 
\n\t" + "\n" + "L_chacha20_arm64_128_loop_%=: \n\t" + "SUBS w3, w3, #1 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "ADD v4.4S, v4.4S, v5.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "EOR v7.16B, v7.16B, v4.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + "REV32 v7.8H, v7.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "ADD v6.4S, v6.4S, v7.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "EOR v13.16B, v5.16B, v6.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SHL v5.4S, v13.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + "SRI v5.4S, v13.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "ADD v4.4S, v4.4S, v5.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "EOR v7.16B, v7.16B, v4.16B \n\t" + "TBL v3.16B, { v3.16B }, v23.16B \n\t" + "TBL v7.16B, { v7.16B }, v23.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "ADD v6.4S, v6.4S, v7.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "EOR v13.16B, v5.16B, v6.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SHL v5.4S, v13.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "SRI v5.4S, v13.4S, #25 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #4 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #12 \n\t" + "EXT v5.16B, v5.16B, v5.16B, #4 \n\t" + "EXT v6.16B, v6.16B, v6.16B, #8 \n\t" + "EXT v7.16B, v7.16B, v7.16B, #12 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "ADD v4.4S, v4.4S, v5.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "EOR v7.16B, v7.16B, v4.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + "REV32 v7.8H, v7.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "ADD v6.4S, v6.4S, v7.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "EOR v13.16B, v5.16B, v6.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SHL v5.4S, v13.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + "SRI v5.4S, v13.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "ADD v4.4S, v4.4S, v5.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "EOR v7.16B, v7.16B, v4.16B \n\t" + "TBL v3.16B, { v3.16B }, v23.16B \n\t" + "TBL v7.16B, { v7.16B }, v23.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "ADD v6.4S, v6.4S, v7.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "EOR v13.16B, v5.16B, v6.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SHL v5.4S, v13.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "SRI v5.4S, v13.4S, #25 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #12 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #4 \n\t" + "EXT v5.16B, v5.16B, v5.16B, #12 \n\t" + "EXT v6.16B, v6.16B, v6.16B, #8 \n\t" + "EXT v7.16B, v7.16B, v7.16B, #4 \n\t" + "BNE L_chacha20_arm64_128_loop_%= \n\t" + /* Add back state, XOR in message and store (load next block) */ + "ADD v0.4S, v0.4S, v18.4S \n\t" + "ADD v1.4S, v1.4S, v19.4S \n\t" + "ADD v2.4S, v2.4S, v20.4S \n\t" + "ADD v3.4S, v3.4S, v21.4S \n\t" + "EOR v0.16B, v0.16B, v14.16B \n\t" + "EOR v1.16B, v1.16B, v15.16B \n\t" + "EOR v2.16B, v2.16B, v16.16B \n\t" + "EOR v3.16B, v3.16B, v17.16B \n\t" + "LD1 {v14.4S-v17.4S}, [%[m]], #64 \n\t" + "ST1 {v0.4S-v3.4S}, [%[c]], #64 \n\t" + "ADD v21.4S, v21.4S, v22.4S \n\t" + "ADD v4.4S, v4.4S, v18.4S \n\t" + "ADD v5.4S, v5.4S, v19.4S \n\t" + "ADD v6.4S, v6.4S, v20.4S \n\t" + "ADD v7.4S, v7.4S, v21.4S \n\t" + "EOR v4.16B, v4.16B, v14.16B \n\t" + "EOR v5.16B, v5.16B, v15.16B \n\t" + "EOR v6.16B, v6.16B, v16.16B \n\t" + "EOR v7.16B, v7.16B, 
v17.16B \n\t" + "ST1 {v4.4S-v7.4S}, [%[c]], #64 \n\t" + : [input] "+r" (input), [m] "+r" (m), [c] "+r" (c) + : [L_chacha20_neon_rol8] "r" (L_chacha20_neon_rol8), + [L_chacha20_neon_inc_first_word] "r" (L_chacha20_neon_inc_first_word) + : "memory", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6", + "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21" + ); +#else + __asm__ __volatile__ ( + "MOV r11, %[rounds] \n\t" + "MOV r12, #1 \n\t" + "VLDM %[input], { q0-q3 } \n\t" + "VMOV.I32 q8, #0 \n\t" + "VMOV q4, q0 \n\t" + "VMOV.I32 d16[0], r12 \n\t" + "VMOV q5, q1 \n\t" + "VMOV q6, q2 \n\t" + "VADD.I32 q7, q3, q8 \n\t" // add one to counter + + // store input + "VMOV q10, q0 \n\t" + "VMOV q11, q1 \n\t" + "VMOV q12, q2 \n\t" + "VMOV q13, q3 \n\t" + "\n" + "L_chacha20_arm32_128_loop_%=: \n\t" + "SUBS r11, r11, #1 \n\t" + + // ODD ROUND + "VADD.I32 q0, q0, q1 \n\t" + "VADD.I32 q4, q4, q5 \n\t" + "VEOR q8, q3, q0 \n\t" + "VEOR q9, q7, q4 \n\t" + // rotation by 16 bits may be done by reversing the 16 bit elements in 32 bit words + "VREV32.16 q3, q8 \n\t" + "VREV32.16 q7, q9 \n\t" + + "VADD.I32 q2, q2, q3 \n\t" + "VADD.I32 q6, q6, q7 \n\t" + "VEOR q8, q1, q2 \n\t" + "VEOR q9, q5, q6 \n\t" + // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + "VSHL.I32 q1, q8, #12 \n\t" + "VSHL.I32 q5, q9, #12 \n\t" + "VSRI.I32 q1, q8, #20 \n\t" + "VSRI.I32 q5, q9, #20 \n\t" + + "VADD.I32 q0, q0, q1 \n\t" + "VADD.I32 q4, q4, q5 \n\t" + "VEOR q8, q3, q0 \n\t" + "VEOR q9, q7, q4 \n\t" + // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + "VSHL.I32 q3, q8, #8 \n\t" + "VSHL.I32 q7, q9, #8 \n\t" + "VSRI.I32 q3, q8, #24 \n\t" + "VSRI.I32 q7, q9, #24 \n\t" + + "VADD.I32 q2, q2, q3 \n\t" + "VADD.I32 q6, q6, q7 \n\t" + "VEOR q8, q1, q2 \n\t" + "VEOR q9, q5, q6 \n\t" + // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + "VSHL.I32 q1, q8, #7 \n\t" + "VSHL.I32 q5, q9, #7 \n\t" + "VSRI.I32 q1, q8, #25 \n\t" + "VSRI.I32 q5, q9, #25 \n\t" + + // EVEN ROUND + + "VEXT.8 q1, q1, q1, #4 \n\t" // permute elements left by one + "VEXT.8 q2, q2, q2, #8 \n\t" // permute elements left by two + "VEXT.8 q3, q3, q3, #12 \n\t" // permute elements left by three + + "VEXT.8 q5, q5, q5, #4 \n\t" // permute elements left by one + "VEXT.8 q6, q6, q6, #8 \n\t" // permute elements left by two + "VEXT.8 q7, q7, q7, #12 \n\t" // permute elements left by three + + "VADD.I32 q0, q0, q1 \n\t" + "VADD.I32 q4, q4, q5 \n\t" + "VEOR q8, q3, q0 \n\t" + "VEOR q9, q7, q4 \n\t" + // rotation by 16 bits may be done by reversing the 16 bit elements in 32 bit words + "VREV32.16 q3, q8 \n\t" + "VREV32.16 q7, q9 \n\t" + + "VADD.I32 q2, q2, q3 \n\t" + "VADD.I32 q6, q6, q7 \n\t" + "VEOR q8, q1, q2 \n\t" + "VEOR q9, q5, q6 \n\t" + // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + "VSHL.I32 q1, q8, #12 \n\t" + "VSHL.I32 q5, q9, #12 \n\t" + "VSRI.I32 q1, q8, #20 \n\t" + "VSRI.I32 q5, q9, #20 \n\t" + + "VADD.I32 q0, q0, q1 \n\t" + "VADD.I32 q4, q4, q5 \n\t" + "VEOR q8, q3, q0 \n\t" + "VEOR q9, q7, q4 \n\t" + // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + "VSHL.I32 q3, q8, #8 \n\t" + "VSHL.I32 q7, q9, #8 \n\t" + "VSRI.I32 q3, q8, #24 \n\t" + "VSRI.I32 q7, q9, #24 \n\t" + + "VADD.I32 q2, q2, q3 \n\t" + "VADD.I32 q6, q6, q7 \n\t" + "VEOR q8, q1, q2 \n\t" + "VEOR q9, q5, q6 \n\t" + // SIMD 
instructions don't support rotation so we have to cheat using shifts and a help register + "VSHL.I32 q1, q8, #7 \n\t" + "VSHL.I32 q5, q9, #7 \n\t" + "VSRI.I32 q1, q8, #25 \n\t" + "VSRI.I32 q5, q9, #25 \n\t" + + "VEXT.8 q1, q1, q1, #12 \n\t" // permute elements left by three + "VEXT.8 q2, q2, q2, #8 \n\t" // permute elements left by two + "VEXT.8 q3, q3, q3, #4 \n\t" // permute elements left by one + + "VEXT.8 q5, q5, q5, #12 \n\t" // permute elements left by three + "VEXT.8 q6, q6, q6, #8 \n\t" // permute elements left by two + "VEXT.8 q7, q7, q7, #4 \n\t" // permute elements left by one + + "BNE L_chacha20_arm32_128_loop_%= \n\t" + + "VMOV.I32 q8, #0 \n\t" + "VADD.I32 q0, q0, q10 \n\t" + "VADD.I32 q1, q1, q11 \n\t" + "VMOV.I32 d16[0], r12 \n\t" + "VADD.I32 q2, q2, q12 \n\t" + "VADD.I32 q3, q3, q13 \n\t" + + "VADD.I32 q13, q13, q8 \n\t" // add one to counter + + "VADD.I32 q4, q4, q10 \n\t" + "VADD.I32 q5, q5, q11 \n\t" + "VADD.I32 q6, q6, q12 \n\t" + "VADD.I32 q7, q7, q13 \n\t" + + "VLDM %[m], { q8-q15 } \n\t" + "VEOR q0, q0, q8 \n\t" + "VEOR q1, q1, q9 \n\t" + "VEOR q2, q2, q10 \n\t" + "VEOR q3, q3, q11 \n\t" + "VEOR q4, q4, q12 \n\t" + "VEOR q5, q5, q13 \n\t" + "VEOR q6, q6, q14 \n\t" + "VEOR q7, q7, q15 \n\t" + "VSTM %[c], { q0-q7 } \n\t" + + : [c] "+r" (c), [m] "+r" (m) + : [rounds] "I" (ROUNDS/2), [input] "r" (input), + [chacha_chunk_bytes] "I" (CHACHA_CHUNK_BYTES) + : "memory", "cc", + "r11", "r12", + "q0", "q1", "q2", "q3", "q4", + "q5", "q6", "q7", "q8", "q9", + "q10", "q11", "q12", "q13", "q14", "q15" + ); +#endif /* __aarch64__ */ + return CHACHA_CHUNK_BYTES * 2; +} + +static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, + byte* c, word32 bytes) +{ +#ifdef CHACHA_TEST + printf("Entering wc_Chacha_encrypt_64 with %d bytes\n", bytes); +#endif /*CHACHA_TEST */ + +#ifdef __aarch64__ + word64 bytes64 = (word64) bytes; + __asm__ __volatile__ ( + /* Load index look-up for rotating left 8 bits */ + "LD1 {v13.16B}, [%[L_chacha20_neon_rol8]] \n\t" + "LD1 {v14.4S}, [%[L_chacha20_neon_inc_first_word]] \n\t" + /* Load state to encrypt */ + "LD1 {v8.4S-v11.4S}, [%[input]] \n\t" + "\n" + "L_chacha20_arm64_64_loop_%=: \n\t" + /* Move state into vector registers (x3) */ + "MOV v0.16B, v8.16B \n\t" + "MOV v1.16B, v9.16B \n\t" + "MOV v2.16B, v10.16B \n\t" + "MOV v3.16B, v11.16B \n\t" + /* Add counter word */ + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #12 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #4 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL 
v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #4 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #12 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #12 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #4 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #4 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #12 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #12 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #4 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #4 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #12 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR 
v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #12 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #4 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #4 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #12 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #12 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #4 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #4 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #12 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #12 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #4 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Even Round */ + 
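+ /* The ChaCha20 double rounds in this 64-byte path are fully unrolled:
+  * every odd+even pair is written out inline, so no round counter or
+  * SUBS/BNE is needed inside the block computation. */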
/* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #4 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #12 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #12 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #4 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #4 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #12 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #12 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #4 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + 
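+ /* NEON has no vector rotate instruction: SHL #7 followed by SRI #25
+  * (shift right and insert) on the same source realises the 32-bit
+  * rotate left by 7. */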
"SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #4 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #12 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #12 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #4 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #4 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #12 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #12 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #4 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "REV32 v3.8H, v3.8H \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #12 \n\t" + "SRI v1.4S, v12.4S, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "ADD v0.4S, v0.4S, v1.4S \n\t" + "EOR v3.16B, v3.16B, v0.16B \n\t" + "TBL v3.16B, { v3.16B }, v13.16B \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "ADD v2.4S, v2.4S, v3.4S \n\t" + "EOR v12.16B, v1.16B, v2.16B \n\t" + "SHL v1.4S, v12.4S, #7 \n\t" + "SRI v1.4S, v12.4S, #25 \n\t" + "EXT v3.16B, v3.16B, v3.16B, #4 \n\t" + "EXT v1.16B, v1.16B, v1.16B, #12 \n\t" + "EXT v2.16B, v2.16B, v2.16B, #8 \n\t" + /* Add back state */ + "ADD v0.4S, v0.4S, v8.4S \n\t" + "ADD v1.4S, v1.4S, v9.4S \n\t" + "ADD v2.4S, v2.4S, v10.4S \n\t" + "ADD v3.4S, v3.4S, v11.4S \n\t" + "CMP %[bytes], #64 \n\t" + "BLT L_chacha20_arm64_64_lt_64_%= \n\t" + "LD1 {v4.4S-v7.4S}, [%[m]], #64 \n\t" + "EOR v4.16B, v4.16B, v0.16B \n\t" + "EOR v5.16B, v5.16B, v1.16B \n\t" + "EOR v6.16B, v6.16B, 
v2.16B \n\t" + "EOR v7.16B, v7.16B, v3.16B \n\t" + "ST1 {v4.4S-v7.4S}, [%[c]], #64 \n\t" + "SUBS %[bytes], %[bytes], #64 \n\t" + "ADD v11.4S, v11.4S, v14.4S \n\t" + "BNE L_chacha20_arm64_64_loop_%= \n\t" + "B L_chacha20_arm64_64_done_%= \n\t" + "\n" + "L_chacha20_arm64_64_lt_64_%=: \n\t" + "CMP %[bytes], #32 \n\t" + "BLT L_chacha20_arm64_64_lt_32_%= \n\t" + "LD1 {v4.4S, v5.4S}, [%[m]], #32 \n\t" + "EOR v4.16B, v4.16B, v0.16B \n\t" + "EOR v5.16B, v5.16B, v1.16B \n\t" + "ST1 {v4.4S, v5.4S}, [%[c]], #32 \n\t" + "SUBS %[bytes], %[bytes], #32 \n\t" + "MOV v0.16B, v2.16B \n\t" + "MOV v1.16B, v3.16B \n\t" + "BEQ L_chacha20_arm64_64_done_%= \n\t" + "\n" + "L_chacha20_arm64_64_lt_32_%=: \n\t" + "CMP %[bytes], #16 \n\t" + "BLT L_chacha20_arm64_64_lt_16_%= \n\t" + "LD1 {v4.4S}, [%[m]], #16 \n\t" + "EOR v4.16B, v4.16B, v0.16B \n\t" + "ST1 {v4.4S}, [%[c]], #16 \n\t" + "SUBS %[bytes], %[bytes], #16 \n\t" + "MOV v0.16B, v1.16B \n\t" + "BEQ L_chacha20_arm64_64_done_%= \n\t" + "\n" + "L_chacha20_arm64_64_lt_16_%=: \n\t" + "CMP %[bytes], #8 \n\t" + "BLT L_chacha20_arm64_64_lt_8_%= \n\t" + "LD1 {v4.2S}, [%[m]], #8 \n\t" + "EOR v4.8B, v4.8B, v0.8B \n\t" + "ST1 {v4.2S}, [%[c]], #8 \n\t" + "SUBS %[bytes], %[bytes], #8 \n\t" + "MOV v0.D[0], v0.D[1] \n\t" + "BEQ L_chacha20_arm64_64_done_%= \n\t" + "\n" + "L_chacha20_arm64_64_lt_8_%=: \n\t" + "MOV x4, v0.D[0] \n\t" + "LSL x5, %[bytes], #3 \n\t" + "\n" + "L_chacha20_arm64_64_loop_lt_8_%=: \n\t" + "LDRB w6, [%[m], %[bytes]] \n\t" + "ROR x7, x4, x5 \n\t" + "EOR w6, w6, w7 \n\t" + "STRB w6, [%[c], %[bytes]] \n\t" + "SUBS %[bytes], %[bytes], #1 \n\t" + "SUB x5, x5, #8 \n\t" + "BGE L_chacha20_arm64_64_loop_lt_8_%= \n\t" + "\n" + "L_chacha20_arm64_64_done_%=: \n\t" + : [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), [bytes] "+r" (bytes64) + : [L_chacha20_neon_rol8] "r" (L_chacha20_neon_rol8), + [L_chacha20_neon_inc_first_word] "r" (L_chacha20_neon_inc_first_word) + : "memory", "x4", "x5", "x6", "x7", "v0", "v1", "v2", "v3", + "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11" + ); +#else + __asm__ __volatile__ ( + /* Get the input state */ + "VLDM %[input], { q8-q11 } \n\t" + /* Get the incrementer register */ + "VLDM %[L_chacha20_neon_inc_first_word], { q14 } \n\t" + "\n" + "L_chacha20_arm32_64_outer_loop_%=: \n\t" + /* Copy over the input state */ + "VMOV q0, q8 \n\t" + "VMOV q1, q9 \n\t" + "VMOV q2, q10 \n\t" + "VMOV q3, q11 \n\t" + /* Compute quarter rounds */ + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Odd->Even */ + "VEXT.8 q1, q1, q1, #4 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #12 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + 
/* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Even->Odd */ + "VEXT.8 q1, q1, q1, #12 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #4 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Odd->Even */ + "VEXT.8 q1, q1, q1, #4 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #12 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Even->Odd */ + "VEXT.8 q1, q1, q1, #12 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #4 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Odd->Even */ + "VEXT.8 q1, q1, q1, #4 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #12 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Even->Odd */ + "VEXT.8 q1, q1, q1, #12 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #4 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, 
q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Odd->Even */ + "VEXT.8 q1, q1, q1, #4 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #12 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Even->Odd */ + "VEXT.8 q1, q1, q1, #12 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #4 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Odd->Even */ + "VEXT.8 q1, q1, q1, #4 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #12 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Even->Odd */ + "VEXT.8 q1, q1, q1, #12 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #4 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Odd->Even */ + "VEXT.8 q1, q1, q1, #4 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #12 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" 
+ /* Permute Even->Odd */ + "VEXT.8 q1, q1, q1, #12 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #4 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Odd->Even */ + "VEXT.8 q1, q1, q1, #4 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #12 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Even->Odd */ + "VEXT.8 q1, q1, q1, #12 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #4 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Odd->Even */ + "VEXT.8 q1, q1, q1, #4 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #12 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Even->Odd */ + "VEXT.8 q1, q1, q1, #12 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #4 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Odd->Even */ + "VEXT.8 q1, q1, q1, #4 \n\t" + "VEXT.8 q2, 
q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #12 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Even->Odd */ + "VEXT.8 q1, q1, q1, #12 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #4 \n\t" + /* Odd Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Odd->Even */ + "VEXT.8 q1, q1, q1, #4 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #12 \n\t" + /* Even Round */ + /* a += b; d ^= a; d <<<= 16; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VREV32.16 q3, q4 \n\t" + /* c += d; b ^= c; b <<<= 12; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #12 \n\t" + "VSRI.I32 q1, q4, #20 \n\t" + /* a += b; d ^= a; d <<<= 8; */ + "VADD.I32 q0, q0, q1 \n\t" + "VEOR q4, q3, q0 \n\t" + "VSHL.I32 q3, q4, #8 \n\t" + "VSRI.I32 q3, q4, #24 \n\t" + /* c += d; b ^= c; b <<<= 7; */ + "VADD.I32 q2, q2, q3 \n\t" + "VEOR q4, q1, q2 \n\t" + "VSHL.I32 q1, q4, #7 \n\t" + "VSRI.I32 q1, q4, #25 \n\t" + /* Permute Even->Odd */ + "VEXT.8 q1, q1, q1, #12 \n\t" + "VEXT.8 q2, q2, q2, #8 \n\t" + "VEXT.8 q3, q3, q3, #4 \n\t" + /* Add back state */ + "VADD.I32 q0, q0, q8 \n\t" + "VADD.I32 q1, q1, q9 \n\t" + "VADD.I32 q2, q2, q10 \n\t" + "VADD.I32 q3, q3, q11 \n\t" + "CMP %[bytes], #64 \n\t" + "BLT L_chacha20_arm32_64_lt_64_%= \n\t" + /* XOR full 64 byte block */ + "VLDM %[m], { q4-q7 } \n\t" + "ADD %[m], %[m], #64 \n\t" + "VEOR q0, q0, q4 \n\t" + "VEOR q1, q1, q5 \n\t" + "VEOR q2, q2, q6 \n\t" + "VEOR q3, q3, q7 \n\t" + "VSTM %[c], { q0-q3 } \n\t" + "ADD %[c], %[c], #64 \n\t" + "SUBS %[bytes], %[bytes], #64 \n\t" + "VADD.I32 q11, q11, q14 \n\t" + "BNE L_chacha20_arm32_64_outer_loop_%= \n\t" + "B L_chacha20_arm32_64_done_%= \n\t" + "\n" + "L_chacha20_arm32_64_lt_64_%=: \n\t" + /* XOR 32 bytes */ + "CMP %[bytes], #32 \n\t" + "BLT L_chacha20_arm32_64_lt_32_%= \n\t" + "VLDM %[m], { q4-q5 } \n\t" + "ADD %[m], %[m], #32 \n\t" + "VEOR q4, q4, q0 \n\t" + "VEOR q5, q5, q1 \n\t" + "VSTM %[c], { q4-q5 } \n\t" + "ADD %[c], %[c], #32 \n\t" + "SUBS %[bytes], %[bytes], #32 \n\t" + "VMOV q0, q2 \n\t" + "VMOV q1, q3 \n\t" + "BEQ L_chacha20_arm32_64_done_%= \n\t" + "\n" + "L_chacha20_arm32_64_lt_32_%=: \n\t" + /* XOR 16 bytes */ + "CMP %[bytes], #16 \n\t" + "BLT L_chacha20_arm32_64_lt_16_%= \n\t" + "VLDM %[m], { q4 } \n\t" + "ADD %[m], %[m], #16 \n\t" + "VEOR q4, q4, q0 \n\t" + "VSTM %[c], { q4 } \n\t" + "ADD %[c], %[c], #16 \n\t" + "SUBS %[bytes], %[bytes], #16 \n\t" + "VMOV q0, q1 \n\t" + "BEQ L_chacha20_arm32_64_done_%= \n\t" + 
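+        /* Trailing data is consumed in halving chunks: 32, 16 and 8 bytes,
+         * then 4, then single bytes. After each chunk the leftover
+         * keystream is slid down (the VMOV q0, q2 / q0, q1 / d0, d1 and
+         * VTRN.32 moves) so the next, smaller XOR always starts at lane 0.
+         * Only full 64-byte blocks loop back to the top; the block-counter
+         * lane is stepped there by VADD.I32 q11, q11, q14, and the C
+         * caller (wc_Chacha_encrypt_bytes) re-synchronises
+         * ctx->X[CHACHA_IV_BYTES] afterwards. */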
"\n" + "L_chacha20_arm32_64_lt_16_%=: \n\t" + /* XOR 8 bytes */ + "CMP %[bytes], #8 \n\t" + "BLT L_chacha20_arm32_64_lt_8_%= \n\t" + "VLDR d8, [%[m], #0] \n\t" + "ADD %[m], %[m], #8 \n\t" + "VEOR d8, d8, d0 \n\t" + "VSTR d8, [%[c], #0] \n\t" + "ADD %[c], %[c], #8 \n\t" + "SUBS %[bytes], %[bytes], #8 \n\t" + "VMOV d0, d1 \n\t" + "BEQ L_chacha20_arm32_64_done_%= \n\t" + "\n" + "L_chacha20_arm32_64_lt_8_%=: \n\t" + /* XOR 4 bytes */ + "CMP %[bytes], #4 \n\t" + "BLT L_chacha20_arm32_64_lt_4_%= \n\t" + "LDR r12, [%[m]], #4 \n\t" + "VMOV r14, d0[0] \n\t" + "EOR r12, r12, r14 \n\t" + "STR r12, [%[c]], #4 \n\t" + "SUBS %[bytes], %[bytes], #4 \n\t" + "VTRN.32 d0, d0 \n\t" + "BEQ L_chacha20_arm32_64_done_%= \n\t" + "\n" + "L_chacha20_arm32_64_lt_4_%=: \n\t" + /* XOR remaining bytes */ + "VMOV r14, d0[0] \n\t" + "\n" + "L_chacha20_arm32_64_lt_4_loop_%=: \n\t" + "LDRB r12, [%[m]], #1 \n\t" + "EOR r12, r12, r14 \n\t" + "STRB r12, [%[c]], #1 \n\t" + "SUBS %[bytes], %[bytes], #1 \n\t" + "LSR r14, r14, #8 \n\t" + "BGT L_chacha20_arm32_64_lt_4_loop_%= \n\t" + "\n" + "L_chacha20_arm32_64_done_%=: \n\t" + : [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), [bytes] "+r" (bytes) + : [L_chacha20_neon_inc_first_word] "r" (L_chacha20_neon_inc_first_word) + : "memory", "cc", + "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q14", "r12", "r14" + ); +#endif /* __aarch64__ */ +} + +/** + * Encrypt a stream of bytes + */ +static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c, + word32 bytes) +{ + int processed; + +#ifdef __aarch64__ + if (bytes >= CHACHA_CHUNK_BYTES * 5) { + processed = (bytes / (CHACHA_CHUNK_BYTES * 5)) * CHACHA_CHUNK_BYTES * 5; + wc_Chacha_encrypt_320(ctx->X, m, c, processed); + + bytes -= processed; + c += processed; + m += processed; + ctx->X[CHACHA_IV_BYTES] = PLUS(ctx->X[CHACHA_IV_BYTES], processed / CHACHA_CHUNK_BYTES); + } + if (bytes >= CHACHA_CHUNK_BYTES * 4) { +#else + while (bytes >= CHACHA_CHUNK_BYTES * 4) { +#endif /*__aarch64__ */ + processed = wc_Chacha_encrypt_256(ctx->X, m, c); + + bytes -= processed; + c += processed; + m += processed; + ctx->X[CHACHA_IV_BYTES] = PLUS(ctx->X[CHACHA_IV_BYTES], processed / CHACHA_CHUNK_BYTES); + } + if (bytes >= CHACHA_CHUNK_BYTES * 2) { + processed = wc_Chacha_encrypt_128(ctx->X, m, c); + + bytes -= processed; + c += processed; + m += processed; + ctx->X[CHACHA_IV_BYTES] = PLUS(ctx->X[CHACHA_IV_BYTES], processed / CHACHA_CHUNK_BYTES); + } + if (bytes > 0) { + wc_Chacha_encrypt_64(ctx->X, m, c, bytes); + if (bytes > 64) + ctx->X[CHACHA_IV_BYTES] = PLUSONE(ctx->X[CHACHA_IV_BYTES]); + ctx->X[CHACHA_IV_BYTES] = PLUSONE(ctx->X[CHACHA_IV_BYTES]); + } +} + +/** + * API to encrypt/decrypt a message of any size. + */ +int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input, + word32 msglen) +{ + if (ctx == NULL || output == NULL || input == NULL) + return BAD_FUNC_ARG; + + wc_Chacha_encrypt_bytes(ctx, input, output, msglen); + + return 0; +} + +#endif /* HAVE_CHACHA */ +#endif /* WOLFSSL_ARMASM */ diff --git a/client/wolfssl/wolfcrypt/src/port/arm/armv8-curve25519.S b/client/wolfssl/wolfcrypt/src/port/arm/armv8-curve25519.S new file mode 100644 index 0000000..891c6d8 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/arm/armv8-curve25519.S @@ -0,0 +1,6715 @@ +/* armv8-curve25519 + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. 
+ * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./x25519/x25519.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-curve25519.S + */ +#ifdef __aarch64__ + .text + .align 2 + .globl fe_init + .type fe_init, %function +fe_init: + ret + .size fe_init,.-fe_init + .text + .align 2 + .globl fe_frombytes + .type fe_frombytes, %function +fe_frombytes: + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + and x5, x5, #0x7fffffffffffffff + stp x2, x3, [x0] + stp x4, x5, [x0, #16] + ret + .size fe_frombytes,.-fe_frombytes + .text + .align 2 + .globl fe_tobytes + .type fe_tobytes, %function +fe_tobytes: + mov x7, #19 + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + adds x6, x2, x7 + adcs x6, x3, xzr + adcs x6, x4, xzr + adc x6, x5, xzr + and x6, x7, x6, asr 63 + adds x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + adc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + stp x2, x3, [x0] + stp x4, x5, [x0, #16] + ret + .size fe_tobytes,.-fe_tobytes + .text + .align 2 + .globl fe_1 + .type fe_1, %function +fe_1: + # Set one + mov x1, #1 + stp x1, xzr, [x0] + stp xzr, xzr, [x0, #16] + ret + .size fe_1,.-fe_1 + .text + .align 2 + .globl fe_0 + .type fe_0, %function +fe_0: + # Set zero + stp xzr, xzr, [x0] + stp xzr, xzr, [x0, #16] + ret + .size fe_0,.-fe_0 + .text + .align 2 + .globl fe_copy + .type fe_copy, %function +fe_copy: + # Copy + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + stp x2, x3, [x0] + stp x4, x5, [x0, #16] + ret + .size fe_copy,.-fe_copy + .text + .align 2 + .globl fe_sub + .type fe_sub, %function +fe_sub: + # Sub + ldp x3, x4, [x1] + ldp x5, x6, [x1, #16] + ldp x7, x8, [x2] + ldp x9, x10, [x2, #16] + subs x3, x3, x7 + sbcs x4, x4, x8 + sbcs x5, x5, x9 + sbcs x6, x6, x10 + mov x12, #-19 + csetm x11, cc + # Mask the modulus + and x12, x11, x12 + and x13, x11, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x3, x3, x12 + adcs x4, x4, x11 + adcs x5, x5, x11 + adc x6, x6, x13 + stp x3, x4, [x0] + stp x5, x6, [x0, #16] + ret + .size fe_sub,.-fe_sub + .text + .align 2 + .globl fe_add + .type fe_add, %function +fe_add: + # Add + ldp x3, x4, [x1] + ldp x5, x6, [x1, #16] + ldp x7, x8, [x2] + ldp x9, x10, [x2, #16] + adds x3, x3, x7 + adcs x4, x4, x8 + adcs x5, x5, x9 + adc x6, x6, x10 + mov x12, #-19 + asr x11, x6, #63 + # Mask the modulus + and x12, x11, x12 + and x13, x11, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x3, x3, x12 + sbcs x4, x4, x11 + sbcs x5, x5, x11 + sbc x6, x6, x13 + stp x3, x4, [x0] + stp x5, x6, [x0, #16] + ret + .size fe_add,.-fe_add + .text + .align 2 + .globl fe_neg + .type fe_neg, %function +fe_neg: + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + mov x6, #-19 + mov x7, #-1 + mov x8, #-1 + mov x9, #0x7fffffffffffffff + subs x6, x6, x2 + sbcs x7, x7, x3 + sbcs x8, x8, x4 + sbc x9, x9, x5 + stp x6, x7, [x0] + stp x8, x9, [x0, #16] + ret + 
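+    # The fe_* routines above work on field elements stored as four 64-bit
+    # little-endian limbs modulo p = 2^255 - 19 and reduce without
+    # branching: subtracting p is the same as adding 19 and clearing bit
+    # 255. A C sketch of fe_add's conditional fold (illustrative only, not
+    # part of this file):
+    #
+    #     uint64_t mask = (uint64_t)0 - (r[3] >> 63);   /* bit 255 set? */
+    #     unsigned __int128 t = (unsigned __int128)r[0] + (19 & mask);
+    #     r[0] = (uint64_t)t;
+    #     t = (t >> 64) + r[1];  r[1] = (uint64_t)t;
+    #     t = (t >> 64) + r[2];  r[2] = (uint64_t)t;
+    #     t = (t >> 64) + r[3];  r[3] = (uint64_t)t & 0x7fffffffffffffffULL;
+    #
+    # fe_sub is the mirror image: csetm turns the borrow (cc) into an
+    # all-ones mask and p is added back in.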
.size fe_neg,.-fe_neg + .text + .align 2 + .globl fe_isnonzero + .type fe_isnonzero, %function +fe_isnonzero: + mov x6, #19 + ldp x1, x2, [x0] + ldp x3, x4, [x0, #16] + adds x5, x1, x6 + adcs x5, x2, xzr + adcs x5, x3, xzr + adc x5, x4, xzr + and x5, x6, x5, asr 63 + adds x1, x1, x5 + adcs x2, x2, xzr + adcs x3, x3, xzr + adc x4, x4, xzr + and x4, x4, #0x7fffffffffffffff + orr x0, x1, x2 + orr x3, x3, x4 + orr x0, x0, x3 + ret + .size fe_isnonzero,.-fe_isnonzero + .text + .align 2 + .globl fe_isnegative + .type fe_isnegative, %function +fe_isnegative: + mov x6, #19 + ldp x1, x2, [x0] + ldp x3, x4, [x0, #16] + adds x5, x1, x6 + adcs x5, x2, xzr + adcs x5, x3, xzr + adc x5, x4, xzr + and x0, x1, #1 + eor x0, x0, x5, lsr 63 + ret + .size fe_isnegative,.-fe_isnegative + .text + .align 2 + .globl fe_cmov_table + .type fe_cmov_table, %function +fe_cmov_table: + stp x29, x30, [sp, #-128]! + add x29, sp, #0 + str x17, [x29, #40] + str x19, [x29, #48] + stp x20, x21, [x29, #56] + stp x22, x23, [x29, #72] + stp x24, x25, [x29, #88] + stp x26, x27, [x29, #104] + str x28, [x29, #120] + str x0, [x29, #16] + sxtb x2, w2 + sbfx x3, x2, #7, #1 + eor x0, x2, x3 + sub x0, x0, x3 + mov x4, #1 + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, #1 + mov x9, xzr + mov x10, xzr + mov x11, xzr + mov x12, xzr + mov x13, xzr + mov x14, xzr + mov x15, xzr + cmp x0, #1 + ldp x16, x17, [x1] + ldp x19, x20, [x1, #16] + ldp x21, x22, [x1, #32] + ldp x23, x24, [x1, #48] + ldp x25, x26, [x1, #64] + ldp x27, x28, [x1, #80] + csel x4, x16, x4, eq + csel x5, x17, x5, eq + csel x6, x19, x6, eq + csel x7, x20, x7, eq + csel x8, x21, x8, eq + csel x9, x22, x9, eq + csel x10, x23, x10, eq + csel x11, x24, x11, eq + csel x12, x25, x12, eq + csel x13, x26, x13, eq + csel x14, x27, x14, eq + csel x15, x28, x15, eq + cmp x0, #2 + ldp x16, x17, [x1, #96] + ldp x19, x20, [x1, #112] + ldp x21, x22, [x1, #128] + ldp x23, x24, [x1, #144] + ldp x25, x26, [x1, #160] + ldp x27, x28, [x1, #176] + csel x4, x16, x4, eq + csel x5, x17, x5, eq + csel x6, x19, x6, eq + csel x7, x20, x7, eq + csel x8, x21, x8, eq + csel x9, x22, x9, eq + csel x10, x23, x10, eq + csel x11, x24, x11, eq + csel x12, x25, x12, eq + csel x13, x26, x13, eq + csel x14, x27, x14, eq + csel x15, x28, x15, eq + cmp x0, #3 + ldp x16, x17, [x1, #192] + ldp x19, x20, [x1, #208] + ldp x21, x22, [x1, #224] + ldp x23, x24, [x1, #240] + ldp x25, x26, [x1, #256] + ldp x27, x28, [x1, #272] + csel x4, x16, x4, eq + csel x5, x17, x5, eq + csel x6, x19, x6, eq + csel x7, x20, x7, eq + csel x8, x21, x8, eq + csel x9, x22, x9, eq + csel x10, x23, x10, eq + csel x11, x24, x11, eq + csel x12, x25, x12, eq + csel x13, x26, x13, eq + csel x14, x27, x14, eq + csel x15, x28, x15, eq + cmp x0, #4 + ldp x16, x17, [x1, #288] + ldp x19, x20, [x1, #304] + ldp x21, x22, [x1, #320] + ldp x23, x24, [x1, #336] + ldp x25, x26, [x1, #352] + ldp x27, x28, [x1, #368] + csel x4, x16, x4, eq + csel x5, x17, x5, eq + csel x6, x19, x6, eq + csel x7, x20, x7, eq + csel x8, x21, x8, eq + csel x9, x22, x9, eq + csel x10, x23, x10, eq + csel x11, x24, x11, eq + csel x12, x25, x12, eq + csel x13, x26, x13, eq + csel x14, x27, x14, eq + csel x15, x28, x15, eq + add x1, x1, #0x180 + cmp x0, #5 + ldp x16, x17, [x1] + ldp x19, x20, [x1, #16] + ldp x21, x22, [x1, #32] + ldp x23, x24, [x1, #48] + ldp x25, x26, [x1, #64] + ldp x27, x28, [x1, #80] + csel x4, x16, x4, eq + csel x5, x17, x5, eq + csel x6, x19, x6, eq + csel x7, x20, x7, eq + csel x8, x21, x8, eq + csel x9, x22, x9, eq + csel x10, x23, x10, eq + csel x11, 
x24, x11, eq + csel x12, x25, x12, eq + csel x13, x26, x13, eq + csel x14, x27, x14, eq + csel x15, x28, x15, eq + cmp x0, #6 + ldp x16, x17, [x1, #96] + ldp x19, x20, [x1, #112] + ldp x21, x22, [x1, #128] + ldp x23, x24, [x1, #144] + ldp x25, x26, [x1, #160] + ldp x27, x28, [x1, #176] + csel x4, x16, x4, eq + csel x5, x17, x5, eq + csel x6, x19, x6, eq + csel x7, x20, x7, eq + csel x8, x21, x8, eq + csel x9, x22, x9, eq + csel x10, x23, x10, eq + csel x11, x24, x11, eq + csel x12, x25, x12, eq + csel x13, x26, x13, eq + csel x14, x27, x14, eq + csel x15, x28, x15, eq + cmp x0, #7 + ldp x16, x17, [x1, #192] + ldp x19, x20, [x1, #208] + ldp x21, x22, [x1, #224] + ldp x23, x24, [x1, #240] + ldp x25, x26, [x1, #256] + ldp x27, x28, [x1, #272] + csel x4, x16, x4, eq + csel x5, x17, x5, eq + csel x6, x19, x6, eq + csel x7, x20, x7, eq + csel x8, x21, x8, eq + csel x9, x22, x9, eq + csel x10, x23, x10, eq + csel x11, x24, x11, eq + csel x12, x25, x12, eq + csel x13, x26, x13, eq + csel x14, x27, x14, eq + csel x15, x28, x15, eq + cmp x0, #8 + ldp x16, x17, [x1, #288] + ldp x19, x20, [x1, #304] + ldp x21, x22, [x1, #320] + ldp x23, x24, [x1, #336] + ldp x25, x26, [x1, #352] + ldp x27, x28, [x1, #368] + csel x4, x16, x4, eq + csel x5, x17, x5, eq + csel x6, x19, x6, eq + csel x7, x20, x7, eq + csel x8, x21, x8, eq + csel x9, x22, x9, eq + csel x10, x23, x10, eq + csel x11, x24, x11, eq + csel x12, x25, x12, eq + csel x13, x26, x13, eq + csel x14, x27, x14, eq + csel x15, x28, x15, eq + mov x16, #-19 + mov x17, #-1 + mov x19, #-1 + mov x20, #0x7fffffffffffffff + subs x16, x16, x12 + sbcs x17, x17, x13 + sbcs x19, x19, x14 + sbc x20, x20, x15 + cmp x2, #0 + mov x3, x4 + csel x4, x8, x4, lt + csel x8, x3, x8, lt + mov x3, x5 + csel x5, x9, x5, lt + csel x9, x3, x9, lt + mov x3, x6 + csel x6, x10, x6, lt + csel x10, x3, x10, lt + mov x3, x7 + csel x7, x11, x7, lt + csel x11, x3, x11, lt + csel x12, x16, x12, lt + csel x13, x17, x13, lt + csel x14, x19, x14, lt + csel x15, x20, x15, lt + ldr x0, [x29, #16] + stp x4, x5, [x0] + stp x6, x7, [x0, #16] + stp x8, x9, [x0, #32] + stp x10, x11, [x0, #48] + stp x12, x13, [x0, #64] + stp x14, x15, [x0, #80] + ldr x17, [x29, #40] + ldr x19, [x29, #48] + ldp x20, x21, [x29, #56] + ldp x22, x23, [x29, #72] + ldp x24, x25, [x29, #88] + ldp x26, x27, [x29, #104] + ldr x28, [x29, #120] + ldp x29, x30, [sp], #0x80 + ret + .size fe_cmov_table,.-fe_cmov_table + .text + .align 2 + .globl fe_mul + .type fe_mul, %function +fe_mul: + stp x29, x30, [sp, #-64]! 
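+    # fe_mul below is a schoolbook 4x4-limb multiply (mul/umulh pairs
+    # summed with adds/adcs carry chains into an 8-limb product) followed
+    # by a reduction built on 2^255 = 19 (mod p). Splitting the 512-bit
+    # product t at bit 255,
+    #
+    #     t = lo + 2^255 * hi  ==>  t = lo + 19 * hi   (mod p)
+    #
+    # the "Multiply top half by 19" step performs that fold, the
+    # "Overflow" step folds the one bit that can carry out of the sum,
+    # and the final conditional fold leaves the result within 255 bits.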
+ add x29, sp, #0 + str x17, [x29, #24] + str x19, [x29, #32] + stp x20, x21, [x29, #40] + str x22, [x29, #56] + # Multiply + ldp x14, x15, [x1] + ldp x16, x17, [x1, #16] + ldp x19, x20, [x2] + ldp x21, x22, [x2, #16] + # A[0] * B[0] + mul x6, x14, x19 + umulh x7, x14, x19 + # A[0] * B[1] + mul x3, x14, x20 + umulh x8, x14, x20 + adds x7, x7, x3 + adc x8, x8, xzr + # A[1] * B[0] + mul x3, x15, x19 + umulh x4, x15, x19 + adds x7, x7, x3 + adcs x8, x8, x4 + adc x9, xzr, xzr + # A[0] * B[2] + mul x3, x14, x21 + umulh x4, x14, x21 + adds x8, x8, x3 + adc x9, x9, x4 + # A[1] * B[1] + mul x3, x15, x20 + umulh x4, x15, x20 + adds x8, x8, x3 + adcs x9, x9, x4 + adc x10, xzr, xzr + # A[2] * B[0] + mul x3, x16, x19 + umulh x4, x16, x19 + adds x8, x8, x3 + adcs x9, x9, x4 + adc x10, x10, xzr + # A[0] * B[3] + mul x3, x14, x22 + umulh x4, x14, x22 + adds x9, x9, x3 + adcs x10, x10, x4 + adc x11, xzr, xzr + # A[1] * B[2] + mul x3, x15, x21 + umulh x4, x15, x21 + adds x9, x9, x3 + adcs x10, x10, x4 + adc x11, x11, xzr + # A[2] * B[1] + mul x3, x16, x20 + umulh x4, x16, x20 + adds x9, x9, x3 + adcs x10, x10, x4 + adc x11, x11, xzr + # A[3] * B[0] + mul x3, x17, x19 + umulh x4, x17, x19 + adds x9, x9, x3 + adcs x10, x10, x4 + adc x11, x11, xzr + # A[1] * B[3] + mul x3, x15, x22 + umulh x4, x15, x22 + adds x10, x10, x3 + adcs x11, x11, x4 + adc x12, xzr, xzr + # A[2] * B[2] + mul x3, x16, x21 + umulh x4, x16, x21 + adds x10, x10, x3 + adcs x11, x11, x4 + adc x12, x12, xzr + # A[3] * B[1] + mul x3, x17, x20 + umulh x4, x17, x20 + adds x10, x10, x3 + adcs x11, x11, x4 + adc x12, x12, xzr + # A[2] * B[3] + mul x3, x16, x22 + umulh x4, x16, x22 + adds x11, x11, x3 + adcs x12, x12, x4 + adc x13, xzr, xzr + # A[3] * B[2] + mul x3, x17, x21 + umulh x4, x17, x21 + adds x11, x11, x3 + adcs x12, x12, x4 + adc x13, x13, xzr + # A[3] * B[3] + mul x3, x17, x22 + umulh x4, x17, x22 + adds x12, x12, x3 + adc x13, x13, x4 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x13, x13, x12, #63 + extr x12, x12, x11, #63 + extr x11, x11, x10, #63 + extr x10, x10, x9, #63 + and x9, x9, #0x7fffffffffffffff + # Multiply top half by 19 + mov x3, #19 + mul x4, x3, x10 + umulh x10, x3, x10 + adds x6, x6, x4 + mul x4, x3, x11 + umulh x11, x3, x11 + adcs x7, x7, x4 + mul x4, x3, x12 + umulh x12, x3, x12 + adcs x8, x8, x4 + mul x4, x3, x13 + umulh x5, x3, x13 + adcs x9, x9, x4 + adc x5, x5, xzr + # Add remaining product results in + adds x7, x7, x10 + adcs x8, x8, x11 + adcs x9, x9, x12 + adc x5, x5, xzr + # Overflow + extr x5, x5, x9, #63 + mul x5, x5, x3 + and x9, x9, #0x7fffffffffffffff + adds x6, x6, x5 + adcs x7, x7, xzr + adcs x8, x8, xzr + adc x9, x9, xzr + # Reduce if top bit set + and x5, x3, x9, asr 63 + and x9, x9, #0x7fffffffffffffff + adds x6, x6, x5 + adcs x7, x7, xzr + adcs x8, x8, xzr + adc x9, x9, xzr + # Store + stp x6, x7, [x0] + stp x8, x9, [x0, #16] + ldr x17, [x29, #24] + ldr x19, [x29, #32] + ldp x20, x21, [x29, #40] + ldr x22, [x29, #56] + ldp x29, x30, [sp], #0x40 + ret + .size fe_mul,.-fe_mul + .text + .align 2 + .globl fe_sq + .type fe_sq, %function +fe_sq: + # Square + ldp x13, x14, [x1] + ldp x15, x16, [x1, #16] + # A[0] * A[1] + mul x6, x13, x14 + umulh x7, x13, x14 + # A[0] * A[2] + mul x2, x13, x15 + umulh x8, x13, x15 + adds x7, x7, x2 + adc x8, x8, xzr + # A[0] * A[3] + mul x2, x13, x16 + umulh x9, x13, x16 + adds x8, x8, x2 + adc x9, x9, xzr + # A[1] * A[2] + mul x2, x14, x15 + umulh x3, x14, x15 + adds x8, x8, x2 + adcs x9, x9, x3 + adc x10, xzr, xzr + # A[1] * A[3] + mul x2, 
x14, x16 + umulh x3, x14, x16 + adds x9, x9, x2 + adc x10, x10, x3 + # A[2] * A[3] + mul x2, x15, x16 + umulh x11, x15, x16 + adds x10, x10, x2 + adc x11, x11, xzr + # Double + adds x6, x6, x6 + adcs x7, x7, x7 + adcs x8, x8, x8 + adcs x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adc x12, xzr, xzr + # A[0] * A[0] + mul x5, x13, x13 + umulh x4, x13, x13 + # A[1] * A[1] + mul x2, x14, x14 + umulh x3, x14, x14 + adds x6, x6, x4 + adcs x7, x7, x2 + adc x4, x3, xzr + # A[2] * A[2] + mul x2, x15, x15 + umulh x3, x15, x15 + adds x8, x8, x4 + adcs x9, x9, x2 + adc x4, x3, xzr + # A[3] * A[3] + mul x2, x16, x16 + umulh x3, x16, x16 + adds x10, x10, x4 + adcs x11, x11, x2 + adc x12, x12, x3 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x12, x12, x11, #63 + extr x11, x11, x10, #63 + extr x10, x10, x9, #63 + extr x9, x9, x8, #63 + and x8, x8, #0x7fffffffffffffff + # Multiply top half by 19 + mov x2, #19 + mul x3, x2, x9 + umulh x9, x2, x9 + adds x5, x5, x3 + mul x3, x2, x10 + umulh x10, x2, x10 + adcs x6, x6, x3 + mul x3, x2, x11 + umulh x11, x2, x11 + adcs x7, x7, x3 + mul x3, x2, x12 + umulh x4, x2, x12 + adcs x8, x8, x3 + adc x4, x4, xzr + # Add remaining product results in + adds x6, x6, x9 + adcs x7, x7, x10 + adcs x8, x8, x11 + adc x4, x4, xzr + # Overflow + extr x4, x4, x8, #63 + mul x4, x4, x2 + and x8, x8, #0x7fffffffffffffff + adds x5, x5, x4 + adcs x6, x6, xzr + adcs x7, x7, xzr + adc x8, x8, xzr + # Reduce if top bit set + and x4, x2, x8, asr 63 + and x8, x8, #0x7fffffffffffffff + adds x5, x5, x4 + adcs x6, x6, xzr + adcs x7, x7, xzr + adc x8, x8, xzr + # Store + stp x5, x6, [x0] + stp x7, x8, [x0, #16] + ret + .size fe_sq,.-fe_sq + .text + .align 2 + .globl fe_invert + .type fe_invert, %function +fe_invert: + stp x29, x30, [sp, #-176]! 
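+    # fe_invert computes z^(p-2) = z^(2^255 - 21) mod p, i.e. the inverse
+    # by Fermat's little theorem, using the standard Curve25519 addition
+    # chain: the L_fe_invert1..8 loops below are runs of 4, 9, 19, 10, 49,
+    # 99, 50 and 5 repeated squarings that, with the interleaved
+    # multiplies, build z^(2^5-1), z^(2^10-1), z^(2^20-1), z^(2^40-1),
+    # z^(2^50-1), z^(2^100-1), z^(2^200-1) and z^(2^250-1), finishing with
+    # five more squarings and a multiply by z^11.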
+ add x29, sp, #0 + str x20, [x29, #168] + # Invert + str x0, [x29, #144] + str x1, [x29, #152] + add x0, x29, #16 + bl fe_sq + add x0, x29, #48 + add x1, x29, #16 + bl fe_sq + add x1, x29, #48 + bl fe_sq + ldr x1, [x29, #152] + add x2, x29, #48 + bl fe_mul + add x0, x29, #16 + add x1, x29, #16 + add x2, x29, #48 + bl fe_mul + add x0, x29, #0x50 + bl fe_sq + add x0, x29, #48 + add x1, x29, #48 + add x2, x29, #0x50 + bl fe_mul + add x0, x29, #0x50 + bl fe_sq + mov x20, #4 + add x1, x29, #0x50 +L_fe_invert1: + bl fe_sq + sub x20, x20, #1 + cmp x20, #0 + bne L_fe_invert1 + add x0, x29, #48 + add x2, x29, #48 + bl fe_mul + add x0, x29, #0x50 + add x1, x29, #48 + bl fe_sq + mov x20, #9 + add x1, x29, #0x50 +L_fe_invert2: + bl fe_sq + sub x20, x20, #1 + cmp x20, #0 + bne L_fe_invert2 + add x2, x29, #48 + bl fe_mul + add x0, x29, #0x70 + bl fe_sq + mov x20, #19 + add x1, x29, #0x70 +L_fe_invert3: + bl fe_sq + sub x20, x20, #1 + cmp x20, #0 + bne L_fe_invert3 + add x0, x29, #0x50 + add x2, x29, #0x50 + bl fe_mul + mov x20, #10 + add x1, x29, #0x50 +L_fe_invert4: + bl fe_sq + sub x20, x20, #1 + cmp x20, #0 + bne L_fe_invert4 + add x0, x29, #48 + add x2, x29, #48 + bl fe_mul + add x0, x29, #0x50 + add x1, x29, #48 + bl fe_sq + mov x20, #49 + add x1, x29, #0x50 +L_fe_invert5: + bl fe_sq + sub x20, x20, #1 + cmp x20, #0 + bne L_fe_invert5 + add x2, x29, #48 + bl fe_mul + add x0, x29, #0x70 + bl fe_sq + mov x20, #0x63 + add x1, x29, #0x70 +L_fe_invert6: + bl fe_sq + sub x20, x20, #1 + cmp x20, #0 + bne L_fe_invert6 + add x0, x29, #0x50 + add x2, x29, #0x50 + bl fe_mul + mov x20, #50 + add x1, x29, #0x50 +L_fe_invert7: + bl fe_sq + sub x20, x20, #1 + cmp x20, #0 + bne L_fe_invert7 + add x0, x29, #48 + add x2, x29, #48 + bl fe_mul + mov x20, #5 + add x1, x29, #48 +L_fe_invert8: + bl fe_sq + sub x20, x20, #1 + cmp x20, #0 + bne L_fe_invert8 + ldr x0, [x29, #144] + add x2, x29, #16 + bl fe_mul + ldr x20, [x29, #168] + ldp x29, x30, [sp], #0xb0 + ret + .size fe_invert,.-fe_invert + .text + .align 2 + .globl curve25519 + .type curve25519, %function +curve25519: + stp x29, x30, [sp, #-288]! 
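+    # curve25519 runs the x-only Montgomery ladder over the 255 scalar
+    # bits, scanned from bit 254 (bit 62 of the word at byte offset 24)
+    # down to bit 0. Each iteration does a branch-free conditional swap
+    # keyed on the XOR of the current and previous bits (the csel runs),
+    # then one combined double-and-add step; in RFC 7748 notation, the
+    # interleaved field code below implements:
+    #
+    #     A  = x2 + z2;   B  = x2 - z2;   AA = A^2;   BB = B^2;
+    #     E  = AA - BB;   C  = x3 + z3;   D  = x3 - z3;
+    #     DA = D * A;     CB = C * B;
+    #     x3 = (DA + CB)^2;   z3 = x1 * (DA - CB)^2;
+    #     x2 = AA * BB;       z2 = E * (BB + 121666 * E);
+    #
+    # where the constant 121666 = (486662 - 2)/4 + 1 stands in for
+    # a24 = 121665, since AA + 121665*E == BB + 121666*E (E = AA - BB).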
+ add x29, sp, #0 + str x17, [x29, #200] + str x19, [x29, #208] + stp x20, x21, [x29, #216] + stp x22, x23, [x29, #232] + stp x24, x25, [x29, #248] + stp x26, x27, [x29, #264] + str x28, [x29, #280] + mov x23, xzr + str x0, [x29, #176] + str x2, [x29, #184] + # Copy + ldp x6, x7, [x2] + ldp x8, x9, [x2, #16] + stp x6, x7, [x29, #80] + stp x8, x9, [x29, #96] + # Set one + mov x2, #1 + stp x2, xzr, [x0] + stp xzr, xzr, [x0, #16] + # Set zero + stp xzr, xzr, [x29, #16] + stp xzr, xzr, [x29, #32] + # Set one + mov x2, #1 + stp x2, xzr, [x29, #48] + stp xzr, xzr, [x29, #64] + mov x25, #62 + mov x24, #24 +L_curve25519_words: +L_curve25519_bits: + ldr x2, [x1, x24] + lsr x2, x2, x25 + and x2, x2, #1 + eor x23, x23, x2 + # Conditional Swap + cmp x23, #1 + ldp x10, x11, [x0] + ldp x12, x13, [x0, #16] + ldp x6, x7, [x29, #80] + ldp x8, x9, [x29, #96] + csel x14, x10, x6, eq + csel x10, x6, x10, eq + csel x15, x11, x7, eq + csel x11, x7, x11, eq + csel x16, x12, x8, eq + csel x12, x8, x12, eq + csel x17, x13, x9, eq + csel x13, x9, x13, eq + # Conditional Swap + cmp x23, #1 + ldp x19, x20, [x29, #16] + ldp x21, x22, [x29, #32] + ldp x6, x7, [x29, #48] + ldp x8, x9, [x29, #64] + csel x5, x19, x6, eq + csel x19, x6, x19, eq + csel x26, x20, x7, eq + csel x20, x7, x20, eq + csel x27, x21, x8, eq + csel x21, x8, x21, eq + csel x28, x22, x9, eq + csel x22, x9, x22, eq + mov x23, x2 + # Add + adds x6, x10, x19 + adcs x7, x11, x20 + adcs x8, x12, x21 + adc x9, x13, x22 + mov x3, #-19 + asr x2, x9, #63 + # Mask the modulus + and x3, x2, x3 + and x4, x2, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x6, x6, x3 + sbcs x7, x7, x2 + sbcs x8, x8, x2 + sbc x9, x9, x4 + # Sub + subs x19, x10, x19 + sbcs x20, x11, x20 + sbcs x21, x12, x21 + sbcs x22, x13, x22 + mov x3, #-19 + csetm x2, cc + # Mask the modulus + and x3, x2, x3 + and x4, x2, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x19, x19, x3 + adcs x20, x20, x2 + adcs x21, x21, x2 + adc x22, x22, x4 + stp x19, x20, [x29, #144] + stp x21, x22, [x29, #160] + # Add + adds x10, x14, x5 + adcs x11, x15, x26 + adcs x12, x16, x27 + adc x13, x17, x28 + mov x3, #-19 + asr x2, x13, #63 + # Mask the modulus + and x3, x2, x3 + and x4, x2, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x10, x10, x3 + sbcs x11, x11, x2 + sbcs x12, x12, x2 + sbc x13, x13, x4 + # Sub + subs x14, x14, x5 + sbcs x15, x15, x26 + sbcs x16, x16, x27 + sbcs x17, x17, x28 + mov x3, #-19 + csetm x2, cc + # Mask the modulus + and x3, x2, x3 + and x4, x2, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x14, x14, x3 + adcs x15, x15, x2 + adcs x16, x16, x2 + adc x17, x17, x4 + # Multiply + # A[0] * B[0] + mul x19, x14, x6 + umulh x20, x14, x6 + # A[0] * B[1] + mul x3, x14, x7 + umulh x21, x14, x7 + adds x20, x20, x3 + adc x21, x21, xzr + # A[1] * B[0] + mul x3, x15, x6 + umulh x4, x15, x6 + adds x20, x20, x3 + adcs x21, x21, x4 + adc x22, xzr, xzr + # A[0] * B[2] + mul x3, x14, x8 + umulh x4, x14, x8 + adds x21, x21, x3 + adc x22, x22, x4 + # A[1] * B[1] + mul x3, x15, x7 + umulh x4, x15, x7 + adds x21, x21, x3 + adcs x22, x22, x4 + adc x2, xzr, xzr + # A[2] * B[0] + mul x3, x16, x6 + umulh x4, x16, x6 + adds x21, x21, x3 + adcs x22, x22, x4 + adc x2, x2, xzr + # A[0] * B[3] + mul x3, x14, x9 + umulh x4, x14, x9 + adds x22, x22, x3 + adcs x2, x2, x4 + adc x26, xzr, xzr + # A[1] * B[2] + mul x3, x15, x8 + umulh x4, x15, x8 + adds x22, x22, x3 + adcs x2, x2, x4 + adc x26, x26, xzr + # A[2] * B[1] + mul x3, x16, x7 + umulh x4, x16, x7 + adds x22, x22, x3 + adcs x2, 
x2, x4 + adc x26, x26, xzr + # A[3] * B[0] + mul x3, x17, x6 + umulh x4, x17, x6 + adds x22, x22, x3 + adcs x2, x2, x4 + adc x26, x26, xzr + # A[1] * B[3] + mul x3, x15, x9 + umulh x4, x15, x9 + adds x2, x2, x3 + adcs x26, x26, x4 + adc x27, xzr, xzr + # A[2] * B[2] + mul x3, x16, x8 + umulh x4, x16, x8 + adds x2, x2, x3 + adcs x26, x26, x4 + adc x27, x27, xzr + # A[3] * B[1] + mul x3, x17, x7 + umulh x4, x17, x7 + adds x2, x2, x3 + adcs x26, x26, x4 + adc x27, x27, xzr + # A[2] * B[3] + mul x3, x16, x9 + umulh x4, x16, x9 + adds x26, x26, x3 + adcs x27, x27, x4 + adc x28, xzr, xzr + # A[3] * B[2] + mul x3, x17, x8 + umulh x4, x17, x8 + adds x26, x26, x3 + adcs x27, x27, x4 + adc x28, x28, xzr + # A[3] * B[3] + mul x3, x17, x9 + umulh x4, x17, x9 + adds x27, x27, x3 + adc x28, x28, x4 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x28, x28, x27, #63 + extr x27, x27, x26, #63 + extr x26, x26, x2, #63 + extr x2, x2, x22, #63 + and x22, x22, #0x7fffffffffffffff + # Multiply top half by 19 + mov x3, #19 + mul x4, x3, x2 + umulh x2, x3, x2 + adds x19, x19, x4 + mul x4, x3, x26 + umulh x26, x3, x26 + adcs x20, x20, x4 + mul x4, x3, x27 + umulh x27, x3, x27 + adcs x21, x21, x4 + mul x4, x3, x28 + umulh x5, x3, x28 + adcs x22, x22, x4 + adc x5, x5, xzr + # Add remaining product results in + adds x20, x20, x2 + adcs x21, x21, x26 + adcs x22, x22, x27 + adc x5, x5, xzr + # Overflow + extr x5, x5, x22, #63 + mul x5, x5, x3 + and x22, x22, #0x7fffffffffffffff + adds x19, x19, x5 + adcs x20, x20, xzr + adcs x21, x21, xzr + adc x22, x22, xzr + # Reduce if top bit set + and x5, x3, x22, asr 63 + and x22, x22, #0x7fffffffffffffff + adds x19, x19, x5 + adcs x20, x20, xzr + adcs x21, x21, xzr + adc x22, x22, xzr + # Store + stp x19, x20, [x29, #112] + stp x21, x22, [x29, #128] + # Multiply + ldp x2, x26, [x29, #144] + ldp x27, x28, [x29, #160] + # A[0] * B[0] + mul x19, x10, x2 + umulh x20, x10, x2 + # A[0] * B[1] + mul x3, x10, x26 + umulh x21, x10, x26 + adds x20, x20, x3 + adc x21, x21, xzr + # A[1] * B[0] + mul x3, x11, x2 + umulh x4, x11, x2 + adds x20, x20, x3 + adcs x21, x21, x4 + adc x22, xzr, xzr + # A[0] * B[2] + mul x3, x10, x27 + umulh x4, x10, x27 + adds x21, x21, x3 + adc x22, x22, x4 + # A[1] * B[1] + mul x3, x11, x26 + umulh x4, x11, x26 + adds x21, x21, x3 + adcs x22, x22, x4 + adc x14, xzr, xzr + # A[2] * B[0] + mul x3, x12, x2 + umulh x4, x12, x2 + adds x21, x21, x3 + adcs x22, x22, x4 + adc x14, x14, xzr + # A[0] * B[3] + mul x3, x10, x28 + umulh x4, x10, x28 + adds x22, x22, x3 + adcs x14, x14, x4 + adc x15, xzr, xzr + # A[1] * B[2] + mul x3, x11, x27 + umulh x4, x11, x27 + adds x22, x22, x3 + adcs x14, x14, x4 + adc x15, x15, xzr + # A[2] * B[1] + mul x3, x12, x26 + umulh x4, x12, x26 + adds x22, x22, x3 + adcs x14, x14, x4 + adc x15, x15, xzr + # A[3] * B[0] + mul x3, x13, x2 + umulh x4, x13, x2 + adds x22, x22, x3 + adcs x14, x14, x4 + adc x15, x15, xzr + # A[1] * B[3] + mul x3, x11, x28 + umulh x4, x11, x28 + adds x14, x14, x3 + adcs x15, x15, x4 + adc x16, xzr, xzr + # A[2] * B[2] + mul x3, x12, x27 + umulh x4, x12, x27 + adds x14, x14, x3 + adcs x15, x15, x4 + adc x16, x16, xzr + # A[3] * B[1] + mul x3, x13, x26 + umulh x4, x13, x26 + adds x14, x14, x3 + adcs x15, x15, x4 + adc x16, x16, xzr + # A[2] * B[3] + mul x3, x12, x28 + umulh x4, x12, x28 + adds x15, x15, x3 + adcs x16, x16, x4 + adc x17, xzr, xzr + # A[3] * B[2] + mul x3, x13, x27 + umulh x4, x13, x27 + adds x15, x15, x3 + adcs x16, x16, x4 + adc x17, x17, xzr + # A[3] * B[3] + mul x3, x13, x28 
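+    # (Each "A[i] * B[j]" step is a mul/umulh pair -- the low and high 64
+    # bits of the 64x64 partial product -- folded into the running 8-limb
+    # result with adds/adcs/adc carry chains.)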
+ umulh x4, x13, x28 + adds x16, x16, x3 + adc x17, x17, x4 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x17, x17, x16, #63 + extr x16, x16, x15, #63 + extr x15, x15, x14, #63 + extr x14, x14, x22, #63 + and x22, x22, #0x7fffffffffffffff + # Multiply top half by 19 + mov x3, #19 + mul x4, x3, x14 + umulh x14, x3, x14 + adds x19, x19, x4 + mul x4, x3, x15 + umulh x15, x3, x15 + adcs x20, x20, x4 + mul x4, x3, x16 + umulh x16, x3, x16 + adcs x21, x21, x4 + mul x4, x3, x17 + umulh x5, x3, x17 + adcs x22, x22, x4 + adc x5, x5, xzr + # Add remaining product results in + adds x20, x20, x14 + adcs x21, x21, x15 + adcs x22, x22, x16 + adc x5, x5, xzr + # Overflow + extr x5, x5, x22, #63 + mul x5, x5, x3 + and x22, x22, #0x7fffffffffffffff + adds x19, x19, x5 + adcs x20, x20, xzr + adcs x21, x21, xzr + adc x22, x22, xzr + # Reduce if top bit set + and x5, x3, x22, asr 63 + and x22, x22, #0x7fffffffffffffff + adds x19, x19, x5 + adcs x20, x20, xzr + adcs x21, x21, xzr + adc x22, x22, xzr + # Store + # Square + # A[0] * A[1] + mul x11, x2, x26 + umulh x12, x2, x26 + # A[0] * A[2] + mul x3, x2, x27 + umulh x13, x2, x27 + adds x12, x12, x3 + adc x13, x13, xzr + # A[0] * A[3] + mul x3, x2, x28 + umulh x14, x2, x28 + adds x13, x13, x3 + adc x14, x14, xzr + # A[1] * A[2] + mul x3, x26, x27 + umulh x4, x26, x27 + adds x13, x13, x3 + adcs x14, x14, x4 + adc x15, xzr, xzr + # A[1] * A[3] + mul x3, x26, x28 + umulh x4, x26, x28 + adds x14, x14, x3 + adc x15, x15, x4 + # A[2] * A[3] + mul x3, x27, x28 + umulh x16, x27, x28 + adds x15, x15, x3 + adc x16, x16, xzr + # Double + adds x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + adcs x15, x15, x15 + adcs x16, x16, x16 + adc x17, xzr, xzr + # A[0] * A[0] + mul x10, x2, x2 + umulh x5, x2, x2 + # A[1] * A[1] + mul x3, x26, x26 + umulh x4, x26, x26 + adds x11, x11, x5 + adcs x12, x12, x3 + adc x5, x4, xzr + # A[2] * A[2] + mul x3, x27, x27 + umulh x4, x27, x27 + adds x13, x13, x5 + adcs x14, x14, x3 + adc x5, x4, xzr + # A[3] * A[3] + mul x3, x28, x28 + umulh x4, x28, x28 + adds x15, x15, x5 + adcs x16, x16, x3 + adc x17, x17, x4 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x17, x17, x16, #63 + extr x16, x16, x15, #63 + extr x15, x15, x14, #63 + extr x14, x14, x13, #63 + and x13, x13, #0x7fffffffffffffff + # Multiply top half by 19 + mov x3, #19 + mul x4, x3, x14 + umulh x14, x3, x14 + adds x10, x10, x4 + mul x4, x3, x15 + umulh x15, x3, x15 + adcs x11, x11, x4 + mul x4, x3, x16 + umulh x16, x3, x16 + adcs x12, x12, x4 + mul x4, x3, x17 + umulh x5, x3, x17 + adcs x13, x13, x4 + adc x5, x5, xzr + # Add remaining product results in + adds x11, x11, x14 + adcs x12, x12, x15 + adcs x13, x13, x16 + adc x5, x5, xzr + # Overflow + extr x5, x5, x13, #63 + mul x5, x5, x3 + and x13, x13, #0x7fffffffffffffff + adds x10, x10, x5 + adcs x11, x11, xzr + adcs x12, x12, xzr + adc x13, x13, xzr + # Reduce if top bit set + and x5, x3, x13, asr 63 + and x13, x13, #0x7fffffffffffffff + adds x10, x10, x5 + adcs x11, x11, xzr + adcs x12, x12, xzr + adc x13, x13, xzr + # Store + # Square + # A[0] * A[1] + mul x15, x6, x7 + umulh x16, x6, x7 + # A[0] * A[2] + mul x3, x6, x8 + umulh x17, x6, x8 + adds x16, x16, x3 + adc x17, x17, xzr + # A[0] * A[3] + mul x3, x6, x9 + umulh x2, x6, x9 + adds x17, x17, x3 + adc x2, x2, xzr + # A[1] * A[2] + mul x3, x7, x8 + umulh x4, x7, x8 + adds x17, x17, x3 + adcs x2, x2, x4 + adc x26, xzr, xzr + # A[1] * A[3] + mul x3, x7, x9 + umulh x4, x7, x9 + adds x2, x2, x3 + 
adc x26, x26, x4 + # A[2] * A[3] + mul x3, x8, x9 + umulh x27, x8, x9 + adds x26, x26, x3 + adc x27, x27, xzr + # Double + adds x15, x15, x15 + adcs x16, x16, x16 + adcs x17, x17, x17 + adcs x2, x2, x2 + adcs x26, x26, x26 + adcs x27, x27, x27 + adc x28, xzr, xzr + # A[0] * A[0] + mul x14, x6, x6 + umulh x5, x6, x6 + # A[1] * A[1] + mul x3, x7, x7 + umulh x4, x7, x7 + adds x15, x15, x5 + adcs x16, x16, x3 + adc x5, x4, xzr + # A[2] * A[2] + mul x3, x8, x8 + umulh x4, x8, x8 + adds x17, x17, x5 + adcs x2, x2, x3 + adc x5, x4, xzr + # A[3] * A[3] + mul x3, x9, x9 + umulh x4, x9, x9 + adds x26, x26, x5 + adcs x27, x27, x3 + adc x28, x28, x4 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x28, x28, x27, #63 + extr x27, x27, x26, #63 + extr x26, x26, x2, #63 + extr x2, x2, x17, #63 + and x17, x17, #0x7fffffffffffffff + # Multiply top half by 19 + mov x3, #19 + mul x4, x3, x2 + umulh x2, x3, x2 + adds x14, x14, x4 + mul x4, x3, x26 + umulh x26, x3, x26 + adcs x15, x15, x4 + mul x4, x3, x27 + umulh x27, x3, x27 + adcs x16, x16, x4 + mul x4, x3, x28 + umulh x5, x3, x28 + adcs x17, x17, x4 + adc x5, x5, xzr + # Add remaining product results in + adds x15, x15, x2 + adcs x16, x16, x26 + adcs x17, x17, x27 + adc x5, x5, xzr + # Overflow + extr x5, x5, x17, #63 + mul x5, x5, x3 + and x17, x17, #0x7fffffffffffffff + adds x14, x14, x5 + adcs x15, x15, xzr + adcs x16, x16, xzr + adc x17, x17, xzr + # Reduce if top bit set + and x5, x3, x17, asr 63 + and x17, x17, #0x7fffffffffffffff + adds x14, x14, x5 + adcs x15, x15, xzr + adcs x16, x16, xzr + adc x17, x17, xzr + # Store + # Multiply + # A[0] * B[0] + mul x6, x14, x10 + umulh x7, x14, x10 + # A[0] * B[1] + mul x3, x14, x11 + umulh x8, x14, x11 + adds x7, x7, x3 + adc x8, x8, xzr + # A[1] * B[0] + mul x3, x15, x10 + umulh x4, x15, x10 + adds x7, x7, x3 + adcs x8, x8, x4 + adc x9, xzr, xzr + # A[0] * B[2] + mul x3, x14, x12 + umulh x4, x14, x12 + adds x8, x8, x3 + adc x9, x9, x4 + # A[1] * B[1] + mul x3, x15, x11 + umulh x4, x15, x11 + adds x8, x8, x3 + adcs x9, x9, x4 + adc x2, xzr, xzr + # A[2] * B[0] + mul x3, x16, x10 + umulh x4, x16, x10 + adds x8, x8, x3 + adcs x9, x9, x4 + adc x2, x2, xzr + # A[0] * B[3] + mul x3, x14, x13 + umulh x4, x14, x13 + adds x9, x9, x3 + adcs x2, x2, x4 + adc x26, xzr, xzr + # A[1] * B[2] + mul x3, x15, x12 + umulh x4, x15, x12 + adds x9, x9, x3 + adcs x2, x2, x4 + adc x26, x26, xzr + # A[2] * B[1] + mul x3, x16, x11 + umulh x4, x16, x11 + adds x9, x9, x3 + adcs x2, x2, x4 + adc x26, x26, xzr + # A[3] * B[0] + mul x3, x17, x10 + umulh x4, x17, x10 + adds x9, x9, x3 + adcs x2, x2, x4 + adc x26, x26, xzr + # A[1] * B[3] + mul x3, x15, x13 + umulh x4, x15, x13 + adds x2, x2, x3 + adcs x26, x26, x4 + adc x27, xzr, xzr + # A[2] * B[2] + mul x3, x16, x12 + umulh x4, x16, x12 + adds x2, x2, x3 + adcs x26, x26, x4 + adc x27, x27, xzr + # A[3] * B[1] + mul x3, x17, x11 + umulh x4, x17, x11 + adds x2, x2, x3 + adcs x26, x26, x4 + adc x27, x27, xzr + # A[2] * B[3] + mul x3, x16, x13 + umulh x4, x16, x13 + adds x26, x26, x3 + adcs x27, x27, x4 + adc x28, xzr, xzr + # A[3] * B[2] + mul x3, x17, x12 + umulh x4, x17, x12 + adds x26, x26, x3 + adcs x27, x27, x4 + adc x28, x28, xzr + # A[3] * B[3] + mul x3, x17, x13 + umulh x4, x17, x13 + adds x27, x27, x3 + adc x28, x28, x4 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x28, x28, x27, #63 + extr x27, x27, x26, #63 + extr x26, x26, x2, #63 + extr x2, x2, x9, #63 + and x9, x9, #0x7fffffffffffffff + # Multiply top half by 19 + mov x3, #19 
+ mul x4, x3, x2 + umulh x2, x3, x2 + adds x6, x6, x4 + mul x4, x3, x26 + umulh x26, x3, x26 + adcs x7, x7, x4 + mul x4, x3, x27 + umulh x27, x3, x27 + adcs x8, x8, x4 + mul x4, x3, x28 + umulh x5, x3, x28 + adcs x9, x9, x4 + adc x5, x5, xzr + # Add remaining product results in + adds x7, x7, x2 + adcs x8, x8, x26 + adcs x9, x9, x27 + adc x5, x5, xzr + # Overflow + extr x5, x5, x9, #63 + mul x5, x5, x3 + and x9, x9, #0x7fffffffffffffff + adds x6, x6, x5 + adcs x7, x7, xzr + adcs x8, x8, xzr + adc x9, x9, xzr + # Reduce if top bit set + and x5, x3, x9, asr 63 + and x9, x9, #0x7fffffffffffffff + adds x6, x6, x5 + adcs x7, x7, xzr + adcs x8, x8, xzr + adc x9, x9, xzr + # Store + stp x6, x7, [x0] + stp x8, x9, [x0, #16] + # Sub + subs x14, x14, x10 + sbcs x15, x15, x11 + sbcs x16, x16, x12 + sbcs x17, x17, x13 + mov x3, #-19 + csetm x2, cc + # Mask the modulus + and x3, x2, x3 + and x4, x2, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x14, x14, x3 + adcs x15, x15, x2 + adcs x16, x16, x2 + adc x17, x17, x4 + # Multiply by 121666 + mov x5, #0xdb42 + movk x5, #1, lsl 16 + mul x6, x14, x5 + umulh x7, x14, x5 + mul x3, x15, x5 + umulh x4, x15, x5 + adds x7, x7, x3 + adc x8, xzr, x4 + mul x3, x16, x5 + umulh x4, x16, x5 + adds x8, x8, x3 + adc x9, xzr, x4 + mul x3, x17, x5 + umulh x4, x17, x5 + adds x9, x9, x3 + adc x4, xzr, x4 + mov x5, #19 + extr x4, x4, x9, #63 + mul x4, x4, x5 + and x9, x9, #0x7fffffffffffffff + adds x6, x6, x4 + adcs x7, x7, xzr + adcs x8, x8, xzr + adc x9, x9, xzr + # Add + adds x10, x10, x6 + adcs x11, x11, x7 + adcs x12, x12, x8 + adc x13, x13, x9 + mov x3, #-19 + asr x2, x13, #63 + # Mask the modulus + and x3, x2, x3 + and x4, x2, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x10, x10, x3 + sbcs x11, x11, x2 + sbcs x12, x12, x2 + sbc x13, x13, x4 + # Multiply + # A[0] * B[0] + mul x6, x14, x10 + umulh x7, x14, x10 + # A[0] * B[1] + mul x3, x14, x11 + umulh x8, x14, x11 + adds x7, x7, x3 + adc x8, x8, xzr + # A[1] * B[0] + mul x3, x15, x10 + umulh x4, x15, x10 + adds x7, x7, x3 + adcs x8, x8, x4 + adc x9, xzr, xzr + # A[0] * B[2] + mul x3, x14, x12 + umulh x4, x14, x12 + adds x8, x8, x3 + adc x9, x9, x4 + # A[1] * B[1] + mul x3, x15, x11 + umulh x4, x15, x11 + adds x8, x8, x3 + adcs x9, x9, x4 + adc x2, xzr, xzr + # A[2] * B[0] + mul x3, x16, x10 + umulh x4, x16, x10 + adds x8, x8, x3 + adcs x9, x9, x4 + adc x2, x2, xzr + # A[0] * B[3] + mul x3, x14, x13 + umulh x4, x14, x13 + adds x9, x9, x3 + adcs x2, x2, x4 + adc x26, xzr, xzr + # A[1] * B[2] + mul x3, x15, x12 + umulh x4, x15, x12 + adds x9, x9, x3 + adcs x2, x2, x4 + adc x26, x26, xzr + # A[2] * B[1] + mul x3, x16, x11 + umulh x4, x16, x11 + adds x9, x9, x3 + adcs x2, x2, x4 + adc x26, x26, xzr + # A[3] * B[0] + mul x3, x17, x10 + umulh x4, x17, x10 + adds x9, x9, x3 + adcs x2, x2, x4 + adc x26, x26, xzr + # A[1] * B[3] + mul x3, x15, x13 + umulh x4, x15, x13 + adds x2, x2, x3 + adcs x26, x26, x4 + adc x27, xzr, xzr + # A[2] * B[2] + mul x3, x16, x12 + umulh x4, x16, x12 + adds x2, x2, x3 + adcs x26, x26, x4 + adc x27, x27, xzr + # A[3] * B[1] + mul x3, x17, x11 + umulh x4, x17, x11 + adds x2, x2, x3 + adcs x26, x26, x4 + adc x27, x27, xzr + # A[2] * B[3] + mul x3, x16, x13 + umulh x4, x16, x13 + adds x26, x26, x3 + adcs x27, x27, x4 + adc x28, xzr, xzr + # A[3] * B[2] + mul x3, x17, x12 + umulh x4, x17, x12 + adds x26, x26, x3 + adcs x27, x27, x4 + adc x28, x28, xzr + # A[3] * B[3] + mul x3, x17, x13 + umulh x4, x17, x13 + adds x27, x27, x3 + adc x28, x28, x4 + # Reduce + # Move top half into 
t4-t7 and remove top bit from t3 + extr x28, x28, x27, #63 + extr x27, x27, x26, #63 + extr x26, x26, x2, #63 + extr x2, x2, x9, #63 + and x9, x9, #0x7fffffffffffffff + # Multiply top half by 19 + mov x3, #19 + mul x4, x3, x2 + umulh x2, x3, x2 + adds x6, x6, x4 + mul x4, x3, x26 + umulh x26, x3, x26 + adcs x7, x7, x4 + mul x4, x3, x27 + umulh x27, x3, x27 + adcs x8, x8, x4 + mul x4, x3, x28 + umulh x5, x3, x28 + adcs x9, x9, x4 + adc x5, x5, xzr + # Add remaining product results in + adds x7, x7, x2 + adcs x8, x8, x26 + adcs x9, x9, x27 + adc x5, x5, xzr + # Overflow + extr x5, x5, x9, #63 + mul x5, x5, x3 + and x9, x9, #0x7fffffffffffffff + adds x6, x6, x5 + adcs x7, x7, xzr + adcs x8, x8, xzr + adc x9, x9, xzr + # Reduce if top bit set + and x5, x3, x9, asr 63 + and x9, x9, #0x7fffffffffffffff + adds x6, x6, x5 + adcs x7, x7, xzr + adcs x8, x8, xzr + adc x9, x9, xzr + # Store + stp x6, x7, [x29, #16] + stp x8, x9, [x29, #32] + # Add + ldp x6, x7, [x29, #112] + ldp x8, x9, [x29, #128] + adds x10, x6, x19 + adcs x11, x7, x20 + adcs x12, x8, x21 + adc x13, x9, x22 + mov x3, #-19 + asr x2, x13, #63 + # Mask the modulus + and x3, x2, x3 + and x4, x2, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x10, x10, x3 + sbcs x11, x11, x2 + sbcs x12, x12, x2 + sbc x13, x13, x4 + # Sub + subs x19, x6, x19 + sbcs x20, x7, x20 + sbcs x21, x8, x21 + sbcs x22, x9, x22 + mov x3, #-19 + csetm x2, cc + # Mask the modulus + and x3, x2, x3 + and x4, x2, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x19, x19, x3 + adcs x20, x20, x2 + adcs x21, x21, x2 + adc x22, x22, x4 + # Square + # A[0] * A[1] + mul x7, x10, x11 + umulh x8, x10, x11 + # A[0] * A[2] + mul x3, x10, x12 + umulh x9, x10, x12 + adds x8, x8, x3 + adc x9, x9, xzr + # A[0] * A[3] + mul x3, x10, x13 + umulh x2, x10, x13 + adds x9, x9, x3 + adc x2, x2, xzr + # A[1] * A[2] + mul x3, x11, x12 + umulh x4, x11, x12 + adds x9, x9, x3 + adcs x2, x2, x4 + adc x26, xzr, xzr + # A[1] * A[3] + mul x3, x11, x13 + umulh x4, x11, x13 + adds x2, x2, x3 + adc x26, x26, x4 + # A[2] * A[3] + mul x3, x12, x13 + umulh x27, x12, x13 + adds x26, x26, x3 + adc x27, x27, xzr + # Double + adds x7, x7, x7 + adcs x8, x8, x8 + adcs x9, x9, x9 + adcs x2, x2, x2 + adcs x26, x26, x26 + adcs x27, x27, x27 + adc x28, xzr, xzr + # A[0] * A[0] + mul x6, x10, x10 + umulh x5, x10, x10 + # A[1] * A[1] + mul x3, x11, x11 + umulh x4, x11, x11 + adds x7, x7, x5 + adcs x8, x8, x3 + adc x5, x4, xzr + # A[2] * A[2] + mul x3, x12, x12 + umulh x4, x12, x12 + adds x9, x9, x5 + adcs x2, x2, x3 + adc x5, x4, xzr + # A[3] * A[3] + mul x3, x13, x13 + umulh x4, x13, x13 + adds x26, x26, x5 + adcs x27, x27, x3 + adc x28, x28, x4 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x28, x28, x27, #63 + extr x27, x27, x26, #63 + extr x26, x26, x2, #63 + extr x2, x2, x9, #63 + and x9, x9, #0x7fffffffffffffff + # Multiply top half by 19 + mov x3, #19 + mul x4, x3, x2 + umulh x2, x3, x2 + adds x6, x6, x4 + mul x4, x3, x26 + umulh x26, x3, x26 + adcs x7, x7, x4 + mul x4, x3, x27 + umulh x27, x3, x27 + adcs x8, x8, x4 + mul x4, x3, x28 + umulh x5, x3, x28 + adcs x9, x9, x4 + adc x5, x5, xzr + # Add remaining product results in + adds x7, x7, x2 + adcs x8, x8, x26 + adcs x9, x9, x27 + adc x5, x5, xzr + # Overflow + extr x5, x5, x9, #63 + mul x5, x5, x3 + and x9, x9, #0x7fffffffffffffff + adds x6, x6, x5 + adcs x7, x7, xzr + adcs x8, x8, xzr + adc x9, x9, xzr + # Reduce if top bit set + and x5, x3, x9, asr 63 + and x9, x9, #0x7fffffffffffffff + adds x6, x6, x5 + adcs 
x7, x7, xzr + adcs x8, x8, xzr + adc x9, x9, xzr + # Store + stp x6, x7, [x29, #80] + stp x8, x9, [x29, #96] + # Square + # A[0] * A[1] + mul x7, x19, x20 + umulh x8, x19, x20 + # A[0] * A[2] + mul x3, x19, x21 + umulh x9, x19, x21 + adds x8, x8, x3 + adc x9, x9, xzr + # A[0] * A[3] + mul x3, x19, x22 + umulh x2, x19, x22 + adds x9, x9, x3 + adc x2, x2, xzr + # A[1] * A[2] + mul x3, x20, x21 + umulh x4, x20, x21 + adds x9, x9, x3 + adcs x2, x2, x4 + adc x26, xzr, xzr + # A[1] * A[3] + mul x3, x20, x22 + umulh x4, x20, x22 + adds x2, x2, x3 + adc x26, x26, x4 + # A[2] * A[3] + mul x3, x21, x22 + umulh x27, x21, x22 + adds x26, x26, x3 + adc x27, x27, xzr + # Double + adds x7, x7, x7 + adcs x8, x8, x8 + adcs x9, x9, x9 + adcs x2, x2, x2 + adcs x26, x26, x26 + adcs x27, x27, x27 + adc x28, xzr, xzr + # A[0] * A[0] + mul x6, x19, x19 + umulh x5, x19, x19 + # A[1] * A[1] + mul x3, x20, x20 + umulh x4, x20, x20 + adds x7, x7, x5 + adcs x8, x8, x3 + adc x5, x4, xzr + # A[2] * A[2] + mul x3, x21, x21 + umulh x4, x21, x21 + adds x9, x9, x5 + adcs x2, x2, x3 + adc x5, x4, xzr + # A[3] * A[3] + mul x3, x22, x22 + umulh x4, x22, x22 + adds x26, x26, x5 + adcs x27, x27, x3 + adc x28, x28, x4 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x28, x28, x27, #63 + extr x27, x27, x26, #63 + extr x26, x26, x2, #63 + extr x2, x2, x9, #63 + and x9, x9, #0x7fffffffffffffff + # Multiply top half by 19 + mov x3, #19 + mul x4, x3, x2 + umulh x2, x3, x2 + adds x6, x6, x4 + mul x4, x3, x26 + umulh x26, x3, x26 + adcs x7, x7, x4 + mul x4, x3, x27 + umulh x27, x3, x27 + adcs x8, x8, x4 + mul x4, x3, x28 + umulh x5, x3, x28 + adcs x9, x9, x4 + adc x5, x5, xzr + # Add remaining product results in + adds x7, x7, x2 + adcs x8, x8, x26 + adcs x9, x9, x27 + adc x5, x5, xzr + # Overflow + extr x5, x5, x9, #63 + mul x5, x5, x3 + and x9, x9, #0x7fffffffffffffff + adds x6, x6, x5 + adcs x7, x7, xzr + adcs x8, x8, xzr + adc x9, x9, xzr + # Reduce if top bit set + and x5, x3, x9, asr 63 + and x9, x9, #0x7fffffffffffffff + adds x6, x6, x5 + adcs x7, x7, xzr + adcs x8, x8, xzr + adc x9, x9, xzr + # Store + ldr x2, [x29, #184] + # Multiply + ldp x14, x15, [x2] + ldp x16, x17, [x2, #16] + # A[0] * B[0] + mul x10, x14, x6 + umulh x11, x14, x6 + # A[0] * B[1] + mul x3, x14, x7 + umulh x12, x14, x7 + adds x11, x11, x3 + adc x12, x12, xzr + # A[1] * B[0] + mul x3, x15, x6 + umulh x4, x15, x6 + adds x11, x11, x3 + adcs x12, x12, x4 + adc x13, xzr, xzr + # A[0] * B[2] + mul x3, x14, x8 + umulh x4, x14, x8 + adds x12, x12, x3 + adc x13, x13, x4 + # A[1] * B[1] + mul x3, x15, x7 + umulh x4, x15, x7 + adds x12, x12, x3 + adcs x13, x13, x4 + adc x2, xzr, xzr + # A[2] * B[0] + mul x3, x16, x6 + umulh x4, x16, x6 + adds x12, x12, x3 + adcs x13, x13, x4 + adc x2, x2, xzr + # A[0] * B[3] + mul x3, x14, x9 + umulh x4, x14, x9 + adds x13, x13, x3 + adcs x2, x2, x4 + adc x26, xzr, xzr + # A[1] * B[2] + mul x3, x15, x8 + umulh x4, x15, x8 + adds x13, x13, x3 + adcs x2, x2, x4 + adc x26, x26, xzr + # A[2] * B[1] + mul x3, x16, x7 + umulh x4, x16, x7 + adds x13, x13, x3 + adcs x2, x2, x4 + adc x26, x26, xzr + # A[3] * B[0] + mul x3, x17, x6 + umulh x4, x17, x6 + adds x13, x13, x3 + adcs x2, x2, x4 + adc x26, x26, xzr + # A[1] * B[3] + mul x3, x15, x9 + umulh x4, x15, x9 + adds x2, x2, x3 + adcs x26, x26, x4 + adc x27, xzr, xzr + # A[2] * B[2] + mul x3, x16, x8 + umulh x4, x16, x8 + adds x2, x2, x3 + adcs x26, x26, x4 + adc x27, x27, xzr + # A[3] * B[1] + mul x3, x17, x7 + umulh x4, x17, x7 + adds x2, x2, x3 + adcs x26, x26, x4 + 
adc x27, x27, xzr + # A[2] * B[3] + mul x3, x16, x9 + umulh x4, x16, x9 + adds x26, x26, x3 + adcs x27, x27, x4 + adc x28, xzr, xzr + # A[3] * B[2] + mul x3, x17, x8 + umulh x4, x17, x8 + adds x26, x26, x3 + adcs x27, x27, x4 + adc x28, x28, xzr + # A[3] * B[3] + mul x3, x17, x9 + umulh x4, x17, x9 + adds x27, x27, x3 + adc x28, x28, x4 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x28, x28, x27, #63 + extr x27, x27, x26, #63 + extr x26, x26, x2, #63 + extr x2, x2, x13, #63 + and x13, x13, #0x7fffffffffffffff + # Multiply top half by 19 + mov x3, #19 + mul x4, x3, x2 + umulh x2, x3, x2 + adds x10, x10, x4 + mul x4, x3, x26 + umulh x26, x3, x26 + adcs x11, x11, x4 + mul x4, x3, x27 + umulh x27, x3, x27 + adcs x12, x12, x4 + mul x4, x3, x28 + umulh x5, x3, x28 + adcs x13, x13, x4 + adc x5, x5, xzr + # Add remaining product results in + adds x11, x11, x2 + adcs x12, x12, x26 + adcs x13, x13, x27 + adc x5, x5, xzr + # Overflow + extr x5, x5, x13, #63 + mul x5, x5, x3 + and x13, x13, #0x7fffffffffffffff + adds x10, x10, x5 + adcs x11, x11, xzr + adcs x12, x12, xzr + adc x13, x13, xzr + # Reduce if top bit set + and x5, x3, x13, asr 63 + and x13, x13, #0x7fffffffffffffff + adds x10, x10, x5 + adcs x11, x11, xzr + adcs x12, x12, xzr + adc x13, x13, xzr + # Store + stp x10, x11, [x29, #48] + stp x12, x13, [x29, #64] + sub x25, x25, #1 + cmp x25, #0 + bge L_curve25519_bits + mov x25, #63 + sub x24, x24, #8 + cmp x24, #0 + bge L_curve25519_words + # Invert + add x0, x29, #48 + add x1, x29, #16 + bl fe_sq + add x0, x29, #0x50 + add x1, x29, #48 + bl fe_sq + add x1, x29, #0x50 + bl fe_sq + add x1, x29, #16 + add x2, x29, #0x50 + bl fe_mul + add x0, x29, #48 + add x1, x29, #48 + add x2, x29, #0x50 + bl fe_mul + add x0, x29, #0x70 + bl fe_sq + add x0, x29, #0x50 + add x1, x29, #0x50 + add x2, x29, #0x70 + bl fe_mul + add x0, x29, #0x70 + bl fe_sq + mov x24, #4 + add x1, x29, #0x70 +L_curve25519_inv_1: + bl fe_sq + sub x24, x24, #1 + cmp x24, #0 + bne L_curve25519_inv_1 + add x0, x29, #0x50 + add x2, x29, #0x50 + bl fe_mul + add x0, x29, #0x70 + add x1, x29, #0x50 + bl fe_sq + mov x24, #9 + add x1, x29, #0x70 +L_curve25519_inv_2: + bl fe_sq + sub x24, x24, #1 + cmp x24, #0 + bne L_curve25519_inv_2 + add x2, x29, #0x50 + bl fe_mul + add x0, x29, #0x90 + bl fe_sq + mov x24, #19 + add x1, x29, #0x90 +L_curve25519_inv_3: + bl fe_sq + sub x24, x24, #1 + cmp x24, #0 + bne L_curve25519_inv_3 + add x0, x29, #0x70 + add x2, x29, #0x70 + bl fe_mul + mov x24, #10 + add x1, x29, #0x70 +L_curve25519_inv_4: + bl fe_sq + sub x24, x24, #1 + cmp x24, #0 + bne L_curve25519_inv_4 + add x0, x29, #0x50 + add x2, x29, #0x50 + bl fe_mul + add x0, x29, #0x70 + add x1, x29, #0x50 + bl fe_sq + mov x24, #49 + add x1, x29, #0x70 +L_curve25519_inv_5: + bl fe_sq + sub x24, x24, #1 + cmp x24, #0 + bne L_curve25519_inv_5 + add x2, x29, #0x50 + bl fe_mul + add x0, x29, #0x90 + bl fe_sq + mov x24, #0x63 + add x1, x29, #0x90 +L_curve25519_inv_6: + bl fe_sq + sub x24, x24, #1 + cmp x24, #0 + bne L_curve25519_inv_6 + add x0, x29, #0x70 + add x2, x29, #0x70 + bl fe_mul + mov x24, #50 + add x1, x29, #0x70 +L_curve25519_inv_7: + bl fe_sq + sub x24, x24, #1 + cmp x24, #0 + bne L_curve25519_inv_7 + add x0, x29, #0x50 + add x2, x29, #0x50 + bl fe_mul + mov x24, #5 + add x1, x29, #0x50 +L_curve25519_inv_8: + bl fe_sq + sub x24, x24, #1 + cmp x24, #0 + bne L_curve25519_inv_8 + add x0, x29, #16 + add x2, x29, #48 + bl fe_mul + ldr x0, [x29, #176] + # Multiply + ldp x6, x7, [x0] + ldp x8, x9, [x0, #16] + ldp x10, x11, 
[x29, #16] + ldp x12, x13, [x29, #32] + # A[0] * B[0] + mul x14, x6, x10 + umulh x15, x6, x10 + # A[0] * B[1] + mul x3, x6, x11 + umulh x16, x6, x11 + adds x15, x15, x3 + adc x16, x16, xzr + # A[1] * B[0] + mul x3, x7, x10 + umulh x4, x7, x10 + adds x15, x15, x3 + adcs x16, x16, x4 + adc x17, xzr, xzr + # A[0] * B[2] + mul x3, x6, x12 + umulh x4, x6, x12 + adds x16, x16, x3 + adc x17, x17, x4 + # A[1] * B[1] + mul x3, x7, x11 + umulh x4, x7, x11 + adds x16, x16, x3 + adcs x17, x17, x4 + adc x19, xzr, xzr + # A[2] * B[0] + mul x3, x8, x10 + umulh x4, x8, x10 + adds x16, x16, x3 + adcs x17, x17, x4 + adc x19, x19, xzr + # A[0] * B[3] + mul x3, x6, x13 + umulh x4, x6, x13 + adds x17, x17, x3 + adcs x19, x19, x4 + adc x20, xzr, xzr + # A[1] * B[2] + mul x3, x7, x12 + umulh x4, x7, x12 + adds x17, x17, x3 + adcs x19, x19, x4 + adc x20, x20, xzr + # A[2] * B[1] + mul x3, x8, x11 + umulh x4, x8, x11 + adds x17, x17, x3 + adcs x19, x19, x4 + adc x20, x20, xzr + # A[3] * B[0] + mul x3, x9, x10 + umulh x4, x9, x10 + adds x17, x17, x3 + adcs x19, x19, x4 + adc x20, x20, xzr + # A[1] * B[3] + mul x3, x7, x13 + umulh x4, x7, x13 + adds x19, x19, x3 + adcs x20, x20, x4 + adc x21, xzr, xzr + # A[2] * B[2] + mul x3, x8, x12 + umulh x4, x8, x12 + adds x19, x19, x3 + adcs x20, x20, x4 + adc x21, x21, xzr + # A[3] * B[1] + mul x3, x9, x11 + umulh x4, x9, x11 + adds x19, x19, x3 + adcs x20, x20, x4 + adc x21, x21, xzr + # A[2] * B[3] + mul x3, x8, x13 + umulh x4, x8, x13 + adds x20, x20, x3 + adcs x21, x21, x4 + adc x22, xzr, xzr + # A[3] * B[2] + mul x3, x9, x12 + umulh x4, x9, x12 + adds x20, x20, x3 + adcs x21, x21, x4 + adc x22, x22, xzr + # A[3] * B[3] + mul x3, x9, x13 + umulh x4, x9, x13 + adds x21, x21, x3 + adc x22, x22, x4 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x22, x22, x21, #63 + extr x21, x21, x20, #63 + extr x20, x20, x19, #63 + extr x19, x19, x17, #63 + and x17, x17, #0x7fffffffffffffff + # Multiply top half by 19 + mov x3, #19 + mul x4, x3, x19 + umulh x19, x3, x19 + adds x14, x14, x4 + mul x4, x3, x20 + umulh x20, x3, x20 + adcs x15, x15, x4 + mul x4, x3, x21 + umulh x21, x3, x21 + adcs x16, x16, x4 + mul x4, x3, x22 + umulh x5, x3, x22 + adcs x17, x17, x4 + adc x5, x5, xzr + # Add remaining product results in + adds x15, x15, x19 + adcs x16, x16, x20 + adcs x17, x17, x21 + adc x5, x5, xzr + # Overflow + extr x5, x5, x17, #63 + mul x5, x5, x3 + and x17, x17, #0x7fffffffffffffff + adds x14, x14, x5 + adcs x15, x15, xzr + adcs x16, x16, xzr + adc x17, x17, xzr + # Reduce if top bit set + and x5, x3, x17, asr 63 + and x17, x17, #0x7fffffffffffffff + adds x14, x14, x5 + adcs x15, x15, xzr + adcs x16, x16, xzr + adc x17, x17, xzr + # Store + stp x14, x15, [x0] + stp x16, x17, [x0, #16] + mov x0, xzr + ldr x17, [x29, #200] + ldr x19, [x29, #208] + ldp x20, x21, [x29, #216] + ldp x22, x23, [x29, #232] + ldp x24, x25, [x29, #248] + ldp x26, x27, [x29, #264] + ldr x28, [x29, #280] + ldp x29, x30, [sp], #0x120 + ret + .size curve25519,.-curve25519 + .text + .align 2 + .globl fe_pow22523 + .type fe_pow22523, %function +fe_pow22523: + stp x29, x30, [sp, #-144]! 
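+ # Note: pow22523 raises a field element to (p-5)/8 = 2^252 - 3,
+ # with p = 2^255 - 19, via a fixed sequence of fe_sq/fe_mul
+ # calls (square-and-multiply over an addition chain).  An
+ # illustrative C-level sketch of one run of the chain, assuming
+ # the fe_sq/fe_mul helpers seen in this file:
+ #     fe_sq(t, t);                 /* t = t^2           */
+ #     for (i = 0; i < 4; i++)      /* t = t^(2^5)       */
+ #         fe_sq(t, t);
+ #     fe_mul(t, t, s);             /* fold in saved term */
+ # The result feeds the square-root step of Ed25519 point
+ # decompression.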
+ add x29, sp, #0 + str x21, [x29, #136] + # pow22523 + str x0, [x29, #112] + str x1, [x29, #120] + add x0, x29, #16 + bl fe_sq + add x0, x29, #48 + add x1, x29, #16 + bl fe_sq + add x1, x29, #48 + bl fe_sq + ldr x1, [x29, #120] + add x2, x29, #48 + bl fe_mul + add x0, x29, #16 + add x1, x29, #16 + add x2, x29, #48 + bl fe_mul + bl fe_sq + add x1, x29, #48 + add x2, x29, #16 + bl fe_mul + add x0, x29, #48 + add x1, x29, #16 + bl fe_sq + mov x21, #4 + add x1, x29, #48 +L_fe_pow22523_1: + bl fe_sq + sub x21, x21, #1 + cmp x21, #0 + bne L_fe_pow22523_1 + add x0, x29, #16 + add x2, x29, #16 + bl fe_mul + add x0, x29, #48 + add x1, x29, #16 + bl fe_sq + mov x21, #9 + add x1, x29, #48 +L_fe_pow22523_2: + bl fe_sq + sub x21, x21, #1 + cmp x21, #0 + bne L_fe_pow22523_2 + add x2, x29, #16 + bl fe_mul + add x0, x29, #0x50 + bl fe_sq + mov x21, #19 + add x1, x29, #0x50 +L_fe_pow22523_3: + bl fe_sq + sub x21, x21, #1 + cmp x21, #0 + bne L_fe_pow22523_3 + add x0, x29, #48 + add x2, x29, #48 + bl fe_mul + mov x21, #10 + add x1, x29, #48 +L_fe_pow22523_4: + bl fe_sq + sub x21, x21, #1 + cmp x21, #0 + bne L_fe_pow22523_4 + add x0, x29, #16 + add x2, x29, #16 + bl fe_mul + add x0, x29, #48 + add x1, x29, #16 + bl fe_sq + mov x21, #49 + add x1, x29, #48 +L_fe_pow22523_5: + bl fe_sq + sub x21, x21, #1 + cmp x21, #0 + bne L_fe_pow22523_5 + add x2, x29, #16 + bl fe_mul + add x0, x29, #0x50 + bl fe_sq + mov x21, #0x63 + add x1, x29, #0x50 +L_fe_pow22523_6: + bl fe_sq + sub x21, x21, #1 + cmp x21, #0 + bne L_fe_pow22523_6 + add x0, x29, #48 + add x2, x29, #48 + bl fe_mul + mov x21, #50 + add x1, x29, #48 +L_fe_pow22523_7: + bl fe_sq + sub x21, x21, #1 + cmp x21, #0 + bne L_fe_pow22523_7 + add x0, x29, #16 + add x2, x29, #16 + bl fe_mul + mov x21, #2 + add x1, x29, #16 +L_fe_pow22523_8: + bl fe_sq + sub x21, x21, #1 + cmp x21, #0 + bne L_fe_pow22523_8 + ldr x0, [x29, #112] + ldr x2, [x29, #120] + bl fe_mul + ldr x21, [x29, #136] + ldp x29, x30, [sp], #0x90 + ret + .size fe_pow22523,.-fe_pow22523 + .text + .align 2 + .globl fe_ge_to_p2 + .type fe_ge_to_p2, %function +fe_ge_to_p2: + stp x29, x30, [sp, #-112]! 
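+ # Note: the three inlined field multiplies below convert a
+ # point from completed (p1p1) coordinates to projective (p2)
+ # ones, in the usual ref10 fashion:
+ #     fe_mul(rx, px, pt);   /* X = X * T */
+ #     fe_mul(ry, py, pz);   /* Y = Y * Z */
+ #     fe_mul(rz, pz, pt);   /* Z = Z * T */
+ # where (px, py, pz, pt) are the incoming coordinate pointers
+ # saved on the stack above.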
+ add x29, sp, #0 + str x17, [x29, #72] + str x19, [x29, #80] + stp x20, x21, [x29, #88] + str x22, [x29, #104] + str x1, [x29, #16] + str x2, [x29, #24] + str x3, [x29, #32] + str x4, [x29, #40] + str x5, [x29, #48] + str x6, [x29, #56] + ldr x1, [x29, #32] + ldr x2, [x29, #56] + # Multiply + ldp x11, x12, [x1] + ldp x13, x14, [x1, #16] + ldp x15, x16, [x2] + ldp x17, x19, [x2, #16] + # A[0] * B[0] + mul x3, x11, x15 + umulh x4, x11, x15 + # A[0] * B[1] + mul x20, x11, x16 + umulh x5, x11, x16 + adds x4, x4, x20 + adc x5, x5, xzr + # A[1] * B[0] + mul x20, x12, x15 + umulh x21, x12, x15 + adds x4, x4, x20 + adcs x5, x5, x21 + adc x6, xzr, xzr + # A[0] * B[2] + mul x20, x11, x17 + umulh x21, x11, x17 + adds x5, x5, x20 + adc x6, x6, x21 + # A[1] * B[1] + mul x20, x12, x16 + umulh x21, x12, x16 + adds x5, x5, x20 + adcs x6, x6, x21 + adc x7, xzr, xzr + # A[2] * B[0] + mul x20, x13, x15 + umulh x21, x13, x15 + adds x5, x5, x20 + adcs x6, x6, x21 + adc x7, x7, xzr + # A[0] * B[3] + mul x20, x11, x19 + umulh x21, x11, x19 + adds x6, x6, x20 + adcs x7, x7, x21 + adc x8, xzr, xzr + # A[1] * B[2] + mul x20, x12, x17 + umulh x21, x12, x17 + adds x6, x6, x20 + adcs x7, x7, x21 + adc x8, x8, xzr + # A[2] * B[1] + mul x20, x13, x16 + umulh x21, x13, x16 + adds x6, x6, x20 + adcs x7, x7, x21 + adc x8, x8, xzr + # A[3] * B[0] + mul x20, x14, x15 + umulh x21, x14, x15 + adds x6, x6, x20 + adcs x7, x7, x21 + adc x8, x8, xzr + # A[1] * B[3] + mul x20, x12, x19 + umulh x21, x12, x19 + adds x7, x7, x20 + adcs x8, x8, x21 + adc x9, xzr, xzr + # A[2] * B[2] + mul x20, x13, x17 + umulh x21, x13, x17 + adds x7, x7, x20 + adcs x8, x8, x21 + adc x9, x9, xzr + # A[3] * B[1] + mul x20, x14, x16 + umulh x21, x14, x16 + adds x7, x7, x20 + adcs x8, x8, x21 + adc x9, x9, xzr + # A[2] * B[3] + mul x20, x13, x19 + umulh x21, x13, x19 + adds x8, x8, x20 + adcs x9, x9, x21 + adc x10, xzr, xzr + # A[3] * B[2] + mul x20, x14, x17 + umulh x21, x14, x17 + adds x8, x8, x20 + adcs x9, x9, x21 + adc x10, x10, xzr + # A[3] * B[3] + mul x20, x14, x19 + umulh x21, x14, x19 + adds x9, x9, x20 + adc x10, x10, x21 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x10, x10, x9, #63 + extr x9, x9, x8, #63 + extr x8, x8, x7, #63 + extr x7, x7, x6, #63 + and x6, x6, #0x7fffffffffffffff + # Multiply top half by 19 + mov x20, #19 + mul x21, x20, x7 + umulh x7, x20, x7 + adds x3, x3, x21 + mul x21, x20, x8 + umulh x8, x20, x8 + adcs x4, x4, x21 + mul x21, x20, x9 + umulh x9, x20, x9 + adcs x5, x5, x21 + mul x21, x20, x10 + umulh x22, x20, x10 + adcs x6, x6, x21 + adc x22, x22, xzr + # Add remaining product results in + adds x4, x4, x7 + adcs x5, x5, x8 + adcs x6, x6, x9 + adc x22, x22, xzr + # Overflow + extr x22, x22, x6, #63 + mul x22, x22, x20 + and x6, x6, #0x7fffffffffffffff + adds x3, x3, x22 + adcs x4, x4, xzr + adcs x5, x5, xzr + adc x6, x6, xzr + # Reduce if top bit set + and x22, x20, x6, asr 63 + and x6, x6, #0x7fffffffffffffff + adds x3, x3, x22 + adcs x4, x4, xzr + adcs x5, x5, xzr + adc x6, x6, xzr + # Store + stp x3, x4, [x0] + stp x5, x6, [x0, #16] + ldr x0, [x29, #16] + ldr x1, [x29, #40] + ldr x2, [x29, #48] + # Multiply + ldp x11, x12, [x1] + ldp x13, x14, [x1, #16] + ldp x15, x16, [x2] + ldp x17, x19, [x2, #16] + # A[0] * B[0] + mul x3, x11, x15 + umulh x4, x11, x15 + # A[0] * B[1] + mul x20, x11, x16 + umulh x5, x11, x16 + adds x4, x4, x20 + adc x5, x5, xzr + # A[1] * B[0] + mul x20, x12, x15 + umulh x21, x12, x15 + adds x4, x4, x20 + adcs x5, x5, x21 + adc x6, xzr, xzr + # A[0] * B[2] + mul x20, 
x11, x17 + umulh x21, x11, x17 + adds x5, x5, x20 + adc x6, x6, x21 + # A[1] * B[1] + mul x20, x12, x16 + umulh x21, x12, x16 + adds x5, x5, x20 + adcs x6, x6, x21 + adc x7, xzr, xzr + # A[2] * B[0] + mul x20, x13, x15 + umulh x21, x13, x15 + adds x5, x5, x20 + adcs x6, x6, x21 + adc x7, x7, xzr + # A[0] * B[3] + mul x20, x11, x19 + umulh x21, x11, x19 + adds x6, x6, x20 + adcs x7, x7, x21 + adc x8, xzr, xzr + # A[1] * B[2] + mul x20, x12, x17 + umulh x21, x12, x17 + adds x6, x6, x20 + adcs x7, x7, x21 + adc x8, x8, xzr + # A[2] * B[1] + mul x20, x13, x16 + umulh x21, x13, x16 + adds x6, x6, x20 + adcs x7, x7, x21 + adc x8, x8, xzr + # A[3] * B[0] + mul x20, x14, x15 + umulh x21, x14, x15 + adds x6, x6, x20 + adcs x7, x7, x21 + adc x8, x8, xzr + # A[1] * B[3] + mul x20, x12, x19 + umulh x21, x12, x19 + adds x7, x7, x20 + adcs x8, x8, x21 + adc x9, xzr, xzr + # A[2] * B[2] + mul x20, x13, x17 + umulh x21, x13, x17 + adds x7, x7, x20 + adcs x8, x8, x21 + adc x9, x9, xzr + # A[3] * B[1] + mul x20, x14, x16 + umulh x21, x14, x16 + adds x7, x7, x20 + adcs x8, x8, x21 + adc x9, x9, xzr + # A[2] * B[3] + mul x20, x13, x19 + umulh x21, x13, x19 + adds x8, x8, x20 + adcs x9, x9, x21 + adc x10, xzr, xzr + # A[3] * B[2] + mul x20, x14, x17 + umulh x21, x14, x17 + adds x8, x8, x20 + adcs x9, x9, x21 + adc x10, x10, xzr + # A[3] * B[3] + mul x20, x14, x19 + umulh x21, x14, x19 + adds x9, x9, x20 + adc x10, x10, x21 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x10, x10, x9, #63 + extr x9, x9, x8, #63 + extr x8, x8, x7, #63 + extr x7, x7, x6, #63 + and x6, x6, #0x7fffffffffffffff + # Multiply top half by 19 + mov x20, #19 + mul x21, x20, x7 + umulh x7, x20, x7 + adds x3, x3, x21 + mul x21, x20, x8 + umulh x8, x20, x8 + adcs x4, x4, x21 + mul x21, x20, x9 + umulh x9, x20, x9 + adcs x5, x5, x21 + mul x21, x20, x10 + umulh x22, x20, x10 + adcs x6, x6, x21 + adc x22, x22, xzr + # Add remaining product results in + adds x4, x4, x7 + adcs x5, x5, x8 + adcs x6, x6, x9 + adc x22, x22, xzr + # Overflow + extr x22, x22, x6, #63 + mul x22, x22, x20 + and x6, x6, #0x7fffffffffffffff + adds x3, x3, x22 + adcs x4, x4, xzr + adcs x5, x5, xzr + adc x6, x6, xzr + # Reduce if top bit set + and x22, x20, x6, asr 63 + and x6, x6, #0x7fffffffffffffff + adds x3, x3, x22 + adcs x4, x4, xzr + adcs x5, x5, xzr + adc x6, x6, xzr + # Store + stp x3, x4, [x0] + stp x5, x6, [x0, #16] + ldr x0, [x29, #24] + ldr x2, [x29, #56] + # Multiply + ldp x11, x12, [x2] + ldp x13, x14, [x2, #16] + # A[0] * B[0] + mul x3, x15, x11 + umulh x4, x15, x11 + # A[0] * B[1] + mul x20, x15, x12 + umulh x5, x15, x12 + adds x4, x4, x20 + adc x5, x5, xzr + # A[1] * B[0] + mul x20, x16, x11 + umulh x21, x16, x11 + adds x4, x4, x20 + adcs x5, x5, x21 + adc x6, xzr, xzr + # A[0] * B[2] + mul x20, x15, x13 + umulh x21, x15, x13 + adds x5, x5, x20 + adc x6, x6, x21 + # A[1] * B[1] + mul x20, x16, x12 + umulh x21, x16, x12 + adds x5, x5, x20 + adcs x6, x6, x21 + adc x7, xzr, xzr + # A[2] * B[0] + mul x20, x17, x11 + umulh x21, x17, x11 + adds x5, x5, x20 + adcs x6, x6, x21 + adc x7, x7, xzr + # A[0] * B[3] + mul x20, x15, x14 + umulh x21, x15, x14 + adds x6, x6, x20 + adcs x7, x7, x21 + adc x8, xzr, xzr + # A[1] * B[2] + mul x20, x16, x13 + umulh x21, x16, x13 + adds x6, x6, x20 + adcs x7, x7, x21 + adc x8, x8, xzr + # A[2] * B[1] + mul x20, x17, x12 + umulh x21, x17, x12 + adds x6, x6, x20 + adcs x7, x7, x21 + adc x8, x8, xzr + # A[3] * B[0] + mul x20, x19, x11 + umulh x21, x19, x11 + adds x6, x6, x20 + adcs x7, x7, x21 + adc x8, 
x8, xzr + # A[1] * B[3] + mul x20, x16, x14 + umulh x21, x16, x14 + adds x7, x7, x20 + adcs x8, x8, x21 + adc x9, xzr, xzr + # A[2] * B[2] + mul x20, x17, x13 + umulh x21, x17, x13 + adds x7, x7, x20 + adcs x8, x8, x21 + adc x9, x9, xzr + # A[3] * B[1] + mul x20, x19, x12 + umulh x21, x19, x12 + adds x7, x7, x20 + adcs x8, x8, x21 + adc x9, x9, xzr + # A[2] * B[3] + mul x20, x17, x14 + umulh x21, x17, x14 + adds x8, x8, x20 + adcs x9, x9, x21 + adc x10, xzr, xzr + # A[3] * B[2] + mul x20, x19, x13 + umulh x21, x19, x13 + adds x8, x8, x20 + adcs x9, x9, x21 + adc x10, x10, xzr + # A[3] * B[3] + mul x20, x19, x14 + umulh x21, x19, x14 + adds x9, x9, x20 + adc x10, x10, x21 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x10, x10, x9, #63 + extr x9, x9, x8, #63 + extr x8, x8, x7, #63 + extr x7, x7, x6, #63 + and x6, x6, #0x7fffffffffffffff + # Multiply top half by 19 + mov x20, #19 + mul x21, x20, x7 + umulh x7, x20, x7 + adds x3, x3, x21 + mul x21, x20, x8 + umulh x8, x20, x8 + adcs x4, x4, x21 + mul x21, x20, x9 + umulh x9, x20, x9 + adcs x5, x5, x21 + mul x21, x20, x10 + umulh x22, x20, x10 + adcs x6, x6, x21 + adc x22, x22, xzr + # Add remaining product results in + adds x4, x4, x7 + adcs x5, x5, x8 + adcs x6, x6, x9 + adc x22, x22, xzr + # Overflow + extr x22, x22, x6, #63 + mul x22, x22, x20 + and x6, x6, #0x7fffffffffffffff + adds x3, x3, x22 + adcs x4, x4, xzr + adcs x5, x5, xzr + adc x6, x6, xzr + # Reduce if top bit set + and x22, x20, x6, asr 63 + and x6, x6, #0x7fffffffffffffff + adds x3, x3, x22 + adcs x4, x4, xzr + adcs x5, x5, xzr + adc x6, x6, xzr + # Store + stp x3, x4, [x0] + stp x5, x6, [x0, #16] + ldr x17, [x29, #72] + ldr x19, [x29, #80] + ldp x20, x21, [x29, #88] + ldr x22, [x29, #104] + ldp x29, x30, [sp], #0x70 + ret + .size fe_ge_to_p2,.-fe_ge_to_p2 + .text + .align 2 + .globl fe_ge_to_p3 + .type fe_ge_to_p3, %function +fe_ge_to_p3: + stp x29, x30, [sp, #-160]! 
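+ # Note: like fe_ge_to_p2 above, but producing extended (p3)
+ # coordinates: a fourth product supplies the T coordinate.
+ # Assumed mapping of the four multiplies below:
+ #     fe_mul(rx, px, pt);   /* X = X * T */
+ #     fe_mul(rt, px, py);   /* T = X * Y */
+ #     fe_mul(ry, py, pz);   /* Y = Y * Z */
+ #     fe_mul(rz, pz, pt);   /* Z = Z * T */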
+ add x29, sp, #0 + str x17, [x29, #88] + str x19, [x29, #96] + stp x20, x21, [x29, #104] + stp x22, x23, [x29, #120] + stp x24, x25, [x29, #136] + str x26, [x29, #152] + str x1, [x29, #16] + str x2, [x29, #24] + str x3, [x29, #32] + str x4, [x29, #40] + str x5, [x29, #48] + str x6, [x29, #56] + str x7, [x29, #64] + ldr x1, [x29, #40] + ldr x2, [x29, #64] + # Multiply + ldp x11, x12, [x1] + ldp x13, x14, [x1, #16] + ldp x15, x16, [x2] + ldp x17, x19, [x2, #16] + # A[0] * B[0] + mul x3, x11, x15 + umulh x4, x11, x15 + # A[0] * B[1] + mul x24, x11, x16 + umulh x5, x11, x16 + adds x4, x4, x24 + adc x5, x5, xzr + # A[1] * B[0] + mul x24, x12, x15 + umulh x25, x12, x15 + adds x4, x4, x24 + adcs x5, x5, x25 + adc x6, xzr, xzr + # A[0] * B[2] + mul x24, x11, x17 + umulh x25, x11, x17 + adds x5, x5, x24 + adc x6, x6, x25 + # A[1] * B[1] + mul x24, x12, x16 + umulh x25, x12, x16 + adds x5, x5, x24 + adcs x6, x6, x25 + adc x7, xzr, xzr + # A[2] * B[0] + mul x24, x13, x15 + umulh x25, x13, x15 + adds x5, x5, x24 + adcs x6, x6, x25 + adc x7, x7, xzr + # A[0] * B[3] + mul x24, x11, x19 + umulh x25, x11, x19 + adds x6, x6, x24 + adcs x7, x7, x25 + adc x8, xzr, xzr + # A[1] * B[2] + mul x24, x12, x17 + umulh x25, x12, x17 + adds x6, x6, x24 + adcs x7, x7, x25 + adc x8, x8, xzr + # A[2] * B[1] + mul x24, x13, x16 + umulh x25, x13, x16 + adds x6, x6, x24 + adcs x7, x7, x25 + adc x8, x8, xzr + # A[3] * B[0] + mul x24, x14, x15 + umulh x25, x14, x15 + adds x6, x6, x24 + adcs x7, x7, x25 + adc x8, x8, xzr + # A[1] * B[3] + mul x24, x12, x19 + umulh x25, x12, x19 + adds x7, x7, x24 + adcs x8, x8, x25 + adc x9, xzr, xzr + # A[2] * B[2] + mul x24, x13, x17 + umulh x25, x13, x17 + adds x7, x7, x24 + adcs x8, x8, x25 + adc x9, x9, xzr + # A[3] * B[1] + mul x24, x14, x16 + umulh x25, x14, x16 + adds x7, x7, x24 + adcs x8, x8, x25 + adc x9, x9, xzr + # A[2] * B[3] + mul x24, x13, x19 + umulh x25, x13, x19 + adds x8, x8, x24 + adcs x9, x9, x25 + adc x10, xzr, xzr + # A[3] * B[2] + mul x24, x14, x17 + umulh x25, x14, x17 + adds x8, x8, x24 + adcs x9, x9, x25 + adc x10, x10, xzr + # A[3] * B[3] + mul x24, x14, x19 + umulh x25, x14, x19 + adds x9, x9, x24 + adc x10, x10, x25 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x10, x10, x9, #63 + extr x9, x9, x8, #63 + extr x8, x8, x7, #63 + extr x7, x7, x6, #63 + and x6, x6, #0x7fffffffffffffff + # Multiply top half by 19 + mov x24, #19 + mul x25, x24, x7 + umulh x7, x24, x7 + adds x3, x3, x25 + mul x25, x24, x8 + umulh x8, x24, x8 + adcs x4, x4, x25 + mul x25, x24, x9 + umulh x9, x24, x9 + adcs x5, x5, x25 + mul x25, x24, x10 + umulh x26, x24, x10 + adcs x6, x6, x25 + adc x26, x26, xzr + # Add remaining product results in + adds x4, x4, x7 + adcs x5, x5, x8 + adcs x6, x6, x9 + adc x26, x26, xzr + # Overflow + extr x26, x26, x6, #63 + mul x26, x26, x24 + and x6, x6, #0x7fffffffffffffff + adds x3, x3, x26 + adcs x4, x4, xzr + adcs x5, x5, xzr + adc x6, x6, xzr + # Reduce if top bit set + and x26, x24, x6, asr 63 + and x6, x6, #0x7fffffffffffffff + adds x3, x3, x26 + adcs x4, x4, xzr + adcs x5, x5, xzr + adc x6, x6, xzr + # Store + stp x3, x4, [x0] + stp x5, x6, [x0, #16] + ldr x0, [x29, #32] + ldr x2, [x29, #48] + # Multiply + ldp x20, x21, [x2] + ldp x22, x23, [x2, #16] + # A[0] * B[0] + mul x3, x11, x20 + umulh x4, x11, x20 + # A[0] * B[1] + mul x24, x11, x21 + umulh x5, x11, x21 + adds x4, x4, x24 + adc x5, x5, xzr + # A[1] * B[0] + mul x24, x12, x20 + umulh x25, x12, x20 + adds x4, x4, x24 + adcs x5, x5, x25 + adc x6, xzr, xzr + # A[0] * B[2] + 
mul x24, x11, x22 + umulh x25, x11, x22 + adds x5, x5, x24 + adc x6, x6, x25 + # A[1] * B[1] + mul x24, x12, x21 + umulh x25, x12, x21 + adds x5, x5, x24 + adcs x6, x6, x25 + adc x7, xzr, xzr + # A[2] * B[0] + mul x24, x13, x20 + umulh x25, x13, x20 + adds x5, x5, x24 + adcs x6, x6, x25 + adc x7, x7, xzr + # A[0] * B[3] + mul x24, x11, x23 + umulh x25, x11, x23 + adds x6, x6, x24 + adcs x7, x7, x25 + adc x8, xzr, xzr + # A[1] * B[2] + mul x24, x12, x22 + umulh x25, x12, x22 + adds x6, x6, x24 + adcs x7, x7, x25 + adc x8, x8, xzr + # A[2] * B[1] + mul x24, x13, x21 + umulh x25, x13, x21 + adds x6, x6, x24 + adcs x7, x7, x25 + adc x8, x8, xzr + # A[3] * B[0] + mul x24, x14, x20 + umulh x25, x14, x20 + adds x6, x6, x24 + adcs x7, x7, x25 + adc x8, x8, xzr + # A[1] * B[3] + mul x24, x12, x23 + umulh x25, x12, x23 + adds x7, x7, x24 + adcs x8, x8, x25 + adc x9, xzr, xzr + # A[2] * B[2] + mul x24, x13, x22 + umulh x25, x13, x22 + adds x7, x7, x24 + adcs x8, x8, x25 + adc x9, x9, xzr + # A[3] * B[1] + mul x24, x14, x21 + umulh x25, x14, x21 + adds x7, x7, x24 + adcs x8, x8, x25 + adc x9, x9, xzr + # A[2] * B[3] + mul x24, x13, x23 + umulh x25, x13, x23 + adds x8, x8, x24 + adcs x9, x9, x25 + adc x10, xzr, xzr + # A[3] * B[2] + mul x24, x14, x22 + umulh x25, x14, x22 + adds x8, x8, x24 + adcs x9, x9, x25 + adc x10, x10, xzr + # A[3] * B[3] + mul x24, x14, x23 + umulh x25, x14, x23 + adds x9, x9, x24 + adc x10, x10, x25 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x10, x10, x9, #63 + extr x9, x9, x8, #63 + extr x8, x8, x7, #63 + extr x7, x7, x6, #63 + and x6, x6, #0x7fffffffffffffff + # Multiply top half by 19 + mov x24, #19 + mul x25, x24, x7 + umulh x7, x24, x7 + adds x3, x3, x25 + mul x25, x24, x8 + umulh x8, x24, x8 + adcs x4, x4, x25 + mul x25, x24, x9 + umulh x9, x24, x9 + adcs x5, x5, x25 + mul x25, x24, x10 + umulh x26, x24, x10 + adcs x6, x6, x25 + adc x26, x26, xzr + # Add remaining product results in + adds x4, x4, x7 + adcs x5, x5, x8 + adcs x6, x6, x9 + adc x26, x26, xzr + # Overflow + extr x26, x26, x6, #63 + mul x26, x26, x24 + and x6, x6, #0x7fffffffffffffff + adds x3, x3, x26 + adcs x4, x4, xzr + adcs x5, x5, xzr + adc x6, x6, xzr + # Reduce if top bit set + and x26, x24, x6, asr 63 + and x6, x6, #0x7fffffffffffffff + adds x3, x3, x26 + adcs x4, x4, xzr + adcs x5, x5, xzr + adc x6, x6, xzr + # Store + stp x3, x4, [x0] + stp x5, x6, [x0, #16] + ldr x0, [x29, #16] + ldr x2, [x29, #56] + # Multiply + ldp x11, x12, [x2] + ldp x13, x14, [x2, #16] + # A[0] * B[0] + mul x3, x20, x11 + umulh x4, x20, x11 + # A[0] * B[1] + mul x24, x20, x12 + umulh x5, x20, x12 + adds x4, x4, x24 + adc x5, x5, xzr + # A[1] * B[0] + mul x24, x21, x11 + umulh x25, x21, x11 + adds x4, x4, x24 + adcs x5, x5, x25 + adc x6, xzr, xzr + # A[0] * B[2] + mul x24, x20, x13 + umulh x25, x20, x13 + adds x5, x5, x24 + adc x6, x6, x25 + # A[1] * B[1] + mul x24, x21, x12 + umulh x25, x21, x12 + adds x5, x5, x24 + adcs x6, x6, x25 + adc x7, xzr, xzr + # A[2] * B[0] + mul x24, x22, x11 + umulh x25, x22, x11 + adds x5, x5, x24 + adcs x6, x6, x25 + adc x7, x7, xzr + # A[0] * B[3] + mul x24, x20, x14 + umulh x25, x20, x14 + adds x6, x6, x24 + adcs x7, x7, x25 + adc x8, xzr, xzr + # A[1] * B[2] + mul x24, x21, x13 + umulh x25, x21, x13 + adds x6, x6, x24 + adcs x7, x7, x25 + adc x8, x8, xzr + # A[2] * B[1] + mul x24, x22, x12 + umulh x25, x22, x12 + adds x6, x6, x24 + adcs x7, x7, x25 + adc x8, x8, xzr + # A[3] * B[0] + mul x24, x23, x11 + umulh x25, x23, x11 + adds x6, x6, x24 + adcs x7, x7, x25 + 
adc x8, x8, xzr + # A[1] * B[3] + mul x24, x21, x14 + umulh x25, x21, x14 + adds x7, x7, x24 + adcs x8, x8, x25 + adc x9, xzr, xzr + # A[2] * B[2] + mul x24, x22, x13 + umulh x25, x22, x13 + adds x7, x7, x24 + adcs x8, x8, x25 + adc x9, x9, xzr + # A[3] * B[1] + mul x24, x23, x12 + umulh x25, x23, x12 + adds x7, x7, x24 + adcs x8, x8, x25 + adc x9, x9, xzr + # A[2] * B[3] + mul x24, x22, x14 + umulh x25, x22, x14 + adds x8, x8, x24 + adcs x9, x9, x25 + adc x10, xzr, xzr + # A[3] * B[2] + mul x24, x23, x13 + umulh x25, x23, x13 + adds x8, x8, x24 + adcs x9, x9, x25 + adc x10, x10, xzr + # A[3] * B[3] + mul x24, x23, x14 + umulh x25, x23, x14 + adds x9, x9, x24 + adc x10, x10, x25 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x10, x10, x9, #63 + extr x9, x9, x8, #63 + extr x8, x8, x7, #63 + extr x7, x7, x6, #63 + and x6, x6, #0x7fffffffffffffff + # Multiply top half by 19 + mov x24, #19 + mul x25, x24, x7 + umulh x7, x24, x7 + adds x3, x3, x25 + mul x25, x24, x8 + umulh x8, x24, x8 + adcs x4, x4, x25 + mul x25, x24, x9 + umulh x9, x24, x9 + adcs x5, x5, x25 + mul x25, x24, x10 + umulh x26, x24, x10 + adcs x6, x6, x25 + adc x26, x26, xzr + # Add remaining product results in + adds x4, x4, x7 + adcs x5, x5, x8 + adcs x6, x6, x9 + adc x26, x26, xzr + # Overflow + extr x26, x26, x6, #63 + mul x26, x26, x24 + and x6, x6, #0x7fffffffffffffff + adds x3, x3, x26 + adcs x4, x4, xzr + adcs x5, x5, xzr + adc x6, x6, xzr + # Reduce if top bit set + and x26, x24, x6, asr 63 + and x6, x6, #0x7fffffffffffffff + adds x3, x3, x26 + adcs x4, x4, xzr + adcs x5, x5, xzr + adc x6, x6, xzr + # Store + stp x3, x4, [x0] + stp x5, x6, [x0, #16] + ldr x0, [x29, #24] + # Multiply + # A[0] * B[0] + mul x3, x11, x15 + umulh x4, x11, x15 + # A[0] * B[1] + mul x24, x11, x16 + umulh x5, x11, x16 + adds x4, x4, x24 + adc x5, x5, xzr + # A[1] * B[0] + mul x24, x12, x15 + umulh x25, x12, x15 + adds x4, x4, x24 + adcs x5, x5, x25 + adc x6, xzr, xzr + # A[0] * B[2] + mul x24, x11, x17 + umulh x25, x11, x17 + adds x5, x5, x24 + adc x6, x6, x25 + # A[1] * B[1] + mul x24, x12, x16 + umulh x25, x12, x16 + adds x5, x5, x24 + adcs x6, x6, x25 + adc x7, xzr, xzr + # A[2] * B[0] + mul x24, x13, x15 + umulh x25, x13, x15 + adds x5, x5, x24 + adcs x6, x6, x25 + adc x7, x7, xzr + # A[0] * B[3] + mul x24, x11, x19 + umulh x25, x11, x19 + adds x6, x6, x24 + adcs x7, x7, x25 + adc x8, xzr, xzr + # A[1] * B[2] + mul x24, x12, x17 + umulh x25, x12, x17 + adds x6, x6, x24 + adcs x7, x7, x25 + adc x8, x8, xzr + # A[2] * B[1] + mul x24, x13, x16 + umulh x25, x13, x16 + adds x6, x6, x24 + adcs x7, x7, x25 + adc x8, x8, xzr + # A[3] * B[0] + mul x24, x14, x15 + umulh x25, x14, x15 + adds x6, x6, x24 + adcs x7, x7, x25 + adc x8, x8, xzr + # A[1] * B[3] + mul x24, x12, x19 + umulh x25, x12, x19 + adds x7, x7, x24 + adcs x8, x8, x25 + adc x9, xzr, xzr + # A[2] * B[2] + mul x24, x13, x17 + umulh x25, x13, x17 + adds x7, x7, x24 + adcs x8, x8, x25 + adc x9, x9, xzr + # A[3] * B[1] + mul x24, x14, x16 + umulh x25, x14, x16 + adds x7, x7, x24 + adcs x8, x8, x25 + adc x9, x9, xzr + # A[2] * B[3] + mul x24, x13, x19 + umulh x25, x13, x19 + adds x8, x8, x24 + adcs x9, x9, x25 + adc x10, xzr, xzr + # A[3] * B[2] + mul x24, x14, x17 + umulh x25, x14, x17 + adds x8, x8, x24 + adcs x9, x9, x25 + adc x10, x10, xzr + # A[3] * B[3] + mul x24, x14, x19 + umulh x25, x14, x19 + adds x9, x9, x24 + adc x10, x10, x25 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x10, x10, x9, #63 + extr x9, x9, x8, #63 + extr x8, 
x8, x7, #63 + extr x7, x7, x6, #63 + and x6, x6, #0x7fffffffffffffff + # Multiply top half by 19 + mov x24, #19 + mul x25, x24, x7 + umulh x7, x24, x7 + adds x3, x3, x25 + mul x25, x24, x8 + umulh x8, x24, x8 + adcs x4, x4, x25 + mul x25, x24, x9 + umulh x9, x24, x9 + adcs x5, x5, x25 + mul x25, x24, x10 + umulh x26, x24, x10 + adcs x6, x6, x25 + adc x26, x26, xzr + # Add remaining product results in + adds x4, x4, x7 + adcs x5, x5, x8 + adcs x6, x6, x9 + adc x26, x26, xzr + # Overflow + extr x26, x26, x6, #63 + mul x26, x26, x24 + and x6, x6, #0x7fffffffffffffff + adds x3, x3, x26 + adcs x4, x4, xzr + adcs x5, x5, xzr + adc x6, x6, xzr + # Reduce if top bit set + and x26, x24, x6, asr 63 + and x6, x6, #0x7fffffffffffffff + adds x3, x3, x26 + adcs x4, x4, xzr + adcs x5, x5, xzr + adc x6, x6, xzr + # Store + stp x3, x4, [x0] + stp x5, x6, [x0, #16] + ldr x17, [x29, #88] + ldr x19, [x29, #96] + ldp x20, x21, [x29, #104] + ldp x22, x23, [x29, #120] + ldp x24, x25, [x29, #136] + ldr x26, [x29, #152] + ldp x29, x30, [sp], #0xa0 + ret + .size fe_ge_to_p3,.-fe_ge_to_p3 + .text + .align 2 + .globl fe_ge_dbl + .type fe_ge_dbl, %function +fe_ge_dbl: + stp x29, x30, [sp, #-176]! + add x29, sp, #0 + str x17, [x29, #88] + str x19, [x29, #96] + stp x20, x21, [x29, #104] + stp x22, x23, [x29, #120] + stp x24, x25, [x29, #136] + stp x26, x27, [x29, #152] + str x28, [x29, #168] + str x0, [x29, #16] + str x1, [x29, #24] + str x2, [x29, #32] + str x3, [x29, #40] + str x4, [x29, #48] + str x5, [x29, #56] + str x6, [x29, #64] + ldr x1, [x29, #48] + # Square + ldp x12, x13, [x1] + ldp x14, x15, [x1, #16] + # A[0] * A[1] + mul x5, x12, x13 + umulh x6, x12, x13 + # A[0] * A[2] + mul x25, x12, x14 + umulh x7, x12, x14 + adds x6, x6, x25 + adc x7, x7, xzr + # A[0] * A[3] + mul x25, x12, x15 + umulh x8, x12, x15 + adds x7, x7, x25 + adc x8, x8, xzr + # A[1] * A[2] + mul x25, x13, x14 + umulh x26, x13, x14 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, xzr, xzr + # A[1] * A[3] + mul x25, x13, x15 + umulh x26, x13, x15 + adds x8, x8, x25 + adc x9, x9, x26 + # A[2] * A[3] + mul x25, x14, x15 + umulh x10, x14, x15 + adds x9, x9, x25 + adc x10, x10, xzr + # Double + adds x5, x5, x5 + adcs x6, x6, x6 + adcs x7, x7, x7 + adcs x8, x8, x8 + adcs x9, x9, x9 + adcs x10, x10, x10 + adc x11, xzr, xzr + # A[0] * A[0] + mul x4, x12, x12 + umulh x27, x12, x12 + # A[1] * A[1] + mul x25, x13, x13 + umulh x26, x13, x13 + adds x5, x5, x27 + adcs x6, x6, x25 + adc x27, x26, xzr + # A[2] * A[2] + mul x25, x14, x14 + umulh x26, x14, x14 + adds x7, x7, x27 + adcs x8, x8, x25 + adc x27, x26, xzr + # A[3] * A[3] + mul x25, x15, x15 + umulh x26, x15, x15 + adds x9, x9, x27 + adcs x10, x10, x25 + adc x11, x11, x26 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x11, x11, x10, #63 + extr x10, x10, x9, #63 + extr x9, x9, x8, #63 + extr x8, x8, x7, #63 + and x7, x7, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x8 + umulh x8, x25, x8 + adds x4, x4, x26 + mul x26, x25, x9 + umulh x9, x25, x9 + adcs x5, x5, x26 + mul x26, x25, x10 + umulh x10, x25, x10 + adcs x6, x6, x26 + mul x26, x25, x11 + umulh x27, x25, x11 + adcs x7, x7, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x5, x5, x8 + adcs x6, x6, x9 + adcs x7, x7, x10 + adc x27, x27, xzr + # Overflow + extr x27, x27, x7, #63 + mul x27, x27, x25 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Reduce if top bit set + and x27, x25, x7, asr 63 + and x7, x7, 
#0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Store + stp x4, x5, [x0] + stp x6, x7, [x0, #16] + ldr x0, [x29, #32] + ldr x1, [x29, #56] + # Square + ldp x21, x22, [x1] + ldp x23, x24, [x1, #16] + # A[0] * A[1] + mul x9, x21, x22 + umulh x10, x21, x22 + # A[0] * A[2] + mul x25, x21, x23 + umulh x11, x21, x23 + adds x10, x10, x25 + adc x11, x11, xzr + # A[0] * A[3] + mul x25, x21, x24 + umulh x16, x21, x24 + adds x11, x11, x25 + adc x16, x16, xzr + # A[1] * A[2] + mul x25, x22, x23 + umulh x26, x22, x23 + adds x11, x11, x25 + adcs x16, x16, x26 + adc x17, xzr, xzr + # A[1] * A[3] + mul x25, x22, x24 + umulh x26, x22, x24 + adds x16, x16, x25 + adc x17, x17, x26 + # A[2] * A[3] + mul x25, x23, x24 + umulh x19, x23, x24 + adds x17, x17, x25 + adc x19, x19, xzr + # Double + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x16, x16, x16 + adcs x17, x17, x17 + adcs x19, x19, x19 + adc x20, xzr, xzr + # A[0] * A[0] + mul x8, x21, x21 + umulh x27, x21, x21 + # A[1] * A[1] + mul x25, x22, x22 + umulh x26, x22, x22 + adds x9, x9, x27 + adcs x10, x10, x25 + adc x27, x26, xzr + # A[2] * A[2] + mul x25, x23, x23 + umulh x26, x23, x23 + adds x11, x11, x27 + adcs x16, x16, x25 + adc x27, x26, xzr + # A[3] * A[3] + mul x25, x24, x24 + umulh x26, x24, x24 + adds x17, x17, x27 + adcs x19, x19, x25 + adc x20, x20, x26 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x20, x20, x19, #63 + extr x19, x19, x17, #63 + extr x17, x17, x16, #63 + extr x16, x16, x11, #63 + and x11, x11, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x16 + umulh x16, x25, x16 + adds x8, x8, x26 + mul x26, x25, x17 + umulh x17, x25, x17 + adcs x9, x9, x26 + mul x26, x25, x19 + umulh x19, x25, x19 + adcs x10, x10, x26 + mul x26, x25, x20 + umulh x27, x25, x20 + adcs x11, x11, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x9, x9, x16 + adcs x10, x10, x17 + adcs x11, x11, x19 + adc x27, x27, xzr + # Overflow + extr x27, x27, x11, #63 + mul x27, x27, x25 + and x11, x11, #0x7fffffffffffffff + adds x8, x8, x27 + adcs x9, x9, xzr + adcs x10, x10, xzr + adc x11, x11, xzr + # Reduce if top bit set + and x27, x25, x11, asr 63 + and x11, x11, #0x7fffffffffffffff + adds x8, x8, x27 + adcs x9, x9, xzr + adcs x10, x10, xzr + adc x11, x11, xzr + # Store + stp x8, x9, [x0] + stp x10, x11, [x0, #16] + ldr x0, [x29, #24] + # Add + adds x12, x12, x21 + adcs x13, x13, x22 + adcs x14, x14, x23 + adc x15, x15, x24 + mov x25, #-19 + asr x28, x15, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x12, x12, x25 + sbcs x13, x13, x28 + sbcs x14, x14, x28 + sbc x15, x15, x26 + ldr x0, [x29, #40] + # Square + # A[0] * A[1] + mul x17, x12, x13 + umulh x19, x12, x13 + # A[0] * A[2] + mul x25, x12, x14 + umulh x20, x12, x14 + adds x19, x19, x25 + adc x20, x20, xzr + # A[0] * A[3] + mul x25, x12, x15 + umulh x21, x12, x15 + adds x20, x20, x25 + adc x21, x21, xzr + # A[1] * A[2] + mul x25, x13, x14 + umulh x26, x13, x14 + adds x20, x20, x25 + adcs x21, x21, x26 + adc x22, xzr, xzr + # A[1] * A[3] + mul x25, x13, x15 + umulh x26, x13, x15 + adds x21, x21, x25 + adc x22, x22, x26 + # A[2] * A[3] + mul x25, x14, x15 + umulh x23, x14, x15 + adds x22, x22, x25 + adc x23, x23, xzr + # Double + adds x17, x17, x17 + adcs x19, x19, x19 + adcs x20, x20, x20 + adcs x21, x21, x21 + adcs x22, x22, x22 + adcs x23, x23, x23 + adc x24, xzr, xzr + # A[0] * A[0] + mul x16, 
x12, x12 + umulh x27, x12, x12 + # A[1] * A[1] + mul x25, x13, x13 + umulh x26, x13, x13 + adds x17, x17, x27 + adcs x19, x19, x25 + adc x27, x26, xzr + # A[2] * A[2] + mul x25, x14, x14 + umulh x26, x14, x14 + adds x20, x20, x27 + adcs x21, x21, x25 + adc x27, x26, xzr + # A[3] * A[3] + mul x25, x15, x15 + umulh x26, x15, x15 + adds x22, x22, x27 + adcs x23, x23, x25 + adc x24, x24, x26 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x24, x24, x23, #63 + extr x23, x23, x22, #63 + extr x22, x22, x21, #63 + extr x21, x21, x20, #63 + and x20, x20, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x21 + umulh x21, x25, x21 + adds x16, x16, x26 + mul x26, x25, x22 + umulh x22, x25, x22 + adcs x17, x17, x26 + mul x26, x25, x23 + umulh x23, x25, x23 + adcs x19, x19, x26 + mul x26, x25, x24 + umulh x27, x25, x24 + adcs x20, x20, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x17, x17, x21 + adcs x19, x19, x22 + adcs x20, x20, x23 + adc x27, x27, xzr + # Overflow + extr x27, x27, x20, #63 + mul x27, x27, x25 + and x20, x20, #0x7fffffffffffffff + adds x16, x16, x27 + adcs x17, x17, xzr + adcs x19, x19, xzr + adc x20, x20, xzr + # Reduce if top bit set + and x27, x25, x20, asr 63 + and x20, x20, #0x7fffffffffffffff + adds x16, x16, x27 + adcs x17, x17, xzr + adcs x19, x19, xzr + adc x20, x20, xzr + # Store + stp x16, x17, [x0] + stp x19, x20, [x0, #16] + ldr x0, [x29, #24] + ldr x1, [x29, #32] + # Add + adds x12, x8, x4 + adcs x13, x9, x5 + adcs x14, x10, x6 + adc x15, x11, x7 + mov x25, #-19 + asr x28, x15, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x12, x12, x25 + sbcs x13, x13, x28 + sbcs x14, x14, x28 + sbc x15, x15, x26 + # Sub + subs x21, x8, x4 + sbcs x22, x9, x5 + sbcs x23, x10, x6 + sbcs x24, x11, x7 + mov x25, #-19 + csetm x28, cc + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x21, x21, x25 + adcs x22, x22, x28 + adcs x23, x23, x28 + adc x24, x24, x26 + stp x12, x13, [x0] + stp x14, x15, [x0, #16] + stp x21, x22, [x1] + stp x23, x24, [x1, #16] + ldr x0, [x29, #16] + # Sub + subs x16, x16, x12 + sbcs x17, x17, x13 + sbcs x19, x19, x14 + sbcs x20, x20, x15 + mov x25, #-19 + csetm x28, cc + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x16, x16, x25 + adcs x17, x17, x28 + adcs x19, x19, x28 + adc x20, x20, x26 + stp x16, x17, [x0] + stp x19, x20, [x0, #16] + ldr x0, [x29, #40] + ldr x1, [x29, #64] + # Square * 2 + ldp x12, x13, [x1] + ldp x14, x15, [x1, #16] + # A[0] * A[1] + mul x5, x12, x13 + umulh x6, x12, x13 + # A[0] * A[2] + mul x25, x12, x14 + umulh x7, x12, x14 + adds x6, x6, x25 + adc x7, x7, xzr + # A[0] * A[3] + mul x25, x12, x15 + umulh x8, x12, x15 + adds x7, x7, x25 + adc x8, x8, xzr + # A[1] * A[2] + mul x25, x13, x14 + umulh x26, x13, x14 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, xzr, xzr + # A[1] * A[3] + mul x25, x13, x15 + umulh x26, x13, x15 + adds x8, x8, x25 + adc x9, x9, x26 + # A[2] * A[3] + mul x25, x14, x15 + umulh x10, x14, x15 + adds x9, x9, x25 + adc x10, x10, xzr + # Double + adds x5, x5, x5 + adcs x6, x6, x6 + adcs x7, x7, x7 + adcs x8, x8, x8 + adcs x9, x9, x9 + adcs x10, x10, x10 + adc x11, xzr, xzr + # A[0] * A[0] + mul x4, x12, x12 + umulh x28, x12, x12 + # A[1] * A[1] + mul x25, x13, x13 + umulh x26, x13, x13 + adds x5, x5, x28 + adcs x6, x6, x25 + adc x28, x26, xzr + # 
A[2] * A[2] + mul x25, x14, x14 + umulh x26, x14, x14 + adds x7, x7, x28 + adcs x8, x8, x25 + adc x28, x26, xzr + # A[3] * A[3] + mul x25, x15, x15 + umulh x26, x15, x15 + adds x9, x9, x28 + adcs x10, x10, x25 + adc x11, x11, x26 + # Double and Reduce + mov x25, #0x169 + # Move top half into t4-t7 and remove top bit from t3 + lsr x28, x11, #61 + extr x11, x11, x10, #62 + extr x10, x10, x9, #62 + extr x9, x9, x8, #62 + extr x8, x8, x7, #62 + extr x7, x7, x6, #63 + extr x6, x6, x5, #63 + extr x5, x5, x4, #63 + lsl x4, x4, #1 + and x7, x7, #0x7fffffffffffffff + # Two left, only one right + and x11, x11, #0x7fffffffffffffff + # Multiply top bits by 19*19 + mul x28, x28, x25 + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x8 + umulh x8, x25, x8 + adds x4, x4, x26 + mul x26, x25, x9 + umulh x9, x25, x9 + adcs x5, x5, x26 + mul x26, x25, x10 + umulh x10, x25, x10 + adcs x6, x6, x26 + mul x26, x25, x11 + umulh x27, x25, x11 + adcs x7, x7, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x4, x4, x28 + adcs x5, x5, x8 + adcs x6, x6, x9 + adcs x7, x7, x10 + adc x27, x27, xzr + # Overflow + extr x27, x27, x7, #63 + mul x27, x27, x25 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Reduce if top bit set + and x27, x25, x7, asr 63 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Store + ldr x0, [x29, #40] + # Sub + subs x4, x4, x21 + sbcs x5, x5, x22 + sbcs x6, x6, x23 + sbcs x7, x7, x24 + mov x25, #-19 + csetm x28, cc + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x4, x4, x25 + adcs x5, x5, x28 + adcs x6, x6, x28 + adc x7, x7, x26 + stp x4, x5, [x0] + stp x6, x7, [x0, #16] + ldr x17, [x29, #88] + ldr x19, [x29, #96] + ldp x20, x21, [x29, #104] + ldp x22, x23, [x29, #120] + ldp x24, x25, [x29, #136] + ldp x26, x27, [x29, #152] + ldr x28, [x29, #168] + ldp x29, x30, [sp], #0xb0 + ret + .size fe_ge_dbl,.-fe_ge_dbl + .text + .align 2 + .globl fe_ge_madd + .type fe_ge_madd, %function +fe_ge_madd: + stp x29, x30, [sp, #-176]! 
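+ # Note: mixed point addition (extended-coordinate point plus a
+ # precomputed point).  The precomputed operand is assumed
+ # stored, ref10 style, as (y+x, y-x, 2*d*x*y); the add/sub and
+ # multiply steps below then follow the usual formulas, roughly:
+ #     A = (Y1 + X1) * (y+x);   B = (Y1 - X1) * (y-x);
+ #     C = (2*d*x*y) * T1;      D = 2 * Z1;
+ #     X3 = A - B;  Y3 = A + B;  Z3 = D + C;  T3 = D - C;
+ # leaving (X3, Y3, Z3, T3) in completed (p1p1) form.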
+ add x29, sp, #0 + str x17, [x29, #88] + str x19, [x29, #96] + stp x20, x21, [x29, #104] + stp x22, x23, [x29, #120] + stp x24, x25, [x29, #136] + stp x26, x27, [x29, #152] + str x28, [x29, #168] + str x0, [x29, #16] + str x1, [x29, #24] + str x2, [x29, #32] + str x3, [x29, #40] + str x4, [x29, #48] + str x5, [x29, #56] + str x6, [x29, #64] + str x7, [x29, #72] + ldr x2, [x29, #56] + ldr x3, [x29, #48] + # Add + ldp x12, x13, [x2] + ldp x14, x15, [x2, #16] + ldp x16, x17, [x3] + ldp x19, x20, [x3, #16] + adds x4, x12, x16 + adcs x5, x13, x17 + adcs x6, x14, x19 + adc x7, x15, x20 + mov x25, #-19 + asr x28, x7, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x4, x4, x25 + sbcs x5, x5, x28 + sbcs x6, x6, x28 + sbc x7, x7, x26 + # Sub + subs x8, x12, x16 + sbcs x9, x13, x17 + sbcs x10, x14, x19 + sbcs x11, x15, x20 + mov x25, #-19 + csetm x28, cc + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x8, x8, x25 + adcs x9, x9, x28 + adcs x10, x10, x28 + adc x11, x11, x26 + ldr x0, [x29, #32] + ldr x2, [x29, #184] + # Multiply + ldp x21, x22, [x2] + ldp x23, x24, [x2, #16] + # A[0] * B[0] + mul x12, x4, x21 + umulh x13, x4, x21 + # A[0] * B[1] + mul x25, x4, x22 + umulh x14, x4, x22 + adds x13, x13, x25 + adc x14, x14, xzr + # A[1] * B[0] + mul x25, x5, x21 + umulh x26, x5, x21 + adds x13, x13, x25 + adcs x14, x14, x26 + adc x15, xzr, xzr + # A[0] * B[2] + mul x25, x4, x23 + umulh x26, x4, x23 + adds x14, x14, x25 + adc x15, x15, x26 + # A[1] * B[1] + mul x25, x5, x22 + umulh x26, x5, x22 + adds x14, x14, x25 + adcs x15, x15, x26 + adc x16, xzr, xzr + # A[2] * B[0] + mul x25, x6, x21 + umulh x26, x6, x21 + adds x14, x14, x25 + adcs x15, x15, x26 + adc x16, x16, xzr + # A[0] * B[3] + mul x25, x4, x24 + umulh x26, x4, x24 + adds x15, x15, x25 + adcs x16, x16, x26 + adc x17, xzr, xzr + # A[1] * B[2] + mul x25, x5, x23 + umulh x26, x5, x23 + adds x15, x15, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[2] * B[1] + mul x25, x6, x22 + umulh x26, x6, x22 + adds x15, x15, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[3] * B[0] + mul x25, x7, x21 + umulh x26, x7, x21 + adds x15, x15, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[1] * B[3] + mul x25, x5, x24 + umulh x26, x5, x24 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, xzr, xzr + # A[2] * B[2] + mul x25, x6, x23 + umulh x26, x6, x23 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, x19, xzr + # A[3] * B[1] + mul x25, x7, x22 + umulh x26, x7, x22 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, x19, xzr + # A[2] * B[3] + mul x25, x6, x24 + umulh x26, x6, x24 + adds x17, x17, x25 + adcs x19, x19, x26 + adc x20, xzr, xzr + # A[3] * B[2] + mul x25, x7, x23 + umulh x26, x7, x23 + adds x17, x17, x25 + adcs x19, x19, x26 + adc x20, x20, xzr + # A[3] * B[3] + mul x25, x7, x24 + umulh x26, x7, x24 + adds x19, x19, x25 + adc x20, x20, x26 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x20, x20, x19, #63 + extr x19, x19, x17, #63 + extr x17, x17, x16, #63 + extr x16, x16, x15, #63 + and x15, x15, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x16 + umulh x16, x25, x16 + adds x12, x12, x26 + mul x26, x25, x17 + umulh x17, x25, x17 + adcs x13, x13, x26 + mul x26, x25, x19 + umulh x19, x25, x19 + adcs x14, x14, x26 + mul x26, x25, x20 + umulh x27, x25, x20 + adcs x15, x15, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x13, 
x13, x16 + adcs x14, x14, x17 + adcs x15, x15, x19 + adc x27, x27, xzr + # Overflow + extr x27, x27, x15, #63 + mul x27, x27, x25 + and x15, x15, #0x7fffffffffffffff + adds x12, x12, x27 + adcs x13, x13, xzr + adcs x14, x14, xzr + adc x15, x15, xzr + # Reduce if top bit set + and x27, x25, x15, asr 63 + and x15, x15, #0x7fffffffffffffff + adds x12, x12, x27 + adcs x13, x13, xzr + adcs x14, x14, xzr + adc x15, x15, xzr + # Store + ldr x0, [x29, #24] + ldr x1, [x29, #192] + # Multiply + ldp x21, x22, [x1] + ldp x23, x24, [x1, #16] + # A[0] * B[0] + mul x4, x8, x21 + umulh x5, x8, x21 + # A[0] * B[1] + mul x25, x8, x22 + umulh x6, x8, x22 + adds x5, x5, x25 + adc x6, x6, xzr + # A[1] * B[0] + mul x25, x9, x21 + umulh x26, x9, x21 + adds x5, x5, x25 + adcs x6, x6, x26 + adc x7, xzr, xzr + # A[0] * B[2] + mul x25, x8, x23 + umulh x26, x8, x23 + adds x6, x6, x25 + adc x7, x7, x26 + # A[1] * B[1] + mul x25, x9, x22 + umulh x26, x9, x22 + adds x6, x6, x25 + adcs x7, x7, x26 + adc x16, xzr, xzr + # A[2] * B[0] + mul x25, x10, x21 + umulh x26, x10, x21 + adds x6, x6, x25 + adcs x7, x7, x26 + adc x16, x16, xzr + # A[0] * B[3] + mul x25, x8, x24 + umulh x26, x8, x24 + adds x7, x7, x25 + adcs x16, x16, x26 + adc x17, xzr, xzr + # A[1] * B[2] + mul x25, x9, x23 + umulh x26, x9, x23 + adds x7, x7, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[2] * B[1] + mul x25, x10, x22 + umulh x26, x10, x22 + adds x7, x7, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[3] * B[0] + mul x25, x11, x21 + umulh x26, x11, x21 + adds x7, x7, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[1] * B[3] + mul x25, x9, x24 + umulh x26, x9, x24 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, xzr, xzr + # A[2] * B[2] + mul x25, x10, x23 + umulh x26, x10, x23 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, x19, xzr + # A[3] * B[1] + mul x25, x11, x22 + umulh x26, x11, x22 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, x19, xzr + # A[2] * B[3] + mul x25, x10, x24 + umulh x26, x10, x24 + adds x17, x17, x25 + adcs x19, x19, x26 + adc x20, xzr, xzr + # A[3] * B[2] + mul x25, x11, x23 + umulh x26, x11, x23 + adds x17, x17, x25 + adcs x19, x19, x26 + adc x20, x20, xzr + # A[3] * B[3] + mul x25, x11, x24 + umulh x26, x11, x24 + adds x19, x19, x25 + adc x20, x20, x26 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x20, x20, x19, #63 + extr x19, x19, x17, #63 + extr x17, x17, x16, #63 + extr x16, x16, x7, #63 + and x7, x7, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x16 + umulh x16, x25, x16 + adds x4, x4, x26 + mul x26, x25, x17 + umulh x17, x25, x17 + adcs x5, x5, x26 + mul x26, x25, x19 + umulh x19, x25, x19 + adcs x6, x6, x26 + mul x26, x25, x20 + umulh x27, x25, x20 + adcs x7, x7, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x5, x5, x16 + adcs x6, x6, x17 + adcs x7, x7, x19 + adc x27, x27, xzr + # Overflow + extr x27, x27, x7, #63 + mul x27, x27, x25 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Reduce if top bit set + and x27, x25, x7, asr 63 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Store + ldr x0, [x29, #24] + ldr x1, [x29, #16] + # Add + adds x8, x12, x4 + adcs x9, x13, x5 + adcs x10, x14, x6 + adc x11, x15, x7 + mov x25, #-19 + asr x28, x11, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x8, x8, x25 + sbcs x9, x9, x28 
+ sbcs x10, x10, x28 + sbc x11, x11, x26 + # Sub + subs x16, x12, x4 + sbcs x17, x13, x5 + sbcs x19, x14, x6 + sbcs x20, x15, x7 + mov x25, #-19 + csetm x28, cc + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x16, x16, x25 + adcs x17, x17, x28 + adcs x19, x19, x28 + adc x20, x20, x26 + stp x8, x9, [x0] + stp x10, x11, [x0, #16] + stp x16, x17, [x1] + stp x19, x20, [x1, #16] + ldr x0, [x29, #40] + ldr x1, [x29, #176] + ldr x3, [x29, #72] + # Multiply + ldp x16, x17, [x1] + ldp x19, x20, [x1, #16] + ldp x21, x22, [x3] + ldp x23, x24, [x3, #16] + # A[0] * B[0] + mul x4, x16, x21 + umulh x5, x16, x21 + # A[0] * B[1] + mul x25, x16, x22 + umulh x6, x16, x22 + adds x5, x5, x25 + adc x6, x6, xzr + # A[1] * B[0] + mul x25, x17, x21 + umulh x26, x17, x21 + adds x5, x5, x25 + adcs x6, x6, x26 + adc x7, xzr, xzr + # A[0] * B[2] + mul x25, x16, x23 + umulh x26, x16, x23 + adds x6, x6, x25 + adc x7, x7, x26 + # A[1] * B[1] + mul x25, x17, x22 + umulh x26, x17, x22 + adds x6, x6, x25 + adcs x7, x7, x26 + adc x8, xzr, xzr + # A[2] * B[0] + mul x25, x19, x21 + umulh x26, x19, x21 + adds x6, x6, x25 + adcs x7, x7, x26 + adc x8, x8, xzr + # A[0] * B[3] + mul x25, x16, x24 + umulh x26, x16, x24 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, xzr, xzr + # A[1] * B[2] + mul x25, x17, x23 + umulh x26, x17, x23 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, x9, xzr + # A[2] * B[1] + mul x25, x19, x22 + umulh x26, x19, x22 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, x9, xzr + # A[3] * B[0] + mul x25, x20, x21 + umulh x26, x20, x21 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, x9, xzr + # A[1] * B[3] + mul x25, x17, x24 + umulh x26, x17, x24 + adds x8, x8, x25 + adcs x9, x9, x26 + adc x10, xzr, xzr + # A[2] * B[2] + mul x25, x19, x23 + umulh x26, x19, x23 + adds x8, x8, x25 + adcs x9, x9, x26 + adc x10, x10, xzr + # A[3] * B[1] + mul x25, x20, x22 + umulh x26, x20, x22 + adds x8, x8, x25 + adcs x9, x9, x26 + adc x10, x10, xzr + # A[2] * B[3] + mul x25, x19, x24 + umulh x26, x19, x24 + adds x9, x9, x25 + adcs x10, x10, x26 + adc x11, xzr, xzr + # A[3] * B[2] + mul x25, x20, x23 + umulh x26, x20, x23 + adds x9, x9, x25 + adcs x10, x10, x26 + adc x11, x11, xzr + # A[3] * B[3] + mul x25, x20, x24 + umulh x26, x20, x24 + adds x10, x10, x25 + adc x11, x11, x26 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x11, x11, x10, #63 + extr x10, x10, x9, #63 + extr x9, x9, x8, #63 + extr x8, x8, x7, #63 + and x7, x7, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x8 + umulh x8, x25, x8 + adds x4, x4, x26 + mul x26, x25, x9 + umulh x9, x25, x9 + adcs x5, x5, x26 + mul x26, x25, x10 + umulh x10, x25, x10 + adcs x6, x6, x26 + mul x26, x25, x11 + umulh x27, x25, x11 + adcs x7, x7, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x5, x5, x8 + adcs x6, x6, x9 + adcs x7, x7, x10 + adc x27, x27, xzr + # Overflow + extr x27, x27, x7, #63 + mul x27, x27, x25 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Reduce if top bit set + and x27, x25, x7, asr 63 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Store + ldr x0, [x29, #32] + ldr x1, [x29, #64] + # Double + ldp x8, x9, [x1] + ldp x10, x11, [x1, #16] + adds x8, x8, x8 + adcs x9, x9, x9 + adcs x10, x10, x10 + adc x11, x11, x11 + mov x25, #-19 + asr x28, x11, #63 + # Mask the modulus + and x25, x28, x25 + and 
x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x8, x8, x25 + sbcs x9, x9, x28 + sbcs x10, x10, x28 + sbc x11, x11, x26 + ldr x1, [x29, #40] + # Add + adds x12, x8, x4 + adcs x13, x9, x5 + adcs x14, x10, x6 + adc x15, x11, x7 + mov x25, #-19 + asr x28, x15, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x12, x12, x25 + sbcs x13, x13, x28 + sbcs x14, x14, x28 + sbc x15, x15, x26 + # Sub + subs x16, x8, x4 + sbcs x17, x9, x5 + sbcs x19, x10, x6 + sbcs x20, x11, x7 + mov x25, #-19 + csetm x28, cc + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x16, x16, x25 + adcs x17, x17, x28 + adcs x19, x19, x28 + adc x20, x20, x26 + stp x12, x13, [x0] + stp x14, x15, [x0, #16] + stp x16, x17, [x1] + stp x19, x20, [x1, #16] + ldr x17, [x29, #88] + ldr x19, [x29, #96] + ldp x20, x21, [x29, #104] + ldp x22, x23, [x29, #120] + ldp x24, x25, [x29, #136] + ldp x26, x27, [x29, #152] + ldr x28, [x29, #168] + ldp x29, x30, [sp], #0xb0 + ret + .size fe_ge_madd,.-fe_ge_madd + .text + .align 2 + .globl fe_ge_msub + .type fe_ge_msub, %function +fe_ge_msub: + stp x29, x30, [sp, #-176]! + add x29, sp, #0 + str x17, [x29, #88] + str x19, [x29, #96] + stp x20, x21, [x29, #104] + stp x22, x23, [x29, #120] + stp x24, x25, [x29, #136] + stp x26, x27, [x29, #152] + str x28, [x29, #168] + str x0, [x29, #16] + str x1, [x29, #24] + str x2, [x29, #32] + str x3, [x29, #40] + str x4, [x29, #48] + str x5, [x29, #56] + str x6, [x29, #64] + str x7, [x29, #72] + ldr x2, [x29, #56] + ldr x3, [x29, #48] + # Add + ldp x12, x13, [x2] + ldp x14, x15, [x2, #16] + ldp x16, x17, [x3] + ldp x19, x20, [x3, #16] + adds x4, x12, x16 + adcs x5, x13, x17 + adcs x6, x14, x19 + adc x7, x15, x20 + mov x25, #-19 + asr x28, x7, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x4, x4, x25 + sbcs x5, x5, x28 + sbcs x6, x6, x28 + sbc x7, x7, x26 + # Sub + subs x8, x12, x16 + sbcs x9, x13, x17 + sbcs x10, x14, x19 + sbcs x11, x15, x20 + mov x25, #-19 + csetm x28, cc + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x8, x8, x25 + adcs x9, x9, x28 + adcs x10, x10, x28 + adc x11, x11, x26 + ldr x0, [x29, #32] + ldr x2, [x29, #192] + # Multiply + ldp x21, x22, [x2] + ldp x23, x24, [x2, #16] + # A[0] * B[0] + mul x12, x4, x21 + umulh x13, x4, x21 + # A[0] * B[1] + mul x25, x4, x22 + umulh x14, x4, x22 + adds x13, x13, x25 + adc x14, x14, xzr + # A[1] * B[0] + mul x25, x5, x21 + umulh x26, x5, x21 + adds x13, x13, x25 + adcs x14, x14, x26 + adc x15, xzr, xzr + # A[0] * B[2] + mul x25, x4, x23 + umulh x26, x4, x23 + adds x14, x14, x25 + adc x15, x15, x26 + # A[1] * B[1] + mul x25, x5, x22 + umulh x26, x5, x22 + adds x14, x14, x25 + adcs x15, x15, x26 + adc x16, xzr, xzr + # A[2] * B[0] + mul x25, x6, x21 + umulh x26, x6, x21 + adds x14, x14, x25 + adcs x15, x15, x26 + adc x16, x16, xzr + # A[0] * B[3] + mul x25, x4, x24 + umulh x26, x4, x24 + adds x15, x15, x25 + adcs x16, x16, x26 + adc x17, xzr, xzr + # A[1] * B[2] + mul x25, x5, x23 + umulh x26, x5, x23 + adds x15, x15, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[2] * B[1] + mul x25, x6, x22 + umulh x26, x6, x22 + adds x15, x15, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[3] * B[0] + mul x25, x7, x21 + umulh x26, x7, x21 + adds x15, x15, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[1] * B[3] + mul x25, x5, 
x24 + umulh x26, x5, x24 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, xzr, xzr + # A[2] * B[2] + mul x25, x6, x23 + umulh x26, x6, x23 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, x19, xzr + # A[3] * B[1] + mul x25, x7, x22 + umulh x26, x7, x22 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, x19, xzr + # A[2] * B[3] + mul x25, x6, x24 + umulh x26, x6, x24 + adds x17, x17, x25 + adcs x19, x19, x26 + adc x20, xzr, xzr + # A[3] * B[2] + mul x25, x7, x23 + umulh x26, x7, x23 + adds x17, x17, x25 + adcs x19, x19, x26 + adc x20, x20, xzr + # A[3] * B[3] + mul x25, x7, x24 + umulh x26, x7, x24 + adds x19, x19, x25 + adc x20, x20, x26 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x20, x20, x19, #63 + extr x19, x19, x17, #63 + extr x17, x17, x16, #63 + extr x16, x16, x15, #63 + and x15, x15, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x16 + umulh x16, x25, x16 + adds x12, x12, x26 + mul x26, x25, x17 + umulh x17, x25, x17 + adcs x13, x13, x26 + mul x26, x25, x19 + umulh x19, x25, x19 + adcs x14, x14, x26 + mul x26, x25, x20 + umulh x27, x25, x20 + adcs x15, x15, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x13, x13, x16 + adcs x14, x14, x17 + adcs x15, x15, x19 + adc x27, x27, xzr + # Overflow + extr x27, x27, x15, #63 + mul x27, x27, x25 + and x15, x15, #0x7fffffffffffffff + adds x12, x12, x27 + adcs x13, x13, xzr + adcs x14, x14, xzr + adc x15, x15, xzr + # Reduce if top bit set + and x27, x25, x15, asr 63 + and x15, x15, #0x7fffffffffffffff + adds x12, x12, x27 + adcs x13, x13, xzr + adcs x14, x14, xzr + adc x15, x15, xzr + # Store + ldr x0, [x29, #24] + ldr x1, [x29, #184] + # Multiply + ldp x21, x22, [x1] + ldp x23, x24, [x1, #16] + # A[0] * B[0] + mul x4, x8, x21 + umulh x5, x8, x21 + # A[0] * B[1] + mul x25, x8, x22 + umulh x6, x8, x22 + adds x5, x5, x25 + adc x6, x6, xzr + # A[1] * B[0] + mul x25, x9, x21 + umulh x26, x9, x21 + adds x5, x5, x25 + adcs x6, x6, x26 + adc x7, xzr, xzr + # A[0] * B[2] + mul x25, x8, x23 + umulh x26, x8, x23 + adds x6, x6, x25 + adc x7, x7, x26 + # A[1] * B[1] + mul x25, x9, x22 + umulh x26, x9, x22 + adds x6, x6, x25 + adcs x7, x7, x26 + adc x16, xzr, xzr + # A[2] * B[0] + mul x25, x10, x21 + umulh x26, x10, x21 + adds x6, x6, x25 + adcs x7, x7, x26 + adc x16, x16, xzr + # A[0] * B[3] + mul x25, x8, x24 + umulh x26, x8, x24 + adds x7, x7, x25 + adcs x16, x16, x26 + adc x17, xzr, xzr + # A[1] * B[2] + mul x25, x9, x23 + umulh x26, x9, x23 + adds x7, x7, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[2] * B[1] + mul x25, x10, x22 + umulh x26, x10, x22 + adds x7, x7, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[3] * B[0] + mul x25, x11, x21 + umulh x26, x11, x21 + adds x7, x7, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[1] * B[3] + mul x25, x9, x24 + umulh x26, x9, x24 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, xzr, xzr + # A[2] * B[2] + mul x25, x10, x23 + umulh x26, x10, x23 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, x19, xzr + # A[3] * B[1] + mul x25, x11, x22 + umulh x26, x11, x22 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, x19, xzr + # A[2] * B[3] + mul x25, x10, x24 + umulh x26, x10, x24 + adds x17, x17, x25 + adcs x19, x19, x26 + adc x20, xzr, xzr + # A[3] * B[2] + mul x25, x11, x23 + umulh x26, x11, x23 + adds x17, x17, x25 + adcs x19, x19, x26 + adc x20, x20, xzr + # A[3] * B[3] + mul x25, x11, x24 + umulh x26, x11, x24 + adds x19, x19, x25 + adc x20, x20, x26 + # Reduce + # Move top half into t4-t7 and 
remove top bit from t3 + extr x20, x20, x19, #63 + extr x19, x19, x17, #63 + extr x17, x17, x16, #63 + extr x16, x16, x7, #63 + and x7, x7, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x16 + umulh x16, x25, x16 + adds x4, x4, x26 + mul x26, x25, x17 + umulh x17, x25, x17 + adcs x5, x5, x26 + mul x26, x25, x19 + umulh x19, x25, x19 + adcs x6, x6, x26 + mul x26, x25, x20 + umulh x27, x25, x20 + adcs x7, x7, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x5, x5, x16 + adcs x6, x6, x17 + adcs x7, x7, x19 + adc x27, x27, xzr + # Overflow + extr x27, x27, x7, #63 + mul x27, x27, x25 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Reduce if top bit set + and x27, x25, x7, asr 63 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Store + ldr x0, [x29, #24] + ldr x1, [x29, #16] + # Add + adds x8, x12, x4 + adcs x9, x13, x5 + adcs x10, x14, x6 + adc x11, x15, x7 + mov x25, #-19 + asr x28, x11, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x8, x8, x25 + sbcs x9, x9, x28 + sbcs x10, x10, x28 + sbc x11, x11, x26 + # Sub + subs x16, x12, x4 + sbcs x17, x13, x5 + sbcs x19, x14, x6 + sbcs x20, x15, x7 + mov x25, #-19 + csetm x28, cc + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x16, x16, x25 + adcs x17, x17, x28 + adcs x19, x19, x28 + adc x20, x20, x26 + stp x8, x9, [x0] + stp x10, x11, [x0, #16] + stp x16, x17, [x1] + stp x19, x20, [x1, #16] + ldr x0, [x29, #40] + ldr x1, [x29, #176] + ldr x3, [x29, #72] + # Multiply + ldp x16, x17, [x1] + ldp x19, x20, [x1, #16] + ldp x21, x22, [x3] + ldp x23, x24, [x3, #16] + # A[0] * B[0] + mul x4, x16, x21 + umulh x5, x16, x21 + # A[0] * B[1] + mul x25, x16, x22 + umulh x6, x16, x22 + adds x5, x5, x25 + adc x6, x6, xzr + # A[1] * B[0] + mul x25, x17, x21 + umulh x26, x17, x21 + adds x5, x5, x25 + adcs x6, x6, x26 + adc x7, xzr, xzr + # A[0] * B[2] + mul x25, x16, x23 + umulh x26, x16, x23 + adds x6, x6, x25 + adc x7, x7, x26 + # A[1] * B[1] + mul x25, x17, x22 + umulh x26, x17, x22 + adds x6, x6, x25 + adcs x7, x7, x26 + adc x8, xzr, xzr + # A[2] * B[0] + mul x25, x19, x21 + umulh x26, x19, x21 + adds x6, x6, x25 + adcs x7, x7, x26 + adc x8, x8, xzr + # A[0] * B[3] + mul x25, x16, x24 + umulh x26, x16, x24 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, xzr, xzr + # A[1] * B[2] + mul x25, x17, x23 + umulh x26, x17, x23 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, x9, xzr + # A[2] * B[1] + mul x25, x19, x22 + umulh x26, x19, x22 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, x9, xzr + # A[3] * B[0] + mul x25, x20, x21 + umulh x26, x20, x21 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, x9, xzr + # A[1] * B[3] + mul x25, x17, x24 + umulh x26, x17, x24 + adds x8, x8, x25 + adcs x9, x9, x26 + adc x10, xzr, xzr + # A[2] * B[2] + mul x25, x19, x23 + umulh x26, x19, x23 + adds x8, x8, x25 + adcs x9, x9, x26 + adc x10, x10, xzr + # A[3] * B[1] + mul x25, x20, x22 + umulh x26, x20, x22 + adds x8, x8, x25 + adcs x9, x9, x26 + adc x10, x10, xzr + # A[2] * B[3] + mul x25, x19, x24 + umulh x26, x19, x24 + adds x9, x9, x25 + adcs x10, x10, x26 + adc x11, xzr, xzr + # A[3] * B[2] + mul x25, x20, x23 + umulh x26, x20, x23 + adds x9, x9, x25 + adcs x10, x10, x26 + adc x11, x11, xzr + # A[3] * B[3] + mul x25, x20, x24 + umulh x26, x20, x24 + adds x10, x10, x25 + 
adc x11, x11, x26 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x11, x11, x10, #63 + extr x10, x10, x9, #63 + extr x9, x9, x8, #63 + extr x8, x8, x7, #63 + and x7, x7, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x8 + umulh x8, x25, x8 + adds x4, x4, x26 + mul x26, x25, x9 + umulh x9, x25, x9 + adcs x5, x5, x26 + mul x26, x25, x10 + umulh x10, x25, x10 + adcs x6, x6, x26 + mul x26, x25, x11 + umulh x27, x25, x11 + adcs x7, x7, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x5, x5, x8 + adcs x6, x6, x9 + adcs x7, x7, x10 + adc x27, x27, xzr + # Overflow + extr x27, x27, x7, #63 + mul x27, x27, x25 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Reduce if top bit set + and x27, x25, x7, asr 63 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Store + ldr x0, [x29, #32] + ldr x1, [x29, #64] + # Double + ldp x8, x9, [x1] + ldp x10, x11, [x1, #16] + adds x8, x8, x8 + adcs x9, x9, x9 + adcs x10, x10, x10 + adc x11, x11, x11 + mov x25, #-19 + asr x28, x11, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x8, x8, x25 + sbcs x9, x9, x28 + sbcs x10, x10, x28 + sbc x11, x11, x26 + ldr x1, [x29, #40] + # Add + adds x12, x8, x4 + adcs x13, x9, x5 + adcs x14, x10, x6 + adc x15, x11, x7 + mov x25, #-19 + asr x28, x15, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x12, x12, x25 + sbcs x13, x13, x28 + sbcs x14, x14, x28 + sbc x15, x15, x26 + # Sub + subs x16, x8, x4 + sbcs x17, x9, x5 + sbcs x19, x10, x6 + sbcs x20, x11, x7 + mov x25, #-19 + csetm x28, cc + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x16, x16, x25 + adcs x17, x17, x28 + adcs x19, x19, x28 + adc x20, x20, x26 + stp x12, x13, [x1] + stp x14, x15, [x1, #16] + stp x16, x17, [x0] + stp x19, x20, [x0, #16] + ldr x17, [x29, #88] + ldr x19, [x29, #96] + ldp x20, x21, [x29, #104] + ldp x22, x23, [x29, #120] + ldp x24, x25, [x29, #136] + ldp x26, x27, [x29, #152] + ldr x28, [x29, #168] + ldp x29, x30, [sp], #0xb0 + ret + .size fe_ge_msub,.-fe_ge_msub + .text + .align 2 + .globl fe_ge_add + .type fe_ge_add, %function +fe_ge_add: + stp x29, x30, [sp, #-176]! 
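Each "# Reduce" comment in fe_ge_madd and fe_ge_msub above (and again in fe_ge_add and fe_ge_sub below) marks the same folding step: since 2^255 is congruent to 19 modulo p = 2^255 - 19, the bits of the 510-bit product from position 255 upward are multiplied by 19 and added back into the low 255 bits, after which two short passes fold in the leftover carry and the top bit. A minimal C sketch of that step under illustrative names (fe4_reduce is not part of this patch, and the unsigned __int128 accumulator is a GCC/Clang extension):

    #include <stdint.h>

    /* Fold a 510-bit product t[0..7] (little-endian 64-bit limbs) to
     * four limbs mod p = 2^255 - 19, mirroring the "# Reduce" blocks. */
    static void fe4_reduce(uint64_t r[4], const uint64_t t[8])
    {
        unsigned __int128 acc = 0;
        uint64_t hi[4], o;
        int i, pass;

        /* hi = t >> 255 (cf. the "extr ..., #63" chain) */
        for (i = 0; i < 4; i++)
            hi[i] = (t[i + 4] << 1) | (t[i + 3] >> 63);

        /* r = (t mod 2^255) + 19*hi; the carry limb collects in acc */
        for (i = 0; i < 4; i++) {
            uint64_t lo = (i == 3) ? (t[3] & 0x7fffffffffffffffULL) : t[i];
            acc += (unsigned __int128)19 * hi[i] + lo;
            r[i] = (uint64_t)acc;
            acc >>= 64;
        }

        /* Two small folds: "# Overflow", then "# Reduce if top bit set" */
        for (pass = 0; pass < 2; pass++) {
            o = ((uint64_t)acc << 1) | (r[3] >> 63);  /* bits >= 255 */
            r[3] &= 0x7fffffffffffffffULL;
            acc = (unsigned __int128)r[0] + 19 * o;
            r[0] = (uint64_t)acc;
            acc >>= 64;
            for (i = 1; i < 4; i++) {                 /* propagate carry */
                acc += r[i];
                r[i] = (uint64_t)acc;
                acc >>= 64;
            }
        }
    }

The assembly interleaves this folding with the preceding 4x4 schoolbook multiply, but the arithmetic is the same.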
+ add x29, sp, #0 + str x17, [x29, #88] + str x19, [x29, #96] + stp x20, x21, [x29, #104] + stp x22, x23, [x29, #120] + stp x24, x25, [x29, #136] + stp x26, x27, [x29, #152] + str x28, [x29, #168] + str x0, [x29, #16] + str x1, [x29, #24] + str x2, [x29, #32] + str x3, [x29, #40] + str x4, [x29, #48] + str x5, [x29, #56] + str x6, [x29, #64] + str x7, [x29, #72] + ldr x2, [x29, #56] + ldr x3, [x29, #48] + # Add + ldp x12, x13, [x2] + ldp x14, x15, [x2, #16] + ldp x16, x17, [x3] + ldp x19, x20, [x3, #16] + adds x4, x12, x16 + adcs x5, x13, x17 + adcs x6, x14, x19 + adc x7, x15, x20 + mov x25, #-19 + asr x28, x7, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x4, x4, x25 + sbcs x5, x5, x28 + sbcs x6, x6, x28 + sbc x7, x7, x26 + # Sub + subs x8, x12, x16 + sbcs x9, x13, x17 + sbcs x10, x14, x19 + sbcs x11, x15, x20 + mov x25, #-19 + csetm x28, cc + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x8, x8, x25 + adcs x9, x9, x28 + adcs x10, x10, x28 + adc x11, x11, x26 + ldr x0, [x29, #32] + ldr x2, [x29, #192] + # Multiply + ldp x21, x22, [x2] + ldp x23, x24, [x2, #16] + # A[0] * B[0] + mul x12, x4, x21 + umulh x13, x4, x21 + # A[0] * B[1] + mul x25, x4, x22 + umulh x14, x4, x22 + adds x13, x13, x25 + adc x14, x14, xzr + # A[1] * B[0] + mul x25, x5, x21 + umulh x26, x5, x21 + adds x13, x13, x25 + adcs x14, x14, x26 + adc x15, xzr, xzr + # A[0] * B[2] + mul x25, x4, x23 + umulh x26, x4, x23 + adds x14, x14, x25 + adc x15, x15, x26 + # A[1] * B[1] + mul x25, x5, x22 + umulh x26, x5, x22 + adds x14, x14, x25 + adcs x15, x15, x26 + adc x16, xzr, xzr + # A[2] * B[0] + mul x25, x6, x21 + umulh x26, x6, x21 + adds x14, x14, x25 + adcs x15, x15, x26 + adc x16, x16, xzr + # A[0] * B[3] + mul x25, x4, x24 + umulh x26, x4, x24 + adds x15, x15, x25 + adcs x16, x16, x26 + adc x17, xzr, xzr + # A[1] * B[2] + mul x25, x5, x23 + umulh x26, x5, x23 + adds x15, x15, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[2] * B[1] + mul x25, x6, x22 + umulh x26, x6, x22 + adds x15, x15, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[3] * B[0] + mul x25, x7, x21 + umulh x26, x7, x21 + adds x15, x15, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[1] * B[3] + mul x25, x5, x24 + umulh x26, x5, x24 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, xzr, xzr + # A[2] * B[2] + mul x25, x6, x23 + umulh x26, x6, x23 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, x19, xzr + # A[3] * B[1] + mul x25, x7, x22 + umulh x26, x7, x22 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, x19, xzr + # A[2] * B[3] + mul x25, x6, x24 + umulh x26, x6, x24 + adds x17, x17, x25 + adcs x19, x19, x26 + adc x20, xzr, xzr + # A[3] * B[2] + mul x25, x7, x23 + umulh x26, x7, x23 + adds x17, x17, x25 + adcs x19, x19, x26 + adc x20, x20, xzr + # A[3] * B[3] + mul x25, x7, x24 + umulh x26, x7, x24 + adds x19, x19, x25 + adc x20, x20, x26 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x20, x20, x19, #63 + extr x19, x19, x17, #63 + extr x17, x17, x16, #63 + extr x16, x16, x15, #63 + and x15, x15, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x16 + umulh x16, x25, x16 + adds x12, x12, x26 + mul x26, x25, x17 + umulh x17, x25, x17 + adcs x13, x13, x26 + mul x26, x25, x19 + umulh x19, x25, x19 + adcs x14, x14, x26 + mul x26, x25, x20 + umulh x27, x25, x20 + adcs x15, x15, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x13, 
x13, x16 + adcs x14, x14, x17 + adcs x15, x15, x19 + adc x27, x27, xzr + # Overflow + extr x27, x27, x15, #63 + mul x27, x27, x25 + and x15, x15, #0x7fffffffffffffff + adds x12, x12, x27 + adcs x13, x13, xzr + adcs x14, x14, xzr + adc x15, x15, xzr + # Reduce if top bit set + and x27, x25, x15, asr 63 + and x15, x15, #0x7fffffffffffffff + adds x12, x12, x27 + adcs x13, x13, xzr + adcs x14, x14, xzr + adc x15, x15, xzr + # Store + ldr x0, [x29, #24] + ldr x1, [x29, #200] + # Multiply + ldp x21, x22, [x1] + ldp x23, x24, [x1, #16] + # A[0] * B[0] + mul x4, x8, x21 + umulh x5, x8, x21 + # A[0] * B[1] + mul x25, x8, x22 + umulh x6, x8, x22 + adds x5, x5, x25 + adc x6, x6, xzr + # A[1] * B[0] + mul x25, x9, x21 + umulh x26, x9, x21 + adds x5, x5, x25 + adcs x6, x6, x26 + adc x7, xzr, xzr + # A[0] * B[2] + mul x25, x8, x23 + umulh x26, x8, x23 + adds x6, x6, x25 + adc x7, x7, x26 + # A[1] * B[1] + mul x25, x9, x22 + umulh x26, x9, x22 + adds x6, x6, x25 + adcs x7, x7, x26 + adc x16, xzr, xzr + # A[2] * B[0] + mul x25, x10, x21 + umulh x26, x10, x21 + adds x6, x6, x25 + adcs x7, x7, x26 + adc x16, x16, xzr + # A[0] * B[3] + mul x25, x8, x24 + umulh x26, x8, x24 + adds x7, x7, x25 + adcs x16, x16, x26 + adc x17, xzr, xzr + # A[1] * B[2] + mul x25, x9, x23 + umulh x26, x9, x23 + adds x7, x7, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[2] * B[1] + mul x25, x10, x22 + umulh x26, x10, x22 + adds x7, x7, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[3] * B[0] + mul x25, x11, x21 + umulh x26, x11, x21 + adds x7, x7, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[1] * B[3] + mul x25, x9, x24 + umulh x26, x9, x24 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, xzr, xzr + # A[2] * B[2] + mul x25, x10, x23 + umulh x26, x10, x23 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, x19, xzr + # A[3] * B[1] + mul x25, x11, x22 + umulh x26, x11, x22 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, x19, xzr + # A[2] * B[3] + mul x25, x10, x24 + umulh x26, x10, x24 + adds x17, x17, x25 + adcs x19, x19, x26 + adc x20, xzr, xzr + # A[3] * B[2] + mul x25, x11, x23 + umulh x26, x11, x23 + adds x17, x17, x25 + adcs x19, x19, x26 + adc x20, x20, xzr + # A[3] * B[3] + mul x25, x11, x24 + umulh x26, x11, x24 + adds x19, x19, x25 + adc x20, x20, x26 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x20, x20, x19, #63 + extr x19, x19, x17, #63 + extr x17, x17, x16, #63 + extr x16, x16, x7, #63 + and x7, x7, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x16 + umulh x16, x25, x16 + adds x4, x4, x26 + mul x26, x25, x17 + umulh x17, x25, x17 + adcs x5, x5, x26 + mul x26, x25, x19 + umulh x19, x25, x19 + adcs x6, x6, x26 + mul x26, x25, x20 + umulh x27, x25, x20 + adcs x7, x7, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x5, x5, x16 + adcs x6, x6, x17 + adcs x7, x7, x19 + adc x27, x27, xzr + # Overflow + extr x27, x27, x7, #63 + mul x27, x27, x25 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Reduce if top bit set + and x27, x25, x7, asr 63 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Store + ldr x0, [x29, #24] + ldr x1, [x29, #16] + # Add + adds x8, x12, x4 + adcs x9, x13, x5 + adcs x10, x14, x6 + adc x11, x15, x7 + mov x25, #-19 + asr x28, x11, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x8, x8, x25 + sbcs x9, x9, x28 
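+ # Here (x25, x28, x28, x26) is p = 2^255-19 masked with the sign of x11, so this subtract chain removes p exactly when the addition above carried into bit 255: a branch-free conditional reduction.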
+ sbcs x10, x10, x28 + sbc x11, x11, x26 + # Sub + subs x16, x12, x4 + sbcs x17, x13, x5 + sbcs x19, x14, x6 + sbcs x20, x15, x7 + mov x25, #-19 + csetm x28, cc + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x16, x16, x25 + adcs x17, x17, x28 + adcs x19, x19, x28 + adc x20, x20, x26 + stp x8, x9, [x0] + stp x10, x11, [x0, #16] + stp x16, x17, [x1] + stp x19, x20, [x1, #16] + ldr x0, [x29, #48] + ldr x1, [x29, #64] + ldr x2, [x29, #176] + # Multiply + ldp x12, x13, [x1] + ldp x14, x15, [x1, #16] + ldp x16, x17, [x2] + ldp x19, x20, [x2, #16] + # A[0] * B[0] + mul x4, x12, x16 + umulh x5, x12, x16 + # A[0] * B[1] + mul x25, x12, x17 + umulh x6, x12, x17 + adds x5, x5, x25 + adc x6, x6, xzr + # A[1] * B[0] + mul x25, x13, x16 + umulh x26, x13, x16 + adds x5, x5, x25 + adcs x6, x6, x26 + adc x7, xzr, xzr + # A[0] * B[2] + mul x25, x12, x19 + umulh x26, x12, x19 + adds x6, x6, x25 + adc x7, x7, x26 + # A[1] * B[1] + mul x25, x13, x17 + umulh x26, x13, x17 + adds x6, x6, x25 + adcs x7, x7, x26 + adc x8, xzr, xzr + # A[2] * B[0] + mul x25, x14, x16 + umulh x26, x14, x16 + adds x6, x6, x25 + adcs x7, x7, x26 + adc x8, x8, xzr + # A[0] * B[3] + mul x25, x12, x20 + umulh x26, x12, x20 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, xzr, xzr + # A[1] * B[2] + mul x25, x13, x19 + umulh x26, x13, x19 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, x9, xzr + # A[2] * B[1] + mul x25, x14, x17 + umulh x26, x14, x17 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, x9, xzr + # A[3] * B[0] + mul x25, x15, x16 + umulh x26, x15, x16 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, x9, xzr + # A[1] * B[3] + mul x25, x13, x20 + umulh x26, x13, x20 + adds x8, x8, x25 + adcs x9, x9, x26 + adc x10, xzr, xzr + # A[2] * B[2] + mul x25, x14, x19 + umulh x26, x14, x19 + adds x8, x8, x25 + adcs x9, x9, x26 + adc x10, x10, xzr + # A[3] * B[1] + mul x25, x15, x17 + umulh x26, x15, x17 + adds x8, x8, x25 + adcs x9, x9, x26 + adc x10, x10, xzr + # A[2] * B[3] + mul x25, x14, x20 + umulh x26, x14, x20 + adds x9, x9, x25 + adcs x10, x10, x26 + adc x11, xzr, xzr + # A[3] * B[2] + mul x25, x15, x19 + umulh x26, x15, x19 + adds x9, x9, x25 + adcs x10, x10, x26 + adc x11, x11, xzr + # A[3] * B[3] + mul x25, x15, x20 + umulh x26, x15, x20 + adds x10, x10, x25 + adc x11, x11, x26 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x11, x11, x10, #63 + extr x10, x10, x9, #63 + extr x9, x9, x8, #63 + extr x8, x8, x7, #63 + and x7, x7, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x8 + umulh x8, x25, x8 + adds x4, x4, x26 + mul x26, x25, x9 + umulh x9, x25, x9 + adcs x5, x5, x26 + mul x26, x25, x10 + umulh x10, x25, x10 + adcs x6, x6, x26 + mul x26, x25, x11 + umulh x27, x25, x11 + adcs x7, x7, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x5, x5, x8 + adcs x6, x6, x9 + adcs x7, x7, x10 + adc x27, x27, xzr + # Overflow + extr x27, x27, x7, #63 + mul x27, x27, x25 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Reduce if top bit set + and x27, x25, x7, asr 63 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Store + ldr x0, [x29, #48] + # Double + adds x4, x4, x4 + adcs x5, x5, x5 + adcs x6, x6, x6 + adc x7, x7, x7 + mov x25, #-19 + asr x28, x7, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x4, x4, 
x25 + sbcs x5, x5, x28 + sbcs x6, x6, x28 + sbc x7, x7, x26 + ldr x0, [x29, #40] + ldr x1, [x29, #184] + ldr x2, [x29, #72] + # Multiply + ldp x16, x17, [x1] + ldp x19, x20, [x1, #16] + ldp x21, x22, [x2] + ldp x23, x24, [x2, #16] + # A[0] * B[0] + mul x8, x16, x21 + umulh x9, x16, x21 + # A[0] * B[1] + mul x25, x16, x22 + umulh x10, x16, x22 + adds x9, x9, x25 + adc x10, x10, xzr + # A[1] * B[0] + mul x25, x17, x21 + umulh x26, x17, x21 + adds x9, x9, x25 + adcs x10, x10, x26 + adc x11, xzr, xzr + # A[0] * B[2] + mul x25, x16, x23 + umulh x26, x16, x23 + adds x10, x10, x25 + adc x11, x11, x26 + # A[1] * B[1] + mul x25, x17, x22 + umulh x26, x17, x22 + adds x10, x10, x25 + adcs x11, x11, x26 + adc x12, xzr, xzr + # A[2] * B[0] + mul x25, x19, x21 + umulh x26, x19, x21 + adds x10, x10, x25 + adcs x11, x11, x26 + adc x12, x12, xzr + # A[0] * B[3] + mul x25, x16, x24 + umulh x26, x16, x24 + adds x11, x11, x25 + adcs x12, x12, x26 + adc x13, xzr, xzr + # A[1] * B[2] + mul x25, x17, x23 + umulh x26, x17, x23 + adds x11, x11, x25 + adcs x12, x12, x26 + adc x13, x13, xzr + # A[2] * B[1] + mul x25, x19, x22 + umulh x26, x19, x22 + adds x11, x11, x25 + adcs x12, x12, x26 + adc x13, x13, xzr + # A[3] * B[0] + mul x25, x20, x21 + umulh x26, x20, x21 + adds x11, x11, x25 + adcs x12, x12, x26 + adc x13, x13, xzr + # A[1] * B[3] + mul x25, x17, x24 + umulh x26, x17, x24 + adds x12, x12, x25 + adcs x13, x13, x26 + adc x14, xzr, xzr + # A[2] * B[2] + mul x25, x19, x23 + umulh x26, x19, x23 + adds x12, x12, x25 + adcs x13, x13, x26 + adc x14, x14, xzr + # A[3] * B[1] + mul x25, x20, x22 + umulh x26, x20, x22 + adds x12, x12, x25 + adcs x13, x13, x26 + adc x14, x14, xzr + # A[2] * B[3] + mul x25, x19, x24 + umulh x26, x19, x24 + adds x13, x13, x25 + adcs x14, x14, x26 + adc x15, xzr, xzr + # A[3] * B[2] + mul x25, x20, x23 + umulh x26, x20, x23 + adds x13, x13, x25 + adcs x14, x14, x26 + adc x15, x15, xzr + # A[3] * B[3] + mul x25, x20, x24 + umulh x26, x20, x24 + adds x14, x14, x25 + adc x15, x15, x26 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x15, x15, x14, #63 + extr x14, x14, x13, #63 + extr x13, x13, x12, #63 + extr x12, x12, x11, #63 + and x11, x11, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x12 + umulh x12, x25, x12 + adds x8, x8, x26 + mul x26, x25, x13 + umulh x13, x25, x13 + adcs x9, x9, x26 + mul x26, x25, x14 + umulh x14, x25, x14 + adcs x10, x10, x26 + mul x26, x25, x15 + umulh x27, x25, x15 + adcs x11, x11, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x9, x9, x12 + adcs x10, x10, x13 + adcs x11, x11, x14 + adc x27, x27, xzr + # Overflow + extr x27, x27, x11, #63 + mul x27, x27, x25 + and x11, x11, #0x7fffffffffffffff + adds x8, x8, x27 + adcs x9, x9, xzr + adcs x10, x10, xzr + adc x11, x11, xzr + # Reduce if top bit set + and x27, x25, x11, asr 63 + and x11, x11, #0x7fffffffffffffff + adds x8, x8, x27 + adcs x9, x9, xzr + adcs x10, x10, xzr + adc x11, x11, xzr + # Store + ldr x0, [x29, #32] + ldr x1, [x29, #40] + # Add + adds x12, x4, x8 + adcs x13, x5, x9 + adcs x14, x6, x10 + adc x15, x7, x11 + mov x25, #-19 + asr x28, x15, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x12, x12, x25 + sbcs x13, x13, x28 + sbcs x14, x14, x28 + sbc x15, x15, x26 + # Sub + subs x16, x4, x8 + sbcs x17, x5, x9 + sbcs x19, x6, x10 + sbcs x20, x7, x11 + mov x25, #-19 + csetm x28, cc + # Mask the modulus + and x25, x28, x25 + and x26, x28, 
#0x7fffffffffffffff + # Add modulus (if underflow) + adds x16, x16, x25 + adcs x17, x17, x28 + adcs x19, x19, x28 + adc x20, x20, x26 + stp x12, x13, [x0] + stp x14, x15, [x0, #16] + stp x16, x17, [x1] + stp x19, x20, [x1, #16] + ldr x17, [x29, #88] + ldr x19, [x29, #96] + ldp x20, x21, [x29, #104] + ldp x22, x23, [x29, #120] + ldp x24, x25, [x29, #136] + ldp x26, x27, [x29, #152] + ldr x28, [x29, #168] + ldp x29, x30, [sp], #0xb0 + ret + .size fe_ge_add,.-fe_ge_add + .text + .align 2 + .globl fe_ge_sub + .type fe_ge_sub, %function +fe_ge_sub: + stp x29, x30, [sp, #-176]! + add x29, sp, #0 + str x17, [x29, #88] + str x19, [x29, #96] + stp x20, x21, [x29, #104] + stp x22, x23, [x29, #120] + stp x24, x25, [x29, #136] + stp x26, x27, [x29, #152] + str x28, [x29, #168] + str x0, [x29, #16] + str x1, [x29, #24] + str x2, [x29, #32] + str x3, [x29, #40] + str x4, [x29, #48] + str x5, [x29, #56] + str x6, [x29, #64] + str x7, [x29, #72] + ldr x2, [x29, #56] + ldr x3, [x29, #48] + # Add + ldp x12, x13, [x2] + ldp x14, x15, [x2, #16] + ldp x16, x17, [x3] + ldp x19, x20, [x3, #16] + adds x4, x12, x16 + adcs x5, x13, x17 + adcs x6, x14, x19 + adc x7, x15, x20 + mov x25, #-19 + asr x28, x7, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x4, x4, x25 + sbcs x5, x5, x28 + sbcs x6, x6, x28 + sbc x7, x7, x26 + # Sub + subs x8, x12, x16 + sbcs x9, x13, x17 + sbcs x10, x14, x19 + sbcs x11, x15, x20 + mov x25, #-19 + csetm x28, cc + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x8, x8, x25 + adcs x9, x9, x28 + adcs x10, x10, x28 + adc x11, x11, x26 + ldr x0, [x29, #32] + ldr x2, [x29, #200] + # Multiply + ldp x21, x22, [x2] + ldp x23, x24, [x2, #16] + # A[0] * B[0] + mul x12, x4, x21 + umulh x13, x4, x21 + # A[0] * B[1] + mul x25, x4, x22 + umulh x14, x4, x22 + adds x13, x13, x25 + adc x14, x14, xzr + # A[1] * B[0] + mul x25, x5, x21 + umulh x26, x5, x21 + adds x13, x13, x25 + adcs x14, x14, x26 + adc x15, xzr, xzr + # A[0] * B[2] + mul x25, x4, x23 + umulh x26, x4, x23 + adds x14, x14, x25 + adc x15, x15, x26 + # A[1] * B[1] + mul x25, x5, x22 + umulh x26, x5, x22 + adds x14, x14, x25 + adcs x15, x15, x26 + adc x16, xzr, xzr + # A[2] * B[0] + mul x25, x6, x21 + umulh x26, x6, x21 + adds x14, x14, x25 + adcs x15, x15, x26 + adc x16, x16, xzr + # A[0] * B[3] + mul x25, x4, x24 + umulh x26, x4, x24 + adds x15, x15, x25 + adcs x16, x16, x26 + adc x17, xzr, xzr + # A[1] * B[2] + mul x25, x5, x23 + umulh x26, x5, x23 + adds x15, x15, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[2] * B[1] + mul x25, x6, x22 + umulh x26, x6, x22 + adds x15, x15, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[3] * B[0] + mul x25, x7, x21 + umulh x26, x7, x21 + adds x15, x15, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[1] * B[3] + mul x25, x5, x24 + umulh x26, x5, x24 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, xzr, xzr + # A[2] * B[2] + mul x25, x6, x23 + umulh x26, x6, x23 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, x19, xzr + # A[3] * B[1] + mul x25, x7, x22 + umulh x26, x7, x22 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, x19, xzr + # A[2] * B[3] + mul x25, x6, x24 + umulh x26, x6, x24 + adds x17, x17, x25 + adcs x19, x19, x26 + adc x20, xzr, xzr + # A[3] * B[2] + mul x25, x7, x23 + umulh x26, x7, x23 + adds x17, x17, x25 + adcs x19, x19, x26 + adc x20, x20, xzr + # A[3] * B[3] + mul x25, x7, x24 + umulh x26, x7, x24 + adds x19, x19, x25 + adc x20, x20, 
x26 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x20, x20, x19, #63 + extr x19, x19, x17, #63 + extr x17, x17, x16, #63 + extr x16, x16, x15, #63 + and x15, x15, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x16 + umulh x16, x25, x16 + adds x12, x12, x26 + mul x26, x25, x17 + umulh x17, x25, x17 + adcs x13, x13, x26 + mul x26, x25, x19 + umulh x19, x25, x19 + adcs x14, x14, x26 + mul x26, x25, x20 + umulh x27, x25, x20 + adcs x15, x15, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x13, x13, x16 + adcs x14, x14, x17 + adcs x15, x15, x19 + adc x27, x27, xzr + # Overflow + extr x27, x27, x15, #63 + mul x27, x27, x25 + and x15, x15, #0x7fffffffffffffff + adds x12, x12, x27 + adcs x13, x13, xzr + adcs x14, x14, xzr + adc x15, x15, xzr + # Reduce if top bit set + and x27, x25, x15, asr 63 + and x15, x15, #0x7fffffffffffffff + adds x12, x12, x27 + adcs x13, x13, xzr + adcs x14, x14, xzr + adc x15, x15, xzr + # Store + ldr x0, [x29, #24] + ldr x1, [x29, #192] + # Multiply + ldp x21, x22, [x1] + ldp x23, x24, [x1, #16] + # A[0] * B[0] + mul x4, x8, x21 + umulh x5, x8, x21 + # A[0] * B[1] + mul x25, x8, x22 + umulh x6, x8, x22 + adds x5, x5, x25 + adc x6, x6, xzr + # A[1] * B[0] + mul x25, x9, x21 + umulh x26, x9, x21 + adds x5, x5, x25 + adcs x6, x6, x26 + adc x7, xzr, xzr + # A[0] * B[2] + mul x25, x8, x23 + umulh x26, x8, x23 + adds x6, x6, x25 + adc x7, x7, x26 + # A[1] * B[1] + mul x25, x9, x22 + umulh x26, x9, x22 + adds x6, x6, x25 + adcs x7, x7, x26 + adc x16, xzr, xzr + # A[2] * B[0] + mul x25, x10, x21 + umulh x26, x10, x21 + adds x6, x6, x25 + adcs x7, x7, x26 + adc x16, x16, xzr + # A[0] * B[3] + mul x25, x8, x24 + umulh x26, x8, x24 + adds x7, x7, x25 + adcs x16, x16, x26 + adc x17, xzr, xzr + # A[1] * B[2] + mul x25, x9, x23 + umulh x26, x9, x23 + adds x7, x7, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[2] * B[1] + mul x25, x10, x22 + umulh x26, x10, x22 + adds x7, x7, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[3] * B[0] + mul x25, x11, x21 + umulh x26, x11, x21 + adds x7, x7, x25 + adcs x16, x16, x26 + adc x17, x17, xzr + # A[1] * B[3] + mul x25, x9, x24 + umulh x26, x9, x24 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, xzr, xzr + # A[2] * B[2] + mul x25, x10, x23 + umulh x26, x10, x23 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, x19, xzr + # A[3] * B[1] + mul x25, x11, x22 + umulh x26, x11, x22 + adds x16, x16, x25 + adcs x17, x17, x26 + adc x19, x19, xzr + # A[2] * B[3] + mul x25, x10, x24 + umulh x26, x10, x24 + adds x17, x17, x25 + adcs x19, x19, x26 + adc x20, xzr, xzr + # A[3] * B[2] + mul x25, x11, x23 + umulh x26, x11, x23 + adds x17, x17, x25 + adcs x19, x19, x26 + adc x20, x20, xzr + # A[3] * B[3] + mul x25, x11, x24 + umulh x26, x11, x24 + adds x19, x19, x25 + adc x20, x20, x26 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x20, x20, x19, #63 + extr x19, x19, x17, #63 + extr x17, x17, x16, #63 + extr x16, x16, x7, #63 + and x7, x7, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x16 + umulh x16, x25, x16 + adds x4, x4, x26 + mul x26, x25, x17 + umulh x17, x25, x17 + adcs x5, x5, x26 + mul x26, x25, x19 + umulh x19, x25, x19 + adcs x6, x6, x26 + mul x26, x25, x20 + umulh x27, x25, x20 + adcs x7, x7, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x5, x5, x16 + adcs x6, x6, x17 + adcs x7, x7, x19 + adc x27, x27, xzr + # Overflow + extr x27, x27, x7, #63 + mul x27, x27, x25 + and x7, x7, 
#0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Reduce if top bit set + and x27, x25, x7, asr 63 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Store + ldr x0, [x29, #24] + ldr x1, [x29, #16] + # Add + adds x8, x12, x4 + adcs x9, x13, x5 + adcs x10, x14, x6 + adc x11, x15, x7 + mov x25, #-19 + asr x28, x11, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x8, x8, x25 + sbcs x9, x9, x28 + sbcs x10, x10, x28 + sbc x11, x11, x26 + # Sub + subs x16, x12, x4 + sbcs x17, x13, x5 + sbcs x19, x14, x6 + sbcs x20, x15, x7 + mov x25, #-19 + csetm x28, cc + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x16, x16, x25 + adcs x17, x17, x28 + adcs x19, x19, x28 + adc x20, x20, x26 + stp x8, x9, [x0] + stp x10, x11, [x0, #16] + stp x16, x17, [x1] + stp x19, x20, [x1, #16] + ldr x0, [x29, #48] + ldr x1, [x29, #64] + ldr x2, [x29, #176] + # Multiply + ldp x12, x13, [x1] + ldp x14, x15, [x1, #16] + ldp x16, x17, [x2] + ldp x19, x20, [x2, #16] + # A[0] * B[0] + mul x4, x12, x16 + umulh x5, x12, x16 + # A[0] * B[1] + mul x25, x12, x17 + umulh x6, x12, x17 + adds x5, x5, x25 + adc x6, x6, xzr + # A[1] * B[0] + mul x25, x13, x16 + umulh x26, x13, x16 + adds x5, x5, x25 + adcs x6, x6, x26 + adc x7, xzr, xzr + # A[0] * B[2] + mul x25, x12, x19 + umulh x26, x12, x19 + adds x6, x6, x25 + adc x7, x7, x26 + # A[1] * B[1] + mul x25, x13, x17 + umulh x26, x13, x17 + adds x6, x6, x25 + adcs x7, x7, x26 + adc x8, xzr, xzr + # A[2] * B[0] + mul x25, x14, x16 + umulh x26, x14, x16 + adds x6, x6, x25 + adcs x7, x7, x26 + adc x8, x8, xzr + # A[0] * B[3] + mul x25, x12, x20 + umulh x26, x12, x20 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, xzr, xzr + # A[1] * B[2] + mul x25, x13, x19 + umulh x26, x13, x19 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, x9, xzr + # A[2] * B[1] + mul x25, x14, x17 + umulh x26, x14, x17 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, x9, xzr + # A[3] * B[0] + mul x25, x15, x16 + umulh x26, x15, x16 + adds x7, x7, x25 + adcs x8, x8, x26 + adc x9, x9, xzr + # A[1] * B[3] + mul x25, x13, x20 + umulh x26, x13, x20 + adds x8, x8, x25 + adcs x9, x9, x26 + adc x10, xzr, xzr + # A[2] * B[2] + mul x25, x14, x19 + umulh x26, x14, x19 + adds x8, x8, x25 + adcs x9, x9, x26 + adc x10, x10, xzr + # A[3] * B[1] + mul x25, x15, x17 + umulh x26, x15, x17 + adds x8, x8, x25 + adcs x9, x9, x26 + adc x10, x10, xzr + # A[2] * B[3] + mul x25, x14, x20 + umulh x26, x14, x20 + adds x9, x9, x25 + adcs x10, x10, x26 + adc x11, xzr, xzr + # A[3] * B[2] + mul x25, x15, x19 + umulh x26, x15, x19 + adds x9, x9, x25 + adcs x10, x10, x26 + adc x11, x11, xzr + # A[3] * B[3] + mul x25, x15, x20 + umulh x26, x15, x20 + adds x10, x10, x25 + adc x11, x11, x26 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x11, x11, x10, #63 + extr x10, x10, x9, #63 + extr x9, x9, x8, #63 + extr x8, x8, x7, #63 + and x7, x7, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x8 + umulh x8, x25, x8 + adds x4, x4, x26 + mul x26, x25, x9 + umulh x9, x25, x9 + adcs x5, x5, x26 + mul x26, x25, x10 + umulh x10, x25, x10 + adcs x6, x6, x26 + mul x26, x25, x11 + umulh x27, x25, x11 + adcs x7, x7, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x5, x5, x8 + adcs x6, x6, x9 + adcs x7, x7, x10 + adc x27, x27, xzr + # Overflow + extr x27, 
x27, x7, #63 + mul x27, x27, x25 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Reduce if top bit set + and x27, x25, x7, asr 63 + and x7, x7, #0x7fffffffffffffff + adds x4, x4, x27 + adcs x5, x5, xzr + adcs x6, x6, xzr + adc x7, x7, xzr + # Store + ldr x0, [x29, #48] + # Double + adds x4, x4, x4 + adcs x5, x5, x5 + adcs x6, x6, x6 + adc x7, x7, x7 + mov x25, #-19 + asr x28, x7, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x4, x4, x25 + sbcs x5, x5, x28 + sbcs x6, x6, x28 + sbc x7, x7, x26 + ldr x0, [x29, #40] + ldr x1, [x29, #184] + ldr x2, [x29, #72] + # Multiply + ldp x16, x17, [x1] + ldp x19, x20, [x1, #16] + ldp x21, x22, [x2] + ldp x23, x24, [x2, #16] + # A[0] * B[0] + mul x8, x16, x21 + umulh x9, x16, x21 + # A[0] * B[1] + mul x25, x16, x22 + umulh x10, x16, x22 + adds x9, x9, x25 + adc x10, x10, xzr + # A[1] * B[0] + mul x25, x17, x21 + umulh x26, x17, x21 + adds x9, x9, x25 + adcs x10, x10, x26 + adc x11, xzr, xzr + # A[0] * B[2] + mul x25, x16, x23 + umulh x26, x16, x23 + adds x10, x10, x25 + adc x11, x11, x26 + # A[1] * B[1] + mul x25, x17, x22 + umulh x26, x17, x22 + adds x10, x10, x25 + adcs x11, x11, x26 + adc x12, xzr, xzr + # A[2] * B[0] + mul x25, x19, x21 + umulh x26, x19, x21 + adds x10, x10, x25 + adcs x11, x11, x26 + adc x12, x12, xzr + # A[0] * B[3] + mul x25, x16, x24 + umulh x26, x16, x24 + adds x11, x11, x25 + adcs x12, x12, x26 + adc x13, xzr, xzr + # A[1] * B[2] + mul x25, x17, x23 + umulh x26, x17, x23 + adds x11, x11, x25 + adcs x12, x12, x26 + adc x13, x13, xzr + # A[2] * B[1] + mul x25, x19, x22 + umulh x26, x19, x22 + adds x11, x11, x25 + adcs x12, x12, x26 + adc x13, x13, xzr + # A[3] * B[0] + mul x25, x20, x21 + umulh x26, x20, x21 + adds x11, x11, x25 + adcs x12, x12, x26 + adc x13, x13, xzr + # A[1] * B[3] + mul x25, x17, x24 + umulh x26, x17, x24 + adds x12, x12, x25 + adcs x13, x13, x26 + adc x14, xzr, xzr + # A[2] * B[2] + mul x25, x19, x23 + umulh x26, x19, x23 + adds x12, x12, x25 + adcs x13, x13, x26 + adc x14, x14, xzr + # A[3] * B[1] + mul x25, x20, x22 + umulh x26, x20, x22 + adds x12, x12, x25 + adcs x13, x13, x26 + adc x14, x14, xzr + # A[2] * B[3] + mul x25, x19, x24 + umulh x26, x19, x24 + adds x13, x13, x25 + adcs x14, x14, x26 + adc x15, xzr, xzr + # A[3] * B[2] + mul x25, x20, x23 + umulh x26, x20, x23 + adds x13, x13, x25 + adcs x14, x14, x26 + adc x15, x15, xzr + # A[3] * B[3] + mul x25, x20, x24 + umulh x26, x20, x24 + adds x14, x14, x25 + adc x15, x15, x26 + # Reduce + # Move top half into t4-t7 and remove top bit from t3 + extr x15, x15, x14, #63 + extr x14, x14, x13, #63 + extr x13, x13, x12, #63 + extr x12, x12, x11, #63 + and x11, x11, #0x7fffffffffffffff + # Multiply top half by 19 + mov x25, #19 + mul x26, x25, x12 + umulh x12, x25, x12 + adds x8, x8, x26 + mul x26, x25, x13 + umulh x13, x25, x13 + adcs x9, x9, x26 + mul x26, x25, x14 + umulh x14, x25, x14 + adcs x10, x10, x26 + mul x26, x25, x15 + umulh x27, x25, x15 + adcs x11, x11, x26 + adc x27, x27, xzr + # Add remaining product results in + adds x9, x9, x12 + adcs x10, x10, x13 + adcs x11, x11, x14 + adc x27, x27, xzr + # Overflow + extr x27, x27, x11, #63 + mul x27, x27, x25 + and x11, x11, #0x7fffffffffffffff + adds x8, x8, x27 + adcs x9, x9, xzr + adcs x10, x10, xzr + adc x11, x11, xzr + # Reduce if top bit set + and x27, x25, x11, asr 63 + and x11, x11, #0x7fffffffffffffff + adds x8, x8, x27 + adcs x9, x9, xzr + adcs x10, x10, xzr + 
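+ # Final fold: x27 is 19 exactly when bit 255 of the result was still set (x11 asr 63), so adding it after clearing that bit brings the result back under 2^255.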
adc x11, x11, xzr + # Store + ldr x0, [x29, #40] + ldr x1, [x29, #32] + # Add + adds x12, x4, x8 + adcs x13, x5, x9 + adcs x14, x6, x10 + adc x15, x7, x11 + mov x25, #-19 + asr x28, x15, #63 + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Sub modulus (if overflow) + subs x12, x12, x25 + sbcs x13, x13, x28 + sbcs x14, x14, x28 + sbc x15, x15, x26 + # Sub + subs x16, x4, x8 + sbcs x17, x5, x9 + sbcs x19, x6, x10 + sbcs x20, x7, x11 + mov x25, #-19 + csetm x28, cc + # Mask the modulus + and x25, x28, x25 + and x26, x28, #0x7fffffffffffffff + # Add modulus (if underflow) + adds x16, x16, x25 + adcs x17, x17, x28 + adcs x19, x19, x28 + adc x20, x20, x26 + stp x12, x13, [x0] + stp x14, x15, [x0, #16] + stp x16, x17, [x1] + stp x19, x20, [x1, #16] + ldr x17, [x29, #88] + ldr x19, [x29, #96] + ldp x20, x21, [x29, #104] + ldp x22, x23, [x29, #120] + ldp x24, x25, [x29, #136] + ldp x26, x27, [x29, #152] + ldr x28, [x29, #168] + ldp x29, x30, [sp], #0xb0 + ret + .size fe_ge_sub,.-fe_ge_sub +#endif /* __aarch64__ */ diff --git a/client/wolfssl/wolfcrypt/src/port/arm/armv8-curve25519.c b/client/wolfssl/wolfcrypt/src/port/arm/armv8-curve25519.c new file mode 100644 index 0000000..d1ab4c8 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/arm/armv8-curve25519.c @@ -0,0 +1,6725 @@ +/* armv8-curve25519 + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./x25519/x25519.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-curve25519.c + */ +#ifdef __aarch64__ +#include <stdint.h> +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFSSL_ARMASM +#include <wolfssl/wolfcrypt/fe_operations.h> +#include <wolfssl/wolfcrypt/ge_operations.h> + +void fe_init() +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-16]!\n\t" + "add x29, sp, #0\n\t" + "\n\t" + "ldp x29, x30, [sp], #16\n\t" + : + : + : "memory" + ); +} + +void fe_frombytes(fe out, const unsigned char* in) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-16]!\n\t" + "add x29, sp, #0\n\t" + "ldp x2, x3, [%x[in]]\n\t" + "ldp x4, x5, [%x[in], #16]\n\t" + "and x5, x5, #0x7fffffffffffffff\n\t" + "stp x2, x3, [%x[out]]\n\t" + "stp x4, x5, [%x[out], #16]\n\t" + "ldp x29, x30, [sp], #16\n\t" + : [out] "+r" (out), [in] "+r" (in) + : + : "memory", "x2", "x3", "x4", "x5", "x6" + ); +} + +void fe_tobytes(unsigned char* out, const fe n) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-16]!\n\t" + "add x29, sp, #0\n\t" + "mov x7, #19\n\t" + "ldp x2, x3, [%x[n]]\n\t" + "ldp x4, x5, [%x[n], #16]\n\t" + "adds x6, x2, x7\n\t" + "adcs x6, x3, xzr\n\t" + "adcs x6, x4, xzr\n\t" + "adc x6, x5, xzr\n\t" + "and x6, x7, x6, asr 63\n\t" + "adds x2, x2, x6\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adc x5, x5, xzr\n\t" + "and x5, x5, #0x7fffffffffffffff\n\t" + "stp x2, x3, [%x[out]]\n\t" + "stp x4, x5, [%x[out], #16]\n\t" + "ldp x29, x30, [sp], #16\n\t" + : [out] "+r" (out), [n] "+r" (n) + : + : "memory", "x2", "x3", "x4", "x5", "x6", "x7" + ); +} + +void fe_1(fe n) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-16]!\n\t" + "add x29, sp, #0\n\t" + /* Set one */ + "mov x1, #1\n\t" + "stp x1, xzr, [%x[n]]\n\t" + "stp xzr, xzr, [%x[n], #16]\n\t" + "ldp x29, x30, [sp], #16\n\t" + : [n] "+r" (n) + : + : "memory", "x1" + ); +} + +void fe_0(fe n) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-16]!\n\t" + "add x29, sp, #0\n\t" + /* Set zero */ + "stp xzr, xzr, [%x[n]]\n\t" + "stp xzr, xzr, [%x[n], #16]\n\t" + "ldp x29, x30, [sp], #16\n\t" + : [n] "+r" (n) + : + : "memory" + ); +} + +void fe_copy(fe r, const fe a) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-16]!\n\t" + "add x29, sp, #0\n\t" + /* Copy */ + "ldp x2, x3, [%x[a]]\n\t" + "ldp x4, x5, [%x[a], #16]\n\t" + "stp x2, x3, [%x[r]]\n\t" + "stp x4, x5, [%x[r], #16]\n\t" + "ldp x29, x30, [sp], #16\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "x2", "x3", "x4", "x5" + ); +} + +void fe_sub(fe r, const fe a, const fe b) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-16]!\n\t" + "add x29, sp, #0\n\t" + /* Sub */ + "ldp x3, x4, [%x[a]]\n\t" + "ldp x5, x6, [%x[a], #16]\n\t" + "ldp x7, x8, [%x[b]]\n\t" + "ldp x9, x10, [%x[b], #16]\n\t" + "subs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "sbcs x6, x6, x10\n\t" + "mov x12, #-19\n\t" + "csetm x11, cc\n\t" + /* Mask the modulus */ + "and x12, x11, x12\n\t" + "and x13, x11, #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x3, x3, x12\n\t" + "adcs x4, x4, x11\n\t" + "adcs x5, x5, x11\n\t" + "adc x6, x6, x13\n\t" + "stp x3, x4, [%x[r]]\n\t" + "stp x5, x6, [%x[r], #16]\n\t" + "ldp x29, x30, [sp], #16\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12",
"x13" + ); +} + +void fe_add(fe r, const fe a, const fe b) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-16]!\n\t" + "add x29, sp, #0\n\t" + /* Add */ + "ldp x3, x4, [%x[a]]\n\t" + "ldp x5, x6, [%x[a], #16]\n\t" + "ldp x7, x8, [%x[b]]\n\t" + "ldp x9, x10, [%x[b], #16]\n\t" + "adds x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adc x6, x6, x10\n\t" + "mov x12, #-19\n\t" + "asr x11, x6, #63\n\t" + /* Mask the modulus */ + "and x12, x11, x12\n\t" + "and x13, x11, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x3, x3, x12\n\t" + "sbcs x4, x4, x11\n\t" + "sbcs x5, x5, x11\n\t" + "sbc x6, x6, x13\n\t" + "stp x3, x4, [%x[r]]\n\t" + "stp x5, x6, [%x[r], #16]\n\t" + "ldp x29, x30, [sp], #16\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13" + ); +} + +void fe_neg(fe r, const fe a) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-16]!\n\t" + "add x29, sp, #0\n\t" + "ldp x2, x3, [%x[a]]\n\t" + "ldp x4, x5, [%x[a], #16]\n\t" + "mov x6, #-19\n\t" + "mov x7, #-1\n\t" + "mov x8, #-1\n\t" + "mov x9, #0x7fffffffffffffff\n\t" + "subs x6, x6, x2\n\t" + "sbcs x7, x7, x3\n\t" + "sbcs x8, x8, x4\n\t" + "sbc x9, x9, x5\n\t" + "stp x6, x7, [%x[r]]\n\t" + "stp x8, x9, [%x[r], #16]\n\t" + "ldp x29, x30, [sp], #16\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); +} + +int fe_isnonzero(const fe a) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-16]!\n\t" + "add x29, sp, #0\n\t" + "mov x6, #19\n\t" + "ldp x1, x2, [%x[a]]\n\t" + "ldp x3, x4, [%x[a], #16]\n\t" + "adds x5, x1, x6\n\t" + "adcs x5, x2, xzr\n\t" + "adcs x5, x3, xzr\n\t" + "adc x5, x4, xzr\n\t" + "and x5, x6, x5, asr 63\n\t" + "adds x1, x1, x5\n\t" + "adcs x2, x2, xzr\n\t" + "adcs x3, x3, xzr\n\t" + "adc x4, x4, xzr\n\t" + "and x4, x4, #0x7fffffffffffffff\n\t" + "orr %x[a], x1, x2\n\t" + "orr x3, x3, x4\n\t" + "orr %x[a], %x[a], x3\n\t" + "ldp x29, x30, [sp], #16\n\t" + : [a] "+r" (a) + : + : "memory", "x1", "x2", "x3", "x4", "x5", "x6" + ); + return (uint32_t)(size_t)a; +} + +int fe_isnegative(const fe a) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-16]!\n\t" + "add x29, sp, #0\n\t" + "mov x6, #19\n\t" + "ldp x1, x2, [%x[a]]\n\t" + "ldp x3, x4, [%x[a], #16]\n\t" + "adds x5, x1, x6\n\t" + "adcs x5, x2, xzr\n\t" + "adcs x5, x3, xzr\n\t" + "adc x5, x4, xzr\n\t" + "and %x[a], x1, #1\n\t" + "eor %x[a], %x[a], x5, lsr 63\n\t" + "ldp x29, x30, [sp], #16\n\t" + : [a] "+r" (a) + : + : "memory", "x1", "x2", "x3", "x4", "x5", "x6" + ); + return (uint32_t)(size_t)a; +} + +void fe_cmov_table(fe* r, fe* base, signed char b) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-32]!\n\t" + "add x29, sp, #0\n\t" + "str %x[r], [x29, #16]\n\t" + "sxtb %x[b], %w[b]\n\t" + "sbfx x3, %x[b], #7, #1\n\t" + "eor %x[r], %x[b], x3\n\t" + "sub %x[r], %x[r], x3\n\t" + "mov x4, #1\n\t" + "mov x5, xzr\n\t" + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, #1\n\t" + "mov x9, xzr\n\t" + "mov x10, xzr\n\t" + "mov x11, xzr\n\t" + "mov x12, xzr\n\t" + "mov x13, xzr\n\t" + "mov x14, xzr\n\t" + "mov x15, xzr\n\t" + "cmp %x[r], #1\n\t" + "ldp x16, x17, [%x[base]]\n\t" + "ldp x19, x20, [%x[base], #16]\n\t" + "ldp x21, x22, [%x[base], #32]\n\t" + "ldp x23, x24, [%x[base], #48]\n\t" + "ldp x25, x26, [%x[base], #64]\n\t" + "ldp x27, x28, [%x[base], #80]\n\t" + "csel x4, x16, x4, eq\n\t" + "csel x5, x17, x5, eq\n\t" + "csel x6, x19, x6, eq\n\t" + "csel x7, x20, x7, eq\n\t" + "csel x8, x21, x8, eq\n\t" 
+ "csel x9, x22, x9, eq\n\t" + "csel x10, x23, x10, eq\n\t" + "csel x11, x24, x11, eq\n\t" + "csel x12, x25, x12, eq\n\t" + "csel x13, x26, x13, eq\n\t" + "csel x14, x27, x14, eq\n\t" + "csel x15, x28, x15, eq\n\t" + "cmp %x[r], #2\n\t" + "ldp x16, x17, [%x[base], #96]\n\t" + "ldp x19, x20, [%x[base], #112]\n\t" + "ldp x21, x22, [%x[base], #128]\n\t" + "ldp x23, x24, [%x[base], #144]\n\t" + "ldp x25, x26, [%x[base], #160]\n\t" + "ldp x27, x28, [%x[base], #176]\n\t" + "csel x4, x16, x4, eq\n\t" + "csel x5, x17, x5, eq\n\t" + "csel x6, x19, x6, eq\n\t" + "csel x7, x20, x7, eq\n\t" + "csel x8, x21, x8, eq\n\t" + "csel x9, x22, x9, eq\n\t" + "csel x10, x23, x10, eq\n\t" + "csel x11, x24, x11, eq\n\t" + "csel x12, x25, x12, eq\n\t" + "csel x13, x26, x13, eq\n\t" + "csel x14, x27, x14, eq\n\t" + "csel x15, x28, x15, eq\n\t" + "cmp %x[r], #3\n\t" + "ldp x16, x17, [%x[base], #192]\n\t" + "ldp x19, x20, [%x[base], #208]\n\t" + "ldp x21, x22, [%x[base], #224]\n\t" + "ldp x23, x24, [%x[base], #240]\n\t" + "ldp x25, x26, [%x[base], #256]\n\t" + "ldp x27, x28, [%x[base], #272]\n\t" + "csel x4, x16, x4, eq\n\t" + "csel x5, x17, x5, eq\n\t" + "csel x6, x19, x6, eq\n\t" + "csel x7, x20, x7, eq\n\t" + "csel x8, x21, x8, eq\n\t" + "csel x9, x22, x9, eq\n\t" + "csel x10, x23, x10, eq\n\t" + "csel x11, x24, x11, eq\n\t" + "csel x12, x25, x12, eq\n\t" + "csel x13, x26, x13, eq\n\t" + "csel x14, x27, x14, eq\n\t" + "csel x15, x28, x15, eq\n\t" + "cmp %x[r], #4\n\t" + "ldp x16, x17, [%x[base], #288]\n\t" + "ldp x19, x20, [%x[base], #304]\n\t" + "ldp x21, x22, [%x[base], #320]\n\t" + "ldp x23, x24, [%x[base], #336]\n\t" + "ldp x25, x26, [%x[base], #352]\n\t" + "ldp x27, x28, [%x[base], #368]\n\t" + "csel x4, x16, x4, eq\n\t" + "csel x5, x17, x5, eq\n\t" + "csel x6, x19, x6, eq\n\t" + "csel x7, x20, x7, eq\n\t" + "csel x8, x21, x8, eq\n\t" + "csel x9, x22, x9, eq\n\t" + "csel x10, x23, x10, eq\n\t" + "csel x11, x24, x11, eq\n\t" + "csel x12, x25, x12, eq\n\t" + "csel x13, x26, x13, eq\n\t" + "csel x14, x27, x14, eq\n\t" + "csel x15, x28, x15, eq\n\t" + "add %x[base], %x[base], #0x180\n\t" + "cmp %x[r], #5\n\t" + "ldp x16, x17, [%x[base]]\n\t" + "ldp x19, x20, [%x[base], #16]\n\t" + "ldp x21, x22, [%x[base], #32]\n\t" + "ldp x23, x24, [%x[base], #48]\n\t" + "ldp x25, x26, [%x[base], #64]\n\t" + "ldp x27, x28, [%x[base], #80]\n\t" + "csel x4, x16, x4, eq\n\t" + "csel x5, x17, x5, eq\n\t" + "csel x6, x19, x6, eq\n\t" + "csel x7, x20, x7, eq\n\t" + "csel x8, x21, x8, eq\n\t" + "csel x9, x22, x9, eq\n\t" + "csel x10, x23, x10, eq\n\t" + "csel x11, x24, x11, eq\n\t" + "csel x12, x25, x12, eq\n\t" + "csel x13, x26, x13, eq\n\t" + "csel x14, x27, x14, eq\n\t" + "csel x15, x28, x15, eq\n\t" + "cmp %x[r], #6\n\t" + "ldp x16, x17, [%x[base], #96]\n\t" + "ldp x19, x20, [%x[base], #112]\n\t" + "ldp x21, x22, [%x[base], #128]\n\t" + "ldp x23, x24, [%x[base], #144]\n\t" + "ldp x25, x26, [%x[base], #160]\n\t" + "ldp x27, x28, [%x[base], #176]\n\t" + "csel x4, x16, x4, eq\n\t" + "csel x5, x17, x5, eq\n\t" + "csel x6, x19, x6, eq\n\t" + "csel x7, x20, x7, eq\n\t" + "csel x8, x21, x8, eq\n\t" + "csel x9, x22, x9, eq\n\t" + "csel x10, x23, x10, eq\n\t" + "csel x11, x24, x11, eq\n\t" + "csel x12, x25, x12, eq\n\t" + "csel x13, x26, x13, eq\n\t" + "csel x14, x27, x14, eq\n\t" + "csel x15, x28, x15, eq\n\t" + "cmp %x[r], #7\n\t" + "ldp x16, x17, [%x[base], #192]\n\t" + "ldp x19, x20, [%x[base], #208]\n\t" + "ldp x21, x22, [%x[base], #224]\n\t" + "ldp x23, x24, [%x[base], #240]\n\t" + "ldp x25, x26, [%x[base], #256]\n\t" + "ldp x27, x28, 
[%x[base], #272]\n\t" + "csel x4, x16, x4, eq\n\t" + "csel x5, x17, x5, eq\n\t" + "csel x6, x19, x6, eq\n\t" + "csel x7, x20, x7, eq\n\t" + "csel x8, x21, x8, eq\n\t" + "csel x9, x22, x9, eq\n\t" + "csel x10, x23, x10, eq\n\t" + "csel x11, x24, x11, eq\n\t" + "csel x12, x25, x12, eq\n\t" + "csel x13, x26, x13, eq\n\t" + "csel x14, x27, x14, eq\n\t" + "csel x15, x28, x15, eq\n\t" + "cmp %x[r], #8\n\t" + "ldp x16, x17, [%x[base], #288]\n\t" + "ldp x19, x20, [%x[base], #304]\n\t" + "ldp x21, x22, [%x[base], #320]\n\t" + "ldp x23, x24, [%x[base], #336]\n\t" + "ldp x25, x26, [%x[base], #352]\n\t" + "ldp x27, x28, [%x[base], #368]\n\t" + "csel x4, x16, x4, eq\n\t" + "csel x5, x17, x5, eq\n\t" + "csel x6, x19, x6, eq\n\t" + "csel x7, x20, x7, eq\n\t" + "csel x8, x21, x8, eq\n\t" + "csel x9, x22, x9, eq\n\t" + "csel x10, x23, x10, eq\n\t" + "csel x11, x24, x11, eq\n\t" + "csel x12, x25, x12, eq\n\t" + "csel x13, x26, x13, eq\n\t" + "csel x14, x27, x14, eq\n\t" + "csel x15, x28, x15, eq\n\t" + "mov x16, #-19\n\t" + "mov x17, #-1\n\t" + "mov x19, #-1\n\t" + "mov x20, #0x7fffffffffffffff\n\t" + "subs x16, x16, x12\n\t" + "sbcs x17, x17, x13\n\t" + "sbcs x19, x19, x14\n\t" + "sbc x20, x20, x15\n\t" + "cmp %x[b], #0\n\t" + "mov x3, x4\n\t" + "csel x4, x8, x4, lt\n\t" + "csel x8, x3, x8, lt\n\t" + "mov x3, x5\n\t" + "csel x5, x9, x5, lt\n\t" + "csel x9, x3, x9, lt\n\t" + "mov x3, x6\n\t" + "csel x6, x10, x6, lt\n\t" + "csel x10, x3, x10, lt\n\t" + "mov x3, x7\n\t" + "csel x7, x11, x7, lt\n\t" + "csel x11, x3, x11, lt\n\t" + "csel x12, x16, x12, lt\n\t" + "csel x13, x17, x13, lt\n\t" + "csel x14, x19, x14, lt\n\t" + "csel x15, x20, x15, lt\n\t" + "ldr %x[r], [x29, #16]\n\t" + "stp x4, x5, [%x[r]]\n\t" + "stp x6, x7, [%x[r], #16]\n\t" + "stp x8, x9, [%x[r], #32]\n\t" + "stp x10, x11, [%x[r], #48]\n\t" + "stp x12, x13, [%x[r], #64]\n\t" + "stp x14, x15, [%x[r], #80]\n\t" + "ldp x29, x30, [sp], #32\n\t" + : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b) + : + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + ); +} + +void fe_mul(fe r, const fe a, const fe b) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-16]!\n\t" + "add x29, sp, #0\n\t" + /* Multiply */ + "ldp x14, x15, [%x[a]]\n\t" + "ldp x16, x17, [%x[a], #16]\n\t" + "ldp x19, x20, [%x[b]]\n\t" + "ldp x21, x22, [%x[b], #16]\n\t" + /* A[0] * B[0] */ + "mul x6, x14, x19\n\t" + "umulh x7, x14, x19\n\t" + /* A[0] * B[1] */ + "mul x3, x14, x20\n\t" + "umulh x8, x14, x20\n\t" + "adds x7, x7, x3\n\t" + "adc x8, x8, xzr\n\t" + /* A[1] * B[0] */ + "mul x3, x15, x19\n\t" + "umulh x4, x15, x19\n\t" + "adds x7, x7, x3\n\t" + "adcs x8, x8, x4\n\t" + "adc x9, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x3, x14, x21\n\t" + "umulh x4, x14, x21\n\t" + "adds x8, x8, x3\n\t" + "adc x9, x9, x4\n\t" + /* A[1] * B[1] */ + "mul x3, x15, x20\n\t" + "umulh x4, x15, x20\n\t" + "adds x8, x8, x3\n\t" + "adcs x9, x9, x4\n\t" + "adc x10, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x3, x16, x19\n\t" + "umulh x4, x16, x19\n\t" + "adds x8, x8, x3\n\t" + "adcs x9, x9, x4\n\t" + "adc x10, x10, xzr\n\t" + /* A[0] * B[3] */ + "mul x3, x14, x22\n\t" + "umulh x4, x14, x22\n\t" + "adds x9, x9, x3\n\t" + "adcs x10, x10, x4\n\t" + "adc x11, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x3, x15, x21\n\t" + "umulh x4, x15, x21\n\t" + "adds x9, x9, x3\n\t" + "adcs x10, x10, x4\n\t" + "adc x11, x11, xzr\n\t" + /* A[2] * B[1] */ + "mul x3, x16, x20\n\t" + "umulh x4, x16, 
x20\n\t" + "adds x9, x9, x3\n\t" + "adcs x10, x10, x4\n\t" + "adc x11, x11, xzr\n\t" + /* A[3] * B[0] */ + "mul x3, x17, x19\n\t" + "umulh x4, x17, x19\n\t" + "adds x9, x9, x3\n\t" + "adcs x10, x10, x4\n\t" + "adc x11, x11, xzr\n\t" + /* A[1] * B[3] */ + "mul x3, x15, x22\n\t" + "umulh x4, x15, x22\n\t" + "adds x10, x10, x3\n\t" + "adcs x11, x11, x4\n\t" + "adc x12, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x3, x16, x21\n\t" + "umulh x4, x16, x21\n\t" + "adds x10, x10, x3\n\t" + "adcs x11, x11, x4\n\t" + "adc x12, x12, xzr\n\t" + /* A[3] * B[1] */ + "mul x3, x17, x20\n\t" + "umulh x4, x17, x20\n\t" + "adds x10, x10, x3\n\t" + "adcs x11, x11, x4\n\t" + "adc x12, x12, xzr\n\t" + /* A[2] * B[3] */ + "mul x3, x16, x22\n\t" + "umulh x4, x16, x22\n\t" + "adds x11, x11, x3\n\t" + "adcs x12, x12, x4\n\t" + "adc x13, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x3, x17, x21\n\t" + "umulh x4, x17, x21\n\t" + "adds x11, x11, x3\n\t" + "adcs x12, x12, x4\n\t" + "adc x13, x13, xzr\n\t" + /* A[3] * B[3] */ + "mul x3, x17, x22\n\t" + "umulh x4, x17, x22\n\t" + "adds x12, x12, x3\n\t" + "adc x13, x13, x4\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x13, x13, x12, #63\n\t" + "extr x12, x12, x11, #63\n\t" + "extr x11, x11, x10, #63\n\t" + "extr x10, x10, x9, #63\n\t" + "and x9, x9, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x3, #19\n\t" + "mul x4, x3, x10\n\t" + "umulh x10, x3, x10\n\t" + "adds x6, x6, x4\n\t" + "mul x4, x3, x11\n\t" + "umulh x11, x3, x11\n\t" + "adcs x7, x7, x4\n\t" + "mul x4, x3, x12\n\t" + "umulh x12, x3, x12\n\t" + "adcs x8, x8, x4\n\t" + "mul x4, x3, x13\n\t" + "umulh x5, x3, x13\n\t" + "adcs x9, x9, x4\n\t" + "adc x5, x5, xzr\n\t" + /* Add remaining product results in */ + "adds x7, x7, x10\n\t" + "adcs x8, x8, x11\n\t" + "adcs x9, x9, x12\n\t" + "adc x5, x5, xzr\n\t" + /* Overflow */ + "extr x5, x5, x9, #63\n\t" + "mul x5, x5, x3\n\t" + "and x9, x9, #0x7fffffffffffffff\n\t" + "adds x6, x6, x5\n\t" + "adcs x7, x7, xzr\n\t" + "adcs x8, x8, xzr\n\t" + "adc x9, x9, xzr\n\t" + /* Reduce if top bit set */ + "and x5, x3, x9, asr 63\n\t" + "and x9, x9, #0x7fffffffffffffff\n\t" + "adds x6, x6, x5\n\t" + "adcs x7, x7, xzr\n\t" + "adcs x8, x8, xzr\n\t" + "adc x9, x9, xzr\n\t" + /* Store */ + "stp x6, x7, [%x[r]]\n\t" + "stp x8, x9, [%x[r], #16]\n\t" + "ldp x29, x30, [sp], #16\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22" + ); +} + +void fe_sq(fe r, const fe a) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-16]!\n\t" + "add x29, sp, #0\n\t" + /* Square */ + "ldp x13, x14, [%x[a]]\n\t" + "ldp x15, x16, [%x[a], #16]\n\t" + /* A[0] * A[1] */ + "mul x6, x13, x14\n\t" + "umulh x7, x13, x14\n\t" + /* A[0] * A[2] */ + "mul x2, x13, x15\n\t" + "umulh x8, x13, x15\n\t" + "adds x7, x7, x2\n\t" + "adc x8, x8, xzr\n\t" + /* A[0] * A[3] */ + "mul x2, x13, x16\n\t" + "umulh x9, x13, x16\n\t" + "adds x8, x8, x2\n\t" + "adc x9, x9, xzr\n\t" + /* A[1] * A[2] */ + "mul x2, x14, x15\n\t" + "umulh x3, x14, x15\n\t" + "adds x8, x8, x2\n\t" + "adcs x9, x9, x3\n\t" + "adc x10, xzr, xzr\n\t" + /* A[1] * A[3] */ + "mul x2, x14, x16\n\t" + "umulh x3, x14, x16\n\t" + "adds x9, x9, x2\n\t" + "adc x10, x10, x3\n\t" + /* A[2] * A[3] */ + "mul x2, x15, x16\n\t" + "umulh x11, x15, x16\n\t" + "adds x10, x10, x2\n\t" + "adc x11, x11, xzr\n\t" + /* Double */ + "adds x6, x6, x6\n\t" + "adcs x7, x7, x7\n\t" + "adcs x8, x8, 
x8\n\t" + "adcs x9, x9, x9\n\t" + "adcs x10, x10, x10\n\t" + "adcs x11, x11, x11\n\t" + "adc x12, xzr, xzr\n\t" + /* A[0] * A[0] */ + "mul x5, x13, x13\n\t" + "umulh x4, x13, x13\n\t" + /* A[1] * A[1] */ + "mul x2, x14, x14\n\t" + "umulh x3, x14, x14\n\t" + "adds x6, x6, x4\n\t" + "adcs x7, x7, x2\n\t" + "adc x4, x3, xzr\n\t" + /* A[2] * A[2] */ + "mul x2, x15, x15\n\t" + "umulh x3, x15, x15\n\t" + "adds x8, x8, x4\n\t" + "adcs x9, x9, x2\n\t" + "adc x4, x3, xzr\n\t" + /* A[3] * A[3] */ + "mul x2, x16, x16\n\t" + "umulh x3, x16, x16\n\t" + "adds x10, x10, x4\n\t" + "adcs x11, x11, x2\n\t" + "adc x12, x12, x3\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x12, x12, x11, #63\n\t" + "extr x11, x11, x10, #63\n\t" + "extr x10, x10, x9, #63\n\t" + "extr x9, x9, x8, #63\n\t" + "and x8, x8, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x2, #19\n\t" + "mul x3, x2, x9\n\t" + "umulh x9, x2, x9\n\t" + "adds x5, x5, x3\n\t" + "mul x3, x2, x10\n\t" + "umulh x10, x2, x10\n\t" + "adcs x6, x6, x3\n\t" + "mul x3, x2, x11\n\t" + "umulh x11, x2, x11\n\t" + "adcs x7, x7, x3\n\t" + "mul x3, x2, x12\n\t" + "umulh x4, x2, x12\n\t" + "adcs x8, x8, x3\n\t" + "adc x4, x4, xzr\n\t" + /* Add remaining product results in */ + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adcs x8, x8, x11\n\t" + "adc x4, x4, xzr\n\t" + /* Overflow */ + "extr x4, x4, x8, #63\n\t" + "mul x4, x4, x2\n\t" + "and x8, x8, #0x7fffffffffffffff\n\t" + "adds x5, x5, x4\n\t" + "adcs x6, x6, xzr\n\t" + "adcs x7, x7, xzr\n\t" + "adc x8, x8, xzr\n\t" + /* Reduce if top bit set */ + "and x4, x2, x8, asr 63\n\t" + "and x8, x8, #0x7fffffffffffffff\n\t" + "adds x5, x5, x4\n\t" + "adcs x6, x6, xzr\n\t" + "adcs x7, x7, xzr\n\t" + "adc x8, x8, xzr\n\t" + /* Store */ + "stp x5, x6, [%x[r]]\n\t" + "stp x7, x8, [%x[r], #16]\n\t" + "ldp x29, x30, [sp], #16\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16" + ); +} + +void fe_invert(fe r, const fe a) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-160]!\n\t" + "add x29, sp, #0\n\t" + /* Invert */ + "str %x[r], [x29, #144]\n\t" + "str %x[a], [x29, #152]\n\t" + "add x0, x29, #16\n\t" + "bl fe_sq\n\t" + "add x0, x29, #48\n\t" + "add x1, x29, #16\n\t" + "bl fe_sq\n\t" + "add x1, x29, #48\n\t" + "bl fe_sq\n\t" + "ldr x1, [x29, #152]\n\t" + "add x2, x29, #48\n\t" + "bl fe_mul\n\t" + "add x0, x29, #16\n\t" + "add x1, x29, #16\n\t" + "add x2, x29, #48\n\t" + "bl fe_mul\n\t" + "add x0, x29, #0x50\n\t" + "bl fe_sq\n\t" + "add x0, x29, #48\n\t" + "add x1, x29, #48\n\t" + "add x2, x29, #0x50\n\t" + "bl fe_mul\n\t" + "add x0, x29, #0x50\n\t" + "bl fe_sq\n\t" + "mov x20, #4\n\t" + "add x1, x29, #0x50\n\t" + "\n" + "L_fe_invert1_%=: \n\t" + "bl fe_sq\n\t" + "sub x20, x20, #1\n\t" + "cmp x20, #0\n\t" + "bne L_fe_invert1_%=\n\t" + "add x0, x29, #48\n\t" + "add x2, x29, #48\n\t" + "bl fe_mul\n\t" + "add x0, x29, #0x50\n\t" + "add x1, x29, #48\n\t" + "bl fe_sq\n\t" + "mov x20, #9\n\t" + "add x1, x29, #0x50\n\t" + "\n" + "L_fe_invert2_%=: \n\t" + "bl fe_sq\n\t" + "sub x20, x20, #1\n\t" + "cmp x20, #0\n\t" + "bne L_fe_invert2_%=\n\t" + "add x2, x29, #48\n\t" + "bl fe_mul\n\t" + "add x0, x29, #0x70\n\t" + "bl fe_sq\n\t" + "mov x20, #19\n\t" + "add x1, x29, #0x70\n\t" + "\n" + "L_fe_invert3_%=: \n\t" + "bl fe_sq\n\t" + "sub x20, x20, #1\n\t" + "cmp x20, #0\n\t" + "bne L_fe_invert3_%=\n\t" + "add x0, x29, #0x50\n\t" + "add x2, x29, #0x50\n\t" + "bl fe_mul\n\t" + "mov x20, 
#10\n\t" + "add x1, x29, #0x50\n\t" + "\n" + "L_fe_invert4_%=: \n\t" + "bl fe_sq\n\t" + "sub x20, x20, #1\n\t" + "cmp x20, #0\n\t" + "bne L_fe_invert4_%=\n\t" + "add x0, x29, #48\n\t" + "add x2, x29, #48\n\t" + "bl fe_mul\n\t" + "add x0, x29, #0x50\n\t" + "add x1, x29, #48\n\t" + "bl fe_sq\n\t" + "mov x20, #49\n\t" + "add x1, x29, #0x50\n\t" + "\n" + "L_fe_invert5_%=: \n\t" + "bl fe_sq\n\t" + "sub x20, x20, #1\n\t" + "cmp x20, #0\n\t" + "bne L_fe_invert5_%=\n\t" + "add x2, x29, #48\n\t" + "bl fe_mul\n\t" + "add x0, x29, #0x70\n\t" + "bl fe_sq\n\t" + "mov x20, #0x63\n\t" + "add x1, x29, #0x70\n\t" + "\n" + "L_fe_invert6_%=: \n\t" + "bl fe_sq\n\t" + "sub x20, x20, #1\n\t" + "cmp x20, #0\n\t" + "bne L_fe_invert6_%=\n\t" + "add x0, x29, #0x50\n\t" + "add x2, x29, #0x50\n\t" + "bl fe_mul\n\t" + "mov x20, #50\n\t" + "add x1, x29, #0x50\n\t" + "\n" + "L_fe_invert7_%=: \n\t" + "bl fe_sq\n\t" + "sub x20, x20, #1\n\t" + "cmp x20, #0\n\t" + "bne L_fe_invert7_%=\n\t" + "add x0, x29, #48\n\t" + "add x2, x29, #48\n\t" + "bl fe_mul\n\t" + "mov x20, #5\n\t" + "add x1, x29, #48\n\t" + "\n" + "L_fe_invert8_%=: \n\t" + "bl fe_sq\n\t" + "sub x20, x20, #1\n\t" + "cmp x20, #0\n\t" + "bne L_fe_invert8_%=\n\t" + "ldr x0, [x29, #144]\n\t" + "add x2, x29, #16\n\t" + "bl fe_mul\n\t" + "ldp x29, x30, [sp], #0xa0\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "x20" + ); +} + +int curve25519(byte* r, byte* n, byte* a) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-192]!\n\t" + "add x29, sp, #0\n\t" + "mov x23, xzr\n\t" + "str %x[r], [x29, #176]\n\t" + "str %x[a], [x29, #184]\n\t" + /* Copy */ + "ldp x6, x7, [%x[a]]\n\t" + "ldp x8, x9, [%x[a], #16]\n\t" + "stp x6, x7, [x29, #80]\n\t" + "stp x8, x9, [x29, #96]\n\t" + /* Set one */ + "mov %x[a], #1\n\t" + "stp %x[a], xzr, [%x[r]]\n\t" + "stp xzr, xzr, [%x[r], #16]\n\t" + /* Set zero */ + "stp xzr, xzr, [x29, #16]\n\t" + "stp xzr, xzr, [x29, #32]\n\t" + /* Set one */ + "mov %x[a], #1\n\t" + "stp %x[a], xzr, [x29, #48]\n\t" + "stp xzr, xzr, [x29, #64]\n\t" + "mov x25, #62\n\t" + "mov x24, #24\n\t" + "\n" + "L_curve25519_words_%=: \n\t" + "\n" + "L_curve25519_bits_%=: \n\t" + "ldr %x[a], [%x[n], x24]\n\t" + "lsr %x[a], %x[a], x25\n\t" + "and %x[a], %x[a], #1\n\t" + "eor x23, x23, %x[a]\n\t" + /* Conditional Swap */ + "cmp x23, #1\n\t" + "ldp x10, x11, [%x[r]]\n\t" + "ldp x12, x13, [%x[r], #16]\n\t" + "ldp x6, x7, [x29, #80]\n\t" + "ldp x8, x9, [x29, #96]\n\t" + "csel x14, x10, x6, eq\n\t" + "csel x10, x6, x10, eq\n\t" + "csel x15, x11, x7, eq\n\t" + "csel x11, x7, x11, eq\n\t" + "csel x16, x12, x8, eq\n\t" + "csel x12, x8, x12, eq\n\t" + "csel x17, x13, x9, eq\n\t" + "csel x13, x9, x13, eq\n\t" + /* Conditional Swap */ + "cmp x23, #1\n\t" + "ldp x19, x20, [x29, #16]\n\t" + "ldp x21, x22, [x29, #32]\n\t" + "ldp x6, x7, [x29, #48]\n\t" + "ldp x8, x9, [x29, #64]\n\t" + "csel x5, x19, x6, eq\n\t" + "csel x19, x6, x19, eq\n\t" + "csel x26, x20, x7, eq\n\t" + "csel x20, x7, x20, eq\n\t" + "csel x27, x21, x8, eq\n\t" + "csel x21, x8, x21, eq\n\t" + "csel x28, x22, x9, eq\n\t" + "csel x22, x9, x22, eq\n\t" + "mov x23, %x[a]\n\t" + /* Add */ + "adds x6, x10, x19\n\t" + "adcs x7, x11, x20\n\t" + "adcs x8, x12, x21\n\t" + "adc x9, x13, x22\n\t" + "mov x3, #-19\n\t" + "asr %x[a], x9, #63\n\t" + /* Mask the modulus */ + "and x3, %x[a], x3\n\t" + "and x4, %x[a], #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x6, x6, x3\n\t" + "sbcs x7, x7, %x[a]\n\t" + "sbcs x8, x8, %x[a]\n\t" + "sbc x9, x9, x4\n\t" + /* Sub */ + "subs x19, x10, x19\n\t" + "sbcs x20, x11, 
x20\n\t" + "sbcs x21, x12, x21\n\t" + "sbcs x22, x13, x22\n\t" + "mov x3, #-19\n\t" + "csetm %x[a], cc\n\t" + /* Mask the modulus */ + "and x3, %x[a], x3\n\t" + "and x4, %x[a], #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x19, x19, x3\n\t" + "adcs x20, x20, %x[a]\n\t" + "adcs x21, x21, %x[a]\n\t" + "adc x22, x22, x4\n\t" + "stp x19, x20, [x29, #144]\n\t" + "stp x21, x22, [x29, #160]\n\t" + /* Add */ + "adds x10, x14, x5\n\t" + "adcs x11, x15, x26\n\t" + "adcs x12, x16, x27\n\t" + "adc x13, x17, x28\n\t" + "mov x3, #-19\n\t" + "asr %x[a], x13, #63\n\t" + /* Mask the modulus */ + "and x3, %x[a], x3\n\t" + "and x4, %x[a], #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x10, x10, x3\n\t" + "sbcs x11, x11, %x[a]\n\t" + "sbcs x12, x12, %x[a]\n\t" + "sbc x13, x13, x4\n\t" + /* Sub */ + "subs x14, x14, x5\n\t" + "sbcs x15, x15, x26\n\t" + "sbcs x16, x16, x27\n\t" + "sbcs x17, x17, x28\n\t" + "mov x3, #-19\n\t" + "csetm %x[a], cc\n\t" + /* Mask the modulus */ + "and x3, %x[a], x3\n\t" + "and x4, %x[a], #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x14, x14, x3\n\t" + "adcs x15, x15, %x[a]\n\t" + "adcs x16, x16, %x[a]\n\t" + "adc x17, x17, x4\n\t" + /* Multiply */ + /* A[0] * B[0] */ + "mul x19, x14, x6\n\t" + "umulh x20, x14, x6\n\t" + /* A[0] * B[1] */ + "mul x3, x14, x7\n\t" + "umulh x21, x14, x7\n\t" + "adds x20, x20, x3\n\t" + "adc x21, x21, xzr\n\t" + /* A[1] * B[0] */ + "mul x3, x15, x6\n\t" + "umulh x4, x15, x6\n\t" + "adds x20, x20, x3\n\t" + "adcs x21, x21, x4\n\t" + "adc x22, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x3, x14, x8\n\t" + "umulh x4, x14, x8\n\t" + "adds x21, x21, x3\n\t" + "adc x22, x22, x4\n\t" + /* A[1] * B[1] */ + "mul x3, x15, x7\n\t" + "umulh x4, x15, x7\n\t" + "adds x21, x21, x3\n\t" + "adcs x22, x22, x4\n\t" + "adc %x[a], xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x3, x16, x6\n\t" + "umulh x4, x16, x6\n\t" + "adds x21, x21, x3\n\t" + "adcs x22, x22, x4\n\t" + "adc %x[a], %x[a], xzr\n\t" + /* A[0] * B[3] */ + "mul x3, x14, x9\n\t" + "umulh x4, x14, x9\n\t" + "adds x22, x22, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x3, x15, x8\n\t" + "umulh x4, x15, x8\n\t" + "adds x22, x22, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, x26, xzr\n\t" + /* A[2] * B[1] */ + "mul x3, x16, x7\n\t" + "umulh x4, x16, x7\n\t" + "adds x22, x22, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, x26, xzr\n\t" + /* A[3] * B[0] */ + "mul x3, x17, x6\n\t" + "umulh x4, x17, x6\n\t" + "adds x22, x22, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, x26, xzr\n\t" + /* A[1] * B[3] */ + "mul x3, x15, x9\n\t" + "umulh x4, x15, x9\n\t" + "adds %x[a], %x[a], x3\n\t" + "adcs x26, x26, x4\n\t" + "adc x27, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x3, x16, x8\n\t" + "umulh x4, x16, x8\n\t" + "adds %x[a], %x[a], x3\n\t" + "adcs x26, x26, x4\n\t" + "adc x27, x27, xzr\n\t" + /* A[3] * B[1] */ + "mul x3, x17, x7\n\t" + "umulh x4, x17, x7\n\t" + "adds %x[a], %x[a], x3\n\t" + "adcs x26, x26, x4\n\t" + "adc x27, x27, xzr\n\t" + /* A[2] * B[3] */ + "mul x3, x16, x9\n\t" + "umulh x4, x16, x9\n\t" + "adds x26, x26, x3\n\t" + "adcs x27, x27, x4\n\t" + "adc x28, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x3, x17, x8\n\t" + "umulh x4, x17, x8\n\t" + "adds x26, x26, x3\n\t" + "adcs x27, x27, x4\n\t" + "adc x28, x28, xzr\n\t" + /* A[3] * B[3] */ + "mul x3, x17, x9\n\t" + "umulh x4, x17, x9\n\t" + "adds x27, x27, x3\n\t" + "adc x28, x28, x4\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from 
t3 */ + "extr x28, x28, x27, #63\n\t" + "extr x27, x27, x26, #63\n\t" + "extr x26, x26, %x[a], #63\n\t" + "extr %x[a], %x[a], x22, #63\n\t" + "and x22, x22, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x3, #19\n\t" + "mul x4, x3, %x[a]\n\t" + "umulh %x[a], x3, %x[a]\n\t" + "adds x19, x19, x4\n\t" + "mul x4, x3, x26\n\t" + "umulh x26, x3, x26\n\t" + "adcs x20, x20, x4\n\t" + "mul x4, x3, x27\n\t" + "umulh x27, x3, x27\n\t" + "adcs x21, x21, x4\n\t" + "mul x4, x3, x28\n\t" + "umulh x5, x3, x28\n\t" + "adcs x22, x22, x4\n\t" + "adc x5, x5, xzr\n\t" + /* Add remaining product results in */ + "adds x20, x20, %x[a]\n\t" + "adcs x21, x21, x26\n\t" + "adcs x22, x22, x27\n\t" + "adc x5, x5, xzr\n\t" + /* Overflow */ + "extr x5, x5, x22, #63\n\t" + "mul x5, x5, x3\n\t" + "and x22, x22, #0x7fffffffffffffff\n\t" + "adds x19, x19, x5\n\t" + "adcs x20, x20, xzr\n\t" + "adcs x21, x21, xzr\n\t" + "adc x22, x22, xzr\n\t" + /* Reduce if top bit set */ + "and x5, x3, x22, asr 63\n\t" + "and x22, x22, #0x7fffffffffffffff\n\t" + "adds x19, x19, x5\n\t" + "adcs x20, x20, xzr\n\t" + "adcs x21, x21, xzr\n\t" + "adc x22, x22, xzr\n\t" + /* Store */ + "stp x19, x20, [x29, #112]\n\t" + "stp x21, x22, [x29, #128]\n\t" + /* Multiply */ + "ldp %x[a], x26, [x29, #144]\n\t" + "ldp x27, x28, [x29, #160]\n\t" + /* A[0] * B[0] */ + "mul x19, x10, %x[a]\n\t" + "umulh x20, x10, %x[a]\n\t" + /* A[0] * B[1] */ + "mul x3, x10, x26\n\t" + "umulh x21, x10, x26\n\t" + "adds x20, x20, x3\n\t" + "adc x21, x21, xzr\n\t" + /* A[1] * B[0] */ + "mul x3, x11, %x[a]\n\t" + "umulh x4, x11, %x[a]\n\t" + "adds x20, x20, x3\n\t" + "adcs x21, x21, x4\n\t" + "adc x22, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x3, x10, x27\n\t" + "umulh x4, x10, x27\n\t" + "adds x21, x21, x3\n\t" + "adc x22, x22, x4\n\t" + /* A[1] * B[1] */ + "mul x3, x11, x26\n\t" + "umulh x4, x11, x26\n\t" + "adds x21, x21, x3\n\t" + "adcs x22, x22, x4\n\t" + "adc x14, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x3, x12, %x[a]\n\t" + "umulh x4, x12, %x[a]\n\t" + "adds x21, x21, x3\n\t" + "adcs x22, x22, x4\n\t" + "adc x14, x14, xzr\n\t" + /* A[0] * B[3] */ + "mul x3, x10, x28\n\t" + "umulh x4, x10, x28\n\t" + "adds x22, x22, x3\n\t" + "adcs x14, x14, x4\n\t" + "adc x15, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x3, x11, x27\n\t" + "umulh x4, x11, x27\n\t" + "adds x22, x22, x3\n\t" + "adcs x14, x14, x4\n\t" + "adc x15, x15, xzr\n\t" + /* A[2] * B[1] */ + "mul x3, x12, x26\n\t" + "umulh x4, x12, x26\n\t" + "adds x22, x22, x3\n\t" + "adcs x14, x14, x4\n\t" + "adc x15, x15, xzr\n\t" + /* A[3] * B[0] */ + "mul x3, x13, %x[a]\n\t" + "umulh x4, x13, %x[a]\n\t" + "adds x22, x22, x3\n\t" + "adcs x14, x14, x4\n\t" + "adc x15, x15, xzr\n\t" + /* A[1] * B[3] */ + "mul x3, x11, x28\n\t" + "umulh x4, x11, x28\n\t" + "adds x14, x14, x3\n\t" + "adcs x15, x15, x4\n\t" + "adc x16, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x3, x12, x27\n\t" + "umulh x4, x12, x27\n\t" + "adds x14, x14, x3\n\t" + "adcs x15, x15, x4\n\t" + "adc x16, x16, xzr\n\t" + /* A[3] * B[1] */ + "mul x3, x13, x26\n\t" + "umulh x4, x13, x26\n\t" + "adds x14, x14, x3\n\t" + "adcs x15, x15, x4\n\t" + "adc x16, x16, xzr\n\t" + /* A[2] * B[3] */ + "mul x3, x12, x28\n\t" + "umulh x4, x12, x28\n\t" + "adds x15, x15, x3\n\t" + "adcs x16, x16, x4\n\t" + "adc x17, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x3, x13, x27\n\t" + "umulh x4, x13, x27\n\t" + "adds x15, x15, x3\n\t" + "adcs x16, x16, x4\n\t" + "adc x17, x17, xzr\n\t" + /* A[3] * B[3] */ + "mul x3, x13, x28\n\t" + "umulh x4, x13, x28\n\t" + "adds x16, x16, x3\n\t" + 
"adc x17, x17, x4\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x17, x17, x16, #63\n\t" + "extr x16, x16, x15, #63\n\t" + "extr x15, x15, x14, #63\n\t" + "extr x14, x14, x22, #63\n\t" + "and x22, x22, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x3, #19\n\t" + "mul x4, x3, x14\n\t" + "umulh x14, x3, x14\n\t" + "adds x19, x19, x4\n\t" + "mul x4, x3, x15\n\t" + "umulh x15, x3, x15\n\t" + "adcs x20, x20, x4\n\t" + "mul x4, x3, x16\n\t" + "umulh x16, x3, x16\n\t" + "adcs x21, x21, x4\n\t" + "mul x4, x3, x17\n\t" + "umulh x5, x3, x17\n\t" + "adcs x22, x22, x4\n\t" + "adc x5, x5, xzr\n\t" + /* Add remaining product results in */ + "adds x20, x20, x14\n\t" + "adcs x21, x21, x15\n\t" + "adcs x22, x22, x16\n\t" + "adc x5, x5, xzr\n\t" + /* Overflow */ + "extr x5, x5, x22, #63\n\t" + "mul x5, x5, x3\n\t" + "and x22, x22, #0x7fffffffffffffff\n\t" + "adds x19, x19, x5\n\t" + "adcs x20, x20, xzr\n\t" + "adcs x21, x21, xzr\n\t" + "adc x22, x22, xzr\n\t" + /* Reduce if top bit set */ + "and x5, x3, x22, asr 63\n\t" + "and x22, x22, #0x7fffffffffffffff\n\t" + "adds x19, x19, x5\n\t" + "adcs x20, x20, xzr\n\t" + "adcs x21, x21, xzr\n\t" + "adc x22, x22, xzr\n\t" + /* Store */ + /* Square */ + /* A[0] * A[1] */ + "mul x11, %x[a], x26\n\t" + "umulh x12, %x[a], x26\n\t" + /* A[0] * A[2] */ + "mul x3, %x[a], x27\n\t" + "umulh x13, %x[a], x27\n\t" + "adds x12, x12, x3\n\t" + "adc x13, x13, xzr\n\t" + /* A[0] * A[3] */ + "mul x3, %x[a], x28\n\t" + "umulh x14, %x[a], x28\n\t" + "adds x13, x13, x3\n\t" + "adc x14, x14, xzr\n\t" + /* A[1] * A[2] */ + "mul x3, x26, x27\n\t" + "umulh x4, x26, x27\n\t" + "adds x13, x13, x3\n\t" + "adcs x14, x14, x4\n\t" + "adc x15, xzr, xzr\n\t" + /* A[1] * A[3] */ + "mul x3, x26, x28\n\t" + "umulh x4, x26, x28\n\t" + "adds x14, x14, x3\n\t" + "adc x15, x15, x4\n\t" + /* A[2] * A[3] */ + "mul x3, x27, x28\n\t" + "umulh x16, x27, x28\n\t" + "adds x15, x15, x3\n\t" + "adc x16, x16, xzr\n\t" + /* Double */ + "adds x11, x11, x11\n\t" + "adcs x12, x12, x12\n\t" + "adcs x13, x13, x13\n\t" + "adcs x14, x14, x14\n\t" + "adcs x15, x15, x15\n\t" + "adcs x16, x16, x16\n\t" + "adc x17, xzr, xzr\n\t" + /* A[0] * A[0] */ + "mul x10, %x[a], %x[a]\n\t" + "umulh x5, %x[a], %x[a]\n\t" + /* A[1] * A[1] */ + "mul x3, x26, x26\n\t" + "umulh x4, x26, x26\n\t" + "adds x11, x11, x5\n\t" + "adcs x12, x12, x3\n\t" + "adc x5, x4, xzr\n\t" + /* A[2] * A[2] */ + "mul x3, x27, x27\n\t" + "umulh x4, x27, x27\n\t" + "adds x13, x13, x5\n\t" + "adcs x14, x14, x3\n\t" + "adc x5, x4, xzr\n\t" + /* A[3] * A[3] */ + "mul x3, x28, x28\n\t" + "umulh x4, x28, x28\n\t" + "adds x15, x15, x5\n\t" + "adcs x16, x16, x3\n\t" + "adc x17, x17, x4\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x17, x17, x16, #63\n\t" + "extr x16, x16, x15, #63\n\t" + "extr x15, x15, x14, #63\n\t" + "extr x14, x14, x13, #63\n\t" + "and x13, x13, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x3, #19\n\t" + "mul x4, x3, x14\n\t" + "umulh x14, x3, x14\n\t" + "adds x10, x10, x4\n\t" + "mul x4, x3, x15\n\t" + "umulh x15, x3, x15\n\t" + "adcs x11, x11, x4\n\t" + "mul x4, x3, x16\n\t" + "umulh x16, x3, x16\n\t" + "adcs x12, x12, x4\n\t" + "mul x4, x3, x17\n\t" + "umulh x5, x3, x17\n\t" + "adcs x13, x13, x4\n\t" + "adc x5, x5, xzr\n\t" + /* Add remaining product results in */ + "adds x11, x11, x14\n\t" + "adcs x12, x12, x15\n\t" + "adcs x13, x13, x16\n\t" + "adc x5, x5, xzr\n\t" + /* Overflow */ + "extr x5, x5, x13, #63\n\t" + "mul x5, x5, 
x3\n\t" + "and x13, x13, #0x7fffffffffffffff\n\t" + "adds x10, x10, x5\n\t" + "adcs x11, x11, xzr\n\t" + "adcs x12, x12, xzr\n\t" + "adc x13, x13, xzr\n\t" + /* Reduce if top bit set */ + "and x5, x3, x13, asr 63\n\t" + "and x13, x13, #0x7fffffffffffffff\n\t" + "adds x10, x10, x5\n\t" + "adcs x11, x11, xzr\n\t" + "adcs x12, x12, xzr\n\t" + "adc x13, x13, xzr\n\t" + /* Store */ + /* Square */ + /* A[0] * A[1] */ + "mul x15, x6, x7\n\t" + "umulh x16, x6, x7\n\t" + /* A[0] * A[2] */ + "mul x3, x6, x8\n\t" + "umulh x17, x6, x8\n\t" + "adds x16, x16, x3\n\t" + "adc x17, x17, xzr\n\t" + /* A[0] * A[3] */ + "mul x3, x6, x9\n\t" + "umulh %x[a], x6, x9\n\t" + "adds x17, x17, x3\n\t" + "adc %x[a], %x[a], xzr\n\t" + /* A[1] * A[2] */ + "mul x3, x7, x8\n\t" + "umulh x4, x7, x8\n\t" + "adds x17, x17, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, xzr, xzr\n\t" + /* A[1] * A[3] */ + "mul x3, x7, x9\n\t" + "umulh x4, x7, x9\n\t" + "adds %x[a], %x[a], x3\n\t" + "adc x26, x26, x4\n\t" + /* A[2] * A[3] */ + "mul x3, x8, x9\n\t" + "umulh x27, x8, x9\n\t" + "adds x26, x26, x3\n\t" + "adc x27, x27, xzr\n\t" + /* Double */ + "adds x15, x15, x15\n\t" + "adcs x16, x16, x16\n\t" + "adcs x17, x17, x17\n\t" + "adcs %x[a], %x[a], %x[a]\n\t" + "adcs x26, x26, x26\n\t" + "adcs x27, x27, x27\n\t" + "adc x28, xzr, xzr\n\t" + /* A[0] * A[0] */ + "mul x14, x6, x6\n\t" + "umulh x5, x6, x6\n\t" + /* A[1] * A[1] */ + "mul x3, x7, x7\n\t" + "umulh x4, x7, x7\n\t" + "adds x15, x15, x5\n\t" + "adcs x16, x16, x3\n\t" + "adc x5, x4, xzr\n\t" + /* A[2] * A[2] */ + "mul x3, x8, x8\n\t" + "umulh x4, x8, x8\n\t" + "adds x17, x17, x5\n\t" + "adcs %x[a], %x[a], x3\n\t" + "adc x5, x4, xzr\n\t" + /* A[3] * A[3] */ + "mul x3, x9, x9\n\t" + "umulh x4, x9, x9\n\t" + "adds x26, x26, x5\n\t" + "adcs x27, x27, x3\n\t" + "adc x28, x28, x4\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x28, x28, x27, #63\n\t" + "extr x27, x27, x26, #63\n\t" + "extr x26, x26, %x[a], #63\n\t" + "extr %x[a], %x[a], x17, #63\n\t" + "and x17, x17, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x3, #19\n\t" + "mul x4, x3, %x[a]\n\t" + "umulh %x[a], x3, %x[a]\n\t" + "adds x14, x14, x4\n\t" + "mul x4, x3, x26\n\t" + "umulh x26, x3, x26\n\t" + "adcs x15, x15, x4\n\t" + "mul x4, x3, x27\n\t" + "umulh x27, x3, x27\n\t" + "adcs x16, x16, x4\n\t" + "mul x4, x3, x28\n\t" + "umulh x5, x3, x28\n\t" + "adcs x17, x17, x4\n\t" + "adc x5, x5, xzr\n\t" + /* Add remaining product results in */ + "adds x15, x15, %x[a]\n\t" + "adcs x16, x16, x26\n\t" + "adcs x17, x17, x27\n\t" + "adc x5, x5, xzr\n\t" + /* Overflow */ + "extr x5, x5, x17, #63\n\t" + "mul x5, x5, x3\n\t" + "and x17, x17, #0x7fffffffffffffff\n\t" + "adds x14, x14, x5\n\t" + "adcs x15, x15, xzr\n\t" + "adcs x16, x16, xzr\n\t" + "adc x17, x17, xzr\n\t" + /* Reduce if top bit set */ + "and x5, x3, x17, asr 63\n\t" + "and x17, x17, #0x7fffffffffffffff\n\t" + "adds x14, x14, x5\n\t" + "adcs x15, x15, xzr\n\t" + "adcs x16, x16, xzr\n\t" + "adc x17, x17, xzr\n\t" + /* Store */ + /* Multiply */ + /* A[0] * B[0] */ + "mul x6, x14, x10\n\t" + "umulh x7, x14, x10\n\t" + /* A[0] * B[1] */ + "mul x3, x14, x11\n\t" + "umulh x8, x14, x11\n\t" + "adds x7, x7, x3\n\t" + "adc x8, x8, xzr\n\t" + /* A[1] * B[0] */ + "mul x3, x15, x10\n\t" + "umulh x4, x15, x10\n\t" + "adds x7, x7, x3\n\t" + "adcs x8, x8, x4\n\t" + "adc x9, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x3, x14, x12\n\t" + "umulh x4, x14, x12\n\t" + "adds x8, x8, x3\n\t" + "adc x9, x9, x4\n\t" + /* A[1] * B[1] */ + "mul 
x3, x15, x11\n\t" + "umulh x4, x15, x11\n\t" + "adds x8, x8, x3\n\t" + "adcs x9, x9, x4\n\t" + "adc %x[a], xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x3, x16, x10\n\t" + "umulh x4, x16, x10\n\t" + "adds x8, x8, x3\n\t" + "adcs x9, x9, x4\n\t" + "adc %x[a], %x[a], xzr\n\t" + /* A[0] * B[3] */ + "mul x3, x14, x13\n\t" + "umulh x4, x14, x13\n\t" + "adds x9, x9, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x3, x15, x12\n\t" + "umulh x4, x15, x12\n\t" + "adds x9, x9, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, x26, xzr\n\t" + /* A[2] * B[1] */ + "mul x3, x16, x11\n\t" + "umulh x4, x16, x11\n\t" + "adds x9, x9, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, x26, xzr\n\t" + /* A[3] * B[0] */ + "mul x3, x17, x10\n\t" + "umulh x4, x17, x10\n\t" + "adds x9, x9, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, x26, xzr\n\t" + /* A[1] * B[3] */ + "mul x3, x15, x13\n\t" + "umulh x4, x15, x13\n\t" + "adds %x[a], %x[a], x3\n\t" + "adcs x26, x26, x4\n\t" + "adc x27, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x3, x16, x12\n\t" + "umulh x4, x16, x12\n\t" + "adds %x[a], %x[a], x3\n\t" + "adcs x26, x26, x4\n\t" + "adc x27, x27, xzr\n\t" + /* A[3] * B[1] */ + "mul x3, x17, x11\n\t" + "umulh x4, x17, x11\n\t" + "adds %x[a], %x[a], x3\n\t" + "adcs x26, x26, x4\n\t" + "adc x27, x27, xzr\n\t" + /* A[2] * B[3] */ + "mul x3, x16, x13\n\t" + "umulh x4, x16, x13\n\t" + "adds x26, x26, x3\n\t" + "adcs x27, x27, x4\n\t" + "adc x28, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x3, x17, x12\n\t" + "umulh x4, x17, x12\n\t" + "adds x26, x26, x3\n\t" + "adcs x27, x27, x4\n\t" + "adc x28, x28, xzr\n\t" + /* A[3] * B[3] */ + "mul x3, x17, x13\n\t" + "umulh x4, x17, x13\n\t" + "adds x27, x27, x3\n\t" + "adc x28, x28, x4\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x28, x28, x27, #63\n\t" + "extr x27, x27, x26, #63\n\t" + "extr x26, x26, %x[a], #63\n\t" + "extr %x[a], %x[a], x9, #63\n\t" + "and x9, x9, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x3, #19\n\t" + "mul x4, x3, %x[a]\n\t" + "umulh %x[a], x3, %x[a]\n\t" + "adds x6, x6, x4\n\t" + "mul x4, x3, x26\n\t" + "umulh x26, x3, x26\n\t" + "adcs x7, x7, x4\n\t" + "mul x4, x3, x27\n\t" + "umulh x27, x3, x27\n\t" + "adcs x8, x8, x4\n\t" + "mul x4, x3, x28\n\t" + "umulh x5, x3, x28\n\t" + "adcs x9, x9, x4\n\t" + "adc x5, x5, xzr\n\t" + /* Add remaining product results in */ + "adds x7, x7, %x[a]\n\t" + "adcs x8, x8, x26\n\t" + "adcs x9, x9, x27\n\t" + "adc x5, x5, xzr\n\t" + /* Overflow */ + "extr x5, x5, x9, #63\n\t" + "mul x5, x5, x3\n\t" + "and x9, x9, #0x7fffffffffffffff\n\t" + "adds x6, x6, x5\n\t" + "adcs x7, x7, xzr\n\t" + "adcs x8, x8, xzr\n\t" + "adc x9, x9, xzr\n\t" + /* Reduce if top bit set */ + "and x5, x3, x9, asr 63\n\t" + "and x9, x9, #0x7fffffffffffffff\n\t" + "adds x6, x6, x5\n\t" + "adcs x7, x7, xzr\n\t" + "adcs x8, x8, xzr\n\t" + "adc x9, x9, xzr\n\t" + /* Store */ + "stp x6, x7, [%x[r]]\n\t" + "stp x8, x9, [%x[r], #16]\n\t" + /* Sub */ + "subs x14, x14, x10\n\t" + "sbcs x15, x15, x11\n\t" + "sbcs x16, x16, x12\n\t" + "sbcs x17, x17, x13\n\t" + "mov x3, #-19\n\t" + "csetm %x[a], cc\n\t" + /* Mask the modulus */ + "and x3, %x[a], x3\n\t" + "and x4, %x[a], #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x14, x14, x3\n\t" + "adcs x15, x15, %x[a]\n\t" + "adcs x16, x16, %x[a]\n\t" + "adc x17, x17, x4\n\t" + /* Multiply by 121666 */ + "mov x5, #0xdb42\n\t" + "movk x5, #1, lsl 16\n\t" + "mul x6, x14, x5\n\t" + "umulh x7, x14, x5\n\t" + "mul 
x3, x15, x5\n\t" + "umulh x4, x15, x5\n\t" + "adds x7, x7, x3\n\t" + "adc x8, xzr, x4\n\t" + "mul x3, x16, x5\n\t" + "umulh x4, x16, x5\n\t" + "adds x8, x8, x3\n\t" + "adc x9, xzr, x4\n\t" + "mul x3, x17, x5\n\t" + "umulh x4, x17, x5\n\t" + "adds x9, x9, x3\n\t" + "adc x4, xzr, x4\n\t" + "mov x5, #19\n\t" + "extr x4, x4, x9, #63\n\t" + "mul x4, x4, x5\n\t" + "and x9, x9, #0x7fffffffffffffff\n\t" + "adds x6, x6, x4\n\t" + "adcs x7, x7, xzr\n\t" + "adcs x8, x8, xzr\n\t" + "adc x9, x9, xzr\n\t" + /* Add */ + "adds x10, x10, x6\n\t" + "adcs x11, x11, x7\n\t" + "adcs x12, x12, x8\n\t" + "adc x13, x13, x9\n\t" + "mov x3, #-19\n\t" + "asr %x[a], x13, #63\n\t" + /* Mask the modulus */ + "and x3, %x[a], x3\n\t" + "and x4, %x[a], #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x10, x10, x3\n\t" + "sbcs x11, x11, %x[a]\n\t" + "sbcs x12, x12, %x[a]\n\t" + "sbc x13, x13, x4\n\t" + /* Multiply */ + /* A[0] * B[0] */ + "mul x6, x14, x10\n\t" + "umulh x7, x14, x10\n\t" + /* A[0] * B[1] */ + "mul x3, x14, x11\n\t" + "umulh x8, x14, x11\n\t" + "adds x7, x7, x3\n\t" + "adc x8, x8, xzr\n\t" + /* A[1] * B[0] */ + "mul x3, x15, x10\n\t" + "umulh x4, x15, x10\n\t" + "adds x7, x7, x3\n\t" + "adcs x8, x8, x4\n\t" + "adc x9, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x3, x14, x12\n\t" + "umulh x4, x14, x12\n\t" + "adds x8, x8, x3\n\t" + "adc x9, x9, x4\n\t" + /* A[1] * B[1] */ + "mul x3, x15, x11\n\t" + "umulh x4, x15, x11\n\t" + "adds x8, x8, x3\n\t" + "adcs x9, x9, x4\n\t" + "adc %x[a], xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x3, x16, x10\n\t" + "umulh x4, x16, x10\n\t" + "adds x8, x8, x3\n\t" + "adcs x9, x9, x4\n\t" + "adc %x[a], %x[a], xzr\n\t" + /* A[0] * B[3] */ + "mul x3, x14, x13\n\t" + "umulh x4, x14, x13\n\t" + "adds x9, x9, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x3, x15, x12\n\t" + "umulh x4, x15, x12\n\t" + "adds x9, x9, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, x26, xzr\n\t" + /* A[2] * B[1] */ + "mul x3, x16, x11\n\t" + "umulh x4, x16, x11\n\t" + "adds x9, x9, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, x26, xzr\n\t" + /* A[3] * B[0] */ + "mul x3, x17, x10\n\t" + "umulh x4, x17, x10\n\t" + "adds x9, x9, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, x26, xzr\n\t" + /* A[1] * B[3] */ + "mul x3, x15, x13\n\t" + "umulh x4, x15, x13\n\t" + "adds %x[a], %x[a], x3\n\t" + "adcs x26, x26, x4\n\t" + "adc x27, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x3, x16, x12\n\t" + "umulh x4, x16, x12\n\t" + "adds %x[a], %x[a], x3\n\t" + "adcs x26, x26, x4\n\t" + "adc x27, x27, xzr\n\t" + /* A[3] * B[1] */ + "mul x3, x17, x11\n\t" + "umulh x4, x17, x11\n\t" + "adds %x[a], %x[a], x3\n\t" + "adcs x26, x26, x4\n\t" + "adc x27, x27, xzr\n\t" + /* A[2] * B[3] */ + "mul x3, x16, x13\n\t" + "umulh x4, x16, x13\n\t" + "adds x26, x26, x3\n\t" + "adcs x27, x27, x4\n\t" + "adc x28, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x3, x17, x12\n\t" + "umulh x4, x17, x12\n\t" + "adds x26, x26, x3\n\t" + "adcs x27, x27, x4\n\t" + "adc x28, x28, xzr\n\t" + /* A[3] * B[3] */ + "mul x3, x17, x13\n\t" + "umulh x4, x17, x13\n\t" + "adds x27, x27, x3\n\t" + "adc x28, x28, x4\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x28, x28, x27, #63\n\t" + "extr x27, x27, x26, #63\n\t" + "extr x26, x26, %x[a], #63\n\t" + "extr %x[a], %x[a], x9, #63\n\t" + "and x9, x9, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x3, #19\n\t" + "mul x4, x3, %x[a]\n\t" + "umulh %x[a], x3, %x[a]\n\t" + "adds x6, x6, x4\n\t" + "mul x4, 
x3, x26\n\t" + "umulh x26, x3, x26\n\t" + "adcs x7, x7, x4\n\t" + "mul x4, x3, x27\n\t" + "umulh x27, x3, x27\n\t" + "adcs x8, x8, x4\n\t" + "mul x4, x3, x28\n\t" + "umulh x5, x3, x28\n\t" + "adcs x9, x9, x4\n\t" + "adc x5, x5, xzr\n\t" + /* Add remaining product results in */ + "adds x7, x7, %x[a]\n\t" + "adcs x8, x8, x26\n\t" + "adcs x9, x9, x27\n\t" + "adc x5, x5, xzr\n\t" + /* Overflow */ + "extr x5, x5, x9, #63\n\t" + "mul x5, x5, x3\n\t" + "and x9, x9, #0x7fffffffffffffff\n\t" + "adds x6, x6, x5\n\t" + "adcs x7, x7, xzr\n\t" + "adcs x8, x8, xzr\n\t" + "adc x9, x9, xzr\n\t" + /* Reduce if top bit set */ + "and x5, x3, x9, asr 63\n\t" + "and x9, x9, #0x7fffffffffffffff\n\t" + "adds x6, x6, x5\n\t" + "adcs x7, x7, xzr\n\t" + "adcs x8, x8, xzr\n\t" + "adc x9, x9, xzr\n\t" + /* Store */ + "stp x6, x7, [x29, #16]\n\t" + "stp x8, x9, [x29, #32]\n\t" + /* Add */ + "ldp x6, x7, [x29, #112]\n\t" + "ldp x8, x9, [x29, #128]\n\t" + "adds x10, x6, x19\n\t" + "adcs x11, x7, x20\n\t" + "adcs x12, x8, x21\n\t" + "adc x13, x9, x22\n\t" + "mov x3, #-19\n\t" + "asr %x[a], x13, #63\n\t" + /* Mask the modulus */ + "and x3, %x[a], x3\n\t" + "and x4, %x[a], #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x10, x10, x3\n\t" + "sbcs x11, x11, %x[a]\n\t" + "sbcs x12, x12, %x[a]\n\t" + "sbc x13, x13, x4\n\t" + /* Sub */ + "subs x19, x6, x19\n\t" + "sbcs x20, x7, x20\n\t" + "sbcs x21, x8, x21\n\t" + "sbcs x22, x9, x22\n\t" + "mov x3, #-19\n\t" + "csetm %x[a], cc\n\t" + /* Mask the modulus */ + "and x3, %x[a], x3\n\t" + "and x4, %x[a], #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x19, x19, x3\n\t" + "adcs x20, x20, %x[a]\n\t" + "adcs x21, x21, %x[a]\n\t" + "adc x22, x22, x4\n\t" + /* Square */ + /* A[0] * A[1] */ + "mul x7, x10, x11\n\t" + "umulh x8, x10, x11\n\t" + /* A[0] * A[2] */ + "mul x3, x10, x12\n\t" + "umulh x9, x10, x12\n\t" + "adds x8, x8, x3\n\t" + "adc x9, x9, xzr\n\t" + /* A[0] * A[3] */ + "mul x3, x10, x13\n\t" + "umulh %x[a], x10, x13\n\t" + "adds x9, x9, x3\n\t" + "adc %x[a], %x[a], xzr\n\t" + /* A[1] * A[2] */ + "mul x3, x11, x12\n\t" + "umulh x4, x11, x12\n\t" + "adds x9, x9, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, xzr, xzr\n\t" + /* A[1] * A[3] */ + "mul x3, x11, x13\n\t" + "umulh x4, x11, x13\n\t" + "adds %x[a], %x[a], x3\n\t" + "adc x26, x26, x4\n\t" + /* A[2] * A[3] */ + "mul x3, x12, x13\n\t" + "umulh x27, x12, x13\n\t" + "adds x26, x26, x3\n\t" + "adc x27, x27, xzr\n\t" + /* Double */ + "adds x7, x7, x7\n\t" + "adcs x8, x8, x8\n\t" + "adcs x9, x9, x9\n\t" + "adcs %x[a], %x[a], %x[a]\n\t" + "adcs x26, x26, x26\n\t" + "adcs x27, x27, x27\n\t" + "adc x28, xzr, xzr\n\t" + /* A[0] * A[0] */ + "mul x6, x10, x10\n\t" + "umulh x5, x10, x10\n\t" + /* A[1] * A[1] */ + "mul x3, x11, x11\n\t" + "umulh x4, x11, x11\n\t" + "adds x7, x7, x5\n\t" + "adcs x8, x8, x3\n\t" + "adc x5, x4, xzr\n\t" + /* A[2] * A[2] */ + "mul x3, x12, x12\n\t" + "umulh x4, x12, x12\n\t" + "adds x9, x9, x5\n\t" + "adcs %x[a], %x[a], x3\n\t" + "adc x5, x4, xzr\n\t" + /* A[3] * A[3] */ + "mul x3, x13, x13\n\t" + "umulh x4, x13, x13\n\t" + "adds x26, x26, x5\n\t" + "adcs x27, x27, x3\n\t" + "adc x28, x28, x4\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x28, x28, x27, #63\n\t" + "extr x27, x27, x26, #63\n\t" + "extr x26, x26, %x[a], #63\n\t" + "extr %x[a], %x[a], x9, #63\n\t" + "and x9, x9, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x3, #19\n\t" + "mul x4, x3, %x[a]\n\t" + "umulh %x[a], x3, %x[a]\n\t" + "adds x6, x6, 
x4\n\t" + "mul x4, x3, x26\n\t" + "umulh x26, x3, x26\n\t" + "adcs x7, x7, x4\n\t" + "mul x4, x3, x27\n\t" + "umulh x27, x3, x27\n\t" + "adcs x8, x8, x4\n\t" + "mul x4, x3, x28\n\t" + "umulh x5, x3, x28\n\t" + "adcs x9, x9, x4\n\t" + "adc x5, x5, xzr\n\t" + /* Add remaining product results in */ + "adds x7, x7, %x[a]\n\t" + "adcs x8, x8, x26\n\t" + "adcs x9, x9, x27\n\t" + "adc x5, x5, xzr\n\t" + /* Overflow */ + "extr x5, x5, x9, #63\n\t" + "mul x5, x5, x3\n\t" + "and x9, x9, #0x7fffffffffffffff\n\t" + "adds x6, x6, x5\n\t" + "adcs x7, x7, xzr\n\t" + "adcs x8, x8, xzr\n\t" + "adc x9, x9, xzr\n\t" + /* Reduce if top bit set */ + "and x5, x3, x9, asr 63\n\t" + "and x9, x9, #0x7fffffffffffffff\n\t" + "adds x6, x6, x5\n\t" + "adcs x7, x7, xzr\n\t" + "adcs x8, x8, xzr\n\t" + "adc x9, x9, xzr\n\t" + /* Store */ + "stp x6, x7, [x29, #80]\n\t" + "stp x8, x9, [x29, #96]\n\t" + /* Square */ + /* A[0] * A[1] */ + "mul x7, x19, x20\n\t" + "umulh x8, x19, x20\n\t" + /* A[0] * A[2] */ + "mul x3, x19, x21\n\t" + "umulh x9, x19, x21\n\t" + "adds x8, x8, x3\n\t" + "adc x9, x9, xzr\n\t" + /* A[0] * A[3] */ + "mul x3, x19, x22\n\t" + "umulh %x[a], x19, x22\n\t" + "adds x9, x9, x3\n\t" + "adc %x[a], %x[a], xzr\n\t" + /* A[1] * A[2] */ + "mul x3, x20, x21\n\t" + "umulh x4, x20, x21\n\t" + "adds x9, x9, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, xzr, xzr\n\t" + /* A[1] * A[3] */ + "mul x3, x20, x22\n\t" + "umulh x4, x20, x22\n\t" + "adds %x[a], %x[a], x3\n\t" + "adc x26, x26, x4\n\t" + /* A[2] * A[3] */ + "mul x3, x21, x22\n\t" + "umulh x27, x21, x22\n\t" + "adds x26, x26, x3\n\t" + "adc x27, x27, xzr\n\t" + /* Double */ + "adds x7, x7, x7\n\t" + "adcs x8, x8, x8\n\t" + "adcs x9, x9, x9\n\t" + "adcs %x[a], %x[a], %x[a]\n\t" + "adcs x26, x26, x26\n\t" + "adcs x27, x27, x27\n\t" + "adc x28, xzr, xzr\n\t" + /* A[0] * A[0] */ + "mul x6, x19, x19\n\t" + "umulh x5, x19, x19\n\t" + /* A[1] * A[1] */ + "mul x3, x20, x20\n\t" + "umulh x4, x20, x20\n\t" + "adds x7, x7, x5\n\t" + "adcs x8, x8, x3\n\t" + "adc x5, x4, xzr\n\t" + /* A[2] * A[2] */ + "mul x3, x21, x21\n\t" + "umulh x4, x21, x21\n\t" + "adds x9, x9, x5\n\t" + "adcs %x[a], %x[a], x3\n\t" + "adc x5, x4, xzr\n\t" + /* A[3] * A[3] */ + "mul x3, x22, x22\n\t" + "umulh x4, x22, x22\n\t" + "adds x26, x26, x5\n\t" + "adcs x27, x27, x3\n\t" + "adc x28, x28, x4\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x28, x28, x27, #63\n\t" + "extr x27, x27, x26, #63\n\t" + "extr x26, x26, %x[a], #63\n\t" + "extr %x[a], %x[a], x9, #63\n\t" + "and x9, x9, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x3, #19\n\t" + "mul x4, x3, %x[a]\n\t" + "umulh %x[a], x3, %x[a]\n\t" + "adds x6, x6, x4\n\t" + "mul x4, x3, x26\n\t" + "umulh x26, x3, x26\n\t" + "adcs x7, x7, x4\n\t" + "mul x4, x3, x27\n\t" + "umulh x27, x3, x27\n\t" + "adcs x8, x8, x4\n\t" + "mul x4, x3, x28\n\t" + "umulh x5, x3, x28\n\t" + "adcs x9, x9, x4\n\t" + "adc x5, x5, xzr\n\t" + /* Add remaining product results in */ + "adds x7, x7, %x[a]\n\t" + "adcs x8, x8, x26\n\t" + "adcs x9, x9, x27\n\t" + "adc x5, x5, xzr\n\t" + /* Overflow */ + "extr x5, x5, x9, #63\n\t" + "mul x5, x5, x3\n\t" + "and x9, x9, #0x7fffffffffffffff\n\t" + "adds x6, x6, x5\n\t" + "adcs x7, x7, xzr\n\t" + "adcs x8, x8, xzr\n\t" + "adc x9, x9, xzr\n\t" + /* Reduce if top bit set */ + "and x5, x3, x9, asr 63\n\t" + "and x9, x9, #0x7fffffffffffffff\n\t" + "adds x6, x6, x5\n\t" + "adcs x7, x7, xzr\n\t" + "adcs x8, x8, xzr\n\t" + "adc x9, x9, xzr\n\t" + /* Store */ + "ldr %x[a], [x29, 
#184]\n\t" + /* Multiply */ + "ldp x14, x15, [%x[a]]\n\t" + "ldp x16, x17, [%x[a], #16]\n\t" + /* A[0] * B[0] */ + "mul x10, x14, x6\n\t" + "umulh x11, x14, x6\n\t" + /* A[0] * B[1] */ + "mul x3, x14, x7\n\t" + "umulh x12, x14, x7\n\t" + "adds x11, x11, x3\n\t" + "adc x12, x12, xzr\n\t" + /* A[1] * B[0] */ + "mul x3, x15, x6\n\t" + "umulh x4, x15, x6\n\t" + "adds x11, x11, x3\n\t" + "adcs x12, x12, x4\n\t" + "adc x13, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x3, x14, x8\n\t" + "umulh x4, x14, x8\n\t" + "adds x12, x12, x3\n\t" + "adc x13, x13, x4\n\t" + /* A[1] * B[1] */ + "mul x3, x15, x7\n\t" + "umulh x4, x15, x7\n\t" + "adds x12, x12, x3\n\t" + "adcs x13, x13, x4\n\t" + "adc %x[a], xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x3, x16, x6\n\t" + "umulh x4, x16, x6\n\t" + "adds x12, x12, x3\n\t" + "adcs x13, x13, x4\n\t" + "adc %x[a], %x[a], xzr\n\t" + /* A[0] * B[3] */ + "mul x3, x14, x9\n\t" + "umulh x4, x14, x9\n\t" + "adds x13, x13, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x3, x15, x8\n\t" + "umulh x4, x15, x8\n\t" + "adds x13, x13, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, x26, xzr\n\t" + /* A[2] * B[1] */ + "mul x3, x16, x7\n\t" + "umulh x4, x16, x7\n\t" + "adds x13, x13, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, x26, xzr\n\t" + /* A[3] * B[0] */ + "mul x3, x17, x6\n\t" + "umulh x4, x17, x6\n\t" + "adds x13, x13, x3\n\t" + "adcs %x[a], %x[a], x4\n\t" + "adc x26, x26, xzr\n\t" + /* A[1] * B[3] */ + "mul x3, x15, x9\n\t" + "umulh x4, x15, x9\n\t" + "adds %x[a], %x[a], x3\n\t" + "adcs x26, x26, x4\n\t" + "adc x27, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x3, x16, x8\n\t" + "umulh x4, x16, x8\n\t" + "adds %x[a], %x[a], x3\n\t" + "adcs x26, x26, x4\n\t" + "adc x27, x27, xzr\n\t" + /* A[3] * B[1] */ + "mul x3, x17, x7\n\t" + "umulh x4, x17, x7\n\t" + "adds %x[a], %x[a], x3\n\t" + "adcs x26, x26, x4\n\t" + "adc x27, x27, xzr\n\t" + /* A[2] * B[3] */ + "mul x3, x16, x9\n\t" + "umulh x4, x16, x9\n\t" + "adds x26, x26, x3\n\t" + "adcs x27, x27, x4\n\t" + "adc x28, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x3, x17, x8\n\t" + "umulh x4, x17, x8\n\t" + "adds x26, x26, x3\n\t" + "adcs x27, x27, x4\n\t" + "adc x28, x28, xzr\n\t" + /* A[3] * B[3] */ + "mul x3, x17, x9\n\t" + "umulh x4, x17, x9\n\t" + "adds x27, x27, x3\n\t" + "adc x28, x28, x4\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x28, x28, x27, #63\n\t" + "extr x27, x27, x26, #63\n\t" + "extr x26, x26, %x[a], #63\n\t" + "extr %x[a], %x[a], x13, #63\n\t" + "and x13, x13, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x3, #19\n\t" + "mul x4, x3, %x[a]\n\t" + "umulh %x[a], x3, %x[a]\n\t" + "adds x10, x10, x4\n\t" + "mul x4, x3, x26\n\t" + "umulh x26, x3, x26\n\t" + "adcs x11, x11, x4\n\t" + "mul x4, x3, x27\n\t" + "umulh x27, x3, x27\n\t" + "adcs x12, x12, x4\n\t" + "mul x4, x3, x28\n\t" + "umulh x5, x3, x28\n\t" + "adcs x13, x13, x4\n\t" + "adc x5, x5, xzr\n\t" + /* Add remaining product results in */ + "adds x11, x11, %x[a]\n\t" + "adcs x12, x12, x26\n\t" + "adcs x13, x13, x27\n\t" + "adc x5, x5, xzr\n\t" + /* Overflow */ + "extr x5, x5, x13, #63\n\t" + "mul x5, x5, x3\n\t" + "and x13, x13, #0x7fffffffffffffff\n\t" + "adds x10, x10, x5\n\t" + "adcs x11, x11, xzr\n\t" + "adcs x12, x12, xzr\n\t" + "adc x13, x13, xzr\n\t" + /* Reduce if top bit set */ + "and x5, x3, x13, asr 63\n\t" + "and x13, x13, #0x7fffffffffffffff\n\t" + "adds x10, x10, x5\n\t" + "adcs x11, x11, xzr\n\t" + "adcs x12, x12, xzr\n\t" + "adc x13, x13, 
xzr\n\t" + /* Store */ + "stp x10, x11, [x29, #48]\n\t" + "stp x12, x13, [x29, #64]\n\t" + "sub x25, x25, #1\n\t" + "cmp x25, #0\n\t" + "bge L_curve25519_bits_%=\n\t" + "mov x25, #63\n\t" + "sub x24, x24, #8\n\t" + "cmp x24, #0\n\t" + "bge L_curve25519_words_%=\n\t" + /* Invert */ + "add x0, x29, #48\n\t" + "add x1, x29, #16\n\t" + "bl fe_sq\n\t" + "add x0, x29, #0x50\n\t" + "add x1, x29, #48\n\t" + "bl fe_sq\n\t" + "add x1, x29, #0x50\n\t" + "bl fe_sq\n\t" + "add x1, x29, #16\n\t" + "add x2, x29, #0x50\n\t" + "bl fe_mul\n\t" + "add x0, x29, #48\n\t" + "add x1, x29, #48\n\t" + "add x2, x29, #0x50\n\t" + "bl fe_mul\n\t" + "add x0, x29, #0x70\n\t" + "bl fe_sq\n\t" + "add x0, x29, #0x50\n\t" + "add x1, x29, #0x50\n\t" + "add x2, x29, #0x70\n\t" + "bl fe_mul\n\t" + "add x0, x29, #0x70\n\t" + "bl fe_sq\n\t" + "mov x24, #4\n\t" + "add x1, x29, #0x70\n\t" + "\n" + "L_curve25519_inv_1_%=: \n\t" + "bl fe_sq\n\t" + "sub x24, x24, #1\n\t" + "cmp x24, #0\n\t" + "bne L_curve25519_inv_1_%=\n\t" + "add x0, x29, #0x50\n\t" + "add x2, x29, #0x50\n\t" + "bl fe_mul\n\t" + "add x0, x29, #0x70\n\t" + "add x1, x29, #0x50\n\t" + "bl fe_sq\n\t" + "mov x24, #9\n\t" + "add x1, x29, #0x70\n\t" + "\n" + "L_curve25519_inv_2_%=: \n\t" + "bl fe_sq\n\t" + "sub x24, x24, #1\n\t" + "cmp x24, #0\n\t" + "bne L_curve25519_inv_2_%=\n\t" + "add x2, x29, #0x50\n\t" + "bl fe_mul\n\t" + "add x0, x29, #0x90\n\t" + "bl fe_sq\n\t" + "mov x24, #19\n\t" + "add x1, x29, #0x90\n\t" + "\n" + "L_curve25519_inv_3_%=: \n\t" + "bl fe_sq\n\t" + "sub x24, x24, #1\n\t" + "cmp x24, #0\n\t" + "bne L_curve25519_inv_3_%=\n\t" + "add x0, x29, #0x70\n\t" + "add x2, x29, #0x70\n\t" + "bl fe_mul\n\t" + "mov x24, #10\n\t" + "add x1, x29, #0x70\n\t" + "\n" + "L_curve25519_inv_4_%=: \n\t" + "bl fe_sq\n\t" + "sub x24, x24, #1\n\t" + "cmp x24, #0\n\t" + "bne L_curve25519_inv_4_%=\n\t" + "add x0, x29, #0x50\n\t" + "add x2, x29, #0x50\n\t" + "bl fe_mul\n\t" + "add x0, x29, #0x70\n\t" + "add x1, x29, #0x50\n\t" + "bl fe_sq\n\t" + "mov x24, #49\n\t" + "add x1, x29, #0x70\n\t" + "\n" + "L_curve25519_inv_5_%=: \n\t" + "bl fe_sq\n\t" + "sub x24, x24, #1\n\t" + "cmp x24, #0\n\t" + "bne L_curve25519_inv_5_%=\n\t" + "add x2, x29, #0x50\n\t" + "bl fe_mul\n\t" + "add x0, x29, #0x90\n\t" + "bl fe_sq\n\t" + "mov x24, #0x63\n\t" + "add x1, x29, #0x90\n\t" + "\n" + "L_curve25519_inv_6_%=: \n\t" + "bl fe_sq\n\t" + "sub x24, x24, #1\n\t" + "cmp x24, #0\n\t" + "bne L_curve25519_inv_6_%=\n\t" + "add x0, x29, #0x70\n\t" + "add x2, x29, #0x70\n\t" + "bl fe_mul\n\t" + "mov x24, #50\n\t" + "add x1, x29, #0x70\n\t" + "\n" + "L_curve25519_inv_7_%=: \n\t" + "bl fe_sq\n\t" + "sub x24, x24, #1\n\t" + "cmp x24, #0\n\t" + "bne L_curve25519_inv_7_%=\n\t" + "add x0, x29, #0x50\n\t" + "add x2, x29, #0x50\n\t" + "bl fe_mul\n\t" + "mov x24, #5\n\t" + "add x1, x29, #0x50\n\t" + "\n" + "L_curve25519_inv_8_%=: \n\t" + "bl fe_sq\n\t" + "sub x24, x24, #1\n\t" + "cmp x24, #0\n\t" + "bne L_curve25519_inv_8_%=\n\t" + "add x0, x29, #16\n\t" + "add x2, x29, #48\n\t" + "bl fe_mul\n\t" + "ldr %x[r], [x29, #176]\n\t" + /* Multiply */ + "ldp x6, x7, [%x[r]]\n\t" + "ldp x8, x9, [%x[r], #16]\n\t" + "ldp x10, x11, [x29, #16]\n\t" + "ldp x12, x13, [x29, #32]\n\t" + /* A[0] * B[0] */ + "mul x14, x6, x10\n\t" + "umulh x15, x6, x10\n\t" + /* A[0] * B[1] */ + "mul x3, x6, x11\n\t" + "umulh x16, x6, x11\n\t" + "adds x15, x15, x3\n\t" + "adc x16, x16, xzr\n\t" + /* A[1] * B[0] */ + "mul x3, x7, x10\n\t" + "umulh x4, x7, x10\n\t" + "adds x15, x15, x3\n\t" + "adcs x16, x16, x4\n\t" + "adc x17, xzr, xzr\n\t" + /* A[0] * 
B[2] */ + "mul x3, x6, x12\n\t" + "umulh x4, x6, x12\n\t" + "adds x16, x16, x3\n\t" + "adc x17, x17, x4\n\t" + /* A[1] * B[1] */ + "mul x3, x7, x11\n\t" + "umulh x4, x7, x11\n\t" + "adds x16, x16, x3\n\t" + "adcs x17, x17, x4\n\t" + "adc x19, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x3, x8, x10\n\t" + "umulh x4, x8, x10\n\t" + "adds x16, x16, x3\n\t" + "adcs x17, x17, x4\n\t" + "adc x19, x19, xzr\n\t" + /* A[0] * B[3] */ + "mul x3, x6, x13\n\t" + "umulh x4, x6, x13\n\t" + "adds x17, x17, x3\n\t" + "adcs x19, x19, x4\n\t" + "adc x20, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x3, x7, x12\n\t" + "umulh x4, x7, x12\n\t" + "adds x17, x17, x3\n\t" + "adcs x19, x19, x4\n\t" + "adc x20, x20, xzr\n\t" + /* A[2] * B[1] */ + "mul x3, x8, x11\n\t" + "umulh x4, x8, x11\n\t" + "adds x17, x17, x3\n\t" + "adcs x19, x19, x4\n\t" + "adc x20, x20, xzr\n\t" + /* A[3] * B[0] */ + "mul x3, x9, x10\n\t" + "umulh x4, x9, x10\n\t" + "adds x17, x17, x3\n\t" + "adcs x19, x19, x4\n\t" + "adc x20, x20, xzr\n\t" + /* A[1] * B[3] */ + "mul x3, x7, x13\n\t" + "umulh x4, x7, x13\n\t" + "adds x19, x19, x3\n\t" + "adcs x20, x20, x4\n\t" + "adc x21, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x3, x8, x12\n\t" + "umulh x4, x8, x12\n\t" + "adds x19, x19, x3\n\t" + "adcs x20, x20, x4\n\t" + "adc x21, x21, xzr\n\t" + /* A[3] * B[1] */ + "mul x3, x9, x11\n\t" + "umulh x4, x9, x11\n\t" + "adds x19, x19, x3\n\t" + "adcs x20, x20, x4\n\t" + "adc x21, x21, xzr\n\t" + /* A[2] * B[3] */ + "mul x3, x8, x13\n\t" + "umulh x4, x8, x13\n\t" + "adds x20, x20, x3\n\t" + "adcs x21, x21, x4\n\t" + "adc x22, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x3, x9, x12\n\t" + "umulh x4, x9, x12\n\t" + "adds x20, x20, x3\n\t" + "adcs x21, x21, x4\n\t" + "adc x22, x22, xzr\n\t" + /* A[3] * B[3] */ + "mul x3, x9, x13\n\t" + "umulh x4, x9, x13\n\t" + "adds x21, x21, x3\n\t" + "adc x22, x22, x4\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x22, x22, x21, #63\n\t" + "extr x21, x21, x20, #63\n\t" + "extr x20, x20, x19, #63\n\t" + "extr x19, x19, x17, #63\n\t" + "and x17, x17, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x3, #19\n\t" + "mul x4, x3, x19\n\t" + "umulh x19, x3, x19\n\t" + "adds x14, x14, x4\n\t" + "mul x4, x3, x20\n\t" + "umulh x20, x3, x20\n\t" + "adcs x15, x15, x4\n\t" + "mul x4, x3, x21\n\t" + "umulh x21, x3, x21\n\t" + "adcs x16, x16, x4\n\t" + "mul x4, x3, x22\n\t" + "umulh x5, x3, x22\n\t" + "adcs x17, x17, x4\n\t" + "adc x5, x5, xzr\n\t" + /* Add remaining product results in */ + "adds x15, x15, x19\n\t" + "adcs x16, x16, x20\n\t" + "adcs x17, x17, x21\n\t" + "adc x5, x5, xzr\n\t" + /* Overflow */ + "extr x5, x5, x17, #63\n\t" + "mul x5, x5, x3\n\t" + "and x17, x17, #0x7fffffffffffffff\n\t" + "adds x14, x14, x5\n\t" + "adcs x15, x15, xzr\n\t" + "adcs x16, x16, xzr\n\t" + "adc x17, x17, xzr\n\t" + /* Reduce if top bit set */ + "and x5, x3, x17, asr 63\n\t" + "and x17, x17, #0x7fffffffffffffff\n\t" + "adds x14, x14, x5\n\t" + "adcs x15, x15, xzr\n\t" + "adcs x16, x16, xzr\n\t" + "adc x17, x17, xzr\n\t" + /* Store */ + "stp x14, x15, [%x[r]]\n\t" + "stp x16, x17, [%x[r], #16]\n\t" + "mov x0, xzr\n\t" + "ldp x29, x30, [sp], #0xc0\n\t" + : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a) + : + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + ); + return (uint32_t)(size_t)r; +} + +void fe_pow22523(fe r, const fe a) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, 
#-128]!\n\t" + "add x29, sp, #0\n\t" + /* pow22523 */ + "str %x[r], [x29, #112]\n\t" + "str %x[a], [x29, #120]\n\t" + "add x0, x29, #16\n\t" + "bl fe_sq\n\t" + "add x0, x29, #48\n\t" + "add x1, x29, #16\n\t" + "bl fe_sq\n\t" + "add x1, x29, #48\n\t" + "bl fe_sq\n\t" + "ldr x1, [x29, #120]\n\t" + "add x2, x29, #48\n\t" + "bl fe_mul\n\t" + "add x0, x29, #16\n\t" + "add x1, x29, #16\n\t" + "add x2, x29, #48\n\t" + "bl fe_mul\n\t" + "bl fe_sq\n\t" + "add x1, x29, #48\n\t" + "add x2, x29, #16\n\t" + "bl fe_mul\n\t" + "add x0, x29, #48\n\t" + "add x1, x29, #16\n\t" + "bl fe_sq\n\t" + "mov x21, #4\n\t" + "add x1, x29, #48\n\t" + "\n" + "L_fe_pow22523_1_%=: \n\t" + "bl fe_sq\n\t" + "sub x21, x21, #1\n\t" + "cmp x21, #0\n\t" + "bne L_fe_pow22523_1_%=\n\t" + "add x0, x29, #16\n\t" + "add x2, x29, #16\n\t" + "bl fe_mul\n\t" + "add x0, x29, #48\n\t" + "add x1, x29, #16\n\t" + "bl fe_sq\n\t" + "mov x21, #9\n\t" + "add x1, x29, #48\n\t" + "\n" + "L_fe_pow22523_2_%=: \n\t" + "bl fe_sq\n\t" + "sub x21, x21, #1\n\t" + "cmp x21, #0\n\t" + "bne L_fe_pow22523_2_%=\n\t" + "add x2, x29, #16\n\t" + "bl fe_mul\n\t" + "add x0, x29, #0x50\n\t" + "bl fe_sq\n\t" + "mov x21, #19\n\t" + "add x1, x29, #0x50\n\t" + "\n" + "L_fe_pow22523_3_%=: \n\t" + "bl fe_sq\n\t" + "sub x21, x21, #1\n\t" + "cmp x21, #0\n\t" + "bne L_fe_pow22523_3_%=\n\t" + "add x0, x29, #48\n\t" + "add x2, x29, #48\n\t" + "bl fe_mul\n\t" + "mov x21, #10\n\t" + "add x1, x29, #48\n\t" + "\n" + "L_fe_pow22523_4_%=: \n\t" + "bl fe_sq\n\t" + "sub x21, x21, #1\n\t" + "cmp x21, #0\n\t" + "bne L_fe_pow22523_4_%=\n\t" + "add x0, x29, #16\n\t" + "add x2, x29, #16\n\t" + "bl fe_mul\n\t" + "add x0, x29, #48\n\t" + "add x1, x29, #16\n\t" + "bl fe_sq\n\t" + "mov x21, #49\n\t" + "add x1, x29, #48\n\t" + "\n" + "L_fe_pow22523_5_%=: \n\t" + "bl fe_sq\n\t" + "sub x21, x21, #1\n\t" + "cmp x21, #0\n\t" + "bne L_fe_pow22523_5_%=\n\t" + "add x2, x29, #16\n\t" + "bl fe_mul\n\t" + "add x0, x29, #0x50\n\t" + "bl fe_sq\n\t" + "mov x21, #0x63\n\t" + "add x1, x29, #0x50\n\t" + "\n" + "L_fe_pow22523_6_%=: \n\t" + "bl fe_sq\n\t" + "sub x21, x21, #1\n\t" + "cmp x21, #0\n\t" + "bne L_fe_pow22523_6_%=\n\t" + "add x0, x29, #48\n\t" + "add x2, x29, #48\n\t" + "bl fe_mul\n\t" + "mov x21, #50\n\t" + "add x1, x29, #48\n\t" + "\n" + "L_fe_pow22523_7_%=: \n\t" + "bl fe_sq\n\t" + "sub x21, x21, #1\n\t" + "cmp x21, #0\n\t" + "bne L_fe_pow22523_7_%=\n\t" + "add x0, x29, #16\n\t" + "add x2, x29, #16\n\t" + "bl fe_mul\n\t" + "mov x21, #2\n\t" + "add x1, x29, #16\n\t" + "\n" + "L_fe_pow22523_8_%=: \n\t" + "bl fe_sq\n\t" + "sub x21, x21, #1\n\t" + "cmp x21, #0\n\t" + "bne L_fe_pow22523_8_%=\n\t" + "ldr x0, [x29, #112]\n\t" + "ldr x2, [x29, #120]\n\t" + "bl fe_mul\n\t" + "ldp x29, x30, [sp], #0x80\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "x21" + ); +} + +void fe_ge_to_p2(fe rx, fe ry, fe rz, const fe px, const fe py, const fe pz, const fe pt) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-64]!\n\t" + "add x29, sp, #0\n\t" + "str %x[ry], [x29, #16]\n\t" + "str %x[rz], [x29, #24]\n\t" + "str %x[px], [x29, #32]\n\t" + "str %x[py], [x29, #40]\n\t" + "str %x[pz], [x29, #48]\n\t" + "str %x[pt], [x29, #56]\n\t" + "ldr x1, [x29, #32]\n\t" + "ldr x2, [x29, #56]\n\t" + /* Multiply */ + "ldp x11, x12, [x1]\n\t" + "ldp x13, x14, [x1, #16]\n\t" + "ldp x15, x16, [x2]\n\t" + "ldp x17, x19, [x2, #16]\n\t" + /* A[0] * B[0] */ + "mul x3, x11, x15\n\t" + "umulh x4, x11, x15\n\t" + /* A[0] * B[1] */ + "mul x20, x11, x16\n\t" + "umulh x5, x11, x16\n\t" + "adds x4, x4, x20\n\t" + "adc x5, x5, xzr\n\t" + 
/* A[1] * B[0] */ + "mul x20, x12, x15\n\t" + "umulh x21, x12, x15\n\t" + "adds x4, x4, x20\n\t" + "adcs x5, x5, x21\n\t" + "adc x6, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x20, x11, x17\n\t" + "umulh x21, x11, x17\n\t" + "adds x5, x5, x20\n\t" + "adc x6, x6, x21\n\t" + /* A[1] * B[1] */ + "mul x20, x12, x16\n\t" + "umulh x21, x12, x16\n\t" + "adds x5, x5, x20\n\t" + "adcs x6, x6, x21\n\t" + "adc x7, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x20, x13, x15\n\t" + "umulh x21, x13, x15\n\t" + "adds x5, x5, x20\n\t" + "adcs x6, x6, x21\n\t" + "adc x7, x7, xzr\n\t" + /* A[0] * B[3] */ + "mul x20, x11, x19\n\t" + "umulh x21, x11, x19\n\t" + "adds x6, x6, x20\n\t" + "adcs x7, x7, x21\n\t" + "adc x8, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x20, x12, x17\n\t" + "umulh x21, x12, x17\n\t" + "adds x6, x6, x20\n\t" + "adcs x7, x7, x21\n\t" + "adc x8, x8, xzr\n\t" + /* A[2] * B[1] */ + "mul x20, x13, x16\n\t" + "umulh x21, x13, x16\n\t" + "adds x6, x6, x20\n\t" + "adcs x7, x7, x21\n\t" + "adc x8, x8, xzr\n\t" + /* A[3] * B[0] */ + "mul x20, x14, x15\n\t" + "umulh x21, x14, x15\n\t" + "adds x6, x6, x20\n\t" + "adcs x7, x7, x21\n\t" + "adc x8, x8, xzr\n\t" + /* A[1] * B[3] */ + "mul x20, x12, x19\n\t" + "umulh x21, x12, x19\n\t" + "adds x7, x7, x20\n\t" + "adcs x8, x8, x21\n\t" + "adc x9, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x20, x13, x17\n\t" + "umulh x21, x13, x17\n\t" + "adds x7, x7, x20\n\t" + "adcs x8, x8, x21\n\t" + "adc x9, x9, xzr\n\t" + /* A[3] * B[1] */ + "mul x20, x14, x16\n\t" + "umulh x21, x14, x16\n\t" + "adds x7, x7, x20\n\t" + "adcs x8, x8, x21\n\t" + "adc x9, x9, xzr\n\t" + /* A[2] * B[3] */ + "mul x20, x13, x19\n\t" + "umulh x21, x13, x19\n\t" + "adds x8, x8, x20\n\t" + "adcs x9, x9, x21\n\t" + "adc x10, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x20, x14, x17\n\t" + "umulh x21, x14, x17\n\t" + "adds x8, x8, x20\n\t" + "adcs x9, x9, x21\n\t" + "adc x10, x10, xzr\n\t" + /* A[3] * B[3] */ + "mul x20, x14, x19\n\t" + "umulh x21, x14, x19\n\t" + "adds x9, x9, x20\n\t" + "adc x10, x10, x21\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x10, x10, x9, #63\n\t" + "extr x9, x9, x8, #63\n\t" + "extr x8, x8, x7, #63\n\t" + "extr x7, x7, x6, #63\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x20, #19\n\t" + "mul x21, x20, x7\n\t" + "umulh x7, x20, x7\n\t" + "adds x3, x3, x21\n\t" + "mul x21, x20, x8\n\t" + "umulh x8, x20, x8\n\t" + "adcs x4, x4, x21\n\t" + "mul x21, x20, x9\n\t" + "umulh x9, x20, x9\n\t" + "adcs x5, x5, x21\n\t" + "mul x21, x20, x10\n\t" + "umulh x22, x20, x10\n\t" + "adcs x6, x6, x21\n\t" + "adc x22, x22, xzr\n\t" + /* Add remaining product results in */ + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x22, x22, xzr\n\t" + /* Overflow */ + "extr x22, x22, x6, #63\n\t" + "mul x22, x22, x20\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + "adds x3, x3, x22\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "adc x6, x6, xzr\n\t" + /* Reduce if top bit set */ + "and x22, x20, x6, asr 63\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + "adds x3, x3, x22\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "adc x6, x6, xzr\n\t" + /* Store */ + "stp x3, x4, [x0]\n\t" + "stp x5, x6, [x0, #16]\n\t" + "ldr x0, [x29, #16]\n\t" + "ldr x1, [x29, #40]\n\t" + "ldr x2, [x29, #48]\n\t" + /* Multiply */ + "ldp x11, x12, [x1]\n\t" + "ldp x13, x14, [x1, #16]\n\t" + "ldp x15, x16, [x2]\n\t" + "ldp x17, x19, [x2, #16]\n\t" + /* A[0] * B[0] */ + "mul x3, x11, x15\n\t" + "umulh x4, x11, 
x15\n\t" + /* A[0] * B[1] */ + "mul x20, x11, x16\n\t" + "umulh x5, x11, x16\n\t" + "adds x4, x4, x20\n\t" + "adc x5, x5, xzr\n\t" + /* A[1] * B[0] */ + "mul x20, x12, x15\n\t" + "umulh x21, x12, x15\n\t" + "adds x4, x4, x20\n\t" + "adcs x5, x5, x21\n\t" + "adc x6, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x20, x11, x17\n\t" + "umulh x21, x11, x17\n\t" + "adds x5, x5, x20\n\t" + "adc x6, x6, x21\n\t" + /* A[1] * B[1] */ + "mul x20, x12, x16\n\t" + "umulh x21, x12, x16\n\t" + "adds x5, x5, x20\n\t" + "adcs x6, x6, x21\n\t" + "adc x7, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x20, x13, x15\n\t" + "umulh x21, x13, x15\n\t" + "adds x5, x5, x20\n\t" + "adcs x6, x6, x21\n\t" + "adc x7, x7, xzr\n\t" + /* A[0] * B[3] */ + "mul x20, x11, x19\n\t" + "umulh x21, x11, x19\n\t" + "adds x6, x6, x20\n\t" + "adcs x7, x7, x21\n\t" + "adc x8, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x20, x12, x17\n\t" + "umulh x21, x12, x17\n\t" + "adds x6, x6, x20\n\t" + "adcs x7, x7, x21\n\t" + "adc x8, x8, xzr\n\t" + /* A[2] * B[1] */ + "mul x20, x13, x16\n\t" + "umulh x21, x13, x16\n\t" + "adds x6, x6, x20\n\t" + "adcs x7, x7, x21\n\t" + "adc x8, x8, xzr\n\t" + /* A[3] * B[0] */ + "mul x20, x14, x15\n\t" + "umulh x21, x14, x15\n\t" + "adds x6, x6, x20\n\t" + "adcs x7, x7, x21\n\t" + "adc x8, x8, xzr\n\t" + /* A[1] * B[3] */ + "mul x20, x12, x19\n\t" + "umulh x21, x12, x19\n\t" + "adds x7, x7, x20\n\t" + "adcs x8, x8, x21\n\t" + "adc x9, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x20, x13, x17\n\t" + "umulh x21, x13, x17\n\t" + "adds x7, x7, x20\n\t" + "adcs x8, x8, x21\n\t" + "adc x9, x9, xzr\n\t" + /* A[3] * B[1] */ + "mul x20, x14, x16\n\t" + "umulh x21, x14, x16\n\t" + "adds x7, x7, x20\n\t" + "adcs x8, x8, x21\n\t" + "adc x9, x9, xzr\n\t" + /* A[2] * B[3] */ + "mul x20, x13, x19\n\t" + "umulh x21, x13, x19\n\t" + "adds x8, x8, x20\n\t" + "adcs x9, x9, x21\n\t" + "adc x10, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x20, x14, x17\n\t" + "umulh x21, x14, x17\n\t" + "adds x8, x8, x20\n\t" + "adcs x9, x9, x21\n\t" + "adc x10, x10, xzr\n\t" + /* A[3] * B[3] */ + "mul x20, x14, x19\n\t" + "umulh x21, x14, x19\n\t" + "adds x9, x9, x20\n\t" + "adc x10, x10, x21\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x10, x10, x9, #63\n\t" + "extr x9, x9, x8, #63\n\t" + "extr x8, x8, x7, #63\n\t" + "extr x7, x7, x6, #63\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x20, #19\n\t" + "mul x21, x20, x7\n\t" + "umulh x7, x20, x7\n\t" + "adds x3, x3, x21\n\t" + "mul x21, x20, x8\n\t" + "umulh x8, x20, x8\n\t" + "adcs x4, x4, x21\n\t" + "mul x21, x20, x9\n\t" + "umulh x9, x20, x9\n\t" + "adcs x5, x5, x21\n\t" + "mul x21, x20, x10\n\t" + "umulh x22, x20, x10\n\t" + "adcs x6, x6, x21\n\t" + "adc x22, x22, xzr\n\t" + /* Add remaining product results in */ + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x22, x22, xzr\n\t" + /* Overflow */ + "extr x22, x22, x6, #63\n\t" + "mul x22, x22, x20\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + "adds x3, x3, x22\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "adc x6, x6, xzr\n\t" + /* Reduce if top bit set */ + "and x22, x20, x6, asr 63\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + "adds x3, x3, x22\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "adc x6, x6, xzr\n\t" + /* Store */ + "stp x3, x4, [x0]\n\t" + "stp x5, x6, [x0, #16]\n\t" + "ldr x0, [x29, #24]\n\t" + "ldr x2, [x29, #56]\n\t" + /* Multiply */ + "ldp x11, x12, [x2]\n\t" + "ldp x13, x14, [x2, #16]\n\t" + /* A[0] * B[0] */ 
+ "mul x3, x15, x11\n\t" + "umulh x4, x15, x11\n\t" + /* A[0] * B[1] */ + "mul x20, x15, x12\n\t" + "umulh x5, x15, x12\n\t" + "adds x4, x4, x20\n\t" + "adc x5, x5, xzr\n\t" + /* A[1] * B[0] */ + "mul x20, x16, x11\n\t" + "umulh x21, x16, x11\n\t" + "adds x4, x4, x20\n\t" + "adcs x5, x5, x21\n\t" + "adc x6, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x20, x15, x13\n\t" + "umulh x21, x15, x13\n\t" + "adds x5, x5, x20\n\t" + "adc x6, x6, x21\n\t" + /* A[1] * B[1] */ + "mul x20, x16, x12\n\t" + "umulh x21, x16, x12\n\t" + "adds x5, x5, x20\n\t" + "adcs x6, x6, x21\n\t" + "adc x7, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x20, x17, x11\n\t" + "umulh x21, x17, x11\n\t" + "adds x5, x5, x20\n\t" + "adcs x6, x6, x21\n\t" + "adc x7, x7, xzr\n\t" + /* A[0] * B[3] */ + "mul x20, x15, x14\n\t" + "umulh x21, x15, x14\n\t" + "adds x6, x6, x20\n\t" + "adcs x7, x7, x21\n\t" + "adc x8, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x20, x16, x13\n\t" + "umulh x21, x16, x13\n\t" + "adds x6, x6, x20\n\t" + "adcs x7, x7, x21\n\t" + "adc x8, x8, xzr\n\t" + /* A[2] * B[1] */ + "mul x20, x17, x12\n\t" + "umulh x21, x17, x12\n\t" + "adds x6, x6, x20\n\t" + "adcs x7, x7, x21\n\t" + "adc x8, x8, xzr\n\t" + /* A[3] * B[0] */ + "mul x20, x19, x11\n\t" + "umulh x21, x19, x11\n\t" + "adds x6, x6, x20\n\t" + "adcs x7, x7, x21\n\t" + "adc x8, x8, xzr\n\t" + /* A[1] * B[3] */ + "mul x20, x16, x14\n\t" + "umulh x21, x16, x14\n\t" + "adds x7, x7, x20\n\t" + "adcs x8, x8, x21\n\t" + "adc x9, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x20, x17, x13\n\t" + "umulh x21, x17, x13\n\t" + "adds x7, x7, x20\n\t" + "adcs x8, x8, x21\n\t" + "adc x9, x9, xzr\n\t" + /* A[3] * B[1] */ + "mul x20, x19, x12\n\t" + "umulh x21, x19, x12\n\t" + "adds x7, x7, x20\n\t" + "adcs x8, x8, x21\n\t" + "adc x9, x9, xzr\n\t" + /* A[2] * B[3] */ + "mul x20, x17, x14\n\t" + "umulh x21, x17, x14\n\t" + "adds x8, x8, x20\n\t" + "adcs x9, x9, x21\n\t" + "adc x10, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x20, x19, x13\n\t" + "umulh x21, x19, x13\n\t" + "adds x8, x8, x20\n\t" + "adcs x9, x9, x21\n\t" + "adc x10, x10, xzr\n\t" + /* A[3] * B[3] */ + "mul x20, x19, x14\n\t" + "umulh x21, x19, x14\n\t" + "adds x9, x9, x20\n\t" + "adc x10, x10, x21\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x10, x10, x9, #63\n\t" + "extr x9, x9, x8, #63\n\t" + "extr x8, x8, x7, #63\n\t" + "extr x7, x7, x6, #63\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x20, #19\n\t" + "mul x21, x20, x7\n\t" + "umulh x7, x20, x7\n\t" + "adds x3, x3, x21\n\t" + "mul x21, x20, x8\n\t" + "umulh x8, x20, x8\n\t" + "adcs x4, x4, x21\n\t" + "mul x21, x20, x9\n\t" + "umulh x9, x20, x9\n\t" + "adcs x5, x5, x21\n\t" + "mul x21, x20, x10\n\t" + "umulh x22, x20, x10\n\t" + "adcs x6, x6, x21\n\t" + "adc x22, x22, xzr\n\t" + /* Add remaining product results in */ + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x22, x22, xzr\n\t" + /* Overflow */ + "extr x22, x22, x6, #63\n\t" + "mul x22, x22, x20\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + "adds x3, x3, x22\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "adc x6, x6, xzr\n\t" + /* Reduce if top bit set */ + "and x22, x20, x6, asr 63\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + "adds x3, x3, x22\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "adc x6, x6, xzr\n\t" + /* Store */ + "stp x3, x4, [x0]\n\t" + "stp x5, x6, [x0, #16]\n\t" + "ldp x29, x30, [sp], #0x40\n\t" + : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [px] "+r" (px), [py] 
"+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) + : + : "memory", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22" + ); +} + +void fe_ge_to_p3(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz, const fe pt) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-80]!\n\t" + "add x29, sp, #0\n\t" + "str %x[ry], [x29, #16]\n\t" + "str %x[rz], [x29, #24]\n\t" + "str %x[rt], [x29, #32]\n\t" + "str %x[px], [x29, #40]\n\t" + "str %x[py], [x29, #48]\n\t" + "str %x[pz], [x29, #56]\n\t" + "str %x[pt], [x29, #64]\n\t" + "ldr x1, [x29, #40]\n\t" + "ldr x2, [x29, #64]\n\t" + /* Multiply */ + "ldp x11, x12, [x1]\n\t" + "ldp x13, x14, [x1, #16]\n\t" + "ldp x15, x16, [x2]\n\t" + "ldp x17, x19, [x2, #16]\n\t" + /* A[0] * B[0] */ + "mul x3, x11, x15\n\t" + "umulh x4, x11, x15\n\t" + /* A[0] * B[1] */ + "mul x24, x11, x16\n\t" + "umulh x5, x11, x16\n\t" + "adds x4, x4, x24\n\t" + "adc x5, x5, xzr\n\t" + /* A[1] * B[0] */ + "mul x24, x12, x15\n\t" + "umulh x25, x12, x15\n\t" + "adds x4, x4, x24\n\t" + "adcs x5, x5, x25\n\t" + "adc x6, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x24, x11, x17\n\t" + "umulh x25, x11, x17\n\t" + "adds x5, x5, x24\n\t" + "adc x6, x6, x25\n\t" + /* A[1] * B[1] */ + "mul x24, x12, x16\n\t" + "umulh x25, x12, x16\n\t" + "adds x5, x5, x24\n\t" + "adcs x6, x6, x25\n\t" + "adc x7, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x24, x13, x15\n\t" + "umulh x25, x13, x15\n\t" + "adds x5, x5, x24\n\t" + "adcs x6, x6, x25\n\t" + "adc x7, x7, xzr\n\t" + /* A[0] * B[3] */ + "mul x24, x11, x19\n\t" + "umulh x25, x11, x19\n\t" + "adds x6, x6, x24\n\t" + "adcs x7, x7, x25\n\t" + "adc x8, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x24, x12, x17\n\t" + "umulh x25, x12, x17\n\t" + "adds x6, x6, x24\n\t" + "adcs x7, x7, x25\n\t" + "adc x8, x8, xzr\n\t" + /* A[2] * B[1] */ + "mul x24, x13, x16\n\t" + "umulh x25, x13, x16\n\t" + "adds x6, x6, x24\n\t" + "adcs x7, x7, x25\n\t" + "adc x8, x8, xzr\n\t" + /* A[3] * B[0] */ + "mul x24, x14, x15\n\t" + "umulh x25, x14, x15\n\t" + "adds x6, x6, x24\n\t" + "adcs x7, x7, x25\n\t" + "adc x8, x8, xzr\n\t" + /* A[1] * B[3] */ + "mul x24, x12, x19\n\t" + "umulh x25, x12, x19\n\t" + "adds x7, x7, x24\n\t" + "adcs x8, x8, x25\n\t" + "adc x9, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x24, x13, x17\n\t" + "umulh x25, x13, x17\n\t" + "adds x7, x7, x24\n\t" + "adcs x8, x8, x25\n\t" + "adc x9, x9, xzr\n\t" + /* A[3] * B[1] */ + "mul x24, x14, x16\n\t" + "umulh x25, x14, x16\n\t" + "adds x7, x7, x24\n\t" + "adcs x8, x8, x25\n\t" + "adc x9, x9, xzr\n\t" + /* A[2] * B[3] */ + "mul x24, x13, x19\n\t" + "umulh x25, x13, x19\n\t" + "adds x8, x8, x24\n\t" + "adcs x9, x9, x25\n\t" + "adc x10, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x24, x14, x17\n\t" + "umulh x25, x14, x17\n\t" + "adds x8, x8, x24\n\t" + "adcs x9, x9, x25\n\t" + "adc x10, x10, xzr\n\t" + /* A[3] * B[3] */ + "mul x24, x14, x19\n\t" + "umulh x25, x14, x19\n\t" + "adds x9, x9, x24\n\t" + "adc x10, x10, x25\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x10, x10, x9, #63\n\t" + "extr x9, x9, x8, #63\n\t" + "extr x8, x8, x7, #63\n\t" + "extr x7, x7, x6, #63\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x24, #19\n\t" + "mul x25, x24, x7\n\t" + "umulh x7, x24, x7\n\t" + "adds x3, x3, x25\n\t" + "mul x25, x24, x8\n\t" + "umulh x8, x24, x8\n\t" + "adcs x4, x4, x25\n\t" + "mul x25, x24, x9\n\t" + "umulh x9, x24, x9\n\t" + "adcs x5, x5, x25\n\t" + "mul x25, x24, x10\n\t" + "umulh x26, x24, x10\n\t" 
+ "adcs x6, x6, x25\n\t" + "adc x26, x26, xzr\n\t" + /* Add remaining product results in */ + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x26, x26, xzr\n\t" + /* Overflow */ + "extr x26, x26, x6, #63\n\t" + "mul x26, x26, x24\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + "adds x3, x3, x26\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "adc x6, x6, xzr\n\t" + /* Reduce if top bit set */ + "and x26, x24, x6, asr 63\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + "adds x3, x3, x26\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "adc x6, x6, xzr\n\t" + /* Store */ + "stp x3, x4, [x0]\n\t" + "stp x5, x6, [x0, #16]\n\t" + "ldr x0, [x29, #32]\n\t" + "ldr x2, [x29, #48]\n\t" + /* Multiply */ + "ldp x20, x21, [x2]\n\t" + "ldp x22, x23, [x2, #16]\n\t" + /* A[0] * B[0] */ + "mul x3, x11, x20\n\t" + "umulh x4, x11, x20\n\t" + /* A[0] * B[1] */ + "mul x24, x11, x21\n\t" + "umulh x5, x11, x21\n\t" + "adds x4, x4, x24\n\t" + "adc x5, x5, xzr\n\t" + /* A[1] * B[0] */ + "mul x24, x12, x20\n\t" + "umulh x25, x12, x20\n\t" + "adds x4, x4, x24\n\t" + "adcs x5, x5, x25\n\t" + "adc x6, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x24, x11, x22\n\t" + "umulh x25, x11, x22\n\t" + "adds x5, x5, x24\n\t" + "adc x6, x6, x25\n\t" + /* A[1] * B[1] */ + "mul x24, x12, x21\n\t" + "umulh x25, x12, x21\n\t" + "adds x5, x5, x24\n\t" + "adcs x6, x6, x25\n\t" + "adc x7, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x24, x13, x20\n\t" + "umulh x25, x13, x20\n\t" + "adds x5, x5, x24\n\t" + "adcs x6, x6, x25\n\t" + "adc x7, x7, xzr\n\t" + /* A[0] * B[3] */ + "mul x24, x11, x23\n\t" + "umulh x25, x11, x23\n\t" + "adds x6, x6, x24\n\t" + "adcs x7, x7, x25\n\t" + "adc x8, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x24, x12, x22\n\t" + "umulh x25, x12, x22\n\t" + "adds x6, x6, x24\n\t" + "adcs x7, x7, x25\n\t" + "adc x8, x8, xzr\n\t" + /* A[2] * B[1] */ + "mul x24, x13, x21\n\t" + "umulh x25, x13, x21\n\t" + "adds x6, x6, x24\n\t" + "adcs x7, x7, x25\n\t" + "adc x8, x8, xzr\n\t" + /* A[3] * B[0] */ + "mul x24, x14, x20\n\t" + "umulh x25, x14, x20\n\t" + "adds x6, x6, x24\n\t" + "adcs x7, x7, x25\n\t" + "adc x8, x8, xzr\n\t" + /* A[1] * B[3] */ + "mul x24, x12, x23\n\t" + "umulh x25, x12, x23\n\t" + "adds x7, x7, x24\n\t" + "adcs x8, x8, x25\n\t" + "adc x9, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x24, x13, x22\n\t" + "umulh x25, x13, x22\n\t" + "adds x7, x7, x24\n\t" + "adcs x8, x8, x25\n\t" + "adc x9, x9, xzr\n\t" + /* A[3] * B[1] */ + "mul x24, x14, x21\n\t" + "umulh x25, x14, x21\n\t" + "adds x7, x7, x24\n\t" + "adcs x8, x8, x25\n\t" + "adc x9, x9, xzr\n\t" + /* A[2] * B[3] */ + "mul x24, x13, x23\n\t" + "umulh x25, x13, x23\n\t" + "adds x8, x8, x24\n\t" + "adcs x9, x9, x25\n\t" + "adc x10, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x24, x14, x22\n\t" + "umulh x25, x14, x22\n\t" + "adds x8, x8, x24\n\t" + "adcs x9, x9, x25\n\t" + "adc x10, x10, xzr\n\t" + /* A[3] * B[3] */ + "mul x24, x14, x23\n\t" + "umulh x25, x14, x23\n\t" + "adds x9, x9, x24\n\t" + "adc x10, x10, x25\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x10, x10, x9, #63\n\t" + "extr x9, x9, x8, #63\n\t" + "extr x8, x8, x7, #63\n\t" + "extr x7, x7, x6, #63\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x24, #19\n\t" + "mul x25, x24, x7\n\t" + "umulh x7, x24, x7\n\t" + "adds x3, x3, x25\n\t" + "mul x25, x24, x8\n\t" + "umulh x8, x24, x8\n\t" + "adcs x4, x4, x25\n\t" + "mul x25, x24, x9\n\t" + "umulh x9, x24, x9\n\t" + "adcs x5, x5, x25\n\t" + "mul 
x25, x24, x10\n\t" + "umulh x26, x24, x10\n\t" + "adcs x6, x6, x25\n\t" + "adc x26, x26, xzr\n\t" + /* Add remaining product results in */ + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x26, x26, xzr\n\t" + /* Overflow */ + "extr x26, x26, x6, #63\n\t" + "mul x26, x26, x24\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + "adds x3, x3, x26\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "adc x6, x6, xzr\n\t" + /* Reduce if top bit set */ + "and x26, x24, x6, asr 63\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + "adds x3, x3, x26\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "adc x6, x6, xzr\n\t" + /* Store */ + "stp x3, x4, [x0]\n\t" + "stp x5, x6, [x0, #16]\n\t" + "ldr x0, [x29, #16]\n\t" + "ldr x2, [x29, #56]\n\t" + /* Multiply */ + "ldp x11, x12, [x2]\n\t" + "ldp x13, x14, [x2, #16]\n\t" + /* A[0] * B[0] */ + "mul x3, x20, x11\n\t" + "umulh x4, x20, x11\n\t" + /* A[0] * B[1] */ + "mul x24, x20, x12\n\t" + "umulh x5, x20, x12\n\t" + "adds x4, x4, x24\n\t" + "adc x5, x5, xzr\n\t" + /* A[1] * B[0] */ + "mul x24, x21, x11\n\t" + "umulh x25, x21, x11\n\t" + "adds x4, x4, x24\n\t" + "adcs x5, x5, x25\n\t" + "adc x6, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x24, x20, x13\n\t" + "umulh x25, x20, x13\n\t" + "adds x5, x5, x24\n\t" + "adc x6, x6, x25\n\t" + /* A[1] * B[1] */ + "mul x24, x21, x12\n\t" + "umulh x25, x21, x12\n\t" + "adds x5, x5, x24\n\t" + "adcs x6, x6, x25\n\t" + "adc x7, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x24, x22, x11\n\t" + "umulh x25, x22, x11\n\t" + "adds x5, x5, x24\n\t" + "adcs x6, x6, x25\n\t" + "adc x7, x7, xzr\n\t" + /* A[0] * B[3] */ + "mul x24, x20, x14\n\t" + "umulh x25, x20, x14\n\t" + "adds x6, x6, x24\n\t" + "adcs x7, x7, x25\n\t" + "adc x8, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x24, x21, x13\n\t" + "umulh x25, x21, x13\n\t" + "adds x6, x6, x24\n\t" + "adcs x7, x7, x25\n\t" + "adc x8, x8, xzr\n\t" + /* A[2] * B[1] */ + "mul x24, x22, x12\n\t" + "umulh x25, x22, x12\n\t" + "adds x6, x6, x24\n\t" + "adcs x7, x7, x25\n\t" + "adc x8, x8, xzr\n\t" + /* A[3] * B[0] */ + "mul x24, x23, x11\n\t" + "umulh x25, x23, x11\n\t" + "adds x6, x6, x24\n\t" + "adcs x7, x7, x25\n\t" + "adc x8, x8, xzr\n\t" + /* A[1] * B[3] */ + "mul x24, x21, x14\n\t" + "umulh x25, x21, x14\n\t" + "adds x7, x7, x24\n\t" + "adcs x8, x8, x25\n\t" + "adc x9, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x24, x22, x13\n\t" + "umulh x25, x22, x13\n\t" + "adds x7, x7, x24\n\t" + "adcs x8, x8, x25\n\t" + "adc x9, x9, xzr\n\t" + /* A[3] * B[1] */ + "mul x24, x23, x12\n\t" + "umulh x25, x23, x12\n\t" + "adds x7, x7, x24\n\t" + "adcs x8, x8, x25\n\t" + "adc x9, x9, xzr\n\t" + /* A[2] * B[3] */ + "mul x24, x22, x14\n\t" + "umulh x25, x22, x14\n\t" + "adds x8, x8, x24\n\t" + "adcs x9, x9, x25\n\t" + "adc x10, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x24, x23, x13\n\t" + "umulh x25, x23, x13\n\t" + "adds x8, x8, x24\n\t" + "adcs x9, x9, x25\n\t" + "adc x10, x10, xzr\n\t" + /* A[3] * B[3] */ + "mul x24, x23, x14\n\t" + "umulh x25, x23, x14\n\t" + "adds x9, x9, x24\n\t" + "adc x10, x10, x25\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x10, x10, x9, #63\n\t" + "extr x9, x9, x8, #63\n\t" + "extr x8, x8, x7, #63\n\t" + "extr x7, x7, x6, #63\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x24, #19\n\t" + "mul x25, x24, x7\n\t" + "umulh x7, x24, x7\n\t" + "adds x3, x3, x25\n\t" + "mul x25, x24, x8\n\t" + "umulh x8, x24, x8\n\t" + "adcs x4, x4, x25\n\t" + "mul x25, x24, x9\n\t" + "umulh x9, 
x24, x9\n\t" + "adcs x5, x5, x25\n\t" + "mul x25, x24, x10\n\t" + "umulh x26, x24, x10\n\t" + "adcs x6, x6, x25\n\t" + "adc x26, x26, xzr\n\t" + /* Add remaining product results in */ + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x26, x26, xzr\n\t" + /* Overflow */ + "extr x26, x26, x6, #63\n\t" + "mul x26, x26, x24\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + "adds x3, x3, x26\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "adc x6, x6, xzr\n\t" + /* Reduce if top bit set */ + "and x26, x24, x6, asr 63\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + "adds x3, x3, x26\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "adc x6, x6, xzr\n\t" + /* Store */ + "stp x3, x4, [x0]\n\t" + "stp x5, x6, [x0, #16]\n\t" + "ldr x0, [x29, #24]\n\t" + /* Multiply */ + /* A[0] * B[0] */ + "mul x3, x11, x15\n\t" + "umulh x4, x11, x15\n\t" + /* A[0] * B[1] */ + "mul x24, x11, x16\n\t" + "umulh x5, x11, x16\n\t" + "adds x4, x4, x24\n\t" + "adc x5, x5, xzr\n\t" + /* A[1] * B[0] */ + "mul x24, x12, x15\n\t" + "umulh x25, x12, x15\n\t" + "adds x4, x4, x24\n\t" + "adcs x5, x5, x25\n\t" + "adc x6, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x24, x11, x17\n\t" + "umulh x25, x11, x17\n\t" + "adds x5, x5, x24\n\t" + "adc x6, x6, x25\n\t" + /* A[1] * B[1] */ + "mul x24, x12, x16\n\t" + "umulh x25, x12, x16\n\t" + "adds x5, x5, x24\n\t" + "adcs x6, x6, x25\n\t" + "adc x7, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x24, x13, x15\n\t" + "umulh x25, x13, x15\n\t" + "adds x5, x5, x24\n\t" + "adcs x6, x6, x25\n\t" + "adc x7, x7, xzr\n\t" + /* A[0] * B[3] */ + "mul x24, x11, x19\n\t" + "umulh x25, x11, x19\n\t" + "adds x6, x6, x24\n\t" + "adcs x7, x7, x25\n\t" + "adc x8, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x24, x12, x17\n\t" + "umulh x25, x12, x17\n\t" + "adds x6, x6, x24\n\t" + "adcs x7, x7, x25\n\t" + "adc x8, x8, xzr\n\t" + /* A[2] * B[1] */ + "mul x24, x13, x16\n\t" + "umulh x25, x13, x16\n\t" + "adds x6, x6, x24\n\t" + "adcs x7, x7, x25\n\t" + "adc x8, x8, xzr\n\t" + /* A[3] * B[0] */ + "mul x24, x14, x15\n\t" + "umulh x25, x14, x15\n\t" + "adds x6, x6, x24\n\t" + "adcs x7, x7, x25\n\t" + "adc x8, x8, xzr\n\t" + /* A[1] * B[3] */ + "mul x24, x12, x19\n\t" + "umulh x25, x12, x19\n\t" + "adds x7, x7, x24\n\t" + "adcs x8, x8, x25\n\t" + "adc x9, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x24, x13, x17\n\t" + "umulh x25, x13, x17\n\t" + "adds x7, x7, x24\n\t" + "adcs x8, x8, x25\n\t" + "adc x9, x9, xzr\n\t" + /* A[3] * B[1] */ + "mul x24, x14, x16\n\t" + "umulh x25, x14, x16\n\t" + "adds x7, x7, x24\n\t" + "adcs x8, x8, x25\n\t" + "adc x9, x9, xzr\n\t" + /* A[2] * B[3] */ + "mul x24, x13, x19\n\t" + "umulh x25, x13, x19\n\t" + "adds x8, x8, x24\n\t" + "adcs x9, x9, x25\n\t" + "adc x10, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x24, x14, x17\n\t" + "umulh x25, x14, x17\n\t" + "adds x8, x8, x24\n\t" + "adcs x9, x9, x25\n\t" + "adc x10, x10, xzr\n\t" + /* A[3] * B[3] */ + "mul x24, x14, x19\n\t" + "umulh x25, x14, x19\n\t" + "adds x9, x9, x24\n\t" + "adc x10, x10, x25\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x10, x10, x9, #63\n\t" + "extr x9, x9, x8, #63\n\t" + "extr x8, x8, x7, #63\n\t" + "extr x7, x7, x6, #63\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x24, #19\n\t" + "mul x25, x24, x7\n\t" + "umulh x7, x24, x7\n\t" + "adds x3, x3, x25\n\t" + "mul x25, x24, x8\n\t" + "umulh x8, x24, x8\n\t" + "adcs x4, x4, x25\n\t" + "mul x25, x24, x9\n\t" + "umulh x9, x24, x9\n\t" + "adcs x5, x5, x25\n\t" + 
"mul x25, x24, x10\n\t" + "umulh x26, x24, x10\n\t" + "adcs x6, x6, x25\n\t" + "adc x26, x26, xzr\n\t" + /* Add remaining product results in */ + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x26, x26, xzr\n\t" + /* Overflow */ + "extr x26, x26, x6, #63\n\t" + "mul x26, x26, x24\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + "adds x3, x3, x26\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "adc x6, x6, xzr\n\t" + /* Reduce if top bit set */ + "and x26, x24, x6, asr 63\n\t" + "and x6, x6, #0x7fffffffffffffff\n\t" + "adds x3, x3, x26\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "adc x6, x6, xzr\n\t" + /* Store */ + "stp x3, x4, [x0]\n\t" + "stp x5, x6, [x0, #16]\n\t" + "ldp x29, x30, [sp], #0x50\n\t" + : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) + : + : "memory", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26" + ); +} + +void fe_ge_dbl(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-80]!\n\t" + "add x29, sp, #0\n\t" + "str %x[rx], [x29, #16]\n\t" + "str %x[ry], [x29, #24]\n\t" + "str %x[rz], [x29, #32]\n\t" + "str %x[rt], [x29, #40]\n\t" + "str %x[px], [x29, #48]\n\t" + "str %x[py], [x29, #56]\n\t" + "str %x[pz], [x29, #64]\n\t" + "ldr x1, [x29, #48]\n\t" + /* Square */ + "ldp x12, x13, [x1]\n\t" + "ldp x14, x15, [x1, #16]\n\t" + /* A[0] * A[1] */ + "mul x5, x12, x13\n\t" + "umulh x6, x12, x13\n\t" + /* A[0] * A[2] */ + "mul x25, x12, x14\n\t" + "umulh x7, x12, x14\n\t" + "adds x6, x6, x25\n\t" + "adc x7, x7, xzr\n\t" + /* A[0] * A[3] */ + "mul x25, x12, x15\n\t" + "umulh x8, x12, x15\n\t" + "adds x7, x7, x25\n\t" + "adc x8, x8, xzr\n\t" + /* A[1] * A[2] */ + "mul x25, x13, x14\n\t" + "umulh x26, x13, x14\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, xzr, xzr\n\t" + /* A[1] * A[3] */ + "mul x25, x13, x15\n\t" + "umulh x26, x13, x15\n\t" + "adds x8, x8, x25\n\t" + "adc x9, x9, x26\n\t" + /* A[2] * A[3] */ + "mul x25, x14, x15\n\t" + "umulh x10, x14, x15\n\t" + "adds x9, x9, x25\n\t" + "adc x10, x10, xzr\n\t" + /* Double */ + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adcs x7, x7, x7\n\t" + "adcs x8, x8, x8\n\t" + "adcs x9, x9, x9\n\t" + "adcs x10, x10, x10\n\t" + "adc x11, xzr, xzr\n\t" + /* A[0] * A[0] */ + "mul x4, x12, x12\n\t" + "umulh x27, x12, x12\n\t" + /* A[1] * A[1] */ + "mul x25, x13, x13\n\t" + "umulh x26, x13, x13\n\t" + "adds x5, x5, x27\n\t" + "adcs x6, x6, x25\n\t" + "adc x27, x26, xzr\n\t" + /* A[2] * A[2] */ + "mul x25, x14, x14\n\t" + "umulh x26, x14, x14\n\t" + "adds x7, x7, x27\n\t" + "adcs x8, x8, x25\n\t" + "adc x27, x26, xzr\n\t" + /* A[3] * A[3] */ + "mul x25, x15, x15\n\t" + "umulh x26, x15, x15\n\t" + "adds x9, x9, x27\n\t" + "adcs x10, x10, x25\n\t" + "adc x11, x11, x26\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x11, x11, x10, #63\n\t" + "extr x10, x10, x9, #63\n\t" + "extr x9, x9, x8, #63\n\t" + "extr x8, x8, x7, #63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x8\n\t" + "umulh x8, x25, x8\n\t" + "adds x4, x4, x26\n\t" + "mul x26, x25, x9\n\t" + "umulh x9, x25, x9\n\t" + "adcs x5, x5, x26\n\t" + "mul x26, x25, x10\n\t" + "umulh x10, x25, x10\n\t" + "adcs x6, x6, x26\n\t" + "mul x26, x25, x11\n\t" + "umulh x27, x25, x11\n\t" + "adcs x7, x7, x26\n\t" 
+ "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x7, #63\n\t" + "mul x27, x27, x25\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x7, asr 63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Store */ + "stp x4, x5, [x0]\n\t" + "stp x6, x7, [x0, #16]\n\t" + "ldr x0, [x29, #32]\n\t" + "ldr x1, [x29, #56]\n\t" + /* Square */ + "ldp x21, x22, [x1]\n\t" + "ldp x23, x24, [x1, #16]\n\t" + /* A[0] * A[1] */ + "mul x9, x21, x22\n\t" + "umulh x10, x21, x22\n\t" + /* A[0] * A[2] */ + "mul x25, x21, x23\n\t" + "umulh x11, x21, x23\n\t" + "adds x10, x10, x25\n\t" + "adc x11, x11, xzr\n\t" + /* A[0] * A[3] */ + "mul x25, x21, x24\n\t" + "umulh x16, x21, x24\n\t" + "adds x11, x11, x25\n\t" + "adc x16, x16, xzr\n\t" + /* A[1] * A[2] */ + "mul x25, x22, x23\n\t" + "umulh x26, x22, x23\n\t" + "adds x11, x11, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, xzr, xzr\n\t" + /* A[1] * A[3] */ + "mul x25, x22, x24\n\t" + "umulh x26, x22, x24\n\t" + "adds x16, x16, x25\n\t" + "adc x17, x17, x26\n\t" + /* A[2] * A[3] */ + "mul x25, x23, x24\n\t" + "umulh x19, x23, x24\n\t" + "adds x17, x17, x25\n\t" + "adc x19, x19, xzr\n\t" + /* Double */ + "adds x9, x9, x9\n\t" + "adcs x10, x10, x10\n\t" + "adcs x11, x11, x11\n\t" + "adcs x16, x16, x16\n\t" + "adcs x17, x17, x17\n\t" + "adcs x19, x19, x19\n\t" + "adc x20, xzr, xzr\n\t" + /* A[0] * A[0] */ + "mul x8, x21, x21\n\t" + "umulh x27, x21, x21\n\t" + /* A[1] * A[1] */ + "mul x25, x22, x22\n\t" + "umulh x26, x22, x22\n\t" + "adds x9, x9, x27\n\t" + "adcs x10, x10, x25\n\t" + "adc x27, x26, xzr\n\t" + /* A[2] * A[2] */ + "mul x25, x23, x23\n\t" + "umulh x26, x23, x23\n\t" + "adds x11, x11, x27\n\t" + "adcs x16, x16, x25\n\t" + "adc x27, x26, xzr\n\t" + /* A[3] * A[3] */ + "mul x25, x24, x24\n\t" + "umulh x26, x24, x24\n\t" + "adds x17, x17, x27\n\t" + "adcs x19, x19, x25\n\t" + "adc x20, x20, x26\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x20, x20, x19, #63\n\t" + "extr x19, x19, x17, #63\n\t" + "extr x17, x17, x16, #63\n\t" + "extr x16, x16, x11, #63\n\t" + "and x11, x11, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x16\n\t" + "umulh x16, x25, x16\n\t" + "adds x8, x8, x26\n\t" + "mul x26, x25, x17\n\t" + "umulh x17, x25, x17\n\t" + "adcs x9, x9, x26\n\t" + "mul x26, x25, x19\n\t" + "umulh x19, x25, x19\n\t" + "adcs x10, x10, x26\n\t" + "mul x26, x25, x20\n\t" + "umulh x27, x25, x20\n\t" + "adcs x11, x11, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x9, x9, x16\n\t" + "adcs x10, x10, x17\n\t" + "adcs x11, x11, x19\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x11, #63\n\t" + "mul x27, x27, x25\n\t" + "and x11, x11, #0x7fffffffffffffff\n\t" + "adds x8, x8, x27\n\t" + "adcs x9, x9, xzr\n\t" + "adcs x10, x10, xzr\n\t" + "adc x11, x11, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x11, asr 63\n\t" + "and x11, x11, #0x7fffffffffffffff\n\t" + "adds x8, x8, x27\n\t" + "adcs x9, x9, xzr\n\t" + "adcs x10, x10, xzr\n\t" + "adc x11, x11, xzr\n\t" + /* Store */ + "stp x8, x9, [x0]\n\t" + "stp x10, x11, [x0, #16]\n\t" + "ldr x0, [x29, 
#24]\n\t" + /* Add */ + "adds x12, x12, x21\n\t" + "adcs x13, x13, x22\n\t" + "adcs x14, x14, x23\n\t" + "adc x15, x15, x24\n\t" + "mov x25, #-19\n\t" + "asr x28, x15, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x12, x12, x25\n\t" + "sbcs x13, x13, x28\n\t" + "sbcs x14, x14, x28\n\t" + "sbc x15, x15, x26\n\t" + "ldr x0, [x29, #40]\n\t" + /* Square */ + /* A[0] * A[1] */ + "mul x17, x12, x13\n\t" + "umulh x19, x12, x13\n\t" + /* A[0] * A[2] */ + "mul x25, x12, x14\n\t" + "umulh x20, x12, x14\n\t" + "adds x19, x19, x25\n\t" + "adc x20, x20, xzr\n\t" + /* A[0] * A[3] */ + "mul x25, x12, x15\n\t" + "umulh x21, x12, x15\n\t" + "adds x20, x20, x25\n\t" + "adc x21, x21, xzr\n\t" + /* A[1] * A[2] */ + "mul x25, x13, x14\n\t" + "umulh x26, x13, x14\n\t" + "adds x20, x20, x25\n\t" + "adcs x21, x21, x26\n\t" + "adc x22, xzr, xzr\n\t" + /* A[1] * A[3] */ + "mul x25, x13, x15\n\t" + "umulh x26, x13, x15\n\t" + "adds x21, x21, x25\n\t" + "adc x22, x22, x26\n\t" + /* A[2] * A[3] */ + "mul x25, x14, x15\n\t" + "umulh x23, x14, x15\n\t" + "adds x22, x22, x25\n\t" + "adc x23, x23, xzr\n\t" + /* Double */ + "adds x17, x17, x17\n\t" + "adcs x19, x19, x19\n\t" + "adcs x20, x20, x20\n\t" + "adcs x21, x21, x21\n\t" + "adcs x22, x22, x22\n\t" + "adcs x23, x23, x23\n\t" + "adc x24, xzr, xzr\n\t" + /* A[0] * A[0] */ + "mul x16, x12, x12\n\t" + "umulh x27, x12, x12\n\t" + /* A[1] * A[1] */ + "mul x25, x13, x13\n\t" + "umulh x26, x13, x13\n\t" + "adds x17, x17, x27\n\t" + "adcs x19, x19, x25\n\t" + "adc x27, x26, xzr\n\t" + /* A[2] * A[2] */ + "mul x25, x14, x14\n\t" + "umulh x26, x14, x14\n\t" + "adds x20, x20, x27\n\t" + "adcs x21, x21, x25\n\t" + "adc x27, x26, xzr\n\t" + /* A[3] * A[3] */ + "mul x25, x15, x15\n\t" + "umulh x26, x15, x15\n\t" + "adds x22, x22, x27\n\t" + "adcs x23, x23, x25\n\t" + "adc x24, x24, x26\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x24, x24, x23, #63\n\t" + "extr x23, x23, x22, #63\n\t" + "extr x22, x22, x21, #63\n\t" + "extr x21, x21, x20, #63\n\t" + "and x20, x20, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x21\n\t" + "umulh x21, x25, x21\n\t" + "adds x16, x16, x26\n\t" + "mul x26, x25, x22\n\t" + "umulh x22, x25, x22\n\t" + "adcs x17, x17, x26\n\t" + "mul x26, x25, x23\n\t" + "umulh x23, x25, x23\n\t" + "adcs x19, x19, x26\n\t" + "mul x26, x25, x24\n\t" + "umulh x27, x25, x24\n\t" + "adcs x20, x20, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x17, x17, x21\n\t" + "adcs x19, x19, x22\n\t" + "adcs x20, x20, x23\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x20, #63\n\t" + "mul x27, x27, x25\n\t" + "and x20, x20, #0x7fffffffffffffff\n\t" + "adds x16, x16, x27\n\t" + "adcs x17, x17, xzr\n\t" + "adcs x19, x19, xzr\n\t" + "adc x20, x20, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x20, asr 63\n\t" + "and x20, x20, #0x7fffffffffffffff\n\t" + "adds x16, x16, x27\n\t" + "adcs x17, x17, xzr\n\t" + "adcs x19, x19, xzr\n\t" + "adc x20, x20, xzr\n\t" + /* Store */ + "stp x16, x17, [x0]\n\t" + "stp x19, x20, [x0, #16]\n\t" + "ldr x0, [x29, #24]\n\t" + "ldr x1, [x29, #32]\n\t" + /* Add */ + "adds x12, x8, x4\n\t" + "adcs x13, x9, x5\n\t" + "adcs x14, x10, x6\n\t" + "adc x15, x11, x7\n\t" + "mov x25, #-19\n\t" + "asr x28, x15, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub 
modulus (if overflow) */ + "subs x12, x12, x25\n\t" + "sbcs x13, x13, x28\n\t" + "sbcs x14, x14, x28\n\t" + "sbc x15, x15, x26\n\t" + /* Sub */ + "subs x21, x8, x4\n\t" + "sbcs x22, x9, x5\n\t" + "sbcs x23, x10, x6\n\t" + "sbcs x24, x11, x7\n\t" + "mov x25, #-19\n\t" + "csetm x28, cc\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x21, x21, x25\n\t" + "adcs x22, x22, x28\n\t" + "adcs x23, x23, x28\n\t" + "adc x24, x24, x26\n\t" + "stp x12, x13, [x0]\n\t" + "stp x14, x15, [x0, #16]\n\t" + "stp x21, x22, [x1]\n\t" + "stp x23, x24, [x1, #16]\n\t" + "ldr x0, [x29, #16]\n\t" + /* Sub */ + "subs x16, x16, x12\n\t" + "sbcs x17, x17, x13\n\t" + "sbcs x19, x19, x14\n\t" + "sbcs x20, x20, x15\n\t" + "mov x25, #-19\n\t" + "csetm x28, cc\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x16, x16, x25\n\t" + "adcs x17, x17, x28\n\t" + "adcs x19, x19, x28\n\t" + "adc x20, x20, x26\n\t" + "stp x16, x17, [x0]\n\t" + "stp x19, x20, [x0, #16]\n\t" + "ldr x0, [x29, #40]\n\t" + "ldr x1, [x29, #64]\n\t" + /* Square * 2 */ + "ldp x12, x13, [x1]\n\t" + "ldp x14, x15, [x1, #16]\n\t" + /* A[0] * A[1] */ + "mul x5, x12, x13\n\t" + "umulh x6, x12, x13\n\t" + /* A[0] * A[2] */ + "mul x25, x12, x14\n\t" + "umulh x7, x12, x14\n\t" + "adds x6, x6, x25\n\t" + "adc x7, x7, xzr\n\t" + /* A[0] * A[3] */ + "mul x25, x12, x15\n\t" + "umulh x8, x12, x15\n\t" + "adds x7, x7, x25\n\t" + "adc x8, x8, xzr\n\t" + /* A[1] * A[2] */ + "mul x25, x13, x14\n\t" + "umulh x26, x13, x14\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, xzr, xzr\n\t" + /* A[1] * A[3] */ + "mul x25, x13, x15\n\t" + "umulh x26, x13, x15\n\t" + "adds x8, x8, x25\n\t" + "adc x9, x9, x26\n\t" + /* A[2] * A[3] */ + "mul x25, x14, x15\n\t" + "umulh x10, x14, x15\n\t" + "adds x9, x9, x25\n\t" + "adc x10, x10, xzr\n\t" + /* Double */ + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adcs x7, x7, x7\n\t" + "adcs x8, x8, x8\n\t" + "adcs x9, x9, x9\n\t" + "adcs x10, x10, x10\n\t" + "adc x11, xzr, xzr\n\t" + /* A[0] * A[0] */ + "mul x4, x12, x12\n\t" + "umulh x28, x12, x12\n\t" + /* A[1] * A[1] */ + "mul x25, x13, x13\n\t" + "umulh x26, x13, x13\n\t" + "adds x5, x5, x28\n\t" + "adcs x6, x6, x25\n\t" + "adc x28, x26, xzr\n\t" + /* A[2] * A[2] */ + "mul x25, x14, x14\n\t" + "umulh x26, x14, x14\n\t" + "adds x7, x7, x28\n\t" + "adcs x8, x8, x25\n\t" + "adc x28, x26, xzr\n\t" + /* A[3] * A[3] */ + "mul x25, x15, x15\n\t" + "umulh x26, x15, x15\n\t" + "adds x9, x9, x28\n\t" + "adcs x10, x10, x25\n\t" + "adc x11, x11, x26\n\t" + /* Double and Reduce */ + "mov x25, #0x169\n\t" + /* Move top half into t4-t7 and remove top bit from t3 */ + "lsr x28, x11, #61\n\t" + "extr x11, x11, x10, #62\n\t" + "extr x10, x10, x9, #62\n\t" + "extr x9, x9, x8, #62\n\t" + "extr x8, x8, x7, #62\n\t" + "extr x7, x7, x6, #63\n\t" + "extr x6, x6, x5, #63\n\t" + "extr x5, x5, x4, #63\n\t" + "lsl x4, x4, #1\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + /* Two left, only one right */ + "and x11, x11, #0x7fffffffffffffff\n\t" + /* Multiply top bits by 19*19 */ + "mul x28, x28, x25\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x8\n\t" + "umulh x8, x25, x8\n\t" + "adds x4, x4, x26\n\t" + "mul x26, x25, x9\n\t" + "umulh x9, x25, x9\n\t" + "adcs x5, x5, x26\n\t" + "mul x26, x25, x10\n\t" + "umulh x10, x25, x10\n\t" + "adcs x6, x6, x26\n\t" + "mul x26, x25, x11\n\t" + "umulh x27, 
x25, x11\n\t" + "adcs x7, x7, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x4, x4, x28\n\t" + "adcs x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x7, #63\n\t" + "mul x27, x27, x25\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x7, asr 63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Store */ + "ldr x0, [x29, #40]\n\t" + /* Sub */ + "subs x4, x4, x21\n\t" + "sbcs x5, x5, x22\n\t" + "sbcs x6, x6, x23\n\t" + "sbcs x7, x7, x24\n\t" + "mov x25, #-19\n\t" + "csetm x28, cc\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x4, x4, x25\n\t" + "adcs x5, x5, x28\n\t" + "adcs x6, x6, x28\n\t" + "adc x7, x7, x26\n\t" + "stp x4, x5, [x0]\n\t" + "stp x6, x7, [x0, #16]\n\t" + "ldp x29, x30, [sp], #0x50\n\t" + : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz) + : + : "memory", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + ); +} + +void fe_ge_madd(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz, const fe pt, const fe qxy2d, const fe qyplusx, const fe qyminusx) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-80]!\n\t" + "add x29, sp, #0\n\t" + "str %x[rx], [x29, #16]\n\t" + "str %x[ry], [x29, #24]\n\t" + "str %x[rz], [x29, #32]\n\t" + "str %x[rt], [x29, #40]\n\t" + "str %x[px], [x29, #48]\n\t" + "str %x[py], [x29, #56]\n\t" + "str %x[pz], [x29, #64]\n\t" + "str %x[pt], [x29, #72]\n\t" + "ldr x2, [x29, #56]\n\t" + "ldr x3, [x29, #48]\n\t" + /* Add */ + "ldp x12, x13, [x2]\n\t" + "ldp x14, x15, [x2, #16]\n\t" + "ldp x16, x17, [x3]\n\t" + "ldp x19, x20, [x3, #16]\n\t" + "adds x4, x12, x16\n\t" + "adcs x5, x13, x17\n\t" + "adcs x6, x14, x19\n\t" + "adc x7, x15, x20\n\t" + "mov x25, #-19\n\t" + "asr x28, x7, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x4, x4, x25\n\t" + "sbcs x5, x5, x28\n\t" + "sbcs x6, x6, x28\n\t" + "sbc x7, x7, x26\n\t" + /* Sub */ + "subs x8, x12, x16\n\t" + "sbcs x9, x13, x17\n\t" + "sbcs x10, x14, x19\n\t" + "sbcs x11, x15, x20\n\t" + "mov x25, #-19\n\t" + "csetm x28, cc\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x8, x8, x25\n\t" + "adcs x9, x9, x28\n\t" + "adcs x10, x10, x28\n\t" + "adc x11, x11, x26\n\t" + "ldr x0, [x29, #32]\n\t" + "ldr x2, [x29, #168]\n\t" + /* Multiply */ + "ldp x21, x22, [x2]\n\t" + "ldp x23, x24, [x2, #16]\n\t" + /* A[0] * B[0] */ + "mul x12, x4, x21\n\t" + "umulh x13, x4, x21\n\t" + /* A[0] * B[1] */ + "mul x25, x4, x22\n\t" + "umulh x14, x4, x22\n\t" + "adds x13, x13, x25\n\t" + "adc x14, x14, xzr\n\t" + /* A[1] * B[0] */ + "mul x25, x5, x21\n\t" + "umulh x26, x5, x21\n\t" + "adds x13, x13, x25\n\t" + "adcs x14, x14, x26\n\t" + "adc x15, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x25, x4, x23\n\t" + "umulh x26, x4, x23\n\t" + "adds x14, x14, x25\n\t" + "adc x15, x15, x26\n\t" + /* A[1] * B[1] */ + "mul x25, x5, x22\n\t" + "umulh x26, x5, 
x22\n\t" + "adds x14, x14, x25\n\t" + "adcs x15, x15, x26\n\t" + "adc x16, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x25, x6, x21\n\t" + "umulh x26, x6, x21\n\t" + "adds x14, x14, x25\n\t" + "adcs x15, x15, x26\n\t" + "adc x16, x16, xzr\n\t" + /* A[0] * B[3] */ + "mul x25, x4, x24\n\t" + "umulh x26, x4, x24\n\t" + "adds x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x25, x5, x23\n\t" + "umulh x26, x5, x23\n\t" + "adds x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[2] * B[1] */ + "mul x25, x6, x22\n\t" + "umulh x26, x6, x22\n\t" + "adds x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[3] * B[0] */ + "mul x25, x7, x21\n\t" + "umulh x26, x7, x21\n\t" + "adds x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[1] * B[3] */ + "mul x25, x5, x24\n\t" + "umulh x26, x5, x24\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x25, x6, x23\n\t" + "umulh x26, x6, x23\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, x19, xzr\n\t" + /* A[3] * B[1] */ + "mul x25, x7, x22\n\t" + "umulh x26, x7, x22\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, x19, xzr\n\t" + /* A[2] * B[3] */ + "mul x25, x6, x24\n\t" + "umulh x26, x6, x24\n\t" + "adds x17, x17, x25\n\t" + "adcs x19, x19, x26\n\t" + "adc x20, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x25, x7, x23\n\t" + "umulh x26, x7, x23\n\t" + "adds x17, x17, x25\n\t" + "adcs x19, x19, x26\n\t" + "adc x20, x20, xzr\n\t" + /* A[3] * B[3] */ + "mul x25, x7, x24\n\t" + "umulh x26, x7, x24\n\t" + "adds x19, x19, x25\n\t" + "adc x20, x20, x26\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x20, x20, x19, #63\n\t" + "extr x19, x19, x17, #63\n\t" + "extr x17, x17, x16, #63\n\t" + "extr x16, x16, x15, #63\n\t" + "and x15, x15, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x16\n\t" + "umulh x16, x25, x16\n\t" + "adds x12, x12, x26\n\t" + "mul x26, x25, x17\n\t" + "umulh x17, x25, x17\n\t" + "adcs x13, x13, x26\n\t" + "mul x26, x25, x19\n\t" + "umulh x19, x25, x19\n\t" + "adcs x14, x14, x26\n\t" + "mul x26, x25, x20\n\t" + "umulh x27, x25, x20\n\t" + "adcs x15, x15, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x13, x13, x16\n\t" + "adcs x14, x14, x17\n\t" + "adcs x15, x15, x19\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x15, #63\n\t" + "mul x27, x27, x25\n\t" + "and x15, x15, #0x7fffffffffffffff\n\t" + "adds x12, x12, x27\n\t" + "adcs x13, x13, xzr\n\t" + "adcs x14, x14, xzr\n\t" + "adc x15, x15, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x15, asr 63\n\t" + "and x15, x15, #0x7fffffffffffffff\n\t" + "adds x12, x12, x27\n\t" + "adcs x13, x13, xzr\n\t" + "adcs x14, x14, xzr\n\t" + "adc x15, x15, xzr\n\t" + /* Store */ + "ldr x0, [x29, #24]\n\t" + "ldr x1, [x29, #176]\n\t" + /* Multiply */ + "ldp x21, x22, [x1]\n\t" + "ldp x23, x24, [x1, #16]\n\t" + /* A[0] * B[0] */ + "mul x4, x8, x21\n\t" + "umulh x5, x8, x21\n\t" + /* A[0] * B[1] */ + "mul x25, x8, x22\n\t" + "umulh x6, x8, x22\n\t" + "adds x5, x5, x25\n\t" + "adc x6, x6, xzr\n\t" + /* A[1] * B[0] */ + "mul x25, x9, x21\n\t" + "umulh x26, x9, x21\n\t" + "adds x5, x5, x25\n\t" + "adcs x6, x6, x26\n\t" + "adc x7, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x25, x8, x23\n\t" + "umulh x26, x8, x23\n\t" + "adds x6, x6, x25\n\t" + "adc 
x7, x7, x26\n\t" + /* A[1] * B[1] */ + "mul x25, x9, x22\n\t" + "umulh x26, x9, x22\n\t" + "adds x6, x6, x25\n\t" + "adcs x7, x7, x26\n\t" + "adc x16, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x25, x10, x21\n\t" + "umulh x26, x10, x21\n\t" + "adds x6, x6, x25\n\t" + "adcs x7, x7, x26\n\t" + "adc x16, x16, xzr\n\t" + /* A[0] * B[3] */ + "mul x25, x8, x24\n\t" + "umulh x26, x8, x24\n\t" + "adds x7, x7, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x25, x9, x23\n\t" + "umulh x26, x9, x23\n\t" + "adds x7, x7, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[2] * B[1] */ + "mul x25, x10, x22\n\t" + "umulh x26, x10, x22\n\t" + "adds x7, x7, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[3] * B[0] */ + "mul x25, x11, x21\n\t" + "umulh x26, x11, x21\n\t" + "adds x7, x7, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[1] * B[3] */ + "mul x25, x9, x24\n\t" + "umulh x26, x9, x24\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x25, x10, x23\n\t" + "umulh x26, x10, x23\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, x19, xzr\n\t" + /* A[3] * B[1] */ + "mul x25, x11, x22\n\t" + "umulh x26, x11, x22\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, x19, xzr\n\t" + /* A[2] * B[3] */ + "mul x25, x10, x24\n\t" + "umulh x26, x10, x24\n\t" + "adds x17, x17, x25\n\t" + "adcs x19, x19, x26\n\t" + "adc x20, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x25, x11, x23\n\t" + "umulh x26, x11, x23\n\t" + "adds x17, x17, x25\n\t" + "adcs x19, x19, x26\n\t" + "adc x20, x20, xzr\n\t" + /* A[3] * B[3] */ + "mul x25, x11, x24\n\t" + "umulh x26, x11, x24\n\t" + "adds x19, x19, x25\n\t" + "adc x20, x20, x26\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x20, x20, x19, #63\n\t" + "extr x19, x19, x17, #63\n\t" + "extr x17, x17, x16, #63\n\t" + "extr x16, x16, x7, #63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x16\n\t" + "umulh x16, x25, x16\n\t" + "adds x4, x4, x26\n\t" + "mul x26, x25, x17\n\t" + "umulh x17, x25, x17\n\t" + "adcs x5, x5, x26\n\t" + "mul x26, x25, x19\n\t" + "umulh x19, x25, x19\n\t" + "adcs x6, x6, x26\n\t" + "mul x26, x25, x20\n\t" + "umulh x27, x25, x20\n\t" + "adcs x7, x7, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x5, x5, x16\n\t" + "adcs x6, x6, x17\n\t" + "adcs x7, x7, x19\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x7, #63\n\t" + "mul x27, x27, x25\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x7, asr 63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Store */ + "ldr x0, [x29, #24]\n\t" + "ldr x1, [x29, #16]\n\t" + /* Add */ + "adds x8, x12, x4\n\t" + "adcs x9, x13, x5\n\t" + "adcs x10, x14, x6\n\t" + "adc x11, x15, x7\n\t" + "mov x25, #-19\n\t" + "asr x28, x11, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x8, x8, x25\n\t" + "sbcs x9, x9, x28\n\t" + "sbcs x10, x10, x28\n\t" + "sbc x11, x11, x26\n\t" + /* Sub */ + "subs x16, x12, x4\n\t" + "sbcs x17, x13, x5\n\t" + "sbcs x19, x14, 
x6\n\t" + "sbcs x20, x15, x7\n\t" + "mov x25, #-19\n\t" + "csetm x28, cc\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x16, x16, x25\n\t" + "adcs x17, x17, x28\n\t" + "adcs x19, x19, x28\n\t" + "adc x20, x20, x26\n\t" + "stp x8, x9, [x0]\n\t" + "stp x10, x11, [x0, #16]\n\t" + "stp x16, x17, [x1]\n\t" + "stp x19, x20, [x1, #16]\n\t" + "ldr x0, [x29, #40]\n\t" + "ldr x1, [x29, #160]\n\t" + "ldr x3, [x29, #72]\n\t" + /* Multiply */ + "ldp x16, x17, [x1]\n\t" + "ldp x19, x20, [x1, #16]\n\t" + "ldp x21, x22, [x3]\n\t" + "ldp x23, x24, [x3, #16]\n\t" + /* A[0] * B[0] */ + "mul x4, x16, x21\n\t" + "umulh x5, x16, x21\n\t" + /* A[0] * B[1] */ + "mul x25, x16, x22\n\t" + "umulh x6, x16, x22\n\t" + "adds x5, x5, x25\n\t" + "adc x6, x6, xzr\n\t" + /* A[1] * B[0] */ + "mul x25, x17, x21\n\t" + "umulh x26, x17, x21\n\t" + "adds x5, x5, x25\n\t" + "adcs x6, x6, x26\n\t" + "adc x7, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x25, x16, x23\n\t" + "umulh x26, x16, x23\n\t" + "adds x6, x6, x25\n\t" + "adc x7, x7, x26\n\t" + /* A[1] * B[1] */ + "mul x25, x17, x22\n\t" + "umulh x26, x17, x22\n\t" + "adds x6, x6, x25\n\t" + "adcs x7, x7, x26\n\t" + "adc x8, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x25, x19, x21\n\t" + "umulh x26, x19, x21\n\t" + "adds x6, x6, x25\n\t" + "adcs x7, x7, x26\n\t" + "adc x8, x8, xzr\n\t" + /* A[0] * B[3] */ + "mul x25, x16, x24\n\t" + "umulh x26, x16, x24\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x25, x17, x23\n\t" + "umulh x26, x17, x23\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, x9, xzr\n\t" + /* A[2] * B[1] */ + "mul x25, x19, x22\n\t" + "umulh x26, x19, x22\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, x9, xzr\n\t" + /* A[3] * B[0] */ + "mul x25, x20, x21\n\t" + "umulh x26, x20, x21\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, x9, xzr\n\t" + /* A[1] * B[3] */ + "mul x25, x17, x24\n\t" + "umulh x26, x17, x24\n\t" + "adds x8, x8, x25\n\t" + "adcs x9, x9, x26\n\t" + "adc x10, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x25, x19, x23\n\t" + "umulh x26, x19, x23\n\t" + "adds x8, x8, x25\n\t" + "adcs x9, x9, x26\n\t" + "adc x10, x10, xzr\n\t" + /* A[3] * B[1] */ + "mul x25, x20, x22\n\t" + "umulh x26, x20, x22\n\t" + "adds x8, x8, x25\n\t" + "adcs x9, x9, x26\n\t" + "adc x10, x10, xzr\n\t" + /* A[2] * B[3] */ + "mul x25, x19, x24\n\t" + "umulh x26, x19, x24\n\t" + "adds x9, x9, x25\n\t" + "adcs x10, x10, x26\n\t" + "adc x11, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x25, x20, x23\n\t" + "umulh x26, x20, x23\n\t" + "adds x9, x9, x25\n\t" + "adcs x10, x10, x26\n\t" + "adc x11, x11, xzr\n\t" + /* A[3] * B[3] */ + "mul x25, x20, x24\n\t" + "umulh x26, x20, x24\n\t" + "adds x10, x10, x25\n\t" + "adc x11, x11, x26\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x11, x11, x10, #63\n\t" + "extr x10, x10, x9, #63\n\t" + "extr x9, x9, x8, #63\n\t" + "extr x8, x8, x7, #63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x8\n\t" + "umulh x8, x25, x8\n\t" + "adds x4, x4, x26\n\t" + "mul x26, x25, x9\n\t" + "umulh x9, x25, x9\n\t" + "adcs x5, x5, x26\n\t" + "mul x26, x25, x10\n\t" + "umulh x10, x25, x10\n\t" + "adcs x6, x6, x26\n\t" + "mul x26, x25, x11\n\t" + "umulh x27, x25, x11\n\t" + "adcs x7, x7, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + 
"adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x7, #63\n\t" + "mul x27, x27, x25\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x7, asr 63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Store */ + "ldr x0, [x29, #32]\n\t" + "ldr x1, [x29, #64]\n\t" + /* Double */ + "ldp x8, x9, [x1]\n\t" + "ldp x10, x11, [x1, #16]\n\t" + "adds x8, x8, x8\n\t" + "adcs x9, x9, x9\n\t" + "adcs x10, x10, x10\n\t" + "adc x11, x11, x11\n\t" + "mov x25, #-19\n\t" + "asr x28, x11, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x8, x8, x25\n\t" + "sbcs x9, x9, x28\n\t" + "sbcs x10, x10, x28\n\t" + "sbc x11, x11, x26\n\t" + "ldr x1, [x29, #40]\n\t" + /* Add */ + "adds x12, x8, x4\n\t" + "adcs x13, x9, x5\n\t" + "adcs x14, x10, x6\n\t" + "adc x15, x11, x7\n\t" + "mov x25, #-19\n\t" + "asr x28, x15, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x12, x12, x25\n\t" + "sbcs x13, x13, x28\n\t" + "sbcs x14, x14, x28\n\t" + "sbc x15, x15, x26\n\t" + /* Sub */ + "subs x16, x8, x4\n\t" + "sbcs x17, x9, x5\n\t" + "sbcs x19, x10, x6\n\t" + "sbcs x20, x11, x7\n\t" + "mov x25, #-19\n\t" + "csetm x28, cc\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x16, x16, x25\n\t" + "adcs x17, x17, x28\n\t" + "adcs x19, x19, x28\n\t" + "adc x20, x20, x26\n\t" + "stp x12, x13, [x0]\n\t" + "stp x14, x15, [x0, #16]\n\t" + "stp x16, x17, [x1]\n\t" + "stp x19, x20, [x1, #16]\n\t" + "ldp x29, x30, [sp], #0x50\n\t" + : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) + : + : "memory", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + ); + (void)qxy2d; + (void)qyplusx; + (void)qyminusx; +} + +void fe_ge_msub(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz, const fe pt, const fe qxy2d, const fe qyplusx, const fe qyminusx) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-80]!\n\t" + "add x29, sp, #0\n\t" + "str %x[rx], [x29, #16]\n\t" + "str %x[ry], [x29, #24]\n\t" + "str %x[rz], [x29, #32]\n\t" + "str %x[rt], [x29, #40]\n\t" + "str %x[px], [x29, #48]\n\t" + "str %x[py], [x29, #56]\n\t" + "str %x[pz], [x29, #64]\n\t" + "str %x[pt], [x29, #72]\n\t" + "ldr x2, [x29, #56]\n\t" + "ldr x3, [x29, #48]\n\t" + /* Add */ + "ldp x12, x13, [x2]\n\t" + "ldp x14, x15, [x2, #16]\n\t" + "ldp x16, x17, [x3]\n\t" + "ldp x19, x20, [x3, #16]\n\t" + "adds x4, x12, x16\n\t" + "adcs x5, x13, x17\n\t" + "adcs x6, x14, x19\n\t" + "adc x7, x15, x20\n\t" + "mov x25, #-19\n\t" + "asr x28, x7, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x4, x4, x25\n\t" + "sbcs x5, x5, x28\n\t" + "sbcs x6, x6, x28\n\t" + "sbc x7, x7, x26\n\t" + /* Sub */ + "subs x8, x12, x16\n\t" + "sbcs x9, x13, x17\n\t" + "sbcs x10, x14, x19\n\t" + "sbcs x11, x15, x20\n\t" + "mov x25, #-19\n\t" + "csetm x28, cc\n\t" + /* 
Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x8, x8, x25\n\t" + "adcs x9, x9, x28\n\t" + "adcs x10, x10, x28\n\t" + "adc x11, x11, x26\n\t" + "ldr x0, [x29, #32]\n\t" + "ldr x2, [x29, #176]\n\t" + /* Multiply */ + "ldp x21, x22, [x2]\n\t" + "ldp x23, x24, [x2, #16]\n\t" + /* A[0] * B[0] */ + "mul x12, x4, x21\n\t" + "umulh x13, x4, x21\n\t" + /* A[0] * B[1] */ + "mul x25, x4, x22\n\t" + "umulh x14, x4, x22\n\t" + "adds x13, x13, x25\n\t" + "adc x14, x14, xzr\n\t" + /* A[1] * B[0] */ + "mul x25, x5, x21\n\t" + "umulh x26, x5, x21\n\t" + "adds x13, x13, x25\n\t" + "adcs x14, x14, x26\n\t" + "adc x15, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x25, x4, x23\n\t" + "umulh x26, x4, x23\n\t" + "adds x14, x14, x25\n\t" + "adc x15, x15, x26\n\t" + /* A[1] * B[1] */ + "mul x25, x5, x22\n\t" + "umulh x26, x5, x22\n\t" + "adds x14, x14, x25\n\t" + "adcs x15, x15, x26\n\t" + "adc x16, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x25, x6, x21\n\t" + "umulh x26, x6, x21\n\t" + "adds x14, x14, x25\n\t" + "adcs x15, x15, x26\n\t" + "adc x16, x16, xzr\n\t" + /* A[0] * B[3] */ + "mul x25, x4, x24\n\t" + "umulh x26, x4, x24\n\t" + "adds x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x25, x5, x23\n\t" + "umulh x26, x5, x23\n\t" + "adds x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[2] * B[1] */ + "mul x25, x6, x22\n\t" + "umulh x26, x6, x22\n\t" + "adds x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[3] * B[0] */ + "mul x25, x7, x21\n\t" + "umulh x26, x7, x21\n\t" + "adds x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[1] * B[3] */ + "mul x25, x5, x24\n\t" + "umulh x26, x5, x24\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x25, x6, x23\n\t" + "umulh x26, x6, x23\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, x19, xzr\n\t" + /* A[3] * B[1] */ + "mul x25, x7, x22\n\t" + "umulh x26, x7, x22\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, x19, xzr\n\t" + /* A[2] * B[3] */ + "mul x25, x6, x24\n\t" + "umulh x26, x6, x24\n\t" + "adds x17, x17, x25\n\t" + "adcs x19, x19, x26\n\t" + "adc x20, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x25, x7, x23\n\t" + "umulh x26, x7, x23\n\t" + "adds x17, x17, x25\n\t" + "adcs x19, x19, x26\n\t" + "adc x20, x20, xzr\n\t" + /* A[3] * B[3] */ + "mul x25, x7, x24\n\t" + "umulh x26, x7, x24\n\t" + "adds x19, x19, x25\n\t" + "adc x20, x20, x26\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x20, x20, x19, #63\n\t" + "extr x19, x19, x17, #63\n\t" + "extr x17, x17, x16, #63\n\t" + "extr x16, x16, x15, #63\n\t" + "and x15, x15, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x16\n\t" + "umulh x16, x25, x16\n\t" + "adds x12, x12, x26\n\t" + "mul x26, x25, x17\n\t" + "umulh x17, x25, x17\n\t" + "adcs x13, x13, x26\n\t" + "mul x26, x25, x19\n\t" + "umulh x19, x25, x19\n\t" + "adcs x14, x14, x26\n\t" + "mul x26, x25, x20\n\t" + "umulh x27, x25, x20\n\t" + "adcs x15, x15, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x13, x13, x16\n\t" + "adcs x14, x14, x17\n\t" + "adcs x15, x15, x19\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x15, #63\n\t" + "mul x27, x27, x25\n\t" + "and x15, x15, #0x7fffffffffffffff\n\t" + "adds x12, 
x12, x27\n\t" + "adcs x13, x13, xzr\n\t" + "adcs x14, x14, xzr\n\t" + "adc x15, x15, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x15, asr 63\n\t" + "and x15, x15, #0x7fffffffffffffff\n\t" + "adds x12, x12, x27\n\t" + "adcs x13, x13, xzr\n\t" + "adcs x14, x14, xzr\n\t" + "adc x15, x15, xzr\n\t" + /* Store */ + "ldr x0, [x29, #24]\n\t" + "ldr x1, [x29, #168]\n\t" + /* Multiply */ + "ldp x21, x22, [x1]\n\t" + "ldp x23, x24, [x1, #16]\n\t" + /* A[0] * B[0] */ + "mul x4, x8, x21\n\t" + "umulh x5, x8, x21\n\t" + /* A[0] * B[1] */ + "mul x25, x8, x22\n\t" + "umulh x6, x8, x22\n\t" + "adds x5, x5, x25\n\t" + "adc x6, x6, xzr\n\t" + /* A[1] * B[0] */ + "mul x25, x9, x21\n\t" + "umulh x26, x9, x21\n\t" + "adds x5, x5, x25\n\t" + "adcs x6, x6, x26\n\t" + "adc x7, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x25, x8, x23\n\t" + "umulh x26, x8, x23\n\t" + "adds x6, x6, x25\n\t" + "adc x7, x7, x26\n\t" + /* A[1] * B[1] */ + "mul x25, x9, x22\n\t" + "umulh x26, x9, x22\n\t" + "adds x6, x6, x25\n\t" + "adcs x7, x7, x26\n\t" + "adc x16, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x25, x10, x21\n\t" + "umulh x26, x10, x21\n\t" + "adds x6, x6, x25\n\t" + "adcs x7, x7, x26\n\t" + "adc x16, x16, xzr\n\t" + /* A[0] * B[3] */ + "mul x25, x8, x24\n\t" + "umulh x26, x8, x24\n\t" + "adds x7, x7, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x25, x9, x23\n\t" + "umulh x26, x9, x23\n\t" + "adds x7, x7, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[2] * B[1] */ + "mul x25, x10, x22\n\t" + "umulh x26, x10, x22\n\t" + "adds x7, x7, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[3] * B[0] */ + "mul x25, x11, x21\n\t" + "umulh x26, x11, x21\n\t" + "adds x7, x7, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[1] * B[3] */ + "mul x25, x9, x24\n\t" + "umulh x26, x9, x24\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x25, x10, x23\n\t" + "umulh x26, x10, x23\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, x19, xzr\n\t" + /* A[3] * B[1] */ + "mul x25, x11, x22\n\t" + "umulh x26, x11, x22\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, x19, xzr\n\t" + /* A[2] * B[3] */ + "mul x25, x10, x24\n\t" + "umulh x26, x10, x24\n\t" + "adds x17, x17, x25\n\t" + "adcs x19, x19, x26\n\t" + "adc x20, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x25, x11, x23\n\t" + "umulh x26, x11, x23\n\t" + "adds x17, x17, x25\n\t" + "adcs x19, x19, x26\n\t" + "adc x20, x20, xzr\n\t" + /* A[3] * B[3] */ + "mul x25, x11, x24\n\t" + "umulh x26, x11, x24\n\t" + "adds x19, x19, x25\n\t" + "adc x20, x20, x26\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x20, x20, x19, #63\n\t" + "extr x19, x19, x17, #63\n\t" + "extr x17, x17, x16, #63\n\t" + "extr x16, x16, x7, #63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x16\n\t" + "umulh x16, x25, x16\n\t" + "adds x4, x4, x26\n\t" + "mul x26, x25, x17\n\t" + "umulh x17, x25, x17\n\t" + "adcs x5, x5, x26\n\t" + "mul x26, x25, x19\n\t" + "umulh x19, x25, x19\n\t" + "adcs x6, x6, x26\n\t" + "mul x26, x25, x20\n\t" + "umulh x27, x25, x20\n\t" + "adcs x7, x7, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x5, x5, x16\n\t" + "adcs x6, x6, x17\n\t" + "adcs x7, x7, x19\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x7, #63\n\t" + "mul x27, x27, 
x25\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x7, asr 63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Store */ + "ldr x0, [x29, #24]\n\t" + "ldr x1, [x29, #16]\n\t" + /* Add */ + "adds x8, x12, x4\n\t" + "adcs x9, x13, x5\n\t" + "adcs x10, x14, x6\n\t" + "adc x11, x15, x7\n\t" + "mov x25, #-19\n\t" + "asr x28, x11, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x8, x8, x25\n\t" + "sbcs x9, x9, x28\n\t" + "sbcs x10, x10, x28\n\t" + "sbc x11, x11, x26\n\t" + /* Sub */ + "subs x16, x12, x4\n\t" + "sbcs x17, x13, x5\n\t" + "sbcs x19, x14, x6\n\t" + "sbcs x20, x15, x7\n\t" + "mov x25, #-19\n\t" + "csetm x28, cc\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x16, x16, x25\n\t" + "adcs x17, x17, x28\n\t" + "adcs x19, x19, x28\n\t" + "adc x20, x20, x26\n\t" + "stp x8, x9, [x0]\n\t" + "stp x10, x11, [x0, #16]\n\t" + "stp x16, x17, [x1]\n\t" + "stp x19, x20, [x1, #16]\n\t" + "ldr x0, [x29, #40]\n\t" + "ldr x1, [x29, #160]\n\t" + "ldr x3, [x29, #72]\n\t" + /* Multiply */ + "ldp x16, x17, [x1]\n\t" + "ldp x19, x20, [x1, #16]\n\t" + "ldp x21, x22, [x3]\n\t" + "ldp x23, x24, [x3, #16]\n\t" + /* A[0] * B[0] */ + "mul x4, x16, x21\n\t" + "umulh x5, x16, x21\n\t" + /* A[0] * B[1] */ + "mul x25, x16, x22\n\t" + "umulh x6, x16, x22\n\t" + "adds x5, x5, x25\n\t" + "adc x6, x6, xzr\n\t" + /* A[1] * B[0] */ + "mul x25, x17, x21\n\t" + "umulh x26, x17, x21\n\t" + "adds x5, x5, x25\n\t" + "adcs x6, x6, x26\n\t" + "adc x7, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x25, x16, x23\n\t" + "umulh x26, x16, x23\n\t" + "adds x6, x6, x25\n\t" + "adc x7, x7, x26\n\t" + /* A[1] * B[1] */ + "mul x25, x17, x22\n\t" + "umulh x26, x17, x22\n\t" + "adds x6, x6, x25\n\t" + "adcs x7, x7, x26\n\t" + "adc x8, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x25, x19, x21\n\t" + "umulh x26, x19, x21\n\t" + "adds x6, x6, x25\n\t" + "adcs x7, x7, x26\n\t" + "adc x8, x8, xzr\n\t" + /* A[0] * B[3] */ + "mul x25, x16, x24\n\t" + "umulh x26, x16, x24\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x25, x17, x23\n\t" + "umulh x26, x17, x23\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, x9, xzr\n\t" + /* A[2] * B[1] */ + "mul x25, x19, x22\n\t" + "umulh x26, x19, x22\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, x9, xzr\n\t" + /* A[3] * B[0] */ + "mul x25, x20, x21\n\t" + "umulh x26, x20, x21\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, x9, xzr\n\t" + /* A[1] * B[3] */ + "mul x25, x17, x24\n\t" + "umulh x26, x17, x24\n\t" + "adds x8, x8, x25\n\t" + "adcs x9, x9, x26\n\t" + "adc x10, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x25, x19, x23\n\t" + "umulh x26, x19, x23\n\t" + "adds x8, x8, x25\n\t" + "adcs x9, x9, x26\n\t" + "adc x10, x10, xzr\n\t" + /* A[3] * B[1] */ + "mul x25, x20, x22\n\t" + "umulh x26, x20, x22\n\t" + "adds x8, x8, x25\n\t" + "adcs x9, x9, x26\n\t" + "adc x10, x10, xzr\n\t" + /* A[2] * B[3] */ + "mul x25, x19, x24\n\t" + "umulh x26, x19, x24\n\t" + "adds x9, x9, x25\n\t" + "adcs x10, x10, x26\n\t" + "adc x11, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x25, x20, x23\n\t" + "umulh x26, 
x20, x23\n\t" + "adds x9, x9, x25\n\t" + "adcs x10, x10, x26\n\t" + "adc x11, x11, xzr\n\t" + /* A[3] * B[3] */ + "mul x25, x20, x24\n\t" + "umulh x26, x20, x24\n\t" + "adds x10, x10, x25\n\t" + "adc x11, x11, x26\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x11, x11, x10, #63\n\t" + "extr x10, x10, x9, #63\n\t" + "extr x9, x9, x8, #63\n\t" + "extr x8, x8, x7, #63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x8\n\t" + "umulh x8, x25, x8\n\t" + "adds x4, x4, x26\n\t" + "mul x26, x25, x9\n\t" + "umulh x9, x25, x9\n\t" + "adcs x5, x5, x26\n\t" + "mul x26, x25, x10\n\t" + "umulh x10, x25, x10\n\t" + "adcs x6, x6, x26\n\t" + "mul x26, x25, x11\n\t" + "umulh x27, x25, x11\n\t" + "adcs x7, x7, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x7, #63\n\t" + "mul x27, x27, x25\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x7, asr 63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Store */ + "ldr x0, [x29, #32]\n\t" + "ldr x1, [x29, #64]\n\t" + /* Double */ + "ldp x8, x9, [x1]\n\t" + "ldp x10, x11, [x1, #16]\n\t" + "adds x8, x8, x8\n\t" + "adcs x9, x9, x9\n\t" + "adcs x10, x10, x10\n\t" + "adc x11, x11, x11\n\t" + "mov x25, #-19\n\t" + "asr x28, x11, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x8, x8, x25\n\t" + "sbcs x9, x9, x28\n\t" + "sbcs x10, x10, x28\n\t" + "sbc x11, x11, x26\n\t" + "ldr x1, [x29, #40]\n\t" + /* Add */ + "adds x12, x8, x4\n\t" + "adcs x13, x9, x5\n\t" + "adcs x14, x10, x6\n\t" + "adc x15, x11, x7\n\t" + "mov x25, #-19\n\t" + "asr x28, x15, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x12, x12, x25\n\t" + "sbcs x13, x13, x28\n\t" + "sbcs x14, x14, x28\n\t" + "sbc x15, x15, x26\n\t" + /* Sub */ + "subs x16, x8, x4\n\t" + "sbcs x17, x9, x5\n\t" + "sbcs x19, x10, x6\n\t" + "sbcs x20, x11, x7\n\t" + "mov x25, #-19\n\t" + "csetm x28, cc\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x16, x16, x25\n\t" + "adcs x17, x17, x28\n\t" + "adcs x19, x19, x28\n\t" + "adc x20, x20, x26\n\t" + "stp x12, x13, [x1]\n\t" + "stp x14, x15, [x1, #16]\n\t" + "stp x16, x17, [x0]\n\t" + "stp x19, x20, [x0, #16]\n\t" + "ldp x29, x30, [sp], #0x50\n\t" + : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) + : + : "memory", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + ); + (void)qxy2d; + (void)qyplusx; + (void)qyminusx; +} + +void fe_ge_add(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz, const fe pt, const fe qz, const fe qt2d, const fe qyplusx, const fe qyminusx) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-80]!\n\t" + "add x29, sp, #0\n\t" + "str %x[rx], [x29, #16]\n\t" + "str %x[ry], [x29, #24]\n\t" + "str %x[rz], 
[x29, #32]\n\t" + "str %x[rt], [x29, #40]\n\t" + "str %x[px], [x29, #48]\n\t" + "str %x[py], [x29, #56]\n\t" + "str %x[pz], [x29, #64]\n\t" + "str %x[pt], [x29, #72]\n\t" + "ldr x2, [x29, #56]\n\t" + "ldr x3, [x29, #48]\n\t" + /* Add */ + "ldp x12, x13, [x2]\n\t" + "ldp x14, x15, [x2, #16]\n\t" + "ldp x16, x17, [x3]\n\t" + "ldp x19, x20, [x3, #16]\n\t" + "adds x4, x12, x16\n\t" + "adcs x5, x13, x17\n\t" + "adcs x6, x14, x19\n\t" + "adc x7, x15, x20\n\t" + "mov x25, #-19\n\t" + "asr x28, x7, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x4, x4, x25\n\t" + "sbcs x5, x5, x28\n\t" + "sbcs x6, x6, x28\n\t" + "sbc x7, x7, x26\n\t" + /* Sub */ + "subs x8, x12, x16\n\t" + "sbcs x9, x13, x17\n\t" + "sbcs x10, x14, x19\n\t" + "sbcs x11, x15, x20\n\t" + "mov x25, #-19\n\t" + "csetm x28, cc\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x8, x8, x25\n\t" + "adcs x9, x9, x28\n\t" + "adcs x10, x10, x28\n\t" + "adc x11, x11, x26\n\t" + "ldr x0, [x29, #32]\n\t" + "ldr x2, [x29, #176]\n\t" + /* Multiply */ + "ldp x21, x22, [x2]\n\t" + "ldp x23, x24, [x2, #16]\n\t" + /* A[0] * B[0] */ + "mul x12, x4, x21\n\t" + "umulh x13, x4, x21\n\t" + /* A[0] * B[1] */ + "mul x25, x4, x22\n\t" + "umulh x14, x4, x22\n\t" + "adds x13, x13, x25\n\t" + "adc x14, x14, xzr\n\t" + /* A[1] * B[0] */ + "mul x25, x5, x21\n\t" + "umulh x26, x5, x21\n\t" + "adds x13, x13, x25\n\t" + "adcs x14, x14, x26\n\t" + "adc x15, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x25, x4, x23\n\t" + "umulh x26, x4, x23\n\t" + "adds x14, x14, x25\n\t" + "adc x15, x15, x26\n\t" + /* A[1] * B[1] */ + "mul x25, x5, x22\n\t" + "umulh x26, x5, x22\n\t" + "adds x14, x14, x25\n\t" + "adcs x15, x15, x26\n\t" + "adc x16, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x25, x6, x21\n\t" + "umulh x26, x6, x21\n\t" + "adds x14, x14, x25\n\t" + "adcs x15, x15, x26\n\t" + "adc x16, x16, xzr\n\t" + /* A[0] * B[3] */ + "mul x25, x4, x24\n\t" + "umulh x26, x4, x24\n\t" + "adds x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x25, x5, x23\n\t" + "umulh x26, x5, x23\n\t" + "adds x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[2] * B[1] */ + "mul x25, x6, x22\n\t" + "umulh x26, x6, x22\n\t" + "adds x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[3] * B[0] */ + "mul x25, x7, x21\n\t" + "umulh x26, x7, x21\n\t" + "adds x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[1] * B[3] */ + "mul x25, x5, x24\n\t" + "umulh x26, x5, x24\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x25, x6, x23\n\t" + "umulh x26, x6, x23\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, x19, xzr\n\t" + /* A[3] * B[1] */ + "mul x25, x7, x22\n\t" + "umulh x26, x7, x22\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, x19, xzr\n\t" + /* A[2] * B[3] */ + "mul x25, x6, x24\n\t" + "umulh x26, x6, x24\n\t" + "adds x17, x17, x25\n\t" + "adcs x19, x19, x26\n\t" + "adc x20, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x25, x7, x23\n\t" + "umulh x26, x7, x23\n\t" + "adds x17, x17, x25\n\t" + "adcs x19, x19, x26\n\t" + "adc x20, x20, xzr\n\t" + /* A[3] * B[3] */ + "mul x25, x7, x24\n\t" + "umulh x26, x7, x24\n\t" + "adds x19, x19, x25\n\t" + "adc x20, x20, x26\n\t" + /* Reduce */ + /* Move top 
half into t4-t7 and remove top bit from t3 */ + "extr x20, x20, x19, #63\n\t" + "extr x19, x19, x17, #63\n\t" + "extr x17, x17, x16, #63\n\t" + "extr x16, x16, x15, #63\n\t" + "and x15, x15, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x16\n\t" + "umulh x16, x25, x16\n\t" + "adds x12, x12, x26\n\t" + "mul x26, x25, x17\n\t" + "umulh x17, x25, x17\n\t" + "adcs x13, x13, x26\n\t" + "mul x26, x25, x19\n\t" + "umulh x19, x25, x19\n\t" + "adcs x14, x14, x26\n\t" + "mul x26, x25, x20\n\t" + "umulh x27, x25, x20\n\t" + "adcs x15, x15, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x13, x13, x16\n\t" + "adcs x14, x14, x17\n\t" + "adcs x15, x15, x19\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x15, #63\n\t" + "mul x27, x27, x25\n\t" + "and x15, x15, #0x7fffffffffffffff\n\t" + "adds x12, x12, x27\n\t" + "adcs x13, x13, xzr\n\t" + "adcs x14, x14, xzr\n\t" + "adc x15, x15, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x15, asr 63\n\t" + "and x15, x15, #0x7fffffffffffffff\n\t" + "adds x12, x12, x27\n\t" + "adcs x13, x13, xzr\n\t" + "adcs x14, x14, xzr\n\t" + "adc x15, x15, xzr\n\t" + /* Store */ + "ldr x0, [x29, #24]\n\t" + "ldr x1, [x29, #184]\n\t" + /* Multiply */ + "ldp x21, x22, [x1]\n\t" + "ldp x23, x24, [x1, #16]\n\t" + /* A[0] * B[0] */ + "mul x4, x8, x21\n\t" + "umulh x5, x8, x21\n\t" + /* A[0] * B[1] */ + "mul x25, x8, x22\n\t" + "umulh x6, x8, x22\n\t" + "adds x5, x5, x25\n\t" + "adc x6, x6, xzr\n\t" + /* A[1] * B[0] */ + "mul x25, x9, x21\n\t" + "umulh x26, x9, x21\n\t" + "adds x5, x5, x25\n\t" + "adcs x6, x6, x26\n\t" + "adc x7, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x25, x8, x23\n\t" + "umulh x26, x8, x23\n\t" + "adds x6, x6, x25\n\t" + "adc x7, x7, x26\n\t" + /* A[1] * B[1] */ + "mul x25, x9, x22\n\t" + "umulh x26, x9, x22\n\t" + "adds x6, x6, x25\n\t" + "adcs x7, x7, x26\n\t" + "adc x16, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x25, x10, x21\n\t" + "umulh x26, x10, x21\n\t" + "adds x6, x6, x25\n\t" + "adcs x7, x7, x26\n\t" + "adc x16, x16, xzr\n\t" + /* A[0] * B[3] */ + "mul x25, x8, x24\n\t" + "umulh x26, x8, x24\n\t" + "adds x7, x7, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x25, x9, x23\n\t" + "umulh x26, x9, x23\n\t" + "adds x7, x7, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[2] * B[1] */ + "mul x25, x10, x22\n\t" + "umulh x26, x10, x22\n\t" + "adds x7, x7, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[3] * B[0] */ + "mul x25, x11, x21\n\t" + "umulh x26, x11, x21\n\t" + "adds x7, x7, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[1] * B[3] */ + "mul x25, x9, x24\n\t" + "umulh x26, x9, x24\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x25, x10, x23\n\t" + "umulh x26, x10, x23\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, x19, xzr\n\t" + /* A[3] * B[1] */ + "mul x25, x11, x22\n\t" + "umulh x26, x11, x22\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, x19, xzr\n\t" + /* A[2] * B[3] */ + "mul x25, x10, x24\n\t" + "umulh x26, x10, x24\n\t" + "adds x17, x17, x25\n\t" + "adcs x19, x19, x26\n\t" + "adc x20, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x25, x11, x23\n\t" + "umulh x26, x11, x23\n\t" + "adds x17, x17, x25\n\t" + "adcs x19, x19, x26\n\t" + "adc x20, x20, xzr\n\t" + /* A[3] * B[3] */ + "mul x25, x11, x24\n\t" + "umulh x26, x11, x24\n\t" + 
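/* Note: the last partial product A[3]*B[3] is folded in just below; x4-x7 then hold the low half and x16,x17,x19,x20 the high half of the 512-bit product, and the reduction that follows folds the high half back in using 2^255 = 19 (mod 2^255 - 19). */ + 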
"adds x19, x19, x25\n\t" + "adc x20, x20, x26\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x20, x20, x19, #63\n\t" + "extr x19, x19, x17, #63\n\t" + "extr x17, x17, x16, #63\n\t" + "extr x16, x16, x7, #63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x16\n\t" + "umulh x16, x25, x16\n\t" + "adds x4, x4, x26\n\t" + "mul x26, x25, x17\n\t" + "umulh x17, x25, x17\n\t" + "adcs x5, x5, x26\n\t" + "mul x26, x25, x19\n\t" + "umulh x19, x25, x19\n\t" + "adcs x6, x6, x26\n\t" + "mul x26, x25, x20\n\t" + "umulh x27, x25, x20\n\t" + "adcs x7, x7, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x5, x5, x16\n\t" + "adcs x6, x6, x17\n\t" + "adcs x7, x7, x19\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x7, #63\n\t" + "mul x27, x27, x25\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x7, asr 63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Store */ + "ldr x0, [x29, #24]\n\t" + "ldr x1, [x29, #16]\n\t" + /* Add */ + "adds x8, x12, x4\n\t" + "adcs x9, x13, x5\n\t" + "adcs x10, x14, x6\n\t" + "adc x11, x15, x7\n\t" + "mov x25, #-19\n\t" + "asr x28, x11, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x8, x8, x25\n\t" + "sbcs x9, x9, x28\n\t" + "sbcs x10, x10, x28\n\t" + "sbc x11, x11, x26\n\t" + /* Sub */ + "subs x16, x12, x4\n\t" + "sbcs x17, x13, x5\n\t" + "sbcs x19, x14, x6\n\t" + "sbcs x20, x15, x7\n\t" + "mov x25, #-19\n\t" + "csetm x28, cc\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x16, x16, x25\n\t" + "adcs x17, x17, x28\n\t" + "adcs x19, x19, x28\n\t" + "adc x20, x20, x26\n\t" + "stp x8, x9, [x0]\n\t" + "stp x10, x11, [x0, #16]\n\t" + "stp x16, x17, [x1]\n\t" + "stp x19, x20, [x1, #16]\n\t" + "ldr x0, [x29, #48]\n\t" + "ldr x1, [x29, #64]\n\t" + "ldr x2, [x29, #160]\n\t" + /* Multiply */ + "ldp x12, x13, [x1]\n\t" + "ldp x14, x15, [x1, #16]\n\t" + "ldp x16, x17, [x2]\n\t" + "ldp x19, x20, [x2, #16]\n\t" + /* A[0] * B[0] */ + "mul x4, x12, x16\n\t" + "umulh x5, x12, x16\n\t" + /* A[0] * B[1] */ + "mul x25, x12, x17\n\t" + "umulh x6, x12, x17\n\t" + "adds x5, x5, x25\n\t" + "adc x6, x6, xzr\n\t" + /* A[1] * B[0] */ + "mul x25, x13, x16\n\t" + "umulh x26, x13, x16\n\t" + "adds x5, x5, x25\n\t" + "adcs x6, x6, x26\n\t" + "adc x7, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x25, x12, x19\n\t" + "umulh x26, x12, x19\n\t" + "adds x6, x6, x25\n\t" + "adc x7, x7, x26\n\t" + /* A[1] * B[1] */ + "mul x25, x13, x17\n\t" + "umulh x26, x13, x17\n\t" + "adds x6, x6, x25\n\t" + "adcs x7, x7, x26\n\t" + "adc x8, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x25, x14, x16\n\t" + "umulh x26, x14, x16\n\t" + "adds x6, x6, x25\n\t" + "adcs x7, x7, x26\n\t" + "adc x8, x8, xzr\n\t" + /* A[0] * B[3] */ + "mul x25, x12, x20\n\t" + "umulh x26, x12, x20\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x25, x13, x19\n\t" + "umulh x26, x13, x19\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, x9, xzr\n\t" + /* A[2] * B[1] */ + "mul x25, x14, x17\n\t" + "umulh x26, 
x14, x17\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, x9, xzr\n\t" + /* A[3] * B[0] */ + "mul x25, x15, x16\n\t" + "umulh x26, x15, x16\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, x9, xzr\n\t" + /* A[1] * B[3] */ + "mul x25, x13, x20\n\t" + "umulh x26, x13, x20\n\t" + "adds x8, x8, x25\n\t" + "adcs x9, x9, x26\n\t" + "adc x10, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x25, x14, x19\n\t" + "umulh x26, x14, x19\n\t" + "adds x8, x8, x25\n\t" + "adcs x9, x9, x26\n\t" + "adc x10, x10, xzr\n\t" + /* A[3] * B[1] */ + "mul x25, x15, x17\n\t" + "umulh x26, x15, x17\n\t" + "adds x8, x8, x25\n\t" + "adcs x9, x9, x26\n\t" + "adc x10, x10, xzr\n\t" + /* A[2] * B[3] */ + "mul x25, x14, x20\n\t" + "umulh x26, x14, x20\n\t" + "adds x9, x9, x25\n\t" + "adcs x10, x10, x26\n\t" + "adc x11, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x25, x15, x19\n\t" + "umulh x26, x15, x19\n\t" + "adds x9, x9, x25\n\t" + "adcs x10, x10, x26\n\t" + "adc x11, x11, xzr\n\t" + /* A[3] * B[3] */ + "mul x25, x15, x20\n\t" + "umulh x26, x15, x20\n\t" + "adds x10, x10, x25\n\t" + "adc x11, x11, x26\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x11, x11, x10, #63\n\t" + "extr x10, x10, x9, #63\n\t" + "extr x9, x9, x8, #63\n\t" + "extr x8, x8, x7, #63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x8\n\t" + "umulh x8, x25, x8\n\t" + "adds x4, x4, x26\n\t" + "mul x26, x25, x9\n\t" + "umulh x9, x25, x9\n\t" + "adcs x5, x5, x26\n\t" + "mul x26, x25, x10\n\t" + "umulh x10, x25, x10\n\t" + "adcs x6, x6, x26\n\t" + "mul x26, x25, x11\n\t" + "umulh x27, x25, x11\n\t" + "adcs x7, x7, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x7, #63\n\t" + "mul x27, x27, x25\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x7, asr 63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Store */ + "ldr x0, [x29, #48]\n\t" + /* Double */ + "adds x4, x4, x4\n\t" + "adcs x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "mov x25, #-19\n\t" + "asr x28, x7, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x4, x4, x25\n\t" + "sbcs x5, x5, x28\n\t" + "sbcs x6, x6, x28\n\t" + "sbc x7, x7, x26\n\t" + "ldr x0, [x29, #40]\n\t" + "ldr x1, [x29, #168]\n\t" + "ldr x2, [x29, #72]\n\t" + /* Multiply */ + "ldp x16, x17, [x1]\n\t" + "ldp x19, x20, [x1, #16]\n\t" + "ldp x21, x22, [x2]\n\t" + "ldp x23, x24, [x2, #16]\n\t" + /* A[0] * B[0] */ + "mul x8, x16, x21\n\t" + "umulh x9, x16, x21\n\t" + /* A[0] * B[1] */ + "mul x25, x16, x22\n\t" + "umulh x10, x16, x22\n\t" + "adds x9, x9, x25\n\t" + "adc x10, x10, xzr\n\t" + /* A[1] * B[0] */ + "mul x25, x17, x21\n\t" + "umulh x26, x17, x21\n\t" + "adds x9, x9, x25\n\t" + "adcs x10, x10, x26\n\t" + "adc x11, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x25, x16, x23\n\t" + "umulh x26, x16, x23\n\t" + "adds x10, x10, x25\n\t" + "adc x11, x11, x26\n\t" + /* A[1] * B[1] */ + "mul x25, x17, x22\n\t" + "umulh x26, x17, x22\n\t" + "adds x10, x10, x25\n\t" + "adcs x11, x11, x26\n\t" + "adc x12, 
xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x25, x19, x21\n\t" + "umulh x26, x19, x21\n\t" + "adds x10, x10, x25\n\t" + "adcs x11, x11, x26\n\t" + "adc x12, x12, xzr\n\t" + /* A[0] * B[3] */ + "mul x25, x16, x24\n\t" + "umulh x26, x16, x24\n\t" + "adds x11, x11, x25\n\t" + "adcs x12, x12, x26\n\t" + "adc x13, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x25, x17, x23\n\t" + "umulh x26, x17, x23\n\t" + "adds x11, x11, x25\n\t" + "adcs x12, x12, x26\n\t" + "adc x13, x13, xzr\n\t" + /* A[2] * B[1] */ + "mul x25, x19, x22\n\t" + "umulh x26, x19, x22\n\t" + "adds x11, x11, x25\n\t" + "adcs x12, x12, x26\n\t" + "adc x13, x13, xzr\n\t" + /* A[3] * B[0] */ + "mul x25, x20, x21\n\t" + "umulh x26, x20, x21\n\t" + "adds x11, x11, x25\n\t" + "adcs x12, x12, x26\n\t" + "adc x13, x13, xzr\n\t" + /* A[1] * B[3] */ + "mul x25, x17, x24\n\t" + "umulh x26, x17, x24\n\t" + "adds x12, x12, x25\n\t" + "adcs x13, x13, x26\n\t" + "adc x14, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x25, x19, x23\n\t" + "umulh x26, x19, x23\n\t" + "adds x12, x12, x25\n\t" + "adcs x13, x13, x26\n\t" + "adc x14, x14, xzr\n\t" + /* A[3] * B[1] */ + "mul x25, x20, x22\n\t" + "umulh x26, x20, x22\n\t" + "adds x12, x12, x25\n\t" + "adcs x13, x13, x26\n\t" + "adc x14, x14, xzr\n\t" + /* A[2] * B[3] */ + "mul x25, x19, x24\n\t" + "umulh x26, x19, x24\n\t" + "adds x13, x13, x25\n\t" + "adcs x14, x14, x26\n\t" + "adc x15, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x25, x20, x23\n\t" + "umulh x26, x20, x23\n\t" + "adds x13, x13, x25\n\t" + "adcs x14, x14, x26\n\t" + "adc x15, x15, xzr\n\t" + /* A[3] * B[3] */ + "mul x25, x20, x24\n\t" + "umulh x26, x20, x24\n\t" + "adds x14, x14, x25\n\t" + "adc x15, x15, x26\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x15, x15, x14, #63\n\t" + "extr x14, x14, x13, #63\n\t" + "extr x13, x13, x12, #63\n\t" + "extr x12, x12, x11, #63\n\t" + "and x11, x11, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x12\n\t" + "umulh x12, x25, x12\n\t" + "adds x8, x8, x26\n\t" + "mul x26, x25, x13\n\t" + "umulh x13, x25, x13\n\t" + "adcs x9, x9, x26\n\t" + "mul x26, x25, x14\n\t" + "umulh x14, x25, x14\n\t" + "adcs x10, x10, x26\n\t" + "mul x26, x25, x15\n\t" + "umulh x27, x25, x15\n\t" + "adcs x11, x11, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x9, x9, x12\n\t" + "adcs x10, x10, x13\n\t" + "adcs x11, x11, x14\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x11, #63\n\t" + "mul x27, x27, x25\n\t" + "and x11, x11, #0x7fffffffffffffff\n\t" + "adds x8, x8, x27\n\t" + "adcs x9, x9, xzr\n\t" + "adcs x10, x10, xzr\n\t" + "adc x11, x11, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x11, asr 63\n\t" + "and x11, x11, #0x7fffffffffffffff\n\t" + "adds x8, x8, x27\n\t" + "adcs x9, x9, xzr\n\t" + "adcs x10, x10, xzr\n\t" + "adc x11, x11, xzr\n\t" + /* Store */ + "ldr x0, [x29, #32]\n\t" + "ldr x1, [x29, #40]\n\t" + /* Add */ + "adds x12, x4, x8\n\t" + "adcs x13, x5, x9\n\t" + "adcs x14, x6, x10\n\t" + "adc x15, x7, x11\n\t" + "mov x25, #-19\n\t" + "asr x28, x15, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x12, x12, x25\n\t" + "sbcs x13, x13, x28\n\t" + "sbcs x14, x14, x28\n\t" + "sbc x15, x15, x26\n\t" + /* Sub */ + "subs x16, x4, x8\n\t" + "sbcs x17, x5, x9\n\t" + "sbcs x19, x6, x10\n\t" + "sbcs x20, x7, x11\n\t" + "mov x25, #-19\n\t" + "csetm x28, cc\n\t" + /* Mask the modulus */ + "and 
x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x16, x16, x25\n\t" + "adcs x17, x17, x28\n\t" + "adcs x19, x19, x28\n\t" + "adc x20, x20, x26\n\t" + "stp x12, x13, [x0]\n\t" + "stp x14, x15, [x0, #16]\n\t" + "stp x16, x17, [x1]\n\t" + "stp x19, x20, [x1, #16]\n\t" + "ldp x29, x30, [sp], #0x50\n\t" + : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) + : + : "memory", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + ); + (void)qz; + (void)qt2d; + (void)qyplusx; + (void)qyminusx; +} + +void fe_ge_sub(fe rx, fe ry, fe rz, fe rt, const fe px, const fe py, const fe pz, const fe pt, const fe qz, const fe qt2d, const fe qyplusx, const fe qyminusx) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-80]!\n\t" + "add x29, sp, #0\n\t" + "str %x[rx], [x29, #16]\n\t" + "str %x[ry], [x29, #24]\n\t" + "str %x[rz], [x29, #32]\n\t" + "str %x[rt], [x29, #40]\n\t" + "str %x[px], [x29, #48]\n\t" + "str %x[py], [x29, #56]\n\t" + "str %x[pz], [x29, #64]\n\t" + "str %x[pt], [x29, #72]\n\t" + "ldr x2, [x29, #56]\n\t" + "ldr x3, [x29, #48]\n\t" + /* Add */ + "ldp x12, x13, [x2]\n\t" + "ldp x14, x15, [x2, #16]\n\t" + "ldp x16, x17, [x3]\n\t" + "ldp x19, x20, [x3, #16]\n\t" + "adds x4, x12, x16\n\t" + "adcs x5, x13, x17\n\t" + "adcs x6, x14, x19\n\t" + "adc x7, x15, x20\n\t" + "mov x25, #-19\n\t" + "asr x28, x7, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x4, x4, x25\n\t" + "sbcs x5, x5, x28\n\t" + "sbcs x6, x6, x28\n\t" + "sbc x7, x7, x26\n\t" + /* Sub */ + "subs x8, x12, x16\n\t" + "sbcs x9, x13, x17\n\t" + "sbcs x10, x14, x19\n\t" + "sbcs x11, x15, x20\n\t" + "mov x25, #-19\n\t" + "csetm x28, cc\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x8, x8, x25\n\t" + "adcs x9, x9, x28\n\t" + "adcs x10, x10, x28\n\t" + "adc x11, x11, x26\n\t" + "ldr x0, [x29, #32]\n\t" + "ldr x2, [x29, #184]\n\t" + /* Multiply */ + "ldp x21, x22, [x2]\n\t" + "ldp x23, x24, [x2, #16]\n\t" + /* A[0] * B[0] */ + "mul x12, x4, x21\n\t" + "umulh x13, x4, x21\n\t" + /* A[0] * B[1] */ + "mul x25, x4, x22\n\t" + "umulh x14, x4, x22\n\t" + "adds x13, x13, x25\n\t" + "adc x14, x14, xzr\n\t" + /* A[1] * B[0] */ + "mul x25, x5, x21\n\t" + "umulh x26, x5, x21\n\t" + "adds x13, x13, x25\n\t" + "adcs x14, x14, x26\n\t" + "adc x15, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x25, x4, x23\n\t" + "umulh x26, x4, x23\n\t" + "adds x14, x14, x25\n\t" + "adc x15, x15, x26\n\t" + /* A[1] * B[1] */ + "mul x25, x5, x22\n\t" + "umulh x26, x5, x22\n\t" + "adds x14, x14, x25\n\t" + "adcs x15, x15, x26\n\t" + "adc x16, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x25, x6, x21\n\t" + "umulh x26, x6, x21\n\t" + "adds x14, x14, x25\n\t" + "adcs x15, x15, x26\n\t" + "adc x16, x16, xzr\n\t" + /* A[0] * B[3] */ + "mul x25, x4, x24\n\t" + "umulh x26, x4, x24\n\t" + "adds x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x25, x5, x23\n\t" + "umulh x26, x5, x23\n\t" + "adds x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[2] * B[1] */ + "mul x25, x6, x22\n\t" + "umulh x26, x6, x22\n\t" + "adds x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[3] * B[0] */ + "mul 
x25, x7, x21\n\t" + "umulh x26, x7, x21\n\t" + "adds x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[1] * B[3] */ + "mul x25, x5, x24\n\t" + "umulh x26, x5, x24\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x25, x6, x23\n\t" + "umulh x26, x6, x23\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, x19, xzr\n\t" + /* A[3] * B[1] */ + "mul x25, x7, x22\n\t" + "umulh x26, x7, x22\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, x19, xzr\n\t" + /* A[2] * B[3] */ + "mul x25, x6, x24\n\t" + "umulh x26, x6, x24\n\t" + "adds x17, x17, x25\n\t" + "adcs x19, x19, x26\n\t" + "adc x20, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x25, x7, x23\n\t" + "umulh x26, x7, x23\n\t" + "adds x17, x17, x25\n\t" + "adcs x19, x19, x26\n\t" + "adc x20, x20, xzr\n\t" + /* A[3] * B[3] */ + "mul x25, x7, x24\n\t" + "umulh x26, x7, x24\n\t" + "adds x19, x19, x25\n\t" + "adc x20, x20, x26\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x20, x20, x19, #63\n\t" + "extr x19, x19, x17, #63\n\t" + "extr x17, x17, x16, #63\n\t" + "extr x16, x16, x15, #63\n\t" + "and x15, x15, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x16\n\t" + "umulh x16, x25, x16\n\t" + "adds x12, x12, x26\n\t" + "mul x26, x25, x17\n\t" + "umulh x17, x25, x17\n\t" + "adcs x13, x13, x26\n\t" + "mul x26, x25, x19\n\t" + "umulh x19, x25, x19\n\t" + "adcs x14, x14, x26\n\t" + "mul x26, x25, x20\n\t" + "umulh x27, x25, x20\n\t" + "adcs x15, x15, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x13, x13, x16\n\t" + "adcs x14, x14, x17\n\t" + "adcs x15, x15, x19\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x15, #63\n\t" + "mul x27, x27, x25\n\t" + "and x15, x15, #0x7fffffffffffffff\n\t" + "adds x12, x12, x27\n\t" + "adcs x13, x13, xzr\n\t" + "adcs x14, x14, xzr\n\t" + "adc x15, x15, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x15, asr 63\n\t" + "and x15, x15, #0x7fffffffffffffff\n\t" + "adds x12, x12, x27\n\t" + "adcs x13, x13, xzr\n\t" + "adcs x14, x14, xzr\n\t" + "adc x15, x15, xzr\n\t" + /* Store */ + "ldr x0, [x29, #24]\n\t" + "ldr x1, [x29, #176]\n\t" + /* Multiply */ + "ldp x21, x22, [x1]\n\t" + "ldp x23, x24, [x1, #16]\n\t" + /* A[0] * B[0] */ + "mul x4, x8, x21\n\t" + "umulh x5, x8, x21\n\t" + /* A[0] * B[1] */ + "mul x25, x8, x22\n\t" + "umulh x6, x8, x22\n\t" + "adds x5, x5, x25\n\t" + "adc x6, x6, xzr\n\t" + /* A[1] * B[0] */ + "mul x25, x9, x21\n\t" + "umulh x26, x9, x21\n\t" + "adds x5, x5, x25\n\t" + "adcs x6, x6, x26\n\t" + "adc x7, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x25, x8, x23\n\t" + "umulh x26, x8, x23\n\t" + "adds x6, x6, x25\n\t" + "adc x7, x7, x26\n\t" + /* A[1] * B[1] */ + "mul x25, x9, x22\n\t" + "umulh x26, x9, x22\n\t" + "adds x6, x6, x25\n\t" + "adcs x7, x7, x26\n\t" + "adc x16, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x25, x10, x21\n\t" + "umulh x26, x10, x21\n\t" + "adds x6, x6, x25\n\t" + "adcs x7, x7, x26\n\t" + "adc x16, x16, xzr\n\t" + /* A[0] * B[3] */ + "mul x25, x8, x24\n\t" + "umulh x26, x8, x24\n\t" + "adds x7, x7, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x25, x9, x23\n\t" + "umulh x26, x9, x23\n\t" + "adds x7, x7, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[2] * B[1] */ + "mul x25, x10, x22\n\t" + "umulh x26, x10, x22\n\t" + "adds x7, x7, x25\n\t" + "adcs 
x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[3] * B[0] */ + "mul x25, x11, x21\n\t" + "umulh x26, x11, x21\n\t" + "adds x7, x7, x25\n\t" + "adcs x16, x16, x26\n\t" + "adc x17, x17, xzr\n\t" + /* A[1] * B[3] */ + "mul x25, x9, x24\n\t" + "umulh x26, x9, x24\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x25, x10, x23\n\t" + "umulh x26, x10, x23\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, x19, xzr\n\t" + /* A[3] * B[1] */ + "mul x25, x11, x22\n\t" + "umulh x26, x11, x22\n\t" + "adds x16, x16, x25\n\t" + "adcs x17, x17, x26\n\t" + "adc x19, x19, xzr\n\t" + /* A[2] * B[3] */ + "mul x25, x10, x24\n\t" + "umulh x26, x10, x24\n\t" + "adds x17, x17, x25\n\t" + "adcs x19, x19, x26\n\t" + "adc x20, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x25, x11, x23\n\t" + "umulh x26, x11, x23\n\t" + "adds x17, x17, x25\n\t" + "adcs x19, x19, x26\n\t" + "adc x20, x20, xzr\n\t" + /* A[3] * B[3] */ + "mul x25, x11, x24\n\t" + "umulh x26, x11, x24\n\t" + "adds x19, x19, x25\n\t" + "adc x20, x20, x26\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x20, x20, x19, #63\n\t" + "extr x19, x19, x17, #63\n\t" + "extr x17, x17, x16, #63\n\t" + "extr x16, x16, x7, #63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x16\n\t" + "umulh x16, x25, x16\n\t" + "adds x4, x4, x26\n\t" + "mul x26, x25, x17\n\t" + "umulh x17, x25, x17\n\t" + "adcs x5, x5, x26\n\t" + "mul x26, x25, x19\n\t" + "umulh x19, x25, x19\n\t" + "adcs x6, x6, x26\n\t" + "mul x26, x25, x20\n\t" + "umulh x27, x25, x20\n\t" + "adcs x7, x7, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x5, x5, x16\n\t" + "adcs x6, x6, x17\n\t" + "adcs x7, x7, x19\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x7, #63\n\t" + "mul x27, x27, x25\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x7, asr 63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Store */ + "ldr x0, [x29, #24]\n\t" + "ldr x1, [x29, #16]\n\t" + /* Add */ + "adds x8, x12, x4\n\t" + "adcs x9, x13, x5\n\t" + "adcs x10, x14, x6\n\t" + "adc x11, x15, x7\n\t" + "mov x25, #-19\n\t" + "asr x28, x11, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x8, x8, x25\n\t" + "sbcs x9, x9, x28\n\t" + "sbcs x10, x10, x28\n\t" + "sbc x11, x11, x26\n\t" + /* Sub */ + "subs x16, x12, x4\n\t" + "sbcs x17, x13, x5\n\t" + "sbcs x19, x14, x6\n\t" + "sbcs x20, x15, x7\n\t" + "mov x25, #-19\n\t" + "csetm x28, cc\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x16, x16, x25\n\t" + "adcs x17, x17, x28\n\t" + "adcs x19, x19, x28\n\t" + "adc x20, x20, x26\n\t" + "stp x8, x9, [x0]\n\t" + "stp x10, x11, [x0, #16]\n\t" + "stp x16, x17, [x1]\n\t" + "stp x19, x20, [x1, #16]\n\t" + "ldr x0, [x29, #48]\n\t" + "ldr x1, [x29, #64]\n\t" + "ldr x2, [x29, #160]\n\t" + /* Multiply */ + "ldp x12, x13, [x1]\n\t" + "ldp x14, x15, [x1, #16]\n\t" + "ldp x16, x17, [x2]\n\t" + "ldp x19, x20, [x2, #16]\n\t" + /* A[0] * B[0] */ + "mul x4, x12, x16\n\t" + "umulh x5, x12, x16\n\t" + /* A[0] 
* B[1] */ + "mul x25, x12, x17\n\t" + "umulh x6, x12, x17\n\t" + "adds x5, x5, x25\n\t" + "adc x6, x6, xzr\n\t" + /* A[1] * B[0] */ + "mul x25, x13, x16\n\t" + "umulh x26, x13, x16\n\t" + "adds x5, x5, x25\n\t" + "adcs x6, x6, x26\n\t" + "adc x7, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x25, x12, x19\n\t" + "umulh x26, x12, x19\n\t" + "adds x6, x6, x25\n\t" + "adc x7, x7, x26\n\t" + /* A[1] * B[1] */ + "mul x25, x13, x17\n\t" + "umulh x26, x13, x17\n\t" + "adds x6, x6, x25\n\t" + "adcs x7, x7, x26\n\t" + "adc x8, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x25, x14, x16\n\t" + "umulh x26, x14, x16\n\t" + "adds x6, x6, x25\n\t" + "adcs x7, x7, x26\n\t" + "adc x8, x8, xzr\n\t" + /* A[0] * B[3] */ + "mul x25, x12, x20\n\t" + "umulh x26, x12, x20\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x25, x13, x19\n\t" + "umulh x26, x13, x19\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, x9, xzr\n\t" + /* A[2] * B[1] */ + "mul x25, x14, x17\n\t" + "umulh x26, x14, x17\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, x9, xzr\n\t" + /* A[3] * B[0] */ + "mul x25, x15, x16\n\t" + "umulh x26, x15, x16\n\t" + "adds x7, x7, x25\n\t" + "adcs x8, x8, x26\n\t" + "adc x9, x9, xzr\n\t" + /* A[1] * B[3] */ + "mul x25, x13, x20\n\t" + "umulh x26, x13, x20\n\t" + "adds x8, x8, x25\n\t" + "adcs x9, x9, x26\n\t" + "adc x10, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x25, x14, x19\n\t" + "umulh x26, x14, x19\n\t" + "adds x8, x8, x25\n\t" + "adcs x9, x9, x26\n\t" + "adc x10, x10, xzr\n\t" + /* A[3] * B[1] */ + "mul x25, x15, x17\n\t" + "umulh x26, x15, x17\n\t" + "adds x8, x8, x25\n\t" + "adcs x9, x9, x26\n\t" + "adc x10, x10, xzr\n\t" + /* A[2] * B[3] */ + "mul x25, x14, x20\n\t" + "umulh x26, x14, x20\n\t" + "adds x9, x9, x25\n\t" + "adcs x10, x10, x26\n\t" + "adc x11, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x25, x15, x19\n\t" + "umulh x26, x15, x19\n\t" + "adds x9, x9, x25\n\t" + "adcs x10, x10, x26\n\t" + "adc x11, x11, xzr\n\t" + /* A[3] * B[3] */ + "mul x25, x15, x20\n\t" + "umulh x26, x15, x20\n\t" + "adds x10, x10, x25\n\t" + "adc x11, x11, x26\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x11, x11, x10, #63\n\t" + "extr x10, x10, x9, #63\n\t" + "extr x9, x9, x8, #63\n\t" + "extr x8, x8, x7, #63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x8\n\t" + "umulh x8, x25, x8\n\t" + "adds x4, x4, x26\n\t" + "mul x26, x25, x9\n\t" + "umulh x9, x25, x9\n\t" + "adcs x5, x5, x26\n\t" + "mul x26, x25, x10\n\t" + "umulh x10, x25, x10\n\t" + "adcs x6, x6, x26\n\t" + "mul x26, x25, x11\n\t" + "umulh x27, x25, x11\n\t" + "adcs x7, x7, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x7, #63\n\t" + "mul x27, x27, x25\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x7, asr 63\n\t" + "and x7, x7, #0x7fffffffffffffff\n\t" + "adds x4, x4, x27\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adc x7, x7, xzr\n\t" + /* Store */ + "ldr x0, [x29, #48]\n\t" + /* Double */ + "adds x4, x4, x4\n\t" + "adcs x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "mov x25, #-19\n\t" + "asr x28, x7, #63\n\t" + /* Mask the modulus 
*/ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x4, x4, x25\n\t" + "sbcs x5, x5, x28\n\t" + "sbcs x6, x6, x28\n\t" + "sbc x7, x7, x26\n\t" + "ldr x0, [x29, #40]\n\t" + "ldr x1, [x29, #168]\n\t" + "ldr x2, [x29, #72]\n\t" + /* Multiply */ + "ldp x16, x17, [x1]\n\t" + "ldp x19, x20, [x1, #16]\n\t" + "ldp x21, x22, [x2]\n\t" + "ldp x23, x24, [x2, #16]\n\t" + /* A[0] * B[0] */ + "mul x8, x16, x21\n\t" + "umulh x9, x16, x21\n\t" + /* A[0] * B[1] */ + "mul x25, x16, x22\n\t" + "umulh x10, x16, x22\n\t" + "adds x9, x9, x25\n\t" + "adc x10, x10, xzr\n\t" + /* A[1] * B[0] */ + "mul x25, x17, x21\n\t" + "umulh x26, x17, x21\n\t" + "adds x9, x9, x25\n\t" + "adcs x10, x10, x26\n\t" + "adc x11, xzr, xzr\n\t" + /* A[0] * B[2] */ + "mul x25, x16, x23\n\t" + "umulh x26, x16, x23\n\t" + "adds x10, x10, x25\n\t" + "adc x11, x11, x26\n\t" + /* A[1] * B[1] */ + "mul x25, x17, x22\n\t" + "umulh x26, x17, x22\n\t" + "adds x10, x10, x25\n\t" + "adcs x11, x11, x26\n\t" + "adc x12, xzr, xzr\n\t" + /* A[2] * B[0] */ + "mul x25, x19, x21\n\t" + "umulh x26, x19, x21\n\t" + "adds x10, x10, x25\n\t" + "adcs x11, x11, x26\n\t" + "adc x12, x12, xzr\n\t" + /* A[0] * B[3] */ + "mul x25, x16, x24\n\t" + "umulh x26, x16, x24\n\t" + "adds x11, x11, x25\n\t" + "adcs x12, x12, x26\n\t" + "adc x13, xzr, xzr\n\t" + /* A[1] * B[2] */ + "mul x25, x17, x23\n\t" + "umulh x26, x17, x23\n\t" + "adds x11, x11, x25\n\t" + "adcs x12, x12, x26\n\t" + "adc x13, x13, xzr\n\t" + /* A[2] * B[1] */ + "mul x25, x19, x22\n\t" + "umulh x26, x19, x22\n\t" + "adds x11, x11, x25\n\t" + "adcs x12, x12, x26\n\t" + "adc x13, x13, xzr\n\t" + /* A[3] * B[0] */ + "mul x25, x20, x21\n\t" + "umulh x26, x20, x21\n\t" + "adds x11, x11, x25\n\t" + "adcs x12, x12, x26\n\t" + "adc x13, x13, xzr\n\t" + /* A[1] * B[3] */ + "mul x25, x17, x24\n\t" + "umulh x26, x17, x24\n\t" + "adds x12, x12, x25\n\t" + "adcs x13, x13, x26\n\t" + "adc x14, xzr, xzr\n\t" + /* A[2] * B[2] */ + "mul x25, x19, x23\n\t" + "umulh x26, x19, x23\n\t" + "adds x12, x12, x25\n\t" + "adcs x13, x13, x26\n\t" + "adc x14, x14, xzr\n\t" + /* A[3] * B[1] */ + "mul x25, x20, x22\n\t" + "umulh x26, x20, x22\n\t" + "adds x12, x12, x25\n\t" + "adcs x13, x13, x26\n\t" + "adc x14, x14, xzr\n\t" + /* A[2] * B[3] */ + "mul x25, x19, x24\n\t" + "umulh x26, x19, x24\n\t" + "adds x13, x13, x25\n\t" + "adcs x14, x14, x26\n\t" + "adc x15, xzr, xzr\n\t" + /* A[3] * B[2] */ + "mul x25, x20, x23\n\t" + "umulh x26, x20, x23\n\t" + "adds x13, x13, x25\n\t" + "adcs x14, x14, x26\n\t" + "adc x15, x15, xzr\n\t" + /* A[3] * B[3] */ + "mul x25, x20, x24\n\t" + "umulh x26, x20, x24\n\t" + "adds x14, x14, x25\n\t" + "adc x15, x15, x26\n\t" + /* Reduce */ + /* Move top half into t4-t7 and remove top bit from t3 */ + "extr x15, x15, x14, #63\n\t" + "extr x14, x14, x13, #63\n\t" + "extr x13, x13, x12, #63\n\t" + "extr x12, x12, x11, #63\n\t" + "and x11, x11, #0x7fffffffffffffff\n\t" + /* Multiply top half by 19 */ + "mov x25, #19\n\t" + "mul x26, x25, x12\n\t" + "umulh x12, x25, x12\n\t" + "adds x8, x8, x26\n\t" + "mul x26, x25, x13\n\t" + "umulh x13, x25, x13\n\t" + "adcs x9, x9, x26\n\t" + "mul x26, x25, x14\n\t" + "umulh x14, x25, x14\n\t" + "adcs x10, x10, x26\n\t" + "mul x26, x25, x15\n\t" + "umulh x27, x25, x15\n\t" + "adcs x11, x11, x26\n\t" + "adc x27, x27, xzr\n\t" + /* Add remaining product results in */ + "adds x9, x9, x12\n\t" + "adcs x10, x10, x13\n\t" + "adcs x11, x11, x14\n\t" + "adc x27, x27, xzr\n\t" + /* Overflow */ + "extr x27, x27, x11, 
#63\n\t" + "mul x27, x27, x25\n\t" + "and x11, x11, #0x7fffffffffffffff\n\t" + "adds x8, x8, x27\n\t" + "adcs x9, x9, xzr\n\t" + "adcs x10, x10, xzr\n\t" + "adc x11, x11, xzr\n\t" + /* Reduce if top bit set */ + "and x27, x25, x11, asr 63\n\t" + "and x11, x11, #0x7fffffffffffffff\n\t" + "adds x8, x8, x27\n\t" + "adcs x9, x9, xzr\n\t" + "adcs x10, x10, xzr\n\t" + "adc x11, x11, xzr\n\t" + /* Store */ + "ldr x0, [x29, #40]\n\t" + "ldr x1, [x29, #32]\n\t" + /* Add */ + "adds x12, x4, x8\n\t" + "adcs x13, x5, x9\n\t" + "adcs x14, x6, x10\n\t" + "adc x15, x7, x11\n\t" + "mov x25, #-19\n\t" + "asr x28, x15, #63\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Sub modulus (if overflow) */ + "subs x12, x12, x25\n\t" + "sbcs x13, x13, x28\n\t" + "sbcs x14, x14, x28\n\t" + "sbc x15, x15, x26\n\t" + /* Sub */ + "subs x16, x4, x8\n\t" + "sbcs x17, x5, x9\n\t" + "sbcs x19, x6, x10\n\t" + "sbcs x20, x7, x11\n\t" + "mov x25, #-19\n\t" + "csetm x28, cc\n\t" + /* Mask the modulus */ + "and x25, x28, x25\n\t" + "and x26, x28, #0x7fffffffffffffff\n\t" + /* Add modulus (if underflow) */ + "adds x16, x16, x25\n\t" + "adcs x17, x17, x28\n\t" + "adcs x19, x19, x28\n\t" + "adc x20, x20, x26\n\t" + "stp x12, x13, [x0]\n\t" + "stp x14, x15, [x0, #16]\n\t" + "stp x16, x17, [x1]\n\t" + "stp x19, x20, [x1, #16]\n\t" + "ldp x29, x30, [sp], #0x50\n\t" + : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt), [px] "+r" (px), [py] "+r" (py), [pz] "+r" (pz), [pt] "+r" (pt) + : + : "memory", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + ); + (void)qz; + (void)qt2d; + (void)qyplusx; + (void)qyminusx; +} + +#endif /* WOLFSSL_ARMASM */ +#endif /* __aarch64__ */ diff --git a/client/wolfssl/wolfcrypt/src/port/arm/armv8-poly1305.c b/client/wolfssl/wolfcrypt/src/port/arm/armv8-poly1305.c new file mode 100644 index 0000000..3df07f7 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/arm/armv8-poly1305.c @@ -0,0 +1,1166 @@ +/* armv8-poly1305.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* + * Based off the public domain implementations by Andrew Moon + * and Daniel J. 
Bernstein + */ + + +#ifdef __aarch64__ + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFSSL_ARMASM +#ifdef HAVE_POLY1305 +#include <wolfssl/wolfcrypt/poly1305.h> +#include <wolfssl/wolfcrypt/error-crypt.h> +#include <wolfssl/wolfcrypt/logging.h> +#include <wolfssl/wolfcrypt/cpuid.h> +#ifdef NO_INLINE + #include <wolfssl/wolfcrypt/misc.h> +#else + #define WOLFSSL_MISC_INCLUDED + #include <wolfcrypt/src/misc.c> +#endif +#ifdef CHACHA_AEAD_TEST + #include <stdio.h> +#endif + +static WC_INLINE void poly1305_blocks_16(Poly1305* ctx, const unsigned char *m, + size_t bytes) +{ + __asm__ __volatile__ ( + "CMP %[bytes], %[POLY1305_BLOCK_SIZE] \n\t" + "BLO L_poly1305_16_64_done_%= \n\t" + /* Load r and h */ + "LDP x21, x23, %[ctx_r] \n\t" + "LDR w25, %[ctx_r_4] \n\t" + "LDP x2, x4, %[ctx_h] \n\t" + "LDR w6, %[ctx_h_4] \n\t" + "LSR x22, x21, #32 \n\t" + "LSR x24, x23, #32 \n\t" + "LSR x3, x2, #32 \n\t" + "LSR x5, x4, #32 \n\t" + "AND x21, x21, #0x3ffffff \n\t" + "AND x23, x23, #0x3ffffff \n\t" + "AND x2, x2, #0x3ffffff \n\t" + "AND x4, x4, #0x3ffffff \n\t" + /* s1 = r1 * 5; */ + /* s2 = r2 * 5; */ + /* s3 = r3 * 5; */ + /* s4 = r4 * 5; */ + "MOV x15, #5 \n\t" + "CMP %[finished], #0 \n\t" + "MUL w7, w22, w15 \n\t" + "CSET %[finished], EQ \n\t" + "MUL w8, w23, w15 \n\t" + "LSL %[finished], %[finished], #24 \n\t" + "MUL w9, w24, w15 \n\t" + "MOV x14, #0x3ffffff \n\t" + "MUL w10, w25, w15 \n\t" + "\n" + ".align 2 \n\t" + "L_poly1305_16_64_loop_%=: \n\t" + /* t0 = U8TO64(&m[0]); */ + /* t1 = U8TO64(&m[8]); */ + "LDP x16, x17, [%[m]], #16 \n\t" + /* h0 += (U8TO32(m + 0)) & 0x3ffffff; */ + "AND x26, x16, #0x3ffffff \n\t" + "ADD x2, x2, x26 \n\t" + /* h1 += (U8TO32(m + 3) >> 2) & 0x3ffffff; */ + "AND x26, x14, x16, LSR #26 \n\t" + "ADD x3, x3, x26 \n\t" + /* h2 += (U8TO32(m + 6) >> 4) & 0x3ffffff; */ + "EXTR x26, x17, x16, #52 \n\t" + "AND x26, x26, #0x3ffffff \n\t" + "ADD x4, x4, x26 \n\t" + /* h3 += (U8TO32(m + 9) >> 6) & 0x3ffffff; */ + "AND x26, x14, x17, LSR #14 \n\t" + "ADD x5, x5, x26 \n\t" + /* h4 += (U8TO32(m + 12) >> 8) | hibit; */ + "ORR x17, %[finished], x17, LSR #40 \n\t" + "ADD x6, x6, x17 \n\t" + /* d0 = h0 * r0 + h1 * s4 + h2 * s3 + h3 * s2 + h4 * s1 */ + /* d1 = h0 * r1 + h1 * r0 + h2 * s4 + h3 * s3 + h4 * s2 */ + /* d2 = h0 * r2 + h1 * r1 + h2 * r0 + h3 * s4 + h4 * s3 */ + /* d3 = h0 * r3 + h1 * r2 + h2 * r1 + h3 * r0 + h4 * s4 */ + /* d4 = h0 * r4 + h1 * r3 + h2 * r2 + h3 * r1 + h4 * r0 */ + "MUL x16, x2, x21 \n\t" + "MUL x17, x2, x22 \n\t" + "MUL x26, x2, x23 \n\t" + "MUL x19, x2, x24 \n\t" + "MUL x20, x2, x25 \n\t" + "MADD x16, x3, x10, x16 \n\t" + "MADD x17, x3, x21, x17 \n\t" + "MADD x26, x3, x22, x26 \n\t" + "MADD x19, x3, x23, x19 \n\t" + "MADD x20, x3, x24, x20 \n\t" + "MADD x16, x4, x9, x16 \n\t" + "MADD x17, x4, x10, x17 \n\t" + "MADD x26, x4, x21, x26 \n\t" + "MADD x19, x4, x22, x19 \n\t" + "MADD x20, x4, x23, x20 \n\t" + "MADD x16, x5, x8, x16 \n\t" + "MADD x17, x5, x9, x17 \n\t" + "MADD x26, x5, x10, x26 \n\t" + "MADD x19, x5, x21, x19 \n\t" + "MADD x20, x5, x22, x20 \n\t" + "MADD x16, x6, x7, x16 \n\t" + "MADD x17, x6, x8, x17 \n\t" + "MADD x26, x6, x9, x26 \n\t" + "MADD x19, x6, x10, x19 \n\t" + "MADD x20, x6, x21, x20 \n\t" + /* d1 = d1 + d0 >> 26 */ + /* d2 = d2 + d1 >> 26 */ + /* d3 = d3 + d2 >> 26 */ + /* d4 = d4 + d3 >> 26 */ + /* h0 = d0 & 0x3ffffff */ + /* h1 = d1 & 0x3ffffff */ + /* h2 = d2 & 0x3ffffff */ + /* h0 = h0 + (d4 >> 26) * 5 */ + /* h1 = h1 + h0 >> 26 */ + /* h3 = d3 & 0x3ffffff */ + /* h4 = d4 & 0x3ffffff */ + /* h0 = h0 & 0x3ffffff */ + "ADD x17, x17, x16, LSR #26 \n\t" + "ADD x20, x20, x19, LSR #26 \n\t" + "AND x16, x16, #0x3ffffff \n\t" + "LSR x2, x20, #26 \n\t" + "AND x19, x19, #0x3ffffff \n\t" + "MADD x16, x2, 
x15, x16 \n\t" + "ADD x26, x26, x17, LSR #26 \n\t" + "AND x17, x17, #0x3ffffff \n\t" + "AND x20, x20, #0x3ffffff \n\t" + "ADD x19, x19, x26, LSR #26 \n\t" + "AND x4, x26, #0x3ffffff \n\t" + "ADD x3, x17, x16, LSR #26 \n\t" + "AND x2, x16, #0x3ffffff \n\t" + "ADD x6, x20, x19, LSR #26 \n\t" + "AND x5, x19, #0x3ffffff \n\t" + "SUB %[bytes], %[bytes], %[POLY1305_BLOCK_SIZE] \n\t" + "CMP %[bytes], %[POLY1305_BLOCK_SIZE] \n\t" + "BHS L_poly1305_16_64_loop_%= \n\t" + /* Store h */ + "ORR x2, x2, x3, LSL #32 \n\t" + "ORR x4, x4, x5, LSL #32 \n\t" + "STP x2, x4, %[ctx_h] \n\t" + "STR w6, %[ctx_h_4] \n\t" + "\n" + ".align 2 \n\t" + "L_poly1305_16_64_done_%=: \n\t" + : [ctx_h] "+m" (ctx->h[0]), + [ctx_h_4] "+m" (ctx->h[4]), + [bytes] "+r" (bytes), + [m] "+r" (m) + : [POLY1305_BLOCK_SIZE] "I" (POLY1305_BLOCK_SIZE), + [ctx_r] "m" (ctx->r[0]), + [ctx_r_4] "m" (ctx->r[4]), + [finished] "r" ((word64)ctx->finished) + : "memory", "cc", + "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w15", + "w21", "w22", "w23", "w24", "w25", "x2", "x3", "x4", "x5", "x6", + "x7", "x8", "x9", "x10", "x14", "x15", "x16", "x17", "x19", "x20", + "x21", "x22", "x23", "x24", "x25", "x26" + ); +} + +void poly1305_blocks(Poly1305* ctx, const unsigned char *m, + size_t bytes) +{ + __asm__ __volatile__ ( + /* If less than 4 blocks to process then use regular method */ + "CMP %[bytes], %[POLY1305_BLOCK_SIZE]*4 \n\t" + "BLO L_poly1305_64_done_%= \n\t" + "MOV x9, #0x3ffffff \n\t" + /* Load h */ + "LDP x20, x22, [%[h]] \n\t" + "MOV v27.D[0], x9 \n\t" + "LDR w24, [%[h], #16] \n\t" + "MOV v27.D[1], x9 \n\t" + "LSR x21, x20, #32 \n\t" + "DUP v29.4S, v27.S[0] \n\t" + "LSR x23, x22, #32 \n\t" + "MOV x9, #5 \n\t" + "AND x20, x20, #0x3ffffff \n\t" + "MOV v28.D[0], x9 \n\t" + "AND x22, x22, #0x3ffffff \n\t" + /* Zero accumulator registers */ + "MOVI v15.2D, #0x0 \n\t" + "MOVI v16.2D, #0x0 \n\t" + "MOVI v17.2D, #0x0 \n\t" + "MOVI v18.2D, #0x0 \n\t" + "MOVI v19.2D, #0x0 \n\t" + /* Set hibit */ + "CMP %[finished], #0 \n\t" + "CSET x9, EQ \n\t" + "LSL x9, x9, #24 \n\t" + "MOV v26.D[0], x9 \n\t" + "MOV v26.D[1], x9 \n\t" + "DUP v30.4S, v26.S[0] \n\t" + "CMP %[bytes], %[POLY1305_BLOCK_SIZE]*6 \n\t" + "BLO L_poly1305_64_start_block_size_64_%= \n\t" + /* Load r^2 to NEON v0, v1, v2, v3, v4 */ + "LD4 { v0.S-v3.S }[2], [%[r_2]], #16 \n\t" + "LD1 { v4.S }[2], [%[r_2]] \n\t" + "SUB %[r_2], %[r_2], #16 \n\t" + /* Load r^4 to NEON v0, v1, v2, v3, v4 */ + "LD4 { v0.S-v3.S }[0], [%[r_4]], #16 \n\t" + "LD1 { v4.S }[0], [%[r_4]] \n\t" + "SUB %[r_4], %[r_4], #16 \n\t" + "MOV v0.S[1], v0.S[0] \n\t" + "MOV v0.S[3], v0.S[2] \n\t" + "MOV v1.S[1], v1.S[0] \n\t" + "MOV v1.S[3], v1.S[2] \n\t" + "MOV v2.S[1], v2.S[0] \n\t" + "MOV v2.S[3], v2.S[2] \n\t" + "MOV v3.S[1], v3.S[0] \n\t" + "MOV v3.S[3], v3.S[2] \n\t" + "MOV v4.S[1], v4.S[0] \n\t" + "MOV v4.S[3], v4.S[2] \n\t" + /* Store [r^4, r^2] * 5 */ + "MUL v5.4S, v0.4S, v28.S[0] \n\t" + "MUL v6.4S, v1.4S, v28.S[0] \n\t" + "MUL v7.4S, v2.4S, v28.S[0] \n\t" + "MUL v8.4S, v3.4S, v28.S[0] \n\t" + "MUL v9.4S, v4.4S, v28.S[0] \n\t" + /* Copy r^4 to ARM */ + "MOV w25, v0.S[0] \n\t" + "MOV w26, v1.S[0] \n\t" + "MOV w27, v2.S[0] \n\t" + "MOV w28, v3.S[0] \n\t" + "MOV w30, v4.S[0] \n\t" + /* Copy 5*r^4 to ARM */ + "MOV w15, v5.S[0] \n\t" + "MOV w16, v6.S[0] \n\t" + "MOV w17, v7.S[0] \n\t" + "MOV w8, v8.S[0] \n\t" + "MOV w19, v9.S[0] \n\t" + /* Load m */ + /* Load four message blocks to NEON v10, v11, v12, v13, v14 */ + "LD4 { v10.4S-v13.4S }, [%[m]], #64 \n\t" + "SUB %[bytes], %[bytes], %[POLY1305_BLOCK_SIZE]*4 \n\t" + 
"USHR v14.4S, v13.4S, #8 \n\t" + "ORR v14.16B, v14.16B, v30.16B \n\t" + "SHL v13.4S, v13.4S, #18 \n\t" + "SRI v13.4S, v12.4S, #14 \n\t" + "SHL v12.4S, v12.4S, #12 \n\t" + "SRI v12.4S, v11.4S, #20 \n\t" + "SHL v11.4S, v11.4S, #6 \n\t" + "SRI v11.4S, v10.4S, #26 \n\t" + "AND v10.16B, v10.16B, v29.16B \n\t" + "AND v11.16B, v11.16B, v29.16B \n\t" + "AND v12.16B, v12.16B, v29.16B \n\t" + "AND v13.16B, v13.16B, v29.16B \n\t" + "AND v14.16B, v14.16B, v29.16B \n\t" + /* Four message blocks loaded */ + /* Add messages to accumulator */ + "ADD v15.2S, v15.2S, v10.2S \n\t" + "ADD v16.2S, v16.2S, v11.2S \n\t" + "ADD v17.2S, v17.2S, v12.2S \n\t" + "ADD v18.2S, v18.2S, v13.2S \n\t" + "ADD v19.2S, v19.2S, v14.2S \n\t" + "\n" + ".align 2 \n\t" + "L_poly1305_64_loop_128_%=: \n\t" + /* d0 = h0*r0 + h1*s4 + h2*s3 + h3*s2 + h4*s1 */ + /* d1 = h0*r1 + h1*r0 + h2*s4 + h3*s3 + h4*s2 */ + /* d2 = h0*r2 + h1*r1 + h2*r0 + h3*s4 + h4*s3 */ + /* d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*s4 */ + /* d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0 */ + "UMULL v21.2D, v15.2S, v0.2S \n\t" + /* Compute h*r^2 */ + /* d0 = h0 * r0 + h1 * s4 + h2 * s3 + h3 * s2 + h4 * s1 */ + /* d1 = h0 * r1 + h1 * r0 + h2 * s4 + h3 * s3 + h4 * s2 */ + /* d2 = h0 * r2 + h1 * r1 + h2 * r0 + h3 * s4 + h4 * s3 */ + /* d3 = h0 * r3 + h1 * r2 + h2 * r1 + h3 * r0 + h4 * s4 */ + /* d4 = h0 * r4 + h1 * r3 + h2 * r2 + h3 * r1 + h4 * r0 */ + "MUL x9, x20, x25 \n\t" + "UMULL v22.2D, v15.2S, v1.2S \n\t" + "MUL x10, x20, x26 \n\t" + "UMULL v23.2D, v15.2S, v2.2S \n\t" + "MUL x11, x20, x27 \n\t" + "UMULL v24.2D, v15.2S, v3.2S \n\t" + "MUL x12, x20, x28 \n\t" + "UMULL v25.2D, v15.2S, v4.2S \n\t" + "MUL x13, x20, x30 \n\t" + "UMLAL v21.2D, v16.2S, v9.2S \n\t" + "MADD x9, x21, x19, x9 \n\t" + "UMLAL v22.2D, v16.2S, v0.2S \n\t" + "MADD x10, x21, x25, x10 \n\t" + "UMLAL v23.2D, v16.2S, v1.2S \n\t" + "MADD x11, x21, x26, x11 \n\t" + "UMLAL v24.2D, v16.2S, v2.2S \n\t" + "MADD x12, x21, x27, x12 \n\t" + "UMLAL v25.2D, v16.2S, v3.2S \n\t" + "MADD x13, x21, x28, x13 \n\t" + "UMLAL v21.2D, v17.2S, v8.2S \n\t" + "MADD x9, x22, x8, x9 \n\t" + "UMLAL v22.2D, v17.2S, v9.2S \n\t" + "MADD x10, x22, x19, x10 \n\t" + "UMLAL v23.2D, v17.2S, v0.2S \n\t" + "MADD x11, x22, x25, x11 \n\t" + "UMLAL v24.2D, v17.2S, v1.2S \n\t" + "MADD x12, x22, x26, x12 \n\t" + "UMLAL v25.2D, v17.2S, v2.2S \n\t" + "MADD x13, x22, x27, x13 \n\t" + "UMLAL v21.2D, v18.2S, v7.2S \n\t" + "MADD x9, x23, x17, x9 \n\t" + "UMLAL v22.2D, v18.2S, v8.2S \n\t" + "MADD x10, x23, x8, x10 \n\t" + "UMLAL v23.2D, v18.2S, v9.2S \n\t" + "MADD x11, x23, x19, x11 \n\t" + "UMLAL v24.2D, v18.2S, v0.2S \n\t" + "MADD x12, x23, x25, x12 \n\t" + "UMLAL v25.2D, v18.2S, v1.2S \n\t" + "MADD x13, x23, x26, x13 \n\t" + "UMLAL v21.2D, v19.2S, v6.2S \n\t" + "MADD x9, x24, x16, x9 \n\t" + "UMLAL v22.2D, v19.2S, v7.2S \n\t" + "MADD x10, x24, x17, x10 \n\t" + "UMLAL v23.2D, v19.2S, v8.2S \n\t" + "MADD x11, x24, x8, x11 \n\t" + "UMLAL v24.2D, v19.2S, v9.2S \n\t" + "MADD x12, x24, x19, x12 \n\t" + "UMLAL v25.2D, v19.2S, v0.2S \n\t" + "MADD x13, x24, x25, x13 \n\t" + /* d0 = h0*r0 + h1*s4 + h2*s3 + h3*s2 + h4*s1 */ + /* d1 = h0*r1 + h1*r0 + h2*s4 + h3*s3 + h4*s2 */ + /* d2 = h0*r2 + h1*r1 + h2*r0 + h3*s4 + h4*s3 */ + /* d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*s4 */ + /* d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0 */ + "UMLAL2 v21.2D, v10.4S, v0.4S \n\t" + /* Reduce h % P */ + "MOV x14, #5 \n\t" + "UMLAL2 v22.2D, v10.4S, v1.4S \n\t" + "ADD x10, x10, x9, LSR #26 \n\t" + "UMLAL2 v23.2D, v10.4S, v2.4S \n\t" + "ADD x13, x13, x12, LSR #26 \n\t" + 
"UMLAL2 v24.2D, v10.4S, v3.4S \n\t" + "AND x9, x9, #0x3ffffff \n\t" + "UMLAL2 v25.2D, v10.4S, v4.4S \n\t" + "LSR x20, x13, #26 \n\t" + "UMLAL2 v21.2D, v11.4S, v9.4S \n\t" + "AND x12, x12, #0x3ffffff \n\t" + "UMLAL2 v22.2D, v11.4S, v0.4S \n\t" + "MADD x9, x20, x14, x9 \n\t" + "UMLAL2 v23.2D, v11.4S, v1.4S \n\t" + "ADD x11, x11, x10, LSR #26 \n\t" + "UMLAL2 v24.2D, v11.4S, v2.4S \n\t" + "AND x10, x10, #0x3ffffff \n\t" + "UMLAL2 v25.2D, v11.4S, v3.4S \n\t" + "AND x13, x13, #0x3ffffff \n\t" + "UMLAL2 v21.2D, v12.4S, v8.4S \n\t" + "ADD x12, x12, x11, LSR #26 \n\t" + "UMLAL2 v22.2D, v12.4S, v9.4S \n\t" + "AND x22, x11, #0x3ffffff \n\t" + "UMLAL2 v23.2D, v12.4S, v0.4S \n\t" + "ADD x21, x10, x9, LSR #26 \n\t" + "UMLAL2 v24.2D, v12.4S, v1.4S \n\t" + "AND x20, x9, #0x3ffffff \n\t" + "UMLAL2 v25.2D, v12.4S, v2.4S \n\t" + "ADD x24, x13, x12, LSR #26 \n\t" + "UMLAL2 v21.2D, v13.4S, v7.4S \n\t" + "AND x23, x12, #0x3ffffff \n\t" + "UMLAL2 v22.2D, v13.4S, v8.4S \n\t" + "UMLAL2 v23.2D, v13.4S, v9.4S \n\t" + "UMLAL2 v24.2D, v13.4S, v0.4S \n\t" + "UMLAL2 v25.2D, v13.4S, v1.4S \n\t" + "UMLAL2 v21.2D, v14.4S, v6.4S \n\t" + "UMLAL2 v22.2D, v14.4S, v7.4S \n\t" + "UMLAL2 v23.2D, v14.4S, v8.4S \n\t" + "UMLAL2 v24.2D, v14.4S, v9.4S \n\t" + "UMLAL2 v25.2D, v14.4S, v0.4S \n\t" + /* If less than six message blocks left then leave loop */ + "CMP %[bytes], %[POLY1305_BLOCK_SIZE]*6 \n\t" + "BLS L_poly1305_64_loop_128_final_%= \n\t" + /* Load m */ + /* Load four message blocks to NEON v10, v11, v12, v13, v14 */ + "LD4 { v10.4S-v13.4S }, [%[m]], #64 \n\t" + "SUB %[bytes], %[bytes], %[POLY1305_BLOCK_SIZE]*4 \n\t" + "USHR v14.4S, v13.4S, #8 \n\t" + "ORR v14.16B, v14.16B, v30.16B \n\t" + "SHL v13.4S, v13.4S, #18 \n\t" + "SRI v13.4S, v12.4S, #14 \n\t" + "SHL v12.4S, v12.4S, #12 \n\t" + "SRI v12.4S, v11.4S, #20 \n\t" + "SHL v11.4S, v11.4S, #6 \n\t" + "SRI v11.4S, v10.4S, #26 \n\t" + "AND v10.16B, v10.16B, v29.16B \n\t" + "AND v11.16B, v11.16B, v29.16B \n\t" + "AND v12.16B, v12.16B, v29.16B \n\t" + "AND v13.16B, v13.16B, v29.16B \n\t" + "AND v14.16B, v14.16B, v29.16B \n\t" + /* Four message blocks loaded */ + /* Add new message block to accumulator */ + "UADDW v21.2D, v21.2D, v10.2S \n\t" + "UADDW v22.2D, v22.2D, v11.2S \n\t" + "UADDW v23.2D, v23.2D, v12.2S \n\t" + "UADDW v24.2D, v24.2D, v13.2S \n\t" + "UADDW v25.2D, v25.2D, v14.2S \n\t" + /* Reduce radix 26 NEON */ + /* Interleave h0 -> h1 -> h2 -> h3 -> h4 */ + /* with h3 -> h4 -> h0 -> h1 */ + "USRA v22.2D, v21.2D, #26 \n\t" + "AND v21.16B, v21.16B, v27.16B \n\t" + "USRA v25.2D, v24.2D, #26 \n\t" + "AND v24.16B, v24.16B, v27.16B \n\t" + "USHR v15.2D, v25.2D, #26 \n\t" + "USRA v23.2D, v22.2D, #26 \n\t" + /* Simulate multiplying by 5 using adding and shifting */ + "SHL v18.2D, v15.2D, #2 \n\t" + "AND v16.16B, v22.16B, v27.16B \n\t" + "ADD v18.2D, v18.2D, v15.2D \n\t" + "AND v19.16B, v25.16B, v27.16B \n\t" + "ADD v21.2D, v21.2D, v18.2D \n\t" + "USRA v24.2D, v23.2D, #26 \n\t" + "AND v17.16B, v23.16B, v27.16B \n\t" + "USRA v16.2D, v21.2D, #26 \n\t" + "AND v15.16B, v21.16B, v27.16B \n\t" + "USRA v19.2D, v24.2D, #26 \n\t" + "AND v18.16B, v24.16B, v27.16B \n\t" + /* Copy values to lower halves of result registers */ + "MOV v15.S[1], v15.S[2] \n\t" + "MOV v16.S[1], v16.S[2] \n\t" + "MOV v17.S[1], v17.S[2] \n\t" + "MOV v18.S[1], v18.S[2] \n\t" + "MOV v19.S[1], v19.S[2] \n\t" + "B L_poly1305_64_loop_128_%= \n\t" + "\n" + ".align 2 \n\t" + "L_poly1305_64_loop_128_final_%=: \n\t" + /* Load m */ + /* Load two message blocks to NEON v10, v11, v12, v13, v14 */ + "LD2 { v10.2D-v11.2D }, 
[%[m]], #32 \n\t" + /* Copy r^2 to lower half of registers */ + "MOV v0.D[0], v0.D[1] \n\t" + "SUB %[bytes], %[bytes], %[POLY1305_BLOCK_SIZE]*2 \n\t" + "MOV v5.D[0], v5.D[1] \n\t" + "USHR v14.2D, v11.2D, #40 \n\t" + "MOV v1.D[0], v1.D[1] \n\t" + "ORR v14.16B, v14.16B, v26.16B \n\t" + "MOV v6.D[0], v6.D[1] \n\t" + "USHR v13.2D, v11.2D, #14 \n\t" + "MOV v2.D[0], v2.D[1] \n\t" + "AND v13.16B, v13.16B, v27.16B \n\t" + "MOV v7.D[0], v7.D[1] \n\t" + "SHL v12.2D, v11.2D, #12 \n\t" + "MOV v3.D[0], v3.D[1] \n\t" + "SRI v12.2D, v10.2D, #52 \n\t" + "MOV v8.D[0], v8.D[1] \n\t" + "AND v12.16B, v12.16B, v27.16B \n\t" + "MOV v4.D[0], v4.D[1] \n\t" + "USHR v11.2D, v10.2D, #26 \n\t" + "MOV v9.D[0], v9.D[1] \n\t" + "AND v11.16B, v11.16B, v27.16B \n\t" + /* Copy r^2 to ARM */ + "MOV w25, v0.S[2] \n\t" + "AND v10.16B, v10.16B, v27.16B \n\t" + "MOV w26, v1.S[2] \n\t" + /* Two message blocks loaded */ + /* Add last messages */ + "ADD v21.2D, v21.2D, v10.2D \n\t" + "MOV w27, v2.S[2] \n\t" + "ADD v22.2D, v22.2D, v11.2D \n\t" + "MOV w28, v3.S[2] \n\t" + "ADD v23.2D, v23.2D, v12.2D \n\t" + "MOV w30, v4.S[2] \n\t" + "ADD v24.2D, v24.2D, v13.2D \n\t" + /* Copy 5*r^2 to ARM */ + "MOV w15, v5.S[2] \n\t" + "ADD v25.2D, v25.2D, v14.2D \n\t" + "MOV w16, v6.S[2] \n\t" + /* Reduce message to be ready for next multiplication */ + /* Reduce radix 26 NEON */ + /* Interleave h0 -> h1 -> h2 -> h3 -> h4 */ + /* with h3 -> h4 -> h0 -> h1 */ + "USRA v22.2D, v21.2D, #26 \n\t" + "MOV w17, v7.S[2] \n\t" + "AND v21.16B, v21.16B, v27.16B \n\t" + "MOV w8, v8.S[2] \n\t" + "USRA v25.2D, v24.2D, #26 \n\t" + "MOV w19, v9.S[2] \n\t" + "AND v24.16B, v24.16B, v27.16B \n\t" + "USHR v15.2D, v25.2D, #26 \n\t" + "USRA v23.2D, v22.2D, #26 \n\t" + /* Simulate multiplying by 5 using adding and shifting */ + "SHL v18.2D, v15.2D, #2 \n\t" + "AND v16.16B, v22.16B, v27.16B \n\t" + "ADD v18.2D, v18.2D, v15.2D \n\t" + "AND v19.16B, v25.16B, v27.16B \n\t" + "ADD v21.2D, v21.2D, v18.2D \n\t" + "USRA v24.2D, v23.2D, #26 \n\t" + "AND v17.16B, v23.16B, v27.16B \n\t" + "USRA v16.2D, v21.2D, #26 \n\t" + "AND v15.16B, v21.16B, v27.16B \n\t" + "USRA v19.2D, v24.2D, #26 \n\t" + "AND v18.16B, v24.16B, v27.16B \n\t" + /* Copy values to lower halves of result registers */ + "MOV v15.S[1], v15.S[2] \n\t" + "MOV v16.S[1], v16.S[2] \n\t" + "MOV v17.S[1], v17.S[2] \n\t" + "MOV v18.S[1], v18.S[2] \n\t" + "MOV v19.S[1], v19.S[2] \n\t" + /* If less than 2 blocks left go straight to final multiplication. 
*/ + "CMP %[bytes], %[POLY1305_BLOCK_SIZE]*2 \n\t" + "BLO L_poly1305_64_last_mult_%= \n\t" + /* Else go to one loop of L_poly1305_64_loop_64 */ + "B L_poly1305_64_loop_64_%= \n\t" + "\n" + ".align 2 \n\t" + "L_poly1305_64_start_block_size_64_%=: \n\t" + /* Load r^2 to NEON v0, v1, v2, v3, v4 */ + "LD4R { v0.2S-v3.2S }, [%[r_2]], #16 \n\t" + "LD1R { v4.2S }, [%[r_2]] \n\t" + "SUB %[r_2], %[r_2], #16 \n\t" + /* Store r^2 * 5 */ + "MUL v5.4S, v0.4S, v28.S[0] \n\t" + "MUL v6.4S, v1.4S, v28.S[0] \n\t" + "MUL v7.4S, v2.4S, v28.S[0] \n\t" + "MUL v8.4S, v3.4S, v28.S[0] \n\t" + "MUL v9.4S, v4.4S, v28.S[0] \n\t" + /* Copy r^2 to ARM */ + "MOV w25, v0.S[0] \n\t" + "MOV w26, v1.S[0] \n\t" + "MOV w27, v2.S[0] \n\t" + "MOV w28, v3.S[0] \n\t" + "MOV w30, v4.S[0] \n\t" + /* Copy 5*r^2 to ARM */ + "MOV w15, v5.S[0] \n\t" + "MOV w16, v6.S[0] \n\t" + "MOV w17, v7.S[0] \n\t" + "MOV w8, v8.S[0] \n\t" + "MOV w19, v9.S[0] \n\t" + /* Load m */ + /* Load two message blocks to NEON v10, v11, v12, v13, v14 */ + "LD2 { v10.2D-v11.2D }, [%[m]], #32 \n\t" + "SUB %[bytes], %[bytes], %[POLY1305_BLOCK_SIZE]*2 \n\t" + "USHR v14.2D, v11.2D, #40 \n\t" + "ORR v14.16B, v14.16B, v26.16B \n\t" + "USHR v13.2D, v11.2D, #14 \n\t" + "AND v13.16B, v13.16B, v27.16B \n\t" + "SHL v12.2D, v11.2D, #12 \n\t" + "SRI v12.2D, v10.2D, #52 \n\t" + "AND v12.16B, v12.16B, v27.16B \n\t" + "USHR v11.2D, v10.2D, #26 \n\t" + "AND v11.16B, v11.16B, v27.16B \n\t" + "AND v10.16B, v10.16B, v27.16B \n\t" + "MOV v10.S[1], v10.S[2] \n\t" + "MOV v11.S[1], v11.S[2] \n\t" + "MOV v12.S[1], v12.S[2] \n\t" + "MOV v13.S[1], v13.S[2] \n\t" + "MOV v14.S[1], v14.S[2] \n\t" + /* Two message blocks loaded */ + /* Add messages to accumulator */ + "ADD v15.2S, v15.2S, v10.2S \n\t" + "ADD v16.2S, v16.2S, v11.2S \n\t" + "ADD v17.2S, v17.2S, v12.2S \n\t" + "ADD v18.2S, v18.2S, v13.2S \n\t" + "ADD v19.2S, v19.2S, v14.2S \n\t" + "\n" + ".align 2 \n\t" + "L_poly1305_64_loop_64_%=: \n\t" + /* d0 = h0*r0 + h1*s4 + h2*s3 + h3*s2 + h4*s1 */ + /* d1 = h0*r1 + h1*r0 + h2*s4 + h3*s3 + h4*s2 */ + /* d2 = h0*r2 + h1*r1 + h2*r0 + h3*s4 + h4*s3 */ + /* d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*s4 */ + /* d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0 */ + "UMULL v21.2D, v15.2S, v0.2S \n\t" + /* Compute h*r^2 */ + /* d0 = h0 * r0 + h1 * s4 + h2 * s3 + h3 * s2 + h4 * s1 */ + /* d1 = h0 * r1 + h1 * r0 + h2 * s4 + h3 * s3 + h4 * s2 */ + /* d2 = h0 * r2 + h1 * r1 + h2 * r0 + h3 * s4 + h4 * s3 */ + /* d3 = h0 * r3 + h1 * r2 + h2 * r1 + h3 * r0 + h4 * s4 */ + /* d4 = h0 * r4 + h1 * r3 + h2 * r2 + h3 * r1 + h4 * r0 */ + "MUL x9, x20, x25 \n\t" + "UMULL v22.2D, v15.2S, v1.2S \n\t" + "MUL x10, x20, x26 \n\t" + "UMULL v23.2D, v15.2S, v2.2S \n\t" + "MUL x11, x20, x27 \n\t" + "UMULL v24.2D, v15.2S, v3.2S \n\t" + "MUL x12, x20, x28 \n\t" + "UMULL v25.2D, v15.2S, v4.2S \n\t" + "MUL x13, x20, x30 \n\t" + "UMLAL v21.2D, v16.2S, v9.2S \n\t" + "MADD x9, x21, x19, x9 \n\t" + "UMLAL v22.2D, v16.2S, v0.2S \n\t" + "MADD x10, x21, x25, x10 \n\t" + "UMLAL v23.2D, v16.2S, v1.2S \n\t" + "MADD x11, x21, x26, x11 \n\t" + "UMLAL v24.2D, v16.2S, v2.2S \n\t" + "MADD x12, x21, x27, x12 \n\t" + "UMLAL v25.2D, v16.2S, v3.2S \n\t" + "MADD x13, x21, x28, x13 \n\t" + "UMLAL v21.2D, v17.2S, v8.2S \n\t" + "MADD x9, x22, x8, x9 \n\t" + "UMLAL v22.2D, v17.2S, v9.2S \n\t" + "MADD x10, x22, x19, x10 \n\t" + "UMLAL v23.2D, v17.2S, v0.2S \n\t" + "MADD x11, x22, x25, x11 \n\t" + "UMLAL v24.2D, v17.2S, v1.2S \n\t" + "MADD x12, x22, x26, x12 \n\t" + "UMLAL v25.2D, v17.2S, v2.2S \n\t" + "MADD x13, x22, x27, x13 \n\t" + "UMLAL v21.2D, v18.2S, 
v7.2S \n\t" + "MADD x9, x23, x17, x9 \n\t" + "UMLAL v22.2D, v18.2S, v8.2S \n\t" + "MADD x10, x23, x8, x10 \n\t" + "UMLAL v23.2D, v18.2S, v9.2S \n\t" + "MADD x11, x23, x19, x11 \n\t" + "UMLAL v24.2D, v18.2S, v0.2S \n\t" + "MADD x12, x23, x25, x12 \n\t" + "UMLAL v25.2D, v18.2S, v1.2S \n\t" + "MADD x13, x23, x26, x13 \n\t" + "UMLAL v21.2D, v19.2S, v6.2S \n\t" + "MADD x9, x24, x16, x9 \n\t" + "UMLAL v22.2D, v19.2S, v7.2S \n\t" + "MADD x10, x24, x17, x10 \n\t" + "UMLAL v23.2D, v19.2S, v8.2S \n\t" + "MADD x11, x24, x8, x11 \n\t" + "UMLAL v24.2D, v19.2S, v9.2S \n\t" + "MADD x12, x24, x19, x12 \n\t" + "UMLAL v25.2D, v19.2S, v0.2S \n\t" + "MADD x13, x24, x25, x13 \n\t" + /* Load m */ + /* Load two message blocks to NEON v10, v11, v12, v13, v14 */ + "LD2 { v10.2D-v11.2D }, [%[m]], #32 \n\t" + /* Reduce h % P */ + "MOV x14, #5 \n\t" + "SUB %[bytes], %[bytes], %[POLY1305_BLOCK_SIZE]*2 \n\t" + "ADD x10, x10, x9, LSR #26 \n\t" + "USHR v14.2D, v11.2D, #40 \n\t" + "ADD x13, x13, x12, LSR #26 \n\t" + "ORR v14.16B, v14.16B, v26.16B \n\t" + "AND x9, x9, #0x3ffffff \n\t" + "USHR v13.2D, v11.2D, #14 \n\t" + "LSR x20, x13, #26 \n\t" + "AND v13.16B, v13.16B, v27.16B \n\t" + "AND x12, x12, #0x3ffffff \n\t" + "SHL v12.2D, v11.2D, #12 \n\t" + "MADD x9, x20, x14, x9 \n\t" + "SRI v12.2D, v10.2D, #52 \n\t" + "ADD x11, x11, x10, LSR #26 \n\t" + "AND v12.16B, v12.16B, v27.16B \n\t" + "AND x10, x10, #0x3ffffff \n\t" + "USHR v11.2D, v10.2D, #26 \n\t" + "AND x13, x13, #0x3ffffff \n\t" + "AND v11.16B, v11.16B, v27.16B \n\t" + "ADD x12, x12, x11, LSR #26 \n\t" + "AND v10.16B, v10.16B, v27.16B \n\t" + "AND x22, x11, #0x3ffffff \n\t" + /* Two message blocks loaded */ + "ADD v21.2D, v21.2D, v10.2D \n\t" + "ADD x21, x10, x9, LSR #26 \n\t" + "ADD v22.2D, v22.2D, v11.2D \n\t" + "AND x20, x9, #0x3ffffff \n\t" + "ADD v23.2D, v23.2D, v12.2D \n\t" + "ADD x24, x13, x12, LSR #26 \n\t" + "ADD v24.2D, v24.2D, v13.2D \n\t" + "AND x23, x12, #0x3ffffff \n\t" + "ADD v25.2D, v25.2D, v14.2D \n\t" + /* Reduce radix 26 NEON */ + /* Interleave h0 -> h1 -> h2 -> h3 -> h4 */ + /* with h3 -> h4 -> h0 -> h1 */ + "USRA v22.2D, v21.2D, #26 \n\t" + "AND v21.16B, v21.16B, v27.16B \n\t" + "USRA v25.2D, v24.2D, #26 \n\t" + "AND v24.16B, v24.16B, v27.16B \n\t" + "USHR v15.2D, v25.2D, #26 \n\t" + "USRA v23.2D, v22.2D, #26 \n\t" + /* Simulate multiplying by 5 using adding and shifting */ + "SHL v18.2D, v15.2D, #2 \n\t" + "AND v16.16B, v22.16B, v27.16B \n\t" + "ADD v18.2D, v18.2D, v15.2D \n\t" + "AND v19.16B, v25.16B, v27.16B \n\t" + "ADD v21.2D, v21.2D, v18.2D \n\t" + "USRA v24.2D, v23.2D, #26 \n\t" + "AND v17.16B, v23.16B, v27.16B \n\t" + "USRA v16.2D, v21.2D, #26 \n\t" + "AND v15.16B, v21.16B, v27.16B \n\t" + "USRA v19.2D, v24.2D, #26 \n\t" + "AND v18.16B, v24.16B, v27.16B \n\t" + /* Copy values to lower halves of result registers */ + "MOV v15.S[1], v15.S[2] \n\t" + "MOV v16.S[1], v16.S[2] \n\t" + "MOV v17.S[1], v17.S[2] \n\t" + "MOV v18.S[1], v18.S[2] \n\t" + "MOV v19.S[1], v19.S[2] \n\t" + /* If at least two message blocks left then loop_64 */ + "CMP %[bytes], %[POLY1305_BLOCK_SIZE]*2 \n\t" + "BHS L_poly1305_64_loop_64_%= \n\t" + "\n" + ".align 2 \n\t" + "L_poly1305_64_last_mult_%=: \n\t" + /* Load r */ + "LD4 { v0.S-v3.S }[1], [%[r]], #16 \n\t" + /* Compute h*r^2 */ + /* d0 = h0 * r0 + h1 * s4 + h2 * s3 + h3 * s2 + h4 * s1 */ + /* d1 = h0 * r1 + h1 * r0 + h2 * s4 + h3 * s3 + h4 * s2 */ + /* d2 = h0 * r2 + h1 * r1 + h2 * r0 + h3 * s4 + h4 * s3 */ + /* d3 = h0 * r3 + h1 * r2 + h2 * r1 + h3 * r0 + h4 * s4 */ + /* d4 = h0 * r4 + h1 * r3 + h2 * r2 + h3 * r1 + 
h4 * r0 */ + "MUL x9, x20, x25 \n\t" + "LD1 { v4.S }[1], [%[r]] \n\t" + "MUL x10, x20, x26 \n\t" + "SUB %[r], %[r], #16 \n\t" + "MUL x11, x20, x27 \n\t" + /* Store [r^2, r] * 5 */ + "MUL v5.2S, v0.2S, v28.S[0] \n\t" + "MUL x12, x20, x28 \n\t" + "MUL v6.2S, v1.2S, v28.S[0] \n\t" + "MUL x13, x20, x30 \n\t" + "MUL v7.2S, v2.2S, v28.S[0] \n\t" + "MADD x9, x21, x19, x9 \n\t" + "MUL v8.2S, v3.2S, v28.S[0] \n\t" + "MADD x10, x21, x25, x10 \n\t" + "MUL v9.2S, v4.2S, v28.S[0] \n\t" + "MADD x11, x21, x26, x11 \n\t" + /* Final multiply by [r^2, r] */ + /* d0 = h0*r0 + h1*s4 + h2*s3 + h3*s2 + h4*s1 */ + /* d1 = h0*r1 + h1*r0 + h2*s4 + h3*s3 + h4*s2 */ + /* d2 = h0*r2 + h1*r1 + h2*r0 + h3*s4 + h4*s3 */ + /* d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*s4 */ + /* d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0 */ + "UMULL v21.2D, v15.2S, v0.2S \n\t" + "MADD x12, x21, x27, x12 \n\t" + "UMULL v22.2D, v15.2S, v1.2S \n\t" + "MADD x13, x21, x28, x13 \n\t" + "UMULL v23.2D, v15.2S, v2.2S \n\t" + "MADD x9, x22, x8, x9 \n\t" + "UMULL v24.2D, v15.2S, v3.2S \n\t" + "MADD x10, x22, x19, x10 \n\t" + "UMULL v25.2D, v15.2S, v4.2S \n\t" + "MADD x11, x22, x25, x11 \n\t" + "UMLAL v21.2D, v16.2S, v9.2S \n\t" + "MADD x12, x22, x26, x12 \n\t" + "UMLAL v22.2D, v16.2S, v0.2S \n\t" + "MADD x13, x22, x27, x13 \n\t" + "UMLAL v23.2D, v16.2S, v1.2S \n\t" + "MADD x9, x23, x17, x9 \n\t" + "UMLAL v24.2D, v16.2S, v2.2S \n\t" + "MADD x10, x23, x8, x10 \n\t" + "UMLAL v25.2D, v16.2S, v3.2S \n\t" + "MADD x11, x23, x19, x11 \n\t" + "UMLAL v21.2D, v17.2S, v8.2S \n\t" + "MADD x12, x23, x25, x12 \n\t" + "UMLAL v22.2D, v17.2S, v9.2S \n\t" + "MADD x13, x23, x26, x13 \n\t" + "UMLAL v23.2D, v17.2S, v0.2S \n\t" + "MADD x9, x24, x16, x9 \n\t" + "UMLAL v24.2D, v17.2S, v1.2S \n\t" + "MADD x10, x24, x17, x10 \n\t" + "UMLAL v25.2D, v17.2S, v2.2S \n\t" + "MADD x11, x24, x8, x11 \n\t" + "UMLAL v21.2D, v18.2S, v7.2S \n\t" + "MADD x12, x24, x19, x12 \n\t" + "UMLAL v22.2D, v18.2S, v8.2S \n\t" + "MADD x13, x24, x25, x13 \n\t" + "UMLAL v23.2D, v18.2S, v9.2S \n\t" + /* Reduce h % P */ + "MOV x14, #5 \n\t" + "UMLAL v24.2D, v18.2S, v0.2S \n\t" + "ADD x10, x10, x9, LSR #26 \n\t" + "UMLAL v25.2D, v18.2S, v1.2S \n\t" + "ADD x13, x13, x12, LSR #26 \n\t" + "UMLAL v21.2D, v19.2S, v6.2S \n\t" + "AND x9, x9, #0x3ffffff \n\t" + "UMLAL v22.2D, v19.2S, v7.2S \n\t" + "LSR x20, x13, #26 \n\t" + "UMLAL v23.2D, v19.2S, v8.2S \n\t" + "AND x12, x12, #0x3ffffff \n\t" + "UMLAL v24.2D, v19.2S, v9.2S \n\t" + "MADD x9, x20, x14, x9 \n\t" + "UMLAL v25.2D, v19.2S, v0.2S \n\t" + "ADD x11, x11, x10, LSR #26 \n\t" + /* Add even and odd elements */ + "ADDP d21, v21.2D \n\t" + "AND x10, x10, #0x3ffffff \n\t" + "ADDP d22, v22.2D \n\t" + "AND x13, x13, #0x3ffffff \n\t" + "ADDP d23, v23.2D \n\t" + "ADD x12, x12, x11, LSR #26 \n\t" + "ADDP d24, v24.2D \n\t" + "AND x22, x11, #0x3ffffff \n\t" + "ADDP d25, v25.2D \n\t" + "ADD x21, x10, x9, LSR #26 \n\t" + "AND x20, x9, #0x3ffffff \n\t" + "ADD x24, x13, x12, LSR #26 \n\t" + "AND x23, x12, #0x3ffffff \n\t" + /* Load h to NEON */ + "MOV v5.D[0], x20 \n\t" + "MOV v6.D[0], x21 \n\t" + "MOV v7.D[0], x22 \n\t" + "MOV v8.D[0], x23 \n\t" + "MOV v9.D[0], x24 \n\t" + /* Add ctx->h to current accumulator */ + "ADD v21.2D, v21.2D, v5.2D \n\t" + "ADD v22.2D, v22.2D, v6.2D \n\t" + "ADD v23.2D, v23.2D, v7.2D \n\t" + "ADD v24.2D, v24.2D, v8.2D \n\t" + "ADD v25.2D, v25.2D, v9.2D \n\t" + /* Reduce h (h % P) */ + /* Reduce radix 26 NEON */ + /* Interleave h0 -> h1 -> h2 -> h3 -> h4 */ + /* with h3 -> h4 -> h0 -> h1 */ + "USRA v22.2D, v21.2D, #26 \n\t" + "AND v21.16B, v21.16B, 
v27.16B \n\t" + "USRA v25.2D, v24.2D, #26 \n\t" + "AND v24.16B, v24.16B, v27.16B \n\t" + "USHR v5.2D, v25.2D, #26 \n\t" + "USRA v23.2D, v22.2D, #26 \n\t" + /* Simulate multiplying by 5 using adding and shifting */ + "SHL v8.2D, v5.2D, #2 \n\t" + "AND v6.16B, v22.16B, v27.16B \n\t" + "ADD v8.2D, v8.2D, v5.2D \n\t" + "AND v9.16B, v25.16B, v27.16B \n\t" + "ADD v21.2D, v21.2D, v8.2D \n\t" + "USRA v24.2D, v23.2D, #26 \n\t" + "AND v7.16B, v23.16B, v27.16B \n\t" + "USRA v6.2D, v21.2D, #26 \n\t" + "AND v5.16B, v21.16B, v27.16B \n\t" + "USRA v9.2D, v24.2D, #26 \n\t" + "AND v8.16B, v24.16B, v27.16B \n\t" + /* Copy values to lower halves of result registers */ + /* Store h */ + "ST4 { v5.S-v8.S }[0], [%[h]], #16 \n\t" + "ST1 { v9.S }[0], [%[h]] \n\t" + "SUB %[h], %[h], #16 \n\t" + "\n" + ".align 2 \n\t" + "L_poly1305_64_done_%=: \n\t" + : [bytes] "+r" (bytes), + [m] "+r" (m), + [ctx] "+m" (ctx) + : [POLY1305_BLOCK_SIZE] "I" (POLY1305_BLOCK_SIZE), + [h] "r" (ctx->h), + [r] "r" (ctx->r), + [r_2] "r" (ctx->r_2), + [r_4] "r" (ctx->r_4), + [finished] "r" ((word64)ctx->finished) + : "memory", "cc", + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", + "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", + "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", "w16", "w17", + "w19", "w20", "w21", "w22", "w23", "w24", "w25", "w26", "w27", "w28", + "w30", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", + "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", + "x28", "x30" + ); + poly1305_blocks_16(ctx, m, bytes); +} + +void poly1305_block(Poly1305* ctx, const unsigned char *m) +{ + poly1305_blocks_16(ctx, m, POLY1305_BLOCK_SIZE); +} + +#if defined(POLY130564) +static word64 clamp[] = { + 0x0ffffffc0fffffff, + 0x0ffffffc0ffffffc, +}; +#endif /* POLY130564 */ + + +int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz) +{ + if (key == NULL) + return BAD_FUNC_ARG; + +#ifdef CHACHA_AEAD_TEST + word32 k; + printf("Poly key used:\n"); + for (k = 0; k < keySz; k++) { + printf("%02x", key[k]); + if ((k+1) % 8 == 0) + printf("\n"); + } + printf("\n"); +#endif + + if (keySz != 32 || ctx == NULL) + return BAD_FUNC_ARG; + + __asm__ __volatile__ ( + /* Load key material */ + "LDP x8, x9, [%[key]] \n\t" + "LDP x10, x11, [%[key], #16] \n\t" + /* Load clamp */ + "LDP x12, x13, [%[clamp]] \n\t" + /* Apply clamp */ + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ + "AND x8, x8, x12 \n\t" + "AND x9, x9, x13 \n\t" + "MOV x19, xzr \n\t" + "MOV x20, xzr \n\t" + "MOV x21, xzr \n\t" + "MOV x22, xzr \n\t" + "MOV x23, xzr \n\t" + "BFI x19, x8, #0, #26 \n\t" + "LSR x8, x8, #26 \n\t" + "BFI x20, x8, #0, #26 \n\t" + "LSR x8, x8, #26 \n\t" + "BFI x21, x8, #0, #12 \n\t" + "BFI x21, x9, #12, #14 \n\t" + "LSR x9, x9, #14 \n\t" + "BFI x22, x9, #0, #26 \n\t" + "LSR x9, x9, #26 \n\t" + "BFI x23, x9, #0, #24 \n\t" + /* Compute r^2 */ + /* r*5 */ + "MOV x8, #5 \n\t" + "MUL x24, x20, x8 \n\t" + "MUL x25, x21, x8 \n\t" + "MUL x26, x22, x8 \n\t" + "MUL x27, x23, x8 \n\t" + /* d = r*r */ + /* d0 = h0 * r0 + h1 * s4 + h2 * s3 + h3 * s2 + h4 * s1 */ + /* d1 = h0 * r1 + h1 * r0 + h2 * s4 + h3 * s3 + h4 * s2 */ + /* d2 = h0 * r2 + h1 * r1 + h2 * r0 + h3 * s4 + h4 * s3 */ + /* d3 = h0 * r3 + h1 * r2 + h2 * r1 + h3 * r0 + h4 * s4 */ + /* d4 = h0 * r4 + h1 * r3 + h2 * r2 + h3 * r1 + h4 * r0 */ + "MUL x14, x19, x19 \n\t" + "MUL x15, x19, x20 \n\t" + "MUL x16, x19, x21 \n\t" + "MUL x17, x19, x22 \n\t" + "MUL x7, x19, x23 
\n\t" + "MADD x14, x20, x27, x14 \n\t" + "MADD x15, x20, x19, x15 \n\t" + "MADD x16, x20, x20, x16 \n\t" + "MADD x17, x20, x21, x17 \n\t" + "MADD x7, x20, x22, x7 \n\t" + "MADD x14, x21, x26, x14 \n\t" + "MADD x15, x21, x27, x15 \n\t" + "MADD x16, x21, x19, x16 \n\t" + "MADD x17, x21, x20, x17 \n\t" + "MADD x7, x21, x21, x7 \n\t" + "MADD x14, x22, x25, x14 \n\t" + "MADD x15, x22, x26, x15 \n\t" + "MADD x16, x22, x27, x16 \n\t" + "MADD x17, x22, x19, x17 \n\t" + "MADD x7, x22, x20, x7 \n\t" + "MADD x14, x23, x24, x14 \n\t" + "MADD x15, x23, x25, x15 \n\t" + "MADD x16, x23, x26, x16 \n\t" + "MADD x17, x23, x27, x17 \n\t" + "MADD x7, x23, x19, x7 \n\t" + /* r_2 = r^2 % P */ + "ADD x15, x15, x14, LSR #26 \n\t" + "ADD x7, x7, x17, LSR #26 \n\t" + "AND x14, x14, #0x3ffffff \n\t" + "LSR x9, x7, #26 \n\t" + "AND x17, x17, #0x3ffffff \n\t" + "MADD x14, x9, x8, x14 \n\t" + "ADD x16, x16, x15, LSR #26 \n\t" + "AND x15, x15, #0x3ffffff \n\t" + "AND x7, x7, #0x3ffffff \n\t" + "ADD x17, x17, x16, LSR #26 \n\t" + "AND x16, x16, #0x3ffffff \n\t" + "ADD x15, x15, x14, LSR #26 \n\t" + "AND x14, x14, #0x3ffffff \n\t" + "ADD x7, x7, x17, LSR #26 \n\t" + "AND x17, x17, #0x3ffffff \n\t" + /* Store r */ + "ORR x19, x19, x20, LSL #32 \n\t" + "ORR x21, x21, x22, LSL #32 \n\t" + "STP x19, x21, [%[ctx_r]] \n\t" + "STR w23, [%[ctx_r], #16] \n\t" + "MOV x8, #5 \n\t" + "MUL x24, x15, x8 \n\t" + "MUL x25, x16, x8 \n\t" + "MUL x26, x17, x8 \n\t" + "MUL x27, x7, x8 \n\t" + /* Compute r^4 */ + /* d0 = h0 * r0 + h1 * s4 + h2 * s3 + h3 * s2 + h4 * s1 */ + /* d1 = h0 * r1 + h1 * r0 + h2 * s4 + h3 * s3 + h4 * s2 */ + /* d2 = h0 * r2 + h1 * r1 + h2 * r0 + h3 * s4 + h4 * s3 */ + /* d3 = h0 * r3 + h1 * r2 + h2 * r1 + h3 * r0 + h4 * s4 */ + /* d4 = h0 * r4 + h1 * r3 + h2 * r2 + h3 * r1 + h4 * r0 */ + "MUL x19, x14, x14 \n\t" + "MUL x20, x14, x15 \n\t" + "MUL x21, x14, x16 \n\t" + "MUL x22, x14, x17 \n\t" + "MUL x23, x14, x7 \n\t" + "MADD x19, x15, x27, x19 \n\t" + "MADD x20, x15, x14, x20 \n\t" + "MADD x21, x15, x15, x21 \n\t" + "MADD x22, x15, x16, x22 \n\t" + "MADD x23, x15, x17, x23 \n\t" + "MADD x19, x16, x26, x19 \n\t" + "MADD x20, x16, x27, x20 \n\t" + "MADD x21, x16, x14, x21 \n\t" + "MADD x22, x16, x15, x22 \n\t" + "MADD x23, x16, x16, x23 \n\t" + "MADD x19, x17, x25, x19 \n\t" + "MADD x20, x17, x26, x20 \n\t" + "MADD x21, x17, x27, x21 \n\t" + "MADD x22, x17, x14, x22 \n\t" + "MADD x23, x17, x15, x23 \n\t" + "MADD x19, x7, x24, x19 \n\t" + "MADD x20, x7, x25, x20 \n\t" + "MADD x21, x7, x26, x21 \n\t" + "MADD x22, x7, x27, x22 \n\t" + "MADD x23, x7, x14, x23 \n\t" + /* r^4 % P */ + "ADD x20, x20, x19, LSR #26 \n\t" + "ADD x23, x23, x22, LSR #26 \n\t" + "AND x19, x19, #0x3ffffff \n\t" + "LSR x9, x23, #26 \n\t" + "AND x22, x22, #0x3ffffff \n\t" + "MADD x19, x9, x8, x19 \n\t" + "ADD x21, x21, x20, LSR #26 \n\t" + "AND x20, x20, #0x3ffffff \n\t" + "AND x23, x23, #0x3ffffff \n\t" + "ADD x22, x22, x21, LSR #26 \n\t" + "AND x21, x21, #0x3ffffff \n\t" + "ADD x20, x20, x19, LSR #26 \n\t" + "AND x19, x19, #0x3ffffff \n\t" + "ADD x23, x23, x22, LSR #26 \n\t" + "AND x22, x22, #0x3ffffff \n\t" + /* Store r^2 */ + "ORR x14, x14, x15, LSL #32 \n\t" + "ORR x16, x16, x17, LSL #32 \n\t" + "STP x14, x16, [%[ctx_r_2]] \n\t" + "STR w7, [%[ctx_r_2], #16] \n\t" + /* Store r^4 */ + "ORR x19, x19, x20, LSL #32 \n\t" + "ORR x21, x21, x22, LSL #32 \n\t" + "STP x19, x21, [%[ctx_r_4]] \n\t" + "STR w23, [%[ctx_r_4], #16] \n\t" + /* h (accumulator) = 0 */ + "STP xzr, xzr, [%[ctx_h_0]] \n\t" + "STR wzr, [%[ctx_h_0], #16] \n\t" + /* Save pad for later */ 
+ "STP x10, x11, [%[ctx_pad]] \n\t" + /* Zero leftover */ + "STR xzr, [%[ctx_leftover]] \n\t" + /* Zero finished */ + "STRB wzr, [%[ctx_finished]] \n\t" + : + : [clamp] "r" (clamp), + [key] "r" (key), + [ctx_r] "r" (ctx->r), + [ctx_r_2] "r" (ctx->r_2), + [ctx_r_4] "r" (ctx->r_4), + [ctx_h_0] "r" (ctx->h), + [ctx_pad] "r" (ctx->pad), + [ctx_leftover] "r" (&ctx->leftover), + [ctx_finished] "r" (&ctx->finished) + : "memory", "cc", + "w7", "w14", "w15", "w16", "w17", "w19", "w20", "w21", "w22", "w23", + "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", + "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27" + ); + + return 0; +} + + +int wc_Poly1305Final(Poly1305* ctx, byte* mac) +{ + + if (ctx == NULL) + return BAD_FUNC_ARG; + + /* process the remaining block */ + if (ctx->leftover) { + size_t i = ctx->leftover; + ctx->buffer[i++] = 1; + for (; i < POLY1305_BLOCK_SIZE; i++) + ctx->buffer[i] = 0; + ctx->finished = 1; + poly1305_block(ctx, ctx->buffer); + } + + __asm__ __volatile__ ( + /* Load raw h and zero h registers */ + "LDP x2, x3, %[h_addr] \n\t" + "MOV x5, xzr \n\t" + "LDR w4, %[h_4_addr] \n\t" + "MOV x6, xzr \n\t" + "LDP x16, x17, %[pad_addr] \n\t" + /* Base 26 -> Base 64 */ + "MOV w5, w2 \n\t" + "LSR x2, x2, #32 \n\t" + "ORR x5, x5, x2, LSL #26 \n\t" + "ORR x5, x5, x3, LSL #52 \n\t" + "LSR w6, w3, #12 \n\t" + "LSR x3, x3, #32 \n\t" + "ORR x6, x6, x3, LSL #14 \n\t" + "ORR x6, x6, x4, LSL #40 \n\t" + "LSR x7, x4, #24 \n\t" + /* Check if h is larger than p */ + "ADDS x2, x5, #5 \n\t" + "ADCS x3, x6, xzr \n\t" + "ADC x4, x7, xzr \n\t" + /* Check if h+5 is larger than 2^130 */ + "CMP x4, #3 \n\t" + "CSEL x5, x2, x5, HI \n\t" + "CSEL x6, x3, x6, HI \n\t" + "ADDS x5, x5, x16 \n\t" + "ADC x6, x6, x17 \n\t" + "STP x5, x6, [%[mac]] \n\t" + : [mac] "+r" (mac) + : [pad_addr] "m" (ctx->pad), + [h_addr] "m" (ctx->h), + [h_4_addr] "m" (ctx->h[4]) + : "memory", "cc", + "w2", "w3", "w4", "w5", "w6", "w7", "x2", "x3", "x4", "x5", + "x6", "x7", "x16", "x17" + ); + + /* zero out the state */ + ctx->h[0] = 0; + ctx->h[1] = 0; + ctx->h[2] = 0; + ctx->h[3] = 0; + ctx->h[4] = 0; + ctx->r[0] = 0; + ctx->r[1] = 0; + ctx->r[2] = 0; + ctx->r[3] = 0; + ctx->r[4] = 0; + ctx->r_2[0] = 0; + ctx->r_2[1] = 0; + ctx->r_2[2] = 0; + ctx->r_2[3] = 0; + ctx->r_2[4] = 0; + ctx->r_4[0] = 0; + ctx->r_4[1] = 0; + ctx->r_4[2] = 0; + ctx->r_4[3] = 0; + ctx->r_4[4] = 0; + ctx->pad[0] = 0; + ctx->pad[1] = 0; + ctx->pad[2] = 0; + ctx->pad[3] = 0; + + return 0; +} + +#endif /* HAVE_POLY1305 */ +#endif /* WOLFSSL_ARMASM */ +#endif /* __aarch64__ */ diff --git a/client/wolfssl/wolfcrypt/src/port/arm/armv8-sha256.c b/client/wolfssl/wolfcrypt/src/port/arm/armv8-sha256.c new file mode 100644 index 0000000..7f214d4 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/arm/armv8-sha256.c @@ -0,0 +1,1508 @@ +/* armv8-sha256.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef WOLFSSL_ARMASM
+#if !defined(NO_SHA256) || defined(WOLFSSL_SHA224)
+
+#include <wolfssl/wolfcrypt/sha256.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+
+static const ALIGN32 word32 K[64] = {
+    0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL,
+    0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L,
+    0x243185BEL, 0x550C7DC3L, 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L,
+    0xC19BF174L, 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
+    0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL, 0x983E5152L,
+    0xA831C66DL, 0xB00327C8L, 0xBF597FC7L, 0xC6E00BF3L, 0xD5A79147L,
+    0x06CA6351L, 0x14292967L, 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL,
+    0x53380D13L, 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
+    0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L, 0xD192E819L,
+    0xD6990624L, 0xF40E3585L, 0x106AA070L, 0x19A4C116L, 0x1E376C08L,
+    0x2748774CL, 0x34B0BCB5L, 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL,
+    0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
+    0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
+};
+
+
+static int InitSha256(wc_Sha256* sha256)
+{
+    int ret = 0;
+
+    if (sha256 == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    sha256->digest[0] = 0x6A09E667L;
+    sha256->digest[1] = 0xBB67AE85L;
+    sha256->digest[2] = 0x3C6EF372L;
+    sha256->digest[3] = 0xA54FF53AL;
+    sha256->digest[4] = 0x510E527FL;
+    sha256->digest[5] = 0x9B05688CL;
+    sha256->digest[6] = 0x1F83D9ABL;
+    sha256->digest[7] = 0x5BE0CD19L;
+
+    sha256->buffLen = 0;
+    sha256->loLen = 0;
+    sha256->hiLen = 0;
+
+    return ret;
+}
+
+static WC_INLINE void AddLength(wc_Sha256* sha256, word32 len)
+{
+    word32 tmp = sha256->loLen;
+    if ((sha256->loLen += len) < tmp)
+        sha256->hiLen++; /* carry low to high */
+}
+
+
+#ifdef __aarch64__
+
+/* ARMv8 hardware acceleration */
+static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
+{
+    word32 add;
+    word32 numBlocks;
+
+    /* only perform actions if a buffer is passed in */
+    if (len > 0) {
+        /* fill leftover buffer with data */
+        add = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen);
+        XMEMCPY((byte*)(sha256->buffer) + sha256->buffLen, data, add);
+        sha256->buffLen += add;
+        data += add;
+        len -= add;
+
+        /* number of blocks in a row to complete */
+        numBlocks = (len + sha256->buffLen)/WC_SHA256_BLOCK_SIZE;
+
+        if (numBlocks > 0) {
+            word32* k = (word32*)K;
+
+            /* get leftover amount after blocks */
+            add = (len + sha256->buffLen) - numBlocks * WC_SHA256_BLOCK_SIZE;
+            __asm__ volatile (
+            "#load leftover data\n"
+            "LD1 {v0.2d-v3.2d}, %[buffer] \n"
+
+            "#load current digest\n"
+            "LD1 {v12.2d-v13.2d}, %[digest] \n"
+            "MOV w8, %w[blocks] \n"
+            "REV32 v0.16b, v0.16b \n"
+            "REV32 v1.16b, v1.16b \n"
+            "REV32 v2.16b, v2.16b \n"
+            "REV32 v3.16b, v3.16b \n"
+
+            "#load K values in \n"
+            "LD1 {v16.4s-v19.4s}, [%[k]], #64 \n"
+            "LD1 {v20.4s-v23.4s}, [%[k]], #64 \n"
+            "MOV v14.16b, v12.16b \n" /* store digest for add at the end */
+            "MOV v15.16b, v13.16b \n"
+            "LD1 {v24.4s-v27.4s}, [%[k]], #64 \n"
+            "LD1 {v28.4s-v31.4s}, [%[k]], #64 \n"
+
+            /* beginning of SHA256 block operation */
+            "1:\n"
+            /* Round 1 */
+            "MOV v4.16b, v0.16b \n"
+            "ADD v0.4s, v0.4s, v16.4s \n"
+            "MOV v11.16b, v12.16b \n"
+            "SHA256H q12, q13, v0.4s \n"
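+            /* SHA256H overwrites q12, so v11 keeps a copy of the old */
+            /* low half of the state: SHA256H2 still needs the */
+            /* pre-round value to update the high half. */ +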
"SHA256H2 q13, q11, v0.4s \n" + + /* Round 2 */ + "SHA256SU0 v4.4s, v1.4s \n" + "ADD v0.4s, v1.4s, v17.4s \n" + "MOV v11.16b, v12.16b \n" + "SHA256SU1 v4.4s, v2.4s, v3.4s \n" + "SHA256H q12, q13, v0.4s \n" + "SHA256H2 q13, q11, v0.4s \n" + + /* Round 3 */ + "SHA256SU0 v1.4s, v2.4s \n" + "ADD v0.4s, v2.4s, v18.4s \n" + "MOV v11.16b, v12.16b \n" + "SHA256SU1 v1.4s, v3.4s, v4.4s \n" + "SHA256H q12, q13, v0.4s \n" + "SHA256H2 q13, q11, v0.4s \n" + + /* Round 4 */ + "SHA256SU0 v2.4s, v3.4s \n" + "ADD v0.4s, v3.4s, v19.4s \n" + "MOV v11.16b, v12.16b \n" + "SHA256SU1 v2.4s, v4.4s, v1.4s \n" + "SHA256H q12, q13, v0.4s \n" + "SHA256H2 q13, q11, v0.4s \n" + + /* Round 5 */ + "SHA256SU0 v3.4s, v4.4s \n" + "ADD v0.4s, v4.4s, v20.4s \n" + "MOV v11.16b, v12.16b \n" + "SHA256SU1 v3.4s, v1.4s, v2.4s \n" + "SHA256H q12, q13, v0.4s \n" + "SHA256H2 q13, q11, v0.4s \n" + + /* Round 6 */ + "SHA256SU0 v4.4s, v1.4s \n" + "ADD v0.4s, v1.4s, v21.4s \n" + "MOV v11.16b, v12.16b \n" + "SHA256SU1 v4.4s, v2.4s, v3.4s \n" + "SHA256H q12, q13, v0.4s \n" + "SHA256H2 q13, q11, v0.4s \n" + + /* Round 7 */ + "SHA256SU0 v1.4s, v2.4s \n" + "ADD v0.4s, v2.4s, v22.4s \n" + "MOV v11.16b, v12.16b \n" + "SHA256SU1 v1.4s, v3.4s, v4.4s \n" + "SHA256H q12, q13, v0.4s \n" + "SHA256H2 q13, q11, v0.4s \n" + + /* Round 8 */ + "SHA256SU0 v2.4s, v3.4s \n" + "ADD v0.4s, v3.4s, v23.4s \n" + "MOV v11.16b, v12.16b \n" + "SHA256SU1 v2.4s, v4.4s, v1.4s \n" + "SHA256H q12, q13, v0.4s \n" + "SHA256H2 q13, q11, v0.4s \n" + + /* Round 9 */ + "SHA256SU0 v3.4s, v4.4s \n" + "ADD v0.4s, v4.4s, v24.4s \n" + "MOV v11.16b, v12.16b \n" + "SHA256SU1 v3.4s, v1.4s, v2.4s \n" + "SHA256H q12, q13, v0.4s \n" + "SHA256H2 q13, q11, v0.4s \n" + + /* Round 10 */ + "SHA256SU0 v4.4s, v1.4s \n" + "ADD v0.4s, v1.4s, v25.4s \n" + "MOV v11.16b, v12.16b \n" + "SHA256SU1 v4.4s, v2.4s, v3.4s \n" + "SHA256H q12, q13, v0.4s \n" + "SHA256H2 q13, q11, v0.4s \n" + + /* Round 11 */ + "SHA256SU0 v1.4s, v2.4s \n" + "ADD v0.4s, v2.4s, v26.4s \n" + "MOV v11.16b, v12.16b \n" + "SHA256SU1 v1.4s, v3.4s, v4.4s \n" + "SHA256H q12, q13, v0.4s \n" + "SHA256H2 q13, q11, v0.4s \n" + + /* Round 12 */ + "SHA256SU0 v2.4s, v3.4s \n" + "ADD v0.4s, v3.4s, v27.4s \n" + "MOV v11.16b, v12.16b \n" + "SHA256SU1 v2.4s, v4.4s, v1.4s \n" + "SHA256H q12, q13, v0.4s \n" + "SHA256H2 q13, q11, v0.4s \n" + + /* Round 13 */ + "SHA256SU0 v3.4s, v4.4s \n" + "ADD v0.4s, v4.4s, v28.4s \n" + "MOV v11.16b, v12.16b \n" + "SHA256SU1 v3.4s, v1.4s, v2.4s \n" + "SHA256H q12, q13, v0.4s \n" + "SHA256H2 q13, q11, v0.4s \n" + + /* Round 14 */ + "ADD v0.4s, v1.4s, v29.4s \n" + "MOV v11.16b, v12.16b \n" + "SHA256H q12, q13, v0.4s \n" + "SHA256H2 q13, q11, v0.4s \n" + + /* Round 15 */ + "ADD v0.4s, v2.4s, v30.4s \n" + "MOV v11.16b, v12.16b \n" + "SHA256H q12, q13, v0.4s \n" + "SHA256H2 q13, q11, v0.4s \n" + + /* Round 16 */ + "ADD v0.4s, v3.4s, v31.4s \n" + "MOV v11.16b, v12.16b \n" + "SHA256H q12, q13, v0.4s \n" + "SHA256H2 q13, q11, v0.4s \n" + + "#Add working vars back into digest state \n" + "SUB w8, w8, #1 \n" + "ADD v12.4s, v12.4s, v14.4s \n" + "ADD v13.4s, v13.4s, v15.4s \n" + + "#check if more blocks should be done\n" + "CBZ w8, 2f \n" + + "#load in message and schedule updates \n" + "LD1 {v0.2d-v3.2d}, [%[dataIn]], #64 \n" + "MOV v14.16b, v12.16b \n" + "MOV v15.16b, v13.16b \n" + "REV32 v0.16b, v0.16b \n" + "REV32 v1.16b, v1.16b \n" + "REV32 v2.16b, v2.16b \n" + "REV32 v3.16b, v3.16b \n" + "B 1b \n" /* do another block */ + + "2:\n" + "STP q12, q13, %[out] \n" + + : [out] "=m" (sha256->digest), "=m" (sha256->buffer), "=r" 
(numBlocks), + "=r" (data), "=r" (k) + : [k] "4" (k), [digest] "m" (sha256->digest), [buffer] "m" (sha256->buffer), + [blocks] "2" (numBlocks), [dataIn] "3" (data) + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", + "v15", "v16", "v17", "v18", "v19", "v20", "v21", + "v22", "v23", "v24", "v25", "v26", "v27", "v28", + "v29", "v30", "v31", "w8" + ); + + AddLength(sha256, WC_SHA256_BLOCK_SIZE * numBlocks); + + /* copy over any remaining data leftover */ + XMEMCPY(sha256->buffer, data, add); + sha256->buffLen = add; + } + } + + /* account for possibility of not used if len = 0 */ + (void)add; + (void)numBlocks; + + return 0; +} + + +static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) +{ + byte* local; + + local = (byte*)sha256->buffer; + AddLength(sha256, sha256->buffLen); /* before adding pads */ + + local[sha256->buffLen++] = 0x80; /* add 1 */ + + /* pad with zeros */ + if (sha256->buffLen > WC_SHA256_PAD_SIZE) { + + XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_BLOCK_SIZE - sha256->buffLen); + sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen; + __asm__ volatile ( + "LD1 {v4.2d-v7.2d}, %[buffer] \n" + "MOV v0.16b, v4.16b \n" + "MOV v1.16b, v5.16b \n" + "REV32 v0.16b, v0.16b \n" + "REV32 v1.16b, v1.16b \n" + "MOV v2.16b, v6.16b \n" + "MOV v3.16b, v7.16b \n" + "REV32 v2.16b, v2.16b \n" + "REV32 v3.16b, v3.16b \n" + "MOV v4.16b, v0.16b \n" + "MOV v5.16b, v1.16b \n" + "LD1 {v20.2d-v21.2d}, %[digest] \n" + + "#SHA256 operation on updated message \n" + "MOV v16.16b, v20.16b \n" + "MOV v17.16b, v21.16b \n" + + "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" + "SHA256SU0 v4.4s, v1.4s \n" + "ADD v0.4s, v0.4s, v22.4s \n" + "MOV v6.16b, v2.16b \n" + "MOV v18.16b, v16.16b \n" + "SHA256SU1 v4.4s, v2.4s, v3.4s \n" + "SHA256H q16, q17, v0.4s \n" + "SHA256H2 q17, q18, v0.4s \n" + + "SHA256SU0 v5.4s, v2.4s \n" + "ADD v1.4s, v1.4s, v23.4s \n" + "MOV v18.16b, v16.16b \n" + "MOV v7.16b, v3.16b \n" + "SHA256SU1 v5.4s, v3.4s, v4.4s \n" + "SHA256H q16, q17, v1.4s \n" + "SHA256H2 q17, q18, v1.4s \n" + + "SHA256SU0 v6.4s, v3.4s \n" + "ADD v2.4s, v2.4s, v24.4s \n" + "MOV v18.16b, v16.16b \n" + "MOV v8.16b, v4.16b \n" + "SHA256SU1 v6.4s, v4.4s, v5.4s \n" + "SHA256H q16, q17, v2.4s \n" + "SHA256H2 q17, q18, v2.4s \n" + + "SHA256SU0 v7.4s, v4.4s \n" + "ADD v3.4s, v3.4s, v25.4s \n" + "MOV v18.16b, v16.16b \n" + "MOV v9.16b, v5.16b \n" + "SHA256SU1 v7.4s, v5.4s, v6.4s \n" + "SHA256H q16, q17, v3.4s \n" + "SHA256H2 q17, q18, v3.4s \n" + + "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" + "SHA256SU0 v8.4s, v5.4s \n" + "ADD v4.4s, v4.4s, v22.4s \n" + "MOV v18.16b, v16.16b \n" + "MOV v10.16b, v6.16b \n" + "SHA256SU1 v8.4s, v6.4s, v7.4s \n" + "SHA256H q16, q17, v4.4s \n" + "SHA256H2 q17, q18, v4.4s \n" + + "SHA256SU0 v9.4s, v6.4s \n" + "ADD v5.4s, v5.4s, v23.4s \n" + "MOV v18.16b, v16.16b \n" + "MOV v11.16b, v7.16b \n" + "SHA256SU1 v9.4s, v7.4s, v8.4s \n" + "SHA256H q16, q17, v5.4s \n" + "SHA256H2 q17, q18, v5.4s \n" + + "SHA256SU0 v10.4s, v7.4s \n" + "ADD v6.4s, v6.4s, v24.4s \n" + "MOV v18.16b, v16.16b \n" + "MOV v12.16b, v8.16b \n" + "SHA256SU1 v10.4s, v8.4s, v9.4s \n" + "SHA256H q16, q17, v6.4s \n" + "SHA256H2 q17, q18, v6.4s \n" + + "SHA256SU0 v11.4s, v8.4s \n" + "ADD v7.4s, v7.4s, v25.4s \n" + "MOV v18.16b, v16.16b \n" + "MOV v13.16b, v9.16b \n" + "SHA256SU1 v11.4s, v9.4s, v10.4s \n" + "SHA256H q16, q17, v7.4s \n" + "SHA256H2 q17, q18, v7.4s \n" + + "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" + "SHA256SU0 v12.4s, v9.4s \n" + "ADD v8.4s, v8.4s, v22.4s \n" 
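+ /* Each SHA256SU0/SHA256SU1 pair extends the message schedule, */
+ /* producing the next four W[t] words from the previous sixteen. */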
+ "MOV v18.16b, v16.16b \n" + "MOV v14.16b, v10.16b \n" + "SHA256SU1 v12.4s, v10.4s, v11.4s \n" + "SHA256H q16, q17, v8.4s \n" + "SHA256H2 q17, q18, v8.4s \n" + + "SHA256SU0 v13.4s, v10.4s \n" + "ADD v9.4s, v9.4s, v23.4s \n" + "MOV v18.16b, v16.16b \n" + "MOV v15.16b, v11.16b \n" + "SHA256SU1 v13.4s, v11.4s, v12.4s \n" + "SHA256H q16, q17, v9.4s \n" + "SHA256H2 q17, q18, v9.4s \n" + + "SHA256SU0 v14.4s, v11.4s \n" + "ADD v10.4s, v10.4s, v24.4s \n" + "MOV v18.16b, v16.16b \n" + "SHA256SU1 v14.4s, v12.4s, v13.4s \n" + "SHA256H q16, q17, v10.4s \n" + "SHA256H2 q17, q18, v10.4s \n" + + "SHA256SU0 v15.4s, v12.4s \n" + "ADD v11.4s, v11.4s, v25.4s \n" + "MOV v18.16b, v16.16b \n" + "SHA256SU1 v15.4s, v13.4s, v14.4s \n" + "SHA256H q16, q17, v11.4s \n" + "SHA256H2 q17, q18, v11.4s \n" + + "LD1 {v22.16b-v25.16b}, [%[k]] \n" + "ADD v12.4s, v12.4s, v22.4s \n" + "MOV v18.16b, v16.16b \n" + "SHA256H q16, q17, v12.4s \n" + "SHA256H2 q17, q18, v12.4s \n" + + "ADD v13.4s, v13.4s, v23.4s \n" + "MOV v18.16b, v16.16b \n" + "SHA256H q16, q17, v13.4s \n" + "SHA256H2 q17, q18, v13.4s \n" + + "ADD v14.4s, v14.4s, v24.4s \n" + "MOV v18.16b, v16.16b \n" + "SHA256H q16, q17, v14.4s \n" + "SHA256H2 q17, q18, v14.4s \n" + + "ADD v15.4s, v15.4s, v25.4s \n" + "MOV v18.16b, v16.16b \n" + "SHA256H q16, q17, v15.4s \n" + "SHA256H2 q17, q18, v15.4s \n" + + "#Add working vars back into digest state \n" + "ADD v16.4s, v16.4s, v20.4s \n" + "ADD v17.4s, v17.4s, v21.4s \n" + "STP q16, q17, %[out] \n" + + : [out] "=m" (sha256->digest) + : [k] "r" (K), [digest] "m" (sha256->digest), + [buffer] "m" (sha256->buffer) + : "cc", "memory", "v0", "v1", "v2", "v3", "v8", "v9", "v10", "v11" + , "v12", "v13", "v14", "v15", "v16", "v17", "v18" + , "v19", "v20", "v21", "v22", "v23", "v24", "v25" + ); + + sha256->buffLen = 0; + } + XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_PAD_SIZE - sha256->buffLen); + + /* put lengths in bits */ + sha256->hiLen = (sha256->loLen >> (8*sizeof(sha256->loLen) - 3)) + + (sha256->hiLen << 3); + sha256->loLen = sha256->loLen << 3; + + /* store lengths */ + #if defined(LITTLE_ENDIAN_ORDER) + __asm__ volatile ( + "LD1 {v0.2d-v3.2d}, %[in] \n" + "REV32 v0.16b, v0.16b \n" + "REV32 v1.16b, v1.16b \n" + "REV32 v2.16b, v2.16b \n" + "REV32 v3.16b, v3.16b \n" + "ST1 {v0.2d-v3.2d}, %[out] \n" + : [out] "=m" (sha256->buffer) + : [in] "m" (sha256->buffer) + : "cc", "memory", "v0", "v1", "v2", "v3" + ); + #endif + /* ! length ordering dependent on digest endian type ! 
*/ + XMEMCPY(&local[WC_SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32)); + XMEMCPY(&local[WC_SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen, + sizeof(word32)); + + __asm__ volatile ( + "#load in message and schedule updates \n" + "LD1 {v4.2d-v7.2d}, %[buffer] \n" + "MOV v0.16b, v4.16b \n" + "MOV v1.16b, v5.16b \n" + "MOV v2.16b, v6.16b \n" + "MOV v3.16b, v7.16b \n" + "LD1 {v20.2d-v21.2d}, %[digest] \n" + + "MOV v16.16b, v20.16b \n" + "MOV v17.16b, v21.16b \n" + "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" + "SHA256SU0 v4.4s, v1.4s \n" + "ADD v0.4s, v0.4s, v22.4s \n" + "MOV v6.16b, v2.16b \n" + "MOV v18.16b, v16.16b \n" + "SHA256SU1 v4.4s, v2.4s, v3.4s \n" + "SHA256H q16, q17, v0.4s \n" + "SHA256H2 q17, q18, v0.4s \n" + + "SHA256SU0 v5.4s, v2.4s \n" + "ADD v1.4s, v1.4s, v23.4s \n" + "MOV v7.16b, v3.16b \n" + "MOV v18.16b, v16.16b \n" + "SHA256SU1 v5.4s, v3.4s, v4.4s \n" + "SHA256H q16, q17, v1.4s \n" + "SHA256H2 q17, q18, v1.4s \n" + + "SHA256SU0 v6.4s, v3.4s \n" + "ADD v2.4s, v2.4s, v24.4s \n" + "MOV v18.16b, v16.16b \n" + "MOV v8.16b, v4.16b \n" + "SHA256SU1 v6.4s, v4.4s, v5.4s \n" + "SHA256H q16, q17, v2.4s \n" + "SHA256H2 q17, q18, v2.4s \n" + + "SHA256SU0 v7.4s, v4.4s \n" + "ADD v3.4s, v3.4s, v25.4s \n" + "MOV v18.16b, v16.16b \n" + "MOV v9.16b, v5.16b \n" + "SHA256SU1 v7.4s, v5.4s, v6.4s \n" + "SHA256H q16, q17, v3.4s \n" + "SHA256H2 q17, q18, v3.4s \n" + + "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" + "SHA256SU0 v8.4s, v5.4s \n" + "ADD v4.4s, v4.4s, v22.4s \n" + "MOV v18.16b, v16.16b \n" + "MOV v10.16b, v6.16b \n" + "SHA256SU1 v8.4s, v6.4s, v7.4s \n" + "SHA256H q16, q17, v4.4s \n" + "SHA256H2 q17, q18, v4.4s \n" + + "SHA256SU0 v9.4s, v6.4s \n" + "ADD v5.4s, v5.4s, v23.4s \n" + "MOV v18.16b, v16.16b \n" + "MOV v11.16b, v7.16b \n" + "SHA256SU1 v9.4s, v7.4s, v8.4s \n" + "SHA256H q16, q17, v5.4s \n" + "SHA256H2 q17, q18, v5.4s \n" + + "SHA256SU0 v10.4s, v7.4s \n" + "ADD v6.4s, v6.4s, v24.4s \n" + "MOV v18.16b, v16.16b \n" + "MOV v12.16b, v8.16b \n" + "SHA256SU1 v10.4s, v8.4s, v9.4s \n" + "SHA256H q16, q17, v6.4s \n" + "SHA256H2 q17, q18, v6.4s \n" + + "SHA256SU0 v11.4s, v8.4s \n" + "ADD v7.4s, v7.4s, v25.4s \n" + "MOV v18.16b, v16.16b \n" + "MOV v13.16b, v9.16b \n" + "SHA256SU1 v11.4s, v9.4s, v10.4s \n" + "SHA256H q16, q17, v7.4s \n" + "SHA256H2 q17, q18, v7.4s \n" + + "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" + "SHA256SU0 v12.4s, v9.4s \n" + "ADD v8.4s, v8.4s, v22.4s \n" + "MOV v18.16b, v16.16b \n" + "MOV v14.16b, v10.16b \n" + "SHA256SU1 v12.4s, v10.4s, v11.4s \n" + "SHA256H q16, q17, v8.4s \n" + "SHA256H2 q17, q18, v8.4s \n" + + "SHA256SU0 v13.4s, v10.4s \n" + "ADD v9.4s, v9.4s, v23.4s \n" + "MOV v18.16b, v16.16b \n" + "MOV v15.16b, v11.16b \n" + "SHA256SU1 v13.4s, v11.4s, v12.4s \n" + "SHA256H q16, q17, v9.4s \n" + "SHA256H2 q17, q18, v9.4s \n" + + "SHA256SU0 v14.4s, v11.4s \n" + "ADD v10.4s, v10.4s, v24.4s \n" + "MOV v18.16b, v16.16b \n" + "SHA256SU1 v14.4s, v12.4s, v13.4s \n" + "SHA256H q16, q17, v10.4s \n" + "SHA256H2 q17, q18, v10.4s \n" + + "SHA256SU0 v15.4s, v12.4s \n" + "ADD v11.4s, v11.4s, v25.4s \n" + "MOV v18.16b, v16.16b \n" + "SHA256SU1 v15.4s, v13.4s, v14.4s \n" + "SHA256H q16, q17, v11.4s \n" + "SHA256H2 q17, q18, v11.4s \n" + + "LD1 {v22.16b-v25.16b}, [%[k]] \n" + "ADD v12.4s, v12.4s, v22.4s \n" + "MOV v18.16b, v16.16b \n" + "SHA256H q16, q17, v12.4s \n" + "SHA256H2 q17, q18, v12.4s \n" + + "ADD v13.4s, v13.4s, v23.4s \n" + "MOV v18.16b, v16.16b \n" + "SHA256H q16, q17, v13.4s \n" + "SHA256H2 q17, q18, v13.4s \n" + + "ADD v14.4s, v14.4s, v24.4s \n" + "MOV v18.16b, v16.16b \n" 
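+ /* Final four rounds: all 64 W[t] words are available by now, so */
+ /* only the hash update instructions remain, with v18 snapshotting */
+ /* q16 for SHA256H2 as in the earlier rounds. */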
+ "SHA256H q16, q17, v14.4s \n" + "SHA256H2 q17, q18, v14.4s \n" + + "ADD v15.4s, v15.4s, v25.4s \n" + "MOV v18.16b, v16.16b \n" + "SHA256H q16, q17, v15.4s \n" + "SHA256H2 q17, q18, v15.4s \n" + + "#Add working vars back into digest state \n" + "ADD v16.4s, v16.4s, v20.4s \n" + "ADD v17.4s, v17.4s, v21.4s \n" + + "#Store value as hash output \n" + #if defined(LITTLE_ENDIAN_ORDER) + "REV32 v16.16b, v16.16b \n" + #endif + "ST1 {v16.16b}, [%[hashOut]], #16 \n" + #if defined(LITTLE_ENDIAN_ORDER) + "REV32 v17.16b, v17.16b \n" + #endif + "ST1 {v17.16b}, [%[hashOut]] \n" + : [hashOut] "=r" (hash) + : [k] "r" (K), [digest] "m" (sha256->digest), + [buffer] "m" (sha256->buffer), + "0" (hash) + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", + "v15", "v16", "v17", "v18", "v19", "v20", "v21", + "v22", "v23", "v24", "v25" + ); + + return 0; +} + +#else /* not using 64 bit */ + +/* ARMv8 hardware acceleration Aarch32 */ +static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len) +{ + word32 add; + word32 numBlocks; + + /* only perform actions if a buffer is passed in */ + if (len > 0) { + /* fill leftover buffer with data */ + add = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen); + XMEMCPY((byte*)(sha256->buffer) + sha256->buffLen, data, add); + sha256->buffLen += add; + data += add; + len -= add; + + /* number of blocks in a row to complete */ + numBlocks = (len + sha256->buffLen)/WC_SHA256_BLOCK_SIZE; + + if (numBlocks > 0) { + word32* bufPt = sha256->buffer; + word32* digPt = sha256->digest; + /* get leftover amount after blocks */ + add = (len + sha256->buffLen) - numBlocks * WC_SHA256_BLOCK_SIZE; + __asm__ volatile ( + "#load leftover data\n" + "VLDM %[buffer]!, {q0-q3} \n" + + "#load current digest\n" + "VLDM %[digest], {q12-q13} \n" + "MOV r8, %[blocks] \n" + "VREV32.8 q0, q0 \n" + "VREV32.8 q1, q1 \n" + "VREV32.8 q2, q2 \n" + "VREV32.8 q3, q3 \n" + "VLDM %[k]! ,{q5-q8} \n" + "VLDM %[k]! ,{q9}\n" + + "VMOV.32 q14, q12 \n" /* store digest for add at the end */ + "VMOV.32 q15, q13 \n" + + /* beginning of SHA256 block operation */ + "1:\n" + + /* Round 1 */ + "VMOV.32 q4, q0 \n" + "VADD.i32 q0, q0, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 2 */ + "SHA256SU0.32 q4, q1 \n" + "VADD.i32 q0, q1, q6 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q4, q2, q3 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 3 */ + "SHA256SU0.32 q1, q2 \n" + "VADD.i32 q0, q2, q7 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q1, q3, q4 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 4 */ + "SHA256SU0.32 q2, q3 \n" + "VADD.i32 q0, q3, q8 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q2, q4, q1 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 5 */ + "SHA256SU0.32 q3, q4 \n" + "VADD.i32 q0, q4, q9 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q3, q1, q2 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 6 */ + "VLD1.32 {q10}, [%[k]]! \n" + "SHA256SU0.32 q4, q1 \n" + "VADD.i32 q0, q1, q10 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q4, q2, q3 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 7 */ + "VLD1.32 {q10}, [%[k]]! 
\n" + "SHA256SU0.32 q1, q2 \n" + "VADD.i32 q0, q2, q10 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q1, q3, q4 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 8 */ + "VLD1.32 {q10}, [%[k]]! \n" + "SHA256SU0.32 q2, q3 \n" + "VADD.i32 q0, q3, q10 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q2, q4, q1 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 9 */ + "VLD1.32 {q10}, [%[k]]! \n" + "SHA256SU0.32 q3, q4 \n" + "VADD.i32 q0, q4, q10 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q3, q1, q2 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 10 */ + "VLD1.32 {q10}, [%[k]]! \n" + "SHA256SU0.32 q4, q1 \n" + "VADD.i32 q0, q1, q10 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q4, q2, q3 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 11 */ + "VLD1.32 {q10}, [%[k]]! \n" + "SHA256SU0.32 q1, q2 \n" + "VADD.i32 q0, q2, q10 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q1, q3, q4 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 12 */ + "VLD1.32 {q10}, [%[k]]! \n" + "SHA256SU0.32 q2, q3 \n" + "VADD.i32 q0, q3, q10 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q2, q4, q1 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 13 */ + "VLD1.32 {q10}, [%[k]]! \n" + "SHA256SU0.32 q3, q4 \n" + "VADD.i32 q0, q4, q10 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q3, q1, q2 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 14 */ + "VLD1.32 {q10}, [%[k]]! \n" + "VADD.i32 q0, q1, q10 \n" + "VMOV.32 q11, q12 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 15 */ + "VLD1.32 {q10}, [%[k]]! \n" + "VADD.i32 q0, q2, q10 \n" + "VMOV.32 q11, q12 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 16 */ + "VLD1.32 {q10}, [%[k]] \n" + "SUB r8, r8, #1 \n" + "VADD.i32 q0, q3, q10 \n" + "VMOV.32 q11, q12 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + "#Add working vars back into digest state \n" + "VADD.i32 q12, q12, q14 \n" + "VADD.i32 q13, q13, q15 \n" + + "#check if more blocks should be done\n" + "CMP r8, #0 \n" + "BEQ 2f \n" + + "#load in message and schedule updates \n" + "VLD1.32 {q0}, [%[dataIn]]! \n" + "VLD1.32 {q1}, [%[dataIn]]! \n" + "VLD1.32 {q2}, [%[dataIn]]! \n" + "VLD1.32 {q3}, [%[dataIn]]! 
\n" + + /* reset K pointer */ + "SUB %[k], %[k], #160 \n" + "VREV32.8 q0, q0 \n" + "VREV32.8 q1, q1 \n" + "VREV32.8 q2, q2 \n" + "VREV32.8 q3, q3 \n" + "VMOV.32 q14, q12 \n" + "VMOV.32 q15, q13 \n" + "B 1b \n" /* do another block */ + + "2:\n" + "VST1.32 {q12, q13}, [%[out]] \n" + + : [out] "=r" (digPt), "=r" (bufPt), "=r" (numBlocks), + "=r" (data) + : [k] "r" (K), [digest] "0" (digPt), [buffer] "1" (bufPt), + [blocks] "2" (numBlocks), [dataIn] "3" (data) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q12", "q13", "q14", + "q15", "r8" + ); + + AddLength(sha256, WC_SHA256_BLOCK_SIZE * numBlocks); + + /* copy over any remaining data leftover */ + XMEMCPY(sha256->buffer, data, add); + sha256->buffLen = add; + } + } + + /* account for possibility of not used if len = 0 */ + (void)add; + (void)numBlocks; + + return 0; +} + + +static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) +{ + byte* local; + + if (sha256 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + + local = (byte*)sha256->buffer; + AddLength(sha256, sha256->buffLen); /* before adding pads */ + + local[sha256->buffLen++] = 0x80; /* add 1 */ + + /* pad with zeros */ + if (sha256->buffLen > WC_SHA256_PAD_SIZE) { + word32* bufPt = sha256->buffer; + word32* digPt = sha256->digest; + XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_BLOCK_SIZE - sha256->buffLen); + sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen; + __asm__ volatile ( + "#load leftover data\n" + "VLDM %[buffer]!, {q0-q3} \n" + + "#load current digest\n" + "VLDM %[digest], {q12-q13} \n" + "VREV32.8 q0, q0 \n" + "VREV32.8 q1, q1 \n" + "VREV32.8 q2, q2 \n" + "VREV32.8 q3, q3 \n" + + "#load K values in \n" + "VMOV.32 q14, q12 \n" /* store digest for add at the end */ + "VMOV.32 q15, q13 \n" + + /* beginning of SHA256 block operation */ + /* Round 1 */ + "VLD1.32 {q5}, [%[k]]! \n" + "VMOV.32 q4, q0 \n" + "VADD.i32 q0, q0, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 2 */ + "VLD1.32 {q5}, [%[k]]! \n" + "SHA256SU0.32 q4, q1 \n" + "VADD.i32 q0, q1, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q4, q2, q3 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 3 */ + "VLD1.32 {q5}, [%[k]]! \n" + "SHA256SU0.32 q1, q2 \n" + "VADD.i32 q0, q2, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q1, q3, q4 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 4 */ + "VLD1.32 {q5}, [%[k]]! \n" + "SHA256SU0.32 q2, q3 \n" + "VADD.i32 q0, q3, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q2, q4, q1 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 5 */ + "VLD1.32 {q5}, [%[k]]! \n" + "SHA256SU0.32 q3, q4 \n" + "VADD.i32 q0, q4, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q3, q1, q2 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 6 */ + "VLD1.32 {q5}, [%[k]]! \n" + "SHA256SU0.32 q4, q1 \n" + "VADD.i32 q0, q1, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q4, q2, q3 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 7 */ + "VLD1.32 {q5}, [%[k]]! \n" + "SHA256SU0.32 q1, q2 \n" + "VADD.i32 q0, q2, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q1, q3, q4 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 8 */ + "VLD1.32 {q5}, [%[k]]! 
\n" + "SHA256SU0.32 q2, q3 \n" + "VADD.i32 q0, q3, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q2, q4, q1 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 9 */ + "VLD1.32 {q5}, [%[k]]! \n" + "SHA256SU0.32 q3, q4 \n" + "VADD.i32 q0, q4, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q3, q1, q2 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 10 */ + "VLD1.32 {q5}, [%[k]]! \n" + "SHA256SU0.32 q4, q1 \n" + "VADD.i32 q0, q1, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q4, q2, q3 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 11 */ + "VLD1.32 {q5}, [%[k]]! \n" + "SHA256SU0.32 q1, q2 \n" + "VADD.i32 q0, q2, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q1, q3, q4 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 12 */ + "VLD1.32 {q5}, [%[k]]! \n" + "SHA256SU0.32 q2, q3 \n" + "VADD.i32 q0, q3, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q2, q4, q1 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 13 */ + "VLD1.32 {q5}, [%[k]]! \n" + "SHA256SU0.32 q3, q4 \n" + "VADD.i32 q0, q4, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256SU1.32 q3, q1, q2 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 14 */ + "VLD1.32 {q5}, [%[k]]! \n" + "VADD.i32 q0, q1, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 15 */ + "VLD1.32 {q5}, [%[k]]! \n" + "VADD.i32 q0, q2, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 16 */ + "VLD1.32 {q5}, [%[k]]! \n" + "VADD.i32 q0, q3, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + "#Add working vars back into digest state \n" + "VADD.i32 q12, q12, q14 \n" + "VADD.i32 q13, q13, q15 \n" + + /* reset K pointer */ + "SUB %[k], %[k], #256 \n" + "VST1.32 {q12, q13}, [%[out]] \n" + + : [out] "=r" (digPt), "=r" (bufPt) + : [k] "r" (K), [digest] "0" (digPt), [buffer] "1" (bufPt) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q12", "q13", "q14", + "q15" + ); + + sha256->buffLen = 0; + } + XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_PAD_SIZE - sha256->buffLen); + + /* put lengths in bits */ + sha256->hiLen = (sha256->loLen >> (8*sizeof(sha256->loLen) - 3)) + + (sha256->hiLen << 3); + sha256->loLen = sha256->loLen << 3; + + /* store lengths */ + #if defined(LITTLE_ENDIAN_ORDER) + { + word32* bufPt = sha256->buffer; + __asm__ volatile ( + "VLD1.32 {q0}, [%[in]] \n" + "VREV32.8 q0, q0 \n" + "VST1.32 {q0}, [%[out]]!\n" + "VLD1.32 {q1}, [%[in]] \n" + "VREV32.8 q1, q1 \n" + "VST1.32 {q1}, [%[out]]!\n" + "VLD1.32 {q2}, [%[in]] \n" + "VREV32.8 q2, q2 \n" + "VST1.32 {q2}, [%[out]]!\n" + "VLD1.32 {q3}, [%[in]] \n" + "VREV32.8 q3, q3 \n" + "VST1.32 {q3}, [%[out]] \n" + : [out] "=r" (bufPt) + : [in] "0" (bufPt) + : "cc", "memory", "q0", "q1", "q2", "q3" + ); + } + #endif + /* ! length ordering dependent on digest endian type ! 
*/
+    XMEMCPY(&local[WC_SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32));
+    XMEMCPY(&local[WC_SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen,
+            sizeof(word32));
+
+    word32* bufPt = sha256->buffer;
+    word32* digPt = sha256->digest;
+    __asm__ volatile (
+    "#load leftover data\n"
+    "VLDM %[buffer]!, {q0-q3} \n"
+
+    "#load current digest\n"
+    "VLDM %[digest], {q12-q13} \n"
+
+    "VMOV.32 q14, q12 \n" /* store digest for add at the end */
+    "VMOV.32 q15, q13 \n"
+
+    /* beginning of SHA256 block operation */
+    /* Round 1 */
+    "VLD1.32 {q5}, [%[k]]! \n"
+    "VMOV.32 q4, q0 \n"
+    "VADD.i32 q0, q0, q5 \n"
+    "VMOV.32 q11, q12 \n"
+    "SHA256H.32 q12, q13, q0 \n"
+    "SHA256H2.32 q13, q11, q0 \n"
+
+    /* Round 2 */
+    "VLD1.32 {q5}, [%[k]]! \n"
+    "SHA256SU0.32 q4, q1 \n"
+    "VADD.i32 q0, q1, q5 \n"
+    "VMOV.32 q11, q12 \n"
+    "SHA256SU1.32 q4, q2, q3 \n"
+    "SHA256H.32 q12, q13, q0 \n"
+    "SHA256H2.32 q13, q11, q0 \n"
+
+    /* Round 3 */
+    "VLD1.32 {q5}, [%[k]]! \n"
+    "SHA256SU0.32 q1, q2 \n"
+    "VADD.i32 q0, q2, q5 \n"
+    "VMOV.32 q11, q12 \n"
+    "SHA256SU1.32 q1, q3, q4 \n"
+    "SHA256H.32 q12, q13, q0 \n"
+    "SHA256H2.32 q13, q11, q0 \n"
+
+    /* Round 4 */
+    "VLD1.32 {q5}, [%[k]]! \n"
+    "SHA256SU0.32 q2, q3 \n"
+    "VADD.i32 q0, q3, q5 \n"
+    "VMOV.32 q11, q12 \n"
+    "SHA256SU1.32 q2, q4, q1 \n"
+    "SHA256H.32 q12, q13, q0 \n"
+    "SHA256H2.32 q13, q11, q0 \n"
+
+    /* Round 5 */
+    "VLD1.32 {q5}, [%[k]]! \n"
+    "SHA256SU0.32 q3, q4 \n"
+    "VADD.i32 q0, q4, q5 \n"
+    "VMOV.32 q11, q12 \n"
+    "SHA256SU1.32 q3, q1, q2 \n"
+    "SHA256H.32 q12, q13, q0 \n"
+    "SHA256H2.32 q13, q11, q0 \n"
+
+    /* Round 6 */
+    "VLD1.32 {q5}, [%[k]]! \n"
+    "SHA256SU0.32 q4, q1 \n"
+    "VADD.i32 q0, q1, q5 \n"
+    "VMOV.32 q11, q12 \n"
+    "SHA256SU1.32 q4, q2, q3 \n"
+    "SHA256H.32 q12, q13, q0 \n"
+    "SHA256H2.32 q13, q11, q0 \n"
+
+    /* Round 7 */
+    "VLD1.32 {q5}, [%[k]]! \n"
+    "SHA256SU0.32 q1, q2 \n"
+    "VADD.i32 q0, q2, q5 \n"
+    "VMOV.32 q11, q12 \n"
+    "SHA256SU1.32 q1, q3, q4 \n"
+    "SHA256H.32 q12, q13, q0 \n"
+    "SHA256H2.32 q13, q11, q0 \n"
+
+    /* Round 8 */
+    "VLD1.32 {q5}, [%[k]]! \n"
+    "SHA256SU0.32 q2, q3 \n"
+    "VADD.i32 q0, q3, q5 \n"
+    "VMOV.32 q11, q12 \n"
+    "SHA256SU1.32 q2, q4, q1 \n"
+    "SHA256H.32 q12, q13, q0 \n"
+    "SHA256H2.32 q13, q11, q0 \n"
+
+    /* Round 9 */
+    "VLD1.32 {q5}, [%[k]]! \n"
+    "SHA256SU0.32 q3, q4 \n"
+    "VADD.i32 q0, q4, q5 \n"
+    "VMOV.32 q11, q12 \n"
+    "SHA256SU1.32 q3, q1, q2 \n"
+    "SHA256H.32 q12, q13, q0 \n"
+    "SHA256H2.32 q13, q11, q0 \n"
+
+    /* Round 10 */
+    "VLD1.32 {q5}, [%[k]]! \n"
+    "SHA256SU0.32 q4, q1 \n"
+    "VADD.i32 q0, q1, q5 \n"
+    "VMOV.32 q11, q12 \n"
+    "SHA256SU1.32 q4, q2, q3 \n"
+    "SHA256H.32 q12, q13, q0 \n"
+    "SHA256H2.32 q13, q11, q0 \n"
+
+    /* Round 11 */
+    "VLD1.32 {q5}, [%[k]]! \n"
+    "SHA256SU0.32 q1, q2 \n"
+    "VADD.i32 q0, q2, q5 \n"
+    "VMOV.32 q11, q12 \n"
+    "SHA256SU1.32 q1, q3, q4 \n"
+    "SHA256H.32 q12, q13, q0 \n"
+    "SHA256H2.32 q13, q11, q0 \n"
+
+    /* Round 12 */
+    "VLD1.32 {q5}, [%[k]]! \n"
+    "SHA256SU0.32 q2, q3 \n"
+    "VADD.i32 q0, q3, q5 \n"
+    "VMOV.32 q11, q12 \n"
+    "SHA256SU1.32 q2, q4, q1 \n"
+    "SHA256H.32 q12, q13, q0 \n"
+    "SHA256H2.32 q13, q11, q0 \n"
+
+    /* Round 13 */
+    "VLD1.32 {q5}, [%[k]]! \n"
+    "SHA256SU0.32 q3, q4 \n"
+    "VADD.i32 q0, q4, q5 \n"
+    "VMOV.32 q11, q12 \n"
+    "SHA256SU1.32 q3, q1, q2 \n"
+    "SHA256H.32 q12, q13, q0 \n"
+    "SHA256H2.32 q13, q11, q0 \n"
+
+    /* Round 14 */
+    "VLD1.32 {q5}, [%[k]]! \n"
+    "VADD.i32 q0, q1, q5 \n"
+    "VMOV.32 q11, q12 \n"
+    "SHA256H.32 q12, q13, q0 \n"
+    "SHA256H2.32 q13, q11, q0 \n"
+
+    /* Round 15 */
+    "VLD1.32 {q5}, [%[k]]!
\n" + "VADD.i32 q0, q2, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + /* Round 16 */ + "VLD1.32 {q5}, [%[k]]! \n" + "VADD.i32 q0, q3, q5 \n" + "VMOV.32 q11, q12 \n" + "SHA256H.32 q12, q13, q0 \n" + "SHA256H2.32 q13, q11, q0 \n" + + "#Add working vars back into digest state \n" + "VADD.i32 q12, q12, q14 \n" + "VADD.i32 q13, q13, q15 \n" + + "#Store value as hash output \n" + #if defined(LITTLE_ENDIAN_ORDER) + "VREV32.8 q12, q12 \n" + #endif + "VST1.32 {q12}, [%[hashOut]]! \n" + #if defined(LITTLE_ENDIAN_ORDER) + "VREV32.8 q13, q13 \n" + #endif + "VST1.32 {q13}, [%[hashOut]] \n" + + : [out] "=r" (digPt), "=r" (bufPt), + [hashOut] "=r" (hash) + : [k] "r" (K), [digest] "0" (digPt), [buffer] "1" (bufPt), + "2" (hash) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q12", "q13", "q14", + "q15" + ); + + return 0; +} + +#endif /* __aarch64__ */ + + +#ifndef NO_SHA256 + +int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) +{ + if (sha256 == NULL) + return BAD_FUNC_ARG; + + sha256->heap = heap; + (void)devId; + + return InitSha256(sha256); +} + +int wc_InitSha256(wc_Sha256* sha256) +{ + return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID); +} + +void wc_Sha256Free(wc_Sha256* sha256) +{ + (void)sha256; +} + +int wc_Sha256Update(wc_Sha256* sha256, const byte* data, word32 len) +{ + if (sha256 == NULL || (data == NULL && len != 0)) { + return BAD_FUNC_ARG; + } + + return Sha256Update(sha256, data, len); +} + +int wc_Sha256Final(wc_Sha256* sha256, byte* hash) +{ + int ret; + + if (sha256 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + + ret = Sha256Final(sha256, hash); + if (ret != 0) + return ret; + + return InitSha256(sha256); /* reset state */ +} + +int wc_Sha256GetHash(wc_Sha256* sha256, byte* hash) +{ + int ret; + wc_Sha256 tmpSha256; + + if (sha256 == NULL || hash == NULL) + return BAD_FUNC_ARG; + + ret = wc_Sha256Copy(sha256, &tmpSha256); + if (ret == 0) { + ret = wc_Sha256Final(&tmpSha256, hash); + } + return ret; +} + +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) +int wc_Sha256SetFlags(wc_Sha256* sha256, word32 flags) +{ + if (sha256) { + sha256->flags = flags; + } + return 0; +} +int wc_Sha256GetFlags(wc_Sha256* sha256, word32* flags) +{ + if (sha256 && flags) { + *flags = sha256->flags; + } + return 0; +} +#endif + +int wc_Sha256Copy(wc_Sha256* src, wc_Sha256* dst) +{ + int ret = 0; + + if (src == NULL || dst == NULL) + return BAD_FUNC_ARG; + + XMEMCPY(dst, src, sizeof(wc_Sha256)); + + return ret; +} + +#endif /* !NO_SHA256 */ + + +#ifdef WOLFSSL_SHA224 + static int InitSha224(wc_Sha224* sha224) + { + + int ret = 0; + + if (sha224 == NULL) { + return BAD_FUNC_ARG; + } + + sha224->digest[0] = 0xc1059ed8; + sha224->digest[1] = 0x367cd507; + sha224->digest[2] = 0x3070dd17; + sha224->digest[3] = 0xf70e5939; + sha224->digest[4] = 0xffc00b31; + sha224->digest[5] = 0x68581511; + sha224->digest[6] = 0x64f98fa7; + sha224->digest[7] = 0xbefa4fa4; + + sha224->buffLen = 0; + sha224->loLen = 0; + sha224->hiLen = 0; + + return ret; + } + + int wc_InitSha224_ex(wc_Sha224* sha224, void* heap, int devId) + { + if (sha224 == NULL) + return BAD_FUNC_ARG; + + sha224->heap = heap; + (void)devId; + + return InitSha224(sha224); + } + + int wc_InitSha224(wc_Sha224* sha224) + { + return wc_InitSha224_ex(sha224, NULL, INVALID_DEVID); + } + + int wc_Sha224Update(wc_Sha224* sha224, const byte* data, word32 len) + { + int ret; + + if (sha224 == NULL || (data == NULL && len > 0)) { + return 
BAD_FUNC_ARG; + } + + ret = Sha256Update((wc_Sha256 *)sha224, data, len); + + return ret; + } + + int wc_Sha224Final(wc_Sha224* sha224, byte* hash) + { + int ret; + word32 hashTmp[WC_SHA256_DIGEST_SIZE/sizeof(word32)]; + + if (sha224 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + + ret = Sha256Final((wc_Sha256*)sha224, (byte*)hashTmp); + if (ret != 0) + return ret; + + XMEMCPY(hash, hashTmp, WC_SHA224_DIGEST_SIZE); + + return InitSha224(sha224); /* reset state */ + } + + void wc_Sha224Free(wc_Sha224* sha224) + { + if (sha224 == NULL) + return; + } + + int wc_Sha224GetHash(wc_Sha224* sha224, byte* hash) + { + int ret; + wc_Sha224 tmpSha224; + + if (sha224 == NULL || hash == NULL) + return BAD_FUNC_ARG; + + ret = wc_Sha224Copy(sha224, &tmpSha224); + if (ret == 0) { + ret = wc_Sha224Final(&tmpSha224, hash); + } + return ret; + } + +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + int wc_Sha224SetFlags(wc_Sha224* sha224, word32 flags) + { + if (sha224) { + sha224->flags = flags; + } + return 0; + } + int wc_Sha224GetFlags(wc_Sha224* sha224, word32* flags) + { + if (sha224 && flags) { + *flags = sha224->flags; + } + return 0; + } +#endif + + int wc_Sha224Copy(wc_Sha224* src, wc_Sha224* dst) + { + int ret = 0; + + if (src == NULL || dst == NULL) + return BAD_FUNC_ARG; + + XMEMCPY(dst, src, sizeof(wc_Sha224)); + + return ret; + } + +#endif /* WOLFSSL_SHA224 */ + +#endif /* !NO_SHA256 || WOLFSSL_SHA224 */ +#endif /* WOLFSSL_ARMASM */ diff --git a/client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.S b/client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.S new file mode 100644 index 0000000..a35bccb --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.S @@ -0,0 +1,1046 @@ +/* armv8-sha512-asm + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./sha2/sha512.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.S + */ +#ifdef __aarch64__ + .text + .section .rodata + .type L_SHA512_transform_neon_len_k, %object + .size L_SHA512_transform_neon_len_k, 640 + .align 3 +L_SHA512_transform_neon_len_k: + .xword 0x428a2f98d728ae22 + .xword 0x7137449123ef65cd + .xword 0xb5c0fbcfec4d3b2f + .xword 0xe9b5dba58189dbbc + .xword 0x3956c25bf348b538 + .xword 0x59f111f1b605d019 + .xword 0x923f82a4af194f9b + .xword 0xab1c5ed5da6d8118 + .xword 0xd807aa98a3030242 + .xword 0x12835b0145706fbe + .xword 0x243185be4ee4b28c + .xword 0x550c7dc3d5ffb4e2 + .xword 0x72be5d74f27b896f + .xword 0x80deb1fe3b1696b1 + .xword 0x9bdc06a725c71235 + .xword 0xc19bf174cf692694 + .xword 0xe49b69c19ef14ad2 + .xword 0xefbe4786384f25e3 + .xword 0xfc19dc68b8cd5b5 + .xword 0x240ca1cc77ac9c65 + .xword 0x2de92c6f592b0275 + .xword 0x4a7484aa6ea6e483 + .xword 0x5cb0a9dcbd41fbd4 + .xword 0x76f988da831153b5 + .xword 0x983e5152ee66dfab + .xword 0xa831c66d2db43210 + .xword 0xb00327c898fb213f + .xword 0xbf597fc7beef0ee4 + .xword 0xc6e00bf33da88fc2 + .xword 0xd5a79147930aa725 + .xword 0x6ca6351e003826f + .xword 0x142929670a0e6e70 + .xword 0x27b70a8546d22ffc + .xword 0x2e1b21385c26c926 + .xword 0x4d2c6dfc5ac42aed + .xword 0x53380d139d95b3df + .xword 0x650a73548baf63de + .xword 0x766a0abb3c77b2a8 + .xword 0x81c2c92e47edaee6 + .xword 0x92722c851482353b + .xword 0xa2bfe8a14cf10364 + .xword 0xa81a664bbc423001 + .xword 0xc24b8b70d0f89791 + .xword 0xc76c51a30654be30 + .xword 0xd192e819d6ef5218 + .xword 0xd69906245565a910 + .xword 0xf40e35855771202a + .xword 0x106aa07032bbd1b8 + .xword 0x19a4c116b8d2d0c8 + .xword 0x1e376c085141ab53 + .xword 0x2748774cdf8eeb99 + .xword 0x34b0bcb5e19b48a8 + .xword 0x391c0cb3c5c95a63 + .xword 0x4ed8aa4ae3418acb + .xword 0x5b9cca4f7763e373 + .xword 0x682e6ff3d6b2b8a3 + .xword 0x748f82ee5defb2fc + .xword 0x78a5636f43172f60 + .xword 0x84c87814a1f0ab72 + .xword 0x8cc702081a6439ec + .xword 0x90befffa23631e28 + .xword 0xa4506cebde82bde9 + .xword 0xbef9a3f7b2c67915 + .xword 0xc67178f2e372532b + .xword 0xca273eceea26619c + .xword 0xd186b8c721c0c207 + .xword 0xeada7dd6cde0eb1e + .xword 0xf57d4f7fee6ed178 + .xword 0x6f067aa72176fba + .xword 0xa637dc5a2c898a6 + .xword 0x113f9804bef90dae + .xword 0x1b710b35131c471b + .xword 0x28db77f523047d84 + .xword 0x32caab7b40c72493 + .xword 0x3c9ebe0a15c9bebc + .xword 0x431d67c49c100d4c + .xword 0x4cc5d4becb3e42b6 + .xword 0x597f299cfc657e2a + .xword 0x5fcb6fab3ad6faec + .xword 0x6c44198c4a475817 + .text + .section .rodata + .type L_SHA512_transform_neon_len_ror8, %object + .size L_SHA512_transform_neon_len_ror8, 16 + .align 4 +L_SHA512_transform_neon_len_ror8: + .xword 0x7060504030201, 0x80f0e0d0c0b0a09 + .text + .align 2 + .globl Transform_Sha512_Len + .type Transform_Sha512_Len, %function +Transform_Sha512_Len: + stp x29, x30, [sp, #-128]! 
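+    # 128-byte frame: x29/x30 plus the general-purpose and SIMD
+    # registers spilled just below, so the rounds can use them freely.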
+ add x29, sp, #0 + str x17, [x29, #16] + str x19, [x29, #24] + stp x20, x21, [x29, #32] + stp x22, x23, [x29, #48] + stp x24, x25, [x29, #64] + stp x26, x27, [x29, #80] + stp d8, d9, [x29, #96] + stp d10, d11, [x29, #112] + adr x3, L_SHA512_transform_neon_len_k + adr x27, L_SHA512_transform_neon_len_ror8 + ld1 {v11.16b}, [x27] + # Load digest into working vars + ldp x4, x5, [x0] + ldp x6, x7, [x0, #16] + ldp x8, x9, [x0, #32] + ldp x10, x11, [x0, #48] + # Start of loop processing a block +L_sha512_len_neon_begin: + # Load W + # Copy digest to add in at end + ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x1], #0x40 + mov x19, x4 + ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x1], #0x40 + mov x20, x5 + rev64 v0.16b, v0.16b + mov x21, x6 + rev64 v1.16b, v1.16b + mov x22, x7 + rev64 v2.16b, v2.16b + mov x23, x8 + rev64 v3.16b, v3.16b + mov x24, x9 + rev64 v4.16b, v4.16b + mov x25, x10 + rev64 v5.16b, v5.16b + mov x26, x11 + rev64 v6.16b, v6.16b + rev64 v7.16b, v7.16b + # Pre-calc: b ^ c + eor x16, x5, x6 + mov x27, #4 + # Start of 16 rounds +L_sha512_len_neon_start: + # Round 0 + mov x13, v0.d[0] + ldr x15, [x3], #8 + ror x12, x8, #14 + ror x14, x4, #28 + eor x12, x12, x8, ror 18 + eor x14, x14, x4, ror 34 + eor x12, x12, x8, ror 41 + eor x14, x14, x4, ror 39 + add x11, x11, x12 + eor x17, x4, x5 + eor x12, x9, x10 + and x16, x17, x16 + and x12, x12, x8 + add x11, x11, x13 + eor x12, x12, x10 + add x11, x11, x15 + eor x16, x16, x5 + add x11, x11, x12 + add x14, x14, x16 + add x7, x7, x11 + add x11, x11, x14 + # Round 1 + mov x13, v0.d[1] + ldr x15, [x3], #8 + ext v10.16b, v0.16b, v1.16b, #8 + ror x12, x7, #14 + shl v8.2d, v7.2d, #45 + ror x14, x11, #28 + sri v8.2d, v7.2d, #19 + eor x12, x12, x7, ror 18 + shl v9.2d, v7.2d, #3 + eor x14, x14, x11, ror 34 + sri v9.2d, v7.2d, #61 + eor x12, x12, x7, ror 41 + eor v9.16b, v9.16b, v8.16b + eor x14, x14, x11, ror 39 + ushr v8.2d, v7.2d, #6 + add x10, x10, x12 + eor v9.16b, v9.16b, v8.16b + eor x16, x11, x4 + add v0.2d, v0.2d, v9.2d + eor x12, x8, x9 + ext v9.16b, v4.16b, v5.16b, #8 + and x17, x16, x17 + add v0.2d, v0.2d, v9.2d + and x12, x12, x7 + shl v8.2d, v10.2d, #63 + add x10, x10, x13 + sri v8.2d, v10.2d, #1 + eor x12, x12, x9 + tbl v9.16b, {v10.16b}, v11.16b + add x10, x10, x15 + eor v9.16b, v9.16b, v8.16b + eor x17, x17, x4 + ushr v10.2d, v10.2d, #7 + add x10, x10, x12 + eor v9.16b, v9.16b, v10.16b + add x14, x14, x17 + add v0.2d, v0.2d, v9.2d + add x6, x6, x10 + add x10, x10, x14 + # Round 2 + mov x13, v1.d[0] + ldr x15, [x3], #8 + ror x12, x6, #14 + ror x14, x10, #28 + eor x12, x12, x6, ror 18 + eor x14, x14, x10, ror 34 + eor x12, x12, x6, ror 41 + eor x14, x14, x10, ror 39 + add x9, x9, x12 + eor x17, x10, x11 + eor x12, x7, x8 + and x16, x17, x16 + and x12, x12, x6 + add x9, x9, x13 + eor x12, x12, x8 + add x9, x9, x15 + eor x16, x16, x11 + add x9, x9, x12 + add x14, x14, x16 + add x5, x5, x9 + add x9, x9, x14 + # Round 3 + mov x13, v1.d[1] + ldr x15, [x3], #8 + ext v10.16b, v1.16b, v2.16b, #8 + ror x12, x5, #14 + shl v8.2d, v0.2d, #45 + ror x14, x9, #28 + sri v8.2d, v0.2d, #19 + eor x12, x12, x5, ror 18 + shl v9.2d, v0.2d, #3 + eor x14, x14, x9, ror 34 + sri v9.2d, v0.2d, #61 + eor x12, x12, x5, ror 41 + eor v9.16b, v9.16b, v8.16b + eor x14, x14, x9, ror 39 + ushr v8.2d, v0.2d, #6 + add x8, x8, x12 + eor v9.16b, v9.16b, v8.16b + eor x16, x9, x10 + add v1.2d, v1.2d, v9.2d + eor x12, x6, x7 + ext v9.16b, v5.16b, v6.16b, #8 + and x17, x16, x17 + add v1.2d, v1.2d, v9.2d + and x12, x12, x5 + shl v8.2d, v10.2d, #63 + add x8, x8, x13 + sri v8.2d, v10.2d, #1 + eor 
x12, x12, x7 + tbl v9.16b, {v10.16b}, v11.16b + add x8, x8, x15 + eor v9.16b, v9.16b, v8.16b + eor x17, x17, x10 + ushr v10.2d, v10.2d, #7 + add x8, x8, x12 + eor v9.16b, v9.16b, v10.16b + add x14, x14, x17 + add v1.2d, v1.2d, v9.2d + add x4, x4, x8 + add x8, x8, x14 + # Round 4 + mov x13, v2.d[0] + ldr x15, [x3], #8 + ror x12, x4, #14 + ror x14, x8, #28 + eor x12, x12, x4, ror 18 + eor x14, x14, x8, ror 34 + eor x12, x12, x4, ror 41 + eor x14, x14, x8, ror 39 + add x7, x7, x12 + eor x17, x8, x9 + eor x12, x5, x6 + and x16, x17, x16 + and x12, x12, x4 + add x7, x7, x13 + eor x12, x12, x6 + add x7, x7, x15 + eor x16, x16, x9 + add x7, x7, x12 + add x14, x14, x16 + add x11, x11, x7 + add x7, x7, x14 + # Round 5 + mov x13, v2.d[1] + ldr x15, [x3], #8 + ext v10.16b, v2.16b, v3.16b, #8 + ror x12, x11, #14 + shl v8.2d, v1.2d, #45 + ror x14, x7, #28 + sri v8.2d, v1.2d, #19 + eor x12, x12, x11, ror 18 + shl v9.2d, v1.2d, #3 + eor x14, x14, x7, ror 34 + sri v9.2d, v1.2d, #61 + eor x12, x12, x11, ror 41 + eor v9.16b, v9.16b, v8.16b + eor x14, x14, x7, ror 39 + ushr v8.2d, v1.2d, #6 + add x6, x6, x12 + eor v9.16b, v9.16b, v8.16b + eor x16, x7, x8 + add v2.2d, v2.2d, v9.2d + eor x12, x4, x5 + ext v9.16b, v6.16b, v7.16b, #8 + and x17, x16, x17 + add v2.2d, v2.2d, v9.2d + and x12, x12, x11 + shl v8.2d, v10.2d, #63 + add x6, x6, x13 + sri v8.2d, v10.2d, #1 + eor x12, x12, x5 + tbl v9.16b, {v10.16b}, v11.16b + add x6, x6, x15 + eor v9.16b, v9.16b, v8.16b + eor x17, x17, x8 + ushr v10.2d, v10.2d, #7 + add x6, x6, x12 + eor v9.16b, v9.16b, v10.16b + add x14, x14, x17 + add v2.2d, v2.2d, v9.2d + add x10, x10, x6 + add x6, x6, x14 + # Round 6 + mov x13, v3.d[0] + ldr x15, [x3], #8 + ror x12, x10, #14 + ror x14, x6, #28 + eor x12, x12, x10, ror 18 + eor x14, x14, x6, ror 34 + eor x12, x12, x10, ror 41 + eor x14, x14, x6, ror 39 + add x5, x5, x12 + eor x17, x6, x7 + eor x12, x11, x4 + and x16, x17, x16 + and x12, x12, x10 + add x5, x5, x13 + eor x12, x12, x4 + add x5, x5, x15 + eor x16, x16, x7 + add x5, x5, x12 + add x14, x14, x16 + add x9, x9, x5 + add x5, x5, x14 + # Round 7 + mov x13, v3.d[1] + ldr x15, [x3], #8 + ext v10.16b, v3.16b, v4.16b, #8 + ror x12, x9, #14 + shl v8.2d, v2.2d, #45 + ror x14, x5, #28 + sri v8.2d, v2.2d, #19 + eor x12, x12, x9, ror 18 + shl v9.2d, v2.2d, #3 + eor x14, x14, x5, ror 34 + sri v9.2d, v2.2d, #61 + eor x12, x12, x9, ror 41 + eor v9.16b, v9.16b, v8.16b + eor x14, x14, x5, ror 39 + ushr v8.2d, v2.2d, #6 + add x4, x4, x12 + eor v9.16b, v9.16b, v8.16b + eor x16, x5, x6 + add v3.2d, v3.2d, v9.2d + eor x12, x10, x11 + ext v9.16b, v7.16b, v0.16b, #8 + and x17, x16, x17 + add v3.2d, v3.2d, v9.2d + and x12, x12, x9 + shl v8.2d, v10.2d, #63 + add x4, x4, x13 + sri v8.2d, v10.2d, #1 + eor x12, x12, x11 + tbl v9.16b, {v10.16b}, v11.16b + add x4, x4, x15 + eor v9.16b, v9.16b, v8.16b + eor x17, x17, x6 + ushr v10.2d, v10.2d, #7 + add x4, x4, x12 + eor v9.16b, v9.16b, v10.16b + add x14, x14, x17 + add v3.2d, v3.2d, v9.2d + add x8, x8, x4 + add x4, x4, x14 + # Round 8 + mov x13, v4.d[0] + ldr x15, [x3], #8 + ror x12, x8, #14 + ror x14, x4, #28 + eor x12, x12, x8, ror 18 + eor x14, x14, x4, ror 34 + eor x12, x12, x8, ror 41 + eor x14, x14, x4, ror 39 + add x11, x11, x12 + eor x17, x4, x5 + eor x12, x9, x10 + and x16, x17, x16 + and x12, x12, x8 + add x11, x11, x13 + eor x12, x12, x10 + add x11, x11, x15 + eor x16, x16, x5 + add x11, x11, x12 + add x14, x14, x16 + add x7, x7, x11 + add x11, x11, x14 + # Round 9 + mov x13, v4.d[1] + ldr x15, [x3], #8 + ext v10.16b, v4.16b, v5.16b, #8 + ror 
x12, x7, #14 + shl v8.2d, v3.2d, #45 + ror x14, x11, #28 + sri v8.2d, v3.2d, #19 + eor x12, x12, x7, ror 18 + shl v9.2d, v3.2d, #3 + eor x14, x14, x11, ror 34 + sri v9.2d, v3.2d, #61 + eor x12, x12, x7, ror 41 + eor v9.16b, v9.16b, v8.16b + eor x14, x14, x11, ror 39 + ushr v8.2d, v3.2d, #6 + add x10, x10, x12 + eor v9.16b, v9.16b, v8.16b + eor x16, x11, x4 + add v4.2d, v4.2d, v9.2d + eor x12, x8, x9 + ext v9.16b, v0.16b, v1.16b, #8 + and x17, x16, x17 + add v4.2d, v4.2d, v9.2d + and x12, x12, x7 + shl v8.2d, v10.2d, #63 + add x10, x10, x13 + sri v8.2d, v10.2d, #1 + eor x12, x12, x9 + tbl v9.16b, {v10.16b}, v11.16b + add x10, x10, x15 + eor v9.16b, v9.16b, v8.16b + eor x17, x17, x4 + ushr v10.2d, v10.2d, #7 + add x10, x10, x12 + eor v9.16b, v9.16b, v10.16b + add x14, x14, x17 + add v4.2d, v4.2d, v9.2d + add x6, x6, x10 + add x10, x10, x14 + # Round 10 + mov x13, v5.d[0] + ldr x15, [x3], #8 + ror x12, x6, #14 + ror x14, x10, #28 + eor x12, x12, x6, ror 18 + eor x14, x14, x10, ror 34 + eor x12, x12, x6, ror 41 + eor x14, x14, x10, ror 39 + add x9, x9, x12 + eor x17, x10, x11 + eor x12, x7, x8 + and x16, x17, x16 + and x12, x12, x6 + add x9, x9, x13 + eor x12, x12, x8 + add x9, x9, x15 + eor x16, x16, x11 + add x9, x9, x12 + add x14, x14, x16 + add x5, x5, x9 + add x9, x9, x14 + # Round 11 + mov x13, v5.d[1] + ldr x15, [x3], #8 + ext v10.16b, v5.16b, v6.16b, #8 + ror x12, x5, #14 + shl v8.2d, v4.2d, #45 + ror x14, x9, #28 + sri v8.2d, v4.2d, #19 + eor x12, x12, x5, ror 18 + shl v9.2d, v4.2d, #3 + eor x14, x14, x9, ror 34 + sri v9.2d, v4.2d, #61 + eor x12, x12, x5, ror 41 + eor v9.16b, v9.16b, v8.16b + eor x14, x14, x9, ror 39 + ushr v8.2d, v4.2d, #6 + add x8, x8, x12 + eor v9.16b, v9.16b, v8.16b + eor x16, x9, x10 + add v5.2d, v5.2d, v9.2d + eor x12, x6, x7 + ext v9.16b, v1.16b, v2.16b, #8 + and x17, x16, x17 + add v5.2d, v5.2d, v9.2d + and x12, x12, x5 + shl v8.2d, v10.2d, #63 + add x8, x8, x13 + sri v8.2d, v10.2d, #1 + eor x12, x12, x7 + tbl v9.16b, {v10.16b}, v11.16b + add x8, x8, x15 + eor v9.16b, v9.16b, v8.16b + eor x17, x17, x10 + ushr v10.2d, v10.2d, #7 + add x8, x8, x12 + eor v9.16b, v9.16b, v10.16b + add x14, x14, x17 + add v5.2d, v5.2d, v9.2d + add x4, x4, x8 + add x8, x8, x14 + # Round 12 + mov x13, v6.d[0] + ldr x15, [x3], #8 + ror x12, x4, #14 + ror x14, x8, #28 + eor x12, x12, x4, ror 18 + eor x14, x14, x8, ror 34 + eor x12, x12, x4, ror 41 + eor x14, x14, x8, ror 39 + add x7, x7, x12 + eor x17, x8, x9 + eor x12, x5, x6 + and x16, x17, x16 + and x12, x12, x4 + add x7, x7, x13 + eor x12, x12, x6 + add x7, x7, x15 + eor x16, x16, x9 + add x7, x7, x12 + add x14, x14, x16 + add x11, x11, x7 + add x7, x7, x14 + # Round 13 + mov x13, v6.d[1] + ldr x15, [x3], #8 + ext v10.16b, v6.16b, v7.16b, #8 + ror x12, x11, #14 + shl v8.2d, v5.2d, #45 + ror x14, x7, #28 + sri v8.2d, v5.2d, #19 + eor x12, x12, x11, ror 18 + shl v9.2d, v5.2d, #3 + eor x14, x14, x7, ror 34 + sri v9.2d, v5.2d, #61 + eor x12, x12, x11, ror 41 + eor v9.16b, v9.16b, v8.16b + eor x14, x14, x7, ror 39 + ushr v8.2d, v5.2d, #6 + add x6, x6, x12 + eor v9.16b, v9.16b, v8.16b + eor x16, x7, x8 + add v6.2d, v6.2d, v9.2d + eor x12, x4, x5 + ext v9.16b, v2.16b, v3.16b, #8 + and x17, x16, x17 + add v6.2d, v6.2d, v9.2d + and x12, x12, x11 + shl v8.2d, v10.2d, #63 + add x6, x6, x13 + sri v8.2d, v10.2d, #1 + eor x12, x12, x5 + tbl v9.16b, {v10.16b}, v11.16b + add x6, x6, x15 + eor v9.16b, v9.16b, v8.16b + eor x17, x17, x8 + ushr v10.2d, v10.2d, #7 + add x6, x6, x12 + eor v9.16b, v9.16b, v10.16b + add x14, x14, x17 + add v6.2d, v6.2d, 
v9.2d + add x10, x10, x6 + add x6, x6, x14 + # Round 14 + mov x13, v7.d[0] + ldr x15, [x3], #8 + ror x12, x10, #14 + ror x14, x6, #28 + eor x12, x12, x10, ror 18 + eor x14, x14, x6, ror 34 + eor x12, x12, x10, ror 41 + eor x14, x14, x6, ror 39 + add x5, x5, x12 + eor x17, x6, x7 + eor x12, x11, x4 + and x16, x17, x16 + and x12, x12, x10 + add x5, x5, x13 + eor x12, x12, x4 + add x5, x5, x15 + eor x16, x16, x7 + add x5, x5, x12 + add x14, x14, x16 + add x9, x9, x5 + add x5, x5, x14 + # Round 15 + mov x13, v7.d[1] + ldr x15, [x3], #8 + ext v10.16b, v7.16b, v0.16b, #8 + ror x12, x9, #14 + shl v8.2d, v6.2d, #45 + ror x14, x5, #28 + sri v8.2d, v6.2d, #19 + eor x12, x12, x9, ror 18 + shl v9.2d, v6.2d, #3 + eor x14, x14, x5, ror 34 + sri v9.2d, v6.2d, #61 + eor x12, x12, x9, ror 41 + eor v9.16b, v9.16b, v8.16b + eor x14, x14, x5, ror 39 + ushr v8.2d, v6.2d, #6 + add x4, x4, x12 + eor v9.16b, v9.16b, v8.16b + eor x16, x5, x6 + add v7.2d, v7.2d, v9.2d + eor x12, x10, x11 + ext v9.16b, v3.16b, v4.16b, #8 + and x17, x16, x17 + add v7.2d, v7.2d, v9.2d + and x12, x12, x9 + shl v8.2d, v10.2d, #63 + add x4, x4, x13 + sri v8.2d, v10.2d, #1 + eor x12, x12, x11 + tbl v9.16b, {v10.16b}, v11.16b + add x4, x4, x15 + eor v9.16b, v9.16b, v8.16b + eor x17, x17, x6 + ushr v10.2d, v10.2d, #7 + add x4, x4, x12 + eor v9.16b, v9.16b, v10.16b + add x14, x14, x17 + add v7.2d, v7.2d, v9.2d + add x8, x8, x4 + add x4, x4, x14 + subs x27, x27, #1 + bne L_sha512_len_neon_start + # Round 0 + mov x13, v0.d[0] + ldr x15, [x3], #8 + ror x12, x8, #14 + ror x14, x4, #28 + eor x12, x12, x8, ror 18 + eor x14, x14, x4, ror 34 + eor x12, x12, x8, ror 41 + eor x14, x14, x4, ror 39 + add x11, x11, x12 + eor x17, x4, x5 + eor x12, x9, x10 + and x16, x17, x16 + and x12, x12, x8 + add x11, x11, x13 + eor x12, x12, x10 + add x11, x11, x15 + eor x16, x16, x5 + add x11, x11, x12 + add x14, x14, x16 + add x7, x7, x11 + add x11, x11, x14 + # Round 1 + mov x13, v0.d[1] + ldr x15, [x3], #8 + ror x12, x7, #14 + ror x14, x11, #28 + eor x12, x12, x7, ror 18 + eor x14, x14, x11, ror 34 + eor x12, x12, x7, ror 41 + eor x14, x14, x11, ror 39 + add x10, x10, x12 + eor x16, x11, x4 + eor x12, x8, x9 + and x17, x16, x17 + and x12, x12, x7 + add x10, x10, x13 + eor x12, x12, x9 + add x10, x10, x15 + eor x17, x17, x4 + add x10, x10, x12 + add x14, x14, x17 + add x6, x6, x10 + add x10, x10, x14 + # Round 2 + mov x13, v1.d[0] + ldr x15, [x3], #8 + ror x12, x6, #14 + ror x14, x10, #28 + eor x12, x12, x6, ror 18 + eor x14, x14, x10, ror 34 + eor x12, x12, x6, ror 41 + eor x14, x14, x10, ror 39 + add x9, x9, x12 + eor x17, x10, x11 + eor x12, x7, x8 + and x16, x17, x16 + and x12, x12, x6 + add x9, x9, x13 + eor x12, x12, x8 + add x9, x9, x15 + eor x16, x16, x11 + add x9, x9, x12 + add x14, x14, x16 + add x5, x5, x9 + add x9, x9, x14 + # Round 3 + mov x13, v1.d[1] + ldr x15, [x3], #8 + ror x12, x5, #14 + ror x14, x9, #28 + eor x12, x12, x5, ror 18 + eor x14, x14, x9, ror 34 + eor x12, x12, x5, ror 41 + eor x14, x14, x9, ror 39 + add x8, x8, x12 + eor x16, x9, x10 + eor x12, x6, x7 + and x17, x16, x17 + and x12, x12, x5 + add x8, x8, x13 + eor x12, x12, x7 + add x8, x8, x15 + eor x17, x17, x10 + add x8, x8, x12 + add x14, x14, x17 + add x4, x4, x8 + add x8, x8, x14 + # Round 4 + mov x13, v2.d[0] + ldr x15, [x3], #8 + ror x12, x4, #14 + ror x14, x8, #28 + eor x12, x12, x4, ror 18 + eor x14, x14, x8, ror 34 + eor x12, x12, x4, ror 41 + eor x14, x14, x8, ror 39 + add x7, x7, x12 + eor x17, x8, x9 + eor x12, x5, x6 + and x16, x17, x16 + and x12, x12, x4 + add x7, x7, 
x13 + eor x12, x12, x6 + add x7, x7, x15 + eor x16, x16, x9 + add x7, x7, x12 + add x14, x14, x16 + add x11, x11, x7 + add x7, x7, x14 + # Round 5 + mov x13, v2.d[1] + ldr x15, [x3], #8 + ror x12, x11, #14 + ror x14, x7, #28 + eor x12, x12, x11, ror 18 + eor x14, x14, x7, ror 34 + eor x12, x12, x11, ror 41 + eor x14, x14, x7, ror 39 + add x6, x6, x12 + eor x16, x7, x8 + eor x12, x4, x5 + and x17, x16, x17 + and x12, x12, x11 + add x6, x6, x13 + eor x12, x12, x5 + add x6, x6, x15 + eor x17, x17, x8 + add x6, x6, x12 + add x14, x14, x17 + add x10, x10, x6 + add x6, x6, x14 + # Round 6 + mov x13, v3.d[0] + ldr x15, [x3], #8 + ror x12, x10, #14 + ror x14, x6, #28 + eor x12, x12, x10, ror 18 + eor x14, x14, x6, ror 34 + eor x12, x12, x10, ror 41 + eor x14, x14, x6, ror 39 + add x5, x5, x12 + eor x17, x6, x7 + eor x12, x11, x4 + and x16, x17, x16 + and x12, x12, x10 + add x5, x5, x13 + eor x12, x12, x4 + add x5, x5, x15 + eor x16, x16, x7 + add x5, x5, x12 + add x14, x14, x16 + add x9, x9, x5 + add x5, x5, x14 + # Round 7 + mov x13, v3.d[1] + ldr x15, [x3], #8 + ror x12, x9, #14 + ror x14, x5, #28 + eor x12, x12, x9, ror 18 + eor x14, x14, x5, ror 34 + eor x12, x12, x9, ror 41 + eor x14, x14, x5, ror 39 + add x4, x4, x12 + eor x16, x5, x6 + eor x12, x10, x11 + and x17, x16, x17 + and x12, x12, x9 + add x4, x4, x13 + eor x12, x12, x11 + add x4, x4, x15 + eor x17, x17, x6 + add x4, x4, x12 + add x14, x14, x17 + add x8, x8, x4 + add x4, x4, x14 + # Round 8 + mov x13, v4.d[0] + ldr x15, [x3], #8 + ror x12, x8, #14 + ror x14, x4, #28 + eor x12, x12, x8, ror 18 + eor x14, x14, x4, ror 34 + eor x12, x12, x8, ror 41 + eor x14, x14, x4, ror 39 + add x11, x11, x12 + eor x17, x4, x5 + eor x12, x9, x10 + and x16, x17, x16 + and x12, x12, x8 + add x11, x11, x13 + eor x12, x12, x10 + add x11, x11, x15 + eor x16, x16, x5 + add x11, x11, x12 + add x14, x14, x16 + add x7, x7, x11 + add x11, x11, x14 + # Round 9 + mov x13, v4.d[1] + ldr x15, [x3], #8 + ror x12, x7, #14 + ror x14, x11, #28 + eor x12, x12, x7, ror 18 + eor x14, x14, x11, ror 34 + eor x12, x12, x7, ror 41 + eor x14, x14, x11, ror 39 + add x10, x10, x12 + eor x16, x11, x4 + eor x12, x8, x9 + and x17, x16, x17 + and x12, x12, x7 + add x10, x10, x13 + eor x12, x12, x9 + add x10, x10, x15 + eor x17, x17, x4 + add x10, x10, x12 + add x14, x14, x17 + add x6, x6, x10 + add x10, x10, x14 + # Round 10 + mov x13, v5.d[0] + ldr x15, [x3], #8 + ror x12, x6, #14 + ror x14, x10, #28 + eor x12, x12, x6, ror 18 + eor x14, x14, x10, ror 34 + eor x12, x12, x6, ror 41 + eor x14, x14, x10, ror 39 + add x9, x9, x12 + eor x17, x10, x11 + eor x12, x7, x8 + and x16, x17, x16 + and x12, x12, x6 + add x9, x9, x13 + eor x12, x12, x8 + add x9, x9, x15 + eor x16, x16, x11 + add x9, x9, x12 + add x14, x14, x16 + add x5, x5, x9 + add x9, x9, x14 + # Round 11 + mov x13, v5.d[1] + ldr x15, [x3], #8 + ror x12, x5, #14 + ror x14, x9, #28 + eor x12, x12, x5, ror 18 + eor x14, x14, x9, ror 34 + eor x12, x12, x5, ror 41 + eor x14, x14, x9, ror 39 + add x8, x8, x12 + eor x16, x9, x10 + eor x12, x6, x7 + and x17, x16, x17 + and x12, x12, x5 + add x8, x8, x13 + eor x12, x12, x7 + add x8, x8, x15 + eor x17, x17, x10 + add x8, x8, x12 + add x14, x14, x17 + add x4, x4, x8 + add x8, x8, x14 + # Round 12 + mov x13, v6.d[0] + ldr x15, [x3], #8 + ror x12, x4, #14 + ror x14, x8, #28 + eor x12, x12, x4, ror 18 + eor x14, x14, x8, ror 34 + eor x12, x12, x4, ror 41 + eor x14, x14, x8, ror 39 + add x7, x7, x12 + eor x17, x8, x9 + eor x12, x5, x6 + and x16, x17, x16 + and x12, x12, x4 + add x7, x7, x13 
+ eor x12, x12, x6 + add x7, x7, x15 + eor x16, x16, x9 + add x7, x7, x12 + add x14, x14, x16 + add x11, x11, x7 + add x7, x7, x14 + # Round 13 + mov x13, v6.d[1] + ldr x15, [x3], #8 + ror x12, x11, #14 + ror x14, x7, #28 + eor x12, x12, x11, ror 18 + eor x14, x14, x7, ror 34 + eor x12, x12, x11, ror 41 + eor x14, x14, x7, ror 39 + add x6, x6, x12 + eor x16, x7, x8 + eor x12, x4, x5 + and x17, x16, x17 + and x12, x12, x11 + add x6, x6, x13 + eor x12, x12, x5 + add x6, x6, x15 + eor x17, x17, x8 + add x6, x6, x12 + add x14, x14, x17 + add x10, x10, x6 + add x6, x6, x14 + # Round 14 + mov x13, v7.d[0] + ldr x15, [x3], #8 + ror x12, x10, #14 + ror x14, x6, #28 + eor x12, x12, x10, ror 18 + eor x14, x14, x6, ror 34 + eor x12, x12, x10, ror 41 + eor x14, x14, x6, ror 39 + add x5, x5, x12 + eor x17, x6, x7 + eor x12, x11, x4 + and x16, x17, x16 + and x12, x12, x10 + add x5, x5, x13 + eor x12, x12, x4 + add x5, x5, x15 + eor x16, x16, x7 + add x5, x5, x12 + add x14, x14, x16 + add x9, x9, x5 + add x5, x5, x14 + # Round 15 + mov x13, v7.d[1] + ldr x15, [x3], #8 + ror x12, x9, #14 + ror x14, x5, #28 + eor x12, x12, x9, ror 18 + eor x14, x14, x5, ror 34 + eor x12, x12, x9, ror 41 + eor x14, x14, x5, ror 39 + add x4, x4, x12 + eor x16, x5, x6 + eor x12, x10, x11 + and x17, x16, x17 + and x12, x12, x9 + add x4, x4, x13 + eor x12, x12, x11 + add x4, x4, x15 + eor x17, x17, x6 + add x4, x4, x12 + add x14, x14, x17 + add x8, x8, x4 + add x4, x4, x14 + add x11, x11, x26 + add x10, x10, x25 + add x9, x9, x24 + add x8, x8, x23 + add x7, x7, x22 + add x6, x6, x21 + add x5, x5, x20 + add x4, x4, x19 + adr x3, L_SHA512_transform_neon_len_k + subs w2, w2, #0x80 + bne L_sha512_len_neon_begin + stp x4, x5, [x0] + stp x6, x7, [x0, #16] + stp x8, x9, [x0, #32] + stp x10, x11, [x0, #48] + ldr x17, [x29, #16] + ldr x19, [x29, #24] + ldp x20, x21, [x29, #32] + ldp x22, x23, [x29, #48] + ldp x24, x25, [x29, #64] + ldp x26, x27, [x29, #80] + ldp d8, d9, [x29, #96] + ldp d10, d11, [x29, #112] + ldp x29, x30, [sp], #0x80 + ret + .size Transform_Sha512_Len,.-Transform_Sha512_Len +#endif /* __aarch64__ */ diff --git a/client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c b/client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c new file mode 100644 index 0000000..d323598 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c @@ -0,0 +1,1041 @@ +/* armv8-sha512-asm + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./sha2/sha512.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c + */ +#ifdef __aarch64__ +#include <stdint.h> + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFSSL_ARMASM +#include <wolfssl/wolfcrypt/sha512.h> + +static const uint64_t L_SHA512_transform_neon_len_k[] = { + 0x428a2f98d728ae22UL, + 0x7137449123ef65cdUL, + 0xb5c0fbcfec4d3b2fUL, + 0xe9b5dba58189dbbcUL, + 0x3956c25bf348b538UL, + 0x59f111f1b605d019UL, + 0x923f82a4af194f9bUL, + 0xab1c5ed5da6d8118UL, + 0xd807aa98a3030242UL, + 0x12835b0145706fbeUL, + 0x243185be4ee4b28cUL, + 0x550c7dc3d5ffb4e2UL, + 0x72be5d74f27b896fUL, + 0x80deb1fe3b1696b1UL, + 0x9bdc06a725c71235UL, + 0xc19bf174cf692694UL, + 0xe49b69c19ef14ad2UL, + 0xefbe4786384f25e3UL, + 0xfc19dc68b8cd5b5UL, + 0x240ca1cc77ac9c65UL, + 0x2de92c6f592b0275UL, + 0x4a7484aa6ea6e483UL, + 0x5cb0a9dcbd41fbd4UL, + 0x76f988da831153b5UL, + 0x983e5152ee66dfabUL, + 0xa831c66d2db43210UL, + 0xb00327c898fb213fUL, + 0xbf597fc7beef0ee4UL, + 0xc6e00bf33da88fc2UL, + 0xd5a79147930aa725UL, + 0x6ca6351e003826fUL, + 0x142929670a0e6e70UL, + 0x27b70a8546d22ffcUL, + 0x2e1b21385c26c926UL, + 0x4d2c6dfc5ac42aedUL, + 0x53380d139d95b3dfUL, + 0x650a73548baf63deUL, + 0x766a0abb3c77b2a8UL, + 0x81c2c92e47edaee6UL, + 0x92722c851482353bUL, + 0xa2bfe8a14cf10364UL, + 0xa81a664bbc423001UL, + 0xc24b8b70d0f89791UL, + 0xc76c51a30654be30UL, + 0xd192e819d6ef5218UL, + 0xd69906245565a910UL, + 0xf40e35855771202aUL, + 0x106aa07032bbd1b8UL, + 0x19a4c116b8d2d0c8UL, + 0x1e376c085141ab53UL, + 0x2748774cdf8eeb99UL, + 0x34b0bcb5e19b48a8UL, + 0x391c0cb3c5c95a63UL, + 0x4ed8aa4ae3418acbUL, + 0x5b9cca4f7763e373UL, + 0x682e6ff3d6b2b8a3UL, + 0x748f82ee5defb2fcUL, + 0x78a5636f43172f60UL, + 0x84c87814a1f0ab72UL, + 0x8cc702081a6439ecUL, + 0x90befffa23631e28UL, + 0xa4506cebde82bde9UL, + 0xbef9a3f7b2c67915UL, + 0xc67178f2e372532bUL, + 0xca273eceea26619cUL, + 0xd186b8c721c0c207UL, + 0xeada7dd6cde0eb1eUL, + 0xf57d4f7fee6ed178UL, + 0x6f067aa72176fbaUL, + 0xa637dc5a2c898a6UL, + 0x113f9804bef90daeUL, + 0x1b710b35131c471bUL, + 0x28db77f523047d84UL, + 0x32caab7b40c72493UL, + 0x3c9ebe0a15c9bebcUL, + 0x431d67c49c100d4cUL, + 0x4cc5d4becb3e42b6UL, + 0x597f299cfc657e2aUL, + 0x5fcb6fab3ad6faecUL, + 0x6c44198c4a475817UL, +}; + +static const uint64_t L_SHA512_transform_neon_len_ror8[] = { + 0x7060504030201UL, + 0x80f0e0d0c0b0a09UL, +}; + +void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len) +{ + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-16]!\n\t" + "add x29, sp, #0\n\t" + "adr x3, %[L_SHA512_transform_neon_len_k]\n\t" + "adr x27, %[L_SHA512_transform_neon_len_ror8]\n\t" + "ld1 {v11.16b}, [x27]\n\t" + /* Load digest into working vars */ + "ldp x4, x5, [%x[sha512]]\n\t" + "ldp x6, x7, [%x[sha512], #16]\n\t" + "ldp x8, x9, [%x[sha512], #32]\n\t" + "ldp x10, x11, [%x[sha512], #48]\n\t" + /* Start of loop processing a block */ + "\n" + "L_sha512_len_neon_begin_%=: \n\t" + /* Load W */ + /* Copy digest to add in at end */ + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[data]], #0x40\n\t" + "mov x19, x4\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[data]], #0x40\n\t" + "mov x20, x5\n\t" + "rev64 v0.16b, v0.16b\n\t" + "mov x21, x6\n\t" + "rev64 v1.16b, v1.16b\n\t" + "mov x22, x7\n\t" + "rev64 v2.16b, v2.16b\n\t" + "mov x23, x8\n\t" + "rev64 v3.16b, v3.16b\n\t" + "mov x24,
x9\n\t" + "rev64 v4.16b, v4.16b\n\t" + "mov x25, x10\n\t" + "rev64 v5.16b, v5.16b\n\t" + "mov x26, x11\n\t" + "rev64 v6.16b, v6.16b\n\t" + "rev64 v7.16b, v7.16b\n\t" + /* Pre-calc: b ^ c */ + "eor x16, x5, x6\n\t" + "mov x27, #4\n\t" + /* Start of 16 rounds */ + "\n" + "L_sha512_len_neon_start_%=: \n\t" + /* Round 0 */ + "mov x13, v0.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x8, #14\n\t" + "ror x14, x4, #28\n\t" + "eor x12, x12, x8, ror 18\n\t" + "eor x14, x14, x4, ror 34\n\t" + "eor x12, x12, x8, ror 41\n\t" + "eor x14, x14, x4, ror 39\n\t" + "add x11, x11, x12\n\t" + "eor x17, x4, x5\n\t" + "eor x12, x9, x10\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x8\n\t" + "add x11, x11, x13\n\t" + "eor x12, x12, x10\n\t" + "add x11, x11, x15\n\t" + "eor x16, x16, x5\n\t" + "add x11, x11, x12\n\t" + "add x14, x14, x16\n\t" + "add x7, x7, x11\n\t" + "add x11, x11, x14\n\t" + /* Round 1 */ + "mov x13, v0.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ext v10.16b, v0.16b, v1.16b, #8\n\t" + "ror x12, x7, #14\n\t" + "shl v8.2d, v7.2d, #45\n\t" + "ror x14, x11, #28\n\t" + "sri v8.2d, v7.2d, #19\n\t" + "eor x12, x12, x7, ror 18\n\t" + "shl v9.2d, v7.2d, #3\n\t" + "eor x14, x14, x11, ror 34\n\t" + "sri v9.2d, v7.2d, #61\n\t" + "eor x12, x12, x7, ror 41\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x14, x14, x11, ror 39\n\t" + "ushr v8.2d, v7.2d, #6\n\t" + "add x10, x10, x12\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x16, x11, x4\n\t" + "add v0.2d, v0.2d, v9.2d\n\t" + "eor x12, x8, x9\n\t" + "ext v9.16b, v4.16b, v5.16b, #8\n\t" + "and x17, x16, x17\n\t" + "add v0.2d, v0.2d, v9.2d\n\t" + "and x12, x12, x7\n\t" + "shl v8.2d, v10.2d, #63\n\t" + "add x10, x10, x13\n\t" + "sri v8.2d, v10.2d, #1\n\t" + "eor x12, x12, x9\n\t" + "tbl v9.16b, {v10.16b}, v11.16b\n\t" + "add x10, x10, x15\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x17, x17, x4\n\t" + "ushr v10.2d, v10.2d, #7\n\t" + "add x10, x10, x12\n\t" + "eor v9.16b, v9.16b, v10.16b\n\t" + "add x14, x14, x17\n\t" + "add v0.2d, v0.2d, v9.2d\n\t" + "add x6, x6, x10\n\t" + "add x10, x10, x14\n\t" + /* Round 2 */ + "mov x13, v1.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x6, #14\n\t" + "ror x14, x10, #28\n\t" + "eor x12, x12, x6, ror 18\n\t" + "eor x14, x14, x10, ror 34\n\t" + "eor x12, x12, x6, ror 41\n\t" + "eor x14, x14, x10, ror 39\n\t" + "add x9, x9, x12\n\t" + "eor x17, x10, x11\n\t" + "eor x12, x7, x8\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x6\n\t" + "add x9, x9, x13\n\t" + "eor x12, x12, x8\n\t" + "add x9, x9, x15\n\t" + "eor x16, x16, x11\n\t" + "add x9, x9, x12\n\t" + "add x14, x14, x16\n\t" + "add x5, x5, x9\n\t" + "add x9, x9, x14\n\t" + /* Round 3 */ + "mov x13, v1.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ext v10.16b, v1.16b, v2.16b, #8\n\t" + "ror x12, x5, #14\n\t" + "shl v8.2d, v0.2d, #45\n\t" + "ror x14, x9, #28\n\t" + "sri v8.2d, v0.2d, #19\n\t" + "eor x12, x12, x5, ror 18\n\t" + "shl v9.2d, v0.2d, #3\n\t" + "eor x14, x14, x9, ror 34\n\t" + "sri v9.2d, v0.2d, #61\n\t" + "eor x12, x12, x5, ror 41\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x14, x14, x9, ror 39\n\t" + "ushr v8.2d, v0.2d, #6\n\t" + "add x8, x8, x12\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x16, x9, x10\n\t" + "add v1.2d, v1.2d, v9.2d\n\t" + "eor x12, x6, x7\n\t" + "ext v9.16b, v5.16b, v6.16b, #8\n\t" + "and x17, x16, x17\n\t" + "add v1.2d, v1.2d, v9.2d\n\t" + "and x12, x12, x5\n\t" + "shl v8.2d, v10.2d, #63\n\t" + "add x8, x8, x13\n\t" + "sri v8.2d, v10.2d, #1\n\t" + "eor x12, x12, x7\n\t" + "tbl v9.16b, {v10.16b}, v11.16b\n\t" + "add x8, x8, x15\n\t" + "eor 
v9.16b, v9.16b, v8.16b\n\t" + "eor x17, x17, x10\n\t" + "ushr v10.2d, v10.2d, #7\n\t" + "add x8, x8, x12\n\t" + "eor v9.16b, v9.16b, v10.16b\n\t" + "add x14, x14, x17\n\t" + "add v1.2d, v1.2d, v9.2d\n\t" + "add x4, x4, x8\n\t" + "add x8, x8, x14\n\t" + /* Round 4 */ + "mov x13, v2.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x4, #14\n\t" + "ror x14, x8, #28\n\t" + "eor x12, x12, x4, ror 18\n\t" + "eor x14, x14, x8, ror 34\n\t" + "eor x12, x12, x4, ror 41\n\t" + "eor x14, x14, x8, ror 39\n\t" + "add x7, x7, x12\n\t" + "eor x17, x8, x9\n\t" + "eor x12, x5, x6\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x4\n\t" + "add x7, x7, x13\n\t" + "eor x12, x12, x6\n\t" + "add x7, x7, x15\n\t" + "eor x16, x16, x9\n\t" + "add x7, x7, x12\n\t" + "add x14, x14, x16\n\t" + "add x11, x11, x7\n\t" + "add x7, x7, x14\n\t" + /* Round 5 */ + "mov x13, v2.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ext v10.16b, v2.16b, v3.16b, #8\n\t" + "ror x12, x11, #14\n\t" + "shl v8.2d, v1.2d, #45\n\t" + "ror x14, x7, #28\n\t" + "sri v8.2d, v1.2d, #19\n\t" + "eor x12, x12, x11, ror 18\n\t" + "shl v9.2d, v1.2d, #3\n\t" + "eor x14, x14, x7, ror 34\n\t" + "sri v9.2d, v1.2d, #61\n\t" + "eor x12, x12, x11, ror 41\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x14, x14, x7, ror 39\n\t" + "ushr v8.2d, v1.2d, #6\n\t" + "add x6, x6, x12\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x16, x7, x8\n\t" + "add v2.2d, v2.2d, v9.2d\n\t" + "eor x12, x4, x5\n\t" + "ext v9.16b, v6.16b, v7.16b, #8\n\t" + "and x17, x16, x17\n\t" + "add v2.2d, v2.2d, v9.2d\n\t" + "and x12, x12, x11\n\t" + "shl v8.2d, v10.2d, #63\n\t" + "add x6, x6, x13\n\t" + "sri v8.2d, v10.2d, #1\n\t" + "eor x12, x12, x5\n\t" + "tbl v9.16b, {v10.16b}, v11.16b\n\t" + "add x6, x6, x15\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x17, x17, x8\n\t" + "ushr v10.2d, v10.2d, #7\n\t" + "add x6, x6, x12\n\t" + "eor v9.16b, v9.16b, v10.16b\n\t" + "add x14, x14, x17\n\t" + "add v2.2d, v2.2d, v9.2d\n\t" + "add x10, x10, x6\n\t" + "add x6, x6, x14\n\t" + /* Round 6 */ + "mov x13, v3.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x10, #14\n\t" + "ror x14, x6, #28\n\t" + "eor x12, x12, x10, ror 18\n\t" + "eor x14, x14, x6, ror 34\n\t" + "eor x12, x12, x10, ror 41\n\t" + "eor x14, x14, x6, ror 39\n\t" + "add x5, x5, x12\n\t" + "eor x17, x6, x7\n\t" + "eor x12, x11, x4\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x10\n\t" + "add x5, x5, x13\n\t" + "eor x12, x12, x4\n\t" + "add x5, x5, x15\n\t" + "eor x16, x16, x7\n\t" + "add x5, x5, x12\n\t" + "add x14, x14, x16\n\t" + "add x9, x9, x5\n\t" + "add x5, x5, x14\n\t" + /* Round 7 */ + "mov x13, v3.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ext v10.16b, v3.16b, v4.16b, #8\n\t" + "ror x12, x9, #14\n\t" + "shl v8.2d, v2.2d, #45\n\t" + "ror x14, x5, #28\n\t" + "sri v8.2d, v2.2d, #19\n\t" + "eor x12, x12, x9, ror 18\n\t" + "shl v9.2d, v2.2d, #3\n\t" + "eor x14, x14, x5, ror 34\n\t" + "sri v9.2d, v2.2d, #61\n\t" + "eor x12, x12, x9, ror 41\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x14, x14, x5, ror 39\n\t" + "ushr v8.2d, v2.2d, #6\n\t" + "add x4, x4, x12\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x16, x5, x6\n\t" + "add v3.2d, v3.2d, v9.2d\n\t" + "eor x12, x10, x11\n\t" + "ext v9.16b, v7.16b, v0.16b, #8\n\t" + "and x17, x16, x17\n\t" + "add v3.2d, v3.2d, v9.2d\n\t" + "and x12, x12, x9\n\t" + "shl v8.2d, v10.2d, #63\n\t" + "add x4, x4, x13\n\t" + "sri v8.2d, v10.2d, #1\n\t" + "eor x12, x12, x11\n\t" + "tbl v9.16b, {v10.16b}, v11.16b\n\t" + "add x4, x4, x15\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x17, x17, x6\n\t" + "ushr v10.2d, 
v10.2d, #7\n\t" + "add x4, x4, x12\n\t" + "eor v9.16b, v9.16b, v10.16b\n\t" + "add x14, x14, x17\n\t" + "add v3.2d, v3.2d, v9.2d\n\t" + "add x8, x8, x4\n\t" + "add x4, x4, x14\n\t" + /* Round 8 */ + "mov x13, v4.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x8, #14\n\t" + "ror x14, x4, #28\n\t" + "eor x12, x12, x8, ror 18\n\t" + "eor x14, x14, x4, ror 34\n\t" + "eor x12, x12, x8, ror 41\n\t" + "eor x14, x14, x4, ror 39\n\t" + "add x11, x11, x12\n\t" + "eor x17, x4, x5\n\t" + "eor x12, x9, x10\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x8\n\t" + "add x11, x11, x13\n\t" + "eor x12, x12, x10\n\t" + "add x11, x11, x15\n\t" + "eor x16, x16, x5\n\t" + "add x11, x11, x12\n\t" + "add x14, x14, x16\n\t" + "add x7, x7, x11\n\t" + "add x11, x11, x14\n\t" + /* Round 9 */ + "mov x13, v4.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ext v10.16b, v4.16b, v5.16b, #8\n\t" + "ror x12, x7, #14\n\t" + "shl v8.2d, v3.2d, #45\n\t" + "ror x14, x11, #28\n\t" + "sri v8.2d, v3.2d, #19\n\t" + "eor x12, x12, x7, ror 18\n\t" + "shl v9.2d, v3.2d, #3\n\t" + "eor x14, x14, x11, ror 34\n\t" + "sri v9.2d, v3.2d, #61\n\t" + "eor x12, x12, x7, ror 41\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x14, x14, x11, ror 39\n\t" + "ushr v8.2d, v3.2d, #6\n\t" + "add x10, x10, x12\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x16, x11, x4\n\t" + "add v4.2d, v4.2d, v9.2d\n\t" + "eor x12, x8, x9\n\t" + "ext v9.16b, v0.16b, v1.16b, #8\n\t" + "and x17, x16, x17\n\t" + "add v4.2d, v4.2d, v9.2d\n\t" + "and x12, x12, x7\n\t" + "shl v8.2d, v10.2d, #63\n\t" + "add x10, x10, x13\n\t" + "sri v8.2d, v10.2d, #1\n\t" + "eor x12, x12, x9\n\t" + "tbl v9.16b, {v10.16b}, v11.16b\n\t" + "add x10, x10, x15\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x17, x17, x4\n\t" + "ushr v10.2d, v10.2d, #7\n\t" + "add x10, x10, x12\n\t" + "eor v9.16b, v9.16b, v10.16b\n\t" + "add x14, x14, x17\n\t" + "add v4.2d, v4.2d, v9.2d\n\t" + "add x6, x6, x10\n\t" + "add x10, x10, x14\n\t" + /* Round 10 */ + "mov x13, v5.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x6, #14\n\t" + "ror x14, x10, #28\n\t" + "eor x12, x12, x6, ror 18\n\t" + "eor x14, x14, x10, ror 34\n\t" + "eor x12, x12, x6, ror 41\n\t" + "eor x14, x14, x10, ror 39\n\t" + "add x9, x9, x12\n\t" + "eor x17, x10, x11\n\t" + "eor x12, x7, x8\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x6\n\t" + "add x9, x9, x13\n\t" + "eor x12, x12, x8\n\t" + "add x9, x9, x15\n\t" + "eor x16, x16, x11\n\t" + "add x9, x9, x12\n\t" + "add x14, x14, x16\n\t" + "add x5, x5, x9\n\t" + "add x9, x9, x14\n\t" + /* Round 11 */ + "mov x13, v5.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ext v10.16b, v5.16b, v6.16b, #8\n\t" + "ror x12, x5, #14\n\t" + "shl v8.2d, v4.2d, #45\n\t" + "ror x14, x9, #28\n\t" + "sri v8.2d, v4.2d, #19\n\t" + "eor x12, x12, x5, ror 18\n\t" + "shl v9.2d, v4.2d, #3\n\t" + "eor x14, x14, x9, ror 34\n\t" + "sri v9.2d, v4.2d, #61\n\t" + "eor x12, x12, x5, ror 41\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x14, x14, x9, ror 39\n\t" + "ushr v8.2d, v4.2d, #6\n\t" + "add x8, x8, x12\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x16, x9, x10\n\t" + "add v5.2d, v5.2d, v9.2d\n\t" + "eor x12, x6, x7\n\t" + "ext v9.16b, v1.16b, v2.16b, #8\n\t" + "and x17, x16, x17\n\t" + "add v5.2d, v5.2d, v9.2d\n\t" + "and x12, x12, x5\n\t" + "shl v8.2d, v10.2d, #63\n\t" + "add x8, x8, x13\n\t" + "sri v8.2d, v10.2d, #1\n\t" + "eor x12, x12, x7\n\t" + "tbl v9.16b, {v10.16b}, v11.16b\n\t" + "add x8, x8, x15\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x17, x17, x10\n\t" + "ushr v10.2d, v10.2d, #7\n\t" + "add x8, x8, x12\n\t" + "eor v9.16b, 
v9.16b, v10.16b\n\t" + "add x14, x14, x17\n\t" + "add v5.2d, v5.2d, v9.2d\n\t" + "add x4, x4, x8\n\t" + "add x8, x8, x14\n\t" + /* Round 12 */ + "mov x13, v6.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x4, #14\n\t" + "ror x14, x8, #28\n\t" + "eor x12, x12, x4, ror 18\n\t" + "eor x14, x14, x8, ror 34\n\t" + "eor x12, x12, x4, ror 41\n\t" + "eor x14, x14, x8, ror 39\n\t" + "add x7, x7, x12\n\t" + "eor x17, x8, x9\n\t" + "eor x12, x5, x6\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x4\n\t" + "add x7, x7, x13\n\t" + "eor x12, x12, x6\n\t" + "add x7, x7, x15\n\t" + "eor x16, x16, x9\n\t" + "add x7, x7, x12\n\t" + "add x14, x14, x16\n\t" + "add x11, x11, x7\n\t" + "add x7, x7, x14\n\t" + /* Round 13 */ + "mov x13, v6.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ext v10.16b, v6.16b, v7.16b, #8\n\t" + "ror x12, x11, #14\n\t" + "shl v8.2d, v5.2d, #45\n\t" + "ror x14, x7, #28\n\t" + "sri v8.2d, v5.2d, #19\n\t" + "eor x12, x12, x11, ror 18\n\t" + "shl v9.2d, v5.2d, #3\n\t" + "eor x14, x14, x7, ror 34\n\t" + "sri v9.2d, v5.2d, #61\n\t" + "eor x12, x12, x11, ror 41\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x14, x14, x7, ror 39\n\t" + "ushr v8.2d, v5.2d, #6\n\t" + "add x6, x6, x12\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x16, x7, x8\n\t" + "add v6.2d, v6.2d, v9.2d\n\t" + "eor x12, x4, x5\n\t" + "ext v9.16b, v2.16b, v3.16b, #8\n\t" + "and x17, x16, x17\n\t" + "add v6.2d, v6.2d, v9.2d\n\t" + "and x12, x12, x11\n\t" + "shl v8.2d, v10.2d, #63\n\t" + "add x6, x6, x13\n\t" + "sri v8.2d, v10.2d, #1\n\t" + "eor x12, x12, x5\n\t" + "tbl v9.16b, {v10.16b}, v11.16b\n\t" + "add x6, x6, x15\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x17, x17, x8\n\t" + "ushr v10.2d, v10.2d, #7\n\t" + "add x6, x6, x12\n\t" + "eor v9.16b, v9.16b, v10.16b\n\t" + "add x14, x14, x17\n\t" + "add v6.2d, v6.2d, v9.2d\n\t" + "add x10, x10, x6\n\t" + "add x6, x6, x14\n\t" + /* Round 14 */ + "mov x13, v7.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x10, #14\n\t" + "ror x14, x6, #28\n\t" + "eor x12, x12, x10, ror 18\n\t" + "eor x14, x14, x6, ror 34\n\t" + "eor x12, x12, x10, ror 41\n\t" + "eor x14, x14, x6, ror 39\n\t" + "add x5, x5, x12\n\t" + "eor x17, x6, x7\n\t" + "eor x12, x11, x4\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x10\n\t" + "add x5, x5, x13\n\t" + "eor x12, x12, x4\n\t" + "add x5, x5, x15\n\t" + "eor x16, x16, x7\n\t" + "add x5, x5, x12\n\t" + "add x14, x14, x16\n\t" + "add x9, x9, x5\n\t" + "add x5, x5, x14\n\t" + /* Round 15 */ + "mov x13, v7.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ext v10.16b, v7.16b, v0.16b, #8\n\t" + "ror x12, x9, #14\n\t" + "shl v8.2d, v6.2d, #45\n\t" + "ror x14, x5, #28\n\t" + "sri v8.2d, v6.2d, #19\n\t" + "eor x12, x12, x9, ror 18\n\t" + "shl v9.2d, v6.2d, #3\n\t" + "eor x14, x14, x5, ror 34\n\t" + "sri v9.2d, v6.2d, #61\n\t" + "eor x12, x12, x9, ror 41\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x14, x14, x5, ror 39\n\t" + "ushr v8.2d, v6.2d, #6\n\t" + "add x4, x4, x12\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x16, x5, x6\n\t" + "add v7.2d, v7.2d, v9.2d\n\t" + "eor x12, x10, x11\n\t" + "ext v9.16b, v3.16b, v4.16b, #8\n\t" + "and x17, x16, x17\n\t" + "add v7.2d, v7.2d, v9.2d\n\t" + "and x12, x12, x9\n\t" + "shl v8.2d, v10.2d, #63\n\t" + "add x4, x4, x13\n\t" + "sri v8.2d, v10.2d, #1\n\t" + "eor x12, x12, x11\n\t" + "tbl v9.16b, {v10.16b}, v11.16b\n\t" + "add x4, x4, x15\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x17, x17, x6\n\t" + "ushr v10.2d, v10.2d, #7\n\t" + "add x4, x4, x12\n\t" + "eor v9.16b, v9.16b, v10.16b\n\t" + "add x14, x14, x17\n\t" + "add v7.2d, v7.2d, 
v9.2d\n\t" + "add x8, x8, x4\n\t" + "add x4, x4, x14\n\t" + "subs x27, x27, #1\n\t" + "bne L_sha512_len_neon_start_%=\n\t" + /* Round 0 */ + "mov x13, v0.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x8, #14\n\t" + "ror x14, x4, #28\n\t" + "eor x12, x12, x8, ror 18\n\t" + "eor x14, x14, x4, ror 34\n\t" + "eor x12, x12, x8, ror 41\n\t" + "eor x14, x14, x4, ror 39\n\t" + "add x11, x11, x12\n\t" + "eor x17, x4, x5\n\t" + "eor x12, x9, x10\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x8\n\t" + "add x11, x11, x13\n\t" + "eor x12, x12, x10\n\t" + "add x11, x11, x15\n\t" + "eor x16, x16, x5\n\t" + "add x11, x11, x12\n\t" + "add x14, x14, x16\n\t" + "add x7, x7, x11\n\t" + "add x11, x11, x14\n\t" + /* Round 1 */ + "mov x13, v0.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x7, #14\n\t" + "ror x14, x11, #28\n\t" + "eor x12, x12, x7, ror 18\n\t" + "eor x14, x14, x11, ror 34\n\t" + "eor x12, x12, x7, ror 41\n\t" + "eor x14, x14, x11, ror 39\n\t" + "add x10, x10, x12\n\t" + "eor x16, x11, x4\n\t" + "eor x12, x8, x9\n\t" + "and x17, x16, x17\n\t" + "and x12, x12, x7\n\t" + "add x10, x10, x13\n\t" + "eor x12, x12, x9\n\t" + "add x10, x10, x15\n\t" + "eor x17, x17, x4\n\t" + "add x10, x10, x12\n\t" + "add x14, x14, x17\n\t" + "add x6, x6, x10\n\t" + "add x10, x10, x14\n\t" + /* Round 2 */ + "mov x13, v1.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x6, #14\n\t" + "ror x14, x10, #28\n\t" + "eor x12, x12, x6, ror 18\n\t" + "eor x14, x14, x10, ror 34\n\t" + "eor x12, x12, x6, ror 41\n\t" + "eor x14, x14, x10, ror 39\n\t" + "add x9, x9, x12\n\t" + "eor x17, x10, x11\n\t" + "eor x12, x7, x8\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x6\n\t" + "add x9, x9, x13\n\t" + "eor x12, x12, x8\n\t" + "add x9, x9, x15\n\t" + "eor x16, x16, x11\n\t" + "add x9, x9, x12\n\t" + "add x14, x14, x16\n\t" + "add x5, x5, x9\n\t" + "add x9, x9, x14\n\t" + /* Round 3 */ + "mov x13, v1.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x5, #14\n\t" + "ror x14, x9, #28\n\t" + "eor x12, x12, x5, ror 18\n\t" + "eor x14, x14, x9, ror 34\n\t" + "eor x12, x12, x5, ror 41\n\t" + "eor x14, x14, x9, ror 39\n\t" + "add x8, x8, x12\n\t" + "eor x16, x9, x10\n\t" + "eor x12, x6, x7\n\t" + "and x17, x16, x17\n\t" + "and x12, x12, x5\n\t" + "add x8, x8, x13\n\t" + "eor x12, x12, x7\n\t" + "add x8, x8, x15\n\t" + "eor x17, x17, x10\n\t" + "add x8, x8, x12\n\t" + "add x14, x14, x17\n\t" + "add x4, x4, x8\n\t" + "add x8, x8, x14\n\t" + /* Round 4 */ + "mov x13, v2.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x4, #14\n\t" + "ror x14, x8, #28\n\t" + "eor x12, x12, x4, ror 18\n\t" + "eor x14, x14, x8, ror 34\n\t" + "eor x12, x12, x4, ror 41\n\t" + "eor x14, x14, x8, ror 39\n\t" + "add x7, x7, x12\n\t" + "eor x17, x8, x9\n\t" + "eor x12, x5, x6\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x4\n\t" + "add x7, x7, x13\n\t" + "eor x12, x12, x6\n\t" + "add x7, x7, x15\n\t" + "eor x16, x16, x9\n\t" + "add x7, x7, x12\n\t" + "add x14, x14, x16\n\t" + "add x11, x11, x7\n\t" + "add x7, x7, x14\n\t" + /* Round 5 */ + "mov x13, v2.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x11, #14\n\t" + "ror x14, x7, #28\n\t" + "eor x12, x12, x11, ror 18\n\t" + "eor x14, x14, x7, ror 34\n\t" + "eor x12, x12, x11, ror 41\n\t" + "eor x14, x14, x7, ror 39\n\t" + "add x6, x6, x12\n\t" + "eor x16, x7, x8\n\t" + "eor x12, x4, x5\n\t" + "and x17, x16, x17\n\t" + "and x12, x12, x11\n\t" + "add x6, x6, x13\n\t" + "eor x12, x12, x5\n\t" + "add x6, x6, x15\n\t" + "eor x17, x17, x8\n\t" + "add x6, x6, x12\n\t" + "add x14, x14, x17\n\t" + "add x10, x10, x6\n\t" + "add 
x6, x6, x14\n\t" + /* Round 6 */ + "mov x13, v3.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x10, #14\n\t" + "ror x14, x6, #28\n\t" + "eor x12, x12, x10, ror 18\n\t" + "eor x14, x14, x6, ror 34\n\t" + "eor x12, x12, x10, ror 41\n\t" + "eor x14, x14, x6, ror 39\n\t" + "add x5, x5, x12\n\t" + "eor x17, x6, x7\n\t" + "eor x12, x11, x4\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x10\n\t" + "add x5, x5, x13\n\t" + "eor x12, x12, x4\n\t" + "add x5, x5, x15\n\t" + "eor x16, x16, x7\n\t" + "add x5, x5, x12\n\t" + "add x14, x14, x16\n\t" + "add x9, x9, x5\n\t" + "add x5, x5, x14\n\t" + /* Round 7 */ + "mov x13, v3.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x9, #14\n\t" + "ror x14, x5, #28\n\t" + "eor x12, x12, x9, ror 18\n\t" + "eor x14, x14, x5, ror 34\n\t" + "eor x12, x12, x9, ror 41\n\t" + "eor x14, x14, x5, ror 39\n\t" + "add x4, x4, x12\n\t" + "eor x16, x5, x6\n\t" + "eor x12, x10, x11\n\t" + "and x17, x16, x17\n\t" + "and x12, x12, x9\n\t" + "add x4, x4, x13\n\t" + "eor x12, x12, x11\n\t" + "add x4, x4, x15\n\t" + "eor x17, x17, x6\n\t" + "add x4, x4, x12\n\t" + "add x14, x14, x17\n\t" + "add x8, x8, x4\n\t" + "add x4, x4, x14\n\t" + /* Round 8 */ + "mov x13, v4.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x8, #14\n\t" + "ror x14, x4, #28\n\t" + "eor x12, x12, x8, ror 18\n\t" + "eor x14, x14, x4, ror 34\n\t" + "eor x12, x12, x8, ror 41\n\t" + "eor x14, x14, x4, ror 39\n\t" + "add x11, x11, x12\n\t" + "eor x17, x4, x5\n\t" + "eor x12, x9, x10\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x8\n\t" + "add x11, x11, x13\n\t" + "eor x12, x12, x10\n\t" + "add x11, x11, x15\n\t" + "eor x16, x16, x5\n\t" + "add x11, x11, x12\n\t" + "add x14, x14, x16\n\t" + "add x7, x7, x11\n\t" + "add x11, x11, x14\n\t" + /* Round 9 */ + "mov x13, v4.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x7, #14\n\t" + "ror x14, x11, #28\n\t" + "eor x12, x12, x7, ror 18\n\t" + "eor x14, x14, x11, ror 34\n\t" + "eor x12, x12, x7, ror 41\n\t" + "eor x14, x14, x11, ror 39\n\t" + "add x10, x10, x12\n\t" + "eor x16, x11, x4\n\t" + "eor x12, x8, x9\n\t" + "and x17, x16, x17\n\t" + "and x12, x12, x7\n\t" + "add x10, x10, x13\n\t" + "eor x12, x12, x9\n\t" + "add x10, x10, x15\n\t" + "eor x17, x17, x4\n\t" + "add x10, x10, x12\n\t" + "add x14, x14, x17\n\t" + "add x6, x6, x10\n\t" + "add x10, x10, x14\n\t" + /* Round 10 */ + "mov x13, v5.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x6, #14\n\t" + "ror x14, x10, #28\n\t" + "eor x12, x12, x6, ror 18\n\t" + "eor x14, x14, x10, ror 34\n\t" + "eor x12, x12, x6, ror 41\n\t" + "eor x14, x14, x10, ror 39\n\t" + "add x9, x9, x12\n\t" + "eor x17, x10, x11\n\t" + "eor x12, x7, x8\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x6\n\t" + "add x9, x9, x13\n\t" + "eor x12, x12, x8\n\t" + "add x9, x9, x15\n\t" + "eor x16, x16, x11\n\t" + "add x9, x9, x12\n\t" + "add x14, x14, x16\n\t" + "add x5, x5, x9\n\t" + "add x9, x9, x14\n\t" + /* Round 11 */ + "mov x13, v5.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x5, #14\n\t" + "ror x14, x9, #28\n\t" + "eor x12, x12, x5, ror 18\n\t" + "eor x14, x14, x9, ror 34\n\t" + "eor x12, x12, x5, ror 41\n\t" + "eor x14, x14, x9, ror 39\n\t" + "add x8, x8, x12\n\t" + "eor x16, x9, x10\n\t" + "eor x12, x6, x7\n\t" + "and x17, x16, x17\n\t" + "and x12, x12, x5\n\t" + "add x8, x8, x13\n\t" + "eor x12, x12, x7\n\t" + "add x8, x8, x15\n\t" + "eor x17, x17, x10\n\t" + "add x8, x8, x12\n\t" + "add x14, x14, x17\n\t" + "add x4, x4, x8\n\t" + "add x8, x8, x14\n\t" + /* Round 12 */ + "mov x13, v6.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x4, 
#14\n\t" + "ror x14, x8, #28\n\t" + "eor x12, x12, x4, ror 18\n\t" + "eor x14, x14, x8, ror 34\n\t" + "eor x12, x12, x4, ror 41\n\t" + "eor x14, x14, x8, ror 39\n\t" + "add x7, x7, x12\n\t" + "eor x17, x8, x9\n\t" + "eor x12, x5, x6\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x4\n\t" + "add x7, x7, x13\n\t" + "eor x12, x12, x6\n\t" + "add x7, x7, x15\n\t" + "eor x16, x16, x9\n\t" + "add x7, x7, x12\n\t" + "add x14, x14, x16\n\t" + "add x11, x11, x7\n\t" + "add x7, x7, x14\n\t" + /* Round 13 */ + "mov x13, v6.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x11, #14\n\t" + "ror x14, x7, #28\n\t" + "eor x12, x12, x11, ror 18\n\t" + "eor x14, x14, x7, ror 34\n\t" + "eor x12, x12, x11, ror 41\n\t" + "eor x14, x14, x7, ror 39\n\t" + "add x6, x6, x12\n\t" + "eor x16, x7, x8\n\t" + "eor x12, x4, x5\n\t" + "and x17, x16, x17\n\t" + "and x12, x12, x11\n\t" + "add x6, x6, x13\n\t" + "eor x12, x12, x5\n\t" + "add x6, x6, x15\n\t" + "eor x17, x17, x8\n\t" + "add x6, x6, x12\n\t" + "add x14, x14, x17\n\t" + "add x10, x10, x6\n\t" + "add x6, x6, x14\n\t" + /* Round 14 */ + "mov x13, v7.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x10, #14\n\t" + "ror x14, x6, #28\n\t" + "eor x12, x12, x10, ror 18\n\t" + "eor x14, x14, x6, ror 34\n\t" + "eor x12, x12, x10, ror 41\n\t" + "eor x14, x14, x6, ror 39\n\t" + "add x5, x5, x12\n\t" + "eor x17, x6, x7\n\t" + "eor x12, x11, x4\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x10\n\t" + "add x5, x5, x13\n\t" + "eor x12, x12, x4\n\t" + "add x5, x5, x15\n\t" + "eor x16, x16, x7\n\t" + "add x5, x5, x12\n\t" + "add x14, x14, x16\n\t" + "add x9, x9, x5\n\t" + "add x5, x5, x14\n\t" + /* Round 15 */ + "mov x13, v7.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x9, #14\n\t" + "ror x14, x5, #28\n\t" + "eor x12, x12, x9, ror 18\n\t" + "eor x14, x14, x5, ror 34\n\t" + "eor x12, x12, x9, ror 41\n\t" + "eor x14, x14, x5, ror 39\n\t" + "add x4, x4, x12\n\t" + "eor x16, x5, x6\n\t" + "eor x12, x10, x11\n\t" + "and x17, x16, x17\n\t" + "and x12, x12, x9\n\t" + "add x4, x4, x13\n\t" + "eor x12, x12, x11\n\t" + "add x4, x4, x15\n\t" + "eor x17, x17, x6\n\t" + "add x4, x4, x12\n\t" + "add x14, x14, x17\n\t" + "add x8, x8, x4\n\t" + "add x4, x4, x14\n\t" + "add x11, x11, x26\n\t" + "add x10, x10, x25\n\t" + "add x9, x9, x24\n\t" + "add x8, x8, x23\n\t" + "add x7, x7, x22\n\t" + "add x6, x6, x21\n\t" + "add x5, x5, x20\n\t" + "add x4, x4, x19\n\t" + "adr x3, %[L_SHA512_transform_neon_len_k]\n\t" + "subs %w[len], %w[len], #0x80\n\t" + "bne L_sha512_len_neon_begin_%=\n\t" + "stp x4, x5, [%x[sha512]]\n\t" + "stp x6, x7, [%x[sha512], #16]\n\t" + "stp x8, x9, [%x[sha512], #32]\n\t" + "stp x10, x11, [%x[sha512], #48]\n\t" + "ldp x29, x30, [sp], #16\n\t" + : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len) + : [L_SHA512_transform_neon_len_k] "S" (L_SHA512_transform_neon_len_k), [L_SHA512_transform_neon_len_ror8] "S" (L_SHA512_transform_neon_len_ror8) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11" + ); +} + +#endif /* WOLFSSL_ARMASM */ +#endif /* __aarch64__ */ diff --git a/client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512.c b/client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512.c new file mode 100644 index 0000000..e909c7c --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/arm/armv8-sha512.c @@ -0,0 +1,715 @@ +/* sha512.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. 
+ * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFSSL_ARMASM +#if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384) + +#include <wolfssl/wolfcrypt/sha512.h> +#include <wolfssl/wolfcrypt/error-crypt.h> +#include <wolfssl/wolfcrypt/cpuid.h> +#include <wolfssl/wolfcrypt/hash.h> + +#include <wolfssl/wolfcrypt/logging.h> + +#ifdef NO_INLINE + #include <wolfssl/wolfcrypt/misc.h> +#else + #define WOLFSSL_MISC_INCLUDED + #include <wolfcrypt/src/misc.c> +#endif + +#ifdef WOLFSSL_SHA512 + +static int InitSha512(wc_Sha512* sha512) +{ + if (sha512 == NULL) + return BAD_FUNC_ARG; + + sha512->digest[0] = W64LIT(0x6a09e667f3bcc908); + sha512->digest[1] = W64LIT(0xbb67ae8584caa73b); + sha512->digest[2] = W64LIT(0x3c6ef372fe94f82b); + sha512->digest[3] = W64LIT(0xa54ff53a5f1d36f1); + sha512->digest[4] = W64LIT(0x510e527fade682d1); + sha512->digest[5] = W64LIT(0x9b05688c2b3e6c1f); + sha512->digest[6] = W64LIT(0x1f83d9abfb41bd6b); + sha512->digest[7] = W64LIT(0x5be0cd19137e2179); + + sha512->buffLen = 0; + sha512->loLen = 0; + sha512->hiLen = 0; +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + sha512->flags = 0; +#endif + + return 0; +} + +#endif /* WOLFSSL_SHA512 */ + +#ifdef WOLFSSL_SHA512 + +int wc_InitSha512_ex(wc_Sha512* sha512, void* heap, int devId) +{ + int ret = 0; + + if (sha512 == NULL) + return BAD_FUNC_ARG; + + sha512->heap = heap; + + ret = InitSha512(sha512); + if (ret != 0) + return ret; + +#ifdef WOLFSSL_SMALL_STACK_CACHE + sha512->W = NULL; +#endif + + (void)devId; + + return ret; +} + +#endif /* WOLFSSL_SHA512 */ +
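+/* Reference C implementation (FIPS 180-4 SHA-512). This file as a whole is
+ * guarded by WOLFSSL_ARMASM, so the #ifndef block below is compiled out here;
+ * it documents the transform that the assembly computes. The S/s macros that
+ * follow are the standard round functions:
+ *   S0(x) = ROTR(x,28) ^ ROTR(x,34) ^ ROTR(x,39)    (Sigma0)
+ *   S1(x) = ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41)    (Sigma1)
+ *   s0(x) = ROTR(x, 1) ^ ROTR(x, 8) ^ (x >> 7)      (sigma0)
+ *   s1(x) = ROTR(x,19) ^ ROTR(x,61) ^ (x >> 6)      (sigma1)
+ * with the message schedule W kept as a rolling 16-word window (blk2). */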
+#ifndef WOLFSSL_ARMASM +static const word64 K512[80] = { + W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd), + W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc), + W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019), + W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118), + W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe), + W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2), + W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1), + W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694), + W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3), + W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65), + W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483), + W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5), + W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210), + W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4), + W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725), + W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70), + W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926), + W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df), + W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8), + W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b), + W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001), + W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30), + W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910), + W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8), + W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53), + W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8), + W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb), + W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3), + W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60), + W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec), + W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9), + W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b), + W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207), + W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178), + W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6), + W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b), + W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493), + W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c), + W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a), + W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817) +}; + +#ifdef LITTLE_ENDIAN_ORDER +#define blk0(i) (W[i] = ByteReverseWord64(DATA[i])) +#else +#define blk0(i) (W[i] = DATA[i]) +#endif + +#define blk2(i) ( \ + W[ i ] += \ + s1(W[(i- 2) & 15])+ \ + W[(i- 7) & 15] + \ + s0(W[(i-15) & 15]) \ + ) + +#define Ch(x,y,z) (z ^ ((z ^ y) & x)) +#define Maj(x,y,z) (y ^ ((y ^ z) & (x ^ y))) + +#define a(i) T[(0-i) & 7] +#define b(i) T[(1-i) & 7] +#define c(i) T[(2-i) & 7] +#define d(i) T[(3-i) & 7] +#define e(i) T[(4-i) & 7] +#define f(i) T[(5-i) & 7] +#define g(i) T[(6-i) & 7] +#define h(i) T[(7-i) & 7] + +#define S0(x) (rotrFixed64(x,28) ^ rotrFixed64(x,34) ^ rotrFixed64(x,39)) +#define S1(x) (rotrFixed64(x,14) ^ rotrFixed64(x,18) ^ rotrFixed64(x,41)) +#define s0(x) (rotrFixed64(x, 1) ^ rotrFixed64(x, 8) ^ (x>>7)) +#define s1(x) (rotrFixed64(x,19) ^ rotrFixed64(x,61) ^ (x>>6)) + +#define R0(i) \ + h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[i+j] + blk0(i); \ + d(i) += h(i); \ + h(i) += S0(a(i)) + Maj(a(i),b(i),c(i)) +#define R(i) \ + h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[i+j] + blk2(i); \ + d(i) += h(i); \ + h(i) += S0(a(i)) + Maj(a(i),b(i),c(i)) + +#define DATA sha512->buffer +static int Transform_Sha512(wc_Sha512* sha512) +{ + const word64* K = K512; + word32 j; + word64 T[8]; + word64 W[16]; + + /* Copy digest to working vars */ + T[0] = sha512->digest[0]; + T[1] = sha512->digest[1]; + T[2] = sha512->digest[2]; + T[3] = sha512->digest[3]; + T[4] = sha512->digest[4]; + T[5] = sha512->digest[5]; + T[6] = sha512->digest[6]; + T[7] = sha512->digest[7]; + + /* 80 operations, partially loop unrolled */ + j = 0; + R0( 0); R0( 1); R0( 2); R0( 3); + R0( 4); R0( 5); R0( 6); R0( 7); + R0( 8); R0( 9); R0(10); R0(11); + R0(12); R0(13); R0(14); R0(15); + for (j = 16; j < 80; j += 16) { + R( 0); R( 1); R( 2); R( 3); + R( 4); R( 5); R( 6); R( 7); + R( 8); R( 9); R(10); R(11); + R(12); R(13); R(14); R(15); + } + + /* Add the working vars back into digest */ + sha512->digest[0] += T[0]; + sha512->digest[1] += T[1]; + sha512->digest[2] += T[2]; + sha512->digest[3] += T[3]; + sha512->digest[4] += T[4]; + sha512->digest[5] += T[5]; + sha512->digest[6] += T[6]; + sha512->digest[7] += T[7]; + + return 0; +} +#undef DATA + +#define DATA ((word64*)data) +static int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len) +{ + const word64* K = K512; + word32 j; + word64 T[8]; + word64 TO[8]; + word64 W[16]; + + /* Copy digest to working vars */ + T[0] = sha512->digest[0]; + T[1] = sha512->digest[1]; + T[2] = sha512->digest[2]; + T[3] = sha512->digest[3]; + T[4] = sha512->digest[4]; + T[5]
= sha512->digest[5]; + T[6] = sha512->digest[6]; + T[7] = sha512->digest[7]; + + do { + TO[0] = T[0]; + TO[1] = T[1]; + TO[2] = T[2]; + TO[3] = T[3]; + TO[4] = T[4]; + TO[5] = T[5]; + TO[6] = T[6]; + TO[7] = T[7]; + + /* 80 operations, partially loop unrolled */ + j = 0; + R0( 0); R0( 1); R0( 2); R0( 3); + R0( 4); R0( 5); R0( 6); R0( 7); + R0( 8); R0( 9); R0(10); R0(11); + R0(12); R0(13); R0(14); R0(15); + for (j = 16; j < 80; j += 16) { + R( 0); R( 1); R( 2); R( 3); + R( 4); R( 5); R( 6); R( 7); + R( 8); R( 9); R(10); R(11); + R(12); R(13); R(14); R(15); + } + + T[0] += TO[0]; + T[1] += TO[1]; + T[2] += TO[2]; + T[3] += TO[3]; + T[4] += TO[4]; + T[5] += TO[5]; + T[6] += TO[6]; + T[7] += TO[7]; + + data += 128; + len -= 128; + } + while (len > 0); + + /* Add the working vars back into digest */ + sha512->digest[0] = T[0]; + sha512->digest[1] = T[1]; + sha512->digest[2] = T[2]; + sha512->digest[3] = T[3]; + sha512->digest[4] = T[4]; + sha512->digest[5] = T[5]; + sha512->digest[6] = T[6]; + sha512->digest[7] = T[7]; + + return 0; +} +#undef DATA +#endif + + +static WC_INLINE void AddLength(wc_Sha512* sha512, word32 len) +{ + word64 tmp = sha512->loLen; + if ( (sha512->loLen += len) < tmp) + sha512->hiLen++; /* carry low to high */ +} + +static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 len) +{ + int ret = 0; + /* do block size increments */ + byte* local = (byte*)sha512->buffer; + word32 blocksLen; + + /* check that internal buffLen is valid */ + if (sha512->buffLen >= WC_SHA512_BLOCK_SIZE) + return BUFFER_E; + + AddLength(sha512, len); + + if (sha512->buffLen > 0) { + word32 add = min(len, WC_SHA512_BLOCK_SIZE - sha512->buffLen); + if (add > 0) { + XMEMCPY(&local[sha512->buffLen], data, add); + + sha512->buffLen += add; + data += add; + len -= add; + } + + if (sha512->buffLen == WC_SHA512_BLOCK_SIZE) { +#ifndef WOLFSSL_ARMASM + Transform_Sha512(sha512); +#else + Transform_Sha512_Len(sha512, (const byte*)sha512->buffer, + WC_SHA512_BLOCK_SIZE); +#endif + sha512->buffLen = 0; + } + } + + blocksLen = len & ~(WC_SHA512_BLOCK_SIZE-1); + if (blocksLen > 0) { + /* Byte reversal performed in function if required. */ + Transform_Sha512_Len(sha512, data, blocksLen); + data += blocksLen; + len -= blocksLen; + } + + if (len > 0) { + XMEMCPY(local, data, len); + sha512->buffLen = len; + } + + return ret; +} + +#ifdef WOLFSSL_SHA512 + +int wc_Sha512Update(wc_Sha512* sha512, const byte* data, word32 len) +{ + if (sha512 == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + + return Sha512Update(sha512, data, len); +} + +#endif /* WOLFSSL_SHA512 */ + +static WC_INLINE int Sha512Final(wc_Sha512* sha512) +{ + byte* local = (byte*)sha512->buffer; + + if (sha512 == NULL) { + return BAD_FUNC_ARG; + } + + local[sha512->buffLen++] = 0x80; /* add 1 */ + + /* pad with zeros */ + if (sha512->buffLen > WC_SHA512_PAD_SIZE) { + XMEMSET(&local[sha512->buffLen], 0, WC_SHA512_BLOCK_SIZE - + sha512->buffLen); + sha512->buffLen += WC_SHA512_BLOCK_SIZE - sha512->buffLen; +#ifndef WOLFSSL_ARMASM + Transform_Sha512(sha512); +#else + Transform_Sha512_Len(sha512, (const byte*)sha512->buffer, + WC_SHA512_BLOCK_SIZE); +#endif + + sha512->buffLen = 0; + } + XMEMSET(&local[sha512->buffLen], 0, WC_SHA512_PAD_SIZE - sha512->buffLen); + + /* put lengths in bits */ + sha512->hiLen = (sha512->loLen >> (8 * sizeof(sha512->loLen) - 3)) + + (sha512->hiLen << 3); + sha512->loLen = sha512->loLen << 3; + + /* store lengths */ + /* ! length ordering dependent on digest endian type ! 
*/ + + sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2] = sha512->hiLen; + sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 1] = sha512->loLen; + + ByteReverseWords64( + &(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]), + &(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]), + WC_SHA512_BLOCK_SIZE - WC_SHA512_PAD_SIZE); +#ifndef WOLFSSL_ARMASM + Transform_Sha512(sha512); +#else + Transform_Sha512_Len(sha512, (const byte*)sha512->buffer, + WC_SHA512_BLOCK_SIZE); +#endif + +#ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords64(sha512->digest, sha512->digest, WC_SHA512_DIGEST_SIZE); +#endif + + return 0; +} + +#ifdef WOLFSSL_SHA512 + +int wc_Sha512FinalRaw(wc_Sha512* sha512, byte* hash) +{ +#ifdef LITTLE_ENDIAN_ORDER + word64 digest[WC_SHA512_DIGEST_SIZE / sizeof(word64)]; +#endif + + if (sha512 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords64((word64*)digest, (word64*)sha512->digest, + WC_SHA512_DIGEST_SIZE); + XMEMCPY(hash, digest, WC_SHA512_DIGEST_SIZE); +#else + XMEMCPY(hash, sha512->digest, WC_SHA512_DIGEST_SIZE); +#endif + + return 0; +} + +int wc_Sha512Final(wc_Sha512* sha512, byte* hash) +{ + int ret; + + if (sha512 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + + ret = Sha512Final(sha512); + if (ret != 0) + return ret; + + XMEMCPY(hash, sha512->digest, WC_SHA512_DIGEST_SIZE); + + return InitSha512(sha512); /* reset state */ +} + +int wc_InitSha512(wc_Sha512* sha512) +{ + return wc_InitSha512_ex(sha512, NULL, INVALID_DEVID); +} + +void wc_Sha512Free(wc_Sha512* sha512) +{ + if (sha512 == NULL) + return; + +#ifdef WOLFSSL_SMALL_STACK_CACHE + if (sha512->W != NULL) { + XFREE(sha512->W, NULL, DYNAMIC_TYPE_TMP_BUFFER); + sha512->W = NULL; + } +#endif +} + +#endif /* WOLFSSL_SHA512 */ + +/* -------------------------------------------------------------------------- */ +/* SHA384 */ +/* -------------------------------------------------------------------------- */ +#ifdef WOLFSSL_SHA384 + +static int InitSha384(wc_Sha384* sha384) +{ + if (sha384 == NULL) { + return BAD_FUNC_ARG; + } + + sha384->digest[0] = W64LIT(0xcbbb9d5dc1059ed8); + sha384->digest[1] = W64LIT(0x629a292a367cd507); + sha384->digest[2] = W64LIT(0x9159015a3070dd17); + sha384->digest[3] = W64LIT(0x152fecd8f70e5939); + sha384->digest[4] = W64LIT(0x67332667ffc00b31); + sha384->digest[5] = W64LIT(0x8eb44a8768581511); + sha384->digest[6] = W64LIT(0xdb0c2e0d64f98fa7); + sha384->digest[7] = W64LIT(0x47b5481dbefa4fa4); + + sha384->buffLen = 0; + sha384->loLen = 0; + sha384->hiLen = 0; +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + sha384->flags = 0; +#endif + + return 0; +} + +int wc_Sha384Update(wc_Sha384* sha384, const byte* data, word32 len) +{ + if (sha384 == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + + return Sha512Update((wc_Sha512*)sha384, data, len); +} + + +int wc_Sha384FinalRaw(wc_Sha384* sha384, byte* hash) +{ +#ifdef LITTLE_ENDIAN_ORDER + word64 digest[WC_SHA384_DIGEST_SIZE / sizeof(word64)]; +#endif + + if (sha384 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords64((word64*)digest, (word64*)sha384->digest, + WC_SHA384_DIGEST_SIZE); + XMEMCPY(hash, digest, WC_SHA384_DIGEST_SIZE); +#else + XMEMCPY(hash, sha384->digest, WC_SHA384_DIGEST_SIZE); +#endif + + return 0; +} + +int wc_Sha384Final(wc_Sha384* sha384, byte* hash) +{ + int ret; + + if (sha384 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + + ret = Sha512Final((wc_Sha512*)sha384); 
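+    /* wc_Sha384 shares the wc_Sha512 structure layout: SHA-384 is SHA-512
+     * with different initial digest values, so the SHA-512 core finalizes
+     * it and the copy below truncates the output to 48 bytes. */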
+ if (ret != 0) + return ret; + + XMEMCPY(hash, sha384->digest, WC_SHA384_DIGEST_SIZE); + + return InitSha384(sha384); /* reset state */ +} + +int wc_InitSha384_ex(wc_Sha384* sha384, void* heap, int devId) +{ + int ret; + + if (sha384 == NULL) { + return BAD_FUNC_ARG; + } + + sha384->heap = heap; + ret = InitSha384(sha384); + if (ret != 0) + return ret; + +#ifdef WOLFSSL_SMALL_STACK_CACHE + sha384->W = NULL; +#endif + + (void)devId; + + return ret; +} + +int wc_InitSha384(wc_Sha384* sha384) +{ + return wc_InitSha384_ex(sha384, NULL, INVALID_DEVID); +} + +void wc_Sha384Free(wc_Sha384* sha384) +{ + if (sha384 == NULL) + return; + +#ifdef WOLFSSL_SMALL_STACK_CACHE + if (sha384->W != NULL) { + XFREE(sha384->W, NULL, DYNAMIC_TYPE_TMP_BUFFER); + sha384->W = NULL; + } +#endif +} + +#endif /* WOLFSSL_SHA384 */ + +#ifdef WOLFSSL_SHA512 + +int wc_Sha512GetHash(wc_Sha512* sha512, byte* hash) +{ + int ret; + wc_Sha512 tmpSha512; + + if (sha512 == NULL || hash == NULL) + return BAD_FUNC_ARG; + + ret = wc_Sha512Copy(sha512, &tmpSha512); + if (ret == 0) { + ret = wc_Sha512Final(&tmpSha512, hash); + wc_Sha512Free(&tmpSha512); + } + return ret; +} + +int wc_Sha512Copy(wc_Sha512* src, wc_Sha512* dst) +{ + int ret = 0; + + if (src == NULL || dst == NULL) + return BAD_FUNC_ARG; + + XMEMCPY(dst, src, sizeof(wc_Sha512)); +#ifdef WOLFSSL_SMALL_STACK_CACHE + dst->W = NULL; +#endif + +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + dst->flags |= WC_HASH_FLAG_ISCOPY; +#endif + + return ret; +} + +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) +int wc_Sha512SetFlags(wc_Sha512* sha512, word32 flags) +{ + if (sha512) { + sha512->flags = flags; + } + return 0; +} +int wc_Sha512GetFlags(wc_Sha512* sha512, word32* flags) +{ + if (sha512 && flags) { + *flags = sha512->flags; + } + return 0; +} +#endif + +#endif /* WOLFSSL_SHA512 */ + +#ifdef WOLFSSL_SHA384 + +int wc_Sha384GetHash(wc_Sha384* sha384, byte* hash) +{ + int ret; + wc_Sha384 tmpSha384; + + if (sha384 == NULL || hash == NULL) + return BAD_FUNC_ARG; + ret = wc_Sha384Copy(sha384, &tmpSha384); + if (ret == 0) { + ret = wc_Sha384Final(&tmpSha384, hash); + wc_Sha384Free(&tmpSha384); + } + return ret; +} +int wc_Sha384Copy(wc_Sha384* src, wc_Sha384* dst) +{ + int ret = 0; + + if (src == NULL || dst == NULL) + return BAD_FUNC_ARG; + + XMEMCPY(dst, src, sizeof(wc_Sha384)); +#ifdef WOLFSSL_SMALL_STACK_CACHE + dst->W = NULL; +#endif + +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + dst->flags |= WC_HASH_FLAG_ISCOPY; +#endif + + return ret; +} + +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) +int wc_Sha384SetFlags(wc_Sha384* sha384, word32 flags) +{ + if (sha384) { + sha384->flags = flags; + } + return 0; +} +int wc_Sha384GetFlags(wc_Sha384* sha384, word32* flags) +{ + if (sha384 && flags) { + *flags = sha384->flags; + } + return 0; +} +#endif + +#endif /* WOLFSSL_SHA384 */ + +#endif /* WOLFSSL_SHA512 || WOLFSSL_SHA384 */ +#endif /* WOLFSSL_ARMASM */ diff --git a/client/wolfssl/wolfcrypt/src/port/arm/cryptoCell.c b/client/wolfssl/wolfcrypt/src/port/arm/cryptoCell.c new file mode 100644 index 0000000..c3bd2d9 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/arm/cryptoCell.c @@ -0,0 +1,309 @@ +/* cryptoCell.c + * + * Copyright (C) 2020 wolfSSL Inc. + * + * This file is part of wolfSSL. 
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+/* This source is included in wc_port.c */
+/* WOLFSSL_CRYPTOCELL_C is defined by wc_port.c in case compile tries to
+   include this .c directly */
+#ifdef WOLFSSL_CRYPTOCELL_C
+
+#ifdef WOLFSSL_CRYPTOCELL
+
+#include
+#include
+#include
+#include
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+/* Global Variables (extern) */
+CRYS_RND_State_t wc_rndState;
+CRYS_RND_WorkBuff_t wc_rndWorkBuff;
+SaSiRndGenerateVectWorkFunc_t wc_rndGenVectFunc = CRYS_RND_GenerateVector;
+
+static word32 cc310_enableCount = 0;
+
+static void cc310_enable(void)
+{
+    cc310_enableCount++;
+
+    /* Enable the CC310 HW/IRQ once */
+
+    NRF_CRYPTOCELL->ENABLE = 1;
+    NVIC_EnableIRQ(CRYPTOCELL_IRQn);
+}
+
+static void cc310_disable(void)
+{
+    cc310_enableCount--;
+
+    /* Disable HW/IRQ if no more users */
+    if (cc310_enableCount == 0) {
+        NRF_CRYPTOCELL->ENABLE = 0;
+        NVIC_DisableIRQ(CRYPTOCELL_IRQn);
+    }
+}
+
+int cc310_Init(void)
+{
+    int ret = 0;
+    static int initialized = 0;
+
+    if (!initialized) {
+        /* Enable the CC310 HW. */
+        cc310_enable();
+
+        /* Initialize the CC310 run-time library */
+        ret = SaSi_LibInit();
+
+        if (ret != SA_SILIB_RET_OK) {
+            WOLFSSL_MSG("Error SaSi_LibInit");
+            return ret;
+        }
+
+        /* RNG CryptoCell CC310 */
+        ret = CRYS_RndInit(&wc_rndState, &wc_rndWorkBuff);
+        if (ret != CRYS_OK) {
+            WOLFSSL_MSG("Error CRYS_RndInit");
+            return ret;
+        }
+        initialized = 1;
+    }
+    return ret;
+}
+
+void cc310_Free(void)
+{
+    CRYSError_t crys_result;
+
+    SaSi_LibFini();
+
+    crys_result = CRYS_RND_UnInstantiation(&wc_rndState);
+
+    if (crys_result != CRYS_OK) {
+        WOLFSSL_MSG("Error CRYS_RND_UnInstantiation");
+    }
+    cc310_disable();
+}
+
+int cc310_random_generate(byte* output, word32 size)
+{
+    CRYSError_t crys_result;
+
+    crys_result = CRYS_RND_GenerateVector(&wc_rndState, size, output);
+
+    return (crys_result == CRYS_OK) ?
0 : -1; +} +#ifdef HAVE_ECC +CRYS_ECPKI_DomainID_t cc310_mapCurve(int curve_id) +{ + switch(curve_id) + { + case ECC_CURVE_DEF: return CRYS_ECPKI_DomainID_secp256r1; /* default */ + case ECC_SECP160K1: return CRYS_ECPKI_DomainID_secp160k1; + case ECC_SECP160R1: return CRYS_ECPKI_DomainID_secp160r1; + case ECC_SECP160R2: return CRYS_ECPKI_DomainID_secp160r2; + case ECC_SECP192K1: return CRYS_ECPKI_DomainID_secp192k1; + case ECC_SECP192R1: return CRYS_ECPKI_DomainID_secp192r1; + case ECC_SECP224K1: return CRYS_ECPKI_DomainID_secp224k1; + case ECC_SECP224R1: return CRYS_ECPKI_DomainID_secp224r1; + case ECC_SECP256K1: return CRYS_ECPKI_DomainID_secp256k1; + case ECC_SECP256R1: return CRYS_ECPKI_DomainID_secp256r1; + case ECC_SECP384R1: return CRYS_ECPKI_DomainID_secp384r1; + case ECC_SECP521R1: return CRYS_ECPKI_DomainID_secp521r1; + default: WOLFSSL_MSG("Curve not identified"); + return CRYS_ECPKI_DomainID_Builded; + } +} +#endif /* HAVE_ECC */ + +#ifndef NO_RSA +CRYS_RSA_HASH_OpMode_t cc310_hashModeRSA(enum wc_HashType hash_type, int isHashed) +{ + switch(hash_type) + { + case WC_HASH_TYPE_MD5: + #ifndef NO_MD5 + return isHashed? CRYS_RSA_After_MD5_mode : CRYS_RSA_HASH_MD5_mode; + #endif + case WC_HASH_TYPE_SHA: + #ifndef NO_SHA + return isHashed? CRYS_RSA_After_SHA1_mode : CRYS_RSA_HASH_SHA1_mode; + #endif + case WC_HASH_TYPE_SHA224: + #ifdef WOLFSSL_SHA224 + return isHashed? CRYS_RSA_After_SHA224_mode : CRYS_RSA_HASH_SHA224_mode; + #endif + case WC_HASH_TYPE_SHA256: + #ifndef NO_SHA256 + return isHashed? CRYS_RSA_After_SHA256_mode : CRYS_RSA_HASH_SHA256_mode; + #endif + case WC_HASH_TYPE_SHA384: + #ifdef WOLFSSL_SHA384 + return isHashed? CRYS_RSA_After_SHA384_mode : CRYS_RSA_HASH_SHA384_mode; + #endif + case WC_HASH_TYPE_SHA512: + #ifdef WOLFSSL_SHA512 + return isHashed? CRYS_RSA_After_SHA512_mode : CRYS_RSA_HASH_SHA512_mode; + #endif + case WC_HASH_TYPE_NONE: + /* default to SHA256 */ + return isHashed? CRYS_RSA_After_SHA256_mode : CRYS_RSA_HASH_SHA256_mode; + default: + return CRYS_RSA_After_HASH_NOT_KNOWN_mode; + } +} +#endif /* !NO_RSA */ + +#ifdef HAVE_ECC +CRYS_ECPKI_HASH_OpMode_t cc310_hashModeECC(int hash_size) +{ + CRYS_ECPKI_HASH_OpMode_t hash_mode; + switch (hash_size) + { + case 20: + hash_mode = CRYS_ECPKI_AFTER_HASH_SHA1_mode; + break; + case 28: + hash_mode = CRYS_ECPKI_AFTER_HASH_SHA224_mode; + break; + case 32: + hash_mode = CRYS_ECPKI_AFTER_HASH_SHA256_mode; + break; + case 48: + hash_mode = CRYS_ECPKI_AFTER_HASH_SHA384_mode; + break; + case 64: + hash_mode = CRYS_ECPKI_AFTER_HASH_SHA512_mode; + break; + default: + hash_mode = CRYS_ECPKI_HASH_OpModeLast; + break; + } + return hash_mode; +} +#endif /* HAVE_ECC */ +#endif /* WOLFSSL_CRYPTOCELL*/ + +#if !defined(NO_CRYPT_BENCHMARK) && defined(WOLFSSL_nRF5x_SDK_15_2) + +static int mRtcSec = 0; +static const nrfx_rtc_t rtc = NRFX_RTC_INSTANCE(0); + +static void rtc_handler(nrfx_rtc_int_type_t int_type) +{ + if (int_type == NRFX_RTC_INT_COMPARE0) { + mRtcSec++; + nrfx_rtc_counter_clear(&rtc); + nrfx_rtc_int_enable(&rtc, RTC_CHANNEL_INT_MASK(0)); +#ifdef BSP_LED_1 + nrf_gpio_pin_toggle(BSP_LED_1); +#endif + } + else if (int_type == NRF_DRV_RTC_INT_TICK) { +#ifdef BSP_LED_0 + nrf_gpio_pin_toggle(BSP_LED_0); +#endif + } +} + +static void rtc_config(void) +{ + uint32_t err_code; + nrfx_rtc_config_t config = NRFX_RTC_DEFAULT_CONFIG; + + /* configure gpio for pin toggling. 
*/ + bsp_board_init(BSP_INIT_LEDS); + + /* start the internal LFCLK XTAL oscillator.*/ + err_code = nrf_drv_clock_init(); + APP_ERROR_CHECK(err_code); + nrf_drv_clock_lfclk_request(NULL); + + /* Initialize RTC instance */ + err_code = nrfx_rtc_init(&rtc, &config, rtc_handler); + APP_ERROR_CHECK(err_code); + + /* Enable tick event */ + nrfx_rtc_tick_enable(&rtc, false); + + /* Set compare channel to trigger interrupt after 1 seconds */ + err_code = nrfx_rtc_cc_set(&rtc, 0, RTC_INPUT_FREQ, true); + APP_ERROR_CHECK(err_code); + + /* Power on RTC instance */ + nrfx_rtc_enable(&rtc); +} + +static int rtc_get_ms(void) +{ + /* Prescaler is 12-bit for COUNTER: frequency = (32768/(PRESCALER+1)) */ + int frequency = (RTC_INPUT_FREQ / (rtc_prescaler_get(rtc.p_reg) + 1)); + uint32_t counter = nrfx_rtc_counter_get(&rtc); + + /* Convert with rounding frequency to milliseconds */ + return ((counter * 1000) + (frequency / 2) ) / frequency; +} + +double current_time(int reset) +{ + double time; + static int initialized = 0; + + if (!initialized) { + rtc_config(); + initialized = 1; + } + time = mRtcSec; + time += (double)rtc_get_ms() / 1000; + + return time; +} + +int nrf_random_generate(byte* output, word32 size) +{ + uint32_t err_code; + static int initialized = 0; + + /* RNG must be initialized once */ + if (!initialized) { + err_code = nrf_drv_rng_init(NULL); + if (err_code != NRF_SUCCESS) { + return -1; + } + initialized = 1; + } + nrf_drv_rng_block_rand(output, size); + return 0; +} +#endif /* !NO_CRYPT_BENCHMARK && WOLFSSL_nRF5x_SDK_15_2 */ + +#endif /* WOLFSSL_CRYPTOCELL_C */ diff --git a/client/wolfssl/wolfcrypt/src/port/arm/cryptoCellHash.c b/client/wolfssl/wolfcrypt/src/port/arm/cryptoCellHash.c new file mode 100644 index 0000000..bc729f7 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/arm/cryptoCellHash.c @@ -0,0 +1,134 @@ +/* cryptoCellHash.c + * + * Copyright (C) 2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+/* This source is included in wc_port.c */
+/* WOLFSSL_CRYPTOCELL_HASH_C is defined by wc_port.c in case compile tries
+   to include this .c directly */
+#ifdef WOLFSSL_CRYPTOCELL_HASH_C
+#if !defined(NO_SHA256) && defined(WOLFSSL_CRYPTOCELL)
+
+#include
+#include
+#include
+#include
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
+{
+    CRYSError_t ret = 0;
+
+    (void)heap;
+    (void)devId;
+
+    if (sha256 == NULL)
+        return BAD_FUNC_ARG;
+
+    XMEMSET(sha256->digest, 0, sizeof(sha256->digest));
+
+    /* initializes the HASH context and machine to the supported mode. */
+    ret = CRYS_HASH_Init(&sha256->ctx, CRYS_HASH_SHA256_mode);
+
+    if (ret != SA_SILIB_RET_OK) {
+        WOLFSSL_MSG("Error CRYS_HASH_Init failed");
+    }
+
+    return ret;
+}
+
+int wc_InitSha256(wc_Sha256* sha256)
+{
+    return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID);
+}
+
+int wc_Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
+{
+    CRYSError_t ret = 0;
+    size_t length;
+    size_t remaining = len;
+    byte const * p_cur = data;
+
+    if (sha256 == NULL || (data == NULL && len > 0)) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (data == NULL && len == 0) {
+        /* valid, but do nothing */
+        return 0;
+    }
+
+    /* If the input is larger than CC310_MAX_LENGTH_DMA, split it into
+       smaller chunks */
+    do {
+        length = (remaining > CC310_MAX_LENGTH_DMA) ?
+                  CC310_MAX_LENGTH_DMA : remaining;
+
+        ret = CRYS_HASH_Update(&sha256->ctx, (uint8_t *)p_cur, length);
+
+        remaining -= length;
+        p_cur += length;
+
+    } while (ret == CRYS_OK && remaining > 0);
+
+    return ret;
+}
+
+int wc_Sha256Final(wc_Sha256* sha256, byte* hash)
+{
+    CRYSError_t ret = 0;
+    CRYS_HASH_Result_t hashResult;
+
+    if (sha256 == NULL || hash == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    ret = CRYS_HASH_Finish(&sha256->ctx, hashResult);
+
+    if (ret != SA_SILIB_RET_OK) {
+        WOLFSSL_MSG("Error CRYS_HASH_Finish failed");
+        return ret;
+    }
+    XMEMCPY(sha256->digest, hashResult, WC_SHA256_DIGEST_SIZE);
+
+    XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE);
+
+    /* reset state */
+    return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID);
+}
+
+void wc_Sha256Free(wc_Sha256* sha256)
+{
+    if (sha256 == NULL)
+        return;
+}
+
+#endif /* !NO_SHA256 && WOLFSSL_CRYPTOCELL */
+#endif /* WOLFSSL_CRYPTOCELL_HASH_C */
diff --git a/client/wolfssl/wolfcrypt/src/port/atmel/README.md b/client/wolfssl/wolfcrypt/src/port/atmel/README.md
new file mode 100644
index 0000000..50352fc
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/atmel/README.md
@@ -0,0 +1,94 @@
+# Microchip/Atmel ATECC508A/ATECC608A Support
+
+Support for ATECC508A using these methods:
+* TLS: Using the PK callbacks and reference ATECC508A callbacks. See the Coding section below. Requires the options `HAVE_PK_CALLBACKS` and `WOLFSSL_ATECC_PKCB` or `WOLFSSL_ATECC508A`.
+* wolfCrypt: Native wc_ecc_* APIs using `./configure CFLAGS="-DWOLFSSL_ATECC508A"` or `#define WOLFSSL_ATECC508A`.
+
+## Dependency
+
+Requires the Microchip CryptoAuthLib. The examples in `wolfcrypt/src/port/atmel/atmel.c` make calls to the `atcatls_*` APIs.
+
+
+## Building
+
+### Build Options
+
+* `HAVE_PK_CALLBACKS`: Option for enabling wolfSSL's PK callback support for TLS.
+* `WOLFSSL_ATECC508A`: Enables support for initializing the CryptoAuthLib and setting up the encryption key used for the I2C communication.
+* `WOLFSSL_ATECC_PKCB`: Enables support for the reference PK callbacks without init.
+* `WOLFSSL_ATMEL`: Enables ASF hooks for seeding random data using the `atmel_get_random_number` function.
+* `WOLFSSL_ATMEL_TIME`: Enables the built-in `atmel_get_curr_time_and_date` function for getting the time from the ASF RTC.
+* `ATECC_GET_ENC_KEY`: Macro to define your own function for getting the encryption key.
+* `ATECC_SLOT_I2C_ENC`: Macro for the default encryption key slot. Can also be obtained via the slot callback with `ATMEL_SLOT_ENCKEY`.
+* `ATECC_MAX_SLOT`: Macro for the maximum number of dynamically allocated slots.
+
+### Build Command Examples
+
+`./configure --enable-pkcallbacks CFLAGS="-DWOLFSSL_ATECC_PKCB"`
+`#define HAVE_PK_CALLBACKS`
+`#define WOLFSSL_ATECC_PKCB`
+
+or
+
+`./configure CFLAGS="-DWOLFSSL_ATECC508A"`
+`#define WOLFSSL_ATECC508A`
+
+
+## Coding
+
+Set up the PK callbacks for TLS using:
+
+```
+/* Setup PK Callbacks for ATECC508A */
+WOLFSSL_CTX* ctx;
+wolfSSL_CTX_SetEccKeyGenCb(ctx, atcatls_create_key_cb);
+wolfSSL_CTX_SetEccVerifyCb(ctx, atcatls_verify_signature_cb);
+wolfSSL_CTX_SetEccSignCb(ctx, atcatls_sign_certificate_cb);
+wolfSSL_CTX_SetEccSharedSecretCb(ctx, atcatls_create_pms_cb);
+```
+
+The reference ATECC508A PK callback functions are located in the `wolfcrypt/src/port/atmel/atmel.c` file.
+
+
+Adding a custom context to the callbacks:
+
+```
+/* Setup PK Callbacks context */
+WOLFSSL* ssl;
+void* myOwnCtx;
+wolfSSL_SetEccKeyGenCtx(ssl, myOwnCtx);
+wolfSSL_SetEccVerifyCtx(ssl, myOwnCtx);
+wolfSSL_SetEccSignCtx(ssl, myOwnCtx);
+wolfSSL_SetEccSharedSecretCtx(ssl, myOwnCtx);
+```
+
+## Benchmarks
+
+Supports ECC SECP256R1 (NIST P-256).
+
+### TLS
+
+TLS Establishment Times:
+
+* Hardware accelerated ATECC508A: 2.342 seconds average
+* Software only: 13.422 seconds average
+
+The TLS connection establishment time is 5.73 times faster with the ATECC508A.
+
+### Cryptographic ECC
+
+Software only implementation (SAMD21 48MHz Cortex-M0, Fast Math TFM-ASM):
+
+`EC-DHE key generation 3123.000 milliseconds, avg over 5 iterations, 1.601 ops/sec`
+`EC-DHE key agreement 3117.000 milliseconds, avg over 5 iterations, 1.604 ops/sec`
+`EC-DSA sign time 1997.000 milliseconds, avg over 5 iterations, 2.504 ops/sec`
+`EC-DSA verify time 5057.000 milliseconds, avg over 5 iterations, 0.988 ops/sec`
+
+ATECC508A HW accelerated implementation:
+`EC-DHE key generation 144.400 milliseconds, avg over 5 iterations, 34.722 ops/sec`
+`EC-DHE key agreement 134.200 milliseconds, avg over 5 iterations, 37.313 ops/sec`
+`EC-DSA sign time 293.400 milliseconds, avg over 5 iterations, 17.065 ops/sec`
+`EC-DSA verify time 208.400 milliseconds, avg over 5 iterations, 24.038 ops/sec`
+
+
+For details see our [wolfSSL Atmel ATECC508A](https://wolfssl.com/wolfSSL/wolfssl-atmel.html) page.
diff --git a/client/wolfssl/wolfcrypt/src/port/atmel/atmel.c b/client/wolfssl/wolfcrypt/src/port/atmel/atmel.c
new file mode 100644
index 0000000..04d2aeb
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/atmel/atmel.c
@@ -0,0 +1,843 @@
+/* atmel.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#if defined(WOLFSSL_ATMEL) || defined(WOLFSSL_ATECC508A) || defined(WOLFSSL_ATECC_PKCB) + +#include +#include +#include +#include + +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +#ifdef WOLFSSL_ATMEL +/* remap name conflicts */ +#define Aes Aes_Remap +#define Gmac Gmac_Remap +#include "asf.h" +#undef Aes +#undef Gmac +#endif /* WOLFSSL_ATMEL */ + +#include + +#ifdef WOLFSSL_ATECC508A + +#ifdef WOLFSSL_ATECC508A_TLS + extern ATCA_STATUS device_init_default(void); +#endif + +static int mAtcaInitDone = 0; + +/* ATECC slotId handling */ +static atmel_slot_alloc_cb mSlotAlloc; +static atmel_slot_dealloc_cb mSlotDealloc; +static byte mSlotList[ATECC_MAX_SLOT]; +#ifndef SINGLE_THREADED +static wolfSSL_Mutex mSlotMutex; +#endif + +/* Raspberry Pi uses /dev/i2c-1 */ +#ifndef ATECC_I2C_ADDR +#define ATECC_I2C_ADDR 0xC0 +#endif +#ifndef ATECC_I2C_BUS +#define ATECC_I2C_BUS 1 +#endif +#ifndef ATECC_DEV_TYPE +#define ATECC_DEV_TYPE ATECC508A +#endif +static ATCAIfaceCfg cfg_ateccx08a_i2c_pi; +#endif /* WOLFSSL_ATECC508A */ + + +/** + * \brief Generate random number to be used for hash. + */ +int atmel_get_random_number(uint32_t count, uint8_t* rand_out) +{ + int ret = 0; +#ifdef WOLFSSL_ATECC508A + uint8_t i = 0; + uint32_t copy_count = 0; + uint8_t rng_buffer[RANDOM_NUM_SIZE]; + + if (rand_out == NULL) { + return -1; + } + + while (i < count) { + ret = atcab_random(rng_buffer); + if (ret != ATCA_SUCCESS) { + WOLFSSL_MSG("Failed to create random number!"); + return -1; + } + copy_count = (count - i > RANDOM_NUM_SIZE) ? RANDOM_NUM_SIZE : count - i; + XMEMCPY(&rand_out[i], rng_buffer, copy_count); + i += copy_count; + } + #ifdef ATCAPRINTF + atcab_printbin_label((const char*)"\r\nRandom Number", rand_out, count); + #endif +#else + /* TODO: Use on-board TRNG */ +#endif + return ret; +} + +int atmel_get_random_block(unsigned char* output, unsigned int sz) +{ + return atmel_get_random_number((uint32_t)sz, (uint8_t*)output); +} + +#if defined(WOLFSSL_ATMEL) && defined(WOLFSSL_ATMEL_TIME) +#include "asf.h" +#include "rtc_calendar.h" +extern struct rtc_module *_rtc_instance[RTC_INST_NUM]; + +long atmel_get_curr_time_and_date(long* tm) +{ + long rt = 0; + + /* Get current time */ + struct rtc_calendar_time rtcTime; + const int monthDay[] = {0,31,59,90,120,151,181,212,243,273,304,334}; + int month, year, yearLeap; + + rtc_calendar_get_time(_rtc_instance[0], &rtcTime); + + /* Convert rtc_calendar_time to seconds since UTC */ + month = rtcTime.month % 12; + year = rtcTime.year + rtcTime.month / 12; + if (month < 0) { + month += 12; + year--; + } + yearLeap = (month > 1) ? 
year + 1 : year; + rt = rtcTime.second + + 60 * (rtcTime.minute + + 60 * (rtcTime.hour + + 24 * (monthDay[month] + rtcTime.day - 1 + + 365 * (year - 70) + + (yearLeap - 69) / 4 + - (yearLeap - 1) / 100 + + (yearLeap + 299) / 400 + ) + ) + ); + + (void)tm; + return rt; +} +#endif + + +#ifdef WOLFSSL_ATECC508A + +int atmel_ecc_translate_err(int status) +{ + switch (status) { + case ATCA_SUCCESS: + return 0; + case ATCA_BAD_PARAM: + return BAD_FUNC_ARG; + case ATCA_ALLOC_FAILURE: + return MEMORY_E; + default: + #ifdef WOLFSSL_ATECC508A_DEBUG + printf("ATECC Failure: %x\n", (word32)status); + #endif + break; + } + return WC_HW_E; +} + +/* Function to set the slotId allocator and deallocator */ +int atmel_set_slot_allocator(atmel_slot_alloc_cb alloc, + atmel_slot_dealloc_cb dealloc) +{ +#ifndef SINGLE_THREADED + wc_LockMutex(&mSlotMutex); +#endif + mSlotAlloc = alloc; + mSlotDealloc = dealloc; +#ifndef SINGLE_THREADED + wc_UnLockMutex(&mSlotMutex); +#endif + return 0; +} + +/* Function to allocate new slotId number */ +int atmel_ecc_alloc(int slotType) +{ + int slotId = ATECC_INVALID_SLOT, i; + +#ifndef SINGLE_THREADED + wc_LockMutex(&mSlotMutex); +#endif + + if (mSlotAlloc) { + slotId = mSlotAlloc(slotType); + } + else { + switch (slotType) { + case ATMEL_SLOT_ENCKEY: + /* not reserved in mSlotList, so return */ + slotId = ATECC_SLOT_I2C_ENC; + goto exit; + case ATMEL_SLOT_DEVICE: + /* not reserved in mSlotList, so return */ + slotId = ATECC_SLOT_AUTH_PRIV; + goto exit; + case ATMEL_SLOT_ECDHE: + slotId = ATECC_SLOT_ECDHE_PRIV; + break; + case ATMEL_SLOT_ECDHE_ENC: + slotId = ATECC_SLOT_ENC_PARENT; + break; + case ATMEL_SLOT_ANY: + for (i=0; i < ATECC_MAX_SLOT; i++) { + /* Find free slotId */ + if (mSlotList[i] == ATECC_INVALID_SLOT) { + slotId = i; + break; + } + } + break; + } + + /* is slot available */ + if (mSlotList[slotId] != ATECC_INVALID_SLOT) { + slotId = ATECC_INVALID_SLOT; + } + else { + mSlotList[slotId] = slotId; + } + } + +exit: +#ifndef SINGLE_THREADED + wc_UnLockMutex(&mSlotMutex); +#endif + + return slotId; +} + + +/* Function to return slotId number to available list */ +void atmel_ecc_free(int slotId) +{ +#ifndef SINGLE_THREADED + wc_LockMutex(&mSlotMutex); +#endif + if (mSlotDealloc) { + mSlotDealloc(slotId); + } + else if (slotId >= 0 && slotId < ATECC_MAX_SLOT) { + if (slotId != ATECC_SLOT_AUTH_PRIV && slotId != ATECC_SLOT_I2C_ENC) { + /* Mark slotId free */ + mSlotList[slotId] = ATECC_INVALID_SLOT; + } + } +#ifndef SINGLE_THREADED + wc_UnLockMutex(&mSlotMutex); +#endif +} + + +/** + * \brief Callback function for getting the current encryption key + */ +int atmel_get_enc_key_default(byte* enckey, word16 keysize) +{ + if (enckey == NULL || keysize != ATECC_KEY_SIZE) { + return BAD_FUNC_ARG; + } + + XMEMSET(enckey, 0xFF, keysize); /* use default value */ + + return 0; +} + +/** + * \brief Write enc key before. 
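+ *        Loads the I2C communication encryption key into its device slot.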
+ */ +static int atmel_init_enc_key(void) +{ + int ret; + uint8_t read_key[ATECC_KEY_SIZE]; + uint8_t writeBlock = 0; + uint8_t writeOffset = 0; + int slotId; + + slotId = atmel_ecc_alloc(ATMEL_SLOT_ENCKEY); + + /* check for encryption key slotId */ + if (slotId == ATECC_INVALID_SLOT) + return BAD_FUNC_ARG; + + /* get encryption key */ + ATECC_GET_ENC_KEY(read_key, sizeof(read_key)); + + ret = atcab_write_zone(ATCA_ZONE_DATA, slotId, writeBlock, writeOffset, + read_key, ATCA_BLOCK_SIZE); + ForceZero(read_key, sizeof(read_key)); + ret = atmel_ecc_translate_err(ret); + + return ret; +} + +int atmel_get_rev_info(word32* revision) +{ + int ret; + ret = atcab_info((uint8_t*)revision); + ret = atmel_ecc_translate_err(ret); + return ret; +} + +void atmel_show_rev_info(void) +{ +#ifdef WOLFSSL_ATECC508A_DEBUG + word32 revision = 0; + atmel_get_rev_info(&revision); + printf("ATECC508A Revision: %x\n", (word32)revision); +#endif +} + +int atmel_ecc_create_pms(int slotId, const uint8_t* peerKey, uint8_t* pms) +{ + int ret; + uint8_t read_key[ATECC_KEY_SIZE]; + int slotIdEnc; + + slotIdEnc = atmel_ecc_alloc(ATMEL_SLOT_ECDHE_ENC); + if (slotIdEnc == ATECC_INVALID_SLOT) + return BAD_FUNC_ARG; + + /* get encryption key */ + ATECC_GET_ENC_KEY(read_key, sizeof(read_key)); + + /* send the encrypted version of the ECDH command */ + ret = atcab_ecdh_enc(slotId, peerKey, pms, read_key, slotIdEnc); + ret = atmel_ecc_translate_err(ret); + + /* free the ECDHE slot */ + atmel_ecc_free(slotIdEnc); + + return ret; +} + +int atmel_ecc_create_key(int slotId, byte* peerKey) +{ + int ret; + + /* verify provided slotId */ + if (slotId == ATECC_INVALID_SLOT) { + return WC_HW_WAIT_E; + } + + /* generate new ephemeral key on device */ + ret = atcab_genkey(slotId, peerKey); + ret = atmel_ecc_translate_err(ret); + return ret; +} + +int atmel_ecc_sign(int slotId, const byte* message, byte* signature) +{ + int ret; + + ret = atcab_sign(slotId, message, signature); + ret = atmel_ecc_translate_err(ret); + return ret; +} + +int atmel_ecc_verify(const byte* message, const byte* signature, + const byte* pubkey, int* verified) +{ + int ret; + + ret = atcab_verify_extern(message, signature, pubkey, (bool*)verified); + ret = atmel_ecc_translate_err(ret); + return ret; +} + +#endif /* WOLFSSL_ATECC508A */ + + + +int atmel_init(void) +{ + int ret = 0; + +#ifdef WOLFSSL_ATECC508A + if (!mAtcaInitDone) { + ATCA_STATUS status; + int i; + + #ifndef SINGLE_THREADED + wc_InitMutex(&mSlotMutex); + #endif + + /* Init the free slotId list */ + for (i=0; islot = slotId; + } + else { + atmel_ecc_free(slotId); + #ifdef WOLFSSL_ATECC508A_DEBUG + printf("atcatls_create_key_cb: ret %d\n", ret); + #endif + } + } + else { + #ifndef WOLFSSL_ATECC508A_NOSOFTECC + /* use software for non P-256 cases */ + WC_RNG rng; + ret = wc_InitRng(&rng); + if (ret == 0) { + ret = wc_ecc_make_key_ex(&rng, keySz, key, ecc_curve); + wc_FreeRng(&rng); + } + #else + ret = NOT_COMPILED_IN; + #endif /* !WOLFSSL_ATECC508A_NOSOFTECC */ + } + return ret; +} + +/** + * \brief Creates a shared secret using a peer public key and a device key + */ +int atcatls_create_pms_cb(WOLFSSL* ssl, ecc_key* otherKey, + unsigned char* pubKeyDer, word32* pubKeySz, + unsigned char* out, word32* outlen, + int side, void* ctx) +{ + int ret; + ecc_key tmpKey; + uint8_t peerKeyBuf[ATECC_PUBKEY_SIZE]; + uint8_t* peerKey = peerKeyBuf; + uint8_t* qx = &peerKey[0]; + uint8_t* qy = &peerKey[ATECC_PUBKEY_SIZE/2]; + word32 qxLen = ATECC_PUBKEY_SIZE/2, qyLen = ATECC_PUBKEY_SIZE/2; + + if (pubKeyDer == NULL || 
pubKeySz == NULL || out == NULL || outlen == NULL) { + return BAD_FUNC_ARG; + } + + (void)ssl; + (void)ctx; + (void)otherKey; + + ret = wc_ecc_init(&tmpKey); + if (ret != 0) { + return ret; + } + + /* ATECC508A only supports P-256 */ + if (otherKey->dp->id == ECC_SECP256R1) { + XMEMSET(peerKey, 0, ATECC_PUBKEY_SIZE); + + /* for client: create and export public key */ + if (side == WOLFSSL_CLIENT_END) { + int slotId = atmel_ecc_alloc(ATMEL_SLOT_ECDHE); + if (slotId == ATECC_INVALID_SLOT) + return WC_HW_WAIT_E; + tmpKey.slot = slotId; + + /* generate new ephemeral key on device */ + ret = atmel_ecc_create_key(slotId, peerKey); + if (ret != ATCA_SUCCESS) { + goto exit; + } + + /* convert raw unsigned public key to X.963 format for TLS */ + ret = wc_ecc_import_unsigned(&tmpKey, qx, qy, NULL, ECC_SECP256R1); + if (ret == 0) { + ret = wc_ecc_export_x963(&tmpKey, pubKeyDer, pubKeySz); + } + + /* export peer's key as raw unsigned for hardware */ + if (ret == 0) { + ret = wc_ecc_export_public_raw(otherKey, qx, &qxLen, qy, &qyLen); + } + } + + /* for server: import public key */ + else if (side == WOLFSSL_SERVER_END) { + tmpKey.slot = otherKey->slot; + + /* import peer's key and export as raw unsigned for hardware */ + ret = wc_ecc_import_x963_ex(pubKeyDer, *pubKeySz, &tmpKey, ECC_SECP256R1); + if (ret == 0) { + ret = wc_ecc_export_public_raw(&tmpKey, qx, &qxLen, qy, &qyLen); + } + } + else { + ret = BAD_FUNC_ARG; + } + + if (ret != 0) { + goto exit; + } + + ret = atmel_ecc_create_pms(tmpKey.slot, peerKey, out); + *outlen = ATECC_KEY_SIZE; + + #ifndef WOLFSSL_ATECC508A_NOIDLE + /* put chip into idle to prevent watchdog situation on chip */ + atcab_idle(); + #endif + + (void)qxLen; + (void)qyLen; + } + else { + #ifndef WOLFSSL_ATECC508A_NOSOFTECC + /* use software for non P-256 cases */ + ecc_key* privKey = NULL; + ecc_key* pubKey = NULL; + + /* for client: create and export public key */ + if (side == WOLFSSL_CLIENT_END) + { + WC_RNG rng; + privKey = &tmpKey; + pubKey = otherKey; + + ret = wc_InitRng(&rng); + if (ret == 0) { + ret = wc_ecc_make_key_ex(&rng, 0, privKey, otherKey->dp->id); + if (ret == 0) { + ret = wc_ecc_export_x963(privKey, pubKeyDer, pubKeySz); + } + wc_FreeRng(&rng); + } + } + /* for server: import public key */ + else if (side == WOLFSSL_SERVER_END) { + privKey = otherKey; + pubKey = &tmpKey; + + ret = wc_ecc_import_x963_ex(pubKeyDer, *pubKeySz, pubKey, + otherKey->dp->id); + } + else { + ret = BAD_FUNC_ARG; + } + + /* generate shared secret and return it */ + if (ret == 0) { + ret = wc_ecc_shared_secret(privKey, pubKey, out, outlen); + } + #else + ret = NOT_COMPILED_IN; + #endif /* !WOLFSSL_ATECC508A_NOSOFTECC */ + } + +exit: + wc_ecc_free(&tmpKey); + +#ifdef WOLFSSL_ATECC508A_DEBUG + if (ret != 0) { + printf("atcab_ecdh_enc: ret %d\n", ret); + } +#endif + + return ret; +} + + +/** + * \brief Sign received digest using private key on device + */ +int atcatls_sign_certificate_cb(WOLFSSL* ssl, const byte* in, unsigned int inSz, + byte* out, word32* outSz, const byte* key, unsigned int keySz, void* ctx) +{ + int ret; + byte sigRs[ATECC_SIG_SIZE]; + int slotId; + + (void)ssl; + (void)inSz; + (void)key; + (void)keySz; + (void)ctx; + + if (in == NULL || out == NULL || outSz == NULL) { + return BAD_FUNC_ARG; + } + + slotId = atmel_ecc_alloc(ATMEL_SLOT_DEVICE); + if (slotId == ATECC_INVALID_SLOT) + return WC_HW_WAIT_E; + + /* We can only sign with P-256 */ + ret = atmel_ecc_sign(slotId, in, sigRs); + if (ret != ATCA_SUCCESS) { + ret = WC_HW_E; goto exit; + } + +#ifndef 
WOLFSSL_ATECC508A_NOIDLE + /* put chip into idle to prevent watchdog situation on chip */ + atcab_idle(); +#endif + + /* Encode with ECDSA signature */ + ret = wc_ecc_rs_raw_to_sig( + &sigRs[0], ATECC_SIG_SIZE/2, + &sigRs[ATECC_SIG_SIZE/2], ATECC_SIG_SIZE/2, + out, outSz); + if (ret != 0) { + goto exit; + } + +exit: + + atmel_ecc_free(slotId); + +#ifdef WOLFSSL_ATECC508A_DEBUG + if (ret != 0) { + printf("atcatls_sign_certificate_cb: ret %d\n", ret); + } +#endif + + return ret; +} + +/** + * \brief Verify signature received from peers to prove peer's private key. + */ +int atcatls_verify_signature_cb(WOLFSSL* ssl, const byte* sig, unsigned int sigSz, + const byte* hash, unsigned int hashSz, const byte* key, unsigned int keySz, int* result, + void* ctx) +{ + int ret; + ecc_key tmpKey; + word32 idx = 0; + uint8_t peerKey[ATECC_PUBKEY_SIZE]; + uint8_t* qx = &peerKey[0]; + uint8_t* qy = &peerKey[ATECC_PUBKEY_SIZE/2]; + word32 qxLen = ATECC_PUBKEY_SIZE/2, qyLen = ATECC_PUBKEY_SIZE/2; + byte sigRs[ATECC_SIG_SIZE]; + word32 rSz = ATECC_SIG_SIZE/2; + word32 sSz = ATECC_SIG_SIZE/2; + + (void)sigSz; + (void)hashSz; + (void)ctx; + + if (ssl == NULL || key == NULL || sig == NULL || hash == NULL || result == NULL) { + return BAD_FUNC_ARG; + } + + /* import public key */ + ret = wc_ecc_init(&tmpKey); + if (ret == 0) { + ret = wc_EccPublicKeyDecode(key, &idx, &tmpKey, keySz); + } + if (ret != 0) { + goto exit; + } + + if (tmpKey.dp->id == ECC_SECP256R1) { + /* export public as unsigned bin for hardware */ + ret = wc_ecc_export_public_raw(&tmpKey, qx, &qxLen, qy, &qyLen); + wc_ecc_free(&tmpKey); + if (ret != 0) { + goto exit; + } + + /* decode the ECDSA signature */ + ret = wc_ecc_sig_to_rs(sig, sigSz, + &sigRs[0], &rSz, + &sigRs[ATECC_SIG_SIZE/2], &sSz); + if (ret != 0) { + goto exit; + } + + ret = atmel_ecc_verify(hash, sigRs, peerKey, result); + if (ret != ATCA_SUCCESS || !*result) { + ret = WC_HW_E; goto exit; + } + + #ifndef WOLFSSL_ATECC508A_NOIDLE + /* put chip into idle to prevent watchdog situation on chip */ + atcab_idle(); + #endif + } + else { + #ifndef WOLFSSL_ATECC508A_NOSOFTECC + ret = wc_ecc_verify_hash(sig, sigSz, hash, hashSz, result, &tmpKey); + #else + ret = NOT_COMPILED_IN; + #endif /* !WOLFSSL_ATECC508A_NOSOFTECC */ + } + + (void)rSz; + (void)sSz; + (void)qxLen; + (void)qyLen; + + ret = 0; /* success */ + +exit: + +#ifdef WOLFSSL_ATECC508A_DEBUG + if (ret != 0) { + printf("atcatls_verify_signature_cb: ret %d\n", ret); + } +#endif + + return ret; +} + +int atcatls_set_callbacks(WOLFSSL_CTX* ctx) +{ + wolfSSL_CTX_SetEccKeyGenCb(ctx, atcatls_create_key_cb); + wolfSSL_CTX_SetEccVerifyCb(ctx, atcatls_verify_signature_cb); + wolfSSL_CTX_SetEccSignCb(ctx, atcatls_sign_certificate_cb); + wolfSSL_CTX_SetEccSharedSecretCb(ctx, atcatls_create_pms_cb); + return 0; +} + +int atcatls_set_callback_ctx(WOLFSSL* ssl, void* user_ctx) +{ + wolfSSL_SetEccKeyGenCtx(ssl, user_ctx); + wolfSSL_SetEccVerifyCtx(ssl, user_ctx); + wolfSSL_SetEccSignCtx(ssl, user_ctx); + wolfSSL_SetEccSharedSecretCtx(ssl, user_ctx); + return 0; +} + + +#endif /* HAVE_PK_CALLBACKS */ + +#endif /* WOLFSSL_ATMEL || WOLFSSL_ATECC508A || WOLFSSL_ATECC_PKCB */ diff --git a/client/wolfssl/wolfcrypt/src/port/caam/caam_aes.c b/client/wolfssl/wolfcrypt/src/port/caam/caam_aes.c new file mode 100644 index 0000000..e00214d --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/caam/caam_aes.c @@ -0,0 +1,649 @@ +/* caam_aes.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. 
+ * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#include + +#if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_AES) && \ + !defined(NO_IMX6_CAAM_AES) + +#include +#include +#include + +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +#include +#include + +#if defined(WOLFSSL_CAAM_DEBUG) || defined(WOLFSSL_CAAM_PRINT) +#include +#endif + +int wc_AesSetKey(Aes* aes, const byte* key, word32 len, + const byte* iv, int dir) +{ + int ret; + + if (aes == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + + if (len > 32) { + byte out[32]; /* max AES key size */ + word32 outSz; + int ret; + + if (len != 64 && len != 72 && len != 80) { + return BAD_FUNC_ARG; + } + + outSz = sizeof(out); + /* if length greater then 32 then try to unencapsulate */ + if ((ret = wc_caamOpenBlob((byte*)key, len, out, &outSz)) != 0) { + return ret; + } + + XMEMCPY((byte*)aes->key, out, outSz); + aes->keylen = outSz; + } + else { + if (len != 16 && len != 24 && len != 32) { + return BAD_FUNC_ARG; + } + + XMEMCPY((byte*)aes->key, key, len); + aes->keylen = len; + } + + switch (aes->keylen) { + case 16: aes->rounds = 10; break; + case 24: aes->rounds = 12; break; + case 32: aes->rounds = 14; break; + default: + return BAD_FUNC_ARG; + } + + if ((ret = wc_AesSetIV(aes, iv)) != 0) { + return ret; + } + +#ifdef WOLFSSL_AES_COUNTER + aes->left = 0; +#endif + + return 0; +} + + +int wc_AesCbcEncrypt(Aes* aes, byte* out, + const byte* in, word32 sz) +{ + word32 blocks; + + WOLFSSL_ENTER("wc_AesCbcEncrypt"); + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + blocks = sz / AES_BLOCK_SIZE; + + if (blocks > 0) { + Buffer buf[4]; + word32 arg[4]; + word32 keySz; + int ret; + + if (wc_AesGetKeySize(aes, &keySz) != 0) { + return BAD_FUNC_ARG; + } + + /* Set buffers for key, cipher text, and plain text */ + buf[0].BufferType = DataBuffer; + buf[0].TheAddress = (Address)aes->key; + buf[0].Length = keySz; + + buf[1].BufferType = DataBuffer; + buf[1].TheAddress = (Address)aes->reg; + buf[1].Length = AES_BLOCK_SIZE; + + buf[2].BufferType = DataBuffer; + buf[2].TheAddress = (Address)in; + buf[2].Length = blocks * AES_BLOCK_SIZE; + + buf[3].BufferType = DataBuffer | LastBuffer; + buf[3].TheAddress = (Address)out; + buf[3].Length = blocks * AES_BLOCK_SIZE; + + arg[0] = CAAM_ENC; + arg[1] = keySz; + arg[2] = blocks * AES_BLOCK_SIZE; + + if ((ret = wc_caamAddAndWait(buf, arg, CAAM_AESCBC)) != 0) { + WOLFSSL_MSG("Error with CAAM AES CBC encrypt"); + return ret; + } + } + + return 0; +} + + +int wc_AesCbcDecrypt(Aes* aes, byte* out, + const byte* in, word32 sz) +{ + word32 blocks; + + WOLFSSL_ENTER("wc_AesCbcDecrypt"); + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + blocks = sz / AES_BLOCK_SIZE; + + if (blocks > 0) { + Buffer buf[4]; + word32 arg[4]; + word32 keySz; + int ret; + + 
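+        /* One CAAM job: buf[0] holds the key, buf[1] the chaining value
+         * (aes->reg), buf[2] the ciphertext input, and buf[3] receives the
+         * plaintext; all full blocks are processed in a single request. */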
if (wc_AesGetKeySize(aes, &keySz) != 0) { + return BAD_FUNC_ARG; + } + + /* Set buffers for key, cipher text, and plain text */ + buf[0].BufferType = DataBuffer; + buf[0].TheAddress = (Address)aes->key; + buf[0].Length = keySz; + + buf[1].BufferType = DataBuffer; + buf[1].TheAddress = (Address)aes->reg; + buf[1].Length = AES_BLOCK_SIZE; + + buf[2].BufferType = DataBuffer; + buf[2].TheAddress = (Address)in; + buf[2].Length = blocks * AES_BLOCK_SIZE; + + buf[3].BufferType = DataBuffer | LastBuffer; + buf[3].TheAddress = (Address)out; + buf[3].Length = blocks * AES_BLOCK_SIZE; + + arg[0] = CAAM_DEC; + arg[1] = keySz; + arg[2] = blocks * AES_BLOCK_SIZE; + + if ((ret = wc_caamAddAndWait(buf, arg, CAAM_AESCBC)) != 0) { + WOLFSSL_MSG("Error with CAAM AES CBC decrypt"); + return ret; + } + } + + return 0; +} + +#if defined(HAVE_AES_ECB) +/* is assumed that input size is a multiple of AES_BLOCK_SIZE */ +int wc_AesEcbEncrypt(Aes* aes, byte* out, + const byte* in, word32 sz) +{ + word32 blocks; + Buffer buf[3]; + word32 arg[4]; + word32 keySz; + int ret; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + blocks = sz / AES_BLOCK_SIZE; + + if (wc_AesGetKeySize(aes, &keySz) != 0) { + return BAD_FUNC_ARG; + } + + /* Set buffers for key, cipher text, and plain text */ + buf[0].BufferType = DataBuffer; + buf[0].TheAddress = (Address)aes->key; + buf[0].Length = keySz; + + buf[1].BufferType = DataBuffer; + buf[1].TheAddress = (Address)in; + buf[1].Length = blocks * AES_BLOCK_SIZE; + + buf[2].BufferType = DataBuffer | LastBuffer; + buf[2].TheAddress = (Address)out; + buf[2].Length = blocks * AES_BLOCK_SIZE; + + arg[0] = CAAM_ENC; + arg[1] = keySz; + arg[2] = blocks * AES_BLOCK_SIZE; + + if ((ret = wc_caamAddAndWait(buf, arg, CAAM_AESECB)) != 0) { + WOLFSSL_MSG("Error with CAAM AES ECB encrypt"); + return ret; + } + + return 0; +} + + +int wc_AesEcbDecrypt(Aes* aes, byte* out, + const byte* in, word32 sz) +{ + word32 blocks; + Buffer buf[3]; + word32 arg[4]; + word32 keySz; + int ret; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + blocks = sz / AES_BLOCK_SIZE; + + if (wc_AesGetKeySize(aes, &keySz) != 0) { + return BAD_FUNC_ARG; + } + + /* Set buffers for key, cipher text, and plain text */ + buf[0].BufferType = DataBuffer; + buf[0].TheAddress = (Address)aes->key; + buf[0].Length = keySz; + + buf[1].BufferType = DataBuffer; + buf[1].TheAddress = (Address)in; + buf[1].Length = blocks * AES_BLOCK_SIZE; + + buf[2].BufferType = DataBuffer | LastBuffer; + buf[2].TheAddress = (Address)out; + buf[2].Length = blocks * AES_BLOCK_SIZE; + + arg[0] = CAAM_DEC; + arg[1] = keySz; + arg[2] = blocks * AES_BLOCK_SIZE; + + if ((ret = wc_caamAddAndWait(buf, arg, CAAM_AESECB)) != 0) { + WOLFSSL_MSG("Error with CAAM AES ECB decrypt"); + return ret; + } + + return 0; +} +#endif + +/* AES-CTR */ +#ifdef WOLFSSL_AES_COUNTER +/* Increment AES counter (from wolfcrypt/src/aes.c) */ +static WC_INLINE void IncrementAesCounter(byte* inOutCtr) +{ + /* in network byte order so start at end and work back */ + int i; + for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) { + if (++inOutCtr[i]) /* we're done unless we overflow */ + return; + } +} + + +int wc_AesCtrEncrypt(Aes* aes, byte* out, + const byte* in, word32 sz) +{ + byte* tmp; + Buffer buf[4]; + word32 arg[4]; + word32 keySz; + int ret, blocks; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (wc_AesGetKeySize(aes, &keySz) != 0) { + return BAD_FUNC_ARG; + } + + /* consume any unused bytes left 
in aes->tmp */ + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; + while (aes->left && sz) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + sz--; + } + + /* do full blocks to then get potential left over amount */ + blocks = sz / AES_BLOCK_SIZE; + if (blocks > 0) { + /* Set buffers for key, cipher text, and plain text */ + buf[0].BufferType = DataBuffer; + buf[0].TheAddress = (Address)aes->key; + buf[0].Length = keySz; + + buf[1].BufferType = DataBuffer; + buf[1].TheAddress = (Address)aes->reg; + buf[1].Length = AES_BLOCK_SIZE; + + buf[2].BufferType = DataBuffer; + buf[2].TheAddress = (Address)in; + buf[2].Length = blocks * AES_BLOCK_SIZE; + + buf[3].BufferType = DataBuffer | LastBuffer; + buf[3].TheAddress = (Address)out; + buf[3].Length = blocks * AES_BLOCK_SIZE; + + arg[0] = CAAM_ENC; + arg[1] = keySz; + arg[2] = blocks * AES_BLOCK_SIZE; + + if ((ret = wc_caamAddAndWait(buf, arg, CAAM_AESCTR)) != 0) { + WOLFSSL_MSG("Error with CAAM AES CTR encrypt"); + return ret; + } + + out += blocks * AES_BLOCK_SIZE; + in += blocks * AES_BLOCK_SIZE; + sz -= blocks * AES_BLOCK_SIZE; + } + + if (sz) { + wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); + IncrementAesCounter((byte*)aes->reg); + + aes->left = AES_BLOCK_SIZE; + tmp = (byte*)aes->tmp; + + while (sz--) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + } + } + + return 0; +} +#endif + + +/* AES-DIRECT */ +#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) +{ + Buffer buf[3]; + word32 arg[4]; + word32 keySz; + + if (aes == NULL || out == NULL || in == NULL) { + /* return BAD_FUNC_ARG; */ + return; + } + + if (wc_AesGetKeySize(aes, &keySz) != 0) { + /* return BAD_FUNC_ARG; */ + return; + } + + /* Set buffers for key, cipher text, and plain text */ + buf[0].BufferType = DataBuffer; + buf[0].TheAddress = (Address)aes->key; + buf[0].Length = keySz; + + buf[1].BufferType = DataBuffer; + buf[1].TheAddress = (Address)in; + buf[1].Length = AES_BLOCK_SIZE; + + buf[2].BufferType = DataBuffer | LastBuffer; + buf[2].TheAddress = (Address)out; + buf[2].Length = AES_BLOCK_SIZE; + + arg[0] = CAAM_ENC; + arg[1] = keySz; + arg[2] = AES_BLOCK_SIZE; + + if (wc_caamAddAndWait(buf, arg, CAAM_AESECB) != 0) { + WOLFSSL_MSG("Error with CAAM AES direct encrypt"); + } +} + + +void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) +{ + Buffer buf[3]; + word32 arg[4]; + word32 keySz; + + if (aes == NULL || out == NULL || in == NULL) { + /* return BAD_FUNC_ARG; */ + return; + } + + if (wc_AesGetKeySize(aes, &keySz) != 0) { + /* return BAD_FUNC_ARG; */ + return; + } + + /* Set buffers for key, cipher text, and plain text */ + buf[0].BufferType = DataBuffer; + buf[0].TheAddress = (Address)aes->key; + buf[0].Length = keySz; + + buf[1].BufferType = DataBuffer; + buf[1].TheAddress = (Address)in; + buf[1].Length = AES_BLOCK_SIZE; + + buf[2].BufferType = DataBuffer | LastBuffer; + buf[2].TheAddress = (Address)out; + buf[2].Length = AES_BLOCK_SIZE; + + arg[0] = CAAM_DEC; + arg[1] = keySz; + arg[2] = AES_BLOCK_SIZE; + + if (wc_caamAddAndWait(buf, arg, CAAM_AESECB) != 0) { + WOLFSSL_MSG("Error with CAAM AES direct decrypt"); + } +} + + +int wc_AesSetKeyDirect(Aes* aes, const byte* key, word32 len, + const byte* iv, int dir) +{ + return wc_AesSetKey(aes, key, len, iv, dir); +} +#endif + +#ifdef HAVE_AESCCM +int wc_AesCcmEncrypt(Aes* aes, byte* out, + const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 
authInSz) +{ + Buffer buf[5]; + word32 arg[4]; + word32 keySz; + word32 i; + byte B0Ctr0[AES_BLOCK_SIZE + AES_BLOCK_SIZE]; + int lenSz; + byte mask = 0xFF; + const word32 wordSz = (word32)sizeof(word32); + int ret; + + /* sanity check on arguments */ + if (aes == NULL || out == NULL || in == NULL || nonce == NULL + || authTag == NULL || nonceSz < 7 || nonceSz > 13 || + authTagSz > AES_BLOCK_SIZE) + return BAD_FUNC_ARG; + + if (wc_AesGetKeySize(aes, &keySz) != 0) { + return BAD_FUNC_ARG; + } + + /* set up B0 and CTR0 similar to how wolfcrypt/src/aes.c does */ + XMEMCPY(B0Ctr0+1, nonce, nonceSz); + XMEMCPY(B0Ctr0+AES_BLOCK_SIZE+1, nonce, nonceSz); + lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz; + B0Ctr0[0] = (authInSz > 0 ? 64 : 0) + + (8 * (((byte)authTagSz - 2) / 2)) + + (lenSz - 1); + for (i = 0; i < lenSz; i++) { + if (mask && i >= wordSz) + mask = 0x00; + B0Ctr0[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask; + B0Ctr0[AES_BLOCK_SIZE + AES_BLOCK_SIZE - 1 - i] = 0; + } + B0Ctr0[AES_BLOCK_SIZE] = lenSz - 1; + + /* Set buffers for key, cipher text, and plain text */ + buf[0].BufferType = DataBuffer; + buf[0].TheAddress = (Address)aes->key; + buf[0].Length = keySz; + + buf[1].BufferType = DataBuffer; + buf[1].TheAddress = (Address)B0Ctr0; + buf[1].Length = AES_BLOCK_SIZE + AES_BLOCK_SIZE; + + buf[2].BufferType = DataBuffer; + buf[2].TheAddress = (Address)authIn; + buf[2].Length = authInSz; + + buf[3].BufferType = DataBuffer; + buf[3].TheAddress = (Address)in; + buf[3].Length = inSz; + + buf[4].BufferType = DataBuffer | LastBuffer; + buf[4].TheAddress = (Address)out; + buf[4].Length = inSz; + + arg[0] = CAAM_ENC; + arg[1] = keySz; + arg[2] = inSz; + arg[3] = authInSz; + + if ((ret = wc_caamAddAndWait(buf, arg, CAAM_AESCCM)) != 0) { + WOLFSSL_MSG("Error with CAAM AES-CCM encrypt"); + return ret; + } + + XMEMCPY(authTag, B0Ctr0, authTagSz); + return 0; +} + + +#ifdef HAVE_AES_DECRYPT +int wc_AesCcmDecrypt(Aes* aes, byte* out, + const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + Buffer buf[5]; + word32 arg[4]; + word32 keySz; + word32 i; + byte B0Ctr0[AES_BLOCK_SIZE + AES_BLOCK_SIZE]; + byte tag[AES_BLOCK_SIZE]; + int lenSz; + byte mask = 0xFF; + const word32 wordSz = (word32)sizeof(word32); + int ret; + + /* sanity check on arguments */ + if (aes == NULL || out == NULL || in == NULL || nonce == NULL + || authTag == NULL || nonceSz < 7 || nonceSz > 13 || + authTagSz > AES_BLOCK_SIZE) + return BAD_FUNC_ARG; + + if (wc_AesGetKeySize(aes, &keySz) != 0) { + return BAD_FUNC_ARG; + } + + /* set up B0 and CTR0 similar to how wolfcrypt/src/aes.c does */ + XMEMCPY(B0Ctr0+1, nonce, nonceSz); + XMEMCPY(B0Ctr0+AES_BLOCK_SIZE+1, nonce, nonceSz); + lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz; + B0Ctr0[0] = (authInSz > 0 ? 
64 : 0)
+               + (8 * (((byte)authTagSz - 2) / 2))
+               + (lenSz - 1);
+    for (i = 0; i < lenSz; i++) {
+        if (mask && i >= wordSz)
+            mask = 0x00;
+        B0Ctr0[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask;
+        B0Ctr0[AES_BLOCK_SIZE + AES_BLOCK_SIZE - 1 - i] = 0;
+    }
+    B0Ctr0[AES_BLOCK_SIZE] = lenSz - 1;
+    wc_AesEncryptDirect(aes, tag, B0Ctr0 + AES_BLOCK_SIZE);
+
+    /* Set buffers for key, cipher text, and plain text */
+    buf[0].BufferType = DataBuffer;
+    buf[0].TheAddress = (Address)aes->key;
+    buf[0].Length = keySz;
+
+    buf[1].BufferType = DataBuffer;
+    buf[1].TheAddress = (Address)B0Ctr0;
+    buf[1].Length = AES_BLOCK_SIZE + AES_BLOCK_SIZE;
+
+    buf[2].BufferType = DataBuffer;
+    buf[2].TheAddress = (Address)authIn;
+    buf[2].Length = authInSz;
+
+    buf[3].BufferType = DataBuffer;
+    buf[3].TheAddress = (Address)in;
+    buf[3].Length = inSz;
+
+    buf[4].BufferType = DataBuffer | LastBuffer;
+    buf[4].TheAddress = (Address)out;
+    buf[4].Length = inSz;
+
+    arg[0] = CAAM_DEC;
+    arg[1] = keySz;
+    arg[2] = inSz;
+    arg[3] = authInSz;
+
+    if ((ret = wc_caamAddAndWait(buf, arg, CAAM_AESCCM)) != 0) {
+        WOLFSSL_MSG("Error with CAAM AES-CCM decrypt");
+        return ret;
+    }
+
+    xorbuf(tag, B0Ctr0, authTagSz);
+    if (ConstantCompare(tag, authTag, authTagSz) != 0) {
+        /* If the authTag check fails, don't keep the decrypted data.
+         * Unfortunately, you need the decrypted data to calculate the
+         * check value. */
+        XMEMSET(out, 0, inSz);
+        ret = AES_CCM_AUTH_E;
+    }
+
+    ForceZero(tag, AES_BLOCK_SIZE);
+    ForceZero(B0Ctr0, AES_BLOCK_SIZE * 2);
+
+    return ret;
+
+}
+#endif /* HAVE_AES_DECRYPT */
+#endif /* HAVE_AESCCM */
+
+#endif /* WOLFSSL_IMX6_CAAM && !NO_AES */
+
diff --git a/client/wolfssl/wolfcrypt/src/port/caam/caam_doc.pdf b/client/wolfssl/wolfcrypt/src/port/caam/caam_doc.pdf
new file mode 100644
index 0000000..8213634
Binary files /dev/null and b/client/wolfssl/wolfcrypt/src/port/caam/caam_doc.pdf differ
diff --git a/client/wolfssl/wolfcrypt/src/port/caam/caam_driver.c b/client/wolfssl/wolfcrypt/src/port/caam/caam_driver.c
new file mode 100644
index 0000000..5d44f2d
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/caam/caam_driver.c
@@ -0,0 +1,1713 @@
+/* caam_driver.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#if defined(__INTEGRITY) || defined(INTEGRITY) + +/* build into Integrity kernel */ +#include +#include "wolfssl/wolfcrypt/port/caam/caam_driver.h" + +#define CAAM_READ(reg) *(volatile unsigned int*)(reg) +#define CAAM_WRITE(reg, in) *(volatile unsigned int*)(reg) = (in); + +#define DESC_COUNT 1 +#define MAX_BUF 20 +#define BUFFER_COUNT (MAX_BUF * DESC_COUNT) + +/* CAAM descriptors can only be 64 unsigned ints */ +#define MAX_DESC_SZ 64 + +/* 64 byte buffer for when data crosses a page boundary */ +#define ALIGN_BUF 16 + +/* MAX_CTX is 64 bytes (sha512 digest) + 8 bytes (CAAM length value) */ +#define MAX_CTX 18 + +#define MIN_READ_REG 0xF2100000 +#define MAX_READ_REG 0XF2110000 + +struct JobRing { + Address JobIn; + Address JobOut; + Address Desc; + Value page; /* page allocation for descriptor to use */ +}; + +struct buffer { + Address data; + Address dataSz; +}; + +/* CAAM descriptor */ +struct DescStruct { + struct IORequestStruct TheIORequest; + struct CAAM_DEVICE* caam; + struct buffer buf[MAX_BUF]; /* buffers holding data input address */ + UINT4 desc[MAX_DESC_SZ]; /* max size of 64 word32 */ + UINT4 aadSzBuf[4]; /* Formatted AAD size for CCM */ + UINT4 alignBuf[ALIGN_BUF]; /* 64 byte buffer for non page + align */ + UINT4 iv[MAX_CTX]; /* AES IV and also hash state */ + UINT4 ctxBuf[MAX_CTX]; /* key */ + Address output; /* address to output buffer */ + Address ctxOut; /* address to update buffer holding state */ + Value alignIdx;/* index for align buffer */ + Value idx; /* index for descriptor buffer */ + Value headIdx; /* for first portion of descriptor buffer */ + Value lastIdx; /* for last portion of descriptor buffer */ + Value outputIdx; /* idx to output buffer in "buf" */ + Value inputSz; /* size of input buffer */ + Value ctxSz; /* size of CTX/Key buffer */ + Value aadSz; /* AAD size for CCM */ + Value lastFifo; + Value type; + Value state; + Value DescriptorCount; + Boolean running; /* True if building/running descriptor is + in process */ +}; + +struct CAAM_DEVICE { + struct IODeviceVectorStruct caamVector; + struct IODescriptorStruct IODescriptorArray[BUFFER_COUNT]; + struct DescStruct DescArray[DESC_COUNT]; + volatile Value InterruptStatus; + CALL HandleInterruptCall; + struct JobRing ring; +}; + +#define DRIVER_NAME "wolfSSL_CAAM_Driver" + +static struct CAAM_DEVICE caam; + +/****************************************************************************** + Internal CAAM Job Ring and partition functions + ****************************************************************************/ + +/* flush job ring and reset */ +static Error caamReset(void) +{ + int t = 100000; /* time out counter for flushing job ring */ + + /* make sure interrupts are masked in JRCFGR0_LS register */ + CAAM_WRITE(CAAM_BASE | 0x1054, CAAM_READ(CAAM_BASE | 0x1054) | 1); + + /* flush and reset job rings using JRCR0 register */ + CAAM_WRITE(CAAM_BASE | 0x106C, 1); + + /* check register JRINTR for if halt is in progress */ + while (t > 0 && ((CAAM_READ(CAAM_BASE | 0x104C) & 0x4) == 0x4)) t--; + if (t == 0) { + /*unrecoverable failure, the job ring is locked, up hard reset needed*/ + return NotRestartable; + } + + /* now that flush has been done restart the job ring */ + t = 100000; + CAAM_WRITE(CAAM_BASE | 0x106C, 1); + while (t > 0 && ((CAAM_READ(CAAM_BASE | 0x106C) & 1) == 1)) 
t--; + if (t == 0) { + /*unrecoverable failure, reset bit did not return to 0 */ + return NotRestartable; + } + + /* reset most registers and state machines in CAAM using MCFGR register + also reset DMA */ + CAAM_WRITE(CAAM_BASE | 0x0004, 0x90000000); + + return Success; +} + +/* returns MemoryMapMayNotBeEmpty if page/par is already owned + * returns Success on success + * all other returns is an error state + */ +static Error caamCreatePartition(unsigned char page, unsigned char par) +{ + /* check ownership of partition */ + if ((CAAM_READ(CAAM_BASE | 0x1FBC) & (0x3 << (par * 2))) > 0) { + return MemoryMapMayNotBeEmpty; + } + + /* set generic all access permissions, gets reset later */ + CAAM_WRITE(CAAM_BASE | (0x1108 + (par * 16)), 0xF); + CAAM_WRITE(CAAM_BASE | (0x110C + (par * 16)), 0xF); + CAAM_WRITE(CAAM_BASE | (0x1104 + (par * 16)), 0xFF); + + /* check ownership of page */ + CAAM_WRITE(CAAM_BASE | 0x10F4, (page << 16) | 0x5); + /* wait for inquiry cmd to complete */ + while ((CAAM_READ(CAAM_BASE | 0x10FC) & 0x0000C000) > 0 && + (CAAM_READ(CAAM_BASE | 0x10FC) & 0x00003000) == 0) { + } + if ((CAAM_READ(CAAM_BASE | 0x10FC) & 0x000000C0) == 0xC0) { + /* owns the page can dealloc it */ + CAAM_WRITE(CAAM_BASE | 0x10F4, (page << 16) | 0x2); + while ((CAAM_READ(CAAM_BASE | 0x10FC) & 0x0000C000) > 0 && + (CAAM_READ(CAAM_BASE | 0x10FC) & 0x00003000) == 0) {} + if ((CAAM_READ(CAAM_BASE | 0x10FC) & 0x00003000) > 0) { + /* error while deallocating page */ + return MemoryMapMayNotBeEmpty; /* PSP set on page or is unavailable */ + } + } + else { + /* check if owned by someone else */ + if ((CAAM_READ(CAAM_BASE | 0x10FC) & 0x000000C0) != 0) { + return MemoryMapMayNotBeEmpty; + } + } + + /* allocate page to partition */ + CAAM_WRITE(CAAM_BASE | 0x10F4, (page << 16) | (par << 8) | 0x1); + /* wait for alloc cmd to complete */ + while ((CAAM_READ(CAAM_BASE | 0x10FC) & 0x0000C000) > 0 && + (CAAM_READ(CAAM_BASE | 0x10FC) & 0x00003000) == 0) { + } + + if ((CAAM_READ(CAAM_BASE | 0x10FC) & 0x00003000) > 0) { + return MemoryOperationNotPerformed; + } + + /* double check ownership now of page */ + CAAM_WRITE(CAAM_BASE | 0x10F4, (page << 16) | 0x5); + /* wait for inquiry cmd to complete */ + while ((CAAM_READ(CAAM_BASE | 0x10FC) & 0x0000C000) > 0 && + (CAAM_READ(CAAM_BASE | 0x10FC) & 0x00003000) == 0) { + } + if ((CAAM_READ(CAAM_BASE | 0x10FC) & 0x0000000F) == 0 || + (CAAM_READ(CAAM_BASE | 0x10FC) & 0x00003000) > 0) { + /* page not owned */ + return MemoryOperationNotPerformed; + } + + return Success; +} + + +/* Gets the status of a job. Returns Waiting if no output jobs ready to be + * read. 
+ * If no jobs are done then return Waiting + * If jobs are done but does not match desc then return NoActivityReady + * Status holds the error values if any */ +static Error caamGetJob(struct CAAM_DEVICE* dev, UINT4* status) +{ + UINT4 reg = CAAM_READ(CAAM_BASE | 0x1044); /* JRSTAR0 status */ + if (status) { + *status = 0; + } + + /* check for DECO, CCB, and Job Ring error state JRSTAR0 register */ + if (((reg & 0xF0000000) == 0x20000000) || /* CCB error */ + ((reg & 0xF0000000) == 0x40000000)|| /* DECO error */ + ((reg & 0xF0000000) == 0x60000000)) { /* Job Ring error */ + + if ((reg & 0x0000000F) > 0) { + *status = reg; + return Failure; + } + } + + /* Check number of done jobs in output list */ + reg = CAAM_READ(CAAM_BASE | 0x103C); + if ((reg & 0x000003FF) > 0) { + UINT4* out = (UINT4*)(dev->ring.JobOut); + if (status) { + *status = out[1]; + } + + if ((dev->ring.Desc ^ 0xF0000000) != out[0]) { + db_printf("CAAM job completed vs expected mismatch"); + return NoActivityReady; + } + + if (out[1] > 0) { + return Failure; + } + + /* increment jobs removed */ + CAAM_WRITE(CAAM_BASE | 0x1034, 1); + } + else { + /* check if the CAAM is idle and not processing any descriptors */ + if ((CAAM_READ(CAAM_BASE | 0x0FD4) & 0x00000002) == 2 /* idle */ + && (CAAM_READ(CAAM_BASE | 0x0FD4) & 0x00000001) == 0) { + return NoActivityReady; + } + + return Waiting; + } + + return Success; +} + + +/* Initialize CAAM RNG + * returns 0 on success */ +static int caamInitRng(struct CAAM_DEVICE* dev) +{ + UINT4 reg, status; + int ret = 0; + + /* Set up use of the TRNG for seeding wolfSSL HASH-DRBG */ + CAAM_WRITE(CAAM_RTMCTL, CAAM_PRGM); + CAAM_WRITE(CAAM_RTMCTL, CAAM_READ(CAAM_RTMCTL) | 0x40); /* reset */ + + /* Set up reading from TRNG */ + CAAM_WRITE(CAAM_RTMCTL, CAAM_READ(CAAM_RTMCTL) | CAAM_TRNG); + + /* Set up delay for TRNG @TODO Optimizations? + * Shift left with RTSDCTL because 0-15 is for sample number + * Also setting the max and min frequencies */ + CAAM_WRITE(CAAM_RTSDCTL, (CAAM_ENT_DLY << 16) | 0x09C4); + CAAM_WRITE(CAAM_RTFRQMIN, CAAM_ENT_DLY >> 1); /* 1/2 */ + CAAM_WRITE(CAAM_RTFRQMAX, CAAM_ENT_DLY << 3); /* up to 8x */ + + /* Set back to run mode and clear RTMCL error bit */ + reg = CAAM_READ(CAAM_RTMCTL) ^ CAAM_PRGM; + + CAAM_WRITE(CAAM_RTMCTL, reg); + reg = CAAM_READ(CAAM_RTMCTL); + reg |= CAAM_CTLERR; + CAAM_WRITE(CAAM_RTMCTL, reg); + + /* check input slot is available and then add */ + if (CAAM_READ(CAAM_BASE | 0x1014) > 0) { + UINT4* in = (UINT4*)dev->ring.JobIn; + + memcpy((unsigned char*)dev->ring.Desc, (unsigned char*)wc_rng_start, + sizeof(wc_rng_start)); + + in[0] = dev->ring.Desc ^ 0xF0000000; /* physical address */ + CAAM_WRITE(CAAM_IRJAR0, 0x00000001); + } + else { + return Waiting; + } + + do { + ret = caamGetJob(dev, &status); + /* @TODO use a better way to chill out CPU. 
*/ + } while (ret == Waiting); + + return ret; +} + + +static Error caamDoJob(struct DescStruct* desc) +{ + Error ret; + UINT4 status; + + /* clear and set desc size */ + desc->desc[0] &= 0xFFFFFF80; + desc->desc[0] += desc->idx; + + /* check input slot is available and then add */ + if (CAAM_READ(CAAM_BASE | 0x1014) > 0) { + UINT4* in = (UINT4*)desc->caam->ring.JobIn; + + memcpy((unsigned char*)desc->caam->ring.Desc, (unsigned char*)desc->desc, + (desc->idx + 1) * sizeof(UINT4)); + + in[0] = desc->caam->ring.Desc ^ 0xF0000000; /* physical address */ + CAAM_WRITE(CAAM_IRJAR0, 0x00000001); + } + else { + return Waiting; + } + + do { + ret = caamGetJob(desc->caam, &status); + /* @TODO use a better way to chill out CPU. */ + } while (ret == Waiting); + + if (status != 0 || ret != Success) { + #if 0 + /* Used during testing to print out descriptor */ + { + char msg[2048]; + char* pt = msg; + int z; + + memset(msg, 0, sizeof(msg)); + for (z = 0; z < desc->idx; z++) { + snprintf(pt, sizeof(msg) - (z * 21), "desc[%d] = 0x%8.8x, ", + z, desc->desc[z]); + pt += 21; + } + snprintf(pt, sizeof(msg) - (z * 21), "status = 0x%8.8x\n", status); + if (desc->buf[0].data != 0) { /* for testing */ + memcpy((char*)desc->buf[0].data, msg, sizeof(msg)); + } + } + #endif + + + /* try to reset after error */ + caamReset(); + return ret; + } + + return Success; +} + + +/* handle input or output buffers + * NOTES: if sz == 0 then read all the rest of the buffers available + * when align == 1 then there is no alignment constraints + * + * returns the data size in bytes on success. With failure a negative value is + * returned. + */ +static int caamAddIO(struct DescStruct* desc, UINT4 options, UINT4 sz, + UINT4 align, UINT4* idx) +{ + int i, outSz = 0; + + if (align == 0) { + return -1; /* programming error */ + } + + for (i = *idx; i < desc->DescriptorCount; i++) { + /* input must be a multiple of "align" bytes */ + struct buffer* buf = &desc->buf[i]; + int blocks = buf->dataSz / align; + Address data = buf->data; + Address dataSz = buf->dataSz; + + if (outSz >= sz && sz != 0) { + break; + } + + if (dataSz % align > 0) { + /* store potential overlap */ + int tmpSz = dataSz % align; + int add = (tmpSz < (align - desc->alignIdx)) ? 
tmpSz : + align - desc->alignIdx; + unsigned char* local = (unsigned char*)desc->alignBuf; + + /* if already something in the buffer then add from front */ + if (desc->alignIdx > 0) { + memcpy((unsigned char*)&local[desc->alignIdx], + (unsigned char*)data, add); + data += add; + } + else { + memcpy((unsigned char*)&local[desc->alignIdx], + (unsigned char*)data + (blocks * align), add); + } + dataSz -= add; + desc->alignIdx += add; + } + + if (desc->alignIdx == align) { + desc->lastFifo = desc->idx; + if (desc->idx + 2 > MAX_DESC_SZ) { + return -1; + } + desc->desc[desc->idx++] = options + desc->alignIdx; + desc->desc[desc->idx++] = BSP_VirtualToPhysical(desc->alignBuf); + ASP_FlushCaches((Address)desc->alignBuf, desc->alignIdx); + outSz += desc->alignIdx; + } + + if (blocks > 0) { + desc->lastFifo = desc->idx; + if (desc->idx + 2 > MAX_DESC_SZ) { + return -1; + } + desc->desc[desc->idx++] = options + (blocks * align); + desc->desc[desc->idx++] = BSP_VirtualToPhysical(data); + outSz += (blocks * align); + + /* only one buffer available for align cases so exit here and make + a new descriptor after running current one */ + if (desc->alignIdx == align) { + desc->alignIdx = 0; + i++; /* start at next buffer */ + break; + } + } + } + + *idx = i; + return outSz; +} + + +/****************************************************************************** + IODevice Register Read and Write + ****************************************************************************/ + +static Error caamReadRegister(IODeviceVector ioCaam, Value reg, Value *out) +{ + if (reg < MIN_READ_REG || reg > MAX_READ_REG) { + return IllegalRegisterNumber; + } + + switch (reg) { + case CAAM_STATUS: + case CAAM_VERSION_MS: + case CAAM_VERSION_LS: + case CAMM_SUPPORT_MS: + case CAMM_SUPPORT_LS: + case CAAM_RTMCTL: + *out = CAAM_READ(reg); + break; + + default: + return IllegalRegisterNumber; + } + + (void)ioCaam; + return Success; +} + + +static Error caamWriteRegister(IODeviceVector ioCaam, Value reg, Value in) +{ + /* Should be no need for writes */ + return OperationNotAllowedOnTheUniversalIODevice; +} + + +/****************************************************************************** + CAAM Blob Operations + ****************************************************************************/ + +/* limit on size due to size of job ring being 64 word32's */ +static Error caamBlob(struct DescStruct* desc) +{ + Error err; + UINT4 keyType = 0x00000C08; /* default red */ + UINT4 i = 0; + int sz = 0, ret; + + if (desc->idx + 3 > MAX_DESC_SZ) { + return Failure; + } + + /*default to Red Key type, with offset of 12 and 8 byte load to context 2*/ + desc->desc[desc->idx++] = (CAAM_LOAD_CTX | CAAM_CLASS2 | CAAM_IMM | keyType); + + /* add key modifier */ + if (i < desc->DescriptorCount) { + UINT4* pt; + Address data = desc->buf[i].data; + Address dataSz = desc->buf[i].dataSz; + + pt = (UINT4*)data; + if (dataSz < 8) { /* expecting 8 bytes for key modifier*/ + return TooManyBuffers; + } + desc->desc[desc->idx++] = pt[0]; + desc->desc[desc->idx++] = pt[1]; + } + + /* add input */ + while (sz < desc->inputSz && i < desc->DescriptorCount) { + ret = caamAddIO(desc, CAAM_SEQI, desc->inputSz - sz, 1, &i); + if (ret < 0) { /* handle error case */ + return TooManyBuffers; + } + sz += ret; + } + desc->outputIdx = i; + + /* add output */ + if (caamAddIO(desc, CAAM_SEQO, 0, 1, &i) < 0) { + return TooManyBuffers; + } + + if (desc->idx + 1 > MAX_DESC_SZ) { + return Failure; + } + desc->desc[desc->idx++] = CAAM_OP | CAAM_OPID_BLOB | desc->type; + + if ((err = 
caamDoJob(desc)) != Success) { + return err; + } + + /* flush output buffers */ + for (i = desc->outputIdx; i < desc->DescriptorCount; i++) { + ASP_FlushCaches(desc->buf[i].data, desc->buf[i].dataSz); + } + + return Success; +} + + +/****************************************************************************** + CAAM AES Operations + ****************************************************************************/ + +/* returns amount written on success and negative value in error case. + * Is different from caamAddIO in that it only adds a single input buffer + * rather than multiple ones. + */ +static int caamAesInput(struct DescStruct* desc, UINT4* idx, int align, + UINT4 totalSz) +{ + int sz; + UINT4 i = *idx; + + /* handle alignment constraints on input */ + if (desc->alignIdx > 0) { + sz = desc->alignIdx; + + /* if there is more input buffers then add part of it */ + if (i < desc->outputIdx && i < desc->DescriptorCount) { + sz = align - desc->alignIdx; + sz = (sz <= desc->buf[i].dataSz) ? sz : desc->buf[i].dataSz; + memcpy((unsigned char*)(desc->alignBuf) + desc->alignIdx, + (unsigned char*)(desc->buf[i].data), sz); + + desc->buf[i].dataSz -= sz; + desc->buf[i].data += sz; + sz += desc->alignIdx; + } + + if (desc->idx + 2 > MAX_DESC_SZ) { + return -1; + } + ASP_FlushCaches((Address)desc->alignBuf, sz); + desc->desc[desc->idx++] = (CAAM_FIFO_L | FIFOL_TYPE_LC1 | + CAAM_CLASS1 | FIFOL_TYPE_MSG) + sz; + desc->desc[desc->idx++] = BSP_VirtualToPhysical(desc->alignBuf); + desc->alignIdx = 0; + } + else { + sz = desc->buf[i].dataSz; + if ((totalSz + sz) == desc->inputSz) { /* not an issue on final */ + align = 1; + } + + desc->alignIdx = sz % align; + if (desc->alignIdx != 0) { + sz -= desc->alignIdx; + memcpy((unsigned char*)desc->alignBuf, + (unsigned char*)(desc->buf[i].data) + sz, + desc->alignIdx); + } + + if (desc->idx + 2 > MAX_DESC_SZ) { + return -1; + } + desc->desc[desc->idx++] = (CAAM_FIFO_L | FIFOL_TYPE_LC1 | + CAAM_CLASS1 | FIFOL_TYPE_MSG) + sz; + desc->desc[desc->idx++] = BSP_VirtualToPhysical(desc->buf[i].data); + i++; + } + + *idx = i; + return sz; +} + + +/* returns enum Success on success, all other return values should be + * considered an error. + * + * ofst is the amount of leftover buffer from previous calls + * inputSz is the amount of input in bytes that is being matched to output + */ +static Error caamAesOutput(struct DescStruct* desc, int* ofst, UINT4 inputSz) +{ + int offset = *ofst; + + if (desc->output != 0 && offset > 0 && inputSz > 0) { + UINT4 addSz; + + /* handle potential leftovers */ + addSz = (inputSz >= offset) ? 
offset : inputSz; + + inputSz -= addSz; + desc->desc[desc->idx++] = CAAM_FIFO_S | FIFOS_TYPE_MSG + addSz; + if (inputSz > 0) { /* check if expecting more output */ + desc->desc[desc->idx - 1] |= CAAM_FIFOS_CONT; + } + desc->desc[desc->idx++] = BSP_VirtualToPhysical(desc->output); + + if (addSz == offset) { + /* reset */ + desc->output = 0; + offset = 0; + } + else { + offset -= addSz; + desc->output += addSz; + + if (offset < 0) { + return TransferFailed; + } + } + } + + for (; desc->lastIdx < desc->DescriptorCount; desc->lastIdx++) { + struct buffer* buf = &desc->buf[desc->lastIdx]; + + if (inputSz > 0) { + int tmp; + + if (buf->dataSz <= inputSz) { + tmp = buf->dataSz; + } + else { + offset = buf->dataSz - inputSz; + tmp = inputSz; + desc->output = buf->data + tmp; + } + inputSz -= tmp; + if (desc->idx + 2 > MAX_DESC_SZ) { + return TransferFailed; + } + desc->desc[desc->idx++] = CAAM_FIFO_S | FIFOS_TYPE_MSG + tmp; + if (inputSz > 0) { /* check if expecting more output */ + desc->desc[desc->idx - 1] |= CAAM_FIFOS_CONT; + } + desc->desc[desc->idx++] = BSP_VirtualToPhysical(buf->data); + } + else { + break; + } + } + + *ofst = offset; + return Success; +} + + +/* check size of output and get starting buffer for it */ +static Error caamAesOutSz(struct DescStruct* desc, UINT4 i) +{ + int sz = 0; + + for (desc->outputIdx = i; desc->outputIdx < desc->DescriptorCount && + sz < desc->inputSz; desc->outputIdx++) { + sz += desc->buf[desc->outputIdx].dataSz; + } + desc->lastIdx = desc->outputIdx; + + /* make certain that output size is same as input */ + sz = 0; + for (; desc->lastIdx < desc->DescriptorCount; desc->lastIdx++) { + sz += desc->buf[desc->lastIdx].dataSz; + } + if (sz != desc->inputSz) { + return SizeIsTooLarge; + } + desc->lastIdx = desc->outputIdx; + + return Success; +} + + +/* AES operations follow the buffer sequence of KEY -> (IV) -> Input -> Output + */ +static Error caamAes(struct DescStruct* desc) +{ + struct buffer* ctx[3]; + struct buffer* iv[3]; + Value ofst = 0; + Error err; + UINT4 i, totalSz = 0; + int ctxIdx = 0; + int ivIdx = 0; + int offset = 0; + int align = 1; + int sz = 0; + + int ctxSz = desc->ctxSz; + + if (desc->state != CAAM_ENC && desc->state != CAAM_DEC) { + return IllegalStatusNumber; + } + + if (ctxSz != 16 && ctxSz != 24 && ctxSz != 32) { + return ArgumentError; + } + + /* get key */ + for (i = 0; i < desc->DescriptorCount; i++) { + struct buffer* buf = &desc->buf[i]; + unsigned char* local = (unsigned char*)desc->ctxBuf; + + if (sz < ctxSz && sz < (MAX_CTX * sizeof(UINT4))) { + ctx[ctxIdx] = buf; + sz += buf->dataSz; + + memcpy((unsigned char*)&local[offset], + (unsigned char*)ctx[ctxIdx]->data, ctx[ctxIdx]->dataSz); + offset += ctx[ctxIdx]->dataSz; + ctxIdx++; + } + else { + break; + } + } + + /* sanity checks on size of key */ + if (sz > ctxSz) { + return SizeIsTooLarge; + } + if (ctxSz > (MAX_CTX * sizeof(UINT4)) - 16) { + return ArgumentError; + } + + /* Flush cache of ctx buffer then : + Add KEY Load command 0x0220000X + Add address to read key from 0xXXXXXXXX */ + ASP_FlushCaches((Address)desc->ctxBuf, ctxSz); + if (desc->idx + 2 > MAX_DESC_SZ) { + return TransferFailed; + } + desc->desc[desc->idx++] = (CAAM_KEY | CAAM_CLASS1 | CAAM_NWB) + ctxSz; + desc->desc[desc->idx++] = BSP_VirtualToPhysical(desc->ctxBuf); + + /* get IV if needed by algorithm */ + switch (desc->type) { + case CAAM_AESECB: + break; + + case CAAM_AESCTR: + ofst = 0x00001000; + /* fall through because states are the same only the offset changes */ + + case CAAM_AESCBC: + { + int 
maxSz = 16; /* default to CBC/CTR max size */ + + sz = 0; + offset = 0; + for (; i < desc->DescriptorCount; i++) { + struct buffer* buf = &desc->buf[i]; + unsigned char* local = (unsigned char*)desc->iv; + + if (sz < maxSz) { + iv[ivIdx] = buf; + + if (buf->dataSz + sz > maxSz) { + return SizeIsTooLarge; + } + + sz += buf->dataSz; + memcpy((unsigned char*)&local[offset], + (unsigned char*)iv[ivIdx]->data, iv[ivIdx]->dataSz); + offset += iv[ivIdx]->dataSz; + ivIdx++; + } + else { + break; + } + } + + if (sz != maxSz) { + /* invalid IV size */ + return SizeIsTooLarge; + } + + ASP_FlushCaches((Address)desc->iv, maxSz); + if (desc->idx + 2 > MAX_DESC_SZ) { + return TransferFailed; + } + desc->desc[desc->idx++] = (CAAM_LOAD_CTX | CAAM_CLASS1 | ofst) + maxSz; + desc->desc[desc->idx++] = BSP_VirtualToPhysical(desc->iv); + } + break; + + default: + return OperationNotImplemented; + } + + /* write operation */ + if (desc->idx + 1 > MAX_DESC_SZ) { + return TransferFailed; + } + desc->desc[desc->idx++] = CAAM_OP | CAAM_CLASS1 | desc->type | + CAAM_ALG_UPDATE | desc->state; + + /* find output buffers */ + if (caamAesOutSz(desc, i) != Success) { + return SizeIsTooLarge; + } + + /* set alignment constraints */ + if (desc->type == CAAM_AESCBC || desc->type == CAAM_AESECB) { + align = 16; + } + + /* indefinite loop for input/output buffers */ + desc->headIdx = desc->idx; + desc->output = 0; + offset = 0; /* store left over amount for output buffer */ + do { + desc->idx = desc->headIdx; /* reset for each loop */ + + /* add a single input buffer (multiple ones was giving deco watch dog + * time out errors on the FIFO load of 1c. + * @TODO this could be a place for optimization if more data could be + * loaded in at one time */ + if ((sz = caamAesInput(desc, &i, align, totalSz)) < 0) { + return TransferFailed; + } + totalSz += sz; + + if (caamAesOutput(desc, &offset, sz) != Success) { + return TransferFailed; + } + + /* store updated IV */ + if (ivIdx > 0) { + if (desc->idx + 2 > MAX_DESC_SZ) { + return TransferFailed; + } + desc->desc[desc->idx++] = CAAM_STORE_CTX | CAAM_CLASS1 | ofst | 16; + desc->desc[desc->idx++] = BSP_VirtualToPhysical((Address)desc->iv); + } + + if ((err = caamDoJob(desc)) != Success) { + return err; + } + ASP_FlushCaches((Address)desc->iv, 16); + } while (desc->lastIdx < desc->DescriptorCount || offset > 0); + + /* flush output buffers */ + for (i = desc->outputIdx; i < desc->lastIdx; i++) { + ASP_FlushCaches(desc->buf[i].data, desc->buf[i].dataSz); + } + + /* handle case with IV */ + if (ivIdx > 0) { + unsigned char* pt = (unsigned char*)desc->iv; + ASP_FlushCaches((Address)pt, 16); + for (i = 0; i < ivIdx; i++) { + memcpy((unsigned char*)iv[i]->data, pt, iv[i]->dataSz); + pt += iv[i]->dataSz; + ASP_FlushCaches(iv[i]->data, iv[i]->dataSz); + } + } + + return Success; +} + + +/****************************************************************************** + CAAM AEAD Operations + ****************************************************************************/ + +/* AEAD operations follow the buffer sequence of KEY -> (IV or B0 | CTR0) -> (AD) + * -> Input -> Output + * + */ +static Error caamAead(struct DescStruct* desc) +{ + struct buffer* ctx[3]; + struct buffer* iv[3]; + Value ofst = 0; + UINT4 state = CAAM_ALG_INIT; + UINT4 totalSz = 0; + Error err; + UINT4 i; + int ctxIdx = 0; + int ivIdx = 0; + int offset = 0; + int sz = 0; + int ivSz = 32; /* size of B0 | CTR0 for CCM mode */ + int ctxSz = desc->ctxSz; + int align = 16; /* input should be multiples of 16 bytes unless is final */ + 
int opIdx; + + if (desc->state != CAAM_ENC && desc->state != CAAM_DEC) { + return IllegalStatusNumber; + } + + /* sanity check is valid AES key size */ + if (ctxSz != 16 && ctxSz != 24 && ctxSz != 32) { + return ArgumentError; + } + + /* get key */ + for (i = 0; i < desc->DescriptorCount; i++) { + struct buffer* buf = &desc->buf[i]; + unsigned char* local = (unsigned char*)desc->ctxBuf; + + if (sz < ctxSz && sz < (MAX_CTX * sizeof(UINT4))) { + ctx[ctxIdx] = buf; + sz += buf->dataSz; + + memcpy((unsigned char*)&local[offset], + (unsigned char*)ctx[ctxIdx]->data, ctx[ctxIdx]->dataSz); + offset += ctx[ctxIdx]->dataSz; + ctxIdx++; + } + else { + break; + } + } + + /* sanity checks on size of key */ + if (sz > ctxSz) { + return SizeIsTooLarge; + } + + /* Flush cache of ctx buffer then : + Add KEY Load command 0x0220000X + Add address to read key from 0xXXXXXXXX */ + ASP_FlushCaches((Address)desc->ctxBuf, ctxSz); + if (desc->idx + 2 > MAX_DESC_SZ) { + return TransferFailed; + } + desc->desc[desc->idx++] = (CAAM_KEY | CAAM_CLASS1 | CAAM_NWB) + ctxSz; + desc->desc[desc->idx++] = BSP_VirtualToPhysical(desc->ctxBuf); + + desc->headIdx = desc->idx; + desc->output = 0; + offset = 0; /* store left over amount for output buffer */ + do { + desc->idx = desc->headIdx; /* reset for each loop */ + + /* write operation */ + if (desc->idx + 1 > MAX_DESC_SZ) { + return TransferFailed; + } + opIdx = desc->idx; + desc->desc[desc->idx++] = CAAM_OP | CAAM_CLASS1 | state | desc->type | + desc->state; + + /* get IV if needed by algorithm */ + switch (desc->type) { + case CAAM_AESCCM: + if ((state & CAAM_ALG_INIT) == CAAM_ALG_INIT) { + sz = 0; + offset = 0; + for (; i < desc->DescriptorCount; i++) { + struct buffer* buf = &desc->buf[i]; + unsigned char* local = (unsigned char*)desc->iv; + + if (sz < ivSz) { + iv[ivIdx] = buf; + + if (buf->dataSz + sz > ivSz) { + return SizeIsTooLarge; + } + + sz += buf->dataSz; + memcpy((unsigned char*)&local[offset], + (unsigned char*)iv[ivIdx]->data, iv[ivIdx]->dataSz); + offset += iv[ivIdx]->dataSz; + ivIdx++; + } + else { + break; + } + } + + if (sz != ivSz) { + /* invalid IV size */ + return SizeIsTooLarge; + } + offset = 0; + } + + ASP_FlushCaches((Address)desc->iv, ivSz); + if (desc->idx + 2 > MAX_DESC_SZ) { + return TransferFailed; + } + desc->desc[desc->idx++] = (CAAM_LOAD_CTX | CAAM_CLASS1 | ofst) + + ivSz; + desc->desc[desc->idx++] = BSP_VirtualToPhysical(desc->iv); + break; + + default: + return OperationNotImplemented; + } + + + /********* handle AAD -- is only done with Init **********************/ + if ((state & CAAM_ALG_INIT) == CAAM_ALG_INIT) { + if ((desc->type == CAAM_AESCCM) && (desc->aadSz > 0)) { + /* set formatted AAD buffer size for CCM */ + ASP_FlushCaches((Address)desc->aadSzBuf, sizeof(desc->aadSzBuf)); + desc->desc[desc->idx++] = CAAM_FIFO_L | CAAM_CLASS1 | + FIFOL_TYPE_AAD + desc->aadSz; + desc->desc[desc->idx++] = BSP_VirtualToPhysical(desc->aadSzBuf); + + /* now set aadSz to unformatted version for getting buffers */ + if (desc->aadSz == 2) { + unsigned char* pt = (unsigned char*)desc->aadSzBuf; + desc->aadSz = (((UINT4)pt[0] & 0xFF) << 8) | + ((UINT4)pt[1] & 0xFF); + } + else { + unsigned char* pt = (unsigned char*)desc->aadSzBuf; + desc->aadSz = (((UINT4)pt[2] & 0xFF) << 24) | + (((UINT4)pt[3] & 0xFF) << 16) | + (((UINT4)pt[4] & 0xFF) << 8) | + ((UINT4)pt[5] & 0xFF); + } + } + + /* get additional data buffers */ + if (desc->aadSz > 0) { + sz = 0; + for (; i < desc->DescriptorCount; i++) { + struct buffer* buf = &desc->buf[i]; + if (sz < desc->aadSz) { 
+ if (desc->idx + 2 > MAX_DESC_SZ) { + return TransferFailed; + } + desc->lastFifo = desc->idx; + desc->desc[desc->idx++] = CAAM_FIFO_L | CAAM_CLASS1 | + FIFOL_TYPE_AAD + buf->dataSz; + desc->desc[desc->idx++] = BSP_VirtualToPhysical(buf->data); + sz += buf->dataSz; + } + else { + break; + } + } + + /* flush AAD from FIFO and pad it to 16 byte block */ + desc->desc[desc->lastFifo] |= FIFOL_TYPE_FC1; + } + + /* find output buffers */ + if (caamAesOutSz(desc, i) != Success) { + return SizeIsTooLarge; + } + } + + /* handle alignment constraints on input */ + if ((sz = caamAesInput(desc, &i, align, totalSz)) < 0) { + return TransferFailed; + } + totalSz += sz; + + /* handle output buffers */ + if (caamAesOutput(desc, &offset, sz) != Success) { + return TransferFailed; + } + + /* store updated IV, if is last then set offset and final for MAC */ + if ((desc->lastIdx == desc->DescriptorCount) && (offset == 0)) { + ivSz = 16; + if (desc->state == CAAM_ENC) { + ofst = 32 << 8; /* offset is in 15-8 bits */ + } + else { + ofst = 0; + } + desc->desc[opIdx] |= CAAM_ALG_FINAL; + } + else { + /* if not final then store and use ctr and encrypted ctr from + context dword 2,3 and 4,5. Also store MAC and AAD info from + context dword 6. */ + ivSz = 56; + ofst = 0; + } + + if (desc->idx + 2 > MAX_DESC_SZ) { + return TransferFailed; + } + desc->desc[desc->idx++] = CAAM_STORE_CTX | CAAM_CLASS1 | ofst | ivSz; + desc->desc[desc->idx++] = BSP_VirtualToPhysical((Address)desc->iv); + + if ((err = caamDoJob(desc)) != Success) { + return err; + } + state = CAAM_ALG_UPDATE; + } while (desc->lastIdx < desc->DescriptorCount || offset > 0); + + /* flush output buffers */ + for (i = desc->outputIdx; i < desc->lastIdx; i++) { + ASP_FlushCaches(desc->buf[i].data, desc->buf[i].dataSz); + } + + /* handle case with IV (This is also the output of MAC with AES-CCM) */ + if (ivIdx > 0) { + unsigned char* pt = (unsigned char*)desc->iv; + ASP_FlushCaches((Address)pt, ivSz); + for (i = 0; i < ivIdx; i++) { + memcpy((unsigned char*)iv[i]->data, pt, iv[i]->dataSz); + pt += iv[i]->dataSz; + ASP_FlushCaches(iv[i]->data, iv[i]->dataSz); + } + } + + return Success; +} + + +/****************************************************************************** + CAAM SHA Operations + ****************************************************************************/ +static int shaSize(struct DescStruct* desc) +{ + /* sanity check on dataSz for context */ + switch (desc->type) { + case CAAM_MD5: + return CAAM_MD5_CTXSZ; + + case CAAM_SHA: + return CAAM_SHA_CTXSZ; + + case CAAM_SHA224: + return CAAM_SHA224_CTXSZ; + + case CAAM_SHA256: + return CAAM_SHA256_CTXSZ; + + case CAAM_SHA384: + return CAAM_SHA384_CTXSZ; + + case CAAM_SHA512: + return CAAM_SHA512_CTXSZ; + + default: + return 0; + } +} + +/* SHA operations + * start: the index to start traversing through buffers. It's needed to allow + * for HMAC to reuse this code. + * + * return Success on success. All other return values are considered a fail + * case. 
+ */ +static Error caamSha(struct DescStruct* desc, int start) +{ + struct buffer* ctx[3]; + Error err; + UINT4 i; + int sz = 0; + int ctxIdx = 0; + int offset = 0; + + int ctxSz = shaSize(desc); + + /* get context */ + for (i = start; i < desc->DescriptorCount; i++) { + struct buffer* buf = &desc->buf[i]; + unsigned char* local = (unsigned char*)desc->iv; + + if (sz < ctxSz && sz < (MAX_CTX * sizeof(UINT4))) { + ctx[ctxIdx] = buf; + sz += buf->dataSz; + + if (ctx[ctxIdx]->dataSz + offset > (MAX_CTX * sizeof(UINT4))) { + return SizeIsTooLarge; + } + memcpy((unsigned char*)&local[offset], (unsigned char*)ctx[ctxIdx]->data, + ctx[ctxIdx]->dataSz); + offset += ctx[ctxIdx]->dataSz; + ctxIdx++; + } + else { + break; + } + } + if (sz > ctxSz || ctxSz > (MAX_CTX * sizeof(UINT4))) { + return SizeIsTooLarge; + } + + ASP_FlushCaches((Address)desc->iv, ctxSz); + /*Manage Context (current digest + 8 byte running message length)*/ + if ((desc->state & CAAM_ALG_INIT) != CAAM_ALG_INIT) { + /* don't load into the class 2 context register on inti. + Found that loading in caused context to not get set. */ + if (desc->idx + 2 > MAX_DESC_SZ) { + return TransferFailed; + } + desc->desc[desc->idx++] = (CAAM_LOAD_CTX | CAAM_CLASS2) + ctxSz; + desc->desc[desc->idx++] = BSP_VirtualToPhysical((Address)desc->iv); + } + + /* add operation command */ + desc->desc[desc->idx++] = CAAM_OP | CAAM_CLASS2 | desc->state | + desc->type; + + /* Check case where there is no input. + In all cases the FIFO Load should be flushed. */ + if (i == desc->DescriptorCount) { + desc->lastFifo = desc->idx; + if (desc->idx + 1 > MAX_DESC_SZ) { + return TransferFailed; + } + desc->desc[desc->idx++] = CAAM_FIFO_L | CAAM_CLASS2 | + FIFOL_TYPE_MSG | CAAM_IMM; + } + + /* save index for looping over input */ + desc->headIdx = desc->idx; + do { + desc->idx = desc->headIdx; /* reset for each loop */ + if (i < desc->DescriptorCount) { + /* input must be a multiple of 64 bytes unless in final call */ + if (((desc->state & CAAM_ALG_FINAL) == CAAM_ALG_FINAL)) { + if (caamAddIO(desc, (CAAM_FIFO_L | CAAM_CLASS2 | + FIFOL_TYPE_MSG), 0, 1, &i) < 0) { + return TooManyBuffers; + } + } + else { + if (caamAddIO(desc, (CAAM_FIFO_L | CAAM_CLASS2 | + FIFOL_TYPE_MSG), 0, 64, &i) < 0) { + return TooManyBuffers; + } + } + } + + desc->desc[desc->lastFifo] |= FIFOL_TYPE_LC2; + + /* set context out */ + if (desc->idx + 2 > MAX_DESC_SZ) { + return TransferFailed; + } + desc->desc[desc->idx++] = CAAM_STORE_CTX | CAAM_CLASS2 + ctxSz; + desc->desc[desc->idx++] = BSP_VirtualToPhysical(desc->iv); + + if ((err = caamDoJob(desc)) != Success) { + return err; + } + /* flush context output for each loop */ + ASP_FlushCaches((Address)desc->iv, ctxSz); + } while (i < desc->DescriptorCount); + + /* store context to buffers */ + { + unsigned char* pt = (unsigned char*)desc->iv; + for (i = 0; i < ctxIdx; i++) { + memcpy((unsigned char*)ctx[i]->data, pt, ctx[i]->dataSz); + pt += ctx[i]->dataSz; + ASP_FlushCaches(ctx[i]->data, ctx[i]->dataSz); + } + } + + return Success; +} + + +/****************************************************************************** + CAAM TRNG Operations + ****************************************************************************/ + +/* If Entropy is not ready then return Waiting */ +static Error caamRng(struct DescStruct* desc) +{ + int sz = 0; + int i; + + Address reg; /* RTENT reg to read */ + int ofst = sizeof(UINT4); + + + /* Check ENT_VAL bit to make sure entropy is ready */ + if ((CAAM_READ(CAAM_RTMCTL) & CAAM_ENTVAL) != + CAAM_ENTVAL) { + return 
Waiting; + } + + /* check state of TRNG */ + if ((CAAM_READ(CAAM_RTSTATUS) & 0x0000FFFF) > 0) { + return Failure; + } + + /* read entropy from RTENT registers */ + reg = CAAM_RTENT0; + + for (i = 0; i < desc->DescriptorCount; i++) { + struct buffer* buf = &desc->buf[i]; + unsigned char* local = (unsigned char*)buf->data; + sz = buf->dataSz; + + while (sz > 3 && reg <= CAAM_RTENT11) { + *((UINT4*)local) = CAAM_READ(reg); + reg += ofst; + local += ofst; + sz -= ofst; + } + + if (reg > CAAM_RTENT11 && sz > 0) { + return SizeIsTooLarge; + } + + /* handle non word32 size amount left over */ + if (sz > 0) { + UINT4 tmp = CAAM_READ(reg); + memcpy(local, (unsigned char*)&tmp, sz); + } + + ASP_FlushCaches(buf->data, buf->dataSz); + } + + + /* read RTENT11 to trigger new entropy generation */ + if (reg != CAAM_RTENT11) { + CAAM_READ(CAAM_RTENT11); + } + + return Success; +} + + +/****************************************************************************** + IODevice Start, Transfer and Finish Buffer + ****************************************************************************/ +/* args[0] holds the state such as encrypt/decrypt or init/update/final + * args[1] holds the ctx/key size + * args[2] holds the input size + * args[3] dependent on algo (such as AAD size with AES-CCM) */ +static Error caamTransferStart(IODeviceVector ioCaam, + Value type, const volatile Value args[4]) +{ + struct CAAM_DEVICE* local = (struct CAAM_DEVICE*)ioCaam; + struct DescStruct* desc; + + /* currently only one desc is available for use */ + desc = &local->DescArray[0]; + + /* check if the desc is idle before using */ + if (GetIORequestStatus((IORequest)desc) != IdleIORequest) { + return ResourceNotAvailable; + } + + desc->idx = 0; + desc->output = 0; + desc->ctxOut = 0; + desc->outputIdx = 0; + desc->alignIdx = 0; + desc->lastFifo = 0; + desc->state = args[0]; + desc->ctxSz = args[1]; + desc->inputSz = args[2]; + desc->aadSz = 0; + desc->desc[desc->idx++] = CAAM_HEAD; /* later will put size to header*/ + + switch (type) { + case CAAM_AESECB: + case CAAM_AESCBC: + if (desc->inputSz % 16 != 0) { + return ArgumentError; + } + /* fall through to break */ + case CAAM_AESCTR: + break; + + case CAAM_AESCCM: + memset((unsigned char*)desc->aadSzBuf, 0, sizeof(desc->aadSzBuf)); + if (args[3] > 0) { + /* encode the length in */ + if (args[3] <= 0xFEFF) { + unsigned char* pt = (unsigned char*)desc->aadSzBuf; + desc->aadSz = 2; + pt[0] = ((args[3] & 0xFF00) >> 8); + pt[1] = (args[3] & 0x00FF); + } + else if (args[3] <= 0xFFFFFFFF) { + unsigned char* pt = (unsigned char*)desc->aadSzBuf; + desc->aadSz = 6; + pt[0] = 0xFF; pt[1] = 0xFE; + pt[2] = ((args[3] & 0xFF000000) >> 24); + pt[3] = ((args[3] & 0x00FF0000) >> 16); + pt[4] = ((args[3] & 0x0000FF00) >> 8); + pt[5] = (args[3] & 0x000000FF); + } + } + break; + + case CAAM_MD5: + case CAAM_SHA: + case CAAM_SHA224: + case CAAM_SHA256: + case CAAM_SHA384: + case CAAM_SHA512: + break; + + case CAAM_BLOB_ENCAP: + case CAAM_BLOB_DECAP: + break; + + case CAAM_ENTROPY: + break; + + default: + /* unknown type */ + return UsageNotSupported; + } + + desc->DescriptorCount = 0; + desc->type = type; + desc->running = true; + StartIORequest((IORequest)desc); + + /* For now only require READ permissions */ + SetIORequestBufferPermissions((IORequest)desc, MEMORY_READ); + return Success; +} + + +static Error caamTransferBuffer(IODeviceVector TheIODeviceVector, + IORequest req, IODescriptor NewIODescriptor, + Address data, Address dataSz) +{ + struct DescStruct* desc = (struct DescStruct*)req; + Error 
err; + + switch (desc->type) { + case CAAM_AESECB: + case CAAM_AESCTR: + case CAAM_AESCBC: + case CAAM_AESCCM: + + case CAAM_MD5: + case CAAM_SHA: + case CAAM_SHA224: + case CAAM_SHA256: + case CAAM_SHA384: + case CAAM_SHA512: + + case CAAM_BLOB_ENCAP: + case CAAM_BLOB_DECAP: + case CAAM_ENTROPY: + { /* set buffer for transfer finish */ + struct buffer* buf; + if (desc->DescriptorCount >= MAX_BUF) { + return TooManyBuffers; + } + buf = &desc->buf[desc->DescriptorCount]; + buf->data = data; + buf->dataSz = dataSz; + } + err = Success; + break; + + default: + err = UsageNotSupported; + } + + if (err != Success) { + desc->running = false; + DismissIORequest(req); + return err; + } + + desc->DescriptorCount++; + return Success; +} + + +static Error caamTransferFinish(IODeviceVector ioCaam, IORequest req) +{ + struct DescStruct* desc = (struct DescStruct*)req; + Error ret; + + /* construct desc */ + switch (desc->type) { + case CAAM_AESECB: + case CAAM_AESCTR: + case CAAM_AESCBC: + ret = caamAes(desc); + break; + + case CAAM_AESCCM: + ret = caamAead(desc); + break; + + case CAAM_MD5: + case CAAM_SHA: + case CAAM_SHA224: + case CAAM_SHA256: + case CAAM_SHA384: + case CAAM_SHA512: + ret = caamSha(desc, 0); + break; + + case CAAM_ENTROPY: + ret = caamRng(desc); + break; + + case CAAM_BLOB_ENCAP: + case CAAM_BLOB_DECAP: + ret = caamBlob(desc); + break; + + default: + ret = UsageNotSupported; + } + + desc->running = false; + DismissIORequest(req); + return ret; +} + + +/****************************************************************************** + IODevice Interrupt and Init + ****************************************************************************/ + +static Error caamTransferWrite(IODeviceVector ioCaam, + IORequest req, Value dataSz, const volatile Value *data) +{ + DismissIORequest(req); + return UsageNotSupported; +} + + +static void caamTransferAbort(IODeviceVector ioCaam, IORequest req) +{ + DismissIORequest(req); +} + + +static void caamTransferRecall(IODeviceVector ioCaam, IODescriptor req) +{ + +} + + +static void HandleInterrupt(Address id) +{ + struct CAAM_DEVICE* local = (struct CAAM_DEVICE*)id; + Value InterruptStatus = INTERRUPT_AtomicWrite(&local->InterruptStatus, 0); + int i; + + /* Loop through descriptors and try to dismiss them */ + for (i = 0; i < DESC_COUNT; i++) { + struct DescStruct* desc = &local->DescArray[i]; + if (InterruptStatus & (1 << i)) { + desc->running = false; + if (GetIORequestStatus((IORequest)desc) == IORequestSuspended) { + ContinueIORequest((IORequest)desc); + } + else { + DismissIORequest((IORequest)desc); + } + } + } +} + + +static Error caamCreate(IODeviceVector ioCaam) +{ + return Success; +} + + +void InitCAAM(void) +{ + /* get IO vector and set it up */ + IODeviceVector ioCaam = &caam.caamVector; + unsigned int reg; + int i; + Error ret; + + + ioCaam->Create = &caamCreate; + ioCaam->ReadRegister = &caamReadRegister; + ioCaam->WriteRegister = &caamWriteRegister; + + ioCaam->TransferStart = &caamTransferStart; + ioCaam->TransferBuffer = &caamTransferBuffer; + ioCaam->TransferWrite = &caamTransferWrite; + ioCaam->TransferFinish = &caamTransferFinish; + ioCaam->TransferAbort = &caamTransferAbort; + ioCaam->TransferRecall = &caamTransferRecall; +#ifdef HARDWARE_CACHE_COHERENCY + ioCaam->IOSynchronizationNotRequired = 1; +#endif + + RegisterIODeviceVector(ioCaam, DRIVER_NAME); + RequestIOTerminationTask(ioCaam, 10); + + /* Initialize descriptors */ + for (i = 0; i < BUFFER_COUNT; i++) { + InitializeIODescriptor(ioCaam, &caam.IODescriptorArray[i]); + } + 
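+    /* Note: the IODescriptorArray entries initialized above are the
+     * INTEGRITY I/O buffer slots handed out per transfer (BUFFER_COUNT is
+     * MAX_BUF * DESC_COUNT, per the defines at the top of this file), while
+     * the DescArray entries initialized next hold the CAAM job descriptors
+     * that are built up and pushed through the job ring. */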
+ /* Initialize Descriptors */ + for (i = 0; i < DESC_COUNT; i++) { + InitializeIORequest(ioCaam, &caam.DescArray[i].TheIORequest, + IOREQUEST_STANDARD); + caam.DescArray[i].running = false; + caam.DescArray[i].caam = &caam; + } + + + /* call interrupt to make IORequests available */ + caam.InterruptStatus = 0; + INTERRUPT_InitCall(&caam.HandleInterruptCall, + &HandleInterrupt, "Start up CAAM IORequest"); + + /* set clock speed for CAAM. Setting it here to allow for restricting + access */ + #define REGS_CCM_BASE (0xf20c4000) + #define HW_CCM_CCGR0_ADDR (0xf20c4068) + #define CG(x) (3 << (x*2)) + + reg = CG(6) | CG(5) | CG(4); + *(volatile unsigned int*)HW_CCM_CCGR0_ADDR = + *(volatile unsigned int*)HW_CCM_CCGR0_ADDR | reg; + + /* set up job ring */ + + /* @TODO create partition in physical memory for job rings + current partition security is set to the default */ + for (i = 1; i < CAAM_PAGE_MAX; i++) { + ret = caamCreatePartition(i, i); + if (ret == 0) { + break; + } + + if (ret != MemoryMapMayNotBeEmpty) { + INTERRUPT_Panic(); + } + } + + if (ret != 0) { + INTERRUPT_Panic(); + } + + caam.ring.page = i; + caam.ring.JobIn = (CAAM_PAGE + (i << 12)); + caam.ring.JobOut = caam.ring.JobIn + 16; + caam.ring.Desc = caam.ring.JobOut + 16; + + /* set physical address of job rings */ + CAAM_WRITE(CAAM_IRBAR0, caam.ring.JobIn ^ 0xF0000000); + CAAM_WRITE(CAAM_ORBAR0, caam.ring.JobOut ^ 0xF0000000); + + /* Initialize job ring sizes to 1 */ + CAAM_WRITE(CAAM_IRSR0, 1); + CAAM_WRITE(CAAM_ORSR0, 1); + + /* set DECO watchdog to time out and flush jobs that cause the DECO to hang */ + CAAM_WRITE((CAAM_BASE | 0x0004), CAAM_READ(CAAM_BASE | 0x0004) | 0x40000000); + + /* start up RNG if not already started */ + if (caamInitRng(&caam) != 0) { + INTERRUPT_Panic(); + } +} + +void (*__ghsentry_bspuserinit_InitCAAM)(void) = &InitCAAM; + +#endif /* INTEGRITY */ diff --git a/client/wolfssl/wolfcrypt/src/port/caam/caam_init.c b/client/wolfssl/wolfcrypt/src/port/caam/caam_init.c new file mode 100644 index 0000000..014341c --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/caam/caam_init.c @@ -0,0 +1,289 @@ +/* caam_init.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#include + +#if defined(WOLFSSL_IMX6_CAAM) || defined(WOLFSSL_IMX6_CAAM_RNG) || \ + defined(WOLFSSL_IMX6_CAAM_BLOB) + +#include +#include +#include + +#define WC_CAAM_BLOB_SZ 48 + +#ifndef WC_CAAM_PASSWORD + #define WC_CAAM_PASSWORD "!systempassword" +#endif + +#if defined(__INTEGRITY) || defined(INTEGRITY) + #include + #include + static IODevice caam = NULLIODevice; +#endif + +#if defined(WOLFSSL_CAAM_PRINT) || defined(WOLFSSL_CAAM_DEBUG) +#include +#include + +static void wc_caamBanner(void) +{ + printf("********* wolfSSL Version %s : Printing Out CAAM Information ********\n", + LIBWOLFSSL_VERSION_STRING); + printf("CAAM Status [0x%8.8x] = 0x%8.8x\n", + CAAM_STATUS, WC_CAAM_READ(CAAM_STATUS)); + printf("CAAM Version MS Register [0x%8.8x] = 0x%8.8x\n", + CAAM_VERSION_MS, WC_CAAM_READ(CAAM_VERSION_MS)); + printf("CAAM Version LS Register [0x%8.8x] = 0x%8.8x\n", + CAAM_VERSION_LS, WC_CAAM_READ(CAAM_VERSION_LS)); + printf("CAAM Support MS Register [0x%8.8x] = 0x%8.8x\n", + CAMM_SUPPORT_MS, WC_CAAM_READ(CAMM_SUPPORT_MS)); + printf("CAAM Support LS [0x%8.8x] = 0x%8.8x\n", + CAMM_SUPPORT_LS, WC_CAAM_READ(CAMM_SUPPORT_LS)); + printf("********************************************************************\n\n"); +} +#endif + + +/* Allow runtime setting for CAAM IODevice in case user wants to use password + * at run time. + * + * returns 0 on success + * + * NOTE this is how IODevice is defined in INTEGRITY "typedef struct + * IODeviceStruct *IODevice;" + */ +int wc_caamSetResource(IODevice ioDev) +{ + WOLFSSL_MSG("Setting CAAM driver"); + caam = ioDev; + + return 0; +} + +/* Check hardware support + * + * returns 0 on success + */ +int wc_caamInit(void) +{ + int ret; + word32 reg; + + /* get the driver up */ + if (caam == NULLIODevice) { + WOLFSSL_MSG("Starting CAAM driver"); + if ((ret = (int)RequestResource((Object *)&caam, "wolfSSL_CAAM_Driver", + WC_CAAM_PASSWORD)) != (int)Success) { + WOLFSSL_MSG("Unable to get the CAAM IODevice, check password?"); + WOLFSSL_LEAVE("wc_caamInit: error from driver = ", ret); + ret = 0; /* not a hard failure because user can set resource */ + } + } + +#if defined(WOLFSSL_CAAM_PRINT) || defined(WOLFSSL_CAAM_DEBUG) + /* print out CAAM version/info and wolfSSL version */ + wc_caamBanner(); +#endif + + /* check that for implemented modules + * bits 0-3 AES, 4-7 DES, 12-15 Hashing , 16-19 RNG */ + reg = WC_CAAM_READ(CAMM_SUPPORT_LS); + + #ifndef WC_NO_RNG + if (((reg & 0x000F0000) >> 16) > 0) { + WOLFSSL_MSG("Found CAAM RNG hardware module"); + if ((WC_CAAM_READ(CAAM_RTMCTL) & 0x40000001) != 0x40000001) { + WOLFSSL_MSG("Error CAAM RNG has not been set up"); + } + } + #endif + + #ifndef NO_SHA256 + if ((reg & 0x0000F000) > 0) { + WOLFSSL_MSG("Found CAAM MDHA module"); + } + else { + WOLFSSL_MSG("Hashing not supported by CAAM"); + return WC_HW_E; + } + #endif + + #ifndef NO_AES + if ((reg & 0x0000000F) > 0) { + WOLFSSL_MSG("Found CAAM AES module"); + } + else { + WOLFSSL_MSG("AES not supported by CAAM"); + return WC_HW_E; + } + #endif + + (void)ret; + return 0; +} + + +int wc_caamFree(void) +{ + return 0; +} + + +word32 wc_caamReadRegister(word32 reg) +{ + Value out = 0; + + if (caam == NULLIODevice) { + WOLFSSL_MSG("Error CAAM IODevice not found! 
Bad password?");
+        return 0;
+    }
+
+    if (ReadIODeviceRegister(caam, reg, &out) != Success) {
+        WOLFSSL_MSG("Error reading register\n");
+    }
+
+    return (word32)out;
+}
+
+void wc_caamWriteRegister(word32 reg, word32 value)
+{
+    if (caam == NULLIODevice) {
+        WOLFSSL_MSG("Error CAAM IODevice not found! Bad password?");
+        return;
+    }
+
+    if (WriteIODeviceRegister(caam, reg, value) != Success) {
+        WOLFSSL_MSG("Error writing to register\n");
+    }
+}
+
+
+/* return 0 on success and WC_HW_E on failure. Can also return WC_HW_WAIT_E
+ * in the case that the driver is waiting for a resource or RAN_BLOCK_E if
+ * waiting for entropy. */
+int wc_caamAddAndWait(Buffer* buf, word32 arg[4], word32 type)
+{
+    int ret;
+    if (caam == NULLIODevice) {
+        WOLFSSL_MSG("Error CAAM IODevice not found! Bad password?");
+        return WC_HW_E;
+    }
+
+    if ((ret = SynchronousSendIORequest(caam, type, (const Value*)arg, buf))
+            != Success) {
+    #if defined(WOLFSSL_CAAM_PRINT) || defined(WOLFSSL_CAAM_DEBUG)
+        printf("ret of SynchronousSendIORequest = %d type = %d\n", ret, type);
+    #endif
+
+        /* if waiting for resource or RNG return waiting */
+        if (ret == Waiting) {
+            WOLFSSL_MSG("Waiting on entropy from driver");
+            return RAN_BLOCK_E;
+        }
+
+        if (ret == ResourceNotAvailable) {
+            WOLFSSL_MSG("Waiting on CAAM driver");
+            return WC_HW_WAIT_E;
+        }
+
+        return WC_HW_E;
+    }
+
+    (void)ret;
+    return 0;
+}
+
+
+int wc_caamCreateBlob(byte* data, word32 dataSz, byte* out, word32* outSz)
+{
+    Buffer in[3];
+    word32 arg[4];
+    int ret;
+    word32 local[2] = {0,0};
+
+    if (data == NULL || out == NULL || outSz == NULL ||
+            *outSz < dataSz + WC_CAAM_BLOB_SZ) {
+        return BAD_FUNC_ARG;
+    }
+
+    in[0].BufferType = DataBuffer;
+    in[0].TheAddress = (Address)local;
+    in[0].Length = sizeof(local);
+
+    in[1].BufferType = DataBuffer;
+    in[1].TheAddress = (Address)data;
+    in[1].Length = dataSz;
+
+    in[2].BufferType = DataBuffer | LastBuffer;
+    in[2].TheAddress = (Address)out;
+    in[2].Length = dataSz + WC_CAAM_BLOB_SZ;
+
+    arg[2] = dataSz;
+
+    if ((ret = wc_caamAddAndWait(in, arg, CAAM_BLOB_ENCAP)) != 0) {
+        WOLFSSL_MSG("Error with CAAM blob create");
+        return ret;
+    }
+
+    *outSz = dataSz + WC_CAAM_BLOB_SZ;
+    return 0;
+}
+
+
+int wc_caamOpenBlob(byte* data, word32 dataSz, byte* out, word32* outSz)
+{
+    Buffer in[3];
+    word32 arg[4];
+    int ret;
+    word32 local[2] = {0,0};
+
+    if (data == NULL || out == NULL || outSz == NULL ||
+            *outSz < dataSz - WC_CAAM_BLOB_SZ) {
+        return BAD_FUNC_ARG;
+    }
+
+    in[0].BufferType = DataBuffer;
+    in[0].TheAddress = (Address)local;
+    in[0].Length = sizeof(local);
+
+    in[1].BufferType = DataBuffer;
+    in[1].TheAddress = (Address)data;
+    in[1].Length = dataSz;
+
+    in[2].BufferType = DataBuffer | LastBuffer;
+    in[2].TheAddress = (Address)out;
+    in[2].Length = dataSz - WC_CAAM_BLOB_SZ;
+
+    arg[2] = dataSz;
+
+    if ((ret = wc_caamAddAndWait(in, arg, CAAM_BLOB_DECAP)) != 0) {
+        WOLFSSL_MSG("Error with CAAM blob open");
+        return ret;
+    }
+
+    *outSz = dataSz - WC_CAAM_BLOB_SZ;
+    return 0;
+}
+
+#endif /* WOLFSSL_IMX6_CAAM */
+
diff --git a/client/wolfssl/wolfcrypt/src/port/caam/caam_sha.c b/client/wolfssl/wolfcrypt/src/port/caam/caam_sha.c
new file mode 100644
index 0000000..74d62fb
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/caam/caam_sha.c
@@ -0,0 +1,397 @@
+/* caam_sha.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#include + +#if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH) + +#include +#include + +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + + +#include +#include +#include + +#if defined(WOLFSSL_CAAM_DEBUG) || defined(WOLFSSL_CAAM_PRINT) +#include +#endif + +#ifndef NO_SHA +#include +#endif + +#if !defined(NO_SHA256) || defined(WOLFSSL_SHA224) +#include +#endif + +#if defined(WOLFSSL_SHA384) || defined(WOLFSSL_SHA512) +#include +#endif + +#ifndef NO_MD5 +#include +#endif + +/****************************************************************************** + Common Code Between SHA Functions + ****************************************************************************/ + +static int _InitSha(wc_Sha* sha, void* heap, int devId, word32 digestSz, + word32 type) +{ + Buffer buf[1]; + word32 arg[4]; + int ret; + + (void)heap; + (void)devId; + + if (sha == NULL) { + return BAD_FUNC_ARG; + } + + XMEMSET(sha, 0, sizeof(Sha)); + + /* Set buffer for context */ + buf[0].BufferType = DataBuffer | LastBuffer; + buf[0].TheAddress = (Address)sha->ctx; + buf[0].Length = digestSz + WC_CAAM_CTXLEN; + buf[0].Transferred = 0; + + arg[0] = CAAM_ALG_INIT; + arg[1] = digestSz + WC_CAAM_CTXLEN; + + if ((ret = wc_caamAddAndWait(buf, arg, type)) != 0) { + WOLFSSL_MSG("Error with CAAM SHA init"); + return ret; + } + + return 0; +} + + +static int _ShaUpdate(wc_Sha* sha, const byte* data, word32 len, word32 digestSz, + word32 type) +{ + Buffer buf[2]; + word32 arg[4]; + int ret; + byte* local; + + if (sha == NULL ||(data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + + if (len == 0) return 0; /* nothing to do */ + + local = (byte*)sha->buffer; + /* check for filling out existing buffer */ + if (sha->buffLen > 0) { + word32 add = min(len, WC_CAAM_HASH_BLOCK - sha->buffLen); + XMEMCPY(&local[sha->buffLen], data, add); + + sha->buffLen += add; + data += add; + len -= add; + + if (sha->buffLen == WC_CAAM_HASH_BLOCK) { + /* Set buffer for context */ + buf[0].BufferType = DataBuffer; + buf[0].TheAddress = (Address)sha->ctx; + buf[0].Length = digestSz + WC_CAAM_CTXLEN; + buf[0].Transferred = 0; + + /* data to update with */ + buf[1].BufferType = DataBuffer | LastBuffer; + buf[1].TheAddress = (Address)sha->buffer; + buf[1].Length = sha->buffLen; + buf[1].Transferred = 0; + + arg[0] = CAAM_ALG_UPDATE; + arg[1] = digestSz + WC_CAAM_CTXLEN; + + if ((ret = wc_caamAddAndWait(buf, arg, type)) != 0) { + WOLFSSL_MSG("Error with CAAM SHA update"); + return ret; + } + sha->buffLen = 0; /* cleared out buffer */ + } + } + + /* check if multiple full blocks can be done */ + if (len >= WC_CAAM_HASH_BLOCK) { + word32 sz = len / WC_CAAM_HASH_BLOCK; + sz = sz * WC_CAAM_HASH_BLOCK; + + /* Set buffer for context */ + buf[0].BufferType = DataBuffer; + buf[0].TheAddress = 
(Address)sha->ctx;
+        buf[0].Length = digestSz + WC_CAAM_CTXLEN;
+        buf[0].Transferred = 0;
+
+        /* data to update with */
+        buf[1].BufferType = DataBuffer | LastBuffer;
+        buf[1].TheAddress = (Address)data;
+        buf[1].Length = sz;
+        buf[1].Transferred = 0;
+
+        arg[0] = CAAM_ALG_UPDATE;
+        arg[1] = digestSz + WC_CAAM_CTXLEN;
+
+        if ((ret = wc_caamAddAndWait(buf, arg, type)) != 0) {
+            WOLFSSL_MSG("Error with CAAM SHA update");
+            return ret;
+        }
+
+        len -= sz;
+        data += sz;
+    }
+
+    /* check for left overs */
+    if (len > 0) {
+        word32 add = min(len, WC_CAAM_HASH_BLOCK - sha->buffLen);
+        XMEMCPY(&local[sha->buffLen], data, add);
+        sha->buffLen += add;
+    }
+
+    return 0;
+}
+
+
+static int _ShaFinal(wc_Sha* sha, byte* out, word32 digestSz,
+        word32 type)
+{
+    Buffer buf[2];
+    word32 arg[4];
+    int ret;
+
+    if (sha == NULL || out == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* Set buffer for context */
+    buf[0].BufferType = DataBuffer;
+    buf[0].TheAddress = (Address)sha->ctx;
+    buf[0].Length = digestSz + WC_CAAM_CTXLEN;
+    buf[0].Transferred = 0;
+
+    /* add any potential left overs */
+    buf[1].BufferType = DataBuffer | LastBuffer;
+    buf[1].TheAddress = (Address)sha->buffer;
+    buf[1].Length = sha->buffLen;
+    buf[1].Transferred = 0;
+
+    arg[0] = CAAM_ALG_FINAL;
+    arg[1] = digestSz + WC_CAAM_CTXLEN;
+
+    if ((ret = wc_caamAddAndWait(buf, arg, type)) != 0) {
+        WOLFSSL_MSG("Error with CAAM SHA final");
+        return ret;
+    }
+
+    return 0;
+}
+
+/******************************************************************************
+  MD5
+  ****************************************************************************/
+#if !defined(NO_MD5)
+int wc_InitMd5_ex(wc_Md5* sha, void* heap, int devId)
+{
+    return _InitSha(sha, heap, devId, MD5_DIGEST_SIZE, CAAM_MD5);
+}
+
+
+int wc_Md5Update(wc_Md5* sha, const byte* data, word32 len)
+{
+    return _ShaUpdate(sha, data, len, MD5_DIGEST_SIZE, CAAM_MD5);
+}
+
+
+int wc_Md5Final(wc_Md5* sha, byte* hash)
+{
+    int ret;
+    if ((ret = _ShaFinal(sha, hash, MD5_DIGEST_SIZE, CAAM_MD5)) != 0) {
+        return ret;
+    }
+
+    XMEMCPY(hash, (byte*)sha->ctx, MD5_DIGEST_SIZE);
+    return _InitSha(sha, NULL, 0, MD5_DIGEST_SIZE, CAAM_MD5);
+}
+#endif /* !NO_MD5 */
+
+
+/******************************************************************************
+  SHA 1
+  ****************************************************************************/
+#if !defined(NO_SHA)
+int wc_InitSha_ex(wc_Sha* sha, void* heap, int devId)
+{
+    return _InitSha(sha, heap, devId, SHA_DIGEST_SIZE, CAAM_SHA);
+}
+
+
+int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len)
+{
+    return _ShaUpdate(sha, data, len, SHA_DIGEST_SIZE, CAAM_SHA);
+}
+
+
+int wc_ShaFinal(wc_Sha* sha, byte* out)
+{
+    int ret;
+    if ((ret = _ShaFinal(sha, out, SHA_DIGEST_SIZE, CAAM_SHA)) != 0) {
+        return ret;
+    }
+
+    XMEMCPY(out, (byte*)sha->ctx, SHA_DIGEST_SIZE);
+    return _InitSha(sha, NULL, 0, SHA_DIGEST_SIZE, CAAM_SHA);
+}
+#endif /* !NO_SHA */
+
+
+/******************************************************************************
+  SHA 224
+  ****************************************************************************/
+#ifdef WOLFSSL_SHA224
+int wc_InitSha224_ex(wc_Sha224* sha, void* heap, int devId)
+{
+    return _InitSha(sha, heap, devId, SHA256_DIGEST_SIZE, CAAM_SHA224);
+}
+
+
+int wc_Sha224Update(wc_Sha224* sha, const byte* data, word32 len)
+{
+    return _ShaUpdate(sha, data, len, SHA256_DIGEST_SIZE, CAAM_SHA224);
+}
+
+
+int wc_Sha224Final(wc_Sha224* sha, byte* out)
+{
+    int ret;
+    if ((ret = _ShaFinal(sha, out, SHA256_DIGEST_SIZE, CAAM_SHA224)) != 0) {
+        return ret;
+    }
+
+    
XMEMCPY(out, (byte*)sha->ctx, SHA224_DIGEST_SIZE); + return _InitSha(sha, NULL, 0, SHA256_DIGEST_SIZE, CAAM_SHA224); +} +#endif /* WOLFSSL_SHA224 */ + + +/****************************************************************************** + SHA 256 + ****************************************************************************/ +#if !defined(NO_SHA256) +int wc_InitSha256_ex(wc_Sha256* sha, void* heap, int devId) +{ + return _InitSha(sha, heap, devId, SHA256_DIGEST_SIZE, CAAM_SHA256); +} + + +int wc_Sha256Update(wc_Sha256* sha, const byte* data, word32 len) +{ + return _ShaUpdate(sha, data, len, SHA256_DIGEST_SIZE, CAAM_SHA256); +} + + +int wc_Sha256Final(wc_Sha256* sha, byte* out) +{ + int ret; + if ((ret = _ShaFinal(sha, out, SHA256_DIGEST_SIZE, CAAM_SHA256)) != 0) { + return ret; + } + + XMEMCPY(out, (byte*)sha->ctx, SHA256_DIGEST_SIZE); + return _InitSha(sha, NULL, 0, SHA256_DIGEST_SIZE, CAAM_SHA256); +} +#endif /* !NO_SHA256 */ + + +/****************************************************************************** + SHA 384 + ****************************************************************************/ +#ifdef WOLFSSL_SHA384 +int wc_InitSha384_ex(wc_Sha384* sha, void* heap, int devId) +{ + return _InitSha(sha, heap, devId, SHA512_DIGEST_SIZE, CAAM_SHA384); +} + + +int wc_Sha384Update(wc_Sha384* sha, const byte* data, word32 len) +{ + return _ShaUpdate(sha, data, len, SHA512_DIGEST_SIZE, CAAM_SHA384); +} + + +int wc_Sha384Final(wc_Sha384* sha, byte* out) +{ + int ret; + if ((ret = _ShaFinal(sha, out, SHA512_DIGEST_SIZE, CAAM_SHA384)) != 0) { + return ret; + } + + XMEMCPY(out, (byte*)sha->ctx, SHA384_DIGEST_SIZE); + return _InitSha(sha, NULL, 0, SHA512_DIGEST_SIZE, CAAM_SHA384); +} +#endif /* WOLFSSL_SHA384 */ + + + +/****************************************************************************** + SHA 512 + ****************************************************************************/ +#ifdef WOLFSSL_SHA512 +int wc_InitSha512_ex(wc_Sha512* sha, void* heap, int devId) +{ + return _InitSha(sha, heap, devId, SHA512_DIGEST_SIZE, CAAM_SHA512); +} + + +int wc_Sha512Update(wc_Sha512* sha, const byte* data, word32 len) +{ + return _ShaUpdate(sha, data, len, SHA512_DIGEST_SIZE, CAAM_SHA512); +} + + +int wc_Sha512Final(wc_Sha512* sha, byte* out) +{ + int ret; + if ((ret = _ShaFinal(sha, out, SHA512_DIGEST_SIZE, CAAM_SHA512)) != 0) { + return ret; + } + + XMEMCPY(out, (byte*)sha->ctx, SHA512_DIGEST_SIZE); + return _InitSha(sha, NULL, 0, SHA512_DIGEST_SIZE, CAAM_SHA512); +} +#endif /* WOLFSSL_SHA512 */ + +#endif /* WOLFSSL_IMX6_CAAM */ + diff --git a/client/wolfssl/wolfcrypt/src/port/cavium/README.md b/client/wolfssl/wolfcrypt/src/port/cavium/README.md new file mode 100644 index 0000000..b98d866 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/cavium/README.md @@ -0,0 +1,3 @@ +# Cavium Nitrox III/V Support + +Please contact wolfSSL at info@wolfssl.com to request an evaluation. diff --git a/client/wolfssl/wolfcrypt/src/port/cavium/README_Octeon.md b/client/wolfssl/wolfcrypt/src/port/cavium/README_Octeon.md new file mode 100644 index 0000000..b2670d0 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/cavium/README_Octeon.md @@ -0,0 +1,3 @@ +# Cavium Octeon III CN7300 + +Please contact wolfSSL at info@wolfssl.com to request an evaluation. 
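For context, the caam_sha.c port above keeps the standard wolfCrypt hash API, so existing callers are unchanged and only the init/update/final work is offloaded to the CAAM. Below is a minimal usage sketch, not part of the patch itself, assuming a WOLFSSL_IMX6_CAAM build on INTEGRITY with the usual wolfSSL header paths; the helper name caam_sha256_demo is illustrative only:

    #include <wolfssl/wolfcrypt/settings.h>
    #include <wolfssl/wolfcrypt/sha256.h>
    #include <wolfssl/wolfcrypt/port/caam/wolfcaam.h>

    /* Hash a buffer through the CAAM-backed SHA-256 wrappers. The driver
     * must be up first: wc_caamInit() requests the "wolfSSL_CAAM_Driver"
     * IODevice that caam_driver.c registers at boot. */
    static int caam_sha256_demo(const byte* data, word32 len,
                                byte digest[SHA256_DIGEST_SIZE])
    {
        wc_Sha256 sha;
        int ret;

        ret = wc_caamInit();
        if (ret != 0)
            return ret;

        ret = wc_InitSha256_ex(&sha, NULL, INVALID_DEVID);
        if (ret == 0)
            ret = wc_Sha256Update(&sha, data, len);
        if (ret == 0)
            ret = wc_Sha256Final(&sha, digest); /* copies out, then re-inits */

        wc_caamFree();
        return ret;
    }

Note that in this port wc_Sha256Final both copies the digest out of the CAAM context and re-initializes the state, so the same wc_Sha256 object can be reused immediately.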
diff --git a/client/wolfssl/wolfcrypt/src/port/cavium/cavium_nitrox.c b/client/wolfssl/wolfcrypt/src/port/cavium/cavium_nitrox.c
new file mode 100644
index 0000000..e69de29
diff --git a/client/wolfssl/wolfcrypt/src/port/cavium/cavium_octeon_sync.c b/client/wolfssl/wolfcrypt/src/port/cavium/cavium_octeon_sync.c
new file mode 100644
index 0000000..078e8cb
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/cavium/cavium_octeon_sync.c
@@ -0,0 +1,879 @@
+/* cavium_octeon_sync.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL. (formerly known as CyaSSL)
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+#ifdef HAVE_CAVIUM_OCTEON_SYNC
+
+/* Setting NO_MAIN_DRIVER here because this file ends up building
+ * in the library sources which doesn't have NO_MAIN_DRIVER set,
+ * as the library expects main to be somewhere else. */
+#undef NO_MAIN_DRIVER
+#define NO_MAIN_DRIVER
+
+#include
+#include
+#include
+#include
+
+#include "cvmx.h"
+#include "cvmx-asm.h"
+#include "cvmx-key.h"
+#include "cvmx-swap.h"
+
+#ifndef NO_DES3
+    #include <wolfssl/wolfcrypt/des3.h>
+#endif
+#ifndef NO_AES
+    #include <wolfssl/wolfcrypt/aes.h>
+#endif
+
+#define NOOPT __attribute__((optimize("O0")))
+
+static int devId = 1234;
+
+#ifndef NO_DES3
+static int Octeon_Des3_CbcEncrypt(Des3* des3,
+    uint64_t *inp64, uint64_t *outp64, size_t inl)
+{
+    register uint64_t i0, r0;
+    uint64_t *key, *iv;
+
+    if (des3 == NULL || inp64 == NULL || outp64 == NULL)
+        return BAD_FUNC_ARG;
+
+    /* expects 64-bit aligned value */
+    key = (uint64_t*)des3->devKey;
+    CVMX_MT_3DES_KEY(key[0], 0);
+    CVMX_MT_3DES_KEY(key[1], 1);
+    CVMX_MT_3DES_KEY(key[2], 2);
+    iv = (uint64_t*)des3->reg;
+    CVMX_MT_3DES_IV(iv[0]);
+
+    CVMX_PREFETCH0(inp64);
+
+    i0 = *inp64;
+
+    /* DES3 assembly can handle 16-byte chunks */
+    if (inl >= 16) {
+        CVMX_MT_3DES_ENC_CBC(i0);
+        inl -= 8;
+        inp64++;
+        outp64++;
+
+        if (inl >= 8) {
+            i0 = inp64[0];
+            CVMX_MF_3DES_RESULT(r0);
+            CVMX_MT_3DES_ENC_CBC(i0);
+
+            for (;;) {
+                outp64[-1] = r0;
+                inl -= 8;
+                inp64++;
+                outp64++;
+                i0 = *inp64;
+
+                if (inl < 8)
+                    break;
+
+                CVMX_PREFETCH(inp64, 64);
+                CVMX_MF_3DES_RESULT(r0);
+                CVMX_MT_3DES_ENC_CBC(i0);
+            }
+        }
+        CVMX_MF_3DES_RESULT(r0);
+        outp64[-1] = r0;
+    }
+    /* remainder */
+    if (inl > 0) {
+        uint64_t r = 0;
+        if (inl <= 8) {
+            XMEMCPY(&r, inp64, inl);
+            CVMX_MT_3DES_ENC_CBC(r);
+            CVMX_MF_3DES_RESULT(*outp64);
+        }
+        else {
+            i0 = *inp64;
+            CVMX_MT_3DES_ENC_CBC(i0);
+            CVMX_MF_3DES_RESULT(*outp64);
+            inp64++, outp64++;
+
+            XMEMCPY(&r, inp64, inl);
+            CVMX_MT_3DES_ENC_CBC(r);
+            CVMX_MF_3DES_RESULT(*outp64);
+        }
+    }
+
+    CVMX_MT_3DES_IV(iv[0]);
+
+    return 0;
+}
+
+static int Octeon_Des3_CbcDecrypt(Des3* des3,
+    uint64_t *inp64, uint64_t *outp64, size_t inl)
+{
+    register uint64_t i0, r0;
+    uint64_t *key, *iv;
+
+    if (des3 == NULL || inp64 == NULL || outp64 == NULL)
return BAD_FUNC_ARG; + + /* expects 64-bit aligned value */ + key = (uint64_t*)des3->devKey; + CVMX_MT_3DES_KEY(key[0], 0); + CVMX_MT_3DES_KEY(key[1], 1); + CVMX_MT_3DES_KEY(key[2], 2); + + iv = (uint64_t*)des3->reg; + CVMX_MT_3DES_IV(iv[0]); + + CVMX_PREFETCH0(inp64); + + i0 = *inp64; + + /* DES3 assembly can handle 16-byte chunks */ + if (inl >= 16) { + CVMX_MT_3DES_DEC_CBC(i0); + inl -= 8; + inp64++; + outp64++; + + if (inl >= 8) { + i0 = inp64[0]; + CVMX_MF_3DES_RESULT(r0); + CVMX_MT_3DES_DEC_CBC(i0); + + for (;;) { + outp64[-1] = r0; + inl -= 8; + inp64++; + outp64++; + i0 = *inp64; + + if (inl < 8) + break; + + CVMX_PREFETCH(inp64, 64); + CVMX_MF_3DES_RESULT(r0); + CVMX_MT_3DES_DEC_CBC(i0); + } + } + + CVMX_MF_3DES_RESULT(r0); + outp64[-1] = r0; + } + /* remainder */ + if (inl > 0) { + if (inl <= 8) { + uint64_t r = 0; + XMEMCPY(&r, inp64, inl); + CVMX_MT_3DES_DEC_CBC(r); + CVMX_MF_3DES_RESULT(*outp64); + } + else { + uint64_t r = 0; + i0 = *inp64; + CVMX_MT_3DES_DEC_CBC(i0); + CVMX_MF_3DES_RESULT(*outp64); + inp64++, outp64++; + + XMEMCPY(&r, inp64, inl); + CVMX_MT_3DES_DEC_CBC(r); + CVMX_MF_3DES_RESULT(*outp64); + } + } + + CVMX_MT_3DES_IV(iv[0]); + + return 0; +} +#endif /* !NO_DES3 */ + + +#ifndef NO_AES + +#ifdef HAVE_AES_CBC +static int Octeon_AesCbc_Encrypt(Aes *aes, + uint64_t *inp64, uint64_t *outp64, size_t inl) +{ + register uint64_t i0, i1, r0, r1; + uint64_t *key, *iv; + + if (aes == NULL || inp64 == NULL || outp64 == NULL) { + return BAD_FUNC_ARG; + } + + iv = (uint64_t*)aes->reg; + CVMX_MT_AES_IV(iv[0], 0); + CVMX_MT_AES_IV(iv[1], 1); + + key = (uint64_t*)aes->devKey; + CVMX_MT_AES_KEY(key[0], 0); + CVMX_MT_AES_KEY(key[1], 1); + CVMX_MT_AES_KEY(key[2], 2); + CVMX_MT_AES_KEY(key[3], 3); + + CVMX_MT_AES_KEYLENGTH(aes->keylen/8 - 1); + + CVMX_PREFETCH0(inp64); + + i0 = inp64[0]; + i1 = inp64[1]; + + /* AES assembly can handle 32-byte chunks */ + if (inl >= 32) { + CVMX_MT_AES_ENC_CBC0(i0); + CVMX_MT_AES_ENC_CBC1(i1); + inl -= 16; + inp64 += 2; + outp64 += 2; + + if (inl >= 16) { + CVMX_MF_AES_RESULT(r0, 0); + CVMX_MF_AES_RESULT(r1, 1); + i0 = inp64[0]; + i1 = inp64[1]; + CVMX_MT_AES_ENC_CBC0(i0); + CVMX_MT_AES_ENC_CBC1(i1); + + for (;;) { + outp64[-2] = r0; + outp64[-1] = r1; + outp64 += 2; + inp64 += 2; + inl -= 16; + i0 = inp64[0]; + i1 = inp64[1]; + + if (inl < 16) + break; + + CVMX_PREFETCH(inp64, 64); + CVMX_MF_AES_RESULT(r0, 0); + CVMX_MF_AES_RESULT(r1, 1); + CVMX_MT_AES_ENC_CBC0(i0); + CVMX_MT_AES_ENC_CBC1(i1); + } + } + + CVMX_MF_AES_RESULT(r0, 0); + CVMX_MF_AES_RESULT(r1, 1); + outp64[-2] = r0; + outp64[-1] = r1; + } + /* remainder */ + if (inl > 0) { + uint64_t in64[2] = { 0, 0 }; + if (inl <= 16) { + XMEMCPY(in64, inp64, inl); + CVMX_MT_AES_ENC_CBC0(in64[0]); + CVMX_MT_AES_ENC_CBC1(in64[1]); + CVMX_MF_AES_RESULT(r0, 0); + CVMX_MF_AES_RESULT(r1, 1); + outp64[0] = r0; + outp64[1] = r1; + } + else { + CVMX_MT_AES_ENC_CBC0(i0); + CVMX_MT_AES_ENC_CBC1(i1); + CVMX_MF_AES_RESULT(r0, 0); + CVMX_MF_AES_RESULT(r1, 1); + inl -= 16; + outp64[0] = r0; + outp64[1] = r1; + inp64 += 2; + outp64 += 2; + XMEMCPY(in64, inp64, inl); + CVMX_MT_AES_ENC_CBC0(in64[0]); + CVMX_MT_AES_ENC_CBC1(in64[1]); + CVMX_MF_AES_RESULT(r0, 0); + CVMX_MF_AES_RESULT(r1, 1); + outp64[0] = r0; + outp64[1] = r1; + } + } + + CVMX_MF_AES_IV(iv[0], 0); + CVMX_MF_AES_IV(iv[1], 1); + + return 0; +} + +static int Octeon_AesCbc_Decrypt(Aes *aes, + uint64_t *inp64, uint64_t *outp64, size_t inl) +{ + register uint64_t i0, i1, r0, r1; + uint64_t *key, *iv; + + if (aes == NULL || inp64 == NULL || outp64 == NULL) { 
+ return BAD_FUNC_ARG; + } + + iv = (uint64_t*)aes->reg; + key = (uint64_t*)aes->devKey; + + CVMX_MT_AES_IV(iv[0], 0); + CVMX_MT_AES_IV(iv[1], 1); + + CVMX_MT_AES_KEY(key[0], 0); + CVMX_MT_AES_KEY(key[1], 1); + CVMX_MT_AES_KEY(key[2], 2); + CVMX_MT_AES_KEY(key[3], 3); + CVMX_MT_AES_KEYLENGTH(aes->keylen/8 - 1); + + CVMX_PREFETCH0(inp64); + + i0 = inp64[0]; + i1 = inp64[1]; + + /* AES assembly can handle 32-byte chunks */ + if (inl >= 32) { + CVMX_MT_AES_DEC_CBC0(i0); + CVMX_MT_AES_DEC_CBC1(i1); + inp64 += 2; + outp64 += 2; + inl -= 16; + + if (inl >= 16) { + i0 = inp64[0]; + i1 = inp64[1]; + CVMX_MF_AES_RESULT(r0, 0); + CVMX_MF_AES_RESULT(r1, 1); + CVMX_MT_AES_DEC_CBC0(i0); + CVMX_MT_AES_DEC_CBC1(i1); + + for (;;) { + outp64[-2] = r0; + outp64[-1] = r1; + outp64 += 2; + inp64 += 2; + inl -= 16; + i0 = inp64[0]; + i1 = inp64[1]; + + if (inl < 16) + break; + + CVMX_PREFETCH(inp64, 64); + CVMX_MF_AES_RESULT(r0, 0); + CVMX_MF_AES_RESULT(r1, 1); + CVMX_MT_AES_DEC_CBC0(i0); + CVMX_MT_AES_DEC_CBC1(i1); + } + } + + CVMX_MF_AES_RESULT(r0, 0); + CVMX_MF_AES_RESULT(r1, 1); + outp64[-2] = r0; + outp64[-1] = r1; + } + /* remainder */ + if (inl > 0) { + uint64_t in64[2] = { 0, 0 }; + XMEMCPY(in64, inp64, inl); + CVMX_MT_AES_DEC_CBC0(in64[0]); + CVMX_MT_AES_DEC_CBC1(in64[1]); + CVMX_MF_AES_RESULT(r0, 0); + CVMX_MF_AES_RESULT(r1, 1); + outp64[0] = r0; + outp64[1] = r1; + } + + CVMX_MF_AES_IV(iv[0], 0); + CVMX_MF_AES_IV(iv[1], 1); + + return 0; +} +#endif /* HAVE_AES_CBC */ + + +#ifdef HAVE_AESGCM + +#define CVM_AES_RD_RESULT_WR_DATA(in1, in2, out1, out2) \ + asm volatile(\ + ".set noreorder \n" \ + "dmfc2 %[r1],0x0100\n" \ + "dmfc2 %[r2],0x0101\n" \ + "dmtc2 %[r3],0x010a\n" \ + "dmtc2 %[r4],0x310b\n" \ + ".set reorder \n" \ + : [r1] "=&d"(in1) , [r2] "=&d"(in2) \ + : [r3] "d"(out1), [r4] "d"(out2)) + +static NOOPT void Octeon_GHASH_Restore(word16 poly, byte* h) +{ + word64* bigH = (word64*)h; + CVMX_MT_GFM_POLY((word64)poly); + CVMX_MT_GFM_MUL(bigH[0], 0); + CVMX_MT_GFM_MUL(bigH[1], 1); +} + + +static NOOPT void Octeon_GHASH_Init(word16 poly, byte* h) +{ + Octeon_GHASH_Restore(poly, h); + CVMX_MT_GFM_RESINP(0, 0); + CVMX_MT_GFM_RESINP(0, 1); +} + + +static NOOPT void Octeon_GHASH_Update(byte* in) +{ + word64* bigIn = (word64*)in; + CVMX_MT_GFM_XOR0(bigIn[0]); + CVMX_MT_GFM_XORMUL1(bigIn[1]); +} + + +static NOOPT void Octeon_GHASH_Final(byte* out, word64 authInSz, word64 inSz) +{ + word64* bigOut = (word64*)out; + + CVMX_MT_GFM_XOR0(authInSz * 8); + CVMX_MT_GFM_XORMUL1(inSz * 8); + CVMX_MF_GFM_RESINP(bigOut[0], 0); + CVMX_MF_GFM_RESINP(bigOut[1], 1); +} + + +/* Sets the Octeon key with the key found in the Aes record. 
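 * On the first call this also derives and caches the GHASH key H (the AES
 * encryption of the all-zero block), tracked via the aes->keySet flag.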
*/ +static NOOPT int Octeon_AesGcm_SetKey(Aes* aes) +{ + int ret = 0; + + if (aes == NULL) + ret = BAD_FUNC_ARG; + + if (ret == 0) { + uint64_t* key = (uint64_t*)aes->devKey; + + CVMX_MT_AES_KEY(key[0], 0); + CVMX_MT_AES_KEY(key[1], 1); + CVMX_MT_AES_KEY(key[2], 2); + CVMX_MT_AES_KEY(key[3], 3); + CVMX_MT_AES_KEYLENGTH((aes->keylen / 8) - 1); + + if (!aes->keySet) { + uint64_t* bigH = (uint64_t*)aes->H; + CVMX_MT_AES_ENC0(0); + CVMX_MT_AES_ENC1(0); + CVMX_MF_AES_RESULT(bigH[0], 0); + CVMX_MF_AES_RESULT(bigH[1], 1); + aes->keySet = 1; + } + } + + return ret; +} + + +static NOOPT int Octeon_AesGcm_SetIV(Aes* aes, byte* iv, word32 ivSz) +{ + int ret = 0; + + if (aes == NULL || iv == NULL) + ret = BAD_FUNC_ARG; + + if (ret == 0) { + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMSET((byte*)aes->reg, 0, sizeof(aes->reg)); + XMEMCPY((byte*)aes->reg, iv, ivSz); + aes->reg[3] = 1; + } + else { + int blocks, remainder, i; + byte aesBlock[AES_BLOCK_SIZE]; + + blocks = ivSz / AES_BLOCK_SIZE; + remainder = ivSz % AES_BLOCK_SIZE; + + for (i = 0; i < blocks; i++, iv += AES_BLOCK_SIZE) + Octeon_GHASH_Update(iv); + + XMEMSET(aesBlock, 0, sizeof(aesBlock)); + for (i = 0; i < remainder; i++) + aesBlock[i] = iv[i]; + Octeon_GHASH_Update(aesBlock); + + Octeon_GHASH_Final((byte*)aes->reg, 0, ivSz); + } + + aes->y0 = aes->reg[3]; + aes->reg[3]++; + + Octeon_GHASH_Init(0xe100, aes->H); + } + + return ret; +} + + +static NOOPT int Octeon_AesGcm_SetAAD(Aes* aes, byte* aad, word32 aadSz) +{ + word64* p; + ALIGN16 byte aesBlock[AES_BLOCK_SIZE]; + int blocks, remainder, i; + + if (aes == NULL || (aadSz != 0 && aad == NULL)) + return BAD_FUNC_ARG; + + if (aadSz == 0) + return 0; + + blocks = aadSz / AES_BLOCK_SIZE; + remainder = aadSz % AES_BLOCK_SIZE; + + Octeon_GHASH_Restore(0xe100, aes->H); + + p = (word64*)aesBlock; + + for (i = 0; i < blocks; i++, aad += AES_BLOCK_SIZE) { + CVMX_LOADUNA_INT64(p[0], aad, 0); + CVMX_LOADUNA_INT64(p[1], aad, 8); + CVMX_MT_GFM_XOR0(p[0]); + CVMX_MT_GFM_XORMUL1(p[1]); + } + + XMEMSET(aesBlock, 0, sizeof(aesBlock)); + + for (i = 0; i < remainder; i++) + aesBlock[i] = aad[i]; + + CVMX_MT_GFM_XOR0(p[0]); + CVMX_MT_GFM_XORMUL1(p[1]); + + return 0; +} + + +static int Octeon_AesGcm_SetEncrypt(Aes* aes, byte* in, byte* out, word32 inSz, + int encrypt) +{ + word32 i, blocks, remainder; + ALIGN16 byte aesBlockIn[AES_BLOCK_SIZE]; + ALIGN16 byte aesBlockOut[AES_BLOCK_SIZE]; + word64* pIn; + word64* pOut; + word64* pIv; + + if (aes == NULL || in == NULL || out == NULL) + return BAD_FUNC_ARG; + + pIn = (word64*)aesBlockIn; + pOut = (word64*)aesBlockOut; + pIv = (word64*)aes->reg; + + CVMX_PREFETCH0(in); + + CVMX_MT_AES_ENC0(pIv[0]); + CVMX_MT_AES_ENC1(pIv[1]); + + blocks = inSz / AES_BLOCK_SIZE; + remainder = inSz % AES_BLOCK_SIZE; + + for (i = 0; i < blocks; + i++, in += AES_BLOCK_SIZE, out += AES_BLOCK_SIZE) { + CVMX_PREFETCH128(in); + aes->reg[3]++; + + CVMX_LOADUNA_INT64(pIn[0], in, 0); + CVMX_LOADUNA_INT64(pIn[1], in, 8); + + CVM_AES_RD_RESULT_WR_DATA(pOut[0], pOut[1], pIv[0], pIv[1]); + + if (encrypt) { + pOut[0] ^= pIn[0]; + pOut[1] ^= pIn[1]; + CVMX_MT_GFM_XOR0(pOut[0]); + CVMX_MT_GFM_XORMUL1(pOut[1]); + } + else { + CVMX_MT_GFM_XOR0(pIn[0]); + CVMX_MT_GFM_XORMUL1(pIn[1]); + pOut[0] ^= pIn[0]; + pOut[1] ^= pIn[1]; + } + + CVMX_STOREUNA_INT64(pOut[0], out, 0); + CVMX_STOREUNA_INT64(pOut[1], out, 8); + } + + if (remainder > 0) { + ALIGN16 byte aesBlockMask[AES_BLOCK_SIZE]; + word64* pMask = (word64*)aesBlockMask; + + XMEMSET(aesBlockOut, 0, sizeof(aesBlockOut)); + XMEMSET(aesBlockMask, 0, 
sizeof(aesBlockMask)); + for (i = 0; i < remainder; i++) { + aesBlockIn[i] = in[i]; + aesBlockMask[i] = 0xFF; + } + + if (encrypt) { + CVMX_MF_AES_RESULT(pOut[0], 0); + CVMX_MF_AES_RESULT(pOut[1], 1); + + pOut[0] ^= pIn[0]; + pOut[1] ^= pIn[1]; + + pOut[0] &= pMask[0]; + pOut[1] &= pMask[1]; + + CVMX_MT_GFM_XOR0(pOut[0]); + CVMX_MT_GFM_XORMUL1(pOut[1]); + } + else { + CVMX_MT_GFM_XOR0(pIn[0]); + CVMX_MT_GFM_XORMUL1(pIn[1]); + + CVMX_MF_AES_RESULT(pOut[0], 0); + CVMX_MF_AES_RESULT(pOut[1], 1); + + pOut[0] ^= pIn[0]; + pOut[1] ^= pIn[1]; + + pOut[0] &= pMask[0]; + pOut[1] &= pMask[1]; + } + + for (i = 0; i < remainder; i++) + out[i] = aesBlockOut[i]; + } + + return 0; +} + + +static NOOPT int Octeon_AesGcm_Finalize(Aes* aes, word32 inSz, word32 aadSz, + byte* tag) +{ + word64 bigSz; + word64* pIv; + word64* pIn; + word64* pOut; + uint32_t countSave; + ALIGN16 byte aesBlockIn[AES_BLOCK_SIZE]; + ALIGN16 byte aesBlockOut[AES_BLOCK_SIZE]; + + countSave = aes->reg[3]; + aes->reg[3] = aes->y0; + + pIv = (word64*)aes->reg; + CVMX_MT_AES_ENC0(pIv[0]); + CVMX_MT_AES_ENC1(pIv[1]); + + bigSz = (word64)aadSz * 8; + CVMX_MT_GFM_XOR0(bigSz); + bigSz = (word64)inSz * 8; + CVMX_MT_GFM_XORMUL1(bigSz); + + aes->reg[3] = countSave; + + pIn = (word64*)aesBlockIn; + CVMX_MF_AES_RESULT(pIn[0], 0); + CVMX_MF_AES_RESULT(pIn[1], 1); + + pOut = (word64*)aesBlockOut; + CVMX_MF_GFM_RESINP(pOut[0], 0); + CVMX_MF_GFM_RESINP(pOut[1], 1); + + pOut[0] ^= pIn[0]; + pOut[1] ^= pIn[1]; + + CVMX_STOREUNA_INT64(pOut[0], tag, 0); + CVMX_STOREUNA_INT64(pOut[1], tag, 8); + + return 0; +} + + +static int Octeon_AesGcm_Encrypt(Aes* aes, byte* in, byte* out, word32 inSz, + byte* iv, word32 ivSz, byte* aad, word32 aadSz, byte* tag) +{ + int ret = 0; + + if (aes == NULL) + ret = BAD_FUNC_ARG; + + if (ret == 0) + ret = Octeon_AesGcm_SetKey(aes); + + if (ret == 0) + ret = Octeon_AesGcm_SetIV(aes, iv, ivSz); + + if (ret == 0) + ret = Octeon_AesGcm_SetAAD(aes, aad, aadSz); + + if (ret == 0) + ret = Octeon_AesGcm_SetEncrypt(aes, in, out, inSz, 1); + + if (ret == 0) + ret = Octeon_AesGcm_Finalize(aes, inSz, aadSz, tag); + + return ret; +} + + +static int Octeon_AesGcm_Decrypt(Aes* aes, byte* in, byte* out, word32 inSz, + byte* iv, word32 ivSz, byte* aad, word32 aadSz, byte* tag) +{ + int ret = 0; + + if (aes == NULL) + ret = BAD_FUNC_ARG; + + if (ret == 0) + ret = Octeon_AesGcm_SetKey(aes); + + if (ret == 0) + ret = Octeon_AesGcm_SetIV(aes, iv, ivSz); + + if (ret == 0) + ret = Octeon_AesGcm_SetAAD(aes, aad, aadSz); + + if (ret == 0) + ret = Octeon_AesGcm_SetEncrypt(aes, in, out, inSz, 0); + + if (ret == 0) + ret = Octeon_AesGcm_Finalize(aes, inSz, aadSz, tag); + + return ret; +} + +#endif /* HAVE_AESGCM */ + +#endif /* !NO_AES */ + +#ifdef WOLF_CRYPTO_CB + +#include + + +static int myCryptoDevCb(int devIdArg, wc_CryptoInfo* info, void* ctx) +{ + int ret = NOT_COMPILED_IN; /* return this to bypass HW and use SW */ + + if (info == NULL) + return BAD_FUNC_ARG; + +#ifdef DEBUG_WOLFSSL + printf("CryptoDevCb: Algo Type %d\n", info->algo_type); +#endif + + if (info->algo_type == WC_ALGO_TYPE_CIPHER) { +#if !defined(NO_AES) || !defined(NO_DES3) + #ifdef HAVE_AESGCM + if (info->cipher.type == WC_CIPHER_AES_GCM) { + if (info->cipher.enc) { + ret = Octeon_AesGcm_Encrypt( + info->cipher.aesgcm_enc.aes, + (byte*)info->cipher.aesgcm_enc.in, + (byte*)info->cipher.aesgcm_enc.out, + info->cipher.aesgcm_enc.sz, + (byte*)info->cipher.aesgcm_enc.iv, + info->cipher.aesgcm_enc.ivSz, + (byte*)info->cipher.aesgcm_enc.authIn, + info->cipher.aesgcm_enc.authInSz, + 
(byte*)info->cipher.aesgcm_enc.authTag);
+            }
+            else {
+                ret = Octeon_AesGcm_Decrypt(
+                    info->cipher.aesgcm_dec.aes,
+                    (byte*)info->cipher.aesgcm_dec.in,
+                    (byte*)info->cipher.aesgcm_dec.out,
+                    info->cipher.aesgcm_dec.sz,
+                    (byte*)info->cipher.aesgcm_dec.iv,
+                    info->cipher.aesgcm_dec.ivSz,
+                    (byte*)info->cipher.aesgcm_dec.authIn,
+                    info->cipher.aesgcm_dec.authInSz,
+                    (byte*)info->cipher.aesgcm_dec.authTag);
+            }
+        }
+    #endif /* HAVE_AESGCM */
+    #ifdef HAVE_AES_CBC
+        if (info->cipher.type == WC_CIPHER_AES_CBC) {
+            if (info->cipher.enc) {
+                ret = Octeon_AesCbc_Encrypt(
+                    info->cipher.aescbc.aes,
+                    (word64*)info->cipher.aescbc.in,
+                    (word64*)info->cipher.aescbc.out,
+                    info->cipher.aescbc.sz);
+            }
+            else {
+                ret = Octeon_AesCbc_Decrypt(
+                    info->cipher.aescbc.aes,
+                    (word64*)info->cipher.aescbc.in,
+                    (word64*)info->cipher.aescbc.out,
+                    info->cipher.aescbc.sz);
+            }
+        }
+    #endif /* HAVE_AES_CBC */
+    #ifndef NO_DES3
+        if (info->cipher.type == WC_CIPHER_DES3) {
+            if (info->cipher.enc) {
+                ret = Octeon_Des3_CbcEncrypt(
+                    info->cipher.des3.des,
+                    (word64*)info->cipher.des3.in,
+                    (word64*)info->cipher.des3.out,
+                    info->cipher.des3.sz);
+            }
+            else {
+                ret = Octeon_Des3_CbcDecrypt(
+                    info->cipher.des3.des,
+                    (word64*)info->cipher.des3.in,
+                    (word64*)info->cipher.des3.out,
+                    info->cipher.des3.sz);
+            }
+        }
+    #endif /* !NO_DES3 */
+#endif /* !NO_AES || !NO_DES3 */
+    }
+
+    (void)devIdArg;
+    (void)ctx;
+
+    return ret;
+}
+
+int wc_CryptoCb_InitOcteon(void)
+{
+    if (wc_CryptoCb_RegisterDevice(devId, myCryptoDevCb, NULL) < 0) {
+        return INVALID_DEVID;
+    }
+
+    return devId;
+}
+
+void wc_CryptoCb_CleanupOcteon(int* id)
+{
+    wc_CryptoCb_UnRegisterDevice(*id);
+    *id = INVALID_DEVID;
+}
+
+#endif /* WOLF_CRYPTO_CB */
+
+#endif /* HAVE_CAVIUM_OCTEON_SYNC */
diff --git a/client/wolfssl/wolfcrypt/src/port/devcrypto/README.md b/client/wolfssl/wolfcrypt/src/port/devcrypto/README.md
new file mode 100644
index 0000000..7844dca
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/devcrypto/README.md
@@ -0,0 +1,43 @@
+# Description
+
+Used to build wolfSSL with the cryptodev-linux library on a Linux OS.
+
+# Quick Start
+
+## Installing cryptodev module
+
+If not already installed, then the cryptodev-linux module will need to be
+installed.
+
+```
+git clone https://github.com/cryptodev-linux/cryptodev-linux.git
+cd cryptodev-linux
+make
+sudo make install
+modprobe cryptodev
+```
+
+
+## Options for building wolfSSL
+
+For a default build with all supported features use:
+
+```
+./configure --enable-cryptodev
+```
+
+Or for more control over the features used:
+
+```
+./configure --enable-devcrypto=cbc
+./configure --enable-devcrypto=hash
+./configure --enable-devcrypto=aes
+./configure --enable-devcrypto=all
+```
+
+Then build and test the wolfSSL library with:
+
+```
+make
+sudo make install
+./wolfcrypt/test/testwolfcrypt
+```
diff --git a/client/wolfssl/wolfcrypt/src/port/devcrypto/devcrypto_aes.c b/client/wolfssl/wolfcrypt/src/port/devcrypto/devcrypto_aes.c
new file mode 100644
index 0000000..1f6d09d
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/devcrypto/devcrypto_aes.c
@@ -0,0 +1,384 @@
+/* devcrypto_aes.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+
+#if !defined(NO_AES) && defined(WOLFSSL_DEVCRYPTO)
+
+#include <wolfssl/wolfcrypt/aes.h>
+#include <wolfssl/wolfcrypt/port/devcrypto/wc_devcrypto.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+
+#if defined(HAVE_AES_CBC) && defined(WOLFSSL_DEVCRYPTO_CBC)
+int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    struct crypt_op crt;
+    int ret;
+
+    if (aes == NULL || out == NULL || in == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* encrypt only up to an AES block size multiple of the data */
+    sz = sz - (sz % AES_BLOCK_SIZE);
+    if (aes->ctx.cfd == -1) {
+        ret = wc_DevCryptoCreate(&aes->ctx, CRYPTO_AES_CBC,
+                (byte*)aes->devKey, aes->keylen);
+        if (ret != 0)
+            return ret;
+    }
+    wc_SetupCryptSym(&crt, &aes->ctx, (byte*)in, sz, out, (byte*)aes->reg,
+            COP_ENCRYPT);
+    ret = ioctl(aes->ctx.cfd, CIOCCRYPT, &crt);
+    if (ret != 0) {
+        return WC_DEVCRYPTO_E;
+    }
+
+    /* store iv for next call */
+    XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+
+    return 0;
+}
+
+#ifdef HAVE_AES_DECRYPT
+int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    struct crypt_op crt;
+    int ret;
+
+    if (aes == NULL || out == NULL || in == NULL || sz % AES_BLOCK_SIZE != 0) {
+        return BAD_FUNC_ARG;
+    }
+
+    XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+    if (aes->ctx.cfd == -1) {
+        ret = wc_DevCryptoCreate(&aes->ctx, CRYPTO_AES_CBC,
+                (byte*)aes->devKey, aes->keylen);
+        if (ret != 0)
+            return ret;
+    }
+    wc_SetupCryptSym(&crt, &aes->ctx, (byte*)in, sz, out, (byte*)aes->reg,
+            COP_DECRYPT);
+    ret = ioctl(aes->ctx.cfd, CIOCCRYPT, &crt);
+    if (ret != 0) {
+        return WC_DEVCRYPTO_E;
+    }
+
+    XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
+    return 0;
+}
+#endif /* HAVE_AES_DECRYPT */
+#endif /* HAVE_AES_CBC && WOLFSSL_DEVCRYPTO_CBC */
+
+
+#ifdef WOLFSSL_DEVCRYPTO_AES /* all AES algorithms supported */
+int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+    const byte* iv, int dir)
+{
+#if defined(AES_MAX_KEY_SIZE)
+    const word32 max_key_len = (AES_MAX_KEY_SIZE / 8);
+#endif
+
+    if (aes == NULL ||
+            !((keylen == 16) || (keylen == 24) || (keylen == 32))) {
+        return BAD_FUNC_ARG;
+    }
+
+#if defined(AES_MAX_KEY_SIZE)
+    /* Check key length */
+    if (keylen > max_key_len) {
+        return BAD_FUNC_ARG;
+    }
+#endif
+    aes->keylen = keylen;
+    aes->rounds = keylen/4 + 6;
+
+#ifdef WOLFSSL_AES_COUNTER
+    aes->left = 0;
+#endif
+    aes->ctx.cfd = -1;
+    XMEMCPY(aes->devKey, userKey, keylen);
+
+    (void)dir;
+    return wc_AesSetIV(aes, iv);
+}
+
+
+/* AES-DIRECT */
+#if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AES_ECB)
+
+/* common code between ECB encrypt and decrypt
+ * returns 0 on success */
+static int wc_DevCrypto_AesDirect(Aes* aes, byte* out, const byte* in,
+    word32 sz, int dir)
+{
+    int ret;
+    struct crypt_op crt;
+
+    if (aes == NULL || out == NULL || in == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (aes->ctx.cfd == -1) {
+        ret = wc_DevCryptoCreate(&aes->ctx, CRYPTO_AES_ECB, (byte*)aes->devKey,
+                aes->keylen);
+        if (ret != 0)
+            return ret;
+    }
+
+    wc_SetupCryptSym(&crt, &aes->ctx,
(byte*)in, sz, out, NULL, dir); + ret = ioctl(aes->ctx.cfd, CIOCCRYPT, &crt); + if (ret != 0) { + return WC_DEVCRYPTO_E; + } + return 0; +} +#endif + + +#if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESCCM) +void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) +{ + wc_DevCrypto_AesDirect(aes, out, in, AES_BLOCK_SIZE, COP_ENCRYPT); +} + + +void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) +{ + wc_DevCrypto_AesDirect(aes, out, in, AES_BLOCK_SIZE, COP_DECRYPT); +} + + +int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) +{ + return wc_AesSetKey(aes, userKey, keylen, iv, dir); +} +#endif + + +/* AES-CTR */ +#if defined(WOLFSSL_AES_COUNTER) + +/* Increment AES counter */ +static WC_INLINE void IncrementAesCounter(byte* inOutCtr) +{ + /* in network byte order so start at end and work back */ + int i; + for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) { + if (++inOutCtr[i]) /* we're done unless we overflow */ + return; + } +} + +int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + int ret; + struct crypt_op crt; + byte* tmp; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + /* consume any unused bytes left in aes->tmp */ + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; + while (aes->left && sz) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + sz--; + } + + if (aes->ctx.cfd == -1) { + ret = wc_DevCryptoCreate(&aes->ctx, CRYPTO_AES_CTR, (byte*)aes->devKey, + aes->keylen); + if (ret != 0) + return ret; + } + + if (sz > 0) { + /* clear previously leftover data */ + tmp = (byte*)aes->tmp; + XMEMSET(tmp, 0, AES_BLOCK_SIZE); + + /* update IV */ + wc_SetupCryptSym(&crt, &aes->ctx, (byte*)in, sz, out, (byte*)aes->reg, + COP_ENCRYPT); + ret = ioctl(aes->ctx.cfd, CIOCCRYPT, &crt); + if (ret != 0) { + return WC_DEVCRYPTO_E; + } + + /* adjust counter after call to hardware */ + while (sz >= AES_BLOCK_SIZE) { + IncrementAesCounter((byte*)aes->reg); + sz -= AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + } + } + + /* create key stream for later if needed */ + if (sz > 0) { + Aes tmpAes; + wc_AesSetKey(&tmpAes, (byte*)aes->devKey, aes->keylen, (byte*)aes->reg, + AES_ENCRYPTION); + wc_AesEncryptDirect(&tmpAes, (byte*)aes->tmp, (const byte*)aes->reg); + wc_AesFree(&tmpAes); + IncrementAesCounter((byte*)aes->reg); + + aes->left = AES_BLOCK_SIZE - (sz % AES_BLOCK_SIZE); + } + + return 0; +} +#endif /* WOLFSSL_AES_COUNTER */ + + +#ifdef HAVE_AESGCM + +int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) +{ + return wc_AesSetKey(aes, key, len, NULL, AES_ENCRYPTION); +} + + + +/* common code for AES-GCM encrypt/decrypt */ +static int wc_DevCrypto_AesGcm(Aes* aes, byte* out, byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz, + int dir) +{ + struct crypt_auth_op crt = {0}; + int ret; + byte scratch[AES_BLOCK_SIZE]; + + /* argument checks */ + if (aes == NULL || authTagSz > AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + /* Account for NULL in/out buffers. 
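GCM permits AAD-only (GMAC-style) calls with no plaintext, so NULL data pointers are legal here.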
Up to tag size is still written into + * in/out buffers */ + if (out == NULL) + out = scratch; + if (in == NULL) + in = scratch; + + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + if (aes->ctx.cfd == -1) { + ret = wc_DevCryptoCreate(&aes->ctx, CRYPTO_AES_GCM, (byte*)aes->devKey, + aes->keylen); + if (ret != 0) + return ret; + } + + /* if decrypting then the tag is expected to be at the end of "in" buffer */ + if (dir == COP_DECRYPT) { + XMEMCPY(in + sz, authTag, authTagSz); + sz += authTagSz; + } + else{ + /* get full tag from hardware */ + authTagSz = AES_BLOCK_SIZE; + } + wc_SetupCryptAead(&crt, &aes->ctx, (byte*)in, sz, out, (byte*)iv, ivSz, + dir, (byte*)authIn, authInSz, authTag, authTagSz); + ret = ioctl(aes->ctx.cfd, CIOCAUTHCRYPT, &crt); + if (ret != 0) { + if (dir == COP_DECRYPT) { + return AES_GCM_AUTH_E; + } + else { + return WC_DEVCRYPTO_E; + } + } + + /* after encryption the tag has been placed at the end of "out" buffer */ + if (dir == COP_ENCRYPT) { + XMEMCPY(authTag, out + sz, authTagSz); + } + return 0; +} + + +/* it is assumed that "out" buffer has enough room for cipher text + tag */ +int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) { + WOLFSSL_MSG("GcmEncrypt authTagSz too small error"); + return BAD_FUNC_ARG; + } + + return wc_DevCrypto_AesGcm(aes, out, (byte*)in, sz, iv, ivSz, + authTag, authTagSz, authIn, authInSz, + COP_ENCRYPT); +} + +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AESGCM_DECRYPT) +/* it is assumed that "in" buffer has enough room for cipher text + tag */ +int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + return wc_DevCrypto_AesGcm(aes, out, (byte*)in, sz, iv, ivSz, + (byte*)authTag, authTagSz, authIn, authInSz, + COP_DECRYPT); +} +#endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */ +#endif /* HAVE_AESGCM */ + + +#ifdef HAVE_AES_ECB +int wc_AesEcbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_DevCrypto_AesDirect(aes, out, in, sz, COP_ENCRYPT); +} + + +int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_DevCrypto_AesDirect(aes, out, in, sz, COP_DECRYPT); +} +#endif /* HAVE_AES_ECB */ +#endif /* WOLFSSL_DEVCRYPTO_AES */ +#endif /* !NO_AES && WOLFSSL_DEVCRYPTO */ + diff --git a/client/wolfssl/wolfcrypt/src/port/devcrypto/devcrypto_hash.c b/client/wolfssl/wolfcrypt/src/port/devcrypto/devcrypto_hash.c new file mode 100644 index 0000000..f73224d --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/devcrypto/devcrypto_hash.c @@ -0,0 +1,248 @@ +/* devcrypto_hash.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if defined(WOLFSSL_DEVCRYPTO_HASH)
+
+#include <wolfssl/wolfcrypt/port/devcrypto/wc_devcrypto.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+#if !defined(NO_SHA256)
+#include <wolfssl/wolfcrypt/sha256.h>
+#endif
+
+/* dereference structure based on type to get cryptodev context pointer
+ * can return NULL on fail case */
+static WC_CRYPTODEV* GetHashContext(void* ctx, int type)
+{
+    switch (type) {
+        case CRYPTO_SHA2_256:
+            return &((wc_Sha256*)ctx)->ctx;
+
+        default:
+            return NULL;
+    }
+
+    return NULL;
+}
+
+
+/* generic hash initialization
+ * key is for HMAC algorithms and keySz is the size of the key buffer
+ * key should be NULL in the case of non-HMAC algorithms
+ * return 0 on success */
+static int HashInit(void* ctx, int type, byte* key, word32 keySz)
+{
+    WC_CRYPTODEV* cdev;
+
+    if ((cdev = GetHashContext(ctx, type)) == NULL) {
+        WOLFSSL_MSG("Unsupported hash type");
+        return BAD_FUNC_ARG;
+    }
+
+    return wc_DevCryptoCreate(cdev, type, key, keySz);
+}
+
+
+/* generic function for updating a hash structure
+ * returns 0 on success */
+static int HashUpdate(void* ctx, int type, const byte* input, word32 inputSz)
+{
+    WC_CRYPTODEV* dev;
+    struct crypt_op crt;
+    byte digest[64];
+
+    if (inputSz == 0) {
+        return 0;
+    }
+
+    if ((dev = GetHashContext(ctx, type)) == NULL) {
+        WOLFSSL_MSG("Unsupported hash type");
+        return BAD_FUNC_ARG;
+    }
+
+    wc_SetupCrypt(&crt, dev, (byte*)input, inputSz, NULL, digest,
+            COP_FLAG_UPDATE);
+    if (ioctl(dev->cfd, CIOCCRYPT, &crt)) {
+        WOLFSSL_MSG("Error with call to ioctl");
+        return WC_DEVCRYPTO_E;
+    }
+
+    return 0;
+}
+
+
+/* generic function for getting the final digest value */
+static int GetDigest(void* ctx, int type, byte* out)
+{
+    WC_CRYPTODEV* dev;
+    struct crypt_op crt;
+
+    if ((dev = GetHashContext(ctx, type)) == NULL) {
+        WOLFSSL_MSG("Unsupported hash type");
+        return BAD_FUNC_ARG;
+    }
+
+    wc_SetupCrypt(&crt, dev, NULL, 0, NULL, out, COP_FLAG_FINAL);
+    if (ioctl(dev->cfd, CIOCCRYPT, &crt)) {
+        WOLFSSL_MSG("Error with call to ioctl");
+        return WC_DEVCRYPTO_E;
+    }
+
+    return 0;
+}
+
+#if !defined(NO_SHA256)
+
+int wc_InitSha256_ex(wc_Sha256* sha, void* heap, int devId)
+{
+    if (sha == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    (void)devId; /* no async for now */
+    XMEMSET(sha, 0, sizeof(wc_Sha256));
+    sha->heap = heap;
+
+    return HashInit((void*)sha, CRYPTO_SHA2_256, NULL, 0);
+}
+
+
+int wc_Sha256Update(wc_Sha256* sha, const byte* in, word32 sz)
+{
+    if (sha == NULL || (sz > 0 && in == NULL)) {
+        return BAD_FUNC_ARG;
+    }
+
+#ifdef WOLFSSL_DEVCRYPTO_HASH_KEEP
+    /* keep full message to hash at end instead of incremental updates */
+    if (sha->len < sha->used + sz) {
+        if (sha->msg == NULL) {
+            sha->msg = (byte*)XMALLOC(sha->used + sz, sha->heap,
+                    DYNAMIC_TYPE_TMP_BUFFER);
+        } else {
+            byte* pt = (byte*)XREALLOC(sha->msg, sha->used + sz, sha->heap,
+                    DYNAMIC_TYPE_TMP_BUFFER);
+            if (pt == NULL) {
+                return MEMORY_E;
+            }
+            sha->msg = pt;
+        }
+        if (sha->msg == NULL) {
+            return MEMORY_E;
+        }
+        sha->len = sha->used + sz;
+    }
+    XMEMCPY(sha->msg + sha->used, in, sz);
+    sha->used += sz;
+    return 0;
+#else
+    return HashUpdate(sha, CRYPTO_SHA2_256, in, sz);
+#endif
+}
+
+
+int wc_Sha256Final(wc_Sha256* sha, byte* hash)
+{
+    int ret;
+
+    if (sha == NULL || hash == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* help static analysis tools out */
+    XMEMSET(hash, 0, WC_SHA256_DIGEST_SIZE);
+#ifdef
WOLFSSL_DEVCRYPTO_HASH_KEEP + /* keep full message to hash at end instead of incremental updates */ + if ((ret = HashUpdate(sha, CRYPTO_SHA2_256, sha->msg, sha->used)) < 0) { + return ret; + } + XFREE(sha->msg, sha->heap, DYNAMIC_TYPE_TMP_BUFFER); + sha->msg = NULL; +#endif + ret = GetDigest(sha, CRYPTO_SHA2_256, hash); + if (ret != 0) { + return ret; + } + + wc_Sha256Free(sha); + return wc_InitSha256_ex(sha, sha->heap, 0); +} + + +int wc_Sha256GetHash(wc_Sha256* sha, byte* hash) +{ + if (sha == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_DEVCRYPTO_HASH_KEEP + { + int ret; + wc_Sha256 cpy; + wc_Sha256Copy(sha, &cpy); + + if ((ret = HashUpdate(&cpy, CRYPTO_SHA2_256, cpy.msg, cpy.used)) == 0) { + /* help static analysis tools out */ + XMEMSET(hash, 0, WC_SHA256_DIGEST_SIZE); + ret = GetDigest(&cpy, CRYPTO_SHA2_256, hash); + } + wc_Sha256Free(&cpy); + return ret; + } +#else + (void)sha; + (void)hash; + + WOLFSSL_MSG("Compile with WOLFSSL_DEVCRYPTO_HASH_KEEP for this feature"); + return NOT_COMPILED_IN; +#endif +} + +int wc_Sha256Copy(wc_Sha256* src, wc_Sha256* dst) +{ + if (src == NULL || dst == NULL) { + return BAD_FUNC_ARG; + } + + wc_InitSha256_ex(dst, src->heap, 0); +#ifdef WOLFSSL_DEVCRYPTO_HASH_KEEP + dst->len = src->len; + dst->used = src->used; + dst->msg = (byte*)XMALLOC(src->len, dst->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (dst->msg == NULL) { + return MEMORY_E; + } + XMEMCPY(dst->msg, src->msg, src->len); +#endif + + return 0; +} + +#endif /* !NO_SHA256 */ + +#endif /* WOLFSSL_DEVCRYPTO */ diff --git a/client/wolfssl/wolfcrypt/src/port/devcrypto/wc_devcrypto.c b/client/wolfssl/wolfcrypt/src/port/devcrypto/wc_devcrypto.c new file mode 100644 index 0000000..2c80518 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/devcrypto/wc_devcrypto.c @@ -0,0 +1,167 @@ +/* wc_devcrypto.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if defined(WOLFSSL_DEVCRYPTO)
+
+#include <wolfssl/wolfcrypt/port/devcrypto/wc_devcrypto.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+/* sets up a context for talking to /dev/crypto
+ * return 0 on success */
+int wc_DevCryptoCreate(WC_CRYPTODEV* ctx, int type, byte* key, word32 keySz)
+{
+    int fd;
+    int isHash = 0; /* flag set for hashing algorithms */
+
+    if (ctx == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* sanity check on session type before creating descriptor */
+    XMEMSET(ctx, 0, sizeof(WC_CRYPTODEV));
+    switch (type) {
+        case CRYPTO_SHA1:
+        case CRYPTO_SHA2_256:
+            isHash = 1;
+            break;
+
+    #ifndef NO_AES
+        case CRYPTO_AES_CTR:
+        case CRYPTO_AES_ECB:
+        case CRYPTO_AES_GCM:
+        case CRYPTO_AES_CBC:
+            isHash = 0;
+            break;
+    #endif
+
+        default:
+            WOLFSSL_MSG("Unknown / Unimplemented algorithm type");
+            return BAD_FUNC_ARG;
+    }
+
+    /* create descriptor */
+    if ((fd = open("/dev/crypto", O_RDWR, 0)) < 0) {
+        WOLFSSL_MSG("Error opening /dev/crypto, is the cryptodev module loaded?");
+        return WC_DEVCRYPTO_E;
+    }
+    if (fcntl(fd, F_SETFD, 1) == -1) {
+        WOLFSSL_MSG("Error setting F_SETFD with fcntl");
+        close(fd);
+        return WC_DEVCRYPTO_E;
+    }
+
+    /* set up session */
+    ctx->cfd = fd;
+
+    if (isHash) {
+        ctx->sess.mac = type;
+    }
+    else {
+        ctx->sess.cipher = type;
+        ctx->sess.key = (void*)key;
+        ctx->sess.keylen = keySz;
+    }
+
+    if (ioctl(ctx->cfd, CIOCGSESSION, &ctx->sess)) {
+        close(fd);
+        WOLFSSL_MSG("Error starting cryptodev session");
+        return WC_DEVCRYPTO_E;
+    }
+
+    (void)key;
+    (void)keySz;
+
+    return 0;
+}
+
+
+/* free up descriptor and session used with ctx */
+void wc_DevCryptoFree(WC_CRYPTODEV* ctx)
+{
+    if (ctx != NULL && ctx->cfd >= 0) {
+        if (ioctl(ctx->cfd, CIOCFSESSION, &ctx->sess.ses)) {
+            WOLFSSL_MSG("Error stopping cryptodev session");
+        }
+        close(ctx->cfd);
+    }
+}
+
+
+/* setup crypt_op structure */
+void wc_SetupCrypt(struct crypt_op* crt, WC_CRYPTODEV* dev,
+    byte* src, int srcSz, byte* dst, byte* dig, int flag)
+{
+    XMEMSET(crt, 0, sizeof(struct crypt_op));
+    crt->ses = dev->sess.ses;
+    crt->src = src;
+    crt->len = srcSz;
+    crt->dst = dst;
+    crt->mac = dig;
+    crt->flags = flag;
+}
+
+
+/* setup crypt_op structure for symmetric key operations */
+void wc_SetupCryptSym(struct crypt_op* crt, WC_CRYPTODEV* dev,
+    byte* src, word32 srcSz, byte* dst, byte* iv, int flag)
+{
+    XMEMSET(crt, 0, sizeof(struct crypt_op));
+    crt->ses = dev->sess.ses;
+    crt->src = src;
+    crt->len = srcSz;
+    crt->dst = dst;
+    crt->iv = iv;
+    crt->op = flag;
+}
+
+
+/* setup crypt_auth_op structure for AEAD operations */
+void wc_SetupCryptAead(struct crypt_auth_op* crt, WC_CRYPTODEV* dev,
+    byte* src, word32 srcSz, byte* dst, byte* iv, word32 ivSz, int flag,
+    byte* authIn, word32 authInSz, byte* authTag, word32 authTagSz)
+{
+    XMEMSET(crt, 0, sizeof(struct crypt_auth_op));
+    crt->ses = dev->sess.ses;
+    crt->src = src;
+    crt->len = srcSz;
+    crt->dst = dst;
+    crt->iv = iv;
+    crt->iv_len = ivSz;
+    crt->op = flag;
+
+    /* also set auth in and tag */
+    crt->auth_src = authIn;
+    crt->auth_len = authInSz;
+    crt->tag = authTag;
+    crt->tag_len = authTagSz;
+}
+#endif /* WOLFSSL_DEVCRYPTO */
+
diff --git a/client/wolfssl/wolfcrypt/src/port/intel/README.md b/client/wolfssl/wolfcrypt/src/port/intel/README.md
new file mode 100644
index 0000000..4b5d971
--- /dev/null
+++
b/client/wolfssl/wolfcrypt/src/port/intel/README.md @@ -0,0 +1,3 @@ +# Intel QuickAssist Adapter Asynchronous Support + +Please contact wolfSSL at info@wolfssl.com to request an evaluation. diff --git a/client/wolfssl/wolfcrypt/src/port/intel/quickassist.c b/client/wolfssl/wolfcrypt/src/port/intel/quickassist.c new file mode 100644 index 0000000..e69de29 diff --git a/client/wolfssl/wolfcrypt/src/port/intel/quickassist_mem.c b/client/wolfssl/wolfcrypt/src/port/intel/quickassist_mem.c new file mode 100644 index 0000000..e69de29 diff --git a/client/wolfssl/wolfcrypt/src/port/intel/quickassist_sync.c b/client/wolfssl/wolfcrypt/src/port/intel/quickassist_sync.c new file mode 100644 index 0000000..e03bca9 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/intel/quickassist_sync.c @@ -0,0 +1,2004 @@ +/* quickassist_sync.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. (formerly known as CyaSSL) + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef HAVE_INTEL_QA_SYNC + +#ifdef QAT_DEMO_MAIN + #define QAT_DEBUG +#endif + + +#include +#include +#include +#ifndef NO_AES + #include +#endif + +#include +#include + +#include "cpa.h" +#include "cpa_cy_im.h" +#include "cpa_cy_sym.h" +#include "cpa_cy_rsa.h" +#include "cpa_cy_ln.h" +#include "cpa_cy_ecdh.h" +#include "cpa_cy_ecdsa.h" +#include "cpa_cy_dh.h" +#include "cpa_cy_drbg.h" +#include "cpa_cy_nrbg.h" +#include "cpa_cy_prime.h" + +#include "icp_sal_user.h" +#include "icp_sal_poll.h" + + +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +/* User space utils */ +#include +#include +#include +#include + +#if 0 + /* Optional feature for partial QAT hashing support */ + /* This will process updates through hardware instead of caching them */ + #define QAT_HASH_ENABLE_PARTIAL +#endif +#ifdef QAT_HASH_ENABLE_PARTIAL + #define MAX_QAT_HASH_BUFFERS 2 +#endif + +/* Detect QAT driver version */ +#if defined(CPA_CY_API_VERSION_NUM_MAJOR) && CPA_CY_API_VERSION_NUM_MAJOR > 1 + #define QAT_V2 +#endif + +#ifdef QAT_V2 + /* quickassist/utilities/libusdm_drv/qae_mem.h */ + /* Provides user-space API's for accessing NUMA allocated memory through usdm_drv */ + #include "qae_mem.h" +#include "linux/include/qae_mem_utils.h" +#endif + +#ifdef QAT_USE_POLLING_THREAD + #include +#endif + +/* Tunable parameters */ +#ifndef QAT_PROCESS_NAME + #define QAT_PROCESS_NAME "SSL" +#endif +#ifndef QAT_LIMIT_DEV_ACCESS + #define QAT_LIMIT_DEV_ACCESS CPA_FALSE +#endif +#ifndef QAT_MAX_DEVICES + #define QAT_MAX_DEVICES (1) /* maximum number of QAT cards */ +#endif + +#ifndef QAT_RETRY_LIMIT + #define QAT_RETRY_LIMIT (100) +#endif +#ifndef QAT_POLL_RESP_QUOTA + #define QAT_POLL_RESP_QUOTA (0) /* all pending */ +#endif + +#if !defined(NO_AES) || !defined(NO_DES3) + #define 
QAT_ENABLE_CRYPTO +#endif + +/* Pre-declarations */ +struct IntelQaDev; +struct wc_CryptoInfo; +struct WC_BIGINT; +struct WC_RNG; + + +#if defined(QAT_ENABLE_HASH) || defined(QAT_ENABLE_CRYPTO) +/* symmetric context */ +typedef struct IntelQaSymCtx { + CpaCySymOpData opData; + CpaCySymSessionCtx symCtxSrc; + CpaCySymSessionCtx symCtx; + word32 symCtxSize; + + /* flags */ + word32 isOpen:1; + word32 isCopy:1; +} IntelQaSymCtx; +#endif + +typedef void (*IntelQaFreeFunc)(struct IntelQaDev*); + + +/* QuickAssist device */ +typedef struct IntelQaDev { + CpaInstanceHandle handle; + int devId; + void* heap; + + /* callback return info */ + int ret; + byte* out; + union { + word32* outLenPtr; + word32 outLen; + }; + + /* operations */ + IntelQaFreeFunc freeFunc; + union { + #ifdef QAT_ENABLE_CRYPTO + struct { + IntelQaSymCtx ctx; + CpaBufferList bufferList; + CpaFlatBuffer flatBuffer; + byte* authTag; + word32 authTagSz; + } cipher; + #endif + } op; + +#ifdef QAT_USE_POLLING_THREAD + pthread_t pollingThread; + byte pollingCy; +#endif +} IntelQaDev; + + +/* Interface */ +static int IntelQaHardwareStart(const char*, int); +static void IntelQaHardwareStop(void); +static int IntelQaInit(void*); +static void IntelQaDeInit(int); +static int IntelQaNumInstances(void); +static int IntelQaOpen(IntelQaDev*, int); +static void IntelQaClose(IntelQaDev*); +static int IntelQaDevCopy(IntelQaDev*, IntelQaDev*); +static int IntelQaPoll(IntelQaDev*); +static int IntelQaGetCyInstanceCount(void); + +#ifndef NO_AES + #ifdef HAVE_AES_CBC + static int IntelQaSymAesCbcEncrypt(IntelQaDev*, byte*, + const byte*, word32, const byte*, word32, const byte*, word32); + #ifdef HAVE_AES_DECRYPT + static int IntelQaSymAesCbcDecrypt(IntelQaDev*, byte*, + const byte*, word32, const byte*, word32, const byte*, word32); + #endif /* HAVE_AES_DECRYPT */ + #endif /* HAVE_AES_CBC */ + + #ifdef HAVE_AESGCM + static int IntelQaSymAesGcmEncrypt(IntelQaDev*, byte*, + const byte*, word32, const byte*, word32, const byte*, word32, + byte*, word32, const byte*, word32); + #ifdef HAVE_AES_DECRYPT + static int IntelQaSymAesGcmDecrypt(IntelQaDev*, byte*, + const byte*, word32, const byte*, word32, const byte*, word32, + const byte*, word32, const byte*, word32); + #endif /* HAVE_AES_DECRYPT */ + #endif /* HAVE_AESGCM */ +#endif /* !NO_AES */ + +#ifndef NO_DES3 + static int IntelQaSymDes3CbcEncrypt(IntelQaDev*, byte*, + const byte*, word32, const byte*, word32, const byte* iv, word32); + static int IntelQaSymDes3CbcDecrypt(IntelQaDev* dev, byte*, + const byte*, word32, const byte*, word32, const byte* iv, word32); +#endif /*! NO_DES3 */ + +#ifdef WOLF_CRYPTO_CB + static int IntelQaSymSync_CryptoDevCb(int, struct wc_CryptoInfo*, + void*); +#endif /* WOLF_CRYPTO_CB */ + + +#ifdef QAT_DEBUG + #define QLOG(...) do { printf(__VA_ARGS__); } while (0) +#else + #define QLOG(...) 
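/* no-op when QAT_DEBUG is not defined */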
+#endif + + +#define OS_HOST_TO_NW_32(uData) ByteReverseWord32(uData) + + +static CpaInstanceHandle* g_cyInstances = NULL; +static CpaInstanceInfo2* g_cyInstanceInfo = NULL; +static Cpa32U* g_cyInstMap = NULL; +static Cpa16U g_numInstances = 0; +static Cpa16U g_instCounter = 0; +static CpaBoolean g_cyServiceStarted = CPA_FALSE; +#ifdef QAT_USE_POLLING_CHECK + static CpaBoolean* g_cyPolling = NULL; + static pthread_mutex_t* g_PollLock; +#endif +static volatile int g_initCount = 0; +static pthread_mutex_t g_Hwlock = PTHREAD_MUTEX_INITIALIZER; + + +typedef struct qatCapabilities { + /* capabilities */ + word32 supPartial:1; + word32 supSha3:1; +} qatCapabilities_t; +static qatCapabilities_t g_qatCapabilities = {0}; + + +#if defined(QAT_ENABLE_CRYPTO) + static int IntelQaSymClose(IntelQaDev* dev, int doFree); +#endif + + +extern Cpa32U osalLogLevelSet(Cpa32U level); + + +static IntelQaDev qaDev; + + +/* -------------------------------------------------------------------------- */ +/* Polling */ +/* -------------------------------------------------------------------------- */ + +static WC_INLINE int SyncSleep(word32 ms) +{ + int ret = 0; + struct timespec resTime, remTime; + resTime.tv_sec = ms/1000; + resTime.tv_nsec = (ms%1000)*1000000; + do { + ret = nanosleep(&resTime, &remTime); + resTime = remTime; + } while ((ret!=0) && (errno == EINTR)); + + if (ret != 0) { + QLOG("nanoSleep failed with code %d\n", ret); + return BAD_FUNC_ARG; + } + + return ret; +} + +#ifdef QAT_USE_POLLING_THREAD +static void* IntelQaPollingThread(void* context) +{ + IntelQaDev* dev = (IntelQaDev*)context; + + QLOG("Polling Thread Start\n"); + while (dev->pollingCy) { + icp_sal_CyPollInstance(dev->handle, QAT_POLL_RESP_QUOTA); + SyncSleep(10); + } + QLOG("Polling Thread Exit\n"); + pthread_exit(NULL); +} + +static CpaStatus IntelQaStartPollingThread(IntelQaDev* dev) +{ + if (dev->pollingCy == 0) { + dev->pollingCy = 1; + + QLOG("Polling Thread Created\n"); + + if (pthread_create(&dev->pollingThread, NULL, IntelQaPollingThread, + (void*)dev) != 0) { + QLOG("Failed create polling thread!\n"); + return CPA_STATUS_FAIL; + } + } + return CPA_STATUS_SUCCESS; +} + +static void IntelQaStopPollingThread(IntelQaDev* dev) +{ + dev->pollingCy = 0; + pthread_join(dev->pollingThread, 0); +} +#endif /* QAT_USE_POLLING_THREAD */ + + +/* -------------------------------------------------------------------------- */ +/* Device */ +/* -------------------------------------------------------------------------- */ +void IntelQaHardwareStop(void) +{ + int i; + CpaStatus status; + + g_initCount--; /* track de-init count */ + if (g_initCount != 0) { + return; + } + + if (g_cyServiceStarted == CPA_TRUE) { + g_cyServiceStarted = CPA_FALSE; + for (i=0; i 1) { + return 0; + } + + status = qaeMemInit(); + if (status != CPA_STATUS_SUCCESS) { + QLOG("IntelQA: Could not start qae mem for user space (status %d)\n" + "\tHas the qaeMemDrv.ko module been loaded?\n", + status); + return ASYNC_INIT_E; + } + + status = icp_sal_userStartMultiProcess(process_name, + limitDevAccess ? CPA_TRUE : CPA_FALSE); + if (status != CPA_STATUS_SUCCESS) { + QLOG("IntelQA: Could not start sal for user space! status %d\n", + status); + ret = ASYNC_INIT_E; goto error; + } + +#ifdef QAT_DEBUG + /* optionally enable debugging */ + //osalLogLevelSet(8); +#endif + + status = cpaCyGetNumInstances(&g_numInstances); + if (status != CPA_STATUS_SUCCESS || g_numInstances == 0) { + QLOG("IntelQA: Failed to get num of instances! 
status %d\n", status); + ret = INVALID_DEVID; goto error; + } + + /* Get handles / info */ + g_cyInstances = (CpaInstanceHandle*)XMALLOC( + sizeof(CpaInstanceHandle) * g_numInstances, NULL, DYNAMIC_TYPE_ASYNC); + if (g_cyInstances == NULL) { + QLOG("IntelQA: Failed to allocate instances\n"); + ret = INVALID_DEVID; goto error; + } + +#ifdef QAT_USE_POLLING_CHECK + g_cyPolling = (CpaBoolean*)XMALLOC(sizeof(CpaBoolean) * g_numInstances, NULL, + DYNAMIC_TYPE_ASYNC); + if (g_cyPolling == NULL) { + QLOG("IntelQA: Failed to allocate polling status\n"); + ret = INVALID_DEVID; goto error; + } + g_PollLock = (pthread_mutex_t*)XMALLOC(sizeof(pthread_mutex_t) * + g_numInstances, NULL, DYNAMIC_TYPE_ASYNC); + if (g_PollLock == NULL) { + QLOG("IntelQA: Failed to allocate polling locks\n"); + ret = INVALID_DEVID; goto error; + } + for (i=0; i> 8), + (Cpa8U)((g_cyInstanceInfo[i].physInstId.busAddress) + & 0xFF) >> 3, + (Cpa8U)((g_cyInstanceInfo[i].physInstId.busAddress) & 3), + g_cyInstanceInfo[i].isPolled); + + status = cpaCySetAddressTranslation(g_cyInstances[i], + qaeVirtToPhysNUMA); + if (status != CPA_STATUS_SUCCESS) { + QLOG("IntelQA: Error setting memory config for inst %d\n", i); + ret = INVALID_DEVID; goto error; + } + + status = cpaCyStartInstance(g_cyInstances[i]); + if (status != CPA_STATUS_SUCCESS) { + QLOG("IntelQA: Error starting crypto instance %d\n", i); + ret = INVALID_DEVID; goto error; + } + } + + QLOG("IntelQA: Instances %d\n", g_numInstances); + return ret; + +error: + IntelQaHardwareStop(); + return ret; +} + + +int IntelQaInit(void* threadId) +{ + int ret; + int devId; + (void)threadId; + + ret = pthread_mutex_lock(&g_Hwlock); + if (ret != 0) { + QLOG("IntelQaInit: mutex lock failed! %d\n", ret); + return BAD_MUTEX_E; + } + + ret = IntelQaHardwareStart(QAT_PROCESS_NAME, QAT_LIMIT_DEV_ACCESS); + if (ret != 0) { + pthread_mutex_unlock(&g_Hwlock); + return ret; + } + + if (g_numInstances <= 0) { + pthread_mutex_unlock(&g_Hwlock); + return ASYNC_INIT_E; + } + + /* assign device id */ + devId = (g_instCounter % g_numInstances); + g_instCounter++; + + pthread_mutex_unlock(&g_Hwlock); + + return devId; +} + + +int IntelQaNumInstances(void) +{ + return g_numInstances; +} + + +int IntelQaOpen(IntelQaDev* dev, int devId) +{ + if (dev == NULL) { + return BAD_FUNC_ARG; + } + + /* clear device info */ + XMEMSET(dev, 0, sizeof(IntelQaDev)); + + if (g_cyInstances == NULL) { + QLOG("IntelQA not initialized\n"); + return ASYNC_INIT_E; + } + + dev->devId = devId; + dev->handle = g_cyInstances[devId]; + +#ifdef QAT_USE_POLLING_THREAD + /* start polling thread */ + IntelQaStartPollingThread(dev); +#endif + + return 0; +} + + +#if defined(QAT_ENABLE_CRYPTO) + +static IntelQaSymCtx* IntelQaGetSymCtx(IntelQaDev* dev) +{ + return &dev->op.cipher.ctx; +} + +#endif + + +void IntelQaClose(IntelQaDev* dev) +{ + if (dev) { + QLOG("IntelQaClose %p\n", dev); + /* close any active session */ + IntelQaSymClose(dev, 1); + + #ifdef QAT_USE_POLLING_THREAD + IntelQaStopPollingThread(dev); + #endif + + dev->handle = NULL; + } +} + +void IntelQaDeInit(int devId) +{ + (void)devId; + + if (pthread_mutex_lock(&g_Hwlock) == 0) { + IntelQaHardwareStop(); + pthread_mutex_unlock(&g_Hwlock); + } +} + +int IntelQaPoll(IntelQaDev* dev) +{ + int ret = 0; + CpaStatus status; + +#ifdef QAT_USE_POLLING_CHECK + pthread_mutex_t* lock = &g_PollLock[dev->qat.devId]; + if (pthread_mutex_lock(lock) == 0) { + /* test if any other threads are polling */ + if (g_cyPolling[dev->qat.devId]) { + pthread_mutex_unlock(lock); + + /* return success 
even though its busy, caller will treat as WC_PENDING_E */ + return 0; + } + + g_cyPolling[dev->qat.devId] = 1; + pthread_mutex_unlock(lock); + } +#endif + + status = icp_sal_CyPollInstance(dev->handle, QAT_POLL_RESP_QUOTA); + if (status != CPA_STATUS_SUCCESS && status != CPA_STATUS_RETRY) { + QLOG("IntelQa: Poll failure %d\n", status); + ret = -1; + } + + { + if (dev->ret != WC_PENDING_E) { + /* perform cleanup */ + IntelQaFreeFunc freeFunc = dev->freeFunc; + QLOG("IntelQaOpFree: Dev %p, FreeFunc %p\n", dev, freeFunc); + if (freeFunc) { + dev->freeFunc = NULL; + freeFunc(dev); + } + } + } + +#ifdef QAT_USE_POLLING_CHECK + /* indicate we are done polling */ + if (pthread_mutex_lock(lock) == 0) { + g_cyPolling[dev->qat.devId] = 0; + pthread_mutex_unlock(lock); + } +#endif + + return ret; +} + +static int IntelQaPollBlockRet(IntelQaDev* dev, int ret_wait) +{ + int ret; + + do { + ret = IntelQaPoll(dev); + + if (dev->ret != ret_wait) { + break; + } + } while (1); + ret = dev->ret; + + return ret; +} + +int IntelQaGetCyInstanceCount(void) +{ + return g_numInstances; +} + +static WC_INLINE int IntelQaHandleCpaStatus(IntelQaDev* dev, CpaStatus status, + int* ret, byte isAsync, void* callback, int* retryCount) +{ + int retry = 0; + + if (status == CPA_STATUS_SUCCESS) { + if (isAsync && callback) { + *ret = WC_PENDING_E; + } + else { + *ret = IntelQaPollBlockRet(dev, WC_PENDING_E); + } + } + else if (status == CPA_STATUS_RETRY) { + (*retryCount)++; + if ((*retryCount % (QAT_RETRY_LIMIT + 1)) == QAT_RETRY_LIMIT) { + SyncSleep(10); + } + retry = 1; + } + else { + *ret = ASYNC_OP_E; + } + + return retry; +} + +static WC_INLINE void IntelQaOpInit(IntelQaDev* dev, IntelQaFreeFunc freeFunc) +{ + dev->ret = WC_PENDING_E; + dev->freeFunc = freeFunc; +} + + +/* -------------------------------------------------------------------------- */ +/* Symmetric Algos */ +/* -------------------------------------------------------------------------- */ + +#if defined(QAT_ENABLE_CRYPTO) + +static int IntelQaSymOpen(IntelQaDev* dev, CpaCySymSessionSetupData* setup, + CpaCySymCbFunc callback) +{ + int ret = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + Cpa32U sessionCtxSize = 0; + IntelQaSymCtx* ctx; + + /* arg check */ + if (dev == NULL || setup == NULL) { + return BAD_FUNC_ARG; + } + + ctx = IntelQaGetSymCtx(dev); + + /* Determine size of session context to allocate - use max size */ + status = cpaCySymSessionCtxGetSize(dev->handle, setup, &sessionCtxSize); + + if (ctx->symCtxSize > 0 && ctx->symCtxSize > sessionCtxSize) { + QLOG("Symmetric context size error! Buf %d, Exp %d\n", + ctx->symCtxSize, sessionCtxSize); + return ASYNC_OP_E; + } + + /* make sure session context is allocated */ + if (ctx->symCtx == NULL) { + /* Allocate session context */ + ctx->symCtx = XMALLOC(sessionCtxSize, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA64); + if (ctx->symCtx == NULL) { + return MEMORY_E; + } + } + ctx->symCtxSize = sessionCtxSize; + + if (!ctx->isOpen) { + ctx->isOpen = 1; + + QLOG("IntelQaSymOpen: InitSession dev %p, symCtx %p\n", + dev, ctx->symCtx); + + /* open symmetric session */ + status = cpaCySymInitSession(dev->handle, callback, setup, ctx->symCtx); + if (status != CPA_STATUS_SUCCESS) { + QLOG("cpaCySymInitSession failed! 
dev %p, status %d\n", + dev, status); + XFREE(ctx->symCtx, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA64); + ctx->symCtx = NULL; + return ASYNC_INIT_E; + } + } + + if (ctx->symCtxSrc == NULL) { + ctx->symCtxSrc = ctx->symCtx; + } + + QLOG("IntelQaSymOpen: dev %p, symCtx %p (src %p), " + "symCtxSize %d, isCopy %d, isOpen %d\n", + dev, ctx->symCtx, ctx->symCtxSrc, ctx->symCtxSize, + ctx->isCopy, ctx->isOpen); + + return ret; +} + +static int IntelQaSymClose(IntelQaDev* dev, int doFree) +{ + int ret = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + IntelQaSymCtx* ctx; + + if (dev == NULL) { + return BAD_FUNC_ARG; + } + + ctx = IntelQaGetSymCtx(dev); + + QLOG("IntelQaSymClose: dev %p, ctx %p, symCtx %p (src %p), " + "symCtxSize %d, isCopy %d, isOpen %d, doFree %d\n", + dev, ctx, ctx->symCtx, ctx->symCtxSrc, ctx->symCtxSize, + ctx->isCopy, ctx->isOpen, doFree); + + if (ctx->symCtx == ctx->symCtxSrc && ctx->symCtx != NULL) { + if (ctx->isOpen) { + ctx->isOpen = 0; + QLOG("IntelQaSymClose: RemoveSession dev %p, symCtx %p\n", + dev, ctx->symCtx); + status = cpaCySymRemoveSession(dev->handle, ctx->symCtx); + if (status == CPA_STATUS_RETRY) { + QLOG("cpaCySymRemoveSession retry!\n"); + /* treat this as error, since session should not be active */ + ret = ASYNC_OP_E; + } + else if (status != CPA_STATUS_SUCCESS) { + QLOG("cpaCySymRemoveSession failed! status %d\n", status); + ret = ASYNC_OP_E; + } + } + } + + if (doFree) { + XFREE(ctx->symCtx, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA64); + ctx->symCtx = NULL; + ctx->symCtxSrc = NULL; + ctx->symCtxSize = 0; + } + + return ret; +} + +#endif /* QAT_ENABLE_CRYPTO */ + + +/* -------------------------------------------------------------------------- */ +/* AES/DES Algo */ +/* -------------------------------------------------------------------------- */ + +#ifdef QAT_ENABLE_CRYPTO + +static void IntelQaSymCipherFree(IntelQaDev* dev) +{ + IntelQaSymCtx* ctx = &dev->op.cipher.ctx; + CpaCySymOpData* opData = &ctx->opData; + CpaBufferList* pDstBuffer = &dev->op.cipher.bufferList; + + if (opData) { + if (opData->pAdditionalAuthData) { + XFREE(opData->pAdditionalAuthData, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + opData->pAdditionalAuthData = NULL; + } + if (opData->pIv) { + XFREE(opData->pIv, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + opData->pIv = NULL; + } + XMEMSET(opData, 0, sizeof(CpaCySymOpData)); + } + if (pDstBuffer) { + if (pDstBuffer->pBuffers) { + if (pDstBuffer->pBuffers->pData) { + XFREE(pDstBuffer->pBuffers->pData, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + pDstBuffer->pBuffers->pData = NULL; + } + XMEMSET(pDstBuffer->pBuffers, 0, sizeof(CpaFlatBuffer)); + } + if (pDstBuffer->pPrivateMetaData) { + XFREE(pDstBuffer->pPrivateMetaData, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + pDstBuffer->pPrivateMetaData = NULL; + } + XMEMSET(pDstBuffer, 0, sizeof(CpaBufferList)); + } + + /* close and free sym context */ + IntelQaSymClose(dev, 1); + + /* clear temp pointers */ + dev->out = NULL; + dev->outLen = 0; +#ifndef NO_AES + if (dev->op.cipher.authTag != NULL) { + XMEMSET(dev->op.cipher.authTag, 0, dev->op.cipher.authTagSz); + XFREE(dev->op.cipher.authTag, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + dev->op.cipher.authTag = NULL; + } + dev->op.cipher.authTagSz = 0; +#endif +} + +static int IntelQaSymCipher(IntelQaDev* dev, byte* out, const byte* in, + word32 inOutSz, const byte* key, word32 keySz, const byte* iv, word32 ivSz, + CpaCySymOp symOperation, CpaCySymCipherAlgorithm cipherAlgorithm, + CpaCySymCipherDirection cipherDirection, + + /* for auth ciphers (CCM or GCM) */ + 
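/* pass CPA_CY_SYM_HASH_NONE with NULL authTag/authIn when no auth hash is used */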
+static int IntelQaSymCipher(IntelQaDev* dev, byte* out, const byte* in,
+    word32 inOutSz, const byte* key, word32 keySz, const byte* iv, word32 ivSz,
+    CpaCySymOp symOperation, CpaCySymCipherAlgorithm cipherAlgorithm,
+    CpaCySymCipherDirection cipherDirection,
+
+    /* for auth ciphers (CCM or GCM) */
+    CpaCySymHashAlgorithm hashAlgorithm,
+    byte* authTag, word32 authTagSz,
+    const byte* authIn, word32 authInSz)
+{
+    int ret;
+    CpaStatus status = CPA_STATUS_SUCCESS;
+    CpaCySymOpData* opData = NULL;
+    CpaCySymSessionSetupData setup;
+    const Cpa32U numBuffers = 1;
+    CpaBufferList* bufferList = NULL;
+    CpaFlatBuffer* flatBuffer = NULL;
+    Cpa8U* ivBuf = NULL;
+    Cpa8U* dataBuf = NULL;
+    Cpa32U dataLen = inOutSz;
+    Cpa8U* metaBuf = NULL;
+    Cpa32U metaSize = 0;
+    Cpa8U* authInBuf = NULL;
+    Cpa32U authInSzAligned = authInSz;
+    Cpa8U* authTagBuf = NULL;
+    IntelQaSymCtx* ctx;
+    CpaBoolean verifyResult = CPA_FALSE;
+
+    QLOG("IntelQaSymCipher: dev %p, out %p, in %p, inOutSz %d, op %d, "
+        "algo %d, dir %d, hash %d\n",
+        dev, out, in, inOutSz, symOperation, cipherAlgorithm,
+        cipherDirection, hashAlgorithm);
+
+    /* check args */
+    if (out == NULL || in == NULL || inOutSz == 0 ||
+        key == NULL || keySz == 0 || iv == NULL || ivSz == 0) {
+        return BAD_FUNC_ARG;
+    }
+    if (hashAlgorithm != CPA_CY_SYM_HASH_NONE &&
+        (authTag == NULL || authTagSz == 0)) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* get meta size */
+    status = cpaCyBufferListGetMetaSize(dev->handle, numBuffers, &metaSize);
+    if (status != CPA_STATUS_SUCCESS && metaSize <= 0) {
+        ret = BUFFER_E; goto exit;
+    }
+
+    /* if authtag is provided then it will be appended to the end of input */
+    if (authTag && authTagSz > 0) {
+        dataLen += authTagSz;
+    }
+
+    /* allocate buffers */
+    ctx = &dev->op.cipher.ctx;
+    opData = &ctx->opData;
+    bufferList = &dev->op.cipher.bufferList;
+    flatBuffer = &dev->op.cipher.flatBuffer;
+    metaBuf = XMALLOC(metaSize, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA);
+    dataBuf = XMALLOC(dataLen, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA);
+    ivBuf = XMALLOC(AES_BLOCK_SIZE, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA);
+    authTagBuf = XMALLOC(authTagSz, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA);
+
+    /* check allocations before copying into them */
+    if (ivBuf == NULL || metaBuf == NULL || dataBuf == NULL ||
+        authTagBuf == NULL) {
+        ret = MEMORY_E; goto exit;
+    }
+    XMEMCPY(dataBuf, in, inOutSz);
+    XMEMCPY(ivBuf, iv, ivSz);
+
+    /* AAD */
+    if (authIn && authInSz > 0) {
+        /* make sure AAD is block aligned */
+        if (authInSzAligned % AES_BLOCK_SIZE) {
+            authInSzAligned += AES_BLOCK_SIZE -
+                (authInSzAligned % AES_BLOCK_SIZE);
+        }
+
+        authInBuf = XMALLOC(authInSzAligned, dev->heap,
+            DYNAMIC_TYPE_ASYNC_NUMA);
+        if (authInBuf == NULL) {
+            ret = MEMORY_E; goto exit;
+        }
+        XMEMCPY(authInBuf, authIn, authInSz);
+        /* clear remainder */
+        XMEMSET(authInBuf + authInSz, 0, authInSzAligned - authInSz);
+    }
+
+    /* init buffers */
+    XMEMSET(&setup, 0, sizeof(CpaCySymSessionSetupData));
+    XMEMSET(opData, 0, sizeof(CpaCySymOpData));
+    XMEMSET(bufferList, 0, sizeof(CpaBufferList));
+    XMEMSET(flatBuffer, 0, sizeof(CpaFlatBuffer));
+    XMEMSET(metaBuf, 0, metaSize);
+
+    bufferList->pBuffers = flatBuffer;
+    bufferList->numBuffers = numBuffers;
+    bufferList->pPrivateMetaData = metaBuf;
+    flatBuffer->dataLenInBytes = dataLen;
+    flatBuffer->pData = dataBuf;
+
+    /* setup */
+    setup.sessionPriority = CPA_CY_PRIORITY_NORMAL;
+    setup.symOperation = symOperation;
+    setup.cipherSetupData.cipherAlgorithm = cipherAlgorithm;
+    setup.cipherSetupData.cipherKeyLenInBytes = keySz;
+    setup.cipherSetupData.pCipherKey = (byte*)key;
+    setup.cipherSetupData.cipherDirection = cipherDirection;
+
+    /* setup auth ciphers */
+    if (hashAlgorithm != CPA_CY_SYM_HASH_NONE) {
+        setup.algChainOrder =
+            (cipherDirection == CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT) ?
+                CPA_CY_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH :
+                CPA_CY_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER;
+
+        setup.hashSetupData.hashAlgorithm = hashAlgorithm;
+        setup.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_AUTH;
+        setup.hashSetupData.digestResultLenInBytes = authTagSz;
+        setup.hashSetupData.authModeSetupData.aadLenInBytes = authInSz;
+
+        if (cipherDirection == CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT)
+            setup.digestIsAppended = CPA_TRUE;
+        else
+            setup.digestIsAppended = CPA_FALSE;
+    }
+
+    /* open session */
+    ret = IntelQaSymOpen(dev, &setup, NULL);
+    if (ret != 0) {
+        goto exit;
+    }
+
+    /* operation data */
+    opData->sessionCtx = ctx->symCtx;
+    opData->packetType = CPA_CY_SYM_PACKET_TYPE_FULL;
+    opData->pIv = ivBuf;
+    opData->ivLenInBytes = ivSz;
+    opData->cryptoStartSrcOffsetInBytes = 0;
+    opData->messageLenToCipherInBytes = inOutSz;
+    if (authIn && authInSz > 0) {
+        opData->pAdditionalAuthData = authInBuf;
+    }
+    if (cipherDirection == CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT) {
+        if (authTag && authTagSz > 0) {
+            /* append digest to end of data buffer */
+            XMEMCPY(flatBuffer->pData + inOutSz, authTag, authTagSz);
+        }
+    }
+    else {
+        if (authTag && authTagSz > 0) {
+            XMEMCPY(authTagBuf, authTag, authTagSz);
+        }
+    }
+
+    /* store info needed for output */
+    dev->out = out;
+    dev->outLen = inOutSz;
+    if (cipherDirection == CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT) {
+        dev->op.cipher.authTag = authTagBuf;
+        dev->op.cipher.authTagSz = authTagSz;
+        opData->pDigestResult = authTagBuf;
+    }
+    else {
+        dev->op.cipher.authTag = NULL;
+        dev->op.cipher.authTagSz = 0;
+    }
+    IntelQaOpInit(dev, IntelQaSymCipherFree);
+
+    /* perform symmetric AES operation async */
+    /* use same buffer list for in-place operation */
+    status = cpaCySymPerformOp(dev->handle, dev, opData,
+        bufferList, bufferList, &verifyResult);
+    if (status != CPA_STATUS_SUCCESS && ret == 0) {
+        /* don't report success when the hardware submit failed */
+        ret = ASYNC_OP_E;
+    }
+
+    if (symOperation == CPA_CY_SYM_OP_ALGORITHM_CHAINING &&
+        cipherAlgorithm == CPA_CY_SYM_CIPHER_AES_GCM &&
+        cipherDirection == CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT &&
+        hashAlgorithm == CPA_CY_SYM_HASH_AES_GCM) {
+        if (verifyResult == CPA_FALSE) {
+            ret = AES_GCM_AUTH_E;
+        }
+    }
+exit:
+
+    if (ret != 0) {
+        QLOG("cpaCySymPerformOp Cipher failed! dev %p, status %d, ret %d\n",
+            dev, status, ret);
+    }
+
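+/* Buffer layout note (added commentary, not from the original patch): the
+ * operation runs in place on a single flat buffer. For AES-GCM decrypt the
+ * hardware verifies an appended digest, so the buffer built above is
+ *
+ *     [ ciphertext (inOutSz) | authTag (authTagSz) ]
+ *
+ * which is why dataLen = inOutSz + authTagSz and the tag was copied to
+ * flatBuffer->pData + inOutSz before cpaCySymPerformOp() was issued. */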
+    /* Capture the in-place encrypt/decrypt result into the output
+     * (skip on failure; dataBuf may be NULL on an early exit) */
+    if (ret == 0 && dataBuf != NULL) {
+        XMEMCPY(out, dataBuf, inOutSz);
+        if (cipherDirection == CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT) {
+            if (authTag != NULL && authTagSz > 0) {
+                XMEMCPY(authTag, authTagBuf, authTagSz);
+            }
+        }
+    }
+
+    /* handle cleanup */
+    IntelQaSymCipherFree(dev);
+
+    return ret;
+}
+
+#ifdef HAVE_AES_CBC
+int IntelQaSymAesCbcEncrypt(IntelQaDev* dev,
+    byte* out, const byte* in, word32 sz,
+    const byte* key, word32 keySz,
+    const byte* iv, word32 ivSz)
+{
+    int ret = IntelQaSymCipher(dev, out, in, sz,
+        key, keySz, iv, ivSz,
+        CPA_CY_SYM_OP_CIPHER, CPA_CY_SYM_CIPHER_AES_CBC,
+        CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT,
+        CPA_CY_SYM_HASH_NONE, NULL, 0, NULL, 0);
+
+    /* store last cipher block back as the next IV (only on success) */
+    if (ret == 0 && sz >= AES_BLOCK_SIZE) {
+        XMEMCPY((byte*)iv, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+    }
+    return ret;
+}
+
+#ifdef HAVE_AES_DECRYPT
+int IntelQaSymAesCbcDecrypt(IntelQaDev* dev,
+    byte* out, const byte* in, word32 sz,
+    const byte* key, word32 keySz,
+    const byte* iv, word32 ivSz)
+{
+    byte nextIv[AES_BLOCK_SIZE];
+    int ret;
+
+    /* save last cipher block before it is decrypted in place */
+    if (sz >= AES_BLOCK_SIZE) {
+        XMEMCPY(nextIv, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+    }
+    ret = IntelQaSymCipher(dev, out, in, sz,
+        key, keySz, iv, ivSz,
+        CPA_CY_SYM_OP_CIPHER, CPA_CY_SYM_CIPHER_AES_CBC,
+        CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT,
+        CPA_CY_SYM_HASH_NONE, NULL, 0, NULL, 0);
+
+    if (ret == 0 && sz >= AES_BLOCK_SIZE) {
+        XMEMCPY((byte*)iv, nextIv, AES_BLOCK_SIZE);
+    }
+    return ret;
+}
+#endif /* HAVE_AES_DECRYPT */
+#endif /* HAVE_AES_CBC */
+
+
+#ifdef HAVE_AESGCM
+int IntelQaSymAesGcmEncrypt(IntelQaDev* dev,
+    byte* out, const byte* in, word32 sz,
+    const byte* key, word32 keySz,
+    const byte* iv, word32 ivSz,
+    byte* authTag, word32 authTagSz,
+    const byte* authIn, word32 authInSz)
+{
+    return IntelQaSymCipher(dev, out, in, sz,
+        key, keySz, iv, ivSz,
+        CPA_CY_SYM_OP_ALGORITHM_CHAINING, CPA_CY_SYM_CIPHER_AES_GCM,
+        CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT,
+        CPA_CY_SYM_HASH_AES_GCM, authTag, authTagSz, authIn, authInSz);
+}
+#ifdef HAVE_AES_DECRYPT
+int IntelQaSymAesGcmDecrypt(IntelQaDev* dev,
+    byte* out, const byte* in, word32 sz,
+    const byte* key, word32 keySz,
+    const byte* iv, word32 ivSz,
+    const byte* authTag, word32 authTagSz,
+    const byte* authIn, word32 authInSz)
+{
+    return IntelQaSymCipher(dev, out, in, sz,
+        key, keySz, iv, ivSz,
+        CPA_CY_SYM_OP_ALGORITHM_CHAINING, CPA_CY_SYM_CIPHER_AES_GCM,
+        CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT,
+        CPA_CY_SYM_HASH_AES_GCM, (byte*)authTag, authTagSz, authIn, authInSz);
+}
+#endif /* HAVE_AES_DECRYPT */
+#endif /* HAVE_AESGCM */
+
+#ifndef NO_DES3
+int IntelQaSymDes3CbcEncrypt(IntelQaDev* dev,
+    byte* out, const byte* in, word32 sz,
+    const byte* key, word32 keySz,
+    const byte* iv, word32 ivSz)
+{
+    return IntelQaSymCipher(dev, out, in, sz,
+        key, keySz, iv, ivSz,
+        CPA_CY_SYM_OP_CIPHER, CPA_CY_SYM_CIPHER_3DES_CBC,
+        CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT,
+        CPA_CY_SYM_HASH_NONE, NULL, 0, NULL, 0);
+}
+
+int IntelQaSymDes3CbcDecrypt(IntelQaDev* dev,
+    byte* out, const byte* in, word32 sz,
+    const byte* key, word32 keySz,
+    const byte* iv, word32 ivSz)
+{
+    return IntelQaSymCipher(dev, out, in, sz,
+        key, keySz, iv, ivSz,
+        CPA_CY_SYM_OP_CIPHER, CPA_CY_SYM_CIPHER_3DES_CBC,
+        CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT,
+        CPA_CY_SYM_HASH_NONE, NULL, 0, NULL, 0);
+}
+#endif /* !NO_DES3 */
+
+#endif /* QAT_ENABLE_CRYPTO */
+
+
+#ifdef WOLF_CRYPTO_CB
+
+int IntelQaSymSync_CryptoDevCb(int devId, struct wc_CryptoInfo* info, void* ctx)
+{
+    int rc = NOT_COMPILED_IN; /* return this to bypass HW and use SW */
+    IntelQaDev* dev;
+
+    if (info == NULL || ctx == NULL)
+        return BAD_FUNC_ARG;
+
+    (void)devId;
+    dev = (IntelQaDev*)ctx;
+
+    #ifdef QAT_ENABLE_CRYPTO
+    if (info->algo_type == WC_ALGO_TYPE_CIPHER) {
+        QLOG("CryptoDevCb Cipher: Type %d\n", info->cipher.type);
+
+    #ifndef NO_AES
+        if (info->cipher.type == WC_CIPHER_AES_CBC) {
+            Aes* aes = info->cipher.aescbc.aes;
+            if (aes == NULL)
+                return BAD_FUNC_ARG;
+
+            if (info->cipher.enc) {
+                rc = IntelQaSymAesCbcEncrypt(dev,
+                    info->cipher.aescbc.out,
+                    info->cipher.aescbc.in,
+                    info->cipher.aescbc.sz,
+                    (byte*)aes->devKey, aes->keylen,
+                    (byte*)aes->reg, AES_BLOCK_SIZE);
+            }
+            else {
+                rc = IntelQaSymAesCbcDecrypt(dev,
+                    info->cipher.aescbc.out,
+                    info->cipher.aescbc.in,
+                    info->cipher.aescbc.sz,
+                    (byte*)aes->devKey, aes->keylen,
+                    (byte*)aes->reg, AES_BLOCK_SIZE);
+            }
+        }
+    #endif /* !NO_AES */
+
+    #ifdef HAVE_AESGCM
+        if (info->cipher.type == WC_CIPHER_AES_GCM) {
+            if (info->cipher.enc) {
+                Aes* aes = info->cipher.aesgcm_enc.aes;
+                if (aes == NULL)
+                    return BAD_FUNC_ARG;
+
+                rc = IntelQaSymAesGcmEncrypt(dev,
+                    info->cipher.aesgcm_enc.out,
+                    info->cipher.aesgcm_enc.in,
+                    info->cipher.aesgcm_enc.sz,
+                    (const byte*)aes->devKey, aes->keylen,
+                    info->cipher.aesgcm_enc.iv,
+                    info->cipher.aesgcm_enc.ivSz,
+                    info->cipher.aesgcm_enc.authTag,
+                    info->cipher.aesgcm_enc.authTagSz,
+                    info->cipher.aesgcm_enc.authIn,
+                    info->cipher.aesgcm_enc.authInSz);
+            }
+            else {
+                Aes* aes = info->cipher.aesgcm_dec.aes;
+                if (aes == NULL)
+                    return BAD_FUNC_ARG;
+
+                rc = IntelQaSymAesGcmDecrypt(dev,
+                    info->cipher.aesgcm_dec.out,
+                    info->cipher.aesgcm_dec.in,
+                    info->cipher.aesgcm_dec.sz,
+                    (const byte*)aes->devKey, aes->keylen,
+                    info->cipher.aesgcm_dec.iv,
+                    info->cipher.aesgcm_dec.ivSz,
+                    info->cipher.aesgcm_dec.authTag,
+                    info->cipher.aesgcm_dec.authTagSz,
+                    info->cipher.aesgcm_dec.authIn,
+                    info->cipher.aesgcm_dec.authInSz);
+            }
+        }
+    #endif /* HAVE_AESGCM */
+
+    #ifndef NO_DES3
+        if (info->cipher.type == WC_CIPHER_DES3) {
+            Des3* des = info->cipher.des3.des;
+            if (des == NULL)
+                return BAD_FUNC_ARG;
+
+            if (info->cipher.enc) {
+                rc = IntelQaSymDes3CbcEncrypt(dev,
+                    info->cipher.des3.out,
+                    info->cipher.des3.in,
+                    info->cipher.des3.sz,
+                    (byte*)des->devKey, DES3_KEYLEN,
+                    (byte*)des->reg, DES_BLOCK_SIZE);
+            }
+            else {
+                rc = IntelQaSymDes3CbcDecrypt(dev,
+                    info->cipher.des3.out,
+                    info->cipher.des3.in,
+                    info->cipher.des3.sz,
+                    (byte*)des->devKey, DES3_KEYLEN,
+                    (byte*)des->reg, DES_BLOCK_SIZE);
+            }
+        }
+    #endif /* !NO_DES3 */
+    }
+    #endif /* QAT_ENABLE_CRYPTO */
+
+    return rc;
+}
+
+/* -------------------------------------------------------------------------- */
+/* Public API */
+/* -------------------------------------------------------------------------- */
+
+int wc_CryptoCb_InitIntelQa(void)
+{
+    int devId, rc;
+
+    devId = IntelQaInit(NULL);
+    if (devId < 0) {
+        QLOG("Couldn't init the Intel QA\n");
+        devId = INVALID_DEVID;
+    }
+    else {
+        rc = IntelQaOpen(&qaDev, devId);
+        if (rc != 0) {
+            QLOG("Couldn't open the device\n");
+            IntelQaDeInit(devId);
+            devId = INVALID_DEVID;
+        }
+        else {
+            rc = wc_CryptoCb_RegisterDevice(devId,
+                IntelQaSymSync_CryptoDevCb, &qaDev);
+            if (rc != 0) {
+                QLOG("Couldn't register the device\n");
+                IntelQaClose(&qaDev);
+                IntelQaDeInit(devId);
+                devId = INVALID_DEVID;
+            }
+        }
+    }
+
+    return devId;
+}
+
+
+void wc_CryptoCb_CleanupIntelQa(int* id)
+{
+    if (id == NULL) {
+        return;
+    }
+    if (INVALID_DEVID != *id) {
+        wc_CryptoCb_UnRegisterDevice(*id);
+        IntelQaClose(&qaDev);
+        IntelQaDeInit(*id);
+        *id = INVALID_DEVID;
+    }
+}
+
+#endif /* WOLF_CRYPTO_CB */
+
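+/* Illustrative usage sketch (not part of the original patch): binding a
+ * wolfCrypt context to the registered device. wc_AesInit()/wc_AesFree() are
+ * the standard wolfCrypt calls; the flow shown is an assumption about
+ * typical caller code. */
+#if 0
+    Aes aes;
+    int devId = wc_CryptoCb_InitIntelQa();
+    if (devId != INVALID_DEVID) {
+        wc_AesInit(&aes, NULL, devId); /* AES ops now route through the cb */
+        /* ... wc_AesSetKey()/wc_AesCbcEncrypt() now dispatch to
+         *     IntelQaSymSync_CryptoDevCb; NOT_COMPILED_IN falls back to SW */
+        wc_AesFree(&aes);
+        wc_CryptoCb_CleanupIntelQa(&devId);
+    }
+#endif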
+
+/* -------------------------------------------------------------------------- */
+/* Memory allocator and deallocator */
+/* -------------------------------------------------------------------------- */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* use thread local for QAE variables (removing mutex requirement) */
+#ifdef USE_QAE_THREAD_LS
+    #include <pthread.h> /* for threadId tracking */
+    #define QAE_THREAD_LS THREAD_LS_T
+#else
+    #define QAE_THREAD_LS
+#endif
+
+/* these are used to align memory on 16 and 64 byte boundaries */
+#define ALIGNMENT_BASE     (16ul)
+#define ALIGNMENT_HW       (64ul)
+#define WOLF_MAGIC_NUM     0xA576F6C6641736EBUL /* (0xA)WolfAsyn(0xB) */
+#define WOLF_HEADER_ALIGN  ALIGNMENT_BASE
+
+#define QAE_NOT_NUMA_PAGE 0xFFFF
+typedef struct qaeMemHeader {
+#ifdef WOLFSSL_TRACK_MEMORY
+    struct qaeMemHeader* next;
+    struct qaeMemHeader* prev;
+    #ifdef WOLFSSL_DEBUG_MEMORY
+        const char* func;
+        unsigned int line;
+    #endif
+#endif
+    uint64_t magic;
+    void* heap;
+#ifdef USE_QAE_THREAD_LS
+    pthread_t threadId;
+#endif
+    size_t size;
+    word16 count;
+    word16 isNuma:1;
+    word16 reservedBits:15; /* use for future bits */
+    word16 type;
+    word16 numa_page_offset; /* use QAE_NOT_NUMA_PAGE if not NUMA */
+} ALIGN16 qaeMemHeader;
+
+#ifdef WOLFSSL_TRACK_MEMORY
+    typedef struct qaeMemStats {
+        long totalAllocs;   /* number of allocations */
+        long totalDeallocs; /* number of deallocations */
+        long totalBytes;    /* total number of bytes allocated */
+        long peakBytes;     /* concurrent max bytes */
+        long currentBytes;  /* total current bytes in use */
+    } qaeMemStats;
+
+    /* track allocations and report at end */
+    typedef struct qaeMemList {
+        qaeMemHeader* head;
+        qaeMemHeader* tail;
+        uint32_t count;
+    } qaeMemList;
+#endif /* WOLFSSL_TRACK_MEMORY */
+
+
+/* local variables */
+#ifndef USE_QAE_THREAD_LS
+    static pthread_mutex_t g_memLock = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+
+#ifdef WOLFSSL_TRACK_MEMORY
+    static qaeMemStats g_memStats;
+    static qaeMemList g_memList;
+    static pthread_mutex_t g_memStatLock = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+static WC_INLINE int qaeMemTypeIsNuma(int type)
+{
+    int isNuma = 0;
+
+    switch (type) {
+        case DYNAMIC_TYPE_ASYNC_NUMA:
+        case DYNAMIC_TYPE_ASYNC_NUMA64:
+        case DYNAMIC_TYPE_WOLF_BIGINT:
+        case DYNAMIC_TYPE_PRIVATE_KEY:
+        case DYNAMIC_TYPE_PUBLIC_KEY:
+        case DYNAMIC_TYPE_AES_BUFFER:
+        case DYNAMIC_TYPE_RSA_BUFFER:
+        case DYNAMIC_TYPE_ECC_BUFFER:
+        case DYNAMIC_TYPE_SIGNATURE:
+        case DYNAMIC_TYPE_DIGEST:
+        case DYNAMIC_TYPE_SECRET:
+        case DYNAMIC_TYPE_SEED:
+        case DYNAMIC_TYPE_SALT:
+        {
+            isNuma = 1;
+            break;
+        }
+        case DYNAMIC_TYPE_OUT_BUFFER:
+        case DYNAMIC_TYPE_IN_BUFFER:
+        {
+        #if !defined(WC_ASYNC_NO_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+            isNuma = 1;
+        #else
+            isNuma = 0;
+        #endif
+            break;
+        }
+        default:
+            isNuma = 0;
+            break;
+    }
+    return isNuma;
+}
+
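+/* Layout note (added commentary, not from the original patch): allocations
+ * below prepend a qaeMemHeader and hand out the address just past it. Because
+ * the returned pointer is WOLF_HEADER_ALIGN aligned, _qaeMemFree() can walk
+ * back to the header with pointer arithmetic alone:
+ *
+ *     [ qaeMemHeader (ALIGN16) ][ caller data ... ]
+ *     ^ malloc/qaeMemAllocNUMA  ^ pointer returned to the caller
+ *
+ *     header = (byte*)ptr - ((size_t)ptr % WOLF_HEADER_ALIGN)
+ *                         - sizeof(qaeMemHeader);
+ */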
%p\n", ptr); + return; + } + + /* cache values for later */ + size = header->size; + +#ifdef WOLFSSL_DEBUG_MEMORY +#ifdef WOLFSSL_DEBUG_MEMORY_PRINT + printf("Free: %p (%u) at %s:%u, heap %p, type %d, count %d\n", + origPtr, (unsigned int)size, func, line, heap, type, header->count); +#else + (void)func; + (void)line; +#endif +#endif + (void)type; + + /* adjust free count */ + header->count--; + + /* check header count */ + if (header->count > 0) { + /* go ahead and return if still in use */ + return; + } + +#ifdef WOLFSSL_TRACK_MEMORY + if (pthread_mutex_lock(&g_memStatLock) == 0) { + g_memStats.currentBytes -= size; + g_memStats.totalDeallocs++; + + if (header == g_memList.head && header == g_memList.tail) { + g_memList.head = NULL; + g_memList.tail = NULL; + } + else if (header == g_memList.head) { + g_memList.head = header->next; + g_memList.head->prev = NULL; + } + else if (header == g_memList.tail) { + g_memList.tail = header->prev; + g_memList.tail->next = NULL; + } + else { + qaeMemHeader* next = header->next; + qaeMemHeader* prev = header->prev; + if (next) + next->prev = prev; + if (prev) + prev->next = next; + } + g_memList.count--; + + pthread_mutex_unlock(&g_memStatLock); + } +#endif + + (void)heap; + (void)size; + (void)origPtr; + +#ifdef WOLFSSL_DEBUG_MEMORY + /* make sure magic is gone */ + header->magic = 0; +#endif + + /* free type */ + if (header->isNuma && header->numa_page_offset != QAE_NOT_NUMA_PAGE) { + qaeMemFreeNUMA(&ptr); + } + else { + free(ptr); + } +} + + +static void* _qaeMemAlloc(size_t size, void* heap, int type +#ifdef WOLFSSL_DEBUG_MEMORY + , const char* func, unsigned int line +#endif +) +{ + void* ptr = NULL; + qaeMemHeader* header = NULL; + int isNuma; + int alignment = ALIGNMENT_BASE; + word16 page_offset = QAE_NOT_NUMA_PAGE; + + /* make sure all allocations are aligned */ + if ((size % WOLF_HEADER_ALIGN) != 0) { + size += (WOLF_HEADER_ALIGN - (size % WOLF_HEADER_ALIGN)); + } + + isNuma = qaeMemTypeIsNuma(type); + if (type == DYNAMIC_TYPE_ASYNC_NUMA64) + alignment = ALIGNMENT_HW; + + /* allocate type */ + if (isNuma) { + /* Node is typically 0 */ + page_offset = 0; + ptr = qaeMemAllocNUMA((Cpa32U)(size + sizeof(qaeMemHeader)), 0, + alignment); + } + else { + isNuma = 0; + ptr = malloc(size + sizeof(qaeMemHeader)); + } + + /* add header */ + if (ptr) { + header = (qaeMemHeader*)ptr; + ptr = (byte*)ptr + sizeof(qaeMemHeader); + header->magic = WOLF_MAGIC_NUM; + header->heap = heap; + header->size = size; + header->type = type; + header->count = 1; + header->isNuma = isNuma; + header->numa_page_offset = page_offset; + #ifdef USE_QAE_THREAD_LS + header->threadId = pthread_self(); + #endif + + #ifdef WOLFSSL_TRACK_MEMORY + if (pthread_mutex_lock(&g_memStatLock) == 0) { + g_memStats.totalAllocs++; + g_memStats.totalBytes += size; + g_memStats.currentBytes += size; + if (g_memStats.currentBytes > g_memStats.peakBytes) + g_memStats.peakBytes = g_memStats.currentBytes; + + #ifdef WOLFSSL_DEBUG_MEMORY + header->func = func; + header->line = line; + #endif + + /* Setup event */ + header->next = NULL; + if (g_memList.tail == NULL) { + g_memList.head = header; + } + else { + g_memList.tail->next = header; + header->prev = g_memList.tail; + } + g_memList.tail = header; /* add to the end either way */ + g_memList.count++; + + pthread_mutex_unlock(&g_memStatLock); + } + #endif + } + +#ifdef WOLFSSL_DEBUG_MEMORY +#ifdef WOLFSSL_DEBUG_MEMORY_PRINT + printf("Alloc: %p (%u) at %s:%u, heap %p, type %d\n", + ptr, (unsigned int)size, func, line, heap, type); +#else + 
(void)func; + (void)line; +#endif +#endif + + (void)heap; + + return ptr; +} + +/* Public Functions */ +void* wc_CryptoCb_IntelQaMalloc(size_t size, void* heap, int type +#ifdef WOLFSSL_DEBUG_MEMORY + , const char* func, unsigned int line +#endif +) +{ + void* ptr; + +#ifndef USE_QAE_THREAD_LS + int ret = pthread_mutex_lock(&g_memLock); + if (ret != 0) { + printf("Alloc: Error(%d) on mutex lock\n", ret); + return NULL; + } +#endif + + ptr = _qaeMemAlloc(size, heap, type + #ifdef WOLFSSL_DEBUG_MEMORY + , func, line + #endif + ); + +#ifndef USE_QAE_THREAD_LS + pthread_mutex_unlock(&g_memLock); +#endif + + return ptr; +} + +void wc_CryptoCb_IntelQaFree(void *ptr, void* heap, int type +#ifdef WOLFSSL_DEBUG_MEMORY + , const char* func, unsigned int line +#endif +) +{ +#ifndef USE_QAE_THREAD_LS + int ret = pthread_mutex_lock(&g_memLock); + if (ret != 0) { + printf("Free: Error(%d) on mutex lock\n", ret); + return; + } +#endif + + _qaeMemFree(ptr, heap, type + #ifdef WOLFSSL_DEBUG_MEMORY + , func, line + #endif + ); + +#ifndef USE_QAE_THREAD_LS + pthread_mutex_unlock(&g_memLock); +#endif +} + +void* wc_CryptoCb_IntelQaRealloc(void *ptr, size_t size, void* heap, int type +#ifdef WOLFSSL_DEBUG_MEMORY + , const char* func, unsigned int line +#endif +) +{ + void* newPtr = NULL; + void* origPtr = ptr; + qaeMemHeader* header = NULL; + byte allocNew = 1; + int newIsNuma = -1, ptrIsNuma = -1; + size_t copySize = 0; + +#ifndef USE_QAE_THREAD_LS + int ret = pthread_mutex_lock(&g_memLock); + if (ret != 0) { + printf("Realloc: Error(%d) on mutex lock\n", ret); + return NULL; + } +#endif + + (void)heap; + + if (ptr) { + /* get header pointer and align */ + header = (qaeMemHeader*)(((size_t)ptr - + ((size_t)ptr % WOLF_HEADER_ALIGN)) - sizeof(qaeMemHeader)); + if (header->magic == WOLF_MAGIC_NUM) { + newIsNuma = qaeMemTypeIsNuma(type); + ptrIsNuma = (header->numa_page_offset != QAE_NOT_NUMA_PAGE) ? 1 : 0; + + /* for non-NUMA, treat as normal REALLOC */ + if (newIsNuma == 0 && ptrIsNuma == 0) { + allocNew = 1; + } + /* if matching NUMA type and size fits, use existing */ + else if (newIsNuma == ptrIsNuma && header->size >= size) { + + #ifdef USE_QAE_THREAD_LS + if (header->threadId != pthread_self()) { + allocNew = 1; + #if 0 + printf("Realloc %p from different thread! 
orig %lx this %lx\n", + origPtr, header->threadId, pthread_self()); + #endif + } + else + #endif + { + /* use existing pointer and increment counter */ + header->count++; + newPtr = origPtr; + allocNew = 0; + } + } + + copySize = header->size; + } + else { + copySize = size; + } + } + + if (allocNew) { + newPtr = _qaeMemAlloc(size, heap, type + #ifdef WOLFSSL_DEBUG_MEMORY + , func, line + #endif + ); + if (newPtr && ptr) { + /* only copy min of new and old size to new pointer */ + if (copySize > size) + copySize = size; + XMEMCPY(newPtr, ptr, copySize); + + if (newIsNuma == 0 && ptrIsNuma == 0) { + /* for non-NUMA, treat as normal REALLOC and free old pointer */ + _qaeMemFree(ptr, heap, type + #ifdef WOLFSSL_DEBUG_MEMORY + , func, line + #endif + ); + } + } + } + +#ifndef USE_QAE_THREAD_LS + pthread_mutex_unlock(&g_memLock); +#endif + +#ifdef WOLFSSL_DEBUG_MEMORY +#ifdef WOLFSSL_DEBUG_MEMORY_PRINT + if (allocNew) { + printf("Realloc: New %p -> %p (%u) at %s:%u, heap %p, type %d\n", + origPtr, newPtr, (unsigned int)size, func, line, heap, type); + } + else { + printf("Realloc: Reuse %p (%u) at %s:%u, heap %p, type %d, count %d\n", + origPtr, (unsigned int)size, func, line, header->heap, header->type, header->count); + } +#else + (void)func; + (void)line; +#endif +#endif + + return newPtr; +} + + +#ifdef WOLFSSL_TRACK_MEMORY +int InitMemoryTracker(void) +{ + if (pthread_mutex_lock(&g_memStatLock) == 0) { + g_memStats.totalAllocs = 0; + g_memStats.totalDeallocs= 0; + g_memStats.totalBytes = 0; + g_memStats.peakBytes = 0; + g_memStats.currentBytes = 0; + + XMEMSET(&g_memList, 0, sizeof(g_memList)); + + pthread_mutex_unlock(&g_memStatLock); + } + + return 0; +} + +void ShowMemoryTracker(void) +{ + if (pthread_mutex_lock(&g_memStatLock) == 0) { + printf("total Allocs = %9ld\n", g_memStats.totalAllocs); + printf("total Deallocs = %9ld\n", g_memStats.totalDeallocs); + printf("total Bytes = %9ld\n", g_memStats.totalBytes); + printf("peak Bytes = %9ld\n", g_memStats.peakBytes); + printf("current Bytes = %9ld\n", g_memStats.currentBytes); + + if (g_memList.count > 0) { + + /* print list of allocations */ + qaeMemHeader* header; + for (header = g_memList.head; header != NULL; header = header->next) { + printf("Leak: Ptr %p, Size %u, Type %d, Heap %p" + #ifdef WOLFSSL_DEBUG_MEMORY + ", Func %s, Line %d" + #endif + "\n", + (byte*)header + sizeof(qaeMemHeader), (unsigned int)header->size, + header->type, header->heap + #ifdef WOLFSSL_DEBUG_MEMORY + , header->func, header->line + #endif + ); + } + } + + pthread_mutex_unlock(&g_memStatLock); + + /* cleanup lock */ + pthread_mutex_destroy(&g_memStatLock); + } +} +#endif /* WOLFSSL_TRACK_MEMORY */ + +#ifdef QAT_DEMO_MAIN + +/* AES GCM */ +static const byte aesgcm_k[] = { + 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, + 0x99, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, + 0x77, 0x88, 0x99, 0x00, 0x11, 0x22, 0x33, 0x44, + 0x55, 0x66, 0x77, 0x88, 0x99, 0x00, 0x11, 0x22 +}; + +static const byte aesgcm_iv[] = { + 0xca, 0xfe, 0xca, 0xfe, 0xca, 0xfe, 0xca, 0xfe, + 0xca, 0xfe, 0xca, 0xfe +}; + +static const byte aesgcm_a[] = { + 0xde, 0xad, 0xde, 0xad, 0xde, 0xad, 0xde, 0xad, + 0xde, 0xad, 0xde, 0xad, 0xde, 0xad, 0xde, 0xad, + 0xde, 0xad, 0xde, 0xad +}; + +static const byte aesgcm_p[] = { + 0x79, 0x84, 0x86, 0x44, 0x68, 0x45, 0x15, 0x61, + 0x86, 0x54, 0x66, 0x56, 0x54, 0x54, 0x31, 0x54, + 0x64, 0x64, 0x68, 0x45, 0x15, 0x15, 0x61, 0x61, + 0x51, 0x51, 0x51, 0x51, 0x51, 0x56, 0x14, 0x11, + 0x72, 0x13, 0x51, 0x82, 0x84, 0x56, 0x74, 0x53, + 0x45, 0x34, 0x65, 0x15, 0x46, 
0x14, 0x67, 0x55,
+    0x16, 0x14, 0x67, 0x54, 0x65, 0x47, 0x14, 0x67,
+    0x46, 0x74, 0x65, 0x46
+};
+
+static const byte aesgcm_c[] = {
+    0x59, 0x85, 0x02, 0x97, 0xE0, 0x4D, 0xFC, 0x5C,
+    0x03, 0xCC, 0x83, 0x64, 0xCE, 0x28, 0x0B, 0x95,
+    0x78, 0xEC, 0x93, 0x40, 0xA1, 0x8D, 0x21, 0xC5,
+    0x48, 0x6A, 0x39, 0xBA, 0x4F, 0x4B, 0x8C, 0x95,
+    0x6F, 0x8C, 0xF6, 0x9C, 0xD0, 0xA5, 0x8D, 0x67,
+    0xA1, 0x32, 0x11, 0xE7, 0x2E, 0xF6, 0x63, 0xAF,
+    0xDE, 0xD4, 0x7D, 0xEC, 0x15, 0x01, 0x58, 0xCB,
+    0xE3, 0x7B, 0xC6, 0x94,
+};
+
+static byte aesgcm_t[] = {
+    0x5D, 0x10, 0x3F, 0xC7, 0x22, 0xC7, 0x21, 0x29
+};
+
+
+/* simple example of using AES-GCM encrypt with Intel QA */
+int main(int argc, char** argv)
+{
+#if !defined(NO_AES) && defined(HAVE_AESGCM)
+    int ret;
+    IntelQaDev dev;
+    byte out[256];
+    byte tmp[256];
+    word32 tmpLen;
+#endif
+
+    (void)argc;
+    (void)argv;
+
+#ifdef QAT_DEBUG
+    wolfSSL_Debugging_ON();
+#endif
+
+    IntelQaInit(NULL);
+
+#ifndef NO_AES
+#ifdef HAVE_AESGCM
+    /* AES Test */
+    IntelQaOpen(&dev, 0);
+    dev.event.ret = WC_PENDING_E;
+    tmpLen = sizeof(aesgcm_t);
+    XMEMSET(out, 0, sizeof(out));
+    XMEMSET(tmp, 0, sizeof(tmp));
+
+    ret = IntelQaSymAesGcmEncrypt(&dev, out, aesgcm_p, sizeof(aesgcm_p),
+        aesgcm_k, sizeof(aesgcm_k), aesgcm_iv, sizeof(aesgcm_iv),
+        tmp, tmpLen, aesgcm_a, sizeof(aesgcm_a));
+    printf("AES GCM Encrypt: Ret=%d, Tag Len=%d\n", ret, tmpLen);
+    IntelQaClose(&dev);
+#endif /* HAVE_AESGCM */
+#endif /* !NO_AES */
+
+    IntelQaDeInit(0);
+
+    return 0;
+}
+
+#endif
+
+#endif /* HAVE_INTEL_QA_SYNC */
diff --git a/client/wolfssl/wolfcrypt/src/port/mynewt/mynewt_port.c b/client/wolfssl/wolfcrypt/src/port/mynewt/mynewt_port.c
new file mode 100644
index 0000000..8a4e903
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/mynewt/mynewt_port.c
@@ -0,0 +1,146 @@
+/* mynewt_port.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#if defined(WOLFSSL_APACHE_MYNEWT)
+#ifndef NO_FILESYSTEM
+#include "fs/fs.h"
+#define FILE struct fs_file
+
+FILE* mynewt_fopen(const char * restrict path, const char * restrict mode)
+{
+    FILE *file;
+    uint8_t access_flags = 0;
+    const char *p = mode;
+    while(*p != '\0') {
+        switch(*p) {
+            case 'r':
+            {
+                access_flags |= FS_ACCESS_READ;
+                if(*(p+1) == '+') {
+                    access_flags |= FS_ACCESS_WRITE;
+                }
+            }
+            break;
+
+            case 'w':
+            {
+                access_flags |= (FS_ACCESS_WRITE | FS_ACCESS_TRUNCATE);
+                if(*(p+1) == '+') {
+                    access_flags |= FS_ACCESS_READ;
+                }
+            }
+            break;
+
+            case 'a':
+            {
+                access_flags |= (FS_ACCESS_WRITE | FS_ACCESS_APPEND);
+                if(*(p+1) == '+') {
+                    access_flags |= FS_ACCESS_READ;
+                }
+            }
+            break;
+        }
+        p++;
+    }
+
+    /* Open the file with the access flags parsed from the mode string. */
+    int rc = fs_open(path, access_flags, &file);
+    if (rc != 0) {
+        return NULL;
+    }
+    return file;
+}
+
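+/* Illustrative usage sketch (not part of the original patch); the path is a
+ * made-up example. The mode string maps onto fs_open() access flags, e.g.
+ * "r" -> FS_ACCESS_READ, "w" -> FS_ACCESS_WRITE|FS_ACCESS_TRUNCATE,
+ * "a+" -> FS_ACCESS_WRITE|FS_ACCESS_APPEND|FS_ACCESS_READ. */
+#if 0
+    FILE* fp = mynewt_fopen("/cfg/ca-cert.der", "r");
+    if (fp != NULL) {
+        char buf[128];
+        size_t n = mynewt_fread(buf, 1, sizeof(buf), fp);
+        (void)n;
+        mynewt_fclose(fp);
+    }
+#endif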
+int mynewt_fseek(FILE *stream, long offset, int whence)
+{
+    uint32_t fs_offset = 0;
+
+    switch(whence) {
+        case 0: /* SEEK_SET */
+        {
+            /* fixed: previously added offset to an uninitialized value */
+            fs_offset = offset;
+        }
+        break;
+
+        case 1: /* SEEK_CUR */
+        {
+            fs_offset = fs_getpos(stream);
+            fs_offset += offset;
+        }
+        break;
+
+        case 2: /* SEEK_END */
+        {
+            fs_filelen(stream, &fs_offset);
+            fs_offset += offset;
+        }
+        break;
+    }
+
+    fs_seek(stream, fs_offset);
+
+    return 0;
+}
+
+long mynewt_ftell(FILE *stream)
+{
+    /* report the current position without moving it */
+    return (long)fs_getpos(stream);
+}
+
+void mynewt_rewind(FILE *stream)
+{
+    fs_seek(stream, 0);
+}
+
+size_t mynewt_fread(void *restrict ptr, size_t size, size_t nitems, FILE *restrict stream)
+{
+    size_t to_read = size * nitems;
+    uint32_t read_size;
+    int rc = fs_read(stream, to_read, ptr, &read_size);
+    if(rc != 0) {
+        return 0;
+    }
+
+    return (size_t)read_size;
+}
+
+size_t mynewt_fwrite(const void *restrict ptr, size_t size, size_t nitems, FILE *restrict stream)
+{
+    size_t to_write = size * nitems;
+    int rc = fs_write(stream, ptr, to_write);
+    if(rc != 0) {
+        return 0;
+    }
+
+    return to_write;
+}
+
+int mynewt_fclose(FILE *stream)
+{
+    fs_close(stream);
+    return 0;
+}
+
+#endif /* !NO_FILESYSTEM */
+#endif /* if defined(WOLFSSL_APACHE_MYNEWT) */
diff --git a/client/wolfssl/wolfcrypt/src/port/nrf51.c b/client/wolfssl/wolfcrypt/src/port/nrf51.c
new file mode 100644
index 0000000..c7db4b0
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/nrf51.c
@@ -0,0 +1,220 @@
+/* nrf51.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef WOLFSSL_NRF51
+
+#include "bsp.h"
+#include "nrf_delay.h"
+#include "app_uart.h"
+#include "app_error.h"
+#include "nrf_drv_rng.h"
+#include "nrf_drv_rtc.h"
+#include "nrf_drv_clock.h"
+#include "nrf_ecb.h"
+
+#ifdef SOFTDEVICE_PRESENT
+    #include "softdevice_handler.h"
+    #include "nrf_soc.h"
+#endif /* SOFTDEVICE_PRESENT */
+
+/* RTC */
+#ifndef NO_CRYPT_BENCHMARK
+static byte mRtcInitDone = 0;
+static int mRtcSec = 0;
+const nrf_drv_rtc_t rtc = NRF_DRV_RTC_INSTANCE(0); /**< Declaring an instance of nrf_drv_rtc for RTC0. */
+#endif /* !NO_CRYPT_BENCHMARK */
+
+/* AES */
+#if !defined(NO_AES) && !defined(SOFTDEVICE_PRESENT)
+    static byte mAesInitDone = 0;
+#endif
+
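+/* Illustrative sketch (not part of the original patch): nrf51_random_generate()
+ * below matches the shape wolfCrypt's RNG expects from a custom block
+ * generator, so a typical user_settings.h hook for this port would be: */
+#if 0
+    #define CUSTOM_RAND_GENERATE_BLOCK nrf51_random_generate
+#endif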
+/** @brief Function for getting a vector of random bytes.
+ *
+ * @param[out] output  Pointer to a uint8_t buffer for storing the bytes.
+ * @param[in]  size    Number of bytes to take from the pool and place in output.
+ *
+ * @retval 0 = Success, else error
+ */
+int nrf51_random_generate(byte* output, word32 size)
+{
+    int remaining = size, length, pos = 0;
+    uint8_t available;
+    uint32_t err_code;
+
+    /* Make sure RNG is running */
+    err_code = nrf_drv_rng_init(NULL);
+    if (err_code != NRF_SUCCESS && err_code != NRF_ERROR_INVALID_STATE) {
+        return -1;
+    }
+
+    while (remaining > 0) {
+        err_code = nrf_drv_rng_bytes_available(&available);
+        if (err_code == NRF_SUCCESS) {
+            length = (remaining < available) ? remaining : available;
+            if (length > 0) {
+                err_code = nrf_drv_rng_rand(&output[pos], length);
+                remaining -= length;
+                pos += length;
+            }
+        }
+
+        if (err_code != NRF_SUCCESS) {
+            break;
+        }
+    }
+
+    return (err_code == NRF_SUCCESS) ? 0 : -1;
+}
+
+#if !defined(NO_AES) && defined(WOLFSSL_NRF51_AES)
+
+#ifdef SOFTDEVICE_PRESENT
+static const byte* nRF51AesKey = NULL;
+#endif
+int nrf51_aes_set_key(const byte* key)
+{
+#ifdef SOFTDEVICE_PRESENT
+    nRF51AesKey = key;
+#else
+    if (!mAesInitDone) {
+        nrf_ecb_init();
+        mAesInitDone = 1;
+    }
+    nrf_ecb_set_key(key);
+#endif
+    return 0;
+}
+
+
+int nrf51_aes_encrypt(const byte* in, const byte* key, word32 rounds, byte* out)
+{
+    int ret;
+    uint32_t err_code = 0;
+#ifdef SOFTDEVICE_PRESENT
+    nrf_ecb_hal_data_t ecb_hal_data;
+#endif
+
+    (void)rounds; /* unused; the nRF51 ECB peripheral is fixed AES-128 */
+
+    /* Set key */
+    ret = nrf51_aes_set_key(key);
+    if (ret != 0) {
+        return ret;
+    }
+
+#ifdef SOFTDEVICE_PRESENT
+    /* Define ECB record */
+    XMEMCPY(ecb_hal_data.key, nRF51AesKey, SOC_ECB_KEY_LENGTH);
+    XMEMCPY(ecb_hal_data.cleartext, in, SOC_ECB_CLEARTEXT_LENGTH);
+    XMEMSET(ecb_hal_data.ciphertext, 0, SOC_ECB_CIPHERTEXT_LENGTH);
+
+    /* Perform block encrypt */
+    err_code = sd_ecb_block_encrypt(&ecb_hal_data);
+    if (err_code != NRF_SUCCESS) {
+        return -1;
+    }
+
+    /* Grab result */
+    XMEMCPY(out, ecb_hal_data.ciphertext, SOC_ECB_CIPHERTEXT_LENGTH);
+#else
+    err_code = nrf_ecb_crypt(out, in);
+    err_code = err_code ? 0 : -1; /* nrf_ecb_crypt returns true on success */
+#endif
+
+    return err_code;
+}
+
+#endif /* !NO_AES && WOLFSSL_NRF51_AES */
+
+
+#ifndef NO_CRYPT_BENCHMARK
+static void rtc_handler(nrf_drv_rtc_int_type_t int_type)
+{
+    if (int_type == NRF_DRV_RTC_INT_COMPARE0)
+    {
+        mRtcSec++;
+        nrf_drv_rtc_counter_clear(&rtc);
+        nrf_drv_rtc_int_enable(&rtc, RTC_CHANNEL_INT_MASK(0));
+
+#ifdef BSP_LED_0
+        nrf_gpio_pin_toggle(BSP_LED_0);
+#endif
+    }
+}
+
+static void rtc_config(void)
+{
+    uint32_t err_code;
+
+    // Start the internal LFCLK XTAL oscillator
+    err_code = nrf_drv_clock_init(NULL);
+    APP_ERROR_CHECK(err_code);
+
+    nrf_drv_clock_lfclk_request();
+
+    // Initialize RTC instance
+    err_code = nrf_drv_rtc_init(&rtc, NULL, rtc_handler);
+    APP_ERROR_CHECK(err_code);
+
+    // Enable tick event
+    nrf_drv_rtc_tick_enable(&rtc, false);
+
+    // Set compare channel to trigger interrupt after 1 second
+    err_code = nrf_drv_rtc_cc_set(&rtc, 0, RTC0_CONFIG_FREQUENCY, true);
+    APP_ERROR_CHECK(err_code);
+
+    // Power on RTC instance
+    nrf_drv_rtc_enable(&rtc);
+}
+
+static int rtc_get_ms(void)
+{
+    /* Prescaler is 12-bit for COUNTER: frequency = (32768/(PRESCALER+1)) */
+    int frequency = (32768 / (rtc_prescaler_get(rtc.p_reg) + 1));
+    int counter = nrf_drv_rtc_counter_get(&rtc);
+
+    /* Convert with rounding frequency to milliseconds */
+    return ((counter * 1000) + (frequency / 2) ) / frequency;
+}
+
+double current_time(int reset)
+{
+    double time;
+
+    (void)reset;
+
+    if (!mRtcInitDone) {
+        rtc_config();
+        mRtcInitDone = 1;
+    }
+
+    time = mRtcSec;
+    time += (double)rtc_get_ms() / 1000;
+
+    return time;
+}
+#endif /* !NO_CRYPT_BENCHMARK */
+
+#endif /* WOLFSSL_NRF51 */
diff --git a/client/wolfssl/wolfcrypt/src/port/nxp/ksdk_port.c b/client/wolfssl/wolfcrypt/src/port/nxp/ksdk_port.c
new file mode 100644
index 0000000..a5cc737
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/nxp/ksdk_port.c
@@ -0,0 +1,1731 @@
+/* ksdk_port.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+/* If FREESCALE_LTC_TFM or FREESCALE_LTC_ECC */
+#if defined(FREESCALE_LTC_TFM) || defined(FREESCALE_LTC_ECC)
+
+#include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/asn.h>
+#include <wolfssl/wolfcrypt/ecc.h>
+#include <wolfssl/wolfcrypt/curve25519.h>
+
+#define ERROR_OUT(res) { ret = (res); goto done; }
+
+
+int ksdk_port_init(void)
+{
+#if defined(FREESCALE_LTC_TFM)
+    LTC_Init(LTC0);
+#endif
+
+    return 0;
+}
+
+/* Reverse array in memory (in place) */
+static void ltc_reverse_array(uint8_t *src, size_t src_len)
+{
+    unsigned int i;
+
+    for (i = 0; i < src_len / 2; i++) {
+        uint8_t tmp;
+
+        tmp = src[i];
+        src[i] = src[src_len - 1 - i];
+        src[src_len - 1 - i] = tmp;
+    }
+}
+
+
+#ifndef WOLFSSL_SP_MATH
+/* same as mp_to_unsigned_bin() with mp_reverse() skipped */
+static int mp_to_unsigned_lsb_bin(mp_int *a, unsigned char *b)
+{
+    int res;
+    mp_int t;
+
+    res = mp_init_copy(&t, a);
+    if (res == MP_OKAY) {
+        res = mp_to_unsigned_bin_at_pos(0, &t, b);
+        if (res >= 0)
+            res = 0;
+    #ifndef USE_FAST_MATH
+        mp_clear(&t);
+    #endif
+    }
+
+    return res;
+}
+#endif
+
+static int ltc_get_lsb_bin_from_mp_int(uint8_t *dst, mp_int *A, uint16_t *psz)
+{
+    int res;
+    uint16_t sz;
+
+    sz = mp_unsigned_bin_size(A);
+#ifndef WOLFSSL_SP_MATH
+    res = mp_to_unsigned_lsb_bin(A, dst); /* result is lsbyte at lowest addr as required by LTC */
+#else
+    res = mp_to_unsigned_bin(A, dst);
+    if (res == MP_OKAY) {
+        ltc_reverse_array(dst, sz);
+    }
+#endif
+    *psz = sz;
+    return res;
+}
+
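+/* Worked example (added commentary, not from the original patch): LTC PKHA
+ * expects little-endian operands ("lsbyte at lowest address"), while
+ * mp_to_unsigned_bin() emits big-endian. For the 3-byte value 0x012345:
+ *
+ *     mp_to_unsigned_bin:      { 0x01, 0x23, 0x45 }
+ *     after ltc_reverse_array: { 0x45, 0x23, 0x01 }   (*psz = 3)
+ */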
+/* LTC TFM */
+#if defined(FREESCALE_LTC_TFM)
+
+
+/* these functions are used by wolfSSL upper layers (like RSA) */
+
+/* c = a * b */
+int mp_mul(mp_int *A, mp_int *B, mp_int *C)
+{
+    int res = MP_OKAY;
+    int szA, szB;
+    szA = mp_unsigned_bin_size(A);
+    szB = mp_unsigned_bin_size(B);
+
+    /* if unsigned mul can fit into LTC PKHA let's use it, otherwise call software mul */
+    if ((szA <= LTC_MAX_INT_BYTES / 2) && (szB <= LTC_MAX_INT_BYTES / 2)) {
+        int neg = 0;
+
+#ifndef WOLFSSL_SP_MATH
+        neg = (A->sign == B->sign) ? MP_ZPOS : MP_NEG;
+#endif
+
+        /* unsigned multiply */
+        uint8_t *ptrA = (uint8_t *)XMALLOC(LTC_MAX_INT_BYTES, 0, DYNAMIC_TYPE_BIGINT);
+        uint8_t *ptrB = (uint8_t *)XMALLOC(LTC_MAX_INT_BYTES, 0, DYNAMIC_TYPE_BIGINT);
+        uint8_t *ptrC = (uint8_t *)XMALLOC(LTC_MAX_INT_BYTES, 0, DYNAMIC_TYPE_BIGINT);
+
+        if (ptrA && ptrB && ptrC) {
+            uint16_t sizeA, sizeB;
+
+            res = ltc_get_lsb_bin_from_mp_int(ptrA, A, &sizeA);
+            if (res == MP_OKAY)
+                res = ltc_get_lsb_bin_from_mp_int(ptrB, B, &sizeB);
+            if (res == MP_OKAY) {
+                /* a modulus of all 0xFF bytes exceeds any possible product of
+                 * two half-size operands, so ModMul acts as a plain multiply */
+                XMEMSET(ptrC, 0xFF, LTC_MAX_INT_BYTES);
+
+                if (LTC_PKHA_ModMul(LTC_BASE, ptrA, sizeA, ptrB, sizeB,
+                        ptrC, LTC_MAX_INT_BYTES, ptrB, &sizeB,
+                        kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue,
+                        kLTC_PKHA_NormalValue,
+                        kLTC_PKHA_TimingEqualized) != kStatus_Success) {
+                    res = MP_VAL;
+                }
+            }
+            if (res == MP_OKAY) {
+                ltc_reverse_array(ptrB, sizeB);
+                res = mp_read_unsigned_bin(C, ptrB, sizeB);
+            }
+        }
+        else {
+            res = MP_MEM;
+        }
+
+#ifndef WOLFSSL_SP_MATH
+        /* fix sign */
+        C->sign = neg;
+#endif
+        if (ptrA) {
+            XFREE(ptrA, NULL, DYNAMIC_TYPE_BIGINT);
+        }
+        if (ptrB) {
+            XFREE(ptrB, NULL, DYNAMIC_TYPE_BIGINT);
+        }
+        if (ptrC) {
+            XFREE(ptrC, NULL, DYNAMIC_TYPE_BIGINT);
+        }
+    }
+    else {
+#ifdef WOLFSSL_SP_MATH
+        res = sp_mul(A, B, C);
+#else
+        res = wolfcrypt_mp_mul(A, B, C);
+#endif
+    }
+    return res;
+}
+
+/* c = a mod b, 0 <= c < b */
+int mp_mod(mp_int *a, mp_int *b, mp_int *c)
+{
+    int res = MP_OKAY;
+#if defined(FREESCALE_LTC_TFM_RSA_4096_ENABLE)
+    int szA, szB;
+    szA = mp_unsigned_bin_size(a);
+    szB = mp_unsigned_bin_size(b);
+    if ((szA <= LTC_MAX_INT_BYTES) && (szB <= LTC_MAX_INT_BYTES))
+    {
+#endif /* FREESCALE_LTC_TFM_RSA_4096_ENABLE */
+    int neg = 0;
+    uint8_t *ptrA = (uint8_t *)XMALLOC(LTC_MAX_INT_BYTES, 0, DYNAMIC_TYPE_BIGINT);
+    uint8_t *ptrB = (uint8_t *)XMALLOC(LTC_MAX_INT_BYTES, 0, DYNAMIC_TYPE_BIGINT);
+    uint8_t *ptrC = (uint8_t *)XMALLOC(LTC_MAX_INT_BYTES, 0, DYNAMIC_TYPE_BIGINT);
+
+#ifndef WOLFSSL_SP_MATH
+    /* get sign for the result */
+    neg = (a->sign == b->sign) ?
MP_ZPOS : MP_NEG; +#endif + + /* get remainder of unsigned a divided by unsigned b */ + if (ptrA && ptrB && ptrC) { + uint16_t sizeA, sizeB, sizeC; + + res = ltc_get_lsb_bin_from_mp_int(ptrA, a, &sizeA); + if (res == MP_OKAY) + res = ltc_get_lsb_bin_from_mp_int(ptrB, b, &sizeB); + if (res == MP_OKAY) { + if (kStatus_Success == + LTC_PKHA_ModRed(LTC_BASE, ptrA, sizeA, ptrB, sizeB, ptrC, &sizeC, kLTC_PKHA_IntegerArith)) + { + ltc_reverse_array(ptrC, sizeC); + res = mp_read_unsigned_bin(c, ptrC, sizeC); + } + else { + res = MP_VAL; + } + } + } + else { + res = MP_MEM; + } + +#ifndef WOLFSSL_SP_MATH + /* fix sign */ + c->sign = neg; +#endif + + if (ptrA) { + XFREE(ptrA, NULL, DYNAMIC_TYPE_BIGINT); + } + if (ptrB) { + XFREE(ptrB, NULL, DYNAMIC_TYPE_BIGINT); + } + if (ptrC) { + XFREE(ptrC, NULL, DYNAMIC_TYPE_BIGINT); + } +#if defined(FREESCALE_LTC_TFM_RSA_4096_ENABLE) + } + else { + res = wolfcrypt_mp_mod(a, b, c); + } +#endif /* FREESCALE_LTC_TFM_RSA_4096_ENABLE */ + return res; +} + +/* c = 1/a (mod b) for odd b only */ +int mp_invmod(mp_int *a, mp_int *b, mp_int *c) +{ + int res = MP_OKAY; +#if defined(FREESCALE_LTC_TFM_RSA_4096_ENABLE) + int szA, szB; + szA = mp_unsigned_bin_size(a); + szB = mp_unsigned_bin_size(b); + if ((szA <= LTC_MAX_INT_BYTES) && (szB <= LTC_MAX_INT_BYTES)) { +#endif + uint8_t *ptrA = (uint8_t *)XMALLOC(LTC_MAX_INT_BYTES, 0, DYNAMIC_TYPE_BIGINT); + uint8_t *ptrB = (uint8_t *)XMALLOC(LTC_MAX_INT_BYTES, 0, DYNAMIC_TYPE_BIGINT); + uint8_t *ptrC = (uint8_t *)XMALLOC(LTC_MAX_INT_BYTES, 0, DYNAMIC_TYPE_BIGINT); + + if (ptrA && ptrB && ptrC) { + uint16_t sizeA, sizeB, sizeC; + + res = ltc_get_lsb_bin_from_mp_int(ptrA, a, &sizeA); + if (res == MP_OKAY) + res = ltc_get_lsb_bin_from_mp_int(ptrB, b, &sizeB); + if (res == MP_OKAY) { + if (kStatus_Success == + LTC_PKHA_ModInv(LTC_BASE, ptrA, sizeA, ptrB, sizeB, ptrC, &sizeC, kLTC_PKHA_IntegerArith)) + { + ltc_reverse_array(ptrC, sizeC); + res = mp_read_unsigned_bin(c, ptrC, sizeC); + } + else { + res = MP_VAL; + } + } + } + else { + res = MP_MEM; + } + +#ifndef WOLFSSL_SP_MATH + c->sign = a->sign; +#endif + if (ptrA) { + XFREE(ptrA, NULL, DYNAMIC_TYPE_BIGINT); + } + if (ptrB) { + XFREE(ptrB, NULL, DYNAMIC_TYPE_BIGINT); + } + if (ptrC) { + XFREE(ptrC, NULL, DYNAMIC_TYPE_BIGINT); + } +#if defined(FREESCALE_LTC_TFM_RSA_4096_ENABLE) + } + else { + res = wolfcrypt_mp_invmod(a, b, c); + } +#endif /* FREESCALE_LTC_TFM_RSA_4096_ENABLE */ + return res; +} + +/* d = a * b (mod c) */ +int mp_mulmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d) +{ + int res = MP_OKAY; +#if defined(FREESCALE_LTC_TFM_RSA_4096_ENABLE) + int szA, szB, szC; + szA = mp_unsigned_bin_size(a); + szB = mp_unsigned_bin_size(b); + szC = mp_unsigned_bin_size(c); + if ((szA <= LTC_MAX_INT_BYTES) && (szB <= LTC_MAX_INT_BYTES) && (szC <= LTC_MAX_INT_BYTES)) { +#endif /* FREESCALE_LTC_TFM_RSA_4096_ENABLE */ + mp_int t; + + uint8_t *ptrA = (uint8_t *)XMALLOC(LTC_MAX_INT_BYTES, NULL, DYNAMIC_TYPE_BIGINT); + uint8_t *ptrB = (uint8_t *)XMALLOC(LTC_MAX_INT_BYTES, NULL, DYNAMIC_TYPE_BIGINT); + uint8_t *ptrC = (uint8_t *)XMALLOC(LTC_MAX_INT_BYTES, NULL, DYNAMIC_TYPE_BIGINT); + uint8_t *ptrD = (uint8_t *)XMALLOC(LTC_MAX_INT_BYTES, NULL, DYNAMIC_TYPE_BIGINT); + + /* if A or B is negative, subtract abs(A) or abs(B) from modulus to get positive integer representation of the + * same number */ + res = mp_init(&t); +#ifndef WOLFSSL_SP_MATH + if (a->sign) { + if (res == MP_OKAY) + res = mp_add(a, c, &t); + if (res == MP_OKAY) + res = mp_copy(&t, a); + } + if (b->sign) { + if (res == MP_OKAY) + 
res = mp_add(b, c, &t); + if (res == MP_OKAY) + res = mp_copy(&t, b); + } +#endif + + if (res == MP_OKAY && ptrA && ptrB && ptrC && ptrD) { + uint16_t sizeA, sizeB, sizeC, sizeD; + + res = ltc_get_lsb_bin_from_mp_int(ptrA, a, &sizeA); + if (res == MP_OKAY) + res = ltc_get_lsb_bin_from_mp_int(ptrB, b, &sizeB); + if (res == MP_OKAY) + res = ltc_get_lsb_bin_from_mp_int(ptrC, c, &sizeC); + + /* (A*B)mod C = ((A mod C) * (B mod C)) mod C */ + if (res == MP_OKAY && LTC_PKHA_CompareBigNum(ptrA, sizeA, ptrC, sizeC) >= 0) { + if (kStatus_Success != + LTC_PKHA_ModRed(LTC_BASE, ptrA, sizeA, ptrC, sizeC, ptrA, &sizeA, kLTC_PKHA_IntegerArith)) + { + res = MP_VAL; + } + } + if (res == MP_OKAY && (LTC_PKHA_CompareBigNum(ptrB, sizeB, ptrC, sizeC) >= 0)) + { + if (kStatus_Success != + LTC_PKHA_ModRed(LTC_BASE, ptrB, sizeB, ptrC, sizeC, ptrB, &sizeB, kLTC_PKHA_IntegerArith)) + { + res = MP_VAL; + } + } + + if (res == MP_OKAY) { + if (kStatus_Success != LTC_PKHA_ModMul(LTC_BASE, ptrA, sizeA, ptrB, sizeB, ptrC, sizeC, ptrD, &sizeD, + kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue, + kLTC_PKHA_NormalValue, kLTC_PKHA_TimingEqualized)) + { + res = MP_VAL; + } + } + + if (res == MP_OKAY) { + ltc_reverse_array(ptrD, sizeD); + res = mp_read_unsigned_bin(d, ptrD, sizeD); + } + } + else { + res = MP_MEM; + } + + if (ptrA) { + XFREE(ptrA, NULL, DYNAMIC_TYPE_BIGINT); + } + if (ptrB) { + XFREE(ptrB, NULL, DYNAMIC_TYPE_BIGINT); + } + if (ptrC) { + XFREE(ptrC, NULL, DYNAMIC_TYPE_BIGINT); + } + if (ptrD) { + XFREE(ptrD, NULL, DYNAMIC_TYPE_BIGINT); + } + #ifndef USE_FAST_MATH + mp_clear(&t); + #endif +#if defined(FREESCALE_LTC_TFM_RSA_4096_ENABLE) + } + else { + res = wolfcrypt_mp_mulmod(a, b, c, d); + } +#endif /* FREESCALE_LTC_TFM_RSA_4096_ENABLE */ + return res; +} + +/* Y = G^X mod P */ +int mp_exptmod(mp_int *G, mp_int *X, mp_int *P, mp_int *Y) +{ + int res = MP_OKAY; +#if defined(FREESCALE_LTC_TFM_RSA_4096_ENABLE) + int szA, szB, szC; + mp_int tmp; + + /* if G cannot fit into LTC_PKHA, reduce it */ + szA = mp_unsigned_bin_size(G); + if (szA > LTC_MAX_INT_BYTES) { + res = mp_init(&tmp); + if (res != MP_OKAY) + return res; + if ((res = mp_mod(G, P, &tmp)) != MP_OKAY) { + return res; + } + G = &tmp; + szA = mp_unsigned_bin_size(G); + } + + szB = mp_unsigned_bin_size(X); + szC = mp_unsigned_bin_size(P); + + if ((szA <= LTC_MAX_INT_BYTES) && (szB <= LTC_MAX_INT_BYTES) && (szC <= LTC_MAX_INT_BYTES)) { +#endif /* FREESCALE_LTC_TFM_RSA_4096_ENABLE */ + mp_int t; + + uint16_t sizeG, sizeX, sizeP; + uint8_t *ptrG = (uint8_t *)XMALLOC(LTC_MAX_INT_BYTES, 0, DYNAMIC_TYPE_BIGINT); + uint8_t *ptrX = (uint8_t *)XMALLOC(LTC_MAX_INT_BYTES, 0, DYNAMIC_TYPE_BIGINT); + uint8_t *ptrP = (uint8_t *)XMALLOC(LTC_MAX_INT_BYTES, 0, DYNAMIC_TYPE_BIGINT); + + /* if G is negative, add modulus to convert to positive number for LTC */ + res = mp_init(&t); +#ifndef WOLFSSL_SP_MATH + if (G->sign) { + if (res == MP_OKAY) + res = mp_add(G, P, &t); + if (res == MP_OKAY) + res = mp_copy(&t, G); + } +#endif + + if (res == MP_OKAY && ptrG && ptrX && ptrP) { + res = ltc_get_lsb_bin_from_mp_int(ptrG, G, &sizeG); + if (res == MP_OKAY) + res = ltc_get_lsb_bin_from_mp_int(ptrX, X, &sizeX); + if (res == MP_OKAY) + res = ltc_get_lsb_bin_from_mp_int(ptrP, P, &sizeP); + + /* if number if greater that modulo, we must first reduce due to LTC requirement on modular exponentiaton */ + /* it needs number less than modulus. 
*/ + /* we can take advantage of modular arithmetic rule that: A^B mod C = ( (A mod C)^B ) mod C + and so we do first (A mod N) : LTC does not give size requirement on A versus N, + and then the modular exponentiation. + */ + /* if G >= P then */ + if (res == MP_OKAY && LTC_PKHA_CompareBigNum(ptrG, sizeG, ptrP, sizeP) >= 0) { + res = (int)LTC_PKHA_ModRed(LTC_BASE, ptrG, sizeG, ptrP, sizeP, ptrG, &sizeG, kLTC_PKHA_IntegerArith); + + if (res != kStatus_Success) { + res = MP_VAL; + } + } + + if (res == MP_OKAY) { + res = (int)LTC_PKHA_ModExp(LTC_BASE, ptrG, sizeG, ptrP, sizeP, ptrX, sizeX, ptrP, &sizeP, + kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue, kLTC_PKHA_TimingEqualized); + + if (res != kStatus_Success) { + res = MP_VAL; + } + else { + ltc_reverse_array(ptrP, sizeP); + res = mp_read_unsigned_bin(Y, ptrP, sizeP); + } + } + } + else { + res = MP_MEM; + } + + if (ptrG) { + XFREE(ptrG, NULL, DYNAMIC_TYPE_BIGINT); + } + if (ptrX) { + XFREE(ptrX, NULL, DYNAMIC_TYPE_BIGINT); + } + if (ptrP) { + XFREE(ptrP, NULL, DYNAMIC_TYPE_BIGINT); + } + #ifndef USE_FAST_MATH + mp_clear(&t); + #endif +#if defined(FREESCALE_LTC_TFM_RSA_4096_ENABLE) + } + else { + res = wolfcrypt_mp_exptmod(G, X, P, Y); + } + +#ifndef USE_FAST_MATH + if (szA > LTC_MAX_INT_BYTES) + mp_clear(&tmp); +#endif +#endif /* FREESCALE_LTC_TFM_RSA_4096_ENABLE */ + return res; +} + +#endif /* FREESCALE_LTC_TFM */ + + +/* ECC */ +#if defined(HAVE_ECC) && defined(FREESCALE_LTC_ECC) + +/* convert from mp_int to LTC integer, as array of bytes of size sz. + * if mp_int has less bytes than sz, add zero bytes at most significant byte positions. + * This is when for example modulus is 32 bytes (P-256 curve) + * and mp_int has only 31 bytes, we add leading zeros + * so that result array has 32 bytes, same as modulus (sz). 
+ */ +static int ltc_get_from_mp_int(uint8_t *dst, mp_int *a, int sz) +{ + int res; + int szbin; + int offset; + + /* check how many bytes are in the mp_int */ + szbin = mp_unsigned_bin_size(a); + + /* compute offset from dst */ + offset = sz - szbin; + if (offset < 0) + offset = 0; + if (offset > sz) + offset = sz; + + /* add leading zeroes */ + if (offset) + XMEMSET(dst, 0, offset); + + /* convert mp_int to array of bytes */ + res = mp_to_unsigned_bin(a, dst + offset); + + if (res == MP_OKAY) { + /* reverse array for LTC direct use */ + ltc_reverse_array(dst, sz); + } + + return res; +} + +/* ECC specs in lsbyte at lowest address format for direct use by LTC PKHA driver functions */ +#if defined(HAVE_ECC192) || defined(HAVE_ALL_CURVES) +#define ECC192 +#endif +#if defined(HAVE_ECC224) || defined(HAVE_ALL_CURVES) +#define ECC224 +#endif +#if !defined(NO_ECC256) || defined(HAVE_ALL_CURVES) +#define ECC256 +#endif +#if defined(HAVE_ECC384) || defined(HAVE_ALL_CURVES) +#define ECC384 +#endif + +/* P-256 */ +#ifdef ECC256 +static const uint8_t ltc_ecc256_modulus[32] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF}; +static const uint8_t ltc_ecc256_r2modn[32] = { + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFB, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFD, 0xFF, 0xFF, 0xFF, 0x04, 0x00, 0x00, 0x00}; +static const uint8_t ltc_ecc256_aCurveParam[32] = { + 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF}; +static const uint8_t ltc_ecc256_bCurveParam[32] = { + 0x4B, 0x60, 0xD2, 0x27, 0x3E, 0x3C, 0xCE, 0x3B, 0xF6, 0xB0, 0x53, + 0xCC, 0xB0, 0x06, 0x1D, 0x65, 0xBC, 0x86, 0x98, 0x76, 0x55, 0xBD, + 0xEB, 0xB3, 0xE7, 0x93, 0x3A, 0xAA, 0xD8, 0x35, 0xC6, 0x5A}; +#endif + +#ifdef ECC192 +static const uint8_t ltc_ecc192_modulus[24] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; +static const uint8_t ltc_ecc192_r2modn[24] = { + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static const uint8_t ltc_ecc192_aCurveParam[24] = { + 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; +static const uint8_t ltc_ecc192_bCurveParam[24] = { + 0xB1, 0xB9, 0x46, 0xC1, 0xEC, 0xDE, 0xB8, 0xFE, 0x49, 0x30, 0x24, 0x72, + 0xAB, 0xE9, 0xA7, 0x0F, 0xE7, 0x80, 0x9C, 0xE5, 0x19, 0x05, 0x21, 0x64}; +#endif + +#ifdef ECC224 +static const uint8_t ltc_ecc224_modulus[28] = { + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; +static const uint8_t ltc_ecc224_r2modn[28] = { + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}; +static const uint8_t ltc_ecc224_aCurveParam[28] = { + 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF}; +static const uint8_t ltc_ecc224_bCurveParam[28] = { + 0xB4, 0xFF, 0x55, 0x23, 0x43, 0x39, 0x0B, 0x27, 0xBA, 0xD8, + 0xBF, 0xD7, 0xB7, 0xB0, 0x44, 0x50, 0x56, 0x32, 0x41, 0xF5, + 0xAB, 0xB3, 0x04, 0x0C, 0x85, 0x0A, 0x05, 0xB4}; +#endif + +#ifdef ECC384 +static const uint8_t ltc_ecc384_modulus[48] = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +static const uint8_t ltc_ecc384_r2modn[48] = { + 0x01, 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static const uint8_t ltc_ecc384_aCurveParam[48] = { + 0xfc, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +static const uint8_t ltc_ecc384_bCurveParam[48] = { + 0xef, 0x2a, 0xec, 0xd3, 0xed, 0xc8, 0x85, 0x2a, 0x9d, 0xd1, 0x2e, 0x8a, + 0x8d, 0x39, 0x56, 0xc6, 0x5a, 0x87, 0x13, 0x50, 0x8f, 0x08, 0x14, 0x03, + 0x12, 0x41, 0x81, 0xfe, 0x6e, 0x9c, 0x1d, 0x18, 0x19, 0x2d, 0xf8, 0xe3, + 0x6b, 0x05, 0x8e, 0x98, 0xe4, 0xe7, 0x3e, 0xe2, 0xa7, 0x2f, 0x31, 0xb3}; +#endif + +static int ltc_get_ecc_specs(const uint8_t **modulus, const uint8_t **r2modn, + const uint8_t **aCurveParam, const uint8_t **bCurveParam, int size) +{ + switch(size) { + case 32: + *modulus = ltc_ecc256_modulus; + *r2modn = ltc_ecc256_r2modn; + *aCurveParam = ltc_ecc256_aCurveParam; + *bCurveParam = ltc_ecc256_bCurveParam; + break; +#ifdef ECC224 + case 28: + *modulus = ltc_ecc224_modulus; + *r2modn = ltc_ecc224_r2modn; + *aCurveParam = ltc_ecc224_aCurveParam; + *bCurveParam = ltc_ecc224_bCurveParam; + break; +#endif +#ifdef ECC192 + case 24: + *modulus = ltc_ecc192_modulus; + *r2modn = ltc_ecc192_r2modn; + *aCurveParam = ltc_ecc192_aCurveParam; + *bCurveParam = ltc_ecc192_bCurveParam; + break; +#endif +#ifdef HAVE_ECC384 + case 48: + *modulus = ltc_ecc384_modulus; + *r2modn = ltc_ecc384_r2modn; + *aCurveParam = ltc_ecc384_aCurveParam; + *bCurveParam = ltc_ecc384_bCurveParam; + break; +#endif + default: + return -1; + } + return 0; +} + +/** + Perform a point multiplication (timing resistant) + k The scalar to multiply by + G The base point + R [out] Destination for kG + modulus The modulus of the field the ECC curve is in + map Boolean whether to map back to affine or not + (1==map, 0 == leave in projective) + return MP_OKAY on success +*/ +int wc_ecc_mulmod_ex(mp_int *k, ecc_point *G, ecc_point *R, mp_int* a, + mp_int *modulus, int map, void* heap) +{ + ltc_pkha_ecc_point_t B; + uint8_t size; + int szModulus; + int szkbin; + bool point_of_infinity; + status_t status; + int res; + + (void)a; + (void)heap; + + uint8_t Gxbin[LTC_MAX_ECC_BITS / 8]; + uint8_t Gybin[LTC_MAX_ECC_BITS / 8]; + uint8_t kbin[LTC_MAX_INT_BYTES]; + + const uint8_t *modbin; + const uint8_t *aCurveParam; + const uint8_t *bCurveParam; + const uint8_t *r2modn; + + if (k == NULL || G == NULL || R == NULL || modulus == NULL) { + return ECC_BAD_ARG_E; + } + + szModulus = 
mp_unsigned_bin_size(modulus); + szkbin = mp_unsigned_bin_size(k); + + res = ltc_get_from_mp_int(kbin, k, szkbin); + if (res == MP_OKAY) + res = ltc_get_from_mp_int(Gxbin, G->x, szModulus); + if (res == MP_OKAY) + res = ltc_get_from_mp_int(Gybin, G->y, szModulus); + + if (res != MP_OKAY) + return res; + + size = szModulus; + /* find LTC friendly parameters for the selected curve */ + if (0 != ltc_get_ecc_specs(&modbin, &r2modn, &aCurveParam, &bCurveParam, size)) { + return ECC_BAD_ARG_E; + } + + B.X = &Gxbin[0]; + B.Y = &Gybin[0]; + + status = LTC_PKHA_ECC_PointMul(LTC_BASE, &B, kbin, szkbin, modbin, r2modn, aCurveParam, bCurveParam, size, + kLTC_PKHA_TimingEqualized, kLTC_PKHA_IntegerArith, &B, &point_of_infinity); + if (status != kStatus_Success) { + return MP_VAL; + } + + ltc_reverse_array(Gxbin, size); + ltc_reverse_array(Gybin, size); + res = mp_read_unsigned_bin(R->x, Gxbin, size); + if (res == MP_OKAY) { + res = mp_read_unsigned_bin(R->y, Gybin, size); + /* if k is negative, we compute the multiplication with abs(-k) + * with result (x, y) and modify the result to (x, -y) + */ +#ifndef WOLFSSL_SP_MATH + R->y->sign = k->sign; +#endif + } + if (res == MP_OKAY) + res = mp_set(R->z, 1); + + return res; +} + +int wc_ecc_point_add(ecc_point *mG, ecc_point *mQ, ecc_point *mR, mp_int *m) +{ + int res; + ltc_pkha_ecc_point_t A, B; + int size; + status_t status; + + uint8_t Gxbin[LTC_MAX_ECC_BITS / 8]; + uint8_t Gybin[LTC_MAX_ECC_BITS / 8]; + uint8_t Qxbin[LTC_MAX_ECC_BITS / 8]; + uint8_t Qybin[LTC_MAX_ECC_BITS / 8]; + const uint8_t *modbin; + const uint8_t *aCurveParam; + const uint8_t *bCurveParam; + const uint8_t *r2modn; + + size = mp_unsigned_bin_size(m); + + /* find LTC friendly parameters for the selected curve */ + if (ltc_get_ecc_specs(&modbin, &r2modn, &aCurveParam, &bCurveParam, size) != 0) { + res = ECC_BAD_ARG_E; + } + else { + res = ltc_get_from_mp_int(Gxbin, mG->x, size); + if (res == MP_OKAY) + res = ltc_get_from_mp_int(Gybin, mG->y, size); + if (res == MP_OKAY) + res = ltc_get_from_mp_int(Qxbin, mQ->x, size); + if (res == MP_OKAY) + res = ltc_get_from_mp_int(Qybin, mQ->y, size); + + if (res != MP_OKAY) + return res; + + A.X = Gxbin; + A.Y = Gybin; + + B.X = Qxbin; + B.Y = Qybin; + + status = LTC_PKHA_ECC_PointAdd(LTC_BASE, &A, &B, modbin, r2modn, aCurveParam, bCurveParam, size, + kLTC_PKHA_IntegerArith, &A); + if (status != kStatus_Success) { + res = MP_VAL; + } + else { + ltc_reverse_array(Gxbin, size); + ltc_reverse_array(Gybin, size); + res = mp_read_unsigned_bin(mR->x, Gxbin, size); + if (res == MP_OKAY) + res = mp_read_unsigned_bin(mR->y, Gybin, size); + if (res == MP_OKAY) + res = mp_set(mR->z, 1); + } + } + return res; +} + +#if defined(HAVE_ED25519) || defined(HAVE_CURVE25519) +/* Weierstrass parameters of prime 2^255 - 19 */ +static const uint8_t curve25519_modbin[32] = { + 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f}; +/* precomputed R2modN for the curve25519 */ +static const uint8_t r2mod[32] = { + 0xa4, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + +/* invThree = ModInv(3,curve25519_modbin) in LSB first */ +static const uint8_t invThree[32] = { + 0x49, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 
0x55, + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55}; + +/* + * + * finds square root in finite field when modulus congruent to 5 modulo 8 + * this is fixed to curve25519 modulus 2^255 - 19 which is congruent to 5 modulo 8 + * + * This function solves equation: res^2 = a mod (2^255 - 19) + * +p = prime +p % 8 must be 5 + +v = ModularArithmetic.powmod(2*a, (p-5)/8, p) +i = (2*a*v**2) % p +r1 = 1*a*v*(i - 1) % p +r2 = -1*a*v*(i - 1) % p +puts "Gy=0x#{r2.to_s(16)}" + */ +status_t LTC_PKHA_Prime25519SquareRootMod(const uint8_t *A, size_t sizeA, + uint8_t *res, size_t *szRes, int sign) +{ + status_t status; + const uint8_t curve25519_param[] = { + 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0f}; + uint8_t twoA[sizeof(curve25519_modbin)] = {0}; + uint8_t V[sizeof(curve25519_modbin)] = {0}; + uint8_t I[sizeof(curve25519_modbin)] = {0}; + uint8_t VV[sizeof(curve25519_modbin)] = {0}; + uint16_t szTwoA = 0; + uint16_t szV = 0; + uint16_t szVV = 0; + uint16_t szI = 0; + uint16_t szRes16 = 0; + uint8_t one = 1; + + /* twoA = 2*A % p */ + status = LTC_PKHA_ModAdd(LTC_BASE, A, sizeA, A, sizeA, curve25519_modbin, + sizeof(curve25519_modbin), twoA, &szTwoA, kLTC_PKHA_IntegerArith); + + /* V = ModularArithmetic.powmod(twoA, (p-5)/8, p) */ + if (status == kStatus_Success) { + status = + LTC_PKHA_ModExp(LTC_BASE, twoA, szTwoA, curve25519_modbin, + sizeof(curve25519_modbin), curve25519_param, + sizeof(curve25519_param), V, &szV, kLTC_PKHA_IntegerArith, + kLTC_PKHA_NormalValue, kLTC_PKHA_TimingEqualized); + } + + /* VV = V*V % p */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModMul(LTC_BASE, V, szV, V, szV, curve25519_modbin, + sizeof(curve25519_modbin), VV, &szVV, kLTC_PKHA_IntegerArith, + kLTC_PKHA_NormalValue, kLTC_PKHA_NormalValue, + kLTC_PKHA_TimingEqualized); + } + + /* I = twoA * VV = 2*A*V*V % p */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModMul(LTC_BASE, twoA, szTwoA, VV, szVV, + curve25519_modbin, sizeof(curve25519_modbin), I, &szI, + kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue, + kLTC_PKHA_NormalValue, kLTC_PKHA_TimingEqualized); + } + + /* I = I - 1 */ + XMEMSET(VV, 0xff, sizeof(VV)); /* just temp for maximum integer - for non-modular subtract */ + if (0 <= LTC_PKHA_CompareBigNum(I, szI, &one, sizeof(one))) { + if (status == kStatus_Success) { + status = LTC_PKHA_ModSub1(LTC_BASE, I, szI, &one, sizeof(one), + VV, sizeof(VV), I, &szI); + } + } + else { + if (status == kStatus_Success) { + status = LTC_PKHA_ModSub1(LTC_BASE, curve25519_modbin, + sizeof(curve25519_modbin), &one, sizeof(one), VV, sizeof(VV), I, + &szI); + } + } + + /* res = a*v mod p */ + status = LTC_PKHA_ModMul(LTC_BASE, A, sizeA, V, szV, curve25519_modbin, + sizeof(curve25519_modbin), res, &szRes16, kLTC_PKHA_IntegerArith, + kLTC_PKHA_NormalValue, kLTC_PKHA_NormalValue, + kLTC_PKHA_TimingEqualized); + + /* res = res * (i-1) mod p */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModMul(LTC_BASE, res, szRes16, I, szI, + curve25519_modbin, sizeof(curve25519_modbin), res, &szRes16, + kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue, + kLTC_PKHA_NormalValue, kLTC_PKHA_TimingEqualized); + } + + /* if X mod 2 != X_0 then we need the -X + * + * X mod 2 get from LSB bit0 + */ + if ((status == kStatus_Success) && + ((bool)sign != (bool)(res[0] & 0x01u))) + { + status = LTC_PKHA_ModSub1(LTC_BASE, curve25519_modbin, + sizeof(curve25519_modbin), res, 
szRes16, VV, sizeof(VV), res,
+            &szRes16); /* -a = p - a */
+    }
+
+    if (status == kStatus_Success) {
+        *szRes = szRes16;
+    }
+
+    return status;
+}
+#endif /* HAVE_ED25519 || HAVE_CURVE25519 */
+
+
+#ifdef HAVE_CURVE25519
+
+/* LTC needs the Weierstrass form of the curve25519 parameters.
+ * These two arrays are the base point X and Y,
+ * in LSB-first format (native for LTC).
+ */
+static const ECPoint ecBasePoint = {
+    {0x5a, 0x24, 0xad, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+     0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+     0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0x2a},
+    {0xd9, 0xd3, 0xce, 0x7e, 0xa2, 0xc5, 0xe9, 0x29, 0xb2, 0x61, 0x7c,
+     0x6d, 0x7e, 0x4d, 0x3d, 0x92, 0x4c, 0xd1, 0x48, 0x77, 0x2c, 0xdd,
+     0x1e, 0xe0, 0xb4, 0x86, 0xa0, 0xb8, 0xa1, 0x19, 0xae, 0x20},
+};
+
+const ECPoint *wc_curve25519_GetBasePoint(void)
+{
+    return &ecBasePoint;
+}
+
+static const uint8_t curve25519_aCurveParam[CURVE25519_KEYSIZE] = {
+    0x44, 0xa1, 0x14, 0x49, 0x98, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+    0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+    0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+    0xaa, 0xaa, 0xaa, 0xaa, 0x2a};
+
+static const uint8_t curve25519_bCurveParam[CURVE25519_KEYSIZE] = {
+    0x64, 0xc8, 0x10, 0x77, 0x9c, 0x5e, 0x0b, 0x26, 0xb4, 0x97, 0xd0,
+    0x5e, 0x42, 0x7b, 0x09, 0xed,
+    0x25, 0xb4, 0x97, 0xd0, 0x5e, 0x42, 0x7b, 0x09, 0xed, 0x25, 0xb4,
+    0x97, 0xd0, 0x5e, 0x42, 0x7b};
+
+/* transform a point on the Montgomery curve to a point on the Weierstrass
+   curve */
+status_t LTC_PKHA_Curve25519ToWeierstrass(
+    const ltc_pkha_ecc_point_t *ltcPointIn, ltc_pkha_ecc_point_t *ltcPointOut)
+{
+    /* offset the X point (in Montgomery form) so that it becomes Weierstrass */
+    const uint8_t offset[] = {
+        0x51, 0x24, 0xad, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+        0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+        0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0x2a};
+    uint16_t sizeRes = 0;
+    status_t status;
+    status = LTC_PKHA_ModAdd(LTC_BASE, ltcPointIn->X, CURVE25519_KEYSIZE,
+        offset, sizeof(offset), curve25519_modbin, CURVE25519_KEYSIZE,
+        ltcPointOut->X, &sizeRes, kLTC_PKHA_IntegerArith);
+
+    if (status == kStatus_Success) {
+        if (ltcPointOut->Y != ltcPointIn->Y) {
+            XMEMCPY(ltcPointOut->Y, ltcPointIn->Y, CURVE25519_KEYSIZE);
+        }
+    }
+
+    return status;
+}
+
+/* transform a point on the Weierstrass curve to a point on the Montgomery
+   curve */
+status_t LTC_PKHA_WeierstrassToCurve25519(
+    const ltc_pkha_ecc_point_t *ltcPointIn, ltc_pkha_ecc_point_t *ltcPointOut)
+{
+    status_t status;
+    uint16_t resultSize = 0;
+    const uint8_t three = 0x03;
+
+    status = LTC_PKHA_ModMul(LTC_BASE, &three, sizeof(three), ltcPointIn->X,
+        CURVE25519_KEYSIZE, curve25519_modbin, CURVE25519_KEYSIZE,
+        ltcPointOut->X, &resultSize, kLTC_PKHA_IntegerArith,
+        kLTC_PKHA_NormalValue, kLTC_PKHA_NormalValue,
+        kLTC_PKHA_TimingEqualized);
+
+    if (status == kStatus_Success) {
+        const uint8_t A[] = {0x06, 0x6d, 0x07};
+        if (LTC_PKHA_CompareBigNum(ltcPointOut->X, resultSize, A,
+                sizeof(A)) >= 0) {
+            status = LTC_PKHA_ModSub1(LTC_BASE, ltcPointOut->X, resultSize, A,
+                sizeof(A), curve25519_modbin, CURVE25519_KEYSIZE,
+                ltcPointOut->X, &resultSize);
+        }
+        else {
+            status = LTC_PKHA_ModSub2(LTC_BASE, ltcPointOut->X, resultSize, A,
+                sizeof(A), curve25519_modbin, CURVE25519_KEYSIZE,
+                ltcPointOut->X, &resultSize);
+        }
+    }
+
+    if (status == kStatus_Success) {
+        status = LTC_PKHA_ModMul(LTC_BASE, invThree, CURVE25519_KEYSIZE,
+            ltcPointOut->X, resultSize, curve25519_modbin, CURVE25519_KEYSIZE,
+
ltcPointOut->X, &resultSize, kLTC_PKHA_IntegerArith,
+            kLTC_PKHA_NormalValue, kLTC_PKHA_NormalValue,
+            kLTC_PKHA_TimingEqualized);
+    }
+
+    if (status == kStatus_Success) {
+        if (ltcPointOut->Y != ltcPointIn->Y) {
+            XMEMCPY(ltcPointOut->Y, ltcPointIn->Y, CURVE25519_KEYSIZE);
+        }
+    }
+
+    return status;
+}
+
+/* Y = square root (X^3 + 486662*X^2 + X) */
+status_t LTC_PKHA_Curve25519ComputeY(ltc_pkha_ecc_point_t *ltcPoint)
+{
+    uint8_t three = 3;
+    const uint8_t A[] = {0x06, 0x6d, 0x07};
+    uint8_t U[CURVE25519_KEYSIZE] = {0};
+    uint8_t X2[CURVE25519_KEYSIZE] = {0};
+    uint16_t sizeU = 0;
+    uint16_t sizeX2 = 0;
+    size_t szRes = 0;
+    status_t status;
+
+    /* X^3 */
+    status = LTC_PKHA_ModExp(LTC_BASE, ltcPoint->X, CURVE25519_KEYSIZE,
+        curve25519_modbin, CURVE25519_KEYSIZE, &three, 1, U, &sizeU,
+        kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue,
+        kLTC_PKHA_TimingEqualized);
+
+    /* X^2 */
+    if (status == kStatus_Success) {
+        status = LTC_PKHA_ModMul(LTC_BASE, ltcPoint->X, CURVE25519_KEYSIZE,
+            ltcPoint->X, CURVE25519_KEYSIZE, curve25519_modbin,
+            CURVE25519_KEYSIZE, X2, &sizeX2, kLTC_PKHA_IntegerArith,
+            kLTC_PKHA_NormalValue, kLTC_PKHA_NormalValue,
+            kLTC_PKHA_TimingEqualized);
+    }
+
+    /* 486662*X^2 */
+    if (status == kStatus_Success) {
+        status = LTC_PKHA_ModMul(LTC_BASE, A, sizeof(A), X2, sizeX2,
+            curve25519_modbin, CURVE25519_KEYSIZE, X2, &sizeX2,
+            kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue,
+            kLTC_PKHA_NormalValue, kLTC_PKHA_TimingEqualized);
+    }
+
+    /* X^3 + 486662*X^2 */
+    if (status == kStatus_Success) {
+        status = LTC_PKHA_ModAdd(LTC_BASE, U, sizeU, X2, sizeX2,
+            curve25519_modbin, CURVE25519_KEYSIZE, U, &sizeU,
+            kLTC_PKHA_IntegerArith);
+    }
+
+    /* U = X^3 + 486662*X^2 + X */
+    if (status == kStatus_Success) {
+        status = LTC_PKHA_ModAdd(LTC_BASE, U, sizeU, ltcPoint->X,
+            CURVE25519_KEYSIZE, curve25519_modbin, CURVE25519_KEYSIZE, U,
+            &sizeU, kLTC_PKHA_IntegerArith);
+    }
+
+    /* Y = modular square root of U (U is Y^2) */
+    if (status == kStatus_Success) {
+        status = LTC_PKHA_Prime25519SquareRootMod(U, sizeU, ltcPoint->Y,
+            &szRes, 1);
+    }
+
+    return status;
+}
+
+/* Q = n*P */
+/* if type is set, the input point p is in Montgomery curve coordinates
+   and is first mapped to the Weierstrass curve */
+/* the output point q is always in Montgomery curve coordinates */
+int wc_curve25519(ECPoint *q, byte *n, const ECPoint *p, fsl_ltc_ecc_coordinate_system_t type)
+{
+    status_t status;
+    ltc_pkha_ecc_point_t ltcPoint;
+    ltc_pkha_ecc_point_t ltcPointOut;
+    ECPoint pIn = {{0}};
+
+    XMEMCPY(&pIn, p, sizeof(*p));
+    ltcPoint.X = &pIn.point[0];
+    ltcPoint.Y = &pIn.pointY[0];
+
+    /* if the input point P is on the Curve25519 Montgomery curve, transform
+       it to the Weierstrass equivalent */
+    if (type == kLTC_Curve25519) {
+        LTC_PKHA_Curve25519ToWeierstrass(&ltcPoint, &ltcPoint);
+    }
+
+    ltcPointOut.X = &q->point[0];
+    ltcPointOut.Y = &q->pointY[0];
+    /* curve25519_modbin, r2mod, curve25519_aCurveParam, curve25519_bCurveParam
+     * are the Weierstrass equivalents of Curve25519 */
+    status = LTC_PKHA_ECC_PointMul(LTC_BASE, &ltcPoint, n, CURVE25519_KEYSIZE,
+        curve25519_modbin, r2mod, curve25519_aCurveParam,
+        curve25519_bCurveParam, CURVE25519_KEYSIZE, kLTC_PKHA_TimingEqualized,
+        kLTC_PKHA_IntegerArith, &ltcPointOut, NULL);
+
+    /* map the result from Weierstrass form back to Montgomery form */
+    if (status == kStatus_Success) {
+        status = LTC_PKHA_WeierstrassToCurve25519(&ltcPointOut, &ltcPointOut);
+    }
+
+    return (status == kStatus_Success) ?
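+/* Background sketch (not part of the original port): curve25519 is the
+ * Montgomery curve y^2 = x^3 + 486662*x^2 + x over p = 2^255 - 19 (B = 1).
+ * The standard substitution X = x + A/3 (mod p) converts it to the short
+ * Weierstrass form y^2 = X^3 + a*X + b that the PKHA point-multiply engine
+ * expects: 'offset' in LTC_PKHA_Curve25519ToWeierstrass() is the precomputed
+ * A/3 mod p, and LTC_PKHA_WeierstrassToCurve25519() inverts the shift as
+ * x = (3*X - A) * invThree mod p, exactly as coded above. */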
0 : IS_POINT_E;
+}
+
+#endif /* HAVE_CURVE25519 */
+
+
+#ifdef HAVE_ED25519
+/* a and d are the Edwards curve parameters: a = -1 and d = -121665/121666;
+ * the prime is 2^255 - 19.
+ *
+ * https://en.wikipedia.org/wiki/Montgomery_curve#Equivalence_with_Edward_curves
+ */
+
+/* d parameter of ed25519 */
+static const uint8_t d_coefEd25519[] = {
+    0xa3, 0x78, 0x59, 0x13, 0xca, 0x4d, 0xeb, 0x75, 0xab, 0xd8, 0x41,
+    0x41, 0x4d, 0x0a, 0x70, 0x00, 0x98, 0xe8, 0x79, 0x77, 0x79, 0x40,
+    0xc7, 0x8c, 0x73, 0xfe, 0x6f, 0x2b, 0xee, 0x6c, 0x03, 0x52};
+
+/* Montgomery curve parameter A for a Montgomery curve equivalent with ed25519 */
+static const uint8_t A_coefEd25519[] = {
+    0x06, 0x6d, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+
+/* Montgomery curve parameter B for a Montgomery curve equivalent with ed25519 */
+static const uint8_t B_coefEd25519[] = {
+    0xe5, 0x92, 0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f};
+
+/* precomputed constants used in the computations below */
+
+/* = 3*B */
+static const uint8_t threeB_coefEd25519[] = {
+    0xd5, 0xb8, 0xe9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f};
+
+/* = -A */
+static const uint8_t minus_A_coefEd25519[] = {
+    0xe7, 0x92, 0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f};
+
+/* = 1/B */
+static const uint8_t invB_coefEd25519[] = {
+    0xc4, 0xa1, 0x29, 0x7b, 0x8d, 0x2c, 0x85, 0x22, 0xd5, 0x89, 0xaf,
+    0xaf, 0x6c, 0xfd, 0xe3, 0xff, 0xd9, 0x85, 0x21, 0xa2, 0xe1, 0x2f,
+    0xce, 0x1c, 0x63, 0x00, 0x24, 0x75, 0xc4, 0x24, 0x7f, 0x6b};
+
+/* = A/(3*B) */
+static const uint8_t A_mul_invThreeB_coefEd25519[] = {
+    0xb9, 0x3e, 0xe4, 0xad, 0xa1, 0x37, 0xa7, 0x93, 0x1c, 0xa4, 0x35,
+    0xe0, 0x0c, 0x57, 0xbd, 0xaa, 0x6e, 0x51, 0x94, 0x3e, 0x14, 0xe0,
+    0xcb, 0xec, 0xbd, 0xff, 0xe7, 0xb1, 0x27, 0x92, 0x00, 0x63};
+
+/* Weierstrass curve parameter a for a Weierstrass curve equivalent with ed25519 */
+static const uint8_t a_coefEd25519[] = {
+    0x2d, 0x17, 0xbc, 0xf8, 0x8e, 0xe1, 0x71, 0xac, 0xf7, 0x2a, 0xa5,
+    0x0c, 0x5d, 0xb6, 0xb8, 0x6b, 0xd6, 0x3d, 0x7b, 0x61, 0x0d, 0xe1,
+    0x97, 0x31, 0xe6, 0xbe, 0xb9, 0xa5, 0xd3, 0xac, 0x4e, 0x5d};
+
+/* Weierstrass curve parameter b for a Weierstrass curve equivalent with ed25519 */
+static const uint8_t b_coefEd25519[] = {
+    0xa4, 0xb2, 0x64, 0xf3, 0xc1, 0xeb, 0x04, 0x90, 0x32, 0xbc, 0x9f,
+    0x6b, 0x97, 0x31, 0x48, 0xf5, 0xd5, 0x80, 0x57, 0x10, 0x06, 0xdb,
+    0x0d, 0x55, 0xe0, 0xb3, 0xd0, 0xcf, 0x9b, 0xb2, 0x11, 0x1d};
+
+/* Ed25519 basepoint B mapped to its Weierstrass equivalent */
+static uint8_t Wx_Ed25519[ED25519_KEY_SIZE] = {
+    0x35, 0xef, 0x5a, 0x02, 0x9b, 0xc8, 0x55, 0xca, 0x9a, 0x7c, 0x61,
+    0x0d, 0xdf, 0x3f, 0xc1, 0xa9, 0x18, 0x06, 0xc2, 0xf1, 0x02, 0x8f,
+    0x0b, 0xf0, 0x39, 0x03, 0x2c, 0xd0, 0x0f, 0xdd, 0x78, 0x2a};
+static uint8_t Wy_Ed25519[ED25519_KEY_SIZE] = {
+    0x14, 0x1d, 0x2c, 0xf6, 0xf3, 0x30, 0x78, 0x9b, 0x65, 0x31, 0x71,
+    0x80, 0x61, 0xd0, 0x6f, 0xcf, 0x23, 0x83, 0x79, 0x63, 0xa5, 0x3b,
+    0x48, 0xbe, 0x2e, 0xa2, 0x1d, 0xc7, 0xa5, 0x44, 0xc6, 0x29};
+
+static const ltc_pkha_ecc_point_t
basepointEd25519 = { + Wx_Ed25519, Wy_Ed25519, +}; + +const ltc_pkha_ecc_point_t *LTC_PKHA_Ed25519_BasePoint(void) +{ + return &basepointEd25519; +} + +/* input point is on Weierstrass curve, typeOut determines the coordinates + system of output point (either Weierstrass or Ed25519) */ +status_t LTC_PKHA_Ed25519_PointMul(const ltc_pkha_ecc_point_t *ltcPointIn, + const uint8_t *N, + size_t sizeN, + ltc_pkha_ecc_point_t *ltcPointOut, + fsl_ltc_ecc_coordinate_system_t typeOut) +{ + uint16_t szN = (uint16_t)sizeN; + status_t status; + /* input on W, output in W, W parameters of ECC curve are Ed25519 curve + parameters mapped to Weierstrass curve */ + status = LTC_PKHA_ECC_PointMul(LTC_BASE, ltcPointIn, N, szN, + curve25519_modbin, r2mod, a_coefEd25519, b_coefEd25519, + ED25519_KEY_SIZE, kLTC_PKHA_TimingEqualized, kLTC_PKHA_IntegerArith, + ltcPointOut, NULL); + + /* Weierstrass coordinates to Ed25519 coordinates */ + if ((status == kStatus_Success) && (typeOut == kLTC_Ed25519)) { + status = LTC_PKHA_WeierstrassToEd25519(ltcPointOut, ltcPointOut); + } + return status; +} + +status_t LTC_PKHA_Ed25519ToWeierstrass(const ltc_pkha_ecc_point_t *ltcPointIn, + ltc_pkha_ecc_point_t *ltcPointOut) +{ + status_t status; + uint8_t Mx[ED25519_KEY_SIZE] = {0}; + uint8_t My[ED25519_KEY_SIZE] = {0}; + uint8_t temp[ED25519_KEY_SIZE] = {0}; + uint8_t temp2[ED25519_KEY_SIZE] = {0}; + const uint8_t max[32] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + const uint8_t *Ex; + const uint8_t *Ey; + uint8_t *Gx; + uint8_t *Gy; + uint16_t szMx = 0; + uint16_t szGx = 0; + uint16_t szMy = 0; + uint16_t szGy = 0; + uint16_t szTemp = 0; + uint16_t szTemp2 = 0; + uint8_t one = 1; + + Ex = ltcPointIn->X; + Ey = ltcPointIn->Y; + Gx = ltcPointOut->X; + Gy = ltcPointOut->Y; + /* # (Ex, Ey) on Ed (a_ed, d) to (x, y) on M (A,B) + Mx = (1 + Ey) * ModularArithmetic.invert(1 - Ey, prime) % prime + My = (1 + Ey) * ModularArithmetic.invert((1 - Ey)*Ex, prime) % prime */ + + /* Gx = ((Mx * ModularArithmetic.invert(B, prime)) + + (A * ModularArithmetic.invert(3*B, prime))) % prime + Gy = (My * ModularArithmetic.invert(B, prime)) % prime */ + + /* temp = 1 + Ey */ + status = LTC_PKHA_ModAdd(LTC_BASE, Ey, ED25519_KEY_SIZE, &one, sizeof(one), + curve25519_modbin, sizeof(curve25519_modbin), temp, &szTemp, + kLTC_PKHA_IntegerArith); + + /* temp2 = 1 - Ey = 1 + (p - Ey) */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModSub1(LTC_BASE, curve25519_modbin, + sizeof(curve25519_modbin), Ey, ED25519_KEY_SIZE, max, sizeof(max), + temp2, &szTemp2); + } + if (status == kStatus_Success) { + status = LTC_PKHA_ModAdd(LTC_BASE, temp2, szTemp2, &one, sizeof(one), + curve25519_modbin, sizeof(curve25519_modbin), temp2, &szTemp2, + kLTC_PKHA_IntegerArith); + } + + /* Mx = ModInv(temp2,prime) */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModInv(LTC_BASE, temp2, szTemp2, curve25519_modbin, + sizeof(curve25519_modbin), Mx, &szMx, kLTC_PKHA_IntegerArith); + } + + /* Mx = Mx * temp */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModMul(LTC_BASE, Mx, szMx, temp, szTemp, + curve25519_modbin, ED25519_KEY_SIZE, Mx, &szMx, + kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue, + kLTC_PKHA_NormalValue, kLTC_PKHA_TimingEqualized); + } + + /* My = temp2 * Ex */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModMul(LTC_BASE, Ex, ED25519_KEY_SIZE, temp2, + szTemp2, curve25519_modbin, 
ED25519_KEY_SIZE, My, &szMy, + kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue, + kLTC_PKHA_NormalValue, kLTC_PKHA_TimingEqualized); + } + + /* My = ModInv(My, prime) */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModInv(LTC_BASE, My, szMy, curve25519_modbin, + sizeof(curve25519_modbin), My, &szMy, kLTC_PKHA_IntegerArith); + } + /* My = My * temp */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModMul(LTC_BASE, My, szMy, temp, szTemp, + curve25519_modbin, ED25519_KEY_SIZE, My, &szMy, + kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue, + kLTC_PKHA_NormalValue, kLTC_PKHA_TimingEqualized); + } + + /* Gx = Mx * invB_coefEd25519 + A_mul_invThreeB_coefEd25519 */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModMul(LTC_BASE, Mx, szMx, invB_coefEd25519, + sizeof(invB_coefEd25519), curve25519_modbin, ED25519_KEY_SIZE, Gx, + &szGx, kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue, + kLTC_PKHA_NormalValue, kLTC_PKHA_TimingEqualized); + } + if (status == kStatus_Success) { + status = LTC_PKHA_ModAdd(LTC_BASE, Gx, szGx, + A_mul_invThreeB_coefEd25519, sizeof(A_mul_invThreeB_coefEd25519), + curve25519_modbin, sizeof(curve25519_modbin), Gx, &szGx, + kLTC_PKHA_IntegerArith); + } + + /* Gy = My * invB_coefEd25519 */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModMul(LTC_BASE, My, szMy, invB_coefEd25519, + sizeof(invB_coefEd25519), curve25519_modbin, ED25519_KEY_SIZE, Gy, + &szGy, kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue, + kLTC_PKHA_NormalValue, kLTC_PKHA_TimingEqualized); + } + + return status; +} + +/* +# (Gx, Gy) on W to (Ex, Ey) on E +My = (B*Gy) % prime +Mx = ((3*B*Gx-A)*ModularArithmetic.invert(3, prime)) % prime +Ex = Mx*ModularArithmetic.invert(My, prime) % prime +Ey = (Mx - 1)*ModularArithmetic.invert(Mx + 1, prime) % prime +*/ +status_t LTC_PKHA_WeierstrassToEd25519(const ltc_pkha_ecc_point_t *ltcPointIn, + ltc_pkha_ecc_point_t *ltcPointOut) +{ + status_t status; + uint8_t Mx[ED25519_KEY_SIZE] = {0}; + uint8_t My[ED25519_KEY_SIZE] = {0}; + uint8_t temp[ED25519_KEY_SIZE] = {0}; + const uint8_t *Gx; + const uint8_t *Gy; + uint8_t *Ex; + uint8_t *Ey; + uint16_t szMx = 0; + uint16_t szEx = 0; + uint16_t szMy = 0; + uint16_t szEy = 0; + uint16_t szTemp = 0; + uint8_t one = 1; + + Gx = ltcPointIn->X; + Gy = ltcPointIn->Y; + Ex = ltcPointOut->X; + Ey = ltcPointOut->Y; + + /* My = (B*Gy) % prime */ + status = LTC_PKHA_ModMul(LTC_BASE, B_coefEd25519, sizeof(B_coefEd25519), + Gy, ED25519_KEY_SIZE, curve25519_modbin, ED25519_KEY_SIZE, My, &szMy, + kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue, kLTC_PKHA_NormalValue, + kLTC_PKHA_TimingEqualized); + + /* temp = 3*B*Gx mod p */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModMul(LTC_BASE, threeB_coefEd25519, + sizeof(threeB_coefEd25519), Gx, ED25519_KEY_SIZE, curve25519_modbin, + ED25519_KEY_SIZE, temp, &szTemp, kLTC_PKHA_IntegerArith, + kLTC_PKHA_NormalValue, kLTC_PKHA_NormalValue, + kLTC_PKHA_TimingEqualized); + } + /* temp = (temp - A) mod p */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModAdd(LTC_BASE, temp, szTemp, minus_A_coefEd25519, + sizeof(minus_A_coefEd25519), curve25519_modbin, + sizeof(curve25519_modbin), temp, &szTemp, kLTC_PKHA_IntegerArith); + } + /* Mx = (temp/3) mod p */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModMul(LTC_BASE, temp, szTemp, invThree, + sizeof(invThree), curve25519_modbin, sizeof(curve25519_modbin), Mx, + &szMx, kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue, + kLTC_PKHA_NormalValue, kLTC_PKHA_TimingEqualized); + } + /* temp = 1/My mod p */ + if (status == 
kStatus_Success) { + status = LTC_PKHA_ModInv(LTC_BASE, My, szMy, curve25519_modbin, + sizeof(curve25519_modbin), temp, &szTemp, kLTC_PKHA_IntegerArith); + } + /* Ex = Mx * temp mod p */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModMul(LTC_BASE, temp, szTemp, Mx, szMx, + curve25519_modbin, sizeof(curve25519_modbin), Ex, &szEx, + kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue, + kLTC_PKHA_NormalValue, kLTC_PKHA_TimingEqualized); + } + + /* temp = Mx + 1 mod p */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModAdd(LTC_BASE, Mx, szMx, &one, sizeof(one), + curve25519_modbin, sizeof(curve25519_modbin), temp, &szTemp, + kLTC_PKHA_IntegerArith); + } + /* temp = 1/temp mod p */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModInv(LTC_BASE, temp, szTemp, curve25519_modbin, + sizeof(curve25519_modbin), temp, &szTemp, kLTC_PKHA_IntegerArith); + } + /* Mx = (Mx - 1) mod p */ + if (status == kStatus_Success) { + if (LTC_PKHA_CompareBigNum(Mx, szMx, &one, sizeof(one)) >= 0) { + status = LTC_PKHA_ModSub1(LTC_BASE, Mx, szMx, &one, sizeof(one), + curve25519_modbin, sizeof(curve25519_modbin), Mx, &szMx); + } + else { + /* Mx is zero, so it is modulus, thus we do modulus - 1 */ + XMEMCPY(Mx, curve25519_modbin, sizeof(curve25519_modbin)); + Mx[0]--; + } + } + /* Ey = Mx * temp mod p */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModMul(LTC_BASE, temp, szTemp, Mx, szMx, + curve25519_modbin, sizeof(curve25519_modbin), Ey, &szEy, + kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue, + kLTC_PKHA_NormalValue, kLTC_PKHA_TimingEqualized); + } + + return status; +} + +status_t LTC_PKHA_Ed25519_PointDecompress(const uint8_t *pubkey, + size_t pubKeySize, ltc_pkha_ecc_point_t *ltcPointOut) +{ + status_t status; + const uint8_t one = 1; + + /* pubkey contains the Y coordinate and a sign of X + */ + + /* x^2 = ((y^2 - 1) / (d*y^2 +1)) mod p */ + + /* decode Y from pubkey */ + XMEMCPY(ltcPointOut->Y, pubkey, pubKeySize); + ltcPointOut->Y[pubKeySize - 1] &= ~0x80u; + int sign = (int)(bool)(pubkey[pubKeySize - 1] & 0x80u); + + uint8_t U[ED25519_KEY_SIZE] = {0}; + uint8_t V[ED25519_KEY_SIZE] = {0}; + uint8_t *X = ltcPointOut->X; + uint8_t *Y = ltcPointOut->Y; + uint16_t szU = 0; + uint16_t szV = 0; + size_t szRes = 0; + + /* decode X from pubkey */ + + /* U = y * y mod p */ + status = LTC_PKHA_ModMul(LTC_BASE, Y, ED25519_KEY_SIZE, Y, + ED25519_KEY_SIZE, curve25519_modbin, ED25519_KEY_SIZE, U, &szU, + kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue, kLTC_PKHA_NormalValue, + kLTC_PKHA_TimingEqualized); + XMEMCPY(V, U, szU); + szV = szU; + + /* U = U - 1 = y^2 - 1 */ + if (status == kStatus_Success) { + if (LTC_PKHA_CompareBigNum(U, szU, &one, sizeof(one)) >= 0) { + status = LTC_PKHA_ModSub1(LTC_BASE, U, szU, &one, sizeof(one), + curve25519_modbin, sizeof(curve25519_modbin), U, &szU); + } + else { + /* U is zero, so it is modulus, thus we do modulus - 1 */ + XMEMCPY(U, curve25519_modbin, sizeof(curve25519_modbin)); + U[0]--; + } + } + + /* V = d*y*y + 1 */ + if (status == kStatus_Success) { + status = LTC_PKHA_ModMul(LTC_BASE, V, szV, d_coefEd25519, + ED25519_KEY_SIZE, curve25519_modbin, ED25519_KEY_SIZE, V, &szV, + kLTC_PKHA_IntegerArith, kLTC_PKHA_NormalValue, + kLTC_PKHA_NormalValue, kLTC_PKHA_TimingEqualized); + } + + if (status == kStatus_Success) { + status = LTC_PKHA_ModAdd(LTC_BASE, V, szV, &one, sizeof(one), + curve25519_modbin, sizeof(curve25519_modbin), V, &szV, + kLTC_PKHA_IntegerArith); + } + + /* U = U / V (mod p) */ + if (status == kStatus_Success) { + status = 
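+        /* Division in GF(p) is multiplication by a modular inverse:
+         * U/V is computed below as U * V^(-1) mod p, which completes
+         * x^2 = (y^2 - 1) / (d*y^2 + 1) from the decompression equation. */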
LTC_PKHA_ModInv(LTC_BASE, V, szV, curve25519_modbin,
+            sizeof(curve25519_modbin), V, &szV, kLTC_PKHA_IntegerArith);
+    }
+    if (status == kStatus_Success) {
+        status = LTC_PKHA_ModMul(LTC_BASE, V, szV, U, szU, curve25519_modbin,
+            ED25519_KEY_SIZE, U, &szU, kLTC_PKHA_IntegerArith,
+            kLTC_PKHA_NormalValue, kLTC_PKHA_NormalValue,
+            kLTC_PKHA_TimingEqualized);
+    }
+
+    /* get square root */
+    if (status == kStatus_Success) {
+        status = LTC_PKHA_Prime25519SquareRootMod(U, szU, X, &szRes, sign);
+    }
+
+    return status;
+}
+
+/* LSByte-first Ed25519 parameter l = 2^252 + 27742317777372353535851937790883648493 */
+static const uint8_t l_coefEdDSA[] = {
+    0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58, 0xd6, 0x9c, 0xf7,
+    0xa2, 0xde, 0xf9, 0xde, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10};
+
+/*
+Input:
+  s[0]+256*s[1]+...+256^63*s[63] = s
+
+Output:
+  s[0]+256*s[1]+...+256^31*s[31] = s mod l
+  where l = 2^252 + 27742317777372353535851937790883648493.
+  Overwrites s in place.
+*/
+status_t LTC_PKHA_sc_reduce(uint8_t *a)
+{
+    uint16_t szA = 0;
+    return LTC_PKHA_ModRed(LTC_BASE, a, 64, l_coefEdDSA, sizeof(l_coefEdDSA),
+        a, &szA, kLTC_PKHA_IntegerArith);
+}
+
+/*
+Input:
+  a[0]+256*a[1]+...+256^31*a[31] = a
+  b[0]+256*b[1]+...+256^31*b[31] = b
+  c[0]+256*c[1]+...+256^31*c[31] = c
+
+Output:
+  s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
+  where l = 2^252 + 27742317777372353535851937790883648493.
+*/
+status_t LTC_PKHA_sc_muladd(uint8_t *s, const uint8_t *a,
+    const uint8_t *b, const uint8_t *c)
+{
+    uint16_t szS = 0;
+    uint16_t szB = 0;
+    uint8_t tempB[32] = {0};
+    status_t status;
+
+    /* Assume only b can be larger than the modulus. This is called during
+     * wc_ed25519_sign_msg(), where hram (=a) and the nonce (=c)
+     * have already been reduced by LTC_PKHA_sc_reduce().
+     * Thus only b is reduced here.
+     */
+    status = LTC_PKHA_ModRed(LTC_BASE, b, 32, l_coefEdDSA, sizeof(l_coefEdDSA),
+        tempB, &szB, kLTC_PKHA_IntegerArith);
+
+    if (status == kStatus_Success) {
+        status = LTC_PKHA_ModMul(LTC_BASE, a, 32, tempB, szB, l_coefEdDSA,
+            sizeof(l_coefEdDSA), s, &szS, kLTC_PKHA_IntegerArith,
+            kLTC_PKHA_NormalValue, kLTC_PKHA_NormalValue,
+            kLTC_PKHA_TimingEqualized);
+    }
+
+    if (status == kStatus_Success) {
+        status = LTC_PKHA_ModAdd(LTC_BASE, s, szS, c, 32, l_coefEdDSA, 32, s,
+            &szS, kLTC_PKHA_IntegerArith);
+    }
+
+    return status;
+}
+
+/*
+r = a * A + b * B
+where A is the public key point and B is the basepoint,
+where a = a[0]+256*a[1]+...+256^31 a[31]
+and b = b[0]+256*b[1]+...+256^31 b[31].
+B is the Ed25519 base point (x,4/5) with x positive.
+*/
+status_t LTC_PKHA_SignatureForVerify(uint8_t *rcheck, const unsigned char *a,
+    const unsigned char *b, ed25519_key *key)
+{
+    /* To verify a signature on a message M, first split the signature
+       into two 32-octet halves. Decode the first half as a point R,
+       and the second half as an integer s, in the range 0 <= s < q. If
+       the decoding fails, the signature is invalid. */
+
+    /* Check the group equation 8s B = 8 R + 8k A.
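+
+       Rearranged for this implementation: since sB = R + H(R,A,M)A, the
+       verifier can reconstruct R as sB - H(R,A,M)A. The code below computes
+       rcheck = b*B - a*A in the Weierstrass mapping, and the caller compares
+       rcheck against the encoded R half of the signature.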
*/
+
+    /*
+       Uses a fast single-signature verification: SB = R + H(R,A,M)A is
+       rearranged as SB - H(R,A,M)A = R, saving the decompression of R
+    */
+    uint8_t X0[ED25519_PUB_KEY_SIZE] = {0};
+    uint8_t X1[ED25519_PUB_KEY_SIZE] = {0};
+    uint8_t Y0[ED25519_PUB_KEY_SIZE] = {0};
+    uint8_t Y1[ED25519_PUB_KEY_SIZE] = {0};
+    const uint8_t max[32] = {
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+    ltc_pkha_ecc_point_t ltc0;
+    ltc_pkha_ecc_point_t ltc1;
+    ltc_pkha_ecc_point_t pubKey;
+    status_t status;
+
+    /* The equality for the negative of a point P, in affine coordinates,
+       is -P = -(x,y) = (x, -y) */
+    uint16_t szY = 32;
+
+    ltc0.X = X0;
+    ltc1.X = X1;
+    ltc0.Y = Y0;
+    ltc1.Y = Y1;
+    pubKey.X = key->pointX;
+    pubKey.Y = key->pointY;
+
+    /* ltc0 = b*B */
+    status = LTC_PKHA_Ed25519_PointMul(LTC_PKHA_Ed25519_BasePoint(), b,
+        ED25519_KEY_SIZE, &ltc0, kLTC_Weierstrass /* result in W */);
+
+    /* ltc1 = a*A */
+    if (status == kStatus_Success) {
+        status = LTC_PKHA_Ed25519ToWeierstrass(&pubKey, &ltc1);
+    }
+    if (status == kStatus_Success) {
+        status = LTC_PKHA_Ed25519_PointMul(&ltc1, a, ED25519_KEY_SIZE, &ltc1,
+            kLTC_Weierstrass /* result in W */);
+    }
+
+    /* R = b*B - a*A */
+    if (status == kStatus_Success) {
+        status = LTC_PKHA_ModSub1(LTC_BASE, curve25519_modbin,
+            sizeof(curve25519_modbin), ltc1.Y, szY, max, sizeof(max), ltc1.Y,
+            &szY);
+    }
+    if (status == kStatus_Success) {
+        status = LTC_PKHA_ECC_PointAdd(LTC_BASE, &ltc0, &ltc1,
+            curve25519_modbin, r2mod, a_coefEd25519, b_coefEd25519,
+            ED25519_KEY_SIZE, kLTC_PKHA_IntegerArith, &ltc0);
+    }
+    /* map to Ed25519 */
+    if (status == kStatus_Success) {
+        status = LTC_PKHA_WeierstrassToEd25519(&ltc0, &ltc0);
+    }
+    if (((uint32_t)ltc0.X[0]) & 0x01u) {
+        ltc0.Y[ED25519_KEY_SIZE - 1] |= 0x80u;
+    }
+
+    XMEMCPY(rcheck, ltc0.Y, ED25519_KEY_SIZE);
+    return status;
+}
+
+status_t LTC_PKHA_Ed25519_Compress(const ltc_pkha_ecc_point_t *ltcPointIn,
+    uint8_t *p)
+{
+    /* compress */
+    /* get the sign of X per https://tools.ietf.org/html/draft-josefsson-eddsa-ed25519-02
+     * To form the encoding of the point, copy the least
+       significant bit of the x-coordinate to the most significant bit of
+       the final octet
+     */
+    XMEMCPY(p, ltcPointIn->Y, ED25519_KEY_SIZE);
+    if (((uint32_t)ltcPointIn->X[0]) & 0x01u) {
+        p[ED25519_KEY_SIZE - 1] |= 0x80u;
+    }
+    return kStatus_Success;
+}
+
+#endif /* HAVE_ED25519 */
+#endif /* FREESCALE_LTC_ECC */
+
+
+#undef ERROR_OUT
+
+#endif /* FREESCALE_LTC_TFM || FREESCALE_LTC_ECC */
diff --git a/client/wolfssl/wolfcrypt/src/port/pic32/pic32mz-crypt.c b/client/wolfssl/wolfcrypt/src/port/pic32/pic32mz-crypt.c
new file mode 100644
index 0000000..1e618c1
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/pic32/pic32mz-crypt.c
@@ -0,0 +1,804 @@
+/* pic32mz-crypt.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef WOLFSSL_MICROCHIP_PIC32MZ
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+
+#include <wolfssl/wolfcrypt/logging.h>
+
+#ifdef WOLFSSL_PIC32MZ_CRYPT
+#include <wolfssl/wolfcrypt/aes.h>
+#include <wolfssl/wolfcrypt/des3.h>
+#endif
+
+#ifdef WOLFSSL_PIC32MZ_HASH
+#include <wolfssl/wolfcrypt/md5.h>
+#include <wolfssl/wolfcrypt/sha.h>
+#include <wolfssl/wolfcrypt/sha256.h>
+#endif
+
+
+#if defined(WOLFSSL_PIC32MZ_CRYPT) || defined(WOLFSSL_PIC32MZ_HASH)
+
+static int Pic32GetBlockSize(int algo)
+{
+    switch (algo) {
+        case PIC32_ALGO_HMAC1:
+            return PIC32_BLOCKSIZE_HMAC;
+        case PIC32_ALGO_SHA256:
+            return PIC32_BLOCKSIZE_SHA256;
+        case PIC32_ALGO_SHA1:
+            return PIC32_BLOCKSIZE_SHA1;
+        case PIC32_ALGO_MD5:
+            return PIC32_BLOCKSIZE_MD5;
+        case PIC32_ALGO_AES:
+            return PIC32_BLOCKSIZE_AES;
+        case PIC32_ALGO_TDES:
+            return PIC32_BLOCKSIZE_TDES;
+        case PIC32_ALGO_DES:
+            return PIC32_BLOCKSIZE_DES;
+    }
+    return 0;
+}
+
+static int Pic32Crypto(const byte* pIn, int inLen, word32* pOut, int outLen,
+    int dir, int algo, int cryptoalgo,
+
+    /* For DES/AES only */
+    word32* key, int keyLen, word32* iv, int ivLen)
+{
+    int ret = 0;
+    int blockSize = Pic32GetBlockSize(algo);
+    volatile bufferDescriptor bd __attribute__((aligned (8)));
+    securityAssociation sa __attribute__((aligned (8)));
+    securityAssociation *sa_p;
+    bufferDescriptor *bd_p;
+    byte *in_p;
+    byte *out_p;
+    word32* dst;
+    word32 padRemain;
+    int timeout = 0xFFFFFF;
+    word32* in = (word32*)pIn;
+    word32* out = pOut;
+    int isDynamic = 0;
+
+    /* check args */
+    if (in == NULL || inLen <= 0 || out == NULL || blockSize == 0) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* check pointer alignment - must be word aligned */
+    if (((size_t)in % sizeof(word32)) || ((size_t)out % sizeof(word32))) {
+        /* dynamically allocate aligned pointers */
+        isDynamic = 1;
+        in = (word32*)XMALLOC(inLen, NULL, DYNAMIC_TYPE_AES_BUFFER);
+        if (in == NULL)
+            return MEMORY_E;
+        if ((word32*)pIn == pOut) /* inline */
+            out = (word32*)in;
+        else {
+            out = (word32*)XMALLOC(outLen, NULL, DYNAMIC_TYPE_AES_BUFFER);
+            if (out == NULL) {
+                XFREE(in, NULL, DYNAMIC_TYPE_AES_BUFFER);
+                return MEMORY_E;
+            }
+        }
+        XMEMCPY(in, pIn, inLen);
+    }
+
+    /* get uncached address */
+    sa_p = KVA0_TO_KVA1(&sa);
+    bd_p = KVA0_TO_KVA1(&bd);
+    out_p = KVA0_TO_KVA1(out);
+    in_p = KVA0_TO_KVA1(in);
+
+    /* Sync cache if in physical memory (not flash) */
+    if (PIC32MZ_IF_RAM(in_p)) {
+        XMEMCPY(in_p, in, inLen);
+    }
+
+    /* Set up the Security Association */
+    XMEMSET(sa_p, 0, sizeof(sa));
+    sa_p->SA_CTRL.ALGO = algo;
+    sa_p->SA_CTRL.ENCTYPE = dir;
+    sa_p->SA_CTRL.FB = 1; /* first block */
+    sa_p->SA_CTRL.LNC = 1; /* Load new set of keys */
+    if (key) {
+        /* cipher */
+        sa_p->SA_CTRL.CRYPTOALGO = cryptoalgo;
+
+        switch (keyLen) {
+            case 32:
+                sa_p->SA_CTRL.KEYSIZE = PIC32_KEYSIZE_256;
+                break;
+            case 24:
+            case 8: /* DES */
+                sa_p->SA_CTRL.KEYSIZE = PIC32_KEYSIZE_192;
+                break;
+            case 16:
+                sa_p->SA_CTRL.KEYSIZE = PIC32_KEYSIZE_128;
+                break;
+        }
+
+        dst = (word32*)KVA0_TO_KVA1(sa.SA_ENCKEY +
+            (sizeof(sa.SA_ENCKEY)/sizeof(word32)) - (keyLen/sizeof(word32)));
+        ByteReverseWords(dst, key, keyLen);
+
+        if (iv && ivLen > 0) {
+            sa_p->SA_CTRL.LOADIV = 1;
+            dst = (word32*)KVA0_TO_KVA1(sa.SA_ENCIV +
+                (sizeof(sa.SA_ENCIV)/sizeof(word32)) - (ivLen/sizeof(word32)));
+            ByteReverseWords(dst, iv, ivLen);
+        }
+    }
+    else {
+        /* hashing */
+
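+        /* Hashing path: the engine reads the initial/intermediate digest
+         * from SA_AUTHIV and returns the updated digest through the buffer
+         * descriptor's UPDPTR below, so 'out' carries the running digest
+         * state between calls. */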
sa_p->SA_CTRL.LOADIV = 1; + sa_p->SA_CTRL.IRFLAG = 0; /* immediate result for hashing */ + + dst = (word32*)KVA0_TO_KVA1(sa.SA_AUTHIV + + (sizeof(sa.SA_AUTHIV)/sizeof(word32)) - (outLen/sizeof(word32))); + ByteReverseWords(dst, out, outLen); + } + + /* Set up the Buffer Descriptor */ + XMEMSET(bd_p, 0, sizeof(bd)); + bd_p->BD_CTRL.BUFLEN = inLen; + padRemain = (inLen % 4); /* make sure buffer is 4-byte multiple */ + if (padRemain != 0) { + bd_p->BD_CTRL.BUFLEN += (4 - padRemain); + } + bd_p->BD_CTRL.SA_FETCH_EN = 1; /* Fetch the security association */ + bd_p->BD_CTRL.PKT_INT_EN = 1; /* enable interrupt */ + bd_p->BD_CTRL.LAST_BD = 1; /* last buffer desc in chain */ + bd_p->BD_CTRL.LIFM = 1; /* last in frame */ + bd_p->SA_ADDR = (unsigned int)KVA_TO_PA(&sa); + bd_p->SRCADDR = (unsigned int)KVA_TO_PA(in); + if (key) { + /* cipher */ + if (in != out) + XMEMSET(out_p, 0, outLen); /* clear output buffer */ + bd_p->DSTADDR = (unsigned int)KVA_TO_PA(out); + } + else { + /* hashing */ + /* digest result returned in UPDPTR */ + bd_p->UPDPTR = (unsigned int)KVA_TO_PA(out); + } + bd_p->NXTPTR = (unsigned int)KVA_TO_PA(&bd); + bd_p->MSGLEN = inLen; /* actual message size */ + bd_p->BD_CTRL.DESC_EN = 1; /* enable this descriptor */ + + /* begin access to hardware */ + ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + /* Software Reset the Crypto Engine */ + CECON = 1 << 6; + while (CECON); + + /* Clear the interrupt flags */ + CEINTSRC = 0xF; + + /* Run the engine */ + CEBDPADDR = (unsigned int)KVA_TO_PA(&bd); + CEINTEN = 0x07; /* enable DMA Packet Completion Interrupt */ + + /* input swap, enable BD fetch and start DMA */ + #if PIC32_NO_OUT_SWAP + CECON = 0x25; + #else + CECON = 0xa5; /* bit 7 = enable out swap */ + #endif + + /* wait for operation to complete */ + while (CEINTSRCbits.PKTIF == 0 && --timeout > 0) {}; + + /* Clear the interrupt flags */ + CEINTSRC = 0xF; + + /* check for errors */ + if (CESTATbits.ERROP || timeout <= 0) { + #if 0 + printf("PIC32 Crypto: ERROP %x, ERRPHASE %x, TIMEOUT %s\n", + CESTATbits.ERROP, CESTATbits.ERRPHASE, timeout <= 0 ? "yes" : "no"); + #endif + ret = ASYNC_OP_E; + } + + wolfSSL_CryptHwMutexUnLock(); + + /* copy result to output */ + #if PIC32_NO_OUT_SWAP + /* swap bytes */ + ByteReverseWords(out, (word32*)out_p, outLen); + #elif defined(_SYS_DEVCON_LOCAL_H) + /* sync cache */ + SYS_DEVCON_DataCacheInvalidate((word32)out, outLen); + #else + XMEMCPY(out, out_p, outLen); + #endif + } + + /* handle unaligned */ + if (isDynamic) { + /* return result */ + XMEMCPY(pOut, out, outLen); + + /* free dynamic buffers */ + XFREE(in, NULL, DYNAMIC_TYPE_AES_BUFFER); + if ((word32*)pIn != pOut) + XFREE(out, NULL, DYNAMIC_TYPE_AES_BUFFER); + } + + return ret; +} +#endif /* WOLFSSL_PIC32MZ_CRYPT || WOLFSSL_PIC32MZ_HASH */ + + +#ifdef WOLFSSL_PIC32MZ_HASH + +#ifdef WOLFSSL_PIC32MZ_LARGE_HASH + +/* tunable large hash block size */ +#ifndef PIC32_BLOCK_SIZE + #define PIC32_BLOCK_SIZE 256 +#endif + +#define PIC32MZ_MIN_BLOCK 64 +#define PIC32MZ_MAX_BLOCK (32*1024) + +#ifndef PIC32MZ_MAX_BD + #define PIC32MZ_MAX_BD 2 +#endif + +#if PIC32_BLOCK_SIZE < PIC32MZ_MIN_BLOCK + #error Encryption block size must be at least 64 bytes. 
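+/* PIC32_BLOCK_SIZE is tunable; a user_settings.h may raise it to reduce
+ * descriptor swaps, staying at or above PIC32MZ_MIN_BLOCK as enforced by the
+ * guard above. Illustrative override only, not a required setting:
+ *     #define PIC32_BLOCK_SIZE 1024
+ */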
+#endif + +/* Crypt Engine descriptor */ +typedef struct { + int currBd; + int err; + unsigned int msgSize; + uint32_t processed; + uint32_t dbPtr; + int engine_ready; + volatile bufferDescriptor bd[PIC32MZ_MAX_BD] __attribute__((aligned (8))); + securityAssociation sa __attribute__((aligned (8))); +} pic32mz_desc; + +static pic32mz_desc gLHDesc; +static uint8_t gLHDataBuf[PIC32MZ_MAX_BD][PIC32_BLOCK_SIZE] __attribute__((aligned (4), coherent)); + +static void reset_engine(pic32mz_desc *desc, int algo) +{ + int i; + pic32mz_desc* uc_desc = KVA0_TO_KVA1(desc); + + wolfSSL_CryptHwMutexLock(); + + /* Software reset */ + CECON = 1 << 6; + while (CECON); + + /* Clear the interrupt flags */ + CEINTSRC = 0xF; + + /* Make sure everything is clear first before we setup */ + XMEMSET(desc, 0, sizeof(pic32mz_desc)); + XMEMSET((void *)&uc_desc->sa, 0, sizeof(uc_desc->sa)); + + /* Set up the Security Association */ + uc_desc->sa.SA_CTRL.ALGO = algo; + uc_desc->sa.SA_CTRL.LNC = 1; + uc_desc->sa.SA_CTRL.FB = 1; + uc_desc->sa.SA_CTRL.ENCTYPE = 1; + uc_desc->sa.SA_CTRL.LOADIV = 1; + + /* Set up the Buffer Descriptor */ + uc_desc->err = 0; + for (i = 0; i < PIC32MZ_MAX_BD; i++) { + XMEMSET((void *)&uc_desc->bd[i], 0, sizeof(uc_desc->bd[i])); + uc_desc->bd[i].BD_CTRL.LAST_BD = 1; + uc_desc->bd[i].BD_CTRL.LIFM = 1; + uc_desc->bd[i].BD_CTRL.PKT_INT_EN = 1; + uc_desc->bd[i].SA_ADDR = KVA_TO_PA(&uc_desc->sa); + uc_desc->bd[i].SRCADDR = KVA_TO_PA(&gLHDataBuf[i]); + if (PIC32MZ_MAX_BD > i+1) + uc_desc->bd[i].NXTPTR = KVA_TO_PA(&uc_desc->bd[i+1]); + else + uc_desc->bd[i].NXTPTR = KVA_TO_PA(&uc_desc->bd[0]); + XMEMSET((void *)&gLHDataBuf[i], 0, PIC32_BLOCK_SIZE); + } + uc_desc->bd[0].BD_CTRL.SA_FETCH_EN = 1; /* Fetch the security association on the first BD */ + desc->dbPtr = 0; + desc->currBd = 0; + desc->msgSize = 0; + desc->processed = 0; + CEBDPADDR = KVA_TO_PA(&(desc->bd[0])); + + CEPOLLCON = 10; + +#if PIC32_NO_OUT_SWAP + CECON = 0x27; +#else + CECON = 0xa7; +#endif +} + +static void update_engine(pic32mz_desc *desc, const byte *input, word32 len, + word32 *hash) +{ + int total; + pic32mz_desc *uc_desc = KVA0_TO_KVA1(desc); + + uc_desc->bd[desc->currBd].UPDPTR = KVA_TO_PA(hash); + + /* Add the data to the current buffer. If the buffer fills, start processing it + and fill the next one. */ + while (len) { + /* If we've been given the message size, we can process along the + way. + Enable the current buffer descriptor if it is full. */ + if (desc->dbPtr >= PIC32_BLOCK_SIZE) { + /* Wrap up the buffer descriptor and enable it so the engine can process */ + uc_desc->bd[desc->currBd].MSGLEN = desc->msgSize; + uc_desc->bd[desc->currBd].BD_CTRL.BUFLEN = desc->dbPtr; + uc_desc->bd[desc->currBd].BD_CTRL.LAST_BD = 0; + uc_desc->bd[desc->currBd].BD_CTRL.LIFM = 0; + uc_desc->bd[desc->currBd].BD_CTRL.DESC_EN = 1; + /* Move to the next buffer descriptor, or wrap around. */ + desc->currBd++; + if (desc->currBd >= PIC32MZ_MAX_BD) + desc->currBd = 0; + /* Wait until the engine has processed the new BD. */ + while (uc_desc->bd[desc->currBd].BD_CTRL.DESC_EN); + uc_desc->bd[desc->currBd].UPDPTR = KVA_TO_PA(hash); + desc->dbPtr = 0; + } + if (!PIC32MZ_IF_RAM(input)) { + /* If we're inputting from flash, let the BD have + the address and max the buffer size */ + uc_desc->bd[desc->currBd].SRCADDR = KVA_TO_PA(input); + total = (len > PIC32MZ_MAX_BLOCK ? 
PIC32MZ_MAX_BLOCK : len); + desc->dbPtr = total; + len -= total; + input += total; + } + else { + if (len > PIC32_BLOCK_SIZE - desc->dbPtr) { + /* We have more data than can be put in the buffer. Fill what we can.*/ + total = PIC32_BLOCK_SIZE - desc->dbPtr; + XMEMCPY(&gLHDataBuf[desc->currBd][desc->dbPtr], input, total); + len -= total; + desc->dbPtr = PIC32_BLOCK_SIZE; + input += total; + } + else { + /* Fill up what we have, but don't turn on the engine.*/ + XMEMCPY(&gLHDataBuf[desc->currBd][desc->dbPtr], input, len); + desc->dbPtr += len; + len = 0; + } + } + } +} + +static void start_engine(pic32mz_desc *desc) +{ + /* Wrap up the last buffer descriptor and enable it */ + int bufferLen; + pic32mz_desc *uc_desc = KVA0_TO_KVA1(desc); + + bufferLen = desc->dbPtr; + if (bufferLen % 4) + bufferLen = (bufferLen + 4) - (bufferLen % 4); + /* initialize the MSGLEN on engine startup to avoid infinite loop when + * length is less than 257 (size of PIC32_BLOCK_SIZE) */ + uc_desc->bd[desc->currBd].MSGLEN = desc->msgSize; + uc_desc->bd[desc->currBd].BD_CTRL.BUFLEN = bufferLen; + uc_desc->bd[desc->currBd].BD_CTRL.LAST_BD = 1; + uc_desc->bd[desc->currBd].BD_CTRL.LIFM = 1; + uc_desc->bd[desc->currBd].BD_CTRL.DESC_EN = 1; +} + +void wait_engine(pic32mz_desc *desc, char *hash, int hash_sz) +{ + int i; + pic32mz_desc *uc_desc = KVA0_TO_KVA1(desc); + unsigned int engineRunning; + + do { + engineRunning = 0; + for (i = 0; i < PIC32MZ_MAX_BD; i++) { + engineRunning = engineRunning || uc_desc->bd[i].BD_CTRL.DESC_EN; + } + } while (engineRunning); + +#if PIC32_NO_OUT_SWAP + /* swap bytes */ + ByteReverseWords(hash, KVA0_TO_KVA1(hash), hash_sz); +#else + /* copy output - hardware already swapped */ + XMEMCPY(hash, KVA0_TO_KVA1(hash), hash_sz); +#endif + + wolfSSL_CryptHwMutexUnLock(); +} + +#endif /* WOLFSSL_PIC32MZ_LARGE_HASH */ + +int wc_Pic32Hash(const byte* in, int inLen, word32* out, int outLen, int algo) +{ + return Pic32Crypto(in, inLen, out, outLen, PIC32_ENCRYPTION, algo, 0, + NULL, 0, NULL, 0); +} + +int wc_Pic32HashCopy(hashUpdCache* src, hashUpdCache* dst) +{ + /* mark destination as copy, so cache->buf is not free'd */ + if (dst) { + dst->isCopy = 1; + } + return 0; +} + +static int wc_Pic32HashUpdate(hashUpdCache* cache, byte* stdBuf, int stdBufLen, + word32* digest, int digestSz, const byte* data, int len, int algo, void* heap) +{ + int ret = 0; + word32 newLenUpd, newLenPad, padRemain; + byte* newBuf; + int isNewBuf = 0; + +#ifdef WOLFSSL_PIC32MZ_LARGE_HASH + /* if final length is set then pass straight to hardware */ + if (cache->finalLen) { + if (cache->bufLen == 0) { + reset_engine(&gLHDesc, algo); + gLHDesc.msgSize = cache->finalLen; + } + update_engine(&gLHDesc, data, len, digest); + cache->bufLen += len; /* track progress for blockType */ + return 0; + } +#endif + + /* cache updates */ + /* calculate new len */ + newLenUpd = cache->updLen + len; + + /* calculate padded len - pad buffer at 64-bytes for hardware */ + newLenPad = newLenUpd; + padRemain = (newLenUpd % PIC32_BLOCKSIZE_HASH); + if (padRemain != 0) { + newLenPad += (PIC32_BLOCKSIZE_HASH - padRemain); + } + + /* determine buffer source */ + if (newLenPad <= stdBufLen) { + /* use standard buffer */ + newBuf = stdBuf; + } + else if (newLenPad > cache->bufLen) { + /* alloc buffer */ + newBuf = (byte*)XMALLOC(newLenPad, heap, DYNAMIC_TYPE_HASH_TMP); + if (newBuf == NULL) { + if (cache->buf != stdBuf && !cache->isCopy) { + XFREE(cache->buf, heap, DYNAMIC_TYPE_HASH_TMP); + cache->buf = NULL; + cache->updLen = cache->bufLen = 0; + } + 
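+            /* allocation of the larger cache failed: the previous cache was
+             * already released above, so partial update state is not leaked */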
return MEMORY_E; + } + isNewBuf = 1; + cache->isCopy = 0; /* no longer using copy buffer */ + } + else { + /* use existing buffer */ + newBuf = cache->buf; + } + if (cache->buf && cache->updLen > 0) { + XMEMCPY(newBuf, cache->buf, cache->updLen); + if (isNewBuf && cache->buf != stdBuf) { + XFREE(cache->buf, heap, DYNAMIC_TYPE_HASH_TMP); + cache->buf = NULL; + } + } + XMEMCPY(newBuf + cache->updLen, data, len); + + cache->buf = newBuf; + cache->updLen = newLenUpd; + cache->bufLen = newLenPad; + + return ret; +} + +static int wc_Pic32HashFinal(hashUpdCache* cache, byte* stdBuf, + word32* digest, byte* hash, int digestSz, int algo, void* heap) +{ + int ret = 0; + + /* if room add the pad */ + if (cache->buf && cache->updLen < cache->bufLen) { + cache->buf[cache->updLen] = 0x80; + } + +#ifdef WOLFSSL_PIC32MZ_LARGE_HASH + if (cache->finalLen) { + start_engine(&gLHDesc); + wait_engine(&gLHDesc, (char*)digest, digestSz); + XMEMCPY(hash, digest, digestSz); + cache->finalLen = 0; + } + else +#endif + { + if (cache->updLen == 0) { + /* handle empty input */ + switch (algo) { + case PIC32_ALGO_SHA256: { + const char* sha256EmptyHash = + "\xe3\xb0\xc4\x42\x98\xfc\x1c\x14\x9a\xfb\xf4\xc8\x99\x6f\xb9" + "\x24\x27\xae\x41\xe4\x64\x9b\x93\x4c\xa4\x95\x99\x1b\x78\x52" + "\xb8\x55"; + XMEMCPY(hash, sha256EmptyHash, digestSz); + break; + } + case PIC32_ALGO_SHA1: { + const char* shaEmptyHash = + "\xda\x39\xa3\xee\x5e\x6b\x4b\x0d\x32\x55\xbf\xef\x95\x60\x18" + "\x90\xaf\xd8\x07\x09"; + XMEMCPY(hash, shaEmptyHash, digestSz); + break; + } + case PIC32_ALGO_MD5: { + const char* md5EmptyHash = + "\xd4\x1d\x8c\xd9\x8f\x00\xb2\x04\xe9\x80\x09\x98\xec\xf8\x42" + "\x7e"; + XMEMCPY(hash, md5EmptyHash, digestSz); + break; + } + } /* switch */ + } + else { + ret = wc_Pic32Hash(cache->buf, cache->updLen, digest, digestSz, algo); + if (ret == 0) { + XMEMCPY(hash, digest, digestSz); + } + } + + if (cache->buf && cache->buf != stdBuf && !cache->isCopy) { + XFREE(cache->buf, heap, DYNAMIC_TYPE_HASH_TMP); + cache->buf = NULL; + } + } + + cache->buf = NULL; + cache->bufLen = cache->updLen = 0; + + return ret; +} + +static void wc_Pic32HashFree(hashUpdCache* cache, void* heap) +{ + if (cache && cache->buf && !cache->isCopy) { + XFREE(cache->buf, heap, DYNAMIC_TYPE_HASH_TMP); + cache->buf = NULL; + } +} + +/* API's for compatibility with Harmony wrappers - not used */ +#ifndef NO_MD5 + int wc_InitMd5_ex(wc_Md5* md5, void* heap, int devId) + { + if (md5 == NULL) + return BAD_FUNC_ARG; + + XMEMSET(md5, 0, sizeof(wc_Md5)); + md5->heap = heap; + (void)devId; + return 0; + } + int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len) + { + if (md5 == NULL || (data == NULL && len > 0)) + return BAD_FUNC_ARG; + return wc_Pic32HashUpdate(&md5->cache, (byte*)md5->buffer, + sizeof(md5->buffer), md5->digest, MD5_DIGEST_SIZE, + data, len, PIC32_ALGO_MD5, md5->heap); + } + int wc_Md5Final(wc_Md5* md5, byte* hash) + { + int ret; + + if (md5 == NULL || hash == NULL) + return BAD_FUNC_ARG; + + ret = wc_Pic32HashFinal(&md5->cache, (byte*)md5->buffer, + md5->digest, hash, MD5_DIGEST_SIZE, + PIC32_ALGO_MD5, md5->heap); + + wc_InitMd5_ex(md5, md5->heap, INVALID_DEVID); /* reset state */ + + return ret; + } + void wc_Md5SizeSet(wc_Md5* md5, word32 len) + { + if (md5) { + #ifdef WOLFSSL_PIC32MZ_LARGE_HASH + md5->cache.finalLen = len; + #else + (void)len; + #endif + } + } + void wc_Md5Pic32Free(wc_Md5* md5) + { + if (md5) { + wc_Pic32HashFree(&md5->cache, md5->heap); + } + } +#endif /* !NO_MD5 */ +#ifndef NO_SHA + int wc_InitSha_ex(wc_Sha* sha, 
void* heap, int devId) + { + if (sha == NULL) + return BAD_FUNC_ARG; + + XMEMSET(sha, 0, sizeof(wc_Sha)); + sha->heap = heap; + (void)devId; + return 0; + } + int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len) + { + if (sha == NULL || (data == NULL && len > 0)) + return BAD_FUNC_ARG; + return wc_Pic32HashUpdate(&sha->cache, (byte*)sha->buffer, + sizeof(sha->buffer), sha->digest, SHA_DIGEST_SIZE, + data, len, PIC32_ALGO_SHA1, sha->heap); + } + int wc_ShaFinal(wc_Sha* sha, byte* hash) + { + int ret; + + if (sha == NULL || hash == NULL) + return BAD_FUNC_ARG; + + ret = wc_Pic32HashFinal(&sha->cache, (byte*)sha->buffer, + sha->digest, hash, SHA_DIGEST_SIZE, + PIC32_ALGO_SHA1, sha->heap); + + wc_InitSha_ex(sha, sha->heap, INVALID_DEVID); /* reset state */ + + return ret; + } + void wc_ShaSizeSet(wc_Sha* sha, word32 len) + { + if (sha) { + #ifdef WOLFSSL_PIC32MZ_LARGE_HASH + sha->cache.finalLen = len; + #else + (void)len; + #endif + } + } + void wc_ShaPic32Free(wc_Sha* sha) + { + if (sha) { + wc_Pic32HashFree(&sha->cache, sha->heap); + } + } +#endif /* !NO_SHA */ +#ifndef NO_SHA256 + int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) + { + if (sha256 == NULL) + return BAD_FUNC_ARG; + + XMEMSET(sha256, 0, sizeof(wc_Sha256)); + sha256->heap = heap; + (void)devId; + return 0; + } + int wc_Sha256Update(wc_Sha256* sha256, const byte* data, word32 len) + { + if (sha256 == NULL || (data == NULL && len > 0)) + return BAD_FUNC_ARG; + return wc_Pic32HashUpdate(&sha256->cache, (byte*)sha256->buffer, + sizeof(sha256->buffer), sha256->digest, SHA256_DIGEST_SIZE, + data, len, PIC32_ALGO_SHA256, sha256->heap); + } + int wc_Sha256Final(wc_Sha256* sha256, byte* hash) + { + int ret; + + if (sha256 == NULL || hash == NULL) + return BAD_FUNC_ARG; + + ret = wc_Pic32HashFinal(&sha256->cache, (byte*)sha256->buffer, + sha256->digest, hash, SHA256_DIGEST_SIZE, + PIC32_ALGO_SHA256, sha256->heap); + + wc_InitSha256_ex(sha256, sha256->heap, INVALID_DEVID); /* reset state */ + + return ret; + } + void wc_Sha256SizeSet(wc_Sha256* sha256, word32 len) + { + if (sha256) { + #ifdef WOLFSSL_PIC32MZ_LARGE_HASH + sha256->cache.finalLen = len; + #else + (void)len; + #endif + } + } + void wc_Sha256Pic32Free(wc_Sha256* sha256) + { + if (sha256) { + wc_Pic32HashFree(&sha256->cache, sha256->heap); + } + } +#endif /* !NO_SHA256 */ +#endif /* WOLFSSL_PIC32MZ_HASH */ + + +#ifdef WOLFSSL_PIC32MZ_CRYPT +#if !defined(NO_AES) + int wc_Pic32AesCrypt(word32 *key, int keyLen, word32 *iv, int ivLen, + byte* out, const byte* in, word32 sz, + int dir, int algo, int cryptoalgo) + { + return Pic32Crypto(in, sz, (word32*)out, sz, dir, algo, cryptoalgo, + key, keyLen, iv, ivLen); + } +#endif /* !NO_AES */ + +#ifndef NO_DES3 + int wc_Pic32DesCrypt(word32 *key, int keyLen, word32 *iv, int ivLen, + byte* out, const byte* in, word32 sz, + int dir, int algo, int cryptoalgo) + { + return Pic32Crypto(in, sz, (word32*)out, sz, dir, algo, cryptoalgo, + key, keyLen, iv, ivLen); + } +#endif /* !NO_DES3 */ +#endif /* WOLFSSL_PIC32MZ_CRYPT */ + +#endif /* WOLFSSL_MICROCHIP_PIC32MZ */ diff --git a/client/wolfssl/wolfcrypt/src/port/st/README.md b/client/wolfssl/wolfcrypt/src/port/st/README.md new file mode 100644 index 0000000..011dd90 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/st/README.md @@ -0,0 +1,132 @@ +# ST Ports + +Support for the STM32 L4, F1, F2, F4 and F7 on-board crypto hardware acceleration for symmetric AES (ECB/CBC/CTR/GCM) and MD5/SHA1/SHA224/SHA256. 
+
+Support for the STSAFE-A100 crypto hardware accelerator co-processor via I2C
+for ECC, supporting NIST or Brainpool 256-bit and 384-bit curves. It requires
+the ST-Safe SDK, including the wolf `stsafe_interface.c/.h` files; please
+contact ST for these.
+
+
+For details see our [wolfSSL ST](https://www.wolfssl.com/docs/stm32/) page.
+
+
+## STM32 Symmetric Acceleration
+
+We support both the STM32 CubeMX HAL and the Standard Peripheral Library.
+
+### Building
+
+To enable support define one of the following:
+
+```
+#define WOLFSSL_STM32L4
+#define WOLFSSL_STM32F1
+#define WOLFSSL_STM32F2
+#define WOLFSSL_STM32F4
+#define WOLFSSL_STM32F7
+```
+
+To use CubeMX define `WOLFSSL_STM32_CUBEMX`; otherwise the Standard
+Peripheral Library is used.
+
+To disable portions of the hardware acceleration you can optionally define:
+
+```
+#define NO_STM32_RNG
+#define NO_STM32_CRYPTO
+#define NO_STM32_HASH
+```
+
+### Coding
+
+In your application you must include `<wolfssl/wolfcrypt/settings.h>` before
+any other wolfSSL headers. If building the sources directly we recommend
+defining `WOLFSSL_USER_SETTINGS` and adding your own `user_settings.h` file.
+You can find a good reference for this in `IDE/GCC-ARM/Header/user_settings.h`.
+
+
+### Benchmarks
+
+See our [benchmarks](https://www.wolfssl.com/docs/benchmarks/) on the wolfSSL website.
+
+
+
+## STSAFE-A100 ECC Acceleration
+
+Using the wolfSSL PK callbacks and the ST-Safe reference APIs, we support
+ECC-only cipher suites such as ECDHE-ECDSA-AES128-SHA256 for TLS clients and
+servers.
+
+At the wolfCrypt level we also support the native ECC `wc_ecc_*` APIs using
+the ST-Safe.
+
+### Building
+
+`./configure --enable-pkcallbacks CFLAGS="-DWOLFSSL_STSAFEA100"`
+
+or
+
+`#define HAVE_PK_CALLBACKS`
+`#define WOLFSSL_STSAFEA100`
+
+
+### Coding
+
+Setup the PK callbacks for TLS using:
+
+```
+/* Setup PK Callbacks for STSAFE-A100 */
+WOLFSSL_CTX* ctx;
+wolfSSL_CTX_SetEccKeyGenCb(ctx, SSL_STSAFE_CreateKeyCb);
+wolfSSL_CTX_SetEccSignCb(ctx, SSL_STSAFE_SignCertificateCb);
+wolfSSL_CTX_SetEccVerifyCb(ctx, SSL_STSAFE_VerifyPeerCertCb);
+wolfSSL_CTX_SetEccSharedSecretCb(ctx, SSL_STSAFE_SharedSecretCb);
+wolfSSL_CTX_SetDevId(ctx, 0); /* enables wolfCrypt `wc_ecc_*` ST-Safe use */
+```
+
+The reference STSAFE-A100 PK callback functions are located in the
+`wolfcrypt/src/port/st/stsafe.c` file.
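+
+Once the `devId` is set, wolfCrypt-level ECC calls route to the ST-Safe as
+well. A minimal sketch (hypothetical usage, error checking omitted; assumes
+the RNG and ST-Safe interface are already initialized):
+
+```
+ecc_key key;
+WC_RNG rng;
+
+wc_InitRng(&rng);
+wc_ecc_init_ex(&key, NULL, 0);   /* devId 0 selects the ST-Safe */
+wc_ecc_make_key(&rng, 32, &key); /* 256-bit key via the STSAFE-A100 */
+```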
+
+Adding a custom context to the callbacks:
+
+```
+/* Setup PK Callbacks context */
+WOLFSSL* ssl;
+void* myOwnCtx;
+wolfSSL_SetEccKeyGenCtx(ssl, myOwnCtx);
+wolfSSL_SetEccVerifyCtx(ssl, myOwnCtx);
+wolfSSL_SetEccSignCtx(ssl, myOwnCtx);
+wolfSSL_SetEccSharedSecretCtx(ssl, myOwnCtx);
+```
+
+### Benchmarks and Memory Use
+
+Software-only implementation (STM32L4 at 120 MHz, Cortex-M4, Fast Math):
+
+```
+ECDHE 256 key gen SW 4 ops took 1.278 sec, avg 319.500 ms, 3.130 ops/sec
+ECDHE 256 agree SW 4 ops took 1.306 sec, avg 326.500 ms, 3.063 ops/sec
+ECDSA 256 sign SW 4 ops took 1.298 sec, avg 324.500 ms, 3.082 ops/sec
+ECDSA 256 verify SW 2 ops took 1.283 sec, avg 641.500 ms, 1.559 ops/sec
+```
+
+Memory Use:
+
+```
+Peak Stack: 18456
+Peak Heap: 2640
+Total: 21096
+```
+
+
+STSAFE-A100 acceleration:
+
+```
+ECDHE 256 key gen HW 8 ops took 1.008 sec, avg 126.000 ms, 7.937 ops/sec
+ECDHE 256 agree HW 6 ops took 1.051 sec, avg 175.167 ms, 5.709 ops/sec
+ECDSA 256 sign HW 14 ops took 1.161 sec, avg 82.929 ms, 12.059 ops/sec
+ECDSA 256 verify HW 8 ops took 1.184 sec, avg 148.000 ms, 6.757 ops/sec
+```
+
+Memory Use:
+
+```
+Peak Stack: 9592
+Peak Heap: 170
+Total: 9762
+```
+
+
+## Support
+
+Email us at [support@wolfssl.com](mailto:support@wolfssl.com).
diff --git a/client/wolfssl/wolfcrypt/src/port/st/stm32.c b/client/wolfssl/wolfcrypt/src/port/st/stm32.c
new file mode 100644
index 0000000..b37dbd8
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/st/stm32.c
@@ -0,0 +1,879 @@
+/* stm32.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Generic STM32 Hashing Function */
+/* Supports CubeMX HAL or Standard Peripheral Library */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#include <wolfssl/wolfcrypt/port/st/stm32.h>
+#include <wolfssl/wolfcrypt/types.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#ifndef NO_AES
+    #include <wolfssl/wolfcrypt/aes.h>
+#endif
+
+
+#ifdef STM32_HASH
+
+#ifdef WOLFSSL_STM32L4
+    #define HASH_STR_NBW HASH_STR_NBLW
+#endif
+
+/* User can override STM32_HASH_CLOCK_ENABLE and STM32_HASH_CLOCK_DISABLE */
+#ifndef STM32_HASH_CLOCK_ENABLE
+    static WC_INLINE void wc_Stm32_Hash_Clock_Enable(STM32_HASH_Context* stmCtx)
+    {
+    #ifdef WOLFSSL_STM32_CUBEMX
+        __HAL_RCC_HASH_CLK_ENABLE();
+    #else
+        RCC_AHB2PeriphClockCmd(RCC_AHB2Periph_HASH, ENABLE);
+    #endif
+        (void)stmCtx;
+    }
+    #define STM32_HASH_CLOCK_ENABLE(ctx) wc_Stm32_Hash_Clock_Enable(ctx)
+#endif
+
+#ifndef STM32_HASH_CLOCK_DISABLE
+    static WC_INLINE void wc_Stm32_Hash_Clock_Disable(STM32_HASH_Context* stmCtx)
+    {
+    #ifdef WOLFSSL_STM32_CUBEMX
+        __HAL_RCC_HASH_CLK_DISABLE();
+    #else
+        RCC_AHB2PeriphClockCmd(RCC_AHB2Periph_HASH, DISABLE);
+    #endif
+        (void)stmCtx;
+    }
+    #define STM32_HASH_CLOCK_DISABLE(ctx) wc_Stm32_Hash_Clock_Disable(ctx)
+#endif
+
+/* STM32 Port Internal Functions */
+static WC_INLINE void wc_Stm32_Hash_SaveContext(STM32_HASH_Context* ctx)
+{
+    int i;
+
+    /* save context registers */
+    ctx->HASH_IMR = HASH->IMR;
+    ctx->HASH_STR = HASH->STR;
+    ctx->HASH_CR = HASH->CR;
+    for (i=0; i<HASH_CR_SIZE; i++) {
+        ctx->HASH_CSR[i] = HASH->CSR[i];
+    }
+}
+
+static WC_INLINE int wc_Stm32_Hash_RestoreContext(STM32_HASH_Context* ctx)
+{
+    int i;
+
+    if (ctx->HASH_CR != 0) {
+        /* restore context registers */
+        HASH->IMR = ctx->HASH_IMR;
+        HASH->STR = ctx->HASH_STR;
+        HASH->CR = ctx->HASH_CR;
+
+        /* Initialize the hash processor */
+        HASH->CR |= HASH_CR_INIT;
+
+        /* continue restoring context registers */
+        for (i=0; i<HASH_CR_SIZE; i++) {
+            HASH->CSR[i] = ctx->HASH_CSR[i];
+        }
+        return 1;
+    }
+    return 0;
+}
+
+static WC_INLINE void wc_Stm32_Hash_GetDigest(byte* hash, int digestSize)
+{
+    word32 digest[HASH_MAX_DIGEST/sizeof(word32)];
+
+    /* get digest result */
+    digest[0] = HASH->HR[0];
+    digest[1] = HASH->HR[1];
+    digest[2] = HASH->HR[2];
+    digest[3] = HASH->HR[3];
+    if (digestSize >= 20) {
+        digest[4] = HASH->HR[4];
+    #ifdef HASH_DIGEST
+        if (digestSize >= 28) {
+            digest[5] = HASH_DIGEST->HR[5];
+            digest[6] = HASH_DIGEST->HR[6];
+            if (digestSize == 32)
+                digest[7] = HASH_DIGEST->HR[7];
+        }
+    #endif
+    }
+
+    ByteReverseWords(digest, digest, digestSize);
+
+    XMEMCPY(hash, digest, digestSize);
+}
+
+
+/* STM32 Port Exposed Functions */
+static WC_INLINE int wc_Stm32_Hash_WaitDone(void)
+{
+    /* wait until hash hardware is not busy */
+    int timeout = 0;
+    while ((HASH->SR & HASH_SR_BUSY) && ++timeout < STM32_HASH_TIMEOUT) {
+
+    }
+    /* verify timeout did not occur */
+    if (timeout >= STM32_HASH_TIMEOUT) {
+        return WC_TIMEOUT_E;
+    }
+    return 0;
+}
+
+
+void wc_Stm32_Hash_Init(STM32_HASH_Context* stmCtx)
+{
+    /* clear context */
+    XMEMSET(stmCtx, 0, sizeof(STM32_HASH_Context));
+}
+
+int wc_Stm32_Hash_Update(STM32_HASH_Context* stmCtx, word32 algo,
+    const byte* data, int len)
+{
+    int ret = 0;
+    byte* local = (byte*)stmCtx->buffer;
+    int wroteToFifo = 0;
+
+    /* check that internal buffLen is valid */
+    if (stmCtx->buffLen >= STM32_HASH_REG_SIZE) {
+        return BUFFER_E;
+    }
+
+    /* turn on hash clock */
+
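+    /* The HASH peripheral is a single shared block: each update turns the
+     * peripheral clock on, restores the saved IMR/STR/CR/CSR context (or
+     * configures a fresh hash), streams complete 32-bit words through DIN,
+     * saves the context again, and turns the clock back off. Partial words
+     * are held in stmCtx->buffer until 4 bytes accumulate. */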
STM32_HASH_CLOCK_ENABLE(stmCtx); + + /* restore hash context or init as new hash */ + if (wc_Stm32_Hash_RestoreContext(stmCtx) == 0) { + /* reset the control register */ + HASH->CR &= ~(HASH_CR_ALGO | HASH_CR_DATATYPE | HASH_CR_MODE); + + /* configure algorithm, mode and data type */ + HASH->CR |= (algo | HASH_ALGOMODE_HASH | HASH_DATATYPE_8B); + + /* reset HASH processor */ + HASH->CR |= HASH_CR_INIT; + } + + /* write 4-bytes at a time into FIFO */ + while (len) { + word32 add = min(len, STM32_HASH_REG_SIZE - stmCtx->buffLen); + XMEMCPY(&local[stmCtx->buffLen], data, add); + + stmCtx->buffLen += add; + data += add; + len -= add; + + if (stmCtx->buffLen == STM32_HASH_REG_SIZE) { + wroteToFifo = 1; + HASH->DIN = *(word32*)stmCtx->buffer; + + stmCtx->loLen += STM32_HASH_REG_SIZE; + stmCtx->buffLen = 0; + } + } + + if (wroteToFifo) { + /* save hash state for next operation */ + wc_Stm32_Hash_SaveContext(stmCtx); + } + + /* turn off hash clock */ + STM32_HASH_CLOCK_DISABLE(stmCtx); + + return ret; +} + +int wc_Stm32_Hash_Final(STM32_HASH_Context* stmCtx, word32 algo, + byte* hash, int digestSize) +{ + int ret = 0; + word32 nbvalidbitsdata = 0; + + /* turn on hash clock */ + STM32_HASH_CLOCK_ENABLE(stmCtx); + + /* restore hash state */ + wc_Stm32_Hash_RestoreContext(stmCtx); + + /* finish reading any trailing bytes into FIFO */ + if (stmCtx->buffLen > 0) { + HASH->DIN = *(word32*)stmCtx->buffer; + stmCtx->loLen += stmCtx->buffLen; + } + + /* calculate number of valid bits in last word */ + nbvalidbitsdata = 8 * (stmCtx->loLen % STM32_HASH_REG_SIZE); + HASH->STR &= ~HASH_STR_NBW; + HASH->STR |= nbvalidbitsdata; + + /* start hash processor */ + HASH->STR |= HASH_STR_DCAL; + + /* wait for hash done */ + ret = wc_Stm32_Hash_WaitDone(); + if (ret == 0) { + /* read message digest */ + wc_Stm32_Hash_GetDigest(hash, digestSize); + } + + /* turn off hash clock */ + STM32_HASH_CLOCK_DISABLE(stmCtx); + + return ret; +} + +#endif /* STM32_HASH */ + + +#ifdef STM32_CRYPTO + +#ifndef NO_AES +#ifdef WOLFSSL_STM32_CUBEMX +int wc_Stm32_Aes_Init(Aes* aes, CRYP_HandleTypeDef* hcryp) +{ + int ret; + word32 keySize; + + ret = wc_AesGetKeySize(aes, &keySize); + if (ret != 0) + return ret; + + XMEMSET(hcryp, 0, sizeof(CRYP_HandleTypeDef)); + switch (keySize) { + case 16: /* 128-bit key */ + hcryp->Init.KeySize = CRYP_KEYSIZE_128B; + break; + #ifdef CRYP_KEYSIZE_192B + case 24: /* 192-bit key */ + hcryp->Init.KeySize = CRYP_KEYSIZE_192B; + break; + #endif + case 32: /* 256-bit key */ + hcryp->Init.KeySize = CRYP_KEYSIZE_256B; + break; + default: + break; + } + hcryp->Instance = CRYP; + hcryp->Init.DataType = CRYP_DATATYPE_8B; + hcryp->Init.pKey = (STM_CRYPT_TYPE*)aes->key; +#ifdef STM32_HAL_V2 + hcryp->Init.DataWidthUnit = CRYP_DATAWIDTHUNIT_BYTE; +#endif + + return 0; +} + +#else /* STD_PERI_LIB */ + +int wc_Stm32_Aes_Init(Aes* aes, CRYP_InitTypeDef* cryptInit, + CRYP_KeyInitTypeDef* keyInit) +{ + int ret; + word32 keySize; + word32* aes_key; + + ret = wc_AesGetKeySize(aes, &keySize); + if (ret != 0) + return ret; + + aes_key = aes->key; + + /* crypto structure initialization */ + CRYP_KeyStructInit(keyInit); + CRYP_StructInit(cryptInit); + + /* load key into correct registers */ + switch (keySize) { + case 16: /* 128-bit key */ + cryptInit->CRYP_KeySize = CRYP_KeySize_128b; + keyInit->CRYP_Key2Left = aes_key[0]; + keyInit->CRYP_Key2Right = aes_key[1]; + keyInit->CRYP_Key3Left = aes_key[2]; + keyInit->CRYP_Key3Right = aes_key[3]; + break; + + case 24: /* 192-bit key */ + cryptInit->CRYP_KeySize = CRYP_KeySize_192b; + 
keyInit->CRYP_Key1Left = aes_key[0]; + keyInit->CRYP_Key1Right = aes_key[1]; + keyInit->CRYP_Key2Left = aes_key[2]; + keyInit->CRYP_Key2Right = aes_key[3]; + keyInit->CRYP_Key3Left = aes_key[4]; + keyInit->CRYP_Key3Right = aes_key[5]; + break; + + case 32: /* 256-bit key */ + cryptInit->CRYP_KeySize = CRYP_KeySize_256b; + keyInit->CRYP_Key0Left = aes_key[0]; + keyInit->CRYP_Key0Right = aes_key[1]; + keyInit->CRYP_Key1Left = aes_key[2]; + keyInit->CRYP_Key1Right = aes_key[3]; + keyInit->CRYP_Key2Left = aes_key[4]; + keyInit->CRYP_Key2Right = aes_key[5]; + keyInit->CRYP_Key3Left = aes_key[6]; + keyInit->CRYP_Key3Right = aes_key[7]; + break; + + default: + break; + } + cryptInit->CRYP_DataType = CRYP_DataType_8b; + + return 0; +} +#endif /* WOLFSSL_STM32_CUBEMX */ +#endif /* !NO_AES */ +#endif /* STM32_CRYPTO */ + +#ifdef WOLFSSL_STM32_PKA +#include +#include +#include + +extern PKA_HandleTypeDef hpka; + +/* Reverse array in memory (in place) */ +#ifdef HAVE_ECC +#include + +/* convert from mp_int to STM32 PKA HAL integer, as array of bytes of size sz. + * if mp_int has less bytes than sz, add zero bytes at most significant byte positions. + * This is when for example modulus is 32 bytes (P-256 curve) + * and mp_int has only 31 bytes, we add leading zeros + * so that result array has 32 bytes, same as modulus (sz). + */ +static int stm32_get_from_mp_int(uint8_t *dst, mp_int *a, int sz) +{ + int res; + int szbin; + int offset; + + if (!a || !dst || (sz < 0)) + return -1; + + /* check how many bytes are in the mp_int */ + szbin = mp_unsigned_bin_size(a); + if ((szbin < 0) || (szbin > sz)) + return -1; + + /* compute offset from dst */ + offset = sz - szbin; + if (offset < 0) + offset = 0; + if (offset > sz) + offset = sz; + + /* add leading zeroes */ + if (offset) + XMEMSET(dst, 0, offset); + + /* convert mp_int to array of bytes */ + res = mp_to_unsigned_bin(a, dst + offset); + return res; +} + +/* ECC specs in lsbyte at lowest address format for direct use by STM32_PKA PKHA driver functions */ +#if defined(HAVE_ECC192) || defined(HAVE_ALL_CURVES) +#define ECC192 +#endif +#if defined(HAVE_ECC224) || defined(HAVE_ALL_CURVES) +#define ECC224 +#endif +#if !defined(NO_ECC256) || defined(HAVE_ALL_CURVES) +#define ECC256 +#endif +#if defined(HAVE_ECC384) || defined(HAVE_ALL_CURVES) +#define ECC384 +#endif + +/* STM32 PKA supports up to 640bit numbers */ +#define STM32_MAX_ECC_SIZE (80) + + +/* P-192 */ +#ifdef ECC192 +#define ECC192_KEYSIZE (24) +static const uint8_t stm32_ecc192_prime[ECC192_KEYSIZE] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const uint32_t stm32_ecc192_coef_sign = 1U; + +static const uint8_t stm32_ecc192_coef[ECC192_KEYSIZE] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 +}; + +static const uint8_t stm32_ecc192_pointX[ECC192_KEYSIZE] = { + 0x18, 0x8D, 0xA8, 0x0E, 0xB0, 0x30, 0x90, 0xF6, + 0x7C, 0xBF, 0x20, 0xEB, 0x43, 0xA1, 0x88, 0x00, + 0xF4, 0xFF, 0x0A, 0xFD, 0x82, 0xFF, 0x10, 0x12 +}; + +const uint8_t stm32_ecc192_pointY[ECC192_KEYSIZE] = { + 0x07, 0x19, 0x2B, 0x95, 0xFF, 0xC8, 0xDA, 0x78, + 0x63, 0x10, 0x11, 0xED, 0x6B, 0x24, 0xCD, 0xD5, + 0x73, 0xF9, 0x77, 0xA1, 0x1E, 0x79, 0x48, 0x11 +}; + +const uint8_t stm32_ecc192_order[ECC192_KEYSIZE] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x99, 0xDE, 0xF8, 0x36, + 0x14, 
0x6B, 0xC9, 0xB1, 0xB4, 0xD2, 0x28, 0x31 +}; +const uint32_t stm32_ecc192_cofactor = 1U; + +#endif /* ECC192 */ + +/* P-224 */ +#ifdef ECC224 +#define ECC224_KEYSIZE (28) +static const uint8_t stm32_ecc224_prime[ECC224_KEYSIZE] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01 +}; +static const uint32_t stm32_ecc224_coef_sign = 1U; + +static const uint8_t stm32_ecc224_coef[ECC224_KEYSIZE] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x03 +}; + +static const uint8_t stm32_ecc224_pointX[ECC224_KEYSIZE] = { + 0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, + 0x32, 0x13, 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, + 0x56, 0xC2, 0x11, 0x22, 0x34, 0x32, 0x80, 0xD6, + 0x11, 0x5C, 0x1D, 0x21 +}; + +const uint8_t stm32_ecc224_pointY[ECC224_KEYSIZE] = { + 0xBD, 0x37, 0x63, 0x88, 0xB5, 0xF7, 0x23, 0xFB, + 0x4C, 0x22, 0xDF, 0xE6, 0xCD, 0x43, 0x75, 0xA0, + 0x5A, 0x07, 0x47, 0x64, 0x44, 0xD5, 0x81, 0x99, + 0x85, 0x00, 0x7E, 0x34 +}; + +const uint8_t stm32_ecc224_order[ECC224_KEYSIZE] = { +}; +const uint32_t stm32_ecc224_cofactor = 1U; + +#endif /* ECC224 */ + +/* P-256 */ +#ifdef ECC256 +#define ECC256_KEYSIZE (32) + +static const uint8_t stm32_ecc256_prime[ECC256_KEYSIZE] = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const uint32_t stm32_ecc256_coef_sign = 1U; + +static const uint8_t stm32_ecc256_coef[ECC256_KEYSIZE] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 +}; + +static const uint8_t stm32_ecc256_pointX[ECC256_KEYSIZE] = { + 0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, + 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2, + 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0, + 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96 +}; + +const uint8_t stm32_ecc256_pointY[ECC256_KEYSIZE] = { + 0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, + 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16, + 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce, + 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5 +}; + +const uint8_t stm32_ecc256_order[ECC256_KEYSIZE] = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xbc, 0xe6, 0xfa, 0xad, 0xa7, 0x17, 0x9e, 0x84, + 0xf3, 0xb9, 0xca, 0xc2, 0xfc, 0x63, 0x25, 0x51 +}; +const uint32_t stm32_ecc256_cofactor = 1U; + +#endif /* ECC256 */ + +/* P-384 */ +#ifdef ECC384 +#define ECC384_KEYSIZE (48) + +static const uint8_t stm32_ecc384_prime[ECC384_KEYSIZE] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF +}; +static const uint32_t stm32_ecc384_coef_sign = 1U; + +static const uint8_t stm32_ecc384_coef[ECC384_KEYSIZE] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03
+};
+
+static const uint8_t stm32_ecc384_pointX[ECC384_KEYSIZE] = {
+    0xAA, 0x87, 0xCA, 0x22, 0xBE, 0x8B, 0x05, 0x37,
+    0x8E, 0xB1, 0xC7, 0x1E, 0xF3, 0x20, 0xAD, 0x74,
+    0x6E, 0x1D, 0x3B, 0x62, 0x8B, 0xA7, 0x9B, 0x98,
+    0x59, 0xF7, 0x41, 0xE0, 0x82, 0x54, 0x2A, 0x38,
+    0x55, 0x02, 0xF2, 0x5D, 0xBF, 0x55, 0x29, 0x6C,
+    0x3A, 0x54, 0x5E, 0x38, 0x72, 0x76, 0x0A, 0xB7,
+};
+
+const uint8_t stm32_ecc384_pointY[ECC384_KEYSIZE] = {
+    0x36, 0x17, 0xDE, 0x4A, 0x96, 0x26, 0x2C, 0x6F,
+    0x5D, 0x9E, 0x98, 0xBF, 0x92, 0x92, 0xDC, 0x29,
+    0xF8, 0xF4, 0x1D, 0xBD, 0x28, 0x9A, 0x14, 0x7C,
+    0xE9, 0xDA, 0x31, 0x13, 0xB5, 0xF0, 0xB8, 0xC0,
+    0x0A, 0x60, 0xB1, 0xCE, 0x1D, 0x7E, 0x81, 0x9D,
+    0x7A, 0x43, 0x1D, 0x7C, 0x90, 0xEA, 0x0E, 0x5F,
+};
+
+/* group order n of the NIST P-384 base point */
+const uint8_t stm32_ecc384_order[ECC384_KEYSIZE] = {
+    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    0xC7, 0x63, 0x4D, 0x81, 0xF4, 0x37, 0x2D, 0xDF,
+    0x58, 0x1A, 0x0D, 0xB2, 0x48, 0xB0, 0xA7, 0x7A,
+    0xEC, 0xEC, 0x19, 0x6A, 0xCC, 0xC5, 0x29, 0x73
+};
+const uint32_t stm32_ecc384_cofactor = 1U;
+
+#endif /* ECC384 */
+
+static int stm32_get_ecc_specs(const uint8_t **prime, const uint8_t **coef,
+    const uint32_t **coef_sign, const uint8_t **GenPointX, const uint8_t **GenPointY,
+    const uint8_t **order, int size)
+{
+    switch(size) {
+    case 32:
+        *prime = stm32_ecc256_prime;
+        *coef = stm32_ecc256_coef;
+        *GenPointX = stm32_ecc256_pointX;
+        *GenPointY = stm32_ecc256_pointY;
+        *coef_sign = &stm32_ecc256_coef_sign;
+        *order = stm32_ecc256_order;
+        break;
+#ifdef ECC224
+    case 28:
+        *prime = stm32_ecc224_prime;
+        *coef = stm32_ecc224_coef;
+        *GenPointX = stm32_ecc224_pointX;
+        *GenPointY = stm32_ecc224_pointY;
+        *coef_sign = &stm32_ecc224_coef_sign;
+        *order = stm32_ecc224_order;
+        break;
+#endif
+#ifdef ECC192
+    case 24:
+        *prime = stm32_ecc192_prime;
+        *coef = stm32_ecc192_coef;
+        *GenPointX = stm32_ecc192_pointX;
+        *GenPointY = stm32_ecc192_pointY;
+        *coef_sign = &stm32_ecc192_coef_sign;
+        *order = stm32_ecc192_order;
+        break;
+#endif
+#ifdef ECC384
+    case 48:
+        *prime = stm32_ecc384_prime;
+        *coef = stm32_ecc384_coef;
+        *GenPointX = stm32_ecc384_pointX;
+        *GenPointY = stm32_ecc384_pointY;
+        *coef_sign = &stm32_ecc384_coef_sign;
+        *order = stm32_ecc384_order;
+        break;
+#endif
+    default:
+        return -1;
+    }
+    return 0;
+}
+
+
+/**
+   Perform a point multiplication (timing resistant)
+   k        The scalar to multiply by
+   G        The base point
+   R        [out] Destination for kG
+   modulus  The modulus of the field the ECC curve is in
+   map      Boolean whether to map back to affine or not
+            (1==map, 0 == leave in projective)
+   return MP_OKAY on success
+*/
+int wc_ecc_mulmod_ex(mp_int *k, ecc_point *G, ecc_point *R, mp_int* a,
+    mp_int *modulus, int map, void* heap)
+{
+    PKA_ECCMulInTypeDef pka_mul;
+    PKA_ECCMulOutTypeDef pka_mul_res;
+    uint8_t size;
+    int szModulus;
+    int szkbin;
+    int status;
+    int res;
+    uint8_t Gxbin[STM32_MAX_ECC_SIZE];
+    uint8_t Gybin[STM32_MAX_ECC_SIZE];
+    uint8_t kbin[STM32_MAX_ECC_SIZE];
+    uint8_t PtXbin[STM32_MAX_ECC_SIZE];
+    uint8_t PtYbin[STM32_MAX_ECC_SIZE];
+    const uint8_t *prime, *coef, *gen_x, *gen_y, *order;
+    const uint32_t *coef_sign;
+    (void)a;
+    (void)heap;
+    XMEMSET(&pka_mul, 0x00, sizeof(PKA_ECCMulInTypeDef));
+    XMEMSET(&pka_mul_res, 0x00, sizeof(PKA_ECCMulOutTypeDef));
+    pka_mul_res.ptX = PtXbin;
+    pka_mul_res.ptY = PtYbin;
+
+    if (k == NULL || G == NULL || R == NULL || modulus == NULL) {
+        return ECC_BAD_ARG_E;
+    }
+
+    szModulus = mp_unsigned_bin_size(modulus);
+    szkbin = mp_unsigned_bin_size(k);
+
+    res = stm32_get_from_mp_int(kbin, k, szkbin);
+    if (res == MP_OKAY)
+        res = stm32_get_from_mp_int(Gxbin, G->x, szModulus);
+    if (res == MP_OKAY)
+        res = stm32_get_from_mp_int(Gybin, G->y, szModulus);
+
+    if (res != MP_OKAY)
+        return res;
+
+    size = (uint8_t)szModulus;
+    /* 
find STM32_PKA friendly parameters for the selected curve */ + if (0 != stm32_get_ecc_specs(&prime, &coef, &coef_sign, &gen_x, &gen_y, &order, size)) { + return ECC_BAD_ARG_E; + } + (void)order; + + pka_mul.modulusSize = szModulus; + pka_mul.coefSign = *coef_sign; + pka_mul.coefA = coef; + pka_mul.modulus = prime; + pka_mul.pointX = Gxbin; + pka_mul.pointY = Gybin; + pka_mul.scalarMulSize = size; + pka_mul.scalarMul = kbin; + + status = HAL_PKA_ECCMul(&hpka, &pka_mul, HAL_MAX_DELAY); + if (status != HAL_OK) { + return WC_HW_E; + } + pka_mul_res.ptX = Gxbin; + pka_mul_res.ptY = Gybin; + HAL_PKA_ECCMul_GetResult(&hpka, &pka_mul_res); + res = mp_read_unsigned_bin(R->x, Gxbin, size); + if (res == MP_OKAY) { + res = mp_read_unsigned_bin(R->y, Gybin, size); +#ifndef WOLFSSL_SP_MATH + /* if k is negative, we compute the multiplication with abs(-k) + * with result (x, y) and modify the result to (x, -y) + */ + R->y->sign = k->sign; +#endif + } + if (res == MP_OKAY) + res = mp_set(R->z, 1); + HAL_PKA_RAMReset(&hpka); + return res; +} + +int stm32_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash, + word32 hashlen, int* res, ecc_key* key) +{ + PKA_ECDSAVerifInTypeDef pka_ecc; + uint8_t size; + int szModulus; + int szrbin; + int status; + uint8_t Rbin[STM32_MAX_ECC_SIZE]; + uint8_t Sbin[STM32_MAX_ECC_SIZE]; + uint8_t Qxbin[STM32_MAX_ECC_SIZE]; + uint8_t Qybin[STM32_MAX_ECC_SIZE]; + uint8_t Hashbin[STM32_MAX_ECC_SIZE]; + uint8_t privKeybin[STM32_MAX_ECC_SIZE]; + const uint8_t *prime, *coef, *gen_x, *gen_y, *order; + const uint32_t *coef_sign; + XMEMSET(&pka_ecc, 0x00, sizeof(PKA_ECDSAVerifInTypeDef)); + + if (r == NULL || s == NULL || hash == NULL || res == NULL || key == NULL) { + return ECC_BAD_ARG_E; + } + *res = 0; + + szModulus = mp_unsigned_bin_size(key->pubkey.x); + szrbin = mp_unsigned_bin_size(r); + + status = stm32_get_from_mp_int(Rbin, r, szrbin); + if (status == MP_OKAY) + status = stm32_get_from_mp_int(Sbin, s, szrbin); + if (status == MP_OKAY) + status = stm32_get_from_mp_int(Qxbin, key->pubkey.x, szModulus); + if (status == MP_OKAY) + status = stm32_get_from_mp_int(Qybin, key->pubkey.y, szModulus); + if (status == MP_OKAY) + status = stm32_get_from_mp_int(privKeybin, &key->k, szModulus); + if (status != MP_OKAY) + return status; + + size = (uint8_t)szModulus; + /* find parameters for the selected curve */ + if (0 != stm32_get_ecc_specs(&prime, &coef, &coef_sign, &gen_x, &gen_y, &order, size)) { + return ECC_BAD_ARG_E; + } + + + pka_ecc.primeOrderSize = size; + pka_ecc.modulusSize = size; + pka_ecc.coefSign = *coef_sign; + pka_ecc.coef = coef; + pka_ecc.modulus = prime; + pka_ecc.basePointX = gen_x; + pka_ecc.basePointY = gen_y; + pka_ecc.primeOrder = order; + + pka_ecc.pPubKeyCurvePtX = Qxbin; + pka_ecc.pPubKeyCurvePtY = Qybin; + pka_ecc.RSign = Rbin; + pka_ecc.SSign = Sbin; + XMEMSET(Hashbin, 0, STM32_MAX_ECC_SIZE); + XMEMCPY(Hashbin + (size - hashlen), hash, hashlen); + pka_ecc.hash = Hashbin; + + status = HAL_PKA_ECDSAVerif(&hpka, &pka_ecc, HAL_MAX_DELAY); + if (status != HAL_OK) { + HAL_PKA_RAMReset(&hpka); + return WC_HW_E; + } + *res = HAL_PKA_ECDSAVerif_IsValidSignature(&hpka); + HAL_PKA_RAMReset(&hpka); + return status; +} + +int stm32_ecc_sign_hash_ex(const byte* hash, word32 hashlen, WC_RNG* rng, + ecc_key* key, mp_int *r, mp_int *s) +{ + PKA_ECDSASignInTypeDef pka_ecc; + PKA_ECDSASignOutTypeDef pka_ecc_out; + int size; + int status; + mp_int gen_k; + mp_int order_mp; + uint8_t Keybin[STM32_MAX_ECC_SIZE]; + uint8_t Intbin[STM32_MAX_ECC_SIZE]; + uint8_t 
Rbin[STM32_MAX_ECC_SIZE];
+    uint8_t Sbin[STM32_MAX_ECC_SIZE];
+    uint8_t Hashbin[STM32_MAX_ECC_SIZE];
+    const uint8_t *prime, *coef, *gen_x, *gen_y, *order;
+    const uint32_t *coef_sign;
+    XMEMSET(&pka_ecc, 0x00, sizeof(PKA_ECDSASignInTypeDef));
+    XMEMSET(&pka_ecc_out, 0x00, sizeof(PKA_ECDSASignOutTypeDef));
+
+    if (r == NULL || s == NULL || hash == NULL || key == NULL) {
+        return ECC_BAD_ARG_E;
+    }
+
+    mp_init(&gen_k);
+    mp_init(&order_mp);
+
+    size = mp_unsigned_bin_size(key->pubkey.x);
+
+    status = stm32_get_from_mp_int(Keybin, &key->k, size);
+    if (status != MP_OKAY)
+        return status;
+
+    /* find parameters for the selected curve */
+    if (0 != stm32_get_ecc_specs(&prime, &coef, &coef_sign, &gen_x, &gen_y, &order, size)) {
+        return ECC_BAD_ARG_E;
+    }
+
+    status = mp_read_unsigned_bin(&order_mp, order, size);
+    if (status == MP_OKAY)
+        status = wc_ecc_gen_k(rng, size, &gen_k, &order_mp);
+    if (status == MP_OKAY)
+        status = stm32_get_from_mp_int(Intbin, &gen_k, size);
+    if (status != MP_OKAY)
+        return status;
+
+    pka_ecc.primeOrderSize = size;
+    pka_ecc.modulusSize = size;
+    pka_ecc.coefSign = *coef_sign;
+    pka_ecc.coef = coef;
+    pka_ecc.modulus = prime;
+    pka_ecc.basePointX = gen_x;
+    pka_ecc.basePointY = gen_y;
+    pka_ecc.primeOrder = order;
+
+    XMEMSET(Hashbin, 0, STM32_MAX_ECC_SIZE);
+    XMEMCPY(Hashbin + (size - hashlen), hash, hashlen);
+    pka_ecc.hash = Hashbin;
+    pka_ecc.integer = Intbin;
+    pka_ecc.privateKey = Keybin;
+
+    /* Assign R, S static buffers */
+    pka_ecc_out.RSign = Rbin;
+    pka_ecc_out.SSign = Sbin;
+
+    status = HAL_PKA_ECDSASign(&hpka, &pka_ecc, HAL_MAX_DELAY);
+    if (status != HAL_OK) {
+        HAL_PKA_RAMReset(&hpka);
+        return WC_HW_E;
+    }
+    HAL_PKA_ECDSASign_GetResult(&hpka, &pka_ecc_out, NULL);
+    status = mp_read_unsigned_bin(r, pka_ecc_out.RSign, size);
+    if (status == MP_OKAY)
+        status = mp_read_unsigned_bin(s, pka_ecc_out.SSign, size);
+    HAL_PKA_RAMReset(&hpka);
+    return status;
+}
+
+#endif /* HAVE_ECC */
+#endif /* WOLFSSL_STM32_PKA */
diff --git a/client/wolfssl/wolfcrypt/src/port/st/stsafe.c b/client/wolfssl/wolfcrypt/src/port/st/stsafe.c
new file mode 100644
index 0000000..239b159
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/port/st/stsafe.c
@@ -0,0 +1,566 @@
+/* stsafe.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/port/st/stsafe.h>
+
+#ifndef STSAFE_INTERFACE_PRINTF
+#define STSAFE_INTERFACE_PRINTF(...)
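+/* Stub: discards all arguments. When verbose STSAFE output is wanted this
+ * can be overridden at build time, e.g. (assumed override, not part of the
+ * original source):
+ *   #define STSAFE_INTERFACE_PRINTF(...) printf(__VA_ARGS__)
+ */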
+#endif + +#ifdef WOLFSSL_STSAFEA100 + +int SSL_STSAFE_LoadDeviceCertificate(byte** pRawCertificate, + word32* pRawCertificateLen) +{ + int err; + + if (pRawCertificate == NULL || pRawCertificateLen == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef USE_STSAFE_VERBOSE + WOLFSSL_MSG("SSL_STSAFE_LoadDeviceCertificate"); +#endif + + /* Try reading device certificate from ST-SAFE Zone 0 */ + err = stsafe_interface_read_device_certificate_raw( + pRawCertificate, (uint32_t*)pRawCertificateLen); + if (err == STSAFE_A_OK) { + #if 0 + /* example for loading into WOLFSSL_CTX */ + err = wolfSSL_CTX_use_certificate_buffer(ctx, + *pRawCertificate, *pRawCertificateLen, SSL_FILETYPE_ASN1); + if (err != WOLFSSL_SUCCESS) { + /* failed */ + } + /* can free now */ + XFREE(*pRawCertificate, NULL, DYNAMIC_TEMP_BUFFER); + *pRawCertificate = NULL; + #endif + } + else { + err = WC_HW_E; + } + + return err; +} + +#ifdef HAVE_PK_CALLBACKS + +/** + * \brief Key Gen Callback (used by TLS server) + */ +int SSL_STSAFE_CreateKeyCb(WOLFSSL* ssl, ecc_key* key, word32 keySz, + int ecc_curve, void* ctx) +{ + int err; + byte pubKeyRaw[STSAFE_MAX_PUBKEY_RAW_LEN]; + StSafeA_KeySlotNumber slot; + StSafeA_CurveId curve_id; + + (void)ssl; + (void)ctx; + +#ifdef USE_STSAFE_VERBOSE + WOLFSSL_MSG("CreateKeyCb: STSAFE"); +#endif + + /* get curve */ + curve_id = stsafe_get_ecc_curve_id(ecc_curve); + + /* generate new ephemeral key on device */ + err = stsafe_interface_create_key(&slot, curve_id, (uint8_t*)&pubKeyRaw[0]); + if (err != STSAFE_A_OK) { + #ifdef USE_STSAFE_VERBOSE + STSAFE_INTERFACE_PRINTF("stsafe_interface_create_key error: %d\n", err); + #endif + err = WC_HW_E; + return err; + } + + /* load generated public key into key, used by wolfSSL */ + err = wc_ecc_import_unsigned(key, &pubKeyRaw[0], &pubKeyRaw[keySz], + NULL, ecc_curve); + + return err; +} + +/** + * \brief Verify Peer Cert Callback. 
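+ *
+ * Decodes the peer's DER public key, extracts R/S from the ASN.1 signature,
+ * and asks the STSAFE element to check the ECDSA signature over the given
+ * hash. On success the device writes the outcome to *result (expected to be
+ * 1 when the signature verifies) and 0 is returned; device failures are
+ * mapped to WC_HW_E.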
+ */ +int SSL_STSAFE_VerifyPeerCertCb(WOLFSSL* ssl, + const unsigned char* sig, unsigned int sigSz, + const unsigned char* hash, unsigned int hashSz, + const unsigned char* keyDer, unsigned int keySz, + int* result, void* ctx) +{ + int err; + byte sigRS[STSAFE_MAX_SIG_LEN]; + byte *r, *s; + word32 r_len = STSAFE_MAX_SIG_LEN/2, s_len = STSAFE_MAX_SIG_LEN/2; + byte pubKeyX[STSAFE_MAX_PUBKEY_RAW_LEN/2]; + byte pubKeyY[STSAFE_MAX_PUBKEY_RAW_LEN/2]; + word32 pubKeyX_len = sizeof(pubKeyX); + word32 pubKeyY_len = sizeof(pubKeyY); + ecc_key key; + word32 inOutIdx = 0; + StSafeA_CurveId curve_id; + int ecc_curve; + + (void)ssl; + (void)ctx; + +#ifdef USE_STSAFE_VERBOSE + WOLFSSL_MSG("VerifyPeerCertCB: STSAFE"); +#endif + + err = wc_ecc_init(&key); + if (err != 0) { + return err; + } + + /* Decode the public key */ + err = wc_EccPublicKeyDecode(keyDer, &inOutIdx, &key, keySz); + if (err == 0) { + /* Extract Raw X and Y coordinates of the public key */ + err = wc_ecc_export_public_raw(&key, pubKeyX, &pubKeyX_len, + pubKeyY, &pubKeyY_len); + } + if (err == 0) { + int key_sz; + + /* determine curve */ + ecc_curve = key.dp->id; + curve_id = stsafe_get_ecc_curve_id(ecc_curve); + key_sz = stsafe_get_key_size(curve_id); + + /* Extract R and S from signature */ + XMEMSET(sigRS, 0, sizeof(sigRS)); + r = &sigRS[0]; + s = &sigRS[key_sz]; + err = wc_ecc_sig_to_rs(sig, sigSz, r, &r_len, s, &s_len); + (void)r_len; + (void)s_len; + } + + if (err == 0) { + /* Verify signature */ + err = stsafe_interface_verify(curve_id, (uint8_t*)hash, sigRS, + pubKeyX, pubKeyY, (int32_t*)result); + if (err != STSAFE_A_OK) { + #ifdef USE_STSAFE_VERBOSE + STSAFE_INTERFACE_PRINTF("stsafe_interface_verify error: %d\n", err); + #endif + err = WC_HW_E; + } + } + + wc_ecc_free(&key); + return err; +} + +/** + * \brief Sign Certificate Callback. + */ +int SSL_STSAFE_SignCertificateCb(WOLFSSL* ssl, const byte* in, + word32 inSz, byte* out, word32* outSz, + const byte* key, word32 keySz, void* ctx) +{ + int err; + byte digest[STSAFE_MAX_KEY_LEN]; + byte sigRS[STSAFE_MAX_SIG_LEN]; + byte *r, *s; + StSafeA_CurveId curve_id; + int key_sz; + + (void)ssl; + (void)ctx; + +#ifdef USE_STSAFE_VERBOSE + WOLFSSL_MSG("SignCertificateCb: STSAFE"); +#endif + + curve_id = stsafe_get_curve_mode(); + key_sz = stsafe_get_key_size(curve_id); + + /* Build input digest */ + if (inSz > key_sz) + inSz = key_sz; + XMEMSET(&digest[0], 0, sizeof(digest)); + XMEMCPY(&digest[key_sz - inSz], in, inSz); + + /* Sign using slot 0: Result is R then S */ + /* Sign will always use the curve type in slot 0 (the TLS curve needs to match) */ + XMEMSET(sigRS, 0, sizeof(sigRS)); + err = stsafe_interface_sign(STSAFE_A_SLOT_0, curve_id, digest, sigRS); + if (err != STSAFE_A_OK) { + #ifdef USE_STSAFE_VERBOSE + STSAFE_INTERFACE_PRINTF("stsafe_interface_sign error: %d\n", err); + #endif + err = WC_HW_E; + return err; + } + + /* Convert R and S to signature */ + r = &sigRS[0]; + s = &sigRS[key_sz]; + err = wc_ecc_rs_raw_to_sig((const byte*)r, key_sz, (const byte*)s, key_sz, + out, outSz); + if (err != 0) { + #ifdef USE_STSAFE_VERBOSE + WOLFSSL_MSG("Error converting RS to Signature"); + #endif + } + + return err; +} + + +/** + * \brief Create pre master secret using peer's public key and self private key. 
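+ *
+ * On the client side (side == WOLFSSL_CLIENT_END) an ephemeral key is
+ * created on the device and its public half is exported through
+ * pubKeyDer/pubKeySz; on the server side the peer's key in pubKeyDer is
+ * imported instead. The ECDH result is written to out/outlen by the
+ * STSAFE element.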
+ */ +int SSL_STSAFE_SharedSecretCb(WOLFSSL* ssl, ecc_key* otherKey, + unsigned char* pubKeyDer, unsigned int* pubKeySz, + unsigned char* out, unsigned int* outlen, + int side, void* ctx) +{ + int err; + byte otherKeyX[STSAFE_MAX_KEY_LEN]; + byte otherKeyY[STSAFE_MAX_KEY_LEN]; + word32 otherKeyX_len = sizeof(otherKeyX); + word32 otherKeyY_len = sizeof(otherKeyY); + byte pubKeyRaw[STSAFE_MAX_PUBKEY_RAW_LEN]; + StSafeA_KeySlotNumber slot; + StSafeA_CurveId curve_id; + ecc_key tmpKey; + int ecc_curve; + int key_sz; + + (void)ssl; + (void)ctx; + +#ifdef USE_STSAFE_VERBOSE + WOLFSSL_MSG("SharedSecretCb: STSAFE"); +#endif + + err = wc_ecc_init(&tmpKey); + if (err != 0) { + return err; + } + + /* set curve */ + ecc_curve = otherKey->dp->id; + curve_id = stsafe_get_ecc_curve_id(ecc_curve); + key_sz = stsafe_get_key_size(curve_id); + + /* for client: create and export public key */ + if (side == WOLFSSL_CLIENT_END) { + /* Export otherKey raw X and Y */ + err = wc_ecc_export_public_raw(otherKey, + &otherKeyX[0], (word32*)&otherKeyX_len, + &otherKeyY[0], (word32*)&otherKeyY_len); + if (err != 0) { + return err; + } + + err = stsafe_interface_create_key(&slot, curve_id, (uint8_t*)&pubKeyRaw[0]); + if (err != STSAFE_A_OK) { + #ifdef USE_STSAFE_VERBOSE + STSAFE_INTERFACE_PRINTF("stsafe_interface_create_key error: %d\n", err); + #endif + err = WC_HW_E; + return err; + } + + /* convert raw unsigned public key to X.963 format for TLS */ + err = wc_ecc_init(&tmpKey); + if (err == 0) { + err = wc_ecc_import_unsigned(&tmpKey, &pubKeyRaw[0], &pubKeyRaw[key_sz], + NULL, ecc_curve); + if (err == 0) { + err = wc_ecc_export_x963(&tmpKey, pubKeyDer, pubKeySz); + } + wc_ecc_free(&tmpKey); + } + } + /* for server: import public key */ + else if (side == WOLFSSL_SERVER_END) { + /* import peer's key and export as raw unsigned for hardware */ + err = wc_ecc_import_x963_ex(pubKeyDer, *pubKeySz, &tmpKey, ecc_curve); + if (err == 0) { + err = wc_ecc_export_public_raw(&tmpKey, otherKeyX, &otherKeyX_len, + otherKeyY, &otherKeyY_len); + } + } + else { + err = BAD_FUNC_ARG; + } + + wc_ecc_free(&tmpKey); + + if (err != 0) { + return err; + } + + /* Compute shared secret */ + err = stsafe_interface_shared_secret(curve_id, &otherKeyX[0], &otherKeyY[0], + out, (int32_t*)outlen); + if (err != STSAFE_A_OK) { + #ifdef USE_STSAFE_VERBOSE + STSAFE_INTERFACE_PRINTF("stsafe_interface_shared_secret error: %d\n", err); + #endif + err = WC_HW_E; + } + + return err; +} + +int SSL_STSAFE_SetupPkCallbacks(WOLFSSL_CTX* ctx) +{ + wolfSSL_CTX_SetEccKeyGenCb(ctx, SSL_STSAFE_CreateKeyCb); + wolfSSL_CTX_SetEccSignCb(ctx, SSL_STSAFE_SignCertificateCb); + wolfSSL_CTX_SetEccVerifyCb(ctx, SSL_STSAFE_VerifyPeerCertCb); + wolfSSL_CTX_SetEccSharedSecretCb(ctx, SSL_STSAFE_SharedSecretCb); + wolfSSL_CTX_SetDevId(ctx, 0); /* enables wolfCrypt `wc_ecc_*` ST-Safe use */ + return 0; +} + +int SSL_STSAFE_SetupPkCallbackCtx(WOLFSSL* ssl, void* user_ctx) +{ + wolfSSL_SetEccKeyGenCtx(ssl, user_ctx); + wolfSSL_SetEccSharedSecretCtx(ssl, user_ctx); + wolfSSL_SetEccSignCtx(ssl, user_ctx); + wolfSSL_SetEccVerifyCtx(ssl, user_ctx); + return 0; +} + + +#endif /* HAVE_PK_CALLBACKS */ + +#ifdef WOLF_CRYPTO_CB + +int wolfSSL_STSAFE_CryptoDevCb(int devId, wc_CryptoInfo* info, void* ctx) +{ + int rc = CRYPTOCB_UNAVAILABLE; + wolfSTSAFE_CryptoCb_Ctx* stsCtx = (wolfSTSAFE_CryptoCb_Ctx*)ctx; + + if (info == NULL || ctx == NULL) + return BAD_FUNC_ARG; + + (void)devId; + (void)stsCtx; + + if (info->algo_type == WC_ALGO_TYPE_SEED) { + /* use the STSAFE hardware for RNG seed */ + #if 
!defined(WC_NO_RNG) && defined(USE_STSAFE_RNG_SEED) + while (info->seed.sz > 0) { + rc = stsafe_interface_getrandom(info->seed.seed, info->seed.sz); + if (rc < 0) { + return rc; + } + info->seed.seed += rc; + info->seed.sz -= rc; + } + rc = 0; + #else + rc = CRYPTOCB_UNAVAILABLE; + #endif + } +#ifdef HAVE_ECC + else if (info->algo_type == WC_ALGO_TYPE_PK) { + #ifdef USE_STSAFE_VERBOSE + STSAFE_INTERFACE_PRINTF("STSAFE Pk: Type %d\n", info->pk.type); + #endif + + if (info->pk.type == WC_PK_TYPE_EC_KEYGEN) { + byte pubKeyRaw[STSAFE_MAX_PUBKEY_RAW_LEN]; + StSafeA_KeySlotNumber slot; + StSafeA_CurveId curve_id; + int ecc_curve, key_sz; + + WOLFSSL_MSG("STSAFE: ECC KeyGen"); + + /* get curve */ + ecc_curve = info->pk.eckg.curveId; + curve_id = stsafe_get_ecc_curve_id(ecc_curve); + key_sz = stsafe_get_key_size(curve_id); + + /* generate new ephemeral key on device */ + rc = stsafe_interface_create_key(&slot, curve_id, + (uint8_t*)pubKeyRaw); + if (rc != STSAFE_A_OK) { + #ifdef USE_STSAFE_VERBOSE + STSAFE_INTERFACE_PRINTF("stsafe_interface_create_key error: %d\n", rc); + #endif + rc = WC_HW_E; + return rc; + } + + /* load generated public key into key, used by wolfSSL */ + rc = wc_ecc_import_unsigned(info->pk.eckg.key, pubKeyRaw, + &pubKeyRaw[key_sz], NULL, ecc_curve); + } + else if (info->pk.type == WC_PK_TYPE_ECDSA_SIGN) { + byte digest[STSAFE_MAX_KEY_LEN]; + byte sigRS[STSAFE_MAX_SIG_LEN]; + byte *r, *s; + StSafeA_CurveId curve_id; + word32 inSz = info->pk.eccsign.inlen; + int key_sz; + + WOLFSSL_MSG("STSAFE: ECC Sign"); + + curve_id = stsafe_get_curve_mode(); + key_sz = stsafe_get_key_size(curve_id); + + /* truncate input to match key size */ + if (inSz > key_sz) + inSz = key_sz; + + /* Build input digest */ + XMEMSET(&digest[0], 0, sizeof(digest)); + XMEMCPY(&digest[key_sz - inSz], info->pk.eccsign.in, inSz); + + /* Sign using slot 0: Result is R then S */ + /* Sign will always use the curve type in slot 0 + (the TLS curve needs to match) */ + XMEMSET(sigRS, 0, sizeof(sigRS)); + rc = stsafe_interface_sign(STSAFE_A_SLOT_0, curve_id, + (uint8_t*)info->pk.eccsign.in, sigRS); + if (rc != STSAFE_A_OK) { + #ifdef USE_STSAFE_VERBOSE + STSAFE_INTERFACE_PRINTF("stsafe_interface_sign error: %d\n", rc); + #endif + rc = WC_HW_E; + return rc; + } + + /* Convert R and S to signature */ + r = &sigRS[0]; + s = &sigRS[key_sz]; + rc = wc_ecc_rs_raw_to_sig((const byte*)r, key_sz, (const byte*)s, + key_sz, info->pk.eccsign.out, info->pk.eccsign.outlen); + if (rc != 0) { + WOLFSSL_MSG("Error converting RS to Signature"); + } + } + else if (info->pk.type == WC_PK_TYPE_ECDSA_VERIFY) { + byte sigRS[STSAFE_MAX_SIG_LEN]; + byte *r, *s; + word32 r_len = STSAFE_MAX_SIG_LEN/2, s_len = STSAFE_MAX_SIG_LEN/2; + byte pubKeyX[STSAFE_MAX_PUBKEY_RAW_LEN/2]; + byte pubKeyY[STSAFE_MAX_PUBKEY_RAW_LEN/2]; + word32 pubKeyX_len = sizeof(pubKeyX); + word32 pubKeyY_len = sizeof(pubKeyY); + StSafeA_CurveId curve_id; + int ecc_curve, key_sz; + + WOLFSSL_MSG("STSAFE: ECC Verify"); + + if (info->pk.eccverify.key == NULL) + return BAD_FUNC_ARG; + + /* determine curve */ + ecc_curve = info->pk.eccverify.key->dp->id; + curve_id = stsafe_get_ecc_curve_id(ecc_curve); + key_sz = stsafe_get_key_size(curve_id); + + /* Extract Raw X and Y coordinates of the public key */ + rc = wc_ecc_export_public_raw(info->pk.eccverify.key, + pubKeyX, &pubKeyX_len, + pubKeyY, &pubKeyY_len); + if (rc == 0) { + /* Extract R and S from signature */ + XMEMSET(sigRS, 0, sizeof(sigRS)); + r = &sigRS[0]; + s = &sigRS[key_sz]; + rc = 
wc_ecc_sig_to_rs(info->pk.eccverify.sig, + info->pk.eccverify.siglen, r, &r_len, s, &s_len); + (void)r_len; + (void)s_len; + } + if (rc == 0) { + /* Verify signature */ + rc = stsafe_interface_verify(curve_id, + (uint8_t*)info->pk.eccverify.hash, sigRS, pubKeyX, pubKeyY, + (int32_t*)info->pk.eccverify.res); + if (rc != STSAFE_A_OK) { + #ifdef USE_STSAFE_VERBOSE + STSAFE_INTERFACE_PRINTF("stsafe_interface_verify error: %d\n", rc); + #endif + rc = WC_HW_E; + } + } + } + else if (info->pk.type == WC_PK_TYPE_ECDH) { + byte otherKeyX[STSAFE_MAX_KEY_LEN]; + byte otherKeyY[STSAFE_MAX_KEY_LEN]; + word32 otherKeyX_len = sizeof(otherKeyX); + word32 otherKeyY_len = sizeof(otherKeyY); + StSafeA_CurveId curve_id; + int ecc_curve; + + WOLFSSL_MSG("STSAFE: PMS"); + + if (info->pk.ecdh.public_key == NULL) + return BAD_FUNC_ARG; + + /* get curve */ + ecc_curve = info->pk.ecdh.public_key->dp->id; + curve_id = stsafe_get_ecc_curve_id(ecc_curve); + + /* Export otherKey raw X and Y */ + rc = wc_ecc_export_public_raw(info->pk.ecdh.public_key, + &otherKeyX[0], (word32*)&otherKeyX_len, + &otherKeyY[0], (word32*)&otherKeyY_len); + if (rc == 0) { + /* Compute shared secret */ + *info->pk.ecdh.outlen = 0; + rc = stsafe_interface_shared_secret(curve_id, + otherKeyX, otherKeyY, + info->pk.ecdh.out, (int32_t*)info->pk.ecdh.outlen); + if (rc != STSAFE_A_OK) { + #ifdef USE_STSAFE_VERBOSE + STSAFE_INTERFACE_PRINTF("stsafe_interface_shared_secret error: %d\n", rc); + #endif + rc = WC_HW_E; + } + } + } + } +#endif /* HAVE_ECC */ + + /* need to return negative here for error */ + if (rc != 0 && rc != CRYPTOCB_UNAVAILABLE) { + WOLFSSL_MSG("STSAFE: CryptoCb failed"); + #ifdef USE_STSAFE_VERBOSE + STSAFE_INTERFACE_PRINTF("STSAFE: CryptoCb failed %d\n", rc); + #endif + rc = WC_HW_E; + } + + return rc; +} + +#endif /* WOLF_CRYPTO_CB */ + +#endif /* WOLFSSL_STSAFEA100 */ diff --git a/client/wolfssl/wolfcrypt/src/port/ti/ti-aes.c b/client/wolfssl/wolfcrypt/src/port/ti/ti-aes.c new file mode 100644 index 0000000..52f2ceb --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/ti/ti-aes.c @@ -0,0 +1,569 @@ +/* port/ti/ti-aes.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifndef NO_AES
+
+
+#if defined(WOLFSSL_TI_CRYPT)
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <wolfssl/wolfcrypt/aes.h>
+#include <wolfssl/wolfcrypt/port/ti/ti-ccm.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+
+#include "inc/hw_aes.h"
+#include "inc/hw_memmap.h"
+#include "inc/hw_ints.h"
+#include "driverlib/aes.h"
+#include "driverlib/sysctl.h"
+#include "driverlib/rom_map.h"
+#include "driverlib/rom.h"
+
+static int AesSetIV(Aes* aes, const byte* iv)
+{
+    if (aes == NULL)
+        return BAD_FUNC_ARG;
+
+    if (iv)
+        XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE);
+    else
+        XMEMSET(aes->reg, 0, AES_BLOCK_SIZE);
+
+    return 0;
+}
+
+WOLFSSL_API int wc_AesSetKey(Aes* aes, const byte* key, word32 len, const byte* iv,
+                          int dir)
+{
+    if(!wolfSSL_TI_CCMInit())return 1 ;
+    if ((aes == NULL) || (key == NULL) || (iv == NULL))
+        return BAD_FUNC_ARG;
+    if(!((dir == AES_ENCRYPTION) || (dir == AES_DECRYPTION)))
+        return BAD_FUNC_ARG;
+
+    switch(len) {
+    case 16: aes->keylen = AES_CFG_KEY_SIZE_128BIT ; break ;
+    case 24: aes->keylen = AES_CFG_KEY_SIZE_192BIT ; break ;
+    case 32: aes->keylen = AES_CFG_KEY_SIZE_256BIT ; break ;
+    default: return BAD_FUNC_ARG;
+    }
+
+    XMEMCPY(aes->key, key, len) ;
+    #ifdef WOLFSSL_AES_COUNTER
+        aes->left = 0;
+    #endif /* WOLFSSL_AES_COUNTER */
+    return AesSetIV(aes, iv);
+}
+
+#define AES_CFG_MODE_CTR_NOCTR AES_CFG_MODE_CTR+100
+#define IS_ALIGN16(p) (((unsigned int)(p)&0xf) == 0)
+
+static int AesAlign16(Aes* aes, byte* out, const byte* in, word32 sz, word32 dir, word32 mode)
+{
+    wolfSSL_TI_lockCCM() ;
+    ROM_AESReset(AES_BASE);
+    ROM_AESConfigSet(AES_BASE, (aes->keylen | dir |
+                     (mode==AES_CFG_MODE_CTR_NOCTR ? AES_CFG_MODE_CTR : mode)));
+    ROM_AESIVSet(AES_BASE, (uint32_t *)aes->reg);
+    ROM_AESKey1Set(AES_BASE, (uint32_t *)aes->key, aes->keylen);
+    if((dir == AES_CFG_DIR_DECRYPT)&& (mode == AES_CFG_MODE_CBC))
+        /* if input and output same will overwrite input iv */
+        XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+    ROM_AESDataProcess(AES_BASE, (uint32_t *)in, (uint32_t *)out, sz);
+    wolfSSL_TI_unlockCCM() ;
+
+    /* store iv for next call */
+    if(mode == AES_CFG_MODE_CBC){
+        if(dir == AES_CFG_DIR_ENCRYPT)
+            XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+        else
+            XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
+    }
+
+    if(mode == AES_CFG_MODE_CTR) {
+        do {
+            int i ;
+            for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
+                if (++((byte *)aes->reg)[i])
+                    break ;
+            }
+            sz -= AES_BLOCK_SIZE ;
+        } while((int)sz > 0) ;
+    }
+
+    return 0 ;
+}
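+
+/* Note: the AES engine operates on 16-byte aligned, whole-block buffers.
+ * AesProcess below bounces any unaligned input/output through a TI_BUFFSIZE
+ * stack buffer and feeds the engine in slices, so callers may pass
+ * arbitrarily aligned data of any block-multiple length. */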
+static int AesProcess(Aes* aes, byte* out, const byte* in, word32 sz, word32 dir, word32 mode)
+{
+    const byte * in_p ; byte * out_p ;
+    word32 size ;
+    #define TI_BUFFSIZE 1024
+    byte buff[TI_BUFFSIZE] ;
+
+    if ((aes == NULL) || (in == NULL) || (out == NULL))
+        return BAD_FUNC_ARG;
+    if(sz % AES_BLOCK_SIZE)
+        return BAD_FUNC_ARG;
+
+    while(sz > 0) {
+        size = sz ; in_p = in ; out_p = out ;
+        if(!IS_ALIGN16(in)){
+            size = sz>TI_BUFFSIZE ? TI_BUFFSIZE : sz ;
+            XMEMCPY(buff, in, size) ;
+            in_p = (const byte *)buff ;
+        }
+        if(!IS_ALIGN16(out)){
+            size = sz>TI_BUFFSIZE ? TI_BUFFSIZE : sz ;
+            out_p = buff ;
+        }
+
+        AesAlign16(aes, out_p, in_p, size, dir, mode) ;
+
+        if(!IS_ALIGN16(out)){
+            XMEMCPY(out, buff, size) ;
+        }
+        sz -= size ; in += size ; out += size ;
+    }
+
+    return 0 ;
+}
+
+WOLFSSL_API int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    return AesProcess(aes, out, in, sz, AES_CFG_DIR_ENCRYPT, AES_CFG_MODE_CBC) ;
+}
+
+WOLFSSL_API int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    return AesProcess(aes, out, in, sz, AES_CFG_DIR_DECRYPT, AES_CFG_MODE_CBC) ;
+}
+
+#ifdef WOLFSSL_AES_COUNTER
+WOLFSSL_API void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+    char out_block[AES_BLOCK_SIZE] ;
+    int odd ;
+    int even ;
+    char *tmp ; /* (char *)aes->tmp, for short */
+
+    tmp = (char *)aes->tmp ;
+    if(aes->left) {
+        if((aes->left + sz) >= AES_BLOCK_SIZE){
+            odd = AES_BLOCK_SIZE - aes->left ;
+        } else {
+            odd = sz ;
+        }
+        XMEMCPY(tmp+aes->left, in, odd) ;
+        if((odd+aes->left) == AES_BLOCK_SIZE){
+            AesProcess(aes, (byte *)out_block, (byte const *)tmp, AES_BLOCK_SIZE,
+                AES_CFG_DIR_ENCRYPT, AES_CFG_MODE_CTR) ;
+            XMEMCPY(out, out_block+aes->left, odd) ;
+            aes->left = 0 ;
+            XMEMSET(tmp, 0x0, AES_BLOCK_SIZE) ;
+        }
+        in += odd ;
+        out+= odd ;
+        sz -= odd ;
+    }
+    odd = sz % AES_BLOCK_SIZE ;  /* if there is a tail fragment */
+    if(sz / AES_BLOCK_SIZE) {
+        even = (sz/AES_BLOCK_SIZE)*AES_BLOCK_SIZE ;
+        AesProcess(aes, out, in, even, AES_CFG_DIR_ENCRYPT, AES_CFG_MODE_CTR);
+        out += even ;
+        in += even ;
+    }
+    if(odd) {
+        XMEMSET(tmp+aes->left, 0x0, AES_BLOCK_SIZE - aes->left) ;
+        XMEMCPY(tmp+aes->left, in, odd) ;
+        AesProcess(aes, (byte *)out_block, (byte const *)tmp, AES_BLOCK_SIZE,
+                AES_CFG_DIR_ENCRYPT,
+                AES_CFG_MODE_CTR_NOCTR /* Counter mode without counting IV */
+                );
+        XMEMCPY(out, out_block+aes->left,odd) ;
+        aes->left += odd ;
+    }
+}
+#endif
+
+/* AES-DIRECT */
+#if defined(WOLFSSL_AES_DIRECT)
+WOLFSSL_API void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
+{
+    AesProcess(aes, out, in, AES_BLOCK_SIZE, AES_CFG_DIR_ENCRYPT, AES_CFG_MODE_CBC) ;
+}
+WOLFSSL_API void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
+{
+    AesProcess(aes, out, in, AES_BLOCK_SIZE, AES_CFG_DIR_DECRYPT, AES_CFG_MODE_CBC) ;
+}
+WOLFSSL_API int wc_AesSetKeyDirect(Aes* aes, const byte* key, word32 len,
+                  const byte* iv, int dir)
+{
+    return(wc_AesSetKey(aes, key, len, iv, dir)) ;
+}
+#endif
+
+
+#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
+
+static int AesAuthSetKey(Aes* aes, const byte* key, word32 keySz)
+{
+    byte nonce[AES_BLOCK_SIZE];
+
+    if ((aes == NULL) || (key == NULL))
+        return BAD_FUNC_ARG ;
+    if (!((keySz == 16) || (keySz == 24) || (keySz == 32)))
+        return BAD_FUNC_ARG ;
+
+    XMEMSET(nonce, 0, sizeof(nonce));
+    return wc_AesSetKey(aes, key, keySz, nonce, AES_ENCRYPTION);
+}
+
+
+static int AesAuthArgCheck(Aes* aes, byte* out, const byte* in, word32 inSz,
+                    const byte* nonce, word32 nonceSz,
+                    const byte* authTag, word32 authTagSz,
+                    const byte* authIn, word32 authInSz, word32 *M, word32 *L)
+{
+    (void) authInSz ;
+    if((aes == NULL)||(nonce == NULL)||(authTag== NULL)||(authIn == NULL))
+        return BAD_FUNC_ARG;
+    if((inSz != 0) && ((out == NULL)||(in == NULL)))
+        return BAD_FUNC_ARG;
+
+    switch(authTagSz){
+    case 4:
+        *M = AES_CFG_CCM_M_4; break ;
+    case 6:
+        *M = AES_CFG_CCM_M_6; break ;
+    case 8:
+        *M = AES_CFG_CCM_M_8; break ;
+    case 10:
+        *M = AES_CFG_CCM_M_10; break ;
+    case 12:
+        *M = AES_CFG_CCM_M_12; break ;
+    case 14:
+        *M = AES_CFG_CCM_M_14; break ;
+    case 16:
+        *M = AES_CFG_CCM_M_16; 
break ; + default: + return 1 ; + } + + switch(nonceSz){ + case 7: + *L = AES_CFG_CCM_L_8; break ; + case 8: + *L = AES_CFG_CCM_L_7; break ; + case 9: + *L = AES_CFG_CCM_L_6; break ; + case 10: + *L = AES_CFG_CCM_L_5; break ; + case 11: + *L = AES_CFG_CCM_L_4; break ; + case 12: + *L = AES_CFG_CCM_L_3; break ; + case 13: + *L = AES_CFG_CCM_L_2; break ; + case 14: + *L = AES_CFG_CCM_L_1; break ; + default: + return 1; + } + return 0 ; +} + +static void AesAuthSetIv(Aes *aes, const byte *nonce, word32 len, word32 L, int mode) { + + if(mode == AES_CFG_MODE_CCM){ + XMEMSET(aes->reg, 0, 16) ; + switch(L){ + case AES_CFG_CCM_L_8: + aes->reg[0] = 0x7; break ; + case AES_CFG_CCM_L_7: + aes->reg[0] = 0x6; break ; + case AES_CFG_CCM_L_6: + aes->reg[0] = 0x5; break ; + case AES_CFG_CCM_L_5: + aes->reg[0] = 0x4; break ; + case AES_CFG_CCM_L_4: + aes->reg[0] = 0x3; break ; + case AES_CFG_CCM_L_3: + aes->reg[0] = 0x2; break ; + case AES_CFG_CCM_L_2: + aes->reg[0] = 0x1; break ; + case AES_CFG_CCM_L_1: + aes->reg[0] = 0x0; break ; + } + XMEMCPY(((byte *)aes->reg)+1, nonce, len) ; + } else { + byte *b = (byte *)aes->reg ; + XMEMSET(aes->reg, 0, AES_BLOCK_SIZE); + XMEMCPY(aes->reg, nonce, len); + b[AES_BLOCK_SIZE-4] = 0 ; + b[AES_BLOCK_SIZE-3] = 0 ; + b[AES_BLOCK_SIZE-2] = 0 ; + b[AES_BLOCK_SIZE-1] = 1 ; + } +} + +#define RoundUp16(n) ((n+15)&0xfffffff0) +#define FREE_ALL \ + if(in_save) XFREE(in_save, NULL, DYNAMIC_TYPE_TMP_BUFFER);\ + if(out_save) XFREE(out_save, NULL, DYNAMIC_TYPE_TMP_BUFFER);\ + if(authIn_save)XFREE(authIn_save, NULL, DYNAMIC_TYPE_TMP_BUFFER);\ + if(nonce_save) XFREE(nonce_save, NULL, DYNAMIC_TYPE_TMP_BUFFER); + +static int AesAuthEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz, int mode) +{ + word32 M, L ; + byte *in_a, *in_save ; + byte *out_a, *out_save ; + byte *authIn_a, *authIn_save ; + byte *nonce_a, *nonce_save ; + word32 tmpTag[4] ; + int ret ; + + if(AesAuthArgCheck(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz, authIn, authInSz, &M, &L) + == BAD_FUNC_ARG)return BAD_FUNC_ARG ; + + /* 16 byte padding */ + in_save = NULL ; out_save = NULL ; authIn_save = NULL ; nonce_save = NULL ; + if((inSz%16)==0){ + in_save = NULL ; in_a = (byte *)in ; + out_save = NULL ; out_a = out ; + } else { + if((in_save = XMALLOC(RoundUp16(inSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){ + FREE_ALL; return MEMORY_E ; } + in_a = in_save ; XMEMSET(in_a, 0, RoundUp16(inSz)) ; XMEMCPY(in_a, in, inSz) ; + + if((out_save = XMALLOC(RoundUp16(inSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){ + FREE_ALL; return MEMORY_E ; } + out_a = out_save ; + } + + if((authInSz%16)==0){ + authIn_save = NULL ; authIn_a = (byte *)authIn ; + } else { + if((authIn_save = XMALLOC(RoundUp16(authInSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){ + FREE_ALL; return MEMORY_E ; } + authIn_a = authIn_save ; XMEMSET(authIn_a, 0, RoundUp16(authInSz)) ; XMEMCPY(authIn_a, authIn, authInSz) ; + } + + if((nonceSz%16)==0){ + nonce_save = NULL ; nonce_a = (byte *)nonce ; + } else { + if((nonce_save = XMALLOC(RoundUp16(nonceSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){ + FREE_ALL; return MEMORY_E; } + nonce_a = nonce_save ; XMEMSET(nonce_a, 0, RoundUp16(nonceSz)) ; XMEMCPY(nonce_a, nonce, nonceSz) ; + } + + /* do aes-ccm */ + AesAuthSetIv(aes, nonce, nonceSz, L, mode) ; + ROM_AESReset(AES_BASE); + ROM_AESConfigSet(AES_BASE, (aes->keylen | AES_CFG_DIR_ENCRYPT | + AES_CFG_CTR_WIDTH_128 | + mode | ((mode== 
AES_CFG_MODE_CCM) ? (L | M) : 0 ))) ; + ROM_AESIVSet(AES_BASE, aes->reg); + ROM_AESKey1Set(AES_BASE, aes->key, aes->keylen); + ret = ROM_AESDataProcessAuth(AES_BASE, (unsigned int*)in_a, (unsigned int *)out_a, inSz, + (unsigned int*)authIn_a, authInSz, (unsigned int *)tmpTag); + if(ret == false){ + XMEMSET(out, 0, inSz) ; + XMEMSET(authTag, 0, authTagSz) ; + } else { + XMEMCPY(out, out_a, inSz) ; + XMEMCPY(authTag, tmpTag, authTagSz) ; + } + + FREE_ALL; + return 0 ; +} + +static int AesAuthDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz, int mode) +{ + word32 M, L ; + byte *in_a, *in_save ; + byte *out_a, *out_save ; + byte *authIn_a, *authIn_save ; + byte *nonce_a, *nonce_save ; + word32 tmpTag[4] ; + bool ret ; + + if(AesAuthArgCheck(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz, authIn, authInSz, &M, &L) + == BAD_FUNC_ARG)return BAD_FUNC_ARG ; + + /* 16 byte padding */ + in_save = NULL ; out_save = NULL ; authIn_save = NULL ; nonce_save = NULL ; + if((inSz%16)==0){ + in_save = NULL ; in_a = (byte *)in ; + out_save = NULL ; out_a = out ; + } else { + if((in_save = XMALLOC(RoundUp16(inSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){ + FREE_ALL; return MEMORY_E;} + in_a = in_save ; XMEMSET(in_a, 0, RoundUp16(inSz)) ; XMEMCPY(in_a, in, inSz) ; + + if((out_save = XMALLOC(RoundUp16(inSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){ + FREE_ALL; return MEMORY_E;} + out_a = out_save ; + } + + if((authInSz%16)==0){ + authIn_save = NULL ; authIn_a = (byte *)authIn ; + } else { + if((authIn_save = XMALLOC(RoundUp16(authInSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){ + FREE_ALL; return MEMORY_E; } + authIn_a = authIn_save ; XMEMSET(authIn_a, 0, RoundUp16(authInSz)) ; XMEMCPY(authIn_a, authIn, authInSz) ; + } + + if((nonceSz%16)==0){ + nonce_save = NULL ; nonce_a = (byte *)nonce ; + } else { + if((nonce_save = XMALLOC(RoundUp16(nonceSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){ + FREE_ALL; return MEMORY_E; } + nonce_a = nonce_save ; XMEMSET(nonce_a, 0, RoundUp16(nonceSz)) ; XMEMCPY(nonce_a, nonce, nonceSz) ; + } + + /* do aes-ccm */ + AesAuthSetIv(aes, nonce, nonceSz, L, mode) ; + ROM_AESReset(AES_BASE); + ROM_AESConfigSet(AES_BASE, (aes->keylen | AES_CFG_DIR_DECRYPT | + AES_CFG_CTR_WIDTH_128 | + mode | ((mode== AES_CFG_MODE_CCM) ? (L | M) : 0 ))) ; + ROM_AESIVSet(AES_BASE, aes->reg); + ROM_AESKey1Set(AES_BASE, aes->key, aes->keylen); + ret = ROM_AESDataProcessAuth(AES_BASE, (unsigned int*)in_a, (unsigned int *)out_a, inSz, + (unsigned int*)authIn_a, authInSz, (unsigned int *)tmpTag); + if((ret == false) || (XMEMCMP(authTag, tmpTag, authTagSz) != 0)){ + XMEMSET(out, 0, inSz) ; + ret = false ; + } else { + XMEMCPY(out, out_a, inSz) ; + } + + FREE_ALL ; + return ret==true ? 
0 : 1 ; +} +#endif + + +#ifdef HAVE_AESGCM +WOLFSSL_API int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) +{ + return AesAuthSetKey(aes, key, len) ; +} + +WOLFSSL_API int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) { + return BAD_FUNC_ARG; + } + return AesAuthEncrypt(aes, out, in, sz, iv, ivSz, authTag, authTagSz, + authIn, authInSz, AES_CFG_MODE_GCM_HY0CALC) ; +} +WOLFSSL_API int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + return AesAuthDecrypt(aes, out, in, sz, iv, ivSz, authTag, authTagSz, + authIn, authInSz, AES_CFG_MODE_GCM_HY0CALC) ; +} + +WOLFSSL_API int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len) +{ + return AesAuthSetKey(&gmac->aes, key, len) ; +} + +WOLFSSL_API int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz, + const byte* authIn, word32 authInSz, + byte* authTag, word32 authTagSz) +{ + return AesAuthEncrypt(&gmac->aes, NULL, NULL, 0, iv, ivSz, authTag, authTagSz, + authIn, authInSz, AES_CFG_MODE_GCM_HY0CALC) ; +} + +#endif /* HAVE_AESGCM */ + +#ifdef HAVE_AESCCM +WOLFSSL_API int wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz) +{ + return AesAuthSetKey(aes, key, keySz) ; +} + +WOLFSSL_API int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + return AesAuthEncrypt(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz, + authIn, authInSz, AES_CFG_MODE_CCM) ; +} + +WOLFSSL_API int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + return AesAuthDecrypt(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz, + authIn, authInSz, AES_CFG_MODE_CCM) ; +} +#endif /* HAVE_AESCCM */ + +WOLFSSL_API int wc_AesInit(Aes* aes, void* heap, int devId) +{ + if (aes == NULL) + return BAD_FUNC_ARG; + + aes->heap = heap; + (void)devId; + + return 0; +} + +WOLFSSL_API void wc_AesFree(Aes* aes) +{ + (void)aes; +} + +#endif /* WOLFSSL_TI_CRYPT */ + +#endif /* NO_AES */ + + + diff --git a/client/wolfssl/wolfcrypt/src/port/ti/ti-ccm.c b/client/wolfssl/wolfcrypt/src/port/ti/ti-ccm.c new file mode 100644 index 0000000..5c0051e --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/ti/ti-ccm.c @@ -0,0 +1,94 @@ +/* port/ti/ti_ccm.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#if defined(WOLFSSL_TI_CRYPT) || defined(WOLFSSL_TI_HASH) + +#include "wolfssl/wolfcrypt/port/ti/ti-ccm.h" +#include +#include + +#ifndef TI_DUMMY_BUILD +#include "driverlib/sysctl.h" +#include "driverlib/rom_map.h" +#include "driverlib/rom.h" + +#ifndef SINGLE_THREADED +#include + static wolfSSL_Mutex TI_CCM_Mutex; +#endif +#endif /* TI_DUMMY_BUILD */ + +#define TIMEOUT 500000 +#define WAIT(stat) { volatile int i; for(i=0; i +#endif + +#include + +#ifndef NO_DES + +#if defined(WOLFSSL_TI_CRYPT) +#include +#include + +#include +#include +#include + +#include "inc/hw_des.h" +#include "inc/hw_memmap.h" +#include "inc/hw_ints.h" +#include "driverlib/des.h" +#include "driverlib/sysctl.h" +#include "driverlib/rom_map.h" +#include "driverlib/rom.h" + +static int DesSetIV(Des* des, const byte* iv, int tri) +{ + if (des == NULL) + return BAD_FUNC_ARG; + + if (iv) + XMEMCPY(des->reg, iv, tri == DES_CFG_TRIPLE ? DES3_IVLEN : DES_IVLEN); + else + XMEMSET(des->reg, 0, tri == DES_CFG_TRIPLE ? DES3_IVLEN : DES_IVLEN); + + return 0; +} + +static int DesSetKey(Des* des, const byte* key, const byte* iv,int dir, int tri) +{ + if(!wolfSSL_TI_CCMInit())return 1 ; + if ((des == NULL) || (key == NULL) || (iv == NULL)) + return BAD_FUNC_ARG; + if(!((dir == DES_ENCRYPTION) || (dir == DES_DECRYPTION))) + return BAD_FUNC_ARG; + + XMEMCPY(des->key, key, tri == DES_CFG_SINGLE ? DES_KEYLEN : DES3_KEYLEN) ; + return DesSetIV(des, iv, tri); +} + +static int DesCbcAlign16(Des* des, byte* out, const byte* in, word32 sz, word32 dir, word32 tri) +{ + + wolfSSL_TI_lockCCM() ; + ROM_DESReset(DES_BASE); + ROM_DESConfigSet(DES_BASE, (dir | DES_CFG_MODE_CBC | tri)); + ROM_DESIVSet(DES_BASE, (uint32_t*)des->reg); + ROM_DESKeySet(DES_BASE,(uint32_t*)des->key); + if(dir == DES_CFG_DIR_DECRYPT) + /* if input and output same will overwrite input iv */ + XMEMCPY(des->tmp, in + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE); + ROM_DESDataProcess(DES_BASE, (uint32_t *)in, (uint32_t *)out, sz); + wolfSSL_TI_unlockCCM() ; + + /* store iv for next call */ + if(dir == DES_CFG_DIR_ENCRYPT) + XMEMCPY(des->reg, out + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE); + else + XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE); + + return 0 ; +} + +#define IS_ALIGN16(p) (((unsigned int)(p)&0xf) == 0) + +static int DesCbc(Des* des, byte* out, const byte* in, word32 sz, word32 dir, word32 tri) +{ + const byte * in_p ; byte * out_p ; + word32 size ; + #define TI_BUFFSIZE 1024 + byte buff[TI_BUFFSIZE] ; + if ((des == NULL) || (in == NULL) || (out == NULL)) + return BAD_FUNC_ARG; + if(sz % DES_BLOCK_SIZE) + return BAD_FUNC_ARG; + + while(sz > 0) { + size = sz ; in_p = in ; out_p = out ; + if(!IS_ALIGN16(in)){ + size = sz>TI_BUFFSIZE ? TI_BUFFSIZE : sz ; + XMEMCPY(buff, in, size) ; + in_p = (const byte *)buff ; + } + if(!IS_ALIGN16(out)){ + size = sz>TI_BUFFSIZE ? 
TI_BUFFSIZE : sz ; + out_p = (byte *)buff ; + } + + DesCbcAlign16(des, out_p, in_p, size, dir, tri) ; + + if(!IS_ALIGN16(out)){ + XMEMCPY(out, buff, size) ; + } + sz -= size ; in += size ; out += size ; + } + return 0 ; +} + +WOLFSSL_API int wc_Des_SetKey(Des* des, const byte* key, const byte* iv,int dir) +{ + return DesSetKey(des, key, iv, dir, DES_CFG_SINGLE) ; +} + +WOLFSSL_API void wc_Des_SetIV(Des* des, const byte* iv) +{ + DesSetIV(des, iv, DES_CFG_SINGLE) ; +} + +WOLFSSL_API int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv,int dir) +{ + return DesSetKey((Des *)des, key, iv, dir, DES_CFG_TRIPLE) ; +} + +WOLFSSL_API int wc_Des3_SetIV(Des3* des, const byte* iv) +{ + return DesSetIV((Des *)des, iv, DES_CFG_TRIPLE) ; +} + + +WOLFSSL_API int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz) +{ + return DesCbc(des, out, in, sz, DES_CFG_DIR_ENCRYPT, DES_CFG_SINGLE) ; +} + +WOLFSSL_API int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz) +{ + return DesCbc(des, out, in, sz, DES_CFG_DIR_DECRYPT, DES_CFG_SINGLE) ; +} + +WOLFSSL_API int wc_Des_CbcDecryptWithKey(byte* out, const byte* in, word32 sz, + const byte* key, const byte* iv) +{ + (void)out; (void)in; (void)sz; (void)key; (void)iv ; + return -1 ; +} + +WOLFSSL_API int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz) +{ + return DesCbc((Des *)des, out, in, sz, DES_CFG_DIR_ENCRYPT, DES_CFG_TRIPLE) ; +} + +WOLFSSL_API int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz) +{ + return DesCbc((Des *)des, out, in, sz, DES_CFG_DIR_DECRYPT, DES_CFG_TRIPLE) ; +} + +WOLFSSL_API int wc_Des3_CbcDecryptWithKey(byte* out, const byte* in, word32 sz, + const byte* key, const byte* iv) +{ + (void)out; (void)in; (void)sz; (void)key; (void)iv ; + return -1 ; + } + +WOLFSSL_API int wc_Des3Init(Des3* des, void* heap, int devId) +{ + if (des == NULL) + return BAD_FUNC_ARG; + + des->heap = heap; + (void)devId; + + return 0; +} + +WOLFSSL_API void wc_Des3Free(Des3* des) +{ + (void)des; +} + + +#endif /* WOLFSSL_TI_CRYPT */ + +#endif /* NO_DES */ diff --git a/client/wolfssl/wolfcrypt/src/port/ti/ti-hash.c b/client/wolfssl/wolfcrypt/src/port/ti/ti-hash.c new file mode 100644 index 0000000..ab8f2cc --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/ti/ti-hash.c @@ -0,0 +1,338 @@ +/* port/ti/ti-hash.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#include + +#if defined(WOLFSSL_TI_HASH) + +#ifdef __cplusplus + extern "C" { +#endif + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef TI_DUMMY_BUILD +#include "inc/hw_memmap.h" +#include "inc/hw_shamd5.h" +#include "inc/hw_ints.h" +#include "driverlib/shamd5.h" +#include "driverlib/sysctl.h" +#include "driverlib/rom_map.h" +#include "driverlib/rom.h" +#else +#define SHAMD5_ALGO_MD5 1 +#define SHAMD5_ALGO_SHA1 2 +#define SHAMD5_ALGO_SHA256 3 +#define SHAMD5_ALGO_SHA224 4 +#endif + +static int hashInit(wolfssl_TI_Hash *hash) { + if (!wolfSSL_TI_CCMInit())return 1; + hash->used = 0; + hash->msg = 0; + hash->len = 0; + return 0; +} + +static int hashUpdate(wolfssl_TI_Hash *hash, const byte* data, word32 len) +{ + void *p; + + if ((hash== NULL) || (data == NULL))return BAD_FUNC_ARG; + + if (hash->len < hash->used+len) { + if (hash->msg == NULL) { + p = XMALLOC(hash->used+len, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } else { + p = XREALLOC(hash->msg, hash->used+len, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + if (p == 0)return 1; + hash->msg = p; + hash->len = hash->used+len; + } + XMEMCPY(hash->msg+hash->used, data, len); + hash->used += len; + return 0; +} + +static int hashGetHash(wolfssl_TI_Hash *hash, byte* result, word32 algo, word32 hsize) +{ + uint32_t h[16]; +#ifndef TI_DUMMY_BUILD + wolfSSL_TI_lockCCM(); + ROM_SHAMD5Reset(SHAMD5_BASE); + ROM_SHAMD5ConfigSet(SHAMD5_BASE, algo); + ROM_SHAMD5DataProcess(SHAMD5_BASE, + (uint32_t *)hash->msg, hash->used, h); + wolfSSL_TI_unlockCCM(); +#else + (void) hash; + (void) algo; + + XMEMSET(h, 0, sizeof(h)); +#endif + XMEMCPY(result, h, hsize); + + return 0; +} + +static int hashCopy(wolfssl_TI_Hash *src, wolfssl_TI_Hash *dst) { + XMEMCPY(dst, src, sizeof(wolfssl_TI_Hash)); + return 0; +} + +static int hashFinal(wolfssl_TI_Hash *hash, byte* result, word32 algo, word32 hsize) +{ + hashGetHash(hash, result, algo, hsize); + XFREE(hash->msg, NULL, DYNAMIC_TYPE_TMP_BUFFER); + hashInit(hash); + return 0; +} + +static int hashHash(const byte* data, word32 len, byte* hash, word32 algo, word32 hsize) +{ + int ret = 0; +#ifdef WOLFSSL_SMALL_STACK + wolfssl_TI_Hash* hash_desc; +#else + wolfssl_TI_Hash hash_desc[1]; +#endif + +#ifdef WOLFSSL_SMALL_STACK + hash_desc = (wolfssl_TI_Hash*)XMALLOC(sizeof(wolfssl_TI_Hash), NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (hash_desc == NULL) + return MEMORY_E; +#endif + + if ((ret = hashInit(hash_desc)) != 0) { + WOLFSSL_MSG("Hash Init failed"); + } + else { + hashUpdate(hash_desc, data, len); + hashFinal(hash_desc, hash, algo, hsize); + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(hash_desc, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return ret; +} + +static int hashFree(wolfssl_TI_Hash *hash) +{ + XFREE(hash->msg, NULL, DYNAMIC_TYPE_TMP_BUFFER); + hashInit(hash); + return 0; +} + +#if !defined(NO_MD5) +WOLFSSL_API int wc_InitMd5_ex(Md5* md5, void* heap, int devId) +{ + if (md5 == NULL) + return 1; + (void)heap; + (void)devId; + return hashInit((wolfssl_TI_Hash *)md5); +} +WOLFSSL_API int wc_InitMd5(Md5* md5) +{ + return wc_InitMd5_ex(md5, NULL, INVALID_DEVID); +} + +WOLFSSL_API int wc_Md5Update(Md5* md5, const byte* data, word32 len) +{ + return hashUpdate((wolfssl_TI_Hash *)md5, data, len); +} + 
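+/* Usage sketch (illustrative only, not part of the port): because this
+ * port buffers the whole message and drives the SHAMD5 engine once at
+ * finalization, the call sequence matches the software MD5 API exactly.
+ * The guard macro is hypothetical; it only keeps the example out of
+ * real builds. */
+#ifdef WOLFSSL_TI_HASH_EXAMPLE
+static int exampleMd5Digest(const byte* data, word32 len, byte* out)
+{
+    Md5 md5;                                 /* out: MD5_DIGEST_SIZE bytes */
+    int ret = wc_InitMd5(&md5);              /* resets the buffered message */
+    if (ret == 0)
+        ret = wc_Md5Update(&md5, data, len); /* appends data to the buffer */
+    if (ret == 0)
+        ret = wc_Md5Final(&md5, out);        /* one SHAMD5 pass, then reset */
+    return ret;
+}
+#endif /* WOLFSSL_TI_HASH_EXAMPLE */
+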
+WOLFSSL_API int wc_Md5Final(Md5* md5, byte* hash) +{ + return hashFinal((wolfssl_TI_Hash *)md5, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE); +} + +WOLFSSL_API int wc_Md5GetHash(Md5* md5, byte* hash) +{ + return hashGetHash((wolfssl_TI_Hash *)md5, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE); +} + +WOLFSSL_API int wc_Md5Copy(Md5* src, Md5* dst) { + return hashCopy((wolfssl_TI_Hash *)src, (wolfssl_TI_Hash *)dst); +} + +WOLFSSL_API int wc_Md5Hash(const byte*data, word32 len, byte*hash) +{ + return hashHash(data, len, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE); +} + +WOLFSSL_API void wc_Md5Free(Md5* md5) +{ + hashFree((wolfssl_TI_Hash *)md5); +} + +#endif /* !NO_MD5 */ + +#if !defined(NO_SHA) +WOLFSSL_API int wc_InitSha_ex(Md5* sha, void* heap, int devId) +{ + if (sha == NULL) + return 1; + (void)heap; + (void)devId; + return hashInit((wolfssl_TI_Hash *)sha); +} +WOLFSSL_API int wc_InitSha(Sha* sha) +{ + return wc_InitSha_ex(sha, NULL, INVALID_DEVID); +} + +WOLFSSL_API int wc_ShaUpdate(Sha* sha, const byte* data, word32 len) +{ + return hashUpdate((wolfssl_TI_Hash *)sha, data, len); +} + +WOLFSSL_API int wc_ShaFinal(Sha* sha, byte* hash) +{ + return hashFinal((wolfssl_TI_Hash *)sha, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE); +} + +WOLFSSL_API int wc_ShaGetHash(Sha* sha, byte* hash) +{ + return hashGetHash(sha, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE); +} + +WOLFSSL_API int wc_ShaCopy(Sha* src, Sha* dst) { + return hashCopy((wolfssl_TI_Hash *)src, (wolfssl_TI_Hash *)dst); +} + +WOLFSSL_API int wc_ShaHash(const byte*data, word32 len, byte*hash) +{ + return hashHash(data, len, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE); +} + +WOLFSSL_API void wc_ShaFree(Sha* sha) +{ + hashFree((wolfssl_TI_Hash *)sha); +} + +#endif /* !NO_SHA */ + +#if defined(WOLFSSL_SHA224) +WOLFSSL_API int wc_InitSha224_ex(Sha224* sha224, void* heap, int devId) +{ + if (sha224 == NULL) + return 1; + (void)heap; + (void)devId; + return hashInit((wolfssl_TI_Hash *)sha224); +} +WOLFSSL_API int wc_InitSha224(Sha224* sha224) +{ + return wc_InitSha224_ex(sha224, NULL, INVALID_DEVID); +} + +WOLFSSL_API int wc_Sha224Update(Sha224* sha224, const byte* data, word32 len) +{ + return hashUpdate((wolfssl_TI_Hash *)sha224, data, len); +} + +WOLFSSL_API int wc_Sha224Final(Sha224* sha224, byte* hash) +{ + return hashFinal((wolfssl_TI_Hash *)sha224, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE); +} + +WOLFSSL_API int wc_Sha224GetHash(Sha224* sha224, byte* hash) +{ + return hashGetHash(sha224, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE); +} + +WOLFSSL_API int wc_Sha224Hash(const byte* data, word32 len, byte*hash) +{ + return hashHash(data, len, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE); +} + +WOLFSSL_API void wc_Sha224Free(Sha224* sha224) +{ + hashFree((wolfssl_TI_Hash *)sha224); +} + +#endif /* WOLFSSL_SHA224 */ + +#if !defined(NO_SHA256) +WOLFSSL_API int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId) +{ + if (sha256 == NULL) + return 1; + (void)heap; + (void)devId; + return hashInit((wolfssl_TI_Hash *)sha256); +} + +WOLFSSL_API int wc_InitSha256(Sha256* sha256) +{ + return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID); +} + +WOLFSSL_API int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len) +{ + return hashUpdate((wolfssl_TI_Hash *)sha256, data, len); +} + +WOLFSSL_API int wc_Sha256Final(Sha256* sha256, byte* hash) +{ + return hashFinal((wolfssl_TI_Hash *)sha256, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE); +} + +WOLFSSL_API int wc_Sha256GetHash(Sha256* sha256, byte* hash) +{ + return hashGetHash(sha256, hash, SHAMD5_ALGO_SHA256, 
SHA256_DIGEST_SIZE); +} + +WOLFSSL_API int wc_Sha256Hash(const byte* data, word32 len, byte*hash) +{ + return hashHash(data, len, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE); +} + +WOLFSSL_API void wc_Sha256Free(Sha256* sha256) +{ + hashFree((wolfssl_TI_Hash *)sha256); +} + +#endif /* !NO_SHA256 */ + +#endif diff --git a/client/wolfssl/wolfcrypt/src/port/xilinx/xil-aesgcm.c b/client/wolfssl/wolfcrypt/src/port/xilinx/xil-aesgcm.c new file mode 100644 index 0000000..6af4b31 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/xilinx/xil-aesgcm.c @@ -0,0 +1,202 @@ +/* xil-aesgcm.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#if !defined(NO_AES) && defined(WOLFSSL_XILINX_CRYPT) + +#include + + +#ifdef HAVE_AESGCM +/* Make calls to Xilinx hardened AES-GCM crypto */ + +#include +#include + +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +#include "xparameters.h" + +enum { + AEAD_NONCE_SZ = 12, + AES_GCM_AUTH_SZ = 16, /* AES-GCM Auth Tag length */ +}; + + +int wc_AesGcmSetKey_ex(Aes* aes, const byte* key, word32 len, word32 kup) +{ + XCsuDma_Config* con; + + if (aes == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + + if (len != 32) { + WOLFSSL_MSG("Expecting a 256 bit key"); + return BAD_FUNC_ARG; + } + + if ((con = XCsuDma_LookupConfig(0)) == NULL) { + WOLFSSL_MSG("Failed to look up config"); + return MEMORY_E; + } + + /* XST_SUCCESS comes from Xilinx header file */ + if (XCsuDma_CfgInitialize(&(aes->dma), con, con->BaseAddress) != + XST_SUCCESS) { + WOLFSSL_MSG("Failed to initialize hardware"); + return MEMORY_E; + } + + aes->keylen = len; + aes->kup = kup; + XMEMCPY((byte*)(aes->key_init), key, len); + + return 0; +} + + + +int wc_AesGcmEncrypt(Aes* aes, byte* out, + const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + byte* tmp; + byte scratch[AES_BLOCK_SIZE]; + byte initalCounter[AES_BLOCK_SIZE]; + + if ((in == NULL && sz > 0) || iv == NULL || authTag == NULL || + authTagSz > AES_GCM_AUTH_SZ) { + return BAD_FUNC_ARG; + } + + if (ivSz != AEAD_NONCE_SZ) { + WOLFSSL_MSG("Expecting an IV size of 12"); + return BAD_FUNC_ARG; + } + + /* API expects that output is size of input + 16 byte tag. A temporary + * buffer is created to keep AES encrypt from writing over the end of + * out buffer. 
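+ * (XSecure_AesEncryptData writes the ciphertext and the 16 byte tag into
+ * one contiguous buffer, hence the sz + AES_GCM_AUTH_SZ allocation below.)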
*/ + if (in != NULL) { + if (aes->keylen != 32) { + WOLFSSL_MSG("Expecting 256 bit AES key"); + return BAD_FUNC_ARG; + } + + tmp = (byte*)XMALLOC(sz + AES_GCM_AUTH_SZ, aes->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (tmp == NULL) { + return MEMORY_E; + } + + XSecure_AesInitialize(&(aes->xilAes), &(aes->dma), aes->kup, (word32*)iv, + aes->key_init); + XSecure_AesEncryptData(&(aes->xilAes), tmp, in, sz); + XMEMCPY(out, tmp, sz); + XMEMCPY(authTag, tmp + sz, authTagSz); + XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + + /* handle completing tag with any additional data */ + if (authIn != NULL) { + /* @TODO avoid hashing out again since Xilinx call already does */ + XMEMSET(initalCounter, 0, AES_BLOCK_SIZE); + XMEMCPY(initalCounter, iv, ivSz); + initalCounter[AES_BLOCK_SIZE - 1] = 1; + GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz); + wc_AesEncryptDirect(aes, scratch, initalCounter); + xorbuf(authTag, scratch, authTagSz); + } + + return 0; +} + + +int wc_AesGcmDecrypt(Aes* aes, byte* out, + const byte* in, word32 sz, + const byte* iv, word32 ivSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + byte* tag; + byte buf[AES_GCM_AUTH_SZ]; + byte scratch[AES_BLOCK_SIZE]; + byte initalCounter[AES_BLOCK_SIZE]; + + if (in == NULL || iv == NULL || authTag == NULL || + authTagSz < AES_GCM_AUTH_SZ) { + return BAD_FUNC_ARG; + } + + if (ivSz != AEAD_NONCE_SZ) { + WOLFSSL_MSG("Expecting an IV size of 12"); + return BAD_FUNC_ARG; + } + + /* account for additional data */ + if (authIn != NULL && authInSz > 0) { + XMEMSET(initalCounter, 0, AES_BLOCK_SIZE); + XMEMCPY(initalCounter, iv, ivSz); + initalCounter[AES_BLOCK_SIZE - 1] = 1; + tag = buf; + GHASH(aes, NULL, 0, in, sz, tag, AES_GCM_AUTH_SZ); + wc_AesEncryptDirect(aes, scratch, initalCounter); + xorbuf(tag, scratch, AES_GCM_AUTH_SZ); + } + else { + tag = authTag; + } + + /* calls to hardened crypto */ + XSecure_AesInitialize(&(aes->xilAes), &(aes->dma), aes->kup, + (word32*)iv, aes->key_init); + XSecure_AesDecryptData(&(aes->xilAes), out, in, sz, tag); + + /* account for additional data */ + if (authIn != NULL && authInSz > 0) { + GHASH(aes, authIn, authInSz, in, sz, tag, AES_GCM_AUTH_SZ); + wc_AesEncryptDirect(aes, scratch, initalCounter); + xorbuf(tag, scratch, AES_GCM_AUTH_SZ); + if (ConstantCompare(authTag, tag, authTagSz) != 0) { + return AES_GCM_AUTH_E; + } + } + + return 0; + +} +#endif /* HAVE_AESGCM */ + +#endif diff --git a/client/wolfssl/wolfcrypt/src/port/xilinx/xil-sha3.c b/client/wolfssl/wolfcrypt/src/port/xilinx/xil-sha3.c new file mode 100644 index 0000000..a9db6b9 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/port/xilinx/xil-sha3.c @@ -0,0 +1,158 @@ +/* xil-sha3.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + + +#if defined(WOLFSSL_SHA3) && defined(WOLFSSL_XILINX_CRYPT) + +#include +#include +#include + +#if !defined(WOLFSSL_NOSHA3_224) || !defined(WOLFSSL_NOSHA3_256) \ + || !defined(WOLFSSL_NOSHA3_512) + #error sizes of SHA3 other than 384 are not supported +#endif + +/* Initialize hardware for SHA3 operations + * + * sha SHA3 structure to initialize + * heap memory heap hint to use + * devId used for async operations (currently not supported here) + */ +int wc_InitSha3_384(wc_Sha3* sha, void* heap, int devId) +{ + XCsuDma_Config* con; + + (void)heap; + (void)devId; + + if (sha == NULL) { + return BAD_FUNC_ARG; + } + + if ((con = XCsuDma_LookupConfig(0)) == NULL) { + WOLFSSL_MSG("Unable to look up configure for SHA3"); + return BAD_STATE_E; + } + + /* XST_SUCCESS is success macro from Xilinx header */ + if (XCsuDma_CfgInitialize(&(sha->dma), con, con->BaseAddress) != + XST_SUCCESS) { + WOLFSSL_MSG("Unable to initialize CsuDma"); + return BAD_STATE_E; + } + + XSecure_Sha3Initialize(&(sha->hw), &(sha->dma)); + XSecure_Sha3Start(&(sha->hw)); + + return 0; +} + + +/* Update SHA3 state + * + * sha SHA3 structure to update + * data message to update SHA3 state with + * len length of data buffer + */ +int wc_Sha3_384_Update(wc_Sha3* sha, const byte* data, word32 len) +{ + if (sha == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + XSecure_Sha3Update(&(sha->hw), (byte*)data, len); + + return 0; +} + + +/* Finalize SHA3 state and get digest + * + * sha SHA3 structure to get hash + * out digest out, expected to be large enough to hold SHA3 digest + */ +int wc_Sha3_384_Final(wc_Sha3* sha, byte* out) +{ + if (sha == NULL || out == NULL) { + return BAD_FUNC_ARG; + } + XSecure_Sha3Finish(&(sha->hw), out); + + return wc_InitSha3_384(sha, NULL, INVALID_DEVID); +} + + +/* Free SHA3 structure + * + * sha SHA3 structure to free + */ +void wc_Sha3_384_Free(wc_Sha3* sha) +{ + (void)sha; + /* nothing to free yet */ +} + + +/* Get SHA3 digest without finalize SHA3 state + * + * sha SHA3 structure to get hash + * out digest out, expected to be large enough to hold SHA3 digest + */ +int wc_Sha3_384_GetHash(wc_Sha3* sha, byte* out) +{ + wc_Sha3 s; + + if (sha == NULL || out == NULL) { + return BAD_FUNC_ARG; + } + + if (wc_Sha3_384_Copy(sha, &s) != 0) { + WOLFSSL_MSG("Unable to copy SHA3 structure"); + return MEMORY_E; + } + + return wc_Sha3_384_Final(&s, out); +} + + +/* Get copy of SHA3 structure + * + * src SHA3 structure to make copy of + * dst [out]structure to hold copy + */ +int wc_Sha3_384_Copy(wc_Sha3* src, wc_Sha3* dst) +{ + if (src == NULL || dst== NULL) { + return BAD_FUNC_ARG; + } + + XMEMCPY((byte*)dst, (byte*)src, sizeof(wc_Sha3)); + return 0; +} + +#endif diff --git a/client/wolfssl/wolfcrypt/src/pwdbased.c b/client/wolfssl/wolfcrypt/src/pwdbased.c new file mode 100644 index 0000000..c672c22 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/pwdbased.c @@ -0,0 +1,795 @@ +/* pwdbased.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. 
+ * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifndef NO_PWDBASED + +#include +#include +#include +#include +#include + +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + + +#ifdef HAVE_PBKDF1 + +/* PKCS#5 v1.5 with non standard extension to optionally derive the extra data (IV) */ +int wc_PBKDF1_ex(byte* key, int keyLen, byte* iv, int ivLen, + const byte* passwd, int passwdLen, const byte* salt, int saltLen, + int iterations, int hashType, void* heap) +{ + int err; + int keyLeft, ivLeft, i; + int digestLeft, store; + int keyOutput = 0; + int diestLen; + byte digest[WC_MAX_DIGEST_SIZE]; +#ifdef WOLFSSL_SMALL_STACK + wc_HashAlg* hash = NULL; +#else + wc_HashAlg hash[1]; +#endif + enum wc_HashType hashT; + + (void)heap; + + if (key == NULL || keyLen < 0 || passwdLen < 0 || saltLen < 0 || ivLen < 0){ + return BAD_FUNC_ARG; + } + + if (iterations <= 0) + iterations = 1; + + hashT = wc_HashTypeConvert(hashType); + err = wc_HashGetDigestSize(hashT); + if (err < 0) + return err; + diestLen = err; + + /* initialize hash */ +#ifdef WOLFSSL_SMALL_STACK + hash = (wc_HashAlg*)XMALLOC(sizeof(wc_HashAlg), heap, + DYNAMIC_TYPE_HASHCTX); + if (hash == NULL) + return MEMORY_E; +#endif + + err = wc_HashInit_ex(hash, hashT, heap, INVALID_DEVID); + if (err != 0) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(hash, heap, DYNAMIC_TYPE_HASHCTX); + #endif + return err; + } + + keyLeft = keyLen; + ivLeft = ivLen; + while (keyOutput < (keyLen + ivLen)) { + digestLeft = diestLen; + /* D_(i - 1) */ + if (keyOutput) { /* first time D_0 is empty */ + err = wc_HashUpdate(hash, hashT, digest, diestLen); + if (err != 0) break; + } + + /* data */ + err = wc_HashUpdate(hash, hashT, passwd, passwdLen); + if (err != 0) break; + + /* salt */ + if (salt) { + err = wc_HashUpdate(hash, hashT, salt, saltLen); + if (err != 0) break; + } + + err = wc_HashFinal(hash, hashT, digest); + if (err != 0) break; + + /* count */ + for (i = 1; i < iterations; i++) { + err = wc_HashUpdate(hash, hashT, digest, diestLen); + if (err != 0) break; + + err = wc_HashFinal(hash, hashT, digest); + if (err != 0) break; + } + + if (keyLeft) { + store = min(keyLeft, diestLen); + XMEMCPY(&key[keyLen - keyLeft], digest, store); + + keyOutput += store; + keyLeft -= store; + digestLeft -= store; + } + + if (ivLeft && digestLeft) { + store = min(ivLeft, digestLeft); + if (iv != NULL) + XMEMCPY(&iv[ivLen - ivLeft], + &digest[diestLen - digestLeft], store); + keyOutput += store; + ivLeft -= store; + } + } + + wc_HashFree(hash, hashT); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(hash, heap, DYNAMIC_TYPE_HASHCTX); +#endif + + if (err != 0) + return err; + + if (keyOutput != (keyLen + ivLen)) + return BUFFER_E; + + return err; +} + +/* PKCS#5 v1.5 */ +int wc_PBKDF1(byte* output, const byte* passwd, int pLen, const 
byte* salt, + int sLen, int iterations, int kLen, int hashType) +{ + return wc_PBKDF1_ex(output, kLen, NULL, 0, + passwd, pLen, salt, sLen, iterations, hashType, NULL); +} + +#endif /* HAVE_PKCS5 */ + +#ifdef HAVE_PBKDF2 + +int wc_PBKDF2_ex(byte* output, const byte* passwd, int pLen, const byte* salt, + int sLen, int iterations, int kLen, int hashType, void* heap, int devId) +{ + word32 i = 1; + int hLen; + int j, ret; +#ifdef WOLFSSL_SMALL_STACK + byte* buffer; + Hmac* hmac; +#else + byte buffer[WC_MAX_DIGEST_SIZE]; + Hmac hmac[1]; +#endif + enum wc_HashType hashT; + + if (output == NULL || pLen < 0 || sLen < 0 || kLen < 0) { + return BAD_FUNC_ARG; + } + + if (iterations <= 0) + iterations = 1; + + hashT = wc_HashTypeConvert(hashType); + hLen = wc_HashGetDigestSize(hashT); + if (hLen < 0) + return BAD_FUNC_ARG; + +#ifdef WOLFSSL_SMALL_STACK + buffer = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (buffer == NULL) + return MEMORY_E; + hmac = (Hmac*)XMALLOC(sizeof(Hmac), heap, DYNAMIC_TYPE_HMAC); + if (hmac == NULL) { + XFREE(buffer, heap, DYNAMIC_TYPE_TMP_BUFFER); + return MEMORY_E; + } +#endif + + ret = wc_HmacInit(hmac, heap, devId); + if (ret == 0) { + /* use int hashType here, since HMAC FIPS uses the old unique value */ + ret = wc_HmacSetKey(hmac, hashType, passwd, pLen); + + while (ret == 0 && kLen) { + int currentLen; + + ret = wc_HmacUpdate(hmac, salt, sLen); + if (ret != 0) + break; + + /* encode i */ + for (j = 0; j < 4; j++) { + byte b = (byte)(i >> ((3-j) * 8)); + + ret = wc_HmacUpdate(hmac, &b, 1); + if (ret != 0) + break; + } + + /* check ret from inside for loop */ + if (ret != 0) + break; + + ret = wc_HmacFinal(hmac, buffer); + if (ret != 0) + break; + + currentLen = min(kLen, hLen); + XMEMCPY(output, buffer, currentLen); + + for (j = 1; j < iterations; j++) { + ret = wc_HmacUpdate(hmac, buffer, hLen); + if (ret != 0) + break; + ret = wc_HmacFinal(hmac, buffer); + if (ret != 0) + break; + xorbuf(output, buffer, currentLen); + } + + /* check ret from inside for loop */ + if (ret != 0) + break; + + output += currentLen; + kLen -= currentLen; + i++; + } + wc_HmacFree(hmac); + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(buffer, heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(hmac, heap, DYNAMIC_TYPE_HMAC); +#endif + + return ret; +} + +int wc_PBKDF2(byte* output, const byte* passwd, int pLen, const byte* salt, + int sLen, int iterations, int kLen, int hashType) +{ + return wc_PBKDF2_ex(output, passwd, pLen, salt, sLen, iterations, kLen, + hashType, NULL, INVALID_DEVID); +} + +#endif /* HAVE_PBKDF2 */ + +#ifdef HAVE_PKCS12 + +/* helper for PKCS12_PBKDF(), does hash operation */ +static int DoPKCS12Hash(int hashType, byte* buffer, word32 totalLen, + byte* Ai, word32 u, int iterations) +{ + int i; + int ret = 0; +#ifdef WOLFSSL_SMALL_STACK + wc_HashAlg* hash = NULL; +#else + wc_HashAlg hash[1]; +#endif + enum wc_HashType hashT; + + if (buffer == NULL || Ai == NULL) { + return BAD_FUNC_ARG; + } + + hashT = wc_HashTypeConvert(hashType); + + /* initialize hash */ +#ifdef WOLFSSL_SMALL_STACK + hash = (wc_HashAlg*)XMALLOC(sizeof(wc_HashAlg), NULL, + DYNAMIC_TYPE_HASHCTX); + if (hash == NULL) + return MEMORY_E; +#endif + + ret = wc_HashInit(hash, hashT); + if (ret != 0) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(hash, NULL, DYNAMIC_TYPE_HASHCTX); + #endif + return ret; + } + + ret = wc_HashUpdate(hash, hashT, buffer, totalLen); + + if (ret == 0) + ret = wc_HashFinal(hash, hashT, Ai); + + for (i = 1; i < iterations; i++) { + if (ret == 0) + ret = wc_HashUpdate(hash, hashT, Ai, 
u); + if (ret == 0) + ret = wc_HashFinal(hash, hashT, Ai); + } + + wc_HashFree(hash, hashT); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(hash, NULL, DYNAMIC_TYPE_HASHCTX); +#endif + + return ret; +} + + +int wc_PKCS12_PBKDF(byte* output, const byte* passwd, int passLen, + const byte* salt, int saltLen, int iterations, int kLen, int hashType, + int id) +{ + return wc_PKCS12_PBKDF_ex(output, passwd, passLen, salt, saltLen, + iterations, kLen, hashType, id, NULL); +} + + +/* extended API that allows a heap hint to be used */ +int wc_PKCS12_PBKDF_ex(byte* output, const byte* passwd, int passLen, + const byte* salt, int saltLen, int iterations, int kLen, + int hashType, int id, void* heap) +{ + /* all in bytes instead of bits */ + word32 u, v, dLen, pLen, iLen, sLen, totalLen; + int dynamic = 0; + int ret = 0; + int i; + byte *D, *S, *P, *I; +#ifdef WOLFSSL_SMALL_STACK + byte staticBuffer[1]; /* force dynamic usage */ +#else + byte staticBuffer[1024]; +#endif + byte* buffer = staticBuffer; + +#ifdef WOLFSSL_SMALL_STACK + byte* Ai; + byte* B; +#else + byte Ai[WC_MAX_DIGEST_SIZE]; + byte B[WC_MAX_BLOCK_SIZE]; +#endif + enum wc_HashType hashT; + + (void)heap; + + if (output == NULL || passLen < 0 || saltLen < 0 || kLen < 0) { + return BAD_FUNC_ARG; + } + + if (iterations <= 0) + iterations = 1; + + hashT = wc_HashTypeConvert(hashType); + ret = wc_HashGetDigestSize(hashT); + if (ret < 0) + return ret; + u = ret; + + ret = wc_HashGetBlockSize(hashT); + if (ret < 0) + return ret; + v = ret; + +#ifdef WOLFSSL_SMALL_STACK + Ai = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (Ai == NULL) + return MEMORY_E; + + B = (byte*)XMALLOC(WC_MAX_BLOCK_SIZE, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (B == NULL) { + XFREE(Ai, heap, DYNAMIC_TYPE_TMP_BUFFER); + return MEMORY_E; + } +#endif + + XMEMSET(Ai, 0, WC_MAX_DIGEST_SIZE); + XMEMSET(B, 0, WC_MAX_BLOCK_SIZE); + + dLen = v; + sLen = v * ((saltLen + v - 1) / v); + if (passLen) + pLen = v * ((passLen + v - 1) / v); + else + pLen = 0; + iLen = sLen + pLen; + + totalLen = dLen + sLen + pLen; + + if (totalLen > sizeof(staticBuffer)) { + buffer = (byte*)XMALLOC(totalLen, heap, DYNAMIC_TYPE_KEY); + if (buffer == NULL) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(Ai, heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(B, heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return MEMORY_E; + } + dynamic = 1; + } + + D = buffer; + S = D + dLen; + P = S + sLen; + I = S; + + XMEMSET(D, id, dLen); + + for (i = 0; i < (int)sLen; i++) + S[i] = salt[i % saltLen]; + for (i = 0; i < (int)pLen; i++) + P[i] = passwd[i % passLen]; + + while (kLen > 0) { + word32 currentLen; + mp_int B1; + + ret = DoPKCS12Hash(hashType, buffer, totalLen, Ai, u, iterations); + if (ret < 0) + break; + + for (i = 0; i < (int)v; i++) + B[i] = Ai[i % u]; + + if (mp_init(&B1) != MP_OKAY) + ret = MP_INIT_E; + else if (mp_read_unsigned_bin(&B1, B, v) != MP_OKAY) + ret = MP_READ_E; + else if (mp_add_d(&B1, (mp_digit)1, &B1) != MP_OKAY) + ret = MP_ADD_E; + + if (ret != 0) { + mp_clear(&B1); + break; + } + + for (i = 0; i < (int)iLen; i += v) { + int outSz; + mp_int i1; + mp_int res; + + if (mp_init_multi(&i1, &res, NULL, NULL, NULL, NULL) != MP_OKAY) { + ret = MP_INIT_E; + break; + } + if (mp_read_unsigned_bin(&i1, I + i, v) != MP_OKAY) + ret = MP_READ_E; + else if (mp_add(&i1, &B1, &res) != MP_OKAY) + ret = MP_ADD_E; + else if ( (outSz = mp_unsigned_bin_size(&res)) < 0) + ret = MP_TO_E; + else { + if (outSz > (int)v) { + /* take off MSB */ + byte tmp[WC_MAX_BLOCK_SIZE + 1]; + ret = mp_to_unsigned_bin(&res, tmp); + XMEMCPY(I 
+ i, tmp + 1, v); + } + else if (outSz < (int)v) { + XMEMSET(I + i, 0, v - outSz); + ret = mp_to_unsigned_bin(&res, I + i + v - outSz); + } + else + ret = mp_to_unsigned_bin(&res, I + i); + } + + mp_clear(&i1); + mp_clear(&res); + if (ret < 0) break; + } + + currentLen = min(kLen, (int)u); + XMEMCPY(output, Ai, currentLen); + output += currentLen; + kLen -= currentLen; + mp_clear(&B1); + } + + if (dynamic) XFREE(buffer, heap, DYNAMIC_TYPE_KEY); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(Ai, heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(B, heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return ret; +} + +#endif /* HAVE_PKCS12 */ + +#ifdef HAVE_SCRYPT +/* Rotate the 32-bit value a by b bits to the left. + * + * a 32-bit value. + * b Number of bits to rotate. + * returns rotated value. + */ +#define R(a, b) rotlFixed(a, b) + +/* One round of Salsa20/8. + * Code taken from RFC 7914: scrypt PBKDF. + * + * out Output buffer. + * in Input data to hash. + */ +static void scryptSalsa(word32* out, word32* in) +{ + int i; + word32 x[16]; + +#ifdef LITTLE_ENDIAN_ORDER + for (i = 0; i < 16; ++i) + x[i] = in[i]; +#else + for (i = 0; i < 16; i++) + x[i] = ByteReverseWord32(in[i]); +#endif + for (i = 8; i > 0; i -= 2) { + x[ 4] ^= R(x[ 0] + x[12], 7); x[ 8] ^= R(x[ 4] + x[ 0], 9); + x[12] ^= R(x[ 8] + x[ 4], 13); x[ 0] ^= R(x[12] + x[ 8], 18); + x[ 9] ^= R(x[ 5] + x[ 1], 7); x[13] ^= R(x[ 9] + x[ 5], 9); + x[ 1] ^= R(x[13] + x[ 9], 13); x[ 5] ^= R(x[ 1] + x[13], 18); + x[14] ^= R(x[10] + x[ 6], 7); x[ 2] ^= R(x[14] + x[10], 9); + x[ 6] ^= R(x[ 2] + x[14], 13); x[10] ^= R(x[ 6] + x[ 2], 18); + x[ 3] ^= R(x[15] + x[11], 7); x[ 7] ^= R(x[ 3] + x[15], 9); + x[11] ^= R(x[ 7] + x[ 3], 13); x[15] ^= R(x[11] + x[ 7], 18); + x[ 1] ^= R(x[ 0] + x[ 3], 7); x[ 2] ^= R(x[ 1] + x[ 0], 9); + x[ 3] ^= R(x[ 2] + x[ 1], 13); x[ 0] ^= R(x[ 3] + x[ 2], 18); + x[ 6] ^= R(x[ 5] + x[ 4], 7); x[ 7] ^= R(x[ 6] + x[ 5], 9); + x[ 4] ^= R(x[ 7] + x[ 6], 13); x[ 5] ^= R(x[ 4] + x[ 7], 18); + x[11] ^= R(x[10] + x[ 9], 7); x[ 8] ^= R(x[11] + x[10], 9); + x[ 9] ^= R(x[ 8] + x[11], 13); x[10] ^= R(x[ 9] + x[ 8], 18); + x[12] ^= R(x[15] + x[14], 7); x[13] ^= R(x[12] + x[15], 9); + x[14] ^= R(x[13] + x[12], 13); x[15] ^= R(x[14] + x[13], 18); + } +#ifdef LITTLE_ENDIAN_ORDER + for (i = 0; i < 16; ++i) + out[i] = in[i] + x[i]; +#else + for (i = 0; i < 16; i++) + out[i] = ByteReverseWord32(ByteReverseWord32(in[i]) + x[i]); +#endif +} + +/* Mix a block using Salsa20/8. + * Based on RFC 7914: scrypt PBKDF. + * + * b Blocks to mix. + * y Temporary storage. + * r Size of the block. + */ +static void scryptBlockMix(byte* b, byte* y, int r) +{ + byte x[64]; +#ifdef WORD64_AVAILABLE + word64* b64 = (word64*)b; + word64* y64 = (word64*)y; + word64* x64 = (word64*)x; +#else + word32* b32 = (word32*)b; + word32* y32 = (word32*)y; + word32* x32 = (word32*)x; +#endif + int i; + int j; + + /* Step 1. */ + XMEMCPY(x, b + (2 * r - 1) * 64, sizeof(x)); + /* Step 2. */ + for (i = 0; i < 2 * r; i++) + { +#ifdef WORD64_AVAILABLE + for (j = 0; j < 8; j++) + x64[j] ^= b64[i * 8 + j]; +#else + for (j = 0; j < 16; j++) + x32[j] ^= b32[i * 16 + j]; +#endif + scryptSalsa((word32*)x, (word32*)x); + XMEMCPY(y + i * 64, x, sizeof(x)); + } + /* Step 3. 
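+     * Interleave the mixed blocks back into B: B'[i] = Y[2i] and
+     * B'[r + i] = Y[2i + 1] for i = 0..r-1 (RFC 7914, Section 4).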
*/ + for (i = 0; i < r; i++) { +#ifdef WORD64_AVAILABLE + for (j = 0; j < 8; j++) { + b64[i * 8 + j] = y64[2 * i * 8 + j]; + b64[(r + i) * 8 + j] = y64[(2 * i + 1) * 8 + j]; + } +#else + for (j = 0; j < 16; j++) { + b32[i * 16 + j] = y32[2 * i * 16 + j]; + b32[(r + i) * 16 + j] = y32[(2 * i + 1) * 16 + j]; + } +#endif + } +} + +/* Random oracles mix. + * Based on RFC 7914: scrypt PBKDF. + * + * x Data to mix. + * v Temporary buffer. + * y Temporary buffer for the block mix. + * r Block size parameter. + * n CPU/Memory cost parameter. + */ +static void scryptROMix(byte* x, byte* v, byte* y, int r, word32 n) +{ + word32 i; + word32 j; + word32 k; + word32 bSz = 128 * r; +#ifdef WORD64_AVAILABLE + word64* x64 = (word64*)x; + word64* v64 = (word64*)v; +#else + word32* x32 = (word32*)x; + word32* v32 = (word32*)v; +#endif + + /* Step 1. X = B (B not needed therefore not implemented) */ + /* Step 2. */ + for (i = 0; i < n; i++) + { + XMEMCPY(v + i * bSz, x, bSz); + scryptBlockMix(x, y, r); + } + + /* Step 3. */ + for (i = 0; i < n; i++) + { +#ifdef LITTLE_ENDIAN_ORDER +#ifdef WORD64_AVAILABLE + j = *(word64*)(x + (2*r - 1) * 64) & (n-1); +#else + j = *(word32*)(x + (2*r - 1) * 64) & (n-1); +#endif +#else + byte* t = x + (2*r - 1) * 64; + j = (t[0] | (t[1] << 8) | (t[2] << 16) | ((word32)t[3] << 24)) & (n-1); +#endif +#ifdef WORD64_AVAILABLE + for (k = 0; k < bSz / 8; k++) + x64[k] ^= v64[j * bSz / 8 + k]; +#else + for (k = 0; k < bSz / 4; k++) + x32[k] ^= v32[j * bSz / 4 + k]; +#endif + scryptBlockMix(x, y, r); + } + /* Step 4. B' = X (B = X = B' so not needed, therefore not implemented) */ +} + +/* Generates an key derived from a password and salt using a memory hard + * algorithm. + * Implements RFC 7914: scrypt PBKDF. + * + * output The derived key. + * passwd The password to derive key from. + * passLen The length of the password. + * salt The key specific data. + * saltLen The length of the salt data. + * cost The CPU/memory cost parameter. Range: 1..(128*r/8-1) + * (Iterations = 2^cost) + * blockSize The number of 128 byte octets in a working block. + * parallel The number of parallel mix operations to perform. + * (Note: this implementation does not use threads.) + * dkLen The length of the derived key in bytes. + * returns BAD_FUNC_ARG when: blockSize is too large for cost. + */ +int wc_scrypt(byte* output, const byte* passwd, int passLen, + const byte* salt, int saltLen, int cost, int blockSize, + int parallel, int dkLen) +{ + int ret = 0; + int i; + byte* v = NULL; + byte* y = NULL; + byte* blocks = NULL; + word32 blocksSz; + word32 bSz; + + if (blockSize > 8) + return BAD_FUNC_ARG; + + if (cost < 1 || cost >= 128 * blockSize / 8 || parallel < 1 || dkLen < 1) + return BAD_FUNC_ARG; + + bSz = 128 * blockSize; + blocksSz = bSz * parallel; + blocks = (byte*)XMALLOC(blocksSz, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (blocks == NULL) + goto end; + /* Temporary for scryptROMix. */ + v = (byte*)XMALLOC((1 << cost) * bSz, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (v == NULL) + goto end; + /* Temporary for scryptBlockMix. */ + y = (byte*)XMALLOC(blockSize * 128, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (y == NULL) + goto end; + + /* Step 1. */ + ret = wc_PBKDF2(blocks, passwd, passLen, salt, saltLen, 1, blocksSz, + WC_SHA256); + if (ret != 0) + goto end; + + /* Step 2. */ + for (i = 0; i < parallel; i++) + scryptROMix(blocks + i * bSz, v, y, blockSize, 1 << cost); + + /* Step 3. 
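+     * Derive the key from the mixed blocks with one more PBKDF2 pass:
+     * DK = PBKDF2-HMAC-SHA-256(passwd, B, c = 1, dkLen) (RFC 7914, Section 6).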
*/ + ret = wc_PBKDF2(output, passwd, passLen, blocks, blocksSz, 1, dkLen, + WC_SHA256); +end: + if (blocks != NULL) + XFREE(blocks, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (v != NULL) + XFREE(v, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (y != NULL) + XFREE(y, NULL, DYNAMIC_TYPE_TMP_BUFFER); + + return ret; +} + +/* Generates an key derived from a password and salt using a memory hard + * algorithm. + * Implements RFC 7914: scrypt PBKDF. + * + * output Derived key. + * passwd Password to derive key from. + * passLen Length of the password. + * salt Key specific data. + * saltLen Length of the salt data. + * iterations Number of iterations to perform. Range: 1 << (1..(128*r/8-1)) + * blockSize Number of 128 byte octets in a working block. + * parallel Number of parallel mix operations to perform. + * (Note: this implementation does not use threads.) + * dkLen Length of the derived key in bytes. + * returns BAD_FUNC_ARG when: iterations is not a power of 2 or blockSize is too + * large for iterations. + */ +int wc_scrypt_ex(byte* output, const byte* passwd, int passLen, + const byte* salt, int saltLen, word32 iterations, + int blockSize, int parallel, int dkLen) +{ + int cost; + + /* Iterations must be a power of 2. */ + if ((iterations & (iterations - 1)) != 0) + return BAD_FUNC_ARG; + + for (cost = -1; iterations != 0; cost++) { + iterations >>= 1; + } + + return wc_scrypt(output, passwd, passLen, salt, saltLen, cost, blockSize, + parallel, dkLen); +} +#endif /* HAVE_SCRYPT */ + +#endif /* NO_PWDBASED */ diff --git a/client/wolfssl/wolfcrypt/src/rabbit.c b/client/wolfssl/wolfcrypt/src/rabbit.c new file mode 100644 index 0000000..820fd0a --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/rabbit.c @@ -0,0 +1,342 @@ +/* rabbit.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifndef NO_RABBIT + +#include +#include +#include +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + + +#ifdef BIG_ENDIAN_ORDER + #define LITTLE32(x) ByteReverseWord32(x) +#else + #define LITTLE32(x) (x) +#endif + +#define U32V(x) ((word32)(x) & 0xFFFFFFFFU) + + +/* Square a 32-bit unsigned integer to obtain the 64-bit result and return */ +/* the upper 32 bits XOR the lower 32 bits */ +static word32 RABBIT_g_func(word32 x) +{ + /* Temporary variables */ + word32 a, b, h, l; + + /* Construct high and low argument for squaring */ + a = x&0xFFFF; + b = x>>16; + + /* Calculate high and low result of squaring */ + h = (((U32V(a*a)>>17) + U32V(a*b))>>15) + b*b; + l = x*x; + + /* Return high XOR low */ + return U32V(h^l); +} + + +/* Calculate the next internal state */ +static void RABBIT_next_state(RabbitCtx* ctx) +{ + /* Temporary variables */ + word32 g[8], c_old[8], i; + + /* Save old counter values */ + for (i=0; i<8; i++) + c_old[i] = ctx->c[i]; + + /* Calculate new counter values */ + ctx->c[0] = U32V(ctx->c[0] + 0x4D34D34D + ctx->carry); + ctx->c[1] = U32V(ctx->c[1] + 0xD34D34D3 + (ctx->c[0] < c_old[0])); + ctx->c[2] = U32V(ctx->c[2] + 0x34D34D34 + (ctx->c[1] < c_old[1])); + ctx->c[3] = U32V(ctx->c[3] + 0x4D34D34D + (ctx->c[2] < c_old[2])); + ctx->c[4] = U32V(ctx->c[4] + 0xD34D34D3 + (ctx->c[3] < c_old[3])); + ctx->c[5] = U32V(ctx->c[5] + 0x34D34D34 + (ctx->c[4] < c_old[4])); + ctx->c[6] = U32V(ctx->c[6] + 0x4D34D34D + (ctx->c[5] < c_old[5])); + ctx->c[7] = U32V(ctx->c[7] + 0xD34D34D3 + (ctx->c[6] < c_old[6])); + ctx->carry = (ctx->c[7] < c_old[7]); + + /* Calculate the g-values */ + for (i=0;i<8;i++) + g[i] = RABBIT_g_func(U32V(ctx->x[i] + ctx->c[i])); + + /* Calculate new state values */ + ctx->x[0] = U32V(g[0] + rotlFixed(g[7],16) + rotlFixed(g[6], 16)); + ctx->x[1] = U32V(g[1] + rotlFixed(g[0], 8) + g[7]); + ctx->x[2] = U32V(g[2] + rotlFixed(g[1],16) + rotlFixed(g[0], 16)); + ctx->x[3] = U32V(g[3] + rotlFixed(g[2], 8) + g[1]); + ctx->x[4] = U32V(g[4] + rotlFixed(g[3],16) + rotlFixed(g[2], 16)); + ctx->x[5] = U32V(g[5] + rotlFixed(g[4], 8) + g[3]); + ctx->x[6] = U32V(g[6] + rotlFixed(g[5],16) + rotlFixed(g[4], 16)); + ctx->x[7] = U32V(g[7] + rotlFixed(g[6], 8) + g[5]); +} + + +/* IV setup */ +static void wc_RabbitSetIV(Rabbit* ctx, const byte* inIv) +{ + /* Temporary variables */ + word32 i0, i1, i2, i3, i; + word32 iv[2]; + + if (inIv) + XMEMCPY(iv, inIv, sizeof(iv)); + else + XMEMSET(iv, 0, sizeof(iv)); + + /* Generate four subvectors */ + i0 = LITTLE32(iv[0]); + i2 = LITTLE32(iv[1]); + i1 = (i0>>16) | (i2&0xFFFF0000); + i3 = (i2<<16) | (i0&0x0000FFFF); + + /* Modify counter values */ + ctx->workCtx.c[0] = ctx->masterCtx.c[0] ^ i0; + ctx->workCtx.c[1] = ctx->masterCtx.c[1] ^ i1; + ctx->workCtx.c[2] = ctx->masterCtx.c[2] ^ i2; + ctx->workCtx.c[3] = ctx->masterCtx.c[3] ^ i3; + ctx->workCtx.c[4] = ctx->masterCtx.c[4] ^ i0; + ctx->workCtx.c[5] = ctx->masterCtx.c[5] ^ i1; + ctx->workCtx.c[6] = ctx->masterCtx.c[6] ^ i2; + ctx->workCtx.c[7] = ctx->masterCtx.c[7] ^ i3; + + /* Copy state variables */ + for (i=0; i<8; i++) + ctx->workCtx.x[i] = ctx->masterCtx.x[i]; + ctx->workCtx.carry = ctx->masterCtx.carry; + + /* Iterate the system four times */ + for (i=0; i<4; i++) + 
RABBIT_next_state(&(ctx->workCtx)); +} + + +/* Key setup */ +static WC_INLINE int DoKey(Rabbit* ctx, const byte* key, const byte* iv) +{ + /* Temporary variables */ + word32 k0, k1, k2, k3, i; + + /* Generate four subkeys */ + k0 = LITTLE32(*(word32*)(key+ 0)); + k1 = LITTLE32(*(word32*)(key+ 4)); + k2 = LITTLE32(*(word32*)(key+ 8)); + k3 = LITTLE32(*(word32*)(key+12)); + + /* Generate initial state variables */ + ctx->masterCtx.x[0] = k0; + ctx->masterCtx.x[2] = k1; + ctx->masterCtx.x[4] = k2; + ctx->masterCtx.x[6] = k3; + ctx->masterCtx.x[1] = U32V(k3<<16) | (k2>>16); + ctx->masterCtx.x[3] = U32V(k0<<16) | (k3>>16); + ctx->masterCtx.x[5] = U32V(k1<<16) | (k0>>16); + ctx->masterCtx.x[7] = U32V(k2<<16) | (k1>>16); + + /* Generate initial counter values */ + ctx->masterCtx.c[0] = rotlFixed(k2, 16); + ctx->masterCtx.c[2] = rotlFixed(k3, 16); + ctx->masterCtx.c[4] = rotlFixed(k0, 16); + ctx->masterCtx.c[6] = rotlFixed(k1, 16); + ctx->masterCtx.c[1] = (k0&0xFFFF0000) | (k1&0xFFFF); + ctx->masterCtx.c[3] = (k1&0xFFFF0000) | (k2&0xFFFF); + ctx->masterCtx.c[5] = (k2&0xFFFF0000) | (k3&0xFFFF); + ctx->masterCtx.c[7] = (k3&0xFFFF0000) | (k0&0xFFFF); + + /* Clear carry bit */ + ctx->masterCtx.carry = 0; + + /* Iterate the system four times */ + for (i=0; i<4; i++) + RABBIT_next_state(&(ctx->masterCtx)); + + /* Modify the counters */ + for (i=0; i<8; i++) + ctx->masterCtx.c[i] ^= ctx->masterCtx.x[(i+4)&0x7]; + + /* Copy master instance to work instance */ + for (i=0; i<8; i++) { + ctx->workCtx.x[i] = ctx->masterCtx.x[i]; + ctx->workCtx.c[i] = ctx->masterCtx.c[i]; + } + ctx->workCtx.carry = ctx->masterCtx.carry; + + wc_RabbitSetIV(ctx, iv); + + return 0; +} + + +int wc_Rabbit_SetHeap(Rabbit* ctx, void* heap) +{ + if (ctx == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef XSTREAM_ALIGN + ctx->heap = heap; +#endif + + (void)heap; + return 0; +} + + +/* Key setup */ +int wc_RabbitSetKey(Rabbit* ctx, const byte* key, const byte* iv) +{ + if (ctx == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef XSTREAM_ALIGN + /* default heap to NULL or heap test value */ + #ifdef WOLFSSL_HEAP_TEST + ctx->heap = (void*)WOLFSSL_HEAP_TEST; + #else + ctx->heap = NULL; + #endif /* WOLFSSL_HEAP_TEST */ + + if ((wolfssl_word)key % 4) { + int alignKey[4]; + + /* iv aligned in SetIV */ + WOLFSSL_MSG("wc_RabbitSetKey unaligned key"); + + XMEMCPY(alignKey, key, sizeof(alignKey)); + + return DoKey(ctx, (const byte*)alignKey, iv); + } +#endif /* XSTREAM_ALIGN */ + + return DoKey(ctx, key, iv); +} + + +/* Encrypt/decrypt a message of any size */ +static WC_INLINE int DoProcess(Rabbit* ctx, byte* output, const byte* input, + word32 msglen) +{ + /* Encrypt/decrypt all full blocks */ + while (msglen >= 16) { + /* Iterate the system */ + RABBIT_next_state(&(ctx->workCtx)); + + /* Encrypt/decrypt 16 bytes of data */ + *(word32*)(output+ 0) = *(word32*)(input+ 0) ^ + LITTLE32(ctx->workCtx.x[0] ^ (ctx->workCtx.x[5]>>16) ^ + U32V(ctx->workCtx.x[3]<<16)); + *(word32*)(output+ 4) = *(word32*)(input+ 4) ^ + LITTLE32(ctx->workCtx.x[2] ^ (ctx->workCtx.x[7]>>16) ^ + U32V(ctx->workCtx.x[5]<<16)); + *(word32*)(output+ 8) = *(word32*)(input+ 8) ^ + LITTLE32(ctx->workCtx.x[4] ^ (ctx->workCtx.x[1]>>16) ^ + U32V(ctx->workCtx.x[7]<<16)); + *(word32*)(output+12) = *(word32*)(input+12) ^ + LITTLE32(ctx->workCtx.x[6] ^ (ctx->workCtx.x[3]>>16) ^ + U32V(ctx->workCtx.x[1]<<16)); + + /* Increment pointers and decrement length */ + input += 16; + output += 16; + msglen -= 16; + } + + /* Encrypt/decrypt remaining data */ + if (msglen) { + + word32 i; + 
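+        /* Tail of the message (< 16 bytes): generate one full 128-bit
+         * keystream block into scratch storage and XOR only msglen
+         * bytes of it into the output. */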
word32 tmp[4]; + byte* buffer = (byte*)tmp; + + XMEMSET(tmp, 0, sizeof(tmp)); /* help static analysis */ + + /* Iterate the system */ + RABBIT_next_state(&(ctx->workCtx)); + + /* Generate 16 bytes of pseudo-random data */ + tmp[0] = LITTLE32(ctx->workCtx.x[0] ^ + (ctx->workCtx.x[5]>>16) ^ U32V(ctx->workCtx.x[3]<<16)); + tmp[1] = LITTLE32(ctx->workCtx.x[2] ^ + (ctx->workCtx.x[7]>>16) ^ U32V(ctx->workCtx.x[5]<<16)); + tmp[2] = LITTLE32(ctx->workCtx.x[4] ^ + (ctx->workCtx.x[1]>>16) ^ U32V(ctx->workCtx.x[7]<<16)); + tmp[3] = LITTLE32(ctx->workCtx.x[6] ^ + (ctx->workCtx.x[3]>>16) ^ U32V(ctx->workCtx.x[1]<<16)); + + /* Encrypt/decrypt the data */ + for (i=0; iheap, DYNAMIC_TYPE_TMP_BUFFER); + if (tmp == NULL) return MEMORY_E; + + XMEMCPY(tmp, input, msglen); + DoProcess(ctx, tmp, tmp, msglen); + XMEMCPY(output, tmp, msglen); + + XFREE(tmp, ctx->heap, DYNAMIC_TYPE_TMP_BUFFER); + + return 0; + #else + return BAD_ALIGN_E; + #endif + } +#endif /* XSTREAM_ALIGN */ + + return DoProcess(ctx, output, input, msglen); +} + + +#endif /* NO_RABBIT */ diff --git a/client/wolfssl/wolfcrypt/src/random.c b/client/wolfssl/wolfcrypt/src/random.c new file mode 100644 index 0000000..53041d1 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/random.c @@ -0,0 +1,2552 @@ +/* random.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include +#include + +/* on HPUX 11 you may need to install /dev/random see + http://h20293.www2.hp.com/portal/swdepot/displayProductInfo.do?productNumber=KRNG11I + +*/ + +#if defined(HAVE_FIPS) && \ + defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2) + + /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ + #define FIPS_NO_WRAPPERS + + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$c") + #pragma const_seg(".fipsB$c") + #endif +#endif + + +#include +#include + + +/* If building for old FIPS. 
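+ * (HAVE_FIPS_VERSION < 2), the wc_* entry points below reduce to thin
+ * wrappers around the functions inside the FIPS boundary, such as
+ * InitRng_fips() and RNG_GenerateBlock_fips().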
*/ +#if defined(HAVE_FIPS) && \ + (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2)) + +int wc_GenerateSeed(OS_Seed* os, byte* seed, word32 sz) +{ + return GenerateSeed(os, seed, sz); +} + +int wc_InitRng_ex(WC_RNG* rng, void* heap, int devId) +{ + (void)heap; + (void)devId; + return InitRng_fips(rng); +} + +int wc_InitRng(WC_RNG* rng) +{ + return InitRng_fips(rng); +} + + +int wc_RNG_GenerateBlock(WC_RNG* rng, byte* b, word32 sz) +{ + return RNG_GenerateBlock_fips(rng, b, sz); +} + + +int wc_RNG_GenerateByte(WC_RNG* rng, byte* b) +{ + return RNG_GenerateByte(rng, b); +} + +#ifdef HAVE_HASHDRBG + + int wc_FreeRng(WC_RNG* rng) + { + return FreeRng_fips(rng); + } + + int wc_RNG_HealthTest(int reseed, const byte* seedA, word32 seedASz, + const byte* seedB, word32 seedBSz, + byte* output, word32 outputSz) + { + return RNG_HealthTest_fips(reseed, seedA, seedASz, + seedB, seedBSz, output, outputSz); + } +#endif /* HAVE_HASHDRBG */ + +#else /* else build without fips, or for new fips */ + +#ifndef WC_NO_RNG /* if not FIPS and RNG is disabled then do not compile */ + +#include + +#ifdef WOLF_CRYPTO_CB + #include +#endif + +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +#if defined(WOLFSSL_SGX) + #include +#elif defined(USE_WINDOWS_API) + #ifndef _WIN32_WINNT + #define _WIN32_WINNT 0x0400 + #endif + #include + #include +#elif defined(HAVE_WNR) + #include + #include + wolfSSL_Mutex wnr_mutex; /* global netRandom mutex */ + int wnr_timeout = 0; /* entropy timeout, mililseconds */ + int wnr_mutex_init = 0; /* flag for mutex init */ + wnr_context* wnr_ctx; /* global netRandom context */ +#elif defined(FREESCALE_KSDK_2_0_TRNG) + #include "fsl_trng.h" +#elif defined(FREESCALE_KSDK_2_0_RNGA) + #include "fsl_rnga.h" +#elif defined(WOLFSSL_WICED) + #include "wiced_crypto.h" +#elif defined(WOLFSSL_NETBURNER) + #include + #include + #include +#elif defined(NO_DEV_RANDOM) +#elif defined(CUSTOM_RAND_GENERATE) +#elif defined(CUSTOM_RAND_GENERATE_BLOCK) +#elif defined(CUSTOM_RAND_GENERATE_SEED) +#elif defined(WOLFSSL_GENSEED_FORTEST) +#elif defined(WOLFSSL_MDK_ARM) +#elif defined(WOLFSSL_IAR_ARM) +#elif defined(WOLFSSL_ROWLEY_ARM) +#elif defined(WOLFSSL_EMBOS) +#elif defined(WOLFSSL_DEOS) +#elif defined(MICRIUM) +#elif defined(WOLFSSL_NUCLEUS) +#elif defined(WOLFSSL_PB) +#elif defined(WOLFSSL_ZEPHYR) +#elif defined(WOLFSSL_TELIT_M2MB) +#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_TRNG) +#else + /* include headers that may be needed to get good seed */ + #include + #ifndef EBSNET + #include + #endif +#endif + + +#if defined(HAVE_INTEL_RDRAND) || defined(HAVE_INTEL_RDSEED) + static word32 intel_flags = 0; + static void wc_InitRng_IntelRD(void) + { + intel_flags = cpuid_get_flags(); + } + #ifdef HAVE_INTEL_RDSEED + static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz); + #endif + #ifdef HAVE_INTEL_RDRAND + static int wc_GenerateRand_IntelRD(OS_Seed* os, byte* output, word32 sz); + #endif + +#ifdef USE_WINDOWS_API + #include +#endif /* USE_WINDOWS_API */ +#endif + +/* Start NIST DRBG code */ +#ifdef HAVE_HASHDRBG + +#define OUTPUT_BLOCK_LEN (WC_SHA256_DIGEST_SIZE) +#define MAX_REQUEST_LEN (0x10000) +#define RESEED_INTERVAL WC_RESEED_INTERVAL + + +/* For FIPS builds, the user should not be adjusting the values. 
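+ * below; defining RNG_SECURITY_STRENGTH, ENTROPY_SCALE_FACTOR, or
+ * SEED_BLOCK_SZ on such a build trips the #error that follows.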
*/
+#if defined(HAVE_FIPS) && \
+    defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+    #if defined(RNG_SECURITY_STRENGTH) \
+            || defined(ENTROPY_SCALE_FACTOR) \
+            || defined(SEED_BLOCK_SZ)
+
+        #error "Do not change the RNG parameters for FIPS builds."
+    #endif
+#endif
+
+
+/* The security strength for the RNG is the target number of bits of
+ * entropy you are looking for in a seed. */
+#ifndef RNG_SECURITY_STRENGTH
+    #if defined(HAVE_FIPS) && \
+        defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+        /* SHA-256 requires a minimum of 256 bits of entropy. The goal
+         * of 1024 will provide 4 times that. */
+        #define RNG_SECURITY_STRENGTH (1024)
+    #else
+        /* If not using FIPS or using old FIPS, set the number down a bit.
+         * More is better, but more is also slower. */
+        #define RNG_SECURITY_STRENGTH (256)
+    #endif
+#endif
+
+#ifndef ENTROPY_SCALE_FACTOR
+    /* The entropy scale factor should be the whole-number inverse of the
+     * minimum bits of entropy per bit of NDRNG output. */
+    #if defined(HAVE_INTEL_RDSEED) || defined(HAVE_INTEL_RDRAND)
+        /* The value of 2 applies to Intel's RDSEED, which provides about
+         * 0.5 bits minimum of entropy per bit. */
+        #define ENTROPY_SCALE_FACTOR 2
+    #else
+        /* Setting the default to 1. */
+        #define ENTROPY_SCALE_FACTOR 1
+    #endif
+#endif
+
+#ifndef SEED_BLOCK_SZ
+    /* The seed block size is the size of the output of the underlying NDRNG.
+     * This value is used for testing the output of the NDRNG. */
+    #if defined(HAVE_INTEL_RDSEED) || defined(HAVE_INTEL_RDRAND)
+        /* RDSEED outputs in blocks of 64 bits. */
+        #define SEED_BLOCK_SZ sizeof(word64)
+    #else
+        /* Setting the default to 4. */
+        #define SEED_BLOCK_SZ 4
+    #endif
+#endif
+
+#define SEED_SZ (RNG_SECURITY_STRENGTH*ENTROPY_SCALE_FACTOR/8)
+
+/* The maximum seed size will be the seed size plus a seed block for the
+ * test, and an additional half of the seed size. This additional half
+ * is in case the user does not supply a nonce. A nonce will be obtained
+ * from the NDRNG.
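+ * For example, with the non-Intel, non-FIPS defaults above
+ * (RNG_SECURITY_STRENGTH = 256, ENTROPY_SCALE_FACTOR = 1,
+ * SEED_BLOCK_SZ = 4): SEED_SZ = 256 * 1 / 8 = 32, so
+ * MAX_SEED_SZ = 32 + 16 + 4 = 52 bytes.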
*/ +#define MAX_SEED_SZ (SEED_SZ + SEED_SZ/2 + SEED_BLOCK_SZ) + + +/* Internal return codes */ +#define DRBG_SUCCESS 0 +#define DRBG_FAILURE 1 +#define DRBG_NEED_RESEED 2 +#define DRBG_CONT_FAILURE 3 + +/* RNG health states */ +#define DRBG_NOT_INIT 0 +#define DRBG_OK 1 +#define DRBG_FAILED 2 +#define DRBG_CONT_FAILED 3 + +#define RNG_HEALTH_TEST_CHECK_SIZE (WC_SHA256_DIGEST_SIZE * 4) + +/* Verify max gen block len */ +#if RNG_MAX_BLOCK_LEN > MAX_REQUEST_LEN + #error RNG_MAX_BLOCK_LEN is larger than NIST DBRG max request length +#endif + +enum { + drbgInitC = 0, + drbgReseed = 1, + drbgGenerateW = 2, + drbgGenerateH = 3, + drbgInitV +}; + +/* NOTE: if DRBG struct is changed please update random.h drbg_data size */ +typedef struct DRBG { + word32 reseedCtr; + word32 lastBlock; + byte V[DRBG_SEED_LEN]; + byte C[DRBG_SEED_LEN]; +#if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB) + void* heap; + int devId; +#endif + byte matchCount; +#ifdef WOLFSSL_SMALL_STACK_CACHE + wc_Sha256 sha256; +#endif +} DRBG; + + +static int wc_RNG_HealthTestLocal(int reseed); + +/* Hash Derivation Function */ +/* Returns: DRBG_SUCCESS or DRBG_FAILURE */ +static int Hash_df(DRBG* drbg, byte* out, word32 outSz, byte type, + const byte* inA, word32 inASz, + const byte* inB, word32 inBSz) +{ + int ret = DRBG_FAILURE; + byte ctr; + int i; + int len; + word32 bits = (outSz * 8); /* reverse byte order */ +#ifdef WOLFSSL_SMALL_STACK_CACHE + wc_Sha256* sha = &drbg->sha256; +#else + wc_Sha256 sha[1]; +#endif +#ifdef WC_ASYNC_ENABLE_SHA256 + DECLARE_VAR(digest, byte, WC_SHA256_DIGEST_SIZE, drbg->heap); + if (digest == NULL) + return MEMORY_E; +#else + byte digest[WC_SHA256_DIGEST_SIZE]; +#endif + + (void)drbg; +#ifdef WC_ASYNC_ENABLE_SHA256 + if (digest == NULL) + return DRBG_FAILURE; +#endif + +#ifdef LITTLE_ENDIAN_ORDER + bits = ByteReverseWord32(bits); +#endif + len = (outSz / OUTPUT_BLOCK_LEN) + + ((outSz % OUTPUT_BLOCK_LEN) ? 1 : 0); + + for (i = 0, ctr = 1; i < len; i++, ctr++) { +#ifndef WOLFSSL_SMALL_STACK_CACHE + #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB) + ret = wc_InitSha256_ex(sha, drbg->heap, drbg->devId); + #else + ret = wc_InitSha256(sha); + #endif + if (ret != 0) + break; + + if (ret == 0) +#endif + ret = wc_Sha256Update(sha, &ctr, sizeof(ctr)); + if (ret == 0) + ret = wc_Sha256Update(sha, (byte*)&bits, sizeof(bits)); + + if (ret == 0) { + /* churning V is the only string that doesn't have the type added */ + if (type != drbgInitV) + ret = wc_Sha256Update(sha, &type, sizeof(type)); + } + if (ret == 0) + ret = wc_Sha256Update(sha, inA, inASz); + if (ret == 0) { + if (inB != NULL && inBSz > 0) + ret = wc_Sha256Update(sha, inB, inBSz); + } + if (ret == 0) + ret = wc_Sha256Final(sha, digest); + +#ifndef WOLFSSL_SMALL_STACK_CACHE + wc_Sha256Free(sha); +#endif + if (ret == 0) { + if (outSz > OUTPUT_BLOCK_LEN) { + XMEMCPY(out, digest, OUTPUT_BLOCK_LEN); + outSz -= OUTPUT_BLOCK_LEN; + out += OUTPUT_BLOCK_LEN; + } + else { + XMEMCPY(out, digest, outSz); + } + } + } + + ForceZero(digest, WC_SHA256_DIGEST_SIZE); + +#ifdef WC_ASYNC_ENABLE_SHA256 + FREE_VAR(digest, drbg->heap); +#endif + + return (ret == 0) ? 
DRBG_SUCCESS : DRBG_FAILURE; +} + +/* Returns: DRBG_SUCCESS or DRBG_FAILURE */ +static int Hash_DRBG_Reseed(DRBG* drbg, const byte* seed, word32 seedSz) +{ + byte newV[DRBG_SEED_LEN]; + + XMEMSET(newV, 0, DRBG_SEED_LEN); + + if (Hash_df(drbg, newV, sizeof(newV), drbgReseed, + drbg->V, sizeof(drbg->V), seed, seedSz) != DRBG_SUCCESS) { + return DRBG_FAILURE; + } + + XMEMCPY(drbg->V, newV, sizeof(drbg->V)); + ForceZero(newV, sizeof(newV)); + + if (Hash_df(drbg, drbg->C, sizeof(drbg->C), drbgInitC, drbg->V, + sizeof(drbg->V), NULL, 0) != DRBG_SUCCESS) { + return DRBG_FAILURE; + } + + drbg->reseedCtr = 1; + drbg->lastBlock = 0; + drbg->matchCount = 0; + return DRBG_SUCCESS; +} + +/* Returns: DRBG_SUCCESS and DRBG_FAILURE or BAD_FUNC_ARG on fail */ +int wc_RNG_DRBG_Reseed(WC_RNG* rng, const byte* seed, word32 seedSz) +{ + if (rng == NULL || seed == NULL) { + return BAD_FUNC_ARG; + } + + return Hash_DRBG_Reseed(rng->drbg, seed, seedSz); +} + +static WC_INLINE void array_add_one(byte* data, word32 dataSz) +{ + int i; + + for (i = dataSz - 1; i >= 0; i--) + { + data[i]++; + if (data[i] != 0) break; + } +} + +/* Returns: DRBG_SUCCESS or DRBG_FAILURE */ +static int Hash_gen(DRBG* drbg, byte* out, word32 outSz, const byte* V) +{ + int ret = DRBG_FAILURE; + byte data[DRBG_SEED_LEN]; + int i; + int len; + word32 checkBlock; +#ifdef WOLFSSL_SMALL_STACK_CACHE + wc_Sha256* sha = &drbg->sha256; +#else + wc_Sha256 sha[1]; +#endif +#ifdef WC_ASYNC_ENABLE_SHA256 + DECLARE_VAR(digest, byte, WC_SHA256_DIGEST_SIZE, drbg->heap); + if (digest == NULL) + return MEMORY_E; +#else + byte digest[WC_SHA256_DIGEST_SIZE]; +#endif + + /* Special case: outSz is 0 and out is NULL. wc_Generate a block to save for + * the continuous test. */ + + if (outSz == 0) outSz = 1; + + len = (outSz / OUTPUT_BLOCK_LEN) + ((outSz % OUTPUT_BLOCK_LEN) ? 1 : 0); + + XMEMCPY(data, V, sizeof(data)); + for (i = 0; i < len; i++) { +#ifndef WOLFSSL_SMALL_STACK_CACHE + #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB) + ret = wc_InitSha256_ex(sha, drbg->heap, drbg->devId); + #else + ret = wc_InitSha256(sha); + #endif + if (ret == 0) +#endif + ret = wc_Sha256Update(sha, data, sizeof(data)); + if (ret == 0) + ret = wc_Sha256Final(sha, digest); +#ifndef WOLFSSL_SMALL_STACK_CACHE + wc_Sha256Free(sha); +#endif + + if (ret == 0) { + XMEMCPY(&checkBlock, digest, sizeof(word32)); + if (drbg->reseedCtr > 1 && checkBlock == drbg->lastBlock) { + if (drbg->matchCount == 1) { + return DRBG_CONT_FAILURE; + } + else { + if (i == len) { + len++; + } + drbg->matchCount = 1; + } + } + else { + drbg->matchCount = 0; + drbg->lastBlock = checkBlock; + } + + if (out != NULL && outSz != 0) { + if (outSz >= OUTPUT_BLOCK_LEN) { + XMEMCPY(out, digest, OUTPUT_BLOCK_LEN); + outSz -= OUTPUT_BLOCK_LEN; + out += OUTPUT_BLOCK_LEN; + array_add_one(data, DRBG_SEED_LEN); + } + else { + XMEMCPY(out, digest, outSz); + outSz = 0; + } + } + } + } + ForceZero(data, sizeof(data)); + +#ifdef WC_ASYNC_ENABLE_SHA256 + FREE_VAR(digest, drbg->heap); +#endif + + return (ret == 0) ? 
+
+static WC_INLINE void array_add(byte* d, word32 dLen, const byte* s, word32 sLen)
+{
+    word16 carry = 0;
+
+    if (dLen > 0 && sLen > 0 && dLen >= sLen) {
+        int sIdx, dIdx;
+
+        for (sIdx = sLen - 1, dIdx = dLen - 1; sIdx >= 0; dIdx--, sIdx--)
+        {
+            carry += d[dIdx] + s[sIdx];
+            d[dIdx] = (byte)carry;
+            carry >>= 8;
+        }
+
+        for (; carry != 0 && dIdx >= 0; dIdx--) {
+            carry += d[dIdx];
+            d[dIdx] = (byte)carry;
+            carry >>= 8;
+        }
+    }
+}
+
+/* Returns: DRBG_SUCCESS, DRBG_NEED_RESEED, or DRBG_FAILURE */
+static int Hash_DRBG_Generate(DRBG* drbg, byte* out, word32 outSz)
+{
+    int ret;
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+    wc_Sha256* sha = &drbg->sha256;
+#else
+    wc_Sha256 sha[1];
+#endif
+    byte type;
+    word32 reseedCtr;
+
+    if (drbg->reseedCtr == RESEED_INTERVAL) {
+        return DRBG_NEED_RESEED;
+    } else {
+    #ifdef WC_ASYNC_ENABLE_SHA256
+        DECLARE_VAR(digest, byte, WC_SHA256_DIGEST_SIZE, drbg->heap);
+        if (digest == NULL)
+            return MEMORY_E;
+    #else
+        byte digest[WC_SHA256_DIGEST_SIZE];
+    #endif
+        type = drbgGenerateH;
+        reseedCtr = drbg->reseedCtr;
+
+        ret = Hash_gen(drbg, out, outSz, drbg->V);
+        if (ret == DRBG_SUCCESS) {
+#ifndef WOLFSSL_SMALL_STACK_CACHE
+        #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
+            ret = wc_InitSha256_ex(sha, drbg->heap, drbg->devId);
+        #else
+            ret = wc_InitSha256(sha);
+        #endif
+            if (ret == 0)
+#endif
+                ret = wc_Sha256Update(sha, &type, sizeof(type));
+            if (ret == 0)
+                ret = wc_Sha256Update(sha, drbg->V, sizeof(drbg->V));
+            if (ret == 0)
+                ret = wc_Sha256Final(sha, digest);
+
+#ifndef WOLFSSL_SMALL_STACK_CACHE
+            wc_Sha256Free(sha);
+#endif
+
+            if (ret == 0) {
+                array_add(drbg->V, sizeof(drbg->V), digest, WC_SHA256_DIGEST_SIZE);
+                array_add(drbg->V, sizeof(drbg->V), drbg->C, sizeof(drbg->C));
+            #ifdef LITTLE_ENDIAN_ORDER
+                reseedCtr = ByteReverseWord32(reseedCtr);
+            #endif
+                array_add(drbg->V, sizeof(drbg->V),
+                          (byte*)&reseedCtr, sizeof(reseedCtr));
+                ret = DRBG_SUCCESS;
+            }
+            drbg->reseedCtr++;
+        }
+        ForceZero(digest, WC_SHA256_DIGEST_SIZE);
+    #ifdef WC_ASYNC_ENABLE_SHA256
+        FREE_VAR(digest, drbg->heap);
+    #endif
+    }
+
+    return (ret == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
+}
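
After each request, Hash_DRBG_Generate refreshes the state as
V = (V + H + C + reseed_counter) mod 2^(8*DRBG_SEED_LEN), where
H = SHA-256(0x03 || V); the three array_add calls above perform that modular
addition on the big-endian V. A standalone sketch of the same add with a
worked carry example (add_be mirrors array_add but is not part of this patch):

    /* d += s (mod 2^(8*dLen)), big-endian, with sLen <= dLen */
    static void add_be(unsigned char* d, int dLen,
                       const unsigned char* s, int sLen)
    {
        int di = dLen - 1, si = sLen - 1;
        unsigned int carry = 0;

        for (; si >= 0; si--, di--) {
            carry += d[di] + s[si];
            d[di] = (unsigned char)carry;
            carry >>= 8;
        }
        for (; carry != 0 && di >= 0; di--) {
            carry += d[di];
            d[di] = (unsigned char)carry;
            carry >>= 8;
        }
    }
    /* e.g. d = {0x00,0xFF,0xFF}, s = {0x01}: d becomes {0x01,0x00,0x00} */
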
+
+/* Returns: DRBG_SUCCESS or DRBG_FAILURE */
+static int Hash_DRBG_Instantiate(DRBG* drbg, const byte* seed, word32 seedSz,
+                                 const byte* nonce, word32 nonceSz,
+                                 void* heap, int devId)
+{
+    int ret;
+
+    XMEMSET(drbg, 0, sizeof(DRBG));
+#if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
+    drbg->heap = heap;
+    drbg->devId = devId;
+#else
+    (void)heap;
+    (void)devId;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+    #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
+    ret = wc_InitSha256_ex(&drbg->sha256, drbg->heap, drbg->devId);
+    #else
+    ret = wc_InitSha256(&drbg->sha256);
+    #endif
+    if (ret != 0)
+        return ret;
+#endif
+
+    if (Hash_df(drbg, drbg->V, sizeof(drbg->V), drbgInitV, seed, seedSz,
+                nonce, nonceSz) == DRBG_SUCCESS &&
+        Hash_df(drbg, drbg->C, sizeof(drbg->C), drbgInitC, drbg->V,
+                sizeof(drbg->V), NULL, 0) == DRBG_SUCCESS) {
+
+        drbg->reseedCtr = 1;
+        drbg->lastBlock = 0;
+        drbg->matchCount = 0;
+        ret = DRBG_SUCCESS;
+    }
+    else {
+        ret = DRBG_FAILURE;
+    }
+
+    return ret;
+}
+
+/* Returns: DRBG_SUCCESS or DRBG_FAILURE */
+static int Hash_DRBG_Uninstantiate(DRBG* drbg)
+{
+    word32 i;
+    int    compareSum = 0;
+    byte*  compareDrbg = (byte*)drbg;
+
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+    wc_Sha256Free(&drbg->sha256);
+#endif
+
+    ForceZero(drbg, sizeof(DRBG));
+
+    for (i = 0; i < sizeof(DRBG); i++)
+        compareSum |= compareDrbg[i] ^ 0;
+
+    return (compareSum == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
+}
+
+
+int wc_RNG_TestSeed(const byte* seed, word32 seedSz)
+{
+    int ret = DRBG_SUCCESS;
+
+    /* Check the seed for duplicate adjacent blocks. */
+    word32 seedIdx = 0;
+    word32 scratchSz = min(SEED_BLOCK_SZ, seedSz - SEED_BLOCK_SZ);
+
+    while (seedIdx < seedSz - SEED_BLOCK_SZ) {
+        if (ConstantCompare(seed + seedIdx,
+                            seed + seedIdx + scratchSz,
+                            scratchSz) == 0) {
+
+            ret = DRBG_CONT_FAILURE;
+        }
+        seedIdx += SEED_BLOCK_SZ;
+        scratchSz = min(SEED_BLOCK_SZ, (seedSz - seedIdx));
+    }
+
+    return ret;
+}
+#endif /* HAVE_HASHDRBG */
+/* End NIST DRBG Code */
+
+
+static int _InitRng(WC_RNG* rng, byte* nonce, word32 nonceSz,
+                    void* heap, int devId)
+{
+    int ret = RNG_FAILURE_E;
+#ifdef HAVE_HASHDRBG
+    word32 seedSz = SEED_SZ + SEED_BLOCK_SZ;
+#endif
+
+    (void)nonce;
+    (void)nonceSz;
+
+    if (rng == NULL)
+        return BAD_FUNC_ARG;
+    if (nonce == NULL && nonceSz != 0)
+        return BAD_FUNC_ARG;
+
+#ifdef WOLFSSL_HEAP_TEST
+    rng->heap = (void*)WOLFSSL_HEAP_TEST;
+    (void)heap;
+#else
+    rng->heap = heap;
+#endif
+#if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
+    rng->devId = devId;
+    #if defined(WOLF_CRYPTO_CB)
+        rng->seed.devId = devId;
+    #endif
+#else
+    (void)devId;
+#endif
+
+#ifdef HAVE_HASHDRBG
+    /* init the DRBG to known values */
+    rng->drbg = NULL;
+    rng->status = DRBG_NOT_INIT;
+#endif
+
+#if defined(HAVE_INTEL_RDSEED) || defined(HAVE_INTEL_RDRAND)
+    /* init the intel RD seed and/or rand */
+    wc_InitRng_IntelRD();
+#endif
+
+    /* configure async RNG source if available */
+#ifdef WOLFSSL_ASYNC_CRYPT
+    ret = wolfAsync_DevCtxInit(&rng->asyncDev, WOLFSSL_ASYNC_MARKER_RNG,
+                               rng->heap, rng->devId);
+    if (ret != 0)
+        return ret;
+#endif
+
+#ifdef HAVE_INTEL_RDRAND
+    /* if CPU supports RDRAND, use it directly and bypass DRBG init */
+    if (IS_INTEL_RDRAND(intel_flags))
+        return 0;
+#endif
+
+#ifdef CUSTOM_RAND_GENERATE_BLOCK
+    ret = 0; /* success */
+#else
+#ifdef HAVE_HASHDRBG
+    if (nonceSz == 0)
+        seedSz = MAX_SEED_SZ;
+
+    if (wc_RNG_HealthTestLocal(0) == 0) {
+    #ifdef WC_ASYNC_ENABLE_SHA256
+        DECLARE_VAR(seed, byte, MAX_SEED_SZ, rng->heap);
+        if (seed == NULL)
return MEMORY_E; + #else + byte seed[MAX_SEED_SZ]; + #endif + +#if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY) + rng->drbg = + (struct DRBG*)XMALLOC(sizeof(DRBG), rng->heap, + DYNAMIC_TYPE_RNG); +#else + /* compile-time validation of drbg_data size */ + typedef char drbg_data_test[sizeof(rng->drbg_data) >= + sizeof(struct DRBG) ? 1 : -1]; + (void)sizeof(drbg_data_test); + rng->drbg = (struct DRBG*)rng->drbg_data; +#endif + + if (rng->drbg == NULL) { + ret = MEMORY_E; + } + else { + ret = wc_GenerateSeed(&rng->seed, seed, seedSz); + if (ret != 0) + ret = DRBG_FAILURE; + else + ret = wc_RNG_TestSeed(seed, seedSz); + + if (ret == DRBG_SUCCESS) + ret = Hash_DRBG_Instantiate(rng->drbg, + seed + SEED_BLOCK_SZ, seedSz - SEED_BLOCK_SZ, + nonce, nonceSz, rng->heap, devId); + + if (ret != DRBG_SUCCESS) { + #if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY) + XFREE(rng->drbg, rng->heap, DYNAMIC_TYPE_RNG); + #endif + rng->drbg = NULL; + } + } + + ForceZero(seed, seedSz); + #ifdef WC_ASYNC_ENABLE_SHA256 + FREE_VAR(seed, rng->heap); + #endif + } + else + ret = DRBG_CONT_FAILURE; + + if (ret == DRBG_SUCCESS) { + rng->status = DRBG_OK; + ret = 0; + } + else if (ret == DRBG_CONT_FAILURE) { + rng->status = DRBG_CONT_FAILED; + ret = DRBG_CONT_FIPS_E; + } + else if (ret == DRBG_FAILURE) { + rng->status = DRBG_FAILED; + ret = RNG_FAILURE_E; + } + else { + rng->status = DRBG_FAILED; + } +#endif /* HAVE_HASHDRBG */ +#endif /* CUSTOM_RAND_GENERATE_BLOCK */ + + return ret; +} + + +WOLFSSL_ABI +WC_RNG* wc_rng_new(byte* nonce, word32 nonceSz, void* heap) +{ + WC_RNG* rng; + + rng = (WC_RNG*)XMALLOC(sizeof(WC_RNG), heap, DYNAMIC_TYPE_RNG); + if (rng) { + int error = _InitRng(rng, nonce, nonceSz, heap, INVALID_DEVID) != 0; + if (error) { + XFREE(rng, heap, DYNAMIC_TYPE_RNG); + rng = NULL; + } + } + + return rng; +} + + +WOLFSSL_ABI +void wc_rng_free(WC_RNG* rng) +{ + if (rng) { + void* heap = rng->heap; + + wc_FreeRng(rng); + ForceZero(rng, sizeof(WC_RNG)); + XFREE(rng, heap, DYNAMIC_TYPE_RNG); + (void)heap; + } +} + + +int wc_InitRng(WC_RNG* rng) +{ + return _InitRng(rng, NULL, 0, NULL, INVALID_DEVID); +} + + +int wc_InitRng_ex(WC_RNG* rng, void* heap, int devId) +{ + return _InitRng(rng, NULL, 0, heap, devId); +} + + +int wc_InitRngNonce(WC_RNG* rng, byte* nonce, word32 nonceSz) +{ + return _InitRng(rng, nonce, nonceSz, NULL, INVALID_DEVID); +} + + +int wc_InitRngNonce_ex(WC_RNG* rng, byte* nonce, word32 nonceSz, + void* heap, int devId) +{ + return _InitRng(rng, nonce, nonceSz, heap, devId); +} + + +/* place a generated block in output */ +WOLFSSL_ABI +int wc_RNG_GenerateBlock(WC_RNG* rng, byte* output, word32 sz) +{ + int ret; + + if (rng == NULL || output == NULL) + return BAD_FUNC_ARG; + +#ifdef WOLF_CRYPTO_CB + if (rng->devId != INVALID_DEVID) { + ret = wc_CryptoCb_RandomBlock(rng, output, sz); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + } +#endif + +#ifdef HAVE_INTEL_RDRAND + if (IS_INTEL_RDRAND(intel_flags)) + return wc_GenerateRand_IntelRD(NULL, output, sz); +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) + if (rng->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RNG) { + /* these are blocking */ + #ifdef HAVE_CAVIUM + return NitroxRngGenerateBlock(rng, output, sz); + #elif defined(HAVE_INTEL_QA) && defined(QAT_ENABLE_RNG) + return IntelQaDrbg(&rng->asyncDev, output, sz); + #else + /* simulator not supported */ + #endif + } +#endif + +#ifdef CUSTOM_RAND_GENERATE_BLOCK + XMEMSET(output, 0, sz); + ret = CUSTOM_RAND_GENERATE_BLOCK(output, sz); 
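
For reference, the init/generate/free functions above are used together like
this (a minimal caller-side sketch, not code from this patch; get_random_key
is a hypothetical helper name):

    #include <wolfssl/wolfcrypt/random.h>

    int get_random_key(byte* key, word32 keySz)
    {
        WC_RNG rng;
        int ret = wc_InitRng(&rng);   /* seeds and instantiates the DRBG */
        if (ret != 0)
            return ret;

        /* keySz must be <= RNG_MAX_BLOCK_LEN, per the check below */
        ret = wc_RNG_GenerateBlock(&rng, key, keySz);
        wc_FreeRng(&rng);             /* uninstantiates and zeroizes */
        return ret;
    }
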
+#else + +#ifdef HAVE_HASHDRBG + if (sz > RNG_MAX_BLOCK_LEN) + return BAD_FUNC_ARG; + + if (rng->status != DRBG_OK) + return RNG_FAILURE_E; + + ret = Hash_DRBG_Generate(rng->drbg, output, sz); + if (ret == DRBG_NEED_RESEED) { + if (wc_RNG_HealthTestLocal(1) == 0) { + byte newSeed[SEED_SZ + SEED_BLOCK_SZ]; + + ret = wc_GenerateSeed(&rng->seed, newSeed, + SEED_SZ + SEED_BLOCK_SZ); + if (ret != 0) + ret = DRBG_FAILURE; + else + ret = wc_RNG_TestSeed(newSeed, SEED_SZ + SEED_BLOCK_SZ); + + if (ret == DRBG_SUCCESS) + ret = Hash_DRBG_Reseed(rng->drbg, newSeed + SEED_BLOCK_SZ, + SEED_SZ); + if (ret == DRBG_SUCCESS) + ret = Hash_DRBG_Generate(rng->drbg, output, sz); + + ForceZero(newSeed, sizeof(newSeed)); + } + else + ret = DRBG_CONT_FAILURE; + } + + if (ret == DRBG_SUCCESS) { + ret = 0; + } + else if (ret == DRBG_CONT_FAILURE) { + ret = DRBG_CONT_FIPS_E; + rng->status = DRBG_CONT_FAILED; + } + else { + ret = RNG_FAILURE_E; + rng->status = DRBG_FAILED; + } +#else + + /* if we get here then there is an RNG configuration error */ + ret = RNG_FAILURE_E; + +#endif /* HAVE_HASHDRBG */ +#endif /* CUSTOM_RAND_GENERATE_BLOCK */ + + return ret; +} + + +int wc_RNG_GenerateByte(WC_RNG* rng, byte* b) +{ + return wc_RNG_GenerateBlock(rng, b, 1); +} + + +int wc_FreeRng(WC_RNG* rng) +{ + int ret = 0; + + if (rng == NULL) + return BAD_FUNC_ARG; + +#if defined(WOLFSSL_ASYNC_CRYPT) + wolfAsync_DevCtxFree(&rng->asyncDev, WOLFSSL_ASYNC_MARKER_RNG); +#endif + +#ifdef HAVE_HASHDRBG + if (rng->drbg != NULL) { + if (Hash_DRBG_Uninstantiate(rng->drbg) != DRBG_SUCCESS) + ret = RNG_FAILURE_E; + + #if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY) + XFREE(rng->drbg, rng->heap, DYNAMIC_TYPE_RNG); + #endif + rng->drbg = NULL; + } + + rng->status = DRBG_NOT_INIT; +#endif /* HAVE_HASHDRBG */ + + return ret; +} + +#ifdef HAVE_HASHDRBG +int wc_RNG_HealthTest(int reseed, const byte* seedA, word32 seedASz, + const byte* seedB, word32 seedBSz, + byte* output, word32 outputSz) +{ + return wc_RNG_HealthTest_ex(reseed, NULL, 0, + seedA, seedASz, seedB, seedBSz, + output, outputSz, + NULL, INVALID_DEVID); +} + + +int wc_RNG_HealthTest_ex(int reseed, const byte* nonce, word32 nonceSz, + const byte* seedA, word32 seedASz, + const byte* seedB, word32 seedBSz, + byte* output, word32 outputSz, + void* heap, int devId) +{ + int ret = -1; + DRBG* drbg; +#ifndef WOLFSSL_SMALL_STACK + DRBG drbg_var; +#endif + + if (seedA == NULL || output == NULL) { + return BAD_FUNC_ARG; + } + + if (reseed != 0 && seedB == NULL) { + return BAD_FUNC_ARG; + } + + if (outputSz != RNG_HEALTH_TEST_CHECK_SIZE) { + return ret; + } + +#ifdef WOLFSSL_SMALL_STACK + drbg = (DRBG*)XMALLOC(sizeof(DRBG), NULL, DYNAMIC_TYPE_RNG); + if (drbg == NULL) { + return MEMORY_E; + } +#else + drbg = &drbg_var; +#endif + + if (Hash_DRBG_Instantiate(drbg, seedA, seedASz, nonce, nonceSz, + heap, devId) != 0) { + goto exit_rng_ht; + } + + if (reseed) { + if (Hash_DRBG_Reseed(drbg, seedB, seedBSz) != 0) { + goto exit_rng_ht; + } + } + + /* This call to generate is prescribed by the NIST DRBGVS + * procedure. The results are thrown away. The known + * answer test checks the second block of DRBG out of + * the generator to ensure the internal state is updated + * as expected. 
*/ + if (Hash_DRBG_Generate(drbg, output, outputSz) != 0) { + goto exit_rng_ht; + } + + if (Hash_DRBG_Generate(drbg, output, outputSz) != 0) { + goto exit_rng_ht; + } + + /* Mark success */ + ret = 0; + +exit_rng_ht: + + /* This is safe to call even if Hash_DRBG_Instantiate fails */ + if (Hash_DRBG_Uninstantiate(drbg) != 0) { + ret = -1; + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(drbg, NULL, DYNAMIC_TYPE_RNG); +#endif + + return ret; +} + + +const byte seedA[] = { + 0x63, 0x36, 0x33, 0x77, 0xe4, 0x1e, 0x86, 0x46, 0x8d, 0xeb, 0x0a, 0xb4, + 0xa8, 0xed, 0x68, 0x3f, 0x6a, 0x13, 0x4e, 0x47, 0xe0, 0x14, 0xc7, 0x00, + 0x45, 0x4e, 0x81, 0xe9, 0x53, 0x58, 0xa5, 0x69, 0x80, 0x8a, 0xa3, 0x8f, + 0x2a, 0x72, 0xa6, 0x23, 0x59, 0x91, 0x5a, 0x9f, 0x8a, 0x04, 0xca, 0x68 +}; + +const byte reseedSeedA[] = { + 0xe6, 0x2b, 0x8a, 0x8e, 0xe8, 0xf1, 0x41, 0xb6, 0x98, 0x05, 0x66, 0xe3, + 0xbf, 0xe3, 0xc0, 0x49, 0x03, 0xda, 0xd4, 0xac, 0x2c, 0xdf, 0x9f, 0x22, + 0x80, 0x01, 0x0a, 0x67, 0x39, 0xbc, 0x83, 0xd3 +}; + +const byte outputA[] = { + 0x04, 0xee, 0xc6, 0x3b, 0xb2, 0x31, 0xdf, 0x2c, 0x63, 0x0a, 0x1a, 0xfb, + 0xe7, 0x24, 0x94, 0x9d, 0x00, 0x5a, 0x58, 0x78, 0x51, 0xe1, 0xaa, 0x79, + 0x5e, 0x47, 0x73, 0x47, 0xc8, 0xb0, 0x56, 0x62, 0x1c, 0x18, 0xbd, 0xdc, + 0xdd, 0x8d, 0x99, 0xfc, 0x5f, 0xc2, 0xb9, 0x20, 0x53, 0xd8, 0xcf, 0xac, + 0xfb, 0x0b, 0xb8, 0x83, 0x12, 0x05, 0xfa, 0xd1, 0xdd, 0xd6, 0xc0, 0x71, + 0x31, 0x8a, 0x60, 0x18, 0xf0, 0x3b, 0x73, 0xf5, 0xed, 0xe4, 0xd4, 0xd0, + 0x71, 0xf9, 0xde, 0x03, 0xfd, 0x7a, 0xea, 0x10, 0x5d, 0x92, 0x99, 0xb8, + 0xaf, 0x99, 0xaa, 0x07, 0x5b, 0xdb, 0x4d, 0xb9, 0xaa, 0x28, 0xc1, 0x8d, + 0x17, 0x4b, 0x56, 0xee, 0x2a, 0x01, 0x4d, 0x09, 0x88, 0x96, 0xff, 0x22, + 0x82, 0xc9, 0x55, 0xa8, 0x19, 0x69, 0xe0, 0x69, 0xfa, 0x8c, 0xe0, 0x07, + 0xa1, 0x80, 0x18, 0x3a, 0x07, 0xdf, 0xae, 0x17 +}; + +const byte seedB[] = { + 0xa6, 0x5a, 0xd0, 0xf3, 0x45, 0xdb, 0x4e, 0x0e, 0xff, 0xe8, 0x75, 0xc3, + 0xa2, 0xe7, 0x1f, 0x42, 0xc7, 0x12, 0x9d, 0x62, 0x0f, 0xf5, 0xc1, 0x19, + 0xa9, 0xef, 0x55, 0xf0, 0x51, 0x85, 0xe0, 0xfb, /* nonce next */ + 0x85, 0x81, 0xf9, 0x31, 0x75, 0x17, 0x27, 0x6e, 0x06, 0xe9, 0x60, 0x7d, + 0xdb, 0xcb, 0xcc, 0x2e +}; + +const byte outputB[] = { + 0xd3, 0xe1, 0x60, 0xc3, 0x5b, 0x99, 0xf3, 0x40, 0xb2, 0x62, 0x82, 0x64, + 0xd1, 0x75, 0x10, 0x60, 0xe0, 0x04, 0x5d, 0xa3, 0x83, 0xff, 0x57, 0xa5, + 0x7d, 0x73, 0xa6, 0x73, 0xd2, 0xb8, 0xd8, 0x0d, 0xaa, 0xf6, 0xa6, 0xc3, + 0x5a, 0x91, 0xbb, 0x45, 0x79, 0xd7, 0x3f, 0xd0, 0xc8, 0xfe, 0xd1, 0x11, + 0xb0, 0x39, 0x13, 0x06, 0x82, 0x8a, 0xdf, 0xed, 0x52, 0x8f, 0x01, 0x81, + 0x21, 0xb3, 0xfe, 0xbd, 0xc3, 0x43, 0xe7, 0x97, 0xb8, 0x7d, 0xbb, 0x63, + 0xdb, 0x13, 0x33, 0xde, 0xd9, 0xd1, 0xec, 0xe1, 0x77, 0xcf, 0xa6, 0xb7, + 0x1f, 0xe8, 0xab, 0x1d, 0xa4, 0x66, 0x24, 0xed, 0x64, 0x15, 0xe5, 0x1c, + 0xcd, 0xe2, 0xc7, 0xca, 0x86, 0xe2, 0x83, 0x99, 0x0e, 0xea, 0xeb, 0x91, + 0x12, 0x04, 0x15, 0x52, 0x8b, 0x22, 0x95, 0x91, 0x02, 0x81, 0xb0, 0x2d, + 0xd4, 0x31, 0xf4, 0xc9, 0xf7, 0x04, 0x27, 0xdf +}; + + +static int wc_RNG_HealthTestLocal(int reseed) +{ + int ret = 0; +#ifdef WOLFSSL_SMALL_STACK + byte* check; +#else + byte check[RNG_HEALTH_TEST_CHECK_SIZE]; +#endif + +#ifdef WOLFSSL_SMALL_STACK + check = (byte*)XMALLOC(RNG_HEALTH_TEST_CHECK_SIZE, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (check == NULL) { + return MEMORY_E; + } +#endif + + if (reseed) { + ret = wc_RNG_HealthTest(1, seedA, sizeof(seedA), + reseedSeedA, sizeof(reseedSeedA), + check, RNG_HEALTH_TEST_CHECK_SIZE); + if (ret == 0) { + if (ConstantCompare(check, outputA, + RNG_HEALTH_TEST_CHECK_SIZE) 
!= 0) + ret = -1; + } + } + else { + ret = wc_RNG_HealthTest(0, seedB, sizeof(seedB), + NULL, 0, + check, RNG_HEALTH_TEST_CHECK_SIZE); + if (ret == 0) { + if (ConstantCompare(check, outputB, + RNG_HEALTH_TEST_CHECK_SIZE) != 0) + ret = -1; + } + + /* The previous test cases use a large seed instead of a seed and nonce. + * seedB is actually from a test case with a seed and nonce, and + * just concatenates them. The pivot point between seed and nonce is + * byte 32, feed them into the health test separately. */ + if (ret == 0) { + ret = wc_RNG_HealthTest_ex(0, + seedB + 32, sizeof(seedB) - 32, + seedB, 32, + NULL, 0, + check, RNG_HEALTH_TEST_CHECK_SIZE, + NULL, INVALID_DEVID); + if (ret == 0) { + if (ConstantCompare(check, outputB, sizeof(outputB)) != 0) + ret = -1; + } + } + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(check, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return ret; +} + +#endif /* HAVE_HASHDRBG */ + + +#ifdef HAVE_WNR + +/* + * Init global Whitewood netRandom context + * Returns 0 on success, negative on error + */ +int wc_InitNetRandom(const char* configFile, wnr_hmac_key hmac_cb, int timeout) +{ + if (configFile == NULL || timeout < 0) + return BAD_FUNC_ARG; + + if (wnr_mutex_init > 0) { + WOLFSSL_MSG("netRandom context already created, skipping"); + return 0; + } + + if (wc_InitMutex(&wnr_mutex) != 0) { + WOLFSSL_MSG("Bad Init Mutex wnr_mutex"); + return BAD_MUTEX_E; + } + wnr_mutex_init = 1; + + if (wc_LockMutex(&wnr_mutex) != 0) { + WOLFSSL_MSG("Bad Lock Mutex wnr_mutex"); + return BAD_MUTEX_E; + } + + /* store entropy timeout */ + wnr_timeout = timeout; + + /* create global wnr_context struct */ + if (wnr_create(&wnr_ctx) != WNR_ERROR_NONE) { + WOLFSSL_MSG("Error creating global netRandom context"); + return RNG_FAILURE_E; + } + + /* load config file */ + if (wnr_config_loadf(wnr_ctx, (char*)configFile) != WNR_ERROR_NONE) { + WOLFSSL_MSG("Error loading config file into netRandom context"); + wnr_destroy(wnr_ctx); + wnr_ctx = NULL; + return RNG_FAILURE_E; + } + + /* create/init polling mechanism */ + if (wnr_poll_create() != WNR_ERROR_NONE) { + printf("ERROR: wnr_poll_create() failed\n"); + WOLFSSL_MSG("Error initializing netRandom polling mechanism"); + wnr_destroy(wnr_ctx); + wnr_ctx = NULL; + return RNG_FAILURE_E; + } + + /* validate config, set HMAC callback (optional) */ + if (wnr_setup(wnr_ctx, hmac_cb) != WNR_ERROR_NONE) { + WOLFSSL_MSG("Error setting up netRandom context"); + wnr_destroy(wnr_ctx); + wnr_ctx = NULL; + wnr_poll_destroy(); + return RNG_FAILURE_E; + } + + wc_UnLockMutex(&wnr_mutex); + + return 0; +} + +/* + * Free global Whitewood netRandom context + * Returns 0 on success, negative on error + */ +int wc_FreeNetRandom(void) +{ + if (wnr_mutex_init > 0) { + + if (wc_LockMutex(&wnr_mutex) != 0) { + WOLFSSL_MSG("Bad Lock Mutex wnr_mutex"); + return BAD_MUTEX_E; + } + + if (wnr_ctx != NULL) { + wnr_destroy(wnr_ctx); + wnr_ctx = NULL; + } + wnr_poll_destroy(); + + wc_UnLockMutex(&wnr_mutex); + + wc_FreeMutex(&wnr_mutex); + wnr_mutex_init = 0; + } + + return 0; +} + +#endif /* HAVE_WNR */ + + +#if defined(HAVE_INTEL_RDRAND) || defined(HAVE_INTEL_RDSEED) + +#ifdef WOLFSSL_ASYNC_CRYPT + /* need more retries if multiple cores */ + #define INTELRD_RETRY (32 * 8) +#else + #define INTELRD_RETRY 32 +#endif + +#ifdef HAVE_INTEL_RDSEED + +#ifndef USE_WINDOWS_API + + /* return 0 on success */ + static WC_INLINE int IntelRDseed64(word64* seed) + { + unsigned char ok; + + __asm__ volatile("rdseed %0; setc %1":"=r"(*seed), "=qm"(ok)); + return (ok) ? 
0 : -1;
+    }
+
+#else /* USE_WINDOWS_API */
+    /* The Visual Studio compiler does not allow inline assembly for x64
+     * targets, but it does provide the Intel RDSEED/RDRAND intrinsics. */
+
+    /* return 0 on success */
+    static WC_INLINE int IntelRDseed64(word64* seed)
+    {
+        int ok;
+
+        ok = _rdseed64_step(seed);
+        return (ok) ? 0 : -1;
+    }
+
+#endif /* USE_WINDOWS_API */
+
+/* return 0 on success */
+static WC_INLINE int IntelRDseed64_r(word64* rnd)
+{
+    int i;
+    for (i = 0; i < INTELRD_RETRY; i++) {
+        if (IntelRDseed64(rnd) == 0)
+            return 0;
+    }
+    return -1;
+}
+
+/* return 0 on success */
+static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz)
+{
+    int ret;
+    word64 rndTmp;
+
+    (void)os;
+
+    if (!IS_INTEL_RDSEED(intel_flags))
+        return -1;
+
+    for (; (sz / sizeof(word64)) > 0; sz -= sizeof(word64),
+                                      output += sizeof(word64)) {
+        ret = IntelRDseed64_r((word64*)output);
+        if (ret != 0)
+            return ret;
+    }
+    if (sz == 0)
+        return 0;
+
+    /* handle unaligned remainder */
+    ret = IntelRDseed64_r(&rndTmp);
+    if (ret != 0)
+        return ret;
+
+    XMEMCPY(output, &rndTmp, sz);
+    ForceZero(&rndTmp, sizeof(rndTmp));
+
+    return 0;
+}
+
+#endif /* HAVE_INTEL_RDSEED */
+
+#ifdef HAVE_INTEL_RDRAND
+
+#ifndef USE_WINDOWS_API
+
+/* return 0 on success */
+static WC_INLINE int IntelRDrand64(word64 *rnd)
+{
+    unsigned char ok;
+
+    __asm__ volatile("rdrand %0; setc %1":"=r"(*rnd), "=qm"(ok));
+
+    return (ok) ? 0 : -1;
+}
+
+#else /* USE_WINDOWS_API */
+    /* The Visual Studio compiler does not allow inline assembly for x64
+     * targets, but it does provide the Intel RDSEED/RDRAND intrinsics. */
+
+/* return 0 on success */
+static WC_INLINE int IntelRDrand64(word64 *rnd)
+{
+    int ok;
+
+    ok = _rdrand64_step(rnd);
+
+    return (ok) ? 0 : -1;
+}
+
+#endif /* USE_WINDOWS_API */
+
+/* return 0 on success */
+static WC_INLINE int IntelRDrand64_r(word64 *rnd)
+{
+    int i;
+    for (i = 0; i < INTELRD_RETRY; i++) {
+        if (IntelRDrand64(rnd) == 0)
+            return 0;
+    }
+    return -1;
+}
+
+/* return 0 on success */
+static int wc_GenerateRand_IntelRD(OS_Seed* os, byte* output, word32 sz)
+{
+    int ret;
+    word64 rndTmp;
+
+    (void)os;
+
+    if (!IS_INTEL_RDRAND(intel_flags))
+        return -1;
+
+    for (; (sz / sizeof(word64)) > 0; sz -= sizeof(word64),
+                                      output += sizeof(word64)) {
+        ret = IntelRDrand64_r((word64 *)output);
+        if (ret != 0)
+            return ret;
+    }
+    if (sz == 0)
+        return 0;
+
+    /* handle unaligned remainder */
+    ret = IntelRDrand64_r(&rndTmp);
+    if (ret != 0)
+        return ret;
+
+    XMEMCPY(output, &rndTmp, sz);
+    ForceZero(&rndTmp, sizeof(rndTmp)); /* scrub the stack copy, as the
+                                         * RDSEED path above does */
+
+    return 0;
+}
+
+#endif /* HAVE_INTEL_RDRAND */
+#endif /* HAVE_INTEL_RDRAND || HAVE_INTEL_RDSEED */
+
+
+/* Begin wc_GenerateSeed Implementations */
+#if defined(CUSTOM_RAND_GENERATE_SEED)
+
+    /* Implement your own random generation function
+     * Return 0 to indicate success
+     * int rand_gen_seed(byte* output, word32 sz);
+     * #define CUSTOM_RAND_GENERATE_SEED rand_gen_seed */
+
+    int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+    {
+        (void)os; /* Suppress unused arg warning */
+        return CUSTOM_RAND_GENERATE_SEED(output, sz);
+    }
+
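A sketch of the hook described in the comment above. MY_TRNG_READY and
MY_TRNG_DATA_REG are hypothetical stand-ins for a board's TRNG registers and
exist nowhere in wolfSSL; only the signature and the 0-on-success contract
come from the comment:

    /* #define CUSTOM_RAND_GENERATE_SEED rand_gen_seed */
    int rand_gen_seed(byte* output, word32 sz)
    {
        word32 i;

        for (i = 0; i < sz; i++) {
            while (!MY_TRNG_READY)              /* hypothetical ready flag */
                ;
            output[i] = (byte)MY_TRNG_DATA_REG; /* hypothetical data register */
        }
        return 0;                               /* 0 == success */
    }
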
+#elif defined(CUSTOM_RAND_GENERATE_SEED_OS)
+
+    /* Implement your own random generation function,
+     * which includes OS_Seed.
+     * Return 0 to indicate success
+     * int rand_gen_seed(OS_Seed* os, byte* output, word32 sz);
+     * #define CUSTOM_RAND_GENERATE_SEED_OS rand_gen_seed */
+
+    int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+    {
+        return CUSTOM_RAND_GENERATE_SEED_OS(os, output, sz);
+    }
+
+#elif defined(CUSTOM_RAND_GENERATE)
+
+    /* Implement your own random generation function
+     * word32 rand_gen(void);
+     * #define CUSTOM_RAND_GENERATE rand_gen */
+
+    int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+    {
+        word32 i = 0;
+
+        (void)os;
+
+        while (i < sz)
+        {
+            /* If not aligned or there is odd/remainder */
+            if ((i + sizeof(CUSTOM_RAND_TYPE)) > sz ||
+                ((wolfssl_word)&output[i] % sizeof(CUSTOM_RAND_TYPE)) != 0
+            ) {
+                /* Single byte at a time */
+                output[i++] = (byte)CUSTOM_RAND_GENERATE();
+            }
+            else {
+                /* Use native 8, 16, 32 or 64 copy instruction */
+                *((CUSTOM_RAND_TYPE*)&output[i]) = CUSTOM_RAND_GENERATE();
+                i += sizeof(CUSTOM_RAND_TYPE);
+            }
+        }
+
+        return 0;
+    }
+
+#elif defined(WOLFSSL_SGX)
+
+int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+{
+    int ret = !SGX_SUCCESS;
+    int i, read_max = 10;
+
+    for (i = 0; i < read_max && ret != SGX_SUCCESS; i++) {
+        ret = sgx_read_rand(output, sz);
+    }
+
+    (void)os;
+    return (ret == SGX_SUCCESS) ? 0 : 1;
+}
+
+#elif defined(USE_WINDOWS_API)
+
+int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+{
+#ifdef WOLF_CRYPTO_CB
+    int ret;
+
+    if (os != NULL && os->devId != INVALID_DEVID) {
+        ret = wc_CryptoCb_RandomSeed(os, output, sz);
+        if (ret != CRYPTOCB_UNAVAILABLE)
+            return ret;
+        /* fall-through when unavailable */
+    }
+#endif
+
+    #ifdef HAVE_INTEL_RDSEED
+    if (IS_INTEL_RDSEED(intel_flags)) {
+        if (!wc_GenerateSeed_IntelRD(NULL, output, sz)) {
+            /* success, we're done */
+            return 0;
+        }
+        #ifdef FORCE_FAILURE_RDSEED
+        /* don't fall back to CryptoAPI */
+        return READ_RAN_E;
+        #endif
+    }
+    #endif /* HAVE_INTEL_RDSEED */
+
+    if (!CryptAcquireContext(&os->handle, 0, 0, PROV_RSA_FULL,
+                             CRYPT_VERIFYCONTEXT))
+        return WINCRYPT_E;
+
+    if (!CryptGenRandom(os->handle, sz, output)) {
+        CryptReleaseContext(os->handle, 0); /* release before failing */
+        return CRYPTGEN_E;
+    }
+
+    CryptReleaseContext(os->handle, 0);
+
+    return 0;
+}
+
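
Each backend below implements the same contract: fill `output` with `sz` bytes
of seed material and return 0 on success (or a wolfCrypt error code such as
RNG_FAILURE_E). A skeleton for a hypothetical new port, in the same
commented-stub style this file uses for CUSTOM_RAND_GENERATE_BLOCK further
down (MY_NEW_PLATFORM and my_platform_trng_read are placeholders):

    /*
     * #elif defined(MY_NEW_PLATFORM)
     *
     *     int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
     *     {
     *         (void)os;
     *         return (my_platform_trng_read(output, sz) == 0)
     *                    ? 0 : RNG_FAILURE_E;
     *     }
     */
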
+
+#elif defined(HAVE_RTP_SYS) || defined(EBSNET)
+
+#include "rtprand.h"   /* rtp_rand() */
+#include "rtptime.h"   /* rtp_get_system_msec() */
+
+int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+{
+    word32 i;
+
+    rtp_srand(rtp_get_system_msec());
+    for (i = 0; i < sz; i++ ) {
+        output[i] = rtp_rand() % 256;
+    }
+
+    return 0;
+}
+
+
+#elif defined(MICROCHIP_PIC32)
+
+    #ifdef MICROCHIP_MPLAB_HARMONY
+        #ifdef MICROCHIP_MPLAB_HARMONY_3
+            #include "system/time/sys_time.h"
+            #define PIC32_SEED_COUNT SYS_TIME_CounterGet
+        #else
+            #define PIC32_SEED_COUNT _CP0_GET_COUNT
+        #endif
+    #else
+        #if !defined(WOLFSSL_MICROCHIP_PIC32MZ)
+            #include
+        #endif
+        extern word32 ReadCoreTimer(void);
+        #define PIC32_SEED_COUNT ReadCoreTimer
+    #endif
+
+    #ifdef WOLFSSL_PIC32MZ_RNG
+        #include "xc.h"
+        int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+        {
+            int i;
+            byte rnd[8];
+            word32 *rnd32 = (word32 *)rnd;
+            word32 size = sz;
+            byte* op = output;
+
+#if ((__PIC32_FEATURE_SET0 == 'E') && (__PIC32_FEATURE_SET1 == 'C'))
+            RNGNUMGEN1 = _CP0_GET_COUNT();
+            RNGPOLY1 = _CP0_GET_COUNT();
+            RNGPOLY2 = _CP0_GET_COUNT();
+            RNGNUMGEN2 = _CP0_GET_COUNT();
+#else
+            /* All others can be seeded from the TRNG */
+            RNGCONbits.TRNGMODE = 1;
+            RNGCONbits.TRNGEN = 1;
+            while (RNGCNT < 64);
+            RNGCONbits.LOAD = 1;
+            while (RNGCONbits.LOAD == 1);
+            while (RNGCNT < 64);
+            RNGPOLY2 = RNGSEED2;
+            RNGPOLY1 = RNGSEED1;
+#endif
+
+            RNGCONbits.PLEN = 0x40;
+            RNGCONbits.PRNGEN = 1;
+            for (i = 0; i < 5; i++) { /* wait for RNGNUMGEN ready */
+                volatile int x, y;
+                x = RNGNUMGEN1;
+                y = RNGNUMGEN2;
+                (void)x;
+                (void)y;
+            }
+            do {
+                rnd32[0] = RNGNUMGEN1;
+                rnd32[1] = RNGNUMGEN2;
+
+                for (i = 0; i < 8; i++, op++) {
+                    *op = rnd[i];
+                    size--;
+                    if (size == 0)
+                        break;
+                }
+            } while (size);
+            return 0;
+        }
+    #else /* WOLFSSL_PIC32MZ_RNG */
+        /* seed srand from the core timer (nanosecond resolution) */
+        int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+        {
+            int i;
+            srand(PIC32_SEED_COUNT() * 25);
+
+            for (i = 0; i < sz; i++ ) {
+                output[i] = rand() % 256;
+                if ( (i % 8) == 7)
+                    srand(PIC32_SEED_COUNT() * 25);
+            }
+            return 0;
+        }
+    #endif /* WOLFSSL_PIC32MZ_RNG */
+
+#elif defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX) || \
+      defined(FREESCALE_KSDK_BM) || defined(FREESCALE_FREE_RTOS)
+
+    #if defined(FREESCALE_K70_RNGA) || defined(FREESCALE_RNGA)
+        /*
+         * Generates a RNG seed using the Random Number Generator Accelerator
+         * on the Kinetis K70. Documentation located in Chapter 37 of
+         * K70 Sub-Family Reference Manual (see Note 3 in the README for link).
+         */
+        int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+        {
+            word32 i;
+
+            /* turn on RNGA module */
+            #if defined(SIM_SCGC3_RNGA_MASK)
+                SIM_SCGC3 |= SIM_SCGC3_RNGA_MASK;
+            #endif
+            #if defined(SIM_SCGC6_RNGA_MASK)
+                /* additionally needed for at least K64F */
+                SIM_SCGC6 |= SIM_SCGC6_RNGA_MASK;
+            #endif
+
+            /* set SLP bit to 0 - "RNGA is not in sleep mode" */
+            RNG_CR &= ~RNG_CR_SLP_MASK;
+
+            /* set HA bit to 1 - "security violations masked" */
+            RNG_CR |= RNG_CR_HA_MASK;
+
+            /* set GO bit to 1 - "output register loaded with data" */
+            RNG_CR |= RNG_CR_GO_MASK;
+
+            for (i = 0; i < sz; i++) {
+
+                /* wait for RNG FIFO to be full */
+                while ((RNG_SR & RNG_SR_OREG_LVL(0xF)) == 0) {}
+
+                /* get value */
+                output[i] = RNG_OR;
+            }
+
+            return 0;
+        }
+
+    #elif defined(FREESCALE_K53_RNGB) || defined(FREESCALE_RNGB)
+        /*
+         * Generates a RNG seed using the Random Number Generator (RNGB)
+         * on the Kinetis K53. Documentation located in Chapter 33 of
+         * K53 Sub-Family Reference Manual (see note in the README for link).
+ */ + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + int i; + + /* turn on RNGB module */ + SIM_SCGC3 |= SIM_SCGC3_RNGB_MASK; + + /* reset RNGB */ + RNG_CMD |= RNG_CMD_SR_MASK; + + /* FIFO generate interrupt, return all zeros on underflow, + * set auto reseed */ + RNG_CR |= (RNG_CR_FUFMOD_MASK | RNG_CR_AR_MASK); + + /* gen seed, clear interrupts, clear errors */ + RNG_CMD |= (RNG_CMD_GS_MASK | RNG_CMD_CI_MASK | RNG_CMD_CE_MASK); + + /* wait for seeding to complete */ + while ((RNG_SR & RNG_SR_SDN_MASK) == 0) {} + + for (i = 0; i < sz; i++) { + + /* wait for a word to be available from FIFO */ + while((RNG_SR & RNG_SR_FIFO_LVL_MASK) == 0) {} + + /* get value */ + output[i] = RNG_OUT; + } + + return 0; + } + + #elif defined(FREESCALE_KSDK_2_0_TRNG) + + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + status_t status; + status = TRNG_GetRandomData(TRNG0, output, sz); + if (status == kStatus_Success) + { + return(0); + } + else + { + return RAN_BLOCK_E; + } + } + + #elif defined(FREESCALE_KSDK_2_0_RNGA) + + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + status_t status; + status = RNGA_GetRandomData(RNG, output, sz); + if (status == kStatus_Success) + { + return(0); + } + else + { + return RAN_BLOCK_E; + } + } + + + #elif defined(FREESCALE_RNGA) + + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + RNGA_DRV_GetRandomData(RNGA_INSTANCE, output, sz); + return 0; + } + + #else + #define USE_TEST_GENSEED + #endif /* FREESCALE_K70_RNGA */ + +#elif defined(STM32_RNG) + /* Generate a RNG seed using the hardware random number generator + * on the STM32F2/F4/F7/L4. */ + + #ifdef WOLFSSL_STM32_CUBEMX + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + int ret; + RNG_HandleTypeDef hrng; + word32 i = 0; + (void)os; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + /* enable RNG clock source */ + __HAL_RCC_RNG_CLK_ENABLE(); + + /* enable RNG peripheral */ + XMEMSET(&hrng, 0, sizeof(hrng)); + hrng.Instance = RNG; + HAL_RNG_Init(&hrng); + + while (i < sz) { + /* If not aligned or there is odd/remainder */ + if( (i + sizeof(word32)) > sz || + ((wolfssl_word)&output[i] % sizeof(word32)) != 0 + ) { + /* Single byte at a time */ + uint32_t tmpRng = 0; + if (HAL_RNG_GenerateRandomNumber(&hrng, &tmpRng) != HAL_OK) { + wolfSSL_CryptHwMutexUnLock(); + return RAN_BLOCK_E; + } + output[i++] = (byte)tmpRng; + } + else { + /* Use native 32 instruction */ + if (HAL_RNG_GenerateRandomNumber(&hrng, (uint32_t*)&output[i]) != HAL_OK) { + wolfSSL_CryptHwMutexUnLock(); + return RAN_BLOCK_E; + } + i += sizeof(word32); + } + } + + wolfSSL_CryptHwMutexUnLock(); + + return 0; + } + #elif defined(WOLFSSL_STM32F427_RNG) || defined(WOLFSSL_STM32_RNG_NOLIB) + + /* Generate a RNG seed using the hardware RNG on the STM32F427 + * directly, following steps outlined in STM32F4 Reference + * Manual (Chapter 24) for STM32F4xx family. */ + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + int ret; + word32 i; + (void)os; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + /* enable RNG peripheral clock */ + RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; + + /* enable RNG interrupt, set IE bit in RNG->CR register */ + RNG->CR |= RNG_CR_IE; + + /* enable RNG, set RNGEN bit in RNG->CR. 
Activates RNG, + * RNG_LFSR, and error detector */ + RNG->CR |= RNG_CR_RNGEN; + + /* verify no errors, make sure SEIS and CEIS bits are 0 + * in RNG->SR register */ + if (RNG->SR & (RNG_SR_SECS | RNG_SR_CECS)) { + wolfSSL_CryptHwMutexUnLock(); + return RNG_FAILURE_E; + } + + for (i = 0; i < sz; i++) { + /* wait until RNG number is ready */ + while ((RNG->SR & RNG_SR_DRDY) == 0) { } + + /* get value */ + output[i] = RNG->DR; + } + + wolfSSL_CryptHwMutexUnLock(); + + return 0; + } + + #else + + /* Generate a RNG seed using the STM32 Standard Peripheral Library */ + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + int ret; + word32 i; + (void)os; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + /* enable RNG clock source */ + RCC_AHB2PeriphClockCmd(RCC_AHB2Periph_RNG, ENABLE); + + /* reset RNG */ + RNG_DeInit(); + + /* enable RNG peripheral */ + RNG_Cmd(ENABLE); + + /* verify no errors with RNG_CLK or Seed */ + if (RNG_GetFlagStatus(RNG_FLAG_SECS | RNG_FLAG_CECS) != RESET) { + wolfSSL_CryptHwMutexUnLock(); + return RNG_FAILURE_E; + } + + for (i = 0; i < sz; i++) { + /* wait until RNG number is ready */ + while (RNG_GetFlagStatus(RNG_FLAG_DRDY) == RESET) { } + + /* get value */ + output[i] = RNG_GetRandomNumber(); + } + + wolfSSL_CryptHwMutexUnLock(); + + return 0; + } + #endif /* WOLFSSL_STM32_CUBEMX */ + +#elif defined(WOLFSSL_TIRTOS) + + #include + #include + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + int i; + srand(xdc_runtime_Timestamp_get32()); + + for (i = 0; i < sz; i++ ) { + output[i] = rand() % 256; + if ((i % 8) == 7) { + srand(xdc_runtime_Timestamp_get32()); + } + } + + return 0; + } + +#elif defined(WOLFSSL_PB) + + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + word32 i; + for (i = 0; i < sz; i++) + output[i] = UTL_Rand(); + + (void)os; + + return 0; + } + +#elif defined(WOLFSSL_NUCLEUS) +#include "nucleus.h" +#include "kernel/plus_common.h" + +#warning "potential for not enough entropy, currently being used for testing" +int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) +{ + int i; + srand(NU_Get_Time_Stamp()); + + for (i = 0; i < sz; i++ ) { + output[i] = rand() % 256; + if ((i % 8) == 7) { + srand(NU_Get_Time_Stamp()); + } + } + + return 0; +} +#elif defined(WOLFSSL_DEOS) && !defined(CUSTOM_RAND_GENERATE) + #include "stdlib.h" + + #warning "potential for not enough entropy, currently being used for testing Deos" + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + int i; + int seed = XTIME(0); + (void)os; + + for (i = 0; i < sz; i++ ) { + output[i] = rand_r(&seed) % 256; + if ((i % 8) == 7) { + seed = XTIME(0); + rand_r(&seed); + } + } + + return 0; + } +#elif defined(WOLFSSL_VXWORKS) + + #include + + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) { + STATUS status; + + #ifdef VXWORKS_SIM + /* cannot generate true entropy with VxWorks simulator */ + #warning "not enough entropy, simulator for testing only" + int i = 0; + + for (i = 0; i < 1000; i++) { + randomAddTimeStamp(); + } + #endif + + status = randBytes (output, sz); + if (status == ERROR) { + return RNG_FAILURE_E; + } + + return 0; + } + +#elif defined(WOLFSSL_NRF51) + #include "app_error.h" + #include "nrf_drv_rng.h" + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + int remaining = sz, length, pos = 0; + uint8_t available; + uint32_t err_code; + + (void)os; + + /* Make sure RNG is running */ + err_code = nrf_drv_rng_init(NULL); + if (err_code != NRF_SUCCESS && err_code != 
NRF_ERROR_INVALID_STATE) { + return -1; + } + + while (remaining > 0) { + err_code = nrf_drv_rng_bytes_available(&available); + if (err_code == NRF_SUCCESS) { + length = (remaining < available) ? remaining : available; + if (length > 0) { + err_code = nrf_drv_rng_rand(&output[pos], length); + remaining -= length; + pos += length; + } + } + + if (err_code != NRF_SUCCESS) { + break; + } + } + + return (err_code == NRF_SUCCESS) ? 0 : -1; + } + +#elif defined(HAVE_WNR) + + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + if (os == NULL || output == NULL || wnr_ctx == NULL || + wnr_timeout < 0) { + return BAD_FUNC_ARG; + } + + if (wnr_mutex_init == 0) { + WOLFSSL_MSG("netRandom context must be created before use"); + return RNG_FAILURE_E; + } + + if (wc_LockMutex(&wnr_mutex) != 0) { + WOLFSSL_MSG("Bad Lock Mutex wnr_mutex\n"); + return BAD_MUTEX_E; + } + + if (wnr_get_entropy(wnr_ctx, wnr_timeout, output, sz, sz) != + WNR_ERROR_NONE) + return RNG_FAILURE_E; + + wc_UnLockMutex(&wnr_mutex); + + return 0; + } + +#elif defined(WOLFSSL_ATMEL) + #include + + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + int ret = 0; + + (void)os; + if (output == NULL) { + return BUFFER_E; + } + + ret = atmel_get_random_number(sz, output); + + return ret; + } + +#elif defined(INTIME_RTOS) + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + int ret = 0; + + (void)os; + + if (output == NULL) { + return BUFFER_E; + } + + /* Note: Investigate better solution */ + /* no return to check */ + arc4random_buf(output, sz); + + return ret; + } + +#elif defined(WOLFSSL_WICED) + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + int ret; + (void)os; + + if (output == NULL || UINT16_MAX < sz) { + return BUFFER_E; + } + + if ((ret = wiced_crypto_get_random((void*) output, sz) ) + != WICED_SUCCESS) { + return ret; + } + + return ret; + } + +#elif defined(WOLFSSL_NETBURNER) + #warning using NetBurner pseudo random GetRandomByte for seed + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + word32 i; + (void)os; + + if (output == NULL) { + return BUFFER_E; + } + + for (i = 0; i < sz; i++) { + output[i] = GetRandomByte(); + + /* check if was a valid random number */ + if (!RandomValid()) + return RNG_FAILURE_E; + } + + return 0; + } +#elif defined(IDIRECT_DEV_RANDOM) + + extern int getRandom( int sz, unsigned char *output ); + + int GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + int num_bytes_returned = 0; + + num_bytes_returned = getRandom( (int) sz, (unsigned char *) output ); + + return 0; + } + +#elif (defined(WOLFSSL_IMX6_CAAM) || defined(WOLFSSL_IMX6_CAAM_RNG)) + + #include + #include + + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + Buffer buf[1]; + int ret = 0; + int times = 1000, i; + + (void)os; + + if (output == NULL) { + return BUFFER_E; + } + + buf[0].BufferType = DataBuffer | LastBuffer; + buf[0].TheAddress = (Address)output; + buf[0].Length = sz; + + /* Check Waiting to make sure entropy is ready */ + for (i = 0; i < times; i++) { + ret = wc_caamAddAndWait(buf, NULL, CAAM_ENTROPY); + if (ret == Success) { + break; + } + + /* driver could be waiting for entropy */ + if (ret != RAN_BLOCK_E) { + return ret; + } + usleep(100); + } + + if (i == times && ret != Success) { + return RNG_FAILURE_E; + } + else { /* Success case */ + ret = 0; + } + + return ret; + } + +#elif defined(WOLFSSL_APACHE_MYNEWT) + + #include + #include "os/os_time.h" + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + int i; + 
srand(os_time_get());
+
+        for (i = 0; i < sz; i++ ) {
+            output[i] = rand() % 256;
+            if ((i % 8) == 7) {
+                srand(os_time_get());
+            }
+        }
+
+        return 0;
+    }
+
+#elif defined(WOLFSSL_ESPIDF)
+    #if defined(WOLFSSL_ESPWROOM32) || defined(WOLFSSL_ESPWROOM32SE)
+        #include
+
+        int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+        {
+            word32 rand;
+            while (sz > 0) {
+                word32 len = sizeof(rand);
+                if (sz < len)
+                    len = sz;
+                /* Get one random 32-bit word from hw RNG */
+                rand = esp_random();
+                XMEMCPY(output, &rand, len);
+                output += len;
+                sz -= len;
+            }
+
+            return 0;
+        }
+    #endif /* end WOLFSSL_ESPWROOM32 */
+
+#elif defined(WOLFSSL_RENESAS_TSIP)
+#if defined(WOLFSSL_RENESA_TSIP_IAREWRX)
+    #include "r_bsp/mcu/all/r_rx_compiler.h"
+#endif
+    #include "r_bsp/platform.h"
+    #include "r_tsip_rx_if.h"
+
+    int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+    {
+        int ret;
+        uint32_t buffer[4];
+
+        while (sz > 0) {
+            uint32_t len = sizeof(buffer);
+
+            if (sz < len) {
+                len = sz;
+            }
+            /* returns four 32-bit words of random data */
+            ret = R_TSIP_GenerateRandomNumber(buffer);
+            if (ret == TSIP_SUCCESS) {
+                XMEMCPY(output, &buffer, len);
+                output += len;
+                sz -= len;
+            } else
+                return ret;
+        }
+        return ret;
+    }
+
+#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_TRNG)
+    #include "hal_data.h"
+
+    #ifndef WOLFSSL_SCE_TRNG_HANDLE
+        #define WOLFSSL_SCE_TRNG_HANDLE g_sce_trng
+    #endif
+
+    int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+    {
+        uint32_t ret;
+        uint32_t blocks;
+        word32 len = sz;
+
+        ret = WOLFSSL_SCE_TRNG_HANDLE.p_api->open(WOLFSSL_SCE_TRNG_HANDLE.p_ctrl,
+                                                  WOLFSSL_SCE_TRNG_HANDLE.p_cfg);
+        if (ret != SSP_SUCCESS && ret != SSP_ERR_CRYPTO_ALREADY_OPEN) {
+            /* error opening TRNG driver */
+            return -1;
+        }
+
+        blocks = sz / sizeof(uint32_t);
+        if (blocks > 0) {
+            ret = WOLFSSL_SCE_TRNG_HANDLE.p_api->read(WOLFSSL_SCE_TRNG_HANDLE.p_ctrl,
+                                                      (uint32_t*)output, blocks);
+            if (ret != SSP_SUCCESS) {
+                return -1;
+            }
+        }
+
+        len = len - (blocks * sizeof(uint32_t));
+        if (len > 0) {
+            uint32_t tmp;
+
+            if (len > sizeof(uint32_t)) {
+                return -1;
+            }
+            /* read into tmp's address (was a cast of the uninitialized
+             * value itself, which is not a valid destination pointer) */
+            ret = WOLFSSL_SCE_TRNG_HANDLE.p_api->read(WOLFSSL_SCE_TRNG_HANDLE.p_ctrl,
+                                                      &tmp, 1);
+            if (ret != SSP_SUCCESS) {
+                return -1;
+            }
+            XMEMCPY(output + (blocks * sizeof(uint32_t)), (byte*)&tmp, len);
+        }
+
+        ret = WOLFSSL_SCE_TRNG_HANDLE.p_api->close(WOLFSSL_SCE_TRNG_HANDLE.p_ctrl);
+        if (ret != SSP_SUCCESS) {
+            /* error closing TRNG driver */
+            return -1;
+        }
+        return 0;
+    }
+#elif defined(CUSTOM_RAND_GENERATE_BLOCK)
+    /* #define CUSTOM_RAND_GENERATE_BLOCK myRngFunc
+     * extern int myRngFunc(byte* output, word32 sz);
+     */
+
+#elif defined(WOLFSSL_SAFERTOS) || defined(WOLFSSL_LEANPSK) || \
+      defined(WOLFSSL_IAR_ARM) || defined(WOLFSSL_MDK_ARM) || \
+      defined(WOLFSSL_uITRON4) || defined(WOLFSSL_uTKERNEL2) || \
+      defined(WOLFSSL_LPC43xx) || defined(WOLFSSL_STM32F2xx) || \
+      defined(MBED) || defined(WOLFSSL_EMBOS) || \
+      defined(WOLFSSL_GENSEED_FORTEST) || defined(WOLFSSL_CHIBIOS) || \
+      defined(WOLFSSL_CONTIKI) || defined(WOLFSSL_AZSPHERE)
+
+    /* these platforms do not have a default random seed and
+       you'll need to implement your own wc_GenerateSeed or define via
+       CUSTOM_RAND_GENERATE_BLOCK */
+
+    #define USE_TEST_GENSEED
+
+#elif defined(WOLFSSL_ZEPHYR)
+
+        #include
+    #ifndef _POSIX_C_SOURCE
+        #include
+    #else
+        #include
+    #endif
+
+        int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+        {
+            int ret = 0;
+            word32 rand;
+            while (sz > 0) {
+                word32 len = sizeof(rand);
+                if (sz < len)
+                    len = sz;
+                rand = sys_rand32_get();
+                XMEMCPY(output, 
&rand, len); + output += len; + sz -= len; + } + + return ret; + } + +#elif defined(WOLFSSL_TELIT_M2MB) + + #include "stdlib.h" + static long get_timestamp(void) { + long myTime = 0; + INT32 fd = m2mb_rtc_open("/dev/rtc0", 0); + if (fd >= 0) { + M2MB_RTC_TIMEVAL_T timeval; + m2mb_rtc_ioctl(fd, M2MB_RTC_IOCTL_GET_TIMEVAL, &timeval); + myTime = timeval.msec; + m2mb_rtc_close(fd); + } + return myTime; + } + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + int i; + srand(get_timestamp()); + for (i = 0; i < sz; i++ ) { + output[i] = rand() % 256; + if ((i % 8) == 7) { + srand(get_timestamp()); + } + } + return 0; + } + +#elif defined(NO_DEV_RANDOM) + + #error "you need to write an os specific wc_GenerateSeed() here" + + /* + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + return 0; + } + */ + +#else + + /* may block */ + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + int ret = 0; + + if (os == NULL) { + return BAD_FUNC_ARG; + } + + #ifdef WOLF_CRYPTO_CB + if (os->devId != INVALID_DEVID) { + ret = wc_CryptoCb_RandomSeed(os, output, sz); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + ret = 0; /* reset error code */ + } + #endif + + #ifdef HAVE_INTEL_RDSEED + if (IS_INTEL_RDSEED(intel_flags)) { + ret = wc_GenerateSeed_IntelRD(NULL, output, sz); + if (ret == 0) { + /* success, we're done */ + return ret; + } + #ifdef FORCE_FAILURE_RDSEED + /* don't fallback to /dev/urandom */ + return ret; + #else + /* reset error and fallback to using /dev/urandom */ + ret = 0; + #endif + } + #endif /* HAVE_INTEL_RDSEED */ + + #ifndef NO_DEV_URANDOM /* way to disable use of /dev/urandom */ + os->fd = open("/dev/urandom", O_RDONLY); + if (os->fd == -1) + #endif + { + /* may still have /dev/random */ + os->fd = open("/dev/random", O_RDONLY); + if (os->fd == -1) + return OPEN_RAN_E; + } + + while (sz) { + int len = (int)read(os->fd, output, sz); + if (len == -1) { + ret = READ_RAN_E; + break; + } + + sz -= len; + output += len; + + if (sz) { + #if defined(BLOCKING) || defined(WC_RNG_BLOCKING) + sleep(0); /* context switch */ + #else + ret = RAN_BLOCK_E; + break; + #endif + } + } + close(os->fd); + + return ret; + } + +#endif + +#ifdef USE_TEST_GENSEED + #ifndef _MSC_VER + #warning "write a real random seed!!!!, just for testing now" + #else + #pragma message("Warning: write a real random seed!!!!, just for testing now") + #endif + int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) + { + word32 i; + for (i = 0; i < sz; i++ ) + output[i] = i; + + (void)os; + + return 0; + } +#endif + + +/* End wc_GenerateSeed */ +#endif /* WC_NO_RNG */ +#endif /* HAVE_FIPS */ diff --git a/client/wolfssl/wolfcrypt/src/ripemd.c b/client/wolfssl/wolfcrypt/src/ripemd.c new file mode 100644 index 0000000..484c62f --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/ripemd.c @@ -0,0 +1,366 @@ +/* ripemd.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef WOLFSSL_RIPEMD + +#include +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +#include + +int wc_InitRipeMd(RipeMd* ripemd) +{ + if (ripemd == NULL) { + return BAD_FUNC_ARG; + } + + ripemd->digest[0] = 0x67452301L; + ripemd->digest[1] = 0xEFCDAB89L; + ripemd->digest[2] = 0x98BADCFEL; + ripemd->digest[3] = 0x10325476L; + ripemd->digest[4] = 0xC3D2E1F0L; + + ripemd->buffLen = 0; + ripemd->loLen = 0; + ripemd->hiLen = 0; + + return 0; +} + + +/* for all */ +#define F(x, y, z) (x ^ y ^ z) +#define G(x, y, z) (z ^ (x & (y^z))) +#define H(x, y, z) (z ^ (x | ~y)) +#define I(x, y, z) (y ^ (z & (x^y))) +#define J(x, y, z) (x ^ (y | ~z)) + +#define k0 0 +#define k1 0x5a827999 +#define k2 0x6ed9eba1 +#define k3 0x8f1bbcdc +#define k4 0xa953fd4e +#define k5 0x50a28be6 +#define k6 0x5c4dd124 +#define k7 0x6d703ef3 +#define k8 0x7a6d76e9 +#define k9 0 + +/* for 160 and 320 */ +#define Subround(f, a, b, c, d, e, x, s, k) \ + a += f(b, c, d) + x + k;\ + a = rotlFixed((word32)a, s) + e;\ + c = rotlFixed((word32)c, 10U) + +static void Transform(RipeMd* ripemd) +{ + word32 a1, b1, c1, d1, e1, a2, b2, c2, d2, e2; + a1 = a2 = ripemd->digest[0]; + b1 = b2 = ripemd->digest[1]; + c1 = c2 = ripemd->digest[2]; + d1 = d2 = ripemd->digest[3]; + e1 = e2 = ripemd->digest[4]; + + Subround(F, a1, b1, c1, d1, e1, ripemd->buffer[ 0], 11, k0); + Subround(F, e1, a1, b1, c1, d1, ripemd->buffer[ 1], 14, k0); + Subround(F, d1, e1, a1, b1, c1, ripemd->buffer[ 2], 15, k0); + Subround(F, c1, d1, e1, a1, b1, ripemd->buffer[ 3], 12, k0); + Subround(F, b1, c1, d1, e1, a1, ripemd->buffer[ 4], 5, k0); + Subround(F, a1, b1, c1, d1, e1, ripemd->buffer[ 5], 8, k0); + Subround(F, e1, a1, b1, c1, d1, ripemd->buffer[ 6], 7, k0); + Subround(F, d1, e1, a1, b1, c1, ripemd->buffer[ 7], 9, k0); + Subround(F, c1, d1, e1, a1, b1, ripemd->buffer[ 8], 11, k0); + Subround(F, b1, c1, d1, e1, a1, ripemd->buffer[ 9], 13, k0); + Subround(F, a1, b1, c1, d1, e1, ripemd->buffer[10], 14, k0); + Subround(F, e1, a1, b1, c1, d1, ripemd->buffer[11], 15, k0); + Subround(F, d1, e1, a1, b1, c1, ripemd->buffer[12], 6, k0); + Subround(F, c1, d1, e1, a1, b1, ripemd->buffer[13], 7, k0); + Subround(F, b1, c1, d1, e1, a1, ripemd->buffer[14], 9, k0); + Subround(F, a1, b1, c1, d1, e1, ripemd->buffer[15], 8, k0); + + Subround(G, e1, a1, b1, c1, d1, ripemd->buffer[ 7], 7, k1); + Subround(G, d1, e1, a1, b1, c1, ripemd->buffer[ 4], 6, k1); + Subround(G, c1, d1, e1, a1, b1, ripemd->buffer[13], 8, k1); + Subround(G, b1, c1, d1, e1, a1, ripemd->buffer[ 1], 13, k1); + Subround(G, a1, b1, c1, d1, e1, ripemd->buffer[10], 11, k1); + Subround(G, e1, a1, b1, c1, d1, ripemd->buffer[ 6], 9, k1); + Subround(G, d1, e1, a1, b1, c1, ripemd->buffer[15], 7, k1); + Subround(G, c1, d1, e1, a1, b1, ripemd->buffer[ 3], 15, k1); + Subround(G, b1, c1, d1, e1, a1, ripemd->buffer[12], 7, k1); + Subround(G, a1, b1, c1, d1, e1, ripemd->buffer[ 0], 12, k1); + Subround(G, e1, a1, b1, c1, d1, ripemd->buffer[ 9], 15, k1); + Subround(G, d1, e1, a1, b1, c1, ripemd->buffer[ 5], 9, k1); + Subround(G, c1, d1, e1, a1, b1, ripemd->buffer[ 2], 11, k1); + Subround(G, b1, c1, d1, e1, a1, ripemd->buffer[14], 7, k1); + Subround(G, a1, b1, c1, d1, e1, ripemd->buffer[11], 13, k1); + Subround(G, 
e1, a1, b1, c1, d1, ripemd->buffer[ 8], 12, k1); + + Subround(H, d1, e1, a1, b1, c1, ripemd->buffer[ 3], 11, k2); + Subround(H, c1, d1, e1, a1, b1, ripemd->buffer[10], 13, k2); + Subround(H, b1, c1, d1, e1, a1, ripemd->buffer[14], 6, k2); + Subround(H, a1, b1, c1, d1, e1, ripemd->buffer[ 4], 7, k2); + Subround(H, e1, a1, b1, c1, d1, ripemd->buffer[ 9], 14, k2); + Subround(H, d1, e1, a1, b1, c1, ripemd->buffer[15], 9, k2); + Subround(H, c1, d1, e1, a1, b1, ripemd->buffer[ 8], 13, k2); + Subround(H, b1, c1, d1, e1, a1, ripemd->buffer[ 1], 15, k2); + Subround(H, a1, b1, c1, d1, e1, ripemd->buffer[ 2], 14, k2); + Subround(H, e1, a1, b1, c1, d1, ripemd->buffer[ 7], 8, k2); + Subround(H, d1, e1, a1, b1, c1, ripemd->buffer[ 0], 13, k2); + Subround(H, c1, d1, e1, a1, b1, ripemd->buffer[ 6], 6, k2); + Subround(H, b1, c1, d1, e1, a1, ripemd->buffer[13], 5, k2); + Subround(H, a1, b1, c1, d1, e1, ripemd->buffer[11], 12, k2); + Subround(H, e1, a1, b1, c1, d1, ripemd->buffer[ 5], 7, k2); + Subround(H, d1, e1, a1, b1, c1, ripemd->buffer[12], 5, k2); + + Subround(I, c1, d1, e1, a1, b1, ripemd->buffer[ 1], 11, k3); + Subround(I, b1, c1, d1, e1, a1, ripemd->buffer[ 9], 12, k3); + Subround(I, a1, b1, c1, d1, e1, ripemd->buffer[11], 14, k3); + Subround(I, e1, a1, b1, c1, d1, ripemd->buffer[10], 15, k3); + Subround(I, d1, e1, a1, b1, c1, ripemd->buffer[ 0], 14, k3); + Subround(I, c1, d1, e1, a1, b1, ripemd->buffer[ 8], 15, k3); + Subround(I, b1, c1, d1, e1, a1, ripemd->buffer[12], 9, k3); + Subround(I, a1, b1, c1, d1, e1, ripemd->buffer[ 4], 8, k3); + Subround(I, e1, a1, b1, c1, d1, ripemd->buffer[13], 9, k3); + Subround(I, d1, e1, a1, b1, c1, ripemd->buffer[ 3], 14, k3); + Subround(I, c1, d1, e1, a1, b1, ripemd->buffer[ 7], 5, k3); + Subround(I, b1, c1, d1, e1, a1, ripemd->buffer[15], 6, k3); + Subround(I, a1, b1, c1, d1, e1, ripemd->buffer[14], 8, k3); + Subround(I, e1, a1, b1, c1, d1, ripemd->buffer[ 5], 6, k3); + Subround(I, d1, e1, a1, b1, c1, ripemd->buffer[ 6], 5, k3); + Subround(I, c1, d1, e1, a1, b1, ripemd->buffer[ 2], 12, k3); + + Subround(J, b1, c1, d1, e1, a1, ripemd->buffer[ 4], 9, k4); + Subround(J, a1, b1, c1, d1, e1, ripemd->buffer[ 0], 15, k4); + Subround(J, e1, a1, b1, c1, d1, ripemd->buffer[ 5], 5, k4); + Subround(J, d1, e1, a1, b1, c1, ripemd->buffer[ 9], 11, k4); + Subround(J, c1, d1, e1, a1, b1, ripemd->buffer[ 7], 6, k4); + Subround(J, b1, c1, d1, e1, a1, ripemd->buffer[12], 8, k4); + Subround(J, a1, b1, c1, d1, e1, ripemd->buffer[ 2], 13, k4); + Subround(J, e1, a1, b1, c1, d1, ripemd->buffer[10], 12, k4); + Subround(J, d1, e1, a1, b1, c1, ripemd->buffer[14], 5, k4); + Subround(J, c1, d1, e1, a1, b1, ripemd->buffer[ 1], 12, k4); + Subround(J, b1, c1, d1, e1, a1, ripemd->buffer[ 3], 13, k4); + Subround(J, a1, b1, c1, d1, e1, ripemd->buffer[ 8], 14, k4); + Subround(J, e1, a1, b1, c1, d1, ripemd->buffer[11], 11, k4); + Subround(J, d1, e1, a1, b1, c1, ripemd->buffer[ 6], 8, k4); + Subround(J, c1, d1, e1, a1, b1, ripemd->buffer[15], 5, k4); + Subround(J, b1, c1, d1, e1, a1, ripemd->buffer[13], 6, k4); + + Subround(J, a2, b2, c2, d2, e2, ripemd->buffer[ 5], 8, k5); + Subround(J, e2, a2, b2, c2, d2, ripemd->buffer[14], 9, k5); + Subround(J, d2, e2, a2, b2, c2, ripemd->buffer[ 7], 9, k5); + Subround(J, c2, d2, e2, a2, b2, ripemd->buffer[ 0], 11, k5); + Subround(J, b2, c2, d2, e2, a2, ripemd->buffer[ 9], 13, k5); + Subround(J, a2, b2, c2, d2, e2, ripemd->buffer[ 2], 15, k5); + Subround(J, e2, a2, b2, c2, d2, ripemd->buffer[11], 15, k5); + Subround(J, d2, e2, a2, b2, c2, ripemd->buffer[ 4], 5, 
k5); + Subround(J, c2, d2, e2, a2, b2, ripemd->buffer[13], 7, k5); + Subround(J, b2, c2, d2, e2, a2, ripemd->buffer[ 6], 7, k5); + Subround(J, a2, b2, c2, d2, e2, ripemd->buffer[15], 8, k5); + Subround(J, e2, a2, b2, c2, d2, ripemd->buffer[ 8], 11, k5); + Subround(J, d2, e2, a2, b2, c2, ripemd->buffer[ 1], 14, k5); + Subround(J, c2, d2, e2, a2, b2, ripemd->buffer[10], 14, k5); + Subround(J, b2, c2, d2, e2, a2, ripemd->buffer[ 3], 12, k5); + Subround(J, a2, b2, c2, d2, e2, ripemd->buffer[12], 6, k5); + + Subround(I, e2, a2, b2, c2, d2, ripemd->buffer[ 6], 9, k6); + Subround(I, d2, e2, a2, b2, c2, ripemd->buffer[11], 13, k6); + Subround(I, c2, d2, e2, a2, b2, ripemd->buffer[ 3], 15, k6); + Subround(I, b2, c2, d2, e2, a2, ripemd->buffer[ 7], 7, k6); + Subround(I, a2, b2, c2, d2, e2, ripemd->buffer[ 0], 12, k6); + Subround(I, e2, a2, b2, c2, d2, ripemd->buffer[13], 8, k6); + Subround(I, d2, e2, a2, b2, c2, ripemd->buffer[ 5], 9, k6); + Subround(I, c2, d2, e2, a2, b2, ripemd->buffer[10], 11, k6); + Subround(I, b2, c2, d2, e2, a2, ripemd->buffer[14], 7, k6); + Subround(I, a2, b2, c2, d2, e2, ripemd->buffer[15], 7, k6); + Subround(I, e2, a2, b2, c2, d2, ripemd->buffer[ 8], 12, k6); + Subround(I, d2, e2, a2, b2, c2, ripemd->buffer[12], 7, k6); + Subround(I, c2, d2, e2, a2, b2, ripemd->buffer[ 4], 6, k6); + Subround(I, b2, c2, d2, e2, a2, ripemd->buffer[ 9], 15, k6); + Subround(I, a2, b2, c2, d2, e2, ripemd->buffer[ 1], 13, k6); + Subround(I, e2, a2, b2, c2, d2, ripemd->buffer[ 2], 11, k6); + + Subround(H, d2, e2, a2, b2, c2, ripemd->buffer[15], 9, k7); + Subround(H, c2, d2, e2, a2, b2, ripemd->buffer[ 5], 7, k7); + Subround(H, b2, c2, d2, e2, a2, ripemd->buffer[ 1], 15, k7); + Subround(H, a2, b2, c2, d2, e2, ripemd->buffer[ 3], 11, k7); + Subround(H, e2, a2, b2, c2, d2, ripemd->buffer[ 7], 8, k7); + Subround(H, d2, e2, a2, b2, c2, ripemd->buffer[14], 6, k7); + Subround(H, c2, d2, e2, a2, b2, ripemd->buffer[ 6], 6, k7); + Subround(H, b2, c2, d2, e2, a2, ripemd->buffer[ 9], 14, k7); + Subround(H, a2, b2, c2, d2, e2, ripemd->buffer[11], 12, k7); + Subround(H, e2, a2, b2, c2, d2, ripemd->buffer[ 8], 13, k7); + Subround(H, d2, e2, a2, b2, c2, ripemd->buffer[12], 5, k7); + Subround(H, c2, d2, e2, a2, b2, ripemd->buffer[ 2], 14, k7); + Subround(H, b2, c2, d2, e2, a2, ripemd->buffer[10], 13, k7); + Subround(H, a2, b2, c2, d2, e2, ripemd->buffer[ 0], 13, k7); + Subround(H, e2, a2, b2, c2, d2, ripemd->buffer[ 4], 7, k7); + Subround(H, d2, e2, a2, b2, c2, ripemd->buffer[13], 5, k7); + + Subround(G, c2, d2, e2, a2, b2, ripemd->buffer[ 8], 15, k8); + Subround(G, b2, c2, d2, e2, a2, ripemd->buffer[ 6], 5, k8); + Subround(G, a2, b2, c2, d2, e2, ripemd->buffer[ 4], 8, k8); + Subround(G, e2, a2, b2, c2, d2, ripemd->buffer[ 1], 11, k8); + Subround(G, d2, e2, a2, b2, c2, ripemd->buffer[ 3], 14, k8); + Subround(G, c2, d2, e2, a2, b2, ripemd->buffer[11], 14, k8); + Subround(G, b2, c2, d2, e2, a2, ripemd->buffer[15], 6, k8); + Subround(G, a2, b2, c2, d2, e2, ripemd->buffer[ 0], 14, k8); + Subround(G, e2, a2, b2, c2, d2, ripemd->buffer[ 5], 6, k8); + Subround(G, d2, e2, a2, b2, c2, ripemd->buffer[12], 9, k8); + Subround(G, c2, d2, e2, a2, b2, ripemd->buffer[ 2], 12, k8); + Subround(G, b2, c2, d2, e2, a2, ripemd->buffer[13], 9, k8); + Subround(G, a2, b2, c2, d2, e2, ripemd->buffer[ 9], 12, k8); + Subround(G, e2, a2, b2, c2, d2, ripemd->buffer[ 7], 5, k8); + Subround(G, d2, e2, a2, b2, c2, ripemd->buffer[10], 15, k8); + Subround(G, c2, d2, e2, a2, b2, ripemd->buffer[14], 8, k8); + + Subround(F, b2, c2, d2, e2, a2, 
ripemd->buffer[12], 8, k9); + Subround(F, a2, b2, c2, d2, e2, ripemd->buffer[15], 5, k9); + Subround(F, e2, a2, b2, c2, d2, ripemd->buffer[10], 12, k9); + Subround(F, d2, e2, a2, b2, c2, ripemd->buffer[ 4], 9, k9); + Subround(F, c2, d2, e2, a2, b2, ripemd->buffer[ 1], 12, k9); + Subround(F, b2, c2, d2, e2, a2, ripemd->buffer[ 5], 5, k9); + Subround(F, a2, b2, c2, d2, e2, ripemd->buffer[ 8], 14, k9); + Subround(F, e2, a2, b2, c2, d2, ripemd->buffer[ 7], 6, k9); + Subround(F, d2, e2, a2, b2, c2, ripemd->buffer[ 6], 8, k9); + Subround(F, c2, d2, e2, a2, b2, ripemd->buffer[ 2], 13, k9); + Subround(F, b2, c2, d2, e2, a2, ripemd->buffer[13], 6, k9); + Subround(F, a2, b2, c2, d2, e2, ripemd->buffer[14], 5, k9); + Subround(F, e2, a2, b2, c2, d2, ripemd->buffer[ 0], 15, k9); + Subround(F, d2, e2, a2, b2, c2, ripemd->buffer[ 3], 13, k9); + Subround(F, c2, d2, e2, a2, b2, ripemd->buffer[ 9], 11, k9); + Subround(F, b2, c2, d2, e2, a2, ripemd->buffer[11], 11, k9); + + c1 = ripemd->digest[1] + c1 + d2; + ripemd->digest[1] = ripemd->digest[2] + d1 + e2; + ripemd->digest[2] = ripemd->digest[3] + e1 + a2; + ripemd->digest[3] = ripemd->digest[4] + a1 + b2; + ripemd->digest[4] = ripemd->digest[0] + b1 + c2; + ripemd->digest[0] = c1; +} + + +static WC_INLINE void AddLength(RipeMd* ripemd, word32 len) +{ + word32 tmp = ripemd->loLen; + if ( (ripemd->loLen += len) < tmp) + ripemd->hiLen++; /* carry low to high */ +} + + +int wc_RipeMdUpdate(RipeMd* ripemd, const byte* data, word32 len) +{ + /* do block size increments */ + byte* local; + + if (ripemd == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + + local = (byte*)ripemd->buffer; + + while (len) { + word32 add = min(len, RIPEMD_BLOCK_SIZE - ripemd->buffLen); + XMEMCPY(&local[ripemd->buffLen], data, add); + + ripemd->buffLen += add; + data += add; + len -= add; + + if (ripemd->buffLen == RIPEMD_BLOCK_SIZE) { + #ifdef BIG_ENDIAN_ORDER + ByteReverseWords(ripemd->buffer, ripemd->buffer, + RIPEMD_BLOCK_SIZE); + #endif + Transform(ripemd); + AddLength(ripemd, RIPEMD_BLOCK_SIZE); + ripemd->buffLen = 0; + } + } + return 0; +} + + +int wc_RipeMdFinal(RipeMd* ripemd, byte* hash) +{ + byte* local; + + if (ripemd == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + + local = (byte*)ripemd->buffer; + + AddLength(ripemd, ripemd->buffLen); /* before adding pads */ + + local[ripemd->buffLen++] = 0x80; /* add 1 */ + + /* pad with zeros */ + if (ripemd->buffLen > RIPEMD_PAD_SIZE) { + XMEMSET(&local[ripemd->buffLen], 0, RIPEMD_BLOCK_SIZE - ripemd->buffLen); + ripemd->buffLen += RIPEMD_BLOCK_SIZE - ripemd->buffLen; + + #ifdef BIG_ENDIAN_ORDER + ByteReverseWords(ripemd->buffer, ripemd->buffer, RIPEMD_BLOCK_SIZE); + #endif + Transform(ripemd); + ripemd->buffLen = 0; + } + XMEMSET(&local[ripemd->buffLen], 0, RIPEMD_PAD_SIZE - ripemd->buffLen); + + /* put lengths in bits */ + ripemd->loLen = ripemd->loLen << 3; + ripemd->hiLen = (ripemd->loLen >> (8*sizeof(ripemd->loLen) - 3)) + + (ripemd->hiLen << 3); + + /* store lengths */ + #ifdef BIG_ENDIAN_ORDER + ByteReverseWords(ripemd->buffer, ripemd->buffer, RIPEMD_BLOCK_SIZE); + #endif + /* ! length ordering dependent on digest endian type ! 
*/ + XMEMCPY(&local[RIPEMD_PAD_SIZE], &ripemd->loLen, sizeof(word32)); + XMEMCPY(&local[RIPEMD_PAD_SIZE + sizeof(word32)], &ripemd->hiLen, + sizeof(word32)); + + Transform(ripemd); + #ifdef BIG_ENDIAN_ORDER + ByteReverseWords(ripemd->buffer, ripemd->buffer, RIPEMD_BLOCK_SIZE); + #endif + /* ! length ordering dependent on digest endian type ! */ + XMEMCPY(&local[RIPEMD_PAD_SIZE], &ripemd->loLen, sizeof(word32)); + XMEMCPY(&local[RIPEMD_PAD_SIZE + sizeof(word32)], &ripemd->hiLen, + sizeof(word32)); + + Transform(ripemd); + #ifdef BIG_ENDIAN_ORDER + ByteReverseWords(ripemd->digest, ripemd->digest, RIPEMD_DIGEST_SIZE); + #endif + XMEMCPY(hash, ripemd->digest, RIPEMD_DIGEST_SIZE); + + return wc_InitRipeMd(ripemd); /* reset state */ +} + + +#endif /* WOLFSSL_RIPEMD */ diff --git a/client/wolfssl/wolfcrypt/src/rsa.c b/client/wolfssl/wolfcrypt/src/rsa.c new file mode 100644 index 0000000..69ab7b2 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/rsa.c @@ -0,0 +1,4201 @@ +/* rsa.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> +#include <wolfssl/wolfcrypt/error-crypt.h> + +#ifndef NO_RSA + +#if defined(HAVE_FIPS) && \ + defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2) + + /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ + #define FIPS_NO_WRAPPERS + + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$e") + #pragma const_seg(".fipsB$e") + #endif +#endif + +#include <wolfssl/wolfcrypt/rsa.h> + +#ifdef WOLFSSL_AFALG_XILINX_RSA +#include <wolfssl/wolfcrypt/port/af_alg/wc_afalg.h> +#endif + +#ifdef WOLFSSL_HAVE_SP_RSA +#include <wolfssl/wolfcrypt/sp.h> +#endif + +/* +Possible RSA enable options: + * NO_RSA: Overall control of RSA default: on (not defined) + * WC_RSA_BLINDING: Uses Blinding w/ Private Ops default: off + Note: slower by ~20% + * WOLFSSL_KEY_GEN: Allows Private Key Generation default: off + * RSA_LOW_MEM: NON CRT Private Operations, less memory default: off + * WC_NO_RSA_OAEP: Disables RSA OAEP padding default: on (not defined) + * WC_RSA_NONBLOCK: Enables support for RSA non-blocking default: off + * WC_RSA_NONBLOCK_TIME: Enables support for time based blocking default: off + * time calculation. +*/ + +/* +RSA Key Size Configuration: + * FP_MAX_BITS: With USE_FAST_MATH only default: 4096 + If USE_FAST_MATH then use this to override default. + Value is key size * 2. Example: RSA 3072 = 6144 +*/ + + +/* If building for old FIPS. 
*/ +#if defined(HAVE_FIPS) && \ + (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2)) + +int wc_InitRsaKey(RsaKey* key, void* ptr) +{ + if (key == NULL) { + return BAD_FUNC_ARG; + } + + return InitRsaKey_fips(key, ptr); +} + + +int wc_InitRsaKey_ex(RsaKey* key, void* ptr, int devId) +{ + (void)devId; + if (key == NULL) { + return BAD_FUNC_ARG; + } + return InitRsaKey_fips(key, ptr); +} + + +int wc_FreeRsaKey(RsaKey* key) +{ + return FreeRsaKey_fips(key); +} + + +#ifndef WOLFSSL_RSA_VERIFY_ONLY +int wc_RsaPublicEncrypt(const byte* in, word32 inLen, byte* out, + word32 outLen, RsaKey* key, WC_RNG* rng) +{ + if (in == NULL || out == NULL || key == NULL || rng == NULL) { + return BAD_FUNC_ARG; + } + return RsaPublicEncrypt_fips(in, inLen, out, outLen, key, rng); +} +#endif + + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +int wc_RsaPrivateDecryptInline(byte* in, word32 inLen, byte** out, + RsaKey* key) +{ + if (in == NULL || out == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + return RsaPrivateDecryptInline_fips(in, inLen, out, key); +} + + +int wc_RsaPrivateDecrypt(const byte* in, word32 inLen, byte* out, + word32 outLen, RsaKey* key) +{ + if (in == NULL || out == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + return RsaPrivateDecrypt_fips(in, inLen, out, outLen, key); +} + + +int wc_RsaSSL_Sign(const byte* in, word32 inLen, byte* out, + word32 outLen, RsaKey* key, WC_RNG* rng) +{ + if (in == NULL || out == NULL || key == NULL || inLen == 0) { + return BAD_FUNC_ARG; + } + return RsaSSL_Sign_fips(in, inLen, out, outLen, key, rng); +} +#endif + + +int wc_RsaSSL_VerifyInline(byte* in, word32 inLen, byte** out, RsaKey* key) +{ + if (in == NULL || out == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + return RsaSSL_VerifyInline_fips(in, inLen, out, key); +} + + +int wc_RsaSSL_Verify(const byte* in, word32 inLen, byte* out, + word32 outLen, RsaKey* key) +{ + if (in == NULL || out == NULL || key == NULL || inLen == 0) { + return BAD_FUNC_ARG; + } + return RsaSSL_Verify_fips(in, inLen, out, outLen, key); +} + + +int wc_RsaEncryptSize(RsaKey* key) +{ + if (key == NULL) { + return BAD_FUNC_ARG; + } + return RsaEncryptSize_fips(key); +} + + +#ifndef WOLFSSL_RSA_VERIFY_ONLY +int wc_RsaFlattenPublicKey(RsaKey* key, byte* a, word32* aSz, byte* b, + word32* bSz) +{ + + /* not specified as fips so not needing _fips */ + return RsaFlattenPublicKey(key, a, aSz, b, bSz); +} +#endif + + +#ifdef WOLFSSL_KEY_GEN + int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng) + { + return MakeRsaKey(key, size, e, rng); + } +#endif + + +/* these are functions in asn and are routed to wolfssl/wolfcrypt/asn.c +* wc_RsaPrivateKeyDecode +* wc_RsaPublicKeyDecode +*/ + +#else /* else build without fips, or for new fips */ + +#include <wolfssl/wolfcrypt/random.h> +#include <wolfssl/wolfcrypt/logging.h> +#ifdef WOLF_CRYPTO_CB + #include <wolfssl/wolfcrypt/cryptocb.h> +#endif +#ifdef NO_INLINE + #include <wolfssl/wolfcrypt/misc.h> +#else + #define WOLFSSL_MISC_INCLUDED + #include <wolfcrypt/src/misc.c> +#endif + + +enum { + RSA_STATE_NONE = 0, + + RSA_STATE_ENCRYPT_PAD, + RSA_STATE_ENCRYPT_EXPTMOD, + RSA_STATE_ENCRYPT_RES, + + RSA_STATE_DECRYPT_EXPTMOD, + RSA_STATE_DECRYPT_UNPAD, + RSA_STATE_DECRYPT_RES, +}; + + +static void wc_RsaCleanup(RsaKey* key) +{ +#ifndef WOLFSSL_RSA_VERIFY_INLINE + if (key && key->data) { + /* make sure any allocated memory is freed */ + if (key->dataIsAlloc) { + #ifndef WOLFSSL_RSA_PUBLIC_ONLY + if (key->type == RSA_PRIVATE_DECRYPT || + key->type == RSA_PRIVATE_ENCRYPT) { + ForceZero(key->data, key->dataLen); + } + #endif + XFREE(key->data, key->heap, DYNAMIC_TYPE_WOLF_BIGINT); + key->dataIsAlloc = 0; + } + key->data = NULL; + 
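/* dataLen is cleared next so a stale length can never be paired with + * the now-NULL pointer; when the buffer was heap-allocated for a + * private-key operation its contents were scrubbed by ForceZero above. */ +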
key->dataLen = 0; + } +#else + (void)key; +#endif +} + +int wc_InitRsaKey_ex(RsaKey* key, void* heap, int devId) +{ + int ret = 0; + + if (key == NULL) { + return BAD_FUNC_ARG; + } + + XMEMSET(key, 0, sizeof(RsaKey)); + + key->type = RSA_TYPE_UNKNOWN; + key->state = RSA_STATE_NONE; + key->heap = heap; +#ifndef WOLFSSL_RSA_VERIFY_INLINE + key->dataIsAlloc = 0; + key->data = NULL; +#endif + key->dataLen = 0; +#ifdef WC_RSA_BLINDING + key->rng = NULL; +#endif + +#ifdef WOLF_CRYPTO_CB + key->devId = devId; +#else + (void)devId; +#endif + +#ifdef WOLFSSL_ASYNC_CRYPT + #ifdef WOLFSSL_CERT_GEN + XMEMSET(&key->certSignCtx, 0, sizeof(CertSignCtx)); + #endif + + #ifdef WC_ASYNC_ENABLE_RSA + /* handle as async */ + ret = wolfAsync_DevCtxInit(&key->asyncDev, WOLFSSL_ASYNC_MARKER_RSA, + key->heap, devId); + if (ret != 0) + return ret; + #endif /* WC_ASYNC_ENABLE_RSA */ +#endif /* WOLFSSL_ASYNC_CRYPT */ + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY + ret = mp_init_multi(&key->n, &key->e, NULL, NULL, NULL, NULL); + if (ret != MP_OKAY) + return ret; + +#if !defined(WOLFSSL_KEY_GEN) && !defined(OPENSSL_EXTRA) && defined(RSA_LOW_MEM) + ret = mp_init_multi(&key->d, &key->p, &key->q, NULL, NULL, NULL); +#else + ret = mp_init_multi(&key->d, &key->p, &key->q, &key->dP, &key->dQ, &key->u); +#endif + if (ret != MP_OKAY) { + mp_clear(&key->n); + mp_clear(&key->e); + return ret; + } +#else + ret = mp_init(&key->n); + if (ret != MP_OKAY) + return ret; + ret = mp_init(&key->e); + if (ret != MP_OKAY) { + mp_clear(&key->n); + return ret; + } +#endif + +#ifdef WOLFSSL_XILINX_CRYPT + key->pubExp = 0; + key->mod = NULL; +#endif + +#ifdef WOLFSSL_AFALG_XILINX_RSA + key->alFd = WC_SOCK_NOTSET; + key->rdFd = WC_SOCK_NOTSET; +#endif + + return ret; +} + +int wc_InitRsaKey(RsaKey* key, void* heap) +{ + return wc_InitRsaKey_ex(key, heap, INVALID_DEVID); +} + +#ifdef HAVE_PKCS11 +int wc_InitRsaKey_Id(RsaKey* key, unsigned char* id, int len, void* heap, + int devId) +{ + int ret = 0; + + if (key == NULL) + ret = BAD_FUNC_ARG; + if (ret == 0 && (len < 0 || len > RSA_MAX_ID_LEN)) + ret = BUFFER_E; + + if (ret == 0) + ret = wc_InitRsaKey_ex(key, heap, devId); + + if (ret == 0 && id != NULL && len != 0) { + XMEMCPY(key->id, id, len); + key->idLen = len; + } + + return ret; +} +#endif + + +#ifdef WOLFSSL_XILINX_CRYPT +#define MAX_E_SIZE 4 +/* Used to set up hardware state + * + * key the RSA key to set up + * + * returns 0 on success + */ +int wc_InitRsaHw(RsaKey* key) +{ + unsigned char* m; /* RSA modulus */ + word32 e = 0; /* RSA public exponent */ + int mSz; + int eSz; + + if (key == NULL) { + return BAD_FUNC_ARG; + } + + mSz = mp_unsigned_bin_size(&(key->n)); + m = (unsigned char*)XMALLOC(mSz, key->heap, DYNAMIC_TYPE_KEY); + if (m == 0) { + return MEMORY_E; + } + + if (mp_to_unsigned_bin(&(key->n), m) != MP_OKAY) { + WOLFSSL_MSG("Unable to get RSA key modulus"); + XFREE(m, key->heap, DYNAMIC_TYPE_KEY); + return MP_READ_E; + } + + eSz = mp_unsigned_bin_size(&(key->e)); + if (eSz > MAX_E_SIZE) { + WOLFSSL_MSG("Exponent of size 4 bytes expected"); + XFREE(m, key->heap, DYNAMIC_TYPE_KEY); + return BAD_FUNC_ARG; + } + + if (mp_to_unsigned_bin(&(key->e), (byte*)&e + (MAX_E_SIZE - eSz)) + != MP_OKAY) { + XFREE(m, key->heap, DYNAMIC_TYPE_KEY); + WOLFSSL_MSG("Unable to get RSA key exponent"); + return MP_READ_E; + } + + /* check for existing mod buffer to avoid memory leak */ + if (key->mod != NULL) { + XFREE(key->mod, key->heap, DYNAMIC_TYPE_KEY); + } + + key->pubExp = e; + key->mod = m; + + if (XSecure_RsaInitialize(&(key->xRsa), key->mod, NULL, + 
(byte*)&(key->pubExp)) != XST_SUCCESS) { + WOLFSSL_MSG("Unable to initialize RSA on hardware"); + XFREE(m, key->heap, DYNAMIC_TYPE_KEY); + return BAD_STATE_E; + } + +#ifdef WOLFSSL_XILINX_PATCH + /* currently a patch of xsecure_rsa.c for 2048 bit keys */ + if (wc_RsaEncryptSize(key) == 256) { + if (XSecure_RsaSetSize(&(key->xRsa), 2048) != XST_SUCCESS) { + WOLFSSL_MSG("Unable to set RSA key size on hardware"); + XFREE(m, key->heap, DYNAMIC_TYPE_KEY); + return BAD_STATE_E; + } + } +#endif + return 0; +} /* WOLFSSL_XILINX_CRYPT*/ + +#elif defined(WOLFSSL_CRYPTOCELL) + +int wc_InitRsaHw(RsaKey* key) +{ + CRYSError_t ret = 0; + byte e[3]; + word32 eSz = sizeof(e); + byte n[256]; + word32 nSz = sizeof(n); + byte d[256]; + word32 dSz = sizeof(d); + byte p[128]; + word32 pSz = sizeof(p); + byte q[128]; + word32 qSz = sizeof(q); + + if (key == NULL) { + return BAD_FUNC_ARG; + } + + ret = wc_RsaExportKey(key, e, &eSz, n, &nSz, d, &dSz, p, &pSz, q, &qSz); + if (ret != 0) + return MP_READ_E; + + ret = CRYS_RSA_Build_PubKey(&key->ctx.pubKey, e, eSz, n, nSz); + if (ret != SA_SILIB_RET_OK){ + WOLFSSL_MSG("CRYS_RSA_Build_PubKey failed"); + return ret; + } + + ret = CRYS_RSA_Build_PrivKey(&key->ctx.privKey, d, dSz, e, eSz, n, nSz); + + if (ret != SA_SILIB_RET_OK){ + WOLFSSL_MSG("CRYS_RSA_Build_PrivKey failed"); + return ret; + } + key->type = RSA_PRIVATE; + return 0; +} +static int cc310_RSA_GenerateKeyPair(RsaKey* key, int size, long e) +{ + CRYSError_t ret = 0; + CRYS_RSAKGData_t KeyGenData; + CRYS_RSAKGFipsContext_t FipsCtx; + byte ex[3]; + uint16_t eSz = sizeof(ex); + byte n[256]; + uint16_t nSz = sizeof(n); + + ret = CRYS_RSA_KG_GenerateKeyPair(&wc_rndState, + wc_rndGenVectFunc, + (byte*)&e, + 3*sizeof(uint8_t), + size, + &key->ctx.privKey, + &key->ctx.pubKey, + &KeyGenData, + &FipsCtx); + + if (ret != SA_SILIB_RET_OK){ + WOLFSSL_MSG("CRYS_RSA_KG_GenerateKeyPair failed"); + return ret; + } + + ret = CRYS_RSA_Get_PubKey(&key->ctx.pubKey, ex, &eSz, n, &nSz); + if (ret != SA_SILIB_RET_OK){ + WOLFSSL_MSG("CRYS_RSA_Get_PubKey failed"); + return ret; + } + ret = wc_RsaPublicKeyDecodeRaw(n, nSz, ex, eSz, key); + + key->type = RSA_PRIVATE; + + return ret; +} +#endif /* WOLFSSL_CRYPTOCELL */ + +int wc_FreeRsaKey(RsaKey* key) +{ + int ret = 0; + + if (key == NULL) { + return BAD_FUNC_ARG; + } + + wc_RsaCleanup(key); + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) + wolfAsync_DevCtxFree(&key->asyncDev, WOLFSSL_ASYNC_MARKER_RSA); +#endif + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY + if (key->type == RSA_PRIVATE) { +#if defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || !defined(RSA_LOW_MEM) + mp_forcezero(&key->u); + mp_forcezero(&key->dQ); + mp_forcezero(&key->dP); +#endif + mp_forcezero(&key->q); + mp_forcezero(&key->p); + mp_forcezero(&key->d); + } + /* private part */ +#if defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || !defined(RSA_LOW_MEM) + mp_clear(&key->u); + mp_clear(&key->dQ); + mp_clear(&key->dP); +#endif + mp_clear(&key->q); + mp_clear(&key->p); + mp_clear(&key->d); +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ + + /* public part */ + mp_clear(&key->e); + mp_clear(&key->n); + +#ifdef WOLFSSL_XILINX_CRYPT + XFREE(key->mod, key->heap, DYNAMIC_TYPE_KEY); + key->mod = NULL; +#endif + +#ifdef WOLFSSL_AFALG_XILINX_RSA + /* make sure that sockets are closed on cleanup */ + if (key->alFd > 0) { + close(key->alFd); + key->alFd = WC_SOCK_NOTSET; + } + if (key->rdFd > 0) { + close(key->rdFd); + key->rdFd = WC_SOCK_NOTSET; + } +#endif + + return ret; +} + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +#if 
defined(WOLFSSL_KEY_GEN) && !defined(WOLFSSL_NO_RSA_KEY_CHECK) +/* Check the pair-wise consistency of the RSA key. + * From NIST SP 800-56B, section 6.4.1.1. + * Verify that k = (k^e)^d, for some k: 1 < k < n-1. */ +int wc_CheckRsaKey(RsaKey* key) +{ +#if defined(WOLFSSL_CRYPTOCELL) + return 0; +#endif +#ifdef WOLFSSL_SMALL_STACK + mp_int *k = NULL, *tmp = NULL; +#else + mp_int k[1], tmp[1]; +#endif + int ret = 0; + +#ifdef WOLFSSL_SMALL_STACK + k = (mp_int*)XMALLOC(sizeof(mp_int) * 2, NULL, DYNAMIC_TYPE_RSA); + if (k == NULL) + return MEMORY_E; + tmp = k + 1; +#endif + + if (mp_init_multi(k, tmp, NULL, NULL, NULL, NULL) != MP_OKAY) + ret = MP_INIT_E; + + if (ret == 0) { + if (key == NULL) + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + if (mp_set_int(k, 0x2342) != MP_OKAY) + ret = MP_READ_E; + } + +#ifdef WOLFSSL_HAVE_SP_RSA +#ifndef WOLFSSL_SP_NO_2048 + if (mp_count_bits(&key->n) == 2048) { + ret = sp_ModExp_2048(k, &key->e, &key->n, tmp); + if (ret != 0) + ret = MP_EXPTMOD_E; + ret = sp_ModExp_2048(tmp, &key->d, &key->n, tmp); + if (ret != 0) + ret = MP_EXPTMOD_E; + } + else +#endif +#ifndef WOLFSSL_SP_NO_3072 + if (mp_count_bits(&key->n) == 3072) { + ret = sp_ModExp_3072(k, &key->e, &key->n, tmp); + if (ret != 0) + ret = MP_EXPTMOD_E; + ret = sp_ModExp_3072(tmp, &key->d, &key->n, tmp); + if (ret != 0) + ret = MP_EXPTMOD_E; + } + else +#endif +#ifdef WOLFSSL_SP_4096 + if (mp_count_bits(&key->n) == 4096) { + ret = sp_ModExp_4096(k, &key->e, &key->n, tmp); + if (ret != 0) + ret = MP_EXPTMOD_E; + ret = sp_ModExp_4096(tmp, &key->d, &key->n, tmp); + if (ret != 0) + ret = MP_EXPTMOD_E; + } + else +#endif +#endif +#ifdef WOLFSSL_SP_MATH + { + ret = WC_KEY_SIZE_E; + } +#else + { + if (ret == 0) { + if (mp_exptmod(k, &key->e, &key->n, tmp) != MP_OKAY) + ret = MP_EXPTMOD_E; + } + + if (ret == 0) { + if (mp_exptmod(tmp, &key->d, &key->n, tmp) != MP_OKAY) + ret = MP_EXPTMOD_E; + } + } +#endif + + if (ret == 0) { + if (mp_cmp(k, tmp) != MP_EQ) + ret = RSA_KEY_PAIR_E; + } + + /* Check d is less than n. */ + if (ret == 0 ) { + if (mp_cmp(&key->d, &key->n) != MP_LT) { + ret = MP_EXPTMOD_E; + } + } + /* Check p*q = n. */ + if (ret == 0 ) { + if (mp_mul(&key->p, &key->q, tmp) != MP_OKAY) { + ret = MP_EXPTMOD_E; + } + } + if (ret == 0 ) { + if (mp_cmp(&key->n, tmp) != MP_EQ) { + ret = MP_EXPTMOD_E; + } + } + + /* Check dP, dQ and u if they exist */ + if (ret == 0 && !mp_iszero(&key->dP)) { + if (mp_sub_d(&key->p, 1, tmp) != MP_OKAY) { + ret = MP_EXPTMOD_E; + } + /* Check dP < p-1. */ + if (ret == 0) { + if (mp_cmp(&key->dP, tmp) != MP_LT) { + ret = MP_EXPTMOD_E; + } + } + /* Check e*dP mod p-1 = 1. (dP = 1/e mod p-1) */ + if (ret == 0) { + if (mp_mulmod(&key->dP, &key->e, tmp, tmp) != MP_OKAY) { + ret = MP_EXPTMOD_E; + } + } + if (ret == 0 ) { + if (!mp_isone(tmp)) { + ret = MP_EXPTMOD_E; + } + } + + if (ret == 0) { + if (mp_sub_d(&key->q, 1, tmp) != MP_OKAY) { + ret = MP_EXPTMOD_E; + } + } + /* Check dQ < q-1. */ + if (ret == 0) { + if (mp_cmp(&key->dQ, tmp) != MP_LT) { + ret = MP_EXPTMOD_E; + } + } + /* Check e*dQ mod q-1 = 1. (dQ = 1/e mod q-1) */ + if (ret == 0) { + if (mp_mulmod(&key->dQ, &key->e, tmp, tmp) != MP_OKAY) { + ret = MP_EXPTMOD_E; + } + } + if (ret == 0 ) { + if (!mp_isone(tmp)) { + ret = MP_EXPTMOD_E; + } + } + + /* Check u < p. */ + if (ret == 0) { + if (mp_cmp(&key->u, &key->p) != MP_LT) { + ret = MP_EXPTMOD_E; + } + } + /* Check u*q mod p = 1. 
(u = 1/q mod p) */ + if (ret == 0) { + if (mp_mulmod(&key->u, &key->q, &key->p, tmp) != MP_OKAY) { + ret = MP_EXPTMOD_E; + } + } + if (ret == 0 ) { + if (!mp_isone(tmp)) { + ret = MP_EXPTMOD_E; + } + } + } + + mp_forcezero(tmp); + mp_clear(tmp); + mp_clear(k); +#ifdef WOLFSSL_SMALL_STACK + XFREE(k, NULL, DYNAMIC_TYPE_RSA); +#endif + + return ret; +} +#endif +#endif + + +#if !defined(WC_NO_RSA_OAEP) || defined(WC_RSA_PSS) +/* Uses MGF1 standard as a mask generation function + hType: hash type used + seed: seed to use for generating mask + seedSz: size of seed buffer + out: mask output after generation + outSz: size of output buffer + */ +#if !defined(NO_SHA) || !defined(NO_SHA256) || defined(WOLFSSL_SHA384) || defined(WOLFSSL_SHA512) +static int RsaMGF1(enum wc_HashType hType, byte* seed, word32 seedSz, + byte* out, word32 outSz, void* heap) +{ + byte* tmp; + /* needs to be large enough for seed size plus counter(4) */ + byte tmpA[WC_MAX_DIGEST_SIZE + 4]; + byte tmpF; /* 1 if dynamic memory needs freed */ + word32 tmpSz; + int hLen; + int ret; + word32 counter; + word32 idx; + hLen = wc_HashGetDigestSize(hType); + counter = 0; + idx = 0; + + (void)heap; + + /* check error return of wc_HashGetDigestSize */ + if (hLen < 0) { + return hLen; + } + + /* if tmp is not large enough than use some dynamic memory */ + if ((seedSz + 4) > sizeof(tmpA) || (word32)hLen > sizeof(tmpA)) { + /* find largest amount of memory needed which will be the max of + * hLen and (seedSz + 4) since tmp is used to store the hash digest */ + tmpSz = ((seedSz + 4) > (word32)hLen)? seedSz + 4: (word32)hLen; + tmp = (byte*)XMALLOC(tmpSz, heap, DYNAMIC_TYPE_RSA_BUFFER); + if (tmp == NULL) { + return MEMORY_E; + } + tmpF = 1; /* make sure to free memory when done */ + } + else { + /* use array on the stack */ + tmpSz = sizeof(tmpA); + tmp = tmpA; + tmpF = 0; /* no need to free memory at end */ + } + + do { + int i = 0; + XMEMCPY(tmp, seed, seedSz); + + /* counter to byte array appended to tmp */ + tmp[seedSz] = (counter >> 24) & 0xFF; + tmp[seedSz + 1] = (counter >> 16) & 0xFF; + tmp[seedSz + 2] = (counter >> 8) & 0xFF; + tmp[seedSz + 3] = (counter) & 0xFF; + + /* hash and append to existing output */ + if ((ret = wc_Hash(hType, tmp, (seedSz + 4), tmp, tmpSz)) != 0) { + /* check for if dynamic memory was needed, then free */ + if (tmpF) { + XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER); + } + return ret; + } + + for (i = 0; i < hLen && idx < outSz; i++) { + out[idx++] = tmp[i]; + } + counter++; + } while (idx < outSz); + + /* check for if dynamic memory was needed, then free */ + if (tmpF) { + XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER); + } + + return 0; +} +#endif /* SHA2 Hashes */ + +/* helper function to direct which mask generation function is used + switched on type input + */ +static int RsaMGF(int type, byte* seed, word32 seedSz, byte* out, + word32 outSz, void* heap) +{ + int ret; + + switch(type) { + #ifndef NO_SHA + case WC_MGF1SHA1: + ret = RsaMGF1(WC_HASH_TYPE_SHA, seed, seedSz, out, outSz, heap); + break; + #endif + #ifndef NO_SHA256 + #ifdef WOLFSSL_SHA224 + case WC_MGF1SHA224: + ret = RsaMGF1(WC_HASH_TYPE_SHA224, seed, seedSz, out, outSz, heap); + break; + #endif + case WC_MGF1SHA256: + ret = RsaMGF1(WC_HASH_TYPE_SHA256, seed, seedSz, out, outSz, heap); + break; + #endif + #ifdef WOLFSSL_SHA384 + case WC_MGF1SHA384: + ret = RsaMGF1(WC_HASH_TYPE_SHA384, seed, seedSz, out, outSz, heap); + break; + #endif + #ifdef WOLFSSL_SHA512 + case WC_MGF1SHA512: + ret = RsaMGF1(WC_HASH_TYPE_SHA512, seed, seedSz, out, outSz, heap); 
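+ /* note: each case above only binds an MGF identifier to its digest; + the counter-based construction itself, appending Hash(seed || counter) + until outSz bytes are produced (RFC 8017, section B.2.1), is + implemented once in RsaMGF1 */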
+ break; + #endif + default: + WOLFSSL_MSG("Unknown MGF type: check build options"); + ret = BAD_FUNC_ARG; + } + + /* in case of default avoid unused warning */ + (void)seed; + (void)seedSz; + (void)out; + (void)outSz; + (void)heap; + + return ret; +} +#endif /* !WC_NO_RSA_OAEP || WC_RSA_PSS */ + + +/* Padding */ +#ifndef WOLFSSL_RSA_VERIFY_ONLY +#ifndef WC_NO_RNG +#ifndef WC_NO_RSA_OAEP +static int RsaPad_OAEP(const byte* input, word32 inputLen, byte* pkcsBlock, + word32 pkcsBlockLen, byte padValue, WC_RNG* rng, + enum wc_HashType hType, int mgf, byte* optLabel, word32 labelLen, + void* heap) +{ + int ret; + int hLen; + int psLen; + int i; + word32 idx; + + byte* dbMask; + + #ifdef WOLFSSL_SMALL_STACK + byte* lHash = NULL; + byte* seed = NULL; + #else + /* must be large enough to contain largest hash */ + byte lHash[WC_MAX_DIGEST_SIZE]; + byte seed[ WC_MAX_DIGEST_SIZE]; + #endif + + /* no label is allowed, but catch if no label provided and length > 0 */ + if (optLabel == NULL && labelLen > 0) { + return BUFFER_E; + } + + /* limit of label is the same as limit of hash function which is massive */ + hLen = wc_HashGetDigestSize(hType); + if (hLen < 0) { + return hLen; + } + + #ifdef WOLFSSL_SMALL_STACK + lHash = (byte*)XMALLOC(hLen, heap, DYNAMIC_TYPE_RSA_BUFFER); + if (lHash == NULL) { + return MEMORY_E; + } + seed = (byte*)XMALLOC(hLen, heap, DYNAMIC_TYPE_RSA_BUFFER); + if (seed == NULL) { + XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER); + return MEMORY_E; + } + #else + /* hLen should never be larger than lHash since size is max digest size, + but check before blindly calling wc_Hash */ + if ((word32)hLen > sizeof(lHash)) { + WOLFSSL_MSG("OAEP lHash too small for digest!!"); + return MEMORY_E; + } + #endif + + if ((ret = wc_Hash(hType, optLabel, labelLen, lHash, hLen)) != 0) { + WOLFSSL_MSG("OAEP hash type possibly not supported or lHash too small"); + #ifdef WOLFSSL_SMALL_STACK + XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER); + XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER); + #endif + return ret; + } + + /* handles check of location for idx as well as psLen, cast to int to check + for pkcsBlockLen(k) - 2 * hLen - 2 being negative + This check is similar to decryption where k > 2 * hLen + 2 as msg + size approaches 0. In decryption, if k is less than or equal to + 2 * hLen + 2 then there is no possible room for msg. 
+ k = RSA key size + hLen = hash digest size -- will always be >= 0 at this point + */ + if ((word32)(2 * hLen + 2) > pkcsBlockLen) { + WOLFSSL_MSG("OAEP pad error hash to big for RSA key size"); + #ifdef WOLFSSL_SMALL_STACK + XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER); + XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER); + #endif + return BAD_FUNC_ARG; + } + + if (inputLen > (pkcsBlockLen - 2 * hLen - 2)) { + WOLFSSL_MSG("OAEP pad error message too long"); + #ifdef WOLFSSL_SMALL_STACK + XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER); + XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER); + #endif + return BAD_FUNC_ARG; + } + + /* concatenate lHash || PS || 0x01 || msg */ + idx = pkcsBlockLen - 1 - inputLen; + psLen = pkcsBlockLen - inputLen - 2 * hLen - 2; + if (pkcsBlockLen < inputLen) { /*make sure not writing over end of buffer */ + #ifdef WOLFSSL_SMALL_STACK + XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER); + XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER); + #endif + return BUFFER_E; + } + XMEMCPY(pkcsBlock + (pkcsBlockLen - inputLen), input, inputLen); + pkcsBlock[idx--] = 0x01; /* PS and M separator */ + while (psLen > 0 && idx > 0) { + pkcsBlock[idx--] = 0x00; + psLen--; + } + + idx = idx - hLen + 1; + XMEMCPY(pkcsBlock + idx, lHash, hLen); + + /* generate random seed */ + if ((ret = wc_RNG_GenerateBlock(rng, seed, hLen)) != 0) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER); + XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER); + #endif + return ret; + } + + /* create maskedDB from dbMask */ + dbMask = (byte*)XMALLOC(pkcsBlockLen - hLen - 1, heap, DYNAMIC_TYPE_RSA); + if (dbMask == NULL) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER); + XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER); + #endif + return MEMORY_E; + } + XMEMSET(dbMask, 0, pkcsBlockLen - hLen - 1); /* help static analyzer */ + + ret = RsaMGF(mgf, seed, hLen, dbMask, pkcsBlockLen - hLen - 1, heap); + if (ret != 0) { + XFREE(dbMask, heap, DYNAMIC_TYPE_RSA); + #ifdef WOLFSSL_SMALL_STACK + XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER); + XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER); + #endif + return ret; + } + + i = 0; + idx = hLen + 1; + while (idx < pkcsBlockLen && (word32)i < (pkcsBlockLen - hLen -1)) { + pkcsBlock[idx] = dbMask[i++] ^ pkcsBlock[idx]; + idx++; + } + XFREE(dbMask, heap, DYNAMIC_TYPE_RSA); + + + /* create maskedSeed from seedMask */ + idx = 0; + pkcsBlock[idx++] = 0x00; + /* create seedMask inline */ + if ((ret = RsaMGF(mgf, pkcsBlock + hLen + 1, pkcsBlockLen - hLen - 1, + pkcsBlock + 1, hLen, heap)) != 0) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER); + XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER); + #endif + return ret; + } + + /* xor created seedMask with seed to make maskedSeed */ + i = 0; + while (idx < (word32)(hLen + 1) && i < hLen) { + pkcsBlock[idx] = pkcsBlock[idx] ^ seed[i++]; + idx++; + } + + #ifdef WOLFSSL_SMALL_STACK + XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER); + XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER); + #endif + (void)padValue; + + return 0; +} +#endif /* !WC_NO_RSA_OAEP */ + +#ifdef WC_RSA_PSS + +/* 0x00 .. 0x00 0x01 | Salt | Gen Hash | 0xbc + * XOR MGF over all bytes down to end of Salt + * Gen Hash = HASH(8 * 0x00 | Message Hash | Salt) + * + * input Digest of the message. + * inputLen Length of digest. + * pkcsBlock Buffer to write to. + * pkcsBlockLen Length of buffer to write to. + * rng Random number generator (for salt). + * htype Hash function to use. + * mgf Mask generation function. 
+ * saltLen Length of salt to put in padding. + * bits Length of key in bits. + * heap Used for dynamic memory allocation. + * returns 0 on success, PSS_SALTLEN_E when the salt length is invalid + * and other negative values on error. + */ +static int RsaPad_PSS(const byte* input, word32 inputLen, byte* pkcsBlock, + word32 pkcsBlockLen, WC_RNG* rng, enum wc_HashType hType, int mgf, + int saltLen, int bits, void* heap) +{ + int ret = 0; + int hLen, i, o, maskLen, hiBits; + byte* m; + byte* s; +#if defined(WOLFSSL_PSS_LONG_SALT) || defined(WOLFSSL_PSS_SALT_LEN_DISCOVER) + #if defined(WOLFSSL_NO_MALLOC) && !defined(WOLFSSL_STATIC_MEMORY) + byte salt[RSA_MAX_SIZE/8 + RSA_PSS_PAD_SZ]; + #else + byte* salt = NULL; + #endif +#else + byte salt[WC_MAX_DIGEST_SIZE]; +#endif + +#if defined(WOLFSSL_PSS_LONG_SALT) || defined(WOLFSSL_PSS_SALT_LEN_DISCOVER) + if (pkcsBlockLen > RSA_MAX_SIZE/8) { + return MEMORY_E; + } +#endif + + hLen = wc_HashGetDigestSize(hType); + if (hLen < 0) + return hLen; + + hiBits = (bits - 1) & 0x7; + if (hiBits == 0) { + *(pkcsBlock++) = 0; + pkcsBlockLen--; + } + + if (saltLen == RSA_PSS_SALT_LEN_DEFAULT) { + saltLen = hLen; + #ifdef WOLFSSL_SHA512 + /* See FIPS 186-4 section 5.5 item (e). */ + if (bits == 1024 && hLen == WC_SHA512_DIGEST_SIZE) { + saltLen = RSA_PSS_SALT_MAX_SZ; + } + #endif + } +#ifndef WOLFSSL_PSS_LONG_SALT + else if (saltLen > hLen) { + return PSS_SALTLEN_E; + } +#endif +#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER + else if (saltLen < RSA_PSS_SALT_LEN_DEFAULT) { + return PSS_SALTLEN_E; + } +#else + else if (saltLen == RSA_PSS_SALT_LEN_DISCOVER) { + saltLen = (int)pkcsBlockLen - hLen - 2; + if (saltLen < 0) { + return PSS_SALTLEN_E; + } + } + else if (saltLen < RSA_PSS_SALT_LEN_DISCOVER) { + return PSS_SALTLEN_E; + } +#endif + if ((int)pkcsBlockLen - hLen < saltLen + 2) { + return PSS_SALTLEN_E; + } + + maskLen = pkcsBlockLen - 1 - hLen; + +#if defined(WOLFSSL_PSS_LONG_SALT) || defined(WOLFSSL_PSS_SALT_LEN_DISCOVER) + #if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY) + salt = (byte*)XMALLOC(RSA_PSS_PAD_SZ + inputLen + saltLen, heap, + DYNAMIC_TYPE_RSA_BUFFER); + if (salt == NULL) { + return MEMORY_E; + } + #endif + s = m = salt; + XMEMSET(m, 0, RSA_PSS_PAD_SZ); + m += RSA_PSS_PAD_SZ; + XMEMCPY(m, input, inputLen); + m += inputLen; + o = (int)(m - s); + if (saltLen > 0) { + ret = wc_RNG_GenerateBlock(rng, m, saltLen); + if (ret == 0) { + m += saltLen; + } + } +#else + s = m = pkcsBlock; + XMEMSET(m, 0, RSA_PSS_PAD_SZ); + m += RSA_PSS_PAD_SZ; + XMEMCPY(m, input, inputLen); + m += inputLen; + o = 0; + if (saltLen > 0) { + ret = wc_RNG_GenerateBlock(rng, salt, saltLen); + if (ret == 0) { + XMEMCPY(m, salt, saltLen); + m += saltLen; + } + } +#endif + if (ret == 0) { + /* Put Hash at end of pkcsBlock - 1 */ + ret = wc_Hash(hType, s, (word32)(m - s), pkcsBlock + maskLen, hLen); + } + if (ret == 0) { + pkcsBlock[pkcsBlockLen - 1] = RSA_PSS_PAD_TERM; + + ret = RsaMGF(mgf, pkcsBlock + maskLen, hLen, pkcsBlock, maskLen, heap); + } + if (ret == 0) { + pkcsBlock[0] &= (1 << hiBits) - 1; + + m = pkcsBlock + maskLen - saltLen - 1; + *(m++) ^= 0x01; + for (i = 0; i < saltLen; i++) { + m[i] ^= salt[o + i]; + } + } + +#if defined(WOLFSSL_PSS_LONG_SALT) || defined(WOLFSSL_PSS_SALT_LEN_DISCOVER) + #if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY) + if (salt != NULL) { + XFREE(salt, heap, DYNAMIC_TYPE_RSA_BUFFER); + } + #endif +#endif + return ret; +} +#endif /* WC_RSA_PSS */ +#endif /* !WC_NO_RNG */ + +static int RsaPad(const byte* input, word32 
inputLen, byte* pkcsBlock, + word32 pkcsBlockLen, byte padValue, WC_RNG* rng) +{ + if (input == NULL || inputLen == 0 || pkcsBlock == NULL || + pkcsBlockLen == 0) { + return BAD_FUNC_ARG; + } + + pkcsBlock[0] = 0x0; /* set first byte to zero and advance */ + pkcsBlock++; pkcsBlockLen--; + pkcsBlock[0] = padValue; /* insert padValue */ + + if (padValue == RSA_BLOCK_TYPE_1) { + if (pkcsBlockLen < inputLen + 2) { + WOLFSSL_MSG("RsaPad error, invalid length"); + return RSA_PAD_E; + } + + /* pad with 0xff bytes */ + XMEMSET(&pkcsBlock[1], 0xFF, pkcsBlockLen - inputLen - 2); + } + else { +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WC_NO_RNG) + /* pad with non-zero random bytes */ + word32 padLen, i; + int ret; + + if (pkcsBlockLen < inputLen + 1) { + WOLFSSL_MSG("RsaPad error, invalid length"); + return RSA_PAD_E; + } + + padLen = pkcsBlockLen - inputLen - 1; + ret = wc_RNG_GenerateBlock(rng, &pkcsBlock[1], padLen); + if (ret != 0) { + return ret; + } + + /* remove zeros */ + for (i = 1; i < padLen; i++) { + if (pkcsBlock[i] == 0) pkcsBlock[i] = 0x01; + } +#else + (void)rng; + return RSA_WRONG_TYPE_E; +#endif + } + + pkcsBlock[pkcsBlockLen-inputLen-1] = 0; /* separator */ + XMEMCPY(pkcsBlock+pkcsBlockLen-inputLen, input, inputLen); + + return 0; +} + +/* helper function to direct which padding is used */ +int wc_RsaPad_ex(const byte* input, word32 inputLen, byte* pkcsBlock, + word32 pkcsBlockLen, byte padValue, WC_RNG* rng, int padType, + enum wc_HashType hType, int mgf, byte* optLabel, word32 labelLen, + int saltLen, int bits, void* heap) +{ + int ret; + + switch (padType) + { + case WC_RSA_PKCSV15_PAD: + /*WOLFSSL_MSG("wolfSSL Using RSA PKCSV15 padding");*/ + ret = RsaPad(input, inputLen, pkcsBlock, pkcsBlockLen, + padValue, rng); + break; + +#ifndef WC_NO_RNG + #ifndef WC_NO_RSA_OAEP + case WC_RSA_OAEP_PAD: + WOLFSSL_MSG("wolfSSL Using RSA OAEP padding"); + ret = RsaPad_OAEP(input, inputLen, pkcsBlock, pkcsBlockLen, + padValue, rng, hType, mgf, optLabel, labelLen, heap); + break; + #endif + + #ifdef WC_RSA_PSS + case WC_RSA_PSS_PAD: + WOLFSSL_MSG("wolfSSL Using RSA PSS padding"); + ret = RsaPad_PSS(input, inputLen, pkcsBlock, pkcsBlockLen, rng, + hType, mgf, saltLen, bits, heap); + break; + #endif +#endif /* !WC_NO_RNG */ + + #ifdef WC_RSA_NO_PADDING + case WC_RSA_NO_PAD: + WOLFSSL_MSG("wolfSSL Using NO padding"); + + /* In the case of no padding being used check that input is exactly + * the RSA key length */ + if (bits <= 0 || inputLen != ((word32)bits/WOLFSSL_BIT_SIZE)) { + WOLFSSL_MSG("Bad input size"); + ret = RSA_PAD_E; + } + else { + XMEMCPY(pkcsBlock, input, inputLen); + ret = 0; + } + break; + #endif + + default: + WOLFSSL_MSG("Unknown RSA Pad Type"); + ret = RSA_PAD_E; + } + + /* silence warning if not used with padding scheme */ + (void)input; + (void)inputLen; + (void)pkcsBlock; + (void)pkcsBlockLen; + (void)padValue; + (void)rng; + (void)padType; + (void)hType; + (void)mgf; + (void)optLabel; + (void)labelLen; + (void)saltLen; + (void)bits; + (void)heap; + + return ret; +} +#endif /* WOLFSSL_RSA_VERIFY_ONLY */ + + +/* UnPadding */ +#ifndef WC_NO_RSA_OAEP +/* UnPad plaintext, set start to *output, return length of plaintext, + * < 0 on error */ +static int RsaUnPad_OAEP(byte *pkcsBlock, unsigned int pkcsBlockLen, + byte **output, enum wc_HashType hType, int mgf, + byte* optLabel, word32 labelLen, void* heap) +{ + int hLen; + int ret; + byte h[WC_MAX_DIGEST_SIZE]; /* max digest size */ + byte* tmp; + word32 idx; + + /* no label is allowed, but catch if no label provided and 
length > 0 */ + if (optLabel == NULL && labelLen > 0) { + return BUFFER_E; + } + + hLen = wc_HashGetDigestSize(hType); + if ((hLen < 0) || (pkcsBlockLen < (2 * (word32)hLen + 2))) { + return BAD_FUNC_ARG; + } + + tmp = (byte*)XMALLOC(pkcsBlockLen, heap, DYNAMIC_TYPE_RSA_BUFFER); + if (tmp == NULL) { + return MEMORY_E; + } + XMEMSET(tmp, 0, pkcsBlockLen); + + /* find seedMask value */ + if ((ret = RsaMGF(mgf, (byte*)(pkcsBlock + (hLen + 1)), + pkcsBlockLen - hLen - 1, tmp, hLen, heap)) != 0) { + XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER); + return ret; + } + + /* xor seedMask value with maskedSeed to get seed value */ + for (idx = 0; idx < (word32)hLen; idx++) { + tmp[idx] = tmp[idx] ^ pkcsBlock[1 + idx]; + } + + /* get dbMask value */ + if ((ret = RsaMGF(mgf, tmp, hLen, tmp + hLen, + pkcsBlockLen - hLen - 1, heap)) != 0) { + XFREE(tmp, NULL, DYNAMIC_TYPE_RSA_BUFFER); + return ret; + } + + /* get DB value by doing maskedDB xor dbMask */ + for (idx = 0; idx < (pkcsBlockLen - hLen - 1); idx++) { + pkcsBlock[hLen + 1 + idx] = pkcsBlock[hLen + 1 + idx] ^ tmp[idx + hLen]; + } + + /* done with use of tmp buffer */ + XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER); + + /* advance idx to index of PS and msg separator, account for PS size of 0*/ + idx = hLen + 1 + hLen; + while (idx < pkcsBlockLen && pkcsBlock[idx] == 0) {idx++;} + + /* create hash of label for comparison with hash sent */ + if ((ret = wc_Hash(hType, optLabel, labelLen, h, hLen)) != 0) { + return ret; + } + + /* say no to chosen ciphertext attack. + Comparison of lHash, Y, and separator value needs to all happen in + constant time. + Attackers should not be able to get error condition from the timing of + these checks. + */ + ret = 0; + ret |= ConstantCompare(pkcsBlock + hLen + 1, h, hLen); + ret += pkcsBlock[idx++] ^ 0x01; /* separator value is 0x01 */ + ret += pkcsBlock[0] ^ 0x00; /* Y, the first value, should be 0 */ + + /* Return 0 data length on error. */ + idx = ctMaskSelInt(ctMaskEq(ret, 0), idx, pkcsBlockLen); + + /* adjust pointer to correct location in array and return size of M */ + *output = (byte*)(pkcsBlock + idx); + return pkcsBlockLen - idx; +} +#endif /* WC_NO_RSA_OAEP */ + +#ifdef WC_RSA_PSS +/* 0x00 .. 0x00 0x01 | Salt | Gen Hash | 0xbc + * MGF over all bytes down to end of Salt + * + * pkcsBlock Buffer holding decrypted data. + * pkcsBlockLen Length of buffer. + * htype Hash function to use. + * mgf Mask generation function. + * saltLen Length of salt to put in padding. + * bits Length of key in bits. + * heap Used for dynamic memory allocation. + * returns 0 on success, PSS_SALTLEN_E when the salt length is invalid, + * BAD_PADDING_E when the padding is not valid, MEMORY_E when allocation fails + * and other negative values on error. 
+ */ +static int RsaUnPad_PSS(byte *pkcsBlock, unsigned int pkcsBlockLen, + byte **output, enum wc_HashType hType, int mgf, + int saltLen, int bits, void* heap) +{ + int ret; + byte* tmp; + int hLen, i, maskLen; +#ifdef WOLFSSL_SHA512 + int orig_bits = bits; +#endif +#if defined(WOLFSSL_NO_MALLOC) && !defined(WOLFSSL_STATIC_MEMORY) + byte tmp_buf[RSA_MAX_SIZE/8]; + tmp = tmp_buf; + + if (pkcsBlockLen > RSA_MAX_SIZE/8) { + return MEMORY_E; + } +#endif + + hLen = wc_HashGetDigestSize(hType); + if (hLen < 0) + return hLen; + bits = (bits - 1) & 0x7; + if ((pkcsBlock[0] & (0xff << bits)) != 0) { + return BAD_PADDING_E; + } + if (bits == 0) { + pkcsBlock++; + pkcsBlockLen--; + } + maskLen = (int)pkcsBlockLen - 1 - hLen; + if (maskLen < 0) { + WOLFSSL_MSG("RsaUnPad_PSS: Hash too large"); + return WC_KEY_SIZE_E; + } + + if (saltLen == RSA_PSS_SALT_LEN_DEFAULT) { + saltLen = hLen; + #ifdef WOLFSSL_SHA512 + /* See FIPS 186-4 section 5.5 item (e). */ + if (orig_bits == 1024 && hLen == WC_SHA512_DIGEST_SIZE) + saltLen = RSA_PSS_SALT_MAX_SZ; + #endif + } +#ifndef WOLFSSL_PSS_LONG_SALT + else if (saltLen > hLen) + return PSS_SALTLEN_E; +#endif +#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER + else if (saltLen < RSA_PSS_SALT_LEN_DEFAULT) + return PSS_SALTLEN_E; + if (maskLen < saltLen + 1) { + return PSS_SALTLEN_E; + } +#else + else if (saltLen < RSA_PSS_SALT_LEN_DISCOVER) + return PSS_SALTLEN_E; + if (saltLen != RSA_PSS_SALT_LEN_DISCOVER && maskLen < saltLen + 1) { + return WC_KEY_SIZE_E; + } +#endif + + if (pkcsBlock[pkcsBlockLen - 1] != RSA_PSS_PAD_TERM) { + WOLFSSL_MSG("RsaUnPad_PSS: Padding Term Error"); + return BAD_PADDING_E; + } + +#if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY) + tmp = (byte*)XMALLOC(maskLen, heap, DYNAMIC_TYPE_RSA_BUFFER); + if (tmp == NULL) { + return MEMORY_E; + } +#endif + + if ((ret = RsaMGF(mgf, pkcsBlock + maskLen, hLen, tmp, maskLen, + heap)) != 0) { + XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER); + return ret; + } + + tmp[0] &= (1 << bits) - 1; + pkcsBlock[0] &= (1 << bits) - 1; +#ifdef WOLFSSL_PSS_SALT_LEN_DISCOVER + if (saltLen == RSA_PSS_SALT_LEN_DISCOVER) { + for (i = 0; i < maskLen - 1; i++) { + if (tmp[i] != pkcsBlock[i]) { + break; + } + } + if (tmp[i] != (pkcsBlock[i] ^ 0x01)) { + XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER); + WOLFSSL_MSG("RsaUnPad_PSS: Padding Error Match"); + return PSS_SALTLEN_RECOVER_E; + } + saltLen = maskLen - (i + 1); + } + else +#endif + { + for (i = 0; i < maskLen - 1 - saltLen; i++) { + if (tmp[i] != pkcsBlock[i]) { + XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER); + WOLFSSL_MSG("RsaUnPad_PSS: Padding Error Match"); + return PSS_SALTLEN_E; + } + } + if (tmp[i] != (pkcsBlock[i] ^ 0x01)) { + XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER); + WOLFSSL_MSG("RsaUnPad_PSS: Padding Error End"); + return PSS_SALTLEN_E; + } + } + for (i++; i < maskLen; i++) + pkcsBlock[i] ^= tmp[i]; + +#if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY) + XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER); +#endif + + *output = pkcsBlock + maskLen - saltLen; + return saltLen + hLen; +} +#endif + +/* UnPad plaintext, set start to *output, return length of plaintext, + * < 0 on error */ +static int RsaUnPad(const byte *pkcsBlock, unsigned int pkcsBlockLen, + byte **output, byte padValue) +{ + int ret = BAD_FUNC_ARG; + word16 i; +#ifndef WOLFSSL_RSA_VERIFY_ONLY + byte invalid = 0; +#endif + + if (output == NULL || pkcsBlockLen == 0 || pkcsBlockLen > 0xFFFF) { + return BAD_FUNC_ARG; + } + + if (padValue == RSA_BLOCK_TYPE_1) { + /* First byte must be 0x00 and Second 
byte, block type, 0x01 */ + if (pkcsBlock[0] != 0 || pkcsBlock[1] != RSA_BLOCK_TYPE_1) { + WOLFSSL_MSG("RsaUnPad error, invalid formatting"); + return RSA_PAD_E; + } + + /* check the padding until we find the separator */ + for (i = 2; i < pkcsBlockLen && pkcsBlock[i++] == 0xFF; ) { } + + /* Minimum of 11 bytes of pre-message data and must have separator. */ + if (i < RSA_MIN_PAD_SZ || pkcsBlock[i-1] != 0) { + WOLFSSL_MSG("RsaUnPad error, bad formatting"); + return RSA_PAD_E; + } + + *output = (byte *)(pkcsBlock + i); + ret = pkcsBlockLen - i; + } +#ifndef WOLFSSL_RSA_VERIFY_ONLY + else { + word16 j; + word16 pastSep = 0; + + /* Decrypted with private key - unpad must be constant time. */ + for (i = 0, j = 2; j < pkcsBlockLen; j++) { + /* Update i if not passed the separator and at separator. */ + i |= (~pastSep) & ctMask16Eq(pkcsBlock[j], 0x00) & (j + 1); + pastSep |= ctMask16Eq(pkcsBlock[j], 0x00); + } + + /* Minimum of 11 bytes of pre-message data - including leading 0x00. */ + invalid |= ctMaskLT(i, RSA_MIN_PAD_SZ); + /* Must have seen separator. */ + invalid |= ~pastSep; + /* First byte must be 0x00. */ + invalid |= ctMaskNotEq(pkcsBlock[0], 0x00); + /* Check against expected block type: padValue */ + invalid |= ctMaskNotEq(pkcsBlock[1], padValue); + + *output = (byte *)(pkcsBlock + i); + ret = ((int)~invalid) & (pkcsBlockLen - i); + } +#endif + + return ret; +} + +/* helper function to direct unpadding + * + * bits is the key modulus size in bits + */ +int wc_RsaUnPad_ex(byte* pkcsBlock, word32 pkcsBlockLen, byte** out, + byte padValue, int padType, enum wc_HashType hType, + int mgf, byte* optLabel, word32 labelLen, int saltLen, + int bits, void* heap) +{ + int ret; + + switch (padType) { + case WC_RSA_PKCSV15_PAD: + /*WOLFSSL_MSG("wolfSSL Using RSA PKCSV15 un-padding");*/ + ret = RsaUnPad(pkcsBlock, pkcsBlockLen, out, padValue); + break; + + #ifndef WC_NO_RSA_OAEP + case WC_RSA_OAEP_PAD: + WOLFSSL_MSG("wolfSSL Using RSA OAEP un-padding"); + ret = RsaUnPad_OAEP((byte*)pkcsBlock, pkcsBlockLen, out, + hType, mgf, optLabel, labelLen, heap); + break; + #endif + + #ifdef WC_RSA_PSS + case WC_RSA_PSS_PAD: + WOLFSSL_MSG("wolfSSL Using RSA PSS un-padding"); + ret = RsaUnPad_PSS((byte*)pkcsBlock, pkcsBlockLen, out, hType, mgf, + saltLen, bits, heap); + break; + #endif + + #ifdef WC_RSA_NO_PADDING + case WC_RSA_NO_PAD: + WOLFSSL_MSG("wolfSSL Using NO un-padding"); + + /* In the case of no padding being used check that input is exactly + * the RSA key length */ + if (bits <= 0 || pkcsBlockLen != + ((word32)(bits+WOLFSSL_BIT_SIZE-1)/WOLFSSL_BIT_SIZE)) { + WOLFSSL_MSG("Bad input size"); + ret = RSA_PAD_E; + } + else { + if (out != NULL) { + *out = pkcsBlock; + } + ret = pkcsBlockLen; + } + break; + #endif /* WC_RSA_NO_PADDING */ + + default: + WOLFSSL_MSG("Unknown RSA UnPad Type"); + ret = RSA_PAD_E; + } + + /* silence warning if not used with padding scheme */ + (void)hType; + (void)mgf; + (void)optLabel; + (void)labelLen; + (void)saltLen; + (void)bits; + (void)heap; + + return ret; +} + +#if defined(WOLFSSL_XILINX_CRYPT) +/* + * Xilinx hardened crypto acceleration. + * + * Returns 0 on success and negative values on error. 
+ */ +static int wc_RsaFunctionXil(const byte* in, word32 inLen, byte* out, + word32* outLen, int type, RsaKey* key, WC_RNG* rng) +{ + int ret = 0; + word32 keyLen; + (void)rng; + + keyLen = wc_RsaEncryptSize(key); + if (keyLen > *outLen) { + WOLFSSL_MSG("Output buffer is not big enough"); + return BAD_FUNC_ARG; + } + + if (inLen != keyLen) { + WOLFSSL_MSG("Expected that inLen equals RSA key length"); + return BAD_FUNC_ARG; + } + + switch(type) { + case RSA_PRIVATE_DECRYPT: + case RSA_PRIVATE_ENCRYPT: + /* Currently public exponent is loaded by default. + * In SDK 2017.1 RSA exponent values are expected to be of 4 bytes + * leading to private key operations with Xsecure_RsaDecrypt not being + * supported */ + ret = RSA_WRONG_TYPE_E; + break; + case RSA_PUBLIC_ENCRYPT: + case RSA_PUBLIC_DECRYPT: + if (XSecure_RsaDecrypt(&(key->xRsa), in, out) != XST_SUCCESS) { + ret = BAD_STATE_E; + } + break; + default: + ret = RSA_WRONG_TYPE_E; + } + + *outLen = keyLen; + + return ret; +} +#endif /* WOLFSSL_XILINX_CRYPT */ + +#ifdef WC_RSA_NONBLOCK +static int wc_RsaFunctionNonBlock(const byte* in, word32 inLen, byte* out, + word32* outLen, int type, RsaKey* key) +{ + int ret = 0; + word32 keyLen, len; + + if (key == NULL || key->nb == NULL) { + return BAD_FUNC_ARG; + } + + if (key->nb->exptmod.state == TFM_EXPTMOD_NB_INIT) { + if (mp_init(&key->nb->tmp) != MP_OKAY) { + ret = MP_INIT_E; + } + + if (ret == 0) { + if (mp_read_unsigned_bin(&key->nb->tmp, (byte*)in, inLen) != MP_OKAY) { + ret = MP_READ_E; + } + } + } + + if (ret == 0) { + switch(type) { + case RSA_PRIVATE_DECRYPT: + case RSA_PRIVATE_ENCRYPT: + ret = fp_exptmod_nb(&key->nb->exptmod, &key->nb->tmp, &key->d, + &key->n, &key->nb->tmp); + if (ret == FP_WOULDBLOCK) + return ret; + if (ret != MP_OKAY) + ret = MP_EXPTMOD_E; + break; + + case RSA_PUBLIC_ENCRYPT: + case RSA_PUBLIC_DECRYPT: + ret = fp_exptmod_nb(&key->nb->exptmod, &key->nb->tmp, &key->e, + &key->n, &key->nb->tmp); + if (ret == FP_WOULDBLOCK) + return ret; + if (ret != MP_OKAY) + ret = MP_EXPTMOD_E; + break; + default: + ret = RSA_WRONG_TYPE_E; + break; + } + } + + if (ret == 0) { + keyLen = wc_RsaEncryptSize(key); + if (keyLen > *outLen) + ret = RSA_BUFFER_E; + } + if (ret == 0) { + len = mp_unsigned_bin_size(&key->nb->tmp); + + /* pad front w/ zeros to match key length */ + while (len < keyLen) { + *out++ = 0x00; + len++; + } + + *outLen = keyLen; + + /* convert */ + if (mp_to_unsigned_bin(&key->nb->tmp, out) != MP_OKAY) { + ret = MP_TO_E; + } + } + + mp_clear(&key->nb->tmp); + + return ret; +} +#endif /* WC_RSA_NONBLOCK */ + +#ifdef WOLFSSL_AFALG_XILINX_RSA +#ifndef ERROR_OUT +#define ERROR_OUT(x) ret = (x); goto done +#endif + +static const char WC_TYPE_ASYMKEY[] = "skcipher"; +static const char WC_NAME_RSA[] = "xilinx-zynqmp-rsa"; +#ifndef MAX_XILINX_RSA_KEY + /* max key size of 4096 bits / 512 bytes */ + #define MAX_XILINX_RSA_KEY 512 +#endif +static const byte XILINX_RSA_FLAG[] = {0x1}; + + +/* AF_ALG implementation of RSA */ +static int wc_RsaFunctionSync(const byte* in, word32 inLen, byte* out, + word32* outLen, int type, RsaKey* key, WC_RNG* rng) +{ + struct msghdr msg; + struct cmsghdr* cmsg; + struct iovec iov; + byte* keyBuf = NULL; + word32 keyBufSz = 0; + char cbuf[CMSG_SPACE(4) + CMSG_SPACE(sizeof(struct af_alg_iv) + 1)] = {0}; + int ret = 0; + int op = 0; /* decryption vs encryption flag */ + word32 keyLen; + + /* input and output buffer need to be aligned */ + ALIGN64 byte outBuf[MAX_XILINX_RSA_KEY]; + ALIGN64 byte inBuf[MAX_XILINX_RSA_KEY]; + + XMEMSET(&msg, 0, 
sizeof(struct msghdr)); + (void)rng; + + keyLen = wc_RsaEncryptSize(key); + if (keyLen > *outLen) { + ERROR_OUT(RSA_BUFFER_E); + } + + if (keyLen > MAX_XILINX_RSA_KEY) { + WOLFSSL_MSG("RSA key size larger than supported"); + ERROR_OUT(BAD_FUNC_ARG); + } + + if ((keyBuf = (byte*)XMALLOC(keyLen * 2, key->heap, DYNAMIC_TYPE_KEY)) + == NULL) { + ERROR_OUT(MEMORY_E); + } + + if ((ret = mp_to_unsigned_bin(&(key->n), keyBuf)) != MP_OKAY) { + ERROR_OUT(MP_TO_E); + } + + switch(type) { + case RSA_PRIVATE_DECRYPT: + case RSA_PRIVATE_ENCRYPT: + op = 1; /* set as decrypt */ + { + keyBufSz = mp_unsigned_bin_size(&(key->d)); + if ((mp_to_unsigned_bin(&(key->d), keyBuf + keyLen)) + != MP_OKAY) { + ERROR_OUT(MP_TO_E); + } + } + break; + + case RSA_PUBLIC_DECRYPT: + case RSA_PUBLIC_ENCRYPT: { + word32 exp = 0; + word32 eSz = mp_unsigned_bin_size(&(key->e)); + if ((mp_to_unsigned_bin(&(key->e), (byte*)&exp + + (sizeof(word32) - eSz))) != MP_OKAY) { + ERROR_OUT(MP_TO_E); + } + keyBufSz = sizeof(word32); + XMEMCPY(keyBuf + keyLen, (byte*)&exp, keyBufSz); + break; + } + + default: + ERROR_OUT(RSA_WRONG_TYPE_E); + } + keyBufSz += keyLen; /* add size of modulus */ + + /* check for existing sockets before creating new ones */ + if (key->alFd > 0) { + close(key->alFd); + key->alFd = WC_SOCK_NOTSET; + } + if (key->rdFd > 0) { + close(key->rdFd); + key->rdFd = WC_SOCK_NOTSET; + } + + /* create new sockets and set the key to use */ + if ((key->alFd = wc_Afalg_Socket()) < 0) { + WOLFSSL_MSG("Unable to create socket"); + ERROR_OUT(key->alFd); + } + if ((key->rdFd = wc_Afalg_CreateRead(key->alFd, WC_TYPE_ASYMKEY, + WC_NAME_RSA)) < 0) { + WOLFSSL_MSG("Unable to bind and create read/send socket"); + ERROR_OUT(key->rdFd); + } + if ((ret = setsockopt(key->alFd, SOL_ALG, ALG_SET_KEY, keyBuf, + keyBufSz)) < 0) { + WOLFSSL_MSG("Error setting RSA key"); + ERROR_OUT(ret); + } + + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + cmsg = CMSG_FIRSTHDR(&msg); + if ((ret = wc_Afalg_SetOp(cmsg, op)) < 0) { + ERROR_OUT(ret); + } + + /* set flag in IV spot, needed for Xilinx hardware acceleration use */ + cmsg = CMSG_NXTHDR(&msg, cmsg); + if ((ret = wc_Afalg_SetIv(cmsg, (byte*)XILINX_RSA_FLAG, + sizeof(XILINX_RSA_FLAG))) != 0) { + ERROR_OUT(ret); + } + + /* compose and send msg */ + XMEMCPY(inBuf, (byte*)in, inLen); /* for alignment */ + iov.iov_base = inBuf; + iov.iov_len = inLen; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + if ((ret = sendmsg(key->rdFd, &msg, 0)) <= 0) { + ERROR_OUT(WC_AFALG_SOCK_E); + } + + if ((ret = read(key->rdFd, outBuf, inLen)) <= 0) { + ERROR_OUT(WC_AFALG_SOCK_E); + } + XMEMCPY(out, outBuf, ret); + *outLen = keyLen; + +done: + /* clear key data and free buffer */ + if (keyBuf != NULL) { + ForceZero(keyBuf, keyBufSz); + } + XFREE(keyBuf, key->heap, DYNAMIC_TYPE_KEY); + + if (key->alFd > 0) { + close(key->alFd); + key->alFd = WC_SOCK_NOTSET; + } + if (key->rdFd > 0) { + close(key->rdFd); + key->rdFd = WC_SOCK_NOTSET; + } + + return ret; +} + +#else +static int wc_RsaFunctionSync(const byte* in, word32 inLen, byte* out, + word32* outLen, int type, RsaKey* key, WC_RNG* rng) +{ +#ifndef WOLFSSL_SP_MATH +#ifdef WOLFSSL_SMALL_STACK + mp_int* tmp; +#ifdef WC_RSA_BLINDING + mp_int* rnd; + mp_int* rndi; +#endif +#else + mp_int tmp[1]; +#ifdef WC_RSA_BLINDING + mp_int rnd[1], rndi[1]; +#endif +#endif + int ret = 0; + word32 keyLen = 0; +#endif + +#ifdef WOLFSSL_HAVE_SP_RSA +#ifndef WOLFSSL_SP_NO_2048 + if (mp_count_bits(&key->n) == 2048) { + switch(type) { +#ifndef WOLFSSL_RSA_PUBLIC_ONLY + case 
RSA_PRIVATE_DECRYPT: + case RSA_PRIVATE_ENCRYPT: + #ifdef WC_RSA_BLINDING + if (rng == NULL) + return MISSING_RNG_E; + #endif + #ifndef RSA_LOW_MEM + return sp_RsaPrivate_2048(in, inLen, &key->d, &key->p, &key->q, + &key->dP, &key->dQ, &key->u, &key->n, + out, outLen); + #else + return sp_RsaPrivate_2048(in, inLen, &key->d, &key->p, &key->q, + NULL, NULL, NULL, &key->n, out, outLen); + #endif +#endif + case RSA_PUBLIC_ENCRYPT: + case RSA_PUBLIC_DECRYPT: + return sp_RsaPublic_2048(in, inLen, &key->e, &key->n, out, outLen); + } + } +#endif +#ifndef WOLFSSL_SP_NO_3072 + if (mp_count_bits(&key->n) == 3072) { + switch(type) { +#ifndef WOLFSSL_RSA_PUBLIC_ONLY + case RSA_PRIVATE_DECRYPT: + case RSA_PRIVATE_ENCRYPT: + #ifdef WC_RSA_BLINDING + if (rng == NULL) + return MISSING_RNG_E; + #endif + #ifndef RSA_LOW_MEM + return sp_RsaPrivate_3072(in, inLen, &key->d, &key->p, &key->q, + &key->dP, &key->dQ, &key->u, &key->n, + out, outLen); + #else + return sp_RsaPrivate_3072(in, inLen, &key->d, &key->p, &key->q, + NULL, NULL, NULL, &key->n, out, outLen); + #endif +#endif + case RSA_PUBLIC_ENCRYPT: + case RSA_PUBLIC_DECRYPT: + return sp_RsaPublic_3072(in, inLen, &key->e, &key->n, out, outLen); + } + } +#endif +#ifdef WOLFSSL_SP_4096 + if (mp_count_bits(&key->n) == 4096) { + switch(type) { +#ifndef WOLFSSL_RSA_PUBLIC_ONLY + case RSA_PRIVATE_DECRYPT: + case RSA_PRIVATE_ENCRYPT: + #ifdef WC_RSA_BLINDING + if (rng == NULL) + return MISSING_RNG_E; + #endif + #ifndef RSA_LOW_MEM + return sp_RsaPrivate_4096(in, inLen, &key->d, &key->p, &key->q, + &key->dP, &key->dQ, &key->u, &key->n, + out, outLen); + #else + return sp_RsaPrivate_4096(in, inLen, &key->d, &key->p, &key->q, + NULL, NULL, NULL, &key->n, out, outLen); + #endif +#endif + case RSA_PUBLIC_ENCRYPT: + case RSA_PUBLIC_DECRYPT: + return sp_RsaPublic_4096(in, inLen, &key->e, &key->n, out, outLen); + } + } +#endif +#endif /* WOLFSSL_HAVE_SP_RSA */ + +#ifdef WOLFSSL_SP_MATH + (void)rng; + WOLFSSL_MSG("SP Key Size Error"); + return WC_KEY_SIZE_E; +#else + (void)rng; + +#ifdef WOLFSSL_SMALL_STACK + tmp = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_RSA); + if (tmp == NULL) + return MEMORY_E; +#ifdef WC_RSA_BLINDING + rnd = (mp_int*)XMALLOC(sizeof(mp_int) * 2, key->heap, DYNAMIC_TYPE_RSA); + if (rnd == NULL) { + XFREE(tmp, key->heap, DYNAMIC_TYPE_RSA); + return MEMORY_E; + } + rndi = rnd + 1; +#endif /* WC_RSA_BLINDING */ +#endif /* WOLFSSL_SMALL_STACK */ + + if (mp_init(tmp) != MP_OKAY) + ret = MP_INIT_E; + +#ifdef WC_RSA_BLINDING + if (ret == 0) { + if (type == RSA_PRIVATE_DECRYPT || type == RSA_PRIVATE_ENCRYPT) { + if (mp_init_multi(rnd, rndi, NULL, NULL, NULL, NULL) != MP_OKAY) { + mp_clear(tmp); + ret = MP_INIT_E; + } + } + } +#endif + +#ifndef TEST_UNPAD_CONSTANT_TIME + if (ret == 0 && mp_read_unsigned_bin(tmp, (byte*)in, inLen) != MP_OKAY) + ret = MP_READ_E; + + if (ret == 0) { + switch(type) { + #ifndef WOLFSSL_RSA_PUBLIC_ONLY + case RSA_PRIVATE_DECRYPT: + case RSA_PRIVATE_ENCRYPT: + { + #if defined(WC_RSA_BLINDING) && !defined(WC_NO_RNG) + /* blind */ + ret = mp_rand(rnd, get_digit_count(&key->n), rng); + + /* rndi = 1/rnd mod n */ + if (ret == 0 && mp_invmod(rnd, &key->n, rndi) != MP_OKAY) + ret = MP_INVMOD_E; + + /* rnd = rnd^e */ + if (ret == 0 && mp_exptmod(rnd, &key->e, &key->n, rnd) != MP_OKAY) + ret = MP_EXPTMOD_E; + + /* tmp = tmp*rnd mod n */ + if (ret == 0 && mp_mulmod(tmp, rnd, &key->n, tmp) != MP_OKAY) + ret = MP_MULMOD_E; + #endif /* WC_RSA_BLINDING && !WC_NO_RNG */ + + #ifdef RSA_LOW_MEM /* half as much memory but twice as slow 
*/ + if (ret == 0 && mp_exptmod(tmp, &key->d, &key->n, tmp) != MP_OKAY) + ret = MP_EXPTMOD_E; + #else + if (ret == 0) { + #ifdef WOLFSSL_SMALL_STACK + mp_int* tmpa; + mp_int* tmpb = NULL; + #else + mp_int tmpa[1], tmpb[1]; + #endif + int cleara = 0, clearb = 0; + + #ifdef WOLFSSL_SMALL_STACK + tmpa = (mp_int*)XMALLOC(sizeof(mp_int) * 2, + key->heap, DYNAMIC_TYPE_RSA); + if (tmpa != NULL) + tmpb = tmpa + 1; + else + ret = MEMORY_E; + #endif + + if (ret == 0) { + if (mp_init(tmpa) != MP_OKAY) + ret = MP_INIT_E; + else + cleara = 1; + } + + if (ret == 0) { + if (mp_init(tmpb) != MP_OKAY) + ret = MP_INIT_E; + else + clearb = 1; + } + + /* tmpa = tmp^dP mod p */ + if (ret == 0 && mp_exptmod(tmp, &key->dP, &key->p, + tmpa) != MP_OKAY) + ret = MP_EXPTMOD_E; + + /* tmpb = tmp^dQ mod q */ + if (ret == 0 && mp_exptmod(tmp, &key->dQ, &key->q, + tmpb) != MP_OKAY) + ret = MP_EXPTMOD_E; + + /* tmp = (tmpa - tmpb) * qInv (mod p) */ + if (ret == 0 && mp_sub(tmpa, tmpb, tmp) != MP_OKAY) + ret = MP_SUB_E; + + if (ret == 0 && mp_mulmod(tmp, &key->u, &key->p, + tmp) != MP_OKAY) + ret = MP_MULMOD_E; + + /* tmp = tmpb + q * tmp */ + if (ret == 0 && mp_mul(tmp, &key->q, tmp) != MP_OKAY) + ret = MP_MUL_E; + + if (ret == 0 && mp_add(tmp, tmpb, tmp) != MP_OKAY) + ret = MP_ADD_E; + + #ifdef WOLFSSL_SMALL_STACK + if (tmpa != NULL) + #endif + { + if (cleara) + mp_clear(tmpa); + if (clearb) + mp_clear(tmpb); + #ifdef WOLFSSL_SMALL_STACK + XFREE(tmpa, key->heap, DYNAMIC_TYPE_RSA); + #endif + } + } /* tmpa/b scope */ + #endif /* RSA_LOW_MEM */ + + #ifdef WC_RSA_BLINDING + /* unblind */ + if (ret == 0 && mp_mulmod(tmp, rndi, &key->n, tmp) != MP_OKAY) + ret = MP_MULMOD_E; + #endif /* WC_RSA_BLINDING */ + + break; + } + #endif + case RSA_PUBLIC_ENCRYPT: + case RSA_PUBLIC_DECRYPT: + #ifdef WOLFSSL_XILINX_CRYPT + ret = wc_RsaFunctionXil(in, inLen, out, outLen, type, key, rng); + #else + if (mp_exptmod_nct(tmp, &key->e, &key->n, tmp) != MP_OKAY) + ret = MP_EXPTMOD_E; + #endif + break; + default: + ret = RSA_WRONG_TYPE_E; + break; + } + } + + if (ret == 0) { + keyLen = wc_RsaEncryptSize(key); + if (keyLen > *outLen) + ret = RSA_BUFFER_E; + } + if (ret == 0) { + *outLen = keyLen; + if (mp_to_unsigned_bin_len(tmp, out, keyLen) != MP_OKAY) + ret = MP_TO_E; + } +#else + (void)type; + (void)key; + (void)keyLen; + XMEMCPY(out, in, inLen); + *outLen = inLen; +#endif + + mp_clear(tmp); +#ifdef WOLFSSL_SMALL_STACK + XFREE(tmp, key->heap, DYNAMIC_TYPE_RSA); +#endif +#ifdef WC_RSA_BLINDING + if (type == RSA_PRIVATE_DECRYPT || type == RSA_PRIVATE_ENCRYPT) { + mp_clear(rndi); + mp_clear(rnd); + } +#ifdef WOLFSSL_SMALL_STACK + XFREE(rnd, key->heap, DYNAMIC_TYPE_RSA); +#endif +#endif /* WC_RSA_BLINDING */ + return ret; +#endif /* WOLFSSL_SP_MATH */ +} +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) +static int wc_RsaFunctionAsync(const byte* in, word32 inLen, byte* out, + word32* outLen, int type, RsaKey* key, WC_RNG* rng) +{ + int ret = 0; + + (void)rng; + +#ifdef WOLFSSL_ASYNC_CRYPT_TEST + if (wc_AsyncTestInit(&key->asyncDev, ASYNC_TEST_RSA_FUNC)) { + WC_ASYNC_TEST* testDev = &key->asyncDev.test; + testDev->rsaFunc.in = in; + testDev->rsaFunc.inSz = inLen; + testDev->rsaFunc.out = out; + testDev->rsaFunc.outSz = outLen; + testDev->rsaFunc.type = type; + testDev->rsaFunc.key = key; + testDev->rsaFunc.rng = rng; + return WC_PENDING_E; + } +#endif /* WOLFSSL_ASYNC_CRYPT_TEST */ + + switch(type) { +#ifndef WOLFSSL_RSA_PUBLIC_ONLY + case RSA_PRIVATE_DECRYPT: + case RSA_PRIVATE_ENCRYPT: + #ifdef HAVE_CAVIUM + 
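/* Cavium Nitrox offload: the private exponent and modulus are handed + * to the accelerator as raw big-endian buffers rather than mp_ints */ +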
key->dataLen = key->n.raw.len; + ret = NitroxRsaExptMod(in, inLen, + key->d.raw.buf, key->d.raw.len, + key->n.raw.buf, key->n.raw.len, + out, outLen, key); + #elif defined(HAVE_INTEL_QA) + #ifdef RSA_LOW_MEM + ret = IntelQaRsaPrivate(&key->asyncDev, in, inLen, + &key->d.raw, &key->n.raw, + out, outLen); + #else + ret = IntelQaRsaCrtPrivate(&key->asyncDev, in, inLen, + &key->p.raw, &key->q.raw, + &key->dP.raw, &key->dQ.raw, + &key->u.raw, + out, outLen); + #endif + #else /* WOLFSSL_ASYNC_CRYPT_TEST */ + ret = wc_RsaFunctionSync(in, inLen, out, outLen, type, key, rng); + #endif + break; +#endif + + case RSA_PUBLIC_ENCRYPT: + case RSA_PUBLIC_DECRYPT: + #ifdef HAVE_CAVIUM + key->dataLen = key->n.raw.len; + ret = NitroxRsaExptMod(in, inLen, + key->e.raw.buf, key->e.raw.len, + key->n.raw.buf, key->n.raw.len, + out, outLen, key); + #elif defined(HAVE_INTEL_QA) + ret = IntelQaRsaPublic(&key->asyncDev, in, inLen, + &key->e.raw, &key->n.raw, + out, outLen); + #else /* WOLFSSL_ASYNC_CRYPT_TEST */ + ret = wc_RsaFunctionSync(in, inLen, out, outLen, type, key, rng); + #endif + break; + + default: + ret = RSA_WRONG_TYPE_E; + } + + return ret; +} +#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_RSA */ + +#if defined(WC_RSA_DIRECT) || defined(WC_RSA_NO_PADDING) +/* Function that does the RSA operation directly with no padding. + * + * in buffer to do operation on + * inLen length of input buffer + * out buffer to hold results + * outSz gets set to size of result buffer. Should be passed in as length + * of out buffer. If the pointer "out" is null then outSz gets set to + * the expected buffer size needed and LENGTH_ONLY_E gets returned. + * key RSA key to use for encrypt/decrypt + * type if using private or public key {RSA_PUBLIC_ENCRYPT, + * RSA_PUBLIC_DECRYPT, RSA_PRIVATE_ENCRYPT, RSA_PRIVATE_DECRYPT} + * rng wolfSSL RNG to use if needed + * + * returns size of result on success + */ +int wc_RsaDirect(byte* in, word32 inLen, byte* out, word32* outSz, + RsaKey* key, int type, WC_RNG* rng) +{ + int ret; + + if (in == NULL || outSz == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + + /* sanity check on type of RSA operation */ + switch (type) { + case RSA_PUBLIC_ENCRYPT: + case RSA_PUBLIC_DECRYPT: + case RSA_PRIVATE_ENCRYPT: + case RSA_PRIVATE_DECRYPT: + break; + default: + WOLFSSL_MSG("Bad RSA type"); + return BAD_FUNC_ARG; + } + + if ((ret = wc_RsaEncryptSize(key)) < 0) { + return BAD_FUNC_ARG; + } + + if (inLen != (word32)ret) { + WOLFSSL_MSG("Bad input length. Should be RSA key size"); + return BAD_FUNC_ARG; + } + + if (out == NULL) { + *outSz = inLen; + return LENGTH_ONLY_E; + } + + switch (key->state) { + case RSA_STATE_NONE: + case RSA_STATE_ENCRYPT_PAD: + case RSA_STATE_ENCRYPT_EXPTMOD: + case RSA_STATE_DECRYPT_EXPTMOD: + case RSA_STATE_DECRYPT_UNPAD: + key->state = (type == RSA_PRIVATE_ENCRYPT || + type == RSA_PUBLIC_ENCRYPT) ? RSA_STATE_ENCRYPT_EXPTMOD: + RSA_STATE_DECRYPT_EXPTMOD; + + key->dataLen = *outSz; + + ret = wc_RsaFunction(in, inLen, out, &key->dataLen, type, key, rng); + if (ret >= 0 || ret == WC_PENDING_E) { + key->state = (type == RSA_PRIVATE_ENCRYPT || + type == RSA_PUBLIC_ENCRYPT) ? 
RSA_STATE_ENCRYPT_RES: + RSA_STATE_DECRYPT_RES; + } + if (ret < 0) { + break; + } + + FALL_THROUGH; + + case RSA_STATE_ENCRYPT_RES: + case RSA_STATE_DECRYPT_RES: + ret = key->dataLen; + break; + + default: + ret = BAD_STATE_E; + } + + /* if async pending then skip cleanup*/ + if (ret == WC_PENDING_E + #ifdef WC_RSA_NONBLOCK + || ret == FP_WOULDBLOCK + #endif + ) { + return ret; + } + + key->state = RSA_STATE_NONE; + wc_RsaCleanup(key); + + return ret; +} +#endif /* WC_RSA_DIRECT || WC_RSA_NO_PADDING */ + +#if defined(WOLFSSL_CRYPTOCELL) +static int cc310_RsaPublicEncrypt(const byte* in, word32 inLen, byte* out, + word32 outLen, RsaKey* key) +{ + CRYSError_t ret = 0; + CRYS_RSAPrimeData_t primeData; + int modulusSize = wc_RsaEncryptSize(key); + + /* The out buffer must be at least modulus size bytes long. */ + if (outLen < modulusSize) + return BAD_FUNC_ARG; + + ret = CRYS_RSA_PKCS1v15_Encrypt(&wc_rndState, + wc_rndGenVectFunc, + &key->ctx.pubKey, + &primeData, + (byte*)in, + inLen, + out); + + if (ret != SA_SILIB_RET_OK){ + WOLFSSL_MSG("CRYS_RSA_PKCS1v15_Encrypt failed"); + return -1; + } + + return modulusSize; +} +static int cc310_RsaPublicDecrypt(const byte* in, word32 inLen, byte* out, + word32 outLen, RsaKey* key) +{ + CRYSError_t ret = 0; + CRYS_RSAPrimeData_t primeData; + uint16_t actualOutLen = outLen; + + ret = CRYS_RSA_PKCS1v15_Decrypt(&key->ctx.privKey, + &primeData, + (byte*)in, + inLen, + out, + &actualOutLen); + + if (ret != SA_SILIB_RET_OK){ + WOLFSSL_MSG("CRYS_RSA_PKCS1v15_Decrypt failed"); + return -1; + } + return actualOutLen; +} + +int cc310_RsaSSL_Sign(const byte* in, word32 inLen, byte* out, + word32 outLen, RsaKey* key, CRYS_RSA_HASH_OpMode_t mode) +{ + CRYSError_t ret = 0; + uint16_t actualOutLen = outLen*sizeof(byte); + CRYS_RSAPrivUserContext_t contextPrivate; + + ret = CRYS_RSA_PKCS1v15_Sign(&wc_rndState, + wc_rndGenVectFunc, + &contextPrivate, + &key->ctx.privKey, + mode, + (byte*)in, + inLen, + out, + &actualOutLen); + + if (ret != SA_SILIB_RET_OK){ + WOLFSSL_MSG("CRYS_RSA_PKCS1v15_Sign failed"); + return -1; + } + return actualOutLen; +} + +int cc310_RsaSSL_Verify(const byte* in, word32 inLen, byte* sig, + RsaKey* key, CRYS_RSA_HASH_OpMode_t mode) +{ + CRYSError_t ret = 0; + CRYS_RSAPubUserContext_t contextPub; + + /* verify the signature in the sig pointer */ + ret = CRYS_RSA_PKCS1v15_Verify(&contextPub, + &key->ctx.pubKey, + mode, + (byte*)in, + inLen, + sig); + + if (ret != SA_SILIB_RET_OK){ + WOLFSSL_MSG("CRYS_RSA_PKCS1v15_Verify failed"); + return -1; + } + + return ret; +} +#endif /* WOLFSSL_CRYPTOCELL */ + +int wc_RsaFunction(const byte* in, word32 inLen, byte* out, + word32* outLen, int type, RsaKey* key, WC_RNG* rng) +{ + int ret = 0; + + if (key == NULL || in == NULL || inLen == 0 || out == NULL || + outLen == NULL || *outLen == 0 || type == RSA_TYPE_UNKNOWN) { + return BAD_FUNC_ARG; + } + +#ifdef WOLF_CRYPTO_CB + if (key->devId != INVALID_DEVID) { + ret = wc_CryptoCb_Rsa(in, inLen, out, outLen, type, key, rng); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + ret = 0; /* reset error code and try using software */ + } +#endif + +#ifndef TEST_UNPAD_CONSTANT_TIME +#ifndef NO_RSA_BOUNDS_CHECK + if (type == RSA_PRIVATE_DECRYPT && + key->state == RSA_STATE_DECRYPT_EXPTMOD) { + + /* Check that 1 < in < n-1. (Requirement of 800-56B.) 
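+         * Ciphertexts of 0, 1 and n-1 decrypt to trivially predictable
+         * values (0^d = 0, 1^d = 1, (n-1)^d = n-1 for odd d), so
+         * SP 800-56B requires rejecting them before the exptmod.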
*/ +#ifdef WOLFSSL_SMALL_STACK + mp_int* c; +#else + mp_int c[1]; +#endif + +#ifdef WOLFSSL_SMALL_STACK + c = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_RSA); + if (c == NULL) + ret = MEMORY_E; +#endif + + if (mp_init(c) != MP_OKAY) + ret = MEMORY_E; + if (ret == 0) { + if (mp_read_unsigned_bin(c, in, inLen) != 0) + ret = MP_READ_E; + } + if (ret == 0) { + /* check c > 1 */ + if (mp_cmp_d(c, 1) != MP_GT) + ret = RSA_OUT_OF_RANGE_E; + } + if (ret == 0) { + /* add c+1 */ + if (mp_add_d(c, 1, c) != MP_OKAY) + ret = MP_ADD_E; + } + if (ret == 0) { + /* check c+1 < n */ + if (mp_cmp(c, &key->n) != MP_LT) + ret = RSA_OUT_OF_RANGE_E; + } + mp_clear(c); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(c, key->heap, DYNAMIC_TYPE_RSA); +#endif + + if (ret != 0) + return ret; + } +#endif /* NO_RSA_BOUNDS_CHECK */ +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) + if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA && + key->n.raw.len > 0) { + ret = wc_RsaFunctionAsync(in, inLen, out, outLen, type, key, rng); + } + else +#endif +#ifdef WC_RSA_NONBLOCK + if (key->nb) { + ret = wc_RsaFunctionNonBlock(in, inLen, out, outLen, type, key); + } + else +#endif + { + ret = wc_RsaFunctionSync(in, inLen, out, outLen, type, key, rng); + } + + /* handle error */ + if (ret < 0 && ret != WC_PENDING_E + #ifdef WC_RSA_NONBLOCK + && ret != FP_WOULDBLOCK + #endif + ) { + if (ret == MP_EXPTMOD_E) { + /* This can happen due to incorrectly set FP_MAX_BITS or missing XREALLOC */ + WOLFSSL_MSG("RSA_FUNCTION MP_EXPTMOD_E: memory/config problem"); + } + + key->state = RSA_STATE_NONE; + wc_RsaCleanup(key); + } + + return ret; +} + + +#ifndef WOLFSSL_RSA_VERIFY_ONLY +/* Internal Wrappers */ +/* Gives the option of choosing padding type + in : input to be encrypted + inLen: length of input buffer + out: encrypted output + outLen: length of encrypted output buffer + key : wolfSSL initialized RSA key struct + rng : wolfSSL initialized random number struct + rsa_type : type of RSA: RSA_PUBLIC_ENCRYPT, RSA_PUBLIC_DECRYPT, + RSA_PRIVATE_ENCRYPT or RSA_PRIVATE_DECRYPT + pad_value: RSA_BLOCK_TYPE_1 or RSA_BLOCK_TYPE_2 + pad_type : type of padding: WC_RSA_PKCSV15_PAD, WC_RSA_OAEP_PAD, + WC_RSA_NO_PAD or WC_RSA_PSS_PAD + hash : type of hash algorithm to use found in wolfssl/wolfcrypt/hash.h + mgf : type of mask generation function to use + label : optional label + labelSz : size of optional label buffer + saltLen : Length of salt used in PSS + rng : random number generator */ +static int RsaPublicEncryptEx(const byte* in, word32 inLen, byte* out, + word32 outLen, RsaKey* key, int rsa_type, + byte pad_value, int pad_type, + enum wc_HashType hash, int mgf, + byte* label, word32 labelSz, int saltLen, + WC_RNG* rng) +{ + int ret, sz; + + if (in == NULL || inLen == 0 || out == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + + sz = wc_RsaEncryptSize(key); + if (sz > (int)outLen) { + return RSA_BUFFER_E; + } + + if (sz < RSA_MIN_PAD_SZ) { + return WC_KEY_SIZE_E; + } + + if (inLen > (word32)(sz - RSA_MIN_PAD_SZ)) { +#ifdef WC_RSA_NO_PADDING + /* In the case that no padding is used the input length can and should + * be the same size as the RSA key. 
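+         * Padded modes need at least RSA_MIN_PAD_SZ spare bytes, so any
+         * pad_type other than WC_RSA_NO_PAD fails here with RSA_BUFFER_E.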
*/ + if (pad_type != WC_RSA_NO_PAD) +#endif + return RSA_BUFFER_E; + } + + switch (key->state) { + case RSA_STATE_NONE: + case RSA_STATE_ENCRYPT_PAD: + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \ + defined(HAVE_CAVIUM) + if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA && + pad_type != WC_RSA_PSS_PAD && key->n.raw.buf) { + /* Async operations that include padding */ + if (rsa_type == RSA_PUBLIC_ENCRYPT && + pad_value == RSA_BLOCK_TYPE_2) { + key->state = RSA_STATE_ENCRYPT_RES; + key->dataLen = key->n.raw.len; + return NitroxRsaPublicEncrypt(in, inLen, out, outLen, key); + } + else if (rsa_type == RSA_PRIVATE_ENCRYPT && + pad_value == RSA_BLOCK_TYPE_1) { + key->state = RSA_STATE_ENCRYPT_RES; + key->dataLen = key->n.raw.len; + return NitroxRsaSSL_Sign(in, inLen, out, outLen, key); + } + } + #elif defined(WOLFSSL_CRYPTOCELL) + if (rsa_type == RSA_PUBLIC_ENCRYPT && + pad_value == RSA_BLOCK_TYPE_2) { + + return cc310_RsaPublicEncrypt(in, inLen, out, outLen, key); + } + else if (rsa_type == RSA_PRIVATE_ENCRYPT && + pad_value == RSA_BLOCK_TYPE_1) { + return cc310_RsaSSL_Sign(in, inLen, out, outLen, key, + cc310_hashModeRSA(hash, 0)); + } + #endif /* WOLFSSL_CRYPTOCELL */ + + key->state = RSA_STATE_ENCRYPT_PAD; + ret = wc_RsaPad_ex(in, inLen, out, sz, pad_value, rng, pad_type, hash, + mgf, label, labelSz, saltLen, mp_count_bits(&key->n), + key->heap); + if (ret < 0) { + break; + } + + key->state = RSA_STATE_ENCRYPT_EXPTMOD; + FALL_THROUGH; + + case RSA_STATE_ENCRYPT_EXPTMOD: + + key->dataLen = outLen; + ret = wc_RsaFunction(out, sz, out, &key->dataLen, rsa_type, key, rng); + + if (ret >= 0 || ret == WC_PENDING_E) { + key->state = RSA_STATE_ENCRYPT_RES; + } + if (ret < 0) { + break; + } + + FALL_THROUGH; + + case RSA_STATE_ENCRYPT_RES: + ret = key->dataLen; + break; + + default: + ret = BAD_STATE_E; + break; + } + + /* if async pending then return and skip done cleanup below */ + if (ret == WC_PENDING_E + #ifdef WC_RSA_NONBLOCK + || ret == FP_WOULDBLOCK + #endif + ) { + return ret; + } + + key->state = RSA_STATE_NONE; + wc_RsaCleanup(key); + + return ret; +} + +#endif + +/* Gives the option of choosing padding type + in : input to be decrypted + inLen: length of input buffer + out: decrypted message + outLen: length of decrypted message in bytes + outPtr: optional inline output pointer (if provided doing inline) + key : wolfSSL initialized RSA key struct + rsa_type : type of RSA: RSA_PUBLIC_ENCRYPT, RSA_PUBLIC_DECRYPT, + RSA_PRIVATE_ENCRYPT or RSA_PRIVATE_DECRYPT + pad_value: RSA_BLOCK_TYPE_1 or RSA_BLOCK_TYPE_2 + pad_type : type of padding: WC_RSA_PKCSV15_PAD, WC_RSA_OAEP_PAD, + WC_RSA_NO_PAD, WC_RSA_PSS_PAD + hash : type of hash algorithm to use found in wolfssl/wolfcrypt/hash.h + mgf : type of mask generation function to use + label : optional label + labelSz : size of optional label buffer + saltLen : Length of salt used in PSS + rng : random number generator */ +static int RsaPrivateDecryptEx(byte* in, word32 inLen, byte* out, + word32 outLen, byte** outPtr, RsaKey* key, + int rsa_type, byte pad_value, int pad_type, + enum wc_HashType hash, int mgf, + byte* label, word32 labelSz, int saltLen, + WC_RNG* rng) +{ + int ret = RSA_WRONG_TYPE_E; + byte* pad = NULL; + + if (in == NULL || inLen == 0 || out == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + + switch (key->state) { + case RSA_STATE_NONE: + key->dataLen = inLen; + + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \ + defined(HAVE_CAVIUM) + /* Async operations that include padding */ + 
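+        /* PKCSv15-padded encrypt/sign can be offloaded whole; PSS is
+         * excluded by the pad_type check below and always takes the
+         * software path. */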
if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA && + pad_type != WC_RSA_PSS_PAD) { +#ifndef WOLFSSL_RSA_PUBLIC_ONLY + if (rsa_type == RSA_PRIVATE_DECRYPT && + pad_value == RSA_BLOCK_TYPE_2) { + key->state = RSA_STATE_DECRYPT_RES; + key->data = NULL; + return NitroxRsaPrivateDecrypt(in, inLen, out, &key->dataLen, + key); +#endif + } + else if (rsa_type == RSA_PUBLIC_DECRYPT && + pad_value == RSA_BLOCK_TYPE_1) { + key->state = RSA_STATE_DECRYPT_RES; + key->data = NULL; + return NitroxRsaSSL_Verify(in, inLen, out, &key->dataLen, key); + } + } + #elif defined(WOLFSSL_CRYPTOCELL) + if (rsa_type == RSA_PRIVATE_DECRYPT && + pad_value == RSA_BLOCK_TYPE_2) { + ret = cc310_RsaPublicDecrypt(in, inLen, out, outLen, key); + if (outPtr != NULL) + *outPtr = out; /* for inline */ + return ret; + } + else if (rsa_type == RSA_PUBLIC_DECRYPT && + pad_value == RSA_BLOCK_TYPE_1) { + return cc310_RsaSSL_Verify(in, inLen, out, key, + cc310_hashModeRSA(hash, 0)); + } + #endif /* WOLFSSL_CRYPTOCELL */ + + +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE) + /* verify the tmp ptr is NULL, otherwise indicates bad state */ + if (key->data != NULL) { + ret = BAD_STATE_E; + break; + } + + /* if not doing this inline then allocate a buffer for it */ + if (outPtr == NULL) { + key->data = (byte*)XMALLOC(inLen, key->heap, + DYNAMIC_TYPE_WOLF_BIGINT); + key->dataIsAlloc = 1; + if (key->data == NULL) { + ret = MEMORY_E; + break; + } + XMEMCPY(key->data, in, inLen); + } + else { + key->data = out; + } +#endif + + key->state = RSA_STATE_DECRYPT_EXPTMOD; + FALL_THROUGH; + + case RSA_STATE_DECRYPT_EXPTMOD: +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE) + ret = wc_RsaFunction(key->data, inLen, key->data, &key->dataLen, + rsa_type, key, rng); +#else + ret = wc_RsaFunction(in, inLen, out, &key->dataLen, rsa_type, key, rng); +#endif + + if (ret >= 0 || ret == WC_PENDING_E) { + key->state = RSA_STATE_DECRYPT_UNPAD; + } + if (ret < 0) { + break; + } + + FALL_THROUGH; + + case RSA_STATE_DECRYPT_UNPAD: +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE) + ret = wc_RsaUnPad_ex(key->data, key->dataLen, &pad, pad_value, pad_type, + hash, mgf, label, labelSz, saltLen, + mp_count_bits(&key->n), key->heap); +#else + ret = wc_RsaUnPad_ex(out, key->dataLen, &pad, pad_value, pad_type, hash, + mgf, label, labelSz, saltLen, + mp_count_bits(&key->n), key->heap); +#endif + if (rsa_type == RSA_PUBLIC_DECRYPT && ret > (int)outLen) + ret = RSA_BUFFER_E; + else if (ret >= 0 && pad != NULL) { +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE) + signed char c; +#endif + + /* only copy output if not inline */ + if (outPtr == NULL) { +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE) + if (rsa_type == RSA_PRIVATE_DECRYPT) { + word32 i, j; + int start = (int)((size_t)pad - (size_t)key->data); + + for (i = 0, j = 0; j < key->dataLen; j++) { + out[i] = key->data[j]; + c = ctMaskGTE(j, start); + c &= ctMaskLT(i, outLen); + /* 0 - no add, -1 add */ + i += (word32)((byte)(-c)); + } + } + else +#endif + { + XMEMCPY(out, pad, ret); + } + } + else + *outPtr = pad; + +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) + ret = ctMaskSelInt(ctMaskLTE(ret, outLen), ret, RSA_BUFFER_E); + ret = ctMaskSelInt(ctMaskNotEq(ret, 0), ret, RSA_BUFFER_E); +#else + if (outLen < (word32)ret) + ret = RSA_BUFFER_E; +#endif + } + + key->state = RSA_STATE_DECRYPT_RES; + FALL_THROUGH; + + case RSA_STATE_DECRYPT_RES: + #if defined(WOLFSSL_ASYNC_CRYPT) && 
defined(WC_ASYNC_ENABLE_RSA) && \ + defined(HAVE_CAVIUM) + if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA && + pad_type != WC_RSA_PSS_PAD) { + if (ret > 0) { + /* convert result */ + byte* dataLen = (byte*)&key->dataLen; + ret = (dataLen[0] << 8) | (dataLen[1]); + + if (outPtr) + *outPtr = in; + } + } + #endif + break; + + default: + ret = BAD_STATE_E; + break; + } + + /* if async pending then return and skip done cleanup below */ + if (ret == WC_PENDING_E + #ifdef WC_RSA_NONBLOCK + || ret == FP_WOULDBLOCK + #endif + ) { + return ret; + } + + key->state = RSA_STATE_NONE; + wc_RsaCleanup(key); + + return ret; +} + + +#ifndef WOLFSSL_RSA_VERIFY_ONLY +/* Public RSA Functions */ +int wc_RsaPublicEncrypt(const byte* in, word32 inLen, byte* out, word32 outLen, + RsaKey* key, WC_RNG* rng) +{ + return RsaPublicEncryptEx(in, inLen, out, outLen, key, + RSA_PUBLIC_ENCRYPT, RSA_BLOCK_TYPE_2, WC_RSA_PKCSV15_PAD, + WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng); +} + + +#if !defined(WC_NO_RSA_OAEP) || defined(WC_RSA_NO_PADDING) +int wc_RsaPublicEncrypt_ex(const byte* in, word32 inLen, byte* out, + word32 outLen, RsaKey* key, WC_RNG* rng, int type, + enum wc_HashType hash, int mgf, byte* label, + word32 labelSz) +{ + return RsaPublicEncryptEx(in, inLen, out, outLen, key, RSA_PUBLIC_ENCRYPT, + RSA_BLOCK_TYPE_2, type, hash, mgf, label, labelSz, 0, rng); +} +#endif /* WC_NO_RSA_OAEP */ +#endif + + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +int wc_RsaPrivateDecryptInline(byte* in, word32 inLen, byte** out, RsaKey* key) +{ + WC_RNG* rng; +#ifdef WC_RSA_BLINDING + rng = key->rng; +#else + rng = NULL; +#endif + return RsaPrivateDecryptEx(in, inLen, in, inLen, out, key, + RSA_PRIVATE_DECRYPT, RSA_BLOCK_TYPE_2, WC_RSA_PKCSV15_PAD, + WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng); +} + + +#ifndef WC_NO_RSA_OAEP +int wc_RsaPrivateDecryptInline_ex(byte* in, word32 inLen, byte** out, + RsaKey* key, int type, enum wc_HashType hash, + int mgf, byte* label, word32 labelSz) +{ + WC_RNG* rng; +#ifdef WC_RSA_BLINDING + rng = key->rng; +#else + rng = NULL; +#endif + return RsaPrivateDecryptEx(in, inLen, in, inLen, out, key, + RSA_PRIVATE_DECRYPT, RSA_BLOCK_TYPE_2, type, hash, + mgf, label, labelSz, 0, rng); +} +#endif /* WC_NO_RSA_OAEP */ + + +int wc_RsaPrivateDecrypt(const byte* in, word32 inLen, byte* out, + word32 outLen, RsaKey* key) +{ + WC_RNG* rng; +#ifdef WC_RSA_BLINDING + rng = key->rng; +#else + rng = NULL; +#endif + return RsaPrivateDecryptEx((byte*)in, inLen, out, outLen, NULL, key, + RSA_PRIVATE_DECRYPT, RSA_BLOCK_TYPE_2, WC_RSA_PKCSV15_PAD, + WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng); +} + +#if !defined(WC_NO_RSA_OAEP) || defined(WC_RSA_NO_PADDING) +int wc_RsaPrivateDecrypt_ex(const byte* in, word32 inLen, byte* out, + word32 outLen, RsaKey* key, int type, + enum wc_HashType hash, int mgf, byte* label, + word32 labelSz) +{ + WC_RNG* rng; +#ifdef WC_RSA_BLINDING + rng = key->rng; +#else + rng = NULL; +#endif + return RsaPrivateDecryptEx((byte*)in, inLen, out, outLen, NULL, key, + RSA_PRIVATE_DECRYPT, RSA_BLOCK_TYPE_2, type, hash, mgf, label, + labelSz, 0, rng); +} +#endif /* WC_NO_RSA_OAEP || WC_RSA_NO_PADDING */ +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ + +#if !defined(WOLFSSL_CRYPTOCELL) +int wc_RsaSSL_VerifyInline(byte* in, word32 inLen, byte** out, RsaKey* key) +{ + WC_RNG* rng; +#ifdef WC_RSA_BLINDING + rng = key->rng; +#else + rng = NULL; +#endif + return RsaPrivateDecryptEx(in, inLen, in, inLen, out, key, + RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PKCSV15_PAD, + WC_HASH_TYPE_NONE, WC_MGF1NONE, 
NULL, 0, 0, rng); +} +#endif + +#ifndef WOLFSSL_RSA_VERIFY_ONLY +int wc_RsaSSL_Verify(const byte* in, word32 inLen, byte* out, word32 outLen, + RsaKey* key) +{ + return wc_RsaSSL_Verify_ex(in, inLen, out, outLen, key , WC_RSA_PKCSV15_PAD); +} + +int wc_RsaSSL_Verify_ex(const byte* in, word32 inLen, byte* out, word32 outLen, + RsaKey* key, int pad_type) +{ + WC_RNG* rng; + + if (key == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef WC_RSA_BLINDING + rng = key->rng; +#else + rng = NULL; +#endif + + return RsaPrivateDecryptEx((byte*)in, inLen, out, outLen, NULL, key, + RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, pad_type, + WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng); +} +#endif + +#ifdef WC_RSA_PSS +/* Verify the message signed with RSA-PSS. + * The input buffer is reused for the output buffer. + * Salt length is equal to hash length. + * + * in Buffer holding encrypted data. + * inLen Length of data in buffer. + * out Pointer to address containing the PSS data. + * hash Hash algorithm. + * mgf Mask generation function. + * key Public RSA key. + * returns the length of the PSS data on success and negative indicates failure. + */ +int wc_RsaPSS_VerifyInline(byte* in, word32 inLen, byte** out, + enum wc_HashType hash, int mgf, RsaKey* key) +{ +#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER + return wc_RsaPSS_VerifyInline_ex(in, inLen, out, hash, mgf, + RSA_PSS_SALT_LEN_DEFAULT, key); +#else + return wc_RsaPSS_VerifyInline_ex(in, inLen, out, hash, mgf, + RSA_PSS_SALT_LEN_DISCOVER, key); +#endif +} + +/* Verify the message signed with RSA-PSS. + * The input buffer is reused for the output buffer. + * + * in Buffer holding encrypted data. + * inLen Length of data in buffer. + * out Pointer to address containing the PSS data. + * hash Hash algorithm. + * mgf Mask generation function. + * key Public RSA key. + * saltLen Length of salt used. RSA_PSS_SALT_LEN_DEFAULT (-1) indicates salt + * length is the same as the hash length. RSA_PSS_SALT_LEN_DISCOVER + * indicates salt length is determined from the data. + * returns the length of the PSS data on success and negative indicates failure. + */ +int wc_RsaPSS_VerifyInline_ex(byte* in, word32 inLen, byte** out, + enum wc_HashType hash, int mgf, int saltLen, + RsaKey* key) +{ + WC_RNG* rng; +#ifdef WC_RSA_BLINDING + rng = key->rng; +#else + rng = NULL; +#endif + return RsaPrivateDecryptEx(in, inLen, in, inLen, out, key, + RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PSS_PAD, + hash, mgf, NULL, 0, saltLen, rng); +} + +/* Verify the message signed with RSA-PSS. + * Salt length is equal to hash length. + * + * in Buffer holding encrypted data. + * inLen Length of data in buffer. + * out Pointer to address containing the PSS data. + * hash Hash algorithm. + * mgf Mask generation function. + * key Public RSA key. + * returns the length of the PSS data on success and negative indicates failure. + */ +int wc_RsaPSS_Verify(byte* in, word32 inLen, byte* out, word32 outLen, + enum wc_HashType hash, int mgf, RsaKey* key) +{ +#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER + return wc_RsaPSS_Verify_ex(in, inLen, out, outLen, hash, mgf, + RSA_PSS_SALT_LEN_DEFAULT, key); +#else + return wc_RsaPSS_Verify_ex(in, inLen, out, outLen, hash, mgf, + RSA_PSS_SALT_LEN_DISCOVER, key); +#endif +} + +/* Verify the message signed with RSA-PSS. + * + * in Buffer holding encrypted data. + * inLen Length of data in buffer. + * out Pointer to address containing the PSS data. + * hash Hash algorithm. + * mgf Mask generation function. + * key Public RSA key. + * saltLen Length of salt used. 
RSA_PSS_SALT_LEN_DEFAULT (-1) indicates salt + * length is the same as the hash length. RSA_PSS_SALT_LEN_DISCOVER + * indicates salt length is determined from the data. + * returns the length of the PSS data on success and negative indicates failure. + */ +int wc_RsaPSS_Verify_ex(byte* in, word32 inLen, byte* out, word32 outLen, + enum wc_HashType hash, int mgf, int saltLen, + RsaKey* key) +{ + WC_RNG* rng; +#ifdef WC_RSA_BLINDING + rng = key->rng; +#else + rng = NULL; +#endif + return RsaPrivateDecryptEx(in, inLen, out, outLen, NULL, key, + RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PSS_PAD, + hash, mgf, NULL, 0, saltLen, rng); +} + + +/* Checks the PSS data to ensure that the signature matches. + * Salt length is equal to hash length. + * + * in Hash of the data that is being verified. + * inSz Length of hash. + * sig Buffer holding PSS data. + * sigSz Size of PSS data. + * hashType Hash algorithm. + * returns BAD_PADDING_E when the PSS data is invalid, BAD_FUNC_ARG when + * NULL is passed in to in or sig or inSz is not the same as the hash + * algorithm length and 0 on success. + */ +int wc_RsaPSS_CheckPadding(const byte* in, word32 inSz, byte* sig, + word32 sigSz, enum wc_HashType hashType) +{ + return wc_RsaPSS_CheckPadding_ex(in, inSz, sig, sigSz, hashType, inSz, 0); +} + +/* Checks the PSS data to ensure that the signature matches. + * + * in Hash of the data that is being verified. + * inSz Length of hash. + * sig Buffer holding PSS data. + * sigSz Size of PSS data. + * hashType Hash algorithm. + * saltLen Length of salt used. RSA_PSS_SALT_LEN_DEFAULT (-1) indicates salt + * length is the same as the hash length. RSA_PSS_SALT_LEN_DISCOVER + * indicates salt length is determined from the data. + * returns BAD_PADDING_E when the PSS data is invalid, BAD_FUNC_ARG when + * NULL is passed in to in or sig or inSz is not the same as the hash + * algorithm length and 0 on success. + */ +int wc_RsaPSS_CheckPadding_ex(const byte* in, word32 inSz, byte* sig, + word32 sigSz, enum wc_HashType hashType, + int saltLen, int bits) +{ + int ret = 0; +#ifndef WOLFSSL_PSS_LONG_SALT + byte sigCheck[WC_MAX_DIGEST_SIZE*2 + RSA_PSS_PAD_SZ]; +#else + byte *sigCheck = NULL; +#endif + + (void)bits; + + if (in == NULL || sig == NULL || + inSz != (word32)wc_HashGetDigestSize(hashType)) { + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + if (saltLen == RSA_PSS_SALT_LEN_DEFAULT) { + saltLen = inSz; + #ifdef WOLFSSL_SHA512 + /* See FIPS 186-4 section 5.5 item (e). 
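+             * A 1024-bit modulus gives emLen = 128 bytes, which cannot
+             * hold a SHA-512 salt plus hash plus 2 bytes of overhead
+             * (64 + 64 + 2 > 128), so the salt is capped at
+             * emLen - hLen - 2 bytes.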
*/ + if (bits == 1024 && inSz == WC_SHA512_DIGEST_SIZE) { + saltLen = RSA_PSS_SALT_MAX_SZ; + } + #endif + } +#ifndef WOLFSSL_PSS_LONG_SALT + else if ((word32)saltLen > inSz) { + ret = PSS_SALTLEN_E; + } +#endif +#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER + else if (saltLen < RSA_PSS_SALT_LEN_DEFAULT) { + ret = PSS_SALTLEN_E; + } +#else + else if (saltLen == RSA_PSS_SALT_LEN_DISCOVER) { + saltLen = sigSz - inSz; + if (saltLen < 0) { + ret = PSS_SALTLEN_E; + } + } + else if (saltLen < RSA_PSS_SALT_LEN_DISCOVER) { + ret = PSS_SALTLEN_E; + } +#endif + } + + /* Sig = Salt | Exp Hash */ + if (ret == 0) { + if (sigSz != inSz + saltLen) { + ret = PSS_SALTLEN_E; + } + } + +#ifdef WOLFSSL_PSS_LONG_SALT + if (ret == 0) { + sigCheck = (byte*)XMALLOC(RSA_PSS_PAD_SZ + inSz + saltLen, NULL, + DYNAMIC_TYPE_RSA_BUFFER); + if (sigCheck == NULL) { + ret = MEMORY_E; + } + } +#endif + + /* Exp Hash = HASH(8 * 0x00 | Message Hash | Salt) */ + if (ret == 0) { + XMEMSET(sigCheck, 0, RSA_PSS_PAD_SZ); + XMEMCPY(sigCheck + RSA_PSS_PAD_SZ, in, inSz); + XMEMCPY(sigCheck + RSA_PSS_PAD_SZ + inSz, sig, saltLen); + ret = wc_Hash(hashType, sigCheck, RSA_PSS_PAD_SZ + inSz + saltLen, + sigCheck, inSz); + } + if (ret == 0) { + if (XMEMCMP(sigCheck, sig + saltLen, inSz) != 0) { + WOLFSSL_MSG("RsaPSS_CheckPadding: Padding Error"); + ret = BAD_PADDING_E; + } + } + +#ifdef WOLFSSL_PSS_LONG_SALT + if (sigCheck != NULL) { + XFREE(sigCheck, NULL, DYNAMIC_TYPE_RSA_BUFFER); + } +#endif + return ret; +} + + +/* Verify the message signed with RSA-PSS. + * The input buffer is reused for the output buffer. + * Salt length is equal to hash length. + * + * in Buffer holding encrypted data. + * inLen Length of data in buffer. + * out Pointer to address containing the PSS data. + * digest Hash of the data that is being verified. + * digestLen Length of hash. + * hash Hash algorithm. + * mgf Mask generation function. + * key Public RSA key. + * returns the length of the PSS data on success and negative indicates failure. + */ +int wc_RsaPSS_VerifyCheckInline(byte* in, word32 inLen, byte** out, + const byte* digest, word32 digestLen, + enum wc_HashType hash, int mgf, RsaKey* key) +{ + int ret = 0, verify, saltLen, hLen, bits = 0; + + hLen = wc_HashGetDigestSize(hash); + if (hLen < 0) + return hLen; + if ((word32)hLen != digestLen) + return BAD_FUNC_ARG; + + saltLen = hLen; + #ifdef WOLFSSL_SHA512 + /* See FIPS 186-4 section 5.5 item (e). */ + bits = mp_count_bits(&key->n); + if (bits == 1024 && hLen == WC_SHA512_DIGEST_SIZE) + saltLen = RSA_PSS_SALT_MAX_SZ; + #endif + + verify = wc_RsaPSS_VerifyInline_ex(in, inLen, out, hash, mgf, saltLen, key); + if (verify > 0) + ret = wc_RsaPSS_CheckPadding_ex(digest, digestLen, *out, verify, + hash, saltLen, bits); + if (ret == 0) + ret = verify; + + return ret; +} + + +/* Verify the message signed with RSA-PSS. + * Salt length is equal to hash length. + * + * in Buffer holding encrypted data. + * inLen Length of data in buffer. + * out Pointer to address containing the PSS data. + * outLen Length of the output. + * digest Hash of the data that is being verified. + * digestLen Length of hash. + * hash Hash algorithm. + * mgf Mask generation function. + * key Public RSA key. + * returns the length of the PSS data on success and negative indicates failure. 
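+ * Equivalent to wc_RsaPSS_Verify_ex() followed by
+ * wc_RsaPSS_CheckPadding_ex() on the recovered PSS data, with the salt
+ * length fixed to the digest length (capped for 1024-bit/SHA-512).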
+ */ +int wc_RsaPSS_VerifyCheck(byte* in, word32 inLen, byte* out, word32 outLen, + const byte* digest, word32 digestLen, + enum wc_HashType hash, int mgf, + RsaKey* key) +{ + int ret = 0, verify, saltLen, hLen, bits = 0; + + hLen = wc_HashGetDigestSize(hash); + if (hLen < 0) + return hLen; + if ((word32)hLen != digestLen) + return BAD_FUNC_ARG; + + saltLen = hLen; + #ifdef WOLFSSL_SHA512 + /* See FIPS 186-4 section 5.5 item (e). */ + bits = mp_count_bits(&key->n); + if (bits == 1024 && hLen == WC_SHA512_DIGEST_SIZE) + saltLen = RSA_PSS_SALT_MAX_SZ; + #endif + + verify = wc_RsaPSS_Verify_ex(in, inLen, out, outLen, hash, + mgf, saltLen, key); + if (verify > 0) + ret = wc_RsaPSS_CheckPadding_ex(digest, digestLen, out, verify, + hash, saltLen, bits); + if (ret == 0) + ret = verify; + + return ret; +} + +#endif + +#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) && !defined(WOLFSSL_RSA_VERIFY_ONLY) +int wc_RsaSSL_Sign(const byte* in, word32 inLen, byte* out, word32 outLen, + RsaKey* key, WC_RNG* rng) +{ + return RsaPublicEncryptEx(in, inLen, out, outLen, key, + RSA_PRIVATE_ENCRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PKCSV15_PAD, + WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng); +} + +#ifdef WC_RSA_PSS +/* Sign the hash of a message using RSA-PSS. + * Salt length is equal to hash length. + * + * in Buffer holding hash of message. + * inLen Length of data in buffer (hash length). + * out Buffer to write encrypted signature into. + * outLen Size of buffer to write to. + * hash Hash algorithm. + * mgf Mask generation function. + * key Public RSA key. + * rng Random number generator. + * returns the length of the encrypted signature on success, a negative value + * indicates failure. + */ +int wc_RsaPSS_Sign(const byte* in, word32 inLen, byte* out, word32 outLen, + enum wc_HashType hash, int mgf, RsaKey* key, WC_RNG* rng) +{ + return wc_RsaPSS_Sign_ex(in, inLen, out, outLen, hash, mgf, + RSA_PSS_SALT_LEN_DEFAULT, key, rng); +} + +/* Sign the hash of a message using RSA-PSS. + * + * in Buffer holding hash of message. + * inLen Length of data in buffer (hash length). + * out Buffer to write encrypted signature into. + * outLen Size of buffer to write to. + * hash Hash algorithm. + * mgf Mask generation function. + * saltLen Length of salt used. RSA_PSS_SALT_LEN_DEFAULT (-1) indicates salt + * length is the same as the hash length. RSA_PSS_SALT_LEN_DISCOVER + * indicates salt length is determined from the data. + * key Public RSA key. + * rng Random number generator. + * returns the length of the encrypted signature on success, a negative value + * indicates failure. 
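+ *
+ * A minimal usage sketch (illustrative only; assumes an initialized
+ * 2048-bit "key" and "rng", and a SHA-256 digest in "mHash"):
+ *
+ *     byte sig[256];
+ *     int  ret = wc_RsaPSS_Sign_ex(mHash, WC_SHA256_DIGEST_SIZE,
+ *                    sig, (word32)sizeof(sig),
+ *                    WC_HASH_TYPE_SHA256, WC_MGF1SHA256,
+ *                    RSA_PSS_SALT_LEN_DEFAULT, &key, &rng);
+ *
+ * On success ret is the signature length (256 here); pass the same
+ * hash/mgf pair to wc_RsaPSS_Verify_ex() when checking it.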
+ */ +int wc_RsaPSS_Sign_ex(const byte* in, word32 inLen, byte* out, word32 outLen, + enum wc_HashType hash, int mgf, int saltLen, RsaKey* key, + WC_RNG* rng) +{ + return RsaPublicEncryptEx(in, inLen, out, outLen, key, + RSA_PRIVATE_ENCRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PSS_PAD, + hash, mgf, NULL, 0, saltLen, rng); +} +#endif +#endif + +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || !defined(WOLFSSL_SP_MATH) || \ + defined(WC_RSA_PSS) +int wc_RsaEncryptSize(RsaKey* key) +{ + int ret; + + if (key == NULL) { + return BAD_FUNC_ARG; + } + + ret = mp_unsigned_bin_size(&key->n); + +#ifdef WOLF_CRYPTO_CB + if (ret == 0 && key->devId != INVALID_DEVID) { + ret = 2048/8; /* hardware handles, use 2048-bit as default */ + } +#endif + + return ret; +} +#endif + +#ifndef WOLFSSL_RSA_VERIFY_ONLY +/* flatten RsaKey structure into individual elements (e, n) */ +int wc_RsaFlattenPublicKey(RsaKey* key, byte* e, word32* eSz, byte* n, + word32* nSz) +{ + int sz, ret; + + if (key == NULL || e == NULL || eSz == NULL || n == NULL || nSz == NULL) { + return BAD_FUNC_ARG; + } + + sz = mp_unsigned_bin_size(&key->e); + if ((word32)sz > *eSz) + return RSA_BUFFER_E; + ret = mp_to_unsigned_bin(&key->e, e); + if (ret != MP_OKAY) + return ret; + *eSz = (word32)sz; + + sz = wc_RsaEncryptSize(key); + if ((word32)sz > *nSz) + return RSA_BUFFER_E; + ret = mp_to_unsigned_bin(&key->n, n); + if (ret != MP_OKAY) + return ret; + *nSz = (word32)sz; + + return 0; +} +#endif + +#endif /* HAVE_FIPS */ + + +#ifndef WOLFSSL_RSA_VERIFY_ONLY +static int RsaGetValue(mp_int* in, byte* out, word32* outSz) +{ + word32 sz; + int ret = 0; + + /* Parameters ensured by calling function. */ + + sz = (word32)mp_unsigned_bin_size(in); + if (sz > *outSz) + ret = RSA_BUFFER_E; + + if (ret == 0) + ret = mp_to_unsigned_bin(in, out); + + if (ret == MP_OKAY) + *outSz = sz; + + return ret; +} + + +int wc_RsaExportKey(RsaKey* key, + byte* e, word32* eSz, byte* n, word32* nSz, + byte* d, word32* dSz, byte* p, word32* pSz, + byte* q, word32* qSz) +{ + int ret = BAD_FUNC_ARG; + + if (key && e && eSz && n && nSz && d && dSz && p && pSz && q && qSz) + ret = 0; + + if (ret == 0) + ret = RsaGetValue(&key->e, e, eSz); + if (ret == 0) + ret = RsaGetValue(&key->n, n, nSz); +#ifndef WOLFSSL_RSA_PUBLIC_ONLY + if (ret == 0) + ret = RsaGetValue(&key->d, d, dSz); + if (ret == 0) + ret = RsaGetValue(&key->p, p, pSz); + if (ret == 0) + ret = RsaGetValue(&key->q, q, qSz); +#else + /* no private parts to key */ + if (d == NULL || p == NULL || q == NULL || dSz == NULL || pSz == NULL + || qSz == NULL) { + ret = BAD_FUNC_ARG; + } + else { + *dSz = 0; + *pSz = 0; + *qSz = 0; + } +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ + + return ret; +} +#endif + + +#ifdef WOLFSSL_KEY_GEN + +/* Check that |p-q| > 2^((size/2)-100) */ +static int wc_CompareDiffPQ(mp_int* p, mp_int* q, int size) +{ + mp_int c, d; + int ret; + + if (p == NULL || q == NULL) + return BAD_FUNC_ARG; + + ret = mp_init_multi(&c, &d, NULL, NULL, NULL, NULL); + + /* c = 2^((size/2)-100) */ + if (ret == 0) + ret = mp_2expt(&c, (size/2)-100); + + /* d = |p-q| */ + if (ret == 0) + ret = mp_sub(p, q, &d); + + if (ret == 0) + ret = mp_abs(&d, &d); + + /* compare */ + if (ret == 0) + ret = mp_cmp(&d, &c); + + if (ret == MP_GT) + ret = MP_OKAY; + + mp_clear(&d); + mp_clear(&c); + + return ret; +} + + +/* The lower_bound value is floor(2^(0.5) * 2^((nlen/2)-1)) where nlen is 4096. + * This number was calculated using a small test tool written with a common + * large number math library. 
Other values of nlen may be checked with a subset + * of lower_bound. */ +static const byte lower_bound[] = { + 0xB5, 0x04, 0xF3, 0x33, 0xF9, 0xDE, 0x64, 0x84, + 0x59, 0x7D, 0x89, 0xB3, 0x75, 0x4A, 0xBE, 0x9F, + 0x1D, 0x6F, 0x60, 0xBA, 0x89, 0x3B, 0xA8, 0x4C, + 0xED, 0x17, 0xAC, 0x85, 0x83, 0x33, 0x99, 0x15, +/* 512 */ + 0x4A, 0xFC, 0x83, 0x04, 0x3A, 0xB8, 0xA2, 0xC3, + 0xA8, 0xB1, 0xFE, 0x6F, 0xDC, 0x83, 0xDB, 0x39, + 0x0F, 0x74, 0xA8, 0x5E, 0x43, 0x9C, 0x7B, 0x4A, + 0x78, 0x04, 0x87, 0x36, 0x3D, 0xFA, 0x27, 0x68, +/* 1024 */ + 0xD2, 0x20, 0x2E, 0x87, 0x42, 0xAF, 0x1F, 0x4E, + 0x53, 0x05, 0x9C, 0x60, 0x11, 0xBC, 0x33, 0x7B, + 0xCA, 0xB1, 0xBC, 0x91, 0x16, 0x88, 0x45, 0x8A, + 0x46, 0x0A, 0xBC, 0x72, 0x2F, 0x7C, 0x4E, 0x33, + 0xC6, 0xD5, 0xA8, 0xA3, 0x8B, 0xB7, 0xE9, 0xDC, + 0xCB, 0x2A, 0x63, 0x43, 0x31, 0xF3, 0xC8, 0x4D, + 0xF5, 0x2F, 0x12, 0x0F, 0x83, 0x6E, 0x58, 0x2E, + 0xEA, 0xA4, 0xA0, 0x89, 0x90, 0x40, 0xCA, 0x4A, +/* 2048 */ + 0x81, 0x39, 0x4A, 0xB6, 0xD8, 0xFD, 0x0E, 0xFD, + 0xF4, 0xD3, 0xA0, 0x2C, 0xEB, 0xC9, 0x3E, 0x0C, + 0x42, 0x64, 0xDA, 0xBC, 0xD5, 0x28, 0xB6, 0x51, + 0xB8, 0xCF, 0x34, 0x1B, 0x6F, 0x82, 0x36, 0xC7, + 0x01, 0x04, 0xDC, 0x01, 0xFE, 0x32, 0x35, 0x2F, + 0x33, 0x2A, 0x5E, 0x9F, 0x7B, 0xDA, 0x1E, 0xBF, + 0xF6, 0xA1, 0xBE, 0x3F, 0xCA, 0x22, 0x13, 0x07, + 0xDE, 0xA0, 0x62, 0x41, 0xF7, 0xAA, 0x81, 0xC2, +/* 3072 */ + 0xC1, 0xFC, 0xBD, 0xDE, 0xA2, 0xF7, 0xDC, 0x33, + 0x18, 0x83, 0x8A, 0x2E, 0xAF, 0xF5, 0xF3, 0xB2, + 0xD2, 0x4F, 0x4A, 0x76, 0x3F, 0xAC, 0xB8, 0x82, + 0xFD, 0xFE, 0x17, 0x0F, 0xD3, 0xB1, 0xF7, 0x80, + 0xF9, 0xAC, 0xCE, 0x41, 0x79, 0x7F, 0x28, 0x05, + 0xC2, 0x46, 0x78, 0x5E, 0x92, 0x95, 0x70, 0x23, + 0x5F, 0xCF, 0x8F, 0x7B, 0xCA, 0x3E, 0xA3, 0x3B, + 0x4D, 0x7C, 0x60, 0xA5, 0xE6, 0x33, 0xE3, 0xE1 +/* 4096 */ +}; + + +/* returns 1 on key size ok and 0 if not ok */ +static WC_INLINE int RsaSizeCheck(int size) +{ + if (size < RSA_MIN_SIZE || size > RSA_MAX_SIZE) { + return 0; + } + +#ifdef HAVE_FIPS + /* Key size requirements for CAVP */ + switch (size) { + case 1024: + case 2048: + case 3072: + case 4096: + return 1; + } + + return 0; +#else + return 1; /* allow unusual key sizes in non FIPS mode */ +#endif /* HAVE_FIPS */ +} + + +static int _CheckProbablePrime(mp_int* p, mp_int* q, mp_int* e, int nlen, + int* isPrime, WC_RNG* rng) +{ + int ret; + mp_int tmp1, tmp2; + mp_int* prime; + + if (p == NULL || e == NULL || isPrime == NULL) + return BAD_FUNC_ARG; + + if (!RsaSizeCheck(nlen)) + return BAD_FUNC_ARG; + + *isPrime = MP_NO; + + if (q != NULL) { + /* 5.4 - check that |p-q| <= (2^(1/2))(2^((nlen/2)-1)) */ + ret = wc_CompareDiffPQ(p, q, nlen); + if (ret != MP_OKAY) goto notOkay; + prime = q; + } + else + prime = p; + + ret = mp_init_multi(&tmp1, &tmp2, NULL, NULL, NULL, NULL); + if (ret != MP_OKAY) goto notOkay; + + /* 4.4,5.5 - Check that prime >= (2^(1/2))(2^((nlen/2)-1)) + * This is a comparison against lowerBound */ + ret = mp_read_unsigned_bin(&tmp1, lower_bound, nlen/16); + if (ret != MP_OKAY) goto notOkay; + ret = mp_cmp(prime, &tmp1); + if (ret == MP_LT) goto exit; + + /* 4.5,5.6 - Check that GCD(p-1, e) == 1 */ + ret = mp_sub_d(prime, 1, &tmp1); /* tmp1 = prime-1 */ + if (ret != MP_OKAY) goto notOkay; + ret = mp_gcd(&tmp1, e, &tmp2); /* tmp2 = gcd(prime-1, e) */ + if (ret != MP_OKAY) goto notOkay; + ret = mp_cmp_d(&tmp2, 1); + if (ret != MP_EQ) goto exit; /* e divides p-1 */ + + /* 4.5.1,5.6.1 - Check primality of p with 8 rounds of M-R. + * mp_prime_is_prime_ex() performs test divisions against the first 256 + * prime numbers. 
After that it performs 8 rounds of M-R using random + * bases between 2 and n-2. + * mp_prime_is_prime() performs the same test divisions and then does + * M-R with the first 8 primes. Both functions set isPrime as a + * side-effect. */ + if (rng != NULL) + ret = mp_prime_is_prime_ex(prime, 8, isPrime, rng); + else + ret = mp_prime_is_prime(prime, 8, isPrime); + if (ret != MP_OKAY) goto notOkay; + +exit: + ret = MP_OKAY; +notOkay: + mp_clear(&tmp1); + mp_clear(&tmp2); + return ret; +} + + +int wc_CheckProbablePrime_ex(const byte* pRaw, word32 pRawSz, + const byte* qRaw, word32 qRawSz, + const byte* eRaw, word32 eRawSz, + int nlen, int* isPrime, WC_RNG* rng) +{ + mp_int p, q, e; + mp_int* Q = NULL; + int ret; + + if (pRaw == NULL || pRawSz == 0 || + eRaw == NULL || eRawSz == 0 || + isPrime == NULL) { + + return BAD_FUNC_ARG; + } + + if ((qRaw != NULL && qRawSz == 0) || (qRaw == NULL && qRawSz != 0)) + return BAD_FUNC_ARG; + + ret = mp_init_multi(&p, &q, &e, NULL, NULL, NULL); + + if (ret == MP_OKAY) + ret = mp_read_unsigned_bin(&p, pRaw, pRawSz); + + if (ret == MP_OKAY) { + if (qRaw != NULL) { + ret = mp_read_unsigned_bin(&q, qRaw, qRawSz); + if (ret == MP_OKAY) + Q = &q; + } + } + + if (ret == MP_OKAY) + ret = mp_read_unsigned_bin(&e, eRaw, eRawSz); + + if (ret == MP_OKAY) + ret = _CheckProbablePrime(&p, Q, &e, nlen, isPrime, rng); + + ret = (ret == MP_OKAY) ? 0 : PRIME_GEN_E; + + mp_clear(&p); + mp_clear(&q); + mp_clear(&e); + + return ret; +} + + +int wc_CheckProbablePrime(const byte* pRaw, word32 pRawSz, + const byte* qRaw, word32 qRawSz, + const byte* eRaw, word32 eRawSz, + int nlen, int* isPrime) +{ + return wc_CheckProbablePrime_ex(pRaw, pRawSz, qRaw, qRawSz, + eRaw, eRawSz, nlen, isPrime, NULL); +} + +#if !defined(HAVE_FIPS) || (defined(HAVE_FIPS) && \ + defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)) +/* Make an RSA key for size bits, with e specified, 65537 is a good e */ +int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng) +{ +#ifndef WC_NO_RNG + mp_int p, q, tmp1, tmp2, tmp3; + int err, i, failCount, primeSz, isPrime = 0; + byte* buf = NULL; + + if (key == NULL || rng == NULL) + return BAD_FUNC_ARG; + + if (!RsaSizeCheck(size)) + return BAD_FUNC_ARG; + + if (e < 3 || (e & 1) == 0) + return BAD_FUNC_ARG; + +#if defined(WOLFSSL_CRYPTOCELL) + + return cc310_RSA_GenerateKeyPair(key, size, e); + +#endif /*WOLFSSL_CRYPTOCELL*/ + +#ifdef WOLF_CRYPTO_CB + if (key->devId != INVALID_DEVID) { + int ret = wc_CryptoCb_MakeRsaKey(key, size, e, rng); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + } +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \ + defined(WC_ASYNC_ENABLE_RSA_KEYGEN) + if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) { + #ifdef HAVE_CAVIUM + /* TODO: Not implemented */ + #elif defined(HAVE_INTEL_QA) + return IntelQaRsaKeyGen(&key->asyncDev, key, size, e, rng); + #else + if (wc_AsyncTestInit(&key->asyncDev, ASYNC_TEST_RSA_MAKE)) { + WC_ASYNC_TEST* testDev = &key->asyncDev.test; + testDev->rsaMake.rng = rng; + testDev->rsaMake.key = key; + testDev->rsaMake.size = size; + testDev->rsaMake.e = e; + return WC_PENDING_E; + } + #endif + } +#endif + + err = mp_init_multi(&p, &q, &tmp1, &tmp2, &tmp3, NULL); + + if (err == MP_OKAY) + err = mp_set_int(&tmp3, e); + + /* The failCount value comes from NIST FIPS 186-4, section B.3.3, + * process steps 4.7 and 5.8. */ + failCount = 5 * (size / 2); + primeSz = size / 16; /* size is the size of n in bits. + primeSz is in bytes. 
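+                                    Each prime is size/2 bits long,
+                                    i.e. (size/2)/8 = size/16 bytes.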
*/ + + /* allocate buffer to work with */ + if (err == MP_OKAY) { + buf = (byte*)XMALLOC(primeSz, key->heap, DYNAMIC_TYPE_RSA); + if (buf == NULL) + err = MEMORY_E; + } + + /* make p */ + if (err == MP_OKAY) { + isPrime = 0; + i = 0; + do { +#ifdef SHOW_GEN + printf("."); + fflush(stdout); +#endif + /* generate value */ + err = wc_RNG_GenerateBlock(rng, buf, primeSz); + if (err == 0) { + /* prime lower bound has the MSB set, set it in candidate */ + buf[0] |= 0x80; + /* make candidate odd */ + buf[primeSz-1] |= 0x01; + /* load value */ + err = mp_read_unsigned_bin(&p, buf, primeSz); + } + + if (err == MP_OKAY) + err = _CheckProbablePrime(&p, NULL, &tmp3, size, &isPrime, rng); + +#ifdef HAVE_FIPS + i++; +#else + /* Keep the old retry behavior in non-FIPS build. */ + (void)i; +#endif + } while (err == MP_OKAY && !isPrime && i < failCount); + } + + if (err == MP_OKAY && !isPrime) + err = PRIME_GEN_E; + + /* make q */ + if (err == MP_OKAY) { + isPrime = 0; + i = 0; + do { +#ifdef SHOW_GEN + printf("."); + fflush(stdout); +#endif + /* generate value */ + err = wc_RNG_GenerateBlock(rng, buf, primeSz); + if (err == 0) { + /* prime lower bound has the MSB set, set it in candidate */ + buf[0] |= 0x80; + /* make candidate odd */ + buf[primeSz-1] |= 0x01; + /* load value */ + err = mp_read_unsigned_bin(&q, buf, primeSz); + } + + if (err == MP_OKAY) + err = _CheckProbablePrime(&p, &q, &tmp3, size, &isPrime, rng); + +#ifdef HAVE_FIPS + i++; +#else + /* Keep the old retry behavior in non-FIPS build. */ + (void)i; +#endif + } while (err == MP_OKAY && !isPrime && i < failCount); + } + + if (err == MP_OKAY && !isPrime) + err = PRIME_GEN_E; + + if (buf) { + ForceZero(buf, primeSz); + XFREE(buf, key->heap, DYNAMIC_TYPE_RSA); + } + + if (err == MP_OKAY && mp_cmp(&p, &q) < 0) { + err = mp_copy(&p, &tmp1); + if (err == MP_OKAY) + err = mp_copy(&q, &p); + if (err == MP_OKAY) + mp_copy(&tmp1, &q); + } + + /* Setup RsaKey buffers */ + if (err == MP_OKAY) + err = mp_init_multi(&key->n, &key->e, &key->d, &key->p, &key->q, NULL); + if (err == MP_OKAY) + err = mp_init_multi(&key->dP, &key->dQ, &key->u, NULL, NULL, NULL); + + /* Software Key Calculation */ + if (err == MP_OKAY) /* tmp1 = p-1 */ + err = mp_sub_d(&p, 1, &tmp1); + if (err == MP_OKAY) /* tmp2 = q-1 */ + err = mp_sub_d(&q, 1, &tmp2); +#ifdef WC_RSA_BLINDING + if (err == MP_OKAY) /* tmp3 = order of n */ + err = mp_mul(&tmp1, &tmp2, &tmp3); +#else + if (err == MP_OKAY) /* tmp3 = lcm(p-1, q-1), last loop */ + err = mp_lcm(&tmp1, &tmp2, &tmp3); +#endif + /* make key */ + if (err == MP_OKAY) /* key->e = e */ + err = mp_set_int(&key->e, (mp_digit)e); +#ifdef WC_RSA_BLINDING + /* Blind the inverse operation with a value that is invertable */ + if (err == MP_OKAY) { + do { + err = mp_rand(&key->p, get_digit_count(&tmp3), rng); + if (err == MP_OKAY) + err = mp_set_bit(&key->p, 0); + if (err == MP_OKAY) + err = mp_set_bit(&key->p, size - 1); + if (err == MP_OKAY) + err = mp_gcd(&key->p, &tmp3, &key->q); + } + while ((err == MP_OKAY) && !mp_isone(&key->q)); + } + if (err == MP_OKAY) + err = mp_mul_d(&key->p, (mp_digit)e, &key->e); +#endif + if (err == MP_OKAY) /* key->d = 1/e mod lcm(p-1, q-1) */ + err = mp_invmod(&key->e, &tmp3, &key->d); +#ifdef WC_RSA_BLINDING + /* Take off blinding from d and reset e */ + if (err == MP_OKAY) + err = mp_mulmod(&key->d, &key->p, &tmp3, &key->d); + if (err == MP_OKAY) + err = mp_set_int(&key->e, (mp_digit)e); +#endif + if (err == MP_OKAY) /* key->n = pq */ + err = mp_mul(&p, &q, &key->n); + if (err == MP_OKAY) /* key->dP = d mod(p-1) 
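+                                  (dP, dQ and u are the CRT values
+                                  consumed by wc_RsaFunctionSync)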
*/ + err = mp_mod(&key->d, &tmp1, &key->dP); + if (err == MP_OKAY) /* key->dQ = d mod(q-1) */ + err = mp_mod(&key->d, &tmp2, &key->dQ); +#ifdef WOLFSSL_MP_INVMOD_CONSTANT_TIME + if (err == MP_OKAY) /* key->u = 1/q mod p */ + err = mp_invmod(&q, &p, &key->u); +#else + if (err == MP_OKAY) + err = mp_sub_d(&p, 2, &tmp3); + if (err == MP_OKAY) /* key->u = 1/q mod p = q^p-2 mod p */ + err = mp_exptmod(&q, &tmp3 , &p, &key->u); +#endif + if (err == MP_OKAY) + err = mp_copy(&p, &key->p); + if (err == MP_OKAY) + err = mp_copy(&q, &key->q); + +#ifdef HAVE_WOLF_BIGINT + /* make sure raw unsigned bin version is available */ + if (err == MP_OKAY) + err = wc_mp_to_bigint(&key->n, &key->n.raw); + if (err == MP_OKAY) + err = wc_mp_to_bigint(&key->e, &key->e.raw); + if (err == MP_OKAY) + err = wc_mp_to_bigint(&key->d, &key->d.raw); + if (err == MP_OKAY) + err = wc_mp_to_bigint(&key->p, &key->p.raw); + if (err == MP_OKAY) + err = wc_mp_to_bigint(&key->q, &key->q.raw); + if (err == MP_OKAY) + err = wc_mp_to_bigint(&key->dP, &key->dP.raw); + if (err == MP_OKAY) + err = wc_mp_to_bigint(&key->dQ, &key->dQ.raw); + if (err == MP_OKAY) + err = wc_mp_to_bigint(&key->u, &key->u.raw); +#endif + + if (err == MP_OKAY) + key->type = RSA_PRIVATE; + + mp_clear(&tmp1); + mp_clear(&tmp2); + mp_clear(&tmp3); + mp_clear(&p); + mp_clear(&q); + +#if defined(WOLFSSL_KEY_GEN) && !defined(WOLFSSL_NO_RSA_KEY_CHECK) + /* Perform the pair-wise consistency test on the new key. */ + if (err == 0) + err = wc_CheckRsaKey(key); +#endif + + if (err != 0) { + wc_FreeRsaKey(key); + return err; + } + +#if defined(WOLFSSL_XILINX_CRYPT) || defined(WOLFSSL_CRYPTOCELL) + if (wc_InitRsaHw(key) != 0) { + return BAD_STATE_E; + } +#endif + return 0; +#else + return NOT_COMPILED_IN; +#endif +} +#endif /* !FIPS || FIPS_VER >= 2 */ +#endif /* WOLFSSL_KEY_GEN */ + + +#ifdef WC_RSA_BLINDING +int wc_RsaSetRNG(RsaKey* key, WC_RNG* rng) +{ + if (key == NULL) + return BAD_FUNC_ARG; + + key->rng = rng; + + return 0; +} +#endif /* WC_RSA_BLINDING */ + +#ifdef WC_RSA_NONBLOCK +int wc_RsaSetNonBlock(RsaKey* key, RsaNb* nb) +{ + if (key == NULL) + return BAD_FUNC_ARG; + + if (nb) { + XMEMSET(nb, 0, sizeof(RsaNb)); + } + + /* Allow nb == NULL to clear non-block mode */ + key->nb = nb; + + return 0; +} +#ifdef WC_RSA_NONBLOCK_TIME +int wc_RsaSetNonBlockTime(RsaKey* key, word32 maxBlockUs, word32 cpuMHz) +{ + if (key == NULL || key->nb == NULL) { + return BAD_FUNC_ARG; + } + + /* calculate maximum number of instructions to block */ + key->nb->exptmod.maxBlockInst = cpuMHz * maxBlockUs; + + return 0; +} +#endif /* WC_RSA_NONBLOCK_TIME */ +#endif /* WC_RSA_NONBLOCK */ + +#endif /* NO_RSA */ diff --git a/client/wolfssl/wolfcrypt/src/selftest.c b/client/wolfssl/wolfcrypt/src/selftest.c new file mode 100644 index 0000000..e69de29 diff --git a/client/wolfssl/wolfcrypt/src/sha.c b/client/wolfssl/wolfcrypt/src/sha.c new file mode 100644 index 0000000..5c80563 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/sha.c @@ -0,0 +1,882 @@ +/* sha.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if !defined(NO_SHA)
+
+#if defined(HAVE_FIPS) && \
+    defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+
+    /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+    #define FIPS_NO_WRAPPERS
+
+    #ifdef USE_WINDOWS_API
+        #pragma code_seg(".fipsA$j")
+        #pragma const_seg(".fipsB$j")
+    #endif
+#endif
+
+#include <wolfssl/wolfcrypt/sha.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
+/* fips wrapper calls, user can call direct */
+#if defined(HAVE_FIPS) && \
+    (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
+
+    int wc_InitSha(wc_Sha* sha)
+    {
+        if (sha == NULL) {
+            return BAD_FUNC_ARG;
+        }
+        return InitSha_fips(sha);
+    }
+    int wc_InitSha_ex(wc_Sha* sha, void* heap, int devId)
+    {
+        (void)heap;
+        (void)devId;
+        if (sha == NULL) {
+            return BAD_FUNC_ARG;
+        }
+        return InitSha_fips(sha);
+    }
+
+    int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len)
+    {
+        if (sha == NULL || (data == NULL && len > 0)) {
+            return BAD_FUNC_ARG;
+        }
+        return ShaUpdate_fips(sha, data, len);
+    }
+
+    int wc_ShaFinal(wc_Sha* sha, byte* out)
+    {
+        if (sha == NULL || out == NULL) {
+            return BAD_FUNC_ARG;
+        }
+        return ShaFinal_fips(sha,out);
+    }
+    void wc_ShaFree(wc_Sha* sha)
+    {
+        (void)sha;
+        /* Not supported in FIPS */
+    }
+
+#else /* else build without fips, or for FIPS v2 */
+
+
+#if defined(WOLFSSL_TI_HASH)
+    /* #include <wolfcrypt/src/port/ti/ti-hash.c> included by wc_port.c */
+
+#else
+
+#include <wolfssl/wolfcrypt/logging.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+
+/* Hardware Acceleration */
+#if defined(WOLFSSL_PIC32MZ_HASH)
+    #include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
+
+#elif defined(STM32_HASH)
+
+    /* Supports CubeMX HAL or Standard Peripheral Library */
+    int wc_InitSha_ex(wc_Sha* sha, void* heap, int devId)
+    {
+        if (sha == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+        (void)devId;
+        (void)heap;
+
+        wc_Stm32_Hash_Init(&sha->stmCtx);
+
+        return 0;
+    }
+
+    int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len)
+    {
+        int ret;
+
+        if (sha == NULL || (data == NULL && len > 0)) {
+            return BAD_FUNC_ARG;
+        }
+
+        ret = wolfSSL_CryptHwMutexLock();
+        if (ret == 0) {
+            ret = wc_Stm32_Hash_Update(&sha->stmCtx, HASH_AlgoSelection_SHA1,
+                                       data, len);
+            wolfSSL_CryptHwMutexUnLock();
+        }
+        return ret;
+    }
+
+    int wc_ShaFinal(wc_Sha* sha, byte* hash)
+    {
+        int ret;
+
+        if (sha == NULL || hash == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+        ret = wolfSSL_CryptHwMutexLock();
+        if (ret == 0) {
+            ret = wc_Stm32_Hash_Final(&sha->stmCtx, HASH_AlgoSelection_SHA1,
+                                      hash, WC_SHA_DIGEST_SIZE);
+            wolfSSL_CryptHwMutexUnLock();
+        }
+
+        (void)wc_InitSha(sha); /* reset state */
+
+        return ret;
+    }
+
+
+#elif defined(FREESCALE_LTC_SHA)
+
+    #include "fsl_ltc.h"
+    int wc_InitSha_ex(wc_Sha* sha, void* heap, int devId)
+    {
+        if (sha == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+        (void)devId;
+        (void)heap;
+
+        LTC_HASH_Init(LTC_BASE, &sha->ctx, kLTC_Sha1, NULL, 0);
+        return 0;
+    }
+
+    int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len)
+    {
+        LTC_HASH_Update(&sha->ctx, data, len);
+        return 0;
+    }
+
+    int wc_ShaFinal(wc_Sha* sha, byte* hash)
+    {
+        uint32_t hashlen = WC_SHA_DIGEST_SIZE;
+        LTC_HASH_Finish(&sha->ctx, hash, &hashlen);
+        return wc_InitSha(sha); /* reset state */
+    }
+
+
+#elif defined(FREESCALE_MMCAU_SHA)
+
+    #ifdef
FREESCALE_MMCAU_CLASSIC_SHA + #include "cau_api.h" + #else + #include "fsl_mmcau.h" + #endif + + #define USE_SHA_SOFTWARE_IMPL /* Only for API's, actual transform is here */ + + #define XTRANSFORM(S,B) Transform((S),(B)) + #define XTRANSFORM_LEN(S,B,L) Transform_Len((S),(B),(L)) + + #ifndef WC_HASH_DATA_ALIGNMENT + /* these hardware API's require 4 byte (word32) alignment */ + #define WC_HASH_DATA_ALIGNMENT 4 + #endif + + static int InitSha(wc_Sha* sha) + { + int ret = 0; + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_sha1_initialize_output(sha->digest); + #else + MMCAU_SHA1_InitializeOutput((uint32_t*)sha->digest); + #endif + wolfSSL_CryptHwMutexUnLock(); + + sha->buffLen = 0; + sha->loLen = 0; + sha->hiLen = 0; + + return ret; + } + + static int Transform(wc_Sha* sha, const byte* data) + { + int ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_sha1_hash_n((byte*)data, 1, sha->digest); + #else + MMCAU_SHA1_HashN((byte*)data, 1, (uint32_t*)sha->digest); + #endif + wolfSSL_CryptHwMutexUnLock(); + } + return ret; + } + + static int Transform_Len(wc_Sha* sha, const byte* data, word32 len) + { + int ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + #if defined(WC_HASH_DATA_ALIGNMENT) && WC_HASH_DATA_ALIGNMENT > 0 + if ((size_t)data % WC_HASH_DATA_ALIGNMENT) { + /* data pointer is NOT aligned, + * so copy and perform one block at a time */ + byte* local = (byte*)sha->buffer; + while (len >= WC_SHA_BLOCK_SIZE) { + XMEMCPY(local, data, WC_SHA_BLOCK_SIZE); + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_sha1_hash_n(local, 1, sha->digest); + #else + MMCAU_SHA1_HashN(local, 1, sha->digest); + #endif + data += WC_SHA_BLOCK_SIZE; + len -= WC_SHA_BLOCK_SIZE; + } + } + else + #endif + { + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_sha1_hash_n((byte*)data, len/WC_SHA_BLOCK_SIZE, sha->digest); + #else + MMCAU_SHA1_HashN((byte*)data, len/WC_SHA_BLOCK_SIZE, + (uint32_t*)sha->digest); + #endif + } + wolfSSL_CryptHwMutexUnLock(); + } + return ret; + } + +#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH) + /* wolfcrypt/src/port/caam/caam_sha.c */ + +#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + + #include "wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h" + + #define USE_SHA_SOFTWARE_IMPL + + static int InitSha(wc_Sha* sha) + { + int ret = 0; + + sha->digest[0] = 0x67452301L; + sha->digest[1] = 0xEFCDAB89L; + sha->digest[2] = 0x98BADCFEL; + sha->digest[3] = 0x10325476L; + sha->digest[4] = 0xC3D2E1F0L; + + sha->buffLen = 0; + sha->loLen = 0; + sha->hiLen = 0; + + /* always start firstblock = 1 when using hw engine */ + sha->ctx.isfirstblock = 1; + sha->ctx.sha_type = SHA1; + if(sha->ctx.mode == ESP32_SHA_HW){ + /* release hw engine */ + esp_sha_hw_unlock(); + } + /* always set mode as INIT + * whether using HW or SW is determined at first call of update() + */ + sha->ctx.mode = ESP32_SHA_INIT; + + return ret; + } + +#elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \ + !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH) + + /* implemented in wolfcrypt/src/port/Renesas/renesas_tsip_sha.c */ + +#else + /* Software implementation */ + #define USE_SHA_SOFTWARE_IMPL + + static int InitSha(wc_Sha* sha) + { + int ret = 0; + + sha->digest[0] = 0x67452301L; + sha->digest[1] = 0xEFCDAB89L; + sha->digest[2] = 0x98BADCFEL; + sha->digest[3] = 0x10325476L; + sha->digest[4] = 0xC3D2E1F0L; + + sha->buffLen = 0; + sha->loLen = 0; + sha->hiLen = 0; + #if 
defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + sha->flags = 0; + #endif + + return ret; + } +#endif /* End Hardware Acceleration */ + +/* Software implementation */ +#ifdef USE_SHA_SOFTWARE_IMPL + +static WC_INLINE void AddLength(wc_Sha* sha, word32 len) +{ + word32 tmp = sha->loLen; + if ((sha->loLen += len) < tmp) + sha->hiLen++; /* carry low to high */ +} + +/* Check if custom wc_Sha transform is used */ +#ifndef XTRANSFORM + #define XTRANSFORM(S,B) Transform((S),(B)) + + #define blk0(i) (W[i] = *((word32*)&data[i*sizeof(word32)])) + #define blk1(i) (W[(i)&15] = \ + rotlFixed(W[((i)+13)&15]^W[((i)+8)&15]^W[((i)+2)&15]^W[(i)&15],1)) + + #define f1(x,y,z) ((z)^((x) &((y)^(z)))) + #define f2(x,y,z) ((x)^(y)^(z)) + #define f3(x,y,z) (((x)&(y))|((z)&((x)|(y)))) + #define f4(x,y,z) ((x)^(y)^(z)) + + #ifdef WOLFSSL_NUCLEUS_1_2 + /* nucleus.h also defines R1-R4 */ + #undef R1 + #undef R2 + #undef R3 + #undef R4 + #endif + + /* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */ + #define R0(v,w,x,y,z,i) (z)+= f1((w),(x),(y)) + blk0((i)) + 0x5A827999+ \ + rotlFixed((v),5); (w) = rotlFixed((w),30); + #define R1(v,w,x,y,z,i) (z)+= f1((w),(x),(y)) + blk1((i)) + 0x5A827999+ \ + rotlFixed((v),5); (w) = rotlFixed((w),30); + #define R2(v,w,x,y,z,i) (z)+= f2((w),(x),(y)) + blk1((i)) + 0x6ED9EBA1+ \ + rotlFixed((v),5); (w) = rotlFixed((w),30); + #define R3(v,w,x,y,z,i) (z)+= f3((w),(x),(y)) + blk1((i)) + 0x8F1BBCDC+ \ + rotlFixed((v),5); (w) = rotlFixed((w),30); + #define R4(v,w,x,y,z,i) (z)+= f4((w),(x),(y)) + blk1((i)) + 0xCA62C1D6+ \ + rotlFixed((v),5); (w) = rotlFixed((w),30); + + static int Transform(wc_Sha* sha, const byte* data) + { + word32 W[WC_SHA_BLOCK_SIZE / sizeof(word32)]; + + /* Copy context->state[] to working vars */ + word32 a = sha->digest[0]; + word32 b = sha->digest[1]; + word32 c = sha->digest[2]; + word32 d = sha->digest[3]; + word32 e = sha->digest[4]; + + #ifdef USE_SLOW_SHA + word32 t, i; + + for (i = 0; i < 16; i++) { + R0(a, b, c, d, e, i); + t = e; e = d; d = c; c = b; b = a; a = t; + } + + for (; i < 20; i++) { + R1(a, b, c, d, e, i); + t = e; e = d; d = c; c = b; b = a; a = t; + } + + for (; i < 40; i++) { + R2(a, b, c, d, e, i); + t = e; e = d; d = c; c = b; b = a; a = t; + } + + for (; i < 60; i++) { + R3(a, b, c, d, e, i); + t = e; e = d; d = c; c = b; b = a; a = t; + } + + for (; i < 80; i++) { + R4(a, b, c, d, e, i); + t = e; e = d; d = c; c = b; b = a; a = t; + } + #else + /* nearly 1 K bigger in code size but 25% faster */ + /* 4 rounds of 20 operations each. Loop unrolled. 
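Rotation of the five working variables is done by permuting the macro arguments from one call to the next, so a..e never need to be moved between rounds.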
*/ + R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3); + R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); + R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11); + R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15); + + R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19); + + R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23); + R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27); + R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31); + R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35); + R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39); + + R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43); + R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47); + R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51); + R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55); + R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59); + + R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63); + R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67); + R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); + R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); + R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); + #endif + + /* Add the working vars back into digest state[] */ + sha->digest[0] += a; + sha->digest[1] += b; + sha->digest[2] += c; + sha->digest[3] += d; + sha->digest[4] += e; + + (void)data; /* Not used */ + + return 0; + } +#endif /* !USE_CUSTOM_SHA_TRANSFORM */ + + +int wc_InitSha_ex(wc_Sha* sha, void* heap, int devId) +{ + int ret = 0; + + if (sha == NULL) + return BAD_FUNC_ARG; + + sha->heap = heap; +#ifdef WOLF_CRYPTO_CB + sha->devId = devId; +#endif + +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + sha->ctx.mode = ESP32_SHA_INIT; + sha->ctx.isfirstblock = 1; +#endif + ret = InitSha(sha); + if (ret != 0) + return ret; + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA) + ret = wolfAsync_DevCtxInit(&sha->asyncDev, WOLFSSL_ASYNC_MARKER_SHA, + sha->heap, devId); +#else + (void)devId; +#endif /* WOLFSSL_ASYNC_CRYPT */ + + return ret; +} + +/* do block size increments/updates */ +int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len) +{ + int ret = 0; + word32 blocksLen; + byte* local; + + if (sha == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + +#ifdef WOLF_CRYPTO_CB + if (sha->devId != INVALID_DEVID) { + ret = wc_CryptoCb_ShaHash(sha, data, len, NULL); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + ret = 0; /* reset ret */ + /* fall-through when unavailable */ + } +#endif +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA) + if (sha->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA) { + #if defined(HAVE_INTEL_QA) + return IntelQaSymSha(&sha->asyncDev, NULL, data, len); + #endif + } +#endif /* WOLFSSL_ASYNC_CRYPT */ + + /* check that internal buffLen is valid */ + if (sha->buffLen >= WC_SHA_BLOCK_SIZE) + return BUFFER_E; + + if (data == NULL && len == 0) { + /* valid, but do nothing */ + return 0; + } + + /* add length for final */ + AddLength(sha, len); + + local = (byte*)sha->buffer; + + /* process any remainder from previous operation */ + if (sha->buffLen > 0) { + blocksLen = min(len, WC_SHA_BLOCK_SIZE - sha->buffLen); 
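+        /* top up the partial block held over from the previous call; it is
+         * only transformed once a full WC_SHA_BLOCK_SIZE bytes are held */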
+ XMEMCPY(&local[sha->buffLen], data, blocksLen); + + sha->buffLen += blocksLen; + data += blocksLen; + len -= blocksLen; + + if (sha->buffLen == WC_SHA_BLOCK_SIZE) { + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + ByteReverseWords(sha->buffer, sha->buffer, WC_SHA_BLOCK_SIZE); + #endif + + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha->ctx.mode == ESP32_SHA_INIT) { + esp_sha_try_hw_lock(&sha->ctx); + } + if (sha->ctx.mode == ESP32_SHA_SW) { + ret = XTRANSFORM(sha, (const byte*)local); + } else { + esp_sha_process(sha, (const byte*)local); + } + #else + ret = XTRANSFORM(sha, (const byte*)local); + #endif + if (ret != 0) + return ret; + + sha->buffLen = 0; + } + } + + /* process blocks */ +#ifdef XTRANSFORM_LEN + /* get number of blocks */ + /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */ + /* len (masked by 0xFFFFFFC0) returns block aligned length */ + blocksLen = len & ~(WC_SHA_BLOCK_SIZE-1); + if (blocksLen > 0) { + /* Byte reversal performed in function if required. */ + XTRANSFORM_LEN(sha, data, blocksLen); + data += blocksLen; + len -= blocksLen; + } +#else + while (len >= WC_SHA_BLOCK_SIZE) { + word32* local32 = sha->buffer; + /* optimization to avoid memcpy if data pointer is properly aligned */ + /* Little Endian requires byte swap, so can't use data directly */ + #if defined(WC_HASH_DATA_ALIGNMENT) && !defined(LITTLE_ENDIAN_ORDER) + if (((size_t)data % WC_HASH_DATA_ALIGNMENT) == 0) { + local32 = (word32*)data; + } + else + #endif + { + XMEMCPY(local32, data, WC_SHA_BLOCK_SIZE); + } + + data += WC_SHA_BLOCK_SIZE; + len -= WC_SHA_BLOCK_SIZE; + + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + ByteReverseWords(local32, local32, WC_SHA_BLOCK_SIZE); + #endif + + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha->ctx.mode == ESP32_SHA_INIT){ + esp_sha_try_hw_lock(&sha->ctx); + } + if (sha->ctx.mode == ESP32_SHA_SW){ + ret = XTRANSFORM(sha, (const byte*)local32); + } else { + esp_sha_process(sha, (const byte*)local32); + } + #else + ret = XTRANSFORM(sha, (const byte*)local32); + #endif + } +#endif /* XTRANSFORM_LEN */ + + /* save remainder */ + if (len > 0) { + XMEMCPY(local, data, len); + sha->buffLen = len; + } + + return ret; +} + +int wc_ShaFinalRaw(wc_Sha* sha, byte* hash) +{ +#ifdef LITTLE_ENDIAN_ORDER + word32 digest[WC_SHA_DIGEST_SIZE / sizeof(word32)]; +#endif + + if (sha == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords((word32*)digest, (word32*)sha->digest, WC_SHA_DIGEST_SIZE); + XMEMCPY(hash, digest, WC_SHA_DIGEST_SIZE); +#else + XMEMCPY(hash, sha->digest, WC_SHA_DIGEST_SIZE); +#endif + + return 0; +} + +int wc_ShaFinal(wc_Sha* sha, byte* hash) +{ + int ret; + byte* local; + + if (sha == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + + local = (byte*)sha->buffer; + +#ifdef WOLF_CRYPTO_CB + if (sha->devId != INVALID_DEVID) { + ret = wc_CryptoCb_ShaHash(sha, NULL, 0, hash); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + ret = 0; /* reset ret */ + /* fall-through when unavailable */ + } +#endif +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA) + if (sha->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA) { + #if defined(HAVE_INTEL_QA) + return IntelQaSymSha(&sha->asyncDev, hash, NULL, WC_SHA_DIGEST_SIZE); + #endif + } +#endif /* WOLFSSL_ASYNC_CRYPT */ + + local[sha->buffLen++] = 0x80; /* add 1 */ + + /* pad with zeros */ + if (sha->buffLen > WC_SHA_PAD_SIZE) { + 
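/* no room left in this block for the 8-byte length field: zero-fill
+         * it, transform, then continue padding in a fresh block */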
XMEMSET(&local[sha->buffLen], 0, WC_SHA_BLOCK_SIZE - sha->buffLen); + sha->buffLen += WC_SHA_BLOCK_SIZE - sha->buffLen; + + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + ByteReverseWords(sha->buffer, sha->buffer, WC_SHA_BLOCK_SIZE); + #endif + + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha->ctx.mode == ESP32_SHA_INIT) { + esp_sha_try_hw_lock(&sha->ctx); + } + if (sha->ctx.mode == ESP32_SHA_SW) { + ret = XTRANSFORM(sha, (const byte*)local); + } else { + ret = esp_sha_process(sha, (const byte*)local); + } + #else + ret = XTRANSFORM(sha, (const byte*)local); + #endif + if (ret != 0) + return ret; + + sha->buffLen = 0; + } + XMEMSET(&local[sha->buffLen], 0, WC_SHA_PAD_SIZE - sha->buffLen); + +#if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + ByteReverseWords(sha->buffer, sha->buffer, WC_SHA_BLOCK_SIZE); +#endif + + /* store lengths */ + /* put lengths in bits */ + sha->hiLen = (sha->loLen >> (8*sizeof(sha->loLen) - 3)) + (sha->hiLen << 3); + sha->loLen = sha->loLen << 3; + + /* ! length ordering dependent on digest endian type ! */ + XMEMCPY(&local[WC_SHA_PAD_SIZE], &sha->hiLen, sizeof(word32)); + XMEMCPY(&local[WC_SHA_PAD_SIZE + sizeof(word32)], &sha->loLen, sizeof(word32)); + +#if defined(FREESCALE_MMCAU_SHA) + /* Kinetis requires only these bytes reversed */ + ByteReverseWords(&sha->buffer[WC_SHA_PAD_SIZE/sizeof(word32)], + &sha->buffer[WC_SHA_PAD_SIZE/sizeof(word32)], + 2 * sizeof(word32)); +#endif + +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha->ctx.mode == ESP32_SHA_INIT) { + esp_sha_try_hw_lock(&sha->ctx); + } + if (sha->ctx.mode == ESP32_SHA_SW) { + ret = XTRANSFORM(sha, (const byte*)local); + } else { + ret = esp_sha_digest_process(sha, 1); + } +#else + ret = XTRANSFORM(sha, (const byte*)local); +#endif + +#ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords(sha->digest, sha->digest, WC_SHA_DIGEST_SIZE); +#endif + + XMEMCPY(hash, sha->digest, WC_SHA_DIGEST_SIZE); + + (void)InitSha(sha); /* reset state */ + + return ret; +} + +#endif /* USE_SHA_SOFTWARE_IMPL */ + + +int wc_InitSha(wc_Sha* sha) +{ + return wc_InitSha_ex(sha, NULL, INVALID_DEVID); +} + +void wc_ShaFree(wc_Sha* sha) +{ + if (sha == NULL) + return; + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA) + wolfAsync_DevCtxFree(&sha->asyncDev, WOLFSSL_ASYNC_MARKER_SHA); +#endif /* WOLFSSL_ASYNC_CRYPT */ + +#ifdef WOLFSSL_PIC32MZ_HASH + wc_ShaPic32Free(sha); +#endif +#if (defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \ + !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH)) + if (sha->msg != NULL) { + XFREE(sha->msg, sha->heap, DYNAMIC_TYPE_TMP_BUFFER); + sha->msg = NULL; + } +#endif +} + +#endif /* !WOLFSSL_TI_HASH */ +#endif /* HAVE_FIPS */ + +#ifndef WOLFSSL_TI_HASH +#if !defined(WOLFSSL_RENESAS_TSIP_CRYPT) || \ + defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH) +int wc_ShaGetHash(wc_Sha* sha, byte* hash) +{ + int ret; + wc_Sha tmpSha; + + if (sha == NULL || hash == NULL) + return BAD_FUNC_ARG; + +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if(sha->ctx.mode == ESP32_SHA_INIT){ + esp_sha_try_hw_lock(&sha->ctx); + } + if(sha->ctx.mode != ESP32_SHA_SW) + esp_sha_digest_process(sha, 0); +#endif + + ret = wc_ShaCopy(sha, &tmpSha); + if (ret == 0) { + ret = wc_ShaFinal(&tmpSha, hash); +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + sha->ctx.mode = ESP32_SHA_SW; +#endif + + + } + return ret; +} + 
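/* Editor's note (not in the original source): a minimal usage sketch for the
+ * snapshot API above. wc_ShaGetHash() finalizes a copy of the running state,
+ * so an intermediate digest can be read without disturbing the stream. The
+ * input bytes are hypothetical; the wc_Sha* calls are the ones in this file:
+ *
+ *     wc_Sha sha;
+ *     byte mid[WC_SHA_DIGEST_SIZE], fin[WC_SHA_DIGEST_SIZE];
+ *     if (wc_InitSha(&sha) == 0) {
+ *         wc_ShaUpdate(&sha, (const byte*)"abc", 3);
+ *         wc_ShaGetHash(&sha, mid);  // digest of "abc" so far
+ *         wc_ShaUpdate(&sha, (const byte*)"def", 3);
+ *         wc_ShaFinal(&sha, fin);    // digest of "abcdef"
+ *         wc_ShaFree(&sha);
+ *     }
+ */
+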
+int wc_ShaCopy(wc_Sha* src, wc_Sha* dst) +{ + int ret = 0; + + if (src == NULL || dst == NULL) + return BAD_FUNC_ARG; + + XMEMCPY(dst, src, sizeof(wc_Sha)); + +#ifdef WOLFSSL_ASYNC_CRYPT + ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev); +#endif +#ifdef WOLFSSL_PIC32MZ_HASH + ret = wc_Pic32HashCopy(&src->cache, &dst->cache); +#endif +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + dst->ctx.mode = src->ctx.mode; + dst->ctx.isfirstblock = src->ctx.isfirstblock; + dst->ctx.sha_type = src->ctx.sha_type; +#endif +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + dst->flags |= WC_HASH_FLAG_ISCOPY; +#endif + return ret; +} +#endif /* defined(WOLFSSL_RENESAS_TSIP_CRYPT) ... */ +#endif /* !WOLFSSL_TI_HASH */ + + +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) +int wc_ShaSetFlags(wc_Sha* sha, word32 flags) +{ + if (sha) { + sha->flags = flags; + } + return 0; +} +int wc_ShaGetFlags(wc_Sha* sha, word32* flags) +{ + if (sha && flags) { + *flags = sha->flags; + } + return 0; +} +#endif + +#endif /* !NO_SHA */ diff --git a/client/wolfssl/wolfcrypt/src/sha256.c b/client/wolfssl/wolfcrypt/src/sha256.c new file mode 100644 index 0000000..eb0911b --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/sha256.c @@ -0,0 +1,1644 @@ +/* sha256.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +/* + * SHA256 Build Options: + * USE_SLOW_SHA256: Reduces code size by not partially unrolling + (~2KB smaller and ~25% slower) (default OFF) + * WOLFSSL_SHA256_BY_SPEC: Uses the Ch/Maj based on SHA256 specification + (default ON) + * WOLFSSL_SHA256_ALT_CH_MAJ: Alternate Ch/Maj that is easier for compilers to + optimize and recognize as SHA256 (default OFF) + * SHA256_MANY_REGISTERS: A SHA256 version that keeps all data in registers + and partial unrolled (default OFF) + */ + +/* Default SHA256 to use Ch/Maj based on specification */ +#if !defined(WOLFSSL_SHA256_BY_SPEC) && !defined(WOLFSSL_SHA256_ALT_CH_MAJ) + #define WOLFSSL_SHA256_BY_SPEC +#endif + + +#if !defined(NO_SHA256) && !defined(WOLFSSL_ARMASM) + +#if defined(HAVE_FIPS) && \ + defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2) + + /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ + #define FIPS_NO_WRAPPERS + + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$d") + #pragma const_seg(".fipsB$d") + #endif +#endif + +#include +#include +#include +#include + +#ifdef WOLF_CRYPTO_CB + #include +#endif + +/* fips wrapper calls, user can call direct */ +#if defined(HAVE_FIPS) && \ + (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2)) + + int wc_InitSha256(wc_Sha256* sha) + { + if (sha == NULL) { + return BAD_FUNC_ARG; + } + return InitSha256_fips(sha); + } + int wc_InitSha256_ex(wc_Sha256* sha, void* heap, int devId) + { + (void)heap; + (void)devId; + if (sha == NULL) { + return BAD_FUNC_ARG; + } + return InitSha256_fips(sha); + } + int wc_Sha256Update(wc_Sha256* sha, const byte* data, word32 len) + { + if (sha == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + + if (data == NULL && len == 0) { + /* valid, but do nothing */ + return 0; + } + + return Sha256Update_fips(sha, data, len); + } + int wc_Sha256Final(wc_Sha256* sha, byte* out) + { + if (sha == NULL || out == NULL) { + return BAD_FUNC_ARG; + } + return Sha256Final_fips(sha, out); + } + void wc_Sha256Free(wc_Sha256* sha) + { + (void)sha; + /* Not supported in FIPS */ + } + +#else /* else build without fips, or for FIPS v2 */ + + +#if defined(WOLFSSL_TI_HASH) + /* #include included by wc_port.c */ +#elif defined(WOLFSSL_CRYPTOCELL) + /* wc_port.c includes wolfcrypt/src/port/arm/cryptoCellHash.c */ +#else + +#include + +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +#ifdef WOLFSSL_DEVCRYPTO_HASH + #include +#endif + + + +#if defined(USE_INTEL_SPEEDUP) + #if defined(__GNUC__) && ((__GNUC__ < 4) || \ + (__GNUC__ == 4 && __GNUC_MINOR__ <= 8)) + #undef NO_AVX2_SUPPORT + #define NO_AVX2_SUPPORT + #endif + #if defined(__clang__) && ((__clang_major__ < 3) || \ + (__clang_major__ == 3 && __clang_minor__ <= 5)) + #define NO_AVX2_SUPPORT + #elif defined(__clang__) && defined(NO_AVX2_SUPPORT) + #undef NO_AVX2_SUPPORT + #endif + + #define HAVE_INTEL_AVX1 + #ifndef NO_AVX2_SUPPORT + #define HAVE_INTEL_AVX2 + #endif +#endif /* USE_INTEL_SPEEDUP */ + +#if defined(HAVE_INTEL_AVX2) + #define HAVE_INTEL_RORX +#endif + + +#if !defined(WOLFSSL_PIC32MZ_HASH) && !defined(STM32_HASH_SHA2) && \ + (!defined(WOLFSSL_IMX6_CAAM) || defined(NO_IMX6_CAAM_HASH)) && \ + !defined(WOLFSSL_AFALG_HASH) && !defined(WOLFSSL_DEVCRYPTO_HASH) && \ + 
(!defined(WOLFSSL_ESP32WROOM32_CRYPT) || defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)) && \ + (!defined(WOLFSSL_RENESAS_TSIP_CRYPT) || defined(NO_WOLFSSL_RENESAS_TSIP_HASH)) + +static int InitSha256(wc_Sha256* sha256) +{ + int ret = 0; + + if (sha256 == NULL) + return BAD_FUNC_ARG; + + XMEMSET(sha256->digest, 0, sizeof(sha256->digest)); + sha256->digest[0] = 0x6A09E667L; + sha256->digest[1] = 0xBB67AE85L; + sha256->digest[2] = 0x3C6EF372L; + sha256->digest[3] = 0xA54FF53AL; + sha256->digest[4] = 0x510E527FL; + sha256->digest[5] = 0x9B05688CL; + sha256->digest[6] = 0x1F83D9ABL; + sha256->digest[7] = 0x5BE0CD19L; + + sha256->buffLen = 0; + sha256->loLen = 0; + sha256->hiLen = 0; +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + sha256->flags = 0; +#endif + + return ret; +} +#endif + + +/* Hardware Acceleration */ +#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + + /* in case intel instructions aren't available, plus we need the K[] global */ + #define NEED_SOFT_SHA256 + + /***** + Intel AVX1/AVX2 Macro Control Structure + + #define HAVE_INTEL_AVX1 + #define HAVE_INTEL_AVX2 + + #define HAVE_INTEL_RORX + + + int InitSha256(wc_Sha256* sha256) { + Save/Recover XMM, YMM + ... + } + + #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2) + Transform_Sha256(); Function prototype + #else + Transform_Sha256() { } + int Sha256Final() { + Save/Recover XMM, YMM + ... + } + #endif + + #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2) + #if defined(HAVE_INTEL_RORX + #define RND with rorx instruction + #else + #define RND + #endif + #endif + + #if defined(HAVE_INTEL_AVX1) + + #define XMM Instructions/inline asm + + int Transform_Sha256() { + Stitched Message Sched/Round + } + + #elif defined(HAVE_INTEL_AVX2) + + #define YMM Instructions/inline asm + + int Transform_Sha256() { + More granular Stitched Message Sched/Round + } + + #endif + + */ + + /* Each platform needs to query info type 1 from cpuid to see if aesni is + * supported. 
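For this file the flags that matter are AVX1, AVX2 and BMI2 (for RORX), tested at runtime in Sha256_SetTransform() below.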
Also, let's setup a macro for proper linkage w/o ABI conflicts + */ + + /* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha256 */ + static int Transform_Sha256(wc_Sha256* sha256, const byte* data); + +#ifdef __cplusplus + extern "C" { +#endif + + #if defined(HAVE_INTEL_AVX1) + extern int Transform_Sha256_AVX1(wc_Sha256 *sha256, const byte* data); + extern int Transform_Sha256_AVX1_Len(wc_Sha256* sha256, + const byte* data, word32 len); + #endif + #if defined(HAVE_INTEL_AVX2) + extern int Transform_Sha256_AVX2(wc_Sha256 *sha256, const byte* data); + extern int Transform_Sha256_AVX2_Len(wc_Sha256* sha256, + const byte* data, word32 len); + #ifdef HAVE_INTEL_RORX + extern int Transform_Sha256_AVX1_RORX(wc_Sha256 *sha256, const byte* data); + extern int Transform_Sha256_AVX1_RORX_Len(wc_Sha256* sha256, + const byte* data, word32 len); + extern int Transform_Sha256_AVX2_RORX(wc_Sha256 *sha256, const byte* data); + extern int Transform_Sha256_AVX2_RORX_Len(wc_Sha256* sha256, + const byte* data, word32 len); + #endif /* HAVE_INTEL_RORX */ + #endif /* HAVE_INTEL_AVX2 */ + +#ifdef __cplusplus + } /* extern "C" */ +#endif + + static int (*Transform_Sha256_p)(wc_Sha256* sha256, const byte* data); + /* = _Transform_Sha256 */ + static int (*Transform_Sha256_Len_p)(wc_Sha256* sha256, const byte* data, + word32 len); + /* = NULL */ + static int transform_check = 0; + static word32 intel_flags; + + #define XTRANSFORM(S, D) (*Transform_Sha256_p)((S),(D)) + #define XTRANSFORM_LEN(S, D, L) (*Transform_Sha256_Len_p)((S),(D),(L)) + + static void Sha256_SetTransform(void) + { + + if (transform_check) + return; + + intel_flags = cpuid_get_flags(); + + #ifdef HAVE_INTEL_AVX2 + if (1 && IS_INTEL_AVX2(intel_flags)) { + #ifdef HAVE_INTEL_RORX + if (IS_INTEL_BMI2(intel_flags)) { + Transform_Sha256_p = Transform_Sha256_AVX2_RORX; + Transform_Sha256_Len_p = Transform_Sha256_AVX2_RORX_Len; + } + else + #endif + if (1) + { + Transform_Sha256_p = Transform_Sha256_AVX2; + Transform_Sha256_Len_p = Transform_Sha256_AVX2_Len; + } + #ifdef HAVE_INTEL_RORX + else { + Transform_Sha256_p = Transform_Sha256_AVX1_RORX; + Transform_Sha256_Len_p = Transform_Sha256_AVX1_RORX_Len; + } + #endif + } + else + #endif + #ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + Transform_Sha256_p = Transform_Sha256_AVX1; + Transform_Sha256_Len_p = Transform_Sha256_AVX1_Len; + } + else + #endif + { + Transform_Sha256_p = Transform_Sha256; + Transform_Sha256_Len_p = NULL; + } + + transform_check = 1; + } + + int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) + { + int ret = 0; + if (sha256 == NULL) + return BAD_FUNC_ARG; + + sha256->heap = heap; + #ifdef WOLF_CRYPTO_CB + sha256->devId = devId; + #endif + + ret = InitSha256(sha256); + if (ret != 0) + return ret; + + /* choose best Transform function under this runtime environment */ + Sha256_SetTransform(); + + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256) + ret = wolfAsync_DevCtxInit(&sha256->asyncDev, + WOLFSSL_ASYNC_MARKER_SHA256, sha256->heap, devId); + #else + (void)devId; + #endif /* WOLFSSL_ASYNC_CRYPT */ + + return ret; + } + +#elif defined(FREESCALE_LTC_SHA) + int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) + { + (void)heap; + (void)devId; + + LTC_HASH_Init(LTC_BASE, &sha256->ctx, kLTC_Sha256, NULL, 0); + + return 0; + } + +#elif defined(FREESCALE_MMCAU_SHA) + + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + #include "cau_api.h" + #else + #include "fsl_mmcau.h" + #endif + + #define XTRANSFORM(S, D) Transform_Sha256((S),(D)) + #define 
XTRANSFORM_LEN(S, D, L) Transform_Sha256_Len((S),(D),(L)) + + #ifndef WC_HASH_DATA_ALIGNMENT + /* these hardware API's require 4 byte (word32) alignment */ + #define WC_HASH_DATA_ALIGNMENT 4 + #endif + + int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) + { + int ret = 0; + + (void)heap; + (void)devId; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_sha256_initialize_output(sha256->digest); + #else + MMCAU_SHA256_InitializeOutput((uint32_t*)sha256->digest); + #endif + wolfSSL_CryptHwMutexUnLock(); + + sha256->buffLen = 0; + sha256->loLen = 0; + sha256->hiLen = 0; + #ifdef WOLFSSL_SMALL_STACK_CACHE + sha256->W = NULL; + #endif + + return ret; + } + + static int Transform_Sha256(wc_Sha256* sha256, const byte* data) + { + int ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_sha256_hash_n((byte*)data, 1, sha256->digest); + #else + MMCAU_SHA256_HashN((byte*)data, 1, sha256->digest); + #endif + wolfSSL_CryptHwMutexUnLock(); + } + return ret; + } + + static int Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, + word32 len) + { + int ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + #if defined(WC_HASH_DATA_ALIGNMENT) && WC_HASH_DATA_ALIGNMENT > 0 + if ((size_t)data % WC_HASH_DATA_ALIGNMENT) { + /* data pointer is NOT aligned, + * so copy and perform one block at a time */ + byte* local = (byte*)sha256->buffer; + while (len >= WC_SHA256_BLOCK_SIZE) { + XMEMCPY(local, data, WC_SHA256_BLOCK_SIZE); + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_sha256_hash_n(local, 1, sha256->digest); + #else + MMCAU_SHA256_HashN(local, 1, sha256->digest); + #endif + data += WC_SHA256_BLOCK_SIZE; + len -= WC_SHA256_BLOCK_SIZE; + } + } + else + #endif + { + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_sha256_hash_n((byte*)data, len/WC_SHA256_BLOCK_SIZE, + sha256->digest); + #else + MMCAU_SHA256_HashN((byte*)data, len/WC_SHA256_BLOCK_SIZE, + sha256->digest); + #endif + } + wolfSSL_CryptHwMutexUnLock(); + } + return ret; + } + +#elif defined(WOLFSSL_PIC32MZ_HASH) + #include + +#elif defined(STM32_HASH_SHA2) + + /* Supports CubeMX HAL or Standard Peripheral Library */ + + int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) + { + if (sha256 == NULL) + return BAD_FUNC_ARG; + + (void)devId; + (void)heap; + + wc_Stm32_Hash_Init(&sha256->stmCtx); + return 0; + } + + int wc_Sha256Update(wc_Sha256* sha256, const byte* data, word32 len) + { + int ret = 0; + + if (sha256 == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + ret = wc_Stm32_Hash_Update(&sha256->stmCtx, + HASH_AlgoSelection_SHA256, data, len); + wolfSSL_CryptHwMutexUnLock(); + } + return ret; + } + + int wc_Sha256Final(wc_Sha256* sha256, byte* hash) + { + int ret = 0; + + if (sha256 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + ret = wc_Stm32_Hash_Final(&sha256->stmCtx, + HASH_AlgoSelection_SHA256, hash, WC_SHA256_DIGEST_SIZE); + wolfSSL_CryptHwMutexUnLock(); + } + + (void)wc_InitSha256(sha256); /* reset state */ + + return ret; + } + +#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH) + /* functions defined in wolfcrypt/src/port/caam/caam_sha256.c */ + +#elif defined(WOLFSSL_AFALG_HASH) + /* implemented in wolfcrypt/src/port/af_alg/afalg_hash.c */ + +#elif defined(WOLFSSL_DEVCRYPTO_HASH) + /* implemented in wolfcrypt/src/port/devcrypto/devcrypt_hash.c */ + +#elif 
defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_HASH) + #include "hal_data.h" + + #ifndef WOLFSSL_SCE_SHA256_HANDLE + #define WOLFSSL_SCE_SHA256_HANDLE g_sce_hash_0 + #endif + + #define WC_SHA256_DIGEST_WORD_SIZE 16 + #define XTRANSFORM(S, D) wc_Sha256SCE_XTRANSFORM((S), (D)) + static int wc_Sha256SCE_XTRANSFORM(wc_Sha256* sha256, const byte* data) + { + if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag == + CRYPTO_WORD_ENDIAN_LITTLE) + { + ByteReverseWords((word32*)data, (word32*)data, + WC_SHA256_BLOCK_SIZE); + ByteReverseWords(sha256->digest, sha256->digest, + WC_SHA256_DIGEST_SIZE); + } + + if (WOLFSSL_SCE_SHA256_HANDLE.p_api->hashUpdate( + WOLFSSL_SCE_SHA256_HANDLE.p_ctrl, (word32*)data, + WC_SHA256_DIGEST_WORD_SIZE, sha256->digest) != SSP_SUCCESS){ + WOLFSSL_MSG("Unexpected hardware return value"); + return WC_HW_E; + } + + if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag == + CRYPTO_WORD_ENDIAN_LITTLE) + { + ByteReverseWords((word32*)data, (word32*)data, + WC_SHA256_BLOCK_SIZE); + ByteReverseWords(sha256->digest, sha256->digest, + WC_SHA256_DIGEST_SIZE); + } + + return 0; + } + + + int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) + { + int ret = 0; + if (sha256 == NULL) + return BAD_FUNC_ARG; + + sha256->heap = heap; + + ret = InitSha256(sha256); + if (ret != 0) + return ret; + + (void)devId; + + return ret; + } + +#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + + #define NEED_SOFT_SHA256 + + static int InitSha256(wc_Sha256* sha256) + { + int ret = 0; + + if (sha256 == NULL) + return BAD_FUNC_ARG; + + XMEMSET(sha256->digest, 0, sizeof(sha256->digest)); + sha256->digest[0] = 0x6A09E667L; + sha256->digest[1] = 0xBB67AE85L; + sha256->digest[2] = 0x3C6EF372L; + sha256->digest[3] = 0xA54FF53AL; + sha256->digest[4] = 0x510E527FL; + sha256->digest[5] = 0x9B05688CL; + sha256->digest[6] = 0x1F83D9ABL; + sha256->digest[7] = 0x5BE0CD19L; + + sha256->buffLen = 0; + sha256->loLen = 0; + sha256->hiLen = 0; + + /* always start firstblock = 1 when using hw engine */ + sha256->ctx.isfirstblock = 1; + sha256->ctx.sha_type = SHA2_256; + if(sha256->ctx.mode == ESP32_SHA_HW) { + /* release hw */ + esp_sha_hw_unlock(); + } + /* always set mode as INIT + * whether using HW or SW is determined at first call of update() + */ + sha256->ctx.mode = ESP32_SHA_INIT; + + return ret; + } + int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) + { + int ret = 0; + + if (sha256 == NULL) + return BAD_FUNC_ARG; + + XMEMSET(sha256, 0, sizeof(wc_Sha256)); + sha256->ctx.mode = ESP32_SHA_INIT; + sha256->ctx.isfirstblock = 1; + (void)devId; + + ret = InitSha256(sha256); + + return ret; + } + +#elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \ + !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH) + + /* implemented in wolfcrypt/src/port/Renesas/renesas_tsip_sha.c */ + +#else + #define NEED_SOFT_SHA256 + + int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) + { + int ret = 0; + if (sha256 == NULL) + return BAD_FUNC_ARG; + + sha256->heap = heap; + #ifdef WOLF_CRYPTO_CB + sha256->devId = devId; + sha256->devCtx = NULL; + #endif + + ret = InitSha256(sha256); + if (ret != 0) + return ret; + + #ifdef WOLFSSL_SMALL_STACK_CACHE + sha256->W = NULL; + #endif + + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256) + ret = wolfAsync_DevCtxInit(&sha256->asyncDev, + WOLFSSL_ASYNC_MARKER_SHA256, sha256->heap, devId); + #else + (void)devId; + #endif /* WOLFSSL_ASYNC_CRYPT */ + + return ret; + } +#endif /* End Hardware Acceleration */ + +#ifdef 
NEED_SOFT_SHA256 + + static const ALIGN32 word32 K[64] = { + 0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL, + 0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L, + 0x243185BEL, 0x550C7DC3L, 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L, + 0xC19BF174L, 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL, + 0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL, 0x983E5152L, + 0xA831C66DL, 0xB00327C8L, 0xBF597FC7L, 0xC6E00BF3L, 0xD5A79147L, + 0x06CA6351L, 0x14292967L, 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL, + 0x53380D13L, 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L, + 0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L, 0xD192E819L, + 0xD6990624L, 0xF40E3585L, 0x106AA070L, 0x19A4C116L, 0x1E376C08L, + 0x2748774CL, 0x34B0BCB5L, 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL, + 0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L, + 0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L + }; + +/* Both versions of Ch and Maj are logically the same, but with the second set + the compilers can recognize them better for optimization */ +#ifdef WOLFSSL_SHA256_BY_SPEC + /* SHA256 math based on specification */ + #define Ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) + #define Maj(x,y,z) ((((x) | (y)) & (z)) | ((x) & (y))) +#else + /* SHA256 math reworked for easier compiler optimization */ + #define Ch(x,y,z) ((((y) ^ (z)) & (x)) ^ (z)) + #define Maj(x,y,z) ((((x) ^ (y)) & ((y) ^ (z))) ^ (y)) +#endif + #define R(x, n) (((x) & 0xFFFFFFFFU) >> (n)) + + #define S(x, n) rotrFixed(x, n) + #define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22)) + #define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25)) + #define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3)) + #define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10)) + + #define a(i) S[(0-i) & 7] + #define b(i) S[(1-i) & 7] + #define c(i) S[(2-i) & 7] + #define d(i) S[(3-i) & 7] + #define e(i) S[(4-i) & 7] + #define f(i) S[(5-i) & 7] + #define g(i) S[(6-i) & 7] + #define h(i) S[(7-i) & 7] + + #ifndef XTRANSFORM + #define XTRANSFORM(S, D) Transform_Sha256((S),(D)) + #endif + +#ifndef SHA256_MANY_REGISTERS + #define RND(j) \ + t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + W[i+j]; \ + t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \ + d(j) += t0; \ + h(j) = t0 + t1 + + static int Transform_Sha256(wc_Sha256* sha256, const byte* data) + { + word32 S[8], t0, t1; + int i; + + #ifdef WOLFSSL_SMALL_STACK_CACHE + word32* W = sha256->W; + if (W == NULL) { + W = (word32*)XMALLOC(sizeof(word32) * WC_SHA256_BLOCK_SIZE, NULL, + DYNAMIC_TYPE_DIGEST); + if (W == NULL) + return MEMORY_E; + sha256->W = W; + } + #elif defined(WOLFSSL_SMALL_STACK) + word32* W; + W = (word32*)XMALLOC(sizeof(word32) * WC_SHA256_BLOCK_SIZE, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (W == NULL) + return MEMORY_E; + #else + word32 W[WC_SHA256_BLOCK_SIZE]; + #endif + + /* Copy context->state[] to working vars */ + for (i = 0; i < 8; i++) + S[i] = sha256->digest[i]; + + for (i = 0; i < 16; i++) + W[i] = *((word32*)&data[i*sizeof(word32)]); + + for (i = 16; i < WC_SHA256_BLOCK_SIZE; i++) + W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16]; + + #ifdef USE_SLOW_SHA256 + /* not unrolled - ~2k smaller and ~25% slower */ + for (i = 0; i < WC_SHA256_BLOCK_SIZE; i += 8) { + int j; + for (j = 0; j < 8; j++) { /* braces needed here for macros {} */ + RND(j); + } + } + #else + /* partially loop unrolled */ + for (i = 0; i < WC_SHA256_BLOCK_SIZE; i += 8) { + RND(0); RND(1); RND(2); RND(3); + RND(4); RND(5); RND(6); RND(7); + } + #endif /* USE_SLOW_SHA256 */ + + /* Add the working vars back into digest state[] 
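(the Davies-Meyer feed-forward that makes the compression function one-way)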
*/ + for (i = 0; i < 8; i++) { + sha256->digest[i] += S[i]; + } + + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SMALL_STACK_CACHE) + XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return 0; + } +#else + /* SHA256 version that keeps all data in registers */ + #define SCHED1(j) (W[j] = *((word32*)&data[j*sizeof(word32)])) + #define SCHED(j) ( \ + W[ j & 15] += \ + Gamma1(W[(j-2) & 15])+ \ + W[(j-7) & 15] + \ + Gamma0(W[(j-15) & 15]) \ + ) + + #define RND1(j) \ + t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + SCHED1(j); \ + t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \ + d(j) += t0; \ + h(j) = t0 + t1 + #define RNDN(j) \ + t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + SCHED(j); \ + t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \ + d(j) += t0; \ + h(j) = t0 + t1 + + static int Transform_Sha256(wc_Sha256* sha256, const byte* data) + { + word32 S[8], t0, t1; + int i; + word32 W[WC_SHA256_BLOCK_SIZE/sizeof(word32)]; + + /* Copy digest to working vars */ + S[0] = sha256->digest[0]; + S[1] = sha256->digest[1]; + S[2] = sha256->digest[2]; + S[3] = sha256->digest[3]; + S[4] = sha256->digest[4]; + S[5] = sha256->digest[5]; + S[6] = sha256->digest[6]; + S[7] = sha256->digest[7]; + + i = 0; + RND1( 0); RND1( 1); RND1( 2); RND1( 3); + RND1( 4); RND1( 5); RND1( 6); RND1( 7); + RND1( 8); RND1( 9); RND1(10); RND1(11); + RND1(12); RND1(13); RND1(14); RND1(15); + /* 64 operations, partially loop unrolled */ + for (i = 16; i < 64; i += 16) { + RNDN( 0); RNDN( 1); RNDN( 2); RNDN( 3); + RNDN( 4); RNDN( 5); RNDN( 6); RNDN( 7); + RNDN( 8); RNDN( 9); RNDN(10); RNDN(11); + RNDN(12); RNDN(13); RNDN(14); RNDN(15); + } + + /* Add the working vars back into digest */ + sha256->digest[0] += S[0]; + sha256->digest[1] += S[1]; + sha256->digest[2] += S[2]; + sha256->digest[3] += S[3]; + sha256->digest[4] += S[4]; + sha256->digest[5] += S[5]; + sha256->digest[6] += S[6]; + sha256->digest[7] += S[7]; + + return 0; + } +#endif /* SHA256_MANY_REGISTERS */ +#endif +/* End wc_ software implementation */ + + +#ifdef XTRANSFORM + + static WC_INLINE void AddLength(wc_Sha256* sha256, word32 len) + { + word32 tmp = sha256->loLen; + if ((sha256->loLen += len) < tmp) { + sha256->hiLen++; /* carry low to high */ + } + } + + /* do block size increments/updates */ + static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len) + { + int ret = 0; + word32 blocksLen; + byte* local; + + if (sha256 == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + + if (data == NULL && len == 0) { + /* valid, but do nothing */ + return 0; + } + + /* check that internal buffLen is valid */ + if (sha256->buffLen >= WC_SHA256_BLOCK_SIZE) { + return BUFFER_E; + } + + /* add length for final */ + AddLength(sha256, len); + + local = (byte*)sha256->buffer; + + /* process any remainder from previous operation */ + if (sha256->buffLen > 0) { + blocksLen = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen); + XMEMCPY(&local[sha256->buffLen], data, blocksLen); + + sha256->buffLen += blocksLen; + data += blocksLen; + len -= blocksLen; + + if (sha256->buffLen == WC_SHA256_BLOCK_SIZE) { + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + #endif + { + ByteReverseWords(sha256->buffer, sha256->buffer, + WC_SHA256_BLOCK_SIZE); + } + #endif + + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha256->ctx.mode == 
ESP32_SHA_INIT){ + esp_sha_try_hw_lock(&sha256->ctx); + } + if (sha256->ctx.mode == ESP32_SHA_SW){ + ret = XTRANSFORM(sha256, (const byte*)local); + } else { + esp_sha256_process(sha256, (const byte*)local); + } + #else + ret = XTRANSFORM(sha256, (const byte*)local); + #endif + + if (ret == 0) + sha256->buffLen = 0; + else + len = 0; /* error */ + } + } + + /* process blocks */ + #ifdef XTRANSFORM_LEN + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + if (Transform_Sha256_Len_p != NULL) + #endif + { + /* get number of blocks */ + /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */ + /* len (masked by 0xFFFFFFC0) returns block aligned length */ + blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1); + if (blocksLen > 0) { + /* Byte reversal and alignment handled in function if required */ + XTRANSFORM_LEN(sha256, data, blocksLen); + data += blocksLen; + len -= blocksLen; + } + } + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + else + #endif + #endif /* XTRANSFORM_LEN */ + #if !defined(XTRANSFORM_LEN) || defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + { + while (len >= WC_SHA256_BLOCK_SIZE) { + word32* local32 = sha256->buffer; + /* optimization to avoid memcpy if data pointer is properly aligned */ + /* Intel transform function requires use of sha256->buffer */ + /* Little Endian requires byte swap, so can't use data directly */ + #if defined(WC_HASH_DATA_ALIGNMENT) && !defined(LITTLE_ENDIAN_ORDER) && \ + !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2) + if (((size_t)data % WC_HASH_DATA_ALIGNMENT) == 0) { + local32 = (word32*)data; + } + else + #endif + { + XMEMCPY(local32, data, WC_SHA256_BLOCK_SIZE); + } + + data += WC_SHA256_BLOCK_SIZE; + len -= WC_SHA256_BLOCK_SIZE; + + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + #endif + { + ByteReverseWords(local32, local32, WC_SHA256_BLOCK_SIZE); + } + #endif + + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha256->ctx.mode == ESP32_SHA_INIT){ + esp_sha_try_hw_lock(&sha256->ctx); + } + if (sha256->ctx.mode == ESP32_SHA_SW){ + ret = XTRANSFORM(sha256, (const byte*)local32); + } else { + esp_sha256_process(sha256, (const byte*)local32); + } + #else + ret = XTRANSFORM(sha256, (const byte*)local32); + #endif + + if (ret != 0) + break; + } + } + #endif + + /* save remainder */ + if (len > 0) { + XMEMCPY(local, data, len); + sha256->buffLen = len; + } + + return ret; + } + + int wc_Sha256Update(wc_Sha256* sha256, const byte* data, word32 len) + { + if (sha256 == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + + if (data == NULL && len == 0) { + /* valid, but do nothing */ + return 0; + } + + #ifdef WOLF_CRYPTO_CB + if (sha256->devId != INVALID_DEVID) { + int ret = wc_CryptoCb_Sha256Hash(sha256, data, len, NULL); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + } + #endif + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256) + if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) { + #if defined(HAVE_INTEL_QA) + return IntelQaSymSha256(&sha256->asyncDev, NULL, data, len); + #endif + } + #endif /* WOLFSSL_ASYNC_CRYPT */ + + return Sha256Update(sha256, data, len); + } + + static WC_INLINE int Sha256Final(wc_Sha256* sha256) + { + + int ret; + byte* local; + + if (sha256 == NULL) { + return BAD_FUNC_ARG; + } + + local = (byte*)sha256->buffer; + 
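/* FIPS 180-4 padding: append the 0x80 marker byte, zero-fill up to the
+         * pad boundary, then store the 64-bit message length in bits */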
local[sha256->buffLen++] = 0x80; /* add 1 */ + + /* pad with zeros */ + if (sha256->buffLen > WC_SHA256_PAD_SIZE) { + XMEMSET(&local[sha256->buffLen], 0, + WC_SHA256_BLOCK_SIZE - sha256->buffLen); + sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen; + + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + #endif + { + ByteReverseWords(sha256->buffer, sha256->buffer, + WC_SHA256_BLOCK_SIZE); + } + #endif + + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha256->ctx.mode == ESP32_SHA_INIT) { + esp_sha_try_hw_lock(&sha256->ctx); + } + if (sha256->ctx.mode == ESP32_SHA_SW) { + ret = XTRANSFORM(sha256, (const byte*)local); + } else { + ret = esp_sha256_process(sha256, (const byte*)local); + } + #else + ret = XTRANSFORM(sha256, (const byte*)local); + #endif + if (ret != 0) + return ret; + + sha256->buffLen = 0; + } + XMEMSET(&local[sha256->buffLen], 0, + WC_SHA256_PAD_SIZE - sha256->buffLen); + + /* put lengths in bits */ + sha256->hiLen = (sha256->loLen >> (8 * sizeof(sha256->loLen) - 3)) + + (sha256->hiLen << 3); + sha256->loLen = sha256->loLen << 3; + + /* store lengths */ + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + #endif + { + ByteReverseWords(sha256->buffer, sha256->buffer, + WC_SHA256_BLOCK_SIZE); + } + #endif + /* ! length ordering dependent on digest endian type ! */ + XMEMCPY(&local[WC_SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32)); + XMEMCPY(&local[WC_SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen, + sizeof(word32)); + + #if defined(FREESCALE_MMCAU_SHA) || defined(HAVE_INTEL_AVX1) || \ + defined(HAVE_INTEL_AVX2) + /* Kinetis requires only these bytes reversed */ + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags)) + #endif + { + ByteReverseWords( + &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)], + &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)], + 2 * sizeof(word32)); + } + #endif + + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha256->ctx.mode == ESP32_SHA_INIT) { + esp_sha_try_hw_lock(&sha256->ctx); + } + if (sha256->ctx.mode == ESP32_SHA_SW) { + ret = XTRANSFORM(sha256, (const byte*)local); + } else { + ret = esp_sha256_digest_process(sha256, 1); + } + #else + ret = XTRANSFORM(sha256, (const byte*)local); + #endif + + return ret; + } + + int wc_Sha256FinalRaw(wc_Sha256* sha256, byte* hash) + { + #ifdef LITTLE_ENDIAN_ORDER + word32 digest[WC_SHA256_DIGEST_SIZE / sizeof(word32)]; + #endif + + if (sha256 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + + #ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords((word32*)digest, (word32*)sha256->digest, + WC_SHA256_DIGEST_SIZE); + XMEMCPY(hash, digest, WC_SHA256_DIGEST_SIZE); + #else + XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE); + #endif + + return 0; + } + + int wc_Sha256Final(wc_Sha256* sha256, byte* hash) + { + int ret; + + if (sha256 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + + #ifdef WOLF_CRYPTO_CB + if (sha256->devId != INVALID_DEVID) { + ret = wc_CryptoCb_Sha256Hash(sha256, NULL, 0, hash); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + } + #endif + + #if defined(WOLFSSL_ASYNC_CRYPT) && 
defined(WC_ASYNC_ENABLE_SHA256) + if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) { + #if defined(HAVE_INTEL_QA) + return IntelQaSymSha256(&sha256->asyncDev, hash, NULL, + WC_SHA256_DIGEST_SIZE); + #endif + } + #endif /* WOLFSSL_ASYNC_CRYPT */ + + ret = Sha256Final(sha256); + if (ret != 0) + return ret; + + #if defined(LITTLE_ENDIAN_ORDER) + ByteReverseWords(sha256->digest, sha256->digest, WC_SHA256_DIGEST_SIZE); + #endif + XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE); + + return InitSha256(sha256); /* reset state */ + } + +#endif /* XTRANSFORM */ + +#ifdef WOLFSSL_SHA224 + +#ifdef STM32_HASH_SHA2 + + /* Supports CubeMX HAL or Standard Peripheral Library */ + + int wc_InitSha224_ex(wc_Sha224* sha224, void* heap, int devId) + { + if (sha224 == NULL) + return BAD_FUNC_ARG; + + (void)devId; + (void)heap; + + wc_Stm32_Hash_Init(&sha224->stmCtx); + return 0; + } + + int wc_Sha224Update(wc_Sha224* sha224, const byte* data, word32 len) + { + int ret = 0; + + if (sha224 == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + ret = wc_Stm32_Hash_Update(&sha224->stmCtx, + HASH_AlgoSelection_SHA224, data, len); + wolfSSL_CryptHwMutexUnLock(); + } + return ret; + } + + int wc_Sha224Final(wc_Sha224* sha224, byte* hash) + { + int ret = 0; + + if (sha224 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + ret = wc_Stm32_Hash_Final(&sha224->stmCtx, + HASH_AlgoSelection_SHA224, hash, WC_SHA224_DIGEST_SIZE); + wolfSSL_CryptHwMutexUnLock(); + } + + (void)wc_InitSha224(sha224); /* reset state */ + + return ret; + } + +#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH) + /* functions defined in wolfcrypt/src/port/caam/caam_sha256.c */ + +#elif defined(WOLFSSL_AFALG_HASH) + #error SHA224 currently not supported with AF_ALG enabled + +#elif defined(WOLFSSL_DEVCRYPTO_HASH) + /* implemented in wolfcrypt/src/port/devcrypto/devcrypt_hash.c */ + +#else + + #define NEED_SOFT_SHA224 + + + static int InitSha224(wc_Sha224* sha224) + { + int ret = 0; + + if (sha224 == NULL) { + return BAD_FUNC_ARG; + } + + sha224->digest[0] = 0xc1059ed8; + sha224->digest[1] = 0x367cd507; + sha224->digest[2] = 0x3070dd17; + sha224->digest[3] = 0xf70e5939; + sha224->digest[4] = 0xffc00b31; + sha224->digest[5] = 0x68581511; + sha224->digest[6] = 0x64f98fa7; + sha224->digest[7] = 0xbefa4fa4; + + sha224->buffLen = 0; + sha224->loLen = 0; + sha224->hiLen = 0; + + #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2) + /* choose best Transform function under this runtime environment */ + Sha256_SetTransform(); + #endif + #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + sha224->flags = 0; + #endif + + return ret; + } + +#endif + +#ifdef NEED_SOFT_SHA224 + int wc_InitSha224_ex(wc_Sha224* sha224, void* heap, int devId) + { + int ret = 0; + + if (sha224 == NULL) + return BAD_FUNC_ARG; + + sha224->heap = heap; + + ret = InitSha224(sha224); + if (ret != 0) + return ret; + + #ifdef WOLFSSL_SMALL_STACK_CACHE + sha224->W = NULL; + #endif + + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224) + ret = wolfAsync_DevCtxInit(&sha224->asyncDev, + WOLFSSL_ASYNC_MARKER_SHA224, sha224->heap, devId); + #else + (void)devId; + #endif /* WOLFSSL_ASYNC_CRYPT */ + + return ret; + } + + int wc_Sha224Update(wc_Sha224* sha224, const byte* data, word32 len) + { + int ret; + + if (sha224 == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + + #if 
defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224) + if (sha224->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA224) { + #if defined(HAVE_INTEL_QA) + return IntelQaSymSha224(&sha224->asyncDev, NULL, data, len); + #endif + } + #endif /* WOLFSSL_ASYNC_CRYPT */ + + ret = Sha256Update((wc_Sha256*)sha224, data, len); + + return ret; + } + + int wc_Sha224Final(wc_Sha224* sha224, byte* hash) + { + int ret; + + if (sha224 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224) + if (sha224->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA224) { + #if defined(HAVE_INTEL_QA) + return IntelQaSymSha224(&sha224->asyncDev, hash, NULL, + WC_SHA224_DIGEST_SIZE); + #endif + } + #endif /* WOLFSSL_ASYNC_CRYPT */ + + ret = Sha256Final((wc_Sha256*)sha224); + if (ret != 0) + return ret; + + #if defined(LITTLE_ENDIAN_ORDER) + ByteReverseWords(sha224->digest, sha224->digest, WC_SHA224_DIGEST_SIZE); + #endif + XMEMCPY(hash, sha224->digest, WC_SHA224_DIGEST_SIZE); + + return InitSha224(sha224); /* reset state */ + } +#endif /* end of SHA224 software implementation */ + + int wc_InitSha224(wc_Sha224* sha224) + { + return wc_InitSha224_ex(sha224, NULL, INVALID_DEVID); + } + + void wc_Sha224Free(wc_Sha224* sha224) + { + if (sha224 == NULL) + return; + +#ifdef WOLFSSL_SMALL_STACK_CACHE + if (sha224->W != NULL) { + XFREE(sha224->W, NULL, DYNAMIC_TYPE_DIGEST); + sha224->W = NULL; + } +#endif + + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224) + wolfAsync_DevCtxFree(&sha224->asyncDev, WOLFSSL_ASYNC_MARKER_SHA224); + #endif /* WOLFSSL_ASYNC_CRYPT */ + + #ifdef WOLFSSL_PIC32MZ_HASH + wc_Sha256Pic32Free(sha224); + #endif + } +#endif /* WOLFSSL_SHA224 */ + + +int wc_InitSha256(wc_Sha256* sha256) +{ + return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID); +} + +void wc_Sha256Free(wc_Sha256* sha256) +{ + if (sha256 == NULL) + return; + +#ifdef WOLFSSL_SMALL_STACK_CACHE + if (sha256->W != NULL) { + XFREE(sha256->W, NULL, DYNAMIC_TYPE_DIGEST); + sha256->W = NULL; + } +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256) + wolfAsync_DevCtxFree(&sha256->asyncDev, WOLFSSL_ASYNC_MARKER_SHA256); +#endif /* WOLFSSL_ASYNC_CRYPT */ +#ifdef WOLFSSL_PIC32MZ_HASH + wc_Sha256Pic32Free(sha256); +#endif +#if defined(WOLFSSL_AFALG_HASH) + if (sha256->alFd > 0) { + close(sha256->alFd); + sha256->alFd = -1; /* avoid possible double close on socket */ + } + if (sha256->rdFd > 0) { + close(sha256->rdFd); + sha256->rdFd = -1; /* avoid possible double close on socket */ + } +#endif /* WOLFSSL_AFALG_HASH */ +#ifdef WOLFSSL_DEVCRYPTO_HASH + wc_DevCryptoFree(&sha256->ctx); +#endif /* WOLFSSL_DEVCRYPTO */ +#if (defined(WOLFSSL_AFALG_HASH) && defined(WOLFSSL_AFALG_HASH_KEEP)) || \ + (defined(WOLFSSL_DEVCRYPTO_HASH) && defined(WOLFSSL_DEVCRYPTO_HASH_KEEP)) || \ + (defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \ + !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH)) + if (sha256->msg != NULL) { + XFREE(sha256->msg, sha256->heap, DYNAMIC_TYPE_TMP_BUFFER); + sha256->msg = NULL; + } +#endif +} + +#endif /* !WOLFSSL_TI_HASH */ +#endif /* HAVE_FIPS */ + + +#ifndef WOLFSSL_TI_HASH +#ifdef WOLFSSL_SHA224 + int wc_Sha224GetHash(wc_Sha224* sha224, byte* hash) + { + int ret; + wc_Sha224 tmpSha224; + + if (sha224 == NULL || hash == NULL) + return BAD_FUNC_ARG; + + ret = wc_Sha224Copy(sha224, &tmpSha224); + if (ret == 0) { + ret = wc_Sha224Final(&tmpSha224, hash); + wc_Sha224Free(&tmpSha224); + } + return ret; + } + int wc_Sha224Copy(wc_Sha224* src, wc_Sha224* dst) + 
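/* whole-struct copy first; per-context state that must not be shared
+     * (async device handle, small-stack W cache) is fixed up below and dst
+     * is marked with WC_HASH_FLAG_ISCOPY */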
{ + int ret = 0; + + if (src == NULL || dst == NULL) + return BAD_FUNC_ARG; + + XMEMCPY(dst, src, sizeof(wc_Sha224)); + #ifdef WOLFSSL_SMALL_STACK_CACHE + dst->W = NULL; + #endif + + #ifdef WOLFSSL_ASYNC_CRYPT + ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev); + #endif + #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + dst->flags |= WC_HASH_FLAG_ISCOPY; + #endif + + return ret; + } + +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + int wc_Sha224SetFlags(wc_Sha224* sha224, word32 flags) + { + if (sha224) { + sha224->flags = flags; + } + return 0; + } + int wc_Sha224GetFlags(wc_Sha224* sha224, word32* flags) + { + if (sha224 && flags) { + *flags = sha224->flags; + } + return 0; + } +#endif + +#endif /* WOLFSSL_SHA224 */ + +#ifdef WOLFSSL_AFALG_HASH + /* implemented in wolfcrypt/src/port/af_alg/afalg_hash.c */ + +#elif defined(WOLFSSL_DEVCRYPTO_HASH) + /* implemented in wolfcrypt/src/port/devcrypto/devcrypt_hash.c */ + +#elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \ + !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH) + + /* implemented in wolfcrypt/src/port/Renesas/renesas_tsip_sha.c */ +#else + +int wc_Sha256GetHash(wc_Sha256* sha256, byte* hash) +{ + int ret; + wc_Sha256 tmpSha256; + + if (sha256 == NULL || hash == NULL) + return BAD_FUNC_ARG; + +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if(sha256->ctx.mode == ESP32_SHA_INIT){ + esp_sha_try_hw_lock(&sha256->ctx); + } + if(sha256->ctx.mode == ESP32_SHA_HW) + { + esp_sha256_digest_process(sha256, 0); + } +#endif + ret = wc_Sha256Copy(sha256, &tmpSha256); + if (ret == 0) { + ret = wc_Sha256Final(&tmpSha256, hash); +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + sha256->ctx.mode = ESP32_SHA_SW; +#endif + + wc_Sha256Free(&tmpSha256); + } + return ret; +} +int wc_Sha256Copy(wc_Sha256* src, wc_Sha256* dst) +{ + int ret = 0; + + if (src == NULL || dst == NULL) + return BAD_FUNC_ARG; + + XMEMCPY(dst, src, sizeof(wc_Sha256)); +#ifdef WOLFSSL_SMALL_STACK_CACHE + dst->W = NULL; +#endif + +#ifdef WOLFSSL_ASYNC_CRYPT + ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev); +#endif +#ifdef WOLFSSL_PIC32MZ_HASH + ret = wc_Pic32HashCopy(&src->cache, &dst->cache); +#endif +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + dst->ctx.mode = src->ctx.mode; + dst->ctx.isfirstblock = src->ctx.isfirstblock; + dst->ctx.sha_type = src->ctx.sha_type; +#endif +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + dst->flags |= WC_HASH_FLAG_ISCOPY; +#endif + + return ret; +} +#endif + +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) +int wc_Sha256SetFlags(wc_Sha256* sha256, word32 flags) +{ + if (sha256) { + sha256->flags = flags; + } + return 0; +} +int wc_Sha256GetFlags(wc_Sha256* sha256, word32* flags) +{ + if (sha256 && flags) { + *flags = sha256->flags; + } + return 0; +} +#endif +#endif /* !WOLFSSL_TI_HASH */ + +#endif /* NO_SHA256 */ diff --git a/client/wolfssl/wolfcrypt/src/sha256_asm.S b/client/wolfssl/wolfcrypt/src/sha256_asm.S new file mode 100644 index 0000000..c433d34 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/sha256_asm.S @@ -0,0 +1,22653 @@ +/* sha256_asm + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. 
+ * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifndef HAVE_INTEL_AVX1 +#define HAVE_INTEL_AVX1 +#endif /* HAVE_INTEL_AVX1 */ +#ifndef NO_AVX2_SUPPORT +#define HAVE_INTEL_AVX2 +#endif /* NO_AVX2_SUPPORT */ + +#ifdef HAVE_INTEL_AVX1 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +L_avx1_sha256_k: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_sha256_shuf_00BA: +.quad 0xb0a090803020100, 0xffffffffffffffff +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_sha256_shuf_DC00: +.quad 0xffffffffffffffff, 0xb0a090803020100 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_sha256_flip_mask: +.quad 0x405060700010203, 0xc0d0e0f08090a0b +#ifndef __APPLE__ +.text +.globl Transform_Sha256_AVX1 +.type Transform_Sha256_AVX1,@function +.align 4 +Transform_Sha256_AVX1: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX1 +.p2align 2 +_Transform_Sha256_AVX1: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x40, %rsp + leaq 32(%rdi), %rax + vmovdqa L_avx1_sha256_flip_mask(%rip), %xmm13 + vmovdqa L_avx1_sha256_shuf_00BA(%rip), %xmm11 + vmovdqa L_avx1_sha256_shuf_DC00(%rip), %xmm12 + movl (%rdi), %r8d + movl 4(%rdi), %r9d + movl 8(%rdi), %r10d + movl 12(%rdi), %r11d + movl 16(%rdi), %r12d + movl 20(%rdi), %r13d + movl 24(%rdi), %r14d + movl 28(%rdi), %r15d + # X0, X1, X2, X3 = W[0..15] + vmovdqu (%rax), %xmm0 + vmovdqu 16(%rax), %xmm1 + vpshufb %xmm13, %xmm0, %xmm0 + vpshufb %xmm13, %xmm1, %xmm1 + vmovdqu 32(%rax), %xmm2 + vmovdqu 48(%rax), %xmm3 + vpshufb %xmm13, %xmm2, %xmm2 + vpshufb %xmm13, %xmm3, %xmm3 + movl %r9d, %ebx + movl %r12d, %edx + xorl %r10d, %ebx + # set_w_k_xfer_4: 0 + vpaddd 0+L_avx1_sha256_k(%rip), 
%xmm0, %xmm4 + vpaddd 16+L_avx1_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 32+L_avx1_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 48+L_avx1_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm0, %xmm1, %xmm5 + vpalignr $4, %xmm2, %xmm3, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl (%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 4(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 8(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 12(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm1, %xmm2, %xmm5 + vpalignr $4, %xmm3, %xmm0, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, 
%eax + movl %r9d, %ecx + addl 16(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 20(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 24(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 28(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm2, %xmm3, %xmm5 + vpalignr $4, %xmm0, %xmm1, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 32(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx 
+ xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 36(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 40(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 44(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm3, %xmm0, %xmm5 + vpalignr $4, %xmm1, %xmm2, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 48(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 52(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # 
rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 56(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 60(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 4 + vpaddd 64+L_avx1_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 80+L_avx1_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 96+L_avx1_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 112+L_avx1_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm0, %xmm1, %xmm5 + vpalignr $4, %xmm2, %xmm3, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl (%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 4(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + 
addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 8(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 12(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm1, %xmm2, %xmm5 + vpalignr $4, %xmm3, %xmm0, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 16(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 20(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, 
%edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 24(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 28(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm2, %xmm3, %xmm5 + vpalignr $4, %xmm0, %xmm1, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 32(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 36(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 40(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl 
%r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 44(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm3, %xmm0, %xmm5 + vpalignr $4, %xmm1, %xmm2, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 48(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 52(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 56(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl 
$14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 60(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 8 + vpaddd 128+L_avx1_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 144+L_avx1_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 160+L_avx1_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 176+L_avx1_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm0, %xmm1, %xmm5 + vpalignr $4, %xmm2, %xmm3, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl (%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 4(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 8(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + 
addl 12(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm1, %xmm2, %xmm5 + vpalignr $4, %xmm3, %xmm0, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 16(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 20(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 24(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 28(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + 
xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm2, %xmm3, %xmm5 + vpalignr $4, %xmm0, %xmm1, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 32(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 36(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 40(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 44(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm3, %xmm0, %xmm5 + vpalignr $4, %xmm1, %xmm2, 
%xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 48(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 52(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 56(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 60(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 12 + vpaddd 192+L_avx1_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 208+L_avx1_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 224+L_avx1_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 240+L_avx1_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # rnd_all_4: 0-3 + addl (%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl 
%r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 4(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 8(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 12(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 1-4 + addl 16(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 20(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 24(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 28(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + 
xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 2-5 + addl 32(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 36(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 40(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 44(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 3-6 + addl 48(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 52(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 56(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 60(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl 
$14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + addl %r8d, (%rdi) + addl %r9d, 4(%rdi) + addl %r10d, 8(%rdi) + addl %r11d, 12(%rdi) + addl %r12d, 16(%rdi) + addl %r13d, 20(%rdi) + addl %r14d, 24(%rdi) + addl %r15d, 28(%rdi) + xorq %rax, %rax + vzeroupper + addq $0x40, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha256_AVX1,.-Transform_Sha256_AVX1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl Transform_Sha256_AVX1_Len +.type Transform_Sha256_AVX1_Len,@function +.align 4 +Transform_Sha256_AVX1_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX1_Len +.p2align 2 +_Transform_Sha256_AVX1_Len: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbp + movq %rsi, %rbp + movq %rdx, %rsi + subq $0x40, %rsp + vmovdqa L_avx1_sha256_flip_mask(%rip), %xmm13 + vmovdqa L_avx1_sha256_shuf_00BA(%rip), %xmm11 + vmovdqa L_avx1_sha256_shuf_DC00(%rip), %xmm12 + movl (%rdi), %r8d + movl 4(%rdi), %r9d + movl 8(%rdi), %r10d + movl 12(%rdi), %r11d + movl 16(%rdi), %r12d + movl 20(%rdi), %r13d + movl 24(%rdi), %r14d + movl 28(%rdi), %r15d + # Start of loop processing a block +L_sha256_len_avx1_start: + # X0, X1, X2, X3 = W[0..15] + vmovdqu (%rbp), %xmm0 + vmovdqu 16(%rbp), %xmm1 + vpshufb %xmm13, %xmm0, %xmm0 + vpshufb %xmm13, %xmm1, %xmm1 + vmovdqu 32(%rbp), %xmm2 + vmovdqu 48(%rbp), %xmm3 + vpshufb %xmm13, %xmm2, %xmm2 + vpshufb %xmm13, %xmm3, %xmm3 + movl %r9d, %ebx + movl %r12d, %edx + xorl %r10d, %ebx + # set_w_k_xfer_4: 0 + vpaddd 0+L_avx1_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 16+L_avx1_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 32+L_avx1_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 48+L_avx1_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm0, %xmm1, %xmm5 + vpalignr $4, %xmm2, %xmm3, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl (%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 4(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + 
vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 8(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 12(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm1, %xmm2, %xmm5 + vpalignr $4, %xmm3, %xmm0, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 16(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 20(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 24(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx 
+ xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 28(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm2, %xmm3, %xmm5 + vpalignr $4, %xmm0, %xmm1, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 32(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 36(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 40(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, 
%ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 44(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm3, %xmm0, %xmm5 + vpalignr $4, %xmm1, %xmm2, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 48(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 52(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 56(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 60(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + 
xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 4 + vpaddd 64+L_avx1_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 80+L_avx1_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 96+L_avx1_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 112+L_avx1_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm0, %xmm1, %xmm5 + vpalignr $4, %xmm2, %xmm3, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl (%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 4(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 8(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 12(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, 
%xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm1, %xmm2, %xmm5 + vpalignr $4, %xmm3, %xmm0, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 16(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 20(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 24(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 28(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # 
msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm2, %xmm3, %xmm5 + vpalignr $4, %xmm0, %xmm1, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 32(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 36(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 40(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 44(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm3, %xmm0, %xmm5 + vpalignr $4, %xmm1, %xmm2, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 48(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl 
%ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 52(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 56(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 60(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 8 + vpaddd 128+L_avx1_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 144+L_avx1_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 160+L_avx1_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 176+L_avx1_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm0, %xmm1, %xmm5 + vpalignr $4, %xmm2, %xmm3, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl (%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, 
%xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 4(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 8(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 12(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm1, %xmm2, %xmm5 + vpalignr $4, %xmm3, %xmm0, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 16(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 
20(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 24(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 28(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm2, %xmm3, %xmm5 + vpalignr $4, %xmm0, %xmm1, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 32(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 36(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + 
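+ # Maj(a,b,c) is computed as ((a ^ b) & (b ^ c)) ^ b: %eax and %ebx
+ # alternate roles between rounds, so each round can reuse the previous
+ # round's (a ^ b) as its (b ^ c) and needs only one and + xor.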
xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 40(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 44(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm3, %xmm0, %xmm5 + vpalignr $4, %xmm1, %xmm2, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 48(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 52(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 56(%rsp), %r9d + xorl %r8d, %ecx + 
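+ # In the vector lanes, sigma0(x) = (x ror 7) ^ (x ror 18) ^ (x >> 3)
+ # is assembled from the vpsrld/vpslld/vpor/vpxor pairs of the message
+ # schedule; the rotates are emulated with paired logical shifts since
+ # AVX1 has no 32-bit vector rotate.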
xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 60(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 12 + vpaddd 192+L_avx1_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 208+L_avx1_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 224+L_avx1_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 240+L_avx1_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # rnd_all_4: 0-3 + addl (%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 4(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 8(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 12(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, 
%ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 1-4 + addl 16(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 20(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 24(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 28(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 2-5 + addl 32(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 36(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 40(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 44(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + 
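+ # The rnd_all_4 blocks cover rounds 48-63: no further message schedule
+ # is needed, so each round only folds in the precomputed W[t] + K[t]
+ # value staged on the stack by the last set_w_k_xfer_4 step.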
xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 3-6 + addl 48(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 52(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 56(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 60(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + addl (%rdi), %r8d + addl 4(%rdi), %r9d + addl 8(%rdi), %r10d + addl 12(%rdi), %r11d + addl 16(%rdi), %r12d + addl 20(%rdi), %r13d + addl 24(%rdi), %r14d + addl 28(%rdi), %r15d + addq $0x40, %rbp + subl $0x40, %esi + movl %r8d, (%rdi) + movl %r9d, 4(%rdi) + movl %r10d, 8(%rdi) + movl %r11d, 12(%rdi) + movl %r12d, 16(%rdi) + movl %r13d, 20(%rdi) + movl %r14d, 24(%rdi) + movl %r15d, 28(%rdi) + jnz L_sha256_len_avx1_start + xorq %rax, %rax + vzeroupper + addq $0x40, %rsp + popq %rbp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha256_AVX1_Len,.-Transform_Sha256_AVX1_Len +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +L_avx1_rorx_sha256_k: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967 +.long 
0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_rorx_sha256_shuf_00BA: +.quad 0xb0a090803020100, 0xffffffffffffffff +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_rorx_sha256_shuf_DC00: +.quad 0xffffffffffffffff, 0xb0a090803020100 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_rorx_sha256_flip_mask: +.quad 0x405060700010203, 0xc0d0e0f08090a0b +#ifndef __APPLE__ +.text +.globl Transform_Sha256_AVX1_RORX +.type Transform_Sha256_AVX1_RORX,@function +.align 4 +Transform_Sha256_AVX1_RORX: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX1_RORX +.p2align 2 +_Transform_Sha256_AVX1_RORX: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x40, %rsp + vmovdqa L_avx1_rorx_sha256_flip_mask(%rip), %xmm13 + vmovdqa L_avx1_rorx_sha256_shuf_00BA(%rip), %xmm11 + vmovdqa L_avx1_rorx_sha256_shuf_DC00(%rip), %xmm12 + leaq 32(%rdi), %rax + # X0, X1, X2, X3 = W[0..15] + vmovdqu (%rax), %xmm0 + vmovdqu 16(%rax), %xmm1 + vpshufb %xmm13, %xmm0, %xmm0 + vpshufb %xmm13, %xmm1, %xmm1 + vmovdqu 32(%rax), %xmm2 + vmovdqu 48(%rax), %xmm3 + vpshufb %xmm13, %xmm2, %xmm2 + vpshufb %xmm13, %xmm3, %xmm3 + movl (%rdi), %r8d + movl 4(%rdi), %r9d + movl 8(%rdi), %r10d + movl 12(%rdi), %r11d + movl 16(%rdi), %r12d + movl 20(%rdi), %r13d + movl 24(%rdi), %r14d + movl 28(%rdi), %r15d + # set_w_k_xfer_4: 0 + vpaddd 0+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 16+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 32+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 48+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + movl %r9d, %ebx + rorxl $6, %r12d, %edx + xorl %r10d, %ebx + # msg_sched: 0-3 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl (%rsp), %r15d + vpalignr $4, %xmm2, %xmm3, %xmm4 + vpalignr $4, %xmm0, %xmm1, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 4(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl 
%edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 8(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 12(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 16(%rsp), %r11d + vpalignr $4, %xmm3, %xmm0, %xmm4 + vpalignr $4, %xmm1, %xmm2, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 20(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, 
%r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 24(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 28(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 32(%rsp), %r15d + vpalignr $4, %xmm0, %xmm1, %xmm4 + vpalignr $4, %xmm2, %xmm3, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 36(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, 
%ecx + addl 40(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 44(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 48(%rsp), %r11d + vpalignr $4, %xmm1, %xmm2, %xmm4 + vpalignr $4, %xmm3, %xmm0, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 52(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 56(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, 
%r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 60(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 4 + vpaddd 64+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 80+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 96+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 112+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl (%rsp), %r15d + vpalignr $4, %xmm2, %xmm3, %xmm4 + vpalignr $4, %xmm0, %xmm1, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 4(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 8(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + 
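+ # The RORX variant uses BMI2 rorxl, which writes each rotate to a
+ # separate destination without touching flags: Sigma1(e) is the xor of
+ # rorxl 6/11/25 and Sigma0(a) of rorxl 2/13/22, avoiding the mov/ror
+ # dependency chains of Transform_Sha256_AVX1_Len.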
# rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 12(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 16(%rsp), %r11d + vpalignr $4, %xmm3, %xmm0, %xmm4 + vpalignr $4, %xmm1, %xmm2, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 20(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 24(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, 
%r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 28(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 32(%rsp), %r15d + vpalignr $4, %xmm0, %xmm1, %xmm4 + vpalignr $4, %xmm2, %xmm3, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 36(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 40(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + 
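+ # sigma1(x) = (x ror 17) ^ (x ror 19) ^ (x >> 10) is computed two
+ # words at a time: vpshufd duplicates the words so 64-bit vpsrlq
+ # shifts act as 32-bit rotates, then vpshufb with the shuf_00BA and
+ # shuf_DC00 masks packs both halves into one 4-word schedule vector.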
# rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 44(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 48(%rsp), %r11d + vpalignr $4, %xmm1, %xmm2, %xmm4 + vpalignr $4, %xmm3, %xmm0, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 52(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 56(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 60(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, 
%edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 8 + vpaddd 128+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 144+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 160+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 176+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl (%rsp), %r15d + vpalignr $4, %xmm2, %xmm3, %xmm4 + vpalignr $4, %xmm0, %xmm1, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 4(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 8(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 12(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + 
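+        # NOTE: vpshufd $0x50 duplicated the two just-computed W words so the $0x11/$19 vpsrlq pair can emulate the 32-bit rotr17/rotr19 of sigma1 (with vpsrld $10 supplying shr10).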
# rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 16(%rsp), %r11d + vpalignr $4, %xmm3, %xmm0, %xmm4 + vpalignr $4, %xmm1, %xmm2, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 20(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 24(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 28(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, 
%edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 32(%rsp), %r15d + vpalignr $4, %xmm0, %xmm1, %xmm4 + vpalignr $4, %xmm2, %xmm3, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 36(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 40(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 44(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + 
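+        # NOTE: %xmm9 now holds sigma1 of the two new high words; the vpshufb with shuf_DC00 (%xmm12) below moves them into the top lanes.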
# rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 48(%rsp), %r11d + vpalignr $4, %xmm1, %xmm2, %xmm4 + vpalignr $4, %xmm3, %xmm0, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 52(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 56(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 60(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 12 + vpaddd 
192+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 208+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 224+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 240+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + xorl %eax, %eax + # rnd_all_4: 0-3 + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + addl %eax, %r8d + addl (%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 4(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + addl %eax, %r14d + addl 8(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 12(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + # rnd_all_4: 1-4 + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + addl %eax, %r12d + addl 16(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 20(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + addl %eax, %r10d + addl 24(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + 
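+        # NOTE: Sigma0(a) = rotr2 ^ rotr13 ^ rotr22; the three rorxl results are xor-folded through %ecx into %edx, interleaved with the Maj bookkeeping in %eax.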
xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 28(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + # rnd_all_4: 2-5 + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + addl %eax, %r8d + addl 32(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 36(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + addl %eax, %r14d + addl 40(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 44(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + # rnd_all_4: 3-6 + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + addl %eax, %r12d + addl 48(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 52(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + addl 
%r10d, %r14d
+        xorl %r11d, %ebx
+        andl %ebx, %eax
+        addl %edx, %r10d
+        xorl %r12d, %eax
+        rorxl $6, %r14d, %edx
+        rorxl $11, %r14d, %ecx
+        addl %eax, %r10d
+        addl 56(%rsp), %r9d
+        movl %r15d, %eax
+        xorl %edx, %ecx
+        xorl %r8d, %eax
+        rorxl $25, %r14d, %edx
+        xorl %ecx, %edx
+        andl %r14d, %eax
+        addl %edx, %r9d
+        rorxl $2, %r10d, %edx
+        rorxl $13, %r10d, %ecx
+        xorl %r8d, %eax
+        xorl %edx, %ecx
+        rorxl $22, %r10d, %edx
+        addl %eax, %r9d
+        xorl %ecx, %edx
+        movl %r11d, %eax
+        addl %r9d, %r13d
+        xorl %r10d, %eax
+        andl %eax, %ebx
+        addl %edx, %r9d
+        xorl %r11d, %ebx
+        rorxl $6, %r13d, %edx
+        rorxl $11, %r13d, %ecx
+        addl %ebx, %r9d
+        addl 60(%rsp), %r8d
+        movl %r14d, %ebx
+        xorl %edx, %ecx
+        xorl %r15d, %ebx
+        rorxl $25, %r13d, %edx
+        xorl %ecx, %edx
+        andl %r13d, %ebx
+        addl %edx, %r8d
+        rorxl $2, %r9d, %edx
+        rorxl $13, %r9d, %ecx
+        xorl %r15d, %ebx
+        xorl %edx, %ecx
+        rorxl $22, %r9d, %edx
+        addl %ebx, %r8d
+        xorl %ecx, %edx
+        movl %r10d, %ebx
+        addl %r8d, %r12d
+        xorl %r9d, %ebx
+        andl %ebx, %eax
+        addl %edx, %r8d
+        xorl %r10d, %eax
+        addl %eax, %r8d
+        addl %r8d, (%rdi)
+        addl %r9d, 4(%rdi)
+        addl %r10d, 8(%rdi)
+        addl %r11d, 12(%rdi)
+        addl %r12d, 16(%rdi)
+        addl %r13d, 20(%rdi)
+        addl %r14d, 24(%rdi)
+        addl %r15d, 28(%rdi)
+        xorq %rax, %rax
+        vzeroupper
+        addq $0x40, %rsp
+        popq %r15
+        popq %r14
+        popq %r13
+        popq %r12
+        popq %rbx
+        repz retq
+#ifndef __APPLE__
+.size Transform_Sha256_AVX1_RORX,.-Transform_Sha256_AVX1_RORX
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl Transform_Sha256_AVX1_RORX_Len
+.type Transform_Sha256_AVX1_RORX_Len,@function
+.align 4
+Transform_Sha256_AVX1_RORX_Len:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha256_AVX1_RORX_Len
+.p2align 2
+_Transform_Sha256_AVX1_RORX_Len:
+#endif /* __APPLE__ */
+        pushq %rbx
+        pushq %r12
+        pushq %r13
+        pushq %r14
+        pushq %r15
+        pushq %rbp
+        movq %rsi, %rbp
+        movq %rdx, %rsi
+        subq $0x40, %rsp
+        vmovdqa L_avx1_rorx_sha256_flip_mask(%rip), %xmm13
+        vmovdqa L_avx1_rorx_sha256_shuf_00BA(%rip), %xmm11
+        vmovdqa L_avx1_rorx_sha256_shuf_DC00(%rip), %xmm12
+        movl (%rdi), %r8d
+        movl 4(%rdi), %r9d
+        movl 8(%rdi), %r10d
+        movl 12(%rdi), %r11d
+        movl 16(%rdi), %r12d
+        movl 20(%rdi), %r13d
+        movl 24(%rdi), %r14d
+        movl 28(%rdi), %r15d
+        # Start of loop processing a block
+L_sha256_len_avx1_len_rorx_start:
+        # X0, X1, X2, X3 = W[0..15]
+        vmovdqu (%rbp), %xmm0
+        vmovdqu 16(%rbp), %xmm1
+        vpshufb %xmm13, %xmm0, %xmm0
+        vpshufb %xmm13, %xmm1, %xmm1
+        vmovdqu 32(%rbp), %xmm2
+        vmovdqu 48(%rbp), %xmm3
+        vpshufb %xmm13, %xmm2, %xmm2
+        vpshufb %xmm13, %xmm3, %xmm3
+        # set_w_k_xfer_4: 0
+        vpaddd 0+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
+        vpaddd 16+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
+        vmovdqu %xmm4, (%rsp)
+        vmovdqu %xmm5, 16(%rsp)
+        vpaddd 32+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
+        vpaddd 48+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
+        vmovdqu %xmm6, 32(%rsp)
+        vmovdqu %xmm7, 48(%rsp)
+        movl %r9d, %ebx
+        rorxl $6, %r12d, %edx
+        xorl %r10d, %ebx
+        # msg_sched: 0-3
+        # rnd_0: 0 - 0
+        movl %r13d, %eax
+        rorxl $11, %r12d, %ecx
+        addl (%rsp), %r15d
+        vpalignr $4, %xmm2, %xmm3, %xmm4
+        vpalignr $4, %xmm0, %xmm1, %xmm5
+        # rnd_0: 1 - 2
+        xorl %edx, %ecx
+        xorl %r14d, %eax
+        rorxl $25, %r12d, %edx
+        andl %r12d, %eax
+        xorl %ecx, %edx
+        rorxl $13, %r8d, %ecx
+        vpsrld $7, %xmm5, %xmm6
+        vpslld $25, %xmm5, %xmm7
+        # rnd_0: 3 - 4
+        addl %edx, %r15d
+        rorxl $2, %r8d, %edx
+        xorl %r14d, %eax
+        xorl %edx, %ecx
+        rorxl $22, %r8d, %edx
+        addl %eax, %r15d
+        vpsrld $3, %xmm5, %xmm8
+        vpor %xmm6, %xmm7, %xmm7
+        # rnd_0: 5 - 7
+        xorl %ecx,
%edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 4(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 8(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 12(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 16(%rsp), %r11d + vpalignr $4, %xmm3, %xmm0, %xmm4 + vpalignr $4, %xmm1, %xmm2, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 20(%rsp), %r10d + vpsrld $18, 
%xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 24(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 28(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 32(%rsp), %r15d + vpalignr $4, %xmm0, %xmm1, %xmm4 + vpalignr $4, %xmm2, %xmm3, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 36(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl 
$2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 40(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 44(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 48(%rsp), %r11d + vpalignr $4, %xmm1, %xmm2, %xmm4 + vpalignr $4, %xmm3, %xmm0, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 52(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor 
%xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 56(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 60(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 4 + vpaddd 64+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 80+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 96+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 112+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl (%rsp), %r15d + vpalignr $4, %xmm2, %xmm3, %xmm4 + vpalignr $4, %xmm0, %xmm1, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 4(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 5 - 
5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 8(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 12(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 16(%rsp), %r11d + vpalignr $4, %xmm3, %xmm0, %xmm4 + vpalignr $4, %xmm1, %xmm2, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 20(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, 
%r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 24(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 28(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 32(%rsp), %r15d + vpalignr $4, %xmm0, %xmm1, %xmm4 + vpalignr $4, %xmm2, %xmm3, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 36(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 40(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 2 - 2 
+ andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 44(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 48(%rsp), %r11d + vpalignr $4, %xmm1, %xmm2, %xmm4 + vpalignr $4, %xmm3, %xmm0, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 52(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 56(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + 
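+        # NOTE: %eax now holds Ch(e,f,g) = ((f ^ g) & e) ^ g for this round; the addl below folds it into h (%r9d).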
addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 60(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 8 + vpaddd 128+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 144+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 160+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 176+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl (%rsp), %r15d + vpalignr $4, %xmm2, %xmm3, %xmm4 + vpalignr $4, %xmm0, %xmm1, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 4(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 8(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, 
%xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 12(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 16(%rsp), %r11d + vpalignr $4, %xmm3, %xmm0, %xmm4 + vpalignr $4, %xmm1, %xmm2, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 20(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 24(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + 
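+        # NOTE: Maj trick: %eax = a ^ b; ANDed with %ebx (the previous round's a ^ b, i.e. this round's b ^ c) and XORed with b, it yields Maj(a,b,c).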
addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 28(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 32(%rsp), %r15d + vpalignr $4, %xmm0, %xmm1, %xmm4 + vpalignr $4, %xmm2, %xmm3, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 36(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 40(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 44(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 
1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 48(%rsp), %r11d + vpalignr $4, %xmm1, %xmm2, %xmm4 + vpalignr $4, %xmm3, %xmm0, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 52(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 56(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 60(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + 
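# sigma_1(x) = (x ror 17) ^ (x ror 19) ^ (x >> 10); the two rotates are + # synthesised from 64-bit vpsrlq shifts by 0x11 and 19 on duplicated word + # pairs, then repacked through the shuffle masks. +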
xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 12 + vpaddd 192+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 208+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 224+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 240+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + xorl %eax, %eax + xorl %ecx, %ecx + # rnd_all_4: 0-3 + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + addl %eax, %r8d + addl (%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 4(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + addl %eax, %r14d + addl 8(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 12(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + # rnd_all_4: 1-4 + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + addl %eax, %r12d + addl 16(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 20(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d 
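+ # Sigma_1(e) = (e ror 6) ^ (e ror 11) ^ (e ror 25) + # Sigma_0(a) = (a ror 2) ^ (a ror 13) ^ (a ror 22) + # rorx writes no flags, so these chains interleave freely with the add/xor logic.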
+ rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + addl %eax, %r10d + addl 24(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 28(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + # rnd_all_4: 2-5 + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + addl %eax, %r8d + addl 32(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 36(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + addl %eax, %r14d + addl 40(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 44(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + # rnd_all_4: 3-6 + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + addl %eax, %r12d + addl 48(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, 
%r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 52(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + addl %eax, %r10d + addl 56(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 60(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + addl %eax, %r8d + addl (%rdi), %r8d + addl 4(%rdi), %r9d + addl 8(%rdi), %r10d + addl 12(%rdi), %r11d + addl 16(%rdi), %r12d + addl 20(%rdi), %r13d + addl 24(%rdi), %r14d + addl 28(%rdi), %r15d + addq $0x40, %rbp + subl $0x40, %esi + movl %r8d, (%rdi) + movl %r9d, 4(%rdi) + movl %r10d, 8(%rdi) + movl %r11d, 12(%rdi) + movl %r12d, 16(%rdi) + movl %r13d, 20(%rdi) + movl %r14d, 24(%rdi) + movl %r15d, 28(%rdi) + jnz L_sha256_len_avx1_len_rorx_start + xorq %rax, %rax + vzeroupper + addq $0x40, %rsp + popq %rbp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha256_AVX1_RORX_Len,.-Transform_Sha256_AVX1_RORX_Len +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX1 */ +#ifdef HAVE_INTEL_AVX2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +L_avx2_sha256_k: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 
0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_avx2_sha256_shuf_00BA: +.quad 0xb0a090803020100, 0xffffffffffffffff +.quad 0xb0a090803020100, 0xffffffffffffffff +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_avx2_sha256_shuf_DC00: +.quad 0xffffffffffffffff, 0xb0a090803020100 +.quad 0xffffffffffffffff, 0xb0a090803020100 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_avx2_sha256_flip_mask: +.quad 0x405060700010203, 0xc0d0e0f08090a0b +.quad 0x405060700010203, 0xc0d0e0f08090a0b +#ifndef __APPLE__ +.text +.globl Transform_Sha256_AVX2 +.type Transform_Sha256_AVX2,@function +.align 4 +Transform_Sha256_AVX2: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX2 +.p2align 2 +_Transform_Sha256_AVX2: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x200, %rsp + leaq 32(%rdi), %rax + vmovdqa L_avx2_sha256_flip_mask(%rip), %xmm13 + vmovdqa L_avx2_sha256_shuf_00BA(%rip), %ymm11 + vmovdqa L_avx2_sha256_shuf_DC00(%rip), %ymm12 + movl (%rdi), %r8d + movl 4(%rdi), %r9d + movl 8(%rdi), %r10d + movl 12(%rdi), %r11d + movl 16(%rdi), %r12d + movl 20(%rdi), %r13d + movl 24(%rdi), %r14d + movl 28(%rdi), %r15d + # X0, X1, X2, X3 = W[0..15] + vmovdqu (%rax), %xmm0 + vmovdqu 16(%rax), %xmm1 + vpshufb %xmm13, %xmm0, %xmm0 + vpshufb %xmm13, %xmm1, %xmm1 + vmovdqu 32(%rax), %xmm2 + vmovdqu 48(%rax), %xmm3 + vpshufb %xmm13, %xmm2, %xmm2 + vpshufb %xmm13, %xmm3, %xmm3 + movl %r9d, %ebx + movl %r12d, %edx + xorl %r10d, %ebx + # set_w_k_xfer_4: 0 + vpaddd 0+L_avx2_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 32+L_avx2_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, (%rsp) + vmovdqu %ymm5, 32(%rsp) + vpaddd 64+L_avx2_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 96+L_avx2_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 64(%rsp) + vmovdqu %ymm5, 96(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm0, %ymm1, %ymm5 + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl (%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + 
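# Without rorx, Sigma_1 is folded as (((e ror 14) ^ e) ror 5 ^ e) ror 6, + # which equals (e ror 6) ^ (e ror 11) ^ (e ror 25); Sigma_0 does the same + # with rotate counts 9, 11 and 2 to realise rotations 2, 13 and 22. +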
addl 4(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm3, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 8(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 12(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # msg_sched done: 0-3 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm1, %ymm2, %ymm5 + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 32(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 36(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm0, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl 
%r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 40(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 44(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # msg_sched done: 8-11 + # msg_sched: 16-19 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm2, %ymm3, %ymm5 + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 64(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 68(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm1, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 72(%rsp), %r13d + xorl %r12d, %ecx 
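+ # sigma_0(x) = (x ror 7) ^ (x ror 18) ^ (x >> 3), computed for four message + # words at once: each rotate is built from a vpsrld/vpslld pair vpor-ed together.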
+ xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 76(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # msg_sched done: 16-19 + # msg_sched: 24-27 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm3, %ymm0, %ymm5 + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 96(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 100(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm2, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 104(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # 
rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 108(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # msg_sched done: 24-27 + # set_w_k_xfer_4: 4 + vpaddd 128+L_avx2_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 160+L_avx2_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, 128(%rsp) + vmovdqu %ymm5, 160(%rsp) + vpaddd 192+L_avx2_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 224+L_avx2_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 192(%rsp) + vmovdqu %ymm5, 224(%rsp) + # msg_sched: 32-35 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm0, %ymm1, %ymm5 + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 128(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 132(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm3, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 136(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, 
%r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 140(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # msg_sched done: 32-35 + # msg_sched: 40-43 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm1, %ymm2, %ymm5 + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 160(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 164(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm0, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 168(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 172(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, 
%ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # msg_sched done: 40-43 + # msg_sched: 48-51 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm2, %ymm3, %ymm5 + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 192(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 196(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm1, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 200(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 204(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + 
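# Each row of L_avx2_sha256_k holds four constants twice, so one vpaddd forms + # W[i]+K[i] in both 128-bit lanes; this single-block routine evidently reads + # only the low lane, which is why the round adds skip the odd 16 bytes of + # each 32-byte slot in the 0x200-byte W+K frame. +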
# rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # msg_sched done: 48-51 + # msg_sched: 56-59 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm3, %ymm0, %ymm5 + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 224(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 228(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm2, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 232(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 236(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # msg_sched done: 56-59 + # set_w_k_xfer_4: 8 + vpaddd 256+L_avx2_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 288+L_avx2_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, 256(%rsp) + vmovdqu %ymm5, 288(%rsp) + vpaddd 320+L_avx2_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 352+L_avx2_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 
320(%rsp) + vmovdqu %ymm5, 352(%rsp) + # msg_sched: 64-67 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm0, %ymm1, %ymm5 + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 256(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 260(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm3, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 264(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 268(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # msg_sched done: 64-67 + # msg_sched: 72-75 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm1, %ymm2, %ymm5 + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 288(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + 
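# Ch(e,f,g) is evaluated in ecx as ((f ^ g) & e) ^ g -- two XORs and one AND + # instead of the textbook (e & f) ^ (~e & g). +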
xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 292(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm0, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 296(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 300(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # msg_sched done: 72-75 + # msg_sched: 80-83 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm2, %ymm3, %ymm5 + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 320(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + 
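# vpshufd $0xfa duplicates the top two message words and $0x50 the bottom two, + # so sigma_1's 17- and 19-bit rotates can be taken as 64-bit right shifts; + # vpshufb with shuf_00BA/shuf_DC00 then packs the results back together. +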
movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 324(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm1, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 328(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 332(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # msg_sched done: 80-83 + # msg_sched: 88-91 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm3, %ymm0, %ymm5 + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 352(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 356(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm2, %ymm6 + # rnd_1: 4 - 
5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 360(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 364(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # msg_sched done: 88-91 + # set_w_k_xfer_4: 12 + vpaddd 384+L_avx2_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 416+L_avx2_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, 384(%rsp) + vmovdqu %ymm5, 416(%rsp) + vpaddd 448+L_avx2_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 480+L_avx2_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 448(%rsp) + vmovdqu %ymm5, 480(%rsp) + # rnd_all_4: 24-27 + addl 384(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 388(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 392(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl 
$9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 396(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 26-29 + addl 416(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 420(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 424(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 428(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 28-31 + addl 448(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 452(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, 
%edx + addl %ecx, %r14d + addl 456(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 460(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 30-33 + addl 480(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 484(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 488(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 492(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + addl %r8d, (%rdi) + addl %r9d, 4(%rdi) + addl %r10d, 8(%rdi) + addl %r11d, 12(%rdi) + addl %r12d, 16(%rdi) + addl %r13d, 20(%rdi) + addl %r14d, 24(%rdi) + addl %r15d, 28(%rdi) + xorq %rax, %rax + vzeroupper + addq $0x200, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha256_AVX2,.-Transform_Sha256_AVX2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl Transform_Sha256_AVX2_Len +.type Transform_Sha256_AVX2_Len,@function +.align 4 +Transform_Sha256_AVX2_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX2_Len +.p2align 2 
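+ # Transform_Sha256_AVX2_Len: hashes len bytes of input, two 64-byte
+ # blocks per iteration of the main loop. Assumed C prototype, inferred
+ # from the register usage below (state at (%rdi), data pointer in %rsi,
+ # byte count in %rdx):
+ #   void Transform_Sha256_AVX2_Len(wc_Sha256* sha256, const byte* data,
+ #                                  word32 len);
+ # When len contains an odd number of 64-byte blocks (testb $0x40), one
+ # block is first copied into the context at 32(%rdi)/64(%rdi) (assumed
+ # to be the wc_Sha256 block buffer) and run through the single-block
+ # Transform_Sha256_AVX2 above before the two-block loop is entered.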
+_Transform_Sha256_AVX2_Len: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbp + movq %rsi, %rbp + movq %rdx, %rsi + subq $0x200, %rsp + testb $0x40, %sil + je L_sha256_len_avx2_block + vmovdqu (%rbp), %ymm0 + vmovdqu 32(%rbp), %ymm1 + vmovups %ymm0, 32(%rdi) + vmovups %ymm1, 64(%rdi) +#ifndef __APPLE__ + call Transform_Sha256_AVX2@plt +#else + call _Transform_Sha256_AVX2 +#endif /* __APPLE__ */ + addq $0x40, %rbp + subl $0x40, %esi + jz L_sha256_len_avx2_done +L_sha256_len_avx2_block: + vmovdqa L_avx2_sha256_flip_mask(%rip), %ymm13 + vmovdqa L_avx2_sha256_shuf_00BA(%rip), %ymm11 + vmovdqa L_avx2_sha256_shuf_DC00(%rip), %ymm12 + movl (%rdi), %r8d + movl 4(%rdi), %r9d + movl 8(%rdi), %r10d + movl 12(%rdi), %r11d + movl 16(%rdi), %r12d + movl 20(%rdi), %r13d + movl 24(%rdi), %r14d + movl 28(%rdi), %r15d + # Start of loop processing two blocks +L_sha256_len_avx2_start: + # X0, X1, X2, X3 = W[0..15] + vmovdqu (%rbp), %xmm0 + vmovdqu 16(%rbp), %xmm1 + vmovdqu 64(%rbp), %xmm4 + vmovdqu 80(%rbp), %xmm5 + vinserti128 $0x01, %xmm4, %ymm0, %ymm0 + vinserti128 $0x01, %xmm5, %ymm1, %ymm1 + vpshufb %ymm13, %ymm0, %ymm0 + vpshufb %ymm13, %ymm1, %ymm1 + vmovdqu 32(%rbp), %xmm2 + vmovdqu 48(%rbp), %xmm3 + vmovdqu 96(%rbp), %xmm6 + vmovdqu 112(%rbp), %xmm7 + vinserti128 $0x01, %xmm6, %ymm2, %ymm2 + vinserti128 $0x01, %xmm7, %ymm3, %ymm3 + vpshufb %ymm13, %ymm2, %ymm2 + vpshufb %ymm13, %ymm3, %ymm3 + movl %r9d, %ebx + movl %r12d, %edx + xorl %r10d, %ebx + # set_w_k_xfer_4: 0 + vpaddd 0+L_avx2_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 32+L_avx2_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, (%rsp) + vmovdqu %ymm5, 32(%rsp) + vpaddd 64+L_avx2_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 96+L_avx2_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 64(%rsp) + vmovdqu %ymm5, 96(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm0, %ymm1, %ymm5 + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl (%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 4(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm3, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 8(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, 
%edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 12(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # msg_sched done: 0-3 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm1, %ymm2, %ymm5 + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 32(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 36(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm0, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 40(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl 
$11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 44(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # msg_sched done: 8-11 + # msg_sched: 16-19 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm2, %ymm3, %ymm5 + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 64(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 68(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm1, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 72(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 76(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, 
%edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # msg_sched done: 16-19 + # msg_sched: 24-27 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm3, %ymm0, %ymm5 + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 96(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 100(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm2, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 104(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 108(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + 
addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # msg_sched done: 24-27 + # set_w_k_xfer_4: 4 + vpaddd 128+L_avx2_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 160+L_avx2_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, 128(%rsp) + vmovdqu %ymm5, 160(%rsp) + vpaddd 192+L_avx2_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 224+L_avx2_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 192(%rsp) + vmovdqu %ymm5, 224(%rsp) + # msg_sched: 32-35 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm0, %ymm1, %ymm5 + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 128(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 132(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm3, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 136(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 140(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, 
%ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # msg_sched done: 32-35 + # msg_sched: 40-43 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm1, %ymm2, %ymm5 + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 160(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 164(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm0, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 168(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 172(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # msg_sched done: 40-43 + # msg_sched: 48-51 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm2, %ymm3, %ymm5 + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 192(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # 
rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 196(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm1, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 200(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 204(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # msg_sched done: 48-51 + # msg_sched: 56-59 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm3, %ymm0, %ymm5 + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 224(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + 
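+ # (the two vpor pairs fold the shift halves into rotr 7 and rotr 18 of
+ # the aligned message words; with the vpsrld $3 below they form sigma0
+ # of the SHA-256 message schedule)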
vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 228(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm2, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 232(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 236(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # msg_sched done: 56-59 + # set_w_k_xfer_4: 8 + vpaddd 256+L_avx2_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 288+L_avx2_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, 256(%rsp) + vmovdqu %ymm5, 288(%rsp) + vpaddd 320+L_avx2_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 352+L_avx2_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 320(%rsp) + vmovdqu %ymm5, 352(%rsp) + # msg_sched: 64-67 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm0, %ymm1, %ymm5 + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 256(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, 
%r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 260(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm3, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 264(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 268(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # msg_sched done: 64-67 + # msg_sched: 72-75 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm1, %ymm2, %ymm5 + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 288(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 292(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm0, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl 
%r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 296(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 300(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # msg_sched done: 72-75 + # msg_sched: 80-83 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm2, %ymm3, %ymm5 + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 320(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 324(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm1, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm2, %ymm4, %ymm4 + 
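+ # (vector msg_sched steps are interleaved with the scalar rnd_0/rnd_1
+ # instructions so computing the next W values in ymm registers overlaps
+ # the integer round logic)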
# rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 328(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 332(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # msg_sched done: 80-83 + # msg_sched: 88-91 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm3, %ymm0, %ymm5 + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 352(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 356(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm2, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 360(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl 
%eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 364(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # msg_sched done: 88-91 + # set_w_k_xfer_4: 12 + vpaddd 384+L_avx2_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 416+L_avx2_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, 384(%rsp) + vmovdqu %ymm5, 416(%rsp) + vpaddd 448+L_avx2_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 480+L_avx2_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 448(%rsp) + vmovdqu %ymm5, 480(%rsp) + # rnd_all_4: 24-27 + addl 384(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 388(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 392(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 396(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 26-29 + addl 416(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + 
rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 420(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 424(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 428(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 28-31 + addl 448(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 452(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 456(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 460(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl 
%r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 30-33 + addl 480(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 484(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 488(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 492(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + addl (%rdi), %r8d + addl 4(%rdi), %r9d + addl 8(%rdi), %r10d + addl 12(%rdi), %r11d + addl 16(%rdi), %r12d + addl 20(%rdi), %r13d + addl 24(%rdi), %r14d + addl 28(%rdi), %r15d + movl %r8d, (%rdi) + movl %r9d, 4(%rdi) + movl %r10d, 8(%rdi) + movl %r11d, 12(%rdi) + movl %r12d, 16(%rdi) + movl %r13d, 20(%rdi) + movl %r14d, 24(%rdi) + movl %r15d, 28(%rdi) + movl %r9d, %ebx + movl %r12d, %edx + xorl %r10d, %ebx + # rnd_all_4: 1-4 + addl 16(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 20(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 24(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + 
andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 28(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 3-6 + addl 48(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 52(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 56(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 60(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 5-8 + addl 80(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 84(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d 
+ rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 88(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 92(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 7-10 + addl 112(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 116(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 120(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 124(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 9-12 + addl 144(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl 
%r11d, %edx + addl %ecx, %r15d + addl 148(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 152(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 156(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 11-14 + addl 176(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 180(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 184(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 188(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 13-16 + addl 208(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx 
+ xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 212(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 216(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 220(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 15-18 + addl 240(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 244(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 248(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 252(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl 
%r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 17-20 + addl 272(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 276(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 280(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 284(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 19-22 + addl 304(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 308(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 312(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 
316(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 21-24 + addl 336(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 340(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 344(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 348(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 23-26 + addl 368(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 372(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 376(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + 
addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 380(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 25-28 + addl 400(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 404(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 408(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 412(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 27-30 + addl 432(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 436(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl 
%r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 440(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 444(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 29-32 + addl 464(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 468(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 472(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 476(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 31-34 + addl 496(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 500(%rsp), %r10d + 
movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 504(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 508(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + addl (%rdi), %r8d + addl 4(%rdi), %r9d + addl 8(%rdi), %r10d + addl 12(%rdi), %r11d + addl 16(%rdi), %r12d + addl 20(%rdi), %r13d + addl 24(%rdi), %r14d + addl 28(%rdi), %r15d + addq $0x80, %rbp + subl $0x80, %esi + movl %r8d, (%rdi) + movl %r9d, 4(%rdi) + movl %r10d, 8(%rdi) + movl %r11d, 12(%rdi) + movl %r12d, 16(%rdi) + movl %r13d, 20(%rdi) + movl %r14d, 24(%rdi) + movl %r15d, 28(%rdi) + jnz L_sha256_len_avx2_start +L_sha256_len_avx2_done: + xorq %rax, %rax + vzeroupper + addq $0x200, %rsp + popq %rbp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha256_AVX2_Len,.-Transform_Sha256_AVX2_Len +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +L_avx2_rorx_sha256_k: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 
0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_avx2_rorx_sha256_flip_mask: +.quad 0x405060700010203, 0xc0d0e0f08090a0b +.quad 0x405060700010203, 0xc0d0e0f08090a0b +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_avx2_rorx_sha256_shuf_00BA: +.quad 0xb0a090803020100, 0xffffffffffffffff +.quad 0xb0a090803020100, 0xffffffffffffffff +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_avx2_rorx_sha256_shuf_DC00: +.quad 0xffffffffffffffff, 0xb0a090803020100 +.quad 0xffffffffffffffff, 0xb0a090803020100 +#ifndef __APPLE__ +.text +.globl Transform_Sha256_AVX2_RORX +.type Transform_Sha256_AVX2_RORX,@function +.align 4 +Transform_Sha256_AVX2_RORX: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX2_RORX +.p2align 2 +_Transform_Sha256_AVX2_RORX: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x200, %rsp + leaq 32(%rdi), %rax + vmovdqa L_avx2_rorx_sha256_flip_mask(%rip), %xmm13 + vmovdqa L_avx2_rorx_sha256_shuf_00BA(%rip), %ymm11 + vmovdqa L_avx2_rorx_sha256_shuf_DC00(%rip), %ymm12 + # X0, X1, X2, X3 = W[0..15] + vmovdqu (%rax), %xmm0 + vmovdqu 16(%rax), %xmm1 + vpshufb %xmm13, %xmm0, %xmm0 + vpshufb %xmm13, %xmm1, %xmm1 + vpaddd 0+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 32+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, (%rsp) + vmovdqu %ymm5, 32(%rsp) + vmovdqu 32(%rax), %xmm2 + vmovdqu 48(%rax), %xmm3 + vpshufb %xmm13, %xmm2, %xmm2 + vpshufb %xmm13, %xmm3, %xmm3 + vpaddd 64+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 96+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 64(%rsp) + vmovdqu %ymm5, 96(%rsp) + movl (%rdi), %r8d + movl 4(%rdi), %r9d + movl 8(%rdi), %r10d + movl 12(%rdi), %r11d + movl 16(%rdi), %r12d + movl 20(%rdi), %r13d + movl 24(%rdi), %r14d + movl 28(%rdi), %r15d + movl %r9d, %ebx + rorxl $6, %r12d, %edx + xorl %r10d, %ebx + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl (%rsp), %r15d + vpalignr $4, %ymm0, %ymm1, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 4(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 
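+        # Note (annotation, not part of the generated output): this RORX
+        # variant computes the Sigma0/Sigma1 rotations with BMI2 rorxl, which
+        # is non-destructive and leaves the flags untouched, so the scalar
+        # rounds interleave freely with the AVX2 message-schedule ops
+        # (vpalignr/vpsrld/vpslld/vpxor/vpshufb) that expand W[t]+K[t] into
+        # the 0x200-byte stack workspace read by the later rounds.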
+ andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm3, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 8(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 12(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 128+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 128(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 32(%rsp), %r11d + vpalignr $4, %ymm1, %ymm2, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 36(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm0, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + 
xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 40(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 44(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 160+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 160(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 64(%rsp), %r15d + vpalignr $4, %ymm2, %ymm3, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 68(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm1, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 
5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 72(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 76(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 192+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 192(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 96(%rsp), %r11d + vpalignr $4, %ymm3, %ymm0, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 100(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm2, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, 
%eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 104(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 108(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 224+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 224(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 128(%rsp), %r15d + vpalignr $4, %ymm0, %ymm1, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 132(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm3, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 0 - 0 + 
movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 136(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 140(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 256+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 256(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 160(%rsp), %r11d + vpalignr $4, %ymm1, %ymm2, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 164(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm0, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 168(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + 
rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 172(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 288+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 288(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 192(%rsp), %r15d + vpalignr $4, %ymm2, %ymm3, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 196(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm1, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 200(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + 
# rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 204(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 320+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 320(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 224(%rsp), %r11d + vpalignr $4, %ymm3, %ymm0, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 228(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm2, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 232(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl 
$22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 236(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 352+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 352(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 256(%rsp), %r15d + vpalignr $4, %ymm0, %ymm1, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 260(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm3, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 264(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, 
%ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 268(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 384+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 384(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 288(%rsp), %r11d + vpalignr $4, %ymm1, %ymm2, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 292(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm0, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 296(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + 
rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 300(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 416+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 416(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 320(%rsp), %r15d + vpalignr $4, %ymm2, %ymm3, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 324(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm1, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 328(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 332(%rsp), %r12d + 
vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 448+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 448(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 352(%rsp), %r11d + vpalignr $4, %ymm3, %ymm0, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 356(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm2, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 360(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 364(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl 
%r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 480+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 480(%rsp) + xorl %eax, %eax + xorl %ecx, %ecx + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 384(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 388(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 392(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 396(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 416(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 420(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + 
andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 424(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 428(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 448(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 452(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 456(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 460(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 480(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, 
%r15d, %ecx
+ addl %ebx, %r11d
+ addl 484(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ leal (%r14,%r10,1), %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ leal (%r10,%rax,1), %r10d
+ addl 488(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 492(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ leal (%r12,%r8,1), %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ addl %eax, %r8d
+ addl %r8d, (%rdi)
+ addl %r9d, 4(%rdi)
+ addl %r10d, 8(%rdi)
+ addl %r11d, 12(%rdi)
+ addl %r12d, 16(%rdi)
+ addl %r13d, 20(%rdi)
+ addl %r14d, 24(%rdi)
+ addl %r15d, 28(%rdi)
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x200, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha256_AVX2_RORX,.-Transform_Sha256_AVX2_RORX
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl Transform_Sha256_AVX2_RORX_Len
+.type Transform_Sha256_AVX2_RORX_Len,@function
+.align 4
+Transform_Sha256_AVX2_RORX_Len:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha256_AVX2_RORX_Len
+.p2align 2
+_Transform_Sha256_AVX2_RORX_Len:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbp
+ movq %rsi, %rbp
+ movq %rdx, %rsi
+ subq $0x200, %rsp
+ testb $0x40, %sil
+ je L_sha256_len_avx2_rorx_block
+ vmovdqu (%rbp), %ymm0
+ vmovdqu 32(%rbp), %ymm1
+ vmovups %ymm0, 32(%rdi)
+ vmovups %ymm1, 64(%rdi)
+#ifndef __APPLE__
+ call Transform_Sha256_AVX2_RORX@plt
+#else
+ call _Transform_Sha256_AVX2_RORX
+#endif /* __APPLE__ */
+ addq $0x40, %rbp
+ subl $0x40, %esi
+ jz L_sha256_len_avx2_rorx_done
+L_sha256_len_avx2_rorx_block:
+ vmovdqa L_avx2_rorx_sha256_flip_mask(%rip), %ymm13
+ vmovdqa L_avx2_rorx_sha256_shuf_00BA(%rip), %ymm11
+ vmovdqa L_avx2_rorx_sha256_shuf_DC00(%rip), %ymm12
+ movl (%rdi), %r8d
+ movl 4(%rdi), %r9d
+ movl 8(%rdi), %r10d
+ movl 12(%rdi), %r11d
+ movl 16(%rdi), %r12d
+ movl 20(%rdi), %r13d
+ movl 24(%rdi), %r14d
+ movl 28(%rdi), %r15d
+ # Start of loop processing two blocks
+L_sha256_len_avx2_rorx_start:
+ # X0, X1, X2, X3 = W[0..15]
+ vmovdqu (%rbp), %xmm0
+ vmovdqu 16(%rbp), %xmm1
+ vinserti128 $0x01, 64(%rbp), %ymm0, %ymm0
+ vinserti128 $0x01, 80(%rbp), %ymm1, %ymm1
+ vpshufb %ymm13, %ymm0, %ymm0
+ vpshufb %ymm13, %ymm1, %ymm1
+ vpaddd 0+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
+ vpaddd 32+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm5
+ vmovdqu %ymm4, (%rsp)
+ vmovdqu %ymm5, 32(%rsp)
+ vmovdqu 32(%rbp), %xmm2
+ vmovdqu 48(%rbp), %xmm3
+ vinserti128 $0x01, 96(%rbp),
%ymm2, %ymm2 + vinserti128 $0x01, 112(%rbp), %ymm3, %ymm3 + vpshufb %ymm13, %ymm2, %ymm2 + vpshufb %ymm13, %ymm3, %ymm3 + vpaddd 64+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 96+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 64(%rsp) + vmovdqu %ymm5, 96(%rsp) + movl %r9d, %ebx + rorxl $6, %r12d, %edx + xorl %r10d, %ebx + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl (%rsp), %r15d + vpalignr $4, %ymm0, %ymm1, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 4(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm3, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 8(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 12(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 128+L_avx2_rorx_sha256_k(%rip), %ymm0, 
%ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 128(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 32(%rsp), %r11d + vpalignr $4, %ymm1, %ymm2, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 36(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm0, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 40(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 44(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 160+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + 
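+ # note: each vmovdqu %ymm4, N(%rsp) spill below stages four W[i]+K[i]
+ # schedule words per block, with the two blocks interleaved across the
+ # low and high ymm lanes, so the scalar rounds can consume them later
+ # with 32-bit adds straight from the stack (addl N(%rsp), ...).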
vmovdqu %ymm4, 160(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 64(%rsp), %r15d + vpalignr $4, %ymm2, %ymm3, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 68(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm1, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 72(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 76(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 192+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 192(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 96(%rsp), %r11d + vpalignr $4, %ymm3, %ymm0, %ymm5 + # rnd_0: 1 
- 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 100(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm2, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 104(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 108(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 224+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 224(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 128(%rsp), %r15d + vpalignr $4, %ymm0, %ymm1, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + 
rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 132(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm3, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 136(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 140(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 256+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 256(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 160(%rsp), %r11d + vpalignr $4, %ymm1, %ymm2, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 
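+ # note: AVX2 has no packed 32-bit rotate, so each ROTR in the message
+ # schedule is built from a shift pair, e.g. vpsrld $7 / vpslld $25
+ # or'd together gives ROTR7(w); combined with vpsrld $18 / vpslld $14
+ # and a plain vpsrld $3 this forms
+ # sigma0(w) = ROTR7(w) ^ ROTR18(w) ^ (w >> 3).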
+ # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 164(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm0, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 168(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 172(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 288+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 288(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 192(%rsp), %r15d + vpalignr $4, %ymm2, %ymm3, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + 
addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 196(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm1, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 200(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 204(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 320+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 320(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 224(%rsp), %r11d + vpalignr $4, %ymm3, %ymm0, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # 
rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 228(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm2, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 232(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 236(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 352+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 352(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 256(%rsp), %r15d + vpalignr $4, %ymm0, %ymm1, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx 
+ addl 260(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm3, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 264(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 268(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 384+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 384(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 288(%rsp), %r11d + vpalignr $4, %ymm1, %ymm2, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 292(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, 
%ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm0, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 296(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 300(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 416+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 416(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 320(%rsp), %r15d + vpalignr $4, %ymm2, %ymm3, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 324(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm1, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl 
$2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 328(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 332(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 448+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 448(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 352(%rsp), %r11d + vpalignr $4, %ymm3, %ymm0, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 356(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm2, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld 
$10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 360(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 364(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 480+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 480(%rsp) + xorl %eax, %eax + xorl %ecx, %ecx + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 384(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 388(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 392(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl 
$6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 396(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 416(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 420(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 424(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 428(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 448(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 452(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 456(%rsp), %r13d + movl 
%r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 460(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 480(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 484(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 488(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 492(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + addl %eax, %r8d + xorl %ecx, %ecx + addl (%rdi), %r8d + addl 4(%rdi), %r9d + addl 8(%rdi), %r10d + addl 12(%rdi), %r11d + addl 16(%rdi), %r12d + addl 20(%rdi), %r13d + addl 24(%rdi), %r14d + addl 28(%rdi), %r15d + movl %r8d, (%rdi) + movl %r9d, 4(%rdi) + movl %r10d, 8(%rdi) + movl %r11d, 12(%rdi) + movl %r12d, 16(%rdi) + movl %r13d, 20(%rdi) + movl %r14d, 24(%rdi) + movl %r15d, 28(%rdi) + movl %r9d, %ebx + xorl %eax, %eax + xorl %r10d, %ebx + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 16(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, 
%r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 20(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 24(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 28(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 48(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 52(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 56(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 60(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl 
%ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 80(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 84(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 88(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 92(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 112(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 116(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 120(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax 
+ andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 124(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 144(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 148(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 152(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 156(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 176(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 180(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, 
%r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 184(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 188(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 208(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 212(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 216(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 220(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 240(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 244(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + 
xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 248(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 252(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 272(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 276(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 280(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 284(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 304(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl 
%edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 308(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 312(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 316(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 336(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 340(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 344(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 348(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + 
rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 368(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 372(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 376(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 380(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 400(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 404(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 408(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl 
%r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 412(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 432(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 436(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 440(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 444(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 464(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 468(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, 
%r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 472(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 476(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 496(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 500(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 504(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 508(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + addl %eax, %r8d + addq $0x80, %rbp + addl (%rdi), %r8d + addl 4(%rdi), %r9d + addl 8(%rdi), %r10d + addl 12(%rdi), %r11d + addl 16(%rdi), %r12d + addl 20(%rdi), %r13d + addl 24(%rdi), %r14d + addl 28(%rdi), %r15d + subl $0x80, %esi + movl %r8d, (%rdi) + movl %r9d, 4(%rdi) + movl %r10d, 8(%rdi) + movl %r11d, 12(%rdi) + movl %r12d, 16(%rdi) + movl %r13d, 20(%rdi) + movl %r14d, 24(%rdi) + movl %r15d, 28(%rdi) + jnz L_sha256_len_avx2_rorx_start +L_sha256_len_avx2_rorx_done: + xorq %rax, %rax + vzeroupper + addq $0x200, %rsp + popq %rbp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + 
+        repz retq
+#ifndef __APPLE__
+.size Transform_Sha256_AVX2_RORX_Len,.-Transform_Sha256_AVX2_RORX_Len
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
diff --git a/client/wolfssl/wolfcrypt/src/sha3.c b/client/wolfssl/wolfcrypt/src/sha3.c
new file mode 100644
index 0000000..3a0c8dd
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/sha3.c
@@ -0,0 +1,1216 @@
+/* sha3.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_XILINX_CRYPT) && \
+    !defined(WOLFSSL_AFALG_XILINX_SHA3)
+
+#if defined(HAVE_FIPS) && \
+    defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+
+    /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+    #define FIPS_NO_WRAPPERS
+
+    #ifdef USE_WINDOWS_API
+        #pragma code_seg(".fipsA$l")
+        #pragma const_seg(".fipsB$l")
+    #endif
+#endif
+
+#include <wolfssl/wolfcrypt/sha3.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/hash.h>
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+
+#ifdef WOLFSSL_SHA3_SMALL
+/* Rotate a 64-bit value left.
+ *
+ * a  Number to rotate left.
+ * n  Number of bits to rotate left.
+ * returns the rotated number.
+ */
+#define ROTL64(a, n)    (((a)<<(n))|((a)>>(64-(n))))
+
+/* An array of values to XOR for block operation. */
+static const word64 hash_keccak_r[24] =
+{
+    0x0000000000000001UL, 0x0000000000008082UL,
+    0x800000000000808aUL, 0x8000000080008000UL,
+    0x000000000000808bUL, 0x0000000080000001UL,
+    0x8000000080008081UL, 0x8000000000008009UL,
+    0x000000000000008aUL, 0x0000000000000088UL,
+    0x0000000080008009UL, 0x000000008000000aUL,
+    0x000000008000808bUL, 0x800000000000008bUL,
+    0x8000000000008089UL, 0x8000000000008003UL,
+    0x8000000000008002UL, 0x8000000000000080UL,
+    0x000000000000800aUL, 0x800000008000000aUL,
+    0x8000000080008081UL, 0x8000000000008080UL,
+    0x0000000080000001UL, 0x8000000080008008UL
+};
+
+/* Indices used in swap and rotate operation. */
+#define K_I_0  10
+#define K_I_1   7
+#define K_I_2  11
+#define K_I_3  17
+#define K_I_4  18
+#define K_I_5   3
+#define K_I_6   5
+#define K_I_7  16
+#define K_I_8   8
+#define K_I_9  21
+#define K_I_10 24
+#define K_I_11  4
+#define K_I_12 15
+#define K_I_13 23
+#define K_I_14 19
+#define K_I_15 13
+#define K_I_16 12
+#define K_I_17  2
+#define K_I_18 20
+#define K_I_19 14
+#define K_I_20 22
+#define K_I_21  9
+#define K_I_22  6
+#define K_I_23  1
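+
+/* Illustrative note: each K_I_i index, together with the matching K_R_i
+ * rotation amount defined below, encodes one step of Keccak's combined
+ * rho/pi permutation cycle. For example, SWAP_ROTL(s, t0, t1, 0) below
+ * expands to
+ *
+ *     t1 = s[10]; s[10] = ROTL64(t0, 1);
+ *
+ * so lane 10 takes the previous value of lane 1 (seeded into t0 by
+ * BlockSha3) rotated left by 1 bit, while t1 carries lane 10's old value
+ * into the next step of the 24-element cycle.
+ */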
+
+/* Number of bits to rotate in swap and rotate operation. */
+#define K_R_0   1
+#define K_R_1   3
+#define K_R_2   6
+#define K_R_3  10
+#define K_R_4  15
+#define K_R_5  21
+#define K_R_6  28
+#define K_R_7  36
+#define K_R_8  45
+#define K_R_9  55
+#define K_R_10  2
+#define K_R_11 14
+#define K_R_12 27
+#define K_R_13 41
+#define K_R_14 56
+#define K_R_15  8
+#define K_R_16 25
+#define K_R_17 43
+#define K_R_18 62
+#define K_R_19 18
+#define K_R_20 39
+#define K_R_21 61
+#define K_R_22 20
+#define K_R_23 44
+
+/* Swap and rotate left operation.
+ *
+ * s   The state.
+ * t1  Temporary value holding the lane being rotated in.
+ * t2  Second temporary value; receives the lane being replaced.
+ * i   The index of the loop.
+ */
+#define SWAP_ROTL(s, t1, t2, i) \
+do \
+{ \
+    t2 = s[K_I_##i]; s[K_I_##i] = ROTL64(t1, K_R_##i); \
+} \
+while (0)
+
+/* Mix the XOR of the column's values into each number by column.
+ *
+ * s  The state.
+ * b  Temporary array of XORed column values.
+ * x  The index of the column.
+ * t  Temporary variable.
+ */
+#define COL_MIX(s, b, x, t) \
+do \
+{ \
+    for (x = 0; x < 5; x++) \
+        b[x] = s[x + 0] ^ s[x + 5] ^ s[x + 10] ^ s[x + 15] ^ s[x + 20]; \
+    for (x = 0; x < 5; x++) \
+    { \
+        t = b[(x + 4) % 5] ^ ROTL64(b[(x + 1) % 5], 1); \
+        s[x +  0] ^= t; \
+        s[x +  5] ^= t; \
+        s[x + 10] ^= t; \
+        s[x + 15] ^= t; \
+        s[x + 20] ^= t; \
+    } \
+} \
+while (0)
+
+#ifdef SHA3_BY_SPEC
+/* Mix the row values.
+ * BMI1 has ANDN instruction ((~a) & b) - Haswell and above.
+ *
+ * s   The state.
+ * b   Temporary array of XORed row values.
+ * y   The index of the row to work on.
+ * x   The index of the column.
+ * t0  Temporary variable. (Unused)
+ * t1  Temporary variable. (Unused)
+ */
+#define ROW_MIX(s, b, y, x, t0, t1) \
+do \
+{ \
+    for (y = 0; y < 5; y++) \
+    { \
+        for (x = 0; x < 5; x++) \
+            b[x] = s[y * 5 + x]; \
+        for (x = 0; x < 5; x++) \
+            s[y * 5 + x] = b[x] ^ (~b[(x + 1) % 5] & b[(x + 2) % 5]); \
+    } \
+} \
+while (0)
+#else
+/* Mix the row values.
+ * a ^ (~b & c) == a ^ (c & (b ^ c)) == (a ^ b) ^ (b | c)
+ *
+ * s    The state.
+ * b    Temporary array of XORed row values.
+ * y    The index of the row to work on.
+ * x    The index of the column.
+ * t12  Temporary variable.
+ * t34  Temporary variable.
+ */
+#define ROW_MIX(s, b, y, x, t12, t34) \
+do \
+{ \
+    for (y = 0; y < 5; y++) \
+    { \
+        for (x = 0; x < 5; x++) \
+            b[x] = s[y * 5 + x]; \
+        t12 = (b[1] ^ b[2]); t34 = (b[3] ^ b[4]); \
+        s[y * 5 + 0] = b[0] ^ (b[2] & t12); \
+        s[y * 5 + 1] = t12 ^ (b[2] | b[3]); \
+        s[y * 5 + 2] = b[2] ^ (b[4] & t34); \
+        s[y * 5 + 3] = t34 ^ (b[4] | b[0]); \
+        s[y * 5 + 4] = b[4] ^ (b[1] & (b[0] ^ b[1])); \
+    } \
+} \
+while (0)
+#endif /* SHA3_BY_SPEC */
+
+/* The block operation performed on the state.
+ *
+ * s  The state.
+ */
+static void BlockSha3(word64 *s)
+{
+    byte i, x, y;
+    word64 t0, t1;
+    word64 b[5];
+
+    for (i = 0; i < 24; i++)
+    {
+        COL_MIX(s, b, x, t0);
+
+        t0 = s[1];
+        SWAP_ROTL(s, t0, t1, 0);
+        SWAP_ROTL(s, t1, t0, 1);
+        SWAP_ROTL(s, t0, t1, 2);
+        SWAP_ROTL(s, t1, t0, 3);
+        SWAP_ROTL(s, t0, t1, 4);
+        SWAP_ROTL(s, t1, t0, 5);
+        SWAP_ROTL(s, t0, t1, 6);
+        SWAP_ROTL(s, t1, t0, 7);
+        SWAP_ROTL(s, t0, t1, 8);
+        SWAP_ROTL(s, t1, t0, 9);
+        SWAP_ROTL(s, t0, t1, 10);
+        SWAP_ROTL(s, t1, t0, 11);
+        SWAP_ROTL(s, t0, t1, 12);
+        SWAP_ROTL(s, t1, t0, 13);
+        SWAP_ROTL(s, t0, t1, 14);
+        SWAP_ROTL(s, t1, t0, 15);
+        SWAP_ROTL(s, t0, t1, 16);
+        SWAP_ROTL(s, t1, t0, 17);
+        SWAP_ROTL(s, t0, t1, 18);
+        SWAP_ROTL(s, t1, t0, 19);
+        SWAP_ROTL(s, t0, t1, 20);
+        SWAP_ROTL(s, t1, t0, 21);
+        SWAP_ROTL(s, t0, t1, 22);
+        SWAP_ROTL(s, t1, t0, 23);
+
+        ROW_MIX(s, b, y, x, t0, t1);
+
+        s[0] ^= hash_keccak_r[i];
+    }
+}
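+
+/* The ROW_MIX rewrite above sidesteps the NOT of the chi step (helpful
+ * without BMI1's ANDN). As an illustrative single-bit check of the
+ * identity a ^ (~b & c) == a ^ (c & (b ^ c)) == (a ^ b) ^ (b | c):
+ *
+ *     int a, b, c, ok = 1;
+ *     for (a = 0; a < 2; a++)
+ *     for (b = 0; b < 2; b++)
+ *     for (c = 0; c < 2; c++) {
+ *         int spec = a ^ ((b ^ 1) & c);        // a ^ (~b & c) on one bit
+ *         ok &= spec == (a ^ (c & (b ^ c)));   // form used for s[y*5 + 0]
+ *         ok &= spec == ((a ^ b) ^ (b | c));   // form used for s[y*5 + 1]
+ *     }
+ *
+ * ok stays 1 for all eight combinations, and the identity lifts to 64-bit
+ * lanes bit by bit.
+ */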
+#else
+/* Rotate a 64-bit value left.
+ *
+ * a  Number to rotate left.
+ * n  Number of bits to rotate left.
+ * returns the rotated number.
+ */
+#define ROTL64(a, n)    (((a)<<(n))|((a)>>(64-(n))))
+
+/* An array of values to XOR for block operation. */
+static const word64 hash_keccak_r[24] =
+{
+    0x0000000000000001UL, 0x0000000000008082UL,
+    0x800000000000808aUL, 0x8000000080008000UL,
+    0x000000000000808bUL, 0x0000000080000001UL,
+    0x8000000080008081UL, 0x8000000000008009UL,
+    0x000000000000008aUL, 0x0000000000000088UL,
+    0x0000000080008009UL, 0x000000008000000aUL,
+    0x000000008000808bUL, 0x800000000000008bUL,
+    0x8000000000008089UL, 0x8000000000008003UL,
+    0x8000000000008002UL, 0x8000000000000080UL,
+    0x000000000000800aUL, 0x800000008000000aUL,
+    0x8000000080008081UL, 0x8000000000008080UL,
+    0x0000000080000001UL, 0x8000000080008008UL
+};
+
+/* Indices used in swap and rotate operation. */
+#define KI_0   6
+#define KI_1  12
+#define KI_2  18
+#define KI_3  24
+#define KI_4   3
+#define KI_5   9
+#define KI_6  10
+#define KI_7  16
+#define KI_8  22
+#define KI_9   1
+#define KI_10  7
+#define KI_11 13
+#define KI_12 19
+#define KI_13 20
+#define KI_14  4
+#define KI_15  5
+#define KI_16 11
+#define KI_17 17
+#define KI_18 23
+#define KI_19  2
+#define KI_20  8
+#define KI_21 14
+#define KI_22 15
+#define KI_23 21
+
+/* Number of bits to rotate in swap and rotate operation. */
+#define KR_0  44
+#define KR_1  43
+#define KR_2  21
+#define KR_3  14
+#define KR_4  28
+#define KR_5  20
+#define KR_6   3
+#define KR_7  45
+#define KR_8  61
+#define KR_9   1
+#define KR_10  6
+#define KR_11 25
+#define KR_12  8
+#define KR_13 18
+#define KR_14 27
+#define KR_15 36
+#define KR_16 10
+#define KR_17 15
+#define KR_18 56
+#define KR_19 62
+#define KR_20 55
+#define KR_21 39
+#define KR_22 41
+#define KR_23  2
+
+/* Mix the XOR of the column's values into each number by column.
+ *
+ * s  The state.
+ * b  Temporary array of XORed column values.
+ * x  The index of the column. (Unused in this unrolled variant.)
+ * t  Temporary variable.
+ */
+#define COL_MIX(s, b, x, t) \
+do \
+{ \
+    b[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20]; \
+    b[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21]; \
+    b[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22]; \
+    b[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23]; \
+    b[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24]; \
+    t = b[(0 + 4) % 5] ^ ROTL64(b[(0 + 1) % 5], 1); \
+    s[ 0] ^= t; s[ 5] ^= t; s[10] ^= t; s[15] ^= t; s[20] ^= t; \
+    t = b[(1 + 4) % 5] ^ ROTL64(b[(1 + 1) % 5], 1); \
+    s[ 1] ^= t; s[ 6] ^= t; s[11] ^= t; s[16] ^= t; s[21] ^= t; \
+    t = b[(2 + 4) % 5] ^ ROTL64(b[(2 + 1) % 5], 1); \
+    s[ 2] ^= t; s[ 7] ^= t; s[12] ^= t; s[17] ^= t; s[22] ^= t; \
+    t = b[(3 + 4) % 5] ^ ROTL64(b[(3 + 1) % 5], 1); \
+    s[ 3] ^= t; s[ 8] ^= t; s[13] ^= t; s[18] ^= t; s[23] ^= t; \
+    t = b[(4 + 4) % 5] ^ ROTL64(b[(4 + 1) % 5], 1); \
+    s[ 4] ^= t; s[ 9] ^= t; s[14] ^= t; s[19] ^= t; s[24] ^= t; \
+} \
+while (0)
+
+#define S(s1, i) ROTL64(s1[KI_##i], KR_##i)
+
+#ifdef SHA3_BY_SPEC
+/* Mix the row values.
+ * BMI1 has ANDN instruction ((~a) & b) - Haswell and above.
+ *
+ * s2  The new state.
+ * s1  The current state.
+ * b   Temporary array of XORed row values.
+ * t0  Temporary variable. (Unused)
+ * t1  Temporary variable. (Unused)
+ */
+#define ROW_MIX(s2, s1, b, t0, t1) \
+do \
+{ \
+    b[0] = s1[0]; \
+    b[1] = S(s1, 0); \
+    b[2] = S(s1, 1); \
+    b[3] = S(s1, 2); \
+    b[4] = S(s1, 3); \
+    s2[0] = b[0] ^ (~b[1] & b[2]); \
+    s2[1] = b[1] ^ (~b[2] & b[3]); \
+    s2[2] = b[2] ^ (~b[3] & b[4]); \
+    s2[3] = b[3] ^ (~b[4] & b[0]); \
+    s2[4] = b[4] ^ (~b[0] & b[1]); \
+    b[0] = S(s1, 4); \
+    b[1] = S(s1, 5); \
+    b[2] = S(s1, 6); \
+    b[3] = S(s1, 7); \
+    b[4] = S(s1, 8); \
+    s2[5] = b[0] ^ (~b[1] & b[2]); \
+    s2[6] = b[1] ^ (~b[2] & b[3]); \
+    s2[7] = b[2] ^ (~b[3] & b[4]); \
+    s2[8] = b[3] ^ (~b[4] & b[0]); \
+    s2[9] = b[4] ^ (~b[0] & b[1]); \
+    b[0] = S(s1, 9); \
+    b[1] = S(s1, 10); \
+    b[2] = S(s1, 11); \
+    b[3] = S(s1, 12); \
+    b[4] = S(s1, 13); \
+    s2[10] = b[0] ^ (~b[1] & b[2]); \
+    s2[11] = b[1] ^ (~b[2] & b[3]); \
+    s2[12] = b[2] ^ (~b[3] & b[4]); \
+    s2[13] = b[3] ^ (~b[4] & b[0]); \
+    s2[14] = b[4] ^ (~b[0] & b[1]); \
+    b[0] = S(s1, 14); \
+    b[1] = S(s1, 15); \
+    b[2] = S(s1, 16); \
+    b[3] = S(s1, 17); \
+    b[4] = S(s1, 18); \
+    s2[15] = b[0] ^ (~b[1] & b[2]); \
+    s2[16] = b[1] ^ (~b[2] & b[3]); \
+    s2[17] = b[2] ^ (~b[3] & b[4]); \
+    s2[18] = b[3] ^ (~b[4] & b[0]); \
+    s2[19] = b[4] ^ (~b[0] & b[1]); \
+    b[0] = S(s1, 19); \
+    b[1] = S(s1, 20); \
+    b[2] = S(s1, 21); \
+    b[3] = S(s1, 22); \
+    b[4] = S(s1, 23); \
+    s2[20] = b[0] ^ (~b[1] & b[2]); \
+    s2[21] = b[1] ^ (~b[2] & b[3]); \
+    s2[22] = b[2] ^ (~b[3] & b[4]); \
+    s2[23] = b[3] ^ (~b[4] & b[0]); \
+    s2[24] = b[4] ^ (~b[0] & b[1]); \
+} \
+while (0)
+#else
+/* Mix the row values.
+ * a ^ (~b & c) == a ^ (c & (b ^ c)) == (a ^ b) ^ (b | c)
+ *
+ * s2   The new state.
+ * s1   The current state.
+ * b    Temporary array of XORed row values.
+ * t12  Temporary variable.
+ * t34  Temporary variable.
+ */
+#define ROW_MIX(s2, s1, b, t12, t34) \
+do \
+{ \
+    b[0] = s1[0]; \
+    b[1] = S(s1, 0); \
+    b[2] = S(s1, 1); \
+    b[3] = S(s1, 2); \
+    b[4] = S(s1, 3); \
+    t12 = (b[1] ^ b[2]); t34 = (b[3] ^ b[4]); \
+    s2[0] = b[0] ^ (b[2] & t12); \
+    s2[1] = t12 ^ (b[2] | b[3]); \
+    s2[2] = b[2] ^ (b[4] & t34); \
+    s2[3] = t34 ^ (b[4] | b[0]); \
+    s2[4] = b[4] ^ (b[1] & (b[0] ^ b[1])); \
+    b[0] = S(s1, 4); \
+    b[1] = S(s1, 5); \
+    b[2] = S(s1, 6); \
+    b[3] = S(s1, 7); \
+    b[4] = S(s1, 8); \
+    t12 = (b[1] ^ b[2]); t34 = (b[3] ^ b[4]); \
+    s2[5] = b[0] ^ (b[2] & t12); \
+    s2[6] = t12 ^ (b[2] | b[3]); \
+    s2[7] = b[2] ^ (b[4] & t34); \
+    s2[8] = t34 ^ (b[4] | b[0]); \
+    s2[9] = b[4] ^ (b[1] & (b[0] ^ b[1])); \
+    b[0] = S(s1, 9); \
+    b[1] = S(s1, 10); \
+    b[2] = S(s1, 11); \
+    b[3] = S(s1, 12); \
+    b[4] = S(s1, 13); \
+    t12 = (b[1] ^ b[2]); t34 = (b[3] ^ b[4]); \
+    s2[10] = b[0] ^ (b[2] & t12); \
+    s2[11] = t12 ^ (b[2] | b[3]); \
+    s2[12] = b[2] ^ (b[4] & t34); \
+    s2[13] = t34 ^ (b[4] | b[0]); \
+    s2[14] = b[4] ^ (b[1] & (b[0] ^ b[1])); \
+    b[0] = S(s1, 14); \
+    b[1] = S(s1, 15); \
+    b[2] = S(s1, 16); \
+    b[3] = S(s1, 17); \
+    b[4] = S(s1, 18); \
+    t12 = (b[1] ^ b[2]); t34 = (b[3] ^ b[4]); \
+    s2[15] = b[0] ^ (b[2] & t12); \
+    s2[16] = t12 ^ (b[2] | b[3]); \
+    s2[17] = b[2] ^ (b[4] & t34); \
+    s2[18] = t34 ^ (b[4] | b[0]); \
+    s2[19] = b[4] ^ (b[1] & (b[0] ^ b[1])); \
+    b[0] = S(s1, 19); \
+    b[1] = S(s1, 20); \
+    b[2] = S(s1, 21); \
+    b[3] = S(s1, 22); \
+    b[4] = S(s1, 23); \
+    t12 = (b[1] ^ b[2]); t34 = (b[3] ^ b[4]); \
+    s2[20] = b[0] ^ (b[2] & t12); \
+    s2[21] = t12 ^ (b[2] | b[3]); \
+    s2[22] = b[2] ^ (b[4] & t34); \
+    s2[23] = t34 ^ (b[4] | b[0]); \
+    s2[24] = b[4] ^ (b[1] & (b[0] ^ b[1])); \
+} \
+while (0)
+#endif /* SHA3_BY_SPEC */
+
+/* The block operation performed on the state.
+ *
+ * s  The state.
+ */
+static void BlockSha3(word64 *s)
+{
+    word64 n[25];
+    word64 b[5];
+    word64 t0;
+#ifndef SHA3_BY_SPEC
+    word64 t1;
+#endif
+    byte i;
+
+    /* Two rounds per iteration, ping-ponging between s and n so no copy
+     * back is needed. The x and (for SHA3_BY_SPEC) t1 macro arguments are
+     * unused by the unrolled macro variants above. */
+    for (i = 0; i < 24; i += 2)
+    {
+        COL_MIX(s, b, x, t0);
+        ROW_MIX(n, s, b, t0, t1);
+        n[0] ^= hash_keccak_r[i];
+
+        COL_MIX(n, b, x, t0);
+        ROW_MIX(s, n, b, t0, t1);
+        s[0] ^= hash_keccak_r[i+1];
+    }
+}
+#endif /* WOLFSSL_SHA3_SMALL */
+
+/* Convert the array of bytes, in little-endian order, to a 64-bit integer.
+ *
+ * a  Array of bytes.
+ * returns a 64-bit integer.
+ */
+static word64 Load64BitBigEndian(const byte* a)
+{
+#ifdef BIG_ENDIAN_ORDER
+    word64 n = 0;
+    int i;
+
+    for (i = 0; i < 8; i++)
+        n |= (word64)a[i] << (8 * i);
+
+    return n;
+#else
+    return *(word64*)a;
+#endif
+}
+
+/* Initialize the state for a SHA-3 hash operation.
+ *
+ * sha3  wc_Sha3 object holding state.
+ * returns 0 on success.
+ */
+static int InitSha3(wc_Sha3* sha3)
+{
+    int i;
+
+    for (i = 0; i < 25; i++)
+        sha3->s[i] = 0;
+    sha3->i = 0;
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+    sha3->flags = 0;
+#endif
+
+    return 0;
+}
+
+/* Update the SHA-3 hash state with message data.
+ *
+ * sha3  wc_Sha3 object holding state.
+ * data  Message data to be hashed.
+ * len   Length of the message data.
+ * p     Number of 64-bit numbers in a block of data to process.
+ * returns 0 on success.
+ */
+static int Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p)
+{
+    byte i;
+    byte l;
+    byte *t;
+
+    /* Complete a partially filled block first. */
+    if (sha3->i > 0)
+    {
+        l = p * 8 - sha3->i;
+        if (l > len) {
+            l = (byte)len;
+        }
+
+        t = &sha3->t[sha3->i];
+        for (i = 0; i < l; i++)
+            t[i] = data[i];
+        data += i;
+        len -= i;
+        sha3->i += i;
+
+        if (sha3->i == p * 8)
+        {
+            for (i = 0; i < p; i++)
+                sha3->s[i] ^= Load64BitBigEndian(sha3->t + 8 * i);
+            BlockSha3(sha3->s);
+            sha3->i = 0;
+        }
+    }
+    /* Absorb full blocks straight from the input. */
+    while (len >= ((word32)(p * 8)))
+    {
+        for (i = 0; i < p; i++)
+            sha3->s[i] ^= Load64BitBigEndian(data + 8 * i);
+        BlockSha3(sha3->s);
+        len -= p * 8;
+        data += p * 8;
+    }
+    /* Buffer any remaining tail bytes. */
+    for (i = 0; i < len; i++)
+        sha3->t[i] = data[i];
+    sha3->i += i;
+
+    return 0;
+}
+
+/* Calculate the SHA-3 hash based on all the message data seen.
+ *
+ * sha3     wc_Sha3 object holding state.
+ * padChar  Domain-separation/padding byte appended after the message
+ *          (0x06 for SHA-3).
+ * hash     Buffer to hold the hash result.
+ * p        Number of 64-bit numbers in a block of data to process.
+ * l        Number of bytes in output.
+ * returns 0 on success.
+ */
+static int Sha3Final(wc_Sha3* sha3, byte padChar, byte* hash, byte p, byte l)
+{
+    byte i;
+    byte *s8 = (byte *)sha3->s;
+
+    sha3->t[p * 8 - 1] = 0x00;
+#ifdef WOLFSSL_HASH_FLAGS
+    if (p == WC_SHA3_256_COUNT && sha3->flags & WC_HASH_SHA3_KECCAK256) {
+        padChar = 0x01;
+    }
+#endif
+    sha3->t[ sha3->i] = padChar;
+    sha3->t[p * 8 - 1] |= 0x80;
+    for (i = sha3->i + 1; i < p * 8 - 1; i++)
+        sha3->t[i] = 0;
+    for (i = 0; i < p; i++)
+        sha3->s[i] ^= Load64BitBigEndian(sha3->t + 8 * i);
+    BlockSha3(sha3->s);
+#if defined(BIG_ENDIAN_ORDER)
+    ByteReverseWords64(sha3->s, sha3->s, ((l+7)/8)*8);
+#endif
+    for (i = 0; i < l; i++)
+        hash[i] = s8[i];
+
+    return 0;
+}
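+
+/* For reference, the 'p' block-size argument threaded through the helpers
+ * above is the Keccak rate in 64-bit words, (200 - 2 * digest_bytes) / 8:
+ *
+ *     SHA3-224: (200 - 2*28) / 8 = 18 words (144-byte blocks)
+ *     SHA3-256: (200 - 2*32) / 8 = 17 words (136-byte blocks)
+ *     SHA3-384: (200 - 2*48) / 8 = 13 words (104-byte blocks)
+ *     SHA3-512: (200 - 2*64) / 8 =  9 words ( 72-byte blocks)
+ *
+ * matching the WC_SHA3_224_COUNT..WC_SHA3_512_COUNT values the public
+ * wrappers below pass in.
+ */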
+ */ +static int wc_InitSha3(wc_Sha3* sha3, void* heap, int devId) +{ + int ret = 0; + + if (sha3 == NULL) + return BAD_FUNC_ARG; + + sha3->heap = heap; + ret = InitSha3(sha3); + if (ret != 0) + return ret; + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA3) + ret = wolfAsync_DevCtxInit(&sha3->asyncDev, + WOLFSSL_ASYNC_MARKER_SHA3, sha3->heap, devId); +#else + (void)devId; +#endif /* WOLFSSL_ASYNC_CRYPT */ + + return ret; +} + +/* Update the SHA-3 hash state with message data. + * + * sha3 wc_Sha3 object holding state. + * data Message data to be hashed. + * len Length of the message data. + * p Number of 64-bit numbers in a block of data to process. + * returns 0 on success. + */ +static int wc_Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p) +{ + int ret; + + if (sha3 == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA3) + if (sha3->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA3) { + #if defined(HAVE_INTEL_QA) && defined(QAT_V2) + /* QAT only supports SHA3_256 */ + if (p == WC_SHA3_256_COUNT) { + ret = IntelQaSymSha3(&sha3->asyncDev, NULL, data, len); + if (ret != NOT_COMPILED_IN) + return ret; + /* fall-through when unavailable */ + } + #endif + } +#endif /* WOLFSSL_ASYNC_CRYPT */ + + ret = Sha3Update(sha3, data, len, p); + + return ret; +} + +/* Calculate the SHA-3 hash based on all the message data seen. + * + * sha3 wc_Sha3 object holding state. + * hash Buffer to hold the hash result. + * p Number of 64-bit numbers in a block of data to process. + * len Number of bytes in output. + * returns 0 on success. + */ +static int wc_Sha3Final(wc_Sha3* sha3, byte* hash, byte p, byte len) +{ + int ret; + + if (sha3 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA3) + if (sha3->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA3) { + #if defined(HAVE_INTEL_QA) && defined(QAT_V2) + /* QAT only supports SHA3_256 */ + /* QAT SHA-3 only supported on v2 (8970 or later cards) */ + if (len == WC_SHA3_256_DIGEST_SIZE) { + ret = IntelQaSymSha3(&sha3->asyncDev, hash, NULL, len); + if (ret != NOT_COMPILED_IN) + return ret; + /* fall-through when unavailable */ + } + #endif + } +#endif /* WOLFSSL_ASYNC_CRYPT */ + + ret = Sha3Final(sha3, 0x06, hash, p, len); + if (ret != 0) + return ret; + + return InitSha3(sha3); /* reset state */ +} + +/* Dispose of any dynamically allocated data from the SHA3-384 operation. + * (Required for async ops.) + * + * sha3 wc_Sha3 object holding state. + * returns 0 on success. + */ +static void wc_Sha3Free(wc_Sha3* sha3) +{ + (void)sha3; + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA3) + if (sha3 == NULL) + return; + + wolfAsync_DevCtxFree(&sha3->asyncDev, WOLFSSL_ASYNC_MARKER_SHA3); +#endif /* WOLFSSL_ASYNC_CRYPT */ +} + + +/* Copy the state of the SHA3 operation. + * + * src wc_Sha3 object holding state top copy. + * dst wc_Sha3 object to copy into. + * returns 0 on success. + */ +static int wc_Sha3Copy(wc_Sha3* src, wc_Sha3* dst) +{ + int ret = 0; + + if (src == NULL || dst == NULL) + return BAD_FUNC_ARG; + + XMEMCPY(dst, src, sizeof(wc_Sha3)); + +#ifdef WOLFSSL_ASYNC_CRYPT + ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev); +#endif +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + dst->flags |= WC_HASH_FLAG_ISCOPY; +#endif + + return ret; +} + +/* Calculate the SHA3-224 hash based on all the message data so far. 
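+ * (Despite the SHA3-224 wording, this helper is generic: p selects the
+ * variant and len the digest size. The digest is produced by running
+ * wc_Sha3Final on a wc_Sha3Copy of the state, which is why the running
+ * state is left usable afterwards.)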
+ * More message data can be added, after this operation, using the current + * state. + * + * sha3 wc_Sha3 object holding state. + * hash Buffer to hold the hash result. Must be at least 28 bytes. + * p Number of 64-bit numbers in a block of data to process. + * len Number of bytes in output. + * returns 0 on success. + */ +static int wc_Sha3GetHash(wc_Sha3* sha3, byte* hash, byte p, byte len) +{ + int ret; + wc_Sha3 tmpSha3; + + if (sha3 == NULL || hash == NULL) + return BAD_FUNC_ARG; + + ret = wc_Sha3Copy(sha3, &tmpSha3); + if (ret == 0) { + ret = wc_Sha3Final(&tmpSha3, hash, p, len); + } + return ret; +} + + +/* Initialize the state for a SHA3-224 hash operation. + * + * sha3 wc_Sha3 object holding state. + * heap Heap reference for dynamic memory allocation. (Used in async ops.) + * devId Device identifier for asynchronous operation. + * returns 0 on success. + */ +int wc_InitSha3_224(wc_Sha3* sha3, void* heap, int devId) +{ + return wc_InitSha3(sha3, heap, devId); +} + +/* Update the SHA3-224 hash state with message data. + * + * sha3 wc_Sha3 object holding state. + * data Message data to be hashed. + * len Length of the message data. + * returns 0 on success. + */ +int wc_Sha3_224_Update(wc_Sha3* sha3, const byte* data, word32 len) +{ + return wc_Sha3Update(sha3, data, len, WC_SHA3_224_COUNT); +} + +/* Calculate the SHA3-224 hash based on all the message data seen. + * The state is initialized ready for a new message to hash. + * + * sha3 wc_Sha3 object holding state. + * hash Buffer to hold the hash result. Must be at least 28 bytes. + * returns 0 on success. + */ +int wc_Sha3_224_Final(wc_Sha3* sha3, byte* hash) +{ + return wc_Sha3Final(sha3, hash, WC_SHA3_224_COUNT, WC_SHA3_224_DIGEST_SIZE); +} + +/* Dispose of any dynamically allocated data from the SHA3-224 operation. + * (Required for async ops.) + * + * sha3 wc_Sha3 object holding state. + * returns 0 on success. + */ +void wc_Sha3_224_Free(wc_Sha3* sha3) +{ + wc_Sha3Free(sha3); +} + +/* Calculate the SHA3-224 hash based on all the message data so far. + * More message data can be added, after this operation, using the current + * state. + * + * sha3 wc_Sha3 object holding state. + * hash Buffer to hold the hash result. Must be at least 28 bytes. + * returns 0 on success. + */ +int wc_Sha3_224_GetHash(wc_Sha3* sha3, byte* hash) +{ + return wc_Sha3GetHash(sha3, hash, WC_SHA3_224_COUNT, WC_SHA3_224_DIGEST_SIZE); +} + +/* Copy the state of the SHA3-224 operation. + * + * src wc_Sha3 object holding state top copy. + * dst wc_Sha3 object to copy into. + * returns 0 on success. + */ +int wc_Sha3_224_Copy(wc_Sha3* src, wc_Sha3* dst) +{ + return wc_Sha3Copy(src, dst); +} + + +/* Initialize the state for a SHA3-256 hash operation. + * + * sha3 wc_Sha3 object holding state. + * heap Heap reference for dynamic memory allocation. (Used in async ops.) + * devId Device identifier for asynchronous operation. + * returns 0 on success. + */ +int wc_InitSha3_256(wc_Sha3* sha3, void* heap, int devId) +{ + return wc_InitSha3(sha3, heap, devId); +} + +/* Update the SHA3-256 hash state with message data. + * + * sha3 wc_Sha3 object holding state. + * data Message data to be hashed. + * len Length of the message data. + * returns 0 on success. + */ +int wc_Sha3_256_Update(wc_Sha3* sha3, const byte* data, word32 len) +{ + return wc_Sha3Update(sha3, data, len, WC_SHA3_256_COUNT); +} + +/* Calculate the SHA3-256 hash based on all the message data seen. + * The state is initialized ready for a new message to hash. 
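+ *
+ * Typical one-shot use (an illustrative sketch, error checks omitted):
+ *
+ *     wc_Sha3 sha3;
+ *     byte digest[WC_SHA3_256_DIGEST_SIZE];
+ *     wc_InitSha3_256(&sha3, NULL, INVALID_DEVID);
+ *     wc_Sha3_256_Update(&sha3, (const byte*)"abc", 3);
+ *     wc_Sha3_256_Final(&sha3, digest);
+ *     wc_Sha3_256_Free(&sha3);
+ *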
+ * + * sha3 wc_Sha3 object holding state. + * hash Buffer to hold the hash result. Must be at least 32 bytes. + * returns 0 on success. + */ +int wc_Sha3_256_Final(wc_Sha3* sha3, byte* hash) +{ + return wc_Sha3Final(sha3, hash, WC_SHA3_256_COUNT, WC_SHA3_256_DIGEST_SIZE); +} + +/* Dispose of any dynamically allocated data from the SHA3-256 operation. + * (Required for async ops.) + * + * sha3 wc_Sha3 object holding state. + * returns 0 on success. + */ +void wc_Sha3_256_Free(wc_Sha3* sha3) +{ + wc_Sha3Free(sha3); +} + +/* Calculate the SHA3-256 hash based on all the message data so far. + * More message data can be added, after this operation, using the current + * state. + * + * sha3 wc_Sha3 object holding state. + * hash Buffer to hold the hash result. Must be at least 32 bytes. + * returns 0 on success. + */ +int wc_Sha3_256_GetHash(wc_Sha3* sha3, byte* hash) +{ + return wc_Sha3GetHash(sha3, hash, WC_SHA3_256_COUNT, WC_SHA3_256_DIGEST_SIZE); +} + +/* Copy the state of the SHA3-256 operation. + * + * src wc_Sha3 object holding state top copy. + * dst wc_Sha3 object to copy into. + * returns 0 on success. + */ +int wc_Sha3_256_Copy(wc_Sha3* src, wc_Sha3* dst) +{ + return wc_Sha3Copy(src, dst); +} + + +/* Initialize the state for a SHA3-384 hash operation. + * + * sha3 wc_Sha3 object holding state. + * heap Heap reference for dynamic memory allocation. (Used in async ops.) + * devId Device identifier for asynchronous operation. + * returns 0 on success. + */ +int wc_InitSha3_384(wc_Sha3* sha3, void* heap, int devId) +{ + return wc_InitSha3(sha3, heap, devId); +} + +/* Update the SHA3-384 hash state with message data. + * + * sha3 wc_Sha3 object holding state. + * data Message data to be hashed. + * len Length of the message data. + * returns 0 on success. + */ +int wc_Sha3_384_Update(wc_Sha3* sha3, const byte* data, word32 len) +{ + return wc_Sha3Update(sha3, data, len, WC_SHA3_384_COUNT); +} + +/* Calculate the SHA3-384 hash based on all the message data seen. + * The state is initialized ready for a new message to hash. + * + * sha3 wc_Sha3 object holding state. + * hash Buffer to hold the hash result. Must be at least 48 bytes. + * returns 0 on success. + */ +int wc_Sha3_384_Final(wc_Sha3* sha3, byte* hash) +{ + return wc_Sha3Final(sha3, hash, WC_SHA3_384_COUNT, WC_SHA3_384_DIGEST_SIZE); +} + +/* Dispose of any dynamically allocated data from the SHA3-384 operation. + * (Required for async ops.) + * + * sha3 wc_Sha3 object holding state. + * returns 0 on success. + */ +void wc_Sha3_384_Free(wc_Sha3* sha3) +{ + wc_Sha3Free(sha3); +} + +/* Calculate the SHA3-384 hash based on all the message data so far. + * More message data can be added, after this operation, using the current + * state. + * + * sha3 wc_Sha3 object holding state. + * hash Buffer to hold the hash result. Must be at least 48 bytes. + * returns 0 on success. + */ +int wc_Sha3_384_GetHash(wc_Sha3* sha3, byte* hash) +{ + return wc_Sha3GetHash(sha3, hash, WC_SHA3_384_COUNT, WC_SHA3_384_DIGEST_SIZE); +} + +/* Copy the state of the SHA3-384 operation. + * + * src wc_Sha3 object holding state top copy. + * dst wc_Sha3 object to copy into. + * returns 0 on success. + */ +int wc_Sha3_384_Copy(wc_Sha3* src, wc_Sha3* dst) +{ + return wc_Sha3Copy(src, dst); +} + + +/* Initialize the state for a SHA3-512 hash operation. + * + * sha3 wc_Sha3 object holding state. + * heap Heap reference for dynamic memory allocation. (Used in async ops.) + * devId Device identifier for asynchronous operation. + * returns 0 on success. 
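+ * (All four digest sizes share the same 25-word sponge state and the
+ * same generic wc_InitSha3; the wrappers differ only in the rate
+ * constant passed to wc_Sha3Update/wc_Sha3Final and in the digest
+ * length.)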
+ */ +int wc_InitSha3_512(wc_Sha3* sha3, void* heap, int devId) +{ + return wc_InitSha3(sha3, heap, devId); +} + +/* Update the SHA3-512 hash state with message data. + * + * sha3 wc_Sha3 object holding state. + * data Message data to be hashed. + * len Length of the message data. + * returns 0 on success. + */ +int wc_Sha3_512_Update(wc_Sha3* sha3, const byte* data, word32 len) +{ + return wc_Sha3Update(sha3, data, len, WC_SHA3_512_COUNT); +} + +/* Calculate the SHA3-512 hash based on all the message data seen. + * The state is initialized ready for a new message to hash. + * + * sha3 wc_Sha3 object holding state. + * hash Buffer to hold the hash result. Must be at least 64 bytes. + * returns 0 on success. + */ +int wc_Sha3_512_Final(wc_Sha3* sha3, byte* hash) +{ + return wc_Sha3Final(sha3, hash, WC_SHA3_512_COUNT, WC_SHA3_512_DIGEST_SIZE); +} + +/* Dispose of any dynamically allocated data from the SHA3-512 operation. + * (Required for async ops.) + * + * sha3 wc_Sha3 object holding state. + * returns 0 on success. + */ +void wc_Sha3_512_Free(wc_Sha3* sha3) +{ + wc_Sha3Free(sha3); +} + +/* Calculate the SHA3-512 hash based on all the message data so far. + * More message data can be added, after this operation, using the current + * state. + * + * sha3 wc_Sha3 object holding state. + * hash Buffer to hold the hash result. Must be at least 64 bytes. + * returns 0 on success. + */ +int wc_Sha3_512_GetHash(wc_Sha3* sha3, byte* hash) +{ + return wc_Sha3GetHash(sha3, hash, WC_SHA3_512_COUNT, WC_SHA3_512_DIGEST_SIZE); +} + +/* Copy the state of the SHA3-512 operation. + * + * src wc_Sha3 object holding state top copy. + * dst wc_Sha3 object to copy into. + * returns 0 on success. + */ +int wc_Sha3_512_Copy(wc_Sha3* src, wc_Sha3* dst) +{ + return wc_Sha3Copy(src, dst); +} + +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) +int wc_Sha3_SetFlags(wc_Sha3* sha3, word32 flags) +{ + if (sha3) { + sha3->flags = flags; + } + return 0; +} +int wc_Sha3_GetFlags(wc_Sha3* sha3, word32* flags) +{ + if (sha3 && flags) { + *flags = sha3->flags; + } + return 0; +} +#endif + +#if defined(WOLFSSL_SHAKE256) +/* Initialize the state for a Shake256 hash operation. + * + * shake wc_Shake object holding state. + * heap Heap reference for dynamic memory allocation. (Used in async ops.) + * devId Device identifier for asynchronous operation. + * returns 0 on success. + */ +int wc_InitShake256(wc_Shake* shake, void* heap, int devId) +{ + return wc_InitSha3(shake, heap, devId); +} + +/* Update the SHAKE256 hash state with message data. + * + * shake wc_Shake object holding state. + * data Message data to be hashed. + * len Length of the message data. + * returns 0 on success. + */ +int wc_Shake256_Update(wc_Shake* shake, const byte* data, word32 len) +{ + if (shake == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + + return Sha3Update(shake, data, len, WC_SHA3_256_COUNT); +} + +/* Calculate the SHAKE256 hash based on all the message data seen. + * The state is initialized ready for a new message to hash. + * + * shake wc_Shake object holding state. + * hash Buffer to hold the hash result. Must be at least 64 bytes. + * returns 0 on success. 
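+ * (hashLen is the number of output bytes requested; 'hash' must hold at
+ * least that many. Sha3Final is called below with pad byte 0x1f, the
+ * SHAKE domain-separation padding, where the fixed-length SHA-3 digests
+ * use 0x06.)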
 */ +int wc_Shake256_Final(wc_Shake* shake, byte* hash, word32 hashLen) +{ + int ret; + + if (shake == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + + ret = Sha3Final(shake, 0x1f, hash, WC_SHA3_256_COUNT, hashLen); + if (ret != 0) + return ret; + + return InitSha3(shake); /* reset state */ +} + +/* Dispose of any dynamically allocated data from the SHAKE256 operation. + * (Required for async ops.) + * + * shake wc_Shake object holding state. + * returns 0 on success. + */ +void wc_Shake256_Free(wc_Shake* shake) +{ + wc_Sha3Free(shake); +} + +/* Copy the state of the SHAKE256 operation. + * + * src wc_Shake object holding state to copy. + * dst wc_Shake object to copy into. + * returns 0 on success. + */ +int wc_Shake256_Copy(wc_Shake* src, wc_Shake* dst) +{ + return wc_Sha3Copy(src, dst); +} +#endif + +#endif /* WOLFSSL_SHA3 */ diff --git a/client/wolfssl/wolfcrypt/src/sha512.c b/client/wolfssl/wolfcrypt/src/sha512.c new file mode 100644 index 0000000..0a648bf --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/sha512.c @@ -0,0 +1,1225 @@ +/* sha512.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#if (defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384)) && !defined(WOLFSSL_ARMASM) + +#if defined(HAVE_FIPS) && \ + defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2) + + /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ + #define FIPS_NO_WRAPPERS + + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$k") + #pragma const_seg(".fipsB$k") + #endif +#endif + +#include <wolfssl/wolfcrypt/sha512.h> +#include <wolfssl/wolfcrypt/error-crypt.h> +#include <wolfssl/wolfcrypt/cpuid.h> +#include <wolfssl/wolfcrypt/hash.h> + +/* deprecated USE_SLOW_SHA2 (replaced with USE_SLOW_SHA512) */ +#if defined(USE_SLOW_SHA2) && !defined(USE_SLOW_SHA512) + #define USE_SLOW_SHA512 +#endif + +/* fips wrapper calls, user can call direct */ +#if defined(HAVE_FIPS) && \ + (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2)) + + #ifdef WOLFSSL_SHA512 + + int wc_InitSha512(wc_Sha512* sha) + { + if (sha == NULL) { + return BAD_FUNC_ARG; + } + + return InitSha512_fips(sha); + } + int wc_InitSha512_ex(wc_Sha512* sha, void* heap, int devId) + { + (void)heap; + (void)devId; + if (sha == NULL) { + return BAD_FUNC_ARG; + } + return InitSha512_fips(sha); + } + int wc_Sha512Update(wc_Sha512* sha, const byte* data, word32 len) + { + if (sha == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + + return Sha512Update_fips(sha, data, len); + } + int wc_Sha512Final(wc_Sha512* sha, byte* out) + { + if (sha == NULL || out == NULL) { + return BAD_FUNC_ARG; + } + + return Sha512Final_fips(sha, out); + } + void wc_Sha512Free(wc_Sha512* sha) + { + (void)sha; + /* Not supported in FIPS */ + } + #endif + + #if defined(WOLFSSL_SHA384) || defined(HAVE_AESGCM) + int wc_InitSha384(wc_Sha384* sha) + { + if
(sha == NULL) { + return BAD_FUNC_ARG; + } + return InitSha384_fips(sha); + } + int wc_InitSha384_ex(wc_Sha384* sha, void* heap, int devId) + { + (void)heap; + (void)devId; + if (sha == NULL) { + return BAD_FUNC_ARG; + } + return InitSha384_fips(sha); + } + int wc_Sha384Update(wc_Sha384* sha, const byte* data, word32 len) + { + if (sha == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + return Sha384Update_fips(sha, data, len); + } + int wc_Sha384Final(wc_Sha384* sha, byte* out) + { + if (sha == NULL || out == NULL) { + return BAD_FUNC_ARG; + } + return Sha384Final_fips(sha, out); + } + void wc_Sha384Free(wc_Sha384* sha) + { + (void)sha; + /* Not supported in FIPS */ + } + #endif /* WOLFSSL_SHA384 || HAVE_AESGCM */ + +#else /* else build without fips, or for FIPS v2 */ + +#include <wolfssl/wolfcrypt/logging.h> + +#ifdef NO_INLINE + #include <wolfssl/wolfcrypt/misc.h> +#else + #define WOLFSSL_MISC_INCLUDED + #include <wolfcrypt/src/misc.c> +#endif + + +#if defined(USE_INTEL_SPEEDUP) + #if defined(__GNUC__) && ((__GNUC__ < 4) || \ + (__GNUC__ == 4 && __GNUC_MINOR__ <= 8)) + #undef NO_AVX2_SUPPORT + #define NO_AVX2_SUPPORT + #endif + #if defined(__clang__) && ((__clang_major__ < 3) || \ + (__clang_major__ == 3 && __clang_minor__ <= 5)) + #define NO_AVX2_SUPPORT + #elif defined(__clang__) && defined(NO_AVX2_SUPPORT) + #undef NO_AVX2_SUPPORT + #endif + + #define HAVE_INTEL_AVX1 + #ifndef NO_AVX2_SUPPORT + #define HAVE_INTEL_AVX2 + #endif +#endif + +#if defined(HAVE_INTEL_AVX1) + /* #define DEBUG_XMM */ +#endif + +#if defined(HAVE_INTEL_AVX2) + #define HAVE_INTEL_RORX + /* #define DEBUG_YMM */ +#endif + +#if defined(HAVE_BYTEREVERSE64) && \ + !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2) + #define ByteReverseWords64(out, in, size) ByteReverseWords64_1(out, size) + #define ByteReverseWords64_1(buf, size) \ + { unsigned int i ;\ + for(i=0; i< size/sizeof(word64); i++){\ + __asm__ volatile("bswapq %0":"+r"(buf[i])::) ;\ + }\ + } +#endif + +#if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH) + /* functions defined in wolfcrypt/src/port/caam/caam_sha.c */ + +#else + +#ifdef WOLFSSL_SHA512 + +static int InitSha512(wc_Sha512* sha512) +{ + if (sha512 == NULL) + return BAD_FUNC_ARG; + + sha512->digest[0] = W64LIT(0x6a09e667f3bcc908); + sha512->digest[1] = W64LIT(0xbb67ae8584caa73b); + sha512->digest[2] = W64LIT(0x3c6ef372fe94f82b); + sha512->digest[3] = W64LIT(0xa54ff53a5f1d36f1); + sha512->digest[4] = W64LIT(0x510e527fade682d1); + sha512->digest[5] = W64LIT(0x9b05688c2b3e6c1f); + sha512->digest[6] = W64LIT(0x1f83d9abfb41bd6b); + sha512->digest[7] = W64LIT(0x5be0cd19137e2179); + + sha512->buffLen = 0; + sha512->loLen = 0; + sha512->hiLen = 0; + +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + + sha512->ctx.sha_type = SHA2_512; + /* always start firstblock = 1 when using hw engine */ + sha512->ctx.isfirstblock = 1; + if(sha512->ctx.mode == ESP32_SHA_HW) { + /* release hw */ + esp_sha_hw_unlock(); + } + /* always set mode as INIT + * whether using HW or SW is determined at first call of update() + */ + sha512->ctx.mode = ESP32_SHA_INIT; +#endif +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + sha512->flags = 0; +#endif + return 0; +} + +#endif /* WOLFSSL_SHA512 */ + +/* Hardware Acceleration */ +#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + +#ifdef WOLFSSL_SHA512 + + /***** + Intel AVX1/AVX2 Macro Control Structure + + #if defined(HAVE_INTEL_SPEEDUP) + #define HAVE_INTEL_AVX1 + #define HAVE_INTEL_AVX2 + #endif + + int InitSha512(wc_Sha512* sha512) { + Save/Recover XMM, YMM + ...
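+ (Note: the real InitSha512 above seeds digest[0..7] with the FIPS 180-4
+ initial hash value, the first 64 bits of the fractional parts of the
+ square roots of the first eight primes 2..19.)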
+ + Check Intel AVX cpuid flags + } + + #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2) + Transform_Sha512_AVX1(); # Function prototype + Transform_Sha512_AVX2(); # + #endif + + _Transform_Sha512() { # Native Transform Function body + + } + + int Sha512Update() { + Save/Recover XMM, YMM + ... + } + + int Sha512Final() { + Save/Recover XMM, YMM + ... + } + + + #if defined(HAVE_INTEL_AVX1) + + XMM Instructions/INLINE asm Definitions + + #endif + + #if defined(HAVE_INTEL_AVX2) + + YMM Instructions/INLINE asm Definitions + + #endif + + #if defnied(HAVE_INTEL_AVX1) + + int Transform_Sha512_AVX1() { + Stitched Message Sched/Round + } + + #endif + + #if defnied(HAVE_INTEL_AVX2) + + int Transform_Sha512_AVX2() { + Stitched Message Sched/Round + } + #endif + + */ + + + /* Each platform needs to query info type 1 from cpuid to see if aesni is + * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts + */ + +#ifdef __cplusplus + extern "C" { +#endif + + #if defined(HAVE_INTEL_AVX1) + extern int Transform_Sha512_AVX1(wc_Sha512 *sha512); + extern int Transform_Sha512_AVX1_Len(wc_Sha512 *sha512, word32 len); + #endif + #if defined(HAVE_INTEL_AVX2) + extern int Transform_Sha512_AVX2(wc_Sha512 *sha512); + extern int Transform_Sha512_AVX2_Len(wc_Sha512 *sha512, word32 len); + #if defined(HAVE_INTEL_RORX) + extern int Transform_Sha512_AVX1_RORX(wc_Sha512 *sha512); + extern int Transform_Sha512_AVX1_RORX_Len(wc_Sha512 *sha512, + word32 len); + extern int Transform_Sha512_AVX2_RORX(wc_Sha512 *sha512); + extern int Transform_Sha512_AVX2_RORX_Len(wc_Sha512 *sha512, + word32 len); + #endif + #endif + +#ifdef __cplusplus + } /* extern "C" */ +#endif + + static int _Transform_Sha512(wc_Sha512 *sha512); + static int (*Transform_Sha512_p)(wc_Sha512* sha512) = _Transform_Sha512; + static int (*Transform_Sha512_Len_p)(wc_Sha512* sha512, word32 len) = NULL; + static int transform_check = 0; + static int intel_flags; + #define Transform_Sha512(sha512) (*Transform_Sha512_p)(sha512) + #define Transform_Sha512_Len(sha512, len) \ + (*Transform_Sha512_Len_p)(sha512, len) + + static void Sha512_SetTransform() + { + if (transform_check) + return; + + intel_flags = cpuid_get_flags(); + + #if defined(HAVE_INTEL_AVX2) + if (IS_INTEL_AVX2(intel_flags)) { + #ifdef HAVE_INTEL_RORX + if (IS_INTEL_BMI2(intel_flags)) { + Transform_Sha512_p = Transform_Sha512_AVX2_RORX; + Transform_Sha512_Len_p = Transform_Sha512_AVX2_RORX_Len; + } + else + #endif + if (1) { + Transform_Sha512_p = Transform_Sha512_AVX2; + Transform_Sha512_Len_p = Transform_Sha512_AVX2_Len; + } + #ifdef HAVE_INTEL_RORX + else { + Transform_Sha512_p = Transform_Sha512_AVX1_RORX; + Transform_Sha512_Len_p = Transform_Sha512_AVX1_RORX_Len; + } + #endif + } + else + #endif + #if defined(HAVE_INTEL_AVX1) + if (IS_INTEL_AVX1(intel_flags)) { + Transform_Sha512_p = Transform_Sha512_AVX1; + Transform_Sha512_Len_p = Transform_Sha512_AVX1_Len; + } + else + #endif + Transform_Sha512_p = _Transform_Sha512; + + transform_check = 1; + } +#endif /* WOLFSSL_SHA512 */ + +#else + #define Transform_Sha512(sha512) _Transform_Sha512(sha512) + +#endif + +#ifdef WOLFSSL_SHA512 + +int wc_InitSha512_ex(wc_Sha512* sha512, void* heap, int devId) +{ + int ret = 0; + + if (sha512 == NULL) + return BAD_FUNC_ARG; + + sha512->heap = heap; + + ret = InitSha512(sha512); + if (ret != 0) + return ret; + +#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + Sha512_SetTransform(); +#endif + +#ifdef WOLFSSL_SMALL_STACK_CACHE + sha512->W = NULL; +#endif + +#if 
defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512) + ret = wolfAsync_DevCtxInit(&sha512->asyncDev, + WOLFSSL_ASYNC_MARKER_SHA512, sha512->heap, devId); +#else + (void)devId; +#endif /* WOLFSSL_ASYNC_CRYPT */ + + return ret; +} + +#endif /* WOLFSSL_SHA512 */ + + +static const word64 K512[80] = { + W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd), + W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc), + W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019), + W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118), + W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe), + W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2), + W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1), + W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694), + W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3), + W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65), + W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483), + W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5), + W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210), + W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4), + W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725), + W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70), + W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926), + W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df), + W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8), + W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b), + W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001), + W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30), + W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910), + W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8), + W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53), + W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8), + W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb), + W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3), + W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60), + W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec), + W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9), + W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b), + W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207), + W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178), + W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6), + W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b), + W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493), + W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c), + W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a), + W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817) +}; + +#define blk0(i) (W[i] = sha512->buffer[i]) + +#define blk2(i) (\ + W[ i & 15] += \ + s1(W[(i-2) & 15])+ \ + W[(i-7) & 15] + \ + s0(W[(i-15) & 15]) \ + ) + +#define Ch(x,y,z) (z ^ (x & (y ^ z))) +#define Maj(x,y,z) ((x & y) | (z & (x | y))) + +#define a(i) T[(0-i) & 7] +#define b(i) T[(1-i) & 7] +#define c(i) T[(2-i) & 7] +#define d(i) T[(3-i) & 7] +#define e(i) T[(4-i) & 7] +#define f(i) T[(5-i) & 7] +#define g(i) T[(6-i) & 7] +#define h(i) T[(7-i) & 7] + +#define S0(x) (rotrFixed64(x,28) ^ rotrFixed64(x,34) ^ rotrFixed64(x,39)) +#define S1(x) (rotrFixed64(x,14) ^ rotrFixed64(x,18) ^ rotrFixed64(x,41)) +#define s0(x) (rotrFixed64(x,1) ^ rotrFixed64(x,8) ^ (x>>7)) +#define s1(x) (rotrFixed64(x,19) ^ rotrFixed64(x,61) ^ (x>>6)) + +#define R(i) \ + h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[i+j] + (j ? 
blk2(i) : blk0(i)); \ + d(i) += h(i); \ + h(i) += S0(a(i)) + Maj(a(i),b(i),c(i)) + +static int _Transform_Sha512(wc_Sha512* sha512) +{ + const word64* K = K512; + word32 j; + word64 T[8]; + +#ifdef WOLFSSL_SMALL_STACK_CACHE + word64* W = sha512->W; + if (W == NULL) { + W = (word64*) XMALLOC(sizeof(word64) * 16, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (W == NULL) + return MEMORY_E; + sha512->W = W; + } +#elif defined(WOLFSSL_SMALL_STACK) + word64* W; + W = (word64*) XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (W == NULL) + return MEMORY_E; +#else + word64 W[16]; +#endif + + /* Copy digest to working vars */ + XMEMCPY(T, sha512->digest, sizeof(T)); + +#ifdef USE_SLOW_SHA512 + /* over twice as small, but 50% slower */ + /* 80 operations, not unrolled */ + for (j = 0; j < 80; j += 16) { + int m; + for (m = 0; m < 16; m++) { /* braces needed here for macros {} */ + R(m); + } + } +#else + /* 80 operations, partially loop unrolled */ + for (j = 0; j < 80; j += 16) { + R( 0); R( 1); R( 2); R( 3); + R( 4); R( 5); R( 6); R( 7); + R( 8); R( 9); R(10); R(11); + R(12); R(13); R(14); R(15); + } +#endif /* USE_SLOW_SHA512 */ + + /* Add the working vars back into digest */ + sha512->digest[0] += a(0); + sha512->digest[1] += b(0); + sha512->digest[2] += c(0); + sha512->digest[3] += d(0); + sha512->digest[4] += e(0); + sha512->digest[5] += f(0); + sha512->digest[6] += g(0); + sha512->digest[7] += h(0); + + /* Wipe variables */ + ForceZero(W, sizeof(word64) * 16); + ForceZero(T, sizeof(T)); + +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SMALL_STACK_CACHE) + XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return 0; +} + + +static WC_INLINE void AddLength(wc_Sha512* sha512, word32 len) +{ + word64 tmp = sha512->loLen; + if ( (sha512->loLen += len) < tmp) + sha512->hiLen++; /* carry low to high */ +} + +static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 len) +{ + int ret = 0; + /* do block size increments */ + byte* local = (byte*)sha512->buffer; + + /* check that internal buffLen is valid */ + if (sha512->buffLen >= WC_SHA512_BLOCK_SIZE) + return BUFFER_E; + + AddLength(sha512, len); + + if (sha512->buffLen > 0) { + word32 add = min(len, WC_SHA512_BLOCK_SIZE - sha512->buffLen); + if (add > 0) { + XMEMCPY(&local[sha512->buffLen], data, add); + + sha512->buffLen += add; + data += add; + len -= add; + } + + if (sha512->buffLen == WC_SHA512_BLOCK_SIZE) { + #if defined(LITTLE_ENDIAN_ORDER) + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + #endif + { + #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ + defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + ByteReverseWords64(sha512->buffer, sha512->buffer, + WC_SHA512_BLOCK_SIZE); + #endif + } + #endif + #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ + defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + ret = Transform_Sha512(sha512); + #else + if(sha512->ctx.mode == ESP32_SHA_INIT) { + esp_sha_try_hw_lock(&sha512->ctx); + } + ret = esp_sha512_process(sha512); + if(ret == 0 && sha512->ctx.mode == ESP32_SHA_SW){ + ret = Transform_Sha512(sha512); + } + #endif + if (ret == 0) + sha512->buffLen = 0; + else + len = 0; + } + } + +#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + if (Transform_Sha512_Len_p != NULL) { + word32 blocksLen = len & ~(WC_SHA512_BLOCK_SIZE-1); + + if (blocksLen > 0) { + sha512->data = data; + /* Byte reversal performed in function if required. 
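+ blocksLen = len & ~(WC_SHA512_BLOCK_SIZE-1) rounds len down to a
+ whole number of 128-byte blocks; e.g. len == 300 gives
+ blocksLen == 256, with the remaining 44 bytes buffered at the
+ end of this function.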
*/ + Transform_Sha512_Len(sha512, blocksLen); + data += blocksLen; + len -= blocksLen; + } + } + else +#endif +#if !defined(LITTLE_ENDIAN_ORDER) || defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + { + while (len >= WC_SHA512_BLOCK_SIZE) { + XMEMCPY(local, data, WC_SHA512_BLOCK_SIZE); + + data += WC_SHA512_BLOCK_SIZE; + len -= WC_SHA512_BLOCK_SIZE; + + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + { + ByteReverseWords64(sha512->buffer, sha512->buffer, + WC_SHA512_BLOCK_SIZE); + } + #endif + /* Byte reversal performed in function if required. */ + ret = Transform_Sha512(sha512); + if (ret != 0) + break; + } + } +#else + { + while (len >= WC_SHA512_BLOCK_SIZE) { + XMEMCPY(local, data, WC_SHA512_BLOCK_SIZE); + + data += WC_SHA512_BLOCK_SIZE; + len -= WC_SHA512_BLOCK_SIZE; + #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ + defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + ByteReverseWords64(sha512->buffer, sha512->buffer, + WC_SHA512_BLOCK_SIZE); + #endif + #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ + defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + ret = Transform_Sha512(sha512); + #else + if(sha512->ctx.mode == ESP32_SHA_INIT) { + esp_sha_try_hw_lock(&sha512->ctx); + } + ret = esp_sha512_process(sha512); + if(ret == 0 && sha512->ctx.mode == ESP32_SHA_SW){ + ret = Transform_Sha512(sha512); + } + #endif + if (ret != 0) + break; + } + } +#endif + + if (len > 0) { + XMEMCPY(local, data, len); + sha512->buffLen = len; + } + + return ret; +} + +#ifdef WOLFSSL_SHA512 + +int wc_Sha512Update(wc_Sha512* sha512, const byte* data, word32 len) +{ + if (sha512 == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512) + if (sha512->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA512) { + #if defined(HAVE_INTEL_QA) + return IntelQaSymSha512(&sha512->asyncDev, NULL, data, len); + #endif + } +#endif /* WOLFSSL_ASYNC_CRYPT */ + + return Sha512Update(sha512, data, len); +} + +#endif /* WOLFSSL_SHA512 */ + +#endif /* WOLFSSL_IMX6_CAAM */ + +static WC_INLINE int Sha512Final(wc_Sha512* sha512) +{ + byte* local = (byte*)sha512->buffer; + int ret; + + if (sha512 == NULL) { + return BAD_FUNC_ARG; + } + + local[sha512->buffLen++] = 0x80; /* add 1 */ + + /* pad with zeros */ + if (sha512->buffLen > WC_SHA512_PAD_SIZE) { + XMEMSET(&local[sha512->buffLen], 0, WC_SHA512_BLOCK_SIZE - sha512->buffLen); + sha512->buffLen += WC_SHA512_BLOCK_SIZE - sha512->buffLen; +#if defined(LITTLE_ENDIAN_ORDER) + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + #endif + { + + #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ + defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + ByteReverseWords64(sha512->buffer,sha512->buffer, + WC_SHA512_BLOCK_SIZE); + #endif + } +#endif /* LITTLE_ENDIAN_ORDER */ +#if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ + defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + ret = Transform_Sha512(sha512); +#else + if(sha512->ctx.mode == ESP32_SHA_INIT) { + esp_sha_try_hw_lock(&sha512->ctx); + } + ret = esp_sha512_process(sha512); + if(ret == 0 && sha512->ctx.mode == ESP32_SHA_SW){ + ret = Transform_Sha512(sha512); + } +#endif + if (ret != 0) + return ret; + + sha512->buffLen = 0; + } + XMEMSET(&local[sha512->buffLen], 0, WC_SHA512_PAD_SIZE - sha512->buffLen); + + /* put lengths in bits */ + sha512->hiLen = (sha512->loLen >> (8 * sizeof(sha512->loLen) - 3)) + + (sha512->hiLen << 3); + sha512->loLen = 
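+ /* hiLen:loLen is one 128-bit length: converting bytes to bits
+ shifts the pair left by 3, with the carry (loLen >> 61) added
+ into hiLen by the statement just above. */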
sha512->loLen << 3; + + /* store lengths */ +#if defined(LITTLE_ENDIAN_ORDER) + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + #endif + #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ + defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + ByteReverseWords64(sha512->buffer, sha512->buffer, WC_SHA512_PAD_SIZE); + #endif +#endif + /* ! length ordering dependent on digest endian type ! */ + +#if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ + defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2] = sha512->hiLen; + sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 1] = sha512->loLen; +#endif + +#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags)) + ByteReverseWords64(&(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]), + &(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]), + WC_SHA512_BLOCK_SIZE - WC_SHA512_PAD_SIZE); +#endif +#if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ + defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + ret = Transform_Sha512(sha512); +#else + if(sha512->ctx.mode == ESP32_SHA_INIT) { + esp_sha_try_hw_lock(&sha512->ctx); + } + ret = esp_sha512_digest_process(sha512, 1); + if(ret == 0 && sha512->ctx.mode == ESP32_SHA_SW) { + ret = Transform_Sha512(sha512); + } +#endif + if (ret != 0) + return ret; + + #ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords64(sha512->digest, sha512->digest, WC_SHA512_DIGEST_SIZE); + #endif + + return 0; +} + +#ifdef WOLFSSL_SHA512 + +int wc_Sha512FinalRaw(wc_Sha512* sha512, byte* hash) +{ +#ifdef LITTLE_ENDIAN_ORDER + word64 digest[WC_SHA512_DIGEST_SIZE / sizeof(word64)]; +#endif + + if (sha512 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords64((word64*)digest, (word64*)sha512->digest, + WC_SHA512_DIGEST_SIZE); + XMEMCPY(hash, digest, WC_SHA512_DIGEST_SIZE); +#else + XMEMCPY(hash, sha512->digest, WC_SHA512_DIGEST_SIZE); +#endif + + return 0; +} + +int wc_Sha512Final(wc_Sha512* sha512, byte* hash) +{ + int ret; + + if (sha512 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512) + if (sha512->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA512) { + #if defined(HAVE_INTEL_QA) + return IntelQaSymSha512(&sha512->asyncDev, hash, NULL, + WC_SHA512_DIGEST_SIZE); + #endif + } +#endif /* WOLFSSL_ASYNC_CRYPT */ + + ret = Sha512Final(sha512); + if (ret != 0) + return ret; + + XMEMCPY(hash, sha512->digest, WC_SHA512_DIGEST_SIZE); + + return InitSha512(sha512); /* reset state */ +} + +int wc_InitSha512(wc_Sha512* sha512) +{ + return wc_InitSha512_ex(sha512, NULL, INVALID_DEVID); +} + +void wc_Sha512Free(wc_Sha512* sha512) +{ + if (sha512 == NULL) + return; + +#ifdef WOLFSSL_SMALL_STACK_CACHE + if (sha512->W != NULL) { + XFREE(sha512->W, NULL, DYNAMIC_TYPE_TMP_BUFFER); + sha512->W = NULL; + } +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512) + wolfAsync_DevCtxFree(&sha512->asyncDev, WOLFSSL_ASYNC_MARKER_SHA512); +#endif /* WOLFSSL_ASYNC_CRYPT */ +} + +#endif /* WOLFSSL_SHA512 */ + +/* -------------------------------------------------------------------------- */ +/* SHA384 */ +/* -------------------------------------------------------------------------- */ +#ifdef WOLFSSL_SHA384 + +#if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH) + /* functions defined in wolfcrypt/src/port/caam/caam_sha.c */ + +#else + +static 
int InitSha384(wc_Sha384* sha384) +{ + if (sha384 == NULL) { + return BAD_FUNC_ARG; + } + + sha384->digest[0] = W64LIT(0xcbbb9d5dc1059ed8); + sha384->digest[1] = W64LIT(0x629a292a367cd507); + sha384->digest[2] = W64LIT(0x9159015a3070dd17); + sha384->digest[3] = W64LIT(0x152fecd8f70e5939); + sha384->digest[4] = W64LIT(0x67332667ffc00b31); + sha384->digest[5] = W64LIT(0x8eb44a8768581511); + sha384->digest[6] = W64LIT(0xdb0c2e0d64f98fa7); + sha384->digest[7] = W64LIT(0x47b5481dbefa4fa4); + + sha384->buffLen = 0; + sha384->loLen = 0; + sha384->hiLen = 0; + +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + sha384->ctx.sha_type = SHA2_384; + /* always start firstblock = 1 when using hw engine */ + sha384->ctx.isfirstblock = 1; + if(sha384->ctx.mode == ESP32_SHA_HW) { + /* release hw */ + esp_sha_hw_unlock(); + } + /* always set mode as INIT + * whether using HW or SW is determined at first call of update() + */ + sha384->ctx.mode = ESP32_SHA_INIT; + +#endif +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + sha384->flags = 0; +#endif + + return 0; +} + +int wc_Sha384Update(wc_Sha384* sha384, const byte* data, word32 len) +{ + if (sha384 == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; + } + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384) + if (sha384->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA384) { + #if defined(HAVE_INTEL_QA) + return IntelQaSymSha384(&sha384->asyncDev, NULL, data, len); + #endif + } +#endif /* WOLFSSL_ASYNC_CRYPT */ + + return Sha512Update((wc_Sha512*)sha384, data, len); +} + + +int wc_Sha384FinalRaw(wc_Sha384* sha384, byte* hash) +{ +#ifdef LITTLE_ENDIAN_ORDER + word64 digest[WC_SHA384_DIGEST_SIZE / sizeof(word64)]; +#endif + + if (sha384 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords64((word64*)digest, (word64*)sha384->digest, + WC_SHA384_DIGEST_SIZE); + XMEMCPY(hash, digest, WC_SHA384_DIGEST_SIZE); +#else + XMEMCPY(hash, sha384->digest, WC_SHA384_DIGEST_SIZE); +#endif + + return 0; +} + +int wc_Sha384Final(wc_Sha384* sha384, byte* hash) +{ + int ret; + + if (sha384 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384) + if (sha384->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA384) { + #if defined(HAVE_INTEL_QA) + return IntelQaSymSha384(&sha384->asyncDev, hash, NULL, + WC_SHA384_DIGEST_SIZE); + #endif + } +#endif /* WOLFSSL_ASYNC_CRYPT */ + + ret = Sha512Final((wc_Sha512*)sha384); + if (ret != 0) + return ret; + + XMEMCPY(hash, sha384->digest, WC_SHA384_DIGEST_SIZE); + + return InitSha384(sha384); /* reset state */ +} + +int wc_InitSha384_ex(wc_Sha384* sha384, void* heap, int devId) +{ + int ret; + + if (sha384 == NULL) { + return BAD_FUNC_ARG; + } + + sha384->heap = heap; + ret = InitSha384(sha384); + if (ret != 0) + return ret; + +#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + Sha512_SetTransform(); +#endif +#ifdef WOLFSSL_SMALL_STACK_CACHE + sha384->W = NULL; +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384) + ret = wolfAsync_DevCtxInit(&sha384->asyncDev, WOLFSSL_ASYNC_MARKER_SHA384, + sha384->heap, devId); +#else + (void)devId; +#endif /* WOLFSSL_ASYNC_CRYPT */ + + return ret; +} + +#endif /* WOLFSSL_IMX6_CAAM */ + +int wc_InitSha384(wc_Sha384* sha384) +{ + return wc_InitSha384_ex(sha384, NULL, INVALID_DEVID); +} + +void wc_Sha384Free(wc_Sha384* sha384) +{ + if (sha384 == NULL) + return; + +#ifdef 
WOLFSSL_SMALL_STACK_CACHE + if (sha384->W != NULL) { + XFREE(sha384->W, NULL, DYNAMIC_TYPE_TMP_BUFFER); + sha384->W = NULL; + } +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384) + wolfAsync_DevCtxFree(&sha384->asyncDev, WOLFSSL_ASYNC_MARKER_SHA384); +#endif /* WOLFSSL_ASYNC_CRYPT */ +} + +#endif /* WOLFSSL_SHA384 */ + +#endif /* HAVE_FIPS */ + +#ifdef WOLFSSL_SHA512 + +int wc_Sha512GetHash(wc_Sha512* sha512, byte* hash) +{ + int ret; + wc_Sha512 tmpSha512; + + if (sha512 == NULL || hash == NULL) + return BAD_FUNC_ARG; + +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if(sha512->ctx.mode == ESP32_SHA_INIT) { + esp_sha_try_hw_lock(&sha512->ctx); + } + if(sha512->ctx.mode != ESP32_SHA_SW) + esp_sha512_digest_process(sha512, 0); +#endif + + ret = wc_Sha512Copy(sha512, &tmpSha512); + if (ret == 0) { + ret = wc_Sha512Final(&tmpSha512, hash); +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + sha512->ctx.mode = ESP32_SHA_SW; +#endif + wc_Sha512Free(&tmpSha512); + } + return ret; +} + +int wc_Sha512Copy(wc_Sha512* src, wc_Sha512* dst) +{ + int ret = 0; + + if (src == NULL || dst == NULL) + return BAD_FUNC_ARG; + + XMEMCPY(dst, src, sizeof(wc_Sha512)); +#ifdef WOLFSSL_SMALL_STACK_CACHE + dst->W = NULL; +#endif + +#ifdef WOLFSSL_ASYNC_CRYPT + ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev); +#endif +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + dst->ctx.mode = src->ctx.mode; + dst->ctx.isfirstblock = src->ctx.isfirstblock; + dst->ctx.sha_type = src->ctx.sha_type; +#endif +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + dst->flags |= WC_HASH_FLAG_ISCOPY; +#endif + + return ret; +} + +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) +int wc_Sha512SetFlags(wc_Sha512* sha512, word32 flags) +{ + if (sha512) { + sha512->flags = flags; + } + return 0; +} +int wc_Sha512GetFlags(wc_Sha512* sha512, word32* flags) +{ + if (sha512 && flags) { + *flags = sha512->flags; + } + return 0; +} +#endif + +#endif /* WOLFSSL_SHA512 */ + +#ifdef WOLFSSL_SHA384 + +int wc_Sha384GetHash(wc_Sha384* sha384, byte* hash) +{ + int ret; + wc_Sha384 tmpSha384; + + if (sha384 == NULL || hash == NULL) + return BAD_FUNC_ARG; +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if(sha384->ctx.mode == ESP32_SHA_INIT) { + esp_sha_try_hw_lock(&sha384->ctx); + } + if(sha384->ctx.mode != ESP32_SHA_SW) { + esp_sha512_digest_process(sha384, 0); + } +#endif + ret = wc_Sha384Copy(sha384, &tmpSha384); + if (ret == 0) { + ret = wc_Sha384Final(&tmpSha384, hash); +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + sha384->ctx.mode = ESP32_SHA_SW; +#endif + wc_Sha384Free(&tmpSha384); + } + return ret; +} +int wc_Sha384Copy(wc_Sha384* src, wc_Sha384* dst) +{ + int ret = 0; + + if (src == NULL || dst == NULL) + return BAD_FUNC_ARG; + + XMEMCPY(dst, src, sizeof(wc_Sha384)); +#ifdef WOLFSSL_SMALL_STACK_CACHE + dst->W = NULL; +#endif + +#ifdef WOLFSSL_ASYNC_CRYPT + ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev); +#endif +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + dst->ctx.mode = src->ctx.mode; + dst->ctx.isfirstblock = src->ctx.isfirstblock; + dst->ctx.sha_type = src->ctx.sha_type; +#endif +#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) + dst->flags |= WC_HASH_FLAG_ISCOPY; +#endif + + return ret; +} + 
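+/* Illustrative use of the GetHash/Copy pattern above (a sketch, not part
+ * of the original file): an intermediate digest can be read without
+ * disturbing a running hash, because final padding is applied to a
+ * temporary copy of the state:
+ *
+ *     wc_Sha512 sha512;
+ *     byte mid[WC_SHA512_DIGEST_SIZE];
+ *     byte full[WC_SHA512_DIGEST_SIZE];
+ *     wc_InitSha512(&sha512);
+ *     wc_Sha512Update(&sha512, (const byte*)"hello ", 6);
+ *     wc_Sha512GetHash(&sha512, mid);
+ *     wc_Sha512Update(&sha512, (const byte*)"world", 5);
+ *     wc_Sha512Final(&sha512, full);
+ *     wc_Sha512Free(&sha512);
+ *
+ * mid is SHA-512("hello ") and full is SHA-512("hello world").
+ */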
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) +int wc_Sha384SetFlags(wc_Sha384* sha384, word32 flags) +{ + if (sha384) { + sha384->flags = flags; + } + return 0; +} +int wc_Sha384GetFlags(wc_Sha384* sha384, word32* flags) +{ + if (sha384 && flags) { + *flags = sha384->flags; + } + return 0; +} +#endif + +#endif /* WOLFSSL_SHA384 */ + +#endif /* WOLFSSL_SHA512 || WOLFSSL_SHA384 */ diff --git a/client/wolfssl/wolfcrypt/src/sha512_asm.S b/client/wolfssl/wolfcrypt/src/sha512_asm.S new file mode 100644 index 0000000..6a27ce4 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/sha512_asm.S @@ -0,0 +1,10741 @@ +/* sha512_asm + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifndef HAVE_INTEL_AVX1 +#define HAVE_INTEL_AVX1 +#endif /* HAVE_INTEL_AVX1 */ +#ifndef NO_AVX2_SUPPORT +#define HAVE_INTEL_AVX2 +#endif /* NO_AVX2_SUPPORT */ + +#ifdef HAVE_INTEL_AVX1 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_sha512_k: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xfc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x6ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x6f067aa72176fba,0xa637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 
0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_sha512_flip_mask: +.quad 0x1020304050607, 0x8090a0b0c0d0e0f +#ifndef __APPLE__ +.text +.globl Transform_Sha512_AVX1 +.type Transform_Sha512_AVX1,@function +.align 4 +Transform_Sha512_AVX1: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX1 +.p2align 2 +_Transform_Sha512_AVX1: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x88, %rsp + leaq 64(%rdi), %rax + vmovdqa L_avx1_sha512_flip_mask(%rip), %xmm14 + movq (%rdi), %r8 + movq 8(%rdi), %r9 + movq 16(%rdi), %r10 + movq 24(%rdi), %r11 + movq 32(%rdi), %r12 + movq 40(%rdi), %r13 + movq 48(%rdi), %r14 + movq 56(%rdi), %r15 + vmovdqu (%rax), %xmm0 + vmovdqu 16(%rax), %xmm1 + vpshufb %xmm14, %xmm0, %xmm0 + vpshufb %xmm14, %xmm1, %xmm1 + vmovdqu 32(%rax), %xmm2 + vmovdqu 48(%rax), %xmm3 + vpshufb %xmm14, %xmm2, %xmm2 + vpshufb %xmm14, %xmm3, %xmm3 + vmovdqu 64(%rax), %xmm4 + vmovdqu 80(%rax), %xmm5 + vpshufb %xmm14, %xmm4, %xmm4 + vpshufb %xmm14, %xmm5, %xmm5 + vmovdqu 96(%rax), %xmm6 + vmovdqu 112(%rax), %xmm7 + vpshufb %xmm14, %xmm6, %xmm6 + vpshufb %xmm14, %xmm7, %xmm7 + movl $4, 128(%rsp) + leaq L_avx1_sha512_k(%rip), %rsi + movq %r9, %rbx + movq %r12, %rax + xorq %r10, %rbx + # Start of 16 rounds +L_sha256_len_avx1_start: + vpaddq (%rsi), %xmm0, %xmm8 + vpaddq 16(%rsi), %xmm1, %xmm9 + vmovdqu %xmm8, (%rsp) + vmovdqu %xmm9, 16(%rsp) + vpaddq 32(%rsi), %xmm2, %xmm8 + vpaddq 48(%rsi), %xmm3, %xmm9 + vmovdqu %xmm8, 32(%rsp) + vmovdqu %xmm9, 48(%rsp) + vpaddq 64(%rsi), %xmm4, %xmm8 + vpaddq 80(%rsi), %xmm5, %xmm9 + vmovdqu %xmm8, 64(%rsp) + vmovdqu %xmm9, 80(%rsp) + vpaddq 96(%rsi), %xmm6, %xmm8 + vpaddq 112(%rsi), %xmm7, %xmm9 + vmovdqu %xmm8, 96(%rsp) + vmovdqu %xmm9, 112(%rsp) + addq $0x80, %rsi + # msg_sched: 0-1 + # rnd_0: 0 - 0 + rorq $23, %rax + vpalignr $8, %xmm0, %xmm1, %xmm12 + vpalignr $8, %xmm4, %xmm5, %xmm13 + # rnd_0: 1 - 1 + movq %r8, %rdx + movq %r13, %rcx + addq (%rsp), %r15 + xorq %r14, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 3 + xorq %r12, %rax + andq %r12, %rcx + rorq $4, %rax + xorq %r14, %rcx + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 4 - 5 + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 6 - 7 + addq %rax, %r15 + movq %r8, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 8 - 9 + xorq %r8, %rcx + xorq %r9, %rbx + rorq $6, %rcx + addq %r15, %r11 + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm0, %xmm13, %xmm0 + # rnd_0: 10 - 11 + xorq %r8, %rcx + addq %rbx, %r15 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + # rnd_1: 0 - 0 + rorq $23, %rax + vpaddq %xmm0, %xmm8, %xmm0 + # rnd_1: 1 - 1 + movq %r15, %rbx + movq %r12, %rcx + addq 8(%rsp), %r14 + xorq %r13, %rcx + vpsrlq $19, %xmm7, %xmm8 + vpsllq $45, %xmm7, %xmm9 + # rnd_1: 2 - 3 + xorq %r11, %rax + andq %r11, %rcx + rorq $4, %rax + xorq %r13, %rcx + vpsrlq $61, %xmm7, %xmm10 + vpsllq $3, %xmm7, %xmm11 + # rnd_1: 4 - 6 + xorq %r11, %rax + addq %rcx, %r14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 7 - 8 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r15, %rcx + xorq %r8, %rdx + 
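+ # (AVX1 has no 64-bit vector rotate, so each rotr is composed from a
+ # shift pair: vpsrlq $n with vpsllq $(64-n), merged by vpor. The xmm8
+ # and xmm10 values combined next form sigma1(x) = rotr19(x) ^ rotr61(x)
+ # ^ (x >> 6) of the message schedule, matching s1() in sha512.c.)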
vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm7, %xmm11 + # rnd_1: 9 - 10 + rorq $6, %rcx + addq %r14, %r10 + xorq %r15, %rcx + addq %rdx, %r14 + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 11 - 11 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + vpaddq %xmm0, %xmm8, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 2-3 + # rnd_0: 0 - 0 + rorq $23, %rax + vpalignr $8, %xmm1, %xmm2, %xmm12 + vpalignr $8, %xmm5, %xmm6, %xmm13 + # rnd_0: 1 - 1 + movq %r14, %rdx + movq %r11, %rcx + addq 16(%rsp), %r13 + xorq %r12, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 3 + xorq %r10, %rax + andq %r10, %rcx + rorq $4, %rax + xorq %r12, %rcx + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 4 - 5 + xorq %r10, %rax + addq %rcx, %r13 + rorq $14, %rax + xorq %r15, %rdx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 6 - 7 + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 8 - 9 + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm1, %xmm13, %xmm1 + # rnd_0: 10 - 11 + xorq %r14, %rcx + addq %rbx, %r13 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + # rnd_1: 0 - 0 + rorq $23, %rax + vpaddq %xmm1, %xmm8, %xmm1 + # rnd_1: 1 - 1 + movq %r13, %rbx + movq %r10, %rcx + addq 24(%rsp), %r12 + xorq %r11, %rcx + vpsrlq $19, %xmm0, %xmm8 + vpsllq $45, %xmm0, %xmm9 + # rnd_1: 2 - 3 + xorq %r9, %rax + andq %r9, %rcx + rorq $4, %rax + xorq %r11, %rcx + vpsrlq $61, %xmm0, %xmm10 + vpsllq $3, %xmm0, %xmm11 + # rnd_1: 4 - 6 + xorq %r9, %rax + addq %rcx, %r12 + rorq $14, %rax + xorq %r14, %rbx + addq %rax, %r12 + movq %r13, %rcx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 7 - 8 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r13, %rcx + xorq %r14, %rdx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm0, %xmm11 + # rnd_1: 9 - 10 + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 11 - 11 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + vpaddq %xmm1, %xmm8, %xmm1 + # msg_sched done: 2-5 + # msg_sched: 4-5 + # rnd_0: 0 - 0 + rorq $23, %rax + vpalignr $8, %xmm2, %xmm3, %xmm12 + vpalignr $8, %xmm6, %xmm7, %xmm13 + # rnd_0: 1 - 1 + movq %r12, %rdx + movq %r9, %rcx + addq 32(%rsp), %r11 + xorq %r10, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 3 + xorq %r8, %rax + andq %r8, %rcx + rorq $4, %rax + xorq %r10, %rcx + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 4 - 5 + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 6 - 7 + addq %rax, %r11 + movq %r12, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 8 - 9 + xorq %r12, %rcx + xorq %r13, %rbx + rorq $6, %rcx + addq %r11, %r15 + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm2, %xmm13, %xmm2 + # rnd_0: 10 - 11 + xorq %r12, %rcx + addq %rbx, %r11 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + # rnd_1: 0 - 0 + rorq $23, %rax + vpaddq %xmm2, %xmm8, %xmm2 + # rnd_1: 1 - 1 + movq %r11, %rbx + movq %r8, %rcx + addq 40(%rsp), %r10 + xorq %r9, %rcx + vpsrlq $19, %xmm1, %xmm8 + vpsllq $45, %xmm1, %xmm9 + # rnd_1: 2 - 3 + xorq %r15, %rax + andq %r15, %rcx + rorq $4, %rax + xorq %r9, %rcx + vpsrlq $61, %xmm1, %xmm10 + vpsllq $3, %xmm1, %xmm11 + # rnd_1: 4 - 6 + xorq %r15, %rax + addq %rcx, %r10 + rorq $14, %rax + xorq 
%r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 7 - 8 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r11, %rcx + xorq %r12, %rdx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm1, %xmm11 + # rnd_1: 9 - 10 + rorq $6, %rcx + addq %r10, %r14 + xorq %r11, %rcx + addq %rdx, %r10 + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 11 - 11 + rorq $28, %rcx + movq %r14, %rax + addq %rcx, %r10 + vpaddq %xmm2, %xmm8, %xmm2 + # msg_sched done: 4-7 + # msg_sched: 6-7 + # rnd_0: 0 - 0 + rorq $23, %rax + vpalignr $8, %xmm3, %xmm4, %xmm12 + vpalignr $8, %xmm7, %xmm0, %xmm13 + # rnd_0: 1 - 1 + movq %r10, %rdx + movq %r15, %rcx + addq 48(%rsp), %r9 + xorq %r8, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 3 + xorq %r14, %rax + andq %r14, %rcx + rorq $4, %rax + xorq %r8, %rcx + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 4 - 5 + xorq %r14, %rax + addq %rcx, %r9 + rorq $14, %rax + xorq %r11, %rdx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 6 - 7 + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 8 - 9 + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm3, %xmm13, %xmm3 + # rnd_0: 10 - 11 + xorq %r10, %rcx + addq %rbx, %r9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + # rnd_1: 0 - 0 + rorq $23, %rax + vpaddq %xmm3, %xmm8, %xmm3 + # rnd_1: 1 - 1 + movq %r9, %rbx + movq %r14, %rcx + addq 56(%rsp), %r8 + xorq %r15, %rcx + vpsrlq $19, %xmm2, %xmm8 + vpsllq $45, %xmm2, %xmm9 + # rnd_1: 2 - 3 + xorq %r13, %rax + andq %r13, %rcx + rorq $4, %rax + xorq %r15, %rcx + vpsrlq $61, %xmm2, %xmm10 + vpsllq $3, %xmm2, %xmm11 + # rnd_1: 4 - 6 + xorq %r13, %rax + addq %rcx, %r8 + rorq $14, %rax + xorq %r10, %rbx + addq %rax, %r8 + movq %r9, %rcx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 7 - 8 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r9, %rcx + xorq %r10, %rdx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm2, %xmm11 + # rnd_1: 9 - 10 + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 11 - 11 + rorq $28, %rcx + movq %r12, %rax + addq %rcx, %r8 + vpaddq %xmm3, %xmm8, %xmm3 + # msg_sched done: 6-9 + # msg_sched: 8-9 + # rnd_0: 0 - 0 + rorq $23, %rax + vpalignr $8, %xmm4, %xmm5, %xmm12 + vpalignr $8, %xmm0, %xmm1, %xmm13 + # rnd_0: 1 - 1 + movq %r8, %rdx + movq %r13, %rcx + addq 64(%rsp), %r15 + xorq %r14, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 3 + xorq %r12, %rax + andq %r12, %rcx + rorq $4, %rax + xorq %r14, %rcx + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 4 - 5 + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 6 - 7 + addq %rax, %r15 + movq %r8, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 8 - 9 + xorq %r8, %rcx + xorq %r9, %rbx + rorq $6, %rcx + addq %r15, %r11 + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm4, %xmm13, %xmm4 + # rnd_0: 10 - 11 + xorq %r8, %rcx + addq %rbx, %r15 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + # rnd_1: 0 - 0 + rorq $23, %rax + vpaddq %xmm4, %xmm8, %xmm4 + # rnd_1: 1 - 1 + movq %r15, %rbx + movq %r12, %rcx + addq 72(%rsp), %r14 + xorq %r13, %rcx + vpsrlq $19, %xmm3, %xmm8 + vpsllq $45, %xmm3, %xmm9 + # rnd_1: 2 - 3 + xorq %r11, %rax 
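+ # (The scalar rounds interleaved here fold Sigma1 into a rotate chain:
+ # with e in %r11, rorq $23, xor e, rorq $4, xor e, rorq $14 leaves
+ # rotr14(e) ^ rotr18(e) ^ rotr41(e) in %rax without extra registers.)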
+ andq %r11, %rcx + rorq $4, %rax + xorq %r13, %rcx + vpsrlq $61, %xmm3, %xmm10 + vpsllq $3, %xmm3, %xmm11 + # rnd_1: 4 - 6 + xorq %r11, %rax + addq %rcx, %r14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 7 - 8 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r15, %rcx + xorq %r8, %rdx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm3, %xmm11 + # rnd_1: 9 - 10 + rorq $6, %rcx + addq %r14, %r10 + xorq %r15, %rcx + addq %rdx, %r14 + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 11 - 11 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + vpaddq %xmm4, %xmm8, %xmm4 + # msg_sched done: 8-11 + # msg_sched: 10-11 + # rnd_0: 0 - 0 + rorq $23, %rax + vpalignr $8, %xmm5, %xmm6, %xmm12 + vpalignr $8, %xmm1, %xmm2, %xmm13 + # rnd_0: 1 - 1 + movq %r14, %rdx + movq %r11, %rcx + addq 80(%rsp), %r13 + xorq %r12, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 3 + xorq %r10, %rax + andq %r10, %rcx + rorq $4, %rax + xorq %r12, %rcx + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 4 - 5 + xorq %r10, %rax + addq %rcx, %r13 + rorq $14, %rax + xorq %r15, %rdx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 6 - 7 + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 8 - 9 + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm5, %xmm13, %xmm5 + # rnd_0: 10 - 11 + xorq %r14, %rcx + addq %rbx, %r13 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + # rnd_1: 0 - 0 + rorq $23, %rax + vpaddq %xmm5, %xmm8, %xmm5 + # rnd_1: 1 - 1 + movq %r13, %rbx + movq %r10, %rcx + addq 88(%rsp), %r12 + xorq %r11, %rcx + vpsrlq $19, %xmm4, %xmm8 + vpsllq $45, %xmm4, %xmm9 + # rnd_1: 2 - 3 + xorq %r9, %rax + andq %r9, %rcx + rorq $4, %rax + xorq %r11, %rcx + vpsrlq $61, %xmm4, %xmm10 + vpsllq $3, %xmm4, %xmm11 + # rnd_1: 4 - 6 + xorq %r9, %rax + addq %rcx, %r12 + rorq $14, %rax + xorq %r14, %rbx + addq %rax, %r12 + movq %r13, %rcx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 7 - 8 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r13, %rcx + xorq %r14, %rdx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm4, %xmm11 + # rnd_1: 9 - 10 + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 11 - 11 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + vpaddq %xmm5, %xmm8, %xmm5 + # msg_sched done: 10-13 + # msg_sched: 12-13 + # rnd_0: 0 - 0 + rorq $23, %rax + vpalignr $8, %xmm6, %xmm7, %xmm12 + vpalignr $8, %xmm2, %xmm3, %xmm13 + # rnd_0: 1 - 1 + movq %r12, %rdx + movq %r9, %rcx + addq 96(%rsp), %r11 + xorq %r10, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 3 + xorq %r8, %rax + andq %r8, %rcx + rorq $4, %rax + xorq %r10, %rcx + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 4 - 5 + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 6 - 7 + addq %rax, %r11 + movq %r12, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 8 - 9 + xorq %r12, %rcx + xorq %r13, %rbx + rorq $6, %rcx + addq %r11, %r15 + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm6, %xmm13, %xmm6 + # rnd_0: 10 - 11 + xorq %r12, %rcx + addq %rbx, %r11 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + # rnd_1: 0 - 0 + rorq $23, %rax + vpaddq 
%xmm6, %xmm8, %xmm6 + # rnd_1: 1 - 1 + movq %r11, %rbx + movq %r8, %rcx + addq 104(%rsp), %r10 + xorq %r9, %rcx + vpsrlq $19, %xmm5, %xmm8 + vpsllq $45, %xmm5, %xmm9 + # rnd_1: 2 - 3 + xorq %r15, %rax + andq %r15, %rcx + rorq $4, %rax + xorq %r9, %rcx + vpsrlq $61, %xmm5, %xmm10 + vpsllq $3, %xmm5, %xmm11 + # rnd_1: 4 - 6 + xorq %r15, %rax + addq %rcx, %r10 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 7 - 8 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r11, %rcx + xorq %r12, %rdx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm5, %xmm11 + # rnd_1: 9 - 10 + rorq $6, %rcx + addq %r10, %r14 + xorq %r11, %rcx + addq %rdx, %r10 + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 11 - 11 + rorq $28, %rcx + movq %r14, %rax + addq %rcx, %r10 + vpaddq %xmm6, %xmm8, %xmm6 + # msg_sched done: 12-15 + # msg_sched: 14-15 + # rnd_0: 0 - 0 + rorq $23, %rax + vpalignr $8, %xmm7, %xmm0, %xmm12 + vpalignr $8, %xmm3, %xmm4, %xmm13 + # rnd_0: 1 - 1 + movq %r10, %rdx + movq %r15, %rcx + addq 112(%rsp), %r9 + xorq %r8, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 3 + xorq %r14, %rax + andq %r14, %rcx + rorq $4, %rax + xorq %r8, %rcx + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 4 - 5 + xorq %r14, %rax + addq %rcx, %r9 + rorq $14, %rax + xorq %r11, %rdx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 6 - 7 + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 8 - 9 + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm7, %xmm13, %xmm7 + # rnd_0: 10 - 11 + xorq %r10, %rcx + addq %rbx, %r9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + # rnd_1: 0 - 0 + rorq $23, %rax + vpaddq %xmm7, %xmm8, %xmm7 + # rnd_1: 1 - 1 + movq %r9, %rbx + movq %r14, %rcx + addq 120(%rsp), %r8 + xorq %r15, %rcx + vpsrlq $19, %xmm6, %xmm8 + vpsllq $45, %xmm6, %xmm9 + # rnd_1: 2 - 3 + xorq %r13, %rax + andq %r13, %rcx + rorq $4, %rax + xorq %r15, %rcx + vpsrlq $61, %xmm6, %xmm10 + vpsllq $3, %xmm6, %xmm11 + # rnd_1: 4 - 6 + xorq %r13, %rax + addq %rcx, %r8 + rorq $14, %rax + xorq %r10, %rbx + addq %rax, %r8 + movq %r9, %rcx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 7 - 8 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r9, %rcx + xorq %r10, %rdx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm6, %xmm11 + # rnd_1: 9 - 10 + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 11 - 11 + rorq $28, %rcx + movq %r12, %rax + addq %rcx, %r8 + vpaddq %xmm7, %xmm8, %xmm7 + # msg_sched done: 14-17 + subl $0x01, 128(%rsp) + jne L_sha256_len_avx1_start + vpaddq (%rsi), %xmm0, %xmm8 + vpaddq 16(%rsi), %xmm1, %xmm9 + vmovdqu %xmm8, (%rsp) + vmovdqu %xmm9, 16(%rsp) + vpaddq 32(%rsi), %xmm2, %xmm8 + vpaddq 48(%rsi), %xmm3, %xmm9 + vmovdqu %xmm8, 32(%rsp) + vmovdqu %xmm9, 48(%rsp) + vpaddq 64(%rsi), %xmm4, %xmm8 + vpaddq 80(%rsi), %xmm5, %xmm9 + vmovdqu %xmm8, 64(%rsp) + vmovdqu %xmm9, 80(%rsp) + vpaddq 96(%rsi), %xmm6, %xmm8 + vpaddq 112(%rsi), %xmm7, %xmm9 + vmovdqu %xmm8, 96(%rsp) + vmovdqu %xmm9, 112(%rsp) + # rnd_all_2: 0-1 + # rnd_0: 0 - 11 + rorq $23, %rax + movq %r8, %rdx + movq %r13, %rcx + addq (%rsp), %r15 + xorq %r14, %rcx + xorq %r12, %rax + andq %r12, %rcx + rorq $4, %rax + xorq %r14, %rcx + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + addq %rax, %r15 + movq 
%r8, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r8, %rcx + xorq %r9, %rbx + rorq $6, %rcx + addq %r15, %r11 + xorq %r8, %rcx + addq %rbx, %r15 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + # rnd_1: 0 - 11 + rorq $23, %rax + movq %r15, %rbx + movq %r12, %rcx + addq 8(%rsp), %r14 + xorq %r13, %rcx + xorq %r11, %rax + andq %r11, %rcx + rorq $4, %rax + xorq %r13, %rcx + xorq %r11, %rax + addq %rcx, %r14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r15, %rcx + xorq %r8, %rdx + rorq $6, %rcx + addq %r14, %r10 + xorq %r15, %rcx + addq %rdx, %r14 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + # rnd_all_2: 2-3 + # rnd_0: 0 - 11 + rorq $23, %rax + movq %r14, %rdx + movq %r11, %rcx + addq 16(%rsp), %r13 + xorq %r12, %rcx + xorq %r10, %rax + andq %r10, %rcx + rorq $4, %rax + xorq %r12, %rcx + xorq %r10, %rax + addq %rcx, %r13 + rorq $14, %rax + xorq %r15, %rdx + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + xorq %r14, %rcx + addq %rbx, %r13 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + # rnd_1: 0 - 11 + rorq $23, %rax + movq %r13, %rbx + movq %r10, %rcx + addq 24(%rsp), %r12 + xorq %r11, %rcx + xorq %r9, %rax + andq %r9, %rcx + rorq $4, %rax + xorq %r11, %rcx + xorq %r9, %rax + addq %rcx, %r12 + rorq $14, %rax + xorq %r14, %rbx + addq %rax, %r12 + movq %r13, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r13, %rcx + xorq %r14, %rdx + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + # rnd_all_2: 4-5 + # rnd_0: 0 - 11 + rorq $23, %rax + movq %r12, %rdx + movq %r9, %rcx + addq 32(%rsp), %r11 + xorq %r10, %rcx + xorq %r8, %rax + andq %r8, %rcx + rorq $4, %rax + xorq %r10, %rcx + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + addq %rax, %r11 + movq %r12, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r12, %rcx + xorq %r13, %rbx + rorq $6, %rcx + addq %r11, %r15 + xorq %r12, %rcx + addq %rbx, %r11 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + # rnd_1: 0 - 11 + rorq $23, %rax + movq %r11, %rbx + movq %r8, %rcx + addq 40(%rsp), %r10 + xorq %r9, %rcx + xorq %r15, %rax + andq %r15, %rcx + rorq $4, %rax + xorq %r9, %rcx + xorq %r15, %rax + addq %rcx, %r10 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r11, %rcx + xorq %r12, %rdx + rorq $6, %rcx + addq %r10, %r14 + xorq %r11, %rcx + addq %rdx, %r10 + rorq $28, %rcx + movq %r14, %rax + addq %rcx, %r10 + # rnd_all_2: 6-7 + # rnd_0: 0 - 11 + rorq $23, %rax + movq %r10, %rdx + movq %r15, %rcx + addq 48(%rsp), %r9 + xorq %r8, %rcx + xorq %r14, %rax + andq %r14, %rcx + rorq $4, %rax + xorq %r8, %rcx + xorq %r14, %rax + addq %rcx, %r9 + rorq $14, %rax + xorq %r11, %rdx + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + xorq %r10, %rcx + addq %rbx, %r9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + # rnd_1: 0 - 11 + rorq $23, %rax + movq %r9, %rbx + movq %r14, %rcx + addq 56(%rsp), %r8 + xorq %r15, %rcx + xorq %r13, %rax + andq %r13, %rcx + rorq $4, %rax + xorq %r15, %rcx + xorq %r13, %rax + addq %rcx, %r8 + rorq $14, %rax + xorq %r10, %rbx + addq %rax, %r8 + movq %r9, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r9, %rcx + xorq %r10, %rdx + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + rorq $28, %rcx 
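+ # (descriptive note) These scalar rounds fold Sigma1(e) = ROTR^14 ^ ROTR^18 ^ ROTR^41 as ((((e ror 23) ^ e) ror 4) ^ e) ror 14, and Sigma0(a) likewise via rorq $5/$6/$28; Ch(e,f,g) is computed as ((f ^ g) & e) ^ g, and Maj reuses b ^ c carried in %rbx/%rdx across rounds.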
+ movq %r12, %rax + addq %rcx, %r8 + # rnd_all_2: 8-9 + # rnd_0: 0 - 11 + rorq $23, %rax + movq %r8, %rdx + movq %r13, %rcx + addq 64(%rsp), %r15 + xorq %r14, %rcx + xorq %r12, %rax + andq %r12, %rcx + rorq $4, %rax + xorq %r14, %rcx + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + addq %rax, %r15 + movq %r8, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r8, %rcx + xorq %r9, %rbx + rorq $6, %rcx + addq %r15, %r11 + xorq %r8, %rcx + addq %rbx, %r15 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + # rnd_1: 0 - 11 + rorq $23, %rax + movq %r15, %rbx + movq %r12, %rcx + addq 72(%rsp), %r14 + xorq %r13, %rcx + xorq %r11, %rax + andq %r11, %rcx + rorq $4, %rax + xorq %r13, %rcx + xorq %r11, %rax + addq %rcx, %r14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r15, %rcx + xorq %r8, %rdx + rorq $6, %rcx + addq %r14, %r10 + xorq %r15, %rcx + addq %rdx, %r14 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + # rnd_all_2: 10-11 + # rnd_0: 0 - 11 + rorq $23, %rax + movq %r14, %rdx + movq %r11, %rcx + addq 80(%rsp), %r13 + xorq %r12, %rcx + xorq %r10, %rax + andq %r10, %rcx + rorq $4, %rax + xorq %r12, %rcx + xorq %r10, %rax + addq %rcx, %r13 + rorq $14, %rax + xorq %r15, %rdx + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + xorq %r14, %rcx + addq %rbx, %r13 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + # rnd_1: 0 - 11 + rorq $23, %rax + movq %r13, %rbx + movq %r10, %rcx + addq 88(%rsp), %r12 + xorq %r11, %rcx + xorq %r9, %rax + andq %r9, %rcx + rorq $4, %rax + xorq %r11, %rcx + xorq %r9, %rax + addq %rcx, %r12 + rorq $14, %rax + xorq %r14, %rbx + addq %rax, %r12 + movq %r13, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r13, %rcx + xorq %r14, %rdx + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + # rnd_all_2: 12-13 + # rnd_0: 0 - 11 + rorq $23, %rax + movq %r12, %rdx + movq %r9, %rcx + addq 96(%rsp), %r11 + xorq %r10, %rcx + xorq %r8, %rax + andq %r8, %rcx + rorq $4, %rax + xorq %r10, %rcx + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + addq %rax, %r11 + movq %r12, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r12, %rcx + xorq %r13, %rbx + rorq $6, %rcx + addq %r11, %r15 + xorq %r12, %rcx + addq %rbx, %r11 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + # rnd_1: 0 - 11 + rorq $23, %rax + movq %r11, %rbx + movq %r8, %rcx + addq 104(%rsp), %r10 + xorq %r9, %rcx + xorq %r15, %rax + andq %r15, %rcx + rorq $4, %rax + xorq %r9, %rcx + xorq %r15, %rax + addq %rcx, %r10 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r11, %rcx + xorq %r12, %rdx + rorq $6, %rcx + addq %r10, %r14 + xorq %r11, %rcx + addq %rdx, %r10 + rorq $28, %rcx + movq %r14, %rax + addq %rcx, %r10 + # rnd_all_2: 14-15 + # rnd_0: 0 - 11 + rorq $23, %rax + movq %r10, %rdx + movq %r15, %rcx + addq 112(%rsp), %r9 + xorq %r8, %rcx + xorq %r14, %rax + andq %r14, %rcx + rorq $4, %rax + xorq %r8, %rcx + xorq %r14, %rax + addq %rcx, %r9 + rorq $14, %rax + xorq %r11, %rdx + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + xorq %r10, %rcx + addq %rbx, %r9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + # rnd_1: 0 - 11 + rorq $23, %rax + movq %r9, %rbx + movq %r14, %rcx + addq 120(%rsp), %r8 + xorq 
%r15, %rcx + xorq %r13, %rax + andq %r13, %rcx + rorq $4, %rax + xorq %r15, %rcx + xorq %r13, %rax + addq %rcx, %r8 + rorq $14, %rax + xorq %r10, %rbx + addq %rax, %r8 + movq %r9, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r9, %rcx + xorq %r10, %rdx + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + rorq $28, %rcx + movq %r12, %rax + addq %rcx, %r8 + addq %r8, (%rdi) + addq %r9, 8(%rdi) + addq %r10, 16(%rdi) + addq %r11, 24(%rdi) + addq %r12, 32(%rdi) + addq %r13, 40(%rdi) + addq %r14, 48(%rdi) + addq %r15, 56(%rdi) + xorq %rax, %rax + vzeroupper + addq $0x88, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha512_AVX1,.-Transform_Sha512_AVX1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl Transform_Sha512_AVX1_Len +.type Transform_Sha512_AVX1_Len,@function +.align 4 +Transform_Sha512_AVX1_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX1_Len +.p2align 2 +_Transform_Sha512_AVX1_Len: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbp + movq %rsi, %rbp + subq $0x90, %rsp + movq 224(%rdi), %rsi + leaq L_avx1_sha512_k(%rip), %rdx + vmovdqa L_avx1_sha512_flip_mask(%rip), %xmm14 + movq (%rdi), %r8 + movq 8(%rdi), %r9 + movq 16(%rdi), %r10 + movq 24(%rdi), %r11 + movq 32(%rdi), %r12 + movq 40(%rdi), %r13 + movq 48(%rdi), %r14 + movq 56(%rdi), %r15 + # Start of loop processing a block +L_sha512_len_avx1_begin: + vmovdqu (%rsi), %xmm0 + vmovdqu 16(%rsi), %xmm1 + vpshufb %xmm14, %xmm0, %xmm0 + vpshufb %xmm14, %xmm1, %xmm1 + vmovdqu 32(%rsi), %xmm2 + vmovdqu 48(%rsi), %xmm3 + vpshufb %xmm14, %xmm2, %xmm2 + vpshufb %xmm14, %xmm3, %xmm3 + vmovdqu 64(%rsi), %xmm4 + vmovdqu 80(%rsi), %xmm5 + vpshufb %xmm14, %xmm4, %xmm4 + vpshufb %xmm14, %xmm5, %xmm5 + vmovdqu 96(%rsi), %xmm6 + vmovdqu 112(%rsi), %xmm7 + vpshufb %xmm14, %xmm6, %xmm6 + vpshufb %xmm14, %xmm7, %xmm7 + movl $4, 128(%rsp) + movq %r9, %rbx + movq %r12, %rax + xorq %r10, %rbx + vpaddq (%rdx), %xmm0, %xmm8 + vpaddq 16(%rdx), %xmm1, %xmm9 + vmovdqu %xmm8, (%rsp) + vmovdqu %xmm9, 16(%rsp) + vpaddq 32(%rdx), %xmm2, %xmm8 + vpaddq 48(%rdx), %xmm3, %xmm9 + vmovdqu %xmm8, 32(%rsp) + vmovdqu %xmm9, 48(%rsp) + vpaddq 64(%rdx), %xmm4, %xmm8 + vpaddq 80(%rdx), %xmm5, %xmm9 + vmovdqu %xmm8, 64(%rsp) + vmovdqu %xmm9, 80(%rsp) + vpaddq 96(%rdx), %xmm6, %xmm8 + vpaddq 112(%rdx), %xmm7, %xmm9 + vmovdqu %xmm8, 96(%rsp) + vmovdqu %xmm9, 112(%rsp) + # Start of 16 rounds +L_sha512_len_avx1_start: + addq $0x80, %rdx + movq %rdx, 136(%rsp) + # msg_sched: 0-1 + # rnd_0: 0 - 0 + rorq $23, %rax + vpalignr $8, %xmm0, %xmm1, %xmm12 + vpalignr $8, %xmm4, %xmm5, %xmm13 + # rnd_0: 1 - 1 + movq %r8, %rdx + movq %r13, %rcx + addq (%rsp), %r15 + xorq %r14, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 3 + xorq %r12, %rax + andq %r12, %rcx + rorq $4, %rax + xorq %r14, %rcx + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 4 - 5 + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 6 - 7 + addq %rax, %r15 + movq %r8, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 8 - 9 + xorq %r8, %rcx + xorq %r9, %rbx + rorq $6, %rcx + addq %r15, %r11 + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm0, %xmm13, %xmm0 + # rnd_0: 10 - 11 + xorq %r8, %rcx + addq %rbx, %r15 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + # rnd_1: 0 - 0 + rorq $23, 
%rax + vpaddq %xmm0, %xmm8, %xmm0 + # rnd_1: 1 - 1 + movq %r15, %rbx + movq %r12, %rcx + addq 8(%rsp), %r14 + xorq %r13, %rcx + vpsrlq $19, %xmm7, %xmm8 + vpsllq $45, %xmm7, %xmm9 + # rnd_1: 2 - 3 + xorq %r11, %rax + andq %r11, %rcx + rorq $4, %rax + xorq %r13, %rcx + vpsrlq $61, %xmm7, %xmm10 + vpsllq $3, %xmm7, %xmm11 + # rnd_1: 4 - 6 + xorq %r11, %rax + addq %rcx, %r14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 7 - 8 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r15, %rcx + xorq %r8, %rdx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm7, %xmm11 + # rnd_1: 9 - 10 + rorq $6, %rcx + addq %r14, %r10 + xorq %r15, %rcx + addq %rdx, %r14 + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 11 - 11 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + vpaddq %xmm0, %xmm8, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 2-3 + # rnd_0: 0 - 0 + rorq $23, %rax + vpalignr $8, %xmm1, %xmm2, %xmm12 + vpalignr $8, %xmm5, %xmm6, %xmm13 + # rnd_0: 1 - 1 + movq %r14, %rdx + movq %r11, %rcx + addq 16(%rsp), %r13 + xorq %r12, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 3 + xorq %r10, %rax + andq %r10, %rcx + rorq $4, %rax + xorq %r12, %rcx + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 4 - 5 + xorq %r10, %rax + addq %rcx, %r13 + rorq $14, %rax + xorq %r15, %rdx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 6 - 7 + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 8 - 9 + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm1, %xmm13, %xmm1 + # rnd_0: 10 - 11 + xorq %r14, %rcx + addq %rbx, %r13 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + # rnd_1: 0 - 0 + rorq $23, %rax + vpaddq %xmm1, %xmm8, %xmm1 + # rnd_1: 1 - 1 + movq %r13, %rbx + movq %r10, %rcx + addq 24(%rsp), %r12 + xorq %r11, %rcx + vpsrlq $19, %xmm0, %xmm8 + vpsllq $45, %xmm0, %xmm9 + # rnd_1: 2 - 3 + xorq %r9, %rax + andq %r9, %rcx + rorq $4, %rax + xorq %r11, %rcx + vpsrlq $61, %xmm0, %xmm10 + vpsllq $3, %xmm0, %xmm11 + # rnd_1: 4 - 6 + xorq %r9, %rax + addq %rcx, %r12 + rorq $14, %rax + xorq %r14, %rbx + addq %rax, %r12 + movq %r13, %rcx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 7 - 8 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r13, %rcx + xorq %r14, %rdx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm0, %xmm11 + # rnd_1: 9 - 10 + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 11 - 11 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + vpaddq %xmm1, %xmm8, %xmm1 + # msg_sched done: 2-5 + # msg_sched: 4-5 + # rnd_0: 0 - 0 + rorq $23, %rax + vpalignr $8, %xmm2, %xmm3, %xmm12 + vpalignr $8, %xmm6, %xmm7, %xmm13 + # rnd_0: 1 - 1 + movq %r12, %rdx + movq %r9, %rcx + addq 32(%rsp), %r11 + xorq %r10, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 3 + xorq %r8, %rax + andq %r8, %rcx + rorq $4, %rax + xorq %r10, %rcx + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 4 - 5 + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 6 - 7 + addq %rax, %r11 + movq %r12, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 8 - 9 + xorq %r12, %rcx + xorq %r13, %rbx + rorq $6, %rcx + addq %r11, %r15 + 
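# (descriptive note) sigma0(w[t-15]) = ROTR^1 ^ ROTR^8 ^ SHR^7 is built from the vpsrlq $0x01/vpsllq $63 and vpsrlq $8/vpsllq $56 rotate pairs plus the vpsrlq $7 shift; the vpaddq with %xmm13 adds w[t-7] to w[t-16] before both sigma terms are accumulated into the schedule word. +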
vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm2, %xmm13, %xmm2 + # rnd_0: 10 - 11 + xorq %r12, %rcx + addq %rbx, %r11 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + # rnd_1: 0 - 0 + rorq $23, %rax + vpaddq %xmm2, %xmm8, %xmm2 + # rnd_1: 1 - 1 + movq %r11, %rbx + movq %r8, %rcx + addq 40(%rsp), %r10 + xorq %r9, %rcx + vpsrlq $19, %xmm1, %xmm8 + vpsllq $45, %xmm1, %xmm9 + # rnd_1: 2 - 3 + xorq %r15, %rax + andq %r15, %rcx + rorq $4, %rax + xorq %r9, %rcx + vpsrlq $61, %xmm1, %xmm10 + vpsllq $3, %xmm1, %xmm11 + # rnd_1: 4 - 6 + xorq %r15, %rax + addq %rcx, %r10 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 7 - 8 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r11, %rcx + xorq %r12, %rdx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm1, %xmm11 + # rnd_1: 9 - 10 + rorq $6, %rcx + addq %r10, %r14 + xorq %r11, %rcx + addq %rdx, %r10 + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 11 - 11 + rorq $28, %rcx + movq %r14, %rax + addq %rcx, %r10 + vpaddq %xmm2, %xmm8, %xmm2 + # msg_sched done: 4-7 + # msg_sched: 6-7 + # rnd_0: 0 - 0 + rorq $23, %rax + vpalignr $8, %xmm3, %xmm4, %xmm12 + vpalignr $8, %xmm7, %xmm0, %xmm13 + # rnd_0: 1 - 1 + movq %r10, %rdx + movq %r15, %rcx + addq 48(%rsp), %r9 + xorq %r8, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 3 + xorq %r14, %rax + andq %r14, %rcx + rorq $4, %rax + xorq %r8, %rcx + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 4 - 5 + xorq %r14, %rax + addq %rcx, %r9 + rorq $14, %rax + xorq %r11, %rdx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 6 - 7 + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 8 - 9 + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm3, %xmm13, %xmm3 + # rnd_0: 10 - 11 + xorq %r10, %rcx + addq %rbx, %r9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + # rnd_1: 0 - 0 + rorq $23, %rax + vpaddq %xmm3, %xmm8, %xmm3 + # rnd_1: 1 - 1 + movq %r9, %rbx + movq %r14, %rcx + addq 56(%rsp), %r8 + xorq %r15, %rcx + vpsrlq $19, %xmm2, %xmm8 + vpsllq $45, %xmm2, %xmm9 + # rnd_1: 2 - 3 + xorq %r13, %rax + andq %r13, %rcx + rorq $4, %rax + xorq %r15, %rcx + vpsrlq $61, %xmm2, %xmm10 + vpsllq $3, %xmm2, %xmm11 + # rnd_1: 4 - 6 + xorq %r13, %rax + addq %rcx, %r8 + rorq $14, %rax + xorq %r10, %rbx + addq %rax, %r8 + movq %r9, %rcx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 7 - 8 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r9, %rcx + xorq %r10, %rdx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm2, %xmm11 + # rnd_1: 9 - 10 + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 11 - 11 + rorq $28, %rcx + movq %r12, %rax + addq %rcx, %r8 + vpaddq %xmm3, %xmm8, %xmm3 + # msg_sched done: 6-9 + # msg_sched: 8-9 + # rnd_0: 0 - 0 + rorq $23, %rax + vpalignr $8, %xmm4, %xmm5, %xmm12 + vpalignr $8, %xmm0, %xmm1, %xmm13 + # rnd_0: 1 - 1 + movq %r8, %rdx + movq %r13, %rcx + addq 64(%rsp), %r15 + xorq %r14, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 3 + xorq %r12, %rax + andq %r12, %rcx + rorq $4, %rax + xorq %r14, %rcx + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 4 - 5 + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 6 - 7 + addq %rax, %r15 + movq %r8, 
%rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 8 - 9 + xorq %r8, %rcx + xorq %r9, %rbx + rorq $6, %rcx + addq %r15, %r11 + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm4, %xmm13, %xmm4 + # rnd_0: 10 - 11 + xorq %r8, %rcx + addq %rbx, %r15 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + # rnd_1: 0 - 0 + rorq $23, %rax + vpaddq %xmm4, %xmm8, %xmm4 + # rnd_1: 1 - 1 + movq %r15, %rbx + movq %r12, %rcx + addq 72(%rsp), %r14 + xorq %r13, %rcx + vpsrlq $19, %xmm3, %xmm8 + vpsllq $45, %xmm3, %xmm9 + # rnd_1: 2 - 3 + xorq %r11, %rax + andq %r11, %rcx + rorq $4, %rax + xorq %r13, %rcx + vpsrlq $61, %xmm3, %xmm10 + vpsllq $3, %xmm3, %xmm11 + # rnd_1: 4 - 6 + xorq %r11, %rax + addq %rcx, %r14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 7 - 8 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r15, %rcx + xorq %r8, %rdx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm3, %xmm11 + # rnd_1: 9 - 10 + rorq $6, %rcx + addq %r14, %r10 + xorq %r15, %rcx + addq %rdx, %r14 + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 11 - 11 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + vpaddq %xmm4, %xmm8, %xmm4 + # msg_sched done: 8-11 + # msg_sched: 10-11 + # rnd_0: 0 - 0 + rorq $23, %rax + vpalignr $8, %xmm5, %xmm6, %xmm12 + vpalignr $8, %xmm1, %xmm2, %xmm13 + # rnd_0: 1 - 1 + movq %r14, %rdx + movq %r11, %rcx + addq 80(%rsp), %r13 + xorq %r12, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 3 + xorq %r10, %rax + andq %r10, %rcx + rorq $4, %rax + xorq %r12, %rcx + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 4 - 5 + xorq %r10, %rax + addq %rcx, %r13 + rorq $14, %rax + xorq %r15, %rdx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 6 - 7 + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 8 - 9 + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm5, %xmm13, %xmm5 + # rnd_0: 10 - 11 + xorq %r14, %rcx + addq %rbx, %r13 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + # rnd_1: 0 - 0 + rorq $23, %rax + vpaddq %xmm5, %xmm8, %xmm5 + # rnd_1: 1 - 1 + movq %r13, %rbx + movq %r10, %rcx + addq 88(%rsp), %r12 + xorq %r11, %rcx + vpsrlq $19, %xmm4, %xmm8 + vpsllq $45, %xmm4, %xmm9 + # rnd_1: 2 - 3 + xorq %r9, %rax + andq %r9, %rcx + rorq $4, %rax + xorq %r11, %rcx + vpsrlq $61, %xmm4, %xmm10 + vpsllq $3, %xmm4, %xmm11 + # rnd_1: 4 - 6 + xorq %r9, %rax + addq %rcx, %r12 + rorq $14, %rax + xorq %r14, %rbx + addq %rax, %r12 + movq %r13, %rcx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 7 - 8 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r13, %rcx + xorq %r14, %rdx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm4, %xmm11 + # rnd_1: 9 - 10 + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 11 - 11 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + vpaddq %xmm5, %xmm8, %xmm5 + # msg_sched done: 10-13 + # msg_sched: 12-13 + # rnd_0: 0 - 0 + rorq $23, %rax + vpalignr $8, %xmm6, %xmm7, %xmm12 + vpalignr $8, %xmm2, %xmm3, %xmm13 + # rnd_0: 1 - 1 + movq %r12, %rdx + movq %r9, %rcx + addq 96(%rsp), %r11 + xorq %r10, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 3 + xorq %r8, %rax + andq %r8, %rcx + rorq $4, %rax + xorq %r10, %rcx + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, 
%xmm11 + # rnd_0: 4 - 5 + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 6 - 7 + addq %rax, %r11 + movq %r12, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 8 - 9 + xorq %r12, %rcx + xorq %r13, %rbx + rorq $6, %rcx + addq %r11, %r15 + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm6, %xmm13, %xmm6 + # rnd_0: 10 - 11 + xorq %r12, %rcx + addq %rbx, %r11 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + # rnd_1: 0 - 0 + rorq $23, %rax + vpaddq %xmm6, %xmm8, %xmm6 + # rnd_1: 1 - 1 + movq %r11, %rbx + movq %r8, %rcx + addq 104(%rsp), %r10 + xorq %r9, %rcx + vpsrlq $19, %xmm5, %xmm8 + vpsllq $45, %xmm5, %xmm9 + # rnd_1: 2 - 3 + xorq %r15, %rax + andq %r15, %rcx + rorq $4, %rax + xorq %r9, %rcx + vpsrlq $61, %xmm5, %xmm10 + vpsllq $3, %xmm5, %xmm11 + # rnd_1: 4 - 6 + xorq %r15, %rax + addq %rcx, %r10 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 7 - 8 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r11, %rcx + xorq %r12, %rdx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm5, %xmm11 + # rnd_1: 9 - 10 + rorq $6, %rcx + addq %r10, %r14 + xorq %r11, %rcx + addq %rdx, %r10 + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 11 - 11 + rorq $28, %rcx + movq %r14, %rax + addq %rcx, %r10 + vpaddq %xmm6, %xmm8, %xmm6 + # msg_sched done: 12-15 + # msg_sched: 14-15 + # rnd_0: 0 - 0 + rorq $23, %rax + vpalignr $8, %xmm7, %xmm0, %xmm12 + vpalignr $8, %xmm3, %xmm4, %xmm13 + # rnd_0: 1 - 1 + movq %r10, %rdx + movq %r15, %rcx + addq 112(%rsp), %r9 + xorq %r8, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 3 + xorq %r14, %rax + andq %r14, %rcx + rorq $4, %rax + xorq %r8, %rcx + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 4 - 5 + xorq %r14, %rax + addq %rcx, %r9 + rorq $14, %rax + xorq %r11, %rdx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 6 - 7 + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 8 - 9 + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm7, %xmm13, %xmm7 + # rnd_0: 10 - 11 + xorq %r10, %rcx + addq %rbx, %r9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + # rnd_1: 0 - 0 + rorq $23, %rax + vpaddq %xmm7, %xmm8, %xmm7 + # rnd_1: 1 - 1 + movq %r9, %rbx + movq %r14, %rcx + addq 120(%rsp), %r8 + xorq %r15, %rcx + vpsrlq $19, %xmm6, %xmm8 + vpsllq $45, %xmm6, %xmm9 + # rnd_1: 2 - 3 + xorq %r13, %rax + andq %r13, %rcx + rorq $4, %rax + xorq %r15, %rcx + vpsrlq $61, %xmm6, %xmm10 + vpsllq $3, %xmm6, %xmm11 + # rnd_1: 4 - 6 + xorq %r13, %rax + addq %rcx, %r8 + rorq $14, %rax + xorq %r10, %rbx + addq %rax, %r8 + movq %r9, %rcx + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 7 - 8 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r9, %rcx + xorq %r10, %rdx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm6, %xmm11 + # rnd_1: 9 - 10 + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 11 - 11 + rorq $28, %rcx + movq %r12, %rax + addq %rcx, %r8 + vpaddq %xmm7, %xmm8, %xmm7 + # msg_sched done: 14-17 + movq 136(%rsp), %rdx + vpaddq (%rdx), %xmm0, %xmm8 + vpaddq 16(%rdx), %xmm1, %xmm9 + vmovdqu %xmm8, (%rsp) + vmovdqu %xmm9, 16(%rsp) + vpaddq 32(%rdx), %xmm2, %xmm8 + vpaddq 48(%rdx), %xmm3, %xmm9 + vmovdqu %xmm8, 
32(%rsp) + vmovdqu %xmm9, 48(%rsp) + vpaddq 64(%rdx), %xmm4, %xmm8 + vpaddq 80(%rdx), %xmm5, %xmm9 + vmovdqu %xmm8, 64(%rsp) + vmovdqu %xmm9, 80(%rsp) + vpaddq 96(%rdx), %xmm6, %xmm8 + vpaddq 112(%rdx), %xmm7, %xmm9 + vmovdqu %xmm8, 96(%rsp) + vmovdqu %xmm9, 112(%rsp) + subl $0x01, 128(%rsp) + jne L_sha512_len_avx1_start + # rnd_all_2: 0-1 + # rnd_0: 0 - 11 + rorq $23, %rax + movq %r8, %rdx + movq %r13, %rcx + addq (%rsp), %r15 + xorq %r14, %rcx + xorq %r12, %rax + andq %r12, %rcx + rorq $4, %rax + xorq %r14, %rcx + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + addq %rax, %r15 + movq %r8, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r8, %rcx + xorq %r9, %rbx + rorq $6, %rcx + addq %r15, %r11 + xorq %r8, %rcx + addq %rbx, %r15 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + # rnd_1: 0 - 11 + rorq $23, %rax + movq %r15, %rbx + movq %r12, %rcx + addq 8(%rsp), %r14 + xorq %r13, %rcx + xorq %r11, %rax + andq %r11, %rcx + rorq $4, %rax + xorq %r13, %rcx + xorq %r11, %rax + addq %rcx, %r14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r15, %rcx + xorq %r8, %rdx + rorq $6, %rcx + addq %r14, %r10 + xorq %r15, %rcx + addq %rdx, %r14 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + # rnd_all_2: 2-3 + # rnd_0: 0 - 11 + rorq $23, %rax + movq %r14, %rdx + movq %r11, %rcx + addq 16(%rsp), %r13 + xorq %r12, %rcx + xorq %r10, %rax + andq %r10, %rcx + rorq $4, %rax + xorq %r12, %rcx + xorq %r10, %rax + addq %rcx, %r13 + rorq $14, %rax + xorq %r15, %rdx + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + xorq %r14, %rcx + addq %rbx, %r13 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + # rnd_1: 0 - 11 + rorq $23, %rax + movq %r13, %rbx + movq %r10, %rcx + addq 24(%rsp), %r12 + xorq %r11, %rcx + xorq %r9, %rax + andq %r9, %rcx + rorq $4, %rax + xorq %r11, %rcx + xorq %r9, %rax + addq %rcx, %r12 + rorq $14, %rax + xorq %r14, %rbx + addq %rax, %r12 + movq %r13, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r13, %rcx + xorq %r14, %rdx + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + # rnd_all_2: 4-5 + # rnd_0: 0 - 11 + rorq $23, %rax + movq %r12, %rdx + movq %r9, %rcx + addq 32(%rsp), %r11 + xorq %r10, %rcx + xorq %r8, %rax + andq %r8, %rcx + rorq $4, %rax + xorq %r10, %rcx + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + addq %rax, %r11 + movq %r12, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r12, %rcx + xorq %r13, %rbx + rorq $6, %rcx + addq %r11, %r15 + xorq %r12, %rcx + addq %rbx, %r11 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + # rnd_1: 0 - 11 + rorq $23, %rax + movq %r11, %rbx + movq %r8, %rcx + addq 40(%rsp), %r10 + xorq %r9, %rcx + xorq %r15, %rax + andq %r15, %rcx + rorq $4, %rax + xorq %r9, %rcx + xorq %r15, %rax + addq %rcx, %r10 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r11, %rcx + xorq %r12, %rdx + rorq $6, %rcx + addq %r10, %r14 + xorq %r11, %rcx + addq %rdx, %r10 + rorq $28, %rcx + movq %r14, %rax + addq %rcx, %r10 + # rnd_all_2: 6-7 + # rnd_0: 0 - 11 + rorq $23, %rax + movq %r10, %rdx + movq %r15, %rcx + addq 48(%rsp), %r9 + xorq %r8, %rcx + xorq %r14, %rax + andq %r14, %rcx + rorq $4, %rax + xorq %r8, %rcx + xorq %r14, %rax + addq %rcx, %r9 + rorq $14, %rax + xorq %r11, %rdx + addq %rax, %r9 + movq %r10, %rcx + andq 
%rdx, %rbx + rorq $5, %rcx + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + xorq %r10, %rcx + addq %rbx, %r9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + # rnd_1: 0 - 11 + rorq $23, %rax + movq %r9, %rbx + movq %r14, %rcx + addq 56(%rsp), %r8 + xorq %r15, %rcx + xorq %r13, %rax + andq %r13, %rcx + rorq $4, %rax + xorq %r15, %rcx + xorq %r13, %rax + addq %rcx, %r8 + rorq $14, %rax + xorq %r10, %rbx + addq %rax, %r8 + movq %r9, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r9, %rcx + xorq %r10, %rdx + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + rorq $28, %rcx + movq %r12, %rax + addq %rcx, %r8 + # rnd_all_2: 8-9 + # rnd_0: 0 - 11 + rorq $23, %rax + movq %r8, %rdx + movq %r13, %rcx + addq 64(%rsp), %r15 + xorq %r14, %rcx + xorq %r12, %rax + andq %r12, %rcx + rorq $4, %rax + xorq %r14, %rcx + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + addq %rax, %r15 + movq %r8, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r8, %rcx + xorq %r9, %rbx + rorq $6, %rcx + addq %r15, %r11 + xorq %r8, %rcx + addq %rbx, %r15 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + # rnd_1: 0 - 11 + rorq $23, %rax + movq %r15, %rbx + movq %r12, %rcx + addq 72(%rsp), %r14 + xorq %r13, %rcx + xorq %r11, %rax + andq %r11, %rcx + rorq $4, %rax + xorq %r13, %rcx + xorq %r11, %rax + addq %rcx, %r14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r15, %rcx + xorq %r8, %rdx + rorq $6, %rcx + addq %r14, %r10 + xorq %r15, %rcx + addq %rdx, %r14 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + # rnd_all_2: 10-11 + # rnd_0: 0 - 11 + rorq $23, %rax + movq %r14, %rdx + movq %r11, %rcx + addq 80(%rsp), %r13 + xorq %r12, %rcx + xorq %r10, %rax + andq %r10, %rcx + rorq $4, %rax + xorq %r12, %rcx + xorq %r10, %rax + addq %rcx, %r13 + rorq $14, %rax + xorq %r15, %rdx + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + xorq %r14, %rcx + addq %rbx, %r13 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + # rnd_1: 0 - 11 + rorq $23, %rax + movq %r13, %rbx + movq %r10, %rcx + addq 88(%rsp), %r12 + xorq %r11, %rcx + xorq %r9, %rax + andq %r9, %rcx + rorq $4, %rax + xorq %r11, %rcx + xorq %r9, %rax + addq %rcx, %r12 + rorq $14, %rax + xorq %r14, %rbx + addq %rax, %r12 + movq %r13, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r13, %rcx + xorq %r14, %rdx + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + # rnd_all_2: 12-13 + # rnd_0: 0 - 11 + rorq $23, %rax + movq %r12, %rdx + movq %r9, %rcx + addq 96(%rsp), %r11 + xorq %r10, %rcx + xorq %r8, %rax + andq %r8, %rcx + rorq $4, %rax + xorq %r10, %rcx + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + addq %rax, %r11 + movq %r12, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r12, %rcx + xorq %r13, %rbx + rorq $6, %rcx + addq %r11, %r15 + xorq %r12, %rcx + addq %rbx, %r11 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + # rnd_1: 0 - 11 + rorq $23, %rax + movq %r11, %rbx + movq %r8, %rcx + addq 104(%rsp), %r10 + xorq %r9, %rcx + xorq %r15, %rax + andq %r15, %rcx + rorq $4, %rax + xorq %r9, %rcx + xorq %r15, %rax + addq %rcx, %r10 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r11, %rcx + xorq %r12, %rdx + rorq $6, %rcx + addq %r10, %r14 + xorq %r11, %rcx + addq %rdx, %r10 + rorq $28, %rcx + movq 
%r14, %rax + addq %rcx, %r10 + # rnd_all_2: 14-15 + # rnd_0: 0 - 11 + rorq $23, %rax + movq %r10, %rdx + movq %r15, %rcx + addq 112(%rsp), %r9 + xorq %r8, %rcx + xorq %r14, %rax + andq %r14, %rcx + rorq $4, %rax + xorq %r8, %rcx + xorq %r14, %rax + addq %rcx, %r9 + rorq $14, %rax + xorq %r11, %rdx + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + xorq %r10, %rcx + addq %rbx, %r9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + # rnd_1: 0 - 11 + rorq $23, %rax + movq %r9, %rbx + movq %r14, %rcx + addq 120(%rsp), %r8 + xorq %r15, %rcx + xorq %r13, %rax + andq %r13, %rcx + rorq $4, %rax + xorq %r15, %rcx + xorq %r13, %rax + addq %rcx, %r8 + rorq $14, %rax + xorq %r10, %rbx + addq %rax, %r8 + movq %r9, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r9, %rcx + xorq %r10, %rdx + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + rorq $28, %rcx + movq %r12, %rax + addq %rcx, %r8 + addq (%rdi), %r8 + addq 8(%rdi), %r9 + addq 16(%rdi), %r10 + addq 24(%rdi), %r11 + addq 32(%rdi), %r12 + addq 40(%rdi), %r13 + addq 48(%rdi), %r14 + addq 56(%rdi), %r15 + leaq L_avx1_sha512_k(%rip), %rdx + addq $0x80, %rsi + subl $0x80, %ebp + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, 32(%rdi) + movq %r13, 40(%rdi) + movq %r14, 48(%rdi) + movq %r15, 56(%rdi) + jnz L_sha512_len_avx1_begin + xorq %rax, %rax + vzeroupper + addq $0x90, %rsp + popq %rbp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha512_AVX1_Len,.-Transform_Sha512_AVX1_Len +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_rorx_sha512_k: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xfc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x6ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x6f067aa72176fba,0xa637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 
0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_rorx_sha512_flip_mask: +.quad 0x1020304050607, 0x8090a0b0c0d0e0f +#ifndef __APPLE__ +.text +.globl Transform_Sha512_AVX1_RORX +.type Transform_Sha512_AVX1_RORX,@function +.align 4 +Transform_Sha512_AVX1_RORX: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX1_RORX +.p2align 2 +_Transform_Sha512_AVX1_RORX: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x88, %rsp + leaq 64(%rdi), %rax + vmovdqa L_avx1_rorx_sha512_flip_mask(%rip), %xmm14 + movq (%rdi), %r8 + movq 8(%rdi), %r9 + movq 16(%rdi), %r10 + movq 24(%rdi), %r11 + movq 32(%rdi), %r12 + movq 40(%rdi), %r13 + movq 48(%rdi), %r14 + movq 56(%rdi), %r15 + vmovdqu (%rax), %xmm0 + vmovdqu 16(%rax), %xmm1 + vpshufb %xmm14, %xmm0, %xmm0 + vpshufb %xmm14, %xmm1, %xmm1 + vmovdqu 32(%rax), %xmm2 + vmovdqu 48(%rax), %xmm3 + vpshufb %xmm14, %xmm2, %xmm2 + vpshufb %xmm14, %xmm3, %xmm3 + vmovdqu 64(%rax), %xmm4 + vmovdqu 80(%rax), %xmm5 + vpshufb %xmm14, %xmm4, %xmm4 + vpshufb %xmm14, %xmm5, %xmm5 + vmovdqu 96(%rax), %xmm6 + vmovdqu 112(%rax), %xmm7 + vpshufb %xmm14, %xmm6, %xmm6 + vpshufb %xmm14, %xmm7, %xmm7 + movl $4, 128(%rsp) + leaq L_avx1_rorx_sha512_k(%rip), %rsi + movq %r9, %rbx + xorq %rdx, %rdx + xorq %r10, %rbx + vpaddq (%rsi), %xmm0, %xmm8 + vpaddq 16(%rsi), %xmm1, %xmm9 + vmovdqu %xmm8, (%rsp) + vmovdqu %xmm9, 16(%rsp) + vpaddq 32(%rsi), %xmm2, %xmm8 + vpaddq 48(%rsi), %xmm3, %xmm9 + vmovdqu %xmm8, 32(%rsp) + vmovdqu %xmm9, 48(%rsp) + vpaddq 64(%rsi), %xmm4, %xmm8 + vpaddq 80(%rsi), %xmm5, %xmm9 + vmovdqu %xmm8, 64(%rsp) + vmovdqu %xmm9, 80(%rsp) + vpaddq 96(%rsi), %xmm6, %xmm8 + vpaddq 112(%rsi), %xmm7, %xmm9 + vmovdqu %xmm8, 96(%rsp) + vmovdqu %xmm9, 112(%rsp) + # Start of 16 rounds +L_sha256_len_avx1_rorx_start: + addq $0x80, %rsi + # msg_sched: 0-1 + # rnd_0: 0 - 0 + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + vpalignr $8, %xmm0, %xmm1, %xmm12 + vpalignr $8, %xmm4, %xmm5, %xmm13 + # rnd_0: 1 - 1 + addq (%rsp), %r15 + movq %r13, %rdx + xorq %rax, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 2 + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 3 - 3 + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 4 - 4 + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 5 - 5 + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm0, %xmm13, %xmm0 + # rnd_0: 6 - 7 + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + vpaddq %xmm0, %xmm8, %xmm0 + # rnd_1: 0 - 0 + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + vpsrlq $19, %xmm7, %xmm8 + vpsllq $45, %xmm7, %xmm9 + # rnd_1: 1 - 1 + addq 8(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + vpsrlq $61, %xmm7, %xmm10 + vpsllq $3, %xmm7, %xmm11 + # rnd_1: 2 - 2 + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 3 - 4 + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + rorxq $34, %r15, 
%rcx + xorq %r13, %rbx + xorq %rax, %rcx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm7, %xmm11 + # rnd_1: 5 - 6 + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + movq %r8, %rbx + addq %r14, %r10 + xorq %r15, %rbx + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 7 - 7 + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + vpaddq %xmm0, %xmm8, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 2-3 + # rnd_0: 0 - 0 + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + vpalignr $8, %xmm1, %xmm2, %xmm12 + vpalignr $8, %xmm5, %xmm6, %xmm13 + # rnd_0: 1 - 1 + addq 16(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 2 + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 3 - 3 + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 4 - 4 + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 5 - 5 + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm1, %xmm13, %xmm1 + # rnd_0: 6 - 7 + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + vpaddq %xmm1, %xmm8, %xmm1 + # rnd_1: 0 - 0 + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + vpsrlq $19, %xmm0, %xmm8 + vpsllq $45, %xmm0, %xmm9 + # rnd_1: 1 - 1 + addq 24(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + vpsrlq $61, %xmm0, %xmm10 + vpsllq $3, %xmm0, %xmm11 + # rnd_1: 2 - 2 + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 3 - 4 + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm0, %xmm11 + # rnd_1: 5 - 6 + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + movq %r14, %rbx + addq %r12, %r8 + xorq %r13, %rbx + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 7 - 7 + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + vpaddq %xmm1, %xmm8, %xmm1 + # msg_sched done: 2-5 + # msg_sched: 4-5 + # rnd_0: 0 - 0 + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + vpalignr $8, %xmm2, %xmm3, %xmm12 + vpalignr $8, %xmm6, %xmm7, %xmm13 + # rnd_0: 1 - 1 + addq 32(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 2 + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 3 - 3 + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 4 - 4 + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 5 - 5 + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm2, %xmm13, %xmm2 + # rnd_0: 6 - 7 + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + vpaddq %xmm2, %xmm8, %xmm2 + # rnd_1: 0 - 0 + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + vpsrlq $19, %xmm1, %xmm8 + vpsllq $45, %xmm1, %xmm9 + # rnd_1: 1 - 1 + addq 40(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + vpsrlq $61, %xmm1, %xmm10 + vpsllq $3, %xmm1, %xmm11 + # rnd_1: 2 - 2 + xorq %r9, %rbx + rorxq 
$41, %r15, %rax + xorq %rcx, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 3 - 4 + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm1, %xmm11 + # rnd_1: 5 - 6 + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + movq %r12, %rbx + addq %r10, %r14 + xorq %r11, %rbx + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 7 - 7 + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + vpaddq %xmm2, %xmm8, %xmm2 + # msg_sched done: 4-7 + # msg_sched: 6-7 + # rnd_0: 0 - 0 + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + vpalignr $8, %xmm3, %xmm4, %xmm12 + vpalignr $8, %xmm7, %xmm0, %xmm13 + # rnd_0: 1 - 1 + addq 48(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 2 + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 3 - 3 + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 4 - 4 + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 5 - 5 + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm3, %xmm13, %xmm3 + # rnd_0: 6 - 7 + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + vpaddq %xmm3, %xmm8, %xmm3 + # rnd_1: 0 - 0 + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + vpsrlq $19, %xmm2, %xmm8 + vpsllq $45, %xmm2, %xmm9 + # rnd_1: 1 - 1 + addq 56(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + vpsrlq $61, %xmm2, %xmm10 + vpsllq $3, %xmm2, %xmm11 + # rnd_1: 2 - 2 + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 3 - 4 + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, %rcx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm2, %xmm11 + # rnd_1: 5 - 6 + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + movq %r10, %rbx + addq %r8, %r12 + xorq %r9, %rbx + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 7 - 7 + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + vpaddq %xmm3, %xmm8, %xmm3 + # msg_sched done: 6-9 + # msg_sched: 8-9 + # rnd_0: 0 - 0 + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + vpalignr $8, %xmm4, %xmm5, %xmm12 + vpalignr $8, %xmm0, %xmm1, %xmm13 + # rnd_0: 1 - 1 + addq 64(%rsp), %r15 + movq %r13, %rdx + xorq %rax, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 2 + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 3 - 3 + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 4 - 4 + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 5 - 5 + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm4, %xmm13, %xmm4 + # rnd_0: 6 - 7 + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + vpaddq %xmm4, %xmm8, %xmm4 + # rnd_1: 0 - 0 + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + vpsrlq $19, %xmm3, %xmm8 + vpsllq $45, %xmm3, 
%xmm9 + # rnd_1: 1 - 1 + addq 72(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + vpsrlq $61, %xmm3, %xmm10 + vpsllq $3, %xmm3, %xmm11 + # rnd_1: 2 - 2 + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 3 - 4 + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm3, %xmm11 + # rnd_1: 5 - 6 + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + movq %r8, %rbx + addq %r14, %r10 + xorq %r15, %rbx + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 7 - 7 + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + vpaddq %xmm4, %xmm8, %xmm4 + # msg_sched done: 8-11 + # msg_sched: 10-11 + # rnd_0: 0 - 0 + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + vpalignr $8, %xmm5, %xmm6, %xmm12 + vpalignr $8, %xmm1, %xmm2, %xmm13 + # rnd_0: 1 - 1 + addq 80(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 2 + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 3 - 3 + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 4 - 4 + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 5 - 5 + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm5, %xmm13, %xmm5 + # rnd_0: 6 - 7 + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + vpaddq %xmm5, %xmm8, %xmm5 + # rnd_1: 0 - 0 + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + vpsrlq $19, %xmm4, %xmm8 + vpsllq $45, %xmm4, %xmm9 + # rnd_1: 1 - 1 + addq 88(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + vpsrlq $61, %xmm4, %xmm10 + vpsllq $3, %xmm4, %xmm11 + # rnd_1: 2 - 2 + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 3 - 4 + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm4, %xmm11 + # rnd_1: 5 - 6 + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + movq %r14, %rbx + addq %r12, %r8 + xorq %r13, %rbx + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 7 - 7 + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + vpaddq %xmm5, %xmm8, %xmm5 + # msg_sched done: 10-13 + # msg_sched: 12-13 + # rnd_0: 0 - 0 + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + vpalignr $8, %xmm6, %xmm7, %xmm12 + vpalignr $8, %xmm2, %xmm3, %xmm13 + # rnd_0: 1 - 1 + addq 96(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 2 + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 3 - 3 + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 4 - 4 + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 5 - 5 + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm6, %xmm13, %xmm6 + # rnd_0: 6 - 7 + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + andq %rdx, %rbx + 
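# (descriptive note) The RORX variant computes Sigma0/Sigma1 directly with three-operand rorxq ($28/$34/$39 and $14/$18/$41), which needs no copy into a scratch register and leaves the flags untouched, shortening the dependency chain relative to the rorq-based path. +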
addq %rax, %r11 + xorq %r13, %rbx + vpaddq %xmm6, %xmm8, %xmm6 + # rnd_1: 0 - 0 + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + vpsrlq $19, %xmm5, %xmm8 + vpsllq $45, %xmm5, %xmm9 + # rnd_1: 1 - 1 + addq 104(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + vpsrlq $61, %xmm5, %xmm10 + vpsllq $3, %xmm5, %xmm11 + # rnd_1: 2 - 2 + xorq %r9, %rbx + rorxq $41, %r15, %rax + xorq %rcx, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 3 - 4 + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm5, %xmm11 + # rnd_1: 5 - 6 + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + movq %r12, %rbx + addq %r10, %r14 + xorq %r11, %rbx + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 7 - 7 + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + vpaddq %xmm6, %xmm8, %xmm6 + # msg_sched done: 12-15 + # msg_sched: 14-15 + # rnd_0: 0 - 0 + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + vpalignr $8, %xmm7, %xmm0, %xmm12 + vpalignr $8, %xmm3, %xmm4, %xmm13 + # rnd_0: 1 - 1 + addq 112(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 2 + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 3 - 3 + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 4 - 4 + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 5 - 5 + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm7, %xmm13, %xmm7 + # rnd_0: 6 - 7 + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + vpaddq %xmm7, %xmm8, %xmm7 + # rnd_1: 0 - 0 + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + vpsrlq $19, %xmm6, %xmm8 + vpsllq $45, %xmm6, %xmm9 + # rnd_1: 1 - 1 + addq 120(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + vpsrlq $61, %xmm6, %xmm10 + vpsllq $3, %xmm6, %xmm11 + # rnd_1: 2 - 2 + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 3 - 4 + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, %rcx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm6, %xmm11 + # rnd_1: 5 - 6 + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + movq %r10, %rbx + addq %r8, %r12 + xorq %r9, %rbx + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 7 - 7 + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + vpaddq %xmm7, %xmm8, %xmm7 + # msg_sched done: 14-17 + vpaddq (%rsi), %xmm0, %xmm8 + vpaddq 16(%rsi), %xmm1, %xmm9 + vmovdqu %xmm8, (%rsp) + vmovdqu %xmm9, 16(%rsp) + vpaddq 32(%rsi), %xmm2, %xmm8 + vpaddq 48(%rsi), %xmm3, %xmm9 + vmovdqu %xmm8, 32(%rsp) + vmovdqu %xmm9, 48(%rsp) + vpaddq 64(%rsi), %xmm4, %xmm8 + vpaddq 80(%rsi), %xmm5, %xmm9 + vmovdqu %xmm8, 64(%rsp) + vmovdqu %xmm9, 80(%rsp) + vpaddq 96(%rsi), %xmm6, %xmm8 + vpaddq 112(%rsi), %xmm7, %xmm9 + vmovdqu %xmm8, 96(%rsp) + vmovdqu %xmm9, 112(%rsp) + subl $0x01, 128(%rsp) + jne L_sha256_len_avx1_rorx_start + # rnd_all_2: 0-1 + # rnd_0: 0 - 7 + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + addq (%rsp), %r15 + movq %r13, %rdx + xorq %rax, %rcx + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + andq 
%r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + # rnd_1: 0 - 7 + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + addq 8(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + movq %r8, %rbx + addq %r14, %r10 + xorq %r15, %rbx + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + # rnd_all_2: 2-3 + # rnd_0: 0 - 7 + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + addq 16(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + # rnd_1: 0 - 7 + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + addq 24(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + movq %r14, %rbx + addq %r12, %r8 + xorq %r13, %rbx + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + # rnd_all_2: 4-5 + # rnd_0: 0 - 7 + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + addq 32(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + # rnd_1: 0 - 7 + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + addq 40(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + xorq %r9, %rbx + rorxq $41, %r15, %rax + xorq %rcx, %rax + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + movq %r12, %rbx + addq %r10, %r14 + xorq %r11, %rbx + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + # rnd_all_2: 6-7 + # rnd_0: 0 - 7 + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + addq 48(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + # rnd_1: 0 - 7 + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + addq 56(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, 
%rcx + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + movq %r10, %rbx + addq %r8, %r12 + xorq %r9, %rbx + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + # rnd_all_2: 8-9 + # rnd_0: 0 - 7 + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + addq 64(%rsp), %r15 + movq %r13, %rdx + xorq %rax, %rcx + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + # rnd_1: 0 - 7 + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + addq 72(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + movq %r8, %rbx + addq %r14, %r10 + xorq %r15, %rbx + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + # rnd_all_2: 10-11 + # rnd_0: 0 - 7 + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + addq 80(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + # rnd_1: 0 - 7 + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + addq 88(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + movq %r14, %rbx + addq %r12, %r8 + xorq %r13, %rbx + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + # rnd_all_2: 12-13 + # rnd_0: 0 - 7 + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + addq 96(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + # rnd_1: 0 - 7 + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + addq 104(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + xorq %r9, %rbx + rorxq $41, %r15, %rax + xorq %rcx, %rax + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + movq %r12, %rbx + addq %r10, %r14 + xorq %r11, %rbx + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + # rnd_all_2: 14-15 + # rnd_0: 0 - 7 + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + addq 112(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + movq %r11, 
%rdx + addq %r9, %r13 + xorq %r10, %rdx + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + # rnd_1: 0 - 7 + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + addq 120(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, %rcx + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + movq %r10, %rbx + addq %r8, %r12 + xorq %r9, %rbx + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + addq %rdx, %r8 + addq %r8, (%rdi) + addq %r9, 8(%rdi) + addq %r10, 16(%rdi) + addq %r11, 24(%rdi) + addq %r12, 32(%rdi) + addq %r13, 40(%rdi) + addq %r14, 48(%rdi) + addq %r15, 56(%rdi) + xorq %rax, %rax + vzeroupper + addq $0x88, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha512_AVX1_RORX,.-Transform_Sha512_AVX1_RORX +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl Transform_Sha512_AVX1_RORX_Len +.type Transform_Sha512_AVX1_RORX_Len,@function +.align 4 +Transform_Sha512_AVX1_RORX_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX1_RORX_Len +.p2align 2 +_Transform_Sha512_AVX1_RORX_Len: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbp + movq %rsi, %rbp + subq $0x90, %rsp + movq 224(%rdi), %rsi + leaq L_avx1_rorx_sha512_k(%rip), %rcx + vmovdqa L_avx1_rorx_sha512_flip_mask(%rip), %xmm14 + movq (%rdi), %r8 + movq 8(%rdi), %r9 + movq 16(%rdi), %r10 + movq 24(%rdi), %r11 + movq 32(%rdi), %r12 + movq 40(%rdi), %r13 + movq 48(%rdi), %r14 + movq 56(%rdi), %r15 + # Start of loop processing a block +L_sha512_len_avx1_rorx_begin: + vmovdqu (%rsi), %xmm0 + vmovdqu 16(%rsi), %xmm1 + vpshufb %xmm14, %xmm0, %xmm0 + vpshufb %xmm14, %xmm1, %xmm1 + vmovdqu 32(%rsi), %xmm2 + vmovdqu 48(%rsi), %xmm3 + vpshufb %xmm14, %xmm2, %xmm2 + vpshufb %xmm14, %xmm3, %xmm3 + vmovdqu 64(%rsi), %xmm4 + vmovdqu 80(%rsi), %xmm5 + vpshufb %xmm14, %xmm4, %xmm4 + vpshufb %xmm14, %xmm5, %xmm5 + vmovdqu 96(%rsi), %xmm6 + vmovdqu 112(%rsi), %xmm7 + vpshufb %xmm14, %xmm6, %xmm6 + vpshufb %xmm14, %xmm7, %xmm7 + movl $4, 128(%rsp) + movq %r9, %rbx + xorq %rdx, %rdx + xorq %r10, %rbx + vpaddq (%rcx), %xmm0, %xmm8 + vpaddq 16(%rcx), %xmm1, %xmm9 + vmovdqu %xmm8, (%rsp) + vmovdqu %xmm9, 16(%rsp) + vpaddq 32(%rcx), %xmm2, %xmm8 + vpaddq 48(%rcx), %xmm3, %xmm9 + vmovdqu %xmm8, 32(%rsp) + vmovdqu %xmm9, 48(%rsp) + vpaddq 64(%rcx), %xmm4, %xmm8 + vpaddq 80(%rcx), %xmm5, %xmm9 + vmovdqu %xmm8, 64(%rsp) + vmovdqu %xmm9, 80(%rsp) + vpaddq 96(%rcx), %xmm6, %xmm8 + vpaddq 112(%rcx), %xmm7, %xmm9 + vmovdqu %xmm8, 96(%rsp) + vmovdqu %xmm9, 112(%rsp) + # Start of 16 rounds +L_sha512_len_avx1_rorx_start: + addq $0x80, %rcx + movq %rcx, 136(%rsp) + # msg_sched: 0-1 + # rnd_0: 0 - 0 + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + vpalignr $8, %xmm0, %xmm1, %xmm12 + vpalignr $8, %xmm4, %xmm5, %xmm13 + # rnd_0: 1 - 1 + addq (%rsp), %r15 + movq %r13, %rdx + xorq %rax, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 2 + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 3 - 3 + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 4 - 4 + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # 
rnd_0: 5 - 5 + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm0, %xmm13, %xmm0 + # rnd_0: 6 - 7 + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + vpaddq %xmm0, %xmm8, %xmm0 + # rnd_1: 0 - 0 + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + vpsrlq $19, %xmm7, %xmm8 + vpsllq $45, %xmm7, %xmm9 + # rnd_1: 1 - 1 + addq 8(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + vpsrlq $61, %xmm7, %xmm10 + vpsllq $3, %xmm7, %xmm11 + # rnd_1: 2 - 2 + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 3 - 4 + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm7, %xmm11 + # rnd_1: 5 - 6 + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + movq %r8, %rbx + addq %r14, %r10 + xorq %r15, %rbx + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 7 - 7 + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + vpaddq %xmm0, %xmm8, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 2-3 + # rnd_0: 0 - 0 + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + vpalignr $8, %xmm1, %xmm2, %xmm12 + vpalignr $8, %xmm5, %xmm6, %xmm13 + # rnd_0: 1 - 1 + addq 16(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 2 + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 3 - 3 + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 4 - 4 + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 5 - 5 + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm1, %xmm13, %xmm1 + # rnd_0: 6 - 7 + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + vpaddq %xmm1, %xmm8, %xmm1 + # rnd_1: 0 - 0 + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + vpsrlq $19, %xmm0, %xmm8 + vpsllq $45, %xmm0, %xmm9 + # rnd_1: 1 - 1 + addq 24(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + vpsrlq $61, %xmm0, %xmm10 + vpsllq $3, %xmm0, %xmm11 + # rnd_1: 2 - 2 + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 3 - 4 + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm0, %xmm11 + # rnd_1: 5 - 6 + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + movq %r14, %rbx + addq %r12, %r8 + xorq %r13, %rbx + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 7 - 7 + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + vpaddq %xmm1, %xmm8, %xmm1 + # msg_sched done: 2-5 + # msg_sched: 4-5 + # rnd_0: 0 - 0 + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + vpalignr $8, %xmm2, %xmm3, %xmm12 + vpalignr $8, %xmm6, %xmm7, %xmm13 + # rnd_0: 1 - 1 + addq 32(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 2 + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 3 - 3 + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + vpor %xmm9, 
%xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 4 - 4 + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 5 - 5 + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm2, %xmm13, %xmm2 + # rnd_0: 6 - 7 + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + vpaddq %xmm2, %xmm8, %xmm2 + # rnd_1: 0 - 0 + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + vpsrlq $19, %xmm1, %xmm8 + vpsllq $45, %xmm1, %xmm9 + # rnd_1: 1 - 1 + addq 40(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + vpsrlq $61, %xmm1, %xmm10 + vpsllq $3, %xmm1, %xmm11 + # rnd_1: 2 - 2 + xorq %r9, %rbx + rorxq $41, %r15, %rax + xorq %rcx, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 3 - 4 + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm1, %xmm11 + # rnd_1: 5 - 6 + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + movq %r12, %rbx + addq %r10, %r14 + xorq %r11, %rbx + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 7 - 7 + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + vpaddq %xmm2, %xmm8, %xmm2 + # msg_sched done: 4-7 + # msg_sched: 6-7 + # rnd_0: 0 - 0 + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + vpalignr $8, %xmm3, %xmm4, %xmm12 + vpalignr $8, %xmm7, %xmm0, %xmm13 + # rnd_0: 1 - 1 + addq 48(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 2 + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 3 - 3 + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 4 - 4 + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 5 - 5 + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm3, %xmm13, %xmm3 + # rnd_0: 6 - 7 + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + vpaddq %xmm3, %xmm8, %xmm3 + # rnd_1: 0 - 0 + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + vpsrlq $19, %xmm2, %xmm8 + vpsllq $45, %xmm2, %xmm9 + # rnd_1: 1 - 1 + addq 56(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + vpsrlq $61, %xmm2, %xmm10 + vpsllq $3, %xmm2, %xmm11 + # rnd_1: 2 - 2 + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 3 - 4 + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, %rcx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm2, %xmm11 + # rnd_1: 5 - 6 + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + movq %r10, %rbx + addq %r8, %r12 + xorq %r9, %rbx + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 7 - 7 + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + vpaddq %xmm3, %xmm8, %xmm3 + # msg_sched done: 6-9 + # msg_sched: 8-9 + # rnd_0: 0 - 0 + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + vpalignr $8, %xmm4, %xmm5, %xmm12 + vpalignr $8, %xmm0, %xmm1, %xmm13 + # rnd_0: 1 - 1 + addq 64(%rsp), %r15 + movq %r13, %rdx + xorq %rax, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 2 + xorq %r14, %rdx + rorxq 
$41, %r12, %rax + xorq %rcx, %rax + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 3 - 3 + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 4 - 4 + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 5 - 5 + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm4, %xmm13, %xmm4 + # rnd_0: 6 - 7 + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + vpaddq %xmm4, %xmm8, %xmm4 + # rnd_1: 0 - 0 + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + vpsrlq $19, %xmm3, %xmm8 + vpsllq $45, %xmm3, %xmm9 + # rnd_1: 1 - 1 + addq 72(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + vpsrlq $61, %xmm3, %xmm10 + vpsllq $3, %xmm3, %xmm11 + # rnd_1: 2 - 2 + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 3 - 4 + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm3, %xmm11 + # rnd_1: 5 - 6 + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + movq %r8, %rbx + addq %r14, %r10 + xorq %r15, %rbx + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 7 - 7 + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + vpaddq %xmm4, %xmm8, %xmm4 + # msg_sched done: 8-11 + # msg_sched: 10-11 + # rnd_0: 0 - 0 + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + vpalignr $8, %xmm5, %xmm6, %xmm12 + vpalignr $8, %xmm1, %xmm2, %xmm13 + # rnd_0: 1 - 1 + addq 80(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 2 + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 3 - 3 + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 4 - 4 + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 5 - 5 + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm5, %xmm13, %xmm5 + # rnd_0: 6 - 7 + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + vpaddq %xmm5, %xmm8, %xmm5 + # rnd_1: 0 - 0 + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + vpsrlq $19, %xmm4, %xmm8 + vpsllq $45, %xmm4, %xmm9 + # rnd_1: 1 - 1 + addq 88(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + vpsrlq $61, %xmm4, %xmm10 + vpsllq $3, %xmm4, %xmm11 + # rnd_1: 2 - 2 + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 3 - 4 + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm4, %xmm11 + # rnd_1: 5 - 6 + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + movq %r14, %rbx + addq %r12, %r8 + xorq %r13, %rbx + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 7 - 7 + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + vpaddq %xmm5, %xmm8, %xmm5 + # msg_sched done: 10-13 + # msg_sched: 12-13 + # rnd_0: 0 - 0 + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + vpalignr $8, %xmm6, %xmm7, %xmm12 + vpalignr $8, %xmm2, 
%xmm3, %xmm13 + # rnd_0: 1 - 1 + addq 96(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 2 + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 3 - 3 + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 4 - 4 + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 5 - 5 + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm6, %xmm13, %xmm6 + # rnd_0: 6 - 7 + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + vpaddq %xmm6, %xmm8, %xmm6 + # rnd_1: 0 - 0 + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + vpsrlq $19, %xmm5, %xmm8 + vpsllq $45, %xmm5, %xmm9 + # rnd_1: 1 - 1 + addq 104(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + vpsrlq $61, %xmm5, %xmm10 + vpsllq $3, %xmm5, %xmm11 + # rnd_1: 2 - 2 + xorq %r9, %rbx + rorxq $41, %r15, %rax + xorq %rcx, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 3 - 4 + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm5, %xmm11 + # rnd_1: 5 - 6 + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + movq %r12, %rbx + addq %r10, %r14 + xorq %r11, %rbx + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 7 - 7 + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + vpaddq %xmm6, %xmm8, %xmm6 + # msg_sched done: 12-15 + # msg_sched: 14-15 + # rnd_0: 0 - 0 + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + vpalignr $8, %xmm7, %xmm0, %xmm12 + vpalignr $8, %xmm3, %xmm4, %xmm13 + # rnd_0: 1 - 1 + addq 112(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + vpsrlq $0x01, %xmm12, %xmm8 + vpsllq $63, %xmm12, %xmm9 + # rnd_0: 2 - 2 + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + vpsrlq $8, %xmm12, %xmm10 + vpsllq $56, %xmm12, %xmm11 + # rnd_0: 3 - 3 + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_0: 4 - 4 + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + vpsrlq $7, %xmm12, %xmm11 + vpxor %xmm10, %xmm8, %xmm8 + # rnd_0: 5 - 5 + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + vpxor %xmm11, %xmm8, %xmm8 + vpaddq %xmm7, %xmm13, %xmm7 + # rnd_0: 6 - 7 + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + vpaddq %xmm7, %xmm8, %xmm7 + # rnd_1: 0 - 0 + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + vpsrlq $19, %xmm6, %xmm8 + vpsllq $45, %xmm6, %xmm9 + # rnd_1: 1 - 1 + addq 120(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + vpsrlq $61, %xmm6, %xmm10 + vpsllq $3, %xmm6, %xmm11 + # rnd_1: 2 - 2 + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + vpor %xmm9, %xmm8, %xmm8 + vpor %xmm11, %xmm10, %xmm10 + # rnd_1: 3 - 4 + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, %rcx + vpxor %xmm10, %xmm8, %xmm8 + vpsrlq $6, %xmm6, %xmm11 + # rnd_1: 5 - 6 + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + movq %r10, %rbx + addq %r8, %r12 + xorq %r9, %rbx + vpxor %xmm11, %xmm8, %xmm8 + # rnd_1: 7 - 7 + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + vpaddq %xmm7, %xmm8, %xmm7 
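+ # (annotation added in review; not from upstream wolfSSL) + # Each msg_sched block computes two message-schedule words per + # FIPS 180-4: W[t] = sigma1(W[t-2])+W[t-7]+sigma0(W[t-15])+W[t-16], + # with sigma0 = ROTR1^ROTR8^SHR7 (the $0x01/$63, $8/$56 and $7 + # shift pairs above) and sigma1 = ROTR19^ROTR61^SHR6.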
+ # msg_sched done: 14-17 + movq 136(%rsp), %rcx + vpaddq (%rcx), %xmm0, %xmm8 + vpaddq 16(%rcx), %xmm1, %xmm9 + vmovdqu %xmm8, (%rsp) + vmovdqu %xmm9, 16(%rsp) + vpaddq 32(%rcx), %xmm2, %xmm8 + vpaddq 48(%rcx), %xmm3, %xmm9 + vmovdqu %xmm8, 32(%rsp) + vmovdqu %xmm9, 48(%rsp) + vpaddq 64(%rcx), %xmm4, %xmm8 + vpaddq 80(%rcx), %xmm5, %xmm9 + vmovdqu %xmm8, 64(%rsp) + vmovdqu %xmm9, 80(%rsp) + vpaddq 96(%rcx), %xmm6, %xmm8 + vpaddq 112(%rcx), %xmm7, %xmm9 + vmovdqu %xmm8, 96(%rsp) + vmovdqu %xmm9, 112(%rsp) + subl $0x01, 128(%rsp) + jne L_sha512_len_avx1_rorx_start + vpaddq (%rcx), %xmm0, %xmm8 + vpaddq 16(%rcx), %xmm1, %xmm9 + vmovdqu %xmm8, (%rsp) + vmovdqu %xmm9, 16(%rsp) + vpaddq 32(%rcx), %xmm2, %xmm8 + vpaddq 48(%rcx), %xmm3, %xmm9 + vmovdqu %xmm8, 32(%rsp) + vmovdqu %xmm9, 48(%rsp) + vpaddq 64(%rcx), %xmm4, %xmm8 + vpaddq 80(%rcx), %xmm5, %xmm9 + vmovdqu %xmm8, 64(%rsp) + vmovdqu %xmm9, 80(%rsp) + vpaddq 96(%rcx), %xmm6, %xmm8 + vpaddq 112(%rcx), %xmm7, %xmm9 + vmovdqu %xmm8, 96(%rsp) + vmovdqu %xmm9, 112(%rsp) + # rnd_all_2: 0-1 + # rnd_0: 0 - 7 + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + addq (%rsp), %r15 + movq %r13, %rdx + xorq %rax, %rcx + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + # rnd_1: 0 - 7 + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + addq 8(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + movq %r8, %rbx + addq %r14, %r10 + xorq %r15, %rbx + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + # rnd_all_2: 2-3 + # rnd_0: 0 - 7 + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + addq 16(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + # rnd_1: 0 - 7 + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + addq 24(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + movq %r14, %rbx + addq %r12, %r8 + xorq %r13, %rbx + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + # rnd_all_2: 4-5 + # rnd_0: 0 - 7 + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + addq 32(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + # rnd_1: 0 - 7 + rorxq $14, %r15, %rax + 
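# (annotation added in review; not from upstream wolfSSL) + # rorxq is the BMI2 non-destructive rotate: it writes the result + # to a separate destination register and leaves the flags + # unchanged, which is what lets this _RORX variant avoid the + # movq/rorq pairs used in the plain AVX paths. +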
rorxq $18, %r15, %rcx + addq %rbx, %r11 + addq 40(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + xorq %r9, %rbx + rorxq $41, %r15, %rax + xorq %rcx, %rax + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + movq %r12, %rbx + addq %r10, %r14 + xorq %r11, %rbx + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + # rnd_all_2: 6-7 + # rnd_0: 0 - 7 + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + addq 48(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + # rnd_1: 0 - 7 + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + addq 56(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, %rcx + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + movq %r10, %rbx + addq %r8, %r12 + xorq %r9, %rbx + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + # rnd_all_2: 8-9 + # rnd_0: 0 - 7 + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + addq 64(%rsp), %r15 + movq %r13, %rdx + xorq %rax, %rcx + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + # rnd_1: 0 - 7 + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + addq 72(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + movq %r8, %rbx + addq %r14, %r10 + xorq %r15, %rbx + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + # rnd_all_2: 10-11 + # rnd_0: 0 - 7 + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + addq 80(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + # rnd_1: 0 - 7 + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + addq 88(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + movq %r14, %rbx + addq %r12, %r8 + xorq %r13, %rbx + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + # rnd_all_2: 12-13 + # rnd_0: 0 - 7 + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + addq 96(%rsp), %r11 + movq %r9, %rdx + xorq 
%rax, %rcx + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + # rnd_1: 0 - 7 + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + addq 104(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + xorq %r9, %rbx + rorxq $41, %r15, %rax + xorq %rcx, %rax + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + movq %r12, %rbx + addq %r10, %r14 + xorq %r11, %rbx + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + # rnd_all_2: 14-15 + # rnd_0: 0 - 7 + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + addq 112(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + # rnd_1: 0 - 7 + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + addq 120(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, %rcx + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + movq %r10, %rbx + addq %r8, %r12 + xorq %r9, %rbx + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + addq %rdx, %r8 + addq (%rdi), %r8 + addq 8(%rdi), %r9 + addq 16(%rdi), %r10 + addq 24(%rdi), %r11 + addq 32(%rdi), %r12 + addq 40(%rdi), %r13 + addq 48(%rdi), %r14 + addq 56(%rdi), %r15 + leaq L_avx1_rorx_sha512_k(%rip), %rcx + addq $0x80, %rsi + subl $0x80, %ebp + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, 32(%rdi) + movq %r13, 40(%rdi) + movq %r14, 48(%rdi) + movq %r15, 56(%rdi) + jnz L_sha512_len_avx1_rorx_begin + xorq %rax, %rax + vzeroupper + addq $0x90, %rsp + popq %rbp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha512_AVX1_RORX_Len,.-Transform_Sha512_AVX1_RORX_Len +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX1 */ +#ifdef HAVE_INTEL_AVX2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx2_sha512_k: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xfc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x6ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 
0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x6f067aa72176fba,0xa637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx2_sha512_k_2: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xfc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0xfc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x6ca6351e003826f,0x142929670a0e6e70 +.quad 0x6ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 
0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x6f067aa72176fba,0xa637dc5a2c898a6 +.quad 0x6f067aa72176fba,0xa637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 8 +#else +.p2align 3 +#endif /* __APPLE__ */ +L_avx2_sha512_k_2_end: +.quad 1024+L_avx2_sha512_k_2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_avx2_sha512_flip_mask: +.quad 0x1020304050607, 0x8090a0b0c0d0e0f +.quad 0x1020304050607, 0x8090a0b0c0d0e0f +#ifndef __APPLE__ +.text +.globl Transform_Sha512_AVX2 +.type Transform_Sha512_AVX2,@function +.align 4 +Transform_Sha512_AVX2: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX2 +.p2align 2 +_Transform_Sha512_AVX2: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x88, %rsp + leaq 64(%rdi), %rax + vmovdqa L_avx2_sha512_flip_mask(%rip), %ymm15 + movq (%rdi), %r8 + movq 8(%rdi), %r9 + movq 16(%rdi), %r10 + movq 24(%rdi), %r11 + movq 32(%rdi), %r12 + movq 40(%rdi), %r13 + movq 48(%rdi), %r14 + movq 56(%rdi), %r15 + vmovdqu (%rax), %ymm0 + vmovdqu 32(%rax), %ymm1 + vpshufb %ymm15, %ymm0, %ymm0 + vpshufb %ymm15, %ymm1, %ymm1 + vmovdqu 64(%rax), %ymm2 + vmovdqu 96(%rax), %ymm3 + vpshufb %ymm15, %ymm2, %ymm2 + vpshufb %ymm15, %ymm3, %ymm3 + movl $4, 128(%rsp) + leaq L_avx2_sha512_k(%rip), %rsi + movq %r9, %rbx + movq %r12, %rax + xorq %r10, %rbx + vpaddq (%rsi), %ymm0, %ymm8 + vpaddq 32(%rsi), %ymm1, %ymm9 + vmovdqu %ymm8, (%rsp) + vmovdqu %ymm9, 32(%rsp) + vpaddq 64(%rsi), %ymm2, %ymm8 + vpaddq 96(%rsi), %ymm3, %ymm9 + vmovdqu %ymm8, 64(%rsp) + vmovdqu %ymm9, 96(%rsp) + # Start of 16 rounds +L_sha256_avx2_start: + addq $0x80, %rsi + rorq $23, %rax + vpblendd $3, %ymm1, %ymm0, %ymm12 + vpblendd $3, %ymm3, %ymm2, %ymm13 + movq %r8, %rdx + movq %r13, %rcx + addq (%rsp), %r15 + xorq %r14, %rcx + xorq %r12, %rax + andq %r12, %rcx + vpermq $57, %ymm12, %ymm12 + rorq $4, %rax + xorq %r14, %rcx + vpermq $57, %ymm13, %ymm13 + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + vpsrlq $0x01, %ymm12, %ymm8 + addq %rax, %r15 + movq %r8, %rcx + vpsllq $63, %ymm12, %ymm9 + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $8, %ymm12, %ymm10 + xorq %r8, %rcx + xorq %r9, %rbx + vpsllq $56, %ymm12, %ymm11 + rorq $6, %rcx + addq %r15, %r11 + vpor %ymm9, 
%ymm8, %ymm8 + xorq %r8, %rcx + addq %rbx, %r15 + vpor %ymm11, %ymm10, %ymm10 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + rorq $23, %rax + vpsrlq $7, %ymm12, %ymm11 + movq %r15, %rbx + movq %r12, %rcx + vpxor %ymm10, %ymm8, %ymm8 + addq 8(%rsp), %r14 + xorq %r13, %rcx + vpxor %ymm11, %ymm8, %ymm8 + xorq %r11, %rax + andq %r11, %rcx + vpaddq %ymm0, %ymm13, %ymm0 + rorq $4, %rax + xorq %r13, %rcx + vpaddq %ymm0, %ymm8, %ymm0 + xorq %r11, %rax + addq %rcx, %r14 + vperm2I128 $0x81, %ymm3, %ymm3, %ymm14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + andq %rbx, %rdx + rorq $5, %rcx + vpsrlq $19, %ymm14, %ymm8 + xorq %r15, %rcx + xorq %r8, %rdx + vpsllq $45, %ymm14, %ymm9 + rorq $6, %rcx + addq %r14, %r10 + vpsrlq $61, %ymm14, %ymm10 + xorq %r15, %rcx + addq %rdx, %r14 + vpsllq $3, %ymm14, %ymm11 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + rorq $23, %rax + vpor %ymm9, %ymm8, %ymm8 + movq %r14, %rdx + movq %r11, %rcx + addq 16(%rsp), %r13 + xorq %r12, %rcx + vpor %ymm11, %ymm10, %ymm10 + xorq %r10, %rax + andq %r10, %rcx + vpxor %ymm10, %ymm8, %ymm8 + rorq $4, %rax + xorq %r12, %rcx + vpsrlq $6, %ymm14, %ymm11 + xorq %r10, %rax + addq %rcx, %r13 + vpxor %ymm11, %ymm8, %ymm8 + rorq $14, %rax + xorq %r15, %rdx + vpaddq %ymm0, %ymm8, %ymm0 + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vperm2I128 $8, %ymm0, %ymm0, %ymm14 + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + vpsrlq $19, %ymm14, %ymm8 + xorq %r14, %rcx + addq %rbx, %r13 + vpsllq $45, %ymm14, %ymm9 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + rorq $23, %rax + vpsrlq $61, %ymm14, %ymm10 + movq %r13, %rbx + movq %r10, %rcx + addq 24(%rsp), %r12 + xorq %r11, %rcx + vpsllq $3, %ymm14, %ymm11 + xorq %r9, %rax + andq %r9, %rcx + vpor %ymm9, %ymm8, %ymm8 + rorq $4, %rax + xorq %r11, %rcx + vpor %ymm11, %ymm10, %ymm10 + xorq %r9, %rax + addq %rcx, %r12 + vpxor %ymm10, %ymm8, %ymm8 + rorq $14, %rax + xorq %r14, %rbx + vpsrlq $6, %ymm14, %ymm11 + addq %rax, %r12 + movq %r13, %rcx + vpxor %ymm11, %ymm8, %ymm8 + andq %rbx, %rdx + rorq $5, %rcx + vpaddq %ymm0, %ymm8, %ymm0 + xorq %r13, %rcx + xorq %r14, %rdx + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + rorq $23, %rax + vpblendd $3, %ymm2, %ymm1, %ymm12 + vpblendd $3, %ymm0, %ymm3, %ymm13 + movq %r12, %rdx + movq %r9, %rcx + addq 32(%rsp), %r11 + xorq %r10, %rcx + xorq %r8, %rax + andq %r8, %rcx + vpermq $57, %ymm12, %ymm12 + rorq $4, %rax + xorq %r10, %rcx + vpermq $57, %ymm13, %ymm13 + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + vpsrlq $0x01, %ymm12, %ymm8 + addq %rax, %r11 + movq %r12, %rcx + vpsllq $63, %ymm12, %ymm9 + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $8, %ymm12, %ymm10 + xorq %r12, %rcx + xorq %r13, %rbx + vpsllq $56, %ymm12, %ymm11 + rorq $6, %rcx + addq %r11, %r15 + vpor %ymm9, %ymm8, %ymm8 + xorq %r12, %rcx + addq %rbx, %r11 + vpor %ymm11, %ymm10, %ymm10 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + rorq $23, %rax + vpsrlq $7, %ymm12, %ymm11 + movq %r11, %rbx + movq %r8, %rcx + vpxor %ymm10, %ymm8, %ymm8 + addq 40(%rsp), %r10 + xorq %r9, %rcx + vpxor %ymm11, %ymm8, %ymm8 + xorq %r15, %rax + andq %r15, %rcx + vpaddq %ymm1, %ymm13, %ymm1 + rorq $4, %rax + xorq %r9, %rcx + vpaddq %ymm1, %ymm8, %ymm1 + xorq %r15, %rax + addq %rcx, %r10 + vperm2I128 $0x81, %ymm0, %ymm0, %ymm14 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + andq %rbx, %rdx + rorq $5, %rcx + 
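# (annotation added in review; not from upstream wolfSSL) + # This non-RORX path folds Sigma1(e) = ROTR14^ROTR18^ROTR41 into + # the rorq $23 / $4 / $14 chain and Sigma0(a) = ROTR28^ROTR34^ROTR39 + # into the rorq $5 / $6 / $28 chain, xor-ing e (resp. a) back in + # between rotates. +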
vpsrlq $19, %ymm14, %ymm8 + xorq %r11, %rcx + xorq %r12, %rdx + vpsllq $45, %ymm14, %ymm9 + rorq $6, %rcx + addq %r10, %r14 + vpsrlq $61, %ymm14, %ymm10 + xorq %r11, %rcx + addq %rdx, %r10 + vpsllq $3, %ymm14, %ymm11 + rorq $28, %rcx + movq %r14, %rax + addq %rcx, %r10 + rorq $23, %rax + vpor %ymm9, %ymm8, %ymm8 + movq %r10, %rdx + movq %r15, %rcx + addq 48(%rsp), %r9 + xorq %r8, %rcx + vpor %ymm11, %ymm10, %ymm10 + xorq %r14, %rax + andq %r14, %rcx + vpxor %ymm10, %ymm8, %ymm8 + rorq $4, %rax + xorq %r8, %rcx + vpsrlq $6, %ymm14, %ymm11 + xorq %r14, %rax + addq %rcx, %r9 + vpxor %ymm11, %ymm8, %ymm8 + rorq $14, %rax + xorq %r11, %rdx + vpaddq %ymm1, %ymm8, %ymm1 + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vperm2I128 $8, %ymm1, %ymm1, %ymm14 + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + vpsrlq $19, %ymm14, %ymm8 + xorq %r10, %rcx + addq %rbx, %r9 + vpsllq $45, %ymm14, %ymm9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + rorq $23, %rax + vpsrlq $61, %ymm14, %ymm10 + movq %r9, %rbx + movq %r14, %rcx + addq 56(%rsp), %r8 + xorq %r15, %rcx + vpsllq $3, %ymm14, %ymm11 + xorq %r13, %rax + andq %r13, %rcx + vpor %ymm9, %ymm8, %ymm8 + rorq $4, %rax + xorq %r15, %rcx + vpor %ymm11, %ymm10, %ymm10 + xorq %r13, %rax + addq %rcx, %r8 + vpxor %ymm10, %ymm8, %ymm8 + rorq $14, %rax + xorq %r10, %rbx + vpsrlq $6, %ymm14, %ymm11 + addq %rax, %r8 + movq %r9, %rcx + vpxor %ymm11, %ymm8, %ymm8 + andq %rbx, %rdx + rorq $5, %rcx + vpaddq %ymm1, %ymm8, %ymm1 + xorq %r9, %rcx + xorq %r10, %rdx + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + rorq $28, %rcx + movq %r12, %rax + addq %rcx, %r8 + rorq $23, %rax + vpblendd $3, %ymm3, %ymm2, %ymm12 + vpblendd $3, %ymm1, %ymm0, %ymm13 + movq %r8, %rdx + movq %r13, %rcx + addq 64(%rsp), %r15 + xorq %r14, %rcx + xorq %r12, %rax + andq %r12, %rcx + vpermq $57, %ymm12, %ymm12 + rorq $4, %rax + xorq %r14, %rcx + vpermq $57, %ymm13, %ymm13 + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + vpsrlq $0x01, %ymm12, %ymm8 + addq %rax, %r15 + movq %r8, %rcx + vpsllq $63, %ymm12, %ymm9 + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $8, %ymm12, %ymm10 + xorq %r8, %rcx + xorq %r9, %rbx + vpsllq $56, %ymm12, %ymm11 + rorq $6, %rcx + addq %r15, %r11 + vpor %ymm9, %ymm8, %ymm8 + xorq %r8, %rcx + addq %rbx, %r15 + vpor %ymm11, %ymm10, %ymm10 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + rorq $23, %rax + vpsrlq $7, %ymm12, %ymm11 + movq %r15, %rbx + movq %r12, %rcx + vpxor %ymm10, %ymm8, %ymm8 + addq 72(%rsp), %r14 + xorq %r13, %rcx + vpxor %ymm11, %ymm8, %ymm8 + xorq %r11, %rax + andq %r11, %rcx + vpaddq %ymm2, %ymm13, %ymm2 + rorq $4, %rax + xorq %r13, %rcx + vpaddq %ymm2, %ymm8, %ymm2 + xorq %r11, %rax + addq %rcx, %r14 + vperm2I128 $0x81, %ymm1, %ymm1, %ymm14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + andq %rbx, %rdx + rorq $5, %rcx + vpsrlq $19, %ymm14, %ymm8 + xorq %r15, %rcx + xorq %r8, %rdx + vpsllq $45, %ymm14, %ymm9 + rorq $6, %rcx + addq %r14, %r10 + vpsrlq $61, %ymm14, %ymm10 + xorq %r15, %rcx + addq %rdx, %r14 + vpsllq $3, %ymm14, %ymm11 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + rorq $23, %rax + vpor %ymm9, %ymm8, %ymm8 + movq %r14, %rdx + movq %r11, %rcx + addq 80(%rsp), %r13 + xorq %r12, %rcx + vpor %ymm11, %ymm10, %ymm10 + xorq %r10, %rax + andq %r10, %rcx + vpxor %ymm10, %ymm8, %ymm8 + rorq $4, %rax + xorq %r12, %rcx + vpsrlq $6, %ymm14, %ymm11 + xorq %r10, %rax + addq %rcx, %r13 + vpxor %ymm11, %ymm8, %ymm8 + rorq $14, %rax + 
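# (annotation added in review; not from upstream wolfSSL) + # Ch(e,f,g) is evaluated as ((f^g)&e)^g and Maj(a,b,c) as + # ((a^b)&(b^c))^b; the b^c term is the previous round's a^b, + # carried alternately in %rbx and %rdx. +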
xorq %r15, %rdx + vpaddq %ymm2, %ymm8, %ymm2 + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vperm2I128 $8, %ymm2, %ymm2, %ymm14 + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + vpsrlq $19, %ymm14, %ymm8 + xorq %r14, %rcx + addq %rbx, %r13 + vpsllq $45, %ymm14, %ymm9 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + rorq $23, %rax + vpsrlq $61, %ymm14, %ymm10 + movq %r13, %rbx + movq %r10, %rcx + addq 88(%rsp), %r12 + xorq %r11, %rcx + vpsllq $3, %ymm14, %ymm11 + xorq %r9, %rax + andq %r9, %rcx + vpor %ymm9, %ymm8, %ymm8 + rorq $4, %rax + xorq %r11, %rcx + vpor %ymm11, %ymm10, %ymm10 + xorq %r9, %rax + addq %rcx, %r12 + vpxor %ymm10, %ymm8, %ymm8 + rorq $14, %rax + xorq %r14, %rbx + vpsrlq $6, %ymm14, %ymm11 + addq %rax, %r12 + movq %r13, %rcx + vpxor %ymm11, %ymm8, %ymm8 + andq %rbx, %rdx + rorq $5, %rcx + vpaddq %ymm2, %ymm8, %ymm2 + xorq %r13, %rcx + xorq %r14, %rdx + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + rorq $23, %rax + vpblendd $3, %ymm0, %ymm3, %ymm12 + vpblendd $3, %ymm2, %ymm1, %ymm13 + movq %r12, %rdx + movq %r9, %rcx + addq 96(%rsp), %r11 + xorq %r10, %rcx + xorq %r8, %rax + andq %r8, %rcx + vpermq $57, %ymm12, %ymm12 + rorq $4, %rax + xorq %r10, %rcx + vpermq $57, %ymm13, %ymm13 + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + vpsrlq $0x01, %ymm12, %ymm8 + addq %rax, %r11 + movq %r12, %rcx + vpsllq $63, %ymm12, %ymm9 + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $8, %ymm12, %ymm10 + xorq %r12, %rcx + xorq %r13, %rbx + vpsllq $56, %ymm12, %ymm11 + rorq $6, %rcx + addq %r11, %r15 + vpor %ymm9, %ymm8, %ymm8 + xorq %r12, %rcx + addq %rbx, %r11 + vpor %ymm11, %ymm10, %ymm10 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + rorq $23, %rax + vpsrlq $7, %ymm12, %ymm11 + movq %r11, %rbx + movq %r8, %rcx + vpxor %ymm10, %ymm8, %ymm8 + addq 104(%rsp), %r10 + xorq %r9, %rcx + vpxor %ymm11, %ymm8, %ymm8 + xorq %r15, %rax + andq %r15, %rcx + vpaddq %ymm3, %ymm13, %ymm3 + rorq $4, %rax + xorq %r9, %rcx + vpaddq %ymm3, %ymm8, %ymm3 + xorq %r15, %rax + addq %rcx, %r10 + vperm2I128 $0x81, %ymm2, %ymm2, %ymm14 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + andq %rbx, %rdx + rorq $5, %rcx + vpsrlq $19, %ymm14, %ymm8 + xorq %r11, %rcx + xorq %r12, %rdx + vpsllq $45, %ymm14, %ymm9 + rorq $6, %rcx + addq %r10, %r14 + vpsrlq $61, %ymm14, %ymm10 + xorq %r11, %rcx + addq %rdx, %r10 + vpsllq $3, %ymm14, %ymm11 + rorq $28, %rcx + movq %r14, %rax + addq %rcx, %r10 + rorq $23, %rax + vpor %ymm9, %ymm8, %ymm8 + movq %r10, %rdx + movq %r15, %rcx + addq 112(%rsp), %r9 + xorq %r8, %rcx + vpor %ymm11, %ymm10, %ymm10 + xorq %r14, %rax + andq %r14, %rcx + vpxor %ymm10, %ymm8, %ymm8 + rorq $4, %rax + xorq %r8, %rcx + vpsrlq $6, %ymm14, %ymm11 + xorq %r14, %rax + addq %rcx, %r9 + vpxor %ymm11, %ymm8, %ymm8 + rorq $14, %rax + xorq %r11, %rdx + vpaddq %ymm3, %ymm8, %ymm3 + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vperm2I128 $8, %ymm3, %ymm3, %ymm14 + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + vpsrlq $19, %ymm14, %ymm8 + xorq %r10, %rcx + addq %rbx, %r9 + vpsllq $45, %ymm14, %ymm9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + rorq $23, %rax + vpsrlq $61, %ymm14, %ymm10 + movq %r9, %rbx + movq %r14, %rcx + addq 120(%rsp), %r8 + xorq %r15, %rcx + vpsllq $3, %ymm14, %ymm11 + xorq %r13, %rax + andq %r13, %rcx + vpor %ymm9, %ymm8, %ymm8 + rorq $4, %rax + xorq %r15, %rcx + vpor 
%ymm11, %ymm10, %ymm10 + xorq %r13, %rax + addq %rcx, %r8 + vpxor %ymm10, %ymm8, %ymm8 + rorq $14, %rax + xorq %r10, %rbx + vpsrlq $6, %ymm14, %ymm11 + addq %rax, %r8 + movq %r9, %rcx + vpxor %ymm11, %ymm8, %ymm8 + andq %rbx, %rdx + rorq $5, %rcx + vpaddq %ymm3, %ymm8, %ymm3 + xorq %r9, %rcx + xorq %r10, %rdx + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + rorq $28, %rcx + movq %r12, %rax + addq %rcx, %r8 + vpaddq (%rsi), %ymm0, %ymm8 + vpaddq 32(%rsi), %ymm1, %ymm9 + vmovdqu %ymm8, (%rsp) + vmovdqu %ymm9, 32(%rsp) + vpaddq 64(%rsi), %ymm2, %ymm8 + vpaddq 96(%rsi), %ymm3, %ymm9 + vmovdqu %ymm8, 64(%rsp) + vmovdqu %ymm9, 96(%rsp) + subl $0x01, 128(%rsp) + jne L_sha256_avx2_start + rorq $23, %rax + movq %r8, %rdx + movq %r13, %rcx + addq (%rsp), %r15 + xorq %r14, %rcx + xorq %r12, %rax + andq %r12, %rcx + rorq $4, %rax + xorq %r14, %rcx + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + addq %rax, %r15 + movq %r8, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r8, %rcx + xorq %r9, %rbx + rorq $6, %rcx + addq %r15, %r11 + xorq %r8, %rcx + addq %rbx, %r15 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + rorq $23, %rax + movq %r15, %rbx + movq %r12, %rcx + addq 8(%rsp), %r14 + xorq %r13, %rcx + xorq %r11, %rax + andq %r11, %rcx + rorq $4, %rax + xorq %r13, %rcx + xorq %r11, %rax + addq %rcx, %r14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r15, %rcx + xorq %r8, %rdx + rorq $6, %rcx + addq %r14, %r10 + xorq %r15, %rcx + addq %rdx, %r14 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + rorq $23, %rax + movq %r14, %rdx + movq %r11, %rcx + addq 16(%rsp), %r13 + xorq %r12, %rcx + xorq %r10, %rax + andq %r10, %rcx + rorq $4, %rax + xorq %r12, %rcx + xorq %r10, %rax + addq %rcx, %r13 + rorq $14, %rax + xorq %r15, %rdx + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + xorq %r14, %rcx + addq %rbx, %r13 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + rorq $23, %rax + movq %r13, %rbx + movq %r10, %rcx + addq 24(%rsp), %r12 + xorq %r11, %rcx + xorq %r9, %rax + andq %r9, %rcx + rorq $4, %rax + xorq %r11, %rcx + xorq %r9, %rax + addq %rcx, %r12 + rorq $14, %rax + xorq %r14, %rbx + addq %rax, %r12 + movq %r13, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r13, %rcx + xorq %r14, %rdx + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + rorq $23, %rax + movq %r12, %rdx + movq %r9, %rcx + addq 32(%rsp), %r11 + xorq %r10, %rcx + xorq %r8, %rax + andq %r8, %rcx + rorq $4, %rax + xorq %r10, %rcx + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + addq %rax, %r11 + movq %r12, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r12, %rcx + xorq %r13, %rbx + rorq $6, %rcx + addq %r11, %r15 + xorq %r12, %rcx + addq %rbx, %r11 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + rorq $23, %rax + movq %r11, %rbx + movq %r8, %rcx + addq 40(%rsp), %r10 + xorq %r9, %rcx + xorq %r15, %rax + andq %r15, %rcx + rorq $4, %rax + xorq %r9, %rcx + xorq %r15, %rax + addq %rcx, %r10 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r11, %rcx + xorq %r12, %rdx + rorq $6, %rcx + addq %r10, %r14 + xorq %r11, %rcx + addq %rdx, %r10 + rorq $28, %rcx + movq %r14, %rax + addq %rcx, %r10 + rorq $23, %rax + movq %r10, %rdx + movq %r15, %rcx + addq 48(%rsp), %r9 + xorq %r8, %rcx + 
xorq %r14, %rax + andq %r14, %rcx + rorq $4, %rax + xorq %r8, %rcx + xorq %r14, %rax + addq %rcx, %r9 + rorq $14, %rax + xorq %r11, %rdx + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + xorq %r10, %rcx + addq %rbx, %r9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + rorq $23, %rax + movq %r9, %rbx + movq %r14, %rcx + addq 56(%rsp), %r8 + xorq %r15, %rcx + xorq %r13, %rax + andq %r13, %rcx + rorq $4, %rax + xorq %r15, %rcx + xorq %r13, %rax + addq %rcx, %r8 + rorq $14, %rax + xorq %r10, %rbx + addq %rax, %r8 + movq %r9, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r9, %rcx + xorq %r10, %rdx + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + rorq $28, %rcx + movq %r12, %rax + addq %rcx, %r8 + rorq $23, %rax + movq %r8, %rdx + movq %r13, %rcx + addq 64(%rsp), %r15 + xorq %r14, %rcx + xorq %r12, %rax + andq %r12, %rcx + rorq $4, %rax + xorq %r14, %rcx + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + addq %rax, %r15 + movq %r8, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r8, %rcx + xorq %r9, %rbx + rorq $6, %rcx + addq %r15, %r11 + xorq %r8, %rcx + addq %rbx, %r15 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + rorq $23, %rax + movq %r15, %rbx + movq %r12, %rcx + addq 72(%rsp), %r14 + xorq %r13, %rcx + xorq %r11, %rax + andq %r11, %rcx + rorq $4, %rax + xorq %r13, %rcx + xorq %r11, %rax + addq %rcx, %r14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r15, %rcx + xorq %r8, %rdx + rorq $6, %rcx + addq %r14, %r10 + xorq %r15, %rcx + addq %rdx, %r14 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + rorq $23, %rax + movq %r14, %rdx + movq %r11, %rcx + addq 80(%rsp), %r13 + xorq %r12, %rcx + xorq %r10, %rax + andq %r10, %rcx + rorq $4, %rax + xorq %r12, %rcx + xorq %r10, %rax + addq %rcx, %r13 + rorq $14, %rax + xorq %r15, %rdx + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + xorq %r14, %rcx + addq %rbx, %r13 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + rorq $23, %rax + movq %r13, %rbx + movq %r10, %rcx + addq 88(%rsp), %r12 + xorq %r11, %rcx + xorq %r9, %rax + andq %r9, %rcx + rorq $4, %rax + xorq %r11, %rcx + xorq %r9, %rax + addq %rcx, %r12 + rorq $14, %rax + xorq %r14, %rbx + addq %rax, %r12 + movq %r13, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r13, %rcx + xorq %r14, %rdx + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + rorq $23, %rax + movq %r12, %rdx + movq %r9, %rcx + addq 96(%rsp), %r11 + xorq %r10, %rcx + xorq %r8, %rax + andq %r8, %rcx + rorq $4, %rax + xorq %r10, %rcx + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + addq %rax, %r11 + movq %r12, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r12, %rcx + xorq %r13, %rbx + rorq $6, %rcx + addq %r11, %r15 + xorq %r12, %rcx + addq %rbx, %r11 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + rorq $23, %rax + movq %r11, %rbx + movq %r8, %rcx + addq 104(%rsp), %r10 + xorq %r9, %rcx + xorq %r15, %rax + andq %r15, %rcx + rorq $4, %rax + xorq %r9, %rcx + xorq %r15, %rax + addq %rcx, %r10 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r11, %rcx + xorq %r12, %rdx + rorq $6, %rcx + addq %r10, %r14 + xorq %r11, %rcx + addq %rdx, %r10 + rorq $28, %rcx + movq %r14, %rax 
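+ # (annotation added in review; not from upstream wolfSSL) + # After the 4-pass loop above, this unrolled tail of 16 rounds + # consumes the W+K values stored on the last pass without + # scheduling any new words: 4*16 rounds in the loop plus 16 here + # gives the 80 rounds of SHA-512.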
+ addq %rcx, %r10 + rorq $23, %rax + movq %r10, %rdx + movq %r15, %rcx + addq 112(%rsp), %r9 + xorq %r8, %rcx + xorq %r14, %rax + andq %r14, %rcx + rorq $4, %rax + xorq %r8, %rcx + xorq %r14, %rax + addq %rcx, %r9 + rorq $14, %rax + xorq %r11, %rdx + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + xorq %r10, %rcx + addq %rbx, %r9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + rorq $23, %rax + movq %r9, %rbx + movq %r14, %rcx + addq 120(%rsp), %r8 + xorq %r15, %rcx + xorq %r13, %rax + andq %r13, %rcx + rorq $4, %rax + xorq %r15, %rcx + xorq %r13, %rax + addq %rcx, %r8 + rorq $14, %rax + xorq %r10, %rbx + addq %rax, %r8 + movq %r9, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r9, %rcx + xorq %r10, %rdx + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + rorq $28, %rcx + movq %r12, %rax + addq %rcx, %r8 + addq %r8, (%rdi) + addq %r9, 8(%rdi) + addq %r10, 16(%rdi) + addq %r11, 24(%rdi) + addq %r12, 32(%rdi) + addq %r13, 40(%rdi) + addq %r14, 48(%rdi) + addq %r15, 56(%rdi) + xorq %rax, %rax + vzeroupper + addq $0x88, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha512_AVX2,.-Transform_Sha512_AVX2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl Transform_Sha512_AVX2_Len +.type Transform_Sha512_AVX2_Len,@function +.align 4 +Transform_Sha512_AVX2_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX2_Len +.p2align 2 +_Transform_Sha512_AVX2_Len: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbp + movq %rsi, %rbp + testb $0x80, %bpl + je L_sha512_len_avx2_block + movq 224(%rdi), %rcx + vmovdqu (%rcx), %ymm0 + vmovdqu 32(%rcx), %ymm1 + vmovdqu 64(%rcx), %ymm2 + vmovdqu 96(%rcx), %ymm3 + vmovups %ymm0, 64(%rdi) + vmovups %ymm1, 96(%rdi) + vmovups %ymm2, 128(%rdi) + vmovups %ymm3, 160(%rdi) +#ifndef __APPLE__ + call Transform_Sha512_AVX2@plt +#else + call _Transform_Sha512_AVX2 +#endif /* __APPLE__ */ + addq $0x80, 224(%rdi) + subl $0x80, %ebp + jz L_sha512_len_avx2_done +L_sha512_len_avx2_block: + movq 224(%rdi), %rcx + vmovdqa L_avx2_sha512_flip_mask(%rip), %ymm15 + movq (%rdi), %r8 + movq 8(%rdi), %r9 + movq 16(%rdi), %r10 + movq 24(%rdi), %r11 + movq 32(%rdi), %r12 + movq 40(%rdi), %r13 + movq 48(%rdi), %r14 + movq 56(%rdi), %r15 + # Start of loop processing two blocks +L_sha512_len_avx2_begin: + subq $0x540, %rsp + leaq L_avx2_sha512_k_2(%rip), %rsi + movq %r9, %rbx + movq %r12, %rax + vmovdqu (%rcx), %xmm0 + vmovdqu 16(%rcx), %xmm1 + vinserti128 $0x01, 128(%rcx), %ymm0, %ymm0 + vinserti128 $0x01, 144(%rcx), %ymm1, %ymm1 + vpshufb %ymm15, %ymm0, %ymm0 + vpshufb %ymm15, %ymm1, %ymm1 + vmovdqu 32(%rcx), %xmm2 + vmovdqu 48(%rcx), %xmm3 + vinserti128 $0x01, 160(%rcx), %ymm2, %ymm2 + vinserti128 $0x01, 176(%rcx), %ymm3, %ymm3 + vpshufb %ymm15, %ymm2, %ymm2 + vpshufb %ymm15, %ymm3, %ymm3 + vmovdqu 64(%rcx), %xmm4 + vmovdqu 80(%rcx), %xmm5 + vinserti128 $0x01, 192(%rcx), %ymm4, %ymm4 + vinserti128 $0x01, 208(%rcx), %ymm5, %ymm5 + vpshufb %ymm15, %ymm4, %ymm4 + vpshufb %ymm15, %ymm5, %ymm5 + vmovdqu 96(%rcx), %xmm6 + vmovdqu 112(%rcx), %xmm7 + vinserti128 $0x01, 224(%rcx), %ymm6, %ymm6 + vinserti128 $0x01, 240(%rcx), %ymm7, %ymm7 + vpshufb %ymm15, %ymm6, %ymm6 + vpshufb %ymm15, %ymm7, %ymm7 + xorq %r10, %rbx + # Start of 16 rounds +L_sha512_len_avx2_start: + vpaddq (%rsi), %ymm0, %ymm8 + vpaddq 32(%rsi), %ymm1, %ymm9 + vmovdqu %ymm8, (%rsp) + vmovdqu 
%ymm9, 32(%rsp) + vpaddq 64(%rsi), %ymm2, %ymm8 + vpaddq 96(%rsi), %ymm3, %ymm9 + vmovdqu %ymm8, 64(%rsp) + vmovdqu %ymm9, 96(%rsp) + vpaddq 128(%rsi), %ymm4, %ymm8 + vpaddq 160(%rsi), %ymm5, %ymm9 + vmovdqu %ymm8, 128(%rsp) + vmovdqu %ymm9, 160(%rsp) + vpaddq 192(%rsi), %ymm6, %ymm8 + vpaddq 224(%rsi), %ymm7, %ymm9 + vmovdqu %ymm8, 192(%rsp) + vmovdqu %ymm9, 224(%rsp) + # msg_sched: 0-1 + rorq $23, %rax + vpalignr $8, %ymm0, %ymm1, %ymm12 + vpalignr $8, %ymm4, %ymm5, %ymm13 + movq %r8, %rdx + movq %r13, %rcx + addq (%rsp), %r15 + xorq %r14, %rcx + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + xorq %r12, %rax + andq %r12, %rcx + rorq $4, %rax + xorq %r14, %rcx + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + addq %rax, %r15 + movq %r8, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %ymm12, %ymm11 + vpxor %ymm10, %ymm8, %ymm8 + xorq %r8, %rcx + xorq %r9, %rbx + rorq $6, %rcx + addq %r15, %r11 + vpxor %ymm11, %ymm8, %ymm8 + vpaddq %ymm0, %ymm13, %ymm0 + xorq %r8, %rcx + addq %rbx, %r15 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + rorq $23, %rax + vpaddq %ymm0, %ymm8, %ymm0 + movq %r15, %rbx + movq %r12, %rcx + addq 8(%rsp), %r14 + xorq %r13, %rcx + vpsrlq $19, %ymm7, %ymm8 + vpsllq $45, %ymm7, %ymm9 + xorq %r11, %rax + andq %r11, %rcx + rorq $4, %rax + xorq %r13, %rcx + vpsrlq $61, %ymm7, %ymm10 + vpsllq $3, %ymm7, %ymm11 + xorq %r11, %rax + addq %rcx, %r14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r15, %rcx + xorq %r8, %rdx + vpxor %ymm10, %ymm8, %ymm8 + vpsrlq $6, %ymm7, %ymm11 + rorq $6, %rcx + addq %r14, %r10 + xorq %r15, %rcx + addq %rdx, %r14 + vpxor %ymm11, %ymm8, %ymm8 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + vpaddq %ymm0, %ymm8, %ymm0 + # msg_sched done: 0-3 + # msg_sched: 4-5 + rorq $23, %rax + vpalignr $8, %ymm1, %ymm2, %ymm12 + vpalignr $8, %ymm5, %ymm6, %ymm13 + movq %r14, %rdx + movq %r11, %rcx + addq 32(%rsp), %r13 + xorq %r12, %rcx + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + xorq %r10, %rax + andq %r10, %rcx + rorq $4, %rax + xorq %r12, %rcx + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + xorq %r10, %rax + addq %rcx, %r13 + rorq $14, %rax + xorq %r15, %rdx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %ymm12, %ymm11 + vpxor %ymm10, %ymm8, %ymm8 + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + vpxor %ymm11, %ymm8, %ymm8 + vpaddq %ymm1, %ymm13, %ymm1 + xorq %r14, %rcx + addq %rbx, %r13 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + rorq $23, %rax + vpaddq %ymm1, %ymm8, %ymm1 + movq %r13, %rbx + movq %r10, %rcx + addq 40(%rsp), %r12 + xorq %r11, %rcx + vpsrlq $19, %ymm0, %ymm8 + vpsllq $45, %ymm0, %ymm9 + xorq %r9, %rax + andq %r9, %rcx + rorq $4, %rax + xorq %r11, %rcx + vpsrlq $61, %ymm0, %ymm10 + vpsllq $3, %ymm0, %ymm11 + xorq %r9, %rax + addq %rcx, %r12 + rorq $14, %rax + xorq %r14, %rbx + addq %rax, %r12 + movq %r13, %rcx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r13, %rcx + xorq %r14, %rdx + vpxor %ymm10, %ymm8, %ymm8 + vpsrlq $6, %ymm0, %ymm11 + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + vpxor %ymm11, %ymm8, %ymm8 + rorq $28, %rcx + movq %r8, 
%rax + addq %rcx, %r12 + vpaddq %ymm1, %ymm8, %ymm1 + # msg_sched done: 4-7 + # msg_sched: 8-9 + rorq $23, %rax + vpalignr $8, %ymm2, %ymm3, %ymm12 + vpalignr $8, %ymm6, %ymm7, %ymm13 + movq %r12, %rdx + movq %r9, %rcx + addq 64(%rsp), %r11 + xorq %r10, %rcx + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + xorq %r8, %rax + andq %r8, %rcx + rorq $4, %rax + xorq %r10, %rcx + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + addq %rax, %r11 + movq %r12, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %ymm12, %ymm11 + vpxor %ymm10, %ymm8, %ymm8 + xorq %r12, %rcx + xorq %r13, %rbx + rorq $6, %rcx + addq %r11, %r15 + vpxor %ymm11, %ymm8, %ymm8 + vpaddq %ymm2, %ymm13, %ymm2 + xorq %r12, %rcx + addq %rbx, %r11 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + rorq $23, %rax + vpaddq %ymm2, %ymm8, %ymm2 + movq %r11, %rbx + movq %r8, %rcx + addq 72(%rsp), %r10 + xorq %r9, %rcx + vpsrlq $19, %ymm1, %ymm8 + vpsllq $45, %ymm1, %ymm9 + xorq %r15, %rax + andq %r15, %rcx + rorq $4, %rax + xorq %r9, %rcx + vpsrlq $61, %ymm1, %ymm10 + vpsllq $3, %ymm1, %ymm11 + xorq %r15, %rax + addq %rcx, %r10 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r11, %rcx + xorq %r12, %rdx + vpxor %ymm10, %ymm8, %ymm8 + vpsrlq $6, %ymm1, %ymm11 + rorq $6, %rcx + addq %r10, %r14 + xorq %r11, %rcx + addq %rdx, %r10 + vpxor %ymm11, %ymm8, %ymm8 + rorq $28, %rcx + movq %r14, %rax + addq %rcx, %r10 + vpaddq %ymm2, %ymm8, %ymm2 + # msg_sched done: 8-11 + # msg_sched: 12-13 + rorq $23, %rax + vpalignr $8, %ymm3, %ymm4, %ymm12 + vpalignr $8, %ymm7, %ymm0, %ymm13 + movq %r10, %rdx + movq %r15, %rcx + addq 96(%rsp), %r9 + xorq %r8, %rcx + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + xorq %r14, %rax + andq %r14, %rcx + rorq $4, %rax + xorq %r8, %rcx + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + xorq %r14, %rax + addq %rcx, %r9 + rorq $14, %rax + xorq %r11, %rdx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %ymm12, %ymm11 + vpxor %ymm10, %ymm8, %ymm8 + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + vpxor %ymm11, %ymm8, %ymm8 + vpaddq %ymm3, %ymm13, %ymm3 + xorq %r10, %rcx + addq %rbx, %r9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + rorq $23, %rax + vpaddq %ymm3, %ymm8, %ymm3 + movq %r9, %rbx + movq %r14, %rcx + addq 104(%rsp), %r8 + xorq %r15, %rcx + vpsrlq $19, %ymm2, %ymm8 + vpsllq $45, %ymm2, %ymm9 + xorq %r13, %rax + andq %r13, %rcx + rorq $4, %rax + xorq %r15, %rcx + vpsrlq $61, %ymm2, %ymm10 + vpsllq $3, %ymm2, %ymm11 + xorq %r13, %rax + addq %rcx, %r8 + rorq $14, %rax + xorq %r10, %rbx + addq %rax, %r8 + movq %r9, %rcx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r9, %rcx + xorq %r10, %rdx + vpxor %ymm10, %ymm8, %ymm8 + vpsrlq $6, %ymm2, %ymm11 + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + vpxor %ymm11, %ymm8, %ymm8 + rorq $28, %rcx + movq %r12, %rax + addq %rcx, %r8 + vpaddq %ymm3, %ymm8, %ymm3 + # msg_sched done: 12-15 + # msg_sched: 16-17 + rorq $23, %rax + vpalignr $8, %ymm4, %ymm5, %ymm12 + vpalignr $8, %ymm0, %ymm1, %ymm13 + movq %r8, %rdx + movq %r13, %rcx + addq 128(%rsp), %r15 + xorq %r14, %rcx + vpsrlq $0x01, %ymm12, %ymm8 + 
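+ # Note: each ymm register carries the message schedule for two blocks at
+ # once (low/high 128-bit lanes). Shift/or pairs emulate 64-bit lane
+ # rotates: srl 1 | sll 63 and srl 8 | sll 56 give w ror 1 and w ror 8,
+ # which together with srl 7 form sigma0(w[t-15]).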
vpsllq $63, %ymm12, %ymm9 + xorq %r12, %rax + andq %r12, %rcx + rorq $4, %rax + xorq %r14, %rcx + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + addq %rax, %r15 + movq %r8, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %ymm12, %ymm11 + vpxor %ymm10, %ymm8, %ymm8 + xorq %r8, %rcx + xorq %r9, %rbx + rorq $6, %rcx + addq %r15, %r11 + vpxor %ymm11, %ymm8, %ymm8 + vpaddq %ymm4, %ymm13, %ymm4 + xorq %r8, %rcx + addq %rbx, %r15 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + rorq $23, %rax + vpaddq %ymm4, %ymm8, %ymm4 + movq %r15, %rbx + movq %r12, %rcx + addq 136(%rsp), %r14 + xorq %r13, %rcx + vpsrlq $19, %ymm3, %ymm8 + vpsllq $45, %ymm3, %ymm9 + xorq %r11, %rax + andq %r11, %rcx + rorq $4, %rax + xorq %r13, %rcx + vpsrlq $61, %ymm3, %ymm10 + vpsllq $3, %ymm3, %ymm11 + xorq %r11, %rax + addq %rcx, %r14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r15, %rcx + xorq %r8, %rdx + vpxor %ymm10, %ymm8, %ymm8 + vpsrlq $6, %ymm3, %ymm11 + rorq $6, %rcx + addq %r14, %r10 + xorq %r15, %rcx + addq %rdx, %r14 + vpxor %ymm11, %ymm8, %ymm8 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + vpaddq %ymm4, %ymm8, %ymm4 + # msg_sched done: 16-19 + # msg_sched: 20-21 + rorq $23, %rax + vpalignr $8, %ymm5, %ymm6, %ymm12 + vpalignr $8, %ymm1, %ymm2, %ymm13 + movq %r14, %rdx + movq %r11, %rcx + addq 160(%rsp), %r13 + xorq %r12, %rcx + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + xorq %r10, %rax + andq %r10, %rcx + rorq $4, %rax + xorq %r12, %rcx + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + xorq %r10, %rax + addq %rcx, %r13 + rorq $14, %rax + xorq %r15, %rdx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %ymm12, %ymm11 + vpxor %ymm10, %ymm8, %ymm8 + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + vpxor %ymm11, %ymm8, %ymm8 + vpaddq %ymm5, %ymm13, %ymm5 + xorq %r14, %rcx + addq %rbx, %r13 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + rorq $23, %rax + vpaddq %ymm5, %ymm8, %ymm5 + movq %r13, %rbx + movq %r10, %rcx + addq 168(%rsp), %r12 + xorq %r11, %rcx + vpsrlq $19, %ymm4, %ymm8 + vpsllq $45, %ymm4, %ymm9 + xorq %r9, %rax + andq %r9, %rcx + rorq $4, %rax + xorq %r11, %rcx + vpsrlq $61, %ymm4, %ymm10 + vpsllq $3, %ymm4, %ymm11 + xorq %r9, %rax + addq %rcx, %r12 + rorq $14, %rax + xorq %r14, %rbx + addq %rax, %r12 + movq %r13, %rcx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r13, %rcx + xorq %r14, %rdx + vpxor %ymm10, %ymm8, %ymm8 + vpsrlq $6, %ymm4, %ymm11 + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + vpxor %ymm11, %ymm8, %ymm8 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + vpaddq %ymm5, %ymm8, %ymm5 + # msg_sched done: 20-23 + # msg_sched: 24-25 + rorq $23, %rax + vpalignr $8, %ymm6, %ymm7, %ymm12 + vpalignr $8, %ymm2, %ymm3, %ymm13 + movq %r12, %rdx + movq %r9, %rcx + addq 192(%rsp), %r11 + xorq %r10, %rcx + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + xorq %r8, %rax + andq %r8, %rcx + rorq $4, %rax + xorq %r10, %rcx + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + 
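+ # Note: sigma1(w[t-2]) is built the same way: srl 19 | sll 45 and
+ # srl 61 | sll 3 emulate w ror 19 and w ror 61, xored with w >> 6.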
addq %rax, %r11 + movq %r12, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %ymm12, %ymm11 + vpxor %ymm10, %ymm8, %ymm8 + xorq %r12, %rcx + xorq %r13, %rbx + rorq $6, %rcx + addq %r11, %r15 + vpxor %ymm11, %ymm8, %ymm8 + vpaddq %ymm6, %ymm13, %ymm6 + xorq %r12, %rcx + addq %rbx, %r11 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + rorq $23, %rax + vpaddq %ymm6, %ymm8, %ymm6 + movq %r11, %rbx + movq %r8, %rcx + addq 200(%rsp), %r10 + xorq %r9, %rcx + vpsrlq $19, %ymm5, %ymm8 + vpsllq $45, %ymm5, %ymm9 + xorq %r15, %rax + andq %r15, %rcx + rorq $4, %rax + xorq %r9, %rcx + vpsrlq $61, %ymm5, %ymm10 + vpsllq $3, %ymm5, %ymm11 + xorq %r15, %rax + addq %rcx, %r10 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r11, %rcx + xorq %r12, %rdx + vpxor %ymm10, %ymm8, %ymm8 + vpsrlq $6, %ymm5, %ymm11 + rorq $6, %rcx + addq %r10, %r14 + xorq %r11, %rcx + addq %rdx, %r10 + vpxor %ymm11, %ymm8, %ymm8 + rorq $28, %rcx + movq %r14, %rax + addq %rcx, %r10 + vpaddq %ymm6, %ymm8, %ymm6 + # msg_sched done: 24-27 + # msg_sched: 28-29 + rorq $23, %rax + vpalignr $8, %ymm7, %ymm0, %ymm12 + vpalignr $8, %ymm3, %ymm4, %ymm13 + movq %r10, %rdx + movq %r15, %rcx + addq 224(%rsp), %r9 + xorq %r8, %rcx + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + xorq %r14, %rax + andq %r14, %rcx + rorq $4, %rax + xorq %r8, %rcx + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + xorq %r14, %rax + addq %rcx, %r9 + rorq $14, %rax + xorq %r11, %rdx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + vpsrlq $7, %ymm12, %ymm11 + vpxor %ymm10, %ymm8, %ymm8 + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + vpxor %ymm11, %ymm8, %ymm8 + vpaddq %ymm7, %ymm13, %ymm7 + xorq %r10, %rcx + addq %rbx, %r9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + rorq $23, %rax + vpaddq %ymm7, %ymm8, %ymm7 + movq %r9, %rbx + movq %r14, %rcx + addq 232(%rsp), %r8 + xorq %r15, %rcx + vpsrlq $19, %ymm6, %ymm8 + vpsllq $45, %ymm6, %ymm9 + xorq %r13, %rax + andq %r13, %rcx + rorq $4, %rax + xorq %r15, %rcx + vpsrlq $61, %ymm6, %ymm10 + vpsllq $3, %ymm6, %ymm11 + xorq %r13, %rax + addq %rcx, %r8 + rorq $14, %rax + xorq %r10, %rbx + addq %rax, %r8 + movq %r9, %rcx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + andq %rbx, %rdx + rorq $5, %rcx + xorq %r9, %rcx + xorq %r10, %rdx + vpxor %ymm10, %ymm8, %ymm8 + vpsrlq $6, %ymm6, %ymm11 + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + vpxor %ymm11, %ymm8, %ymm8 + rorq $28, %rcx + movq %r12, %rax + addq %rcx, %r8 + vpaddq %ymm7, %ymm8, %ymm7 + # msg_sched done: 28-31 + addq $0x100, %rsi + addq $0x100, %rsp + cmpq L_avx2_sha512_k_2_end(%rip), %rsi + jne L_sha512_len_avx2_start + vpaddq (%rsi), %ymm0, %ymm8 + vpaddq 32(%rsi), %ymm1, %ymm9 + vmovdqu %ymm8, (%rsp) + vmovdqu %ymm9, 32(%rsp) + vpaddq 64(%rsi), %ymm2, %ymm8 + vpaddq 96(%rsi), %ymm3, %ymm9 + vmovdqu %ymm8, 64(%rsp) + vmovdqu %ymm9, 96(%rsp) + vpaddq 128(%rsi), %ymm4, %ymm8 + vpaddq 160(%rsi), %ymm5, %ymm9 + vmovdqu %ymm8, 128(%rsp) + vmovdqu %ymm9, 160(%rsp) + vpaddq 192(%rsi), %ymm6, %ymm8 + vpaddq 224(%rsi), %ymm7, %ymm9 + vmovdqu %ymm8, 192(%rsp) + vmovdqu %ymm9, 224(%rsp) + rorq $23, %rax + movq %r8, %rdx + movq %r13, %rcx + addq (%rsp), %r15 + xorq %r14, %rcx + xorq %r12, %rax + andq %r12, %rcx + rorq $4, %rax + xorq %r14, %rcx + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, 
%rax + xorq %r9, %rdx + addq %rax, %r15 + movq %r8, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r8, %rcx + xorq %r9, %rbx + rorq $6, %rcx + addq %r15, %r11 + xorq %r8, %rcx + addq %rbx, %r15 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + rorq $23, %rax + movq %r15, %rbx + movq %r12, %rcx + addq 8(%rsp), %r14 + xorq %r13, %rcx + xorq %r11, %rax + andq %r11, %rcx + rorq $4, %rax + xorq %r13, %rcx + xorq %r11, %rax + addq %rcx, %r14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r15, %rcx + xorq %r8, %rdx + rorq $6, %rcx + addq %r14, %r10 + xorq %r15, %rcx + addq %rdx, %r14 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + rorq $23, %rax + movq %r14, %rdx + movq %r11, %rcx + addq 32(%rsp), %r13 + xorq %r12, %rcx + xorq %r10, %rax + andq %r10, %rcx + rorq $4, %rax + xorq %r12, %rcx + xorq %r10, %rax + addq %rcx, %r13 + rorq $14, %rax + xorq %r15, %rdx + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + xorq %r14, %rcx + addq %rbx, %r13 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + rorq $23, %rax + movq %r13, %rbx + movq %r10, %rcx + addq 40(%rsp), %r12 + xorq %r11, %rcx + xorq %r9, %rax + andq %r9, %rcx + rorq $4, %rax + xorq %r11, %rcx + xorq %r9, %rax + addq %rcx, %r12 + rorq $14, %rax + xorq %r14, %rbx + addq %rax, %r12 + movq %r13, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r13, %rcx + xorq %r14, %rdx + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + rorq $23, %rax + movq %r12, %rdx + movq %r9, %rcx + addq 64(%rsp), %r11 + xorq %r10, %rcx + xorq %r8, %rax + andq %r8, %rcx + rorq $4, %rax + xorq %r10, %rcx + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + addq %rax, %r11 + movq %r12, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r12, %rcx + xorq %r13, %rbx + rorq $6, %rcx + addq %r11, %r15 + xorq %r12, %rcx + addq %rbx, %r11 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + rorq $23, %rax + movq %r11, %rbx + movq %r8, %rcx + addq 72(%rsp), %r10 + xorq %r9, %rcx + xorq %r15, %rax + andq %r15, %rcx + rorq $4, %rax + xorq %r9, %rcx + xorq %r15, %rax + addq %rcx, %r10 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r11, %rcx + xorq %r12, %rdx + rorq $6, %rcx + addq %r10, %r14 + xorq %r11, %rcx + addq %rdx, %r10 + rorq $28, %rcx + movq %r14, %rax + addq %rcx, %r10 + rorq $23, %rax + movq %r10, %rdx + movq %r15, %rcx + addq 96(%rsp), %r9 + xorq %r8, %rcx + xorq %r14, %rax + andq %r14, %rcx + rorq $4, %rax + xorq %r8, %rcx + xorq %r14, %rax + addq %rcx, %r9 + rorq $14, %rax + xorq %r11, %rdx + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + xorq %r10, %rcx + addq %rbx, %r9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + rorq $23, %rax + movq %r9, %rbx + movq %r14, %rcx + addq 104(%rsp), %r8 + xorq %r15, %rcx + xorq %r13, %rax + andq %r13, %rcx + rorq $4, %rax + xorq %r15, %rcx + xorq %r13, %rax + addq %rcx, %r8 + rorq $14, %rax + xorq %r10, %rbx + addq %rax, %r8 + movq %r9, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r9, %rcx + xorq %r10, %rdx + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + rorq $28, %rcx + movq %r12, %rax + addq %rcx, %r8 + rorq $23, %rax + movq %r8, %rdx + movq %r13, %rcx + addq 128(%rsp), %r15 + xorq %r14, %rcx + xorq 
%r12, %rax + andq %r12, %rcx + rorq $4, %rax + xorq %r14, %rcx + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + addq %rax, %r15 + movq %r8, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r8, %rcx + xorq %r9, %rbx + rorq $6, %rcx + addq %r15, %r11 + xorq %r8, %rcx + addq %rbx, %r15 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + rorq $23, %rax + movq %r15, %rbx + movq %r12, %rcx + addq 136(%rsp), %r14 + xorq %r13, %rcx + xorq %r11, %rax + andq %r11, %rcx + rorq $4, %rax + xorq %r13, %rcx + xorq %r11, %rax + addq %rcx, %r14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r15, %rcx + xorq %r8, %rdx + rorq $6, %rcx + addq %r14, %r10 + xorq %r15, %rcx + addq %rdx, %r14 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + rorq $23, %rax + movq %r14, %rdx + movq %r11, %rcx + addq 160(%rsp), %r13 + xorq %r12, %rcx + xorq %r10, %rax + andq %r10, %rcx + rorq $4, %rax + xorq %r12, %rcx + xorq %r10, %rax + addq %rcx, %r13 + rorq $14, %rax + xorq %r15, %rdx + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + xorq %r14, %rcx + addq %rbx, %r13 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + rorq $23, %rax + movq %r13, %rbx + movq %r10, %rcx + addq 168(%rsp), %r12 + xorq %r11, %rcx + xorq %r9, %rax + andq %r9, %rcx + rorq $4, %rax + xorq %r11, %rcx + xorq %r9, %rax + addq %rcx, %r12 + rorq $14, %rax + xorq %r14, %rbx + addq %rax, %r12 + movq %r13, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r13, %rcx + xorq %r14, %rdx + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + rorq $23, %rax + movq %r12, %rdx + movq %r9, %rcx + addq 192(%rsp), %r11 + xorq %r10, %rcx + xorq %r8, %rax + andq %r8, %rcx + rorq $4, %rax + xorq %r10, %rcx + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + addq %rax, %r11 + movq %r12, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r12, %rcx + xorq %r13, %rbx + rorq $6, %rcx + addq %r11, %r15 + xorq %r12, %rcx + addq %rbx, %r11 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + rorq $23, %rax + movq %r11, %rbx + movq %r8, %rcx + addq 200(%rsp), %r10 + xorq %r9, %rcx + xorq %r15, %rax + andq %r15, %rcx + rorq $4, %rax + xorq %r9, %rcx + xorq %r15, %rax + addq %rcx, %r10 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r11, %rcx + xorq %r12, %rdx + rorq $6, %rcx + addq %r10, %r14 + xorq %r11, %rcx + addq %rdx, %r10 + rorq $28, %rcx + movq %r14, %rax + addq %rcx, %r10 + rorq $23, %rax + movq %r10, %rdx + movq %r15, %rcx + addq 224(%rsp), %r9 + xorq %r8, %rcx + xorq %r14, %rax + andq %r14, %rcx + rorq $4, %rax + xorq %r8, %rcx + xorq %r14, %rax + addq %rcx, %r9 + rorq $14, %rax + xorq %r11, %rdx + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + xorq %r10, %rcx + addq %rbx, %r9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + rorq $23, %rax + movq %r9, %rbx + movq %r14, %rcx + addq 232(%rsp), %r8 + xorq %r15, %rcx + xorq %r13, %rax + andq %r13, %rcx + rorq $4, %rax + xorq %r15, %rcx + xorq %r13, %rax + addq %rcx, %r8 + rorq $14, %rax + xorq %r10, %rbx + addq %rax, %r8 + movq %r9, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r9, %rcx + xorq %r10, %rdx + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + rorq $28, %rcx + movq %r12, %rax 
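+ # Note: the first block of the pair finishes below and is folded into the
+ # state; the second block is then hashed by five passes of 16 rounds over
+ # the already-computed w+k values in the upper 128-bit lanes (the odd
+ # 16-byte offsets of each 32-byte stack slot).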
+ addq %rcx, %r8 + subq $0x400, %rsp + addq (%rdi), %r8 + addq 8(%rdi), %r9 + addq 16(%rdi), %r10 + addq 24(%rdi), %r11 + addq 32(%rdi), %r12 + addq 40(%rdi), %r13 + addq 48(%rdi), %r14 + addq 56(%rdi), %r15 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, 32(%rdi) + movq %r13, 40(%rdi) + movq %r14, 48(%rdi) + movq %r15, 56(%rdi) + movq %r9, %rbx + movq %r12, %rax + xorq %r10, %rbx + movq $5, %rsi +L_sha512_len_avx2_tail: + rorq $23, %rax + movq %r8, %rdx + movq %r13, %rcx + addq 16(%rsp), %r15 + xorq %r14, %rcx + xorq %r12, %rax + andq %r12, %rcx + rorq $4, %rax + xorq %r14, %rcx + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + addq %rax, %r15 + movq %r8, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r8, %rcx + xorq %r9, %rbx + rorq $6, %rcx + addq %r15, %r11 + xorq %r8, %rcx + addq %rbx, %r15 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + rorq $23, %rax + movq %r15, %rbx + movq %r12, %rcx + addq 24(%rsp), %r14 + xorq %r13, %rcx + xorq %r11, %rax + andq %r11, %rcx + rorq $4, %rax + xorq %r13, %rcx + xorq %r11, %rax + addq %rcx, %r14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r15, %rcx + xorq %r8, %rdx + rorq $6, %rcx + addq %r14, %r10 + xorq %r15, %rcx + addq %rdx, %r14 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + rorq $23, %rax + movq %r14, %rdx + movq %r11, %rcx + addq 48(%rsp), %r13 + xorq %r12, %rcx + xorq %r10, %rax + andq %r10, %rcx + rorq $4, %rax + xorq %r12, %rcx + xorq %r10, %rax + addq %rcx, %r13 + rorq $14, %rax + xorq %r15, %rdx + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + xorq %r14, %rcx + addq %rbx, %r13 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + rorq $23, %rax + movq %r13, %rbx + movq %r10, %rcx + addq 56(%rsp), %r12 + xorq %r11, %rcx + xorq %r9, %rax + andq %r9, %rcx + rorq $4, %rax + xorq %r11, %rcx + xorq %r9, %rax + addq %rcx, %r12 + rorq $14, %rax + xorq %r14, %rbx + addq %rax, %r12 + movq %r13, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r13, %rcx + xorq %r14, %rdx + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + rorq $23, %rax + movq %r12, %rdx + movq %r9, %rcx + addq 80(%rsp), %r11 + xorq %r10, %rcx + xorq %r8, %rax + andq %r8, %rcx + rorq $4, %rax + xorq %r10, %rcx + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + addq %rax, %r11 + movq %r12, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r12, %rcx + xorq %r13, %rbx + rorq $6, %rcx + addq %r11, %r15 + xorq %r12, %rcx + addq %rbx, %r11 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + rorq $23, %rax + movq %r11, %rbx + movq %r8, %rcx + addq 88(%rsp), %r10 + xorq %r9, %rcx + xorq %r15, %rax + andq %r15, %rcx + rorq $4, %rax + xorq %r9, %rcx + xorq %r15, %rax + addq %rcx, %r10 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r11, %rcx + xorq %r12, %rdx + rorq $6, %rcx + addq %r10, %r14 + xorq %r11, %rcx + addq %rdx, %r10 + rorq $28, %rcx + movq %r14, %rax + addq %rcx, %r10 + rorq $23, %rax + movq %r10, %rdx + movq %r15, %rcx + addq 112(%rsp), %r9 + xorq %r8, %rcx + xorq %r14, %rax + andq %r14, %rcx + rorq $4, %rax + xorq %r8, %rcx + xorq %r14, %rax + addq %rcx, %r9 + rorq $14, %rax + xorq %r11, %rdx + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r10, %rcx + xorq 
%r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + xorq %r10, %rcx + addq %rbx, %r9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + rorq $23, %rax + movq %r9, %rbx + movq %r14, %rcx + addq 120(%rsp), %r8 + xorq %r15, %rcx + xorq %r13, %rax + andq %r13, %rcx + rorq $4, %rax + xorq %r15, %rcx + xorq %r13, %rax + addq %rcx, %r8 + rorq $14, %rax + xorq %r10, %rbx + addq %rax, %r8 + movq %r9, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r9, %rcx + xorq %r10, %rdx + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + rorq $28, %rcx + movq %r12, %rax + addq %rcx, %r8 + rorq $23, %rax + movq %r8, %rdx + movq %r13, %rcx + addq 144(%rsp), %r15 + xorq %r14, %rcx + xorq %r12, %rax + andq %r12, %rcx + rorq $4, %rax + xorq %r14, %rcx + xorq %r12, %rax + addq %rcx, %r15 + rorq $14, %rax + xorq %r9, %rdx + addq %rax, %r15 + movq %r8, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r8, %rcx + xorq %r9, %rbx + rorq $6, %rcx + addq %r15, %r11 + xorq %r8, %rcx + addq %rbx, %r15 + rorq $28, %rcx + movq %r11, %rax + addq %rcx, %r15 + rorq $23, %rax + movq %r15, %rbx + movq %r12, %rcx + addq 152(%rsp), %r14 + xorq %r13, %rcx + xorq %r11, %rax + andq %r11, %rcx + rorq $4, %rax + xorq %r13, %rcx + xorq %r11, %rax + addq %rcx, %r14 + rorq $14, %rax + xorq %r8, %rbx + addq %rax, %r14 + movq %r15, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r15, %rcx + xorq %r8, %rdx + rorq $6, %rcx + addq %r14, %r10 + xorq %r15, %rcx + addq %rdx, %r14 + rorq $28, %rcx + movq %r10, %rax + addq %rcx, %r14 + rorq $23, %rax + movq %r14, %rdx + movq %r11, %rcx + addq 176(%rsp), %r13 + xorq %r12, %rcx + xorq %r10, %rax + andq %r10, %rcx + rorq $4, %rax + xorq %r12, %rcx + xorq %r10, %rax + addq %rcx, %r13 + rorq $14, %rax + xorq %r15, %rdx + addq %rax, %r13 + movq %r14, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r14, %rcx + xorq %r15, %rbx + rorq $6, %rcx + addq %r13, %r9 + xorq %r14, %rcx + addq %rbx, %r13 + rorq $28, %rcx + movq %r9, %rax + addq %rcx, %r13 + rorq $23, %rax + movq %r13, %rbx + movq %r10, %rcx + addq 184(%rsp), %r12 + xorq %r11, %rcx + xorq %r9, %rax + andq %r9, %rcx + rorq $4, %rax + xorq %r11, %rcx + xorq %r9, %rax + addq %rcx, %r12 + rorq $14, %rax + xorq %r14, %rbx + addq %rax, %r12 + movq %r13, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r13, %rcx + xorq %r14, %rdx + rorq $6, %rcx + addq %r12, %r8 + xorq %r13, %rcx + addq %rdx, %r12 + rorq $28, %rcx + movq %r8, %rax + addq %rcx, %r12 + rorq $23, %rax + movq %r12, %rdx + movq %r9, %rcx + addq 208(%rsp), %r11 + xorq %r10, %rcx + xorq %r8, %rax + andq %r8, %rcx + rorq $4, %rax + xorq %r10, %rcx + xorq %r8, %rax + addq %rcx, %r11 + rorq $14, %rax + xorq %r13, %rdx + addq %rax, %r11 + movq %r12, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r12, %rcx + xorq %r13, %rbx + rorq $6, %rcx + addq %r11, %r15 + xorq %r12, %rcx + addq %rbx, %r11 + rorq $28, %rcx + movq %r15, %rax + addq %rcx, %r11 + rorq $23, %rax + movq %r11, %rbx + movq %r8, %rcx + addq 216(%rsp), %r10 + xorq %r9, %rcx + xorq %r15, %rax + andq %r15, %rcx + rorq $4, %rax + xorq %r9, %rcx + xorq %r15, %rax + addq %rcx, %r10 + rorq $14, %rax + xorq %r12, %rbx + addq %rax, %r10 + movq %r11, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r11, %rcx + xorq %r12, %rdx + rorq $6, %rcx + addq %r10, %r14 + xorq %r11, %rcx + addq %rdx, %r10 + rorq $28, %rcx + movq %r14, %rax + addq %rcx, %r10 + rorq $23, %rax + movq %r10, %rdx + movq %r15, %rcx + addq 240(%rsp), %r9 + xorq %r8, %rcx + xorq %r14, %rax + andq %r14, %rcx + rorq $4, %rax + xorq %r8, %rcx + xorq %r14, %rax + addq %rcx, %r9 + rorq $14, 
%rax + xorq %r11, %rdx + addq %rax, %r9 + movq %r10, %rcx + andq %rdx, %rbx + rorq $5, %rcx + xorq %r10, %rcx + xorq %r11, %rbx + rorq $6, %rcx + addq %r9, %r13 + xorq %r10, %rcx + addq %rbx, %r9 + rorq $28, %rcx + movq %r13, %rax + addq %rcx, %r9 + rorq $23, %rax + movq %r9, %rbx + movq %r14, %rcx + addq 248(%rsp), %r8 + xorq %r15, %rcx + xorq %r13, %rax + andq %r13, %rcx + rorq $4, %rax + xorq %r15, %rcx + xorq %r13, %rax + addq %rcx, %r8 + rorq $14, %rax + xorq %r10, %rbx + addq %rax, %r8 + movq %r9, %rcx + andq %rbx, %rdx + rorq $5, %rcx + xorq %r9, %rcx + xorq %r10, %rdx + rorq $6, %rcx + addq %r8, %r12 + xorq %r9, %rcx + addq %rdx, %r8 + rorq $28, %rcx + movq %r12, %rax + addq %rcx, %r8 + addq $0x100, %rsp + subq $0x01, %rsi + jnz L_sha512_len_avx2_tail + addq (%rdi), %r8 + addq 8(%rdi), %r9 + addq 16(%rdi), %r10 + addq 24(%rdi), %r11 + addq 32(%rdi), %r12 + addq 40(%rdi), %r13 + addq 48(%rdi), %r14 + addq 56(%rdi), %r15 + movq 224(%rdi), %rcx + addq $0x40, %rsp + addq $0x100, %rcx + subl $0x100, %ebp + movq %rcx, 224(%rdi) + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, 32(%rdi) + movq %r13, 40(%rdi) + movq %r14, 48(%rdi) + movq %r15, 56(%rdi) + jnz L_sha512_len_avx2_begin +L_sha512_len_avx2_done: + xorq %rax, %rax + vzeroupper + popq %rbp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha512_AVX2_Len,.-Transform_Sha512_AVX2_Len +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx2_rorx_sha512_k: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xfc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x6ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x6f067aa72176fba,0xa637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef 
__APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx2_rorx_sha512_k_2: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xfc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0xfc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x6ca6351e003826f,0x142929670a0e6e70 +.quad 0x6ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x6f067aa72176fba,0xa637dc5a2c898a6 +.quad 0x6f067aa72176fba,0xa637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 
0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 8 +#else +.p2align 3 +#endif /* __APPLE__ */ +L_avx2_rorx_sha512_k_2_end: +.quad 1024+L_avx2_rorx_sha512_k_2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_avx2_rorx_sha512_flip_mask: +.quad 0x1020304050607, 0x8090a0b0c0d0e0f +.quad 0x1020304050607, 0x8090a0b0c0d0e0f +#ifndef __APPLE__ +.text +.globl Transform_Sha512_AVX2_RORX +.type Transform_Sha512_AVX2_RORX,@function +.align 4 +Transform_Sha512_AVX2_RORX: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX2_RORX +.p2align 2 +_Transform_Sha512_AVX2_RORX: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x88, %rsp + leaq 64(%rdi), %rcx + vmovdqa L_avx2_rorx_sha512_flip_mask(%rip), %ymm15 + movq (%rdi), %r8 + movq 8(%rdi), %r9 + movq 16(%rdi), %r10 + movq 24(%rdi), %r11 + movq 32(%rdi), %r12 + movq 40(%rdi), %r13 + movq 48(%rdi), %r14 + movq 56(%rdi), %r15 + vmovdqu (%rcx), %ymm0 + vmovdqu 32(%rcx), %ymm1 + vpshufb %ymm15, %ymm0, %ymm0 + vpshufb %ymm15, %ymm1, %ymm1 + vmovdqu 64(%rcx), %ymm2 + vmovdqu 96(%rcx), %ymm3 + vpshufb %ymm15, %ymm2, %ymm2 + vpshufb %ymm15, %ymm3, %ymm3 + movl $4, 128(%rsp) + leaq L_avx2_rorx_sha512_k(%rip), %rsi + movq %r9, %rbx + xorq %rdx, %rdx + xorq %r10, %rbx + # set_w_k: 0 + vpaddq (%rsi), %ymm0, %ymm8 + vpaddq 32(%rsi), %ymm1, %ymm9 + vmovdqu %ymm8, (%rsp) + vmovdqu %ymm9, 32(%rsp) + vpaddq 64(%rsi), %ymm2, %ymm8 + vpaddq 96(%rsi), %ymm3, %ymm9 + vmovdqu %ymm8, 64(%rsp) + vmovdqu %ymm9, 96(%rsp) + # Start of 16 rounds +L_sha256_len_avx2_rorx_start: + addq $0x80, %rsi + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + vpblendd $3, %ymm1, %ymm0, %ymm12 + vpblendd $3, %ymm3, %ymm2, %ymm13 + addq (%rsp), %r15 + movq %r13, %rdx + xorq %rax, %rcx + vpermq $57, %ymm12, %ymm12 + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + vpermq $57, %ymm13, %ymm13 + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + vpsrlq $7, %ymm12, %ymm11 + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + vperm2I128 $0x81, %ymm3, %ymm3, %ymm14 + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + vpxor %ymm10, %ymm8, %ymm8 + addq 8(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + vpxor %ymm11, %ymm8, %ymm8 + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + vpaddq %ymm0, %ymm13, %ymm0 + vpaddq %ymm0, %ymm8, %ymm0 + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + vpsrlq $19, %ymm14, %ymm8 + vpsllq $45, %ymm14, %ymm9 + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + vpsrlq $61, %ymm14, %ymm10 + vpsllq $3, %ymm14, %ymm11 + vpor %ymm9, %ymm8, %ymm8 + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + vpor %ymm11, %ymm10, %ymm10 + movq %r8, %rbx + leaq (%r10,%r14,1), %r10 + xorq %r15, %rbx + vpxor %ymm10, %ymm8, %ymm8 + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + vpsrlq $6, %ymm14, %ymm11 + rorxq $14, %r10, 
%rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + vpxor %ymm11, %ymm8, %ymm8 + addq 16(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + vpaddq %ymm0, %ymm8, %ymm0 + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + vperm2I128 $8, %ymm0, %ymm0, %ymm14 + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + vpsrlq $19, %ymm14, %ymm8 + vpsllq $45, %ymm14, %ymm9 + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + vpsrlq $61, %ymm14, %ymm10 + vpsllq $3, %ymm14, %ymm11 + vpor %ymm9, %ymm8, %ymm8 + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + vpor %ymm11, %ymm10, %ymm10 + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + vpxor %ymm10, %ymm8, %ymm8 + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + vpsrlq $6, %ymm14, %ymm11 + addq 24(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + vpxor %ymm11, %ymm8, %ymm8 + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + vpaddq %ymm0, %ymm8, %ymm0 + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + vpaddq (%rsi), %ymm0, %ymm8 + movq %r14, %rbx + leaq (%r8,%r12,1), %r8 + xorq %r13, %rbx + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + vmovdqu %ymm8, (%rsp) + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + vpblendd $3, %ymm2, %ymm1, %ymm12 + vpblendd $3, %ymm0, %ymm3, %ymm13 + addq 32(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + vpermq $57, %ymm12, %ymm12 + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + vpermq $57, %ymm13, %ymm13 + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + vpsrlq $7, %ymm12, %ymm11 + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + vperm2I128 $0x81, %ymm0, %ymm0, %ymm14 + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + vpxor %ymm10, %ymm8, %ymm8 + addq 40(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + vpxor %ymm11, %ymm8, %ymm8 + xorq %r9, %rbx + rorxq $41, %r15, %rax + xorq %rcx, %rax + vpaddq %ymm1, %ymm13, %ymm1 + vpaddq %ymm1, %ymm8, %ymm1 + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + vpsrlq $19, %ymm14, %ymm8 + vpsllq $45, %ymm14, %ymm9 + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + vpsrlq $61, %ymm14, %ymm10 + vpsllq $3, %ymm14, %ymm11 + vpor %ymm9, %ymm8, %ymm8 + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + vpor %ymm11, %ymm10, %ymm10 + movq %r12, %rbx + leaq (%r14,%r10,1), %r14 + xorq %r11, %rbx + vpxor %ymm10, %ymm8, %ymm8 + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + vpsrlq $6, %ymm14, %ymm11 + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + vpxor %ymm11, %ymm8, %ymm8 + addq 48(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + vpaddq %ymm1, %ymm8, %ymm1 + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + vperm2I128 $8, %ymm1, %ymm1, %ymm14 + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + vpsrlq $19, %ymm14, %ymm8 + vpsllq $45, %ymm14, %ymm9 + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + vpsrlq $61, %ymm14, 
%ymm10 + vpsllq $3, %ymm14, %ymm11 + vpor %ymm9, %ymm8, %ymm8 + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + vpor %ymm11, %ymm10, %ymm10 + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + vpxor %ymm10, %ymm8, %ymm8 + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + vpsrlq $6, %ymm14, %ymm11 + addq 56(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + vpxor %ymm11, %ymm8, %ymm8 + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, %rcx + vpaddq %ymm1, %ymm8, %ymm1 + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + vpaddq 32(%rsi), %ymm1, %ymm8 + movq %r10, %rbx + leaq (%r12,%r8,1), %r12 + xorq %r9, %rbx + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + vmovdqu %ymm8, 32(%rsp) + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + vpblendd $3, %ymm3, %ymm2, %ymm12 + vpblendd $3, %ymm1, %ymm0, %ymm13 + addq 64(%rsp), %r15 + movq %r13, %rdx + xorq %rax, %rcx + vpermq $57, %ymm12, %ymm12 + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + vpermq $57, %ymm13, %ymm13 + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + vpsrlq $7, %ymm12, %ymm11 + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + vperm2I128 $0x81, %ymm1, %ymm1, %ymm14 + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + vpxor %ymm10, %ymm8, %ymm8 + addq 72(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + vpxor %ymm11, %ymm8, %ymm8 + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + vpaddq %ymm2, %ymm13, %ymm2 + vpaddq %ymm2, %ymm8, %ymm2 + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + vpsrlq $19, %ymm14, %ymm8 + vpsllq $45, %ymm14, %ymm9 + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + vpsrlq $61, %ymm14, %ymm10 + vpsllq $3, %ymm14, %ymm11 + vpor %ymm9, %ymm8, %ymm8 + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + vpor %ymm11, %ymm10, %ymm10 + movq %r8, %rbx + leaq (%r10,%r14,1), %r10 + xorq %r15, %rbx + vpxor %ymm10, %ymm8, %ymm8 + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + vpsrlq $6, %ymm14, %ymm11 + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + vpxor %ymm11, %ymm8, %ymm8 + addq 80(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + vpaddq %ymm2, %ymm8, %ymm2 + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + vperm2I128 $8, %ymm2, %ymm2, %ymm14 + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + vpsrlq $19, %ymm14, %ymm8 + vpsllq $45, %ymm14, %ymm9 + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + vpsrlq $61, %ymm14, %ymm10 + vpsllq $3, %ymm14, %ymm11 + vpor %ymm9, %ymm8, %ymm8 + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + vpor %ymm11, %ymm10, %ymm10 + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + vpxor %ymm10, %ymm8, %ymm8 + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + vpsrlq $6, %ymm14, %ymm11 + addq 88(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + vpxor %ymm11, %ymm8, %ymm8 + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, 
%r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + vpaddq %ymm2, %ymm8, %ymm2 + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + vpaddq 64(%rsi), %ymm2, %ymm8 + movq %r14, %rbx + leaq (%r8,%r12,1), %r8 + xorq %r13, %rbx + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + vmovdqu %ymm8, 64(%rsp) + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + vpblendd $3, %ymm0, %ymm3, %ymm12 + vpblendd $3, %ymm2, %ymm1, %ymm13 + addq 96(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + vpermq $57, %ymm12, %ymm12 + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + vpermq $57, %ymm13, %ymm13 + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + vpsrlq $7, %ymm12, %ymm11 + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + vperm2I128 $0x81, %ymm2, %ymm2, %ymm14 + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + vpxor %ymm10, %ymm8, %ymm8 + addq 104(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + vpxor %ymm11, %ymm8, %ymm8 + xorq %r9, %rbx + rorxq $41, %r15, %rax + xorq %rcx, %rax + vpaddq %ymm3, %ymm13, %ymm3 + vpaddq %ymm3, %ymm8, %ymm3 + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + vpsrlq $19, %ymm14, %ymm8 + vpsllq $45, %ymm14, %ymm9 + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + vpsrlq $61, %ymm14, %ymm10 + vpsllq $3, %ymm14, %ymm11 + vpor %ymm9, %ymm8, %ymm8 + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + vpor %ymm11, %ymm10, %ymm10 + movq %r12, %rbx + leaq (%r14,%r10,1), %r14 + xorq %r11, %rbx + vpxor %ymm10, %ymm8, %ymm8 + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + vpsrlq $6, %ymm14, %ymm11 + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + vpxor %ymm11, %ymm8, %ymm8 + addq 112(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + vpaddq %ymm3, %ymm8, %ymm3 + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + vperm2I128 $8, %ymm3, %ymm3, %ymm14 + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + vpsrlq $19, %ymm14, %ymm8 + vpsllq $45, %ymm14, %ymm9 + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + vpsrlq $61, %ymm14, %ymm10 + vpsllq $3, %ymm14, %ymm11 + vpor %ymm9, %ymm8, %ymm8 + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + vpor %ymm11, %ymm10, %ymm10 + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + vpxor %ymm10, %ymm8, %ymm8 + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + vpsrlq $6, %ymm14, %ymm11 + addq 120(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + vpxor %ymm11, %ymm8, %ymm8 + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, %rcx + vpaddq %ymm3, %ymm8, %ymm3 + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + vpaddq 96(%rsi), %ymm3, %ymm8 + movq %r10, %rbx + leaq (%r12,%r8,1), %r12 + xorq %r9, %rbx + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + vmovdqu %ymm8, 96(%rsp) + subl $0x01, 128(%rsp) + jne L_sha256_len_avx2_rorx_start + # rnd_all_4: 0-3 + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + addq (%rsp), %r15 + movq %r13, %rdx + 
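+ # Note: the RORX variant uses the BMI2 non-destructive rotate, so
+ # Sigma1(e) and Sigma0(a) are formed directly from rotations by
+ # 14/18/41 and 28/34/39 rather than the chained in-place rorq trick.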
xorq %rax, %rcx + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + addq 8(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + movq %r8, %rbx + leaq (%r10,%r14,1), %r10 + xorq %r15, %rbx + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + addq 16(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + addq 24(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + movq %r14, %rbx + leaq (%r8,%r12,1), %r8 + xorq %r13, %rbx + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + # rnd_all_4: 4-7 + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + addq 32(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + addq 40(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + xorq %r9, %rbx + rorxq $41, %r15, %rax + xorq %rcx, %rax + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + movq %r12, %rbx + leaq (%r14,%r10,1), %r14 + xorq %r11, %rbx + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + addq 48(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + addq 56(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, %rcx + rorxq $39, %r9, %rax + addq %rbx, %r8 + 
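+ # Note: Ch and Maj use the xor/and forms Ch(e,f,g) = g ^ (e & (f ^ g))
+ # and Maj(a,b,c) = b ^ ((a ^ b) & (b ^ c)); a ^ b is carried in a
+ # register across rounds, where it becomes the next round's b ^ c.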
xorq %rcx, %rax + movq %r10, %rbx + leaq (%r12,%r8,1), %r12 + xorq %r9, %rbx + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + # rnd_all_4: 8-11 + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + addq 64(%rsp), %r15 + movq %r13, %rdx + xorq %rax, %rcx + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + addq 72(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + movq %r8, %rbx + leaq (%r10,%r14,1), %r10 + xorq %r15, %rbx + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + addq 80(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + addq 88(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + movq %r14, %rbx + leaq (%r8,%r12,1), %r8 + xorq %r13, %rbx + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + # rnd_all_4: 12-15 + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + addq 96(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + addq 104(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + xorq %r9, %rbx + rorxq $41, %r15, %rax + xorq %rcx, %rax + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + movq %r12, %rbx + leaq (%r14,%r10,1), %r14 + xorq %r11, %rbx + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + addq 112(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + addq 
120(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, %rcx + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + movq %r10, %rbx + leaq (%r12,%r8,1), %r12 + xorq %r9, %rbx + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + addq %rdx, %r8 + addq %r8, (%rdi) + addq %r9, 8(%rdi) + addq %r10, 16(%rdi) + addq %r11, 24(%rdi) + addq %r12, 32(%rdi) + addq %r13, 40(%rdi) + addq %r14, 48(%rdi) + addq %r15, 56(%rdi) + xorq %rax, %rax + vzeroupper + addq $0x88, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha512_AVX2_RORX,.-Transform_Sha512_AVX2_RORX +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl Transform_Sha512_AVX2_RORX_Len +.type Transform_Sha512_AVX2_RORX_Len,@function +.align 4 +Transform_Sha512_AVX2_RORX_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX2_RORX_Len +.p2align 2 +_Transform_Sha512_AVX2_RORX_Len: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbp + testb $0x80, %sil + je L_sha512_len_avx2_rorx_block + movq 224(%rdi), %rax + push %rsi + vmovdqu (%rax), %ymm0 + vmovdqu 32(%rax), %ymm1 + vmovdqu 64(%rax), %ymm2 + vmovdqu 96(%rax), %ymm3 + vmovups %ymm0, 64(%rdi) + vmovups %ymm1, 96(%rdi) + vmovups %ymm2, 128(%rdi) + vmovups %ymm3, 160(%rdi) +#ifndef __APPLE__ + call Transform_Sha512_AVX2_RORX@plt +#else + call _Transform_Sha512_AVX2_RORX +#endif /* __APPLE__ */ + pop %rsi + addq $0x80, 224(%rdi) + subl $0x80, %esi + jz L_sha512_len_avx2_rorx_done +L_sha512_len_avx2_rorx_block: + movq 224(%rdi), %rax + vmovdqa L_avx2_rorx_sha512_flip_mask(%rip), %ymm15 + movq (%rdi), %r8 + movq 8(%rdi), %r9 + movq 16(%rdi), %r10 + movq 24(%rdi), %r11 + movq 32(%rdi), %r12 + movq 40(%rdi), %r13 + movq 48(%rdi), %r14 + movq 56(%rdi), %r15 + # Start of loop processing two blocks +L_sha512_len_avx2_rorx_begin: + subq $0x540, %rsp + leaq L_avx2_rorx_sha512_k_2(%rip), %rbp + movq %r9, %rbx + xorq %rdx, %rdx + vmovdqu (%rax), %xmm0 + vmovdqu 16(%rax), %xmm1 + vinserti128 $0x01, 128(%rax), %ymm0, %ymm0 + vinserti128 $0x01, 144(%rax), %ymm1, %ymm1 + vpshufb %ymm15, %ymm0, %ymm0 + vpshufb %ymm15, %ymm1, %ymm1 + vmovdqu 32(%rax), %xmm2 + vmovdqu 48(%rax), %xmm3 + vinserti128 $0x01, 160(%rax), %ymm2, %ymm2 + vinserti128 $0x01, 176(%rax), %ymm3, %ymm3 + vpshufb %ymm15, %ymm2, %ymm2 + vpshufb %ymm15, %ymm3, %ymm3 + vmovdqu 64(%rax), %xmm4 + vmovdqu 80(%rax), %xmm5 + vinserti128 $0x01, 192(%rax), %ymm4, %ymm4 + vinserti128 $0x01, 208(%rax), %ymm5, %ymm5 + vpshufb %ymm15, %ymm4, %ymm4 + vpshufb %ymm15, %ymm5, %ymm5 + vmovdqu 96(%rax), %xmm6 + vmovdqu 112(%rax), %xmm7 + vinserti128 $0x01, 224(%rax), %ymm6, %ymm6 + vinserti128 $0x01, 240(%rax), %ymm7, %ymm7 + vpshufb %ymm15, %ymm6, %ymm6 + vpshufb %ymm15, %ymm7, %ymm7 + xorq %r10, %rbx + # Start of 16 rounds +L_sha512_len_avx2_rorx_start: + vpaddq (%rbp), %ymm0, %ymm8 + vpaddq 32(%rbp), %ymm1, %ymm9 + vmovdqu %ymm8, (%rsp) + vmovdqu %ymm9, 32(%rsp) + vpaddq 64(%rbp), %ymm2, %ymm8 + vpaddq 96(%rbp), %ymm3, %ymm9 + vmovdqu %ymm8, 64(%rsp) + vmovdqu %ymm9, 96(%rsp) + vpaddq 128(%rbp), %ymm4, %ymm8 + vpaddq 160(%rbp), %ymm5, %ymm9 + vmovdqu %ymm8, 128(%rsp) + vmovdqu %ymm9, 160(%rsp) + vpaddq 192(%rbp), %ymm6, %ymm8 + vpaddq 224(%rbp), %ymm7, %ymm9 + vmovdqu %ymm8, 192(%rsp) + vmovdqu %ymm9, 224(%rsp) + # msg_sched: 0-1 + rorxq $14, %r12, %rax + rorxq $18, 
%r12, %rcx + addq %rdx, %r8 + vpalignr $8, %ymm0, %ymm1, %ymm12 + addq (%rsp), %r15 + movq %r13, %rdx + xorq %rax, %rcx + vpalignr $8, %ymm4, %ymm5, %ymm13 + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + vpsrlq $7, %ymm12, %ymm11 + vpxor %ymm10, %ymm8, %ymm8 + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + vpxor %ymm11, %ymm8, %ymm8 + vpaddq %ymm0, %ymm13, %ymm0 + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + vpaddq %ymm0, %ymm8, %ymm0 + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + vpsrlq $19, %ymm7, %ymm8 + vpsllq $45, %ymm7, %ymm9 + addq 8(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + vpsrlq $61, %ymm7, %ymm10 + vpsllq $3, %ymm7, %ymm11 + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + vpxor %ymm10, %ymm8, %ymm8 + vpsrlq $6, %ymm7, %ymm11 + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + movq %r8, %rbx + leaq (%r10,%r14,1), %r10 + xorq %r15, %rbx + vpxor %ymm11, %ymm8, %ymm8 + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + vpaddq %ymm0, %ymm8, %ymm0 + # msg_sched done: 0-3 + # msg_sched: 4-5 + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + vpalignr $8, %ymm1, %ymm2, %ymm12 + addq 32(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + vpalignr $8, %ymm5, %ymm6, %ymm13 + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + vpsrlq $7, %ymm12, %ymm11 + vpxor %ymm10, %ymm8, %ymm8 + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + vpxor %ymm11, %ymm8, %ymm8 + vpaddq %ymm1, %ymm13, %ymm1 + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + vpaddq %ymm1, %ymm8, %ymm1 + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + vpsrlq $19, %ymm0, %ymm8 + vpsllq $45, %ymm0, %ymm9 + addq 40(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + vpsrlq $61, %ymm0, %ymm10 + vpsllq $3, %ymm0, %ymm11 + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + vpxor %ymm10, %ymm8, %ymm8 + vpsrlq $6, %ymm0, %ymm11 + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + movq %r14, %rbx + leaq (%r8,%r12,1), %r8 + xorq %r13, %rbx + vpxor %ymm11, %ymm8, %ymm8 + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + vpaddq %ymm1, %ymm8, %ymm1 + # msg_sched done: 4-7 + # msg_sched: 8-9 + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + vpalignr $8, %ymm2, %ymm3, %ymm12 + addq 64(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + vpalignr $8, %ymm6, %ymm7, %ymm13 + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + vpsrlq $0x01, %ymm12, 
%ymm8 + vpsllq $63, %ymm12, %ymm9 + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + vpsrlq $7, %ymm12, %ymm11 + vpxor %ymm10, %ymm8, %ymm8 + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + vpxor %ymm11, %ymm8, %ymm8 + vpaddq %ymm2, %ymm13, %ymm2 + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + vpaddq %ymm2, %ymm8, %ymm2 + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + vpsrlq $19, %ymm1, %ymm8 + vpsllq $45, %ymm1, %ymm9 + addq 72(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + vpsrlq $61, %ymm1, %ymm10 + vpsllq $3, %ymm1, %ymm11 + xorq %r9, %rbx + rorxq $41, %r15, %rax + xorq %rcx, %rax + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + vpxor %ymm10, %ymm8, %ymm8 + vpsrlq $6, %ymm1, %ymm11 + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + movq %r12, %rbx + leaq (%r14,%r10,1), %r14 + xorq %r11, %rbx + vpxor %ymm11, %ymm8, %ymm8 + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + vpaddq %ymm2, %ymm8, %ymm2 + # msg_sched done: 8-11 + # msg_sched: 12-13 + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + vpalignr $8, %ymm3, %ymm4, %ymm12 + addq 96(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + vpalignr $8, %ymm7, %ymm0, %ymm13 + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + vpsrlq $7, %ymm12, %ymm11 + vpxor %ymm10, %ymm8, %ymm8 + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + vpxor %ymm11, %ymm8, %ymm8 + vpaddq %ymm3, %ymm13, %ymm3 + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + vpaddq %ymm3, %ymm8, %ymm3 + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + vpsrlq $19, %ymm2, %ymm8 + vpsllq $45, %ymm2, %ymm9 + addq 104(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + vpsrlq $61, %ymm2, %ymm10 + vpsllq $3, %ymm2, %ymm11 + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, %rcx + vpxor %ymm10, %ymm8, %ymm8 + vpsrlq $6, %ymm2, %ymm11 + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + movq %r10, %rbx + leaq (%r12,%r8,1), %r12 + xorq %r9, %rbx + vpxor %ymm11, %ymm8, %ymm8 + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + vpaddq %ymm3, %ymm8, %ymm3 + # msg_sched done: 12-15 + # msg_sched: 16-17 + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + vpalignr $8, %ymm4, %ymm5, %ymm12 + addq 128(%rsp), %r15 + movq %r13, %rdx + xorq %rax, %rcx + vpalignr $8, %ymm0, %ymm1, %ymm13 + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + vpor %ymm9, %ymm8, %ymm8 + 
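+        # Editor note: the vector half of each msg_sched block computes the
+        # SHA-512 message schedule
+        #   W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16].
+        # AVX2 has no 64-bit vector rotate, so each rotate in
+        # sigma0 (ROTR1 ^ ROTR8 ^ SHR7) and sigma1 (ROTR19 ^ ROTR61 ^ SHR6)
+        # is assembled from a vpsrlq/vpsllq pair merged with vpor.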
vpor %ymm11, %ymm10, %ymm10 + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + vpsrlq $7, %ymm12, %ymm11 + vpxor %ymm10, %ymm8, %ymm8 + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + vpxor %ymm11, %ymm8, %ymm8 + vpaddq %ymm4, %ymm13, %ymm4 + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + vpaddq %ymm4, %ymm8, %ymm4 + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + vpsrlq $19, %ymm3, %ymm8 + vpsllq $45, %ymm3, %ymm9 + addq 136(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + vpsrlq $61, %ymm3, %ymm10 + vpsllq $3, %ymm3, %ymm11 + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + vpxor %ymm10, %ymm8, %ymm8 + vpsrlq $6, %ymm3, %ymm11 + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + movq %r8, %rbx + leaq (%r10,%r14,1), %r10 + xorq %r15, %rbx + vpxor %ymm11, %ymm8, %ymm8 + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + vpaddq %ymm4, %ymm8, %ymm4 + # msg_sched done: 16-19 + # msg_sched: 20-21 + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + vpalignr $8, %ymm5, %ymm6, %ymm12 + addq 160(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + vpalignr $8, %ymm1, %ymm2, %ymm13 + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + vpsrlq $7, %ymm12, %ymm11 + vpxor %ymm10, %ymm8, %ymm8 + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + vpxor %ymm11, %ymm8, %ymm8 + vpaddq %ymm5, %ymm13, %ymm5 + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + vpaddq %ymm5, %ymm8, %ymm5 + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + vpsrlq $19, %ymm4, %ymm8 + vpsllq $45, %ymm4, %ymm9 + addq 168(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + vpsrlq $61, %ymm4, %ymm10 + vpsllq $3, %ymm4, %ymm11 + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + vpxor %ymm10, %ymm8, %ymm8 + vpsrlq $6, %ymm4, %ymm11 + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + movq %r14, %rbx + leaq (%r8,%r12,1), %r8 + xorq %r13, %rbx + vpxor %ymm11, %ymm8, %ymm8 + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + vpaddq %ymm5, %ymm8, %ymm5 + # msg_sched done: 20-23 + # msg_sched: 24-25 + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + vpalignr $8, %ymm6, %ymm7, %ymm12 + addq 192(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + vpalignr $8, %ymm2, %ymm3, %ymm13 + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + vpsrlq $7, %ymm12, %ymm11 + vpxor %ymm10, %ymm8, %ymm8 + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + vpxor %ymm11, %ymm8, %ymm8 + 
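+        # Editor note (hedged): the scalar round instructions and the vector
+        # schedule instructions are interleaved so the integer and SIMD
+        # execution ports can run in parallel; the interleaving order is a
+        # throughput choice and does not affect correctness.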
vpaddq %ymm6, %ymm13, %ymm6 + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + vpaddq %ymm6, %ymm8, %ymm6 + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + vpsrlq $19, %ymm5, %ymm8 + vpsllq $45, %ymm5, %ymm9 + addq 200(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + vpsrlq $61, %ymm5, %ymm10 + vpsllq $3, %ymm5, %ymm11 + xorq %r9, %rbx + rorxq $41, %r15, %rax + xorq %rcx, %rax + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + vpxor %ymm10, %ymm8, %ymm8 + vpsrlq $6, %ymm5, %ymm11 + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + movq %r12, %rbx + leaq (%r14,%r10,1), %r14 + xorq %r11, %rbx + vpxor %ymm11, %ymm8, %ymm8 + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + vpaddq %ymm6, %ymm8, %ymm6 + # msg_sched done: 24-27 + # msg_sched: 28-29 + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + vpalignr $8, %ymm7, %ymm0, %ymm12 + addq 224(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + vpalignr $8, %ymm3, %ymm4, %ymm13 + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + vpsrlq $0x01, %ymm12, %ymm8 + vpsllq $63, %ymm12, %ymm9 + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + vpsrlq $8, %ymm12, %ymm10 + vpsllq $56, %ymm12, %ymm11 + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + vpsrlq $7, %ymm12, %ymm11 + vpxor %ymm10, %ymm8, %ymm8 + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + vpxor %ymm11, %ymm8, %ymm8 + vpaddq %ymm7, %ymm13, %ymm7 + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + vpaddq %ymm7, %ymm8, %ymm7 + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + vpsrlq $19, %ymm6, %ymm8 + vpsllq $45, %ymm6, %ymm9 + addq 232(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + vpsrlq $61, %ymm6, %ymm10 + vpsllq $3, %ymm6, %ymm11 + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + vpor %ymm9, %ymm8, %ymm8 + vpor %ymm11, %ymm10, %ymm10 + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, %rcx + vpxor %ymm10, %ymm8, %ymm8 + vpsrlq $6, %ymm6, %ymm11 + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + movq %r10, %rbx + leaq (%r12,%r8,1), %r12 + xorq %r9, %rbx + vpxor %ymm11, %ymm8, %ymm8 + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + vpaddq %ymm7, %ymm8, %ymm7 + # msg_sched done: 28-31 + addq $0x100, %rbp + addq $0x100, %rsp + cmpq L_avx2_rorx_sha512_k_2_end(%rip), %rbp + jne L_sha512_len_avx2_rorx_start + vpaddq (%rbp), %ymm0, %ymm8 + vpaddq 32(%rbp), %ymm1, %ymm9 + vmovdqu %ymm8, (%rsp) + vmovdqu %ymm9, 32(%rsp) + vpaddq 64(%rbp), %ymm2, %ymm8 + vpaddq 96(%rbp), %ymm3, %ymm9 + vmovdqu %ymm8, 64(%rsp) + vmovdqu %ymm9, 96(%rsp) + vpaddq 128(%rbp), %ymm4, %ymm8 + vpaddq 160(%rbp), %ymm5, %ymm9 + vmovdqu %ymm8, 128(%rsp) + vmovdqu %ymm9, 160(%rsp) + vpaddq 192(%rbp), %ymm6, %ymm8 + vpaddq 224(%rbp), %ymm7, %ymm9 + vmovdqu %ymm8, 192(%rsp) + vmovdqu %ymm9, 224(%rsp) + # rnd_all_2: 0-1 + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + addq (%rsp), %r15 + movq %r13, %rdx + xorq %rax, %rcx + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + movq %r9, %rdx + addq %r15, %r11 + 
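+        # Editor note: this _Len variant processes two 128-byte blocks per
+        # pass. The earlier vinserti128 loads place the first block in the low
+        # and the second block in the high 128-bit lane of each ymm register,
+        # so every 32-byte K+W store on the stack holds a W pair for both
+        # blocks; the rnd_all_2 rounds below finish block one from the low
+        # lanes.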
xorq %r8, %rdx + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + addq 8(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + movq %r8, %rbx + leaq (%r10,%r14,1), %r10 + xorq %r15, %rbx + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + # rnd_all_2: 4-5 + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + addq 32(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + addq 40(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + movq %r14, %rbx + leaq (%r8,%r12,1), %r8 + xorq %r13, %rbx + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + # rnd_all_2: 8-9 + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + addq 64(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + addq 72(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + xorq %r9, %rbx + rorxq $41, %r15, %rax + xorq %rcx, %rax + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + movq %r12, %rbx + leaq (%r14,%r10,1), %r14 + xorq %r11, %rbx + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + # rnd_all_2: 12-13 + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + addq 96(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + addq 104(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, %rcx + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + movq %r10, %rbx + leaq (%r12,%r8,1), %r12 + xorq %r9, %rbx + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + # rnd_all_2: 16-17 + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + addq 128(%rsp), %r15 + movq %r13, 
%rdx + xorq %rax, %rcx + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + addq 136(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + movq %r8, %rbx + leaq (%r10,%r14,1), %r10 + xorq %r15, %rbx + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + # rnd_all_2: 20-21 + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + addq 160(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + addq 168(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + movq %r14, %rbx + leaq (%r8,%r12,1), %r8 + xorq %r13, %rbx + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + # rnd_all_2: 24-25 + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + addq 192(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + addq 200(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + xorq %r9, %rbx + rorxq $41, %r15, %rax + xorq %rcx, %rax + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + movq %r12, %rbx + leaq (%r14,%r10,1), %r14 + xorq %r11, %rbx + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + # rnd_all_2: 28-29 + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + addq 224(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + addq 232(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + 
xorq %rax, %rcx + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + movq %r10, %rbx + leaq (%r12,%r8,1), %r12 + xorq %r9, %rbx + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + addq %rdx, %r8 + subq $0x400, %rsp + addq (%rdi), %r8 + addq 8(%rdi), %r9 + addq 16(%rdi), %r10 + addq 24(%rdi), %r11 + addq 32(%rdi), %r12 + addq 40(%rdi), %r13 + addq 48(%rdi), %r14 + addq 56(%rdi), %r15 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, 32(%rdi) + movq %r13, 40(%rdi) + movq %r14, 48(%rdi) + movq %r15, 56(%rdi) + movq %r9, %rbx + xorq %rdx, %rdx + xorq %r10, %rbx + movq $5, %rbp +L_sha512_len_avx2_rorx_tail: + # rnd_all_2: 2-3 + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + addq 16(%rsp), %r15 + movq %r13, %rdx + xorq %rax, %rcx + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + addq 24(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + movq %r8, %rbx + leaq (%r10,%r14,1), %r10 + xorq %r15, %rbx + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + # rnd_all_2: 6-7 + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + addq 48(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + addq 56(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + movq %r14, %rbx + leaq (%r8,%r12,1), %r8 + xorq %r13, %rbx + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + # rnd_all_2: 10-11 + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + addq 80(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + addq 88(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + xorq %r9, %rbx + rorxq $41, %r15, %rax + xorq %rcx, %rax + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + movq %r12, %rbx + leaq (%r14,%r10,1), %r14 + xorq %r11, %rbx + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, 
%rdx + # rnd_all_2: 14-15 + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + addq 112(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + addq 120(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, %rcx + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + movq %r10, %rbx + leaq (%r12,%r8,1), %r12 + xorq %r9, %rbx + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + # rnd_all_2: 18-19 + rorxq $14, %r12, %rax + rorxq $18, %r12, %rcx + addq %rdx, %r8 + addq 144(%rsp), %r15 + movq %r13, %rdx + xorq %rax, %rcx + xorq %r14, %rdx + rorxq $41, %r12, %rax + xorq %rcx, %rax + andq %r12, %rdx + addq %rax, %r15 + rorxq $28, %r8, %rax + rorxq $34, %r8, %rcx + xorq %r14, %rdx + xorq %rax, %rcx + rorxq $39, %r8, %rax + addq %rdx, %r15 + xorq %rcx, %rax + movq %r9, %rdx + addq %r15, %r11 + xorq %r8, %rdx + andq %rdx, %rbx + addq %rax, %r15 + xorq %r9, %rbx + rorxq $14, %r11, %rax + rorxq $18, %r11, %rcx + addq %rbx, %r15 + addq 152(%rsp), %r14 + movq %r12, %rbx + xorq %rax, %rcx + xorq %r13, %rbx + rorxq $41, %r11, %rax + xorq %rcx, %rax + andq %r11, %rbx + addq %rax, %r14 + rorxq $28, %r15, %rax + rorxq $34, %r15, %rcx + xorq %r13, %rbx + xorq %rax, %rcx + rorxq $39, %r15, %rax + addq %rbx, %r14 + xorq %rcx, %rax + movq %r8, %rbx + leaq (%r10,%r14,1), %r10 + xorq %r15, %rbx + andq %rbx, %rdx + addq %rax, %r14 + xorq %r8, %rdx + # rnd_all_2: 22-23 + rorxq $14, %r10, %rax + rorxq $18, %r10, %rcx + addq %rdx, %r14 + addq 176(%rsp), %r13 + movq %r11, %rdx + xorq %rax, %rcx + xorq %r12, %rdx + rorxq $41, %r10, %rax + xorq %rcx, %rax + andq %r10, %rdx + addq %rax, %r13 + rorxq $28, %r14, %rax + rorxq $34, %r14, %rcx + xorq %r12, %rdx + xorq %rax, %rcx + rorxq $39, %r14, %rax + addq %rdx, %r13 + xorq %rcx, %rax + movq %r15, %rdx + addq %r13, %r9 + xorq %r14, %rdx + andq %rdx, %rbx + addq %rax, %r13 + xorq %r15, %rbx + rorxq $14, %r9, %rax + rorxq $18, %r9, %rcx + addq %rbx, %r13 + addq 184(%rsp), %r12 + movq %r10, %rbx + xorq %rax, %rcx + xorq %r11, %rbx + rorxq $41, %r9, %rax + xorq %rcx, %rax + andq %r9, %rbx + addq %rax, %r12 + rorxq $28, %r13, %rax + rorxq $34, %r13, %rcx + xorq %r11, %rbx + xorq %rax, %rcx + rorxq $39, %r13, %rax + addq %rbx, %r12 + xorq %rcx, %rax + movq %r14, %rbx + leaq (%r8,%r12,1), %r8 + xorq %r13, %rbx + andq %rbx, %rdx + addq %rax, %r12 + xorq %r14, %rdx + # rnd_all_2: 26-27 + rorxq $14, %r8, %rax + rorxq $18, %r8, %rcx + addq %rdx, %r12 + addq 208(%rsp), %r11 + movq %r9, %rdx + xorq %rax, %rcx + xorq %r10, %rdx + rorxq $41, %r8, %rax + xorq %rcx, %rax + andq %r8, %rdx + addq %rax, %r11 + rorxq $28, %r12, %rax + rorxq $34, %r12, %rcx + xorq %r10, %rdx + xorq %rax, %rcx + rorxq $39, %r12, %rax + addq %rdx, %r11 + xorq %rcx, %rax + movq %r13, %rdx + addq %r11, %r15 + xorq %r12, %rdx + andq %rdx, %rbx + addq %rax, %r11 + xorq %r13, %rbx + rorxq $14, %r15, %rax + rorxq $18, %r15, %rcx + addq %rbx, %r11 + addq 216(%rsp), %r10 + movq %r8, %rbx + xorq %rax, %rcx + xorq %r9, %rbx + rorxq $41, %r15, %rax 
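+        # Editor note (hedged): this tail loop (%rbp counts five passes of 16
+        # rounds, i.e. all 80 rounds) appears to replay the saved K+W values
+        # for the second block, reading the high-lane halves at offsets
+        # +16/+24 within each 32-byte store.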
+ xorq %rcx, %rax + andq %r15, %rbx + addq %rax, %r10 + rorxq $28, %r11, %rax + rorxq $34, %r11, %rcx + xorq %r9, %rbx + xorq %rax, %rcx + rorxq $39, %r11, %rax + addq %rbx, %r10 + xorq %rcx, %rax + movq %r12, %rbx + leaq (%r14,%r10,1), %r14 + xorq %r11, %rbx + andq %rbx, %rdx + addq %rax, %r10 + xorq %r12, %rdx + # rnd_all_2: 30-31 + rorxq $14, %r14, %rax + rorxq $18, %r14, %rcx + addq %rdx, %r10 + addq 240(%rsp), %r9 + movq %r15, %rdx + xorq %rax, %rcx + xorq %r8, %rdx + rorxq $41, %r14, %rax + xorq %rcx, %rax + andq %r14, %rdx + addq %rax, %r9 + rorxq $28, %r10, %rax + rorxq $34, %r10, %rcx + xorq %r8, %rdx + xorq %rax, %rcx + rorxq $39, %r10, %rax + addq %rdx, %r9 + xorq %rcx, %rax + movq %r11, %rdx + addq %r9, %r13 + xorq %r10, %rdx + andq %rdx, %rbx + addq %rax, %r9 + xorq %r11, %rbx + rorxq $14, %r13, %rax + rorxq $18, %r13, %rcx + addq %rbx, %r9 + addq 248(%rsp), %r8 + movq %r14, %rbx + xorq %rax, %rcx + xorq %r15, %rbx + rorxq $41, %r13, %rax + xorq %rcx, %rax + andq %r13, %rbx + addq %rax, %r8 + rorxq $28, %r9, %rax + rorxq $34, %r9, %rcx + xorq %r15, %rbx + xorq %rax, %rcx + rorxq $39, %r9, %rax + addq %rbx, %r8 + xorq %rcx, %rax + movq %r10, %rbx + leaq (%r12,%r8,1), %r12 + xorq %r9, %rbx + andq %rbx, %rdx + addq %rax, %r8 + xorq %r10, %rdx + addq $0x100, %rsp + subq $0x01, %rbp + jnz L_sha512_len_avx2_rorx_tail + addq %rdx, %r8 + addq (%rdi), %r8 + addq 8(%rdi), %r9 + addq 16(%rdi), %r10 + addq 24(%rdi), %r11 + addq 32(%rdi), %r12 + addq 40(%rdi), %r13 + addq 48(%rdi), %r14 + addq 56(%rdi), %r15 + movq 224(%rdi), %rax + addq $0x40, %rsp + addq $0x100, %rax + subl $0x100, %esi + movq %rax, 224(%rdi) + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, 32(%rdi) + movq %r13, 40(%rdi) + movq %r14, 48(%rdi) + movq %r15, 56(%rdi) + jnz L_sha512_len_avx2_rorx_begin +L_sha512_len_avx2_rorx_done: + xorq %rax, %rax + vzeroupper + popq %rbp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha512_AVX2_RORX_Len,.-Transform_Sha512_AVX2_RORX_Len +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ diff --git a/client/wolfssl/wolfcrypt/src/signature.c b/client/wolfssl/wolfcrypt/src/signature.c new file mode 100644 index 0000000..5d50333 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/signature.c @@ -0,0 +1,559 @@ +/* signature.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/signature.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#ifndef NO_ASN
+#include <wolfssl/wolfcrypt/asn.h>
+#endif
+#ifdef HAVE_ECC
+#include <wolfssl/wolfcrypt/ecc.h>
+#endif
+#ifndef NO_RSA
+#include <wolfssl/wolfcrypt/rsa.h>
+#endif
+
+/* If ECC and RSA are disabled then disable signature wrapper */
+#if (!defined(HAVE_ECC) || (defined(HAVE_ECC) && !defined(HAVE_ECC_SIGN) \
+    && !defined(HAVE_ECC_VERIFY))) && defined(NO_RSA)
+    #undef NO_SIG_WRAPPER
+    #define NO_SIG_WRAPPER
+#endif
+
+/* Signature wrapper disabled check */
+#ifndef NO_SIG_WRAPPER
+
+#if !defined(NO_RSA) && !defined(NO_ASN)
+static int wc_SignatureDerEncode(enum wc_HashType hash_type, byte* hash_data,
+    word32 hash_len, word32* hash_enc_len)
+{
+    int ret, oid;
+
+    ret = wc_HashGetOID(hash_type);
+    if (ret < 0) {
+        return ret;
+    }
+    oid = ret;
+
+    ret = wc_EncodeSignature(hash_data, hash_data, hash_len, oid);
+    if (ret > 0) {
+        *hash_enc_len = ret;
+        ret = 0;
+    }
+
+    return ret;
+}
+#endif /* !NO_RSA && !NO_ASN */
+
+int wc_SignatureGetSize(enum wc_SignatureType sig_type,
+    const void* key, word32 key_len)
+{
+    int sig_len = BAD_FUNC_ARG;
+
+    /* Suppress possible unused args if all signature types are disabled */
+    (void)key;
+    (void)key_len;
+
+    switch(sig_type) {
+        case WC_SIGNATURE_TYPE_ECC:
+#ifdef HAVE_ECC
+            /* Sanity check that void* key is at least ecc_key in size */
+            if (key_len >= sizeof(ecc_key)) {
+                sig_len = wc_ecc_sig_size((ecc_key*)key);
+            }
+            else {
+                WOLFSSL_MSG("wc_SignatureGetSize: Invalid ECC key size");
+            }
+#else
+            sig_len = SIG_TYPE_E;
+#endif
+            break;
+
+        case WC_SIGNATURE_TYPE_RSA_W_ENC:
+        case WC_SIGNATURE_TYPE_RSA:
+#ifndef NO_RSA
+            /* Sanity check that void* key is at least RsaKey in size */
+            if (key_len >= sizeof(RsaKey)) {
+                sig_len = wc_RsaEncryptSize((RsaKey*)key);
+            }
+            else {
+                WOLFSSL_MSG("wc_SignatureGetSize: Invalid RsaKey key size");
+            }
+#else
+            sig_len = SIG_TYPE_E;
+#endif
+            break;
+
+        case WC_SIGNATURE_TYPE_NONE:
+        default:
+            sig_len = BAD_FUNC_ARG;
+            break;
+    }
+    return sig_len;
+}
+
+int wc_SignatureVerifyHash(
+    enum wc_HashType hash_type, enum wc_SignatureType sig_type,
+    const byte* hash_data, word32 hash_len,
+    const byte* sig, word32 sig_len,
+    const void* key, word32 key_len)
+{
+    int ret;
+
+    /* Check arguments */
+    if (hash_data == NULL || hash_len == 0 ||
+        sig == NULL || sig_len == 0 ||
+        key == NULL || key_len == 0) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* Validate signature len (1 to max is okay) */
+    if ((int)sig_len > wc_SignatureGetSize(sig_type, key, key_len)) {
+        WOLFSSL_MSG("wc_SignatureVerify: Invalid sig type/len");
+        return BAD_FUNC_ARG;
+    }
+
+    /* Validate hash size */
+    ret = wc_HashGetDigestSize(hash_type);
+    if (ret < 0) {
+        WOLFSSL_MSG("wc_SignatureVerify: Invalid hash type/len");
+        return ret;
+    }
+    ret = 0;
+
+    /* Verify signature using hash */
+    switch (sig_type) {
+        case WC_SIGNATURE_TYPE_ECC:
+        {
+#if defined(HAVE_ECC) && defined(HAVE_ECC_VERIFY)
+            int is_valid_sig = 0;
+
+            /* Perform verification of signature using provided ECC key */
+            do {
+            #ifdef WOLFSSL_ASYNC_CRYPT
+                ret = wc_AsyncWait(ret, &((ecc_key*)key)->asyncDev,
+                    WC_ASYNC_FLAG_CALL_AGAIN);
+            #endif
+                if (ret >= 0)
+                    ret = wc_ecc_verify_hash(sig, sig_len, hash_data, hash_len,
+                        &is_valid_sig, (ecc_key*)key);
+            } while (ret == WC_PENDING_E);
+            if (ret != 0 || is_valid_sig != 1) {
+                ret = SIG_VERIFY_E;
+            }
+#else
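+            /* Editor note: HAVE_ECC_VERIFY was not compiled in, so the ECC
+             * case reports the signature type as unsupported (SIG_TYPE_E).
+             * Illustrative call into this wrapper (the names 'digest',
+             * 'digestSz', 'sigBuf', 'sigSz' and 'eccKey' are hypothetical):
+             *   ret = wc_SignatureVerifyHash(WC_HASH_TYPE_SHA256,
+             *       WC_SIGNATURE_TYPE_ECC, digest, digestSz,
+             *       sigBuf, sigSz, &eccKey, sizeof(eccKey));
+             */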
+            ret = SIG_TYPE_E;
+#endif
+            break;
+        }
+
+        case WC_SIGNATURE_TYPE_RSA_W_ENC:
+        case WC_SIGNATURE_TYPE_RSA:
+        {
+#ifndef NO_RSA
+#if defined(WOLFSSL_CRYPTOCELL)
+            /* the signature must propagate to the cryptocell to get verified */
+            if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+                ret = cc310_RsaSSL_Verify(hash_data, hash_len, (byte*)sig, key,
+                    CRYS_RSA_HASH_SHA256_mode);
+            }
+            else {
+                ret = cc310_RsaSSL_Verify(hash_data, hash_len, (byte*)sig, key,
+                    CRYS_RSA_After_SHA256_mode);
+            }
+
+            if (ret != 0) {
+                WOLFSSL_MSG("RSA Signature Verify difference!");
+                ret = SIG_VERIFY_E;
+            }
+
+#else /* WOLFSSL_CRYPTOCELL */
+
+            word32 plain_len = hash_len;
+            byte *plain_data;
+
+            /* Make sure the plain text output is at least key size */
+            if (plain_len < sig_len) {
+                plain_len = sig_len;
+            }
+            plain_data = (byte*)XMALLOC(plain_len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            if (plain_data) {
+                /* Perform verification of signature using provided RSA key */
+                do {
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    ret = wc_AsyncWait(ret, &((RsaKey*)key)->asyncDev,
+                        WC_ASYNC_FLAG_CALL_AGAIN);
+                #endif
+                    if (ret >= 0)
+                        ret = wc_RsaSSL_Verify(sig, sig_len, plain_data,
+                            plain_len, (RsaKey*)key);
+                } while (ret == WC_PENDING_E);
+                if (ret >= 0) {
+                    if ((word32)ret == hash_len &&
+                            XMEMCMP(plain_data, hash_data, hash_len) == 0) {
+                        ret = 0; /* Success */
+                    }
+                    else {
+                        WOLFSSL_MSG("RSA Signature Verify difference!");
+                        ret = SIG_VERIFY_E;
+                    }
+                }
+                XFREE(plain_data, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            }
+            else {
+                ret = MEMORY_E;
+            }
+#endif /* !WOLFSSL_CRYPTOCELL */
+#else
+            ret = SIG_TYPE_E;
+#endif
+            break;
+        }
+
+        case WC_SIGNATURE_TYPE_NONE:
+        default:
+            ret = BAD_FUNC_ARG;
+            break;
+    }
+
+    return ret;
+}
+
+int wc_SignatureVerify(
+    enum wc_HashType hash_type, enum wc_SignatureType sig_type,
+    const byte* data, word32 data_len,
+    const byte* sig, word32 sig_len,
+    const void* key, word32 key_len)
+{
+    int ret;
+    word32 hash_len, hash_enc_len;
+#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN)
+    byte *hash_data;
+#else
+    byte hash_data[MAX_DER_DIGEST_SZ];
+#endif
+
+    /* Check arguments */
+    if (data == NULL || data_len == 0 ||
+        sig == NULL || sig_len == 0 ||
+        key == NULL || key_len == 0) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* Validate signature len (1 to max is okay) */
+    if ((int)sig_len > wc_SignatureGetSize(sig_type, key, key_len)) {
+        WOLFSSL_MSG("wc_SignatureVerify: Invalid sig type/len");
+        return BAD_FUNC_ARG;
+    }
+
+    /* Validate hash size */
+    ret = wc_HashGetDigestSize(hash_type);
+    if (ret < 0) {
+        WOLFSSL_MSG("wc_SignatureVerify: Invalid hash type/len");
+        return ret;
+    }
+    hash_enc_len = hash_len = ret;
+
+#ifndef NO_RSA
+    if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+        /* For RSA with ASN.1 encoding include room */
+        hash_enc_len += MAX_DER_DIGEST_ASN_SZ;
+    }
+#endif
+
+#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN)
+    /* Allocate temporary buffer for hash data */
+    hash_data = (byte*)XMALLOC(hash_enc_len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (hash_data == NULL) {
+        return MEMORY_E;
+    }
+#endif
+
+    /* Perform hash of data */
+    ret = wc_Hash(hash_type, data, data_len, hash_data, hash_len);
+    if (ret == 0) {
+        /* Handle RSA with DER encoding */
+        if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+        #if defined(NO_RSA) || defined(NO_ASN)
+            ret = SIG_TYPE_E;
+        #else
+            ret = wc_SignatureDerEncode(hash_type, hash_data, hash_len,
+                &hash_enc_len);
+        #endif
+        }
+
+        if (ret == 0) {
+#if defined(WOLFSSL_CRYPTOCELL)
+            if ((sig_type == WC_SIGNATURE_TYPE_RSA)
+                || (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC)) {
+                if (sig_type ==
WC_SIGNATURE_TYPE_RSA_W_ENC) { + ret = cc310_RsaSSL_Verify(hash_data, hash_len, sig, key, + cc310_hashModeRSA(hash_type, 0)); + } + else { + ret = cc310_RsaSSL_Verify(hash_data, hash_len, sig, key, + cc310_hashModeRSA(hash_type, 1)); + } + } +#else + /* Verify signature using hash */ + ret = wc_SignatureVerifyHash(hash_type, sig_type, + hash_data, hash_enc_len, sig, sig_len, key, key_len); +#endif /* WOLFSSL_CRYPTOCELL */ + } + } + +#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN) + XFREE(hash_data, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return ret; +} + + +int wc_SignatureGenerateHash( + enum wc_HashType hash_type, enum wc_SignatureType sig_type, + const byte* hash_data, word32 hash_len, + byte* sig, word32 *sig_len, + const void* key, word32 key_len, WC_RNG* rng) +{ + return wc_SignatureGenerateHash_ex(hash_type, sig_type, hash_data, hash_len, + sig, sig_len, key, key_len, rng, 1); +} + +int wc_SignatureGenerateHash_ex( + enum wc_HashType hash_type, enum wc_SignatureType sig_type, + const byte* hash_data, word32 hash_len, + byte* sig, word32 *sig_len, + const void* key, word32 key_len, WC_RNG* rng, int verify) +{ + int ret; + + /* Suppress possible unused arg if all signature types are disabled */ + (void)rng; + + /* Check arguments */ + if (hash_data == NULL || hash_len == 0 || + sig == NULL || sig_len == NULL || *sig_len == 0 || + key == NULL || key_len == 0) { + return BAD_FUNC_ARG; + } + + /* Validate signature len (needs to be at least max) */ + if ((int)*sig_len < wc_SignatureGetSize(sig_type, key, key_len)) { + WOLFSSL_MSG("wc_SignatureGenerate: Invalid sig type/len"); + return BAD_FUNC_ARG; + } + + /* Validate hash size */ + ret = wc_HashGetDigestSize(hash_type); + if (ret < 0) { + WOLFSSL_MSG("wc_SignatureGenerate: Invalid hash type/len"); + return ret; + } + ret = 0; + + /* Create signature using hash as data */ + switch (sig_type) { + case WC_SIGNATURE_TYPE_ECC: +#if defined(HAVE_ECC) && defined(HAVE_ECC_SIGN) + /* Create signature using provided ECC key */ + do { + #ifdef WOLFSSL_ASYNC_CRYPT + ret = wc_AsyncWait(ret, &((ecc_key*)key)->asyncDev, + WC_ASYNC_FLAG_CALL_AGAIN); + #endif + if (ret >= 0) + ret = wc_ecc_sign_hash(hash_data, hash_len, sig, sig_len, + rng, (ecc_key*)key); + } while (ret == WC_PENDING_E); +#else + ret = SIG_TYPE_E; +#endif + break; + + case WC_SIGNATURE_TYPE_RSA_W_ENC: + case WC_SIGNATURE_TYPE_RSA: +#if !defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) + #if defined(WOLFSSL_CRYPTOCELL) + if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) { + ret = cc310_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len, key, + cc310_hashModeRSA(hash_type, 0)); + } + else { + ret = cc310_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len, key, + cc310_hashModeRSA(hash_type, 1)); + } + #else + /* Create signature using provided RSA key */ + do { + #ifdef WOLFSSL_ASYNC_CRYPT + ret = wc_AsyncWait(ret, &((RsaKey*)key)->asyncDev, + WC_ASYNC_FLAG_CALL_AGAIN); + #endif + if (ret >= 0) + ret = wc_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len, + (RsaKey*)key, rng); + } while (ret == WC_PENDING_E); + #endif /* WOLFSSL_CRYPTOCELL */ + if (ret >= 0) { + *sig_len = ret; + ret = 0; /* Success */ + } +#else + ret = SIG_TYPE_E; +#endif + break; + + case WC_SIGNATURE_TYPE_NONE: + default: + ret = BAD_FUNC_ARG; + break; + } + + if (ret == 0 && verify) { + ret = wc_SignatureVerifyHash(hash_type, sig_type, hash_data, hash_len, + sig, *sig_len, key, key_len); + } + + return ret; +} + +int wc_SignatureGenerate( + enum wc_HashType hash_type, enum wc_SignatureType sig_type, + const byte* data, 
word32 data_len, + byte* sig, word32 *sig_len, + const void* key, word32 key_len, WC_RNG* rng) +{ + return wc_SignatureGenerate_ex(hash_type, sig_type, data, data_len, sig, + sig_len, key, key_len, rng, 1); +} + +int wc_SignatureGenerate_ex( + enum wc_HashType hash_type, enum wc_SignatureType sig_type, + const byte* data, word32 data_len, + byte* sig, word32 *sig_len, + const void* key, word32 key_len, WC_RNG* rng, int verify) +{ + int ret; + word32 hash_len, hash_enc_len; +#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN) + byte *hash_data; +#else + byte hash_data[MAX_DER_DIGEST_SZ]; +#endif + + /* Check arguments */ + if (data == NULL || data_len == 0 || + sig == NULL || sig_len == NULL || *sig_len == 0 || + key == NULL || key_len == 0) { + return BAD_FUNC_ARG; + } + + /* Validate signature len (needs to be at least max) */ + if ((int)*sig_len < wc_SignatureGetSize(sig_type, key, key_len)) { + WOLFSSL_MSG("wc_SignatureGenerate: Invalid sig type/len"); + return BAD_FUNC_ARG; + } + + /* Validate hash size */ + ret = wc_HashGetDigestSize(hash_type); + if (ret < 0) { + WOLFSSL_MSG("wc_SignatureGenerate: Invalid hash type/len"); + return ret; + } + hash_enc_len = hash_len = ret; + +#if !defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) + if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) { + /* For RSA with ASN.1 encoding include room */ + hash_enc_len += MAX_DER_DIGEST_ASN_SZ; + } +#endif + +#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN) + /* Allocate temporary buffer for hash data */ + hash_data = (byte*)XMALLOC(hash_enc_len, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (hash_data == NULL) { + return MEMORY_E; + } +#endif + + /* Perform hash of data */ + ret = wc_Hash(hash_type, data, data_len, hash_data, hash_len); + if (ret == 0) { + /* Handle RSA with DER encoding */ + if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) { + #if defined(NO_RSA) || defined(NO_ASN) || \ + defined(WOLFSSL_RSA_PUBLIC_ONLY) + ret = SIG_TYPE_E; + #else + ret = wc_SignatureDerEncode(hash_type, hash_data, hash_len, + &hash_enc_len); + #endif + } + if (ret == 0) { +#if defined(WOLFSSL_CRYPTOCELL) + if ((sig_type == WC_SIGNATURE_TYPE_RSA) + || (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC)) { + if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) { + ret = cc310_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len, + key, cc310_hashModeRSA(hash_type, 0)); + } + else { + ret = cc310_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len, + key, cc310_hashModeRSA(hash_type, 1)); + } + + if (ret == *sig_len) { + ret = 0; + } + } + } + } +#else + /* Generate signature using hash */ + ret = wc_SignatureGenerateHash(hash_type, sig_type, + hash_data, hash_enc_len, sig, sig_len, key, key_len, rng); + } + } + + if (ret == 0 && verify) { + ret = wc_SignatureVerifyHash(hash_type, sig_type, hash_data, + hash_enc_len, sig, *sig_len, key, key_len); + } +#endif /* WOLFSSL_CRYPTOCELL */ + +#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN) + XFREE(hash_data, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return ret; +} + +#endif /* NO_SIG_WRAPPER */ diff --git a/client/wolfssl/wolfcrypt/src/sp_arm32.c b/client/wolfssl/wolfcrypt/src/sp_arm32.c new file mode 100644 index 0000000..4540dde --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/sp_arm32.c @@ -0,0 +1,89057 @@ +/* sp.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. 
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implementation by Sean Parkinson. */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
+    defined(WOLFSSL_HAVE_SP_ECC)
+
+#ifdef RSA_LOW_MEM
+#ifndef WOLFSSL_SP_SMALL
+#define WOLFSSL_SP_SMALL
+#endif
+#endif
+
+#include <wolfssl/wolfcrypt/sp.h>
+
+#ifdef WOLFSSL_SP_ARM32_ASM
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  A multi-precision integer.
+ */
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 256
+ *
+ * r  A single precision integer.
+ * a  Byte array.
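+ *
+ * Editor note (illustrative): with 32-bit sp_digits, 64 words are emitted
+ * most significant byte first to fill the fixed 256-byte (2048-bit) output.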
+ */ +static void sp_2048_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + j = 2048 / 8 - 1; + a[j] = 0; + for (i=0; i<64 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ + b += 8 - s; + if (j < 0) { + break; + } + while (b < 32) { + a[j--] = (byte)(r[i] >> b); + b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 32); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "sub sp, sp, #32\n\t" + "mov r10, #0\n\t" + "# A[0] * B[0]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r3, r4, r8, r9\n\t" + "mov r5, #0\n\t" + "str r3, [sp]\n\t" + "# A[0] * B[1]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[0]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #4]\n\t" + "# A[0] * B[2]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[1]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[0]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #8]\n\t" + "# A[0] * B[3]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[2]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[1]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[0]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #12]\n\t" + "# A[0] * B[4]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[3]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[2]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[1]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[0]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #16]\n\t" + "# A[0] * 
B[5]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[4]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[3]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[2]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[1]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[0]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #20]\n\t" + "# A[0] * B[6]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[5]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[4]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[3]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[2]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[1]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[0]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #24]\n\t" + "# A[0] * B[7]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[6]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[5]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[4]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[3]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[2]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[1]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[0]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #28]\n\t" + "# A[1] * B[7]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[2] * B[6]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[5]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[4]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[3]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[2]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[1]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #32]\n\t" + "# A[2] * B[7]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[3] * B[6]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[5]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[4]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[3]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[2]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #36]\n\t" + "# A[3] * B[7]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[4] * B[6]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[5]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[4]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[3]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], 
#40]\n\t" + "# A[4] * B[7]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[5] * B[6]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[5]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[4]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #44]\n\t" + "# A[5] * B[7]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[6] * B[6]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[5]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #48]\n\t" + "# A[6] * B[7]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[7] * B[6]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #52]\n\t" + "# A[7] * B[7]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "str r5, [%[r], #56]\n\t" + "str r3, [%[r], #60]\n\t" + "ldr r3, [sp, #0]\n\t" + "ldr r4, [sp, #4]\n\t" + "ldr r5, [sp, #8]\n\t" + "ldr r6, [sp, #12]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r3, [sp, #16]\n\t" + "ldr r4, [sp, #20]\n\t" + "ldr r5, [sp, #24]\n\t" + "ldr r6, [sp, #28]\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "add sp, sp, #32\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "sub sp, sp, #32\n\t" + "mov r14, #0\n\t" + "# A[0] * A[0]\n\t" + "ldr r10, [%[a], #0]\n\t" + "umull r8, r3, r10, r10\n\t" + "mov r4, #0\n\t" + "str r8, [sp]\n\t" + "# A[0] * A[1]\n\t" + "ldr r10, [%[a], #4]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [sp, #4]\n\t" + "# A[0] * A[2]\n\t" + "ldr r10, [%[a], #8]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r14, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "# A[1] * A[1]\n\t" + "ldr r10, [%[a], #4]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "str r4, [sp, #8]\n\t" + "# A[0] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r14, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "# A[1] * A[2]\n\t" + "ldr r10, [%[a], #8]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "str r2, [sp, #12]\n\t" + "# A[0] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[1] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[2] * A[2]\n\t" + "ldr r10, [%[a], #8]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [sp, #16]\n\t" + "# A[0] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #20]\n\t" + "# A[0] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #24]\n\t" + "# A[0] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #28]\n\t" + "# A[1] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[2] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #32]\n\t" + "# A[2] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[3] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #36]\n\t" + "# A[3] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[4] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[5] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [%[r], #40]\n\t" + "# A[4] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r14, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "# A[5] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds 
r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "str r4, [%[r], #44]\n\t" + "# A[5] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r14, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "# A[6] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "str r2, [%[r], #48]\n\t" + "# A[6] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [%[r], #52]\n\t" + "# A[7] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adc r2, r2, r9\n\t" + "str r4, [%[r], #56]\n\t" + "str r2, [%[r], #60]\n\t" + "ldr r2, [sp, #0]\n\t" + "ldr r3, [sp, #4]\n\t" + "ldr r4, [sp, #8]\n\t" + "ldr r8, [sp, #12]\n\t" + "str r2, [%[r], #0]\n\t" + "str r3, [%[r], #4]\n\t" + "str r4, [%[r], #8]\n\t" + "str r8, [%[r], #12]\n\t" + "ldr r2, [sp, #16]\n\t" + "ldr r3, [sp, #20]\n\t" + "ldr r4, [sp, #24]\n\t" + "ldr r8, [sp, #28]\n\t" + "str r2, [%[r], #16]\n\t" + "str r3, [%[r], #20]\n\t" + "str r4, [%[r], #24]\n\t" + "str r8, [%[r], #28]\n\t" + "add sp, sp, #32\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r5", "r6", "r7", "r14" + ); +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[b], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "ldr r10, [%[b], #8]\n\t" + "ldr r14, [%[b], #12]\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" + "ldr r8, [%[b], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "ldr r10, [%[b], #24]\n\t" + "ldr r14, [%[b], #28]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "str r6, [%[r], #24]\n\t" + "str r7, [%[r], #28]\n\t" + "adc %[c], r12, r12\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer.
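+ * Returns the final borrow: "sbc %[c], r9, r9" leaves 0 when no borrow + * remains and 0xffffffff ((sp_digit)-1) when a < b, which callers such as + * sp_2048_mul_16 add directly into their Karatsuba carry word u.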
+ */ +static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r2, [%[a], #0]\n\t" + "ldr r3, [%[a], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "ldr r8, [%[b], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #0]\n\t" + "str r3, [%[a], #4]\n\t" + "str r4, [%[a], #8]\n\t" + "str r5, [%[a], #12]\n\t" + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "ldr r8, [%[b], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #16]\n\t" + "str r3, [%[a], #20]\n\t" + "str r4, [%[a], #24]\n\t" + "str r5, [%[a], #28]\n\t" + "ldr r2, [%[a], #32]\n\t" + "ldr r3, [%[a], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "ldr r8, [%[b], #40]\n\t" + "ldr r9, [%[b], #44]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #32]\n\t" + "str r3, [%[a], #36]\n\t" + "str r4, [%[a], #40]\n\t" + "str r5, [%[a], #44]\n\t" + "ldr r2, [%[a], #48]\n\t" + "ldr r3, [%[a], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[a], #60]\n\t" + "ldr r6, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "ldr r8, [%[b], #56]\n\t" + "ldr r9, [%[b], #60]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #48]\n\t" + "str r3, [%[a], #52]\n\t" + "str r4, [%[a], #56]\n\t" + "str r5, [%[a], #60]\n\t" + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [a] "r" (a), [b] "r" (b) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
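+ * The carry flag stays live across the whole adds/adcs chain (the + * interleaved ldr/str do not affect the flags) and the trailing + * "adc %[c], r12, r12" with r12 == 0 returns the carry-out as 0 or 1. + * A rough per-word C equivalent, assuming a 32-bit sp_digit: + * t = a[i] + b[i] + c; c = (t < a[i]) | ((t == a[i]) & c); r[i] = t;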
+ */ +static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[b], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "ldr r10, [%[b], #8]\n\t" + "ldr r14, [%[b], #12]\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" + "ldr r8, [%[b], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "ldr r10, [%[b], #24]\n\t" + "ldr r14, [%[b], #28]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "str r6, [%[r], #24]\n\t" + "str r7, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[a], #40]\n\t" + "ldr r7, [%[a], #44]\n\t" + "ldr r8, [%[b], #32]\n\t" + "ldr r9, [%[b], #36]\n\t" + "ldr r10, [%[b], #40]\n\t" + "ldr r14, [%[b], #44]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" + "str r6, [%[r], #40]\n\t" + "str r7, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[a], #52]\n\t" + "ldr r6, [%[a], #56]\n\t" + "ldr r7, [%[a], #60]\n\t" + "ldr r8, [%[b], #48]\n\t" + "ldr r9, [%[b], #52]\n\t" + "ldr r10, [%[b], #56]\n\t" + "ldr r14, [%[b], #60]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #48]\n\t" + "str r5, [%[r], #52]\n\t" + "str r6, [%[r], #56]\n\t" + "str r7, [%[r], #60]\n\t" + "adc %[c], r12, r12\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<8; i++) { + r[i] = a[i] & m; + } +#else + r[0] = a[0] & m; + r[1] = a[1] & m; + r[2] = a[2] & m; + r[3] = a[3] & m; + r[4] = a[4] & m; + r[5] = a[5] & m; + r[6] = a[6] & m; + r[7] = a[7] & m; +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[16]; + sp_digit a1[8]; + sp_digit b1[8]; + sp_digit z2[16]; + sp_digit u, ca, cb; + + ca = sp_2048_add_8(a1, a, &a[8]); + cb = sp_2048_add_8(b1, b, &b[8]); + u = ca & cb; + sp_2048_mul_8(z1, a1, b1); + sp_2048_mul_8(z2, &a[8], &b[8]); + sp_2048_mul_8(z0, a, b); + sp_2048_mask_8(r + 16, a1, 0 - cb); + sp_2048_mask_8(b1, b1, 0 - ca); + u += sp_2048_add_8(r + 16, r + 16, b1); + u += sp_2048_sub_in_place_16(z1, z2); + u += sp_2048_sub_in_place_16(z1, z0); + u += sp_2048_add_16(r + 8, r + 8, z1); + r[24] = u; + XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1)); + (void)sp_2048_add_16(r + 16, r + 16, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. 
+ * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[16]; + sp_digit z1[16]; + sp_digit a1[8]; + sp_digit u; + + u = sp_2048_add_8(a1, a, &a[8]); + sp_2048_sqr_8(z1, a1); + sp_2048_sqr_8(z2, &a[8]); + sp_2048_sqr_8(z0, a); + sp_2048_mask_8(r + 16, a1, 0 - u); + u += sp_2048_add_8(r + 16, r + 16, r + 16); + u += sp_2048_sub_in_place_16(z1, z2); + u += sp_2048_sub_in_place_16(z1, z0); + u += sp_2048_add_16(r + 8, r + 8, z1); + r[24] = u; + XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1)); + (void)sp_2048_add_16(r + 16, r + 16, z2); +} + +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. + */ +static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r2, [%[a], #0]\n\t" + "ldr r3, [%[a], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "ldr r8, [%[b], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #0]\n\t" + "str r3, [%[a], #4]\n\t" + "str r4, [%[a], #8]\n\t" + "str r5, [%[a], #12]\n\t" + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "ldr r8, [%[b], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #16]\n\t" + "str r3, [%[a], #20]\n\t" + "str r4, [%[a], #24]\n\t" + "str r5, [%[a], #28]\n\t" + "ldr r2, [%[a], #32]\n\t" + "ldr r3, [%[a], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "ldr r8, [%[b], #40]\n\t" + "ldr r9, [%[b], #44]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #32]\n\t" + "str r3, [%[a], #36]\n\t" + "str r4, [%[a], #40]\n\t" + "str r5, [%[a], #44]\n\t" + "ldr r2, [%[a], #48]\n\t" + "ldr r3, [%[a], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[a], #60]\n\t" + "ldr r6, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "ldr r8, [%[b], #56]\n\t" + "ldr r9, [%[b], #60]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #48]\n\t" + "str r3, [%[a], #52]\n\t" + "str r4, [%[a], #56]\n\t" + "str r5, [%[a], #60]\n\t" + "ldr r2, [%[a], #64]\n\t" + "ldr r3, [%[a], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[a], #76]\n\t" + "ldr r6, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "ldr r8, [%[b], #72]\n\t" + "ldr r9, [%[b], #76]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #64]\n\t" + "str r3, [%[a], #68]\n\t" + "str r4, [%[a], #72]\n\t" + "str r5, [%[a], #76]\n\t" + "ldr r2, [%[a], #80]\n\t" + "ldr r3, [%[a], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[a], #92]\n\t" + "ldr r6, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "ldr r8, [%[b], #88]\n\t" + "ldr r9, [%[b], #92]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #80]\n\t" + "str r3, [%[a], #84]\n\t" + "str r4, [%[a], #88]\n\t" + "str r5, [%[a], #92]\n\t" + "ldr r2, [%[a], #96]\n\t" + "ldr r3, [%[a], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + 
"ldr r5, [%[a], #108]\n\t" + "ldr r6, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "ldr r8, [%[b], #104]\n\t" + "ldr r9, [%[b], #108]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #96]\n\t" + "str r3, [%[a], #100]\n\t" + "str r4, [%[a], #104]\n\t" + "str r5, [%[a], #108]\n\t" + "ldr r2, [%[a], #112]\n\t" + "ldr r3, [%[a], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[a], #124]\n\t" + "ldr r6, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "ldr r8, [%[b], #120]\n\t" + "ldr r9, [%[b], #124]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #112]\n\t" + "str r3, [%[a], #116]\n\t" + "str r4, [%[a], #120]\n\t" + "str r5, [%[a], #124]\n\t" + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [a] "r" (a), [b] "r" (b) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[b], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "ldr r10, [%[b], #8]\n\t" + "ldr r14, [%[b], #12]\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" + "ldr r8, [%[b], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "ldr r10, [%[b], #24]\n\t" + "ldr r14, [%[b], #28]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "str r6, [%[r], #24]\n\t" + "str r7, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[a], #40]\n\t" + "ldr r7, [%[a], #44]\n\t" + "ldr r8, [%[b], #32]\n\t" + "ldr r9, [%[b], #36]\n\t" + "ldr r10, [%[b], #40]\n\t" + "ldr r14, [%[b], #44]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" + "str r6, [%[r], #40]\n\t" + "str r7, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[a], #52]\n\t" + "ldr r6, [%[a], #56]\n\t" + "ldr r7, [%[a], #60]\n\t" + "ldr r8, [%[b], #48]\n\t" + "ldr r9, [%[b], #52]\n\t" + "ldr r10, [%[b], #56]\n\t" + "ldr r14, [%[b], #60]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #48]\n\t" + "str r5, [%[r], #52]\n\t" + "str r6, [%[r], #56]\n\t" + "str r7, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[a], #68]\n\t" + "ldr r6, [%[a], #72]\n\t" + "ldr r7, [%[a], #76]\n\t" + "ldr r8, [%[b], #64]\n\t" + "ldr r9, [%[b], #68]\n\t" + "ldr r10, [%[b], #72]\n\t" + "ldr r14, [%[b], #76]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #64]\n\t" + "str r5, [%[r], #68]\n\t" + "str r6, [%[r], #72]\n\t" + "str r7, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[a], #84]\n\t" + "ldr r6, [%[a], #88]\n\t" + "ldr r7, [%[a], #92]\n\t" + "ldr r8, [%[b], 
#80]\n\t" + "ldr r9, [%[b], #84]\n\t" + "ldr r10, [%[b], #88]\n\t" + "ldr r14, [%[b], #92]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #80]\n\t" + "str r5, [%[r], #84]\n\t" + "str r6, [%[r], #88]\n\t" + "str r7, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[a], #100]\n\t" + "ldr r6, [%[a], #104]\n\t" + "ldr r7, [%[a], #108]\n\t" + "ldr r8, [%[b], #96]\n\t" + "ldr r9, [%[b], #100]\n\t" + "ldr r10, [%[b], #104]\n\t" + "ldr r14, [%[b], #108]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #96]\n\t" + "str r5, [%[r], #100]\n\t" + "str r6, [%[r], #104]\n\t" + "str r7, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[a], #116]\n\t" + "ldr r6, [%[a], #120]\n\t" + "ldr r7, [%[a], #124]\n\t" + "ldr r8, [%[b], #112]\n\t" + "ldr r9, [%[b], #116]\n\t" + "ldr r10, [%[b], #120]\n\t" + "ldr r14, [%[b], #124]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #112]\n\t" + "str r5, [%[r], #116]\n\t" + "str r6, [%[r], #120]\n\t" + "str r7, [%[r], #124]\n\t" + "adc %[c], r12, r12\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<16; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 16; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[32]; + sp_digit a1[16]; + sp_digit b1[16]; + sp_digit z2[32]; + sp_digit u, ca, cb; + + ca = sp_2048_add_16(a1, a, &a[16]); + cb = sp_2048_add_16(b1, b, &b[16]); + u = ca & cb; + sp_2048_mul_16(z1, a1, b1); + sp_2048_mul_16(z2, &a[16], &b[16]); + sp_2048_mul_16(z0, a, b); + sp_2048_mask_16(r + 32, a1, 0 - cb); + sp_2048_mask_16(b1, b1, 0 - ca); + u += sp_2048_add_16(r + 32, r + 32, b1); + u += sp_2048_sub_in_place_32(z1, z2); + u += sp_2048_sub_in_place_32(z1, z0); + u += sp_2048_add_32(r + 16, r + 16, z1); + r[48] = u; + XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1)); + (void)sp_2048_add_32(r + 32, r + 32, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[32]; + sp_digit z1[32]; + sp_digit a1[16]; + sp_digit u; + + u = sp_2048_add_16(a1, a, &a[16]); + sp_2048_sqr_16(z1, a1); + sp_2048_sqr_16(z2, &a[16]); + sp_2048_sqr_16(z0, a); + sp_2048_mask_16(r + 32, a1, 0 - u); + u += sp_2048_add_16(r + 32, r + 32, r + 32); + u += sp_2048_sub_in_place_32(z1, z2); + u += sp_2048_sub_in_place_32(z1, z0); + u += sp_2048_add_32(r + 16, r + 16, z1); + r[48] = u; + XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1)); + (void)sp_2048_add_32(r + 32, r + 32, z2); +} + +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. + */ +static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r2, [%[a], #0]\n\t" + "ldr r3, [%[a], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "ldr r8, [%[b], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #0]\n\t" + "str r3, [%[a], #4]\n\t" + "str r4, [%[a], #8]\n\t" + "str r5, [%[a], #12]\n\t" + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "ldr r8, [%[b], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #16]\n\t" + "str r3, [%[a], #20]\n\t" + "str r4, [%[a], #24]\n\t" + "str r5, [%[a], #28]\n\t" + "ldr r2, [%[a], #32]\n\t" + "ldr r3, [%[a], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "ldr r8, [%[b], #40]\n\t" + "ldr r9, [%[b], #44]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #32]\n\t" + "str r3, [%[a], #36]\n\t" + "str r4, [%[a], #40]\n\t" + "str r5, [%[a], #44]\n\t" + "ldr r2, [%[a], #48]\n\t" + "ldr r3, [%[a], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[a], #60]\n\t" + "ldr r6, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "ldr r8, [%[b], #56]\n\t" + "ldr r9, [%[b], #60]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #48]\n\t" + "str r3, [%[a], #52]\n\t" + "str r4, [%[a], #56]\n\t" + "str r5, [%[a], #60]\n\t" + "ldr r2, [%[a], #64]\n\t" + "ldr r3, [%[a], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[a], #76]\n\t" + "ldr r6, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "ldr r8, [%[b], #72]\n\t" + "ldr r9, [%[b], #76]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #64]\n\t" + "str r3, [%[a], #68]\n\t" + "str r4, [%[a], #72]\n\t" + "str r5, [%[a], #76]\n\t" + "ldr r2, [%[a], #80]\n\t" + "ldr r3, [%[a], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[a], #92]\n\t" + "ldr r6, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "ldr r8, [%[b], #88]\n\t" + "ldr r9, [%[b], #92]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #80]\n\t" + "str r3, [%[a], #84]\n\t" + "str r4, [%[a], #88]\n\t" + "str r5, [%[a], #92]\n\t" + "ldr r2, [%[a], #96]\n\t" + "ldr r3, [%[a], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[a], 
#108]\n\t" + "ldr r6, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "ldr r8, [%[b], #104]\n\t" + "ldr r9, [%[b], #108]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #96]\n\t" + "str r3, [%[a], #100]\n\t" + "str r4, [%[a], #104]\n\t" + "str r5, [%[a], #108]\n\t" + "ldr r2, [%[a], #112]\n\t" + "ldr r3, [%[a], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[a], #124]\n\t" + "ldr r6, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "ldr r8, [%[b], #120]\n\t" + "ldr r9, [%[b], #124]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #112]\n\t" + "str r3, [%[a], #116]\n\t" + "str r4, [%[a], #120]\n\t" + "str r5, [%[a], #124]\n\t" + "ldr r2, [%[a], #128]\n\t" + "ldr r3, [%[a], #132]\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r5, [%[a], #140]\n\t" + "ldr r6, [%[b], #128]\n\t" + "ldr r7, [%[b], #132]\n\t" + "ldr r8, [%[b], #136]\n\t" + "ldr r9, [%[b], #140]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #128]\n\t" + "str r3, [%[a], #132]\n\t" + "str r4, [%[a], #136]\n\t" + "str r5, [%[a], #140]\n\t" + "ldr r2, [%[a], #144]\n\t" + "ldr r3, [%[a], #148]\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r5, [%[a], #156]\n\t" + "ldr r6, [%[b], #144]\n\t" + "ldr r7, [%[b], #148]\n\t" + "ldr r8, [%[b], #152]\n\t" + "ldr r9, [%[b], #156]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #144]\n\t" + "str r3, [%[a], #148]\n\t" + "str r4, [%[a], #152]\n\t" + "str r5, [%[a], #156]\n\t" + "ldr r2, [%[a], #160]\n\t" + "ldr r3, [%[a], #164]\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r5, [%[a], #172]\n\t" + "ldr r6, [%[b], #160]\n\t" + "ldr r7, [%[b], #164]\n\t" + "ldr r8, [%[b], #168]\n\t" + "ldr r9, [%[b], #172]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #160]\n\t" + "str r3, [%[a], #164]\n\t" + "str r4, [%[a], #168]\n\t" + "str r5, [%[a], #172]\n\t" + "ldr r2, [%[a], #176]\n\t" + "ldr r3, [%[a], #180]\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r5, [%[a], #188]\n\t" + "ldr r6, [%[b], #176]\n\t" + "ldr r7, [%[b], #180]\n\t" + "ldr r8, [%[b], #184]\n\t" + "ldr r9, [%[b], #188]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #176]\n\t" + "str r3, [%[a], #180]\n\t" + "str r4, [%[a], #184]\n\t" + "str r5, [%[a], #188]\n\t" + "ldr r2, [%[a], #192]\n\t" + "ldr r3, [%[a], #196]\n\t" + "ldr r4, [%[a], #200]\n\t" + "ldr r5, [%[a], #204]\n\t" + "ldr r6, [%[b], #192]\n\t" + "ldr r7, [%[b], #196]\n\t" + "ldr r8, [%[b], #200]\n\t" + "ldr r9, [%[b], #204]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #192]\n\t" + "str r3, [%[a], #196]\n\t" + "str r4, [%[a], #200]\n\t" + "str r5, [%[a], #204]\n\t" + "ldr r2, [%[a], #208]\n\t" + "ldr r3, [%[a], #212]\n\t" + "ldr r4, [%[a], #216]\n\t" + "ldr r5, [%[a], #220]\n\t" + "ldr r6, [%[b], #208]\n\t" + "ldr r7, [%[b], #212]\n\t" + "ldr r8, [%[b], #216]\n\t" + "ldr r9, [%[b], #220]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #208]\n\t" + "str r3, [%[a], #212]\n\t" + "str r4, [%[a], #216]\n\t" + "str r5, [%[a], #220]\n\t" + "ldr r2, [%[a], #224]\n\t" + "ldr r3, [%[a], #228]\n\t" + "ldr r4, [%[a], #232]\n\t" + "ldr r5, [%[a], 
#236]\n\t" + "ldr r6, [%[b], #224]\n\t" + "ldr r7, [%[b], #228]\n\t" + "ldr r8, [%[b], #232]\n\t" + "ldr r9, [%[b], #236]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #224]\n\t" + "str r3, [%[a], #228]\n\t" + "str r4, [%[a], #232]\n\t" + "str r5, [%[a], #236]\n\t" + "ldr r2, [%[a], #240]\n\t" + "ldr r3, [%[a], #244]\n\t" + "ldr r4, [%[a], #248]\n\t" + "ldr r5, [%[a], #252]\n\t" + "ldr r6, [%[b], #240]\n\t" + "ldr r7, [%[b], #244]\n\t" + "ldr r8, [%[b], #248]\n\t" + "ldr r9, [%[b], #252]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #240]\n\t" + "str r3, [%[a], #244]\n\t" + "str r4, [%[a], #248]\n\t" + "str r5, [%[a], #252]\n\t" + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [a] "r" (a), [b] "r" (b) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[b], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "ldr r10, [%[b], #8]\n\t" + "ldr r14, [%[b], #12]\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" + "ldr r8, [%[b], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "ldr r10, [%[b], #24]\n\t" + "ldr r14, [%[b], #28]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "str r6, [%[r], #24]\n\t" + "str r7, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[a], #40]\n\t" + "ldr r7, [%[a], #44]\n\t" + "ldr r8, [%[b], #32]\n\t" + "ldr r9, [%[b], #36]\n\t" + "ldr r10, [%[b], #40]\n\t" + "ldr r14, [%[b], #44]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" + "str r6, [%[r], #40]\n\t" + "str r7, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[a], #52]\n\t" + "ldr r6, [%[a], #56]\n\t" + "ldr r7, [%[a], #60]\n\t" + "ldr r8, [%[b], #48]\n\t" + "ldr r9, [%[b], #52]\n\t" + "ldr r10, [%[b], #56]\n\t" + "ldr r14, [%[b], #60]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #48]\n\t" + "str r5, [%[r], #52]\n\t" + "str r6, [%[r], #56]\n\t" + "str r7, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[a], #68]\n\t" + "ldr r6, [%[a], #72]\n\t" + "ldr r7, [%[a], #76]\n\t" + "ldr r8, [%[b], #64]\n\t" + "ldr r9, [%[b], #68]\n\t" + "ldr r10, [%[b], #72]\n\t" + "ldr r14, [%[b], #76]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #64]\n\t" + "str r5, [%[r], #68]\n\t" + "str r6, [%[r], #72]\n\t" + "str r7, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[a], #84]\n\t" + "ldr r6, [%[a], #88]\n\t" + "ldr r7, [%[a], #92]\n\t" + "ldr r8, [%[b], #80]\n\t" + 
"ldr r9, [%[b], #84]\n\t" + "ldr r10, [%[b], #88]\n\t" + "ldr r14, [%[b], #92]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #80]\n\t" + "str r5, [%[r], #84]\n\t" + "str r6, [%[r], #88]\n\t" + "str r7, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[a], #100]\n\t" + "ldr r6, [%[a], #104]\n\t" + "ldr r7, [%[a], #108]\n\t" + "ldr r8, [%[b], #96]\n\t" + "ldr r9, [%[b], #100]\n\t" + "ldr r10, [%[b], #104]\n\t" + "ldr r14, [%[b], #108]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #96]\n\t" + "str r5, [%[r], #100]\n\t" + "str r6, [%[r], #104]\n\t" + "str r7, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[a], #116]\n\t" + "ldr r6, [%[a], #120]\n\t" + "ldr r7, [%[a], #124]\n\t" + "ldr r8, [%[b], #112]\n\t" + "ldr r9, [%[b], #116]\n\t" + "ldr r10, [%[b], #120]\n\t" + "ldr r14, [%[b], #124]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #112]\n\t" + "str r5, [%[r], #116]\n\t" + "str r6, [%[r], #120]\n\t" + "str r7, [%[r], #124]\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r5, [%[a], #132]\n\t" + "ldr r6, [%[a], #136]\n\t" + "ldr r7, [%[a], #140]\n\t" + "ldr r8, [%[b], #128]\n\t" + "ldr r9, [%[b], #132]\n\t" + "ldr r10, [%[b], #136]\n\t" + "ldr r14, [%[b], #140]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #128]\n\t" + "str r5, [%[r], #132]\n\t" + "str r6, [%[r], #136]\n\t" + "str r7, [%[r], #140]\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r5, [%[a], #148]\n\t" + "ldr r6, [%[a], #152]\n\t" + "ldr r7, [%[a], #156]\n\t" + "ldr r8, [%[b], #144]\n\t" + "ldr r9, [%[b], #148]\n\t" + "ldr r10, [%[b], #152]\n\t" + "ldr r14, [%[b], #156]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #144]\n\t" + "str r5, [%[r], #148]\n\t" + "str r6, [%[r], #152]\n\t" + "str r7, [%[r], #156]\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r5, [%[a], #164]\n\t" + "ldr r6, [%[a], #168]\n\t" + "ldr r7, [%[a], #172]\n\t" + "ldr r8, [%[b], #160]\n\t" + "ldr r9, [%[b], #164]\n\t" + "ldr r10, [%[b], #168]\n\t" + "ldr r14, [%[b], #172]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #160]\n\t" + "str r5, [%[r], #164]\n\t" + "str r6, [%[r], #168]\n\t" + "str r7, [%[r], #172]\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r5, [%[a], #180]\n\t" + "ldr r6, [%[a], #184]\n\t" + "ldr r7, [%[a], #188]\n\t" + "ldr r8, [%[b], #176]\n\t" + "ldr r9, [%[b], #180]\n\t" + "ldr r10, [%[b], #184]\n\t" + "ldr r14, [%[b], #188]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #176]\n\t" + "str r5, [%[r], #180]\n\t" + "str r6, [%[r], #184]\n\t" + "str r7, [%[r], #188]\n\t" + "ldr r4, [%[a], #192]\n\t" + "ldr r5, [%[a], #196]\n\t" + "ldr r6, [%[a], #200]\n\t" + "ldr r7, [%[a], #204]\n\t" + "ldr r8, [%[b], #192]\n\t" + "ldr r9, [%[b], #196]\n\t" + "ldr r10, [%[b], #200]\n\t" + "ldr r14, [%[b], #204]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #192]\n\t" + "str r5, [%[r], #196]\n\t" + "str r6, [%[r], #200]\n\t" + "str r7, [%[r], #204]\n\t" + "ldr r4, [%[a], #208]\n\t" + "ldr r5, [%[a], #212]\n\t" + "ldr r6, [%[a], #216]\n\t" + "ldr r7, [%[a], #220]\n\t" + "ldr r8, 
[%[b], #208]\n\t" + "ldr r9, [%[b], #212]\n\t" + "ldr r10, [%[b], #216]\n\t" + "ldr r14, [%[b], #220]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #208]\n\t" + "str r5, [%[r], #212]\n\t" + "str r6, [%[r], #216]\n\t" + "str r7, [%[r], #220]\n\t" + "ldr r4, [%[a], #224]\n\t" + "ldr r5, [%[a], #228]\n\t" + "ldr r6, [%[a], #232]\n\t" + "ldr r7, [%[a], #236]\n\t" + "ldr r8, [%[b], #224]\n\t" + "ldr r9, [%[b], #228]\n\t" + "ldr r10, [%[b], #232]\n\t" + "ldr r14, [%[b], #236]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #224]\n\t" + "str r5, [%[r], #228]\n\t" + "str r6, [%[r], #232]\n\t" + "str r7, [%[r], #236]\n\t" + "ldr r4, [%[a], #240]\n\t" + "ldr r5, [%[a], #244]\n\t" + "ldr r6, [%[a], #248]\n\t" + "ldr r7, [%[a], #252]\n\t" + "ldr r8, [%[b], #240]\n\t" + "ldr r9, [%[b], #244]\n\t" + "ldr r10, [%[b], #248]\n\t" + "ldr r14, [%[b], #252]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #240]\n\t" + "str r5, [%[r], #244]\n\t" + "str r6, [%[r], #248]\n\t" + "str r7, [%[r], #252]\n\t" + "adc %[c], r12, r12\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<32; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 32; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[64]; + sp_digit a1[32]; + sp_digit b1[32]; + sp_digit z2[64]; + sp_digit u, ca, cb; + + ca = sp_2048_add_32(a1, a, &a[32]); + cb = sp_2048_add_32(b1, b, &b[32]); + u = ca & cb; + sp_2048_mul_32(z1, a1, b1); + sp_2048_mul_32(z2, &a[32], &b[32]); + sp_2048_mul_32(z0, a, b); + sp_2048_mask_32(r + 64, a1, 0 - cb); + sp_2048_mask_32(b1, b1, 0 - ca); + u += sp_2048_add_32(r + 64, r + 64, b1); + u += sp_2048_sub_in_place_64(z1, z2); + u += sp_2048_sub_in_place_64(z1, z0); + u += sp_2048_add_64(r + 32, r + 32, z1); + r[96] = u; + XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); + (void)sp_2048_add_64(r + 64, r + 64, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
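+ * The non-small build recurses through three Karatsuba levels + * (64 -> 32 -> 16 -> 8 words), each level replacing four half-size + * products with three, so the unrolled 8-word kernels above end up + * doing all of the word-level work in roughly n^1.58 rather than n^2 + * single precision multiplies.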
+ */ +SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[64]; + sp_digit z1[64]; + sp_digit a1[32]; + sp_digit u; + + u = sp_2048_add_32(a1, a, &a[32]); + sp_2048_sqr_32(z1, a1); + sp_2048_sqr_32(z2, &a[32]); + sp_2048_sqr_32(z0, a); + sp_2048_mask_32(r + 64, a1, 0 - u); + u += sp_2048_add_32(r + 64, r + 64, r + 64); + u += sp_2048_sub_in_place_64(z1, z2); + u += sp_2048_sub_in_place_64(z1, z0); + u += sp_2048_add_64(r + 32, r + 32, z1); + r[96] = u; + XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); + (void)sp_2048_add_64(r + 64, r + 64, z2); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add r12, %[a], #256\n\t" + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr r4, [%[a]], #4\n\t" + "ldr r5, [%[a]], #4\n\t" + "ldr r6, [%[a]], #4\n\t" + "ldr r7, [%[a]], #4\n\t" + "ldr r8, [%[b]], #4\n\t" + "ldr r9, [%[b]], #4\n\t" + "ldr r10, [%[b]], #4\n\t" + "ldr r14, [%[b]], #4\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r]], #4\n\t" + "str r5, [%[r]], #4\n\t" + "str r6, [%[r]], #4\n\t" + "str r7, [%[r]], #4\n\t" + "mov r4, #0\n\t" + "adc %[c], r4, #0\n\t" + "cmp %[a], r12\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r14, #0\n\t" + "add r12, %[a], #256\n\t" + "\n1:\n\t" + "subs %[c], r14, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b]], #4\n\t" + "ldr r8, [%[b]], #4\n\t" + "ldr r9, [%[b]], #4\n\t" + "ldr r10, [%[b]], #4\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "str r3, [%[a]], #4\n\t" + "str r4, [%[a]], #4\n\t" + "str r5, [%[a]], #4\n\t" + "str r6, [%[a]], #4\n\t" + "sbc %[c], r14, r14\n\t" + "cmp %[a], r12\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
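+ * Small-code variant using product scanning (Comba): for each output + * word the inner loop sums every product A[i]*B[j] with i + j fixed into + * the three-word accumulator r8:r7:r6, stores the low word into a + * 512-byte stack buffer and shifts the accumulator down one word; the + * final loop then copies the buffer out to r 16 bytes at a time.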
+ */ +static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "sub sp, sp, #512\n\t" + "mov r5, #0\n\t" + "mov r6, #0\n\t" + "mov r7, #0\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "subs r3, r5, #252\n\t" + "it cc\n\t" + "movcc r3, #0\n\t" + "sub r4, r5, r3\n\t" + "\n2:\n\t" + "ldr r14, [%[a], r3]\n\t" + "ldr r12, [%[b], r4]\n\t" + "umull r9, r10, r14, r12\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, #256\n\t" + "beq 3f\n\t" + "cmp r3, r5\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "str r6, [sp, r5]\n\t" + "mov r6, r7\n\t" + "mov r7, r8\n\t" + "mov r8, #0\n\t" + "add r5, r5, #4\n\t" + "cmp r5, #504\n\t" + "ble 1b\n\t" + "str r6, [sp, r5]\n\t" + "\n4:\n\t" + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r3, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r3, [%[r], #12]\n\t" + "add sp, sp, #16\n\t" + "add %[r], %[r], #16\n\t" + "subs r5, r5, #16\n\t" + "bgt 4b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "sub sp, sp, #512\n\t" + "mov r12, #0\n\t" + "mov r6, #0\n\t" + "mov r7, #0\n\t" + "mov r8, #0\n\t" + "mov r5, #0\n\t" + "\n1:\n\t" + "subs r3, r5, #252\n\t" + "it cc\n\t" + "movcc r3, r12\n\t" + "sub r4, r5, r3\n\t" + "\n2:\n\t" + "cmp r4, r3\n\t" + "beq 4f\n\t" + "ldr r14, [%[a], r3]\n\t" + "ldr r9, [%[a], r4]\n\t" + "umull r9, r10, r14, r9\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "bal 5f\n\t" + "\n4:\n\t" + "ldr r14, [%[a], r3]\n\t" + "umull r9, r10, r14, r14\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "\n5:\n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, #256\n\t" + "beq 3f\n\t" + "cmp r3, r4\n\t" + "bgt 3f\n\t" + "cmp r3, r5\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "str r6, [sp, r5]\n\t" + "mov r6, r7\n\t" + "mov r7, r8\n\t" + "mov r8, #0\n\t" + "add r5, r5, #4\n\t" + "cmp r5, #504\n\t" + "ble 1b\n\t" + "str r6, [sp, r5]\n\t" + "\n4:\n\t" + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r3, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r3, [%[r], #12]\n\t" + "add sp, sp, #16\n\t" + "add %[r], %[r], #16\n\t" + "subs r5, r5, #16\n\t" + "bgt 4b\n\t" + : [r] "+r" (r) + : [a] "r" (a) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#ifdef WOLFSSL_SP_SMALL +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) +{ + int i; + + for (i=0; i<32; i++) { + r[i] = a[i] & m; + } +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer.
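+ * Looped small-code variant: "adds %[c], %[c], #-1" re-arms the carry + * flag from the 0/1 value saved by the previous iteration's + * "adc %[c], r4, #0" (c + 0xffffffff carries out exactly when c == 1), + * so the adcs chain continues correctly across the four-words-per-pass + * loop.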
+ */ +static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add r12, %[a], #128\n\t" + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr r4, [%[a]], #4\n\t" + "ldr r5, [%[a]], #4\n\t" + "ldr r6, [%[a]], #4\n\t" + "ldr r7, [%[a]], #4\n\t" + "ldr r8, [%[b]], #4\n\t" + "ldr r9, [%[b]], #4\n\t" + "ldr r10, [%[b]], #4\n\t" + "ldr r14, [%[b]], #4\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r]], #4\n\t" + "str r5, [%[r]], #4\n\t" + "str r6, [%[r]], #4\n\t" + "str r7, [%[r]], #4\n\t" + "mov r4, #0\n\t" + "adc %[c], r4, #0\n\t" + "cmp %[a], r12\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r14, #0\n\t" + "add r12, %[a], #128\n\t" + "\n1:\n\t" + "subs %[c], r14, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b]], #4\n\t" + "ldr r8, [%[b]], #4\n\t" + "ldr r9, [%[b]], #4\n\t" + "ldr r10, [%[b]], #4\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "str r3, [%[a]], #4\n\t" + "str r4, [%[a]], #4\n\t" + "str r5, [%[a]], #4\n\t" + "str r6, [%[a]], #4\n\t" + "sbc %[c], r14, r14\n\t" + "cmp %[a], r12\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "sub sp, sp, #256\n\t" + "mov r5, #0\n\t" + "mov r6, #0\n\t" + "mov r7, #0\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "subs r3, r5, #124\n\t" + "it cc\n\t" + "movcc r3, #0\n\t" + "sub r4, r5, r3\n\t" + "\n2:\n\t" + "ldr r14, [%[a], r3]\n\t" + "ldr r12, [%[b], r4]\n\t" + "umull r9, r10, r14, r12\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, #128\n\t" + "beq 3f\n\t" + "cmp r3, r5\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "str r6, [sp, r5]\n\t" + "mov r6, r7\n\t" + "mov r7, r8\n\t" + "mov r8, #0\n\t" + "add r5, r5, #4\n\t" + "cmp r5, #248\n\t" + "ble 1b\n\t" + "str r6, [sp, r5]\n\t" + "\n4:\n\t" + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r3, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r3, [%[r], #12]\n\t" + "add sp, sp, #16\n\t" + "add %[r], %[r], #16\n\t" + "subs r5, r5, #16\n\t" + "bgt 4b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
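+ * Looping form of the squaring shortcut: inside the column loop, + * "cmp r4, r3" / "beq 4f" picks out the diagonal word (i == j, added + * once) while off-diagonal products are added twice, and the inner loop + * exits once r3 passes r4 so each pair of indices is visited only once.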
+ */
+static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "sub sp, sp, #256\n\t"
+        "mov r12, #0\n\t"
+        "mov r6, #0\n\t"
+        "mov r7, #0\n\t"
+        "mov r8, #0\n\t"
+        "mov r5, #0\n\t"
+        "\n1:\n\t"
+        "subs r3, r5, #124\n\t"
+        "it cc\n\t"
+        "movcc r3, r12\n\t"
+        "sub r4, r5, r3\n\t"
+        "\n2:\n\t"
+        "cmp r4, r3\n\t"
+        "beq 4f\n\t"
+        "ldr r14, [%[a], r3]\n\t"
+        "ldr r9, [%[a], r4]\n\t"
+        "umull r9, r10, r14, r9\n\t"
+        "adds r6, r6, r9\n\t"
+        "adcs r7, r7, r10\n\t"
+        "adc r8, r8, r12\n\t"
+        "adds r6, r6, r9\n\t"
+        "adcs r7, r7, r10\n\t"
+        "adc r8, r8, r12\n\t"
+        "bal 5f\n\t"
+        "\n4:\n\t"
+        "ldr r14, [%[a], r3]\n\t"
+        "umull r9, r10, r14, r14\n\t"
+        "adds r6, r6, r9\n\t"
+        "adcs r7, r7, r10\n\t"
+        "adc r8, r8, r12\n\t"
+        "\n5:\n\t"
+        "add r3, r3, #4\n\t"
+        "sub r4, r4, #4\n\t"
+        "cmp r3, #128\n\t"
+        "beq 3f\n\t"
+        "cmp r3, r4\n\t"
+        "bgt 3f\n\t"
+        "cmp r3, r5\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        "str r6, [sp, r5]\n\t"
+        "mov r6, r7\n\t"
+        "mov r7, r8\n\t"
+        "mov r8, #0\n\t"
+        "add r5, r5, #4\n\t"
+        "cmp r5, #248\n\t"
+        "ble 1b\n\t"
+        "str r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr r6, [sp, #0]\n\t"
+        "ldr r7, [sp, #4]\n\t"
+        "ldr r8, [sp, #8]\n\t"
+        "ldr r3, [sp, #12]\n\t"
+        "str r6, [%[r], #0]\n\t"
+        "str r7, [%[r], #4]\n\t"
+        "str r8, [%[r], #8]\n\t"
+        "str r3, [%[r], #12]\n\t"
+        "add sp, sp, #16\n\t"
+        "add %[r], %[r], #16\n\t"
+        "subs r5, r5, #16\n\t"
+        "bgt 4b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+
+    /* rho = -1/m mod 2^32 */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
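+ * The result is 65 digits long: r[64] holds the final carry.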
+ */ +static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r5, r3, %[b], r8\n\t" + "mov r4, #0\n\t" + "str r5, [%[r]]\n\t" + "mov r5, #0\n\t" + "mov r9, #4\n\t" + "1:\n\t" + "ldr r8, [%[a], r9]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], r9]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r9, r9, #4\n\t" + "cmp r9, #256\n\t" + "blt 1b\n\t" + "str r3, [%[r], #256]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#else + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r3, r4, %[b], r8\n\t" + "mov r5, #0\n\t" + "str r3, [%[r]]\n\t" + "# A[1] * B\n\t" + "ldr r8, [%[a], #4]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #4]\n\t" + "# A[2] * B\n\t" + "ldr r8, [%[a], #8]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #8]\n\t" + "# A[3] * B\n\t" + "ldr r8, [%[a], #12]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #12]\n\t" + "# A[4] * B\n\t" + "ldr r8, [%[a], #16]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #16]\n\t" + "# A[5] * B\n\t" + "ldr r8, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #20]\n\t" + "# A[6] * B\n\t" + "ldr r8, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #24]\n\t" + "# A[7] * B\n\t" + "ldr r8, [%[a], #28]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #28]\n\t" + "# A[8] * B\n\t" + "ldr r8, [%[a], #32]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #32]\n\t" + "# A[9] * B\n\t" + "ldr r8, [%[a], #36]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #36]\n\t" + "# A[10] * B\n\t" + "ldr r8, [%[a], #40]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #40]\n\t" + "# A[11] * B\n\t" + "ldr r8, [%[a], #44]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #44]\n\t" + "# A[12] * B\n\t" + "ldr r8, [%[a], #48]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #48]\n\t" + "# A[13] * B\n\t" + "ldr r8, [%[a], #52]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #52]\n\t" + "# A[14] * B\n\t" + "ldr r8, [%[a], #56]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], 
r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #56]\n\t" + "# A[15] * B\n\t" + "ldr r8, [%[a], #60]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #60]\n\t" + "# A[16] * B\n\t" + "ldr r8, [%[a], #64]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #64]\n\t" + "# A[17] * B\n\t" + "ldr r8, [%[a], #68]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #68]\n\t" + "# A[18] * B\n\t" + "ldr r8, [%[a], #72]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #72]\n\t" + "# A[19] * B\n\t" + "ldr r8, [%[a], #76]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #76]\n\t" + "# A[20] * B\n\t" + "ldr r8, [%[a], #80]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #80]\n\t" + "# A[21] * B\n\t" + "ldr r8, [%[a], #84]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #84]\n\t" + "# A[22] * B\n\t" + "ldr r8, [%[a], #88]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #88]\n\t" + "# A[23] * B\n\t" + "ldr r8, [%[a], #92]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #92]\n\t" + "# A[24] * B\n\t" + "ldr r8, [%[a], #96]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #96]\n\t" + "# A[25] * B\n\t" + "ldr r8, [%[a], #100]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #100]\n\t" + "# A[26] * B\n\t" + "ldr r8, [%[a], #104]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #104]\n\t" + "# A[27] * B\n\t" + "ldr r8, [%[a], #108]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #108]\n\t" + "# A[28] * B\n\t" + "ldr r8, [%[a], #112]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #112]\n\t" + "# A[29] * B\n\t" + "ldr r8, [%[a], #116]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #116]\n\t" + "# A[30] * B\n\t" + "ldr r8, [%[a], #120]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #120]\n\t" + "# A[31] * B\n\t" + "ldr r8, [%[a], #124]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #124]\n\t" + "# A[32] * B\n\t" + "ldr r8, [%[a], #128]\n\t" + "mov r4, 
#0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #128]\n\t" + "# A[33] * B\n\t" + "ldr r8, [%[a], #132]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #132]\n\t" + "# A[34] * B\n\t" + "ldr r8, [%[a], #136]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #136]\n\t" + "# A[35] * B\n\t" + "ldr r8, [%[a], #140]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #140]\n\t" + "# A[36] * B\n\t" + "ldr r8, [%[a], #144]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #144]\n\t" + "# A[37] * B\n\t" + "ldr r8, [%[a], #148]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #148]\n\t" + "# A[38] * B\n\t" + "ldr r8, [%[a], #152]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #152]\n\t" + "# A[39] * B\n\t" + "ldr r8, [%[a], #156]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #156]\n\t" + "# A[40] * B\n\t" + "ldr r8, [%[a], #160]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #160]\n\t" + "# A[41] * B\n\t" + "ldr r8, [%[a], #164]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #164]\n\t" + "# A[42] * B\n\t" + "ldr r8, [%[a], #168]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #168]\n\t" + "# A[43] * B\n\t" + "ldr r8, [%[a], #172]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #172]\n\t" + "# A[44] * B\n\t" + "ldr r8, [%[a], #176]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #176]\n\t" + "# A[45] * B\n\t" + "ldr r8, [%[a], #180]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #180]\n\t" + "# A[46] * B\n\t" + "ldr r8, [%[a], #184]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #184]\n\t" + "# A[47] * B\n\t" + "ldr r8, [%[a], #188]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #188]\n\t" + "# A[48] * B\n\t" + "ldr r8, [%[a], #192]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #192]\n\t" + "# A[49] * B\n\t" + "ldr r8, [%[a], #196]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #196]\n\t" + "# A[50] * 
B\n\t" + "ldr r8, [%[a], #200]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #200]\n\t" + "# A[51] * B\n\t" + "ldr r8, [%[a], #204]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #204]\n\t" + "# A[52] * B\n\t" + "ldr r8, [%[a], #208]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #208]\n\t" + "# A[53] * B\n\t" + "ldr r8, [%[a], #212]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #212]\n\t" + "# A[54] * B\n\t" + "ldr r8, [%[a], #216]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #216]\n\t" + "# A[55] * B\n\t" + "ldr r8, [%[a], #220]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #220]\n\t" + "# A[56] * B\n\t" + "ldr r8, [%[a], #224]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #224]\n\t" + "# A[57] * B\n\t" + "ldr r8, [%[a], #228]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #228]\n\t" + "# A[58] * B\n\t" + "ldr r8, [%[a], #232]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #232]\n\t" + "# A[59] * B\n\t" + "ldr r8, [%[a], #236]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #236]\n\t" + "# A[60] * B\n\t" + "ldr r8, [%[a], #240]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #240]\n\t" + "# A[61] * B\n\t" + "ldr r8, [%[a], #244]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #244]\n\t" + "# A[62] * B\n\t" + "ldr r8, [%[a], #248]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #248]\n\t" + "# A[63] * B\n\t" + "ldr r8, [%[a], #252]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r3, [%[r], #252]\n\t" + "str r4, [%[r], #256]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#endif +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 2048 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 32); + + /* r = 2^n mod m */ + sp_2048_sub_in_place_32(r, m); +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. 
+ * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r9, #0\n\t" + "mov r8, #0\n\t" + "1:\n\t" + "subs %[c], r9, %[c]\n\t" + "ldr r4, [%[a], r8]\n\t" + "ldr r5, [%[b], r8]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc %[c], r9, r9\n\t" + "str r4, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, #128\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#else + __asm__ __volatile__ ( + + "mov r9, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "subs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r6, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #32]\n\t" + "str r6, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r6, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r6, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #48]\n\t" + "str r6, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r6, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r7, [%[b], #60]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #56]\n\t" + "str r6, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r6, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #64]\n\t" + "str r6, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r6, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r7, [%[b], #76]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #72]\n\t" + "str r6, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r6, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "and r5, r5, 
%[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #80]\n\t" + "str r6, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r6, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r7, [%[b], #92]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #88]\n\t" + "str r6, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r6, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #96]\n\t" + "str r6, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r6, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r7, [%[b], #108]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #104]\n\t" + "str r6, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r6, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #112]\n\t" + "str r6, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r6, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r7, [%[b], #124]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #120]\n\t" + "str r6, [%[r], #124]\n\t" + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "# i = 0\n\t" + "mov r12, #0\n\t" + "ldr r10, [%[a], #0]\n\t" + "ldr r14, [%[a], #4]\n\t" + "\n1:\n\t" + "# mu = a[i] * mp\n\t" + "mul r8, %[mp], r10\n\t" + "# a[i+0] += m[0] * mu\n\t" + "ldr r7, [%[m], #0]\n\t" + "ldr r9, [%[a], #0]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" + "# a[i+1] += m[1] * mu\n\t" + "ldr r7, [%[m], #4]\n\t" + "ldr r9, [%[a], #4]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r10, r14, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r10, r10, r5\n\t" + "adc r4, r4, #0\n\t" + "# a[i+2] += m[2] * mu\n\t" + "ldr r7, [%[m], #8]\n\t" + "ldr r14, [%[a], #8]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r14, r14, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r14, r14, r4\n\t" + "adc r5, r5, #0\n\t" + "# a[i+3] += m[3] * mu\n\t" + "ldr r7, [%[m], #12]\n\t" + "ldr r9, [%[a], #12]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+4] += m[4] * mu\n\t" + "ldr r7, [%[m], #16]\n\t" + "ldr r9, [%[a], #16]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+5] += m[5] * mu\n\t" + "ldr r7, [%[m], #20]\n\t" + "ldr r9, [%[a], #20]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+6] += m[6] * mu\n\t" + "ldr r7, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+7] += m[7] * mu\n\t" + "ldr r7, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #28]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+8] += m[8] * mu\n\t" + "ldr r7, [%[m], #32]\n\t" + "ldr r9, [%[a], #32]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+9] += m[9] * mu\n\t" + "ldr r7, [%[m], #36]\n\t" + "ldr r9, [%[a], #36]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+10] += m[10] * mu\n\t" + "ldr r7, [%[m], #40]\n\t" + "ldr r9, [%[a], #40]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+11] += m[11] * mu\n\t" + "ldr r7, [%[m], #44]\n\t" + "ldr r9, [%[a], #44]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #44]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+12] += m[12] * mu\n\t" + "ldr r7, [%[m], #48]\n\t" + "ldr r9, [%[a], #48]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+13] += m[13] * mu\n\t" + "ldr r7, [%[m], #52]\n\t" + "ldr r9, [%[a], #52]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #52]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+14] += m[14] * mu\n\t" + "ldr r7, 
[%[m], #56]\n\t" + "ldr r9, [%[a], #56]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+15] += m[15] * mu\n\t" + "ldr r7, [%[m], #60]\n\t" + "ldr r9, [%[a], #60]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #60]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+16] += m[16] * mu\n\t" + "ldr r7, [%[m], #64]\n\t" + "ldr r9, [%[a], #64]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+17] += m[17] * mu\n\t" + "ldr r7, [%[m], #68]\n\t" + "ldr r9, [%[a], #68]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #68]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+18] += m[18] * mu\n\t" + "ldr r7, [%[m], #72]\n\t" + "ldr r9, [%[a], #72]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+19] += m[19] * mu\n\t" + "ldr r7, [%[m], #76]\n\t" + "ldr r9, [%[a], #76]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #76]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+20] += m[20] * mu\n\t" + "ldr r7, [%[m], #80]\n\t" + "ldr r9, [%[a], #80]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+21] += m[21] * mu\n\t" + "ldr r7, [%[m], #84]\n\t" + "ldr r9, [%[a], #84]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #84]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+22] += m[22] * mu\n\t" + "ldr r7, [%[m], #88]\n\t" + "ldr r9, [%[a], #88]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+23] += m[23] * mu\n\t" + "ldr r7, [%[m], #92]\n\t" + "ldr r9, [%[a], #92]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #92]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+24] += m[24] * mu\n\t" + "ldr r7, [%[m], #96]\n\t" + "ldr r9, [%[a], #96]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+25] += m[25] * mu\n\t" + "ldr r7, [%[m], #100]\n\t" + "ldr r9, [%[a], #100]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #100]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+26] += m[26] * mu\n\t" + "ldr r7, [%[m], #104]\n\t" + "ldr r9, [%[a], #104]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+27] += m[27] * mu\n\t" + "ldr r7, [%[m], #108]\n\t" + "ldr r9, [%[a], #108]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #108]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+28] += m[28] * mu\n\t" + "ldr r7, [%[m], #112]\n\t" + "ldr r9, [%[a], #112]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" 
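+        "# carry word alternates between r4 and r5 from one limb to the next\n\t"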
+ "# a[i+29] += m[29] * mu\n\t" + "ldr r7, [%[m], #116]\n\t" + "ldr r9, [%[a], #116]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #116]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+30] += m[30] * mu\n\t" + "ldr r7, [%[m], #120]\n\t" + "ldr r9, [%[a], #120]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+31] += m[31] * mu\n\t" + "ldr r7, [%[m], #124]\n\t" + "ldr r9, [%[a], #124]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r7, r7, %[ca]\n\t" + "mov %[ca], #0\n\t" + "adc %[ca], %[ca], %[ca]\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #124]\n\t" + "ldr r9, [%[a], #128]\n\t" + "adcs r9, r9, r7\n\t" + "str r9, [%[a], #128]\n\t" + "adc %[ca], %[ca], #0\n\t" + "# i += 1\n\t" + "add %[a], %[a], #4\n\t" + "add r12, r12, #4\n\t" + "cmp r12, #128\n\t" + "blt 1b\n\t" + "str r10, [%[a], #0]\n\t" + "str r14, [%[a], #4]\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_2048_mul_32(r, a, b); + sp_2048_mont_reduce_32(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_2048_sqr_32(r, a); + sp_2048_mont_reduce_32(r, m, mp); +} + +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
+ */ +static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r5, r3, %[b], r8\n\t" + "mov r4, #0\n\t" + "str r5, [%[r]]\n\t" + "mov r5, #0\n\t" + "mov r9, #4\n\t" + "1:\n\t" + "ldr r8, [%[a], r9]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], r9]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r9, r9, #4\n\t" + "cmp r9, #128\n\t" + "blt 1b\n\t" + "str r3, [%[r], #128]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#else + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r3, r4, %[b], r8\n\t" + "mov r5, #0\n\t" + "str r3, [%[r]]\n\t" + "# A[1] * B\n\t" + "ldr r8, [%[a], #4]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #4]\n\t" + "# A[2] * B\n\t" + "ldr r8, [%[a], #8]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #8]\n\t" + "# A[3] * B\n\t" + "ldr r8, [%[a], #12]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #12]\n\t" + "# A[4] * B\n\t" + "ldr r8, [%[a], #16]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #16]\n\t" + "# A[5] * B\n\t" + "ldr r8, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #20]\n\t" + "# A[6] * B\n\t" + "ldr r8, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #24]\n\t" + "# A[7] * B\n\t" + "ldr r8, [%[a], #28]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #28]\n\t" + "# A[8] * B\n\t" + "ldr r8, [%[a], #32]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #32]\n\t" + "# A[9] * B\n\t" + "ldr r8, [%[a], #36]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #36]\n\t" + "# A[10] * B\n\t" + "ldr r8, [%[a], #40]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #40]\n\t" + "# A[11] * B\n\t" + "ldr r8, [%[a], #44]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #44]\n\t" + "# A[12] * B\n\t" + "ldr r8, [%[a], #48]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #48]\n\t" + "# A[13] * B\n\t" + "ldr r8, [%[a], #52]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #52]\n\t" + "# A[14] * B\n\t" + "ldr r8, [%[a], #56]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], 
r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #56]\n\t" + "# A[15] * B\n\t" + "ldr r8, [%[a], #60]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #60]\n\t" + "# A[16] * B\n\t" + "ldr r8, [%[a], #64]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #64]\n\t" + "# A[17] * B\n\t" + "ldr r8, [%[a], #68]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #68]\n\t" + "# A[18] * B\n\t" + "ldr r8, [%[a], #72]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #72]\n\t" + "# A[19] * B\n\t" + "ldr r8, [%[a], #76]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #76]\n\t" + "# A[20] * B\n\t" + "ldr r8, [%[a], #80]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #80]\n\t" + "# A[21] * B\n\t" + "ldr r8, [%[a], #84]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #84]\n\t" + "# A[22] * B\n\t" + "ldr r8, [%[a], #88]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #88]\n\t" + "# A[23] * B\n\t" + "ldr r8, [%[a], #92]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #92]\n\t" + "# A[24] * B\n\t" + "ldr r8, [%[a], #96]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #96]\n\t" + "# A[25] * B\n\t" + "ldr r8, [%[a], #100]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #100]\n\t" + "# A[26] * B\n\t" + "ldr r8, [%[a], #104]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #104]\n\t" + "# A[27] * B\n\t" + "ldr r8, [%[a], #108]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #108]\n\t" + "# A[28] * B\n\t" + "ldr r8, [%[a], #112]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #112]\n\t" + "# A[29] * B\n\t" + "ldr r8, [%[a], #116]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #116]\n\t" + "# A[30] * B\n\t" + "ldr r8, [%[a], #120]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #120]\n\t" + "# A[31] * B\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "str r4, [%[r], #124]\n\t" + "str r5, [%[r], #128]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", 
"r6", "r7", "r8", "r9", "r10" + ); +#endif +} + +/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The dividend. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. + */ +static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) +{ + sp_digit r = 0; + + __asm__ __volatile__ ( + "lsr r5, %[div], #1\n\t" + "add r5, r5, #1\n\t" + "mov r6, %[d0]\n\t" + "mov r7, %[d1]\n\t" + "# Do top 32\n\t" + "subs r8, r5, r7\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], %[r]\n\t" + "sub %[r], %[r], r8\n\t" + "and r8, r8, r5\n\t" + "subs r7, r7, r8\n\t" + "# Next 30 bits\n\t" + "mov r4, #29\n\t" + "1:\n\t" + "movs r6, r6, lsl #1\n\t" + "adc r7, r7, r7\n\t" + "subs r8, r5, r7\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], %[r]\n\t" + "sub %[r], %[r], r8\n\t" + "and r8, r8, r5\n\t" + "subs r7, r7, r8\n\t" + "subs r4, r4, #1\n\t" + "bpl 1b\n\t" + "add %[r], %[r], %[r]\n\t" + "add %[r], %[r], #1\n\t" + "umull r4, r5, %[r], %[div]\n\t" + "subs r4, %[d0], r4\n\t" + "sbc r5, %[d1], r5\n\t" + "add %[r], %[r], r5\n\t" + "umull r4, r5, %[r], %[div]\n\t" + "subs r4, %[d0], r4\n\t" + "sbc r5, %[d1], r5\n\t" + "add %[r], %[r], r5\n\t" + "subs r8, %[div], r4\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + : [r] "+r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "r4", "r5", "r6", "r7", "r8" + ); + return r; +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = -1; + sp_digit one = 1; + + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mov r3, #-1\n\t" + "mov r6, #124\n\t" + "1:\n\t" + "ldr r4, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "subs r6, r6, #4\n\t" + "bcs 1b\n\t" + "eor %[r], %[r], r3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "r3", "r4", "r5", "r6", "r7" + ); +#else + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mov r3, #-1\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it 
hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, 
r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "eor %[r], %[r], r3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "r3", "r4", "r5", "r6", "r7" + ); +#endif + + return r; +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. 
+ */ +static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[64], t2[33]; + sp_digit div, r1; + int i; + + (void)m; + + + div = d[31]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 32); + for (i=31; i>=0; i--) { + r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div); + + sp_2048_mul_d_32(t2, d, r1); + t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2); + t1[32 + i] -= t2[32]; + sp_2048_mask_32(t2, d, t1[32 + i]); + t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2); + sp_2048_mask_32(t2, d, t1[32 + i]); + t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2); + } + + r1 = sp_2048_cmp_32(t1, d) >= 0; + sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_32(a, m, NULL, r); +} + +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][64]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 64, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 64; + } +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_32(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 32U); + if (reduceA != 0) { + err = sp_2048_mod_32(t[1] + 32, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_32(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32); + err = sp_2048_mod_32(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_32(t[10], t[ 5], m, mp); + sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_32(t[12], t[ 6], m, mp); + sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_32(t[14], t[ 7], m, mp); + sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 32); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n <<= 
4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + + sp_2048_mont_mul_32(r, r, t[y], m, mp); + } + + XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U); + sp_2048_mont_reduce_32(r, m, mp); + + mask = 0 - (sp_2048_cmp_32(r, m) >= 0); + sp_2048_cond_sub_32(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][64]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 64; + } +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_32(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 32U); + if (reduceA != 0) { + err = sp_2048_mod_32(t[1] + 32, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_32(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32); + err = sp_2048_mod_32(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_32(t[10], t[ 5], m, mp); + sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_32(t[12], t[ 6], m, mp); + sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_32(t[14], t[ 7], m, mp); + sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_32(t[16], t[ 8], m, mp); + sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_32(t[18], t[ 9], m, mp); + sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_32(t[20], t[10], m, mp); + sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_32(t[22], t[11], m, mp); + sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_32(t[24], t[12], m, mp); + sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_32(t[26], t[13], m, mp); + sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_32(t[28], t[14], m, mp); + sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_32(t[30], t[15], m, mp); + sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp); + + i = (bits - 
1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 32); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + + sp_2048_mont_mul_32(r, r, t[y], m, mp); + } + + XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U); + sp_2048_mont_reduce_32(r, m, mp); + + mask = 0 - (sp_2048_cmp_32(r, m) >= 0); + sp_2048_cond_sub_32(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ + +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */ + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 2048 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 64); + + /* r = 2^n mod m */ + sp_2048_sub_in_place_64(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
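+ * returns 0 when there is no borrow and all 1s when there is.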
+ */ +static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r9, #0\n\t" + "mov r8, #0\n\t" + "1:\n\t" + "subs %[c], r9, %[c]\n\t" + "ldr r4, [%[a], r8]\n\t" + "ldr r5, [%[b], r8]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc %[c], r9, r9\n\t" + "str r4, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, #256\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#else + __asm__ __volatile__ ( + + "mov r9, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "subs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r6, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #32]\n\t" + "str r6, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r6, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r6, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #48]\n\t" + "str r6, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r6, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r7, [%[b], #60]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #56]\n\t" + "str r6, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r6, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #64]\n\t" + "str r6, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r6, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r7, [%[b], #76]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #72]\n\t" + "str r6, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r6, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #80]\n\t" + "str r6, 
[%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r6, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r7, [%[b], #92]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #88]\n\t" + "str r6, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r6, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #96]\n\t" + "str r6, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r6, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r7, [%[b], #108]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #104]\n\t" + "str r6, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r6, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #112]\n\t" + "str r6, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r6, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r7, [%[b], #124]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #120]\n\t" + "str r6, [%[r], #124]\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r6, [%[a], #132]\n\t" + "ldr r5, [%[b], #128]\n\t" + "ldr r7, [%[b], #132]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #128]\n\t" + "str r6, [%[r], #132]\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r6, [%[a], #140]\n\t" + "ldr r5, [%[b], #136]\n\t" + "ldr r7, [%[b], #140]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #136]\n\t" + "str r6, [%[r], #140]\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r6, [%[a], #148]\n\t" + "ldr r5, [%[b], #144]\n\t" + "ldr r7, [%[b], #148]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #144]\n\t" + "str r6, [%[r], #148]\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r6, [%[a], #156]\n\t" + "ldr r5, [%[b], #152]\n\t" + "ldr r7, [%[b], #156]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #152]\n\t" + "str r6, [%[r], #156]\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r6, [%[a], #164]\n\t" + "ldr r5, [%[b], #160]\n\t" + "ldr r7, [%[b], #164]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #160]\n\t" + "str r6, [%[r], #164]\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r6, [%[a], #172]\n\t" + "ldr r5, [%[b], #168]\n\t" + "ldr r7, [%[b], #172]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #168]\n\t" + "str r6, [%[r], #172]\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r6, [%[a], #180]\n\t" + "ldr r5, [%[b], #176]\n\t" + "ldr r7, [%[b], #180]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #176]\n\t" + "str r6, [%[r], #180]\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r6, [%[a], #188]\n\t" + "ldr r5, [%[b], #184]\n\t" + "ldr r7, [%[b], #188]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #184]\n\t" + "str r6, [%[r], #188]\n\t" + "ldr r4, 
[%[a], #192]\n\t" + "ldr r6, [%[a], #196]\n\t" + "ldr r5, [%[b], #192]\n\t" + "ldr r7, [%[b], #196]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #192]\n\t" + "str r6, [%[r], #196]\n\t" + "ldr r4, [%[a], #200]\n\t" + "ldr r6, [%[a], #204]\n\t" + "ldr r5, [%[b], #200]\n\t" + "ldr r7, [%[b], #204]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #200]\n\t" + "str r6, [%[r], #204]\n\t" + "ldr r4, [%[a], #208]\n\t" + "ldr r6, [%[a], #212]\n\t" + "ldr r5, [%[b], #208]\n\t" + "ldr r7, [%[b], #212]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #208]\n\t" + "str r6, [%[r], #212]\n\t" + "ldr r4, [%[a], #216]\n\t" + "ldr r6, [%[a], #220]\n\t" + "ldr r5, [%[b], #216]\n\t" + "ldr r7, [%[b], #220]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #216]\n\t" + "str r6, [%[r], #220]\n\t" + "ldr r4, [%[a], #224]\n\t" + "ldr r6, [%[a], #228]\n\t" + "ldr r5, [%[b], #224]\n\t" + "ldr r7, [%[b], #228]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #224]\n\t" + "str r6, [%[r], #228]\n\t" + "ldr r4, [%[a], #232]\n\t" + "ldr r6, [%[a], #236]\n\t" + "ldr r5, [%[b], #232]\n\t" + "ldr r7, [%[b], #236]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #232]\n\t" + "str r6, [%[r], #236]\n\t" + "ldr r4, [%[a], #240]\n\t" + "ldr r6, [%[a], #244]\n\t" + "ldr r5, [%[b], #240]\n\t" + "ldr r7, [%[b], #244]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #240]\n\t" + "str r6, [%[r], #244]\n\t" + "ldr r4, [%[a], #248]\n\t" + "ldr r6, [%[a], #252]\n\t" + "ldr r5, [%[b], #248]\n\t" + "ldr r7, [%[b], #252]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #248]\n\t" + "str r6, [%[r], #252]\n\t" + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
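+ * For each word i, mu = a[i] * mp (mod 2^32) is chosen so that adding + * mu * m to a zeroes a[i]. After all 64 iterations the low half of a is + * zero and the top half holds the reduced value, possibly still >= m; + * the final carry drives a constant-time conditional subtract of m.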
+ */ +SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "# i = 0\n\t" + "mov r12, #0\n\t" + "ldr r10, [%[a], #0]\n\t" + "ldr r14, [%[a], #4]\n\t" + "\n1:\n\t" + "# mu = a[i] * mp\n\t" + "mul r8, %[mp], r10\n\t" + "# a[i+0] += m[0] * mu\n\t" + "ldr r7, [%[m], #0]\n\t" + "ldr r9, [%[a], #0]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" + "# a[i+1] += m[1] * mu\n\t" + "ldr r7, [%[m], #4]\n\t" + "ldr r9, [%[a], #4]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r10, r14, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r10, r10, r5\n\t" + "adc r4, r4, #0\n\t" + "# a[i+2] += m[2] * mu\n\t" + "ldr r7, [%[m], #8]\n\t" + "ldr r14, [%[a], #8]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r14, r14, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r14, r14, r4\n\t" + "adc r5, r5, #0\n\t" + "# a[i+3] += m[3] * mu\n\t" + "ldr r7, [%[m], #12]\n\t" + "ldr r9, [%[a], #12]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+4] += m[4] * mu\n\t" + "ldr r7, [%[m], #16]\n\t" + "ldr r9, [%[a], #16]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+5] += m[5] * mu\n\t" + "ldr r7, [%[m], #20]\n\t" + "ldr r9, [%[a], #20]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+6] += m[6] * mu\n\t" + "ldr r7, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+7] += m[7] * mu\n\t" + "ldr r7, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #28]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+8] += m[8] * mu\n\t" + "ldr r7, [%[m], #32]\n\t" + "ldr r9, [%[a], #32]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+9] += m[9] * mu\n\t" + "ldr r7, [%[m], #36]\n\t" + "ldr r9, [%[a], #36]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+10] += m[10] * mu\n\t" + "ldr r7, [%[m], #40]\n\t" + "ldr r9, [%[a], #40]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+11] += m[11] * mu\n\t" + "ldr r7, [%[m], #44]\n\t" + "ldr r9, [%[a], #44]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #44]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+12] += m[12] * mu\n\t" + "ldr r7, [%[m], #48]\n\t" + "ldr r9, [%[a], #48]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+13] += m[13] * mu\n\t" + "ldr r7, [%[m], #52]\n\t" + "ldr r9, [%[a], #52]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #52]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+14] += m[14] * mu\n\t" + "ldr r7, 
[%[m], #56]\n\t" + "ldr r9, [%[a], #56]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+15] += m[15] * mu\n\t" + "ldr r7, [%[m], #60]\n\t" + "ldr r9, [%[a], #60]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #60]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+16] += m[16] * mu\n\t" + "ldr r7, [%[m], #64]\n\t" + "ldr r9, [%[a], #64]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+17] += m[17] * mu\n\t" + "ldr r7, [%[m], #68]\n\t" + "ldr r9, [%[a], #68]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #68]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+18] += m[18] * mu\n\t" + "ldr r7, [%[m], #72]\n\t" + "ldr r9, [%[a], #72]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+19] += m[19] * mu\n\t" + "ldr r7, [%[m], #76]\n\t" + "ldr r9, [%[a], #76]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #76]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+20] += m[20] * mu\n\t" + "ldr r7, [%[m], #80]\n\t" + "ldr r9, [%[a], #80]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+21] += m[21] * mu\n\t" + "ldr r7, [%[m], #84]\n\t" + "ldr r9, [%[a], #84]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #84]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+22] += m[22] * mu\n\t" + "ldr r7, [%[m], #88]\n\t" + "ldr r9, [%[a], #88]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+23] += m[23] * mu\n\t" + "ldr r7, [%[m], #92]\n\t" + "ldr r9, [%[a], #92]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #92]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+24] += m[24] * mu\n\t" + "ldr r7, [%[m], #96]\n\t" + "ldr r9, [%[a], #96]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+25] += m[25] * mu\n\t" + "ldr r7, [%[m], #100]\n\t" + "ldr r9, [%[a], #100]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #100]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+26] += m[26] * mu\n\t" + "ldr r7, [%[m], #104]\n\t" + "ldr r9, [%[a], #104]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+27] += m[27] * mu\n\t" + "ldr r7, [%[m], #108]\n\t" + "ldr r9, [%[a], #108]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #108]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+28] += m[28] * mu\n\t" + "ldr r7, [%[m], #112]\n\t" + "ldr r9, [%[a], #112]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" 
+ "# a[i+29] += m[29] * mu\n\t" + "ldr r7, [%[m], #116]\n\t" + "ldr r9, [%[a], #116]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #116]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+30] += m[30] * mu\n\t" + "ldr r7, [%[m], #120]\n\t" + "ldr r9, [%[a], #120]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+31] += m[31] * mu\n\t" + "ldr r7, [%[m], #124]\n\t" + "ldr r9, [%[a], #124]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #124]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+32] += m[32] * mu\n\t" + "ldr r7, [%[m], #128]\n\t" + "ldr r9, [%[a], #128]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+33] += m[33] * mu\n\t" + "ldr r7, [%[m], #132]\n\t" + "ldr r9, [%[a], #132]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #132]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+34] += m[34] * mu\n\t" + "ldr r7, [%[m], #136]\n\t" + "ldr r9, [%[a], #136]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #136]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+35] += m[35] * mu\n\t" + "ldr r7, [%[m], #140]\n\t" + "ldr r9, [%[a], #140]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #140]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+36] += m[36] * mu\n\t" + "ldr r7, [%[m], #144]\n\t" + "ldr r9, [%[a], #144]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+37] += m[37] * mu\n\t" + "ldr r7, [%[m], #148]\n\t" + "ldr r9, [%[a], #148]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #148]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+38] += m[38] * mu\n\t" + "ldr r7, [%[m], #152]\n\t" + "ldr r9, [%[a], #152]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+39] += m[39] * mu\n\t" + "ldr r7, [%[m], #156]\n\t" + "ldr r9, [%[a], #156]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #156]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+40] += m[40] * mu\n\t" + "ldr r7, [%[m], #160]\n\t" + "ldr r9, [%[a], #160]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #160]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+41] += m[41] * mu\n\t" + "ldr r7, [%[m], #164]\n\t" + "ldr r9, [%[a], #164]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #164]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+42] += m[42] * mu\n\t" + "ldr r7, [%[m], #168]\n\t" + "ldr r9, [%[a], #168]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #168]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+43] += m[43] * mu\n\t" + "ldr r7, [%[m], #172]\n\t" + "ldr r9, [%[a], #172]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" 
+ "adds r9, r9, r5\n\t" + "str r9, [%[a], #172]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+44] += m[44] * mu\n\t" + "ldr r7, [%[m], #176]\n\t" + "ldr r9, [%[a], #176]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #176]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+45] += m[45] * mu\n\t" + "ldr r7, [%[m], #180]\n\t" + "ldr r9, [%[a], #180]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #180]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+46] += m[46] * mu\n\t" + "ldr r7, [%[m], #184]\n\t" + "ldr r9, [%[a], #184]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #184]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+47] += m[47] * mu\n\t" + "ldr r7, [%[m], #188]\n\t" + "ldr r9, [%[a], #188]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #188]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+48] += m[48] * mu\n\t" + "ldr r7, [%[m], #192]\n\t" + "ldr r9, [%[a], #192]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #192]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+49] += m[49] * mu\n\t" + "ldr r7, [%[m], #196]\n\t" + "ldr r9, [%[a], #196]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #196]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+50] += m[50] * mu\n\t" + "ldr r7, [%[m], #200]\n\t" + "ldr r9, [%[a], #200]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #200]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+51] += m[51] * mu\n\t" + "ldr r7, [%[m], #204]\n\t" + "ldr r9, [%[a], #204]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #204]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+52] += m[52] * mu\n\t" + "ldr r7, [%[m], #208]\n\t" + "ldr r9, [%[a], #208]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #208]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+53] += m[53] * mu\n\t" + "ldr r7, [%[m], #212]\n\t" + "ldr r9, [%[a], #212]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #212]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+54] += m[54] * mu\n\t" + "ldr r7, [%[m], #216]\n\t" + "ldr r9, [%[a], #216]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #216]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+55] += m[55] * mu\n\t" + "ldr r7, [%[m], #220]\n\t" + "ldr r9, [%[a], #220]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #220]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+56] += m[56] * mu\n\t" + "ldr r7, [%[m], #224]\n\t" + "ldr r9, [%[a], #224]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #224]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+57] += m[57] * mu\n\t" + "ldr r7, [%[m], #228]\n\t" + "ldr r9, [%[a], #228]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #228]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+58] += m[58] * mu\n\t" + "ldr r7, [%[m], #232]\n\t" + "ldr r9, [%[a], #232]\n\t" 
+ "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #232]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+59] += m[59] * mu\n\t" + "ldr r7, [%[m], #236]\n\t" + "ldr r9, [%[a], #236]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #236]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+60] += m[60] * mu\n\t" + "ldr r7, [%[m], #240]\n\t" + "ldr r9, [%[a], #240]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #240]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+61] += m[61] * mu\n\t" + "ldr r7, [%[m], #244]\n\t" + "ldr r9, [%[a], #244]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #244]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+62] += m[62] * mu\n\t" + "ldr r7, [%[m], #248]\n\t" + "ldr r9, [%[a], #248]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #248]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+63] += m[63] * mu\n\t" + "ldr r7, [%[m], #252]\n\t" + "ldr r9, [%[a], #252]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r7, r7, %[ca]\n\t" + "mov %[ca], #0\n\t" + "adc %[ca], %[ca], %[ca]\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #252]\n\t" + "ldr r9, [%[a], #256]\n\t" + "adcs r9, r9, r7\n\t" + "str r9, [%[a], #256]\n\t" + "adc %[ca], %[ca], #0\n\t" + "# i += 1\n\t" + "add %[a], %[a], #4\n\t" + "add r12, r12, #4\n\t" + "cmp r12, #256\n\t" + "blt 1b\n\t" + "str r10, [%[a], #0]\n\t" + "str r14, [%[a], #4]\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_2048_mul_64(r, a, b); + sp_2048_mont_reduce_64(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_2048_sqr_64(r, a); + sp_2048_mont_reduce_64(r, m, mp); +} + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The dividend. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. 
+ */ +static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) +{ + sp_digit r = 0; + + __asm__ __volatile__ ( + "lsr r5, %[div], #1\n\t" + "add r5, r5, #1\n\t" + "mov r6, %[d0]\n\t" + "mov r7, %[d1]\n\t" + "# Do top 32\n\t" + "subs r8, r5, r7\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], %[r]\n\t" + "sub %[r], %[r], r8\n\t" + "and r8, r8, r5\n\t" + "subs r7, r7, r8\n\t" + "# Next 30 bits\n\t" + "mov r4, #29\n\t" + "1:\n\t" + "movs r6, r6, lsl #1\n\t" + "adc r7, r7, r7\n\t" + "subs r8, r5, r7\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], %[r]\n\t" + "sub %[r], %[r], r8\n\t" + "and r8, r8, r5\n\t" + "subs r7, r7, r8\n\t" + "subs r4, r4, #1\n\t" + "bpl 1b\n\t" + "add %[r], %[r], %[r]\n\t" + "add %[r], %[r], #1\n\t" + "umull r4, r5, %[r], %[div]\n\t" + "subs r4, %[d0], r4\n\t" + "sbc r5, %[d1], r5\n\t" + "add %[r], %[r], r5\n\t" + "umull r4, r5, %[r], %[div]\n\t" + "subs r4, %[d0], r4\n\t" + "sbc r5, %[d1], r5\n\t" + "add %[r], %[r], r5\n\t" + "subs r8, %[div], r4\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + : [r] "+r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "r4", "r5", "r6", "r7", "r8" + ); + return r; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<64; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 64; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. 
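+ * Words are compared from the most significant down. The running mask r3 + * starts as all ones and is cleared by the first word that differs, which + * blanks out every later comparison; all 64 words are always examined, so + * the timing does not depend on where (or whether) a and b differ.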
+ */ +static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = -1; + sp_digit one = 1; + + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mov r3, #-1\n\t" + "mov r6, #252\n\t" + "1:\n\t" + "ldr r4, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "subs r6, r6, #4\n\t" + "bcs 1b\n\t" + "eor %[r], %[r], r3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "r3", "r4", "r5", "r6", "r7" + ); +#else + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mov r3, #-1\n\t" + "ldr r4, [%[a], #252]\n\t" + "ldr r5, [%[b], #252]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #248]\n\t" + "ldr r5, [%[b], #248]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #244]\n\t" + "ldr r5, [%[b], #244]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #240]\n\t" + "ldr r5, [%[b], #240]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #236]\n\t" + "ldr r5, [%[b], #236]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #232]\n\t" + "ldr r5, [%[b], #232]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #228]\n\t" + "ldr r5, [%[b], #228]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #224]\n\t" + "ldr r5, [%[b], #224]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #220]\n\t" + "ldr r5, [%[b], #220]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #216]\n\t" + "ldr r5, [%[b], #216]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #212]\n\t" + "ldr r5, [%[b], #212]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #208]\n\t" + "ldr r5, [%[b], #208]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + 
"movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #204]\n\t" + "ldr r5, [%[b], #204]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #200]\n\t" + "ldr r5, [%[b], #200]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #196]\n\t" + "ldr r5, [%[b], #196]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #192]\n\t" + "ldr r5, [%[b], #192]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #188]\n\t" + "ldr r5, [%[b], #188]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r5, [%[b], #184]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #180]\n\t" + "ldr r5, [%[b], #180]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r5, [%[b], #176]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #172]\n\t" + "ldr r5, [%[b], #172]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r5, [%[b], #168]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #164]\n\t" + "ldr r5, [%[b], #164]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r5, [%[b], #160]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #156]\n\t" + "ldr r5, [%[b], #156]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r5, [%[b], #152]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #148]\n\t" + "ldr r5, [%[b], #148]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, 
r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r5, [%[b], #144]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #140]\n\t" + "ldr r5, [%[b], #140]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r5, [%[b], #136]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #132]\n\t" + "ldr r5, [%[b], #132]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r5, [%[b], #128]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "and 
r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #28]\n\t" + 
"ldr r5, [%[b], #28]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "eor %[r], %[r], r3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "r3", "r4", "r5", "r6", "r7" + ); +#endif + + return r; +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[128], t2[65]; + sp_digit div, r1; + int i; + + (void)m; + + + div = d[63]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 64); + for (i=63; i>=0; i--) { + r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div); + + sp_2048_mul_d_64(t2, d, r1); + t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2); + t1[64 + i] -= t2[64]; + sp_2048_mask_64(t2, d, t1[64 + i]); + t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2); + sp_2048_mask_64(t2, d, t1[64 + i]); + t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2); + } + + r1 = sp_2048_cmp_64(t1, d) >= 0; + sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. 
+ */ +static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_64(a, m, NULL, r); +} + +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +/* Divide a by d and put the remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Number to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[128], t2[65]; + sp_digit div, r1; + int i; + + (void)m; + + + div = d[63]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 64); + for (i=63; i>=0; i--) { + r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div); + + sp_2048_mul_d_64(t2, d, r1); + t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2); + t1[64 + i] -= t2[64]; + if (t1[64 + i] != 0) { + t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d); + if (t1[64 + i] != 0) + t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d); + } + } + + r1 = sp_2048_cmp_64(t1, d) >= 0; + sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_64_cond(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
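+ * A 4-bit fixed window is used: t[k] holds a^k in Montgomery form (t[0], + * the norm, is the Montgomery one), and each window step performs four + * squarings and one multiply by a table entry, so the operation pattern + * is the same for every exponent of a given bit length.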
+ */ +static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][128]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 128, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 128; + } +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_64(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 64U); + if (reduceA != 0) { + err = sp_2048_mod_64(t[1] + 64, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_64(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64); + err = sp_2048_mod_64(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_64(t[10], t[ 5], m, mp); + sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_64(t[12], t[ 6], m, mp); + sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_64(t[14], t[ 7], m, mp); + sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 64); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n <<= 4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + + sp_2048_mont_mul_64(r, r, t[y], m, mp); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U); + sp_2048_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_2048_cmp_64(r, m) >= 0); + sp_2048_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
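+ * Same scheme as above but with a 5-bit window: 32 precomputed powers, + * five squarings and one table multiply per window, trading table memory + * for fewer multiplications on long exponents.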
+ */ +static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][128]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 128; + } +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_64(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 64U); + if (reduceA != 0) { + err = sp_2048_mod_64(t[1] + 64, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_64(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64); + err = sp_2048_mod_64(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_64(t[10], t[ 5], m, mp); + sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_64(t[12], t[ 6], m, mp); + sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_64(t[14], t[ 7], m, mp); + sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_64(t[16], t[ 8], m, mp); + sp_2048_mont_mul_64(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_64(t[18], t[ 9], m, mp); + sp_2048_mont_mul_64(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_64(t[20], t[10], m, mp); + sp_2048_mont_mul_64(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_64(t[22], t[11], m, mp); + sp_2048_mont_mul_64(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_64(t[24], t[12], m, mp); + sp_2048_mont_mul_64(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_64(t[26], t[13], m, mp); + sp_2048_mont_mul_64(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_64(t[28], t[14], m, mp); + sp_2048_mont_mul_64(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_64(t[30], t[15], m, mp); + sp_2048_mont_mul_64(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 64); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + + sp_2048_mont_mul_64(r, r, t[y], m, mp); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U); + sp_2048_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_2048_cmp_64(r, m) >= 0); + sp_2048_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || 
WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[128], m[64], r[128]; +#else + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; +#endif + sp_digit *ah; + sp_digit e[1]; + int err = MP_OKAY; + + if (*outLen < 256) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 256 || + mp_count_bits(mm) != 2048)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 64 * 2; + m = r + 64 * 2; + } +#endif + + if (err == MP_OKAY) { + ah = a + 64; + + sp_2048_from_bin(ah, 64, in, inLen); +#if DIGIT_BIT >= 32 + e[0] = em->dp[0]; +#else + e[0] = em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_2048_from_mp(m, 64, mm); + + if (e[0] == 0x3) { + if (err == MP_OKAY) { + sp_2048_sqr_64(r, ah); + err = sp_2048_mod_64_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_2048_mul_64(r, ah, r); + err = sp_2048_mod_64_cond(r, r, m); + } + } + else { + int i; + sp_digit mp; + + sp_2048_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
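+ * The input is already in the top half (ah = a + 64) and the low half is + * zeroed below, so the 128-word value equals in * 2^2048; the conditional + * reduction then leaves in * R mod m, the Montgomery representation.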
+ */
+            XMEMSET(a, 0, sizeof(sp_digit) * 64);
+            err = sp_2048_mod_64_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 64);
+                for (i--; i >= 0; i--) {
+                    sp_2048_mont_sqr_64(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_2048_mont_mul_64(r, r, a, m, mp);
+                    }
+                }
+                XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
+                sp_2048_mont_reduce_64(r, m, mp);
+
+                for (i = 63; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_2048_sub_in_place_64(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+/* RSA private key operation using the private exponent directly.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime. (Unused in this variant.)
+ * qm Second prime. (Unused in this variant.)
+ * dpm First prime's CRT exponent. (Unused in this variant.)
+ * dqm Second prime's CRT exponent. (Unused in this variant.)
+ * qim Inverse of second prime mod p. (Unused in this variant.)
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 256U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 2048) {
+            err = MP_READ_E;
+        }
+        if (inLen > 256) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL,
+                               DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 64;
+        m = a + 128;
+        r = a;
+
+        sp_2048_from_bin(a, 64, in, inLen);
+        sp_2048_from_mp(d, 64, dm);
+        sp_2048_from_mp(m, 64, mm);
+        err = sp_2048_mod_exp_64(r, a, d, 2048, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 64);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+}
+
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
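+ *
+ * Worked example (annotation): with m = 0xffffffff every word of b is
+ * kept, so the call computes r = a + b over all 32 words and returns
+ * the final carry; with m = 0 every word of b is masked to zero, so
+ * r = a and 0 is returned. sp_RsaPrivate_2048() below relies on this
+ * by passing the all-ones borrow from sp_2048_sub_in_place_32() as
+ * the mask, adding p back only when the subtraction underflowed.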
+ */ +static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r9, #0\n\t" + "mov r8, #0\n\t" + "1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr r4, [%[a], r8]\n\t" + "ldr r5, [%[b], r8]\n\t" + "and r5, r5, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adc %[c], r9, r9\n\t" + "str r4, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, #128\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#else + __asm__ __volatile__ ( + + "mov r9, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adds r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r6, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #32]\n\t" + "str r6, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r6, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r6, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #48]\n\t" + "str r6, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r6, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r7, [%[b], #60]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #56]\n\t" + "str r6, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r6, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #64]\n\t" + "str r6, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r6, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r7, [%[b], #76]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #72]\n\t" + "str r6, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r6, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #80]\n\t" + "str r6, 
[%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r6, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r7, [%[b], #92]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #88]\n\t" + "str r6, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r6, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #96]\n\t" + "str r6, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r6, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r7, [%[b], #108]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #104]\n\t" + "str r6, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r6, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #112]\n\t" + "str r6, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r6, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r7, [%[b], #124]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #120]\n\t" + "str r6, [%[r], #124]\n\t" + "adc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
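+ *
+ * Hedged usage sketch (annotation; the caller-side names sig and
+ * sigLen below are hypothetical, not defined in this file):
+ *   byte sig[256];
+ *   word32 sigLen = sizeof(sig);
+ *   ret = sp_RsaPrivate_2048(in, inLen, &d, &p, &q, &dp, &dq, &qi,
+ *                            &n, sig, &sigLen);
+ * On success ret is 0 and sig holds the 256-byte big-endian result.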
+ */ +int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[64 * 2]; + sp_digit p[32], q[32], dp[32]; + sp_digit tmpa[64], tmpb[64]; +#else + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* tmpa; + sp_digit* tmpb; +#endif + sp_digit* r; + sp_digit* qi; + sp_digit* dq; + sp_digit c; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 256) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 64 * 2; + q = p + 32; + qi = dq = dp = q + 32; + tmpa = qi + 32; + tmpb = tmpa + 64; + + r = t + 64; + } +#else +#endif + + if (err == MP_OKAY) { +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + r = a; + qi = dq = dp; +#endif + sp_2048_from_bin(a, 64, in, inLen); + sp_2048_from_mp(p, 32, pm); + sp_2048_from_mp(q, 32, qm); + sp_2048_from_mp(dp, 32, dpm); + + err = sp_2048_mod_exp_32(tmpa, a, dp, 1024, p, 1); + } + if (err == MP_OKAY) { + sp_2048_from_mp(dq, 32, dqm); + err = sp_2048_mod_exp_32(tmpb, a, dq, 1024, q, 1); + } + + if (err == MP_OKAY) { + c = sp_2048_sub_in_place_32(tmpa, tmpb); + c += sp_2048_cond_add_32(tmpa, tmpa, p, c); + sp_2048_cond_add_32(tmpa, tmpa, p, c); + + sp_2048_from_mp(qi, 32, qim); + sp_2048_mul_32(tmpa, tmpa, qi); + err = sp_2048_mod_32(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_2048_mul_32(tmpa, q, tmpa); + XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32); + sp_2048_add_64(r, tmpb, tmpa); + + sp_2048_to_bin(r, out); + *outLen = 256; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); +#endif + + return err; +} +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. 
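+ *
+ * Sizing example (annotation): the 2048-bit value is held in 64
+ * 32-bit sp_digits; with DIGIT_BIT == 28, for instance, it is
+ * repacked into ceil(2048 / 28) = 74 mp_int digits, which is why
+ * mp_grow() is asked for (2048 + DIGIT_BIT - 1) / DIGIT_BIT digits
+ * before any copying takes place.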
+ */ +static int sp_2048_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 64); + r->used = 64; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 64; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 32 - s; + } + r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 64; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[128], e[64], m[64]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 64, base); + sp_2048_from_mp(e, 64, exp); + sp_2048_from_mp(m, 64, mod); + + err = sp_2048_mod_exp_64(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_2048_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_2048 +static void sp_2048_lshift_64(sp_digit* r, sp_digit* a, byte n) +{ + __asm__ __volatile__ ( + "mov r6, #31\n\t" + "sub r6, r6, %[n]\n\t" + "ldr r3, [%[a], #252]\n\t" + "lsr r4, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r4, r4, r6\n\t" + "ldr r2, [%[a], #248]\n\t" + "str r4, [%[r], #256]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #244]\n\t" + "str r3, [%[r], #252]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #240]\n\t" + "str r2, [%[r], #248]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #236]\n\t" + "str r4, [%[r], #244]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #232]\n\t" + "str r3, [%[r], #240]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #228]\n\t" + "str r2, [%[r], #236]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #224]\n\t" + "str r4, [%[r], #232]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + 
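/* Annotation, not generated output: each unrolled step shifts a
+         * word left by n and ORs in the top bits of the next lower word,
+         * computed as (w >> 1) >> (31 - n) rather than w >> (32 - n) so
+         * that the shift count stays in range when n == 0. */
+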
"orr r3, r3, r5\n\t" + "ldr r4, [%[a], #220]\n\t" + "str r3, [%[r], #228]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #216]\n\t" + "str r2, [%[r], #224]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #212]\n\t" + "str r4, [%[r], #220]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #208]\n\t" + "str r3, [%[r], #216]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #204]\n\t" + "str r2, [%[r], #212]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #200]\n\t" + "str r4, [%[r], #208]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #196]\n\t" + "str r3, [%[r], #204]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #192]\n\t" + "str r2, [%[r], #200]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #188]\n\t" + "str r4, [%[r], #196]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #184]\n\t" + "str r3, [%[r], #192]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #180]\n\t" + "str r2, [%[r], #188]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #176]\n\t" + "str r4, [%[r], #184]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #172]\n\t" + "str r3, [%[r], #180]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #168]\n\t" + "str r2, [%[r], #176]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #164]\n\t" + "str r4, [%[r], #172]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #160]\n\t" + "str r3, [%[r], #168]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #156]\n\t" + "str r2, [%[r], #164]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #152]\n\t" + "str r4, [%[r], #160]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #148]\n\t" + "str r3, [%[r], #156]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #144]\n\t" + "str r2, [%[r], #152]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #140]\n\t" + "str r4, [%[r], #148]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #136]\n\t" + "str r3, [%[r], #144]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #132]\n\t" + "str r2, [%[r], #140]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #128]\n\t" + "str 
r4, [%[r], #136]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #124]\n\t" + "str r3, [%[r], #132]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #120]\n\t" + "str r2, [%[r], #128]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #116]\n\t" + "str r4, [%[r], #124]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #112]\n\t" + "str r3, [%[r], #120]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #108]\n\t" + "str r2, [%[r], #116]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #104]\n\t" + "str r4, [%[r], #112]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #100]\n\t" + "str r3, [%[r], #108]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #96]\n\t" + "str r2, [%[r], #104]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #92]\n\t" + "str r4, [%[r], #100]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #88]\n\t" + "str r3, [%[r], #96]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #84]\n\t" + "str r2, [%[r], #92]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #80]\n\t" + "str r4, [%[r], #88]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #76]\n\t" + "str r3, [%[r], #84]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #72]\n\t" + "str r2, [%[r], #80]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #68]\n\t" + "str r4, [%[r], #76]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #64]\n\t" + "str r3, [%[r], #72]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #60]\n\t" + "str r2, [%[r], #68]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, 
r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "str r3, [%[r]]\n\t" + "str r4, [%[r], #4]\n\t" + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) + : "memory", "r2", "r3", "r4", "r5", "r6" + ); +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[128]; + sp_digit td[65]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 128; +#else + norm = nd; + tmp = td; +#endif + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_64(norm, m); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + sp_2048_lshift_64(r, norm, y); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + + sp_2048_lshift_64(r, r, y); + sp_2048_mul_d_64(tmp, norm, r[64]); + r[64] = 0; + o = sp_2048_add_64(r, r, tmp); + sp_2048_cond_sub_64(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U); + sp_2048_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_2048_cmp_64(r, m) >= 0); + sp_2048_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* HAVE_FFDHE_2048 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ + int err = MP_OKAY; + sp_digit b[128], e[64], m[64]; + sp_digit* r = b; + word32 i; + + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 256) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 64, base); + sp_2048_from_bin(e, 64, exp, expLen); + sp_2048_from_mp(m, 64, mod); + + #ifdef HAVE_FFDHE_2048 + if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1) + err = sp_2048_mod_exp_2_64(r, e, expLen * 8, m); + else + #endif + err = sp_2048_mod_exp_64(r, b, e, expLen * 8, m, 0); + + } + + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + for (i=0; i<256 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. 
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[64], e[32], m[32]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1024) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 1024) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 1024) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 32, base); + sp_2048_from_mp(e, 32, exp); + sp_2048_from_mp(m, 32, mod); + + err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 32, 0, sizeof(*r) * 32U); + err = sp_2048_to_mp(r, res); + res->used = mod->used; + mp_clamp(res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* !WOLFSSL_SP_NO_2048 */ + +#ifndef WOLFSSL_SP_NO_3072 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 24U) { + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 32 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 32 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 32U) <= (word32)DIGIT_BIT) { + s += 32U; + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 32) { + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + s = 32 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 384 + * + * r A single precision integer. + * a Byte array. 
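+ *
+ * Layout example (annotation): a 3072-bit value occupies 96 32-bit
+ * digits and exactly 3072 / 8 = 384 output bytes; the least
+ * significant digit r[0] is written to the last four bytes, a[383]
+ * down to a[380], so the array as a whole is big-endian.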
+ */ +static void sp_3072_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + j = 3072 / 8 - 1; + a[j] = 0; + for (i=0; i<96 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ + b += 8 - s; + if (j < 0) { + break; + } + while (b < 32) { + a[j--] = (byte)(r[i] >> b); + b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 32); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "sub sp, sp, #48\n\t" + "mov r10, #0\n\t" + "# A[0] * B[0]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r3, r4, r8, r9\n\t" + "mov r5, #0\n\t" + "str r3, [sp]\n\t" + "# A[0] * B[1]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[0]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #4]\n\t" + "# A[0] * B[2]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[1]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[0]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #8]\n\t" + "# A[0] * B[3]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[2]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[1]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[0]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #12]\n\t" + "# A[0] * B[4]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[3]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[2]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[1]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[0]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #16]\n\t" + "# A[0] * 
B[5]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[4]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[3]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[2]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[1]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[0]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #20]\n\t" + "# A[0] * B[6]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[5]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[4]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[3]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[2]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[1]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[0]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #24]\n\t" + "# A[0] * B[7]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[6]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[5]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[4]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[3]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[2]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[1]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[0]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #28]\n\t" + "# A[0] * B[8]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[7]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[6]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[5]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[4]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[3]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[2]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[1]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[0]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #32]\n\t" + "# A[0] * B[9]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[8]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[7]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[6]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[5]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[4]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[3]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[2]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[1]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[0]\n\t" + "ldr r8, [%[a], #36]\n\t" + 
"ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #36]\n\t" + "# A[0] * B[10]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[9]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[8]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[7]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[6]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[5]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[4]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[3]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[2]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[1]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[0]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #40]\n\t" + "# A[0] * B[11]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[10]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[9]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[8]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[7]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[6]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[5]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[4]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + 
"adc r4, r4, r10\n\t" + "# A[8] * B[3]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[2]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[1]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[0]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #44]\n\t" + "# A[1] * B[11]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[2] * B[10]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[9]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[8]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[7]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[6]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[5]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[4]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[3]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[2]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[1]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #48]\n\t" + "# A[2] * B[11]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[3] * B[10]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[9]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[8]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[7]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], 
#28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[6]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[5]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[4]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[3]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[2]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #52]\n\t" + "# A[3] * B[11]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[4] * B[10]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[9]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[8]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[7]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[6]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[5]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[4]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[3]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #56]\n\t" + "# A[4] * B[11]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[5] * B[10]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[9]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[8]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[7]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + 
"adc r5, r5, r10\n\t" + "# A[9] * B[6]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[5]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[4]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #60]\n\t" + "# A[5] * B[11]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[6] * B[10]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[9]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[8]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[7]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[6]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[5]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #64]\n\t" + "# A[6] * B[11]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[7] * B[10]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[9]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[8]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[7]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[6]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #68]\n\t" + "# A[7] * B[11]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[8] * B[10]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[9]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[8]\n\t" + "ldr 
r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[7]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #72]\n\t" + "# A[8] * B[11]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[9] * B[10]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[9]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[8]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #76]\n\t" + "# A[9] * B[11]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[10] * B[10]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[9]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #80]\n\t" + "# A[10] * B[11]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[11] * B[10]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #84]\n\t" + "# A[11] * B[11]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "str r4, [%[r], #88]\n\t" + "str r5, [%[r], #92]\n\t" + "ldr r3, [sp, #0]\n\t" + "ldr r4, [sp, #4]\n\t" + "ldr r5, [sp, #8]\n\t" + "ldr r6, [sp, #12]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r3, [sp, #16]\n\t" + "ldr r4, [sp, #20]\n\t" + "ldr r5, [sp, #24]\n\t" + "ldr r6, [sp, #28]\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r3, [sp, #32]\n\t" + "ldr r4, [sp, #36]\n\t" + "ldr r5, [sp, #40]\n\t" + "ldr r6, [sp, #44]\n\t" + "str r3, [%[r], #32]\n\t" + "str r4, [%[r], #36]\n\t" + "str r5, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "add sp, sp, #48\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
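+ *
+ * Note (annotation): squaring needs only about half the partial
+ * products of a general multiply, since a[i]*a[j] equals a[j]*a[i];
+ * the code below computes each cross product once and accumulates it
+ * twice (or doubles the running sum), while the diagonal a[i]*a[i]
+ * terms are added exactly once.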
+ */ +static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "sub sp, sp, #48\n\t" + "mov r14, #0\n\t" + "# A[0] * A[0]\n\t" + "ldr r10, [%[a], #0]\n\t" + "umull r8, r3, r10, r10\n\t" + "mov r4, #0\n\t" + "str r8, [sp]\n\t" + "# A[0] * A[1]\n\t" + "ldr r10, [%[a], #4]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [sp, #4]\n\t" + "# A[0] * A[2]\n\t" + "ldr r10, [%[a], #8]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r14, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "# A[1] * A[1]\n\t" + "ldr r10, [%[a], #4]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "str r4, [sp, #8]\n\t" + "# A[0] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r14, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "# A[1] * A[2]\n\t" + "ldr r10, [%[a], #8]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "str r2, [sp, #12]\n\t" + "# A[0] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[1] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[2] * A[2]\n\t" + "ldr r10, [%[a], #8]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [sp, #16]\n\t" + "# A[0] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #20]\n\t" + "# A[0] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #24]\n\t" + "# A[0] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #28]\n\t" + "# A[0] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #32]\n\t" + "# A[0] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #36]\n\t" + "# A[0] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * 
A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #40]\n\t" + "# A[0] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #44]\n\t" + "# A[1] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[2] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #48]\n\t" + "# A[2] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[3] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + 
"adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #52]\n\t" + "# A[3] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[4] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #56]\n\t" + "# A[4] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[5] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #60]\n\t" + "# A[5] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[6] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #64]\n\t" + "# A[6] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[7] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + 
"adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #68]\n\t" + "# A[7] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r14, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "# A[8] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "# A[9] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "str r2, [%[r], #72]\n\t" + "# A[8] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[9] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [%[r], #76]\n\t" + "# A[9] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r14, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "# A[10] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "str r4, [%[r], #80]\n\t" + "# A[10] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r14, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "str r2, [%[r], #84]\n\t" + "# A[11] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adc r4, r4, r9\n\t" + "str r3, [%[r], #88]\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r2, [sp, #0]\n\t" + "ldr r3, [sp, #4]\n\t" + "ldr r4, [sp, #8]\n\t" + "ldr r8, [sp, #12]\n\t" + "str r2, [%[r], #0]\n\t" + "str r3, [%[r], #4]\n\t" + "str r4, [%[r], #8]\n\t" + "str r8, [%[r], #12]\n\t" + "ldr r2, [sp, #16]\n\t" + "ldr r3, [sp, #20]\n\t" + "ldr r4, [sp, #24]\n\t" + "ldr r8, [sp, #28]\n\t" + "str r2, [%[r], #16]\n\t" + "str r3, [%[r], #20]\n\t" + "str r4, [%[r], #24]\n\t" + "str r8, [%[r], #28]\n\t" + "ldr r2, [sp, #32]\n\t" + "ldr r3, [sp, #36]\n\t" + "ldr r4, [sp, #40]\n\t" + "ldr r8, [sp, #44]\n\t" + "str r2, [%[r], #32]\n\t" + "str r3, [%[r], #36]\n\t" + "str r4, [%[r], #40]\n\t" + "str r8, [%[r], #44]\n\t" + "add sp, sp, #48\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14" + ); +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[b], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "ldr r10, [%[b], #8]\n\t" + "ldr r14, [%[b], #12]\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" + "ldr r8, [%[b], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "ldr r10, [%[b], #24]\n\t" + "ldr r14, [%[b], #28]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "str r6, [%[r], #24]\n\t" + "str r7, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[a], #40]\n\t" + "ldr r7, [%[a], #44]\n\t" + "ldr r8, [%[b], #32]\n\t" + "ldr r9, [%[b], #36]\n\t" + "ldr r10, [%[b], #40]\n\t" + "ldr r14, [%[b], #44]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" + "str r6, [%[r], #40]\n\t" + "str r7, [%[r], #44]\n\t" + "adc %[c], r12, r12\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. + */ +static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r2, [%[a], #0]\n\t" + "ldr r3, [%[a], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "ldr r8, [%[b], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #0]\n\t" + "str r3, [%[a], #4]\n\t" + "str r4, [%[a], #8]\n\t" + "str r5, [%[a], #12]\n\t" + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "ldr r8, [%[b], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #16]\n\t" + "str r3, [%[a], #20]\n\t" + "str r4, [%[a], #24]\n\t" + "str r5, [%[a], #28]\n\t" + "ldr r2, [%[a], #32]\n\t" + "ldr r3, [%[a], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "ldr r8, [%[b], #40]\n\t" + "ldr r9, [%[b], #44]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #32]\n\t" + "str r3, [%[a], #36]\n\t" + "str r4, [%[a], #40]\n\t" + "str r5, [%[a], #44]\n\t" + "ldr r2, [%[a], #48]\n\t" + "ldr r3, [%[a], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[a], #60]\n\t" + "ldr r6, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "ldr r8, [%[b], #56]\n\t" + "ldr r9, [%[b], #60]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #48]\n\t" + "str r3, [%[a], #52]\n\t" + "str r4, 
[%[a], #56]\n\t" + "str r5, [%[a], #60]\n\t" + "ldr r2, [%[a], #64]\n\t" + "ldr r3, [%[a], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[a], #76]\n\t" + "ldr r6, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "ldr r8, [%[b], #72]\n\t" + "ldr r9, [%[b], #76]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #64]\n\t" + "str r3, [%[a], #68]\n\t" + "str r4, [%[a], #72]\n\t" + "str r5, [%[a], #76]\n\t" + "ldr r2, [%[a], #80]\n\t" + "ldr r3, [%[a], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[a], #92]\n\t" + "ldr r6, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "ldr r8, [%[b], #88]\n\t" + "ldr r9, [%[b], #92]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #80]\n\t" + "str r3, [%[a], #84]\n\t" + "str r4, [%[a], #88]\n\t" + "str r5, [%[a], #92]\n\t" + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [a] "r" (a), [b] "r" (b) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[b], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "ldr r10, [%[b], #8]\n\t" + "ldr r14, [%[b], #12]\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" + "ldr r8, [%[b], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "ldr r10, [%[b], #24]\n\t" + "ldr r14, [%[b], #28]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "str r6, [%[r], #24]\n\t" + "str r7, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[a], #40]\n\t" + "ldr r7, [%[a], #44]\n\t" + "ldr r8, [%[b], #32]\n\t" + "ldr r9, [%[b], #36]\n\t" + "ldr r10, [%[b], #40]\n\t" + "ldr r14, [%[b], #44]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" + "str r6, [%[r], #40]\n\t" + "str r7, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[a], #52]\n\t" + "ldr r6, [%[a], #56]\n\t" + "ldr r7, [%[a], #60]\n\t" + "ldr r8, [%[b], #48]\n\t" + "ldr r9, [%[b], #52]\n\t" + "ldr r10, [%[b], #56]\n\t" + "ldr r14, [%[b], #60]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #48]\n\t" + "str r5, [%[r], #52]\n\t" + "str r6, [%[r], #56]\n\t" + "str r7, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[a], #68]\n\t" + "ldr r6, [%[a], #72]\n\t" + "ldr r7, [%[a], #76]\n\t" + "ldr r8, [%[b], #64]\n\t" + "ldr r9, [%[b], #68]\n\t" + "ldr r10, [%[b], #72]\n\t" + "ldr r14, [%[b], #76]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #64]\n\t" + "str r5, [%[r], #68]\n\t" + "str r6, [%[r], #72]\n\t" + "str r7, [%[r], #76]\n\t" + "ldr r4, [%[a], 
#80]\n\t" + "ldr r5, [%[a], #84]\n\t" + "ldr r6, [%[a], #88]\n\t" + "ldr r7, [%[a], #92]\n\t" + "ldr r8, [%[b], #80]\n\t" + "ldr r9, [%[b], #84]\n\t" + "ldr r10, [%[b], #88]\n\t" + "ldr r14, [%[b], #92]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #80]\n\t" + "str r5, [%[r], #84]\n\t" + "str r6, [%[r], #88]\n\t" + "str r7, [%[r], #92]\n\t" + "adc %[c], r12, r12\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<12; i++) { + r[i] = a[i] & m; + } +#else + r[0] = a[0] & m; + r[1] = a[1] & m; + r[2] = a[2] & m; + r[3] = a[3] & m; + r[4] = a[4] & m; + r[5] = a[5] & m; + r[6] = a[6] & m; + r[7] = a[7] & m; + r[8] = a[8] & m; + r[9] = a[9] & m; + r[10] = a[10] & m; + r[11] = a[11] & m; +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[24]; + sp_digit a1[12]; + sp_digit b1[12]; + sp_digit z2[24]; + sp_digit u, ca, cb; + + ca = sp_3072_add_12(a1, a, &a[12]); + cb = sp_3072_add_12(b1, b, &b[12]); + u = ca & cb; + sp_3072_mul_12(z1, a1, b1); + sp_3072_mul_12(z2, &a[12], &b[12]); + sp_3072_mul_12(z0, a, b); + sp_3072_mask_12(r + 24, a1, 0 - cb); + sp_3072_mask_12(b1, b1, 0 - ca); + u += sp_3072_add_12(r + 24, r + 24, b1); + u += sp_3072_sub_in_place_24(z1, z2); + u += sp_3072_sub_in_place_24(z1, z0); + u += sp_3072_add_24(r + 12, r + 12, z1); + r[36] = u; + XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1)); + (void)sp_3072_add_24(r + 24, r + 24, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[24]; + sp_digit z1[24]; + sp_digit a1[12]; + sp_digit u; + + u = sp_3072_add_12(a1, a, &a[12]); + sp_3072_sqr_12(z1, a1); + sp_3072_sqr_12(z2, &a[12]); + sp_3072_sqr_12(z0, a); + sp_3072_mask_12(r + 24, a1, 0 - u); + u += sp_3072_add_12(r + 24, r + 24, r + 24); + u += sp_3072_sub_in_place_24(z1, z2); + u += sp_3072_sub_in_place_24(z1, z0); + u += sp_3072_add_24(r + 12, r + 12, z1); + r[36] = u; + XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1)); + (void)sp_3072_add_24(r + 24, r + 24, z2); +} + +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. 
+ */ +static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r2, [%[a], #0]\n\t" + "ldr r3, [%[a], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "ldr r8, [%[b], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #0]\n\t" + "str r3, [%[a], #4]\n\t" + "str r4, [%[a], #8]\n\t" + "str r5, [%[a], #12]\n\t" + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "ldr r8, [%[b], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #16]\n\t" + "str r3, [%[a], #20]\n\t" + "str r4, [%[a], #24]\n\t" + "str r5, [%[a], #28]\n\t" + "ldr r2, [%[a], #32]\n\t" + "ldr r3, [%[a], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "ldr r8, [%[b], #40]\n\t" + "ldr r9, [%[b], #44]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #32]\n\t" + "str r3, [%[a], #36]\n\t" + "str r4, [%[a], #40]\n\t" + "str r5, [%[a], #44]\n\t" + "ldr r2, [%[a], #48]\n\t" + "ldr r3, [%[a], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[a], #60]\n\t" + "ldr r6, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "ldr r8, [%[b], #56]\n\t" + "ldr r9, [%[b], #60]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #48]\n\t" + "str r3, [%[a], #52]\n\t" + "str r4, [%[a], #56]\n\t" + "str r5, [%[a], #60]\n\t" + "ldr r2, [%[a], #64]\n\t" + "ldr r3, [%[a], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[a], #76]\n\t" + "ldr r6, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "ldr r8, [%[b], #72]\n\t" + "ldr r9, [%[b], #76]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #64]\n\t" + "str r3, [%[a], #68]\n\t" + "str r4, [%[a], #72]\n\t" + "str r5, [%[a], #76]\n\t" + "ldr r2, [%[a], #80]\n\t" + "ldr r3, [%[a], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[a], #92]\n\t" + "ldr r6, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "ldr r8, [%[b], #88]\n\t" + "ldr r9, [%[b], #92]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #80]\n\t" + "str r3, [%[a], #84]\n\t" + "str r4, [%[a], #88]\n\t" + "str r5, [%[a], #92]\n\t" + "ldr r2, [%[a], #96]\n\t" + "ldr r3, [%[a], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[a], #108]\n\t" + "ldr r6, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "ldr r8, [%[b], #104]\n\t" + "ldr r9, [%[b], #108]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #96]\n\t" + "str r3, [%[a], #100]\n\t" + "str r4, [%[a], #104]\n\t" + "str r5, [%[a], #108]\n\t" + "ldr r2, [%[a], #112]\n\t" + "ldr r3, [%[a], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[a], #124]\n\t" + "ldr r6, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "ldr r8, [%[b], #120]\n\t" + "ldr r9, [%[b], #124]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #112]\n\t" + "str r3, [%[a], #116]\n\t" + "str r4, [%[a], 
#120]\n\t" + "str r5, [%[a], #124]\n\t" + "ldr r2, [%[a], #128]\n\t" + "ldr r3, [%[a], #132]\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r5, [%[a], #140]\n\t" + "ldr r6, [%[b], #128]\n\t" + "ldr r7, [%[b], #132]\n\t" + "ldr r8, [%[b], #136]\n\t" + "ldr r9, [%[b], #140]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #128]\n\t" + "str r3, [%[a], #132]\n\t" + "str r4, [%[a], #136]\n\t" + "str r5, [%[a], #140]\n\t" + "ldr r2, [%[a], #144]\n\t" + "ldr r3, [%[a], #148]\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r5, [%[a], #156]\n\t" + "ldr r6, [%[b], #144]\n\t" + "ldr r7, [%[b], #148]\n\t" + "ldr r8, [%[b], #152]\n\t" + "ldr r9, [%[b], #156]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #144]\n\t" + "str r3, [%[a], #148]\n\t" + "str r4, [%[a], #152]\n\t" + "str r5, [%[a], #156]\n\t" + "ldr r2, [%[a], #160]\n\t" + "ldr r3, [%[a], #164]\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r5, [%[a], #172]\n\t" + "ldr r6, [%[b], #160]\n\t" + "ldr r7, [%[b], #164]\n\t" + "ldr r8, [%[b], #168]\n\t" + "ldr r9, [%[b], #172]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #160]\n\t" + "str r3, [%[a], #164]\n\t" + "str r4, [%[a], #168]\n\t" + "str r5, [%[a], #172]\n\t" + "ldr r2, [%[a], #176]\n\t" + "ldr r3, [%[a], #180]\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r5, [%[a], #188]\n\t" + "ldr r6, [%[b], #176]\n\t" + "ldr r7, [%[b], #180]\n\t" + "ldr r8, [%[b], #184]\n\t" + "ldr r9, [%[b], #188]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #176]\n\t" + "str r3, [%[a], #180]\n\t" + "str r4, [%[a], #184]\n\t" + "str r5, [%[a], #188]\n\t" + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [a] "r" (a), [b] "r" (b) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[b], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "ldr r10, [%[b], #8]\n\t" + "ldr r14, [%[b], #12]\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" + "ldr r8, [%[b], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "ldr r10, [%[b], #24]\n\t" + "ldr r14, [%[b], #28]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "str r6, [%[r], #24]\n\t" + "str r7, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[a], #40]\n\t" + "ldr r7, [%[a], #44]\n\t" + "ldr r8, [%[b], #32]\n\t" + "ldr r9, [%[b], #36]\n\t" + "ldr r10, [%[b], #40]\n\t" + "ldr r14, [%[b], #44]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" + "str r6, [%[r], #40]\n\t" + "str r7, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[a], #52]\n\t" + "ldr r6, [%[a], #56]\n\t" + "ldr r7, [%[a], #60]\n\t" + "ldr r8, [%[b], #48]\n\t" + "ldr r9, [%[b], #52]\n\t" + "ldr r10, [%[b], #56]\n\t" + "ldr r14, [%[b], #60]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #48]\n\t" + "str r5, [%[r], #52]\n\t" + "str r6, [%[r], #56]\n\t" + "str r7, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[a], #68]\n\t" + "ldr r6, [%[a], #72]\n\t" + "ldr r7, [%[a], #76]\n\t" + "ldr r8, [%[b], #64]\n\t" + "ldr r9, [%[b], #68]\n\t" + "ldr r10, [%[b], #72]\n\t" + "ldr r14, [%[b], #76]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #64]\n\t" + "str r5, [%[r], #68]\n\t" + "str r6, [%[r], #72]\n\t" + "str r7, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[a], #84]\n\t" + "ldr r6, [%[a], #88]\n\t" + "ldr r7, [%[a], #92]\n\t" + "ldr r8, [%[b], #80]\n\t" + "ldr r9, [%[b], #84]\n\t" + "ldr r10, [%[b], #88]\n\t" + "ldr r14, [%[b], #92]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #80]\n\t" + "str r5, [%[r], #84]\n\t" + "str r6, [%[r], #88]\n\t" + "str r7, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[a], #100]\n\t" + "ldr r6, [%[a], #104]\n\t" + "ldr r7, [%[a], #108]\n\t" + "ldr r8, [%[b], #96]\n\t" + "ldr r9, [%[b], #100]\n\t" + "ldr r10, [%[b], #104]\n\t" + "ldr r14, [%[b], #108]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #96]\n\t" + "str r5, [%[r], #100]\n\t" + "str r6, [%[r], #104]\n\t" + "str r7, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[a], #116]\n\t" + "ldr r6, [%[a], #120]\n\t" + "ldr r7, [%[a], #124]\n\t" + "ldr r8, [%[b], #112]\n\t" + "ldr r9, [%[b], #116]\n\t" + "ldr r10, [%[b], #120]\n\t" + "ldr r14, [%[b], #124]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, 
[%[r], #112]\n\t" + "str r5, [%[r], #116]\n\t" + "str r6, [%[r], #120]\n\t" + "str r7, [%[r], #124]\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r5, [%[a], #132]\n\t" + "ldr r6, [%[a], #136]\n\t" + "ldr r7, [%[a], #140]\n\t" + "ldr r8, [%[b], #128]\n\t" + "ldr r9, [%[b], #132]\n\t" + "ldr r10, [%[b], #136]\n\t" + "ldr r14, [%[b], #140]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #128]\n\t" + "str r5, [%[r], #132]\n\t" + "str r6, [%[r], #136]\n\t" + "str r7, [%[r], #140]\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r5, [%[a], #148]\n\t" + "ldr r6, [%[a], #152]\n\t" + "ldr r7, [%[a], #156]\n\t" + "ldr r8, [%[b], #144]\n\t" + "ldr r9, [%[b], #148]\n\t" + "ldr r10, [%[b], #152]\n\t" + "ldr r14, [%[b], #156]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #144]\n\t" + "str r5, [%[r], #148]\n\t" + "str r6, [%[r], #152]\n\t" + "str r7, [%[r], #156]\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r5, [%[a], #164]\n\t" + "ldr r6, [%[a], #168]\n\t" + "ldr r7, [%[a], #172]\n\t" + "ldr r8, [%[b], #160]\n\t" + "ldr r9, [%[b], #164]\n\t" + "ldr r10, [%[b], #168]\n\t" + "ldr r14, [%[b], #172]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #160]\n\t" + "str r5, [%[r], #164]\n\t" + "str r6, [%[r], #168]\n\t" + "str r7, [%[r], #172]\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r5, [%[a], #180]\n\t" + "ldr r6, [%[a], #184]\n\t" + "ldr r7, [%[a], #188]\n\t" + "ldr r8, [%[b], #176]\n\t" + "ldr r9, [%[b], #180]\n\t" + "ldr r10, [%[b], #184]\n\t" + "ldr r14, [%[b], #188]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #176]\n\t" + "str r5, [%[r], #180]\n\t" + "str r6, [%[r], #184]\n\t" + "str r7, [%[r], #188]\n\t" + "adc %[c], r12, r12\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<24; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 24; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
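+ *
+ * Editor's note (a sketch of the idea, not part of the original patch):
+ * this is one level of Karatsuba multiplication. With B = 2^(32*24),
+ * split a = a1*B + a0 and b = b1*B + b0; then
+ *
+ *     a*b = z2*B^2 + ((a0 + a1)*(b0 + b1) - z2 - z0)*B + z0
+ *
+ * where z0 = a0*b0 and z2 = a1*b1, so a 48-word product costs three
+ * 24-word products plus linear work. Since (a0 + a1) and (b0 + b1) can
+ * carry out of 24 words, the mask/add pair patches the ca*b1 and cb*a1
+ * cross terms in at word offset 48, and the ca&cb term plus remaining
+ * carries accumulate in u and are stored at r[72] (the 0 / -1 return of
+ * the sub_in_place helpers is why borrows decrement u here).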
+ */ +SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[48]; + sp_digit a1[24]; + sp_digit b1[24]; + sp_digit z2[48]; + sp_digit u, ca, cb; + + ca = sp_3072_add_24(a1, a, &a[24]); + cb = sp_3072_add_24(b1, b, &b[24]); + u = ca & cb; + sp_3072_mul_24(z1, a1, b1); + sp_3072_mul_24(z2, &a[24], &b[24]); + sp_3072_mul_24(z0, a, b); + sp_3072_mask_24(r + 48, a1, 0 - cb); + sp_3072_mask_24(b1, b1, 0 - ca); + u += sp_3072_add_24(r + 48, r + 48, b1); + u += sp_3072_sub_in_place_48(z1, z2); + u += sp_3072_sub_in_place_48(z1, z0); + u += sp_3072_add_48(r + 24, r + 24, z1); + r[72] = u; + XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1)); + (void)sp_3072_add_48(r + 48, r + 48, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[48]; + sp_digit z1[48]; + sp_digit a1[24]; + sp_digit u; + + u = sp_3072_add_24(a1, a, &a[24]); + sp_3072_sqr_24(z1, a1); + sp_3072_sqr_24(z2, &a[24]); + sp_3072_sqr_24(z0, a); + sp_3072_mask_24(r + 48, a1, 0 - u); + u += sp_3072_add_24(r + 48, r + 48, r + 48); + u += sp_3072_sub_in_place_48(z1, z2); + u += sp_3072_sub_in_place_48(z1, z0); + u += sp_3072_add_48(r + 24, r + 24, z1); + r[72] = u; + XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1)); + (void)sp_3072_add_48(r + 48, r + 48, z2); +} + +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. + */ +static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r2, [%[a], #0]\n\t" + "ldr r3, [%[a], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "ldr r8, [%[b], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #0]\n\t" + "str r3, [%[a], #4]\n\t" + "str r4, [%[a], #8]\n\t" + "str r5, [%[a], #12]\n\t" + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "ldr r8, [%[b], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #16]\n\t" + "str r3, [%[a], #20]\n\t" + "str r4, [%[a], #24]\n\t" + "str r5, [%[a], #28]\n\t" + "ldr r2, [%[a], #32]\n\t" + "ldr r3, [%[a], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "ldr r8, [%[b], #40]\n\t" + "ldr r9, [%[b], #44]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #32]\n\t" + "str r3, [%[a], #36]\n\t" + "str r4, [%[a], #40]\n\t" + "str r5, [%[a], #44]\n\t" + "ldr r2, [%[a], #48]\n\t" + "ldr r3, [%[a], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[a], #60]\n\t" + "ldr r6, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "ldr r8, [%[b], #56]\n\t" + "ldr r9, [%[b], #60]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #48]\n\t" + "str r3, [%[a], #52]\n\t" + "str r4, [%[a], #56]\n\t" + "str r5, [%[a], #60]\n\t" + "ldr r2, [%[a], #64]\n\t" + "ldr r3, [%[a], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr 
r5, [%[a], #76]\n\t" + "ldr r6, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "ldr r8, [%[b], #72]\n\t" + "ldr r9, [%[b], #76]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #64]\n\t" + "str r3, [%[a], #68]\n\t" + "str r4, [%[a], #72]\n\t" + "str r5, [%[a], #76]\n\t" + "ldr r2, [%[a], #80]\n\t" + "ldr r3, [%[a], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[a], #92]\n\t" + "ldr r6, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "ldr r8, [%[b], #88]\n\t" + "ldr r9, [%[b], #92]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #80]\n\t" + "str r3, [%[a], #84]\n\t" + "str r4, [%[a], #88]\n\t" + "str r5, [%[a], #92]\n\t" + "ldr r2, [%[a], #96]\n\t" + "ldr r3, [%[a], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[a], #108]\n\t" + "ldr r6, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "ldr r8, [%[b], #104]\n\t" + "ldr r9, [%[b], #108]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #96]\n\t" + "str r3, [%[a], #100]\n\t" + "str r4, [%[a], #104]\n\t" + "str r5, [%[a], #108]\n\t" + "ldr r2, [%[a], #112]\n\t" + "ldr r3, [%[a], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[a], #124]\n\t" + "ldr r6, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "ldr r8, [%[b], #120]\n\t" + "ldr r9, [%[b], #124]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #112]\n\t" + "str r3, [%[a], #116]\n\t" + "str r4, [%[a], #120]\n\t" + "str r5, [%[a], #124]\n\t" + "ldr r2, [%[a], #128]\n\t" + "ldr r3, [%[a], #132]\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r5, [%[a], #140]\n\t" + "ldr r6, [%[b], #128]\n\t" + "ldr r7, [%[b], #132]\n\t" + "ldr r8, [%[b], #136]\n\t" + "ldr r9, [%[b], #140]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #128]\n\t" + "str r3, [%[a], #132]\n\t" + "str r4, [%[a], #136]\n\t" + "str r5, [%[a], #140]\n\t" + "ldr r2, [%[a], #144]\n\t" + "ldr r3, [%[a], #148]\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r5, [%[a], #156]\n\t" + "ldr r6, [%[b], #144]\n\t" + "ldr r7, [%[b], #148]\n\t" + "ldr r8, [%[b], #152]\n\t" + "ldr r9, [%[b], #156]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #144]\n\t" + "str r3, [%[a], #148]\n\t" + "str r4, [%[a], #152]\n\t" + "str r5, [%[a], #156]\n\t" + "ldr r2, [%[a], #160]\n\t" + "ldr r3, [%[a], #164]\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r5, [%[a], #172]\n\t" + "ldr r6, [%[b], #160]\n\t" + "ldr r7, [%[b], #164]\n\t" + "ldr r8, [%[b], #168]\n\t" + "ldr r9, [%[b], #172]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #160]\n\t" + "str r3, [%[a], #164]\n\t" + "str r4, [%[a], #168]\n\t" + "str r5, [%[a], #172]\n\t" + "ldr r2, [%[a], #176]\n\t" + "ldr r3, [%[a], #180]\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r5, [%[a], #188]\n\t" + "ldr r6, [%[b], #176]\n\t" + "ldr r7, [%[b], #180]\n\t" + "ldr r8, [%[b], #184]\n\t" + "ldr r9, [%[b], #188]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #176]\n\t" + "str r3, [%[a], #180]\n\t" + "str r4, [%[a], #184]\n\t" + "str r5, [%[a], #188]\n\t" + "ldr r2, [%[a], #192]\n\t" + "ldr r3, [%[a], #196]\n\t" + "ldr r4, [%[a], #200]\n\t" + "ldr r5, [%[a], #204]\n\t" + 
"ldr r6, [%[b], #192]\n\t" + "ldr r7, [%[b], #196]\n\t" + "ldr r8, [%[b], #200]\n\t" + "ldr r9, [%[b], #204]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #192]\n\t" + "str r3, [%[a], #196]\n\t" + "str r4, [%[a], #200]\n\t" + "str r5, [%[a], #204]\n\t" + "ldr r2, [%[a], #208]\n\t" + "ldr r3, [%[a], #212]\n\t" + "ldr r4, [%[a], #216]\n\t" + "ldr r5, [%[a], #220]\n\t" + "ldr r6, [%[b], #208]\n\t" + "ldr r7, [%[b], #212]\n\t" + "ldr r8, [%[b], #216]\n\t" + "ldr r9, [%[b], #220]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #208]\n\t" + "str r3, [%[a], #212]\n\t" + "str r4, [%[a], #216]\n\t" + "str r5, [%[a], #220]\n\t" + "ldr r2, [%[a], #224]\n\t" + "ldr r3, [%[a], #228]\n\t" + "ldr r4, [%[a], #232]\n\t" + "ldr r5, [%[a], #236]\n\t" + "ldr r6, [%[b], #224]\n\t" + "ldr r7, [%[b], #228]\n\t" + "ldr r8, [%[b], #232]\n\t" + "ldr r9, [%[b], #236]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #224]\n\t" + "str r3, [%[a], #228]\n\t" + "str r4, [%[a], #232]\n\t" + "str r5, [%[a], #236]\n\t" + "ldr r2, [%[a], #240]\n\t" + "ldr r3, [%[a], #244]\n\t" + "ldr r4, [%[a], #248]\n\t" + "ldr r5, [%[a], #252]\n\t" + "ldr r6, [%[b], #240]\n\t" + "ldr r7, [%[b], #244]\n\t" + "ldr r8, [%[b], #248]\n\t" + "ldr r9, [%[b], #252]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #240]\n\t" + "str r3, [%[a], #244]\n\t" + "str r4, [%[a], #248]\n\t" + "str r5, [%[a], #252]\n\t" + "ldr r2, [%[a], #256]\n\t" + "ldr r3, [%[a], #260]\n\t" + "ldr r4, [%[a], #264]\n\t" + "ldr r5, [%[a], #268]\n\t" + "ldr r6, [%[b], #256]\n\t" + "ldr r7, [%[b], #260]\n\t" + "ldr r8, [%[b], #264]\n\t" + "ldr r9, [%[b], #268]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #256]\n\t" + "str r3, [%[a], #260]\n\t" + "str r4, [%[a], #264]\n\t" + "str r5, [%[a], #268]\n\t" + "ldr r2, [%[a], #272]\n\t" + "ldr r3, [%[a], #276]\n\t" + "ldr r4, [%[a], #280]\n\t" + "ldr r5, [%[a], #284]\n\t" + "ldr r6, [%[b], #272]\n\t" + "ldr r7, [%[b], #276]\n\t" + "ldr r8, [%[b], #280]\n\t" + "ldr r9, [%[b], #284]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #272]\n\t" + "str r3, [%[a], #276]\n\t" + "str r4, [%[a], #280]\n\t" + "str r5, [%[a], #284]\n\t" + "ldr r2, [%[a], #288]\n\t" + "ldr r3, [%[a], #292]\n\t" + "ldr r4, [%[a], #296]\n\t" + "ldr r5, [%[a], #300]\n\t" + "ldr r6, [%[b], #288]\n\t" + "ldr r7, [%[b], #292]\n\t" + "ldr r8, [%[b], #296]\n\t" + "ldr r9, [%[b], #300]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #288]\n\t" + "str r3, [%[a], #292]\n\t" + "str r4, [%[a], #296]\n\t" + "str r5, [%[a], #300]\n\t" + "ldr r2, [%[a], #304]\n\t" + "ldr r3, [%[a], #308]\n\t" + "ldr r4, [%[a], #312]\n\t" + "ldr r5, [%[a], #316]\n\t" + "ldr r6, [%[b], #304]\n\t" + "ldr r7, [%[b], #308]\n\t" + "ldr r8, [%[b], #312]\n\t" + "ldr r9, [%[b], #316]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #304]\n\t" + "str r3, [%[a], #308]\n\t" + "str r4, [%[a], #312]\n\t" + "str r5, [%[a], #316]\n\t" + "ldr r2, [%[a], #320]\n\t" + "ldr r3, [%[a], #324]\n\t" + "ldr r4, [%[a], #328]\n\t" + "ldr r5, [%[a], #332]\n\t" + 
"ldr r6, [%[b], #320]\n\t" + "ldr r7, [%[b], #324]\n\t" + "ldr r8, [%[b], #328]\n\t" + "ldr r9, [%[b], #332]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #320]\n\t" + "str r3, [%[a], #324]\n\t" + "str r4, [%[a], #328]\n\t" + "str r5, [%[a], #332]\n\t" + "ldr r2, [%[a], #336]\n\t" + "ldr r3, [%[a], #340]\n\t" + "ldr r4, [%[a], #344]\n\t" + "ldr r5, [%[a], #348]\n\t" + "ldr r6, [%[b], #336]\n\t" + "ldr r7, [%[b], #340]\n\t" + "ldr r8, [%[b], #344]\n\t" + "ldr r9, [%[b], #348]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #336]\n\t" + "str r3, [%[a], #340]\n\t" + "str r4, [%[a], #344]\n\t" + "str r5, [%[a], #348]\n\t" + "ldr r2, [%[a], #352]\n\t" + "ldr r3, [%[a], #356]\n\t" + "ldr r4, [%[a], #360]\n\t" + "ldr r5, [%[a], #364]\n\t" + "ldr r6, [%[b], #352]\n\t" + "ldr r7, [%[b], #356]\n\t" + "ldr r8, [%[b], #360]\n\t" + "ldr r9, [%[b], #364]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #352]\n\t" + "str r3, [%[a], #356]\n\t" + "str r4, [%[a], #360]\n\t" + "str r5, [%[a], #364]\n\t" + "ldr r2, [%[a], #368]\n\t" + "ldr r3, [%[a], #372]\n\t" + "ldr r4, [%[a], #376]\n\t" + "ldr r5, [%[a], #380]\n\t" + "ldr r6, [%[b], #368]\n\t" + "ldr r7, [%[b], #372]\n\t" + "ldr r8, [%[b], #376]\n\t" + "ldr r9, [%[b], #380]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #368]\n\t" + "str r3, [%[a], #372]\n\t" + "str r4, [%[a], #376]\n\t" + "str r5, [%[a], #380]\n\t" + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [a] "r" (a), [b] "r" (b) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[b], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "ldr r10, [%[b], #8]\n\t" + "ldr r14, [%[b], #12]\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" + "ldr r8, [%[b], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "ldr r10, [%[b], #24]\n\t" + "ldr r14, [%[b], #28]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "str r6, [%[r], #24]\n\t" + "str r7, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[a], #40]\n\t" + "ldr r7, [%[a], #44]\n\t" + "ldr r8, [%[b], #32]\n\t" + "ldr r9, [%[b], #36]\n\t" + "ldr r10, [%[b], #40]\n\t" + "ldr r14, [%[b], #44]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" + "str r6, [%[r], #40]\n\t" + "str r7, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[a], #52]\n\t" + "ldr r6, [%[a], #56]\n\t" + "ldr r7, [%[a], #60]\n\t" + "ldr r8, [%[b], #48]\n\t" + "ldr r9, [%[b], #52]\n\t" + "ldr r10, [%[b], #56]\n\t" + "ldr r14, [%[b], #60]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #48]\n\t" + "str r5, [%[r], #52]\n\t" + "str r6, [%[r], #56]\n\t" + "str r7, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[a], #68]\n\t" + "ldr r6, [%[a], #72]\n\t" + "ldr r7, [%[a], #76]\n\t" + "ldr r8, [%[b], #64]\n\t" + "ldr r9, [%[b], #68]\n\t" + "ldr r10, [%[b], #72]\n\t" + "ldr r14, [%[b], #76]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #64]\n\t" + "str r5, [%[r], #68]\n\t" + "str r6, [%[r], #72]\n\t" + "str r7, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[a], #84]\n\t" + "ldr r6, [%[a], #88]\n\t" + "ldr r7, [%[a], #92]\n\t" + "ldr r8, [%[b], #80]\n\t" + "ldr r9, [%[b], #84]\n\t" + "ldr r10, [%[b], #88]\n\t" + "ldr r14, [%[b], #92]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #80]\n\t" + "str r5, [%[r], #84]\n\t" + "str r6, [%[r], #88]\n\t" + "str r7, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[a], #100]\n\t" + "ldr r6, [%[a], #104]\n\t" + "ldr r7, [%[a], #108]\n\t" + "ldr r8, [%[b], #96]\n\t" + "ldr r9, [%[b], #100]\n\t" + "ldr r10, [%[b], #104]\n\t" + "ldr r14, [%[b], #108]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #96]\n\t" + "str r5, [%[r], #100]\n\t" + "str r6, [%[r], #104]\n\t" + "str r7, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[a], #116]\n\t" + "ldr r6, [%[a], #120]\n\t" + "ldr r7, [%[a], #124]\n\t" + "ldr r8, [%[b], #112]\n\t" + "ldr r9, [%[b], #116]\n\t" + "ldr r10, [%[b], #120]\n\t" + "ldr r14, [%[b], #124]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, 
[%[r], #112]\n\t" + "str r5, [%[r], #116]\n\t" + "str r6, [%[r], #120]\n\t" + "str r7, [%[r], #124]\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r5, [%[a], #132]\n\t" + "ldr r6, [%[a], #136]\n\t" + "ldr r7, [%[a], #140]\n\t" + "ldr r8, [%[b], #128]\n\t" + "ldr r9, [%[b], #132]\n\t" + "ldr r10, [%[b], #136]\n\t" + "ldr r14, [%[b], #140]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #128]\n\t" + "str r5, [%[r], #132]\n\t" + "str r6, [%[r], #136]\n\t" + "str r7, [%[r], #140]\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r5, [%[a], #148]\n\t" + "ldr r6, [%[a], #152]\n\t" + "ldr r7, [%[a], #156]\n\t" + "ldr r8, [%[b], #144]\n\t" + "ldr r9, [%[b], #148]\n\t" + "ldr r10, [%[b], #152]\n\t" + "ldr r14, [%[b], #156]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #144]\n\t" + "str r5, [%[r], #148]\n\t" + "str r6, [%[r], #152]\n\t" + "str r7, [%[r], #156]\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r5, [%[a], #164]\n\t" + "ldr r6, [%[a], #168]\n\t" + "ldr r7, [%[a], #172]\n\t" + "ldr r8, [%[b], #160]\n\t" + "ldr r9, [%[b], #164]\n\t" + "ldr r10, [%[b], #168]\n\t" + "ldr r14, [%[b], #172]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #160]\n\t" + "str r5, [%[r], #164]\n\t" + "str r6, [%[r], #168]\n\t" + "str r7, [%[r], #172]\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r5, [%[a], #180]\n\t" + "ldr r6, [%[a], #184]\n\t" + "ldr r7, [%[a], #188]\n\t" + "ldr r8, [%[b], #176]\n\t" + "ldr r9, [%[b], #180]\n\t" + "ldr r10, [%[b], #184]\n\t" + "ldr r14, [%[b], #188]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #176]\n\t" + "str r5, [%[r], #180]\n\t" + "str r6, [%[r], #184]\n\t" + "str r7, [%[r], #188]\n\t" + "ldr r4, [%[a], #192]\n\t" + "ldr r5, [%[a], #196]\n\t" + "ldr r6, [%[a], #200]\n\t" + "ldr r7, [%[a], #204]\n\t" + "ldr r8, [%[b], #192]\n\t" + "ldr r9, [%[b], #196]\n\t" + "ldr r10, [%[b], #200]\n\t" + "ldr r14, [%[b], #204]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #192]\n\t" + "str r5, [%[r], #196]\n\t" + "str r6, [%[r], #200]\n\t" + "str r7, [%[r], #204]\n\t" + "ldr r4, [%[a], #208]\n\t" + "ldr r5, [%[a], #212]\n\t" + "ldr r6, [%[a], #216]\n\t" + "ldr r7, [%[a], #220]\n\t" + "ldr r8, [%[b], #208]\n\t" + "ldr r9, [%[b], #212]\n\t" + "ldr r10, [%[b], #216]\n\t" + "ldr r14, [%[b], #220]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #208]\n\t" + "str r5, [%[r], #212]\n\t" + "str r6, [%[r], #216]\n\t" + "str r7, [%[r], #220]\n\t" + "ldr r4, [%[a], #224]\n\t" + "ldr r5, [%[a], #228]\n\t" + "ldr r6, [%[a], #232]\n\t" + "ldr r7, [%[a], #236]\n\t" + "ldr r8, [%[b], #224]\n\t" + "ldr r9, [%[b], #228]\n\t" + "ldr r10, [%[b], #232]\n\t" + "ldr r14, [%[b], #236]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #224]\n\t" + "str r5, [%[r], #228]\n\t" + "str r6, [%[r], #232]\n\t" + "str r7, [%[r], #236]\n\t" + "ldr r4, [%[a], #240]\n\t" + "ldr r5, [%[a], #244]\n\t" + "ldr r6, [%[a], #248]\n\t" + "ldr r7, [%[a], #252]\n\t" + "ldr r8, [%[b], #240]\n\t" + "ldr r9, [%[b], #244]\n\t" + "ldr r10, [%[b], #248]\n\t" + "ldr r14, [%[b], #252]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + 
"adcs r7, r7, r14\n\t" + "str r4, [%[r], #240]\n\t" + "str r5, [%[r], #244]\n\t" + "str r6, [%[r], #248]\n\t" + "str r7, [%[r], #252]\n\t" + "ldr r4, [%[a], #256]\n\t" + "ldr r5, [%[a], #260]\n\t" + "ldr r6, [%[a], #264]\n\t" + "ldr r7, [%[a], #268]\n\t" + "ldr r8, [%[b], #256]\n\t" + "ldr r9, [%[b], #260]\n\t" + "ldr r10, [%[b], #264]\n\t" + "ldr r14, [%[b], #268]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #256]\n\t" + "str r5, [%[r], #260]\n\t" + "str r6, [%[r], #264]\n\t" + "str r7, [%[r], #268]\n\t" + "ldr r4, [%[a], #272]\n\t" + "ldr r5, [%[a], #276]\n\t" + "ldr r6, [%[a], #280]\n\t" + "ldr r7, [%[a], #284]\n\t" + "ldr r8, [%[b], #272]\n\t" + "ldr r9, [%[b], #276]\n\t" + "ldr r10, [%[b], #280]\n\t" + "ldr r14, [%[b], #284]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #272]\n\t" + "str r5, [%[r], #276]\n\t" + "str r6, [%[r], #280]\n\t" + "str r7, [%[r], #284]\n\t" + "ldr r4, [%[a], #288]\n\t" + "ldr r5, [%[a], #292]\n\t" + "ldr r6, [%[a], #296]\n\t" + "ldr r7, [%[a], #300]\n\t" + "ldr r8, [%[b], #288]\n\t" + "ldr r9, [%[b], #292]\n\t" + "ldr r10, [%[b], #296]\n\t" + "ldr r14, [%[b], #300]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #288]\n\t" + "str r5, [%[r], #292]\n\t" + "str r6, [%[r], #296]\n\t" + "str r7, [%[r], #300]\n\t" + "ldr r4, [%[a], #304]\n\t" + "ldr r5, [%[a], #308]\n\t" + "ldr r6, [%[a], #312]\n\t" + "ldr r7, [%[a], #316]\n\t" + "ldr r8, [%[b], #304]\n\t" + "ldr r9, [%[b], #308]\n\t" + "ldr r10, [%[b], #312]\n\t" + "ldr r14, [%[b], #316]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #304]\n\t" + "str r5, [%[r], #308]\n\t" + "str r6, [%[r], #312]\n\t" + "str r7, [%[r], #316]\n\t" + "ldr r4, [%[a], #320]\n\t" + "ldr r5, [%[a], #324]\n\t" + "ldr r6, [%[a], #328]\n\t" + "ldr r7, [%[a], #332]\n\t" + "ldr r8, [%[b], #320]\n\t" + "ldr r9, [%[b], #324]\n\t" + "ldr r10, [%[b], #328]\n\t" + "ldr r14, [%[b], #332]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #320]\n\t" + "str r5, [%[r], #324]\n\t" + "str r6, [%[r], #328]\n\t" + "str r7, [%[r], #332]\n\t" + "ldr r4, [%[a], #336]\n\t" + "ldr r5, [%[a], #340]\n\t" + "ldr r6, [%[a], #344]\n\t" + "ldr r7, [%[a], #348]\n\t" + "ldr r8, [%[b], #336]\n\t" + "ldr r9, [%[b], #340]\n\t" + "ldr r10, [%[b], #344]\n\t" + "ldr r14, [%[b], #348]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #336]\n\t" + "str r5, [%[r], #340]\n\t" + "str r6, [%[r], #344]\n\t" + "str r7, [%[r], #348]\n\t" + "ldr r4, [%[a], #352]\n\t" + "ldr r5, [%[a], #356]\n\t" + "ldr r6, [%[a], #360]\n\t" + "ldr r7, [%[a], #364]\n\t" + "ldr r8, [%[b], #352]\n\t" + "ldr r9, [%[b], #356]\n\t" + "ldr r10, [%[b], #360]\n\t" + "ldr r14, [%[b], #364]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #352]\n\t" + "str r5, [%[r], #356]\n\t" + "str r6, [%[r], #360]\n\t" + "str r7, [%[r], #364]\n\t" + "ldr r4, [%[a], #368]\n\t" + "ldr r5, [%[a], #372]\n\t" + "ldr r6, [%[a], #376]\n\t" + "ldr r7, [%[a], #380]\n\t" + "ldr r8, [%[b], #368]\n\t" + "ldr r9, [%[b], #372]\n\t" + "ldr r10, [%[b], #376]\n\t" + "ldr r14, [%[b], #380]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, 
r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #368]\n\t" + "str r5, [%[r], #372]\n\t" + "str r6, [%[r], #376]\n\t" + "str r7, [%[r], #380]\n\t" + "adc %[c], r12, r12\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<48; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 48; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[96]; + sp_digit a1[48]; + sp_digit b1[48]; + sp_digit z2[96]; + sp_digit u, ca, cb; + + ca = sp_3072_add_48(a1, a, &a[48]); + cb = sp_3072_add_48(b1, b, &b[48]); + u = ca & cb; + sp_3072_mul_48(z1, a1, b1); + sp_3072_mul_48(z2, &a[48], &b[48]); + sp_3072_mul_48(z0, a, b); + sp_3072_mask_48(r + 96, a1, 0 - cb); + sp_3072_mask_48(b1, b1, 0 - ca); + u += sp_3072_add_48(r + 96, r + 96, b1); + u += sp_3072_sub_in_place_96(z1, z2); + u += sp_3072_sub_in_place_96(z1, z0); + u += sp_3072_add_96(r + 48, r + 48, z1); + r[144] = u; + XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1)); + (void)sp_3072_add_96(r + 96, r + 96, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[96]; + sp_digit z1[96]; + sp_digit a1[48]; + sp_digit u; + + u = sp_3072_add_48(a1, a, &a[48]); + sp_3072_sqr_48(z1, a1); + sp_3072_sqr_48(z2, &a[48]); + sp_3072_sqr_48(z0, a); + sp_3072_mask_48(r + 96, a1, 0 - u); + u += sp_3072_add_48(r + 96, r + 96, r + 96); + u += sp_3072_sub_in_place_96(z1, z2); + u += sp_3072_sub_in_place_96(z1, z0); + u += sp_3072_add_96(r + 48, r + 48, z1); + r[144] = u; + XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1)); + (void)sp_3072_add_96(r + 96, r + 96, z2); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add r12, %[a], #384\n\t" + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr r4, [%[a]], #4\n\t" + "ldr r5, [%[a]], #4\n\t" + "ldr r6, [%[a]], #4\n\t" + "ldr r7, [%[a]], #4\n\t" + "ldr r8, [%[b]], #4\n\t" + "ldr r9, [%[b]], #4\n\t" + "ldr r10, [%[b]], #4\n\t" + "ldr r14, [%[b]], #4\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r]], #4\n\t" + "str r5, [%[r]], #4\n\t" + "str r6, [%[r]], #4\n\t" + "str r7, [%[r]], #4\n\t" + "mov r4, #0\n\t" + "adc %[c], r4, #0\n\t" + "cmp %[a], r12\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r14, #0\n\t" + "add r12, %[a], #384\n\t" + "\n1:\n\t" + "subs %[c], r14, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b]], #4\n\t" + "ldr r8, [%[b]], #4\n\t" + "ldr r9, [%[b]], #4\n\t" + "ldr r10, [%[b]], #4\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "str r3, [%[a]], #4\n\t" + "str r4, [%[a]], #4\n\t" + "str r5, [%[a]], #4\n\t" + "str r6, [%[a]], #4\n\t" + "sbc %[c], r14, r14\n\t" + "cmp %[a], r12\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "sub sp, sp, #768\n\t" + "mov r5, #0\n\t" + "mov r6, #0\n\t" + "mov r7, #0\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "subs r3, r5, #380\n\t" + "it cc\n\t" + "movcc r3, #0\n\t" + "sub r4, r5, r3\n\t" + "\n2:\n\t" + "ldr r14, [%[a], r3]\n\t" + "ldr r12, [%[b], r4]\n\t" + "umull r9, r10, r14, r12\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, #384\n\t" + "beq 3f\n\t" + "cmp r3, r5\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "str r6, [sp, r5]\n\t" + "mov r6, r7\n\t" + "mov r7, r8\n\t" + "mov r8, #0\n\t" + "add r5, r5, #4\n\t" + "cmp r5, #760\n\t" + "ble 1b\n\t" + "str r6, [sp, r5]\n\t" + "\n4:\n\t" + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r3, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r3, [%[r], #12]\n\t" + "add sp, sp, #16\n\t" + "add %[r], %[r], #16\n\t" + "subs r5, r5, #16\n\t" + "bgt 4b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
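In the WOLFSSL_SP_SMALL sp_3072_add_96() loop above, the loop-closing `cmp %[a], r12` destroys the carry flag, so the flag is parked in %[c] after each four-word group (`mov r4, #0` / `adc %[c], r4, #0`) and re-created at the top of the next iteration: `adds %[c], %[c], #-1` subtracts one from a value that is 0 or 1, which sets the carry flag exactly when %[c] was 1. The subtraction loops play the mirror-image game with borrows, saving 0 or all-ones via `sbc %[c], r14, r14` and reloading with `subs %[c], r14, %[c]`. In portable C the same bookkeeping looks like this (add_words is my name, not wolfSSL's):

#include <stdint.h>

/* Portable shape of the small-code add loop: the assembly keeps the
 * running carry in the C flag across each ldr/adcs/str group and parks
 * it in a register between iterations. */
static uint32_t add_words(uint32_t *r, const uint32_t *a,
                          const uint32_t *b, int n)
{
    uint32_t c = 0;                           /* %[c]: 0 or 1 between groups */
    for (int i = 0; i < n; i++) {
        uint64_t t = (uint64_t)a[i] + b[i] + c;   /* the adcs chain */
        r[i] = (uint32_t)t;
        c = (uint32_t)(t >> 32);              /* "adc %[c], r4, #0" */
    }
    return c;                                 /* consumed by the caller */
}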
+ */ +static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "sub sp, sp, #768\n\t" + "mov r12, #0\n\t" + "mov r6, #0\n\t" + "mov r7, #0\n\t" + "mov r8, #0\n\t" + "mov r5, #0\n\t" + "\n1:\n\t" + "subs r3, r5, #380\n\t" + "it cc\n\t" + "movcc r3, r12\n\t" + "sub r4, r5, r3\n\t" + "\n2:\n\t" + "cmp r4, r3\n\t" + "beq 4f\n\t" + "ldr r14, [%[a], r3]\n\t" + "ldr r9, [%[a], r4]\n\t" + "umull r9, r10, r14, r9\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "bal 5f\n\t" + "\n4:\n\t" + "ldr r14, [%[a], r3]\n\t" + "umull r9, r10, r14, r14\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "\n5:\n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, #384\n\t" + "beq 3f\n\t" + "cmp r3, r4\n\t" + "bgt 3f\n\t" + "cmp r3, r5\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "str r6, [sp, r5]\n\t" + "mov r6, r7\n\t" + "mov r7, r8\n\t" + "mov r8, #0\n\t" + "add r5, r5, #4\n\t" + "cmp r5, #760\n\t" + "ble 1b\n\t" + "str r6, [sp, r5]\n\t" + "\n4:\n\t" + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r3, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r3, [%[r], #12]\n\t" + "add sp, sp, #16\n\t" + "add %[r], %[r], #16\n\t" + "subs r5, r5, #16\n\t" + "bgt 4b\n\t" + : [r] "+r" (r) + : [a] "r" (a) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#ifdef WOLFSSL_SP_SMALL +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m) +{ + int i; + + for (i=0; i<48; i++) { + r[i] = a[i] & m; + } +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add r12, %[a], #192\n\t" + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr r4, [%[a]], #4\n\t" + "ldr r5, [%[a]], #4\n\t" + "ldr r6, [%[a]], #4\n\t" + "ldr r7, [%[a]], #4\n\t" + "ldr r8, [%[b]], #4\n\t" + "ldr r9, [%[b]], #4\n\t" + "ldr r10, [%[b]], #4\n\t" + "ldr r14, [%[b]], #4\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r]], #4\n\t" + "str r5, [%[r]], #4\n\t" + "str r6, [%[r]], #4\n\t" + "str r7, [%[r]], #4\n\t" + "mov r4, #0\n\t" + "adc %[c], r4, #0\n\t" + "cmp %[a], r12\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. 
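The looped sp_3072_sqr_96() above halves the multiply count by symmetry: a[i]*a[j] with i != j feeds output column i+j twice, which is why the umull result r9:r10 is added to the r6:r7:r8 accumulator twice in the main path, while the diagonal i == j case (the `beq 4f` branch) is added once. A portable sketch of that structure, with my own names and 16-bit digits so one uint64_t can stand in for the three-register accumulator:

#include <stdint.h>
#include <string.h>

typedef uint16_t digit;   /* wolfSSL uses 32-bit sp_digits; 16-bit digits
                             keep every carry inside one uint64_t here */

/* Square by symmetry: 2*a[i]*a[j] for i < j, a[i]*a[i] once. */
static void sqr_words(digit *r, const digit *a, int n)
{
    memset(r, 0, 2 * (size_t)n * sizeof(digit));
    for (int i = 0; i < n; i++) {
        /* diagonal term, added once (the asm's "beq 4f" branch) */
        uint64_t c = (uint64_t)a[i] * a[i] + r[2 * i];
        r[2 * i] = (digit)c;
        c >>= 16;
        for (int j = i + 1; j < n; j++) {
            /* off-diagonal term, counted twice (the doubled add triple) */
            c += 2 * (uint64_t)a[i] * a[j] + r[i + j];
            r[i + j] = (digit)c;
            c >>= 16;
        }
        for (int k = i + n; c != 0; k++) {   /* ripple the leftover carry;
                                                stays within 2n digits */
            c += r[k];
            r[k] = (digit)c;
            c >>= 16;
        }
    }
}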
+ */ +static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r14, #0\n\t" + "add r12, %[a], #192\n\t" + "\n1:\n\t" + "subs %[c], r14, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b]], #4\n\t" + "ldr r8, [%[b]], #4\n\t" + "ldr r9, [%[b]], #4\n\t" + "ldr r10, [%[b]], #4\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "str r3, [%[a]], #4\n\t" + "str r4, [%[a]], #4\n\t" + "str r5, [%[a]], #4\n\t" + "str r6, [%[a]], #4\n\t" + "sbc %[c], r14, r14\n\t" + "cmp %[a], r12\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "sub sp, sp, #384\n\t" + "mov r5, #0\n\t" + "mov r6, #0\n\t" + "mov r7, #0\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "subs r3, r5, #188\n\t" + "it cc\n\t" + "movcc r3, #0\n\t" + "sub r4, r5, r3\n\t" + "\n2:\n\t" + "ldr r14, [%[a], r3]\n\t" + "ldr r12, [%[b], r4]\n\t" + "umull r9, r10, r14, r12\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, #192\n\t" + "beq 3f\n\t" + "cmp r3, r5\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "str r6, [sp, r5]\n\t" + "mov r6, r7\n\t" + "mov r7, r8\n\t" + "mov r8, #0\n\t" + "add r5, r5, #4\n\t" + "cmp r5, #376\n\t" + "ble 1b\n\t" + "str r6, [sp, r5]\n\t" + "\n4:\n\t" + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r3, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r3, [%[r], #12]\n\t" + "add sp, sp, #16\n\t" + "add %[r], %[r], #16\n\t" + "subs r5, r5, #16\n\t" + "bgt 4b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
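sp_3072_mul_48() above (like the 96-digit loop before it) is column-ordered, often called product scanning or Comba multiplication: r5 is the output column as a byte offset, r3 and r4 walk a and b with r3 + r4 == r5, and `subs r3, r5, #188` / `movcc r3, #0` clamps the start index to max(0, k - 47) in scaled form. Each finished digit goes to a stack scratch area and is copied out in the trailing `4:` loop, which also makes it safe for r to alias a or b. A compact C rendering of that shape, with hypothetical names and 16-bit digits so a single uint64_t can play the r6:r7:r8 column accumulator:

#include <stdint.h>

typedef uint16_t digit;                    /* wolfSSL: 32-bit sp_digits */

/* Column-ordered multiply: for each output column k, sum every
 * a[i] * b[k-i], emit the low digit, shift the accumulator down. */
static void mul_comba(digit *r, const digit *a, const digit *b, int n)
{
    uint64_t acc = 0;                      /* the asm's r6:r7:r8 triple */
    for (int k = 0; k <= 2 * n - 2; k++) {
        int i0 = (k < n) ? 0 : k - n + 1;  /* the "subs r3, r5, #188" clamp */
        for (int i = i0; i <= k && i < n; i++)
            acc += (uint64_t)a[i] * b[k - i];   /* the umull */
        r[k] = (digit)acc;                 /* "str r6, [sp, r5]" */
        acc >>= 16;                        /* "mov r6, r7; mov r7, r8" */
    }
    r[2 * n - 1] = (digit)acc;             /* top digit */
}

The sketch writes straight into r and skips the scratch buffer, so unlike the assembly it assumes r does not alias a or b.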
+ */
+static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "sub sp, sp, #384\n\t"
+        "mov r12, #0\n\t"
+        "mov r6, #0\n\t"
+        "mov r7, #0\n\t"
+        "mov r8, #0\n\t"
+        "mov r5, #0\n\t"
+        "\n1:\n\t"
+        "subs r3, r5, #188\n\t"
+        "it cc\n\t"
+        "movcc r3, r12\n\t"
+        "sub r4, r5, r3\n\t"
+        "\n2:\n\t"
+        "cmp r4, r3\n\t"
+        "beq 4f\n\t"
+        "ldr r14, [%[a], r3]\n\t"
+        "ldr r9, [%[a], r4]\n\t"
+        "umull r9, r10, r14, r9\n\t"
+        "adds r6, r6, r9\n\t"
+        "adcs r7, r7, r10\n\t"
+        "adc r8, r8, r12\n\t"
+        "adds r6, r6, r9\n\t"
+        "adcs r7, r7, r10\n\t"
+        "adc r8, r8, r12\n\t"
+        "bal 5f\n\t"
+        "\n4:\n\t"
+        "ldr r14, [%[a], r3]\n\t"
+        "umull r9, r10, r14, r14\n\t"
+        "adds r6, r6, r9\n\t"
+        "adcs r7, r7, r10\n\t"
+        "adc r8, r8, r12\n\t"
+        "\n5:\n\t"
+        "add r3, r3, #4\n\t"
+        "sub r4, r4, #4\n\t"
+        "cmp r3, #192\n\t"
+        "beq 3f\n\t"
+        "cmp r3, r4\n\t"
+        "bgt 3f\n\t"
+        "cmp r3, r5\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        "str r6, [sp, r5]\n\t"
+        "mov r6, r7\n\t"
+        "mov r7, r8\n\t"
+        "mov r8, #0\n\t"
+        "add r5, r5, #4\n\t"
+        "cmp r5, #376\n\t"
+        "ble 1b\n\t"
+        "str r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr r6, [sp, #0]\n\t"
+        "ldr r7, [sp, #4]\n\t"
+        "ldr r8, [sp, #8]\n\t"
+        "ldr r3, [sp, #12]\n\t"
+        "str r6, [%[r], #0]\n\t"
+        "str r7, [%[r], #4]\n\t"
+        "str r8, [%[r], #8]\n\t"
+        "str r3, [%[r], #12]\n\t"
+        "add sp, sp, #16\n\t"
+        "add %[r], %[r], #16\n\t"
+        "subs r5, r5, #16\n\t"
+        "bgt 4b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+        );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+
+    /* rho = -1/m mod b */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
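sp_3072_mont_setup() above is Hensel lifting: if x*b == 1 (mod 2^k) and x' = x*(2 - b*x), then writing x*b = 1 + t*2^k gives x'*b = 2*x*b - (x*b)^2 = 1 - t^2*2^(2k), so each multiply doubles the number of correct low bits. The seed (((b + 2) & 4) << 1) + b is exact modulo 2^4 for every odd b, hence three steps reach 2^32, and negating gives rho = -1/m[0] mod 2^32 for the reduction loop. A standalone check (mont_rho and the sample digit are mine):

#include <stdint.h>
#include <stdio.h>

/* Newton/Hensel iteration for x = 1/b mod 2^32, b odd; rho = -x. */
static uint32_t mont_rho(uint32_t b)
{
    uint32_t x = (((b + 2) & 4) << 1) + b; /* x*b == 1 mod 2^4  */
    x *= 2 - b * x;                        /* ...   mod 2^8   */
    x *= 2 - b * x;                        /* ...   mod 2^16  */
    x *= 2 - b * x;                        /* ...   mod 2^32  */
    return (uint32_t)0 - x;                /* rho = -1/b mod 2^32 */
}

int main(void)
{
    uint32_t m0  = 0xC96D1F61u;            /* any odd low digit works */
    uint32_t rho = mont_rho(m0);
    /* rho * m0 == -1 (mod 2^32), so rho*m0 + 1 wraps to zero */
    printf("check: 0x%08X\n", rho * m0 + 1);
    return 0;
}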
+ */ +static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r5, r3, %[b], r8\n\t" + "mov r4, #0\n\t" + "str r5, [%[r]]\n\t" + "mov r5, #0\n\t" + "mov r9, #4\n\t" + "1:\n\t" + "ldr r8, [%[a], r9]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], r9]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r9, r9, #4\n\t" + "cmp r9, #384\n\t" + "blt 1b\n\t" + "str r3, [%[r], #384]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#else + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r3, r4, %[b], r8\n\t" + "mov r5, #0\n\t" + "str r3, [%[r]]\n\t" + "# A[1] * B\n\t" + "ldr r8, [%[a], #4]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #4]\n\t" + "# A[2] * B\n\t" + "ldr r8, [%[a], #8]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #8]\n\t" + "# A[3] * B\n\t" + "ldr r8, [%[a], #12]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #12]\n\t" + "# A[4] * B\n\t" + "ldr r8, [%[a], #16]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #16]\n\t" + "# A[5] * B\n\t" + "ldr r8, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #20]\n\t" + "# A[6] * B\n\t" + "ldr r8, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #24]\n\t" + "# A[7] * B\n\t" + "ldr r8, [%[a], #28]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #28]\n\t" + "# A[8] * B\n\t" + "ldr r8, [%[a], #32]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #32]\n\t" + "# A[9] * B\n\t" + "ldr r8, [%[a], #36]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #36]\n\t" + "# A[10] * B\n\t" + "ldr r8, [%[a], #40]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #40]\n\t" + "# A[11] * B\n\t" + "ldr r8, [%[a], #44]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #44]\n\t" + "# A[12] * B\n\t" + "ldr r8, [%[a], #48]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #48]\n\t" + "# A[13] * B\n\t" + "ldr r8, [%[a], #52]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #52]\n\t" + "# A[14] * B\n\t" + "ldr r8, [%[a], #56]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], 
r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #56]\n\t" + "# A[15] * B\n\t" + "ldr r8, [%[a], #60]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #60]\n\t" + "# A[16] * B\n\t" + "ldr r8, [%[a], #64]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #64]\n\t" + "# A[17] * B\n\t" + "ldr r8, [%[a], #68]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #68]\n\t" + "# A[18] * B\n\t" + "ldr r8, [%[a], #72]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #72]\n\t" + "# A[19] * B\n\t" + "ldr r8, [%[a], #76]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #76]\n\t" + "# A[20] * B\n\t" + "ldr r8, [%[a], #80]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #80]\n\t" + "# A[21] * B\n\t" + "ldr r8, [%[a], #84]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #84]\n\t" + "# A[22] * B\n\t" + "ldr r8, [%[a], #88]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #88]\n\t" + "# A[23] * B\n\t" + "ldr r8, [%[a], #92]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #92]\n\t" + "# A[24] * B\n\t" + "ldr r8, [%[a], #96]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #96]\n\t" + "# A[25] * B\n\t" + "ldr r8, [%[a], #100]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #100]\n\t" + "# A[26] * B\n\t" + "ldr r8, [%[a], #104]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #104]\n\t" + "# A[27] * B\n\t" + "ldr r8, [%[a], #108]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #108]\n\t" + "# A[28] * B\n\t" + "ldr r8, [%[a], #112]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #112]\n\t" + "# A[29] * B\n\t" + "ldr r8, [%[a], #116]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #116]\n\t" + "# A[30] * B\n\t" + "ldr r8, [%[a], #120]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #120]\n\t" + "# A[31] * B\n\t" + "ldr r8, [%[a], #124]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #124]\n\t" + "# A[32] * B\n\t" + "ldr r8, [%[a], #128]\n\t" + "mov r4, 
#0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #128]\n\t" + "# A[33] * B\n\t" + "ldr r8, [%[a], #132]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #132]\n\t" + "# A[34] * B\n\t" + "ldr r8, [%[a], #136]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #136]\n\t" + "# A[35] * B\n\t" + "ldr r8, [%[a], #140]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #140]\n\t" + "# A[36] * B\n\t" + "ldr r8, [%[a], #144]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #144]\n\t" + "# A[37] * B\n\t" + "ldr r8, [%[a], #148]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #148]\n\t" + "# A[38] * B\n\t" + "ldr r8, [%[a], #152]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #152]\n\t" + "# A[39] * B\n\t" + "ldr r8, [%[a], #156]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #156]\n\t" + "# A[40] * B\n\t" + "ldr r8, [%[a], #160]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #160]\n\t" + "# A[41] * B\n\t" + "ldr r8, [%[a], #164]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #164]\n\t" + "# A[42] * B\n\t" + "ldr r8, [%[a], #168]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #168]\n\t" + "# A[43] * B\n\t" + "ldr r8, [%[a], #172]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #172]\n\t" + "# A[44] * B\n\t" + "ldr r8, [%[a], #176]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #176]\n\t" + "# A[45] * B\n\t" + "ldr r8, [%[a], #180]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #180]\n\t" + "# A[46] * B\n\t" + "ldr r8, [%[a], #184]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #184]\n\t" + "# A[47] * B\n\t" + "ldr r8, [%[a], #188]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #188]\n\t" + "# A[48] * B\n\t" + "ldr r8, [%[a], #192]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #192]\n\t" + "# A[49] * B\n\t" + "ldr r8, [%[a], #196]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #196]\n\t" + "# A[50] * 
B\n\t" + "ldr r8, [%[a], #200]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #200]\n\t" + "# A[51] * B\n\t" + "ldr r8, [%[a], #204]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #204]\n\t" + "# A[52] * B\n\t" + "ldr r8, [%[a], #208]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #208]\n\t" + "# A[53] * B\n\t" + "ldr r8, [%[a], #212]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #212]\n\t" + "# A[54] * B\n\t" + "ldr r8, [%[a], #216]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #216]\n\t" + "# A[55] * B\n\t" + "ldr r8, [%[a], #220]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #220]\n\t" + "# A[56] * B\n\t" + "ldr r8, [%[a], #224]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #224]\n\t" + "# A[57] * B\n\t" + "ldr r8, [%[a], #228]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #228]\n\t" + "# A[58] * B\n\t" + "ldr r8, [%[a], #232]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #232]\n\t" + "# A[59] * B\n\t" + "ldr r8, [%[a], #236]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #236]\n\t" + "# A[60] * B\n\t" + "ldr r8, [%[a], #240]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #240]\n\t" + "# A[61] * B\n\t" + "ldr r8, [%[a], #244]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #244]\n\t" + "# A[62] * B\n\t" + "ldr r8, [%[a], #248]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #248]\n\t" + "# A[63] * B\n\t" + "ldr r8, [%[a], #252]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #252]\n\t" + "# A[64] * B\n\t" + "ldr r8, [%[a], #256]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #256]\n\t" + "# A[65] * B\n\t" + "ldr r8, [%[a], #260]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #260]\n\t" + "# A[66] * B\n\t" + "ldr r8, [%[a], #264]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #264]\n\t" + "# A[67] * B\n\t" + "ldr r8, [%[a], #268]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
r10\n\t" + "str r4, [%[r], #268]\n\t" + "# A[68] * B\n\t" + "ldr r8, [%[a], #272]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #272]\n\t" + "# A[69] * B\n\t" + "ldr r8, [%[a], #276]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #276]\n\t" + "# A[70] * B\n\t" + "ldr r8, [%[a], #280]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #280]\n\t" + "# A[71] * B\n\t" + "ldr r8, [%[a], #284]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #284]\n\t" + "# A[72] * B\n\t" + "ldr r8, [%[a], #288]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #288]\n\t" + "# A[73] * B\n\t" + "ldr r8, [%[a], #292]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #292]\n\t" + "# A[74] * B\n\t" + "ldr r8, [%[a], #296]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #296]\n\t" + "# A[75] * B\n\t" + "ldr r8, [%[a], #300]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #300]\n\t" + "# A[76] * B\n\t" + "ldr r8, [%[a], #304]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #304]\n\t" + "# A[77] * B\n\t" + "ldr r8, [%[a], #308]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #308]\n\t" + "# A[78] * B\n\t" + "ldr r8, [%[a], #312]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #312]\n\t" + "# A[79] * B\n\t" + "ldr r8, [%[a], #316]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #316]\n\t" + "# A[80] * B\n\t" + "ldr r8, [%[a], #320]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #320]\n\t" + "# A[81] * B\n\t" + "ldr r8, [%[a], #324]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #324]\n\t" + "# A[82] * B\n\t" + "ldr r8, [%[a], #328]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #328]\n\t" + "# A[83] * B\n\t" + "ldr r8, [%[a], #332]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #332]\n\t" + "# A[84] * B\n\t" + "ldr r8, [%[a], #336]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #336]\n\t" + "# A[85] * B\n\t" + "ldr r8, [%[a], #340]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #340]\n\t" + "# A[86] * B\n\t" + "ldr r8, [%[a], #344]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #344]\n\t" + "# A[87] * B\n\t" + "ldr r8, [%[a], #348]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #348]\n\t" + "# A[88] * B\n\t" + "ldr r8, [%[a], #352]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #352]\n\t" + "# A[89] * B\n\t" + "ldr r8, [%[a], #356]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #356]\n\t" + "# A[90] * B\n\t" + "ldr r8, [%[a], #360]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #360]\n\t" + "# A[91] * B\n\t" + "ldr r8, [%[a], #364]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #364]\n\t" + "# A[92] * B\n\t" + "ldr r8, [%[a], #368]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #368]\n\t" + "# A[93] * B\n\t" + "ldr r8, [%[a], #372]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #372]\n\t" + "# A[94] * B\n\t" + "ldr r8, [%[a], #376]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #376]\n\t" + "# A[95] * B\n\t" + "ldr r8, [%[a], #380]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "str r5, [%[r], #380]\n\t" + "str r3, [%[r], #384]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#endif +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 3072 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 48); + + /* r = 2^n mod m */ + sp_3072_sub_in_place_48(r, m); +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
+ */ +static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r9, #0\n\t" + "mov r8, #0\n\t" + "1:\n\t" + "subs %[c], r9, %[c]\n\t" + "ldr r4, [%[a], r8]\n\t" + "ldr r5, [%[b], r8]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc %[c], r9, r9\n\t" + "str r4, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, #192\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#else + __asm__ __volatile__ ( + + "mov r9, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "subs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r6, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #32]\n\t" + "str r6, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r6, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r6, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #48]\n\t" + "str r6, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r6, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r7, [%[b], #60]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #56]\n\t" + "str r6, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r6, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #64]\n\t" + "str r6, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r6, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r7, [%[b], #76]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #72]\n\t" + "str r6, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r6, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #80]\n\t" + "str r6, 
[%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r6, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r7, [%[b], #92]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #88]\n\t" + "str r6, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r6, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #96]\n\t" + "str r6, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r6, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r7, [%[b], #108]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #104]\n\t" + "str r6, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r6, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #112]\n\t" + "str r6, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r6, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r7, [%[b], #124]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #120]\n\t" + "str r6, [%[r], #124]\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r6, [%[a], #132]\n\t" + "ldr r5, [%[b], #128]\n\t" + "ldr r7, [%[b], #132]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #128]\n\t" + "str r6, [%[r], #132]\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r6, [%[a], #140]\n\t" + "ldr r5, [%[b], #136]\n\t" + "ldr r7, [%[b], #140]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #136]\n\t" + "str r6, [%[r], #140]\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r6, [%[a], #148]\n\t" + "ldr r5, [%[b], #144]\n\t" + "ldr r7, [%[b], #148]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #144]\n\t" + "str r6, [%[r], #148]\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r6, [%[a], #156]\n\t" + "ldr r5, [%[b], #152]\n\t" + "ldr r7, [%[b], #156]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #152]\n\t" + "str r6, [%[r], #156]\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r6, [%[a], #164]\n\t" + "ldr r5, [%[b], #160]\n\t" + "ldr r7, [%[b], #164]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #160]\n\t" + "str r6, [%[r], #164]\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r6, [%[a], #172]\n\t" + "ldr r5, [%[b], #168]\n\t" + "ldr r7, [%[b], #172]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #168]\n\t" + "str r6, [%[r], #172]\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r6, [%[a], #180]\n\t" + "ldr r5, [%[b], #176]\n\t" + "ldr r7, [%[b], #180]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #176]\n\t" + "str r6, [%[r], #180]\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r6, [%[a], #188]\n\t" + "ldr r5, [%[b], #184]\n\t" + "ldr r7, [%[b], #188]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #184]\n\t" + "str r6, [%[r], #188]\n\t" + "sbc 
%[c], r9, r9\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "# i = 0\n\t" + "mov r12, #0\n\t" + "ldr r10, [%[a], #0]\n\t" + "ldr r14, [%[a], #4]\n\t" + "\n1:\n\t" + "# mu = a[i] * mp\n\t" + "mul r8, %[mp], r10\n\t" + "# a[i+0] += m[0] * mu\n\t" + "ldr r7, [%[m], #0]\n\t" + "ldr r9, [%[a], #0]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" + "# a[i+1] += m[1] * mu\n\t" + "ldr r7, [%[m], #4]\n\t" + "ldr r9, [%[a], #4]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r10, r14, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r10, r10, r5\n\t" + "adc r4, r4, #0\n\t" + "# a[i+2] += m[2] * mu\n\t" + "ldr r7, [%[m], #8]\n\t" + "ldr r14, [%[a], #8]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r14, r14, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r14, r14, r4\n\t" + "adc r5, r5, #0\n\t" + "# a[i+3] += m[3] * mu\n\t" + "ldr r7, [%[m], #12]\n\t" + "ldr r9, [%[a], #12]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+4] += m[4] * mu\n\t" + "ldr r7, [%[m], #16]\n\t" + "ldr r9, [%[a], #16]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+5] += m[5] * mu\n\t" + "ldr r7, [%[m], #20]\n\t" + "ldr r9, [%[a], #20]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+6] += m[6] * mu\n\t" + "ldr r7, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+7] += m[7] * mu\n\t" + "ldr r7, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #28]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+8] += m[8] * mu\n\t" + "ldr r7, [%[m], #32]\n\t" + "ldr r9, [%[a], #32]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+9] += m[9] * mu\n\t" + "ldr r7, [%[m], #36]\n\t" + "ldr r9, [%[a], #36]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+10] += m[10] * mu\n\t" + "ldr r7, [%[m], #40]\n\t" + "ldr r9, [%[a], #40]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+11] += m[11] * mu\n\t" + "ldr r7, [%[m], #44]\n\t" + "ldr r9, [%[a], #44]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #44]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+12] += m[12] * mu\n\t" + "ldr r7, [%[m], #48]\n\t" + "ldr r9, [%[a], 
#48]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+13] += m[13] * mu\n\t" + "ldr r7, [%[m], #52]\n\t" + "ldr r9, [%[a], #52]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #52]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+14] += m[14] * mu\n\t" + "ldr r7, [%[m], #56]\n\t" + "ldr r9, [%[a], #56]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+15] += m[15] * mu\n\t" + "ldr r7, [%[m], #60]\n\t" + "ldr r9, [%[a], #60]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #60]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+16] += m[16] * mu\n\t" + "ldr r7, [%[m], #64]\n\t" + "ldr r9, [%[a], #64]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+17] += m[17] * mu\n\t" + "ldr r7, [%[m], #68]\n\t" + "ldr r9, [%[a], #68]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #68]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+18] += m[18] * mu\n\t" + "ldr r7, [%[m], #72]\n\t" + "ldr r9, [%[a], #72]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+19] += m[19] * mu\n\t" + "ldr r7, [%[m], #76]\n\t" + "ldr r9, [%[a], #76]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #76]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+20] += m[20] * mu\n\t" + "ldr r7, [%[m], #80]\n\t" + "ldr r9, [%[a], #80]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+21] += m[21] * mu\n\t" + "ldr r7, [%[m], #84]\n\t" + "ldr r9, [%[a], #84]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #84]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+22] += m[22] * mu\n\t" + "ldr r7, [%[m], #88]\n\t" + "ldr r9, [%[a], #88]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+23] += m[23] * mu\n\t" + "ldr r7, [%[m], #92]\n\t" + "ldr r9, [%[a], #92]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #92]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+24] += m[24] * mu\n\t" + "ldr r7, [%[m], #96]\n\t" + "ldr r9, [%[a], #96]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+25] += m[25] * mu\n\t" + "ldr r7, [%[m], #100]\n\t" + "ldr r9, [%[a], #100]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #100]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+26] += m[26] * mu\n\t" + "ldr r7, [%[m], #104]\n\t" + "ldr r9, [%[a], #104]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+27] += m[27] * mu\n\t" + "ldr 
r7, [%[m], #108]\n\t" + "ldr r9, [%[a], #108]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #108]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+28] += m[28] * mu\n\t" + "ldr r7, [%[m], #112]\n\t" + "ldr r9, [%[a], #112]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+29] += m[29] * mu\n\t" + "ldr r7, [%[m], #116]\n\t" + "ldr r9, [%[a], #116]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #116]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+30] += m[30] * mu\n\t" + "ldr r7, [%[m], #120]\n\t" + "ldr r9, [%[a], #120]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+31] += m[31] * mu\n\t" + "ldr r7, [%[m], #124]\n\t" + "ldr r9, [%[a], #124]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #124]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+32] += m[32] * mu\n\t" + "ldr r7, [%[m], #128]\n\t" + "ldr r9, [%[a], #128]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+33] += m[33] * mu\n\t" + "ldr r7, [%[m], #132]\n\t" + "ldr r9, [%[a], #132]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #132]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+34] += m[34] * mu\n\t" + "ldr r7, [%[m], #136]\n\t" + "ldr r9, [%[a], #136]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #136]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+35] += m[35] * mu\n\t" + "ldr r7, [%[m], #140]\n\t" + "ldr r9, [%[a], #140]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #140]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+36] += m[36] * mu\n\t" + "ldr r7, [%[m], #144]\n\t" + "ldr r9, [%[a], #144]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+37] += m[37] * mu\n\t" + "ldr r7, [%[m], #148]\n\t" + "ldr r9, [%[a], #148]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #148]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+38] += m[38] * mu\n\t" + "ldr r7, [%[m], #152]\n\t" + "ldr r9, [%[a], #152]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+39] += m[39] * mu\n\t" + "ldr r7, [%[m], #156]\n\t" + "ldr r9, [%[a], #156]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #156]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+40] += m[40] * mu\n\t" + "ldr r7, [%[m], #160]\n\t" + "ldr r9, [%[a], #160]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #160]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+41] += m[41] * mu\n\t" + "ldr r7, [%[m], #164]\n\t" + "ldr r9, [%[a], #164]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, 
[%[a], #164]\n\t"
+        "adc r4, r4, #0\n\t"
+        "# a[i+42] += m[42] * mu\n\t"
+        "ldr r7, [%[m], #168]\n\t"
+        "ldr r9, [%[a], #168]\n\t"
+        "umull r6, r7, r8, r7\n\t"
+        "adds r9, r9, r6\n\t"
+        "adc r5, r7, #0\n\t"
+        "adds r9, r9, r4\n\t"
+        "str r9, [%[a], #168]\n\t"
+        "adc r5, r5, #0\n\t"
+        "# a[i+43] += m[43] * mu\n\t"
+        "ldr r7, [%[m], #172]\n\t"
+        "ldr r9, [%[a], #172]\n\t"
+        "umull r6, r7, r8, r7\n\t"
+        "adds r9, r9, r6\n\t"
+        "adc r4, r7, #0\n\t"
+        "adds r9, r9, r5\n\t"
+        "str r9, [%[a], #172]\n\t"
+        "adc r4, r4, #0\n\t"
+        "# a[i+44] += m[44] * mu\n\t"
+        "ldr r7, [%[m], #176]\n\t"
+        "ldr r9, [%[a], #176]\n\t"
+        "umull r6, r7, r8, r7\n\t"
+        "adds r9, r9, r6\n\t"
+        "adc r5, r7, #0\n\t"
+        "adds r9, r9, r4\n\t"
+        "str r9, [%[a], #176]\n\t"
+        "adc r5, r5, #0\n\t"
+        "# a[i+45] += m[45] * mu\n\t"
+        "ldr r7, [%[m], #180]\n\t"
+        "ldr r9, [%[a], #180]\n\t"
+        "umull r6, r7, r8, r7\n\t"
+        "adds r9, r9, r6\n\t"
+        "adc r4, r7, #0\n\t"
+        "adds r9, r9, r5\n\t"
+        "str r9, [%[a], #180]\n\t"
+        "adc r4, r4, #0\n\t"
+        "# a[i+46] += m[46] * mu\n\t"
+        "ldr r7, [%[m], #184]\n\t"
+        "ldr r9, [%[a], #184]\n\t"
+        "umull r6, r7, r8, r7\n\t"
+        "adds r9, r9, r6\n\t"
+        "adc r5, r7, #0\n\t"
+        "adds r9, r9, r4\n\t"
+        "str r9, [%[a], #184]\n\t"
+        "adc r5, r5, #0\n\t"
+        "# a[i+47] += m[47] * mu\n\t"
+        "ldr r7, [%[m], #188]\n\t"
+        "ldr r9, [%[a], #188]\n\t"
+        "umull r6, r7, r8, r7\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r7, r7, %[ca]\n\t"
+        "mov %[ca], #0\n\t"
+        "adc %[ca], %[ca], %[ca]\n\t"
+        "adds r9, r9, r5\n\t"
+        "str r9, [%[a], #188]\n\t"
+        "ldr r9, [%[a], #192]\n\t"
+        "adcs r9, r9, r7\n\t"
+        "str r9, [%[a], #192]\n\t"
+        "adc %[ca], %[ca], #0\n\t"
+        "# i += 1\n\t"
+        "add %[a], %[a], #4\n\t"
+        "add r12, r12, #4\n\t"
+        "cmp r12, #192\n\t"
+        "blt 1b\n\t"
+        "str r10, [%[a], #0]\n\t"
+        "str r14, [%[a], #4]\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+        );
+
+    sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_48(r, a, b);
+    sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_48(r, a);
+    sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
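sp_3072_mont_reduce_48() above is word-serial Montgomery reduction (REDC): each round multiplies the current low digit by mp to get mu (`mul r8, %[mp], r10`), adds mu*m so that digit cancels to zero, folds the carry into the top, and advances %[a] one word; after 48 rounds the upper half holds a/2^1536 mod m up to one masked subtract, and sp_3072_mont_mul_48()/sp_3072_mont_sqr_48() are then just a multiply or square followed by one such reduction. A self-contained C sketch of the same algorithm under my own names (mont_reduce, cond_sub):

#include <stdint.h>

/* Masked subtract, as in sp_3072_cond_sub_48(). */
static uint32_t cond_sub(uint32_t *r, const uint32_t *a, const uint32_t *b,
                         int n, uint32_t mask)
{
    uint64_t d, bw = 0;
    for (int i = 0; i < n; i++) {
        d = (uint64_t)a[i] - (b[i] & mask) - bw;
        r[i] = (uint32_t)d;
        bw = (d >> 32) & 1;
    }
    return (uint32_t)(0 - (uint32_t)bw);
}

/* Word-serial Montgomery reduction: a has 2n digits,
 * rho = -1/m[0] mod 2^32.  Leaves (a / 2^(32n)) mod m in a[0..n-1]. */
static void mont_reduce(uint32_t *a, const uint32_t *m, int n, uint32_t rho)
{
    uint32_t over = 0;                       /* the asm's 'ca' */
    for (int i = 0; i < n; i++) {
        uint32_t mu = a[i] * rho;            /* "mul r8, %[mp], r10" */
        uint64_t c = 0;
        for (int j = 0; j < n; j++) {        /* a += mu * m << (32*i) */
            c += (uint64_t)mu * m[j] + a[i + j];
            a[i + j] = (uint32_t)c;          /* a[i] comes out zero */
            c >>= 32;
        }
        c += (uint64_t)a[i + n] + over;      /* fold carry into the top */
        a[i + n] = (uint32_t)c;
        over = (uint32_t)(c >> 32);
    }
    /* shift down n digits and subtract m if the carry fired; this is
     * the sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca) step */
    cond_sub(a, a + n, m, n, 0 - over);
}

As in the assembly, the single masked subtract keeps the result within 48 digits rather than strictly below m, which is sufficient for the surrounding Montgomery arithmetic.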
+ */ +static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r5, r3, %[b], r8\n\t" + "mov r4, #0\n\t" + "str r5, [%[r]]\n\t" + "mov r5, #0\n\t" + "mov r9, #4\n\t" + "1:\n\t" + "ldr r8, [%[a], r9]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], r9]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r9, r9, #4\n\t" + "cmp r9, #192\n\t" + "blt 1b\n\t" + "str r3, [%[r], #192]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#else + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r3, r4, %[b], r8\n\t" + "mov r5, #0\n\t" + "str r3, [%[r]]\n\t" + "# A[1] * B\n\t" + "ldr r8, [%[a], #4]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #4]\n\t" + "# A[2] * B\n\t" + "ldr r8, [%[a], #8]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #8]\n\t" + "# A[3] * B\n\t" + "ldr r8, [%[a], #12]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #12]\n\t" + "# A[4] * B\n\t" + "ldr r8, [%[a], #16]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #16]\n\t" + "# A[5] * B\n\t" + "ldr r8, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #20]\n\t" + "# A[6] * B\n\t" + "ldr r8, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #24]\n\t" + "# A[7] * B\n\t" + "ldr r8, [%[a], #28]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #28]\n\t" + "# A[8] * B\n\t" + "ldr r8, [%[a], #32]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #32]\n\t" + "# A[9] * B\n\t" + "ldr r8, [%[a], #36]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #36]\n\t" + "# A[10] * B\n\t" + "ldr r8, [%[a], #40]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #40]\n\t" + "# A[11] * B\n\t" + "ldr r8, [%[a], #44]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #44]\n\t" + "# A[12] * B\n\t" + "ldr r8, [%[a], #48]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #48]\n\t" + "# A[13] * B\n\t" + "ldr r8, [%[a], #52]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #52]\n\t" + "# A[14] * B\n\t" + "ldr r8, [%[a], #56]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], 
r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #56]\n\t" + "# A[15] * B\n\t" + "ldr r8, [%[a], #60]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #60]\n\t" + "# A[16] * B\n\t" + "ldr r8, [%[a], #64]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #64]\n\t" + "# A[17] * B\n\t" + "ldr r8, [%[a], #68]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #68]\n\t" + "# A[18] * B\n\t" + "ldr r8, [%[a], #72]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #72]\n\t" + "# A[19] * B\n\t" + "ldr r8, [%[a], #76]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #76]\n\t" + "# A[20] * B\n\t" + "ldr r8, [%[a], #80]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #80]\n\t" + "# A[21] * B\n\t" + "ldr r8, [%[a], #84]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #84]\n\t" + "# A[22] * B\n\t" + "ldr r8, [%[a], #88]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #88]\n\t" + "# A[23] * B\n\t" + "ldr r8, [%[a], #92]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #92]\n\t" + "# A[24] * B\n\t" + "ldr r8, [%[a], #96]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #96]\n\t" + "# A[25] * B\n\t" + "ldr r8, [%[a], #100]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #100]\n\t" + "# A[26] * B\n\t" + "ldr r8, [%[a], #104]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #104]\n\t" + "# A[27] * B\n\t" + "ldr r8, [%[a], #108]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #108]\n\t" + "# A[28] * B\n\t" + "ldr r8, [%[a], #112]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #112]\n\t" + "# A[29] * B\n\t" + "ldr r8, [%[a], #116]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #116]\n\t" + "# A[30] * B\n\t" + "ldr r8, [%[a], #120]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #120]\n\t" + "# A[31] * B\n\t" + "ldr r8, [%[a], #124]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #124]\n\t" + "# A[32] * B\n\t" + "ldr r8, [%[a], #128]\n\t" + "mov r4, 
#0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #128]\n\t" + "# A[33] * B\n\t" + "ldr r8, [%[a], #132]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #132]\n\t" + "# A[34] * B\n\t" + "ldr r8, [%[a], #136]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #136]\n\t" + "# A[35] * B\n\t" + "ldr r8, [%[a], #140]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #140]\n\t" + "# A[36] * B\n\t" + "ldr r8, [%[a], #144]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #144]\n\t" + "# A[37] * B\n\t" + "ldr r8, [%[a], #148]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #148]\n\t" + "# A[38] * B\n\t" + "ldr r8, [%[a], #152]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #152]\n\t" + "# A[39] * B\n\t" + "ldr r8, [%[a], #156]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #156]\n\t" + "# A[40] * B\n\t" + "ldr r8, [%[a], #160]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #160]\n\t" + "# A[41] * B\n\t" + "ldr r8, [%[a], #164]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #164]\n\t" + "# A[42] * B\n\t" + "ldr r8, [%[a], #168]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #168]\n\t" + "# A[43] * B\n\t" + "ldr r8, [%[a], #172]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #172]\n\t" + "# A[44] * B\n\t" + "ldr r8, [%[a], #176]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #176]\n\t" + "# A[45] * B\n\t" + "ldr r8, [%[a], #180]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #180]\n\t" + "# A[46] * B\n\t" + "ldr r8, [%[a], #184]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #184]\n\t" + "# A[47] * B\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "str r5, [%[r], #188]\n\t" + "str r3, [%[r], #192]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#endif +} + +/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The dividend. + * returns the result of the division. + * + * Note that this is an approximate div. 
It may give an answer 1 larger. + */ +static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) +{ + sp_digit r = 0; + + __asm__ __volatile__ ( + "lsr r5, %[div], #1\n\t" + "add r5, r5, #1\n\t" + "mov r6, %[d0]\n\t" + "mov r7, %[d1]\n\t" + "# Do top 32\n\t" + "subs r8, r5, r7\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], %[r]\n\t" + "sub %[r], %[r], r8\n\t" + "and r8, r8, r5\n\t" + "subs r7, r7, r8\n\t" + "# Next 30 bits\n\t" + "mov r4, #29\n\t" + "1:\n\t" + "movs r6, r6, lsl #1\n\t" + "adc r7, r7, r7\n\t" + "subs r8, r5, r7\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], %[r]\n\t" + "sub %[r], %[r], r8\n\t" + "and r8, r8, r5\n\t" + "subs r7, r7, r8\n\t" + "subs r4, r4, #1\n\t" + "bpl 1b\n\t" + "add %[r], %[r], %[r]\n\t" + "add %[r], %[r], #1\n\t" + "umull r4, r5, %[r], %[div]\n\t" + "subs r4, %[d0], r4\n\t" + "sbc r5, %[d1], r5\n\t" + "add %[r], %[r], r5\n\t" + "umull r4, r5, %[r], %[div]\n\t" + "subs r4, %[d0], r4\n\t" + "sbc r5, %[d1], r5\n\t" + "add %[r], %[r], r5\n\t" + "subs r8, %[div], r4\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + : [r] "+r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "r4", "r5", "r6", "r7", "r8" + ); + return r; +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = -1; + sp_digit one = 1; + + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mov r3, #-1\n\t" + "mov r6, #188\n\t" + "1:\n\t" + "ldr r4, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "subs r6, r6, #4\n\t" + "bcs 1b\n\t" + "eor %[r], %[r], r3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "r3", "r4", "r5", "r6", "r7" + ); +#else + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mov r3, #-1\n\t" + "ldr r4, [%[a], #188]\n\t" + "ldr r5, [%[b], #188]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r5, [%[b], #184]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #180]\n\t" + "ldr r5, [%[b], #180]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r5, [%[b], #176]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #172]\n\t" + "ldr r5, [%[b], #172]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r5, [%[b], #168]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne 
r3, r7\n\t" + "ldr r4, [%[a], #164]\n\t" + "ldr r5, [%[b], #164]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r5, [%[b], #160]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #156]\n\t" + "ldr r5, [%[b], #156]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r5, [%[b], #152]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #148]\n\t" + "ldr r5, [%[b], #148]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r5, [%[b], #144]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #140]\n\t" + "ldr r5, [%[b], #140]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r5, [%[b], #136]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #132]\n\t" + "ldr r5, [%[b], #132]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r5, [%[b], #128]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" 
+ "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it 
hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "eor %[r], %[r], r3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "r3", "r4", "r5", "r6", "r7" + ); +#endif + + return r; +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. 
+ */ +static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[96], t2[49]; + sp_digit div, r1; + int i; + + (void)m; + + + div = d[47]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 48); + for (i=47; i>=0; i--) { + r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div); + + sp_3072_mul_d_48(t2, d, r1); + t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2); + t1[48 + i] -= t2[48]; + sp_3072_mask_48(t2, d, t1[48 + i]); + t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2); + sp_3072_mask_48(t2, d, t1[48 + i]); + t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2); + } + + r1 = sp_3072_cmp_48(t1, d) >= 0; + sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_3072_div_48(a, m, NULL, r); +} + +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][96]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 96, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 96; + } +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_48(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 48U); + if (reduceA != 0) { + err = sp_3072_mod_48(t[1] + 48, a, m); + if (err == MP_OKAY) { + err = sp_3072_mod_48(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48); + err = sp_3072_mod_48(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_48(t[10], t[ 5], m, mp); + sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_48(t[12], t[ 6], m, mp); + sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_48(t[14], t[ 7], m, mp); + sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 48); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n <<= 
4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + + sp_3072_mont_mul_48(r, r, t[y], m, mp); + } + + XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U); + sp_3072_mont_reduce_48(r, m, mp); + + mask = 0 - (sp_3072_cmp_48(r, m) >= 0); + sp_3072_cond_sub_48(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][96]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 96; + } +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_48(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 48U); + if (reduceA != 0) { + err = sp_3072_mod_48(t[1] + 48, a, m); + if (err == MP_OKAY) { + err = sp_3072_mod_48(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48); + err = sp_3072_mod_48(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_48(t[10], t[ 5], m, mp); + sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_48(t[12], t[ 6], m, mp); + sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_48(t[14], t[ 7], m, mp); + sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_48(t[16], t[ 8], m, mp); + sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_48(t[18], t[ 9], m, mp); + sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_48(t[20], t[10], m, mp); + sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_48(t[22], t[11], m, mp); + sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_48(t[24], t[12], m, mp); + sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_48(t[26], t[13], m, mp); + sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_48(t[28], t[14], m, mp); + sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_48(t[30], t[15], m, mp); + sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp); + + i = (bits - 
1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 48); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + + sp_3072_mont_mul_48(r, r, t[y], m, mp); + } + + XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U); + sp_3072_mont_reduce_48(r, m, mp); + + mask = 0 - (sp_3072_cmp_48(r, m) >= 0); + sp_3072_cond_sub_48(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ + +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */ + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 3072 bits, just a subtraction is needed. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 96); + + /* r = 2^n mod m */ + sp_3072_sub_in_place_96(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing the conditional subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply.
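+ *
+ * In C terms the mask trick looks like this (illustrative sketch only;
+ * cond_sub is a hypothetical name, not part of this patch):
+ *
+ *   uint32_t cond_sub(uint32_t* r, const uint32_t* a,
+ *                     const uint32_t* b, uint32_t m, int n)
+ *   {
+ *       uint64_t t;
+ *       uint32_t borrow = 0;
+ *       int i;
+ *       for (i = 0; i < n; i++) {
+ *           t = (uint64_t)a[i] - (b[i] & m) - borrow;
+ *           r[i] = (uint32_t)t;               // low word of difference
+ *           borrow = (uint32_t)(t >> 32) & 1; // 1 if we borrowed
+ *       }
+ *       return (uint32_t)0 - borrow;          // 0 or all-ones, like sbc
+ *   }
+ *
+ * Because b[i] & m is computed for every word whether or not the
+ * subtraction is wanted, the memory access pattern and timing do not
+ * depend on the secret mask.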
+ */ +static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r9, #0\n\t" + "mov r8, #0\n\t" + "1:\n\t" + "subs %[c], r9, %[c]\n\t" + "ldr r4, [%[a], r8]\n\t" + "ldr r5, [%[b], r8]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc %[c], r9, r9\n\t" + "str r4, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, #384\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#else + __asm__ __volatile__ ( + + "mov r9, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "subs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r6, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #32]\n\t" + "str r6, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r6, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r6, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #48]\n\t" + "str r6, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r6, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r7, [%[b], #60]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #56]\n\t" + "str r6, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r6, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #64]\n\t" + "str r6, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r6, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r7, [%[b], #76]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #72]\n\t" + "str r6, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r6, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #80]\n\t" + "str r6, 
[%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r6, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r7, [%[b], #92]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #88]\n\t" + "str r6, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r6, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #96]\n\t" + "str r6, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r6, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r7, [%[b], #108]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #104]\n\t" + "str r6, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r6, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #112]\n\t" + "str r6, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r6, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r7, [%[b], #124]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #120]\n\t" + "str r6, [%[r], #124]\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r6, [%[a], #132]\n\t" + "ldr r5, [%[b], #128]\n\t" + "ldr r7, [%[b], #132]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #128]\n\t" + "str r6, [%[r], #132]\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r6, [%[a], #140]\n\t" + "ldr r5, [%[b], #136]\n\t" + "ldr r7, [%[b], #140]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #136]\n\t" + "str r6, [%[r], #140]\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r6, [%[a], #148]\n\t" + "ldr r5, [%[b], #144]\n\t" + "ldr r7, [%[b], #148]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #144]\n\t" + "str r6, [%[r], #148]\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r6, [%[a], #156]\n\t" + "ldr r5, [%[b], #152]\n\t" + "ldr r7, [%[b], #156]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #152]\n\t" + "str r6, [%[r], #156]\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r6, [%[a], #164]\n\t" + "ldr r5, [%[b], #160]\n\t" + "ldr r7, [%[b], #164]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #160]\n\t" + "str r6, [%[r], #164]\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r6, [%[a], #172]\n\t" + "ldr r5, [%[b], #168]\n\t" + "ldr r7, [%[b], #172]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #168]\n\t" + "str r6, [%[r], #172]\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r6, [%[a], #180]\n\t" + "ldr r5, [%[b], #176]\n\t" + "ldr r7, [%[b], #180]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #176]\n\t" + "str r6, [%[r], #180]\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r6, [%[a], #188]\n\t" + "ldr r5, [%[b], #184]\n\t" + "ldr r7, [%[b], #188]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #184]\n\t" + "str r6, [%[r], #188]\n\t" + "ldr r4, 
[%[a], #192]\n\t" + "ldr r6, [%[a], #196]\n\t" + "ldr r5, [%[b], #192]\n\t" + "ldr r7, [%[b], #196]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #192]\n\t" + "str r6, [%[r], #196]\n\t" + "ldr r4, [%[a], #200]\n\t" + "ldr r6, [%[a], #204]\n\t" + "ldr r5, [%[b], #200]\n\t" + "ldr r7, [%[b], #204]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #200]\n\t" + "str r6, [%[r], #204]\n\t" + "ldr r4, [%[a], #208]\n\t" + "ldr r6, [%[a], #212]\n\t" + "ldr r5, [%[b], #208]\n\t" + "ldr r7, [%[b], #212]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #208]\n\t" + "str r6, [%[r], #212]\n\t" + "ldr r4, [%[a], #216]\n\t" + "ldr r6, [%[a], #220]\n\t" + "ldr r5, [%[b], #216]\n\t" + "ldr r7, [%[b], #220]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #216]\n\t" + "str r6, [%[r], #220]\n\t" + "ldr r4, [%[a], #224]\n\t" + "ldr r6, [%[a], #228]\n\t" + "ldr r5, [%[b], #224]\n\t" + "ldr r7, [%[b], #228]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #224]\n\t" + "str r6, [%[r], #228]\n\t" + "ldr r4, [%[a], #232]\n\t" + "ldr r6, [%[a], #236]\n\t" + "ldr r5, [%[b], #232]\n\t" + "ldr r7, [%[b], #236]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #232]\n\t" + "str r6, [%[r], #236]\n\t" + "ldr r4, [%[a], #240]\n\t" + "ldr r6, [%[a], #244]\n\t" + "ldr r5, [%[b], #240]\n\t" + "ldr r7, [%[b], #244]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #240]\n\t" + "str r6, [%[r], #244]\n\t" + "ldr r4, [%[a], #248]\n\t" + "ldr r6, [%[a], #252]\n\t" + "ldr r5, [%[b], #248]\n\t" + "ldr r7, [%[b], #252]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #248]\n\t" + "str r6, [%[r], #252]\n\t" + "ldr r4, [%[a], #256]\n\t" + "ldr r6, [%[a], #260]\n\t" + "ldr r5, [%[b], #256]\n\t" + "ldr r7, [%[b], #260]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #256]\n\t" + "str r6, [%[r], #260]\n\t" + "ldr r4, [%[a], #264]\n\t" + "ldr r6, [%[a], #268]\n\t" + "ldr r5, [%[b], #264]\n\t" + "ldr r7, [%[b], #268]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #264]\n\t" + "str r6, [%[r], #268]\n\t" + "ldr r4, [%[a], #272]\n\t" + "ldr r6, [%[a], #276]\n\t" + "ldr r5, [%[b], #272]\n\t" + "ldr r7, [%[b], #276]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #272]\n\t" + "str r6, [%[r], #276]\n\t" + "ldr r4, [%[a], #280]\n\t" + "ldr r6, [%[a], #284]\n\t" + "ldr r5, [%[b], #280]\n\t" + "ldr r7, [%[b], #284]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #280]\n\t" + "str r6, [%[r], #284]\n\t" + "ldr r4, [%[a], #288]\n\t" + "ldr r6, [%[a], #292]\n\t" + "ldr r5, [%[b], #288]\n\t" + "ldr r7, [%[b], #292]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #288]\n\t" + "str r6, [%[r], #292]\n\t" + "ldr r4, [%[a], #296]\n\t" 
+ "ldr r6, [%[a], #300]\n\t" + "ldr r5, [%[b], #296]\n\t" + "ldr r7, [%[b], #300]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #296]\n\t" + "str r6, [%[r], #300]\n\t" + "ldr r4, [%[a], #304]\n\t" + "ldr r6, [%[a], #308]\n\t" + "ldr r5, [%[b], #304]\n\t" + "ldr r7, [%[b], #308]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #304]\n\t" + "str r6, [%[r], #308]\n\t" + "ldr r4, [%[a], #312]\n\t" + "ldr r6, [%[a], #316]\n\t" + "ldr r5, [%[b], #312]\n\t" + "ldr r7, [%[b], #316]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #312]\n\t" + "str r6, [%[r], #316]\n\t" + "ldr r4, [%[a], #320]\n\t" + "ldr r6, [%[a], #324]\n\t" + "ldr r5, [%[b], #320]\n\t" + "ldr r7, [%[b], #324]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #320]\n\t" + "str r6, [%[r], #324]\n\t" + "ldr r4, [%[a], #328]\n\t" + "ldr r6, [%[a], #332]\n\t" + "ldr r5, [%[b], #328]\n\t" + "ldr r7, [%[b], #332]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #328]\n\t" + "str r6, [%[r], #332]\n\t" + "ldr r4, [%[a], #336]\n\t" + "ldr r6, [%[a], #340]\n\t" + "ldr r5, [%[b], #336]\n\t" + "ldr r7, [%[b], #340]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #336]\n\t" + "str r6, [%[r], #340]\n\t" + "ldr r4, [%[a], #344]\n\t" + "ldr r6, [%[a], #348]\n\t" + "ldr r5, [%[b], #344]\n\t" + "ldr r7, [%[b], #348]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #344]\n\t" + "str r6, [%[r], #348]\n\t" + "ldr r4, [%[a], #352]\n\t" + "ldr r6, [%[a], #356]\n\t" + "ldr r5, [%[b], #352]\n\t" + "ldr r7, [%[b], #356]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #352]\n\t" + "str r6, [%[r], #356]\n\t" + "ldr r4, [%[a], #360]\n\t" + "ldr r6, [%[a], #364]\n\t" + "ldr r5, [%[b], #360]\n\t" + "ldr r7, [%[b], #364]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #360]\n\t" + "str r6, [%[r], #364]\n\t" + "ldr r4, [%[a], #368]\n\t" + "ldr r6, [%[a], #372]\n\t" + "ldr r5, [%[b], #368]\n\t" + "ldr r7, [%[b], #372]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #368]\n\t" + "str r6, [%[r], #372]\n\t" + "ldr r4, [%[a], #376]\n\t" + "ldr r6, [%[a], #380]\n\t" + "ldr r5, [%[b], #376]\n\t" + "ldr r7, [%[b], #380]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #376]\n\t" + "str r6, [%[r], #380]\n\t" + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "# i = 0\n\t" + "mov r12, #0\n\t" + "ldr r10, [%[a], #0]\n\t" + "ldr r14, [%[a], #4]\n\t" + "\n1:\n\t" + "# mu = a[i] * mp\n\t" + "mul r8, %[mp], r10\n\t" + "# a[i+0] += m[0] * mu\n\t" + "ldr r7, [%[m], #0]\n\t" + "ldr r9, [%[a], #0]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" + "# a[i+1] += m[1] * mu\n\t" + "ldr r7, [%[m], #4]\n\t" + "ldr r9, [%[a], #4]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r10, r14, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r10, r10, r5\n\t" + "adc r4, r4, #0\n\t" + "# a[i+2] += m[2] * mu\n\t" + "ldr r7, [%[m], #8]\n\t" + "ldr r14, [%[a], #8]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r14, r14, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r14, r14, r4\n\t" + "adc r5, r5, #0\n\t" + "# a[i+3] += m[3] * mu\n\t" + "ldr r7, [%[m], #12]\n\t" + "ldr r9, [%[a], #12]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+4] += m[4] * mu\n\t" + "ldr r7, [%[m], #16]\n\t" + "ldr r9, [%[a], #16]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+5] += m[5] * mu\n\t" + "ldr r7, [%[m], #20]\n\t" + "ldr r9, [%[a], #20]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+6] += m[6] * mu\n\t" + "ldr r7, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+7] += m[7] * mu\n\t" + "ldr r7, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #28]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+8] += m[8] * mu\n\t" + "ldr r7, [%[m], #32]\n\t" + "ldr r9, [%[a], #32]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+9] += m[9] * mu\n\t" + "ldr r7, [%[m], #36]\n\t" + "ldr r9, [%[a], #36]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+10] += m[10] * mu\n\t" + "ldr r7, [%[m], #40]\n\t" + "ldr r9, [%[a], #40]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+11] += m[11] * mu\n\t" + "ldr r7, [%[m], #44]\n\t" + "ldr r9, [%[a], #44]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #44]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+12] += m[12] * mu\n\t" + "ldr r7, [%[m], #48]\n\t" + "ldr r9, [%[a], #48]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+13] += m[13] * mu\n\t" + "ldr r7, [%[m], #52]\n\t" + "ldr r9, [%[a], #52]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #52]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+14] += m[14] * mu\n\t" + "ldr r7, 
[%[m], #56]\n\t" + "ldr r9, [%[a], #56]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+15] += m[15] * mu\n\t" + "ldr r7, [%[m], #60]\n\t" + "ldr r9, [%[a], #60]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #60]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+16] += m[16] * mu\n\t" + "ldr r7, [%[m], #64]\n\t" + "ldr r9, [%[a], #64]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+17] += m[17] * mu\n\t" + "ldr r7, [%[m], #68]\n\t" + "ldr r9, [%[a], #68]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #68]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+18] += m[18] * mu\n\t" + "ldr r7, [%[m], #72]\n\t" + "ldr r9, [%[a], #72]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+19] += m[19] * mu\n\t" + "ldr r7, [%[m], #76]\n\t" + "ldr r9, [%[a], #76]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #76]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+20] += m[20] * mu\n\t" + "ldr r7, [%[m], #80]\n\t" + "ldr r9, [%[a], #80]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+21] += m[21] * mu\n\t" + "ldr r7, [%[m], #84]\n\t" + "ldr r9, [%[a], #84]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #84]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+22] += m[22] * mu\n\t" + "ldr r7, [%[m], #88]\n\t" + "ldr r9, [%[a], #88]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+23] += m[23] * mu\n\t" + "ldr r7, [%[m], #92]\n\t" + "ldr r9, [%[a], #92]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #92]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+24] += m[24] * mu\n\t" + "ldr r7, [%[m], #96]\n\t" + "ldr r9, [%[a], #96]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+25] += m[25] * mu\n\t" + "ldr r7, [%[m], #100]\n\t" + "ldr r9, [%[a], #100]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #100]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+26] += m[26] * mu\n\t" + "ldr r7, [%[m], #104]\n\t" + "ldr r9, [%[a], #104]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+27] += m[27] * mu\n\t" + "ldr r7, [%[m], #108]\n\t" + "ldr r9, [%[a], #108]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #108]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+28] += m[28] * mu\n\t" + "ldr r7, [%[m], #112]\n\t" + "ldr r9, [%[a], #112]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" 
+ "# a[i+29] += m[29] * mu\n\t" + "ldr r7, [%[m], #116]\n\t" + "ldr r9, [%[a], #116]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #116]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+30] += m[30] * mu\n\t" + "ldr r7, [%[m], #120]\n\t" + "ldr r9, [%[a], #120]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+31] += m[31] * mu\n\t" + "ldr r7, [%[m], #124]\n\t" + "ldr r9, [%[a], #124]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #124]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+32] += m[32] * mu\n\t" + "ldr r7, [%[m], #128]\n\t" + "ldr r9, [%[a], #128]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+33] += m[33] * mu\n\t" + "ldr r7, [%[m], #132]\n\t" + "ldr r9, [%[a], #132]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #132]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+34] += m[34] * mu\n\t" + "ldr r7, [%[m], #136]\n\t" + "ldr r9, [%[a], #136]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #136]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+35] += m[35] * mu\n\t" + "ldr r7, [%[m], #140]\n\t" + "ldr r9, [%[a], #140]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #140]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+36] += m[36] * mu\n\t" + "ldr r7, [%[m], #144]\n\t" + "ldr r9, [%[a], #144]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+37] += m[37] * mu\n\t" + "ldr r7, [%[m], #148]\n\t" + "ldr r9, [%[a], #148]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #148]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+38] += m[38] * mu\n\t" + "ldr r7, [%[m], #152]\n\t" + "ldr r9, [%[a], #152]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+39] += m[39] * mu\n\t" + "ldr r7, [%[m], #156]\n\t" + "ldr r9, [%[a], #156]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #156]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+40] += m[40] * mu\n\t" + "ldr r7, [%[m], #160]\n\t" + "ldr r9, [%[a], #160]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #160]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+41] += m[41] * mu\n\t" + "ldr r7, [%[m], #164]\n\t" + "ldr r9, [%[a], #164]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #164]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+42] += m[42] * mu\n\t" + "ldr r7, [%[m], #168]\n\t" + "ldr r9, [%[a], #168]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #168]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+43] += m[43] * mu\n\t" + "ldr r7, [%[m], #172]\n\t" + "ldr r9, [%[a], #172]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" 
+ "adds r9, r9, r5\n\t" + "str r9, [%[a], #172]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+44] += m[44] * mu\n\t" + "ldr r7, [%[m], #176]\n\t" + "ldr r9, [%[a], #176]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #176]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+45] += m[45] * mu\n\t" + "ldr r7, [%[m], #180]\n\t" + "ldr r9, [%[a], #180]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #180]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+46] += m[46] * mu\n\t" + "ldr r7, [%[m], #184]\n\t" + "ldr r9, [%[a], #184]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #184]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+47] += m[47] * mu\n\t" + "ldr r7, [%[m], #188]\n\t" + "ldr r9, [%[a], #188]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #188]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+48] += m[48] * mu\n\t" + "ldr r7, [%[m], #192]\n\t" + "ldr r9, [%[a], #192]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #192]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+49] += m[49] * mu\n\t" + "ldr r7, [%[m], #196]\n\t" + "ldr r9, [%[a], #196]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #196]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+50] += m[50] * mu\n\t" + "ldr r7, [%[m], #200]\n\t" + "ldr r9, [%[a], #200]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #200]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+51] += m[51] * mu\n\t" + "ldr r7, [%[m], #204]\n\t" + "ldr r9, [%[a], #204]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #204]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+52] += m[52] * mu\n\t" + "ldr r7, [%[m], #208]\n\t" + "ldr r9, [%[a], #208]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #208]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+53] += m[53] * mu\n\t" + "ldr r7, [%[m], #212]\n\t" + "ldr r9, [%[a], #212]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #212]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+54] += m[54] * mu\n\t" + "ldr r7, [%[m], #216]\n\t" + "ldr r9, [%[a], #216]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #216]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+55] += m[55] * mu\n\t" + "ldr r7, [%[m], #220]\n\t" + "ldr r9, [%[a], #220]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #220]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+56] += m[56] * mu\n\t" + "ldr r7, [%[m], #224]\n\t" + "ldr r9, [%[a], #224]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #224]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+57] += m[57] * mu\n\t" + "ldr r7, [%[m], #228]\n\t" + "ldr r9, [%[a], #228]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #228]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+58] += m[58] * mu\n\t" + "ldr r7, [%[m], #232]\n\t" + "ldr r9, [%[a], #232]\n\t" 
+ "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #232]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+59] += m[59] * mu\n\t" + "ldr r7, [%[m], #236]\n\t" + "ldr r9, [%[a], #236]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #236]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+60] += m[60] * mu\n\t" + "ldr r7, [%[m], #240]\n\t" + "ldr r9, [%[a], #240]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #240]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+61] += m[61] * mu\n\t" + "ldr r7, [%[m], #244]\n\t" + "ldr r9, [%[a], #244]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #244]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+62] += m[62] * mu\n\t" + "ldr r7, [%[m], #248]\n\t" + "ldr r9, [%[a], #248]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #248]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+63] += m[63] * mu\n\t" + "ldr r7, [%[m], #252]\n\t" + "ldr r9, [%[a], #252]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #252]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+64] += m[64] * mu\n\t" + "ldr r7, [%[m], #256]\n\t" + "ldr r9, [%[a], #256]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #256]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+65] += m[65] * mu\n\t" + "ldr r7, [%[m], #260]\n\t" + "ldr r9, [%[a], #260]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #260]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+66] += m[66] * mu\n\t" + "ldr r7, [%[m], #264]\n\t" + "ldr r9, [%[a], #264]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #264]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+67] += m[67] * mu\n\t" + "ldr r7, [%[m], #268]\n\t" + "ldr r9, [%[a], #268]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #268]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+68] += m[68] * mu\n\t" + "ldr r7, [%[m], #272]\n\t" + "ldr r9, [%[a], #272]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #272]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+69] += m[69] * mu\n\t" + "ldr r7, [%[m], #276]\n\t" + "ldr r9, [%[a], #276]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #276]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+70] += m[70] * mu\n\t" + "ldr r7, [%[m], #280]\n\t" + "ldr r9, [%[a], #280]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #280]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+71] += m[71] * mu\n\t" + "ldr r7, [%[m], #284]\n\t" + "ldr r9, [%[a], #284]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #284]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+72] += m[72] * mu\n\t" + "ldr r7, [%[m], #288]\n\t" + "ldr r9, [%[a], #288]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #288]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+73] 
+= m[73] * mu\n\t" + "ldr r7, [%[m], #292]\n\t" + "ldr r9, [%[a], #292]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #292]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+74] += m[74] * mu\n\t" + "ldr r7, [%[m], #296]\n\t" + "ldr r9, [%[a], #296]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #296]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+75] += m[75] * mu\n\t" + "ldr r7, [%[m], #300]\n\t" + "ldr r9, [%[a], #300]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #300]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+76] += m[76] * mu\n\t" + "ldr r7, [%[m], #304]\n\t" + "ldr r9, [%[a], #304]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #304]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+77] += m[77] * mu\n\t" + "ldr r7, [%[m], #308]\n\t" + "ldr r9, [%[a], #308]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #308]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+78] += m[78] * mu\n\t" + "ldr r7, [%[m], #312]\n\t" + "ldr r9, [%[a], #312]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #312]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+79] += m[79] * mu\n\t" + "ldr r7, [%[m], #316]\n\t" + "ldr r9, [%[a], #316]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #316]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+80] += m[80] * mu\n\t" + "ldr r7, [%[m], #320]\n\t" + "ldr r9, [%[a], #320]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #320]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+81] += m[81] * mu\n\t" + "ldr r7, [%[m], #324]\n\t" + "ldr r9, [%[a], #324]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #324]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+82] += m[82] * mu\n\t" + "ldr r7, [%[m], #328]\n\t" + "ldr r9, [%[a], #328]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #328]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+83] += m[83] * mu\n\t" + "ldr r7, [%[m], #332]\n\t" + "ldr r9, [%[a], #332]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #332]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+84] += m[84] * mu\n\t" + "ldr r7, [%[m], #336]\n\t" + "ldr r9, [%[a], #336]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #336]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+85] += m[85] * mu\n\t" + "ldr r7, [%[m], #340]\n\t" + "ldr r9, [%[a], #340]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #340]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+86] += m[86] * mu\n\t" + "ldr r7, [%[m], #344]\n\t" + "ldr r9, [%[a], #344]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #344]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+87] += m[87] * mu\n\t" + "ldr r7, [%[m], #348]\n\t" + "ldr r9, [%[a], #348]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, 
r9, r5\n\t" + "str r9, [%[a], #348]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+88] += m[88] * mu\n\t" + "ldr r7, [%[m], #352]\n\t" + "ldr r9, [%[a], #352]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #352]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+89] += m[89] * mu\n\t" + "ldr r7, [%[m], #356]\n\t" + "ldr r9, [%[a], #356]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #356]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+90] += m[90] * mu\n\t" + "ldr r7, [%[m], #360]\n\t" + "ldr r9, [%[a], #360]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #360]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+91] += m[91] * mu\n\t" + "ldr r7, [%[m], #364]\n\t" + "ldr r9, [%[a], #364]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #364]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+92] += m[92] * mu\n\t" + "ldr r7, [%[m], #368]\n\t" + "ldr r9, [%[a], #368]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #368]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+93] += m[93] * mu\n\t" + "ldr r7, [%[m], #372]\n\t" + "ldr r9, [%[a], #372]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #372]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+94] += m[94] * mu\n\t" + "ldr r7, [%[m], #376]\n\t" + "ldr r9, [%[a], #376]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #376]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+95] += m[95] * mu\n\t" + "ldr r7, [%[m], #380]\n\t" + "ldr r9, [%[a], #380]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r7, r7, %[ca]\n\t" + "mov %[ca], #0\n\t" + "adc %[ca], %[ca], %[ca]\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #380]\n\t" + "ldr r9, [%[a], #384]\n\t" + "adcs r9, r9, r7\n\t" + "str r9, [%[a], #384]\n\t" + "adc %[ca], %[ca], #0\n\t" + "# i += 1\n\t" + "add %[a], %[a], #4\n\t" + "add r12, r12, #4\n\t" + "cmp r12, #384\n\t" + "blt 1b\n\t" + "str r10, [%[a], #0]\n\t" + "str r14, [%[a], #4]\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montgomery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montgomery form. + * b Second number to multiply in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static void sp_3072_mont_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_3072_mul_96(r, a, b); + sp_3072_mont_reduce_96(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_3072_sqr_96(r, a); + sp_3072_mont_reduce_96(r, m, mp); +}
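The unrolled loop that ends above is word-wise Montgomery reduction, and the two wrappers just defined compose it with the product routines (mont_mul is multiply then reduce, mont_sqr is square then reduce). For orientation, here is a minimal portable C sketch of the same reduction; the helper name and the generic word loop are illustrative rather than the library's API, and the real code keeps the entire carry chain in registers before finishing with the constant-time sp_3072_cond_sub_96 call seen above:

```c
#include <stdint.h>

/* Sketch of word-wise Montgomery reduction for an n-word modulus m.
 * a holds 2*n words; mp is -(m^-1) mod 2^32 from the mont_setup step.
 * The assembly above is this inner loop, fully unrolled for n = 96. */
static void mont_reduce_sketch(uint32_t* a, const uint32_t* m, uint32_t mp,
                               int n)
{
    uint32_t ca = 0;                      /* overflow word, 'ca' above    */
    for (int i = 0; i < n; i++) {
        uint32_t mu = a[i] * mp;          /* makes a[i] zero mod 2^32     */
        uint64_t t = 0;
        for (int j = 0; j < n; j++) {     /* add mu*m shifted to offset i */
            t = (uint64_t)mu * m[j] + a[i + j] + (t >> 32);
            a[i + j] = (uint32_t)t;
        }
        t = (uint64_t)a[i + n] + (t >> 32) + ca;
        a[i + n] = (uint32_t)t;
        ca = (uint32_t)(t >> 32);         /* lands one word higher next i */
    }
    /* result is a[n..2n-1]; one masked subtract of m (cond_sub) follows */
}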
+ +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate division; the result may be one larger + * than the true quotient. + */ +static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) +{ + sp_digit r = 0; + + __asm__ __volatile__ ( + "lsr r5, %[div], #1\n\t" + "add r5, r5, #1\n\t" + "mov r6, %[d0]\n\t" + "mov r7, %[d1]\n\t" + "# Do top 32\n\t" + "subs r8, r5, r7\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], %[r]\n\t" + "sub %[r], %[r], r8\n\t" + "and r8, r8, r5\n\t" + "subs r7, r7, r8\n\t" + "# Next 30 bits\n\t" + "mov r4, #29\n\t" + "1:\n\t" + "movs r6, r6, lsl #1\n\t" + "adc r7, r7, r7\n\t" + "subs r8, r5, r7\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], %[r]\n\t" + "sub %[r], %[r], r8\n\t" + "and r8, r8, r5\n\t" + "subs r7, r7, r8\n\t" + "subs r4, r4, #1\n\t" + "bpl 1b\n\t" + "add %[r], %[r], %[r]\n\t" + "add %[r], %[r], #1\n\t" + "umull r4, r5, %[r], %[div]\n\t" + "subs r4, %[d0], r4\n\t" + "sbc r5, %[d1], r5\n\t" + "add %[r], %[r], r5\n\t" + "umull r4, r5, %[r], %[div]\n\t" + "subs r4, %[d0], r4\n\t" + "sbc r5, %[d1], r5\n\t" + "add %[r], %[r], r5\n\t" + "subs r8, %[div], r4\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + : [r] "+r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "r4", "r5", "r6", "r7", "r8" + ); + return r; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<96; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 96; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively.
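The compare documented here never branches on secret data: a mask register (r3 in the assembly below) starts all-ones and is cleared at the first differing word, after which the remaining loads still happen but can no longer change the result. A C sketch of the idiom, with plain ifs standing in for the conditional moves that the IT blocks select:

```c
#include <stdint.h>

/* Sketch of the constant-time compare: returns -1, 0 or 1. The real
 * code replaces each if with a conditional-move instruction, so the
 * executed instruction stream does not depend on the data. */
static int32_t ct_cmp_sketch(const uint32_t* a, const uint32_t* b, int n)
{
    int32_t r = -1;
    int32_t mask = -1;               /* r3: all-ones while words equal  */
    for (int i = n - 1; i >= 0; i--) {
        uint32_t x = a[i] & (uint32_t)mask;
        uint32_t y = b[i] & (uint32_t)mask;
        if (x > y) r = 1;            /* movhi */
        if (x < y) r = mask;         /* movlo */
        if (x != y) mask = 0;        /* movne: freeze r from here on    */
    }
    return r ^ mask;                 /* all words equal: -1 ^ -1 == 0   */
}
```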
+ */ +static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = -1; + sp_digit one = 1; + + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mov r3, #-1\n\t" + "mov r6, #380\n\t" + "1:\n\t" + "ldr r4, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "subs r6, r6, #4\n\t" + "bcs 1b\n\t" + "eor %[r], %[r], r3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "r3", "r4", "r5", "r6", "r7" + ); +#else + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mov r3, #-1\n\t" + "ldr r4, [%[a], #380]\n\t" + "ldr r5, [%[b], #380]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #376]\n\t" + "ldr r5, [%[b], #376]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #372]\n\t" + "ldr r5, [%[b], #372]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #368]\n\t" + "ldr r5, [%[b], #368]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #364]\n\t" + "ldr r5, [%[b], #364]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #360]\n\t" + "ldr r5, [%[b], #360]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #356]\n\t" + "ldr r5, [%[b], #356]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #352]\n\t" + "ldr r5, [%[b], #352]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #348]\n\t" + "ldr r5, [%[b], #348]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #344]\n\t" + "ldr r5, [%[b], #344]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #340]\n\t" + "ldr r5, [%[b], #340]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #336]\n\t" + "ldr r5, [%[b], #336]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + 
"movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #332]\n\t" + "ldr r5, [%[b], #332]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #328]\n\t" + "ldr r5, [%[b], #328]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #324]\n\t" + "ldr r5, [%[b], #324]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #320]\n\t" + "ldr r5, [%[b], #320]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #316]\n\t" + "ldr r5, [%[b], #316]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #312]\n\t" + "ldr r5, [%[b], #312]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #308]\n\t" + "ldr r5, [%[b], #308]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #304]\n\t" + "ldr r5, [%[b], #304]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #300]\n\t" + "ldr r5, [%[b], #300]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #296]\n\t" + "ldr r5, [%[b], #296]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #292]\n\t" + "ldr r5, [%[b], #292]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #288]\n\t" + "ldr r5, [%[b], #288]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #284]\n\t" + "ldr r5, [%[b], #284]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #280]\n\t" + "ldr r5, [%[b], #280]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #276]\n\t" + "ldr r5, [%[b], #276]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, 
r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #272]\n\t" + "ldr r5, [%[b], #272]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #268]\n\t" + "ldr r5, [%[b], #268]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #264]\n\t" + "ldr r5, [%[b], #264]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #260]\n\t" + "ldr r5, [%[b], #260]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #256]\n\t" + "ldr r5, [%[b], #256]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #252]\n\t" + "ldr r5, [%[b], #252]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #248]\n\t" + "ldr r5, [%[b], #248]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #244]\n\t" + "ldr r5, [%[b], #244]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #240]\n\t" + "ldr r5, [%[b], #240]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #236]\n\t" + "ldr r5, [%[b], #236]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #232]\n\t" + "ldr r5, [%[b], #232]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #228]\n\t" + "ldr r5, [%[b], #228]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #224]\n\t" + "ldr r5, [%[b], #224]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #220]\n\t" + "ldr r5, [%[b], #220]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #216]\n\t" + "ldr r5, [%[b], #216]\n\t" + 
"and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #212]\n\t" + "ldr r5, [%[b], #212]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #208]\n\t" + "ldr r5, [%[b], #208]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #204]\n\t" + "ldr r5, [%[b], #204]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #200]\n\t" + "ldr r5, [%[b], #200]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #196]\n\t" + "ldr r5, [%[b], #196]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #192]\n\t" + "ldr r5, [%[b], #192]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #188]\n\t" + "ldr r5, [%[b], #188]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r5, [%[b], #184]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #180]\n\t" + "ldr r5, [%[b], #180]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r5, [%[b], #176]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #172]\n\t" + "ldr r5, [%[b], #172]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r5, [%[b], #168]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #164]\n\t" + "ldr r5, [%[b], #164]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r5, [%[b], #160]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" 
+ "ldr r4, [%[a], #156]\n\t" + "ldr r5, [%[b], #156]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r5, [%[b], #152]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #148]\n\t" + "ldr r5, [%[b], #148]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r5, [%[b], #144]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #140]\n\t" + "ldr r5, [%[b], #140]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r5, [%[b], #136]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #132]\n\t" + "ldr r5, [%[b], #132]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r5, [%[b], #128]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it 
lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + 
"movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "eor %[r], %[r], r3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "r3", "r4", "r5", "r6", "r7" + ); +#endif + + return r; +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. 
+static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[192], t2[97]; + sp_digit div, r1; + int i; + + (void)m; + + + div = d[95]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 96); + for (i=95; i>=0; i--) { + r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div); + + sp_3072_mul_d_96(t2, d, r1); + t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2); + t1[96 + i] -= t2[96]; + sp_3072_mask_96(t2, d, t1[96 + i]); + t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2); + sp_3072_mask_96(t2, d, t1[96 + i]); + t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2); + } + + r1 = sp_3072_cmp_96(t1, d) >= 0; + sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_3072_div_96(a, m, NULL, r); +} + +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +/* Divide a by d and put the remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Number to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[192], t2[97]; + sp_digit div, r1; + int i; + + (void)m; + + + div = d[95]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 96); + for (i=95; i>=0; i--) { + r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div); + + sp_3072_mul_d_96(t2, d, r1); + t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2); + t1[96 + i] -= t2[96]; + if (t1[96 + i] != 0) { + t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d); + if (t1[96 + i] != 0) + t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d); + } + } + + r1 = sp_3072_cmp_96(t1, d) >= 0; + sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_3072_div_96_cond(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
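Both sp_3072_mod_exp_96 variants that follow are fixed-window exponentiation in Montgomery form: precompute a table t[k] = a^k, then repeatedly square w times and multiply by the table entry selected by the next w exponent bits (w = 4 with a 16-entry table in the WOLFSSL_SP_SMALL build, w = 5 with 32 entries otherwise; the n/c/y bookkeeping is just an MSB-first window extractor). A self-contained sketch of the same schedule on a single 64-bit word, with a plain mulmod standing in for Montgomery multiplication:

```c
#include <stdint.h>

/* Fixed 4-bit-window modular exponentiation, shrunk to one word.
 * Requires a compiler with unsigned __int128 (GCC/Clang). */
static uint64_t mulmod(uint64_t a, uint64_t b, uint64_t m)
{
    return (uint64_t)((unsigned __int128)a * b % m);
}

static uint64_t mod_exp_window4(uint64_t a, uint64_t e, uint64_t m)
{
    uint64_t t[16];
    t[0] = 1 % m;
    for (int k = 1; k < 16; k++)          /* table: t[k] = a^k mod m     */
        t[k] = mulmod(t[k - 1], a, m);

    uint64_t r = 1 % m;
    for (int i = 60; i >= 0; i -= 4) {    /* MSB-first, one window/round */
        for (int s = 0; s < 4; s++)
            r = mulmod(r, r, m);          /* 4 squarings shift r left    */
        r = mulmod(r, t[(e >> i) & 0xf], m);
    }
    return r;
}
```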
+ */ +static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][192]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 192, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 192; + } +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_96(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 96U); + if (reduceA != 0) { + err = sp_3072_mod_96(t[1] + 96, a, m); + if (err == MP_OKAY) { + err = sp_3072_mod_96(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96); + err = sp_3072_mod_96(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_96(t[10], t[ 5], m, mp); + sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_96(t[12], t[ 6], m, mp); + sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_96(t[14], t[ 7], m, mp); + sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 96); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n <<= 4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + + sp_3072_mont_mul_96(r, r, t[y], m, mp); + } + + XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U); + sp_3072_mont_reduce_96(r, m, mp); + + mask = 0 - (sp_3072_cmp_96(r, m) >= 0); + sp_3072_cond_sub_96(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][192]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 192, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 192; + } +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_96(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 96U); + if (reduceA != 0) { + err = sp_3072_mod_96(t[1] + 96, a, m); + if (err == MP_OKAY) { + err = sp_3072_mod_96(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96); + err = sp_3072_mod_96(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_96(t[10], t[ 5], m, mp); + sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_96(t[12], t[ 6], m, mp); + sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_96(t[14], t[ 7], m, mp); + sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_96(t[16], t[ 8], m, mp); + sp_3072_mont_mul_96(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_96(t[18], t[ 9], m, mp); + sp_3072_mont_mul_96(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_96(t[20], t[10], m, mp); + sp_3072_mont_mul_96(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_96(t[22], t[11], m, mp); + sp_3072_mont_mul_96(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_96(t[24], t[12], m, mp); + sp_3072_mont_mul_96(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_96(t[26], t[13], m, mp); + sp_3072_mont_mul_96(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_96(t[28], t[14], m, mp); + sp_3072_mont_mul_96(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_96(t[30], t[15], m, mp); + sp_3072_mont_mul_96(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 96); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + + sp_3072_mont_mul_96(r, r, t[y], m, mp); + } + + XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U); + sp_3072_mont_reduce_96(r, m, mp); + + mask = 0 - (sp_3072_cmp_96(r, m) >= 0); + sp_3072_cond_sub_96(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || 
WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[192], m[96], r[192]; +#else + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; +#endif + sp_digit *ah; + sp_digit e[1]; + int err = MP_OKAY; + + if (*outLen < 384) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 384 || + mp_count_bits(mm) != 3072)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 96 * 2; + m = r + 96 * 2; + } +#endif + + if (err == MP_OKAY) { + ah = a + 96; + + sp_3072_from_bin(ah, 96, in, inLen); +#if DIGIT_BIT >= 32 + e[0] = em->dp[0]; +#else + e[0] = em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_3072_from_mp(m, 96, mm); + + if (e[0] == 0x3) { + if (err == MP_OKAY) { + sp_3072_sqr_96(r, ah); + err = sp_3072_mod_96_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_3072_mul_96(r, ah, r); + err = sp_3072_mod_96_cond(r, r, m); + } + } + else { + int i; + sp_digit mp; + + sp_3072_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
*/ + XMEMSET(a, 0, sizeof(sp_digit) * 96); + err = sp_3072_mod_96_cond(a, a, m); + + if (err == MP_OKAY) { + for (i = 31; i >= 0; i--) { + if (e[0] >> i) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 96); + for (i--; i>=0; i--) { + sp_3072_mont_sqr_96(r, r, m, mp); + if (((e[0] >> i) & 1) == 1) { + sp_3072_mont_mul_96(r, r, a, m, mp); + } + } + XMEMSET(&r[96], 0, sizeof(sp_digit) * 96); + sp_3072_mont_reduce_96(r, m, mp); + + for (i = 95; i > 0; i--) { + if (r[i] != m[i]) { + break; + } + } + if (r[i] >= m[i]) { + sp_3072_sub_in_place_96(r, m); + } + } + } + } + + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } +#endif + + return err; +} + +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) + sp_digit* a; + sp_digit* d = NULL; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 384U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 3072) { + err = MP_READ_E; + } + if (inLen > 384) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 4, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = d + 96; + m = a + 192; + r = a; + + sp_3072_from_bin(a, 96, in, inLen); + sp_3072_from_mp(d, 96, dm); + sp_3072_from_mp(m, 96, mm); + err = sp_3072_mod_exp_96(r, a, d, 3072, m, 0); + } + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + } + + if (d != NULL) { + XMEMSET(d, 0, sizeof(sp_digit) * 96); + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
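sp_3072_cond_add_48, defined next, is the masked-add primitive the CRT private-key path below uses to pull a possibly negative difference back into range: with m all-ones every word of b is added, with m zero only zeros are, and the instruction and memory-access pattern is identical either way. A word-at-a-time C sketch (illustrative names, not the library API):

```c
#include <stdint.h>

/* Masked conditional add: r = a + (b AND mask), returning the carry.
 * mask is 0xffffffff to add and 0 to skip; both cases execute the
 * same instructions, so the choice leaks nothing. */
static uint32_t cond_add_sketch(uint32_t* r, const uint32_t* a,
                                const uint32_t* b, uint32_t mask, int n)
{
    uint32_t carry = 0;
    for (int i = 0; i < n; i++) {
        uint64_t t = (uint64_t)a[i] + (b[i] & mask) + carry;
        r[i] = (uint32_t)t;
        carry = (uint32_t)(t >> 32);
    }
    return carry;     /* the asm hands this back through the final adc */
}
```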
+ */ +static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r9, #0\n\t" + "mov r8, #0\n\t" + "1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr r4, [%[a], r8]\n\t" + "ldr r5, [%[b], r8]\n\t" + "and r5, r5, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adc %[c], r9, r9\n\t" + "str r4, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, #192\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#else + __asm__ __volatile__ ( + + "mov r9, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adds r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r6, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #32]\n\t" + "str r6, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r6, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r6, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #48]\n\t" + "str r6, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r6, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r7, [%[b], #60]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #56]\n\t" + "str r6, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r6, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #64]\n\t" + "str r6, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r6, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r7, [%[b], #76]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #72]\n\t" + "str r6, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r6, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #80]\n\t" + "str r6, 
[%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r6, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r7, [%[b], #92]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #88]\n\t" + "str r6, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r6, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #96]\n\t" + "str r6, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r6, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r7, [%[b], #108]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #104]\n\t" + "str r6, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r6, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #112]\n\t" + "str r6, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r6, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r7, [%[b], #124]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #120]\n\t" + "str r6, [%[r], #124]\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r6, [%[a], #132]\n\t" + "ldr r5, [%[b], #128]\n\t" + "ldr r7, [%[b], #132]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #128]\n\t" + "str r6, [%[r], #132]\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r6, [%[a], #140]\n\t" + "ldr r5, [%[b], #136]\n\t" + "ldr r7, [%[b], #140]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #136]\n\t" + "str r6, [%[r], #140]\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r6, [%[a], #148]\n\t" + "ldr r5, [%[b], #144]\n\t" + "ldr r7, [%[b], #148]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #144]\n\t" + "str r6, [%[r], #148]\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r6, [%[a], #156]\n\t" + "ldr r5, [%[b], #152]\n\t" + "ldr r7, [%[b], #156]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #152]\n\t" + "str r6, [%[r], #156]\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r6, [%[a], #164]\n\t" + "ldr r5, [%[b], #160]\n\t" + "ldr r7, [%[b], #164]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #160]\n\t" + "str r6, [%[r], #164]\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r6, [%[a], #172]\n\t" + "ldr r5, [%[b], #168]\n\t" + "ldr r7, [%[b], #172]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #168]\n\t" + "str r6, [%[r], #172]\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r6, [%[a], #180]\n\t" + "ldr r5, [%[b], #176]\n\t" + "ldr r7, [%[b], #180]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #176]\n\t" + "str r6, [%[r], #180]\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r6, [%[a], #188]\n\t" + "ldr r5, [%[b], #184]\n\t" + "ldr r7, [%[b], #188]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #184]\n\t" + "str r6, [%[r], #188]\n\t" + "adc 
%[c], r9, r9\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[96 * 2]; + sp_digit p[48], q[48], dp[48]; + sp_digit tmpa[96], tmpb[96]; +#else + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* tmpa; + sp_digit* tmpb; +#endif + sp_digit* r; + sp_digit* qi; + sp_digit* dq; + sp_digit c; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 384) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 96 * 2; + q = p + 48; + qi = dq = dp = q + 48; + tmpa = qi + 48; + tmpb = tmpa + 96; + + r = t + 96; + } +#else +#endif + + if (err == MP_OKAY) { +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + r = a; + qi = dq = dp; +#endif + sp_3072_from_bin(a, 96, in, inLen); + sp_3072_from_mp(p, 48, pm); + sp_3072_from_mp(q, 48, qm); + sp_3072_from_mp(dp, 48, dpm); + + err = sp_3072_mod_exp_48(tmpa, a, dp, 1536, p, 1); + } + if (err == MP_OKAY) { + sp_3072_from_mp(dq, 48, dqm); + err = sp_3072_mod_exp_48(tmpb, a, dq, 1536, q, 1); + } + + if (err == MP_OKAY) { + c = sp_3072_sub_in_place_48(tmpa, tmpb); + c += sp_3072_cond_add_48(tmpa, tmpa, p, c); + sp_3072_cond_add_48(tmpa, tmpa, p, c); + + sp_3072_from_mp(qi, 48, qim); + sp_3072_mul_48(tmpa, tmpa, qi); + err = sp_3072_mod_48(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_3072_mul_48(tmpa, q, tmpa); + XMEMSET(&tmpb[48], 0, sizeof(sp_digit) * 48); + sp_3072_add_96(r, tmpb, tmpa); + + sp_3072_to_bin(r, out); + *outLen = 384; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 48 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); +#endif + + return err; +} +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. 
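+ * The 96 32-bit digits are repacked into mp_int digits of DIGIT_BIT bits;
+ * when DIGIT_BIT == 32 this reduces to a straight XMEMCPY. Hedged usage
+ * sketch (hypothetical caller, mirroring what sp_ModExp_3072 below does):
+ *
+ *     mp_int res;
+ *     if (mp_init(&res) == MP_OKAY &&
+ *             sp_3072_to_mp(r, &res) == MP_OKAY) {
+ *         res now holds the same 3072-bit value as r
+ *     }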
+ * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_3072_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 96); + r->used = 96; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 96; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 32 - s; + } + r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 96; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[192], e[96], m[96]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 96, base); + sp_3072_from_mp(e, 96, exp); + sp_3072_from_mp(m, 96, mod); + + err = sp_3072_mod_exp_96(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_3072_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_3072 +static void sp_3072_lshift_96(sp_digit* r, sp_digit* a, byte n) +{ + __asm__ __volatile__ ( + "mov r6, #31\n\t" + "sub r6, r6, %[n]\n\t" + "ldr r3, [%[a], #380]\n\t" + "lsr r4, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r4, r4, r6\n\t" + "ldr r2, [%[a], #376]\n\t" + "str r4, [%[r], #384]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #372]\n\t" + "str r3, [%[r], #380]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #368]\n\t" + "str r2, [%[r], #376]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #364]\n\t" + "str r4, [%[r], #372]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #360]\n\t" + "str r3, [%[r], #368]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #356]\n\t" + "str r2, [%[r], #364]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #352]\n\t" + "str r4, [%[r], #360]\n\t" + 
"lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #348]\n\t" + "str r3, [%[r], #356]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #344]\n\t" + "str r2, [%[r], #352]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #340]\n\t" + "str r4, [%[r], #348]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #336]\n\t" + "str r3, [%[r], #344]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #332]\n\t" + "str r2, [%[r], #340]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #328]\n\t" + "str r4, [%[r], #336]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #324]\n\t" + "str r3, [%[r], #332]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #320]\n\t" + "str r2, [%[r], #328]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #316]\n\t" + "str r4, [%[r], #324]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #312]\n\t" + "str r3, [%[r], #320]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #308]\n\t" + "str r2, [%[r], #316]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #304]\n\t" + "str r4, [%[r], #312]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #300]\n\t" + "str r3, [%[r], #308]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #296]\n\t" + "str r2, [%[r], #304]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #292]\n\t" + "str r4, [%[r], #300]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #288]\n\t" + "str r3, [%[r], #296]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #284]\n\t" + "str r2, [%[r], #292]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #280]\n\t" + "str r4, [%[r], #288]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #276]\n\t" + "str r3, [%[r], #284]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #272]\n\t" + "str r2, [%[r], #280]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #268]\n\t" + "str r4, [%[r], #276]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #264]\n\t" + "str r3, [%[r], #272]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #260]\n\t" + "str r2, [%[r], #268]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, 
r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #256]\n\t" + "str r4, [%[r], #264]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #252]\n\t" + "str r3, [%[r], #260]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #248]\n\t" + "str r2, [%[r], #256]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #244]\n\t" + "str r4, [%[r], #252]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #240]\n\t" + "str r3, [%[r], #248]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #236]\n\t" + "str r2, [%[r], #244]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #232]\n\t" + "str r4, [%[r], #240]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #228]\n\t" + "str r3, [%[r], #236]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #224]\n\t" + "str r2, [%[r], #232]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #220]\n\t" + "str r4, [%[r], #228]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #216]\n\t" + "str r3, [%[r], #224]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #212]\n\t" + "str r2, [%[r], #220]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #208]\n\t" + "str r4, [%[r], #216]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #204]\n\t" + "str r3, [%[r], #212]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #200]\n\t" + "str r2, [%[r], #208]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #196]\n\t" + "str r4, [%[r], #204]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #192]\n\t" + "str r3, [%[r], #200]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #188]\n\t" + "str r2, [%[r], #196]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #184]\n\t" + "str r4, [%[r], #192]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #180]\n\t" + "str r3, [%[r], #188]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #176]\n\t" + "str r2, [%[r], #184]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #172]\n\t" + "str r4, [%[r], #180]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #168]\n\t" + "str r3, [%[r], #176]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], 
#164]\n\t" + "str r2, [%[r], #172]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #160]\n\t" + "str r4, [%[r], #168]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #156]\n\t" + "str r3, [%[r], #164]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #152]\n\t" + "str r2, [%[r], #160]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #148]\n\t" + "str r4, [%[r], #156]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #144]\n\t" + "str r3, [%[r], #152]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #140]\n\t" + "str r2, [%[r], #148]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #136]\n\t" + "str r4, [%[r], #144]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #132]\n\t" + "str r3, [%[r], #140]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #128]\n\t" + "str r2, [%[r], #136]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #124]\n\t" + "str r4, [%[r], #132]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #120]\n\t" + "str r3, [%[r], #128]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #116]\n\t" + "str r2, [%[r], #124]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #112]\n\t" + "str r4, [%[r], #120]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #108]\n\t" + "str r3, [%[r], #116]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #104]\n\t" + "str r2, [%[r], #112]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #100]\n\t" + "str r4, [%[r], #108]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #96]\n\t" + "str r3, [%[r], #104]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #92]\n\t" + "str r2, [%[r], #100]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #88]\n\t" + "str r4, [%[r], #96]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #84]\n\t" + "str r3, [%[r], #92]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #80]\n\t" + "str r2, [%[r], #88]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #76]\n\t" + "str r4, [%[r], #84]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #72]\n\t" + "str r3, [%[r], #80]\n\t" + "lsr r5, r4, #1\n\t" + "lsl 
r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #68]\n\t" + "str r2, [%[r], #76]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #64]\n\t" + "str r4, [%[r], #72]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #56]\n\t" + "str r2, [%[r], #64]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], #56]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #44]\n\t" + "str r2, [%[r], #52]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #32]\n\t" + "str r2, [%[r], #40]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #20]\n\t" + "str r2, [%[r], #28]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #8]\n\t" + "str r2, [%[r], #16]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #0]\n\t" + "str r3, [%[r], #8]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "str r4, [%[r]]\n\t" + "str r2, [%[r], #4]\n\t" + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) + : "memory", "r2", "r3", "r4", "r5", "r6" + ); +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
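+ *
+ * Because the base is fixed at 2, each 5-bit window y of the exponent is
+ * applied with a left shift instead of a Montgomery multiplication: after
+ * five Montgomery squarings, multiplying by 2^y is sp_3072_lshift_96(),
+ * and the digit shifted out into r[96] is folded back with one
+ * multiply-by-digit, an add and a conditional subtract. One window step
+ * (mirroring the loop body below):
+ *
+ *     sp_3072_mont_sqr_96(r, r, m, mp);        repeated five times
+ *     sp_3072_lshift_96(r, r, y);              overflow lands in r[96]
+ *     sp_3072_mul_d_96(tmp, norm, r[96]);      reduce the overflow digit
+ *     r[96] = 0;
+ *     o = sp_3072_add_96(r, r, tmp);
+ *     sp_3072_cond_sub_96(r, r, m, (sp_digit)0 - o);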
+ */ +static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[192]; + sp_digit td[97]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 289, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 192; +#else + norm = nd; + tmp = td; +#endif + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_96(norm, m); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + sp_3072_lshift_96(r, norm, y); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + + sp_3072_lshift_96(r, r, y); + sp_3072_mul_d_96(tmp, norm, r[96]); + r[96] = 0; + o = sp_3072_add_96(r, r, tmp); + sp_3072_cond_sub_96(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U); + sp_3072_mont_reduce_96(r, m, mp); + + mask = 0 - (sp_3072_cmp_96(r, m) >= 0); + sp_3072_cond_sub_96(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* HAVE_FFDHE_3072 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ + int err = MP_OKAY; + sp_digit b[192], e[96], m[96]; + sp_digit* r = b; + word32 i; + + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 384) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 96, base); + sp_3072_from_bin(e, 96, exp, expLen); + sp_3072_from_mp(m, 96, mod); + + #ifdef HAVE_FFDHE_3072 + if (base->used == 1 && base->dp[0] == 2 && m[95] == (sp_digit)-1) + err = sp_3072_mod_exp_2_96(r, e, expLen * 8, m); + else + #endif + err = sp_3072_mod_exp_96(r, b, e, expLen * 8, m, 0); + + } + + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + for (i=0; i<384 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. 
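+ *
+ * Note: 1536-bit operands reuse the lower half of the 3072-bit code, so
+ * conversion and exponentiation go through the 48-digit helpers
+ * (sp_3072_from_mp, sp_3072_mod_exp_48) rather than a separate 1536-bit
+ * implementation. Hedged caller-side sketch (variables hypothetical):
+ *
+ *     mp_int g, x, p, y;                        previously initialised
+ *     int ret = sp_ModExp_1536(&g, &x, &p, &y); y = g^x mod p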
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[96], e[48], m[48]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1536) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 1536) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 1536) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 48, base); + sp_3072_from_mp(e, 48, exp); + sp_3072_from_mp(m, 48, mod); + + err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 48, 0, sizeof(*r) * 48U); + err = sp_3072_to_mp(r, res); + res->used = mod->used; + mp_clamp(res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* !WOLFSSL_SP_NO_3072 */ + +#ifdef WOLFSSL_SP_4096 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 24U) { + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 32 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 32 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 32U) <= (word32)DIGIT_BIT) { + s += 32U; + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 32) { + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + s = 32 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 512 + * + * r A single precision integer. + * a Byte array. 
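+ *
+ * Equivalent portable C for the plain 32-bit digit case (an illustrative
+ * sketch only; the generated loop below also handles partial-digit
+ * packing). Digit r[0] is least significant, so it lands in the last four
+ * bytes of the 512-byte big-endian output:
+ *
+ *     int i;
+ *     for (i = 0; i < 128; i++) {
+ *         a[511 - 4*i] = (byte)(r[i]);
+ *         a[510 - 4*i] = (byte)(r[i] >> 8);
+ *         a[509 - 4*i] = (byte)(r[i] >> 16);
+ *         a[508 - 4*i] = (byte)(r[i] >> 24);
+ *     }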
+ */ +static void sp_4096_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + j = 4096 / 8 - 1; + a[j] = 0; + for (i=0; i<128 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ + b += 8 - s; + if (j < 0) { + break; + } + while (b < 32) { + a[j--] = (byte)(r[i] >> b); + b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 32); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[b], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "ldr r10, [%[b], #8]\n\t" + "ldr r14, [%[b], #12]\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" + "ldr r8, [%[b], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "ldr r10, [%[b], #24]\n\t" + "ldr r14, [%[b], #28]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "str r6, [%[r], #24]\n\t" + "str r7, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[a], #40]\n\t" + "ldr r7, [%[a], #44]\n\t" + "ldr r8, [%[b], #32]\n\t" + "ldr r9, [%[b], #36]\n\t" + "ldr r10, [%[b], #40]\n\t" + "ldr r14, [%[b], #44]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" + "str r6, [%[r], #40]\n\t" + "str r7, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[a], #52]\n\t" + "ldr r6, [%[a], #56]\n\t" + "ldr r7, [%[a], #60]\n\t" + "ldr r8, [%[b], #48]\n\t" + "ldr r9, [%[b], #52]\n\t" + "ldr r10, [%[b], #56]\n\t" + "ldr r14, [%[b], #60]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #48]\n\t" + "str r5, [%[r], #52]\n\t" + "str r6, [%[r], #56]\n\t" + "str r7, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[a], #68]\n\t" + "ldr r6, [%[a], #72]\n\t" + "ldr r7, [%[a], #76]\n\t" + "ldr r8, [%[b], #64]\n\t" + "ldr r9, [%[b], #68]\n\t" + "ldr r10, [%[b], #72]\n\t" + "ldr r14, [%[b], #76]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #64]\n\t" + "str r5, [%[r], #68]\n\t" + "str r6, [%[r], #72]\n\t" + "str r7, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[a], #84]\n\t" + "ldr r6, [%[a], #88]\n\t" + "ldr r7, [%[a], #92]\n\t" + "ldr r8, [%[b], #80]\n\t" + "ldr r9, [%[b], #84]\n\t" + "ldr r10, [%[b], #88]\n\t" + "ldr r14, [%[b], #92]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #80]\n\t" + "str r5, [%[r], #84]\n\t" + "str r6, [%[r], #88]\n\t" + "str r7, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[a], #100]\n\t" + "ldr r6, [%[a], #104]\n\t" + "ldr r7, [%[a], #108]\n\t" + "ldr r8, [%[b], #96]\n\t" + 
"ldr r9, [%[b], #100]\n\t" + "ldr r10, [%[b], #104]\n\t" + "ldr r14, [%[b], #108]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #96]\n\t" + "str r5, [%[r], #100]\n\t" + "str r6, [%[r], #104]\n\t" + "str r7, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[a], #116]\n\t" + "ldr r6, [%[a], #120]\n\t" + "ldr r7, [%[a], #124]\n\t" + "ldr r8, [%[b], #112]\n\t" + "ldr r9, [%[b], #116]\n\t" + "ldr r10, [%[b], #120]\n\t" + "ldr r14, [%[b], #124]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #112]\n\t" + "str r5, [%[r], #116]\n\t" + "str r6, [%[r], #120]\n\t" + "str r7, [%[r], #124]\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r5, [%[a], #132]\n\t" + "ldr r6, [%[a], #136]\n\t" + "ldr r7, [%[a], #140]\n\t" + "ldr r8, [%[b], #128]\n\t" + "ldr r9, [%[b], #132]\n\t" + "ldr r10, [%[b], #136]\n\t" + "ldr r14, [%[b], #140]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #128]\n\t" + "str r5, [%[r], #132]\n\t" + "str r6, [%[r], #136]\n\t" + "str r7, [%[r], #140]\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r5, [%[a], #148]\n\t" + "ldr r6, [%[a], #152]\n\t" + "ldr r7, [%[a], #156]\n\t" + "ldr r8, [%[b], #144]\n\t" + "ldr r9, [%[b], #148]\n\t" + "ldr r10, [%[b], #152]\n\t" + "ldr r14, [%[b], #156]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #144]\n\t" + "str r5, [%[r], #148]\n\t" + "str r6, [%[r], #152]\n\t" + "str r7, [%[r], #156]\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r5, [%[a], #164]\n\t" + "ldr r6, [%[a], #168]\n\t" + "ldr r7, [%[a], #172]\n\t" + "ldr r8, [%[b], #160]\n\t" + "ldr r9, [%[b], #164]\n\t" + "ldr r10, [%[b], #168]\n\t" + "ldr r14, [%[b], #172]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #160]\n\t" + "str r5, [%[r], #164]\n\t" + "str r6, [%[r], #168]\n\t" + "str r7, [%[r], #172]\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r5, [%[a], #180]\n\t" + "ldr r6, [%[a], #184]\n\t" + "ldr r7, [%[a], #188]\n\t" + "ldr r8, [%[b], #176]\n\t" + "ldr r9, [%[b], #180]\n\t" + "ldr r10, [%[b], #184]\n\t" + "ldr r14, [%[b], #188]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #176]\n\t" + "str r5, [%[r], #180]\n\t" + "str r6, [%[r], #184]\n\t" + "str r7, [%[r], #188]\n\t" + "ldr r4, [%[a], #192]\n\t" + "ldr r5, [%[a], #196]\n\t" + "ldr r6, [%[a], #200]\n\t" + "ldr r7, [%[a], #204]\n\t" + "ldr r8, [%[b], #192]\n\t" + "ldr r9, [%[b], #196]\n\t" + "ldr r10, [%[b], #200]\n\t" + "ldr r14, [%[b], #204]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #192]\n\t" + "str r5, [%[r], #196]\n\t" + "str r6, [%[r], #200]\n\t" + "str r7, [%[r], #204]\n\t" + "ldr r4, [%[a], #208]\n\t" + "ldr r5, [%[a], #212]\n\t" + "ldr r6, [%[a], #216]\n\t" + "ldr r7, [%[a], #220]\n\t" + "ldr r8, [%[b], #208]\n\t" + "ldr r9, [%[b], #212]\n\t" + "ldr r10, [%[b], #216]\n\t" + "ldr r14, [%[b], #220]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #208]\n\t" + "str r5, [%[r], #212]\n\t" + "str r6, [%[r], #216]\n\t" + "str r7, [%[r], #220]\n\t" + "ldr r4, [%[a], #224]\n\t" + "ldr r5, [%[a], #228]\n\t" + "ldr r6, [%[a], #232]\n\t" + "ldr r7, [%[a], #236]\n\t" + 
"ldr r8, [%[b], #224]\n\t" + "ldr r9, [%[b], #228]\n\t" + "ldr r10, [%[b], #232]\n\t" + "ldr r14, [%[b], #236]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #224]\n\t" + "str r5, [%[r], #228]\n\t" + "str r6, [%[r], #232]\n\t" + "str r7, [%[r], #236]\n\t" + "ldr r4, [%[a], #240]\n\t" + "ldr r5, [%[a], #244]\n\t" + "ldr r6, [%[a], #248]\n\t" + "ldr r7, [%[a], #252]\n\t" + "ldr r8, [%[b], #240]\n\t" + "ldr r9, [%[b], #244]\n\t" + "ldr r10, [%[b], #248]\n\t" + "ldr r14, [%[b], #252]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #240]\n\t" + "str r5, [%[r], #244]\n\t" + "str r6, [%[r], #248]\n\t" + "str r7, [%[r], #252]\n\t" + "adc %[c], r12, r12\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. + */ +static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r2, [%[a], #0]\n\t" + "ldr r3, [%[a], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "ldr r8, [%[b], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #0]\n\t" + "str r3, [%[a], #4]\n\t" + "str r4, [%[a], #8]\n\t" + "str r5, [%[a], #12]\n\t" + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "ldr r8, [%[b], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #16]\n\t" + "str r3, [%[a], #20]\n\t" + "str r4, [%[a], #24]\n\t" + "str r5, [%[a], #28]\n\t" + "ldr r2, [%[a], #32]\n\t" + "ldr r3, [%[a], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "ldr r8, [%[b], #40]\n\t" + "ldr r9, [%[b], #44]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #32]\n\t" + "str r3, [%[a], #36]\n\t" + "str r4, [%[a], #40]\n\t" + "str r5, [%[a], #44]\n\t" + "ldr r2, [%[a], #48]\n\t" + "ldr r3, [%[a], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[a], #60]\n\t" + "ldr r6, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "ldr r8, [%[b], #56]\n\t" + "ldr r9, [%[b], #60]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #48]\n\t" + "str r3, [%[a], #52]\n\t" + "str r4, [%[a], #56]\n\t" + "str r5, [%[a], #60]\n\t" + "ldr r2, [%[a], #64]\n\t" + "ldr r3, [%[a], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[a], #76]\n\t" + "ldr r6, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "ldr r8, [%[b], #72]\n\t" + "ldr r9, [%[b], #76]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #64]\n\t" + "str r3, [%[a], #68]\n\t" + "str r4, [%[a], #72]\n\t" + "str r5, [%[a], #76]\n\t" + "ldr r2, [%[a], #80]\n\t" + "ldr r3, [%[a], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[a], #92]\n\t" + "ldr r6, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "ldr r8, [%[b], #88]\n\t" + 
"ldr r9, [%[b], #92]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #80]\n\t" + "str r3, [%[a], #84]\n\t" + "str r4, [%[a], #88]\n\t" + "str r5, [%[a], #92]\n\t" + "ldr r2, [%[a], #96]\n\t" + "ldr r3, [%[a], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[a], #108]\n\t" + "ldr r6, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "ldr r8, [%[b], #104]\n\t" + "ldr r9, [%[b], #108]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #96]\n\t" + "str r3, [%[a], #100]\n\t" + "str r4, [%[a], #104]\n\t" + "str r5, [%[a], #108]\n\t" + "ldr r2, [%[a], #112]\n\t" + "ldr r3, [%[a], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[a], #124]\n\t" + "ldr r6, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "ldr r8, [%[b], #120]\n\t" + "ldr r9, [%[b], #124]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #112]\n\t" + "str r3, [%[a], #116]\n\t" + "str r4, [%[a], #120]\n\t" + "str r5, [%[a], #124]\n\t" + "ldr r2, [%[a], #128]\n\t" + "ldr r3, [%[a], #132]\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r5, [%[a], #140]\n\t" + "ldr r6, [%[b], #128]\n\t" + "ldr r7, [%[b], #132]\n\t" + "ldr r8, [%[b], #136]\n\t" + "ldr r9, [%[b], #140]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #128]\n\t" + "str r3, [%[a], #132]\n\t" + "str r4, [%[a], #136]\n\t" + "str r5, [%[a], #140]\n\t" + "ldr r2, [%[a], #144]\n\t" + "ldr r3, [%[a], #148]\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r5, [%[a], #156]\n\t" + "ldr r6, [%[b], #144]\n\t" + "ldr r7, [%[b], #148]\n\t" + "ldr r8, [%[b], #152]\n\t" + "ldr r9, [%[b], #156]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #144]\n\t" + "str r3, [%[a], #148]\n\t" + "str r4, [%[a], #152]\n\t" + "str r5, [%[a], #156]\n\t" + "ldr r2, [%[a], #160]\n\t" + "ldr r3, [%[a], #164]\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r5, [%[a], #172]\n\t" + "ldr r6, [%[b], #160]\n\t" + "ldr r7, [%[b], #164]\n\t" + "ldr r8, [%[b], #168]\n\t" + "ldr r9, [%[b], #172]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #160]\n\t" + "str r3, [%[a], #164]\n\t" + "str r4, [%[a], #168]\n\t" + "str r5, [%[a], #172]\n\t" + "ldr r2, [%[a], #176]\n\t" + "ldr r3, [%[a], #180]\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r5, [%[a], #188]\n\t" + "ldr r6, [%[b], #176]\n\t" + "ldr r7, [%[b], #180]\n\t" + "ldr r8, [%[b], #184]\n\t" + "ldr r9, [%[b], #188]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #176]\n\t" + "str r3, [%[a], #180]\n\t" + "str r4, [%[a], #184]\n\t" + "str r5, [%[a], #188]\n\t" + "ldr r2, [%[a], #192]\n\t" + "ldr r3, [%[a], #196]\n\t" + "ldr r4, [%[a], #200]\n\t" + "ldr r5, [%[a], #204]\n\t" + "ldr r6, [%[b], #192]\n\t" + "ldr r7, [%[b], #196]\n\t" + "ldr r8, [%[b], #200]\n\t" + "ldr r9, [%[b], #204]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #192]\n\t" + "str r3, [%[a], #196]\n\t" + "str r4, [%[a], #200]\n\t" + "str r5, [%[a], #204]\n\t" + "ldr r2, [%[a], #208]\n\t" + "ldr r3, [%[a], #212]\n\t" + "ldr r4, [%[a], #216]\n\t" + "ldr r5, [%[a], #220]\n\t" + "ldr r6, [%[b], #208]\n\t" + "ldr r7, [%[b], #212]\n\t" + "ldr r8, [%[b], #216]\n\t" + "ldr r9, 
[%[b], #220]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #208]\n\t" + "str r3, [%[a], #212]\n\t" + "str r4, [%[a], #216]\n\t" + "str r5, [%[a], #220]\n\t" + "ldr r2, [%[a], #224]\n\t" + "ldr r3, [%[a], #228]\n\t" + "ldr r4, [%[a], #232]\n\t" + "ldr r5, [%[a], #236]\n\t" + "ldr r6, [%[b], #224]\n\t" + "ldr r7, [%[b], #228]\n\t" + "ldr r8, [%[b], #232]\n\t" + "ldr r9, [%[b], #236]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #224]\n\t" + "str r3, [%[a], #228]\n\t" + "str r4, [%[a], #232]\n\t" + "str r5, [%[a], #236]\n\t" + "ldr r2, [%[a], #240]\n\t" + "ldr r3, [%[a], #244]\n\t" + "ldr r4, [%[a], #248]\n\t" + "ldr r5, [%[a], #252]\n\t" + "ldr r6, [%[b], #240]\n\t" + "ldr r7, [%[b], #244]\n\t" + "ldr r8, [%[b], #248]\n\t" + "ldr r9, [%[b], #252]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #240]\n\t" + "str r3, [%[a], #244]\n\t" + "str r4, [%[a], #248]\n\t" + "str r5, [%[a], #252]\n\t" + "ldr r2, [%[a], #256]\n\t" + "ldr r3, [%[a], #260]\n\t" + "ldr r4, [%[a], #264]\n\t" + "ldr r5, [%[a], #268]\n\t" + "ldr r6, [%[b], #256]\n\t" + "ldr r7, [%[b], #260]\n\t" + "ldr r8, [%[b], #264]\n\t" + "ldr r9, [%[b], #268]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #256]\n\t" + "str r3, [%[a], #260]\n\t" + "str r4, [%[a], #264]\n\t" + "str r5, [%[a], #268]\n\t" + "ldr r2, [%[a], #272]\n\t" + "ldr r3, [%[a], #276]\n\t" + "ldr r4, [%[a], #280]\n\t" + "ldr r5, [%[a], #284]\n\t" + "ldr r6, [%[b], #272]\n\t" + "ldr r7, [%[b], #276]\n\t" + "ldr r8, [%[b], #280]\n\t" + "ldr r9, [%[b], #284]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #272]\n\t" + "str r3, [%[a], #276]\n\t" + "str r4, [%[a], #280]\n\t" + "str r5, [%[a], #284]\n\t" + "ldr r2, [%[a], #288]\n\t" + "ldr r3, [%[a], #292]\n\t" + "ldr r4, [%[a], #296]\n\t" + "ldr r5, [%[a], #300]\n\t" + "ldr r6, [%[b], #288]\n\t" + "ldr r7, [%[b], #292]\n\t" + "ldr r8, [%[b], #296]\n\t" + "ldr r9, [%[b], #300]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #288]\n\t" + "str r3, [%[a], #292]\n\t" + "str r4, [%[a], #296]\n\t" + "str r5, [%[a], #300]\n\t" + "ldr r2, [%[a], #304]\n\t" + "ldr r3, [%[a], #308]\n\t" + "ldr r4, [%[a], #312]\n\t" + "ldr r5, [%[a], #316]\n\t" + "ldr r6, [%[b], #304]\n\t" + "ldr r7, [%[b], #308]\n\t" + "ldr r8, [%[b], #312]\n\t" + "ldr r9, [%[b], #316]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #304]\n\t" + "str r3, [%[a], #308]\n\t" + "str r4, [%[a], #312]\n\t" + "str r5, [%[a], #316]\n\t" + "ldr r2, [%[a], #320]\n\t" + "ldr r3, [%[a], #324]\n\t" + "ldr r4, [%[a], #328]\n\t" + "ldr r5, [%[a], #332]\n\t" + "ldr r6, [%[b], #320]\n\t" + "ldr r7, [%[b], #324]\n\t" + "ldr r8, [%[b], #328]\n\t" + "ldr r9, [%[b], #332]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #320]\n\t" + "str r3, [%[a], #324]\n\t" + "str r4, [%[a], #328]\n\t" + "str r5, [%[a], #332]\n\t" + "ldr r2, [%[a], #336]\n\t" + "ldr r3, [%[a], #340]\n\t" + "ldr r4, [%[a], #344]\n\t" + "ldr r5, [%[a], #348]\n\t" + "ldr r6, [%[b], #336]\n\t" + "ldr r7, [%[b], #340]\n\t" + "ldr r8, [%[b], #344]\n\t" + "ldr r9, 
[%[b], #348]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #336]\n\t" + "str r3, [%[a], #340]\n\t" + "str r4, [%[a], #344]\n\t" + "str r5, [%[a], #348]\n\t" + "ldr r2, [%[a], #352]\n\t" + "ldr r3, [%[a], #356]\n\t" + "ldr r4, [%[a], #360]\n\t" + "ldr r5, [%[a], #364]\n\t" + "ldr r6, [%[b], #352]\n\t" + "ldr r7, [%[b], #356]\n\t" + "ldr r8, [%[b], #360]\n\t" + "ldr r9, [%[b], #364]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #352]\n\t" + "str r3, [%[a], #356]\n\t" + "str r4, [%[a], #360]\n\t" + "str r5, [%[a], #364]\n\t" + "ldr r2, [%[a], #368]\n\t" + "ldr r3, [%[a], #372]\n\t" + "ldr r4, [%[a], #376]\n\t" + "ldr r5, [%[a], #380]\n\t" + "ldr r6, [%[b], #368]\n\t" + "ldr r7, [%[b], #372]\n\t" + "ldr r8, [%[b], #376]\n\t" + "ldr r9, [%[b], #380]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #368]\n\t" + "str r3, [%[a], #372]\n\t" + "str r4, [%[a], #376]\n\t" + "str r5, [%[a], #380]\n\t" + "ldr r2, [%[a], #384]\n\t" + "ldr r3, [%[a], #388]\n\t" + "ldr r4, [%[a], #392]\n\t" + "ldr r5, [%[a], #396]\n\t" + "ldr r6, [%[b], #384]\n\t" + "ldr r7, [%[b], #388]\n\t" + "ldr r8, [%[b], #392]\n\t" + "ldr r9, [%[b], #396]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #384]\n\t" + "str r3, [%[a], #388]\n\t" + "str r4, [%[a], #392]\n\t" + "str r5, [%[a], #396]\n\t" + "ldr r2, [%[a], #400]\n\t" + "ldr r3, [%[a], #404]\n\t" + "ldr r4, [%[a], #408]\n\t" + "ldr r5, [%[a], #412]\n\t" + "ldr r6, [%[b], #400]\n\t" + "ldr r7, [%[b], #404]\n\t" + "ldr r8, [%[b], #408]\n\t" + "ldr r9, [%[b], #412]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #400]\n\t" + "str r3, [%[a], #404]\n\t" + "str r4, [%[a], #408]\n\t" + "str r5, [%[a], #412]\n\t" + "ldr r2, [%[a], #416]\n\t" + "ldr r3, [%[a], #420]\n\t" + "ldr r4, [%[a], #424]\n\t" + "ldr r5, [%[a], #428]\n\t" + "ldr r6, [%[b], #416]\n\t" + "ldr r7, [%[b], #420]\n\t" + "ldr r8, [%[b], #424]\n\t" + "ldr r9, [%[b], #428]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #416]\n\t" + "str r3, [%[a], #420]\n\t" + "str r4, [%[a], #424]\n\t" + "str r5, [%[a], #428]\n\t" + "ldr r2, [%[a], #432]\n\t" + "ldr r3, [%[a], #436]\n\t" + "ldr r4, [%[a], #440]\n\t" + "ldr r5, [%[a], #444]\n\t" + "ldr r6, [%[b], #432]\n\t" + "ldr r7, [%[b], #436]\n\t" + "ldr r8, [%[b], #440]\n\t" + "ldr r9, [%[b], #444]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #432]\n\t" + "str r3, [%[a], #436]\n\t" + "str r4, [%[a], #440]\n\t" + "str r5, [%[a], #444]\n\t" + "ldr r2, [%[a], #448]\n\t" + "ldr r3, [%[a], #452]\n\t" + "ldr r4, [%[a], #456]\n\t" + "ldr r5, [%[a], #460]\n\t" + "ldr r6, [%[b], #448]\n\t" + "ldr r7, [%[b], #452]\n\t" + "ldr r8, [%[b], #456]\n\t" + "ldr r9, [%[b], #460]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #448]\n\t" + "str r3, [%[a], #452]\n\t" + "str r4, [%[a], #456]\n\t" + "str r5, [%[a], #460]\n\t" + "ldr r2, [%[a], #464]\n\t" + "ldr r3, [%[a], #468]\n\t" + "ldr r4, [%[a], #472]\n\t" + "ldr r5, [%[a], #476]\n\t" + "ldr r6, [%[b], #464]\n\t" + "ldr r7, [%[b], #468]\n\t" + "ldr r8, [%[b], #472]\n\t" + "ldr r9, 
[%[b], #476]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #464]\n\t" + "str r3, [%[a], #468]\n\t" + "str r4, [%[a], #472]\n\t" + "str r5, [%[a], #476]\n\t" + "ldr r2, [%[a], #480]\n\t" + "ldr r3, [%[a], #484]\n\t" + "ldr r4, [%[a], #488]\n\t" + "ldr r5, [%[a], #492]\n\t" + "ldr r6, [%[b], #480]\n\t" + "ldr r7, [%[b], #484]\n\t" + "ldr r8, [%[b], #488]\n\t" + "ldr r9, [%[b], #492]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #480]\n\t" + "str r3, [%[a], #484]\n\t" + "str r4, [%[a], #488]\n\t" + "str r5, [%[a], #492]\n\t" + "ldr r2, [%[a], #496]\n\t" + "ldr r3, [%[a], #500]\n\t" + "ldr r4, [%[a], #504]\n\t" + "ldr r5, [%[a], #508]\n\t" + "ldr r6, [%[b], #496]\n\t" + "ldr r7, [%[b], #500]\n\t" + "ldr r8, [%[b], #504]\n\t" + "ldr r9, [%[b], #508]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #496]\n\t" + "str r3, [%[a], #500]\n\t" + "str r4, [%[a], #504]\n\t" + "str r5, [%[a], #508]\n\t" + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [a] "r" (a), [b] "r" (b) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[b], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "ldr r10, [%[b], #8]\n\t" + "ldr r14, [%[b], #12]\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" + "ldr r8, [%[b], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "ldr r10, [%[b], #24]\n\t" + "ldr r14, [%[b], #28]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "str r6, [%[r], #24]\n\t" + "str r7, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[a], #40]\n\t" + "ldr r7, [%[a], #44]\n\t" + "ldr r8, [%[b], #32]\n\t" + "ldr r9, [%[b], #36]\n\t" + "ldr r10, [%[b], #40]\n\t" + "ldr r14, [%[b], #44]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" + "str r6, [%[r], #40]\n\t" + "str r7, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[a], #52]\n\t" + "ldr r6, [%[a], #56]\n\t" + "ldr r7, [%[a], #60]\n\t" + "ldr r8, [%[b], #48]\n\t" + "ldr r9, [%[b], #52]\n\t" + "ldr r10, [%[b], #56]\n\t" + "ldr r14, [%[b], #60]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #48]\n\t" + "str r5, [%[r], #52]\n\t" + "str r6, [%[r], #56]\n\t" + "str r7, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[a], #68]\n\t" + "ldr r6, [%[a], #72]\n\t" + "ldr r7, [%[a], #76]\n\t" + "ldr r8, [%[b], #64]\n\t" + "ldr r9, [%[b], #68]\n\t" + "ldr r10, [%[b], #72]\n\t" + "ldr r14, [%[b], #76]\n\t" + "adcs r4, r4, 
r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #64]\n\t" + "str r5, [%[r], #68]\n\t" + "str r6, [%[r], #72]\n\t" + "str r7, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[a], #84]\n\t" + "ldr r6, [%[a], #88]\n\t" + "ldr r7, [%[a], #92]\n\t" + "ldr r8, [%[b], #80]\n\t" + "ldr r9, [%[b], #84]\n\t" + "ldr r10, [%[b], #88]\n\t" + "ldr r14, [%[b], #92]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #80]\n\t" + "str r5, [%[r], #84]\n\t" + "str r6, [%[r], #88]\n\t" + "str r7, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[a], #100]\n\t" + "ldr r6, [%[a], #104]\n\t" + "ldr r7, [%[a], #108]\n\t" + "ldr r8, [%[b], #96]\n\t" + "ldr r9, [%[b], #100]\n\t" + "ldr r10, [%[b], #104]\n\t" + "ldr r14, [%[b], #108]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #96]\n\t" + "str r5, [%[r], #100]\n\t" + "str r6, [%[r], #104]\n\t" + "str r7, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[a], #116]\n\t" + "ldr r6, [%[a], #120]\n\t" + "ldr r7, [%[a], #124]\n\t" + "ldr r8, [%[b], #112]\n\t" + "ldr r9, [%[b], #116]\n\t" + "ldr r10, [%[b], #120]\n\t" + "ldr r14, [%[b], #124]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #112]\n\t" + "str r5, [%[r], #116]\n\t" + "str r6, [%[r], #120]\n\t" + "str r7, [%[r], #124]\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r5, [%[a], #132]\n\t" + "ldr r6, [%[a], #136]\n\t" + "ldr r7, [%[a], #140]\n\t" + "ldr r8, [%[b], #128]\n\t" + "ldr r9, [%[b], #132]\n\t" + "ldr r10, [%[b], #136]\n\t" + "ldr r14, [%[b], #140]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #128]\n\t" + "str r5, [%[r], #132]\n\t" + "str r6, [%[r], #136]\n\t" + "str r7, [%[r], #140]\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r5, [%[a], #148]\n\t" + "ldr r6, [%[a], #152]\n\t" + "ldr r7, [%[a], #156]\n\t" + "ldr r8, [%[b], #144]\n\t" + "ldr r9, [%[b], #148]\n\t" + "ldr r10, [%[b], #152]\n\t" + "ldr r14, [%[b], #156]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #144]\n\t" + "str r5, [%[r], #148]\n\t" + "str r6, [%[r], #152]\n\t" + "str r7, [%[r], #156]\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r5, [%[a], #164]\n\t" + "ldr r6, [%[a], #168]\n\t" + "ldr r7, [%[a], #172]\n\t" + "ldr r8, [%[b], #160]\n\t" + "ldr r9, [%[b], #164]\n\t" + "ldr r10, [%[b], #168]\n\t" + "ldr r14, [%[b], #172]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #160]\n\t" + "str r5, [%[r], #164]\n\t" + "str r6, [%[r], #168]\n\t" + "str r7, [%[r], #172]\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r5, [%[a], #180]\n\t" + "ldr r6, [%[a], #184]\n\t" + "ldr r7, [%[a], #188]\n\t" + "ldr r8, [%[b], #176]\n\t" + "ldr r9, [%[b], #180]\n\t" + "ldr r10, [%[b], #184]\n\t" + "ldr r14, [%[b], #188]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #176]\n\t" + "str r5, [%[r], #180]\n\t" + "str r6, [%[r], #184]\n\t" + "str r7, [%[r], #188]\n\t" + "ldr r4, [%[a], #192]\n\t" + "ldr r5, [%[a], #196]\n\t" + "ldr r6, [%[a], #200]\n\t" + "ldr r7, [%[a], #204]\n\t" + "ldr r8, [%[b], #192]\n\t" + "ldr r9, [%[b], #196]\n\t" + "ldr r10, [%[b], #200]\n\t" + "ldr r14, [%[b], #204]\n\t" + 
"adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #192]\n\t" + "str r5, [%[r], #196]\n\t" + "str r6, [%[r], #200]\n\t" + "str r7, [%[r], #204]\n\t" + "ldr r4, [%[a], #208]\n\t" + "ldr r5, [%[a], #212]\n\t" + "ldr r6, [%[a], #216]\n\t" + "ldr r7, [%[a], #220]\n\t" + "ldr r8, [%[b], #208]\n\t" + "ldr r9, [%[b], #212]\n\t" + "ldr r10, [%[b], #216]\n\t" + "ldr r14, [%[b], #220]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #208]\n\t" + "str r5, [%[r], #212]\n\t" + "str r6, [%[r], #216]\n\t" + "str r7, [%[r], #220]\n\t" + "ldr r4, [%[a], #224]\n\t" + "ldr r5, [%[a], #228]\n\t" + "ldr r6, [%[a], #232]\n\t" + "ldr r7, [%[a], #236]\n\t" + "ldr r8, [%[b], #224]\n\t" + "ldr r9, [%[b], #228]\n\t" + "ldr r10, [%[b], #232]\n\t" + "ldr r14, [%[b], #236]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #224]\n\t" + "str r5, [%[r], #228]\n\t" + "str r6, [%[r], #232]\n\t" + "str r7, [%[r], #236]\n\t" + "ldr r4, [%[a], #240]\n\t" + "ldr r5, [%[a], #244]\n\t" + "ldr r6, [%[a], #248]\n\t" + "ldr r7, [%[a], #252]\n\t" + "ldr r8, [%[b], #240]\n\t" + "ldr r9, [%[b], #244]\n\t" + "ldr r10, [%[b], #248]\n\t" + "ldr r14, [%[b], #252]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #240]\n\t" + "str r5, [%[r], #244]\n\t" + "str r6, [%[r], #248]\n\t" + "str r7, [%[r], #252]\n\t" + "ldr r4, [%[a], #256]\n\t" + "ldr r5, [%[a], #260]\n\t" + "ldr r6, [%[a], #264]\n\t" + "ldr r7, [%[a], #268]\n\t" + "ldr r8, [%[b], #256]\n\t" + "ldr r9, [%[b], #260]\n\t" + "ldr r10, [%[b], #264]\n\t" + "ldr r14, [%[b], #268]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #256]\n\t" + "str r5, [%[r], #260]\n\t" + "str r6, [%[r], #264]\n\t" + "str r7, [%[r], #268]\n\t" + "ldr r4, [%[a], #272]\n\t" + "ldr r5, [%[a], #276]\n\t" + "ldr r6, [%[a], #280]\n\t" + "ldr r7, [%[a], #284]\n\t" + "ldr r8, [%[b], #272]\n\t" + "ldr r9, [%[b], #276]\n\t" + "ldr r10, [%[b], #280]\n\t" + "ldr r14, [%[b], #284]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #272]\n\t" + "str r5, [%[r], #276]\n\t" + "str r6, [%[r], #280]\n\t" + "str r7, [%[r], #284]\n\t" + "ldr r4, [%[a], #288]\n\t" + "ldr r5, [%[a], #292]\n\t" + "ldr r6, [%[a], #296]\n\t" + "ldr r7, [%[a], #300]\n\t" + "ldr r8, [%[b], #288]\n\t" + "ldr r9, [%[b], #292]\n\t" + "ldr r10, [%[b], #296]\n\t" + "ldr r14, [%[b], #300]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #288]\n\t" + "str r5, [%[r], #292]\n\t" + "str r6, [%[r], #296]\n\t" + "str r7, [%[r], #300]\n\t" + "ldr r4, [%[a], #304]\n\t" + "ldr r5, [%[a], #308]\n\t" + "ldr r6, [%[a], #312]\n\t" + "ldr r7, [%[a], #316]\n\t" + "ldr r8, [%[b], #304]\n\t" + "ldr r9, [%[b], #308]\n\t" + "ldr r10, [%[b], #312]\n\t" + "ldr r14, [%[b], #316]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #304]\n\t" + "str r5, [%[r], #308]\n\t" + "str r6, [%[r], #312]\n\t" + "str r7, [%[r], #316]\n\t" + "ldr r4, [%[a], #320]\n\t" + "ldr r5, [%[a], #324]\n\t" + "ldr r6, [%[a], #328]\n\t" + "ldr r7, [%[a], #332]\n\t" + "ldr r8, [%[b], #320]\n\t" + "ldr r9, [%[b], #324]\n\t" + "ldr r10, [%[b], #328]\n\t" + 
"ldr r14, [%[b], #332]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #320]\n\t" + "str r5, [%[r], #324]\n\t" + "str r6, [%[r], #328]\n\t" + "str r7, [%[r], #332]\n\t" + "ldr r4, [%[a], #336]\n\t" + "ldr r5, [%[a], #340]\n\t" + "ldr r6, [%[a], #344]\n\t" + "ldr r7, [%[a], #348]\n\t" + "ldr r8, [%[b], #336]\n\t" + "ldr r9, [%[b], #340]\n\t" + "ldr r10, [%[b], #344]\n\t" + "ldr r14, [%[b], #348]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #336]\n\t" + "str r5, [%[r], #340]\n\t" + "str r6, [%[r], #344]\n\t" + "str r7, [%[r], #348]\n\t" + "ldr r4, [%[a], #352]\n\t" + "ldr r5, [%[a], #356]\n\t" + "ldr r6, [%[a], #360]\n\t" + "ldr r7, [%[a], #364]\n\t" + "ldr r8, [%[b], #352]\n\t" + "ldr r9, [%[b], #356]\n\t" + "ldr r10, [%[b], #360]\n\t" + "ldr r14, [%[b], #364]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #352]\n\t" + "str r5, [%[r], #356]\n\t" + "str r6, [%[r], #360]\n\t" + "str r7, [%[r], #364]\n\t" + "ldr r4, [%[a], #368]\n\t" + "ldr r5, [%[a], #372]\n\t" + "ldr r6, [%[a], #376]\n\t" + "ldr r7, [%[a], #380]\n\t" + "ldr r8, [%[b], #368]\n\t" + "ldr r9, [%[b], #372]\n\t" + "ldr r10, [%[b], #376]\n\t" + "ldr r14, [%[b], #380]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #368]\n\t" + "str r5, [%[r], #372]\n\t" + "str r6, [%[r], #376]\n\t" + "str r7, [%[r], #380]\n\t" + "ldr r4, [%[a], #384]\n\t" + "ldr r5, [%[a], #388]\n\t" + "ldr r6, [%[a], #392]\n\t" + "ldr r7, [%[a], #396]\n\t" + "ldr r8, [%[b], #384]\n\t" + "ldr r9, [%[b], #388]\n\t" + "ldr r10, [%[b], #392]\n\t" + "ldr r14, [%[b], #396]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #384]\n\t" + "str r5, [%[r], #388]\n\t" + "str r6, [%[r], #392]\n\t" + "str r7, [%[r], #396]\n\t" + "ldr r4, [%[a], #400]\n\t" + "ldr r5, [%[a], #404]\n\t" + "ldr r6, [%[a], #408]\n\t" + "ldr r7, [%[a], #412]\n\t" + "ldr r8, [%[b], #400]\n\t" + "ldr r9, [%[b], #404]\n\t" + "ldr r10, [%[b], #408]\n\t" + "ldr r14, [%[b], #412]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #400]\n\t" + "str r5, [%[r], #404]\n\t" + "str r6, [%[r], #408]\n\t" + "str r7, [%[r], #412]\n\t" + "ldr r4, [%[a], #416]\n\t" + "ldr r5, [%[a], #420]\n\t" + "ldr r6, [%[a], #424]\n\t" + "ldr r7, [%[a], #428]\n\t" + "ldr r8, [%[b], #416]\n\t" + "ldr r9, [%[b], #420]\n\t" + "ldr r10, [%[b], #424]\n\t" + "ldr r14, [%[b], #428]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #416]\n\t" + "str r5, [%[r], #420]\n\t" + "str r6, [%[r], #424]\n\t" + "str r7, [%[r], #428]\n\t" + "ldr r4, [%[a], #432]\n\t" + "ldr r5, [%[a], #436]\n\t" + "ldr r6, [%[a], #440]\n\t" + "ldr r7, [%[a], #444]\n\t" + "ldr r8, [%[b], #432]\n\t" + "ldr r9, [%[b], #436]\n\t" + "ldr r10, [%[b], #440]\n\t" + "ldr r14, [%[b], #444]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #432]\n\t" + "str r5, [%[r], #436]\n\t" + "str r6, [%[r], #440]\n\t" + "str r7, [%[r], #444]\n\t" + "ldr r4, [%[a], #448]\n\t" + "ldr r5, [%[a], #452]\n\t" + "ldr r6, [%[a], #456]\n\t" + "ldr r7, [%[a], #460]\n\t" + "ldr r8, [%[b], #448]\n\t" + "ldr r9, [%[b], #452]\n\t" + 
"ldr r10, [%[b], #456]\n\t" + "ldr r14, [%[b], #460]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #448]\n\t" + "str r5, [%[r], #452]\n\t" + "str r6, [%[r], #456]\n\t" + "str r7, [%[r], #460]\n\t" + "ldr r4, [%[a], #464]\n\t" + "ldr r5, [%[a], #468]\n\t" + "ldr r6, [%[a], #472]\n\t" + "ldr r7, [%[a], #476]\n\t" + "ldr r8, [%[b], #464]\n\t" + "ldr r9, [%[b], #468]\n\t" + "ldr r10, [%[b], #472]\n\t" + "ldr r14, [%[b], #476]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #464]\n\t" + "str r5, [%[r], #468]\n\t" + "str r6, [%[r], #472]\n\t" + "str r7, [%[r], #476]\n\t" + "ldr r4, [%[a], #480]\n\t" + "ldr r5, [%[a], #484]\n\t" + "ldr r6, [%[a], #488]\n\t" + "ldr r7, [%[a], #492]\n\t" + "ldr r8, [%[b], #480]\n\t" + "ldr r9, [%[b], #484]\n\t" + "ldr r10, [%[b], #488]\n\t" + "ldr r14, [%[b], #492]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #480]\n\t" + "str r5, [%[r], #484]\n\t" + "str r6, [%[r], #488]\n\t" + "str r7, [%[r], #492]\n\t" + "ldr r4, [%[a], #496]\n\t" + "ldr r5, [%[a], #500]\n\t" + "ldr r6, [%[a], #504]\n\t" + "ldr r7, [%[a], #508]\n\t" + "ldr r8, [%[b], #496]\n\t" + "ldr r9, [%[b], #500]\n\t" + "ldr r10, [%[b], #504]\n\t" + "ldr r14, [%[b], #508]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #496]\n\t" + "str r5, [%[r], #500]\n\t" + "str r6, [%[r], #504]\n\t" + "str r7, [%[r], #508]\n\t" + "adc %[c], r12, r12\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "sub sp, sp, #256\n\t"
+        "mov r10, #0\n\t"
+        "# A[0] * B[0]\n\t"
+        "ldr r8, [%[a], #0]\n\t"
+        "ldr r9, [%[b], #0]\n\t"
+        "umull r3, r4, r8, r9\n\t"
+        "mov r5, #0\n\t"
+        "str r3, [sp]\n\t"
+        "# A[0] * B[1]\n\t"
+        "ldr r8, [%[a], #0]\n\t"
+        "ldr r9, [%[b], #4]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r10, r10\n\t"
+        "# A[1] * B[0]\n\t"
+        "ldr r8, [%[a], #4]\n\t"
+        "ldr r9, [%[b], #0]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "str r4, [sp, #4]\n\t"
+        "# A[0] * B[2]\n\t"
+        "ldr r8, [%[a], #0]\n\t"
+        "ldr r9, [%[b], #8]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r10, r10\n\t"
+        "# A[1] * B[1]\n\t"
+        "ldr r8, [%[a], #4]\n\t"
+        "ldr r9, [%[b], #4]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[2] * B[0]\n\t"
+        "ldr r8, [%[a], #8]\n\t"
+        "ldr r9, [%[b], #0]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "str r5, [sp, #8]\n\t"
+        "# A[0] * B[3]\n\t"
+        "ldr r8, [%[a], #0]\n\t"
+        "ldr r9, [%[b], #12]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r10, r10\n\t"
+        "# A[1] * B[2]\n\t"
+        "ldr r8, [%[a], #4]\n\t"
+        "ldr r9, [%[b], #8]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[2] * B[1]\n\t"
+        "ldr r8, [%[a], #8]\n\t"
+        "ldr r9, [%[b], #4]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[3] * B[0]\n\t"
+        "ldr r8, [%[a], #12]\n\t"
+        "ldr r9, [%[b], #0]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [sp, #12]\n\t"
+        "# A[0] * B[4]\n\t"
+        "ldr r8, [%[a], #0]\n\t"
+        "ldr r9, [%[b], #16]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r10, r10\n\t"
+        "# A[1] * B[3]\n\t"
+        "ldr r8, [%[a], #4]\n\t"
+        "ldr r9, [%[b], #12]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[2] * B[2]\n\t"
+        "ldr r8, [%[a], #8]\n\t"
+        "ldr r9, [%[b], #8]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[3] * B[1]\n\t"
+        "ldr r8, [%[a], #12]\n\t"
+        "ldr r9, [%[b], #4]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[4] * B[0]\n\t"
+        "ldr r8, [%[a], #16]\n\t"
+        "ldr r9, [%[b], #0]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "str r4, [sp, #16]\n\t"
+        "# A[0] * B[5]\n\t"
+        "ldr r8, [%[a], #0]\n\t"
+        "ldr r9, [%[b], #20]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r10, r10\n\t"
+        "# A[1] * B[4]\n\t"
+        "ldr r8, [%[a], #4]\n\t"
+        "ldr r9, [%[b], #16]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[2] * B[3]\n\t"
+        "ldr r8, [%[a], #8]\n\t"
+        "ldr r9, [%[b], #12]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[3] * B[2]\n\t"
+        "ldr r8, [%[a], #12]\n\t"
+        "ldr r9, [%[b], #8]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5,
r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[1]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[0]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #20]\n\t" + "# A[0] * B[6]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[5]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[4]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[3]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[2]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[1]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[0]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #24]\n\t" + "# A[0] * B[7]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[6]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[5]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[4]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[3]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[2]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[1]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[0]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #28]\n\t" + "# A[0] * B[8]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[7]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[6]\n\t" + "ldr r8, 
[%[a], #8]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[5]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[4]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[3]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[2]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[1]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[0]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #32]\n\t" + "# A[0] * B[9]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[8]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[7]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[6]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[5]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[4]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[3]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[2]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[1]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[0]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #36]\n\t" + "# A[0] * B[10]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[9]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[8]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, 
r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[7]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[6]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[5]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[4]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[3]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[2]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[1]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[0]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #40]\n\t" + "# A[0] * B[11]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[10]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[9]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[8]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[7]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[6]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[5]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[4]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[3]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[2]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[1]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[0]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, 
r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #44]\n\t" + "# A[0] * B[12]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[11]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[10]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[9]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[8]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[7]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[6]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[5]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[4]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[3]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[2]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[1]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[0]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #48]\n\t" + "# A[0] * B[13]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[12]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[11]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[10]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[9]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[8]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * 
B[7]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[6]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[5]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[4]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[3]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[2]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[1]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[0]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #52]\n\t" + "# A[0] * B[14]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[13]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[12]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[11]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[10]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[9]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[8]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[7]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[6]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[5]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[4]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[3]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[2]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[1]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[0]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #56]\n\t" + "# A[0] * B[15]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[14]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[13]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[12]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[11]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[10]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[9]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[8]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[7]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[6]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[5]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[4]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[3]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[2]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[1]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[0]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #60]\n\t" + "# A[0] * B[16]\n\t" + "ldr r8, [%[a], 
#0]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[15]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[14]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[13]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[12]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[11]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[10]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[9]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[8]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[7]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[6]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[5]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[4]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[3]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[2]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[1]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[0]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #64]\n\t" + "# A[0] * B[17]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[16]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[15]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + 
"adc r4, r4, r10\n\t" + "# A[3] * B[14]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[13]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[12]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[11]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[10]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[9]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[8]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[7]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[6]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[5]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[4]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[3]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[2]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[1]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[0]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #68]\n\t" + "# A[0] * B[18]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[17]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[16]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[15]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[14]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, 
r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[13]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[12]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[11]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[10]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[9]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[8]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[7]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[6]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[5]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[4]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[3]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[2]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[1]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[0]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #72]\n\t" + "# A[0] * B[19]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[18]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[17]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[16]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[15]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[14]\n\t" + 
"ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[13]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[12]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[11]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[10]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[9]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[8]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[7]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[6]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[5]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[4]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[3]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[2]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[1]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[0]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #76]\n\t" + "# A[0] * B[20]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[19]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[18]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[17]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[16]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[15]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[14]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[13]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[12]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[11]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[10]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[9]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[8]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[7]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[6]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[5]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[4]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[3]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[2]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[1]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[0]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #80]\n\t" + "# A[0] * B[21]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[20]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[19]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[18]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, 
[%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[17]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[16]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[15]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[14]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[13]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[12]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[11]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[10]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[9]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[8]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[7]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[6]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[5]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[4]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[3]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[2]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[1]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[0]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #84]\n\t" + "# A[0] * B[22]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, 
r10\n\t" + "# A[1] * B[21]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[20]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[19]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[18]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[17]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[16]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[15]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[14]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[13]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[12]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[11]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[10]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[9]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[8]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[7]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[6]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[5]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[4]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[3]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[2]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[1]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[0]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #88]\n\t" + "# A[0] * B[23]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[22]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[21]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[20]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[19]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[18]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[17]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[16]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[15]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[14]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[13]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[12]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[11]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[10]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[9]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[8]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[7]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[6]\n\t" + "ldr r8, [%[a], #68]\n\t" + 
"ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[5]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[4]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[3]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[2]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[1]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[0]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #92]\n\t" + "# A[0] * B[24]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[23]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[22]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[21]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[20]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[19]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[18]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[17]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[16]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[15]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[14]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[13]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[12]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc 
r5, r5, r10\n\t" + "# A[13] * B[11]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[10]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[9]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[8]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[7]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[6]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[5]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[4]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[3]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[2]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[1]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[0]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #96]\n\t" + "# A[0] * B[25]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[24]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[23]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[22]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[21]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[20]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[19]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[18]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, 
r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[17]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[16]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[15]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[14]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[13]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[12]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[11]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[10]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[9]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[8]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[7]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[6]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[5]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[4]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[3]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[2]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[1]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[0]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #100]\n\t" + "# A[0] * B[26]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * 
B[25]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[24]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[23]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[22]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[21]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[20]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[19]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[18]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[17]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[16]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[15]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[14]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[13]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[12]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[11]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[10]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[9]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[8]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[7]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[6]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs 
r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[5]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[4]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[3]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[2]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[1]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[0]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #104]\n\t" + "# A[0] * B[27]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[26]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[25]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[24]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[23]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[22]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[21]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[20]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[19]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[18]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[17]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[16]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[15]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[14]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, 
[%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[13]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[12]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[11]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[10]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[9]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[8]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[7]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[6]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[5]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[4]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[3]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[2]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[1]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[0]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #108]\n\t" + "# A[0] * B[28]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[27]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[26]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[25]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[24]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc 
r3, r3, r10\n\t" + "# A[5] * B[23]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[22]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[21]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[20]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[19]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[18]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[17]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[16]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[15]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[14]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[13]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[12]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[11]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[10]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[9]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[8]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[7]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[6]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[5]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[4]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" 
+ "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[3]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[2]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[1]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[0]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #112]\n\t" + "# A[0] * B[29]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[28]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[27]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[26]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[25]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[24]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[23]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[22]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[21]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[20]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[19]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[18]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[17]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[16]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[15]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[14]\n\t" + 
"ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[13]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[12]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[11]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[10]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[9]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[8]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[7]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[6]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[5]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[4]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[3]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[2]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[1]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[0]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #116]\n\t" + "# A[0] * B[30]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[29]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[28]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[27]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[26]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, 
r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[25]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[24]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[23]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[22]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[21]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[20]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[19]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[18]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[17]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[16]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[15]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[14]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[13]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[12]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[11]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[10]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[9]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[8]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[7]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[6]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, 
[%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[5]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[4]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[3]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[2]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[1]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[0]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #120]\n\t" + "# A[0] * B[31]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[30]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[29]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[28]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[27]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[26]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[25]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[24]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[23]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[22]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[21]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[20]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[19]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + 
"adc r3, r3, r10\n\t" + "# A[13] * B[18]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[17]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[16]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[15]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[14]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[13]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[12]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[11]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[10]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[9]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[8]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[7]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[6]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[5]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[4]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[3]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[2]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[1]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[0]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #124]\n\t" + "# A[0] * B[32]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], 
#128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[31]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[30]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[29]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[28]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[27]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[26]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[25]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[24]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[23]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[22]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[21]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[20]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[19]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[18]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[17]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[16]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[15]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[14]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[13]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# 
A[20] * B[12]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[11]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[10]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[9]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[8]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[7]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[6]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[5]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[4]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[3]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[2]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[1]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[0]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #128]\n\t" + "# A[0] * B[33]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[32]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[31]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[30]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[29]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[28]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[27]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[26]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[25]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[24]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[23]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[22]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[21]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[20]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[19]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[18]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[17]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[16]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[15]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[14]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[13]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[12]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[11]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[10]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[9]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[8]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[7]\n\t" + "ldr r8, 
[%[a], #104]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[6]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[5]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[4]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[3]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[2]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[1]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[0]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #132]\n\t" + "# A[0] * B[34]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[33]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[32]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[31]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[30]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[29]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[28]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[27]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[26]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[25]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[24]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[23]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[22]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[21]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[20]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[19]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[18]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[17]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[16]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[15]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[14]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[13]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[12]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[11]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[10]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[9]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[8]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[7]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[6]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[5]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[4]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[3]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr 
r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[2]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[1]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[0]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #136]\n\t" + "# A[0] * B[35]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[34]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[33]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[32]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[31]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[30]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[29]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[28]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[27]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[26]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[25]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[24]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[23]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[22]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[21]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[20]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[19]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[18]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[17]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[16]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[15]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[14]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[13]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[12]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[11]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[10]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[9]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[8]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[7]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[6]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[5]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[4]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[3]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[2]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[1]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[0]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #0]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #140]\n\t" + "# A[0] * B[36]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[35]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[34]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[33]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[32]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[31]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[30]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[29]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[28]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[27]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[26]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[25]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[24]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[23]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[22]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[21]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[20]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[19]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[18]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, 
r5, r10\n\t" + "# A[19] * B[17]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[16]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[15]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[14]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[13]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[12]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[11]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[10]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[9]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[8]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[7]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[6]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[5]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[4]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[3]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[2]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[1]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[0]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #144]\n\t" + "# A[0] * B[37]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[36]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], 
#144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[35]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[34]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[33]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[32]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[31]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[30]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[29]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[28]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[27]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[26]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[25]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[24]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[23]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[22]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[21]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[20]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[19]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[18]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[17]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + 
"# A[21] * B[16]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[15]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[14]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[13]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[12]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[11]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[10]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[9]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[8]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[7]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[6]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[5]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[4]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[3]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[2]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[1]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[0]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #148]\n\t" + "# A[0] * B[38]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[37]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[36]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, 
r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[35]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[34]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[33]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[32]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[31]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[30]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[29]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[28]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[27]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[26]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[25]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[24]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[23]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[22]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[21]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[20]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[19]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[18]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[17]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[16]\n\t" 
+ "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[15]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[14]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[13]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[12]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[11]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[10]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[9]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[8]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[7]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[6]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[5]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[4]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[3]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[2]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[1]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[0]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #152]\n\t" + "# A[0] * B[39]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[38]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[37]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[36]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[35]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[34]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[33]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[32]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[31]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[30]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[29]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[28]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[27]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[26]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[25]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[24]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[23]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[22]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[21]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[20]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[19]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[18]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[17]\n\t" + "ldr r8, 
[%[a], #88]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[16]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[15]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[14]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[13]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[12]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[11]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[10]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[9]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[8]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[7]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[6]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[5]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[4]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[3]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[2]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[1]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[0]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #156]\n\t" + "# A[0] * B[40]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[39]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, 
r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[38]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[37]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[36]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[35]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[34]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[33]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[32]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[31]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[30]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[29]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[28]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[27]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[26]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[25]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[24]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[23]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[22]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[21]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[20]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[19]\n\t" + "ldr r8, [%[a], 
#84]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[18]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[17]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[16]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[15]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[14]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[13]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[12]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[11]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[10]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[9]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[8]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[7]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[6]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[5]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[4]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[3]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[2]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[1]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[0]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, 
r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #160]\n\t" + "# A[0] * B[41]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[40]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[39]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[38]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[37]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[36]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[35]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[34]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[33]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[32]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[31]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[30]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[29]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[28]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[27]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[26]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[25]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[24]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[23]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[22]\n\t" + "ldr r8, [%[a], #76]\n\t" 
+ "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[21]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[20]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[19]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[18]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[17]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[16]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[15]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[14]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[13]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[12]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[11]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[10]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[9]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[8]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[7]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[6]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[5]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[4]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[3]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + 
"adc r4, r4, r10\n\t" + "# A[39] * B[2]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[1]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[0]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #164]\n\t" + "# A[0] * B[42]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[41]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[40]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[39]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[38]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[37]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[36]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[35]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[34]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[33]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[32]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[31]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[30]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[29]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[28]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[27]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[26]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, 
[%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[25]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[24]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[23]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[22]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[21]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[20]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[19]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[18]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[17]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[16]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[15]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[14]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[13]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[12]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[11]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[10]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[9]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[8]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[7]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, 
r5, r10\n\t" + "# A[36] * B[6]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[5]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[4]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[3]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[2]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[1]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[0]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #168]\n\t" + "# A[0] * B[43]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[42]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[41]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[40]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[39]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[38]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[37]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[36]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[35]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[34]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[33]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[32]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[31]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], 
#124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[30]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[29]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[28]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[27]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[26]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[25]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[24]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[23]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[22]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[21]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[20]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[19]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[18]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[17]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[16]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[15]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[14]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[13]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[12]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
r10\n\t" + "# A[32] * B[11]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[10]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[9]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[8]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[7]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[6]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[5]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[4]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[3]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[2]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[1]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[0]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #172]\n\t" + "# A[0] * B[44]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[43]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[42]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[41]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[40]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[39]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[38]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[37]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #148]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[36]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[35]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[34]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[33]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[32]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[31]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[30]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[29]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[28]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[27]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[26]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[25]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[24]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[23]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[22]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[21]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[20]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[19]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[18]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# 
A[27] * B[17]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[16]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[15]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[14]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[13]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[12]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[11]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[10]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[9]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[8]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[7]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[6]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[5]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[4]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[3]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[2]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[1]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[0]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #176]\n\t" + "# A[0] * B[45]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[44]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull 
r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[43]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[42]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[41]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[40]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[39]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[38]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[37]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[36]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[35]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[34]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[33]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[32]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[31]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[30]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[29]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[28]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[27]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[26]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[25]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * 
B[24]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[23]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[22]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[21]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[20]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[19]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[18]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[17]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[16]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[15]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[14]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[13]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[12]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[11]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[10]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[9]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[8]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[7]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[6]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[5]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds 
r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[4]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[3]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[2]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[1]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[0]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #180]\n\t" + "# A[0] * B[46]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[45]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[44]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[43]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[42]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[41]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[40]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[39]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[38]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[37]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[36]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[35]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[34]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[33]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[32]\n\t" + 
"ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[31]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[30]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[29]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[28]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[27]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[26]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[25]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[24]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[23]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[22]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[21]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[20]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[19]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[18]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[17]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[16]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[15]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[14]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[13]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[12]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[11]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[10]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[9]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[8]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[7]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[6]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[5]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[4]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[3]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[2]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[1]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[0]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #184]\n\t" + "# A[0] * B[47]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[46]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[45]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[44]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[43]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[42]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[41]\n\t" + "ldr r8, 
[%[a], #24]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[40]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[39]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[38]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[37]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[36]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[35]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[34]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[33]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[32]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[31]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[30]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[29]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[28]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[27]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[26]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[25]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[24]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[23]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[22]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[21]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[20]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[19]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[18]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[17]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[16]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[15]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[14]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[13]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[12]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[11]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[10]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[9]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[8]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[7]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[6]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[5]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[4]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[3]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[2]\n\t" + "ldr r8, [%[a], #180]\n\t" + 
"ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[1]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[0]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #188]\n\t" + "# A[0] * B[48]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[47]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[46]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[45]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[44]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[43]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[42]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[41]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[40]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[39]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[38]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[37]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[36]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[35]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[34]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[33]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[32]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[31]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[30]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[29]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[28]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[27]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[26]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[25]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[24]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[23]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[22]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[21]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[20]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[19]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[18]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[17]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[16]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[15]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[14]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[13]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[12]\n\t" + "ldr r8, [%[a], #144]\n\t" + 
"ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[11]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[10]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[9]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[8]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[7]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[6]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[5]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[4]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[3]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[2]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[1]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[0]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #192]\n\t" + "# A[0] * B[49]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[48]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[47]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[46]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[45]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[44]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[43]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, 
r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[42]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[41]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[40]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[39]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[38]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[37]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[36]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[35]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[34]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[33]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[32]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[31]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[30]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[29]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[28]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[27]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[26]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[25]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[24]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[23]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, 
[%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[22]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[21]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[20]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[19]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[18]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[17]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[16]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[15]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[14]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[13]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[12]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[11]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[10]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[9]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[8]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[7]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[6]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[5]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[4]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc 
r3, r3, r10\n\t" + "# A[46] * B[3]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[2]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[1]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[0]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #196]\n\t" + "# A[0] * B[50]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[49]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[48]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[47]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[46]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[45]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[44]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[43]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[42]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[41]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[40]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[39]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[38]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[37]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[36]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[35]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], 
#140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[34]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[33]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[32]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[31]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[30]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[29]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[28]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[27]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[26]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[25]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[24]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[23]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[22]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[21]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[20]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[19]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[18]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[17]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[16]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc 
r4, r4, r10\n\t" + "# A[35] * B[15]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[14]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[13]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[12]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[11]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[10]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[9]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[8]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[7]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[6]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[5]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[4]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[3]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[2]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[1]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[0]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #200]\n\t" + "# A[0] * B[51]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[50]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[49]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[48]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], 
#192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[47]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[46]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[45]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[44]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[43]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[42]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[41]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[40]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[39]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[38]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[37]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[36]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[35]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[34]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[33]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[32]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[31]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[30]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[29]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
r10\n\t" + "# A[23] * B[28]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[27]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[26]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[25]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[24]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[23]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[22]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[21]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[20]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[19]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[18]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[17]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[16]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[15]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[14]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[13]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[12]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[11]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[10]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[9]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #36]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[8]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[7]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[6]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[5]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[4]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[3]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[2]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[1]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[0]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #204]\n\t" + "# A[0] * B[52]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[51]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[50]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[49]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[48]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[47]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[46]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[45]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[44]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[43]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
r10\n\t" + "# A[10] * B[42]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[41]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[40]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[39]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[38]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[37]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[36]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[35]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[34]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[33]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[32]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[31]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[30]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[29]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[28]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[27]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[26]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[25]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[24]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[23]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #92]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[22]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[21]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[20]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[19]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[18]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[17]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[16]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[15]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[14]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[13]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[12]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[11]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[10]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[9]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[8]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[7]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[6]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[5]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[4]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + 
"# A[49] * B[3]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[2]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[1]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[0]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #208]\n\t" + "# A[0] * B[53]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[52]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[51]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[50]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[49]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[48]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[47]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[46]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[45]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[44]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[43]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[42]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[41]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[40]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[39]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[38]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull 
r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[37]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[36]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[35]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[34]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[33]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[32]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[31]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[30]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[29]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[28]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[27]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[26]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[25]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[24]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[23]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[22]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[21]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[20]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[19]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" 
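+ /* Each "# A[i] * B[j]" block in this hunk repeats one pattern: load a[i]
+  * and b[j], form the 64-bit partial product with umull into r6:r7, then
+  * fold it into a 96-bit column accumulator kept in three rotating
+  * registers (r3/r4/r5). r10 is assumed to hold zero (set earlier in this
+  * routine), so the trailing "adc ..., r10" only absorbs the carry flag.
+  * Once every pair with i + j == k has been added, the low accumulator
+  * word is the finished column k and is stored to the result at
+  * [sp, #4*k]. */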
+ "# A[35] * B[18]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[17]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[16]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[15]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[14]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[13]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[12]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[11]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[10]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[9]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[8]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[7]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[6]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[5]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[4]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[3]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[2]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[1]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[0]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #212]\n\t" + "# A[0] * B[54]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #216]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[53]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[52]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[51]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[50]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[49]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[48]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[47]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[46]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[45]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[44]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[43]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[42]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[41]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[40]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[39]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[38]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[37]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[36]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[35]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# 
A[20] * B[34]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[33]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[32]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[31]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[30]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[29]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[28]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[27]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[26]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[25]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[24]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[23]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[22]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[21]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[20]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[19]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[18]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[17]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[16]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[15]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, 
r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[14]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[13]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[12]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[11]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[10]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[9]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[8]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[7]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[6]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[5]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[4]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[3]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[2]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[1]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[0]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #216]\n\t" + "# A[0] * B[55]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[54]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[53]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[52]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" 
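+ /* For reference, a minimal C model of the unrolled columns above, under
+  * the same assumptions (n 32-bit words per operand, result r[] on the
+  * stack, accumulator held in three rotating 32-bit words):
+  *
+  *   uint32_t lo = 0, mid = 0, hi = 0;
+  *   for (int k = 0; k <= 2*n - 2; k++) {
+  *       int i0 = (k < n) ? 0 : k - (n - 1);
+  *       int i1 = (k < n) ? k : n - 1;
+  *       for (int i = i0; i <= i1; i++) {
+  *           uint64_t p = (uint64_t)a[i] * b[k - i];      // umull r6, r7
+  *           uint64_t s = (uint64_t)lo + (uint32_t)p;     // adds
+  *           lo  = (uint32_t)s;
+  *           s   = (uint64_t)mid + (p >> 32) + (s >> 32); // adcs
+  *           mid = (uint32_t)s;
+  *           hi += (uint32_t)(s >> 32);                   // adc ..., r10
+  *       }
+  *       r[k] = lo;                                 // str ..., [sp, #4*k]
+  *       lo = mid; mid = hi; hi = 0;                // rotate registers
+  *   }
+  *   // final top word: r[2*n - 1] = mid
+  */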
+ "# A[4] * B[51]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[50]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[49]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[48]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[47]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[46]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[45]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[44]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[43]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[42]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[41]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[40]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[39]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[38]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[37]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[36]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[35]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[34]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[33]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[32]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[31]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[30]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[29]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[28]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[27]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[26]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[25]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[24]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[23]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[22]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[21]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[20]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[19]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[18]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[17]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[16]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[15]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[14]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[13]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] 
* B[12]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[11]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[10]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[9]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[8]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[7]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[6]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[5]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[4]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[3]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[2]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[1]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[0]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #220]\n\t" + "# A[0] * B[56]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[55]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[54]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[53]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[52]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[51]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[50]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[49]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[48]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[47]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[46]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[45]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[44]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[43]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[42]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[41]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[40]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[39]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[38]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[37]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[36]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[35]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[34]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[33]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[32]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[31]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * 
B[30]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[29]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[28]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[27]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[26]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[25]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[24]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[23]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[22]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[21]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[20]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[19]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[18]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[17]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[16]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[15]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[14]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[13]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[12]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[11]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[10]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[9]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[8]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[7]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[6]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[5]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[4]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[3]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[2]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[1]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[0]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #224]\n\t" + "# A[0] * B[57]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[56]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[55]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[54]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[53]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[52]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[51]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[50]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * 
B[49]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[48]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[47]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[46]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[45]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[44]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[43]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[42]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[41]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[40]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[39]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[38]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[37]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[36]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[35]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[34]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[33]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[32]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[31]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[30]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[29]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[28]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[27]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[26]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[25]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[24]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[23]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[22]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[21]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[20]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[19]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[18]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[17]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[16]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[15]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[14]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[13]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[12]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[11]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] 
* B[10]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[9]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[8]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[7]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[6]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[5]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[4]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[3]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[2]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[1]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[0]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #228]\n\t" + "# A[0] * B[58]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[57]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[56]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[55]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[54]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[53]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[52]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[51]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[50]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[49]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[48]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[47]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[46]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[45]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[44]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[43]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[42]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[41]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[40]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[39]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[38]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[37]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[36]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[35]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[34]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[33]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[32]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[31]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * 
B[30]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[29]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[28]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[27]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[26]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[25]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[24]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[23]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[22]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[21]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[20]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[19]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[18]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[17]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[16]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[15]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[14]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[13]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[12]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[11]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[10]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[9]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[8]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[7]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[6]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[5]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[4]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[3]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[2]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[1]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[0]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #232]\n\t" + "# A[0] * B[59]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[58]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[57]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[56]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[55]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[54]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[53]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[52]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * 
B[51]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[50]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[49]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[48]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[47]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[46]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[45]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[44]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[43]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[42]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[41]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[40]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[39]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[38]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[37]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[36]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[35]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[34]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[33]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[32]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[31]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[30]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[29]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[28]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[27]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[26]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[25]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[24]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[23]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[22]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[21]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[20]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[19]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[18]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[17]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[16]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[15]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[14]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[13]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# 
A[47] * B[12]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[11]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[10]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[9]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[8]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[7]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[6]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[5]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[4]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[3]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[2]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[1]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[0]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #236]\n\t" + "# A[0] * B[60]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[59]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[58]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[57]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[56]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[55]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[54]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, 
r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[53]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[52]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[51]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[50]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[49]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[48]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[47]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[46]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[45]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[44]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[43]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[42]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[41]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[40]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[39]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[38]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[37]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[36]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[35]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] 
* B[34]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[33]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[32]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[31]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[30]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[29]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[28]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[27]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[26]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[25]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[24]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[23]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[22]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[21]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[20]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[19]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[18]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[17]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[16]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[15]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, 
r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[14]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[13]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[12]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[11]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[10]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[9]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[8]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[7]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[6]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[5]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[4]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[3]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[2]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[1]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[0]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #240]\n\t" + "# A[0] * B[61]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[60]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[59]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[58]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# 
A[4] * B[57]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[56]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[55]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[54]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[53]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[52]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[51]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[50]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[49]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[48]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[47]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[46]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[45]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[44]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[43]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[42]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[41]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[40]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[39]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[38]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[37]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[36]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[35]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[34]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[33]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[32]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[31]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[30]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[29]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[28]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[27]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[26]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[25]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[24]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[23]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[22]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[21]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[20]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[19]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# 
A[43] * B[18]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[17]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[16]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[15]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[14]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[13]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[12]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[11]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[10]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[9]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[8]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[7]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[6]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[5]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[4]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[3]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[2]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[1]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[0]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #244]\n\t" + "# A[0] * B[62]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull 
r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[61]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[60]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[59]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[58]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[57]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[56]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[55]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[54]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[53]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[52]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[51]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[50]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[49]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[48]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[47]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[46]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[45]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[44]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[43]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * 
B[42]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[41]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[40]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[39]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[38]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[37]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[36]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[35]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[34]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[33]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[32]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[31]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[30]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[29]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[28]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[27]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[26]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[25]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[24]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[23]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, 
r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[22]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[21]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[20]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[19]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[18]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[17]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[16]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[15]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[14]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[13]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[12]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[11]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[10]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[9]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[8]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[7]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[6]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[5]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[4]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * 
B[3]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[2]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[1]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[0]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #248]\n\t" + "# A[0] * B[63]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[62]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[61]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[60]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[59]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[58]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[57]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[56]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[55]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[54]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[53]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[52]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[51]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[50]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[49]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[48]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[47]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[46]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[45]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[44]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[43]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[42]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[41]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[40]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[39]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[38]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[37]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[36]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[35]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[34]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[33]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[32]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[31]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[30]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[29]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# 
A[35] * B[28]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[27]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[26]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[25]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[24]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[23]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[22]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[21]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[20]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[19]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[18]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[17]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[16]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[15]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[14]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[13]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[12]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[11]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[10]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[9]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, 
r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[8]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[7]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[6]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[5]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[4]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[3]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[2]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[1]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[0]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #252]\n\t" + "# A[1] * B[63]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[2] * B[62]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[61]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[60]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[59]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[58]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[57]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[56]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[55]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[54]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# 
A[11] * B[53]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[52]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[51]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[50]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[49]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[48]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[47]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[46]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[45]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[44]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[43]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[42]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[41]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[40]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[39]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[38]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[37]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[36]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[35]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[34]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, 
r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[33]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[32]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[31]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[30]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[29]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[28]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[27]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[26]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[25]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[24]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[23]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[22]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[21]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[20]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[19]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[18]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[17]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[16]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[15]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" 
+ "# A[50] * B[14]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[13]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[12]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[11]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[10]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[9]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[8]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[7]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[6]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[5]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[4]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[3]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[2]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[1]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #256]\n\t" + "# A[2] * B[63]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[3] * B[62]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[61]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[60]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[59]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[58]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #232]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[57]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[56]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[55]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[54]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[53]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[52]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[51]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[50]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[49]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[48]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[47]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[46]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[45]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[44]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[43]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[42]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[41]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[40]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[39]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
r10\n\t" + "# A[27] * B[38]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[37]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[36]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[35]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[34]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[33]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[32]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[31]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[30]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[29]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[28]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[27]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[26]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[25]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[24]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[23]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[22]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[21]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[20]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[19]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], 
#76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[18]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[17]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[16]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[15]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[14]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[13]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[12]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[11]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[10]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[9]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[8]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[7]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[6]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[5]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[4]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[3]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[2]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #260]\n\t" + "# A[3] * B[63]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[4] * B[62]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, 
r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[61]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[60]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[59]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[58]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[57]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[56]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[55]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[54]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[53]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[52]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[51]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[50]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[49]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[48]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[47]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[46]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[45]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[44]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[43]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[42]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, 
[%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[41]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[40]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[39]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[38]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[37]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[36]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[35]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[34]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[33]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[32]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[31]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[30]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[29]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[28]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[27]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[26]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[25]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[24]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[23]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[22]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[21]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[20]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[19]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[18]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[17]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[16]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[15]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[14]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[13]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[12]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[11]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[10]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[9]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[8]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[7]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[6]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[5]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[4]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[3]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, 
[%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #264]\n\t" + "# A[4] * B[63]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[5] * B[62]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[61]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[60]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[59]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[58]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[57]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[56]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[55]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[54]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[53]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[52]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[51]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[50]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[49]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[48]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[47]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[46]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[45]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + 
"adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[44]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[43]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[42]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[41]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[40]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[39]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[38]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[37]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[36]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[35]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[34]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[33]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[32]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[31]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[30]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[29]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[28]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[27]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[26]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[25]\n\t" + "ldr 
r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[24]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[23]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[22]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[21]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[20]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[19]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[18]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[17]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[16]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[15]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[14]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[13]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[12]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[11]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[10]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[9]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[8]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[7]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[6]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[5]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[4]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #268]\n\t" + "# A[5] * B[63]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[6] * B[62]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[61]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[60]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[59]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[58]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[57]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[56]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[55]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[54]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[53]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[52]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[51]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[50]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[49]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[48]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[47]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * 
B[46]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[45]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[44]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[43]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[42]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[41]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[40]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[39]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[38]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[37]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[36]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[35]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[34]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[33]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[32]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[31]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[30]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[29]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[28]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[27]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull 
r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[26]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[25]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[24]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[23]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[22]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[21]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[20]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[19]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[18]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[17]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[16]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[15]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[14]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[13]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[12]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[11]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[10]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[9]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[8]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# 
A[61] * B[7]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[6]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[5]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #272]\n\t" + "# A[6] * B[63]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[7] * B[62]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[61]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[60]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[59]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[58]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[57]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[56]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[55]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[54]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[53]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[52]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[51]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[50]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[49]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[48]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[47]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], 
#188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[46]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[45]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[44]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[43]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[42]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[41]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[40]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[39]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[38]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[37]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[36]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[35]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[34]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[33]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[32]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[31]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[30]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[29]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[28]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[27]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[26]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[25]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[24]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[23]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[22]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[21]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[20]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[19]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[18]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[17]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[16]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[15]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[14]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[13]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[12]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[11]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[10]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[9]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[8]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, 
[%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[7]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[6]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #276]\n\t" + "# A[7] * B[63]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[8] * B[62]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[61]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[60]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[59]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[58]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[57]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[56]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[55]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[54]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[53]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[52]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[51]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[50]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[49]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[48]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[47]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + 
"adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[46]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[45]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[44]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[43]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[42]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[41]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[40]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[39]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[38]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[37]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[36]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[35]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[34]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[33]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[32]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[31]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[30]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[29]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[28]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[27]\n\t" + "ldr 
r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[26]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[25]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[24]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[23]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[22]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[21]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[20]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[19]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[18]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[17]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[16]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[15]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[14]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[13]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[12]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[11]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[10]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[9]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[8]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[7]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #280]\n\t" + "# A[8] * B[63]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[9] * B[62]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[61]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[60]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[59]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[58]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[57]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[56]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[55]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[54]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[53]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[52]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[51]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[50]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[49]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[48]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[47]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[46]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * 
B[45]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[44]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[43]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[42]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[41]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[40]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[39]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[38]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[37]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[36]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[35]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[34]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[33]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[32]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[31]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[30]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[29]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[28]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[27]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[26]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull 
r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[25]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[24]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[23]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[22]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[21]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[20]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[19]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[18]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[17]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[16]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[15]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[14]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[13]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[12]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[11]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[10]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[9]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[8]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #284]\n\t" + "# A[9] * B[63]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + 
"adc r5, r10, r10\n\t" + "# A[10] * B[62]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[61]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[60]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[59]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[58]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[57]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[56]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[55]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[54]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[53]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[52]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[51]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[50]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[49]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[48]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[47]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[46]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[45]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[44]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[43]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, 
[%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[42]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[41]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[40]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[39]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[38]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[37]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[36]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[35]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[34]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[33]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[32]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[31]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[30]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[29]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[28]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[27]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[26]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[25]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[24]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs 
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[49] * B[23]\n\t"
+        "ldr r8, [%[a], #196]\n\t"
+        "ldr r9, [%[b], #92]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[50] * B[22]\n\t"
+        "ldr r8, [%[a], #200]\n\t"
+        "ldr r9, [%[b], #88]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[51] * B[21]\n\t"
+        "ldr r8, [%[a], #204]\n\t"
+        "ldr r9, [%[b], #84]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[52] * B[20]\n\t"
+        "ldr r8, [%[a], #208]\n\t"
+        "ldr r9, [%[b], #80]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[53] * B[19]\n\t"
+        "ldr r8, [%[a], #212]\n\t"
+        "ldr r9, [%[b], #76]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[54] * B[18]\n\t"
+        "ldr r8, [%[a], #216]\n\t"
+        "ldr r9, [%[b], #72]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[55] * B[17]\n\t"
+        "ldr r8, [%[a], #220]\n\t"
+        "ldr r9, [%[b], #68]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[56] * B[16]\n\t"
+        "ldr r8, [%[a], #224]\n\t"
+        "ldr r9, [%[b], #64]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[57] * B[15]\n\t"
+        "ldr r8, [%[a], #228]\n\t"
+        "ldr r9, [%[b], #60]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[58] * B[14]\n\t"
+        "ldr r8, [%[a], #232]\n\t"
+        "ldr r9, [%[b], #56]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[59] * B[13]\n\t"
+        "ldr r8, [%[a], #236]\n\t"
+        "ldr r9, [%[b], #52]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[60] * B[12]\n\t"
+        "ldr r8, [%[a], #240]\n\t"
+        "ldr r9, [%[b], #48]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[61] * B[11]\n\t"
+        "ldr r8, [%[a], #244]\n\t"
+        "ldr r9, [%[b], #44]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[62] * B[10]\n\t"
+        "ldr r8, [%[a], #248]\n\t"
+        "ldr r9, [%[b], #40]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[63] * B[9]\n\t"
+        "ldr r8, [%[a], #252]\n\t"
+        "ldr r9, [%[b], #36]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [%[r], #288]\n\t"
+        "# A[10] * B[63]\n\t"
+        "ldr r8, [%[a], #40]\n\t"
+        "ldr r9, [%[b], #252]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r10, r10\n\t"
+        "# A[11] * B[62]\n\t"
+        "ldr r8, [%[a], #44]\n\t"
+        "ldr r9, [%[b], #248]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[12] * B[61]\n\t"
+        "ldr r8, [%[a], #48]\n\t"
+        "ldr r9, [%[b], #244]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[13] * B[60]\n\t"
+        "ldr r8, [%[a], #52]\n\t"
+        "ldr r9, [%[b], #240]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[14] * B[59]\n\t"
"ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[58]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[57]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[56]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[55]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[54]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[53]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[52]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[51]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[50]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[49]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[48]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[47]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[46]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[45]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[44]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[43]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[42]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[41]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[40]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[39]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[38]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[37]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[36]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[35]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[34]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[33]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[32]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[31]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[30]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[29]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[28]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[27]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[26]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[25]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[24]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[23]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[22]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[21]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] 
+        "# A[53] * B[20]\n\t"
+        "ldr r8, [%[a], #212]\n\t"
+        "ldr r9, [%[b], #80]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[54] * B[19]\n\t"
+        "ldr r8, [%[a], #216]\n\t"
+        "ldr r9, [%[b], #76]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[55] * B[18]\n\t"
+        "ldr r8, [%[a], #220]\n\t"
+        "ldr r9, [%[b], #72]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[56] * B[17]\n\t"
+        "ldr r8, [%[a], #224]\n\t"
+        "ldr r9, [%[b], #68]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[57] * B[16]\n\t"
+        "ldr r8, [%[a], #228]\n\t"
+        "ldr r9, [%[b], #64]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[58] * B[15]\n\t"
+        "ldr r8, [%[a], #232]\n\t"
+        "ldr r9, [%[b], #60]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[59] * B[14]\n\t"
+        "ldr r8, [%[a], #236]\n\t"
+        "ldr r9, [%[b], #56]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[60] * B[13]\n\t"
+        "ldr r8, [%[a], #240]\n\t"
+        "ldr r9, [%[b], #52]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[61] * B[12]\n\t"
+        "ldr r8, [%[a], #244]\n\t"
+        "ldr r9, [%[b], #48]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[62] * B[11]\n\t"
+        "ldr r8, [%[a], #248]\n\t"
+        "ldr r9, [%[b], #44]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[63] * B[10]\n\t"
+        "ldr r8, [%[a], #252]\n\t"
+        "ldr r9, [%[b], #40]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "str r4, [%[r], #292]\n\t"
+        "# A[11] * B[63]\n\t"
+        "ldr r8, [%[a], #44]\n\t"
+        "ldr r9, [%[b], #252]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r10, r10\n\t"
+        "# A[12] * B[62]\n\t"
+        "ldr r8, [%[a], #48]\n\t"
+        "ldr r9, [%[b], #248]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[13] * B[61]\n\t"
+        "ldr r8, [%[a], #52]\n\t"
+        "ldr r9, [%[b], #244]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[14] * B[60]\n\t"
+        "ldr r8, [%[a], #56]\n\t"
+        "ldr r9, [%[b], #240]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[15] * B[59]\n\t"
+        "ldr r8, [%[a], #60]\n\t"
+        "ldr r9, [%[b], #236]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[16] * B[58]\n\t"
+        "ldr r8, [%[a], #64]\n\t"
+        "ldr r9, [%[b], #232]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[17] * B[57]\n\t"
+        "ldr r8, [%[a], #68]\n\t"
+        "ldr r9, [%[b], #228]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[18] * B[56]\n\t"
+        "ldr r8, [%[a], #72]\n\t"
+        "ldr r9, [%[b], #224]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[19] * B[55]\n\t"
+        "ldr r8, [%[a], #76]\n\t"
+        "ldr r9, [%[b], #220]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[20] * B[54]\n\t"
+        "ldr r8, [%[a], #80]\n\t"
+        "ldr r9, [%[b], #216]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[21] * B[53]\n\t"
+        "ldr r8, [%[a], #84]\n\t"
+        "ldr r9, [%[b], #212]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[22] * B[52]\n\t"
+        "ldr r8, [%[a], #88]\n\t"
+        "ldr r9, [%[b], #208]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[23] * B[51]\n\t"
+        "ldr r8, [%[a], #92]\n\t"
+        "ldr r9, [%[b], #204]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[24] * B[50]\n\t"
+        "ldr r8, [%[a], #96]\n\t"
+        "ldr r9, [%[b], #200]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[25] * B[49]\n\t"
+        "ldr r8, [%[a], #100]\n\t"
+        "ldr r9, [%[b], #196]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[26] * B[48]\n\t"
+        "ldr r8, [%[a], #104]\n\t"
+        "ldr r9, [%[b], #192]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[27] * B[47]\n\t"
+        "ldr r8, [%[a], #108]\n\t"
+        "ldr r9, [%[b], #188]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[28] * B[46]\n\t"
+        "ldr r8, [%[a], #112]\n\t"
+        "ldr r9, [%[b], #184]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[29] * B[45]\n\t"
+        "ldr r8, [%[a], #116]\n\t"
+        "ldr r9, [%[b], #180]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[30] * B[44]\n\t"
+        "ldr r8, [%[a], #120]\n\t"
+        "ldr r9, [%[b], #176]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[31] * B[43]\n\t"
+        "ldr r8, [%[a], #124]\n\t"
+        "ldr r9, [%[b], #172]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[32] * B[42]\n\t"
+        "ldr r8, [%[a], #128]\n\t"
+        "ldr r9, [%[b], #168]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[33] * B[41]\n\t"
+        "ldr r8, [%[a], #132]\n\t"
+        "ldr r9, [%[b], #164]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[34] * B[40]\n\t"
+        "ldr r8, [%[a], #136]\n\t"
+        "ldr r9, [%[b], #160]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[35] * B[39]\n\t"
+        "ldr r8, [%[a], #140]\n\t"
+        "ldr r9, [%[b], #156]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[36] * B[38]\n\t"
+        "ldr r8, [%[a], #144]\n\t"
+        "ldr r9, [%[b], #152]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[37] * B[37]\n\t"
+        "ldr r8, [%[a], #148]\n\t"
+        "ldr r9, [%[b], #148]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[38] * B[36]\n\t"
+        "ldr r8, [%[a], #152]\n\t"
+        "ldr r9, [%[b], #144]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[35]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[34]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[33]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[32]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[31]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[30]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[29]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[28]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[27]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[26]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[25]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[24]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[23]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[22]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[21]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[20]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[19]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[18]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[17]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[16]\n\t" + "ldr r8, [%[a], #232]\n\t" 
+ "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[15]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[14]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[13]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[12]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[11]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #296]\n\t" + "# A[12] * B[63]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[13] * B[62]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[61]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[60]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[59]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[58]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[57]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[56]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[55]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[54]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[53]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[52]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[51]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[50]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds 
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[26] * B[49]\n\t"
+        "ldr r8, [%[a], #104]\n\t"
+        "ldr r9, [%[b], #196]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[27] * B[48]\n\t"
+        "ldr r8, [%[a], #108]\n\t"
+        "ldr r9, [%[b], #192]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[28] * B[47]\n\t"
+        "ldr r8, [%[a], #112]\n\t"
+        "ldr r9, [%[b], #188]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[29] * B[46]\n\t"
+        "ldr r8, [%[a], #116]\n\t"
+        "ldr r9, [%[b], #184]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[30] * B[45]\n\t"
+        "ldr r8, [%[a], #120]\n\t"
+        "ldr r9, [%[b], #180]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[31] * B[44]\n\t"
+        "ldr r8, [%[a], #124]\n\t"
+        "ldr r9, [%[b], #176]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[32] * B[43]\n\t"
+        "ldr r8, [%[a], #128]\n\t"
+        "ldr r9, [%[b], #172]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[33] * B[42]\n\t"
+        "ldr r8, [%[a], #132]\n\t"
+        "ldr r9, [%[b], #168]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[34] * B[41]\n\t"
+        "ldr r8, [%[a], #136]\n\t"
+        "ldr r9, [%[b], #164]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[35] * B[40]\n\t"
+        "ldr r8, [%[a], #140]\n\t"
+        "ldr r9, [%[b], #160]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[36] * B[39]\n\t"
+        "ldr r8, [%[a], #144]\n\t"
+        "ldr r9, [%[b], #156]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[37] * B[38]\n\t"
+        "ldr r8, [%[a], #148]\n\t"
+        "ldr r9, [%[b], #152]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[38] * B[37]\n\t"
+        "ldr r8, [%[a], #152]\n\t"
+        "ldr r9, [%[b], #148]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[39] * B[36]\n\t"
+        "ldr r8, [%[a], #156]\n\t"
+        "ldr r9, [%[b], #144]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[40] * B[35]\n\t"
+        "ldr r8, [%[a], #160]\n\t"
+        "ldr r9, [%[b], #140]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[41] * B[34]\n\t"
+        "ldr r8, [%[a], #164]\n\t"
+        "ldr r9, [%[b], #136]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[42] * B[33]\n\t"
+        "ldr r8, [%[a], #168]\n\t"
+        "ldr r9, [%[b], #132]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[43] * B[32]\n\t"
+        "ldr r8, [%[a], #172]\n\t"
+        "ldr r9, [%[b], #128]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[44] * B[31]\n\t"
+        "ldr r8, [%[a], #176]\n\t"
+        "ldr r9, [%[b], #124]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[45] * B[30]\n\t"
+        "ldr r8, [%[a], #180]\n\t"
+        "ldr r9, [%[b], #120]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[46] * B[29]\n\t"
+        "ldr r8, [%[a], #184]\n\t"
+        "ldr r9, [%[b], #116]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[47] * B[28]\n\t"
+        "ldr r8, [%[a], #188]\n\t"
+        "ldr r9, [%[b], #112]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[48] * B[27]\n\t"
+        "ldr r8, [%[a], #192]\n\t"
+        "ldr r9, [%[b], #108]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[49] * B[26]\n\t"
+        "ldr r8, [%[a], #196]\n\t"
+        "ldr r9, [%[b], #104]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[50] * B[25]\n\t"
+        "ldr r8, [%[a], #200]\n\t"
+        "ldr r9, [%[b], #100]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[51] * B[24]\n\t"
+        "ldr r8, [%[a], #204]\n\t"
+        "ldr r9, [%[b], #96]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[52] * B[23]\n\t"
+        "ldr r8, [%[a], #208]\n\t"
+        "ldr r9, [%[b], #92]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[53] * B[22]\n\t"
+        "ldr r8, [%[a], #212]\n\t"
+        "ldr r9, [%[b], #88]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[54] * B[21]\n\t"
+        "ldr r8, [%[a], #216]\n\t"
+        "ldr r9, [%[b], #84]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[55] * B[20]\n\t"
+        "ldr r8, [%[a], #220]\n\t"
+        "ldr r9, [%[b], #80]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[56] * B[19]\n\t"
+        "ldr r8, [%[a], #224]\n\t"
+        "ldr r9, [%[b], #76]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[57] * B[18]\n\t"
+        "ldr r8, [%[a], #228]\n\t"
+        "ldr r9, [%[b], #72]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[58] * B[17]\n\t"
+        "ldr r8, [%[a], #232]\n\t"
+        "ldr r9, [%[b], #68]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[59] * B[16]\n\t"
+        "ldr r8, [%[a], #236]\n\t"
+        "ldr r9, [%[b], #64]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[60] * B[15]\n\t"
+        "ldr r8, [%[a], #240]\n\t"
+        "ldr r9, [%[b], #60]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[61] * B[14]\n\t"
+        "ldr r8, [%[a], #244]\n\t"
+        "ldr r9, [%[b], #56]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[62] * B[13]\n\t"
+        "ldr r8, [%[a], #248]\n\t"
+        "ldr r9, [%[b], #52]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "# A[63] * B[12]\n\t"
+        "ldr r8, [%[a], #252]\n\t"
+        "ldr r9, [%[b], #48]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [%[r], #300]\n\t"
+        "# A[13] * B[63]\n\t"
+        "ldr r8, [%[a], #52]\n\t"
+        "ldr r9, [%[b], #252]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r10, r10\n\t"
+        "# A[14] * B[62]\n\t"
+        "ldr r8, [%[a], #56]\n\t"
+        "ldr r9, [%[b], #248]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[15] * B[61]\n\t"
+        "ldr r8, [%[a], #60]\n\t"
+        "ldr r9, [%[b], #244]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[16] * B[60]\n\t"
+        "ldr r8, [%[a], #64]\n\t"
+        "ldr r9, [%[b], #240]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[17] * B[59]\n\t"
+        "ldr r8, [%[a], #68]\n\t"
+        "ldr r9, [%[b], #236]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[18] * B[58]\n\t"
+        "ldr r8, [%[a], #72]\n\t"
+        "ldr r9, [%[b], #232]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[19] * B[57]\n\t"
+        "ldr r8, [%[a], #76]\n\t"
+        "ldr r9, [%[b], #228]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[20] * B[56]\n\t"
+        "ldr r8, [%[a], #80]\n\t"
+        "ldr r9, [%[b], #224]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[21] * B[55]\n\t"
+        "ldr r8, [%[a], #84]\n\t"
+        "ldr r9, [%[b], #220]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[22] * B[54]\n\t"
+        "ldr r8, [%[a], #88]\n\t"
+        "ldr r9, [%[b], #216]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[23] * B[53]\n\t"
+        "ldr r8, [%[a], #92]\n\t"
+        "ldr r9, [%[b], #212]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[24] * B[52]\n\t"
+        "ldr r8, [%[a], #96]\n\t"
+        "ldr r9, [%[b], #208]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[25] * B[51]\n\t"
+        "ldr r8, [%[a], #100]\n\t"
+        "ldr r9, [%[b], #204]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[26] * B[50]\n\t"
+        "ldr r8, [%[a], #104]\n\t"
+        "ldr r9, [%[b], #200]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[27] * B[49]\n\t"
+        "ldr r8, [%[a], #108]\n\t"
+        "ldr r9, [%[b], #196]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[28] * B[48]\n\t"
+        "ldr r8, [%[a], #112]\n\t"
+        "ldr r9, [%[b], #192]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[29] * B[47]\n\t"
+        "ldr r8, [%[a], #116]\n\t"
+        "ldr r9, [%[b], #188]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[30] * B[46]\n\t"
+        "ldr r8, [%[a], #120]\n\t"
+        "ldr r9, [%[b], #184]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[31] * B[45]\n\t"
+        "ldr r8, [%[a], #124]\n\t"
+        "ldr r9, [%[b], #180]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "# A[32] * B[44]\n\t"
+        "ldr r8, [%[a], #128]\n\t"
+        "ldr r9, [%[b], #176]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t" + "# A[33] * B[43]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[42]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[41]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[40]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[39]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[38]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[37]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[36]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[35]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[34]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[33]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[32]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[31]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[30]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[29]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[28]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[27]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[26]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[25]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[24]\n\t" + "ldr r8, [%[a], #208]\n\t" 
+ "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[23]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[22]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[21]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[20]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[19]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[18]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[17]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[16]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[15]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[14]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[13]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #304]\n\t" + "# A[14] * B[63]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[15] * B[62]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[61]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[60]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[59]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[58]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[57]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[56]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds 
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[22] * B[55]\n\t"
+        "ldr r8, [%[a], #88]\n\t"
+        "ldr r9, [%[b], #220]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[23] * B[54]\n\t"
+        "ldr r8, [%[a], #92]\n\t"
+        "ldr r9, [%[b], #216]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[24] * B[53]\n\t"
+        "ldr r8, [%[a], #96]\n\t"
+        "ldr r9, [%[b], #212]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[25] * B[52]\n\t"
+        "ldr r8, [%[a], #100]\n\t"
+        "ldr r9, [%[b], #208]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[26] * B[51]\n\t"
+        "ldr r8, [%[a], #104]\n\t"
+        "ldr r9, [%[b], #204]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[27] * B[50]\n\t"
+        "ldr r8, [%[a], #108]\n\t"
+        "ldr r9, [%[b], #200]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[28] * B[49]\n\t"
+        "ldr r8, [%[a], #112]\n\t"
+        "ldr r9, [%[b], #196]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[29] * B[48]\n\t"
+        "ldr r8, [%[a], #116]\n\t"
+        "ldr r9, [%[b], #192]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[30] * B[47]\n\t"
+        "ldr r8, [%[a], #120]\n\t"
+        "ldr r9, [%[b], #188]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[31] * B[46]\n\t"
+        "ldr r8, [%[a], #124]\n\t"
+        "ldr r9, [%[b], #184]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[32] * B[45]\n\t"
+        "ldr r8, [%[a], #128]\n\t"
+        "ldr r9, [%[b], #180]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[33] * B[44]\n\t"
+        "ldr r8, [%[a], #132]\n\t"
+        "ldr r9, [%[b], #176]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[34] * B[43]\n\t"
+        "ldr r8, [%[a], #136]\n\t"
+        "ldr r9, [%[b], #172]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[35] * B[42]\n\t"
+        "ldr r8, [%[a], #140]\n\t"
+        "ldr r9, [%[b], #168]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[36] * B[41]\n\t"
+        "ldr r8, [%[a], #144]\n\t"
+        "ldr r9, [%[b], #164]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[37] * B[40]\n\t"
+        "ldr r8, [%[a], #148]\n\t"
+        "ldr r9, [%[b], #160]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[38] * B[39]\n\t"
+        "ldr r8, [%[a], #152]\n\t"
+        "ldr r9, [%[b], #156]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[39] * B[38]\n\t"
+        "ldr r8, [%[a], #156]\n\t"
+        "ldr r9, [%[b], #152]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[40] * B[37]\n\t"
+        "ldr r8, [%[a], #160]\n\t"
+        "ldr r9, [%[b], #148]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[41] * B[36]\n\t"
+        "ldr r8, [%[a], #164]\n\t"
+        "ldr r9, [%[b], #144]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[42] * B[35]\n\t"
+        "ldr r8, [%[a], #168]\n\t"
+        "ldr r9, [%[b], #140]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[43] * B[34]\n\t"
+        "ldr r8, [%[a], #172]\n\t"
+        "ldr r9, [%[b], #136]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[44] * B[33]\n\t"
+        "ldr r8, [%[a], #176]\n\t"
+        "ldr r9, [%[b], #132]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[45] * B[32]\n\t"
+        "ldr r8, [%[a], #180]\n\t"
+        "ldr r9, [%[b], #128]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[46] * B[31]\n\t"
+        "ldr r8, [%[a], #184]\n\t"
+        "ldr r9, [%[b], #124]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[47] * B[30]\n\t"
+        "ldr r8, [%[a], #188]\n\t"
+        "ldr r9, [%[b], #120]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[48] * B[29]\n\t"
+        "ldr r8, [%[a], #192]\n\t"
+        "ldr r9, [%[b], #116]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[49] * B[28]\n\t"
+        "ldr r8, [%[a], #196]\n\t"
+        "ldr r9, [%[b], #112]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[50] * B[27]\n\t"
+        "ldr r8, [%[a], #200]\n\t"
+        "ldr r9, [%[b], #108]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[51] * B[26]\n\t"
+        "ldr r8, [%[a], #204]\n\t"
+        "ldr r9, [%[b], #104]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[52] * B[25]\n\t"
+        "ldr r8, [%[a], #208]\n\t"
+        "ldr r9, [%[b], #100]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[53] * B[24]\n\t"
+        "ldr r8, [%[a], #212]\n\t"
+        "ldr r9, [%[b], #96]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[54] * B[23]\n\t"
+        "ldr r8, [%[a], #216]\n\t"
+        "ldr r9, [%[b], #92]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[55] * B[22]\n\t"
+        "ldr r8, [%[a], #220]\n\t"
+        "ldr r9, [%[b], #88]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[56] * B[21]\n\t"
+        "ldr r8, [%[a], #224]\n\t"
+        "ldr r9, [%[b], #84]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[57] * B[20]\n\t"
+        "ldr r8, [%[a], #228]\n\t"
+        "ldr r9, [%[b], #80]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[58] * B[19]\n\t"
+        "ldr r8, [%[a], #232]\n\t"
+        "ldr r9, [%[b], #76]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[59] * B[18]\n\t"
+        "ldr r8, [%[a], #236]\n\t"
+        "ldr r9, [%[b], #72]\n\t"
+        "umull r6, r7, r8, r9\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "# A[60] * B[17]\n\t"
+        "ldr r8, [%[a], #240]\n\t"
+        "ldr r9, [%[b], #68]\n\t"
r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[16]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[15]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[14]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #308]\n\t" + "# A[15] * B[63]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[16] * B[62]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[61]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[60]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[59]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[58]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[57]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[56]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[55]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[54]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[53]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[52]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[51]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[50]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[49]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[48]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" 
+ "adc r5, r5, r10\n\t" + "# A[31] * B[47]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[46]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[45]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[44]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[43]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[42]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[41]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[40]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[39]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[38]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[37]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[36]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[35]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[34]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[33]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[32]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[31]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[30]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[29]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[28]\n\t" + "ldr r8, [%[a], #200]\n\t" 
+ "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[27]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[26]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[25]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[24]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[23]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[22]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[21]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[20]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[19]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[18]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[17]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[16]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[15]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #312]\n\t" + "# A[16] * B[63]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[17] * B[62]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[61]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[60]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[59]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[58]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[57]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[56]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[55]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[54]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[53]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[52]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[51]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[50]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[49]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[48]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[47]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[46]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[45]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[44]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[43]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[42]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[41]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[40]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[39]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] 
* B[38]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[37]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[36]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[35]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[34]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[33]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[32]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[31]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[30]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[29]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[28]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[27]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[26]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[25]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[24]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[23]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[22]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[21]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[20]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[19]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, 
r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[18]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[17]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[16]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #316]\n\t" + "# A[17] * B[63]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[18] * B[62]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[61]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[60]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[59]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[58]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[57]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[56]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[55]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[54]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[53]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[52]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[51]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[50]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[49]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[48]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[47]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[46]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[45]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[44]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[43]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[42]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[41]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[40]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[39]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[38]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[37]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[36]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[35]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[34]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[33]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[32]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[31]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[30]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[29]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[28]\n\t" + "ldr r8, [%[a], 
#208]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[27]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[26]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[25]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[24]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[23]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[22]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[21]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[20]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[19]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[18]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[17]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #320]\n\t" + "# A[18] * B[63]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[19] * B[62]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[61]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[60]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[59]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[58]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[57]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[56]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[55]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[54]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[53]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[52]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[51]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[50]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[49]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[48]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[47]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[46]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[45]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[44]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[43]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[42]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[41]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[40]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[39]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[38]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[37]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
r10\n\t" + "# A[45] * B[36]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[35]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[34]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[33]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[32]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[31]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[30]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[29]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[28]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[27]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[26]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[25]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[24]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[23]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[22]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[21]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[20]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[19]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[18]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #324]\n\t" + "# A[19] * B[63]\n\t" + "ldr r8, [%[a], 
#76]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[20] * B[62]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[61]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[60]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[59]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[58]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[57]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[56]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[55]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[54]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[53]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[52]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[51]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[50]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[49]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[48]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[47]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[46]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[45]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[44]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, 
r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[43]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[42]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[41]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[40]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[39]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[38]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[37]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[36]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[35]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[34]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[33]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[32]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[31]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[30]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[29]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[28]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[27]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[26]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[25]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * 
B[24]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[23]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[22]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[21]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[20]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[19]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #328]\n\t" + "# A[20] * B[63]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[21] * B[62]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[61]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[60]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[59]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[58]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[57]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[56]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[55]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[54]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[53]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[52]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[51]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[50]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], 
#200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[49]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[48]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[47]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[46]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[45]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[44]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[43]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[42]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[41]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[40]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[39]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[38]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[37]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[36]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[35]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[34]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[33]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[32]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[31]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, 
r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[30]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[29]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[28]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[27]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[26]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[25]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[24]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[23]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[22]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[21]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[20]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #332]\n\t" + "# A[21] * B[63]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[22] * B[62]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[61]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[60]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[59]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[58]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[57]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[56]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * 
B[55]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[54]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[53]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[52]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[51]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[50]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[49]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[48]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[47]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[46]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[45]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[44]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[43]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[42]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[41]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[40]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[39]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[38]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[37]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[36]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull 
r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[35]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[34]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[33]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[32]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[31]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[30]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[29]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[28]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[27]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[26]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[25]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[24]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[23]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[22]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[21]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #336]\n\t" + "# A[22] * B[63]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[23] * B[62]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[61]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[60]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs 
r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[59]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[58]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[57]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[56]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[55]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[54]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[53]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[52]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[51]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[50]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[49]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[48]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[47]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[46]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[45]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[44]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[43]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[42]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[41]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[40]\n\t" + "ldr r8, 
[%[a], #180]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[39]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[38]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[37]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[36]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[35]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[34]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[33]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[32]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[31]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[30]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[29]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[28]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[27]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[26]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[25]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[24]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[23]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[22]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #340]\n\t" + "# A[23] * B[63]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #252]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[24] * B[62]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[61]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[60]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[59]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[58]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[57]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[56]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[55]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[54]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[53]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[52]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[51]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[50]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[49]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[48]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[47]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[46]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[45]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[44]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + 
"adc r4, r4, r10\n\t" + "# A[43] * B[43]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[42]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[41]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[40]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[39]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[38]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[37]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[36]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[35]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[34]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[33]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[32]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[31]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[30]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[29]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[28]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[27]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[26]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[25]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[24]\n\t" + "ldr r8, [%[a], #248]\n\t" + 
"ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[23]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #344]\n\t" + "# A[24] * B[63]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[25] * B[62]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[61]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[60]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[59]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[58]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[57]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[56]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[55]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[54]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[53]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[52]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[51]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[50]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[49]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[48]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[47]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[46]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[45]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[44]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[43]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[42]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[41]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[40]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[39]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[38]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[37]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[36]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[35]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[34]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[33]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[32]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[31]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[30]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[29]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[28]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[27]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
r10\n\t" + "# A[61] * B[26]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[25]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[24]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #348]\n\t" + "# A[25] * B[63]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[26] * B[62]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[61]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[60]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[59]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[58]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[57]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[56]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[55]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[54]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[53]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[52]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[51]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[50]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[49]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[48]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[47]\n\t" + "ldr r8, 
[%[a], #164]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[46]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[45]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[44]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[43]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[42]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[41]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[40]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[39]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[38]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[37]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[36]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[35]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[34]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[33]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[32]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[31]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[30]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[29]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[28]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[27]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[26]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[25]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #352]\n\t" + "# A[26] * B[63]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[27] * B[62]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[61]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[60]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[59]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[58]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[57]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[56]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[55]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[54]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[53]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[52]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[51]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[50]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[49]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[48]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + 
"adc r4, r4, r10\n\t" + "# A[42] * B[47]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[46]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[45]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[44]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[43]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[42]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[41]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[40]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[39]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[38]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[37]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[36]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[35]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[34]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[33]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[32]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[31]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[30]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[29]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[28]\n\t" + "ldr r8, [%[a], #244]\n\t" + 
"ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[27]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[26]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #356]\n\t" + "# A[27] * B[63]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[28] * B[62]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[61]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[60]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[59]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[58]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[57]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[56]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[55]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[54]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[53]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[52]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[51]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[50]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[49]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[48]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[47]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[46]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[45]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[44]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[43]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[42]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[41]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[40]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[39]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[38]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[37]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[36]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[35]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[34]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[33]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[32]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[31]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[30]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[29]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[28]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
r10\n\t" + "# A[63] * B[27]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #360]\n\t" + "# A[28] * B[63]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[29] * B[62]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[61]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[60]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[59]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[58]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[57]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[56]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[55]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[54]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[53]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[52]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[51]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[50]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[49]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[48]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[47]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[46]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[45]\n\t" + "ldr r8, 
[%[a], #184]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[44]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[43]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[42]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[41]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[40]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[39]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[38]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[37]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[36]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[35]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[34]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[33]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[32]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[31]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[30]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[29]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[28]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #364]\n\t" + "# A[29] * B[63]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[30] * B[62]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #248]\n\t" 
+ "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[61]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[60]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[59]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[58]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[57]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[56]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[55]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[54]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[53]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[52]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[51]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[50]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[49]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[48]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[47]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[46]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[45]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[44]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[43]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" 
+ "adc r4, r4, r10\n\t" + "# A[50] * B[42]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[41]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[40]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[39]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[38]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[37]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[36]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[35]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[34]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[33]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[32]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[31]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[30]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[29]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #368]\n\t" + "# A[30] * B[63]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[31] * B[62]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[61]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[60]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[59]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[58]\n\t" 
+ "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[57]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[56]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[55]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[54]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[53]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[52]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[51]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[50]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[49]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[48]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[47]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[46]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[45]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[44]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[43]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[42]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[41]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[40]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[39]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, 
r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[38]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[37]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[36]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[35]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[34]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[33]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[32]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[31]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[30]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #372]\n\t" + "# A[31] * B[63]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[32] * B[62]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[61]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[60]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[59]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[58]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[57]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[56]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[55]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[54]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, 
r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[53]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[52]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[51]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[50]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[49]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[48]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[47]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[46]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[45]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[44]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[43]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[42]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[41]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[40]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[39]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[38]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[37]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[36]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[35]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[34]\n\t" + "ldr r8, [%[a], 
#240]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[33]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[32]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[31]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #376]\n\t" + "# A[32] * B[63]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[33] * B[62]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[61]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[60]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[59]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[58]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[57]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[56]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[55]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[54]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[53]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[52]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[51]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[50]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[49]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[48]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #192]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[47]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[46]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[45]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[44]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[43]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[42]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[41]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[40]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[39]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[38]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[37]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[36]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[35]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[34]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[33]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[32]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #380]\n\t" + "# A[33] * B[63]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[34] * B[62]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[61]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, 
r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[60]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[59]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[58]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[57]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[56]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[55]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[54]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[53]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[52]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[51]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[50]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[49]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[48]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[47]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[46]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[45]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[44]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[43]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[42]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * 
B[41]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[40]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[39]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[38]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[37]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[36]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[35]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[34]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[33]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #384]\n\t" + "# A[34] * B[63]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[35] * B[62]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[61]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[60]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[59]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[58]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[57]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[56]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[55]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[54]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[53]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr 
r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[52]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[51]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[50]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[49]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[48]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[47]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[46]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[45]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[44]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[43]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[42]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[41]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[40]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[39]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[38]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[37]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[36]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[35]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[34]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + 
"adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #388]\n\t" + "# A[35] * B[63]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[36] * B[62]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[61]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[60]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[59]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[58]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[57]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[56]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[55]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[54]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[53]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[52]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[51]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[50]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[49]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[48]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[47]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[46]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[45]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + 
"# A[54] * B[44]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[43]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[42]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[41]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[40]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[39]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[38]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[37]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[36]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[35]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #392]\n\t" + "# A[36] * B[63]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[37] * B[62]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[61]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[60]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[59]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[58]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[57]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[56]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[55]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[54]\n\t" + "ldr r8, [%[a], 
#180]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[53]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[52]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[51]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[50]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[49]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[48]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[47]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[46]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[45]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[44]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[43]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[42]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[41]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[40]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[39]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[38]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[37]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[36]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #396]\n\t" + "# A[37] * B[63]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull 
r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[38] * B[62]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[61]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[60]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[59]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[58]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[57]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[56]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[55]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[54]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[53]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[52]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[51]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[50]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[49]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[48]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[47]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[46]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[45]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[44]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc 
r3, r3, r10\n\t" + "# A[57] * B[43]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[42]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[41]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[40]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[39]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[38]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[37]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #400]\n\t" + "# A[38] * B[63]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[39] * B[62]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[61]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[60]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[59]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[58]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[57]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[56]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[55]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[54]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[53]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[52]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[51]\n\t" + "ldr 
r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[50]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[49]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[48]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[47]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[46]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[45]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[44]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[43]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[42]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[41]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[40]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[39]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[38]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #404]\n\t" + "# A[39] * B[63]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[40] * B[62]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[61]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[60]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[59]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[58]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], 
#232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[57]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[56]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[55]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[54]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[53]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[52]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[51]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[50]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[49]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[48]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[47]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[46]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[45]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[44]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[43]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[42]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[41]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[40]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[39]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, 
r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #408]\n\t" + "# A[40] * B[63]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[41] * B[62]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[61]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[60]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[59]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[58]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[57]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[56]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[55]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[54]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[53]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[52]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[51]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[50]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[49]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[48]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[47]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[46]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[45]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] 
* B[44]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[43]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[42]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[41]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[40]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #412]\n\t" + "# A[41] * B[63]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[42] * B[62]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[61]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[60]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[59]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[58]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[57]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[56]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[55]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[54]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[53]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[52]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[51]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[50]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[49]\n\t" + "ldr r8, [%[a], #220]\n\t" + 
"ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[48]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[47]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[46]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[45]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[44]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[43]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[42]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[41]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #416]\n\t" + "# A[42] * B[63]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[43] * B[62]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[61]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[60]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[59]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[58]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[57]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[56]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[55]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[54]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[53]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[52]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[51]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[50]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[49]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[48]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[47]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[46]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[45]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[44]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[43]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[42]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #420]\n\t" + "# A[43] * B[63]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[44] * B[62]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[61]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[60]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[59]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[58]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[57]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[56]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, 
r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[55]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[54]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[53]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[52]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[51]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[50]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[49]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[48]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[47]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[46]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[45]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[44]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[43]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #424]\n\t" + "# A[44] * B[63]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[45] * B[62]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[61]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[60]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[59]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[58]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * 
B[57]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[56]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[55]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[54]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[53]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[52]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[51]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[50]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[49]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[48]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[47]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[46]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[45]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[44]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #428]\n\t" + "# A[45] * B[63]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[46] * B[62]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[61]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[60]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[59]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[58]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr 
r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[57]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[56]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[55]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[54]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[53]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[52]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[51]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[50]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[49]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[48]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[47]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[46]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[45]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #432]\n\t" + "# A[46] * B[63]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[47] * B[62]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[61]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[60]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[59]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[58]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[57]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[56]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[55]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[54]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[53]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[52]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[51]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[50]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[49]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[48]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[47]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[46]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #436]\n\t" + "# A[47] * B[63]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[48] * B[62]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[61]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[60]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[59]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[58]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[57]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[56]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[55]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[54]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[53]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[52]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[51]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[50]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[49]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[48]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[47]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #440]\n\t" + "# A[48] * B[63]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[49] * B[62]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[61]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[60]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[59]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[58]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[57]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[56]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[55]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * 
B[54]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[53]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[52]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[51]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[50]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[49]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[48]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #444]\n\t" + "# A[49] * B[63]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[50] * B[62]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[61]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[60]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[59]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[58]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[57]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[56]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[55]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[54]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[53]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[52]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[51]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr 
r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[50]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[49]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #448]\n\t" + "# A[50] * B[63]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[51] * B[62]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[61]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[60]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[59]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[58]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[57]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[56]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[55]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[54]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[53]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[52]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[51]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[50]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #452]\n\t" + "# A[51] * B[63]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[52] * B[62]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[61]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], 
#244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[60]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[59]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[58]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[57]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[56]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[55]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[54]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[53]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[52]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[51]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #456]\n\t" + "# A[52] * B[63]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[53] * B[62]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[61]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[60]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[59]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[58]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[57]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[56]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[55]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds 
r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[54]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[53]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[52]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #460]\n\t" + "# A[53] * B[63]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[54] * B[62]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[61]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[60]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[59]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[58]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[57]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[56]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[55]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[54]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[53]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #464]\n\t" + "# A[54] * B[63]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[55] * B[62]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[61]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[60]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[59]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + 
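+        /* Pattern throughout this multiply: umull leaves the 64-bit
+         * product A[i] * B[j] in r6:r7, and the following adds/adcs/adc
+         * chain folds it into the rotating three-word column accumulator
+         * (r3:r4:r5 at this point), with r10 holding zero so the final
+         * adc captures only the carry into the top word. */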
"adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[58]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[57]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[56]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[55]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[54]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #468]\n\t" + "# A[55] * B[63]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[56] * B[62]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[61]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[60]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[59]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[58]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[57]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[56]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[55]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #472]\n\t" + "# A[56] * B[63]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[57] * B[62]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[61]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[60]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[59]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[58]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[57]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[56]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #476]\n\t" + "# A[57] * B[63]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[58] * B[62]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[61]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[60]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[59]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[58]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[57]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #480]\n\t" + "# A[58] * B[63]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[59] * B[62]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[61]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[60]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[59]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[58]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #484]\n\t" + "# A[59] * B[63]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[60] * B[62]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[61]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[60]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[59]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #488]\n\t" + "# A[60] * B[63]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[61] * B[62]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[61]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[60]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #492]\n\t" + "# A[61] * B[63]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[62] * B[62]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[61]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #496]\n\t" + "# A[62] * B[63]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[63] * B[62]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #500]\n\t" + "# A[63] * B[63]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r3, [%[r], #504]\n\t" + "str r4, [%[r], #508]\n\t" + "ldr r3, [sp, #0]\n\t" + "ldr r4, [sp, #4]\n\t" + "ldr r5, [sp, #8]\n\t" + "ldr r6, [sp, #12]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r3, [sp, #16]\n\t" + "ldr r4, [sp, #20]\n\t" + "ldr r5, [sp, #24]\n\t" + "ldr r6, [sp, #28]\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r3, [sp, #32]\n\t" + "ldr r4, [sp, #36]\n\t" + "ldr r5, [sp, #40]\n\t" + "ldr r6, [sp, #44]\n\t" + "str r3, [%[r], #32]\n\t" + "str r4, [%[r], #36]\n\t" + "str r5, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "ldr r3, [sp, #48]\n\t" + "ldr r4, [sp, #52]\n\t" + "ldr r5, [sp, #56]\n\t" + "ldr r6, [sp, #60]\n\t" + "str r3, [%[r], #48]\n\t" + "str r4, [%[r], #52]\n\t" + "str r5, [%[r], #56]\n\t" + "str r6, [%[r], #60]\n\t" + "ldr r3, [sp, #64]\n\t" + "ldr r4, [sp, #68]\n\t" + "ldr r5, [sp, #72]\n\t" + "ldr r6, [sp, #76]\n\t" + "str r3, [%[r], #64]\n\t" + "str r4, [%[r], #68]\n\t" + "str r5, [%[r], #72]\n\t" + "str r6, [%[r], #76]\n\t" + "ldr r3, [sp, #80]\n\t" + "ldr r4, [sp, #84]\n\t" + "ldr r5, [sp, #88]\n\t" + "ldr 
r6, [sp, #92]\n\t"
+        "str r3, [%[r], #80]\n\t"
+        "str r4, [%[r], #84]\n\t"
+        "str r5, [%[r], #88]\n\t"
+        "str r6, [%[r], #92]\n\t"
+        "ldr r3, [sp, #96]\n\t"
+        "ldr r4, [sp, #100]\n\t"
+        "ldr r5, [sp, #104]\n\t"
+        "ldr r6, [sp, #108]\n\t"
+        "str r3, [%[r], #96]\n\t"
+        "str r4, [%[r], #100]\n\t"
+        "str r5, [%[r], #104]\n\t"
+        "str r6, [%[r], #108]\n\t"
+        "ldr r3, [sp, #112]\n\t"
+        "ldr r4, [sp, #116]\n\t"
+        "ldr r5, [sp, #120]\n\t"
+        "ldr r6, [sp, #124]\n\t"
+        "str r3, [%[r], #112]\n\t"
+        "str r4, [%[r], #116]\n\t"
+        "str r5, [%[r], #120]\n\t"
+        "str r6, [%[r], #124]\n\t"
+        "ldr r3, [sp, #128]\n\t"
+        "ldr r4, [sp, #132]\n\t"
+        "ldr r5, [sp, #136]\n\t"
+        "ldr r6, [sp, #140]\n\t"
+        "str r3, [%[r], #128]\n\t"
+        "str r4, [%[r], #132]\n\t"
+        "str r5, [%[r], #136]\n\t"
+        "str r6, [%[r], #140]\n\t"
+        "ldr r3, [sp, #144]\n\t"
+        "ldr r4, [sp, #148]\n\t"
+        "ldr r5, [sp, #152]\n\t"
+        "ldr r6, [sp, #156]\n\t"
+        "str r3, [%[r], #144]\n\t"
+        "str r4, [%[r], #148]\n\t"
+        "str r5, [%[r], #152]\n\t"
+        "str r6, [%[r], #156]\n\t"
+        "ldr r3, [sp, #160]\n\t"
+        "ldr r4, [sp, #164]\n\t"
+        "ldr r5, [sp, #168]\n\t"
+        "ldr r6, [sp, #172]\n\t"
+        "str r3, [%[r], #160]\n\t"
+        "str r4, [%[r], #164]\n\t"
+        "str r5, [%[r], #168]\n\t"
+        "str r6, [%[r], #172]\n\t"
+        "ldr r3, [sp, #176]\n\t"
+        "ldr r4, [sp, #180]\n\t"
+        "ldr r5, [sp, #184]\n\t"
+        "ldr r6, [sp, #188]\n\t"
+        "str r3, [%[r], #176]\n\t"
+        "str r4, [%[r], #180]\n\t"
+        "str r5, [%[r], #184]\n\t"
+        "str r6, [%[r], #188]\n\t"
+        "ldr r3, [sp, #192]\n\t"
+        "ldr r4, [sp, #196]\n\t"
+        "ldr r5, [sp, #200]\n\t"
+        "ldr r6, [sp, #204]\n\t"
+        "str r3, [%[r], #192]\n\t"
+        "str r4, [%[r], #196]\n\t"
+        "str r5, [%[r], #200]\n\t"
+        "str r6, [%[r], #204]\n\t"
+        "ldr r3, [sp, #208]\n\t"
+        "ldr r4, [sp, #212]\n\t"
+        "ldr r5, [sp, #216]\n\t"
+        "ldr r6, [sp, #220]\n\t"
+        "str r3, [%[r], #208]\n\t"
+        "str r4, [%[r], #212]\n\t"
+        "str r5, [%[r], #216]\n\t"
+        "str r6, [%[r], #220]\n\t"
+        "ldr r3, [sp, #224]\n\t"
+        "ldr r4, [sp, #228]\n\t"
+        "ldr r5, [sp, #232]\n\t"
+        "ldr r6, [sp, #236]\n\t"
+        "str r3, [%[r], #224]\n\t"
+        "str r4, [%[r], #228]\n\t"
+        "str r5, [%[r], #232]\n\t"
+        "str r6, [%[r], #236]\n\t"
+        "ldr r3, [sp, #240]\n\t"
+        "ldr r4, [sp, #244]\n\t"
+        "ldr r5, [sp, #248]\n\t"
+        "ldr r6, [sp, #252]\n\t"
+        "str r3, [%[r], #240]\n\t"
+        "str r4, [%[r], #244]\n\t"
+        "str r5, [%[r], #248]\n\t"
+        "str r6, [%[r], #252]\n\t"
+        "add sp, sp, #256\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<64; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 64; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
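
The unrolled block that just closed is product-scanning (Comba) multiplication: each 32-bit result word is one column, every umull/adds/adcs/adc group folds one partial product A[i] * B[j] with i + j fixed into a rotating three-register accumulator, and a str emits the finished column. The low 64 columns are built on the stack and only copied into r at the end, which is what lets r alias a or b. A loop form of the same idea, as a minimal hypothetical C sketch (mul_words is illustrative, not part of the patch, and assumes r does not alias the inputs):

    #include <stdint.h>

    /* Product-scanning multiply of two n-word numbers into 2n words.
     * t is the umull result; the carry adds mirror adds/adcs/adc into
     * the column accumulator c0:c1:c2; r[k] = c0 is the str. */
    static void mul_words(uint32_t* r, const uint32_t* a,
                          const uint32_t* b, int n)
    {
        uint32_t c0 = 0, c1 = 0, c2 = 0;
        int i, k;

        for (k = 0; k <= 2 * n - 2; k++) {
            int lo = (k < n) ? 0 : k - n + 1;
            int hi = (k < n) ? k : n - 1;
            for (i = lo; i <= hi; i++) {
                uint64_t t = (uint64_t)a[i] * b[k - i];   /* umull */
                uint64_t s = (uint64_t)c0 + (uint32_t)t;  /* adds  */
                c0 = (uint32_t)s;
                s = (uint64_t)c1 + (t >> 32) + (s >> 32); /* adcs  */
                c1 = (uint32_t)s;
                c2 += (uint32_t)(s >> 32);                /* adc   */
            }
            r[k] = c0;                /* emit the finished column */
            c0 = c1; c1 = c2; c2 = 0; /* rotate the accumulator   */
        }
        r[2 * n - 1] = c0;            /* top word                 */
    }

For n = 64 this produces the same 128-word layout as the assembly above, which simply unrolls both loops so the accumulator never has to leave registers.
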
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[128];
+    sp_digit a1[64];
+    sp_digit b1[64];
+    sp_digit z2[128];
+    sp_digit u, ca, cb;
+
+    ca = sp_2048_add_64(a1, a, &a[64]);
+    cb = sp_2048_add_64(b1, b, &b[64]);
+    u = ca & cb;
+    sp_2048_mul_64(z1, a1, b1);
+    sp_2048_mul_64(z2, &a[64], &b[64]);
+    sp_2048_mul_64(z0, a, b);
+    sp_2048_mask_64(r + 128, a1, 0 - cb);
+    sp_2048_mask_64(b1, b1, 0 - ca);
+    u += sp_2048_add_64(r + 128, r + 128, b1);
+    u += sp_4096_sub_in_place_128(z1, z2);
+    u += sp_4096_sub_in_place_128(z1, z0);
+    u += sp_4096_add_128(r + 64, r + 64, z1);
+    r[192] = u;
+    XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
+    (void)sp_4096_add_128(r + 128, r + 128, z2);
+}
+
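
sp_4096_mul_128() is one level of Karatsuba multiplication: three 64-word multiplies (z0 = a_lo * b_lo, z2 = a_hi * b_hi, and z1 = (a_lo + a_hi) * (b_lo + b_hi)) replace four, with z1 - z2 - z0 added back at the middle word offset. Because sp_2048_add_64() cannot widen its result, the half-sum carries ca and cb are folded in afterwards: 0 - ca and 0 - cb are all-ones or all-zero words, so the mask_64 helpers make the conditional corrections branch-free. The same three-product identity at machine-word scale, as a minimal hypothetical sketch (karatsuba32 is illustrative, not from the patch; here the half-sums fit in a wider type, so no carry masks are needed):

    #include <stdint.h>
    #include <stdio.h>

    /* One Karatsuba level: a 32x32->64 multiply from three 16x16->32
     * multiplies, the same split sp_4096_mul_128() applies to halves
     * of 64 words. */
    static uint64_t karatsuba32(uint32_t a, uint32_t b)
    {
        uint64_t a0 = a & 0xffff, a1 = a >> 16;
        uint64_t b0 = b & 0xffff, b1 = b >> 16;
        uint64_t z0 = a0 * b0;                         /* low  x low  */
        uint64_t z2 = a1 * b1;                         /* high x high */
        uint64_t z1 = (a0 + a1) * (b0 + b1) - z2 - z0; /* middle term */

        return (z2 << 32) + (z1 << 16) + z0;
    }

    int main(void)
    {
        uint32_t a = 0xdeadbeefU, b = 0xcafef00dU;

        /* prints 1: agrees with the widening multiply */
        printf("%d\n", karatsuba32(a, b) == (uint64_t)a * b);
        return 0;
    }

In the function above, the same subtraction happens on 128-word arrays via sp_4096_sub_in_place_128(), with the borrows and carries collected in u and written to r[192].
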
r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #20]\n\t" + "# A[0] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #24]\n\t" + "# A[0] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #28]\n\t" + "# A[0] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #32]\n\t" + "# A[0] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" 
+ "adc r7, r7, r14\n\t" + "# A[4] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #36]\n\t" + "# A[0] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #40]\n\t" + "# A[0] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #44]\n\t" + "# A[0] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #16]\n\t" + 
"umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #48]\n\t" + "# A[0] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #52]\n\t" + "# A[0] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, 
#56]\n\t" + "# A[0] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #60]\n\t" + "# A[0] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #64]\n\t" + "# A[0] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], 
#4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #68]\n\t" + "# A[0] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #72]\n\t" + "# A[0] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #0]\n\t" + 
"umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #76]\n\t" + "# A[0] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #36]\n\t" + 
"umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #80]\n\t" + "# A[0] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #84]\n\t" + "# A[0] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, 
r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #88]\n\t" + "# A[0] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, 
r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #92]\n\t" + "# A[0] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #96]\n\t" + "# A[0] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #100]\n\t" + "# A[0] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, 
r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #104]\n\t" + "# A[0] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #108]\n\t" + "# A[0] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, 
r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #112]\n\t" + "# A[0] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, 
r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #116]\n\t" + "# A[0] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + 
"ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #120]\n\t" + "# A[0] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[19]\n\t" + "ldr 
r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #124]\n\t" + "# A[0] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + 
"adc r7, r7, r14\n\t" + "# A[15] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #128]\n\t" + "# A[0] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" 
+ "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #132]\n\t" + "# A[0] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, 
r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #136]\n\t" + "# A[0] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #140]\n\t" + "# A[0] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, 
[%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #144]\n\t" + "# A[0] * A[37]\n\t" + "ldr r10, 
[%[a], #148]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + 
"str r3, [sp, #148]\n\t" + "# A[0] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "umull r8, r9, r10, r10\n\t" + 
"adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #152]\n\t" + "# A[0] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" 
+ "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #156]\n\t" + "# A[0] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, 
r14\n\t" + "# A[17] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #160]\n\t" + "# A[0] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + 
"adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #164]\n\t" + "# A[0] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], 
#48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #168]\n\t" + "# A[0] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[34]\n\t" + "ldr r10, 
[%[a], #136]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #172]\n\t" + "# A[0] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #176]\n\t" + "# A[0] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + 
"adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, 
r14\n\t" + "# A[21] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #180]\n\t" + "# A[0] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + 
"adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #184]\n\t" + "# A[0] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], 
#44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #188]\n\t" + "# A[0] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# 
A[6] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" 
+ "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #192]\n\t" + "# A[0] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[30]\n\t" + "ldr 
r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #196]\n\t" + "# A[0] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #200]\n\t" + "# A[0] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" 
+ "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, 
r14\n\t" + "# A[25] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #204]\n\t" + "# A[0] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, 
r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #208]\n\t" + "# A[0] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, 
[%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #212]\n\t" + "# A[0] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov 
r7, #0\n\t" + "# A[1] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + 
"ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #216]\n\t" + "# A[0] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * 
A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #220]\n\t" + "# A[0] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds 
r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + 
"# A[21] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #224]\n\t" + "# A[0] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs 
r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #228]\n\t" + "# A[0] * A[58]\n\t" + "ldr 
r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" 
+ "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #232]\n\t" + "# A[0] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, 
r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #236]\n\t" + "# A[0] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #64]\n\t" + 
"umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #240]\n\t" + "# A[0] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[57]\n\t" + "ldr r10, 
[%[a], #228]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, 
r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #244]\n\t" + "# A[0] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[51]\n\t" + 
"ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], 
#120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #248]\n\t" + "# A[0] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[46]\n\t" + "ldr 
r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #252]\n\t" + "# A[1] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[2] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + 
"adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * 
A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #256]\n\t" + "# A[2] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[3] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, 
r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * 
A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #260]\n\t" + "# A[3] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[4] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, 
r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #264]\n\t" + "# A[4] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[5] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, 
[%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds 
r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #268]\n\t" + "# A[5] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[6] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[51]\n\t" + "ldr r10, 
[%[a], #204]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #272]\n\t" + "# A[6] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, 
#0\n\t" + "mov r7, #0\n\t" + "# A[7] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[43]\n\t" + "ldr 
r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #276]\n\t" + "# A[7] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[8] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, 
r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, 
r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #280]\n\t" + "# A[8] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[9] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds 
r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #284]\n\t" + "# A[9] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[10] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[54]\n\t" + "ldr 
r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs 
r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #288]\n\t" + "# A[10] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[11] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * 
A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #292]\n\t" + "# A[11] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[12] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, 
r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #296]\n\t" + "# A[12] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[13] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[61]\n\t" + "ldr r10, 
[%[a], #244]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #132]\n\t" + 
"umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #300]\n\t" + "# A[13] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[14] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" 
+ "# A[27] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #304]\n\t" + "# A[14] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[15] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds 
r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], 
#308]\n\t" + "# A[15] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[16] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], 
#136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #312]\n\t" + "# A[16] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[17] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] 
* A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #316]\n\t" + "# A[17] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[18] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, 
r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #320]\n\t" + "# A[18] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[19] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[61]\n\t" + "ldr r10, 
[%[a], #244]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], 
#156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #324]\n\t" + "# A[19] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[20] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + 
"adc r7, r7, r14\n\t" + "# A[36] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #328]\n\t" + "# A[20] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[21] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, 
r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #332]\n\t" + "# A[21] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[22] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + 
"# A[30] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #336]\n\t" + "# A[22] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[23] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds 
r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #340]\n\t" + "# A[23] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[24] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[60]\n\t" + 
"ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #344]\n\t" + "# A[24] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r5, r6, r10, 
r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[25] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, 
r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #348]\n\t" + "# A[25] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[26] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], 
#172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #352]\n\t" + "# A[26] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[27] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + 
"# A[43] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #356]\n\t" + "# A[27] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[28] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #172]\n\t" + 
"umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #360]\n\t" + "# A[28] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[29] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * 
A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #364]\n\t" + "# A[29] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[30] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, 
r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #368]\n\t" + "# A[30] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[31] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + 
"adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #372]\n\t" + "# A[31] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[32] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #376]\n\t" + "# A[32] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# 
A[33] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #380]\n\t" + "# A[33] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[34] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull 
r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #384]\n\t" + "# A[34] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[35] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * 
A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #388]\n\t" + "# A[35] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[36] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, 
r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #392]\n\t" + "# A[36] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[37] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[50]\n\t" + "ldr 
r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #396]\n\t" + "# A[37] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[38] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #400]\n\t" + "# A[38] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[39] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# 
A[41] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #404]\n\t" + "# A[39] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[40] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull 
r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #408]\n\t" + "# A[40] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[41] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #412]\n\t" + "# A[41] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[42] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, 
[%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #416]\n\t" + "# A[42] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[43] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, 
r14\n\t" + "# A[50] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #420]\n\t" + "# A[43] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[44] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #424]\n\t" + "# A[44] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[45] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #428]\n\t" + "# A[45] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[46] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[54] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #432]\n\t" + "# A[46] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[47] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, 
r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[54] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #436]\n\t" + "# A[47] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[48] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[54] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[55] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #440]\n\t" + "# A[48] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[49] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, 
r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[54] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[55] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #220]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #444]\n\t" + "# A[49] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[50] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[54] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[55] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #220]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[56] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #448]\n\t" + "# A[50] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[51] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #212]\n\t" 
+ "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[54] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[55] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #220]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[56] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #224]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #452]\n\t" + "# A[51] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[52] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[54] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[55] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #220]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[56] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #224]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[57] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #456]\n\t" + "# A[52] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[53] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[54] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[55] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #220]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[56] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #224]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[57] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #228]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #460]\n\t" + "# A[53] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], 
#212]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[54] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[55] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #220]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[56] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #224]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[57] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #228]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[58] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #464]\n\t" + "# A[54] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[55] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #220]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[56] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #224]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[57] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #228]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[58] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #232]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #468]\n\t" + "# A[55] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #220]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[56] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #224]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[57] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #228]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[58] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #232]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[59] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #472]\n\t" + "# A[56] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #224]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[57] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #228]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" 
+ "adc r7, r7, r14\n\t" + "# A[58] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #232]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[59] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #236]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #476]\n\t" + "# A[57] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #228]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[58] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #232]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[59] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #236]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[60] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #480]\n\t" + "# A[58] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #232]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[59] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #236]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[60] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #240]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #484]\n\t" + "# A[59] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #236]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r14, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "# A[60] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #240]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "# A[61] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "str r4, [%[r], #488]\n\t" + "# A[60] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #240]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r14, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "# A[61] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #244]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "str r2, [%[r], #492]\n\t" + "# A[61] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #244]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, 
r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[62] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [%[r], #496]\n\t" + "# A[62] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #248]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r14, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "str r4, [%[r], #500]\n\t" + "# A[63] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r2, r2, r8\n\t" + "adc r3, r3, r9\n\t" + "str r2, [%[r], #504]\n\t" + "str r3, [%[r], #508]\n\t" + "ldr r2, [sp, #0]\n\t" + "ldr r3, [sp, #4]\n\t" + "ldr r4, [sp, #8]\n\t" + "ldr r8, [sp, #12]\n\t" + "str r2, [%[r], #0]\n\t" + "str r3, [%[r], #4]\n\t" + "str r4, [%[r], #8]\n\t" + "str r8, [%[r], #12]\n\t" + "ldr r2, [sp, #16]\n\t" + "ldr r3, [sp, #20]\n\t" + "ldr r4, [sp, #24]\n\t" + "ldr r8, [sp, #28]\n\t" + "str r2, [%[r], #16]\n\t" + "str r3, [%[r], #20]\n\t" + "str r4, [%[r], #24]\n\t" + "str r8, [%[r], #28]\n\t" + "ldr r2, [sp, #32]\n\t" + "ldr r3, [sp, #36]\n\t" + "ldr r4, [sp, #40]\n\t" + "ldr r8, [sp, #44]\n\t" + "str r2, [%[r], #32]\n\t" + "str r3, [%[r], #36]\n\t" + "str r4, [%[r], #40]\n\t" + "str r8, [%[r], #44]\n\t" + "ldr r2, [sp, #48]\n\t" + "ldr r3, [sp, #52]\n\t" + "ldr r4, [sp, #56]\n\t" + "ldr r8, [sp, #60]\n\t" + "str r2, [%[r], #48]\n\t" + "str r3, [%[r], #52]\n\t" + "str r4, [%[r], #56]\n\t" + "str r8, [%[r], #60]\n\t" + "ldr r2, [sp, #64]\n\t" + "ldr r3, [sp, #68]\n\t" + "ldr r4, [sp, #72]\n\t" + "ldr r8, [sp, #76]\n\t" + "str r2, [%[r], #64]\n\t" + "str r3, [%[r], #68]\n\t" + "str r4, [%[r], #72]\n\t" + "str r8, [%[r], #76]\n\t" + "ldr r2, [sp, #80]\n\t" + "ldr r3, [sp, #84]\n\t" + "ldr r4, [sp, #88]\n\t" + "ldr r8, [sp, #92]\n\t" + "str r2, [%[r], #80]\n\t" + "str r3, [%[r], #84]\n\t" + "str r4, [%[r], #88]\n\t" + "str r8, [%[r], #92]\n\t" + "ldr r2, [sp, #96]\n\t" + "ldr r3, [sp, #100]\n\t" + "ldr r4, [sp, #104]\n\t" + "ldr r8, [sp, #108]\n\t" + "str r2, [%[r], #96]\n\t" + "str r3, [%[r], #100]\n\t" + "str r4, [%[r], #104]\n\t" + "str r8, [%[r], #108]\n\t" + "ldr r2, [sp, #112]\n\t" + "ldr r3, [sp, #116]\n\t" + "ldr r4, [sp, #120]\n\t" + "ldr r8, [sp, #124]\n\t" + "str r2, [%[r], #112]\n\t" + "str r3, [%[r], #116]\n\t" + "str r4, [%[r], #120]\n\t" + "str r8, [%[r], #124]\n\t" + "ldr r2, [sp, #128]\n\t" + "ldr r3, [sp, #132]\n\t" + "ldr r4, [sp, #136]\n\t" + "ldr r8, [sp, #140]\n\t" + "str r2, [%[r], #128]\n\t" + "str r3, [%[r], #132]\n\t" + "str r4, [%[r], #136]\n\t" + "str r8, [%[r], #140]\n\t" + "ldr r2, [sp, #144]\n\t" + "ldr r3, [sp, #148]\n\t" + "ldr r4, [sp, #152]\n\t" + "ldr r8, [sp, #156]\n\t" + "str r2, [%[r], #144]\n\t" + "str r3, [%[r], #148]\n\t" + "str r4, [%[r], #152]\n\t" + "str r8, [%[r], #156]\n\t" + "ldr r2, [sp, #160]\n\t" + "ldr r3, [sp, #164]\n\t" + "ldr r4, [sp, #168]\n\t" + "ldr r8, [sp, #172]\n\t" + "str r2, [%[r], #160]\n\t" + "str r3, [%[r], #164]\n\t" + "str r4, [%[r], #168]\n\t" + "str r8, [%[r], #172]\n\t" + "ldr r2, [sp, #176]\n\t" + "ldr r3, [sp, #180]\n\t" + "ldr r4, [sp, #184]\n\t" + "ldr r8, [sp, #188]\n\t" + "str r2, [%[r], #176]\n\t" + "str r3, [%[r], #180]\n\t" + "str r4, [%[r], #184]\n\t" + "str r8, [%[r], #188]\n\t" + "ldr r2, [sp, #192]\n\t" + "ldr r3, [sp, #196]\n\t" + "ldr r4, [sp, #200]\n\t" + "ldr r8, [sp, #204]\n\t" + "str r2, [%[r], #192]\n\t" + "str r3, [%[r], #196]\n\t" + "str r4, [%[r], #200]\n\t" + "str 
r8, [%[r], #204]\n\t" + "ldr r2, [sp, #208]\n\t" + "ldr r3, [sp, #212]\n\t" + "ldr r4, [sp, #216]\n\t" + "ldr r8, [sp, #220]\n\t" + "str r2, [%[r], #208]\n\t" + "str r3, [%[r], #212]\n\t" + "str r4, [%[r], #216]\n\t" + "str r8, [%[r], #220]\n\t" + "ldr r2, [sp, #224]\n\t" + "ldr r3, [sp, #228]\n\t" + "ldr r4, [sp, #232]\n\t" + "ldr r8, [sp, #236]\n\t" + "str r2, [%[r], #224]\n\t" + "str r3, [%[r], #228]\n\t" + "str r4, [%[r], #232]\n\t" + "str r8, [%[r], #236]\n\t" + "ldr r2, [sp, #240]\n\t" + "ldr r3, [sp, #244]\n\t" + "ldr r4, [sp, #248]\n\t" + "ldr r8, [sp, #252]\n\t" + "str r2, [%[r], #240]\n\t" + "str r3, [%[r], #244]\n\t" + "str r4, [%[r], #248]\n\t" + "str r8, [%[r], #252]\n\t" + "add sp, sp, #256\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r5", "r6", "r7", "r14" + ); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[128]; + sp_digit z1[128]; + sp_digit a1[64]; + sp_digit u; + + u = sp_2048_add_64(a1, a, &a[64]); + sp_2048_sqr_64(z1, a1); + sp_2048_sqr_64(z2, &a[64]); + sp_2048_sqr_64(z0, a); + sp_2048_mask_64(r + 128, a1, 0 - u); + u += sp_2048_add_64(r + 128, r + 128, r + 128); + u += sp_4096_sub_in_place_128(z1, z2); + u += sp_4096_sub_in_place_128(z1, z0); + u += sp_4096_add_128(r + 64, r + 64, z1); + r[192] = u; + XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1)); + (void)sp_4096_add_128(r + 128, r + 128, z2); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add r12, %[a], #512\n\t" + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr r4, [%[a]], #4\n\t" + "ldr r5, [%[a]], #4\n\t" + "ldr r6, [%[a]], #4\n\t" + "ldr r7, [%[a]], #4\n\t" + "ldr r8, [%[b]], #4\n\t" + "ldr r9, [%[b]], #4\n\t" + "ldr r10, [%[b]], #4\n\t" + "ldr r14, [%[b]], #4\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r]], #4\n\t" + "str r5, [%[r]], #4\n\t" + "str r6, [%[r]], #4\n\t" + "str r7, [%[r]], #4\n\t" + "mov r4, #0\n\t" + "adc %[c], r4, #0\n\t" + "cmp %[a], r12\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer.
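+ * Note (editorial addition, not generated code): each loop pass below + * subtracts four digits, threading the borrow through the sbcs chain; + * the return value is the final borrow, 0 or all ones (-1), which + * callers can accumulate directly into a carry word.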
+ */ +static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r14, #0\n\t" + "add r12, %[a], #512\n\t" + "\n1:\n\t" + "subs %[c], r14, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b]], #4\n\t" + "ldr r8, [%[b]], #4\n\t" + "ldr r9, [%[b]], #4\n\t" + "ldr r10, [%[b]], #4\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "str r3, [%[a]], #4\n\t" + "str r4, [%[a]], #4\n\t" + "str r5, [%[a]], #4\n\t" + "str r6, [%[a]], #4\n\t" + "sbc %[c], r14, r14\n\t" + "cmp %[a], r12\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "sub sp, sp, #1024\n\t" + "mov r5, #0\n\t" + "mov r6, #0\n\t" + "mov r7, #0\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "subs r3, r5, #508\n\t" + "it cc\n\t" + "movcc r3, #0\n\t" + "sub r4, r5, r3\n\t" + "\n2:\n\t" + "ldr r14, [%[a], r3]\n\t" + "ldr r12, [%[b], r4]\n\t" + "umull r9, r10, r14, r12\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, #512\n\t" + "beq 3f\n\t" + "cmp r3, r5\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "str r6, [sp, r5]\n\t" + "mov r6, r7\n\t" + "mov r7, r8\n\t" + "mov r8, #0\n\t" + "add r5, r5, #4\n\t" + "cmp r5, #1016\n\t" + "ble 1b\n\t" + "str r6, [sp, r5]\n\t" + "\n4:\n\t" + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r3, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r3, [%[r], #12]\n\t" + "add sp, sp, #16\n\t" + "add %[r], %[r], #16\n\t" + "subs r5, r5, #16\n\t" + "bgt 4b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
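+ * Note (editorial addition, not generated code): this is product + * scanning; for each output word the inner loop at label 2 sums the + * products a[i] * a[j] with i + j fixed into the 96-bit accumulator + * r6:r7:r8, adding off-diagonal products twice and the diagonal square + * a[i] * a[i] (label 4) once.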
+ */ +static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "sub sp, sp, #1024\n\t" + "mov r12, #0\n\t" + "mov r6, #0\n\t" + "mov r7, #0\n\t" + "mov r8, #0\n\t" + "mov r5, #0\n\t" + "\n1:\n\t" + "subs r3, r5, #508\n\t" + "it cc\n\t" + "movcc r3, r12\n\t" + "sub r4, r5, r3\n\t" + "\n2:\n\t" + "cmp r4, r3\n\t" + "beq 4f\n\t" + "ldr r14, [%[a], r3]\n\t" + "ldr r9, [%[a], r4]\n\t" + "umull r9, r10, r14, r9\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "bal 5f\n\t" + "\n4:\n\t" + "ldr r14, [%[a], r3]\n\t" + "umull r9, r10, r14, r14\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "\n5:\n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, #512\n\t" + "beq 3f\n\t" + "cmp r3, r4\n\t" + "bgt 3f\n\t" + "cmp r3, r5\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "str r6, [sp, r5]\n\t" + "mov r6, r7\n\t" + "mov r7, r8\n\t" + "mov r8, #0\n\t" + "add r5, r5, #4\n\t" + "cmp r5, #1016\n\t" + "ble 1b\n\t" + "str r6, [sp, r5]\n\t" + "\n4:\n\t" + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r3, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r3, [%[r], #12]\n\t" + "add sp, sp, #16\n\t" + "add %[r], %[r], #16\n\t" + "subs r5, r5, #16\n\t" + "bgt 4b\n\t" + : [r] "+r" (r) + : [a] "r" (a) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Calculate the bottom digit of -1/a mod 2^n. + * + * a A single precision number. + * rho Bottom word of inverse. + */ +static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho) +{ + sp_digit x, b; + + b = a[0]; + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ + + /* rho = -1/m mod 2^32 */ + *rho = -x; +} + +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit.
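+ * Note (editorial addition, not generated code): this computes + * r = sum(a[i] * b * 2^(32*i)) for i in [0, 127]; each umull forms a + * 64-bit partial product that is folded into a rolling three-word carry + * window before the lowest word is written out.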
+ */ +static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r5, r3, %[b], r8\n\t" + "mov r4, #0\n\t" + "str r5, [%[r]]\n\t" + "mov r5, #0\n\t" + "mov r9, #4\n\t" + "1:\n\t" + "ldr r8, [%[a], r9]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], r9]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r9, r9, #4\n\t" + "cmp r9, #512\n\t" + "blt 1b\n\t" + "str r3, [%[r], #512]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#else + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r3, r4, %[b], r8\n\t" + "mov r5, #0\n\t" + "str r3, [%[r]]\n\t" + "# A[1] * B\n\t" + "ldr r8, [%[a], #4]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #4]\n\t" + "# A[2] * B\n\t" + "ldr r8, [%[a], #8]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #8]\n\t" + "# A[3] * B\n\t" + "ldr r8, [%[a], #12]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #12]\n\t" + "# A[4] * B\n\t" + "ldr r8, [%[a], #16]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #16]\n\t" + "# A[5] * B\n\t" + "ldr r8, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #20]\n\t" + "# A[6] * B\n\t" + "ldr r8, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #24]\n\t" + "# A[7] * B\n\t" + "ldr r8, [%[a], #28]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #28]\n\t" + "# A[8] * B\n\t" + "ldr r8, [%[a], #32]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #32]\n\t" + "# A[9] * B\n\t" + "ldr r8, [%[a], #36]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #36]\n\t" + "# A[10] * B\n\t" + "ldr r8, [%[a], #40]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #40]\n\t" + "# A[11] * B\n\t" + "ldr r8, [%[a], #44]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #44]\n\t" + "# A[12] * B\n\t" + "ldr r8, [%[a], #48]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #48]\n\t" + "# A[13] * B\n\t" + "ldr r8, [%[a], #52]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #52]\n\t" + "# A[14] * B\n\t" + "ldr r8, [%[a], #56]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], 
r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #56]\n\t" + "# A[15] * B\n\t" + "ldr r8, [%[a], #60]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #60]\n\t" + "# A[16] * B\n\t" + "ldr r8, [%[a], #64]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #64]\n\t" + "# A[17] * B\n\t" + "ldr r8, [%[a], #68]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #68]\n\t" + "# A[18] * B\n\t" + "ldr r8, [%[a], #72]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #72]\n\t" + "# A[19] * B\n\t" + "ldr r8, [%[a], #76]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #76]\n\t" + "# A[20] * B\n\t" + "ldr r8, [%[a], #80]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #80]\n\t" + "# A[21] * B\n\t" + "ldr r8, [%[a], #84]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #84]\n\t" + "# A[22] * B\n\t" + "ldr r8, [%[a], #88]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #88]\n\t" + "# A[23] * B\n\t" + "ldr r8, [%[a], #92]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #92]\n\t" + "# A[24] * B\n\t" + "ldr r8, [%[a], #96]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #96]\n\t" + "# A[25] * B\n\t" + "ldr r8, [%[a], #100]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #100]\n\t" + "# A[26] * B\n\t" + "ldr r8, [%[a], #104]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #104]\n\t" + "# A[27] * B\n\t" + "ldr r8, [%[a], #108]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #108]\n\t" + "# A[28] * B\n\t" + "ldr r8, [%[a], #112]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #112]\n\t" + "# A[29] * B\n\t" + "ldr r8, [%[a], #116]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #116]\n\t" + "# A[30] * B\n\t" + "ldr r8, [%[a], #120]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #120]\n\t" + "# A[31] * B\n\t" + "ldr r8, [%[a], #124]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #124]\n\t" + "# A[32] * B\n\t" + "ldr r8, [%[a], #128]\n\t" + "mov r4, 
#0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #128]\n\t" + "# A[33] * B\n\t" + "ldr r8, [%[a], #132]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #132]\n\t" + "# A[34] * B\n\t" + "ldr r8, [%[a], #136]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #136]\n\t" + "# A[35] * B\n\t" + "ldr r8, [%[a], #140]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #140]\n\t" + "# A[36] * B\n\t" + "ldr r8, [%[a], #144]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #144]\n\t" + "# A[37] * B\n\t" + "ldr r8, [%[a], #148]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #148]\n\t" + "# A[38] * B\n\t" + "ldr r8, [%[a], #152]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #152]\n\t" + "# A[39] * B\n\t" + "ldr r8, [%[a], #156]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #156]\n\t" + "# A[40] * B\n\t" + "ldr r8, [%[a], #160]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #160]\n\t" + "# A[41] * B\n\t" + "ldr r8, [%[a], #164]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #164]\n\t" + "# A[42] * B\n\t" + "ldr r8, [%[a], #168]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #168]\n\t" + "# A[43] * B\n\t" + "ldr r8, [%[a], #172]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #172]\n\t" + "# A[44] * B\n\t" + "ldr r8, [%[a], #176]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #176]\n\t" + "# A[45] * B\n\t" + "ldr r8, [%[a], #180]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #180]\n\t" + "# A[46] * B\n\t" + "ldr r8, [%[a], #184]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #184]\n\t" + "# A[47] * B\n\t" + "ldr r8, [%[a], #188]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #188]\n\t" + "# A[48] * B\n\t" + "ldr r8, [%[a], #192]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #192]\n\t" + "# A[49] * B\n\t" + "ldr r8, [%[a], #196]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #196]\n\t" + "# A[50] * 
B\n\t" + "ldr r8, [%[a], #200]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #200]\n\t" + "# A[51] * B\n\t" + "ldr r8, [%[a], #204]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #204]\n\t" + "# A[52] * B\n\t" + "ldr r8, [%[a], #208]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #208]\n\t" + "# A[53] * B\n\t" + "ldr r8, [%[a], #212]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #212]\n\t" + "# A[54] * B\n\t" + "ldr r8, [%[a], #216]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #216]\n\t" + "# A[55] * B\n\t" + "ldr r8, [%[a], #220]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #220]\n\t" + "# A[56] * B\n\t" + "ldr r8, [%[a], #224]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #224]\n\t" + "# A[57] * B\n\t" + "ldr r8, [%[a], #228]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #228]\n\t" + "# A[58] * B\n\t" + "ldr r8, [%[a], #232]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #232]\n\t" + "# A[59] * B\n\t" + "ldr r8, [%[a], #236]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #236]\n\t" + "# A[60] * B\n\t" + "ldr r8, [%[a], #240]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #240]\n\t" + "# A[61] * B\n\t" + "ldr r8, [%[a], #244]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #244]\n\t" + "# A[62] * B\n\t" + "ldr r8, [%[a], #248]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #248]\n\t" + "# A[63] * B\n\t" + "ldr r8, [%[a], #252]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #252]\n\t" + "# A[64] * B\n\t" + "ldr r8, [%[a], #256]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #256]\n\t" + "# A[65] * B\n\t" + "ldr r8, [%[a], #260]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #260]\n\t" + "# A[66] * B\n\t" + "ldr r8, [%[a], #264]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #264]\n\t" + "# A[67] * B\n\t" + "ldr r8, [%[a], #268]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
r10\n\t" + "str r4, [%[r], #268]\n\t" + "# A[68] * B\n\t" + "ldr r8, [%[a], #272]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #272]\n\t" + "# A[69] * B\n\t" + "ldr r8, [%[a], #276]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #276]\n\t" + "# A[70] * B\n\t" + "ldr r8, [%[a], #280]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #280]\n\t" + "# A[71] * B\n\t" + "ldr r8, [%[a], #284]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #284]\n\t" + "# A[72] * B\n\t" + "ldr r8, [%[a], #288]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #288]\n\t" + "# A[73] * B\n\t" + "ldr r8, [%[a], #292]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #292]\n\t" + "# A[74] * B\n\t" + "ldr r8, [%[a], #296]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #296]\n\t" + "# A[75] * B\n\t" + "ldr r8, [%[a], #300]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #300]\n\t" + "# A[76] * B\n\t" + "ldr r8, [%[a], #304]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #304]\n\t" + "# A[77] * B\n\t" + "ldr r8, [%[a], #308]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #308]\n\t" + "# A[78] * B\n\t" + "ldr r8, [%[a], #312]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #312]\n\t" + "# A[79] * B\n\t" + "ldr r8, [%[a], #316]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #316]\n\t" + "# A[80] * B\n\t" + "ldr r8, [%[a], #320]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #320]\n\t" + "# A[81] * B\n\t" + "ldr r8, [%[a], #324]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #324]\n\t" + "# A[82] * B\n\t" + "ldr r8, [%[a], #328]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #328]\n\t" + "# A[83] * B\n\t" + "ldr r8, [%[a], #332]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #332]\n\t" + "# A[84] * B\n\t" + "ldr r8, [%[a], #336]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #336]\n\t" + "# A[85] * B\n\t" + "ldr r8, [%[a], #340]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #340]\n\t" + "# A[86] * B\n\t" + "ldr r8, [%[a], #344]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #344]\n\t" + "# A[87] * B\n\t" + "ldr r8, [%[a], #348]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #348]\n\t" + "# A[88] * B\n\t" + "ldr r8, [%[a], #352]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #352]\n\t" + "# A[89] * B\n\t" + "ldr r8, [%[a], #356]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #356]\n\t" + "# A[90] * B\n\t" + "ldr r8, [%[a], #360]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #360]\n\t" + "# A[91] * B\n\t" + "ldr r8, [%[a], #364]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #364]\n\t" + "# A[92] * B\n\t" + "ldr r8, [%[a], #368]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #368]\n\t" + "# A[93] * B\n\t" + "ldr r8, [%[a], #372]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #372]\n\t" + "# A[94] * B\n\t" + "ldr r8, [%[a], #376]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #376]\n\t" + "# A[95] * B\n\t" + "ldr r8, [%[a], #380]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #380]\n\t" + "# A[96] * B\n\t" + "ldr r8, [%[a], #384]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #384]\n\t" + "# A[97] * B\n\t" + "ldr r8, [%[a], #388]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #388]\n\t" + "# A[98] * B\n\t" + "ldr r8, [%[a], #392]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #392]\n\t" + "# A[99] * B\n\t" + "ldr r8, [%[a], #396]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #396]\n\t" + "# A[100] * B\n\t" + "ldr r8, [%[a], #400]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #400]\n\t" + "# A[101] * B\n\t" + "ldr r8, [%[a], #404]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #404]\n\t" + "# A[102] * B\n\t" + "ldr r8, [%[a], #408]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #408]\n\t" + "# A[103] * B\n\t" + "ldr r8, [%[a], #412]\n\t" + "mov r3, 
#0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #412]\n\t" + "# A[104] * B\n\t" + "ldr r8, [%[a], #416]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #416]\n\t" + "# A[105] * B\n\t" + "ldr r8, [%[a], #420]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #420]\n\t" + "# A[106] * B\n\t" + "ldr r8, [%[a], #424]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #424]\n\t" + "# A[107] * B\n\t" + "ldr r8, [%[a], #428]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #428]\n\t" + "# A[108] * B\n\t" + "ldr r8, [%[a], #432]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #432]\n\t" + "# A[109] * B\n\t" + "ldr r8, [%[a], #436]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #436]\n\t" + "# A[110] * B\n\t" + "ldr r8, [%[a], #440]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #440]\n\t" + "# A[111] * B\n\t" + "ldr r8, [%[a], #444]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #444]\n\t" + "# A[112] * B\n\t" + "ldr r8, [%[a], #448]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #448]\n\t" + "# A[113] * B\n\t" + "ldr r8, [%[a], #452]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #452]\n\t" + "# A[114] * B\n\t" + "ldr r8, [%[a], #456]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #456]\n\t" + "# A[115] * B\n\t" + "ldr r8, [%[a], #460]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #460]\n\t" + "# A[116] * B\n\t" + "ldr r8, [%[a], #464]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #464]\n\t" + "# A[117] * B\n\t" + "ldr r8, [%[a], #468]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #468]\n\t" + "# A[118] * B\n\t" + "ldr r8, [%[a], #472]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #472]\n\t" + "# A[119] * B\n\t" + "ldr r8, [%[a], #476]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #476]\n\t" + "# A[120] * B\n\t" + "ldr r8, [%[a], #480]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], 
#480]\n\t" + "# A[121] * B\n\t" + "ldr r8, [%[a], #484]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #484]\n\t" + "# A[122] * B\n\t" + "ldr r8, [%[a], #488]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #488]\n\t" + "# A[123] * B\n\t" + "ldr r8, [%[a], #492]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #492]\n\t" + "# A[124] * B\n\t" + "ldr r8, [%[a], #496]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #496]\n\t" + "# A[125] * B\n\t" + "ldr r8, [%[a], #500]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #500]\n\t" + "# A[126] * B\n\t" + "ldr r8, [%[a], #504]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #504]\n\t" + "# A[127] * B\n\t" + "ldr r8, [%[a], #508]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "str r4, [%[r], #508]\n\t" + "str r5, [%[r], #512]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#endif +} + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 4096 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 128); + + /* r = 2^n mod m */ + sp_4096_sub_in_place_128(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
+ */ +static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r9, #0\n\t" + "mov r8, #0\n\t" + "1:\n\t" + "subs %[c], r9, %[c]\n\t" + "ldr r4, [%[a], r8]\n\t" + "ldr r5, [%[b], r8]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc %[c], r9, r9\n\t" + "str r4, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, #512\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#else + __asm__ __volatile__ ( + + "mov r9, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "subs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r6, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #32]\n\t" + "str r6, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r6, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r6, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #48]\n\t" + "str r6, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r6, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r7, [%[b], #60]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #56]\n\t" + "str r6, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r6, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #64]\n\t" + "str r6, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r6, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r7, [%[b], #76]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #72]\n\t" + "str r6, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r6, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #80]\n\t" + "str r6, 
[%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r6, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r7, [%[b], #92]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #88]\n\t" + "str r6, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r6, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #96]\n\t" + "str r6, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r6, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r7, [%[b], #108]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #104]\n\t" + "str r6, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r6, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #112]\n\t" + "str r6, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r6, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r7, [%[b], #124]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #120]\n\t" + "str r6, [%[r], #124]\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r6, [%[a], #132]\n\t" + "ldr r5, [%[b], #128]\n\t" + "ldr r7, [%[b], #132]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #128]\n\t" + "str r6, [%[r], #132]\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r6, [%[a], #140]\n\t" + "ldr r5, [%[b], #136]\n\t" + "ldr r7, [%[b], #140]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #136]\n\t" + "str r6, [%[r], #140]\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r6, [%[a], #148]\n\t" + "ldr r5, [%[b], #144]\n\t" + "ldr r7, [%[b], #148]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #144]\n\t" + "str r6, [%[r], #148]\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r6, [%[a], #156]\n\t" + "ldr r5, [%[b], #152]\n\t" + "ldr r7, [%[b], #156]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #152]\n\t" + "str r6, [%[r], #156]\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r6, [%[a], #164]\n\t" + "ldr r5, [%[b], #160]\n\t" + "ldr r7, [%[b], #164]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #160]\n\t" + "str r6, [%[r], #164]\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r6, [%[a], #172]\n\t" + "ldr r5, [%[b], #168]\n\t" + "ldr r7, [%[b], #172]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #168]\n\t" + "str r6, [%[r], #172]\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r6, [%[a], #180]\n\t" + "ldr r5, [%[b], #176]\n\t" + "ldr r7, [%[b], #180]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #176]\n\t" + "str r6, [%[r], #180]\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r6, [%[a], #188]\n\t" + "ldr r5, [%[b], #184]\n\t" + "ldr r7, [%[b], #188]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #184]\n\t" + "str r6, [%[r], #188]\n\t" + "ldr r4, 
[%[a], #192]\n\t" + "ldr r6, [%[a], #196]\n\t" + "ldr r5, [%[b], #192]\n\t" + "ldr r7, [%[b], #196]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #192]\n\t" + "str r6, [%[r], #196]\n\t" + "ldr r4, [%[a], #200]\n\t" + "ldr r6, [%[a], #204]\n\t" + "ldr r5, [%[b], #200]\n\t" + "ldr r7, [%[b], #204]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #200]\n\t" + "str r6, [%[r], #204]\n\t" + "ldr r4, [%[a], #208]\n\t" + "ldr r6, [%[a], #212]\n\t" + "ldr r5, [%[b], #208]\n\t" + "ldr r7, [%[b], #212]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #208]\n\t" + "str r6, [%[r], #212]\n\t" + "ldr r4, [%[a], #216]\n\t" + "ldr r6, [%[a], #220]\n\t" + "ldr r5, [%[b], #216]\n\t" + "ldr r7, [%[b], #220]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #216]\n\t" + "str r6, [%[r], #220]\n\t" + "ldr r4, [%[a], #224]\n\t" + "ldr r6, [%[a], #228]\n\t" + "ldr r5, [%[b], #224]\n\t" + "ldr r7, [%[b], #228]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #224]\n\t" + "str r6, [%[r], #228]\n\t" + "ldr r4, [%[a], #232]\n\t" + "ldr r6, [%[a], #236]\n\t" + "ldr r5, [%[b], #232]\n\t" + "ldr r7, [%[b], #236]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #232]\n\t" + "str r6, [%[r], #236]\n\t" + "ldr r4, [%[a], #240]\n\t" + "ldr r6, [%[a], #244]\n\t" + "ldr r5, [%[b], #240]\n\t" + "ldr r7, [%[b], #244]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #240]\n\t" + "str r6, [%[r], #244]\n\t" + "ldr r4, [%[a], #248]\n\t" + "ldr r6, [%[a], #252]\n\t" + "ldr r5, [%[b], #248]\n\t" + "ldr r7, [%[b], #252]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #248]\n\t" + "str r6, [%[r], #252]\n\t" + "ldr r4, [%[a], #256]\n\t" + "ldr r6, [%[a], #260]\n\t" + "ldr r5, [%[b], #256]\n\t" + "ldr r7, [%[b], #260]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #256]\n\t" + "str r6, [%[r], #260]\n\t" + "ldr r4, [%[a], #264]\n\t" + "ldr r6, [%[a], #268]\n\t" + "ldr r5, [%[b], #264]\n\t" + "ldr r7, [%[b], #268]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #264]\n\t" + "str r6, [%[r], #268]\n\t" + "ldr r4, [%[a], #272]\n\t" + "ldr r6, [%[a], #276]\n\t" + "ldr r5, [%[b], #272]\n\t" + "ldr r7, [%[b], #276]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #272]\n\t" + "str r6, [%[r], #276]\n\t" + "ldr r4, [%[a], #280]\n\t" + "ldr r6, [%[a], #284]\n\t" + "ldr r5, [%[b], #280]\n\t" + "ldr r7, [%[b], #284]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #280]\n\t" + "str r6, [%[r], #284]\n\t" + "ldr r4, [%[a], #288]\n\t" + "ldr r6, [%[a], #292]\n\t" + "ldr r5, [%[b], #288]\n\t" + "ldr r7, [%[b], #292]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #288]\n\t" + "str r6, [%[r], #292]\n\t" + "ldr r4, [%[a], #296]\n\t" 
+ "ldr r6, [%[a], #300]\n\t" + "ldr r5, [%[b], #296]\n\t" + "ldr r7, [%[b], #300]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #296]\n\t" + "str r6, [%[r], #300]\n\t" + "ldr r4, [%[a], #304]\n\t" + "ldr r6, [%[a], #308]\n\t" + "ldr r5, [%[b], #304]\n\t" + "ldr r7, [%[b], #308]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #304]\n\t" + "str r6, [%[r], #308]\n\t" + "ldr r4, [%[a], #312]\n\t" + "ldr r6, [%[a], #316]\n\t" + "ldr r5, [%[b], #312]\n\t" + "ldr r7, [%[b], #316]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #312]\n\t" + "str r6, [%[r], #316]\n\t" + "ldr r4, [%[a], #320]\n\t" + "ldr r6, [%[a], #324]\n\t" + "ldr r5, [%[b], #320]\n\t" + "ldr r7, [%[b], #324]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #320]\n\t" + "str r6, [%[r], #324]\n\t" + "ldr r4, [%[a], #328]\n\t" + "ldr r6, [%[a], #332]\n\t" + "ldr r5, [%[b], #328]\n\t" + "ldr r7, [%[b], #332]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #328]\n\t" + "str r6, [%[r], #332]\n\t" + "ldr r4, [%[a], #336]\n\t" + "ldr r6, [%[a], #340]\n\t" + "ldr r5, [%[b], #336]\n\t" + "ldr r7, [%[b], #340]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #336]\n\t" + "str r6, [%[r], #340]\n\t" + "ldr r4, [%[a], #344]\n\t" + "ldr r6, [%[a], #348]\n\t" + "ldr r5, [%[b], #344]\n\t" + "ldr r7, [%[b], #348]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #344]\n\t" + "str r6, [%[r], #348]\n\t" + "ldr r4, [%[a], #352]\n\t" + "ldr r6, [%[a], #356]\n\t" + "ldr r5, [%[b], #352]\n\t" + "ldr r7, [%[b], #356]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #352]\n\t" + "str r6, [%[r], #356]\n\t" + "ldr r4, [%[a], #360]\n\t" + "ldr r6, [%[a], #364]\n\t" + "ldr r5, [%[b], #360]\n\t" + "ldr r7, [%[b], #364]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #360]\n\t" + "str r6, [%[r], #364]\n\t" + "ldr r4, [%[a], #368]\n\t" + "ldr r6, [%[a], #372]\n\t" + "ldr r5, [%[b], #368]\n\t" + "ldr r7, [%[b], #372]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #368]\n\t" + "str r6, [%[r], #372]\n\t" + "ldr r4, [%[a], #376]\n\t" + "ldr r6, [%[a], #380]\n\t" + "ldr r5, [%[b], #376]\n\t" + "ldr r7, [%[b], #380]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #376]\n\t" + "str r6, [%[r], #380]\n\t" + "ldr r4, [%[a], #384]\n\t" + "ldr r6, [%[a], #388]\n\t" + "ldr r5, [%[b], #384]\n\t" + "ldr r7, [%[b], #388]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #384]\n\t" + "str r6, [%[r], #388]\n\t" + "ldr r4, [%[a], #392]\n\t" + "ldr r6, [%[a], #396]\n\t" + "ldr r5, [%[b], #392]\n\t" + "ldr r7, [%[b], #396]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #392]\n\t" + "str r6, [%[r], #396]\n\t" + "ldr r4, [%[a], #400]\n\t" + "ldr r6, [%[a], 
#404]\n\t" + "ldr r5, [%[b], #400]\n\t" + "ldr r7, [%[b], #404]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #400]\n\t" + "str r6, [%[r], #404]\n\t" + "ldr r4, [%[a], #408]\n\t" + "ldr r6, [%[a], #412]\n\t" + "ldr r5, [%[b], #408]\n\t" + "ldr r7, [%[b], #412]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #408]\n\t" + "str r6, [%[r], #412]\n\t" + "ldr r4, [%[a], #416]\n\t" + "ldr r6, [%[a], #420]\n\t" + "ldr r5, [%[b], #416]\n\t" + "ldr r7, [%[b], #420]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #416]\n\t" + "str r6, [%[r], #420]\n\t" + "ldr r4, [%[a], #424]\n\t" + "ldr r6, [%[a], #428]\n\t" + "ldr r5, [%[b], #424]\n\t" + "ldr r7, [%[b], #428]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #424]\n\t" + "str r6, [%[r], #428]\n\t" + "ldr r4, [%[a], #432]\n\t" + "ldr r6, [%[a], #436]\n\t" + "ldr r5, [%[b], #432]\n\t" + "ldr r7, [%[b], #436]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #432]\n\t" + "str r6, [%[r], #436]\n\t" + "ldr r4, [%[a], #440]\n\t" + "ldr r6, [%[a], #444]\n\t" + "ldr r5, [%[b], #440]\n\t" + "ldr r7, [%[b], #444]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #440]\n\t" + "str r6, [%[r], #444]\n\t" + "ldr r4, [%[a], #448]\n\t" + "ldr r6, [%[a], #452]\n\t" + "ldr r5, [%[b], #448]\n\t" + "ldr r7, [%[b], #452]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #448]\n\t" + "str r6, [%[r], #452]\n\t" + "ldr r4, [%[a], #456]\n\t" + "ldr r6, [%[a], #460]\n\t" + "ldr r5, [%[b], #456]\n\t" + "ldr r7, [%[b], #460]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #456]\n\t" + "str r6, [%[r], #460]\n\t" + "ldr r4, [%[a], #464]\n\t" + "ldr r6, [%[a], #468]\n\t" + "ldr r5, [%[b], #464]\n\t" + "ldr r7, [%[b], #468]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #464]\n\t" + "str r6, [%[r], #468]\n\t" + "ldr r4, [%[a], #472]\n\t" + "ldr r6, [%[a], #476]\n\t" + "ldr r5, [%[b], #472]\n\t" + "ldr r7, [%[b], #476]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #472]\n\t" + "str r6, [%[r], #476]\n\t" + "ldr r4, [%[a], #480]\n\t" + "ldr r6, [%[a], #484]\n\t" + "ldr r5, [%[b], #480]\n\t" + "ldr r7, [%[b], #484]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #480]\n\t" + "str r6, [%[r], #484]\n\t" + "ldr r4, [%[a], #488]\n\t" + "ldr r6, [%[a], #492]\n\t" + "ldr r5, [%[b], #488]\n\t" + "ldr r7, [%[b], #492]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #488]\n\t" + "str r6, [%[r], #492]\n\t" + "ldr r4, [%[a], #496]\n\t" + "ldr r6, [%[a], #500]\n\t" + "ldr r5, [%[b], #496]\n\t" + "ldr r7, [%[b], #500]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #496]\n\t" + "str r6, [%[r], #500]\n\t" + "ldr r4, [%[a], #504]\n\t" + "ldr r6, [%[a], #508]\n\t" + "ldr 
r5, [%[b], #504]\n\t" + "ldr r7, [%[b], #508]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #504]\n\t" + "str r6, [%[r], #508]\n\t" + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "# i = 0\n\t" + "mov r12, #0\n\t" + "ldr r10, [%[a], #0]\n\t" + "ldr r14, [%[a], #4]\n\t" + "\n1:\n\t" + "# mu = a[i] * mp\n\t" + "mul r8, %[mp], r10\n\t" + "# a[i+0] += m[0] * mu\n\t" + "ldr r7, [%[m], #0]\n\t" + "ldr r9, [%[a], #0]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" + "# a[i+1] += m[1] * mu\n\t" + "ldr r7, [%[m], #4]\n\t" + "ldr r9, [%[a], #4]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r10, r14, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r10, r10, r5\n\t" + "adc r4, r4, #0\n\t" + "# a[i+2] += m[2] * mu\n\t" + "ldr r7, [%[m], #8]\n\t" + "ldr r14, [%[a], #8]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r14, r14, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r14, r14, r4\n\t" + "adc r5, r5, #0\n\t" + "# a[i+3] += m[3] * mu\n\t" + "ldr r7, [%[m], #12]\n\t" + "ldr r9, [%[a], #12]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+4] += m[4] * mu\n\t" + "ldr r7, [%[m], #16]\n\t" + "ldr r9, [%[a], #16]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+5] += m[5] * mu\n\t" + "ldr r7, [%[m], #20]\n\t" + "ldr r9, [%[a], #20]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+6] += m[6] * mu\n\t" + "ldr r7, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+7] += m[7] * mu\n\t" + "ldr r7, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #28]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+8] += m[8] * mu\n\t" + "ldr r7, [%[m], #32]\n\t" + "ldr r9, [%[a], #32]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+9] += m[9] * mu\n\t" + "ldr r7, [%[m], #36]\n\t" + "ldr r9, [%[a], #36]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+10] += m[10] * mu\n\t" + "ldr r7, [%[m], #40]\n\t" + "ldr r9, [%[a], #40]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+11] += m[11] * mu\n\t" + "ldr r7, [%[m], #44]\n\t" + "ldr r9, [%[a], #44]\n\t" + "umull r6, r7, 
r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #44]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+12] += m[12] * mu\n\t" + "ldr r7, [%[m], #48]\n\t" + "ldr r9, [%[a], #48]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+13] += m[13] * mu\n\t" + "ldr r7, [%[m], #52]\n\t" + "ldr r9, [%[a], #52]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #52]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+14] += m[14] * mu\n\t" + "ldr r7, [%[m], #56]\n\t" + "ldr r9, [%[a], #56]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+15] += m[15] * mu\n\t" + "ldr r7, [%[m], #60]\n\t" + "ldr r9, [%[a], #60]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #60]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+16] += m[16] * mu\n\t" + "ldr r7, [%[m], #64]\n\t" + "ldr r9, [%[a], #64]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+17] += m[17] * mu\n\t" + "ldr r7, [%[m], #68]\n\t" + "ldr r9, [%[a], #68]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #68]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+18] += m[18] * mu\n\t" + "ldr r7, [%[m], #72]\n\t" + "ldr r9, [%[a], #72]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+19] += m[19] * mu\n\t" + "ldr r7, [%[m], #76]\n\t" + "ldr r9, [%[a], #76]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #76]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+20] += m[20] * mu\n\t" + "ldr r7, [%[m], #80]\n\t" + "ldr r9, [%[a], #80]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+21] += m[21] * mu\n\t" + "ldr r7, [%[m], #84]\n\t" + "ldr r9, [%[a], #84]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #84]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+22] += m[22] * mu\n\t" + "ldr r7, [%[m], #88]\n\t" + "ldr r9, [%[a], #88]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+23] += m[23] * mu\n\t" + "ldr r7, [%[m], #92]\n\t" + "ldr r9, [%[a], #92]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #92]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+24] += m[24] * mu\n\t" + "ldr r7, [%[m], #96]\n\t" + "ldr r9, [%[a], #96]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+25] += m[25] * mu\n\t" + "ldr r7, [%[m], #100]\n\t" + "ldr r9, [%[a], #100]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #100]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+26] += m[26] * mu\n\t" + "ldr r7, [%[m], #104]\n\t" + "ldr r9, 
[%[a], #104]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+27] += m[27] * mu\n\t" + "ldr r7, [%[m], #108]\n\t" + "ldr r9, [%[a], #108]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #108]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+28] += m[28] * mu\n\t" + "ldr r7, [%[m], #112]\n\t" + "ldr r9, [%[a], #112]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+29] += m[29] * mu\n\t" + "ldr r7, [%[m], #116]\n\t" + "ldr r9, [%[a], #116]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #116]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+30] += m[30] * mu\n\t" + "ldr r7, [%[m], #120]\n\t" + "ldr r9, [%[a], #120]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+31] += m[31] * mu\n\t" + "ldr r7, [%[m], #124]\n\t" + "ldr r9, [%[a], #124]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #124]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+32] += m[32] * mu\n\t" + "ldr r7, [%[m], #128]\n\t" + "ldr r9, [%[a], #128]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+33] += m[33] * mu\n\t" + "ldr r7, [%[m], #132]\n\t" + "ldr r9, [%[a], #132]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #132]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+34] += m[34] * mu\n\t" + "ldr r7, [%[m], #136]\n\t" + "ldr r9, [%[a], #136]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #136]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+35] += m[35] * mu\n\t" + "ldr r7, [%[m], #140]\n\t" + "ldr r9, [%[a], #140]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #140]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+36] += m[36] * mu\n\t" + "ldr r7, [%[m], #144]\n\t" + "ldr r9, [%[a], #144]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+37] += m[37] * mu\n\t" + "ldr r7, [%[m], #148]\n\t" + "ldr r9, [%[a], #148]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #148]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+38] += m[38] * mu\n\t" + "ldr r7, [%[m], #152]\n\t" + "ldr r9, [%[a], #152]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+39] += m[39] * mu\n\t" + "ldr r7, [%[m], #156]\n\t" + "ldr r9, [%[a], #156]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #156]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+40] += m[40] * mu\n\t" + "ldr r7, [%[m], #160]\n\t" + "ldr r9, [%[a], #160]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #160]\n\t" + "adc r5, r5, 
#0\n\t" + "# a[i+41] += m[41] * mu\n\t" + "ldr r7, [%[m], #164]\n\t" + "ldr r9, [%[a], #164]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #164]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+42] += m[42] * mu\n\t" + "ldr r7, [%[m], #168]\n\t" + "ldr r9, [%[a], #168]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #168]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+43] += m[43] * mu\n\t" + "ldr r7, [%[m], #172]\n\t" + "ldr r9, [%[a], #172]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #172]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+44] += m[44] * mu\n\t" + "ldr r7, [%[m], #176]\n\t" + "ldr r9, [%[a], #176]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #176]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+45] += m[45] * mu\n\t" + "ldr r7, [%[m], #180]\n\t" + "ldr r9, [%[a], #180]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #180]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+46] += m[46] * mu\n\t" + "ldr r7, [%[m], #184]\n\t" + "ldr r9, [%[a], #184]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #184]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+47] += m[47] * mu\n\t" + "ldr r7, [%[m], #188]\n\t" + "ldr r9, [%[a], #188]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #188]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+48] += m[48] * mu\n\t" + "ldr r7, [%[m], #192]\n\t" + "ldr r9, [%[a], #192]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #192]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+49] += m[49] * mu\n\t" + "ldr r7, [%[m], #196]\n\t" + "ldr r9, [%[a], #196]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #196]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+50] += m[50] * mu\n\t" + "ldr r7, [%[m], #200]\n\t" + "ldr r9, [%[a], #200]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #200]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+51] += m[51] * mu\n\t" + "ldr r7, [%[m], #204]\n\t" + "ldr r9, [%[a], #204]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #204]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+52] += m[52] * mu\n\t" + "ldr r7, [%[m], #208]\n\t" + "ldr r9, [%[a], #208]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #208]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+53] += m[53] * mu\n\t" + "ldr r7, [%[m], #212]\n\t" + "ldr r9, [%[a], #212]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #212]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+54] += m[54] * mu\n\t" + "ldr r7, [%[m], #216]\n\t" + "ldr r9, [%[a], #216]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #216]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+55] += m[55] * mu\n\t" + "ldr r7, [%[m], #220]\n\t" + "ldr r9, [%[a], #220]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, 
#0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #220]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+56] += m[56] * mu\n\t" + "ldr r7, [%[m], #224]\n\t" + "ldr r9, [%[a], #224]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #224]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+57] += m[57] * mu\n\t" + "ldr r7, [%[m], #228]\n\t" + "ldr r9, [%[a], #228]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #228]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+58] += m[58] * mu\n\t" + "ldr r7, [%[m], #232]\n\t" + "ldr r9, [%[a], #232]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #232]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+59] += m[59] * mu\n\t" + "ldr r7, [%[m], #236]\n\t" + "ldr r9, [%[a], #236]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #236]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+60] += m[60] * mu\n\t" + "ldr r7, [%[m], #240]\n\t" + "ldr r9, [%[a], #240]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #240]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+61] += m[61] * mu\n\t" + "ldr r7, [%[m], #244]\n\t" + "ldr r9, [%[a], #244]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #244]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+62] += m[62] * mu\n\t" + "ldr r7, [%[m], #248]\n\t" + "ldr r9, [%[a], #248]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #248]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+63] += m[63] * mu\n\t" + "ldr r7, [%[m], #252]\n\t" + "ldr r9, [%[a], #252]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #252]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+64] += m[64] * mu\n\t" + "ldr r7, [%[m], #256]\n\t" + "ldr r9, [%[a], #256]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #256]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+65] += m[65] * mu\n\t" + "ldr r7, [%[m], #260]\n\t" + "ldr r9, [%[a], #260]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #260]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+66] += m[66] * mu\n\t" + "ldr r7, [%[m], #264]\n\t" + "ldr r9, [%[a], #264]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #264]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+67] += m[67] * mu\n\t" + "ldr r7, [%[m], #268]\n\t" + "ldr r9, [%[a], #268]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #268]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+68] += m[68] * mu\n\t" + "ldr r7, [%[m], #272]\n\t" + "ldr r9, [%[a], #272]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #272]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+69] += m[69] * mu\n\t" + "ldr r7, [%[m], #276]\n\t" + "ldr r9, [%[a], #276]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #276]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+70] += m[70] * mu\n\t" + "ldr r7, [%[m], #280]\n\t" + "ldr r9, [%[a], 
#280]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #280]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+71] += m[71] * mu\n\t" + "ldr r7, [%[m], #284]\n\t" + "ldr r9, [%[a], #284]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #284]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+72] += m[72] * mu\n\t" + "ldr r7, [%[m], #288]\n\t" + "ldr r9, [%[a], #288]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #288]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+73] += m[73] * mu\n\t" + "ldr r7, [%[m], #292]\n\t" + "ldr r9, [%[a], #292]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #292]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+74] += m[74] * mu\n\t" + "ldr r7, [%[m], #296]\n\t" + "ldr r9, [%[a], #296]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #296]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+75] += m[75] * mu\n\t" + "ldr r7, [%[m], #300]\n\t" + "ldr r9, [%[a], #300]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #300]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+76] += m[76] * mu\n\t" + "ldr r7, [%[m], #304]\n\t" + "ldr r9, [%[a], #304]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #304]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+77] += m[77] * mu\n\t" + "ldr r7, [%[m], #308]\n\t" + "ldr r9, [%[a], #308]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #308]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+78] += m[78] * mu\n\t" + "ldr r7, [%[m], #312]\n\t" + "ldr r9, [%[a], #312]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #312]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+79] += m[79] * mu\n\t" + "ldr r7, [%[m], #316]\n\t" + "ldr r9, [%[a], #316]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #316]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+80] += m[80] * mu\n\t" + "ldr r7, [%[m], #320]\n\t" + "ldr r9, [%[a], #320]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #320]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+81] += m[81] * mu\n\t" + "ldr r7, [%[m], #324]\n\t" + "ldr r9, [%[a], #324]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #324]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+82] += m[82] * mu\n\t" + "ldr r7, [%[m], #328]\n\t" + "ldr r9, [%[a], #328]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #328]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+83] += m[83] * mu\n\t" + "ldr r7, [%[m], #332]\n\t" + "ldr r9, [%[a], #332]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #332]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+84] += m[84] * mu\n\t" + "ldr r7, [%[m], #336]\n\t" + "ldr r9, [%[a], #336]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #336]\n\t" + "adc r5, r5, #0\n\t" + 
"# a[i+85] += m[85] * mu\n\t" + "ldr r7, [%[m], #340]\n\t" + "ldr r9, [%[a], #340]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #340]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+86] += m[86] * mu\n\t" + "ldr r7, [%[m], #344]\n\t" + "ldr r9, [%[a], #344]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #344]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+87] += m[87] * mu\n\t" + "ldr r7, [%[m], #348]\n\t" + "ldr r9, [%[a], #348]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #348]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+88] += m[88] * mu\n\t" + "ldr r7, [%[m], #352]\n\t" + "ldr r9, [%[a], #352]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #352]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+89] += m[89] * mu\n\t" + "ldr r7, [%[m], #356]\n\t" + "ldr r9, [%[a], #356]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #356]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+90] += m[90] * mu\n\t" + "ldr r7, [%[m], #360]\n\t" + "ldr r9, [%[a], #360]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #360]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+91] += m[91] * mu\n\t" + "ldr r7, [%[m], #364]\n\t" + "ldr r9, [%[a], #364]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #364]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+92] += m[92] * mu\n\t" + "ldr r7, [%[m], #368]\n\t" + "ldr r9, [%[a], #368]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #368]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+93] += m[93] * mu\n\t" + "ldr r7, [%[m], #372]\n\t" + "ldr r9, [%[a], #372]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #372]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+94] += m[94] * mu\n\t" + "ldr r7, [%[m], #376]\n\t" + "ldr r9, [%[a], #376]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #376]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+95] += m[95] * mu\n\t" + "ldr r7, [%[m], #380]\n\t" + "ldr r9, [%[a], #380]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #380]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+96] += m[96] * mu\n\t" + "ldr r7, [%[m], #384]\n\t" + "ldr r9, [%[a], #384]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #384]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+97] += m[97] * mu\n\t" + "ldr r7, [%[m], #388]\n\t" + "ldr r9, [%[a], #388]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #388]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+98] += m[98] * mu\n\t" + "ldr r7, [%[m], #392]\n\t" + "ldr r9, [%[a], #392]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #392]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+99] += m[99] * mu\n\t" + "ldr r7, [%[m], #396]\n\t" + "ldr r9, [%[a], #396]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + 
"adds r9, r9, r5\n\t" + "str r9, [%[a], #396]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+100] += m[100] * mu\n\t" + "ldr r7, [%[m], #400]\n\t" + "ldr r9, [%[a], #400]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #400]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+101] += m[101] * mu\n\t" + "ldr r7, [%[m], #404]\n\t" + "ldr r9, [%[a], #404]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #404]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+102] += m[102] * mu\n\t" + "ldr r7, [%[m], #408]\n\t" + "ldr r9, [%[a], #408]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #408]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+103] += m[103] * mu\n\t" + "ldr r7, [%[m], #412]\n\t" + "ldr r9, [%[a], #412]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #412]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+104] += m[104] * mu\n\t" + "ldr r7, [%[m], #416]\n\t" + "ldr r9, [%[a], #416]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #416]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+105] += m[105] * mu\n\t" + "ldr r7, [%[m], #420]\n\t" + "ldr r9, [%[a], #420]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #420]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+106] += m[106] * mu\n\t" + "ldr r7, [%[m], #424]\n\t" + "ldr r9, [%[a], #424]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #424]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+107] += m[107] * mu\n\t" + "ldr r7, [%[m], #428]\n\t" + "ldr r9, [%[a], #428]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #428]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+108] += m[108] * mu\n\t" + "ldr r7, [%[m], #432]\n\t" + "ldr r9, [%[a], #432]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #432]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+109] += m[109] * mu\n\t" + "ldr r7, [%[m], #436]\n\t" + "ldr r9, [%[a], #436]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #436]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+110] += m[110] * mu\n\t" + "ldr r7, [%[m], #440]\n\t" + "ldr r9, [%[a], #440]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #440]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+111] += m[111] * mu\n\t" + "ldr r7, [%[m], #444]\n\t" + "ldr r9, [%[a], #444]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #444]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+112] += m[112] * mu\n\t" + "ldr r7, [%[m], #448]\n\t" + "ldr r9, [%[a], #448]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #448]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+113] += m[113] * mu\n\t" + "ldr r7, [%[m], #452]\n\t" + "ldr r9, [%[a], #452]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #452]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+114] += m[114] * mu\n\t" + "ldr r7, [%[m], #456]\n\t" + 
"ldr r9, [%[a], #456]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #456]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+115] += m[115] * mu\n\t" + "ldr r7, [%[m], #460]\n\t" + "ldr r9, [%[a], #460]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #460]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+116] += m[116] * mu\n\t" + "ldr r7, [%[m], #464]\n\t" + "ldr r9, [%[a], #464]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #464]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+117] += m[117] * mu\n\t" + "ldr r7, [%[m], #468]\n\t" + "ldr r9, [%[a], #468]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #468]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+118] += m[118] * mu\n\t" + "ldr r7, [%[m], #472]\n\t" + "ldr r9, [%[a], #472]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #472]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+119] += m[119] * mu\n\t" + "ldr r7, [%[m], #476]\n\t" + "ldr r9, [%[a], #476]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #476]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+120] += m[120] * mu\n\t" + "ldr r7, [%[m], #480]\n\t" + "ldr r9, [%[a], #480]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #480]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+121] += m[121] * mu\n\t" + "ldr r7, [%[m], #484]\n\t" + "ldr r9, [%[a], #484]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #484]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+122] += m[122] * mu\n\t" + "ldr r7, [%[m], #488]\n\t" + "ldr r9, [%[a], #488]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #488]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+123] += m[123] * mu\n\t" + "ldr r7, [%[m], #492]\n\t" + "ldr r9, [%[a], #492]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #492]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+124] += m[124] * mu\n\t" + "ldr r7, [%[m], #496]\n\t" + "ldr r9, [%[a], #496]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #496]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+125] += m[125] * mu\n\t" + "ldr r7, [%[m], #500]\n\t" + "ldr r9, [%[a], #500]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #500]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+126] += m[126] * mu\n\t" + "ldr r7, [%[m], #504]\n\t" + "ldr r9, [%[a], #504]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #504]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+127] += m[127] * mu\n\t" + "ldr r7, [%[m], #508]\n\t" + "ldr r9, [%[a], #508]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r7, r7, %[ca]\n\t" + "mov %[ca], #0\n\t" + "adc %[ca], %[ca], %[ca]\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #508]\n\t" + "ldr r9, [%[a], #512]\n\t" + "adcs r9, r9, r7\n\t" + "str r9, [%[a], #512]\n\t" + "adc %[ca], %[ca], #0\n\t" + "# i += 1\n\t" + "add %[a], %[a], #4\n\t" + "add r12, 
r12, #4\n\t" + "cmp r12, #512\n\t" + "blt 1b\n\t" + "str r10, [%[a], #0]\n\t" + "str r14, [%[a], #4]\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_4096_mul_128(r, a, b); + sp_4096_mont_reduce_128(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_4096_sqr_128(r, a); + sp_4096_mont_reduce_128(r, m, mp); +} + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The dividend. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. + */ +static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) +{ + sp_digit r = 0; + + __asm__ __volatile__ ( + "lsr r5, %[div], #1\n\t" + "add r5, r5, #1\n\t" + "mov r6, %[d0]\n\t" + "mov r7, %[d1]\n\t" + "# Do top 32\n\t" + "subs r8, r5, r7\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], %[r]\n\t" + "sub %[r], %[r], r8\n\t" + "and r8, r8, r5\n\t" + "subs r7, r7, r8\n\t" + "# Next 30 bits\n\t" + "mov r4, #29\n\t" + "1:\n\t" + "movs r6, r6, lsl #1\n\t" + "adc r7, r7, r7\n\t" + "subs r8, r5, r7\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], %[r]\n\t" + "sub %[r], %[r], r8\n\t" + "and r8, r8, r5\n\t" + "subs r7, r7, r8\n\t" + "subs r4, r4, #1\n\t" + "bpl 1b\n\t" + "add %[r], %[r], %[r]\n\t" + "add %[r], %[r], #1\n\t" + "umull r4, r5, %[r], %[div]\n\t" + "subs r4, %[d0], r4\n\t" + "sbc r5, %[d1], r5\n\t" + "add %[r], %[r], r5\n\t" + "umull r4, r5, %[r], %[div]\n\t" + "subs r4, %[d0], r4\n\t" + "sbc r5, %[d1], r5\n\t" + "add %[r], %[r], r5\n\t" + "subs r8, %[div], r4\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + : [r] "+r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "r4", "r5", "r6", "r7", "r8" + ); + return r; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<128; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 128; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. 
+ */ +static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = -1; + sp_digit one = 1; + + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mov r3, #-1\n\t" + "mov r6, #508\n\t" + "1:\n\t" + "ldr r4, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "subs r6, r6, #4\n\t" + "bcs 1b\n\t" + "eor %[r], %[r], r3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "r3", "r4", "r5", "r6", "r7" + ); +#else + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mov r3, #-1\n\t" + "ldr r4, [%[a], #508]\n\t" + "ldr r5, [%[b], #508]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #504]\n\t" + "ldr r5, [%[b], #504]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #500]\n\t" + "ldr r5, [%[b], #500]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #496]\n\t" + "ldr r5, [%[b], #496]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #492]\n\t" + "ldr r5, [%[b], #492]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #488]\n\t" + "ldr r5, [%[b], #488]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #484]\n\t" + "ldr r5, [%[b], #484]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #480]\n\t" + "ldr r5, [%[b], #480]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #476]\n\t" + "ldr r5, [%[b], #476]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #472]\n\t" + "ldr r5, [%[b], #472]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #468]\n\t" + "ldr r5, [%[b], #468]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #464]\n\t" + "ldr r5, [%[b], #464]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + 
"movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #460]\n\t" + "ldr r5, [%[b], #460]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #456]\n\t" + "ldr r5, [%[b], #456]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #452]\n\t" + "ldr r5, [%[b], #452]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #448]\n\t" + "ldr r5, [%[b], #448]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #444]\n\t" + "ldr r5, [%[b], #444]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #440]\n\t" + "ldr r5, [%[b], #440]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #436]\n\t" + "ldr r5, [%[b], #436]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #432]\n\t" + "ldr r5, [%[b], #432]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #428]\n\t" + "ldr r5, [%[b], #428]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #424]\n\t" + "ldr r5, [%[b], #424]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #420]\n\t" + "ldr r5, [%[b], #420]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #416]\n\t" + "ldr r5, [%[b], #416]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #412]\n\t" + "ldr r5, [%[b], #412]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #408]\n\t" + "ldr r5, [%[b], #408]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #404]\n\t" + "ldr r5, [%[b], #404]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, 
r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #400]\n\t" + "ldr r5, [%[b], #400]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #396]\n\t" + "ldr r5, [%[b], #396]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #392]\n\t" + "ldr r5, [%[b], #392]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #388]\n\t" + "ldr r5, [%[b], #388]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #384]\n\t" + "ldr r5, [%[b], #384]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #380]\n\t" + "ldr r5, [%[b], #380]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #376]\n\t" + "ldr r5, [%[b], #376]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #372]\n\t" + "ldr r5, [%[b], #372]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #368]\n\t" + "ldr r5, [%[b], #368]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #364]\n\t" + "ldr r5, [%[b], #364]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #360]\n\t" + "ldr r5, [%[b], #360]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #356]\n\t" + "ldr r5, [%[b], #356]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #352]\n\t" + "ldr r5, [%[b], #352]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #348]\n\t" + "ldr r5, [%[b], #348]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #344]\n\t" + "ldr r5, [%[b], #344]\n\t" + 
"and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #340]\n\t" + "ldr r5, [%[b], #340]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #336]\n\t" + "ldr r5, [%[b], #336]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #332]\n\t" + "ldr r5, [%[b], #332]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #328]\n\t" + "ldr r5, [%[b], #328]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #324]\n\t" + "ldr r5, [%[b], #324]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #320]\n\t" + "ldr r5, [%[b], #320]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #316]\n\t" + "ldr r5, [%[b], #316]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #312]\n\t" + "ldr r5, [%[b], #312]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #308]\n\t" + "ldr r5, [%[b], #308]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #304]\n\t" + "ldr r5, [%[b], #304]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #300]\n\t" + "ldr r5, [%[b], #300]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #296]\n\t" + "ldr r5, [%[b], #296]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #292]\n\t" + "ldr r5, [%[b], #292]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #288]\n\t" + "ldr r5, [%[b], #288]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" 
+ "ldr r4, [%[a], #284]\n\t" + "ldr r5, [%[b], #284]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #280]\n\t" + "ldr r5, [%[b], #280]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #276]\n\t" + "ldr r5, [%[b], #276]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #272]\n\t" + "ldr r5, [%[b], #272]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #268]\n\t" + "ldr r5, [%[b], #268]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #264]\n\t" + "ldr r5, [%[b], #264]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #260]\n\t" + "ldr r5, [%[b], #260]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #256]\n\t" + "ldr r5, [%[b], #256]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #252]\n\t" + "ldr r5, [%[b], #252]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #248]\n\t" + "ldr r5, [%[b], #248]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #244]\n\t" + "ldr r5, [%[b], #244]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #240]\n\t" + "ldr r5, [%[b], #240]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #236]\n\t" + "ldr r5, [%[b], #236]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #232]\n\t" + "ldr r5, [%[b], #232]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #228]\n\t" + "ldr r5, [%[b], #228]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it 
lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #224]\n\t" + "ldr r5, [%[b], #224]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #220]\n\t" + "ldr r5, [%[b], #220]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #216]\n\t" + "ldr r5, [%[b], #216]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #212]\n\t" + "ldr r5, [%[b], #212]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #208]\n\t" + "ldr r5, [%[b], #208]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #204]\n\t" + "ldr r5, [%[b], #204]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #200]\n\t" + "ldr r5, [%[b], #200]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #196]\n\t" + "ldr r5, [%[b], #196]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #192]\n\t" + "ldr r5, [%[b], #192]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #188]\n\t" + "ldr r5, [%[b], #188]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r5, [%[b], #184]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #180]\n\t" + "ldr r5, [%[b], #180]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r5, [%[b], #176]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #172]\n\t" + "ldr r5, [%[b], #172]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r5, [%[b], #168]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs 
r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #164]\n\t" + "ldr r5, [%[b], #164]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r5, [%[b], #160]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #156]\n\t" + "ldr r5, [%[b], #156]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r5, [%[b], #152]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #148]\n\t" + "ldr r5, [%[b], #148]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r5, [%[b], #144]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #140]\n\t" + "ldr r5, [%[b], #140]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r5, [%[b], #136]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #132]\n\t" + "ldr r5, [%[b], #132]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r5, [%[b], #128]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], 
#108]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, 
[%[a], #48]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #44]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #36]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "eor %[r], %[r], r3\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+ : "r3", "r4", "r5", "r6", "r7"
+ );
+#endif
+
+ return r;
+}
+
+/* Divide a by d and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
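+ *
+ * The loop below is schoolbook long division one 32-bit digit at a time:
+ * div_4096_word_128 estimates each quotient digit from the top divisor
+ * digit, roughly r1 = (t1[128 + i] * 2^32 + t1[128 + i - 1]) / d[127],
+ * the estimate times d is subtracted, and because the estimate can be
+ * high the remainder is repaired with up to two masked add-backs
+ * (sp_4096_mask_128 then sp_4096_add_128) rather than a data-dependent
+ * branch.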
+ */
+static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit t1[256], t2[129];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+
+ div = d[127];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+ for (i=127; i>=0; i--) {
+ r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+ sp_4096_mul_d_128(t2, d, r1);
+ t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+ t1[128 + i] -= t2[128];
+ sp_4096_mask_128(t2, d, t1[128 + i]);
+ t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+ sp_4096_mask_128(t2, d, t1[128 + i]);
+ t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+ }
+
+ r1 = sp_4096_cmp_128(t1, d) >= 0;
+ sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_4096_div_128(a, m, NULL, r);
+}
+
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+/* Divide a by d and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit t1[256], t2[129];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+
+ div = d[127];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+ for (i=127; i>=0; i--) {
+ r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+ sp_4096_mul_d_128(t2, d, r1);
+ t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+ t1[128 + i] -= t2[128];
+ if (t1[128 + i] != 0) {
+ t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
+ if (t1[128 + i] != 0)
+ t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
+ }
+ }
+
+ r1 = sp_4096_cmp_128(t1, d) >= 0;
+ sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_4096_div_128_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
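+ *
+ * Sketch of the method below: t[k] holds a^k * R mod m (Montgomery form)
+ * for k = 0..15, so the main loop consumes the exponent four bits at a
+ * time, computing r = r^16 * t[y] as four Montgomery squarings followed
+ * by one Montgomery multiplication per window; the final reduction below
+ * m uses a mask (sp_4096_cond_sub_128) instead of a branch.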
+ */ +static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][256]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 256, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 256; + } +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 128U); + if (reduceA != 0) { + err = sp_4096_mod_128(t[1] + 128, a, m); + if (err == MP_OKAY) { + err = sp_4096_mod_128(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128); + err = sp_4096_mod_128(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_128(t[10], t[ 5], m, mp); + sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_128(t[12], t[ 6], m, mp); + sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_128(t[14], t[ 7], m, mp); + sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 128); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n <<= 4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + + sp_4096_mont_mul_128(r, r, t[y], m, mp); + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
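+ *
+ * This variant trades memory for speed relative to the WOLFSSL_SP_SMALL
+ * build above: it precomputes 32 table entries (t[k] = a^k * R mod m for
+ * k = 0..31) and consumes the exponent five bits per window, so a
+ * 4096-bit exponent still costs about 4096 Montgomery squarings but only
+ * about 820 multiplications, one per window.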
+ */ +static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][256]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 256, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 256; + } +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 128U); + if (reduceA != 0) { + err = sp_4096_mod_128(t[1] + 128, a, m); + if (err == MP_OKAY) { + err = sp_4096_mod_128(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128); + err = sp_4096_mod_128(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_128(t[10], t[ 5], m, mp); + sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_128(t[12], t[ 6], m, mp); + sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_128(t[14], t[ 7], m, mp); + sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_128(t[16], t[ 8], m, mp); + sp_4096_mont_mul_128(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_128(t[18], t[ 9], m, mp); + sp_4096_mont_mul_128(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_128(t[20], t[10], m, mp); + sp_4096_mont_mul_128(t[21], t[11], t[10], m, mp); + sp_4096_mont_sqr_128(t[22], t[11], m, mp); + sp_4096_mont_mul_128(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_128(t[24], t[12], m, mp); + sp_4096_mont_mul_128(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_128(t[26], t[13], m, mp); + sp_4096_mont_mul_128(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_128(t[28], t[14], m, mp); + sp_4096_mont_mul_128(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_128(t[30], t[15], m, mp); + sp_4096_mont_mul_128(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 128); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + + sp_4096_mont_mul_128(r, r, t[y], m, mp); + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ +#endif /* 
(WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[256], m[128], r[256]; +#else + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; +#endif + sp_digit *ah; + sp_digit e[1]; + int err = MP_OKAY; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 512 || + mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 128 * 2; + m = r + 128 * 2; + } +#endif + + if (err == MP_OKAY) { + ah = a + 128; + + sp_4096_from_bin(ah, 128, in, inLen); +#if DIGIT_BIT >= 32 + e[0] = em->dp[0]; +#else + e[0] = em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_4096_from_mp(m, 128, mm); + + if (e[0] == 0x3) { + if (err == MP_OKAY) { + sp_4096_sqr_128(r, ah); + err = sp_4096_mod_128_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_4096_mul_128(r, ah, r); + err = sp_4096_mod_128_cond(r, r, m); + } + } + else { + int i; + sp_digit mp; + + sp_4096_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
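+ * The code below zeroes the low 128 digits of a while ah = a + 128
+ * still holds the input, so the 256-digit value equals in * 2^4096;
+ * reducing that mod m yields in * R mod m, the Montgomery
+ * representation of the base with R = 2^4096.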
*/
+ XMEMSET(a, 0, sizeof(sp_digit) * 128);
+ err = sp_4096_mod_128_cond(a, a, m);
+
+ if (err == MP_OKAY) {
+ for (i = 31; i >= 0; i--) {
+ if (e[0] >> i) {
+ break;
+ }
+ }
+
+ XMEMCPY(r, a, sizeof(sp_digit) * 128);
+ for (i--; i>=0; i--) {
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ if (((e[0] >> i) & 1) == 1) {
+ sp_4096_mont_mul_128(r, r, a, m, mp);
+ }
+ }
+ XMEMSET(&r[128], 0, sizeof(sp_digit) * 128);
+ sp_4096_mont_reduce_128(r, m, mp);
+
+ for (i = 127; i > 0; i--) {
+ if (r[i] != m[i]) {
+ break;
+ }
+ }
+ if (r[i] >= m[i]) {
+ sp_4096_sub_in_place_128(r, m);
+ }
+ }
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_4096_to_bin(r, out);
+ *outLen = 512;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+#endif
+
+ return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+/* RSA private key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 512 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
+ mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+ byte* out, word32* outLen)
+{
+ sp_digit* a;
+ sp_digit* d = NULL;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ if (*outLen < 512U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 4096) {
+ err = MP_READ_E;
+ }
+ if (inLen > 512) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 4096) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 4, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ a = d + 128;
+ m = a + 256;
+ r = a;
+
+ sp_4096_from_bin(a, 128, in, inLen);
+ sp_4096_from_mp(d, 128, dm);
+ sp_4096_from_mp(m, 128, mm);
+ err = sp_4096_mod_exp_128(r, a, d, 4096, m, 0);
+ }
+ if (err == MP_OKAY) {
+ sp_4096_to_bin(r, out);
+ *outLen = 512;
+ }
+
+ if (d != NULL) {
+ XMEMSET(d, 0, sizeof(sp_digit) * 128);
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+}
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
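+ *
+ * Per digit this is the constant-time idiom r[i] = a[i] + (b[i] & m)
+ * with carry, which the assembly below unrolls: with m == 0 the call
+ * leaves a unchanged and with m == (sp_digit)-1 it performs a full add,
+ * and the memory access pattern is the same in both cases.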
+ */ +static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r9, #0\n\t" + "mov r8, #0\n\t" + "1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr r4, [%[a], r8]\n\t" + "ldr r5, [%[b], r8]\n\t" + "and r5, r5, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adc %[c], r9, r9\n\t" + "str r4, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, #256\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#else + __asm__ __volatile__ ( + + "mov r9, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adds r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r6, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #32]\n\t" + "str r6, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r6, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r6, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #48]\n\t" + "str r6, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r6, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r7, [%[b], #60]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #56]\n\t" + "str r6, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r6, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #64]\n\t" + "str r6, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r6, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r7, [%[b], #76]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #72]\n\t" + "str r6, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r6, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #80]\n\t" + "str r6, 
[%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r6, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r7, [%[b], #92]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #88]\n\t" + "str r6, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r6, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #96]\n\t" + "str r6, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r6, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r7, [%[b], #108]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #104]\n\t" + "str r6, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r6, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #112]\n\t" + "str r6, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r6, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r7, [%[b], #124]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #120]\n\t" + "str r6, [%[r], #124]\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r6, [%[a], #132]\n\t" + "ldr r5, [%[b], #128]\n\t" + "ldr r7, [%[b], #132]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #128]\n\t" + "str r6, [%[r], #132]\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r6, [%[a], #140]\n\t" + "ldr r5, [%[b], #136]\n\t" + "ldr r7, [%[b], #140]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #136]\n\t" + "str r6, [%[r], #140]\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r6, [%[a], #148]\n\t" + "ldr r5, [%[b], #144]\n\t" + "ldr r7, [%[b], #148]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #144]\n\t" + "str r6, [%[r], #148]\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r6, [%[a], #156]\n\t" + "ldr r5, [%[b], #152]\n\t" + "ldr r7, [%[b], #156]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #152]\n\t" + "str r6, [%[r], #156]\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r6, [%[a], #164]\n\t" + "ldr r5, [%[b], #160]\n\t" + "ldr r7, [%[b], #164]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #160]\n\t" + "str r6, [%[r], #164]\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r6, [%[a], #172]\n\t" + "ldr r5, [%[b], #168]\n\t" + "ldr r7, [%[b], #172]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #168]\n\t" + "str r6, [%[r], #172]\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r6, [%[a], #180]\n\t" + "ldr r5, [%[b], #176]\n\t" + "ldr r7, [%[b], #180]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #176]\n\t" + "str r6, [%[r], #180]\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r6, [%[a], #188]\n\t" + "ldr r5, [%[b], #184]\n\t" + "ldr r7, [%[b], #188]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #184]\n\t" + "str r6, [%[r], #188]\n\t" + "ldr r4, 
[%[a], #192]\n\t" + "ldr r6, [%[a], #196]\n\t" + "ldr r5, [%[b], #192]\n\t" + "ldr r7, [%[b], #196]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #192]\n\t" + "str r6, [%[r], #196]\n\t" + "ldr r4, [%[a], #200]\n\t" + "ldr r6, [%[a], #204]\n\t" + "ldr r5, [%[b], #200]\n\t" + "ldr r7, [%[b], #204]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #200]\n\t" + "str r6, [%[r], #204]\n\t" + "ldr r4, [%[a], #208]\n\t" + "ldr r6, [%[a], #212]\n\t" + "ldr r5, [%[b], #208]\n\t" + "ldr r7, [%[b], #212]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #208]\n\t" + "str r6, [%[r], #212]\n\t" + "ldr r4, [%[a], #216]\n\t" + "ldr r6, [%[a], #220]\n\t" + "ldr r5, [%[b], #216]\n\t" + "ldr r7, [%[b], #220]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #216]\n\t" + "str r6, [%[r], #220]\n\t" + "ldr r4, [%[a], #224]\n\t" + "ldr r6, [%[a], #228]\n\t" + "ldr r5, [%[b], #224]\n\t" + "ldr r7, [%[b], #228]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #224]\n\t" + "str r6, [%[r], #228]\n\t" + "ldr r4, [%[a], #232]\n\t" + "ldr r6, [%[a], #236]\n\t" + "ldr r5, [%[b], #232]\n\t" + "ldr r7, [%[b], #236]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #232]\n\t" + "str r6, [%[r], #236]\n\t" + "ldr r4, [%[a], #240]\n\t" + "ldr r6, [%[a], #244]\n\t" + "ldr r5, [%[b], #240]\n\t" + "ldr r7, [%[b], #244]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #240]\n\t" + "str r6, [%[r], #244]\n\t" + "ldr r4, [%[a], #248]\n\t" + "ldr r6, [%[a], #252]\n\t" + "ldr r5, [%[b], #248]\n\t" + "ldr r7, [%[b], #252]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #248]\n\t" + "str r6, [%[r], #252]\n\t" + "adc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
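+ *
+ * The body below is the usual CRT (Garner) recombination:
+ *   tmpa = in^dpm mod pm, tmpb = in^dqm mod qm,
+ *   h = qim * (tmpa - tmpb) mod pm, result = tmpb + h * qm,
+ * where a possible underflow of (tmpa - tmpb) is repaired with the
+ * masked sp_4096_cond_add_64 above instead of branching on secret data.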
+ */ +int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[128 * 2]; + sp_digit p[64], q[64], dp[64]; + sp_digit tmpa[128], tmpb[128]; +#else + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* tmpa; + sp_digit* tmpb; +#endif + sp_digit* r; + sp_digit* qi; + sp_digit* dq; + sp_digit c; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 128 * 2; + q = p + 64; + qi = dq = dp = q + 64; + tmpa = qi + 64; + tmpb = tmpa + 128; + + r = t + 128; + } +#else +#endif + + if (err == MP_OKAY) { +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + r = a; + qi = dq = dp; +#endif + sp_4096_from_bin(a, 128, in, inLen); + sp_4096_from_mp(p, 64, pm); + sp_4096_from_mp(q, 64, qm); + sp_4096_from_mp(dp, 64, dpm); + + err = sp_2048_mod_exp_64(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + sp_4096_from_mp(dq, 64, dqm); + err = sp_2048_mod_exp_64(tmpb, a, dq, 2048, q, 1); + } + + if (err == MP_OKAY) { + c = sp_2048_sub_in_place_64(tmpa, tmpb); + c += sp_4096_cond_add_64(tmpa, tmpa, p, c); + sp_4096_cond_add_64(tmpa, tmpa, p, c); + + sp_2048_from_mp(qi, 64, qim); + sp_2048_mul_64(tmpa, tmpa, qi); + err = sp_2048_mod_64(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_2048_mul_64(tmpa, q, tmpa); + XMEMSET(&tmpb[64], 0, sizeof(sp_digit) * 64); + sp_4096_add_128(r, tmpb, tmpa); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 64 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); +#endif + + return err; +} +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. 
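+ *
+ * When DIGIT_BIT is exactly 32 this is a straight copy of all 128 words;
+ * otherwise the 32-bit sp_digits are re-split across mp_digit limbs of
+ * DIGIT_BIT bits, with mp_clamp() trimming leading zero limbs at the end.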
+ */ +static int sp_4096_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 128); + r->used = 128; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 128; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 32 - s; + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 128; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[256], e[128], m[128]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 128, base); + sp_4096_from_mp(e, 128, exp); + sp_4096_from_mp(m, 128, mod); + + err = sp_4096_mod_exp_128(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_4096_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_4096 +static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n) +{ + __asm__ __volatile__ ( + "mov r6, #31\n\t" + "sub r6, r6, %[n]\n\t" + "ldr r3, [%[a], #508]\n\t" + "lsr r4, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r4, r4, r6\n\t" + "ldr r2, [%[a], #504]\n\t" + "str r4, [%[r], #512]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #500]\n\t" + "str r3, [%[r], #508]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #496]\n\t" + "str r2, [%[r], #504]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #492]\n\t" + "str r4, [%[r], #500]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #488]\n\t" + "str r3, [%[r], #496]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #484]\n\t" + "str r2, [%[r], #492]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #480]\n\t" + "str r4, [%[r], #488]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, 
r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #476]\n\t" + "str r3, [%[r], #484]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #472]\n\t" + "str r2, [%[r], #480]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #468]\n\t" + "str r4, [%[r], #476]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #464]\n\t" + "str r3, [%[r], #472]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #460]\n\t" + "str r2, [%[r], #468]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #456]\n\t" + "str r4, [%[r], #464]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #452]\n\t" + "str r3, [%[r], #460]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #448]\n\t" + "str r2, [%[r], #456]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #444]\n\t" + "str r4, [%[r], #452]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #440]\n\t" + "str r3, [%[r], #448]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #436]\n\t" + "str r2, [%[r], #444]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #432]\n\t" + "str r4, [%[r], #440]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #428]\n\t" + "str r3, [%[r], #436]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #424]\n\t" + "str r2, [%[r], #432]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #420]\n\t" + "str r4, [%[r], #428]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #416]\n\t" + "str r3, [%[r], #424]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #412]\n\t" + "str r2, [%[r], #420]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #408]\n\t" + "str r4, [%[r], #416]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #404]\n\t" + "str r3, [%[r], #412]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #400]\n\t" + "str r2, [%[r], #408]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #396]\n\t" + "str r4, [%[r], #404]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #392]\n\t" + "str r3, [%[r], #400]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #388]\n\t" + "str r2, [%[r], #396]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], 
#384]\n\t" + "str r4, [%[r], #392]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #380]\n\t" + "str r3, [%[r], #388]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #376]\n\t" + "str r2, [%[r], #384]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #372]\n\t" + "str r4, [%[r], #380]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #368]\n\t" + "str r3, [%[r], #376]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #364]\n\t" + "str r2, [%[r], #372]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #360]\n\t" + "str r4, [%[r], #368]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #356]\n\t" + "str r3, [%[r], #364]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #352]\n\t" + "str r2, [%[r], #360]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #348]\n\t" + "str r4, [%[r], #356]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #344]\n\t" + "str r3, [%[r], #352]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #340]\n\t" + "str r2, [%[r], #348]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #336]\n\t" + "str r4, [%[r], #344]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #332]\n\t" + "str r3, [%[r], #340]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #328]\n\t" + "str r2, [%[r], #336]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #324]\n\t" + "str r4, [%[r], #332]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #320]\n\t" + "str r3, [%[r], #328]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #316]\n\t" + "str r2, [%[r], #324]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #312]\n\t" + "str r4, [%[r], #320]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #308]\n\t" + "str r3, [%[r], #316]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #304]\n\t" + "str r2, [%[r], #312]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #300]\n\t" + "str r4, [%[r], #308]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #296]\n\t" + "str r3, [%[r], #304]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #292]\n\t" + "str r2, [%[r], #300]\n\t" + "lsr r5, r3, 
#1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #288]\n\t" + "str r4, [%[r], #296]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #284]\n\t" + "str r3, [%[r], #292]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #280]\n\t" + "str r2, [%[r], #288]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #276]\n\t" + "str r4, [%[r], #284]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #272]\n\t" + "str r3, [%[r], #280]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #268]\n\t" + "str r2, [%[r], #276]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #264]\n\t" + "str r4, [%[r], #272]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #260]\n\t" + "str r3, [%[r], #268]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #256]\n\t" + "str r2, [%[r], #264]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #252]\n\t" + "str r4, [%[r], #260]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #248]\n\t" + "str r3, [%[r], #256]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #244]\n\t" + "str r2, [%[r], #252]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #240]\n\t" + "str r4, [%[r], #248]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #236]\n\t" + "str r3, [%[r], #244]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #232]\n\t" + "str r2, [%[r], #240]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #228]\n\t" + "str r4, [%[r], #236]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #224]\n\t" + "str r3, [%[r], #232]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #220]\n\t" + "str r2, [%[r], #228]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #216]\n\t" + "str r4, [%[r], #224]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #212]\n\t" + "str r3, [%[r], #220]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #208]\n\t" + "str r2, [%[r], #216]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #204]\n\t" + "str r4, [%[r], #212]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #200]\n\t" + "str r3, [%[r], #208]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + 
"orr r2, r2, r5\n\t" + "ldr r3, [%[a], #196]\n\t" + "str r2, [%[r], #204]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #192]\n\t" + "str r4, [%[r], #200]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #188]\n\t" + "str r3, [%[r], #196]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #184]\n\t" + "str r2, [%[r], #192]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #180]\n\t" + "str r4, [%[r], #188]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #176]\n\t" + "str r3, [%[r], #184]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #172]\n\t" + "str r2, [%[r], #180]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #168]\n\t" + "str r4, [%[r], #176]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #164]\n\t" + "str r3, [%[r], #172]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #160]\n\t" + "str r2, [%[r], #168]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #156]\n\t" + "str r4, [%[r], #164]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #152]\n\t" + "str r3, [%[r], #160]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #148]\n\t" + "str r2, [%[r], #156]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #144]\n\t" + "str r4, [%[r], #152]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #140]\n\t" + "str r3, [%[r], #148]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #136]\n\t" + "str r2, [%[r], #144]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #132]\n\t" + "str r4, [%[r], #140]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #128]\n\t" + "str r3, [%[r], #136]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #124]\n\t" + "str r2, [%[r], #132]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #120]\n\t" + "str r4, [%[r], #128]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #116]\n\t" + "str r3, [%[r], #124]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #112]\n\t" + "str r2, [%[r], #120]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #108]\n\t" + "str r4, [%[r], #116]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #104]\n\t" + "str 
r3, [%[r], #112]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #100]\n\t" + "str r2, [%[r], #108]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #96]\n\t" + "str r4, [%[r], #104]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #92]\n\t" + "str r3, [%[r], #100]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #88]\n\t" + "str r2, [%[r], #96]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #84]\n\t" + "str r4, [%[r], #92]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #80]\n\t" + "str r3, [%[r], #88]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #76]\n\t" + "str r2, [%[r], #84]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #72]\n\t" + "str r4, [%[r], #80]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #68]\n\t" + "str r3, [%[r], #76]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #64]\n\t" + "str r2, [%[r], #72]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr 
r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "str r2, [%[r]]\n\t" + "str r3, [%[r], #4]\n\t" + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) + : "memory", "r2", "r3", "r4", "r5", "r6" + ); +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[256]; + sp_digit td[129]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 256; +#else + norm = nd; + tmp = td; +#endif + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + sp_4096_lshift_128(r, norm, y); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + + sp_4096_lshift_128(r, r, y); + sp_4096_mul_d_128(tmp, norm, r[128]); + r[128] = 0; + o = sp_4096_add_128(r, r, tmp); + sp_4096_cond_sub_128(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* HAVE_FFDHE_4096 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
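+ *
+ * For illustration only, a caller-side sketch (a hypothetical helper, not
+ * part of this file; it assumes the sp_DhExp_4096() prototype is in scope,
+ * e.g. from wolfssl/wolfcrypt/sp.h, and that base and mod are initialized
+ * mp_ints holding the DH generator and a 4096-bit prime; kept out of
+ * compilation):
+ */
+#if 0
+static int dh_agree_4096(mp_int* base, mp_int* mod, const byte* priv,
+    word32 privSz, byte* out, word32* outSz)
+{
+    /* out must have room for 512 bytes; on success *outSz holds the
+     * length of the big-endian result with leading zero bytes stripped. */
+    return sp_DhExp_4096(base, priv, privSz, mod, out, outSz);
+}
+#endif
+/* Implementation: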
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[256], e[128], m[128];
+    sp_digit* r = b;
+    word32 i;
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 512) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 128, base);
+        sp_4096_from_bin(e, 128, exp, expLen);
+        sp_4096_from_mp(m, 128, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        if (base->used == 1 && base->dp[0] == 2 && m[127] == (sp_digit)-1)
+            err = sp_4096_mod_exp_2_128(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_4096_mod_exp_128(r, b, e, expLen * 8, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        for (i=0; i<512 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
+/* Point structure to use. */
+typedef struct sp_point_256 {
+    sp_digit x[2 * 8];
+    sp_digit y[2 * 8];
+    sp_digit z[2 * 8];
+    int infinity;
+} sp_point_256;
+
+/* The modulus (prime) of the curve P256. */
+static const sp_digit p256_mod[8] = {
+    0xffffffff,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
+    0x00000001,0xffffffff
+};
+/* The Montgomery normalizer for the modulus of the curve P256. */
+static const sp_digit p256_norm_mod[8] = {
+    0x00000001,0x00000000,0x00000000,0xffffffff,0xffffffff,0xffffffff,
+    0xfffffffe,0x00000000
+};
+/* The Montgomery multiplier for the modulus of the curve P256. */
+static const sp_digit p256_mp_mod = 0x00000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* The order of the curve P256. */
+static const sp_digit p256_order[8] = {
+    0xfc632551,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
+    0x00000000,0xffffffff
+};
+#endif
+/* The order of the curve P256 minus 2. */
+static const sp_digit p256_order2[8] = {
+    0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
+    0x00000000,0xffffffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for the order of the curve P256. */
+static const sp_digit p256_norm_order[8] = {
+    0x039cdaaf,0x0c46353d,0x58e8617b,0x43190552,0x00000000,0x00000000,
+    0xffffffff,0x00000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for the order of the curve P256. */
+static const sp_digit p256_mp_order = 0xee00bc4f;
+#endif
+/* The base point of curve P256.
+ */
+static const sp_point_256 p256_base = {
+    /* X ordinate */
+    {
+        0xd898c296,0xf4a13945,0x2deb33a0,0x77037d81,0x63a440f2,0xf8bce6e5,
+        0xe12c4247,0x6b17d1f2,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x37bf51f5,0xcbb64068,0x6b315ece,0x2bce3357,0x7c0f9e16,0x8ee7eb4a,
+        0xfe1a7f9b,0x4fe342e2,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0x00000000,0x00000000,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p256_b[8] = {
+    0x27d2604b,0x3bce3c3e,0xcc53b0f6,0x651d06b0,0x769886bc,0xb3ebbd55,
+    0xaa3a93e7,0x5ac635d8
+};
+#endif
+
+static int sp_256_point_new_ex_8(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), &(sp), &(p))
+#endif
+
+
+static void sp_256_point_free_8(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by the Montgomery normalizer mod the modulus (prime).
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime).
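+ *
+ * The assembly below evaluates r = a * 2^256 mod p256 directly from the
+ * signed coefficient rows noted in its comments. A portable sketch of the
+ * same idea (an illustration, not the code used here; it assumes
+ * <stdint.h> integer types and arithmetic right shift of negative values,
+ * and omits the final reduction below p256; kept out of compilation):
+ */
+#if 0
+static void mod_mul_norm_sketch(uint32_t r[8], const uint32_t a[8])
+{
+    /* Row i gives the coefficients of a[0..7] in output word i, taken
+     * from the "t[i] = ..." comments in the assembly below. */
+    static const int8_t C[8][8] = {
+        {  1,  1,  0, -1, -1, -1, -1,  0 },
+        {  0,  1,  1,  0, -1, -1, -1, -1 },
+        {  0,  0,  1,  1,  0, -1, -1, -1 },
+        { -1, -1,  0,  2,  2,  1,  0, -1 },
+        {  0, -1, -1,  0,  2,  2,  1,  0 },
+        {  0,  0, -1, -1,  0,  2,  2,  1 },
+        { -1, -1,  0,  0,  0,  1,  3,  2 },
+        {  1,  0, -1, -1, -1, -1,  0,  3 },
+    };
+    int64_t t[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+    int64_t o = 0;
+    int i, j;
+
+    for (i = 0; i < 8; i++) {
+        for (j = 0; j < 8; j++) {
+            t[i] += (int64_t)C[i][j] * a[j];
+        }
+    }
+    /* Propagate carries and fold the top carry back with
+     * 2^256 == 2^224 - 2^192 - 2^96 + 1 (mod p256) until none remains. */
+    do {
+        t[0] += o; t[3] -= o; t[6] -= o; t[7] += o;
+        for (i = 0, o = 0; i < 8; i++) {
+            t[i] += o;
+            o = t[i] >> 32;
+            t[i] &= 0xffffffff;
+        }
+    } while (o != 0);
+    for (i = 0; i < 8; i++) {
+        r[i] = (uint32_t)t[i];
+    }
+}
+#endif
+/* Implementation (ARM inline assembly):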
+ */ +static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + (void)m; + + __asm__ __volatile__ ( + "sub sp, sp, #24\n\t" + "ldr r2, [%[a], #0]\n\t" + "ldr r3, [%[a], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[a], #20]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[a], #28]\n\t" + "# Clear overflow and underflow\n\t" + "mov r14, #0\n\t" + "mov r12, #0\n\t" + "# t[0] = 1 1 0 -1 -1 -1 -1 0\n\t" + "adds r10, r2, r3\n\t" + "adc r14, r14, #0\n\t" + "subs r10, r10, r5\n\t" + "sbc r12, r12, #0\n\t" + "subs r10, r10, r6\n\t" + "sbc r12, r12, #0\n\t" + "subs r10, r10, r7\n\t" + "sbc r12, r12, #0\n\t" + "subs r10, r10, r8\n\t" + "sbc r12, r12, #0\n\t" + "# Store t[0]\n\t" + "str r10, [sp, #0]\n\t" + "neg r12, r12\n\t" + "mov r10, #0\n\t" + "# t[1] = 0 1 1 0 -1 -1 -1 -1\n\t" + "adds r14, r14, r3\n\t" + "adc r10, r10, #0\n\t" + "adds r14, r14, r4\n\t" + "adc r10, r10, #0\n\t" + "subs r14, r14, r12\n\t" + "mov r12, #0\n\t" + "sbc r12, r12, #0\n\t" + "subs r14, r14, r6\n\t" + "sbc r12, r12, #0\n\t" + "subs r14, r14, r7\n\t" + "sbc r12, r12, #0\n\t" + "subs r14, r14, r8\n\t" + "sbc r12, r12, #0\n\t" + "subs r14, r14, r9\n\t" + "sbc r12, r12, #0\n\t" + "# Store t[1]\n\t" + "str r14, [sp, #4]\n\t" + "neg r12, r12\n\t" + "mov r14, #0\n\t" + "# t[2] = 0 0 1 1 0 -1 -1 -1\n\t" + "adds r10, r10, r4\n\t" + "adc r14, r14, #0\n\t" + "adds r10, r10, r5\n\t" + "adc r14, r14, #0\n\t" + "subs r10, r10, r12\n\t" + "mov r12, #0\n\t" + "sbc r12, r12, #0\n\t" + "subs r10, r10, r7\n\t" + "sbc r12, r12, #0\n\t" + "subs r10, r10, r8\n\t" + "sbc r12, r12, #0\n\t" + "subs r10, r10, r9\n\t" + "sbc r12, r12, #0\n\t" + "# Store t[2]\n\t" + "str r10, [sp, #8]\n\t" + "neg r12, r12\n\t" + "mov r10, #0\n\t" + "# t[3] = -1 -1 0 2 2 1 0 -1\n\t" + "adds r14, r14, r5\n\t" + "adc r10, r10, #0\n\t" + "adds r14, r14, r5\n\t" + "adc r10, r10, #0\n\t" + "adds r14, r14, r6\n\t" + "adc r10, r10, #0\n\t" + "adds r14, r14, r6\n\t" + "adc r10, r10, #0\n\t" + "adds r14, r14, r7\n\t" + "adc r10, r10, #0\n\t" + "subs r14, r14, r12\n\t" + "mov r12, #0\n\t" + "sbc r12, r12, #0\n\t" + "subs r14, r14, r2\n\t" + "sbc r12, r12, #0\n\t" + "subs r14, r14, r3\n\t" + "sbc r12, r12, #0\n\t" + "subs r14, r14, r9\n\t" + "sbc r12, r12, #0\n\t" + "# Store t[3]\n\t" + "str r14, [sp, #12]\n\t" + "neg r12, r12\n\t" + "mov r14, #0\n\t" + "# t[4] = 0 -1 -1 0 2 2 1 0\n\t" + "adds r10, r10, r6\n\t" + "adc r14, r14, #0\n\t" + "adds r10, r10, r6\n\t" + "adc r14, r14, #0\n\t" + "adds r10, r10, r7\n\t" + "adc r14, r14, #0\n\t" + "adds r10, r10, r7\n\t" + "adc r14, r14, #0\n\t" + "adds r10, r10, r8\n\t" + "adc r14, r14, #0\n\t" + "subs r10, r10, r12\n\t" + "mov r12, #0\n\t" + "sbc r12, r12, #0\n\t" + "subs r10, r10, r3\n\t" + "sbc r12, r12, #0\n\t" + "subs r10, r10, r4\n\t" + "sbc r12, r12, #0\n\t" + "# Store t[4]\n\t" + "str r10, [sp, #16]\n\t" + "neg r12, r12\n\t" + "mov r10, #0\n\t" + "# t[5] = 0 0 -1 -1 0 2 2 1\n\t" + "adds r14, r14, r7\n\t" + "adc r10, r10, #0\n\t" + "adds r14, r14, r7\n\t" + "adc r10, r10, #0\n\t" + "adds r14, r14, r8\n\t" + "adc r10, r10, #0\n\t" + "adds r14, r14, r8\n\t" + "adc r10, r10, #0\n\t" + "adds r14, r14, r9\n\t" + "adc r10, r10, #0\n\t" + "subs r14, r14, r12\n\t" + "mov r12, #0\n\t" + "sbc r12, r12, #0\n\t" + "subs r14, r14, r4\n\t" + "sbc r12, r12, #0\n\t" + "subs r14, r14, r5\n\t" + "sbc r12, r12, #0\n\t" + "# Store t[5]\n\t" + "str r14, [sp, #20]\n\t" + "neg r12, r12\n\t" + "mov r14, #0\n\t" + "# t[6] = -1 -1 0 0 0 1 3 2\n\t" + "adds r10, r10, 
r7\n\t" + "adc r14, r14, #0\n\t" + "adds r10, r10, r8\n\t" + "adc r14, r14, #0\n\t" + "adds r10, r10, r8\n\t" + "adc r14, r14, #0\n\t" + "adds r10, r10, r8\n\t" + "adc r14, r14, #0\n\t" + "adds r10, r10, r9\n\t" + "adc r14, r14, #0\n\t" + "adds r10, r10, r9\n\t" + "adc r14, r14, #0\n\t" + "subs r10, r10, r12\n\t" + "mov r12, #0\n\t" + "sbc r12, r12, #0\n\t" + "subs r10, r10, r2\n\t" + "sbc r12, r12, #0\n\t" + "subs r10, r10, r3\n\t" + "sbc r12, r12, #0\n\t" + "# Store t[6]\n\t" + "mov r8, r10\n\t" + "neg r12, r12\n\t" + "mov r10, #0\n\t" + "# t[7] = 1 0 -1 -1 -1 -1 0 3\n\t" + "adds r14, r14, r2\n\t" + "adc r10, r10, #0\n\t" + "adds r14, r14, r9\n\t" + "adc r10, r10, #0\n\t" + "adds r14, r14, r9\n\t" + "adc r10, r10, #0\n\t" + "adds r14, r14, r9\n\t" + "adc r10, r10, #0\n\t" + "subs r14, r14, r12\n\t" + "mov r12, #0\n\t" + "sbc r12, r12, #0\n\t" + "subs r14, r14, r4\n\t" + "sbc r12, r12, #0\n\t" + "subs r14, r14, r5\n\t" + "sbc r12, r12, #0\n\t" + "subs r14, r14, r6\n\t" + "sbc r12, r12, #0\n\t" + "subs r14, r14, r7\n\t" + "sbc r12, r12, #0\n\t" + "# Store t[7]\n\t" + "# Load intermediate\n\t" + "ldr r2, [sp, #0]\n\t" + "ldr r3, [sp, #4]\n\t" + "ldr r4, [sp, #8]\n\t" + "ldr r5, [sp, #12]\n\t" + "ldr r6, [sp, #16]\n\t" + "ldr r7, [sp, #20]\n\t" + "neg r12, r12\n\t" + "# Add overflow\n\t" + "# Subtract underflow - add neg underflow\n\t" + "adds r2, r2, r10\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adds r5, r5, r12\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, r12\n\t" + "adc r14, r14, r10\n\t" + "# Subtract overflow\n\t" + "# Add underflow - subtract neg underflow\n\t" + "subs r2, r2, r12\n\t" + "sbcs r3, r3, #0\n\t" + "sbcs r4, r4, #0\n\t" + "subs r5, r5, r10\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, #0\n\t" + "sbcs r8, r8, r10\n\t" + "sbc r14, r14, r12\n\t" + "# Store result\n\t" + "str r2, [%[r], #0]\n\t" + "str r3, [%[r], #4]\n\t" + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" + "str r6, [%[r], #16]\n\t" + "str r7, [%[r], #20]\n\t" + "str r8, [%[r], #24]\n\t" + "str r14, [%[r], #28]\n\t" + "add sp, sp, #24\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return MP_OKAY; +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. 
+ */ +static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 32 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 32 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 32U) <= (word32)DIGIT_BIT) { + s += 32U; + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 32) { + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + s = 32 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Convert a point of type ecc_point to type sp_point_256. + * + * p Point of type sp_point_256 (result). + * pm Point of type ecc_point. + */ +static void sp_256_point_from_ecc_point_8(sp_point_256* p, const ecc_point* pm) +{ + XMEMSET(p->x, 0, sizeof(p->x)); + XMEMSET(p->y, 0, sizeof(p->y)); + XMEMSET(p->z, 0, sizeof(p->z)); + sp_256_from_mp(p->x, 8, pm->x); + sp_256_from_mp(p->y, 8, pm->y); + sp_256_from_mp(p->z, 8, pm->z); + p->infinity = 0; +} + +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_256_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 8); + r->used = 8; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 8; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 32 - s; + } + r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 8; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Convert a point of type sp_point_256 to type ecc_point. + * + * p Point of type sp_point_256. + * pm Point of type ecc_point (result). + * returns MEMORY_E when allocation of memory in ecc_point fails otherwise + * MP_OKAY. 
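+ *
+ * Together with sp_256_point_from_ecc_point_8() above, this gives a round
+ * trip between the two point types. A hypothetical usage sketch (not part
+ * of this file; assumes an initialized ecc_point; kept out of
+ * compilation):
+ */
+#if 0
+static int with_sp_point(ecc_point* pm)
+{
+    sp_point_256 p;
+
+    sp_256_point_from_ecc_point_8(&p, pm);
+    /* ... operate on p with the sp_256_* routines ... */
+    return sp_256_point_to_ecc_point_8(&p, pm);
+}
+#endif
+/* Implementation: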
+ */ +static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) +{ + int err; + + err = sp_256_to_mp(p->x, pm->x); + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, pm->y); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, pm->z); + } + + return err; +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + (void)mp; + (void)m; + + __asm__ __volatile__ ( + "sub sp, sp, #68\n\t" + "mov r5, #0\n\t" + "# A[0] * B[0]\n\t" + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #0]\n\t" + "umull r8, r9, r6, r7\n\t" + "str r8, [sp, #0]\n\t" + "# A[0] * B[1]\n\t" + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adc r10, r4, #0\n\t" + "# A[1] * B[0]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #0]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, #0\n\t" + "str r9, [sp, #4]\n\t" + "# A[0] * B[2]\n\t" + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #8]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adc r14, r4, r14\n\t" + "# A[1] * B[1]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #4]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, #0\n\t" + "# A[2] * B[0]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #0]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, r8\n\t" + "str r10, [sp, #8]\n\t" + "# A[0] * B[3]\n\t" + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #12]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, #0\n\t" + "# A[1] * B[2]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #8]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "# A[2] * B[1]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #4]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "# A[3] * B[0]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #0]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "str r14, [sp, #12]\n\t" + "# A[0] * B[4]\n\t" + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #16]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, #0\n\t" + "# A[1] * B[3]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #12]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, r10\n\t" + "# A[2] * B[2]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #8]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, r10\n\t" + "# A[3] * B[1]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #4]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, r10\n\t" + "# A[4] * B[0]\n\t" + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #0]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, r10\n\t" + "str r8, [sp, #16]\n\t" + "# A[0] * B[5]\n\t" + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], 
#20]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, #0\n\t" + "# A[1] * B[4]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #16]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, r14\n\t" + "# A[2] * B[3]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, r14\n\t" + "# A[3] * B[2]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #8]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, r14\n\t" + "# A[4] * B[1]\n\t" + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #4]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, r14\n\t" + "# A[5] * B[0]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #0]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, r14\n\t" + "str r9, [sp, #20]\n\t" + "# A[0] * B[6]\n\t" + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #24]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, #0\n\t" + "# A[1] * B[5]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #20]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, r8\n\t" + "# A[2] * B[4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #16]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, r8\n\t" + "# A[3] * B[3]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #12]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, r8\n\t" + "# A[4] * B[2]\n\t" + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #8]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, r8\n\t" + "# A[5] * B[1]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #4]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, r8\n\t" + "# A[6] * B[0]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #0]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, r8\n\t" + "str r10, [sp, #24]\n\t" + "# A[0] * B[7]\n\t" + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #28]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, #0\n\t" + "# A[1] * B[6]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #24]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "# A[2] * B[5]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #20]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "# A[3] * B[4]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #16]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "# A[4] * B[3]\n\t" + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #12]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "# A[5] * B[2]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #8]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "# A[6] * B[1]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #4]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs 
r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "# A[7] * B[0]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #0]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "str r14, [sp, #28]\n\t" + "# A[1] * B[7]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #28]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, #0\n\t" + "# A[2] * B[6]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #24]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, r10\n\t" + "# A[3] * B[5]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #20]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, r10\n\t" + "# A[4] * B[4]\n\t" + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #16]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, r10\n\t" + "# A[5] * B[3]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #12]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, r10\n\t" + "# A[6] * B[2]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #8]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, r10\n\t" + "# A[7] * B[1]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #4]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, r10\n\t" + "str r8, [sp, #32]\n\t" + "# A[2] * B[7]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #28]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, #0\n\t" + "# A[3] * B[6]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #24]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, r14\n\t" + "# A[4] * B[5]\n\t" + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, r14\n\t" + "# A[5] * B[4]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #16]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, r14\n\t" + "# A[6] * B[3]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #12]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, r14\n\t" + "# A[7] * B[2]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #8]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, r14\n\t" + "str r9, [sp, #36]\n\t" + "# A[3] * B[7]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #28]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, #0\n\t" + "# A[4] * B[6]\n\t" + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #24]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, r8\n\t" + "# A[5] * B[5]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #20]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, r8\n\t" + "# A[6] * B[4]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #16]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, r8\n\t" + "# A[7] * B[3]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #12]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, r8\n\t" + 
"str r10, [sp, #40]\n\t" + "# A[4] * B[7]\n\t" + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #28]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, #0\n\t" + "# A[5] * B[6]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #24]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "# A[6] * B[5]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #20]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "# A[7] * B[4]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #16]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "str r14, [sp, #44]\n\t" + "# A[5] * B[7]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #28]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, #0\n\t" + "# A[6] * B[6]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #24]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, r10\n\t" + "# A[7] * B[5]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #20]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, r10\n\t" + "# A[6] * B[7]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, #0\n\t" + "# A[7] * B[6]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #24]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, r14\n\t" + "# A[7] * B[7]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #28]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adc r14, r4, r14\n\t" + "str r8, [sp, #48]\n\t" + "str r9, [sp, #52]\n\t" + "str r10, [sp, #56]\n\t" + "str r14, [sp, #60]\n\t" + "# Start Reduction\n\t" + "ldr r4, [sp, #0]\n\t" + "ldr r5, [sp, #4]\n\t" + "ldr r6, [sp, #8]\n\t" + "ldr r7, [sp, #12]\n\t" + "ldr r8, [sp, #16]\n\t" + "ldr r9, [sp, #20]\n\t" + "ldr r10, [sp, #24]\n\t" + "ldr r14, [sp, #28]\n\t" + "# mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192\n\t" + "# - a[0] << 224\n\t" + "# + (a[0]-a[1] * 2) << (6 * 32)\n\t" + "adds r10, r10, r4\n\t" + "adc r14, r14, r5\n\t" + "adds r10, r10, r4\n\t" + "adc r14, r14, r5\n\t" + "# - a[0] << (7 * 32)\n\t" + "sub r14, r14, r4\n\t" + "# + a[0]-a[4] << (3 * 32)\n\t" + "mov %[a], r7\n\t" + "mov %[b], r8\n\t" + "adds r7, r7, r4\n\t" + "adcs r8, r8, r5\n\t" + "adcs r9, r9, r6\n\t" + "adcs r10, r10, %[a]\n\t" + "adc r14, r14, %[b]\n\t" + "str r4, [sp, #0]\n\t" + "str r5, [sp, #4]\n\t" + "str r6, [sp, #8]\n\t" + "str r7, [sp, #12]\n\t" + "str r8, [sp, #16]\n\t" + "str r9, [sp, #20]\n\t" + "# a += mu * m\n\t" + "# += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1)\n\t" + "mov %[a], #0\n\t" + "# a[6] += t[0] + t[3]\n\t" + "ldr r3, [sp, #24]\n\t" + "adds r3, r3, r4\n\t" + "adc %[b], %[a], #0\n\t" + "adds r3, r3, r7\n\t" + "adc %[b], %[b], #0\n\t" + "str r10, [sp, #24]\n\t" + "# a[7] += t[1] + t[4]\n\t" + "ldr r3, [sp, #28]\n\t" + "adds r3, r3, %[b]\n\t" + "adc %[b], %[a], #0\n\t" + "adds r3, r3, r5\n\t" + "adc %[b], %[b], #0\n\t" + "adds r3, r3, r8\n\t" + "adc %[b], %[b], #0\n\t" + "str r14, [sp, #28]\n\t" + "str r3, [sp, #64]\n\t" + "# a[8] += t[0] + t[2] + t[5]\n\t" + "ldr r3, [sp, #32]\n\t" + "adds r3, r3, %[b]\n\t" + "adc %[b], %[a], #0\n\t" + "adds r3, r3, r4\n\t" + "adc %[b], %[b], #0\n\t" + "adds 
r3, r3, r6\n\t" + "adc %[b], %[b], #0\n\t" + "adds r3, r3, r9\n\t" + "adc %[b], %[b], #0\n\t" + "str r3, [sp, #32]\n\t" + "# a[9] += t[1] + t[3] + t[6]\n\t" + "# a[10] += t[2] + t[4] + t[7]\n\t" + "ldr r3, [sp, #36]\n\t" + "ldr r4, [sp, #40]\n\t" + "adds r3, r3, %[b]\n\t" + "adcs r4, r4, #0\n\t" + "adc %[b], %[a], #0\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc %[b], %[b], #0\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adc %[b], %[b], #0\n\t" + "adds r3, r3, r10\n\t" + "adcs r4, r4, r14\n\t" + "adc %[b], %[b], #0\n\t" + "str r3, [sp, #36]\n\t" + "str r4, [sp, #40]\n\t" + "# a[11] += t[3] + t[5]\n\t" + "# a[12] += t[4] + t[6]\n\t" + "# a[13] += t[5] + t[7]\n\t" + "# a[14] += t[6]\n\t" + "ldr r3, [sp, #44]\n\t" + "ldr r4, [sp, #48]\n\t" + "ldr r5, [sp, #52]\n\t" + "ldr r6, [sp, #56]\n\t" + "adds r3, r3, %[b]\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adc %[b], %[a], #0\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adc %[b], %[b], #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, r10\n\t" + "adcs r5, r5, r14\n\t" + "adcs r6, r6, #0\n\t" + "adc %[b], %[b], #0\n\t" + "str r3, [sp, #44]\n\t" + "str r4, [sp, #48]\n\t" + "str r5, [sp, #52]\n\t" + "str r6, [sp, #56]\n\t" + "# a[15] += t[7]\n\t" + "ldr r3, [sp, #60]\n\t" + "adds r3, r3, %[b]\n\t" + "adc %[b], %[a], #0\n\t" + "adds r3, r3, r14\n\t" + "adc %[b], %[b], #0\n\t" + "str r3, [sp, #60]\n\t" + "ldr r3, [sp, #64]\n\t" + "ldr r4, [sp, #32]\n\t" + "ldr r5, [sp, #36]\n\t" + "ldr r6, [sp, #40]\n\t" + "ldr r8, [sp, #0]\n\t" + "ldr r9, [sp, #4]\n\t" + "ldr r10, [sp, #8]\n\t" + "ldr r14, [sp, #12]\n\t" + "subs r3, r3, r8\n\t" + "sbcs r4, r4, r9\n\t" + "sbcs r5, r5, r10\n\t" + "sbcs r6, r6, r14\n\t" + "str r4, [sp, #32]\n\t" + "str r5, [sp, #36]\n\t" + "str r6, [sp, #40]\n\t" + "ldr r3, [sp, #44]\n\t" + "ldr r4, [sp, #48]\n\t" + "ldr r5, [sp, #52]\n\t" + "ldr r6, [sp, #56]\n\t" + "ldr r7, [sp, #60]\n\t" + "ldr r8, [sp, #16]\n\t" + "ldr r9, [sp, #20]\n\t" + "ldr r10, [sp, #24]\n\t" + "ldr r14, [sp, #28]\n\t" + "sbcs r3, r3, r8\n\t" + "sbcs r4, r4, r9\n\t" + "sbcs r5, r5, r10\n\t" + "sbcs r6, r6, r14\n\t" + "sbc r7, r7, #0\n\t" + "str r3, [sp, #44]\n\t" + "str r4, [sp, #48]\n\t" + "str r5, [sp, #52]\n\t" + "str r6, [sp, #56]\n\t" + "str r7, [sp, #60]\n\t" + "# mask m and sub from result if overflow\n\t" + "sub %[b], %[a], %[b]\n\t" + "and %[a], %[b], #1\n\t" + "ldr r3, [sp, #32]\n\t" + "ldr r4, [sp, #36]\n\t" + "ldr r5, [sp, #40]\n\t" + "ldr r6, [sp, #44]\n\t" + "ldr r7, [sp, #48]\n\t" + "ldr r8, [sp, #52]\n\t" + "ldr r9, [sp, #56]\n\t" + "ldr r10, [sp, #60]\n\t" + "subs r3, r3, %[b]\n\t" + "sbcs r4, r4, %[b]\n\t" + "sbcs r5, r5, %[b]\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, #0\n\t" + "sbcs r8, r8, #0\n\t" + "sbcs r9, r9, %[a]\n\t" + "sbc r10, r10, %[b]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "str r7, [%[r], #16]\n\t" + "str r8, [%[r], #20]\n\t" + "str r9, [%[r], #24]\n\t" + "str r10, [%[r], #28]\n\t" + "add sp, sp, #68\n\t" + : [a] "+r" (a), [b] "+r" (b) + : [r] "r" (r) + : "memory", "r8", "r9", "r10", "r14", "r3", "r4", "r5", "r6", "r7" + ); +} + +/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. 
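+ *
+ * Note that sp_256_mont_sqr_8(r, a, m, mp) computes the same value as
+ * sp_256_mont_mul_8(r, a, a, m, mp); the dedicated square computes each
+ * cross product a[i]*a[j] once and doubles it. The multiplier mp is
+ * -1/m mod 2^32, and for illustration it can be derived from the lowest
+ * word of the modulus with a few Newton iterations (a sketch, not code
+ * used here; for p256_mod this yields p256_mp_mod == 0x00000001; kept out
+ * of compilation):
+ */
+#if 0
+static sp_digit mont_multiplier_sketch(sp_digit m0)
+{
+    sp_digit x = m0;        /* x == 1/m0 mod 2^3 for any odd m0 */
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        x *= 2 - m0 * x;    /* each step doubles the number of valid bits */
+    }
+    return (sp_digit)0 - x; /* -1/m0 mod 2^32 */
+}
+#endif
+/* Implementation (ARM inline assembly):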
+ */ +SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + (void)mp; + (void)m; + + __asm__ __volatile__ ( + "sub sp, sp, #68\n\t" + "mov r5, #0\n\t" + "# A[0] * A[1]\n\t" + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #4]\n\t" + "umull r9, r10, r6, r7\n\t" + "str r9, [sp, #4]\n\t" + "# A[0] * A[2]\n\t" + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #8]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adc r14, r4, #0\n\t" + "str r10, [sp, #8]\n\t" + "# A[0] * A[3]\n\t" + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #12]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adc r8, r4, #0\n\t" + "# A[1] * A[2]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #8]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, #0\n\t" + "str r14, [sp, #12]\n\t" + "# A[0] * A[4]\n\t" + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #16]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adc r9, r4, r9\n\t" + "# A[1] * A[3]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #12]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, #0\n\t" + "str r8, [sp, #16]\n\t" + "# A[0] * A[5]\n\t" + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #20]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adc r10, r4, r10\n\t" + "# A[1] * A[4]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #16]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, #0\n\t" + "# A[2] * A[3]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, r14\n\t" + "str r9, [sp, #20]\n\t" + "# A[0] * A[6]\n\t" + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #24]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, #0\n\t" + "# A[1] * A[5]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #20]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, r8\n\t" + "# A[2] * A[4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #16]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, r8\n\t" + "str r10, [sp, #24]\n\t" + "# A[0] * A[7]\n\t" + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #28]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, #0\n\t" + "# A[1] * A[6]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #24]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "# A[2] * A[5]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #20]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "# A[3] * A[4]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[a], #16]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "str r14, [sp, #28]\n\t" + "# A[1] * A[7]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #28]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, #0\n\t" + "# A[2] * A[6]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #24]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, r10\n\t" + "# A[3] * A[5]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[a], #20]\n\t" + "umull r3, r4, r6, 
r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, r10\n\t" + "str r8, [sp, #32]\n\t" + "# A[2] * A[7]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #28]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, #0\n\t" + "# A[3] * A[6]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[a], #24]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, r14\n\t" + "# A[4] * A[5]\n\t" + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[a], #20]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adcs r10, r4, r10\n\t" + "adc r14, r5, r14\n\t" + "str r9, [sp, #36]\n\t" + "# A[3] * A[7]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[a], #28]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, #0\n\t" + "# A[4] * A[6]\n\t" + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[a], #24]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r10, r3, r10\n\t" + "adcs r14, r4, r14\n\t" + "adc r8, r5, r8\n\t" + "str r10, [sp, #40]\n\t" + "# A[4] * A[7]\n\t" + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[a], #28]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, #0\n\t" + "# A[5] * A[6]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[a], #24]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r14, r3, r14\n\t" + "adcs r8, r4, r8\n\t" + "adc r9, r5, r9\n\t" + "str r14, [sp, #44]\n\t" + "# A[5] * A[7]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[a], #28]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r8, r3, r8\n\t" + "adcs r9, r4, r9\n\t" + "adc r10, r5, #0\n\t" + "str r8, [sp, #48]\n\t" + "# A[6] * A[7]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" + "umull r3, r4, r6, r7\n\t" + "adds r9, r3, r9\n\t" + "adc r10, r4, r10\n\t" + "str r9, [sp, #52]\n\t" + "str r10, [sp, #56]\n\t" + "# Double\n\t" + "ldr r4, [sp, #4]\n\t" + "ldr r6, [sp, #8]\n\t" + "ldr r7, [sp, #12]\n\t" + "ldr r8, [sp, #16]\n\t" + "ldr r9, [sp, #20]\n\t" + "ldr r10, [sp, #24]\n\t" + "ldr r14, [sp, #28]\n\t" + "ldr r12, [sp, #32]\n\t" + "ldr r3, [sp, #36]\n\t" + "adds r4, r4, r4\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r14, r14, r14\n\t" + "adcs r12, r12, r12\n\t" + "adcs r3, r3, r3\n\t" + "str r4, [sp, #4]\n\t" + "str r6, [sp, #8]\n\t" + "str r7, [sp, #12]\n\t" + "str r8, [sp, #16]\n\t" + "str r9, [sp, #20]\n\t" + "str r10, [sp, #24]\n\t" + "str r14, [sp, #28]\n\t" + "str r12, [sp, #32]\n\t" + "str r3, [sp, #36]\n\t" + "ldr r4, [sp, #40]\n\t" + "ldr r6, [sp, #44]\n\t" + "ldr r7, [sp, #48]\n\t" + "ldr r8, [sp, #52]\n\t" + "ldr r9, [sp, #56]\n\t" + "adcs r4, r4, r4\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "str r4, [sp, #40]\n\t" + "str r6, [sp, #44]\n\t" + "str r7, [sp, #48]\n\t" + "str r8, [sp, #52]\n\t" + "str r9, [sp, #56]\n\t" + "adc r10, r5, #0\n\t" + "str r10, [sp, #60]\n\t" + "ldr r4, [sp, #4]\n\t" + "ldr r5, [sp, #8]\n\t" + "ldr r12, [sp, #12]\n\t" + "# A[0] * A[0]\n\t" + "ldr r6, [%[a], #0]\n\t" + "umull r8, r9, r6, r6\n\t" + "# A[1] * A[1]\n\t" + "ldr r6, [%[a], #4]\n\t" + "umull r10, r14, r6, r6\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, r5\n\t" + "adcs r14, r14, r12\n\t" + "str r8, [sp, #0]\n\t" + "str r9, [sp, #4]\n\t" + "str r10, [sp, #8]\n\t" + "str r14, [sp, #12]\n\t" + "ldr r3, [sp, #16]\n\t" + "ldr r4, [sp, #20]\n\t" + "ldr r5, [sp, #24]\n\t" + "ldr r12, [sp, #28]\n\t" + "# A[2] * A[2]\n\t" 
+ "ldr r6, [%[a], #8]\n\t" + "umull r8, r9, r6, r6\n\t" + "# A[3] * A[3]\n\t" + "ldr r6, [%[a], #12]\n\t" + "umull r10, r14, r6, r6\n\t" + "adcs r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adcs r10, r10, r5\n\t" + "adcs r14, r14, r12\n\t" + "str r8, [sp, #16]\n\t" + "str r9, [sp, #20]\n\t" + "str r10, [sp, #24]\n\t" + "str r14, [sp, #28]\n\t" + "ldr r3, [sp, #32]\n\t" + "ldr r4, [sp, #36]\n\t" + "ldr r5, [sp, #40]\n\t" + "ldr r12, [sp, #44]\n\t" + "# A[4] * A[4]\n\t" + "ldr r6, [%[a], #16]\n\t" + "umull r8, r9, r6, r6\n\t" + "# A[5] * A[5]\n\t" + "ldr r6, [%[a], #20]\n\t" + "umull r10, r14, r6, r6\n\t" + "adcs r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adcs r10, r10, r5\n\t" + "adcs r14, r14, r12\n\t" + "str r8, [sp, #32]\n\t" + "str r9, [sp, #36]\n\t" + "str r10, [sp, #40]\n\t" + "str r14, [sp, #44]\n\t" + "ldr r3, [sp, #48]\n\t" + "ldr r4, [sp, #52]\n\t" + "ldr r5, [sp, #56]\n\t" + "ldr r12, [sp, #60]\n\t" + "# A[6] * A[6]\n\t" + "ldr r6, [%[a], #24]\n\t" + "umull r8, r9, r6, r6\n\t" + "# A[7] * A[7]\n\t" + "ldr r6, [%[a], #28]\n\t" + "umull r10, r14, r6, r6\n\t" + "adcs r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adcs r10, r10, r5\n\t" + "adc r14, r14, r12\n\t" + "str r8, [sp, #48]\n\t" + "str r9, [sp, #52]\n\t" + "str r10, [sp, #56]\n\t" + "str r14, [sp, #60]\n\t" + "# Start Reduction\n\t" + "ldr r4, [sp, #0]\n\t" + "ldr r5, [sp, #4]\n\t" + "ldr r6, [sp, #8]\n\t" + "ldr r7, [sp, #12]\n\t" + "ldr r8, [sp, #16]\n\t" + "ldr r9, [sp, #20]\n\t" + "ldr r10, [sp, #24]\n\t" + "ldr r14, [sp, #28]\n\t" + "# mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192\n\t" + "# - a[0] << 224\n\t" + "# + (a[0]-a[1] * 2) << (6 * 32)\n\t" + "adds r10, r10, r4\n\t" + "adc r14, r14, r5\n\t" + "adds r10, r10, r4\n\t" + "adc r14, r14, r5\n\t" + "# - a[0] << (7 * 32)\n\t" + "sub r14, r14, r4\n\t" + "# + a[0]-a[4] << (3 * 32)\n\t" + "mov %[a], r7\n\t" + "mov r12, r8\n\t" + "adds r7, r7, r4\n\t" + "adcs r8, r8, r5\n\t" + "adcs r9, r9, r6\n\t" + "adcs r10, r10, %[a]\n\t" + "adc r14, r14, r12\n\t" + "str r4, [sp, #0]\n\t" + "str r5, [sp, #4]\n\t" + "str r6, [sp, #8]\n\t" + "str r7, [sp, #12]\n\t" + "str r8, [sp, #16]\n\t" + "str r9, [sp, #20]\n\t" + "# a += mu * m\n\t" + "# += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1)\n\t" + "mov %[a], #0\n\t" + "# a[6] += t[0] + t[3]\n\t" + "ldr r3, [sp, #24]\n\t" + "adds r3, r3, r4\n\t" + "adc r12, %[a], #0\n\t" + "adds r3, r3, r7\n\t" + "adc r12, r12, #0\n\t" + "str r10, [sp, #24]\n\t" + "# a[7] += t[1] + t[4]\n\t" + "ldr r3, [sp, #28]\n\t" + "adds r3, r3, r12\n\t" + "adc r12, %[a], #0\n\t" + "adds r3, r3, r5\n\t" + "adc r12, r12, #0\n\t" + "adds r3, r3, r8\n\t" + "adc r12, r12, #0\n\t" + "str r14, [sp, #28]\n\t" + "str r3, [sp, #64]\n\t" + "# a[8] += t[0] + t[2] + t[5]\n\t" + "ldr r3, [sp, #32]\n\t" + "adds r3, r3, r12\n\t" + "adc r12, %[a], #0\n\t" + "adds r3, r3, r4\n\t" + "adc r12, r12, #0\n\t" + "adds r3, r3, r6\n\t" + "adc r12, r12, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r12, r12, #0\n\t" + "str r3, [sp, #32]\n\t" + "# a[9] += t[1] + t[3] + t[6]\n\t" + "# a[10] += t[2] + t[4] + t[7]\n\t" + "ldr r3, [sp, #36]\n\t" + "ldr r4, [sp, #40]\n\t" + "adds r3, r3, r12\n\t" + "adcs r4, r4, #0\n\t" + "adc r12, %[a], #0\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r12, r12, #0\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adc r12, r12, #0\n\t" + "adds r3, r3, r10\n\t" + "adcs r4, r4, r14\n\t" + "adc r12, r12, #0\n\t" + "str r3, [sp, #36]\n\t" + "str r4, [sp, #40]\n\t" + "# a[11] += t[3] + t[5]\n\t" + "# a[12] += t[4] + t[6]\n\t" + "# a[13] 
+= t[5] + t[7]\n\t" + "# a[14] += t[6]\n\t" + "ldr r3, [sp, #44]\n\t" + "ldr r4, [sp, #48]\n\t" + "ldr r5, [sp, #52]\n\t" + "ldr r6, [sp, #56]\n\t" + "adds r3, r3, r12\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adc r12, %[a], #0\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adc r12, r12, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, r10\n\t" + "adcs r5, r5, r14\n\t" + "adcs r6, r6, #0\n\t" + "adc r12, r12, #0\n\t" + "str r3, [sp, #44]\n\t" + "str r4, [sp, #48]\n\t" + "str r5, [sp, #52]\n\t" + "str r6, [sp, #56]\n\t" + "# a[15] += t[7]\n\t" + "ldr r3, [sp, #60]\n\t" + "adds r3, r3, r12\n\t" + "adc r12, %[a], #0\n\t" + "adds r3, r3, r14\n\t" + "adc r12, r12, #0\n\t" + "str r3, [sp, #60]\n\t" + "ldr r3, [sp, #64]\n\t" + "ldr r4, [sp, #32]\n\t" + "ldr r5, [sp, #36]\n\t" + "ldr r6, [sp, #40]\n\t" + "ldr r8, [sp, #0]\n\t" + "ldr r9, [sp, #4]\n\t" + "ldr r10, [sp, #8]\n\t" + "ldr r14, [sp, #12]\n\t" + "subs r3, r3, r8\n\t" + "sbcs r4, r4, r9\n\t" + "sbcs r5, r5, r10\n\t" + "sbcs r6, r6, r14\n\t" + "str r4, [sp, #32]\n\t" + "str r5, [sp, #36]\n\t" + "str r6, [sp, #40]\n\t" + "ldr r3, [sp, #44]\n\t" + "ldr r4, [sp, #48]\n\t" + "ldr r5, [sp, #52]\n\t" + "ldr r6, [sp, #56]\n\t" + "ldr r7, [sp, #60]\n\t" + "ldr r8, [sp, #16]\n\t" + "ldr r9, [sp, #20]\n\t" + "ldr r10, [sp, #24]\n\t" + "ldr r14, [sp, #28]\n\t" + "sbcs r3, r3, r8\n\t" + "sbcs r4, r4, r9\n\t" + "sbcs r5, r5, r10\n\t" + "sbcs r6, r6, r14\n\t" + "sbc r7, r7, #0\n\t" + "str r3, [sp, #44]\n\t" + "str r4, [sp, #48]\n\t" + "str r5, [sp, #52]\n\t" + "str r6, [sp, #56]\n\t" + "str r7, [sp, #60]\n\t" + "# mask m and sub from result if overflow\n\t" + "sub r12, %[a], r12\n\t" + "and %[a], r12, #1\n\t" + "ldr r3, [sp, #32]\n\t" + "ldr r4, [sp, #36]\n\t" + "ldr r5, [sp, #40]\n\t" + "ldr r6, [sp, #44]\n\t" + "ldr r7, [sp, #48]\n\t" + "ldr r8, [sp, #52]\n\t" + "ldr r9, [sp, #56]\n\t" + "ldr r10, [sp, #60]\n\t" + "subs r3, r3, r12\n\t" + "sbcs r4, r4, r12\n\t" + "sbcs r5, r5, r12\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, #0\n\t" + "sbcs r8, r8, #0\n\t" + "sbcs r9, r9, %[a]\n\t" + "sbc r10, r10, r12\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "str r7, [%[r], #16]\n\t" + "str r8, [%[r], #20]\n\t" + "str r9, [%[r], #24]\n\t" + "str r10, [%[r], #28]\n\t" + "add sp, sp, #68\n\t" + : [a] "+r" (a) + : [r] "r" (r) + : "memory", "r8", "r9", "r10", "r14", "r3", "r4", "r5", "r6", "r7", "r12" + ); +} + +#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY) +/* Square the Montgomery form number a number of times. (r = a ^ n mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * n Number of times to square. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n, + const sp_digit* m, sp_digit mp) +{ + sp_256_mont_sqr_8(r, a, m, mp); + for (; n > 1; n--) { + sp_256_mont_sqr_8(r, r, m, mp); + } +} + +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ +#ifdef WOLFSSL_SP_SMALL +/* Mod-2 for the P256 curve. */ +static const uint32_t p256_mod_minus_2[8] = { + 0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U, + 0x00000001U,0xffffffffU +}; +#endif /* !WOLFSSL_SP_SMALL */ + +/* Invert the number, in Montgomery form, modulo the modulus (prime) of the + * P256 curve. (r = 1 / a mod m) + * + * r Inverse result. + * a Number to invert. + * td Temporary data. 
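+ *
+ * For illustration (a hypothetical check, not code used here): since the
+ * result is the Montgomery form of 1/a, multiplying it back by a gives
+ * the Montgomery form of one, which is 2^256 mod p256, i.e. the value of
+ * p256_norm_mod. Assumes a is a non-zero value below p256 in Montgomery
+ * form; the scratch size covers both build variants (kept out of
+ * compilation):
+ */
+#if 0
+static int mont_inv_check(const sp_digit* a)
+{
+    sp_digit td[8 * 6];     /* scratch for t1..t3 */
+    sp_digit inv[8];
+    sp_digit one[8];
+
+    sp_256_mont_inv_8(inv, a, td);
+    sp_256_mont_mul_8(one, inv, a, p256_mod, p256_mp_mod);
+    return XMEMCMP(one, p256_norm_mod, sizeof(one)) == 0;
+}
+#endif
+/* Implementation: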
+ */
+static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 8);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_8(t, t, p256_mod, p256_mp_mod);
+        if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_256_mont_mul_8(t, t, a, p256_mod, p256_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 8);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 8;
+    sp_digit* t3 = td + 4 * 8;
+    /* 0x2 */
+    sp_256_mont_sqr_8(t1, a, p256_mod, p256_mp_mod);
+    /* 0x3 */
+    sp_256_mont_mul_8(t2, t1, a, p256_mod, p256_mp_mod);
+    /* 0xc */
+    sp_256_mont_sqr_n_8(t1, t2, 2, p256_mod, p256_mp_mod);
+    /* 0xd */
+    sp_256_mont_mul_8(t3, t1, a, p256_mod, p256_mp_mod);
+    /* 0xf */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xf0 */
+    sp_256_mont_sqr_n_8(t1, t2, 4, p256_mod, p256_mp_mod);
+    /* 0xfd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xff00 */
+    sp_256_mont_sqr_n_8(t1, t2, 8, p256_mod, p256_mp_mod);
+    /* 0xfffd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffff0000 */
+    sp_256_mont_sqr_n_8(t1, t2, 16, p256_mod, p256_mp_mod);
+    /* 0xfffffffd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000000 */
+    sp_256_mont_sqr_n_8(t1, t2, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffffffffffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001 */
+    sp_256_mont_mul_8(r, t1, a, p256_mod, p256_mp_mod);
+    /* 0xffffffff000000010000000000000000000000000000000000000000 */
+    sp_256_mont_sqr_n_8(r, r, 160, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+    sp_256_mont_mul_8(r, r, t2, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+    sp_256_mont_sqr_n_8(r, r, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+    sp_256_mont_mul_8(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
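+ *
+ * A portable sketch of the same contract (an illustration, not the code
+ * used here; it avoids data-dependent branches but makes no claim about a
+ * given compiler's output; kept out of compilation):
+ */
+#if 0
+static int32_t cmp_8_sketch(const sp_digit* a, const sp_digit* b)
+{
+    int32_t r = 0;
+    sp_digit mask = 0xffffffffU;    /* all ones until a difference is seen */
+    int i;
+
+    for (i = 7; i >= 0; i--) {
+        sp_digit ai = a[i] & mask;
+        sp_digit bi = b[i] & mask;
+        /* lt/gt are 1 exactly when ai < bi / ai > bi */
+        sp_digit lt = (sp_digit)(((word64)ai - bi) >> 63);
+        sp_digit gt = (sp_digit)(((word64)bi - ai) >> 63);
+
+        r |= (int32_t)gt - (int32_t)lt;
+        /* once a word differs, mask all remaining words to zero */
+        mask &= ~((sp_digit)0 - (lt | gt));
+    }
+    return r;
+}
+#endif
+/* Implementation (ARM inline assembly):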
+ */ +static int32_t sp_256_cmp_8(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = -1; + sp_digit one = 1; + + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mov r3, #-1\n\t" + "mov r6, #28\n\t" + "1:\n\t" + "ldr r4, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "subs r6, r6, #4\n\t" + "bcs 1b\n\t" + "eor %[r], %[r], r3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "r3", "r4", "r5", "r6", "r7" + ); +#else + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mov r3, #-1\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "eor %[r], %[r], r3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "r3", "r4", "r5", "r6", "r7" + ); +#endif + + return r; +} + +/* Normalize the values in each word to 32. + * + * a Array of sp_digit to normalize. + */ +#define sp_256_norm_8(a) + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
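+ *
+ * Computes r = a - (b & m) across all eight words: with m == -1 the full
+ * b is subtracted, with m == 0 the subtraction degenerates to a copy of
+ * a. Both cases execute the same instructions, which is what makes the
+ * conditional reduction constant time. Word-level C sketch:
+ *
+ *     sp_digit borrow = 0;
+ *     for (i = 0; i < 8; i++) {
+ *         uint64_t d = (uint64_t)a[i] - (b[i] & m) - borrow;
+ *         r[i] = (sp_digit)d;
+ *         borrow = (sp_digit)(d >> 32) & 1;
+ *     }
+ *     return (sp_digit)0 - borrow;        // -1 on borrow out, else 0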
+ */ +static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r9, #0\n\t" + "mov r8, #0\n\t" + "1:\n\t" + "subs %[c], r9, %[c]\n\t" + "ldr r4, [%[a], r8]\n\t" + "ldr r5, [%[b], r8]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc %[c], r9, r9\n\t" + "str r4, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, #32\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#else + __asm__ __volatile__ ( + + "mov r9, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "subs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + +#define sp_256_mont_reduce_order_8 sp_256_mont_reduce_8 + +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
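+ *
+ * Classic word-by-word Montgomery reduction: each pass multiplies the
+ * current low word by mp (the negative inverse of m mod 2^32), adds
+ * mu * m so that word becomes zero, and moves up one word; after eight
+ * passes the low 256 bits are zero and the high half is the reduced
+ * value. One pass in C, roughly:
+ *
+ *     mu = (sp_digit)(a[i] * mp);
+ *     carry = 0;
+ *     for (j = 0; j < 8; j++) {
+ *         uint64_t t = (uint64_t)a[i + j] + (uint64_t)mu * m[j] + carry;
+ *         a[i + j] = (sp_digit)t;
+ *         carry = (sp_digit)(t >> 32);
+ *     }
+ *     // the final carry then propagates into a[i + 8]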
+ */ +SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "# i = 0\n\t" + "mov r12, #0\n\t" + "ldr r10, [%[a], #0]\n\t" + "ldr r14, [%[a], #4]\n\t" + "\n1:\n\t" + "# mu = a[i] * mp\n\t" + "mul r8, %[mp], r10\n\t" + "# a[i+0] += m[0] * mu\n\t" + "ldr r7, [%[m], #0]\n\t" + "ldr r9, [%[a], #0]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" + "# a[i+1] += m[1] * mu\n\t" + "ldr r7, [%[m], #4]\n\t" + "ldr r9, [%[a], #4]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r10, r14, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r10, r10, r5\n\t" + "adc r4, r4, #0\n\t" + "# a[i+2] += m[2] * mu\n\t" + "ldr r7, [%[m], #8]\n\t" + "ldr r14, [%[a], #8]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r14, r14, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r14, r14, r4\n\t" + "adc r5, r5, #0\n\t" + "# a[i+3] += m[3] * mu\n\t" + "ldr r7, [%[m], #12]\n\t" + "ldr r9, [%[a], #12]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+4] += m[4] * mu\n\t" + "ldr r7, [%[m], #16]\n\t" + "ldr r9, [%[a], #16]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+5] += m[5] * mu\n\t" + "ldr r7, [%[m], #20]\n\t" + "ldr r9, [%[a], #20]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+6] += m[6] * mu\n\t" + "ldr r7, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+7] += m[7] * mu\n\t" + "ldr r7, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r7, r7, %[ca]\n\t" + "mov %[ca], #0\n\t" + "adc %[ca], %[ca], %[ca]\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #28]\n\t" + "ldr r9, [%[a], #32]\n\t" + "adcs r9, r9, r7\n\t" + "str r9, [%[a], #32]\n\t" + "adc %[ca], %[ca], #0\n\t" + "# i += 1\n\t" + "add %[a], %[a], #4\n\t" + "add r12, r12, #4\n\t" + "cmp r12, #32\n\t" + "blt 1b\n\t" + "str r10, [%[a], #0]\n\t" + "str r14, [%[a], #4]\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca); +} + +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. + * p Montgomery form projective coordinate point. + * t Temporary ordinate data. + */ +static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + int32_t n; + + sp_256_mont_inv_8(t1, p->z, t + 2*8); + + sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod); + + /* x /= z^2 */ + sp_256_mont_mul_8(r->x, p->x, t2, p256_mod, p256_mp_mod); + XMEMSET(r->x + 8, 0, sizeof(r->x) / 2U); + sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod); + /* Reduce x to less than modulus */ + n = sp_256_cmp_8(r->x, p256_mod); + sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - ((n >= 0) ? 
+            (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_8(r->x);
+
+    /* y /= z^3 */
+    sp_256_mont_mul_8(r->y, p->y, t1, p256_mod, p256_mp_mod);
+    XMEMSET(r->y + 8, 0, sizeof(r->y) / 2U);
+    sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_256_cmp_8(r->y, p256_mod);
+    sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_8(r->y);
+
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add r12, %[a], #32\n\t"
+        "\n1:\n\t"
+        "adds %[c], %[c], #-1\n\t"
+        "ldr r4, [%[a]], #4\n\t"
+        "ldr r5, [%[a]], #4\n\t"
+        "ldr r6, [%[a]], #4\n\t"
+        "ldr r7, [%[a]], #4\n\t"
+        "ldr r8, [%[b]], #4\n\t"
+        "ldr r9, [%[b]], #4\n\t"
+        "ldr r10, [%[b]], #4\n\t"
+        "ldr r14, [%[b]], #4\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r]], #4\n\t"
+        "str r5, [%[r]], #4\n\t"
+        "str r6, [%[r]], #4\n\t"
+        "str r7, [%[r]], #4\n\t"
+        "mov r4, #0\n\t"
+        "adc %[c], r4, #0\n\t"
+        "cmp %[a], r12\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov r12, #0\n\t"
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[a], #4]\n\t"
+        "ldr r6, [%[a], #8]\n\t"
+        "ldr r7, [%[a], #12]\n\t"
+        "ldr r8, [%[b], #0]\n\t"
+        "ldr r9, [%[b], #4]\n\t"
+        "ldr r10, [%[b], #8]\n\t"
+        "ldr r14, [%[b], #12]\n\t"
+        "adds r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "str r5, [%[r], #4]\n\t"
+        "str r6, [%[r], #8]\n\t"
+        "str r7, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[a], #20]\n\t"
+        "ldr r6, [%[a], #24]\n\t"
+        "ldr r7, [%[a], #28]\n\t"
+        "ldr r8, [%[b], #16]\n\t"
+        "ldr r9, [%[b], #20]\n\t"
+        "ldr r10, [%[b], #24]\n\t"
+        "ldr r14, [%[b], #28]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "str r5, [%[r], #20]\n\t"
+        "str r6, [%[r], #24]\n\t"
+        "str r7, [%[r], #28]\n\t"
+        "adc %[c], r12, r12\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r  Result of addition.
+ * a  First number to add in Montgomery form.
+ * b  Second number to add in Montgomery form.
+ * m  Modulus (prime).
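+ *
+ * A carry out of the 256-bit addition is spread into an all-ones mask
+ * and a single conditional subtraction of the modulus follows, relying
+ * on the fixed shape of p256 = 2^256 - 2^224 + 2^192 + 2^96 - 1: words
+ * 0-2 and 7 subtract the mask, word 6 subtracts (mask & 1), and words
+ * 3-5 subtract zero. Sketch:
+ *
+ *     carry = add_256(r, a, b);           // 0 or 1
+ *     mask  = (sp_digit)0 - carry;        // 0 or 0xffffffff
+ *     // subtract {mask, mask, mask, 0, 0, 0, mask & 1, mask} with borrow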
+ */
+static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "mov r12, #0\n\t"
+        "ldr r4, [%[a],#0]\n\t"
+        "ldr r5, [%[a],#4]\n\t"
+        "ldr r6, [%[a],#8]\n\t"
+        "ldr r7, [%[a],#12]\n\t"
+        "ldr r8, [%[b],#0]\n\t"
+        "ldr r9, [%[b],#4]\n\t"
+        "ldr r10, [%[b],#8]\n\t"
+        "ldr r14, [%[b],#12]\n\t"
+        "adds r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r],#0]\n\t"
+        "str r5, [%[r],#4]\n\t"
+        "str r6, [%[r],#8]\n\t"
+        "str r7, [%[r],#12]\n\t"
+        "ldr r4, [%[a],#16]\n\t"
+        "ldr r5, [%[a],#20]\n\t"
+        "ldr r6, [%[a],#24]\n\t"
+        "ldr r7, [%[a],#28]\n\t"
+        "ldr r8, [%[b],#16]\n\t"
+        "ldr r9, [%[b],#20]\n\t"
+        "ldr r10, [%[b],#24]\n\t"
+        "ldr r14, [%[b],#28]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "adc r3, r12, #0\n\t"
+        "sub r3, r12, r3\n\t"
+        "and r12, r3, #1\n\t"
+        "ldr r8, [%[r],#0]\n\t"
+        "ldr r9, [%[r],#4]\n\t"
+        "ldr r10, [%[r],#8]\n\t"
+        "ldr r14, [%[r],#12]\n\t"
+        "subs r8, r8, r3\n\t"
+        "sbcs r9, r9, r3\n\t"
+        "sbcs r10, r10, r3\n\t"
+        "sbcs r14, r14, #0\n\t"
+        "sbcs r4, r4, #0\n\t"
+        "sbcs r5, r5, #0\n\t"
+        "sbcs r6, r6, r12\n\t"
+        "sbc r7, r7, r3\n\t"
+        "str r8, [%[r],#0]\n\t"
+        "str r9, [%[r],#4]\n\t"
+        "str r10, [%[r],#8]\n\t"
+        "str r14, [%[r],#12]\n\t"
+        "str r4, [%[r],#16]\n\t"
+        "str r5, [%[r],#20]\n\t"
+        "str r6, [%[r],#24]\n\t"
+        "str r7, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12"
+    );
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r  Result of doubling.
+ * a  Number to double in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "mov r12, #0\n\t"
+        "ldr r4, [%[a],#0]\n\t"
+        "ldr r5, [%[a],#4]\n\t"
+        "ldr r6, [%[a],#8]\n\t"
+        "ldr r7, [%[a],#12]\n\t"
+        "ldr r8, [%[a],#16]\n\t"
+        "ldr r9, [%[a],#20]\n\t"
+        "ldr r10, [%[a],#24]\n\t"
+        "ldr r14, [%[a],#28]\n\t"
+        "adds r4, r4, r4\n\t"
+        "adcs r5, r5, r5\n\t"
+        "adcs r6, r6, r6\n\t"
+        "adcs r7, r7, r7\n\t"
+        "adcs r8, r8, r8\n\t"
+        "adcs r9, r9, r9\n\t"
+        "adcs r10, r10, r10\n\t"
+        "adcs r14, r14, r14\n\t"
+        "adc r3, r12, #0\n\t"
+        "sub r3, r12, r3\n\t"
+        "and r12, r3, #1\n\t"
+        "subs r4, r4, r3\n\t"
+        "sbcs r5, r5, r3\n\t"
+        "sbcs r6, r6, r3\n\t"
+        "sbcs r7, r7, #0\n\t"
+        "sbcs r8, r8, #0\n\t"
+        "sbcs r9, r9, #0\n\t"
+        "sbcs r10, r10, r12\n\t"
+        "sbc r14, r14, r3\n\t"
+        "str r4, [%[r],#0]\n\t"
+        "str r5, [%[r],#4]\n\t"
+        "str r6, [%[r],#8]\n\t"
+        "str r7, [%[r],#12]\n\t"
+        "str r8, [%[r],#16]\n\t"
+        "str r9, [%[r],#20]\n\t"
+        "str r10, [%[r],#24]\n\t"
+        "str r14, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12"
+    );
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r  Result of tripling.
+ * a  Number to triple in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "mov r12, #0\n\t"
+        "ldr r4, [%[a],#0]\n\t"
+        "ldr r5, [%[a],#4]\n\t"
+        "ldr r6, [%[a],#8]\n\t"
+        "ldr r7, [%[a],#12]\n\t"
+        "ldr r8, [%[a],#16]\n\t"
+        "ldr r9, [%[a],#20]\n\t"
+        "ldr r10, [%[a],#24]\n\t"
+        "ldr r14, [%[a],#28]\n\t"
+        "adds r4, r4, r4\n\t"
+        "adcs r5, r5, r5\n\t"
+        "adcs r6, r6, r6\n\t"
+        "adcs r7, r7, r7\n\t"
+        "adcs r8, r8, r8\n\t"
+        "adcs r9, r9, r9\n\t"
+        "adcs r10, r10, r10\n\t"
+        "adcs r14, r14, r14\n\t"
+        "adc r3, r12, #0\n\t"
+        "sub r3, r12, r3\n\t"
+        "and r12, r3, #1\n\t"
+        "subs r4, r4, r3\n\t"
+        "sbcs r5, r5, r3\n\t"
+        "sbcs r6, r6, r3\n\t"
+        "sbcs r7, r7, #0\n\t"
+        "sbcs r8, r8, #0\n\t"
+        "sbcs r9, r9, #0\n\t"
+        "sbcs r10, r10, r12\n\t"
+        "sbc r14, r14, r3\n\t"
+        "str r8, [%[r],#16]\n\t"
+        "str r9, [%[r],#20]\n\t"
+        "str r10, [%[r],#24]\n\t"
+        "str r14, [%[r],#28]\n\t"
+        "mov r12, #0\n\t"
+        "ldr r8, [%[a],#0]\n\t"
+        "ldr r9, [%[a],#4]\n\t"
+        "ldr r10, [%[a],#8]\n\t"
+        "ldr r14, [%[a],#12]\n\t"
+        "adds r8, r8, r4\n\t"
+        "adcs r9, r9, r5\n\t"
+        "adcs r10, r10, r6\n\t"
+        "adcs r14, r14, r7\n\t"
+        "str r8, [%[r],#0]\n\t"
+        "str r9, [%[r],#4]\n\t"
+        "str r10, [%[r],#8]\n\t"
+        "str r14, [%[r],#12]\n\t"
+        "ldr r8, [%[a],#16]\n\t"
+        "ldr r9, [%[a],#20]\n\t"
+        "ldr r10, [%[a],#24]\n\t"
+        "ldr r14, [%[a],#28]\n\t"
+        "ldr r4, [%[r],#16]\n\t"
+        "ldr r5, [%[r],#20]\n\t"
+        "ldr r6, [%[r],#24]\n\t"
+        "ldr r7, [%[r],#28]\n\t"
+        "adcs r8, r8, r4\n\t"
+        "adcs r9, r9, r5\n\t"
+        "adcs r10, r10, r6\n\t"
+        "adcs r14, r14, r7\n\t"
+        "adc r3, r12, #0\n\t"
+        "sub r3, r12, r3\n\t"
+        "and r12, r3, #1\n\t"
+        "ldr r4, [%[r],#0]\n\t"
+        "ldr r5, [%[r],#4]\n\t"
+        "ldr r6, [%[r],#8]\n\t"
+        "ldr r7, [%[r],#12]\n\t"
+        "subs r4, r4, r3\n\t"
+        "sbcs r5, r5, r3\n\t"
+        "sbcs r6, r6, r3\n\t"
+        "sbcs r7, r7, #0\n\t"
+        "sbcs r8, r8, #0\n\t"
+        "sbcs r9, r9, #0\n\t"
+        "sbcs r10, r10, r12\n\t"
+        "sbc r14, r14, r3\n\t"
+        "str r4, [%[r],#0]\n\t"
+        "str r5, [%[r],#4]\n\t"
+        "str r6, [%[r],#8]\n\t"
+        "str r7, [%[r],#12]\n\t"
+        "str r8, [%[r],#16]\n\t"
+        "str r9, [%[r],#20]\n\t"
+        "str r10, [%[r],#24]\n\t"
+        "str r14, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12"
+    );
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r  Result of subtraction.
+ * a  Number to subtract from in Montgomery form.
+ * b  Number to subtract with in Montgomery form.
+ * m  Modulus (prime).
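+ *
+ * Mirror of sp_256_mont_add_8: a borrow out of the 256-bit subtraction
+ * becomes an all-ones mask and the modulus is conditionally added back,
+ * using the same fixed p256 word pattern. Sketch:
+ *
+ *     borrow = sub_256(r, a, b);          // 0 or 1
+ *     mask   = (sp_digit)0 - borrow;
+ *     // add {mask, mask, mask, 0, 0, 0, mask & 1, mask} with carry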
+ */ +static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + (void)m; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "ldr r4, [%[a],#0]\n\t" + "ldr r5, [%[a],#4]\n\t" + "ldr r6, [%[a],#8]\n\t" + "ldr r7, [%[a],#12]\n\t" + "ldr r8, [%[b],#0]\n\t" + "ldr r9, [%[b],#4]\n\t" + "ldr r10, [%[b],#8]\n\t" + "ldr r14, [%[b],#12]\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r14\n\t" + "str r4, [%[r],#0]\n\t" + "str r5, [%[r],#4]\n\t" + "str r6, [%[r],#8]\n\t" + "str r7, [%[r],#12]\n\t" + "ldr r4, [%[a],#16]\n\t" + "ldr r5, [%[a],#20]\n\t" + "ldr r6, [%[a],#24]\n\t" + "ldr r7, [%[a],#28]\n\t" + "ldr r8, [%[b],#16]\n\t" + "ldr r9, [%[b],#20]\n\t" + "ldr r10, [%[b],#24]\n\t" + "ldr r14, [%[b],#28]\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r14\n\t" + "sbc r3, r12, #0\n\t" + "and r12, r3, #1\n\t" + "ldr r8, [%[r],#0]\n\t" + "ldr r9, [%[r],#4]\n\t" + "ldr r10, [%[r],#8]\n\t" + "ldr r14, [%[r],#12]\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r3\n\t" + "adcs r10, r10, r3\n\t" + "adcs r14, r14, #0\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, r12\n\t" + "adc r7, r7, r3\n\t" + "str r8, [%[r],#0]\n\t" + "str r9, [%[r],#4]\n\t" + "str r10, [%[r],#8]\n\t" + "str r14, [%[r],#12]\n\t" + "str r4, [%[r],#16]\n\t" + "str r5, [%[r],#20]\n\t" + "str r6, [%[r],#24]\n\t" + "str r7, [%[r],#28]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12" + ); +} + +/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) + * + * r Result of division by 2. + * a Number to divide. + * m Modulus (prime). + */ +static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "and r9, r3, #1\n\t" + "sub r7, r10, r9\n\t" + "and r8, r7, #1\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, r7\n\t" + "adcs r5, r5, r7\n\t" + "adcs r6, r6, r10\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "adcs r3, r3, r10\n\t" + "adcs r4, r4, r10\n\t" + "adcs r5, r5, r8\n\t" + "adcs r6, r6, r7\n\t" + "adc r9, r10, r10\n\t" + "lsr r7, r3, #1\n\t" + "and r3, r3, #1\n\t" + "lsr r8, r4, #1\n\t" + "lsr r10, r5, #1\n\t" + "lsr r14, r6, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r8, r8, r5, lsl #31\n\t" + "orr r10, r10, r6, lsl #31\n\t" + "orr r14, r14, r9, lsl #31\n\t" + "mov r9, r3\n\t" + "str r7, [%[r], #16]\n\t" + "str r8, [%[r], #20]\n\t" + "str r10, [%[r], #24]\n\t" + "str r14, [%[r], #28]\n\t" + "ldr r3, [%[r], #0]\n\t" + "ldr r4, [%[r], #4]\n\t" + "ldr r5, [%[r], #8]\n\t" + "ldr r6, [%[r], #12]\n\t" + "lsr r7, r3, #1\n\t" + "lsr r8, r4, #1\n\t" + "lsr r10, r5, #1\n\t" + "lsr r14, r6, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r8, r8, r5, lsl #31\n\t" + "orr r10, r10, r6, lsl #31\n\t" + "orr r14, r14, r9, lsl #31\n\t" + "str r7, [%[r], #0]\n\t" + "str r8, [%[r], #4]\n\t" + "str r10, [%[r], #8]\n\t" + "str r14, [%[r], #12]\n\t" + : + : [r] "r" (r), [a] "r" (a), [m] "r" (m) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10", "r14", "r9" + ); + +} + +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. 
+ * p Point to double. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_8(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_8(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_8(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_8(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_8(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_8(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_8(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_8(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_8(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_8(y, y, t2, p256_mod); +} + +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add r12, %[a], #32\n\t" + "\n1:\n\t" + "rsbs %[c], %[c], #0\n\t" + "ldr r4, [%[a]], #4\n\t" + "ldr r5, [%[a]], #4\n\t" + "ldr r6, [%[a]], #4\n\t" + "ldr r7, [%[a]], #4\n\t" + "ldr r8, [%[b]], #4\n\t" + "ldr r9, [%[b]], #4\n\t" + "ldr r10, [%[b]], #4\n\t" + "ldr r14, [%[b]], #4\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r14\n\t" + "str r4, [%[r]], #4\n\t" + "str r5, [%[r]], #4\n\t" + "str r6, [%[r]], #4\n\t" + "str r7, [%[r]], #4\n\t" + "sbc %[c], r4, r4\n\t" + "cmp %[a], r12\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #0]\n\t" + "ldr r8, [%[b], #4]\n\t" + "ldr r9, [%[b], #8]\n\t" + "ldr r10, [%[b], #12]\n\t" + "subs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #16]\n\t" + "ldr r8, [%[b], #20]\n\t" + "ldr r9, [%[b], #24]\n\t" + "ldr r10, [%[b], #28]\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "sbc %[c], %[c], #0\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Compare two numbers to determine if they are equal. + * Constant time implementation. + * + * a First number to compare. + * b Second number to compare. + * returns 1 when equal and 0 otherwise. + */ +static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) | + (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7])) == 0; +} + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_add_8(sp_point_256* r, const sp_point_256* p, const sp_point_256* q, + sp_digit* t) +{ + const sp_point_256* ap[2]; + sp_point_256* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + sp_digit* t3 = t + 4*8; + sp_digit* t4 = t + 6*8; + sp_digit* t5 = t + 8*8; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Ensure only the first point is the same as the result. 
*/ + if (q == r) { + const sp_point_256* a = p; + p = q; + q = a; + } + + /* Check double */ + (void)sp_256_sub_8(t1, p256_mod, q->y); + sp_256_norm_8(t1); + if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & + (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { + sp_256_proj_point_dbl_8(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_256)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<8; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<8; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<8; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t1, t1, x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_8(t3, t3, y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_8(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_8(t4, t4, t3, p256_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(x, x, t5, p256_mod); + sp_256_mont_dbl_8(t1, y, p256_mod); + sp_256_mont_sub_8(x, x, t1, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_8(y, y, x, p256_mod); + sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(y, y, t5, p256_mod); + } +} + +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
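+ *
+ * Fixed 4-bit window method: t[0..15] hold the multiples 0*g .. 15*g,
+ * the scalar is consumed one nibble at a time from the top, and each
+ * nibble costs four doublings plus one table addition. Main loop in
+ * outline (hypothetical dbl()/add() shorthand):
+ *
+ *     acc = t[top nibble of k];
+ *     for each remaining nibble y of k, high to low:
+ *         acc = dbl(dbl(dbl(dbl(acc))));
+ *         acc = add(acc, t[y]);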
+ */ +static int sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k, + int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 td[16]; + sp_point_256 rtd; + sp_digit tmpd[2 * 8 * 5]; +#endif + sp_point_256* t; + sp_point_256* rt; + sp_digit* tmp; + sp_digit n; + int i; + int c, y; + int err; + + (void)heap; + + err = sp_256_point_new_8(heap, rtd, rt); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#else + t = td; + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + (void)sp_256_mod_mul_norm_8(t[1].x, g->x, p256_mod); + (void)sp_256_mod_mul_norm_8(t[1].y, g->y, p256_mod); + (void)sp_256_mod_mul_norm_8(t[1].z, g->z, p256_mod); + t[1].infinity = 0; + sp_256_proj_point_dbl_8(&t[ 2], &t[ 1], tmp); + t[ 2].infinity = 0; + sp_256_proj_point_add_8(&t[ 3], &t[ 2], &t[ 1], tmp); + t[ 3].infinity = 0; + sp_256_proj_point_dbl_8(&t[ 4], &t[ 2], tmp); + t[ 4].infinity = 0; + sp_256_proj_point_add_8(&t[ 5], &t[ 3], &t[ 2], tmp); + t[ 5].infinity = 0; + sp_256_proj_point_dbl_8(&t[ 6], &t[ 3], tmp); + t[ 6].infinity = 0; + sp_256_proj_point_add_8(&t[ 7], &t[ 4], &t[ 3], tmp); + t[ 7].infinity = 0; + sp_256_proj_point_dbl_8(&t[ 8], &t[ 4], tmp); + t[ 8].infinity = 0; + sp_256_proj_point_add_8(&t[ 9], &t[ 5], &t[ 4], tmp); + t[ 9].infinity = 0; + sp_256_proj_point_dbl_8(&t[10], &t[ 5], tmp); + t[10].infinity = 0; + sp_256_proj_point_add_8(&t[11], &t[ 6], &t[ 5], tmp); + t[11].infinity = 0; + sp_256_proj_point_dbl_8(&t[12], &t[ 6], tmp); + t[12].infinity = 0; + sp_256_proj_point_add_8(&t[13], &t[ 7], &t[ 6], tmp); + t[13].infinity = 0; + sp_256_proj_point_dbl_8(&t[14], &t[ 7], tmp); + t[14].infinity = 0; + sp_256_proj_point_add_8(&t[15], &t[ 8], &t[ 7], tmp); + t[15].infinity = 0; + + i = 6; + n = k[i+1] << 0; + c = 28; + y = n >> 28; + XMEMCPY(rt, &t[y], sizeof(sp_point_256)); + n <<= 4; + for (; i>=0 || c>=4; ) { + if (c < 4) { + n |= k[i--]; + c += 32; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + + sp_256_proj_point_dbl_8(rt, rt, tmp); + sp_256_proj_point_dbl_8(rt, rt, tmp); + sp_256_proj_point_dbl_8(rt, rt, tmp); + sp_256_proj_point_dbl_8(rt, rt, tmp); + + sp_256_proj_point_add_8(rt, rt, &t[y], tmp); + } + + if (map != 0) { + sp_256_map_8(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_256)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 8 * 5); + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + } + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_point_256) * 16); + XFREE(t, heap, DYNAMIC_TYPE_ECC); + } +#else + ForceZero(tmpd, sizeof(tmpd)); + ForceZero(td, sizeof(td)); +#endif + sp_256_point_free_8(rt, 1, heap); + + return err; +} + +/* A table entry for pre-computed points. */ +typedef struct sp_table_entry_256 { + sp_digit x[8]; + sp_digit y[8]; +} sp_table_entry_256; + +#ifdef FP_ECC +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. 
+ * n Number of times to double + * t Temporary ordinate data. + */ +static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*8; + sp_digit* b = t + 4*8; + sp_digit* t1 = t + 6*8; + sp_digit* t2 = t + 8*8; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_256_mont_dbl_8(y, y, p256_mod); + /* W = Z^4 */ + sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod); + +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(t1, t1, w, p256_mod); + sp_256_mont_tpl_8(a, t1, p256_mod); + /* B = X*Y^2 */ + sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod); + /* X = A^2 - 2B */ + sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(t2, b, p256_mod); + sp_256_mont_sub_8(x, x, t2, p256_mod); + /* Z = Z*Y */ + sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod); + /* t2 = Y^4 */ + sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_256_mont_mul_8(w, w, t1, p256_mod, p256_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_256_mont_sub_8(y, b, x, p256_mod); + sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(y, y, p256_mod); + sp_256_mont_sub_8(y, y, t1, p256_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(t1, t1, w, p256_mod); + sp_256_mont_tpl_8(a, t1, p256_mod); + /* B = X*Y^2 */ + sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod); + /* X = A^2 - 2B */ + sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(t2, b, p256_mod); + sp_256_mont_sub_8(x, x, t2, p256_mod); + /* Z = Z*Y */ + sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod); + /* t2 = Y^4 */ + sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_256_mont_sub_8(y, b, x, p256_mod); + sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(y, y, p256_mod); + sp_256_mont_sub_8(y, y, t1, p256_mod); +#endif + /* Y = Y/2 */ + sp_256_div2_8(y, y, p256_mod); +} + +#endif /* FP_ECC */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
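+ *
+ * Mixed addition: with Z2 known to be 1 (q comes from an affine table
+ * entry), U1 = X1 and S1 = Y1 need no work, so the Z2^2 and Z2^3
+ * multiplications of the general sp_256_proj_point_add_8 disappear:
+ *
+ *     u2 = x2*z1^2;  s2 = y2*z1^3;
+ *     h  = u2 - x1;  rr = s2 - y1;
+ *     x3 = rr^2 - h^3 - 2*x1*h^2;
+ *     y3 = rr*(x1*h^2 - x3) - y1*h^3;
+ *     z3 = z1*h;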
+ */ +static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, + const sp_point_256* q, sp_digit* t) +{ + const sp_point_256* ap[2]; + sp_point_256* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + sp_digit* t3 = t + 4*8; + sp_digit* t4 = t + 6*8; + sp_digit* t5 = t + 8*8; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Check double */ + (void)sp_256_sub_8(t1, p256_mod, q->y); + sp_256_norm_8(t1); + if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & + (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { + sp_256_proj_point_dbl_8(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_256)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<8; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<8; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<8; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); + /* H = U2 - X1 */ + sp_256_mont_sub_8(t2, t2, x, p256_mod); + /* R = S2 - Y1 */ + sp_256_mont_sub_8(t4, t4, y, p256_mod); + /* Z3 = H*Z1 */ + sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_256_mont_sqr_8(t1, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t3, x, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(x, t1, t5, p256_mod); + sp_256_mont_dbl_8(t1, t3, p256_mod); + sp_256_mont_sub_8(x, x, t1, p256_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_256_mont_sub_8(t3, t3, x, p256_mod); + sp_256_mont_mul_8(t3, t3, t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, y, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(y, t3, t5, p256_mod); + } +} + +#ifdef WOLFSSL_SP_SMALL +#ifdef FP_ECC +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_256_proj_to_affine_8(sp_point_256* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 8; + sp_digit* tmp = t + 4 * 8; + + sp_256_mont_inv_8(t1, a->z, tmp); + + sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod); + + sp_256_mont_mul_8(a->x, a->x, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(a->y, a->y, t1, p256_mod, p256_mp_mod); + XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod)); +} + +/* Generate the pre-computed table of points for the base point. + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. 
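+ *
+ * Builds the comb table for the stripe method: entry j (0..15) holds
+ * the affine sum, over the set bits i of j, of 2^(64*i) * a. The powers
+ * come from runs of 64 doublings and the composite entries from one
+ * mixed addition each, e.g. table[5] = table[4] + table[1]. Outline:
+ *
+ *     table[0] = infinity;  table[1] = a;
+ *     table[2^i] = dbl_n(table[2^(i-1)], 64)      for i = 1..3
+ *     table[j]   = table[2^i] + table[j - 2^i]    for the remaining j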
+ */
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_8(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<4; i++) {
+            sp_256_proj_point_dbl_n_8(t, 64, tmp);
+            sp_256_proj_to_affine_8(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<4; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
+                sp_256_proj_to_affine_8(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_8(s2, 0, heap);
+    sp_256_point_free_8(s1, 0, heap);
+    sp_256_point_free_8( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r    Resulting point.
+ * k    Scalar to multiply by.
+ * map  Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
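+ *
+ * Stripe evaluation: the 256-bit scalar is viewed as four 64-bit
+ * stripes and one bit of each stripe is consumed per iteration, so the
+ * main loop is 64 rounds of one doubling plus one mixed addition. Table
+ * index for bit position i, as the loop below computes it:
+ *
+ *     y = bit(k, i)            | (bit(k, i +  64) << 1)
+ *       | (bit(k, i + 128) << 2) | (bit(k, i + 192) << 3);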
+ */
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit td[2 * 8 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_256_point_new_8(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+            DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        y = 0;
+        for (j=0,x=63; j<4; j++,x+=64) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=62; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<4; j++,x+=64) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_8(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_8(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_8(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, heap);
+    sp_256_point_free_8(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_256_t {
+    sp_digit x[8];
+    sp_digit y[8];
+    sp_table_entry_256 table[16];
+    uint32_t cnt;
+    int set;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_256_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_256[i].set)
+            continue;
+
+        if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+                sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_256[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_256[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r    Resulting point.
+ * g    Point to multiply.
+ * k    Scalar to multiply by.
+ * map  Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
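+ *
+ * With FP_ECC enabled, a per-point stripe table is cached: the first
+ * two multiplications by a given point use the plain windowed routine,
+ * and when the use count reaches two the table is generated so later
+ * calls take the cheaper stripe path. Dispatch outline:
+ *
+ *     cache = sp_ecc_get_cache_256(g);        // LRU over FP_ENTRIES
+ *     if (cache->cnt == 2) generate stripe table for g;
+ *     use the fast path while cnt < 2, the stripe path afterwards
+ *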
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 8 * 5];
+    sp_cache_256_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_256 == 0) {
+         wc_InitMutex(&sp_cache_256_lock);
+         initCacheMutex_256 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_256_lock) != 0)
+        err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_256(g, &cache);
+        if (cache->cnt == 2)
+            sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+        }
+        else {
+            err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap   Heap to use for allocation.
+ */
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_8(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_256_proj_point_dbl_n_8(t, 32, tmp);
+            sp_256_proj_to_affine_8(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
+                sp_256_proj_to_affine_8(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_8(s2, 0, heap);
+    sp_256_point_free_8(s1, 0, heap);
+    sp_256_point_free_8( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r    Resulting point.
+ * k    Scalar to multiply by.
+ * map  Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit td[2 * 8 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_256_point_new_8(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+            DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        y = 0;
+        for (j=0,x=31; j<8; j++,x+=32) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=30; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=32) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_8(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_8(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_8(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, heap);
+    sp_256_point_free_8(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_256_t {
+    sp_digit x[8];
+    sp_digit y[8];
+    sp_table_entry_256 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_256_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_256[i].set)
+            continue;
+
+        if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+                sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_256[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_256[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r    Resulting point.
+ * g    Point to multiply.
+ * k    Scalar to multiply by.
+ * map  Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k, + int map, void* heap) +{ +#ifndef FP_ECC + return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap); +#else + sp_digit tmp[2 * 8 * 5]; + sp_cache_256_t* cache; + int err = MP_OKAY; + +#ifndef HAVE_THREAD_LS + if (initCacheMutex_256 == 0) { + wc_InitMutex(&sp_cache_256_lock); + initCacheMutex_256 = 1; + } + if (wc_LockMutex(&sp_cache_256_lock) != 0) + err = BAD_MUTEX_E; +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_256(g, &cache); + if (cache->cnt == 2) + sp_256_gen_stripe_table_8(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_256_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap); + } + else { + err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k, + map, heap); + } + } + + return err; +#endif +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map, + void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 p; + sp_digit kd[8]; +#endif + sp_point_256* point; + sp_digit* k = NULL; + int err = MP_OKAY; + + err = sp_256_point_new_8(heap, p, point); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + if (err == MP_OKAY) { + sp_256_from_mp(k, 8, km); + sp_256_point_from_ecc_point_8(point, gm); + + err = sp_256_ecc_mulmod_8(point, point, k, map, heap); + } + if (err == MP_OKAY) { + err = sp_256_point_to_ecc_point_8(point, r); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_8(point, 0, heap); + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +static const sp_table_entry_256 p256_table[16] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b, + 0xa53755c6,0x18905f76 }, + { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688, + 0x25885d85,0x8571ff18 } }, + /* 2 */ + { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a, + 0xfd1b667f,0x2f5e6961 }, + { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37, + 0x8d6f0f7b,0xf648f916 } }, + /* 3 */ + { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761, + 0x133d0015,0x5abe0285 }, + { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562, + 0x6b6f7383,0x94bb725b } }, + /* 4 */ + { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5, + 0x21d324f6,0x61d587d4 }, + { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e, + 0x4621efbe,0xfa11fe12 } }, + /* 5 */ + { { 
0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67, + 0x1f13bedc,0x586eb04c }, + { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0, + 0x70864f11,0x19d5ac08 } }, + /* 6 */ + { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a, + 0xc3b266b1,0xbb6de651 }, + { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1, + 0x5d18b99b,0x60b4619a } }, + /* 7 */ + { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014, + 0xaeebffcd,0x9d0f27b2 }, + { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0, + 0x356ec48d,0x244a566d } }, + /* 8 */ + { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e, + 0xcd42ab1b,0x803f3e02 }, + { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273, + 0x5067adc1,0xc097440e } }, + /* 9 */ + { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459, + 0x915f1f30,0xf1af32d5 }, + { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418, + 0xe2d41c8b,0x23d0f130 } }, + /* 10 */ + { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926, + 0x7990216a,0x50bbb4d9 }, + { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b, + 0x01fe49c3,0x2b100118 } }, + /* 11 */ + { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa, + 0x83fbae0c,0xdd558999 }, + { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf, + 0x149d6041,0xe6e4c551 } }, + /* 12 */ + { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07, + 0xdb7e63af,0xfad27148 }, + { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875, + 0x9f0e1a84,0x77387de3 } }, + /* 13 */ + { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408, + 0xbef0c47e,0xb37b85c0 }, + { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa, + 0xf9f628d5,0x9c135ac8 } }, + /* 14 */ + { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403, + 0x91ece900,0xc109f9cb }, + { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d, + 0x2eee1ee1,0x9bc3344f } }, + /* 15 */ + { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665, + 0x5f1a4cc1,0x29591d52 }, + { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496, + 0x18ef332c,0x6376551f } }, +}; + +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
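+ *
+ * p256_table above is a pre-generated stripe table for the fixed base
+ * point G, so base-point multiplications skip table generation and go
+ * straight to the stripe routine:
+ *
+ *     sp_256_ecc_mulmod_base_8(r, k, 1, NULL)
+ *         == sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
+ *                                       k, 1, NULL)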
+ */ +static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k, + int map, void* heap) +{ + return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table, + k, map, heap); +} + +#else +static const sp_table_entry_256 p256_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b, + 0xa53755c6,0x18905f76 }, + { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688, + 0x25885d85,0x8571ff18 } }, + /* 2 */ + { { 0x4147519a,0x20288602,0x26b372f0,0xd0981eac,0xa785ebc8,0xa9d4a7ca, + 0xdbdf58e9,0xd953c50d }, + { 0xfd590f8f,0x9d6361cc,0x44e6c917,0x72e9626b,0x22eb64cf,0x7fd96110, + 0x9eb288f3,0x863ebb7e } }, + /* 3 */ + { { 0x5cdb6485,0x7856b623,0x2f0a2f97,0x808f0ea2,0x4f7e300b,0x3e68d954, + 0xb5ff80a0,0x00076055 }, + { 0x838d2010,0x7634eb9b,0x3243708a,0x54014fbb,0x842a6606,0xe0e47d39, + 0x34373ee0,0x83087761 } }, + /* 4 */ + { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a, + 0xfd1b667f,0x2f5e6961 }, + { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37, + 0x8d6f0f7b,0xf648f916 } }, + /* 5 */ + { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761, + 0x133d0015,0x5abe0285 }, + { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562, + 0x6b6f7383,0x94bb725b } }, + /* 6 */ + { { 0x720f141c,0xbbf9b48f,0x2df5bc74,0x6199b3cd,0x411045c4,0xdc3f6129, + 0x2f7dc4ef,0xcdd6bbcb }, + { 0xeaf436fd,0xcca6700b,0xb99326be,0x6f647f6d,0x014f2522,0x0c0fa792, + 0x4bdae5f6,0xa361bebd } }, + /* 7 */ + { { 0x597c13c7,0x28aa2558,0x50b7c3e1,0xc38d635f,0xf3c09d1d,0x07039aec, + 0xc4b5292c,0xba12ca09 }, + { 0x59f91dfd,0x9e408fa4,0xceea07fb,0x3af43b66,0x9d780b29,0x1eceb089, + 0x701fef4b,0x53ebb99d } }, + /* 8 */ + { { 0xb0e63d34,0x4fe7ee31,0xa9e54fab,0xf4600572,0xd5e7b5a4,0xc0493334, + 0x06d54831,0x8589fb92 }, + { 0x6583553a,0xaa70f5cc,0xe25649e5,0x0879094a,0x10044652,0xcc904507, + 0x02541c4f,0xebb0696d } }, + /* 9 */ + { { 0xac1647c5,0x4616ca15,0xc4cf5799,0xb8127d47,0x764dfbac,0xdc666aa3, + 0xd1b27da3,0xeb2820cb }, + { 0x6a87e008,0x9406f8d8,0x922378f3,0xd87dfa9d,0x80ccecb2,0x56ed2e42, + 0x55a7da1d,0x1f28289b } }, + /* 10 */ + { { 0x3b89da99,0xabbaa0c0,0xb8284022,0xa6f2d79e,0xb81c05e8,0x27847862, + 0x05e54d63,0x337a4b59 }, + { 0x21f7794a,0x3c67500d,0x7d6d7f61,0x207005b7,0x04cfd6e8,0x0a5a3781, + 0xf4c2fbd6,0x0d65e0d5 } }, + /* 11 */ + { { 0xb5275d38,0xd9d09bbe,0x0be0a358,0x4268a745,0x973eb265,0xf0762ff4, + 0x52f4a232,0xc23da242 }, + { 0x0b94520c,0x5da1b84f,0xb05bd78e,0x09666763,0x94d29ea1,0x3a4dcb86, + 0xc790cff1,0x19de3b8c } }, + /* 12 */ + { { 0x26c5fe04,0x183a716c,0x3bba1bdb,0x3b28de0b,0xa4cb712c,0x7432c586, + 0x91fccbfd,0xe34dcbd4 }, + { 0xaaa58403,0xb408d46b,0x82e97a53,0x9a697486,0x36aaa8af,0x9e390127, + 0x7b4e0f7f,0xe7641f44 } }, + /* 13 */ + { { 0xdf64ba59,0x7d753941,0x0b0242fc,0xd33f10ec,0xa1581859,0x4f06dfc6, + 0x052a57bf,0x4a12df57 }, + { 0x9439dbd0,0xbfa6338f,0xbde53e1f,0xd3c24bd4,0x21f1b314,0xfd5e4ffa, + 0xbb5bea46,0x6af5aa93 } }, + /* 14 */ + { { 0x10c91999,0xda10b699,0x2a580491,0x0a24b440,0xb8cc2090,0x3e0094b4, + 0x66a44013,0x5fe3475a }, + { 0xf93e7b4b,0xb0f8cabd,0x7c23f91a,0x292b501a,0xcd1e6263,0x42e889ae, + 0xecfea916,0xb544e308 } }, + /* 15 */ + { { 0x16ddfdce,0x6478c6e9,0xf89179e6,0x2c329166,0x4d4e67e1,0x4e8d6e76, + 0xa6b0c20b,0xe0b6b2bd }, + { 0xbb7efb57,0x0d312df2,0x790c4007,0x1aac0dde,0x679bc944,0xf90336ad, + 0x25a63774,0x71c023de } }, + /* 16 */ + { { 
0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5, + 0x21d324f6,0x61d587d4 }, + { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e, + 0x4621efbe,0xfa11fe12 } }, + /* 17 */ + { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67, + 0x1f13bedc,0x586eb04c }, + { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0, + 0x70864f11,0x19d5ac08 } }, + /* 18 */ + { { 0x309a4e1f,0x1e99f581,0xe9270074,0xab7de71b,0xefd28d20,0x26a5ef0b, + 0x7f9c563f,0xe7c0073f }, + { 0x0ef59f76,0x1f6d663a,0x20fcb050,0x669b3b54,0x7a6602d4,0xc08c1f7a, + 0xc65b3c0a,0xe08504fe } }, + /* 19 */ + { { 0xa031b3ca,0xf098f68d,0xe6da6d66,0x6d1cab9e,0x94f246e8,0x5bfd81fa, + 0x5b0996b4,0x78f01882 }, + { 0x3a25787f,0xb7eefde4,0x1dccac9b,0x8016f80d,0xb35bfc36,0x0cea4877, + 0x7e94747a,0x43a773b8 } }, + /* 20 */ + { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a, + 0xc3b266b1,0xbb6de651 }, + { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1, + 0x5d18b99b,0x60b4619a } }, + /* 21 */ + { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014, + 0xaeebffcd,0x9d0f27b2 }, + { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0, + 0x356ec48d,0x244a566d } }, + /* 22 */ + { { 0xeacf1f96,0x6db0394a,0x024c271c,0x9f2122a9,0x82cbd3b9,0x2626ac1b, + 0x3581ef69,0x45e58c87 }, + { 0xa38f9dbc,0xd3ff479d,0xe888a040,0xa8aaf146,0x46e0bed7,0x945adfb2, + 0xc1e4b7a4,0xc040e21c } }, + /* 23 */ + { { 0x6f8117b6,0x847af000,0x73a35433,0x651969ff,0x1d9475eb,0x482b3576, + 0x682c6ec7,0x1cdf5c97 }, + { 0x11f04839,0x7db775b4,0x48de1698,0x7dbeacf4,0xb70b3219,0xb2921dd1, + 0xa92dff3d,0x046755f8 } }, + /* 24 */ + { { 0xbce8ffcd,0xcc8ac5d2,0x2fe61a82,0x0d53c48b,0x7202d6c7,0xf6f16172, + 0x3b83a5f3,0x046e5e11 }, + { 0xd8007f01,0xe7b8ff64,0x5af43183,0x7fb1ef12,0x35e1a03c,0x045c5ea6, + 0x303d005b,0x6e0106c3 } }, + /* 25 */ + { { 0x88dd73b1,0x48c73584,0x995ed0d9,0x7670708f,0xc56a2ab7,0x38385ea8, + 0xe901cf1f,0x442594ed }, + { 0x12d4b65b,0xf8faa2c9,0x96c90c37,0x94c2343b,0x5e978d1f,0xd326e4a1, + 0x4c2ee68e,0xa796fa51 } }, + /* 26 */ + { { 0x823addd7,0x359fb604,0xe56693b3,0x9e2a6183,0x3cbf3c80,0xf885b78e, + 0xc69766e9,0xe4ad2da9 }, + { 0x8e048a61,0x357f7f42,0xc092d9a0,0x082d198c,0xc03ed8ef,0xfc3a1af4, + 0xc37b5143,0xc5e94046 } }, + /* 27 */ + { { 0x2be75f9e,0x476a538c,0xcb123a78,0x6fd1a9e8,0xb109c04b,0xd85e4df0, + 0xdb464747,0x63283daf }, + { 0xbaf2df15,0xce728cf7,0x0ad9a7f4,0xe592c455,0xe834bcc3,0xfab226ad, + 0x1981a938,0x68bd19ab } }, + /* 28 */ + { { 0x1887d659,0xc08ead51,0xb359305a,0x3374d5f4,0xcfe74fe3,0x96986981, + 0x3c6fdfd6,0x495292f5 }, + { 0x1acec896,0x4a878c9e,0xec5b4484,0xd964b210,0x664d60a7,0x6696f7e2, + 0x26036837,0x0ec7530d } }, + /* 29 */ + { { 0xad2687bb,0x2da13a05,0xf32e21fa,0xa1f83b6a,0x1dd4607b,0x390f5ef5, + 0x64863f0b,0x0f6207a6 }, + { 0x0f138233,0xbd67e3bb,0x272aa718,0xdd66b96c,0x26ec88ae,0x8ed00407, + 0x08ed6dcf,0xff0db072 } }, + /* 30 */ + { { 0x4c95d553,0x749fa101,0x5d680a8a,0xa44052fd,0xff3b566f,0x183b4317, + 0x88740ea3,0x313b513c }, + { 0x08d11549,0xb402e2ac,0xb4dee21c,0x071ee10b,0x47f2320e,0x26b987dd, + 0x86f19f81,0x2d3abcf9 } }, + /* 31 */ + { { 0x815581a2,0x4c288501,0x632211af,0x9a0a6d56,0x0cab2e99,0x19ba7a0f, + 0xded98cdf,0xc036fa10 }, + { 0xc1fbd009,0x29ae08ba,0x06d15816,0x0b68b190,0x9b9e0d8f,0xc2eb3277, + 0xb6d40194,0xa6b2a2c4 } }, + /* 32 */ + { { 0x6d3549cf,0xd433e50f,0xfacd665e,0x6f33696f,0xce11fcb4,0x695bfdac, + 0xaf7c9860,0x810ee252 }, + { 0x7159bb2c,0x65450fe1,0x758b357b,0xf7dfbebe,0xd69fea72,0x2b057e74, + 
0x92731745,0xd485717a } }, + /* 33 */ + { { 0xf0cb5a98,0x11741a8a,0x1f3110bf,0xd3da8f93,0xab382adf,0x1994e2cb, + 0x2f9a604e,0x6a6045a7 }, + { 0xa2b2411d,0x170c0d3f,0x510e96e0,0xbe0eb83e,0x8865b3cc,0x3bcc9f73, + 0xf9e15790,0xd3e45cfa } }, + /* 34 */ + { { 0xe83f7669,0xce1f69bb,0x72877d6b,0x09f8ae82,0x3244278d,0x9548ae54, + 0xe3c2c19c,0x207755de }, + { 0x6fef1945,0x87bd61d9,0xb12d28c3,0x18813cef,0x72df64aa,0x9fbcd1d6, + 0x7154b00d,0x48dc5ee5 } }, + /* 35 */ + { { 0xf7e5a199,0x123790bf,0x989ccbb7,0xe0efb8cf,0x0a519c79,0xc27a2bfe, + 0xdff6f445,0xf2fb0aed }, + { 0xf0b5025f,0x41c09575,0x40fa9f22,0x550543d7,0x380bfbd0,0x8fa3c8ad, + 0xdb28d525,0xa13e9015 } }, + /* 36 */ + { { 0xa2b65cbc,0xf9f7a350,0x2a464226,0x0b04b972,0xe23f07a1,0x265ce241, + 0x1497526f,0x2bf0d6b0 }, + { 0x4b216fb7,0xd3d4dd3f,0xfbdda26a,0xf7d7b867,0x6708505c,0xaeb7b83f, + 0x162fe89f,0x42a94a5a } }, + /* 37 */ + { { 0xeaadf191,0x5846ad0b,0x25a268d7,0x0f8a4890,0x494dc1f6,0xe8603050, + 0xc65ede3d,0x2c2dd969 }, + { 0x93849c17,0x6d02171d,0x1da250dd,0x460488ba,0x3c3a5485,0x4810c706, + 0x42c56dbc,0xf437fa1f } }, + /* 38 */ + { { 0x4a0f7dab,0x6aa0d714,0x1776e9ac,0x0f049793,0xf5f39786,0x52c0a050, + 0x54707aa8,0xaaf45b33 }, + { 0xc18d364a,0x85e37c33,0x3e497165,0xd40b9b06,0x15ec5444,0xf4171681, + 0xf4f272bc,0xcdf6310d } }, + /* 39 */ + { { 0x8ea8b7ef,0x7473c623,0x85bc2287,0x08e93518,0x2bda8e34,0x41956772, + 0xda9e2ff2,0xf0d008ba }, + { 0x2414d3b1,0x2912671d,0xb019ea76,0xb3754985,0x453bcbdb,0x5c61b96d, + 0xca887b8b,0x5bd5c2f5 } }, + /* 40 */ + { { 0xf49a3154,0xef0f469e,0x6e2b2e9a,0x3e85a595,0xaa924a9c,0x45aaec1e, + 0xa09e4719,0xaa12dfc8 }, + { 0x4df69f1d,0x26f27227,0xa2ff5e73,0xe0e4c82c,0xb7a9dd44,0xb9d8ce73, + 0xe48ca901,0x6c036e73 } }, + /* 41 */ + { { 0x0f6e3138,0x5cfae12a,0x25ad345a,0x6966ef00,0x45672bc5,0x8993c64b, + 0x96afbe24,0x292ff658 }, + { 0x5e213402,0xd5250d44,0x4392c9fe,0xf6580e27,0xda1c72e8,0x097b397f, + 0x311b7276,0x644e0c90 } }, + /* 42 */ + { { 0xa47153f0,0xe1e421e1,0x920418c9,0xb86c3b79,0x705d7672,0x93bdce87, + 0xcab79a77,0xf25ae793 }, + { 0x6d869d0c,0x1f3194a3,0x4986c264,0x9d55c882,0x096e945e,0x49fb5ea3, + 0x13db0a3e,0x39b8e653 } }, + /* 43 */ + { { 0xb6fd2e59,0x37754200,0x9255c98f,0x35e2c066,0x0e2a5739,0xd9dab21a, + 0x0f19db06,0x39122f2f }, + { 0x03cad53c,0xcfbce1e0,0xe65c17e3,0x225b2c0f,0x9aa13877,0x72baf1d2, + 0xce80ff8d,0x8de80af8 } }, + /* 44 */ + { { 0x207bbb76,0xafbea8d9,0x21782758,0x921c7e7c,0x1c0436b1,0xdfa2b74b, + 0x2e368c04,0x87194906 }, + { 0xa3993df5,0xb5f928bb,0xf3b3d26a,0x639d75b5,0x85b55050,0x011aa78a, + 0x5b74fde1,0xfc315e6a } }, + /* 45 */ + { { 0xe8d6ecfa,0x561fd41a,0x1aec7f86,0x5f8c44f6,0x4924741d,0x98452a7b, + 0xee389088,0xe6d4a7ad }, + { 0x4593c75d,0x60552ed1,0xdd271162,0x70a70da4,0x7ba2c7db,0xd2aede93, + 0x9be2ae57,0x35dfaf9a } }, + /* 46 */ + { { 0xaa736636,0x6b956fcd,0xae2cab7e,0x09f51d97,0x0f349966,0xfb10bf41, + 0x1c830d2b,0x1da5c7d7 }, + { 0x3cce6825,0x5c41e483,0xf9573c3b,0x15ad118f,0xf23036b8,0xa28552c7, + 0xdbf4b9d6,0x7077c0fd } }, + /* 47 */ + { { 0x46b9661c,0xbf63ff8d,0x0d2cfd71,0xa1dfd36b,0xa847f8f7,0x0373e140, + 0xe50efe44,0x53a8632e }, + { 0x696d8051,0x0976ff68,0xc74f468a,0xdaec0c95,0x5e4e26bd,0x62994dc3, + 0x34e1fcc1,0x028ca76d } }, + /* 48 */ + { { 0xfc9877ee,0xd11d47dc,0x801d0002,0xc8b36210,0x54c260b6,0xd002c117, + 0x6962f046,0x04c17cd8 }, + { 0xb0daddf5,0x6d9bd094,0x24ce55c0,0xbea23575,0x72da03b5,0x663356e6, + 0xfed97474,0xf7ba4de9 } }, + /* 49 */ + { { 0xebe1263f,0xd0dbfa34,0x71ae7ce6,0x55763735,0x82a6f523,0xd2440553, + 0x52131c41,0xe31f9600 }, + { 
0xea6b6ec6,0xd1bb9216,0x73c2fc44,0x37a1d12e,0x89d0a294,0xc10e7eac, + 0xce34d47b,0xaa3a6259 } }, + /* 50 */ + { { 0x36f3dcd3,0xfbcf9df5,0xd2bf7360,0x6ceded50,0xdf504f5b,0x491710fa, + 0x7e79daee,0x2398dd62 }, + { 0x6d09569e,0xcf4705a3,0x5149f769,0xea0619bb,0x35f6034c,0xff9c0377, + 0x1c046210,0x5717f5b2 } }, + /* 51 */ + { { 0x21dd895e,0x9fe229c9,0x40c28451,0x8e518500,0x1d637ecd,0xfa13d239, + 0x0e3c28de,0x660a2c56 }, + { 0xd67fcbd0,0x9cca88ae,0x0ea9f096,0xc8472478,0x72e92b4d,0x32b2f481, + 0x4f522453,0x624ee54c } }, + /* 52 */ + { { 0xd897eccc,0x09549ce4,0x3f9880aa,0x4d49d1d9,0x043a7c20,0x723c2423, + 0x92bdfbc0,0x4f392afb }, + { 0x7de44fd9,0x6969f8fa,0x57b32156,0xb66cfbe4,0x368ebc3c,0xdb2fa803, + 0xccdb399c,0x8a3e7977 } }, + /* 53 */ + { { 0x06c4b125,0xdde1881f,0xf6e3ca8c,0xae34e300,0x5c7a13e9,0xef6999de, + 0x70c24404,0x3888d023 }, + { 0x44f91081,0x76280356,0x5f015504,0x3d9fcf61,0x632cd36e,0x1827edc8, + 0x18102336,0xa5e62e47 } }, + /* 54 */ + { { 0x2facd6c8,0x1a825ee3,0x54bcbc66,0x699c6354,0x98df9931,0x0ce3edf7, + 0x466a5adc,0x2c4768e6 }, + { 0x90a64bc9,0xb346ff8c,0xe4779f5c,0x630a6020,0xbc05e884,0xd949d064, + 0xf9e652a0,0x7b5e6441 } }, + /* 55 */ + { { 0x1d28444a,0x2169422c,0xbe136a39,0xe996c5d8,0xfb0c7fce,0x2387afe5, + 0x0c8d744a,0xb8af73cb }, + { 0x338b86fd,0x5fde83aa,0xa58a5cff,0xfee3f158,0x20ac9433,0xc9ee8f6f, + 0x7f3f0895,0xa036395f } }, + /* 56 */ + { { 0xa10f7770,0x8c73c6bb,0xa12a0e24,0xa6f16d81,0x51bc2b9f,0x100df682, + 0x875fb533,0x4be36b01 }, + { 0x9fb56dbb,0x9226086e,0x07e7a4f8,0x306fef8b,0x66d52f20,0xeeaccc05, + 0x1bdc00c0,0x8cbc9a87 } }, + /* 57 */ + { { 0xc0dac4ab,0xe131895c,0x712ff112,0xa874a440,0x6a1cee57,0x6332ae7c, + 0x0c0835f8,0x44e7553e }, + { 0x7734002d,0x6d503fff,0x0b34425c,0x9d35cb8b,0x0e8738b5,0x95f70276, + 0x5eb8fc18,0x470a683a } }, + /* 58 */ + { { 0x90513482,0x81b761dc,0x01e9276a,0x0287202a,0x0ce73083,0xcda441ee, + 0xc63dc6ef,0x16410690 }, + { 0x6d06a2ed,0xf5034a06,0x189b100b,0xdd4d7745,0xab8218c9,0xd914ae72, + 0x7abcbb4f,0xd73479fd } }, + /* 59 */ + { { 0x5ad4c6e5,0x7edefb16,0x5b06d04d,0x262cf08f,0x8575cb14,0x12ed5bb1, + 0x0771666b,0x816469e3 }, + { 0x561e291e,0xd7ab9d79,0xc1de1661,0xeb9daf22,0x135e0513,0xf49827eb, + 0xf0dd3f9c,0x0a36dd23 } }, + /* 60 */ + { { 0x41d5533c,0x098d32c7,0x8684628f,0x7c5f5a9e,0xe349bd11,0x39a228ad, + 0xfdbab118,0xe331dfd6 }, + { 0x6bcc6ed8,0x5100ab68,0xef7a260e,0x7160c3bd,0xbce850d7,0x9063d9a7, + 0x492e3389,0xd3b4782a } }, + /* 61 */ + { { 0xf3821f90,0xa149b6e8,0x66eb7aad,0x92edd9ed,0x1a013116,0x0bb66953, + 0x4c86a5bd,0x7281275a }, + { 0xd3ff47e5,0x503858f7,0x61016441,0x5e1616bc,0x7dfd9bb1,0x62b0f11a, + 0xce145059,0x2c062e7e } }, + /* 62 */ + { { 0x0159ac2e,0xa76f996f,0xcbdb2713,0x281e7736,0x08e46047,0x2ad6d288, + 0x2c4e7ef1,0x282a35f9 }, + { 0xc0ce5cd2,0x9c354b1e,0x1379c229,0xcf99efc9,0x3e82c11e,0x992caf38, + 0x554d2abd,0xc71cd513 } }, + /* 63 */ + { { 0x09b578f4,0x4885de9c,0xe3affa7a,0x1884e258,0x59182f1f,0x8f76b1b7, + 0xcf47f3a3,0xc50f6740 }, + { 0x374b68ea,0xa9c4adf3,0x69965fe2,0xa406f323,0x85a53050,0x2f86a222, + 0x212958dc,0xb9ecb3a7 } }, + /* 64 */ + { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e, + 0xcd42ab1b,0x803f3e02 }, + { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273, + 0x5067adc1,0xc097440e } }, + /* 65 */ + { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459, + 0x915f1f30,0xf1af32d5 }, + { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418, + 0xe2d41c8b,0x23d0f130 } }, + /* 66 */ + { { 
0xf41500d9,0x857ab6ed,0xfcbeada8,0x0d890ae5,0x89725951,0x52fe8648, + 0xc0a3fadd,0xb0288dd6 }, + { 0x650bcb08,0x85320f30,0x695d6e16,0x71af6313,0xb989aa76,0x31f520a7, + 0xf408c8d2,0xffd3724f } }, + /* 67 */ + { { 0xb458e6cb,0x53968e64,0x317a5d28,0x992dad20,0x7aa75f56,0x3814ae0b, + 0xd78c26df,0xf5590f4a }, + { 0xcf0ba55a,0x0fc24bd3,0x0c778bae,0x0fc4724a,0x683b674a,0x1ce9864f, + 0xf6f74a20,0x18d6da54 } }, + /* 68 */ + { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926, + 0x7990216a,0x50bbb4d9 }, + { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b, + 0x01fe49c3,0x2b100118 } }, + /* 69 */ + { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa, + 0x83fbae0c,0xdd558999 }, + { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf, + 0x149d6041,0xe6e4c551 } }, + /* 70 */ + { { 0x1e9af288,0x55f655bb,0xf7ada931,0x647e1a64,0xcb2820e5,0x43697e4b, + 0x07ed56ff,0x51e00db1 }, + { 0x771c327e,0x43d169b8,0x4a96c2ad,0x29cdb20b,0x3deb4779,0xc07d51f5, + 0x49829177,0xe22f4241 } }, + /* 71 */ + { { 0x635f1abb,0xcd45e8f4,0x68538874,0x7edc0cb5,0xb5a8034d,0xc9472c1f, + 0x52dc48c9,0xf709373d }, + { 0xa8af30d6,0x401966bb,0xf137b69c,0x95bf5f4a,0x9361c47e,0x3966162a, + 0xe7275b11,0xbd52d288 } }, + /* 72 */ + { { 0x9c5fa877,0xab155c7a,0x7d3a3d48,0x17dad672,0x73d189d8,0x43f43f9e, + 0xc8aa77a6,0xa0d0f8e4 }, + { 0xcc94f92d,0x0bbeafd8,0x0c4ddb3a,0xd818c8be,0xb82eba14,0x22cc65f8, + 0x946d6a00,0xa56c78c7 } }, + /* 73 */ + { { 0x0dd09529,0x2962391b,0x3daddfcf,0x803e0ea6,0x5b5bf481,0x2c77351f, + 0x731a367a,0xd8befdf8 }, + { 0xfc0157f4,0xab919d42,0xfec8e650,0xf51caed7,0x02d48b0a,0xcdf9cb40, + 0xce9f6478,0x854a68a5 } }, + /* 74 */ + { { 0x63506ea5,0xdc35f67b,0xa4fe0d66,0x9286c489,0xfe95cd4d,0x3f101d3b, + 0x98846a95,0x5cacea0b }, + { 0x9ceac44d,0xa90df60c,0x354d1c3a,0x3db29af4,0xad5dbabe,0x08dd3de8, + 0x35e4efa9,0xe4982d12 } }, + /* 75 */ + { { 0xc34cd55e,0x23104a22,0x2680d132,0x58695bb3,0x1fa1d943,0xfb345afa, + 0x16b20499,0x8046b7f6 }, + { 0x38e7d098,0xb533581e,0xf46f0b70,0xd7f61e8d,0x44cb78c4,0x30dea9ea, + 0x9082af55,0xeb17ca7b } }, + /* 76 */ + { { 0x76a145b9,0x1751b598,0xc1bc71ec,0xa5cf6b0f,0x392715bb,0xd3e03565, + 0xfab5e131,0x097b00ba }, + { 0x565f69e1,0xaa66c8e9,0xb5be5199,0x77e8f75a,0xda4fd984,0x6033ba11, + 0xafdbcc9e,0xf95c747b } }, + /* 77 */ + { { 0xbebae45e,0x558f01d3,0xc4bc6955,0xa8ebe9f0,0xdbc64fc6,0xaeb705b1, + 0x566ed837,0x3512601e }, + { 0xfa1161cd,0x9336f1e1,0x4c65ef87,0x328ab8d5,0x724f21e5,0x4757eee2, + 0x6068ab6b,0x0ef97123 } }, + /* 78 */ + { { 0x54ca4226,0x02598cf7,0xf8642c8e,0x5eede138,0x468e1790,0x48963f74, + 0x3b4fbc95,0xfc16d933 }, + { 0xe7c800ca,0xbe96fb31,0x2678adaa,0x13806331,0x6ff3e8b5,0x3d624497, + 0xb95d7a17,0x14ca4af1 } }, + /* 79 */ + { { 0xbd2f81d5,0x7a4771ba,0x01f7d196,0x1a5f9d69,0xcad9c907,0xd898bef7, + 0xf59c231d,0x4057b063 }, + { 0x89c05c0a,0xbffd82fe,0x1dc0df85,0xe4911c6f,0xa35a16db,0x3befccae, + 0xf1330b13,0x1c3b5d64 } }, + /* 80 */ + { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07, + 0xdb7e63af,0xfad27148 }, + { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875, + 0x9f0e1a84,0x77387de3 } }, + /* 81 */ + { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408, + 0xbef0c47e,0xb37b85c0 }, + { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa, + 0xf9f628d5,0x9c135ac8 } }, + /* 82 */ + { { 0x84e35743,0x32aa3202,0x85a3cdef,0x320d6ab1,0x1df19819,0xb821b176, + 0xc433851f,0x5721361f }, + { 0x71fc9168,0x1f0db36a,0x5e5c403c,0x5f98ba73,0x37bcd8f5,0xf64ca87e, + 
0xe6bb11bd,0xdcbac3c9 } }, + /* 83 */ + { { 0x4518cbe2,0xf01d9968,0x9c9eb04e,0xd242fc18,0xe47feebf,0x727663c7, + 0x2d626862,0xb8c1c89e }, + { 0xc8e1d569,0x51a58bdd,0xb7d88cd0,0x563809c8,0xf11f31eb,0x26c27fd9, + 0x2f9422d4,0x5d23bbda } }, + /* 84 */ + { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403, + 0x91ece900,0xc109f9cb }, + { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d, + 0x2eee1ee1,0x9bc3344f } }, + /* 85 */ + { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665, + 0x5f1a4cc1,0x29591d52 }, + { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496, + 0x18ef332c,0x6376551f } }, + /* 86 */ + { { 0x562976cc,0xbda5f14e,0x0ef12c38,0x22bca3e6,0x6cca9852,0xbbfa3064, + 0x08e2987a,0xbdb79dc8 }, + { 0xcb06a772,0xfd2cb5c9,0xfe536dce,0x38f475aa,0x7c2b5db8,0xc2a3e022, + 0xadd3c14a,0x8ee86001 } }, + /* 87 */ + { { 0xa4ade873,0xcbe96981,0xc4fba48c,0x7ee9aa4d,0x5a054ba5,0x2cee2899, + 0x6f77aa4b,0x92e51d7a }, + { 0x7190a34d,0x948bafa8,0xf6bd1ed1,0xd698f75b,0x0caf1144,0xd00ee6e3, + 0x0a56aaaa,0x5182f86f } }, + /* 88 */ + { { 0x7a4cc99c,0xfba6212c,0x3e6d9ca1,0xff609b68,0x5ac98c5a,0x5dbb27cb, + 0x4073a6f2,0x91dcab5d }, + { 0x5f575a70,0x01b6cc3d,0x6f8d87fa,0x0cb36139,0x89981736,0x165d4e8c, + 0x97974f2b,0x17a0cedb } }, + /* 89 */ + { { 0x076c8d3a,0x38861e2a,0x210f924b,0x701aad39,0x13a835d9,0x94d0eae4, + 0x7f4cdf41,0x2e8ce36c }, + { 0x037a862b,0x91273dab,0x60e4c8fa,0x01ba9bb7,0x33baf2dd,0xf9645388, + 0x34f668f3,0xf4ccc6cb } }, + /* 90 */ + { { 0xf1f79687,0x44ef525c,0x92efa815,0x7c595495,0xa5c78d29,0xe1231741, + 0x9a0df3c9,0xac0db488 }, + { 0xdf01747f,0x86bfc711,0xef17df13,0x592b9358,0x5ccb6bb5,0xe5880e4f, + 0x94c974a2,0x95a64a61 } }, + /* 91 */ + { { 0xc15a4c93,0x72c1efda,0x82585141,0x40269b73,0x16cb0bad,0x6a8dfb1c, + 0x29210677,0x231e54ba }, + { 0x8ae6d2dc,0xa70df917,0x39112918,0x4d6aa63f,0x5e5b7223,0xf627726b, + 0xd8a731e1,0xab0be032 } }, + /* 92 */ + { { 0x8d131f2d,0x097ad0e9,0x3b04f101,0x637f09e3,0xd5e9a748,0x1ac86196, + 0x2cf6a679,0xf1bcc880 }, + { 0xe8daacb4,0x25c69140,0x60f65009,0x3c4e4055,0x477937a6,0x591cc8fc, + 0x5aebb271,0x85169469 } }, + /* 93 */ + { { 0xf1dcf593,0xde35c143,0xb018be3b,0x78202b29,0x9bdd9d3d,0xe9cdadc2, + 0xdaad55d8,0x8f67d9d2 }, + { 0x7481ea5f,0x84111656,0xe34c590c,0xe7d2dde9,0x05053fa8,0xffdd43f4, + 0xc0728b5d,0xf84572b9 } }, + /* 94 */ + { { 0x97af71c9,0x5e1a7a71,0x7a736565,0xa1449444,0x0e1d5063,0xa1b4ae07, + 0x616b2c19,0xedee2710 }, + { 0x11734121,0xb2f034f5,0x4a25e9f0,0x1cac6e55,0xa40c2ecf,0x8dc148f3, + 0x44ebd7f4,0x9fd27e9b } }, + /* 95 */ + { { 0xf6e2cb16,0x3cc7658a,0xfe5919b6,0xe3eb7d2c,0x168d5583,0x5a8c5816, + 0x958ff387,0xa40c2fb6 }, + { 0xfedcc158,0x8c9ec560,0x55f23056,0x7ad804c6,0x9a307e12,0xd9396704, + 0x7dc6decf,0x99bc9bb8 } }, + /* 96 */ + { { 0x927dafc6,0x84a9521d,0x5c09cd19,0x52c1fb69,0xf9366dde,0x9d9581a0, + 0xa16d7e64,0x9abe210b }, + { 0x48915220,0x480af84a,0x4dd816c6,0xfa73176a,0x1681ca5a,0xc7d53987, + 0x87f344b0,0x7881c257 } }, + /* 97 */ + { { 0xe0bcf3ff,0x93399b51,0x127f74f6,0x0d02cbc5,0xdd01d968,0x8fb465a2, + 0xa30e8940,0x15e6e319 }, + { 0x3e0e05f4,0x646d6e0d,0x43588404,0xfad7bddc,0xc4f850d3,0xbe61c7d1, + 0x191172ce,0x0e55facf } }, + /* 98 */ + { { 0xf8787564,0x7e9d9806,0x31e85ce6,0x1a331721,0xb819e8d6,0x6b0158ca, + 0x6fe96577,0xd73d0976 }, + { 0x1eb7206e,0x42483425,0xc618bb42,0xa519290f,0x5e30a520,0x5dcbb859, + 0x8f15a50b,0x9250a374 } }, + /* 99 */ + { { 0xbe577410,0xcaff08f8,0x5077a8c6,0xfd408a03,0xec0a63a4,0xf1f63289, + 0xc1cc8c0b,0x77414082 }, + { 
0xeb0991cd,0x05a40fa6,0x49fdc296,0xc1ca0866,0xb324fd40,0x3a68a3c7, + 0x12eb20b9,0x8cb04f4d } }, + /* 100 */ + { { 0x6906171c,0xb1c2d055,0xb0240c3f,0x9073e9cd,0xd8906841,0xdb8e6b4f, + 0x47123b51,0xe4e429ef }, + { 0x38ec36f4,0x0b8dd53c,0xff4b6a27,0xf9d2dc01,0x879a9a48,0x5d066e07, + 0x3c6e6552,0x37bca2ff } }, + /* 101 */ + { { 0xdf562470,0x4cd2e3c7,0xc0964ac9,0x44f272a2,0x80c793be,0x7c6d5df9, + 0x3002b22a,0x59913edc }, + { 0x5750592a,0x7a139a83,0xe783de02,0x99e01d80,0xea05d64f,0xcf8c0375, + 0xb013e226,0x43786e4a } }, + /* 102 */ + { { 0x9e56b5a6,0xff32b0ed,0xd9fc68f9,0x0750d9a6,0x597846a7,0xec15e845, + 0xb7e79e7a,0x8638ca98 }, + { 0x0afc24b2,0x2f5ae096,0x4dace8f2,0x05398eaf,0xaecba78f,0x3b765dd0, + 0x7b3aa6f0,0x1ecdd36a } }, + /* 103 */ + { { 0x6c5ff2f3,0x5d3acd62,0x2873a978,0xa2d516c0,0xd2110d54,0xad94c9fa, + 0xd459f32d,0xd85d0f85 }, + { 0x10b11da3,0x9f700b8d,0xa78318c4,0xd2c22c30,0x9208decd,0x556988f4, + 0xb4ed3c62,0xa04f19c3 } }, + /* 104 */ + { { 0xed7f93bd,0x087924c8,0x392f51f6,0xcb64ac5d,0x821b71af,0x7cae330a, + 0x5c0950b0,0x92b2eeea }, + { 0x85b6e235,0x85ac4c94,0x2936c0f0,0xab2ca4a9,0xe0508891,0x80faa6b3, + 0x5834276c,0x1ee78221 } }, + /* 105 */ + { { 0xe63e79f7,0xa60a2e00,0xf399d906,0xf590e7b2,0x6607c09d,0x9021054a, + 0x57a6e150,0xf3f2ced8 }, + { 0xf10d9b55,0x200510f3,0xd8642648,0x9d2fcfac,0xe8bd0e7c,0xe5631aa7, + 0x3da3e210,0x0f56a454 } }, + /* 106 */ + { { 0x1043e0df,0x5b21bffa,0x9c007e6d,0x6c74b6cc,0xd4a8517a,0x1a656ec0, + 0x1969e263,0xbd8f1741 }, + { 0xbeb7494a,0x8a9bbb86,0x45f3b838,0x1567d46f,0xa4e5a79a,0xdf7a12a7, + 0x30ccfa09,0x2d1a1c35 } }, + /* 107 */ + { { 0x506508da,0x192e3813,0xa1d795a7,0x336180c4,0x7a9944b3,0xcddb5949, + 0xb91fba46,0xa107a65e }, + { 0x0f94d639,0xe6d1d1c5,0x8a58b7d7,0x8b4af375,0xbd37ca1c,0x1a7c5584, + 0xf87a9af2,0x183d760a } }, + /* 108 */ + { { 0x0dde59a4,0x29d69711,0x0e8bef87,0xf1ad8d07,0x4f2ebe78,0x229b4963, + 0xc269d754,0x1d44179d }, + { 0x8390d30e,0xb32dc0cf,0x0de8110c,0x0a3b2753,0x2bc0339a,0x31af1dc5, + 0x9606d262,0x771f9cc2 } }, + /* 109 */ + { { 0x85040739,0x99993e77,0x8026a939,0x44539db9,0xf5f8fc26,0xcf40f6f2, + 0x0362718e,0x64427a31 }, + { 0x85428aa8,0x4f4f2d87,0xebfb49a8,0x7b7adc3f,0xf23d01ac,0x201b2c6d, + 0x6ae90d6d,0x49d9b749 } }, + /* 110 */ + { { 0x435d1099,0xcc78d8bc,0x8e8d1a08,0x2adbcd4e,0x2cb68a41,0x02c2e2a0, + 0x3f605445,0x9037d81b }, + { 0x074c7b61,0x7cdbac27,0x57bfd72e,0xfe2031ab,0x596d5352,0x61ccec96, + 0x7cc0639c,0x08c3de6a } }, + /* 111 */ + { { 0xf6d552ab,0x20fdd020,0x05cd81f1,0x56baff98,0x91351291,0x06fb7c3e, + 0x45796b2f,0xc6909442 }, + { 0x41231bd1,0x17b3ae9c,0x5cc58205,0x1eac6e87,0xf9d6a122,0x208837ab, + 0xcafe3ac0,0x3fa3db02 } }, + /* 112 */ + { { 0x05058880,0xd75a3e65,0x643943f2,0x7da365ef,0xfab24925,0x4147861c, + 0xfdb808ff,0xc5c4bdb0 }, + { 0xb272b56b,0x73513e34,0x11b9043a,0xc8327e95,0xf8844969,0xfd8ce37d, + 0x46c2b6b5,0x2d56db94 } }, + /* 113 */ + { { 0xff46ac6b,0x2461782f,0x07a2e425,0xd19f7926,0x09a48de1,0xfafea3c4, + 0xe503ba42,0x0f56bd9d }, + { 0x345cda49,0x137d4ed1,0x816f299d,0x821158fc,0xaeb43402,0xe7c6a54a, + 0x1173b5f1,0x4003bb9d } }, + /* 114 */ + { { 0xa0803387,0x3b8e8189,0x39cbd404,0xece115f5,0xd2877f21,0x4297208d, + 0xa07f2f9e,0x53765522 }, + { 0xa8a4182d,0xa4980a21,0x3219df79,0xa2bbd07a,0x1a19a2d4,0x674d0a2e, + 0x6c5d4549,0x7a056f58 } }, + /* 115 */ + { { 0x9d8a2a47,0x646b2558,0xc3df2773,0x5b582948,0xabf0d539,0x51ec000e, + 0x7a1a2675,0x77d482f1 }, + { 0x87853948,0xb8a1bd95,0x6cfbffee,0xa6f817bd,0x80681e47,0xab6ec057, + 0x2b38b0e4,0x4115012b } }, + /* 116 */ + { { 
0x6de28ced,0x3c73f0f4,0x9b13ec47,0x1d5da760,0x6e5c6392,0x61b8ce9e, + 0xfbea0946,0xcdf04572 }, + { 0x6c53c3b0,0x1cb3c58b,0x447b843c,0x97fe3c10,0x2cb9780e,0xfb2b8ae1, + 0x97383109,0xee703dda } }, + /* 117 */ + { { 0xff57e43a,0x34515140,0xb1b811b8,0xd44660d3,0x8f42b986,0x2b3b5dff, + 0xa162ce21,0x2a0ad89d }, + { 0x6bc277ba,0x64e4a694,0xc141c276,0xc788c954,0xcabf6274,0x141aa64c, + 0xac2b4659,0xd62d0b67 } }, + /* 118 */ + { { 0x2c054ac4,0x39c5d87b,0xf27df788,0x57005859,0xb18128d6,0xedf7cbf3, + 0x991c2426,0xb39a23f2 }, + { 0xf0b16ae5,0x95284a15,0xa136f51b,0x0c6a05b1,0xf2700783,0x1d63c137, + 0xc0674cc5,0x04ed0092 } }, + /* 119 */ + { { 0x9ae90393,0x1f4185d1,0x4a3d64e6,0x3047b429,0x9854fc14,0xae0001a6, + 0x0177c387,0xa0a91fc1 }, + { 0xae2c831e,0xff0a3f01,0x2b727e16,0xbb76ae82,0x5a3075b4,0x8f12c8a1, + 0x9ed20c41,0x084cf988 } }, + /* 120 */ + { { 0xfca6becf,0xd98509de,0x7dffb328,0x2fceae80,0x4778e8b9,0x5d8a15c4, + 0x73abf77e,0xd57955b2 }, + { 0x31b5d4f1,0x210da79e,0x3cfa7a1c,0xaa52f04b,0xdc27c20b,0xd4d12089, + 0x02d141f1,0x8e14ea42 } }, + /* 121 */ + { { 0xf2897042,0xeed50345,0x43402c4a,0x8d05331f,0xc8bdfb21,0xc8d9c194, + 0x2aa4d158,0x597e1a37 }, + { 0xcf0bd68c,0x0327ec1a,0xab024945,0x6d4be0dc,0xc9fe3e84,0x5b9c8d7a, + 0x199b4dea,0xca3f0236 } }, + /* 122 */ + { { 0x6170bd20,0x592a10b5,0x6d3f5de7,0x0ea897f1,0x44b2ade2,0xa3363ff1, + 0x309c07e4,0xbde7fd7e }, + { 0xb8f5432c,0x516bb6d2,0xe043444b,0x210dc1cb,0xf8f95b5a,0x3db01e6f, + 0x0a7dd198,0xb623ad0e } }, + /* 123 */ + { { 0x60c7b65b,0xa75bd675,0x23a4a289,0xab8c5590,0xd7b26795,0xf8220fd0, + 0x58ec137b,0xd6aa2e46 }, + { 0x5138bb85,0x10abc00b,0xd833a95c,0x8c31d121,0x1702a32e,0xb24ff00b, + 0x2dcc513a,0x111662e0 } }, + /* 124 */ + { { 0xefb42b87,0x78114015,0x1b6c4dff,0xbd9f5d70,0xa7d7c129,0x66ecccd7, + 0x94b750f8,0xdb3ee1cb }, + { 0xf34837cf,0xb26f3db0,0xb9578d4f,0xe7eed18b,0x7c56657d,0x5d2cdf93, + 0x52206a59,0x886a6442 } }, + /* 125 */ + { { 0x65b569ea,0x3c234cfb,0xf72119c1,0x20011141,0xa15a619e,0x8badc85d, + 0x018a17bc,0xa70cf4eb }, + { 0x8c4a6a65,0x224f97ae,0x0134378f,0x36e5cf27,0x4f7e0960,0xbe3a609e, + 0xd1747b77,0xaa4772ab } }, + /* 126 */ + { { 0x7aa60cc0,0x67676131,0x0368115f,0xc7916361,0xbbc1bb5a,0xded98bb4, + 0x30faf974,0x611a6ddc }, + { 0xc15ee47a,0x30e78cbc,0x4e0d96a5,0x2e896282,0x3dd9ed88,0x36f35adf, + 0x16429c88,0x5cfffaf8 } }, + /* 127 */ + { { 0x9b7a99cd,0xc0d54cff,0x843c45a1,0x7bf3b99d,0x62c739e1,0x038a908f, + 0x7dc1994c,0x6e5a6b23 }, + { 0x0ba5db77,0xef8b454e,0xacf60d63,0xb7b8807f,0x76608378,0xe591c0c6, + 0x242dabcc,0x481a238d } }, + /* 128 */ + { { 0x35d0b34a,0xe3417bc0,0x8327c0a7,0x440b386b,0xac0362d1,0x8fb7262d, + 0xe0cdf943,0x2c41114c }, + { 0xad95a0b1,0x2ba5cef1,0x67d54362,0xc09b37a8,0x01e486c9,0x26d6cdd2, + 0x42ff9297,0x20477abf } }, + /* 129 */ + { { 0x18d65dbf,0x2f75173c,0x339edad8,0x77bf940e,0xdcf1001c,0x7022d26b, + 0xc77396b6,0xac66409a }, + { 0xc6261cc3,0x8b0bb36f,0x190e7e90,0x213f7bc9,0xa45e6c10,0x6541ceba, + 0xcc122f85,0xce8e6975 } }, + /* 130 */ + { { 0xbc0a67d2,0x0f121b41,0x444d248a,0x62d4760a,0x659b4737,0x0e044f1d, + 0x250bb4a8,0x08fde365 }, + { 0x848bf287,0xaceec3da,0xd3369d6e,0xc2a62182,0x92449482,0x3582dfdc, + 0x565d6cd7,0x2f7e2fd2 } }, + /* 131 */ + { { 0xc3770fa7,0xae4b92db,0x379043f9,0x095e8d5c,0x17761171,0x54f34e9d, + 0x907702ae,0xc65be92e }, + { 0xf6fd0a40,0x2758a303,0xbcce784b,0xe7d822e3,0x4f9767bf,0x7ae4f585, + 0xd1193b3a,0x4bff8e47 } }, + /* 132 */ + { { 0x00ff1480,0xcd41d21f,0x0754db16,0x2ab8fb7d,0xbbe0f3ea,0xac81d2ef, + 0x5772967d,0x3e4e4ae6 }, + { 
0x3c5303e6,0x7e18f36d,0x92262397,0x3bd9994b,0x1324c3c0,0x9ed70e26, + 0x58ec6028,0x5388aefd } }, + /* 133 */ + { { 0x5e5d7713,0xad1317eb,0x75de49da,0x09b985ee,0xc74fb261,0x32f5bc4f, + 0x4f75be0e,0x5cf908d1 }, + { 0x8e657b12,0x76043510,0xb96ed9e6,0xbfd421a5,0x8970ccc2,0x0e29f51f, + 0x60f00ce2,0xa698ba40 } }, + /* 134 */ + { { 0xef748fec,0x73db1686,0x7e9d2cf9,0xe6e755a2,0xce265eff,0x630b6544, + 0x7aebad8d,0xb142ef8a }, + { 0x17d5770a,0xad31af9f,0x2cb3412f,0x66af3b67,0xdf3359de,0x6bd60d1b, + 0x58515075,0xd1896a96 } }, + /* 135 */ + { { 0x33c41c08,0xec5957ab,0x5468e2e1,0x87de94ac,0xac472f6c,0x18816b73, + 0x7981da39,0x267b0e0b }, + { 0x8e62b988,0x6e554e5d,0x116d21e7,0xd8ddc755,0x3d2a6f99,0x4610faf0, + 0xa1119393,0xb54e287a } }, + /* 136 */ + { { 0x178a876b,0x0a0122b5,0x085104b4,0x51ff96ff,0x14f29f76,0x050b31ab, + 0x5f87d4e6,0x84abb28b }, + { 0x8270790a,0xd5ed439f,0x85e3f46b,0x2d6cb59d,0x6c1e2212,0x75f55c1b, + 0x17655640,0xe5436f67 } }, + /* 137 */ + { { 0x2286e8d5,0x53f9025e,0x864453be,0x353c95b4,0xe408e3a0,0xd832f5bd, + 0x5b9ce99e,0x0404f68b }, + { 0xa781e8e5,0xcad33bde,0x163c2f5b,0x3cdf5018,0x0119caa3,0x57576960, + 0x0ac1c701,0x3a4263df } }, + /* 138 */ + { { 0x9aeb596d,0xc2965ecc,0x023c92b4,0x01ea03e7,0x2e013961,0x4704b4b6, + 0x905ea367,0x0ca8fd3f }, + { 0x551b2b61,0x92523a42,0x390fcd06,0x1eb7a89c,0x0392a63e,0xe7f1d2be, + 0x4ddb0c33,0x96dca264 } }, + /* 139 */ + { { 0x387510af,0x203bb43a,0xa9a36a01,0x846feaa8,0x2f950378,0xd23a5770, + 0x3aad59dc,0x4363e212 }, + { 0x40246a47,0xca43a1c7,0xe55dd24d,0xb362b8d2,0x5d8faf96,0xf9b08604, + 0xd8bb98c4,0x840e115c } }, + /* 140 */ + { { 0x1023e8a7,0xf12205e2,0xd8dc7a0b,0xc808a8cd,0x163a5ddf,0xe292a272, + 0x30ded6d4,0x5e0d6abd }, + { 0x7cfc0f64,0x07a721c2,0x0e55ed88,0x42eec01d,0x1d1f9db2,0x26a7bef9, + 0x2945a25a,0x7dea48f4 } }, + /* 141 */ + { { 0xe5060a81,0xabdf6f1c,0xf8f95615,0xe79f9c72,0x06ac268b,0xcfd36c54, + 0xebfd16d1,0xabc2a2be }, + { 0xd3e2eac7,0x8ac66f91,0xd2dd0466,0x6f10ba63,0x0282d31b,0x6790e377, + 0x6c7eefc1,0x4ea35394 } }, + /* 142 */ + { { 0x5266309d,0xed8a2f8d,0x81945a3e,0x0a51c6c0,0x578c5dc1,0xcecaf45a, + 0x1c94ffc3,0x3a76e689 }, + { 0x7d7b0d0f,0x9aace8a4,0x8f584a5f,0x963ace96,0x4e697fbe,0x51a30c72, + 0x465e6464,0x8212a10a } }, + /* 143 */ + { { 0xcfab8caa,0xef7c61c3,0x0e142390,0x18eb8e84,0x7e9733ca,0xcd1dff67, + 0x599cb164,0xaa7cab71 }, + { 0xbc837bd1,0x02fc9273,0xc36af5d7,0xc06407d0,0xf423da49,0x17621292, + 0xfe0617c3,0x40e38073 } }, + /* 144 */ + { { 0xa7bf9b7c,0xf4f80824,0x3fbe30d0,0x365d2320,0x97cf9ce3,0xbfbe5320, + 0xb3055526,0xe3604700 }, + { 0x6cc6c2c7,0x4dcb9911,0xba4cbee6,0x72683708,0x637ad9ec,0xdcded434, + 0xa3dee15f,0x6542d677 } }, + /* 145 */ + { { 0x7b6c377a,0x3f32b6d0,0x903448be,0x6cb03847,0x20da8af7,0xd6fdd3a8, + 0x09bb6f21,0xa6534aee }, + { 0x1035facf,0x30a1780d,0x9dcb47e6,0x35e55a33,0xc447f393,0x6ea50fe1, + 0xdc9aef22,0xf3cb672f } }, + /* 146 */ + { { 0x3b55fd83,0xeb3719fe,0x875ddd10,0xe0d7a46c,0x05cea784,0x33ac9fa9, + 0xaae870e7,0x7cafaa2e }, + { 0x1d53b338,0x9b814d04,0xef87e6c6,0xe0acc0a0,0x11672b0f,0xfb93d108, + 0xb9bd522e,0x0aab13c1 } }, + /* 147 */ + { { 0xd2681297,0xddcce278,0xb509546a,0xcb350eb1,0x7661aaf2,0x2dc43173, + 0x847012e9,0x4b91a602 }, + { 0x72f8ddcf,0xdcff1095,0x9a911af4,0x08ebf61e,0xc372430e,0x48f4360a, + 0x72321cab,0x49534c53 } }, + /* 148 */ + { { 0xf07b7e9d,0x83df7d71,0x13cd516f,0xa478efa3,0x6c047ee3,0x78ef264b, + 0xd65ac5ee,0xcaf46c4f }, + { 0x92aa8266,0xa04d0c77,0x913684bb,0xedf45466,0xae4b16b0,0x56e65168, + 0x04c6770f,0x14ce9e57 } }, + /* 149 */ + { { 
0x965e8f91,0x99445e3e,0xcb0f2492,0xd3aca1ba,0x90c8a0a0,0xd31cc70f, + 0x3e4c9a71,0x1bb708a5 }, + { 0x558bdd7a,0xd5ca9e69,0x018a26b1,0x734a0508,0x4c9cf1ec,0xb093aa71, + 0xda300102,0xf9d126f2 } }, + /* 150 */ + { { 0xaff9563e,0x749bca7a,0xb49914a0,0xdd077afe,0xbf5f1671,0xe27a0311, + 0x729ecc69,0x807afcb9 }, + { 0xc9b08b77,0x7f8a9337,0x443c7e38,0x86c3a785,0x476fd8ba,0x85fafa59, + 0x6568cd8c,0x751adcd1 } }, + /* 151 */ + { { 0x10715c0d,0x8aea38b4,0x8f7697f7,0xd113ea71,0x93fbf06d,0x665eab14, + 0x2537743f,0x29ec4468 }, + { 0xb50bebbc,0x3d94719c,0xe4505422,0x399ee5bf,0x8d2dedb1,0x90cd5b3a, + 0x92a4077d,0xff9370e3 } }, + /* 152 */ + { { 0xc6b75b65,0x59a2d69b,0x266651c5,0x4188f8d5,0x3de9d7d2,0x28a9f33e, + 0xa2a9d01a,0x9776478b }, + { 0x929af2c7,0x8852622d,0x4e690923,0x334f5d6d,0xa89a51e9,0xce6cc7e5, + 0xac2f82fa,0x74a6313f } }, + /* 153 */ + { { 0xb75f079c,0xb2f4dfdd,0x18e36fbb,0x85b07c95,0xe7cd36dd,0x1b6cfcf0, + 0x0ff4863d,0xab75be15 }, + { 0x173fc9b7,0x81b367c0,0xd2594fd0,0xb90a7420,0xc4091236,0x15fdbf03, + 0x0b4459f6,0x4ebeac2e } }, + /* 154 */ + { { 0x5c9f2c53,0xeb6c5fe7,0x8eae9411,0xd2522011,0xf95ac5d8,0xc8887633, + 0x2c1baffc,0xdf99887b }, + { 0x850aaecb,0xbb78eed2,0x01d6a272,0x9d49181b,0xb1cdbcac,0x978dd511, + 0x779f4058,0x27b040a7 } }, + /* 155 */ + { { 0xf73b2eb2,0x90405db7,0x8e1b2118,0xe0df8508,0x5962327e,0x501b7152, + 0xe4cfa3f5,0xb393dd37 }, + { 0x3fd75165,0xa1230e7b,0xbcd33554,0xd66344c2,0x0f7b5022,0x6c36f1be, + 0xd0463419,0x09588c12 } }, + /* 156 */ + { { 0x02601c3b,0xe086093f,0xcf5c335f,0xfb0252f8,0x894aff28,0x955cf280, + 0xdb9f648b,0x81c879a9 }, + { 0xc6f56c51,0x040e687c,0x3f17618c,0xfed47169,0x9059353b,0x44f88a41, + 0x5fc11bc4,0xfa0d48f5 } }, + /* 157 */ + { { 0xe1608e4d,0xbc6e1c9d,0x3582822c,0x010dda11,0x157ec2d7,0xf6b7ddc1, + 0xb6a367d6,0x8ea0e156 }, + { 0x2383b3b4,0xa354e02f,0x3f01f53c,0x69966b94,0x2de03ca5,0x4ff6632b, + 0xfa00b5ac,0x3f5ab924 } }, + /* 158 */ + { { 0x59739efb,0x337bb0d9,0xe7ebec0d,0xc751b0f4,0x411a67d1,0x2da52dd6, + 0x2b74256e,0x8bc76887 }, + { 0x82d3d253,0xa5be3b72,0xf58d779f,0xa9f679a1,0xe16767bb,0xa1cac168, + 0x60fcf34f,0xb386f190 } }, + /* 159 */ + { { 0x2fedcfc2,0x31f3c135,0x62f8af0d,0x5396bf62,0xe57288c2,0x9a02b4ea, + 0x1b069c4d,0x4cb460f7 }, + { 0x5b8095ea,0xae67b4d3,0x6fc07603,0x92bbf859,0xb614a165,0xe1475f66, + 0x95ef5223,0x52c0d508 } }, + /* 160 */ + { { 0x15339848,0x231c210e,0x70778c8d,0xe87a28e8,0x6956e170,0x9d1de661, + 0x2bb09c0b,0x4ac3c938 }, + { 0x6998987d,0x19be0551,0xae09f4d6,0x8b2376c4,0x1a3f933d,0x1de0b765, + 0xe39705f4,0x380d94c7 } }, + /* 161 */ + { { 0x81542e75,0x01a355aa,0xee01b9b7,0x96c724a1,0x624d7087,0x6b3a2977, + 0xde2637af,0x2ce3e171 }, + { 0xf5d5bc1a,0xcfefeb49,0x2777e2b5,0xa655607e,0x9513756c,0x4feaac2f, + 0x0b624e4d,0x2e6cd852 } }, + /* 162 */ + { { 0x8c31c31d,0x3685954b,0x5bf21a0c,0x68533d00,0x75c79ec9,0x0bd7626e, + 0x42c69d54,0xca177547 }, + { 0xf6d2dbb2,0xcc6edaff,0x174a9d18,0xfd0d8cbd,0xaa4578e8,0x875e8793, + 0x9cab2ce6,0xa976a713 } }, + /* 163 */ + { { 0x93fb353d,0x0a651f1b,0x57fcfa72,0xd75cab8b,0x31b15281,0xaa88cfa7, + 0x0a1f4999,0x8720a717 }, + { 0x693e1b90,0x8c3e8d37,0x16f6dfc3,0xd345dc0b,0xb52a8742,0x8ea8d00a, + 0xc769893c,0x9719ef29 } }, + /* 164 */ + { { 0x58e35909,0x820eed8d,0x33ddc116,0x9366d8dc,0x6e205026,0xd7f999d0, + 0xe15704c1,0xa5072976 }, + { 0xc4e70b2e,0x002a37ea,0x6890aa8a,0x84dcf657,0x645b2a5c,0xcd71bf18, + 0xf7b77725,0x99389c9d } }, + /* 165 */ + { { 0x7ada7a4b,0x238c08f2,0xfd389366,0x3abe9d03,0x766f512c,0x6b672e89, + 0x202c82e4,0xa88806aa }, + { 
0xd380184e,0x6602044a,0x126a8b85,0xa8cb78c4,0xad844f17,0x79d670c0, + 0x4738dcfe,0x0043bffb } }, + /* 166 */ + { { 0x36d5192e,0x8d59b5dc,0x4590b2af,0xacf885d3,0x11601781,0x83566d0a, + 0xba6c4866,0x52f3ef01 }, + { 0x0edcb64d,0x3986732a,0x8068379f,0x0a482c23,0x7040f309,0x16cbe5fa, + 0x9ef27e75,0x3296bd89 } }, + /* 167 */ + { { 0x454d81d7,0x476aba89,0x51eb9b3c,0x9eade7ef,0x81c57986,0x619a21cd, + 0xaee571e9,0x3b90febf }, + { 0x5496f7cb,0x9393023e,0x7fb51bc4,0x55be41d8,0x99beb5ce,0x03f1dd48, + 0x9f810b18,0x6e88069d } }, + /* 168 */ + { { 0xb43ea1db,0xce37ab11,0x5259d292,0x0a7ff1a9,0x8f84f186,0x851b0221, + 0xdefaad13,0xa7222bea }, + { 0x2b0a9144,0xa2ac78ec,0xf2fa59c5,0x5a024051,0x6147ce38,0x91d1eca5, + 0xbc2ac690,0xbe94d523 } }, + /* 169 */ + { { 0x0b226ce7,0x72f4945e,0x967e8b70,0xb8afd747,0x85a6c63e,0xedea46f1, + 0x9be8c766,0x7782defe }, + { 0x3db38626,0x760d2aa4,0x76f67ad1,0x460ae787,0x54499cdb,0x341b86fc, + 0xa2892e4b,0x03838567 } }, + /* 170 */ + { { 0x79ec1a0f,0x2d8daefd,0xceb39c97,0x3bbcd6fd,0x58f61a95,0xf5575ffc, + 0xadf7b420,0xdbd986c4 }, + { 0x15f39eb7,0x81aa8814,0xb98d976c,0x6ee2fcf5,0xcf2f717d,0x5465475d, + 0x6860bbd0,0x8e24d3c4 } }, + /* 171 */ + { { 0x9a587390,0x749d8e54,0x0cbec588,0x12bb194f,0xb25983c6,0x46e07da4, + 0x407bafc8,0x541a99c4 }, + { 0x624c8842,0xdb241692,0xd86c05ff,0x6044c12a,0x4f7fcf62,0xc59d14b4, + 0xf57d35d1,0xc0092c49 } }, + /* 172 */ + { { 0xdf2e61ef,0xd3cc75c3,0x2e1b35ca,0x7e8841c8,0x909f29f4,0xc62d30d1, + 0x7286944d,0x75e40634 }, + { 0xbbc237d0,0xe7d41fc5,0xec4f01c9,0xc9537bf0,0x282bd534,0x91c51a16, + 0xc7848586,0x5b7cb658 } }, + /* 173 */ + { { 0x8a28ead1,0x964a7084,0xfd3b47f6,0x802dc508,0x767e5b39,0x9ae4bfd1, + 0x8df097a1,0x7ae13eba }, + { 0xeadd384e,0xfd216ef8,0xb6b2ff06,0x0361a2d9,0x4bcdb5f3,0x204b9878, + 0xe2a8e3fd,0x787d8074 } }, + /* 174 */ + { { 0x757fbb1c,0xc5e25d6b,0xca201deb,0xe47bddb2,0x6d2233ff,0x4a55e9a3, + 0x9ef28484,0x5c222819 }, + { 0x88315250,0x773d4a85,0x827097c1,0x21b21a2b,0xdef5d33f,0xab7c4ea1, + 0xbaf0f2b0,0xe45d37ab } }, + /* 175 */ + { { 0x28511c8a,0xd2df1e34,0xbdca6cd3,0xebb229c8,0x627c39a7,0x578a71a7, + 0x84dfb9d3,0xed7bc122 }, + { 0x93dea561,0xcf22a6df,0xd48f0ed1,0x5443f18d,0x5bad23e8,0xd8b86140, + 0x45ca6d27,0xaac97cc9 } }, + /* 176 */ + { { 0xa16bd00a,0xeb54ea74,0xf5c0bcc1,0xd839e9ad,0x1f9bfc06,0x092bb7f1, + 0x1163dc4e,0x318f97b3 }, + { 0xc30d7138,0xecc0c5be,0xabc30220,0x44e8df23,0xb0223606,0x2bb7972f, + 0x9a84ff4d,0xfa41faa1 } }, + /* 177 */ + { { 0xa6642269,0x4402d974,0x9bb783bd,0xc81814ce,0x7941e60b,0x398d38e4, + 0x1d26e9e2,0x38bb6b2c }, + { 0x6a577f87,0xc64e4a25,0xdc11fe1c,0x8b52d253,0x62280728,0xff336abf, + 0xce7601a5,0x94dd0905 } }, + /* 178 */ + { { 0xde93f92a,0x156cf7dc,0x89b5f315,0xa01333cb,0xc995e750,0x02404df9, + 0xd25c2ae9,0x92077867 }, + { 0x0bf39d44,0xe2471e01,0x96bb53d7,0x5f2c9020,0x5c9c3d8f,0x4c44b7b3, + 0xd29beb51,0x81e8428b } }, + /* 179 */ + { { 0xc477199f,0x6dd9c2ba,0x6b5ecdd9,0x8cb8eeee,0xee40fd0e,0x8af7db3f, + 0xdbbfa4b1,0x1b94ab62 }, + { 0xce47f143,0x44f0d8b3,0x63f46163,0x51e623fc,0xcc599383,0xf18f270f, + 0x055590ee,0x06a38e28 } }, + /* 180 */ + { { 0xb3355b49,0x2e5b0139,0xb4ebf99b,0x20e26560,0xd269f3dc,0xc08ffa6b, + 0x83d9d4f8,0xa7b36c20 }, + { 0x1b3e8830,0x64d15c3a,0xa89f9c0b,0xd5fceae1,0xe2d16930,0xcfeee4a2, + 0xa2822a20,0xbe54c6b4 } }, + /* 181 */ + { { 0x8d91167c,0xd6cdb3df,0xe7a6625e,0x517c3f79,0x346ac7f4,0x7105648f, + 0xeae022bb,0xbf30a5ab }, + { 0x93828a68,0x8e7785be,0x7f3ef036,0x5161c332,0x592146b2,0xe11b5feb, + 0x2732d13a,0xd1c820de } }, + /* 182 */ + { { 
0x9038b363,0x043e1347,0x6b05e519,0x58c11f54,0x6026cad1,0x4fe57abe, + 0x68a18da3,0xb7d17bed }, + { 0xe29c2559,0x44ca5891,0x5bfffd84,0x4f7a0376,0x74e46948,0x498de4af, + 0x6412cc64,0x3997fd5e } }, + /* 183 */ + { { 0x8bd61507,0xf2074682,0x34a64d2a,0x29e132d5,0x8a8a15e3,0xffeddfb0, + 0x3c6c13e8,0x0eeb8929 }, + { 0xa7e259f8,0xe9b69a3e,0xd13e7e67,0xce1db7e6,0xad1fa685,0x277318f6, + 0xc922b6ef,0x228916f8 } }, + /* 184 */ + { { 0x0a12ab5b,0x959ae25b,0x957bc136,0xcc11171f,0xd16e2b0c,0x8058429e, + 0x6e93097e,0xec05ad1d }, + { 0xac3f3708,0x157ba5be,0x30b59d77,0x31baf935,0x118234e5,0x47b55237, + 0x7ff11b37,0x7d314156 } }, + /* 185 */ + { { 0xf6dfefab,0x7bd9c05c,0xdcb37707,0xbe2f2268,0x3a38bb95,0xe53ead97, + 0x9bc1d7a3,0xe9ce66fc }, + { 0x6f6a02a1,0x75aa1576,0x60e600ed,0x38c087df,0x68cdc1b9,0xf8947f34, + 0x72280651,0xd9650b01 } }, + /* 186 */ + { { 0x5a057e60,0x504b4c4a,0x8def25e4,0xcbccc3be,0x17c1ccbd,0xa6353208, + 0x804eb7a2,0x14d6699a }, + { 0xdb1f411a,0x2c8a8415,0xf80d769c,0x09fbaf0b,0x1c2f77ad,0xb4deef90, + 0x0d43598a,0x6f4c6841 } }, + /* 187 */ + { { 0x96c24a96,0x8726df4e,0xfcbd99a3,0x534dbc85,0x8b2ae30a,0x3c466ef2, + 0x61189abb,0x4c4350fd }, + { 0xf855b8da,0x2967f716,0x463c38a1,0x41a42394,0xeae93343,0xc37e1413, + 0x5a3118b5,0xa726d242 } }, + /* 188 */ + { { 0x948c1086,0xdae6b3ee,0xcbd3a2e1,0xf1de503d,0x03d022f3,0x3f35ed3f, + 0xcc6cf392,0x13639e82 }, + { 0xcdafaa86,0x9ac938fb,0x2654a258,0xf45bc5fb,0x45051329,0x1963b26e, + 0xc1a335a3,0xca9365e1 } }, + /* 189 */ + { { 0x4c3b2d20,0x3615ac75,0x904e241b,0x742a5417,0xcc9d071d,0xb08521c4, + 0x970b72a5,0x9ce29c34 }, + { 0x6d3e0ad6,0x8cc81f73,0xf2f8434c,0x8060da9e,0x6ce862d9,0x35ed1d1a, + 0xab42af98,0x48c4abd7 } }, + /* 190 */ + { { 0x40c7485a,0xd221b0cc,0xe5274dbf,0xead455bb,0x9263d2e8,0x493c7698, + 0xf67b33cb,0x78017c32 }, + { 0x930cb5ee,0xb9d35769,0x0c408ed2,0xc0d14e94,0x272f1a4d,0xf8b7bf55, + 0xde5c1c04,0x53cd0454 } }, + /* 191 */ + { { 0x5d28ccac,0xbcd585fa,0x005b746e,0x5f823e56,0xcd0123aa,0x7c79f0a1, + 0xd3d7fa8f,0xeea465c1 }, + { 0x0551803b,0x7810659f,0x7ce6af70,0x6c0b599f,0x29288e70,0x4195a770, + 0x7ae69193,0x1b6e42a4 } }, + /* 192 */ + { { 0xf67d04c3,0x2e80937c,0x89eeb811,0x1e312be2,0x92594d60,0x56b5d887, + 0x187fbd3d,0x0224da14 }, + { 0x0c5fe36f,0x87abb863,0x4ef51f5f,0x580f3c60,0xb3b429ec,0x964fb1bf, + 0x42bfff33,0x60838ef0 } }, + /* 193 */ + { { 0x7e0bbe99,0x432cb2f2,0x04aa39ee,0x7bda44f3,0x9fa93903,0x5f497c7a, + 0x2d331643,0x636eb202 }, + { 0x93ae00aa,0xfcfd0e61,0x31ae6d2f,0x875a00fe,0x9f93901c,0xf43658a2, + 0x39218bac,0x8844eeb6 } }, + /* 194 */ + { { 0x6b3bae58,0x114171d2,0x17e39f3e,0x7db3df71,0x81a8eada,0xcd37bc7f, + 0x51fb789e,0x27ba83dc }, + { 0xfbf54de5,0xa7df439f,0xb5fe1a71,0x7277030b,0xdb297a48,0x42ee8e35, + 0x87f3a4ab,0xadb62d34 } }, + /* 195 */ + { { 0xa175df2a,0x9b1168a2,0x618c32e9,0x082aa04f,0x146b0916,0xc9e4f2e7, + 0x75e7c8b2,0xb990fd76 }, + { 0x4df37313,0x0829d96b,0xd0b40789,0x1c205579,0x78087711,0x66c9ae4a, + 0x4d10d18d,0x81707ef9 } }, + /* 196 */ + { { 0x03d6ff96,0x97d7cab2,0x0d843360,0x5b851bfc,0xd042db4b,0x268823c4, + 0xd5a8aa5c,0x3792daea }, + { 0x941afa0b,0x52818865,0x42d83671,0xf3e9e741,0x5be4e0a7,0x17c82527, + 0x94b001ba,0x5abd635e } }, + /* 197 */ + { { 0x0ac4927c,0x727fa84e,0xa7c8cf23,0xe3886035,0x4adca0df,0xa4bcd5ea, + 0x846ab610,0x5995bf21 }, + { 0x829dfa33,0xe90f860b,0x958fc18b,0xcaafe2ae,0x78630366,0x9b3baf44, + 0xd483411e,0x44c32ca2 } }, + /* 198 */ + { { 0xe40ed80c,0xa74a97f1,0x31d2ca82,0x5f938cb1,0x7c2d6ad9,0x53f2124b, + 0x8082a54c,0x1f2162fb }, + { 
0x720b173e,0x7e467cc5,0x085f12f9,0x40e8a666,0x4c9d65dc,0x8cebc20e, + 0xc3e907c9,0x8f1d402b } }, + /* 199 */ + { { 0xfbc4058a,0x4f592f9c,0x292f5670,0xb15e14b6,0xbc1d8c57,0xc55cfe37, + 0x926edbf9,0xb1980f43 }, + { 0x32c76b09,0x98c33e09,0x33b07f78,0x1df5279d,0x863bb461,0x6f08ead4, + 0x37448e45,0x2828ad9b } }, + /* 200 */ + { { 0xc4cf4ac5,0x696722c4,0xdde64afb,0xf5ac1a3f,0xe0890832,0x0551baa2, + 0x5a14b390,0x4973f127 }, + { 0x322eac5d,0xe59d8335,0x0bd9b568,0x5e07eef5,0xa2588393,0xab36720f, + 0xdb168ac7,0x6dac8ed0 } }, + /* 201 */ + { { 0xeda835ef,0xf7b545ae,0x1d10ed51,0x4aa113d2,0x13741b09,0x035a65e0, + 0x20b9de4c,0x4b23ef59 }, + { 0x3c4c7341,0xe82bb680,0x3f58bc37,0xd457706d,0xa51e3ee8,0x73527863, + 0xddf49a4e,0x4dd71534 } }, + /* 202 */ + { { 0x95476cd9,0xbf944672,0xe31a725b,0x648d072f,0xfc4b67e0,0x1441c8b8, + 0x2f4a4dbb,0xfd317000 }, + { 0x8995d0e1,0x1cb43ff4,0x0ef729aa,0x76e695d1,0x41798982,0xe0d5f976, + 0x9569f365,0x14fac58c } }, + /* 203 */ + { { 0xf312ae18,0xad9a0065,0xfcc93fc9,0x51958dc0,0x8a7d2846,0xd9a14240, + 0x36abda50,0xed7c7651 }, + { 0x25d4abbc,0x46270f1a,0xf1a113ea,0x9b5dd8f3,0x5b51952f,0xc609b075, + 0x4d2e9f53,0xfefcb7f7 } }, + /* 204 */ + { { 0xba119185,0xbd09497a,0xaac45ba4,0xd54e8c30,0xaa521179,0x492479de, + 0x87e0d80b,0x1801a57e }, + { 0xfcafffb0,0x073d3f8d,0xae255240,0x6cf33c0b,0x5b5fdfbc,0x781d763b, + 0x1ead1064,0x9f8fc11e } }, + /* 205 */ + { { 0x5e69544c,0x1583a171,0xf04b7813,0x0eaf8567,0x278a4c32,0x1e22a8fd, + 0x3d3a69a9,0xa9d3809d }, + { 0x59a2da3b,0x936c2c2c,0x1895c847,0x38ccbcf6,0x63d50869,0x5e65244e, + 0xe1178ef7,0x3006b9ae } }, + /* 206 */ + { { 0xc9eead28,0x0bb1f2b0,0x89f4dfbc,0x7eef635d,0xb2ce8939,0x074757fd, + 0x45f8f761,0x0ab85fd7 }, + { 0x3e5b4549,0xecda7c93,0x97922f21,0x4be2bb5c,0xb43b8040,0x261a1274, + 0x11e942c2,0xb122d675 } }, + /* 207 */ + { { 0x66a5ae7a,0x3be607be,0x76adcbe3,0x01e703fa,0x4eb6e5c5,0xaf904301, + 0x097dbaec,0x9f599dc1 }, + { 0x0ff250ed,0x6d75b718,0x349a20dc,0x8eb91574,0x10b227a3,0x425605a4, + 0x8a294b78,0x7d5528e0 } }, + /* 208 */ + { { 0x20c26def,0xf0f58f66,0x582b2d1e,0x025585ea,0x01ce3881,0xfbe7d79b, + 0x303f1730,0x28ccea01 }, + { 0x79644ba5,0xd1dabcd1,0x06fff0b8,0x1fc643e8,0x66b3e17b,0xa60a76fc, + 0xa1d013bf,0xc18baf48 } }, + /* 209 */ + { { 0x5dc4216d,0x34e638c8,0x206142ac,0x00c01067,0x95f5064a,0xd453a171, + 0xb7a9596b,0x9def809d }, + { 0x67ab8d2c,0x41e8642e,0x6237a2b6,0xb4240433,0x64c4218b,0x7d506a6d, + 0x68808ce5,0x0357f8b0 } }, + /* 210 */ + { { 0x4cd2cc88,0x8e9dbe64,0xf0b8f39d,0xcc61c28d,0xcd30a0c8,0x4a309874, + 0x1b489887,0xe4a01add }, + { 0xf57cd8f9,0x2ed1eeac,0xbd594c48,0x1b767d3e,0x7bd2f787,0xa7295c71, + 0xce10cc30,0x466d7d79 } }, + /* 211 */ + { { 0x9dada2c7,0x47d31892,0x8f9aa27d,0x4fa0a6c3,0x820a59e1,0x90e4fd28, + 0x451ead1a,0xc672a522 }, + { 0x5d86b655,0x30607cc8,0xf9ad4af1,0xf0235d3b,0x571172a6,0x99a08680, + 0xf2a67513,0x5e3d64fa } }, + /* 212 */ + { { 0x9b3b4416,0xaa6410c7,0xeab26d99,0xcd8fcf85,0xdb656a74,0x5ebff74a, + 0xeb8e42fc,0x6c8a7a95 }, + { 0xb02a63bd,0x10c60ba7,0x8b8f0047,0x6b2f2303,0x312d90b0,0x8c6c3738, + 0xad82ca91,0x348ae422 } }, + /* 213 */ + { { 0x5ccda2fb,0x7f474663,0x8e0726d2,0x22accaa1,0x492b1f20,0x85adf782, + 0xd9ef2d2e,0xc1074de0 }, + { 0xae9a65b3,0xfcf3ce44,0x05d7151b,0xfd71e4ac,0xce6a9788,0xd4711f50, + 0xc9e54ffc,0xfbadfbdb } }, + /* 214 */ + { { 0x20a99363,0x1713f1cd,0x6cf22775,0xb915658f,0x24d359b2,0x968175cd, + 0x83716fcd,0xb7f976b4 }, + { 0x5d6dbf74,0x5758e24d,0x71c3af36,0x8d23bafd,0x0243dfe3,0x48f47760, + 0xcafcc805,0xf4d41b2e } }, + /* 215 */ + { { 
0xfdabd48d,0x51f1cf28,0x32c078a4,0xce81be36,0x117146e9,0x6ace2974, + 0xe0160f10,0x180824ea }, + { 0x66e58358,0x0387698b,0xce6ca358,0x63568752,0x5e41e6c5,0x82380e34, + 0x83cf6d25,0x67e5f639 } }, + /* 216 */ + { { 0xcf4899ef,0xf89ccb8d,0x9ebb44c0,0x949015f0,0xb2598ec9,0x546f9276, + 0x04c11fc6,0x9fef789a }, + { 0x53d2a071,0x6d367ecf,0xa4519b09,0xb10e1a7f,0x611e2eef,0xca6b3fb0, + 0xa99c4e20,0xbc80c181 } }, + /* 217 */ + { { 0xe5eb82e6,0x972536f8,0xf56cb920,0x1a484fc7,0x50b5da5e,0xc78e2171, + 0x9f8cdf10,0x49270e62 }, + { 0xea6b50ad,0x1a39b7bb,0xa2388ffc,0x9a0284c1,0x8107197b,0x5403eb17, + 0x61372f7f,0xd2ee52f9 } }, + /* 218 */ + { { 0x88e0362a,0xd37cd285,0x8fa5d94d,0x442fa8a7,0xa434a526,0xaff836e5, + 0xe5abb733,0xdfb478be }, + { 0x673eede6,0xa91f1ce7,0x2b5b2f04,0xa5390ad4,0x5530da2f,0x5e66f7bf, + 0x08df473a,0xd9a140b4 } }, + /* 219 */ + { { 0x6e8ea498,0x0e0221b5,0x3563ee09,0x62347829,0x335d2ade,0xe06b8391, + 0x623f4b1a,0x760c058d }, + { 0xc198aa79,0x0b89b58c,0xf07aba7f,0xf74890d2,0xfde2556a,0x4e204110, + 0x8f190409,0x7141982d } }, + /* 220 */ + { { 0x4d4b0f45,0x6f0a0e33,0x392a94e1,0xd9280b38,0xb3c61d5e,0x3af324c6, + 0x89d54e47,0x3af9d1ce }, + { 0x20930371,0xfd8f7981,0x21c17097,0xeda2664c,0xdc42309b,0x0e9545dc, + 0x73957dd6,0xb1f815c3 } }, + /* 221 */ + { { 0x89fec44a,0x84faa78e,0x3caa4caf,0xc8c2ae47,0xc1b6a624,0x691c807d, + 0x1543f052,0xa41aed14 }, + { 0x7d5ffe04,0x42435399,0x625b6e20,0x8bacb2df,0x87817775,0x85d660be, + 0x86fb60ef,0xd6e9c1dd } }, + /* 222 */ + { { 0xc6853264,0x3aa2e97e,0xe2304a0b,0x771533b7,0xb8eae9be,0x1b912bb7, + 0xae9bf8c2,0x9c9c6e10 }, + { 0xe030b74c,0xa2309a59,0x6a631e90,0x4ed7494d,0xa49b79f2,0x89f44b23, + 0x40fa61b6,0x566bd596 } }, + /* 223 */ + { { 0xc18061f3,0x066c0118,0x7c83fc70,0x190b25d3,0x27273245,0xf05fc8e0, + 0xf525345e,0xcf2c7390 }, + { 0x10eb30cf,0xa09bceb4,0x0d77703a,0xcfd2ebba,0x150ff255,0xe842c43a, + 0x8aa20979,0x02f51755 } }, + /* 224 */ + { { 0xaddb7d07,0x396ef794,0x24455500,0x0b4fc742,0xc78aa3ce,0xfaff8eac, + 0xe8d4d97d,0x14e9ada5 }, + { 0x2f7079e2,0xdaa480a1,0xe4b0800e,0x45baa3cd,0x7838157d,0x01765e2d, + 0x8e9d9ae8,0xa0ad4fab } }, + /* 225 */ + { { 0x4a653618,0x0bfb7621,0x31eaaa5f,0x1872813c,0x44949d5e,0x1553e737, + 0x6e56ed1e,0xbcd530b8 }, + { 0x32e9c47b,0x169be853,0xb50059ab,0xdc2776fe,0x192bfbb4,0xcdba9761, + 0x6979341d,0x909283cf } }, + /* 226 */ + { { 0x76e81a13,0x67b00324,0x62171239,0x9bee1a99,0xd32e19d6,0x08ed361b, + 0xace1549a,0x35eeb7c9 }, + { 0x7e4e5bdc,0x1280ae5a,0xb6ceec6e,0x2dcd2cd3,0x6e266bc1,0x52e4224c, + 0x448ae864,0x9a8b2cf4 } }, + /* 227 */ + { { 0x09d03b59,0xf6471bf2,0xb65af2ab,0xc90e62a3,0xebd5eec9,0xff7ff168, + 0xd4491379,0x6bdb60f4 }, + { 0x8a55bc30,0xdadafebc,0x10097fe0,0xc79ead16,0x4c1e3bdd,0x42e19741, + 0x94ba08a9,0x01ec3cfd } }, + /* 228 */ + { { 0xdc9485c2,0xba6277eb,0x22fb10c7,0x48cc9a79,0x70a28d8a,0x4f61d60f, + 0x475464f6,0xd1acb1c0 }, + { 0x26f36612,0xd26902b1,0xe0618d8b,0x59c3a44e,0x308357ee,0x4df8a813, + 0x405626c2,0x7dcd079d } }, + /* 229 */ + { { 0xf05a4b48,0x5ce7d4d3,0x37230772,0xadcd2952,0x812a915a,0xd18f7971, + 0x377d19b8,0x0bf53589 }, + { 0x6c68ea73,0x35ecd95a,0x823a584d,0xc7f3bbca,0xf473a723,0x9fb674c6, + 0xe16686fc,0xd28be4d9 } }, + /* 230 */ + { { 0x38fa8e4b,0x5d2b9906,0x893fd8fc,0x559f186e,0x436fb6fc,0x3a6de2aa, + 0x510f88ce,0xd76007aa }, + { 0x523a4988,0x2d10aab6,0x74dd0273,0xb455cf44,0xa3407278,0x7f467082, + 0xb303bb01,0xf2b52f68 } }, + /* 231 */ + { { 0x9835b4ca,0x0d57eafa,0xbb669cbc,0x2d2232fc,0xc6643198,0x8eeeb680, + 0xcc5aed3a,0xd8dbe98e }, + { 
0xc5a02709,0xcba9be3f,0xf5ba1fa8,0x30be68e5,0xf10ea852,0xfebd43cd, + 0xee559705,0xe01593a3 } }, + /* 232 */ + { { 0xea75a0a6,0xd3e5af50,0x57858033,0x512226ac,0xd0176406,0x6fe6d50f, + 0xaeb8ef06,0xafec07b1 }, + { 0x80bb0a31,0x7fb99567,0x37309aae,0x6f1af3cc,0x01abf389,0x9153a15a, + 0x6e2dbfdd,0xa71b9354 } }, + /* 233 */ + { { 0x18f593d2,0xbf8e12e0,0xa078122b,0xd1a90428,0x0ba4f2ad,0x150505db, + 0x628523d9,0x53a2005c }, + { 0xe7f2b935,0x07c8b639,0xc182961a,0x2bff975a,0x7518ca2c,0x86bceea7, + 0x3d588e3d,0xbf47d19b } }, + /* 234 */ + { { 0xdd7665d5,0x672967a7,0x2f2f4de5,0x4e303057,0x80d4903f,0x144005ae, + 0x39c9a1b6,0x001c2c7f }, + { 0x69efc6d6,0x143a8014,0x7bc7a724,0xc810bdaa,0xa78150a4,0x5f65670b, + 0x86ffb99b,0xfdadf8e7 } }, + /* 235 */ + { { 0xffc00785,0xfd38cb88,0x3b48eb67,0x77fa7591,0xbf368fbc,0x0454d055, + 0x5aa43c94,0x3a838e4d }, + { 0x3e97bb9a,0x56166329,0x441d94d9,0x9eb93363,0x0adb2a83,0x515591a6, + 0x873e1da3,0x3cdb8257 } }, + /* 236 */ + { { 0x7de77eab,0x137140a9,0x41648109,0xf7e1c50d,0xceb1d0df,0x762dcad2, + 0xf1f57fba,0x5a60cc89 }, + { 0x40d45673,0x80b36382,0x5913c655,0x1b82be19,0xdd64b741,0x057284b8, + 0xdbfd8fc0,0x922ff56f } }, + /* 237 */ + { { 0xc9a129a1,0x1b265dee,0xcc284e04,0xa5b1ce57,0xcebfbe3c,0x04380c46, + 0xf6c5cd62,0x72919a7d }, + { 0x8fb90f9a,0x298f453a,0x88e4031b,0xd719c00b,0x796f1856,0xe32c0e77, + 0x3624089a,0x5e791780 } }, + /* 238 */ + { { 0x7f63cdfb,0x5c16ec55,0xf1cae4fd,0x8e6a3571,0x560597ca,0xfce26bea, + 0xe24c2fab,0x4e0a5371 }, + { 0xa5765357,0x276a40d3,0x0d73a2b4,0x3c89af44,0x41d11a32,0xb8f370ae, + 0xd56604ee,0xf5ff7818 } }, + /* 239 */ + { { 0x1a09df21,0xfbf3e3fe,0xe66e8e47,0x26d5d28e,0x29c89015,0x2096bd0a, + 0x533f5e64,0xe41df0e9 }, + { 0xb3ba9e3f,0x305fda40,0x2604d895,0xf2340ceb,0x7f0367c7,0x0866e192, + 0xac4f155f,0x8edd7d6e } }, + /* 240 */ + { { 0x0bfc8ff3,0xc9a1dc0e,0xe936f42f,0x14efd82b,0xcca381ef,0x67016f7c, + 0xed8aee96,0x1432c1ca }, + { 0x70b23c26,0xec684829,0x0735b273,0xa64fe873,0xeaef0f5a,0xe389f6e5, + 0x5ac8d2c6,0xcaef480b } }, + /* 241 */ + { { 0x75315922,0x5245c978,0x3063cca5,0xd8295171,0xb64ef2cb,0xf3ce60d0, + 0x8efae236,0xd0ba177e }, + { 0xb1b3af60,0x53a9ae8f,0x3d2da20e,0x1a796ae5,0xdf9eef28,0x01d63605, + 0x1c54ae16,0xf31c957c } }, + /* 242 */ + { { 0x49cc4597,0xc0f58d52,0xbae0a028,0xdc5015b0,0x734a814a,0xefc5fc55, + 0x96e17c3a,0x013404cb }, + { 0xc9a824bf,0xb29e2585,0x001eaed7,0xd593185e,0x61ef68ac,0x8d6ee682, + 0x91933e6c,0x6f377c4b } }, + /* 243 */ + { { 0xa8333fd2,0x9f93bad1,0x5a2a95b8,0xa8930202,0xeaf75ace,0x211e5037, + 0xd2d09506,0x6dba3e4e }, + { 0xd04399cd,0xa48ef98c,0xe6b73ade,0x1811c66e,0xc17ecaf3,0x72f60752, + 0x3becf4a7,0xf13cf342 } }, + /* 244 */ + { { 0xa919e2eb,0xceeb9ec0,0xf62c0f68,0x83a9a195,0x7aba2299,0xcfba3bb6, + 0x274bbad3,0xc83fa9a9 }, + { 0x62fa1ce0,0x0d7d1b0b,0x3418efbf,0xe58b60f5,0x52706f04,0xbfa8ef9e, + 0x5d702683,0xb49d70f4 } }, + /* 245 */ + { { 0xfad5513b,0x914c7510,0xb1751e2d,0x05f32eec,0xd9fb9d59,0x6d850418, + 0x0c30f1cf,0x59cfadbb }, + { 0x55cb7fd6,0xe167ac23,0x820426a3,0x249367b8,0x90a78864,0xeaeec58c, + 0x354a4b67,0x5babf362 } }, + /* 246 */ + { { 0xee424865,0x37c981d1,0xf2e5577f,0x8b002878,0xb9e0c058,0x702970f1, + 0x9026c8f0,0x6188c6a7 }, + { 0xd0f244da,0x06f9a19b,0xfb080873,0x1ecced5c,0x9f213637,0x35470f9b, + 0xdf50b9d9,0x993fe475 } }, + /* 247 */ + { { 0x9b2c3609,0x68e31cdf,0x2c46d4ea,0x84eb19c0,0x9a775101,0x7ac9ec1a, + 0x4c80616b,0x81f76466 }, + { 0x75fbe978,0x1d7c2a5a,0xf183b356,0x6743fed3,0x501dd2bf,0x838d1f04, + 0x5fe9060d,0x564a812a } }, + /* 248 */ + { { 
0xfa817d1d,0x7a5a64f4,0xbea82e0f,0x55f96844,0xcd57f9aa,0xb5ff5a0f, + 0x00e51d6c,0x226bf3cf }, + { 0x2f2833cf,0xd6d1a9f9,0x4f4f89a8,0x20a0a35a,0x8f3f7f77,0x11536c49, + 0xff257836,0x68779f47 } }, + /* 249 */ + { { 0x73043d08,0x79b0c1c1,0x1fc020fa,0xa5446774,0x9a6d26d0,0xd3767e28, + 0xeb092e0b,0x97bcb0d1 }, + { 0xf32ed3c3,0x2ab6eaa8,0xb281bc48,0xc8a4f151,0xbfa178f3,0x4d1bf4f3, + 0x0a784655,0xa872ffe8 } }, + /* 250 */ + { { 0xa32b2086,0xb1ab7935,0x8160f486,0xe1eb710e,0x3b6ae6be,0x9bd0cd91, + 0xb732a36a,0x02812bfc }, + { 0xcf605318,0xa63fd7ca,0xfdfd6d1d,0x646e5d50,0x2102d619,0xa1d68398, + 0xfe5396af,0x07391cc9 } }, + /* 251 */ + { { 0x8b80d02b,0xc50157f0,0x62877f7f,0x6b8333d1,0x78d542ae,0x7aca1af8, + 0x7e6d2a08,0x355d2adc }, + { 0x287386e1,0xb41f335a,0xf8e43275,0xfd272a94,0xe79989ea,0x286ca2cd, + 0x7c2a3a79,0x3dc2b1e3 } }, + /* 252 */ + { { 0x04581352,0xd689d21c,0x376782be,0x0a00c825,0x9fed701f,0x203bd590, + 0x3ccd846b,0xc4786910 }, + { 0x24c768ed,0x5dba7708,0x6841f657,0x72feea02,0x6accce0e,0x73313ed5, + 0xd5bb4d32,0xccc42968 } }, + /* 253 */ + { { 0x3d7620b9,0x94e50de1,0x5992a56a,0xd89a5c8a,0x675487c9,0xdc007640, + 0xaa4871cf,0xe147eb42 }, + { 0xacf3ae46,0x274ab4ee,0x50350fbe,0xfd4936fb,0x48c840ea,0xdf2afe47, + 0x080e96e3,0x239ac047 } }, + /* 254 */ + { { 0x2bfee8d4,0x481d1f35,0xfa7b0fec,0xce80b5cf,0x2ce9af3c,0x105c4c9e, + 0xf5f7e59d,0xc55fa1a3 }, + { 0x8257c227,0x3186f14e,0x342be00b,0xc5b1653f,0xaa904fb2,0x09afc998, + 0xd4f4b699,0x094cd99c } }, + /* 255 */ + { { 0xd703beba,0x8a981c84,0x32ceb291,0x8631d150,0xe3bd49ec,0xa445f2c9, + 0x42abad33,0xb90a30b6 }, + { 0xb4a5abf9,0xb465404f,0x75db7603,0x004750c3,0xca35d89f,0x6f9a42cc, + 0x1b7924f7,0x019f8b9a } }, +}; + +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k, + int map, void* heap) +{ + return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table, + k, map, heap); +} + +#endif + +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */
+int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 8, km);
+
+        err = sp_256_ecc_mulmod_base_8(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_8(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_256_iszero_8(const sp_digit* a)
+{
+    /* OR of all words; no data-dependent branches. */
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a A single precision integer.
+ */
+static void sp_256_add_one_8(sp_digit* a)
+{
+    __asm__ __volatile__ (
+        /* Add 1 to the lowest word; adcs ripples the carry upward. */
+        "ldr r1, [%[a], #0]\n\t"
+        "ldr r2, [%[a], #4]\n\t"
+        "ldr r3, [%[a], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "adds r1, r1, #1\n\t"
+        "adcs r2, r2, #0\n\t"
+        "adcs r3, r3, #0\n\t"
+        "adcs r4, r4, #0\n\t"
+        "str r1, [%[a], #0]\n\t"
+        "str r2, [%[a], #4]\n\t"
+        "str r3, [%[a], #8]\n\t"
+        "str r4, [%[a], #12]\n\t"
+        /* ldr/str do not touch the flags, so the carry from the low half
+         * is still live for the adcs chain over the high four words. */
+        "ldr r1, [%[a], #16]\n\t"
+        "ldr r2, [%[a], #20]\n\t"
+        "ldr r3, [%[a], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "adcs r1, r1, #0\n\t"
+        "adcs r2, r2, #0\n\t"
+        "adcs r3, r3, #0\n\t"
+        "adcs r4, r4, #0\n\t"
+        "str r1, [%[a], #16]\n\t"
+        "str r2, [%[a], #20]\n\t"
+        "str r3, [%[a], #24]\n\t"
+        "str r4, [%[a], #28]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "r1", "r2", "r3", "r4"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng Random number generator.
+ * k Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_256_ecc_gen_k_8(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[32];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_256_from_bin(k, 8, buf, (int)sizeof(buf));
+            /* Rejection sampling: accept only candidates below order-2,
+             * then add 1 so the scalar can never be zero. */
+            if (sp_256_cmp_8(k, p256_order2) < 0) {
+                sp_256_add_one_8(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
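+/* A hedged usage sketch (illustrative only, not built): how a caller might
+ * drive the key generator defined below. The RNG, mp_int and ecc_point
+ * setup/teardown calls are standard wolfCrypt APIs; error handling is
+ * abbreviated for brevity. */
+#if 0
+static int example_p256_make_key(void)
+{
+    WC_RNG rng;
+    mp_int priv;
+    ecc_point* pub = wc_ecc_new_point();
+    int ret;
+
+    if (pub == NULL)
+        return MEMORY_E;
+    ret = wc_InitRng(&rng);
+    if (ret == 0)
+        ret = mp_init(&priv);
+    if (ret == MP_OKAY)
+        ret = sp_ecc_make_key_256(&rng, &priv, pub, NULL);
+
+    /* ... use priv and pub, then clean up ... */
+    mp_clear(&priv);
+    wc_ecc_del_point(pub);
+    wc_FreeRng(&rng);
+    return ret;
+}
+#endif
+
+/* Makes a random EC key pair.
+ *
+ * rng Random number generator.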
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256 inf;
+#endif
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_gen_k_8(rng, k);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_8(infinity, point, p256_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_256_iszero_8(point->x) == 0) || (sp_256_iszero_8(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_8(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_256_point_free_8(infinity, 1, heap);
+#endif
+    sp_256_point_free_8(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 32
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_256_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 256 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<8 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
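+ *
+ * Minimal ECDH sketch (illustrative only; priv and pub are assumed to
+ * be an initialised private scalar and the peer's public point):
+ *
+ *     byte secret[32];
+ *     word32 secretLen = (word32)sizeof(secret);
+ *     int ret = sp_ecc_secret_gen_256(priv, pub, secret, &secretLen,
+ *                                     NULL);
+ *
+ * On MP_OKAY, secret holds the 32-byte big-endian X ordinate.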
+ */ +int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out, + word32* outLen, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 p; + sp_digit kd[8]; +#endif + sp_point_256* point = NULL; + sp_digit* k = NULL; + int err = MP_OKAY; + + if (*outLen < 32U) { + err = BUFFER_E; + } + + if (err == MP_OKAY) { + err = sp_256_point_new_8(heap, p, point); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(k, 8, priv); + sp_256_point_from_ecc_point_8(point, pub); + err = sp_256_ecc_mulmod_8(point, point, k, 1, heap); + } + if (err == MP_OKAY) { + sp_256_to_bin(point->x, out); + *outLen = 32; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_8(point, 0, heap); + + return err; +} +#endif /* HAVE_ECC_DHE */ + +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "sub sp, sp, #64\n\t" + "mov r5, #0\n\t" + "mov r6, #0\n\t" + "mov r7, #0\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "subs r3, r5, #28\n\t" + "it cc\n\t" + "movcc r3, #0\n\t" + "sub r4, r5, r3\n\t" + "\n2:\n\t" + "ldr r14, [%[a], r3]\n\t" + "ldr r12, [%[b], r4]\n\t" + "umull r9, r10, r14, r12\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, #32\n\t" + "beq 3f\n\t" + "cmp r3, r5\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "str r6, [sp, r5]\n\t" + "mov r6, r7\n\t" + "mov r7, r8\n\t" + "mov r8, #0\n\t" + "add r5, r5, #4\n\t" + "cmp r5, #56\n\t" + "ble 1b\n\t" + "str r6, [sp, r5]\n\t" + "\n4:\n\t" + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r3, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r3, [%[r], #12]\n\t" + "add sp, sp, #16\n\t" + "add %[r], %[r], #16\n\t" + "subs r5, r5, #16\n\t" + "bgt 4b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); +} + +#else +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
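+ *
+ * The unrolled assembly below is a product-scanning (Comba) multiply:
+ * result word k accumulates every partial product a[i]*b[j] with
+ * i + j == k in a three-register accumulator before being stored.  A
+ * portable C sketch of the same idea (illustrative only; this build
+ * uses the assembly, and the sketch assumes <stdint.h> types).  The
+ * pair hi:acc is a 96-bit column accumulator, with hi += (acc < t)
+ * picking up the carry out of the low 64 bits:
+ *
+ *     void mul_comba_8(uint32_t* r, const uint32_t* a, const uint32_t* b)
+ *     {
+ *         uint64_t acc = 0;
+ *         uint32_t hi = 0;
+ *         int i, k;
+ *         for (k = 0; k < 15; k++) {
+ *             for (i = (k < 8) ? 0 : k - 7; i <= k && i < 8; i++) {
+ *                 uint64_t t = (uint64_t)a[i] * b[k - i];
+ *                 acc += t;
+ *                 hi += (acc < t);
+ *             }
+ *             r[k] = (uint32_t)acc;
+ *             acc = (acc >> 32) | ((uint64_t)hi << 32);
+ *             hi = 0;
+ *         }
+ *         r[15] = (uint32_t)acc;
+ *     }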
+ */ +static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "sub sp, sp, #32\n\t" + "mov r10, #0\n\t" + "# A[0] * B[0]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r3, r4, r8, r9\n\t" + "mov r5, #0\n\t" + "str r3, [sp]\n\t" + "# A[0] * B[1]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[0]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #4]\n\t" + "# A[0] * B[2]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[1]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[0]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #8]\n\t" + "# A[0] * B[3]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[2]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[1]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[0]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #12]\n\t" + "# A[0] * B[4]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[3]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[2]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[1]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[0]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #16]\n\t" + "# A[0] * B[5]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[4]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[3]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[2]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" 
+ "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[1]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[0]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #20]\n\t" + "# A[0] * B[6]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[5]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[4]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[3]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[2]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[1]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[0]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #24]\n\t" + "# A[0] * B[7]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[6]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[5]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[4]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[3]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[2]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[1]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[0]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #28]\n\t" + "# A[1] * B[7]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[2] * B[6]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[5]\n\t" + "ldr r8, [%[a], 
#12]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[4]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[3]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[2]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[1]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #32]\n\t" + "# A[2] * B[7]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[3] * B[6]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[5]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[4]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[3]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[2]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #36]\n\t" + "# A[3] * B[7]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[4] * B[6]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[5]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[4]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[3]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #40]\n\t" + "# A[4] * B[7]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[5] * B[6]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[5]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[4]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #44]\n\t" + "# A[5] * B[7]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[6] * B[6]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[5]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #48]\n\t" + "# A[6] * B[7]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[7] * B[6]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #52]\n\t" + "# A[7] * B[7]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "str r5, [%[r], #56]\n\t" + "str r3, [%[r], #60]\n\t" + "ldr r3, [sp, #0]\n\t" + "ldr r4, [sp, #4]\n\t" + "ldr r5, [sp, #8]\n\t" + "ldr r6, [sp, #12]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r3, [sp, #16]\n\t" + "ldr r4, [sp, #20]\n\t" + "ldr r5, [sp, #24]\n\t" + "ldr r6, [sp, #28]\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "add sp, sp, #32\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r14, #0\n\t" + "add r12, %[a], #32\n\t" + "\n1:\n\t" + "subs %[c], r14, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b]], #4\n\t" + "ldr r8, [%[b]], #4\n\t" + "ldr r9, [%[b]], #4\n\t" + "ldr r10, [%[b]], #4\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "str r3, [%[a]], #4\n\t" + "str r4, [%[a]], #4\n\t" + "str r5, [%[a]], #4\n\t" + "str r6, [%[a]], #4\n\t" + "sbc %[c], r14, r14\n\t" + "cmp %[a], r12\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + ); + + return c; +} + +#else +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. 
+ */ +static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r2, [%[a], #0]\n\t" + "ldr r3, [%[a], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "ldr r8, [%[b], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #0]\n\t" + "str r3, [%[a], #4]\n\t" + "str r4, [%[a], #8]\n\t" + "str r5, [%[a], #12]\n\t" + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "ldr r8, [%[b], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #16]\n\t" + "str r3, [%[a], #20]\n\t" + "str r4, [%[a], #24]\n\t" + "str r5, [%[a], #28]\n\t" + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [a] "r" (a), [b] "r" (b) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r5, r3, %[b], r8\n\t" + "mov r4, #0\n\t" + "str r5, [%[r]]\n\t" + "mov r5, #0\n\t" + "mov r9, #4\n\t" + "1:\n\t" + "ldr r8, [%[a], r9]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], r9]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r9, r9, #4\n\t" + "cmp r9, #32\n\t" + "blt 1b\n\t" + "str r3, [%[r], #32]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#else + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r3, r4, %[b], r8\n\t" + "mov r5, #0\n\t" + "str r3, [%[r]]\n\t" + "# A[1] * B\n\t" + "ldr r8, [%[a], #4]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #4]\n\t" + "# A[2] * B\n\t" + "ldr r8, [%[a], #8]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #8]\n\t" + "# A[3] * B\n\t" + "ldr r8, [%[a], #12]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #12]\n\t" + "# A[4] * B\n\t" + "ldr r8, [%[a], #16]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #16]\n\t" + "# A[5] * B\n\t" + "ldr r8, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #20]\n\t" + "# A[6] * B\n\t" + "ldr r8, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #24]\n\t" + "# A[7] * B\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "str r4, [%[r], #28]\n\t" + "str r5, 
[%[r], #32]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+#endif
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr r5, %[div], #1\n\t"
+        "add r5, r5, #1\n\t"
+        "mov r6, %[d0]\n\t"
+        "mov r7, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "subs r8, r5, r7\n\t"
+        "sbc r8, r8, r8\n\t"
+        "add %[r], %[r], %[r]\n\t"
+        "sub %[r], %[r], r8\n\t"
+        "and r8, r8, r5\n\t"
+        "subs r7, r7, r8\n\t"
+        "# Next 30 bits\n\t"
+        "mov r4, #29\n\t"
+        "1:\n\t"
+        "movs r6, r6, lsl #1\n\t"
+        "adc r7, r7, r7\n\t"
+        "subs r8, r5, r7\n\t"
+        "sbc r8, r8, r8\n\t"
+        "add %[r], %[r], %[r]\n\t"
+        "sub %[r], %[r], r8\n\t"
+        "and r8, r8, r5\n\t"
+        "subs r7, r7, r8\n\t"
+        "subs r4, r4, #1\n\t"
+        "bpl 1b\n\t"
+        "add %[r], %[r], %[r]\n\t"
+        "add %[r], %[r], #1\n\t"
+        "umull r4, r5, %[r], %[div]\n\t"
+        "subs r4, %[d0], r4\n\t"
+        "sbc r5, %[d1], r5\n\t"
+        "add %[r], %[r], r5\n\t"
+        "umull r4, r5, %[r], %[div]\n\t"
+        "subs r4, %[d0], r4\n\t"
+        "sbc r5, %[d1], r5\n\t"
+        "add %[r], %[r], r5\n\t"
+        "subs r8, %[div], r4\n\t"
+        "sbc r8, r8, r8\n\t"
+        "sub %[r], %[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r7", "r8"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<8; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+#endif
+}
+
+/* Divide a by d and put the remainder into r (m*d + r = a).
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[16], t2[9];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+
+    div = d[7];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 8);
+    for (i=7; i>=0; i--) {
+        r1 = div_256_word_8(t1[8 + i], t1[8 + i - 1], div);
+
+        sp_256_mul_d_8(t2, d, r1);
+        t1[8 + i] += sp_256_sub_in_place_8(&t1[i], t2);
+        t1[8 + i] -= t2[8];
+        sp_256_mask_8(t2, d, t1[8 + i]);
+        t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
+        sp_256_mask_8(t2, d, t1[8 + i]);
+        t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_256_cmp_8(t1, d) >= 0;
+    sp_256_cond_sub_8(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
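+ *
+ * With m == NULL the quotient from sp_256_div_8() is discarded, making
+ * this the remainder-only entry point.  E.g. reducing a 16-word product
+ * t into an 8-word value rem, where x and y are 8-word (256-bit)
+ * inputs (illustrative only):
+ *
+ *     sp_digit t[16];
+ *     sp_digit rem[8];
+ *     sp_256_mul_8(t, x, y);
+ *     (void)sp_256_mod_8(rem, t, p256_order);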
+ */ +static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_256_div_8(a, m, NULL, r); +} + +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#ifdef WOLFSSL_SP_SMALL +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "sub sp, sp, #64\n\t" + "mov r12, #0\n\t" + "mov r6, #0\n\t" + "mov r7, #0\n\t" + "mov r8, #0\n\t" + "mov r5, #0\n\t" + "\n1:\n\t" + "subs r3, r5, #28\n\t" + "it cc\n\t" + "movcc r3, r12\n\t" + "sub r4, r5, r3\n\t" + "\n2:\n\t" + "cmp r4, r3\n\t" + "beq 4f\n\t" + "ldr r14, [%[a], r3]\n\t" + "ldr r9, [%[a], r4]\n\t" + "umull r9, r10, r14, r9\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "bal 5f\n\t" + "\n4:\n\t" + "ldr r14, [%[a], r3]\n\t" + "umull r9, r10, r14, r14\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "\n5:\n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, #32\n\t" + "beq 3f\n\t" + "cmp r3, r4\n\t" + "bgt 3f\n\t" + "cmp r3, r5\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "str r6, [sp, r5]\n\t" + "mov r6, r7\n\t" + "mov r7, r8\n\t" + "mov r8, #0\n\t" + "add r5, r5, #4\n\t" + "cmp r5, #56\n\t" + "ble 1b\n\t" + "str r6, [sp, r5]\n\t" + "\n4:\n\t" + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r3, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r3, [%[r], #12]\n\t" + "add sp, sp, #16\n\t" + "add %[r], %[r], #16\n\t" + "subs r5, r5, #16\n\t" + "bgt 4b\n\t" + : [r] "+r" (r) + : [a] "r" (a) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + ); +} + +#else +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
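+ *
+ * Squaring needs roughly half the multiplies of a general multiply:
+ * for i != j the product a[i]*a[j] occurs twice in the result, so the
+ * code below computes it once and adds it into the accumulator twice,
+ * while each diagonal term a[i]*a[i] is added only once.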
+ */ +static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "sub sp, sp, #32\n\t" + "mov r14, #0\n\t" + "# A[0] * A[0]\n\t" + "ldr r10, [%[a], #0]\n\t" + "umull r8, r3, r10, r10\n\t" + "mov r4, #0\n\t" + "str r8, [sp]\n\t" + "# A[0] * A[1]\n\t" + "ldr r10, [%[a], #4]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [sp, #4]\n\t" + "# A[0] * A[2]\n\t" + "ldr r10, [%[a], #8]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r14, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "# A[1] * A[1]\n\t" + "ldr r10, [%[a], #4]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "str r4, [sp, #8]\n\t" + "# A[0] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r14, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "# A[1] * A[2]\n\t" + "ldr r10, [%[a], #8]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "str r2, [sp, #12]\n\t" + "# A[0] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[1] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[2] * A[2]\n\t" + "ldr r10, [%[a], #8]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [sp, #16]\n\t" + "# A[0] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #20]\n\t" + "# A[0] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #24]\n\t" + "# A[0] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #28]\n\t" + "# A[1] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[2] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #32]\n\t" + "# A[2] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[3] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #36]\n\t" + "# A[3] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[4] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[5] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [%[r], #40]\n\t" + "# A[4] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r14, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "# A[5] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds 
r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "str r4, [%[r], #44]\n\t" + "# A[5] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r14, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "# A[6] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "str r2, [%[r], #48]\n\t" + "# A[6] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [%[r], #52]\n\t" + "# A[7] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adc r2, r2, r9\n\t" + "str r4, [%[r], #56]\n\t" + "str r2, [%[r], #60]\n\t" + "ldr r2, [sp, #0]\n\t" + "ldr r3, [sp, #4]\n\t" + "ldr r4, [sp, #8]\n\t" + "ldr r8, [sp, #12]\n\t" + "str r2, [%[r], #0]\n\t" + "str r3, [%[r], #4]\n\t" + "str r4, [%[r], #8]\n\t" + "str r8, [%[r], #12]\n\t" + "ldr r2, [sp, #16]\n\t" + "ldr r3, [sp, #20]\n\t" + "ldr r4, [sp, #24]\n\t" + "ldr r8, [sp, #28]\n\t" + "str r2, [%[r], #16]\n\t" + "str r3, [%[r], #20]\n\t" + "str r4, [%[r], #24]\n\t" + "str r8, [%[r], #28]\n\t" + "add sp, sp, #32\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Order-2 for the P256 curve. */ +static const uint32_t p256_order_minus_2[8] = { + 0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU, + 0x00000000U,0xffffffffU +}; +#else +/* The low half of the order-2 of the P256 curve. */ +static const uint32_t p256_order_low[4] = { + 0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU +}; +#endif /* WOLFSSL_SP_SMALL */ + +/* Multiply two number mod the order of P256 curve. (r = a * b mod order) + * + * r Result of the multiplication. + * a First operand of the multiplication. + * b Second operand of the multiplication. + */ +static void sp_256_mont_mul_order_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_256_mul_8(r, a, b); + sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order); +} + +/* Square number mod the order of P256 curve. (r = a * a mod order) + * + * r Result of the squaring. + * a Number to square. + */ +static void sp_256_mont_sqr_order_8(sp_digit* r, const sp_digit* a) +{ + sp_256_sqr_8(r, a); + sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order); +} + +#ifndef WOLFSSL_SP_SMALL +/* Square number mod the order of P256 curve a number of times. + * (r = a ^ n mod order) + * + * r Result of the squaring. + * a Number to square. 
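+ * n  Number of times to square, i.e. r = a ^ (2 ^ n) mod order.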
+ */
+static void sp_256_mont_sqr_n_order_8(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_256_mont_sqr_order_8(r, a);
+    for (i=1; i<n; i++) {
+        sp_256_mont_sqr_order_8(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_order_8(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 8);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_order_8(t, t);
+        if ((p256_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 8U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 8;
+    sp_digit* t3 = td + 4 * 8;
+    int i;
+
+    /* t = a^2 */
+    sp_256_mont_sqr_order_8(t, a);
+    /* t = a^3 = t * a */
+    sp_256_mont_mul_order_8(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_256_mont_sqr_n_order_8(t2, t, 2);
+    /* t3= a^f = t2 * t */
+    sp_256_mont_mul_order_8(t3, t2, t);
+    /* t2= a^f0 = t3 ^ 2 ^ 4 */
+    sp_256_mont_sqr_n_order_8(t2, t3, 4);
+    /* t = a^ff = t2 * t3 */
+    sp_256_mont_mul_order_8(t, t2, t3);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_256_mont_sqr_n_order_8(t2, t, 8);
+    /* t = a^ffff = t2 * t */
+    sp_256_mont_mul_order_8(t, t2, t);
+    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+    sp_256_mont_sqr_n_order_8(t2, t, 16);
+    /* t = a^ffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t, t2, t);
+    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
+    sp_256_mont_sqr_n_order_8(t2, t, 64);
+    /* t2= a^ffffffff00000000ffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
+    sp_256_mont_sqr_n_order_8(t2, t2, 32);
+    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
+    for (i=127; i>=112; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+    for (i=107; i>=64; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+    for (i=59; i>=32; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+    for (i=27; i>=0; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+    sp_256_mont_mul_order_8(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 256 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Second part of result as an mp_int.
+ * km       Optional scalar k to use; when NULL or zero a random k is
+ *          generated, otherwise km is consumed and zeroised.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*8];
+    sp_digit xd[2*8];
+    sp_digit kd[2*8];
+    sp_digit rd[2*8];
+    sp_digit td[3 * 2*8];
+    sp_point_256 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int32_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 8;
+        x = d + 2 * 8;
+        k = d + 4 * 8;
+        r = d + 6 * 8;
+        tmp = d + 8 * 8;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(e, 8, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_256_from_mp(x, 8, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_256_ecc_gen_k_8(rng, k);
+        }
+        else {
+            sp_256_from_mp(k, 8, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+            err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 8U);
+            sp_256_norm_8(r);
+            c = sp_256_cmp_8(r, p256_order);
+            sp_256_cond_sub_8(r, r, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_8(r);
+
+            /* Conv k to Montgomery form (mod order) */
+            sp_256_mul_8(k, k, p256_norm_order);
+            err = sp_256_mod_8(k, k, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_8(k);
+            /* kInv = 1/k mod order */
+            sp_256_mont_inv_order_8(kInv, k, tmp);
+            sp_256_norm_8(kInv);
+
+            /* s = r * x + e */
+            sp_256_mul_8(x, x, r);
+            err = sp_256_mod_8(x, x, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_8(x);
+            carry = sp_256_add_8(s, e, x);
+            sp_256_cond_sub_8(s, s, p256_order, 0 - carry);
+            sp_256_norm_8(s);
+            c = sp_256_cmp_8(s, p256_order);
+            sp_256_cond_sub_8(s, s, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_8(s);
+
+            /* s = s * k^-1 mod order */
+            sp_256_mont_mul_order_8(s, s, kInv);
+            sp_256_norm_8(s);
+
+            /* Check that signature is usable.
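+             * An s of zero is not a valid signature component, so on
+             * that (unlikely) outcome the outer loop retries with a
+             * fresh random k, at most SP_ECC_MAX_SIG_GEN times.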
+             */
+            if (sp_256_iszero_8(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 8);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 8U);
+#endif
+    sp_256_point_free_8(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 256)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash     Hash to verify.
+ * hashLen  Length of the hash data.
+ * pX       X ordinate of the public point.
+ * pY       Y ordinate of the public point.
+ * pZ       Z ordinate of the public point.
+ * r        First part of the signature as an mp_int.
+ * sm       Second part of the signature as an mp_int.
+ * res      Result of the verification - 1 when the signature is valid,
+ *          0 otherwise.
+ * heap     Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*8];
+    sp_digit u2d[2*8];
+    sp_digit sd[2*8];
+    sp_digit tmpd[2*8 * 5];
+    sp_point_256 p1d;
+    sp_point_256 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* p1;
+    sp_point_256* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+
+    err = sp_256_point_new_8(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 8, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1 = d + 0 * 8;
+        u2 = d + 2 * 8;
+        s = d + 4 * 8;
+        tmp = d + 6 * 8;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(u1, 8, hash, (int)hashLen);
+        sp_256_from_mp(u2, 8, r);
+        sp_256_from_mp(s, 8, sm);
+        sp_256_from_mp(p2->x, 8, pX);
+        sp_256_from_mp(p2->y, 8, pY);
+        sp_256_from_mp(p2->z, 8, pZ);
+
+        {
+            sp_256_mul_8(s, s, p256_norm_order);
+        }
+        err = sp_256_mod_8(s, s, p256_order);
+    }
+    if (err == MP_OKAY) {
+        sp_256_norm_8(s);
+        {
+            sp_256_mont_inv_order_8(s, s, tmp);
+            sp_256_mont_mul_order_8(u1, u1, s);
+            sp_256_mont_mul_order_8(u2, u2, s);
+        }
+
+        err = sp_256_ecc_mulmod_base_8(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_8(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_256_proj_point_add_8(p1, p1, p2, tmp);
+            if (sp_256_iszero_8(p1->z)) {
+                if (sp_256_iszero_8(p1->x) &&
+                        sp_256_iszero_8(p1->y)) {
+                    sp_256_proj_point_dbl_8(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    p1->x[7] = 0;
+                    XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_256_from_mp(u2, 8, r);
+        err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_256_mont_sqr_8(p1->z, p1->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod);
+        *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_256_from_mp(u2, 8, r);
+            carry = sp_256_add_8(u2, u2, p256_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_256_norm_8(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_256_cmp_8(u2, p256_mod);
+                if (c < 0) {
+                    /* Convert to Montgomery form */
+                    err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_256_mont_mul_8(u1, u2, p1->z, p256_mod,
+                                          p256_mp_mod);
+                        *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_256_point_free_8(p1, 0, heap);
+    sp_256_point_free_8(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_256_ecc_is_point_8(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*8];
+    sp_digit t2d[2*8];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 8;
+        t2 = d + 2 * 8;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        /* Check y^2 - x^3 + 3x == b (mod p), i.e. the curve equation. */
+        sp_256_sqr_8(t1, point->y);
+        (void)sp_256_mod_8(t1, t1, p256_mod);
+        sp_256_sqr_8(t2, point->x);
+        (void)sp_256_mod_8(t2, t2, p256_mod);
+        sp_256_mul_8(t2, t2, point->x);
+        (void)sp_256_mod_8(t2, t2, p256_mod);
+        (void)sp_256_sub_8(t2, p256_mod, t2);
+        sp_256_mont_add_8(t1, t1, t2, p256_mod);
+
+        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+
+        if (sp_256_cmp_8(t1, p256_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 pubd;
+#endif
+    sp_point_256* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_256_point_new_8(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_256_from_mp(pub->x, 8, pX);
+        sp_256_from_mp(pub->y, 8, pY);
+        sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
+
+        err = sp_256_ecc_is_point_8(pub, NULL);
+    }
+
+    sp_256_point_free_8(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[8];
+    sp_point_256 pubd;
+    sp_point_256 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_256* pub;
+    sp_point_256* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_256_point_new_8(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                  DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_256_from_mp(pub->x, 8, pX);
+        sp_256_from_mp(pub->y, 8, pY);
+        sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
+        sp_256_from_mp(priv, 8, privm);
+
+        /* Check point at infinity. */
+        if ((sp_256_iszero_8(pub->x) != 0) &&
+            (sp_256_iszero_8(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_256_cmp_8(pub->x, p256_mod) >= 0 ||
+            sp_256_cmp_8(pub->y, p256_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_256_ecc_is_point_8(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+        err = sp_256_ecc_mulmod_8(p, pub, p256_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_256_iszero_8(p->x) == 0) ||
+            (sp_256_iszero_8(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+        err = sp_256_ecc_mulmod_base_8(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_256_cmp_8(p->x, pub->x) != 0 ||
+            sp_256_cmp_8(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, heap);
+    sp_256_point_free_8(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ) + * + * pX First EC point's X ordinate. + * pY First EC point's Y ordinate. + * pZ First EC point's Z ordinate. + * qX Second EC point's X ordinate. + * qY Second EC point's Y ordinate. + * qZ Second EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 8 * 5]; + sp_point_256 pd; + sp_point_256 qd; +#endif + sp_digit* tmp; + sp_point_256* p; + sp_point_256* q = NULL; + int err; + + err = sp_256_point_new_8(NULL, pd, p); + if (err == MP_OKAY) { + err = sp_256_point_new_8(NULL, qd, q); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 8, pX); + sp_256_from_mp(p->y, 8, pY); + sp_256_from_mp(p->z, 8, pZ); + sp_256_from_mp(q->x, 8, qX); + sp_256_from_mp(q->y, 8, qY); + sp_256_from_mp(q->z, 8, qZ); + + sp_256_proj_point_add_8(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_8(q, 0, NULL); + sp_256_point_free_8(p, 0, NULL); + + return err; +} + +/* Double a projective EC point. + * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
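+ *
+ * The (X, Y, Z) triples used by these entry points are Jacobian
+ * projective coordinates, i.e. the affine point is (X/Z^2, Y/Z^3);
+ * call sp_ecc_map_256() below to normalise a result before comparing
+ * or exporting its ordinates.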
+ */ +int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 8 * 2]; + sp_point_256 pd; +#endif + sp_digit* tmp; + sp_point_256* p; + int err; + + err = sp_256_point_new_8(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 8, pX); + sp_256_from_mp(p->y, 8, pY); + sp_256_from_mp(p->z, 8, pZ); + + sp_256_proj_point_dbl_8(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_8(p, 0, NULL); + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 8 * 4]; + sp_point_256 pd; +#endif + sp_digit* tmp; + sp_point_256* p; + int err; + + err = sp_256_point_new_8(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 4, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 8, pX); + sp_256_from_mp(p->y, 8, pY); + sp_256_from_mp(p->z, 8, pZ); + + sp_256_map_8(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, pZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_8(p, 0, NULL); + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
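+ *
+ * The addition chain in the body computes y ^ ((p + 1) / 4) mod p, which
+ * yields a square root whenever one exists because the P256 prime
+ * satisfies p = 3 (mod 4).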
+ */ +static int sp_256_mont_sqrt_8(sp_digit* y) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit t1d[2 * 8]; + sp_digit t2d[2 * 8]; +#endif + sp_digit* t1; + sp_digit* t2; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = d + 0 * 8; + t2 = d + 2 * 8; +#else + t1 = t1d; + t2 = t2d; +#endif + + { + /* t2 = y ^ 0x2 */ + sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0x3 */ + sp_256_mont_mul_8(t1, t2, y, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xc */ + sp_256_mont_sqr_n_8(t2, t1, 2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xf */ + sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xf0 */ + sp_256_mont_sqr_n_8(t2, t1, 4, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xff */ + sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xff00 */ + sp_256_mont_sqr_n_8(t2, t1, 8, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffff */ + sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xffff0000 */ + sp_256_mont_sqr_n_8(t2, t1, 16, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff */ + sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000000 */ + sp_256_mont_sqr_n_8(t1, t1, 32, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001 */ + sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */ + sp_256_mont_sqr_n_8(t1, t1, 96, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */ + sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod); + sp_256_mont_sqr_n_8(y, t1, 94, p256_mod, p256_mp_mod); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
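+ *
+ * Y is recovered from the curve equation y^2 = x^3 - 3x + b (mod p); of
+ * the two roots, the one whose parity matches odd is returned.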
+ */
+int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 8];
+    sp_digit yd[2 * 8];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        x = d + 0 * 8;
+        y = d + 2 * 8;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        sp_256_from_mp(x, 8, xm);
+        err = sp_256_mod_mul_norm_8(x, x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+        {
+            sp_256_mont_sqr_8(y, x, p256_mod, p256_mp_mod);
+            sp_256_mont_mul_8(y, y, x, p256_mod, p256_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_256_mont_sub_8(y, y, x, p256_mod);
+        sp_256_mont_sub_8(y, y, x, p256_mod);
+        sp_256_mont_sub_8(y, y, x, p256_mod);
+        /* y = x^3 - 3x + b */
+        err = sp_256_mod_mul_norm_8(x, p256_b, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_256_mont_add_8(y, y, x, p256_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_256_mont_sqrt_8(y);
+    }
+    if (err == MP_OKAY) {
+        XMEMSET(y + 8, 0, 8U * sizeof(sp_digit));
+        sp_256_mont_reduce_8(y, p256_mod, p256_mp_mod);
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_256_mont_sub_8(y, p256_mod, y, p256_mod);
+        }
+
+        err = sp_256_to_mp(y, ym);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+
+/* Point structure to use. */
+typedef struct sp_point_384 {
+    sp_digit x[2 * 12];
+    sp_digit y[2 * 12];
+    sp_digit z[2 * 12];
+    int infinity;
+} sp_point_384;
+
+/* The modulus (prime) of the curve P384. */
+static const sp_digit p384_mod[12] = {
+    0xffffffff,0x00000000,0x00000000,0xffffffff,0xfffffffe,0xffffffff,
+    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+/* The Montgomery normalizer for modulus of the curve P384. */
+static const sp_digit p384_norm_mod[12] = {
+    0x00000001,0xffffffff,0xffffffff,0x00000000,0x00000001,0x00000000,
+    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
+};
+/* The Montgomery multiplier for modulus of the curve P384. */
+static sp_digit p384_mp_mod = 0x00000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                            defined(HAVE_ECC_VERIFY)
+/* The order of the curve P384. */
+static const sp_digit p384_order[12] = {
+    0xccc52973,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
+    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+#endif
+/* The order of the curve P384 minus 2. */
+static const sp_digit p384_order2[12] = {
+    0xccc52971,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
+    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for order of the curve P384. */
+static const sp_digit p384_norm_order[12] = {
+    0x333ad68d,0x1313e695,0xb74f5885,0xa7e5f24d,0x0bc8d220,0x389cb27e,
+    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for order of the curve P384.
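+ * This is the digit representing the negative inverse of the order mod
+ * 2^32, as consumed by Montgomery reduction modulo the order.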
+ */
+static sp_digit p384_mp_order = 0xe88fdc45;
+#endif
+/* The base point of curve P384. */
+static const sp_point_384 p384_base = {
+    /* X ordinate */
+    {
+        0x72760ab7,0x3a545e38,0xbf55296c,0x5502f25d,0x82542a38,0x59f741e0,
+        0x8ba79b98,0x6e1d3b62,0xf320ad74,0x8eb1c71e,0xbe8b0537,0xaa87ca22,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x90ea0e5f,0x7a431d7c,0x1d7e819d,0x0a60b1ce,0xb5f0b8c0,0xe9da3113,
+        0x289a147c,0xf8f41dbd,0x9292dc29,0x5d9e98bf,0x96262c6f,0x3617de4a,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p384_b[12] = {
+    0xd3ec2aef,0x2a85c8ed,0x8a2ed19d,0xc656398d,0x5013875a,0x0314088f,
+    0xfe814112,0x181d9c6e,0xe3f82d19,0x988e056b,0xe23ee7e4,0xb3312fa7
+};
+#endif
+
+static int sp_384_point_new_ex_12(void* heap, sp_point_384* sp, sp_point_384** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), &(sp), &(p))
+#endif
+
+
+static void sp_384_point_free_12(sp_point_384* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by Montgomery normalizer mod modulus (prime).
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
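+ *
+ * Multiplying by the normalizer (2^384 mod m) converts a into Montgomery
+ * form; the row comments in the body give the coefficient applied to each
+ * input word when reducing for the sparse P384 prime.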
+ */ +static int sp_384_mod_mul_norm_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + int64_t* t; +#else + int64_t t[12]; +#endif + int64_t o; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (int64_t*)XMALLOC(sizeof(int64_t) * 12, NULL, DYNAMIC_TYPE_ECC); + if (t == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + /* 1 0 0 0 0 0 0 0 1 1 0 -1 */ + t[0] = 0 + (uint64_t)a[0] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[11]; + /* -1 1 0 0 0 0 0 0 -1 0 1 1 */ + t[1] = 0 - (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[8] + (uint64_t)a[10] + (uint64_t)a[11]; + /* 0 -1 1 0 0 0 0 0 0 -1 0 1 */ + t[2] = 0 - (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[9] + (uint64_t)a[11]; + /* 1 0 -1 1 0 0 0 0 1 1 -1 -1 */ + t[3] = 0 + (uint64_t)a[0] - (uint64_t)a[2] + (uint64_t)a[3] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[10] - (uint64_t)a[11]; + /* 1 1 0 -1 1 0 0 0 1 2 1 -2 */ + t[4] = 0 + (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[3] + (uint64_t)a[4] + (uint64_t)a[8] + 2 * (uint64_t)a[9] + (uint64_t)a[10] - 2 * (uint64_t)a[11]; + /* 0 1 1 0 -1 1 0 0 0 1 2 1 */ + t[5] = 0 + (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[4] + (uint64_t)a[5] + (uint64_t)a[9] + 2 * (uint64_t)a[10] + (uint64_t)a[11]; + /* 0 0 1 1 0 -1 1 0 0 0 1 2 */ + t[6] = 0 + (uint64_t)a[2] + (uint64_t)a[3] - (uint64_t)a[5] + (uint64_t)a[6] + (uint64_t)a[10] + 2 * (uint64_t)a[11]; + /* 0 0 0 1 1 0 -1 1 0 0 0 1 */ + t[7] = 0 + (uint64_t)a[3] + (uint64_t)a[4] - (uint64_t)a[6] + (uint64_t)a[7] + (uint64_t)a[11]; + /* 0 0 0 0 1 1 0 -1 1 0 0 0 */ + t[8] = 0 + (uint64_t)a[4] + (uint64_t)a[5] - (uint64_t)a[7] + (uint64_t)a[8]; + /* 0 0 0 0 0 1 1 0 -1 1 0 0 */ + t[9] = 0 + (uint64_t)a[5] + (uint64_t)a[6] - (uint64_t)a[8] + (uint64_t)a[9]; + /* 0 0 0 0 0 0 1 1 0 -1 1 0 */ + t[10] = 0 + (uint64_t)a[6] + (uint64_t)a[7] - (uint64_t)a[9] + (uint64_t)a[10]; + /* 0 0 0 0 0 0 0 1 1 0 -1 1 */ + t[11] = 0 + (uint64_t)a[7] + (uint64_t)a[8] - (uint64_t)a[10] + (uint64_t)a[11]; + + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + t[8] += t[7] >> 32; t[7] &= 0xffffffff; + t[9] += t[8] >> 32; t[8] &= 0xffffffff; + t[10] += t[9] >> 32; t[9] &= 0xffffffff; + t[11] += t[10] >> 32; t[10] &= 0xffffffff; + o = t[11] >> 32; t[11] &= 0xffffffff; + t[0] += o; + t[1] -= o; + t[3] += o; + t[4] += o; + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + t[8] += t[7] >> 32; t[7] &= 0xffffffff; + t[9] += t[8] >> 32; t[8] &= 0xffffffff; + t[10] += t[9] >> 32; t[9] &= 0xffffffff; + t[11] += t[10] >> 32; t[10] &= 0xffffffff; + + r[0] = t[0]; + r[1] = t[1]; + r[2] = t[2]; + r[3] = t[3]; + r[4] = t[4]; + r[5] = t[5]; + r[6] = t[6]; + r[7] = t[7]; + r[8] = t[8]; + r[9] = t[9]; + r[10] = t[10]; + r[11] = t[11]; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) + XFREE(t, NULL, DYNAMIC_TYPE_ECC); +#endif + + return 
err; +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 32 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 32 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 32U) <= (word32)DIGIT_BIT) { + s += 32U; + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 32) { + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + s = 32 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Convert a point of type ecc_point to type sp_point_384. + * + * p Point of type sp_point_384 (result). + * pm Point of type ecc_point. + */ +static void sp_384_point_from_ecc_point_12(sp_point_384* p, const ecc_point* pm) +{ + XMEMSET(p->x, 0, sizeof(p->x)); + XMEMSET(p->y, 0, sizeof(p->y)); + XMEMSET(p->z, 0, sizeof(p->z)); + sp_384_from_mp(p->x, 12, pm->x); + sp_384_from_mp(p->y, 12, pm->y); + sp_384_from_mp(p->z, 12, pm->z); + p->infinity = 0; +} + +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_384_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 12); + r->used = 12; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 12; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 32 - s; + } + r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 12; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Convert a point of type sp_point_384 to type ecc_point. + * + * p Point of type sp_point_384. + * pm Point of type ecc_point (result). + * returns MEMORY_E when allocation of memory in ecc_point fails otherwise + * MP_OKAY. 
+ */ +static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm) +{ + int err; + + err = sp_384_to_mp(p->x, pm->x); + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, pm->y); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, pm->z); + } + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "sub sp, sp, #96\n\t" + "mov r5, #0\n\t" + "mov r6, #0\n\t" + "mov r7, #0\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "subs r3, r5, #44\n\t" + "it cc\n\t" + "movcc r3, #0\n\t" + "sub r4, r5, r3\n\t" + "\n2:\n\t" + "ldr r14, [%[a], r3]\n\t" + "ldr r12, [%[b], r4]\n\t" + "umull r9, r10, r14, r12\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, #48\n\t" + "beq 3f\n\t" + "cmp r3, r5\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "str r6, [sp, r5]\n\t" + "mov r6, r7\n\t" + "mov r7, r8\n\t" + "mov r8, #0\n\t" + "add r5, r5, #4\n\t" + "cmp r5, #88\n\t" + "ble 1b\n\t" + "str r6, [sp, r5]\n\t" + "\n4:\n\t" + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r3, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r3, [%[r], #12]\n\t" + "add sp, sp, #16\n\t" + "add %[r], %[r], #16\n\t" + "subs r5, r5, #16\n\t" + "bgt 4b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); +} + +#else +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
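+ *
+ * This unrolled variant accumulates each product column in three registers
+ * (sum, carry, carry-of-carry) and stores one output word per column,
+ * rather than looping as the WOLFSSL_SP_SMALL version above does.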
+ */ +static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "sub sp, sp, #48\n\t" + "mov r10, #0\n\t" + "# A[0] * B[0]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r3, r4, r8, r9\n\t" + "mov r5, #0\n\t" + "str r3, [sp]\n\t" + "# A[0] * B[1]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[0]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #4]\n\t" + "# A[0] * B[2]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[1]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[0]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #8]\n\t" + "# A[0] * B[3]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[2]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[1]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[0]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #12]\n\t" + "# A[0] * B[4]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[3]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[2]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[1]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[0]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #16]\n\t" + "# A[0] * B[5]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[4]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[3]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[2]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" 
+ "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[1]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[0]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #20]\n\t" + "# A[0] * B[6]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[5]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[4]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[3]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[2]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[1]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[0]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #24]\n\t" + "# A[0] * B[7]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[6]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[5]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[4]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[3]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[2]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[1]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[0]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #28]\n\t" + "# A[0] * B[8]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[7]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[6]\n\t" + "ldr r8, [%[a], 
#8]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[5]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[4]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[3]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[2]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[1]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[0]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #32]\n\t" + "# A[0] * B[9]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[8]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[7]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[6]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[5]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[4]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[3]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[2]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[1]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[0]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #36]\n\t" + "# A[0] * B[10]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[9]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[8]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, 
r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[7]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[6]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[5]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[4]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[3]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[2]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[1]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[0]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #40]\n\t" + "# A[0] * B[11]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[10]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[9]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[8]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[7]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[6]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[5]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[4]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[3]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[2]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[1]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[0]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #44]\n\t" + "# A[1] * B[11]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[2] * B[10]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[9]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[8]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[7]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[6]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[5]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[4]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[3]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[2]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[1]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #48]\n\t" + "# A[2] * B[11]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[3] * B[10]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[9]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[8]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[7]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[6]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[5]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[4]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * 
B[3]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[2]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #52]\n\t" + "# A[3] * B[11]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[4] * B[10]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[9]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[8]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[7]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[6]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[5]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[4]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[3]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #56]\n\t" + "# A[4] * B[11]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[5] * B[10]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[9]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[8]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[7]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[6]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[5]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[4]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #60]\n\t" + "# A[5] * B[11]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], 
#44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[6] * B[10]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[9]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[8]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[7]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[6]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[5]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #64]\n\t" + "# A[6] * B[11]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[7] * B[10]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[9]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[8]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[7]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[6]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #68]\n\t" + "# A[7] * B[11]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[8] * B[10]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[9]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[8]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[7]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #72]\n\t" + "# A[8] * B[11]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[9] * B[10]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, 
r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[9]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[8]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #76]\n\t" + "# A[9] * B[11]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[10] * B[10]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[9]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #80]\n\t" + "# A[10] * B[11]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[11] * B[10]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #84]\n\t" + "# A[11] * B[11]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "str r4, [%[r], #88]\n\t" + "str r5, [%[r], #92]\n\t" + "ldr r3, [sp, #0]\n\t" + "ldr r4, [sp, #4]\n\t" + "ldr r5, [sp, #8]\n\t" + "ldr r6, [sp, #12]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r3, [sp, #16]\n\t" + "ldr r4, [sp, #20]\n\t" + "ldr r5, [sp, #24]\n\t" + "ldr r6, [sp, #28]\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r3, [sp, #32]\n\t" + "ldr r4, [sp, #36]\n\t" + "ldr r5, [sp, #40]\n\t" + "ldr r6, [sp, #44]\n\t" + "str r3, [%[r], #32]\n\t" + "str r4, [%[r], #36]\n\t" + "str r5, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "add sp, sp, #48\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
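+ *
+ * The mask makes the operation constant-time: b is ANDed with m, so the
+ * value stored is a - b when m is all ones and a unchanged when m is zero.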
+ */ +static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r9, #0\n\t" + "mov r8, #0\n\t" + "1:\n\t" + "subs %[c], r9, %[c]\n\t" + "ldr r4, [%[a], r8]\n\t" + "ldr r5, [%[b], r8]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc %[c], r9, r9\n\t" + "str r4, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, #48\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#else + __asm__ __volatile__ ( + + "mov r9, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "subs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r6, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #32]\n\t" + "str r6, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r6, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + +#define sp_384_mont_reduce_order_12 sp_384_mont_reduce_12 + +/* Reduce the number back to 384 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
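+ *
+ * Each iteration computes mu = a[i] * mp and adds mu * m at word i so that
+ * a[i] becomes zero; after the 12 iterations the upper half of a holds the
+ * result and a final conditional subtract of m brings it under the modulus.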
+ */ +SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "# i = 0\n\t" + "mov r12, #0\n\t" + "ldr r10, [%[a], #0]\n\t" + "ldr r14, [%[a], #4]\n\t" + "\n1:\n\t" + "# mu = a[i] * mp\n\t" + "mul r8, %[mp], r10\n\t" + "# a[i+0] += m[0] * mu\n\t" + "ldr r7, [%[m], #0]\n\t" + "ldr r9, [%[a], #0]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" + "# a[i+1] += m[1] * mu\n\t" + "ldr r7, [%[m], #4]\n\t" + "ldr r9, [%[a], #4]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r10, r14, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r10, r10, r5\n\t" + "adc r4, r4, #0\n\t" + "# a[i+2] += m[2] * mu\n\t" + "ldr r7, [%[m], #8]\n\t" + "ldr r14, [%[a], #8]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r14, r14, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r14, r14, r4\n\t" + "adc r5, r5, #0\n\t" + "# a[i+3] += m[3] * mu\n\t" + "ldr r7, [%[m], #12]\n\t" + "ldr r9, [%[a], #12]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+4] += m[4] * mu\n\t" + "ldr r7, [%[m], #16]\n\t" + "ldr r9, [%[a], #16]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+5] += m[5] * mu\n\t" + "ldr r7, [%[m], #20]\n\t" + "ldr r9, [%[a], #20]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+6] += m[6] * mu\n\t" + "ldr r7, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+7] += m[7] * mu\n\t" + "ldr r7, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #28]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+8] += m[8] * mu\n\t" + "ldr r7, [%[m], #32]\n\t" + "ldr r9, [%[a], #32]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+9] += m[9] * mu\n\t" + "ldr r7, [%[m], #36]\n\t" + "ldr r9, [%[a], #36]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+10] += m[10] * mu\n\t" + "ldr r7, [%[m], #40]\n\t" + "ldr r9, [%[a], #40]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+11] += m[11] * mu\n\t" + "ldr r7, [%[m], #44]\n\t" + "ldr r9, [%[a], #44]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r7, r7, %[ca]\n\t" + "mov %[ca], #0\n\t" + "adc %[ca], %[ca], %[ca]\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #44]\n\t" + "ldr r9, [%[a], #48]\n\t" + "adcs r9, r9, r7\n\t" + "str r9, [%[a], #48]\n\t" + "adc %[ca], %[ca], #0\n\t" + "# i += 1\n\t" + "add %[a], %[a], #4\n\t" + "add r12, r12, #4\n\t" + "cmp r12, #48\n\t" + "blt 1b\n\t" + "str r10, [%[a], #0]\n\t" + "str r14, [%[a], #4]\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - ca); +} + +/* Multiply two 
Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_384_mont_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_384_mul_12(r, a, b); + sp_384_mont_reduce_12(r, m, mp); +} + +#ifdef WOLFSSL_SP_SMALL +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "sub sp, sp, #96\n\t" + "mov r12, #0\n\t" + "mov r6, #0\n\t" + "mov r7, #0\n\t" + "mov r8, #0\n\t" + "mov r5, #0\n\t" + "\n1:\n\t" + "subs r3, r5, #44\n\t" + "it cc\n\t" + "movcc r3, r12\n\t" + "sub r4, r5, r3\n\t" + "\n2:\n\t" + "cmp r4, r3\n\t" + "beq 4f\n\t" + "ldr r14, [%[a], r3]\n\t" + "ldr r9, [%[a], r4]\n\t" + "umull r9, r10, r14, r9\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "bal 5f\n\t" + "\n4:\n\t" + "ldr r14, [%[a], r3]\n\t" + "umull r9, r10, r14, r14\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "\n5:\n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, #48\n\t" + "beq 3f\n\t" + "cmp r3, r4\n\t" + "bgt 3f\n\t" + "cmp r3, r5\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "str r6, [sp, r5]\n\t" + "mov r6, r7\n\t" + "mov r7, r8\n\t" + "mov r8, #0\n\t" + "add r5, r5, #4\n\t" + "cmp r5, #88\n\t" + "ble 1b\n\t" + "str r6, [sp, r5]\n\t" + "\n4:\n\t" + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r3, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r3, [%[r], #12]\n\t" + "add sp, sp, #16\n\t" + "add %[r], %[r], #16\n\t" + "subs r5, r5, #16\n\t" + "bgt 4b\n\t" + : [r] "+r" (r) + : [a] "r" (a) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + ); +} + +#else +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
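+ *
+ * Squaring needs roughly half the multiplies of sp_384_mul_12: each cross
+ * product a[i] * a[j] (i != j) is computed once and doubled, and the
+ * diagonal a[i] * a[i] terms are added once.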
+ */ +static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "sub sp, sp, #48\n\t" + "mov r14, #0\n\t" + "# A[0] * A[0]\n\t" + "ldr r10, [%[a], #0]\n\t" + "umull r8, r3, r10, r10\n\t" + "mov r4, #0\n\t" + "str r8, [sp]\n\t" + "# A[0] * A[1]\n\t" + "ldr r10, [%[a], #4]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [sp, #4]\n\t" + "# A[0] * A[2]\n\t" + "ldr r10, [%[a], #8]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r14, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "# A[1] * A[1]\n\t" + "ldr r10, [%[a], #4]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "str r4, [sp, #8]\n\t" + "# A[0] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r14, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "# A[1] * A[2]\n\t" + "ldr r10, [%[a], #8]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "str r2, [sp, #12]\n\t" + "# A[0] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[1] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[2] * A[2]\n\t" + "ldr r10, [%[a], #8]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [sp, #16]\n\t" + "# A[0] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #20]\n\t" + "# A[0] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #24]\n\t" + "# A[0] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #28]\n\t" + "# A[0] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #32]\n\t" + "# A[0] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #36]\n\t" + "# A[0] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * 
A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #40]\n\t" + "# A[0] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #44]\n\t" + "# A[1] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[2] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #48]\n\t" + "# A[2] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[3] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + 
"adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #52]\n\t" + "# A[3] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[4] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #56]\n\t" + "# A[4] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[5] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #60]\n\t" + "# A[5] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[6] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #64]\n\t" + "# A[6] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[7] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + 
"adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #68]\n\t" + "# A[7] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r14, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "# A[8] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "# A[9] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "str r2, [%[r], #72]\n\t" + "# A[8] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[9] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [%[r], #76]\n\t" + "# A[9] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r14, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "# A[10] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "str r4, [%[r], #80]\n\t" + "# A[10] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r14, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "str r2, [%[r], #84]\n\t" + "# A[11] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adc r4, r4, r9\n\t" + "str r3, [%[r], #88]\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r2, [sp, #0]\n\t" + "ldr r3, [sp, #4]\n\t" + "ldr r4, [sp, #8]\n\t" + "ldr r8, [sp, #12]\n\t" + "str r2, [%[r], #0]\n\t" + "str r3, [%[r], #4]\n\t" + "str r4, [%[r], #8]\n\t" + "str r8, [%[r], #12]\n\t" + "ldr r2, [sp, #16]\n\t" + "ldr r3, [sp, #20]\n\t" + "ldr r4, [sp, #24]\n\t" + "ldr r8, [sp, #28]\n\t" + "str r2, [%[r], #16]\n\t" + "str r3, [%[r], #20]\n\t" + "str r4, [%[r], #24]\n\t" + "str r8, [%[r], #28]\n\t" + "ldr r2, [sp, #32]\n\t" + "ldr r3, [sp, #36]\n\t" + "ldr r4, [sp, #40]\n\t" + "ldr r8, [sp, #44]\n\t" + "str r2, [%[r], #32]\n\t" + "str r3, [%[r], #36]\n\t" + "str r4, [%[r], #40]\n\t" + "str r8, [%[r], #44]\n\t" + "add sp, sp, #48\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). 
+ * mp Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_384_sqr_12(r, a);
+    sp_384_mont_reduce_12(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * n Number of times to square.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mont_sqr_12(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_384_mont_sqr_12(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P384 curve. */
+static const uint32_t p384_mod_minus_2[12] = {
+    0xfffffffdU,0x00000000U,0x00000000U,0xffffffffU,0xfffffffeU,0xffffffffU,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P384 curve. (r = 1 / a mod m)
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data.
+ */
+static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 12);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_12(t, t, p384_mod, p384_mp_mod);
+        if (p384_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_384_mont_mul_12(t, t, a, p384_mod, p384_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 12);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 12;
+    sp_digit* t3 = td + 4 * 12;
+    sp_digit* t4 = td + 6 * 12;
+    sp_digit* t5 = td + 8 * 12;
+
+    /* 0x2 */
+    sp_384_mont_sqr_12(t1, a, p384_mod, p384_mp_mod);
+    /* 0x3 */
+    sp_384_mont_mul_12(t5, t1, a, p384_mod, p384_mp_mod);
+    /* 0xc */
+    sp_384_mont_sqr_n_12(t1, t5, 2, p384_mod, p384_mp_mod);
+    /* 0xf */
+    sp_384_mont_mul_12(t2, t5, t1, p384_mod, p384_mp_mod);
+    /* 0x1e */
+    sp_384_mont_sqr_12(t1, t2, p384_mod, p384_mp_mod);
+    /* 0x1f */
+    sp_384_mont_mul_12(t4, t1, a, p384_mod, p384_mp_mod);
+    /* 0x3e0 */
+    sp_384_mont_sqr_n_12(t1, t4, 5, p384_mod, p384_mp_mod);
+    /* 0x3ff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x7fe0 */
+    sp_384_mont_sqr_n_12(t1, t2, 5, p384_mod, p384_mp_mod);
+    /* 0x7fff */
+    sp_384_mont_mul_12(t4, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x3fff8000 */
+    sp_384_mont_sqr_n_12(t1, t4, 15, p384_mod, p384_mp_mod);
+    /* 0x3fffffff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffc */
+    sp_384_mont_sqr_n_12(t3, t2, 2, p384_mod, p384_mp_mod);
+    /* 0xfffffffd */
+    sp_384_mont_mul_12(r, t3, a, p384_mod, p384_mp_mod);
+    /* 0xffffffff */
+    sp_384_mont_mul_12(t3, t5, t3, p384_mod, p384_mp_mod);
+    /* 0xfffffffc0000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 30, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 60, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 120, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /*
0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */ + sp_384_mont_sqr_n_12(t1, t2, 15, p384_mod, p384_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */ + sp_384_mont_sqr_n_12(t1, t2, 33, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */ + sp_384_mont_mul_12(t2, t3, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */ + sp_384_mont_sqr_n_12(t1, t2, 96, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */ + sp_384_mont_mul_12(r, r, t1, p384_mod, p384_mp_mod); + +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static int32_t sp_384_cmp_12(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = -1; + sp_digit one = 1; + + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mov r3, #-1\n\t" + "mov r6, #44\n\t" + "1:\n\t" + "ldr r4, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "subs r6, r6, #4\n\t" + "bcs 1b\n\t" + "eor %[r], %[r], r3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "r3", "r4", "r5", "r6", "r7" + ); +#else + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mov r3, #-1\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + 
"and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "eor %[r], %[r], r3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "r3", "r4", "r5", "r6", "r7" + ); +#endif + + return r; +} + +/* Normalize the values in each word to 32. + * + * a Array of sp_digit to normalize. + */ +#define sp_384_norm_12(a) + +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. + * p Montgomery form projective coordinate point. + * t Temporary ordinate data. + */ +static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + int32_t n; + + sp_384_mont_inv_12(t1, p->z, t + 2*12); + + sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod); + + /* x /= z^2 */ + sp_384_mont_mul_12(r->x, p->x, t2, p384_mod, p384_mp_mod); + XMEMSET(r->x + 12, 0, sizeof(r->x) / 2U); + sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod); + /* Reduce x to less than modulus */ + n = sp_384_cmp_12(r->x, p384_mod); + sp_384_cond_sub_12(r->x, r->x, p384_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_12(r->x); + + /* y /= z^3 */ + sp_384_mont_mul_12(r->y, p->y, t1, p384_mod, p384_mp_mod); + XMEMSET(r->y + 12, 0, sizeof(r->y) / 2U); + sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod); + /* Reduce y to less than modulus */ + n = sp_384_cmp_12(r->y, p384_mod); + sp_384_cond_sub_12(r->y, r->y, p384_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_12(r->y); + + XMEMSET(r->z, 0, sizeof(r->z)); + r->z[0] = 1; + +} + +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */
+static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add r12, %[a], #48\n\t"
+        "\n1:\n\t"
+        "adds %[c], %[c], #-1\n\t"
+        "ldr r4, [%[a]], #4\n\t"
+        "ldr r5, [%[a]], #4\n\t"
+        "ldr r6, [%[a]], #4\n\t"
+        "ldr r7, [%[a]], #4\n\t"
+        "ldr r8, [%[b]], #4\n\t"
+        "ldr r9, [%[b]], #4\n\t"
+        "ldr r10, [%[b]], #4\n\t"
+        "ldr r14, [%[b]], #4\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r]], #4\n\t"
+        "str r5, [%[r]], #4\n\t"
+        "str r6, [%[r]], #4\n\t"
+        "str r7, [%[r]], #4\n\t"
+        "mov r4, #0\n\t"
+        "adc %[c], r4, #0\n\t"
+        "cmp %[a], r12\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov r12, #0\n\t"
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[a], #4]\n\t"
+        "ldr r6, [%[a], #8]\n\t"
+        "ldr r7, [%[a], #12]\n\t"
+        "ldr r8, [%[b], #0]\n\t"
+        "ldr r9, [%[b], #4]\n\t"
+        "ldr r10, [%[b], #8]\n\t"
+        "ldr r14, [%[b], #12]\n\t"
+        "adds r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "str r5, [%[r], #4]\n\t"
+        "str r6, [%[r], #8]\n\t"
+        "str r7, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[a], #20]\n\t"
+        "ldr r6, [%[a], #24]\n\t"
+        "ldr r7, [%[a], #28]\n\t"
+        "ldr r8, [%[b], #16]\n\t"
+        "ldr r9, [%[b], #20]\n\t"
+        "ldr r10, [%[b], #24]\n\t"
+        "ldr r14, [%[b], #28]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "str r5, [%[r], #20]\n\t"
+        "str r6, [%[r], #24]\n\t"
+        "str r7, [%[r], #28]\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r5, [%[a], #36]\n\t"
+        "ldr r6, [%[a], #40]\n\t"
+        "ldr r7, [%[a], #44]\n\t"
+        "ldr r8, [%[b], #32]\n\t"
+        "ldr r9, [%[b], #36]\n\t"
+        "ldr r10, [%[b], #40]\n\t"
+        "ldr r14, [%[b], #44]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "str r5, [%[r], #36]\n\t"
+        "str r6, [%[r], #40]\n\t"
+        "str r7, [%[r], #44]\n\t"
+        "adc %[c], r12, r12\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r Result of addition.
+ * a First number to add in Montgomery form.
+ * b Second number to add in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_12(r, a, b);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r Result of doubling.
+ * a Number to double in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_12(r, a, a);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+}
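Both helpers above follow one pattern: a full-width addition, then a constant-time conditional subtract of the modulus keyed on the carry out (the 0 - o mask), so no secret-dependent branch is taken. Note the code only subtracts when the add overflows 384 bits, so a result can stay slightly above the modulus while remaining below 2^384, which the surrounding Montgomery arithmetic accepts. A portable C sketch of the pattern, with hypothetical names (not the wolfSSL API):

    #include <stdint.h>

    #define WORDS 12

    /* Illustration of add-then-masked-subtract: mask is all-ones exactly
     * when the addition carried out of the top word. */
    static void mod_add_sketch(uint32_t r[WORDS], const uint32_t a[WORDS],
                               const uint32_t b[WORDS], const uint32_t m[WORDS])
    {
        uint64_t c = 0;
        uint64_t d;
        uint32_t mask;
        int i;

        for (i = 0; i < WORDS; i++) {       /* r = a + b, ripple carry */
            c += (uint64_t)a[i] + b[i];
            r[i] = (uint32_t)c;
            c >>= 32;
        }
        mask = (uint32_t)(0 - (uint32_t)c); /* carry -> 0xffffffff */
        c = 0;                              /* reuse as borrow */
        for (i = 0; i < WORDS; i++) {       /* r -= m & mask */
            d = (uint64_t)r[i] - (m[i] & mask) - c;
            r[i] = (uint32_t)d;
            c = d >> 63;                    /* borrow out */
        }
    }

+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r Result of tripling.
+ * a Number to triple in Montgomery form.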
+ * m Modulus (prime). + */ +static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit o; + + o = sp_384_add_12(r, a, a); + sp_384_cond_sub_12(r, r, m, 0 - o); + o = sp_384_add_12(r, r, a); + sp_384_cond_sub_12(r, r, m, 0 - o); +} + +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add r12, %[a], #48\n\t" + "\n1:\n\t" + "rsbs %[c], %[c], #0\n\t" + "ldr r4, [%[a]], #4\n\t" + "ldr r5, [%[a]], #4\n\t" + "ldr r6, [%[a]], #4\n\t" + "ldr r7, [%[a]], #4\n\t" + "ldr r8, [%[b]], #4\n\t" + "ldr r9, [%[b]], #4\n\t" + "ldr r10, [%[b]], #4\n\t" + "ldr r14, [%[b]], #4\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r14\n\t" + "str r4, [%[r]], #4\n\t" + "str r5, [%[r]], #4\n\t" + "str r6, [%[r]], #4\n\t" + "str r7, [%[r]], #4\n\t" + "sbc %[c], r4, r4\n\t" + "cmp %[a], r12\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #0]\n\t" + "ldr r8, [%[b], #4]\n\t" + "ldr r9, [%[b], #8]\n\t" + "ldr r10, [%[b], #12]\n\t" + "subs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #16]\n\t" + "ldr r8, [%[b], #20]\n\t" + "ldr r9, [%[b], #24]\n\t" + "ldr r10, [%[b], #28]\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[a], #40]\n\t" + "ldr r6, [%[a], #44]\n\t" + "ldr r7, [%[b], #32]\n\t" + "ldr r8, [%[b], #36]\n\t" + "ldr r9, [%[b], #40]\n\t" + "ldr r10, [%[b], #44]\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "str r3, [%[r], #32]\n\t" + "str r4, [%[r], #36]\n\t" + "str r5, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "sbc %[c], %[c], #0\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r9, #0\n\t" + "mov r8, #0\n\t" + "1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr r4, [%[a], r8]\n\t" + "ldr r5, [%[b], r8]\n\t" + "and r5, r5, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adc %[c], r9, r9\n\t" + "str r4, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, #48\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#else + __asm__ __volatile__ ( + + "mov r9, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adds r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r6, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #32]\n\t" + "str r6, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r6, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "adc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + +/* Subtract two Montgomery form numbers (r = a - b % m). + * + * r Result of subtration. + * a Number to subtract from in Montogmery form. + * b Number to subtract with in Montogmery form. + * m Modulus (prime). 
+ */ +static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + sp_digit o; + + o = sp_384_sub_12(r, a, b); + sp_384_cond_add_12(r, r, m, o); +} + +static void sp_384_rshift1_12(sp_digit* r, sp_digit* a) +{ + __asm__ __volatile__ ( + "ldr r2, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" + "lsr r2, r2, #1\n\t" + "orr r2, r2, r3, lsl #31\n\t" + "lsr r3, r3, #1\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r2, [%[r], #0]\n\t" + "orr r3, r3, r4, lsl #31\n\t" + "lsr r4, r4, #1\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r3, [%[r], #4]\n\t" + "orr r4, r4, r2, lsl #31\n\t" + "lsr r2, r2, #1\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r4, [%[r], #8]\n\t" + "orr r2, r2, r3, lsl #31\n\t" + "lsr r3, r3, #1\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r2, [%[r], #12]\n\t" + "orr r3, r3, r4, lsl #31\n\t" + "lsr r4, r4, #1\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r3, [%[r], #16]\n\t" + "orr r4, r4, r2, lsl #31\n\t" + "lsr r2, r2, #1\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r4, [%[r], #20]\n\t" + "orr r2, r2, r3, lsl #31\n\t" + "lsr r3, r3, #1\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r2, [%[r], #24]\n\t" + "orr r3, r3, r4, lsl #31\n\t" + "lsr r4, r4, #1\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r3, [%[r], #28]\n\t" + "orr r4, r4, r2, lsl #31\n\t" + "lsr r2, r2, #1\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r4, [%[r], #32]\n\t" + "orr r2, r2, r3, lsl #31\n\t" + "lsr r3, r3, #1\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r2, [%[r], #36]\n\t" + "orr r3, r3, r4, lsl #31\n\t" + "lsr r4, r4, #1\n\t" + "str r3, [%[r], #40]\n\t" + "str r4, [%[r], #44]\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4" + ); +} + +/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) + * + * r Result of division by 2. + * a Number to divide. + * m Modulus (prime). + */ +static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit o; + + o = sp_384_cond_add_12(r, a, m, 0 - (a[0] & 1)); + sp_384_rshift1_12(r, r); + r[11] |= o << 31; +} + +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. 
*/ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_12(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_12(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_12(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_12(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_12(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_div2_12(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_12(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_12(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_12(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_12(y, y, t2, p384_mod); +} + +/* Compare two numbers to determine if they are equal. + * Constant time implementation. + * + * a First number to compare. + * b Second number to compare. + * returns 1 when equal and 0 otherwise. + */ +static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) | + (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) | + (a[8] ^ b[8]) | (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11])) == 0; +} + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_384_proj_point_add_12(sp_point_384* r, const sp_point_384* p, const sp_point_384* q, + sp_digit* t) +{ + const sp_point_384* ap[2]; + sp_point_384* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + sp_digit* t3 = t + 4*12; + sp_digit* t4 = t + 6*12; + sp_digit* t5 = t + 8*12; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Ensure only the first point is the same as the result. 
*/ + if (q == r) { + const sp_point_384* a = p; + p = q; + q = a; + } + + /* Check double */ + (void)sp_384_sub_12(t1, p384_mod, q->y); + sp_384_norm_12(t1); + if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & + (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { + sp_384_proj_point_dbl_12(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_384)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<12; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<12; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<12; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t1, t1, x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_12(t3, t3, y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_12(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_12(t4, t4, t3, p384_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(x, x, t5, p384_mod); + sp_384_mont_dbl_12(t1, y, p384_mod); + sp_384_mont_sub_12(x, x, t1, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_12(y, y, x, p384_mod); + sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(y, y, t5, p384_mod); + } +} + +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k, + int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 td[16]; + sp_point_384 rtd; + sp_digit tmpd[2 * 12 * 6]; +#endif + sp_point_384* t; + sp_point_384* rt; + sp_digit* tmp; + sp_digit n; + int i; + int c, y; + int err; + + (void)heap; + + err = sp_384_point_new_12(heap, rtd, rt); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#else + t = td; + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + (void)sp_384_mod_mul_norm_12(t[1].x, g->x, p384_mod); + (void)sp_384_mod_mul_norm_12(t[1].y, g->y, p384_mod); + (void)sp_384_mod_mul_norm_12(t[1].z, g->z, p384_mod); + t[1].infinity = 0; + sp_384_proj_point_dbl_12(&t[ 2], &t[ 1], tmp); + t[ 2].infinity = 0; + sp_384_proj_point_add_12(&t[ 3], &t[ 2], &t[ 1], tmp); + t[ 3].infinity = 0; + sp_384_proj_point_dbl_12(&t[ 4], &t[ 2], tmp); + t[ 4].infinity = 0; + sp_384_proj_point_add_12(&t[ 5], &t[ 3], &t[ 2], tmp); + t[ 5].infinity = 0; + sp_384_proj_point_dbl_12(&t[ 6], &t[ 3], tmp); + t[ 6].infinity = 0; + sp_384_proj_point_add_12(&t[ 7], &t[ 4], &t[ 3], tmp); + t[ 7].infinity = 0; + sp_384_proj_point_dbl_12(&t[ 8], &t[ 4], tmp); + t[ 8].infinity = 0; + sp_384_proj_point_add_12(&t[ 9], &t[ 5], &t[ 4], tmp); + t[ 9].infinity = 0; + sp_384_proj_point_dbl_12(&t[10], &t[ 5], tmp); + t[10].infinity = 0; + sp_384_proj_point_add_12(&t[11], &t[ 6], &t[ 5], tmp); + t[11].infinity = 0; + sp_384_proj_point_dbl_12(&t[12], &t[ 6], tmp); + t[12].infinity = 0; + sp_384_proj_point_add_12(&t[13], &t[ 7], &t[ 6], tmp); + t[13].infinity = 0; + sp_384_proj_point_dbl_12(&t[14], &t[ 7], tmp); + t[14].infinity = 0; + sp_384_proj_point_add_12(&t[15], &t[ 8], &t[ 7], tmp); + t[15].infinity = 0; + + i = 10; + n = k[i+1] << 0; + c = 28; + y = n >> 28; + XMEMCPY(rt, &t[y], sizeof(sp_point_384)); + n <<= 4; + for (; i>=0 || c>=4; ) { + if (c < 4) { + n |= k[i--]; + c += 32; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + + sp_384_proj_point_dbl_12(rt, rt, tmp); + sp_384_proj_point_dbl_12(rt, rt, tmp); + sp_384_proj_point_dbl_12(rt, rt, tmp); + sp_384_proj_point_dbl_12(rt, rt, tmp); + + sp_384_proj_point_add_12(rt, rt, &t[y], tmp); + } + + if (map != 0) { + sp_384_map_12(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_384)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 12 * 6); + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + } + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_point_384) * 16); + XFREE(t, heap, DYNAMIC_TYPE_ECC); + } +#else + ForceZero(tmpd, sizeof(tmpd)); + ForceZero(td, sizeof(td)); +#endif + sp_384_point_free_12(rt, 1, heap); + + return err; +} + +/* A table entry for pre-computed points. */ +typedef struct sp_table_entry_384 { + sp_digit x[12]; + sp_digit y[12]; +} sp_table_entry_384; + +#ifdef FP_ECC +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. 
+ * n Number of times to double + * t Temporary ordinate data. + */ +static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*12; + sp_digit* b = t + 4*12; + sp_digit* t1 = t + 6*12; + sp_digit* t2 = t + 8*12; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_384_mont_dbl_12(y, y, p384_mod); + /* W = Z^4 */ + sp_384_mont_sqr_12(w, z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(w, w, p384_mod, p384_mp_mod); + +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(t1, t1, w, p384_mod); + sp_384_mont_tpl_12(a, t1, p384_mod); + /* B = X*Y^2 */ + sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod); + /* X = A^2 - 2B */ + sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(t2, b, p384_mod); + sp_384_mont_sub_12(x, x, t2, p384_mod); + /* Z = Z*Y */ + sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod); + /* t2 = Y^4 */ + sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_384_mont_mul_12(w, w, t1, p384_mod, p384_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_384_mont_sub_12(y, b, x, p384_mod); + sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(y, y, p384_mod); + sp_384_mont_sub_12(y, y, t1, p384_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(t1, t1, w, p384_mod); + sp_384_mont_tpl_12(a, t1, p384_mod); + /* B = X*Y^2 */ + sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod); + /* X = A^2 - 2B */ + sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(t2, b, p384_mod); + sp_384_mont_sub_12(x, x, t2, p384_mod); + /* Z = Z*Y */ + sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod); + /* t2 = Y^4 */ + sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_384_mont_sub_12(y, b, x, p384_mod); + sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(y, y, p384_mod); + sp_384_mont_sub_12(y, y, t1, p384_mod); +#endif + /* Y = Y/2 */ + sp_384_div2_12(y, y, p384_mod); +} + +#endif /* FP_ECC */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
+ */ +static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p, + const sp_point_384* q, sp_digit* t) +{ + const sp_point_384* ap[2]; + sp_point_384* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + sp_digit* t3 = t + 4*12; + sp_digit* t4 = t + 6*12; + sp_digit* t5 = t + 8*12; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Check double */ + (void)sp_384_sub_12(t1, p384_mod, q->y); + sp_384_norm_12(t1); + if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & + (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { + sp_384_proj_point_dbl_12(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_384)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<12; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<12; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<12; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); + /* H = U2 - X1 */ + sp_384_mont_sub_12(t2, t2, x, p384_mod); + /* R = S2 - Y1 */ + sp_384_mont_sub_12(t4, t4, y, p384_mod); + /* Z3 = H*Z1 */ + sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_384_mont_sqr_12(t1, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t3, x, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(x, t1, t5, p384_mod); + sp_384_mont_dbl_12(t1, t3, p384_mod); + sp_384_mont_sub_12(x, x, t1, p384_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_384_mont_sub_12(t3, t3, x, p384_mod); + sp_384_mont_mul_12(t3, t3, t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, y, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(y, t3, t5, p384_mod); + } +} + +#ifdef WOLFSSL_SP_SMALL +#ifdef FP_ECC +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_384_proj_to_affine_12(sp_point_384* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 12; + sp_digit* tmp = t + 4 * 12; + + sp_384_mont_inv_12(t1, a->z, tmp); + + sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod); + + sp_384_mont_mul_12(a->x, a->x, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(a->y, a->y, t1, p384_mod, p384_mp_mod); + XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod)); +} + +/* Generate the pre-computed table of points for the base point. + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. 
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_12(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<4; i++) {
+            sp_384_proj_point_dbl_n_12(t, 96, tmp);
+            sp_384_proj_to_affine_12(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<4; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+                sp_384_proj_to_affine_12(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_12(s2, 0, heap);
+    sp_384_point_free_12(s1, 0, heap);
+    sp_384_point_free_12( t, 0, heap);
+
+    return err;
+}
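The table above is consumed by gathering one scalar bit from each 96-bit stripe into a 4-bit index. The expression below is the same one used by sp_384_ecc_mulmod_stripe_12 that follows, pulled out as a hypothetical helper for clarity:

    /* Bit x of each of the four 96-bit stripes of the 384-bit scalar k
     * (12 32-bit digits) selects one of the 16 table entries. */
    static int stripe_index(const uint32_t k[12], int x)
    {
        int y = 0;
        int j;

        for (j = 0; j < 4; j++, x += 96) {
            y |= (int)((k[x / 32] >> (x % 32)) & 1) << j;
        }
        return y;   /* 0..15: index into table[] */
    }

+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.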
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 12 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+            DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=95; j<4; j++,x+=96) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=94; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<4; j++,x+=96) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_12(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[12];
+    sp_digit y[12];
+    sp_table_entry_384 table[16];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+                sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
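The cache above implements least-frequently-used eviction with lazy table generation: a point goes through the generic fast path on its first sightings, and the stripe table is built on the second hit (cnt == 2) and reused from then on. A hypothetical wrapper making the decision explicit (invented name, mutex handling elided; the real flow, including locking, is in sp_384_ecc_mulmod_12 below):

    /* Sketch only: uses the functions defined in this file, but is not
     * part of the patch and omits HAVE_THREAD_LS/mutex handling. */
    static int mulmod_cached_sketch(sp_point_384* r, const sp_point_384* g,
                                    const sp_digit* k, int map, void* heap,
                                    sp_digit tmp[2 * 12 * 7])
    {
        sp_cache_384_t* cache;

        sp_ecc_get_cache_384(g, &cache);
        if (cache->cnt == 2) {
            /* second hit: build the table so later calls amortize it */
            sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
        }
        return (cache->cnt < 2)
            ? sp_384_ecc_mulmod_fast_12(r, g, k, map, heap)
            : sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k, map, heap);
    }

+#endif /* FP_ECC */
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.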
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 12 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_384 == 0) {
+        wc_InitMutex(&sp_cache_384_lock);
+        initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+        err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a The base point.
+ * table Place to store generated point data.
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_12(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_384_proj_point_dbl_n_12(t, 48, tmp);
+            sp_384_proj_to_affine_12(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+                sp_384_proj_to_affine_12(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_12(s2, 0, heap);
+    sp_384_point_free_12(s1, 0, heap);
+    sp_384_point_free_12( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r      Resulting point.
+ * g      Point to multiply.
+ * table  Pre-computed table of multiples of g.
+ * k      Scalar to multiply by.
+ * map    Indicates whether to convert result to affine.
+ * heap   Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 12 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=47; j<8; j++,x+=48) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=46; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=48) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_12(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[12];
+    sp_digit y[12];
+    sp_table_entry_384 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+                sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
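+
+/* Note (not part of the upstream file): how the FP_ECC cache above is used.
+ * The first two multiplications of a given point fall through to the plain
+ * windowed method; on the second hit (cnt == 2) the stripe table is built,
+ * and every later call for that point takes the stripe path.  A minimal
+ * caller sketch through the public wrapper defined below (example_mulmod,
+ * k, g and res are hypothetical names, kept compiled out):
+ */
+#if 0
+static int example_mulmod(mp_int* k, ecc_point* g, ecc_point* res)
+{
+    /* map = 1: return affine coordinates; heap = NULL: default allocator. */
+    return sp_ecc_mulmod_384(k, g, res, 1, NULL);
+}
+#endif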
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 12 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_384 == 0) {
+        wc_InitMutex(&sp_cache_384_lock);
+        initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+        err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * gm    Point to multiply.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+        void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 12, km);
+        sp_384_point_from_ecc_point_12(point, gm);
+
+        err = sp_384_ecc_mulmod_12(point, point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+static const sp_table_entry_384 p384_table[16] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
+        0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
+      { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
+        0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
+    /* 2 */
+    { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
+        0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
+      { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
+        0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
+    /* 3 */
+    { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
+        0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
+      { 
0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047, + 0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } }, + /* 4 */ + { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c, + 0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 }, + { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc, + 0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } }, + /* 5 */ + { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98, + 0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c }, + { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28, + 0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } }, + /* 6 */ + { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e, + 0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 }, + { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec, + 0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } }, + /* 7 */ + { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b, + 0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b }, + { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b, + 0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } }, + /* 8 */ + { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9, + 0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 }, + { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1, + 0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } }, + /* 9 */ + { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc, + 0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a }, + { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18, + 0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } }, + /* 10 */ + { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247, + 0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 }, + { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d, + 0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } }, + /* 11 */ + { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12, + 0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e }, + { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f, + 0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } }, + /* 12 */ + { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe, + 0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 }, + { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6, + 0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } }, + /* 13 */ + { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6, + 0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 }, + { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf, + 0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } }, + /* 14 */ + { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53, + 0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 }, + { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370, + 0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } }, + /* 15 */ + { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f, + 
0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc }, + { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2, + 0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } }, +}; + +/* Multiply the base point of P384 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k, + int map, void* heap) +{ + return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table, + k, map, heap); +} + +#else +static const sp_table_entry_384 p384_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc, + 0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 }, + { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756, + 0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } }, + /* 2 */ + { { 0x2b0c535b,0x29864753,0x70506296,0x90dd6953,0x216ab9ac,0x038cd6b4, + 0xbe12d76a,0x3df9b7b7,0x5f347bdb,0x13f4d978,0x13e94489,0x222c5c9c }, + { 0x2680dc64,0x5f8e796f,0x58352417,0x120e7cb7,0xd10740b8,0x254b5d8a, + 0x5337dee6,0xc38b8efb,0x94f02247,0xf688c2e1,0x6c25bc4c,0x7b5c75f3 } }, + /* 3 */ + { { 0x9edffea5,0xe26a3cc3,0x37d7e9fc,0x35bbfd1c,0x9bde3ef6,0xf0e7700d, + 0x1a538f5a,0x0380eb47,0x05bf9eb3,0x2e9da8bb,0x1a460c3e,0xdbb93c73 }, + { 0xf526b605,0x37dba260,0xfd785537,0x95d4978e,0xed72a04a,0x24ed793a, + 0x76005b1a,0x26948377,0x9e681f82,0x99f557b9,0xd64954ef,0xae5f9557 } }, + /* 4 */ + { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3, + 0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 }, + { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc, + 0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } }, + /* 5 */ + { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480, + 0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 }, + { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047, + 0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } }, + /* 6 */ + { { 0x4fc52870,0x28f9c07a,0x1a53a961,0xce0b3748,0x0e1828d9,0xd550fa18, + 0x6adb225a,0xa24abaf7,0x6e58a348,0xd11ed0a5,0x948acb62,0xf3d811e6 }, + { 0x4c61ed22,0x8618dd77,0x80b47c9d,0x0bb747f9,0xde6b8559,0x22bf796f, + 0x680a21e9,0xfdfd1c6d,0x2af2c9dd,0xc0db1577,0xc1e90f3d,0xa09379e6 } }, + /* 7 */ + { { 0xe085c629,0x386c66ef,0x095bc89a,0x5fc2a461,0x203f4b41,0x1353d631, + 0x7e4bd8f5,0x7ca1972b,0xa7df8ce9,0xb077380a,0xee7e4ea3,0xd8a90389 }, + { 0xe7b14461,0x1bc74dc7,0x0c9c4f78,0xdc2cb014,0x84ef0a10,0x52b4b3a6, + 0x20327fe2,0xbde6ea5d,0x660f9615,0xb71ec435,0xb8ad8173,0xeede5a04 } }, + /* 8 */ + { { 0x893b9a2d,0x5584cbb3,0x00850c5d,0x820c660b,0x7df2d43d,0x4126d826, + 0x0109e801,0xdd5bbbf0,0x38172f1c,0x85b92ee3,0xf31430d9,0x609d4f93 }, + { 0xeadaf9d6,0x1e059a07,0x0f125fb0,0x70e6536c,0x560f20e7,0xd6220751, + 0x7aaf3a9a,0xa59489ae,0x64bae14e,0x7b70e2f6,0x76d08249,0x0dd03701 } }, + /* 9 */ + { { 0x8510521f,0x4cc13be8,0xf724cc17,0x87315ba9,0x353dc263,0xb49d83bb, + 0x0c279257,0x8b677efe,0xc93c9537,0x510a1c1c,0xa4702c99,0x33e30cd8 }, + { 
0x2208353f,0xf0ffc89d,0xced42b2b,0x0170fa8d,0x26e2a5f5,0x090851ed, + 0xecb52c96,0x81276455,0x7fe1adf4,0x0646c4e1,0xb0868eab,0x513f047e } }, + /* 10 */ + { { 0xdf5bdf53,0xc07611f4,0x58b11a6d,0x45d331a7,0x1c4ee394,0x58965daf, + 0x5a5878d1,0xba8bebe7,0x82dd3025,0xaecc0a18,0xa923eb8b,0xcf2a3899 }, + { 0xd24fd048,0xf98c9281,0x8bbb025d,0x841bfb59,0xc9ab9d53,0xb8ddf8ce, + 0x7fef044e,0x538a4cb6,0x23236662,0x092ac21f,0x0b66f065,0xa919d385 } }, + /* 11 */ + { { 0x85d480d8,0x3db03b40,0x1b287a7d,0x8cd9f479,0x4a8f3bae,0x8f24dc75, + 0x3db41892,0x482eb800,0x9c56e0f5,0x38bf9eb3,0x9a91dc6f,0x8b977320 }, + { 0x7209cfc2,0xa31b05b2,0x05b2db70,0x4c49bf85,0xd619527b,0x56462498, + 0x1fac51ba,0x3fe51039,0xab4b8342,0xfb04f55e,0x04c6eabf,0xc07c10dc } }, + /* 12 */ + { { 0xdb32f048,0xad22fe4c,0x475ed6df,0x5f23bf91,0xaa66b6cb,0xa50ce0c0, + 0xf03405c0,0xdf627a89,0xf95e2d6a,0x3674837d,0xba42e64e,0x081c95b6 }, + { 0xe71d6ceb,0xeba3e036,0x6c6b0271,0xb45bcccf,0x0684701d,0x67b47e63, + 0xe712523f,0x60f8f942,0x5cd47adc,0x82423472,0x87649cbb,0x83027d79 } }, + /* 13 */ + { { 0x3615b0b8,0xb3929ea6,0xa54dac41,0xb41441fd,0xb5b6a368,0x8995d556, + 0x167ef05e,0xa80d4529,0x6d25a27f,0xf6bcb4a1,0x7bd55b68,0x210d6a4c }, + { 0x25351130,0xf3804abb,0x903e37eb,0x1d2df699,0x084c25c8,0x5f201efc, + 0xa1c68e91,0x31a28c87,0x563f62a5,0x81dad253,0xd6c415d4,0x5dd6de70 } }, + /* 14 */ + { { 0x846612ce,0x29f470fd,0xda18d997,0x986f3eec,0x2f34af86,0x6b84c161, + 0x46ddaf8b,0x5ef0a408,0xe49e795f,0x14405a00,0xaa2f7a37,0x5f491b16 }, + { 0xdb41b38d,0xc7f07ae4,0x18fbfcaa,0xef7d119e,0x14443b19,0x3a18e076, + 0x79a19926,0x4356841a,0xe2226fbe,0x91f4a91c,0x3cc88721,0xdc77248c } }, + /* 15 */ + { { 0xe4b1ec9d,0xd570ff1a,0xe7eef706,0x21d23e0e,0xca19e086,0x3cde40f4, + 0xcd4bb270,0x7d6523c4,0xbf13aa6c,0x16c1f06c,0xd14c4b60,0x5aa7245a }, + { 0x44b74de8,0x37f81467,0x620a934e,0x839e7a17,0xde8b1aa1,0xf74d14e8, + 0xf30d75e2,0x8789fa51,0xc81c261e,0x09b24052,0x33c565ee,0x654e2678 } }, + /* 16 */ + { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c, + 0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 }, + { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc, + 0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } }, + /* 17 */ + { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98, + 0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c }, + { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28, + 0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } }, + /* 18 */ + { { 0x7d8c1bba,0x7ecbac01,0x90b0f3d5,0x6058f9c3,0xf6197d0f,0xaee116e3, + 0x4033b128,0xc4dd7068,0xc209b983,0xf084dba6,0x831dbc4a,0x97c7c2cf }, + { 0xf96010e8,0x2f4e61dd,0x529faa17,0xd97e4e20,0x69d37f20,0x4ee66660, + 0x3d366d72,0xccc139ed,0x13488e0f,0x690b6ee2,0xf3a6d533,0x7cad1dc5 } }, + /* 19 */ + { { 0xda57a41f,0x660a9a81,0xec0039b6,0xe74a0412,0x5e1dad15,0x42343c6b, + 0x46681d4c,0x284f3ff5,0x63749e89,0xb51087f1,0x6f9f2f13,0x070f23cc }, + { 0x5d186e14,0x542211da,0xfddb0dff,0x84748f37,0xdb1f4180,0x41a3aab4, + 0xa6402d0e,0x25ed667b,0x02f58355,0x2f2924a9,0xfa44a689,0x5844ee7c } }, + /* 20 */ + { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e, + 0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 }, + { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec, + 0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } }, + /* 21 */ + { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b, + 
0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b }, + { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b, + 0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } }, + /* 22 */ + { { 0x104cbba5,0xc023780d,0xfa35dd4c,0x6207e747,0x1ca9b6a3,0x35c23928, + 0x97987b10,0x4ff19be8,0x8022eee8,0xb8476bbf,0xd3bbe74d,0xaa0a4a14 }, + { 0x187d4543,0x20f94331,0x79f6e066,0x32153870,0xac7e82e1,0x83b0f74e, + 0x828f06ab,0xa7748ba2,0xc26ef35f,0xc5f0298a,0x8e9a7dbd,0x0f0c5070 } }, + /* 23 */ + { { 0xdef029dd,0x0c5c244c,0x850661b8,0x3dabc687,0xfe11d981,0x9992b865, + 0x6274dbad,0xe9801b8f,0x098da242,0xe54e6319,0x91a53d08,0x9929a91a }, + { 0x35285887,0x37bffd72,0xf1418102,0xbc759425,0xfd2e6e20,0x9280cc35, + 0xfbc42ee5,0x735c600c,0x8837619a,0xb7ad2864,0xa778c57b,0xa3627231 } }, + /* 24 */ + { { 0x91361ed8,0xae799b5c,0x6c63366c,0x47d71b75,0x1b265a6a,0x54cdd521, + 0x98d77b74,0xe0215a59,0xbab29db0,0x4424d9b7,0x7fd9e536,0x8b0ffacc }, + { 0x37b5d9ef,0x46d85d12,0xbfa91747,0x5b106d62,0x5f99ba2d,0xed0479f8, + 0x1d104de4,0x0e6f3923,0x25e8983f,0x83a84c84,0xf8105a70,0xa9507e0a } }, + /* 25 */ + { { 0x14cf381c,0xf6c68a6e,0xc22e31cc,0xaf9d27bd,0xaa8a5ccb,0x23568d4d, + 0xe338e4d2,0xe431eec0,0x8f52ad1f,0xf1a828fe,0xe86acd80,0xdb6a0579 }, + { 0x4507832a,0x2885672e,0x887e5289,0x73fc275f,0x05610d08,0x65f80278, + 0x075ff5b0,0x8d9b4554,0x09f712b5,0x3a8e8fb1,0x2ebe9cf2,0x39f0ac86 } }, + /* 26 */ + { { 0x4c52edf5,0xd8fabf78,0xa589ae53,0xdcd737e5,0xd791ab17,0x94918bf0, + 0xbcff06c9,0xb5fbd956,0xdca46d45,0xf6d3032e,0x41a3e486,0x2cdff7e1 }, + { 0x61f47ec8,0x6674b3ba,0xeef84608,0x8a882163,0x4c687f90,0xa257c705, + 0xf6cdf227,0xe30cb2ed,0x7f6ea846,0x2c4c64ca,0xcc6bcd3c,0x186fa17c } }, + /* 27 */ + { { 0x1dfcb91e,0x48a3f536,0x646d358a,0x83595e13,0x91128798,0xbd15827b, + 0x2187757a,0x3ce612b8,0x61bd7372,0x873150a1,0xb662f568,0xf4684530 }, + { 0x401896f6,0x8833950b,0x77f3e090,0xe11cb89a,0x48e7f4a5,0xb2f12cac, + 0xf606677e,0x313dd769,0x16579f93,0xfdcf08b3,0x46b8f22b,0x6429cec9 } }, + /* 28 */ + { { 0xbb75f9a4,0x4984dd54,0x29d3b570,0x4aef06b9,0x3d6e4c1e,0xb5f84ca2, + 0xb083ef35,0x24c61c11,0x392ca9ff,0xce4a7392,0x6730a800,0x865d6517 }, + { 0x722b4a2b,0xca3dfe76,0x7b083e0e,0x12c04bf9,0x1b86b8a5,0x803ce5b5, + 0x6a7e3e0c,0x3fc7632d,0xc81adbe4,0xc89970c2,0x120e16b1,0x3cbcd3ad } }, + /* 29 */ + { { 0xec30ce93,0xfbfb4cc7,0xb72720a2,0x10ed6c7d,0x47b55500,0xec675bf7, + 0x333ff7c3,0x90725903,0x5075bfc0,0xc7c3973e,0x07acf31b,0xb049ecb0 }, + { 0x4f58839c,0xb4076eaf,0xa2b05e4f,0x101896da,0xab40c66e,0x3f6033b0, + 0xc8d864ba,0x19ee9eeb,0x47bf6d2a,0xeb6cf155,0xf826477d,0x8e5a9663 } }, + /* 30 */ + { { 0xf7fbd5e1,0x69e62fdd,0x76912b1d,0x38ecfe54,0xd1da3bfb,0x845a3d56, + 0x1c86f0d4,0x0494950e,0x3bc36ce8,0x83cadbf9,0x4fccc8d1,0x41fce572 }, + { 0x8332c144,0x05f939c2,0x0871e46e,0xb17f248b,0x66e8aff6,0x3d8534e2, + 0x3b85c629,0x1d06f1dc,0xa3131b73,0xdb06a32e,0x8b3f64e5,0xf295184d } }, + /* 31 */ + { { 0x36ddc103,0xd9653ff7,0x95ef606f,0x25f43e37,0xfe06dce8,0x09e301fc, + 0x30b6eebf,0x85af2341,0x0ff56b20,0x79b12b53,0xfe9a3c6b,0x9b4fb499 }, + { 0x51d27ac2,0x0154f892,0x56ca5389,0xd33167e3,0xafc065a6,0x7828ec1f, + 0x7f746c9b,0x0959a258,0x0c44f837,0xb18f1be3,0xc4132fdb,0xa7946117 } }, + /* 32 */ + { { 0x5e3c647b,0xc0426b77,0x8cf05348,0xbfcbd939,0x172c0d3d,0x31d312e3, + 0xee754737,0x5f49fde6,0x6da7ee61,0x895530f0,0xe8b3a5fb,0xcf281b0a }, + { 0x41b8a543,0xfd149735,0x3080dd30,0x41a625a7,0x653908cf,0xe2baae07, + 0xba02a278,0xc3d01436,0x7b21b8f8,0xa0d0222e,0xd7ec1297,0xfdc270e9 } }, + /* 33 */ + { { 
0xbc7f41d6,0x00873c0c,0x1b7ad641,0xd976113e,0x238443fb,0x2a536ff4, + 0x41e62e45,0x030d00e2,0x5f545fc6,0x532e9867,0x8e91208c,0xcd033108 }, + { 0x9797612c,0xd1a04c99,0xeea674e2,0xd4393e02,0xe19742a1,0xd56fa69e, + 0x85f0590e,0xdd2ab480,0x48a2243d,0xa5cefc52,0x54383f41,0x48cc67b6 } }, + /* 34 */ + { { 0xfc14ab48,0x4e50430e,0x26706a74,0x195b7f4f,0xcc881ff6,0x2fe8a228, + 0xd945013d,0xb1b968e2,0x4b92162b,0x936aa579,0x364e754a,0x4fb766b7 }, + { 0x31e1ff7f,0x13f93bca,0xce4f2691,0x696eb5ca,0xa2b09e02,0xff754bf8, + 0xe58e3ff8,0x58f13c9c,0x1678c0b0,0xb757346f,0xa86692b3,0xd54200db } }, + /* 35 */ + { { 0x6dda1265,0x9a030bbd,0xe89718dd,0xf7b4f3fc,0x936065b8,0xa6a4931f, + 0x5f72241c,0xbce72d87,0x65775857,0x6cbb51cb,0x4e993675,0xc7161815 }, + { 0x2ee32189,0xe81a0f79,0x277dc0b2,0xef2fab26,0xb71f469f,0x9e64f6fe, + 0xdfdaf859,0xb448ce33,0xbe6b5df1,0x3f5c1c4c,0x1de45f7b,0xfb8dfb00 } }, + /* 36 */ + { { 0x4d5bb921,0xc7345fa7,0x4d2b667e,0x5c7e04be,0x282d7a3e,0x47ed3a80, + 0x7e47b2a4,0x5c2777f8,0x08488e2e,0x89b3b100,0xb2eb5b45,0x9aad77c2 }, + { 0xdaac34ae,0xd681bca7,0x26afb326,0x2452e4e5,0x41a1ee14,0x0c887924, + 0xc2407ade,0x743b04d4,0xfc17a2ac,0xcb5e999b,0x4a701a06,0x4dca2f82 } }, + /* 37 */ + { { 0x1127bc1a,0x68e31ca6,0x17ead3be,0xa3edd59b,0xe25f5a15,0x67b6b645, + 0xa420e15e,0x76221794,0x4b1e872e,0x794fd83b,0xb2dece1b,0x7cab3f03 }, + { 0xca9b3586,0x7119bf15,0x4d250bd7,0xa5545924,0xcc6bcf24,0x173633ea, + 0xb1b6f884,0x9bd308c2,0x447d38c3,0x3bae06f5,0xf341fe1c,0x54dcc135 } }, + /* 38 */ + { { 0x943caf0d,0x56d3598d,0x225ff133,0xce044ea9,0x563fadea,0x9edf6a7c, + 0x73e8dc27,0x632eb944,0x3190dcab,0x814b467e,0x6dbb1e31,0x2d4f4f31 }, + { 0xa143b7ca,0x8d69811c,0xde7cf950,0x4ec1ac32,0x37b5fe82,0x223ab5fd, + 0x9390f1d9,0xe82616e4,0x75804610,0xabff4b20,0x875b08f0,0x11b9be15 } }, + /* 39 */ + { { 0x3bbe682c,0x4ae31a3d,0x74eef2dd,0xbc7c5d26,0x3c47dd40,0x92afd10a, + 0xc14ab9e1,0xec7e0a3b,0xb2e495e4,0x6a6c3dd1,0x309bcd85,0x085ee5e9 }, + { 0x8c2e67fd,0xf381a908,0xe261eaf2,0x32083a80,0x96deee15,0x0fcd6a49, + 0x5e524c79,0xe3b8fb03,0x1d5b08b9,0x8dc360d9,0x7f26719f,0x3a06e2c8 } }, + /* 40 */ + { { 0x7237cac0,0x5cd9f5a8,0x43586794,0x93f0b59d,0xe94f6c4e,0x4384a764, + 0xb62782d3,0x8304ed2b,0xcde06015,0x0b8db8b3,0x5dbe190f,0x4336dd53 }, + { 0x92ab473a,0x57443553,0xbe5ed046,0x031c7275,0x21909aa4,0x3e78678c, + 0x99202ddb,0x4ab7e04f,0x6977e635,0x2648d206,0x093198be,0xd427d184 } }, + /* 41 */ + { { 0x0f9b5a31,0x822848f5,0xbaadb62a,0xbb003468,0x3357559c,0x233a0472, + 0x79aee843,0x49ef6880,0xaeb9e1e3,0xa89867a0,0x1f6f9a55,0xc151931b }, + { 0xad74251e,0xd264eb0b,0x4abf295e,0x37b9b263,0x04960d10,0xb600921b, + 0x4da77dc0,0x0de53dbc,0xd2b18697,0x01d9bab3,0xf7156ddf,0xad54ec7a } }, + /* 42 */ + { { 0x79efdc58,0x8e74dc35,0x4ff68ddb,0x456bd369,0xd32096a5,0x724e74cc, + 0x386783d0,0xe41cff42,0x7c70d8a4,0xa04c7f21,0xe61a19a2,0x41199d2f }, + { 0x29c05dd2,0xd389a3e0,0xe7e3fda9,0x535f2a6b,0x7c2b4df8,0x26ecf72d, + 0xfe745294,0x678275f4,0x9d23f519,0x6319c9cc,0x88048fc4,0x1e05a02d } }, + /* 43 */ + { { 0xd4d5ffe8,0x75cc8e2e,0xdbea17f2,0xf8bb4896,0xcee3cb4a,0x35059790, + 0xa47c6165,0x4c06ee85,0x92935d2f,0xf98fff25,0x32ffd7c7,0x34c4a572 }, + { 0xea0376a2,0xc4b14806,0x4f115e02,0x2ea5e750,0x1e55d7c0,0x532d76e2, + 0xf31044da,0x68dc9411,0x71b77993,0x9272e465,0x93a8cfd5,0xadaa38bb } }, + /* 44 */ + { { 0x7d4ed72a,0x4bf0c712,0xba1f79a3,0xda0e9264,0xf4c39ea4,0x48c0258b, + 0x2a715138,0xa5394ed8,0xbf06c660,0x4af511ce,0xec5c37cd,0xfcebceef }, + { 0x779ae8c1,0xf23b75aa,0xad1e606e,0xdeff59cc,0x22755c82,0xf3f526fd, + 
0xbb32cefd,0x64c5ab44,0x915bdefd,0xa96e11a2,0x1143813e,0xab19746a } }, + /* 45 */ + { { 0xec837d7d,0x43c78585,0xb8ee0ba4,0xca5b6fbc,0xd5dbb5ee,0x34e924d9, + 0xbb4f1ca5,0x3f4fa104,0x398640f7,0x15458b72,0xd7f407ea,0x4231faa9 }, + { 0xf96e6896,0x53e0661e,0xd03b0f9d,0x554e4c69,0x9c7858d1,0xd4fcb07b, + 0x52cb04fa,0x7e952793,0x8974e7f7,0x5f5f1574,0x6b6d57c8,0x2e3fa558 } }, + /* 46 */ + { { 0x6a9951a8,0x42cd4803,0x42792ad0,0xa8b15b88,0xabb29a73,0x18e8bcf9, + 0x409933e8,0xbfd9a092,0xefb88dc4,0x760a3594,0x40724458,0x14418863 }, + { 0x99caedc7,0x162a56ee,0x91d101c9,0x8fb12ecd,0x393202da,0xea671967, + 0xa4ccd796,0x1aac8c4a,0x1cf185a8,0x7db05036,0x8cfd095a,0x0c9f86cd } }, + /* 47 */ + { { 0x10b2a556,0x9a728147,0x327b70b2,0x767ca964,0x5e3799b7,0x04ed9e12, + 0x22a3eb2a,0x6781d2dc,0x0d9450ac,0x5bd116eb,0xa7ebe08a,0xeccac1fc }, + { 0xdc2d6e94,0xde68444f,0x35ecf21b,0x3621f429,0x29e03a2c,0x14e2d543, + 0x7d3e7f0a,0x53e42cd5,0x73ed00b9,0xbba26c09,0xc57d2272,0x00297c39 } }, + /* 48 */ + { { 0xb8243a7d,0x3aaaab10,0x8fa58c5b,0x6eeef93e,0x9ae7f764,0xf866fca3, + 0x61ab04d3,0x64105a26,0x03945d66,0xa3578d8a,0x791b848c,0xb08cd3e4 }, + { 0x756d2411,0x45edc5f8,0xa755128c,0xd4a790d9,0x49e5f6a0,0xc2cf0963, + 0xf649beaa,0xc66d267d,0x8467039e,0x3ce6d968,0x42f7816f,0x50046c6b } }, + /* 49 */ + { { 0x66425043,0x92ae1602,0xf08db890,0x1ff66afd,0x8f162ce5,0x386f5a7f, + 0xfcf5598f,0x18d2dea0,0x1a8ca18e,0x78372b3a,0x8cd0e6f7,0xdf0d20eb }, + { 0x75bb4045,0x7edd5e1d,0xb96d94b7,0x252a47ce,0x2c626776,0xbdb29358, + 0x40dd1031,0x853c3943,0x7d5f47fd,0x9dc9becf,0xbae4044a,0x27c2302f } }, + /* 50 */ + { { 0x8f2d49ce,0x2d1d208a,0x162df0a2,0x0d91aa02,0x09a07f65,0x9c5cce87, + 0x84339012,0xdf07238b,0x419442cd,0x5028e2c8,0x72062aba,0x2dcbd358 }, + { 0xe4680967,0xb5fbc3cb,0x9f92d72c,0x2a7bc645,0x116c369d,0x806c76e1, + 0x3177e8d8,0x5c50677a,0x4569df57,0x753739eb,0x36c3f40b,0x2d481ef6 } }, + /* 51 */ + { { 0xfea1103e,0x1a2d39fd,0x95f81b17,0xeaae5592,0xf59b264a,0xdbd0aa18, + 0xcb592ee0,0x90c39c1a,0x9750cca3,0xdf62f80d,0xdf97cc6c,0xda4d8283 }, + { 0x1e201067,0x0a6dd346,0x69fb1f6b,0x1531f859,0x1d60121f,0x4895e552, + 0x4c041c91,0x0b21aab0,0xbcc1ccf8,0x9d896c46,0x3141bde7,0xd24da3b3 } }, + /* 52 */ + { { 0x53b0a354,0x575a0537,0x0c6ddcd8,0x392ff2f4,0x56157b94,0x0b8e8cff, + 0x3b1b80d1,0x073e57bd,0x3fedee15,0x2a75e0f0,0xaa8e6f19,0x752380e4 }, + { 0x6558ffe9,0x1f4e227c,0x19ec5415,0x3a348618,0xf7997085,0xab382d5e, + 0xddc46ac2,0x5e6deaff,0xfc8d094c,0xe5144078,0xf60e37c6,0xf674fe51 } }, + /* 53 */ + { { 0xaf63408f,0x6fb87ae5,0xcd75a737,0xa39c36a9,0xcf4c618d,0x7833313f, + 0xf034c88d,0xfbcd4482,0x39b35288,0x4469a761,0x66b5d9c9,0x77a711c5 }, + { 0x944f8d65,0x4a695dc7,0x161aaba8,0xe6da5f65,0x24601669,0x8654e9c3, + 0x28ae7491,0xbc8b93f5,0x8f5580d8,0x5f1d1e83,0xcea32cc8,0x8ccf9a1a } }, + /* 54 */ + { { 0x7196fee2,0x28ab110c,0x874c8945,0x75799d63,0x29aedadd,0xa2629348, + 0x2be88ff4,0x9714cc7b,0xd58d60d6,0xf71293cf,0x32a564e9,0xda6b6cb3 }, + { 0x3dd821c2,0xf43fddb1,0x90dd323d,0xf2f2785f,0x048489f8,0x91246419, + 0xd24c6749,0x61660f26,0xc803c15c,0x961d9e8c,0xfaadc4c9,0x631c6158 } }, + /* 55 */ + { { 0xfd752366,0xacf2ebe0,0x139be88b,0xb93c340e,0x0f20179e,0x98f66485, + 0xff1da785,0x14820254,0x4f85c16e,0x5278e276,0x7aab1913,0xa246ee45 }, + { 0x53763b33,0x43861eb4,0x45c0bc0d,0xc49f03fc,0xad6b1ea1,0xafff16bc, + 0x6fd49c99,0xce33908b,0xf7fde8c3,0x5c51e9bf,0xff142c5e,0x076a7a39 } }, + /* 56 */ + { { 0x9e338d10,0x04639dfe,0xf42b411b,0x8ee6996f,0xa875cef2,0x960461d1, + 0x95b4d0ba,0x1057b6d6,0xa906e0bc,0x27639252,0xe1c20f8a,0x2c19f09a }, + { 
0xeef4c43d,0x5b8fc3f0,0x07a84aa9,0xe2e1b1a8,0x835d2bdb,0x5f455528, + 0x207132dd,0x0f4aee4d,0x3907f675,0xe9f8338c,0x0e0531f0,0x7a874dc9 } }, + /* 57 */ + { { 0x97c27050,0x84b22d45,0x59e70bf8,0xbd0b8df7,0x79738b9b,0xb4d67405, + 0xcd917c4f,0x47f4d5f5,0x13ce6e33,0x9099c4ce,0x521d0f8b,0x942bfd39 }, + { 0xa43b566d,0x5028f0f6,0x21bff7de,0xaf6e8669,0xc44232cd,0x83f6f856, + 0xf915069a,0x65680579,0xecfecb85,0xd12095a2,0xdb01ba16,0xcf7f06ae } }, + /* 58 */ + { { 0x8ef96c80,0x0f56e3c4,0x3ddb609c,0xd521f2b3,0x7dc1450d,0x2be94102, + 0x02a91fe2,0x2d21a071,0x1efa37de,0x2e6f74fa,0x156c28a1,0x9a9a90b8 }, + { 0x9dc7dfcb,0xc54ea9ea,0x2c2c1d62,0xc74e66fc,0x49d3e067,0x9f23f967, + 0x54dd38ad,0x1c7c3a46,0x5946cee3,0xc7005884,0x45cc045d,0x89856368 } }, + /* 59 */ + { { 0xfce73946,0x29da7cd4,0x23168563,0x8f697db5,0xcba92ec6,0x8e235e9c, + 0x9f91d3ea,0x55d4655f,0xaa50a6cd,0xf3689f23,0x21e6a1a0,0xdcf21c26 }, + { 0x61b818bf,0xcffbc82e,0xda47a243,0xc74a2f96,0x8bc1a0cf,0x234e980a, + 0x7929cb6d,0xf35fd6b5,0xefe17d6c,0x81468e12,0x58b2dafb,0xddea6ae5 } }, + /* 60 */ + { { 0x7e787b2e,0x294de887,0x39a9310d,0x258acc1f,0xac14265d,0x92d9714a, + 0x708b48a0,0x18b5591c,0xe1abbf71,0x27cc6bb0,0x568307b9,0xc0581fa3 }, + { 0xf24d4d58,0x9e0f58a3,0xe0ce2327,0xfebe9bb8,0x9d1be702,0x91fd6a41, + 0xfacac993,0x9a7d8a45,0x9e50d66d,0xabc0a08c,0x06498201,0x02c342f7 } }, + /* 61 */ + { { 0x157bdbc2,0xccd71407,0xad0e1605,0x72fa89c6,0xb92a015f,0xb1d3da2b, + 0xa0a3fe56,0x8ad9e7cd,0x24f06737,0x160edcbd,0x61275be6,0x79d4db33 }, + { 0x5f3497c4,0xd3d31fd9,0x04192fb0,0x8cafeaee,0x13a50af3,0xe13ca745, + 0x8c85aae5,0x18826167,0x9eb556ff,0xce06cea8,0xbdb549f3,0x2eef1995 } }, + /* 62 */ + { { 0x50596edc,0x8ed7d3eb,0x905243a2,0xaa359362,0xa4b6d02b,0xa212c2c2, + 0xc4fbec68,0x611fd727,0xb84f733d,0x8a0b8ff7,0x5f0daf0e,0xd85a6b90 }, + { 0xd4091cf7,0x60e899f5,0x2eff2768,0x4fef2b67,0x10c33964,0xc1f195cb, + 0x93626a8f,0x8275d369,0x0d6c840a,0xc77904f4,0x7a868acd,0x88d8b7fd } }, + /* 63 */ + { { 0x7bd98425,0x85f23723,0xc70b154e,0xd4463992,0x96687a2e,0xcbb00ee2, + 0xc83214fd,0x905fdbf7,0x13593684,0x2019d293,0xef51218e,0x0428c393 }, + { 0x981e909a,0x40c7623f,0x7be192da,0x92513385,0x4010907e,0x48fe480f, + 0x3120b459,0xdd7a187c,0xa1fd8f3c,0xc9d7702d,0xe358efc5,0x66e4753b } }, + /* 64 */ + { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9, + 0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 }, + { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1, + 0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } }, + /* 65 */ + { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc, + 0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a }, + { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18, + 0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } }, + /* 66 */ + { { 0xc6a2123f,0xb1a358f5,0xfe28df6d,0x927b2d95,0xf199d2f9,0x89702753, + 0x1a3f82dc,0x0a73754c,0x777affe1,0x063d029d,0xdae6d34d,0x5439817e }, + { 0x6b8b83c4,0xf7979eef,0x9d945682,0x615cb214,0xc5e57eae,0x8f0e4fac, + 0x113047dd,0x042b89b8,0x93f36508,0x888356dc,0x5fd1f32f,0xbf008d18 } }, + /* 67 */ + { { 0x4e8068db,0x8012aa24,0xa5729a47,0xc72cc641,0x43f0691d,0x3c33df2c, + 0x1d92145f,0xfa057347,0xb97f7946,0xaefc0f2f,0x2f8121bf,0x813d75cb }, + { 0x4383bba6,0x05613c72,0xa4224b3f,0xa924ce70,0x5f2179a6,0xe59cecbe, + 0x79f62b61,0x78e2e8aa,0x53ad8079,0x3ac2cc3b,0xd8f4fa96,0x55518d71 } }, + /* 68 */ + { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247, + 
0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 }, + { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d, + 0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } }, + /* 69 */ + { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12, + 0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e }, + { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f, + 0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } }, + /* 70 */ + { { 0xb0ab9645,0xb5e405d3,0xd5f1f711,0xaeec7f98,0x585c2a6e,0x8ad42311, + 0x512c6944,0x045acb9e,0xa90db1c6,0xae106c4e,0x898e6563,0xb89f33d5 }, + { 0x7fed2ce4,0x43b07cd9,0xdd815b20,0xf9934e17,0x0a81a349,0x6778d4d5, + 0x52918061,0x9e616ade,0xd7e67112,0xfa06db06,0x88488091,0x1da23cf1 } }, + /* 71 */ + { { 0x42f2c4b5,0x821c46b3,0x66059e47,0x931513ef,0x66f50cd1,0x7030ae43, + 0x43e7b127,0x43b536c9,0x5fca5360,0x006258cf,0x6b557abf,0xe4e3ee79 }, + { 0x24c8b22f,0xbb6b3900,0xfcbf1054,0x2eb5e2c1,0x567492af,0x937b18c9, + 0xacf53957,0xf09432e4,0x1dbf3a56,0x585f5a9d,0xbe0887cf,0xf86751fd } }, + /* 72 */ + { { 0x9d10e0b2,0x157399cb,0x60dc51b7,0x1c0d5956,0x1f583090,0x1d496b8a, + 0x88590484,0x6658bc26,0x03213f28,0x88c08ab7,0x7ae58de4,0x8d2e0f73 }, + { 0x486cfee6,0x9b79bc95,0xe9e5bc57,0x036a26c7,0xcd8ae97a,0x1ad03601, + 0xff3a0494,0x06907f87,0x2c7eb584,0x078f4bbf,0x7e8d0a5a,0xe3731bf5 } }, + /* 73 */ + { { 0xe1cd0abe,0x72f2282b,0x87efefa2,0xd4f9015e,0x6c3834bd,0x9d189806, + 0xb8a29ced,0x9c8cdcc1,0xfee82ebc,0x0601b9f4,0x7206a756,0x371052bc }, + { 0x46f32562,0x76fa1092,0x17351bb4,0xdaad534c,0xb3636bb5,0xc3d64c37, + 0x45d54e00,0x038a8c51,0x32c09e7c,0x301e6180,0x95735151,0x9764eae7 } }, + /* 74 */ + { { 0xcbd5256a,0x8791b19f,0x6ca13a3b,0x4007e0f2,0x4cf06904,0x03b79460, + 0xb6c17589,0xb18a9c22,0x81d45908,0xa1cb7d7d,0x21bb68f1,0x6e13fa9d }, + { 0xa71e6e16,0x47183c62,0xe18749ed,0x5cf0ef8e,0x2e5ed409,0x2c9c7f9b, + 0xe6e117e1,0x042eeacc,0x13fb5a7f,0xb86d4816,0xc9e5feb1,0xea1cf0ed } }, + /* 75 */ + { { 0xcea4cc9b,0x6e6573c9,0xafcec8f3,0x5417961d,0xa438b6f6,0x804bf02a, + 0xdcd4ea88,0xb894b03c,0x3799571f,0xd0f807e9,0x862156e8,0x3466a7f5 }, + { 0x56515664,0x51e59acd,0xa3c5eb0b,0x55b0f93c,0x6a4279db,0x84a06b02, + 0xc5fae08e,0x5c850579,0xa663a1a2,0xcf07b8db,0xf46ffc8d,0x49a36bbc } }, + /* 76 */ + { { 0x46d93106,0xe47f5acc,0xaa897c9c,0x65b7ade0,0x12d7e4be,0x37cf4c94, + 0xd4b2caa9,0xa2ae9b80,0xe60357a3,0x5e7ce09c,0xc8ecd5f9,0x29f77667 }, + { 0xa8a0b1c5,0xdf6868f5,0x62978ad8,0x240858cf,0xdc0002a1,0x0f7ac101, + 0xffe9aa05,0x1d28a9d7,0x5b962c97,0x744984d6,0x3d28c8b2,0xa8a7c00b } }, + /* 77 */ + { { 0xae11a338,0x7c58a852,0xd1af96e7,0xa78613f1,0x5355cc73,0x7e9767d2, + 0x792a2de6,0x6ba37009,0x124386b2,0x7d60f618,0x11157674,0xab09b531 }, + { 0x98eb9dd0,0x95a04841,0x15070328,0xe6c17acc,0x489c6e49,0xafc6da45, + 0xbb211530,0xab45a60a,0x7d7ea933,0xc58d6592,0x095642c6,0xa3ef3c65 } }, + /* 78 */ + { { 0xdf010879,0x89d420e9,0x39576179,0x9d25255d,0xe39513b6,0x9cdefd50, + 0xd5d1c313,0xe4efe45b,0x3f7af771,0xc0149de7,0x340ab06b,0x55a6b4f4 }, + { 0xebeaf771,0xf1325251,0x878d4288,0x2ab44128,0x18e05afe,0xfcd5832e, + 0xcc1fb62b,0xef52a348,0xc1c4792a,0x2bd08274,0x877c6dc7,0x345c5846 } }, + /* 79 */ + { { 0xbea65e90,0xde15ceb0,0x2416d99c,0x0987f72b,0xfd863dec,0x44db578d, + 0xac6a3578,0xf617b74b,0xdb48e999,0x9e62bd7a,0xeab1a1be,0x877cae61 }, + { 0x3a358610,0x23adddaa,0x325e2b07,0x2fc4d6d1,0x1585754e,0x897198f5, + 0xb392b584,0xf741852c,0xb55f7de1,0x9927804c,0x1aa8efae,0xe9e6c4ed } }, + /* 80 */ + { { 
0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe, + 0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 }, + { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6, + 0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } }, + /* 81 */ + { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6, + 0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 }, + { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf, + 0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } }, + /* 82 */ + { { 0xf8e60f5b,0x511188b4,0x48aa2ada,0x7fe67015,0x381abca2,0xdb333cb8, + 0xdaf3fc97,0xb15e6d9d,0x36aabc03,0x4b24f6eb,0x72a748b4,0xc59789df }, + { 0x29cf5279,0x26fcb8a5,0x01ad9a6c,0x7a3c6bfc,0x4b8bac9b,0x866cf88d, + 0x9c80d041,0xf4c89989,0x70add148,0xf0a04241,0x45d81a41,0x5a02f479 } }, + /* 83 */ + { { 0xc1c90202,0xfa5c877c,0xf8ac7570,0xd099d440,0xd17881f7,0x428a5b1b, + 0x5b2501d7,0x61e267db,0xf2e4465b,0xf889bf04,0x76aa4cb8,0x4da3ae08 }, + { 0xe3e66861,0x3ef0fe26,0x3318b86d,0x5e772953,0x747396df,0xc3c35fbc, + 0x439ffd37,0x5115a29c,0xb2d70374,0xbfc4bd97,0x56246b9d,0x088630ea } }, + /* 84 */ + { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53, + 0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 }, + { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370, + 0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } }, + /* 85 */ + { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f, + 0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc }, + { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2, + 0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } }, + /* 86 */ + { { 0xfecf5b9b,0x34e0f9d1,0xf206966a,0xa115b12b,0x1eaa0534,0x5591cf3b, + 0xfb1558f9,0x5f0293cb,0x1bc703a5,0x1c8507a4,0x862c1f81,0x92e6b81c }, + { 0xcdaf24e3,0xcc9ebc66,0x72fcfc70,0x68917ecd,0x8157ba48,0x6dc9a930, + 0xb06ab2b2,0x5d425c08,0x36e929c4,0x362f8ce7,0x62e89324,0x09f6f57c } }, + /* 87 */ + { { 0xd29375fb,0x1c7d6b78,0xe35d1157,0xfabd851e,0x4243ea47,0xf6f62dcd, + 0x8fe30b0f,0x1dd92460,0xffc6e709,0x08166dfa,0x0881e6a7,0xc6c4c693 }, + { 0xd6a53fb0,0x20368f87,0x9eb4d1f9,0x38718e9f,0xafd7e790,0x03f08acd, + 0x72fe2a1c,0x0835eb44,0x88076e5d,0x7e050903,0xa638e731,0x538f765e } }, + /* 88 */ + { { 0xc2663b4b,0x0e0249d9,0x47cd38dd,0xe700ab5b,0x2c46559f,0xb192559d, + 0x4bcde66d,0x8f9f74a8,0x3e2aced5,0xad161523,0x3dd03a5b,0xc155c047 }, + { 0x3be454eb,0x346a8799,0x83b7dccd,0x66ee94db,0xab9d2abe,0x1f6d8378, + 0x7733f355,0x4a396dd2,0xf53553c2,0x419bd40a,0x731dd943,0xd0ead98d } }, + /* 89 */ + { { 0xec142408,0x908e0b0e,0x4114b310,0x98943cb9,0x1742b1d7,0x03dbf7d8, + 0x693412f4,0xd270df6b,0x8f69e20c,0xc5065494,0x697e43a1,0xa76a90c3 }, + { 0x4624825a,0xe0fa3384,0x8acc34c2,0x82e48c0b,0xe9a14f2b,0x7b24bd14, + 0x4db30803,0x4f5dd5e2,0x932da0a3,0x0c77a9e7,0x74c653dc,0x20db90f2 } }, + /* 90 */ + { { 0x0e6c5fd9,0x261179b7,0x6c982eea,0xf8bec123,0xd4957b7e,0x47683338, + 0x0a72f66a,0xcc47e664,0x1bad9350,0xbd54bf6a,0xf454e95a,0xdfbf4c6a }, + { 0x6907f4fa,0x3f7a7afa,0x865ca735,0x7311fae0,0x2a496ada,0x24737ab8, + 0x15feb79b,0x13e425f1,0xa1b93c21,0xe9e97c50,0x4ddd3eb5,0xb26b6eac } }, + /* 91 */ + { { 0x2a2e5f2b,0x81cab9f5,0xbf385ac4,0xf93caf29,0xc909963a,0xf4bf35c3, + 0x74c9143c,0x081e7300,0xc281b4c5,0x3ea57fa8,0x9b340741,0xe497905c }, + { 0x55ab3cfb,0xf556dd8a,0x518db6ad,0xd444b96b,0x5ef4b955,0x34f5425a, + 
0xecd26aa3,0xdda7a3ac,0xda655e97,0xb57da11b,0xc2024c70,0x02da3eff } }, + /* 92 */ + { { 0x6481d0d9,0xe24b0036,0x818fdfe2,0x3740dbe5,0x190fda00,0xc1fc1f45, + 0x3cf27fde,0x329c9280,0x6934f43e,0x7435cb53,0x7884e8fe,0x2b505a5d }, + { 0x711adcc9,0x6cfcc6a6,0x531e21e1,0xf034325c,0x9b2a8a99,0xa2f4a967, + 0x3c21bdff,0x9d5f3842,0x31b57d66,0xb25c7811,0x0b8093b9,0xdb5344d8 } }, + /* 93 */ + { { 0xae50a2f5,0x0d72e667,0xe4a861d1,0x9b7f8d8a,0x330df1cb,0xa129f70f, + 0xe04fefc3,0xe90aa5d7,0xe72c3ae1,0xff561ecb,0xcdb955fa,0x0d8fb428 }, + { 0xd7663784,0xd2235f73,0x7e2c456a,0xc05baec6,0x2adbfccc,0xe5c292e4, + 0xefb110d5,0x4fd17988,0xd19d49f3,0x27e57734,0x84f679fe,0x188ac4ce } }, + /* 94 */ + { { 0xa796c53e,0x7ee344cf,0x0868009b,0xbbf6074d,0x474a1295,0x1f1594f7, + 0xac11632d,0x66776edc,0x04e2fa5a,0x1862278b,0xc854a89a,0x52665cf2 }, + { 0x8104ab58,0x7e376464,0x7204fd6d,0x16775913,0x44ea1199,0x86ca06a5, + 0x1c9240dd,0xaa3f765b,0x24746149,0x5f8501a9,0xdcd251d7,0x7b982e30 } }, + /* 95 */ + { { 0xc15f3060,0xe44e9efc,0xa87ebbe6,0x5ad62f2e,0xc79500d4,0x36499d41, + 0x336fa9d1,0xa66d6dc0,0x5afd3b1f,0xf8afc495,0xe5c9822b,0x1d8ccb24 }, + { 0x79d7584b,0x4031422b,0xea3f20dd,0xc54a0580,0x958468c5,0x3f837c8f, + 0xfbea7735,0x3d82f110,0x7dffe2fc,0x679a8778,0x20704803,0x48eba63b } }, + /* 96 */ + { { 0xdf46e2f6,0x89b10d41,0x19514367,0x13ab57f8,0x1d469c87,0x067372b9, + 0x4f6c5798,0x0c195afa,0x272c9acf,0xea43a12a,0x678abdac,0x9dadd8cb }, + { 0xe182579a,0xcce56c6b,0x2d26c2d8,0x86febadb,0x2a44745c,0x1c668ee1, + 0x98dc047a,0x580acd86,0x51b9ec2d,0x5a2b79cc,0x4054f6a0,0x007da608 } }, + /* 97 */ + { { 0x17b00dd0,0x9e3ca352,0x0e81a7a6,0x046779cb,0xd482d871,0xb999fef3, + 0xd9233fbc,0xe6f38134,0xf48cd0e0,0x112c3001,0x3c6c66ae,0x934e7576 }, + { 0xd73234dc,0xb44d4fc3,0x864eafc1,0xfcae2062,0x26bef21a,0x843afe25, + 0xf3b75fdf,0x61355107,0x794c2e6b,0x8367a5aa,0x8548a372,0x3d2629b1 } }, + /* 98 */ + { { 0x437cfaf8,0x6230618f,0x2032c299,0x5b8742cb,0x2293643a,0x949f7247, + 0x09464f79,0xb8040f1a,0x4f254143,0x049462d2,0x366c7e76,0xabd6b522 }, + { 0xd5338f55,0x119b392b,0x01495a0c,0x1a80a9ce,0xf8d7537e,0xf3118ca7, + 0x6bf4b762,0xb715adc2,0xa8482b6c,0x24506165,0x96a7c84d,0xd958d7c6 } }, + /* 99 */ + { { 0xbdc21f31,0x9ad8aa87,0x8063e58c,0xadb3cab4,0xb07dd7b8,0xefd86283, + 0x1be7c6b4,0xc7b9b762,0x015582de,0x2ef58741,0x299addf3,0xc970c52e }, + { 0x22f24d66,0x78f02e2a,0x74cc100a,0xefec1d10,0x09316e1a,0xaf2a6a39, + 0x5849dd49,0xce7c2205,0x96bffc4c,0x9c1fe75c,0x7ba06ec0,0xcad98fd2 } }, + /* 100 */ + { { 0xb648b73e,0xed76e2d0,0x1cfd285e,0xa9f92ce5,0x2ed13de1,0xa8c86c06, + 0xa5191a93,0x1d3a574e,0x1ad1b8bf,0x385cdf8b,0x47d2cfe3,0xbbecc28a }, + { 0x69cec548,0x98d326c0,0xf240a0b2,0x4f5bc1dd,0x29057236,0x241a7062, + 0xc68294a4,0x0fc6e9c5,0xa319f17a,0x4d04838b,0x9ffc1c6f,0x8b612cf1 } }, + /* 101 */ + { { 0x4c3830eb,0x9bb0b501,0x8ee0d0c5,0x3d08f83c,0x79ba9389,0xa4a62642, + 0x9cbc2914,0x5d5d4044,0x074c46f0,0xae9eb83e,0x74ead7d6,0x63bb758f }, + { 0xc6bb29e0,0x1c40d2ea,0x4b02f41e,0x95aa2d87,0x53cb199a,0x92989175, + 0x51584f6d,0xdd91bafe,0x31a1aaec,0x3715efb9,0x46780f9e,0xc1b6ae5b } }, + /* 102 */ + { { 0x42772f41,0xcded3e4b,0x3bcb79d1,0x3a700d5d,0x80feee60,0x4430d50e, + 0xf5e5d4bb,0x444ef1fc,0xe6e358ff,0xc660194f,0x6a91b43c,0xe68a2f32 }, + { 0x977fe4d2,0x5842775c,0x7e2a41eb,0x78fdef5c,0xff8df00e,0x5f3bec02, + 0x5852525d,0xf4b840cd,0x4e6988bd,0x0870483a,0xcc64b837,0x39499e39 } }, + /* 103 */ + { { 0xb08df5fe,0xfc05de80,0x63ba0362,0x0c12957c,0xd5cf1428,0xea379414, + 0x54ef6216,0xc559132a,0xb9e65cf8,0x33d5f12f,0x1695d663,0x09c60278 }, + { 
0x61f7a2fb,0x3ac1ced4,0xd4f5eeb8,0xdd838444,0x8318fcad,0x82a38c6c, + 0xe9f1a864,0x315be2e5,0x442daf47,0x317b5771,0x95aa5f9e,0x81b5904a } }, + /* 104 */ + { { 0x8b21d232,0x6b6b1c50,0x8c2cba75,0x87f3dbc0,0xae9f0faf,0xa7e74b46, + 0xbb7b8079,0x036a0985,0x8d974a25,0x4f185b90,0xd9af5ec9,0x5aa7cef0 }, + { 0x57dcfffc,0xe0566a70,0xb8453225,0x6ea311da,0x23368aa9,0x72ea1a8d, + 0x48cd552d,0xed9b2083,0xc80ea435,0xb987967c,0x6c104173,0xad735c75 } }, + /* 105 */ + { { 0xcee76ef4,0xaea85ab3,0xaf1d2b93,0x44997444,0xeacb923f,0x0851929b, + 0x51e3bc0c,0xb080b590,0x59be68a2,0xc4ee1d86,0x64b26cda,0xf00de219 }, + { 0xf2e90d4d,0x8d7fb5c0,0x77d9ec64,0x00e219a7,0x5d1c491c,0xc4e6febd, + 0x1a8f4585,0x080e3754,0x48d2af9c,0x4a9b86c8,0xb6679851,0x2ed70db6 } }, + /* 106 */ + { { 0x586f25cb,0xaee44116,0xa0fcf70f,0xf7b6861f,0x18a350e8,0x55d2cd20, + 0x92dc286f,0x861bf3e5,0x6226aba7,0x9ab18ffa,0xa9857b03,0xd15827be }, + { 0x92e6acef,0x26c1f547,0xac1fbac3,0x422c63c8,0xfcbfd71d,0xa2d8760d, + 0xb2511224,0x35f6a539,0x048d1a21,0xbaa88fa1,0xebf999db,0x49f1abe9 } }, + /* 107 */ + { { 0xf7492b73,0x16f9f4f4,0xcb392b1a,0xcf28ec1e,0x69ca6ffc,0x45b130d4, + 0xb72efa58,0x28ba8d40,0x5ca066f5,0xace987c7,0x4ad022eb,0x3e399246 }, + { 0x752555bb,0x63a2d84e,0x9c2ae394,0xaaa93b4a,0xc89539ca,0xcd80424e, + 0xaa119a99,0x6d6b5a6d,0x379f2629,0xbd50334c,0xef3cc7d3,0x899e925e } }, + /* 108 */ + { { 0xbf825dc4,0xb7ff3651,0x40b9c462,0x0f741cc4,0x5cc4fb5b,0x771ff5a9, + 0x47fd56fe,0xcb9e9c9b,0x5626c0d3,0xbdf053db,0xf7e14098,0xa97ce675 }, + { 0x6c934f5e,0x68afe5a3,0xccefc46f,0x6cd5e148,0xd7a88586,0xc7758570, + 0xdd558d40,0x49978f5e,0x64ae00c1,0xa1d5088a,0xf1d65bb2,0x58f2a720 } }, + /* 109 */ + { { 0x3e4daedb,0x66fdda4a,0x65d1b052,0x38318c12,0x4c4bbf5c,0x28d910a2, + 0x78a9cd14,0x762fe5c4,0xd2cc0aee,0x08e5ebaa,0xca0c654c,0xd2cdf257 }, + { 0x08b717d2,0x48f7c58b,0x386cd07a,0x3807184a,0xae7d0112,0x3240f626, + 0xc43917b0,0x03e9361b,0x20aea018,0xf261a876,0x7e1e6372,0x53f556a4 } }, + /* 110 */ + { { 0x2f512a90,0xc84cee56,0x1b0ea9f1,0x24b3c004,0xe26cc1ea,0x0ee15d2d, + 0xf0c9ef7d,0xd848762c,0xd5341435,0x1026e9c5,0xfdb16b31,0x8f5b73dc }, + { 0xd2c75d95,0x1f69bef2,0xbe064dda,0x8d33d581,0x57ed35e6,0x8c024c12, + 0xc309c281,0xf8d435f9,0xd6960193,0xfd295061,0xe9e49541,0x66618d78 } }, + /* 111 */ + { { 0x8ce382de,0x571cfd45,0xde900dde,0x175806ee,0x34aba3b5,0x61849965, + 0xde7aec95,0xe899778a,0xff4aa97f,0xe8f00f6e,0x010b0c6d,0xae971cb5 }, + { 0x3af788f1,0x1827eebc,0xe413fe2d,0xd46229ff,0x4741c9b4,0x8a15455b, + 0xf8e424eb,0x5f02e690,0xdae87712,0x40a1202e,0x64944f6d,0x49b3bda2 } }, + /* 112 */ + { { 0x035b2d69,0xd63c6067,0x6bed91b0,0xb507150d,0x7afb39b2,0x1f35f82f, + 0x16012b66,0xb9bd9c01,0xed0a5f50,0x00d97960,0x2716f7c9,0xed705451 }, + { 0x127abdb4,0x1576eff4,0xf01e701c,0x6850d698,0x3fc87e2f,0x9fa7d749, + 0xb0ce3e48,0x0b6bcc6f,0xf7d8c1c0,0xf4fbe1f5,0x02719cc6,0xcf75230e } }, + /* 113 */ + { { 0x722d94ed,0x6761d6c2,0x3718820e,0xd1ec3f21,0x25d0e7c6,0x65a40b70, + 0xbaf3cf31,0xd67f830e,0xb93ea430,0x633b3807,0x0bc96c69,0x17faa0ea }, + { 0xdf866b98,0xe6bf3482,0xa9db52d4,0x205c1ee9,0xff9ab869,0x51ef9bbd, + 0x75eeb985,0x3863dad1,0xd3cf442a,0xef216c3b,0xf9c8e321,0x3fb228e3 } }, + /* 114 */ + { { 0x0760ac07,0x94f9b70c,0x9d79bf4d,0xf3c9ccae,0xc5ffc83d,0x73cea084, + 0xdc49c38e,0xef50f943,0xbc9e7330,0xf467a2ae,0x44ea7fba,0x5ee534b6 }, + { 0x03609e7f,0x20cb6272,0x62fdc9f0,0x09844355,0x0f1457f7,0xaf5c8e58, + 0xb4b25941,0xd1f50a6c,0x2ec82395,0x77cb247c,0xda3dca33,0xa5f3e1e5 } }, + /* 115 */ + { { 0x7d85fa94,0x023489d6,0x2db9ce47,0x0ba40537,0xaed7aad1,0x0fdf7a1f, + 
0x9a4ccb40,0xa57b0d73,0x5b18967c,0x48fcec99,0xb7274d24,0xf30b5b6e }, + { 0xc81c5338,0x7ccb4773,0xa3ed6bd0,0xb85639e6,0x1d56eada,0x7d9df95f, + 0x0a1607ad,0xe256d57f,0x957574d6,0x6da7ffdc,0x01c7a8c4,0x65f84046 } }, + /* 116 */ + { { 0xcba1e7f1,0x8d45d0cb,0x02b55f64,0xef0a08c0,0x17e19892,0x771ca31b, + 0x4885907e,0xe1843ecb,0x364ce16a,0x67797ebc,0x8df4b338,0x816d2b2d }, + { 0x39aa8671,0xe870b0e5,0xc102b5f5,0x9f0db3e4,0x1720c697,0x34296659, + 0x613c0d2a,0x0ad4c89e,0x418ddd61,0x1af900b2,0xd336e20e,0xe087ca72 } }, + /* 117 */ + { { 0xaba10079,0x222831ff,0x6d64fff2,0x0dc5f87b,0x3e8cb330,0x44547907, + 0x702a33fb,0xe815aaa2,0x5fba3215,0x338d6b2e,0x79f549c8,0x0f7535cb }, + { 0x2ee95923,0x471ecd97,0xc6d1c09f,0x1e868b37,0xc666ef4e,0x2bc7b8ec, + 0x808a4bfc,0xf5416589,0x3fbc4d2e,0xf23e9ee2,0x2d75125b,0x4357236c } }, + /* 118 */ + { { 0xba9cdb1b,0xfe176d95,0x2f82791e,0x45a1ca01,0x4de4cca2,0x97654af2, + 0x5cc4bcb9,0xbdbf9d0e,0xad97ac0a,0xf6a7df50,0x61359fd6,0xc52112b0 }, + { 0x4f05eae3,0x696d9ce3,0xe943ac2b,0x903adc02,0x0848be17,0xa9075347, + 0x2a3973e5,0x1e20f170,0x6feb67e9,0xe1aacc1c,0xe16bc6b9,0x2ca0ac32 } }, + /* 119 */ + { { 0xef871eb5,0xffea12e4,0xa8bf0a7a,0x94c2f25d,0x78134eaa,0x4d1e4c2a, + 0x0360fb10,0x11ed16fb,0x85fc11be,0x4029b6db,0xf4d390fa,0x5e9f7ab7 }, + { 0x30646612,0x5076d72f,0xdda1d0d8,0xa0afed1d,0x85a1d103,0x29022257, + 0x4e276bcd,0xcb499e17,0x51246c3d,0x16d1da71,0x589a0443,0xc72d56d3 } }, + /* 120 */ + { { 0xdae5bb45,0xdf5ffc74,0x261bd6dc,0x99068c4a,0xaa98ec7b,0xdc0afa7a, + 0xf121e96d,0xedd2ee00,0x1414045c,0x163cc7be,0x335af50e,0xb0b1bbce }, + { 0x01a06293,0xd440d785,0x6552e644,0xcdebab7c,0x8c757e46,0x48cb8dbc, + 0x3cabe3cb,0x81f9cf78,0xb123f59a,0xddd02611,0xeeb3784d,0x3dc7b88e } }, + /* 121 */ + { { 0xc4741456,0xe1b8d398,0x6032a121,0xa9dfa902,0x1263245b,0x1cbfc86d, + 0x5244718c,0xf411c762,0x05b0fc54,0x96521d54,0xdbaa4985,0x1afab46e }, + { 0x8674b4ad,0xa75902ba,0x5ad87d12,0x486b43ad,0x36e0d099,0x72b1c736, + 0xbb6cd6d6,0x39890e07,0x59bace4e,0x8128999c,0x7b535e33,0xd8da430b } }, + /* 122 */ + { { 0xc6b75791,0x39f65642,0x21806bfb,0x050947a6,0x1362ef84,0x0ca3e370, + 0x8c3d2391,0x9bc60aed,0x732e1ddc,0x9b488671,0xa98ee077,0x12d10d9e }, + { 0x3651b7dc,0xb6f2822d,0x80abd138,0x6345a5ba,0x472d3c84,0x62033262, + 0xacc57527,0xd54a1d40,0x424447cb,0x6ea46b3a,0x2fb1a496,0x5bc41057 } }, + /* 123 */ + { { 0xa751cd0e,0xe70c57a3,0xeba3c7d6,0x190d8419,0x9d47d55a,0xb1c3bee7, + 0xf912c6d8,0xda941266,0x407a6ad6,0x12e9aacc,0x6e838911,0xd6ce5f11 }, + { 0x70e1f2ce,0x063ca97b,0x8213d434,0xa3e47c72,0x84df810a,0xa016e241, + 0xdfd881a4,0x688ad7b0,0xa89bf0ad,0xa37d99fc,0xa23c2d23,0xd8e3f339 } }, + /* 124 */ + { { 0x750bed6f,0xbdf53163,0x83e68b0a,0x808abc32,0x5bb08a33,0x85a36627, + 0x6b0e4abe,0xf72a3a0f,0xfaf0c6ad,0xf7716d19,0x5379b25f,0x22dcc020 }, + { 0xf9a56e11,0x7400bf8d,0x56a47f21,0x6cb8bad7,0x7a6eb644,0x7c97176f, + 0xd1f5b646,0xe8fd84f7,0x44ddb054,0x98320a94,0x1dde86f5,0x07071ba3 } }, + /* 125 */ + { { 0x98f8fcb9,0x6fdfa0e5,0x94d0d70c,0x89cec8e0,0x106d20a8,0xa0899397, + 0xba8acc9c,0x915bfb9a,0x5507e01c,0x1370c94b,0x8a821ffb,0x83246a60 }, + { 0xbe3c378f,0xa8273a9f,0x35a25be9,0x7e544789,0x4dd929d7,0x6cfa4972, + 0x365bd878,0x987fed9d,0x5c29a7ae,0x4982ac94,0x5ddd7ec5,0x4589a5d7 } }, + /* 126 */ + { { 0xa95540a9,0x9fabb174,0x0162c5b0,0x7cfb886f,0xea3dee18,0x17be766b, + 0xe88e624c,0xff7da41f,0x8b919c38,0xad0b71eb,0xf31ff9a9,0x86a522e0 }, + { 0x868bc259,0xbc8e6f72,0x3ccef9e4,0x6130c638,0x9a466555,0x09f1f454, + 0x19b2bfb4,0x8e6c0f09,0x0ca7bb22,0x945c46c9,0x4dafb67b,0xacd87168 } }, + /* 127 */ + { { 
0x10c53841,0x090c72ca,0x55a4fced,0xc20ae01b,0xe10234ad,0x03f7ebd5, + 0x85892064,0xb3f42a6a,0xb4a14722,0xbdbc30c0,0x8ca124cc,0x971bc437 }, + { 0x517ff2ff,0x6f79f46d,0xecba947b,0x6a9c96e2,0x62925122,0x5e79f2f4, + 0x6a4e91f1,0x30a96bb1,0x2d4c72da,0x1147c923,0x5811e4df,0x65bc311f } }, + /* 128 */ + { { 0x139b3239,0x87c7dd7d,0x4d833bae,0x8b57824e,0x9fff0015,0xbcbc4878, + 0x909eaf1a,0x8ffcef8b,0xf1443a78,0x9905f4ee,0xe15cbfed,0x020dd4a2 }, + { 0xa306d695,0xca2969ec,0xb93caf60,0xdf940cad,0x87ea6e39,0x67f7fab7, + 0xf98c4fe5,0x0d0ee10f,0xc19cb91e,0xc646879a,0x7d1d7ab4,0x4b4ea50c } }, + /* 129 */ + { { 0x7a0db57e,0x19e40945,0x9a8c9702,0xe6017cad,0x1be5cff9,0xdbf739e5, + 0xa7a938a2,0x3646b3cd,0x68350dfc,0x04511085,0x56e098b5,0xad3bd6f3 }, + { 0xee2e3e3e,0x935ebabf,0x473926cb,0xfbd01702,0x9e9fb5aa,0x7c735b02, + 0x2e3feff0,0xc52a1b85,0x046b405a,0x9199abd3,0x39039971,0xe306fcec } }, + /* 130 */ + { { 0x23e4712c,0xd6d9aec8,0xc3c198ee,0x7ca8376c,0x31bebd8a,0xe6d83187, + 0xd88bfef3,0xed57aff3,0xcf44edc7,0x72a645ee,0x5cbb1517,0xd4e63d0b }, + { 0xceee0ecf,0x98ce7a1c,0x5383ee8e,0x8f012633,0xa6b455e8,0x3b879078, + 0xc7658c06,0xcbcd3d96,0x0783336a,0x721d6fe7,0x5a677136,0xf21a7263 } }, + /* 131 */ + { { 0x9586ba11,0x19d8b3cd,0x8a5c0480,0xd9e0aeb2,0x2230ef5c,0xe4261dbf, + 0x02e6bf09,0x095a9dee,0x80dc7784,0x8963723c,0x145157b1,0x5c97dbaf }, + { 0x4bc4503e,0x97e74434,0x85a6b370,0x0fb1cb31,0xcd205d4b,0x3e8df2be, + 0xf8f765da,0x497dd1bc,0x6c988a1a,0x92ef95c7,0x64dc4cfa,0x3f924baa } }, + /* 132 */ + { { 0x7268b448,0x6bf1b8dd,0xefd79b94,0xd4c28ba1,0xe4e3551f,0x2fa1f8c8, + 0x5c9187a9,0x769e3ad4,0x40326c0d,0x28843b4d,0x50d5d669,0xfefc8094 }, + { 0x90339366,0x30c85bfd,0x5ccf6c3a,0x4eeb56f1,0x28ccd1dc,0x0e72b149, + 0xf2ce978e,0x73ee85b5,0x3165bb23,0xcdeb2bf3,0x4e410abf,0x8106c923 } }, + /* 133 */ + { { 0x7d02f4ee,0xc8df0161,0x18e21225,0x8a781547,0x6acf9e40,0x4ea895eb, + 0x6e5a633d,0x8b000cb5,0x7e981ffb,0xf31d86d5,0x4475bc32,0xf5c8029c }, + { 0x1b568973,0x764561ce,0xa62996ec,0x2f809b81,0xda085408,0x9e513d64, + 0xe61ce309,0xc27d815d,0x272999e0,0x0da6ff99,0xfead73f7,0xbd284779 } }, + /* 134 */ + { { 0x9b1cdf2b,0x6033c2f9,0xbc5fa151,0x2a99cf06,0x12177b3b,0x7d27d259, + 0xc4485483,0xb1f15273,0x102e2297,0x5fd57d81,0xc7f6acb7,0x3d43e017 }, + { 0x3a70eb28,0x41a8bb0b,0x3e80b06b,0x67de2d8e,0x70c28de5,0x09245a41, + 0xa7b26023,0xad7dbcb1,0x2cbc6c1e,0x70b08a35,0x9b33041f,0xb504fb66 } }, + /* 135 */ + { { 0xf97a27c2,0xa8e85ab5,0xc10a011b,0x6ac5ec8b,0xffbcf161,0x55745533, + 0x65790a60,0x01780e85,0x99ee75b0,0xe451bf85,0x39c29881,0x8907a63b }, + { 0x260189ed,0x76d46738,0x47bd35cb,0x284a4436,0x20cab61e,0xd74e8c40, + 0x416cf20a,0x6264bf8c,0x5fd820ce,0xfa5a6c95,0xf24bb5fc,0xfa7154d0 } }, + /* 136 */ + { { 0x9b3f5034,0x18482cec,0xcd9e68fd,0x962d445a,0x95746f23,0x266fb1d6, + 0x58c94a4b,0xc66ade5a,0xed68a5b6,0xdbbda826,0x7ab0d6ae,0x05664a4d }, + { 0x025e32fc,0xbcd4fe51,0xa96df252,0x61a5aebf,0x31592a31,0xd88a07e2, + 0x98905517,0x5d9d94de,0x5fd440e7,0x96bb4010,0xe807db4c,0x1b0c47a2 } }, + /* 137 */ + { { 0x08223878,0x5c2a6ac8,0xe65a5558,0xba08c269,0x9bbc27fd,0xd22b1b9b, + 0x72b9607d,0x919171bf,0xe588dc58,0x9ab455f9,0x23662d93,0x6d54916e }, + { 0x3b1de0c1,0x8da8e938,0x804f278f,0xa84d186a,0xd3461695,0xbf4988cc, + 0xe10eb0cb,0xf5eae3be,0xbf2a66ed,0x1ff8b68f,0xc305b570,0xa68daf67 } }, + /* 138 */ + { { 0x44b2e045,0xc1004cff,0x4b1c05d4,0x91b5e136,0x88a48a07,0x53ae4090, + 0xea11bb1a,0x73fb2995,0x3d93a4ea,0x32048570,0x3bfc8a5f,0xcce45de8 }, + { 0xc2b3106e,0xaff4a97e,0xb6848b4f,0x9069c630,0xed76241c,0xeda837a6, + 
0x6cc3f6cf,0x8a0daf13,0x3da018a8,0x199d049d,0xd9093ba3,0xf867c6b1 } }, + /* 139 */ + { { 0x56527296,0xe4d42a56,0xce71178d,0xae26c73d,0x6c251664,0x70a0adac, + 0x5dc0ae1d,0x813483ae,0xdaab2daf,0x7574eacd,0xc2d55f4f,0xc56b52dc }, + { 0x95f32923,0x872bc167,0x5bdd2a89,0x4be17581,0xa7699f00,0x9b57f1e7, + 0x3ac2de02,0x5fcd9c72,0x92377739,0x83af3ba1,0xfc50b97f,0xa64d4e2b } }, + /* 140 */ + { { 0x0e552b40,0x2172dae2,0xd34d52e8,0x62f49725,0x07958f98,0x7930ee40, + 0x751fdd74,0x56da2a90,0xf53e48c3,0xf1192834,0x8e53c343,0x34d2ac26 }, + { 0x13111286,0x1073c218,0xda9d9827,0x201dac14,0xee95d378,0xec2c29db, + 0x1f3ee0b1,0x9316f119,0x544ce71c,0x7890c9f0,0x27612127,0xd77138af } }, + /* 141 */ + { { 0x3b4ad1cd,0x78045e6d,0x4aa49bc1,0xcd86b94e,0xfd677a16,0x57e51f1d, + 0xfa613697,0xd9290935,0x34f4d893,0x7a3f9593,0x5d5fcf9b,0x8c9c248b }, + { 0x6f70d4e9,0x9f23a482,0x63190ae9,0x17273454,0x5b081a48,0x4bdd7c13, + 0x28d65271,0x1e2de389,0xe5841d1f,0x0bbaaa25,0x746772e5,0xc4c18a79 } }, + /* 142 */ + { { 0x593375ac,0x10ee2681,0x7dd5e113,0x4f3288be,0x240f3538,0x9a97b2fb, + 0x1de6b1e2,0xfa11089f,0x1351bc58,0x516da562,0x2dfa85b5,0x573b6119 }, + { 0x6cba7df5,0x89e96683,0x8c28ab40,0xf299be15,0xad43fcbf,0xe91c9348, + 0x9a1cefb3,0xe9bbc7cc,0x738b2775,0xc8add876,0x775eaa01,0x6e3b1f2e } }, + /* 143 */ + { { 0xb677788b,0x0365a888,0x3fd6173c,0x634ae8c4,0x9e498dbe,0x30498761, + 0xc8f779ab,0x08c43e6d,0x4c09aca9,0x068ae384,0x2018d170,0x2380c70b }, + { 0xa297c5ec,0xcf77fbc3,0xca457948,0xdacbc853,0x336bec7e,0x3690de04, + 0x14eec461,0x26bbac64,0x1f713abf,0xd1c23c7e,0xe6fd569e,0xf08bbfcd } }, + /* 144 */ + { { 0x84770ee3,0x5f8163f4,0x744a1706,0x0e0c7f94,0xe1b2d46d,0x9c8f05f7, + 0xd01fd99a,0x417eafe7,0x11440e5b,0x2ba15df5,0x91a6fbcf,0xdc5c552a }, + { 0xa270f721,0x86271d74,0xa004485b,0x32c0a075,0x8defa075,0x9d1a87e3, + 0xbf0d20fe,0xb590a7ac,0x8feda1f5,0x430c41c2,0x58f6ec24,0x454d2879 } }, + /* 145 */ + { { 0x7c525435,0x52b7a635,0x37c4bdbc,0x3d9ef57f,0xdffcc475,0x2bb93e9e, + 0x7710f3be,0xf7b8ba98,0x21b727de,0x42ee86da,0x2e490d01,0x55ac3f19 }, + { 0xc0c1c390,0x487e3a6e,0x446cde7b,0x036fb345,0x496ae951,0x089eb276, + 0x71ed1234,0xedfed4d9,0x900f0b46,0x661b0dd5,0x8582f0d3,0x11bd6f1b } }, + /* 146 */ + { { 0x076bc9d1,0x5cf9350f,0xcf3cd2c3,0x15d903be,0x25af031c,0x21cfc8c2, + 0x8b1cc657,0xe0ad3248,0x70014e87,0xdd9fb963,0x297f1658,0xf0f3a5a1 }, + { 0xf1f703aa,0xbb908fba,0x2f6760ba,0x2f9cc420,0x66a38b51,0x00ceec66, + 0x05d645da,0x4deda330,0xf7de3394,0xb9cf5c72,0x1ad4c906,0xaeef6502 } }, + /* 147 */ + { { 0x7a19045d,0x0583c8b1,0xd052824c,0xae7c3102,0xff6cfa58,0x2a234979, + 0x62c733c0,0xfe9dffc9,0x9c0c4b09,0x3a7fa250,0x4fe21805,0x516437bb }, + { 0xc2a23ddb,0x9454e3d5,0x289c104e,0x0726d887,0x4fd15243,0x8977d918, + 0x6d7790ba,0xc559e73f,0x465af85f,0x8fd3e87d,0x5feee46b,0xa2615c74 } }, + /* 148 */ + { { 0x4335167d,0xc8d607a8,0xe0f5c887,0x8b42d804,0x398d11f9,0x5f9f13df, + 0x20740c67,0x5aaa5087,0xa3d9234b,0x83da9a6a,0x2a54bad1,0xbd3a5c4e }, + { 0x2db0f658,0xdd13914c,0x5a3f373a,0x29dcb66e,0x5245a72b,0xbfd62df5, + 0x91e40847,0x19d18023,0xb136b1ae,0xd9df74db,0x3f93bc5b,0x72a06b6b } }, + /* 149 */ + { { 0xad19d96f,0x6da19ec3,0xfb2a4099,0xb342daa4,0x662271ea,0x0e61633a, + 0xce8c054b,0x3bcece81,0x8bd62dc6,0x7cc8e061,0xee578d8b,0xae189e19 }, + { 0xdced1eed,0x73e7a25d,0x7875d3ab,0xc1257f0a,0x1cfef026,0x2cb2d5a2, + 0xb1fdf61c,0xd98ef39b,0x24e83e6c,0xcd8e6f69,0xc7b7088b,0xd71e7076 } }, + /* 150 */ + { { 0x9d4245bf,0x33936830,0x2ac2953b,0x22d96217,0x56c3c3cd,0xb3bf5a82, + 0x0d0699e8,0x50c9be91,0x8f366459,0xec094463,0x513b7c35,0x6c056dba }, + { 
0x045ab0e3,0x687a6a83,0x445c9295,0x8d40b57f,0xa16f5954,0x0f345048, + 0x3d8f0a87,0x64b5c639,0x9f71c5e2,0x106353a2,0x874f0dd4,0xdd58b475 } }, + /* 151 */ + { { 0x62230c72,0x67ec084f,0x481385e3,0xf14f6cca,0x4cda7774,0xf58bb407, + 0xaa2dbb6b,0xe15011b1,0x0c035ab1,0xd488369d,0x8245f2fd,0xef83c24a }, + { 0x9fdc2538,0xfb57328f,0x191fe46a,0x79808293,0x32ede548,0xe28f5c44, + 0xea1a022c,0x1b3cda99,0x3df2ec7f,0x39e639b7,0x760e9a18,0x77b6272b } }, + /* 152 */ + { { 0xa65d56d5,0x2b1d51bd,0x7ea696e0,0x3a9b71f9,0x9904f4c4,0x95250ecc, + 0xe75774b7,0x8bc4d6eb,0xeaeeb9aa,0x0e343f8a,0x930e04cb,0xc473c1d1 }, + { 0x064cd8ae,0x282321b1,0x5562221c,0xf4b4371e,0xd1bf1221,0xc1cc81ec, + 0xe2c8082f,0xa52a07a9,0xba64a958,0x350d8e59,0x6fb32c9a,0x29e4f3de } }, + /* 153 */ + { { 0xba89aaa5,0x0aa9d56c,0xc4c6059e,0xf0208ac0,0xbd6ddca4,0x7400d9c6, + 0xf2c2f74a,0xb384e475,0xb1562dd3,0x4c1061fc,0x2e153b8d,0x3924e248 }, + { 0x849808ab,0xf38b8d98,0xa491aa36,0x29bf3260,0x88220ede,0x85159ada, + 0xbe5bc422,0x8b47915b,0xd7300967,0xa934d72e,0x2e515d0d,0xc4f30398 } }, + /* 154 */ + { { 0x1b1de38b,0xe3e9ee42,0x42636760,0xa124e25a,0x90165b1a,0x90bf73c0, + 0x146434c5,0x21802a34,0x2e1fa109,0x54aa83f2,0xed9c51e9,0x1d4bd03c }, + { 0x798751e6,0xc2d96a38,0x8c3507f5,0xed27235f,0xc8c24f88,0xb5fb80e2, + 0xd37f4f78,0xf873eefa,0xf224ba96,0x7229fd74,0x9edd7149,0x9dcd9199 } }, + /* 155 */ + { { 0x4e94f22a,0xee9f81a6,0xf71ec341,0xe5609892,0xa998284e,0x6c818ddd, + 0x3b54b098,0x9fd47295,0x0e8a7cc9,0x47a6ac03,0xb207a382,0xde684e5e }, + { 0x2b6b956b,0x4bdd1ecd,0xf01b3583,0x09084414,0x55233b14,0xe2f80b32, + 0xef5ebc5e,0x5a0fec54,0xbf8b29a2,0x74cf25e6,0x7f29e014,0x1c757fa0 } }, + /* 156 */ + { { 0xeb0fdfe4,0x1bcb5c4a,0xf0899367,0xd7c649b3,0x05bc083b,0xaef68e3f, + 0xa78aa607,0x57a06e46,0x21223a44,0xa2136ecc,0x52f5a50b,0x89bd6484 }, + { 0x4455f15a,0x724411b9,0x08a9c0fd,0x23dfa970,0x6db63bef,0x7b0da4d1, + 0xfb162443,0x6f8a7ec1,0xe98284fb,0xc1ac9cee,0x33566022,0x085a582b } }, + /* 157 */ + { { 0xec1f138a,0x15cb61f9,0x668f0c28,0x11c9a230,0xdf93f38f,0xac829729, + 0x4048848d,0xcef25698,0x2bba8fbf,0x3f686da0,0x111c619a,0xed5fea78 }, + { 0xd6d1c833,0x9b4f73bc,0x86e7bf80,0x50951606,0x042b1d51,0xa2a73508, + 0x5fb89ec2,0x9ef6ea49,0x5ef8b892,0xf1008ce9,0x9ae8568b,0x78a7e684 } }, + /* 158 */ + { { 0x10470cd8,0x3fe83a7c,0xf86df000,0x92734682,0xda9409b5,0xb5dac06b, + 0x94939c5f,0x1e7a9660,0x5cc116dc,0xdec6c150,0x66bac8cc,0x1a52b408 }, + { 0x6e864045,0x5303a365,0x9139efc1,0x45eae72a,0x6f31d54f,0x83bec646, + 0x6e958a6d,0x2fb4a86f,0x4ff44030,0x6760718e,0xe91ae0df,0x008117e3 } }, + /* 159 */ + { { 0x384310a2,0x5d5833ba,0x1fd6c9fc,0xbdfb4edc,0x849c4fb8,0xb9a4f102, + 0x581c1e1f,0xe5fb239a,0xd0a9746d,0xba44b2e7,0x3bd942b9,0x78f7b768 }, + { 0xc87607ae,0x076c8ca1,0xd5caaa7e,0x82b23c2e,0x2763e461,0x6a581f39, + 0x3886df11,0xca8a5e4a,0x264e7f22,0xc87e90cf,0x215cfcfc,0x04f74870 } }, + /* 160 */ + { { 0x141d161c,0x5285d116,0x93c4ed17,0x67cd2e0e,0x7c36187e,0x12c62a64, + 0xed2584ca,0xf5329539,0x42fbbd69,0xc4c777c4,0x1bdfc50a,0x107de776 }, + { 0xe96beebd,0x9976dcc5,0xa865a151,0xbe2aff95,0x9d8872af,0x0e0a9da1, + 0xa63c17cc,0x5e357a3d,0xe15cc67c,0xd31fdfd8,0x7970c6d8,0xc44bbefd } }, + /* 161 */ + { { 0x4c0c62f1,0x703f83e2,0x4e195572,0x9b1e28ee,0xfe26cced,0x6a82858b, + 0xc43638fa,0xd381c84b,0xa5ba43d8,0x94f72867,0x10b82743,0x3b4a783d }, + { 0x7576451e,0xee1ad7b5,0x14b6b5c8,0xc3d0b597,0xfcacc1b8,0x3dc30954, + 0x472c9d7b,0x55df110e,0x02f8a328,0x97c86ed7,0x88dc098f,0xd0433413 } }, + /* 162 */ + { { 0x2ca8f2fe,0x1a60d152,0x491bd41f,0x61640948,0x58dfe035,0x6dae29a5, + 
0x278e4863,0x9a615bea,0x9ad7c8e5,0xbbdb4477,0x2ceac2fc,0x1c706630 }, + { 0x99699b4b,0x5e2b54c6,0x239e17e8,0xb509ca6d,0xea063a82,0x728165fe, + 0xb6a22e02,0x6b5e609d,0xb26ee1df,0x12813905,0x439491fa,0x07b9f722 } }, + /* 163 */ + { { 0x48ff4e49,0x1592ec14,0x6d644129,0x3e4e9f17,0x1156acc0,0x7acf8288, + 0xbb092b0b,0x5aa34ba8,0x7d38393d,0xcd0f9022,0xea4f8187,0x416724dd }, + { 0xc0139e73,0x3c4e641c,0x91e4d87d,0xe0fe46cf,0xcab61f8a,0xedb3c792, + 0xd3868753,0x4cb46de4,0x20f1098a,0xe449c21d,0xf5b8ea6e,0x5e5fd059 } }, + /* 164 */ + { { 0x75856031,0x7fcadd46,0xeaf2fbd0,0x89c7a4cd,0x7a87c480,0x1af523ce, + 0x61d9ae90,0xe5fc1095,0xbcdb95f5,0x3fb5864f,0xbb5b2c7d,0xbeb5188e }, + { 0x3ae65825,0x3d1563c3,0x0e57d641,0x116854c4,0x1942ebd3,0x11f73d34, + 0xc06955b3,0x24dc5904,0x995a0a62,0x8a0d4c83,0x5d577b7d,0xfb26b86d } }, + /* 165 */ + { { 0xc686ae17,0xc53108e7,0xd1c1da56,0x9090d739,0x9aec50ae,0x4583b013, + 0xa49a6ab2,0xdd9a088b,0xf382f850,0x28192eea,0xf5fe910e,0xcc8df756 }, + { 0x9cab7630,0x877823a3,0xfb8e7fc1,0x64984a9a,0x364bfc16,0x5448ef9c, + 0xc44e2a9a,0xbbb4f871,0x435c95e9,0x901a41ab,0xaaa50a06,0xc6c23e5f } }, + /* 166 */ + { { 0x9034d8dd,0xb78016c1,0x0b13e79b,0x856bb44b,0xb3241a05,0x85c6409a, + 0x2d78ed21,0x8d2fe19a,0x726eddf2,0xdcc7c26d,0x25104f04,0x3ccaff5f }, + { 0x6b21f843,0x397d7edc,0xe975de4c,0xda88e4dd,0x4f5ab69e,0x5273d396, + 0x9aae6cc0,0x537680e3,0x3e6f9461,0xf749cce5,0x957bffd3,0x021ddbd9 } }, + /* 167 */ + { { 0x777233cf,0x7b64585f,0x0942a6f0,0xfe6771f6,0xdfe6eef0,0x636aba7a, + 0x86038029,0x63bbeb56,0xde8fcf36,0xacee5842,0xd4a20524,0x48d9aa99 }, + { 0x0da5e57a,0xcff7a74c,0xe549d6c9,0xc232593c,0xf0f2287b,0x68504bcc, + 0xbc8360b5,0x6d7d098d,0x5b402f41,0xeac5f149,0xb87d1bf1,0x61936f11 } }, + /* 168 */ + { { 0xb8153a9d,0xaa9da167,0x9e83ecf0,0xa49fe3ac,0x1b661384,0x14c18f8e, + 0x38434de1,0x61c24dab,0x283dae96,0x3d973c3a,0x82754fc9,0xc99baa01 }, + { 0x4c26b1e3,0x477d198f,0xa7516202,0x12e8e186,0x362addfa,0x386e52f6, + 0xc3962853,0x31e8f695,0x6aaedb60,0xdec2af13,0x29cf74ac,0xfcfdb4c6 } }, + /* 169 */ + { { 0xcca40298,0x6b3ee958,0xf2f5d195,0xc3878153,0xed2eae5b,0x0c565630, + 0x3a697cf2,0xd089b37e,0xad5029ea,0xc2ed2ac7,0x0f0dda6a,0x7e5cdfad }, + { 0xd9b86202,0xf98426df,0x4335e054,0xed1960b1,0x3f14639e,0x1fdb0246, + 0x0db6c670,0x17f709c3,0x773421e1,0xbfc687ae,0x26c1a8ac,0x13fefc4a } }, + /* 170 */ + { { 0x7ffa0a5f,0xe361a198,0xc63fe109,0xf4b26102,0x6c74e111,0x264acbc5, + 0x77abebaf,0x4af445fa,0x24cddb75,0x448c4fdd,0x44506eea,0x0b13157d }, + { 0x72e9993d,0x22a6b159,0x85e5ecbe,0x2c3c57e4,0xfd83e1a1,0xa673560b, + 0xc3b8c83b,0x6be23f82,0x40bbe38e,0x40b13a96,0xad17399b,0x66eea033 } }, + /* 171 */ + { { 0xb4c6c693,0x49fc6e95,0x36af7d38,0xefc735de,0x35fe42fc,0xe053343d, + 0x6a9ab7c3,0xf0aa427c,0x4a0fcb24,0xc79f0436,0x93ebbc50,0x16287243 }, + { 0x16927e1e,0x5c3d6bd0,0x673b984c,0x40158ed2,0x4cd48b9a,0xa7f86fc8, + 0x60ea282d,0x1643eda6,0xe2a1beed,0x45b393ea,0x19571a94,0x664c839e } }, + /* 172 */ + { { 0x27eeaf94,0x57745750,0xea99e1e7,0x2875c925,0x5086adea,0xc127e7ba, + 0x86fe424f,0x765252a0,0x2b6c0281,0x1143cc6c,0xd671312d,0xc9bb2989 }, + { 0x51acb0a5,0x880c337c,0xd3c60f78,0xa3710915,0x9262b6ed,0x496113c0, + 0x9ce48182,0x5d25d9f8,0xb3813586,0x53b6ad72,0x4c0e159c,0x0ea3bebc } }, + /* 173 */ + { { 0xc5e49bea,0xcaba450a,0x7c05da59,0x684e5415,0xde7ac36c,0xa2e9cab9, + 0x2e6f957b,0x4ca79b5f,0x09b817b1,0xef7b0247,0x7d89df0f,0xeb304990 }, + { 0x46fe5096,0x508f7307,0x2e04eaaf,0x695810e8,0x3512f76c,0x88ef1bd9, + 0x3ebca06b,0x77661351,0xccf158b7,0xf7d4863a,0x94ee57da,0xb2a81e44 } }, + /* 174 */ + { { 
0x6d53e6ba,0xff288e5b,0x14484ea2,0xa90de1a9,0xed33c8ec,0x2fadb60c, + 0x28b66a40,0x579d6ef3,0xec24372d,0x4f2dd6dd,0x1d66ec7d,0xe9e33fc9 }, + { 0x039eab6e,0x110899d2,0x3e97bb5e,0xa31a667a,0xcfdce68e,0x6200166d, + 0x5137d54b,0xbe83ebae,0x4800acdf,0x085f7d87,0x0c6f8c86,0xcf4ab133 } }, + /* 175 */ + { { 0x931e08fb,0x03f65845,0x1506e2c0,0x6438551e,0x9c36961f,0x5791f0dc, + 0xe3dcc916,0x68107b29,0xf495d2ca,0x83242374,0x6ee5895b,0xd8cfb663 }, + { 0xa0349b1b,0x525e0f16,0x4a0fab86,0x33cd2c6c,0x2af8dda9,0x46c12ee8, + 0x71e97ad3,0x7cc424ba,0x37621eb0,0x69766ddf,0xa5f0d390,0x95565f56 } }, + /* 176 */ + { { 0x1a0f5e94,0xe0e7bbf2,0x1d82d327,0xf771e115,0xceb111fa,0x10033e3d, + 0xd3426638,0xd269744d,0x00d01ef6,0xbdf2d9da,0xa049ceaf,0x1cb80c71 }, + { 0x9e21c677,0x17f18328,0x19c8f98b,0x6452af05,0x80b67997,0x35b9c5f7, + 0x40f8f3d4,0x5c2e1cbe,0x66d667ca,0x43f91656,0xcf9d6e79,0x9faaa059 } }, + /* 177 */ + { { 0x0a078fe6,0x8ad24618,0x464fd1dd,0xf6cc73e6,0xc3e37448,0x4d2ce34d, + 0xe3271b5f,0x624950c5,0xefc5af72,0x62910f5e,0xaa132bc6,0x8b585bf8 }, + { 0xa839327f,0x11723985,0x4aac252f,0x34e2d27d,0x6296cc4e,0x402f59ef, + 0x47053de9,0x00ae055c,0x28b4f09b,0xfc22a972,0xfa0c180e,0xa9e86264 } }, + /* 178 */ + { { 0xbc310ecc,0x0b7b6224,0x67fa14ed,0x8a1a74f1,0x7214395c,0x87dd0960, + 0xf5c91128,0xdf1b3d09,0x86b264a8,0x39ff23c6,0x3e58d4c5,0xdc2d49d0 }, + { 0xa9d6f501,0x2152b7d3,0xc04094f7,0xf4c32e24,0xd938990f,0xc6366596, + 0x94fb207f,0x084d078f,0x328594cb,0xfd99f1d7,0xcb2d96b3,0x36defa64 } }, + /* 179 */ + { { 0x13ed7cbe,0x4619b781,0x9784bd0e,0x95e50015,0x2c7705fe,0x2a32251c, + 0x5f0dd083,0xa376af99,0x0361a45b,0x55425c6c,0x1f291e7b,0x812d2cef }, + { 0x5fd94972,0xccf581a0,0xe56dc383,0x26e20e39,0x63dbfbf0,0x0093685d, + 0x36b8c575,0x1fc164cc,0x390ef5e7,0xb9c5ab81,0x26908c66,0x40086beb } }, + /* 180 */ + { { 0x37e3c115,0xe5e54f79,0xc1445a8a,0x69b8ee8c,0xb7659709,0x79aedff2, + 0x1b46fbe6,0xe288e163,0xd18d7bb7,0xdb4844f0,0x48aa6424,0xe0ea23d0 }, + { 0xf3d80a73,0x714c0e4e,0x3bd64f98,0x87a0aa9e,0x2ec63080,0x8844b8a8, + 0x255d81a3,0xe0ac9c30,0x455397fc,0x86151237,0x2f820155,0x0b979464 } }, + /* 181 */ + { { 0x4ae03080,0x127a255a,0x580a89fb,0x232306b4,0x6416f539,0x04e8cd6a, + 0x13b02a0e,0xaeb70dee,0x4c09684a,0xa3038cf8,0x28e433ee,0xa710ec3c }, + { 0x681b1f7d,0x77a72567,0x2fc28170,0x86fbce95,0xf5735ac8,0xd3408683, + 0x6bd68e93,0x3a324e2a,0xc027d155,0x7ec74353,0xd4427177,0xab60354c } }, + /* 182 */ + { { 0xef4c209d,0x32a5342a,0x08d62704,0x2ba75274,0xc825d5fe,0x4bb4af6f, + 0xd28e7ff1,0x1c3919ce,0xde0340f6,0x1dfc2fdc,0x29f33ba9,0xc6580baf }, + { 0x41d442cb,0xae121e75,0x3a4724e4,0x4c7727fd,0x524f3474,0xe556d6a4, + 0x785642a2,0x87e13cc7,0xa17845fd,0x182efbb1,0x4e144857,0xdcec0cf1 } }, + /* 183 */ + { { 0xe9539819,0x1cb89541,0x9d94dbf1,0xc8cb3b4f,0x417da578,0x1d353f63, + 0x8053a09e,0xb7a697fb,0xc35d8b78,0x8d841731,0xb656a7a9,0x85748d6f }, + { 0xc1859c5d,0x1fd03947,0x535d22a2,0x6ce965c1,0x0ca3aadc,0x1966a13e, + 0x4fb14eff,0x9802e41d,0x76dd3fcd,0xa9048cbb,0xe9455bba,0x89b182b5 } }, + /* 184 */ + { { 0x43360710,0xd777ad6a,0x55e9936b,0x841287ef,0x04a21b24,0xbaf5c670, + 0x35ad86f1,0xf2c0725f,0xc707e72e,0x338fa650,0xd8883e52,0x2bf8ed2e }, + { 0xb56e0d6a,0xb0212cf4,0x6843290c,0x50537e12,0x98b3dc6f,0xd8b184a1, + 0x0210b722,0xd2be9a35,0x559781ee,0x407406db,0x0bc18534,0x5a78d591 } }, + /* 185 */ + { { 0xd748b02c,0x4d57aa2a,0xa12b3b95,0xbe5b3451,0x64711258,0xadca7a45, + 0x322153db,0x597e091a,0x32eb1eab,0xf3271006,0x2873f301,0xbd9adcba }, + { 0x38543f7f,0xd1dc79d1,0x921b1fef,0x00022092,0x1e5df8ed,0x86db3ef5, + 
0x9e6b944a,0x888cae04,0x791a32b4,0x71bd29ec,0xa6d1c13e,0xd3516206 } }, + /* 186 */ + { { 0x55924f43,0x2ef6b952,0x4f9de8d5,0xd2f401ae,0xadc68042,0xfc73e8d7, + 0x0d9d1bb4,0x627ea70c,0xbbf35679,0xc3bb3e3e,0xd882dee4,0x7e8a254a }, + { 0xb5924407,0x08906f50,0xa1ad444a,0xf14a0e61,0x65f3738e,0xaa0efa21, + 0xae71f161,0xd60c7dd6,0xf175894d,0x9e8390fa,0x149f4c00,0xd115cd20 } }, + /* 187 */ + { { 0xa52abf77,0x2f2e2c1d,0x54232568,0xc2a0dca5,0x54966dcc,0xed423ea2, + 0xcd0dd039,0xe48c93c7,0x176405c7,0x1e54a225,0x70d58f2e,0x1efb5b16 }, + { 0x94fb1471,0xa751f9d9,0x67d2941d,0xfdb31e1f,0x53733698,0xa6c74eb2, + 0x89a0f64a,0xd3155d11,0xa4b8d2b6,0x4414cfe4,0xf7a8e9e3,0x8d5a4be8 } }, + /* 188 */ + { { 0x52669e98,0x5c96b4d4,0x8fd42a03,0x4547f922,0xd285174e,0xcf5c1319, + 0x064bffa0,0x805cd1ae,0x246d27e7,0x50e8bc4f,0xd5781e11,0xf89ef98f }, + { 0xdee0b63f,0xb4ff95f6,0x222663a4,0xad850047,0x4d23ce9c,0x02691860, + 0x50019f59,0x3e5309ce,0x69a508ae,0x27e6f722,0x267ba52c,0xe9376652 } }, + /* 189 */ + { { 0xc0368708,0xa04d289c,0x5e306e1d,0xc458872f,0x33112fea,0x76fa23de, + 0x6efde42e,0x718e3974,0x1d206091,0xf0c98cdc,0x14a71987,0x5fa3ca62 }, + { 0xdcaa9f2a,0xeee8188b,0x589a860d,0x312cc732,0xc63aeb1f,0xf9808dd6, + 0x4ea62b53,0x70fd43db,0x890b6e97,0x2c2bfe34,0xfa426aa6,0x105f863c } }, + /* 190 */ + { { 0xb38059ad,0x0b29795d,0x90647ea0,0x5686b77e,0xdb473a3e,0xeff0470e, + 0xf9b6d1e2,0x278d2340,0xbd594ec7,0xebbff95b,0xd3a7f23d,0xf4b72334 }, + { 0xa5a83f0b,0x2a285980,0x9716a8b3,0x0786c41a,0x22511812,0x138901bd, + 0xe2fede6e,0xd1b55221,0xdf4eb590,0x0806e264,0x762e462e,0x6c4c897e } }, + /* 191 */ + { { 0xb4b41d9d,0xd10b905f,0x4523a65b,0x826ca466,0xb699fa37,0x535bbd13, + 0x73bc8f90,0x5b9933d7,0xcd2118ad,0x9332d61f,0xd4a65fd0,0x158c693e }, + { 0xe6806e63,0x4ddfb2a8,0xb5de651b,0xe31ed3ec,0x819bc69a,0xf9460e51, + 0x2c76b1f8,0x6229c0d6,0x901970a3,0xbb78f231,0x9cee72b8,0x31f3820f } }, + /* 192 */ + { { 0xc09e1c72,0xe931caf2,0x12990cf4,0x0715f298,0x943262d8,0x33aad81d, + 0x73048d3f,0x5d292b7a,0xdc7415f6,0xb152aaa4,0x0fd19587,0xc3d10fd9 }, + { 0x75ddadd0,0xf76b35c5,0x1e7b694c,0x9f5f4a51,0xc0663025,0x2f1ab7eb, + 0x920260b0,0x01c9cc87,0x05d39da6,0xc4b1f61a,0xeb4a9c4e,0x6dcd76c4 } }, + /* 193 */ + { { 0xfdc83f01,0x0ba0916f,0x9553e4f9,0x354c8b44,0xffc5e622,0xa6cc511a, + 0xe95be787,0xb954726a,0x75b41a62,0xcb048115,0xebfde989,0xfa2ae6cd }, + { 0x0f24659a,0x6376bbc7,0x4c289c43,0x13a999fd,0xec9abd8b,0xc7134184, + 0xa789ab04,0x28c02bf6,0xd3e526ec,0xff841ebc,0x640893a8,0x442b191e } }, + /* 194 */ + { { 0xfa2b6e20,0x4cac6c62,0xf6d69861,0x97f29e9b,0xbc96d12d,0x228ab1db, + 0x5e8e108d,0x6eb91327,0x40771245,0xd4b3d4d1,0xca8a803a,0x61b20623 }, + { 0xa6a560b1,0x2c2f3b41,0x3859fcf4,0x879e1d40,0x024dbfc3,0x7cdb5145, + 0x3bfa5315,0x55d08f15,0xaa93823a,0x2f57d773,0xc6a2c9a2,0xa97f259c } }, + /* 195 */ + { { 0xe58edbbb,0xc306317b,0x79dfdf13,0x25ade51c,0x16d83dd6,0x6b5beaf1, + 0x1dd8f925,0xe8038a44,0xb2a87b6b,0x7f00143c,0xf5b438de,0xa885d00d }, + { 0xcf9e48bd,0xe9f76790,0xa5162768,0xf0bdf9f0,0xad7b57cb,0x0436709f, + 0xf7c15db7,0x7e151c12,0x5d90ee3b,0x3514f022,0x2c361a8d,0x2e84e803 } }, + /* 196 */ + { { 0x563ec8d8,0x2277607d,0xe3934cb7,0xa661811f,0xf58fd5de,0x3ca72e7a, + 0x62294c6a,0x7989da04,0xf6bbefe9,0x88b3708b,0x53ed7c82,0x0d524cf7 }, + { 0x2f30c073,0x69f699ca,0x9dc1dcf3,0xf0fa264b,0x05f0aaf6,0x44ca4568, + 0xd19b9baf,0x0f5b23c7,0xeabd1107,0x39193f41,0x2a7c9b83,0x9e3e10ad } }, + /* 197 */ + { { 0xd4ae972f,0xa90824f0,0xc6e846e7,0x43eef02b,0x29d2160a,0x7e460612, + 0xfe604e91,0x29a178ac,0x4eb184b2,0x23056f04,0xeb54cdf4,0x4fcad55f }, + { 
0xae728d15,0xa0ff96f3,0xc6a00331,0x8a2680c6,0x7ee52556,0x5f84cae0, + 0xc5a65dad,0x5e462c3a,0xe2d23f4f,0x5d2b81df,0xc5b1eb07,0x6e47301b } }, + /* 198 */ + { { 0xaf8219b9,0x77411d68,0x51b1907a,0xcb883ce6,0x101383b5,0x25c87e57, + 0x982f970d,0x9c7d9859,0x118305d2,0xaa6abca5,0x9013a5db,0x725fed2f }, + { 0xababd109,0x487cdbaf,0x87586528,0xc0f8cf56,0x8ad58254,0xa02591e6, + 0xdebbd526,0xc071b1d1,0x961e7e31,0x927dfe8b,0x9263dfe1,0x55f895f9 } }, + /* 199 */ + { { 0xb175645b,0xf899b00d,0xb65b4b92,0x51f3a627,0xb67399ef,0xa2f3ac8d, + 0xe400bc20,0xe717867f,0x1967b952,0x42cc9020,0x3ecd1de1,0x3d596751 }, + { 0xdb979775,0xd41ebcde,0x6a2e7e88,0x99ba61bc,0x321504f2,0x039149a5, + 0x27ba2fad,0xe7dc2314,0xb57d8368,0x9f556308,0x57da80a7,0x2b6d16c9 } }, + /* 200 */ + { { 0x279ad982,0x84af5e76,0x9c8b81a6,0x9bb4c92d,0x0e698e67,0xd79ad44e, + 0x265fc167,0xe8be9048,0x0c3a4ccc,0xf135f7e6,0xb8863a33,0xa0a10d38 }, + { 0xd386efd9,0xe197247c,0xb52346c2,0x0eefd3f9,0x78607bc8,0xc22415f9, + 0x508674ce,0xa2a8f862,0xc8c9d607,0xa72ad09e,0x50fa764f,0xcd9f0ede } }, + /* 201 */ + { { 0xd1a46d4d,0x063391c7,0x9eb01693,0x2df51c11,0x849e83de,0xc5849800, + 0x8ad08382,0x48fd09aa,0xaa742736,0xa405d873,0xe1f9600c,0xee49e61e }, + { 0x48c76f73,0xd76676be,0x01274b2a,0xd9c100f6,0x83f8718d,0x110bb67c, + 0x02fc0d73,0xec85a420,0x744656ad,0xc0449e1e,0x37d9939b,0x28ce7376 } }, + /* 202 */ + { { 0x44544ac7,0x97e9af72,0xba010426,0xf2c658d5,0xfb3adfbd,0x732dec39, + 0xa2df0b07,0xd12faf91,0x2171e208,0x8ac26725,0x5b24fa54,0xf820cdc8 }, + { 0x94f4cf77,0x307a6eea,0x944a33c6,0x18c783d2,0x0b741ac5,0x4b939d4c, + 0x3ffbb6e4,0x1d7acd15,0x7a255e44,0x06a24858,0xce336d50,0x14fbc494 } }, + /* 203 */ + { { 0x51584e3c,0x9b920c0c,0xf7e54027,0xc7733c59,0x88422bbe,0xe24ce139, + 0x523bd6ab,0x11ada812,0xb88e6def,0xde068800,0xfe8c582d,0x7b872671 }, + { 0x7de53510,0x4e746f28,0xf7971968,0x492f8b99,0x7d928ac2,0x1ec80bc7, + 0x432eb1b5,0xb3913e48,0x32028f6e,0xad084866,0x8fc2f38b,0x122bb835 } }, + /* 204 */ + { { 0x3b0b29c3,0x0a9f3b1e,0x4fa44151,0x837b6432,0x17b28ea7,0xb9905c92, + 0x98451750,0xf39bc937,0xce8b6da1,0xcd383c24,0x010620b2,0x299f57db }, + { 0x58afdce3,0x7b6ac396,0x3d05ef47,0xa15206b3,0xb9bb02ff,0xa0ae37e2, + 0x9db3964c,0x107760ab,0x67954bea,0xe29de9a0,0x431c3f82,0x446a1ad8 } }, + /* 205 */ + { { 0x5c6b8195,0xc6fecea0,0xf49e71b9,0xd744a7c5,0x177a7ae7,0xa8e96acc, + 0x358773a7,0x1a05746c,0x37567369,0xa4162146,0x87d1c971,0xaa0217f7 }, + { 0x77fd3226,0x61e9d158,0xe4f600be,0x0f6f2304,0x7a6dff07,0xa9c4cebc, + 0x09f12a24,0xd15afa01,0x8c863ee9,0x2bbadb22,0xe5eb8c78,0xa28290e4 } }, + /* 206 */ + { { 0x3e9de330,0x55b87fa0,0x195c145b,0x12b26066,0xa920bef0,0xe08536e0, + 0x4d195adc,0x7bff6f2c,0x945f4187,0x7f319e9d,0xf892ce47,0xf9848863 }, + { 0x4fe37657,0xd0efc1d3,0x5cf0e45a,0x3c58de82,0x8b0ccbbe,0x626ad21a, + 0xaf952fc5,0xd2a31208,0xeb437357,0x81791995,0x98e95d4f,0x5f19d30f } }, + /* 207 */ + { { 0x0e6865bb,0x72e83d9a,0xf63456a6,0x22f5af3b,0x463c8d9e,0x409e9c73, + 0xdfe6970e,0x40e9e578,0x711b91ca,0x876b6efa,0x942625a3,0x895512cf }, + { 0xcb4e462b,0x84c8eda8,0x4412e7c8,0x84c0154a,0xceb7b71f,0x04325db1, + 0x66f70877,0x1537dde3,0x1992b9ac,0xf3a09399,0xd498ae77,0xa7316606 } }, + /* 208 */ + { { 0xcad260f5,0x13990d2f,0xeec0e8c0,0x76c3be29,0x0f7bd7d5,0x7dc5bee0, + 0xefebda4b,0x9be167d2,0x9122b87e,0xcce3dde6,0x82b5415c,0x75a28b09 }, + { 0xe84607a6,0xf6810bcd,0x6f4dbf0d,0xc6d58128,0x1b4dafeb,0xfead577d, + 0x066b28eb,0x9bc440b2,0x8b17e84b,0x53f1da97,0xcda9a575,0x0459504b } }, + /* 209 */ + { { 0x329e5836,0x13e39a02,0xf717269d,0x2c9e7d51,0xf26c963b,0xc5ac58d6, + 
0x79967bf5,0x3b0c6c43,0x55908d9d,0x60bbea3f,0xf07c9ad1,0xd84811e7 }, + { 0x5bd20e4a,0xfe7609a7,0x0a70baa8,0xe4325dd2,0xb3600386,0x3711f370, + 0xd0924302,0x97f9562f,0x4acc4436,0x040dc0c3,0xde79cdd4,0xfd6d725c } }, + /* 210 */ + { { 0xcf13eafb,0xb3efd0e3,0x5aa0ae5f,0x21009cbb,0x79022279,0xe480c553, + 0xb2fc9a6d,0x755cf334,0x07096ae7,0x8564a5bf,0xbd238139,0xddd649d0 }, + { 0x8a045041,0xd0de10b1,0xc957d572,0x6e05b413,0x4e0fb25c,0x5c5ff806, + 0x641162fb,0xd933179b,0xe57439f9,0x42d48485,0x8a8d72aa,0x70c5bd0a } }, + /* 211 */ + { { 0x97bdf646,0xa7671738,0xab329f7c,0xaa1485b4,0xf8f25fdf,0xce3e11d6, + 0xc6221824,0x76a3fc7e,0xf3924740,0x045f281f,0x96d13a9a,0x24557d4e }, + { 0xdd4c27cd,0x875c804b,0x0f5c7fea,0x11c5f0f4,0xdc55ff7e,0xac8c880b, + 0x1103f101,0x2acddec5,0xf99faa89,0x38341a21,0xce9d6b57,0xc7b67a2c } }, + /* 212 */ + { { 0x8e357586,0x9a0d724f,0xdf648da0,0x1d7f4ff5,0xfdee62a5,0x9c3e6c9b, + 0x0389b372,0x0499cef0,0x98eab879,0xe904050d,0x6c051617,0xe8eef1b6 }, + { 0xc37e3ca9,0xebf5bfeb,0xa4e0b91d,0x7c5e946d,0x2c4bea28,0x79097314, + 0xee67b2b7,0x81f6c109,0xdafc5ede,0xaf237d9b,0x2abb04c7,0xd2e60201 } }, + /* 213 */ + { { 0x8a4f57bf,0x6156060c,0xff11182a,0xf9758696,0x6296ef00,0x8336773c, + 0xff666899,0x9c054bce,0x719cd11c,0xd6a11611,0xdbe1acfa,0x9824a641 }, + { 0xba89fd01,0x0b7b7a5f,0x889f79d8,0xf8d3b809,0xf578285c,0xc5e1ea08, + 0xae6d8288,0x7ac74536,0x7521ef5f,0x5d37a200,0xb260a25d,0x5ecc4184 } }, + /* 214 */ + { { 0xa708c8d3,0xddcebb19,0xc63f81ec,0xe63ed04f,0x11873f95,0xd045f5a0, + 0x79f276d5,0x3b5ad544,0x425ae5b3,0x81272a3d,0x10ce1605,0x8bfeb501 }, + { 0x888228bf,0x4233809c,0xb2aff7df,0x4bd82acf,0x0cbd4a7f,0x9c68f180, + 0x6b44323d,0xfcd77124,0x891db957,0x60c0fcf6,0x04da8f7f,0xcfbb4d89 } }, + /* 215 */ + { { 0x3b26139a,0x9a6a5df9,0xb2cc7eb8,0x3e076a83,0x5a964bcd,0x47a8e82d, + 0xb9278d6b,0x8a4e2a39,0xe4443549,0x93506c98,0xf1e0d566,0x06497a8f }, + { 0x2b1efa05,0x3dee8d99,0x45393e33,0x2da63ca8,0xcf0579ad,0xa4af7277, + 0x3236d8ea,0xaf4b4639,0x32b617f5,0x6ccad95b,0xb88bb124,0xce76d8b8 } }, + /* 216 */ + { { 0x083843dc,0x63d2537a,0x1e4153b4,0x89eb3514,0xea9afc94,0x5175ebc4, + 0x8ed1aed7,0x7a652580,0xd85e8297,0x67295611,0xb584b73d,0x8dd2d68b }, + { 0x0133c3a4,0x237139e6,0x4bd278ea,0x9de838ab,0xc062fcd9,0xe829b072, + 0x63ba8706,0x70730d4f,0xd3cd05ec,0x6080483f,0x0c85f84d,0x872ab5b8 } }, + /* 217 */ + { { 0x999d4d49,0xfc0776d3,0xec3f45e7,0xa3eb59de,0x0dae1fc1,0xbc990e44, + 0xa15371ff,0x33596b1e,0x9bc7ab25,0xd447dcb2,0x35979582,0xcd5b63e9 }, + { 0x77d1ff11,0xae3366fa,0xedee6903,0x59f28f05,0xa4433bf2,0x6f43fed1, + 0xdf9ce00e,0x15409c9b,0xaca9c5dc,0x21b5cded,0x82d7bdb4,0xf9f33595 } }, + /* 218 */ + { { 0x9422c792,0x95944378,0xc958b8bf,0x239ea923,0xdf076541,0x4b61a247, + 0xbb9fc544,0x4d29ce85,0x0b424559,0x9a692a67,0x0e486900,0x6e0ca5a0 }, + { 0x85b3bece,0x6b79a782,0xc61f9892,0x41f35e39,0xae747f82,0xff82099a, + 0xd0ca59d6,0x58c8ae3f,0x99406b5f,0x4ac930e2,0x9df24243,0x2ce04eb9 } }, + /* 219 */ + { { 0x1ac37b82,0x4366b994,0x25b04d83,0xff0c728d,0x19c47b7c,0x1f551361, + 0xbeff13e7,0xdbf2d5ed,0xe12a683d,0xf78efd51,0x989cf9c4,0x82cd85b9 }, + { 0xe0cb5d37,0xe23c6db6,0x72ee1a15,0x818aeebd,0x28771b14,0x8212aafd, + 0x1def817d,0x7bc221d9,0x9445c51f,0xdac403a2,0x12c3746b,0x711b0517 } }, + /* 220 */ + { { 0x5ea99ecc,0x0ed9ed48,0xb8cab5e1,0xf799500d,0xb570cbdc,0xa8ec87dc, + 0xd35dfaec,0x52cfb2c2,0x6e4d80a4,0x8d31fae2,0xdcdeabe5,0xe6a37dc9 }, + { 0x1deca452,0x5d365a34,0x0d68b44e,0x09a5f8a5,0xa60744b1,0x59238ea5, + 0xbb4249e9,0xf2fedc0d,0xa909b2e3,0xe395c74e,0x39388250,0xe156d1a5 } }, + /* 221 */ + { { 
0x47181ae9,0xd796b3d0,0x44197808,0xbaf44ba8,0x34cf3fac,0xe6933094, + 0xc3bd5c46,0x41aa6ade,0xeed947c6,0x4fda75d8,0x9ea5a525,0xacd9d412 }, + { 0xd430301b,0x65cc55a3,0x7b52ea49,0x3c9a5bcf,0x159507f0,0x22d319cf, + 0xde74a8dd,0x2ee0b9b5,0x877ac2b6,0x20c26a1e,0x92e7c314,0x387d73da } }, + /* 222 */ + { { 0x8cd3fdac,0x13c4833e,0x332e5b8e,0x76fcd473,0xe2fe1fd3,0xff671b4b, + 0x5d98d8ec,0x4d734e8b,0x514bbc11,0xb1ead3c6,0x7b390494,0xd14ca858 }, + { 0x5d2d37e9,0x95a443af,0x00464622,0x73c6ea73,0x15755044,0xa44aeb4b, + 0xfab58fee,0xba3f8575,0xdc680a6f,0x9779dbc9,0x7b37ddfc,0xe1ee5f5a } }, + /* 223 */ + { { 0x12d29f46,0xcd0b4648,0x0ed53137,0x93295b0b,0x80bef6c9,0xbfe26094, + 0x54248b00,0xa6565788,0x80e7f9c4,0x69c43fca,0xbe141ea1,0x2190837b }, + { 0xa1b26cfb,0x875e159a,0x7affe852,0x90ca9f87,0x92ca598e,0x15e6550d, + 0x1938ad11,0xe3e0945d,0x366ef937,0xef7636bb,0xb39869e5,0xb6034d0b } }, + /* 224 */ + { { 0x26d8356e,0x4d255e30,0xd314626f,0xf83666ed,0xd0c8ed64,0x421ddf61, + 0x26677b61,0x96e473c5,0x9e9b18b3,0xdad4af7e,0xa9393f75,0xfceffd4a }, + { 0x11c731d5,0x843138a1,0xb2f141d9,0x05bcb3a1,0x617b7671,0x20e1fa95, + 0x88ccec7b,0xbefce812,0x90f1b568,0x582073dc,0x1f055cb7,0xf572261a } }, + /* 225 */ + { { 0x36973088,0xf3148277,0x86a9f980,0xc008e708,0xe046c261,0x1b795947, + 0xca76bca0,0xdf1e6a7d,0x71acddf0,0xabafd886,0x1364d8f4,0xff7054d9 }, + { 0xe2260594,0x2cf63547,0xd73b277e,0x468a5372,0xef9bd35e,0xc7419e24, + 0x24043cc3,0x2b4a1c20,0x890b39cd,0xa28f047a,0x46f9a2e3,0xdca2cea1 } }, + /* 226 */ + { { 0x53277538,0xab788736,0xcf697738,0xa734e225,0x6b22e2c1,0x66ee1d1e, + 0xebe1d212,0x2c615389,0x02bb0766,0xf36cad40,0x3e64f207,0x120885c3 }, + { 0x90fbfec2,0x59e77d56,0xd7a574ae,0xf9e781aa,0x5d045e53,0x801410b0, + 0xa91b5f0e,0xd3b5f0aa,0x7fbb3521,0xb3d1df00,0xc72bee9a,0x11c4b33e } }, + /* 227 */ + { { 0x83c3a7f3,0xd32b9832,0x88d8a354,0x8083abcf,0x50f4ec5a,0xdeb16404, + 0x641e2907,0x18d747f0,0xf1bbf03e,0x4e8978ae,0x88a0cd89,0x932447dc }, + { 0xcf3d5897,0x561e0feb,0x13600e6d,0xfc3a682f,0xd16a6b73,0xc78b9d73, + 0xd29bf580,0xe713fede,0x08d69e5c,0x0a225223,0x1ff7fda4,0x3a924a57 } }, + /* 228 */ + { { 0xb4093bee,0xfb64554c,0xa58c6ec0,0xa6d65a25,0x43d0ed37,0x4126994d, + 0x55152d44,0xa5689a51,0x284caa8d,0xb8e5ea8c,0xd1f25538,0x33f05d4f }, + { 0x1b615d6e,0xe0fdfe09,0x705507da,0x2ded7e8f,0x17bbcc80,0xdd5631e5, + 0x267fd11f,0x4f87453e,0xff89d62d,0xc6da723f,0xe3cda21d,0x55cbcae2 } }, + /* 229 */ + { { 0x6b4e84f3,0x336bc94e,0x4ef72c35,0x72863031,0xeeb57f99,0x6d85fdee, + 0xa42ece1b,0x7f4e3272,0x36f0320a,0x7f86cbb5,0x923331e6,0xf09b6a2b }, + { 0x56778435,0x21d3ecf1,0x8323b2d2,0x2977ba99,0x1704bc0f,0x6a1b57fb, + 0x389f048a,0xd777cf8b,0xac6b42cd,0x9ce2174f,0x09e6c55a,0x404e2bff } }, + /* 230 */ + { { 0x204c5ddb,0x9b9b135e,0x3eff550e,0x9dbfe044,0xec3be0f6,0x35eab4bf, + 0x0a43e56f,0x8b4c3f0d,0x0e73f9b3,0x4c1c6673,0x2c78c905,0x92ed38bd }, + { 0xa386e27c,0xc7003f6a,0xaced8507,0xb9c4f46f,0x59df5464,0xea024ec8, + 0x429572ea,0x4af96152,0xe1fc1194,0x279cd5e2,0x281e358c,0xaa376a03 } }, + /* 231 */ + { { 0x3cdbc95c,0x07859223,0xef2e337a,0xaae1aa6a,0x472a8544,0xc040108d, + 0x8d037b7d,0x80c853e6,0x8c7eee24,0xd221315c,0x8ee47752,0x195d3856 }, + { 0xdacd7fbe,0xd4b1ba03,0xd3e0c52b,0x4b5ac61e,0x6aab7b52,0x68d3c052, + 0x660e3fea,0xf0d7248c,0x3145efb4,0xafdb3f89,0x8f40936d,0xa73fd9a3 } }, + /* 232 */ + { { 0xbb1b17ce,0x891b9ef3,0xc6127f31,0x14023667,0x305521fd,0x12b2e58d, + 0xe3508088,0x3a47e449,0xff751507,0xe49fc84b,0x5310d16e,0x4023f722 }, + { 0xb73399fa,0xa608e5ed,0xd532aa3e,0xf12632d8,0x845e8415,0x13a2758e, + 
0x1fc2d861,0xae4b6f85,0x339d02f2,0x3879f5b1,0x80d99ebd,0x446d22a6 } }, + /* 233 */ + { { 0x4be164f1,0x0f502302,0x88b81920,0x8d09d2d6,0x984aceff,0x514056f1, + 0x75e9e80d,0xa5c4ddf0,0xdf496a93,0x38cb47e6,0x38df6bf7,0x899e1d6b }, + { 0xb59eb2a6,0x69e87e88,0x9b47f38b,0x280d9d63,0x3654e955,0x599411ea, + 0x969aa581,0xcf8dd4fd,0x530742a7,0xff5c2baf,0x1a373085,0xa4391536 } }, + /* 234 */ + { { 0xa8a4bdd2,0x6ace72a3,0xb68ef702,0xc656cdd1,0x90c4dad8,0xd4a33e7e, + 0x9d951c50,0x4aece08a,0x085d68e6,0xea8005ae,0x6f7502b8,0xfdd7a7d7 }, + { 0x98d6fa45,0xce6fb0a6,0x1104eb8c,0x228f8672,0xda09d7dc,0xd23d8787, + 0x2ae93065,0x5521428b,0xea56c366,0x95faba3d,0x0a88aca5,0xedbe5039 } }, + /* 235 */ + { { 0xbfb26c82,0xd64da0ad,0x952c2f9c,0xe5d70b3c,0xf7e77f68,0xf5e8f365, + 0x08f2d695,0x7234e002,0xd12e7be6,0xfaf900ee,0x4acf734e,0x27dc6934 }, + { 0xc260a46a,0x80e4ff5e,0x2dc31c28,0x7da5ebce,0xca69f552,0x485c5d73, + 0x69cc84c2,0xcdfb6b29,0xed6d4eca,0x031c5afe,0x22247637,0xc7bbf4c8 } }, + /* 236 */ + { { 0x49fe01b2,0x9d5b72c7,0x793a91b8,0x34785186,0xcf460438,0xa3ba3c54, + 0x3ab21b6f,0x73e8e43d,0xbe57b8ab,0x50cde8e0,0xdd204264,0x6488b3a7 }, + { 0xdddc4582,0xa9e398b3,0x5bec46fe,0x1698c1a9,0x156d3843,0x7f1446ef, + 0x770329a2,0x3fd25dd8,0x2c710668,0x05b1221a,0xa72ee6cf,0x65b2dc2a } }, + /* 237 */ + { { 0xcd021d63,0x21a885f7,0xfea61f08,0x3f344b15,0xc5cf73e6,0xad5ba6dd, + 0x227a8b23,0x154d0d8f,0xdc559311,0x9b74373c,0x98620fa1,0x4feab715 }, + { 0x7d9ec924,0x5098938e,0x6d47e550,0x84d54a5e,0x1b617506,0x1a2d1bdc, + 0x615868a4,0x99fe1782,0x3005a924,0x171da780,0x7d8f79b6,0xa70bf5ed } }, + /* 238 */ + { { 0xfe2216c5,0x0bc1250d,0x7601b351,0x2c37e250,0xd6f06b7e,0xb6300175, + 0x8bfeb9b7,0x4dde8ca1,0xb82f843d,0x4f210432,0xb1ac0afd,0x8d70e2f9 }, + { 0xaae91abb,0x25c73b78,0x863028f2,0x0230dca3,0xe5cf30b7,0x8b923ecf, + 0x5506f265,0xed754ec2,0x729a5e39,0x8e41b88c,0xbabf889b,0xee67cec2 } }, + /* 239 */ + { { 0x1be46c65,0xe183acf5,0xe7565d7a,0x9789538f,0xd9627b4e,0x87873391, + 0x9f1d9187,0xbf4ac4c1,0x4691f5c8,0x5db99f63,0x74a1fb98,0xa68df803 }, + { 0xbf92b5fa,0x3c448ed1,0x3e0bdc32,0xa098c841,0x79bf016c,0x8e74cd55, + 0x115e244d,0x5df0d09c,0x3410b66e,0x9418ad01,0x17a02130,0x8b6124cb } }, + /* 240 */ + { { 0xc26e3392,0x425ec3af,0xa1722e00,0xc07f8470,0xe2356b43,0xdcc28190, + 0xb1ef59a6,0x4ed97dff,0xc63028c1,0xc22b3ad1,0x68c18988,0x070723c2 }, + { 0x4cf49e7d,0x70da302f,0x3f12a522,0xc5e87c93,0x18594148,0x74acdd1d, + 0xca74124c,0xad5f73ab,0xd69fd478,0xe72e4a3e,0x7b117cc3,0x61593868 } }, + /* 241 */ + { { 0xa9aa0486,0x7b7b9577,0xa063d557,0x6e41fb35,0xda9047d7,0xb017d5c7, + 0x68a87ba9,0x8c748280,0xdf08ad93,0xab45fa5c,0x4c288a28,0xcd9fb217 }, + { 0x5747843d,0x59544642,0xa56111e3,0x34d64c6c,0x4bfce8d5,0x12e47ea1, + 0x6169267f,0x17740e05,0xeed03fb5,0x5c49438e,0x4fc3f513,0x9da30add } }, + /* 242 */ + { { 0xccfa5200,0xc4e85282,0x6a19b13d,0x2707608f,0xf5726e2f,0xdcb9a53d, + 0xe9427de5,0x612407c9,0xd54d582a,0x3e5a17e1,0x655ae118,0xb99877de }, + { 0x015254de,0x6f0e972b,0xf0a6f7c5,0x92a56db1,0xa656f8b2,0xd297e4e1, + 0xad981983,0x99fe0052,0x07cfed84,0xd3652d2f,0x843c1738,0xc784352e } }, + /* 243 */ + { { 0x7e9b2d8a,0x6ee90af0,0x57cf1964,0xac8d7018,0x71f28efc,0xf6ed9031, + 0x6812b20e,0x7f70d5a9,0xf1c61eee,0x27b557f4,0xc6263758,0xf1c9bd57 }, + { 0x2a1a6194,0x5cf7d014,0x1890ab84,0xdd614e0b,0x0e93c2a6,0x3ef9de10, + 0xe0cd91c5,0xf98cf575,0x14befc32,0x504ec0c6,0x6279d68c,0xd0513a66 } }, + /* 244 */ + { { 0xa859fb6a,0xa8eadbad,0xdb283666,0xcf8346e7,0x3e22e355,0x7b35e61a, + 0x99639c6b,0x293ece2c,0x56f241c8,0xfa0162e2,0xbf7a1dda,0xd2e6c7b9 }, + { 
0x40075e63,0xd0de6253,0xf9ec8286,0x2405aa61,0x8fe45494,0x2237830a, + 0x364e9c8c,0x4fd01ac7,0x904ba750,0x4d9c3d21,0xaf1b520b,0xd589be14 } }, + /* 245 */ + { { 0x4662e53b,0x13576a4f,0xf9077676,0x35ec2f51,0x97c0af97,0x66297d13, + 0x9e598b58,0xed3201fe,0x5e70f604,0x49bc752a,0xbb12d951,0xb54af535 }, + { 0x212c1c76,0x36ea4c2b,0xeb250dfd,0x18f5bbc7,0x9a0a1a46,0xa0d466cc, + 0xdac2d917,0x52564da4,0x8e95fab5,0x206559f4,0x9ca67a33,0x7487c190 } }, + /* 246 */ + { { 0xdde98e9c,0x75abfe37,0x2a411199,0x99b90b26,0xdcdb1f7c,0x1b410996, + 0x8b3b5675,0xab346f11,0xf1f8ae1e,0x04852193,0x6b8b98c1,0x1ec4d227 }, + { 0x45452baa,0xba3bc926,0xacc4a572,0x387d1858,0xe51f171e,0x9478eff6, + 0x931e1c00,0xf357077d,0xe54c8ca8,0xffee77cd,0x551dc9a4,0xfb4892ff } }, + /* 247 */ + { { 0x2db8dff8,0x5b1bdad0,0x5a2285a2,0xd462f4fd,0xda00b461,0x1d6aad8e, + 0x41306d1b,0x43fbefcf,0x6a13fe19,0x428e86f3,0x17f89404,0xc8b2f118 }, + { 0xf0d51afb,0x762528aa,0x549b1d06,0xa3e2fea4,0xea3ddf66,0x86fad8f2, + 0x4fbdd206,0x0d9ccc4b,0xc189ff5a,0xcde97d4c,0x199f19a6,0xc36793d6 } }, + /* 248 */ + { { 0x51b85197,0xea38909b,0xb4c92895,0xffb17dd0,0x1ddb3f3f,0x0eb0878b, + 0xc57cf0f2,0xb05d28ff,0x1abd57e2,0xd8bde2e7,0xc40c1b20,0x7f2be28d }, + { 0x299a2d48,0x6554dca2,0x8377982d,0x5130ba2e,0x1071971a,0x8863205f, + 0x7cf2825d,0x15ee6282,0x03748f2b,0xd4b6c57f,0x430385a0,0xa9e3f4da } }, + /* 249 */ + { { 0x83fbc9c6,0x33eb7cec,0x4541777e,0x24a311c7,0x4f0767fc,0xc81377f7, + 0x4ab702da,0x12adae36,0x2a779696,0xb7fcb6db,0x01cea6ad,0x4a6fb284 }, + { 0xcdfc73de,0x5e8b1d2a,0x1b02fd32,0xd0efae8d,0xd81d8519,0x3f99c190, + 0xfc808971,0x3c18f7fa,0x51b7ae7b,0x41f713e7,0xf07fc3f8,0x0a4b3435 } }, + /* 250 */ + { { 0x019b7d2e,0x7dda3c4c,0xd4dc4b89,0x631c8d1a,0x1cdb313c,0x5489cd6e, + 0x4c07bb06,0xd44aed10,0x75f000d1,0x8f97e13a,0xdda5df4d,0x0e9ee64f }, + { 0x3e346910,0xeaa99f3b,0xfa294ad7,0x622f6921,0x0d0b2fe9,0x22aaa20d, + 0x1e5881ba,0x4fed2f99,0xc1571802,0x9af3b2d6,0xdc7ee17c,0x919e67a8 } }, + /* 251 */ + { { 0x76250533,0xc724fe4c,0x7d817ef8,0x8a2080e5,0x172c9751,0xa2afb0f4, + 0x17c0702e,0x9b10cdeb,0xc9b7e3e9,0xbf3975e3,0x1cd0cdc5,0x206117df }, + { 0xbe05ebd5,0xfb049e61,0x16c782c0,0xeb0bb55c,0xab7fed09,0x13a331b8, + 0x632863f0,0xf6c58b1d,0x4d3b6195,0x6264ef6e,0x9a53f116,0x92c51b63 } }, + /* 252 */ + { { 0x288b364d,0xa57c7bc8,0x7b41e5c4,0x4a562e08,0x698a9a11,0x699d21c6, + 0xf3f849b9,0xa4ed9581,0x9eb726ba,0xa223eef3,0xcc2884f9,0x13159c23 }, + { 0x3a3f4963,0x73931e58,0x0ada6a81,0x96500389,0x5ab2950b,0x3ee8a1c6, + 0x775fab52,0xeedf4949,0x4f2671b6,0x63d652e1,0x3c4e2f55,0xfed4491c } }, + /* 253 */ + { { 0xf4eb453e,0x335eadc3,0xcadd1a5b,0x5ff74b63,0x5d84a91a,0x6933d0d7, + 0xb49ba337,0x9ca3eeb9,0xc04c15b8,0x1f6facce,0xdc09a7e4,0x4ef19326 }, + { 0x3dca3233,0x53d2d324,0xa2259d4b,0x0ee40590,0x5546f002,0x18c22edb, + 0x09ea6b71,0x92429801,0xb0e91e61,0xaada0add,0x99963c50,0x5fe53ef4 } }, + /* 254 */ + { { 0x90c28c65,0x372dd06b,0x119ce47d,0x1765242c,0x6b22fc82,0xc041fb80, + 0xb0a7ccc1,0x667edf07,0x1261bece,0xc79599e7,0x19cff22a,0xbc69d9ba }, + { 0x13c06819,0x009d77cd,0xe282b79d,0x635a66ae,0x225b1be8,0x4edac4a6, + 0x524008f9,0x57d4f4e4,0xb056af84,0xee299ac5,0x3a0bc386,0xcc38444c } }, + /* 255 */ + { { 0xcd4c2356,0x490643b1,0x750547be,0x740a4851,0xd4944c04,0x643eaf29, + 0x299a98a0,0xba572479,0xee05fdf9,0x48b29f16,0x089b2d7b,0x33fb4f61 }, + { 0xa950f955,0x86704902,0xfedc3ddf,0x97e1034d,0x05fbb6a2,0x211320b6, + 0x432299bb,0x23d7b93f,0x8590e4a3,0x1fe1a057,0xf58c0ce6,0x8e1d0586 } }, +}; + +/* Multiply the base point of P384 by the scalar and return the result. 
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
+                                       k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 12, km);
+
+        err = sp_384_ecc_mulmod_base_12(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_12(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
+            a[8] | a[9] | a[10] | a[11]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a  A single precision integer.
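+ *
+ * For reference, a portable C sketch of the same operation
+ * (hypothetical helper, not part of this patch; the function below
+ * performs the equivalent carry-propagating increment over the 12
+ * words in ARM assembly):
+ *
+ *     static void sp_384_add_one_ref(sp_digit* a)
+ *     {
+ *         int i;
+ *         sp_digit c = 1;
+ *         for (i = 0; i < 12 && c != 0; i++) {
+ *             a[i] += c;         (add the pending carry)
+ *             c = (a[i] < c);    (word wrapped, so carry out)
+ *         }
+ *     }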
+ */ +static void sp_384_add_one_12(sp_digit* a) +{ + __asm__ __volatile__ ( + "ldr r1, [%[a], #0]\n\t" + "ldr r2, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "adds r1, r1, #1\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "str r1, [%[a], #0]\n\t" + "str r2, [%[a], #4]\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r1, [%[a], #16]\n\t" + "ldr r2, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "adcs r1, r1, #0\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "str r1, [%[a], #16]\n\t" + "str r2, [%[a], #20]\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r1, [%[a], #32]\n\t" + "ldr r2, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "adcs r1, r1, #0\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "str r1, [%[a], #32]\n\t" + "str r2, [%[a], #36]\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + : + : [a] "r" (a) + : "memory", "r1", "r2", "r3", "r4" + ); +} + +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 24U) { + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +} + +/* Generates a scalar that is in the range 1..order-1. + * + * rng Random number generator. + * k Scalar value. + * returns RNG failures, MEMORY_E when memory allocation fails and + * MP_OKAY on success. + */ +static int sp_384_ecc_gen_k_12(WC_RNG* rng, sp_digit* k) +{ + int err; + byte buf[48]; + + do { + err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf)); + if (err == 0) { + sp_384_from_bin(k, 12, buf, (int)sizeof(buf)); + if (sp_384_cmp_12(k, p384_order2) < 0) { + sp_384_add_one_12(k); + break; + } + } + } + while (err == 0); + + return err; +} + +/* Makes a random EC key pair. + * + * rng Random number generator. + * priv Generated private value. + * pub Generated public point. + * heap Heap to use for allocation. + * returns ECC_INF_E when the point does not have the correct order, RNG + * failures, MEMORY_E when memory allocation fails and MP_OKAY on success. 
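+ *
+ * Minimal usage sketch (hypothetical caller, error handling elided;
+ * wc_InitRng(), mp_init() and wc_ecc_new_point() are the usual
+ * wolfSSL helpers):
+ *
+ *     WC_RNG rng;
+ *     mp_int priv;
+ *     ecc_point* pub = wc_ecc_new_point();
+ *     wc_InitRng(&rng);
+ *     mp_init(&priv);
+ *     if (sp_ecc_make_key_384(&rng, &priv, pub, NULL) == MP_OKAY) {
+ *         (priv holds the scalar, pub the matching public point)
+ *     }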
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_12(rng, k);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_12(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_384_iszero_12(point->x) == 0) || (sp_384_iszero_12(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_12(infinity, 1, heap);
+#endif
+    sp_384_point_free_12(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 384 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<12 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for the output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
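+ *
+ * Usage sketch (hypothetical ECDH caller; priv is this side's scalar
+ * and peerPub the peer's public point):
+ *
+ *     byte secret[48];
+ *     word32 secretLen = (word32)sizeof(secret);
+ *     if (sp_ecc_secret_gen_384(&priv, peerPub, secret, &secretLen,
+ *             NULL) == MP_OKAY) {
+ *         (secretLen is 48; secret holds the zero-padded X ordinate)
+ *     }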
+ */ +int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out, + word32* outLen, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 p; + sp_digit kd[12]; +#endif + sp_point_384* point = NULL; + sp_digit* k = NULL; + int err = MP_OKAY; + + if (*outLen < 48U) { + err = BUFFER_E; + } + + if (err == MP_OKAY) { + err = sp_384_point_new_12(heap, p, point); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + + if (err == MP_OKAY) { + sp_384_from_mp(k, 12, priv); + sp_384_point_from_ecc_point_12(point, pub); + err = sp_384_ecc_mulmod_12(point, point, k, 1, heap); + } + if (err == MP_OKAY) { + sp_384_to_bin(point->x, out); + *outLen = 48; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_12(point, 0, heap); + + return err; +} +#endif /* HAVE_ECC_DHE */ + +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r14, #0\n\t" + "add r12, %[a], #48\n\t" + "\n1:\n\t" + "subs %[c], r14, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b]], #4\n\t" + "ldr r8, [%[b]], #4\n\t" + "ldr r9, [%[b]], #4\n\t" + "ldr r10, [%[b]], #4\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "str r3, [%[a]], #4\n\t" + "str r4, [%[a]], #4\n\t" + "str r5, [%[a]], #4\n\t" + "str r6, [%[a]], #4\n\t" + "sbc %[c], r14, r14\n\t" + "cmp %[a], r12\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + ); + + return c; +} + +#else +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. 
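+ *
+ * For reference, a portable C sketch of the word-wise subtract with
+ * borrow that the assembly below implements (hypothetical, 12 words;
+ * returns the final borrow):
+ *
+ *     sp_digit c = 0;
+ *     int i;
+ *     for (i = 0; i < 12; i++) {
+ *         sp_digit t = a[i] - b[i] - c;
+ *         c = (c != 0) ? (t >= a[i]) : (t > a[i]);
+ *         a[i] = t;
+ *     }
+ *     return c;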
+ */ +static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r2, [%[a], #0]\n\t" + "ldr r3, [%[a], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "ldr r8, [%[b], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #0]\n\t" + "str r3, [%[a], #4]\n\t" + "str r4, [%[a], #8]\n\t" + "str r5, [%[a], #12]\n\t" + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "ldr r8, [%[b], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #16]\n\t" + "str r3, [%[a], #20]\n\t" + "str r4, [%[a], #24]\n\t" + "str r5, [%[a], #28]\n\t" + "ldr r2, [%[a], #32]\n\t" + "ldr r3, [%[a], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "ldr r8, [%[b], #40]\n\t" + "ldr r9, [%[b], #44]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #32]\n\t" + "str r3, [%[a], #36]\n\t" + "str r4, [%[a], #40]\n\t" + "str r5, [%[a], #44]\n\t" + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [a] "r" (a), [b] "r" (b) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r5, r3, %[b], r8\n\t" + "mov r4, #0\n\t" + "str r5, [%[r]]\n\t" + "mov r5, #0\n\t" + "mov r9, #4\n\t" + "1:\n\t" + "ldr r8, [%[a], r9]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], r9]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r9, r9, #4\n\t" + "cmp r9, #48\n\t" + "blt 1b\n\t" + "str r3, [%[r], #48]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#else + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r3, r4, %[b], r8\n\t" + "mov r5, #0\n\t" + "str r3, [%[r]]\n\t" + "# A[1] * B\n\t" + "ldr r8, [%[a], #4]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #4]\n\t" + "# A[2] * B\n\t" + "ldr r8, [%[a], #8]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #8]\n\t" + "# A[3] * B\n\t" + "ldr r8, [%[a], #12]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #12]\n\t" + "# A[4] * B\n\t" + "ldr r8, [%[a], #16]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #16]\n\t" + "# A[5] * B\n\t" + "ldr r8, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs 
r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #20]\n\t" + "# A[6] * B\n\t" + "ldr r8, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #24]\n\t" + "# A[7] * B\n\t" + "ldr r8, [%[a], #28]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #28]\n\t" + "# A[8] * B\n\t" + "ldr r8, [%[a], #32]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #32]\n\t" + "# A[9] * B\n\t" + "ldr r8, [%[a], #36]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #36]\n\t" + "# A[10] * B\n\t" + "ldr r8, [%[a], #40]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #40]\n\t" + "# A[11] * B\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "str r5, [%[r], #44]\n\t" + "str r3, [%[r], #48]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#endif +} + +/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The dividend. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. + */ +static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) +{ + sp_digit r = 0; + + __asm__ __volatile__ ( + "lsr r5, %[div], #1\n\t" + "add r5, r5, #1\n\t" + "mov r6, %[d0]\n\t" + "mov r7, %[d1]\n\t" + "# Do top 32\n\t" + "subs r8, r5, r7\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], %[r]\n\t" + "sub %[r], %[r], r8\n\t" + "and r8, r8, r5\n\t" + "subs r7, r7, r8\n\t" + "# Next 30 bits\n\t" + "mov r4, #29\n\t" + "1:\n\t" + "movs r6, r6, lsl #1\n\t" + "adc r7, r7, r7\n\t" + "subs r8, r5, r7\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], %[r]\n\t" + "sub %[r], %[r], r8\n\t" + "and r8, r8, r5\n\t" + "subs r7, r7, r8\n\t" + "subs r4, r4, #1\n\t" + "bpl 1b\n\t" + "add %[r], %[r], %[r]\n\t" + "add %[r], %[r], #1\n\t" + "umull r4, r5, %[r], %[div]\n\t" + "subs r4, %[d0], r4\n\t" + "sbc r5, %[d1], r5\n\t" + "add %[r], %[r], r5\n\t" + "umull r4, r5, %[r], %[div]\n\t" + "subs r4, %[d0], r4\n\t" + "sbc r5, %[d1], r5\n\t" + "add %[r], %[r], r5\n\t" + "subs r8, %[div], r4\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + : [r] "+r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "r4", "r5", "r6", "r7", "r8" + ); + return r; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<12; i++) { + r[i] = a[i] & m; + } +#else + r[0] = a[0] & m; + r[1] = a[1] & m; + r[2] = a[2] & m; + r[3] = a[3] & m; + r[4] = a[4] & m; + r[5] = a[5] & m; + r[6] = a[6] & m; + r[7] = a[7] & m; + r[8] = a[8] & m; + r[9] = a[9] & m; + r[10] = a[10] & m; + r[11] = a[11] & m; +#endif +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. 
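+ * Each loop iteration below estimates one quotient word with
+ * div_384_word_12() (which may overshoot by one) and corrects by
+ * conditionally adding d back through the masked sp_384_add_12()
+ * calls, keeping the running remainder in t1 non-negative.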
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[24], t2[13];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[11];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 12);
+    for (i=11; i>=0; i--) {
+        r1 = div_384_word_12(t1[12 + i], t1[12 + i - 1], div);
+
+        sp_384_mul_d_12(t2, d, r1);
+        t1[12 + i] += sp_384_sub_in_place_12(&t1[i], t2);
+        t1[12 + i] -= t2[12];
+        sp_384_mask_12(t2, d, t1[12 + i]);
+        t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+        sp_384_mask_12(t2, d, t1[12 + i]);
+        t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_384_cmp_12(t1, d) >= 0;
+    sp_384_cond_sub_12(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_12(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve. */
+static const uint32_t p384_order_minus_2[12] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve. */
+static const uint32_t p384_order_low[6] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of P384 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_12(r, a, b);
+    sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_order_12(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_12(r, a);
+    sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P384 curve a number of times.
+ * (r = a ^ (2^n) mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ * n  Number of times to square.
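+ *
+ * e.g. n = 3 computes ((a^2)^2)^2 = a^(2^3) = a^8 mod the order.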
+ */
+static void sp_384_mont_sqr_n_order_12(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_12(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_12(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_order_12(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 12);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_12(t, t);
+        if ((p384_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_12(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 12U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 12;
+    sp_digit* t3 = td + 4 * 12;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_12(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_12(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_12(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_12(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_12(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_12(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_12(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_12(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_12(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_12(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
+    sp_384_mont_sqr_n_order_12(t2, t, 48);
+    /* t = a^ffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_12(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_12(t2, t2, t);
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_12(t2, t2);
+        if (((sp_digit)p384_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_12(t2, t2, a);
+        }
+    }
+    sp_384_mont_sqr_order_12(t2, t2);
+    sp_384_mont_mul_order_12(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN  64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 384 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Second part of result as an mp_int.
+ * km       Scalar to use for k, or NULL to generate k with rng.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
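+ *
+ * Usage sketch (hypothetical caller; km passed as NULL so that k is
+ * drawn from rng):
+ *
+ *     mp_int r, s;
+ *     mp_init(&r);
+ *     mp_init(&s);
+ *     err = sp_ecc_sign_384(hash, 48, &rng, &priv, &r, &s, NULL, NULL);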
+ */ +int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv, + mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d = NULL; +#else + sp_digit ed[2*12]; + sp_digit xd[2*12]; + sp_digit kd[2*12]; + sp_digit rd[2*12]; + sp_digit td[3 * 2*12]; + sp_point_384 p; +#endif + sp_digit* e = NULL; + sp_digit* x = NULL; + sp_digit* k = NULL; + sp_digit* r = NULL; + sp_digit* tmp = NULL; + sp_point_384* point = NULL; + sp_digit carry; + sp_digit* s = NULL; + sp_digit* kInv = NULL; + int err = MP_OKAY; + int32_t c; + int i; + + (void)heap; + + err = sp_384_point_new_12(heap, p, point); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 12, heap, + DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + e = d + 0 * 12; + x = d + 2 * 12; + k = d + 4 * 12; + r = d + 6 * 12; + tmp = d + 8 * 12; +#else + e = ed; + x = xd; + k = kd; + r = rd; + tmp = td; +#endif + s = e; + kInv = k; + + if (hashLen > 48U) { + hashLen = 48U; + } + + sp_384_from_bin(e, 12, hash, (int)hashLen); + } + + for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { + sp_384_from_mp(x, 12, priv); + + /* New random point. */ + if (km == NULL || mp_iszero(km)) { + err = sp_384_ecc_gen_k_12(rng, k); + } + else { + sp_384_from_mp(k, 12, km); + mp_zero(km); + } + if (err == MP_OKAY) { + err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL); + } + + if (err == MP_OKAY) { + /* r = point->x mod order */ + XMEMCPY(r, point->x, sizeof(sp_digit) * 12U); + sp_384_norm_12(r); + c = sp_384_cmp_12(r, p384_order); + sp_384_cond_sub_12(r, r, p384_order, 0L - (sp_digit)(c >= 0)); + sp_384_norm_12(r); + + /* Conv k to Montgomery form (mod order) */ + sp_384_mul_12(k, k, p384_norm_order); + err = sp_384_mod_12(k, k, p384_order); + } + if (err == MP_OKAY) { + sp_384_norm_12(k); + /* kInv = 1/k mod order */ + sp_384_mont_inv_order_12(kInv, k, tmp); + sp_384_norm_12(kInv); + + /* s = r * x + e */ + sp_384_mul_12(x, x, r); + err = sp_384_mod_12(x, x, p384_order); + } + if (err == MP_OKAY) { + sp_384_norm_12(x); + carry = sp_384_add_12(s, e, x); + sp_384_cond_sub_12(s, s, p384_order, 0 - carry); + sp_384_norm_12(s); + c = sp_384_cmp_12(s, p384_order); + sp_384_cond_sub_12(s, s, p384_order, 0L - (sp_digit)(c >= 0)); + sp_384_norm_12(s); + + /* s = s * k^-1 mod order */ + sp_384_mont_mul_order_12(s, s, kInv); + sp_384_norm_12(s); + + /* Check that signature is usable. 
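+             * A zero s gives an unusable signature; in that case the
+             * outer loop retries with a fresh k.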
+             */
+            if (sp_384_iszero_12(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 7 * 2 * 12);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 12U);
+#endif
+    sp_384_point_free_12(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 384)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to verify.
+ * hashLen  Length of the hash data.
+ * pX       X ordinate of the public point.
+ * pY       Y ordinate of the public point.
+ * pZ       Z ordinate of the public point.
+ * r        First part of the signature as an mp_int.
+ * sm       Second part of the signature as an mp_int.
+ * res      Result of the verification - 1 when the signature matches.
+ * heap     Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*12];
+    sp_digit u2d[2*12];
+    sp_digit sd[2*12];
+    sp_digit tmpd[2*12 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+
+    err = sp_384_point_new_12(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 12, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1 = d + 0 * 12;
+        u2 = d + 2 * 12;
+        s = d + 4 * 12;
+        tmp = d + 6 * 12;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(u1, 12, hash, (int)hashLen);
+        sp_384_from_mp(u2, 12, r);
+        sp_384_from_mp(s, 12, sm);
+        sp_384_from_mp(p2->x, 12, pX);
+        sp_384_from_mp(p2->y, 12, pY);
+        sp_384_from_mp(p2->z, 12, pZ);
+
+        {
+            sp_384_mul_12(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_12(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_12(s);
+        {
+            sp_384_mont_inv_order_12(s, s, tmp);
+            sp_384_mont_mul_order_12(u1, u1, s);
+            sp_384_mont_mul_order_12(u2, u2, s);
+        }
+
+        err = sp_384_ecc_mulmod_base_12(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_12(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_384_proj_point_add_12(p1, p1, p2, tmp);
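+            /* The projective add degenerates when p1 == +/-p2; the
+             * checks below recover the doubling and infinity cases. */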
+            if (sp_384_iszero_12(p1->z)) {
+                if (sp_384_iszero_12(p1->x) && sp_384_iszero_12(p1->y)) {
+                    sp_384_proj_point_dbl_12(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    p1->x[7] = 0;
+                    p1->x[8] = 0;
+                    p1->x[9] = 0;
+                    p1->x[10] = 0;
+                    p1->x[11] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 12, r);
+        err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_12(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 12, r);
+            carry = sp_384_add_12(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_12(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_12(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montgomery form */
+                    err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod,
+                            p384_mp_mod);
+                        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_12(p1, 0, heap);
+    sp_384_point_free_12(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
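+ *
+ * Sketch of the check performed below, using the P-384 short Weierstrass
+ * equation y^2 = x^3 - 3.x + b (mod p):
+ *     t1 = y^2 mod p
+ *     t2 = x^3 mod p
+ *     t1 = y^2 - x^3 + 3.x mod p   (computed as y^2 + (p - x^3) + x + x + x)
+ * and the point is on the curve exactly when t1 == b.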
+ */
+static int sp_384_ecc_is_point_12(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*12];
+    sp_digit t2d[2*12];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 12;
+        t2 = d + 2 * 12;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_384_sqr_12(t1, point->y);
+        (void)sp_384_mod_12(t1, t1, p384_mod);
+        sp_384_sqr_12(t2, point->x);
+        (void)sp_384_mod_12(t2, t2, p384_mod);
+        sp_384_mul_12(t2, t2, point->x);
+        (void)sp_384_mod_12(t2, t2, p384_mod);
+        (void)sp_384_sub_12(t2, p384_mod, t2);
+        sp_384_mont_add_12(t1, t1, t2, p384_mod);
+
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+
+        if (sp_384_cmp_12(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 pubd;
+#endif
+    sp_point_384* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_12(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pub->x, 12, pX);
+        sp_384_from_mp(pub->y, 12, pY);
+        sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+
+        err = sp_384_ecc_is_point_12(pub, NULL);
+    }
+
+    sp_384_point_free_12(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
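+ *
+ * A minimal, hypothetical usage sketch (mp_ints initialised by the caller,
+ * error handling elided):
+ *     mp_int x, y, k;   /* candidate public point and private scalar */
+ *     int ret = sp_ecc_check_key_384(&x, &y, &k, NULL);
+ * ret is MP_OKAY only when all of the checks listed above pass.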
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[12];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_12(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                  DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_384_from_mp(pub->x, 12, pX);
+        sp_384_from_mp(pub->y, 12, pY);
+        sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 12, privm);
+
+        /* Check point at infinity. */
+        if ((sp_384_iszero_12(pub->x) != 0) &&
+            (sp_384_iszero_12(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_384_cmp_12(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_12(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_12(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+        err = sp_384_ecc_mulmod_12(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_12(p->x) == 0) ||
+            (sp_384_iszero_12(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+        err = sp_384_ecc_mulmod_base_12(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_12(p->x, pub->x) != 0 ||
+            sp_384_cmp_12(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX  First EC point's X ordinate.
+ * pY  First EC point's Y ordinate.
+ * pZ  First EC point's Z ordinate.
+ * qX  Second EC point's X ordinate.
+ * qY  Second EC point's Y ordinate.
+ * qZ  Second EC point's Z ordinate.
+ * rX  Resultant EC point's X ordinate.
+ * rY  Resultant EC point's Y ordinate.
+ * rZ  Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
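+ *
+ * A hypothetical call site (all mp_ints initialised by the caller):
+ *     err = sp_ecc_proj_add_point_384(pX, pY, pZ, qX, qY, qZ, pX, pY, pZ);
+ * The result ordinates may alias the inputs, since the sum is computed in
+ * a local point before being written back.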
+ */ +int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 12 * 5]; + sp_point_384 pd; + sp_point_384 qd; +#endif + sp_digit* tmp; + sp_point_384* p; + sp_point_384* q = NULL; + int err; + + err = sp_384_point_new_12(NULL, pd, p); + if (err == MP_OKAY) { + err = sp_384_point_new_12(NULL, qd, q); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 12, pX); + sp_384_from_mp(p->y, 12, pY); + sp_384_from_mp(p->z, 12, pZ); + sp_384_from_mp(q->x, 12, qX); + sp_384_from_mp(q->y, 12, qY); + sp_384_from_mp(q->z, 12, qZ); + + sp_384_proj_point_add_12(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_12(q, 0, NULL); + sp_384_point_free_12(p, 0, NULL); + + return err; +} + +/* Double a projective EC point. + * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 12 * 2]; + sp_point_384 pd; +#endif + sp_digit* tmp; + sp_point_384* p; + int err; + + err = sp_384_point_new_12(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 12, pX); + sp_384_from_mp(p->y, 12, pY); + sp_384_from_mp(p->z, 12, pZ); + + sp_384_proj_point_dbl_12(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_12(p, 0, NULL); + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
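+ *
+ * For reference, the Jacobian-to-affine map computed below is
+ *     (X, Y, Z) -> (X / Z^2, Y / Z^3, 1)
+ * matching the projective representation noted in the verify code above.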
+ */ +int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 12 * 6]; + sp_point_384 pd; +#endif + sp_digit* tmp; + sp_point_384* p; + int err; + + err = sp_384_point_new_12(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 12, pX); + sp_384_from_mp(p->y, 12, pY); + sp_384_from_mp(p->z, 12, pZ); + + sp_384_map_12(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, pZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_12(p, 0, NULL); + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +static int sp_384_mont_sqrt_12(sp_digit* y) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit t1d[2 * 12]; + sp_digit t2d[2 * 12]; + sp_digit t3d[2 * 12]; + sp_digit t4d[2 * 12]; + sp_digit t5d[2 * 12]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* t3; + sp_digit* t4; + sp_digit* t5; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 12, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = d + 0 * 12; + t2 = d + 2 * 12; + t3 = d + 4 * 12; + t4 = d + 6 * 12; + t5 = d + 8 * 12; +#else + t1 = t1d; + t2 = t2d; + t3 = t3d; + t4 = t4d; + t5 = t5d; +#endif + + { + /* t2 = y ^ 0x2 */ + sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3 */ + sp_384_mont_mul_12(t1, t2, y, p384_mod, p384_mp_mod); + /* t5 = y ^ 0xc */ + sp_384_mont_sqr_n_12(t5, t1, 2, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xf */ + sp_384_mont_mul_12(t1, t1, t5, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x1e */ + sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod); + /* t3 = y ^ 0x1f */ + sp_384_mont_mul_12(t3, t2, y, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3e0 */ + sp_384_mont_sqr_n_12(t2, t3, 5, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3ff */ + sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x7fe0 */ + sp_384_mont_sqr_n_12(t2, t1, 5, p384_mod, p384_mp_mod); + /* t3 = y ^ 0x7fff */ + sp_384_mont_mul_12(t3, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fff800 */ + sp_384_mont_sqr_n_12(t2, t3, 15, p384_mod, p384_mp_mod); + /* t4 = y ^ 0x3ffffff */ + sp_384_mont_mul_12(t4, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xffffffc000000 */ + sp_384_mont_sqr_n_12(t2, t4, 30, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xfffffffffffff */ + sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xfffffffffffffff000000000000000 */ + 
sp_384_mont_sqr_n_12(t2, t1, 60, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xffffffffffffffffffffffffffffff */ + sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */ + sp_384_mont_sqr_n_12(t2, t1, 120, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */ + sp_384_mont_sqr_n_12(t2, t1, 15, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */ + sp_384_mont_sqr_n_12(t2, t1, 31, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */ + sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */ + sp_384_mont_sqr_n_12(t2, t1, 4, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */ + sp_384_mont_mul_12(t1, t5, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */ + sp_384_mont_sqr_n_12(t2, t1, 62, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */ + sp_384_mont_mul_12(t1, y, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */ + sp_384_mont_sqr_n_12(y, t1, 30, p384_mod, p384_mp_mod); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
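+ *
+ * Sketch of the recovery performed below:
+ *     y = sqrt(x^3 - 3.x + b) mod p
+ * with y negated (y = p - y) when its parity does not match 'odd'.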
+ */ +int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit xd[2 * 12]; + sp_digit yd[2 * 12]; +#endif + sp_digit* x = NULL; + sp_digit* y = NULL; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 12, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + x = d + 0 * 12; + y = d + 2 * 12; +#else + x = xd; + y = yd; +#endif + + sp_384_from_mp(x, 12, xm); + err = sp_384_mod_mul_norm_12(x, x, p384_mod); + } + if (err == MP_OKAY) { + /* y = x^3 */ + { + sp_384_mont_sqr_12(y, x, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(y, y, x, p384_mod, p384_mp_mod); + } + /* y = x^3 - 3x */ + sp_384_mont_sub_12(y, y, x, p384_mod); + sp_384_mont_sub_12(y, y, x, p384_mod); + sp_384_mont_sub_12(y, y, x, p384_mod); + /* y = x^3 - 3x + b */ + err = sp_384_mod_mul_norm_12(x, p384_b, p384_mod); + } + if (err == MP_OKAY) { + sp_384_mont_add_12(y, y, x, p384_mod); + /* y = sqrt(x^3 - 3x + b) */ + err = sp_384_mont_sqrt_12(y); + } + if (err == MP_OKAY) { + XMEMSET(y + 12, 0, 12U * sizeof(sp_digit)); + sp_384_mont_reduce_12(y, p384_mod, p384_mp_mod); + if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) { + sp_384_mont_sub_12(y, p384_mod, y, p384_mod); + } + + err = sp_384_to_mp(y, ym); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} +#endif +#endif /* WOLFSSL_SP_384 */ +#endif /* WOLFSSL_HAVE_SP_ECC */ +#endif /* WOLFSSL_SP_ARM32_ASM */ +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */ diff --git a/client/wolfssl/wolfcrypt/src/sp_arm64.c b/client/wolfssl/wolfcrypt/src/sp_arm64.c new file mode 100644 index 0000000..ebebe2a --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/sp_arm64.c @@ -0,0 +1,42082 @@ +/* sp.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Implementation by Sean Parkinson. 
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
+    defined(WOLFSSL_HAVE_SP_ECC)
+
+#ifdef RSA_LOW_MEM
+#ifndef WOLFSSL_SP_SMALL
+#define WOLFSSL_SP_SMALL
+#endif
+#endif
+
+#include <wolfssl/wolfcrypt/sp.h>
+
+#ifdef WOLFSSL_SP_ARM64_ASM
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+/* Read big endian unsigned byte array into r.
+ *
+ * r     A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a     Byte array.
+ * n     Number of bytes in array to read.
+ */
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j;
+    byte* d;
+
+    for (i = n - 1,j = 0; i >= 7; i -= 8) {
+        r[j]  = ((sp_digit)a[i - 0] <<  0) |
+                ((sp_digit)a[i - 1] <<  8) |
+                ((sp_digit)a[i - 2] << 16) |
+                ((sp_digit)a[i - 3] << 24) |
+                ((sp_digit)a[i - 4] << 32) |
+                ((sp_digit)a[i - 5] << 40) |
+                ((sp_digit)a[i - 6] << 48) |
+                ((sp_digit)a[i - 7] << 56);
+        j++;
+    }
+
+    if (i >= 0) {
+        r[j] = 0;
+
+        d = (byte*)r;
+        switch (i) {
+            case 6: d[n - 1 - 6] = a[6]; //fallthrough
+            case 5: d[n - 1 - 5] = a[5]; //fallthrough
+            case 4: d[n - 1 - 4] = a[4]; //fallthrough
+            case 3: d[n - 1 - 3] = a[3]; //fallthrough
+            case 2: d[n - 1 - 2] = a[2]; //fallthrough
+            case 1: d[n - 1 - 1] = a[1]; //fallthrough
+            case 0: d[n - 1 - 0] = a[0]; //fallthrough
+        }
+        j++;
+    }
+
+    for (; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r     A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a     A multi-precision integer.
+ */
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 64
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 64
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffffffffffffl;
+        s = 64U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 64U) <= (word32)DIGIT_BIT) {
+            s += 64U;
+            r[j] &= 0xffffffffffffffffl;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 64) {
+            r[j] &= 0xffffffffffffffffl;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 64 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 256
+ *
+ * r  A single precision integer.
+ * a  Byte array.
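+ *
+ * Layout note: the 32 64-bit digits are held least significant digit
+ * first, so the loop below walks from digit 31 down to 0 and emits each
+ * digit's bytes most significant first, giving 256 big-endian bytes.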
+ */ +static void sp_2048_to_bin(sp_digit* r, byte* a) +{ + int i, j; + + for (i = 31, j = 0; i >= 0; i--) { + a[j++] = r[i] >> 56; + a[j++] = r[i] >> 48; + a[j++] = r[i] >> 40; + a[j++] = r[i] >> 32; + a[j++] = r[i] >> 24; + a[j++] = r[i] >> 16; + a[j++] = r[i] >> 8; + a[j++] = r[i] >> 0; + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_digit tmp[8]; + + __asm__ __volatile__ ( + "ldp x9, x10, [%[a], 0]\n\t" + "ldp x11, x12, [%[a], 16]\n\t" + "ldp x13, x14, [%[a], 32]\n\t" + "ldp x15, x16, [%[a], 48]\n\t" + "ldp x17, x19, [%[b], 0]\n\t" + "ldp x20, x21, [%[b], 16]\n\t" + "ldp x22, x23, [%[b], 32]\n\t" + "ldp x24, x25, [%[b], 48]\n\t" + "# A[0] * B[0]\n\t" + "mul x4, x9, x17\n\t" + "umulh x5, x9, x17\n\t" + "str x4, [%[tmp]]\n\t" + "# A[0] * B[1]\n\t" + "mul x7, x9, x19\n\t" + "umulh x8, x9, x19\n\t" + "adds x5, x5, x7\n\t" + "# A[1] * B[0]\n\t" + "mul x7, x10, x17\n\t" + "adc x6, xzr, x8\n\t" + "umulh x8, x10, x17\n\t" + "adds x5, x5, x7\n\t" + "adcs x6, x6, x8\n\t" + "str x5, [%[tmp], 8]\n\t" + "adc x4, xzr, xzr\n\t" + "# A[0] * B[2]\n\t" + "mul x7, x9, x20\n\t" + "umulh x8, x9, x20\n\t" + "adds x6, x6, x7\n\t" + "# A[1] * B[1]\n\t" + "mul x7, x10, x19\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x10, x19\n\t" + "adc x5, xzr, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[2] * B[0]\n\t" + "mul x7, x11, x17\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x11, x17\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "adcs x4, x4, x8\n\t" + "str x6, [%[tmp], 16]\n\t" + "adc x5, x5, xzr\n\t" + "# A[0] * B[3]\n\t" + "mul x7, x9, x21\n\t" + "umulh x8, x9, x21\n\t" + "adds x4, x4, x7\n\t" + "# A[1] * B[2]\n\t" + "mul x7, x10, x20\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x10, x20\n\t" + "adc x6, xzr, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[2] * B[1]\n\t" + "mul x7, x11, x19\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x11, x19\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[3] * B[0]\n\t" + "mul x7, x12, x17\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x12, x17\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "str x4, [%[tmp], 24]\n\t" + "adc x6, x6, xzr\n\t" + "# A[0] * B[4]\n\t" + "mul x7, x9, x22\n\t" + "umulh x8, x9, x22\n\t" + "adds x5, x5, x7\n\t" + "# A[1] * B[3]\n\t" + "mul x7, x10, x21\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x10, x21\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[2] * B[2]\n\t" + "mul x7, x11, x20\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x11, x20\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[3] * B[1]\n\t" + "mul x7, x12, x19\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x12, x19\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[4] * B[0]\n\t" + "mul x7, x13, x17\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x13, x17\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "adcs x6, x6, x8\n\t" + "str x5, [%[tmp], 32]\n\t" + "adc x4, x4, xzr\n\t" + "# A[0] * B[5]\n\t" + "mul x7, x9, x23\n\t" + "umulh x8, x9, x23\n\t" + "adds x6, x6, x7\n\t" + "# A[1] * B[4]\n\t" + "mul x7, x10, x22\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x10, x22\n\t" + "adc x5, xzr, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[2] * B[3]\n\t" + "mul x7, x11, x21\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x11, x21\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[3] * B[2]\n\t" + "mul x7, x12, x20\n\t" + "adcs x4, x4, x8\n\t" 
+ "umulh x8, x12, x20\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[4] * B[1]\n\t" + "mul x7, x13, x19\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x13, x19\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[5] * B[0]\n\t" + "mul x7, x14, x17\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x14, x17\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "adcs x4, x4, x8\n\t" + "str x6, [%[tmp], 40]\n\t" + "adc x5, x5, xzr\n\t" + "# A[0] * B[6]\n\t" + "mul x7, x9, x24\n\t" + "umulh x8, x9, x24\n\t" + "adds x4, x4, x7\n\t" + "# A[1] * B[5]\n\t" + "mul x7, x10, x23\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x10, x23\n\t" + "adc x6, xzr, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[2] * B[4]\n\t" + "mul x7, x11, x22\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x11, x22\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[3] * B[3]\n\t" + "mul x7, x12, x21\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x12, x21\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[4] * B[2]\n\t" + "mul x7, x13, x20\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x13, x20\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[5] * B[1]\n\t" + "mul x7, x14, x19\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x14, x19\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[6] * B[0]\n\t" + "mul x7, x15, x17\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x15, x17\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "str x4, [%[tmp], 48]\n\t" + "adc x6, x6, xzr\n\t" + "# A[0] * B[7]\n\t" + "mul x7, x9, x25\n\t" + "umulh x8, x9, x25\n\t" + "adds x5, x5, x7\n\t" + "# A[1] * B[6]\n\t" + "mul x7, x10, x24\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x10, x24\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[2] * B[5]\n\t" + "mul x7, x11, x23\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x11, x23\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[3] * B[4]\n\t" + "mul x7, x12, x22\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x12, x22\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[4] * B[3]\n\t" + "mul x7, x13, x21\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x13, x21\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[5] * B[2]\n\t" + "mul x7, x14, x20\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x14, x20\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[6] * B[1]\n\t" + "mul x7, x15, x19\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x15, x19\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[7] * B[0]\n\t" + "mul x7, x16, x17\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x16, x17\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "adcs x6, x6, x8\n\t" + "str x5, [%[tmp], 56]\n\t" + "adc x4, x4, xzr\n\t" + "# A[1] * B[7]\n\t" + "mul x7, x10, x25\n\t" + "umulh x8, x10, x25\n\t" + "adds x6, x6, x7\n\t" + "# A[2] * B[6]\n\t" + "mul x7, x11, x24\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x11, x24\n\t" + "adc x5, xzr, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[3] * B[5]\n\t" + "mul x7, x12, x23\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x12, x23\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[4] * B[4]\n\t" + "mul x7, x13, x22\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x13, x22\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[5] * B[3]\n\t" + "mul x7, x14, x21\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x14, x21\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[6] * B[2]\n\t" + "mul x7, x15, x20\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x15, x20\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[7] * B[1]\n\t" + "mul x7, 
x16, x19\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x16, x19\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "adcs x4, x4, x8\n\t" + "str x6, [%[r], 64]\n\t" + "adc x5, x5, xzr\n\t" + "# A[2] * B[7]\n\t" + "mul x7, x11, x25\n\t" + "umulh x8, x11, x25\n\t" + "adds x4, x4, x7\n\t" + "# A[3] * B[6]\n\t" + "mul x7, x12, x24\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x12, x24\n\t" + "adc x6, xzr, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[4] * B[5]\n\t" + "mul x7, x13, x23\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x13, x23\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[5] * B[4]\n\t" + "mul x7, x14, x22\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x14, x22\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[6] * B[3]\n\t" + "mul x7, x15, x21\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x15, x21\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[7] * B[2]\n\t" + "mul x7, x16, x20\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x16, x20\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "str x4, [%[r], 72]\n\t" + "adc x6, x6, xzr\n\t" + "# A[3] * B[7]\n\t" + "mul x7, x12, x25\n\t" + "umulh x8, x12, x25\n\t" + "adds x5, x5, x7\n\t" + "# A[4] * B[6]\n\t" + "mul x7, x13, x24\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x13, x24\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[5] * B[5]\n\t" + "mul x7, x14, x23\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x14, x23\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[6] * B[4]\n\t" + "mul x7, x15, x22\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x15, x22\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[7] * B[3]\n\t" + "mul x7, x16, x21\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x16, x21\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "adcs x6, x6, x8\n\t" + "str x5, [%[r], 80]\n\t" + "adc x4, x4, xzr\n\t" + "# A[4] * B[7]\n\t" + "mul x7, x13, x25\n\t" + "umulh x8, x13, x25\n\t" + "adds x6, x6, x7\n\t" + "# A[5] * B[6]\n\t" + "mul x7, x14, x24\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x14, x24\n\t" + "adc x5, xzr, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[6] * B[5]\n\t" + "mul x7, x15, x23\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x15, x23\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[7] * B[4]\n\t" + "mul x7, x16, x22\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x16, x22\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "adcs x4, x4, x8\n\t" + "str x6, [%[r], 88]\n\t" + "adc x5, x5, xzr\n\t" + "# A[5] * B[7]\n\t" + "mul x7, x14, x25\n\t" + "umulh x8, x14, x25\n\t" + "adds x4, x4, x7\n\t" + "# A[6] * B[6]\n\t" + "mul x7, x15, x24\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x15, x24\n\t" + "adc x6, xzr, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[7] * B[5]\n\t" + "mul x7, x16, x23\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x16, x23\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "str x4, [%[r], 96]\n\t" + "adc x6, x6, xzr\n\t" + "# A[6] * B[7]\n\t" + "mul x7, x15, x25\n\t" + "umulh x8, x15, x25\n\t" + "adds x5, x5, x7\n\t" + "# A[7] * B[6]\n\t" + "mul x7, x16, x24\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x16, x24\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x7\n\t" + "adcs x6, x6, x8\n\t" + "str x5, [%[r], 104]\n\t" + "adc x4, x4, xzr\n\t" + "# A[7] * B[7]\n\t" + "mul x7, x16, x25\n\t" + "umulh x8, x16, x25\n\t" + "adds x6, x6, x7\n\t" + "adc x4, x4, x8\n\t" + "stp x6, x4, [%[r], 112]\n\t" + "ldp x9, x10, [%[tmp], 0]\n\t" + "ldp x11, x12, [%[tmp], 16]\n\t" + "ldp x13, x14, [%[tmp], 32]\n\t" + "ldp x15, x16, [%[tmp], 48]\n\t" + "stp x9, 
x10, [%[r], 0]\n\t" + "stp x11, x12, [%[r], 16]\n\t" + "stp x13, x14, [%[r], 32]\n\t" + "stp x15, x16, [%[r], 48]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25" + ); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "ldp x22, x23, [%[a], 0]\n\t" + "ldp x24, x25, [%[a], 16]\n\t" + "ldp x26, x27, [%[a], 32]\n\t" + "ldp x28, x29, [%[a], 48]\n\t" + "# A[0] * A[1]\n\t" + "mul x3, x22, x23\n\t" + "umulh x7, x22, x23\n\t" + "# A[0] * A[2]\n\t" + "mul x4, x22, x24\n\t" + "umulh x5, x22, x24\n\t" + "adds x7, x7, x4\n\t" + "# A[0] * A[3]\n\t" + "mul x4, x22, x25\n\t" + "adc x8, xzr, x5\n\t" + "umulh x5, x22, x25\n\t" + "adds x8, x8, x4\n\t" + "# A[1] * A[2]\n\t" + "mul x4, x23, x24\n\t" + "adc x9, xzr, x5\n\t" + "umulh x5, x23, x24\n\t" + "adds x8, x8, x4\n\t" + "# A[0] * A[4]\n\t" + "mul x4, x22, x26\n\t" + "adcs x9, x9, x5\n\t" + "umulh x5, x22, x26\n\t" + "adc x10, xzr, xzr\n\t" + "adds x9, x9, x4\n\t" + "# A[1] * A[3]\n\t" + "mul x4, x23, x25\n\t" + "adc x10, x10, x5\n\t" + "umulh x5, x23, x25\n\t" + "adds x9, x9, x4\n\t" + "# A[0] * A[5]\n\t" + "mul x4, x22, x27\n\t" + "adcs x10, x10, x5\n\t" + "umulh x5, x22, x27\n\t" + "adc x11, xzr, xzr\n\t" + "adds x10, x10, x4\n\t" + "# A[1] * A[4]\n\t" + "mul x4, x23, x26\n\t" + "adc x11, x11, x5\n\t" + "umulh x5, x23, x26\n\t" + "adds x10, x10, x4\n\t" + "# A[2] * A[3]\n\t" + "mul x4, x24, x25\n\t" + "adcs x11, x11, x5\n\t" + "umulh x5, x24, x25\n\t" + "adc x12, xzr, xzr\n\t" + "adds x10, x10, x4\n\t" + "# A[0] * A[6]\n\t" + "mul x4, x22, x28\n\t" + "adcs x11, x11, x5\n\t" + "umulh x5, x22, x28\n\t" + "adc x12, x12, xzr\n\t" + "adds x11, x11, x4\n\t" + "# A[1] * A[5]\n\t" + "mul x4, x23, x27\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x23, x27\n\t" + "adc x13, xzr, xzr\n\t" + "adds x11, x11, x4\n\t" + "# A[2] * A[4]\n\t" + "mul x4, x24, x26\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x24, x26\n\t" + "adc x13, x13, xzr\n\t" + "adds x11, x11, x4\n\t" + "# A[0] * A[7]\n\t" + "mul x4, x22, x29\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x22, x29\n\t" + "adc x13, x13, xzr\n\t" + "adds x12, x12, x4\n\t" + "# A[1] * A[6]\n\t" + "mul x4, x23, x28\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x23, x28\n\t" + "adc x14, xzr, xzr\n\t" + "adds x12, x12, x4\n\t" + "# A[2] * A[5]\n\t" + "mul x4, x24, x27\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x24, x27\n\t" + "adc x14, x14, xzr\n\t" + "adds x12, x12, x4\n\t" + "# A[3] * A[4]\n\t" + "mul x4, x25, x26\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x25, x26\n\t" + "adc x14, x14, xzr\n\t" + "adds x12, x12, x4\n\t" + "# A[1] * A[7]\n\t" + "mul x4, x23, x29\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x23, x29\n\t" + "adc x14, x14, xzr\n\t" + "adds x13, x13, x4\n\t" + "# A[2] * A[6]\n\t" + "mul x4, x24, x28\n\t" + "adcs x14, x14, x5\n\t" + "umulh x5, x24, x28\n\t" + "adc x15, xzr, xzr\n\t" + "adds x13, x13, x4\n\t" + "# A[3] * A[5]\n\t" + "mul x4, x25, x27\n\t" + "adcs x14, x14, x5\n\t" + "umulh x5, x25, x27\n\t" + "adc x15, x15, xzr\n\t" + "adds x13, x13, x4\n\t" + "# A[2] * A[7]\n\t" + "mul x4, x24, x29\n\t" + "adcs x14, x14, x5\n\t" + "umulh x5, x24, x29\n\t" + "adc x15, x15, xzr\n\t" + "adds x14, x14, x4\n\t" + "# A[3] * A[6]\n\t" + "mul x4, x25, x28\n\t" + "adcs x15, x15, x5\n\t" 
+ "umulh x5, x25, x28\n\t" + "adc x16, xzr, xzr\n\t" + "adds x14, x14, x4\n\t" + "# A[4] * A[5]\n\t" + "mul x4, x26, x27\n\t" + "adcs x15, x15, x5\n\t" + "umulh x5, x26, x27\n\t" + "adc x16, x16, xzr\n\t" + "adds x14, x14, x4\n\t" + "# A[3] * A[7]\n\t" + "mul x4, x25, x29\n\t" + "adcs x15, x15, x5\n\t" + "umulh x5, x25, x29\n\t" + "adc x16, x16, xzr\n\t" + "adds x15, x15, x4\n\t" + "# A[4] * A[6]\n\t" + "mul x4, x26, x28\n\t" + "adcs x16, x16, x5\n\t" + "umulh x5, x26, x28\n\t" + "adc x17, xzr, xzr\n\t" + "adds x15, x15, x4\n\t" + "# A[4] * A[7]\n\t" + "mul x4, x26, x29\n\t" + "adcs x16, x16, x5\n\t" + "umulh x5, x26, x29\n\t" + "adc x17, x17, xzr\n\t" + "adds x16, x16, x4\n\t" + "# A[5] * A[6]\n\t" + "mul x4, x27, x28\n\t" + "adcs x17, x17, x5\n\t" + "umulh x5, x27, x28\n\t" + "adc x19, xzr, xzr\n\t" + "adds x16, x16, x4\n\t" + "# A[5] * A[7]\n\t" + "mul x4, x27, x29\n\t" + "adcs x17, x17, x5\n\t" + "umulh x5, x27, x29\n\t" + "adc x19, x19, xzr\n\t" + "adds x17, x17, x4\n\t" + "# A[6] * A[7]\n\t" + "mul x4, x28, x29\n\t" + "adcs x19, x19, x5\n\t" + "umulh x5, x28, x29\n\t" + "adc x20, xzr, xzr\n\t" + "adds x19, x19, x4\n\t" + "adc x20, x20, x5\n\t" + "# Double\n\t" + "adds x3, x3, x3\n\t" + "adcs x7, x7, x7\n\t" + "adcs x8, x8, x8\n\t" + "adcs x9, x9, x9\n\t" + "adcs x10, x10, x10\n\t" + "adcs x11, x11, x11\n\t" + "adcs x12, x12, x12\n\t" + "adcs x13, x13, x13\n\t" + "adcs x14, x14, x14\n\t" + "adcs x15, x15, x15\n\t" + "adcs x16, x16, x16\n\t" + "adcs x17, x17, x17\n\t" + "adcs x19, x19, x19\n\t" + "# A[0] * A[0]\n\t" + "mul x2, x22, x22\n\t" + "adcs x20, x20, x20\n\t" + "umulh x4, x22, x22\n\t" + "cset x21, cs\n\t" + "# A[1] * A[1]\n\t" + "mul x5, x23, x23\n\t" + "adds x3, x3, x4\n\t" + "umulh x6, x23, x23\n\t" + "adcs x7, x7, x5\n\t" + "# A[2] * A[2]\n\t" + "mul x4, x24, x24\n\t" + "adcs x8, x8, x6\n\t" + "umulh x5, x24, x24\n\t" + "adcs x9, x9, x4\n\t" + "# A[3] * A[3]\n\t" + "mul x6, x25, x25\n\t" + "adcs x10, x10, x5\n\t" + "umulh x4, x25, x25\n\t" + "adcs x11, x11, x6\n\t" + "# A[4] * A[4]\n\t" + "mul x5, x26, x26\n\t" + "adcs x12, x12, x4\n\t" + "umulh x6, x26, x26\n\t" + "adcs x13, x13, x5\n\t" + "# A[5] * A[5]\n\t" + "mul x4, x27, x27\n\t" + "adcs x14, x14, x6\n\t" + "umulh x5, x27, x27\n\t" + "adcs x15, x15, x4\n\t" + "# A[6] * A[6]\n\t" + "mul x6, x28, x28\n\t" + "adcs x16, x16, x5\n\t" + "umulh x4, x28, x28\n\t" + "adcs x17, x17, x6\n\t" + "# A[7] * A[7]\n\t" + "mul x5, x29, x29\n\t" + "adcs x19, x19, x4\n\t" + "umulh x6, x29, x29\n\t" + "adcs x20, x20, x5\n\t" + "stp x2, x3, [%[r], 0]\n\t" + "adc x21, x21, x6\n\t" + "stp x7, x8, [%[r], 16]\n\t" + "stp x9, x10, [%[r], 32]\n\t" + "stp x11, x12, [%[r], 48]\n\t" + "stp x13, x14, [%[r], 64]\n\t" + "stp x15, x16, [%[r], 80]\n\t" + "stp x17, x19, [%[r], 96]\n\t" + "stp x20, x21, [%[r], 112]\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "x4", "x5", "x6", "x2", "x3", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29" + ); +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "adds x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 48]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return (sp_digit)r; +} + +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. + */ +static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x2, x3, [%[a], 0]\n\t" + "ldp x6, x7, [%[b], 0]\n\t" + "subs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 16]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 16]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 0]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 16]\n\t" + "ldp x2, x3, [%[a], 32]\n\t" + "ldp x6, x7, [%[b], 32]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 48]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 48]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 32]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 48]\n\t" + "ldp x2, x3, [%[a], 64]\n\t" + "ldp x6, x7, [%[b], 64]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 80]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 80]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 64]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 80]\n\t" + "ldp x2, x3, [%[a], 96]\n\t" + "ldp x6, x7, [%[b], 96]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 112]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 112]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 96]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 112]\n\t" + "csetm %[a], cc\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); + + return (sp_digit)a; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "adds x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 48]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 80]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 112]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 112]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return (sp_digit)r; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<8; i++) { + r[i] = a[i] & m; + } +#else + r[0] = a[0] & m; + r[1] = a[1] & m; + r[2] = a[2] & m; + r[3] = a[3] & m; + r[4] = a[4] & m; + r[5] = a[5] & m; + r[6] = a[6] & m; + r[7] = a[7] & m; +#endif +} + +/* Add digit to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_2048_add_zero_8(sp_digit* r, const sp_digit* a, + const sp_digit d) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adds x3, x3, %[d]\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 48]\n\t" + : + : [r] "r" (r), [a] "r" (a), [d] "r" (d) + : "memory", "x3", "x4", "x5", "x6" + ); +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[16]; + sp_digit a1[8]; + sp_digit b1[8]; + sp_digit z2[16]; + sp_digit u, ca, cb; + + ca = sp_2048_add_8(a1, a, &a[8]); + cb = sp_2048_add_8(b1, b, &b[8]); + u = ca & cb; + sp_2048_mul_8(z1, a1, b1); + sp_2048_mul_8(z2, &a[8], &b[8]); + sp_2048_mul_8(z0, a, b); + sp_2048_mask_8(r + 16, a1, 0 - cb); + sp_2048_mask_8(b1, b1, 0 - ca); + u += sp_2048_add_8(r + 16, r + 16, b1); + u += sp_2048_sub_in_place_16(z1, z2); + u += sp_2048_sub_in_place_16(z1, z0); + u += sp_2048_add_16(r + 8, r + 8, z1); + u += sp_2048_add_8(r + 16, r + 16, z2); + sp_2048_add_zero_8(r + 24, z2 + 8, u); +} + +#ifdef WOLFSSL_SP_SMALL +/* Double a into r. (r = a + a) + * + * r A single precision integer. + * a A single precision integer. + */ +static sp_digit sp_2048_dbl_8(sp_digit* r, const sp_digit* a) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x11, %[a], 64\n\t" + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldp x3, x4, [%[a]], #16\n\t" + "ldp x5, x6, [%[a]], #16\n\t" + "adcs x3, x3, x3\n\t" + "adcs x4, x4, x4\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r]], #16\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r]], #16\n\t" + "cset %[c], cs\n\t" + "cmp %[a], x11\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a) + : + : "memory", "x3", "x4", "x5", "x6", "x11" + ); + + return c; +} + +#else +/* Double a into r. (r = a + a) + * + * r A single precision integer. + * a A single precision integer. + */ +static sp_digit sp_2048_dbl_8(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "adds x3, x3, x3\n\t" + "ldr x5, [%[a], 16]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 24]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 48]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 56]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a) + : "memory", "x3", "x4", "x5", "x6" + ); + + return (sp_digit)r; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[16]; + sp_digit z1[16]; + sp_digit a1[8]; + sp_digit u; + + u = sp_2048_add_8(a1, a, &a[8]); + sp_2048_sqr_8(z1, a1); + sp_2048_sqr_8(z2, &a[8]); + sp_2048_sqr_8(z0, a); + sp_2048_mask_8(r + 16, a1, 0 - u); + u += sp_2048_dbl_8(r + 16, r + 16); + u += sp_2048_sub_in_place_16(z1, z2); + u += sp_2048_sub_in_place_16(z1, z0); + u += sp_2048_add_16(r + 8, r + 8, z1); + u += sp_2048_add_8(r + 16, r + 16, z2); + sp_2048_add_zero_8(r + 24, z2 + 8, u); + +} + +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. 
+ */ +static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x2, x3, [%[a], 0]\n\t" + "ldp x6, x7, [%[b], 0]\n\t" + "subs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 16]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 16]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 0]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 16]\n\t" + "ldp x2, x3, [%[a], 32]\n\t" + "ldp x6, x7, [%[b], 32]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 48]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 48]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 32]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 48]\n\t" + "ldp x2, x3, [%[a], 64]\n\t" + "ldp x6, x7, [%[b], 64]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 80]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 80]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 64]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 80]\n\t" + "ldp x2, x3, [%[a], 96]\n\t" + "ldp x6, x7, [%[b], 96]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 112]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 112]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 96]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 112]\n\t" + "ldp x2, x3, [%[a], 128]\n\t" + "ldp x6, x7, [%[b], 128]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 144]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 144]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 128]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 144]\n\t" + "ldp x2, x3, [%[a], 160]\n\t" + "ldp x6, x7, [%[b], 160]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 176]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 176]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 160]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 176]\n\t" + "ldp x2, x3, [%[a], 192]\n\t" + "ldp x6, x7, [%[b], 192]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 208]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 208]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 192]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 208]\n\t" + "ldp x2, x3, [%[a], 224]\n\t" + "ldp x6, x7, [%[b], 224]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 240]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 240]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 224]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 240]\n\t" + "csetm %[a], cc\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); + + return (sp_digit)a; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "adds x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 48]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 80]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 112]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 112]\n\t" + "ldp x3, x4, [%[a], 128]\n\t" + "ldp x7, x8, [%[b], 128]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 144]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 128]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 144]\n\t" + "ldp x3, x4, [%[a], 160]\n\t" + "ldp x7, x8, [%[b], 160]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 176]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 160]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 176]\n\t" + "ldp x3, x4, [%[a], 192]\n\t" + "ldp x7, x8, [%[b], 192]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 208]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 208]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 192]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 208]\n\t" + "ldp x3, x4, [%[a], 224]\n\t" + "ldp x7, x8, [%[b], 224]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 240]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 240]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 224]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 240]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return (sp_digit)r; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<16; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 16; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Add digit to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
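+ *
+ * Note: the digit argument is named d in the signature; it is added into
+ * the least significant word and the carry is rippled through the rest
+ * with "adcs ..., xzr".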
+ */ +static void sp_2048_add_zero_16(sp_digit* r, const sp_digit* a, + const sp_digit d) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adds x3, x3, %[d]\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 112]\n\t" + : + : [r] "r" (r), [a] "r" (a), [d] "r" (d) + : "memory", "x3", "x4", "x5", "x6" + ); +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[32]; + sp_digit a1[16]; + sp_digit b1[16]; + sp_digit z2[32]; + sp_digit u, ca, cb; + + ca = sp_2048_add_16(a1, a, &a[16]); + cb = sp_2048_add_16(b1, b, &b[16]); + u = ca & cb; + sp_2048_mul_16(z1, a1, b1); + sp_2048_mul_16(z2, &a[16], &b[16]); + sp_2048_mul_16(z0, a, b); + sp_2048_mask_16(r + 32, a1, 0 - cb); + sp_2048_mask_16(b1, b1, 0 - ca); + u += sp_2048_add_16(r + 32, r + 32, b1); + u += sp_2048_sub_in_place_32(z1, z2); + u += sp_2048_sub_in_place_32(z1, z0); + u += sp_2048_add_32(r + 16, r + 16, z1); + u += sp_2048_add_16(r + 32, r + 32, z2); + sp_2048_add_zero_16(r + 48, z2 + 16, u); +} + +#ifdef WOLFSSL_SP_SMALL +/* Double a into r. (r = a + a) + * + * r A single precision integer. + * a A single precision integer. + */ +static sp_digit sp_2048_dbl_16(sp_digit* r, const sp_digit* a) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x11, %[a], 128\n\t" + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldp x3, x4, [%[a]], #16\n\t" + "ldp x5, x6, [%[a]], #16\n\t" + "adcs x3, x3, x3\n\t" + "adcs x4, x4, x4\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r]], #16\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r]], #16\n\t" + "cset %[c], cs\n\t" + "cmp %[a], x11\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a) + : + : "memory", "x3", "x4", "x5", "x6", "x11" + ); + + return c; +} + +#else +/* Double a into r. (r = a + a) + * + * r A single precision integer. + * a A single precision integer. 
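+ *
+ * Doubling is done as a self-addition: each word is added to itself with
+ * a rippled carry (adds/adcs), and the final carry out is returned.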
+ */ +static sp_digit sp_2048_dbl_16(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "adds x3, x3, x3\n\t" + "ldr x5, [%[a], 16]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 24]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 48]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 56]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 80]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 88]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 112]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 120]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 112]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a) + : "memory", "x3", "x4", "x5", "x6" + ); + + return (sp_digit)r; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[32]; + sp_digit z1[32]; + sp_digit a1[16]; + sp_digit u; + + u = sp_2048_add_16(a1, a, &a[16]); + sp_2048_sqr_16(z1, a1); + sp_2048_sqr_16(z2, &a[16]); + sp_2048_sqr_16(z0, a); + sp_2048_mask_16(r + 32, a1, 0 - u); + u += sp_2048_dbl_16(r + 32, r + 32); + u += sp_2048_sub_in_place_32(z1, z2); + u += sp_2048_sub_in_place_32(z1, z0); + u += sp_2048_add_32(r + 16, r + 16, z1); + u += sp_2048_add_16(r + 32, r + 32, z2); + sp_2048_add_zero_16(r + 48, z2 + 16, u); + +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x11, %[a], 256\n\t" + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldp x3, x4, [%[a]], #16\n\t" + "ldp x5, x6, [%[a]], #16\n\t" + "ldp x7, x8, [%[b]], #16\n\t" + "adcs x3, x3, x7\n\t" + "ldp x9, x10, [%[b]], #16\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r]], #16\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r]], #16\n\t" + "cset %[c], cs\n\t" + "cmp %[a], x11\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. 
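+ *
+ * Returns 0 - borrow, i.e. 0 when no borrow occurred and all-ones when
+ * it did, so the Karatsuba code above can fold the result straight
+ * into its running carry word:
+ *
+ *     u += sp_2048_sub_in_place_32(z1, z2);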
+ */ +static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x10, %[a], 256\n\t" + "\n1:\n\t" + "subs %[c], xzr, %[c]\n\t" + "ldp x2, x3, [%[a]]\n\t" + "ldp x4, x5, [%[a], #16]\n\t" + "ldp x6, x7, [%[b]], #16\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x8, x9, [%[b]], #16\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a]], #16\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a]], #16\n\t" + "csetm %[c], cc\n\t" + "cmp %[a], x10\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_digit tmp[64]; + + __asm__ __volatile__ ( + "mov x5, 0\n\t" + "mov x6, 0\n\t" + "mov x7, 0\n\t" + "mov x8, 0\n\t" + "\n1:\n\t" + "subs x3, x5, 248\n\t" + "csel x3, xzr, x3, cc\n\t" + "sub x4, x5, x3\n\t" + "\n2:\n\t" + "ldr x10, [%[a], x3]\n\t" + "ldr x11, [%[b], x4]\n\t" + "mul x9, x10, x11\n\t" + "umulh x10, x10, x11\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "add x3, x3, #8\n\t" + "sub x4, x4, #8\n\t" + "cmp x3, 256\n\t" + "b.eq 3f\n\t" + "cmp x3, x5\n\t" + "b.le 2b\n\t" + "\n3:\n\t" + "str x6, [%[r], x5]\n\t" + "mov x6, x7\n\t" + "mov x7, x8\n\t" + "mov x8, #0\n\t" + "add x5, x5, #8\n\t" + "cmp x5, 496\n\t" + "b.le 1b\n\t" + "str x6, [%[r], x5]\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) +{ + sp_digit tmp[64]; + + __asm__ __volatile__ ( + "mov x6, 0\n\t" + "mov x7, 0\n\t" + "mov x8, 0\n\t" + "mov x5, 0\n\t" + "\n1:\n\t" + "subs x3, x5, 248\n\t" + "csel x3, xzr, x3, cc\n\t" + "sub x4, x5, x3\n\t" + "\n2:\n\t" + "cmp x4, x3\n\t" + "b.eq 4f\n\t" + "ldr x10, [%[a], x3]\n\t" + "ldr x11, [%[a], x4]\n\t" + "mul x9, x10, x11\n\t" + "umulh x10, x10, x11\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "b.al 5f\n\t" + "\n4:\n\t" + "ldr x10, [%[a], x3]\n\t" + "mul x9, x10, x10\n\t" + "umulh x10, x10, x10\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "\n5:\n\t" + "add x3, x3, #8\n\t" + "sub x4, x4, #8\n\t" + "cmp x3, 256\n\t" + "b.eq 3f\n\t" + "cmp x3, x4\n\t" + "b.gt 3f\n\t" + "cmp x3, x5\n\t" + "b.le 2b\n\t" + "\n3:\n\t" + "str x6, [%[r], x5]\n\t" + "mov x6, x7\n\t" + "mov x7, x8\n\t" + "mov x8, #0\n\t" + "add x5, x5, #8\n\t" + "cmp x5, 496\n\t" + "b.le 1b\n\t" + "str x6, [%[r], x5]\n\t" + : + : [r] "r" (tmp), [a] "r" (a) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +#endif /* WOLFSSL_SP_SMALL */ +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#ifdef WOLFSSL_SP_SMALL +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. 
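+ *
+ * The mask is always all-ones or all-zeros, so the AND selects either
+ * a or zero without a data-dependent branch. Illustrative pattern from
+ * the division code further down:
+ *
+ *     sp_2048_mask_16(t2, d, t1[16 + i]);               t2 = d or 0
+ *     t1[16 + i] += sp_2048_add_16(&t1[i], &t1[i], t2);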
+ */ +static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m) +{ + int i; + + for (i=0; i<16; i++) { + r[i] = a[i] & m; + } +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x11, %[a], 128\n\t" + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldp x3, x4, [%[a]], #16\n\t" + "ldp x5, x6, [%[a]], #16\n\t" + "ldp x7, x8, [%[b]], #16\n\t" + "adcs x3, x3, x7\n\t" + "ldp x9, x10, [%[b]], #16\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r]], #16\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r]], #16\n\t" + "cset %[c], cs\n\t" + "cmp %[a], x11\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x10, %[a], 128\n\t" + "\n1:\n\t" + "subs %[c], xzr, %[c]\n\t" + "ldp x2, x3, [%[a]]\n\t" + "ldp x4, x5, [%[a], #16]\n\t" + "ldp x6, x7, [%[b]], #16\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x8, x9, [%[b]], #16\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a]], #16\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a]], #16\n\t" + "csetm %[c], cc\n\t" + "cmp %[a], x10\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_digit tmp[32]; + + __asm__ __volatile__ ( + "mov x5, 0\n\t" + "mov x6, 0\n\t" + "mov x7, 0\n\t" + "mov x8, 0\n\t" + "\n1:\n\t" + "subs x3, x5, 120\n\t" + "csel x3, xzr, x3, cc\n\t" + "sub x4, x5, x3\n\t" + "\n2:\n\t" + "ldr x10, [%[a], x3]\n\t" + "ldr x11, [%[b], x4]\n\t" + "mul x9, x10, x11\n\t" + "umulh x10, x10, x11\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "add x3, x3, #8\n\t" + "sub x4, x4, #8\n\t" + "cmp x3, 128\n\t" + "b.eq 3f\n\t" + "cmp x3, x5\n\t" + "b.le 2b\n\t" + "\n3:\n\t" + "str x6, [%[r], x5]\n\t" + "mov x6, x7\n\t" + "mov x7, x8\n\t" + "mov x8, #0\n\t" + "add x5, x5, #8\n\t" + "cmp x5, 240\n\t" + "b.le 1b\n\t" + "str x6, [%[r], x5]\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
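+ *
+ * The column loop below exploits symmetry: for each result column k,
+ * every off-diagonal product a[i]*a[j] (i + j == k, i != j) is
+ * accumulated twice and the diagonal square a[k/2]*a[k/2] once, which
+ * roughly halves the multiplies compared with sp_2048_mul_16(r, a, a).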
+ */
+static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
+{
+    sp_digit tmp[32];
+
+    __asm__ __volatile__ (
+        "mov x6, 0\n\t"
+        "mov x7, 0\n\t"
+        "mov x8, 0\n\t"
+        "mov x5, 0\n\t"
+        "\n1:\n\t"
+        "subs x3, x5, 120\n\t"
+        "csel x3, xzr, x3, cc\n\t"
+        "sub x4, x5, x3\n\t"
+        "\n2:\n\t"
+        "cmp x4, x3\n\t"
+        "b.eq 4f\n\t"
+        "ldr x10, [%[a], x3]\n\t"
+        "ldr x11, [%[a], x4]\n\t"
+        "mul x9, x10, x11\n\t"
+        "umulh x10, x10, x11\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "b.al 5f\n\t"
+        "\n4:\n\t"
+        "ldr x10, [%[a], x3]\n\t"
+        "mul x9, x10, x10\n\t"
+        "umulh x10, x10, x10\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "\n5:\n\t"
+        "add x3, x3, #8\n\t"
+        "sub x4, x4, #8\n\t"
+        "cmp x3, 128\n\t"
+        "b.eq 3f\n\t"
+        "cmp x3, x4\n\t"
+        "b.gt 3f\n\t"
+        "cmp x3, x5\n\t"
+        "b.le 2b\n\t"
+        "\n3:\n\t"
+        "str x6, [%[r], x5]\n\t"
+        "mov x6, x7\n\t"
+        "mov x7, x8\n\t"
+        "mov x8, #0\n\t"
+        "add x5, x5, #8\n\t"
+        "cmp x5, 240\n\t"
+        "b.le 1b\n\t"
+        "str x6, [%[r], x5]\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x; /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x; /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x; /* here x*a==1 mod 2**32 */
+    x *= 2 - b * x; /* here x*a==1 mod 2**64 */
+
+    /* rho = -1/m mod 2^64 */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
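+ *
+ * Reference model of the word-by-word carry chain below (an
+ * illustrative sketch, not part of the original source; hi64() is a
+ * hypothetical stand-in for the UMULH instruction):
+ *
+ *     sp_digit c = 0;
+ *     for (i = 0; i < 32; i++) {
+ *         sp_digit lo = a[i] * b;        low 64 bits  (MUL)
+ *         sp_digit hi = hi64(a[i], b);   high 64 bits (UMULH)
+ *         r[i] = lo + c;
+ *         c = hi + (r[i] < lo);          hi never overflows here
+ *     }
+ *     r[32] = c;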
+ */ +static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "# A[0] * B\n\t" + "ldr x8, [%[a]]\n\t" + "mul x5, %[b], x8\n\t" + "umulh x3, %[b], x8\n\t" + "mov x4, 0\n\t" + "str x5, [%[r]]\n\t" + "mov x5, 0\n\t" + "mov x9, #8\n\t" + "1:\n\t" + "ldr x8, [%[a], x9]\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], x9]\n\t" + "mov x3, x4\n\t" + "mov x4, x5\n\t" + "mov x5, #0\n\t" + "add x9, x9, #8\n\t" + "cmp x9, 256\n\t" + "b.lt 1b\n\t" + "str x3, [%[r], 256]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); +#else + __asm__ __volatile__ ( + "# A[0] * B\n\t" + "ldp x8, x9, [%[a]]\n\t" + "mul x3, %[b], x8\n\t" + "umulh x4, %[b], x8\n\t" + "mov x5, 0\n\t" + "# A[1] * B\n\t" + "str x3, [%[r]]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "umulh x7, %[b], x9\n\t" + "adds x4, x4, x6\n\t" + "# A[2] * B\n\t" + "ldp x8, x9, [%[a], 16]\n\t" + "str x4, [%[r], 8]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[3] * B\n\t" + "str x5, [%[r], 16]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[4] * B\n\t" + "ldp x8, x9, [%[a], 32]\n\t" + "str x3, [%[r], 24]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[5] * B\n\t" + "str x4, [%[r], 32]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[6] * B\n\t" + "ldp x8, x9, [%[a], 48]\n\t" + "str x5, [%[r], 40]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[7] * B\n\t" + "str x3, [%[r], 48]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[8] * B\n\t" + "ldp x8, x9, [%[a], 64]\n\t" + "str x4, [%[r], 56]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[9] * B\n\t" + "str x5, [%[r], 64]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[10] * B\n\t" + "ldp x8, x9, [%[a], 80]\n\t" + "str x3, [%[r], 72]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[11] * B\n\t" + "str x4, [%[r], 80]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[12] * B\n\t" + "ldp x8, x9, [%[a], 96]\n\t" + "str x5, [%[r], 88]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[13] * B\n\t" + "str x3, [%[r], 96]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[14] * B\n\t" + "ldp x8, x9, [%[a], 112]\n\t" + "str x4, 
[%[r], 104]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[15] * B\n\t" + "str x5, [%[r], 112]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[16] * B\n\t" + "ldp x8, x9, [%[a], 128]\n\t" + "str x3, [%[r], 120]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[17] * B\n\t" + "str x4, [%[r], 128]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[18] * B\n\t" + "ldp x8, x9, [%[a], 144]\n\t" + "str x5, [%[r], 136]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[19] * B\n\t" + "str x3, [%[r], 144]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[20] * B\n\t" + "ldp x8, x9, [%[a], 160]\n\t" + "str x4, [%[r], 152]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[21] * B\n\t" + "str x5, [%[r], 160]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[22] * B\n\t" + "ldp x8, x9, [%[a], 176]\n\t" + "str x3, [%[r], 168]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[23] * B\n\t" + "str x4, [%[r], 176]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[24] * B\n\t" + "ldp x8, x9, [%[a], 192]\n\t" + "str x5, [%[r], 184]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[25] * B\n\t" + "str x3, [%[r], 192]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[26] * B\n\t" + "ldp x8, x9, [%[a], 208]\n\t" + "str x4, [%[r], 200]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[27] * B\n\t" + "str x5, [%[r], 208]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[28] * B\n\t" + "ldp x8, x9, [%[a], 224]\n\t" + "str x3, [%[r], 216]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[29] * B\n\t" + "str x4, [%[r], 224]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[30] * B\n\t" + "ldp x8, x9, [%[a], 240]\n\t" + "str x5, [%[r], 232]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[31] * B\n\t" + "str x3, [%[r], 240]\n\t" + "mul x6, %[b], x9\n\t" + "adcs 
x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "adc x5, x5, x7\n\t" + "stp x4, x5, [%[r], 248]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); +#endif +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 2048 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_2048_mont_norm_16(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 16); + + /* r = 2^n mod m */ + sp_2048_sub_in_place_16(r, m); +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_2048_cond_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov x8, #0\n\t" + "1:\n\t" + "subs %[c], xzr, %[c]\n\t" + "ldr x4, [%[a], x8]\n\t" + "ldr x5, [%[b], x8]\n\t" + "and x5, x5, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "csetm %[c], cc\n\t" + "str x4, [%[r], x8]\n\t" + "add x8, x8, #8\n\t" + "cmp x8, 128\n\t" + "b.lt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return c; +#else + __asm__ __volatile__ ( + + "ldp x5, x7, [%[b], 0]\n\t" + "ldp x11, x12, [%[b], 16]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" + "and x7, x7, %[m]\n\t" + "subs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 16]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" + "ldp x11, x12, [%[b], 48]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 48]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 48]\n\t" + "ldp x5, x7, [%[b], 64]\n\t" + "ldp x11, x12, [%[b], 80]\n\t" + "ldp x4, x6, [%[a], 64]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 80]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 64]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 80]\n\t" + "ldp x5, x7, [%[b], 96]\n\t" + "ldp x11, x12, [%[b], 112]\n\t" + "ldp x4, x6, [%[a], 96]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 112]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 96]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 112]\n\t" + "csetm %[r], cc\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return (sp_digit)r; +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Reduce the number back to 2048 bits using Montgomery reduction. 
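+ *
+ * One iteration per low word: mu = a[i] * mp (mod 2^64) is chosen so
+ * that adding mu * m at position i zeroes a[i]. After 16 iterations
+ * the low half is zero and the high half, shifted down, is the value
+ * divided by 2^1024 mod m; the trailing conditional subtract removes
+ * at most one extra copy of m.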
+ * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +SP_NOINLINE static void sp_2048_mont_reduce_16(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "ldp x14, x15, [%[m], 0]\n\t" + "ldp x16, x17, [%[m], 16]\n\t" + "ldp x19, x20, [%[m], 32]\n\t" + "ldp x21, x22, [%[m], 48]\n\t" + "ldp x23, x24, [%[m], 64]\n\t" + "ldp x25, x26, [%[m], 80]\n\t" + "ldp x27, x28, [%[m], 96]\n\t" + "# i = 16\n\t" + "mov x4, 16\n\t" + "ldp x12, x13, [%[a], 0]\n\t" + "\n1:\n\t" + "# mu = a[i] * mp\n\t" + "mul x9, %[mp], x12\n\t" + "# a[i+0] += m[0] * mu\n\t" + "mul x7, x14, x9\n\t" + "umulh x8, x14, x9\n\t" + "adds x12, x12, x7\n\t" + "# a[i+1] += m[1] * mu\n\t" + "mul x7, x15, x9\n\t" + "adc x6, x8, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x12, x13, x7\n\t" + "# a[i+2] += m[2] * mu\n\t" + "ldr x13, [%[a], 16]\n\t" + "adc x5, x8, xzr\n\t" + "mul x7, x16, x9\n\t" + "adds x12, x12, x6\n\t" + "umulh x8, x16, x9\n\t" + "adc x5, x5, xzr\n\t" + "adds x13, x13, x7\n\t" + "# a[i+3] += m[3] * mu\n\t" + "ldr x10, [%[a], 24]\n\t" + "adc x6, x8, xzr\n\t" + "mul x7, x17, x9\n\t" + "adds x13, x13, x5\n\t" + "umulh x8, x17, x9\n\t" + "adc x6, x6, xzr\n\t" + "adds x10, x10, x7\n\t" + "# a[i+4] += m[4] * mu\n\t" + "ldr x11, [%[a], 32]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x19, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x19, x9\n\t" + "str x10, [%[a], 24]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+5] += m[5] * mu\n\t" + "ldr x10, [%[a], 40]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x20, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x20, x9\n\t" + "str x11, [%[a], 32]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+6] += m[6] * mu\n\t" + "ldr x11, [%[a], 48]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x21, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x21, x9\n\t" + "str x10, [%[a], 40]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+7] += m[7] * mu\n\t" + "ldr x10, [%[a], 56]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x22, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x22, x9\n\t" + "str x11, [%[a], 48]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+8] += m[8] * mu\n\t" + "ldr x11, [%[a], 64]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x23, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x23, x9\n\t" + "str x10, [%[a], 56]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+9] += m[9] * mu\n\t" + "ldr x10, [%[a], 72]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x24, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x24, x9\n\t" + "str x11, [%[a], 64]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+10] += m[10] * mu\n\t" + "ldr x11, [%[a], 80]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x25, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x25, x9\n\t" + "str x10, [%[a], 72]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+11] += m[11] * mu\n\t" + "ldr x10, [%[a], 88]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x26, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x26, x9\n\t" + "str x11, [%[a], 80]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+12] += m[12] * mu\n\t" + "ldr x11, [%[a], 96]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x27, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x27, x9\n\t" + "str x10, [%[a], 88]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+13] += m[13] * mu\n\t" + "ldr x10, [%[a], 104]\n\t" + "adc x6, x8, 
xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x28, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x28, x9\n\t" + "str x11, [%[a], 96]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+14] += m[14] * mu\n\t" + "ldr x11, [%[a], 112]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 112]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 104]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+15] += m[15] * mu\n\t" + "ldr x10, [%[a], 120]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 120]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "adds x6, x6, x7\n\t" + "adcs x8, x8, %[ca]\n\t" + "str x11, [%[a], 112]\n\t" + "cset %[ca], cs\n\t" + "adds x10, x10, x6\n\t" + "ldr x11, [%[a], 128]\n\t" + "str x10, [%[a], 120]\n\t" + "adcs x11, x11, x8\n\t" + "str x11, [%[a], 128]\n\t" + "adc %[ca], %[ca], xzr\n\t" + "subs x4, x4, 1\n\t" + "add %[a], %[a], 8\n\t" + "bne 1b\n\t" + "stp x12, x13, [%[a], 0]\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + ); + + sp_2048_cond_sub_16(a - 16, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_2048_mul_16(r, a, b); + sp_2048_mont_reduce_16(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_sqr_16(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_2048_sqr_16(r, a); + sp_2048_mont_reduce_16(r, m, mp); +} + +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
+ */ +static void sp_2048_mul_d_16(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "# A[0] * B\n\t" + "ldr x8, [%[a]]\n\t" + "mul x5, %[b], x8\n\t" + "umulh x3, %[b], x8\n\t" + "mov x4, 0\n\t" + "str x5, [%[r]]\n\t" + "mov x5, 0\n\t" + "mov x9, #8\n\t" + "1:\n\t" + "ldr x8, [%[a], x9]\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], x9]\n\t" + "mov x3, x4\n\t" + "mov x4, x5\n\t" + "mov x5, #0\n\t" + "add x9, x9, #8\n\t" + "cmp x9, 128\n\t" + "b.lt 1b\n\t" + "str x3, [%[r], 128]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); +#else + __asm__ __volatile__ ( + "# A[0] * B\n\t" + "ldp x8, x9, [%[a]]\n\t" + "mul x3, %[b], x8\n\t" + "umulh x4, %[b], x8\n\t" + "mov x5, 0\n\t" + "# A[1] * B\n\t" + "str x3, [%[r]]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "umulh x7, %[b], x9\n\t" + "adds x4, x4, x6\n\t" + "# A[2] * B\n\t" + "ldp x8, x9, [%[a], 16]\n\t" + "str x4, [%[r], 8]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[3] * B\n\t" + "str x5, [%[r], 16]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[4] * B\n\t" + "ldp x8, x9, [%[a], 32]\n\t" + "str x3, [%[r], 24]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[5] * B\n\t" + "str x4, [%[r], 32]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[6] * B\n\t" + "ldp x8, x9, [%[a], 48]\n\t" + "str x5, [%[r], 40]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[7] * B\n\t" + "str x3, [%[r], 48]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[8] * B\n\t" + "ldp x8, x9, [%[a], 64]\n\t" + "str x4, [%[r], 56]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[9] * B\n\t" + "str x5, [%[r], 64]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[10] * B\n\t" + "ldp x8, x9, [%[a], 80]\n\t" + "str x3, [%[r], 72]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[11] * B\n\t" + "str x4, [%[r], 80]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[12] * B\n\t" + "ldp x8, x9, [%[a], 96]\n\t" + "str x5, [%[r], 88]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[13] * B\n\t" + "str x3, [%[r], 96]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[14] * B\n\t" + "ldp x8, x9, [%[a], 112]\n\t" + "str x4, 
[%[r], 104]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[15] * B\n\t"
+        "str x5, [%[r], 112]\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "adc x4, x4, x7\n\t"
+        "stp x3, x4, [%[r], 120]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#endif
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ */
+static sp_digit div_2048_word_16(sp_digit d1, sp_digit d0, sp_digit div)
+{
+    sp_digit r;
+
+    __asm__ __volatile__ (
+        "lsr x5, %[div], 32\n\t"
+        "add x5, x5, 1\n\t"
+
+        "udiv x3, %[d1], x5\n\t"
+        "lsl x6, x3, 32\n\t"
+        "mul x4, %[div], x6\n\t"
+        "umulh x3, %[div], x6\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "udiv x3, %[d1], x5\n\t"
+        "lsl x3, x3, 32\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "umulh x3, %[div], x3\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "lsr x3, %[d0], 32\n\t"
+        "orr x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv x3, x3, x5\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "umulh x3, %[div], x3\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "lsr x3, %[d0], 32\n\t"
+        "orr x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv x3, x3, x5\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "sub %[d0], %[d0], x4\n\t"
+
+        "udiv x3, %[d0], %[div]\n\t"
+        "add %[r], x6, x3\n\t"
+
+        : [r] "=r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "x3", "x4", "x5", "x6"
+    );
+
+    return r;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
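+ *
+ * The words are scanned from most to least significant with no early
+ * exit; a mask register is zeroed at the first difference so later
+ * words cannot change the verdict. Typical use in the division below:
+ *
+ *     r1 = sp_2048_cmp_16(t1, d) >= 0;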
+ */ +static int64_t sp_2048_cmp_16(const sp_digit* a, const sp_digit* b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov x2, -1\n\t" + "mov x3, 1\n\t" + "mov x4, -1\n\t" + "mov x5, 120\n\t" + "1:\n\t" + "ldr x6, [%[a], x5]\n\t" + "ldr x7, [%[b], x5]\n\t" + "and x6, x6, x4\n\t" + "and x7, x7, x4\n\t" + "subs x6, x6, x7\n\t" + "csel x2, x3, x2, hi\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "subs x5, x5, #8\n\t" + "b.cs 1b\n\t" + "eor %[a], x2, x4\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + ); +#else + __asm__ __volatile__ ( + "mov x2, -1\n\t" + "mov x3, 1\n\t" + "mov x4, -1\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "ldp x7, x8, [%[b], 112]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 96]\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "ldp x7, x8, [%[b], 80]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 64]\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "ldp x7, x8, [%[b], 48]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 32]\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "ldp x7, x8, [%[b], 16]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + 
"csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "eor %[a], x2, x4\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + ); +#endif + + return (int64_t)a; +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[32], t2[17]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[15]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 16); + for (i=15; i>=0; i--) { + r1 = div_2048_word_16(t1[16 + i], t1[16 + i - 1], div); + + sp_2048_mul_d_16(t2, d, r1); + t1[16 + i] += sp_2048_sub_in_place_16(&t1[i], t2); + t1[16 + i] -= t2[16]; + sp_2048_mask_16(t2, d, t1[16 + i]); + t1[16 + i] += sp_2048_add_16(&t1[i], &t1[i], t2); + sp_2048_mask_16(t2, d, t1[16 + i]); + t1[16 + i] += sp_2048_add_16(&t1[i], &t1[i], t2); + } + + r1 = sp_2048_cmp_16(t1, d) >= 0; + sp_2048_cond_sub_16(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_mod_16(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_16(a, m, NULL, r); +} + +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][32]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 32, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 32; + } +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_16(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 16U); + if (reduceA != 0) { + err = sp_2048_mod_16(t[1] + 16, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_16(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 16, a, sizeof(sp_digit) * 16); + err = sp_2048_mod_16(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_16(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_16(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_16(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_16(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_16(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_16(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_16(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_16(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_16(t[10], t[ 5], m, mp); + sp_2048_mont_mul_16(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_16(t[12], t[ 6], m, mp); + sp_2048_mont_mul_16(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_16(t[14], t[ 7], m, mp); + sp_2048_mont_mul_16(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 4; + if (c == 64) { + c = 60; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 16); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 60; + n <<= 4; + c = 60; + } + else if (c < 4) { + y = n >> 60; + n = e[i--]; + c = 4 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 60) & 0xf; + n <<= 4; + c -= 4; + } + + sp_2048_mont_sqr_16(r, r, m, mp); + sp_2048_mont_sqr_16(r, r, m, mp); + sp_2048_mont_sqr_16(r, r, m, mp); + sp_2048_mont_sqr_16(r, r, m, mp); + + sp_2048_mont_mul_16(r, r, t[y], m, mp); + } + + XMEMSET(&r[16], 0, sizeof(sp_digit) * 16U); + sp_2048_mont_reduce_16(r, m, mp); + + mask = 0 - (sp_2048_cmp_16(r, m) >= 0); + sp_2048_cond_sub_16(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
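+ *
+ * Same algorithm as the small variant above but with 5-bit windows: a
+ * 32-entry table of a^0..a^31 trades 30 extra precomputed Montgomery
+ * operations for one table multiply per five squarings instead of
+ * per four.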
+ */ +static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][32]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 32, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 32; + } +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_16(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 16U); + if (reduceA != 0) { + err = sp_2048_mod_16(t[1] + 16, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_16(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 16, a, sizeof(sp_digit) * 16); + err = sp_2048_mod_16(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_16(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_16(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_16(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_16(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_16(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_16(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_16(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_16(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_16(t[10], t[ 5], m, mp); + sp_2048_mont_mul_16(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_16(t[12], t[ 6], m, mp); + sp_2048_mont_mul_16(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_16(t[14], t[ 7], m, mp); + sp_2048_mont_mul_16(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_16(t[16], t[ 8], m, mp); + sp_2048_mont_mul_16(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_16(t[18], t[ 9], m, mp); + sp_2048_mont_mul_16(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_16(t[20], t[10], m, mp); + sp_2048_mont_mul_16(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_16(t[22], t[11], m, mp); + sp_2048_mont_mul_16(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_16(t[24], t[12], m, mp); + sp_2048_mont_mul_16(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_16(t[26], t[13], m, mp); + sp_2048_mont_mul_16(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_16(t[28], t[14], m, mp); + sp_2048_mont_mul_16(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_16(t[30], t[15], m, mp); + sp_2048_mont_mul_16(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 5; + if (c == 64) { + c = 59; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 16); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 59; + n <<= 5; + c = 59; + } + else if (c < 5) { + y = n >> 59; + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_2048_mont_sqr_16(r, r, m, mp); + sp_2048_mont_sqr_16(r, r, m, mp); + sp_2048_mont_sqr_16(r, r, m, mp); + sp_2048_mont_sqr_16(r, r, m, mp); + sp_2048_mont_sqr_16(r, r, m, mp); + + sp_2048_mont_mul_16(r, r, t[y], m, mp); + } + + XMEMSET(&r[16], 0, sizeof(sp_digit) * 16U); + sp_2048_mont_reduce_16(r, m, mp); + + mask = 0 - (sp_2048_cmp_16(r, m) >= 0); + sp_2048_cond_sub_16(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ + +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && 
!WOLFSSL_RSA_PUBLIC_ONLY */ + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 2048 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 32); + + /* r = 2^n mod m */ + sp_2048_sub_in_place_32(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov x8, #0\n\t" + "1:\n\t" + "subs %[c], xzr, %[c]\n\t" + "ldr x4, [%[a], x8]\n\t" + "ldr x5, [%[b], x8]\n\t" + "and x5, x5, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "csetm %[c], cc\n\t" + "str x4, [%[r], x8]\n\t" + "add x8, x8, #8\n\t" + "cmp x8, 256\n\t" + "b.lt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return c; +#else + __asm__ __volatile__ ( + + "ldp x5, x7, [%[b], 0]\n\t" + "ldp x11, x12, [%[b], 16]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" + "and x7, x7, %[m]\n\t" + "subs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 16]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" + "ldp x11, x12, [%[b], 48]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 48]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 48]\n\t" + "ldp x5, x7, [%[b], 64]\n\t" + "ldp x11, x12, [%[b], 80]\n\t" + "ldp x4, x6, [%[a], 64]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 80]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 64]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 80]\n\t" + "ldp x5, x7, [%[b], 96]\n\t" + "ldp x11, x12, [%[b], 112]\n\t" + "ldp x4, x6, [%[a], 96]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 112]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 96]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 112]\n\t" + "ldp x5, x7, [%[b], 128]\n\t" + "ldp x11, x12, [%[b], 144]\n\t" + "ldp x4, x6, [%[a], 128]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 144]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 128]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 144]\n\t" + "ldp x5, x7, [%[b], 160]\n\t" + "ldp x11, x12, [%[b], 176]\n\t" + "ldp x4, x6, [%[a], 160]\n\t" + "and x5, x5, 
%[m]\n\t" + "ldp x9, x10, [%[a], 176]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 160]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 176]\n\t" + "ldp x5, x7, [%[b], 192]\n\t" + "ldp x11, x12, [%[b], 208]\n\t" + "ldp x4, x6, [%[a], 192]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 208]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 192]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 208]\n\t" + "ldp x5, x7, [%[b], 224]\n\t" + "ldp x11, x12, [%[b], 240]\n\t" + "ldp x4, x6, [%[a], 224]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 240]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 224]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 240]\n\t" + "csetm %[r], cc\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return (sp_digit)r; +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "ldp x14, x15, [%[m], 0]\n\t" + "ldp x16, x17, [%[m], 16]\n\t" + "ldp x19, x20, [%[m], 32]\n\t" + "ldp x21, x22, [%[m], 48]\n\t" + "ldp x23, x24, [%[m], 64]\n\t" + "ldp x25, x26, [%[m], 80]\n\t" + "ldp x27, x28, [%[m], 96]\n\t" + "# i = 32\n\t" + "mov x4, 32\n\t" + "ldp x12, x13, [%[a], 0]\n\t" + "\n1:\n\t" + "# mu = a[i] * mp\n\t" + "mul x9, %[mp], x12\n\t" + "# a[i+0] += m[0] * mu\n\t" + "mul x7, x14, x9\n\t" + "umulh x8, x14, x9\n\t" + "adds x12, x12, x7\n\t" + "# a[i+1] += m[1] * mu\n\t" + "mul x7, x15, x9\n\t" + "adc x6, x8, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x12, x13, x7\n\t" + "# a[i+2] += m[2] * mu\n\t" + "ldr x13, [%[a], 16]\n\t" + "adc x5, x8, xzr\n\t" + "mul x7, x16, x9\n\t" + "adds x12, x12, x6\n\t" + "umulh x8, x16, x9\n\t" + "adc x5, x5, xzr\n\t" + "adds x13, x13, x7\n\t" + "# a[i+3] += m[3] * mu\n\t" + "ldr x10, [%[a], 24]\n\t" + "adc x6, x8, xzr\n\t" + "mul x7, x17, x9\n\t" + "adds x13, x13, x5\n\t" + "umulh x8, x17, x9\n\t" + "adc x6, x6, xzr\n\t" + "adds x10, x10, x7\n\t" + "# a[i+4] += m[4] * mu\n\t" + "ldr x11, [%[a], 32]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x19, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x19, x9\n\t" + "str x10, [%[a], 24]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+5] += m[5] * mu\n\t" + "ldr x10, [%[a], 40]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x20, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x20, x9\n\t" + "str x11, [%[a], 32]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+6] += m[6] * mu\n\t" + "ldr x11, [%[a], 48]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x21, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x21, x9\n\t" + "str x10, [%[a], 40]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+7] += m[7] * mu\n\t" + "ldr x10, [%[a], 56]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x22, 
x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x22, x9\n\t" + "str x11, [%[a], 48]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+8] += m[8] * mu\n\t" + "ldr x11, [%[a], 64]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x23, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x23, x9\n\t" + "str x10, [%[a], 56]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+9] += m[9] * mu\n\t" + "ldr x10, [%[a], 72]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x24, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x24, x9\n\t" + "str x11, [%[a], 64]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+10] += m[10] * mu\n\t" + "ldr x11, [%[a], 80]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x25, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x25, x9\n\t" + "str x10, [%[a], 72]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+11] += m[11] * mu\n\t" + "ldr x10, [%[a], 88]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x26, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x26, x9\n\t" + "str x11, [%[a], 80]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+12] += m[12] * mu\n\t" + "ldr x11, [%[a], 96]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x27, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x27, x9\n\t" + "str x10, [%[a], 88]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+13] += m[13] * mu\n\t" + "ldr x10, [%[a], 104]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x28, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x28, x9\n\t" + "str x11, [%[a], 96]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+14] += m[14] * mu\n\t" + "ldr x11, [%[a], 112]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 112]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 104]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+15] += m[15] * mu\n\t" + "ldr x10, [%[a], 120]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 120]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 112]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+16] += m[16] * mu\n\t" + "ldr x11, [%[a], 128]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 128]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 120]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+17] += m[17] * mu\n\t" + "ldr x10, [%[a], 136]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 136]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 128]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+18] += m[18] * mu\n\t" + "ldr x11, [%[a], 144]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 144]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 136]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+19] += m[19] * mu\n\t" + "ldr x10, [%[a], 152]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 152]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 144]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+20] += m[20] * mu\n\t" + "ldr x11, [%[a], 160]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 160]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 152]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+21] += m[21] * mu\n\t" + "ldr x10, [%[a], 168]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 168]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, 
x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 160]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+22] += m[22] * mu\n\t" + "ldr x11, [%[a], 176]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 176]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 168]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+23] += m[23] * mu\n\t" + "ldr x10, [%[a], 184]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 184]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 176]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+24] += m[24] * mu\n\t" + "ldr x11, [%[a], 192]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 192]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 184]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+25] += m[25] * mu\n\t" + "ldr x10, [%[a], 200]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 200]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 192]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+26] += m[26] * mu\n\t" + "ldr x11, [%[a], 208]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 208]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 200]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+27] += m[27] * mu\n\t" + "ldr x10, [%[a], 216]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 216]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 208]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+28] += m[28] * mu\n\t" + "ldr x11, [%[a], 224]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 224]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 216]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+29] += m[29] * mu\n\t" + "ldr x10, [%[a], 232]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 232]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 224]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+30] += m[30] * mu\n\t" + "ldr x11, [%[a], 240]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 240]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 232]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+31] += m[31] * mu\n\t" + "ldr x10, [%[a], 248]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 248]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "adds x6, x6, x7\n\t" + "adcs x8, x8, %[ca]\n\t" + "str x11, [%[a], 240]\n\t" + "cset %[ca], cs\n\t" + "adds x10, x10, x6\n\t" + "ldr x11, [%[a], 256]\n\t" + "str x10, [%[a], 248]\n\t" + "adcs x11, x11, x8\n\t" + "str x11, [%[a], 256]\n\t" + "adc %[ca], %[ca], xzr\n\t" + "subs x4, x4, 1\n\t" + "add %[a], %[a], 8\n\t" + "bne 1b\n\t" + "stp x12, x13, [%[a], 0]\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + ); + + sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. 
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_2048_mul_32(r, a, b);
+    sp_2048_mont_reduce_32(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_2048_sqr_32(r, a);
+    sp_2048_mont_reduce_32(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ */
+static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div)
+{
+    sp_digit r;
+
+    __asm__ __volatile__ (
+        "lsr x5, %[div], 32\n\t"
+        "add x5, x5, 1\n\t"
+
+        "udiv x3, %[d1], x5\n\t"
+        "lsl x6, x3, 32\n\t"
+        "mul x4, %[div], x6\n\t"
+        "umulh x3, %[div], x6\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "udiv x3, %[d1], x5\n\t"
+        "lsl x3, x3, 32\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "umulh x3, %[div], x3\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "lsr x3, %[d0], 32\n\t"
+        "orr x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv x3, x3, x5\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "umulh x3, %[div], x3\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "lsr x3, %[d0], 32\n\t"
+        "orr x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv x3, x3, x5\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "sub %[d0], %[d0], x4\n\t"
+
+        "udiv x3, %[d0], %[div]\n\t"
+        "add %[r], x6, x3\n\t"
+
+        : [r] "=r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "x3", "x4", "x5", "x6"
+    );
+
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<32; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
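+ *
+ * Words are compared from most significant to least. The x4 register
+ * starts as an all-ones mask and is cleared by "csel x4, x4, xzr, eq"
+ * at the first mismatch, so every later word is masked to zero and
+ * cannot change the verdict; the instruction sequence is the same
+ * whatever the data.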
+ */ +static int64_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov x2, -1\n\t" + "mov x3, 1\n\t" + "mov x4, -1\n\t" + "mov x5, 248\n\t" + "1:\n\t" + "ldr x6, [%[a], x5]\n\t" + "ldr x7, [%[b], x5]\n\t" + "and x6, x6, x4\n\t" + "and x7, x7, x4\n\t" + "subs x6, x6, x7\n\t" + "csel x2, x3, x2, hi\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "subs x5, x5, #8\n\t" + "b.cs 1b\n\t" + "eor %[a], x2, x4\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + ); +#else + __asm__ __volatile__ ( + "mov x2, -1\n\t" + "mov x3, 1\n\t" + "mov x4, -1\n\t" + "ldp x5, x6, [%[a], 240]\n\t" + "ldp x7, x8, [%[b], 240]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 224]\n\t" + "ldp x7, x8, [%[b], 224]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 208]\n\t" + "ldp x7, x8, [%[b], 208]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 192]\n\t" + "ldp x7, x8, [%[b], 192]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "ldp x7, x8, [%[b], 176]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 160]\n\t" + "ldp x7, x8, [%[b], 160]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "ldp x7, x8, [%[b], 144]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 128]\n\t" + "ldp x7, x8, [%[b], 128]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, 
x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "ldp x7, x8, [%[b], 112]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 96]\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "ldp x7, x8, [%[b], 80]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 64]\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "ldp x7, x8, [%[b], 48]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 32]\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "ldp x7, x8, [%[b], 16]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "eor %[a], x2, x4\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + ); +#endif + + return (int64_t)a; +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. 
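+ *
+ * Each pass estimates one 64-bit quotient word from the top two
+ * remainder words with div_2048_word_32(), multiply-subtracts r1 * d,
+ * then repairs a possible over-estimate with two masked add-backs of
+ * d (the top remainder word, 0 or -1 after underflow, is the mask),
+ * keeping the loop constant time.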
+ */ +static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[64], t2[33]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[31]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 32); + for (i=31; i>=0; i--) { + r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div); + + sp_2048_mul_d_32(t2, d, r1); + t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2); + t1[32 + i] -= t2[32]; + sp_2048_mask_32(t2, d, t1[32 + i]); + t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2); + sp_2048_mask_32(t2, d, t1[32 + i]); + t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2); + } + + r1 = sp_2048_cmp_32(t1, d) >= 0; + sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_32(a, m, NULL, r); +} + +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x11, %[a], 256\n\t" + "\n1:\n\t" + "subs %[c], xzr, %[c]\n\t" + "ldp x3, x4, [%[a]], #16\n\t" + "ldp x5, x6, [%[a]], #16\n\t" + "ldp x7, x8, [%[b]], #16\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x9, x10, [%[b]], #16\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r]], #16\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r]], #16\n\t" + "csetm %[c], cc\n\t" + "cmp %[a], x11\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + return c; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
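+ *
+ * All 32 words are subtracted in a single flag chain: the first pair
+ * uses subs, each following pair uses sbcs to consume the borrow, and
+ * "csetm %[r], cc" finally widens the borrow flag into the 0 / -1
+ * return value.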
+ */
+static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "ldp x3, x4, [%[a], 0]\n\t"
+        "ldp x7, x8, [%[b], 0]\n\t"
+        "subs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 16]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 16]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 0]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 16]\n\t"
+        "ldp x3, x4, [%[a], 32]\n\t"
+        "ldp x7, x8, [%[b], 32]\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 48]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 48]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 32]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 48]\n\t"
+        "ldp x3, x4, [%[a], 64]\n\t"
+        "ldp x7, x8, [%[b], 64]\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 80]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 80]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 64]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 80]\n\t"
+        "ldp x3, x4, [%[a], 96]\n\t"
+        "ldp x7, x8, [%[b], 96]\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 112]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 112]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 96]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 112]\n\t"
+        "ldp x3, x4, [%[a], 128]\n\t"
+        "ldp x7, x8, [%[b], 128]\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 144]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 144]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 128]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 144]\n\t"
+        "ldp x3, x4, [%[a], 160]\n\t"
+        "ldp x7, x8, [%[b], 160]\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 176]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 176]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 160]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 176]\n\t"
+        "ldp x3, x4, [%[a], 192]\n\t"
+        "ldp x7, x8, [%[b], 192]\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 208]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 208]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 192]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 208]\n\t"
+        "ldp x3, x4, [%[a], 224]\n\t"
+        "ldp x7, x8, [%[b], 224]\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 240]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 240]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 224]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 240]\n\t"
+        "csetm %[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+    );
+
+    return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Divide a by d and put the remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
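+ *
+ * Unlike sp_2048_div_32() above, the add-backs here are guarded by
+ * data-dependent branches, so this variant is not constant time; it
+ * is only applied to public values, as in sp_RsaPublic_2048() below.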
+ */ +static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[64], t2[33]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[31]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 32); + for (i=31; i>=0; i--) { + r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div); + + sp_2048_mul_d_32(t2, d, r1); + t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2); + t1[32 + i] -= t2[32]; + if (t1[32 + i] != 0) { + t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], d); + if (t1[32 + i] != 0) + t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], d); + } + } + + for (i = 31; i > 0; i--) { + if (t1[i] != d[i]) + break; + } + if (t1[i] >= d[i]) { + sp_2048_sub_32(r, t1, d); + } + else { + XMEMCPY(r, t1, sizeof(*t1) * 32); + } + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_mod_32_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_32_cond(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][64]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 64, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 64; + } +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_32(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 32U); + if (reduceA != 0) { + err = sp_2048_mod_32(t[1] + 32, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_32(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32); + err = sp_2048_mod_32(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_32(t[10], t[ 5], m, mp); + sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_32(t[12], t[ 6], m, mp); + sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_32(t[14], t[ 7], m, mp); + sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 4; + if (c == 64) 
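+        /* Fixed 4-bit window exponentiation: t[y] holds a^y in
+         * Montgomery form for y < 16. The first window is shortened so
+         * the remaining exponent splits into whole 4-bit digits; each
+         * digit then costs four Montgomery squarings plus one multiply
+         * by t[y]. */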
{ + c = 60; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 32); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 60; + n <<= 4; + c = 60; + } + else if (c < 4) { + y = n >> 60; + n = e[i--]; + c = 4 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 60) & 0xf; + n <<= 4; + c -= 4; + } + + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + + sp_2048_mont_mul_32(r, r, t[y], m, mp); + } + + XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U); + sp_2048_mont_reduce_32(r, m, mp); + + mask = 0 - (sp_2048_cmp_32(r, m) >= 0); + sp_2048_cond_sub_32(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][64]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 64; + } +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_32(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 32U); + if (reduceA != 0) { + err = sp_2048_mod_32(t[1] + 32, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_32(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32); + err = sp_2048_mod_32(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_32(t[10], t[ 5], m, mp); + sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_32(t[12], t[ 6], m, mp); + sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_32(t[14], t[ 7], m, mp); + sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_32(t[16], t[ 8], m, mp); + sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_32(t[18], t[ 9], m, mp); + sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_32(t[20], t[10], m, mp); + sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_32(t[22], t[11], m, mp); + sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_32(t[24], t[12], m, mp); + sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_32(t[26], t[13], m, mp); + sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_32(t[28], 
t[14], m, mp); + sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_32(t[30], t[15], m, mp); + sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 5; + if (c == 64) { + c = 59; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 32); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 59; + n <<= 5; + c = 59; + } + else if (c < 5) { + y = n >> 59; + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + + sp_2048_mont_mul_32(r, r, t[y], m, mp); + } + + XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U); + sp_2048_mont_reduce_32(r, m, mp); + + mask = 0 - (sp_2048_cmp_32(r, m) >= 0); + sp_2048_cond_sub_32(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[64], m[32], r[64]; +#else + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; +#endif + sp_digit *ah; + sp_digit e[1]; + int err = MP_OKAY; + + if (*outLen < 256) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 256 || + mp_count_bits(mm) != 2048)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 32 * 2; + m = r + 32 * 2; + } +#endif + + if (err == MP_OKAY) { + ah = a + 32; + + sp_2048_from_bin(ah, 32, in, inLen); +#if DIGIT_BIT >= 64 + e[0] = em->dp[0]; +#else + e[0] = em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_2048_from_mp(m, 32, mm); + + if (e[0] == 0x3) { + if (err == MP_OKAY) { + sp_2048_sqr_32(r, ah); + err = sp_2048_mod_32_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_2048_mul_32(r, ah, r); + err = sp_2048_mod_32_cond(r, r, m); + } + } + else { + int i; + sp_digit mp; + + sp_2048_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
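+             * The Montgomery form of x is x * R mod m with R = 2^2048.
+             * The base was read into the upper half (ah = a + 32) and
+             * the low 32 words are zeroed below, so the 64-word value
+             * equals in * R; one modular reduction lands directly in
+             * Montgomery form.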
+             */
+            XMEMSET(a, 0, sizeof(sp_digit) * 32);
+            err = sp_2048_mod_32_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i = 63; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 32);
+                for (i--; i>=0; i--) {
+                    sp_2048_mont_sqr_32(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_2048_mont_mul_32(r, r, a, m, mp);
+                    }
+                }
+                XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
+                sp_2048_mont_reduce_32(r, m, mp);
+
+                for (i = 31; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_2048_sub_in_place_32(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+/* RSA private key operation using the private exponent directly;
+ * the CRT parameters pm, qm, dpm, dqm and qim are accepted but unused
+ * in this build.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 256U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 2048) {
+            err = MP_READ_E;
+        }
+        if (inLen > 256) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 4, NULL,
+            DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 32;
+        m = a + 64;
+        r = a;
+
+        sp_2048_from_bin(a, 32, in, inLen);
+        sp_2048_from_mp(d, 32, dm);
+        sp_2048_from_mp(m, 32, mm);
+        err = sp_2048_mod_exp_32(r, a, d, 2048, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 32);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+}
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
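+ *
+ * Portable equivalent (sketch): for each of the 16 words
+ *   r[i] = a[i] + (b[i] & m)
+ * with the carry chained across words; m is 0 or all ones, so the
+ * addition is applied or skipped without a branch.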
+ */ +static sp_digit sp_2048_cond_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov x8, #0\n\t" + "1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr x4, [%[a], x8]\n\t" + "ldr x5, [%[b], x8]\n\t" + "and x5, x5, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "cset %[c], cs\n\t" + "str x4, [%[r], x8]\n\t" + "add x8, x8, #8\n\t" + "cmp x8, 128\n\t" + "b.lt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return c; +#else + __asm__ __volatile__ ( + + "ldp x5, x7, [%[b], 0]\n\t" + "ldp x11, x12, [%[b], 16]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" + "and x7, x7, %[m]\n\t" + "adds x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 16]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" + "ldp x11, x12, [%[b], 48]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 48]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 48]\n\t" + "ldp x5, x7, [%[b], 64]\n\t" + "ldp x11, x12, [%[b], 80]\n\t" + "ldp x4, x6, [%[a], 64]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 80]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 64]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 80]\n\t" + "ldp x5, x7, [%[b], 96]\n\t" + "ldp x11, x12, [%[b], 112]\n\t" + "ldp x4, x6, [%[a], 96]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 112]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 96]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 112]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return (sp_digit)r; +#endif /* WOLFSSL_SP_SMALL */ +} + +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
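+ *
+ * Implements the CRT (Garner) recombination visible in the body:
+ *   tmpa = in^dpm mod pm,  tmpb = in^dqm mod qm,
+ *   h    = qim * (tmpa - tmpb) mod pm,
+ *   out  = tmpb + h * qm,
+ * so two 1024-bit exponentiations replace a single 2048-bit one.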
+ */ +int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[32 * 2]; + sp_digit p[16], q[16], dp[16]; + sp_digit tmpa[32], tmpb[32]; +#else + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* tmpa; + sp_digit* tmpb; +#endif + sp_digit* r; + sp_digit* qi; + sp_digit* dq; + sp_digit c; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 256) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 32 * 2; + q = p + 16; + qi = dq = dp = q + 16; + tmpa = qi + 16; + tmpb = tmpa + 32; + + r = t + 32; + } +#else +#endif + + if (err == MP_OKAY) { +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + r = a; + qi = dq = dp; +#endif + sp_2048_from_bin(a, 32, in, inLen); + sp_2048_from_mp(p, 16, pm); + sp_2048_from_mp(q, 16, qm); + sp_2048_from_mp(dp, 16, dpm); + + err = sp_2048_mod_exp_16(tmpa, a, dp, 1024, p, 1); + } + if (err == MP_OKAY) { + sp_2048_from_mp(dq, 16, dqm); + err = sp_2048_mod_exp_16(tmpb, a, dq, 1024, q, 1); + } + + if (err == MP_OKAY) { + c = sp_2048_sub_in_place_16(tmpa, tmpb); + c += sp_2048_cond_add_16(tmpa, tmpa, p, c); + sp_2048_cond_add_16(tmpa, tmpa, p, c); + + sp_2048_from_mp(qi, 16, qim); + sp_2048_mul_16(tmpa, tmpa, qi); + err = sp_2048_mod_16(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_2048_mul_16(tmpa, q, tmpa); + XMEMSET(&tmpb[16], 0, sizeof(sp_digit) * 16); + sp_2048_add_32(r, tmpb, tmpa); + + sp_2048_to_bin(r, out); + *outLen = 256; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 16 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); +#endif + + return err; +} +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. 
+ */ +static int sp_2048_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 64 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 32); + r->used = 32; + mp_clamp(r); +#elif DIGIT_BIT < 64 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 32; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 64) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 64 - s; + } + r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 32; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 64 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 64 - s; + } + else { + s += 64; + } + } + r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[64], e[32], m[32]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 32, base); + sp_2048_from_mp(e, 32, exp); + sp_2048_from_mp(m, 32, mod); + + err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_2048_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_2048 +static void sp_2048_lshift_32(sp_digit* r, sp_digit* a, byte n) +{ + __asm__ __volatile__ ( + "mov x6, 63\n\t" + "sub x6, x6, %[n]\n\t" + "ldr x3, [%[a], 248]\n\t" + "lsr x4, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x4, x4, x6\n\t" + "ldr x2, [%[a], 240]\n\t" + "str x4, [%[r], 256]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 232]\n\t" + "str x3, [%[r], 248]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 224]\n\t" + "str x2, [%[r], 240]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 216]\n\t" + "str x4, [%[r], 232]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 208]\n\t" + "str x3, [%[r], 224]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 200]\n\t" + "str x2, [%[r], 216]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 192]\n\t" + "str x4, [%[r], 208]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + 
"ldr x4, [%[a], 184]\n\t" + "str x3, [%[r], 200]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 176]\n\t" + "str x2, [%[r], 192]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 168]\n\t" + "str x4, [%[r], 184]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 160]\n\t" + "str x3, [%[r], 176]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 152]\n\t" + "str x2, [%[r], 168]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 144]\n\t" + "str x4, [%[r], 160]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 136]\n\t" + "str x3, [%[r], 152]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 128]\n\t" + "str x2, [%[r], 144]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 120]\n\t" + "str x4, [%[r], 136]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 112]\n\t" + "str x3, [%[r], 128]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 104]\n\t" + "str x2, [%[r], 120]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 96]\n\t" + "str x4, [%[r], 112]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 88]\n\t" + "str x3, [%[r], 104]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 80]\n\t" + "str x2, [%[r], 96]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 72]\n\t" + "str x4, [%[r], 88]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 64]\n\t" + "str x3, [%[r], 80]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 56]\n\t" + "str x2, [%[r], 72]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 48]\n\t" + "str x4, [%[r], 64]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 40]\n\t" + "str x3, [%[r], 56]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 32]\n\t" + "str x2, [%[r], 48]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 24]\n\t" + "str x4, [%[r], 40]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 16]\n\t" + "str x3, [%[r], 32]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 8]\n\t" + "str x2, [%[r], 24]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 0]\n\t" + "str x4, [%[r], 16]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "str 
x2, [%[r]]\n\t" + "str x3, [%[r], 8]\n\t" + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) + : "memory", "x2", "x3", "x4", "x5", "x6" + ); +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_2048_mod_exp_2_32(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[64]; + sp_digit td[33]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 97, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 64; +#else + norm = nd; + tmp = td; +#endif + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_32(norm, m); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 6; + if (c == 64) { + c = 58; + } + y = (int)(n >> c); + n <<= 64 - c; + sp_2048_lshift_32(r, norm, y); + for (; i>=0 || c>=6; ) { + if (c == 0) { + n = e[i--]; + y = n >> 58; + n <<= 6; + c = 58; + } + else if (c < 6) { + y = n >> 58; + n = e[i--]; + c = 6 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 58) & 0x3f; + n <<= 6; + c -= 6; + } + + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + + sp_2048_lshift_32(r, r, y); + sp_2048_mul_d_32(tmp, norm, r[32]); + r[32] = 0; + o = sp_2048_add_32(r, r, tmp); + sp_2048_cond_sub_32(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U); + sp_2048_mont_reduce_32(r, m, mp); + + mask = 0 - (sp_2048_cmp_32(r, m) >= 0); + sp_2048_cond_sub_32(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* HAVE_FFDHE_2048 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
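+ *
+ * For FFDHE-style moduli the base-2 fast path above is taken when
+ * base->used == 1, base->dp[0] == 2 and the top modulus word is all
+ * ones; each window step is then a left shift rather than a multiply.
+ * Illustrative call (the mp_int and byte buffer names are the
+ * caller's own):
+ *
+ *   byte pub[256]; word32 pubSz = (word32)sizeof(pub);
+ *   ret = sp_DhExp_2048(&base, priv, privSz, &prime, pub, &pubSz);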
+ */ +int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ + int err = MP_OKAY; + sp_digit b[64], e[32], m[32]; + sp_digit* r = b; + word32 i; + + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 256) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 32, base); + sp_2048_from_bin(e, 32, exp, expLen); + sp_2048_from_mp(m, 32, mod); + + #ifdef HAVE_FFDHE_2048 + if (base->used == 1 && base->dp[0] == 2 && m[31] == (sp_digit)-1) + err = sp_2048_mod_exp_2_32(r, e, expLen * 8, m); + else + #endif + err = sp_2048_mod_exp_32(r, b, e, expLen * 8, m, 0); + + } + + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + for (i=0; i<256 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[32], e[16], m[16]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1024) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 1024) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 1024) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 16, base); + sp_2048_from_mp(e, 16, exp); + sp_2048_from_mp(m, 16, mod); + + err = sp_2048_mod_exp_16(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 16, 0, sizeof(*r) * 16U); + err = sp_2048_to_mp(r, res); + res->used = mod->used; + mp_clamp(res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* !WOLFSSL_SP_NO_2048 */ + +#ifndef WOLFSSL_SP_NO_3072 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j; + byte* d; + + for (i = n - 1,j = 0; i >= 7; i -= 8) { + r[j] = ((sp_digit)a[i - 0] << 0) | + ((sp_digit)a[i - 1] << 8) | + ((sp_digit)a[i - 2] << 16) | + ((sp_digit)a[i - 3] << 24) | + ((sp_digit)a[i - 4] << 32) | + ((sp_digit)a[i - 5] << 40) | + ((sp_digit)a[i - 6] << 48) | + ((sp_digit)a[i - 7] << 56); + j++; + } + + if (i >= 0) { + r[j] = 0; + + d = (byte*)r; + switch (i) { + case 6: d[n - 1 - 6] = a[6]; //fallthrough + case 5: d[n - 1 - 5] = a[5]; //fallthrough + case 4: d[n - 1 - 4] = a[4]; //fallthrough + case 3: d[n - 1 - 3] = a[3]; //fallthrough + case 2: d[n - 1 - 2] = a[2]; //fallthrough + case 1: d[n - 1 - 1] = a[1]; //fallthrough + case 0: d[n - 1 - 0] = a[0]; //fallthrough + } + j++; + } + + for (; j < size; j++) { + r[j] = 0; + } +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. 
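+ *
+ * The destination is little-endian in 64-bit sp_digit limbs; when
+ * DIGIT_BIT differs from 64 the mp_digit limbs are split or joined
+ * across word boundaries, which is what the shift-and-carry cases
+ * below handle.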
+ */ +static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 64 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 64 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffffffffffffl; + s = 64U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 64U) <= (word32)DIGIT_BIT) { + s += 64U; + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 64) { + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + s = 64 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 384 + * + * r A single precision integer. + * a Byte array. + */ +static void sp_3072_to_bin(sp_digit* r, byte* a) +{ + int i, j; + + for (i = 47, j = 0; i >= 0; i--) { + a[j++] = r[i] >> 56; + a[j++] = r[i] >> 48; + a[j++] = r[i] >> 40; + a[j++] = r[i] >> 32; + a[j++] = r[i] >> 24; + a[j++] = r[i] >> 16; + a[j++] = r[i] >> 8; + a[j++] = r[i] >> 0; + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
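+ *
+ * Product scanning: for each output word k the code sums every
+ * 128-bit partial product a[i] * b[j] with i + j == k, using
+ *   mul   x7, xN, x9   (low 64 bits)
+ *   umulh x8, xN, x9   (high 64 bits)
+ * and an adds/adcs chain into a rotating three-register accumulator
+ * (x4, x5, x6) before each store.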
+ */ +static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_digit tmp[12]; + + __asm__ __volatile__ ( + "ldp x10, x11, [%[a], 0]\n\t" + "ldp x12, x13, [%[a], 16]\n\t" + "ldp x14, x15, [%[a], 32]\n\t" + "ldp x16, x17, [%[a], 48]\n\t" + "ldp x19, x20, [%[a], 64]\n\t" + "ldp x21, x22, [%[a], 80]\n\t" + "# A[0] * B[0]\n\t" + "ldr x9, [%[b], 0]\n\t" + "mul x4, x10, x9\n\t" + "umulh x5, x10, x9\n\t" + "mov x6, 0\n\t" + "str x4, [%[tmp]]\n\t" + "# A[0] * B[1]\n\t" + "ldr x9, [%[b], 8]\n\t" + "mul x7, x10, x9\n\t" + "umulh x8, x10, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[1] * B[0]\n\t" + "ldr x9, [%[b], 0]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x11, x9\n\t" + "adc x4, xzr, xzr\n\t" + "umulh x8, x11, x9\n\t" + "adds x5, x5, x7\n\t" + "adcs x6, x6, x8\n\t" + "str x5, [%[tmp], 8]\n\t" + "adc x4, x4, xzr\n\t" + "# A[0] * B[2]\n\t" + "ldr x9, [%[b], 16]\n\t" + "mul x7, x10, x9\n\t" + "umulh x8, x10, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[1] * B[1]\n\t" + "ldr x9, [%[b], 8]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x11, x9\n\t" + "adc x5, xzr, xzr\n\t" + "umulh x8, x11, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[2] * B[0]\n\t" + "ldr x9, [%[b], 0]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x12, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x12, x9\n\t" + "adds x6, x6, x7\n\t" + "adcs x4, x4, x8\n\t" + "str x6, [%[tmp], 16]\n\t" + "adc x5, x5, xzr\n\t" + "# A[0] * B[3]\n\t" + "ldr x9, [%[b], 24]\n\t" + "mul x7, x10, x9\n\t" + "umulh x8, x10, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[1] * B[2]\n\t" + "ldr x9, [%[b], 16]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x11, x9\n\t" + "adc x6, xzr, xzr\n\t" + "umulh x8, x11, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[2] * B[1]\n\t" + "ldr x9, [%[b], 8]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x12, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x12, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[3] * B[0]\n\t" + "ldr x9, [%[b], 0]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x13, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x13, x9\n\t" + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "str x4, [%[tmp], 24]\n\t" + "adc x6, x6, xzr\n\t" + "# A[0] * B[4]\n\t" + "ldr x9, [%[b], 32]\n\t" + "mul x7, x10, x9\n\t" + "umulh x8, x10, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[1] * B[3]\n\t" + "ldr x9, [%[b], 24]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x11, x9\n\t" + "adc x4, xzr, xzr\n\t" + "umulh x8, x11, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[2] * B[2]\n\t" + "ldr x9, [%[b], 16]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x12, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x12, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[3] * B[1]\n\t" + "ldr x9, [%[b], 8]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x13, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x13, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[4] * B[0]\n\t" + "ldr x9, [%[b], 0]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x14, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x14, x9\n\t" + "adds x5, x5, x7\n\t" + "adcs x6, x6, x8\n\t" + "str x5, [%[tmp], 32]\n\t" + "adc x4, x4, xzr\n\t" + "# A[0] * B[5]\n\t" + "ldr x9, [%[b], 40]\n\t" + "mul x7, x10, x9\n\t" + "umulh x8, x10, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[1] * B[4]\n\t" + "ldr x9, [%[b], 32]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x11, x9\n\t" + "adc x5, xzr, xzr\n\t" + "umulh x8, x11, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[2] * B[3]\n\t" + "ldr x9, [%[b], 24]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x12, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x12, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[3] * B[2]\n\t" + "ldr x9, [%[b], 16]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x13, x9\n\t" + "adc x5, x5, 
xzr\n\t" + "umulh x8, x13, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[4] * B[1]\n\t" + "ldr x9, [%[b], 8]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x14, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x14, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[5] * B[0]\n\t" + "ldr x9, [%[b], 0]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x15, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x6, x6, x7\n\t" + "adcs x4, x4, x8\n\t" + "str x6, [%[tmp], 40]\n\t" + "adc x5, x5, xzr\n\t" + "# A[0] * B[6]\n\t" + "ldr x9, [%[b], 48]\n\t" + "mul x7, x10, x9\n\t" + "umulh x8, x10, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[1] * B[5]\n\t" + "ldr x9, [%[b], 40]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x11, x9\n\t" + "adc x6, xzr, xzr\n\t" + "umulh x8, x11, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[2] * B[4]\n\t" + "ldr x9, [%[b], 32]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x12, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x12, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[3] * B[3]\n\t" + "ldr x9, [%[b], 24]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x13, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x13, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[4] * B[2]\n\t" + "ldr x9, [%[b], 16]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x14, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x14, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[5] * B[1]\n\t" + "ldr x9, [%[b], 8]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x15, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[6] * B[0]\n\t" + "ldr x9, [%[b], 0]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x16, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x16, x9\n\t" + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "str x4, [%[tmp], 48]\n\t" + "adc x6, x6, xzr\n\t" + "# A[0] * B[7]\n\t" + "ldr x9, [%[b], 56]\n\t" + "mul x7, x10, x9\n\t" + "umulh x8, x10, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[1] * B[6]\n\t" + "ldr x9, [%[b], 48]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x11, x9\n\t" + "adc x4, xzr, xzr\n\t" + "umulh x8, x11, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[2] * B[5]\n\t" + "ldr x9, [%[b], 40]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x12, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x12, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[3] * B[4]\n\t" + "ldr x9, [%[b], 32]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x13, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x13, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[4] * B[3]\n\t" + "ldr x9, [%[b], 24]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x14, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x14, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[5] * B[2]\n\t" + "ldr x9, [%[b], 16]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x15, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[6] * B[1]\n\t" + "ldr x9, [%[b], 8]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x16, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x16, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[7] * B[0]\n\t" + "ldr x9, [%[b], 0]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x17, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x17, x9\n\t" + "adds x5, x5, x7\n\t" + "adcs x6, x6, x8\n\t" + "str x5, [%[tmp], 56]\n\t" + "adc x4, x4, xzr\n\t" + "# A[0] * B[8]\n\t" + "ldr x9, [%[b], 64]\n\t" + "mul x7, x10, x9\n\t" + "umulh x8, x10, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[1] * B[7]\n\t" + "ldr x9, [%[b], 56]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x11, x9\n\t" + "adc x5, xzr, xzr\n\t" + "umulh x8, x11, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[2] * B[6]\n\t" + "ldr x9, [%[b], 48]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x12, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x12, x9\n\t" + "adds x6, x6, 
x7\n\t" + "# A[3] * B[5]\n\t" + "ldr x9, [%[b], 40]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x13, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x13, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[4] * B[4]\n\t" + "ldr x9, [%[b], 32]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x14, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x14, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[5] * B[3]\n\t" + "ldr x9, [%[b], 24]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x15, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[6] * B[2]\n\t" + "ldr x9, [%[b], 16]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x16, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x16, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[7] * B[1]\n\t" + "ldr x9, [%[b], 8]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x17, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x17, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[8] * B[0]\n\t" + "ldr x9, [%[b], 0]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x19, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x19, x9\n\t" + "adds x6, x6, x7\n\t" + "adcs x4, x4, x8\n\t" + "str x6, [%[tmp], 64]\n\t" + "adc x5, x5, xzr\n\t" + "# A[0] * B[9]\n\t" + "ldr x9, [%[b], 72]\n\t" + "mul x7, x10, x9\n\t" + "umulh x8, x10, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[1] * B[8]\n\t" + "ldr x9, [%[b], 64]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x11, x9\n\t" + "adc x6, xzr, xzr\n\t" + "umulh x8, x11, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[2] * B[7]\n\t" + "ldr x9, [%[b], 56]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x12, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x12, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[3] * B[6]\n\t" + "ldr x9, [%[b], 48]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x13, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x13, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[4] * B[5]\n\t" + "ldr x9, [%[b], 40]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x14, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x14, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[5] * B[4]\n\t" + "ldr x9, [%[b], 32]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x15, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[6] * B[3]\n\t" + "ldr x9, [%[b], 24]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x16, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x16, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[7] * B[2]\n\t" + "ldr x9, [%[b], 16]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x17, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x17, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[8] * B[1]\n\t" + "ldr x9, [%[b], 8]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x19, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x19, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[9] * B[0]\n\t" + "ldr x9, [%[b], 0]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x20, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x20, x9\n\t" + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "str x4, [%[tmp], 72]\n\t" + "adc x6, x6, xzr\n\t" + "# A[0] * B[10]\n\t" + "ldr x9, [%[b], 80]\n\t" + "mul x7, x10, x9\n\t" + "umulh x8, x10, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[1] * B[9]\n\t" + "ldr x9, [%[b], 72]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x11, x9\n\t" + "adc x4, xzr, xzr\n\t" + "umulh x8, x11, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[2] * B[8]\n\t" + "ldr x9, [%[b], 64]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x12, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x12, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[3] * B[7]\n\t" + "ldr x9, [%[b], 56]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x13, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x13, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[4] * B[6]\n\t" + "ldr x9, [%[b], 48]\n\t" + "adcs x6, x6, 
x8\n\t" + "mul x7, x14, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x14, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[5] * B[5]\n\t" + "ldr x9, [%[b], 40]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x15, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[6] * B[4]\n\t" + "ldr x9, [%[b], 32]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x16, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x16, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[7] * B[3]\n\t" + "ldr x9, [%[b], 24]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x17, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x17, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[8] * B[2]\n\t" + "ldr x9, [%[b], 16]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x19, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x19, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[9] * B[1]\n\t" + "ldr x9, [%[b], 8]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x20, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x20, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[10] * B[0]\n\t" + "ldr x9, [%[b], 0]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x21, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x21, x9\n\t" + "adds x5, x5, x7\n\t" + "adcs x6, x6, x8\n\t" + "str x5, [%[tmp], 80]\n\t" + "adc x4, x4, xzr\n\t" + "# A[0] * B[11]\n\t" + "ldr x9, [%[b], 88]\n\t" + "mul x7, x10, x9\n\t" + "umulh x8, x10, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[1] * B[10]\n\t" + "ldr x9, [%[b], 80]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x11, x9\n\t" + "adc x5, xzr, xzr\n\t" + "umulh x8, x11, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[2] * B[9]\n\t" + "ldr x9, [%[b], 72]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x12, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x12, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[3] * B[8]\n\t" + "ldr x9, [%[b], 64]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x13, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x13, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[4] * B[7]\n\t" + "ldr x9, [%[b], 56]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x14, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x14, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[5] * B[6]\n\t" + "ldr x9, [%[b], 48]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x15, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[6] * B[5]\n\t" + "ldr x9, [%[b], 40]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x16, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x16, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[7] * B[4]\n\t" + "ldr x9, [%[b], 32]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x17, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x17, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[8] * B[3]\n\t" + "ldr x9, [%[b], 24]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x19, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x19, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[9] * B[2]\n\t" + "ldr x9, [%[b], 16]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x20, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x20, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[10] * B[1]\n\t" + "ldr x9, [%[b], 8]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x21, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x21, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[11] * B[0]\n\t" + "ldr x9, [%[b], 0]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x22, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x22, x9\n\t" + "adds x6, x6, x7\n\t" + "adcs x4, x4, x8\n\t" + "str x6, [%[tmp], 88]\n\t" + "adc x5, x5, xzr\n\t" + "# A[1] * B[11]\n\t" + "ldr x9, [%[b], 88]\n\t" + "mul x7, x11, x9\n\t" + "umulh x8, x11, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[2] * B[10]\n\t" + "ldr x9, [%[b], 80]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x12, x9\n\t" + "adc x6, xzr, xzr\n\t" + "umulh x8, 
x12, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[3] * B[9]\n\t" + "ldr x9, [%[b], 72]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x13, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x13, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[4] * B[8]\n\t" + "ldr x9, [%[b], 64]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x14, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x14, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[5] * B[7]\n\t" + "ldr x9, [%[b], 56]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x15, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[6] * B[6]\n\t" + "ldr x9, [%[b], 48]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x16, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x16, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[7] * B[5]\n\t" + "ldr x9, [%[b], 40]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x17, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x17, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[8] * B[4]\n\t" + "ldr x9, [%[b], 32]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x19, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x19, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[9] * B[3]\n\t" + "ldr x9, [%[b], 24]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x20, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x20, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[10] * B[2]\n\t" + "ldr x9, [%[b], 16]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x21, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x21, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[11] * B[1]\n\t" + "ldr x9, [%[b], 8]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x22, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x22, x9\n\t" + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "str x4, [%[r], 96]\n\t" + "adc x6, x6, xzr\n\t" + "# A[2] * B[11]\n\t" + "ldr x9, [%[b], 88]\n\t" + "mul x7, x12, x9\n\t" + "umulh x8, x12, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[3] * B[10]\n\t" + "ldr x9, [%[b], 80]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x13, x9\n\t" + "adc x4, xzr, xzr\n\t" + "umulh x8, x13, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[4] * B[9]\n\t" + "ldr x9, [%[b], 72]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x14, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x14, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[5] * B[8]\n\t" + "ldr x9, [%[b], 64]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x15, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[6] * B[7]\n\t" + "ldr x9, [%[b], 56]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x16, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x16, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[7] * B[6]\n\t" + "ldr x9, [%[b], 48]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x17, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x17, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[8] * B[5]\n\t" + "ldr x9, [%[b], 40]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x19, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x19, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[9] * B[4]\n\t" + "ldr x9, [%[b], 32]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x20, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x20, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[10] * B[3]\n\t" + "ldr x9, [%[b], 24]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x21, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x21, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[11] * B[2]\n\t" + "ldr x9, [%[b], 16]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x22, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x22, x9\n\t" + "adds x5, x5, x7\n\t" + "adcs x6, x6, x8\n\t" + "str x5, [%[r], 104]\n\t" + "adc x4, x4, xzr\n\t" + "# A[3] * B[11]\n\t" + "ldr x9, [%[b], 88]\n\t" + "mul x7, x13, x9\n\t" + "umulh x8, x13, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[4] * B[10]\n\t" + "ldr x9, 
[%[b], 80]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x14, x9\n\t" + "adc x5, xzr, xzr\n\t" + "umulh x8, x14, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[5] * B[9]\n\t" + "ldr x9, [%[b], 72]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x15, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[6] * B[8]\n\t" + "ldr x9, [%[b], 64]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x16, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x16, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[7] * B[7]\n\t" + "ldr x9, [%[b], 56]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x17, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x17, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[8] * B[6]\n\t" + "ldr x9, [%[b], 48]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x19, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x19, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[9] * B[5]\n\t" + "ldr x9, [%[b], 40]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x20, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x20, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[10] * B[4]\n\t" + "ldr x9, [%[b], 32]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x21, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x21, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[11] * B[3]\n\t" + "ldr x9, [%[b], 24]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x22, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x22, x9\n\t" + "adds x6, x6, x7\n\t" + "adcs x4, x4, x8\n\t" + "str x6, [%[r], 112]\n\t" + "adc x5, x5, xzr\n\t" + "# A[4] * B[11]\n\t" + "ldr x9, [%[b], 88]\n\t" + "mul x7, x14, x9\n\t" + "umulh x8, x14, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[5] * B[10]\n\t" + "ldr x9, [%[b], 80]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x15, x9\n\t" + "adc x6, xzr, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[6] * B[9]\n\t" + "ldr x9, [%[b], 72]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x16, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x16, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[7] * B[8]\n\t" + "ldr x9, [%[b], 64]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x17, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x17, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[8] * B[7]\n\t" + "ldr x9, [%[b], 56]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x19, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x19, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[9] * B[6]\n\t" + "ldr x9, [%[b], 48]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x20, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x20, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[10] * B[5]\n\t" + "ldr x9, [%[b], 40]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x21, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x21, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[11] * B[4]\n\t" + "ldr x9, [%[b], 32]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x22, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x22, x9\n\t" + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "str x4, [%[r], 120]\n\t" + "adc x6, x6, xzr\n\t" + "# A[5] * B[11]\n\t" + "ldr x9, [%[b], 88]\n\t" + "mul x7, x15, x9\n\t" + "umulh x8, x15, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[6] * B[10]\n\t" + "ldr x9, [%[b], 80]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x16, x9\n\t" + "adc x4, xzr, xzr\n\t" + "umulh x8, x16, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[7] * B[9]\n\t" + "ldr x9, [%[b], 72]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x17, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x17, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[8] * B[8]\n\t" + "ldr x9, [%[b], 64]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x19, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x19, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[9] * B[7]\n\t" + "ldr x9, [%[b], 56]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x20, x9\n\t" + 
"adc x4, x4, xzr\n\t" + "umulh x8, x20, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[10] * B[6]\n\t" + "ldr x9, [%[b], 48]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x21, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x21, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[11] * B[5]\n\t" + "ldr x9, [%[b], 40]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x22, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x22, x9\n\t" + "adds x5, x5, x7\n\t" + "adcs x6, x6, x8\n\t" + "str x5, [%[r], 128]\n\t" + "adc x4, x4, xzr\n\t" + "# A[6] * B[11]\n\t" + "ldr x9, [%[b], 88]\n\t" + "mul x7, x16, x9\n\t" + "umulh x8, x16, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[7] * B[10]\n\t" + "ldr x9, [%[b], 80]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x17, x9\n\t" + "adc x5, xzr, xzr\n\t" + "umulh x8, x17, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[8] * B[9]\n\t" + "ldr x9, [%[b], 72]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x19, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x19, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[9] * B[8]\n\t" + "ldr x9, [%[b], 64]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x20, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x20, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[10] * B[7]\n\t" + "ldr x9, [%[b], 56]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x21, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x21, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[11] * B[6]\n\t" + "ldr x9, [%[b], 48]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x22, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x22, x9\n\t" + "adds x6, x6, x7\n\t" + "adcs x4, x4, x8\n\t" + "str x6, [%[r], 136]\n\t" + "adc x5, x5, xzr\n\t" + "# A[7] * B[11]\n\t" + "ldr x9, [%[b], 88]\n\t" + "mul x7, x17, x9\n\t" + "umulh x8, x17, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[8] * B[10]\n\t" + "ldr x9, [%[b], 80]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x19, x9\n\t" + "adc x6, xzr, xzr\n\t" + "umulh x8, x19, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[9] * B[9]\n\t" + "ldr x9, [%[b], 72]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x20, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x20, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[10] * B[8]\n\t" + "ldr x9, [%[b], 64]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x21, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x21, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[11] * B[7]\n\t" + "ldr x9, [%[b], 56]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x22, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x22, x9\n\t" + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "str x4, [%[r], 144]\n\t" + "adc x6, x6, xzr\n\t" + "# A[8] * B[11]\n\t" + "ldr x9, [%[b], 88]\n\t" + "mul x7, x19, x9\n\t" + "umulh x8, x19, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[9] * B[10]\n\t" + "ldr x9, [%[b], 80]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x20, x9\n\t" + "adc x4, xzr, xzr\n\t" + "umulh x8, x20, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[10] * B[9]\n\t" + "ldr x9, [%[b], 72]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x21, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x21, x9\n\t" + "adds x5, x5, x7\n\t" + "# A[11] * B[8]\n\t" + "ldr x9, [%[b], 64]\n\t" + "adcs x6, x6, x8\n\t" + "mul x7, x22, x9\n\t" + "adc x4, x4, xzr\n\t" + "umulh x8, x22, x9\n\t" + "adds x5, x5, x7\n\t" + "adcs x6, x6, x8\n\t" + "str x5, [%[r], 152]\n\t" + "adc x4, x4, xzr\n\t" + "# A[9] * B[11]\n\t" + "ldr x9, [%[b], 88]\n\t" + "mul x7, x20, x9\n\t" + "umulh x8, x20, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[10] * B[10]\n\t" + "ldr x9, [%[b], 80]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x21, x9\n\t" + "adc x5, xzr, xzr\n\t" + "umulh x8, x21, x9\n\t" + "adds x6, x6, x7\n\t" + "# A[11] * B[9]\n\t" + "ldr x9, [%[b], 72]\n\t" + "adcs x4, x4, x8\n\t" + "mul x7, x22, x9\n\t" + 
"adc x5, x5, xzr\n\t" + "umulh x8, x22, x9\n\t" + "adds x6, x6, x7\n\t" + "adcs x4, x4, x8\n\t" + "str x6, [%[r], 160]\n\t" + "adc x5, x5, xzr\n\t" + "# A[10] * B[11]\n\t" + "ldr x9, [%[b], 88]\n\t" + "mul x7, x21, x9\n\t" + "umulh x8, x21, x9\n\t" + "adds x4, x4, x7\n\t" + "# A[11] * B[10]\n\t" + "ldr x9, [%[b], 80]\n\t" + "adcs x5, x5, x8\n\t" + "mul x7, x22, x9\n\t" + "adc x6, xzr, xzr\n\t" + "umulh x8, x22, x9\n\t" + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "str x4, [%[r], 168]\n\t" + "adc x6, x6, xzr\n\t" + "# A[11] * B[11]\n\t" + "ldr x9, [%[b], 88]\n\t" + "mul x7, x22, x9\n\t" + "umulh x8, x22, x9\n\t" + "adds x5, x5, x7\n\t" + "adc x6, x6, x8\n\t" + "stp x5, x6, [%[r], 176]\n\t" + "ldp x10, x11, [%[tmp], 0]\n\t" + "ldp x12, x13, [%[tmp], 16]\n\t" + "ldp x14, x15, [%[tmp], 32]\n\t" + "ldp x16, x17, [%[tmp], 48]\n\t" + "ldp x19, x20, [%[tmp], 64]\n\t" + "ldp x21, x22, [%[tmp], 80]\n\t" + "stp x10, x11, [%[r], 0]\n\t" + "stp x12, x13, [%[r], 16]\n\t" + "stp x14, x15, [%[r], 32]\n\t" + "stp x16, x17, [%[r], 48]\n\t" + "stp x19, x20, [%[r], 64]\n\t" + "stp x21, x22, [%[r], 80]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22" + ); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "ldp x10, x11, [%[a], 0]\n\t" + "ldp x12, x13, [%[a], 16]\n\t" + "ldp x14, x15, [%[a], 32]\n\t" + "ldp x16, x17, [%[a], 48]\n\t" + "ldp x19, x20, [%[a], 64]\n\t" + "ldp x21, x22, [%[a], 80]\n\t" + "# A[0] * A[0]\n\t" + "mul x2, x10, x10\n\t" + "umulh x3, x10, x10\n\t" + "str x2, [%[r]]\n\t" + "mov x4, 0\n\t" + "# A[0] * A[1]\n\t" + "mul x8, x10, x11\n\t" + "umulh x9, x10, x11\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "str x3, [%[r], 8]\n\t" + "# A[0] * A[2]\n\t" + "mul x8, x10, x12\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x10, x12\n\t" + "adc x2, x2, xzr\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x8\n\t" + "# A[1] * A[1]\n\t" + "mul x8, x11, x11\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x11, x11\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x8\n\t" + "str x4, [%[r], 16]\n\t" + "# A[0] * A[3]\n\t" + "mul x8, x10, x13\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x10, x13\n\t" + "adc x3, x3, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x2, x2, x8\n\t" + "# A[1] * A[2]\n\t" + "mul x8, x11, x12\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x11, x12\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "str x2, [%[r], 24]\n\t" + "# A[0] * A[4]\n\t" + "mul x8, x10, x14\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x10, x14\n\t" + "adc x4, x4, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "# A[1] * A[3]\n\t" + "mul x8, x11, x13\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x11, x13\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "# A[2] * A[2]\n\t" + "mul x8, x12, x12\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x12, x12\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "str x3, [%[r], 32]\n\t" + "# A[0] * A[5]\n\t" + "mul x5, x10, x15\n\t" + 
"adcs x4, x4, x9\n\t" + "umulh x6, x10, x15\n\t" + "adc x2, x2, xzr\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[4]\n\t" + "mul x8, x11, x14\n\t" + "umulh x9, x11, x14\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[3]\n\t" + "mul x8, x12, x13\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x13\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 40]\n\t" + "# A[0] * A[6]\n\t" + "mul x5, x10, x16\n\t" + "umulh x6, x10, x16\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[5]\n\t" + "mul x8, x11, x15\n\t" + "umulh x9, x11, x15\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[4]\n\t" + "mul x8, x12, x14\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x14\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[3]\n\t" + "mul x8, x13, x13\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x13\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 48]\n\t" + "# A[0] * A[7]\n\t" + "mul x5, x10, x17\n\t" + "umulh x6, x10, x17\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[6]\n\t" + "mul x8, x11, x16\n\t" + "umulh x9, x11, x16\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[5]\n\t" + "mul x8, x12, x15\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x15\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[4]\n\t" + "mul x8, x13, x14\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x14\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 56]\n\t" + "# A[0] * A[8]\n\t" + "mul x5, x10, x19\n\t" + "umulh x6, x10, x19\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[7]\n\t" + "mul x8, x11, x17\n\t" + "umulh x9, x11, x17\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[6]\n\t" + "mul x8, x12, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[5]\n\t" + "mul x8, x13, x15\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x15\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[4]\n\t" + "mul x8, x14, x14\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x14\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 64]\n\t" + "# A[0] * A[9]\n\t" + "mul x5, x10, x20\n\t" + "umulh x6, x10, x20\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[8]\n\t" + "mul x8, x11, x19\n\t" + "umulh x9, x11, x19\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[7]\n\t" + "mul x8, x12, x17\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x17\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[6]\n\t" + "mul x8, x13, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[5]\n\t" + "mul x8, x14, x15\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x15\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, 
xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 72]\n\t" + "# A[0] * A[10]\n\t" + "mul x5, x10, x21\n\t" + "umulh x6, x10, x21\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[9]\n\t" + "mul x8, x11, x20\n\t" + "umulh x9, x11, x20\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[8]\n\t" + "mul x8, x12, x19\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x19\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[7]\n\t" + "mul x8, x13, x17\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x17\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[6]\n\t" + "mul x8, x14, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[5]\n\t" + "mul x8, x15, x15\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x15\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 80]\n\t" + "# A[0] * A[11]\n\t" + "mul x5, x10, x22\n\t" + "umulh x6, x10, x22\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[10]\n\t" + "mul x8, x11, x21\n\t" + "umulh x9, x11, x21\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[9]\n\t" + "mul x8, x12, x20\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x20\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[8]\n\t" + "mul x8, x13, x19\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x19\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[7]\n\t" + "mul x8, x14, x17\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x17\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[6]\n\t" + "mul x8, x15, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 88]\n\t" + "# A[1] * A[11]\n\t" + "mul x5, x11, x22\n\t" + "umulh x6, x11, x22\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[2] * A[10]\n\t" + "mul x8, x12, x21\n\t" + "umulh x9, x12, x21\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[9]\n\t" + "mul x8, x13, x20\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x20\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[8]\n\t" + "mul x8, x14, x19\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x19\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[7]\n\t" + "mul x8, x15, x17\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x17\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[6] * A[6]\n\t" + "mul x8, x16, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x16, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 96]\n\t" + "# A[2] * A[11]\n\t" + "mul x5, x12, x22\n\t" + "umulh x6, x12, x22\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[3] * A[10]\n\t" + "mul x8, x13, x21\n\t" + "umulh x9, x13, x21\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[9]\n\t" + "mul x8, x14, x20\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x20\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, 
x5, x8\n\t" + "# A[5] * A[8]\n\t" + "mul x8, x15, x19\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x19\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[6] * A[7]\n\t" + "mul x8, x16, x17\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x16, x17\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 104]\n\t" + "# A[3] * A[11]\n\t" + "mul x5, x13, x22\n\t" + "umulh x6, x13, x22\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[4] * A[10]\n\t" + "mul x8, x14, x21\n\t" + "umulh x9, x14, x21\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[9]\n\t" + "mul x8, x15, x20\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x20\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[6] * A[8]\n\t" + "mul x8, x16, x19\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x16, x19\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[7] * A[7]\n\t" + "mul x8, x17, x17\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x17, x17\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 112]\n\t" + "# A[4] * A[11]\n\t" + "mul x5, x14, x22\n\t" + "umulh x6, x14, x22\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[5] * A[10]\n\t" + "mul x8, x15, x21\n\t" + "umulh x9, x15, x21\n\t" + "adds x5, x5, x8\n\t" + "# A[6] * A[9]\n\t" + "mul x8, x16, x20\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x16, x20\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[7] * A[8]\n\t" + "mul x8, x17, x19\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x17, x19\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 120]\n\t" + "# A[5] * A[11]\n\t" + "mul x5, x15, x22\n\t" + "umulh x6, x15, x22\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[6] * A[10]\n\t" + "mul x8, x16, x21\n\t" + "umulh x9, x16, x21\n\t" + "adds x5, x5, x8\n\t" + "# A[7] * A[9]\n\t" + "mul x8, x17, x20\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x17, x20\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[8] * A[8]\n\t" + "mul x8, x19, x19\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x19, x19\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 128]\n\t" + "# A[6] * A[11]\n\t" + "mul x5, x16, x22\n\t" + "umulh x6, x16, x22\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[7] * A[10]\n\t" + "mul x8, x17, x21\n\t" + "umulh x9, x17, x21\n\t" + "adds x5, x5, x8\n\t" + "# A[8] * A[9]\n\t" + "mul x8, x19, x20\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x19, x20\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 136]\n\t" + "# A[7] * A[11]\n\t" + "mul x8, x17, x22\n\t" + "umulh x9, x17, x22\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, xzr, xzr\n\t" + 
"adds x2, x2, x8\n\t" + "# A[8] * A[10]\n\t" + "mul x8, x19, x21\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x19, x21\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "# A[9] * A[9]\n\t" + "mul x8, x20, x20\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x20, x20\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "str x2, [%[r], 144]\n\t" + "# A[8] * A[11]\n\t" + "mul x8, x19, x22\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x19, x22\n\t" + "adc x4, x4, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "# A[9] * A[10]\n\t" + "mul x8, x20, x21\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x20, x21\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "str x3, [%[r], 152]\n\t" + "# A[9] * A[11]\n\t" + "mul x8, x20, x22\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x20, x22\n\t" + "adc x2, x2, xzr\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x8\n\t" + "# A[10] * A[10]\n\t" + "mul x8, x21, x21\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x21, x21\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x8\n\t" + "str x4, [%[r], 160]\n\t" + "# A[10] * A[11]\n\t" + "mul x8, x21, x22\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x21, x22\n\t" + "adc x3, x3, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x2, x2, x8\n\t" + "str x2, [%[r], 168]\n\t" + "# A[11] * A[11]\n\t" + "mul x8, x22, x22\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x22, x22\n\t" + "adc x4, x4, xzr\n\t" + "adds x3, x3, x8\n\t" + "adc x4, x4, x9\n\t" + "stp x3, x4, [%[r], 176]\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "x2", "x3", "x4", "x8", "x9", "x10", "x5", "x6", "x7", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22" + ); +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "adds x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 48]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 80]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return (sp_digit)r; +} + +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. 
+ */ +static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x2, x3, [%[a], 0]\n\t" + "ldp x6, x7, [%[b], 0]\n\t" + "subs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 16]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 16]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 0]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 16]\n\t" + "ldp x2, x3, [%[a], 32]\n\t" + "ldp x6, x7, [%[b], 32]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 48]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 48]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 32]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 48]\n\t" + "ldp x2, x3, [%[a], 64]\n\t" + "ldp x6, x7, [%[b], 64]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 80]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 80]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 64]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 80]\n\t" + "ldp x2, x3, [%[a], 96]\n\t" + "ldp x6, x7, [%[b], 96]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 112]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 112]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 96]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 112]\n\t" + "ldp x2, x3, [%[a], 128]\n\t" + "ldp x6, x7, [%[b], 128]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 144]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 144]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 128]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 144]\n\t" + "ldp x2, x3, [%[a], 160]\n\t" + "ldp x6, x7, [%[b], 160]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 176]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 176]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 160]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 176]\n\t" + "csetm %[a], cc\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); + + return (sp_digit)a; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
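+ * + * One unrolled adds/adcs chain over ldp/stp pairs; the closing cset + * materialises the carry flag, so the return value is 1 exactly when the + * 24-word addition overflows.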
+ */ +static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "adds x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 48]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 80]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 112]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 112]\n\t" + "ldp x3, x4, [%[a], 128]\n\t" + "ldp x7, x8, [%[b], 128]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 144]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 128]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 144]\n\t" + "ldp x3, x4, [%[a], 160]\n\t" + "ldp x7, x8, [%[b], 160]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 176]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 160]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 176]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return (sp_digit)r; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<12; i++) { + r[i] = a[i] & m; + } +#else + r[0] = a[0] & m; + r[1] = a[1] & m; + r[2] = a[2] & m; + r[3] = a[3] & m; + r[4] = a[4] & m; + r[5] = a[5] & m; + r[6] = a[6] & m; + r[7] = a[7] & m; + r[8] = a[8] & m; + r[9] = a[9] & m; + r[10] = a[10] & m; + r[11] = a[11] & m; +#endif +} + +/* Add digit to a into r. (r = a + d) + * + * r A single precision integer. + * a A single precision integer. + * d A single precision digit to add.
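+ * + * Only the bottom word is added with d; the remaining words just + * propagate the carry (adcs against xzr). It is used below to fold the + * accumulated Karatsuba carry u into the top 12 words of the product.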
+ */ +static void sp_3072_add_zero_12(sp_digit* r, const sp_digit* a, + const sp_digit d) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adds x3, x3, %[d]\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 80]\n\t" + : + : [r] "r" (r), [a] "r" (a), [d] "r" (d) + : "memory", "x3", "x4", "x5", "x6" + ); +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[24]; + sp_digit a1[12]; + sp_digit b1[12]; + sp_digit z2[24]; + sp_digit u, ca, cb; + + ca = sp_3072_add_12(a1, a, &a[12]); + cb = sp_3072_add_12(b1, b, &b[12]); + u = ca & cb; + sp_3072_mul_12(z1, a1, b1); + sp_3072_mul_12(z2, &a[12], &b[12]); + sp_3072_mul_12(z0, a, b); + sp_3072_mask_12(r + 24, a1, 0 - cb); + sp_3072_mask_12(b1, b1, 0 - ca); + u += sp_3072_add_12(r + 24, r + 24, b1); + u += sp_3072_sub_in_place_24(z1, z2); + u += sp_3072_sub_in_place_24(z1, z0); + u += sp_3072_add_24(r + 12, r + 12, z1); + u += sp_3072_add_12(r + 24, r + 24, z2); + sp_3072_add_zero_12(r + 36, z2 + 12, u); +} + +#ifdef WOLFSSL_SP_SMALL +/* Double a into r. (r = a + a) + * + * r A single precision integer. + * a A single precision integer. + */ +static sp_digit sp_3072_dbl_12(sp_digit* r, const sp_digit* a) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x11, %[a], 96\n\t" + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldp x3, x4, [%[a]], #16\n\t" + "ldp x5, x6, [%[a]], #16\n\t" + "adcs x3, x3, x3\n\t" + "adcs x4, x4, x4\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r]], #16\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r]], #16\n\t" + "cset %[c], cs\n\t" + "cmp %[a], x11\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a) + : + : "memory", "x3", "x4", "x5", "x6", "x11" + ); + + return c; +} + +#else +/* Double a into r. (r = a + a) + * + * r A single precision integer. + * a A single precision integer. 
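+ * + * Unrolled variant: the carry stays live in the flags for all twelve + * words. (The WOLFSSL_SP_SMALL loop above instead saves the carry in + * %[c] and re-arms the flag each iteration with adds c, c, #-1, which + * sets carry exactly when c is 1.)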
+ */ +static sp_digit sp_3072_dbl_12(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "adds x3, x3, x3\n\t" + "ldr x5, [%[a], 16]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 24]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 48]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 56]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 80]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 88]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a) + : "memory", "x3", "x4", "x5", "x6" + ); + + return (sp_digit)r; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[24]; + sp_digit z1[24]; + sp_digit a1[12]; + sp_digit u; + + u = sp_3072_add_12(a1, a, &a[12]); + sp_3072_sqr_12(z1, a1); + sp_3072_sqr_12(z2, &a[12]); + sp_3072_sqr_12(z0, a); + sp_3072_mask_12(r + 24, a1, 0 - u); + u += sp_3072_dbl_12(r + 24, r + 24); + u += sp_3072_sub_in_place_24(z1, z2); + u += sp_3072_sub_in_place_24(z1, z0); + u += sp_3072_add_24(r + 12, r + 12, z1); + u += sp_3072_add_12(r + 24, r + 24, z2); + sp_3072_add_zero_12(r + 36, z2 + 12, u); + +} + +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. 
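+ * + * As with sp_3072_sub_in_place_24, the borrow comes back as 0 or + * all-ones, so u += sp_3072_sub_in_place_48(...) in the Karatsuba + * recombination below decrements the running carry word exactly when + * the subtraction borrows.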
+ */ +static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x2, x3, [%[a], 0]\n\t" + "ldp x6, x7, [%[b], 0]\n\t" + "subs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 16]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 16]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 0]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 16]\n\t" + "ldp x2, x3, [%[a], 32]\n\t" + "ldp x6, x7, [%[b], 32]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 48]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 48]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 32]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 48]\n\t" + "ldp x2, x3, [%[a], 64]\n\t" + "ldp x6, x7, [%[b], 64]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 80]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 80]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 64]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 80]\n\t" + "ldp x2, x3, [%[a], 96]\n\t" + "ldp x6, x7, [%[b], 96]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 112]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 112]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 96]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 112]\n\t" + "ldp x2, x3, [%[a], 128]\n\t" + "ldp x6, x7, [%[b], 128]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 144]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 144]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 128]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 144]\n\t" + "ldp x2, x3, [%[a], 160]\n\t" + "ldp x6, x7, [%[b], 160]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 176]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 176]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 160]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 176]\n\t" + "ldp x2, x3, [%[a], 192]\n\t" + "ldp x6, x7, [%[b], 192]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 208]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 208]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 192]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 208]\n\t" + "ldp x2, x3, [%[a], 224]\n\t" + "ldp x6, x7, [%[b], 224]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 240]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 240]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 224]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 240]\n\t" + "ldp x2, x3, [%[a], 256]\n\t" + "ldp x6, x7, [%[b], 256]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 272]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 272]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 256]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 272]\n\t" + "ldp x2, x3, [%[a], 288]\n\t" + "ldp x6, x7, [%[b], 288]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 304]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 304]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 288]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 304]\n\t" + "ldp x2, x3, [%[a], 320]\n\t" + "ldp x6, x7, [%[b], 320]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 336]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 336]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 320]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 336]\n\t" + "ldp x2, x3, [%[a], 352]\n\t" + "ldp x6, x7, [%[b], 352]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 368]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 368]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 352]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 368]\n\t" + "csetm %[a], 
cc\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); + + return (sp_digit)a; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "adds x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 48]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 80]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 112]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 112]\n\t" + "ldp x3, x4, [%[a], 128]\n\t" + "ldp x7, x8, [%[b], 128]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 144]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 128]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 144]\n\t" + "ldp x3, x4, [%[a], 160]\n\t" + "ldp x7, x8, [%[b], 160]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 176]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 160]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 176]\n\t" + "ldp x3, x4, [%[a], 192]\n\t" + "ldp x7, x8, [%[b], 192]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 208]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 208]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 192]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 208]\n\t" + "ldp x3, x4, [%[a], 224]\n\t" + "ldp x7, x8, [%[b], 224]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 240]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 240]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 224]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 240]\n\t" + "ldp x3, x4, [%[a], 256]\n\t" + "ldp x7, x8, [%[b], 256]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 272]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 272]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 256]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 272]\n\t" + "ldp x3, x4, [%[a], 288]\n\t" + "ldp x7, x8, [%[b], 288]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 304]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 304]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 288]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 304]\n\t" + "ldp x3, x4, [%[a], 320]\n\t" + "ldp x7, x8, [%[b], 320]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 336]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 336]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 320]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 
336]\n\t" + "ldp x3, x4, [%[a], 352]\n\t" + "ldp x7, x8, [%[b], 352]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 368]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 368]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 352]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 368]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return (sp_digit)r; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<24; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 24; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Add digit to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_3072_add_zero_24(sp_digit* r, const sp_digit* a, + const sp_digit d) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adds x3, x3, %[d]\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 112]\n\t" + "ldp x3, x4, [%[a], 128]\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 128]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 144]\n\t" + "ldp x3, x4, [%[a], 160]\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 160]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 176]\n\t" + : + : [r] "r" (r), [a] "r" (a), [d] "r" (d) + : "memory", "x3", "x4", "x5", "x6" + ); +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[48]; + sp_digit a1[24]; + sp_digit b1[24]; + sp_digit z2[48]; + sp_digit u, ca, cb; + + ca = sp_3072_add_24(a1, a, &a[24]); + cb = sp_3072_add_24(b1, b, &b[24]); + u = ca & cb; + sp_3072_mul_24(z1, a1, b1); + sp_3072_mul_24(z2, &a[24], &b[24]); + sp_3072_mul_24(z0, a, b); + sp_3072_mask_24(r + 48, a1, 0 - cb); + sp_3072_mask_24(b1, b1, 0 - ca); + u += sp_3072_add_24(r + 48, r + 48, b1); + u += sp_3072_sub_in_place_48(z1, z2); + u += sp_3072_sub_in_place_48(z1, z0); + u += sp_3072_add_48(r + 24, r + 24, z1); + u += sp_3072_add_24(r + 48, r + 48, z2); + sp_3072_add_zero_24(r + 72, z2 + 24, u); +} + +#ifdef WOLFSSL_SP_SMALL +/* Double a into r. (r = a + a) + * + * r A single precision integer. + * a A single precision integer. + */ +static sp_digit sp_3072_dbl_24(sp_digit* r, const sp_digit* a) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x11, %[a], 192\n\t" + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldp x3, x4, [%[a]], #16\n\t" + "ldp x5, x6, [%[a]], #16\n\t" + "adcs x3, x3, x3\n\t" + "adcs x4, x4, x4\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r]], #16\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r]], #16\n\t" + "cset %[c], cs\n\t" + "cmp %[a], x11\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a) + : + : "memory", "x3", "x4", "x5", "x6", "x11" + ); + + return c; +} + +#else +/* Double a into r. (r = a + a) + * + * r A single precision integer. + * a A single precision integer. + */ +static sp_digit sp_3072_dbl_24(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "adds x3, x3, x3\n\t" + "ldr x5, [%[a], 16]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 24]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 48]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 56]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 80]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 88]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 112]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 120]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 112]\n\t" + "ldp x3, x4, [%[a], 128]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 144]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 152]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 128]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 144]\n\t" + "ldp x3, x4, [%[a], 160]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 176]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 184]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 160]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 176]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a) + : "memory", "x3", "x4", "x5", "x6" + ); + + return (sp_digit)r; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
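+ * + * Squaring specialisation of the Karatsuba multiply: only one half-sum + * a0+a1 is needed, z1 = (a0+a1)^2, and its single carry u selects a + * masked copy of the half-sum which sp_3072_dbl_24 then doubles to + * account for the carry's cross term in the square.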
+ */ +SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[48]; + sp_digit z1[48]; + sp_digit a1[24]; + sp_digit u; + + u = sp_3072_add_24(a1, a, &a[24]); + sp_3072_sqr_24(z1, a1); + sp_3072_sqr_24(z2, &a[24]); + sp_3072_sqr_24(z0, a); + sp_3072_mask_24(r + 48, a1, 0 - u); + u += sp_3072_dbl_24(r + 48, r + 48); + u += sp_3072_sub_in_place_48(z1, z2); + u += sp_3072_sub_in_place_48(z1, z0); + u += sp_3072_add_48(r + 24, r + 24, z1); + u += sp_3072_add_24(r + 48, r + 48, z2); + sp_3072_add_zero_24(r + 72, z2 + 24, u); + +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x11, %[a], 384\n\t" + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldp x3, x4, [%[a]], #16\n\t" + "ldp x5, x6, [%[a]], #16\n\t" + "ldp x7, x8, [%[b]], #16\n\t" + "adcs x3, x3, x7\n\t" + "ldp x9, x10, [%[b]], #16\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r]], #16\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r]], #16\n\t" + "cset %[c], cs\n\t" + "cmp %[a], x11\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x10, %[a], 384\n\t" + "\n1:\n\t" + "subs %[c], xzr, %[c]\n\t" + "ldp x2, x3, [%[a]]\n\t" + "ldp x4, x5, [%[a], #16]\n\t" + "ldp x6, x7, [%[b]], #16\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x8, x9, [%[b]], #16\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a]], #16\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a]], #16\n\t" + "csetm %[c], cc\n\t" + "cmp %[a], x10\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_digit tmp[96]; + + __asm__ __volatile__ ( + "mov x5, 0\n\t" + "mov x6, 0\n\t" + "mov x7, 0\n\t" + "mov x8, 0\n\t" + "\n1:\n\t" + "subs x3, x5, 376\n\t" + "csel x3, xzr, x3, cc\n\t" + "sub x4, x5, x3\n\t" + "\n2:\n\t" + "ldr x10, [%[a], x3]\n\t" + "ldr x11, [%[b], x4]\n\t" + "mul x9, x10, x11\n\t" + "umulh x10, x10, x11\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "add x3, x3, #8\n\t" + "sub x4, x4, #8\n\t" + "cmp x3, 384\n\t" + "b.eq 3f\n\t" + "cmp x3, x5\n\t" + "b.le 2b\n\t" + "\n3:\n\t" + "str x6, [%[r], x5]\n\t" + "mov x6, x7\n\t" + "mov x7, x8\n\t" + "mov x8, #0\n\t" + "add x5, x5, #8\n\t" + "cmp x5, 752\n\t" + "b.le 1b\n\t" + "str x6, [%[r], x5]\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. 
(r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) +{ + sp_digit tmp[96]; + + __asm__ __volatile__ ( + "mov x6, 0\n\t" + "mov x7, 0\n\t" + "mov x8, 0\n\t" + "mov x5, 0\n\t" + "\n1:\n\t" + "subs x3, x5, 376\n\t" + "csel x3, xzr, x3, cc\n\t" + "sub x4, x5, x3\n\t" + "\n2:\n\t" + "cmp x4, x3\n\t" + "b.eq 4f\n\t" + "ldr x10, [%[a], x3]\n\t" + "ldr x11, [%[a], x4]\n\t" + "mul x9, x10, x11\n\t" + "umulh x10, x10, x11\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "b.al 5f\n\t" + "\n4:\n\t" + "ldr x10, [%[a], x3]\n\t" + "mul x9, x10, x10\n\t" + "umulh x10, x10, x10\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "\n5:\n\t" + "add x3, x3, #8\n\t" + "sub x4, x4, #8\n\t" + "cmp x3, 384\n\t" + "b.eq 3f\n\t" + "cmp x3, x4\n\t" + "b.gt 3f\n\t" + "cmp x3, x5\n\t" + "b.le 2b\n\t" + "\n3:\n\t" + "str x6, [%[r], x5]\n\t" + "mov x6, x7\n\t" + "mov x7, x8\n\t" + "mov x8, #0\n\t" + "add x5, x5, #8\n\t" + "cmp x5, 752\n\t" + "b.le 1b\n\t" + "str x6, [%[r], x5]\n\t" + : + : [r] "r" (tmp), [a] "r" (a) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +#endif /* WOLFSSL_SP_SMALL */ +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#ifdef WOLFSSL_SP_SMALL +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m) +{ + int i; + + for (i=0; i<24; i++) { + r[i] = a[i] & m; + } +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x11, %[a], 192\n\t" + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldp x3, x4, [%[a]], #16\n\t" + "ldp x5, x6, [%[a]], #16\n\t" + "ldp x7, x8, [%[b]], #16\n\t" + "adcs x3, x3, x7\n\t" + "ldp x9, x10, [%[b]], #16\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r]], #16\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r]], #16\n\t" + "cset %[c], cs\n\t" + "cmp %[a], x11\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. 
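+ *
+ * Returns the final borrow as 0 or -1.  Between loop iterations the
+ * borrow is parked in c: "subs %[c], xzr, %[c]" re-arms the carry flag
+ * from it on entry and "csetm %[c], cc" captures it again on exit.
+ * Per digit this amounts to the following illustrative portable C
+ * (a sketch only, assuming a compiler with unsigned __int128):
+ *
+ *   t = (unsigned __int128)a[i] - b[i] - borrow;
+ *   a[i] = (sp_digit)t;
+ *   borrow = (sp_digit)(t >> 64) & 1;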
+ */
+static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "add x10, %[a], 192\n\t"
+ "\n1:\n\t"
+ "subs %[c], xzr, %[c]\n\t"
+ "ldp x2, x3, [%[a]]\n\t"
+ "ldp x4, x5, [%[a], #16]\n\t"
+ "ldp x6, x7, [%[b]], #16\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x8, x9, [%[b]], #16\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a]], #16\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a]], #16\n\t"
+ "csetm %[c], cc\n\t"
+ "cmp %[a], x10\n\t"
+ "b.ne 1b\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ sp_digit tmp[48];
+
+ __asm__ __volatile__ (
+ "mov x5, 0\n\t"
+ "mov x6, 0\n\t"
+ "mov x7, 0\n\t"
+ "mov x8, 0\n\t"
+ "\n1:\n\t"
+ "subs x3, x5, 184\n\t"
+ "csel x3, xzr, x3, cc\n\t"
+ "sub x4, x5, x3\n\t"
+ "\n2:\n\t"
+ "ldr x10, [%[a], x3]\n\t"
+ "ldr x11, [%[b], x4]\n\t"
+ "mul x9, x10, x11\n\t"
+ "umulh x10, x10, x11\n\t"
+ "adds x6, x6, x9\n\t"
+ "adcs x7, x7, x10\n\t"
+ "adc x8, x8, xzr\n\t"
+ "add x3, x3, #8\n\t"
+ "sub x4, x4, #8\n\t"
+ "cmp x3, 192\n\t"
+ "b.eq 3f\n\t"
+ "cmp x3, x5\n\t"
+ "b.le 2b\n\t"
+ "\n3:\n\t"
+ "str x6, [%[r], x5]\n\t"
+ "mov x6, x7\n\t"
+ "mov x7, x8\n\t"
+ "mov x8, #0\n\t"
+ "add x5, x5, #8\n\t"
+ "cmp x5, 368\n\t"
+ "b.le 1b\n\t"
+ "str x6, [%[r], x5]\n\t"
+ :
+ : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+ );
+
+ XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a)
+{
+ sp_digit tmp[48];
+
+ __asm__ __volatile__ (
+ "mov x6, 0\n\t"
+ "mov x7, 0\n\t"
+ "mov x8, 0\n\t"
+ "mov x5, 0\n\t"
+ "\n1:\n\t"
+ "subs x3, x5, 184\n\t"
+ "csel x3, xzr, x3, cc\n\t"
+ "sub x4, x5, x3\n\t"
+ "\n2:\n\t"
+ "cmp x4, x3\n\t"
+ "b.eq 4f\n\t"
+ "ldr x10, [%[a], x3]\n\t"
+ "ldr x11, [%[a], x4]\n\t"
+ "mul x9, x10, x11\n\t"
+ "umulh x10, x10, x11\n\t"
+ "adds x6, x6, x9\n\t"
+ "adcs x7, x7, x10\n\t"
+ "adc x8, x8, xzr\n\t"
+ "adds x6, x6, x9\n\t"
+ "adcs x7, x7, x10\n\t"
+ "adc x8, x8, xzr\n\t"
+ "b.al 5f\n\t"
+ "\n4:\n\t"
+ "ldr x10, [%[a], x3]\n\t"
+ "mul x9, x10, x10\n\t"
+ "umulh x10, x10, x10\n\t"
+ "adds x6, x6, x9\n\t"
+ "adcs x7, x7, x10\n\t"
+ "adc x8, x8, xzr\n\t"
+ "\n5:\n\t"
+ "add x3, x3, #8\n\t"
+ "sub x4, x4, #8\n\t"
+ "cmp x3, 192\n\t"
+ "b.eq 3f\n\t"
+ "cmp x3, x4\n\t"
+ "b.gt 3f\n\t"
+ "cmp x3, x5\n\t"
+ "b.le 2b\n\t"
+ "\n3:\n\t"
+ "str x6, [%[r], x5]\n\t"
+ "mov x6, x7\n\t"
+ "mov x7, x8\n\t"
+ "mov x8, #0\n\t"
+ "add x5, x5, #8\n\t"
+ "cmp x5, 368\n\t"
+ "b.le 1b\n\t"
+ "str x6, [%[r], x5]\n\t"
+ :
+ : [r] "r" (tmp), [a] "r" (a)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+ );
+
+ XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
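+ *
+ * The seed x satisfies x*a == 1 mod 2^4 and is lifted by the
+ * Newton-Hensel iteration x <- x*(2 - a*x), which doubles the number
+ * of correct low-order bits each step (4 -> 8 -> 16 -> 32 -> 64).
+ * rho is the negation -x mod 2^64, so that rho*a == -1 mod 2^64.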
+ */ +static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho) +{ + sp_digit x, b; + + b = a[0]; + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ + x *= 2 - b * x; /* here x*a==1 mod 2**64 */ + + /* rho = -1/m mod b */ + *rho = -x; +} + +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "# A[0] * B\n\t" + "ldr x8, [%[a]]\n\t" + "mul x5, %[b], x8\n\t" + "umulh x3, %[b], x8\n\t" + "mov x4, 0\n\t" + "str x5, [%[r]]\n\t" + "mov x5, 0\n\t" + "mov x9, #8\n\t" + "1:\n\t" + "ldr x8, [%[a], x9]\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], x9]\n\t" + "mov x3, x4\n\t" + "mov x4, x5\n\t" + "mov x5, #0\n\t" + "add x9, x9, #8\n\t" + "cmp x9, 384\n\t" + "b.lt 1b\n\t" + "str x3, [%[r], 384]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); +#else + __asm__ __volatile__ ( + "# A[0] * B\n\t" + "ldp x8, x9, [%[a]]\n\t" + "mul x3, %[b], x8\n\t" + "umulh x4, %[b], x8\n\t" + "mov x5, 0\n\t" + "# A[1] * B\n\t" + "str x3, [%[r]]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "umulh x7, %[b], x9\n\t" + "adds x4, x4, x6\n\t" + "# A[2] * B\n\t" + "ldp x8, x9, [%[a], 16]\n\t" + "str x4, [%[r], 8]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[3] * B\n\t" + "str x5, [%[r], 16]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[4] * B\n\t" + "ldp x8, x9, [%[a], 32]\n\t" + "str x3, [%[r], 24]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[5] * B\n\t" + "str x4, [%[r], 32]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[6] * B\n\t" + "ldp x8, x9, [%[a], 48]\n\t" + "str x5, [%[r], 40]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[7] * B\n\t" + "str x3, [%[r], 48]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[8] * B\n\t" + "ldp x8, x9, [%[a], 64]\n\t" + "str x4, [%[r], 56]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[9] * B\n\t" + "str x5, [%[r], 64]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[10] * B\n\t" + "ldp x8, x9, [%[a], 80]\n\t" + "str x3, [%[r], 72]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[11] * B\n\t" + "str x4, [%[r], 80]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, 
%[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[12] * B\n\t" + "ldp x8, x9, [%[a], 96]\n\t" + "str x5, [%[r], 88]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[13] * B\n\t" + "str x3, [%[r], 96]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[14] * B\n\t" + "ldp x8, x9, [%[a], 112]\n\t" + "str x4, [%[r], 104]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[15] * B\n\t" + "str x5, [%[r], 112]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[16] * B\n\t" + "ldp x8, x9, [%[a], 128]\n\t" + "str x3, [%[r], 120]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[17] * B\n\t" + "str x4, [%[r], 128]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[18] * B\n\t" + "ldp x8, x9, [%[a], 144]\n\t" + "str x5, [%[r], 136]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[19] * B\n\t" + "str x3, [%[r], 144]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[20] * B\n\t" + "ldp x8, x9, [%[a], 160]\n\t" + "str x4, [%[r], 152]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[21] * B\n\t" + "str x5, [%[r], 160]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[22] * B\n\t" + "ldp x8, x9, [%[a], 176]\n\t" + "str x3, [%[r], 168]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[23] * B\n\t" + "str x4, [%[r], 176]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[24] * B\n\t" + "ldp x8, x9, [%[a], 192]\n\t" + "str x5, [%[r], 184]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[25] * B\n\t" + "str x3, [%[r], 192]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[26] * B\n\t" + "ldp x8, x9, [%[a], 208]\n\t" + "str x4, [%[r], 200]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[27] * B\n\t" + "str x5, [%[r], 208]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[28] * B\n\t" + "ldp x8, x9, [%[a], 224]\n\t" + "str x3, [%[r], 216]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc 
x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[29] * B\n\t" + "str x4, [%[r], 224]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[30] * B\n\t" + "ldp x8, x9, [%[a], 240]\n\t" + "str x5, [%[r], 232]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[31] * B\n\t" + "str x3, [%[r], 240]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[32] * B\n\t" + "ldp x8, x9, [%[a], 256]\n\t" + "str x4, [%[r], 248]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[33] * B\n\t" + "str x5, [%[r], 256]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[34] * B\n\t" + "ldp x8, x9, [%[a], 272]\n\t" + "str x3, [%[r], 264]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[35] * B\n\t" + "str x4, [%[r], 272]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[36] * B\n\t" + "ldp x8, x9, [%[a], 288]\n\t" + "str x5, [%[r], 280]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[37] * B\n\t" + "str x3, [%[r], 288]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[38] * B\n\t" + "ldp x8, x9, [%[a], 304]\n\t" + "str x4, [%[r], 296]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[39] * B\n\t" + "str x5, [%[r], 304]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[40] * B\n\t" + "ldp x8, x9, [%[a], 320]\n\t" + "str x3, [%[r], 312]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[41] * B\n\t" + "str x4, [%[r], 320]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[42] * B\n\t" + "ldp x8, x9, [%[a], 336]\n\t" + "str x5, [%[r], 328]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[43] * B\n\t" + "str x3, [%[r], 336]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[44] * B\n\t" + "ldp x8, x9, [%[a], 352]\n\t" + "str x4, [%[r], 344]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[45] * B\n\t" + "str x5, [%[r], 352]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# 
A[46] * B\n\t" + "ldp x8, x9, [%[a], 368]\n\t" + "str x3, [%[r], 360]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[47] * B\n\t" + "str x4, [%[r], 368]\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "adc x3, x3, x7\n\t" + "stp x5, x3, [%[r], 376]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); +#endif +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 3072 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_3072_mont_norm_24(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 24); + + /* r = 2^n mod m */ + sp_3072_sub_in_place_24(r, m); +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_3072_cond_sub_24(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov x8, #0\n\t" + "1:\n\t" + "subs %[c], xzr, %[c]\n\t" + "ldr x4, [%[a], x8]\n\t" + "ldr x5, [%[b], x8]\n\t" + "and x5, x5, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "csetm %[c], cc\n\t" + "str x4, [%[r], x8]\n\t" + "add x8, x8, #8\n\t" + "cmp x8, 192\n\t" + "b.lt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return c; +#else + __asm__ __volatile__ ( + + "ldp x5, x7, [%[b], 0]\n\t" + "ldp x11, x12, [%[b], 16]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" + "and x7, x7, %[m]\n\t" + "subs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 16]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" + "ldp x11, x12, [%[b], 48]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 48]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 48]\n\t" + "ldp x5, x7, [%[b], 64]\n\t" + "ldp x11, x12, [%[b], 80]\n\t" + "ldp x4, x6, [%[a], 64]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 80]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 64]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 80]\n\t" + "ldp x5, x7, [%[b], 96]\n\t" + "ldp x11, x12, [%[b], 112]\n\t" + "ldp x4, x6, [%[a], 96]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 112]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 96]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 112]\n\t" + 
"ldp x5, x7, [%[b], 128]\n\t" + "ldp x11, x12, [%[b], 144]\n\t" + "ldp x4, x6, [%[a], 128]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 144]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 128]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 144]\n\t" + "ldp x5, x7, [%[b], 160]\n\t" + "ldp x11, x12, [%[b], 176]\n\t" + "ldp x4, x6, [%[a], 160]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 176]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 160]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 176]\n\t" + "csetm %[r], cc\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return (sp_digit)r; +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +SP_NOINLINE static void sp_3072_mont_reduce_24(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "ldp x14, x15, [%[m], 0]\n\t" + "ldp x16, x17, [%[m], 16]\n\t" + "ldp x19, x20, [%[m], 32]\n\t" + "ldp x21, x22, [%[m], 48]\n\t" + "ldp x23, x24, [%[m], 64]\n\t" + "ldp x25, x26, [%[m], 80]\n\t" + "ldp x27, x28, [%[m], 96]\n\t" + "# i = 24\n\t" + "mov x4, 24\n\t" + "ldp x12, x13, [%[a], 0]\n\t" + "\n1:\n\t" + "# mu = a[i] * mp\n\t" + "mul x9, %[mp], x12\n\t" + "# a[i+0] += m[0] * mu\n\t" + "mul x7, x14, x9\n\t" + "umulh x8, x14, x9\n\t" + "adds x12, x12, x7\n\t" + "# a[i+1] += m[1] * mu\n\t" + "mul x7, x15, x9\n\t" + "adc x6, x8, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x12, x13, x7\n\t" + "# a[i+2] += m[2] * mu\n\t" + "ldr x13, [%[a], 16]\n\t" + "adc x5, x8, xzr\n\t" + "mul x7, x16, x9\n\t" + "adds x12, x12, x6\n\t" + "umulh x8, x16, x9\n\t" + "adc x5, x5, xzr\n\t" + "adds x13, x13, x7\n\t" + "# a[i+3] += m[3] * mu\n\t" + "ldr x10, [%[a], 24]\n\t" + "adc x6, x8, xzr\n\t" + "mul x7, x17, x9\n\t" + "adds x13, x13, x5\n\t" + "umulh x8, x17, x9\n\t" + "adc x6, x6, xzr\n\t" + "adds x10, x10, x7\n\t" + "# a[i+4] += m[4] * mu\n\t" + "ldr x11, [%[a], 32]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x19, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x19, x9\n\t" + "str x10, [%[a], 24]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+5] += m[5] * mu\n\t" + "ldr x10, [%[a], 40]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x20, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x20, x9\n\t" + "str x11, [%[a], 32]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+6] += m[6] * mu\n\t" + "ldr x11, [%[a], 48]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x21, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x21, x9\n\t" + "str x10, [%[a], 40]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+7] += m[7] * mu\n\t" + "ldr x10, [%[a], 56]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x22, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x22, x9\n\t" + "str x11, [%[a], 48]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+8] += m[8] * mu\n\t" + "ldr x11, [%[a], 64]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x23, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x23, x9\n\t" 
+ "str x10, [%[a], 56]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+9] += m[9] * mu\n\t" + "ldr x10, [%[a], 72]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x24, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x24, x9\n\t" + "str x11, [%[a], 64]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+10] += m[10] * mu\n\t" + "ldr x11, [%[a], 80]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x25, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x25, x9\n\t" + "str x10, [%[a], 72]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+11] += m[11] * mu\n\t" + "ldr x10, [%[a], 88]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x26, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x26, x9\n\t" + "str x11, [%[a], 80]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+12] += m[12] * mu\n\t" + "ldr x11, [%[a], 96]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x27, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x27, x9\n\t" + "str x10, [%[a], 88]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+13] += m[13] * mu\n\t" + "ldr x10, [%[a], 104]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x28, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x28, x9\n\t" + "str x11, [%[a], 96]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+14] += m[14] * mu\n\t" + "ldr x11, [%[a], 112]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 112]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 104]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+15] += m[15] * mu\n\t" + "ldr x10, [%[a], 120]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 120]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 112]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+16] += m[16] * mu\n\t" + "ldr x11, [%[a], 128]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 128]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 120]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+17] += m[17] * mu\n\t" + "ldr x10, [%[a], 136]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 136]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 128]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+18] += m[18] * mu\n\t" + "ldr x11, [%[a], 144]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 144]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 136]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+19] += m[19] * mu\n\t" + "ldr x10, [%[a], 152]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 152]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 144]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+20] += m[20] * mu\n\t" + "ldr x11, [%[a], 160]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 160]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 152]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+21] += m[21] * mu\n\t" + "ldr x10, [%[a], 168]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 168]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 160]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+22] += m[22] * mu\n\t" + "ldr x11, [%[a], 176]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 176]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, 
xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 168]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+23] += m[23] * mu\n\t" + "ldr x10, [%[a], 184]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 184]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "adds x6, x6, x7\n\t" + "adcs x8, x8, %[ca]\n\t" + "str x11, [%[a], 176]\n\t" + "cset %[ca], cs\n\t" + "adds x10, x10, x6\n\t" + "ldr x11, [%[a], 192]\n\t" + "str x10, [%[a], 184]\n\t" + "adcs x11, x11, x8\n\t" + "str x11, [%[a], 192]\n\t" + "adc %[ca], %[ca], xzr\n\t" + "subs x4, x4, 1\n\t" + "add %[a], %[a], 8\n\t" + "bne 1b\n\t" + "stp x12, x13, [%[a], 0]\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + ); + + sp_3072_cond_sub_24(a - 24, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_3072_mont_mul_24(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_3072_mul_24(r, a, b); + sp_3072_mont_reduce_24(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_3072_mont_sqr_24(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_3072_sqr_24(r, a); + sp_3072_mont_reduce_24(r, m, mp); +} + +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
+ */ +static void sp_3072_mul_d_24(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "# A[0] * B\n\t" + "ldr x8, [%[a]]\n\t" + "mul x5, %[b], x8\n\t" + "umulh x3, %[b], x8\n\t" + "mov x4, 0\n\t" + "str x5, [%[r]]\n\t" + "mov x5, 0\n\t" + "mov x9, #8\n\t" + "1:\n\t" + "ldr x8, [%[a], x9]\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], x9]\n\t" + "mov x3, x4\n\t" + "mov x4, x5\n\t" + "mov x5, #0\n\t" + "add x9, x9, #8\n\t" + "cmp x9, 192\n\t" + "b.lt 1b\n\t" + "str x3, [%[r], 192]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); +#else + __asm__ __volatile__ ( + "# A[0] * B\n\t" + "ldp x8, x9, [%[a]]\n\t" + "mul x3, %[b], x8\n\t" + "umulh x4, %[b], x8\n\t" + "mov x5, 0\n\t" + "# A[1] * B\n\t" + "str x3, [%[r]]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "umulh x7, %[b], x9\n\t" + "adds x4, x4, x6\n\t" + "# A[2] * B\n\t" + "ldp x8, x9, [%[a], 16]\n\t" + "str x4, [%[r], 8]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[3] * B\n\t" + "str x5, [%[r], 16]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[4] * B\n\t" + "ldp x8, x9, [%[a], 32]\n\t" + "str x3, [%[r], 24]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[5] * B\n\t" + "str x4, [%[r], 32]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[6] * B\n\t" + "ldp x8, x9, [%[a], 48]\n\t" + "str x5, [%[r], 40]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[7] * B\n\t" + "str x3, [%[r], 48]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[8] * B\n\t" + "ldp x8, x9, [%[a], 64]\n\t" + "str x4, [%[r], 56]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[9] * B\n\t" + "str x5, [%[r], 64]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[10] * B\n\t" + "ldp x8, x9, [%[a], 80]\n\t" + "str x3, [%[r], 72]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[11] * B\n\t" + "str x4, [%[r], 80]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[12] * B\n\t" + "ldp x8, x9, [%[a], 96]\n\t" + "str x5, [%[r], 88]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[13] * B\n\t" + "str x3, [%[r], 96]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[14] * B\n\t" + "ldp x8, x9, [%[a], 112]\n\t" + "str x4, 
[%[r], 104]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[15] * B\n\t" + "str x5, [%[r], 112]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[16] * B\n\t" + "ldp x8, x9, [%[a], 128]\n\t" + "str x3, [%[r], 120]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[17] * B\n\t" + "str x4, [%[r], 128]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[18] * B\n\t" + "ldp x8, x9, [%[a], 144]\n\t" + "str x5, [%[r], 136]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[19] * B\n\t" + "str x3, [%[r], 144]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[20] * B\n\t" + "ldp x8, x9, [%[a], 160]\n\t" + "str x4, [%[r], 152]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[21] * B\n\t" + "str x5, [%[r], 160]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[22] * B\n\t" + "ldp x8, x9, [%[a], 176]\n\t" + "str x3, [%[r], 168]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[23] * B\n\t" + "str x4, [%[r], 176]\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "adc x3, x3, x7\n\t" + "stp x5, x3, [%[r], 184]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); +#endif +} + +/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The dividend. + * returns the result of the division. + */ +static sp_digit div_3072_word_24(sp_digit d1, sp_digit d0, sp_digit div) +{ + sp_digit r; + + __asm__ __volatile__ ( + "lsr x5, %[div], 32\n\t" + "add x5, x5, 1\n\t" + + "udiv x3, %[d1], x5\n\t" + "lsl x6, x3, 32\n\t" + "mul x4, %[div], x6\n\t" + "umulh x3, %[div], x6\n\t" + "subs %[d0], %[d0], x4\n\t" + "sbc %[d1], %[d1], x3\n\t" + + "udiv x3, %[d1], x5\n\t" + "lsl x3, x3, 32\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "umulh x3, %[div], x3\n\t" + "subs %[d0], %[d0], x4\n\t" + "sbc %[d1], %[d1], x3\n\t" + + "lsr x3, %[d0], 32\n\t" + "orr x3, x3, %[d1], lsl 32\n\t" + + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "umulh x3, %[div], x3\n\t" + "subs %[d0], %[d0], x4\n\t" + "sbc %[d1], %[d1], x3\n\t" + + "lsr x3, %[d0], 32\n\t" + "orr x3, x3, %[d1], lsl 32\n\t" + + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "sub %[d0], %[d0], x4\n\t" + + "udiv x3, %[d0], %[div]\n\t" + "add %[r], x6, x3\n\t" + + : [r] "=r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "x3", "x4", "x5", "x6" + ); + + return r; +} + +/* Compare a with b in constant time. 
+ * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static int64_t sp_3072_cmp_24(const sp_digit* a, const sp_digit* b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov x2, -1\n\t" + "mov x3, 1\n\t" + "mov x4, -1\n\t" + "mov x5, 184\n\t" + "1:\n\t" + "ldr x6, [%[a], x5]\n\t" + "ldr x7, [%[b], x5]\n\t" + "and x6, x6, x4\n\t" + "and x7, x7, x4\n\t" + "subs x6, x6, x7\n\t" + "csel x2, x3, x2, hi\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "subs x5, x5, #8\n\t" + "b.cs 1b\n\t" + "eor %[a], x2, x4\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + ); +#else + __asm__ __volatile__ ( + "mov x2, -1\n\t" + "mov x3, 1\n\t" + "mov x4, -1\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "ldp x7, x8, [%[b], 176]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 160]\n\t" + "ldp x7, x8, [%[b], 160]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "ldp x7, x8, [%[b], 144]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 128]\n\t" + "ldp x7, x8, [%[b], 128]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "ldp x7, x8, [%[b], 112]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 96]\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "ldp x7, x8, [%[b], 80]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 64]\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, 
x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "ldp x7, x8, [%[b], 48]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 32]\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "ldp x7, x8, [%[b], 16]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "eor %[a], x2, x4\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + ); +#endif + + return (int64_t)a; +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[48], t2[25]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[23]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 24); + for (i=23; i>=0; i--) { + r1 = div_3072_word_24(t1[24 + i], t1[24 + i - 1], div); + + sp_3072_mul_d_24(t2, d, r1); + t1[24 + i] += sp_3072_sub_in_place_24(&t1[i], t2); + t1[24 + i] -= t2[24]; + sp_3072_mask_24(t2, d, t1[24 + i]); + t1[24 + i] += sp_3072_add_24(&t1[i], &t1[i], t2); + sp_3072_mask_24(t2, d, t1[24 + i]); + t1[24 + i] += sp_3072_add_24(&t1[i], &t1[i], t2); + } + + r1 = sp_3072_cmp_24(t1, d) >= 0; + sp_3072_cond_sub_24(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_3072_mod_24(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_3072_div_24(a, m, NULL, r); +} + +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. 
+ * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][48]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 48, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 48; + } +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_24(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 24U); + if (reduceA != 0) { + err = sp_3072_mod_24(t[1] + 24, a, m); + if (err == MP_OKAY) { + err = sp_3072_mod_24(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 24, a, sizeof(sp_digit) * 24); + err = sp_3072_mod_24(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_24(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_24(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_24(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_24(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_24(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_24(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_24(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_24(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_24(t[10], t[ 5], m, mp); + sp_3072_mont_mul_24(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_24(t[12], t[ 6], m, mp); + sp_3072_mont_mul_24(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_24(t[14], t[ 7], m, mp); + sp_3072_mont_mul_24(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 4; + if (c == 64) { + c = 60; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 24); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 60; + n <<= 4; + c = 60; + } + else if (c < 4) { + y = n >> 60; + n = e[i--]; + c = 4 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 60) & 0xf; + n <<= 4; + c -= 4; + } + + sp_3072_mont_sqr_24(r, r, m, mp); + sp_3072_mont_sqr_24(r, r, m, mp); + sp_3072_mont_sqr_24(r, r, m, mp); + sp_3072_mont_sqr_24(r, r, m, mp); + + sp_3072_mont_mul_24(r, r, t[y], m, mp); + } + + XMEMSET(&r[24], 0, sizeof(sp_digit) * 24U); + sp_3072_mont_reduce_24(r, m, mp); + + mask = 0 - (sp_3072_cmp_24(r, m) >= 0); + sp_3072_cond_sub_24(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
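+ *
+ * A fixed 5-bit window is used: t[j] holds a^j in Montgomery form for
+ * j = 1..31 and each iteration folds in the next five exponent bits y
+ * as
+ *
+ *   r = r^(2^5) * t[y] mod m
+ *
+ * i.e. five Montgomery squarings then one Montgomery multiply.  (The
+ * WOLFSSL_SP_SMALL variant above does the same with a 16-entry table
+ * and 4-bit windows to halve the table memory.)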
+ */ +static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][48]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 48, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 48; + } +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_24(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 24U); + if (reduceA != 0) { + err = sp_3072_mod_24(t[1] + 24, a, m); + if (err == MP_OKAY) { + err = sp_3072_mod_24(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 24, a, sizeof(sp_digit) * 24); + err = sp_3072_mod_24(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_24(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_24(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_24(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_24(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_24(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_24(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_24(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_24(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_24(t[10], t[ 5], m, mp); + sp_3072_mont_mul_24(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_24(t[12], t[ 6], m, mp); + sp_3072_mont_mul_24(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_24(t[14], t[ 7], m, mp); + sp_3072_mont_mul_24(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_24(t[16], t[ 8], m, mp); + sp_3072_mont_mul_24(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_24(t[18], t[ 9], m, mp); + sp_3072_mont_mul_24(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_24(t[20], t[10], m, mp); + sp_3072_mont_mul_24(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_24(t[22], t[11], m, mp); + sp_3072_mont_mul_24(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_24(t[24], t[12], m, mp); + sp_3072_mont_mul_24(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_24(t[26], t[13], m, mp); + sp_3072_mont_mul_24(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_24(t[28], t[14], m, mp); + sp_3072_mont_mul_24(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_24(t[30], t[15], m, mp); + sp_3072_mont_mul_24(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 5; + if (c == 64) { + c = 59; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 24); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 59; + n <<= 5; + c = 59; + } + else if (c < 5) { + y = n >> 59; + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_3072_mont_sqr_24(r, r, m, mp); + sp_3072_mont_sqr_24(r, r, m, mp); + sp_3072_mont_sqr_24(r, r, m, mp); + sp_3072_mont_sqr_24(r, r, m, mp); + sp_3072_mont_sqr_24(r, r, m, mp); + + sp_3072_mont_mul_24(r, r, t[y], m, mp); + } + + XMEMSET(&r[24], 0, sizeof(sp_digit) * 24U); + sp_3072_mont_reduce_24(r, m, mp); + + mask = 0 - (sp_3072_cmp_24(r, m) >= 0); + sp_3072_cond_sub_24(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ + +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && 
!WOLFSSL_RSA_PUBLIC_ONLY */ + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 3072 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 48); + + /* r = 2^n mod m */ + sp_3072_sub_in_place_48(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov x8, #0\n\t" + "1:\n\t" + "subs %[c], xzr, %[c]\n\t" + "ldr x4, [%[a], x8]\n\t" + "ldr x5, [%[b], x8]\n\t" + "and x5, x5, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "csetm %[c], cc\n\t" + "str x4, [%[r], x8]\n\t" + "add x8, x8, #8\n\t" + "cmp x8, 384\n\t" + "b.lt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return c; +#else + __asm__ __volatile__ ( + + "ldp x5, x7, [%[b], 0]\n\t" + "ldp x11, x12, [%[b], 16]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" + "and x7, x7, %[m]\n\t" + "subs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 16]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" + "ldp x11, x12, [%[b], 48]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 48]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 48]\n\t" + "ldp x5, x7, [%[b], 64]\n\t" + "ldp x11, x12, [%[b], 80]\n\t" + "ldp x4, x6, [%[a], 64]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 80]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 64]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 80]\n\t" + "ldp x5, x7, [%[b], 96]\n\t" + "ldp x11, x12, [%[b], 112]\n\t" + "ldp x4, x6, [%[a], 96]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 112]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 96]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 112]\n\t" + "ldp x5, x7, [%[b], 128]\n\t" + "ldp x11, x12, [%[b], 144]\n\t" + "ldp x4, x6, [%[a], 128]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 144]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 128]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 144]\n\t" + "ldp x5, x7, [%[b], 160]\n\t" + "ldp x11, x12, [%[b], 176]\n\t" + "ldp x4, x6, [%[a], 160]\n\t" + "and x5, x5, 
%[m]\n\t" + "ldp x9, x10, [%[a], 176]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 160]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 176]\n\t" + "ldp x5, x7, [%[b], 192]\n\t" + "ldp x11, x12, [%[b], 208]\n\t" + "ldp x4, x6, [%[a], 192]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 208]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 192]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 208]\n\t" + "ldp x5, x7, [%[b], 224]\n\t" + "ldp x11, x12, [%[b], 240]\n\t" + "ldp x4, x6, [%[a], 224]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 240]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 224]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 240]\n\t" + "ldp x5, x7, [%[b], 256]\n\t" + "ldp x11, x12, [%[b], 272]\n\t" + "ldp x4, x6, [%[a], 256]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 272]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 256]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 272]\n\t" + "ldp x5, x7, [%[b], 288]\n\t" + "ldp x11, x12, [%[b], 304]\n\t" + "ldp x4, x6, [%[a], 288]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 304]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 288]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 304]\n\t" + "ldp x5, x7, [%[b], 320]\n\t" + "ldp x11, x12, [%[b], 336]\n\t" + "ldp x4, x6, [%[a], 320]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 336]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 320]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 336]\n\t" + "ldp x5, x7, [%[b], 352]\n\t" + "ldp x11, x12, [%[b], 368]\n\t" + "ldp x4, x6, [%[a], 352]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 368]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 352]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 368]\n\t" + "csetm %[r], cc\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return (sp_digit)r; +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "ldp x14, x15, [%[m], 0]\n\t" + "ldp x16, x17, [%[m], 16]\n\t" + "ldp x19, x20, [%[m], 32]\n\t" + "ldp x21, x22, [%[m], 48]\n\t" + "ldp x23, x24, [%[m], 64]\n\t" + "ldp x25, x26, [%[m], 80]\n\t" + "ldp x27, x28, [%[m], 96]\n\t" + "# i = 48\n\t" + "mov x4, 48\n\t" + "ldp x12, x13, [%[a], 0]\n\t" + "\n1:\n\t" + "# mu = a[i] * mp\n\t" + "mul x9, %[mp], x12\n\t" + "# a[i+0] += m[0] * mu\n\t" + "mul x7, x14, x9\n\t" + "umulh x8, x14, x9\n\t" + "adds x12, x12, x7\n\t" + "# a[i+1] += m[1] * mu\n\t" + "mul x7, x15, x9\n\t" + "adc x6, x8, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x12, x13, x7\n\t" + "# a[i+2] += m[2] * mu\n\t" + "ldr x13, [%[a], 16]\n\t" + "adc x5, x8, xzr\n\t" + "mul x7, x16, x9\n\t" + "adds x12, x12, x6\n\t" + "umulh x8, x16, x9\n\t" + "adc x5, x5, xzr\n\t" + "adds x13, x13, x7\n\t" + "# a[i+3] += m[3] * mu\n\t" + "ldr x10, [%[a], 24]\n\t" + "adc x6, x8, xzr\n\t" + "mul x7, x17, x9\n\t" + "adds x13, x13, x5\n\t" + "umulh x8, x17, x9\n\t" + "adc x6, x6, xzr\n\t" + "adds x10, x10, x7\n\t" + "# a[i+4] += m[4] * mu\n\t" + "ldr x11, [%[a], 32]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x19, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x19, x9\n\t" + "str x10, [%[a], 24]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+5] += m[5] * mu\n\t" + "ldr x10, [%[a], 40]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x20, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x20, x9\n\t" + "str x11, [%[a], 32]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+6] += m[6] * mu\n\t" + "ldr x11, [%[a], 48]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x21, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x21, x9\n\t" + "str x10, [%[a], 40]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+7] += m[7] * mu\n\t" + "ldr x10, [%[a], 56]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x22, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x22, x9\n\t" + "str x11, [%[a], 48]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+8] += m[8] * mu\n\t" + "ldr x11, [%[a], 64]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x23, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x23, x9\n\t" + "str x10, [%[a], 56]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+9] += m[9] * mu\n\t" + "ldr x10, [%[a], 72]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x24, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x24, x9\n\t" + "str x11, [%[a], 64]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+10] += m[10] * mu\n\t" + "ldr x11, [%[a], 80]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x25, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x25, x9\n\t" + "str x10, [%[a], 72]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+11] += m[11] * mu\n\t" + "ldr x10, [%[a], 88]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x26, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x26, x9\n\t" + "str x11, [%[a], 80]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+12] += m[12] * mu\n\t" + "ldr x11, [%[a], 96]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x27, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x27, x9\n\t" + "str x10, [%[a], 88]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+13] += m[13] * mu\n\t" + "ldr x10, [%[a], 104]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x28, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x28, x9\n\t" + "str x11, [%[a], 96]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+14] += m[14] 
* mu\n\t" + "ldr x11, [%[a], 112]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 112]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 104]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+15] += m[15] * mu\n\t" + "ldr x10, [%[a], 120]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 120]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 112]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+16] += m[16] * mu\n\t" + "ldr x11, [%[a], 128]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 128]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 120]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+17] += m[17] * mu\n\t" + "ldr x10, [%[a], 136]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 136]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 128]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+18] += m[18] * mu\n\t" + "ldr x11, [%[a], 144]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 144]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 136]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+19] += m[19] * mu\n\t" + "ldr x10, [%[a], 152]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 152]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 144]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+20] += m[20] * mu\n\t" + "ldr x11, [%[a], 160]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 160]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 152]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+21] += m[21] * mu\n\t" + "ldr x10, [%[a], 168]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 168]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 160]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+22] += m[22] * mu\n\t" + "ldr x11, [%[a], 176]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 176]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 168]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+23] += m[23] * mu\n\t" + "ldr x10, [%[a], 184]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 184]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 176]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+24] += m[24] * mu\n\t" + "ldr x11, [%[a], 192]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 192]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 184]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+25] += m[25] * mu\n\t" + "ldr x10, [%[a], 200]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 200]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 192]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+26] += m[26] * mu\n\t" + "ldr x11, [%[a], 208]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 208]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 200]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+27] += m[27] * mu\n\t" + "ldr x10, [%[a], 216]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 216]\n\t" + 
"adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 208]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+28] += m[28] * mu\n\t" + "ldr x11, [%[a], 224]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 224]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 216]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+29] += m[29] * mu\n\t" + "ldr x10, [%[a], 232]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 232]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 224]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+30] += m[30] * mu\n\t" + "ldr x11, [%[a], 240]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 240]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 232]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+31] += m[31] * mu\n\t" + "ldr x10, [%[a], 248]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 248]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 240]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+32] += m[32] * mu\n\t" + "ldr x11, [%[a], 256]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 256]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 248]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+33] += m[33] * mu\n\t" + "ldr x10, [%[a], 264]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 264]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 256]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+34] += m[34] * mu\n\t" + "ldr x11, [%[a], 272]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 272]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 264]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+35] += m[35] * mu\n\t" + "ldr x10, [%[a], 280]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 280]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 272]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+36] += m[36] * mu\n\t" + "ldr x11, [%[a], 288]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 288]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 280]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+37] += m[37] * mu\n\t" + "ldr x10, [%[a], 296]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 296]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 288]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+38] += m[38] * mu\n\t" + "ldr x11, [%[a], 304]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 304]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 296]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+39] += m[39] * mu\n\t" + "ldr x10, [%[a], 312]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 312]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 304]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+40] += m[40] * mu\n\t" + "ldr x11, [%[a], 320]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 320]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" 
+ "str x10, [%[a], 312]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+41] += m[41] * mu\n\t" + "ldr x10, [%[a], 328]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 328]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 320]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+42] += m[42] * mu\n\t" + "ldr x11, [%[a], 336]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 336]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 328]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+43] += m[43] * mu\n\t" + "ldr x10, [%[a], 344]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 344]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 336]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+44] += m[44] * mu\n\t" + "ldr x11, [%[a], 352]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 352]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 344]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+45] += m[45] * mu\n\t" + "ldr x10, [%[a], 360]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 360]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 352]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+46] += m[46] * mu\n\t" + "ldr x11, [%[a], 368]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 368]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 360]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+47] += m[47] * mu\n\t" + "ldr x10, [%[a], 376]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 376]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "adds x6, x6, x7\n\t" + "adcs x8, x8, %[ca]\n\t" + "str x11, [%[a], 368]\n\t" + "cset %[ca], cs\n\t" + "adds x10, x10, x6\n\t" + "ldr x11, [%[a], 384]\n\t" + "str x10, [%[a], 376]\n\t" + "adcs x11, x11, x8\n\t" + "str x11, [%[a], 384]\n\t" + "adc %[ca], %[ca], xzr\n\t" + "subs x4, x4, 1\n\t" + "add %[a], %[a], 8\n\t" + "bne 1b\n\t" + "stp x12, x13, [%[a], 0]\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + ); + + sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_3072_mul_48(r, a, b); + sp_3072_mont_reduce_48(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_3072_sqr_48(r, a); + sp_3072_mont_reduce_48(r, m, mp); +} + +/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. 
+ * div  The divisor.
+ * returns the result of the division.
+ */
+static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div)
+{
+    sp_digit r;
+
+    __asm__ __volatile__ (
+        "lsr x5, %[div], 32\n\t"
+        "add x5, x5, 1\n\t"
+
+        "udiv x3, %[d1], x5\n\t"
+        "lsl x6, x3, 32\n\t"
+        "mul x4, %[div], x6\n\t"
+        "umulh x3, %[div], x6\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "udiv x3, %[d1], x5\n\t"
+        "lsl x3, x3, 32\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "umulh x3, %[div], x3\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "lsr x3, %[d0], 32\n\t"
+        "orr x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv x3, x3, x5\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "umulh x3, %[div], x3\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "lsr x3, %[d0], 32\n\t"
+        "orr x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv x3, x3, x5\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "sub %[d0], %[d0], x4\n\t"
+
+        "udiv x3, %[d0], %[div]\n\t"
+        "add %[r], x6, x3\n\t"
+
+        : [r] "=r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "x3", "x4", "x5", "x6"
+    );
+
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<48; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 48; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
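+ *
+ * The digits are scanned from most to least significant while a result
+ * value and a "still equal" mask are maintained, so every digit is read
+ * regardless of where the first difference occurs. The same idea in
+ * portable C (illustrative sketch; the assembly uses csel rather than
+ * branches, keeping the instruction sequence data independent):
+ *
+ *   static int64_t cmp_ref(const sp_digit* a, const sp_digit* b)
+ *   {
+ *       int64_t r = -1;
+ *       sp_digit mask = (sp_digit)-1;      // all ones while digits match
+ *       for (int i = 47; i >= 0; i--) {
+ *           sp_digit x = a[i] & mask;      // forced to 0 after first diff
+ *           sp_digit y = b[i] & mask;
+ *           if (x > y) { r = 1; }
+ *           if (x < y) { r = (int64_t)mask; }
+ *           if (x != y) { mask = 0; }      // freeze the result
+ *       }
+ *       return r ^ (int64_t)mask;          // 0 when still equal at the end
+ *   }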
+ */ +static int64_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov x2, -1\n\t" + "mov x3, 1\n\t" + "mov x4, -1\n\t" + "mov x5, 376\n\t" + "1:\n\t" + "ldr x6, [%[a], x5]\n\t" + "ldr x7, [%[b], x5]\n\t" + "and x6, x6, x4\n\t" + "and x7, x7, x4\n\t" + "subs x6, x6, x7\n\t" + "csel x2, x3, x2, hi\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "subs x5, x5, #8\n\t" + "b.cs 1b\n\t" + "eor %[a], x2, x4\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + ); +#else + __asm__ __volatile__ ( + "mov x2, -1\n\t" + "mov x3, 1\n\t" + "mov x4, -1\n\t" + "ldp x5, x6, [%[a], 368]\n\t" + "ldp x7, x8, [%[b], 368]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 352]\n\t" + "ldp x7, x8, [%[b], 352]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 336]\n\t" + "ldp x7, x8, [%[b], 336]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 320]\n\t" + "ldp x7, x8, [%[b], 320]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 304]\n\t" + "ldp x7, x8, [%[b], 304]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 288]\n\t" + "ldp x7, x8, [%[b], 288]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 272]\n\t" + "ldp x7, x8, [%[b], 272]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 256]\n\t" + "ldp x7, x8, [%[b], 256]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, 
x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 240]\n\t" + "ldp x7, x8, [%[b], 240]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 224]\n\t" + "ldp x7, x8, [%[b], 224]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 208]\n\t" + "ldp x7, x8, [%[b], 208]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 192]\n\t" + "ldp x7, x8, [%[b], 192]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "ldp x7, x8, [%[b], 176]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 160]\n\t" + "ldp x7, x8, [%[b], 160]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "ldp x7, x8, [%[b], 144]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 128]\n\t" + "ldp x7, x8, [%[b], 128]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "ldp x7, x8, [%[b], 112]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 96]\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "and x6, x6, x4\n\t" + "and 
x8, x8, x4\n\t"
+        "subs x6, x6, x8\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x5, x5, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x5, x5, x7\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "ldp x5, x6, [%[a], 80]\n\t"
+        "ldp x7, x8, [%[b], 80]\n\t"
+        "and x6, x6, x4\n\t"
+        "and x8, x8, x4\n\t"
+        "subs x6, x6, x8\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x5, x5, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x5, x5, x7\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "ldp x5, x6, [%[a], 64]\n\t"
+        "ldp x7, x8, [%[b], 64]\n\t"
+        "and x6, x6, x4\n\t"
+        "and x8, x8, x4\n\t"
+        "subs x6, x6, x8\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x5, x5, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x5, x5, x7\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "ldp x5, x6, [%[a], 48]\n\t"
+        "ldp x7, x8, [%[b], 48]\n\t"
+        "and x6, x6, x4\n\t"
+        "and x8, x8, x4\n\t"
+        "subs x6, x6, x8\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x5, x5, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x5, x5, x7\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "ldp x5, x6, [%[a], 32]\n\t"
+        "ldp x7, x8, [%[b], 32]\n\t"
+        "and x6, x6, x4\n\t"
+        "and x8, x8, x4\n\t"
+        "subs x6, x6, x8\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x5, x5, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x5, x5, x7\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "ldp x5, x6, [%[a], 16]\n\t"
+        "ldp x7, x8, [%[b], 16]\n\t"
+        "and x6, x6, x4\n\t"
+        "and x8, x8, x4\n\t"
+        "subs x6, x6, x8\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x5, x5, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x5, x5, x7\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "ldp x5, x6, [%[a], 0]\n\t"
+        "ldp x7, x8, [%[b], 0]\n\t"
+        "and x6, x6, x4\n\t"
+        "and x8, x8, x4\n\t"
+        "subs x6, x6, x8\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x5, x5, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x5, x5, x7\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "eor %[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+    );
+#endif
+
+    return (int64_t)a;
+}
+
+/* Divide a by d and put the remainder into r. (a = m*d + r)
+ * m, the quotient, is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Quotient result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
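+ *
+ * This is schoolbook long division with 64-bit digits: each iteration
+ * estimates one quotient digit from the top two words of the running
+ * remainder (div_3072_word_48), multiply-subtracts r1 * d, and then adds
+ * d back up to two times under mask, since the estimate can overshoot by
+ * at most two. Base-10 analogue: for 459 / 59 the leading-digit estimate
+ * 45 / 5 = 9 is two too large (9 * 59 = 531 > 459); two add-backs of 59
+ * correct the digit to 7, leaving remainder 46.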
+ */ +static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[96], t2[49]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[47]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 48); + for (i=47; i>=0; i--) { + r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div); + + sp_3072_mul_d_48(t2, d, r1); + t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2); + t1[48 + i] -= t2[48]; + sp_3072_mask_48(t2, d, t1[48 + i]); + t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2); + sp_3072_mask_48(t2, d, t1[48 + i]); + t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2); + } + + r1 = sp_3072_cmp_48(t1, d) >= 0; + sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_3072_div_48(a, m, NULL, r); +} + +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x11, %[a], 384\n\t" + "\n1:\n\t" + "subs %[c], xzr, %[c]\n\t" + "ldp x3, x4, [%[a]], #16\n\t" + "ldp x5, x6, [%[a]], #16\n\t" + "ldp x7, x8, [%[b]], #16\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x9, x10, [%[b]], #16\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r]], #16\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r]], #16\n\t" + "csetm %[c], cc\n\t" + "cmp %[a], x11\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + return c; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
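+ *
+ * The ldp/sbcs/stp sequence below keeps the borrow in the carry flag for
+ * the full 48 words. Portable C equivalent, for reference (illustrative
+ * only; the helper name is hypothetical):
+ *
+ *   static sp_digit sub_ref(sp_digit* r, const sp_digit* a, const sp_digit* b)
+ *   {
+ *       sp_digit borrow = 0;
+ *       for (int i = 0; i < 48; i++) {
+ *           sp_digit t = a[i] - b[i] - borrow;
+ *           borrow = (a[i] < b[i]) | ((a[i] == b[i]) & borrow);
+ *           r[i] = t;
+ *       }
+ *       return (sp_digit)0 - borrow;       // 0 or all ones, as csetm cc
+ *   }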
+ */ +static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "subs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 48]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 80]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 112]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 112]\n\t" + "ldp x3, x4, [%[a], 128]\n\t" + "ldp x7, x8, [%[b], 128]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 144]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 128]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 144]\n\t" + "ldp x3, x4, [%[a], 160]\n\t" + "ldp x7, x8, [%[b], 160]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 176]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 160]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 176]\n\t" + "ldp x3, x4, [%[a], 192]\n\t" + "ldp x7, x8, [%[b], 192]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 208]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 208]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 192]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 208]\n\t" + "ldp x3, x4, [%[a], 224]\n\t" + "ldp x7, x8, [%[b], 224]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 240]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 240]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 224]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 240]\n\t" + "ldp x3, x4, [%[a], 256]\n\t" + "ldp x7, x8, [%[b], 256]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 272]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 272]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 256]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 272]\n\t" + "ldp x3, x4, [%[a], 288]\n\t" + "ldp x7, x8, [%[b], 288]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 304]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 304]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 288]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 304]\n\t" + "ldp x3, x4, [%[a], 320]\n\t" + "ldp x7, x8, [%[b], 320]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 336]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 336]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 320]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 336]\n\t" + "ldp x3, x4, [%[a], 352]\n\t" + "ldp x7, x8, [%[b], 352]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 368]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 368]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 352]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, 
x6, [%[r], 368]\n\t"
+        "csetm %[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+    );
+
+    return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Divide a by d and put the remainder into r. (a = m*d + r)
+ * m, the quotient, is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Quotient result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[96], t2[49];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[47];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
+    for (i=47; i>=0; i--) {
+        r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div);
+
+        sp_3072_mul_d_48(t2, d, r1);
+        t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
+        t1[48 + i] -= t2[48];
+        if (t1[48 + i] != 0) {
+            t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], d);
+            if (t1[48 + i] != 0)
+                t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], d);
+        }
+    }
+
+    for (i = 47; i > 0; i--) {
+        if (t1[i] != d[i])
+            break;
+    }
+    if (t1[i] >= d[i]) {
+        sp_3072_sub_48(r, t1, d);
+    }
+    else {
+        XMEMCPY(r, t1, sizeof(*t1) * 48);
+    }
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_48_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_48_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+    defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
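+ *
+ * The loop below is fixed-window exponentiation in Montgomery form with a
+ * 4-bit window: t[0..15] hold the Montgomery forms of a^0..a^15, and for
+ * each window y the accumulator is squared four times and multiplied by
+ * t[y]. Core of the idea, assuming a hypothetical y[] array holding the
+ * exponent's 4-bit windows, most significant first:
+ *
+ *   XMEMCPY(r, t[y[0]], sizeof(sp_digit) * 48);
+ *   for (i = 1; i < nwindows; i++) {
+ *       for (j = 0; j < 4; j++) {
+ *           sp_3072_mont_sqr_48(r, r, m, mp);    // shift the accumulator
+ *       }                                        //   left 4 exponent bits
+ *       sp_3072_mont_mul_48(r, r, t[y[i]], m, mp);
+ *   }
+ *
+ * The c/n/y bookkeeping in the real code extracts those windows on the
+ * fly, stitching them across the 64-bit words of e.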
+ */ +static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][96]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 96, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 96; + } +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_48(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 48U); + if (reduceA != 0) { + err = sp_3072_mod_48(t[1] + 48, a, m); + if (err == MP_OKAY) { + err = sp_3072_mod_48(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48); + err = sp_3072_mod_48(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_48(t[10], t[ 5], m, mp); + sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_48(t[12], t[ 6], m, mp); + sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_48(t[14], t[ 7], m, mp); + sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 4; + if (c == 64) { + c = 60; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 48); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 60; + n <<= 4; + c = 60; + } + else if (c < 4) { + y = n >> 60; + n = e[i--]; + c = 4 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 60) & 0xf; + n <<= 4; + c -= 4; + } + + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + + sp_3072_mont_mul_48(r, r, t[y], m, mp); + } + + XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U); + sp_3072_mont_reduce_48(r, m, mp); + + mask = 0 - (sp_3072_cmp_48(r, m) >= 0); + sp_3072_cond_sub_48(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
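+ *
+ * This non-small variant widens the window to 5 bits: 32 precomputed
+ * powers instead of 16, in exchange for one table multiplication every
+ * 5 exponent bits rather than every 4. Because 64 is not a multiple of
+ * 5, windows can straddle word boundaries of e; c counts the unread bits
+ * left in n, and when c < 5 the next word is fetched and the two
+ * fragments are OR-ed together into a single 5-bit index y.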
+ */ +static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][96]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 96; + } +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_48(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 48U); + if (reduceA != 0) { + err = sp_3072_mod_48(t[1] + 48, a, m); + if (err == MP_OKAY) { + err = sp_3072_mod_48(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48); + err = sp_3072_mod_48(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_48(t[10], t[ 5], m, mp); + sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_48(t[12], t[ 6], m, mp); + sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_48(t[14], t[ 7], m, mp); + sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_48(t[16], t[ 8], m, mp); + sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_48(t[18], t[ 9], m, mp); + sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_48(t[20], t[10], m, mp); + sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_48(t[22], t[11], m, mp); + sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_48(t[24], t[12], m, mp); + sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_48(t[26], t[13], m, mp); + sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_48(t[28], t[14], m, mp); + sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_48(t[30], t[15], m, mp); + sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 5; + if (c == 64) { + c = 59; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 48); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 59; + n <<= 5; + c = 59; + } + else if (c < 5) { + y = n >> 59; + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + + sp_3072_mont_mul_48(r, r, t[y], m, mp); + } + + XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U); + sp_3072_mont_reduce_48(r, m, mp); + + mask = 0 - (sp_3072_cmp_48(r, m) >= 0); + sp_3072_cond_sub_48(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || 
WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[96], m[48], r[96]; +#else + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; +#endif + sp_digit *ah; + sp_digit e[1]; + int err = MP_OKAY; + + if (*outLen < 384) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 384 || + mp_count_bits(mm) != 3072)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 48 * 2; + m = r + 48 * 2; + } +#endif + + if (err == MP_OKAY) { + ah = a + 48; + + sp_3072_from_bin(ah, 48, in, inLen); +#if DIGIT_BIT >= 64 + e[0] = em->dp[0]; +#else + e[0] = em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_3072_from_mp(m, 48, mm); + + if (e[0] == 0x3) { + if (err == MP_OKAY) { + sp_3072_sqr_48(r, ah); + err = sp_3072_mod_48_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_3072_mul_48(r, ah, r); + err = sp_3072_mod_48_cond(r, r, m); + } + } + else { + int i; + sp_digit mp; + + sp_3072_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
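+ * Montgomery form of x is x * R mod m with R = 2^3072. The base was
+ * loaded into the upper 48 words (ah) and the lower 48 words are zeroed
+ * below, so the 96-word buffer holds x * 2^3072; a single reduction mod
+ * m therefore yields the Montgomery form directly.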
*/
+        XMEMSET(a, 0, sizeof(sp_digit) * 48);
+        err = sp_3072_mod_48_cond(a, a, m);
+
+        if (err == MP_OKAY) {
+            for (i = 63; i >= 0; i--) {
+                if (e[0] >> i) {
+                    break;
+                }
+            }
+
+            XMEMCPY(r, a, sizeof(sp_digit) * 48);
+            for (i--; i>=0; i--) {
+                sp_3072_mont_sqr_48(r, r, m, mp);
+                if (((e[0] >> i) & 1) == 1) {
+                    sp_3072_mont_mul_48(r, r, a, m, mp);
+                }
+            }
+            XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
+            sp_3072_mont_reduce_48(r, m, mp);
+
+            for (i = 47; i > 0; i--) {
+                if (r[i] != m[i]) {
+                    break;
+                }
+            }
+            if (r[i] >= m[i]) {
+                sp_3072_sub_in_place_48(r, m);
+            }
+        }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+/* RSA private key operation using the private exponent directly.
+ *
+ * in     Array of bytes representing the number to exponentiate, base.
+ * inLen  Number of bytes in base.
+ * dm     Private exponent.
+ * pm     First prime.
+ * qm     Second prime.
+ * dpm    First prime's CRT exponent.
+ * dqm    Second prime's CRT exponent.
+ * qim    Inverse of second prime mod p.
+ * mm     Modulus.
+ * out    Buffer to hold big-endian bytes of exponentiation result.
+ *        Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 384U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 3072) {
+            err = MP_READ_E;
+        }
+        if (inLen > 384) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 4, NULL,
+                               DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 48;
+        m = a + 96;
+        r = a;
+
+        sp_3072_from_bin(a, 48, in, inLen);
+        sp_3072_from_mp(d, 48, dm);
+        sp_3072_from_mp(m, 48, mm);
+        err = sp_3072_mod_exp_48(r, a, d, 3072, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 48);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+}
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not adding.
+ *
+ * r A single precision number representing the conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
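+ *
+ * Portable C equivalent, for reference (illustrative only; the helper
+ * name is hypothetical):
+ *
+ *   static sp_digit cond_add_ref(sp_digit* r, const sp_digit* a,
+ *                                const sp_digit* b, sp_digit m)
+ *   {
+ *       sp_digit carry = 0;
+ *       for (int i = 0; i < 24; i++) {
+ *           sp_digit x = b[i] & m;             // b[i] or 0, per the mask
+ *           sp_digit t = a[i] + x + carry;
+ *           carry = (t < a[i]) | ((t == a[i]) & carry);
+ *           r[i] = t;
+ *       }
+ *       return carry;
+ *   }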
+ */ +static sp_digit sp_3072_cond_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov x8, #0\n\t" + "1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr x4, [%[a], x8]\n\t" + "ldr x5, [%[b], x8]\n\t" + "and x5, x5, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "cset %[c], cs\n\t" + "str x4, [%[r], x8]\n\t" + "add x8, x8, #8\n\t" + "cmp x8, 192\n\t" + "b.lt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return c; +#else + __asm__ __volatile__ ( + + "ldp x5, x7, [%[b], 0]\n\t" + "ldp x11, x12, [%[b], 16]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" + "and x7, x7, %[m]\n\t" + "adds x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 16]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" + "ldp x11, x12, [%[b], 48]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 48]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 48]\n\t" + "ldp x5, x7, [%[b], 64]\n\t" + "ldp x11, x12, [%[b], 80]\n\t" + "ldp x4, x6, [%[a], 64]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 80]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 64]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 80]\n\t" + "ldp x5, x7, [%[b], 96]\n\t" + "ldp x11, x12, [%[b], 112]\n\t" + "ldp x4, x6, [%[a], 96]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 112]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 96]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 112]\n\t" + "ldp x5, x7, [%[b], 128]\n\t" + "ldp x11, x12, [%[b], 144]\n\t" + "ldp x4, x6, [%[a], 128]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 144]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 128]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 144]\n\t" + "ldp x5, x7, [%[b], 160]\n\t" + "ldp x11, x12, [%[b], 176]\n\t" + "ldp x4, x6, [%[a], 160]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 176]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 160]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 176]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return (sp_digit)r; +#endif /* WOLFSSL_SP_SMALL */ +} + +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. 
+ * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[48 * 2]; + sp_digit p[24], q[24], dp[24]; + sp_digit tmpa[48], tmpb[48]; +#else + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* tmpa; + sp_digit* tmpb; +#endif + sp_digit* r; + sp_digit* qi; + sp_digit* dq; + sp_digit c; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 384) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 24 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 48 * 2; + q = p + 24; + qi = dq = dp = q + 24; + tmpa = qi + 24; + tmpb = tmpa + 48; + + r = t + 48; + } +#else +#endif + + if (err == MP_OKAY) { +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + r = a; + qi = dq = dp; +#endif + sp_3072_from_bin(a, 48, in, inLen); + sp_3072_from_mp(p, 24, pm); + sp_3072_from_mp(q, 24, qm); + sp_3072_from_mp(dp, 24, dpm); + + err = sp_3072_mod_exp_24(tmpa, a, dp, 1536, p, 1); + } + if (err == MP_OKAY) { + sp_3072_from_mp(dq, 24, dqm); + err = sp_3072_mod_exp_24(tmpb, a, dq, 1536, q, 1); + } + + if (err == MP_OKAY) { + c = sp_3072_sub_in_place_24(tmpa, tmpb); + c += sp_3072_cond_add_24(tmpa, tmpa, p, c); + sp_3072_cond_add_24(tmpa, tmpa, p, c); + + sp_3072_from_mp(qi, 24, qim); + sp_3072_mul_24(tmpa, tmpa, qi); + err = sp_3072_mod_24(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_3072_mul_24(tmpa, q, tmpa); + XMEMSET(&tmpb[24], 0, sizeof(sp_digit) * 24); + sp_3072_add_48(r, tmpb, tmpa); + + sp_3072_to_bin(r, out); + *outLen = 384; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 24 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); +#endif + + return err; +} +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. 
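+ *
+ * The three compile-time branches below cover DIGIT_BIT == 64 (straight
+ * copy), DIGIT_BIT < 64 (each 64-bit sp_digit is split across several
+ * smaller mp_digits) and DIGIT_BIT > 64 (several sp_digits are packed
+ * into one mp_digit), with s tracking the bit offset inside the current
+ * mp_digit. For example, with DIGIT_BIT == 28 one 64-bit digit
+ * contributes 28 + 28 + 8 bits to three consecutive mp_digits.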
+ */ +static int sp_3072_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 64 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 48); + r->used = 48; + mp_clamp(r); +#elif DIGIT_BIT < 64 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 48; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 64) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 64 - s; + } + r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 48; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 64 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 64 - s; + } + else { + s += 64; + } + } + r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[96], e[48], m[48]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 48, base); + sp_3072_from_mp(e, 48, exp); + sp_3072_from_mp(m, 48, mod); + + err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_3072_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_3072 +static void sp_3072_lshift_48(sp_digit* r, sp_digit* a, byte n) +{ + __asm__ __volatile__ ( + "mov x6, 63\n\t" + "sub x6, x6, %[n]\n\t" + "ldr x3, [%[a], 376]\n\t" + "lsr x4, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x4, x4, x6\n\t" + "ldr x2, [%[a], 368]\n\t" + "str x4, [%[r], 384]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 360]\n\t" + "str x3, [%[r], 376]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 352]\n\t" + "str x2, [%[r], 368]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 344]\n\t" + "str x4, [%[r], 360]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 336]\n\t" + "str x3, [%[r], 352]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 328]\n\t" + "str x2, [%[r], 344]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 320]\n\t" + "str x4, [%[r], 336]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + 
"ldr x4, [%[a], 312]\n\t" + "str x3, [%[r], 328]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 304]\n\t" + "str x2, [%[r], 320]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 296]\n\t" + "str x4, [%[r], 312]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 288]\n\t" + "str x3, [%[r], 304]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 280]\n\t" + "str x2, [%[r], 296]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 272]\n\t" + "str x4, [%[r], 288]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 264]\n\t" + "str x3, [%[r], 280]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 256]\n\t" + "str x2, [%[r], 272]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 248]\n\t" + "str x4, [%[r], 264]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 240]\n\t" + "str x3, [%[r], 256]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 232]\n\t" + "str x2, [%[r], 248]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 224]\n\t" + "str x4, [%[r], 240]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 216]\n\t" + "str x3, [%[r], 232]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 208]\n\t" + "str x2, [%[r], 224]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 200]\n\t" + "str x4, [%[r], 216]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 192]\n\t" + "str x3, [%[r], 208]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 184]\n\t" + "str x2, [%[r], 200]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 176]\n\t" + "str x4, [%[r], 192]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 168]\n\t" + "str x3, [%[r], 184]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 160]\n\t" + "str x2, [%[r], 176]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 152]\n\t" + "str x4, [%[r], 168]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 144]\n\t" + "str x3, [%[r], 160]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 136]\n\t" + "str x2, [%[r], 152]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 128]\n\t" + "str x4, [%[r], 144]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + 
"orr x3, x3, x5\n\t" + "ldr x4, [%[a], 120]\n\t" + "str x3, [%[r], 136]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 112]\n\t" + "str x2, [%[r], 128]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 104]\n\t" + "str x4, [%[r], 120]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 96]\n\t" + "str x3, [%[r], 112]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 88]\n\t" + "str x2, [%[r], 104]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 80]\n\t" + "str x4, [%[r], 96]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 72]\n\t" + "str x3, [%[r], 88]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 64]\n\t" + "str x2, [%[r], 80]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 56]\n\t" + "str x4, [%[r], 72]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 48]\n\t" + "str x3, [%[r], 64]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 40]\n\t" + "str x2, [%[r], 56]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 32]\n\t" + "str x4, [%[r], 48]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 24]\n\t" + "str x3, [%[r], 40]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 16]\n\t" + "str x2, [%[r], 32]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 8]\n\t" + "str x4, [%[r], 24]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 0]\n\t" + "str x3, [%[r], 16]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "str x4, [%[r]]\n\t" + "str x2, [%[r], 8]\n\t" + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) + : "memory", "x2", "x3", "x4", "x5", "x6" + ); +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_3072_mod_exp_2_48(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[96]; + sp_digit td[49]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 145, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 96; +#else + norm = nd; + tmp = td; +#endif + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_48(norm, m); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 6; + if (c == 64) { + c = 58; + } + y = (int)(n >> c); + n <<= 64 - c; + sp_3072_lshift_48(r, norm, y); + for (; i>=0 || c>=6; ) { + if (c == 0) { + n = e[i--]; + y = n >> 58; + n <<= 6; + c = 58; + } + else if (c < 6) { + y = n >> 58; + n = e[i--]; + c = 6 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 58) & 0x3f; + n <<= 6; + c -= 6; + } + + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + + sp_3072_lshift_48(r, r, y); + sp_3072_mul_d_48(tmp, norm, r[48]); + r[48] = 0; + o = sp_3072_add_48(r, r, tmp); + sp_3072_cond_sub_48(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U); + sp_3072_mont_reduce_48(r, m, mp); + + mask = 0 - (sp_3072_cmp_48(r, m) >= 0); + sp_3072_cond_sub_48(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* HAVE_FFDHE_3072 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ + int err = MP_OKAY; + sp_digit b[96], e[48], m[48]; + sp_digit* r = b; + word32 i; + + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 384) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 48, base); + sp_3072_from_bin(e, 48, exp, expLen); + sp_3072_from_mp(m, 48, mod); + + #ifdef HAVE_FFDHE_3072 + if (base->used == 1 && base->dp[0] == 2 && m[47] == (sp_digit)-1) + err = sp_3072_mod_exp_2_48(r, e, expLen * 8, m); + else + #endif + err = sp_3072_mod_exp_48(r, b, e, expLen * 8, m, 0); + + } + + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + for (i=0; i<384 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. 
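+ *
+ * The modulus must be exactly 1536 bits long; base and exponent must
+ * not exceed 1536 bits (MP_READ_E is returned otherwise).
+ *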
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[48], e[24], m[24]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1536) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 1536) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 1536) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 24, base); + sp_3072_from_mp(e, 24, exp); + sp_3072_from_mp(m, 24, mod); + + err = sp_3072_mod_exp_24(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 24, 0, sizeof(*r) * 24U); + err = sp_3072_to_mp(r, res); + res->used = mod->used; + mp_clamp(res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* !WOLFSSL_SP_NO_3072 */ + +#ifdef WOLFSSL_SP_4096 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j; + byte* d; + + for (i = n - 1,j = 0; i >= 7; i -= 8) { + r[j] = ((sp_digit)a[i - 0] << 0) | + ((sp_digit)a[i - 1] << 8) | + ((sp_digit)a[i - 2] << 16) | + ((sp_digit)a[i - 3] << 24) | + ((sp_digit)a[i - 4] << 32) | + ((sp_digit)a[i - 5] << 40) | + ((sp_digit)a[i - 6] << 48) | + ((sp_digit)a[i - 7] << 56); + j++; + } + + if (i >= 0) { + r[j] = 0; + + d = (byte*)r; + switch (i) { + case 6: d[n - 1 - 6] = a[6]; //fallthrough + case 5: d[n - 1 - 5] = a[5]; //fallthrough + case 4: d[n - 1 - 4] = a[4]; //fallthrough + case 3: d[n - 1 - 3] = a[3]; //fallthrough + case 2: d[n - 1 - 2] = a[2]; //fallthrough + case 1: d[n - 1 - 1] = a[1]; //fallthrough + case 0: d[n - 1 - 0] = a[0]; //fallthrough + } + j++; + } + + for (; j < size; j++) { + r[j] = 0; + } +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. 
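+ *
+ * Repacks DIGIT_BIT-wide mp_digit words into 64-bit sp_digits, shifting
+ * and OR-ing bits across word boundaries where the widths differ. For
+ * example, with DIGIT_BIT == 32 each output word is assembled as:
+ *
+ *     r[j] = (sp_digit)a->dp[2*j] | ((sp_digit)a->dp[2*j + 1] << 32);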
+ */ +static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 64 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 64 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffffffffffffl; + s = 64U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 64U) <= (word32)DIGIT_BIT) { + s += 64U; + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 64) { + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + s = 64 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 512 + * + * r A single precision integer. + * a Byte array. + */ +static void sp_4096_to_bin(sp_digit* r, byte* a) +{ + int i, j; + + for (i = 63, j = 0; i >= 0; i--) { + a[j++] = r[i] >> 56; + a[j++] = r[i] >> 48; + a[j++] = r[i] >> 40; + a[j++] = r[i] >> 32; + a[j++] = r[i] >> 24; + a[j++] = r[i] >> 16; + a[j++] = r[i] >> 8; + a[j++] = r[i] >> 0; + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
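+ *
+ * The carry lives in the flags for the whole chain (adds, then adcs)
+ * and is returned with cset. One step is equivalent to the portable C:
+ *
+ *     t = a[i] + b[i] + c;
+ *     c = (c != 0) ? (t <= a[i]) : (t < a[i]);
+ *     r[i] = t;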
+ */ +static sp_digit sp_4096_add_32(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "adds x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 48]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 80]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 112]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 112]\n\t" + "ldp x3, x4, [%[a], 128]\n\t" + "ldp x7, x8, [%[b], 128]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 144]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 128]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 144]\n\t" + "ldp x3, x4, [%[a], 160]\n\t" + "ldp x7, x8, [%[b], 160]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 176]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 160]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 176]\n\t" + "ldp x3, x4, [%[a], 192]\n\t" + "ldp x7, x8, [%[b], 192]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 208]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 208]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 192]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 208]\n\t" + "ldp x3, x4, [%[a], 224]\n\t" + "ldp x7, x8, [%[b], 224]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 240]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 240]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 224]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 240]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return (sp_digit)r; +} + +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. 
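+ *
+ * Returns 0 when there is no borrow and all ones when there is (csetm
+ * on carry clear), so the result can be used directly as a mask. Each
+ * word is processed as:
+ *
+ *     t = a[i] - b[i] - borrow;
+ *     borrow = (borrow != 0) ? (t >= a[i]) : (t > a[i]);
+ *     a[i] = t;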
+ */ +static sp_digit sp_4096_sub_in_place_64(sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x2, x3, [%[a], 0]\n\t" + "ldp x6, x7, [%[b], 0]\n\t" + "subs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 16]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 16]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 0]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 16]\n\t" + "ldp x2, x3, [%[a], 32]\n\t" + "ldp x6, x7, [%[b], 32]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 48]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 48]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 32]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 48]\n\t" + "ldp x2, x3, [%[a], 64]\n\t" + "ldp x6, x7, [%[b], 64]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 80]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 80]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 64]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 80]\n\t" + "ldp x2, x3, [%[a], 96]\n\t" + "ldp x6, x7, [%[b], 96]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 112]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 112]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 96]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 112]\n\t" + "ldp x2, x3, [%[a], 128]\n\t" + "ldp x6, x7, [%[b], 128]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 144]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 144]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 128]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 144]\n\t" + "ldp x2, x3, [%[a], 160]\n\t" + "ldp x6, x7, [%[b], 160]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 176]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 176]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 160]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 176]\n\t" + "ldp x2, x3, [%[a], 192]\n\t" + "ldp x6, x7, [%[b], 192]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 208]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 208]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 192]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 208]\n\t" + "ldp x2, x3, [%[a], 224]\n\t" + "ldp x6, x7, [%[b], 224]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 240]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 240]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 224]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 240]\n\t" + "ldp x2, x3, [%[a], 256]\n\t" + "ldp x6, x7, [%[b], 256]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 272]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 272]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 256]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 272]\n\t" + "ldp x2, x3, [%[a], 288]\n\t" + "ldp x6, x7, [%[b], 288]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 304]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 304]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 288]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 304]\n\t" + "ldp x2, x3, [%[a], 320]\n\t" + "ldp x6, x7, [%[b], 320]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 336]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 336]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 320]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 336]\n\t" + "ldp x2, x3, [%[a], 352]\n\t" + "ldp x6, x7, [%[b], 352]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 368]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 368]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 352]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 368]\n\t" + "ldp x2, x3, 
[%[a], 384]\n\t" + "ldp x6, x7, [%[b], 384]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 400]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 400]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 384]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 400]\n\t" + "ldp x2, x3, [%[a], 416]\n\t" + "ldp x6, x7, [%[b], 416]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 432]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 432]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 416]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 432]\n\t" + "ldp x2, x3, [%[a], 448]\n\t" + "ldp x6, x7, [%[b], 448]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 464]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 464]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 448]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 464]\n\t" + "ldp x2, x3, [%[a], 480]\n\t" + "ldp x6, x7, [%[b], 480]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 496]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 496]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 480]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 496]\n\t" + "csetm %[a], cc\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); + + return (sp_digit)a; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "adds x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 48]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 80]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 112]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 112]\n\t" + "ldp x3, x4, [%[a], 128]\n\t" + "ldp x7, x8, [%[b], 128]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 144]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 128]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 144]\n\t" + "ldp x3, x4, [%[a], 160]\n\t" + "ldp x7, x8, [%[b], 160]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 176]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 160]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 176]\n\t" + "ldp x3, x4, [%[a], 192]\n\t" + "ldp x7, x8, [%[b], 192]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 208]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 208]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 192]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 208]\n\t" + 
"ldp x3, x4, [%[a], 224]\n\t" + "ldp x7, x8, [%[b], 224]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 240]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 240]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 224]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 240]\n\t" + "ldp x3, x4, [%[a], 256]\n\t" + "ldp x7, x8, [%[b], 256]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 272]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 272]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 256]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 272]\n\t" + "ldp x3, x4, [%[a], 288]\n\t" + "ldp x7, x8, [%[b], 288]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 304]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 304]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 288]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 304]\n\t" + "ldp x3, x4, [%[a], 320]\n\t" + "ldp x7, x8, [%[b], 320]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 336]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 336]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 320]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 336]\n\t" + "ldp x3, x4, [%[a], 352]\n\t" + "ldp x7, x8, [%[b], 352]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 368]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 368]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 352]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 368]\n\t" + "ldp x3, x4, [%[a], 384]\n\t" + "ldp x7, x8, [%[b], 384]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 400]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 400]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 384]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 400]\n\t" + "ldp x3, x4, [%[a], 416]\n\t" + "ldp x7, x8, [%[b], 416]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 432]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 432]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 416]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 432]\n\t" + "ldp x3, x4, [%[a], 448]\n\t" + "ldp x7, x8, [%[b], 448]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 464]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 464]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 448]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 464]\n\t" + "ldp x3, x4, [%[a], 480]\n\t" + "ldp x7, x8, [%[b], 480]\n\t" + "adcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 496]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 496]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 480]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 496]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return (sp_digit)r; +} + +/* Add digit to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static void sp_4096_add_zero_32(sp_digit* r, const sp_digit* a, + const sp_digit d) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adds x3, x3, %[d]\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 112]\n\t" + "ldp x3, x4, [%[a], 128]\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 128]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 144]\n\t" + "ldp x3, x4, [%[a], 160]\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 160]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 176]\n\t" + "ldp x3, x4, [%[a], 192]\n\t" + "ldp x5, x6, [%[a], 208]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 192]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 208]\n\t" + "ldp x3, x4, [%[a], 224]\n\t" + "ldp x5, x6, [%[a], 240]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 224]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 240]\n\t" + : + : [r] "r" (r), [a] "r" (a), [d] "r" (d) + : "memory", "x3", "x4", "x5", "x6" + ); +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[64]; + sp_digit a1[32]; + sp_digit b1[32]; + sp_digit z2[64]; + sp_digit u, ca, cb; + + ca = sp_2048_add_32(a1, a, &a[32]); + cb = sp_2048_add_32(b1, b, &b[32]); + u = ca & cb; + sp_2048_mul_32(z1, a1, b1); + sp_2048_mul_32(z2, &a[32], &b[32]); + sp_2048_mul_32(z0, a, b); + sp_2048_mask_32(r + 64, a1, 0 - cb); + sp_2048_mask_32(b1, b1, 0 - ca); + u += sp_2048_add_32(r + 64, r + 64, b1); + u += sp_4096_sub_in_place_64(z1, z2); + u += sp_4096_sub_in_place_64(z1, z0); + u += sp_4096_add_64(r + 32, r + 32, z1); + u += sp_4096_add_32(r + 64, r + 64, z2); + sp_4096_add_zero_32(r + 96, z2 + 32, u); +} + +#ifdef WOLFSSL_SP_SMALL +/* Double a into r. (r = a + a) + * + * r A single precision integer. + * a A single precision integer. 
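+ *
+ * In this looped variant the carry cannot stay in the flags across the
+ * backward branch, so it is saved with cset at the bottom of the loop
+ * and restored at the top: "adds c, c, #-1" sets the carry flag exactly
+ * when c was 1.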
+ */ +static sp_digit sp_2048_dbl_32(sp_digit* r, const sp_digit* a) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x11, %[a], 256\n\t" + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldp x3, x4, [%[a]], #16\n\t" + "ldp x5, x6, [%[a]], #16\n\t" + "adcs x3, x3, x3\n\t" + "adcs x4, x4, x4\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r]], #16\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r]], #16\n\t" + "cset %[c], cs\n\t" + "cmp %[a], x11\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a) + : + : "memory", "x3", "x4", "x5", "x6", "x11" + ); + + return c; +} + +#else +/* Double a into r. (r = a + a) + * + * r A single precision integer. + * a A single precision integer. + */ +static sp_digit sp_2048_dbl_32(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "adds x3, x3, x3\n\t" + "ldr x5, [%[a], 16]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 24]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 48]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 56]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 80]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 88]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 112]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 120]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 112]\n\t" + "ldp x3, x4, [%[a], 128]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 144]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 152]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 128]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 144]\n\t" + "ldp x3, x4, [%[a], 160]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 176]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 184]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 160]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 176]\n\t" + "ldp x3, x4, [%[a], 192]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 208]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 216]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 192]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 208]\n\t" + "ldp x3, x4, [%[a], 224]\n\t" + "adcs x3, x3, x3\n\t" + "ldr x5, [%[a], 240]\n\t" + "adcs x4, x4, x4\n\t" + "ldr x6, [%[a], 248]\n\t" + "adcs x5, x5, x5\n\t" + "stp x3, x4, [%[r], 224]\n\t" + "adcs x6, x6, x6\n\t" + "stp x5, x6, [%[r], 240]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a) + : "memory", "x3", "x4", "x5", "x6" + ); + + return (sp_digit)r; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
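+ *
+ * Karatsuba squaring: splitting a = a1*2^2048 + a0 gives
+ *
+ *     a^2 = a1^2*2^4096 + ((a0 + a1)^2 - a1^2 - a0^2)*2^2048 + a0^2
+ *
+ * so one 4096-bit square costs three 2048-bit squares; the carry out of
+ * a0 + a1 is folded back in with a masked double and the adds below.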
+ */ +SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[64]; + sp_digit z1[64]; + sp_digit a1[32]; + sp_digit u; + + u = sp_2048_add_32(a1, a, &a[32]); + sp_2048_sqr_32(z1, a1); + sp_2048_sqr_32(z2, &a[32]); + sp_2048_sqr_32(z0, a); + sp_2048_mask_32(r + 64, a1, 0 - u); + u += sp_2048_dbl_32(r + 64, r + 64); + u += sp_4096_sub_in_place_64(z1, z2); + u += sp_4096_sub_in_place_64(z1, z0); + u += sp_4096_add_64(r + 32, r + 32, z1); + u += sp_4096_add_32(r + 64, r + 64, z2); + sp_4096_add_zero_32(r + 96, z2 + 32, u); + +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x11, %[a], 512\n\t" + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldp x3, x4, [%[a]], #16\n\t" + "ldp x5, x6, [%[a]], #16\n\t" + "ldp x7, x8, [%[b]], #16\n\t" + "adcs x3, x3, x7\n\t" + "ldp x9, x10, [%[b]], #16\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r]], #16\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r]], #16\n\t" + "cset %[c], cs\n\t" + "cmp %[a], x11\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_4096_sub_in_place_64(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x10, %[a], 512\n\t" + "\n1:\n\t" + "subs %[c], xzr, %[c]\n\t" + "ldp x2, x3, [%[a]]\n\t" + "ldp x4, x5, [%[a], #16]\n\t" + "ldp x6, x7, [%[b]], #16\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x8, x9, [%[b]], #16\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a]], #16\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a]], #16\n\t" + "csetm %[c], cc\n\t" + "cmp %[a], x10\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_digit tmp[128]; + + __asm__ __volatile__ ( + "mov x5, 0\n\t" + "mov x6, 0\n\t" + "mov x7, 0\n\t" + "mov x8, 0\n\t" + "\n1:\n\t" + "subs x3, x5, 504\n\t" + "csel x3, xzr, x3, cc\n\t" + "sub x4, x5, x3\n\t" + "\n2:\n\t" + "ldr x10, [%[a], x3]\n\t" + "ldr x11, [%[b], x4]\n\t" + "mul x9, x10, x11\n\t" + "umulh x10, x10, x11\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "add x3, x3, #8\n\t" + "sub x4, x4, #8\n\t" + "cmp x3, 512\n\t" + "b.eq 3f\n\t" + "cmp x3, x5\n\t" + "b.le 2b\n\t" + "\n3:\n\t" + "str x6, [%[r], x5]\n\t" + "mov x6, x7\n\t" + "mov x7, x8\n\t" + "mov x8, #0\n\t" + "add x5, x5, #8\n\t" + "cmp x5, 1008\n\t" + "b.le 1b\n\t" + "str x6, [%[r], x5]\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. 
(r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a)
+{
+    sp_digit tmp[128];
+
+    __asm__ __volatile__ (
+        "mov x6, 0\n\t"
+        "mov x7, 0\n\t"
+        "mov x8, 0\n\t"
+        "mov x5, 0\n\t"
+        "\n1:\n\t"
+        "subs x3, x5, 504\n\t"
+        "csel x3, xzr, x3, cc\n\t"
+        "sub x4, x5, x3\n\t"
+        "\n2:\n\t"
+        "cmp x4, x3\n\t"
+        "b.eq 4f\n\t"
+        "ldr x10, [%[a], x3]\n\t"
+        "ldr x11, [%[a], x4]\n\t"
+        "mul x9, x10, x11\n\t"
+        "umulh x10, x10, x11\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "b.al 5f\n\t"
+        "\n4:\n\t"
+        "ldr x10, [%[a], x3]\n\t"
+        "mul x9, x10, x10\n\t"
+        "umulh x10, x10, x10\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "\n5:\n\t"
+        "add x3, x3, #8\n\t"
+        "sub x4, x4, #8\n\t"
+        "cmp x3, 512\n\t"
+        "b.eq 3f\n\t"
+        "cmp x3, x4\n\t"
+        "b.gt 3f\n\t"
+        "cmp x3, x5\n\t"
+        "b.le 2b\n\t"
+        "\n3:\n\t"
+        "str x6, [%[r], x5]\n\t"
+        "mov x6, x7\n\t"
+        "mov x7, x8\n\t"
+        "mov x8, #0\n\t"
+        "add x5, x5, #8\n\t"
+        "cmp x5, 1008\n\t"
+        "b.le 1b\n\t"
+        "str x6, [%[r], x5]\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
+
+    /* rho = -1/a[0] mod 2^64 */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
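+ *
+ * Each word of a is multiplied by b with a mul/umulh pair and summed
+ * into a sliding three-word accumulator. With a 128-bit type the whole
+ * function is equivalent to:
+ *
+ *     unsigned __int128 t = 0;
+ *     for (i = 0; i < 64; i++) {
+ *         t += (unsigned __int128)a[i] * b;
+ *         r[i] = (sp_digit)t;
+ *         t >>= 64;
+ *     }
+ *     r[64] = (sp_digit)t;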
+ */ +static void sp_4096_mul_d_64(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "# A[0] * B\n\t" + "ldr x8, [%[a]]\n\t" + "mul x5, %[b], x8\n\t" + "umulh x3, %[b], x8\n\t" + "mov x4, 0\n\t" + "str x5, [%[r]]\n\t" + "mov x5, 0\n\t" + "mov x9, #8\n\t" + "1:\n\t" + "ldr x8, [%[a], x9]\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], x9]\n\t" + "mov x3, x4\n\t" + "mov x4, x5\n\t" + "mov x5, #0\n\t" + "add x9, x9, #8\n\t" + "cmp x9, 512\n\t" + "b.lt 1b\n\t" + "str x3, [%[r], 512]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); +#else + __asm__ __volatile__ ( + "# A[0] * B\n\t" + "ldp x8, x9, [%[a]]\n\t" + "mul x3, %[b], x8\n\t" + "umulh x4, %[b], x8\n\t" + "mov x5, 0\n\t" + "# A[1] * B\n\t" + "str x3, [%[r]]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "umulh x7, %[b], x9\n\t" + "adds x4, x4, x6\n\t" + "# A[2] * B\n\t" + "ldp x8, x9, [%[a], 16]\n\t" + "str x4, [%[r], 8]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[3] * B\n\t" + "str x5, [%[r], 16]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[4] * B\n\t" + "ldp x8, x9, [%[a], 32]\n\t" + "str x3, [%[r], 24]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[5] * B\n\t" + "str x4, [%[r], 32]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[6] * B\n\t" + "ldp x8, x9, [%[a], 48]\n\t" + "str x5, [%[r], 40]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[7] * B\n\t" + "str x3, [%[r], 48]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[8] * B\n\t" + "ldp x8, x9, [%[a], 64]\n\t" + "str x4, [%[r], 56]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[9] * B\n\t" + "str x5, [%[r], 64]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[10] * B\n\t" + "ldp x8, x9, [%[a], 80]\n\t" + "str x3, [%[r], 72]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[11] * B\n\t" + "str x4, [%[r], 80]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[12] * B\n\t" + "ldp x8, x9, [%[a], 96]\n\t" + "str x5, [%[r], 88]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[13] * B\n\t" + "str x3, [%[r], 96]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[14] * B\n\t" + "ldp x8, x9, [%[a], 112]\n\t" + "str x4, 
[%[r], 104]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[15] * B\n\t" + "str x5, [%[r], 112]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[16] * B\n\t" + "ldp x8, x9, [%[a], 128]\n\t" + "str x3, [%[r], 120]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[17] * B\n\t" + "str x4, [%[r], 128]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[18] * B\n\t" + "ldp x8, x9, [%[a], 144]\n\t" + "str x5, [%[r], 136]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[19] * B\n\t" + "str x3, [%[r], 144]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[20] * B\n\t" + "ldp x8, x9, [%[a], 160]\n\t" + "str x4, [%[r], 152]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[21] * B\n\t" + "str x5, [%[r], 160]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[22] * B\n\t" + "ldp x8, x9, [%[a], 176]\n\t" + "str x3, [%[r], 168]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[23] * B\n\t" + "str x4, [%[r], 176]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[24] * B\n\t" + "ldp x8, x9, [%[a], 192]\n\t" + "str x5, [%[r], 184]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[25] * B\n\t" + "str x3, [%[r], 192]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[26] * B\n\t" + "ldp x8, x9, [%[a], 208]\n\t" + "str x4, [%[r], 200]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[27] * B\n\t" + "str x5, [%[r], 208]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[28] * B\n\t" + "ldp x8, x9, [%[a], 224]\n\t" + "str x3, [%[r], 216]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[29] * B\n\t" + "str x4, [%[r], 224]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[30] * B\n\t" + "ldp x8, x9, [%[a], 240]\n\t" + "str x5, [%[r], 232]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[31] * B\n\t" + "str x3, [%[r], 240]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], 
x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[32] * B\n\t" + "ldp x8, x9, [%[a], 256]\n\t" + "str x4, [%[r], 248]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[33] * B\n\t" + "str x5, [%[r], 256]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[34] * B\n\t" + "ldp x8, x9, [%[a], 272]\n\t" + "str x3, [%[r], 264]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[35] * B\n\t" + "str x4, [%[r], 272]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[36] * B\n\t" + "ldp x8, x9, [%[a], 288]\n\t" + "str x5, [%[r], 280]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[37] * B\n\t" + "str x3, [%[r], 288]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[38] * B\n\t" + "ldp x8, x9, [%[a], 304]\n\t" + "str x4, [%[r], 296]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[39] * B\n\t" + "str x5, [%[r], 304]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[40] * B\n\t" + "ldp x8, x9, [%[a], 320]\n\t" + "str x3, [%[r], 312]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[41] * B\n\t" + "str x4, [%[r], 320]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[42] * B\n\t" + "ldp x8, x9, [%[a], 336]\n\t" + "str x5, [%[r], 328]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[43] * B\n\t" + "str x3, [%[r], 336]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[44] * B\n\t" + "ldp x8, x9, [%[a], 352]\n\t" + "str x4, [%[r], 344]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[45] * B\n\t" + "str x5, [%[r], 352]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[46] * B\n\t" + "ldp x8, x9, [%[a], 368]\n\t" + "str x3, [%[r], 360]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[47] * B\n\t" + "str x4, [%[r], 368]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[48] * B\n\t" + "ldp x8, x9, [%[a], 384]\n\t" + "str x5, [%[r], 376]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, 
x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[49] * B\n\t" + "str x3, [%[r], 384]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[50] * B\n\t" + "ldp x8, x9, [%[a], 400]\n\t" + "str x4, [%[r], 392]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[51] * B\n\t" + "str x5, [%[r], 400]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[52] * B\n\t" + "ldp x8, x9, [%[a], 416]\n\t" + "str x3, [%[r], 408]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[53] * B\n\t" + "str x4, [%[r], 416]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[54] * B\n\t" + "ldp x8, x9, [%[a], 432]\n\t" + "str x5, [%[r], 424]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[55] * B\n\t" + "str x3, [%[r], 432]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[56] * B\n\t" + "ldp x8, x9, [%[a], 448]\n\t" + "str x4, [%[r], 440]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[57] * B\n\t" + "str x5, [%[r], 448]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[58] * B\n\t" + "ldp x8, x9, [%[a], 464]\n\t" + "str x3, [%[r], 456]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[59] * B\n\t" + "str x4, [%[r], 464]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[60] * B\n\t" + "ldp x8, x9, [%[a], 480]\n\t" + "str x5, [%[r], 472]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[61] * B\n\t" + "str x3, [%[r], 480]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x9\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x5, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[62] * B\n\t" + "ldp x8, x9, [%[a], 496]\n\t" + "str x4, [%[r], 488]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, %[b], x8\n\t" + "adc x3, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[63] * B\n\t" + "str x5, [%[r], 496]\n\t" + "mul x6, %[b], x9\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, %[b], x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "stp x3, x4, [%[r], 504]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); +#endif +} + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 4096 bits, just need to subtract. + * + * r A single precision number. 
+ * m A single precision number. + */ +static void sp_4096_mont_norm_64(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 64); + + /* r = 2^n mod m */ + sp_4096_sub_in_place_64(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_4096_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov x8, #0\n\t" + "1:\n\t" + "subs %[c], xzr, %[c]\n\t" + "ldr x4, [%[a], x8]\n\t" + "ldr x5, [%[b], x8]\n\t" + "and x5, x5, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "csetm %[c], cc\n\t" + "str x4, [%[r], x8]\n\t" + "add x8, x8, #8\n\t" + "cmp x8, 512\n\t" + "b.lt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return c; +#else + __asm__ __volatile__ ( + + "ldp x5, x7, [%[b], 0]\n\t" + "ldp x11, x12, [%[b], 16]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" + "and x7, x7, %[m]\n\t" + "subs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 16]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" + "ldp x11, x12, [%[b], 48]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 48]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 48]\n\t" + "ldp x5, x7, [%[b], 64]\n\t" + "ldp x11, x12, [%[b], 80]\n\t" + "ldp x4, x6, [%[a], 64]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 80]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 64]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 80]\n\t" + "ldp x5, x7, [%[b], 96]\n\t" + "ldp x11, x12, [%[b], 112]\n\t" + "ldp x4, x6, [%[a], 96]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 112]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 96]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 112]\n\t" + "ldp x5, x7, [%[b], 128]\n\t" + "ldp x11, x12, [%[b], 144]\n\t" + "ldp x4, x6, [%[a], 128]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 144]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 128]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 144]\n\t" + "ldp x5, x7, [%[b], 160]\n\t" + "ldp x11, x12, [%[b], 176]\n\t" + "ldp x4, x6, [%[a], 160]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 176]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 160]\n\t" + "sbcs x10, x10, x12\n\t" 
+ "stp x9, x10, [%[r], 176]\n\t" + "ldp x5, x7, [%[b], 192]\n\t" + "ldp x11, x12, [%[b], 208]\n\t" + "ldp x4, x6, [%[a], 192]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 208]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 192]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 208]\n\t" + "ldp x5, x7, [%[b], 224]\n\t" + "ldp x11, x12, [%[b], 240]\n\t" + "ldp x4, x6, [%[a], 224]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 240]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 224]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 240]\n\t" + "ldp x5, x7, [%[b], 256]\n\t" + "ldp x11, x12, [%[b], 272]\n\t" + "ldp x4, x6, [%[a], 256]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 272]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 256]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 272]\n\t" + "ldp x5, x7, [%[b], 288]\n\t" + "ldp x11, x12, [%[b], 304]\n\t" + "ldp x4, x6, [%[a], 288]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 304]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 288]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 304]\n\t" + "ldp x5, x7, [%[b], 320]\n\t" + "ldp x11, x12, [%[b], 336]\n\t" + "ldp x4, x6, [%[a], 320]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 336]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 320]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 336]\n\t" + "ldp x5, x7, [%[b], 352]\n\t" + "ldp x11, x12, [%[b], 368]\n\t" + "ldp x4, x6, [%[a], 352]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 368]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 352]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 368]\n\t" + "ldp x5, x7, [%[b], 384]\n\t" + "ldp x11, x12, [%[b], 400]\n\t" + "ldp x4, x6, [%[a], 384]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 400]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 384]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 400]\n\t" + "ldp x5, x7, [%[b], 416]\n\t" + "ldp x11, x12, [%[b], 432]\n\t" + "ldp x4, x6, [%[a], 416]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 432]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 416]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 432]\n\t" + "ldp x5, x7, [%[b], 448]\n\t" + "ldp x11, x12, [%[b], 464]\n\t" + "ldp x4, x6, [%[a], 448]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 464]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, 
x6, [%[r], 448]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 464]\n\t" + "ldp x5, x7, [%[b], 480]\n\t" + "ldp x11, x12, [%[b], 496]\n\t" + "ldp x4, x6, [%[a], 480]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 496]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 480]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 496]\n\t" + "csetm %[r], cc\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return (sp_digit)r; +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +SP_NOINLINE static void sp_4096_mont_reduce_64(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "ldp x14, x15, [%[m], 0]\n\t" + "ldp x16, x17, [%[m], 16]\n\t" + "ldp x19, x20, [%[m], 32]\n\t" + "ldp x21, x22, [%[m], 48]\n\t" + "ldp x23, x24, [%[m], 64]\n\t" + "ldp x25, x26, [%[m], 80]\n\t" + "ldp x27, x28, [%[m], 96]\n\t" + "# i = 64\n\t" + "mov x4, 64\n\t" + "ldp x12, x13, [%[a], 0]\n\t" + "\n1:\n\t" + "# mu = a[i] * mp\n\t" + "mul x9, %[mp], x12\n\t" + "# a[i+0] += m[0] * mu\n\t" + "mul x7, x14, x9\n\t" + "umulh x8, x14, x9\n\t" + "adds x12, x12, x7\n\t" + "# a[i+1] += m[1] * mu\n\t" + "mul x7, x15, x9\n\t" + "adc x6, x8, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x12, x13, x7\n\t" + "# a[i+2] += m[2] * mu\n\t" + "ldr x13, [%[a], 16]\n\t" + "adc x5, x8, xzr\n\t" + "mul x7, x16, x9\n\t" + "adds x12, x12, x6\n\t" + "umulh x8, x16, x9\n\t" + "adc x5, x5, xzr\n\t" + "adds x13, x13, x7\n\t" + "# a[i+3] += m[3] * mu\n\t" + "ldr x10, [%[a], 24]\n\t" + "adc x6, x8, xzr\n\t" + "mul x7, x17, x9\n\t" + "adds x13, x13, x5\n\t" + "umulh x8, x17, x9\n\t" + "adc x6, x6, xzr\n\t" + "adds x10, x10, x7\n\t" + "# a[i+4] += m[4] * mu\n\t" + "ldr x11, [%[a], 32]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x19, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x19, x9\n\t" + "str x10, [%[a], 24]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+5] += m[5] * mu\n\t" + "ldr x10, [%[a], 40]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x20, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x20, x9\n\t" + "str x11, [%[a], 32]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+6] += m[6] * mu\n\t" + "ldr x11, [%[a], 48]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x21, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x21, x9\n\t" + "str x10, [%[a], 40]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+7] += m[7] * mu\n\t" + "ldr x10, [%[a], 56]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x22, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x22, x9\n\t" + "str x11, [%[a], 48]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+8] += m[8] * mu\n\t" + "ldr x11, [%[a], 64]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x23, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x23, x9\n\t" + "str x10, [%[a], 56]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+9] += m[9] * mu\n\t" + "ldr x10, [%[a], 72]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x24, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x24, x9\n\t" + "str x11, [%[a], 64]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+10] += m[10] * 
mu\n\t" + "ldr x11, [%[a], 80]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x25, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x25, x9\n\t" + "str x10, [%[a], 72]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+11] += m[11] * mu\n\t" + "ldr x10, [%[a], 88]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x26, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x26, x9\n\t" + "str x11, [%[a], 80]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+12] += m[12] * mu\n\t" + "ldr x11, [%[a], 96]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x27, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x27, x9\n\t" + "str x10, [%[a], 88]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+13] += m[13] * mu\n\t" + "ldr x10, [%[a], 104]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x28, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x28, x9\n\t" + "str x11, [%[a], 96]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+14] += m[14] * mu\n\t" + "ldr x11, [%[a], 112]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 112]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 104]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+15] += m[15] * mu\n\t" + "ldr x10, [%[a], 120]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 120]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 112]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+16] += m[16] * mu\n\t" + "ldr x11, [%[a], 128]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 128]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 120]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+17] += m[17] * mu\n\t" + "ldr x10, [%[a], 136]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 136]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 128]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+18] += m[18] * mu\n\t" + "ldr x11, [%[a], 144]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 144]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 136]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+19] += m[19] * mu\n\t" + "ldr x10, [%[a], 152]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 152]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 144]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+20] += m[20] * mu\n\t" + "ldr x11, [%[a], 160]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 160]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 152]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+21] += m[21] * mu\n\t" + "ldr x10, [%[a], 168]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 168]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 160]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+22] += m[22] * mu\n\t" + "ldr x11, [%[a], 176]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 176]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 168]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+23] += m[23] * mu\n\t" + "ldr x10, [%[a], 184]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 184]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 
176]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+24] += m[24] * mu\n\t" + "ldr x11, [%[a], 192]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 192]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 184]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+25] += m[25] * mu\n\t" + "ldr x10, [%[a], 200]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 200]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 192]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+26] += m[26] * mu\n\t" + "ldr x11, [%[a], 208]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 208]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 200]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+27] += m[27] * mu\n\t" + "ldr x10, [%[a], 216]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 216]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 208]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+28] += m[28] * mu\n\t" + "ldr x11, [%[a], 224]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 224]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 216]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+29] += m[29] * mu\n\t" + "ldr x10, [%[a], 232]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 232]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 224]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+30] += m[30] * mu\n\t" + "ldr x11, [%[a], 240]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 240]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 232]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+31] += m[31] * mu\n\t" + "ldr x10, [%[a], 248]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 248]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 240]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+32] += m[32] * mu\n\t" + "ldr x11, [%[a], 256]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 256]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 248]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+33] += m[33] * mu\n\t" + "ldr x10, [%[a], 264]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 264]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 256]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+34] += m[34] * mu\n\t" + "ldr x11, [%[a], 272]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 272]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 264]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+35] += m[35] * mu\n\t" + "ldr x10, [%[a], 280]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 280]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 272]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+36] += m[36] * mu\n\t" + "ldr x11, [%[a], 288]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 288]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 280]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+37] += m[37] * mu\n\t" + "ldr x10, [%[a], 296]\n\t" 
+ "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 296]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 288]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+38] += m[38] * mu\n\t" + "ldr x11, [%[a], 304]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 304]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 296]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+39] += m[39] * mu\n\t" + "ldr x10, [%[a], 312]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 312]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 304]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+40] += m[40] * mu\n\t" + "ldr x11, [%[a], 320]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 320]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 312]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+41] += m[41] * mu\n\t" + "ldr x10, [%[a], 328]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 328]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 320]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+42] += m[42] * mu\n\t" + "ldr x11, [%[a], 336]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 336]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 328]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+43] += m[43] * mu\n\t" + "ldr x10, [%[a], 344]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 344]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 336]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+44] += m[44] * mu\n\t" + "ldr x11, [%[a], 352]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 352]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 344]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+45] += m[45] * mu\n\t" + "ldr x10, [%[a], 360]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 360]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 352]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+46] += m[46] * mu\n\t" + "ldr x11, [%[a], 368]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 368]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 360]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+47] += m[47] * mu\n\t" + "ldr x10, [%[a], 376]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 376]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 368]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+48] += m[48] * mu\n\t" + "ldr x11, [%[a], 384]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 384]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 376]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+49] += m[49] * mu\n\t" + "ldr x10, [%[a], 392]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 392]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 384]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+50] += m[50] * mu\n\t" + "ldr x11, [%[a], 400]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 400]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, 
x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 392]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+51] += m[51] * mu\n\t" + "ldr x10, [%[a], 408]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 408]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 400]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+52] += m[52] * mu\n\t" + "ldr x11, [%[a], 416]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 416]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 408]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+53] += m[53] * mu\n\t" + "ldr x10, [%[a], 424]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 424]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 416]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+54] += m[54] * mu\n\t" + "ldr x11, [%[a], 432]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 432]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 424]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+55] += m[55] * mu\n\t" + "ldr x10, [%[a], 440]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 440]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 432]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+56] += m[56] * mu\n\t" + "ldr x11, [%[a], 448]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 448]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 440]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+57] += m[57] * mu\n\t" + "ldr x10, [%[a], 456]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 456]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 448]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+58] += m[58] * mu\n\t" + "ldr x11, [%[a], 464]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 464]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 456]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+59] += m[59] * mu\n\t" + "ldr x10, [%[a], 472]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 472]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 464]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+60] += m[60] * mu\n\t" + "ldr x11, [%[a], 480]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 480]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 472]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+61] += m[61] * mu\n\t" + "ldr x10, [%[a], 488]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 488]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x11, [%[a], 480]\n\t" + "adds x10, x10, x7\n\t" + "# a[i+62] += m[62] * mu\n\t" + "ldr x11, [%[a], 496]\n\t" + "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 496]\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x8, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x8, x9\n\t" + "str x10, [%[a], 488]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+63] += m[63] * mu\n\t" + "ldr x10, [%[a], 504]\n\t" + "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 504]\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x8, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x8, x9\n\t" + "adds x6, x6, x7\n\t" + "adcs x8, x8, 
%[ca]\n\t" + "str x11, [%[a], 496]\n\t" + "cset %[ca], cs\n\t" + "adds x10, x10, x6\n\t" + "ldr x11, [%[a], 512]\n\t" + "str x10, [%[a], 504]\n\t" + "adcs x11, x11, x8\n\t" + "str x11, [%[a], 512]\n\t" + "adc %[ca], %[ca], xzr\n\t" + "subs x4, x4, 1\n\t" + "add %[a], %[a], 8\n\t" + "bne 1b\n\t" + "stp x12, x13, [%[a], 0]\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + ); + + sp_4096_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_4096_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_4096_mul_64(r, a, b); + sp_4096_mont_reduce_64(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_4096_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_4096_sqr_64(r, a); + sp_4096_mont_reduce_64(r, m, mp); +} + +/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The dividend. + * returns the result of the division. + */ +static sp_digit div_4096_word_64(sp_digit d1, sp_digit d0, sp_digit div) +{ + sp_digit r; + + __asm__ __volatile__ ( + "lsr x5, %[div], 32\n\t" + "add x5, x5, 1\n\t" + + "udiv x3, %[d1], x5\n\t" + "lsl x6, x3, 32\n\t" + "mul x4, %[div], x6\n\t" + "umulh x3, %[div], x6\n\t" + "subs %[d0], %[d0], x4\n\t" + "sbc %[d1], %[d1], x3\n\t" + + "udiv x3, %[d1], x5\n\t" + "lsl x3, x3, 32\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "umulh x3, %[div], x3\n\t" + "subs %[d0], %[d0], x4\n\t" + "sbc %[d1], %[d1], x3\n\t" + + "lsr x3, %[d0], 32\n\t" + "orr x3, x3, %[d1], lsl 32\n\t" + + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "umulh x3, %[div], x3\n\t" + "subs %[d0], %[d0], x4\n\t" + "sbc %[d1], %[d1], x3\n\t" + + "lsr x3, %[d0], 32\n\t" + "orr x3, x3, %[d1], lsl 32\n\t" + + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "sub %[d0], %[d0], x4\n\t" + + "udiv x3, %[d0], %[div]\n\t" + "add %[r], x6, x3\n\t" + + : [r] "=r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "x3", "x4", "x5", "x6" + ); + + return r; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<64; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 64; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. 
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static int64_t sp_4096_cmp_64(const sp_digit* a, const sp_digit* b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov x2, -1\n\t" + "mov x3, 1\n\t" + "mov x4, -1\n\t" + "mov x5, 504\n\t" + "1:\n\t" + "ldr x6, [%[a], x5]\n\t" + "ldr x7, [%[b], x5]\n\t" + "and x6, x6, x4\n\t" + "and x7, x7, x4\n\t" + "subs x6, x6, x7\n\t" + "csel x2, x3, x2, hi\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "subs x5, x5, #8\n\t" + "b.cs 1b\n\t" + "eor %[a], x2, x4\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + ); +#else + __asm__ __volatile__ ( + "mov x2, -1\n\t" + "mov x3, 1\n\t" + "mov x4, -1\n\t" + "ldp x5, x6, [%[a], 496]\n\t" + "ldp x7, x8, [%[b], 496]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 480]\n\t" + "ldp x7, x8, [%[b], 480]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 464]\n\t" + "ldp x7, x8, [%[b], 464]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 448]\n\t" + "ldp x7, x8, [%[b], 448]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 432]\n\t" + "ldp x7, x8, [%[b], 432]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 416]\n\t" + "ldp x7, x8, [%[b], 416]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 400]\n\t" + "ldp x7, x8, [%[b], 400]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 384]\n\t" + "ldp x7, x8, [%[b], 384]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, 
eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 368]\n\t" + "ldp x7, x8, [%[b], 368]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 352]\n\t" + "ldp x7, x8, [%[b], 352]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 336]\n\t" + "ldp x7, x8, [%[b], 336]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 320]\n\t" + "ldp x7, x8, [%[b], 320]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 304]\n\t" + "ldp x7, x8, [%[b], 304]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 288]\n\t" + "ldp x7, x8, [%[b], 288]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 272]\n\t" + "ldp x7, x8, [%[b], 272]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 256]\n\t" + "ldp x7, x8, [%[b], 256]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 240]\n\t" + "ldp x7, x8, [%[b], 240]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, 
hi\n\t" + "ldp x5, x6, [%[a], 224]\n\t" + "ldp x7, x8, [%[b], 224]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 208]\n\t" + "ldp x7, x8, [%[b], 208]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 192]\n\t" + "ldp x7, x8, [%[b], 192]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "ldp x7, x8, [%[b], 176]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 160]\n\t" + "ldp x7, x8, [%[b], 160]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "ldp x7, x8, [%[b], 144]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 128]\n\t" + "ldp x7, x8, [%[b], 128]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "ldp x7, x8, [%[b], 112]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 96]\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "ldp x7, x8, [%[b], 80]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, 
eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 64]\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "ldp x7, x8, [%[b], 48]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 32]\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "ldp x7, x8, [%[b], 16]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "ldp x5, x6, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "and x6, x6, x4\n\t" + "and x8, x8, x4\n\t" + "subs x6, x6, x8\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x7, x7, x4\n\t" + "subs x5, x5, x7\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "eor %[a], x2, x4\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + ); +#endif + + return (int64_t)a; +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[128], t2[65]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[63]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 64); + for (i=63; i>=0; i--) { + r1 = div_4096_word_64(t1[64 + i], t1[64 + i - 1], div); + + sp_4096_mul_d_64(t2, d, r1); + t1[64 + i] += sp_4096_sub_in_place_64(&t1[i], t2); + t1[64 + i] -= t2[64]; + sp_4096_mask_64(t2, d, t1[64 + i]); + t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], t2); + sp_4096_mask_64(t2, d, t1[64 + i]); + t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], t2); + } + + r1 = sp_4096_cmp_64(t1, d) >= 0; + sp_4096_cond_sub_64(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. 
+ */ +static WC_INLINE int sp_4096_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_64(a, m, NULL, r); +} + +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_4096_sub_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x11, %[a], 512\n\t" + "\n1:\n\t" + "subs %[c], xzr, %[c]\n\t" + "ldp x3, x4, [%[a]], #16\n\t" + "ldp x5, x6, [%[a]], #16\n\t" + "ldp x7, x8, [%[b]], #16\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x9, x10, [%[b]], #16\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r]], #16\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r]], #16\n\t" + "csetm %[c], cc\n\t" + "cmp %[a], x11\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + return c; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_4096_sub_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "subs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 48]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 80]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 112]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 112]\n\t" + "ldp x3, x4, [%[a], 128]\n\t" + "ldp x7, x8, [%[b], 128]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 144]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 128]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 144]\n\t" + "ldp x3, x4, [%[a], 160]\n\t" + "ldp x7, x8, [%[b], 160]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 176]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 160]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 176]\n\t" + "ldp x3, x4, [%[a], 192]\n\t" + "ldp x7, x8, [%[b], 192]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 208]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 208]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 192]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 208]\n\t" + "ldp x3, x4, [%[a], 224]\n\t" + "ldp x7, x8, [%[b], 224]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 240]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 240]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 224]\n\t" + "sbcs x6, x6, x10\n\t" 
+ "stp x5, x6, [%[r], 240]\n\t" + "ldp x3, x4, [%[a], 256]\n\t" + "ldp x7, x8, [%[b], 256]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 272]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 272]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 256]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 272]\n\t" + "ldp x3, x4, [%[a], 288]\n\t" + "ldp x7, x8, [%[b], 288]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 304]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 304]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 288]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 304]\n\t" + "ldp x3, x4, [%[a], 320]\n\t" + "ldp x7, x8, [%[b], 320]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 336]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 336]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 320]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 336]\n\t" + "ldp x3, x4, [%[a], 352]\n\t" + "ldp x7, x8, [%[b], 352]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 368]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 368]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 352]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 368]\n\t" + "ldp x3, x4, [%[a], 384]\n\t" + "ldp x7, x8, [%[b], 384]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 400]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 400]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 384]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 400]\n\t" + "ldp x3, x4, [%[a], 416]\n\t" + "ldp x7, x8, [%[b], 416]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 432]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 432]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 416]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 432]\n\t" + "ldp x3, x4, [%[a], 448]\n\t" + "ldp x7, x8, [%[b], 448]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 464]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 464]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 448]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 464]\n\t" + "ldp x3, x4, [%[a], 480]\n\t" + "ldp x7, x8, [%[b], 480]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 496]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 496]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 480]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 496]\n\t" + "csetm %[r], cc\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return (sp_digit)r; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. 
+ */ +static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[128], t2[65]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[63]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 64); + for (i=63; i>=0; i--) { + r1 = div_4096_word_64(t1[64 + i], t1[64 + i - 1], div); + + sp_4096_mul_d_64(t2, d, r1); + t1[64 + i] += sp_4096_sub_in_place_64(&t1[i], t2); + t1[64 + i] -= t2[64]; + if (t1[64 + i] != 0) { + t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], d); + if (t1[64 + i] != 0) + t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], d); + } + } + + for (i = 63; i > 0; i--) { + if (t1[i] != d[i]) + break; + } + if (t1[i] >= d[i]) { + sp_4096_sub_64(r, t1, d); + } + else { + XMEMCPY(r, t1, sizeof(*t1) * 64); + } + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_64_cond(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][128]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 128, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 128; + } +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_64(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 64U); + if (reduceA != 0) { + err = sp_4096_mod_64(t[1] + 64, a, m); + if (err == MP_OKAY) { + err = sp_4096_mod_64(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64); + err = sp_4096_mod_64(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_64(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_64(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_64(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_64(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_64(t[10], t[ 5], m, mp); + sp_4096_mont_mul_64(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_64(t[12], t[ 6], m, mp); + sp_4096_mont_mul_64(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_64(t[14], t[ 7], m, mp); + sp_4096_mont_mul_64(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 4; + if (c == 
64) { + c = 60; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 64); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 60; + n <<= 4; + c = 60; + } + else if (c < 4) { + y = n >> 60; + n = e[i--]; + c = 4 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 60) & 0xf; + n <<= 4; + c -= 4; + } + + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + + sp_4096_mont_mul_64(r, r, t[y], m, mp); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U); + sp_4096_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_4096_cmp_64(r, m) >= 0); + sp_4096_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][128]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 128; + } +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_64(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 64U); + if (reduceA != 0) { + err = sp_4096_mod_64(t[1] + 64, a, m); + if (err == MP_OKAY) { + err = sp_4096_mod_64(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64); + err = sp_4096_mod_64(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_64(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_64(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_64(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_64(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_64(t[10], t[ 5], m, mp); + sp_4096_mont_mul_64(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_64(t[12], t[ 6], m, mp); + sp_4096_mont_mul_64(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_64(t[14], t[ 7], m, mp); + sp_4096_mont_mul_64(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_64(t[16], t[ 8], m, mp); + sp_4096_mont_mul_64(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_64(t[18], t[ 9], m, mp); + sp_4096_mont_mul_64(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_64(t[20], t[10], m, mp); + sp_4096_mont_mul_64(t[21], t[11], t[10], m, mp); + sp_4096_mont_sqr_64(t[22], t[11], m, mp); + sp_4096_mont_mul_64(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_64(t[24], t[12], m, mp); + sp_4096_mont_mul_64(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_64(t[26], t[13], m, mp); + sp_4096_mont_mul_64(t[27], t[14], t[13], m, mp); + 
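
The t[] table being filled here holds the first 32 powers of a in Montgomery form: t[2k] comes from squaring t[k], and t[2k+1] from multiplying t[k+1] by t[k]. The loop that follows then consumes the exponent five bits at a time, squaring five times and multiplying by the selected table entry per window (the 16-entry variant above does the same with 4-bit windows). A single-word model of the scan for a fixed 64-bit exponent; mulmod and exp_win5_ref are illustrative stand-ins for the Montgomery operations, and the real code additionally handles bit counts that are not a multiple of the window size.

#include <stdint.h>

static uint64_t mulmod(uint64_t a, uint64_t b, uint64_t m)
{
    return (uint64_t)(((unsigned __int128)a * b) % m);
}

/* Left-to-right 5-bit fixed-window exponentiation for one 64-bit word,
 * mirroring the c/y/n bookkeeping of the real loop. */
static uint64_t exp_win5_ref(uint64_t a, uint64_t e, uint64_t m)
{
    uint64_t t[32];
    uint64_t r;
    int c = 64 % 5;   /* first (short) window: 4 bits */
    int shift, i;

    t[0] = 1 % m;     /* like norm: the value "1"     */
    t[1] = a % m;
    for (i = 2; i < 32; i++) {
        t[i] = mulmod(t[i - 1], t[1], m);
    }

    shift = 64 - c;
    r = t[e >> shift];                    /* leading partial window  */
    while (shift > 0) {
        shift -= 5;
        for (i = 0; i < 5; i++) {
            r = mulmod(r, r, m);          /* r = r^32                */
        }
        r = mulmod(r, t[(e >> shift) & 0x1f], m);
    }
    return r;
}
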
sp_4096_mont_sqr_64(t[28], t[14], m, mp); + sp_4096_mont_mul_64(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_64(t[30], t[15], m, mp); + sp_4096_mont_mul_64(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 5; + if (c == 64) { + c = 59; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 64); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 59; + n <<= 5; + c = 59; + } + else if (c < 5) { + y = n >> 59; + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + + sp_4096_mont_mul_64(r, r, t[y], m, mp); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U); + sp_4096_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_4096_cmp_64(r, m) >= 0); + sp_4096_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[128], m[64], r[128]; +#else + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; +#endif + sp_digit *ah; + sp_digit e[1]; + int err = MP_OKAY; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 512 || + mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 64 * 2; + m = r + 64 * 2; + } +#endif + + if (err == MP_OKAY) { + ah = a + 64; + + sp_4096_from_bin(ah, 64, in, inLen); +#if DIGIT_BIT >= 64 + e[0] = em->dp[0]; +#else + e[0] = em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_4096_from_mp(m, 64, mm); + + if (e[0] == 0x3) { + if (err == MP_OKAY) { + sp_4096_sqr_64(r, ah); + err = sp_4096_mod_64_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_4096_mul_64(r, ah, r); + err = sp_4096_mod_64_cond(r, r, m); + } + } + else { + int i; + sp_digit mp; + + sp_4096_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
*/ + XMEMSET(a, 0, sizeof(sp_digit) * 64); + err = sp_4096_mod_64_cond(a, a, m); + + if (err == MP_OKAY) { + for (i = 63; i >= 0; i--) { + if (e[0] >> i) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 64); + for (i--; i>=0; i--) { + sp_4096_mont_sqr_64(r, r, m, mp); + if (((e[0] >> i) & 1) == 1) { + sp_4096_mont_mul_64(r, r, a, m, mp); + } + } + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64); + sp_4096_mont_reduce_64(r, m, mp); + + for (i = 63; i > 0; i--) { + if (r[i] != m[i]) { + break; + } + } + if (r[i] >= m[i]) { + sp_4096_sub_in_place_64(r, m); + } + } + } + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } +#endif + + return err; +} + +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ + sp_digit* a; + sp_digit* d = NULL; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 4096) { + err = MP_READ_E; + } + if (inLen > 512) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = d + 64; + m = a + 128; + r = a; + + sp_4096_from_bin(a, 64, in, inLen); + sp_4096_from_mp(d, 64, dm); + sp_4096_from_mp(m, 64, mm); + err = sp_4096_mod_exp_64(r, a, d, 4096, m, 0); + } + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + + if (d != NULL) { + XMEMSET(d, 0, sizeof(sp_digit) * 64); + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +} +#else +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply.
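
The masked add documented above is the addition counterpart of sp_4096_cond_sub_64: with m == 0 it adds nothing, with m == all-ones it adds b, and either way it touches every word, so the timing does not depend on the mask. The CRT code below uses it twice to repair a possible underflow of tmpa - tmpb by adding p back under the borrow mask. A word-level C model; cond_add_ref is an illustrative name, not the patch's code.

#include <stdint.h>

/* r = a + (b & m) with full carry propagation; m is 0 or all-ones.
 * Returns the carry out (cset cs in the assembly). */
static uint64_t cond_add_ref(uint64_t* r, const uint64_t* a,
                             const uint64_t* b, uint64_t m, int n)
{
    uint64_t carry = 0;  /* 0 or 1 */
    int i;

    for (i = 0; i < n; i++) {
        uint64_t bi = b[i] & m;
        uint64_t s  = a[i] + bi;
        uint64_t c1 = (uint64_t)(s < a[i]);

        r[i]  = s + carry;
        carry = c1 | (uint64_t)(r[i] < s);
    }
    return carry;
}
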
+ */ +static sp_digit sp_4096_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov x8, #0\n\t" + "1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr x4, [%[a], x8]\n\t" + "ldr x5, [%[b], x8]\n\t" + "and x5, x5, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "cset %[c], cs\n\t" + "str x4, [%[r], x8]\n\t" + "add x8, x8, #8\n\t" + "cmp x8, 256\n\t" + "b.lt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return c; +#else + __asm__ __volatile__ ( + + "ldp x5, x7, [%[b], 0]\n\t" + "ldp x11, x12, [%[b], 16]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" + "and x7, x7, %[m]\n\t" + "adds x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 16]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" + "ldp x11, x12, [%[b], 48]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 48]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 48]\n\t" + "ldp x5, x7, [%[b], 64]\n\t" + "ldp x11, x12, [%[b], 80]\n\t" + "ldp x4, x6, [%[a], 64]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 80]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 64]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 80]\n\t" + "ldp x5, x7, [%[b], 96]\n\t" + "ldp x11, x12, [%[b], 112]\n\t" + "ldp x4, x6, [%[a], 96]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 112]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 96]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 112]\n\t" + "ldp x5, x7, [%[b], 128]\n\t" + "ldp x11, x12, [%[b], 144]\n\t" + "ldp x4, x6, [%[a], 128]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 144]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 128]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 144]\n\t" + "ldp x5, x7, [%[b], 160]\n\t" + "ldp x11, x12, [%[b], 176]\n\t" + "ldp x4, x6, [%[a], 160]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 176]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 160]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 176]\n\t" + "ldp x5, x7, [%[b], 192]\n\t" + "ldp x11, x12, [%[b], 208]\n\t" + "ldp x4, x6, [%[a], 192]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 208]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 192]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 208]\n\t" + "ldp x5, x7, [%[b], 224]\n\t" + "ldp x11, x12, [%[b], 240]\n\t" + "ldp x4, x6, [%[a], 224]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, 
x10, [%[a], 240]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 224]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 240]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return (sp_digit)r; +#endif /* WOLFSSL_SP_SMALL */ +} + +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[64 * 2]; + sp_digit p[32], q[32], dp[32]; + sp_digit tmpa[64], tmpb[64]; +#else + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* tmpa; + sp_digit* tmpb; +#endif + sp_digit* r; + sp_digit* qi; + sp_digit* dq; + sp_digit c; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 64 * 2; + q = p + 32; + qi = dq = dp = q + 32; + tmpa = qi + 32; + tmpb = tmpa + 64; + + r = t + 64; + } +#else +#endif + + if (err == MP_OKAY) { +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + r = a; + qi = dq = dp; +#endif + sp_4096_from_bin(a, 64, in, inLen); + sp_4096_from_mp(p, 32, pm); + sp_4096_from_mp(q, 32, qm); + sp_4096_from_mp(dp, 32, dpm); + + err = sp_2048_mod_exp_32(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + sp_4096_from_mp(dq, 32, dqm); + err = sp_2048_mod_exp_32(tmpb, a, dq, 2048, q, 1); + } + + if (err == MP_OKAY) { + c = sp_2048_sub_in_place_32(tmpa, tmpb); + c += sp_4096_cond_add_32(tmpa, tmpa, p, c); + sp_4096_cond_add_32(tmpa, tmpa, p, c); + + sp_2048_from_mp(qi, 32, qim); + sp_2048_mul_32(tmpa, tmpa, qi); + err = sp_2048_mod_32(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_2048_mul_32(tmpa, q, tmpa); + XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32); + sp_4096_add_64(r, tmpb, tmpa); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); +#endif + + return 
err; +} +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_4096_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 64 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 64); + r->used = 64; + mp_clamp(r); +#elif DIGIT_BIT < 64 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 64; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 64) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 64 - s; + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 64; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 64 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 64 - s; + } + else { + s += 64; + } + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
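+ *
+ * As an aside, a minimal sketch of the CRT recombination performed by
+ * sp_RsaPrivate_4096 above, using toy single-word values (Garner's
+ * formula; every name here is local to the example, not the wolfSSL API):
+ */
+#include <stdint.h>
+#include <stdio.h>
+
+static uint64_t modexp(uint64_t b, uint64_t e, uint64_t m)
+{
+    uint64_t r = 1;
+    for (b %= m; e > 0; e >>= 1) {
+        if (e & 1)
+            r = (r * b) % m;
+        b = (b * b) % m;
+    }
+    return r;
+}
+
+int main(void)
+{
+    /* Toy RSA: p = 11, q = 13, n = 143, e = 7, d = 103. */
+    const uint64_t p = 11, q = 13, e = 7, n = 143, d = 103;
+    const uint64_t dp = d % (p - 1);              /* CRT exponent mod p-1 */
+    const uint64_t dq = d % (q - 1);              /* CRT exponent mod q-1 */
+    const uint64_t qi = modexp(q, p - 2, p);      /* q^-1 mod p */
+    uint64_t c  = modexp(42, e, n);               /* encrypt message 42 */
+    uint64_t m1 = modexp(c, dp, p);               /* c^dP mod p */
+    uint64_t m2 = modexp(c, dq, q);               /* c^dQ mod q */
+    uint64_t h  = ((m1 + p - (m2 % p)) * qi) % p; /* qi*(m1 - m2) mod p */
+    printf("%llu\n", (unsigned long long)(m2 + h * q));  /* prints 42 */
+    return 0;
+}
+/* sp_ModExp_4096 wraps sp_4096_mod_exp_64 for mp_int arguments.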
+ */ +int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[128], e[64], m[64]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 64, base); + sp_4096_from_mp(e, 64, exp); + sp_4096_from_mp(m, 64, mod); + + err = sp_4096_mod_exp_64(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_4096_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_4096 +static void sp_4096_lshift_64(sp_digit* r, sp_digit* a, byte n) +{ + __asm__ __volatile__ ( + "mov x6, 63\n\t" + "sub x6, x6, %[n]\n\t" + "ldr x3, [%[a], 504]\n\t" + "lsr x4, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x4, x4, x6\n\t" + "ldr x2, [%[a], 496]\n\t" + "str x4, [%[r], 512]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 488]\n\t" + "str x3, [%[r], 504]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 480]\n\t" + "str x2, [%[r], 496]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 472]\n\t" + "str x4, [%[r], 488]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 464]\n\t" + "str x3, [%[r], 480]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 456]\n\t" + "str x2, [%[r], 472]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 448]\n\t" + "str x4, [%[r], 464]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 440]\n\t" + "str x3, [%[r], 456]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 432]\n\t" + "str x2, [%[r], 448]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 424]\n\t" + "str x4, [%[r], 440]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 416]\n\t" + "str x3, [%[r], 432]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 408]\n\t" + "str x2, [%[r], 424]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 400]\n\t" + "str x4, [%[r], 416]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 392]\n\t" + "str x3, [%[r], 408]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 384]\n\t" + "str x2, [%[r], 400]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 376]\n\t" + "str x4, [%[r], 392]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 368]\n\t" + "str x3, [%[r], 384]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 
360]\n\t" + "str x2, [%[r], 376]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 352]\n\t" + "str x4, [%[r], 368]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 344]\n\t" + "str x3, [%[r], 360]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 336]\n\t" + "str x2, [%[r], 352]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 328]\n\t" + "str x4, [%[r], 344]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 320]\n\t" + "str x3, [%[r], 336]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 312]\n\t" + "str x2, [%[r], 328]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 304]\n\t" + "str x4, [%[r], 320]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 296]\n\t" + "str x3, [%[r], 312]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 288]\n\t" + "str x2, [%[r], 304]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 280]\n\t" + "str x4, [%[r], 296]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 272]\n\t" + "str x3, [%[r], 288]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 264]\n\t" + "str x2, [%[r], 280]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 256]\n\t" + "str x4, [%[r], 272]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 248]\n\t" + "str x3, [%[r], 264]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 240]\n\t" + "str x2, [%[r], 256]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 232]\n\t" + "str x4, [%[r], 248]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 224]\n\t" + "str x3, [%[r], 240]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 216]\n\t" + "str x2, [%[r], 232]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 208]\n\t" + "str x4, [%[r], 224]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 200]\n\t" + "str x3, [%[r], 216]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 192]\n\t" + "str x2, [%[r], 208]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 184]\n\t" + "str x4, [%[r], 200]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 176]\n\t" + "str x3, [%[r], 192]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, 
x5\n\t" + "ldr x3, [%[a], 168]\n\t" + "str x2, [%[r], 184]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 160]\n\t" + "str x4, [%[r], 176]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 152]\n\t" + "str x3, [%[r], 168]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 144]\n\t" + "str x2, [%[r], 160]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 136]\n\t" + "str x4, [%[r], 152]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 128]\n\t" + "str x3, [%[r], 144]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 120]\n\t" + "str x2, [%[r], 136]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 112]\n\t" + "str x4, [%[r], 128]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 104]\n\t" + "str x3, [%[r], 120]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 96]\n\t" + "str x2, [%[r], 112]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 88]\n\t" + "str x4, [%[r], 104]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 80]\n\t" + "str x3, [%[r], 96]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 72]\n\t" + "str x2, [%[r], 88]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 64]\n\t" + "str x4, [%[r], 80]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 56]\n\t" + "str x3, [%[r], 72]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 48]\n\t" + "str x2, [%[r], 64]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 40]\n\t" + "str x4, [%[r], 56]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 32]\n\t" + "str x3, [%[r], 48]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 24]\n\t" + "str x2, [%[r], 40]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "ldr x2, [%[a], 16]\n\t" + "str x4, [%[r], 32]\n\t" + "lsr x5, x2, 1\n\t" + "lsl x2, x2, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x3, x3, x5\n\t" + "ldr x4, [%[a], 8]\n\t" + "str x3, [%[r], 24]\n\t" + "lsr x5, x4, 1\n\t" + "lsl x4, x4, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x2, x2, x5\n\t" + "ldr x3, [%[a], 0]\n\t" + "str x2, [%[r], 16]\n\t" + "lsr x5, x3, 1\n\t" + "lsl x3, x3, %[n]\n\t" + "lsr x5, x5, x6\n\t" + "orr x4, x4, x5\n\t" + "str x3, [%[r]]\n\t" + "str x4, [%[r], 8]\n\t" + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) + : "memory", "x2", "x3", "x4", "x5", "x6" + ); +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. 
+ * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_4096_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[128]; + sp_digit td[65]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 128; +#else + norm = nd; + tmp = td; +#endif + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_64(norm, m); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 6; + if (c == 64) { + c = 58; + } + y = (int)(n >> c); + n <<= 64 - c; + sp_4096_lshift_64(r, norm, y); + for (; i>=0 || c>=6; ) { + if (c == 0) { + n = e[i--]; + y = n >> 58; + n <<= 6; + c = 58; + } + else if (c < 6) { + y = n >> 58; + n = e[i--]; + c = 6 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 58) & 0x3f; + n <<= 6; + c -= 6; + } + + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + + sp_4096_lshift_64(r, r, y); + sp_4096_mul_d_64(tmp, norm, r[64]); + r[64] = 0; + o = sp_4096_add_64(r, r, tmp); + sp_4096_cond_sub_64(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U); + sp_4096_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_4096_cmp_64(r, m) >= 0); + sp_4096_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* HAVE_FFDHE_4096 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
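+ *
+ * A minimal sketch of the base-2 fast path above: sp_4096_mod_exp_2_64
+ * consumes six exponent bits per step, but the idea is the one-bit form
+ * below, where multiplying by a power of two is a shift (what
+ * sp_4096_lshift_64 does across 64 words) followed by a reduction.
+ * Illustrative only; names are local to the example.
+ */
+#include <stdint.h>
+#include <stdio.h>
+
+/* r = 2^e mod m for an odd modulus m below 2^31. */
+static uint64_t exp2_mod(uint32_t e, uint64_t m)
+{
+    uint64_t r = 1;
+    int i;
+    for (i = 31; i >= 0; i--) {
+        r = (r * r) % m;        /* square once per exponent bit */
+        if ((e >> i) & 1) {
+            r <<= 1;            /* multiply by the base 2 == shift */
+            if (r >= m)
+                r -= m;         /* single conditional reduction */
+        }
+    }
+    return r;
+}
+
+int main(void)
+{
+    uint64_t m = 2147483647u;   /* 2^31 - 1, a prime */
+    /* Fermat: 2^(m-1) mod m == 1. */
+    printf("%llu\n", (unsigned long long)exp2_mod((uint32_t)(m - 1), m));
+    return 0;
+}
+/* sp_DhExp_4096: DH exponentiation with a byte-array exponent; leading
+ * zero bytes are stripped from the output.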
+ */ +int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ + int err = MP_OKAY; + sp_digit b[128], e[64], m[64]; + sp_digit* r = b; + word32 i; + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 512) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 64, base); + sp_4096_from_bin(e, 64, exp, expLen); + sp_4096_from_mp(m, 64, mod); + + #ifdef HAVE_FFDHE_4096 + if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1) + err = sp_4096_mod_exp_2_64(r, e, expLen * 8, m); + else + #endif + err = sp_4096_mod_exp_64(r, b, e, expLen * 8, m, 0); + + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + for (i=0; i<512 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* WOLFSSL_SP_4096 */ + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +#ifdef WOLFSSL_HAVE_SP_ECC +#ifndef WOLFSSL_SP_NO_256 + +/* Point structure to use. */ +typedef struct sp_point_256 { + sp_digit x[2 * 4]; + sp_digit y[2 * 4]; + sp_digit z[2 * 4]; + int infinity; +} sp_point_256; + +/* The modulus (prime) of the curve P256. */ +static const sp_digit p256_mod[4] = { + 0xffffffffffffffffL,0x00000000ffffffffL,0x0000000000000000L, + 0xffffffff00000001L +}; +/* The Montogmery normalizer for modulus of the curve P256. */ +static const sp_digit p256_norm_mod[4] = { + 0x0000000000000001L,0xffffffff00000000L,0xffffffffffffffffL, + 0x00000000fffffffeL +}; +/* The Montogmery multiplier for modulus of the curve P256. */ +static const sp_digit p256_mp_mod = 0x0000000000000001; +#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ + defined(HAVE_ECC_VERIFY) +/* The order of the curve P256. */ +static const sp_digit p256_order[4] = { + 0xf3b9cac2fc632551L,0xbce6faada7179e84L,0xffffffffffffffffL, + 0xffffffff00000000L +}; +#endif +/* The order of the curve P256 minus 2. */ +static const sp_digit p256_order2[4] = { + 0xf3b9cac2fc63254fL,0xbce6faada7179e84L,0xffffffffffffffffL, + 0xffffffff00000000L +}; +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montogmery normalizer for order of the curve P256. */ +static const sp_digit p256_norm_order[4] = { + 0x0c46353d039cdaafL,0x4319055258e8617bL,0x0000000000000000L, + 0x00000000ffffffffL +}; +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montogmery multiplier for order of the curve P256. */ +static const sp_digit p256_mp_order = 0xccd1c8aaee00bc4fL; +#endif +#ifdef WOLFSSL_SP_SMALL +/* The base point of curve P256. 
+ */
+static const sp_point_256 p256_base = {
+    /* X ordinate */
+    {
+        0xf4a13945d898c296L,0x77037d812deb33a0L,0xf8bce6e563a440f2L,
+        0x6b17d1f2e12c4247L,
+        0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0xcbb6406837bf51f5L,0x2bce33576b315eceL,0x8ee7eb4a7c0f9e16L,
+        0x4fe342e2fe1a7f9bL,
+        0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x0000000000000001L,0x0000000000000000L,0x0000000000000000L,
+        0x0000000000000000L,
+        0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#endif /* WOLFSSL_SP_SMALL */
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p256_b[4] = {
+    0x3bce3c3e27d2604bL,0x651d06b0cc53b0f6L,0xb3ebbd55769886bcL,
+    0x5ac635d8aa3a93e7L
+};
+#endif
+
+static int sp_256_point_new_ex_4(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_256_point_new_4(heap, sp, p) sp_256_point_new_ex_4((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_256_point_new_4(heap, sp, p) sp_256_point_new_ex_4((heap), &(sp), &(p))
+#endif
+
+
+static void sp_256_point_free_4(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by the Montgomery normalizer mod the modulus (prime).
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime).
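+ *
+ * What "multiply by the normalizer" means, scaled down to one 16-bit word
+ * (illustrative only): the normalizer is R mod m with R the word radix,
+ * and multiplying by it converts a into Montgomery form a*R mod m.
+ */
+#include <stdint.h>
+#include <stdio.h>
+
+int main(void)
+{
+    const uint32_t m = 65521;                 /* toy prime below 2^16 */
+    const uint32_t norm = (1u << 16) % m;     /* R mod m, here 15 */
+    uint32_t a = 12345;
+    uint32_t mont_a = (uint32_t)(((uint64_t)a * norm) % m);
+    /* Same value computed directly as (a << 16) mod m. */
+    printf("%u %u\n", mont_a, (uint32_t)(((uint64_t)a << 16) % m));
+    return 0;
+}
+/* sp_256_mod_mul_norm_4 below does this for P-256 with R = 2^256, using
+ * the fixed coefficient rows of the NIST fast reduction on 32-bit
+ * half-words.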
+ */ +static int sp_256_mod_mul_norm_4(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + int64_t t[8]; + int64_t a32[8]; + int64_t o; + + (void)m; + + a32[0] = a[0] & 0xffffffff; + a32[1] = a[0] >> 32; + a32[2] = a[1] & 0xffffffff; + a32[3] = a[1] >> 32; + a32[4] = a[2] & 0xffffffff; + a32[5] = a[2] >> 32; + a32[6] = a[3] & 0xffffffff; + a32[7] = a[3] >> 32; + + /* 1 1 0 -1 -1 -1 -1 0 */ + t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6]; + /* 0 1 1 0 -1 -1 -1 -1 */ + t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7]; + /* 0 0 1 1 0 -1 -1 -1 */ + t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7]; + /* -1 -1 0 2 2 1 0 -1 */ + t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7]; + /* 0 -1 -1 0 2 2 1 0 */ + t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6]; + /* 0 0 -1 -1 0 2 2 1 */ + t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7]; + /* -1 -1 0 0 0 1 3 2 */ + t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7]; + /* 1 0 -1 -1 -1 -1 0 3 */ + t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7]; + + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + o = t[7] >> 32; t[7] &= 0xffffffff; + t[0] += o; + t[3] -= o; + t[6] -= o; + t[7] += o; + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + r[0] = (t[1] << 32) | t[0]; + r[1] = (t[3] << 32) | t[2]; + r[2] = (t[5] << 32) | t[4]; + r[3] = (t[7] << 32) | t[6]; + + return MP_OKAY; +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 64 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 64 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffffffffffffl; + s = 64U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 64U) <= (word32)DIGIT_BIT) { + s += 64U; + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 64) { + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + s = 64 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Convert a point of type ecc_point to type sp_point_256. 
+ * + * p Point of type sp_point_256 (result). + * pm Point of type ecc_point. + */ +static void sp_256_point_from_ecc_point_4(sp_point_256* p, const ecc_point* pm) +{ + XMEMSET(p->x, 0, sizeof(p->x)); + XMEMSET(p->y, 0, sizeof(p->y)); + XMEMSET(p->z, 0, sizeof(p->z)); + sp_256_from_mp(p->x, 4, pm->x); + sp_256_from_mp(p->y, 4, pm->y); + sp_256_from_mp(p->z, 4, pm->z); + p->infinity = 0; +} + +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_256_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 64 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 4); + r->used = 4; + mp_clamp(r); +#elif DIGIT_BIT < 64 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 4; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 64) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 64 - s; + } + r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 4; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 64 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 64 - s; + } + else { + s += 64; + } + } + r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Convert a point of type sp_point_256 to type ecc_point. + * + * p Point of type sp_point_256. + * pm Point of type ecc_point (result). + * returns MEMORY_E when allocation of memory in ecc_point fails otherwise + * MP_OKAY. + */ +static int sp_256_point_to_ecc_point_4(const sp_point_256* p, ecc_point* pm) +{ + int err; + + err = sp_256_to_mp(p->x, pm->x); + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, pm->y); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, pm->z); + } + + return err; +} + +/* Conditionally copy a into r using the mask m. + * m is -1 to copy and 0 when not. + * + * r A single precision number to copy over. + * a A single precision number to copy. + * m Mask value to apply. + */ +static void sp_256_cond_copy_4(sp_digit* r, const sp_digit* a, sp_digit m) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[r], 0]\n\t" + "ldp x7, x8, [%[a], 0]\n\t" + "eor x7, x7, x3\n\t" + "ldp x5, x6, [%[r], 16]\n\t" + "eor x8, x8, x4\n\t" + "ldp x9, x10, [%[a], 16]\n\t" + "eor x9, x9, x5\n\t" + "eor x10, x10, x6\n\t" + "and x7, x7, %[m]\n\t" + "and x8, x8, %[m]\n\t" + "and x9, x9, %[m]\n\t" + "and x10, x10, %[m]\n\t" + "eor x3, x3, x7\n\t" + "eor x4, x4, x8\n\t" + "eor x5, x5, x9\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "eor x6, x6, x10\n\t" + "stp x5, x6, [%[r], 16]\n\t" + : + : [r] "r" (r), [a] "r" (a), [m] "r" (m) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. 
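+ *
+ * The same operation at one-word scale (word-level REDC, illustrative
+ * only): here mp plays the role of the Montgomery multiplier, that is
+ * -m^-1 mod R with R the word radix.
+ */
+#include <stdint.h>
+#include <stdio.h>
+
+/* r = a * b * R^-1 mod m, R = 2^32, mp = -m^-1 mod R, a and b < m. */
+static uint32_t mont_mul32(uint32_t a, uint32_t b, uint32_t m, uint32_t mp)
+{
+    uint64_t t = (uint64_t)a * b;
+    uint32_t mu = (uint32_t)t * mp;        /* makes the low word cancel */
+    t = (t + (uint64_t)mu * m) >> 32;      /* exact division by R */
+    return (uint32_t)((t >= m) ? t - m : t);
+}
+
+int main(void)
+{
+    const uint32_t m = 2147483647u;        /* 2^31 - 1 */
+    /* m*m == 1 mod 2^32 for this m, so m^-1 mod 2^32 = m and mp = -m. */
+    const uint32_t mp = 0u - m;
+    const uint32_t rr = (uint32_t)((((1ull << 32) % m) *
+                                    ((1ull << 32) % m)) % m);  /* R^2 mod m */
+    uint32_t a = 123456789u % m, b = 987654321u % m;
+    uint32_t am = mont_mul32(a, rr, m, mp);      /* to Montgomery form */
+    uint32_t bm = mont_mul32(b, rr, m, mp);
+    uint32_t r  = mont_mul32(mont_mul32(am, bm, m, mp), 1u, m, mp);
+    printf("%u %u\n", r, (uint32_t)(((uint64_t)a * b) % m));  /* equal */
+    return 0;
+}
+/* sp_256_mont_mul_4 below fuses the 4x64 product with a P-256-specific
+ * reduction, which is why its m and mp arguments go unused.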
+ */ +SP_NOINLINE static void sp_256_mont_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + (void)m; + (void)mp; + + __asm__ __volatile__ ( + "ldp x16, x17, [%[a], 0]\n\t" + "ldp x21, x22, [%[b], 0]\n\t" + "# A[0] * B[0]\n\t" + "mul x8, x16, x21\n\t" + "ldr x19, [%[a], 16]\n\t" + "umulh x9, x16, x21\n\t" + "ldr x23, [%[b], 16]\n\t" + "# A[0] * B[1]\n\t" + "mul x4, x16, x22\n\t" + "ldr x20, [%[a], 24]\n\t" + "umulh x5, x16, x22\n\t" + "ldr x24, [%[b], 24]\n\t" + "adds x9, x9, x4\n\t" + "# A[1] * B[0]\n\t" + "mul x4, x17, x21\n\t" + "adc x10, xzr, x5\n\t" + "umulh x5, x17, x21\n\t" + "adds x9, x9, x4\n\t" + "# A[0] * B[2]\n\t" + "mul x4, x16, x23\n\t" + "adcs x10, x10, x5\n\t" + "umulh x5, x16, x23\n\t" + "adc x11, xzr, xzr\n\t" + "adds x10, x10, x4\n\t" + "# A[1] * B[1]\n\t" + "mul x4, x17, x22\n\t" + "adc x11, x11, x5\n\t" + "umulh x5, x17, x22\n\t" + "adds x10, x10, x4\n\t" + "# A[2] * B[0]\n\t" + "mul x4, x19, x21\n\t" + "adcs x11, x11, x5\n\t" + "umulh x5, x19, x21\n\t" + "adc x12, xzr, xzr\n\t" + "adds x10, x10, x4\n\t" + "# A[0] * B[3]\n\t" + "mul x4, x16, x24\n\t" + "adcs x11, x11, x5\n\t" + "umulh x5, x16, x24\n\t" + "adc x12, x12, xzr\n\t" + "adds x11, x11, x4\n\t" + "# A[1] * B[2]\n\t" + "mul x4, x17, x23\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x17, x23\n\t" + "adc x13, xzr, xzr\n\t" + "adds x11, x11, x4\n\t" + "# A[2] * B[1]\n\t" + "mul x4, x19, x22\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x19, x22\n\t" + "adc x13, x13, xzr\n\t" + "adds x11, x11, x4\n\t" + "# A[3] * B[0]\n\t" + "mul x4, x20, x21\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x20, x21\n\t" + "adc x13, x13, xzr\n\t" + "adds x11, x11, x4\n\t" + "# A[1] * B[3]\n\t" + "mul x4, x17, x24\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x17, x24\n\t" + "adc x13, x13, xzr\n\t" + "adds x12, x12, x4\n\t" + "# A[2] * B[2]\n\t" + "mul x4, x19, x23\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x19, x23\n\t" + "adc x14, xzr, xzr\n\t" + "adds x12, x12, x4\n\t" + "# A[3] * B[1]\n\t" + "mul x4, x20, x22\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x20, x22\n\t" + "adc x14, x14, xzr\n\t" + "adds x12, x12, x4\n\t" + "# A[2] * B[3]\n\t" + "mul x4, x19, x24\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x19, x24\n\t" + "adc x14, x14, xzr\n\t" + "adds x13, x13, x4\n\t" + "# A[3] * B[2]\n\t" + "mul x4, x20, x23\n\t" + "adcs x14, x14, x5\n\t" + "umulh x5, x20, x23\n\t" + "adc x15, xzr, xzr\n\t" + "adds x13, x13, x4\n\t" + "# A[3] * B[3]\n\t" + "mul x4, x20, x24\n\t" + "adcs x14, x14, x5\n\t" + "umulh x5, x20, x24\n\t" + "adc x15, x15, xzr\n\t" + "adds x14, x14, x4\n\t" + "mov x4, x8\n\t" + "adc x15, x15, x5\n\t" + "# Start Reduction\n\t" + "mov x5, x9\n\t" + "mov x6, x10\n\t" + "# mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192\n\t" + "# - a[0] << 32 << 192\n\t" + "# + (a[0] * 2) << 192\n\t" + "# a[0]-a[2] << 32\n\t" + "lsl x10, x10, 32\n\t" + "add x7, x11, x8\n\t" + "eor x10, x10, x9, lsr #32\n\t" + "lsl x9, x9, 32\n\t" + "add x7, x7, x8\n\t" + "eor x9, x9, x8, lsr #32\n\t" + "# + a[0]-a[2] << 32 << 64\n\t" + "# - a[0] << 32 << 192\n\t" + "adds x5, x5, x8, lsl #32\n\t" + "sub x7, x7, x8, lsl #32\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, x10\n\t" + "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t" + "# a += mu << 256\n\t" + "adds x12, x12, x4\n\t" + "adcs x13, x13, x5\n\t" + "adcs x14, x14, x6\n\t" + "adcs x15, x15, x7\n\t" + "cset x8, cs\n\t" + "# a += mu << 192\n\t" + "# mu <<= 32\n\t" + "# a += (mu << 32) << 64\n\t" + "adds x11, x11, x4\n\t" + "adcs x12, x12, 
x5\n\t" + "adcs x13, x13, x6\n\t" + "lsr x16, x7, 32\n\t" + "adcs x14, x14, x7\n\t" + "lsl x7, x7, 32\n\t" + "adcs x15, x15, xzr\n\t" + "eor x7, x7, x6, lsr #32\n\t" + "adc x8, x8, xzr\n\t" + "lsl x6, x6, 32\n\t" + "eor x6, x6, x5, lsr #32\n\t" + "adds x11, x11, x6\n\t" + "lsl x5, x5, 32\n\t" + "adcs x12, x12, x7\n\t" + "eor x5, x5, x4, lsr #32\n\t" + "adcs x13, x13, x16\n\t" + "lsl x4, x4, 32\n\t" + "adcs x14, x14, xzr\n\t" + "adcs x15, x15, xzr\n\t" + "adc x8, x8, xzr\n\t" + "# a -= (mu << 32) << 192\n\t" + "subs x11, x11, x4\n\t" + "sbcs x12, x12, x5\n\t" + "sbcs x13, x13, x6\n\t" + "sub x8, xzr, x8\n\t" + "sbcs x14, x14, x7\n\t" + "sub x8, x8, #1\n\t" + "sbcs x15, x15, x16\n\t" + "mov x19, 0xffffffff00000001\n\t" + "adc x8, x8, xzr\n\t" + "# mask m and sub from result if overflow\n\t" + "# m[0] = -1 & mask = mask\n\t" + "subs x12, x12, x8\n\t" + "# m[1] = 0xffffffff & mask = mask >> 32 as mask is all 1s or 0s\n\t" + "lsr x17, x8, 32\n\t" + "sbcs x13, x13, x17\n\t" + "and x19, x19, x8\n\t" + "# m[2] = 0 & mask = 0\n\t" + "sbcs x14, x14, xzr\n\t" + "stp x12, x13, [%[r], 0]\n\t" + "# m[3] = 0xffffffff00000001 & mask\n\t" + "sbc x15, x15, x19\n\t" + "stp x14, x15, [%[r], 16]\n\t" + : [a] "+r" (a), [b] "+r" (b) + : [r] "r" (r) + : "memory", "x4", "x5", "x6", "x7", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15" + ); +} + +/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + (void)m; + (void)mp; + + __asm__ __volatile__ ( + "ldp x16, x17, [%[a], 0]\n\t" + "# A[0] * A[1]\n\t" + "mul x9, x16, x17\n\t" + "ldr x19, [%[a], 16]\n\t" + "umulh x10, x16, x17\n\t" + "ldr x20, [%[a], 24]\n\t" + "# A[0] * A[2]\n\t" + "mul x4, x16, x19\n\t" + "umulh x5, x16, x19\n\t" + "adds x10, x10, x4\n\t" + "# A[0] * A[3]\n\t" + "mul x4, x16, x20\n\t" + "adc x11, xzr, x5\n\t" + "umulh x5, x16, x20\n\t" + "adds x11, x11, x4\n\t" + "# A[1] * A[2]\n\t" + "mul x4, x17, x19\n\t" + "adc x12, xzr, x5\n\t" + "umulh x5, x17, x19\n\t" + "adds x11, x11, x4\n\t" + "# A[1] * A[3]\n\t" + "mul x4, x17, x20\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x17, x20\n\t" + "adc x13, xzr, xzr\n\t" + "adds x12, x12, x4\n\t" + "# A[2] * A[3]\n\t" + "mul x4, x19, x20\n\t" + "adc x13, x13, x5\n\t" + "umulh x5, x19, x20\n\t" + "adds x13, x13, x4\n\t" + "adc x14, xzr, x5\n\t" + "# Double\n\t" + "adds x9, x9, x9\n\t" + "adcs x10, x10, x10\n\t" + "adcs x11, x11, x11\n\t" + "adcs x12, x12, x12\n\t" + "adcs x13, x13, x13\n\t" + "# A[0] * A[0]\n\t" + "mul x8, x16, x16\n\t" + "adcs x14, x14, x14\n\t" + "umulh x3, x16, x16\n\t" + "cset x15, cs\n\t" + "# A[1] * A[1]\n\t" + "mul x4, x17, x17\n\t" + "adds x9, x9, x3\n\t" + "umulh x5, x17, x17\n\t" + "adcs x10, x10, x4\n\t" + "# A[2] * A[2]\n\t" + "mul x6, x19, x19\n\t" + "adcs x11, x11, x5\n\t" + "umulh x7, x19, x19\n\t" + "adcs x12, x12, x6\n\t" + "# A[3] * A[3]\n\t" + "mul x16, x20, x20\n\t" + "adcs x13, x13, x7\n\t" + "umulh x17, x20, x20\n\t" + "adcs x14, x14, x16\n\t" + "mov x3, x8\n\t" + "adc x15, x15, x17\n\t" + "# Start Reduction\n\t" + "mov x4, x9\n\t" + "mov x5, x10\n\t" + "# mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192\n\t" + "# - a[0] << 32 << 192\n\t" + "# + (a[0] * 2) << 192\n\t" + "# a[0]-a[2] << 32\n\t" + "lsl x10, x10, 32\n\t" + "add x6, x11, 
x8\n\t" + "eor x10, x10, x9, lsr #32\n\t" + "lsl x9, x9, 32\n\t" + "add x6, x6, x8\n\t" + "eor x9, x9, x8, lsr #32\n\t" + "# + a[0]-a[2] << 32 << 64\n\t" + "# - a[0] << 32 << 192\n\t" + "adds x4, x4, x8, lsl #32\n\t" + "sub x6, x6, x8, lsl #32\n\t" + "adcs x5, x5, x9\n\t" + "adc x6, x6, x10\n\t" + "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t" + "# a += mu << 256\n\t" + "adds x12, x12, x3\n\t" + "adcs x13, x13, x4\n\t" + "adcs x14, x14, x5\n\t" + "adcs x15, x15, x6\n\t" + "cset x8, cs\n\t" + "# a += mu << 192\n\t" + "# mu <<= 32\n\t" + "# a += (mu << 32) << 64\n\t" + "adds x11, x11, x3\n\t" + "adcs x12, x12, x4\n\t" + "adcs x13, x13, x5\n\t" + "lsr x7, x6, 32\n\t" + "adcs x14, x14, x6\n\t" + "lsl x6, x6, 32\n\t" + "adcs x15, x15, xzr\n\t" + "eor x6, x6, x5, lsr #32\n\t" + "adc x8, x8, xzr\n\t" + "lsl x5, x5, 32\n\t" + "eor x5, x5, x4, lsr #32\n\t" + "adds x11, x11, x5\n\t" + "lsl x4, x4, 32\n\t" + "adcs x12, x12, x6\n\t" + "eor x4, x4, x3, lsr #32\n\t" + "adcs x13, x13, x7\n\t" + "lsl x3, x3, 32\n\t" + "adcs x14, x14, xzr\n\t" + "adcs x15, x15, xzr\n\t" + "adc x8, x8, xzr\n\t" + "# a -= (mu << 32) << 192\n\t" + "subs x11, x11, x3\n\t" + "sbcs x12, x12, x4\n\t" + "sbcs x13, x13, x5\n\t" + "sub x8, xzr, x8\n\t" + "sbcs x14, x14, x6\n\t" + "sub x8, x8, #1\n\t" + "sbcs x15, x15, x7\n\t" + "mov x17, 0xffffffff00000001\n\t" + "adc x8, x8, xzr\n\t" + "# mask m and sub from result if overflow\n\t" + "# m[0] = -1 & mask = mask\n\t" + "subs x12, x12, x8\n\t" + "# m[1] = 0xffffffff & mask = mask >> 32 as mask is all 1s or 0s\n\t" + "lsr x16, x8, 32\n\t" + "sbcs x13, x13, x16\n\t" + "and x17, x17, x8\n\t" + "# m[2] = 0 & mask = 0\n\t" + "sbcs x14, x14, xzr\n\t" + "stp x12, x13, [%[r], 0]\n\t" + "# m[3] = 0xffffffff00000001 & mask\n\t" + "sbc x15, x15, x17\n\t" + "stp x14, x15, [%[r], 16]\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20" + ); +} + +#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY) +/* Square the Montgomery form number a number of times. (r = a ^ n mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * n Number of times to square. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_256_mont_sqr_n_4(sp_digit* r, const sp_digit* a, int n, + const sp_digit* m, sp_digit mp) +{ + sp_256_mont_sqr_4(r, a, m, mp); + for (; n > 1; n--) { + sp_256_mont_sqr_4(r, r, m, mp); + } +} + +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ +#ifdef WOLFSSL_SP_SMALL +/* Mod-2 for the P256 curve. */ +static const uint64_t p256_mod_minus_2[4] = { + 0xfffffffffffffffdU,0x00000000ffffffffU,0x0000000000000000U, + 0xffffffff00000001U +}; +#endif /* !WOLFSSL_SP_SMALL */ + +/* Invert the number, in Montgomery form, modulo the modulus (prime) of the + * P256 curve. (r = 1 / a mod m) + * + * r Inverse result. + * a Number to invert. + * td Temporary data. 
+ */ +static void sp_256_mont_inv_4(sp_digit* r, const sp_digit* a, sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 4); + for (i=254; i>=0; i--) { + sp_256_mont_sqr_4(t, t, p256_mod, p256_mp_mod); + if (p256_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64))) + sp_256_mont_mul_4(t, t, a, p256_mod, p256_mp_mod); + } + XMEMCPY(r, t, sizeof(sp_digit) * 4); +#else + sp_digit* t1 = td; + sp_digit* t2 = td + 2 * 4; + sp_digit* t3 = td + 4 * 4; + /* 0x2 */ + sp_256_mont_sqr_4(t1, a, p256_mod, p256_mp_mod); + /* 0x3 */ + sp_256_mont_mul_4(t2, t1, a, p256_mod, p256_mp_mod); + /* 0xc */ + sp_256_mont_sqr_n_4(t1, t2, 2, p256_mod, p256_mp_mod); + /* 0xd */ + sp_256_mont_mul_4(t3, t1, a, p256_mod, p256_mp_mod); + /* 0xf */ + sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xf0 */ + sp_256_mont_sqr_n_4(t1, t2, 4, p256_mod, p256_mp_mod); + /* 0xfd */ + sp_256_mont_mul_4(t3, t3, t1, p256_mod, p256_mp_mod); + /* 0xff */ + sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xff00 */ + sp_256_mont_sqr_n_4(t1, t2, 8, p256_mod, p256_mp_mod); + /* 0xfffd */ + sp_256_mont_mul_4(t3, t3, t1, p256_mod, p256_mp_mod); + /* 0xffff */ + sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xffff0000 */ + sp_256_mont_sqr_n_4(t1, t2, 16, p256_mod, p256_mp_mod); + /* 0xfffffffd */ + sp_256_mont_mul_4(t3, t3, t1, p256_mod, p256_mp_mod); + /* 0xffffffff */ + sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xffffffff00000000 */ + sp_256_mont_sqr_n_4(t1, t2, 32, p256_mod, p256_mp_mod); + /* 0xffffffffffffffff */ + sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xffffffff00000001 */ + sp_256_mont_mul_4(r, t1, a, p256_mod, p256_mp_mod); + /* 0xffffffff000000010000000000000000000000000000000000000000 */ + sp_256_mont_sqr_n_4(r, r, 160, p256_mod, p256_mp_mod); + /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */ + sp_256_mont_mul_4(r, r, t2, p256_mod, p256_mp_mod); + /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */ + sp_256_mont_sqr_n_4(r, r, 32, p256_mod, p256_mp_mod); + /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */ + sp_256_mont_mul_4(r, r, t3, p256_mod, p256_mp_mod); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. 
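+ *
+ * The masking idea in portable C (illustrative; truly constant-time C
+ * depends on what the compiler emits, which is why the real routine below
+ * is assembly):
+ */
+#include <stdint.h>
+#include <stdio.h>
+
+static int64_t ct_cmp_4(const uint64_t* a, const uint64_t* b)
+{
+    int64_t res = 0;
+    int64_t done = 0;  /* becomes all-ones at the first differing word */
+    int i;
+    for (i = 3; i >= 0; i--) {   /* most significant word first */
+        int64_t gt = 0 - (int64_t)(a[i] > b[i]);
+        int64_t lt = 0 - (int64_t)(a[i] < b[i]);
+        res |= (gt & ~done & 1) | (lt & ~done);
+        done |= gt | lt;         /* freeze the result from here on */
+    }
+    return res;                  /* negative, zero or positive */
+}
+
+int main(void)
+{
+    uint64_t x[4] = { 5, 0, 0, 1 };
+    uint64_t y[4] = { 9, 0, 0, 1 };
+    printf("%d\n", ct_cmp_4(x, y) < 0);      /* 1: x < y */
+    return 0;
+}
+/* sp_256_cmp_4: the same scan, with csel keeping the flow branch-free.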
+ */ +static int64_t sp_256_cmp_4(const sp_digit* a, const sp_digit* b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov x2, -1\n\t" + "mov x3, 1\n\t" + "mov x4, -1\n\t" + "mov x5, 24\n\t" + "1:\n\t" + "ldr x6, [%[a], x5]\n\t" + "ldr x7, [%[b], x5]\n\t" + "and x6, x6, x4\n\t" + "and x7, x7, x4\n\t" + "subs x6, x6, x7\n\t" + "csel x2, x3, x2, hi\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "subs x5, x5, #8\n\t" + "b.cs 1b\n\t" + "eor %[a], x2, x4\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12" + ); +#else + __asm__ __volatile__ ( + "mov x2, -1\n\t" + "mov x3, 1\n\t" + "mov x4, -1\n\t" + "ldp x5, x6, [%[a], 0]\n\t" + "ldp x7, x8, [%[a], 16]\n\t" + "ldp x9, x10, [%[b], 0]\n\t" + "ldp x11, x12, [%[b], 16]\n\t" + "and x8, x8, x4\n\t" + "and x12, x12, x4\n\t" + "subs x8, x8, x12\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x7, x7, x4\n\t" + "and x11, x11, x4\n\t" + "subs x7, x7, x11\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x6, x6, x4\n\t" + "and x10, x10, x4\n\t" + "subs x6, x6, x10\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x9, x9, x4\n\t" + "subs x5, x5, x9\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "eor %[a], x2, x4\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12" + ); +#endif + + return (int64_t)a; +} + +/* Normalize the values in each word to 64. + * + * a Array of sp_digit to normalize. + */ +#define sp_256_norm_4(a) + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_256_cond_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + __asm__ __volatile__ ( + + "ldp x5, x7, [%[b], 0]\n\t" + "ldp x11, x12, [%[b], 16]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" + "and x7, x7, %[m]\n\t" + "subs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 16]\n\t" + "csetm %[r], cc\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return (sp_digit)r; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
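+ *
+ * Word-by-word borrow propagation in portable C (illustrative only); the
+ * zero/all-ones return mirrors the csetm at the end of the assembly:
+ */
+#include <stdint.h>
+#include <stdio.h>
+
+static uint64_t sub_4_c(uint64_t* r, const uint64_t* a, const uint64_t* b)
+{
+    uint64_t borrow = 0;
+    int i;
+    for (i = 0; i < 4; i++) {
+        uint64_t d = a[i] - b[i];
+        r[i] = d - borrow;
+        /* Borrow out if a[i] < b[i] or the old borrow underflowed d. */
+        borrow = (uint64_t)(a[i] < b[i]) | (uint64_t)(d < borrow);
+    }
+    return 0 - borrow;           /* 0, or all-ones on a net borrow */
+}
+
+int main(void)
+{
+    uint64_t a[4] = { 0, 0, 0, 1 };          /* 2^192 */
+    uint64_t b[4] = { 1, 0, 0, 0 };          /* 1 */
+    uint64_t r[4];
+    sub_4_c(r, a, b);                        /* 2^192 - 1 */
+    printf("%llx %llx\n", (unsigned long long)r[0],
+           (unsigned long long)r[3]);        /* ffffffffffffffff 0 */
+    return 0;
+}
+/* sp_256_sub_4: the subs/sbcs chain below computes exactly this in four
+ * flag-linked instructions.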
+ */ +static sp_digit sp_256_sub_4(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "subs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "csetm %[r], cc\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return (sp_digit)r; +} + +#define sp_256_mont_reduce_order_4 sp_256_mont_reduce_4 + +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +SP_NOINLINE static void sp_256_mont_reduce_4(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + __asm__ __volatile__ ( + "ldp x9, x10, [%[a], 0]\n\t" + "ldp x11, x12, [%[a], 16]\n\t" + "ldp x17, x19, [%[m], 0]\n\t" + "ldp x20, x21, [%[m], 16]\n\t" + "mov x8, xzr\n\t" + "# mu = a[0] * mp\n\t" + "mul x5, %[mp], x9\n\t" + "ldr x13, [%[a], 32]\n\t" + "# a[0+0] += m[0] * mu\n\t" + "mul x3, x17, x5\n\t" + "ldr x14, [%[a], 40]\n\t" + "umulh x6, x17, x5\n\t" + "ldr x15, [%[a], 48]\n\t" + "adds x9, x9, x3\n\t" + "ldr x16, [%[a], 56]\n\t" + "adc x6, x6, xzr\n\t" + "# a[0+1] += m[1] * mu\n\t" + "mul x3, x19, x5\n\t" + "umulh x7, x19, x5\n\t" + "adds x3, x3, x6\n\t" + "adc x7, x7, xzr\n\t" + "adds x10, x10, x3\n\t" + "adc x7, x7, xzr\n\t" + "# a[0+2] += m[2] * mu\n\t" + "mul x3, x20, x5\n\t" + "umulh x6, x20, x5\n\t" + "adds x3, x3, x7\n\t" + "adc x6, x6, xzr\n\t" + "adds x11, x11, x3\n\t" + "adc x6, x6, xzr\n\t" + "# a[0+3] += m[3] * mu\n\t" + "mul x3, x21, x5\n\t" + "umulh x4, x21, x5\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x8\n\t" + "cset x8, cs\n\t" + "adds x12, x12, x3\n\t" + "adcs x13, x13, x4\n\t" + "adc x8, x8, xzr\n\t" + "# mu = a[1] * mp\n\t" + "mul x5, %[mp], x10\n\t" + "# a[1+0] += m[0] * mu\n\t" + "mul x3, x17, x5\n\t" + "umulh x6, x17, x5\n\t" + "adds x10, x10, x3\n\t" + "adc x6, x6, xzr\n\t" + "# a[1+1] += m[1] * mu\n\t" + "mul x3, x19, x5\n\t" + "umulh x7, x19, x5\n\t" + "adds x3, x3, x6\n\t" + "adc x7, x7, xzr\n\t" + "adds x11, x11, x3\n\t" + "adc x7, x7, xzr\n\t" + "# a[1+2] += m[2] * mu\n\t" + "mul x3, x20, x5\n\t" + "umulh x6, x20, x5\n\t" + "adds x3, x3, x7\n\t" + "adc x6, x6, xzr\n\t" + "adds x12, x12, x3\n\t" + "adc x6, x6, xzr\n\t" + "# a[1+3] += m[3] * mu\n\t" + "mul x3, x21, x5\n\t" + "umulh x4, x21, x5\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x8\n\t" + "cset x8, cs\n\t" + "adds x13, x13, x3\n\t" + "adcs x14, x14, x4\n\t" + "adc x8, x8, xzr\n\t" + "# mu = a[2] * mp\n\t" + "mul x5, %[mp], x11\n\t" + "# a[2+0] += m[0] * mu\n\t" + "mul x3, x17, x5\n\t" + "umulh x6, x17, x5\n\t" + "adds x11, x11, x3\n\t" + "adc x6, x6, xzr\n\t" + "# a[2+1] += m[1] * mu\n\t" + "mul x3, x19, x5\n\t" + "umulh x7, x19, x5\n\t" + "adds x3, x3, x6\n\t" + "adc x7, x7, xzr\n\t" + "adds x12, x12, x3\n\t" + "adc x7, x7, xzr\n\t" + "# a[2+2] += m[2] * mu\n\t" + "mul x3, x20, x5\n\t" + "umulh x6, x20, x5\n\t" + "adds x3, x3, x7\n\t" + "adc x6, x6, xzr\n\t" + "adds x13, x13, x3\n\t" + "adc x6, x6, xzr\n\t" + "# a[2+3] += m[3] * mu\n\t" + "mul x3, x21, x5\n\t" + "umulh x4, x21, x5\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x8\n\t" + "cset x8, cs\n\t" + "adds x14, x14, x3\n\t" + "adcs x15, x15, x4\n\t" + "adc x8, x8, xzr\n\t" + "# mu = a[3] 
* mp\n\t" + "mul x5, %[mp], x12\n\t" + "# a[3+0] += m[0] * mu\n\t" + "mul x3, x17, x5\n\t" + "umulh x6, x17, x5\n\t" + "adds x12, x12, x3\n\t" + "adc x6, x6, xzr\n\t" + "# a[3+1] += m[1] * mu\n\t" + "mul x3, x19, x5\n\t" + "umulh x7, x19, x5\n\t" + "adds x3, x3, x6\n\t" + "adc x7, x7, xzr\n\t" + "adds x13, x13, x3\n\t" + "adc x7, x7, xzr\n\t" + "# a[3+2] += m[2] * mu\n\t" + "mul x3, x20, x5\n\t" + "umulh x6, x20, x5\n\t" + "adds x3, x3, x7\n\t" + "adc x6, x6, xzr\n\t" + "adds x14, x14, x3\n\t" + "adc x6, x6, xzr\n\t" + "# a[3+3] += m[3] * mu\n\t" + "mul x3, x21, x5\n\t" + "umulh x4, x21, x5\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x8\n\t" + "cset x8, cs\n\t" + "adds x15, x15, x3\n\t" + "adcs x16, x16, x4\n\t" + "adc x8, x8, xzr\n\t" + "sub x3, xzr, x8\n\t" + "and x17, x17, x3\n\t" + "and x19, x19, x3\n\t" + "and x20, x20, x3\n\t" + "and x21, x21, x3\n\t" + "subs x13, x13, x17\n\t" + "sbcs x14, x14, x19\n\t" + "sbcs x15, x15, x20\n\t" + "stp x13, x14, [%[a], 0]\n\t" + "sbc x16, x16, x21\n\t" + "stp x15, x16, [%[a], 16]\n\t" + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) + : "memory", "x3", "x4", "x5", "x8", "x6", "x7", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21" + ); +} + +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. + * p Montgomery form projective coordinate point. + * t Temporary ordinate data. + */ +static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + int64_t n; + + sp_256_mont_inv_4(t1, p->z, t + 2*4); + + sp_256_mont_sqr_4(t2, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t1, t2, t1, p256_mod, p256_mp_mod); + + /* x /= z^2 */ + sp_256_mont_mul_4(r->x, p->x, t2, p256_mod, p256_mp_mod); + XMEMSET(r->x + 4, 0, sizeof(r->x) / 2U); + sp_256_mont_reduce_4(r->x, p256_mod, p256_mp_mod); + /* Reduce x to less than modulus */ + n = sp_256_cmp_4(r->x, p256_mod); + sp_256_cond_sub_4(r->x, r->x, p256_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_4(r->x); + + /* y /= z^3 */ + sp_256_mont_mul_4(r->y, p->y, t1, p256_mod, p256_mp_mod); + XMEMSET(r->y + 4, 0, sizeof(r->y) / 2U); + sp_256_mont_reduce_4(r->y, p256_mod, p256_mp_mod); + /* Reduce y to less than modulus */ + n = sp_256_cmp_4(r->y, p256_mod); + sp_256_cond_sub_4(r->y, r->y, p256_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_4(r->y); + + XMEMSET(r->z, 0, sizeof(r->z)); + r->z[0] = 1; + +} + +/* Add two Montgomery form numbers (r = a + b % m). + * + * r Result of addition. + * a First number to add in Montogmery form. + * b Second number to add in Montogmery form. + * m Modulus (prime). + */ +static void sp_256_mont_add_4(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + __asm__ __volatile__ ( + "ldp x4, x5, [%[a], 0]\n\t" + "ldp x8, x9, [%[b], 0]\n\t" + "adds x4, x4, x8\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "adcs x5, x5, x9\n\t" + "ldp x10, x11, [%[b], 16]\n\t" + "adcs x6, x6, x10\n\t" + "adcs x7, x7, x11\n\t" + "mov x13, 0xffffffff00000001\n\t" + "csetm x14, cs\n\t" + "subs x4, x4, x14\n\t" + "lsr x12, x14, 32\n\t" + "sbcs x5, x5, x12\n\t" + "and x13, x13, x14\n\t" + "sbcs x6, x6, xzr\n\t" + "stp x4, x5, [%[r],0]\n\t" + "sbc x7, x7, x13\n\t" + "stp x6, x7, [%[r],16]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14" + ); +} + +/* Double a Montgomery form number (r = a + a % m). 
+ * + * r Result of doubling. + * a Number to double in Montogmery form. + * m Modulus (prime). + */ +static void sp_256_mont_dbl_4(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a]]\n\t" + "ldp x5, x6, [%[a],16]\n\t" + "adds x3, x3, x3\n\t" + "adcs x4, x4, x4\n\t" + "adcs x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "mov x8, 0xffffffff00000001\n\t" + "csetm x9, cs\n\t" + "subs x3, x3, x9\n\t" + "lsr x7, x9, 32\n\t" + "sbcs x4, x4, x7\n\t" + "and x8, x8, x9\n\t" + "sbcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r],0]\n\t" + "sbc x6, x6, x8\n\t" + "stp x5, x6, [%[r],16]\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); + + (void)m; +} + +/* Triple a Montgomery form number (r = a + a + a % m). + * + * r Result of Tripling. + * a Number to triple in Montogmery form. + * m Modulus (prime). + */ +static void sp_256_mont_tpl_4(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + __asm__ __volatile__ ( + "ldp x10, x11, [%[a]]\n\t" + "adds x3, x10, x10\n\t" + "ldr x12, [%[a], 16]\n\t" + "adcs x4, x11, x11\n\t" + "ldr x13, [%[a], 24]\n\t" + "adcs x5, x12, x12\n\t" + "adcs x6, x13, x13\n\t" + "mov x8, 0xffffffff00000001\n\t" + "csetm x9, cs\n\t" + "subs x3, x3, x9\n\t" + "lsr x7, x9, 32\n\t" + "sbcs x4, x4, x7\n\t" + "and x8, x8, x9\n\t" + "sbcs x5, x5, xzr\n\t" + "sbc x6, x6, x8\n\t" + "adds x3, x3, x10\n\t" + "adcs x4, x4, x11\n\t" + "adcs x5, x5, x12\n\t" + "adcs x6, x6, x13\n\t" + "mov x8, 0xffffffff00000001\n\t" + "csetm x9, cs\n\t" + "subs x3, x3, x9\n\t" + "lsr x7, x9, 32\n\t" + "sbcs x4, x4, x7\n\t" + "and x8, x8, x9\n\t" + "sbcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "sbc x6, x6, x8\n\t" + "stp x5, x6, [%[r], 16]\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "x10", "x11", "x12", "x13", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); + + (void)m; +} + +/* Subtract two Montgomery form numbers (r = a - b % m). + * + * r Result of subtration. + * a Number to subtract from in Montogmery form. + * b Number to subtract with in Montogmery form. + * m Modulus (prime). + */ +static void sp_256_mont_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + __asm__ __volatile__ ( + "ldp x4, x5, [%[a], 0]\n\t" + "ldp x8, x9, [%[b], 0]\n\t" + "subs x4, x4, x8\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "sbcs x5, x5, x9\n\t" + "ldp x10, x11, [%[b], 16]\n\t" + "sbcs x6, x6, x10\n\t" + "sbcs x7, x7, x11\n\t" + "mov x13, 0xffffffff00000001\n\t" + "csetm x14, cc\n\t" + "adds x4, x4, x14\n\t" + "lsr x12, x14, 32\n\t" + "adcs x5, x5, x12\n\t" + "and x13, x13, x14\n\t" + "adcs x6, x6, xzr\n\t" + "stp x4, x5, [%[r],0]\n\t" + "adc x7, x7, x13\n\t" + "stp x6, x7, [%[r],16]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14" + ); +} + +/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) + * + * r Result of division by 2. + * a Number to divide. + * m Modulus (prime). 
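+ *
+ * The trick at word scale (illustrative): if a is odd, adding the odd
+ * modulus makes the sum even without changing the residue, so a plain
+ * shift then divides exactly. The assembly below does the same with a
+ * mask built from the low bit, with no branch.
+ */
+#include <stdint.h>
+#include <stdio.h>
+
+static uint32_t div2_mod(uint32_t a, uint32_t m)   /* m odd, a < m */
+{
+    uint64_t t = a;
+    if (t & 1)
+        t += m;                  /* even now; same residue mod m */
+    return (uint32_t)(t >> 1);   /* a * 2^-1 mod m */
+}
+
+int main(void)
+{
+    uint32_t m = 65521, a = 12345;
+    uint32_t h = div2_mod(a, m);
+    printf("%u\n", (uint32_t)(((uint64_t)h * 2) % m));   /* prints 12345 */
+    return 0;
+}
+/* sp_256_div2_4: selects 0 or the P-256 prime to add before the 256-bit
+ * right shift.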
+ */ +static void sp_256_div2_4(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "and x9, x3, 1\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "sub x10, xzr, x9\n\t" + "lsr x7, x10, 32\n\t" + "adds x3, x3, x10\n\t" + "and x8, x10, 0xffffffff00000001\n\t" + "adcs x4, x4, x7\n\t" + "lsr x3, x3, 1\n\t" + "adcs x5, x5, xzr\n\t" + "lsr x7, x4, 1\n\t" + "adcs x6, x6, x8\n\t" + "lsr x8, x5, 1\n\t" + "cset x9, cs\n\t" + "lsr x10, x6, 1\n\t" + "orr x3, x3, x4, lsl 63\n\t" + "orr x4, x7, x5, lsl 63\n\t" + "orr x5, x8, x6, lsl 63\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "orr x6, x10, x9, lsl 63\n\t" + "stp x5, x6, [%[r], 16]\n\t" + : + : [r] "r" (r), [a] "r" (a), [m] "r" (m) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + +} + +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_dbl_4(sp_point_256* r, const sp_point_256* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_4(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_4(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_4(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_4(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_4(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_4(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_4(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_4(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_4(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_4(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_4(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_4(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_4(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_4(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_4(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_4(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_4(y, y, t2, p256_mod); +} + +/* Subtract two Montgomery form numbers (r = a - b % m). + * + * r Result of subtration. + * a Number to subtract from in Montogmery form. + * b Number to subtract with in Montogmery form. + * m Modulus (prime). 
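+ *
+ * Note the operation actually fused below is r = a - 2*b mod m: the
+ * doubling of b and the subtraction share one pass, serving the
+ * X3 = A^2 - 2B step of point doubling. One-word illustration:
+ */
+#include <stdint.h>
+#include <stdio.h>
+
+static uint32_t sub_dbl_mod(uint32_t a, uint32_t b, uint32_t m) /* a,b < m */
+{
+    uint64_t t = (uint64_t)b * 2;
+    if (t >= m)
+        t -= m;                   /* 2b mod m */
+    t = (uint64_t)a + m - t;      /* +m keeps the subtract non-negative */
+    if (t >= m)
+        t -= m;
+    return (uint32_t)t;
+}
+
+int main(void)
+{
+    uint32_t m = 65521;
+    printf("%u\n", sub_dbl_mod(10, 40000, m));   /* (10 - 80000) mod m */
+    return 0;
+}
+/* sp_256_mont_sub_dbl_4: the same dataflow on 4x64 bits, with csetm-built
+ * masks applying each reduction without a branch.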
+ */ +static void sp_256_mont_sub_dbl_4(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + __asm__ __volatile__ ( + "ldp x8, x9, [%[b]]\n\t" + "ldp x10, x11, [%[b],16]\n\t" + "adds x8, x8, x8\n\t" + "ldp x4, x5, [%[a]]\n\t" + "adcs x9, x9, x9\n\t" + "ldp x6, x7, [%[a],16]\n\t" + "adcs x10, x10, x10\n\t" + "adcs x11, x11, x11\n\t" + "mov x13, 0xffffffff00000001\n\t" + "csetm x14, cs\n\t" + "subs x8, x8, x14\n\t" + "lsr x12, x14, 32\n\t" + "sbcs x9, x9, x12\n\t" + "and x13, x13, x14\n\t" + "sbcs x10, x10, xzr\n\t" + "sbc x11, x11, x13\n\t" + "subs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "sbcs x6, x6, x10\n\t" + "sbcs x7, x7, x11\n\t" + "mov x13, 0xffffffff00000001\n\t" + "csetm x14, cc\n\t" + "adds x4, x4, x14\n\t" + "lsr x12, x14, 32\n\t" + "adcs x5, x5, x12\n\t" + "and x13, x13, x14\n\t" + "adcs x6, x6, xzr\n\t" + "stp x4, x5, [%[r],0]\n\t" + "adc x7, x7, x13\n\t" + "stp x6, x7, [%[r],16]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14" + ); +} + +/* Subtract two Montgomery form numbers (r = a - b % m). + * + * r Result of subtration. + * a Number to subtract from in Montogmery form. + * b Number to subtract with in Montogmery form. + * m Modulus (prime). + */ +static void sp_256_mont_dbl_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + __asm__ __volatile__ ( + "ldp x4, x5, [%[a]]\n\t" + "ldp x6, x7, [%[a],16]\n\t" + "adds x4, x4, x4\n\t" + "ldp x8, x9, [%[b]]\n\t" + "adcs x5, x5, x5\n\t" + "ldp x10, x11, [%[b],16]\n\t" + "adcs x6, x6, x6\n\t" + "adcs x7, x7, x7\n\t" + "mov x13, 0xffffffff00000001\n\t" + "csetm x14, cs\n\t" + "subs x4, x4, x14\n\t" + "lsr x12, x14, 32\n\t" + "sbcs x5, x5, x12\n\t" + "and x13, x13, x14\n\t" + "sbcs x6, x6, xzr\n\t" + "sbc x7, x7, x13\n\t" + "subs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "sbcs x6, x6, x10\n\t" + "sbcs x7, x7, x11\n\t" + "mov x13, 0xffffffff00000001\n\t" + "csetm x14, cc\n\t" + "adds x4, x4, x14\n\t" + "lsr x12, x14, 32\n\t" + "adcs x5, x5, x12\n\t" + "and x13, x13, x14\n\t" + "adcs x6, x6, xzr\n\t" + "stp x4, x5, [%[r],0]\n\t" + "adc x7, x7, x13\n\t" + "stp x6, x7, [%[r],16]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14" + ); +} + +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. 
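+ *
+ * One a = -3 Jacobian doubling on a toy curve, with an on-curve check
+ * (illustrative only; the companion sp_256_mont_dbl_sub_4 above likewise
+ * fuses r = 2*a - b mod m for the Y3 step). Toy curve, local to this
+ * example: y^2 = x^3 - 3x + 3 over GF(97).
+ */
+#include <stdint.h>
+#include <stdio.h>
+
+#define P 97u
+static uint32_t mulm(uint32_t a, uint32_t b) { return (a * b) % P; }
+static uint32_t addm(uint32_t a, uint32_t b) { return (a + b) % P; }
+static uint32_t subm(uint32_t a, uint32_t b) { return (a + P - b) % P; }
+
+int main(void)
+{
+    uint32_t X = 5, Y = 4, Z = 1, b = 3;   /* affine (5,4), on the curve */
+    uint32_t t1 = mulm(Z, Z);                                /* Z^2 */
+    uint32_t m  = mulm(3, mulm(subm(X, t1), addm(X, t1)));   /* 3(X^2-Z^4) */
+    uint32_t z3 = mulm(2, mulm(Y, Z));                       /* Z3 = 2YZ */
+    uint32_t t  = mulm(mulm(2, Y), mulm(2, Y));              /* 4Y^2 */
+    uint32_t s  = mulm(t, X);                                /* S = 4XY^2 */
+    uint32_t x3 = subm(mulm(m, m), mulm(2, s));              /* M^2 - 2S */
+    uint32_t y3 = subm(mulm(m, subm(s, x3)),
+                       mulm(mulm(t, t), 49)); /* M(S-X3) - 8Y^4; 2^-1 = 49 */
+    /* Verify Y3^2 == X3^3 - 3*X3*Z3^4 + b*Z3^6. */
+    uint32_t z2 = mulm(z3, z3), z4 = mulm(z2, z2), z6 = mulm(z4, z2);
+    uint32_t rhs = addm(subm(mulm(mulm(x3, x3), x3),
+                             mulm(3, mulm(x3, z4))), mulm(b, z6));
+    printf("on curve: %s\n", mulm(y3, y3) == rhs ? "yes" : "no");
+    return 0;
+}
+/* sp_256_proj_point_dbl_n_4: the same formulas in a loop, caching W = Z^4
+ * so each later iteration saves a squaring.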
+
+/* Double the Montgomery form projective point p a number of times.
+ * The point is doubled in place.
+ *
+ * p  Point to double.
+ * n  Number of times to double.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int n, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*4;
+    sp_digit* b = t + 4*4;
+    sp_digit* t1 = t + 6*4;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
+
+    /* Y = 2*Y */
+    sp_256_mont_dbl_4(y, y, p256_mod);
+    /* W = Z^4 */
+    sp_256_mont_sqr_4(w, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_4(w, w, p256_mod, p256_mp_mod);
+
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
+        /* A = 3*(X^2 - W) */
+        sp_256_mont_sqr_4(t1, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(t1, t1, w, p256_mod);
+        sp_256_mont_tpl_4(a, t1, p256_mod);
+        /* B = X*Y^2 */
+        sp_256_mont_sqr_4(t1, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(b, t1, x, p256_mod, p256_mp_mod);
+        /* X = A^2 - 2B */
+        sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_dbl_4(x, x, b, p256_mod);
+        /* Z = Z*Y */
+        sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod);
+        /* t1 = Y^4 */
+        sp_256_mont_sqr_4(t1, t1, p256_mod, p256_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
+            /* W = W*Y^4 */
+            sp_256_mont_mul_4(w, w, t1, p256_mod, p256_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_256_mont_sub_4(y, b, x, p256_mod);
+        sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_sub_4(y, y, t1, p256_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* A = 3*(X^2 - W) */
+    sp_256_mont_sqr_4(t1, x, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_4(t1, t1, w, p256_mod);
+    sp_256_mont_tpl_4(a, t1, p256_mod);
+    /* B = X*Y^2 */
+    sp_256_mont_sqr_4(t1, y, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(b, t1, x, p256_mod, p256_mp_mod);
+    /* X = A^2 - 2B */
+    sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_dbl_4(x, x, b, p256_mod);
+    /* Z = Z*Y */
+    sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod);
+    /* t1 = Y^4 */
+    sp_256_mont_sqr_4(t1, t1, p256_mod, p256_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_256_mont_sub_4(y, b, x, p256_mod);
+    sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_sub_4(y, y, t1, p256_mod);
+#endif
+    /* Y = Y/2 */
+    sp_256_div2_4(y, y, p256_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * a  First number to compare.
+ * b  Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_256_cmp_equal_4(const sp_digit* a, const sp_digit* b)
+{
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3])) == 0;
+}
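+
+/* Usage sketch for the constant-time comparison above (illustration only):
+ * results are combined with bitwise & and | rather than && and ||, so the
+ * double-versus-add decision in the point addition below never
+ * short-circuits on secret data.
+ */
+#if 0
+#include <stdint.h>
+#include <stdio.h>
+
+int main(void)
+{
+    uint64_t a[4] = { 1, 2, 3, 4 };
+    uint64_t b[4] = { 1, 2, 3, 4 };
+    /* OR of all word differences is zero exactly when every word matches. */
+    uint64_t d = (a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]);
+    printf("equal: %d\n", d == 0);
+    return 0;
+}
+#endif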
+
+/* Add two Montgomery form projective points.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_4(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
+        sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*4;
+    sp_digit* t3 = t + 4*4;
+    sp_digit* t4 = t + 6*4;
+    sp_digit* t5 = t + 8*4;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_256* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double */
+    (void)sp_256_sub_4(t1, p256_mod, q->y);
+    sp_256_norm_4(t1);
+    if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) &
+        (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_4(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<4; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<4; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<4; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(t1, t1, x, p256_mod, p256_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_256_mont_mul_4(t3, t3, y, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - U1 */
+        sp_256_mont_sub_4(t2, t2, t1, p256_mod);
+        /* R = S2 - S1 */
+        sp_256_mont_sub_4(t4, t4, t3, p256_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(x, x, t5, p256_mod);
+        sp_256_mont_dbl_4(t1, y, p256_mod);
+        sp_256_mont_sub_4(x, x, t1, p256_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_256_mont_sub_4(y, y, x, p256_mod);
+        sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(y, y, t5, p256_mod);
+    }
+}
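+
+/* For reference, the Jacobian addition formulas the steps above implement,
+ * with P = (X1, Y1, Z1) and Q = (X2, Y2, Z2):
+ *
+ *   U1 = X1*Z2^2    U2 = X2*Z1^2
+ *   S1 = Y1*Z2^3    S2 = Y2*Z1^3
+ *   H  = U2 - U1    R  = S2 - S1
+ *   X3 = R^2 - H^3 - 2*U1*H^2
+ *   Y3 = R*(U1*H^2 - X3) - S1*H^3
+ *   Z3 = Z1*Z2*H
+ */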
+
+/* Double the Montgomery form projective point p a number of times and store
+ * the intermediate results.
+ *
+ * r  Array of points into which the doubled points are stored.
+ * p  Point to double.
+ * n  Number of times to double.
+ * m  Multiplier on the store index: the i'th double is stored at r[(1<<i)*m].
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_n_store_4(sp_point_256* r, const sp_point_256* p,
+        int n, int m, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*4;
+    sp_digit* b = t + 4*4;
+    sp_digit* t1 = t + 6*4;
+    sp_digit* t2 = t + 8*4;
+    sp_digit* x = r[2*m].x;
+    sp_digit* y = r[(1<<n)*m].y;
+    sp_digit* z = r[2*m].z;
+    int i;
+
+    for (i=0; i<4; i++) {
+        x[i] = p->x[i];
+    }
+    for (i=0; i<4; i++) {
+        y[i] = p->y[i];
+    }
+    for (i=0; i<4; i++) {
+        z[i] = p->z[i];
+    }
+
+    /* Y = 2*Y */
+    sp_256_mont_dbl_4(y, y, p256_mod);
+    /* W = Z^4 */
+    sp_256_mont_sqr_4(w, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_4(w, w, p256_mod, p256_mp_mod);
+    for (i=1; i<=n; i++) {
+        /* A = 3*(X^2 - W) */
+        sp_256_mont_sqr_4(t1, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(t1, t1, w, p256_mod);
+        sp_256_mont_tpl_4(a, t1, p256_mod);
+        /* B = X*Y^2 */
+        sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(b, t2, x, p256_mod, p256_mp_mod);
+        x = r[(1<<i)*m].x;
+        /* X = A^2 - 2B */
+        sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_dbl_4(x, x, b, p256_mod);
+        /* Z = Z*Y */
+        sp_256_mont_mul_4(r[(1<<i)*m].z, z, y, p256_mod, p256_mp_mod);
+        z = r[(1<<i)*m].z;
+        /* t2 = Y^4 */
+        sp_256_mont_sqr_4(t2, t2, p256_mod, p256_mp_mod);
+        /* W = W*Y^4 */
+        sp_256_mont_mul_4(w, w, t2, p256_mod, p256_mp_mod);
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_256_mont_sub_4(y, b, x, p256_mod);
+        sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_sub_4(y, y, t2, p256_mod);
+        /* Y = Y/2 */
+        sp_256_div2_4(r[(1<<i)*m].y, y, p256_mod);
+        r[(1<<i)*m].infinity = 0;
+    }
+}
+
+/* Add two Montgomery form projective points, returning both the sum and the
+ * difference of the points.
+ *
+ * ra  Result of addition.
+ * rs  Result of subtraction.
+ * p   First point to add.
+ * q   Second point to add and subtract.
+ * t   Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_sub_4(sp_point_256* ra, sp_point_256* rs,
+        const sp_point_256* p, const sp_point_256* q, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*4;
+    sp_digit* t3 = t + 4*4;
+    sp_digit* t4 = t + 6*4;
+    sp_digit* t5 = t + 8*4;
+    sp_digit* t6 = t + 10*4;
+    sp_digit* x = ra->x;
+    sp_digit* y = ra->y;
+    sp_digit* z = ra->z;
+    sp_digit* xs = rs->x;
+    sp_digit* ys = rs->y;
+    sp_digit* zs = rs->z;
+
+    XMEMCPY(x, p->x, sizeof(p->x) / 2);
+    XMEMCPY(y, p->y, sizeof(p->y) / 2);
+    XMEMCPY(z, p->z, sizeof(p->z) / 2);
+    ra->infinity = 0;
+    rs->infinity = 0;
+
+    /* U1 = X1*Z2^2 */
+    sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(t1, t1, x, p256_mod, p256_mp_mod);
+    /* U2 = X2*Z1^2 */
+    sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod);
+    /* S1 = Y1*Z2^3 */
+    sp_256_mont_mul_4(t3, t3, y, p256_mod, p256_mp_mod);
+    /* S2 = Y2*Z1^3 */
+    sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod);
+    /* H = U2 - U1 */
+    sp_256_mont_sub_4(t2, t2, t1, p256_mod);
+    /* RS = S2 + S1 */
+    sp_256_mont_add_4(t6, t4, t3, p256_mod);
+    /* R = S2 - S1 */
+    sp_256_mont_sub_4(t4, t4, t3, p256_mod);
+    /* Z3 = H*Z1*Z2 */
+    /* ZS = H*Z1*Z2 */
+    sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod);
+    XMEMCPY(zs, z, sizeof(p->z)/2);
+    /* X3 = R^2 - H^3 - 2*U1*H^2 */
+    /* XS = RS^2 - H^3 - 2*U1*H^2 */
+    sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_4(xs, t6, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_4(x, x, t5, p256_mod);
+    sp_256_mont_sub_4(xs, xs, t5, p256_mod);
+    sp_256_mont_dbl_4(t1, y, p256_mod);
+    sp_256_mont_sub_4(x, x, t1, p256_mod);
+    sp_256_mont_sub_4(xs, xs, t1, p256_mod);
+    /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+    /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */
+    sp_256_mont_sub_4(ys, y, xs, p256_mod);
+    sp_256_mont_sub_4(y, y, x, p256_mod);
+    sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod);
+    sp_256_sub_4(t6, p256_mod, t6);
+    sp_256_mont_mul_4(ys, ys, t6, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_4(y, y, t5, p256_mod);
+    sp_256_mont_sub_4(ys, ys, t5, p256_mod);
+}
+
+/* Structure used to describe recoding of scalar multiplication. */
+typedef struct ecc_recode_256 {
+    /* Index into pre-computation table. */
+    uint8_t i;
+    /* Use the negative of the point. */
+    uint8_t neg;
+} ecc_recode_256;
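+
+/* Worked example of the signed-digit recoding implemented below
+ * (illustration only): a 6-bit window value in [33,63] is replaced by the
+ * negative of (64 - value) plus a carry of one into the next window, which
+ * is why only 33 table entries (0..32 times the point) are ever needed.
+ */
+#if 0
+#include <stdio.h>
+
+int main(void)
+{
+    unsigned d = 39;                  /* raw 6-bit window value plus carry */
+    unsigned neg = (d > 32);          /* recode_neg_4_6[39] == 1 */
+    unsigned idx = neg ? 64 - d : d;  /* recode_index_4_6[39] == 25 */
+    unsigned carry = neg;             /* 39 == 64 - 25, so carry 1 upward */
+    printf("digit %u -> %s%u, carry %u\n", d, neg ? "-" : "+", idx, carry);
+    return 0;
+}
+#endif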
+
+/* The index into pre-computation table to use. */
+static const uint8_t recode_index_4_6[66] = {
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+    16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,
+     0,  1,
+};
+
+/* Whether to negate y-ordinate. */
+static const uint8_t recode_neg_4_6[66] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 0,
+};
+
+/* Recode the scalar for multiplication using pre-computed values and
+ * subtraction.
+ *
+ * k  Scalar to multiply by.
+ * v  Vector of operations to perform.
+ */
+static void sp_256_ecc_recode_6_4(const sp_digit* k, ecc_recode_256* v)
+{
+    int i, j;
+    uint8_t y;
+    int carry = 0;
+    int o;
+    sp_digit n;
+
+    j = 0;
+    n = k[j];
+    o = 0;
+    for (i=0; i<43; i++) {
+        y = n;
+        if (o + 6 < 64) {
+            y &= 0x3f;
+            n >>= 6;
+            o += 6;
+        }
+        else if (o + 6 == 64) {
+            n >>= 6;
+            if (++j < 4)
+                n = k[j];
+            o = 0;
+        }
+        else if (++j < 4) {
+            n = k[j];
+            y |= (n << (64 - o)) & 0x3f;
+            o -= 58;
+            n >>= o;
+        }
+
+        y += carry;
+        v[i].i = recode_index_4_6[y];
+        v[i].neg = recode_neg_4_6[y];
+        carry = (y >> 6) + v[i].neg;
+    }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_win_add_sub_4(sp_point_256* r, const sp_point_256* g,
+        const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td[33];
+    sp_point_256 rtd, pd;
+    sp_digit tmpd[2 * 4 * 6];
+#endif
+    sp_point_256* t;
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* tmp;
+    sp_digit* negy;
+    int i;
+    ecc_recode_256 v[43];
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, rtd, rt);
+    if (err == MP_OKAY)
+        err = sp_256_point_new_4(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 33, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 6, heap,
+        DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        err = sp_256_mod_mul_norm_4(t[1].x, g->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_4(t[1].y, g->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_4(t[1].z, g->z, p256_mod);
+    }
+
+    if (err == MP_OKAY) {
+        t[1].infinity = 0;
+        /* t[2] ... t[32] */
+        sp_256_proj_point_dbl_n_store_4(t, &t[ 1], 5, 1, tmp);
+        sp_256_proj_point_add_4(&t[ 3], &t[ 2], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[ 6], &t[ 3], tmp);
+        sp_256_proj_point_add_sub_4(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[10], &t[ 5], tmp);
+        sp_256_proj_point_add_sub_4(&t[11], &t[ 9], &t[10], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[12], &t[ 6], tmp);
+        sp_256_proj_point_dbl_4(&t[14], &t[ 7], tmp);
+        sp_256_proj_point_add_sub_4(&t[15], &t[13], &t[14], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[18], &t[ 9], tmp);
+        sp_256_proj_point_add_sub_4(&t[19], &t[17], &t[18], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[20], &t[10], tmp);
+        sp_256_proj_point_dbl_4(&t[22], &t[11], tmp);
+        sp_256_proj_point_add_sub_4(&t[23], &t[21], &t[22], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[24], &t[12], tmp);
+        sp_256_proj_point_dbl_4(&t[26], &t[13], tmp);
+        sp_256_proj_point_add_sub_4(&t[27], &t[25], &t[26], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[28], &t[14], tmp);
+        sp_256_proj_point_dbl_4(&t[30], &t[15], tmp);
+        sp_256_proj_point_add_sub_4(&t[31], &t[29], &t[30], &t[ 1], tmp);
+
+        negy = t[0].y;
+
+        sp_256_ecc_recode_6_4(k, v);
+
+        i = 42;
+        XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_256));
+        for (--i; i>=0; i--) {
+            sp_256_proj_point_dbl_n_4(rt, 6, tmp);
+
+            XMEMCPY(p, &t[v[i].i], sizeof(sp_point_256));
+            sp_256_sub_4(negy, p256_mod, p->y);
+            sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
+            sp_256_proj_point_add_4(rt, rt, p, tmp);
+        }
+
+        if (map != 0) {
+            sp_256_map_4(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL)
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    if (tmp != NULL)
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_256_point_free_4(p, 0, heap);
+    sp_256_point_free_4(rt, 0, heap);
+
+    return err;
+}
+
+/* A table entry for pre-computed points. */
+typedef struct sp_table_entry_256 {
+    sp_digit x[4];
+    sp_digit y[4];
+} sp_table_entry_256;
+
+#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL)
+#endif /* FP_ECC || WOLFSSL_SP_SMALL */
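+
+/* Sanity-check sketch of the window-table schedule above (illustration
+ * only): track the scalar multiple each slot holds and confirm slot k ends
+ * up holding k*g. The indices mirror the dbl/add_sub calls above.
+ */
+#if 0
+#include <stdio.h>
+
+int main(void)
+{
+    int v[33] = {0};
+    int k;
+
+    v[1] = 1;
+    for (k = 1; k <= 5; k++)          /* dbl_n_store: slots 2, 4, 8, 16, 32 */
+        v[1 << k] = 1 << k;
+    v[3] = v[2] + v[1];
+    v[6] = 2 * v[3];
+    v[7] = v[6] + v[1]; v[5] = v[6] - v[1];    /* add_sub fills both sides */
+    v[10] = 2 * v[5];
+    v[11] = v[10] + v[1]; v[9] = v[10] - v[1];
+    v[12] = 2 * v[6];
+    v[14] = 2 * v[7];
+    v[15] = v[14] + v[1]; v[13] = v[14] - v[1];
+    v[18] = 2 * v[9];
+    v[19] = v[18] + v[1]; v[17] = v[18] - v[1];
+    v[20] = 2 * v[10];
+    v[22] = 2 * v[11];
+    v[23] = v[22] + v[1]; v[21] = v[22] - v[1];
+    v[24] = 2 * v[12];
+    v[26] = 2 * v[13];
+    v[27] = v[26] + v[1]; v[25] = v[26] - v[1];
+    v[28] = 2 * v[14];
+    v[30] = 2 * v[15];
+    v[31] = v[30] + v[1]; v[29] = v[30] - v[1];
+    for (k = 1; k <= 32; k++)
+        printf("t[%d] holds %d*g\n", k, v[k]);  /* prints t[k] holds k*g */
+    return 0;
+}
+#endif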
+
+/* Add two Montgomery form projective points where the second point (q) has
+ * a Z ordinate of one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_qz1_4(sp_point_256* r, const sp_point_256* p,
+        const sp_point_256* q, sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*4;
+    sp_digit* t3 = t + 4*4;
+    sp_digit* t4 = t + 6*4;
+    sp_digit* t5 = t + 8*4;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double */
+    (void)sp_256_sub_4(t1, p256_mod, q->y);
+    sp_256_norm_4(t1);
+    if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) &
+        (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_4(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<4; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<4; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<4; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - X1 */
+        sp_256_mont_sub_4(t2, t2, x, p256_mod);
+        /* R = S2 - Y1 */
+        sp_256_mont_sub_4(t4, t4, y, p256_mod);
+        /* Z3 = H*Z1 */
+        sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_256_mont_sqr_4(t1, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(t3, x, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(x, t1, t5, p256_mod);
+        sp_256_mont_dbl_4(t1, t3, p256_mod);
+        sp_256_mont_sub_4(x, x, t1, p256_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_256_mont_sub_4(t3, t3, x, p256_mod);
+        sp_256_mont_mul_4(t3, t3, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(t5, t5, y, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(y, t3, t5, p256_mod);
+    }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a  Point to convert.
+ * t  Temporary data.
+ */
+static void sp_256_proj_to_affine_4(sp_point_256* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 4;
+    sp_digit* tmp = t + 4 * 4;
+
+    sp_256_mont_inv_4(t1, a->z, tmp);
+
+    sp_256_mont_sqr_4(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    sp_256_mont_mul_4(a->x, a->x, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(a->y, a->y, t1, p256_mod, p256_mp_mod);
+    XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
+}
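+
+/* Toy-field sketch of the conversion above (illustration only): with
+ * Jacobian (X, Y, Z) the affine point is (X/Z^2, Y/Z^3), and one inversion
+ * yields both powers. Uses p = 2^31 - 1 and Fermat inversion purely for
+ * illustration; these are not the wolfSSL routines.
+ */
+#if 0
+#include <stdint.h>
+#include <stdio.h>
+
+static const uint64_t P = 2147483647ULL;     /* 2^31 - 1, a Mersenne prime */
+
+static uint64_t mulmod(uint64_t a, uint64_t b) { return (a * b) % P; }
+
+static uint64_t powmod(uint64_t a, uint64_t e)
+{
+    uint64_t r = 1;
+    while (e) {
+        if (e & 1) r = mulmod(r, a);
+        a = mulmod(a, a);
+        e >>= 1;
+    }
+    return r;
+}
+
+int main(void)
+{
+    uint64_t x = 1234, y = 5678, z = 3;      /* sample Jacobian ordinates */
+    uint64_t zinv = powmod(z, P - 2);        /* z^-1 by Fermat's little thm */
+    uint64_t zinv2 = mulmod(zinv, zinv);     /* z^-2, as t2 above */
+    uint64_t zinv3 = mulmod(zinv2, zinv);    /* z^-3, as t1 above */
+    printf("affine x = %llu, y = %llu\n",
+           (unsigned long long)mulmod(x, zinv2),
+           (unsigned long long)mulmod(y, zinv3));
+    return 0;
+}
+#endif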
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap   Heap to use for allocation.
+ */
+static int sp_256_gen_stripe_table_4(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_4(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_4(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_4(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_4(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_256_proj_point_dbl_n_4(t, 32, tmp);
+            sp_256_proj_to_affine_4(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_4(t, s1, s2, tmp);
+                sp_256_proj_to_affine_4(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_4(s2, 0, heap);
+    sp_256_point_free_4(s1, 0, heap);
+    sp_256_point_free_4( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
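+
+/* Sketch of the comb lookup index used by the stripe multiplication below
+ * (illustration only): 8 teeth, 32 bits apart, so column i of the scalar
+ * selects one of 256 precomputed combinations. The scalar here is
+ * arbitrary; this is not wolfSSL code.
+ */
+#if 0
+#include <stdint.h>
+#include <stdio.h>
+
+int main(void)
+{
+    uint64_t k[4] = {0x0123456789abcdefULL, 0xfedcba9876543210ULL,
+                     0x0f0f0f0f0f0f0f0fULL, 0x1111222233334444ULL};
+    int i, j, x, y;
+
+    for (i = 31; i >= 0; i--) {
+        y = 0;
+        for (j = 0, x = i; j < 8; j++, x += 32) {
+            y |= (int)((k[x / 64] >> (x % 64)) & 1) << j;
+        }
+        printf("column %2d -> table index %3d\n", i, y);
+    }
+    return 0;
+}
+#endif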
+#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL)
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_4(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit td[2 * 4 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
+        DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        y = 0;
+        for (j=0,x=31; j<8; j++,x+=32) {
+            y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=30; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=32) {
+                y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_4(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_4(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_4(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_4(p, 0, heap);
+    sp_256_point_free_4(rt, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC || WOLFSSL_SP_SMALL */
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_256_t {
+    sp_digit x[4];
+    sp_digit y[4];
+    sp_table_entry_256 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_256_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_256[i].set)
+            continue;
+
+        if (sp_256_cmp_equal_4(g->x, sp_cache_256[i].x) &
+                sp_256_cmp_equal_4(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_256[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_256[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, const sp_digit* k, + int map, void* heap) +{ +#ifndef FP_ECC + return sp_256_ecc_mulmod_win_add_sub_4(r, g, k, map, heap); +#else + sp_digit tmp[2 * 4 * 5]; + sp_cache_256_t* cache; + int err = MP_OKAY; + +#ifndef HAVE_THREAD_LS + if (initCacheMutex_256 == 0) { + wc_InitMutex(&sp_cache_256_lock); + initCacheMutex_256 = 1; + } + if (wc_LockMutex(&sp_cache_256_lock) != 0) + err = BAD_MUTEX_E; +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_256(g, &cache); + if (cache->cnt == 2) + sp_256_gen_stripe_table_4(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_256_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_256_ecc_mulmod_win_add_sub_4(r, g, k, map, heap); + } + else { + err = sp_256_ecc_mulmod_stripe_4(r, g, cache->table, k, + map, heap); + } + } + + return err; +#endif +} + +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map, + void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 p; + sp_digit kd[4]; +#endif + sp_point_256* point; + sp_digit* k = NULL; + int err = MP_OKAY; + + err = sp_256_point_new_4(heap, p, point); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + if (err == MP_OKAY) { + sp_256_from_mp(k, 4, km); + sp_256_point_from_ecc_point_4(point, gm); + + err = sp_256_ecc_mulmod_4(point, point, k, map, heap); + } + if (err == MP_OKAY) { + err = sp_256_point_to_ecc_point_4(point, r); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_4(point, 0, heap); + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +static const sp_table_entry_256 p256_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x79e730d418a9143cL,0x75ba95fc5fedb601L,0x79fb732b77622510L, + 0x18905f76a53755c6L }, + { 0xddf25357ce95560aL,0x8b4ab8e4ba19e45cL,0xd2e88688dd21f325L, + 0x8571ff1825885d85L } }, + /* 2 */ + { { 0x202886024147519aL,0xd0981eac26b372f0L,0xa9d4a7caa785ebc8L, + 0xd953c50ddbdf58e9L }, + { 0x9d6361ccfd590f8fL,0x72e9626b44e6c917L,0x7fd9611022eb64cfL, + 0x863ebb7e9eb288f3L } }, + /* 3 */ + { { 0x7856b6235cdb6485L,0x808f0ea22f0a2f97L,0x3e68d9544f7e300bL, + 0x00076055b5ff80a0L }, + { 0x7634eb9b838d2010L,0x54014fbb3243708aL,0xe0e47d39842a6606L, + 0x8308776134373ee0L } }, + /* 4 */ + { { 0x4f922fc516a0d2bbL,0x0d5cc16c1a623499L,0x9241cf3a57c62c8bL, + 0x2f5e6961fd1b667fL }, + { 0x5c15c70bf5a01797L,0x3d20b44d60956192L,0x04911b37071fdb52L, + 0xf648f9168d6f0f7bL } }, + /* 5 */ + { { 0x9e566847e137bbbcL,0xe434469e8a6a0becL,0xb1c4276179d73463L, + 0x5abe0285133d0015L }, + { 
0x92aa837cc04c7dabL,0x573d9f4c43260c07L,0x0c93156278e6cc37L, + 0x94bb725b6b6f7383L } }, + /* 6 */ + { { 0xbbf9b48f720f141cL,0x6199b3cd2df5bc74L,0xdc3f6129411045c4L, + 0xcdd6bbcb2f7dc4efL }, + { 0xcca6700beaf436fdL,0x6f647f6db99326beL,0x0c0fa792014f2522L, + 0xa361bebd4bdae5f6L } }, + /* 7 */ + { { 0x28aa2558597c13c7L,0xc38d635f50b7c3e1L,0x07039aecf3c09d1dL, + 0xba12ca09c4b5292cL }, + { 0x9e408fa459f91dfdL,0x3af43b66ceea07fbL,0x1eceb0899d780b29L, + 0x53ebb99d701fef4bL } }, + /* 8 */ + { { 0x4fe7ee31b0e63d34L,0xf4600572a9e54fabL,0xc0493334d5e7b5a4L, + 0x8589fb9206d54831L }, + { 0xaa70f5cc6583553aL,0x0879094ae25649e5L,0xcc90450710044652L, + 0xebb0696d02541c4fL } }, + /* 9 */ + { { 0x4616ca15ac1647c5L,0xb8127d47c4cf5799L,0xdc666aa3764dfbacL, + 0xeb2820cbd1b27da3L }, + { 0x9406f8d86a87e008L,0xd87dfa9d922378f3L,0x56ed2e4280ccecb2L, + 0x1f28289b55a7da1dL } }, + /* 10 */ + { { 0xabbaa0c03b89da99L,0xa6f2d79eb8284022L,0x27847862b81c05e8L, + 0x337a4b5905e54d63L }, + { 0x3c67500d21f7794aL,0x207005b77d6d7f61L,0x0a5a378104cfd6e8L, + 0x0d65e0d5f4c2fbd6L } }, + /* 11 */ + { { 0xd9d09bbeb5275d38L,0x4268a7450be0a358L,0xf0762ff4973eb265L, + 0xc23da24252f4a232L }, + { 0x5da1b84f0b94520cL,0x09666763b05bd78eL,0x3a4dcb8694d29ea1L, + 0x19de3b8cc790cff1L } }, + /* 12 */ + { { 0x183a716c26c5fe04L,0x3b28de0b3bba1bdbL,0x7432c586a4cb712cL, + 0xe34dcbd491fccbfdL }, + { 0xb408d46baaa58403L,0x9a69748682e97a53L,0x9e39012736aaa8afL, + 0xe7641f447b4e0f7fL } }, + /* 13 */ + { { 0x7d753941df64ba59L,0xd33f10ec0b0242fcL,0x4f06dfc6a1581859L, + 0x4a12df57052a57bfL }, + { 0xbfa6338f9439dbd0L,0xd3c24bd4bde53e1fL,0xfd5e4ffa21f1b314L, + 0x6af5aa93bb5bea46L } }, + /* 14 */ + { { 0xda10b69910c91999L,0x0a24b4402a580491L,0x3e0094b4b8cc2090L, + 0x5fe3475a66a44013L }, + { 0xb0f8cabdf93e7b4bL,0x292b501a7c23f91aL,0x42e889aecd1e6263L, + 0xb544e308ecfea916L } }, + /* 15 */ + { { 0x6478c6e916ddfdceL,0x2c329166f89179e6L,0x4e8d6e764d4e67e1L, + 0xe0b6b2bda6b0c20bL }, + { 0x0d312df2bb7efb57L,0x1aac0dde790c4007L,0xf90336ad679bc944L, + 0x71c023de25a63774L } }, + /* 16 */ + { { 0x62a8c244bfe20925L,0x91c19ac38fdce867L,0x5a96a5d5dd387063L, + 0x61d587d421d324f6L }, + { 0xe87673a2a37173eaL,0x2384800853778b65L,0x10f8441e05bab43eL, + 0xfa11fe124621efbeL } }, + /* 17 */ + { { 0x1c891f2b2cb19ffdL,0x01ba8d5bb1923c23L,0xb6d03d678ac5ca8eL, + 0x586eb04c1f13bedcL }, + { 0x0c35c6e527e8ed09L,0x1e81a33c1819ede2L,0x278fd6c056c652faL, + 0x19d5ac0870864f11L } }, + /* 18 */ + { { 0x1e99f581309a4e1fL,0xab7de71be9270074L,0x26a5ef0befd28d20L, + 0xe7c0073f7f9c563fL }, + { 0x1f6d663a0ef59f76L,0x669b3b5420fcb050L,0xc08c1f7a7a6602d4L, + 0xe08504fec65b3c0aL } }, + /* 19 */ + { { 0xf098f68da031b3caL,0x6d1cab9ee6da6d66L,0x5bfd81fa94f246e8L, + 0x78f018825b0996b4L }, + { 0xb7eefde43a25787fL,0x8016f80d1dccac9bL,0x0cea4877b35bfc36L, + 0x43a773b87e94747aL } }, + /* 20 */ + { { 0x62577734d2b533d5L,0x673b8af6a1bdddc0L,0x577e7c9aa79ec293L, + 0xbb6de651c3b266b1L }, + { 0xe7e9303ab65259b3L,0xd6a0afd3d03a7480L,0xc5ac83d19b3cfc27L, + 0x60b4619a5d18b99bL } }, + /* 21 */ + { { 0xbd6a38e11ae5aa1cL,0xb8b7652b49e73658L,0x0b130014ee5f87edL, + 0x9d0f27b2aeebffcdL }, + { 0xca9246317a730a55L,0x9c955b2fddbbc83aL,0x07c1dfe0ac019a71L, + 0x244a566d356ec48dL } }, + /* 22 */ + { { 0x6db0394aeacf1f96L,0x9f2122a9024c271cL,0x2626ac1b82cbd3b9L, + 0x45e58c873581ef69L }, + { 0xd3ff479da38f9dbcL,0xa8aaf146e888a040L,0x945adfb246e0bed7L, + 0xc040e21cc1e4b7a4L } }, + /* 23 */ + { { 0x847af0006f8117b6L,0x651969ff73a35433L,0x482b35761d9475ebL, + 0x1cdf5c97682c6ec7L }, + { 
0x7db775b411f04839L,0x7dbeacf448de1698L,0xb2921dd1b70b3219L, + 0x046755f8a92dff3dL } }, + /* 24 */ + { { 0xcc8ac5d2bce8ffcdL,0x0d53c48b2fe61a82L,0xf6f161727202d6c7L, + 0x046e5e113b83a5f3L }, + { 0xe7b8ff64d8007f01L,0x7fb1ef125af43183L,0x045c5ea635e1a03cL, + 0x6e0106c3303d005bL } }, + /* 25 */ + { { 0x48c7358488dd73b1L,0x7670708f995ed0d9L,0x38385ea8c56a2ab7L, + 0x442594ede901cf1fL }, + { 0xf8faa2c912d4b65bL,0x94c2343b96c90c37L,0xd326e4a15e978d1fL, + 0xa796fa514c2ee68eL } }, + /* 26 */ + { { 0x359fb604823addd7L,0x9e2a6183e56693b3L,0xf885b78e3cbf3c80L, + 0xe4ad2da9c69766e9L }, + { 0x357f7f428e048a61L,0x082d198cc092d9a0L,0xfc3a1af4c03ed8efL, + 0xc5e94046c37b5143L } }, + /* 27 */ + { { 0x476a538c2be75f9eL,0x6fd1a9e8cb123a78L,0xd85e4df0b109c04bL, + 0x63283dafdb464747L }, + { 0xce728cf7baf2df15L,0xe592c4550ad9a7f4L,0xfab226ade834bcc3L, + 0x68bd19ab1981a938L } }, + /* 28 */ + { { 0xc08ead511887d659L,0x3374d5f4b359305aL,0x96986981cfe74fe3L, + 0x495292f53c6fdfd6L }, + { 0x4a878c9e1acec896L,0xd964b210ec5b4484L,0x6696f7e2664d60a7L, + 0x0ec7530d26036837L } }, + /* 29 */ + { { 0x2da13a05ad2687bbL,0xa1f83b6af32e21faL,0x390f5ef51dd4607bL, + 0x0f6207a664863f0bL }, + { 0xbd67e3bb0f138233L,0xdd66b96c272aa718L,0x8ed0040726ec88aeL, + 0xff0db07208ed6dcfL } }, + /* 30 */ + { { 0x749fa1014c95d553L,0xa44052fd5d680a8aL,0x183b4317ff3b566fL, + 0x313b513c88740ea3L }, + { 0xb402e2ac08d11549L,0x071ee10bb4dee21cL,0x26b987dd47f2320eL, + 0x2d3abcf986f19f81L } }, + /* 31 */ + { { 0x4c288501815581a2L,0x9a0a6d56632211afL,0x19ba7a0f0cab2e99L, + 0xc036fa10ded98cdfL }, + { 0x29ae08bac1fbd009L,0x0b68b19006d15816L,0xc2eb32779b9e0d8fL, + 0xa6b2a2c4b6d40194L } }, + /* 32 */ + { { 0xd433e50f6d3549cfL,0x6f33696ffacd665eL,0x695bfdacce11fcb4L, + 0x810ee252af7c9860L }, + { 0x65450fe17159bb2cL,0xf7dfbebe758b357bL,0x2b057e74d69fea72L, + 0xd485717a92731745L } }, + /* 33 */ + { { 0x11741a8af0cb5a98L,0xd3da8f931f3110bfL,0x1994e2cbab382adfL, + 0x6a6045a72f9a604eL }, + { 0x170c0d3fa2b2411dL,0xbe0eb83e510e96e0L,0x3bcc9f738865b3ccL, + 0xd3e45cfaf9e15790L } }, + /* 34 */ + { { 0xce1f69bbe83f7669L,0x09f8ae8272877d6bL,0x9548ae543244278dL, + 0x207755dee3c2c19cL }, + { 0x87bd61d96fef1945L,0x18813cefb12d28c3L,0x9fbcd1d672df64aaL, + 0x48dc5ee57154b00dL } }, + /* 35 */ + { { 0x123790bff7e5a199L,0xe0efb8cf989ccbb7L,0xc27a2bfe0a519c79L, + 0xf2fb0aeddff6f445L }, + { 0x41c09575f0b5025fL,0x550543d740fa9f22L,0x8fa3c8ad380bfbd0L, + 0xa13e9015db28d525L } }, + /* 36 */ + { { 0xf9f7a350a2b65cbcL,0x0b04b9722a464226L,0x265ce241e23f07a1L, + 0x2bf0d6b01497526fL }, + { 0xd3d4dd3f4b216fb7L,0xf7d7b867fbdda26aL,0xaeb7b83f6708505cL, + 0x42a94a5a162fe89fL } }, + /* 37 */ + { { 0x5846ad0beaadf191L,0x0f8a489025a268d7L,0xe8603050494dc1f6L, + 0x2c2dd969c65ede3dL }, + { 0x6d02171d93849c17L,0x460488ba1da250ddL,0x4810c7063c3a5485L, + 0xf437fa1f42c56dbcL } }, + /* 38 */ + { { 0x6aa0d7144a0f7dabL,0x0f0497931776e9acL,0x52c0a050f5f39786L, + 0xaaf45b3354707aa8L }, + { 0x85e37c33c18d364aL,0xd40b9b063e497165L,0xf417168115ec5444L, + 0xcdf6310df4f272bcL } }, + /* 39 */ + { { 0x7473c6238ea8b7efL,0x08e9351885bc2287L,0x419567722bda8e34L, + 0xf0d008bada9e2ff2L }, + { 0x2912671d2414d3b1L,0xb3754985b019ea76L,0x5c61b96d453bcbdbL, + 0x5bd5c2f5ca887b8bL } }, + /* 40 */ + { { 0xef0f469ef49a3154L,0x3e85a5956e2b2e9aL,0x45aaec1eaa924a9cL, + 0xaa12dfc8a09e4719L }, + { 0x26f272274df69f1dL,0xe0e4c82ca2ff5e73L,0xb9d8ce73b7a9dd44L, + 0x6c036e73e48ca901L } }, + /* 41 */ + { { 0x5cfae12a0f6e3138L,0x6966ef0025ad345aL,0x8993c64b45672bc5L, + 0x292ff65896afbe24L }, + { 
0xd5250d445e213402L,0xf6580e274392c9feL,0x097b397fda1c72e8L, + 0x644e0c90311b7276L } }, + /* 42 */ + { { 0xe1e421e1a47153f0L,0xb86c3b79920418c9L,0x93bdce87705d7672L, + 0xf25ae793cab79a77L }, + { 0x1f3194a36d869d0cL,0x9d55c8824986c264L,0x49fb5ea3096e945eL, + 0x39b8e65313db0a3eL } }, + /* 43 */ + { { 0x37754200b6fd2e59L,0x35e2c0669255c98fL,0xd9dab21a0e2a5739L, + 0x39122f2f0f19db06L }, + { 0xcfbce1e003cad53cL,0x225b2c0fe65c17e3L,0x72baf1d29aa13877L, + 0x8de80af8ce80ff8dL } }, + /* 44 */ + { { 0xafbea8d9207bbb76L,0x921c7e7c21782758L,0xdfa2b74b1c0436b1L, + 0x871949062e368c04L }, + { 0xb5f928bba3993df5L,0x639d75b5f3b3d26aL,0x011aa78a85b55050L, + 0xfc315e6a5b74fde1L } }, + /* 45 */ + { { 0x561fd41ae8d6ecfaL,0x5f8c44f61aec7f86L,0x98452a7b4924741dL, + 0xe6d4a7adee389088L }, + { 0x60552ed14593c75dL,0x70a70da4dd271162L,0xd2aede937ba2c7dbL, + 0x35dfaf9a9be2ae57L } }, + /* 46 */ + { { 0x6b956fcdaa736636L,0x09f51d97ae2cab7eL,0xfb10bf410f349966L, + 0x1da5c7d71c830d2bL }, + { 0x5c41e4833cce6825L,0x15ad118ff9573c3bL,0xa28552c7f23036b8L, + 0x7077c0fddbf4b9d6L } }, + /* 47 */ + { { 0xbf63ff8d46b9661cL,0xa1dfd36b0d2cfd71L,0x0373e140a847f8f7L, + 0x53a8632ee50efe44L }, + { 0x0976ff68696d8051L,0xdaec0c95c74f468aL,0x62994dc35e4e26bdL, + 0x028ca76d34e1fcc1L } }, + /* 48 */ + { { 0xd11d47dcfc9877eeL,0xc8b36210801d0002L,0xd002c11754c260b6L, + 0x04c17cd86962f046L }, + { 0x6d9bd094b0daddf5L,0xbea2357524ce55c0L,0x663356e672da03b5L, + 0xf7ba4de9fed97474L } }, + /* 49 */ + { { 0xd0dbfa34ebe1263fL,0x5576373571ae7ce6L,0xd244055382a6f523L, + 0xe31f960052131c41L }, + { 0xd1bb9216ea6b6ec6L,0x37a1d12e73c2fc44L,0xc10e7eac89d0a294L, + 0xaa3a6259ce34d47bL } }, + /* 50 */ + { { 0xfbcf9df536f3dcd3L,0x6ceded50d2bf7360L,0x491710fadf504f5bL, + 0x2398dd627e79daeeL }, + { 0xcf4705a36d09569eL,0xea0619bb5149f769L,0xff9c037735f6034cL, + 0x5717f5b21c046210L } }, + /* 51 */ + { { 0x9fe229c921dd895eL,0x8e51850040c28451L,0xfa13d2391d637ecdL, + 0x660a2c560e3c28deL }, + { 0x9cca88aed67fcbd0L,0xc84724780ea9f096L,0x32b2f48172e92b4dL, + 0x624ee54c4f522453L } }, + /* 52 */ + { { 0x09549ce4d897ecccL,0x4d49d1d93f9880aaL,0x723c2423043a7c20L, + 0x4f392afb92bdfbc0L }, + { 0x6969f8fa7de44fd9L,0xb66cfbe457b32156L,0xdb2fa803368ebc3cL, + 0x8a3e7977ccdb399cL } }, + /* 53 */ + { { 0xdde1881f06c4b125L,0xae34e300f6e3ca8cL,0xef6999de5c7a13e9L, + 0x3888d02370c24404L }, + { 0x7628035644f91081L,0x3d9fcf615f015504L,0x1827edc8632cd36eL, + 0xa5e62e4718102336L } }, + /* 54 */ + { { 0x1a825ee32facd6c8L,0x699c635454bcbc66L,0x0ce3edf798df9931L, + 0x2c4768e6466a5adcL }, + { 0xb346ff8c90a64bc9L,0x630a6020e4779f5cL,0xd949d064bc05e884L, + 0x7b5e6441f9e652a0L } }, + /* 55 */ + { { 0x2169422c1d28444aL,0xe996c5d8be136a39L,0x2387afe5fb0c7fceL, + 0xb8af73cb0c8d744aL }, + { 0x5fde83aa338b86fdL,0xfee3f158a58a5cffL,0xc9ee8f6f20ac9433L, + 0xa036395f7f3f0895L } }, + /* 56 */ + { { 0x8c73c6bba10f7770L,0xa6f16d81a12a0e24L,0x100df68251bc2b9fL, + 0x4be36b01875fb533L }, + { 0x9226086e9fb56dbbL,0x306fef8b07e7a4f8L,0xeeaccc0566d52f20L, + 0x8cbc9a871bdc00c0L } }, + /* 57 */ + { { 0xe131895cc0dac4abL,0xa874a440712ff112L,0x6332ae7c6a1cee57L, + 0x44e7553e0c0835f8L }, + { 0x6d503fff7734002dL,0x9d35cb8b0b34425cL,0x95f702760e8738b5L, + 0x470a683a5eb8fc18L } }, + /* 58 */ + { { 0x81b761dc90513482L,0x0287202a01e9276aL,0xcda441ee0ce73083L, + 0x16410690c63dc6efL }, + { 0xf5034a066d06a2edL,0xdd4d7745189b100bL,0xd914ae72ab8218c9L, + 0xd73479fd7abcbb4fL } }, + /* 59 */ + { { 0x7edefb165ad4c6e5L,0x262cf08f5b06d04dL,0x12ed5bb18575cb14L, + 0x816469e30771666bL }, + { 
0xd7ab9d79561e291eL,0xeb9daf22c1de1661L,0xf49827eb135e0513L, + 0x0a36dd23f0dd3f9cL } }, + /* 60 */ + { { 0x098d32c741d5533cL,0x7c5f5a9e8684628fL,0x39a228ade349bd11L, + 0xe331dfd6fdbab118L }, + { 0x5100ab686bcc6ed8L,0x7160c3bdef7a260eL,0x9063d9a7bce850d7L, + 0xd3b4782a492e3389L } }, + /* 61 */ + { { 0xa149b6e8f3821f90L,0x92edd9ed66eb7aadL,0x0bb669531a013116L, + 0x7281275a4c86a5bdL }, + { 0x503858f7d3ff47e5L,0x5e1616bc61016441L,0x62b0f11a7dfd9bb1L, + 0x2c062e7ece145059L } }, + /* 62 */ + { { 0xa76f996f0159ac2eL,0x281e7736cbdb2713L,0x2ad6d28808e46047L, + 0x282a35f92c4e7ef1L }, + { 0x9c354b1ec0ce5cd2L,0xcf99efc91379c229L,0x992caf383e82c11eL, + 0xc71cd513554d2abdL } }, + /* 63 */ + { { 0x4885de9c09b578f4L,0x1884e258e3affa7aL,0x8f76b1b759182f1fL, + 0xc50f6740cf47f3a3L }, + { 0xa9c4adf3374b68eaL,0xa406f32369965fe2L,0x2f86a22285a53050L, + 0xb9ecb3a7212958dcL } }, + /* 64 */ + { { 0x56f8410ef4f8b16aL,0x97241afec47b266aL,0x0a406b8e6d9c87c1L, + 0x803f3e02cd42ab1bL }, + { 0x7f0309a804dbec69L,0xa83b85f73bbad05fL,0xc6097273ad8e197fL, + 0xc097440e5067adc1L } }, + /* 65 */ + { { 0x846a56f2c379ab34L,0xa8ee068b841df8d1L,0x20314459176c68efL, + 0xf1af32d5915f1f30L }, + { 0x99c375315d75bd50L,0x837cffbaf72f67bcL,0x0613a41848d7723fL, + 0x23d0f130e2d41c8bL } }, + /* 66 */ + { { 0x857ab6edf41500d9L,0x0d890ae5fcbeada8L,0x52fe864889725951L, + 0xb0288dd6c0a3faddL }, + { 0x85320f30650bcb08L,0x71af6313695d6e16L,0x31f520a7b989aa76L, + 0xffd3724ff408c8d2L } }, + /* 67 */ + { { 0x53968e64b458e6cbL,0x992dad20317a5d28L,0x3814ae0b7aa75f56L, + 0xf5590f4ad78c26dfL }, + { 0x0fc24bd3cf0ba55aL,0x0fc4724a0c778baeL,0x1ce9864f683b674aL, + 0x18d6da54f6f74a20L } }, + /* 68 */ + { { 0xed93e225d5be5a2bL,0x6fe799835934f3c6L,0x4314092622626ffcL, + 0x50bbb4d97990216aL }, + { 0x378191c6e57ec63eL,0x65422c40181dcdb2L,0x41a8099b0236e0f6L, + 0x2b10011801fe49c3L } }, + /* 69 */ + { { 0xfc68b5c59b391593L,0xc385f5a2598270fcL,0x7144f3aad19adcbbL, + 0xdd55899983fbae0cL }, + { 0x93b88b8e74b82ff4L,0xd2e03c4071e734c9L,0x9a7a9eaf43c0322aL, + 0xe6e4c551149d6041L } }, + /* 70 */ + { { 0x55f655bb1e9af288L,0x647e1a64f7ada931L,0x43697e4bcb2820e5L, + 0x51e00db107ed56ffL }, + { 0x43d169b8771c327eL,0x29cdb20b4a96c2adL,0xc07d51f53deb4779L, + 0xe22f424149829177L } }, + /* 71 */ + { { 0xcd45e8f4635f1abbL,0x7edc0cb568538874L,0xc9472c1fb5a8034dL, + 0xf709373d52dc48c9L }, + { 0x401966bba8af30d6L,0x95bf5f4af137b69cL,0x3966162a9361c47eL, + 0xbd52d288e7275b11L } }, + /* 72 */ + { { 0xab155c7a9c5fa877L,0x17dad6727d3a3d48L,0x43f43f9e73d189d8L, + 0xa0d0f8e4c8aa77a6L }, + { 0x0bbeafd8cc94f92dL,0xd818c8be0c4ddb3aL,0x22cc65f8b82eba14L, + 0xa56c78c7946d6a00L } }, + /* 73 */ + { { 0x2962391b0dd09529L,0x803e0ea63daddfcfL,0x2c77351f5b5bf481L, + 0xd8befdf8731a367aL }, + { 0xab919d42fc0157f4L,0xf51caed7fec8e650L,0xcdf9cb4002d48b0aL, + 0x854a68a5ce9f6478L } }, + /* 74 */ + { { 0xdc35f67b63506ea5L,0x9286c489a4fe0d66L,0x3f101d3bfe95cd4dL, + 0x5cacea0b98846a95L }, + { 0xa90df60c9ceac44dL,0x3db29af4354d1c3aL,0x08dd3de8ad5dbabeL, + 0xe4982d1235e4efa9L } }, + /* 75 */ + { { 0x23104a22c34cd55eL,0x58695bb32680d132L,0xfb345afa1fa1d943L, + 0x8046b7f616b20499L }, + { 0xb533581e38e7d098L,0xd7f61e8df46f0b70L,0x30dea9ea44cb78c4L, + 0xeb17ca7b9082af55L } }, + /* 76 */ + { { 0x1751b59876a145b9L,0xa5cf6b0fc1bc71ecL,0xd3e03565392715bbL, + 0x097b00bafab5e131L }, + { 0xaa66c8e9565f69e1L,0x77e8f75ab5be5199L,0x6033ba11da4fd984L, + 0xf95c747bafdbcc9eL } }, + /* 77 */ + { { 0x558f01d3bebae45eL,0xa8ebe9f0c4bc6955L,0xaeb705b1dbc64fc6L, + 0x3512601e566ed837L }, + { 
0x9336f1e1fa1161cdL,0x328ab8d54c65ef87L,0x4757eee2724f21e5L, + 0x0ef971236068ab6bL } }, + /* 78 */ + { { 0x02598cf754ca4226L,0x5eede138f8642c8eL,0x48963f74468e1790L, + 0xfc16d9333b4fbc95L }, + { 0xbe96fb31e7c800caL,0x138063312678adaaL,0x3d6244976ff3e8b5L, + 0x14ca4af1b95d7a17L } }, + /* 79 */ + { { 0x7a4771babd2f81d5L,0x1a5f9d6901f7d196L,0xd898bef7cad9c907L, + 0x4057b063f59c231dL }, + { 0xbffd82fe89c05c0aL,0xe4911c6f1dc0df85L,0x3befccaea35a16dbL, + 0x1c3b5d64f1330b13L } }, + /* 80 */ + { { 0x5fe14bfe80ec21feL,0xf6ce116ac255be82L,0x98bc5a072f4a5d67L, + 0xfad27148db7e63afL }, + { 0x90c0b6ac29ab05b3L,0x37a9a83c4e251ae6L,0x0a7dc875c2aade7dL, + 0x77387de39f0e1a84L } }, + /* 81 */ + { { 0x1e9ecc49a56c0dd7L,0xa5cffcd846086c74L,0x8f7a1408f505aeceL, + 0xb37b85c0bef0c47eL }, + { 0x3596b6e4cc0e6a8fL,0xfd6d4bbf6b388f23L,0xaba453fac39cef4eL, + 0x9c135ac8f9f628d5L } }, + /* 82 */ + { { 0x32aa320284e35743L,0x320d6ab185a3cdefL,0xb821b1761df19819L, + 0x5721361fc433851fL }, + { 0x1f0db36a71fc9168L,0x5f98ba735e5c403cL,0xf64ca87e37bcd8f5L, + 0xdcbac3c9e6bb11bdL } }, + /* 83 */ + { { 0xf01d99684518cbe2L,0xd242fc189c9eb04eL,0x727663c7e47feebfL, + 0xb8c1c89e2d626862L }, + { 0x51a58bddc8e1d569L,0x563809c8b7d88cd0L,0x26c27fd9f11f31ebL, + 0x5d23bbda2f9422d4L } }, + /* 84 */ + { { 0x0a1c729495c8f8beL,0x2961c4803bf362bfL,0x9e418403df63d4acL, + 0xc109f9cb91ece900L }, + { 0xc2d095d058945705L,0xb9083d96ddeb85c0L,0x84692b8d7a40449bL, + 0x9bc3344f2eee1ee1L } }, + /* 85 */ + { { 0x0d5ae35642913074L,0x55491b2748a542b1L,0x469ca665b310732aL, + 0x29591d525f1a4cc1L }, + { 0xe76f5b6bb84f983fL,0xbe7eef419f5f84e1L,0x1200d49680baa189L, + 0x6376551f18ef332cL } }, + /* 86 */ + { { 0xbda5f14e562976ccL,0x22bca3e60ef12c38L,0xbbfa30646cca9852L, + 0xbdb79dc808e2987aL }, + { 0xfd2cb5c9cb06a772L,0x38f475aafe536dceL,0xc2a3e0227c2b5db8L, + 0x8ee86001add3c14aL } }, + /* 87 */ + { { 0xcbe96981a4ade873L,0x7ee9aa4dc4fba48cL,0x2cee28995a054ba5L, + 0x92e51d7a6f77aa4bL }, + { 0x948bafa87190a34dL,0xd698f75bf6bd1ed1L,0xd00ee6e30caf1144L, + 0x5182f86f0a56aaaaL } }, + /* 88 */ + { { 0xfba6212c7a4cc99cL,0xff609b683e6d9ca1L,0x5dbb27cb5ac98c5aL, + 0x91dcab5d4073a6f2L }, + { 0x01b6cc3d5f575a70L,0x0cb361396f8d87faL,0x165d4e8c89981736L, + 0x17a0cedb97974f2bL } }, + /* 89 */ + { { 0x38861e2a076c8d3aL,0x701aad39210f924bL,0x94d0eae413a835d9L, + 0x2e8ce36c7f4cdf41L }, + { 0x91273dab037a862bL,0x01ba9bb760e4c8faL,0xf964538833baf2ddL, + 0xf4ccc6cb34f668f3L } }, + /* 90 */ + { { 0x44ef525cf1f79687L,0x7c59549592efa815L,0xe1231741a5c78d29L, + 0xac0db4889a0df3c9L }, + { 0x86bfc711df01747fL,0x592b9358ef17df13L,0xe5880e4f5ccb6bb5L, + 0x95a64a6194c974a2L } }, + /* 91 */ + { { 0x72c1efdac15a4c93L,0x40269b7382585141L,0x6a8dfb1c16cb0badL, + 0x231e54ba29210677L }, + { 0xa70df9178ae6d2dcL,0x4d6aa63f39112918L,0xf627726b5e5b7223L, + 0xab0be032d8a731e1L } }, + /* 92 */ + { { 0x097ad0e98d131f2dL,0x637f09e33b04f101L,0x1ac86196d5e9a748L, + 0xf1bcc8802cf6a679L }, + { 0x25c69140e8daacb4L,0x3c4e405560f65009L,0x591cc8fc477937a6L, + 0x851694695aebb271L } }, + /* 93 */ + { { 0xde35c143f1dcf593L,0x78202b29b018be3bL,0xe9cdadc29bdd9d3dL, + 0x8f67d9d2daad55d8L }, + { 0x841116567481ea5fL,0xe7d2dde9e34c590cL,0xffdd43f405053fa8L, + 0xf84572b9c0728b5dL } }, + /* 94 */ + { { 0x5e1a7a7197af71c9L,0xa14494447a736565L,0xa1b4ae070e1d5063L, + 0xedee2710616b2c19L }, + { 0xb2f034f511734121L,0x1cac6e554a25e9f0L,0x8dc148f3a40c2ecfL, + 0x9fd27e9b44ebd7f4L } }, + /* 95 */ + { { 0x3cc7658af6e2cb16L,0xe3eb7d2cfe5919b6L,0x5a8c5816168d5583L, + 0xa40c2fb6958ff387L }, + { 
0x8c9ec560fedcc158L,0x7ad804c655f23056L,0xd93967049a307e12L, + 0x99bc9bb87dc6decfL } }, + /* 96 */ + { { 0x84a9521d927dafc6L,0x52c1fb695c09cd19L,0x9d9581a0f9366ddeL, + 0x9abe210ba16d7e64L }, + { 0x480af84a48915220L,0xfa73176a4dd816c6L,0xc7d539871681ca5aL, + 0x7881c25787f344b0L } }, + /* 97 */ + { { 0x93399b51e0bcf3ffL,0x0d02cbc5127f74f6L,0x8fb465a2dd01d968L, + 0x15e6e319a30e8940L }, + { 0x646d6e0d3e0e05f4L,0xfad7bddc43588404L,0xbe61c7d1c4f850d3L, + 0x0e55facf191172ceL } }, + /* 98 */ + { { 0x7e9d9806f8787564L,0x1a33172131e85ce6L,0x6b0158cab819e8d6L, + 0xd73d09766fe96577L }, + { 0x424834251eb7206eL,0xa519290fc618bb42L,0x5dcbb8595e30a520L, + 0x9250a3748f15a50bL } }, + /* 99 */ + { { 0xcaff08f8be577410L,0xfd408a035077a8c6L,0xf1f63289ec0a63a4L, + 0x77414082c1cc8c0bL }, + { 0x05a40fa6eb0991cdL,0xc1ca086649fdc296L,0x3a68a3c7b324fd40L, + 0x8cb04f4d12eb20b9L } }, + /* 100 */ + { { 0xb1c2d0556906171cL,0x9073e9cdb0240c3fL,0xdb8e6b4fd8906841L, + 0xe4e429ef47123b51L }, + { 0x0b8dd53c38ec36f4L,0xf9d2dc01ff4b6a27L,0x5d066e07879a9a48L, + 0x37bca2ff3c6e6552L } }, + /* 101 */ + { { 0x4cd2e3c7df562470L,0x44f272a2c0964ac9L,0x7c6d5df980c793beL, + 0x59913edc3002b22aL }, + { 0x7a139a835750592aL,0x99e01d80e783de02L,0xcf8c0375ea05d64fL, + 0x43786e4ab013e226L } }, + /* 102 */ + { { 0xff32b0ed9e56b5a6L,0x0750d9a6d9fc68f9L,0xec15e845597846a7L, + 0x8638ca98b7e79e7aL }, + { 0x2f5ae0960afc24b2L,0x05398eaf4dace8f2L,0x3b765dd0aecba78fL, + 0x1ecdd36a7b3aa6f0L } }, + /* 103 */ + { { 0x5d3acd626c5ff2f3L,0xa2d516c02873a978L,0xad94c9fad2110d54L, + 0xd85d0f85d459f32dL }, + { 0x9f700b8d10b11da3L,0xd2c22c30a78318c4L,0x556988f49208decdL, + 0xa04f19c3b4ed3c62L } }, + /* 104 */ + { { 0x087924c8ed7f93bdL,0xcb64ac5d392f51f6L,0x7cae330a821b71afL, + 0x92b2eeea5c0950b0L }, + { 0x85ac4c9485b6e235L,0xab2ca4a92936c0f0L,0x80faa6b3e0508891L, + 0x1ee782215834276cL } }, + /* 105 */ + { { 0xa60a2e00e63e79f7L,0xf590e7b2f399d906L,0x9021054a6607c09dL, + 0xf3f2ced857a6e150L }, + { 0x200510f3f10d9b55L,0x9d2fcfacd8642648L,0xe5631aa7e8bd0e7cL, + 0x0f56a4543da3e210L } }, + /* 106 */ + { { 0x5b21bffa1043e0dfL,0x6c74b6cc9c007e6dL,0x1a656ec0d4a8517aL, + 0xbd8f17411969e263L }, + { 0x8a9bbb86beb7494aL,0x1567d46f45f3b838L,0xdf7a12a7a4e5a79aL, + 0x2d1a1c3530ccfa09L } }, + /* 107 */ + { { 0x192e3813506508daL,0x336180c4a1d795a7L,0xcddb59497a9944b3L, + 0xa107a65eb91fba46L }, + { 0xe6d1d1c50f94d639L,0x8b4af3758a58b7d7L,0x1a7c5584bd37ca1cL, + 0x183d760af87a9af2L } }, + /* 108 */ + { { 0x29d697110dde59a4L,0xf1ad8d070e8bef87L,0x229b49634f2ebe78L, + 0x1d44179dc269d754L }, + { 0xb32dc0cf8390d30eL,0x0a3b27530de8110cL,0x31af1dc52bc0339aL, + 0x771f9cc29606d262L } }, + /* 109 */ + { { 0x99993e7785040739L,0x44539db98026a939L,0xcf40f6f2f5f8fc26L, + 0x64427a310362718eL }, + { 0x4f4f2d8785428aa8L,0x7b7adc3febfb49a8L,0x201b2c6df23d01acL, + 0x49d9b7496ae90d6dL } }, + /* 110 */ + { { 0xcc78d8bc435d1099L,0x2adbcd4e8e8d1a08L,0x02c2e2a02cb68a41L, + 0x9037d81b3f605445L }, + { 0x7cdbac27074c7b61L,0xfe2031ab57bfd72eL,0x61ccec96596d5352L, + 0x08c3de6a7cc0639cL } }, + /* 111 */ + { { 0x20fdd020f6d552abL,0x56baff9805cd81f1L,0x06fb7c3e91351291L, + 0xc690944245796b2fL }, + { 0x17b3ae9c41231bd1L,0x1eac6e875cc58205L,0x208837abf9d6a122L, + 0x3fa3db02cafe3ac0L } }, + /* 112 */ + { { 0xd75a3e6505058880L,0x7da365ef643943f2L,0x4147861cfab24925L, + 0xc5c4bdb0fdb808ffL }, + { 0x73513e34b272b56bL,0xc8327e9511b9043aL,0xfd8ce37df8844969L, + 0x2d56db9446c2b6b5L } }, + /* 113 */ + { { 0x2461782fff46ac6bL,0xd19f792607a2e425L,0xfafea3c409a48de1L, + 0x0f56bd9de503ba42L }, + { 
0x137d4ed1345cda49L,0x821158fc816f299dL,0xe7c6a54aaeb43402L, + 0x4003bb9d1173b5f1L } }, + /* 114 */ + { { 0x3b8e8189a0803387L,0xece115f539cbd404L,0x4297208dd2877f21L, + 0x53765522a07f2f9eL }, + { 0xa4980a21a8a4182dL,0xa2bbd07a3219df79L,0x674d0a2e1a19a2d4L, + 0x7a056f586c5d4549L } }, + /* 115 */ + { { 0x646b25589d8a2a47L,0x5b582948c3df2773L,0x51ec000eabf0d539L, + 0x77d482f17a1a2675L }, + { 0xb8a1bd9587853948L,0xa6f817bd6cfbffeeL,0xab6ec05780681e47L, + 0x4115012b2b38b0e4L } }, + /* 116 */ + { { 0x3c73f0f46de28cedL,0x1d5da7609b13ec47L,0x61b8ce9e6e5c6392L, + 0xcdf04572fbea0946L }, + { 0x1cb3c58b6c53c3b0L,0x97fe3c10447b843cL,0xfb2b8ae12cb9780eL, + 0xee703dda97383109L } }, + /* 117 */ + { { 0x34515140ff57e43aL,0xd44660d3b1b811b8L,0x2b3b5dff8f42b986L, + 0x2a0ad89da162ce21L }, + { 0x64e4a6946bc277baL,0xc788c954c141c276L,0x141aa64ccabf6274L, + 0xd62d0b67ac2b4659L } }, + /* 118 */ + { { 0x39c5d87b2c054ac4L,0x57005859f27df788L,0xedf7cbf3b18128d6L, + 0xb39a23f2991c2426L }, + { 0x95284a15f0b16ae5L,0x0c6a05b1a136f51bL,0x1d63c137f2700783L, + 0x04ed0092c0674cc5L } }, + /* 119 */ + { { 0x1f4185d19ae90393L,0x3047b4294a3d64e6L,0xae0001a69854fc14L, + 0xa0a91fc10177c387L }, + { 0xff0a3f01ae2c831eL,0xbb76ae822b727e16L,0x8f12c8a15a3075b4L, + 0x084cf9889ed20c41L } }, + /* 120 */ + { { 0xd98509defca6becfL,0x2fceae807dffb328L,0x5d8a15c44778e8b9L, + 0xd57955b273abf77eL }, + { 0x210da79e31b5d4f1L,0xaa52f04b3cfa7a1cL,0xd4d12089dc27c20bL, + 0x8e14ea4202d141f1L } }, + /* 121 */ + { { 0xeed50345f2897042L,0x8d05331f43402c4aL,0xc8d9c194c8bdfb21L, + 0x597e1a372aa4d158L }, + { 0x0327ec1acf0bd68cL,0x6d4be0dcab024945L,0x5b9c8d7ac9fe3e84L, + 0xca3f0236199b4deaL } }, + /* 122 */ + { { 0x592a10b56170bd20L,0x0ea897f16d3f5de7L,0xa3363ff144b2ade2L, + 0xbde7fd7e309c07e4L }, + { 0x516bb6d2b8f5432cL,0x210dc1cbe043444bL,0x3db01e6ff8f95b5aL, + 0xb623ad0e0a7dd198L } }, + /* 123 */ + { { 0xa75bd67560c7b65bL,0xab8c559023a4a289L,0xf8220fd0d7b26795L, + 0xd6aa2e4658ec137bL }, + { 0x10abc00b5138bb85L,0x8c31d121d833a95cL,0xb24ff00b1702a32eL, + 0x111662e02dcc513aL } }, + /* 124 */ + { { 0x78114015efb42b87L,0xbd9f5d701b6c4dffL,0x66ecccd7a7d7c129L, + 0xdb3ee1cb94b750f8L }, + { 0xb26f3db0f34837cfL,0xe7eed18bb9578d4fL,0x5d2cdf937c56657dL, + 0x886a644252206a59L } }, + /* 125 */ + { { 0x3c234cfb65b569eaL,0x20011141f72119c1L,0x8badc85da15a619eL, + 0xa70cf4eb018a17bcL }, + { 0x224f97ae8c4a6a65L,0x36e5cf270134378fL,0xbe3a609e4f7e0960L, + 0xaa4772abd1747b77L } }, + /* 126 */ + { { 0x676761317aa60cc0L,0xc79163610368115fL,0xded98bb4bbc1bb5aL, + 0x611a6ddc30faf974L }, + { 0x30e78cbcc15ee47aL,0x2e8962824e0d96a5L,0x36f35adf3dd9ed88L, + 0x5cfffaf816429c88L } }, + /* 127 */ + { { 0xc0d54cff9b7a99cdL,0x7bf3b99d843c45a1L,0x038a908f62c739e1L, + 0x6e5a6b237dc1994cL }, + { 0xef8b454e0ba5db77L,0xb7b8807facf60d63L,0xe591c0c676608378L, + 0x481a238d242dabccL } }, + /* 128 */ + { { 0xe3417bc035d0b34aL,0x440b386b8327c0a7L,0x8fb7262dac0362d1L, + 0x2c41114ce0cdf943L }, + { 0x2ba5cef1ad95a0b1L,0xc09b37a867d54362L,0x26d6cdd201e486c9L, + 0x20477abf42ff9297L } }, + /* 129 */ + { { 0x2f75173c18d65dbfL,0x77bf940e339edad8L,0x7022d26bdcf1001cL, + 0xac66409ac77396b6L }, + { 0x8b0bb36fc6261cc3L,0x213f7bc9190e7e90L,0x6541cebaa45e6c10L, + 0xce8e6975cc122f85L } }, + /* 130 */ + { { 0x0f121b41bc0a67d2L,0x62d4760a444d248aL,0x0e044f1d659b4737L, + 0x08fde365250bb4a8L }, + { 0xaceec3da848bf287L,0xc2a62182d3369d6eL,0x3582dfdc92449482L, + 0x2f7e2fd2565d6cd7L } }, + /* 131 */ + { { 0xae4b92dbc3770fa7L,0x095e8d5c379043f9L,0x54f34e9d17761171L, + 0xc65be92e907702aeL }, + { 
0x2758a303f6fd0a40L,0xe7d822e3bcce784bL,0x7ae4f5854f9767bfL, + 0x4bff8e47d1193b3aL } }, + /* 132 */ + { { 0xcd41d21f00ff1480L,0x2ab8fb7d0754db16L,0xac81d2efbbe0f3eaL, + 0x3e4e4ae65772967dL }, + { 0x7e18f36d3c5303e6L,0x3bd9994b92262397L,0x9ed70e261324c3c0L, + 0x5388aefd58ec6028L } }, + /* 133 */ + { { 0xad1317eb5e5d7713L,0x09b985ee75de49daL,0x32f5bc4fc74fb261L, + 0x5cf908d14f75be0eL }, + { 0x760435108e657b12L,0xbfd421a5b96ed9e6L,0x0e29f51f8970ccc2L, + 0xa698ba4060f00ce2L } }, + /* 134 */ + { { 0x73db1686ef748fecL,0xe6e755a27e9d2cf9L,0x630b6544ce265effL, + 0xb142ef8a7aebad8dL }, + { 0xad31af9f17d5770aL,0x66af3b672cb3412fL,0x6bd60d1bdf3359deL, + 0xd1896a9658515075L } }, + /* 135 */ + { { 0xec5957ab33c41c08L,0x87de94ac5468e2e1L,0x18816b73ac472f6cL, + 0x267b0e0b7981da39L }, + { 0x6e554e5d8e62b988L,0xd8ddc755116d21e7L,0x4610faf03d2a6f99L, + 0xb54e287aa1119393L } }, + /* 136 */ + { { 0x0a0122b5178a876bL,0x51ff96ff085104b4L,0x050b31ab14f29f76L, + 0x84abb28b5f87d4e6L }, + { 0xd5ed439f8270790aL,0x2d6cb59d85e3f46bL,0x75f55c1b6c1e2212L, + 0xe5436f6717655640L } }, + /* 137 */ + { { 0x53f9025e2286e8d5L,0x353c95b4864453beL,0xd832f5bde408e3a0L, + 0x0404f68b5b9ce99eL }, + { 0xcad33bdea781e8e5L,0x3cdf5018163c2f5bL,0x575769600119caa3L, + 0x3a4263df0ac1c701L } }, + /* 138 */ + { { 0xc2965ecc9aeb596dL,0x01ea03e7023c92b4L,0x4704b4b62e013961L, + 0x0ca8fd3f905ea367L }, + { 0x92523a42551b2b61L,0x1eb7a89c390fcd06L,0xe7f1d2be0392a63eL, + 0x96dca2644ddb0c33L } }, + /* 139 */ + { { 0x203bb43a387510afL,0x846feaa8a9a36a01L,0xd23a57702f950378L, + 0x4363e2123aad59dcL }, + { 0xca43a1c740246a47L,0xb362b8d2e55dd24dL,0xf9b086045d8faf96L, + 0x840e115cd8bb98c4L } }, + /* 140 */ + { { 0xf12205e21023e8a7L,0xc808a8cdd8dc7a0bL,0xe292a272163a5ddfL, + 0x5e0d6abd30ded6d4L }, + { 0x07a721c27cfc0f64L,0x42eec01d0e55ed88L,0x26a7bef91d1f9db2L, + 0x7dea48f42945a25aL } }, + /* 141 */ + { { 0xabdf6f1ce5060a81L,0xe79f9c72f8f95615L,0xcfd36c5406ac268bL, + 0xabc2a2beebfd16d1L }, + { 0x8ac66f91d3e2eac7L,0x6f10ba63d2dd0466L,0x6790e3770282d31bL, + 0x4ea353946c7eefc1L } }, + /* 142 */ + { { 0xed8a2f8d5266309dL,0x0a51c6c081945a3eL,0xcecaf45a578c5dc1L, + 0x3a76e6891c94ffc3L }, + { 0x9aace8a47d7b0d0fL,0x963ace968f584a5fL,0x51a30c724e697fbeL, + 0x8212a10a465e6464L } }, + /* 143 */ + { { 0xef7c61c3cfab8caaL,0x18eb8e840e142390L,0xcd1dff677e9733caL, + 0xaa7cab71599cb164L }, + { 0x02fc9273bc837bd1L,0xc06407d0c36af5d7L,0x17621292f423da49L, + 0x40e38073fe0617c3L } }, + /* 144 */ + { { 0xf4f80824a7bf9b7cL,0x365d23203fbe30d0L,0xbfbe532097cf9ce3L, + 0xe3604700b3055526L }, + { 0x4dcb99116cc6c2c7L,0x72683708ba4cbee6L,0xdcded434637ad9ecL, + 0x6542d677a3dee15fL } }, + /* 145 */ + { { 0x3f32b6d07b6c377aL,0x6cb03847903448beL,0xd6fdd3a820da8af7L, + 0xa6534aee09bb6f21L }, + { 0x30a1780d1035facfL,0x35e55a339dcb47e6L,0x6ea50fe1c447f393L, + 0xf3cb672fdc9aef22L } }, + /* 146 */ + { { 0xeb3719fe3b55fd83L,0xe0d7a46c875ddd10L,0x33ac9fa905cea784L, + 0x7cafaa2eaae870e7L }, + { 0x9b814d041d53b338L,0xe0acc0a0ef87e6c6L,0xfb93d10811672b0fL, + 0x0aab13c1b9bd522eL } }, + /* 147 */ + { { 0xddcce278d2681297L,0xcb350eb1b509546aL,0x2dc431737661aaf2L, + 0x4b91a602847012e9L }, + { 0xdcff109572f8ddcfL,0x08ebf61e9a911af4L,0x48f4360ac372430eL, + 0x49534c5372321cabL } }, + /* 148 */ + { { 0x83df7d71f07b7e9dL,0xa478efa313cd516fL,0x78ef264b6c047ee3L, + 0xcaf46c4fd65ac5eeL }, + { 0xa04d0c7792aa8266L,0xedf45466913684bbL,0x56e65168ae4b16b0L, + 0x14ce9e5704c6770fL } }, + /* 149 */ + { { 0x99445e3e965e8f91L,0xd3aca1bacb0f2492L,0xd31cc70f90c8a0a0L, + 0x1bb708a53e4c9a71L }, + { 
0xd5ca9e69558bdd7aL,0x734a0508018a26b1L,0xb093aa714c9cf1ecL, + 0xf9d126f2da300102L } }, + /* 150 */ + { { 0x749bca7aaff9563eL,0xdd077afeb49914a0L,0xe27a0311bf5f1671L, + 0x807afcb9729ecc69L }, + { 0x7f8a9337c9b08b77L,0x86c3a785443c7e38L,0x85fafa59476fd8baL, + 0x751adcd16568cd8cL } }, + /* 151 */ + { { 0x8aea38b410715c0dL,0xd113ea718f7697f7L,0x665eab1493fbf06dL, + 0x29ec44682537743fL }, + { 0x3d94719cb50bebbcL,0x399ee5bfe4505422L,0x90cd5b3a8d2dedb1L, + 0xff9370e392a4077dL } }, + /* 152 */ + { { 0x59a2d69bc6b75b65L,0x4188f8d5266651c5L,0x28a9f33e3de9d7d2L, + 0x9776478ba2a9d01aL }, + { 0x8852622d929af2c7L,0x334f5d6d4e690923L,0xce6cc7e5a89a51e9L, + 0x74a6313fac2f82faL } }, + /* 153 */ + { { 0xb2f4dfddb75f079cL,0x85b07c9518e36fbbL,0x1b6cfcf0e7cd36ddL, + 0xab75be150ff4863dL }, + { 0x81b367c0173fc9b7L,0xb90a7420d2594fd0L,0x15fdbf03c4091236L, + 0x4ebeac2e0b4459f6L } }, + /* 154 */ + { { 0xeb6c5fe75c9f2c53L,0xd25220118eae9411L,0xc8887633f95ac5d8L, + 0xdf99887b2c1baffcL }, + { 0xbb78eed2850aaecbL,0x9d49181b01d6a272L,0x978dd511b1cdbcacL, + 0x27b040a7779f4058L } }, + /* 155 */ + { { 0x90405db7f73b2eb2L,0xe0df85088e1b2118L,0x501b71525962327eL, + 0xb393dd37e4cfa3f5L }, + { 0xa1230e7b3fd75165L,0xd66344c2bcd33554L,0x6c36f1be0f7b5022L, + 0x09588c12d0463419L } }, + /* 156 */ + { { 0xe086093f02601c3bL,0xfb0252f8cf5c335fL,0x955cf280894aff28L, + 0x81c879a9db9f648bL }, + { 0x040e687cc6f56c51L,0xfed471693f17618cL,0x44f88a419059353bL, + 0xfa0d48f55fc11bc4L } }, + /* 157 */ + { { 0xbc6e1c9de1608e4dL,0x010dda113582822cL,0xf6b7ddc1157ec2d7L, + 0x8ea0e156b6a367d6L }, + { 0xa354e02f2383b3b4L,0x69966b943f01f53cL,0x4ff6632b2de03ca5L, + 0x3f5ab924fa00b5acL } }, + /* 158 */ + { { 0x337bb0d959739efbL,0xc751b0f4e7ebec0dL,0x2da52dd6411a67d1L, + 0x8bc768872b74256eL }, + { 0xa5be3b7282d3d253L,0xa9f679a1f58d779fL,0xa1cac168e16767bbL, + 0xb386f19060fcf34fL } }, + /* 159 */ + { { 0x31f3c1352fedcfc2L,0x5396bf6262f8af0dL,0x9a02b4eae57288c2L, + 0x4cb460f71b069c4dL }, + { 0xae67b4d35b8095eaL,0x92bbf8596fc07603L,0xe1475f66b614a165L, + 0x52c0d50895ef5223L } }, + /* 160 */ + { { 0x231c210e15339848L,0xe87a28e870778c8dL,0x9d1de6616956e170L, + 0x4ac3c9382bb09c0bL }, + { 0x19be05516998987dL,0x8b2376c4ae09f4d6L,0x1de0b7651a3f933dL, + 0x380d94c7e39705f4L } }, + /* 161 */ + { { 0x01a355aa81542e75L,0x96c724a1ee01b9b7L,0x6b3a2977624d7087L, + 0x2ce3e171de2637afL }, + { 0xcfefeb49f5d5bc1aL,0xa655607e2777e2b5L,0x4feaac2f9513756cL, + 0x2e6cd8520b624e4dL } }, + /* 162 */ + { { 0x3685954b8c31c31dL,0x68533d005bf21a0cL,0x0bd7626e75c79ec9L, + 0xca17754742c69d54L }, + { 0xcc6edafff6d2dbb2L,0xfd0d8cbd174a9d18L,0x875e8793aa4578e8L, + 0xa976a7139cab2ce6L } }, + /* 163 */ + { { 0x0a651f1b93fb353dL,0xd75cab8b57fcfa72L,0xaa88cfa731b15281L, + 0x8720a7170a1f4999L }, + { 0x8c3e8d37693e1b90L,0xd345dc0b16f6dfc3L,0x8ea8d00ab52a8742L, + 0x9719ef29c769893cL } }, + /* 164 */ + { { 0x820eed8d58e35909L,0x9366d8dc33ddc116L,0xd7f999d06e205026L, + 0xa5072976e15704c1L }, + { 0x002a37eac4e70b2eL,0x84dcf6576890aa8aL,0xcd71bf18645b2a5cL, + 0x99389c9df7b77725L } }, + /* 165 */ + { { 0x238c08f27ada7a4bL,0x3abe9d03fd389366L,0x6b672e89766f512cL, + 0xa88806aa202c82e4L }, + { 0x6602044ad380184eL,0xa8cb78c4126a8b85L,0x79d670c0ad844f17L, + 0x0043bffb4738dcfeL } }, + /* 166 */ + { { 0x8d59b5dc36d5192eL,0xacf885d34590b2afL,0x83566d0a11601781L, + 0x52f3ef01ba6c4866L }, + { 0x3986732a0edcb64dL,0x0a482c238068379fL,0x16cbe5fa7040f309L, + 0x3296bd899ef27e75L } }, + /* 167 */ + { { 0x476aba89454d81d7L,0x9eade7ef51eb9b3cL,0x619a21cd81c57986L, + 0x3b90febfaee571e9L }, + { 
0x9393023e5496f7cbL,0x55be41d87fb51bc4L,0x03f1dd4899beb5ceL, + 0x6e88069d9f810b18L } }, + /* 168 */ + { { 0xce37ab11b43ea1dbL,0x0a7ff1a95259d292L,0x851b02218f84f186L, + 0xa7222beadefaad13L }, + { 0xa2ac78ec2b0a9144L,0x5a024051f2fa59c5L,0x91d1eca56147ce38L, + 0xbe94d523bc2ac690L } }, + /* 169 */ + { { 0x72f4945e0b226ce7L,0xb8afd747967e8b70L,0xedea46f185a6c63eL, + 0x7782defe9be8c766L }, + { 0x760d2aa43db38626L,0x460ae78776f67ad1L,0x341b86fc54499cdbL, + 0x03838567a2892e4bL } }, + /* 170 */ + { { 0x2d8daefd79ec1a0fL,0x3bbcd6fdceb39c97L,0xf5575ffc58f61a95L, + 0xdbd986c4adf7b420L }, + { 0x81aa881415f39eb7L,0x6ee2fcf5b98d976cL,0x5465475dcf2f717dL, + 0x8e24d3c46860bbd0L } }, + /* 171 */ + { { 0x749d8e549a587390L,0x12bb194f0cbec588L,0x46e07da4b25983c6L, + 0x541a99c4407bafc8L }, + { 0xdb241692624c8842L,0x6044c12ad86c05ffL,0xc59d14b44f7fcf62L, + 0xc0092c49f57d35d1L } }, + /* 172 */ + { { 0xd3cc75c3df2e61efL,0x7e8841c82e1b35caL,0xc62d30d1909f29f4L, + 0x75e406347286944dL }, + { 0xe7d41fc5bbc237d0L,0xc9537bf0ec4f01c9L,0x91c51a16282bd534L, + 0x5b7cb658c7848586L } }, + /* 173 */ + { { 0x964a70848a28ead1L,0x802dc508fd3b47f6L,0x9ae4bfd1767e5b39L, + 0x7ae13eba8df097a1L }, + { 0xfd216ef8eadd384eL,0x0361a2d9b6b2ff06L,0x204b98784bcdb5f3L, + 0x787d8074e2a8e3fdL } }, + /* 174 */ + { { 0xc5e25d6b757fbb1cL,0xe47bddb2ca201debL,0x4a55e9a36d2233ffL, + 0x5c2228199ef28484L }, + { 0x773d4a8588315250L,0x21b21a2b827097c1L,0xab7c4ea1def5d33fL, + 0xe45d37abbaf0f2b0L } }, + /* 175 */ + { { 0xd2df1e3428511c8aL,0xebb229c8bdca6cd3L,0x578a71a7627c39a7L, + 0xed7bc12284dfb9d3L }, + { 0xcf22a6df93dea561L,0x5443f18dd48f0ed1L,0xd8b861405bad23e8L, + 0xaac97cc945ca6d27L } }, + /* 176 */ + { { 0xeb54ea74a16bd00aL,0xd839e9adf5c0bcc1L,0x092bb7f11f9bfc06L, + 0x318f97b31163dc4eL }, + { 0xecc0c5bec30d7138L,0x44e8df23abc30220L,0x2bb7972fb0223606L, + 0xfa41faa19a84ff4dL } }, + /* 177 */ + { { 0x4402d974a6642269L,0xc81814ce9bb783bdL,0x398d38e47941e60bL, + 0x38bb6b2c1d26e9e2L }, + { 0xc64e4a256a577f87L,0x8b52d253dc11fe1cL,0xff336abf62280728L, + 0x94dd0905ce7601a5L } }, + /* 178 */ + { { 0x156cf7dcde93f92aL,0xa01333cb89b5f315L,0x02404df9c995e750L, + 0x92077867d25c2ae9L }, + { 0xe2471e010bf39d44L,0x5f2c902096bb53d7L,0x4c44b7b35c9c3d8fL, + 0x81e8428bd29beb51L } }, + /* 179 */ + { { 0x6dd9c2bac477199fL,0x8cb8eeee6b5ecdd9L,0x8af7db3fee40fd0eL, + 0x1b94ab62dbbfa4b1L }, + { 0x44f0d8b3ce47f143L,0x51e623fc63f46163L,0xf18f270fcc599383L, + 0x06a38e28055590eeL } }, + /* 180 */ + { { 0x2e5b0139b3355b49L,0x20e26560b4ebf99bL,0xc08ffa6bd269f3dcL, + 0xa7b36c2083d9d4f8L }, + { 0x64d15c3a1b3e8830L,0xd5fceae1a89f9c0bL,0xcfeee4a2e2d16930L, + 0xbe54c6b4a2822a20L } }, + /* 181 */ + { { 0xd6cdb3df8d91167cL,0x517c3f79e7a6625eL,0x7105648f346ac7f4L, + 0xbf30a5abeae022bbL }, + { 0x8e7785be93828a68L,0x5161c3327f3ef036L,0xe11b5feb592146b2L, + 0xd1c820de2732d13aL } }, + /* 182 */ + { { 0x043e13479038b363L,0x58c11f546b05e519L,0x4fe57abe6026cad1L, + 0xb7d17bed68a18da3L }, + { 0x44ca5891e29c2559L,0x4f7a03765bfffd84L,0x498de4af74e46948L, + 0x3997fd5e6412cc64L } }, + /* 183 */ + { { 0xf20746828bd61507L,0x29e132d534a64d2aL,0xffeddfb08a8a15e3L, + 0x0eeb89293c6c13e8L }, + { 0xe9b69a3ea7e259f8L,0xce1db7e6d13e7e67L,0x277318f6ad1fa685L, + 0x228916f8c922b6efL } }, + /* 184 */ + { { 0x959ae25b0a12ab5bL,0xcc11171f957bc136L,0x8058429ed16e2b0cL, + 0xec05ad1d6e93097eL }, + { 0x157ba5beac3f3708L,0x31baf93530b59d77L,0x47b55237118234e5L, + 0x7d3141567ff11b37L } }, + /* 185 */ + { { 0x7bd9c05cf6dfefabL,0xbe2f2268dcb37707L,0xe53ead973a38bb95L, + 0xe9ce66fc9bc1d7a3L }, + { 
0x75aa15766f6a02a1L,0x38c087df60e600edL,0xf8947f3468cdc1b9L, + 0xd9650b0172280651L } }, + /* 186 */ + { { 0x504b4c4a5a057e60L,0xcbccc3be8def25e4L,0xa635320817c1ccbdL, + 0x14d6699a804eb7a2L }, + { 0x2c8a8415db1f411aL,0x09fbaf0bf80d769cL,0xb4deef901c2f77adL, + 0x6f4c68410d43598aL } }, + /* 187 */ + { { 0x8726df4e96c24a96L,0x534dbc85fcbd99a3L,0x3c466ef28b2ae30aL, + 0x4c4350fd61189abbL }, + { 0x2967f716f855b8daL,0x41a42394463c38a1L,0xc37e1413eae93343L, + 0xa726d2425a3118b5L } }, + /* 188 */ + { { 0xdae6b3ee948c1086L,0xf1de503dcbd3a2e1L,0x3f35ed3f03d022f3L, + 0x13639e82cc6cf392L }, + { 0x9ac938fbcdafaa86L,0xf45bc5fb2654a258L,0x1963b26e45051329L, + 0xca9365e1c1a335a3L } }, + /* 189 */ + { { 0x3615ac754c3b2d20L,0x742a5417904e241bL,0xb08521c4cc9d071dL, + 0x9ce29c34970b72a5L }, + { 0x8cc81f736d3e0ad6L,0x8060da9ef2f8434cL,0x35ed1d1a6ce862d9L, + 0x48c4abd7ab42af98L } }, + /* 190 */ + { { 0xd221b0cc40c7485aL,0xead455bbe5274dbfL,0x493c76989263d2e8L, + 0x78017c32f67b33cbL }, + { 0xb9d35769930cb5eeL,0xc0d14e940c408ed2L,0xf8b7bf55272f1a4dL, + 0x53cd0454de5c1c04L } }, + /* 191 */ + { { 0xbcd585fa5d28ccacL,0x5f823e56005b746eL,0x7c79f0a1cd0123aaL, + 0xeea465c1d3d7fa8fL }, + { 0x7810659f0551803bL,0x6c0b599f7ce6af70L,0x4195a77029288e70L, + 0x1b6e42a47ae69193L } }, + /* 192 */ + { { 0x2e80937cf67d04c3L,0x1e312be289eeb811L,0x56b5d88792594d60L, + 0x0224da14187fbd3dL }, + { 0x87abb8630c5fe36fL,0x580f3c604ef51f5fL,0x964fb1bfb3b429ecL, + 0x60838ef042bfff33L } }, + /* 193 */ + { { 0x432cb2f27e0bbe99L,0x7bda44f304aa39eeL,0x5f497c7a9fa93903L, + 0x636eb2022d331643L }, + { 0xfcfd0e6193ae00aaL,0x875a00fe31ae6d2fL,0xf43658a29f93901cL, + 0x8844eeb639218bacL } }, + /* 194 */ + { { 0x114171d26b3bae58L,0x7db3df7117e39f3eL,0xcd37bc7f81a8eadaL, + 0x27ba83dc51fb789eL }, + { 0xa7df439ffbf54de5L,0x7277030bb5fe1a71L,0x42ee8e35db297a48L, + 0xadb62d3487f3a4abL } }, + /* 195 */ + { { 0x9b1168a2a175df2aL,0x082aa04f618c32e9L,0xc9e4f2e7146b0916L, + 0xb990fd7675e7c8b2L }, + { 0x0829d96b4df37313L,0x1c205579d0b40789L,0x66c9ae4a78087711L, + 0x81707ef94d10d18dL } }, + /* 196 */ + { { 0x97d7cab203d6ff96L,0x5b851bfc0d843360L,0x268823c4d042db4bL, + 0x3792daead5a8aa5cL }, + { 0x52818865941afa0bL,0xf3e9e74142d83671L,0x17c825275be4e0a7L, + 0x5abd635e94b001baL } }, + /* 197 */ + { { 0x727fa84e0ac4927cL,0xe3886035a7c8cf23L,0xa4bcd5ea4adca0dfL, + 0x5995bf21846ab610L }, + { 0xe90f860b829dfa33L,0xcaafe2ae958fc18bL,0x9b3baf4478630366L, + 0x44c32ca2d483411eL } }, + /* 198 */ + { { 0xa74a97f1e40ed80cL,0x5f938cb131d2ca82L,0x53f2124b7c2d6ad9L, + 0x1f2162fb8082a54cL }, + { 0x7e467cc5720b173eL,0x40e8a666085f12f9L,0x8cebc20e4c9d65dcL, + 0x8f1d402bc3e907c9L } }, + /* 199 */ + { { 0x4f592f9cfbc4058aL,0xb15e14b6292f5670L,0xc55cfe37bc1d8c57L, + 0xb1980f43926edbf9L }, + { 0x98c33e0932c76b09L,0x1df5279d33b07f78L,0x6f08ead4863bb461L, + 0x2828ad9b37448e45L } }, + /* 200 */ + { { 0x696722c4c4cf4ac5L,0xf5ac1a3fdde64afbL,0x0551baa2e0890832L, + 0x4973f1275a14b390L }, + { 0xe59d8335322eac5dL,0x5e07eef50bd9b568L,0xab36720fa2588393L, + 0x6dac8ed0db168ac7L } }, + /* 201 */ + { { 0xf7b545aeeda835efL,0x4aa113d21d10ed51L,0x035a65e013741b09L, + 0x4b23ef5920b9de4cL }, + { 0xe82bb6803c4c7341L,0xd457706d3f58bc37L,0x73527863a51e3ee8L, + 0x4dd71534ddf49a4eL } }, + /* 202 */ + { { 0xbf94467295476cd9L,0x648d072fe31a725bL,0x1441c8b8fc4b67e0L, + 0xfd3170002f4a4dbbL }, + { 0x1cb43ff48995d0e1L,0x76e695d10ef729aaL,0xe0d5f97641798982L, + 0x14fac58c9569f365L } }, + /* 203 */ + { { 0xad9a0065f312ae18L,0x51958dc0fcc93fc9L,0xd9a142408a7d2846L, + 0xed7c765136abda50L }, + { 
0x46270f1a25d4abbcL,0x9b5dd8f3f1a113eaL,0xc609b0755b51952fL, + 0xfefcb7f74d2e9f53L } }, + /* 204 */ + { { 0xbd09497aba119185L,0xd54e8c30aac45ba4L,0x492479deaa521179L, + 0x1801a57e87e0d80bL }, + { 0x073d3f8dfcafffb0L,0x6cf33c0bae255240L,0x781d763b5b5fdfbcL, + 0x9f8fc11e1ead1064L } }, + /* 205 */ + { { 0x1583a1715e69544cL,0x0eaf8567f04b7813L,0x1e22a8fd278a4c32L, + 0xa9d3809d3d3a69a9L }, + { 0x936c2c2c59a2da3bL,0x38ccbcf61895c847L,0x5e65244e63d50869L, + 0x3006b9aee1178ef7L } }, + /* 206 */ + { { 0x0bb1f2b0c9eead28L,0x7eef635d89f4dfbcL,0x074757fdb2ce8939L, + 0x0ab85fd745f8f761L }, + { 0xecda7c933e5b4549L,0x4be2bb5c97922f21L,0x261a1274b43b8040L, + 0xb122d67511e942c2L } }, + /* 207 */ + { { 0x3be607be66a5ae7aL,0x01e703fa76adcbe3L,0xaf9043014eb6e5c5L, + 0x9f599dc1097dbaecL }, + { 0x6d75b7180ff250edL,0x8eb91574349a20dcL,0x425605a410b227a3L, + 0x7d5528e08a294b78L } }, + /* 208 */ + { { 0xf0f58f6620c26defL,0x025585ea582b2d1eL,0xfbe7d79b01ce3881L, + 0x28ccea01303f1730L }, + { 0xd1dabcd179644ba5L,0x1fc643e806fff0b8L,0xa60a76fc66b3e17bL, + 0xc18baf48a1d013bfL } }, + /* 209 */ + { { 0x34e638c85dc4216dL,0x00c01067206142acL,0xd453a17195f5064aL, + 0x9def809db7a9596bL }, + { 0x41e8642e67ab8d2cL,0xb42404336237a2b6L,0x7d506a6d64c4218bL, + 0x0357f8b068808ce5L } }, + /* 210 */ + { { 0x8e9dbe644cd2cc88L,0xcc61c28df0b8f39dL,0x4a309874cd30a0c8L, + 0xe4a01add1b489887L }, + { 0x2ed1eeacf57cd8f9L,0x1b767d3ebd594c48L,0xa7295c717bd2f787L, + 0x466d7d79ce10cc30L } }, + /* 211 */ + { { 0x47d318929dada2c7L,0x4fa0a6c38f9aa27dL,0x90e4fd28820a59e1L, + 0xc672a522451ead1aL }, + { 0x30607cc85d86b655L,0xf0235d3bf9ad4af1L,0x99a08680571172a6L, + 0x5e3d64faf2a67513L } }, + /* 212 */ + { { 0xaa6410c79b3b4416L,0xcd8fcf85eab26d99L,0x5ebff74adb656a74L, + 0x6c8a7a95eb8e42fcL }, + { 0x10c60ba7b02a63bdL,0x6b2f23038b8f0047L,0x8c6c3738312d90b0L, + 0x348ae422ad82ca91L } }, + /* 213 */ + { { 0x7f4746635ccda2fbL,0x22accaa18e0726d2L,0x85adf782492b1f20L, + 0xc1074de0d9ef2d2eL }, + { 0xfcf3ce44ae9a65b3L,0xfd71e4ac05d7151bL,0xd4711f50ce6a9788L, + 0xfbadfbdbc9e54ffcL } }, + /* 214 */ + { { 0x1713f1cd20a99363L,0xb915658f6cf22775L,0x968175cd24d359b2L, + 0xb7f976b483716fcdL }, + { 0x5758e24d5d6dbf74L,0x8d23bafd71c3af36L,0x48f477600243dfe3L, + 0xf4d41b2ecafcc805L } }, + /* 215 */ + { { 0x51f1cf28fdabd48dL,0xce81be3632c078a4L,0x6ace2974117146e9L, + 0x180824eae0160f10L }, + { 0x0387698b66e58358L,0x63568752ce6ca358L,0x82380e345e41e6c5L, + 0x67e5f63983cf6d25L } }, + /* 216 */ + { { 0xf89ccb8dcf4899efL,0x949015f09ebb44c0L,0x546f9276b2598ec9L, + 0x9fef789a04c11fc6L }, + { 0x6d367ecf53d2a071L,0xb10e1a7fa4519b09L,0xca6b3fb0611e2eefL, + 0xbc80c181a99c4e20L } }, + /* 217 */ + { { 0x972536f8e5eb82e6L,0x1a484fc7f56cb920L,0xc78e217150b5da5eL, + 0x49270e629f8cdf10L }, + { 0x1a39b7bbea6b50adL,0x9a0284c1a2388ffcL,0x5403eb178107197bL, + 0xd2ee52f961372f7fL } }, + /* 218 */ + { { 0xd37cd28588e0362aL,0x442fa8a78fa5d94dL,0xaff836e5a434a526L, + 0xdfb478bee5abb733L }, + { 0xa91f1ce7673eede6L,0xa5390ad42b5b2f04L,0x5e66f7bf5530da2fL, + 0xd9a140b408df473aL } }, + /* 219 */ + { { 0x0e0221b56e8ea498L,0x623478293563ee09L,0xe06b8391335d2adeL, + 0x760c058d623f4b1aL }, + { 0x0b89b58cc198aa79L,0xf74890d2f07aba7fL,0x4e204110fde2556aL, + 0x7141982d8f190409L } }, + /* 220 */ + { { 0x6f0a0e334d4b0f45L,0xd9280b38392a94e1L,0x3af324c6b3c61d5eL, + 0x3af9d1ce89d54e47L }, + { 0xfd8f798120930371L,0xeda2664c21c17097L,0x0e9545dcdc42309bL, + 0xb1f815c373957dd6L } }, + /* 221 */ + { { 0x84faa78e89fec44aL,0xc8c2ae473caa4cafL,0x691c807dc1b6a624L, + 0xa41aed141543f052L }, + { 
0x424353997d5ffe04L,0x8bacb2df625b6e20L,0x85d660be87817775L, + 0xd6e9c1dd86fb60efL } }, + /* 222 */ + { { 0x3aa2e97ec6853264L,0x771533b7e2304a0bL,0x1b912bb7b8eae9beL, + 0x9c9c6e10ae9bf8c2L }, + { 0xa2309a59e030b74cL,0x4ed7494d6a631e90L,0x89f44b23a49b79f2L, + 0x566bd59640fa61b6L } }, + /* 223 */ + { { 0x066c0118c18061f3L,0x190b25d37c83fc70L,0xf05fc8e027273245L, + 0xcf2c7390f525345eL }, + { 0xa09bceb410eb30cfL,0xcfd2ebba0d77703aL,0xe842c43a150ff255L, + 0x02f517558aa20979L } }, + /* 224 */ + { { 0x396ef794addb7d07L,0x0b4fc74224455500L,0xfaff8eacc78aa3ceL, + 0x14e9ada5e8d4d97dL }, + { 0xdaa480a12f7079e2L,0x45baa3cde4b0800eL,0x01765e2d7838157dL, + 0xa0ad4fab8e9d9ae8L } }, + /* 225 */ + { { 0x0bfb76214a653618L,0x1872813c31eaaa5fL,0x1553e73744949d5eL, + 0xbcd530b86e56ed1eL }, + { 0x169be85332e9c47bL,0xdc2776feb50059abL,0xcdba9761192bfbb4L, + 0x909283cf6979341dL } }, + /* 226 */ + { { 0x67b0032476e81a13L,0x9bee1a9962171239L,0x08ed361bd32e19d6L, + 0x35eeb7c9ace1549aL }, + { 0x1280ae5a7e4e5bdcL,0x2dcd2cd3b6ceec6eL,0x52e4224c6e266bc1L, + 0x9a8b2cf4448ae864L } }, + /* 227 */ + { { 0xf6471bf209d03b59L,0xc90e62a3b65af2abL,0xff7ff168ebd5eec9L, + 0x6bdb60f4d4491379L }, + { 0xdadafebc8a55bc30L,0xc79ead1610097fe0L,0x42e197414c1e3bddL, + 0x01ec3cfd94ba08a9L } }, + /* 228 */ + { { 0xba6277ebdc9485c2L,0x48cc9a7922fb10c7L,0x4f61d60f70a28d8aL, + 0xd1acb1c0475464f6L }, + { 0xd26902b126f36612L,0x59c3a44ee0618d8bL,0x4df8a813308357eeL, + 0x7dcd079d405626c2L } }, + /* 229 */ + { { 0x5ce7d4d3f05a4b48L,0xadcd295237230772L,0xd18f7971812a915aL, + 0x0bf53589377d19b8L }, + { 0x35ecd95a6c68ea73L,0xc7f3bbca823a584dL,0x9fb674c6f473a723L, + 0xd28be4d9e16686fcL } }, + /* 230 */ + { { 0x5d2b990638fa8e4bL,0x559f186e893fd8fcL,0x3a6de2aa436fb6fcL, + 0xd76007aa510f88ceL }, + { 0x2d10aab6523a4988L,0xb455cf4474dd0273L,0x7f467082a3407278L, + 0xf2b52f68b303bb01L } }, + /* 231 */ + { { 0x0d57eafa9835b4caL,0x2d2232fcbb669cbcL,0x8eeeb680c6643198L, + 0xd8dbe98ecc5aed3aL }, + { 0xcba9be3fc5a02709L,0x30be68e5f5ba1fa8L,0xfebd43cdf10ea852L, + 0xe01593a3ee559705L } }, + /* 232 */ + { { 0xd3e5af50ea75a0a6L,0x512226ac57858033L,0x6fe6d50fd0176406L, + 0xafec07b1aeb8ef06L }, + { 0x7fb9956780bb0a31L,0x6f1af3cc37309aaeL,0x9153a15a01abf389L, + 0xa71b93546e2dbfddL } }, + /* 233 */ + { { 0xbf8e12e018f593d2L,0xd1a90428a078122bL,0x150505db0ba4f2adL, + 0x53a2005c628523d9L }, + { 0x07c8b639e7f2b935L,0x2bff975ac182961aL,0x86bceea77518ca2cL, + 0xbf47d19b3d588e3dL } }, + /* 234 */ + { { 0x672967a7dd7665d5L,0x4e3030572f2f4de5L,0x144005ae80d4903fL, + 0x001c2c7f39c9a1b6L }, + { 0x143a801469efc6d6L,0xc810bdaa7bc7a724L,0x5f65670ba78150a4L, + 0xfdadf8e786ffb99bL } }, + /* 235 */ + { { 0xfd38cb88ffc00785L,0x77fa75913b48eb67L,0x0454d055bf368fbcL, + 0x3a838e4d5aa43c94L }, + { 0x561663293e97bb9aL,0x9eb93363441d94d9L,0x515591a60adb2a83L, + 0x3cdb8257873e1da3L } }, + /* 236 */ + { { 0x137140a97de77eabL,0xf7e1c50d41648109L,0x762dcad2ceb1d0dfL, + 0x5a60cc89f1f57fbaL }, + { 0x80b3638240d45673L,0x1b82be195913c655L,0x057284b8dd64b741L, + 0x922ff56fdbfd8fc0L } }, + /* 237 */ + { { 0x1b265deec9a129a1L,0xa5b1ce57cc284e04L,0x04380c46cebfbe3cL, + 0x72919a7df6c5cd62L }, + { 0x298f453a8fb90f9aL,0xd719c00b88e4031bL,0xe32c0e77796f1856L, + 0x5e7917803624089aL } }, + /* 238 */ + { { 0x5c16ec557f63cdfbL,0x8e6a3571f1cae4fdL,0xfce26bea560597caL, + 0x4e0a5371e24c2fabL }, + { 0x276a40d3a5765357L,0x3c89af440d73a2b4L,0xb8f370ae41d11a32L, + 0xf5ff7818d56604eeL } }, + /* 239 */ + { { 0xfbf3e3fe1a09df21L,0x26d5d28ee66e8e47L,0x2096bd0a29c89015L, + 0xe41df0e9533f5e64L }, + { 
0x305fda40b3ba9e3fL,0xf2340ceb2604d895L,0x0866e1927f0367c7L, + 0x8edd7d6eac4f155fL } }, + /* 240 */ + { { 0xc9a1dc0e0bfc8ff3L,0x14efd82be936f42fL,0x67016f7ccca381efL, + 0x1432c1caed8aee96L }, + { 0xec68482970b23c26L,0xa64fe8730735b273L,0xe389f6e5eaef0f5aL, + 0xcaef480b5ac8d2c6L } }, + /* 241 */ + { { 0x5245c97875315922L,0xd82951713063cca5L,0xf3ce60d0b64ef2cbL, + 0xd0ba177e8efae236L }, + { 0x53a9ae8fb1b3af60L,0x1a796ae53d2da20eL,0x01d63605df9eef28L, + 0xf31c957c1c54ae16L } }, + /* 242 */ + { { 0xc0f58d5249cc4597L,0xdc5015b0bae0a028L,0xefc5fc55734a814aL, + 0x013404cb96e17c3aL }, + { 0xb29e2585c9a824bfL,0xd593185e001eaed7L,0x8d6ee68261ef68acL, + 0x6f377c4b91933e6cL } }, + /* 243 */ + { { 0x9f93bad1a8333fd2L,0xa89302025a2a95b8L,0x211e5037eaf75aceL, + 0x6dba3e4ed2d09506L }, + { 0xa48ef98cd04399cdL,0x1811c66ee6b73adeL,0x72f60752c17ecaf3L, + 0xf13cf3423becf4a7L } }, + /* 244 */ + { { 0xceeb9ec0a919e2ebL,0x83a9a195f62c0f68L,0xcfba3bb67aba2299L, + 0xc83fa9a9274bbad3L }, + { 0x0d7d1b0b62fa1ce0L,0xe58b60f53418efbfL,0xbfa8ef9e52706f04L, + 0xb49d70f45d702683L } }, + /* 245 */ + { { 0x914c7510fad5513bL,0x05f32eecb1751e2dL,0x6d850418d9fb9d59L, + 0x59cfadbb0c30f1cfL }, + { 0xe167ac2355cb7fd6L,0x249367b8820426a3L,0xeaeec58c90a78864L, + 0x5babf362354a4b67L } }, + /* 246 */ + { { 0x37c981d1ee424865L,0x8b002878f2e5577fL,0x702970f1b9e0c058L, + 0x6188c6a79026c8f0L }, + { 0x06f9a19bd0f244daL,0x1ecced5cfb080873L,0x35470f9b9f213637L, + 0x993fe475df50b9d9L } }, + /* 247 */ + { { 0x68e31cdf9b2c3609L,0x84eb19c02c46d4eaL,0x7ac9ec1a9a775101L, + 0x81f764664c80616bL }, + { 0x1d7c2a5a75fbe978L,0x6743fed3f183b356L,0x838d1f04501dd2bfL, + 0x564a812a5fe9060dL } }, + /* 248 */ + { { 0x7a5a64f4fa817d1dL,0x55f96844bea82e0fL,0xb5ff5a0fcd57f9aaL, + 0x226bf3cf00e51d6cL }, + { 0xd6d1a9f92f2833cfL,0x20a0a35a4f4f89a8L,0x11536c498f3f7f77L, + 0x68779f47ff257836L } }, + /* 249 */ + { { 0x79b0c1c173043d08L,0xa54467741fc020faL,0xd3767e289a6d26d0L, + 0x97bcb0d1eb092e0bL }, + { 0x2ab6eaa8f32ed3c3L,0xc8a4f151b281bc48L,0x4d1bf4f3bfa178f3L, + 0xa872ffe80a784655L } }, + /* 250 */ + { { 0xb1ab7935a32b2086L,0xe1eb710e8160f486L,0x9bd0cd913b6ae6beL, + 0x02812bfcb732a36aL }, + { 0xa63fd7cacf605318L,0x646e5d50fdfd6d1dL,0xa1d683982102d619L, + 0x07391cc9fe5396afL } }, + /* 251 */ + { { 0xc50157f08b80d02bL,0x6b8333d162877f7fL,0x7aca1af878d542aeL, + 0x355d2adc7e6d2a08L }, + { 0xb41f335a287386e1L,0xfd272a94f8e43275L,0x286ca2cde79989eaL, + 0x3dc2b1e37c2a3a79L } }, + /* 252 */ + { { 0xd689d21c04581352L,0x0a00c825376782beL,0x203bd5909fed701fL, + 0xc47869103ccd846bL }, + { 0x5dba770824c768edL,0x72feea026841f657L,0x73313ed56accce0eL, + 0xccc42968d5bb4d32L } }, + /* 253 */ + { { 0x94e50de13d7620b9L,0xd89a5c8a5992a56aL,0xdc007640675487c9L, + 0xe147eb42aa4871cfL }, + { 0x274ab4eeacf3ae46L,0xfd4936fb50350fbeL,0xdf2afe4748c840eaL, + 0x239ac047080e96e3L } }, + /* 254 */ + { { 0x481d1f352bfee8d4L,0xce80b5cffa7b0fecL,0x105c4c9e2ce9af3cL, + 0xc55fa1a3f5f7e59dL }, + { 0x3186f14e8257c227L,0xc5b1653f342be00bL,0x09afc998aa904fb2L, + 0x094cd99cd4f4b699L } }, + /* 255 */ + { { 0x8a981c84d703bebaL,0x8631d15032ceb291L,0xa445f2c9e3bd49ecL, + 0xb90a30b642abad33L }, + { 0xb465404fb4a5abf9L,0x004750c375db7603L,0x6f9a42ccca35d89fL, + 0x019f8b9a1b7924f7L } }, +}; + +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. 
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_256_ecc_mulmod_base_4(sp_point_256* r, const sp_digit* k, + int map, void* heap) +{ + return sp_256_ecc_mulmod_stripe_4(r, &p256_base, p256_table, + k, map, heap); +} + +#else +/* The index into pre-computation table to use. */ +static const uint8_t recode_index_4_7[130] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, + 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, + 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, + 0, 1, +}; + +/* Whether to negate y-ordinate. */ +static const uint8_t recode_neg_4_7[130] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, +}; + +/* Recode the scalar for multiplication using pre-computed values and + * subtraction. + * + * k Scalar to multiply by. + * v Vector of operations to perform. + */ +static void sp_256_ecc_recode_7_4(const sp_digit* k, ecc_recode_256* v) +{ + int i, j; + uint8_t y; + int carry = 0; + int o; + sp_digit n; + + j = 0; + n = k[j]; + o = 0; + for (i=0; i<37; i++) { + y = n; + if (o + 7 < 64) { + y &= 0x7f; + n >>= 7; + o += 7; + } + else if (o + 7 == 64) { + n >>= 7; + if (++j < 4) + n = k[j]; + o = 0; + } + else if (++j < 4) { + n = k[j]; + y |= (n << (64 - o)) & 0x7f; + o -= 57; + n >>= o; + } + + y += carry; + v[i].i = recode_index_4_7[y]; + v[i].neg = recode_neg_4_7[y]; + carry = (y >> 7) + v[i].neg; + } +} + +static const sp_table_entry_256 p256_table[2405] = { + /* 0 << 0 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 0 */ + { { 0x79e730d418a9143cL,0x75ba95fc5fedb601L,0x79fb732b77622510L, + 0x18905f76a53755c6L }, + { 0xddf25357ce95560aL,0x8b4ab8e4ba19e45cL,0xd2e88688dd21f325L, + 0x8571ff1825885d85L } }, + /* 2 << 0 */ + { { 0x850046d410ddd64dL,0xaa6ae3c1a433827dL,0x732205038d1490d9L, + 0xf6bb32e43dcf3a3bL }, + { 0x2f3648d361bee1a5L,0x152cd7cbeb236ff8L,0x19a8fb0e92042dbeL, + 0x78c577510a5b8a3bL } }, + /* 3 << 0 */ + { { 0xffac3f904eebc127L,0xb027f84a087d81fbL,0x66ad77dd87cbbc98L, + 0x26936a3fb6ff747eL }, + { 0xb04c5c1fc983a7ebL,0x583e47ad0861fe1aL,0x788208311a2ee98eL, + 0xd5f06a29e587cc07L } }, + /* 4 << 0 */ + { { 0x74b0b50d46918dccL,0x4650a6edc623c173L,0x0cdaacace8100af2L, + 0x577362f541b0176bL }, + { 0x2d96f24ce4cbaba6L,0x17628471fad6f447L,0x6b6c36dee5ddd22eL, + 0x84b14c394c5ab863L } }, + /* 5 << 0 */ + { { 0xbe1b8aaec45c61f5L,0x90ec649a94b9537dL,0x941cb5aad076c20cL, + 0xc9079605890523c8L }, + { 0xeb309b4ae7ba4f10L,0x73c568efe5eb882bL,0x3540a9877e7a1f68L, + 0x73a076bb2dd1e916L } }, + /* 6 << 0 */ + { { 0x403947373e77664aL,0x55ae744f346cee3eL,0xd50a961a5b17a3adL, + 0x13074b5954213673L }, + { 0x93d36220d377e44bL,0x299c2b53adff14b5L,0xf424d44cef639f11L, + 0xa4c9916d4a07f75fL } }, + /* 7 << 0 */ + { { 0x0746354ea0173b4fL,0x2bd20213d23c00f7L,0xf43eaab50c23bb08L, + 0x13ba5119c3123e03L }, + { 
0x2847d0303f5b9d4dL,0x6742f2f25da67bddL,0xef933bdc77c94195L, + 0xeaedd9156e240867L } }, + /* 8 << 0 */ + { { 0x27f14cd19499a78fL,0x462ab5c56f9b3455L,0x8f90f02af02cfc6bL, + 0xb763891eb265230dL }, + { 0xf59da3a9532d4977L,0x21e3327dcf9eba15L,0x123c7b84be60bbf0L, + 0x56ec12f27706df76L } }, + /* 9 << 0 */ + { { 0x75c96e8f264e20e8L,0xabe6bfed59a7a841L,0x2cc09c0444c8eb00L, + 0xe05b3080f0c4e16bL }, + { 0x1eb7777aa45f3314L,0x56af7bedce5d45e3L,0x2b6e019a88b12f1aL, + 0x086659cdfd835f9bL } }, + /* 10 << 0 */ + { { 0x2c18dbd19dc21ec8L,0x98f9868a0fcf8139L,0x737d2cd648250b49L, + 0xcc61c94724b3428fL }, + { 0x0c2b407880dd9e76L,0xc43a8991383fbe08L,0x5f7d2d65779be5d2L, + 0x78719a54eb3b4ab5L } }, + /* 11 << 0 */ + { { 0xea7d260a6245e404L,0x9de407956e7fdfe0L,0x1ff3a4158dac1ab5L, + 0x3e7090f1649c9073L }, + { 0x1a7685612b944e88L,0x250f939ee57f61c8L,0x0c0daa891ead643dL, + 0x68930023e125b88eL } }, + /* 12 << 0 */ + { { 0x04b71aa7d2697768L,0xabdedef5ca345a33L,0x2409d29dee37385eL, + 0x4ee1df77cb83e156L }, + { 0x0cac12d91cbb5b43L,0x170ed2f6ca895637L,0x28228cfa8ade6d66L, + 0x7ff57c9553238acaL } }, + /* 13 << 0 */ + { { 0xccc425634b2ed709L,0x0e356769856fd30dL,0xbcbcd43f559e9811L, + 0x738477ac5395b759L }, + { 0x35752b90c00ee17fL,0x68748390742ed2e3L,0x7cd06422bd1f5bc1L, + 0xfbc08769c9e7b797L } }, + /* 14 << 0 */ + { { 0xa242a35bb0cf664aL,0x126e48f77f9707e3L,0x1717bf54c6832660L, + 0xfaae7332fd12c72eL }, + { 0x27b52db7995d586bL,0xbe29569e832237c2L,0xe8e4193e2a65e7dbL, + 0x152706dc2eaa1bbbL } }, + /* 15 << 0 */ + { { 0x72bcd8b7bc60055bL,0x03cc23ee56e27e4bL,0xee337424e4819370L, + 0xe2aa0e430ad3da09L }, + { 0x40b8524f6383c45dL,0xd766355442a41b25L,0x64efa6de778a4797L, + 0x2042170a7079adf4L } }, + /* 16 << 0 */ + { { 0x808b0b650bc6fb80L,0x5882e0753ffe2e6bL,0xd5ef2f7c2c83f549L, + 0x54d63c809103b723L }, + { 0xf2f11bd652a23f9bL,0x3670c3194b0b6587L,0x55c4623bb1580e9eL, + 0x64edf7b201efe220L } }, + /* 17 << 0 */ + { { 0x97091dcbd53c5c9dL,0xf17624b6ac0a177bL,0xb0f139752cfe2dffL, + 0xc1a35c0a6c7a574eL }, + { 0x227d314693e79987L,0x0575bf30e89cb80eL,0x2f4e247f0d1883bbL, + 0xebd512263274c3d0L } }, + /* 18 << 0 */ + { { 0x5f3e51c856ada97aL,0x4afc964d8f8b403eL,0xa6f247ab412e2979L, + 0x675abd1b6f80ebdaL }, + { 0x66a2bd725e485a1dL,0x4b2a5caf8f4f0b3cL,0x2626927f1b847bbaL, + 0x6c6fc7d90502394dL } }, + /* 19 << 0 */ + { { 0xfea912baa5659ae8L,0x68363aba25e1a16eL,0xb8842277752c41acL, + 0xfe545c282897c3fcL }, + { 0x2d36e9e7dc4c696bL,0x5806244afba977c5L,0x85665e9be39508c1L, + 0xf720ee256d12597bL } }, + /* 20 << 0 */ + { { 0x8a979129d2337a31L,0x5916868f0f862bdcL,0x048099d95dd283baL, + 0xe2d1eeb6fe5bfb4eL }, + { 0x82ef1c417884005dL,0xa2d4ec17ffffcbaeL,0x9161c53f8aa95e66L, + 0x5ee104e1c5fee0d0L } }, + /* 21 << 0 */ + { { 0x562e4cecc135b208L,0x74e1b2654783f47dL,0x6d2a506c5a3f3b30L, + 0xecead9f4c16762fcL }, + { 0xf29dd4b2e286e5b9L,0x1b0fadc083bb3c61L,0x7a75023e7fac29a4L, + 0xc086d5f1c9477fa3L } }, + /* 22 << 0 */ + { { 0x0fc611352f6f3076L,0xc99ffa23e3912a9aL,0x6a0b0685d2f8ba3dL, + 0xfdc777e8e93358a4L }, + { 0x94a787bb35415f04L,0x640c2d6a4d23fea4L,0x9de917da153a35b5L, + 0x793e8d075d5cd074L } }, + /* 23 << 0 */ + { { 0xf4f876532de45068L,0x37c7a7e89e2e1f6eL,0xd0825fa2a3584069L, + 0xaf2cea7c1727bf42L }, + { 0x0360a4fb9e4785a9L,0xe5fda49c27299f4aL,0x48068e1371ac2f71L, + 0x83d0687b9077666fL } }, + /* 24 << 0 */ + { { 0x6d3883b215d02819L,0x6d0d755040dd9a35L,0x61d7cbf91d2b469fL, + 0xf97b232f2efc3115L }, + { 0xa551d750b24bcbc7L,0x11ea494988a1e356L,0x7669f03193cb7501L, + 0x595dc55eca737b8aL } }, + /* 25 << 0 */ + { { 
0xa4a319acd837879fL,0x6fc1b49eed6b67b0L,0xe395993332f1f3afL, + 0x966742eb65432a2eL }, + { 0x4b8dc9feb4966228L,0x96cc631243f43950L,0x12068859c9b731eeL, + 0x7b948dc356f79968L } }, + /* 26 << 0 */ + { { 0x61e4ad32ed1f8008L,0xe6c9267ad8b17538L,0x1ac7c5eb857ff6fbL, + 0x994baaa855f2fb10L }, + { 0x84cf14e11d248018L,0x5a39898b628ac508L,0x14fde97b5fa944f5L, + 0xed178030d12e5ac7L } }, + /* 27 << 0 */ + { { 0x042c2af497e2feb4L,0xd36a42d7aebf7313L,0x49d2c9eb084ffdd7L, + 0x9f8aa54b2ef7c76aL }, + { 0x9200b7ba09895e70L,0x3bd0c66fddb7fb58L,0x2d97d10878eb4cbbL, + 0x2d431068d84bde31L } }, + /* 28 << 0 */ + { { 0x4b523eb7172ccd1fL,0x7323cb2830a6a892L,0x97082ec0cfe153ebL, + 0xe97f6b6af2aadb97L }, + { 0x1d3d393ed1a83da1L,0xa6a7f9c7804b2a68L,0x4a688b482d0cb71eL, + 0xa9b4cc5f40585278L } }, + /* 29 << 0 */ + { { 0x5e5db46acb66e132L,0xf1be963a0d925880L,0x944a70270317b9e2L, + 0xe266f95948603d48L }, + { 0x98db66735c208899L,0x90472447a2fb18a3L,0x8a966939777c619fL, + 0x3798142a2a3be21bL } }, + /* 30 << 0 */ + { { 0xb4241cb13298b343L,0xa3a14e49b44f65a1L,0xc5f4d6cd3ac77acdL, + 0xd0288cb552b6fc3cL }, + { 0xd5cc8c2f1c040abcL,0xb675511e06bf9b4aL,0xd667da379b3aa441L, + 0x460d45ce51601f72L } }, + /* 31 << 0 */ + { { 0xe2f73c696755ff89L,0xdd3cf7e7473017e6L,0x8ef5689d3cf7600dL, + 0x948dc4f8b1fc87b4L }, + { 0xd9e9fe814ea53299L,0x2d921ca298eb6028L,0xfaecedfd0c9803fcL, + 0xf38ae8914d7b4745L } }, + /* 32 << 0 */ + { { 0xd8c5fccfc5e3a3d8L,0xbefd904c4079dfbfL,0xbc6d6a58fead0197L, + 0x39227077695532a4L }, + { 0x09e23e6ddbef42f5L,0x7e449b64480a9908L,0x7b969c1aad9a2e40L, + 0x6231d7929591c2a4L } }, + /* 33 << 0 */ + { { 0x871514560f664534L,0x85ceae7c4b68f103L,0xac09c4ae65578ab9L, + 0x33ec6868f044b10cL }, + { 0x6ac4832b3a8ec1f1L,0x5509d1285847d5efL,0xf909604f763f1574L, + 0xb16c4303c32f63c4L } }, + /* 34 << 0 */ + { { 0xb6ab20147ca23cd3L,0xcaa7a5c6a391849dL,0x5b0673a375678d94L, + 0xc982ddd4dd303e64L }, + { 0xfd7b000b5db6f971L,0xbba2cb1f6f876f92L,0xc77332a33c569426L, + 0xa159100c570d74f8L } }, + /* 35 << 0 */ + { { 0xfd16847fdec67ef5L,0x742ee464233e76b7L,0x0b8e4134efc2b4c8L, + 0xca640b8642a3e521L }, + { 0x653a01908ceb6aa9L,0x313c300c547852d5L,0x24e4ab126b237af7L, + 0x2ba901628bb47af8L } }, + /* 36 << 0 */ + { { 0x3d5e58d6a8219bb7L,0xc691d0bd1b06c57fL,0x0ae4cb10d257576eL, + 0x3569656cd54a3dc3L }, + { 0xe5ebaebd94cda03aL,0x934e82d3162bfe13L,0x450ac0bae251a0c6L, + 0x480b9e11dd6da526L } }, + /* 37 << 0 */ + { { 0x00467bc58cce08b5L,0xb636458c7f178d55L,0xc5748baea677d806L, + 0x2763a387dfa394ebL }, + { 0xa12b448a7d3cebb6L,0xe7adda3e6f20d850L,0xf63ebce51558462cL, + 0x58b36143620088a8L } }, + /* 38 << 0 */ + { { 0x8a2cc3ca4d63c0eeL,0x512331170fe948ceL,0x7463fd85222ef33bL, + 0xadf0c7dc7c603d6cL }, + { 0x0ec32d3bfe7765e5L,0xccaab359bf380409L,0xbdaa84d68e59319cL, + 0xd9a4c2809c80c34dL } }, + /* 39 << 0 */ + { { 0xa9d89488a059c142L,0x6f5ae714ff0b9346L,0x068f237d16fb3664L, + 0x5853e4c4363186acL }, + { 0xe2d87d2363c52f98L,0x2ec4a76681828876L,0x47b864fae14e7b1cL, + 0x0c0bc0e569192408L } }, + /* 40 << 0 */ + { { 0xe4d7681db82e9f3eL,0x83200f0bdf25e13cL,0x8909984c66f27280L, + 0x462d7b0075f73227L }, + { 0xd90ba188f2651798L,0x74c6e18c36ab1c34L,0xab256ea35ef54359L, + 0x03466612d1aa702fL } }, + /* 41 << 0 */ + { { 0x624d60492ed22e91L,0x6fdfe0b56f072822L,0xeeca111539ce2271L, + 0x98100a4fdb01614fL }, + { 0xb6b0daa2a35c628fL,0xb6f94d2ec87e9a47L,0xc67732591d57d9ceL, + 0xf70bfeec03884a7bL } }, + /* 42 << 0 */ + { { 0x5fb35ccfed2bad01L,0xa155cbe31da6a5c7L,0xc2e2594c30a92f8fL, + 0x649c89ce5bfafe43L }, + { 0xd158667de9ff257aL,0x9b359611f32c50aeL,0x4b00b20b906014cfL, + 
0xf3a8cfe389bc7d3dL } }, + /* 43 << 0 */ + { { 0x4ff23ffd248a7d06L,0x80c5bfb4878873faL,0xb7d9ad9005745981L, + 0x179c85db3db01994L }, + { 0xba41b06261a6966cL,0x4d82d052eadce5a8L,0x9e91cd3ba5e6a318L, + 0x47795f4f95b2dda0L } }, + /* 44 << 0 */ + { { 0xecfd7c1fd55a897cL,0x009194abb29110fbL,0x5f0e2046e381d3b0L, + 0x5f3425f6a98dd291L }, + { 0xbfa06687730d50daL,0x0423446c4b083b7fL,0x397a247dd69d3417L, + 0xeb629f90387ba42aL } }, + /* 45 << 0 */ + { { 0x1ee426ccd5cd79bfL,0x0032940b946c6e18L,0x1b1e8ae057477f58L, + 0xe94f7d346d823278L }, + { 0xc747cb96782ba21aL,0xc5254469f72b33a5L,0x772ef6dec7f80c81L, + 0xd73acbfe2cd9e6b5L } }, + /* 46 << 0 */ + { { 0x4075b5b149ee90d9L,0x785c339aa06e9ebaL,0xa1030d5babf825e0L, + 0xcec684c3a42931dcL }, + { 0x42ab62c9c1586e63L,0x45431d665ab43f2bL,0x57c8b2c055f7835dL, + 0x033da338c1b7f865L } }, + /* 47 << 0 */ + { { 0x283c7513caa76097L,0x0a624fa936c83906L,0x6b20afec715af2c7L, + 0x4b969974eba78bfdL }, + { 0x220755ccd921d60eL,0x9b944e107baeca13L,0x04819d515ded93d4L, + 0x9bbff86e6dddfd27L } }, + /* 48 << 0 */ + { { 0x6b34413077adc612L,0xa7496529bbd803a0L,0x1a1baaa76d8805bdL, + 0xc8403902470343adL }, + { 0x39f59f66175adff1L,0x0b26d7fbb7d8c5b7L,0xa875f5ce529d75e3L, + 0x85efc7e941325cc2L } }, + /* 49 << 0 */ + { { 0x21950b421ff6acd3L,0xffe7048453dc6909L,0xff4cd0b228766127L, + 0xabdbe6084fb7db2bL }, + { 0x837c92285e1109e8L,0x26147d27f4645b5aL,0x4d78f592f7818ed8L, + 0xd394077ef247fa36L } }, + /* 50 << 0 */ + { { 0x0fb9c2d0488c171aL,0xa78bfbaa13685278L,0xedfbe268d5b1fa6aL, + 0x0dceb8db2b7eaba7L }, + { 0xbf9e80899ae2b710L,0xefde7ae6a4449c96L,0x43b7716bcc143a46L, + 0xd7d34194c3628c13L } }, + /* 51 << 0 */ + { { 0x508cec1c3b3f64c9L,0xe20bc0ba1e5edf3fL,0xda1deb852f4318d4L, + 0xd20ebe0d5c3fa443L }, + { 0x370b4ea773241ea3L,0x61f1511c5e1a5f65L,0x99a5e23d82681c62L, + 0xd731e383a2f54c2dL } }, + /* 52 << 0 */ + { { 0x2692f36e83445904L,0x2e0ec469af45f9c0L,0x905a3201c67528b7L, + 0x88f77f34d0e5e542L }, + { 0xf67a8d295864687cL,0x23b92eae22df3562L,0x5c27014b9bbec39eL, + 0x7ef2f2269c0f0f8dL } }, + /* 53 << 0 */ + { { 0x97359638546c4d8dL,0x5f9c3fc492f24679L,0x912e8beda8c8acd9L, + 0xec3a318d306634b0L }, + { 0x80167f41c31cb264L,0x3db82f6f522113f2L,0xb155bcd2dcafe197L, + 0xfba1da5943465283L } }, + /* 54 << 0 */ + { { 0xa0425b8eb212cf53L,0x4f2e512ef8557c5fL,0xc1286ff925c4d56cL, + 0xbb8a0feaee26c851L }, + { 0xc28f70d2e7d6107eL,0x7ee0c444e76265aaL,0x3df277a41d1936b1L, + 0x1a556e3fea9595ebL } }, + /* 55 << 0 */ + { { 0x258bbbf9e7305683L,0x31eea5bf07ef5be6L,0x0deb0e4a46c814c1L, + 0x5cee8449a7b730ddL }, + { 0xeab495c5a0182bdeL,0xee759f879e27a6b4L,0xc2cf6a6880e518caL, + 0x25e8013ff14cf3f4L } }, + /* 56 << 0 */ + { { 0x8fc441407e8d7a14L,0xbb1ff3ca9556f36aL,0x6a84438514600044L, + 0xba3f0c4a7451ae63L }, + { 0xdfcac25b1f9af32aL,0x01e0db86b1f2214bL,0x4e9a5bc2a4b596acL, + 0x83927681026c2c08L } }, + /* 57 << 0 */ + { { 0x3ec832e77acaca28L,0x1bfeea57c7385b29L,0x068212e3fd1eaf38L, + 0xc13298306acf8cccL }, + { 0xb909f2db2aac9e59L,0x5748060db661782aL,0xc5ab2632c79b7a01L, + 0xda44c6c600017626L } }, + /* 58 << 0 */ + { { 0xf26c00e8a7ea82f0L,0x99cac80de4299aafL,0xd66fe3b67ed78be1L, + 0x305f725f648d02cdL }, + { 0x33ed1bc4623fb21bL,0xfa70533e7a6319adL,0x17ab562dbe5ffb3eL, + 0x0637499456674741L } }, + /* 59 << 0 */ + { { 0x69d44ed65c46aa8eL,0x2100d5d3a8d063d1L,0xcb9727eaa2d17c36L, + 0x4c2bab1b8add53b7L }, + { 0xa084e90c15426704L,0x778afcd3a837ebeaL,0x6651f7017ce477f8L, + 0xa062499846fb7a8bL } }, + /* 60 << 0 */ + { { 0xdc1e6828ed8a6e19L,0x33fc23364189d9c7L,0x026f8fe2671c39bcL, + 0xd40c4ccdbc6f9915L }, + { 
0xafa135bbf80e75caL,0x12c651a022adff2cL,0xc40a04bd4f51ad96L, + 0x04820109bbe4e832L } }, + /* 61 << 0 */ + { { 0x3667eb1a7f4c04ccL,0x59556621a9404f84L,0x71cdf6537eceb50aL, + 0x994a44a69b8335faL }, + { 0xd7faf819dbeb9b69L,0x473c5680eed4350dL,0xb6658466da44bba2L, + 0x0d1bc780872bdbf3L } }, + /* 62 << 0 */ + { { 0xe535f175a1962f91L,0x6ed7e061ed58f5a7L,0x177aa4c02089a233L, + 0x0dbcb03ae539b413L }, + { 0xe3dc424ebb32e38eL,0x6472e5ef6806701eL,0xdd47ff98814be9eeL, + 0x6b60cfff35ace009L } }, + /* 63 << 0 */ + { { 0xb8d3d9319ff91fe5L,0x039c4800f0518eedL,0x95c376329182cb26L, + 0x0763a43482fc568dL }, + { 0x707c04d5383e76baL,0xac98b930824e8197L,0x92bf7c8f91230de0L, + 0x90876a0140959b70L } }, + /* 64 << 0 */ + { { 0xdb6d96f305968b80L,0x380a0913089f73b9L,0x7da70b83c2c61e01L, + 0x95fb8394569b38c7L }, + { 0x9a3c651280edfe2fL,0x8f726bb98faeaf82L,0x8010a4a078424bf8L, + 0x296720440e844970L } }, + /* 0 << 7 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 7 */ + { { 0x63c5cb817a2ad62aL,0x7ef2b6b9ac62ff54L,0x3749bba4b3ad9db5L, + 0xad311f2c46d5a617L }, + { 0xb77a8087c2ff3b6dL,0xb46feaf3367834ffL,0xf8aa266d75d6b138L, + 0xfa38d320ec008188L } }, + /* 2 << 7 */ + { { 0x486d8ffa696946fcL,0x50fbc6d8b9cba56dL,0x7e3d423e90f35a15L, + 0x7c3da195c0dd962cL }, + { 0xe673fdb03cfd5d8bL,0x0704b7c2889dfca5L,0xf6ce581ff52305aaL, + 0x399d49eb914d5e53L } }, + /* 3 << 7 */ + { { 0x380a496d6ec293cdL,0x733dbda78e7051f5L,0x037e388db849140aL, + 0xee4b32b05946dbf6L }, + { 0xb1c4fda9cae368d1L,0x5001a7b0fdb0b2f3L,0x6df593742e3ac46eL, + 0x4af675f239b3e656L } }, + /* 4 << 7 */ + { { 0x44e3811039949296L,0x5b63827b361db1b5L,0x3e5323ed206eaff5L, + 0x942370d2c21f4290L }, + { 0xf2caaf2ee0d985a1L,0x192cc64b7239846dL,0x7c0b8f47ae6312f8L, + 0x7dc61f9196620108L } }, + /* 5 << 7 */ + { { 0xb830fb5bc2da7de9L,0xd0e643df0ff8d3beL,0x31ee77ba188a9641L, + 0x4e8aa3aabcf6d502L }, + { 0xf9fb65329a49110fL,0xd18317f62dd6b220L,0x7e3ced4152c3ea5aL, + 0x0d296a147d579c4aL } }, + /* 6 << 7 */ + { { 0x35d6a53eed4c3717L,0x9f8240cf3d0ed2a3L,0x8c0d4d05e5543aa5L, + 0x45d5bbfbdd33b4b4L }, + { 0xfa04cc73137fd28eL,0x862ac6efc73b3ffdL,0x403ff9f531f51ef2L, + 0x34d5e0fcbc73f5a2L } }, + /* 7 << 7 */ + { { 0xf252682008913f4fL,0xea20ed61eac93d95L,0x51ed38b46ca6b26cL, + 0x8662dcbcea4327b0L }, + { 0x6daf295c725d2aaaL,0xbad2752f8e52dcdaL,0x2210e7210b17daccL, + 0xa37f7912d51e8232L } }, + /* 8 << 7 */ + { { 0x4f7081e144cc3addL,0xd5ffa1d687be82cfL,0x89890b6c0edd6472L, + 0xada26e1a3ed17863L }, + { 0x276f271563483caaL,0xe6924cd92f6077fdL,0x05a7fe980a466e3cL, + 0xf1c794b0b1902d1fL } }, + /* 9 << 7 */ + { { 0xe521368882a8042cL,0xd931cfafcd278298L,0x069a0ae0f597a740L, + 0x0adbb3f3eb59107cL }, + { 0x983e951e5eaa8eb8L,0xe663a8b511b48e78L,0x1631cc0d8a03f2c5L, + 0x7577c11e11e271e2L } }, + /* 10 << 7 */ + { { 0x33b2385c08369a90L,0x2990c59b190eb4f8L,0x819a6145c68eac80L, + 0x7a786d622ec4a014L }, + { 0x33faadbe20ac3a8dL,0x31a217815aba2d30L,0x209d2742dba4f565L, + 0xdb2ce9e355aa0fbbL } }, + /* 11 << 7 */ + { { 0x8cef334b168984dfL,0xe81dce1733879638L,0xf6e6949c263720f0L, + 0x5c56feaff593cbecL }, + { 0x8bff5601fde58c84L,0x74e241172eccb314L,0xbcf01b614c9a8a78L, + 0xa233e35e544c9868L } }, + /* 12 << 7 */ + { { 0xb3156bf38bd7aff1L,0x1b5ee4cb1d81b146L,0x7ba1ac41d628a915L, + 0x8f3a8f9cfd89699eL }, + { 0x7329b9c9a0748be7L,0x1d391c95a92e621fL,0xe51e6b214d10a837L, + 0xd255f53a4947b435L } }, + /* 13 << 7 */ + { { 0x07669e04f1788ee3L,0xc14f27afa86938a2L,0x8b47a334e93a01c0L, + 0xff627438d9366808L }, + { 0x7a0985d8ca2a5965L,0x3d9a5542d6e9b9b3L,0xc23eb80b4cf972e8L, + 0x5c1c33bb4fdf72fdL } 
}, + /* 14 << 7 */ + { { 0x0c4a58d474a86108L,0xf8048a8fee4c5d90L,0xe3c7c924e86d4c80L, + 0x28c889de056a1e60L }, + { 0x57e2662eb214a040L,0xe8c48e9837e10347L,0x8774286280ac748aL, + 0xf1c24022186b06f2L } }, + /* 15 << 7 */ + { { 0xac2dd4c35f74040aL,0x409aeb71fceac957L,0x4fbad78255c4ec23L, + 0xb359ed618a7b76ecL }, + { 0x12744926ed6f4a60L,0xe21e8d7f4b912de3L,0xe2575a59fc705a59L, + 0x72f1d4deed2dbc0eL } }, + /* 16 << 7 */ + { { 0x3d2b24b9eb7926b8L,0xbff88cb3cdbe5509L,0xd0f399afe4dd640bL, + 0x3c5fe1302f76ed45L }, + { 0x6f3562f43764fb3dL,0x7b5af3183151b62dL,0xd5bd0bc7d79ce5f3L, + 0xfdaf6b20ec66890fL } }, + /* 17 << 7 */ + { { 0x735c67ec6063540cL,0x50b259c2e5f9cb8fL,0xb8734f9a3f99c6abL, + 0xf8cc13d5a3a7bc85L }, + { 0x80c1b305c5217659L,0xfe5364d44ec12a54L,0xbd87045e681345feL, + 0x7f8efeb1582f897fL } }, + /* 18 << 7 */ + { { 0xe8cbf1e5d5923359L,0xdb0cea9d539b9fb0L,0x0c5b34cf49859b98L, + 0x5e583c56a4403cc6L }, + { 0x11fc1a2dd48185b7L,0xc93fbc7e6e521787L,0x47e7a05805105b8bL, + 0x7b4d4d58db8260c8L } }, + /* 19 << 7 */ + { { 0xe33930b046eb842aL,0x8e844a9a7bdae56dL,0x34ef3a9e13f7fdfcL, + 0xb3768f82636ca176L }, + { 0x2821f4e04e09e61cL,0x414dc3a1a0c7cddcL,0xd537943754945fcdL, + 0x151b6eefb3555ff1L } }, + /* 20 << 7 */ + { { 0xb31bd6136339c083L,0x39ff8155dfb64701L,0x7c3388d2e29604abL, + 0x1e19084ba6b10442L }, + { 0x17cf54c0eccd47efL,0x896933854a5dfb30L,0x69d023fb47daf9f6L, + 0x9222840b7d91d959L } }, + /* 21 << 7 */ + { { 0x439108f5803bac62L,0x0b7dd91d379bd45fL,0xd651e827ca63c581L, + 0x5c5d75f6509c104fL }, + { 0x7d5fc7381f2dc308L,0x20faa7bfd98454beL,0x95374beea517b031L, + 0xf036b9b1642692acL } }, + /* 22 << 7 */ + { { 0xc510610939842194L,0xb7e2353e49d05295L,0xfc8c1d5cefb42ee0L, + 0xe04884eb08ce811cL }, + { 0xf1f75d817419f40eL,0x5b0ac162a995c241L,0x120921bbc4c55646L, + 0x713520c28d33cf97L } }, + /* 23 << 7 */ + { { 0xb4a65a5ce98c5100L,0x6cec871d2ddd0f5aL,0x251f0b7f9ba2e78bL, + 0x224a8434ce3a2a5fL }, + { 0x26827f6125f5c46fL,0x6a22bedc48545ec0L,0x25ae5fa0b1bb5cdcL, + 0xd693682ffcb9b98fL } }, + /* 24 << 7 */ + { { 0x32027fe891e5d7d3L,0xf14b7d1773a07678L,0xf88497b3c0dfdd61L, + 0xf7c2eec02a8c4f48L }, + { 0xaa5573f43756e621L,0xc013a2401825b948L,0x1c03b34563878572L, + 0xa0472bea653a4184L } }, + /* 25 << 7 */ + { { 0xf4222e270ac69a80L,0x34096d25f51e54f6L,0x00a648cb8fffa591L, + 0x4e87acdc69b6527fL }, + { 0x0575e037e285ccb4L,0x188089e450ddcf52L,0xaa96c9a8870ff719L, + 0x74a56cd81fc7e369L } }, + /* 26 << 7 */ + { { 0x41d04ee21726931aL,0x0bbbb2c83660ecfdL,0xa6ef6de524818e18L, + 0xe421cc51e7d57887L }, + { 0xf127d208bea87be6L,0x16a475d3b1cdd682L,0x9db1b684439b63f7L, + 0x5359b3dbf0f113b6L } }, + /* 27 << 7 */ + { { 0xdfccf1de8bf06e31L,0x1fdf8f44dd383901L,0x10775cad5017e7d2L, + 0xdfc3a59758d11eefL }, + { 0x6ec9c8a0b1ecff10L,0xee6ed6cc28400549L,0xb5ad7bae1b4f8d73L, + 0x61b4f11de00aaab9L } }, + /* 28 << 7 */ + { { 0x7b32d69bd4eff2d7L,0x88ae67714288b60fL,0x159461b437a1e723L, + 0x1f3d4789570aae8cL }, + { 0x869118c07f9871daL,0x35fbda78f635e278L,0x738f3641e1541dacL, + 0x6794b13ac0dae45fL } }, + /* 29 << 7 */ + { { 0x065064ac09cc0917L,0x27c53729c68540fdL,0x0d2d4c8eef227671L, + 0xd23a9f80a1785a04L }, + { 0x98c5952852650359L,0xfa09ad0174a1acadL,0x082d5a290b55bf5cL, + 0xa40f1c67419b8084L } }, + /* 30 << 7 */ + { { 0x3a5c752edcc18770L,0x4baf1f2f8825c3a5L,0xebd63f7421b153edL, + 0xa2383e47b2f64723L }, + { 0xe7bf620a2646d19aL,0x56cb44ec03c83ffdL,0xaf7267c94f6be9f1L, + 0x8b2dfd7bc06bb5e9L } }, + /* 31 << 7 */ + { { 0xb87072f2a672c5c7L,0xeacb11c80d53c5e2L,0x22dac29dff435932L, + 0x37bdb99d4408693cL }, + { 
0xf6e62fb62899c20fL,0x3535d512447ece24L,0xfbdc6b88ff577ce3L, + 0x726693bd190575f2L } }, + /* 32 << 7 */ + { { 0x6772b0e5ab4b35a2L,0x1d8b6001f5eeaacfL,0x728f7ce4795b9580L, + 0x4a20ed2a41fb81daL }, + { 0x9f685cd44fec01e6L,0x3ed7ddcca7ff50adL,0x460fd2640c2d97fdL, + 0x3a241426eb82f4f9L } }, + /* 33 << 7 */ + { { 0x17d1df2c6a8ea820L,0xb2b50d3bf22cc254L,0x03856cbab7291426L, + 0x87fd26ae04f5ee39L }, + { 0x9cb696cc02bee4baL,0x5312180406820fd6L,0xa5dfc2690212e985L, + 0x666f7ffa160f9a09L } }, + /* 34 << 7 */ + { { 0xc503cd33bccd9617L,0x365dede4ba7730a3L,0x798c63555ddb0786L, + 0xa6c3200efc9cd3bcL }, + { 0x060ffb2ce5e35efdL,0x99a4e25b5555a1c1L,0x11d95375f70b3751L, + 0x0a57354a160e1bf6L } }, + /* 35 << 7 */ + { { 0xecb3ae4bf8e4b065L,0x07a834c42e53022bL,0x1cd300b38692ed96L, + 0x16a6f79261ee14ecL }, + { 0x8f1063c66a8649edL,0xfbcdfcfe869f3e14L,0x2cfb97c100a7b3ecL, + 0xcea49b3c7130c2f1L } }, + /* 36 << 7 */ + { { 0x462d044fe9d96488L,0x4b53d52e8182a0c1L,0x84b6ddd30391e9e9L, + 0x80ab7b48b1741a09L }, + { 0xec0e15d427d3317fL,0x8dfc1ddb1a64671eL,0x93cc5d5fd49c5b92L, + 0xc995d53d3674a331L } }, + /* 37 << 7 */ + { { 0x302e41ec090090aeL,0x2278a0ccedb06830L,0x1d025932fbc99690L, + 0x0c32fbd2b80d68daL }, + { 0xd79146daf341a6c1L,0xae0ba1391bef68a0L,0xc6b8a5638d774b3aL, + 0x1cf307bd880ba4d7L } }, + /* 38 << 7 */ + { { 0xc033bdc719803511L,0xa9f97b3b8888c3beL,0x3d68aebc85c6d05eL, + 0xc3b88a9d193919ebL }, + { 0x2d300748c48b0ee3L,0x7506bc7c07a746c1L,0xfc48437c6e6d57f3L, + 0x5bd71587cfeaa91aL } }, + /* 39 << 7 */ + { { 0xa4ed0408c1bc5225L,0xd0b946db2719226dL,0x109ecd62758d2d43L, + 0x75c8485a2751759bL }, + { 0xb0b75f499ce4177aL,0x4fa61a1e79c10c3dL,0xc062d300a167fcd7L, + 0x4df3874c750f0fa8L } }, + /* 40 << 7 */ + { { 0x29ae2cf983dfedc9L,0xf84371348d87631aL,0xaf5717117429c8d2L, + 0x18d15867146d9272L }, + { 0x83053ecf69769bb7L,0xc55eb856c479ab82L,0x5ef7791c21b0f4b2L, + 0xaa5956ba3d491525L } }, + /* 41 << 7 */ + { { 0x407a96c29fe20ebaL,0xf27168bbe52a5ad3L,0x43b60ab3bf1d9d89L, + 0xe45c51ef710e727aL }, + { 0xdfca5276099b4221L,0x8dc6407c2557a159L,0x0ead833591035895L, + 0x0a9db9579c55dc32L } }, + /* 42 << 7 */ + { { 0xe40736d3df61bc76L,0x13a619c03f778cdbL,0x6dd921a4c56ea28fL, + 0x76a524332fa647b4L }, + { 0x23591891ac5bdc5dL,0xff4a1a72bac7dc01L,0x9905e26162df8453L, + 0x3ac045dfe63b265fL } }, + /* 43 << 7 */ + { { 0x8a3f341bad53dba7L,0x8ec269cc837b625aL,0xd71a27823ae31189L, + 0x8fb4f9a355e96120L }, + { 0x804af823ff9875cfL,0x23224f575d442a9bL,0x1c4d3b9eecc62679L, + 0x91da22fba0e7ddb1L } }, + /* 44 << 7 */ + { { 0xa370324d6c04a661L,0x9710d3b65e376d17L,0xed8c98f03044e357L, + 0xc364ebbe6422701cL }, + { 0x347f5d517733d61cL,0xd55644b9cea826c3L,0x80c6e0ad55a25548L, + 0x0aa7641d844220a7L } }, + /* 45 << 7 */ + { { 0x1438ec8131810660L,0x9dfa6507de4b4043L,0x10b515d8cc3e0273L, + 0x1b6066dd28d8cfb2L }, + { 0xd3b045919c9efebdL,0x425d4bdfa21c1ff4L,0x5fe5af19d57607d3L, + 0xbbf773f754481084L } }, + /* 46 << 7 */ + { { 0x8435bd6994b03ed1L,0xd9ad1de3634cc546L,0x2cf423fc00e420caL, + 0xeed26d80a03096ddL }, + { 0xd7f60be7a4db09d2L,0xf47f569d960622f7L,0xe5925fd77296c729L, + 0xeff2db2626ca2715L } }, + /* 47 << 7 */ + { { 0xa6fcd014b913e759L,0x53da47868ff4de93L,0x14616d79c32068e1L, + 0xb187d664ccdf352eL }, + { 0xf7afb6501dc90b59L,0x8170e9437daa1b26L,0xc8e3bdd8700c0a84L, + 0x6e8d345f6482bdfaL } }, + /* 48 << 7 */ + { { 0x84cfbfa1c5c5ea50L,0xd3baf14c67960681L,0x263984030dd50942L, + 0xe4b7839c4716a663L }, + { 0xd5f1f794e7de6dc0L,0x5cd0f4d4622aa7ceL,0x5295f3f159acfeecL, + 0x8d933552953e0607L } }, + /* 49 << 7 */ + { { 
0xc7db8ec5776c5722L,0xdc467e622b5f290cL,0xd4297e704ff425a9L, + 0x4be924c10cf7bb72L }, + { 0x0d5dc5aea1892131L,0x8bf8a8e3a705c992L,0x73a0b0647a305ac5L, + 0x00c9ca4e9a8c77a8L } }, + /* 50 << 7 */ + { { 0x5dfee80f83774bddL,0x6313160285734485L,0xa1b524ae914a69a9L, + 0xebc2ffafd4e300d7L }, + { 0x52c93db77cfa46a5L,0x71e6161f21653b50L,0x3574fc57a4bc580aL, + 0xc09015dde1bc1253L } }, + /* 51 << 7 */ + { { 0x4b7b47b2d174d7aaL,0x4072d8e8f3a15d04L,0xeeb7d47fd6fa07edL, + 0x6f2b9ff9edbdafb1L }, + { 0x18c516153760fe8aL,0x7a96e6bff06c6c13L,0x4d7a04100ea2d071L, + 0xa1914e9b0be2a5ceL } }, + /* 52 << 7 */ + { { 0x5726e357d8a3c5cfL,0x1197ecc32abb2b13L,0x6c0d7f7f31ae88ddL, + 0x15b20d1afdbb3efeL }, + { 0xcd06aa2670584039L,0x2277c969a7dc9747L,0xbca695877855d815L, + 0x899ea2385188b32aL } }, + /* 53 << 7 */ + { { 0x37d9228b760c1c9dL,0xc7efbb119b5c18daL,0x7f0d1bc819f6dbc5L, + 0x4875384b07e6905bL }, + { 0xc7c50baa3ba8cd86L,0xb0ce40fbc2905de0L,0x708406737a231952L, + 0xa912a262cf43de26L } }, + /* 54 << 7 */ + { { 0x9c38ddcceb5b76c1L,0x746f528526fc0ab4L,0x52a63a50d62c269fL, + 0x60049c5599458621L }, + { 0xe7f48f823c2f7c9eL,0x6bd99043917d5cf3L,0xeb1317a88701f469L, + 0xbd3fe2ed9a449fe0L } }, + /* 55 << 7 */ + { { 0x421e79ca12ef3d36L,0x9ee3c36c3e7ea5deL,0xe48198b5cdff36f7L, + 0xaff4f967c6b82228L }, + { 0x15e19dd0c47adb7eL,0x45699b23032e7dfaL,0x40680c8b1fae026aL, + 0x5a347a48550dbf4dL } }, + /* 56 << 7 */ + { { 0xe652533b3cef0d7dL,0xd94f7b182bbb4381L,0x838752be0e80f500L, + 0x8e6e24889e9c9bfbL }, + { 0xc975169716caca6aL,0x866c49d838531ad9L,0xc917e2397151ade1L, + 0x2d016ec16037c407L } }, + /* 57 << 7 */ + { { 0xa407ccc900eac3f9L,0x835f6280e2ed4748L,0xcc54c3471cc98e0dL, + 0x0e969937dcb572ebL }, + { 0x1b16c8e88f30c9cbL,0xa606ae75373c4661L,0x47aa689b35502cabL, + 0xf89014ae4d9bb64fL } }, + /* 58 << 7 */ + { { 0x202f6a9c31c71f7bL,0x01f95aa3296ffe5cL,0x5fc0601453cec3a3L, + 0xeb9912375f498a45L }, + { 0xae9a935e5d91ba87L,0xc6ac62810b564a19L,0x8a8fe81c3bd44e69L, + 0x7c8b467f9dd11d45L } }, + /* 59 << 7 */ + { { 0xf772251fea5b8e69L,0xaeecb3bdc5b75fbcL,0x1aca3331887ff0e5L, + 0xbe5d49ff19f0a131L }, + { 0x582c13aae5c8646fL,0xdbaa12e820e19980L,0x8f40f31af7abbd94L, + 0x1f13f5a81dfc7663L } }, + /* 60 << 7 */ + { { 0x5d81f1eeaceb4fc0L,0x362560025e6f0f42L,0x4b67d6d7751370c8L, + 0x2608b69803e80589L }, + { 0xcfc0d2fc05268301L,0xa6943d3940309212L,0x192a90c21fd0e1c2L, + 0xb209f11337f1dc76L } }, + /* 61 << 7 */ + { { 0xefcc5e0697bf1298L,0xcbdb6730219d639eL,0xd009c116b81e8c6fL, + 0xa3ffdde31a7ce2e5L }, + { 0xc53fbaaaa914d3baL,0x836d500f88df85eeL,0xd98dc71b66ee0751L, + 0x5a3d7005714516fdL } }, + /* 62 << 7 */ + { { 0x21d3634d39eedbbaL,0x35cd2e680455a46dL,0xc8cafe65f9d7eb0cL, + 0xbda3ce9e00cefb3eL }, + { 0xddc17a602c9cf7a4L,0x01572ee47bcb8773L,0xa92b2b018c7548dfL, + 0x732fd309a84600e3L } }, + /* 63 << 7 */ + { { 0xe22109c716543a40L,0x9acafd36fede3c6cL,0xfb2068526824e614L, + 0x2a4544a9da25dca0L }, + { 0x2598526291d60b06L,0x281b7be928753545L,0xec667b1a90f13b27L, + 0x33a83aff940e2eb4L } }, + /* 64 << 7 */ + { { 0x80009862d5d721d5L,0x0c3357a35bd3a182L,0x27f3a83b7aa2cda4L, + 0xb58ae74ef6f83085L }, + { 0x2a911a812e6dad6bL,0xde286051f43d6c5bL,0x4bdccc41f996c4d8L, + 0xe7312ec00ae1e24eL } }, + /* 0 << 14 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 14 */ + { { 0xf8d112e76e6485b3L,0x4d3e24db771c52f8L,0x48e3ee41684a2f6dL, + 0x7161957d21d95551L }, + { 0x19631283cdb12a6cL,0xbf3fa8822e50e164L,0xf6254b633166cc73L, + 0x3aefa7aeaee8cc38L } }, + /* 2 << 14 */ + { { 0x79b0fe623b36f9fdL,0x26543b23fde19fc0L,0x136e64a0958482efL, + 
0x23f637719b095825L }, + { 0x14cfd596b6a1142eL,0x5ea6aac6335aac0bL,0x86a0e8bdf3081dd5L, + 0x5fb89d79003dc12aL } }, + /* 3 << 14 */ + { { 0xf615c33af72e34d4L,0x0bd9ea40110eec35L,0x1c12bc5bc1dea34eL, + 0x686584c949ae4699L }, + { 0x13ad95d38c97b942L,0x4609561a4e5c7562L,0x9e94a4aef2737f89L, + 0xf57594c6371c78b6L } }, + /* 4 << 14 */ + { { 0x0f0165fce3779ee3L,0xe00e7f9dbd495d9eL,0x1fa4efa220284e7aL, + 0x4564bade47ac6219L }, + { 0x90e6312ac4708e8eL,0x4f5725fba71e9adfL,0xe95f55ae3d684b9fL, + 0x47f7ccb11e94b415L } }, + /* 5 << 14 */ + { { 0x7322851b8d946581L,0xf0d13133bdf4a012L,0xa3510f696584dae0L, + 0x03a7c1713c9f6c6dL }, + { 0x5be97f38e475381aL,0xca1ba42285823334L,0xf83cc5c70be17ddaL, + 0x158b14940b918c0fL } }, + /* 6 << 14 */ + { { 0xda3a77e5522e6b69L,0x69c908c3bbcd6c18L,0x1f1b9e48d924fd56L, + 0x37c64e36aa4bb3f7L }, + { 0x5a4fdbdfee478d7dL,0xba75c8bc0193f7a0L,0x84bc1e8456cd16dfL, + 0x1fb08f0846fad151L } }, + /* 7 << 14 */ + { { 0x8a7cabf9842e9f30L,0xa331d4bf5eab83afL,0xd272cfba017f2a6aL, + 0x27560abc83aba0e3L }, + { 0x94b833870e3a6b75L,0x25c6aea26b9f50f5L,0x803d691db5fdf6d0L, + 0x03b77509e6333514L } }, + /* 8 << 14 */ + { { 0x3617890361a341c1L,0x3604dc600cfd6142L,0x022295eb8533316cL, + 0x3dbde4ac44af2922L }, + { 0x898afc5d1c7eef69L,0x58896805d14f4fa1L,0x05002160203c21caL, + 0x6f0d1f3040ef730bL } }, + /* 9 << 14 */ + { { 0x8e8c44d4196224f8L,0x75a4ab95374d079dL,0x79085ecc7d48f123L, + 0x56f04d311bf65ad8L }, + { 0xe220bf1cbda602b2L,0x73ee1742f9612c69L,0x76008fc8084fd06bL, + 0x4000ef9ff11380d1L } }, + /* 10 << 14 */ + { { 0x48201b4b12cfe297L,0x3eee129c292f74e5L,0xe1fe114ec9e874e8L, + 0x899b055c92c5fc41L }, + { 0x4e477a643a39c8cfL,0x82f09efe78963cc9L,0x6fd3fd8fd333f863L, + 0x85132b2adc949c63L } }, + /* 11 << 14 */ + { { 0x7e06a3ab516eb17bL,0x73bec06fd2c7372bL,0xe4f74f55ba896da6L, + 0xbb4afef88e9eb40fL }, + { 0x2d75bec8e61d66b0L,0x02bda4b4ef29300bL,0x8bbaa8de026baa5aL, + 0xff54befda07f4440L } }, + /* 12 << 14 */ + { { 0xbd9b8b1dbe7a2af3L,0xec51caa94fb74a72L,0xb9937a4b63879697L, + 0x7c9a9d20ec2687d5L }, + { 0x1773e44f6ef5f014L,0x8abcf412e90c6900L,0x387bd0228142161eL, + 0x50393755fcb6ff2aL } }, + /* 13 << 14 */ + { { 0x9813fd56ed6def63L,0x53cf64827d53106cL,0x991a35bd431f7ac1L, + 0xf1e274dd63e65fafL }, + { 0xf63ffa3c44cc7880L,0x411a426b7c256981L,0xb698b9fd93a420e0L, + 0x89fdddc0ae53f8feL } }, + /* 14 << 14 */ + { { 0x766e072232398baaL,0x205fee425cfca031L,0xa49f53417a029cf2L, + 0xa88c68b84023890dL }, + { 0xbc2750417337aaa8L,0x9ed364ad0eb384f4L,0xe0816f8529aba92fL, + 0x2e9e194104e38a88L } }, + /* 15 << 14 */ + { { 0x57eef44a3dafd2d5L,0x35d1fae597ed98d8L,0x50628c092307f9b1L, + 0x09d84aaed6cba5c6L }, + { 0x67071bc788aaa691L,0x2dea57a9afe6cb03L,0xdfe11bb43d78ac01L, + 0x7286418c7fd7aa51L } }, + /* 16 << 14 */ + { { 0xfabf770977f7195aL,0x8ec86167adeb838fL,0xea1285a8bb4f012dL, + 0xd68835039a3eab3fL }, + { 0xee5d24f8309004c2L,0xa96e4b7613ffe95eL,0x0cdffe12bd223ea4L, + 0x8f5c2ee5b6739a53L } }, + /* 17 << 14 */ + { { 0x5cb4aaa5dd968198L,0xfa131c5272413a6cL,0x53d46a909536d903L, + 0xb270f0d348606d8eL }, + { 0x518c7564a053a3bcL,0x088254b71a86caefL,0xb3ba8cb40ab5efd0L, + 0x5c59900e4605945dL } }, + /* 18 << 14 */ + { { 0xecace1dda1887395L,0x40960f36932a65deL,0x9611ff5c3aa95529L, + 0xc58215b07c1e5a36L }, + { 0xd48c9b58f0e1a524L,0xb406856bf590dfb8L,0xc7605e049cd95662L, + 0x0dd036eea33ecf82L } }, + /* 19 << 14 */ + { { 0xa50171acc33156b3L,0xf09d24ea4a80172eL,0x4e1f72c676dc8eefL, + 0xe60caadc5e3d44eeL }, + { 0x006ef8a6979b1d8fL,0x60908a1c97788d26L,0x6e08f95b266feec0L, + 0x618427c222e8c94eL } }, + /* 20 << 14 */ + { { 
0x3d61333959145a65L,0xcd9bc368fa406337L,0x82d11be32d8a52a0L, + 0xf6877b2797a1c590L }, + { 0x837a819bf5cbdb25L,0x2a4fd1d8de090249L,0x622a7de774990e5fL, + 0x840fa5a07945511bL } }, + /* 21 << 14 */ + { { 0x30b974be6558842dL,0x70df8c6417f3d0a6L,0x7c8035207542e46dL, + 0x7251fe7fe4ecc823L }, + { 0xe59134cb5e9aac9aL,0x11bb0934f0045d71L,0x53e5d9b5dbcb1d4eL, + 0x8d97a90592defc91L } }, + /* 22 << 14 */ + { { 0xfe2893277946d3f9L,0xe132bd2407472273L,0xeeeb510c1eb6ae86L, + 0x777708c5f0595067L }, + { 0x18e2c8cd1297029eL,0x2c61095cbbf9305eL,0xe466c2586b85d6d9L, + 0x8ac06c36da1ea530L } }, + /* 23 << 14 */ + { { 0xa365dc39a1304668L,0xe4a9c88507f89606L,0x65a4898facc7228dL, + 0x3e2347ff84ca8303L }, + { 0xa5f6fb77ea7d23a3L,0x2fac257d672a71cdL,0x6908bef87e6a44d3L, + 0x8ff87566891d3d7aL } }, + /* 24 << 14 */ + { { 0xe58e90b36b0cf82eL,0x6438d2462615b5e7L,0x07b1f8fc669c145aL, + 0xb0d8b2da36f1e1cbL }, + { 0x54d5dadbd9184c4dL,0x3dbb18d5f93d9976L,0x0a3e0f56d1147d47L, + 0x2afa8c8da0a48609L } }, + /* 25 << 14 */ + { { 0x275353e8bc36742cL,0x898f427eeea0ed90L,0x26f4947e3e477b00L, + 0x8ad8848a308741e3L }, + { 0x6c703c38d74a2a46L,0x5e3e05a99ba17ba2L,0xc1fa6f664ab9a9e4L, + 0x474a2d9a3841d6ecL } }, + /* 26 << 14 */ + { { 0x871239ad653ae326L,0x14bcf72aa74cbb43L,0x8737650e20d4c083L, + 0x3df86536110ed4afL }, + { 0xd2d86fe7b53ca555L,0x688cb00dabd5d538L,0xcf81bda31ad38468L, + 0x7ccfe3ccf01167b6L } }, + /* 27 << 14 */ + { { 0xcf4f47e06c4c1fe6L,0x557e1f1a298bbb79L,0xf93b974f30d45a14L, + 0x174a1d2d0baf97c4L }, + { 0x7a003b30c51fbf53L,0xd8940991ee68b225L,0x5b0aa7b71c0f4173L, + 0x975797c9a20a7153L } }, + /* 28 << 14 */ + { { 0x26e08c07e3533d77L,0xd7222e6a2e341c99L,0x9d60ec3d8d2dc4edL, + 0xbdfe0d8f7c476cf8L }, + { 0x1fe59ab61d056605L,0xa9ea9df686a8551fL,0x8489941e47fb8d8cL, + 0xfeb874eb4a7f1b10L } }, + /* 29 << 14 */ + { { 0xfe5fea867ee0d98fL,0x201ad34bdbf61864L,0x45d8fe4737c031d4L, + 0xd5f49fae795f0822L }, + { 0xdb0fb291c7f4a40cL,0x2e69d9c1730ddd92L,0x754e105449d76987L, + 0x8a24911d7662db87L } }, + /* 30 << 14 */ + { { 0x61fc181060a71676L,0xe852d1a8f66a8ad1L,0x172bbd656417231eL, + 0x0d6de7bd3babb11fL }, + { 0x6fde6f88c8e347f8L,0x1c5875479bd99cc3L,0x78e54ed034076950L, + 0x97f0f334796e83baL } }, + /* 31 << 14 */ + { { 0xe4dbe1ce4924867aL,0xbd5f51b060b84917L,0x375300403cb09a79L, + 0xdb3fe0f8ff1743d8L }, + { 0xed7894d8556fa9dbL,0xfa26216923412fbfL,0x563be0dbba7b9291L, + 0x6ca8b8c00c9fb234L } }, + /* 32 << 14 */ + { { 0xed406aa9bd763802L,0xc21486a065303da1L,0x61ae291ec7e62ec4L, + 0x622a0492df99333eL }, + { 0x7fd80c9dbb7a8ee0L,0xdc2ed3bc6c01aedbL,0x35c35a1208be74ecL, + 0xd540cb1a469f671fL } }, + /* 33 << 14 */ + { { 0xd16ced4ecf84f6c7L,0x8561fb9c2d090f43L,0x7e693d796f239db4L, + 0xa736f92877bd0d94L }, + { 0x07b4d9292c1950eeL,0xda17754356dc11b3L,0xa5dfbbaa7a6a878eL, + 0x1c70cb294decb08aL } }, + /* 34 << 14 */ + { { 0xfba28c8b6f0f7c50L,0xa8eba2b8854dcc6dL,0x5ff8e89a36b78642L, + 0x070c1c8ef6873adfL }, + { 0xbbd3c3716484d2e4L,0xfb78318f0d414129L,0x2621a39c6ad93b0bL, + 0x979d74c2a9e917f7L } }, + /* 35 << 14 */ + { { 0xfc19564761fb0428L,0x4d78954abee624d4L,0xb94896e0b8ae86fdL, + 0x6667ac0cc91c8b13L }, + { 0x9f18051243bcf832L,0xfbadf8b7a0010137L,0xc69b4089b3ba8aa7L, + 0xfac4bacde687ce85L } }, + /* 36 << 14 */ + { { 0x9164088d977eab40L,0x51f4c5b62760b390L,0xd238238f340dd553L, + 0x358566c3db1d31c9L }, + { 0x3a5ad69e5068f5ffL,0xf31435fcdaff6b06L,0xae549a5bd6debff0L, + 0x59e5f0b775e01331L } }, + /* 37 << 14 */ + { { 0x5d492fb898559acfL,0x96018c2e4db79b50L,0x55f4a48f609f66aaL, + 0x1943b3af4900a14fL }, + { 
0xc22496df15a40d39L,0xb2a446844c20f7c5L,0x76a35afa3b98404cL, + 0xbec75725ff5d1b77L } }, + /* 38 << 14 */ + { { 0xb67aa163bea06444L,0x27e95bb2f724b6f2L,0x3c20e3e9d238c8abL, + 0x1213754eddd6ae17L }, + { 0x8c431020716e0f74L,0x6679c82effc095c2L,0x2eb3adf4d0ac2932L, + 0x2cc970d301bb7a76L } }, + /* 39 << 14 */ + { { 0x70c71f2f740f0e66L,0x545c616b2b6b23ccL,0x4528cfcbb40a8bd7L, + 0xff8396332ab27722L }, + { 0x049127d9025ac99aL,0xd314d4a02b63e33bL,0xc8c310e728d84519L, + 0x0fcb8983b3bc84baL } }, + /* 40 << 14 */ + { { 0x2cc5226138634818L,0x501814f4b44c2e0bL,0xf7e181aa54dfdba3L, + 0xcfd58ff0e759718cL }, + { 0xf90cdb14d3b507a8L,0x57bd478ec50bdad8L,0x29c197e250e5f9aaL, + 0x4db6eef8e40bc855L } }, + /* 41 << 14 */ + { { 0x2cc8f21ad1fc0654L,0xc71cc96381269d73L,0xecfbb204077f49f9L, + 0xdde92571ca56b793L }, + { 0x9abed6a3f97ad8f7L,0xe6c19d3f924de3bdL,0x8dce92f4a140a800L, + 0x85f44d1e1337af07L } }, + /* 42 << 14 */ + { { 0x5953c08b09d64c52L,0xa1b5e49ff5df9749L,0x336a8fb852735f7dL, + 0xb332b6db9add676bL }, + { 0x558b88a0b4511aa4L,0x09788752dbd5cc55L,0x16b43b9cd8cd52bdL, + 0x7f0bc5a0c2a2696bL } }, + /* 43 << 14 */ + { { 0x146e12d4c11f61efL,0x9ce107543a83e79eL,0x08ec73d96cbfca15L, + 0x09ff29ad5b49653fL }, + { 0xe31b72bde7da946eL,0xebf9eb3bee80a4f2L,0xd1aabd0817598ce4L, + 0x18b5fef453f37e80L } }, + /* 44 << 14 */ + { { 0xd5d5cdd35958cd79L,0x3580a1b51d373114L,0xa36e4c91fa935726L, + 0xa38c534def20d760L }, + { 0x7088e40a2ff5845bL,0xe5bb40bdbd78177fL,0x4f06a7a8857f9920L, + 0xe3cc3e50e968f05dL } }, + /* 45 << 14 */ + { { 0x1d68b7fee5682d26L,0x5206f76faec7f87cL,0x41110530041951abL, + 0x58ec52c1d4b5a71aL }, + { 0xf3488f990f75cf9aL,0xf411951fba82d0d5L,0x27ee75be618895abL, + 0xeae060d46d8aab14L } }, + /* 46 << 14 */ + { { 0x9ae1df737fb54dc2L,0x1f3e391b25963649L,0x242ec32afe055081L, + 0x5bd450ef8491c9bdL }, + { 0x367efc67981eb389L,0xed7e19283a0550d5L,0x362e776bab3ce75cL, + 0xe890e3081f24c523L } }, + /* 47 << 14 */ + { { 0xb961b682feccef76L,0x8b8e11f58bba6d92L,0x8f2ccc4c2b2375c4L, + 0x0d7f7a52e2f86cfaL }, + { 0xfd94d30a9efe5633L,0x2d8d246b5451f934L,0x2234c6e3244e6a00L, + 0xde2b5b0dddec8c50L } }, + /* 48 << 14 */ + { { 0x2ce53c5abf776f5bL,0x6f72407160357b05L,0xb259371771bf3f7aL, + 0x87d2501c440c4a9fL }, + { 0x440552e187b05340L,0xb7bf7cc821624c32L,0x4155a6ce22facddbL, + 0x5a4228cb889837efL } }, + /* 49 << 14 */ + { { 0xef87d6d6fd4fd671L,0xa233687ec2daa10eL,0x7562224403c0eb96L, + 0x7632d1848bf19be6L }, + { 0x05d0f8e940735ff4L,0x3a3e6e13c00931f1L,0x31ccde6adafe3f18L, + 0xf381366acfe51207L } }, + /* 50 << 14 */ + { { 0x24c222a960167d92L,0x62f9d6f87529f18cL,0x412397c00353b114L, + 0x334d89dcef808043L }, + { 0xd9ec63ba2a4383ceL,0xcec8e9375cf92ba0L,0xfb8b4288c8be74c0L, + 0x67d6912f105d4391L } }, + /* 51 << 14 */ + { { 0x7b996c461b913149L,0x36aae2ef3a4e02daL,0xb68aa003972de594L, + 0x284ec70d4ec6d545L }, + { 0xf3d2b2d061391d54L,0x69c5d5d6fe114e92L,0xbe0f00b5b4482dffL, + 0xe1596fa5f5bf33c5L } }, + /* 52 << 14 */ + { { 0x10595b5696a71cbaL,0x944938b2fdcadeb7L,0xa282da4cfccd8471L, + 0x98ec05f30d37bfe1L }, + { 0xe171ce1b0698304aL,0x2d69144421bdf79bL,0xd0cd3b741b21dec1L, + 0x712ecd8b16a15f71L } }, + /* 53 << 14 */ + { { 0x8d4c00a700fd56e1L,0x02ec9692f9527c18L,0x21c449374a3e42e1L, + 0x9176fbab1392ae0aL }, + { 0x8726f1ba44b7b618L,0xb4d7aae9f1de491cL,0xf91df7b907b582c0L, + 0x7e116c30ef60aa3aL } }, + /* 54 << 14 */ + { { 0x99270f81466265d7L,0xb15b6fe24df7adf0L,0xfe33b2d3f9738f7fL, + 0x48553ab9d6d70f95L }, + { 0x2cc72ac8c21e94dbL,0x795ac38dbdc0bbeeL,0x0a1be4492e40478fL, + 0x81bd3394052bde55L } }, + /* 55 << 14 */ + { { 
0x63c8dbe956b3c4f2L,0x017a99cf904177ccL,0x947bbddb4d010fc1L, + 0xacf9b00bbb2c9b21L }, + { 0x2970bc8d47173611L,0x1a4cbe08ac7d756fL,0x06d9f4aa67d541a2L, + 0xa3e8b68959c2cf44L } }, + /* 56 << 14 */ + { { 0xaad066da4d88f1ddL,0xc604f1657ad35deaL,0x7edc07204478ca67L, + 0xa10dfae0ba02ce06L }, + { 0xeceb1c76af36f4e4L,0x994b2292af3f8f48L,0xbf9ed77b77c8a68cL, + 0x74f544ea51744c9dL } }, + /* 57 << 14 */ + { { 0x82d05bb98113a757L,0x4ef2d2b48a9885e4L,0x1e332be51aa7865fL, + 0x22b76b18290d1a52L }, + { 0x308a231044351683L,0x9d861896a3f22840L,0x5959ddcd841ed947L, + 0x0def0c94154b73bfL } }, + /* 58 << 14 */ + { { 0xf01054174c7c15e0L,0x539bfb023a277c32L,0xe699268ef9dccf5fL, + 0x9f5796a50247a3bdL }, + { 0x8b839de84f157269L,0xc825c1e57a30196bL,0x6ef0aabcdc8a5a91L, + 0xf4a8ce6c498b7fe6L } }, + /* 59 << 14 */ + { { 0x1cce35a770cbac78L,0x83488e9bf6b23958L,0x0341a070d76cb011L, + 0xda6c9d06ae1b2658L }, + { 0xb701fb30dd648c52L,0x994ca02c52fb9fd1L,0x069331176f563086L, + 0x3d2b810017856babL } }, + /* 60 << 14 */ + { { 0xe89f48c85963a46eL,0x658ab875a99e61c7L,0x6e296f874b8517b4L, + 0x36c4fcdcfc1bc656L }, + { 0xde5227a1a3906defL,0x9fe95f5762418945L,0x20c91e81fdd96cdeL, + 0x5adbe47eda4480deL } }, + /* 61 << 14 */ + { { 0xa009370f396de2b6L,0x98583d4bf0ecc7bdL,0xf44f6b57e51d0672L, + 0x03d6b078556b1984L }, + { 0x27dbdd93b0b64912L,0x9b3a343415687b09L,0x0dba646151ec20a9L, + 0xec93db7fff28187cL } }, + /* 62 << 14 */ + { { 0x00ff8c2466e48bddL,0x2514f2f911ccd78eL,0xeba11f4fe1250603L, + 0x8a22cd41243fa156L }, + { 0xa4e58df4b283e4c6L,0x78c298598b39783fL,0x5235aee2a5259809L, + 0xc16284b50e0227ddL } }, + /* 63 << 14 */ + { { 0xa5f579161338830dL,0x6d4b8a6bd2123fcaL,0x236ea68af9c546f8L, + 0xc1d36873fa608d36L }, + { 0xcd76e4958d436d13L,0xd4d9c2218fb080afL,0x665c1728e8ad3fb5L, + 0xcf1ebe4db3d572e0L } }, + /* 64 << 14 */ + { { 0xa7a8746a584c5e20L,0x267e4ea1b9dc7035L,0x593a15cfb9548c9bL, + 0x5e6e21354bd012f3L }, + { 0xdf31cc6a8c8f936eL,0x8af84d04b5c241dcL,0x63990a6f345efb86L, + 0x6fef4e61b9b962cbL } }, + /* 0 << 21 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 21 */ + { { 0xf6368f0925722608L,0x131260db131cf5c6L,0x40eb353bfab4f7acL, + 0x85c7888037eee829L }, + { 0x4c1581ffc3bdf24eL,0x5bff75cbf5c3c5a8L,0x35e8c83fa14e6f40L, + 0xb81d1c0f0295e0caL } }, + /* 2 << 21 */ + { { 0xfcde7cc8f43a730fL,0xe89b6f3c33ab590eL,0xc823f529ad03240bL, + 0x82b79afe98bea5dbL }, + { 0x568f2856962fe5deL,0x0c590adb60c591f3L,0x1fc74a144a28a858L, + 0x3b662498b3203f4cL } }, + /* 3 << 21 */ + { { 0x91e3cf0d6c39765aL,0xa2db3acdac3cca0bL,0x288f2f08cb953b50L, + 0x2414582ccf43cf1aL }, + { 0x8dec8bbc60eee9a8L,0x54c79f02729aa042L,0xd81cd5ec6532f5d5L, + 0xa672303acf82e15fL } }, + /* 4 << 21 */ + { { 0x376aafa8719c0563L,0xcd8ad2dcbc5fc79fL,0x303fdb9fcb750cd3L, + 0x14ff052f4418b08eL }, + { 0xf75084cf3e2d6520L,0x7ebdf0f8144ed509L,0xf43bf0f2d3f25b98L, + 0x86ad71cfa354d837L } }, + /* 5 << 21 */ + { { 0xb827fe9226f43572L,0xdfd3ab5b5d824758L,0x315dd23a539094c1L, + 0x85c0e37a66623d68L }, + { 0x575c79727be19ae0L,0x616a3396df0d36b5L,0xa1ebb3c826b1ff7eL, + 0x635b9485140ad453L } }, + /* 6 << 21 */ + { { 0x92bf3cdada430c0bL,0x4702850e3a96dac6L,0xc91cf0a515ac326aL, + 0x95de4f49ab8c25e4L }, + { 0xb01bad09e265c17cL,0x24e45464087b3881L,0xd43e583ce1fac5caL, + 0xe17cb3186ead97a6L } }, + /* 7 << 21 */ + { { 0x6cc3924374dcec46L,0x33cfc02d54c2b73fL,0x82917844f26cd99cL, + 0x8819dd95d1773f89L }, + { 0x09572aa60871f427L,0x8e0cf365f6f01c34L,0x7fa52988bff1f5afL, + 0x4eb357eae75e8e50L } }, + /* 8 << 21 */ + { { 0xd9d0c8c4868af75dL,0xd7325cff45c8c7eaL,0xab471996cc81ecb0L, + 
0xff5d55f3611824edL }, + { 0xbe3145411977a0eeL,0x5085c4c5722038c6L,0x2d5335bff94bb495L, + 0x894ad8a6c8e2a082L } }, + /* 9 << 21 */ + { { 0x5c3e2341ada35438L,0xf4a9fc89049b8c4eL,0xbeeb355a9f17cf34L, + 0x3f311e0e6c91fe10L }, + { 0xc2d2003892ab9891L,0x257bdcc13e8ce9a9L,0x1b2d978988c53beeL, + 0x927ce89acdba143aL } }, + /* 10 << 21 */ + { { 0xb0a32cca523db280L,0x5c889f8a50d43783L,0x503e04b34897d16fL, + 0x8cdb6e7808f5f2e8L }, + { 0x6ab91cf0179c8e74L,0xd8874e5248211d60L,0xf948d4d5ea851200L, + 0x4076d41ee6f9840aL } }, + /* 11 << 21 */ + { { 0xc20e263c47b517eaL,0x79a448fd30685e5eL,0xe55f6f78f90631a0L, + 0x88a790b1a79e6346L }, + { 0x62160c7d80969fe8L,0x54f92fd441491bb9L,0xa6645c235c957526L, + 0xf44cc5aebea3ce7bL } }, + /* 12 << 21 */ + { { 0xf76283278b1e68b7L,0xc731ad7a303f29d3L,0xfe5a9ca957d03ecbL, + 0x96c0d50c41bc97a7L }, + { 0xc4669fe79b4f7f24L,0xfdd781d83d9967efL,0x7892c7c35d2c208dL, + 0x8bf64f7cae545cb3L } }, + /* 13 << 21 */ + { { 0xc01f862c467be912L,0xf4c85ee9c73d30ccL,0x1fa6f4be6ab83ec7L, + 0xa07a3c1c4e3e3cf9L }, + { 0x87f8ef450c00beb3L,0x30e2c2b3000d4c3eL,0x1aa00b94fe08bf5bL, + 0x32c133aa9224ef52L } }, + /* 14 << 21 */ + { { 0x38df16bb32e5685dL,0x68a9e06958e6f544L,0x495aaff7cdc5ebc6L, + 0xf894a645378b135fL }, + { 0xf316350a09e27ecfL,0xeced201e58f7179dL,0x2eec273ce97861baL, + 0x47ec2caed693be2eL } }, + /* 15 << 21 */ + { { 0xfa4c97c4f68367ceL,0xe4f47d0bbe5a5755L,0x17de815db298a979L, + 0xd7eca659c177dc7dL }, + { 0x20fdbb7149ded0a3L,0x4cb2aad4fb34d3c5L,0x2cf31d2860858a33L, + 0x3b6873efa24aa40fL } }, + /* 16 << 21 */ + { { 0x540234b22c11bb37L,0x2d0366dded4c74a3L,0xf9a968daeec5f25dL, + 0x3660106867b63142L }, + { 0x07cd6d2c68d7b6d4L,0xa8f74f090c842942L,0xe27514047768b1eeL, + 0x4b5f7e89fe62aee4L } }, + /* 17 << 21 */ + { { 0xc6a7717789070d26L,0xa1f28e4edd1c8bc7L,0xea5f4f06469e1f17L, + 0x78fc242afbdb78e0L }, + { 0xc9c7c5928b0588f1L,0xb6b7a0fd1535921eL,0xcc5bdb91bde5ae35L, + 0xb42c485e12ff1864L } }, + /* 18 << 21 */ + { { 0xa1113e13dbab98aaL,0xde9d469ba17b1024L,0x23f48b37c0462d3aL, + 0x3752e5377c5c078dL }, + { 0xe3a86add15544eb9L,0xf013aea780fba279L,0x8b5bb76cf22001b5L, + 0xe617ba14f02891abL } }, + /* 19 << 21 */ + { { 0xd39182a6936219d3L,0x5ce1f194ae51cb19L,0xc78f8598bf07a74cL, + 0x6d7158f222cbf1bcL }, + { 0x3b846b21e300ce18L,0x35fba6302d11275dL,0x5fe25c36a0239b9bL, + 0xd8beb35ddf05d940L } }, + /* 20 << 21 */ + { { 0x4db02bb01f7e320dL,0x0641c3646da320eaL,0x6d95fa5d821389a3L, + 0x926997488fcd8e3dL }, + { 0x316fef17ceb6c143L,0x67fcb841d933762bL,0xbb837e35118b17f8L, + 0x4b92552f9fd24821L } }, + /* 21 << 21 */ + { { 0xae6bc70e46aca793L,0x1cf0b0e4e579311bL,0x8dc631be5802f716L, + 0x099bdc6fbddbee4dL }, + { 0xcc352bb20caf8b05L,0xf74d505a72d63df2L,0xb9876d4b91c4f408L, + 0x1ce184739e229b2dL } }, + /* 22 << 21 */ + { { 0x4950759783abdb4aL,0x850fbcb6dee84b18L,0x6325236e609e67dcL, + 0x04d831d99336c6d8L }, + { 0x8deaae3bfa12d45dL,0xe425f8ce4746e246L,0x8004c17524f5f31eL, + 0xaca16d8fad62c3b7L } }, + /* 23 << 21 */ + { { 0x0dc15a6a9152f934L,0xf1235e5ded0e12c1L,0xc33c06ecda477dacL, + 0x76be8732b2ea0006L }, + { 0xcf3f78310c0cd313L,0x3c524553a614260dL,0x31a756f8cab22d15L, + 0x03ee10d177827a20L } }, + /* 24 << 21 */ + { { 0xd1e059b21994ef20L,0x2a653b69638ae318L,0x70d5eb582f699010L, + 0x279739f709f5f84aL }, + { 0x5da4663c8b799336L,0xfdfdf14d203c37ebL,0x32d8a9dca1dbfb2dL, + 0xab40cff077d48f9bL } }, + /* 25 << 21 */ + { { 0xc018b383d20b42d5L,0xf9a810ef9f78845fL,0x40af3753bdba9df0L, + 0xb90bdcfc131dfdf9L }, + { 0x18720591f01ab782L,0xc823f2116af12a88L,0xa51b80f30dc14401L, + 0xde248f77fb2dfbe3L } }, + /* 26 << 21 */ + { { 
0xef5a44e50cafe751L,0x73997c9cd4dcd221L,0x32fd86d1de854024L, + 0xd5b53adca09b84bbL }, + { 0x008d7a11dcedd8d1L,0x406bd1c874b32c84L,0x5d4472ff05dde8b1L, + 0x2e25f2cdfce2b32fL } }, + /* 27 << 21 */ + { { 0xbec0dd5e29dfc254L,0x4455fcf62b98b267L,0x0b4d43a5c72df2adL, + 0xea70e6be48a75397L }, + { 0x2aad61695820f3bfL,0xf410d2dd9e37f68fL,0x70fb7dba7be5ac83L, + 0x636bb64536ec3eecL } }, + /* 28 << 21 */ + { { 0x27104ea39754e21cL,0xbc87a3e68d63c373L,0x483351d74109db9aL, + 0x0fa724e360134da7L }, + { 0x9ff44c29b0720b16L,0x2dd0cf1306aceeadL,0x5942758ce26929a6L, + 0x96c5db92b766a92bL } }, + /* 29 << 21 */ + { { 0xcec7d4c05f18395eL,0xd3f227441f80d032L,0x7a68b37acb86075bL, + 0x074764ddafef92dbL }, + { 0xded1e9507bc7f389L,0xc580c850b9756460L,0xaeeec2a47da48157L, + 0x3f0b4e7f82c587b3L } }, + /* 30 << 21 */ + { { 0x231c6de8a9f19c53L,0x5717bd736974e34eL,0xd9e1d216f1508fa9L, + 0x9f112361dadaa124L }, + { 0x80145e31823b7348L,0x4dd8f0d5ac634069L,0xe3d82fc72297c258L, + 0x276fcfee9cee7431L } }, + /* 31 << 21 */ + { { 0x8eb61b5e2bc0aea9L,0x4f668fd5de329431L,0x03a32ab138e4b87eL, + 0xe137451773d0ef0bL }, + { 0x1a46f7e6853ac983L,0xc3bdf42e68e78a57L,0xacf207852ea96dd1L, + 0xa10649b9f1638460L } }, + /* 32 << 21 */ + { { 0xf2369f0b879fbbedL,0x0ff0ae86da9d1869L,0x5251d75956766f45L, + 0x4984d8c02be8d0fcL }, + { 0x7ecc95a6d21008f0L,0x29bd54a03a1a1c49L,0xab9828c5d26c50f3L, + 0x32c0087c51d0d251L } }, + /* 33 << 21 */ + { { 0x9bac3ce60c1cdb26L,0xcd94d947557ca205L,0x1b1bd5989db1fdcdL, + 0x0eda0108a3d8b149L }, + { 0x9506661056152fccL,0xc2f037e6e7192b33L,0xdeffb41ac92e05a4L, + 0x1105f6c2c2f6c62eL } }, + /* 34 << 21 */ + { { 0x68e735008733913cL,0xcce861633f3adc40L,0xf407a94238a278e9L, + 0xd13c1b9d2ab21292L }, + { 0x93ed7ec71c74cf5cL,0x8887dc48f1a4c1b4L,0x3830ff304b3a11f1L, + 0x358c5a3c58937cb6L } }, + /* 35 << 21 */ + { { 0x027dc40489022829L,0x40e939773b798f79L,0x90ad333738be6eadL, + 0x9c23f6bcf34c0a5dL }, + { 0xd1711a35fbffd8bbL,0x60fcfb491949d3ddL,0x09c8ef4b7825d93aL, + 0x24233cffa0a8c968L } }, + /* 36 << 21 */ + { { 0x67ade46ce6d982afL,0xebb6bf3ee7544d7cL,0xd6b9ba763d8bd087L, + 0x46fe382d4dc61280L }, + { 0xbd39a7e8b5bdbd75L,0xab381331b8f228feL,0x0709a77cce1c4300L, + 0x6a247e56f337ceacL } }, + /* 37 << 21 */ + { { 0x8f34f21b636288beL,0x9dfdca74c8a7c305L,0x6decfd1bea919e04L, + 0xcdf2688d8e1991f8L }, + { 0xe607df44d0f8a67eL,0xd985df4b0b58d010L,0x57f834c50c24f8f4L, + 0xe976ef56a0bf01aeL } }, + /* 38 << 21 */ + { { 0x536395aca1c32373L,0x351027aa734c0a13L,0xd2f1b5d65e6bd5bcL, + 0x2b539e24223debedL }, + { 0xd4994cec0eaa1d71L,0x2a83381d661dcf65L,0x5f1aed2f7b54c740L, + 0x0bea3fa5d6dda5eeL } }, + /* 39 << 21 */ + { { 0x9d4fb68436cc6134L,0x8eb9bbf3c0a443ddL,0xfc500e2e383b7d2aL, + 0x7aad621c5b775257L }, + { 0x69284d740a8f7cc0L,0xe820c2ce07562d65L,0xbf9531b9499758eeL, + 0x73e95ca56ee0cc2dL } }, + /* 40 << 21 */ + { { 0xf61790abfbaf50a5L,0xdf55e76b684e0750L,0xec516da7f176b005L, + 0x575553bb7a2dddc7L }, + { 0x37c87ca3553afa73L,0x315f3ffc4d55c251L,0xe846442aaf3e5d35L, + 0x61b911496495ff28L } }, + /* 41 << 21 */ + { { 0x23cc95d3fa326dc3L,0x1df4da1f18fc2ceaL,0x24bf9adcd0a37d59L, + 0xb6710053320d6e1eL }, + { 0x96f9667e618344d1L,0xcc7ce042a06445afL,0xa02d8514d68dbc3aL, + 0x4ea109e4280b5a5bL } }, + /* 42 << 21 */ + { { 0x5741a7acb40961bfL,0x4ada59376aa56bfaL,0x7feb914502b765d1L, + 0x561e97bee6ad1582L }, + { 0xbbc4a5b6da3982f5L,0x0c2659edb546f468L,0xb8e7e6aa59612d20L, + 0xd83dfe20ac19e8e0L } }, + /* 43 << 21 */ + { { 0x8530c45fb835398cL,0x6106a8bfb38a41c2L,0x21e8f9a635f5dcdbL, + 0x39707137cae498edL }, + { 
0x70c23834d8249f00L,0x9f14b58fab2537a0L,0xd043c3655f61c0c2L, + 0xdc5926d609a194a7L } }, + /* 44 << 21 */ + { { 0xddec03398e77738aL,0xd07a63effba46426L,0x2e58e79cee7f6e86L, + 0xe59b0459ff32d241L }, + { 0xc5ec84e520fa0338L,0x97939ac8eaff5aceL,0x0310a4e3b4a38313L, + 0x9115fba28f9d9885L } }, + /* 45 << 21 */ + { { 0x8dd710c25fadf8c3L,0x66be38a2ce19c0e2L,0xd42a279c4cfe5022L, + 0x597bb5300e24e1b8L }, + { 0x3cde86b7c153ca7fL,0xa8d30fb3707d63bdL,0xac905f92bd60d21eL, + 0x98e7ffb67b9a54abL } }, + /* 46 << 21 */ + { { 0xd7147df8e9726a30L,0xb5e216ffafce3533L,0xb550b7992ff1ec40L, + 0x6b613b87a1e953fdL }, + { 0x87b88dba792d5610L,0x2ee1270aa190fbe1L,0x02f4e2dc2ef581daL, + 0x016530e4eff82a95L } }, + /* 47 << 21 */ + { { 0xcbb93dfd8fd6ee89L,0x16d3d98646848fffL,0x600eff241da47adfL, + 0x1b9754a00ad47a71L }, + { 0x8f9266df70c33b98L,0xaadc87aedf34186eL,0x0d2ce8e14ad24132L, + 0x8a47cbfc19946ebaL } }, + /* 48 << 21 */ + { { 0x47feeb6662b5f3afL,0xcefab5610abb3734L,0x449de60e19f35cb1L, + 0x39f8db14157f0eb9L }, + { 0xffaecc5b3c61bfd6L,0xa5a4d41d41216703L,0x7f8fabed224e1cc2L, + 0x0d5a8186871ad953L } }, + /* 49 << 21 */ + { { 0xf10774f7d22da9a9L,0x45b8a678cc8a9b0dL,0xd9c2e722bdc32cffL, + 0xbf71b5f5337202a5L }, + { 0x95c57f2f69fc4db9L,0xb6dad34c765d01e1L,0x7e0bd13fcb904635L, + 0x61751253763a588cL } }, + /* 50 << 21 */ + { { 0xd85c299781af2c2dL,0xc0f7d9c481b9d7daL,0x838a34ae08533e8dL, + 0x15c4cb08311d8311L }, + { 0x97f832858e121e14L,0xeea7dc1e85000a5fL,0x0c6059b65d256274L, + 0xec9beaceb95075c0L } }, + /* 51 << 21 */ + { { 0x173daad71df97828L,0xbf851cb5a8937877L,0xb083c59401646f3cL, + 0x3bad30cf50c6d352L }, + { 0xfeb2b202496bbceaL,0x3cf9fd4f18a1e8baL,0xd26de7ff1c066029L, + 0x39c81e9e4e9ed4f8L } }, + /* 52 << 21 */ + { { 0xd8be0cb97b390d35L,0x01df2bbd964aab27L,0x3e8c1a65c3ef64f8L, + 0x567291d1716ed1ddL }, + { 0x95499c6c5f5406d3L,0x71fdda395ba8e23fL,0xcfeb320ed5096eceL, + 0xbe7ba92bca66dd16L } }, + /* 53 << 21 */ + { { 0x4608d36bc6fb5a7dL,0xe3eea15a6d2dd0e0L,0x75b0a3eb8f97a36aL, + 0xf59814cc1c83de1eL }, + { 0x56c9c5b01c33c23fL,0xa96c1da46faa4136L,0x46bf2074de316551L, + 0x3b866e7b1f756c8fL } }, + /* 54 << 21 */ + { { 0x727727d81495ed6bL,0xb2394243b682dce7L,0x8ab8454e758610f3L, + 0xc243ce84857d72a4L }, + { 0x7b320d71dbbf370fL,0xff9afa3778e0f7caL,0x0119d1e0ea7b523fL, + 0xb997f8cb058c7d42L } }, + /* 55 << 21 */ + { { 0x285bcd2a37bbb184L,0x51dcec49a45d1fa6L,0x6ade3b64e29634cbL, + 0x080c94a726b86ef1L }, + { 0xba583db12283fbe3L,0x902bddc85a9315edL,0x07c1ccb386964becL, + 0x78f4eacfb6258301L } }, + /* 56 << 21 */ + { { 0x4bdf3a4956f90823L,0xba0f5080741d777bL,0x091d71c3f38bf760L, + 0x9633d50f9b625b02L }, + { 0x03ecb743b8c9de61L,0xb47512545de74720L,0x9f9defc974ce1cb2L, + 0x774a4f6a00bd32efL } }, + /* 57 << 21 */ + { { 0xaca385f773848f22L,0x53dad716f3f8558eL,0xab7b34b093c471f9L, + 0xf530e06919644bc7L }, + { 0x3d9fb1ffdd59d31aL,0x4382e0df08daa795L,0x165c6f4bd5cc88d7L, + 0xeaa392d54a18c900L } }, + /* 58 << 21 */ + { { 0x94203c67648024eeL,0x188763f28c2fabcdL,0xa80f87acbbaec835L, + 0x632c96e0f29d8d54L }, + { 0x29b0a60e4c00a95eL,0x2ef17f40e011e9faL,0xf6c0e1d115b77223L, + 0xaaec2c6214b04e32L } }, + /* 59 << 21 */ + { { 0xd35688d83d84e58cL,0x2af5094c958571dbL,0x4fff7e19760682a6L, + 0x4cb27077e39a407cL }, + { 0x0f59c5474ff0e321L,0x169f34a61b34c8ffL,0x2bff109652bc1ba7L, + 0xa25423b783583544L } }, + /* 60 << 21 */ + { { 0x5d55d5d50ac8b782L,0xff6622ec2db3c892L,0x48fce7416b8bb642L, + 0x31d6998c69d7e3dcL }, + { 0xdbaf8004cadcaed0L,0x801b0142d81d053cL,0x94b189fc59630ec6L, + 0x120e9934af762c8eL } }, + /* 61 << 21 */ + { { 
0x53a29aa4fdc6a404L,0x19d8e01ea1909948L,0x3cfcabf1d7e89681L, + 0x3321a50d4e132d37L }, + { 0xd0496863e9a86111L,0x8c0cde6106a3bc65L,0xaf866c49fc9f8eefL, + 0x2066350eff7f5141L } }, + /* 62 << 21 */ + { { 0x4f8a4689e56ddfbdL,0xea1b0c07fe32983aL,0x2b317462873cb8cbL, + 0x658deddc2d93229fL }, + { 0x65efaf4d0f64ef58L,0xfe43287d730cc7a8L,0xaebc0c723d047d70L, + 0x92efa539d92d26c9L } }, + /* 63 << 21 */ + { { 0x06e7845794b56526L,0x415cb80f0961002dL,0x89e5c56576dcb10fL, + 0x8bbb6982ff9259feL }, + { 0x4fe8795b9abc2668L,0xb5d4f5341e678fb1L,0x6601f3be7b7da2b9L, + 0x98da59e2a13d6805L } }, + /* 64 << 21 */ + { { 0x190d8ea601799a52L,0xa20cec41b86d2952L,0x3062ffb27fff2a7cL, + 0x741b32e579f19d37L }, + { 0xf80d81814eb57d47L,0x7a2d0ed416aef06bL,0x09735fb01cecb588L, + 0x1641caaac6061f5bL } }, + /* 0 << 28 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 28 */ + { { 0x7f99824f20151427L,0x206828b692430206L,0xaa9097d7e1112357L, + 0xacf9a2f209e414ecL }, + { 0xdbdac9da27915356L,0x7e0734b7001efee3L,0x54fab5bbd2b288e2L, + 0x4c630fc4f62dd09cL } }, + /* 2 << 28 */ + { { 0x8537107a1ac2703bL,0xb49258d86bc857b5L,0x57df14debcdaccd1L, + 0x24ab68d7c4ae8529L }, + { 0x7ed8b5d4734e59d0L,0x5f8740c8c495cc80L,0x84aedd5a291db9b3L, + 0x80b360f84fb995beL } }, + /* 3 << 28 */ + { { 0xae915f5d5fa067d1L,0x4134b57f9668960cL,0xbd3656d6a48edaacL, + 0xdac1e3e4fc1d7436L }, + { 0x674ff869d81fbb26L,0x449ed3ecb26c33d4L,0x85138705d94203e8L, + 0xccde538bbeeb6f4aL } }, + /* 4 << 28 */ + { { 0x55d5c68da61a76faL,0x598b441dca1554dcL,0xd39923b9773b279cL, + 0x33331d3c36bf9efcL }, + { 0x2d4c848e298de399L,0xcfdb8e77a1a27f56L,0x94c855ea57b8ab70L, + 0xdcdb9dae6f7879baL } }, + /* 5 << 28 */ + { { 0x7bdff8c2019f2a59L,0xb3ce5bb3cb4fbc74L,0xea907f688a9173ddL, + 0x6cd3d0d395a75439L }, + { 0x92ecc4d6efed021cL,0x09a9f9b06a77339aL,0x87ca6b157188c64aL, + 0x10c2996844899158L } }, + /* 6 << 28 */ + { { 0x5859a229ed6e82efL,0x16f338e365ebaf4eL,0x0cd313875ead67aeL, + 0x1c73d22854ef0bb4L }, + { 0x4cb5513174a5c8c7L,0x01cd29707f69ad6aL,0xa04d00dde966f87eL, + 0xd96fe4470b7b0321L } }, + /* 7 << 28 */ + { { 0x342ac06e88fbd381L,0x02cd4a845c35a493L,0xe8fa89de54f1bbcdL, + 0x341d63672575ed4cL }, + { 0xebe357fbd238202bL,0x600b4d1aa984ead9L,0xc35c9f4452436ea0L, + 0x96fe0a39a370751bL } }, + /* 8 << 28 */ + { { 0x4c4f07367f636a38L,0x9f943fb70e76d5cbL,0xb03510baa8b68b8bL, + 0xc246780a9ed07a1fL }, + { 0x3c0514156d549fc2L,0xc2953f31607781caL,0x955e2c69d8d95413L, + 0xb300fadc7bd282e3L } }, + /* 9 << 28 */ + { { 0x81fe7b5087e9189fL,0xdb17375cf42dda27L,0x22f7d896cf0a5904L, + 0xa0e57c5aebe348e6L }, + { 0xa61011d3f40e3c80L,0xb11893218db705c5L,0x4ed9309e50fedec3L, + 0xdcf14a104d6d5c1dL } }, + /* 10 << 28 */ + { { 0x056c265b55691342L,0xe8e0850491049dc7L,0x131329f5c9bae20aL, + 0x96c8b3e8d9dccdb4L }, + { 0x8c5ff838fb4ee6b4L,0xfc5a9aeb41e8ccf0L,0x7417b764fae050c6L, + 0x0953c3d700452080L } }, + /* 11 << 28 */ + { { 0x2137268238dfe7e8L,0xea417e152bb79d4bL,0x59641f1c76e7cf2dL, + 0x271e3059ea0bcfccL }, + { 0x624c7dfd7253ecbdL,0x2f552e254fca6186L,0xcbf84ecd4d866e9cL, + 0x73967709f68d4610L } }, + /* 12 << 28 */ + { { 0xa14b1163c27901b4L,0xfd9236e0899b8bf3L,0x42b091eccbc6da0aL, + 0xbb1dac6f5ad1d297L }, + { 0x80e61d53a91cf76eL,0x4110a412d31f1ee7L,0x2d87c3ba13efcf77L, + 0x1f374bb4df450d76L } }, + /* 13 << 28 */ + { { 0x5e78e2f20d188dabL,0xe3968ed0f4b885efL,0x46c0568e7314570fL, + 0x3161633801170521L }, + { 0x18e1e7e24f0c8afeL,0x4caa75ffdeea78daL,0x82db67f27c5d8a51L, + 0x36a44d866f505370L } }, + /* 14 << 28 */ + { { 0xd72c5bda0333974fL,0x5db516ae27a70146L,0x34705281210ef921L, + 
0xbff17a8f0c9c38e5L }, + { 0x78f4814e12476da1L,0xc1e1661333c16980L,0x9e5b386f424d4bcaL, + 0x4c274e87c85740deL } }, + /* 15 << 28 */ + { { 0xb6a9b88d6c2f5226L,0x14d1b944550d7ca8L,0x580c85fc1fc41709L, + 0xc1da368b54c6d519L }, + { 0x2b0785ced5113cf7L,0x0670f6335a34708fL,0x46e2376715cc3f88L, + 0x1b480cfa50c72c8fL } }, + /* 16 << 28 */ + { { 0x202886024147519aL,0xd0981eac26b372f0L,0xa9d4a7caa785ebc8L, + 0xd953c50ddbdf58e9L }, + { 0x9d6361ccfd590f8fL,0x72e9626b44e6c917L,0x7fd9611022eb64cfL, + 0x863ebb7e9eb288f3L } }, + /* 17 << 28 */ + { { 0x6e6ab7616aca8ee7L,0x97d10b39d7b40358L,0x1687d3771e5feb0dL, + 0xc83e50e48265a27aL }, + { 0x8f75a9fec954b313L,0xcc2e8f47310d1f61L,0xf5ba81c56557d0e0L, + 0x25f9680c3eaf6207L } }, + /* 18 << 28 */ + { { 0xf95c66094354080bL,0x5225bfa57bf2fe1cL,0xc5c004e25c7d98faL, + 0x3561bf1c019aaf60L }, + { 0x5e6f9f17ba151474L,0xdec2f934b04f6ecaL,0x64e368a1269acb1eL, + 0x1332d9e40cdda493L } }, + /* 19 << 28 */ + { { 0x60d6cf69df23de05L,0x66d17da2009339a0L,0x9fcac9850a693923L, + 0xbcf057fced7c6a6dL }, + { 0xc3c5c8c5f0b5662cL,0x25318dd8dcba4f24L,0x60e8cb75082b69ffL, + 0x7c23b3ee1e728c01L } }, + /* 20 << 28 */ + { { 0x15e10a0a097e4403L,0xcb3d0a8619854665L,0x88d8e211d67d4826L, + 0xb39af66e0b9d2839L }, + { 0xa5f94588bd475ca8L,0xe06b7966c077b80bL,0xfedb1485da27c26cL, + 0xd290d33afe0fd5e0L } }, + /* 21 << 28 */ + { { 0xa40bcc47f34fb0faL,0xb4760cc81fb1ab09L,0x8fca0993a273bfe3L, + 0x13e4fe07f70b213cL }, + { 0x3bcdb992fdb05163L,0x8c484b110c2b19b6L,0x1acb815faaf2e3e2L, + 0xc6905935b89ff1b4L } }, + /* 22 << 28 */ + { { 0xb2ad6f9d586e74e1L,0x488883ad67b80484L,0x758aa2c7369c3ddbL, + 0x8ab74e699f9afd31L }, + { 0x10fc2d285e21beb1L,0x3484518a318c42f9L,0x377427dc53cf40c3L, + 0x9de0781a391bc1d9L } }, + /* 23 << 28 */ + { { 0x8faee858693807e1L,0xa38653274e81ccc7L,0x02c30ff26f835b84L, + 0xb604437b0d3d38d4L }, + { 0xb3fc8a985ca1823dL,0xb82f7ec903be0324L,0xee36d761cf684a33L, + 0x5a01df0e9f29bf7dL } }, + /* 24 << 28 */ + { { 0x686202f31306583dL,0x05b10da0437c622eL,0xbf9aaa0f076a7bc8L, + 0x25e94efb8f8f4e43L }, + { 0x8a35c9b7fa3dc26dL,0xe0e5fb9396ff03c5L,0xa77e3843ebc394ceL, + 0xcede65958361de60L } }, + /* 25 << 28 */ + { { 0xd27c22f6a1993545L,0xab01cc3624d671baL,0x63fa2877a169c28eL, + 0x925ef9042eb08376L }, + { 0x3b2fa3cf53aa0b32L,0xb27beb5b71c49d7aL,0xb60e1834d105e27fL, + 0xd60897884f68570dL } }, + /* 26 << 28 */ + { { 0x23094ce0d6fbc2acL,0x738037a1815ff551L,0xda73b1bb6bef119cL, + 0xdcf6c430eef506baL }, + { 0x00e4fe7be3ef104aL,0xebdd9a2c0a065628L,0x853a81c38792043eL, + 0x22ad6eceb3b59108L } }, + /* 27 << 28 */ + { { 0x9fb813c039cd297dL,0x8ec7e16e05bda5d9L,0x2834797c0d104b96L, + 0xcc11a2e77c511510L }, + { 0x96ca5a5396ee6380L,0x054c8655cea38742L,0xb5946852d54dfa7dL, + 0x97c422e71f4ab207L } }, + /* 28 << 28 */ + { { 0xbf9075090c22b540L,0x2cde42aab7c267d4L,0xba18f9ed5ab0d693L, + 0x3ba62aa66e4660d9L }, + { 0xb24bf97bab9ea96aL,0x5d039642e3b60e32L,0x4e6a45067c4d9bd5L, + 0x666c5b9e7ed4a6a4L } }, + /* 29 << 28 */ + { { 0xfa3fdcd98edbd7ccL,0x4660bb87c6ccd753L,0x9ae9082021e6b64fL, + 0x8a56a713b36bfb3fL }, + { 0xabfce0965726d47fL,0x9eed01b20b1a9a7fL,0x30e9cad44eb74a37L, + 0x7b2524cc53e9666dL } }, + /* 30 << 28 */ + { { 0x6a29683b8f4b002fL,0xc2200d7a41f4fc20L,0xcf3af47a3a338accL, + 0x6539a4fbe7128975L }, + { 0xcec31c14c33c7fcfL,0x7eb6799bc7be322bL,0x119ef4e96646f623L, + 0x7b7a26a554d7299bL } }, + /* 31 << 28 */ + { { 0xcb37f08d403f46f2L,0x94b8fc431a0ec0c7L,0xbb8514e3c332142fL, + 0xf3ed2c33e80d2a7aL }, + { 0x8d2080afb639126cL,0xf7b6be60e3553adeL,0x3950aa9f1c7e2b09L, + 0x847ff9586410f02bL } }, + /* 32 << 28 */ + { { 
0x877b7cf5678a31b0L,0xd50301ae3998b620L,0x734257c5c00fb396L, + 0xf9fb18a004e672a6L }, + { 0xff8bd8ebe8758851L,0x1e64e4c65d99ba44L,0x4b8eaedf7dfd93b7L, + 0xba2f2a9804e76b8cL } }, + /* 33 << 28 */ + { { 0x7d790cbae8053433L,0xc8e725a03d2c9585L,0x58c5c476cdd8f5edL, + 0xd106b952efa9fe1dL }, + { 0x3c5c775b0eff13a9L,0x242442bae057b930L,0xe9f458d4c9b70cbdL, + 0x69b71448a3cdb89aL } }, + /* 34 << 28 */ + { { 0x41ee46f60e2ed742L,0x573f104540067493L,0xb1e154ff9d54c304L, + 0x2ad0436a8d3a7502L }, + { 0xee4aaa2d431a8121L,0xcd38b3ab886f11edL,0x57d49ea6034a0eb7L, + 0xd2b773bdf7e85e58L } }, + /* 35 << 28 */ + { { 0x4a559ac49b5c1f14L,0xc444be1a3e54df2bL,0x13aad704eda41891L, + 0xcd927bec5eb5c788L }, + { 0xeb3c8516e48c8a34L,0x1b7ac8124b546669L,0x1815f896594df8ecL, + 0x87c6a79c79227865L } }, + /* 36 << 28 */ + { { 0xae02a2f09b56ddbdL,0x1339b5ac8a2f1cf3L,0xf2b569c7839dff0dL, + 0xb0b9e864fee9a43dL }, + { 0x4ff8ca4177bb064eL,0x145a2812fd249f63L,0x3ab7beacf86f689aL, + 0x9bafec2701d35f5eL } }, + /* 37 << 28 */ + { { 0x28054c654265aa91L,0xa4b18304035efe42L,0x6887b0e69639dec7L, + 0xf4b8f6ad3d52aea5L }, + { 0xfb9293cc971a8a13L,0x3f159e5d4c934d07L,0x2c50e9b109acbc29L, + 0x08eb65e67154d129L } }, + /* 38 << 28 */ + { { 0x4feff58930b75c3eL,0x0bb82fe294491c93L,0xd8ac377a89af62bbL, + 0xd7b514909685e49fL }, + { 0xabca9a7b04497f19L,0x1b35ed0a1a7ad13fL,0x6b601e213ec86ed6L, + 0xda91fcb9ce0c76f1L } }, + /* 39 << 28 */ + { { 0x9e28507bd7ab27e1L,0x7c19a55563945b7bL,0x6b43f0a1aafc9827L, + 0x443b4fbd3aa55b91L }, + { 0x962b2e656962c88fL,0x139da8d4ce0db0caL,0xb93f05dd1b8d6c4fL, + 0x779cdff7180b9824L } }, + /* 40 << 28 */ + { { 0xbba23fddae57c7b7L,0x345342f21b932522L,0xfd9c80fe556d4aa3L, + 0xa03907ba6525bb61L }, + { 0x38b010e1ff218933L,0xc066b654aa52117bL,0x8e14192094f2e6eaL, + 0x66a27dca0d32f2b2L } }, + /* 41 << 28 */ + { { 0x69c7f993048b3717L,0xbf5a989ab178ae1cL,0x49fa9058564f1d6bL, + 0x27ec6e15d31fde4eL }, + { 0x4cce03737276e7fcL,0x64086d7989d6bf02L,0x5a72f0464ccdd979L, + 0x909c356647775631L } }, + /* 42 << 28 */ + { { 0x1c07bc6b75dd7125L,0xb4c6bc9787a0428dL,0x507ece52fdeb6b9dL, + 0xfca56512b2c95432L }, + { 0x15d97181d0e8bd06L,0x384dd317c6bb46eaL,0x5441ea203952b624L, + 0xbcf70dee4e7dc2fbL } }, + /* 43 << 28 */ + { { 0x372b016e6628e8c3L,0x07a0d667b60a7522L,0xcf05751b0a344ee2L, + 0x0ec09a48118bdeecL }, + { 0x6e4b3d4ed83dce46L,0x43a6316d99d2fc6eL,0xa99d898956cf044cL, + 0x7c7f4454ae3e5fb7L } }, + /* 44 << 28 */ + { { 0xb2e6b121fbabbe92L,0x281850fbe1330076L,0x093581ec97890015L, + 0x69b1dded75ff77f5L }, + { 0x7cf0b18fab105105L,0x953ced31a89ccfefL,0x3151f85feb914009L, + 0x3c9f1b8788ed48adL } }, + /* 45 << 28 */ + { { 0xc9aba1a14a7eadcbL,0x928e7501522e71cfL,0xeaede7273a2e4f83L, + 0x467e10d11ce3bbd3L }, + { 0xf3442ac3b955dcf0L,0xba96307dd3d5e527L,0xf763a10efd77f474L, + 0x5d744bd06a6e1ff0L } }, + /* 46 << 28 */ + { { 0xd287282aa777899eL,0xe20eda8fd03f3cdeL,0x6a7e75bb50b07d31L, + 0x0b7e2a946f379de4L }, + { 0x31cb64ad19f593cfL,0x7b1a9e4f1e76ef1dL,0xe18c9c9db62d609cL, + 0x439bad6de779a650L } }, + /* 47 << 28 */ + { { 0x219d9066e032f144L,0x1db632b8e8b2ec6aL,0xff0d0fd4fda12f78L, + 0x56fb4c2d2a25d265L }, + { 0x5f4e2ee1255a03f1L,0x61cd6af2e96af176L,0xe0317ba8d068bc97L, + 0x927d6bab264b988eL } }, + /* 48 << 28 */ + { { 0xa18f07e0e90fb21eL,0x00fd2b80bba7fca1L,0x20387f2795cd67b5L, + 0x5b89a4e7d39707f7L }, + { 0x8f83ad3f894407ceL,0xa0025b946c226132L,0xc79563c7f906c13bL, + 0x5f548f314e7bb025L } }, + /* 49 << 28 */ + { { 0x2b4c6b8feac6d113L,0xa67e3f9c0e813c76L,0x3982717c3fe1f4b9L, + 0x5886581926d8050eL }, + { 
0x99f3640cf7f06f20L,0xdc6102162a66ebc2L,0x52f2c175767a1e08L, + 0x05660e1a5999871bL } }, + /* 50 << 28 */ + { { 0x6b0f17626d3c4693L,0xf0e7d62737ed7beaL,0xc51758c7b75b226dL, + 0x40a886281f91613bL }, + { 0x889dbaa7bbb38ce0L,0xe0404b65bddcad81L,0xfebccd3a8bc9671fL, + 0xfbf9a357ee1f5375L } }, + /* 51 << 28 */ + { { 0x5dc169b028f33398L,0xb07ec11d72e90f65L,0xae7f3b4afaab1eb1L, + 0xd970195e5f17538aL }, + { 0x52b05cbe0181e640L,0xf5debd622643313dL,0x761481545df31f82L, + 0x23e03b333a9e13c5L } }, + /* 52 << 28 */ + { { 0xff7589494fde0c1fL,0xbf8a1abee5b6ec20L,0x702278fb87e1db6cL, + 0xc447ad7a35ed658fL }, + { 0x48d4aa3803d0ccf2L,0x80acb338819a7c03L,0x9bc7c89e6e17ceccL, + 0x46736b8b03be1d82L } }, + /* 53 << 28 */ + { { 0xd65d7b60c0432f96L,0xddebe7a3deb5442fL,0x79a253077dff69a2L, + 0x37a56d9402cf3122L }, + { 0x8bab8aedf2350d0aL,0x13c3f276037b0d9aL,0xc664957c44c65caeL, + 0x88b44089c2e71a88L } }, + /* 54 << 28 */ + { { 0xdb88e5a35cb02664L,0x5d4c0bf18686c72eL,0xea3d9b62a682d53eL, + 0x9b605ef40b2ad431L }, + { 0x71bac202c69645d0L,0xa115f03a6a1b66e7L,0xfe2c563a158f4dc4L, + 0xf715b3a04d12a78cL } }, + /* 55 << 28 */ + { { 0x8f7f0a48d413213aL,0x2035806dc04becdbL,0xecd34a995d8587f5L, + 0x4d8c30799f6d3a71L }, + { 0x1b2a2a678d95a8f6L,0xc58c9d7df2110d0dL,0xdeee81d5cf8fba3fL, + 0xa42be3c00c7cdf68L } }, + /* 56 << 28 */ + { { 0x2126f742d43b5eaaL,0x054a0766dfa59b85L,0x9d0d5e36126bfd45L, + 0xa1f8fbd7384f8a8fL }, + { 0x317680f5d563fcccL,0x48ca5055f280a928L,0xe00b81b227b578cfL, + 0x10aad9182994a514L } }, + /* 57 << 28 */ + { { 0xd9e07b62b7bdc953L,0x9f0f6ff25bc086ddL,0x09d1ccff655eee77L, + 0x45475f795bef7df1L }, + { 0x3faa28fa86f702ccL,0x92e609050f021f07L,0xe9e629687f8fa8c6L, + 0xbd71419af036ea2cL } }, + /* 58 << 28 */ + { { 0x171ee1cc6028da9aL,0x5352fe1ac251f573L,0xf8ff236e3fa997f4L, + 0xd831b6c9a5749d5fL }, + { 0x7c872e1de350e2c2L,0xc56240d91e0ce403L,0xf9deb0776974f5cbL, + 0x7d50ba87961c3728L } }, + /* 59 << 28 */ + { { 0xd6f894265a3a2518L,0xcf817799c6303d43L,0x510a0471619e5696L, + 0xab049ff63a5e307bL }, + { 0xe4cdf9b0feb13ec7L,0xd5e971179d8ff90cL,0xf6f64d069afa96afL, + 0x00d0bf5e9d2012a2L } }, + /* 60 << 28 */ + { { 0xe63f301f358bcdc0L,0x07689e990a9d47f8L,0x1f689e2f4f43d43aL, + 0x4d542a1690920904L }, + { 0xaea293d59ca0a707L,0xd061fe458ac68065L,0x1033bf1b0090008cL, + 0x29749558c08a6db6L } }, + /* 61 << 28 */ + { { 0x74b5fc59c1d5d034L,0xf712e9f667e215e0L,0xfd520cbd860200e6L, + 0x0229acb43ea22588L }, + { 0x9cd1e14cfff0c82eL,0x87684b6259c69e73L,0xda85e61c96ccb989L, + 0x2d5dbb02a3d06493L } }, + /* 62 << 28 */ + { { 0xf22ad33ae86b173cL,0xe8e41ea5a79ff0e3L,0x01d2d725dd0d0c10L, + 0x31f39088032d28f9L }, + { 0x7b3f71e17829839eL,0x0cf691b44502ae58L,0xef658dbdbefc6115L, + 0xa5cd6ee5b3ab5314L } }, + /* 63 << 28 */ + { { 0x206c8d7b5f1d2347L,0x794645ba4cc2253aL,0xd517d8ff58389e08L, + 0x4fa20dee9f847288L }, + { 0xeba072d8d797770aL,0x7360c91dbf429e26L,0x7200a3b380af8279L, + 0x6a1c915082dadce3L } }, + /* 64 << 28 */ + { { 0x0ee6d3a7c35d8794L,0x042e65580356bae5L,0x9f59698d643322fdL, + 0x9379ae1550a61967L }, + { 0x64b9ae62fcc9981eL,0xaed3d6316d2934c6L,0x2454b3025e4e65ebL, + 0xab09f647f9950428L } }, + /* 0 << 35 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 35 */ + { { 0xb2083a1222248accL,0x1f6ec0ef3264e366L,0x5659b7045afdee28L, + 0x7a823a40e6430bb5L }, + { 0x24592a04e1900a79L,0xcde09d4ac9ee6576L,0x52b6463f4b5ea54aL, + 0x1efe9ed3d3ca65a7L } }, + /* 2 << 35 */ + { { 0xe27a6dbe305406ddL,0x8eb7dc7fdd5d1957L,0xf54a6876387d4d8fL, + 0x9c479409c7762de4L }, + { 
0xbe4d5b5d99b30778L,0x25380c566e793682L,0x602d37f3dac740e3L, + 0x140deabe1566e4aeL } }, + /* 3 << 35 */ + { { 0x4481d067afd32acfL,0xd8f0fccae1f71ccfL,0xd208dd0cb596f2daL, + 0xd049d7309aad93f9L }, + { 0xc79f263d42ab580eL,0x09411bb123f707b4L,0x8cfde1ff835e0edaL, + 0x7270749090f03402L } }, + /* 4 << 35 */ + { { 0xeaee6126c49a861eL,0x024f3b65e14f0d06L,0x51a3f1e8c69bfc17L, + 0xc3c3a8e9a7686381L }, + { 0x3400752cb103d4c8L,0x02bc46139218b36bL,0xc67f75eb7651504aL, + 0xd6848b56d02aebfaL } }, + /* 5 << 35 */ + { { 0xbd9802e6c30fa92bL,0x5a70d96d9a552784L,0x9085c4ea3f83169bL, + 0xfa9423bb06908228L }, + { 0x2ffebe12fe97a5b9L,0x85da604971b99118L,0x9cbc2f7f63178846L, + 0xfd96bc709153218eL } }, + /* 6 << 35 */ + { { 0x958381db1782269bL,0xae34bf792597e550L,0xbb5c60645f385153L, + 0x6f0e96afe3088048L }, + { 0xbf6a021577884456L,0xb3b5688c69310ea7L,0x17c9429504fad2deL, + 0xe020f0e517896d4dL } }, + /* 7 << 35 */ + { { 0x730ba0ab0976505fL,0x567f6813095e2ec5L,0x470620106331ab71L, + 0x72cfa97741d22b9fL }, + { 0x33e55ead8a2373daL,0xa8d0d5f47ba45a68L,0xba1d8f9c03029d15L, + 0x8f34f1ccfc55b9f3L } }, + /* 8 << 35 */ + { { 0xcca4428dbbe5a1a9L,0x8187fd5f3126bd67L,0x0036973a48105826L, + 0xa39b6663b8bd61a0L }, + { 0x6d42deef2d65a808L,0x4969044f94636b19L,0xf611ee47dd5d564cL, + 0x7b2f3a49d2873077L } }, + /* 9 << 35 */ + { { 0x94157d45300eb294L,0x2b2a656e169c1494L,0xc000dd76d3a47aa9L, + 0xa2864e4fa6243ea4L }, + { 0x82716c47db89842eL,0x12dfd7d761479fb7L,0x3b9a2c56e0b2f6dcL, + 0x46be862ad7f85d67L } }, + /* 10 << 35 */ + { { 0x03b0d8dd0f82b214L,0x460c34f9f103cbc6L,0xf32e5c0318d79e19L, + 0x8b8888baa84117f8L }, + { 0x8f3c37dcc0722677L,0x10d21be91c1c0f27L,0xd47c8468e0f7a0c6L, + 0x9bf02213adecc0e0L } }, + /* 11 << 35 */ + { { 0x0baa7d1242b48b99L,0x1bcb665d48424096L,0x8b847cd6ebfb5cfbL, + 0x87c2ae569ad4d10dL }, + { 0xf1cbb1220de36726L,0xe7043c683fdfbd21L,0x4bd0826a4e79d460L, + 0x11f5e5984bd1a2cbL } }, + /* 12 << 35 */ + { { 0x97554160b7fe7b6eL,0x7d16189a400a3fb2L,0xd73e9beae328ca1eL, + 0x0dd04b97e793d8ccL }, + { 0xa9c83c9b506db8ccL,0x5cd47aaecf38814cL,0x26fc430db64b45e6L, + 0x079b5499d818ea84L } }, + /* 13 << 35 */ + { { 0xebb01102c1c24a3bL,0xca24e5681c161c1aL,0x103eea6936f00a4aL, + 0x9ad76ee876176c7bL }, + { 0x97451fc2538e0ff7L,0x94f898096604b3b0L,0x6311436e3249cfd7L, + 0x27b4a7bd41224f69L } }, + /* 14 << 35 */ + { { 0x03b5d21ae0ac2941L,0x279b0254c2d31937L,0x3307c052cac992d0L, + 0x6aa7cb92efa8b1f3L }, + { 0x5a1825800d37c7a5L,0x13380c37342d5422L,0x92ac2d66d5d2ef92L, + 0x035a70c9030c63c6L } }, + /* 15 << 35 */ + { { 0xc16025dd4ce4f152L,0x1f419a71f9df7c06L,0x6d5b221491e4bb14L, + 0xfc43c6cc839fb4ceL }, + { 0x49f06591925d6b2dL,0x4b37d9d362186598L,0x8c54a971d01b1629L, + 0xe1a9c29f51d50e05L } }, + /* 16 << 35 */ + { { 0x5109b78571ba1861L,0x48b22d5cd0c8f93dL,0xe8fa84a78633bb93L, + 0x53fba6ba5aebbd08L }, + { 0x7ff27df3e5eea7d8L,0x521c879668ca7158L,0xb9d5133bce6f1a05L, + 0x2d50cd53fd0ebee4L } }, + /* 17 << 35 */ + { { 0xc82115d6c5a3ef16L,0x993eff9dba079221L,0xe4da2c5e4b5da81cL, + 0x9a89dbdb8033fd85L }, + { 0x60819ebf2b892891L,0x53902b215d14a4d5L,0x6ac35051d7fda421L, + 0xcc6ab88561c83284L } }, + /* 18 << 35 */ + { { 0x14eba133f74cff17L,0x240aaa03ecb813f2L,0xcfbb65406f665beeL, + 0x084b1fe4a425ad73L }, + { 0x009d5d16d081f6a6L,0x35304fe8eef82c90L,0xf20346d5aa9eaa22L, + 0x0ada9f07ac1c91e3L } }, + /* 19 << 35 */ + { { 0xa6e21678968a6144L,0x54c1f77c07b31a1eL,0xd6bb787e5781fbe1L, + 0x61bd2ee0e31f1c4aL }, + { 0xf25aa1e9781105fcL,0x9cf2971f7b2f8e80L,0x26d15412cdff919bL, + 0x01db4ebe34bc896eL } }, + /* 20 << 35 */ + { { 
0x7d9b3e23b40df1cfL,0x5933737394e971b4L,0xbf57bd14669cf921L, + 0x865daedf0c1a1064L }, + { 0x3eb70bd383279125L,0xbc3d5b9f34ecdaabL,0x91e3ed7e5f755cafL, + 0x49699f54d41e6f02L } }, + /* 21 << 35 */ + { { 0x185770e1d4a7a15bL,0x08f3587aeaac87e7L,0x352018db473133eaL, + 0x674ce71904fd30fcL }, + { 0x7b8d9835088b3e0eL,0x7a0356a95d0d47a1L,0x9d9e76596474a3c4L, + 0x61ea48a7ff66966cL } }, + /* 22 << 35 */ + { { 0x304177580f3e4834L,0xfdbb21c217a9afcbL,0x756fa17f2f9a67b3L, + 0x2a6b2421a245c1a8L }, + { 0x64be27944af02291L,0xade465c62a5804feL,0x8dffbd39a6f08fd7L, + 0xc4efa84caa14403bL } }, + /* 23 << 35 */ + { { 0xa1b91b2a442b0f5cL,0xb748e317cf997736L,0x8d1b62bfcee90e16L, + 0x907ae2710b2078c0L }, + { 0xdf31534b0c9bcdddL,0x043fb05439adce83L,0x99031043d826846aL, + 0x61a9c0d6b144f393L } }, + /* 24 << 35 */ + { { 0xdab4804647718427L,0xdf17ff9b6e830f8bL,0x408d7ee8e49a1347L, + 0x6ac71e2391c1d4aeL }, + { 0xc8cbb9fd1defd73cL,0x19840657bbbbfec5L,0x39db1cb59e7ef8eaL, + 0x78aa829664105f30L } }, + /* 25 << 35 */ + { { 0xa3d9b7f0a3738c29L,0x0a2f235abc3250a3L,0x55e506f6445e4cafL, + 0x0974f73d33475f7aL }, + { 0xd37dbba35ba2f5a8L,0x542c6e636af40066L,0x26d99b53c5d73e2cL, + 0x06060d7d6c3ca33eL } }, + /* 26 << 35 */ + { { 0xcdbef1c2065fef4aL,0x77e60f7dfd5b92e3L,0xd7c549f026708350L, + 0x201b3ad034f121bfL }, + { 0x5fcac2a10334fc14L,0x8a9a9e09344552f6L,0x7dd8a1d397653082L, + 0x5fc0738f79d4f289L } }, + /* 27 << 35 */ + { { 0x787d244d17d2d8c3L,0xeffc634570830684L,0x5ddb96dde4f73ae5L, + 0x8efb14b1172549a5L }, + { 0x6eb73eee2245ae7aL,0xbca4061eea11f13eL,0xb577421d30b01f5dL, + 0xaa688b24782e152cL } }, + /* 28 << 35 */ + { { 0x67608e71bd3502baL,0x4ef41f24b4de75a0L,0xb08dde5efd6125e5L, + 0xde484825a409543fL }, + { 0x1f198d9865cc2295L,0x428a37716e0edfa2L,0x4f9697a2adf35fc7L, + 0x01a43c79f7cac3c7L } }, + /* 29 << 35 */ + { { 0xb05d70590fd3659aL,0x8927f30cbb7f2d9aL,0x4023d1ac8cf984d3L, + 0x32125ed302897a45L }, + { 0xfb572dad3d414205L,0x73000ef2e3fa82a9L,0x4c0868e9f10a5581L, + 0x5b61fc676b0b3ca5L } }, + /* 30 << 35 */ + { { 0xc1258d5b7cae440cL,0x21c08b41402b7531L,0xf61a8955de932321L, + 0x3568faf82d1408afL }, + { 0x71b15e999ecf965bL,0xf14ed248e917276fL,0xc6f4caa1820cf9e2L, + 0x681b20b218d83c7eL } }, + /* 31 << 35 */ + { { 0x6cde738dc6c01120L,0x71db0813ae70e0dbL,0x95fc064474afe18cL, + 0x34619053129e2be7L }, + { 0x80615ceadb2a3b15L,0x0a49a19edb4c7073L,0x0e1b84c88fd2d367L, + 0xd74bf462033fb8aaL } }, + /* 32 << 35 */ + { { 0x889f6d65533ef217L,0x7158c7e4c3ca2e87L,0xfb670dfbdc2b4167L, + 0x75910a01844c257fL }, + { 0xf336bf07cf88577dL,0x22245250e45e2aceL,0x2ed92e8d7ca23d85L, + 0x29f8be4c2b812f58L } }, + /* 33 << 35 */ + { { 0xdd9ebaa7076fe12bL,0x3f2400cbae1537f9L,0x1aa9352817bdfb46L, + 0xc0f9843067883b41L }, + { 0x5590ede10170911dL,0x7562f5bb34d4b17fL,0xe1fa1df21826b8d2L, + 0xb40b796a6bd80d59L } }, + /* 34 << 35 */ + { { 0xd65bf1973467ba92L,0x8c9b46dbf70954b0L,0x97c8a0f30e78f15dL, + 0xa8f3a69a85a4c961L }, + { 0x4242660f61e4ce9bL,0xbf06aab36ea6790cL,0xc6706f8eec986416L, + 0x9e56dec19a9fc225L } }, + /* 35 << 35 */ + { { 0x527c46f49a9898d9L,0xd799e77b5633cdefL,0x24eacc167d9e4297L, + 0xabb61cea6b1cb734L }, + { 0xbee2e8a7f778443cL,0x3bb42bf129de2fe6L,0xcbed86a13003bb6fL, + 0xd3918e6cd781cdf6L } }, + /* 36 << 35 */ + { { 0x4bee32719a5103f1L,0x5243efc6f50eac06L,0xb8e122cb6adcc119L, + 0x1b7faa84c0b80a08L }, + { 0x32c3d1bd6dfcd08cL,0x129dec4e0be427deL,0x98ab679c1d263c83L, + 0xafc83cb7cef64effL } }, + /* 37 << 35 */ + { { 0x85eb60882fa6be76L,0x892585fb1328cbfeL,0xc154d3edcf618ddaL, + 0xc44f601b3abaf26eL }, + { 
0x7bf57d0b2be1fdfdL,0xa833bd2d21137feeL,0x9353af362db591a8L, + 0xc76f26dc5562a056L } }, + /* 38 << 35 */ + { { 0x1d87e47d3fdf5a51L,0x7afb5f9355c9cab0L,0x91bbf58f89e0586eL, + 0x7c72c0180d843709L }, + { 0xa9a5aafb99b5c3dcL,0xa48a0f1d3844aeb0L,0x7178b7ddb667e482L, + 0x453985e96e23a59aL } }, + /* 39 << 35 */ + { { 0x4a54c86001b25dd8L,0x0dd37f48fb897c8aL,0x5f8aa6100ea90cd9L, + 0xc8892c6816d5830dL }, + { 0xeb4befc0ef514ca5L,0x478eb679e72c9ee6L,0x9bca20dadbc40d5fL, + 0xf015de21dde4f64aL } }, + /* 40 << 35 */ + { { 0xaa6a4de0eaf4b8a5L,0x68cfd9ca4bc60e32L,0x668a4b017fd15e70L, + 0xd9f0694af27dc09dL }, + { 0xf6c3cad5ba708bcdL,0x5cd2ba695bb95c2aL,0xaa28c1d333c0a58fL, + 0x23e274e3abc77870L } }, + /* 41 << 35 */ + { { 0x44c3692ddfd20a4aL,0x091c5fd381a66653L,0x6c0bb69109a0757dL, + 0x9072e8b9667343eaL }, + { 0x31d40eb080848becL,0x95bd480a79fd36ccL,0x01a77c6165ed43f5L, + 0xafccd1272e0d40bfL } }, + /* 42 << 35 */ + { { 0xeccfc82d1cc1884bL,0xc85ac2015d4753b4L,0xc7a6caac658e099fL, + 0xcf46369e04b27390L }, + { 0xe2e7d049506467eaL,0x481b63a237cdecccL,0x4029abd8ed80143aL, + 0x28bfe3c7bcb00b88L } }, + /* 43 << 35 */ + { { 0x3bec10090643d84aL,0x885f3668abd11041L,0xdb02432cf83a34d6L, + 0x32f7b360719ceebeL }, + { 0xf06c7837dad1fe7aL,0x60a157a95441a0b0L,0x704970e9e2d47550L, + 0xcd2bd553271b9020L } }, + /* 44 << 35 */ + { { 0xff57f82f33e24a0bL,0x9cbee23ff2565079L,0x16353427eb5f5825L, + 0x276feec4e948d662L }, + { 0xd1b62bc6da10032bL,0x718351ddf0e72a53L,0x934520762420e7baL, + 0x96368fff3a00118dL } }, + /* 45 << 35 */ + { { 0x00ce2d26150a49e4L,0x0c28b6363f04706bL,0xbad65a4658b196d0L, + 0x6c8455fcec9f8b7cL }, + { 0xe90c895f2d71867eL,0x5c0be31bedf9f38cL,0x2a37a15ed8f6ec04L, + 0x239639e78cd85251L } }, + /* 46 << 35 */ + { { 0xd89753159c7c4c6bL,0x603aa3c0d7409af7L,0xb8d53d0c007132fbL, + 0x68d12af7a6849238L }, + { 0xbe0607e7bf5d9279L,0x9aa50055aada74ceL,0xe81079cbba7e8ccbL, + 0x610c71d1a5f4ff5eL } }, + /* 47 << 35 */ + { { 0x9e2ee1a75aa07093L,0xca84004ba75da47cL,0x074d39513de75401L, + 0xf938f756bb311592L }, + { 0x9619761800a43421L,0x39a2536207bc78c8L,0x278f710a0a171276L, + 0xb28446ea8d1a8f08L } }, + /* 48 << 35 */ + { { 0x184781bfe3b6a661L,0x7751cb1de6d279f7L,0xf8ff95d6c59eb662L, + 0x186d90b758d3dea7L }, + { 0x0e4bb6c1dfb4f754L,0x5c5cf56b2b2801dcL,0xc561e4521f54564dL, + 0xb4fb8c60f0dd7f13L } }, + /* 49 << 35 */ + { { 0xf884963033ff98c7L,0x9619fffacf17769cL,0xf8090bf61bfdd80aL, + 0x14d9a149422cfe63L }, + { 0xb354c3606f6df9eaL,0xdbcf770d218f17eaL,0x207db7c879eb3480L, + 0x213dbda8559b6a26L } }, + /* 50 << 35 */ + { { 0xac4c200b29fc81b3L,0xebc3e09f171d87c1L,0x917995301481aa9eL, + 0x051b92e192e114faL }, + { 0xdf8f92e9ecb5537fL,0x44b1b2cc290c7483L,0xa711455a2adeb016L, + 0x964b685681a10c2cL } }, + /* 51 << 35 */ + { { 0x4f159d99cec03623L,0x05532225ef3271eaL,0xb231bea3c5ee4849L, + 0x57a54f507094f103L }, + { 0x3e2d421d9598b352L,0xe865a49c67412ab4L,0xd2998a251cc3a912L, + 0x5d0928080c74d65dL } }, + /* 52 << 35 */ + { { 0x73f459084088567aL,0xeb6b280e1f214a61L,0x8c9adc34caf0c13dL, + 0x39d12938f561fb80L }, + { 0xb2dc3a5ebc6edfb4L,0x7485b1b1fe4d210eL,0x062e0400e186ae72L, + 0x91e32d5c6eeb3b88L } }, + /* 53 << 35 */ + { { 0x6df574d74be59224L,0xebc88ccc716d55f3L,0x26c2e6d0cad6ed33L, + 0xc6e21e7d0d3e8b10L }, + { 0x2cc5840e5bcc36bbL,0x9292445e7da74f69L,0x8be8d3214e5193a8L, + 0x3ec236298df06413L } }, + /* 54 << 35 */ + { { 0xc7e9ae85b134defaL,0x6073b1d01bb2d475L,0xb9ad615e2863c00dL, + 0x9e29493d525f4ac4L }, + { 0xc32b1dea4e9acf4fL,0x3e1f01c8a50db88dL,0xb05d70ea04da916cL, + 0x714b0d0ad865803eL } }, + /* 55 << 35 */ + { { 
0x4bd493fc9920cb5eL,0x5b44b1f792c7a3acL,0xa2a77293bcec9235L, + 0x5ee06e87cd378553L }, + { 0xceff8173da621607L,0x2bb03e4c99f5d290L,0x2945106aa6f734acL, + 0xb5056604d25c4732L } }, + /* 56 << 35 */ + { { 0x5945920ce079afeeL,0x686e17a06789831fL,0x5966bee8b74a5ae5L, + 0x38a673a21e258d46L }, + { 0xbd1cc1f283141c95L,0x3b2ecf4f0e96e486L,0xcd3aa89674e5fc78L, + 0x415ec10c2482fa7aL } }, + /* 57 << 35 */ + { { 0x1523441980503380L,0x513d917ad314b392L,0xb0b52f4e63caecaeL, + 0x07bf22ad2dc7780bL }, + { 0xe761e8a1e4306839L,0x1b3be9625dd7feaaL,0x4fe728de74c778f1L, + 0xf1fa0bda5e0070f6L } }, + /* 58 << 35 */ + { { 0x85205a316ec3f510L,0x2c7e4a14d2980475L,0xde3c19c06f30ebfdL, + 0xdb1c1f38d4b7e644L }, + { 0xfe291a755dce364aL,0xb7b22a3c058f5be3L,0x2cd2c30237fea38cL, + 0x2930967a2e17be17L } }, + /* 59 << 35 */ + { { 0x87f009de0c061c65L,0xcb014aacedc6ed44L,0x49bd1cb43bafb1ebL, + 0x81bd8b5c282d3688L }, + { 0x1cdab87ef01a17afL,0x21f37ac4e710063bL,0x5a6c567642fc8193L, + 0xf4753e7056a6015cL } }, + /* 60 << 35 */ + { { 0x020f795ea15b0a44L,0x8f37c8d78958a958L,0x63b7e89ba4b675b5L, + 0xb4fb0c0c0fc31aeaL }, + { 0xed95e639a7ff1f2eL,0x9880f5a3619614fbL,0xdeb6ff02947151abL, + 0x5bc5118ca868dcdbL } }, + /* 61 << 35 */ + { { 0xd8da20554c20cea5L,0xcac2776e14c4d69aL,0xcccb22c1622d599bL, + 0xa4ddb65368a9bb50L }, + { 0x2c4ff1511b4941b4L,0xe1ff19b46efba588L,0x35034363c48345e0L, + 0x45542e3d1e29dfc4L } }, + /* 62 << 35 */ + { { 0xf197cb91349f7aedL,0x3b2b5a008fca8420L,0x7c175ee823aaf6d8L, + 0x54dcf42135af32b6L }, + { 0x0ba1430727d6561eL,0x879d5ee4d175b1e2L,0xc7c4367399807db5L, + 0x77a544559cd55bcdL } }, + /* 63 << 35 */ + { { 0xe6c2ff130105c072L,0x18f7a99f8dda7da4L,0x4c3018200e2d35c1L, + 0x06a53ca0d9cc6c82L }, + { 0xaa21cc1ef1aa1d9eL,0x324143344a75b1e8L,0x2a6d13280ebe9fdcL, + 0x16bd173f98a4755aL } }, + /* 64 << 35 */ + { { 0xfbb9b2452133ffd9L,0x39a8b2f1830f1a20L,0x484bc97dd5a1f52aL, + 0xd6aebf56a40eddf8L }, + { 0x32257acb76ccdac6L,0xaf4d36ec1586ff27L,0x8eaa8863f8de7dd1L, + 0x0045d5cf88647c16L } }, + /* 0 << 42 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 42 */ + { { 0xa6f3d574c005979dL,0xc2072b426a40e350L,0xfca5c1568de2ecf9L, + 0xa8c8bf5ba515344eL }, + { 0x97aee555114df14aL,0xd4374a4dfdc5ec6bL,0x754cc28f2ca85418L, + 0x71cb9e27d3c41f78L } }, + /* 2 << 42 */ + { { 0x8910507903605c39L,0xf0843d9ea142c96cL,0xf374493416923684L, + 0x732caa2ffa0a2893L }, + { 0xb2e8c27061160170L,0xc32788cc437fbaa3L,0x39cd818ea6eda3acL, + 0xe2e942399e2b2e07L } }, + /* 3 << 42 */ + { { 0x6967d39b0260e52aL,0xd42585cc90653325L,0x0d9bd60521ca7954L, + 0x4fa2087781ed57b3L }, + { 0x60c1eff8e34a0bbeL,0x56b0040c84f6ef64L,0x28be2b24b1af8483L, + 0xb2278163f5531614L } }, + /* 4 << 42 */ + { { 0x8df275455922ac1cL,0xa7b3ef5ca52b3f63L,0x8e77b21471de57c4L, + 0x31682c10834c008bL }, + { 0xc76824f04bd55d31L,0xb6d1c08617b61c71L,0x31db0903c2a5089dL, + 0x9c092172184e5d3fL } }, + /* 5 << 42 */ + { { 0xdd7ced5bc00cc638L,0x1a2015eb61278fc2L,0x2e8e52886a37f8d6L, + 0xc457786fe79933adL }, + { 0xb3fe4cce2c51211aL,0xad9b10b224c20498L,0x90d87a4fd28db5e5L, + 0x698cd1053aca2fc3L } }, + /* 6 << 42 */ + { { 0x4f112d07e91b536dL,0xceb982f29eba09d6L,0x3c157b2c197c396fL, + 0xe23c2d417b66eb24L }, + { 0x480c57d93f330d37L,0xb3a4c8a179108debL,0x702388decb199ce5L, + 0x0b019211b944a8d4L } }, + /* 7 << 42 */ + { { 0x24f2a692840bb336L,0x7c353bdca669fa7bL,0xda20d6fcdec9c300L, + 0x625fbe2fa13a4f17L }, + { 0xa2b1b61adbc17328L,0x008965bfa9515621L,0x49690939c620ff46L, + 0x182dd27d8717e91cL } }, + /* 8 << 42 */ + { { 0x5ace5035ea6c3997L,0x54259aaac2610befL,0xef18bb3f3c80dd39L, + 
0x6910b95b5fc3fa39L }, + { 0xfce2f51043e09aeeL,0xced56c9fa7675665L,0x10e265acd872db61L, + 0x6982812eae9fce69L } }, + /* 9 << 42 */ + { { 0x29be11c6ce800998L,0x72bb1752b90360d9L,0x2c1931975a4ad590L, + 0x2ba2f5489fc1dbc0L }, + { 0x7fe4eebbe490ebe0L,0x12a0a4cd7fae11c0L,0x7197cf81e903ba37L, + 0xcf7d4aa8de1c6dd8L } }, + /* 10 << 42 */ + { { 0x92af6bf43fd5684cL,0x2b26eecf80360aa1L,0xbd960f3000546a82L, + 0x407b3c43f59ad8feL }, + { 0x86cae5fe249c82baL,0x9e0faec72463744cL,0x87f551e894916272L, + 0x033f93446ceb0615L } }, + /* 11 << 42 */ + { { 0x1e5eb0d18be82e84L,0x89967f0e7a582fefL,0xbcf687d5a6e921faL, + 0xdfee4cf3d37a09baL }, + { 0x94f06965b493c465L,0x638b9a1c7635c030L,0x7666786466f05e9fL, + 0xccaf6808c04da725L } }, + /* 12 << 42 */ + { { 0xca2eb690768fccfcL,0xf402d37db835b362L,0x0efac0d0e2fdfcceL, + 0xefc9cdefb638d990L }, + { 0x2af12b72d1669a8bL,0x33c536bc5774ccbdL,0x30b21909fb34870eL, + 0xc38fa2f77df25acaL } }, + /* 13 << 42 */ + { { 0x74c5f02bbf81f3f5L,0x0525a5aeaf7e4581L,0x88d2aaba433c54aeL, + 0xed9775db806a56c5L }, + { 0xd320738ac0edb37dL,0x25fdb6ee66cc1f51L,0xac661d1710600d76L, + 0x931ec1f3bdd1ed76L } }, + /* 14 << 42 */ + { { 0x65c11d6219ee43f1L,0x5cd57c3e60829d97L,0xd26c91a3984be6e8L, + 0xf08d93098b0c53bdL }, + { 0x94bc9e5bc016e4eaL,0xd391683911d43d2bL,0x886c5ad773701155L, + 0xe037762620b00715L } }, + /* 15 << 42 */ + { { 0x7f01c9ecaa80ba59L,0x3083411a68538e51L,0x970370f1e88128afL, + 0x625cc3db91dec14bL }, + { 0xfef9666c01ac3107L,0xb2a8d577d5057ac3L,0xb0f2629992be5df7L, + 0xf579c8e500353924L } }, + /* 16 << 42 */ + { { 0xb8fa3d931341ed7aL,0x4223272ca7b59d49L,0x3dcb194783b8c4a4L, + 0x4e413c01ed1302e4L }, + { 0x6d999127e17e44ceL,0xee86bf7533b3adfbL,0xf6902fe625aa96caL, + 0xb73540e4e5aae47dL } }, + /* 17 << 42 */ + { { 0x32801d7b1b4a158cL,0xe571c99e27e2a369L,0x40cb76c010d9f197L, + 0xc308c2893167c0aeL }, + { 0xa6ef9dd3eb7958f2L,0xa7226dfc300879b1L,0x6cd0b3627edf0636L, + 0x4efbce6c7bc37eedL } }, + /* 18 << 42 */ + { { 0x75f92a058d699021L,0x586d4c79772566e3L,0x378ca5f1761ad23aL, + 0x650d86fc1465a8acL }, + { 0x7a4ed457842ba251L,0x6b65e3e642234933L,0xaf1543b731aad657L, + 0xa4cefe98cbfec369L } }, + /* 19 << 42 */ + { { 0xb587da909f47befbL,0x6562e9fb41312d13L,0xa691ea59eff1cefeL, + 0xcc30477a05fc4cf6L }, + { 0xa16324610b0ffd3dL,0xa1f16f3b5b355956L,0x5b148d534224ec24L, + 0xdc834e7bf977012aL } }, + /* 20 << 42 */ + { { 0x7bfc5e75b2c69dbcL,0x3aa77a2903c3da6cL,0xde0df03cca910271L, + 0xcbd5ca4a7806dc55L }, + { 0xe1ca58076db476cbL,0xfde15d625f37a31eL,0xf49af520f41af416L, + 0x96c5c5b17d342db5L } }, + /* 21 << 42 */ + { { 0x155c43b7eb4ceb9bL,0x2e9930104e77371aL,0x1d2987da675d43afL, + 0xef2bc1c08599fd72L }, + { 0x96894b7b9342f6b2L,0x201eadf27c8e71f0L,0xf3479d9f4a1f3efcL, + 0xe0f8a742702a9704L } }, + /* 22 << 42 */ + { { 0xeafd44b6b3eba40cL,0xf9739f29c1c1e0d0L,0x0091471a619d505eL, + 0xc15f9c969d7c263eL }, + { 0x5be4728583afbe33L,0xa3b6d6af04f1e092L,0xe76526b9751a9d11L, + 0x2ec5b26d9a4ae4d2L } }, + /* 23 << 42 */ + { { 0xeb66f4d902f6fb8dL,0x4063c56196912164L,0xeb7050c180ef3000L, + 0x288d1c33eaa5b3f0L }, + { 0xe87c68d607806fd8L,0xb2f7f9d54bbbf50fL,0x25972f3aac8d6627L, + 0xf854777410e8c13bL } }, + /* 24 << 42 */ + { { 0xcc50ef6c872b4a60L,0xab2a34a44613521bL,0x39c5c190983e15d1L, + 0x61dde5df59905512L }, + { 0xe417f6219f2275f3L,0x0750c8b6451d894bL,0x75b04ab978b0bdaaL, + 0x3bfd9fd4458589bdL } }, + /* 25 << 42 */ + { { 0xf1013e30ee9120b6L,0x2b51af9323a4743eL,0xea96ffae48d14d9eL, + 0x71dc0dbe698a1d32L }, + { 0x914962d20180cca4L,0x1ae60677c3568963L,0x8cf227b1437bc444L, + 0xc650c83bc9962c7aL } }, + /* 26 << 42 */ + { { 
0x23c2c7ddfe7ccfc4L,0xf925c89d1b929d48L,0x4460f74b06783c33L, + 0xac2c8d49a590475aL }, + { 0xfb40b407b807bba0L,0x9d1e362d69ff8f3aL,0xa33e9681cbef64a4L, + 0x67ece5fa332fb4b2L } }, + /* 27 << 42 */ + { { 0x6900a99b739f10e3L,0xc3341ca9ff525925L,0xee18a626a9e2d041L, + 0xa5a8368529580dddL }, + { 0xf3470c819d7de3cdL,0xedf025862062cf9cL,0xf43522fac010edb0L, + 0x3031413513a4b1aeL } }, + /* 28 << 42 */ + { { 0xc792e02adb22b94bL,0x993d8ae9a1eaa45bL,0x8aad6cd3cd1e1c63L, + 0x89529ca7c5ce688aL }, + { 0x2ccee3aae572a253L,0xe02b643802a21efbL,0xa7091b6ec9430358L, + 0x06d1b1fa9d7db504L } }, + /* 29 << 42 */ + { { 0x58846d32c4744733L,0x40517c71379f9e34L,0x2f65655f130ef6caL, + 0x526e4488f1f3503fL }, + { 0x8467bd177ee4a976L,0x1d9dc913921363d1L,0xd8d24c33b069e041L, + 0x5eb5da0a2cdf7f51L } }, + /* 30 << 42 */ + { { 0x1c0f3cb1197b994fL,0x3c95a6c52843eae9L,0x7766ffc9a6097ea5L, + 0x7bea4093d723b867L }, + { 0xb48e1f734db378f9L,0x70025b00e37b77acL,0x943dc8e7af24ad46L, + 0xb98a15ac16d00a85L } }, + /* 31 << 42 */ + { { 0x3adc38ba2743b004L,0xb1c7f4f7334415eeL,0xea43df8f1e62d05aL, + 0x326189059d76a3b6L }, + { 0x2fbd0bb5a23a0f46L,0x5bc971db6a01918cL,0x7801d94ab4743f94L, + 0xb94df65e676ae22bL } }, + /* 32 << 42 */ + { { 0xaafcbfabaf95894cL,0x7b9bdc07276b2241L,0xeaf983625bdda48bL, + 0x5977faf2a3fcb4dfL }, + { 0xbed042ef052c4b5bL,0x9fe87f71067591f0L,0xc89c73ca22f24ec7L, + 0x7d37fa9ee64a9f1bL } }, + /* 33 << 42 */ + { { 0x2710841a15562627L,0x2c01a613c243b034L,0x1d135c562bc68609L, + 0xc2ca17158b03f1f6L }, + { 0xc9966c2d3eb81d82L,0xc02abf4a8f6df13eL,0x77b34bd78f72b43bL, + 0xaff6218f360c82b0L } }, + /* 34 << 42 */ + { { 0x0aa5726c8d55b9d2L,0xdc0adbe999e9bffbL,0x9097549cefb9e72aL, + 0x167557129dfb3111L }, + { 0xdd8bf984f26847f9L,0xbcb8e387dfb30cb7L,0xc1fd32a75171ef9cL, + 0x977f3fc7389b363fL } }, + /* 35 << 42 */ + { { 0x116eaf2bf4babda0L,0xfeab68bdf7113c8eL,0xd1e3f064b7def526L, + 0x1ac30885e0b3fa02L }, + { 0x1c5a6e7b40142d9dL,0x839b560330921c0bL,0x48f301fa36a116a3L, + 0x380e1107cfd9ee6dL } }, + /* 36 << 42 */ + { { 0x7945ead858854be1L,0x4111c12ecbd4d49dL,0xece3b1ec3a29c2efL, + 0x6356d4048d3616f5L }, + { 0x9f0d6a8f594d320eL,0x0989316df651ccd2L,0x6c32117a0f8fdde4L, + 0x9abe5cc5a26a9bbcL } }, + /* 37 << 42 */ + { { 0xcff560fb9723f671L,0x21b2a12d7f3d593cL,0xe4cb18da24ba0696L, + 0x186e2220c3543384L }, + { 0x722f64e088312c29L,0x94282a9917dc7752L,0x62467bbf5a85ee89L, + 0xf435c650f10076a0L } }, + /* 38 << 42 */ + { { 0xc9ff153943b3a50bL,0x7132130c1a53efbcL,0x31bfe063f7b0c5b7L, + 0xb0179a7d4ea994ccL }, + { 0x12d064b3c85f455bL,0x472593288f6e0062L,0xf64e590bb875d6d9L, + 0x22dd6225ad92bcc7L } }, + /* 39 << 42 */ + { { 0xb658038eb9c3bd6dL,0x00cdb0d6fbba27c8L,0x0c6813371062c45dL, + 0xd8515b8c2d33407dL }, + { 0xcb8f699e8cbb5ecfL,0x8c4347f8c608d7d8L,0x2c11850abb3e00dbL, + 0x20a8dafdecb49d19L } }, + /* 40 << 42 */ + { { 0xbd78148045ee2f40L,0x75e354af416b60cfL,0xde0b58a18d49a8c4L, + 0xe40e94e2fa359536L }, + { 0xbd4fa59f62accd76L,0x05cf466a8c762837L,0xb5abda99448c277bL, + 0x5a9e01bf48b13740L } }, + /* 41 << 42 */ + { { 0x9d457798326aad8dL,0xbdef4954c396f7e7L,0x6fb274a2c253e292L, + 0x2800bf0a1cfe53e7L }, + { 0x22426d3144438fd4L,0xef2339235e259f9aL,0x4188503c03f66264L, + 0x9e5e7f137f9fdfabL } }, + /* 42 << 42 */ + { { 0x565eb76c5fcc1abaL,0xea63254859b5bff8L,0x5587c087aab6d3faL, + 0x92b639ea6ce39c1bL }, + { 0x0706e782953b135cL,0x7308912e425268efL,0x599e92c7090e7469L, + 0x83b90f529bc35e75L } }, + /* 43 << 42 */ + { { 0x4750b3d0244975b3L,0xf3a4435811965d72L,0x179c67749c8dc751L, + 0xff18cdfed23d9ff0L }, + { 
0xc40138332028e247L,0x96e280e2f3bfbc79L,0xf60417bdd0880a84L, + 0x263c9f3d2a568151L } }, + /* 44 << 42 */ + { { 0x36be15b32d2ce811L,0x846dc0c2f8291d21L,0x5cfa0ecb789fcfdbL, + 0x45a0beedd7535b9aL }, + { 0xec8e9f0796d69af1L,0x31a7c5b8599ab6dcL,0xd36d45eff9e2e09fL, + 0x3cf49ef1dcee954bL } }, + /* 45 << 42 */ + { { 0x6be34cf3086cff9bL,0x88dbd49139a3360fL,0x1e96b8cc0dbfbd1dL, + 0xc1e5f7bfcb7e2552L }, + { 0x0547b21428819d98L,0xc770dd9c7aea9dcbL,0xaef0d4c7041d68c8L, + 0xcc2b981813cb9ba8L } }, + /* 46 << 42 */ + { { 0x7fc7bc76fe86c607L,0x6b7b9337502a9a95L,0x1948dc27d14dab63L, + 0x249dd198dae047beL }, + { 0xe8356584a981a202L,0x3531dd183a893387L,0x1be11f90c85c7209L, + 0x93d2fe1ee2a52b5aL } }, + /* 47 << 42 */ + { { 0x8225bfe2ec6d6b97L,0x9cf6d6f4bd0aa5deL,0x911459cb54779f5fL, + 0x5649cddb86aeb1f3L }, + { 0x321335793f26ce5aL,0xc289a102550f431eL,0x559dcfda73b84c6fL, + 0x84973819ee3ac4d7L } }, + /* 48 << 42 */ + { { 0xb51e55e6f2606a82L,0xe25f706190f2fb57L,0xacef6c2ab1a4e37cL, + 0x864e359d5dcf2706L }, + { 0x479e6b187ce57316L,0x2cab25003a96b23dL,0xed4898628ef16df7L, + 0x2056538cef3758b5L } }, + /* 49 << 42 */ + { { 0xa7df865ef15d3101L,0x80c5533a61b553d7L,0x366e19974ed14294L, + 0x6620741fb3c0bcd6L }, + { 0x21d1d9c4edc45418L,0x005b859ec1cc4a9dL,0xdf01f630a1c462f0L, + 0x15d06cf3f26820c7L } }, + /* 50 << 42 */ + { { 0x9f7f24ee3484be47L,0x2ff33e964a0c902fL,0x00bdf4575a0bc453L, + 0x2378dfaf1aa238dbL }, + { 0x272420ec856720f2L,0x2ad9d95b96797291L,0xd1242cc6768a1558L, + 0x2e287f8b5cc86aa8L } }, + /* 51 << 42 */ + { { 0x796873d0990cecaaL,0xade55f81675d4080L,0x2645eea321f0cd84L, + 0x7a1efa0fb4e17d02L }, + { 0xf6858420037cc061L,0x682e05f0d5d43e12L,0x59c3699427218710L, + 0x85cbba4d3f7cd2fcL } }, + /* 52 << 42 */ + { { 0x726f97297a3cd22aL,0x9f8cd5dc4a628397L,0x17b93ab9c23165edL, + 0xff5f5dbf122823d4L }, + { 0xc1e4e4b5654a446dL,0xd1a9496f677257baL,0x6387ba94de766a56L, + 0x23608bc8521ec74aL } }, + /* 53 << 42 */ + { { 0x16a522d76688c4d4L,0x9d6b428207373abdL,0xa62f07acb42efaa3L, + 0xf73e00f7e3b90180L }, + { 0x36175fec49421c3eL,0xc4e44f9b3dcf2678L,0x76df436b7220f09fL, + 0x172755fb3aa8b6cfL } }, + /* 54 << 42 */ + { { 0xbab89d57446139ccL,0x0a0a6e025fe0208fL,0xcdbb63e211e5d399L, + 0x33ecaa12a8977f0bL }, + { 0x59598b21f7c42664L,0xb3e91b32ab65d08aL,0x035822eef4502526L, + 0x1dcf0176720a82a9L } }, + /* 55 << 42 */ + { { 0x50f8598f3d589e02L,0xdf0478ffb1d63d2cL,0x8b8068bd1571cd07L, + 0x30c3aa4fd79670cdL }, + { 0x25e8fd4b941ade7fL,0x3d1debdc32790011L,0x65b6dcbd3a3f9ff0L, + 0x282736a4793de69cL } }, + /* 56 << 42 */ + { { 0xef69a0c3d41d3bd3L,0xb533b8c907a26bdeL,0xe2801d97db2edf9fL, + 0xdc4a8269e1877af0L }, + { 0x6c1c58513d590dbeL,0x84632f6bee4e9357L,0xd36d36b779b33374L, + 0xb46833e39bbca2e6L } }, + /* 57 << 42 */ + { { 0x37893913f7fc0586L,0x385315f766bf4719L,0x72c56293b31855dcL, + 0xd1416d4e849061feL }, + { 0xbeb3ab7851047213L,0x447f6e61f040c996L,0xd06d310d638b1d0cL, + 0xe28a413fbad1522eL } }, + /* 58 << 42 */ + { { 0x685a76cb82003f86L,0x610d07f70bcdbca3L,0x6ff660219ca4c455L, + 0x7df39b87cea10eecL }, + { 0xb9255f96e22db218L,0x8cc6d9eb08a34c44L,0xcd4ffb86859f9276L, + 0x8fa15eb250d07335L } }, + /* 59 << 42 */ + { { 0xdf553845cf2c24b5L,0x89f66a9f52f9c3baL,0x8f22b5b9e4a7ceb3L, + 0xaffef8090e134686L }, + { 0x3e53e1c68eb8fac2L,0x93c1e4eb28aec98eL,0xb6b91ec532a43bcbL, + 0x2dbfa947b2d74a51L } }, + /* 60 << 42 */ + { { 0xe065d190ca84bad7L,0xfb13919fad58e65cL,0x3c41718bf1cb6e31L, + 0x688969f006d05c3fL }, + { 0xd4f94ce721264d45L,0xfdfb65e97367532bL,0x5b1be8b10945a39dL, + 0x229f789c2b8baf3bL } }, + /* 61 << 42 */ + { { 
0xd8f41f3e6f49f15dL,0x678ce828907f0792L,0xc69ace82fca6e867L, + 0x106451aed01dcc89L }, + { 0x1bb4f7f019fc32d2L,0x64633dfcb00c52d2L,0x8f13549aad9ea445L, + 0x99a3bf50fb323705L } }, + /* 62 << 42 */ + { { 0x0c9625a2534d4dbcL,0x45b8f1d1c2a2fea3L,0x76ec21a1a530fc1aL, + 0x4bac9c2a9e5bd734L }, + { 0x5996d76a7b4e3587L,0x0045cdee1182d9e3L,0x1aee24b91207f13dL, + 0x66452e9797345a41L } }, + /* 63 << 42 */ + { { 0x16e5b0549f950cd0L,0x9cc72fb1d7fdd075L,0x6edd61e766249663L, + 0xde4caa4df043cccbL }, + { 0x11b1f57a55c7ac17L,0x779cbd441a85e24dL,0x78030f86e46081e7L, + 0xfd4a60328e20f643L } }, + /* 64 << 42 */ + { { 0xcc7a64880a750c0fL,0x39bacfe34e548e83L,0x3d418c760c110f05L, + 0x3e4daa4cb1f11588L }, + { 0x2733e7b55ffc69ffL,0x46f147bc92053127L,0x885b2434d722df94L, + 0x6a444f65e6fc6b7cL } }, + /* 0 << 49 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 49 */ + { { 0x7a1a465ac3f16ea8L,0x115a461db2f1d11cL,0x4767dd956c68a172L, + 0x3392f2ebd13a4698L }, + { 0xc7a99ccde526cdc7L,0x8e537fdc22292b81L,0x76d8cf69a6d39198L, + 0xffc5ff432446852dL } }, + /* 2 << 49 */ + { { 0x97b14f7ea90567e6L,0x513257b7b6ae5cb7L,0x85454a3c9f10903dL, + 0xd8d2c9ad69bc3724L }, + { 0x38da93246b29cb44L,0xb540a21d77c8cbacL,0x9bbfe43501918e42L, + 0xfffa707a56c3614eL } }, + /* 3 << 49 */ + { { 0x0ce4e3f1d4e353b7L,0x062d8a14ef46b0a0L,0x6408d5ab574b73fdL, + 0xbc41d1c9d3273ffdL }, + { 0x3538e1e76be77800L,0x71fe8b37c5655031L,0x1cd916216b9b331aL, + 0xad825d0bbb388f73L } }, + /* 4 << 49 */ + { { 0x56c2e05b1cb76219L,0x0ec0bf9171567e7eL,0xe7076f8661c4c910L, + 0xd67b085bbabc04d9L }, + { 0x9fb904595e93a96aL,0x7526c1eafbdc249aL,0x0d44d367ecdd0bb7L, + 0x953999179dc0d695L } }, + /* 5 << 49 */ + { { 0x61360ee99e240d18L,0x057cdcacb4b94466L,0xe7667cd12fe5325cL, + 0x1fa297b521974e3bL }, + { 0xfa4081e7db083d76L,0x31993be6f206bd15L,0x8949269b14c19f8cL, + 0x21468d72a9d92357L } }, + /* 6 << 49 */ + { { 0x2ccbc583a4c506ecL,0x957ed188d1acfe97L,0x8baed83312f1aea2L, + 0xef2a6cb48325362dL }, + { 0x130dde428e195c43L,0xc842025a0e6050c6L,0x2da972a708686a5dL, + 0xb52999a1e508b4a8L } }, + /* 7 << 49 */ + { { 0xd9f090b910a5a8bdL,0xca91d249096864daL,0x8e6a93be3f67dbc1L, + 0xacae6fbaf5f4764cL }, + { 0x1563c6e0d21411a0L,0x28fa787fda0a4ad8L,0xd524491c908c8030L, + 0x1257ba0e4c795f07L } }, + /* 8 << 49 */ + { { 0x83f49167ceca9754L,0x426d2cf64b7939a0L,0x2555e355723fd0bfL, + 0xa96e6d06c4f144e2L }, + { 0x4768a8dd87880e61L,0x15543815e508e4d5L,0x09d7e772b1b65e15L, + 0x63439dd6ac302fa0L } }, + /* 9 << 49 */ + { { 0xb93f802fc14e35c2L,0x71735b7c4341333cL,0x03a2510416d4f362L, + 0x3f4d069bbf433c8eL }, + { 0x0d83ae01f78f5a7cL,0x50a8ffbe7c4eed07L,0xc74f890676e10f83L, + 0x7d0809669ddaf8e1L } }, + /* 10 << 49 */ + { { 0xb11df8e1698e04ccL,0x877be203169005c8L,0x32749e8c4f3c6179L, + 0x2dbc9d0a7853fc05L }, + { 0x187d4f939454d937L,0xe682ce9db4800e1bL,0xa9129ad8165e68e8L, + 0x0fe29735be7f785bL } }, + /* 11 << 49 */ + { { 0x5303f40c5b9e02b7L,0xa37c969235ee04e8L,0x5f46cc2034d6632bL, + 0x55ef72b296ac545bL }, + { 0xabec5c1f7b91b062L,0x0a79e1c7bb33e821L,0xbb04b4283a9f4117L, + 0x0de1f28ffd2a475aL } }, + /* 12 << 49 */ + { { 0x31019ccf3a4434b4L,0xa34581111a7954dcL,0xa9dac80de34972a7L, + 0xb043d05474f6b8ddL }, + { 0x021c319e11137b1aL,0x00a754ceed5cc03fL,0x0aa2c794cbea5ad4L, + 0x093e67f470c015b6L } }, + /* 13 << 49 */ + { { 0x72cdfee9c97e3f6bL,0xc10bcab4b6da7461L,0x3b02d2fcb59806b9L, + 0x85185e89a1de6f47L }, + { 0x39e6931f0eb6c4d4L,0x4d4440bdd4fa5b04L,0x5418786e34be7eb8L, + 0x6380e5219d7259bcL } }, + /* 14 << 49 */ + { { 0x20ac0351d598d710L,0x272c4166cb3a4da4L,0xdb82fe1aca71de1fL, + 
0x746e79f2d8f54b0fL }, + { 0x6e7fc7364b573e9bL,0x75d03f46fd4b5040L,0x5c1cc36d0b98d87bL, + 0x513ba3f11f472da1L } }, + /* 15 << 49 */ + { { 0x79d0af26abb177ddL,0xf82ab5687891d564L,0x2b6768a972232173L, + 0xefbb3bb08c1f6619L }, + { 0xb29c11dba6d18358L,0x519e2797b0916d3aL,0xd4dc18f09188e290L, + 0x648e86e398b0ca7fL } }, + /* 16 << 49 */ + { { 0x859d3145983c38b5L,0xb14f176c637abc8bL,0x2793fb9dcaff7be6L, + 0xebe5a55f35a66a5aL }, + { 0x7cec1dcd9f87dc59L,0x7c595cd3fbdbf560L,0x5b543b2226eb3257L, + 0x69080646c4c935fdL } }, + /* 17 << 49 */ + { { 0x7f2e440381e9ede3L,0x243c3894caf6df0aL,0x7c605bb11c073b11L, + 0xcd06a541ba6a4a62L }, + { 0x2916894949d4e2e5L,0x33649d074af66880L,0xbfc0c885e9a85035L, + 0xb4e52113fc410f4bL } }, + /* 18 << 49 */ + { { 0xdca3b70678a6513bL,0x92ea4a2a9edb1943L,0x02642216db6e2dd8L, + 0x9b45d0b49fd57894L }, + { 0x114e70dbc69d11aeL,0x1477dd194c57595fL,0xbc2208b4ec77c272L, + 0x95c5b4d7db68f59cL } }, + /* 19 << 49 */ + { { 0xb8c4fc6342e532b7L,0x386ba4229ae35290L,0xfb5dda42d201ecbcL, + 0x2353dc8ba0e38fd6L }, + { 0x9a0b85ea68f7e978L,0x96ec56822ad6d11fL,0x5e279d6ce5f6886dL, + 0xd3fe03cd3cb1914dL } }, + /* 20 << 49 */ + { { 0xfe541fa47ea67c77L,0x952bd2afe3ea810cL,0x791fef568d01d374L, + 0xa3a1c6210f11336eL }, + { 0x5ad0d5a9c7ec6d79L,0xff7038af3225c342L,0x003c6689bc69601bL, + 0x25059bc745e8747dL } }, + /* 21 << 49 */ + { { 0xfa4965b2f2086fbfL,0xf6840ea686916078L,0xd7ac762070081d6cL, + 0xe600da31b5328645L }, + { 0x01916f63529b8a80L,0xe80e48582d7d6f3eL,0x29eb0fe8d664ca7cL, + 0xf017637be7b43b0cL } }, + /* 22 << 49 */ + { { 0x9a75c80676cb2566L,0x8f76acb1b24892d9L,0x7ae7b9cc1f08fe45L, + 0x19ef73296a4907d8L }, + { 0x2db4ab715f228bf0L,0xf3cdea39817032d7L,0x0b1f482edcabe3c0L, + 0x3baf76b4bb86325cL } }, + /* 23 << 49 */ + { { 0xd49065e010089465L,0x3bab5d298e77c596L,0x7636c3a6193dbd95L, + 0xdef5d294b246e499L }, + { 0xb22c58b9286b2475L,0xa0b93939cd80862bL,0x3002c83af0992388L, + 0x6de01f9beacbe14cL } }, + /* 24 << 49 */ + { { 0x6aac688eadd70482L,0x708de92a7b4a4e8aL,0x75b6dd73758a6eefL, + 0xea4bf352725b3c43L }, + { 0x10041f2c87912868L,0xb1b1be95ef09297aL,0x19ae23c5a9f3860aL, + 0xc4f0f839515dcf4bL } }, + /* 25 << 49 */ + { { 0x3c7ecca397f6306aL,0x744c44ae68a3a4b0L,0x69cd13a0b3a1d8a2L, + 0x7cad0a1e5256b578L }, + { 0xea653fcd33791d9eL,0x9cc2a05d74b2e05fL,0x73b391dcfd7affa2L, + 0xddb7091eb6b05442L } }, + /* 26 << 49 */ + { { 0xc71e27bf8538a5c6L,0x195c63dd89abff17L,0xfd3152851b71e3daL, + 0x9cbdfda7fa680fa0L }, + { 0x9db876ca849d7eabL,0xebe2764b3c273271L,0x663357e3f208dceaL, + 0x8c5bd833565b1b70L } }, + /* 27 << 49 */ + { { 0xccc3b4f59837fc0dL,0x9b641ba8a79cf00fL,0x7428243ddfdf3990L, + 0x83a594c4020786b1L }, + { 0xb712451a526c4502L,0x9d39438e6adb3f93L,0xfdb261e3e9ff0ccdL, + 0x80344e3ce07af4c3L } }, + /* 28 << 49 */ + { { 0x75900d7c2fa4f126L,0x08a3b8655c99a232L,0x2478b6bfdb25e0c3L, + 0x482cc2c271db2edfL }, + { 0x37df7e645f321bb8L,0x8a93821b9a8005b4L,0x3fa2f10ccc8c1958L, + 0x0d3322182c269d0aL } }, + /* 29 << 49 */ + { { 0x20ab8119e246b0e6L,0xb39781e4d349fd17L,0xd293231eb31aa100L, + 0x4b779c97bb032168L }, + { 0x4b3f19e1c8470500L,0x45b7efe90c4c869dL,0xdb84f38aa1a6bbccL, + 0x3b59cb15b2fddbc1L } }, + /* 30 << 49 */ + { { 0xba5514df3fd165e8L,0x499fd6a9061f8811L,0x72cd1fe0bfef9f00L, + 0x120a4bb979ad7e8aL }, + { 0xf2ffd0955f4a5ac5L,0xcfd174f195a7a2f0L,0xd42301ba9d17baf1L, + 0xd2fa487a77f22089L } }, + /* 31 << 49 */ + { { 0x9cb09efeb1dc77e1L,0xe956693921c99682L,0x8c5469016c6067bbL, + 0xfd37857461c24456L }, + { 0x2b6a6cbe81796b33L,0x62d550f658e87f8bL,0x1b763e1c7f1b01b4L, + 0x4b93cfea1b1b5e12L } }, + /* 32 << 49 */ + { { 
0xb93452381d531696L,0x57201c0088cdde69L,0xdde922519a86afc7L, + 0xe3043895bd35cea8L }, + { 0x7608c1e18555970dL,0x8267dfa92535935eL,0xd4c60a57322ea38bL, + 0xe0bf7977804ef8b5L } }, + /* 33 << 49 */ + { { 0x1a0dab28c06fece4L,0xd405991e94e7b49dL,0xc542b6d2706dab28L, + 0xcb228da3a91618fbL }, + { 0x224e4164107d1ceaL,0xeb9fdab3d0f5d8f1L,0xc02ba3860d6e41cdL, + 0x676a72c59b1f7146L } }, + /* 34 << 49 */ + { { 0xffd6dd984d6cb00bL,0xcef9c5cade2e8d7cL,0xa1bbf5d7641c7936L, + 0x1b95b230ee8f772eL }, + { 0xf765a92ee8ac25b1L,0xceb04cfc3a18b7c6L,0x27944cef0acc8966L, + 0xcbb3c957434c1004L } }, + /* 35 << 49 */ + { { 0x9c9971a1a43ff93cL,0x5bc2db17a1e358a9L,0x45b4862ea8d9bc82L, + 0x70ebfbfb2201e052L }, + { 0xafdf64c792871591L,0xea5bcae6b42d0219L,0xde536c552ad8f03cL, + 0xcd6c3f4da76aa33cL } }, + /* 36 << 49 */ + { { 0xbeb5f6230bca6de3L,0xdd20dd99b1e706fdL,0x90b3ff9dac9059d4L, + 0x2d7b29027ccccc4eL }, + { 0x8a090a59ce98840fL,0xa5d947e08410680aL,0x49ae346a923379a5L, + 0x7dbc84f9b28a3156L } }, + /* 37 << 49 */ + { { 0xfd40d91654a1aff2L,0xabf318ba3a78fb9bL,0x50152ed83029f95eL, + 0x9fc1dd77c58ad7faL }, + { 0x5fa5791513595c17L,0xb95046688f62b3a9L,0x907b5b24ff3055b0L, + 0x2e995e359a84f125L } }, + /* 38 << 49 */ + { { 0x87dacf697e9bbcfbL,0x95d0c1d6e86d96e3L,0x65726e3c2d95a75cL, + 0x2c3c9001acd27f21L }, + { 0x1deab5616c973f57L,0x108b7e2ca5221643L,0x5fee9859c4ef79d4L, + 0xbd62b88a40d4b8c6L } }, + /* 39 << 49 */ + { { 0xb4dd29c4197c75d6L,0x266a6df2b7076febL,0x9512d0ea4bf2df11L, + 0x1320c24f6b0cc9ecL }, + { 0x6bb1e0e101a59596L,0x8317c5bbeff9aaacL,0x65bb405e385aa6c9L, + 0x613439c18f07988fL } }, + /* 40 << 49 */ + { { 0xd730049f16a66e91L,0xe97f2820fa1b0e0dL,0x4131e003304c28eaL, + 0x820ab732526bac62L }, + { 0xb2ac9ef928714423L,0x54ecfffaadb10cb2L,0x8781476ef886a4ccL, + 0x4b2c87b5db2f8d49L } }, + /* 41 << 49 */ + { { 0xe857cd200a44295dL,0x707d7d2158c6b044L,0xae8521f9f596757cL, + 0x87448f0367b2b714L }, + { 0x13a9bc455ebcd58dL,0x79bcced99122d3c1L,0x3c6442479e076642L, + 0x0cf227782df4767dL } }, + /* 42 << 49 */ + { { 0x5e61aee471d444b6L,0x211236bfc5084a1dL,0x7e15bc9a4fd3eaf6L, + 0x68df2c34ab622bf5L }, + { 0x9e674f0f59bf4f36L,0xf883669bd7f34d73L,0xc48ac1b831497b1dL, + 0x323b925d5106703bL } }, + /* 43 << 49 */ + { { 0x22156f4274082008L,0xeffc521ac8482bcbL,0x5c6831bf12173479L, + 0xcaa2528fc4739490L }, + { 0x84d2102a8f1b3c4dL,0xcf64dfc12d9bec0dL,0x433febad78a546efL, + 0x1f621ec37b73cef1L } }, + /* 44 << 49 */ + { { 0x6aecd62737338615L,0x162082ab01d8edf6L,0x833a811919e86b66L, + 0x6023a251d299b5dbL }, + { 0xf5bb0c3abbf04b89L,0x6735eb69ae749a44L,0xd0e058c54713de3bL, + 0xfdf2593e2c3d4ccdL } }, + /* 45 << 49 */ + { { 0x1b8f414efdd23667L,0xdd52aacafa2015eeL,0x3e31b517bd9625ffL, + 0x5ec9322d8db5918cL }, + { 0xbc73ac85a96f5294L,0x82aa5bf361a0666aL,0x49755810bf08ac42L, + 0xd21cdfd5891cedfcL } }, + /* 46 << 49 */ + { { 0x918cb57b67f8be10L,0x365d1a7c56ffa726L,0x2435c5046532de93L, + 0xc0fc5e102674cd02L }, + { 0x6e51fcf89cbbb142L,0x1d436e5aafc50692L,0x766bffff3fbcae22L, + 0x3148c2fdfd55d3b8L } }, + /* 47 << 49 */ + { { 0x52c7fdc9233222faL,0x89ff1092e419fb6bL,0x3cd6db9925254977L, + 0x2e85a1611cf12ca7L }, + { 0xadd2547cdc810bc9L,0xea3f458f9d257c22L,0x642c1fbe27d6b19bL, + 0xed07e6b5140481a6L } }, + /* 48 << 49 */ + { { 0x6ada1d4286d2e0f8L,0xe59201220e8a9fd5L,0x02c936af708c1b49L, + 0x60f30fee2b4bfaffL }, + { 0x6637ad06858e6a61L,0xce4c77673fd374d0L,0x39d54b2d7188defbL, + 0xa8c9d250f56a6b66L } }, + /* 49 << 49 */ + { { 0x58fc0f5eb24fe1dcL,0x9eaf9dee6b73f24cL,0xa90d588b33650705L, + 0xde5b62c5af2ec729L }, + { 
0x5c72cfaed3c2b36eL,0x868c19d5034435daL,0x88605f93e17ee145L, + 0xaa60c4ee77a5d5b1L } }, + /* 50 << 49 */ + { { 0xbcf5bfd23b60c472L,0xaf4ef13ceb1d3049L,0x373f44fce13895c9L, + 0xf29b382f0cbc9822L }, + { 0x1bfcb85373efaef6L,0xcf56ac9ca8c96f40L,0xd7adf1097a191e24L, + 0x98035f44bf8a8dc2L } }, + /* 51 << 49 */ + { { 0xf40a71b91e750c84L,0xc57f7b0c5dc6c469L,0x49a0e79c6fbc19c1L, + 0x6b0f5889a48ebdb8L }, + { 0x5d3fd084a07c4e9fL,0xc3830111ab27de14L,0x0e4929fe33e08dccL, + 0xf4a5ad2440bb73a3L } }, + /* 52 << 49 */ + { { 0xde86c2bf490f97caL,0x288f09c667a1ce18L,0x364bb8861844478dL, + 0x7840fa42ceedb040L }, + { 0x1269fdd25a631b37L,0x94761f1ea47c8b7dL,0xfc0c2e17481c6266L, + 0x85e16ea23daa5fa7L } }, + /* 53 << 49 */ + { { 0xccd8603392491048L,0x0c2f6963f4d402d7L,0x6336f7dfdf6a865cL, + 0x0a2a463cb5c02a87L }, + { 0xb0e29be7bf2f12eeL,0xf0a2200266bad988L,0x27f87e039123c1d7L, + 0x21669c55328a8c98L } }, + /* 54 << 49 */ + { { 0x186b980392f14529L,0xd3d056cc63954df3L,0x2f03fd58175a46f6L, + 0x63e34ebe11558558L }, + { 0xe13fedee5b80cfa5L,0xe872a120d401dbd1L,0x52657616e8a9d667L, + 0xbc8da4b6e08d6693L } }, + /* 55 << 49 */ + { { 0x370fb9bb1b703e75L,0x6773b186d4338363L,0x18dad378ecef7bffL, + 0xaac787ed995677daL }, + { 0x4801ea8b0437164bL,0xf430ad2073fe795eL,0xb164154d8ee5eb73L, + 0x0884ecd8108f7c0eL } }, + /* 56 << 49 */ + { { 0x0e6ec0965f520698L,0x640631fe44f7b8d9L,0x92fd34fca35a68b9L, + 0x9c5a4b664d40cf4eL }, + { 0x949454bf80b6783dL,0x80e701fe3a320a10L,0x8d1a564a1a0a39b2L, + 0x1436d53d320587dbL } }, + /* 57 << 49 */ + { { 0xf5096e6d6556c362L,0xbc23a3c0e2455d7eL,0x3a7aee54807230f9L, + 0x9ba1cfa622ae82fdL }, + { 0x833a057a99c5d706L,0x8be85f4b842315c9L,0xd083179a66a72f12L, + 0x2fc77d5dcdcc73cdL } }, + /* 58 << 49 */ + { { 0x22b88a805616ee30L,0xfb09548fe7ab1083L,0x8ad6ab0d511270cdL, + 0x61f6c57a6924d9abL }, + { 0xa0f7bf7290aecb08L,0x849f87c90df784a4L,0x27c79c15cfaf1d03L, + 0xbbf9f675c463faceL } }, + /* 59 << 49 */ + { { 0x91502c65765ba543L,0x18ce3cac42ea60ddL,0xe5cee6ac6e43ecb3L, + 0x63e4e91068f2aeebL }, + { 0x26234fa3c85932eeL,0x96883e8b4c90c44dL,0x29b9e738a18a50f6L, + 0xbfc62b2a3f0420dfL } }, + /* 60 << 49 */ + { { 0xd22a7d906d3e1fa9L,0x17115618fe05b8a3L,0x2a0c9926bb2b9c01L, + 0xc739fcc6e07e76a2L }, + { 0x540e9157165e439aL,0x06353a626a9063d8L,0x84d9559461e927a3L, + 0x013b9b26e2e0be7fL } }, + /* 61 << 49 */ + { { 0x4feaec3b973497f1L,0x15c0f94e093ebc2dL,0x6af5f22733af0583L, + 0x0c2af206c61f3340L }, + { 0xd25dbdf14457397cL,0x2e8ed017cabcbae0L,0xe3010938c2815306L, + 0xbaa99337e8c6cd68L } }, + /* 62 << 49 */ + { { 0x085131823b0ec7deL,0x1e1b822b58df05dfL,0x5c14842fa5c3b683L, + 0x98fe977e3eba34ceL }, + { 0xfd2316c20d5e8873L,0xe48d839abd0d427dL,0x495b2218623fc961L, + 0x24ee56e7b46fba5eL } }, + /* 63 << 49 */ + { { 0x9184a55b91e4de58L,0xa7488ca5dfdea288L,0xa723862ea8dcc943L, + 0x92d762b2849dc0fcL }, + { 0x3c444a12091ff4a9L,0x581113fa0cada274L,0xb9de0a4530d8eae2L, + 0x5e0fcd85df6b41eaL } }, + /* 64 << 49 */ + { { 0x6233ea68c094dbb5L,0xb77d062ed968d410L,0x3e719bbc58b3002dL, + 0x68e7dd3d3dc49d58L }, + { 0x8d825740013a5e58L,0x213117473c9e3c1bL,0x0cb0a2a77c99b6abL, + 0x5c48a3b3c2f888f2L } }, + /* 0 << 56 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 56 */ + { { 0xc7913e91991724f3L,0x5eda799c39cbd686L,0xddb595c763d4fc1eL, + 0x6b63b80bac4fed54L }, + { 0x6ea0fc697e5fb516L,0x737708bad0f1c964L,0x9628745f11a92ca5L, + 0x61f379589a86967aL } }, + /* 2 << 56 */ + { { 0x9af39b2caa665072L,0x78322fa4efd324efL,0x3d153394c327bd31L, + 0x81d5f2713129dab0L }, + { 
0xc72e0c42f48027f5L,0xaa40cdbc8536e717L,0xf45a657a2d369d0fL, + 0xb03bbfc4ea7f74e6L } }, + /* 3 << 56 */ + { { 0x46a8c4180d738dedL,0x6f1a5bb0e0de5729L,0xf10230b98ba81675L, + 0x32c6f30c112b33d4L }, + { 0x7559129dd8fffb62L,0x6a281b47b459bf05L,0x77c1bd3afa3b6776L, + 0x0709b3807829973aL } }, + /* 4 << 56 */ + { { 0x8c26b232a3326505L,0x38d69272ee1d41bfL,0x0459453effe32afaL, + 0xce8143ad7cb3ea87L }, + { 0x932ec1fa7e6ab666L,0x6cd2d23022286264L,0x459a46fe6736f8edL, + 0x50bf0d009eca85bbL } }, + /* 5 << 56 */ + { { 0x0b825852877a21ecL,0x300414a70f537a94L,0x3f1cba4021a9a6a2L, + 0x50824eee76943c00L }, + { 0xa0dbfcecf83cba5dL,0xf953814893b4f3c0L,0x6174416248f24dd7L, + 0x5322d64de4fb09ddL } }, + /* 6 << 56 */ + { { 0x574473843d9325f3L,0xa9bef2d0f371cb84L,0x77d2188ba61e36c5L, + 0xbbd6a7d7c602df72L }, + { 0xba3aa9028f61bc0bL,0xf49085ed6ed0b6a1L,0x8bc625d6ae6e8298L, + 0x832b0b1da2e9c01dL } }, + /* 7 << 56 */ + { { 0xa337c447f1f0ced1L,0x800cc7939492dd2bL,0x4b93151dbea08efaL, + 0x820cf3f8de0a741eL }, + { 0xff1982dc1c0f7d13L,0xef92196084dde6caL,0x1ad7d97245f96ee3L, + 0x319c8dbe29dea0c7L } }, + /* 8 << 56 */ + { { 0xd3ea38717b82b99bL,0x75922d4d470eb624L,0x8f66ec543b95d466L, + 0x66e673ccbee1e346L }, + { 0x6afe67c4b5f2b89aL,0x3de9c1e6290e5cd3L,0x8c278bb6310a2adaL, + 0x420fa3840bdb323bL } }, + /* 9 << 56 */ + { { 0x0ae1d63b0eb919b0L,0xd74ee51da74b9620L,0x395458d0a674290cL, + 0x324c930f4620a510L }, + { 0x2d1f4d19fbac27d4L,0x4086e8ca9bedeeacL,0x0cdd211b9b679ab8L, + 0x5970167d7090fec4L } }, + /* 10 << 56 */ + { { 0x3420f2c9faf1fc63L,0x616d333a328c8bb4L,0x7d65364c57f1fe4aL, + 0x9343e87755e5c73aL }, + { 0x5795176be970e78cL,0xa36ccebf60533627L,0xfc7c738009cdfc1bL, + 0xb39a2afeb3fec326L } }, + /* 11 << 56 */ + { { 0xb7ff1ba16224408aL,0xcc856e92247cfc5eL,0x01f102e7c18bc493L, + 0x4613ab742091c727L }, + { 0xaa25e89cc420bf2bL,0x00a5317690337ec2L,0xd2be9f437d025fc7L, + 0x3316fb856e6fe3dcL } }, + /* 12 << 56 */ + { { 0x27520af59ac50814L,0xfdf95e789a8e4223L,0xb7e7df2a56bec5a0L, + 0xf7022f7ddf159e5dL }, + { 0x93eeeab1cac1fe8fL,0x8040188c37451168L,0x7ee8aa8ad967dce6L, + 0xfa0e79e73abc9299L } }, + /* 13 << 56 */ + { { 0x67332cfc2064cfd1L,0x339c31deb0651934L,0x719b28d52a3bcbeaL, + 0xee74c82b9d6ae5c6L }, + { 0x0927d05ebaf28ee6L,0x82cecf2c9d719028L,0x0b0d353eddb30289L, + 0xfe4bb977fddb2e29L } }, + /* 14 << 56 */ + { { 0xbb5bb990640bfd9eL,0xd226e27782f62108L,0x4bf0098502ffdd56L, + 0x7756758a2ca1b1b5L }, + { 0xc32b62a35285fe91L,0xedbc546a8c9cd140L,0x1e47a013af5cb008L, + 0xbca7e720073ce8f2L } }, + /* 15 << 56 */ + { { 0xe10b2ab817a91caeL,0xb89aab6508e27f63L,0x7b3074a7dba3ddf9L, + 0x1c20ce09330c2972L }, + { 0x6b9917b45fcf7e33L,0xe6793743945ceb42L,0x18fc22155c633d19L, + 0xad1adb3cc7485474L } }, + /* 16 << 56 */ + { { 0x646f96796424c49bL,0xf888dfe867c241c9L,0xe12d4b9324f68b49L, + 0x9a6b62d8a571df20L }, + { 0x81b4b26d179483cbL,0x666f96329511fae2L,0xd281b3e4d53aa51fL, + 0x7f96a7657f3dbd16L } }, + /* 17 << 56 */ + { { 0xa7f8b5bf074a30ceL,0xd7f52107005a32e6L,0x6f9e090750237ed4L, + 0x2f21da478096fa2bL }, + { 0xf3e19cb4eec863a0L,0xd18f77fd9527620aL,0x9505c81c407c1cf8L, + 0x9998db4e1b6ec284L } }, + /* 18 << 56 */ + { { 0x7e3389e5c247d44dL,0x125071413f4f3d80L,0xd4ba01104a78a6c7L, + 0x312874a0767720beL }, + { 0xded059a675944370L,0xd6123d903b2c0bddL,0xa56b717b51c108e3L, + 0x9bb7940e070623e9L } }, + /* 19 << 56 */ + { { 0x794e2d5984ac066cL,0xf5954a92e68c69a0L,0x28c524584fd99dccL, + 0x60e639fcb1012517L }, + { 0xc2e601257de79248L,0xe9ef6404f12fc6d7L,0x4c4f28082a3b5d32L, + 0x865ad32ec768eb8aL } }, + /* 20 << 56 */ + { { 
0xac02331b13fb70b6L,0x037b44c195599b27L,0x1a860fc460bd082cL, + 0xa2e25745c980cd01L }, + { 0xee3387a81da0263eL,0x931bfb952d10f3d6L,0x5b687270a1f24a32L, + 0xf140e65dca494b86L } }, + /* 21 << 56 */ + { { 0x4f4ddf91b2f1ac7aL,0xf99eaabb760fee27L,0x57f4008a49c228e5L, + 0x090be4401cf713bbL }, + { 0xac91fbe45004f022L,0xd838c2c2569e1af6L,0xd6c7d20b0f1daaa5L, + 0xaa063ac11bbb02c0L } }, + /* 22 << 56 */ + { { 0x0938a42259558a78L,0x5343c6698435da2fL,0x96f67b18034410dcL, + 0x7cc1e42484510804L }, + { 0x86a1543f16dfbb7dL,0x921fa9425b5bd592L,0x9dcccb6eb33dd03cL, + 0x8581ddd9b843f51eL } }, + /* 23 << 56 */ + { { 0x54935fcb81d73c9eL,0x6d07e9790a5e97abL,0x4dc7b30acf3a6babL, + 0x147ab1f3170bee11L }, + { 0x0aaf8e3d9fafdee4L,0xfab3dbcb538a8b95L,0x405df4b36ef13871L, + 0xf1f4e9cb088d5a49L } }, + /* 24 << 56 */ + { { 0x9bcd24d366b33f1dL,0x3b97b8205ce445c0L,0xe2926549ba93ff61L, + 0xd9c341ce4dafe616L }, + { 0xfb30a76e16efb6f3L,0xdf24b8ca605b953cL,0x8bd52afec2fffb9fL, + 0xbbac5ff7e19d0b96L } }, + /* 25 << 56 */ + { { 0x43c01b87459afccdL,0x6bd45143b7432652L,0x8473453055b5d78eL, + 0x81088fdb1554ba7dL }, + { 0xada0a52c1e269375L,0xf9f037c42dc5ec10L,0xc066060794bfbc11L, + 0xc0a630bbc9c40d2fL } }, + /* 26 << 56 */ + { { 0x5efc797eab64c31eL,0xffdb1dab74507144L,0xf61242871ca6790cL, + 0xe9609d81e69bf1bfL }, + { 0xdb89859500d24fc9L,0x9c750333e51fb417L,0x51830a91fef7bbdeL, + 0x0ce67dc8945f585cL } }, + /* 27 << 56 */ + { { 0x9a730ed44763eb50L,0x24a0e221c1ab0d66L,0x643b6393648748f3L, + 0x1982daa16d3c6291L }, + { 0x6f00a9f78bbc5549L,0x7a1783e17f36384eL,0xe8346323de977f50L, + 0x91ab688db245502aL } }, + /* 28 << 56 */ + { { 0x331ab6b56d0bdd66L,0x0a6ef32e64b71229L,0x1028150efe7c352fL, + 0x27e04350ce7b39d3L }, + { 0x2a3c8acdc1070c82L,0xfb2034d380c9feefL,0x2d729621709f3729L, + 0x8df290bf62cb4549L } }, + /* 29 << 56 */ + { { 0x02f99f33fc2e4326L,0x3b30076d5eddf032L,0xbb21f8cf0c652fb5L, + 0x314fb49eed91cf7bL }, + { 0xa013eca52f700750L,0x2b9e3c23712a4575L,0xe5355557af30fbb0L, + 0x1ada35167c77e771L } }, + /* 30 << 56 */ + { { 0x45f6ecb27b135670L,0xe85d19df7cfc202eL,0x0f1b50c758d1be9fL, + 0x5ebf2c0aead2e344L }, + { 0x1531fe4eabc199c9L,0xc703259256bab0aeL,0x16ab2e486c1fec54L, + 0x0f87fda804280188L } }, + /* 31 << 56 */ + { { 0xdc9f46fc609e4a74L,0x2a44a143ba667f91L,0xbc3d8b95b4d83436L, + 0xa01e4bd0c7bd2958L }, + { 0x7b18293273483c90L,0xa79c6aa1a7c7b598L,0xbf3983c6eaaac07eL, + 0x8f18181e96e0d4e6L } }, + /* 32 << 56 */ + { { 0x8553d37c051af62bL,0xe9a998eb0bf94496L,0xe0844f9fb0d59aa1L, + 0x983fd558e6afb813L }, + { 0x9670c0ca65d69804L,0x732b22de6ea5ff2dL,0xd7640ba95fd8623bL, + 0x9f619163a6351782L } }, + /* 33 << 56 */ + { { 0x0bfc27eeacee5043L,0xae419e732eb10f02L,0x19c028d18943fb05L, + 0x71f01cf7ff13aa2aL }, + { 0x7790737e8887a132L,0x6751330966318410L,0x9819e8a37ddb795eL, + 0xfecb8ef5dad100b2L } }, + /* 34 << 56 */ + { { 0x59f74a223021926aL,0xb7c28a496f9b4c1cL,0xed1a733f912ad0abL, + 0x42a910af01a5659cL }, + { 0x3842c6e07bd68cabL,0x2b57fa3876d70ac8L,0x8a6707a83c53aaebL, + 0x62c1c51065b4db18L } }, + /* 35 << 56 */ + { { 0x8de2c1fbb2d09dc7L,0xc3dfed12266bd23bL,0x927d039bd5b27db6L, + 0x2fb2f0f1103243daL }, + { 0xf855a07b80be7399L,0xed9327ce1f9f27a8L,0xa0bd99c7729bdef7L, + 0x2b67125e28250d88L } }, + /* 36 << 56 */ + { { 0x784b26e88670ced7L,0xe3dfe41fc31bd3b4L,0x9e353a06bcc85cbcL, + 0x302e290960178a9dL }, + { 0x860abf11a6eac16eL,0x76447000aa2b3aacL,0x46ff9d19850afdabL, + 0x35bdd6a5fdb2d4c1L } }, + /* 37 << 56 */ + { { 0xe82594b07e5c9ce9L,0x0f379e5320af346eL,0x608b31e3bc65ad4aL, + 0x710c6b12267c4826L }, + { 
0x51c966f971954cf1L,0xb1cec7930d0aa215L,0x1f15598986bd23a8L, + 0xae2ff99cf9452e86L } }, + /* 38 << 56 */ + { { 0xd8dd953c340ceaa2L,0x263552752e2e9333L,0x15d4e5f98586f06dL, + 0xd6bf94a8f7cab546L }, + { 0x33c59a0ab76a9af0L,0x52740ab3ba095af7L,0xc444de8a24389ca0L, + 0xcc6f9863706da0cbL } }, + /* 39 << 56 */ + { { 0xb5a741a76b2515cfL,0x71c416019585c749L,0x78350d4fe683de97L, + 0x31d6152463d0b5f5L }, + { 0x7a0cc5e1fbce090bL,0xaac927edfbcb2a5bL,0xe920de4920d84c35L, + 0x8c06a0b622b4de26L } }, + /* 40 << 56 */ + { { 0xd34dd58bafe7ddf3L,0x55851fedc1e6e55bL,0xd1395616960696e7L, + 0x940304b25f22705fL }, + { 0x6f43f861b0a2a860L,0xcf1212820e7cc981L,0x121862120ab64a96L, + 0x09215b9ab789383cL } }, + /* 41 << 56 */ + { { 0x311eb30537387c09L,0xc5832fcef03ee760L,0x30358f5832f7ea19L, + 0xe01d3c3491d53551L }, + { 0x1ca5ee41da48ea80L,0x34e71e8ecf4fa4c1L,0x312abd257af1e1c7L, + 0xe3afcdeb2153f4a5L } }, + /* 42 << 56 */ + { { 0x9d5c84d700235e9aL,0x0308d3f48c4c836fL,0xc0a66b0489332de5L, + 0x610dd39989e566efL }, + { 0xf8eea460d1ac1635L,0x84cbb3fb20a2c0dfL,0x40afb488e74a48c5L, + 0x29738198d326b150L } }, + /* 43 << 56 */ + { { 0x2a17747fa6d74081L,0x60ea4c0555a26214L,0x53514bb41f88c5feL, + 0xedd645677e83426cL }, + { 0xd5d6cbec96460b25L,0xa12fd0ce68dc115eL,0xc5bc3ed2697840eaL, + 0x969876a8a6331e31L } }, + /* 44 << 56 */ + { { 0x60c36217472ff580L,0xf42297054ad41393L,0x4bd99ef0a03b8b92L, + 0x501c7317c144f4f6L }, + { 0x159009b318464945L,0x6d5e594c74c5c6beL,0x2d587011321a3660L, + 0xd1e184b13898d022L } }, + /* 45 << 56 */ + { { 0x5ba047524c6a7e04L,0x47fa1e2b45550b65L,0x9419daf048c0a9a5L, + 0x663629537c243236L }, + { 0xcd0744b15cb12a88L,0x561b6f9a2b646188L,0x599415a566c2c0c0L, + 0xbe3f08590f83f09aL } }, + /* 46 << 56 */ + { { 0x9141c5beb92041b8L,0x01ae38c726477d0dL,0xca8b71f3d12c7a94L, + 0xfab5b31f765c70dbL }, + { 0x76ae7492487443e9L,0x8595a310990d1349L,0xf8dbeda87d460a37L, + 0x7f7ad0821e45a38fL } }, + /* 47 << 56 */ + { { 0xed1d4db61059705aL,0xa3dd492ae6b9c697L,0x4b92ee3a6eb38bd5L, + 0xbab2609d67cc0bb7L }, + { 0x7fc4fe896e70ee82L,0xeff2c56e13e6b7e3L,0x9b18959e34d26fcaL, + 0x2517ab66889d6b45L } }, + /* 48 << 56 */ + { { 0xf167b4e0bdefdd4fL,0x69958465f366e401L,0x5aa368aba73bbec0L, + 0x121487097b240c21L }, + { 0x378c323318969006L,0xcb4d73cee1fe53d1L,0x5f50a80e130c4361L, + 0xd67f59517ef5212bL } }, + /* 49 << 56 */ + { { 0xf145e21e9e70c72eL,0xb2e52e295566d2fbL,0x44eaba4a032397f5L, + 0x5e56937b7e31a7deL }, + { 0x68dcf517456c61e1L,0xbc2e954aa8b0a388L,0xe3552fa760a8b755L, + 0x03442dae73ad0cdeL } }, + /* 50 << 56 */ + { { 0x37ffe747ceb26210L,0x983545e8787baef9L,0x8b8c853586a3de31L, + 0xc621dbcbfacd46dbL }, + { 0x82e442e959266fbbL,0xa3514c37339d471cL,0x3a11b77162cdad96L, + 0xf0cb3b3cecf9bdf0L } }, + /* 51 << 56 */ + { { 0x3fcbdbce478e2135L,0x7547b5cfbda35342L,0xa97e81f18a677af6L, + 0xc8c2bf8328817987L }, + { 0xdf07eaaf45580985L,0xc68d1f05c93b45cbL,0x106aa2fec77b4cacL, + 0x4c1d8afc04a7ae86L } }, + /* 52 << 56 */ + { { 0xdb41c3fd9eb45ab2L,0x5b234b5bd4b22e74L,0xda253decf215958aL, + 0x67e0606ea04edfa0L }, + { 0xabbbf070ef751b11L,0xf352f175f6f06dceL,0xdfc4b6af6839f6b4L, + 0x53ddf9a89959848eL } }, + /* 53 << 56 */ + { { 0xda49c379c21520b0L,0x90864ff0dbd5d1b6L,0x2f055d235f49c7f7L, + 0xe51e4e6aa796b2d8L }, + { 0xc361a67f5c9dc340L,0x5ad53c37bca7c620L,0xda1d658832c756d0L, + 0xad60d9118bb67e13L } }, + /* 54 << 56 */ + { { 0xd6c47bdf0eeec8c6L,0x4a27fec1078a1821L,0x081f7415c3099524L, + 0x8effdf0b82cd8060L }, + { 0xdb70ec1c65842df8L,0x8821b358d319a901L,0x72ee56eede42b529L, + 0x5bb39592236e4286L } }, + /* 55 << 56 */ + { { 
0xd1183316fd6f7140L,0xf9fadb5bbd8e81f7L,0x701d5e0c5a02d962L, + 0xfdee4dbf1b601324L }, + { 0xbed1740735d7620eL,0x04e3c2c3f48c0012L,0x9ee29da73455449aL, + 0x562cdef491a836c4L } }, + /* 56 << 56 */ + { { 0x8f682a5f47701097L,0x617125d8ff88d0c2L,0x948fda2457bb86ddL, + 0x348abb8f289f7286L }, + { 0xeb10eab599d94bbdL,0xd51ba28e4684d160L,0xabe0e51c30c8f41aL, + 0x66588b4513254f4aL } }, + /* 57 << 56 */ + { { 0x147ebf01fad097a5L,0x49883ea8610e815dL,0xe44d60ba8a11de56L, + 0xa970de6e827a7a6dL }, + { 0x2be414245e17fc19L,0xd833c65701214057L,0x1375813b363e723fL, + 0x6820bb88e6a52e9bL } }, + /* 58 << 56 */ + { { 0x7e7f6970d875d56aL,0xd6a0a9ac51fbf6bfL,0x54ba8790a3083c12L, + 0xebaeb23d6ae7eb64L }, + { 0xa8685c3ab99a907aL,0xf1e74550026bf40bL,0x7b73a027c802cd9eL, + 0x9a8a927c4fef4635L } }, + /* 59 << 56 */ + { { 0xe1b6f60c08191224L,0xc4126ebbde4ec091L,0xe1dff4dc4ae38d84L, + 0xde3f57db4f2ef985L }, + { 0x34964337d446a1ddL,0x7bf217a0859e77f6L,0x8ff105278e1d13f5L, + 0xa304ef0374eeae27L } }, + /* 60 << 56 */ + { { 0xfc6f5e47d19dfa5aL,0xdb007de37fad982bL,0x28205ad1613715f5L, + 0x251e67297889529eL }, + { 0x727051841ae98e78L,0xf818537d271cac32L,0xc8a15b7eb7f410f5L, + 0xc474356f81f62393L } }, + /* 61 << 56 */ + { { 0x92dbdc5ac242316bL,0xabe060acdbf4aff5L,0x6e8c38fe909a8ec6L, + 0x43e514e56116cb94L }, + { 0x2078fa3807d784f9L,0x1161a880f4b5b357L,0x5283ce7913adea3dL, + 0x0756c3e6cc6a910bL } }, + /* 62 << 56 */ + { { 0x60bcfe01aaa79697L,0x04a73b2956391db1L,0xdd8dad47189b45a0L, + 0xbfac0dd048d5b8d9L }, + { 0x34ab3af57d3d2ec2L,0x6fa2fc2d207bd3afL,0x9ff4009266550dedL, + 0x719b3e871fd5b913L } }, + /* 63 << 56 */ + { { 0xa573a4966d17fbc7L,0x0cd1a70a73d2b24eL,0x34e2c5cab2676937L, + 0xe7050b06bf669f21L }, + { 0xfbe948b61ede9046L,0xa053005197662659L,0x58cbd4edf10124c5L, + 0xde2646e4dd6c06c8L } }, + /* 64 << 56 */ + { { 0x332f81088cad38c0L,0x471b7e906bd68ae2L,0x56ac3fb20d8e27a3L, + 0xb54660db136b4b0dL }, + { 0x123a1e11a6fd8de4L,0x44dbffeaa37799efL,0x4540b977ce6ac17cL, + 0x495173a8af60acefL } }, + /* 0 << 63 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 63 */ + { { 0x9ebb284d391c2a82L,0xbcdd4863158308e8L,0x006f16ec83f1edcaL, + 0xa13e2c37695dc6c8L }, + { 0x2ab756f04a057a87L,0xa8765500a6b48f98L,0x4252face68651c44L, + 0xa52b540be1765e02L } }, + /* 2 << 63 */ + { { 0x4f922fc516a0d2bbL,0x0d5cc16c1a623499L,0x9241cf3a57c62c8bL, + 0x2f5e6961fd1b667fL }, + { 0x5c15c70bf5a01797L,0x3d20b44d60956192L,0x04911b37071fdb52L, + 0xf648f9168d6f0f7bL } }, + /* 3 << 63 */ + { { 0x6dc1acafe60b7cf7L,0x25860a5084a9d869L,0x56fc6f09e7ba8ac4L, + 0x828c5bd06148d29eL }, + { 0xac6b435edc55ae5fL,0xa527f56cc0117411L,0x94d5045efd24342cL, + 0x2c4c0a3570b67c0dL } }, + /* 4 << 63 */ + { { 0x027cc8b8fac61d9aL,0x7d25e062e3c6fe8aL,0xe08805bfe5bff503L, + 0x13271e6c6ff632f7L }, + { 0x55dca6c0232f76a5L,0x8957c32d701ef426L,0xee728bcba10a5178L, + 0x5ea60411b62c5173L } }, + /* 5 << 63 */ + { { 0xfc4e964ed0b8892bL,0x9ea176839301bb74L,0x6265c5aefcc48626L, + 0xe60cf82ebb3e9102L }, + { 0x57adf797d4df5531L,0x235b59a18deeefe2L,0x60adcf583f306eb1L, + 0x105c27533d09492dL } }, + /* 6 << 63 */ + { { 0x4090914bb5def996L,0x1cb69c83233dd1e7L,0xc1e9c1d39b3d5e76L, + 0x1f3338edfccf6012L }, + { 0xb1e95d0d2f5378a8L,0xacf4c2c72f00cd21L,0x6e984240eb5fe290L, + 0xd66c038d248088aeL } }, + /* 7 << 63 */ + { { 0x804d264af94d70cfL,0xbdb802ef7314bf7eL,0x8fb54de24333ed02L, + 0x740461e0285635d9L }, + { 0x4113b2c8365e9383L,0xea762c833fdef652L,0x4eec6e2e47b956c1L, + 0xa3d814be65620fa4L } }, + /* 8 << 63 */ + { { 0x9ad5462bb4d8bc50L,0x181c0b16a9195770L,0xebd4fe1c78412a68L, + 
0xae0341bcc0dff48cL }, + { 0xb6bc45cf7003e866L,0xf11a6dea8a24a41bL,0x5407151ad04c24c2L, + 0x62c9d27dda5b7b68L } }, + /* 9 << 63 */ + { { 0x2e96423588cceff6L,0x8594c54f8b07ed69L,0x1578e73cc84d0d0dL, + 0x7b4e1055ff532868L }, + { 0xa348c0d5b5ec995aL,0xbf4b9d5514289a54L,0x9ba155a658fbd777L, + 0x186ed7a81a84491dL } }, + /* 10 << 63 */ + { { 0xd4992b30614c0900L,0xda98d121bd00c24bL,0x7f534dc87ec4bfa1L, + 0x4a5ff67437dc34bcL }, + { 0x68c196b81d7ea1d7L,0x38cf289380a6d208L,0xfd56cd09e3cbbd6eL, + 0xec72e27e4205a5b6L } }, + /* 11 << 63 */ + { { 0x15ea68f5a44f77f7L,0x7aa5f9fdb43c52bcL,0x86ff676f94f0e609L, + 0xa4cde9632e2d432bL }, + { 0x8cafa0c0eee470afL,0x84137d0e8a3f5ec8L,0xebb40411faa31231L, + 0xa239c13f6f7f7ccfL } }, + /* 12 << 63 */ + { { 0x32865719a8afd30bL,0x867983288a826dceL,0xdf04e891c4a8fbe0L, + 0xbb6b6e1bebf56ad3L }, + { 0x0a695b11471f1ff0L,0xd76c3389be15baf0L,0x018edb95be96c43eL, + 0xf2beaaf490794158L } }, + /* 13 << 63 */ + { { 0x152db09ec3076a27L,0x5e82908ee416545dL,0xa2c41272356d6f2eL, + 0xdc9c964231fd74e1L }, + { 0x66ceb88d519bf615L,0xe29ecd7605a2274eL,0x3a0473c4bf5e2fa0L, + 0x6b6eb67164284e67L } }, + /* 14 << 63 */ + { { 0xe8b97932b88756ddL,0xed4e8652f17e3e61L,0xc2dd14993ee1c4a4L, + 0xc0aaee17597f8c0eL }, + { 0x15c4edb96c168af3L,0x6563c7bfb39ae875L,0xadfadb6f20adb436L, + 0xad55e8c99a042ac0L } }, + /* 15 << 63 */ + { { 0x975a1ed8b76da1f5L,0x10dfa466a58acb94L,0x8dd7f7e3ac060282L, + 0x6813e66a572a051eL }, + { 0xb4ccae1e350cb901L,0xb653d65650cb7822L,0x42484710dfab3b87L, + 0xcd7ee5379b670fd0L } }, + /* 16 << 63 */ + { { 0x0a50b12e523b8bf6L,0x8009eb5b8f910c1bL,0xf535af824a167588L, + 0x0f835f9cfb2a2abdL }, + { 0xf59b29312afceb62L,0xc797df2a169d383fL,0xeb3f5fb066ac02b0L, + 0x029d4c6fdaa2d0caL } }, + /* 17 << 63 */ + { { 0xd4059bc1afab4bc5L,0x833f5c6f56783247L,0xb53466308d2d3605L, + 0x83387891d34d8433L }, + { 0xd973b30fadd9419aL,0xbcca1099afe3fce8L,0x081783150809aac6L, + 0x01b7f21a540f0f11L } }, + /* 18 << 63 */ + { { 0x65c29219909523c8L,0xa62f648fa3a1c741L,0x88598d4f60c9e55aL, + 0xbce9141b0e4f347aL }, + { 0x9af97d8435f9b988L,0x0210da62320475b6L,0x3c076e229191476cL, + 0x7520dbd944fc7834L } }, + /* 19 << 63 */ + { { 0x6a6b2cfec1ab1bbdL,0xef8a65bedc650938L,0x72855540805d7bc4L, + 0xda389396ed11fdfdL }, + { 0xa9d5bd3674660876L,0x11d67c54b45dff35L,0x6af7d148a4f5da94L, + 0xbb8d4c3fc0bbeb31L } }, + /* 20 << 63 */ + { { 0x87a7ebd1e0a1b12aL,0x1e4ef88d770ba95fL,0x8c33345cdc2ae9cbL, + 0xcecf127601cc8403L }, + { 0x687c012e1b39b80fL,0xfd90d0ad35c33ba4L,0xa3ef5a675c9661c2L, + 0x368fc88ee017429eL } }, + /* 21 << 63 */ + { { 0xd30c6761196a2fa2L,0x931b9817bd5b312eL,0xba01000c72f54a31L, + 0xa203d2c866eaa541L }, + { 0xf2abdee098939db3L,0xe37d6c2c3e606c02L,0xf2921574521ff643L, + 0x2781b3c4d7e2fca3L } }, + /* 22 << 63 */ + { { 0x664300b07850ec06L,0xac5a38b97d3a10cfL,0x9233188de34ab39dL, + 0xe77057e45072cbb9L }, + { 0xbcf0c042b59e78dfL,0x4cfc91e81d97de52L,0x4661a26c3ee0ca4aL, + 0x5620a4c1fb8507bcL } }, + /* 23 << 63 */ + { { 0x4b44d4aa049f842cL,0xceabc5d51540e82bL,0x306710fd15c6f156L, + 0xbe5ae52b63db1d72L }, + { 0x06f1e7e6334957f1L,0x57e388f031144a70L,0xfb69bb2fdf96447bL, + 0x0f78ebd373e38a12L } }, + /* 24 << 63 */ + { { 0xb82226052b7ce542L,0xe6d4ce997472bde1L,0x53e16ebe09d2f4daL, + 0x180ff42e53b92b2eL }, + { 0xc59bcc022c34a1c6L,0x3803d6f9422c46c2L,0x18aff74f5c14a8a2L, + 0x55aebf8010a08b28L } }, + /* 25 << 63 */ + { { 0x66097d587135593fL,0x32e6eff72be570cdL,0x584e6a102a8c860dL, + 0xcd185890a2eb4163L }, + { 0x7ceae99d6d97e134L,0xd42c6b70dd8447ceL,0x59ddbb4ab8c50273L, + 0x03c612df3cf34e1eL } }, + /* 26 << 63 */ + { { 
0x84b9ca1504b6c5a0L,0x35216f3918f0e3a3L,0x3ec2d2bcbd986c00L, + 0x8bf546d9d19228feL }, + { 0xd1c655a44cd623c3L,0x366ce718502b8e5aL,0x2cfc84b4eea0bfe7L, + 0xe01d5ceecf443e8eL } }, + /* 27 << 63 */ + { { 0x8ec045d9036520f8L,0xdfb3c3d192d40e98L,0x0bac4ccecc559a04L, + 0x35eccae5240ea6b1L }, + { 0x180b32dbf8a5a0acL,0x547972a5eb699700L,0xa3765801ca26bca0L, + 0x57e09d0ea647f25aL } }, + /* 28 << 63 */ + { { 0xb956970e2fdd23ccL,0xb80288bc5682e971L,0xe6e6d91e9ae86ebcL, + 0x0564c83f8c9f1939L }, + { 0x551932a239560368L,0xe893752b049c28e2L,0x0b03cee5a6a158c3L, + 0xe12d656b04964263L } }, + /* 29 << 63 */ + { { 0x4b47554e63e3bc1dL,0xc719b6a245044ff7L,0x4f24d30ae48daa07L, + 0xa3f37556c8c1edc3L }, + { 0x9a47bf760700d360L,0xbb1a1824822ae4e2L,0x22e275a389f1fb4cL, + 0x72b1aa239968c5f5L } }, + /* 30 << 63 */ + { { 0xa75feacabe063f64L,0x9b392f43bce47a09L,0xd42415091ad07acaL, + 0x4b0c591b8d26cd0fL }, + { 0x2d42ddfd92f1169aL,0x63aeb1ac4cbf2392L,0x1de9e8770691a2afL, + 0xebe79af7d98021daL } }, + /* 31 << 63 */ + { { 0xcfdf2a4e40e50acfL,0xf0a98ad7af01d665L,0xefb640bf1831be1fL, + 0x6fe8bd2f80e9ada0L }, + { 0x94c103a16cafbc91L,0x170f87598308e08cL,0x5de2d2ab9780ff4fL, + 0x666466bc45b201f2L } }, + /* 32 << 63 */ + { { 0x58af2010f5b343bcL,0x0f2e400af2f142feL,0x3483bfdea85f4bdfL, + 0xf0b1d09303bfeaa9L }, + { 0x2ea01b95c7081603L,0xe943e4c93dba1097L,0x47be92adb438f3a6L, + 0x00bb7742e5bf6636L } }, + /* 33 << 63 */ + { { 0x136b7083824297b4L,0x9d0e55805584455fL,0xab48cedcf1c7d69eL, + 0x53a9e4812a256e76L }, + { 0x0402b0e065eb2413L,0xdadbbb848fc407a7L,0xa65cd5a48d7f5492L, + 0x21d4429374bae294L } }, + /* 34 << 63 */ + { { 0x66917ce63b5f1cc4L,0x37ae52eace872e62L,0xbb087b722905f244L, + 0x120770861e6af74fL }, + { 0x4b644e491058edeaL,0x827510e3b638ca1dL,0x8cf2b7046038591cL, + 0xffc8b47afe635063L } }, + /* 35 << 63 */ + { { 0x3ae220e61b4d5e63L,0xbd8647429d961b4bL,0x610c107e9bd16bedL, + 0x4270352a1127147bL }, + { 0x7d17ffe664cfc50eL,0x50dee01a1e36cb42L,0x068a762235dc5f9aL, + 0x9a08d536df53f62cL } }, + /* 36 << 63 */ + { { 0x4ed714576be5f7deL,0xd93006f8c2263c9eL,0xe073694ccacacb36L, + 0x2ff7a5b43ae118abL }, + { 0x3cce53f1cd871236L,0xf156a39dc2aa6d52L,0x9cc5f271b198d76dL, + 0xbc615b6f81383d39L } }, + /* 37 << 63 */ + { { 0xa54538e8de3eee6bL,0x58c77538ab910d91L,0x31e5bdbc58d278bdL, + 0x3cde4adfb963acaeL }, + { 0xb1881fd25302169cL,0x8ca60fa0a989ed8bL,0xa1999458ff96a0eeL, + 0xc1141f03ac6c283dL } }, + /* 38 << 63 */ + { { 0x7677408d6dfafed3L,0x33a0165339661588L,0x3c9c15ec0b726fa0L, + 0x090cfd936c9b56daL }, + { 0xe34f4baea3c40af5L,0x3469eadbd21129f1L,0xcc51674a1e207ce8L, + 0x1e293b24c83b1ef9L } }, + /* 39 << 63 */ + { { 0x17173d131e6c0bb4L,0x1900469590776d35L,0xe7980e346de6f922L, + 0x873554cbf4dd9a22L }, + { 0x0316c627cbf18a51L,0x4d93651b3032c081L,0x207f27713946834dL, + 0x2c08d7b430cdbf80L } }, + /* 40 << 63 */ + { { 0x137a4fb486df2a61L,0xa1ed9c07ecf7b4a2L,0xb2e460e27bd042ffL, + 0xb7f5e2fa5f62f5ecL }, + { 0x7aa6ec6bcc2423b7L,0x75ce0a7fba63eea7L,0x67a45fb1f250a6e1L, + 0x93bc919ce53cdc9fL } }, + /* 41 << 63 */ + { { 0x9271f56f871942dfL,0x2372ff6f7859ad66L,0x5f4c2b9633cb1a78L, + 0xe3e291015838aa83L }, + { 0xa7ed1611e4e8110cL,0x2a2d70d5330198ceL,0xbdf132e86720efe0L, + 0xe61a896266a471bfL } }, + /* 42 << 63 */ + { { 0x796d3a85825808bdL,0x51dc3cb73fd6e902L,0x643c768a916219d1L, + 0x36cd7685a2ad7d32L }, + { 0xe3db9d05b22922a4L,0x6494c87edba29660L,0xf0ac91dfbcd2ebc7L, + 0x4deb57a045107f8dL } }, + /* 43 << 63 */ + { { 0x42271f59c3d12a73L,0x5f71687ca5c2c51dL,0xcb1f50c605797bcbL, + 0x29ed0ed9d6d34eb0L }, + { 
0xe5fe5b474683c2ebL,0x4956eeb597447c46L,0x5b163a4371207167L, + 0x93fa2fed0248c5efL } }, + /* 44 << 63 */ + { { 0x67930af231f63950L,0xa77797c114caa2c9L,0x526e80ee27ac7e62L, + 0xe1e6e62658b28aecL }, + { 0x636178b0b3c9fef0L,0xaf7752e06d5f90beL,0x94ecaf18eece51cfL, + 0x2864d0edca806e1fL } }, + /* 45 << 63 */ + { { 0x6de2e38397c69134L,0x5a42c316eb291293L,0xc77792196a60bae0L, + 0xa24de3466b7599d1L }, + { 0x49d374aab75d4941L,0x989005862d501ff0L,0x9f16d40eeb7974cfL, + 0x1033860bcdd8c115L } }, + /* 46 << 63 */ + { { 0xb6c69ac82094cec3L,0x9976fb88403b770cL,0x1dea026c4859590dL, + 0xb6acbb468562d1fdL }, + { 0x7cd6c46144569d85L,0xc3190a3697f0891dL,0xc6f5319548d5a17dL, + 0x7d919966d749abc8L } }, + /* 47 << 63 */ + { { 0x65104837dd1c8a20L,0x7e5410c82f683419L,0x958c3ca8be94022eL, + 0x605c31976145dac2L }, + { 0x3fc0750101683d54L,0x1d7127c5595b1234L,0x10b8f87c9481277fL, + 0x677db2a8e65a1adbL } }, + /* 48 << 63 */ + { { 0xec2fccaaddce3345L,0x2a6811b7012a4350L,0x96760ff1ac598bdcL, + 0x054d652ad1bf4128L }, + { 0x0a1151d492a21005L,0xad7f397133110fdfL,0x8c95928c1960100fL, + 0x6c91c8257bf03362L } }, + /* 49 << 63 */ + { { 0xc8c8b2a2ce309f06L,0xfdb27b59ca27204bL,0xd223eaa50848e32eL, + 0xb93e4b2ee7bfaf1eL }, + { 0xc5308ae644aa3dedL,0x317a666ac015d573L,0xc888ce231a979707L, + 0xf141c1e60d5c4958L } }, + /* 50 << 63 */ + { { 0xb53b7de561906373L,0x858dbadeeb999595L,0x8cbb47b2a59e5c36L, + 0x660318b3dcf4e842L }, + { 0xbd161ccd12ba4b7aL,0xf399daabf8c8282aL,0x1587633aeeb2130dL, + 0xa465311ada38dd7dL } }, + /* 51 << 63 */ + { { 0x5f75eec864d3779bL,0x3c5d0476ad64c171L,0x874103712a914428L, + 0x8096a89190e2fc29L }, + { 0xd3d2ae9d23b3ebc2L,0x90bdd6dba580cfd6L,0x52dbb7f3c5b01f6cL, + 0xe68eded4e102a2dcL } }, + /* 52 << 63 */ + { { 0x17785b7799eb6df0L,0x26c3cc517386b779L,0x345ed9886417a48eL, + 0xe990b4e407d6ef31L }, + { 0x0f456b7e2586abbaL,0x239ca6a559c96e9aL,0xe327459ce2eb4206L, + 0x3a4c3313a002b90aL } }, + /* 53 << 63 */ + { { 0x2a114806f6a3f6fbL,0xad5cad2f85c251ddL,0x92c1f613f5a784d3L, + 0xec7bfacf349766d5L }, + { 0x04b3cd333e23cb3bL,0x3979fe84c5a64b2dL,0x192e27207e589106L, + 0xa60c43d1a15b527fL } }, + /* 54 << 63 */ + { { 0x2dae9082be7cf3a6L,0xcc86ba92bc967274L,0xf28a2ce8aea0a8a9L, + 0x404ca6d96ee988b3L }, + { 0xfd7e9c5d005921b8L,0xf56297f144e79bf9L,0xa163b4600d75ddc2L, + 0x30b23616a1f2be87L } }, + /* 55 << 63 */ + { { 0x4b070d21bfe50e2bL,0x7ef8cfd0e1bfede1L,0xadba00112aac4ae0L, + 0x2a3e7d01b9ebd033L }, + { 0x995277ece38d9d1cL,0xb500249e9c5d2de3L,0x8912b820f13ca8c9L, + 0xc8798114877793afL } }, + /* 56 << 63 */ + { { 0x19e6125dec3f1decL,0x07b1f040911178daL,0xd93ededa904a6738L, + 0x55187a5a0bebedcdL }, + { 0xf7d04722eb329d41L,0xf449099ef170b391L,0xfd317a69ca99f828L, + 0x50c3db2b34a4976dL } }, + /* 57 << 63 */ + { { 0xe9ba77843757b392L,0x326caefdaa3ca05aL,0x78e5293bf1e593d4L, + 0x7842a9370d98fd13L }, + { 0xe694bf965f96b10dL,0x373a9df606a8cd05L,0x997d1e51e8f0c7fcL, + 0x1d01979063fd972eL } }, + /* 58 << 63 */ + { { 0x0064d8585499fb32L,0x7b67bad977a8aeb7L,0x1d3eb9772d08eec5L, + 0x5fc047a6cbabae1dL }, + { 0x0577d159e54a64bbL,0x8862201bc43497e4L,0xad6b4e282ce0608dL, + 0x8b687b7d0b167aacL } }, + /* 59 << 63 */ + { { 0x6ed4d3678b2ecfa9L,0x24dfe62da90c3c38L,0xa1862e103fe5c42bL, + 0x1ca73dcad5732a9fL }, + { 0x35f038b776bb87adL,0x674976abf242b81fL,0x4f2bde7eb0fd90cdL, + 0x6efc172ea7fdf092L } }, + /* 60 << 63 */ + { { 0x3806b69b92222f1fL,0x5a2459ca6cf7ae70L,0x6789f69ca85217eeL, + 0x5f232b5ee3dc85acL }, + { 0x660e3ec548e9e516L,0x124b4e473197eb31L,0x10a0cb13aafcca23L, + 0x7bd63ba48213224fL } }, + /* 61 << 63 */ + { { 
0xaffad7cc290a7f4fL,0x6b409c9e0286b461L,0x58ab809fffa407afL, + 0xc3122eedc68ac073L }, + { 0x17bf9e504ef24d7eL,0x5d9297943e2a5811L,0x519bc86702902e01L, + 0x76bba5da39c8a851L } }, + /* 62 << 63 */ + { { 0xe9f9669cda94951eL,0x4b6af58d66b8d418L,0xfa32107417d426a4L, + 0xc78e66a99dde6027L }, + { 0x0516c0834a53b964L,0xfc659d38ff602330L,0x0ab55e5c58c5c897L, + 0x985099b2838bc5dfL } }, + /* 63 << 63 */ + { { 0x061d9efcc52fc238L,0x712b27286ac1da3fL,0xfb6581499283fe08L, + 0x4954ac94b8aaa2f7L }, + { 0x85c0ada47fb2e74fL,0xee8ba98eb89926b0L,0xe4f9d37d23d1af5bL, + 0x14ccdbf9ba9b015eL } }, + /* 64 << 63 */ + { { 0xb674481b7bfe7178L,0x4e1debae65405868L,0x061b2821c48c867dL, + 0x69c15b35513b30eaL }, + { 0x3b4a166636871088L,0xe5e29f5d1220b1ffL,0x4b82bb35233d9f4dL, + 0x4e07633318cdc675L } }, + /* 0 << 70 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 70 */ + { { 0x0d53f5c7a3e6fcedL,0xe8cbbdd5f45fbdebL,0xf85c01df13339a70L, + 0x0ff71880142ceb81L }, + { 0x4c4e8774bd70437aL,0x5fb32891ba0bda6aL,0x1cdbebd2f18bd26eL, + 0x2f9526f103a9d522L } }, + /* 2 << 70 */ + { { 0x40ce305192c4d684L,0x8b04d7257612efcdL,0xb9dcda366f9cae20L, + 0x0edc4d24f058856cL }, + { 0x64f2e6bf85427900L,0x3de81295dc09dfeaL,0xd41b4487379bf26cL, + 0x50b62c6d6df135a9L } }, + /* 3 << 70 */ + { { 0xd4f8e3b4c72dfe67L,0xc416b0f690e19fdfL,0x18b9098d4c13bd35L, + 0xac11118a15b8cb9eL }, + { 0xf598a318f0062841L,0xbfe0602f89f356f4L,0x7ae3637e30177a0cL, + 0x3409774761136537L } }, + /* 4 << 70 */ + { { 0x0db2fb5ed005832aL,0x5f5efd3b91042e4fL,0x8c4ffdc6ed70f8caL, + 0xe4645d0bb52da9ccL }, + { 0x9596f58bc9001d1fL,0x52c8f0bc4e117205L,0xfd4aa0d2e398a084L, + 0x815bfe3a104f49deL } }, + /* 5 << 70 */ + { { 0x97e5443f23885e5fL,0xf72f8f99e8433aabL,0xbd00b154e4d4e604L, + 0xd0b35e6ae5e173ffL }, + { 0x57b2a0489164722dL,0x3e3c665b88761ec8L,0x6bdd13973da83832L, + 0x3c8b1a1e73dafe3bL } }, + /* 6 << 70 */ + { { 0x4497ace654317cacL,0xbe600ab9521771b3L,0xb42e409eb0dfe8b8L, + 0x386a67d73942310fL }, + { 0x25548d8d4431cc28L,0xa7cff142985dc524L,0x4d60f5a193c4be32L, + 0x83ebd5c8d071c6e1L } }, + /* 7 << 70 */ + { { 0xba3a80a7b1fd2b0bL,0x9b3ad3965bec33e8L,0xb3868d6179743fb3L, + 0xcfd169fcfdb462faL }, + { 0xd3b499d79ce0a6afL,0x55dc1cf1e42d3ff8L,0x04fb9e6cc6c3e1b2L, + 0x47e6961d6f69a474L } }, + /* 8 << 70 */ + { { 0x54eb3acce548b37bL,0xb38e754284d40549L,0x8c3daa517b341b4fL, + 0x2f6928ec690bf7faL }, + { 0x0496b32386ce6c41L,0x01be1c5510adadcdL,0xc04e67e74bb5faf9L, + 0x3cbaf678e15c9985L } }, + /* 9 << 70 */ + { { 0x8cd1214550ca4247L,0xba1aa47ae7dd30aaL,0x2f81ddf1e58fee24L, + 0x03452936eec9b0e8L }, + { 0x8bdc3b81243aea96L,0x9a2919af15c3d0e5L,0x9ea640ec10948361L, + 0x5ac86d5b6e0bcccfL } }, + /* 10 << 70 */ + { { 0xf892d918c36cf440L,0xaed3e837c939719cL,0xb07b08d2c0218b64L, + 0x6f1bcbbace9790ddL }, + { 0x4a84d6ed60919b8eL,0xd89007918ac1f9ebL,0xf84941aa0dd5daefL, + 0xb22fe40a67fd62c5L } }, + /* 11 << 70 */ + { { 0x97e15ba2157f2db3L,0xbda2fc8f8e28ca9cL,0x5d050da437b9f454L, + 0x3d57eb572379d72eL }, + { 0xe9b5eba2fb5ee997L,0x01648ca2e11538caL,0x32bb76f6f6327974L, + 0x338f14b8ff3f4bb7L } }, + /* 12 << 70 */ + { { 0x524d226ad7ab9a2dL,0x9c00090d7dfae958L,0x0ba5f5398751d8c2L, + 0x8afcbcdd3ab8262dL }, + { 0x57392729e99d043bL,0xef51263baebc943aL,0x9feace9320862935L, + 0x639efc03b06c817bL } }, + /* 13 << 70 */ + { { 0x1fe054b366b4be7aL,0x3f25a9de84a37a1eL,0xf39ef1ad78d75cd9L, + 0xd7b58f495062c1b5L }, + { 0x6f74f9a9ff563436L,0xf718ff29e8af51e7L,0x5234d31315e97fecL, + 0xb6a8e2b1292f1c0aL } }, + /* 14 << 70 */ + { { 0xa7f53aa8327720c1L,0x956ca322ba092cc8L,0x8f03d64a28746c4dL, + 
0x51fe178266d0d392L }, + { 0xd19b34db3c832c80L,0x60dccc5c6da2e3b4L,0x245dd62e0a104cccL, + 0xa7ab1de1620b21fdL } }, + /* 15 << 70 */ + { { 0xb293ae0b3893d123L,0xf7b75783b15ee71cL,0x5aa3c61442a9468bL, + 0xd686123cdb15d744L }, + { 0x8c616891a7ab4116L,0x6fcd72c8a4e6a459L,0xac21911077e5fad7L, + 0xfb6a20e7704fa46bL } }, + /* 16 << 70 */ + { { 0xe839be7d341d81dcL,0xcddb688932148379L,0xda6211a1f7026eadL, + 0xf3b2575ff4d1cc5eL }, + { 0x40cfc8f6a7a73ae6L,0x83879a5e61d5b483L,0xc5acb1ed41a50ebcL, + 0x59a60cc83c07d8faL } }, + /* 17 << 70 */ + { { 0x1b73bdceb1876262L,0x2b0d79f012af4ee9L,0x8bcf3b0bd46e1d07L, + 0x17d6af9de45d152fL }, + { 0x735204616d736451L,0x43cbbd9756b0bf5aL,0xb0833a5bd5999b9dL, + 0x702614f0eb72e398L } }, + /* 18 << 70 */ + { { 0x0aadf01a59c3e9f8L,0x40200e77ce6b3d16L,0xda22bdd3deddafadL, + 0x76dedaf4310d72e1L }, + { 0x49ef807c4bc2e88fL,0x6ba81291146dd5a5L,0xa1a4077a7d8d59e9L, + 0x87b6a2e7802db349L } }, + /* 19 << 70 */ + { { 0xd56799971b4e598eL,0xf499ef1f06fe4b1dL,0x3978d3aefcb267c5L, + 0xb582b557235786d0L }, + { 0x32b3b2ca1715cb07L,0x4c3de6a28480241dL,0x63b5ffedcb571ecdL, + 0xeaf53900ed2fe9a9L } }, + /* 20 << 70 */ + { { 0xdec98d4ac3b81990L,0x1cb837229e0cc8feL,0xfe0b0491d2b427b9L, + 0x0f2386ace983a66cL }, + { 0x930c4d1eb3291213L,0xa2f82b2e59a62ae4L,0x77233853f93e89e3L, + 0x7f8063ac11777c7fL } }, + /* 21 << 70 */ + { { 0xff0eb56759ad2877L,0x6f4546429865c754L,0xe6fe701a236e9a84L, + 0xc586ef1606e40fc3L }, + { 0x3f62b6e024bafad9L,0xc8b42bd264da906aL,0xc98e1eb4da3276a0L, + 0x30d0e5fc06cbf852L } }, + /* 22 << 70 */ + { { 0x1b6b2ae1e8b4dfd4L,0xd754d5c78301cbacL,0x66097629112a39acL, + 0xf86b599993ba4ab9L }, + { 0x26c9dea799f9d581L,0x0473b1a8c2fafeaaL,0x1469af553b2505a5L, + 0x227d16d7d6a43323L } }, + /* 23 << 70 */ + { { 0x3316f73cad3d97f9L,0x52bf3bb51f137455L,0x953eafeb09954e7cL, + 0xa721dfeddd732411L }, + { 0xb4929821141d4579L,0x3411321caa3bd435L,0xafb355aa17fa6015L, + 0xb4e7ef4a18e42f0eL } }, + /* 24 << 70 */ + { { 0x604ac97c59371000L,0xe1c48c707f759c18L,0x3f62ecc5a5db6b65L, + 0x0a78b17338a21495L }, + { 0x6be1819dbcc8ad94L,0x70dc04f6d89c3400L,0x462557b4a6b4840aL, + 0x544c6ade60bd21c0L } }, + /* 25 << 70 */ + { { 0x6a00f24e907a544bL,0xa7520dcb313da210L,0xfe939b7511e4994bL, + 0x918b6ba6bc275d70L }, + { 0xd3e5e0fc644be892L,0x707a9816fdaf6c42L,0x60145567f15c13feL, + 0x4818ebaae130a54aL } }, + /* 26 << 70 */ + { { 0x28aad3ad58d2f767L,0xdc5267fdd7e7c773L,0x4919cc88c3afcc98L, + 0xaa2e6ab02db8cd4bL }, + { 0xd46fec04d0c63eaaL,0xa1cb92c519ffa832L,0x678dd178e43a631fL, + 0xfb5ae1cd3dc788b3L } }, + /* 27 << 70 */ + { { 0x68b4fb906e77de04L,0x7992bcf0f06dbb97L,0x896e6a13c417c01dL, + 0x8d96332cb956be01L }, + { 0x902fc93a413aa2b9L,0x99a4d915fc98c8a5L,0x52c29407565f1137L, + 0x4072690f21e4f281L } }, + /* 28 << 70 */ + { { 0x36e607cf02ff6072L,0xa47d2ca98ad98cdcL,0xbf471d1ef5f56609L, + 0xbcf86623f264ada0L }, + { 0xb70c0687aa9e5cb6L,0xc98124f217401c6cL,0x8189635fd4a61435L, + 0xd28fb8afa9d98ea6L } }, + /* 29 << 70 */ + { { 0xb9a67c2a40c251f8L,0x88cd5d87a2da44beL,0x437deb96e09b5423L, + 0x150467db64287dc1L }, + { 0xe161debbcdabb839L,0xa79e9742f1839a3eL,0xbb8dd3c2652d202bL, + 0x7b3e67f7e9f97d96L } }, + /* 30 << 70 */ + { { 0x5aa5d78fb1cb6ac9L,0xffa13e8eca1d0d45L,0x369295dd2ba5bf95L, + 0xd68bd1f839aff05eL }, + { 0xaf0d86f926d783f2L,0x543a59b3fc3aafc1L,0x3fcf81d27b7da97cL, + 0xc990a056d25dee46L } }, + /* 31 << 70 */ + { { 0x3e6775b8519cce2cL,0xfc9af71fae13d863L,0x774a4a6f47c1605cL, + 0x46ba42452fd205e8L }, + { 0xa06feea4d3fd524dL,0x1e7246416de1acc2L,0xf53816f1334e2b42L, + 0x49e5918e922f0024L } }, + /* 32 << 70 */ + { { 
0x439530b665c7322dL,0xcf12cc01b3c1b3fbL,0xc70b01860172f685L, + 0xb915ee221b58391dL }, + { 0x9afdf03ba317db24L,0x87dec65917b8ffc4L,0x7f46597be4d3d050L, + 0x80a1c1ed006500e7L } }, + /* 33 << 70 */ + { { 0x84902a9678bf030eL,0xfb5e9c9a50560148L,0x6dae0a9263362426L, + 0xdcaeecf4a9e30c40L }, + { 0xc0d887bb518d0c6bL,0x99181152cb985b9dL,0xad186898ef7bc381L, + 0x18168ffb9ee46201L } }, + /* 34 << 70 */ + { { 0x9a04cdaa2502753cL,0xbb279e2651407c41L,0xeacb03aaf23564e5L, + 0x1833658271e61016L }, + { 0x8684b8c4eb809877L,0xb336e18dea0e672eL,0xefb601f034ee5867L, + 0x2733edbe1341cfd1L } }, + /* 35 << 70 */ + { { 0xb15e809a26025c3cL,0xe6e981a69350df88L,0x923762378502fd8eL, + 0x4791f2160c12be9bL }, + { 0xb725678925f02425L,0xec8631947a974443L,0x7c0ce882fb41cc52L, + 0xc266ff7ef25c07f2L } }, + /* 36 << 70 */ + { { 0x3d4da8c3017025f3L,0xefcf628cfb9579b4L,0x5c4d00161f3716ecL, + 0x9c27ebc46801116eL }, + { 0x5eba0ea11da1767eL,0xfe15145247004c57L,0x3ace6df68c2373b7L, + 0x75c3dffe5dbc37acL } }, + /* 37 << 70 */ + { { 0x3dc32a73ddc925fcL,0xb679c8412f65ee0bL,0x715a3295451cbfebL, + 0xd9889768f76e9a29L }, + { 0xec20ce7fb28ad247L,0xe99146c400894d79L,0x71457d7c9f5e3ea7L, + 0x097b266238030031L } }, + /* 38 << 70 */ + { { 0xdb7f6ae6cf9f82a8L,0x319decb9438f473aL,0xa63ab386283856c3L, + 0x13e3172fb06a361bL }, + { 0x2959f8dc7d5a006cL,0x2dbc27c675fba752L,0xc1227ab287c22c9eL, + 0x06f61f7571a268b2L } }, + /* 39 << 70 */ + { { 0x1b6bb97104779ce2L,0xaca838120aadcb1dL,0x297ae0bcaeaab2d5L, + 0xa5c14ee75bfb9f13L }, + { 0xaa00c583f17a62c7L,0x39eb962c173759f6L,0x1eeba1d486c9a88fL, + 0x0ab6c37adf016c5eL } }, + /* 40 << 70 */ + { { 0xa2a147dba28a0749L,0x246c20d6ee519165L,0x5068d1b1d3810715L, + 0xb1e7018c748160b9L }, + { 0x03f5b1faf380ff62L,0xef7fb1ddf3cb2c1eL,0xeab539a8fc91a7daL, + 0x83ddb707f3f9b561L } }, + /* 41 << 70 */ + { { 0xc550e211fe7df7a4L,0xa7cd07f2063f6f40L,0xb0de36352976879cL, + 0xb5f83f85e55741daL }, + { 0x4ea9d25ef3d8ac3dL,0x6fe2066f62819f02L,0x4ab2b9c2cef4a564L, + 0x1e155d965ffa2de3L } }, + /* 42 << 70 */ + { { 0x0eb0a19bc3a72d00L,0x4037665b8513c31bL,0x2fb2b6bf04c64637L, + 0x45c34d6e08cdc639L }, + { 0x56f1e10ff01fd796L,0x4dfb8101fe3667b8L,0xe0eda2539021d0c0L, + 0x7a94e9ff8a06c6abL } }, + /* 43 << 70 */ + { { 0x2d3bb0d9bb9aa882L,0xea20e4e5ec05fd10L,0xed7eeb5f1a1ca64eL, + 0x2fa6b43cc6327cbdL }, + { 0xb577e3cf3aa91121L,0x8c6bd5ea3a34079bL,0xd7e5ba3960e02fc0L, + 0xf16dd2c390141bf8L } }, + /* 44 << 70 */ + { { 0xb57276d980101b98L,0x760883fdb82f0f66L,0x89d7de754bc3eff3L, + 0x03b606435dc2ab40L }, + { 0xcd6e53dfe05beeacL,0xf2f1e862bc3325cdL,0xdd0f7921774f03c3L, + 0x97ca72214552cc1bL } }, + /* 45 << 70 */ + { { 0x5a0d6afe1cd19f72L,0xa20915dcf183fbebL,0x9fda4b40832c403cL, + 0x32738eddbe425442L }, + { 0x469a1df6b5eccf1aL,0x4b5aff4228bbe1f0L,0x31359d7f570dfc93L, + 0xa18be235f0088628L } }, + /* 46 << 70 */ + { { 0xa5b30fbab00ed3a9L,0x34c6137473cdf8beL,0x2c5c5f46abc56797L, + 0x5cecf93db82a8ae2L }, + { 0x7d3dbe41a968fbf0L,0xd23d45831a5c7f3dL,0xf28f69a0c087a9c7L, + 0xc2d75471474471caL } }, + /* 47 << 70 */ + { { 0x36ec9f4a4eb732ecL,0x6c943bbdb1ca6bedL,0xd64535e1f2457892L, + 0x8b84a8eaf7e2ac06L }, + { 0xe0936cd32499dd5fL,0x12053d7e0ed04e57L,0x4bdd0076e4305d9dL, + 0x34a527b91f67f0a2L } }, + /* 48 << 70 */ + { { 0xe79a4af09cec46eaL,0xb15347a1658b9bc7L,0x6bd2796f35af2f75L, + 0xac9579904051c435L }, + { 0x2669dda3c33a655dL,0x5d503c2e88514aa3L,0xdfa113373753dd41L, + 0x3f0546730b754f78L } }, + /* 49 << 70 */ + { { 0xbf185677496125bdL,0xfb0023c83775006cL,0xfa0f072f3a037899L, + 0x4222b6eb0e4aea57L }, + { 
0x3dde5e767866d25aL,0xb6eb04f84837aa6fL,0x5315591a2cf1cdb8L, + 0x6dfb4f412d4e683cL } }, + /* 50 << 70 */ + { { 0x7e923ea448ee1f3aL,0x9604d9f705a2afd5L,0xbe1d4a3340ea4948L, + 0x5b45f1f4b44cbd2fL }, + { 0x5faf83764acc757eL,0xa7cf9ab863d68ff7L,0x8ad62f69df0e404bL, + 0xd65f33c212bdafdfL } }, + /* 51 << 70 */ + { { 0xc365de15a377b14eL,0x6bf5463b8e39f60cL,0x62030d2d2ce68148L, + 0xd95867efe6f843a8L }, + { 0xd39a0244ef5ab017L,0x0bd2d8c14ab55d12L,0xc9503db341639169L, + 0x2d4e25b0f7660c8aL } }, + /* 52 << 70 */ + { { 0x760cb3b5e224c5d7L,0xfa3baf8c68616919L,0x9fbca1138d142552L, + 0x1ab18bf17669ebf5L }, + { 0x55e6f53e9bdf25ddL,0x04cc0bf3cb6cd154L,0x595bef4995e89080L, + 0xfe9459a8104a9ac1L } }, + /* 53 << 70 */ + { { 0xad2d89cacce9bb32L,0xddea65e1f7de8285L,0x62ed8c35b351bd4bL, + 0x4150ff360c0e19a7L }, + { 0x86e3c801345f4e47L,0x3bf21f71203a266cL,0x7ae110d4855b1f13L, + 0x5d6aaf6a07262517L } }, + /* 54 << 70 */ + { { 0x1e0f12e1813d28f1L,0x6000e11d7ad7a523L,0xc7d8deefc744a17bL, + 0x1e990b4814c05a00L }, + { 0x68fddaee93e976d5L,0x696241d146610d63L,0xb204e7c3893dda88L, + 0x8bccfa656a3a6946L } }, + /* 55 << 70 */ + { { 0xb59425b4c5cd1411L,0x701b4042ff3658b1L,0xe3e56bca4784cf93L, + 0x27de5f158fe68d60L }, + { 0x4ab9cfcef8d53f19L,0xddb10311a40a730dL,0x6fa73cd14eee0a8aL, + 0xfd5487485249719dL } }, + /* 56 << 70 */ + { { 0x49d66316a8123ef0L,0x73c32db4e7f95438L,0x2e2ed2090d9e7854L, + 0xf98a93299d9f0507L }, + { 0xc5d33cf60c6aa20aL,0x9a32ba1475279bb2L,0x7e3202cb774a7307L, + 0x64ed4bc4e8c42dbdL } }, + /* 57 << 70 */ + { { 0xc20f1a06d4caed0dL,0xb8021407171d22b3L,0xd426ca04d13268d7L, + 0x9237700725f4d126L }, + { 0x4204cbc371f21a85L,0x18461b7af82369baL,0xc0c07d313fc858f9L, + 0x5deb5a50e2bab569L } }, + /* 58 << 70 */ + { { 0xd5959d46d5eea89eL,0xfdff842408437f4bL,0xf21071e43cfe254fL, + 0x7241769695468321L }, + { 0x5d8288b9102cae3eL,0x2d143e3df1965dffL,0x00c9a376a078d847L, + 0x6fc0da3126028731L } }, + /* 59 << 70 */ + { { 0xa2baeadfe45083a2L,0x66bc72185e5b4bcdL,0x2c826442d04b8e7fL, + 0xc19f54516c4b586bL }, + { 0x60182c495b7eeed5L,0xd9954ecd7aa9dfa1L,0xa403a8ecc73884adL, + 0x7fb17de29bb39041L } }, + /* 60 << 70 */ + { { 0x694b64c5abb020e8L,0x3d18c18419c4eec7L,0x9c4673ef1c4793e5L, + 0xc7b8aeb5056092e6L }, + { 0x3aa1ca43f0f8c16bL,0x224ed5ecd679b2f6L,0x0d56eeaf55a205c9L, + 0xbfe115ba4b8e028bL } }, + /* 61 << 70 */ + { { 0x97e608493927f4feL,0xf91fbf94759aa7c5L,0x985af7696be90a51L, + 0xc1277b7878ccb823L }, + { 0x395b656ee7a75952L,0x00df7de0928da5f5L,0x09c231754ca4454fL, + 0x4ec971f47aa2d3c1L } }, + /* 62 << 70 */ + { { 0x45c3c507e75d9cccL,0x63b7be8a3dc90306L,0x37e09c665db44bdcL, + 0x50d60da16841c6a2L }, + { 0x6f9b65ee08df1b12L,0x387348797ff089dfL,0x9c331a663fe8013dL, + 0x017f5de95f42fcc8L } }, + /* 63 << 70 */ + { { 0x43077866e8e57567L,0xc9f781cef9fcdb18L,0x38131dda9b12e174L, + 0x25d84aa38a03752aL }, + { 0x45e09e094d0c0ce2L,0x1564008b92bebba5L,0xf7e8ad31a87284c7L, + 0xb7c4b46c97e7bbaaL } }, + /* 64 << 70 */ + { { 0x3e22a7b397acf4ecL,0x0426c4005ea8b640L,0x5e3295a64e969285L, + 0x22aabc59a6a45670L }, + { 0xb929714c5f5942bcL,0x9a6168bdfa3182edL,0x2216a665104152baL, + 0x46908d03b6926368L } }, + /* 0 << 77 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 77 */ + { { 0xa9f5d8745a1251fbL,0x967747a8c72725c7L,0x195c33e531ffe89eL, + 0x609d210fe964935eL }, + { 0xcafd6ca82fe12227L,0xaf9b5b960426469dL,0x2e9ee04c5693183cL, + 0x1084a333c8146fefL } }, + /* 2 << 77 */ + { { 0x96649933aed1d1f7L,0x566eaff350563090L,0x345057f0ad2e39cfL, + 0x148ff65b1f832124L }, + { 
0x042e89d4cf94cf0dL,0x319bec84520c58b3L,0x2a2676265361aa0dL, + 0xc86fa3028fbc87adL } }, + /* 3 << 77 */ + { { 0xfc83d2ab5c8b06d5L,0xb1a785a2fe4eac46L,0xb99315bc846f7779L, + 0xcf31d816ef9ea505L }, + { 0x2391fe6a15d7dc85L,0x2f132b04b4016b33L,0x29547fe3181cb4c7L, + 0xdb66d8a6650155a1L } }, + /* 4 << 77 */ + { { 0x6b66d7e1adc1696fL,0x98ebe5930acd72d0L,0x65f24550cc1b7435L, + 0xce231393b4b9a5ecL }, + { 0x234a22d4db067df9L,0x98dda095caff9b00L,0x1bbc75a06100c9c1L, + 0x1560a9c8939cf695L } }, + /* 5 << 77 */ + { { 0xcf006d3e99e0925fL,0x2dd74a966322375aL,0xc58b446ab56af5baL, + 0x50292683e0b9b4f1L }, + { 0xe2c34cb41aeaffa3L,0x8b17203f9b9587c1L,0x6d559207ead1350cL, + 0x2b66a215fb7f9604L } }, + /* 6 << 77 */ + { { 0x0850325efe51bf74L,0x9c4f579e5e460094L,0x5c87b92a76da2f25L, + 0x889de4e06febef33L }, + { 0x6900ec06646083ceL,0xbe2a0335bfe12773L,0xadd1da35c5344110L, + 0x757568b7b802cd20L } }, + /* 7 << 77 */ + { { 0x7555977900f7e6c8L,0x38e8b94f0facd2f0L,0xfea1f3af03fde375L, + 0x5e11a1d875881dfcL }, + { 0xb3a6b02ec1e2f2efL,0x193d2bbbc605a6c5L,0x325ffeee339a0b2dL, + 0x27b6a7249e0c8846L } }, + /* 8 << 77 */ + { { 0xe4050f1cf1c367caL,0x9bc85a9bc90fbc7dL,0xa373c4a2e1a11032L, + 0xb64232b7ad0393a9L }, + { 0xf5577eb0167dad29L,0x1604f30194b78ab2L,0x0baa94afe829348bL, + 0x77fbd8dd41654342L } }, + /* 9 << 77 */ + { { 0xdab50ea5b964e39aL,0xd4c29e3cd0d3c76eL,0x80dae67c56d11964L, + 0x7307a8bfe5ffcc2fL }, + { 0x65bbc1aa91708c3bL,0xa151e62c28bf0eebL,0x6cb533816fa34db7L, + 0x5139e05ca29403a8L } }, + /* 10 << 77 */ + { { 0x6ff651b494a7cd2eL,0x5671ffd10699336cL,0x6f5fd2cc979a896aL, + 0x11e893a8d8148cefL }, + { 0x988906a165cf7b10L,0x81b67178c50d8485L,0x7c0deb358a35b3deL, + 0x423ac855c1d29799L } }, + /* 11 << 77 */ + { { 0xaf580d87dac50b74L,0x28b2b89f5869734cL,0x99a3b936874e28fbL, + 0xbb2c919025f3f73aL }, + { 0x199f691884a9d5b7L,0x7ebe23257e770374L,0xf442e1070738efe2L, + 0xcf9f3f56cf9082d2L } }, + /* 12 << 77 */ + { { 0x719f69e109618708L,0xcc9e8364c183f9b1L,0xec203a95366a21afL, + 0x6aec5d6d068b141fL }, + { 0xee2df78a994f04e9L,0xb39ccae8271245b0L,0xb875a4a997e43f4fL, + 0x507dfe11db2cea98L } }, + /* 13 << 77 */ + { { 0x4fbf81cb489b03e9L,0xdb86ec5b6ec414faL,0xfad444f9f51b3ae5L, + 0xca7d33d61914e3feL }, + { 0xa9c32f5c0ae6c4d0L,0xa9ca1d1e73969568L,0x98043c311aa7467eL, + 0xe832e75ce21b5ac6L } }, + /* 14 << 77 */ + { { 0x314b7aea5232123dL,0x08307c8c65ae86dbL,0x06e7165caa4668edL, + 0xb170458bb4d3ec39L }, + { 0x4d2e3ec6c19bb986L,0xc5f34846ae0304edL,0x917695a06c9f9722L, + 0x6c7f73174cab1c0aL } }, + /* 15 << 77 */ + { { 0x6295940e9d6d2e8bL,0xd318b8c1549f7c97L,0x2245320497713885L, + 0x468d834ba8a440feL }, + { 0xd81fe5b2bfba796eL,0x152364db6d71f116L,0xbb8c7c59b5b66e53L, + 0x0b12c61b2641a192L } }, + /* 16 << 77 */ + { { 0x31f14802fcf0a7fdL,0x42fd07895488b01eL,0x71d78d6d9952b498L, + 0x8eb572d907ac5201L }, + { 0xe0a2a44c4d194a88L,0xd2b63fd9ba017e66L,0x78efc6c8f888aefcL, + 0xb76f6bda4a881a11L } }, + /* 17 << 77 */ + { { 0x187f314bb46c2397L,0x004cf5665ded2819L,0xa9ea570438764d34L, + 0xbba4521778084709L }, + { 0x064745711171121eL,0xad7b7eb1e7c9b671L,0xdacfbc40730f7507L, + 0x178cd8c6c7ad7bd1L } }, + /* 18 << 77 */ + { { 0xbf0be101b2a67238L,0x3556d367af9c14f2L,0x104b7831a5662075L, + 0x58ca59bb79d9e60aL }, + { 0x4bc45392a569a73bL,0x517a52e85698f6c9L,0x85643da5aeadd755L, + 0x1aed0cd52a581b84L } }, + /* 19 << 77 */ + { { 0xb9b4ff8480af1372L,0x244c3113f1ba5d1fL,0x2a5dacbef5f98d31L, + 0x2c3323e84375bc2aL }, + { 0x17a3ab4a5594b1ddL,0xa1928bfbceb4797eL,0xe83af245e4886a19L, + 0x8979d54672b5a74aL } }, + /* 20 << 77 */ + { { 
0xa0f726bc19f9e967L,0xd9d03152e8fbbf4eL,0xcfd6f51db7707d40L, + 0x633084d963f6e6e0L }, + { 0xedcd9cdc55667eafL,0x73b7f92b2e44d56fL,0xfb2e39b64e962b14L, + 0x7d408f6ef671fcbfL } }, + /* 21 << 77 */ + { { 0xcc634ddc164a89bbL,0x74a42bb23ef3bd05L,0x1280dbb2428decbbL, + 0x6103f6bb402c8596L }, + { 0xfa2bf581355a5752L,0x562f96a800946674L,0x4e4ca16d6da0223bL, + 0xfe47819f28d3aa25L } }, + /* 22 << 77 */ + { { 0x9eea3075f8dfcf8aL,0xa284f0aa95669825L,0xb3fca250867d3fd8L, + 0x20757b5f269d691eL }, + { 0xf2c2402093b8a5deL,0xd3f93359ebc06da6L,0x1178293eb2739c33L, + 0xd2a3e770bcd686e5L } }, + /* 23 << 77 */ + { { 0xa76f49f4cd941534L,0x0d37406be3c71c0eL,0x172d93973b97f7e3L, + 0xec17e239bd7fd0deL }, + { 0xe32905516f496ba2L,0x6a69317236ad50e7L,0xc4e539a283e7eff5L, + 0x752737e718e1b4cfL } }, + /* 24 << 77 */ + { { 0xa2f7932c68af43eeL,0x5502468e703d00bdL,0xe5dc978f2fb061f5L, + 0xc9a1904a28c815adL }, + { 0xd3af538d470c56a4L,0x159abc5f193d8cedL,0x2a37245f20108ef3L, + 0xfa17081e223f7178L } }, + /* 25 << 77 */ + { { 0x27b0fb2b10c8c0f5L,0x2102c3ea40650547L,0x594564df8ac3bfa7L, + 0x98102033509dad96L }, + { 0x6989643ff1d18a13L,0x35eebd91d7fc5af0L,0x078d096afaeaafd8L, + 0xb7a89341def3de98L } }, + /* 26 << 77 */ + { { 0x2a206e8decf2a73aL,0x066a63978e551994L,0x3a6a088ab98d53a2L, + 0x0ce7c67c2d1124aaL }, + { 0x48cec671759a113cL,0xe3b373d34f6f67faL,0x5455d479fd36727bL, + 0xe5a428eea13c0d81L } }, + /* 27 << 77 */ + { { 0xb853dbc81c86682bL,0xb78d2727b8d02b2aL,0xaaf69bed8ebc329aL, + 0xdb6b40b3293b2148L }, + { 0xe42ea77db8c4961fL,0xb1a12f7c20e5e0abL,0xa0ec527479e8b05eL, + 0x68027391fab60a80L } }, + /* 28 << 77 */ + { { 0x6bfeea5f16b1bd5eL,0xf957e4204de30ad3L,0xcbaf664e6a353b9eL, + 0x5c87331226d14febL }, + { 0x4e87f98cb65f57cbL,0xdb60a6215e0cdd41L,0x67c16865a6881440L, + 0x1093ef1a46ab52aaL } }, + /* 29 << 77 */ + { { 0xc095afb53f4ece64L,0x6a6bb02e7604551aL,0x55d44b4e0b26b8cdL, + 0xe5f9a999f971268aL }, + { 0xc08ec42511a7de84L,0x83568095fda469ddL,0x737bfba16c6c90a2L, + 0x1cb9c4a0be229831L } }, + /* 30 << 77 */ + { { 0x93bccbbabb2eec64L,0xa0c23b64da03adbeL,0x5f7aa00ae0e86ac4L, + 0x470b941efc1401e6L }, + { 0x5ad8d6799df43574L,0x4ccfb8a90f65d810L,0x1bce80e3aa7fbd81L, + 0x273291ad9508d20aL } }, + /* 31 << 77 */ + { { 0xf5c4b46b42a92806L,0x810684eca86ab44aL,0x4591640bca0bc9f8L, + 0xb5efcdfc5c4b6054L }, + { 0x16fc89076e9edd12L,0xe29d0b50d4d792f9L,0xa45fd01c9b03116dL, + 0x85035235c81765a4L } }, + /* 32 << 77 */ + { { 0x1fe2a9b2b4b4b67cL,0xc1d10df0e8020604L,0x9d64abfcbc8058d8L, + 0x8943b9b2712a0fbbL }, + { 0x90eed9143b3def04L,0x85ab3aa24ce775ffL,0x605fd4ca7bbc9040L, + 0x8b34a564e2c75dfbL } }, + /* 33 << 77 */ + { { 0x41ffc94a10358560L,0x2d8a50729e5c28aaL,0xe915a0fc4cc7eb15L, + 0xe9efab058f6d0f5dL }, + { 0xdbab47a9d19e9b91L,0x8cfed7450276154cL,0x154357ae2cfede0dL, + 0x520630df19f5a4efL } }, + /* 34 << 77 */ + { { 0x25759f7ce382360fL,0xb6db05c988bf5857L,0x2917d61d6c58d46cL, + 0x14f8e491fd20cb7aL }, + { 0xb68a727a11c20340L,0x0386f86faf7ccbb6L,0x5c8bc6ccfee09a20L, + 0x7d76ff4abb7eea35L } }, + /* 35 << 77 */ + { { 0xa7bdebe7db15be7aL,0x67a08054d89f0302L,0x56bf0ea9c1193364L, + 0xc824446762837ebeL }, + { 0x32bd8e8b20d841b8L,0x127a0548dbb8a54fL,0x83dd4ca663b20236L, + 0x87714718203491faL } }, + /* 36 << 77 */ + { { 0x4dabcaaaaa8a5288L,0x91cc0c8aaf23a1c9L,0x34c72c6a3f220e0cL, + 0xbcc20bdf1232144aL }, + { 0x6e2f42daa20ede1bL,0xc441f00c74a00515L,0xbf46a5b6734b8c4bL, + 0x574095037b56c9a4L } }, + /* 37 << 77 */ + { { 0x9f735261e4585d45L,0x9231faed6734e642L,0x1158a176be70ee6cL, + 0x35f1068d7c3501bfL }, + { 
0x6beef900a2d26115L,0x649406f2ef0afee3L,0x3f43a60abc2420a1L, + 0x509002a7d5aee4acL } }, + /* 38 << 77 */ + { { 0xb46836a53ff3571bL,0x24f98b78837927c1L,0x6254256a4533c716L, + 0xf27abb0bd07ee196L }, + { 0xd7cf64fc5c6d5bfdL,0x6915c751f0cd7a77L,0xd9f590128798f534L, + 0x772b0da8f81d8b5fL } }, + /* 39 << 77 */ + { { 0x1244260c2e03fa69L,0x36cf0e3a3be1a374L,0x6e7c1633ef06b960L, + 0xa71a4c55671f90f6L }, + { 0x7a94125133c673dbL,0xc0bea51073e8c131L,0x61a8a699d4f6c734L, + 0x25e78c88341ed001L } }, + /* 40 << 77 */ + { { 0x5c18acf88e2f7d90L,0xfdbf33d777be32cdL,0x0a085cd7d2eb5ee9L, + 0x2d702cfbb3201115L }, + { 0xb6e0ebdb85c88ce8L,0x23a3ce3c1e01d617L,0x3041618e567333acL, + 0x9dd0fd8f157edb6bL } }, + /* 41 << 77 */ + { { 0x27f74702b57872b8L,0x2ef26b4f657d5fe1L,0x95426f0a57cf3d40L, + 0x847e2ad165a6067aL }, + { 0xd474d9a009996a74L,0x16a56acd2a26115cL,0x02a615c3d16f4d43L, + 0xcc3fc965aadb85b7L } }, + /* 42 << 77 */ + { { 0x386bda73ce07d1b0L,0xd82910c258ad4178L,0x124f82cfcd2617f4L, + 0xcc2f5e8def691770L }, + { 0x82702550b8c30cccL,0x7b856aea1a8e575aL,0xbb822fefb1ab9459L, + 0x085928bcec24e38eL } }, + /* 43 << 77 */ + { { 0x5d0402ecba8f4b4dL,0xc07cd4ba00b4d58bL,0x5d8dffd529227e7aL, + 0x61d44d0c31bf386fL }, + { 0xe486dc2b135e6f4dL,0x680962ebe79410efL,0xa61bd343f10088b5L, + 0x6aa76076e2e28686L } }, + /* 44 << 77 */ + { { 0x80463d118fb98871L,0xcb26f5c3bbc76affL,0xd4ab8eddfbe03614L, + 0xc8eb579bc0cf2deeL }, + { 0xcc004c15c93bae41L,0x46fbae5d3aeca3b2L,0x671235cf0f1e9ab1L, + 0xadfba9349ec285c1L } }, + /* 45 << 77 */ + { { 0x88ded013f216c980L,0xc8ac4fb8f79e0bc1L,0xa29b89c6fb97a237L, + 0xb697b7809922d8e7L }, + { 0x3142c639ddb945b5L,0x447b06c7e094c3a9L,0xcdcb364272266c90L, + 0x633aad08a9385046L } }, + /* 46 << 77 */ + { { 0xa36c936bb57c6477L,0x871f8b64e94dbcc6L,0x28d0fb62a591a67bL, + 0x9d40e081c1d926f5L }, + { 0x3111eaf6f2d84b5aL,0x228993f9a565b644L,0x0ccbf5922c83188bL, + 0xf87b30ab3df3e197L } }, + /* 47 << 77 */ + { { 0xb8658b317642bca8L,0x1a032d7f52800f17L,0x051dcae579bf9445L, + 0xeba6b8ee54a2e253L }, + { 0x5c8b9cadd4485692L,0x84bda40e8986e9beL,0xd16d16a42f0db448L, + 0x8ec80050a14d4188L } }, + /* 48 << 77 */ + { { 0xb2b2610798fa7aaaL,0x41209ee4f073aa4eL,0xf1570359f2d6b19bL, + 0xcbe6868cfc577cafL }, + { 0x186c4bdc32c04dd3L,0xa6c35faecfeee397L,0xb4a1b312f086c0cfL, + 0xe0a5ccc6d9461fe2L } }, + /* 49 << 77 */ + { { 0xc32278aa1536189fL,0x1126c55fba6df571L,0x0f71a602b194560eL, + 0x8b2d7405324bd6e1L }, + { 0x8481939e3738be71L,0xb5090b1a1a4d97a9L,0x116c65a3f05ba915L, + 0x21863ad3aae448aaL } }, + /* 50 << 77 */ + { { 0xd24e2679a7aae5d3L,0x7076013d0de5c1c4L,0x2d50f8babb05b629L, + 0x73c1abe26e66efbbL }, + { 0xefd4b422f2488af7L,0xe4105d02663ba575L,0x7eb60a8b53a69457L, + 0x62210008c945973bL } }, + /* 51 << 77 */ + { { 0xfb25547877a50ec6L,0xbf0392f70a37a72cL,0xa0a7a19c4be18e7aL, + 0x90d8ea1625b1e0afL }, + { 0x7582a293ef953f57L,0x90a64d05bdc5465aL,0xca79c497e2510717L, + 0x560dbb7c18cb641fL } }, + /* 52 << 77 */ + { { 0x1d8e32864b66abfbL,0xd26f52e559030900L,0x1ee3f6435584941aL, + 0x6d3b3730569f5958L }, + { 0x9ff2a62f4789dba5L,0x91fcb81572b5c9b7L,0xf446cb7d6c8f9a0eL, + 0x48f625c139b7ecb5L } }, + /* 53 << 77 */ + { { 0xbabae8011c6219b8L,0xe7a562d928ac2f23L,0xe1b4873226e20588L, + 0x06ee1cad775af051L }, + { 0xda29ae43faff79f7L,0xc141a412652ee9e0L,0x1e127f6f195f4bd0L, + 0x29c6ab4f072f34f8L } }, + /* 54 << 77 */ + { { 0x7b7c147730448112L,0x82b51af1e4a38656L,0x2bf2028a2f315010L, + 0xc9a4a01f6ea88cd4L }, + { 0xf63e95d8257e5818L,0xdd8efa10b4519b16L,0xed8973e00da910bfL, + 0xed49d0775c0fe4a9L } }, + /* 55 << 77 */ + { { 
0xac3aac5eb7caee1eL,0x1033898da7f4da57L,0x42145c0e5c6669b9L, + 0x42daa688c1aa2aa0L }, + { 0x629cc15c1a1d885aL,0x25572ec0f4b76817L,0x8312e4359c8f8f28L, + 0x8107f8cd81965490L } }, + /* 56 << 77 */ + { { 0x516ff3a36fa6110cL,0x74fb1eb1fb93561fL,0x6c0c90478457522bL, + 0xcfd321046bb8bdc6L }, + { 0x2d6884a2cc80ad57L,0x7c27fc3586a9b637L,0x3461baedadf4e8cdL, + 0x1d56251a617242f0L } }, + /* 57 << 77 */ + { { 0x0b80d209c955bef4L,0xdf02cad206adb047L,0xf0d7cb915ec74feeL, + 0xd25033751111ba44L }, + { 0x9671755edf53cb36L,0x54dcb6123368551bL,0x66d69aacc8a025a4L, + 0x6be946c6e77ef445L } }, + /* 58 << 77 */ + { { 0x719946d1a995e094L,0x65e848f6e51e04d8L,0xe62f33006a1e3113L, + 0x1541c7c1501de503L }, + { 0x4daac9faf4acfadeL,0x0e58589744cd0b71L,0x544fd8690a51cd77L, + 0x60fc20ed0031016dL } }, + /* 59 << 77 */ + { { 0x58b404eca4276867L,0x46f6c3cc34f34993L,0x477ca007c636e5bdL, + 0x8018f5e57c458b47L }, + { 0xa1202270e47b668fL,0xcef48ccdee14f203L,0x23f98bae62ff9b4dL, + 0x55acc035c589edddL } }, + /* 60 << 77 */ + { { 0x3fe712af64db4444L,0x19e9d634becdd480L,0xe08bc047a930978aL, + 0x2dbf24eca1280733L }, + { 0x3c0ae38c2cd706b2L,0x5b012a5b359017b9L,0x3943c38c72e0f5aeL, + 0x786167ea57176fa3L } }, + /* 61 << 77 */ + { { 0xe5f9897d594881dcL,0x6b5efad8cfb820c1L,0xb2179093d55018deL, + 0x39ad7d320bac56ceL }, + { 0xb55122e02cfc0e81L,0x117c4661f6d89daaL,0x362d01e1cb64fa09L, + 0x6a309b4e3e9c4dddL } }, + /* 62 << 77 */ + { { 0xfa979fb7abea49b1L,0xb4b1d27d10e2c6c5L,0xbd61c2c423afde7aL, + 0xeb6614f89786d358L }, + { 0x4a5d816b7f6f7459L,0xe431a44f09360e7bL,0x8c27a032c309914cL, + 0xcea5d68acaede3d8L } }, + /* 63 << 77 */ + { { 0x3668f6653a0a3f95L,0x893694167ceba27bL,0x89981fade4728fe9L, + 0x7102c8a08a093562L }, + { 0xbb80310e235d21c8L,0x505e55d1befb7f7bL,0xa0a9081112958a67L, + 0xd67e106a4d851fefL } }, + /* 64 << 77 */ + { { 0xb84011a9431dd80eL,0xeb7c7cca73306cd9L,0x20fadd29d1b3b730L, + 0x83858b5bfe37b3d3L }, + { 0xbf4cd193b6251d5cL,0x1cca1fd31352d952L,0xc66157a490fbc051L, + 0x7990a63889b98636L } }, + /* 0 << 84 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 84 */ + { { 0xe5aa692a87dec0e1L,0x010ded8df7b39d00L,0x7b1b80c854cfa0b5L, + 0x66beb876a0f8ea28L }, + { 0x50d7f5313476cd0eL,0xa63d0e65b08d3949L,0x1a09eea953479fc6L, + 0x82ae9891f499e742L } }, + /* 2 << 84 */ + { { 0xab58b9105ca7d866L,0x582967e23adb3b34L,0x89ae4447cceac0bcL, + 0x919c667c7bf56af5L }, + { 0x9aec17b160f5dcd7L,0xec697b9fddcaadbcL,0x0b98f341463467f5L, + 0xb187f1f7a967132fL } }, + /* 3 << 84 */ + { { 0x90fe7a1d214aeb18L,0x1506af3c741432f7L,0xbb5565f9e591a0c4L, + 0x10d41a77b44f1bc3L }, + { 0xa09d65e4a84bde96L,0x42f060d8f20a6a1cL,0x652a3bfdf27f9ce7L, + 0xb6bdb65c3b3d739fL } }, + /* 4 << 84 */ + { { 0xeb5ddcb6ec7fae9fL,0x995f2714efb66e5aL,0xdee95d8e69445d52L, + 0x1b6c2d4609e27620L }, + { 0x32621c318129d716L,0xb03909f10958c1aaL,0x8c468ef91af4af63L, + 0x162c429ffba5cdf6L } }, + /* 5 << 84 */ + { { 0x2f682343753b9371L,0x29cab45a5f1f9cd7L,0x571623abb245db96L, + 0xc507db093fd79999L }, + { 0x4e2ef652af036c32L,0x86f0cc7805018e5cL,0xc10a73d4ab8be350L, + 0x6519b3977e826327L } }, + /* 6 << 84 */ + { { 0xe8cb5eef9c053df7L,0x8de25b37b300ea6fL,0xdb03fa92c849cffbL, + 0x242e43a7e84169bbL }, + { 0xe4fa51f4dd6f958eL,0x6925a77ff4445a8dL,0xe6e72a50e90d8949L, + 0xc66648e32b1f6390L } }, + /* 7 << 84 */ + { { 0xb2ab1957173e460cL,0x1bbbce7530704590L,0xc0a90dbddb1c7162L, + 0x505e399e15cdd65dL }, + { 0x68434dcb57797ab7L,0x60ad35ba6a2ca8e8L,0x4bfdb1e0de3336c1L, + 0xbbef99ebd8b39015L } }, + /* 8 << 84 */ + { { 0x6c3b96f31711ebecL,0x2da40f1fce98fdc4L,0xb99774d357b4411fL, + 
0x87c8bdf415b65bb6L }, + { 0xda3a89e3c2eef12dL,0xde95bb9b3c7471f3L,0x600f225bd812c594L, + 0x54907c5d2b75a56bL } }, + /* 9 << 84 */ + { { 0xa93cc5f08db60e35L,0x743e3cd6fa833319L,0x7dad5c41f81683c9L, + 0x70c1e7d99c34107eL }, + { 0x0edc4a39a6be0907L,0x36d4703586d0b7d3L,0x8c76da03272bfa60L, + 0x0b4a07ea0f08a414L } }, + /* 10 << 84 */ + { { 0x699e4d2945c1dd53L,0xcadc5898231debb5L,0xdf49fcc7a77f00e0L, + 0x93057bbfa73e5a0eL }, + { 0x2f8b7ecd027a4cd1L,0x114734b3c614011aL,0xe7a01db767677c68L, + 0x89d9be5e7e273f4fL } }, + /* 11 << 84 */ + { { 0xd225cb2e089808efL,0xf1f7a27dd59e4107L,0x53afc7618211b9c9L, + 0x0361bc67e6819159L }, + { 0x2a865d0b7f071426L,0x6a3c1810e7072567L,0x3e3bca1e0d6bcabdL, + 0xa1b02bc1408591bcL } }, + /* 12 << 84 */ + { { 0xe0deee5931fba239L,0xf47424d398bd91d1L,0x0f8886f4071a3c1dL, + 0x3f7d41e8a819233bL }, + { 0x708623c2cf6eb998L,0x86bb49af609a287fL,0x942bb24963c90762L, + 0x0ef6eea555a9654bL } }, + /* 13 << 84 */ + { { 0x5f6d2d7236f5defeL,0xfa9922dc56f99176L,0x6c8c5ecef78ce0c7L, + 0x7b44589dbe09b55eL }, + { 0xe11b3bca9ea83770L,0xd7fa2c7f2ab71547L,0x2a3dd6fa2a1ddcc0L, + 0x09acb4305a7b7707L } }, + /* 14 << 84 */ + { { 0x4add4a2e649d4e57L,0xcd53a2b01917526eL,0xc526233020b44ac4L, + 0x4028746abaa2c31dL }, + { 0x5131839064291d4cL,0xbf48f151ee5ad909L,0xcce57f597b185681L, + 0x7c3ac1b04854d442L } }, + /* 15 << 84 */ + { { 0x65587dc3c093c171L,0xae7acb2424f42b65L,0x5a338adb955996cbL, + 0xc8e656756051f91bL }, + { 0x66711fba28b8d0b1L,0x15d74137b6c10a90L,0x70cdd7eb3a232a80L, + 0xc9e2f07f6191ed24L } }, + /* 16 << 84 */ + { { 0xa80d1db6f79588c0L,0xfa52fc69b55768ccL,0x0b4df1ae7f54438aL, + 0x0cadd1a7f9b46a4fL }, + { 0xb40ea6b31803dd6fL,0x488e4fa555eaae35L,0x9f047d55382e4e16L, + 0xc9b5b7e02f6e0c98L } }, + /* 17 << 84 */ + { { 0x6b1bd2d395762649L,0xa9604ee7c7aea3f6L,0x3646ff276dc6f896L, + 0x9bf0e7f52860bad1L }, + { 0x2d92c8217cb44b92L,0xa2f5ce63aea9c182L,0xd0a2afb19154a5fdL, + 0x482e474c95801da6L } }, + /* 18 << 84 */ + { { 0xc19972d0b611c24bL,0x1d468e6560a8f351L,0xeb7580697bcf6421L, + 0xec9dd0ee88fbc491L }, + { 0x5b59d2bf956c2e32L,0x73dc6864dcddf94eL,0xfd5e2321bcee7665L, + 0xa7b4f8ef5e9a06c4L } }, + /* 19 << 84 */ + { { 0xfba918dd7280f855L,0xbbaac2608baec688L,0xa3b3f00f33400f42L, + 0x3d2dba2966f2e6e4L }, + { 0xb6f71a9498509375L,0x8f33031fcea423ccL,0x009b8dd04807e6fbL, + 0x5163cfe55cdb954cL } }, + /* 20 << 84 */ + { { 0x03cc8f17cf41c6e8L,0xf1f03c2a037b925cL,0xc39c19cc66d2427cL, + 0x823d24ba7b6c18e4L }, + { 0x32ef9013901f0b4fL,0x684360f1f8941c2eL,0x0ebaff522c28092eL, + 0x7891e4e3256c932fL } }, + /* 21 << 84 */ + { { 0x51264319ac445e3dL,0x553432e78ea74381L,0xe6eeaa6967e9c50aL, + 0x27ced28462e628c7L }, + { 0x3f96d3757a4afa57L,0xde0a14c3e484c150L,0x364a24eb38bd9923L, + 0x1df18da0e5177422L } }, + /* 22 << 84 */ + { { 0x174e8f82d8d38a9bL,0x2e97c600e7de1391L,0xc5709850a1c175ddL, + 0x969041a032ae5035L }, + { 0xcbfd533b76a2086bL,0xd6bba71bd7c2e8feL,0xb2d58ee6099dfb67L, + 0x3a8b342d064a85d9L } }, + /* 23 << 84 */ + { { 0x3bc07649522f9be3L,0x690c075bdf1f49a8L,0x80e1aee83854ec42L, + 0x2a7dbf4417689dc7L }, + { 0xc004fc0e3faf4078L,0xb2f02e9edf11862cL,0xf10a5e0fa0a1b7b3L, + 0x30aca6238936ec80L } }, + /* 24 << 84 */ + { { 0xf83cbf0502f40d9aL,0x4681c4682c318a4dL,0x985756180e9c2674L, + 0xbe79d0461847092eL }, + { 0xaf1e480a78bd01e0L,0x6dd359e472a51db9L,0x62ce3821e3afbab6L, + 0xc5cee5b617733199L } }, + /* 25 << 84 */ + { { 0xe08b30d46ffd9fbbL,0x6e5bc69936c610b7L,0xf343cff29ce262cfL, + 0xca2e4e3568b914c1L }, + { 0x011d64c016de36c5L,0xe0b10fdd42e2b829L,0x789429816685aaf8L, + 0xe7511708230ede97L } }, + /* 26 << 84 */ + { { 
0x671ed8fc3b922bf8L,0xe4d8c0a04c29b133L,0x87eb12393b6e99c4L, + 0xaff3974c8793bebaL }, + { 0x037494052c18df9bL,0xc5c3a29391007139L,0x6a77234fe37a0b95L, + 0x02c29a21b661c96bL } }, + /* 27 << 84 */ + { { 0xc3aaf1d6141ecf61L,0x9195509e3bb22f53L,0x2959740422d51357L, + 0x1b083822537bed60L }, + { 0xcd7d6e35e07289f0L,0x1f94c48c6dd86effL,0xc8bb1f82eb0f9cfaL, + 0x9ee0b7e61b2eb97dL } }, + /* 28 << 84 */ + { { 0x5a52fe2e34d74e31L,0xa352c3103bf79ab6L,0x97ff6c5aabfeeb8fL, + 0xbfbe8feff5c97305L }, + { 0xd6081ce6a7904608L,0x1f812f3ac4fca249L,0x9b24bc9ab9e5e200L, + 0x91022c6738012ee8L } }, + /* 29 << 84 */ + { { 0xe83d9c5d30a713a1L,0x4876e3f084ef0f93L,0xc9777029c1fbf928L, + 0xef7a6bb3bce7d2a4L }, + { 0xb8067228dfa2a659L,0xd5cd3398d877a48fL,0xbea4fd8f025d0f3fL, + 0xd67d2e352eae7c2bL } }, + /* 30 << 84 */ + { { 0x184de7d7cc5f4394L,0xb5551b5c4536e142L,0x2e89b212d34aa60aL, + 0x14a96feaf50051d5L }, + { 0x4e21ef740d12bb0bL,0xc522f02060b9677eL,0x8b12e4672df7731dL, + 0x39f803827b326d31L } }, + /* 31 << 84 */ + { { 0xdfb8630c39024a94L,0xaacb96a897319452L,0xd68a3961eda3867cL, + 0x0c58e2b077c4ffcaL }, + { 0x3d545d634da919faL,0xef79b69af15e2289L,0x54bc3d3d808bab10L, + 0xc8ab300745f82c37L } }, + /* 32 << 84 */ + { { 0xc12738b67c4a658aL,0xb3c4763940e72182L,0x3b77be468798e44fL, + 0xdc047df217a7f85fL }, + { 0x2439d4c55e59d92dL,0xcedca475e8e64d8dL,0xa724cd0d87ca9b16L, + 0x35e4fd59a5540dfeL } }, + /* 33 << 84 */ + { { 0xf8c1ff18e4bcf6b1L,0x856d6285295018faL,0x433f665c3263c949L, + 0xa6a76dd6a1f21409L }, + { 0x17d32334cc7b4f79L,0xa1d0312206720e4aL,0xadb6661d81d9bed5L, + 0xf0d6fb0211db15d1L } }, + /* 34 << 84 */ + { { 0x7fd11ad51fb747d2L,0xab50f9593033762bL,0x2a7e711bfbefaf5aL, + 0xc73932783fef2bbfL }, + { 0xe29fa2440df6f9beL,0x9092757b71efd215L,0xee60e3114f3d6fd9L, + 0x338542d40acfb78bL } }, + /* 35 << 84 */ + { { 0x44a23f0838961a0fL,0x1426eade986987caL,0x36e6ee2e4a863cc6L, + 0x48059420628b8b79L }, + { 0x30303ad87396e1deL,0x5c8bdc4838c5aad1L,0x3e40e11f5c8f5066L, + 0xabd6e7688d246bbdL } }, + /* 36 << 84 */ + { { 0x68aa40bb23330a01L,0xd23f5ee4c34eafa0L,0x3bbee3155de02c21L, + 0x18dd4397d1d8dd06L }, + { 0x3ba1939a122d7b44L,0xe6d3b40aa33870d6L,0x8e620f701c4fe3f8L, + 0xf6bba1a5d3a50cbfL } }, + /* 37 << 84 */ + { { 0x4a78bde5cfc0aee0L,0x847edc46c08c50bdL,0xbaa2439cad63c9b2L, + 0xceb4a72810fc2acbL }, + { 0xa419e40e26da033dL,0x6cc3889d03e02683L,0x1cd28559fdccf725L, + 0x0fd7e0f18d13d208L } }, + /* 38 << 84 */ + { { 0x01b9733b1f0df9d4L,0x8cc2c5f3a2b5e4f3L,0x43053bfa3a304fd4L, + 0x8e87665c0a9f1aa7L }, + { 0x087f29ecd73dc965L,0x15ace4553e9023dbL,0x2370e3092bce28b4L, + 0xf9723442b6b1e84aL } }, + /* 39 << 84 */ + { { 0xbeee662eb72d9f26L,0xb19396def0e47109L,0x85b1fa73e13289d0L, + 0x436cf77e54e58e32L }, + { 0x0ec833b3e990ef77L,0x7373e3ed1b11fc25L,0xbe0eda870fc332ceL, + 0xced049708d7ea856L } }, + /* 40 << 84 */ + { { 0xf85ff7857e977ca0L,0xb66ee8dadfdd5d2bL,0xf5e37950905af461L, + 0x587b9090966d487cL }, + { 0x6a198a1b32ba0127L,0xa7720e07141615acL,0xa23f3499996ef2f2L, + 0xef5f64b4470bcb3dL } }, + /* 41 << 84 */ + { { 0xa526a96292b8c559L,0x0c14aac069740a0fL,0x0d41a9e3a6bdc0a5L, + 0x97d521069c48aef4L }, + { 0xcf16bd303e7c253bL,0xcc834b1a47fdedc1L,0x7362c6e5373aab2eL, + 0x264ed85ec5f590ffL } }, + /* 42 << 84 */ + { { 0x7a46d9c066d41870L,0xa50c20b14787ba09L,0x185e7e51e3d44635L, + 0xb3b3e08031e2d8dcL }, + { 0xbed1e558a179e9d9L,0x2daa3f7974a76781L,0x4372baf23a40864fL, + 0x46900c544fe75cb5L } }, + /* 43 << 84 */ + { { 0xb95f171ef76765d0L,0x4ad726d295c87502L,0x2ec769da4d7c99bdL, + 0x5e2ddd19c36cdfa8L }, + { 
0xc22117fca93e6deaL,0xe8a2583b93771123L,0xbe2f6089fa08a3a2L, + 0x4809d5ed8f0e1112L } }, + /* 44 << 84 */ + { { 0x3b414aa3da7a095eL,0x9049acf126f5aaddL,0x78d46a4d6be8b84aL, + 0xd66b1963b732b9b3L }, + { 0x5c2ac2a0de6e9555L,0xcf52d098b5bd8770L,0x15a15fa60fd28921L, + 0x56ccb81e8b27536dL } }, + /* 45 << 84 */ + { { 0x0f0d8ab89f4ccbb8L,0xed5f44d2db221729L,0x4314198800bed10cL, + 0xc94348a41d735b8bL }, + { 0x79f3e9c429ef8479L,0x4c13a4e3614c693fL,0x32c9af568e143a14L, + 0xbc517799e29ac5c4L } }, + /* 46 << 84 */ + { { 0x05e179922774856fL,0x6e52fb056c1bf55fL,0xaeda4225e4f19e16L, + 0x70f4728aaf5ccb26L }, + { 0x5d2118d1b2947f22L,0xc827ea16281d6fb9L,0x8412328d8cf0eabdL, + 0x45ee9fb203ef9dcfL } }, + /* 47 << 84 */ + { { 0x8e700421bb937d63L,0xdf8ff2d5cc4b37a6L,0xa4c0d5b25ced7b68L, + 0x6537c1efc7308f59L }, + { 0x25ce6a263b37f8e8L,0x170e9a9bdeebc6ceL,0xdd0379528728d72cL, + 0x445b0e55850154bcL } }, + /* 48 << 84 */ + { { 0x4b7d0e0683a7337bL,0x1e3416d4ffecf249L,0x24840eff66a2b71fL, + 0xd0d9a50ab37cc26dL }, + { 0xe21981506fe28ef7L,0x3cc5ef1623324c7fL,0x220f3455769b5263L, + 0xe2ade2f1a10bf475L } }, + /* 49 << 84 */ + { { 0x28cd20fa458d3671L,0x1549722c2dc4847bL,0x6dd01e55591941e3L, + 0x0e6fbcea27128ccbL }, + { 0xae1a1e6b3bef0262L,0xfa8c472c8f54e103L,0x7539c0a872c052ecL, + 0xd7b273695a3490e9L } }, + /* 50 << 84 */ + { { 0x143fe1f171684349L,0x36b4722e32e19b97L,0xdc05922790980affL, + 0x175c9c889e13d674L }, + { 0xa7de5b226e6bfdb1L,0x5ea5b7b2bedb4b46L,0xd5570191d34a6e44L, + 0xfcf60d2ea24ff7e6L } }, + /* 51 << 84 */ + { { 0x614a392d677819e1L,0x7be74c7eaa5a29e8L,0xab50fece63c85f3fL, + 0xaca2e2a946cab337L }, + { 0x7f700388122a6fe3L,0xdb69f703882a04a8L,0x9a77935dcf7aed57L, + 0xdf16207c8d91c86fL } }, + /* 52 << 84 */ + { { 0x2fca49ab63ed9998L,0xa3125c44a77ddf96L,0x05dd8a8624344072L, + 0xa023dda2fec3fb56L }, + { 0x421b41fc0c743032L,0x4f2120c15e438639L,0xfb7cae51c83c1b07L, + 0xb2370caacac2171aL } }, + /* 53 << 84 */ + { { 0x2eb2d9626cc820fbL,0x59feee5cb85a44bfL,0x94620fca5b6598f0L, + 0x6b922cae7e314051L }, + { 0xff8745ad106bed4eL,0x546e71f5dfa1e9abL,0x935c1e481ec29487L, + 0x9509216c4d936530L } }, + /* 54 << 84 */ + { { 0xc7ca306785c9a2dbL,0xd6ae51526be8606fL,0x09dbcae6e14c651dL, + 0xc9536e239bc32f96L }, + { 0xa90535a934521b03L,0xf39c526c878756ffL,0x383172ec8aedf03cL, + 0x20a8075eefe0c034L } }, + /* 55 << 84 */ + { { 0xf22f9c6264026422L,0x8dd1078024b9d076L,0x944c742a3bef2950L, + 0x55b9502e88a2b00bL }, + { 0xa59e14b486a09817L,0xa39dd3ac47bb4071L,0x55137f663be0592fL, + 0x07fcafd4c9e63f5bL } }, + /* 56 << 84 */ + { { 0x963652ee346eb226L,0x7dfab085ec2facb7L,0x273bf2b8691add26L, + 0x30d74540f2b46c44L }, + { 0x05e8e73ef2c2d065L,0xff9b8a00d42eeac9L,0x2fcbd20597209d22L, + 0xeb740ffade14ea2cL } }, + /* 57 << 84 */ + { { 0xc71ff913a8aef518L,0x7bfc74bbfff4cfa2L,0x1716680cb6b36048L, + 0x121b2cce9ef79af1L }, + { 0xbff3c836a01eb3d3L,0x50eb1c6a5f79077bL,0xa48c32d6a004bbcfL, + 0x47a593167d64f61dL } }, + /* 58 << 84 */ + { { 0x6068147f93102016L,0x12c5f65494d12576L,0xefb071a7c9bc6b91L, + 0x7c2da0c56e23ea95L }, + { 0xf4fd45b6d4a1dd5dL,0x3e7ad9b69122b13cL,0x342ca118e6f57a48L, + 0x1c2e94a706f8288fL } }, + /* 59 << 84 */ + { { 0x99e68f075a97d231L,0x7c80de974d838758L,0xbce0f5d005872727L, + 0xbe5d95c219c4d016L }, + { 0x921d5cb19c2492eeL,0x42192dc1404d6fb3L,0x4c84dcd132f988d3L, + 0xde26d61fa17b8e85L } }, + /* 60 << 84 */ + { { 0xc466dcb6137c7408L,0x9a38d7b636a266daL,0x7ef5cb0683bebf1bL, + 0xe5cdcbbf0fd014e3L }, + { 0x30aa376df65965a0L,0x60fe88c2ebb3e95eL,0x33fd0b6166ee6f20L, + 0x8827dcdb3f41f0a0L } }, + /* 61 << 84 */ + { { 
0xbf8a9d240c56c690L,0x40265dadddb7641dL,0x522b05bf3a6b662bL, + 0x466d1dfeb1478c9bL }, + { 0xaa6169621484469bL,0x0db6054902df8f9fL,0xc37bca023cb8bf51L, + 0x5effe34621371ce8L } }, + /* 62 << 84 */ + { { 0xe8f65264ff112c32L,0x8a9c736d7b971fb2L,0xa4f194707b75080dL, + 0xfc3f2c5a8839c59bL }, + { 0x1d6c777e5aeb49c2L,0xf3db034dda1addfeL,0xd76fee5a5535affcL, + 0x0853ac70b92251fdL } }, + /* 63 << 84 */ + { { 0x37e3d5948b2a29d5L,0x28f1f4574de00ddbL,0x8083c1b5f42c328bL, + 0xd8ef1d8fe493c73bL }, + { 0x96fb626041dc61bdL,0xf74e8a9d27ee2f8aL,0x7c605a802c946a5dL, + 0xeed48d653839ccfdL } }, + /* 64 << 84 */ + { { 0x9894344f3a29467aL,0xde81e949c51eba6dL,0xdaea066ba5e5c2f2L, + 0x3fc8a61408c8c7b3L }, + { 0x7adff88f06d0de9fL,0xbbc11cf53b75ce0aL,0x9fbb7accfbbc87d5L, + 0xa1458e267badfde2L } }, + /* 0 << 91 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 91 */ + { { 0x1cb43668e039c256L,0x5f26fb8b7c17fd5dL,0xeee426af79aa062bL, + 0x072002d0d78fbf04L }, + { 0x4c9ca237e84fb7e3L,0xb401d8a10c82133dL,0xaaa525926d7e4181L, + 0xe943083373dbb152L } }, + /* 2 << 91 */ + { { 0xf92dda31be24319aL,0x03f7d28be095a8e7L,0xa52fe84098782185L, + 0x276ddafe29c24dbcL }, + { 0x80cd54961d7a64ebL,0xe43608897f1dbe42L,0x2f81a8778438d2d5L, + 0x7e4d52a885169036L } }, + /* 3 << 91 */ + { { 0x19e3d5b11d59715dL,0xc7eaa762d788983eL,0xe5a730b0abf1f248L, + 0xfbab8084fae3fd83L }, + { 0x65e50d2153765b2fL,0xbdd4e083fa127f3dL,0x9cf3c074397b1b10L, + 0x59f8090cb1b59fd3L } }, + /* 4 << 91 */ + { { 0x7b15fd9d615faa8fL,0x8fa1eb40968554edL,0x7bb4447e7aa44882L, + 0x2bb2d0d1029fff32L }, + { 0x075e2a646caa6d2fL,0x8eb879de22e7351bL,0xbcd5624e9a506c62L, + 0x218eaef0a87e24dcL } }, + /* 5 << 91 */ + { { 0x37e5684744ddfa35L,0x9ccfc5c5dab3f747L,0x9ac1df3f1ee96cf4L, + 0x0c0571a13b480b8fL }, + { 0x2fbeb3d54b3a7b3cL,0x35c036695dcdbb99L,0x52a0f5dcb2415b3aL, + 0xd57759b44413ed9aL } }, + /* 6 << 91 */ + { { 0x1fe647d83d30a2c5L,0x0857f77ef78a81dcL,0x11d5a334131a4a9bL, + 0xc0a94af929d393f5L }, + { 0xbc3a5c0bdaa6ec1aL,0xba9fe49388d2d7edL,0xbb4335b4bb614797L, + 0x991c4d6872f83533L } }, + /* 7 << 91 */ + { { 0x53258c28d2f01cb3L,0x93d6eaa3d75db0b1L,0x419a2b0de87d0db4L, + 0xa1e48f03d8fe8493L }, + { 0xf747faf6c508b23aL,0xf137571a35d53549L,0x9f5e58e2fcf9b838L, + 0xc7186ceea7fd3cf5L } }, + /* 8 << 91 */ + { { 0x77b868cee978a1d3L,0xe3a68b337ab92d04L,0x5102979487a5b862L, + 0x5f0606c33a61d41dL }, + { 0x2814be276f9326f1L,0x2f521c14c6fe3c2eL,0x17464d7dacdf7351L, + 0x10f5f9d3777f7e44L } }, + /* 9 << 91 */ + { { 0xce8e616b269fb37dL,0xaaf738047de62de5L,0xaba111754fdd4153L, + 0x515759ba3770b49bL }, + { 0x8b09ebf8aa423a61L,0x592245a1cd41fb92L,0x1cba8ec19b4c8936L, + 0xa87e91e3af36710eL } }, + /* 10 << 91 */ + { { 0x1fd84ce43d34a2e3L,0xee3759ceb43b5d61L,0x895bc78c619186c7L, + 0xf19c3809cbb9725aL }, + { 0xc0be21aade744b1fL,0xa7d222b060f8056bL,0x74be6157b23efe11L, + 0x6fab2b4f0cd68253L } }, + /* 11 << 91 */ + { { 0xad33ea5f4bf1d725L,0x9c1d8ee24f6c950fL,0x544ee78aa377af06L, + 0x54f489bb94a113e1L }, + { 0x8f11d634992fb7e8L,0x0169a7aaa2a44347L,0x1d49d4af95020e00L, + 0x95945722e08e120bL } }, + /* 12 << 91 */ + { { 0xb6e33878a4d32282L,0xe36e029d48020ae7L,0xe05847fb37a9b750L, + 0xf876812cb29e3819L }, + { 0x84ad138ed23a17f0L,0x6d7b4480f0b3950eL,0xdfa8aef42fd67ae0L, + 0x8d3eea2452333af6L } }, + /* 13 << 91 */ + { { 0x0d052075b15d5accL,0xc6d9c79fbd815bc4L,0x8dcafd88dfa36cf2L, + 0x908ccbe238aa9070L }, + { 0x638722c4ba35afceL,0x5a3da8b0fd6abf0bL,0x2dce252cc9c335c1L, + 0x84e7f0de65aa799bL } }, + /* 14 << 91 */ + { { 0x2101a522b99a72cbL,0x06de6e6787618016L,0x5ff8c7cde6f3653eL, + 
0x0a821ab5c7a6754aL }, + { 0x7e3fa52b7cb0b5a2L,0xa7fb121cc9048790L,0x1a72502006ce053aL, + 0xb490a31f04e929b0L } }, + /* 15 << 91 */ + { { 0xe17be47d62dd61adL,0x781a961c6be01371L,0x1063bfd3dae3cbbaL, + 0x356474067f73c9baL }, + { 0xf50e957b2736a129L,0xa6313702ed13f256L,0x9436ee653a19fcc5L, + 0xcf2bdb29e7a4c8b6L } }, + /* 16 << 91 */ + { { 0xb06b1244c5f95cd8L,0xda8c8af0f4ab95f4L,0x1bae59c2b9e5836dL, + 0x07d51e7e3acffffcL }, + { 0x01e15e6ac2ccbcdaL,0x3bc1923f8528c3e0L,0x43324577a49fead4L, + 0x61a1b8842aa7a711L } }, + /* 17 << 91 */ + { { 0xf9a86e08700230efL,0x0af585a1bd19adf8L,0x7645f361f55ad8f2L, + 0x6e67622346c3614cL }, + { 0x23cb257c4e774d3fL,0x82a38513ac102d1bL,0x9bcddd887b126aa5L, + 0xe716998beefd3ee4L } }, + /* 18 << 91 */ + { { 0x4239d571fb167583L,0xdd011c78d16c8f8aL,0x271c289569a27519L, + 0x9ce0a3b7d2d64b6aL }, + { 0x8c977289d5ec6738L,0xa3b49f9a8840ef6bL,0x808c14c99a453419L, + 0x5c00295b0cf0a2d5L } }, + /* 19 << 91 */ + { { 0x524414fb1d4bcc76L,0xb07691d2459a88f1L,0x77f43263f70d110fL, + 0x64ada5e0b7abf9f3L }, + { 0xafd0f94e5b544cf5L,0xb4a13a15fd2713feL,0xb99b7d6e250c74f4L, + 0x097f2f7320324e45L } }, + /* 20 << 91 */ + { { 0x994b37d8affa8208L,0xc3c31b0bdc29aafcL,0x3da746517a3a607fL, + 0xd8e1b8c1fe6955d6L }, + { 0x716e1815c8418682L,0x541d487f7dc91d97L,0x48a04669c6996982L, + 0xf39cab1583a6502eL } }, + /* 21 << 91 */ + { { 0x025801a0e68db055L,0xf3569758ba3338d5L,0xb0c8c0aaee2afa84L, + 0x4f6985d3fb6562d1L }, + { 0x351f1f15132ed17aL,0x510ed0b4c04365feL,0xa3f98138e5b1f066L, + 0xbc9d95d632df03dcL } }, + /* 22 << 91 */ + { { 0xa83ccf6e19abd09eL,0x0b4097c14ff17edbL,0x58a5c478d64a06ceL, + 0x2ddcc3fd544a58fdL }, + { 0xd449503d9e8153b8L,0x3324fd027774179bL,0xaf5d47c8dbd9120cL, + 0xeb86016234fa94dbL } }, + /* 23 << 91 */ + { { 0x5817bdd1972f07f4L,0xe5579e2ed27bbcebL,0x86847a1f5f11e5a6L, + 0xb39ed2557c3cf048L }, + { 0xe1076417a2f62e55L,0x6b9ab38f1bcf82a2L,0x4bb7c3197aeb29f9L, + 0xf6d17da317227a46L } }, + /* 24 << 91 */ + { { 0xab53ddbd0f968c00L,0xa03da7ec000c880bL,0x7b2396246a9ad24dL, + 0x612c040101ec60d0L }, + { 0x70d10493109f5df1L,0xfbda403080af7550L,0x30b93f95c6b9a9b3L, + 0x0c74ec71007d9418L } }, + /* 25 << 91 */ + { { 0x941755646edb951fL,0x5f4a9d787f22c282L,0xb7870895b38d1196L, + 0xbc593df3a228ce7cL }, + { 0xc78c5bd46af3641aL,0x7802200b3d9b3dccL,0x0dc73f328be33304L, + 0x847ed87d61ffb79aL } }, + /* 26 << 91 */ + { { 0xf85c974e6d671192L,0x1e14100ade16f60fL,0x45cb0d5a95c38797L, + 0x18923bba9b022da4L }, + { 0xef2be899bbe7e86eL,0x4a1510ee216067bfL,0xd98c815484d5ce3eL, + 0x1af777f0f92a2b90L } }, + /* 27 << 91 */ + { { 0x9fbcb4004ef65724L,0x3e04a4c93c0ca6feL,0xfb3e2cb555002994L, + 0x1f3a93c55363ecabL }, + { 0x1fe00efe3923555bL,0x744bedd91e1751eaL,0x3fb2db596ab69357L, + 0x8dbd7365f5e6618bL } }, + /* 28 << 91 */ + { { 0x99d53099df1ea40eL,0xb3f24a0b57d61e64L,0xd088a198596eb812L, + 0x22c8361b5762940bL }, + { 0x66f01f97f9c0d95cL,0x884611728e43cdaeL,0x11599a7fb72b15c3L, + 0x135a7536420d95ccL } }, + /* 29 << 91 */ + { { 0x2dcdf0f75f7ae2f6L,0x15fc6e1dd7fa6da2L,0x81ca829ad1d441b6L, + 0x84c10cf804a106b6L }, + { 0xa9b26c95a73fbbd0L,0x7f24e0cb4d8f6ee8L,0x48b459371e25a043L, + 0xf8a74fca036f3dfeL } }, + /* 30 << 91 */ + { { 0x1ed46585c9f84296L,0x7fbaa8fb3bc278b0L,0xa8e96cd46c4fcbd0L, + 0x940a120273b60a5fL }, + { 0x34aae12055a4aec8L,0x550e9a74dbd742f0L,0x794456d7228c68abL, + 0x492f8868a4e25ec6L } }, + /* 31 << 91 */ + { { 0x682915adb2d8f398L,0xf13b51cc5b84c953L,0xcda90ab85bb917d6L, + 0x4b6155604ea3dee1L }, + { 0x578b4e850a52c1c8L,0xeab1a69520b75fc4L,0x60c14f3caa0bb3c6L, + 0x220f448ab8216094L } }, + /* 32 << 91 */ + { { 
0x4fe7ee31b0e63d34L,0xf4600572a9e54fabL,0xc0493334d5e7b5a4L, + 0x8589fb9206d54831L }, + { 0xaa70f5cc6583553aL,0x0879094ae25649e5L,0xcc90450710044652L, + 0xebb0696d02541c4fL } }, + /* 33 << 91 */ + { { 0x5a171fdeb9718710L,0x38f1bed8f374a9f5L,0xc8c582e1ba39bdc1L, + 0xfc457b0a908cc0ceL }, + { 0x9a187fd4883841e2L,0x8ec25b3938725381L,0x2553ed0596f84395L, + 0x095c76616f6c6897L } }, + /* 34 << 91 */ + { { 0x917ac85c4bdc5610L,0xb2885fe4179eb301L,0x5fc655478b78bdccL, + 0x4a9fc893e59e4699L }, + { 0xbb7ff0cd3ce299afL,0x195be9b3adf38b20L,0x6a929c87d38ddb8fL, + 0x55fcc99cb21a51b9L } }, + /* 35 << 91 */ + { { 0x2b695b4c721a4593L,0xed1e9a15768eaac2L,0xfb63d71c7489f914L, + 0xf98ba31c78118910L }, + { 0x802913739b128eb4L,0x7801214ed448af4aL,0xdbd2e22b55418dd3L, + 0xeffb3c0dd3998242L } }, + /* 36 << 91 */ + { { 0xdfa6077cc7bf3827L,0xf2165bcb47f8238fL,0xfe37cf688564d554L, + 0xe5f825c40a81fb98L }, + { 0x43cc4f67ffed4d6fL,0xbc609578b50a34b0L,0x8aa8fcf95041faf1L, + 0x5659f053651773b6L } }, + /* 37 << 91 */ + { { 0xe87582c36044d63bL,0xa60894090cdb0ca0L,0x8c993e0fbfb2bcf6L, + 0xfc64a71945985cfcL }, + { 0x15c4da8083dbedbaL,0x804ae1122be67df7L,0xda4c9658a23defdeL, + 0x12002ddd5156e0d3L } }, + /* 38 << 91 */ + { { 0xe68eae895dd21b96L,0x8b99f28bcf44624dL,0x0ae008081ec8897aL, + 0xdd0a93036712f76eL }, + { 0x962375224e233de4L,0x192445b12b36a8a5L,0xabf9ff74023993d9L, + 0x21f37bf42aad4a8fL } }, + /* 39 << 91 */ + { { 0x340a4349f8bd2bbdL,0x1d902cd94868195dL,0x3d27bbf1e5fdb6f1L, + 0x7a5ab088124f9f1cL }, + { 0xc466ab06f7a09e03L,0x2f8a197731f2c123L,0xda355dc7041b6657L, + 0xcb840d128ece2a7cL } }, + /* 40 << 91 */ + { { 0xb600ad9f7db32675L,0x78fea13307a06f1bL,0x5d032269b31f6094L, + 0x07753ef583ec37aaL }, + { 0x03485aed9c0bea78L,0x41bb3989bc3f4524L,0x09403761697f726dL, + 0x6109beb3df394820L } }, + /* 41 << 91 */ + { { 0x804111ea3b6d1145L,0xb6271ea9a8582654L,0x619615e624e66562L, + 0xa2554945d7b6ad9cL }, + { 0xd9c4985e99bfe35fL,0x9770ccc07b51cdf6L,0x7c32701392881832L, + 0x8777d45f286b26d1L } }, + /* 42 << 91 */ + { { 0x9bbeda22d847999dL,0x03aa33b6c3525d32L,0x4b7b96d428a959a1L, + 0xbb3786e531e5d234L }, + { 0xaeb5d3ce6961f247L,0x20aa85af02f93d3fL,0x9cd1ad3dd7a7ae4fL, + 0xbf6688f0781adaa8L } }, + /* 43 << 91 */ + { { 0xb1b40e867469ceadL,0x1904c524309fca48L,0x9b7312af4b54bbc7L, + 0xbe24bf8f593affa2L }, + { 0xbe5e0790bd98764bL,0xa0f45f17a26e299eL,0x4af0d2c26b8fe4c7L, + 0xef170db18ae8a3e6L } }, + /* 44 << 91 */ + { { 0x0e8d61a029e0ccc1L,0xcd53e87e60ad36caL,0x328c6623c8173822L, + 0x7ee1767da496be55L }, + { 0x89f13259648945afL,0x9e45a5fd25c8009cL,0xaf2febd91f61ab8cL, + 0x43f6bc868a275385L } }, + /* 45 << 91 */ + { { 0x87792348f2142e79L,0x17d89259c6e6238aL,0x7536d2f64a839d9bL, + 0x1f428fce76a1fbdcL }, + { 0x1c1096010db06dfeL,0xbfc16bc150a3a3ccL,0xf9cbd9ec9b30f41bL, + 0x5b5da0d600138cceL } }, + /* 46 << 91 */ + { { 0xec1d0a4856ef96a7L,0xb47eb848982bf842L,0x66deae32ec3f700dL, + 0x4e43c42caa1181e0L }, + { 0xa1d72a31d1a4aa2aL,0x440d4668c004f3ceL,0x0d6a2d3b45fe8a7aL, + 0x820e52e2fb128365L } }, + /* 47 << 91 */ + { { 0x29ac5fcf25e51b09L,0x180cd2bf2023d159L,0xa9892171a1ebf90eL, + 0xf97c4c877c132181L }, + { 0x9f1dc724c03dbb7eL,0xae043765018cbbe4L,0xfb0b2a360767d153L, + 0xa8e2f4d6249cbaebL } }, + /* 48 << 91 */ + { { 0x172a5247d95ea168L,0x1758fada2970764aL,0xac803a511d978169L, + 0x299cfe2ede77e01bL }, + { 0x652a1e17b0a98927L,0x2e26e1d120014495L,0x7ae0af9f7175b56aL, + 0xc2e22a80d64b9f95L } }, + /* 49 << 91 */ + { { 0x4d0ff9fbd90a060aL,0x496a27dbbaf38085L,0x32305401da776bcfL, + 0xb8cdcef6725f209eL }, + { 
0x61ba0f37436a0bbaL,0x263fa10876860049L,0x92beb98eda3542cfL, + 0xa2d4d14ad5849538L } }, + /* 50 << 91 */ + { { 0x989b9d6812e9a1bcL,0x61d9075c5f6e3268L,0x352c6aa999ace638L, + 0xde4e4a55920f43ffL }, + { 0xe5e4144ad673c017L,0x667417ae6f6e05eaL,0x613416aedcd1bd56L, + 0x5eb3620186693711L } }, + /* 51 << 91 */ + { { 0x2d7bc5043a1aa914L,0x175a129976dc5975L,0xe900e0f23fc8125cL, + 0x569ef68c11198875L }, + { 0x9012db6363a113b4L,0xe3bd3f5698835766L,0xa5c94a5276412deaL, + 0xad9e2a09aa735e5cL } }, + /* 52 << 91 */ + { { 0x405a984c508b65e9L,0xbde4a1d16df1a0d1L,0x1a9433a1dfba80daL, + 0xe9192ff99440ad2eL }, + { 0x9f6496965099fe92L,0x25ddb65c0b27a54aL,0x178279ddc590da61L, + 0x5479a999fbde681aL } }, + /* 53 << 91 */ + { { 0xd0e84e05013fe162L,0xbe11dc92632d471bL,0xdf0b0c45fc0e089fL, + 0x04fb15b04c144025L }, + { 0xa61d5fc213c99927L,0xa033e9e03de2eb35L,0xf8185d5cb8dacbb4L, + 0x9a88e2658644549dL } }, + /* 54 << 91 */ + { { 0xf717af6254671ff6L,0x4bd4241b5fa58603L,0x06fba40be67773c0L, + 0xc1d933d26a2847e9L }, + { 0xf4f5acf3689e2c70L,0x92aab0e746bafd31L,0x798d76aa3473f6e5L, + 0xcc6641db93141934L } }, + /* 55 << 91 */ + { { 0xcae27757d31e535eL,0x04cc43b687c2ee11L,0x8d1f96752e029ffaL, + 0xc2150672e4cc7a2cL }, + { 0x3b03c1e08d68b013L,0xa9d6816fedf298f3L,0x1bfbb529a2804464L, + 0x95a52fae5db22125L } }, + /* 56 << 91 */ + { { 0x55b321600e1cb64eL,0x004828f67e7fc9feL,0x13394b821bb0fb93L, + 0xb6293a2d35f1a920L }, + { 0xde35ef21d145d2d9L,0xbe6225b3bb8fa603L,0x00fc8f6b32cf252dL, + 0xa28e52e6117cf8c2L } }, + /* 57 << 91 */ + { { 0x9d1dc89b4c371e6dL,0xcebe067536ef0f28L,0x5de05d09a4292f81L, + 0xa8303593353e3083L }, + { 0xa1715b0a7e37a9bbL,0x8c56f61e2b8faec3L,0x5250743133c9b102L, + 0x0130cefca44431f0L } }, + /* 58 << 91 */ + { { 0x56039fa0bd865cfbL,0x4b03e578bc5f1dd7L,0x40edf2e4babe7224L, + 0xc752496d3a1988f6L }, + { 0xd1572d3b564beb6bL,0x0db1d11039a1c608L,0x568d193416f60126L, + 0x05ae9668f354af33L } }, + /* 59 << 91 */ + { { 0x19de6d37c92544f2L,0xcc084353a35837d5L,0xcbb6869c1a514eceL, + 0xb633e7282e1d1066L }, + { 0xf15dd69f936c581cL,0x96e7b8ce7439c4f9L,0x5e676f482e448a5bL, + 0xb2ca7d5bfd916bbbL } }, + /* 60 << 91 */ + { { 0xd55a2541f5024025L,0x47bc5769e4c2d937L,0x7d31b92a0362189fL, + 0x83f3086eef7816f9L }, + { 0xf9f46d94b587579aL,0xec2d22d830e76c5fL,0x27d57461b000ffcfL, + 0xbb7e65f9364ffc2cL } }, + /* 61 << 91 */ + { { 0x7c7c94776652a220L,0x61618f89d696c981L,0x5021701d89effff3L, + 0xf2c8ff8e7c314163L }, + { 0x2da413ad8efb4d3eL,0x937b5adfce176d95L,0x22867d342a67d51cL, + 0x262b9b1018eb3ac9L } }, + /* 62 << 91 */ + { { 0x4e314fe4c43ff28bL,0x764766276a664e7aL,0x3e90e40bb7a565c2L, + 0x8588993ac1acf831L }, + { 0xd7b501d68f938829L,0x996627ee3edd7d4cL,0x37d44a6290cd34c7L, + 0xa8327499f3833e8dL } }, + /* 63 << 91 */ + { { 0x2e18917d4bf50353L,0x85dd726b556765fbL,0x54fe65d693d5ab66L, + 0x3ddbaced915c25feL }, + { 0xa799d9a412f22e85L,0xe2a248676d06f6bcL,0xf4f1ee5643ca1637L, + 0xfda2828b61ece30aL } }, + /* 64 << 91 */ + { { 0x758c1a3ea2dee7a6L,0xdcde2f3c734b2284L,0xaba445d24eaba6adL, + 0x35aaf66876cee0a7L }, + { 0x7e0b04a9e5aa049aL,0xe74083ad91103e84L,0xbeb183ce40afecc3L, + 0x6b89de9fea043f7aL } }, + /* 0 << 98 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 98 */ + { { 0x0e299d23fe67ba66L,0x9145076093cf2f34L,0xf45b5ea997fcf913L, + 0x5be008438bd7dddaL }, + { 0x358c3e05d53ff04dL,0xbf7ccdc35de91ef7L,0xad684dbfb69ec1a0L, + 0x367e7cf2801fd997L } }, + /* 2 << 98 */ + { { 0x0ca1f3b7b0dc8595L,0x27de46089f1d9f2eL,0x1af3bf39badd82a7L, + 0x79356a7965862448L }, + { 
0xc0602345f5f9a052L,0x1a8b0f89139a42f9L,0xb53eee42844d40fcL, + 0x93b0bfe54e5b6368L } }, + /* 3 << 98 */ + { { 0x5434dd02c024789cL,0x90dca9ea41b57bfcL,0x8aa898e2243398dfL, + 0xf607c834894a94bbL }, + { 0xbb07be97c2c99b76L,0x6576ba6718c29302L,0x3d79efcce703a88cL, + 0xf259ced7b6a0d106L } }, + /* 4 << 98 */ + { { 0x0f893a5dc8de610bL,0xe8c515fb67e223ceL,0x7774bfa64ead6dc5L, + 0x89d20f95925c728fL }, + { 0x7a1e0966098583ceL,0xa2eedb9493f2a7d7L,0x1b2820974c304d4aL, + 0x0842e3dac077282dL } }, + /* 5 << 98 */ + { { 0xe4d972a33b9e2d7bL,0x7cc60b27c48218ffL,0x8fc7083884149d91L, + 0x5c04346f2f461eccL }, + { 0xebe9fdf2614650a9L,0x5e35b537c1f666acL,0x645613d188babc83L, + 0x88cace3ac5e1c93eL } }, + /* 6 << 98 */ + { { 0x209ca3753de92e23L,0xccb03cc85fbbb6e3L,0xccb90f03d7b1487eL, + 0xfa9c2a38c710941fL }, + { 0x756c38236724ceedL,0x3a902258192d0323L,0xb150e519ea5e038eL, + 0xdcba2865c7427591L } }, + /* 7 << 98 */ + { { 0xe549237f78890732L,0xc443bef953fcb4d9L,0x9884d8a6eb3480d6L, + 0x8a35b6a13048b186L }, + { 0xb4e4471665e9a90aL,0x45bf380d653006c0L,0x8f3f820d4fe9ae3bL, + 0x244a35a0979a3b71L } }, + /* 8 << 98 */ + { { 0xa1010e9d74cd06ffL,0x9c17c7dfaca3eeacL,0x74c86cd38063aa2bL, + 0x8595c4b3734614ffL }, + { 0xa3de00ca990f62ccL,0xd9bed213ca0c3be5L,0x7886078adf8ce9f5L, + 0xddb27ce35cd44444L } }, + /* 9 << 98 */ + { { 0xed374a6658926dddL,0x138b2d49908015b8L,0x886c6579de1f7ab8L, + 0x888b9aa0c3020b7aL }, + { 0xd3ec034e3a96e355L,0xba65b0b8f30fbe9aL,0x064c8e50ff21367aL, + 0x1f508ea40b04b46eL } }, + /* 10 << 98 */ + { { 0x98561a49747c866cL,0xbbb1e5fe0518a062L,0x20ff4e8becdc3608L, + 0x7f55cded20184027L }, + { 0x8d73ec95f38c85f0L,0x5b589fdf8bc3b8c3L,0xbe95dd980f12b66fL, + 0xf5bd1a090e338e01L } }, + /* 11 << 98 */ + { { 0x65163ae55e915918L,0x6158d6d986f8a46bL,0x8466b538eeebf99cL, + 0xca8761f6bca477efL }, + { 0xaf3449c29ebbc601L,0xef3b0f41e0c3ae2fL,0xaa6c577d5de63752L, + 0xe916660164682a51L } }, + /* 12 << 98 */ + { { 0x5a3097befc15aa1eL,0x40d12548b54b0745L,0x5bad4706519a5f12L, + 0xed03f717a439dee6L }, + { 0x0794bb6c4a02c499L,0xf725083dcffe71d2L,0x2cad75190f3adcafL, + 0x7f68ea1c43729310L } }, + /* 13 << 98 */ + { { 0xe747c8c7b7ffd977L,0xec104c3580761a22L,0x8395ebaf5a3ffb83L, + 0xfb3261f4e4b63db7L }, + { 0x53544960d883e544L,0x13520d708cc2eeb8L,0x08f6337bd3d65f99L, + 0x83997db2781cf95bL } }, + /* 14 << 98 */ + { { 0xce6ff1060dbd2c01L,0x4f8eea6b1f9ce934L,0x546f7c4b0e993921L, + 0x6236a3245e753fc7L }, + { 0x65a41f84a16022e9L,0x0c18d87843d1dbb2L,0x73c556402d4cef9cL, + 0xa042810870444c74L } }, + /* 15 << 98 */ + { { 0x68e4f15e9afdfb3cL,0x49a561435bdfb6dfL,0xa9bc1bd45f823d97L, + 0xbceb5970ea111c2aL }, + { 0x366b455fb269bbc4L,0x7cd85e1ee9bc5d62L,0xc743c41c4f18b086L, + 0xa4b4099095294fb9L } }, + /* 16 << 98 */ + { { 0x9c7c581d26ee8382L,0xcf17dcc5359d638eL,0xee8273abb728ae3dL, + 0x1d112926f821f047L }, + { 0x1149847750491a74L,0x687fa761fde0dfb9L,0x2c2580227ea435abL, + 0x6b8bdb9491ce7e3fL } }, + /* 17 << 98 */ + { { 0x4c5b5dc93bf834aaL,0x043718194f6c7e4bL,0xc284e00a3736bcadL, + 0x0d88111821ae8f8dL }, + { 0xf9cf0f82f48c8e33L,0xa11fd075a1bf40dbL,0xdceab0dedc2733e5L, + 0xc560a8b58e986bd7L } }, + /* 18 << 98 */ + { { 0x48dd1fe23929d097L,0x3885b29092f188f1L,0x0f2ae613da6fcdacL, + 0x9054303eb662a46cL }, + { 0xb6871e440738042aL,0x98e6a977bdaf6449L,0xd8bc0650d1c9df1bL, + 0xef3d645136e098f9L } }, + /* 19 << 98 */ + { { 0x03fbae82b6d72d28L,0x77ca9db1f5d84080L,0x8a112cffa58efc1cL, + 0x518d761cc564cb4aL }, + { 0x69b5740ef0d1b5ceL,0x717039cce9eb1785L,0x3fe29f9022f53382L, + 0x8e54ba566bc7c95cL } }, + /* 20 << 98 */ + { { 
0x9c806d8af7f91d0fL,0x3b61b0f1a82a5728L,0x4640032d94d76754L, + 0x273eb5de47d834c6L }, + { 0x2988abf77b4e4d53L,0xb7ce66bfde401777L,0x9fba6b32715071b3L, + 0x82413c24ad3a1a98L } }, + /* 21 << 98 */ + { { 0x5b7fc8c4e0e8ad93L,0xb5679aee5fab868dL,0xb1f9d2fa2b3946f3L, + 0x458897dc5685b50aL }, + { 0x1e98c93089d0caf3L,0x39564c5f78642e92L,0x1b77729a0dbdaf18L, + 0xf9170722579e82e6L } }, + /* 22 << 98 */ + { { 0x680c0317e4515fa5L,0xf85cff84fb0c790fL,0xc7a82aab6d2e0765L, + 0x7446bca935c82b32L }, + { 0x5de607aa6d63184fL,0x7c1a46a8262803a6L,0xd218313daebe8035L, + 0x92113ffdc73c51f8L } }, + /* 23 << 98 */ + { { 0x4b38e08312e7e46cL,0x69d0a37a56126bd5L,0xfb3f324b73c07e04L, + 0xa0c22f678fda7267L }, + { 0x8f2c00514d2c7d8fL,0xbc45ced3cbe2cae5L,0xe1c6cf07a8f0f277L, + 0xbc3923121eb99a98L } }, + /* 24 << 98 */ + { { 0x75537b7e3cc8ac85L,0x8d725f57dd02753bL,0xfd05ff64b737df2fL, + 0x55fe8712f6d2531dL }, + { 0x57ce04a96ab6b01cL,0x69a02a897cd93724L,0x4f82ac35cf86699bL, + 0x8242d3ad9cb4b232L } }, + /* 25 << 98 */ + { { 0x713d0f65d62105e5L,0xbb222bfa2d29be61L,0xf2f9a79e6cfbef09L, + 0xfc24d8d3d5d6782fL }, + { 0x5db77085d4129967L,0xdb81c3ccdc3c2a43L,0x9d655fc005d8d9a3L, + 0x3f5d057a54298026L } }, + /* 26 << 98 */ + { { 0x1157f56d88c54694L,0xb26baba59b09573eL,0x2cab03b022adffd1L, + 0x60a412c8dd69f383L }, + { 0xed76e98b54b25039L,0xd4ee67d3687e714dL,0x877396487b00b594L, + 0xce419775c9ef709bL } }, + /* 27 << 98 */ + { { 0x40f76f851c203a40L,0x30d352d6eafd8f91L,0xaf196d3d95578dd2L, + 0xea4bb3d777cc3f3dL }, + { 0x42a5bd03b98e782bL,0xac958c400624920dL,0xb838134cfc56fcc8L, + 0x86ec4ccf89572e5eL } }, + /* 28 << 98 */ + { { 0x69c435269be47be0L,0x323b7dd8cb28fea1L,0xfa5538ba3a6c67e5L, + 0xef921d701d378e46L }, + { 0xf92961fc3c4b880eL,0x3f6f914e98940a67L,0xa990eb0afef0ff39L, + 0xa6c2920ff0eeff9cL } }, + /* 29 << 98 */ + { { 0xca80416651b8d9a3L,0x42531bc90ffb0db1L,0x72ce4718aa82e7ceL, + 0x6e199913df574741L }, + { 0xd5f1b13dd5d36946L,0x8255dc65f68f0194L,0xdc9df4cd8710d230L, + 0x3453c20f138c1988L } }, + /* 30 << 98 */ + { { 0x9af98dc089a6ef01L,0x4dbcc3f09857df85L,0x348056015c1ad924L, + 0x40448da5d0493046L }, + { 0xf629926d4ee343e2L,0x6343f1bd90e8a301L,0xefc9349140815b3fL, + 0xf882a423de8f66fbL } }, + /* 31 << 98 */ + { { 0x3a12d5f4e7db9f57L,0x7dfba38a3c384c27L,0x7a904bfd6fc660b1L, + 0xeb6c5db32773b21cL }, + { 0xc350ee661cdfe049L,0x9baac0ce44540f29L,0xbc57b6aba5ec6aadL, + 0x167ce8c30a7c1baaL } }, + /* 32 << 98 */ + { { 0xb23a03a553fb2b56L,0x6ce141e74e057f78L,0x796525c389e490d9L, + 0x0bc95725a31a7e75L }, + { 0x1ec567911220fd06L,0x716e3a3c408b0bd6L,0x31cd6bf7e8ebeba9L, + 0xa7326ca6bee6b670L } }, + /* 33 << 98 */ + { { 0x3d9f851ccd090c43L,0x561e8f13f12c3988L,0x50490b6a904b7be4L, + 0x61690ce10410737bL }, + { 0x299e9a370f009052L,0x258758f0f026092eL,0x9fa255f3fdfcdc0fL, + 0xdbc9fb1fc0e1bcd2L } }, + /* 34 << 98 */ + { { 0x35f9dd6e24651840L,0xdca45a84a5c59abcL,0x103d396fecca4938L, + 0x4532da0ab97b3f29L }, + { 0xc4135ea51999a6bfL,0x3aa9505a5e6bf2eeL,0xf77cef063f5be093L, + 0x97d1a0f8a943152eL } }, + /* 35 << 98 */ + { { 0x2cb0ebba2e1c21ddL,0xf41b29fc2c6797c4L,0xc6e17321b300101fL, + 0x4422b0e9d0d79a89L }, + { 0x49e4901c92f1bfc4L,0x06ab1f8fe1e10ed9L,0x84d35577db2926b8L, + 0xca349d39356e8ec2L } }, + /* 36 << 98 */ + { { 0x70b63d32343bf1a9L,0x8fd3bd2837d1a6b1L,0x0454879c316865b4L, + 0xee959ff6c458efa2L }, + { 0x0461dcf89706dc3fL,0x737db0e2164e4b2eL,0x092626802f8843c8L, + 0x54498bbc7745e6f6L } }, + /* 37 << 98 */ + { { 0x359473faa29e24afL,0xfcc3c45470aa87a1L,0xfd2c4bf500573aceL, + 0xb65b514e28dd1965L }, + { 
0xe46ae7cf2193e393L,0x60e9a4e1f5444d97L,0xe7594e9600ff38edL, + 0x43d84d2f0a0e0f02L } }, + /* 38 << 98 */ + { { 0x8b6db141ee398a21L,0xb88a56aee3bcc5beL,0x0a1aa52f373460eaL, + 0x20da1a56160bb19bL }, + { 0xfb54999d65bf0384L,0x71a14d245d5a180eL,0xbc44db7b21737b04L, + 0xd84fcb1801dd8e92L } }, + /* 39 << 98 */ + { { 0x80de937bfa44b479L,0x535054995c98fd4fL,0x1edb12ab28f08727L, + 0x4c58b582a5f3ef53L }, + { 0xbfb236d88327f246L,0xc3a3bfaa4d7df320L,0xecd96c59b96024f2L, + 0xfc293a537f4e0433L } }, + /* 40 << 98 */ + { { 0x5341352b5acf6e10L,0xc50343fdafe652c3L,0x4af3792d18577a7fL, + 0xe1a4c617af16823dL }, + { 0x9b26d0cd33425d0aL,0x306399ed9b7bc47fL,0x2a792f33706bb20bL, + 0x3121961498111055L } }, + /* 41 << 98 */ + { { 0x864ec06487f5d28bL,0x11392d91962277fdL,0xb5aa7942bb6aed5fL, + 0x080094dc47e799d9L }, + { 0x4afa588c208ba19bL,0xd3e7570f8512f284L,0xcbae64e602f5799aL, + 0xdeebe7ef514b9492L } }, + /* 42 << 98 */ + { { 0x30300f98e5c298ffL,0x17f561be3678361fL,0xf52ff31298cb9a16L, + 0x6233c3bc5562d490L }, + { 0x7bfa15a192e3a2cbL,0x961bcfd1e6365119L,0x3bdd29bf2c8c53b1L, + 0x739704df822844baL } }, + /* 43 << 98 */ + { { 0x7dacfb587e7b754bL,0x23360791a806c9b9L,0xe7eb88c923504452L, + 0x2983e996852c1783L }, + { 0xdd4ae529958d881dL,0x026bae03262c7b3cL,0x3a6f9193960b52d1L, + 0xd0980f9092696cfbL } }, + /* 44 << 98 */ + { { 0x4c1f428cd5f30851L,0x94dfed272a4f6630L,0x4df53772fc5d48a4L, + 0xdd2d5a2f933260ceL }, + { 0x574115bdd44cc7a5L,0x4ba6b20dbd12533aL,0x30e93cb8243057c9L, + 0x794c486a14de320eL } }, + /* 45 << 98 */ + { { 0xe925d4cef21496e4L,0xf951d198ec696331L,0x9810e2de3e8d812fL, + 0xd0a47259389294abL }, + { 0x513ba2b50e3bab66L,0x462caff5abad306fL,0xe2dc6d59af04c49eL, + 0x1aeb8750e0b84b0bL } }, + /* 46 << 98 */ + { { 0xc034f12f2f7d0ca2L,0x6d2e8128e06acf2fL,0x801f4f8321facc2fL, + 0xa1170c03f40ef607L }, + { 0xfe0a1d4f7805a99cL,0xbde56a36cc26aba5L,0x5b1629d035531f40L, + 0xac212c2b9afa6108L } }, + /* 47 << 98 */ + { { 0x30a06bf315697be5L,0x6f0545dc2c63c7c1L,0x5d8cb8427ccdadafL, + 0xd52e379bac7015bbL }, + { 0xc4f56147f462c23eL,0xd44a429846bc24b0L,0xbc73d23ae2856d4fL, + 0x61cedd8c0832bcdfL } }, + /* 48 << 98 */ + { { 0x6095355699f241d7L,0xee4adbd7001a349dL,0x0b35bf6aaa89e491L, + 0x7f0076f4136f7546L }, + { 0xd19a18ba9264da3dL,0x6eb2d2cd62a7a28bL,0xcdba941f8761c971L, + 0x1550518ba3be4a5dL } }, + /* 49 << 98 */ + { { 0xd0e8e2f057d0b70cL,0xeea8612ecd133ba3L,0x814670f044416aecL, + 0x424db6c330775061L }, + { 0xd96039d116213fd1L,0xc61e7fa518a3478fL,0xa805bdcccb0c5021L, + 0xbdd6f3a80cc616ddL } }, + /* 50 << 98 */ + { { 0x060096675d97f7e2L,0x31db0fc1af0bf4b6L,0x23680ed45491627aL, + 0xb99a3c667d741fb1L }, + { 0xe9bb5f5536b1ff92L,0x29738577512b388dL,0xdb8a2ce750fcf263L, + 0x385346d46c4f7b47L } }, + /* 51 << 98 */ + { { 0xbe86c5ef31631f9eL,0xbf91da2103a57a29L,0xc3b1f7967b23f821L, + 0x0f7d00d2770db354L }, + { 0x8ffc6c3bd8fe79daL,0xcc5e8c40d525c996L,0x4640991dcfff632aL, + 0x64d97e8c67112528L } }, + /* 52 << 98 */ + { { 0xc232d97302f1cd1eL,0xce87eacb1dd212a4L,0x6e4c8c73e69802f7L, + 0x12ef02901fffddbdL }, + { 0x941ec74e1bcea6e2L,0xd0b540243cb92cbbL,0x809fb9d47e8f9d05L, + 0x3bf16159f2992aaeL } }, + /* 53 << 98 */ + { { 0xad40f279f8a7a838L,0x11aea63105615660L,0xbf52e6f1a01f6fa1L, + 0xef0469953dc2aec9L }, + { 0x785dbec9d8080711L,0xe1aec60a9fdedf76L,0xece797b5fa21c126L, + 0xc66e898f05e52732L } }, + /* 54 << 98 */ + { { 0x39bb69c408811fdbL,0x8bfe1ef82fc7f082L,0xc8e7a393174f4138L, + 0xfba8ad1dd58d1f98L }, + { 0xbc21d0cebfd2fd5bL,0x0b839a826ee60d61L,0xaacf7658afd22253L, + 0xb526bed8aae396b3L } }, + /* 55 << 98 */ + { { 
0xccc1bbc238564464L,0x9e3ff9478c45bc73L,0xcde9bca358188a78L, + 0x138b8ee0d73bf8f7L }, + { 0x5c7e234c4123c489L,0x66e69368fa643297L,0x0629eeee39a15fa3L, + 0x95fab881a9e2a927L } }, + /* 56 << 98 */ + { { 0xb2497007eafbb1e1L,0xd75c9ce6e75b7a93L,0x3558352defb68d78L, + 0xa2f26699223f6396L }, + { 0xeb911ecfe469b17aL,0x62545779e72d3ec2L,0x8ea47de782cb113fL, + 0xebe4b0864e1fa98dL } }, + /* 57 << 98 */ + { { 0xec2d5ed78cdfedb1L,0xa535c077fe211a74L,0x9678109b11d244c5L, + 0xf17c8bfbbe299a76L }, + { 0xb651412efb11fbc4L,0xea0b548294ab3f65L,0xd8dffd950cf78243L, + 0x2e719e57ce0361d4L } }, + /* 58 << 98 */ + { { 0x9007f085304ddc5bL,0x095e8c6d4daba2eaL,0x5a33cdb43f9d28a9L, + 0x85b95cd8e2283003L }, + { 0xbcd6c819b9744733L,0x29c5f538fc7f5783L,0x6c49b2fad59038e4L, + 0x68349cc13bbe1018L } }, + /* 59 << 98 */ + { { 0xcc490c1d21830ee5L,0x36f9c4eee9bfa297L,0x58fd729448de1a94L, + 0xaadb13a84e8f2cdcL }, + { 0x515eaaa081313dbaL,0xc76bb468c2152dd8L,0x357f8d75a653dbf8L, + 0xe4d8c4d1b14ac143L } }, + /* 60 << 98 */ + { { 0xbdb8e675b055cb40L,0x898f8e7b977b5167L,0xecc65651b82fb863L, + 0x565448146d88f01fL }, + { 0xb0928e95263a75a9L,0xcfb6836f1a22fcdaL,0x651d14db3f3bd37cL, + 0x1d3837fbb6ad4664L } }, + /* 61 << 98 */ + { { 0x7c5fb538ff4f94abL,0x7243c7126d7fb8f2L,0xef13d60ca85c5287L, + 0x18cfb7c74bb8dd1bL }, + { 0x82f9bfe672908219L,0x35c4592b9d5144abL,0x52734f379cf4b42fL, + 0x6bac55e78c60ddc4L } }, + /* 62 << 98 */ + { { 0xb5cd811e94dea0f6L,0x259ecae4e18cc1a3L,0x6a0e836e15e660f8L, + 0x6c639ea60e02bff2L }, + { 0x8721b8cb7e1026fdL,0x9e73b50b63261942L,0xb8c7097477f01da3L, + 0x1839e6a68268f57fL } }, + /* 63 << 98 */ + { { 0x571b94155150b805L,0x1892389ef92c7097L,0x8d69c18e4a084b95L, + 0x7014c512be5b495cL }, + { 0x4780db361b07523cL,0x2f6219ce2c1c64faL,0xc38b81b0602c105aL, + 0xab4f4f205dc8e360L } }, + /* 64 << 98 */ + { { 0x20d3c982cf7d62d2L,0x1f36e29d23ba8150L,0x48ae0bf092763f9eL, + 0x7a527e6b1d3a7007L }, + { 0xb4a89097581a85e3L,0x1f1a520fdc158be5L,0xf98db37d167d726eL, + 0x8802786e1113e862L } }, + /* 0 << 105 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 105 */ + { { 0xefb2149e36f09ab0L,0x03f163ca4a10bb5bL,0xd029704506e20998L, + 0x56f0af001b5a3babL }, + { 0x7af4cfec70880e0dL,0x7332a66fbe3d913fL,0x32e6c84a7eceb4bdL, + 0xedc4a79a9c228f55L } }, + /* 2 << 105 */ + { { 0xc37c7dd0c55c4496L,0xa6a9635725bbabd2L,0x5b7e63f2add7f363L, + 0x9dce37822e73f1dfL }, + { 0xe1e5a16ab2b91f71L,0xe44898235ba0163cL,0xf2759c32f6e515adL, + 0xa5e2f1f88615eecfL } }, + /* 3 << 105 */ + { { 0x74519be7abded551L,0x03d358b8c8b74410L,0x4d00b10b0e10d9a9L, + 0x6392b0b128da52b7L }, + { 0x6744a2980b75c904L,0xc305b0aea8f7f96cL,0x042e421d182cf932L, + 0xf6fc5d509e4636caL } }, + /* 4 << 105 */ + { { 0x795847c9d64cc78cL,0x6c50621b9b6cb27bL,0x07099bf8df8022abL, + 0x48f862ebc04eda1dL }, + { 0xd12732ede1603c16L,0x19a80e0f5c9a9450L,0xe2257f54b429b4fcL, + 0x66d3b2c645460515L } }, + /* 5 << 105 */ + { { 0x6ca4f87e822e37beL,0x73f237b4253bda4eL,0xf747f3a241190aebL, + 0xf06fa36f804cf284L }, + { 0x0a6bbb6efc621c12L,0x5d624b6440b80ec6L,0x4b0724257ba556f3L, + 0x7fa0c3543e2d20a8L } }, + /* 6 << 105 */ + { { 0xe921fa31e3229d41L,0xa929c65294531bd4L,0x84156027a6d38209L, + 0xf3d69f736bdb97bdL }, + { 0x8906d19a16833631L,0x68a34c2e03d51be3L,0xcb59583b0e511cd8L, + 0x99ce6bfdfdc132a8L } }, + /* 7 << 105 */ + { { 0x3facdaaaffcdb463L,0x658bbc1a34a38b08L,0x12a801f8f1a9078dL, + 0x1567bcf96ab855deL }, + { 0xe08498e03572359bL,0xcf0353e58659e68bL,0xbb86e9c87d23807cL, + 0xbc08728d2198e8a2L } }, + /* 8 << 105 */ + { { 
0x8de2b7bc453cadd6L,0x203900a7bc0bc1f8L,0xbcd86e47a6abd3afL, + 0x911cac128502effbL }, + { 0x2d550242ec965469L,0x0e9f769229e0017eL,0x633f078f65979885L, + 0xfb87d4494cf751efL } }, + /* 9 << 105 */ + { { 0xe1790e4bfc25419aL,0x364672034bff3cfdL,0xc8db638625b6e83fL, + 0x6cc69f236cad6fd2L }, + { 0x0219e45a6bc68bb9L,0xe43d79b6297f7334L,0x7d445368465dc97cL, + 0x4b9eea322a0b949aL } }, + /* 10 << 105 */ + { { 0x1b96c6ba6102d021L,0xeaafac782f4461eaL,0xd4b85c41c49f19a8L, + 0x275c28e4cf538875L }, + { 0x35451a9ddd2e54e0L,0x6991adb50605618bL,0x5b8b4bcd7b36cd24L, + 0x372a4f8c56f37216L } }, + /* 11 << 105 */ + { { 0xc890bd73a6a5da60L,0x6f083da0dc4c9ff0L,0xf4e14d94f0536e57L, + 0xf9ee1edaaaec8243L }, + { 0x571241ec8bdcf8e7L,0xa5db82710b041e26L,0x9a0b9a99e3fff040L, + 0xcaaf21dd7c271202L } }, + /* 12 << 105 */ + { { 0xb4e2b2e14f0dd2e8L,0xe77e7c4f0a377ac7L,0x69202c3f0d7a2198L, + 0xf759b7ff28200eb8L }, + { 0xc87526eddcfe314eL,0xeb84c52453d5cf99L,0xb1b52ace515138b6L, + 0x5aa7ff8c23fca3f4L } }, + /* 13 << 105 */ + { { 0xff0b13c3b9791a26L,0x960022dacdd58b16L,0xdbd55c9257aad2deL, + 0x3baaaaa3f30fe619L }, + { 0x9a4b23460d881efdL,0x506416c046325e2aL,0x91381e76035c18d4L, + 0xb3bb68bef27817b0L } }, + /* 14 << 105 */ + { { 0x15bfb8bf5116f937L,0x7c64a586c1268943L,0x71e25cc38419a2c8L, + 0x9fd6b0c48335f463L }, + { 0x4bf0ba3ce8ee0e0eL,0x6f6fba60298c21faL,0x57d57b39ae66bee0L, + 0x292d513022672544L } }, + /* 15 << 105 */ + { { 0xf451105dbab093b3L,0x012f59b902839986L,0x8a9158023474a89cL, + 0x048c919c2de03e97L }, + { 0xc476a2b591071cd5L,0x791ed89a034970a5L,0x89bd9042e1b7994bL, + 0x8eaf5179a1057ffdL } }, + /* 16 << 105 */ + { { 0x6066e2a2d551ee10L,0x87a8f1d8727e09a6L,0x00d08bab2c01148dL, + 0x6da8e4f1424f33feL }, + { 0x466d17f0cf9a4e71L,0xff5020103bf5cb19L,0xdccf97d8d062ecc0L, + 0x80c0d9af81d80ac4L } }, + /* 17 << 105 */ + { { 0xe87771d8033f2876L,0xb0186ec67d5cc3dbL,0x58e8bb803bc9bc1dL, + 0x4d1395cc6f6ef60eL }, + { 0xa73c62d6186244a0L,0x918e5f23110a5b53L,0xed4878ca741b7eabL, + 0x3038d71adbe03e51L } }, + /* 18 << 105 */ + { { 0x840204b7a93c3246L,0x21ab6069a0b9b4cdL,0xf5fa6e2bb1d64218L, + 0x1de6ad0ef3d56191L }, + { 0x570aaa88ff1929c7L,0xc6df4c6b640e87b5L,0xde8a74f2c65f0cccL, + 0x8b972fd5e6f6cc01L } }, + /* 19 << 105 */ + { { 0x3fff36b60b846531L,0xba7e45e610a5e475L,0x84a1d10e4145b6c5L, + 0xf1f7f91a5e046d9dL }, + { 0x0317a69244de90d7L,0x951a1d4af199c15eL,0x91f78046c9d73debL, + 0x74c82828fab8224fL } }, + /* 20 << 105 */ + { { 0xaa6778fce7560b90L,0xb4073e61a7e824ceL,0xff0d693cd642eba8L, + 0x7ce2e57a5dccef38L }, + { 0x89c2c7891df1ad46L,0x83a06922098346fdL,0x2d715d72da2fc177L, + 0x7b6dd71d85b6cf1dL } }, + /* 21 << 105 */ + { { 0xc60a6d0a73fa9cb0L,0xedd3992e328bf5a9L,0xc380ddd0832c8c82L, + 0xd182d410a2a0bf50L }, + { 0x7d9d7438d9a528dbL,0xe8b1a0e9caf53994L,0xddd6e5fe0e19987cL, + 0xacb8df03190b059dL } }, + /* 22 << 105 */ + { { 0x53703a328300129fL,0x1f63766268c43bfdL,0xbcbd191300e54051L, + 0x812fcc627bf5a8c5L }, + { 0x3f969d5f29fb85daL,0x72f4e00a694759e8L,0x426b6e52790726b7L, + 0x617bbc873bdbb209L } }, + /* 23 << 105 */ + { { 0x511f8bb997aee317L,0x812a4096e81536a8L,0x137dfe593ac09b9bL, + 0x0682238fba8c9a7aL }, + { 0x7072ead6aeccb4bdL,0x6a34e9aa692ba633L,0xc82eaec26fff9d33L, + 0xfb7535121d4d2b62L } }, + /* 24 << 105 */ + { { 0x1a0445ff1d7aadabL,0x65d38260d5f6a67cL,0x6e62fb0891cfb26fL, + 0xef1e0fa55c7d91d6L }, + { 0x47e7c7ba33db72cdL,0x017cbc09fa7c74b2L,0x3c931590f50a503cL, + 0xcac54f60616baa42L } }, + /* 25 << 105 */ + { { 0x9b6cd380b2369f0fL,0x97d3a70d23c76151L,0x5f9dd6fc9862a9c6L, + 0x044c4ab212312f51L }, + { 
0x035ea0fd834a2ddcL,0x49e6b862cc7b826dL,0xb03d688362fce490L, + 0x62f2497ab37e36e9L } }, + /* 26 << 105 */ + { { 0x04b005b6c6458293L,0x36bb5276e8d10af7L,0xacf2dc138ee617b8L, + 0x470d2d35b004b3d4L }, + { 0x06790832feeb1b77L,0x2bb75c3985657f9cL,0xd70bd4edc0f60004L, + 0xfe797ecc219b018bL } }, + /* 27 << 105 */ + { { 0x9b5bec2a753aebccL,0xdaf9f3dcc939eca5L,0xd6bc6833d095ad09L, + 0x98abdd51daa4d2fcL }, + { 0xd9840a318d168be5L,0xcf7c10e02325a23cL,0xa5c02aa07e6ecfafL, + 0x2462e7e6b5bfdf18L } }, + /* 28 << 105 */ + { { 0xab2d8a8ba0cc3f12L,0x68dd485dbc672a29L,0x72039752596f2cd3L, + 0x5d3eea67a0cf3d8dL }, + { 0x810a1a81e6602671L,0x8f144a4014026c0cL,0xbc753a6d76b50f85L, + 0xc4dc21e8645cd4a4L } }, + /* 29 << 105 */ + { { 0xc5262dea521d0378L,0x802b8e0e05011c6fL,0x1ba19cbb0b4c19eaL, + 0x21db64b5ebf0aaecL }, + { 0x1f394ee970342f9dL,0x93a10aee1bc44a14L,0xa7eed31b3efd0baaL, + 0x6e7c824e1d154e65L } }, + /* 30 << 105 */ + { { 0xee23fa819966e7eeL,0x64ec4aa805b7920dL,0x2d44462d2d90aad4L, + 0xf44dd195df277ad5L }, + { 0x8d6471f1bb46b6a1L,0x1e65d313fd885090L,0x33a800f513a977b4L, + 0xaca9d7210797e1efL } }, + /* 31 << 105 */ + { { 0x9a5a85a0fcff6a17L,0x9970a3f31eca7ceeL,0xbb9f0d6bc9504be3L, + 0xe0c504beadd24ee2L }, + { 0x7e09d95677fcc2f4L,0xef1a522765bb5fc4L,0x145d4fb18b9286aaL, + 0x66fd0c5d6649028bL } }, + /* 32 << 105 */ + { { 0x98857ceb1bf4581cL,0xe635e186aca7b166L,0x278ddd22659722acL, + 0xa0903c4c1db68007L }, + { 0x366e458948f21402L,0x31b49c14b96abda2L,0x329c4b09e0403190L, + 0x97197ca3d29f43feL } }, + /* 33 << 105 */ + { { 0x8073dd1e274983d8L,0xda1a3bde55717c8fL,0xfd3d4da20361f9d1L, + 0x1332d0814c7de1ceL }, + { 0x9b7ef7a3aa6d0e10L,0x17db2e73f54f1c4aL,0xaf3dffae4cd35567L, + 0xaaa2f406e56f4e71L } }, + /* 34 << 105 */ + { { 0x8966759e7ace3fc7L,0x9594eacf45a8d8c6L,0x8de3bd8b91834e0eL, + 0xafe4ca53548c0421L }, + { 0xfdd7e856e6ee81c6L,0x8f671beb6b891a3aL,0xf7a58f2bfae63829L, + 0x9ab186fb9c11ac9fL } }, + /* 35 << 105 */ + { { 0x8d6eb36910b5be76L,0x046b7739fb040bcdL,0xccb4529fcb73de88L, + 0x1df0fefccf26be03L }, + { 0xad7757a6bcfcd027L,0xa8786c75bb3165caL,0xe9db1e347e99a4d9L, + 0x99ee86dfb06c504bL } }, + /* 36 << 105 */ + { { 0x5b7c2dddc15c9f0aL,0xdf87a7344295989eL,0x59ece47c03d08fdaL, + 0xb074d3ddad5fc702L }, + { 0x2040790351a03776L,0x2bb1f77b2a608007L,0x25c58f4fe1153185L, + 0xe6df62f6766e6447L } }, + /* 37 << 105 */ + { { 0xefb3d1beed51275aL,0x5de47dc72f0f483fL,0x7932d98e97c2bedfL, + 0xd5c119270219f8a1L }, + { 0x9d751200a73a294eL,0x5f88434a9dc20172L,0xd28d9fd3a26f506aL, + 0xa890cd319d1dcd48L } }, + /* 38 << 105 */ + { { 0x0aebaec170f4d3b4L,0xfd1a13690ffc8d00L,0xb9d9c24057d57838L, + 0x45929d2668bac361L }, + { 0x5a2cd06025b15ca6L,0x4b3c83e16e474446L,0x1aac7578ee1e5134L, + 0xa418f5d6c91e2f41L } }, + /* 39 << 105 */ + { { 0x6936fc8a213ed68bL,0x860ae7ed510a5224L,0x63660335def09b53L, + 0x641b2897cd79c98dL }, + { 0x29bd38e101110f35L,0x79c26f42648b1937L,0x64dae5199d9164f4L, + 0xd85a23100265c273L } }, + /* 40 << 105 */ + { { 0x7173dd5d4b07e2b1L,0xd144c4cb8d9ea221L,0xe8b04ea41105ab14L, + 0x92dda542fe80d8f1L }, + { 0xe9982fa8cf03dce6L,0x8b5ea9651a22cffcL,0xf7f4ea7f3fad88c4L, + 0x62db773e6a5ba95cL } }, + /* 41 << 105 */ + { { 0xd20f02fb93f24567L,0xfd46c69a315257caL,0x0ac74cc78bcab987L, + 0x46f31c015ceca2f5L }, + { 0x40aedb59888b219eL,0xe50ecc37e1fccd02L,0x1bcd9dad911f816cL, + 0x583cc1ec8db9b00cL } }, + /* 42 << 105 */ + { { 0xf3cd2e66a483bf11L,0xfa08a6f5b1b2c169L,0xf375e2454be9fa28L, + 0x99a7ffec5b6d011fL }, + { 0x6a3ebddbc4ae62daL,0x6cea00ae374aef5dL,0xab5fb98d9d4d05bcL, + 0x7cba1423d560f252L } }, + /* 43 << 105 */ + { { 
0x49b2cc21208490deL,0x1ca66ec3bcfb2879L,0x7f1166b71b6fb16fL, + 0xfff63e0865fe5db3L }, + { 0xb8345abe8b2610beL,0xb732ed8039de3df4L,0x0e24ed50211c32b4L, + 0xd10d8a69848ff27dL } }, + /* 44 << 105 */ + { { 0xc1074398ed4de248L,0xd7cedace10488927L,0xa4aa6bf885673e13L, + 0xb46bae916daf30afL }, + { 0x07088472fcef7ad8L,0x61151608d4b35e97L,0xbcfe8f26dde29986L, + 0xeb84c4c7d5a34c79L } }, + /* 45 << 105 */ + { { 0xc1eec55c164e1214L,0x891be86da147bb03L,0x9fab4d100ba96835L, + 0xbf01e9b8a5c1ae9fL }, + { 0x6b4de139b186ebc0L,0xd5c74c2685b91bcaL,0x5086a99cc2d93854L, + 0xeed62a7ba7a9dfbcL } }, + /* 46 << 105 */ + { { 0x8778ed6f76b7618aL,0xbff750a503b66062L,0x4cb7be22b65186dbL, + 0x369dfbf0cc3a6d13L }, + { 0xc7dab26c7191a321L,0x9edac3f940ed718eL,0xbc142b36d0cfd183L, + 0xc8af82f67c991693L } }, + /* 47 << 105 */ + { { 0xb3d1e4d897ce0b2aL,0xe6d7c87fc3a55cdfL,0x35846b9568b81afeL, + 0x018d12afd3c239d8L }, + { 0x2b2c620801206e15L,0xe0e42453a3b882c6L,0x854470a3a50162d5L, + 0x081574787017a62aL } }, + /* 48 << 105 */ + { { 0x18bd3fb4820357c7L,0x992039ae6f1458adL,0x9a1df3c525b44aa1L, + 0x2d780357ed3d5281L }, + { 0x58cf7e4dc77ad4d4L,0xd49a7998f9df4fc4L,0x4465a8b51d71205eL, + 0xa0ee0ea6649254aaL } }, + /* 49 << 105 */ + { { 0x4b5eeecfab7bd771L,0x6c87307335c262b9L,0xdc5bd6483c9d61e7L, + 0x233d6d54321460d2L }, + { 0xd20c5626fc195bccL,0x2544595804d78b63L,0xe03fcb3d17ec8ef3L, + 0x54b690d146b8f781L } }, + /* 50 << 105 */ + { { 0x82fa2c8a21230646L,0xf51aabb9084f418cL,0xff4fbec11a30ba43L, + 0x6a5acf73743c9df7L }, + { 0x1da2b357d635b4d5L,0xc3de68ddecd5c1daL,0xa689080bd61af0ddL, + 0xdea5938ad665bf99L } }, + /* 51 << 105 */ + { { 0x0231d71afe637294L,0x01968aa6a5a81cd8L,0x11252d50048e63b5L, + 0xc446bc526ca007e9L }, + { 0xef8c50a696d6134bL,0x9361fbf59e09a05cL,0xf17f85a6dca3291aL, + 0xb178d548ff251a21L } }, + /* 52 << 105 */ + { { 0x87f6374ba4df3915L,0x566ce1bf2fd5d608L,0x425cba4d7de35102L, + 0x6b745f8f58c5d5e2L }, + { 0x88402af663122edfL,0x3190f9ed3b989a89L,0x4ad3d387ebba3156L, + 0xef385ad9c7c469a5L } }, + /* 53 << 105 */ + { { 0xb08281de3f642c29L,0x20be0888910ffb88L,0xf353dd4ad5292546L, + 0x3f1627de8377a262L }, + { 0xa5faa013eefcd638L,0x8f3bf62674cc77c3L,0x32618f65a348f55eL, + 0x5787c0dc9fefeb9eL } }, + /* 54 << 105 */ + { { 0xf1673aa2d9a23e44L,0x88dfa9934e10690dL,0x1ced1b362bf91108L, + 0x9193ceca3af48649L }, + { 0xfb34327d2d738fc5L,0x6697b037975fee6cL,0x2f485da0c04079a5L, + 0x2cdf57352feaa1acL } }, + /* 55 << 105 */ + { { 0x76944420bd55659eL,0x7973e32b4376090cL,0x86bb4fe1163b591aL, + 0x10441aedc196f0caL }, + { 0x3b431f4a045ad915L,0x6c11b437a4afacb1L,0x30b0c7db71fdbbd8L, + 0xb642931feda65acdL } }, + /* 56 << 105 */ + { { 0x4baae6e89c92b235L,0xa73bbd0e6b3993a1L,0xd06d60ec693dd031L, + 0x03cab91b7156881cL }, + { 0xd615862f1db3574bL,0x485b018564bb061aL,0x27434988a0181e06L, + 0x2cd61ad4c1c0c757L } }, + /* 57 << 105 */ + { { 0x3effed5a2ff9f403L,0x8dc98d8b62239029L,0x2206021e1f17b70dL, + 0xafbec0cabf510015L }, + { 0x9fed716480130dfaL,0x306dc2b58a02dcf5L,0x48f06620feb10fc0L, + 0x78d1e1d55a57cf51L } }, + /* 58 << 105 */ + { { 0xadef8c5a192ef710L,0x88afbd4b3b7431f9L,0x7e1f740764250c9eL, + 0x6e31318db58bec07L }, + { 0xfd4fc4b824f89b4eL,0x65a5dd8848c36a2aL,0x4f1eccfff024baa7L, + 0x22a21cf2cba94650L } }, + /* 59 << 105 */ + { { 0x95d29dee42a554f7L,0x828983a5002ec4baL,0x8112a1f78badb73dL, + 0x79ea8897a27c1839L }, + { 0x8969a5a7d065fd83L,0xf49af791b262a0bcL,0xfcdea8b6af2b5127L, + 0x10e913e1564c2dbcL } }, + /* 60 << 105 */ + { { 0x51239d14bc21ef51L,0xe51c3ceb4ce57292L,0x795ff06847bbcc3bL, + 0x86b46e1ebd7e11e6L }, + { 
0x0ea6ba2380041ef4L,0xd72fe5056262342eL,0x8abc6dfd31d294d4L, + 0xbbe017a21278c2c9L } }, + /* 61 << 105 */ + { { 0xb1fcfa09b389328aL,0x322fbc62d01771b5L,0x04c0d06360b045bfL, + 0xdb652edc10e52d01L }, + { 0x50ef932c03ec6627L,0xde1b3b2dc1ee50e3L,0x5ab7bdc5dc37a90dL, + 0xfea6721331e33a96L } }, + /* 62 << 105 */ + { { 0x6482b5cb4f2999aaL,0x38476cc6b8cbf0ddL,0x93ebfacb173405bbL, + 0x15cdafe7e52369ecL }, + { 0xd42d5ba4d935b7dbL,0x648b60041c99a4cdL,0x785101bda3b5545bL, + 0x4bf2c38a9dd67fafL } }, + /* 63 << 105 */ + { { 0xb1aadc634442449cL,0xe0e9921a33ad4fb8L,0x5c552313aa686d82L, + 0xdee635fa465d866cL }, + { 0xbc3c224a18ee6e8aL,0xeed748a6ed42e02fL,0xe70f930ad474cd08L, + 0x774ea6ecfff24adfL } }, + /* 64 << 105 */ + { { 0x03e2de1cf3480d4aL,0xf0d8edc7bc8acf1aL,0xf23e330368295a9cL, + 0xfadd5f68c546a97dL }, + { 0x895597ad96f8acb1L,0xbddd49d5671bdae2L,0x16fcd52821dd43f4L, + 0xa5a454126619141aL } }, + /* 0 << 112 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 112 */ + { { 0x8ce9b6bfc360e25aL,0xe6425195075a1a78L,0x9dc756a8481732f4L, + 0x83c0440f5432b57aL }, + { 0xc670b3f1d720281fL,0x2205910ed135e051L,0xded14b0edb052be7L, + 0x697b3d27c568ea39L } }, + /* 2 << 112 */ + { { 0x2e599b9afb3ff9edL,0x28c2e0ab17f6515cL,0x1cbee4fd474da449L, + 0x071279a44f364452L }, + { 0x97abff6601fbe855L,0x3ee394e85fda51c4L,0x190385f667597c0bL, + 0x6e9fccc6a27ee34bL } }, + /* 3 << 112 */ + { { 0x0b89de9314092ebbL,0xf17256bd428e240cL,0xcf89a7f393d2f064L, + 0x4f57841ee1ed3b14L }, + { 0x4ee14405e708d855L,0x856aae7203f1c3d0L,0xc8e5424fbdd7eed5L, + 0x3333e4ef73ab4270L } }, + /* 4 << 112 */ + { { 0x3bc77adedda492f8L,0xc11a3aea78297205L,0x5e89a3e734931b4cL, + 0x17512e2e9f5694bbL }, + { 0x5dc349f3177bf8b6L,0x232ea4ba08c7ff3eL,0x9c4f9d16f511145dL, + 0xccf109a333b379c3L } }, + /* 5 << 112 */ + { { 0xe75e7a88a1f25897L,0x7ac6961fa1b5d4d8L,0xe3e1077308f3ed5cL, + 0x208a54ec0a892dfbL }, + { 0xbe826e1978660710L,0x0cf70a97237df2c8L,0x418a7340ed704da5L, + 0xa3eeb9a908ca33fdL } }, + /* 6 << 112 */ + { { 0x49d96233169bca96L,0x04d286d42da6aafbL,0xc09606eca0c2fa94L, + 0x8869d0d523ff0fb3L }, + { 0xa99937e5d0150d65L,0xa92e2503240c14c9L,0x656bf945108e2d49L, + 0x152a733aa2f59e2bL } }, + /* 7 << 112 */ + { { 0xb4323d588434a920L,0xc0af8e93622103c5L,0x667518ef938dbf9aL, + 0xa184307383a9cdf2L }, + { 0x350a94aa5447ab80L,0xe5e5a325c75a3d61L,0x74ba507f68411a9eL, + 0x10581fc1594f70c5L } }, + /* 8 << 112 */ + { { 0x60e2857080eb24a9L,0x7bedfb4d488e0cfdL,0x721ebbd7c259cdb8L, + 0x0b0da855bc6390a9L }, + { 0x2b4d04dbde314c70L,0xcdbf1fbc6c32e846L,0x33833eabb162fc9eL, + 0x9939b48bb0dd3ab7L } }, + /* 9 << 112 */ + { { 0x5aaa98a7cb0c9c8cL,0x75105f3081c4375cL,0xceee50575ef1c90fL, + 0xb31e065fc23a17bfL }, + { 0x5364d275d4b6d45aL,0xd363f3ad62ec8996L,0xb5d212394391c65bL, + 0x84564765ebb41b47L } }, + /* 10 << 112 */ + { { 0x20d18ecc37107c78L,0xacff3b6b570c2a66L,0x22f975d99bd0d845L, + 0xef0a0c46ba178fa0L }, + { 0x1a41965176b6028eL,0xc49ec674248612d4L,0x5b6ac4f27338af55L, + 0x06145e627bee5a36L } }, + /* 11 << 112 */ + { { 0x33e95d07e75746b5L,0x1c1e1f6dc40c78beL,0x967833ef222ff8e2L, + 0x4bedcf6ab49180adL }, + { 0x6b37e9c13d7a4c8aL,0x2748887c6ddfe760L,0xf7055123aa3a5bbcL, + 0x954ff2257bbb8e74L } }, + /* 12 << 112 */ + { { 0xc42b8ab197c3dfb9L,0x55a549b0cf168154L,0xad6748e7c1b50692L, + 0x2775780f6fc5cbcbL }, + { 0x4eab80b8e1c9d7c8L,0x8c69dae13fdbcd56L,0x47e6b4fb9969eaceL, + 0x002f1085a705cb5aL } }, + /* 13 << 112 */ + { { 0x4e23ca446d3fea55L,0xb4ae9c86f4810568L,0x47bfb91b2a62f27dL, + 0x60deb4c9d9bac28cL }, + { 
0xa892d8947de6c34cL,0x4ee682594494587dL,0x914ee14e1a3f8a5bL, + 0xbb113eaa28700385L } }, + /* 14 << 112 */ + { { 0x81ca03b92115b4c9L,0x7c163d388908cad1L,0xc912a118aa18179aL, + 0xe09ed750886e3081L }, + { 0xa676e3fa26f516caL,0x753cacf78e732f91L,0x51592aea833da8b4L, + 0xc626f42f4cbea8aaL } }, + /* 15 << 112 */ + { { 0xef9dc899a7b56eafL,0x00c0e52c34ef7316L,0x5b1e4e24fe818a86L, + 0x9d31e20dc538be47L }, + { 0x22eb932d3ed68974L,0xe44bbc087c4e87c4L,0x4121086e0dde9aefL, + 0x8e6b9cff134f4345L } }, + /* 16 << 112 */ + { { 0x96892c1f711b0eb9L,0xb905f2c8780ab954L,0xace26309a20792dbL, + 0xec8ac9b30684e126L }, + { 0x486ad8b6b40a2447L,0x60121fc19fe3fb24L,0x5626fccf1a8e3b3fL, + 0x4e5686226ad1f394L } }, + /* 17 << 112 */ + { { 0xda7aae0d196aa5a1L,0xe0df8c771041b5fbL,0x451465d926b318b7L, + 0xc29b6e557ab136e9L }, + { 0x2c2ab48b71148463L,0xb5738de364454a76L,0x54ccf9a05a03abe4L, + 0x377c02960427d58eL } }, + /* 18 << 112 */ + { { 0x73f5f0b92bb39c1fL,0x14373f2ce608d8c5L,0xdcbfd31400fbb805L, + 0xdf18fb2083afdcfbL }, + { 0x81a57f4242b3523fL,0xe958532d87f650fbL,0xaa8dc8b68b0a7d7cL, + 0x1b75dfb7150166beL } }, + /* 19 << 112 */ + { { 0x90e4f7c92d7d1413L,0x67e2d6b59834f597L,0x4fd4f4f9a808c3e8L, + 0xaf8237e0d5281ec1L }, + { 0x25ab5fdc84687ceeL,0xc5ded6b1a5b26c09L,0x8e4a5aecc8ea7650L, + 0x23b73e5c14cc417fL } }, + /* 20 << 112 */ + { { 0x2bfb43183037bf52L,0xb61e6db578c725d7L,0x8efd4060bbb3e5d7L, + 0x2e014701dbac488eL }, + { 0xac75cf9a360aa449L,0xb70cfd0579634d08L,0xa591536dfffb15efL, + 0xb2c37582d07c106cL } }, + /* 21 << 112 */ + { { 0xb4293fdcf50225f9L,0xc52e175cb0e12b03L,0xf649c3bad0a8bf64L, + 0x745a8fefeb8ae3c6L }, + { 0x30d7e5a358321bc3L,0xb1732be70bc4df48L,0x1f217993e9ea5058L, + 0xf7a71cde3e4fd745L } }, + /* 22 << 112 */ + { { 0x86cc533e894c5bbbL,0x6915c7d969d83082L,0xa6aa2d055815c244L, + 0xaeeee59249b22ce5L }, + { 0x89e39d1378135486L,0x3a275c1f16b76f2fL,0xdb6bcc1be036e8f5L, + 0x4df69b215e4709f5L } }, + /* 23 << 112 */ + { { 0xa188b2502d0f39aaL,0x622118bb15a85947L,0x2ebf520ffde0f4faL, + 0xa40e9f294860e539L }, + { 0x7b6a51eb22b57f0fL,0x849a33b97e80644aL,0x50e5d16f1cf095feL, + 0xd754b54eec55f002L } }, + /* 24 << 112 */ + { { 0x5cfbbb22236f4a98L,0x0b0c59e9066800bbL,0x4ac69a8f5a9a7774L, + 0x2b33f804d6bec948L }, + { 0xb372929532e6c466L,0x68956d0f4e599c73L,0xa47a249f155c31ccL, + 0x24d80f0de1ce284eL } }, + /* 25 << 112 */ + { { 0xcd821dfb988baf01L,0xe6331a7ddbb16647L,0x1eb8ad33094cb960L, + 0x593cca38c91bbca5L }, + { 0x384aac8d26567456L,0x40fa0309c04b6490L,0x97834cd6dab6c8f6L, + 0x68a7318d3f91e55fL } }, + /* 26 << 112 */ + { { 0xa00fd04efc4d3157L,0xb56f8ab22bf3bdeaL,0x014f56484fa57172L, + 0x948c5860450abdb3L }, + { 0x342b5df00ebd4f08L,0x3e5168cd0e82938eL,0x7aedc1ceb0df5dd0L, + 0x6bbbc6d9e5732516L } }, + /* 27 << 112 */ + { { 0xc7bfd486605daaa6L,0x46fd72b7bb9a6c9eL,0xe4847fb1a124fb89L, + 0x75959cbda2d8ffbcL }, + { 0x42579f65c8a588eeL,0x368c92e6b80b499dL,0xea4ef6cd999a5df1L, + 0xaa73bb7f936fe604L } }, + /* 28 << 112 */ + { { 0xf347a70d6457d188L,0x86eda86b8b7a388bL,0xb7cdff060ccd6013L, + 0xbeb1b6c7d0053fb2L }, + { 0x0b02238799240a9fL,0x1bbb384f776189b2L,0x8695e71e9066193aL, + 0x2eb5009706ffac7eL } }, + /* 29 << 112 */ + { { 0x0654a9c04a7d2caaL,0x6f3fb3d1a5aaa290L,0x835db041ff476e8fL, + 0x540b8b0bc42295e4L }, + { 0xa5c73ac905e214f5L,0x9a74075a56a0b638L,0x2e4b1090ce9e680bL, + 0x57a5b4796b8d9afaL } }, + /* 30 << 112 */ + { { 0x0dca48e726bfe65cL,0x097e391c7290c307L,0x683c462e6669e72eL, + 0xf505be1e062559acL }, + { 0x5fbe3ea1e3a3035aL,0x6431ebf69cd50da8L,0xfd169d5c1f6407f2L, + 0x8d838a9560fce6b8L } }, + /* 31 << 112 */ + { { 
0x2a2bfa7f650006f0L,0xdfd7dad350c0fbb2L,0x92452495ccf9ad96L, + 0x183bf494d95635f9L }, + { 0x02d5df434a7bd989L,0x505385cca5431095L,0xdd98e67dfd43f53eL, + 0xd61e1a6c500c34a9L } }, + /* 32 << 112 */ + { { 0x5a4b46c64a8a3d62L,0x8469c4d0247743d2L,0x2bb3a13d88f7e433L, + 0x62b23a1001be5849L }, + { 0xe83596b4a63d1a4cL,0x454e7fea7d183f3eL,0x643fce6117afb01cL, + 0x4e65e5e61c4c3638L } }, + /* 33 << 112 */ + { { 0x41d85ea1ef74c45bL,0x2cfbfa66ae328506L,0x98b078f53ada7da9L, + 0xd985fe37ec752fbbL }, + { 0xeece68fe5a0148b4L,0x6f9a55c72d78136dL,0x232dccc4d2b729ceL, + 0xa27e0dfd90aafbc4L } }, + /* 34 << 112 */ + { { 0x9647445212b4603eL,0xa876c5516b706d14L,0xdf145fcf69a9d412L, + 0xe2ab75b72d479c34L }, + { 0x12df9a761a23ff97L,0xc61389925d359d10L,0x6e51c7aefa835f22L, + 0x69a79cb1c0fcc4d9L } }, + /* 35 << 112 */ + { { 0xf57f350d594cc7e1L,0x3079ca633350ab79L,0x226fb6149aff594aL, + 0x35afec026d59a62bL }, + { 0x9bee46f406ed2c6eL,0x58da17357d939a57L,0x44c504028fd1797eL, + 0xd8853e7c5ccea6caL } }, + /* 36 << 112 */ + { { 0x4065508da35fcd5fL,0x8965df8c495ccaebL,0x0f2da85012e1a962L, + 0xee471b94c1cf1cc4L }, + { 0xcef19bc80a08fb75L,0x704958f581de3591L,0x2867f8b23aef4f88L, + 0x8d749384ea9f9a5fL } }, + /* 37 << 112 */ + { { 0x1b3855378c9049f4L,0x5be948f37b92d8b6L,0xd96f725db6e2bd6bL, + 0x37a222bc958c454dL }, + { 0xe7c61abb8809bf61L,0x46f07fbc1346f18dL,0xfb567a7ae87c0d1cL, + 0x84a461c87ef3d07aL } }, + /* 38 << 112 */ + { { 0x0a5adce6d9278d98L,0x24d948139dfc73e1L,0x4f3528b6054321c3L, + 0x2e03fdde692ea706L }, + { 0x10e6061947b533c0L,0x1a8bc73f2ca3c055L,0xae58d4b21bb62b8fL, + 0xb2045a73584a24e3L } }, + /* 39 << 112 */ + { { 0x3ab3d5afbd76e195L,0x478dd1ad6938a810L,0x6ffab3936ee3d5cbL, + 0xdfb693db22b361e4L }, + { 0xf969449651dbf1a7L,0xcab4b4ef08a2e762L,0xe8c92f25d39bba9aL, + 0x850e61bcf1464d96L } }, + /* 40 << 112 */ + { { 0xb7e830e3dc09508bL,0xfaf6d2cf74317655L,0x72606cebdf690355L, + 0x48bb92b3d0c3ded6L }, + { 0x65b754845c7cf892L,0xf6cd7ac9d5d5f01fL,0xc2c30a5996401d69L, + 0x91268650ed921878L } }, + /* 41 << 112 */ + { { 0x380bf913b78c558fL,0x43c0baebc8afdaa9L,0x377f61d554f169d3L, + 0xf8da07e3ae5ff20bL }, + { 0xb676c49da8a90ea8L,0x81c1ff2b83a29b21L,0x383297ac2ad8d276L, + 0x3001122fba89f982L } }, + /* 42 << 112 */ + { { 0xe1d794be6718e448L,0x246c14827c3e6e13L,0x56646ef85d26b5efL, + 0x80f5091e88069cddL }, + { 0xc5992e2f724bdd38L,0x02e915b48471e8c7L,0x96ff320a0d0ff2a9L, + 0xbf8864874384d1a0L } }, + /* 43 << 112 */ + { { 0xbbe1e6a6c93f72d6L,0xd5f75d12cad800eaL,0xfa40a09fe7acf117L, + 0x32c8cdd57581a355L }, + { 0x742219927023c499L,0xa8afe5d738ec3901L,0x5691afcba90e83f0L, + 0x41bcaa030b8f8eacL } }, + /* 44 << 112 */ + { { 0xe38b5ff98d2668d5L,0x0715281a7ad81965L,0x1bc8fc7c03c6ce11L, + 0xcbbee6e28b650436L }, + { 0x06b00fe80cdb9808L,0x17d6e066fe3ed315L,0x2e9d38c64d0b5018L, + 0xab8bfd56844dcaefL } }, + /* 45 << 112 */ + { { 0x42894a59513aed8bL,0xf77f3b6d314bd07aL,0xbbdecb8f8e42b582L, + 0xf10e2fa8d2390fe6L }, + { 0xefb9502262a2f201L,0x4d59ea5050ee32b0L,0xd87f77286da789a8L, + 0xcf98a2cff79492c4L } }, + /* 46 << 112 */ + { { 0xf9577239720943c2L,0xba044cf53990b9d0L,0x5aa8e82395f2884aL, + 0x834de6ed0278a0afL }, + { 0xc8e1ee9a5f25bd12L,0x9259ceaa6f7ab271L,0x7e6d97a277d00b76L, + 0x5c0c6eeaa437832aL } }, + /* 47 << 112 */ + { { 0x5232c20f5606b81dL,0xabd7b3750d991ee5L,0x4d2bfe358632d951L, + 0x78f8514698ed9364L }, + { 0x951873f0f30c3282L,0x0da8ac80a789230bL,0x3ac7789c5398967fL, + 0xa69b8f7fbdda0fb5L } }, + /* 48 << 112 */ + { { 0xe5db77176add8545L,0x1b71cb6672c49b66L,0xd856073968421d77L, + 0x03840fe883e3afeaL }, + { 
0xb391dad51ec69977L,0xae243fb9307f6726L,0xc88ac87be8ca160cL, + 0x5174cced4ce355f4L } }, + /* 49 << 112 */ + { { 0x98a35966e58ba37dL,0xfdcc8da27817335dL,0x5b75283083fbc7bfL, + 0x68e419d4d9c96984L }, + { 0x409a39f402a40380L,0x88940faf1fe977bcL,0xc640a94b8f8edea6L, + 0x1e22cd17ed11547dL } }, + /* 50 << 112 */ + { { 0xe28568ce59ffc3e2L,0x60aa1b55c1dee4e7L,0xc67497c8837cb363L, + 0x06fb438a105a2bf2L }, + { 0x30357ec4500d8e20L,0x1ad9095d0670db10L,0x7f589a05c73b7cfdL, + 0xf544607d880d6d28L } }, + /* 51 << 112 */ + { { 0x17ba93b1a20ef103L,0xad8591306ba6577bL,0x65c91cf66fa214a0L, + 0xd7d49c6c27990da5L }, + { 0xecd9ec8d20bb569dL,0xbd4b2502eeffbc33L,0x2056ca5a6bed0467L, + 0x7916a1f75b63728cL } }, + /* 52 << 112 */ + { { 0xd4f9497d53a4f566L,0x8973466497b56810L,0xf8e1da740494a621L, + 0x82546a938d011c68L }, + { 0x1f3acb19c61ac162L,0x52f8fa9cabad0d3eL,0x15356523b4b7ea43L, + 0x5a16ad61ae608125L } }, + /* 53 << 112 */ + { { 0xb0bcb87f4faed184L,0x5f236b1d5029f45fL,0xd42c76070bc6b1fcL, + 0xc644324e68aefce3L }, + { 0x8e191d595c5d8446L,0xc020807713ae1979L,0xadcaee553ba59cc7L, + 0x20ed6d6ba2cb81baL } }, + /* 54 << 112 */ + { { 0x0952ba19b6efcffcL,0x60f12d6897c0b87cL,0x4ee2c7c49caa30bcL, + 0x767238b797fbff4eL }, + { 0xebc73921501b5d92L,0x3279e3dfc2a37737L,0x9fc12bc86d197543L, + 0xfa94dc6f0a40db4eL } }, + /* 55 << 112 */ + { { 0x7392b41a530ccbbdL,0x87c82146ea823525L,0xa52f984c05d98d0cL, + 0x2ae57d735ef6974cL }, + { 0x9377f7bf3042a6ddL,0xb1a007c019647a64L,0xfaa9079a0cca9767L, + 0x3d81a25bf68f72d5L } }, + /* 56 << 112 */ + { { 0x752067f8ff81578eL,0x786221509045447dL,0xc0c22fcf0505aa6fL, + 0x1030f0a66bed1c77L }, + { 0x31f29f151f0bd739L,0x2d7989c7e6debe85L,0x5c070e728e677e98L, + 0x0a817bd306e81fd5L } }, + /* 57 << 112 */ + { { 0xc110d830b0f2ac95L,0x48d0995aab20e64eL,0x0f3e00e17729cd9aL, + 0x2a570c20dd556946L }, + { 0x912dbcfd4e86214dL,0x2d014ee2cf615498L,0x55e2b1e63530d76eL, + 0xc5135ae4fd0fd6d1L } }, + /* 58 << 112 */ + { { 0x0066273ad4f3049fL,0xbb8e9893e7087477L,0x2dba1ddb14c6e5fdL, + 0xdba3788651f57e6cL }, + { 0x5aaee0a65a72f2cfL,0x1208bfbf7bea5642L,0xf5c6aa3b67872c37L, + 0xd726e08343f93224L } }, + /* 59 << 112 */ + { { 0x1854daa5061f1658L,0xc0016df1df0cd2b3L,0xc2a3f23e833d50deL, + 0x73b681d2bbbd3017L }, + { 0x2f046dc43ac343c0L,0x9c847e7d85716421L,0xe1e13c910917eed4L, + 0x3fc9eebd63a1b9c6L } }, + /* 60 << 112 */ + { { 0x0f816a727fe02299L,0x6335ccc2294f3319L,0x3820179f4745c5beL, + 0xe647b782922f066eL }, + { 0xc22e49de02cafb8aL,0x299bc2fffcc2ecccL,0x9a8feea26e0e8282L, + 0xa627278bfe893205L } }, + /* 61 << 112 */ + { { 0xa7e197337933e47bL,0xf4ff6b132e766402L,0xa4d8be0a98440d9fL, + 0x658f5c2f38938808L }, + { 0x90b75677c95b3b3eL,0xfa0442693137b6ffL,0x077b039b43c47c29L, + 0xcca95dd38a6445b2L } }, + /* 62 << 112 */ + { { 0x0b498ba42333fc4cL,0x274f8e68f736a1b1L,0x6ca348fd5f1d4b2eL, + 0x24d3be78a8f10199L }, + { 0x8535f858ca14f530L,0xa6e7f1635b982e51L,0x847c851236e1bf62L, + 0xf6a7c58e03448418L } }, + /* 63 << 112 */ + { { 0x583f3703f9374ab6L,0x864f91956e564145L,0x33bc3f4822526d50L, + 0x9f323c801262a496L }, + { 0xaa97a7ae3f046a9aL,0x70da183edf8a039aL,0x5b68f71c52aa0ba6L, + 0x9be0fe5121459c2dL } }, + /* 64 << 112 */ + { { 0xc1e17eb6cbc613e5L,0x33131d55497ea61cL,0x2f69d39eaf7eded5L, + 0x73c2f434de6af11bL }, + { 0x4ca52493a4a375faL,0x5f06787cb833c5c2L,0x814e091f3e6e71cfL, + 0x76451f578b746666L } }, + /* 0 << 119 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 119 */ + { { 0x80f9bdef694db7e0L,0xedca8787b9fcddc6L,0x51981c3403b8dce1L, + 0x4274dcf170e10ba1L }, + { 
0xf72743b86def6d1aL,0xd25b1670ebdb1866L,0xc4491e8c050c6f58L, + 0x2be2b2ab87fbd7f5L } }, + /* 2 << 119 */ + { { 0x3e0e5c9dd111f8ecL,0xbcc33f8db7c4e760L,0x702f9a91bd392a51L, + 0x7da4a795c132e92dL }, + { 0x1a0b0ae30bb1151bL,0x54febac802e32251L,0xea3a5082694e9e78L, + 0xe58ffec1e4fe40b8L } }, + /* 3 << 119 */ + { { 0xf85592fcd1e0cf9eL,0xdea75f0dc0e7b2e8L,0xc04215cfc135584eL, + 0x174fc7272f57092aL }, + { 0xe7277877eb930beaL,0x504caccb5eb02a5aL,0xf9fe08f7f5241b9bL, + 0xe7fb62f48d5ca954L } }, + /* 4 << 119 */ + { { 0xfbb8349d29c4120bL,0x9f94391fc0d0d915L,0xc4074fa75410ba51L, + 0xa66adbf6150a5911L }, + { 0xc164543c34bfca38L,0xe0f27560b9e1ccfcL,0x99da0f53e820219cL, + 0xe8234498c6b4997aL } }, + /* 5 << 119 */ + { { 0xcfb88b769d4c5423L,0x9e56eb10b0521c49L,0x418e0b5ebe8700a1L, + 0x00cbaad6f93cb58aL }, + { 0xe923fbded92a5e67L,0xca4979ac1f347f11L,0x89162d856bc0585bL, + 0xdd6254afac3c70e3L } }, + /* 6 << 119 */ + { { 0x7b23c513516e19e4L,0x56e2e847c5c4d593L,0x9f727d735ce71ef6L, + 0x5b6304a6f79a44c5L }, + { 0x6638a7363ab7e433L,0x1adea470fe742f83L,0xe054b8545b7fc19fL, + 0xf935381aba1d0698L } }, + /* 7 << 119 */ + { { 0x546eab2d799e9a74L,0x96239e0ea949f729L,0xca274c6b7090055aL, + 0x835142c39020c9b0L }, + { 0xa405667aa2e8807fL,0x29f2c0851aa3d39eL,0xcc555d6442fc72f5L, + 0xe856e0e7fbeacb3cL } }, + /* 8 << 119 */ + { { 0xb5504f9d918e4936L,0x65035ef6b2513982L,0x0553a0c26f4d9cb9L, + 0x6cb10d56bea85509L }, + { 0x48d957b7a242da11L,0x16a4d3dd672b7268L,0x3d7e637c8502a96bL, + 0x27c7032b730d463bL } }, + /* 9 << 119 */ + { { 0xbdc02b18e4136a14L,0xbacf969d678e32bfL,0xc98d89a3dd9c3c03L, + 0x7b92420a23becc4fL }, + { 0xd4b41f78c64d565cL,0x9f969d0010f28295L,0xec7f7f76b13d051aL, + 0x08945e1ea92da585L } }, + /* 10 << 119 */ + { { 0x55366b7d5846426fL,0xe7d09e89247d441dL,0x510b404d736fbf48L, + 0x7fa003d0e784bd7dL }, + { 0x25f7614f17fd9596L,0x49e0e0a135cb98dbL,0x2c65957b2e83a76aL, + 0x5d40da8dcddbe0f8L } }, + /* 11 << 119 */ + { { 0xf2b8c405050bad24L,0x8918426dc2aa4823L,0x2aeab3dda38365a7L, + 0x720317177c91b690L }, + { 0x8b00d69960a94120L,0x478a255de99eaeecL,0xbf656a5f6f60aafdL, + 0xdfd7cb755dee77b3L } }, + /* 12 << 119 */ + { { 0x37f68bb4a595939dL,0x0355647928740217L,0x8e740e7c84ad7612L, + 0xd89bc8439044695fL }, + { 0xf7f3da5d85a9184dL,0x562563bb9fc0b074L,0x06d2e6aaf88a888eL, + 0x612d8643161fbe7cL } }, + /* 13 << 119 */ + { { 0x465edba7f64085e7L,0xb230f30429aa8511L,0x53388426cda2d188L, + 0x908857354b666649L }, + { 0x6f02ff9a652f54f6L,0x65c822945fae2bf0L,0x7816ade062f5eee3L, + 0xdcdbdf43fcc56d70L } }, + /* 14 << 119 */ + { { 0x9fb3bba354530bb2L,0xbde3ef77cb0869eaL,0x89bc90460b431163L, + 0x4d03d7d2e4819a35L }, + { 0x33ae4f9e43b6a782L,0x216db3079c88a686L,0x91dd88e000ffedd9L, + 0xb280da9f12bd4840L } }, + /* 15 << 119 */ + { { 0x32a7cb8a1635e741L,0xfe14008a78be02a7L,0x3fafb3341b7ae030L, + 0x7fd508e75add0ce9L }, + { 0x72c83219d607ad51L,0x0f229c0a8d40964aL,0x1be2c3361c878da2L, + 0xe0c96742eab2ab86L } }, + /* 16 << 119 */ + { { 0x458f86913e538cd7L,0xa7001f6c8e08ad53L,0x52b8c6e6bf5d15ffL, + 0x548234a4011215ddL }, + { 0xff5a9d2d3d5b4045L,0xb0ffeeb64a904190L,0x55a3aca448607f8bL, + 0x8cbd665c30a0672aL } }, + /* 17 << 119 */ + { { 0x87f834e042583068L,0x02da2aebf3f6e683L,0x6b763e5d05c12248L, + 0x7230378f65a8aefcL }, + { 0x93bd80b571e8e5caL,0x53ab041cb3b62524L,0x1b8605136c9c552eL, + 0xe84d402cd5524e66L } }, + /* 18 << 119 */ + { { 0xa37f3573f37f5937L,0xeb0f6c7dd1e4fca5L,0x2965a554ac8ab0fcL, + 0x17fbf56c274676acL }, + { 0x2e2f6bd9acf7d720L,0x41fc8f8810224766L,0x517a14b385d53befL, + 0xdae327a57d76a7d1L } }, + /* 19 << 119 */ + { { 
0x6ad0a065c4818267L,0x33aa189b37c1bbc1L,0x64970b5227392a92L, + 0x21699a1c2d1535eaL }, + { 0xcd20779cc2d7a7fdL,0xe318605999c83cf2L,0x9b69440b72c0b8c7L, + 0xa81497d77b9e0e4dL } }, + /* 20 << 119 */ + { { 0x515d5c891f5f82dcL,0x9a7f67d76361079eL,0xa8da81e311a35330L, + 0xe44990c44b18be1bL }, + { 0xc7d5ed95af103e59L,0xece8aba78dac9261L,0xbe82b0999394b8d3L, + 0x6830f09a16adfe83L } }, + /* 21 << 119 */ + { { 0x250a29b488172d01L,0x8b20bd65caff9e02L,0xb8a7661ee8a6329aL, + 0x4520304dd3fce920L }, + { 0xae45da1f2b47f7efL,0xe07f52885bffc540L,0xf79970093464f874L, + 0x2244c2cda6fa1f38L } }, + /* 22 << 119 */ + { { 0x43c41ac194d7d9b1L,0x5bafdd82c82e7f17L,0xdf0614c15fda0fcaL, + 0x74b043a7a8ae37adL }, + { 0x3ba6afa19e71734cL,0x15d5437e9c450f2eL,0x4a5883fe67e242b1L, + 0x5143bdc22c1953c2L } }, + /* 23 << 119 */ + { { 0x542b8b53fc5e8920L,0x363bf9a89a9cee08L,0x02375f10c3486e08L, + 0x2037543b8c5e70d2L }, + { 0x7109bccc625640b4L,0xcbc1051e8bc62c3bL,0xf8455fed803f26eaL, + 0x6badceabeb372424L } }, + /* 24 << 119 */ + { { 0xa2a9ce7c6b53f5f9L,0x642465951b176d99L,0xb1298d36b95c081bL, + 0x53505bb81d9a9ee6L }, + { 0x3f6f9e61f2ba70b0L,0xd07e16c98afad453L,0x9f1694bbe7eb4a6aL, + 0xdfebced93cb0bc8eL } }, + /* 25 << 119 */ + { { 0x92d3dcdc53868c8bL,0x174311a2386107a6L,0x4109e07c689b4e64L, + 0x30e4587f2df3dcb6L }, + { 0x841aea310811b3b2L,0x6144d41d0cce43eaL,0x464c45812a9a7803L, + 0xd03d371f3e158930L } }, + /* 26 << 119 */ + { { 0xc676d7f2b1f3390bL,0x9f7a1b8ca5b61272L,0x4ebebfc9c2e127a9L, + 0x4602500c5dd997bfL }, + { 0x7f09771c4711230fL,0x058eb37c020f09c1L,0xab693d4bfee5e38bL, + 0x9289eb1f4653cbc0L } }, + /* 27 << 119 */ + { { 0xbecf46abd51b9cf5L,0xd2aa9c029f0121afL,0x36aaf7d2e90dc274L, + 0x909e4ea048b95a3cL }, + { 0xe6b704966f32dbdbL,0x672188a08b030b3eL,0xeeffe5b3cfb617e2L, + 0x87e947de7c82709eL } }, + /* 28 << 119 */ + { { 0xa44d2b391770f5a7L,0xe4d4d7910e44eb82L,0x42e69d1e3f69712aL, + 0xbf11c4d6ac6a820eL }, + { 0xb5e7f3e542c4224cL,0xd6b4e81c449d941cL,0x5d72bd165450e878L, + 0x6a61e28aee25ac54L } }, + /* 29 << 119 */ + { { 0x33272094e6f1cd95L,0x7512f30d0d18673fL,0x32f7a4ca5afc1464L, + 0x2f0956566bbb977bL }, + { 0x586f47caa8226200L,0x02c868ad1ac07369L,0x4ef2b845c613acbeL, + 0x43d7563e0386054cL } }, + /* 30 << 119 */ + { { 0x54da9dc7ab952578L,0xb5423df226e84d0bL,0xa8b64eeb9b872042L, + 0xac2057825990f6dfL }, + { 0x4ff696eb21f4c77aL,0x1a79c3e4aab273afL,0x29bc922e9436b3f1L, + 0xff807ef8d6d9a27aL } }, + /* 31 << 119 */ + { { 0x82acea3d778f22a0L,0xfb10b2e85b5e7469L,0xc0b169802818ee7dL, + 0x011afff4c91c1a2fL }, + { 0x95a6d126ad124418L,0x31c081a5e72e295fL,0x36bb283af2f4db75L, + 0xd115540f7acef462L } }, + /* 32 << 119 */ + { { 0xc7f3a8f833f6746cL,0x21e46f65fea990caL,0x915fd5c5caddb0a9L, + 0xbd41f01678614555L }, + { 0x346f4434426ffb58L,0x8055943614dbc204L,0xf3dd20fe5a969b7fL, + 0x9d59e956e899a39aL } }, + /* 33 << 119 */ + { { 0xf1b0971c8ad4cf4bL,0x034488602ffb8fb8L,0xf071ac3c65340ba4L, + 0x408d0596b27fd758L }, + { 0xe7c78ea498c364b0L,0xa4aac4a5051e8ab5L,0xb9e1d560485d9002L, + 0x9acd518a88844455L } }, + /* 34 << 119 */ + { { 0xe4ca688fd06f56c0L,0xa48af70ddf027972L,0x691f0f045e9a609dL, + 0xa9dd82cdee61270eL }, + { 0x8903ca63a0ef18d3L,0x9fb7ee353d6ca3bdL,0xa7b4a09cabf47d03L, + 0x4cdada011c67de8eL } }, + /* 35 << 119 */ + { { 0x520037499355a244L,0xe77fd2b64f2151a9L,0x695d6cf666b4efcbL, + 0xc5a0cacfda2cfe25L }, + { 0x104efe5cef811865L,0xf52813e89ea5cc3dL,0x855683dc40b58dbcL, + 0x0338ecde175fcb11L } }, + /* 36 << 119 */ + { { 0xf9a0563774921592L,0xb4f1261db9bb9d31L,0x551429b74e9c5459L, + 0xbe182e6f6ea71f53L }, + { 
0xd3a3b07cdfc50573L,0x9ba1afda62be8d44L,0x9bcfd2cb52ab65d3L, + 0xdf11d547a9571802L } }, + /* 37 << 119 */ + { { 0x099403ee02a2404aL,0x497406f421088a71L,0x994794095004ae71L, + 0xbdb42078a812c362L }, + { 0x2b72a30fd8828442L,0x283add27fcb5ed1cL,0xf7c0e20066a40015L, + 0x3e3be64108b295efL } }, + /* 38 << 119 */ + { { 0xac127dc1e038a675L,0x729deff38c5c6320L,0xb7df8fd4a90d2c53L, + 0x9b74b0ec681e7cd3L }, + { 0x5cb5a623dab407e5L,0xcdbd361576b340c6L,0xa184415a7d28392cL, + 0xc184c1d8e96f7830L } }, + /* 39 << 119 */ + { { 0xc3204f1981d3a80fL,0xfde0c841c8e02432L,0x78203b3e8149e0c1L, + 0x5904bdbb08053a73L }, + { 0x30fc1dd1101b6805L,0x43c223bc49aa6d49L,0x9ed671417a174087L, + 0x311469a0d5997008L } }, + /* 40 << 119 */ + { { 0xb189b6845e43fc61L,0xf3282375e0d3ab57L,0x4fa34b67b1181da8L, + 0x621ed0b299ee52b8L }, + { 0x9b178de1ad990676L,0xd51de67b56d54065L,0x2a2c27c47538c201L, + 0x33856ec838a40f5cL } }, + /* 41 << 119 */ + { { 0x2522fc15be6cdcdeL,0x1e603f339f0c6f89L,0x7994edc3103e30a6L, + 0x033a00db220c853eL }, + { 0xd3cfa409f7bb7fd7L,0x70f8781e462d18f6L,0xbbd82980687fe295L, + 0x6eef4c32595669f3L } }, + /* 42 << 119 */ + { { 0x86a9303b2f7e85c3L,0x5fce462171988f9bL,0x5b935bf6c138acb5L, + 0x30ea7d6725661212L }, + { 0xef1eb5f4e51ab9a2L,0x0587c98aae067c78L,0xb3ce1b3c77ca9ca6L, + 0x2a553d4d54b5f057L } }, + /* 43 << 119 */ + { { 0xc78982364da29ec2L,0xdbdd5d13b9c57316L,0xc57d6e6b2cd80d47L, + 0x80b460cffe9e7391L }, + { 0x98648cabf963c31eL,0x67f9f633cc4d32fdL,0x0af42a9dfdf7c687L, + 0x55f292a30b015ea7L } }, + /* 44 << 119 */ + { { 0x89e468b2cd21ab3dL,0xe504f022c393d392L,0xab21e1d4a5013af9L, + 0xe3283f78c2c28acbL }, + { 0xf38b35f6226bf99fL,0xe83542740e291e69L,0x61673a15b20c162dL, + 0xc101dc75b04fbdbeL } }, + /* 45 << 119 */ + { { 0x8323b4c2255bd617L,0x6c9696936c2a9154L,0xc6e6586062679387L, + 0x8e01db0cb8c88e23L }, + { 0x33c42873893a5559L,0x7630f04b47a3e149L,0xb5d80805ddcf35f8L, + 0x582ca08077dfe732L } }, + /* 46 << 119 */ + { { 0x2c7156e10b1894a0L,0x92034001d81c68c0L,0xed225d00c8b115b5L, + 0x237f9c2283b907f2L }, + { 0x0ea2f32f4470e2c0L,0xb725f7c158be4e95L,0x0f1dcafab1ae5463L, + 0x59ed51871ba2fc04L } }, + /* 47 << 119 */ + { { 0xf6e0f316d0115d4dL,0x5180b12fd3691599L,0x157e32c9527f0a41L, + 0x7b0b081da8e0ecc0L }, + { 0x6dbaaa8abf4f0dd0L,0x99b289c74d252696L,0x79b7755edbf864feL, + 0x6974e2b176cad3abL } }, + /* 48 << 119 */ + { { 0x35dbbee206ddd657L,0xe7cbdd112ff3a96dL,0x88381968076be758L, + 0x2d737e7208c91f5dL }, + { 0x5f83ab6286ec3776L,0x98aa649d945fa7a1L,0xf477ec3772ef0933L, + 0x66f52b1e098c17b1L } }, + /* 49 << 119 */ + { { 0x9eec58fbd803738bL,0x91aaade7e4e86aa4L,0x6b1ae617a5b51492L, + 0x63272121bbc45974L }, + { 0x7e0e28f0862c5129L,0x0a8f79a93321a4a0L,0xe26d16645041c88fL, + 0x0571b80553233e3aL } }, + /* 50 << 119 */ + { { 0xd1b0ccdec9520711L,0x55a9e4ed3c8b84bfL,0x9426bd39a1fef314L, + 0x4f5f638e6eb93f2bL }, + { 0xba2a1ed32bf9341bL,0xd63c13214d42d5a9L,0xd2964a89316dc7c5L, + 0xd1759606ca511851L } }, + /* 51 << 119 */ + { { 0xd8a9201ff9e6ed35L,0xb7b5ee456736925aL,0x0a83fbbc99581af7L, + 0x3076bc4064eeb051L }, + { 0x5511c98c02dec312L,0x270de898238dcb78L,0x2cf4cf9c539c08c9L, + 0xa70cb65e38d3b06eL } }, + /* 52 << 119 */ + { { 0xb12ec10ecfe57bbdL,0x82c7b65635a0c2b5L,0xddc7d5cd161c67bdL, + 0xe32e8985ae3a32ccL }, + { 0x7aba9444d11a5529L,0xe964ed022427fa1aL,0x1528392d24a1770aL, + 0xa152ce2c12c72fcdL } }, + /* 53 << 119 */ + { { 0x714553a48ec07649L,0x18b4c290459dd453L,0xea32b7147b64b110L, + 0xb871bfa52e6f07a2L }, + { 0xb67112e59e2e3c9bL,0xfbf250e544aa90f6L,0xf77aedb8bd539006L, + 0x3b0cdf9ad172a66fL } }, + /* 54 << 119 */ + { { 
0xedf69feaf8c51187L,0x05bb67ec741e4da7L,0x47df0f3208114345L, + 0x56facb07bb9792b1L }, + { 0xf3e007e98f6229e4L,0x62d103f4526fba0fL,0x4f33bef7b0339d79L, + 0x9841357bb59bfec1L } }, + /* 55 << 119 */ + { { 0xfa8dbb59c34e6705L,0xc3c7180b7fdaa84cL,0xf95872fca4108537L, + 0x8750cc3b932a3e5aL }, + { 0xb61cc69db7275d7dL,0xffa0168b2e59b2e9L,0xca032abc6ecbb493L, + 0x1d86dbd32c9082d8L } }, + /* 56 << 119 */ + { { 0xae1e0b67e28ef5baL,0x2c9a4699cb18e169L,0x0ecd0e331e6bbd20L, + 0x571b360eaf5e81d2L }, + { 0xcd9fea58101c1d45L,0x6651788e18880452L,0xa99726351f8dd446L, + 0x44bed022e37281d0L } }, + /* 57 << 119 */ + { { 0x094b2b2d33da525dL,0xf193678e13144fd8L,0xb8ab5ba4f4c1061dL, + 0x4343b5fadccbe0f4L }, + { 0xa870237163812713L,0x47bf6d2df7611d93L,0x46729b8cbd21e1d7L, + 0x7484d4e0d629e77dL } }, + /* 58 << 119 */ + { { 0x830e6eea60dbac1fL,0x23d8c484da06a2f7L,0x896714b050ca535bL, + 0xdc8d3644ebd97a9bL }, + { 0x106ef9fab12177b4L,0xf79bf464534d5d9cL,0x2537a349a6ab360bL, + 0xc7c54253a00c744fL } }, + /* 59 << 119 */ + { { 0xb3c7a047e5911a76L,0x61ffa5c8647f1ee7L,0x15aed36f8f56ab42L, + 0x6a0d41b0a3ff9ac9L }, + { 0x68f469f5cc30d357L,0xbe9adf816b72be96L,0x1cd926fe903ad461L, + 0x7e89e38fcaca441bL } }, + /* 60 << 119 */ + { { 0xf0f82de5facf69d4L,0x363b7e764775344cL,0x6894f312b2e36d04L, + 0x3c6cb4fe11d1c9a5L }, + { 0x85d9c3394008e1f2L,0x5e9a85ea249f326cL,0xdc35c60a678c5e06L, + 0xc08b944f9f86fba9L } }, + /* 61 << 119 */ + { { 0xde40c02c89f71f0fL,0xad8f3e31ff3da3c0L,0x3ea5096b42125dedL, + 0x13879cbfa7379183L }, + { 0x6f4714a56b306a0bL,0x359c2ea667646c5eL,0xfacf894307726368L, + 0x07a5893565ff431eL } }, + /* 62 << 119 */ + { { 0x24d661d168754ab0L,0x801fce1d6f429a76L,0xc068a85fa58ce769L, + 0xedc35c545d5eca2bL }, + { 0xea31276fa3f660d1L,0xa0184ebeb8fc7167L,0x0f20f21a1d8db0aeL, + 0xd96d095f56c35e12L } }, + /* 63 << 119 */ + { { 0xedf402b5f8c2a25bL,0x1bb772b9059204b6L,0x50cbeae219b4e34cL, + 0x93109d803fa0845aL }, + { 0x54f7ccf78ef59fb5L,0x3b438fe288070963L,0x9e28c65931f3ba9bL, + 0x9cc31b46ead9da92L } }, + /* 64 << 119 */ + { { 0x3c2f0ba9b733aa5fL,0xdece47cbf05af235L,0xf8e3f715a2ac82a5L, + 0xc97ba6412203f18aL }, + { 0xc3af550409c11060L,0x56ea2c0546af512dL,0xfac28daff3f28146L, + 0x87fab43a959ef494L } }, + /* 0 << 126 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 126 */ + { { 0x09891641d4c5105fL,0x1ae80f8e6d7fbd65L,0x9d67225fbee6bdb0L, + 0x3b433b597fc4d860L }, + { 0x44e66db693e85638L,0xf7b59252e3e9862fL,0xdb785157665c32ecL, + 0x702fefd7ae362f50L } }, + /* 2 << 126 */ + { { 0x3754475d0fefb0c3L,0xd48fb56b46d7c35dL,0xa070b633363798a4L, + 0xae89f3d28fdb98e6L }, + { 0x970b89c86363d14cL,0x8981752167abd27dL,0x9bf7d47444d5a021L, + 0xb3083bafcac72aeeL } }, + /* 3 << 126 */ + { { 0x389741debe949a44L,0x638e9388546a4fa5L,0x3fe6419ca0047bdcL, + 0x7047f648aaea57caL }, + { 0x54e48a9041fbab17L,0xda8e0b28576bdba2L,0xe807eebcc72afddcL, + 0x07d3336df42577bfL } }, + /* 4 << 126 */ + { { 0x62a8c244bfe20925L,0x91c19ac38fdce867L,0x5a96a5d5dd387063L, + 0x61d587d421d324f6L }, + { 0xe87673a2a37173eaL,0x2384800853778b65L,0x10f8441e05bab43eL, + 0xfa11fe124621efbeL } }, + /* 5 << 126 */ + { { 0x047b772e81685d7bL,0x23f27d81bf34a976L,0xc27608e2915f48efL, + 0x3b0b43faa521d5c3L }, + { 0x7613fb2663ca7284L,0x7f5729b41d4db837L,0x87b14898583b526bL, + 0x00b732a6bbadd3d1L } }, + /* 6 << 126 */ + { { 0x8e02f4262048e396L,0x436b50b6383d9de4L,0xf78d3481471e85adL, + 0x8b01ea6ad005c8d6L }, + { 0xd3c7afee97015c07L,0x46cdf1a94e3ba2aeL,0x7a42e50183d3a1d2L, + 0xd54b5268b541dff4L } }, + /* 7 << 126 */ + { { 
0x3f24cf304e23e9bcL,0x4387f816126e3624L,0x26a46a033b0b6d61L, + 0xaf1bc8458b2d777cL }, + { 0x25c401ba527de79cL,0x0e1346d44261bbb6L,0x4b96c44b287b4bc7L, + 0x658493c75254562fL } }, + /* 8 << 126 */ + { { 0x23f949feb8a24a20L,0x17ebfed1f52ca53fL,0x9b691bbebcfb4853L, + 0x5617ff6b6278a05dL }, + { 0x241b34c5e3c99ebdL,0xfc64242e1784156aL,0x4206482f695d67dfL, + 0xb967ce0eee27c011L } }, + /* 9 << 126 */ + { { 0x65db375121c80b5dL,0x2e7a563ca31ecca0L,0xe56ffc4e5238a07eL, + 0x3d6c296632ced854L }, + { 0xe99d7d1aaf70b885L,0xafc3bad92d686459L,0x9c78bf460cc8ba5bL, + 0x5a43951918955aa3L } }, + /* 10 << 126 */ + { { 0xf8b517a85fe4e314L,0xe60234d0fcb8906fL,0xffe542acf2061b23L, + 0x287e191f6b4cb59cL }, + { 0x21857ddc09d877d8L,0x1c23478c14678941L,0xbbf0c056b6e05ea4L, + 0x82da4b53b01594feL } }, + /* 11 << 126 */ + { { 0xf7526791fadb8608L,0x049e832d7b74cdf6L,0xa43581ccc2b90a34L, + 0x73639eb89360b10cL }, + { 0x4fba331fe1e4a71bL,0x6ffd6b938072f919L,0x6e53271c65679032L, + 0x67206444f14272ceL } }, + /* 12 << 126 */ + { { 0xc0f734a3b2335834L,0x9526205a90ef6860L,0xcb8be71704e2bb0dL, + 0x2418871e02f383faL }, + { 0xd71776814082c157L,0xcc914ad029c20073L,0xf186c1ebe587e728L, + 0x6fdb3c2261bcd5fdL } }, + /* 13 << 126 */ + { { 0x30d014a6f2f9f8e9L,0x963ece234fec49d2L,0x862025c59605a8d9L, + 0x3987444519f8929aL }, + { 0x01b6ff6512bf476aL,0x598a64d809cf7d91L,0xd7ec774993be56caL, + 0x10899785cbb33615L } }, + /* 14 << 126 */ + { { 0xb8a092fd02eee3adL,0xa86b3d3530145270L,0x323d98c68512b675L, + 0x4b8bc78562ebb40fL }, + { 0x7d301f54413f9cdeL,0xa5e4fb4f2bab5664L,0x1d2b252d1cbfec23L, + 0xfcd576bbe177120dL } }, + /* 15 << 126 */ + { { 0x04427d3e83731a34L,0x2bb9028eed836e8eL,0xb36acff8b612ca7cL, + 0xb88fe5efd3d9c73aL }, + { 0xbe2a6bc6edea4eb3L,0x43b93133488eec77L,0xf41ff566b17106e1L, + 0x469e9172654efa32L } }, + /* 16 << 126 */ + { { 0xb4480f0441c23fa3L,0xb4712eb0c1989a2eL,0x3ccbba0f93a29ca7L, + 0x6e205c14d619428cL }, + { 0x90db7957b3641686L,0x0432691d45ac8b4eL,0x07a759acf64e0350L, + 0x0514d89c9c972517L } }, + /* 17 << 126 */ + { { 0x1701147fa8e67fc3L,0x9e2e0b8bab2085beL,0xd5651824ac284e57L, + 0x890d432574893664L }, + { 0x8a7c5e6ec55e68a3L,0xbf12e90b4339c85aL,0x31846b85f922b655L, + 0x9a54ce4d0bf4d700L } }, + /* 18 << 126 */ + { { 0xd7f4e83af1a14295L,0x916f955cb285d4f9L,0xe57bb0e099ffdabaL, + 0x28a43034eab0d152L }, + { 0x0a36ffa2b8a9cef8L,0x5517407eb9ec051aL,0x9c796096ea68e672L, + 0x853db5fbfb3c77fbL } }, + /* 19 << 126 */ + { { 0x21474ba9e864a51aL,0x6c2676996e8a1b8bL,0x7c82362694120a28L, + 0xe61e9a488383a5dbL }, + { 0x7dd750039f84216dL,0xab020d07ad43cd85L,0x9437ae48da12c659L, + 0x6449c2ebe65452adL } }, + /* 20 << 126 */ + { { 0xcc7c4c1c2cf9d7c1L,0x1320886aee95e5abL,0xbb7b9056beae170cL, + 0xc8a5b250dbc0d662L }, + { 0x4ed81432c11d2303L,0x7da669121f03769fL,0x3ac7a5fd84539828L, + 0x14dada943bccdd02L } }, + /* 21 << 126 */ + { { 0x8b84c3217ef6b0d1L,0x52a9477a7c933f22L,0x5ef6728afd440b82L, + 0x5c3bd8596ce4bd5eL }, + { 0x918b80f5f22c2d3eL,0x368d5040b7bb6cc5L,0xb66142a12695a11cL, + 0x60ac583aeb19ea70L } }, + /* 22 << 126 */ + { { 0x317cbb980eab2437L,0x8cc08c555e2654c8L,0xfe2d6520e6d8307fL, + 0xe9f147f357428993L }, + { 0x5f9c7d14d2fd6cf1L,0xa3ecd0642d4fcbb0L,0xad83fef08e7341f7L, + 0x643f23a03a63115cL } }, + /* 23 << 126 */ + { { 0xd38a78abe65ab743L,0xbf7c75b135edc89cL,0x3dd8752e530df568L, + 0xf85c4a76e308c682L }, + { 0x4c9955b2e68acf37L,0xa544df3dab32af85L,0x4b8ec3f5a25cf493L, + 0x4d8f27641a622febL } }, + /* 24 << 126 */ + { { 0x7bb4f7aaf0dcbc49L,0x7de551f970bbb45bL,0xcfd0f3e49f2ca2e5L, + 0xece587091f5c76efL }, + { 
0x32920edd167d79aeL,0x039df8a2fa7d7ec1L,0xf46206c0bb30af91L, + 0x1ff5e2f522676b59L } }, + /* 25 << 126 */ + { { 0x11f4a0396ea51d66L,0x506c1445807d7a26L,0x60da5705755a9b24L, + 0x8fc8cc321f1a319eL }, + { 0x83642d4d9433d67dL,0x7fa5cb8f6a7dd296L,0x576591db9b7bde07L, + 0x13173d25419716fbL } }, + /* 26 << 126 */ + { { 0xea30599dd5b340ffL,0xfc6b5297b0fe76c5L,0x1c6968c8ab8f5adcL, + 0xf723c7f5901c928dL }, + { 0x4203c3219773d402L,0xdf7c6aa31b51dd47L,0x3d49e37a552be23cL, + 0x57febee80b5a6e87L } }, + /* 27 << 126 */ + { { 0xc5ecbee47bd8e739L,0x79d44994ae63bf75L,0x168bd00f38fb8923L, + 0x75d48ee4d0533130L }, + { 0x554f77aadb5cdf33L,0x3396e8963c696769L,0x2fdddbf2d3fd674eL, + 0xbbb8f6ee99d0e3e5L } }, + /* 28 << 126 */ + { { 0x51b90651cbae2f70L,0xefc4bc0593aaa8ebL,0x8ecd8689dd1df499L, + 0x1aee99a822f367a5L }, + { 0x95d485b9ae8274c5L,0x6c14d4457d30b39cL,0xbafea90bbcc1ef81L, + 0x7c5f317aa459a2edL } }, + /* 29 << 126 */ + { { 0x012110754ef44227L,0xa17bed6edc20f496L,0x0cdfe424819853cdL, + 0x13793298f71e2ce7L }, + { 0x3c1f3078dbbe307bL,0x6dd1c20e76ee9936L,0x23ee4b57423caa20L, + 0x4ac3793b8efb840eL } }, + /* 30 << 126 */ + { { 0x934438ebed1f8ca0L,0x3e5466584ebb25a2L,0xc415af0ec069896fL, + 0xc13eddb09a5aa43dL }, + { 0x7a04204fd49eb8f6L,0xd0d5bdfcd74f1670L,0x3697e28656fc0558L, + 0x1020737101cebadeL } }, + /* 31 << 126 */ + { { 0x5f87e6900647a82bL,0x908e0ed48f40054fL,0xa9f633d479853803L, + 0x8ed13c9a4a28b252L }, + { 0x3e2ef6761f460f64L,0x53930b9b36d06336L,0x347073ac8fc4979bL, + 0x84380e0e5ecd5597L } }, + /* 32 << 126 */ + { { 0xe3b22c6bc4fe3c39L,0xba4a81536c7bebdfL,0xf23ab6b725693459L, + 0x53bc377014922b11L }, + { 0x4645c8ab5afc60dbL,0xaa02235520b9f2a3L,0x52a2954cce0fc507L, + 0x8c2731bb7ce1c2e7L } }, + /* 33 << 126 */ + { { 0xf39608ab18a0339dL,0xac7a658d3735436cL,0xb22c2b07cd992b4fL, + 0x4e83daecf40dcfd4L }, + { 0x8a34c7be2f39ea3eL,0xef0c005fb0a56d2eL,0x62731f6a6edd8038L, + 0x5721d7404e3cb075L } }, + /* 34 << 126 */ + { { 0x1ea41511fbeeee1bL,0xd1ef5e73ef1d0c05L,0x42feefd173c07d35L, + 0xe530a00a8a329493L }, + { 0x5d55b7fef15ebfb0L,0x549de03cd322491aL,0xf7b5f602745b3237L, + 0x3632a3a21ab6e2b6L } }, + /* 35 << 126 */ + { { 0x0d3bba890ef59f78L,0x0dfc6443c9e52b9aL,0x1dc7969972631447L, + 0xef033917b3be20b1L }, + { 0x0c92735db1383948L,0xc1fc29a2c0dd7d7dL,0x6485b697403ed068L, + 0x13bfaab3aac93bdcL } }, + /* 36 << 126 */ + { { 0x410dc6a90deeaf52L,0xb003fb024c641c15L,0x1384978c5bc504c4L, + 0x37640487864a6a77L }, + { 0x05991bc6222a77daL,0x62260a575e47eb11L,0xc7af6613f21b432cL, + 0x22f3acc9ab4953e9L } }, + /* 37 << 126 */ + { { 0x529349228e41d155L,0x4d0245683ac059efL,0xb02017554d884411L, + 0xce8055cfa59a178fL }, + { 0xcd77d1aff6204549L,0xa0a00a3ec7066759L,0x471071ef0272c229L, + 0x009bcf6bd3c4b6b0L } }, + /* 38 << 126 */ + { { 0x2a2638a822305177L,0xd51d59df41645bbfL,0xa81142fdc0a7a3c0L, + 0xa17eca6d4c7063eeL }, + { 0x0bb887ed60d9dcecL,0xd6d28e5120ad2455L,0xebed6308a67102baL, + 0x042c31148bffa408L } }, + /* 39 << 126 */ + { { 0xfd099ac58aa68e30L,0x7a6a3d7c1483513eL,0xffcc6b75ba2d8f0cL, + 0x54dacf961e78b954L }, + { 0xf645696fa4a9af89L,0x3a41194006ac98ecL,0x41b8b3f622a67a20L, + 0x2d0b1e0f99dec626L } }, + /* 40 << 126 */ + { { 0x27c8919240be34e8L,0xc7162b3791907f35L,0x90188ec1a956702bL, + 0xca132f7ddf93769cL }, + { 0x3ece44f90e2025b4L,0x67aaec690c62f14cL,0xad74141822e3cc11L, + 0xcf9b75c37ff9a50eL } }, + /* 41 << 126 */ + { { 0x02fa2b164d348272L,0xbd99d61a9959d56dL,0xbc4f19db18762916L, + 0xcc7cce5049c1ac80L }, + { 0x4d59ebaad846bd83L,0x8775a9dca9202849L,0x07ec4ae16e1f4ca9L, + 0x27eb5875ba893f11L } }, + /* 42 << 126 */ + { { 
0x00284d51662cc565L,0x82353a6b0db4138dL,0xd9c7aaaaaa32a594L, + 0xf5528b5ea5669c47L }, + { 0xf32202312f23c5ffL,0xe3e8147a6affa3a1L,0xfb423d5c202ddda0L, + 0x3d6414ac6b871bd4L } }, + /* 43 << 126 */ + { { 0x586f82e1a51a168aL,0xb712c67148ae5448L,0x9a2e4bd176233eb8L, + 0x0188223a78811ca9L }, + { 0x553c5e21f7c18de1L,0x7682e451b27bb286L,0x3ed036b30e51e929L, + 0xf487211bec9cb34fL } }, + /* 44 << 126 */ + { { 0x0d0942770c24efc8L,0x0349fd04bef737a4L,0x6d1c9dd2514cdd28L, + 0x29c135ff30da9521L }, + { 0xea6e4508f78b0b6fL,0x176f5dd2678c143cL,0x081484184be21e65L, + 0x27f7525ce7df38c4L } }, + /* 45 << 126 */ + { { 0x1fb70e09748ab1a4L,0x9cba50a05efe4433L,0x7846c7a615f75af2L, + 0x2a7c2c575ee73ea8L }, + { 0x42e566a43f0a449aL,0x45474c3bad90fc3dL,0x7447be3d8b61d057L, + 0x3e9d1cf13a4ec092L } }, + /* 46 << 126 */ + { { 0x1603e453f380a6e6L,0x0b86e4319b1437c2L,0x7a4173f2ef29610aL, + 0x8fa729a7f03d57f7L }, + { 0x3e186f6e6c9c217eL,0xbe1d307991919524L,0x92a62a70153d4fb1L, + 0x32ed3e34d68c2f71L } }, + /* 47 << 126 */ + { { 0xd785027f9eb1a8b7L,0xbc37eb77c5b22fe8L,0x466b34f0b9d6a191L, + 0x008a89af9a05f816L }, + { 0x19b028fb7d42c10aL,0x7fe8c92f49b3f6b8L,0x58907cc0a5a0ade3L, + 0xb3154f51559d1a7cL } }, + /* 48 << 126 */ + { { 0x5066efb6d9790ed6L,0xa77a0cbca6aa793bL,0x1a915f3c223e042eL, + 0x1c5def0469c5874bL }, + { 0x0e83007873b6c1daL,0x55cf85d2fcd8557aL,0x0f7c7c760460f3b1L, + 0x87052acb46e58063L } }, + /* 49 << 126 */ + { { 0x09212b80907eae66L,0x3cb068e04d721c89L,0xa87941aedd45ac1cL, + 0xde8d5c0d0daa0dbbL }, + { 0xda421fdce3502e6eL,0xc89442014d89a084L,0x7307ba5ef0c24bfbL, + 0xda212beb20bde0efL } }, + /* 50 << 126 */ + { { 0xea2da24bf82ce682L,0x058d381607f71fe4L,0x35a024625ffad8deL, + 0xcd7b05dcaadcefabL }, + { 0xd442f8ed1d9f54ecL,0x8be3d618b2d3b5caL,0xe2220ed0e06b2ce2L, + 0x82699a5f1b0da4c0L } }, + /* 51 << 126 */ + { { 0x3ff106f571c0c3a7L,0x8f580f5a0d34180cL,0x4ebb120e22d7d375L, + 0x5e5782cce9513675L }, + { 0x2275580c99c82a70L,0xe8359fbf15ea8c4cL,0x53b48db87b415e70L, + 0xaacf2240100c6014L } }, + /* 52 << 126 */ + { { 0x9faaccf5e4652f1dL,0xbd6fdd2ad56157b2L,0xa4f4fb1f6261ec50L, + 0x244e55ad476bcd52L }, + { 0x881c9305047d320bL,0x1ca983d56181263fL,0x354e9a44278fb8eeL, + 0xad2dbc0f396e4964L } }, + /* 53 << 126 */ + { { 0x723f3aa29268b3deL,0x0d1ca29ae6e0609aL,0x794866aa6cf44252L, + 0x0b59f3e301af87edL }, + { 0xe234e5ff7f4a6c51L,0xa8768fd261dc2f7eL,0xdafc73320a94d81fL, + 0xd7f8428206938ce1L } }, + /* 54 << 126 */ + { { 0xae0b3c0e0546063eL,0x7fbadcb25d61abc6L,0xd5d7a2c9369ac400L, + 0xa5978d09ae67d10cL }, + { 0x290f211e4f85eaacL,0xe61e2ad1facac681L,0xae125225388384cdL, + 0xa7fb68e9ccfde30fL } }, + /* 55 << 126 */ + { { 0x7a59b9363daed4c2L,0x80a9aa402606f789L,0xb40c1ea5f6a6d90aL, + 0x948364d3514d5885L }, + { 0x062ebc6070985182L,0xa6db5b0e33310895L,0x64a12175e329c2f5L, + 0xc5f25bd290ea237eL } }, + /* 56 << 126 */ + { { 0x7915c5242d0a4c23L,0xeb5d26e46bb3cc52L,0x369a9116c09e2c92L, + 0x0c527f92cf182cf8L }, + { 0x9e5919382aede0acL,0xb29222086cc34939L,0x3c9d896299a34361L, + 0x3c81836dc1905fe6L } }, + /* 57 << 126 */ + { { 0x4bfeb57fa001ec5aL,0xe993f5bba0dc5dbaL,0x47884109724a1380L, + 0x8a0369ab32fe9a04L }, + { 0xea068d608c927db8L,0xbf5f37cf94655741L,0x47d402a204b6c7eaL, + 0x4551c2956af259cbL } }, + /* 58 << 126 */ + { { 0x698b71e7ed77ee8bL,0xbddf7bd0f309d5c7L,0x6201c22c34e780caL, + 0xab04f7d84c295ef4L }, + { 0x1c9472944313a8ceL,0xe532e4ac92ca4cfeL,0x89738f80d0a7a97aL, + 0xec088c88a580fd5bL } }, + /* 59 << 126 */ + { { 0x612b1ecc42ce9e51L,0x8f9840fdb25fdd2aL,0x3cda78c001e7f839L, + 0x546b3d3aece05480L }, + { 
0x271719a980d30916L,0x45497107584c20c4L,0xaf8f94785bc78608L, + 0x28c7d484277e2a4cL } }, + /* 60 << 126 */ + { { 0xfce0176788a2ffe4L,0xdc506a3528e169a5L,0x0ea108617af9c93aL, + 0x1ed2436103fa0e08L }, + { 0x96eaaa92a3d694e7L,0xc0f43b4def50bc74L,0xce6aa58c64114db4L, + 0x8218e8ea7c000fd4L } }, + /* 61 << 126 */ + { { 0xac815dfb185f8844L,0xcd7e90cb1557abfbL,0x23d16655afbfecdfL, + 0x80f3271f085cac4aL }, + { 0x7fc39aa7d0e62f47L,0x88d519d1460a48e5L,0x59559ac4d28f101eL, + 0x7981d9e9ca9ae816L } }, + /* 62 << 126 */ + { { 0x5c38652c9ac38203L,0x86eaf87f57657fe5L,0x568fc472e21f5416L, + 0x2afff39ce7e597b5L }, + { 0x3adbbb07256d4eabL,0x225986928285ab89L,0x35f8112a041caefeL, + 0x95df02e3a5064c8bL } }, + /* 63 << 126 */ + { { 0x4d63356ec7004bf3L,0x230a08f4db83c7deL,0xca27b2708709a7b7L, + 0x0d1c4cc4cb9abd2dL }, + { 0x8a0bc66e7550fee8L,0x369cd4c79cf7247eL,0x75562e8492b5b7e7L, + 0x8fed0da05802af7bL } }, + /* 64 << 126 */ + { { 0x6a7091c2e48fb889L,0x26882c137b8a9d06L,0xa24986631b82a0e2L, + 0x844ed7363518152dL }, + { 0x282f476fd86e27c7L,0xa04edaca04afefdcL,0x8b256ebc6119e34dL, + 0x56a413e90787d78bL } }, + /* 0 << 133 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 133 */ + { { 0x82ee061d5a74be50L,0xe41781c4dea16ff5L,0xe0b0c81e99bfc8a2L, + 0x624f4d690b547e2dL }, + { 0x3a83545dbdcc9ae4L,0x2573dbb6409b1e8eL,0x482960c4a6c93539L, + 0xf01059ad5ae18798L } }, + /* 2 << 133 */ + { { 0x715c9f973112795fL,0xe8244437984e6ee1L,0x55cb4858ecb66bcdL, + 0x7c136735abaffbeeL }, + { 0x546615955dbec38eL,0x51c0782c388ad153L,0x9ba4c53ac6e0952fL, + 0x27e6782a1b21dfa8L } }, + /* 3 << 133 */ + { { 0x682f903d4ed2dbc2L,0x0eba59c87c3b2d83L,0x8e9dc84d9c7e9335L, + 0x5f9b21b00eb226d7L }, + { 0xe33bd394af267baeL,0xaa86cc25be2e15aeL,0x4f0bf67d6a8ec500L, + 0x5846aa44f9630658L } }, + /* 4 << 133 */ + { { 0xfeb09740e2c2bf15L,0x627a2205a9e99704L,0xec8d73d0c2fbc565L, + 0x223eed8fc20c8de8L }, + { 0x1ee32583a8363b49L,0x1a0b6cb9c9c2b0a6L,0x49f7c3d290dbc85cL, + 0xa8dfbb971ef4c1acL } }, + /* 5 << 133 */ + { { 0xafb34d4c65c7c2abL,0x1d4610e7e2c5ea84L,0x893f6d1b973c4ab5L, + 0xa3cdd7e9945ba5c4L }, + { 0x60514983064417eeL,0x1459b23cad6bdf2bL,0x23b2c3415cf726c3L, + 0x3a82963532d6354aL } }, + /* 6 << 133 */ + { { 0x294f901fab192c18L,0xec5fcbfe7030164fL,0xe2e2fcb7e2246ba6L, + 0x1e7c88b3221a1a0cL }, + { 0x72c7dd93c92d88c5L,0x41c2148e1106fb59L,0x547dd4f5a0f60f14L, + 0xed9b52b263960f31L } }, + /* 7 << 133 */ + { { 0x6c8349ebb0a5b358L,0xb154c5c29e7e2ed6L,0xcad5eccfeda462dbL, + 0xf2d6dbe42de66b69L }, + { 0x426aedf38665e5b2L,0x488a85137b7f5723L,0x15cc43b38bcbb386L, + 0x27ad0af3d791d879L } }, + /* 8 << 133 */ + { { 0xc16c236e846e364fL,0x7f33527cdea50ca0L,0xc48107750926b86dL, + 0x6c2a36090598e70cL }, + { 0xa6755e52f024e924L,0xe0fa07a49db4afcaL,0x15c3ce7d66831790L, + 0x5b4ef350a6cbb0d6L } }, + /* 9 << 133 */ + { { 0x2c4aafc4b6205969L,0x42563f02f6c7854fL,0x016aced51d983b48L, + 0xfeb356d899949755L }, + { 0x8c2a2c81d1a39bd7L,0x8f44340fe6934ae9L,0x148cf91c447904daL, + 0x7340185f0f51a926L } }, + /* 10 << 133 */ + { { 0x2f8f00fb7409ab46L,0x057e78e680e289b2L,0x03e5022ca888e5d1L, + 0x3c87111a9dede4e2L }, + { 0x5b9b0e1c7809460bL,0xe751c85271c9abc7L,0x8b944e28c7cc1dc9L, + 0x4f201ffa1d3cfa08L } }, + /* 11 << 133 */ + { { 0x02fc905c3e6721ceL,0xd52d70dad0b3674cL,0x5dc2e5ca18810da4L, + 0xa984b2735c69dd99L }, + { 0x63b9252784de5ca4L,0x2f1c9872c852dec4L,0x18b03593c2e3de09L, + 0x19d70b019813dc2fL } }, + /* 12 << 133 */ + { { 0x42806b2da6dc1d29L,0xd3030009f871e144L,0xa1feb333aaf49276L, + 0xb5583b9ec70bc04bL }, + { 
0x1db0be7895695f20L,0xfc84181189d012b5L,0x6409f27205f61643L, + 0x40d34174d5883128L } }, + /* 13 << 133 */ + { { 0xd79196f567419833L,0x6059e252863b7b08L,0x84da18171c56700cL, + 0x5758ee56b28d3ec4L }, + { 0x7da2771d013b0ea6L,0xfddf524b54c5e9b9L,0x7df4faf824305d80L, + 0x58f5c1bf3a97763fL } }, + /* 14 << 133 */ + { { 0xa5af37f17c696042L,0xd4cba22c4a2538deL,0x211cb9959ea42600L, + 0xcd105f417b069889L }, + { 0xb1e1cf19ddb81e74L,0x472f2d895157b8caL,0x086fb008ee9db885L, + 0x365cd5700f26d131L } }, + /* 15 << 133 */ + { { 0x284b02bba2be7053L,0xdcbbf7c67ab9a6d6L,0x4425559c20f7a530L, + 0x961f2dfa188767c8L }, + { 0xe2fd943570dc80c4L,0x104d6b63f0784120L,0x7f592bc153567122L, + 0xf6bc1246f688ad77L } }, + /* 16 << 133 */ + { { 0x05214c050f15dde9L,0xa47a76a80d5f2b82L,0xbb254d3062e82b62L, + 0x11a05fe03ec955eeL }, + { 0x7eaff46e9d529b36L,0x55ab13018f9e3df6L,0xc463e37199317698L, + 0xfd251438ccda47adL } }, + /* 17 << 133 */ + { { 0xca9c354723d695eaL,0x48ce626e16e589b5L,0x6b5b64c7b187d086L, + 0xd02e1794b2207948L }, + { 0x8b58e98f7198111dL,0x90ca6305dcf9c3ccL,0x5691fe72f34089b0L, + 0x60941af1fc7c80ffL } }, + /* 18 << 133 */ + { { 0xa09bc0a222eb51e5L,0xc0bb7244aa9cf09aL,0x36a8077f80159f06L, + 0x8b5c989edddc560eL }, + { 0x19d2f316512e1f43L,0x02eac554ad08ff62L,0x012ab84c07d20b4eL, + 0x37d1e115d6d4e4e1L } }, + /* 19 << 133 */ + { { 0xb6443e1aab7b19a8L,0xf08d067edef8cd45L,0x63adf3e9685e03daL, + 0xcf15a10e4792b916L }, + { 0xf44bcce5b738a425L,0xebe131d59636b2fdL,0x940688417850d605L, + 0x09684eaab40d749dL } }, + /* 20 << 133 */ + { { 0x8c3c669c72ba075bL,0x89f78b55ba469015L,0x5706aade3e9f8ba8L, + 0x6d8bd565b32d7ed7L }, + { 0x25f4e63b805f08d6L,0x7f48200dc3bcc1b5L,0x4e801968b025d847L, + 0x74afac0487cbe0a8L } }, + /* 21 << 133 */ + { { 0x43ed2c2b7e63d690L,0xefb6bbf00223cdb8L,0x4fec3cae2884d3feL, + 0x065ecce6d75e25a4L }, + { 0x6c2294ce69f79071L,0x0d9a8e5f044b8666L,0x5009f23817b69d8fL, + 0x3c29f8fec5dfdaf7L } }, + /* 22 << 133 */ + { { 0x9067528febae68c4L,0x5b38563230c5ba21L,0x540df1191fdd1aecL, + 0xcf37825bcfba4c78L }, + { 0x77eff980beb11454L,0x40a1a99160c1b066L,0xe8018980f889a1c7L, + 0xb9c52ae976c24be0L } }, + /* 23 << 133 */ + { { 0x05fbbcce45650ef4L,0xae000f108aa29ac7L,0x884b71724f04c470L, + 0x7cd4fde219bb5c25L }, + { 0x6477b22ae8840869L,0xa88688595fbd0686L,0xf23cc02e1116dfbaL, + 0x76cd563fd87d7776L } }, + /* 24 << 133 */ + { { 0xe2a37598a9d82abfL,0x5f188ccbe6c170f5L,0x816822005066b087L, + 0xda22c212c7155adaL }, + { 0x151e5d3afbddb479L,0x4b606b846d715b99L,0x4a73b54bf997cb2eL, + 0x9a1bfe433ecd8b66L } }, + /* 25 << 133 */ + { { 0x1c3128092a67d48aL,0xcd6a671e031fa9e2L,0xbec3312a0e43a34aL, + 0x1d93563955ef47d3L }, + { 0x5ea024898fea73eaL,0x8247b364a035afb2L,0xb58300a65265b54cL, + 0x3286662f722c7148L } }, + /* 26 << 133 */ + { { 0xb77fd76bb4ec4c20L,0xf0a12fa70f3fe3fdL,0xf845bbf541d8c7e8L, + 0xe4d969ca5ec10aa8L }, + { 0x4c0053b743e232a3L,0xdc7a3fac37f8a45aL,0x3c4261c520d81c8fL, + 0xfd4b3453b00eab00L } }, + /* 27 << 133 */ + { { 0x76d48f86d36e3062L,0x626c5277a143ff02L,0x538174deaf76f42eL, + 0x2267aa866407ceacL }, + { 0xfad7635172e572d5L,0xab861af7ba7330ebL,0xa0a1c8c7418d8657L, + 0x988821cb20289a52L } }, + /* 28 << 133 */ + { { 0x79732522cccc18adL,0xaadf3f8df1a6e027L,0xf7382c9317c2354dL, + 0x5ce1680cd818b689L }, + { 0x359ebbfcd9ecbee9L,0x4330689c1cae62acL,0xb55ce5b4c51ac38aL, + 0x7921dfeafe238ee8L } }, + /* 29 << 133 */ + { { 0x3972bef8271d1ca5L,0x3e423bc7e8aabd18L,0x57b09f3f44a3e5e3L, + 0x5da886ae7b444d66L }, + { 0x68206634a9964375L,0x356a2fa3699cd0ffL,0xaf0faa24dba515e9L, + 0x536e1f5cb321d79aL } }, + /* 30 << 133 */ + { { 
0xd3b9913a5c04e4eaL,0xd549dcfed6f11513L,0xee227bf579fd1d94L, + 0x9f35afeeb43f2c67L }, + { 0xd2638d24f1314f53L,0x62baf948cabcd822L,0x5542de294ef48db0L, + 0xb3eb6a04fc5f6bb2L } }, + /* 31 << 133 */ + { { 0x23c110ae1208e16aL,0x1a4d15b5f8363e24L,0x30716844164be00bL, + 0xa8e24824f6f4690dL }, + { 0x548773a290b170cfL,0xa1bef33142f191f4L,0x70f418d09247aa97L, + 0xea06028e48be9147L } }, + /* 32 << 133 */ + { { 0xe13122f3dbfb894eL,0xbe9b79f6ce274b18L,0x85a49de5ca58aadfL, + 0x2495775811487351L }, + { 0x111def61bb939099L,0x1d6a974a26d13694L,0x4474b4ced3fc253bL, + 0x3a1485e64c5db15eL } }, + /* 33 << 133 */ + { { 0xe79667b4147c15b4L,0xe34f553b7bc61301L,0x032b80f817094381L, + 0x55d8bafd723eaa21L }, + { 0x5a987995f1c0e74eL,0x5a9b292eebba289cL,0x413cd4b2eb4c8251L, + 0x98b5d243d162db0aL } }, + /* 34 << 133 */ + { { 0xbb47bf6668342520L,0x08d68949baa862d1L,0x11f349c7e906abcdL, + 0x454ce985ed7bf00eL }, + { 0xacab5c9eb55b803bL,0xb03468ea31e3c16dL,0x5c24213dd273bf12L, + 0x211538eb71587887L } }, + /* 35 << 133 */ + { { 0x198e4a2f731dea2dL,0xd5856cf274ed7b2aL,0x86a632eb13a664feL, + 0x932cd909bda41291L }, + { 0x850e95d4c0c4ddc0L,0xc0f422f8347fc2c9L,0xe68cbec486076bcbL, + 0xf9e7c0c0cd6cd286L } }, + /* 36 << 133 */ + { { 0x65994ddb0f5f27caL,0xe85461fba80d59ffL,0xff05481a66601023L, + 0xc665427afc9ebbfbL }, + { 0xb0571a697587fd52L,0x935289f88d49efceL,0x61becc60ea420688L, + 0xb22639d913a786afL } }, + /* 37 << 133 */ + { { 0x1a8e6220361ecf90L,0x001f23e025506463L,0xe4ae9b5d0a5c2b79L, + 0xebc9cdadd8149db5L }, + { 0xb33164a1934aa728L,0x750eb00eae9b60f3L,0x5a91615b9b9cfbfdL, + 0x97015cbfef45f7f6L } }, + /* 38 << 133 */ + { { 0xb462c4a5bf5151dfL,0x21adcc41b07118f2L,0xd60c545b043fa42cL, + 0xfc21aa54e96be1abL }, + { 0xe84bc32f4e51ea80L,0x3dae45f0259b5d8dL,0xbb73c7ebc38f1b5eL, + 0xe405a74ae8ae617dL } }, + /* 39 << 133 */ + { { 0xbb1ae9c69f1c56bdL,0x8c176b9849f196a4L,0xc448f3116875092bL, + 0xb5afe3de9f976033L }, + { 0xa8dafd49145813e5L,0x687fc4d9e2b34226L,0xf2dfc92d4c7ff57fL, + 0x004e3fc1401f1b46L } }, + /* 40 << 133 */ + { { 0x5afddab61430c9abL,0x0bdd41d32238e997L,0xf0947430418042aeL, + 0x71f9addacdddc4cbL }, + { 0x7090c016c52dd907L,0xd9bdf44d29e2047fL,0xe6f1fe801b1011a6L, + 0xb63accbcd9acdc78L } }, + /* 41 << 133 */ + { { 0xcfc7e2351272a95bL,0x0c667717a6276ac8L,0x3c0d3709e2d7eef7L, + 0x5add2b069a685b3eL }, + { 0x363ad32d14ea5d65L,0xf8e01f068d7dd506L,0xc9ea221375b4aac6L, + 0xed2a2bf90d353466L } }, + /* 42 << 133 */ + { { 0x439d79b5e9d3a7c3L,0x8e0ee5a681b7f34bL,0xcf3dacf51dc4ba75L, + 0x1d3d1773eb3310c7L }, + { 0xa8e671127747ae83L,0x31f43160197d6b40L,0x0521cceecd961400L, + 0x67246f11f6535768L } }, + /* 43 << 133 */ + { { 0x702fcc5aef0c3133L,0x247cc45d7e16693bL,0xfd484e49c729b749L, + 0x522cef7db218320fL }, + { 0xe56ef40559ab93b3L,0x225fba119f181071L,0x33bd659515330ed0L, + 0xc4be69d51ddb32f7L } }, + /* 44 << 133 */ + { { 0x264c76680448087cL,0xac30903f71432daeL,0x3851b26600f9bf47L, + 0x400ed3116cdd6d03L }, + { 0x045e79fef8fd2424L,0xfdfd974afa6da98bL,0x45c9f6410c1e673aL, + 0x76f2e7335b2c5168L } }, + /* 45 << 133 */ + { { 0x1adaebb52a601753L,0xb286514cc57c2d49L,0xd87696701e0bfd24L, + 0x950c547e04478922L }, + { 0xd1d41969e5d32bfeL,0x30bc1472750d6c3eL,0x8f3679fee0e27f3aL, + 0x8f64a7dca4a6ee0cL } }, + /* 46 << 133 */ + { { 0x2fe59937633dfb1fL,0xea82c395977f2547L,0xcbdfdf1a661ea646L, + 0xc7ccc591b9085451L }, + { 0x8217796281761e13L,0xda57596f9196885cL,0xbc17e84928ffbd70L, + 0x1e6e0a412671d36fL } }, + /* 47 << 133 */ + { { 0x61ae872c4152fcf5L,0x441c87b09e77e754L,0xd0799dd5a34dff09L, + 0x766b4e4488a6b171L }, + { 
0xdc06a51211f1c792L,0xea02ae934be35c3eL,0xe5ca4d6de90c469eL, + 0x4df4368e56e4ff5cL } }, + /* 48 << 133 */ + { { 0x7817acab4baef62eL,0x9f5a2202a85b91e8L,0x9666ebe66ce57610L, + 0x32ad31f3f73bfe03L }, + { 0x628330a425bcf4d6L,0xea950593515056e6L,0x59811c89e1332156L, + 0xc89cf1fe8c11b2d7L } }, + /* 49 << 133 */ + { { 0x75b6391304e60cc0L,0xce811e8d4625d375L,0x030e43fc2d26e562L, + 0xfbb30b4b608d36a0L }, + { 0x634ff82c48528118L,0x7c6fe085cd285911L,0x7f2830c099358f28L, + 0x2e60a95e665e6c09L } }, + /* 50 << 133 */ + { { 0x08407d3d9b785dbfL,0x530889aba759bce7L,0xf228e0e652f61239L, + 0x2b6d14616879be3cL }, + { 0xe6902c0451a7bbf7L,0x30ad99f076f24a64L,0x66d9317a98bc6da0L, + 0xf4f877f3cb596ac0L } }, + /* 51 << 133 */ + { { 0xb05ff62d4c44f119L,0x4555f536e9b77416L,0xc7c0d0598caed63bL, + 0x0cd2b7cec358b2a9L }, + { 0x3f33287b46945fa3L,0xf8785b20d67c8791L,0xc54a7a619637bd08L, + 0x54d4598c18be79d7L } }, + /* 52 << 133 */ + { { 0x889e5acbc46d7ce1L,0x9a515bb78b085877L,0xfac1a03d0b7a5050L, + 0x7d3e738af2926035L }, + { 0x861cc2ce2a6cb0ebL,0x6f2e29558f7adc79L,0x61c4d45133016376L, + 0xd9fd2c805ad59090L } }, + /* 53 << 133 */ + { { 0xe5a83738b2b836a1L,0x855b41a07c0d6622L,0x186fe3177cc19af1L, + 0x6465c1fffdd99acbL }, + { 0x46e5c23f6974b99eL,0x75a7cf8ba2717cbeL,0x4d2ebc3f062be658L, + 0x094b44475f209c98L } }, + /* 54 << 133 */ + { { 0x4af285edb940cb5aL,0x6706d7927cc82f10L,0xc8c8776c030526faL, + 0xfa8e6f76a0da9140L }, + { 0x77ea9d34591ee4f0L,0x5f46e33740274166L,0x1bdf98bbea671457L, + 0xd7c08b46862a1fe2L } }, + /* 55 << 133 */ + { { 0x46cc303c1c08ad63L,0x995434404c845e7bL,0x1b8fbdb548f36bf7L, + 0x5b82c3928c8273a7L }, + { 0x08f712c4928435d5L,0x071cf0f179330380L,0xc74c2d24a8da054aL, + 0xcb0e720143c46b5cL } }, + /* 56 << 133 */ + { { 0x0ad7337ac0b7eff3L,0x8552225ec5e48b3cL,0xe6f78b0c73f13a5fL, + 0x5e70062e82349cbeL }, + { 0x6b8d5048e7073969L,0x392d2a29c33cb3d2L,0xee4f727c4ecaa20fL, + 0xa068c99e2ccde707L } }, + /* 57 << 133 */ + { { 0xfcd5651fb87a2913L,0xea3e3c153cc252f0L,0x777d92df3b6cd3e4L, + 0x7a414143c5a732e7L }, + { 0xa895951aa71ff493L,0xfe980c92bbd37cf6L,0x45bd5e64decfeeffL, + 0x910dc2a9a44c43e9L } }, + /* 58 << 133 */ + { { 0xcb403f26cca9f54dL,0x928bbdfb9303f6dbL,0x3c37951ea9eee67cL, + 0x3bd61a52f79961c3L }, + { 0x09a238e6395c9a79L,0x6940ca2d61eb352dL,0x7d1e5c5ec1875631L, + 0x1e19742c1e1b20d1L } }, + /* 59 << 133 */ + { { 0x4633d90823fc2e6eL,0xa76e29a908959149L,0x61069d9c84ed7da5L, + 0x0baa11cf5dbcad51L }, + { 0xd01eec64961849daL,0x93b75f1faf3d8c28L,0x57bc4f9f1ca2ee44L, + 0x5a26322d00e00558L } }, + /* 60 << 133 */ + { { 0x1888d65861a023efL,0x1d72aab4b9e5246eL,0xa9a26348e5563ec0L, + 0xa0971963c3439a43L }, + { 0x567dd54badb9b5b7L,0x73fac1a1c45a524bL,0x8fe97ef7fe38e608L, + 0x608748d23f384f48L } }, + /* 61 << 133 */ + { { 0xb0571794c486094fL,0x869254a38bf3a8d6L,0x148a8dd1310b0e25L, + 0x99ab9f3f9aa3f7d8L }, + { 0x0927c68a6706c02eL,0x22b5e76c69790e6cL,0x6c3252606c71376cL, + 0x53a5769009ef6657L } }, + /* 62 << 133 */ + { { 0x8d63f852edffcf3aL,0xb4d2ed043c0a6f55L,0xdb3aa8de12519b9eL, + 0x5d38e9c41e0a569aL }, + { 0x871528bf303747e2L,0xa208e77cf5b5c18dL,0x9d129c88ca6bf923L, + 0xbcbf197fbf02839fL } }, + /* 63 << 133 */ + { { 0x9b9bf03027323194L,0x3b055a8b339ca59dL,0xb46b23120f669520L, + 0x19789f1f497e5f24L }, + { 0x9c499468aaf01801L,0x72ee11908b69d59cL,0x8bd39595acf4c079L, + 0x3ee11ece8e0cd048L } }, + /* 64 << 133 */ + { { 0xebde86ec1ed66f18L,0x225d906bd61fce43L,0x5cab07d6e8bed74dL, + 0x16e4617f27855ab7L }, + { 0x6568aaddb2fbc3ddL,0xedb5484f8aeddf5bL,0x878f20e86dcf2fadL, + 0x3516497c615f5699L } }, + /* 0 << 140 */ + { { 0x00, 
0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 140 */ + { { 0xef0a3fecfa181e69L,0x9ea02f8130d69a98L,0xb2e9cf8e66eab95dL, + 0x520f2beb24720021L }, + { 0x621c540a1df84361L,0x1203772171fa6d5dL,0x6e3c7b510ff5f6ffL, + 0x817a069babb2bef3L } }, + /* 2 << 140 */ + { { 0x83572fb6b294cda6L,0x6ce9bf75b9039f34L,0x20e012f0095cbb21L, + 0xa0aecc1bd063f0daL }, + { 0x57c21c3af02909e5L,0xc7d59ecf48ce9cdcL,0x2732b8448ae336f8L, + 0x056e37233f4f85f4L } }, + /* 3 << 140 */ + { { 0x8a10b53189e800caL,0x50fe0c17145208fdL,0x9e43c0d3b714ba37L, + 0x427d200e34189accL }, + { 0x05dee24fe616e2c0L,0x9c25f4c8ee1854c1L,0x4d3222a58f342a73L, + 0x0807804fa027c952L } }, + /* 4 << 140 */ + { { 0xc222653a4f0d56f3L,0x961e4047ca28b805L,0x2c03f8b04a73434bL, + 0x4c966787ab712a19L }, + { 0xcc196c42864fee42L,0xc1be93da5b0ece5cL,0xa87d9f22c131c159L, + 0x2bb6d593dce45655L } }, + /* 5 << 140 */ + { { 0x22c49ec9b809b7ceL,0x8a41486be2c72c2cL,0x813b9420fea0bf36L, + 0xb3d36ee9a66dac69L }, + { 0x6fddc08a328cc987L,0x0a3bcd2c3a326461L,0x7103c49dd810dbbaL, + 0xf9d81a284b78a4c4L } }, + /* 6 << 140 */ + { { 0x3de865ade4d55941L,0xdedafa5e30384087L,0x6f414abb4ef18b9bL, + 0x9ee9ea42faee5268L }, + { 0x260faa1637a55a4aL,0xeb19a514015f93b9L,0x51d7ebd29e9c3598L, + 0x523fc56d1932178eL } }, + /* 7 << 140 */ + { { 0x501d070cb98fe684L,0xd60fbe9a124a1458L,0xa45761c892bc6b3fL, + 0xf5384858fe6f27cbL }, + { 0x4b0271f7b59e763bL,0x3d4606a95b5a8e5eL,0x1eda5d9b05a48292L, + 0xda7731d0e6fec446L } }, + /* 8 << 140 */ + { { 0xa3e3369390d45871L,0xe976404006166d8dL,0xb5c3368289a90403L, + 0x4bd1798372f1d637L }, + { 0xa616679ed5d2c53aL,0x5ec4bcd8fdcf3b87L,0xae6d7613b66a694eL, + 0x7460fc76e3fc27e5L } }, + /* 9 << 140 */ + { { 0x70469b8295caabeeL,0xde024ca5889501e3L,0x6bdadc06076ed265L, + 0x0cb1236b5a0ef8b2L }, + { 0x4065ddbf0972ebf9L,0xf1dd387522aca432L,0xa88b97cf744aff76L, + 0xd1359afdfe8e3d24L } }, + /* 10 << 140 */ + { { 0x52a3ba2b91502cf3L,0x2c3832a8084db75dL,0x04a12dddde30b1c9L, + 0x7802eabce31fd60cL }, + { 0x33707327a37fddabL,0x65d6f2abfaafa973L,0x3525c5b811e6f91aL, + 0x76aeb0c95f46530bL } }, + /* 11 << 140 */ + { { 0xe8815ff62f93a675L,0xa6ec968405f48679L,0x6dcbb556358ae884L, + 0x0af61472e19e3873L }, + { 0x72334372a5f696beL,0xc65e57ea6f22fb70L,0x268da30c946cea90L, + 0x136a8a8765681b2aL } }, + /* 12 << 140 */ + { { 0xad5e81dc0f9f44d4L,0xf09a69602c46585aL,0xd1649164c447d1b1L, + 0x3b4b36c8879dc8b1L }, + { 0x20d4177b3b6b234cL,0x096a25051730d9d0L,0x0611b9b8ef80531dL, + 0xba904b3b64bb495dL } }, + /* 13 << 140 */ + { { 0x1192d9d493a3147aL,0x9f30a5dc9a565545L,0x90b1f9cb6ef07212L, + 0x299585460d87fc13L }, + { 0xd3323effc17db9baL,0xcb18548ccb1644a8L,0x18a306d44f49ffbcL, + 0x28d658f14c2e8684L } }, + /* 14 << 140 */ + { { 0x44ba60cda99f8c71L,0x67b7abdb4bf742ffL,0x66310f9c914b3f99L, + 0xae430a32f412c161L }, + { 0x1e6776d388ace52fL,0x4bc0fa2452d7067dL,0x03c286aa8f07cd1bL, + 0x4cb8f38ca985b2c1L } }, + /* 15 << 140 */ + { { 0x83ccbe808c3bff36L,0x005a0bd25263e575L,0x460d7dda259bdcd1L, + 0x4a1c5642fa5cab6bL }, + { 0x2b7bdbb99fe4fc88L,0x09418e28cc97bbb5L,0xd8274fb4a12321aeL, + 0xb137007d5c87b64eL } }, + /* 16 << 140 */ + { { 0x80531fe1c63c4962L,0x50541e89981fdb25L,0xdc1291a1fd4c2b6bL, + 0xc0693a17a6df4fcaL }, + { 0xb2c4604e0117f203L,0x245f19630a99b8d0L,0xaedc20aac6212c44L, + 0xb1ed4e56520f52a8L } }, + /* 17 << 140 */ + { { 0xfe48f575f8547be3L,0x0a7033cda9e45f98L,0x4b45d3a918c50100L, + 0xb2a6cd6aa61d41daL }, + { 0x60bbb4f557933c6bL,0xa7538ebd2b0d7ffcL,0x9ea3ab8d8cd626b6L, + 0x8273a4843601625aL } }, + /* 18 << 140 */ + { { 
0x888598450168e508L,0x8cbc9bb299a94abdL,0x713ac792fab0a671L, + 0xa3995b196c9ebffcL }, + { 0xe711668e1239e152L,0x56892558bbb8dff4L,0x8bfc7dabdbf17963L, + 0x5b59fe5ab3de1253L } }, + /* 19 << 140 */ + { { 0x7e3320eb34a9f7aeL,0xe5e8cf72d751efe4L,0x7ea003bcd9be2f37L, + 0xc0f551a0b6c08ef7L }, + { 0x56606268038f6725L,0x1dd38e356d92d3b6L,0x07dfce7cc3cbd686L, + 0x4e549e04651c5da8L } }, + /* 20 << 140 */ + { { 0x4058f93b08b19340L,0xc2fae6f4cac6d89dL,0x4bad8a8c8f159cc7L, + 0x0ddba4b3cb0b601cL }, + { 0xda4fc7b51dd95f8cL,0x1d163cd7cea5c255L,0x30707d06274a8c4cL, + 0x79d9e0082802e9ceL } }, + /* 21 << 140 */ + { { 0x02a29ebfe6ddd505L,0x37064e74b50bed1aL,0x3f6bae65a7327d57L, + 0x3846f5f1f83920bcL }, + { 0x87c3749160df1b9bL,0x4cfb28952d1da29fL,0x10a478ca4ed1743cL, + 0x390c60303edd47c6L } }, + /* 22 << 140 */ + { { 0x8f3e53128c0a78deL,0xccd02bda1e85df70L,0xd6c75c03a61b6582L, + 0x0762921cfc0eebd1L }, + { 0xd34d0823d85010c0L,0xd73aaacb0044cf1fL,0xfb4159bba3b5e78aL, + 0x2287c7f7e5826f3fL } }, + /* 23 << 140 */ + { { 0x4aeaf742580b1a01L,0xf080415d60423b79L,0xe12622cda7dea144L, + 0x49ea499659d62472L }, + { 0xb42991ef571f3913L,0x0610f214f5b25a8aL,0x47adc58530b79e8fL, + 0xf90e3df607a065a2L } }, + /* 24 << 140 */ + { { 0x5d0a5deb43e2e034L,0x53fb5a34444024aaL,0xa8628c686b0c9f7fL, + 0x9c69c29cac563656L }, + { 0x5a231febbace47b6L,0xbdce02899ea5a2ecL,0x05da1fac9463853eL, + 0x96812c52509e78aaL } }, + /* 25 << 140 */ + { { 0xd3fb577157151692L,0xeb2721f8d98e1c44L,0xc050608732399be1L, + 0xda5a5511d979d8b8L }, + { 0x737ed55dc6f56780L,0xe20d30040dc7a7f4L,0x02ce7301f5941a03L, + 0x91ef5215ed30f83aL } }, + /* 26 << 140 */ + { { 0x28727fc14092d85fL,0x72d223c65c49e41aL,0xa7cf30a2ba6a4d81L, + 0x7c086209b030d87dL }, + { 0x04844c7dfc588b09L,0x728cd4995874bbb0L,0xcc1281eee84c0495L, + 0x0769b5baec31958fL } }, + /* 27 << 140 */ + { { 0x665c228bf99c2471L,0xf2d8a11b191eb110L,0x4594f494d36d7024L, + 0x482ded8bcdcb25a1L }, + { 0xc958a9d8dadd4885L,0x7004477ef1d2b547L,0x0a45f6ef2a0af550L, + 0x4fc739d62f8d6351L } }, + /* 28 << 140 */ + { { 0x75cdaf27786f08a9L,0x8700bb2642c2737fL,0x855a71411c4e2670L, + 0x810188c115076fefL }, + { 0xc251d0c9abcd3297L,0xae4c8967f48108ebL,0xbd146de718ceed30L, + 0xf9d4f07ac986bcedL } }, + /* 29 << 140 */ + { { 0x5ad98ed583fa1e08L,0x7780d33ebeabd1fbL,0xe330513c903b1196L, + 0xba11de9ea47bc8c4L }, + { 0x684334da02c2d064L,0x7ecf360da48de23bL,0x57a1b4740a9089d8L, + 0xf28fa439ff36734cL } }, + /* 30 << 140 */ + { { 0xf2a482cbea4570b3L,0xee65d68ba5ebcee9L,0x988d0036b9694cd5L, + 0x53edd0e937885d32L }, + { 0xe37e3307beb9bc6dL,0xe9abb9079f5c6768L,0x4396ccd551f2160fL, + 0x2500888c47336da6L } }, + /* 31 << 140 */ + { { 0x383f9ed9926fce43L,0x809dd1c704da2930L,0x30f6f5968a4cb227L, + 0x0d700c7f73a56b38L }, + { 0x1825ea33ab64a065L,0xaab9b7351338df80L,0x1516100d9b63f57fL, + 0x2574395a27a6a634L } }, + /* 32 << 140 */ + { { 0xb5560fb6700a1acdL,0xe823fd73fd999681L,0xda915d1f6cb4e1baL, + 0x0d0301186ebe00a3L }, + { 0x744fb0c989fca8cdL,0x970d01dbf9da0e0bL,0x0ad8c5647931d76fL, + 0xb15737bff659b96aL } }, + /* 33 << 140 */ + { { 0xdc9933e8a8b484e7L,0xb2fdbdf97a26dec7L,0x2349e9a49f1f0136L, + 0x7860368e70fddddbL }, + { 0xd93d2c1cf9ad3e18L,0x6d6c5f17689f4e79L,0x7a544d91b24ff1b6L, + 0x3e12a5ebfe16cd8cL } }, + /* 34 << 140 */ + { { 0x543574e9a56b872fL,0xa1ad550cfcf68ea2L,0x689e37d23f560ef7L, + 0x8c54b9cac9d47a8bL }, + { 0x46d40a4a088ac342L,0xec450c7c1576c6d0L,0xb589e31c1f9689e9L, + 0xdacf2602b8781718L } }, + /* 35 << 140 */ + { { 0xa89237c6c8cb6b42L,0x1326fc93b96ef381L,0x55d56c6db5f07825L, + 0xacba2eea7449e22dL }, + { 
0x74e0887a633c3000L,0xcb6cd172d7cbcf71L,0x309e81dec36cf1beL, + 0x07a18a6d60ae399bL } }, + /* 36 << 140 */ + { { 0xb36c26799edce57eL,0x52b892f4df001d41L,0xd884ae5d16a1f2c6L, + 0x9b329424efcc370aL }, + { 0x3120daf2bd2e21dfL,0x55298d2d02470a99L,0x0b78af6ca05db32eL, + 0x5c76a331601f5636L } }, + /* 37 << 140 */ + { { 0xaae861fff8a4f29cL,0x70dc9240d68f8d49L,0x960e649f81b1321cL, + 0x3d2c801b8792e4ceL }, + { 0xf479f77242521876L,0x0bed93bc416c79b1L,0xa67fbc05263e5bc9L, + 0x01e8e630521db049L } }, + /* 38 << 140 */ + { { 0x76f26738c6f3431eL,0xe609cb02e3267541L,0xb10cff2d818c877cL, + 0x1f0e75ce786a13cbL }, + { 0xf4fdca641158544dL,0x5d777e896cb71ed0L,0x3c233737a9aa4755L, + 0x7b453192e527ab40L } }, + /* 39 << 140 */ + { { 0xdb59f68839f05ffeL,0x8f4f4be06d82574eL,0xcce3450cee292d1bL, + 0xaa448a1261ccd086L }, + { 0xabce91b3f7914967L,0x4537f09b1908a5edL,0xa812421ef51042e7L, + 0xfaf5cebcec0b3a34L } }, + /* 40 << 140 */ + { { 0x730ffd874ca6b39aL,0x70fb72ed02efd342L,0xeb4735f9d75c8edbL, + 0xc11f2157c278aa51L }, + { 0xc459f635bf3bfebfL,0x3a1ff0b46bd9601fL,0xc9d12823c420cb73L, + 0x3e9af3e23c2915a3L } }, + /* 41 << 140 */ + { { 0xe0c82c72b41c3440L,0x175239e5e3039a5fL,0xe1084b8a558795a3L, + 0x328d0a1dd01e5c60L }, + { 0x0a495f2ed3788a04L,0x25d8ff1666c11a9fL,0xf5155f059ed692d6L, + 0x954fa1074f425fe4L } }, + /* 42 << 140 */ + { { 0xd16aabf2e98aaa99L,0x90cd8ba096b0f88aL,0x957f4782c154026aL, + 0x54ee073452af56d2L }, + { 0xbcf89e5445b4147aL,0x3d102f219a52816cL,0x6808517e39b62e77L, + 0x92e2542169169ad8L } }, + /* 43 << 140 */ + { { 0xd721d871bb608558L,0x60e4ebaef6d4ff9bL,0x0ba1081941f2763eL, + 0xca2e45be51ee3247L }, + { 0x66d172ec2bfd7a5fL,0x528a8f2f74d0b12dL,0xe17f1e38dabe70dcL, + 0x1d5d73169f93983cL } }, + /* 44 << 140 */ + { { 0x51b2184adf423e31L,0xcb417291aedb1a10L,0x2054ca93625bcab9L, + 0x54396860a98998f0L }, + { 0x4e53f6c4a54ae57eL,0x0ffeb590ee648e9dL,0xfbbdaadc6afaf6bcL, + 0xf88ae796aa3bfb8aL } }, + /* 45 << 140 */ + { { 0x209f1d44d2359ed9L,0xac68dd03f3544ce2L,0xf378da47fd51e569L, + 0xe1abd8602cc80097L }, + { 0x23ca18d9343b6e3aL,0x480797e8b40a1baeL,0xd1f0c717533f3e67L, + 0x4489697006e6cdfcL } }, + /* 46 << 140 */ + { { 0x8ca2105552a82e8dL,0xb2caf78578460cdcL,0x4c1b7b62e9037178L, + 0xefc09d2cdb514b58L }, + { 0x5f2df9ee9113be5cL,0x2fbda78fb3f9271cL,0xe09a81af8f83fc54L, + 0x06b138668afb5141L } }, + /* 47 << 140 */ + { { 0x38f6480f43e3865dL,0x72dd77a81ddf47d9L,0xf2a8e9714c205ff7L, + 0x46d449d89d088ad8L }, + { 0x926619ea185d706fL,0xe47e02ebc7dd7f62L,0xe7f120a78cbc2031L, + 0xc18bef00998d4ac9L } }, + /* 48 << 140 */ + { { 0x18f37a9c6bdf22daL,0xefbc432f90dc82dfL,0xc52cef8e5d703651L, + 0x82887ba0d99881a5L }, + { 0x7cec9ddab920ec1dL,0xd0d7e8c3ec3e8d3bL,0x445bc3954ca88747L, + 0xedeaa2e09fd53535L } }, + /* 49 << 140 */ + { { 0x461b1d936cc87475L,0xd92a52e26d2383bdL,0xfabccb59d7903546L, + 0x6111a7613d14b112L }, + { 0x0ae584feb3d5f612L,0x5ea69b8d60e828ecL,0x6c07898554087030L, + 0x649cab04ac4821feL } }, + /* 50 << 140 */ + { { 0x25ecedcf8bdce214L,0xb5622f7286af7361L,0x0e1227aa7038b9e2L, + 0xd0efb273ac20fa77L }, + { 0x817ff88b79df975bL,0x856bf2861999503eL,0xb4d5351f5038ec46L, + 0x740a52c5fc42af6eL } }, + /* 51 << 140 */ + { { 0x2e38bb152cbb1a3fL,0xc3eb99fe17a83429L,0xca4fcbf1dd66bb74L, + 0x880784d6cde5e8fcL }, + { 0xddc84c1cb4e7a0beL,0x8780510dbd15a72fL,0x44bcf1af81ec30e1L, + 0x141e50a80a61073eL } }, + /* 52 << 140 */ + { { 0x0d95571847be87aeL,0x68a61417f76a4372L,0xf57e7e87c607c3d3L, + 0x043afaf85252f332L }, + { 0xcc14e1211552a4d2L,0xb6dee692bb4d4ab4L,0xb6ab74c8a03816a4L, + 0x84001ae46f394a29L } }, + /* 53 << 140 */ + { { 
0x5bed8344d795fb45L,0x57326e7db79f55a5L,0xc9533ce04accdffcL, + 0x53473caf3993fa04L }, + { 0x7906eb93a13df4c8L,0xa73e51f697cbe46fL,0xd1ab3ae10ae4ccf8L, + 0x256145088a5b3dbcL } }, + /* 54 << 140 */ + { { 0x61eff96211a71b27L,0xdf71412b6bb7fa39L,0xb31ba6b82bd7f3efL, + 0xb0b9c41569180d29L }, + { 0xeec14552014cdde5L,0x702c624b227b4bbbL,0x2b15e8c2d3e988f3L, + 0xee3bcc6da4f7fd04L } }, + /* 55 << 140 */ + { { 0x9d00822a42ac6c85L,0x2db0cea61df9f2b7L,0xd7cad2ab42de1e58L, + 0x346ed5262d6fbb61L }, + { 0xb39629951a2faf09L,0x2fa8a5807c25612eL,0x30ae04da7cf56490L, + 0x756629080eea3961L } }, + /* 56 << 140 */ + { { 0x3609f5c53d080847L,0xcb081d395241d4f6L,0xb4fb381077961a63L, + 0xc20c59842abb66fcL }, + { 0x3d40aa7cf902f245L,0x9cb127364e536b1eL,0x5eda24da99b3134fL, + 0xafbd9c695cd011afL } }, + /* 57 << 140 */ + { { 0x9a16e30ac7088c7dL,0x5ab657103207389fL,0x1b09547fe7407a53L, + 0x2322f9d74fdc6eabL }, + { 0xc0f2f22d7430de4dL,0x19382696e68ca9a9L,0x17f1eff1918e5868L, + 0xe3b5b635586f4204L } }, + /* 58 << 140 */ + { { 0x146ef9803fbc4341L,0x359f2c805b5eed4eL,0x9f35744e7482e41dL, + 0x9a9ac3ecf3b224c2L }, + { 0x9161a6fe91fc50aeL,0x89ccc66bc613fa7cL,0x89268b14c732f15aL, + 0x7cd6f4e2b467ed03L } }, + /* 59 << 140 */ + { { 0xfbf79869ce56b40eL,0xf93e094cc02dde98L,0xefe0c3a8edee2cd7L, + 0x90f3ffc0b268fd42L }, + { 0x81a7fd5608241aedL,0x95ab7ad800b1afe8L,0x401270563e310d52L, + 0xd3ffdeb109d9fc43L } }, + /* 60 << 140 */ + { { 0xc8f85c91d11a8594L,0x2e74d25831cf6db8L,0x829c7ca302b5dfd0L, + 0xe389cfbe69143c86L }, + { 0xd01b6405941768d8L,0x4510399503bf825dL,0xcc4ee16656cd17e2L, + 0xbea3c283ba037e79L } }, + /* 61 << 140 */ + { { 0x4e1ac06ed9a47520L,0xfbfe18aaaf852404L,0x5615f8e28087648aL, + 0x7301e47eb9d150d9L }, + { 0x79f9f9ddb299b977L,0x76697a7ba5b78314L,0x10d674687d7c90e7L, + 0x7afffe03937210b5L } }, + /* 62 << 140 */ + { { 0x5aef3e4b28c22ceeL,0xefb0ecd809fd55aeL,0x4cea71320d2a5d6aL, + 0x9cfb5fa101db6357L }, + { 0x395e0b57f36e1ac5L,0x008fa9ad36cafb7dL,0x8f6cdf705308c4dbL, + 0x51527a3795ed2477L } }, + /* 63 << 140 */ + { { 0xba0dee305bd21311L,0x6ed41b22909c90d7L,0xc5f6b7587c8696d3L, + 0x0db8eaa83ce83a80L }, + { 0xd297fe37b24b4b6fL,0xfe58afe8522d1f0dL,0x973587368c98dbd9L, + 0x6bc226ca9454a527L } }, + /* 64 << 140 */ + { { 0xa12b384ece53c2d0L,0x779d897d5e4606daL,0xa53e47b073ec12b0L, + 0x462dbbba5756f1adL }, + { 0x69fe09f2cafe37b6L,0x273d1ebfecce2e17L,0x8ac1d5383cf607fdL, + 0x8035f7ff12e10c25L } }, + /* 0 << 147 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 147 */ + { { 0x854d34c77e6c5520L,0xc27df9efdcb9ea58L,0x405f2369d686666dL, + 0x29d1febf0417aa85L }, + { 0x9846819e93470afeL,0x3e6a9669e2a27f9eL,0x24d008a2e31e6504L, + 0xdba7cecf9cb7680aL } }, + /* 2 << 147 */ + { { 0xecaff541338d6e43L,0x56f7dd734541d5ccL,0xb5d426de96bc88caL, + 0x48d94f6b9ed3a2c3L }, + { 0x6354a3bb2ef8279cL,0xd575465b0b1867f2L,0xef99b0ff95225151L, + 0xf3e19d88f94500d8L } }, + /* 3 << 147 */ + { { 0x92a83268e32dd620L,0x913ec99f627849a2L,0xedd8fdfa2c378882L, + 0xaf96f33eee6f8cfeL }, + { 0xc06737e5dc3fa8a5L,0x236bb531b0b03a1dL,0x33e59f2989f037b0L, + 0x13f9b5a7d9a12a53L } }, + /* 4 << 147 */ + { { 0x0d0df6ce51efb310L,0xcb5b2eb4958df5beL,0xd6459e2936158e59L, + 0x82aae2b91466e336L }, + { 0xfb658a39411aa636L,0x7152ecc5d4c0a933L,0xf10c758a49f026b7L, + 0xf4837f97cb09311fL } }, + /* 5 << 147 */ + { { 0xddfb02c4c753c45fL,0x18ca81b6f9c840feL,0x846fd09ab0f8a3e6L, + 0xb1162adde7733dbcL }, + { 0x7070ad20236e3ab6L,0xf88cdaf5b2a56326L,0x05fc8719997cbc7aL, + 0x442cd4524b665272L } }, + /* 6 << 147 */ + { { 
0x7807f364b71698f5L,0x6ba418d29f7b605eL,0xfd20b00fa03b2cbbL, + 0x883eca37da54386fL }, + { 0xff0be43ff3437f24L,0xe910b432a48bb33cL,0x4963a128329df765L, + 0xac1dd556be2fe6f7L } }, + /* 7 << 147 */ + { { 0x557610f924a0a3fcL,0x38e17bf4e881c3f9L,0x6ba84fafed0dac99L, + 0xd4a222c359eeb918L }, + { 0xc79c1dbe13f542b6L,0x1fc65e0de425d457L,0xeffb754f1debb779L, + 0x638d8fd09e08af60L } }, + /* 8 << 147 */ + { { 0x994f523a626332d5L,0x7bc388335561bb44L,0x005ed4b03d845ea2L, + 0xd39d3ee1c2a1f08aL }, + { 0x6561fdd3e7676b0dL,0x620e35fffb706017L,0x36ce424ff264f9a8L, + 0xc4c3419fda2681f7L } }, + /* 9 << 147 */ + { { 0xfb6afd2f69beb6e8L,0x3a50b9936d700d03L,0xc840b2ad0c83a14fL, + 0x573207be54085befL }, + { 0x5af882e309fe7e5bL,0x957678a43b40a7e1L,0x172d4bdd543056e2L, + 0x9c1b26b40df13c0aL } }, + /* 10 << 147 */ + { { 0x1c30861cf405ff06L,0xebac86bd486e828bL,0xe791a971636933fcL, + 0x50e7c2be7aeee947L }, + { 0xc3d4a095fa90d767L,0xae60eb7be670ab7bL,0x17633a64397b056dL, + 0x93a21f33105012aaL } }, + /* 11 << 147 */ + { { 0x663c370babb88643L,0x91df36d722e21599L,0x183ba8358b761671L, + 0x381eea1d728f3bf1L }, + { 0xb9b2f1ba39966e6cL,0x7c464a28e7295492L,0x0fd5f70a09b26b7fL, + 0xa9aba1f9fbe009dfL } }, + /* 12 << 147 */ + { { 0x857c1f22369b87adL,0x3c00e5d932fca556L,0x1ad74cab90b06466L, + 0xa7112386550faaf2L }, + { 0x7435e1986d9bd5f5L,0x2dcc7e3859c3463fL,0xdc7df748ca7bd4b2L, + 0x13cd4c089dec2f31L } }, + /* 13 << 147 */ + { { 0x0d3b5df8e3237710L,0x0dadb26ecbd2f7b0L,0x9f5966abe4aa082bL, + 0x666ec8de350e966eL }, + { 0x1bfd1ed5ee524216L,0xcd93c59b41dab0b6L,0x658a8435d186d6baL, + 0x1b7d34d2159d1195L } }, + /* 14 << 147 */ + { { 0x5936e46022caf46bL,0x6a45dd8f9a96fe4fL,0xf7925434b98f474eL, + 0x414104120053ef15L }, + { 0x71cf8d1241de97bfL,0xb8547b61bd80bef4L,0xb47d3970c4db0037L, + 0xf1bcd328fef20dffL } }, + /* 15 << 147 */ + { { 0x31a92e0910caad67L,0x1f5919605531a1e1L,0x3bb852e05f4fc840L, + 0x63e297ca93a72c6cL }, + { 0x3c2b0b2e49abad67L,0x6ec405fced3db0d9L,0xdc14a5307fef1d40L, + 0xccd19846280896fcL } }, + /* 16 << 147 */ + { { 0x00f831769bb81648L,0xd69eb485653120d0L,0xd17d75f44ccabc62L, + 0x34a07f82b749fcb1L }, + { 0x2c3af787bbfb5554L,0xb06ed4d062e283f8L,0x5722889fa19213a0L, + 0x162b085edcf3c7b4L } }, + /* 17 << 147 */ + { { 0xbcaecb31e0dd3ecaL,0xc6237fbce52f13a5L,0xcc2b6b0327bac297L, + 0x2ae1cac5b917f54aL }, + { 0x474807d47845ae4fL,0xfec7dd92ce5972e0L,0xc3bd25411d7915bbL, + 0x66f85dc4d94907caL } }, + /* 18 << 147 */ + { { 0xd981b888bdbcf0caL,0xd75f5da6df279e9fL,0x128bbf247054e934L, + 0x3c6ff6e581db134bL }, + { 0x795b7cf4047d26e4L,0xf370f7b85049ec37L,0xc6712d4dced945afL, + 0xdf30b5ec095642bcL } }, + /* 19 << 147 */ + { { 0x9b034c624896246eL,0x5652c016ee90bbd1L,0xeb38636f87fedb73L, + 0x5e32f8470135a613L }, + { 0x0703b312cf933c83L,0xd05bb76e1a7f47e6L,0x825e4f0c949c2415L, + 0x569e56227250d6f8L } }, + /* 20 << 147 */ + { { 0xbbe9eb3a6568013eL,0x8dbd203f22f243fcL,0x9dbd7694b342734aL, + 0x8f6d12f846afa984L }, + { 0xb98610a2c9eade29L,0xbab4f32347dd0f18L,0x5779737b671c0d46L, + 0x10b6a7c6d3e0a42aL } }, + /* 21 << 147 */ + { { 0xfb19ddf33035b41cL,0xd336343f99c45895L,0x61fe493854c857e5L, + 0xc4d506beae4e57d5L }, + { 0x3cd8c8cbbbc33f75L,0x7281f08a9262c77dL,0x083f4ea6f11a2823L, + 0x8895041e9fba2e33L } }, + /* 22 << 147 */ + { { 0xfcdfea499c438edfL,0x7678dcc391edba44L,0xf07b3b87e2ba50f0L, + 0xc13888ef43948c1bL }, + { 0xc2135ad41140af42L,0x8e5104f3926ed1a7L,0xf24430cb88f6695fL, + 0x0ce0637b6d73c120L } }, + /* 23 << 147 */ + { { 0xb2db01e6fe631e8fL,0x1c5563d7d7bdd24bL,0x8daea3ba369ad44fL, + 0x000c81b68187a9f9L }, + { 
0x5f48a951aae1fd9aL,0xe35626c78d5aed8aL,0x209527630498c622L, + 0x76d17634773aa504L } }, + /* 24 << 147 */ + { { 0x36d90ddaeb300f7aL,0x9dcf7dfcedb5e801L,0x645cb26874d5244cL, + 0xa127ee79348e3aa2L }, + { 0x488acc53575f1dbbL,0x95037e8580e6161eL,0x57e59283292650d0L, + 0xabe67d9914938216L } }, + /* 25 << 147 */ + { { 0x3c7f944b3f8e1065L,0xed908cb6330e8924L,0x08ee8fd56f530136L, + 0x2227b7d5d7ffc169L }, + { 0x4f55c893b5cd6dd5L,0x82225e11a62796e8L,0x5c6cead1cb18e12cL, + 0x4381ae0c84f5a51aL } }, + /* 26 << 147 */ + { { 0x345913d37fafa4c8L,0x3d9180820491aac0L,0x9347871f3e69264cL, + 0xbea9dd3cb4f4f0cdL }, + { 0xbda5d0673eadd3e7L,0x0033c1b80573bcd8L,0x255893795da2486cL, + 0xcb89ee5b86abbee7L } }, + /* 27 << 147 */ + { { 0x8fe0a8f322532e5dL,0xb6410ff0727dfc4cL,0x619b9d58226726dbL, + 0x5ec256697a2b2dc7L }, + { 0xaf4d2e064c3beb01L,0x852123d07acea556L,0x0e9470faf783487aL, + 0x75a7ea045664b3ebL } }, + /* 28 << 147 */ + { { 0x4ad78f356798e4baL,0x9214e6e5c7d0e091L,0xc420b488b1290403L, + 0x64049e0afc295749L }, + { 0x03ef5af13ae9841fL,0xdbe4ca19b0b662a6L,0x46845c5ffa453458L, + 0xf8dabf1910b66722L } }, + /* 29 << 147 */ + { { 0xb650f0aacce2793bL,0x71db851ec5ec47c1L,0x3eb78f3e3b234fa9L, + 0xb0c60f35fc0106ceL }, + { 0x05427121774eadbdL,0x25367fafce323863L,0x7541b5c9cd086976L, + 0x4ff069e2dc507ad1L } }, + /* 30 << 147 */ + { { 0x741452568776e667L,0x6e76142cb23c6bb5L,0xdbf307121b3a8a87L, + 0x60e7363e98450836L }, + { 0x5741450eb7366d80L,0xe4ee14ca4837dbdfL,0xa765eb9b69d4316fL, + 0x04548dca8ef43825L } }, + /* 31 << 147 */ + { { 0x9c9f4e4c5ae888ebL,0x733abb5156e9ac99L,0xdaad3c20ba6ac029L, + 0x9b8dd3d32ba3e38eL }, + { 0xa9bb4c920bc5d11aL,0xf20127a79c5f88a3L,0x4f52b06e161d3cb8L, + 0x26c1ff096afaf0a6L } }, + /* 32 << 147 */ + { { 0x32670d2f7189e71fL,0xc64387485ecf91e7L,0x15758e57db757a21L, + 0x427d09f8290a9ce5L }, + { 0x846a308f38384a7aL,0xaac3acb4b0732b99L,0x9e94100917845819L, + 0x95cba111a7ce5e03L } }, + /* 33 << 147 */ + { { 0x6f3d4f7fb00009c4L,0xb8396c278ff28b5fL,0xb1a9ae431c97975dL, + 0x9d7ba8afe5d9fed5L }, + { 0x338cf09f34f485b6L,0xbc0ddacc64122516L,0xa450da1205d471feL, + 0x4c3a6250628dd8c9L } }, + /* 34 << 147 */ + { { 0x69c7d103d1295837L,0xa2893e503807eb2fL,0xd6e1e1debdb41491L, + 0xc630745b5e138235L }, + { 0xc892109e48661ae1L,0x8d17e7ebea2b2674L,0x00ec0f87c328d6b5L, + 0x6d858645f079ff9eL } }, + /* 35 << 147 */ + { { 0x6cdf243e19115eadL,0x1ce1393e4bac4fcfL,0x2c960ed09c29f25bL, + 0x59be4d8e9d388a05L }, + { 0x0d46e06cd0def72bL,0xb923db5de0342748L,0xf7d3aacd936d4a3dL, + 0x558519cc0b0b099eL } }, + /* 36 << 147 */ + { { 0x3ea8ebf8827097efL,0x259353dbd054f55dL,0x84c89abc6d2ed089L, + 0x5c548b698e096a7cL }, + { 0xd587f616994b995dL,0x4d1531f6a5845601L,0x792ab31e451fd9f0L, + 0xc8b57bb265adf6caL } }, + /* 37 << 147 */ + { { 0x68440fcb1cd5ad73L,0xb9c860e66144da4fL,0x2ab286aa8462beb8L, + 0xcc6b8fffef46797fL }, + { 0xac820da420c8a471L,0x69ae05a177ff7fafL,0xb9163f39bfb5da77L, + 0xbd03e5902c73ab7aL } }, + /* 38 << 147 */ + { { 0x7e862b5eb2940d9eL,0x3c663d864b9af564L,0xd8309031bde3033dL, + 0x298231b2d42c5bc6L }, + { 0x42090d2c552ad093L,0xa4799d1cff854695L,0x0a88b5d6d31f0d00L, + 0xf8b40825a2f26b46L } }, + /* 39 << 147 */ + { { 0xec29b1edf1bd7218L,0xd491c53b4b24c86eL,0xd2fe588f3395ea65L, + 0x6f3764f74456ef15L }, + { 0xdb43116dcdc34800L,0xcdbcd456c1e33955L,0xefdb554074ab286bL, + 0x948c7a51d18c5d7cL } }, + /* 40 << 147 */ + { { 0xeb81aa377378058eL,0x41c746a104411154L,0xa10c73bcfb828ac7L, + 0x6439be919d972b29L }, + { 0x4bf3b4b043a2fbadL,0x39e6dadf82b5e840L,0x4f7164086397bd4cL, + 0x0f7de5687f1eeccbL } }, + /* 41 << 147 */ + { { 
0x5865c5a1d2ffbfc1L,0xf74211fa4ccb6451L,0x66368a88c0b32558L, + 0x5b539dc29ad7812eL }, + { 0x579483d02f3af6f6L,0x5213207899934eceL,0x50b9650fdcc9e983L, + 0xca989ec9aee42b8aL } }, + /* 42 << 147 */ + { { 0x6a44c829d6f62f99L,0x8f06a3094c2a7c0cL,0x4ea2b3a098a0cb0aL, + 0x5c547b70beee8364L }, + { 0x461d40e1682afe11L,0x9e0fc77a7b41c0a8L,0x79e4aefde20d5d36L, + 0x2916e52032dd9f63L } }, + /* 43 << 147 */ + { { 0xf59e52e83f883fafL,0x396f96392b868d35L,0xc902a9df4ca19881L, + 0x0fc96822db2401a6L }, + { 0x4123758766f1c68dL,0x10fc6de3fb476c0dL,0xf8b6b579841f5d90L, + 0x2ba8446cfa24f44aL } }, + /* 44 << 147 */ + { { 0xa237b920ef4a9975L,0x60bb60042330435fL,0xd6f4ab5acfb7e7b5L, + 0xb2ac509783435391L }, + { 0xf036ee2fb0d1ea67L,0xae779a6a74c56230L,0x59bff8c8ab838ae6L, + 0xcd83ca999b38e6f0L } }, + /* 45 << 147 */ + { { 0xbb27bef5e33deed3L,0xe6356f6f001892a8L,0xbf3be6cc7adfbd3eL, + 0xaecbc81c33d1ac9dL }, + { 0xe4feb909e6e861dcL,0x90a247a453f5f801L,0x01c50acb27346e57L, + 0xce29242e461acc1bL } }, + /* 46 << 147 */ + { { 0x04dd214a2f998a91L,0x271ee9b1d4baf27bL,0x7e3027d1e8c26722L, + 0x21d1645c1820dce5L }, + { 0x086f242c7501779cL,0xf0061407fa0e8009L,0xf23ce47760187129L, + 0x05bbdedb0fde9bd0L } }, + /* 47 << 147 */ + { { 0x682f483225d98473L,0xf207fe855c658427L,0xb6fdd7ba4166ffa1L, + 0x0c3140569eed799dL }, + { 0x0db8048f4107e28fL,0x74ed387141216840L,0x74489f8f56a3c06eL, + 0x1e1c005b12777134L } }, + /* 48 << 147 */ + { { 0xdb332a73f37ec3c3L,0xc65259bddd59eba0L,0x2291709cdb4d3257L, + 0x9a793b25bd389390L }, + { 0xf39fe34be43756f0L,0x2f76bdce9afb56c9L,0x9f37867a61208b27L, + 0xea1d4307089972c3L } }, + /* 49 << 147 */ + { { 0x8c5953308bdf623aL,0x5f5accda8441fb7dL,0xfafa941832ddfd95L, + 0x6ad40c5a0fde9be7L }, + { 0x43faba89aeca8709L,0xc64a7cf12c248a9dL,0x1662025272637a76L, + 0xaee1c79122b8d1bbL } }, + /* 50 << 147 */ + { { 0xf0f798fd21a843b2L,0x56e4ed4d8d005cb1L,0x355f77801f0d8abeL, + 0x197b04cf34522326L }, + { 0x41f9b31ffd42c13fL,0x5ef7feb2b40f933dL,0x27326f425d60bad4L, + 0x027ecdb28c92cf89L } }, + /* 51 << 147 */ + { { 0x04aae4d14e3352feL,0x08414d2f73591b90L,0x5ed6124eb7da7d60L, + 0xb985b9314d13d4ecL }, + { 0xa592d3ab96bf36f9L,0x012dbed5bbdf51dfL,0xa57963c0df6c177dL, + 0x010ec86987ca29cfL } }, + /* 52 << 147 */ + { { 0xba1700f6bf926dffL,0x7c9fdbd1f4bf6bc2L,0xdc18dc8f64da11f5L, + 0xa6074b7ad938ae75L }, + { 0x14270066e84f44a4L,0x99998d38d27b954eL,0xc1be8ab2b4f38e9aL, + 0x8bb55bbf15c01016L } }, + /* 53 << 147 */ + { { 0xf73472b40ea2ab30L,0xd365a340f73d68ddL,0xc01a716819c2e1ebL, + 0x32f49e3734061719L }, + { 0xb73c57f101d8b4d6L,0x03c8423c26b47700L,0x321d0bc8a4d8826aL, + 0x6004213c4bc0e638L } }, + /* 54 << 147 */ + { { 0xf78c64a1c1c06681L,0x16e0a16fef018e50L,0x31cbdf91db42b2b3L, + 0xf8f4ffcee0d36f58L }, + { 0xcdcc71cd4cc5e3e0L,0xd55c7cfaa129e3e0L,0xccdb6ba00fb2cbf1L, + 0x6aba0005c4bce3cbL } }, + /* 55 << 147 */ + { { 0x501cdb30d232cfc4L,0x9ddcf12ed58a3cefL,0x02d2cf9c87e09149L, + 0xdc5d7ec72c976257L }, + { 0x6447986e0b50d7ddL,0x88fdbaf7807f112aL,0x58c9822ab00ae9f6L, + 0x6abfb9506d3d27e0L } }, + /* 56 << 147 */ + { { 0xd0a744878a429f4fL,0x0649712bdb516609L,0xb826ba57e769b5dfL, + 0x82335df21fc7aaf2L }, + { 0x2389f0675c93d995L,0x59ac367a68677be6L,0xa77985ff21d9951bL, + 0x038956fb85011cceL } }, + /* 57 << 147 */ + { { 0x608e48cbbb734e37L,0xc08c0bf22be5b26fL,0x17bbdd3bf9b1a0d9L, + 0xeac7d89810483319L }, + { 0xc95c4bafbc1a6deaL,0xfdd0e2bf172aafdbL,0x40373cbc8235c41aL, + 0x14303f21fb6f41d5L } }, + /* 58 << 147 */ + { { 0xba0636210408f237L,0xcad3b09aecd2d1edL,0x4667855a52abb6a2L, + 0xba9157dcaa8b417bL }, + { 
0xfe7f35074f013efbL,0x1b112c4baa38c4a2L,0xa1406a609ba64345L, + 0xe53cba336993c80bL } }, + /* 59 << 147 */ + { { 0x45466063ded40d23L,0x3d5f1f4d54908e25L,0x9ebefe62403c3c31L, + 0x274ea0b50672a624L }, + { 0xff818d99451d1b71L,0x80e826438f79cf79L,0xa165df1373ce37f5L, + 0xa744ef4ffe3a21fdL } }, + /* 60 << 147 */ + { { 0x73f1e7f5cf551396L,0xc616898e868c676bL,0x671c28c78c442c36L, + 0xcfe5e5585e0a317dL }, + { 0x1242d8187051f476L,0x56fad2a614f03442L,0x262068bc0a44d0f6L, + 0xdfa2cd6ece6edf4eL } }, + /* 61 << 147 */ + { { 0x0f43813ad15d1517L,0x61214cb2377d44f5L,0xd399aa29c639b35fL, + 0x42136d7154c51c19L }, + { 0x9774711b08417221L,0x0a5546b352545a57L,0x80624c411150582dL, + 0x9ec5c418fbc555bcL } }, + /* 62 << 147 */ + { { 0x2c87dcad771849f1L,0xb0c932c501d7bf6fL,0x6aa5cd3e89116eb2L, + 0xd378c25a51ca7bd3L }, + { 0xc612a0da9e6e3e31L,0x0417a54db68ad5d0L,0x00451e4a22c6edb8L, + 0x9fbfe019b42827ceL } }, + /* 63 << 147 */ + { { 0x2fa92505ba9384a2L,0x21b8596e64ad69c1L,0x8f4fcc49983b35a6L, + 0xde09376072754672L }, + { 0x2f14ccc8f7bffe6dL,0x27566bff5d94263dL,0xb5b4e9c62df3ec30L, + 0x94f1d7d53e6ea6baL } }, + /* 64 << 147 */ + { { 0x97b7851aaaca5e9bL,0x518aa52156713b97L,0x3357e8c7150a61f6L, + 0x7842e7e2ec2c2b69L }, + { 0x8dffaf656868a548L,0xd963bd82e068fc81L,0x64da5c8b65917733L, + 0x927090ff7b247328L } }, + /* 0 << 154 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 154 */ + { { 0x214bc9a7d298c241L,0xe3b697ba56807cfdL,0xef1c78024564eadbL, + 0xdde8cdcfb48149c5L }, + { 0x946bf0a75a4d2604L,0x27154d7f6c1538afL,0x95cc9230de5b1fccL, + 0xd88519e966864f82L } }, + /* 2 << 154 */ + { { 0xb828dd1a7cb1282cL,0xa08d7626be46973aL,0x6baf8d40e708d6b2L, + 0x72571fa14daeb3f3L }, + { 0x85b1732ff22dfd98L,0x87ab01a70087108dL,0xaaaafea85988207aL, + 0xccc832f869f00755L } }, + /* 3 << 154 */ + { { 0x964d950e36ff3bf0L,0x8ad20f6ff0b34638L,0x4d9177b3b5d7585fL, + 0xcf839760ef3f019fL }, + { 0x582fc5b38288c545L,0x2f8e4e9b13116bd1L,0xf91e1b2f332120efL, + 0xcf5687242a17dd23L } }, + /* 4 << 154 */ + { { 0x488f1185ca8d9d1aL,0xadf2c77dd987ded2L,0x5f3039f060c46124L, + 0xe5d70b7571e095f4L }, + { 0x82d586506260e70fL,0x39d75ea7f750d105L,0x8cf3d0b175bac364L, + 0xf3a7564d21d01329L } }, + /* 5 << 154 */ + { { 0x182f04cd2f52d2a7L,0x4fde149ae2df565aL,0xb80c5eeca79fb2f7L, + 0xab491d7b22ddc897L }, + { 0x99d76c18c6312c7fL,0xca0d5f3d6aa41a57L,0x71207325d15363a0L, + 0xe82aa265beb252c2L } }, + /* 6 << 154 */ + { { 0x94ab4700ec3128c2L,0x6c76d8628e383f49L,0xdc36b150c03024ebL, + 0xfb43947753daac69L }, + { 0xfc68764a8dc79623L,0x5b86995db440fbb2L,0xd66879bfccc5ee0dL, + 0x0522894295aa8bd3L } }, + /* 7 << 154 */ + { { 0xb51a40a51e6a75c1L,0x24327c760ea7d817L,0x0663018207774597L, + 0xd6fdbec397fa7164L }, + { 0x20c99dfb13c90f48L,0xd6ac5273686ef263L,0xc6a50bdcfef64eebL, + 0xcd87b28186fdfc32L } }, + /* 8 << 154 */ + { { 0xb24aa43e3fcd3efcL,0xdd26c034b8088e9aL,0xa5ef4dc9bd3d46eaL, + 0xa2f99d588a4c6a6fL }, + { 0xddabd3552f1da46cL,0x72c3f8ce1afacdd1L,0xd90c4eee92d40578L, + 0xd28bb41fca623b94L } }, + /* 9 << 154 */ + { { 0x50fc0711745edc11L,0x9dd9ad7d3dc87558L,0xce6931fbb49d1e64L, + 0x6c77a0a2c98bd0f9L }, + { 0x62b9a6296baf7cb1L,0xcf065f91ccf72d22L,0x7203cce979639071L, + 0x09ae4885f9cb732fL } }, + /* 10 << 154 */ + { { 0x5e7c3becee8314f3L,0x1c068aeddbea298fL,0x08d381f17c80acecL, + 0x03b56be8e330495bL }, + { 0xaeffb8f29222882dL,0x95ff38f6c4af8bf7L,0x50e32d351fc57d8cL, + 0x6635be5217b444f0L } }, + /* 11 << 154 */ + { { 0x04d15276a5177900L,0x4e1dbb47f6858752L,0x5b475622c615796cL, + 0xa6fa0387691867bfL }, + { 
0xed7f5d562844c6d0L,0xc633cf9b03a2477dL,0xf6be5c402d3721d6L, + 0xaf312eb7e9fd68e6L } }, + /* 12 << 154 */ + { { 0x242792d2e7417ce1L,0xff42bc71970ee7f5L,0x1ff4dc6d5c67a41eL, + 0x77709b7b20882a58L }, + { 0x3554731dbe217f2cL,0x2af2a8cd5bb72177L,0x58eee769591dd059L, + 0xbb2930c94bba6477L } }, + /* 13 << 154 */ + { { 0x863ee0477d930cfcL,0x4c262ad1396fd1f4L,0xf4765bc8039af7e1L, + 0x2519834b5ba104f6L }, + { 0x7cd61b4cd105f961L,0xa5415da5d63bca54L,0x778280a088a1f17cL, + 0xc49689492329512cL } }, + /* 14 << 154 */ + { { 0x174a9126cecdaa7aL,0xfc8c7e0e0b13247bL,0x29c110d23484c1c4L, + 0xf8eb8757831dfc3bL }, + { 0x022f0212c0067452L,0x3f6f69ee7b9b926cL,0x09032da0ef42daf4L, + 0x79f00ade83f80de4L } }, + /* 15 << 154 */ + { { 0x6210db7181236c97L,0x74f7685b3ee0781fL,0x4df7da7ba3e41372L, + 0x2aae38b1b1a1553eL }, + { 0x1688e222f6dd9d1bL,0x576954485b8b6487L,0x478d21274b2edeaaL, + 0xb2818fa51e85956aL } }, + /* 16 << 154 */ + { { 0x1e6adddaf176f2c0L,0x01ca4604e2572658L,0x0a404ded85342ffbL, + 0x8cf60f96441838d6L }, + { 0x9bbc691cc9071c4aL,0xfd58874434442803L,0x97101c85809c0d81L, + 0xa7fb754c8c456f7fL } }, + /* 17 << 154 */ + { { 0xc95f3c5cd51805e1L,0xab4ccd39b299dca8L,0x3e03d20b47eaf500L, + 0xfa3165c1d7b80893L }, + { 0x005e8b54e160e552L,0xdc4972ba9019d11fL,0x21a6972e0c9a4a7aL, + 0xa52c258f37840fd7L } }, + /* 18 << 154 */ + { { 0xf8559ff4c1e99d81L,0x08e1a7d6a3c617c0L,0xb398fd43248c6ba7L, + 0x6ffedd91d1283794L }, + { 0x8a6a59d2d629d208L,0xa9d141d53490530eL,0x42f6fc1838505989L, + 0x09bf250d479d94eeL } }, + /* 19 << 154 */ + { { 0x223ad3b1b3822790L,0x6c5926c093b8971cL,0x609efc7e75f7fa62L, + 0x45d66a6d1ec2d989L }, + { 0x4422d663987d2792L,0x4a73caad3eb31d2bL,0xf06c2ac1a32cb9e6L, + 0xd9445c5f91aeba84L } }, + /* 20 << 154 */ + { { 0x6af7a1d5af71013fL,0xe68216e50bedc946L,0xf4cba30bd27370a0L, + 0x7981afbf870421ccL }, + { 0x02496a679449f0e1L,0x86cfc4be0a47edaeL,0x3073c936b1feca22L, + 0xf569461203f8f8fbL } }, + /* 21 << 154 */ + { { 0xd063b723901515eaL,0x4c6c77a5749cf038L,0x6361e360ab9e5059L, + 0x596cf171a76a37c0L }, + { 0x800f53fa6530ae7aL,0x0f5e631e0792a7a6L,0x5cc29c24efdb81c9L, + 0xa269e8683f9c40baL } }, + /* 22 << 154 */ + { { 0xec14f9e12cb7191eL,0x78ea1bd8e5b08ea6L,0x3c65aa9b46332bb9L, + 0x84cc22b3bf80ce25L }, + { 0x0098e9e9d49d5bf1L,0xcd4ec1c619087da4L,0x3c9d07c5aef6e357L, + 0x839a02689f8f64b8L } }, + /* 23 << 154 */ + { { 0xc5e9eb62c6d8607fL,0x759689f56aa995e4L,0x70464669bbb48317L, + 0x921474bfe402417dL }, + { 0xcabe135b2a354c8cL,0xd51e52d2812fa4b5L,0xec74109653311fe8L, + 0x4f774535b864514bL } }, + /* 24 << 154 */ + { { 0xbcadd6715bde48f8L,0xc97038732189bc7dL,0x5d45299ec709ee8aL, + 0xd1287ee2845aaff8L }, + { 0x7d1f8874db1dbf1fL,0xea46588b990c88d6L,0x60ba649a84368313L, + 0xd5fdcbce60d543aeL } }, + /* 25 << 154 */ + { { 0x90b46d43810d5ab0L,0x6739d8f904d7e5ccL,0x021c1a580d337c33L, + 0x00a6116268e67c40L }, + { 0x95ef413b379f0a1fL,0xfe126605e9e2ab95L,0x67578b852f5f199cL, + 0xf5c003292cb84913L } }, + /* 26 << 154 */ + { { 0xf795643037577dd8L,0x83b82af429c5fe88L,0x9c1bea26cdbdc132L, + 0x589fa0869c04339eL }, + { 0x033e9538b13799dfL,0x85fa8b21d295d034L,0xdf17f73fbd9ddccaL, + 0xf32bd122ddb66334L } }, + /* 27 << 154 */ + { { 0x55ef88a7858b044cL,0x1f0d69c25aa9e397L,0x55fd9cc340d85559L, + 0xc774df727785ddb2L }, + { 0x5dcce9f6d3bd2e1cL,0xeb30da20a85dfed0L,0x5ed7f5bbd3ed09c4L, + 0x7d42a35c82a9c1bdL } }, + /* 28 << 154 */ + { { 0xcf3de9959890272dL,0x75f3432a3e713a10L,0x5e13479fe28227b8L, + 0xb8561ea9fefacdc8L }, + { 0xa6a297a08332aafdL,0x9b0d8bb573809b62L,0xd2fa1cfd0c63036fL, + 0x7a16eb55bd64bda8L } }, + /* 29 << 154 */ + { { 
0x3f5cf5f678e62ddcL,0x2267c45407fd752bL,0x5e361b6b5e437bbeL, + 0x95c595018354e075L }, + { 0xec725f85f2b254d9L,0x844b617d2cb52b4eL,0xed8554f5cf425fb5L, + 0xab67703e2af9f312L } }, + /* 30 << 154 */ + { { 0x4cc34ec13cf48283L,0xb09daa259c8a705eL,0xd1e9d0d05b7d4f84L, + 0x4df6ef64db38929dL }, + { 0xe16b0763aa21ba46L,0xc6b1d178a293f8fbL,0x0ff5b602d520aabfL, + 0x94d671bdc339397aL } }, + /* 31 << 154 */ + { { 0x7c7d98cf4f5792faL,0x7c5e0d6711215261L,0x9b19a631a7c5a6d4L, + 0xc8511a627a45274dL }, + { 0x0c16621ca5a60d99L,0xf7fbab88cf5e48cbL,0xab1e6ca2f7ddee08L, + 0x83bd08cee7867f3cL } }, + /* 32 << 154 */ + { { 0xf7e48e8a2ac13e27L,0x4494f6df4eb1a9f5L,0xedbf84eb981f0a62L, + 0x49badc32536438f0L }, + { 0x50bea541004f7571L,0xbac67d10df1c94eeL,0x253d73a1b727bc31L, + 0xb3d01cf230686e28L } }, + /* 33 << 154 */ + { { 0x51b77b1b55fd0b8bL,0xa099d183feec3173L,0x202b1fb7670e72b7L, + 0xadc88b33a8e1635fL }, + { 0x34e8216af989d905L,0xc2e68d2029b58d01L,0x11f81c926fe55a93L, + 0x15f1462a8f296f40L } }, + /* 34 << 154 */ + { { 0x1915d375ea3d62f2L,0xa17765a301c8977dL,0x7559710ae47b26f6L, + 0xe0bd29c8535077a5L }, + { 0x615f976d08d84858L,0x370dfe8569ced5c1L,0xbbc7503ca734fa56L, + 0xfbb9f1ec91ac4574L } }, + /* 35 << 154 */ + { { 0x95d7ec53060dd7efL,0xeef2dacd6e657979L,0x54511af3e2a08235L, + 0x1e324aa41f4aea3dL }, + { 0x550e7e71e6e67671L,0xbccd5190bf52faf7L,0xf880d316223cc62aL, + 0x0d402c7e2b32eb5dL } }, + /* 36 << 154 */ + { { 0xa40bc039306a5a3bL,0x4e0a41fd96783a1bL,0xa1e8d39a0253cdd4L, + 0x6480be26c7388638L }, + { 0xee365e1d2285f382L,0x188d8d8fec0b5c36L,0x34ef1a481f0f4d82L, + 0x1a8f43e1a487d29aL } }, + /* 37 << 154 */ + { { 0x8168226d77aefb3aL,0xf69a751e1e72c253L,0x8e04359ae9594df1L, + 0x475ffd7dd14c0467L }, + { 0xb5a2c2b13844e95cL,0x85caf647dd12ef94L,0x1ecd2a9ff1063d00L, + 0x1dd2e22923843311L } }, + /* 38 << 154 */ + { { 0x38f0e09d73d17244L,0x3ede77468fc653f1L,0xae4459f5dc20e21cL, + 0x00db2ffa6a8599eaL }, + { 0x11682c3930cfd905L,0x4934d074a5c112a6L,0xbdf063c5568bfe95L, + 0x779a440a016c441aL } }, + /* 39 << 154 */ + { { 0x0c23f21897d6fbdcL,0xd3a5cd87e0776aacL,0xcee37f72d712e8dbL, + 0xfb28c70d26f74e8dL }, + { 0xffe0c728b61301a0L,0xa6282168d3724354L,0x7ff4cb00768ffedcL, + 0xc51b308803b02de9L } }, + /* 40 << 154 */ + { { 0xa5a8147c3902dda5L,0x35d2f706fe6973b4L,0x5ac2efcfc257457eL, + 0x933f48d48700611bL }, + { 0xc365af884912beb2L,0x7f5a4de6162edf94L,0xc646ba7c0c32f34bL, + 0x632c6af3b2091074L } }, + /* 41 << 154 */ + { { 0x58d4f2e3753e43a9L,0x70e1d21724d4e23fL,0xb24bf729afede6a6L, + 0x7f4a94d8710c8b60L }, + { 0xaad90a968d4faa6aL,0xd9ed0b32b066b690L,0x52fcd37b78b6dbfdL, + 0x0b64615e8bd2b431L } }, + /* 42 << 154 */ + { { 0x228e2048cfb9fad5L,0xbeaa386d240b76bdL,0x2d6681c890dad7bcL, + 0x3e553fc306d38f5eL }, + { 0xf27cdb9b9d5f9750L,0x3e85c52ad28c5b0eL,0x190795af5247c39bL, + 0x547831ebbddd6828L } }, + /* 43 << 154 */ + { { 0xf327a2274a82f424L,0x36919c787e47f89dL,0xe478391943c7392cL, + 0xf101b9aa2316fefeL }, + { 0xbcdc9e9c1c5009d2L,0xfb55ea139cd18345L,0xf5b5e231a3ce77c7L, + 0xde6b4527d2f2cb3dL } }, + /* 44 << 154 */ + { { 0x10f6a3339bb26f5fL,0x1e85db8e044d85b6L,0xc3697a0894197e54L, + 0x65e18cc0a7cb4ea8L }, + { 0xa38c4f50a471fe6eL,0xf031747a2f13439cL,0x53c4a6bac007318bL, + 0xa8da3ee51deccb3dL } }, + /* 45 << 154 */ + { { 0x0555b31c558216b1L,0x90c7810c2f79e6c2L,0x9b669f4dfe8eed3cL, + 0x70398ec8e0fac126L }, + { 0xa96a449ef701b235L,0x0ceecdb3eb94f395L,0x285fc368d0cb7431L, + 0x0d37bb5216a18c64L } }, + /* 46 << 154 */ + { { 0x05110d38b880d2ddL,0xa60f177b65930d57L,0x7da34a67f36235f5L, + 0x47f5e17c183816b9L }, + { 
0xc7664b57db394af4L,0x39ba215d7036f789L,0x46d2ca0e2f27b472L, + 0xc42647eef73a84b7L } }, + /* 47 << 154 */ + { { 0x44bc754564488f1dL,0xaa922708f4cf85d5L,0x721a01d553e4df63L, + 0x649c0c515db46cedL }, + { 0x6bf0d64e3cffcb6cL,0xe3bf93fe50f71d96L,0x75044558bcc194a0L, + 0x16ae33726afdc554L } }, + /* 48 << 154 */ + { { 0xbfc01adf5ca48f3fL,0x64352f06e22a9b84L,0xcee54da1c1099e4aL, + 0xbbda54e8fa1b89c0L }, + { 0x166a3df56f6e55fbL,0x1ca44a2420176f88L,0x936afd88dfb7b5ffL, + 0xe34c24378611d4a0L } }, + /* 49 << 154 */ + { { 0x7effbb7586142103L,0x6704ba1b1f34fc4dL,0x7c2a468f10c1b122L, + 0x36b3a6108c6aace9L }, + { 0xabfcc0a775a0d050L,0x066f91973ce33e32L,0xce905ef429fe09beL, + 0x89ee25baa8376351L } }, + /* 50 << 154 */ + { { 0x2a3ede22fd29dc76L,0x7fd32ed936f17260L,0x0cadcf68284b4126L, + 0x63422f08a7951fc8L }, + { 0x562b24f40807e199L,0xfe9ce5d122ad4490L,0xc2f51b100db2b1b4L, + 0xeb3613ffe4541d0dL } }, + /* 51 << 154 */ + { { 0xbd2c4a052680813bL,0x527aa55d561b08d6L,0xa9f8a40ea7205558L, + 0xe3eea56f243d0becL }, + { 0x7b853817a0ff58b3L,0xb67d3f651a69e627L,0x0b76bbb9a869b5d6L, + 0xa3afeb82546723edL } }, + /* 52 << 154 */ + { { 0x5f24416d3e554892L,0x8413b53d430e2a45L,0x99c56aee9032a2a0L, + 0x09432bf6eec367b1L }, + { 0x552850c6daf0ecc1L,0x49ebce555bc92048L,0xdfb66ba654811307L, + 0x1b84f7976f298597L } }, + /* 53 << 154 */ + { { 0x795904818d1d7a0dL,0xd9fabe033a6fa556L,0xa40f9c59ba9e5d35L, + 0xcb1771c1f6247577L }, + { 0x542a47cae9a6312bL,0xa34b3560552dd8c5L,0xfdf94de00d794716L, + 0xd46124a99c623094L } }, + /* 54 << 154 */ + { { 0x56b7435d68afe8b4L,0x27f205406c0d8ea1L,0x12b77e1473186898L, + 0xdbc3dd467479490fL }, + { 0x951a9842c03b0c05L,0x8b1b3bb37921bc96L,0xa573b3462b202e0aL, + 0x77e4665d47254d56L } }, + /* 55 << 154 */ + { { 0x08b70dfcd23e3984L,0xab86e8bcebd14236L,0xaa3e07f857114ba7L, + 0x5ac71689ab0ef4f2L }, + { 0x88fca3840139d9afL,0x72733f8876644af0L,0xf122f72a65d74f4aL, + 0x13931577a5626c7aL } }, + /* 56 << 154 */ + { { 0xd5b5d9eb70f8d5a4L,0x375adde7d7bbb228L,0x31e88b860c1c0b32L, + 0xd1f568c4173edbaaL }, + { 0x1592fc835459df02L,0x2beac0fb0fcd9a7eL,0xb0a6fdb81b473b0aL, + 0xe3224c6f0fe8fc48L } }, + /* 57 << 154 */ + { { 0x680bd00ee87edf5bL,0x30385f0220e77cf5L,0xe9ab98c04d42d1b2L, + 0x72d191d2d3816d77L }, + { 0x1564daca0917d9e5L,0x394eab591f8fed7fL,0xa209aa8d7fbb3896L, + 0x5564f3b9be6ac98eL } }, + /* 58 << 154 */ + { { 0xead21d05d73654efL,0x68d1a9c413d78d74L,0x61e017086d4973a0L, + 0x83da350046e6d32aL }, + { 0x6a3dfca468ae0118L,0xa1b9a4c9d02da069L,0x0b2ff9c7ebab8302L, + 0x98af07c3944ba436L } }, + /* 59 << 154 */ + { { 0x85997326995f0f9fL,0x467fade071b58bc6L,0x47e4495abd625a2bL, + 0xfdd2d01d33c3b8cdL }, + { 0x2c38ae28c693f9faL,0x48622329348f7999L,0x97bf738e2161f583L, + 0x15ee2fa7565e8cc9L } }, + /* 60 << 154 */ + { { 0xa1a5c8455777e189L,0xcc10bee0456f2829L,0x8ad95c56da762bd5L, + 0x152e2214e9d91da8L }, + { 0x975b0e727cb23c74L,0xfd5d7670a90c66dfL,0xb5b5b8ad225ffc53L, + 0xab6dff73faded2aeL } }, + /* 61 << 154 */ + { { 0xebd567816f4cbe9dL,0x0ed8b2496a574bd7L,0x41c246fe81a881faL, + 0x91564805c3db9c70L }, + { 0xd7c12b085b862809L,0x1facd1f155858d7bL,0x7693747caf09e92aL, + 0x3b69dcba189a425fL } }, + /* 62 << 154 */ + { { 0x0be28e9f967365efL,0x57300eb2e801f5c9L,0x93b8ac6ad583352fL, + 0xa2cf1f89cd05b2b7L }, + { 0x7c0c9b744dcc40ccL,0xfee38c45ada523fbL,0xb49a4dec1099cc4dL, + 0x325c377f69f069c6L } }, + /* 63 << 154 */ + { { 0xe12458ce476cc9ffL,0x580e0b6cc6d4cb63L,0xd561c8b79072289bL, + 0x0377f264a619e6daL }, + { 0x2668536288e591a5L,0xa453a7bd7523ca2bL,0x8a9536d2c1df4533L, + 0xc8e50f2fbe972f79L } }, + /* 64 << 154 */ + { { 
0xd433e50f6d3549cfL,0x6f33696ffacd665eL,0x695bfdacce11fcb4L, + 0x810ee252af7c9860L }, + { 0x65450fe17159bb2cL,0xf7dfbebe758b357bL,0x2b057e74d69fea72L, + 0xd485717a92731745L } }, + /* 0 << 161 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 161 */ + { { 0x896c42e8ee36860cL,0xdaf04dfd4113c22dL,0x1adbb7b744104213L, + 0xe5fd5fa11fd394eaL }, + { 0x68235d941a4e0551L,0x6772cfbe18d10151L,0x276071e309984523L, + 0xe4e879de5a56ba98L } }, + /* 2 << 161 */ + { { 0xaaafafb0285b9491L,0x01a0be881e4c705eL,0xff1d4f5d2ad9caabL, + 0x6e349a4ac37a233fL }, + { 0xcf1c12464a1c6a16L,0xd99e6b6629383260L,0xea3d43665f6d5471L, + 0x36974d04ff8cc89bL } }, + /* 3 << 161 */ + { { 0xc26c49a1cfe89d80L,0xb42c026dda9c8371L,0xca6c013adad066d2L, + 0xfb8f722856a4f3eeL }, + { 0x08b579ecd850935bL,0x34c1a74cd631e1b3L,0xcb5fe596ac198534L, + 0x39ff21f6e1f24f25L } }, + /* 4 << 161 */ + { { 0x27f29e148f929057L,0x7a64ae06c0c853dfL,0x256cd18358e9c5ceL, + 0x9d9cce82ded092a5L }, + { 0xcc6e59796e93b7c7L,0xe1e4709231bb9e27L,0xb70b3083aa9e29a0L, + 0xbf181a753785e644L } }, + /* 5 << 161 */ + { { 0xf53f2c658ead09f7L,0x1335e1d59780d14dL,0x69cc20e0cd1b66bcL, + 0x9b670a37bbe0bfc8L }, + { 0xce53dc8128efbeedL,0x0c74e77c8326a6e5L,0x3604e0d2b88e9a63L, + 0xbab38fca13dc2248L } }, + /* 6 << 161 */ + { { 0x8ed6e8c85c0a3f1eL,0xbcad24927c87c37fL,0xfdfb62bb9ee3b78dL, + 0xeba8e477cbceba46L }, + { 0x37d38cb0eeaede4bL,0x0bc498e87976deb6L,0xb2944c046b6147fbL, + 0x8b123f35f71f9609L } }, + /* 7 << 161 */ + { { 0xa155dcc7de79dc24L,0xf1168a32558f69cdL,0xbac215950d1850dfL, + 0x15c8295bb204c848L }, + { 0xf661aa367d8184ffL,0xc396228e30447bdbL,0x11cd5143bde4a59eL, + 0xe3a26e3b6beab5e6L } }, + /* 8 << 161 */ + { { 0xd3b3a13f1402b9d0L,0x573441c32c7bc863L,0x4b301ec4578c3e6eL, + 0xc26fc9c40adaf57eL }, + { 0x96e71bfd7493cea3L,0xd05d4b3f1af81456L,0xdaca2a8a6a8c608fL, + 0x53ef07f60725b276L } }, + /* 9 << 161 */ + { { 0x07a5fbd27824fc56L,0x3467521813289077L,0x5bf69fd5e0c48349L, + 0xa613ddd3b6aa7875L }, + { 0x7f78c19c5450d866L,0x46f4409c8f84a481L,0x9f1d192890fce239L, + 0x016c4168b2ce44b9L } }, + /* 10 << 161 */ + { { 0xbae023f0c7435978L,0xb152c88820e30e19L,0x9c241645e3fa6fafL, + 0x735d95c184823e60L }, + { 0x0319757303955317L,0x0b4b02a9f03b4995L,0x076bf55970274600L, + 0x32c5cc53aaf57508L } }, + /* 11 << 161 */ + { { 0xe8af6d1f60624129L,0xb7bc5d649a5e2b5eL,0x3814b0485f082d72L, + 0x76f267f2ce19677aL }, + { 0x626c630fb36eed93L,0x55230cd73bf56803L,0x78837949ce2736a0L, + 0x0d792d60aa6c55f1L } }, + /* 12 << 161 */ + { { 0x0318dbfdd5c7c5d2L,0xb38f8da7072b342dL,0x3569bddc7b8de38aL, + 0xf25b5887a1c94842L }, + { 0xb2d5b2842946ad60L,0x854f29ade9d1707eL,0xaa5159dc2c6a4509L, + 0x899f94c057189837L } }, + /* 13 << 161 */ + { { 0xcf6adc51f4a55b03L,0x261762de35e3b2d5L,0x4cc4301204827b51L, + 0xcd22a113c6021442L }, + { 0xce2fd61a247c9569L,0x59a50973d152becaL,0x6c835a1163a716d4L, + 0xc26455ed187dedcfL } }, + /* 14 << 161 */ + { { 0x27f536e049ce89e7L,0x18908539cc890cb5L,0x308909abd83c2aa1L, + 0xecd3142b1ab73bd3L }, + { 0x6a85bf59b3f5ab84L,0x3c320a68f2bea4c6L,0xad8dc5386da4541fL, + 0xeaf34eb0b7c41186L } }, + /* 15 << 161 */ + { { 0x1c780129977c97c4L,0x5ff9beebc57eb9faL,0xa24d0524c822c478L, + 0xfd8eec2a461cd415L }, + { 0xfbde194ef027458cL,0xb4ff53191d1be115L,0x63f874d94866d6f4L, + 0x35c75015b21ad0c9L } }, + /* 16 << 161 */ + { { 0xa6b5c9d646ac49d2L,0x42c77c0b83137aa9L,0x24d000fc68225a38L, + 0x0f63cfc82fe1e907L }, + { 0x22d1b01bc6441f95L,0x7d38f719ec8e448fL,0x9b33fa5f787fb1baL, + 0x94dcfda1190158dfL } }, + /* 17 << 161 */ + { { 
0xc47cb3395f6d4a09L,0x6b4f355cee52b826L,0x3d100f5df51b930aL, + 0xf4512fac9f668f69L }, + { 0x546781d5206c4c74L,0xd021d4d4cb4d2e48L,0x494a54c2ca085c2dL, + 0xf1dbaca4520850a8L } }, + /* 18 << 161 */ + { { 0x63c79326490a1acaL,0xcb64dd9c41526b02L,0xbb772591a2979258L, + 0x3f58297048d97846L }, + { 0xd66b70d17c213ba7L,0xc28febb5e8a0ced4L,0x6b911831c10338c1L, + 0x0d54e389bf0126f3L } }, + /* 19 << 161 */ + { { 0x7048d4604af206eeL,0x786c88f677e97cb9L,0xd4375ae1ac64802eL, + 0x469bcfe1d53ec11cL }, + { 0xfc9b340d47062230L,0xe743bb57c5b4a3acL,0xfe00b4aa59ef45acL, + 0x29a4ef2359edf188L } }, + /* 20 << 161 */ + { { 0x40242efeb483689bL,0x2575d3f6513ac262L,0xf30037c80ca6db72L, + 0xc9fcce8298864be2L }, + { 0x84a112ff0149362dL,0x95e575821c4ae971L,0x1fa4b1a8945cf86cL, + 0x4525a7340b024a2fL } }, + /* 21 << 161 */ + { { 0xe76c8b628f338360L,0x483ff59328edf32bL,0x67e8e90a298b1aecL, + 0x9caab338736d9a21L }, + { 0x5c09d2fd66892709L,0x2496b4dcb55a1d41L,0x93f5fb1ae24a4394L, + 0x08c750496fa8f6c1L } }, + /* 22 << 161 */ + { { 0xcaead1c2c905d85fL,0xe9d7f7900733ae57L,0x24c9a65cf07cdd94L, + 0x7389359ca4b55931L }, + { 0xf58709b7367e45f7L,0x1f203067cb7e7adcL,0x82444bffc7b72818L, + 0x07303b35baac8033L } }, + /* 23 << 161 */ + { { 0x1e1ee4e4d13b7ea1L,0xe6489b24e0e74180L,0xa5f2c6107e70ef70L, + 0xa1655412bdd10894L }, + { 0x555ebefb7af4194eL,0x533c1c3c8e89bd9cL,0x735b9b5789895856L, + 0x15fb3cd2567f5c15L } }, + /* 24 << 161 */ + { { 0x057fed45526f09fdL,0xe8a4f10c8128240aL,0x9332efc4ff2bfd8dL, + 0x214e77a0bd35aa31L }, + { 0x32896d7314faa40eL,0x767867ec01e5f186L,0xc9adf8f117a1813eL, + 0xcb6cda7854741795L } }, + /* 25 << 161 */ + { { 0xb7521b6d349d51aaL,0xf56b5a9ee3c7b8e9L,0xc6f1e5c932a096dfL, + 0x083667c4a3635024L }, + { 0x365ea13518087f2fL,0xf1b8eaacd136e45dL,0xc8a0e48473aec989L, + 0xd75a324b142c9259L } }, + /* 26 << 161 */ + { { 0xb7b4d00101dae185L,0x45434e0b9b7a94bcL,0xf54339affbd8cb0bL, + 0xdcc4569ee98ef49eL }, + { 0x7789318a09a51299L,0x81b4d206b2b025d8L,0xf64aa418fae85792L, + 0x3e50258facd7baf7L } }, + /* 27 << 161 */ + { { 0xdce84cdb2996864bL,0xa2e670891f485fa4L,0xb28b2bb6534c6a5aL, + 0x31a7ec6bc94b9d39L }, + { 0x1d217766d6bc20daL,0x4acdb5ec86761190L,0x6872632873701063L, + 0x4d24ee7c2128c29bL } }, + /* 28 << 161 */ + { { 0xc072ebd3a19fd868L,0x612e481cdb8ddd3bL,0xb4e1d7541a64d852L, + 0x00ef95acc4c6c4abL }, + { 0x1536d2edaa0a6c46L,0x6129408643774790L,0x54af25e8343fda10L, + 0x9ff9d98dfd25d6f2L } }, + /* 29 << 161 */ + { { 0x0746af7c468b8835L,0x977a31cb730ecea7L,0xa5096b80c2cf4a81L, + 0xaa9868336458c37aL }, + { 0x6af29bf3a6bd9d34L,0x6a62fe9b33c5d854L,0x50e6c304b7133b5eL, + 0x04b601597d6e6848L } }, + /* 30 << 161 */ + { { 0x4cd296df5579bea4L,0x10e35ac85ceedaf1L,0x04c4c5fde3bcc5b1L, + 0x95f9ee8a89412cf9L }, + { 0x2c9459ee82b6eb0fL,0x2e84576595c2aaddL,0x774a84aed327fcfeL, + 0xd8c937220368d476L } }, + /* 31 << 161 */ + { { 0x0dbd5748f83e8a3bL,0xa579aa968d2495f3L,0x535996a0ae496e9bL, + 0x07afbfe9b7f9bcc2L }, + { 0x3ac1dc6d5b7bd293L,0x3b592cff7022323dL,0xba0deb989c0a3e76L, + 0x18e78e9f4b197acbL } }, + /* 32 << 161 */ + { { 0x211cde10296c36efL,0x7ee8967282c4da77L,0xb617d270a57836daL, + 0xf0cd9c319cb7560bL }, + { 0x01fdcbf7e455fe90L,0x3fb53cbb7e7334f3L,0x781e2ea44e7de4ecL, + 0x8adab3ad0b384fd0L } }, + /* 33 << 161 */ + { { 0x129eee2f53d64829L,0x7a471e17a261492bL,0xe4f9adb9e4cb4a2cL, + 0x3d359f6f97ba2c2dL }, + { 0x346c67860aacd697L,0x92b444c375c2f8a8L,0xc79fa117d85df44eL, + 0x56782372398ddf31L } }, + /* 34 << 161 */ + { { 0x60e690f2bbbab3b8L,0x4851f8ae8b04816bL,0xc72046ab9c92e4d2L, + 0x518c74a17cf3136bL }, + { 
0xff4eb50af9877d4cL,0x14578d90a919cabbL,0x8218f8c4ac5eb2b6L, + 0xa3ccc547542016e4L } }, + /* 35 << 161 */ + { { 0x025bf48e327f8349L,0xf3e97346f43cb641L,0xdc2bafdf500f1085L, + 0x571678762f063055L }, + { 0x5bd914b9411925a6L,0x7c078d48a1123de5L,0xee6bf835182b165dL, + 0xb11b5e5bba519727L } }, + /* 36 << 161 */ + { { 0xe33ea76c1eea7b85L,0x2352b46192d4f85eL,0xf101d334afe115bbL, + 0xfabc1294889175a3L }, + { 0x7f6bcdc05233f925L,0xe0a802dbe77fec55L,0xbdb47b758069b659L, + 0x1c5e12def98fbd74L } }, + /* 37 << 161 */ + { { 0x869c58c64b8457eeL,0xa5360f694f7ea9f7L,0xe576c09ff460b38fL, + 0x6b70d54822b7fb36L }, + { 0x3fd237f13bfae315L,0x33797852cbdff369L,0x97df25f525b516f9L, + 0x46f388f2ba38ad2dL } }, + /* 38 << 161 */ + { { 0x656c465889d8ddbbL,0x8830b26e70f38ee8L,0x4320fd5cde1212b0L, + 0xc34f30cfe4a2edb2L }, + { 0xabb131a356ab64b8L,0x7f77f0ccd99c5d26L,0x66856a37bf981d94L, + 0x19e76d09738bd76eL } }, + /* 39 << 161 */ + { { 0xe76c8ac396238f39L,0xc0a482bea830b366L,0xb7b8eaff0b4eb499L, + 0x8ecd83bc4bfb4865L }, + { 0x971b2cb7a2f3776fL,0xb42176a4f4b88adfL,0xb9617df5be1fa446L, + 0x8b32d508cd031bd2L } }, + /* 40 << 161 */ + { { 0x1c6bd47d53b618c0L,0xc424f46c6a227923L,0x7303ffdedd92d964L, + 0xe971287871b5abf2L }, + { 0x8f48a632f815561dL,0x85f48ff5d3c055d1L,0x222a14277525684fL, + 0xd0d841a067360cc3L } }, + /* 41 << 161 */ + { { 0x4245a9260b9267c6L,0xc78913f1cf07f863L,0xaa844c8e4d0d9e24L, + 0xa42ad5223d5f9017L }, + { 0xbd371749a2c989d5L,0x928292dfe1f5e78eL,0x493b383e0a1ea6daL, + 0x5136fd8d13aee529L } }, + /* 42 << 161 */ + { { 0x860c44b1f2c34a99L,0x3b00aca4bf5855acL,0xabf6aaa0faaf37beL, + 0x65f436822a53ec08L }, + { 0x1d9a5801a11b12e1L,0x78a7ab2ce20ed475L,0x0de1067e9a41e0d5L, + 0x30473f5f305023eaL } }, + /* 43 << 161 */ + { { 0xdd3ae09d169c7d97L,0x5cd5baa4cfaef9cdL,0x5cd7440b65a44803L, + 0xdc13966a47f364deL }, + { 0x077b2be82b8357c1L,0x0cb1b4c5e9d57c2aL,0x7a4ceb3205ff363eL, + 0xf310fa4dca35a9efL } }, + /* 44 << 161 */ + { { 0xdbb7b352f97f68c6L,0x0c773b500b02cf58L,0xea2e48213c1f96d9L, + 0xffb357b0eee01815L }, + { 0xb9c924cde0f28039L,0x0b36c95a46a3fbe4L,0x1faaaea45e46db6cL, + 0xcae575c31928aaffL } }, + /* 45 << 161 */ + { { 0x7f671302a70dab86L,0xfcbd12a971c58cfcL,0xcbef9acfbee0cb92L, + 0x573da0b9f8c1b583L }, + { 0x4752fcfe0d41d550L,0xe7eec0e32155cffeL,0x0fc39fcb545ae248L, + 0x522cb8d18065f44eL } }, + /* 46 << 161 */ + { { 0x263c962a70cbb96cL,0xe034362abcd124a9L,0xf120db283c2ae58dL, + 0xb9a38d49fef6d507L }, + { 0xb1fd2a821ff140fdL,0xbd162f3020aee7e0L,0x4e17a5d4cb251949L, + 0x2aebcb834f7e1c3dL } }, + /* 47 << 161 */ + { { 0x608eb25f937b0527L,0xf42e1e47eb7d9997L,0xeba699c4b8a53a29L, + 0x1f921c71e091b536L }, + { 0xcce29e7b5b26bbd5L,0x7a8ef5ed3b61a680L,0xe5ef8043ba1f1c7eL, + 0x16ea821718158ddaL } }, + /* 48 << 161 */ + { { 0x01778a2b599ff0f9L,0x68a923d78104fc6bL,0x5bfa44dfda694ff3L, + 0x4f7199dbf7667f12L }, + { 0xc06d8ff6e46f2a79L,0x08b5deade9f8131dL,0x02519a59abb4ce7cL, + 0xc4f710bcb42aec3eL } }, + /* 49 << 161 */ + { { 0x3d77b05778bde41aL,0x6474bf80b4186b5aL,0x048b3f6788c65741L, + 0xc64519de03c7c154L }, + { 0xdf0738460edfcc4fL,0x319aa73748f1aa6bL,0x8b9f8a02ca909f77L, + 0x902581397580bfefL } }, + /* 50 << 161 */ + { { 0xd8bfd3cac0c22719L,0xc60209e4c9ca151eL,0x7a744ab5d9a1a69cL, + 0x6de5048b14937f8fL }, + { 0x171938d8e115ac04L,0x7df709401c6b16d2L,0xa6aeb6637f8e94e7L, + 0xc130388e2a2cf094L } }, + /* 51 << 161 */ + { { 0x1850be8477f54e6eL,0x9f258a7265d60fe5L,0xff7ff0c06c9146d6L, + 0x039aaf90e63a830bL }, + { 0x38f27a739460342fL,0x4703148c3f795f8aL,0x1bb5467b9681a97eL, + 0x00931ba5ecaeb594L } }, + /* 52 << 161 */ + { { 
0xcdb6719d786f337cL,0xd9c01cd2e704397dL,0x0f4a3f20555c2fefL, + 0x004525097c0af223L }, + { 0x54a5804784db8e76L,0x3bacf1aa93c8aa06L,0x11ca957cf7919422L, + 0x5064105378cdaa40L } }, + /* 53 << 161 */ + { { 0x7a3038749f7144aeL,0x170c963f43d4acfdL,0x5e14814958ddd3efL, + 0xa7bde5829e72dba8L }, + { 0x0769da8b6fa68750L,0xfa64e532572e0249L,0xfcaadf9d2619ad31L, + 0x87882daaa7b349cdL } }, + /* 54 << 161 */ + { { 0x9f6eb7316c67a775L,0xcb10471aefc5d0b1L,0xb433750ce1b806b2L, + 0x19c5714d57b1ae7eL }, + { 0xc0dc8b7bed03fd3fL,0xdd03344f31bc194eL,0xa66c52a78c6320b5L, + 0x8bc82ce3d0b6fd93L } }, + /* 55 << 161 */ + { { 0xf8e13501b35f1341L,0xe53156dd25a43e42L,0xd3adf27e4daeb85cL, + 0xb81d8379bbeddeb5L }, + { 0x1b0b546e2e435867L,0x9020eb94eba5dd60L,0x37d911618210cb9dL, + 0x4c596b315c91f1cfL } }, + /* 56 << 161 */ + { { 0xb228a90f0e0b040dL,0xbaf02d8245ff897fL,0x2aac79e600fa6122L, + 0x248288178e36f557L }, + { 0xb9521d31113ec356L,0x9e48861e15eff1f8L,0x2aa1d412e0d41715L, + 0x71f8620353f131b8L } }, + /* 57 << 161 */ + { { 0xf60da8da3fd19408L,0x4aa716dc278d9d99L,0x394531f7a8c51c90L, + 0xb560b0e8f59db51cL }, + { 0xa28fc992fa34bdadL,0xf024fa149cd4f8bdL,0x5cf530f723a9d0d3L, + 0x615ca193e28c9b56L } }, + /* 58 << 161 */ + { { 0x6d2a483d6f73c51eL,0xa4cb2412ea0dc2ddL,0x50663c411eb917ffL, + 0x3d3a74cfeade299eL }, + { 0x29b3990f4a7a9202L,0xa9bccf59a7b15c3dL,0x66a3ccdca5df9208L, + 0x48027c1443f2f929L } }, + /* 59 << 161 */ + { { 0xd385377c40b557f0L,0xe001c366cd684660L,0x1b18ed6be2183a27L, + 0x879738d863210329L }, + { 0xa687c74bbda94882L,0xd1bbcc48a684b299L,0xaf6f1112863b3724L, + 0x6943d1b42c8ce9f8L } }, + /* 60 << 161 */ + { { 0xe044a3bb098cafb4L,0x27ed231060d48cafL,0x542b56753a31b84dL, + 0xcbf3dd50fcddbed7L }, + { 0x25031f1641b1d830L,0xa7ec851dcb0c1e27L,0xac1c8fe0b5ae75dbL, + 0xb24c755708c52120L } }, + /* 61 << 161 */ + { { 0x57f811dc1d4636c3L,0xf8436526681a9939L,0x1f6bc6d99c81adb3L, + 0x840f8ac35b7d80d4L }, + { 0x731a9811f4387f1aL,0x7c501cd3b5156880L,0xa5ca4a07dfe68867L, + 0xf123d8f05fcea120L } }, + /* 62 << 161 */ + { { 0x1fbb0e71d607039eL,0x2b70e215cd3a4546L,0x32d2f01d53324091L, + 0xb796ff08180ab19bL }, + { 0x32d87a863c57c4aaL,0x2aed9cafb7c49a27L,0x9fb35eac31630d98L, + 0x338e8cdf5c3e20a3L } }, + /* 63 << 161 */ + { { 0x80f1618266cde8dbL,0x4e1599802d72fd36L,0xd7b8f13b9b6e5072L, + 0xf52139073b7b5dc1L }, + { 0x4d431f1d8ce4396eL,0x37a1a680a7ed2142L,0xbf375696d01aaf6bL, + 0xaa1c0c54e63aab66L } }, + /* 64 << 161 */ + { { 0x3014368b4ed80940L,0x67e6d0567a6fceddL,0x7c208c49ca97579fL, + 0xfe3d7a81a23597f6L }, + { 0x5e2032027e096ae2L,0xb1f3e1e724b39366L,0x26da26f32fdcdffcL, + 0x79422f1d6097be83L } }, + /* 0 << 168 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 168 */ + { { 0x263a2cfb9db3b381L,0x9c3a2deed4df0a4bL,0x728d06e97d04e61fL, + 0x8b1adfbc42449325L }, + { 0x6ec1d9397e053a1bL,0xee2be5c766daf707L,0x80ba1e14810ac7abL, + 0xdd2ae778f530f174L } }, + /* 2 << 168 */ + { { 0x0435d97a205b9d8bL,0x6eb8f064056756d4L,0xd5e88a8bb6f8210eL, + 0x070ef12dec9fd9eaL }, + { 0x4d8495053bcc876aL,0x12a75338a7404ce3L,0xd22b49e1b8a1db5eL, + 0xec1f205114bfa5adL } }, + /* 3 << 168 */ + { { 0xadbaeb79b6828f36L,0x9d7a025801bd5b9eL,0xeda01e0d1e844b0cL, + 0x4b625175887edfc9L }, + { 0x14109fdd9669b621L,0x88a2ca56f6f87b98L,0xfe2eb788170df6bcL, + 0x0cea06f4ffa473f9L } }, + /* 4 << 168 */ + { { 0x43ed81b5c4e83d33L,0xd9f358795efd488bL,0x164a620f9deb4d0fL, + 0xc6927bdbac6a7394L }, + { 0x45c28df79f9e0f03L,0x2868661efcd7e1a9L,0x7cf4e8d0ffa348f1L, + 0x6bd4c284398538e0L } }, + /* 5 << 168 */ + { { 
0x2618a091289a8619L,0xef796e606671b173L,0x664e46e59090c632L, + 0xa38062d41e66f8fbL }, + { 0x6c744a200573274eL,0xd07b67e4a9271394L,0x391223b26bdc0e20L, + 0xbe2d93f1eb0a05a7L } }, + /* 6 << 168 */ + { { 0xf23e2e533f36d141L,0xe84bb3d44dfca442L,0xb804a48d6b7c023aL, + 0x1e16a8fa76431c3bL }, + { 0x1b5452adddd472e0L,0x7d405ee70d1ee127L,0x50fc6f1dffa27599L, + 0x351ac53cbf391b35L } }, + /* 7 << 168 */ + { { 0x7efa14b84444896bL,0x64974d2ff94027fbL,0xefdcd0e8de84487dL, + 0x8c45b2602b48989bL }, + { 0xa8fcbbc2d8463487L,0xd1b2b3f73fbc476cL,0x21d005b7c8f443c0L, + 0x518f2e6740c0139cL } }, + /* 8 << 168 */ + { { 0x56036e8c06d75fc1L,0x2dcf7bb73249a89fL,0x81dd1d3de245e7ddL, + 0xf578dc4bebd6e2a7L }, + { 0x4c028903df2ce7a0L,0xaee362889c39afacL,0xdc847c31146404abL, + 0x6304c0d8a4e97818L } }, + /* 9 << 168 */ + { { 0xae51dca2a91f6791L,0x2abe41909baa9efcL,0xd9d2e2f4559c7ac1L, + 0xe82f4b51fc9f773aL }, + { 0xa77130274073e81cL,0xc0276facfbb596fcL,0x1d819fc9a684f70cL, + 0x29b47fddc9f7b1e0L } }, + /* 10 << 168 */ + { { 0x358de103459b1940L,0xec881c595b013e93L,0x51574c9349532ad3L, + 0x2db1d445b37b46deL }, + { 0xc6445b87df239fd8L,0xc718af75151d24eeL,0xaea1c4a4f43c6259L, + 0x40c0e5d770be02f7L } }, + /* 11 << 168 */ + { { 0x6a4590f4721b33f2L,0x2124f1fbfedf04eaL,0xf8e53cde9745efe7L, + 0xe7e1043265f046d9L }, + { 0xc3fca28ee4d0c7e6L,0x847e339a87253b1bL,0x9b5953483743e643L, + 0xcb6a0a0b4fd12fc5L } }, + /* 12 << 168 */ + { { 0xfb6836c327d02dccL,0x5ad009827a68bcc2L,0x1b24b44c005e912dL, + 0xcc83d20f811fdcfeL }, + { 0x36527ec1666fba0cL,0x6994819714754635L,0xfcdcb1a8556da9c2L, + 0xa593426781a732b2L } }, + /* 13 << 168 */ + { { 0xec1214eda714181dL,0x609ac13b6067b341L,0xff4b4c97a545df1fL, + 0xa124050134d2076bL }, + { 0x6efa0c231409ca97L,0x254cc1a820638c43L,0xd4e363afdcfb46cdL, + 0x62c2adc303942a27L } }, + /* 14 << 168 */ + { { 0xc67b9df056e46483L,0xa55abb2063736356L,0xab93c098c551bc52L, + 0x382b49f9b15fe64bL }, + { 0x9ec221ad4dff8d47L,0x79caf615437df4d6L,0x5f13dc64bb456509L, + 0xe4c589d9191f0714L } }, + /* 15 << 168 */ + { { 0x27b6a8ab3fd40e09L,0xe455842e77313ea9L,0x8b51d1e21f55988bL, + 0x5716dd73062bbbfcL }, + { 0x633c11e54e8bf3deL,0x9a0e77b61b85be3bL,0x565107290911cca6L, + 0x27e76495efa6590fL } }, + /* 16 << 168 */ + { { 0xe4ac8b33070d3aabL,0x2643672b9a2cd5e5L,0x52eff79b1cfc9173L, + 0x665ca49b90a7c13fL }, + { 0x5a8dda59b3efb998L,0x8a5b922d052f1341L,0xae9ebbab3cf9a530L, + 0x35986e7bf56da4d7L } }, + /* 17 << 168 */ + { { 0x3a636b5cff3513ccL,0xbb0cf8ba3198f7ddL,0xb8d4052241f16f86L, + 0x760575d8de13a7bfL }, + { 0x36f74e169f7aa181L,0x163a3ecff509ed1cL,0x6aead61f3c40a491L, + 0x158c95fcdfe8fcaaL } }, + /* 18 << 168 */ + { { 0xa3991b6e13cda46fL,0x79482415342faed0L,0xf3ba5bde666b5970L, + 0x1d52e6bcb26ab6ddL }, + { 0x768ba1e78608dd3dL,0x4930db2aea076586L,0xd9575714e7dc1afaL, + 0x1fc7bf7df7c58817L } }, + /* 19 << 168 */ + { { 0x6b47accdd9eee96cL,0x0ca277fbe58cec37L,0x113fe413e702c42aL, + 0xdd1764eec47cbe51L }, + { 0x041e7cde7b3ed739L,0x50cb74595ce9e1c0L,0x355685132925b212L, + 0x7cff95c4001b081cL } }, + /* 20 << 168 */ + { { 0x63ee4cbd8088b454L,0xdb7f32f79a9e0c8aL,0xb377d4186b2447cbL, + 0xe3e982aad370219bL }, + { 0x06ccc1e4c2a2a593L,0x72c368650773f24fL,0xa13b4da795859423L, + 0x8bbf1d3375040c8fL } }, + /* 21 << 168 */ + { { 0x726f0973da50c991L,0x48afcd5b822d6ee2L,0xe5fc718b20fd7771L, + 0xb9e8e77dfd0807a1L }, + { 0x7f5e0f4499a7703dL,0x6972930e618e36f3L,0x2b7c77b823807bbeL, + 0xe5b82405cb27ff50L } }, + /* 22 << 168 */ + { { 0xba8b8be3bd379062L,0xd64b7a1d2dce4a92L,0x040a73c5b2952e37L, + 0x0a9e252ed438aecaL }, + { 
0xdd43956bc39d3bcbL,0x1a31ca00b32b2d63L,0xd67133b85c417a18L, + 0xd08e47902ef442c8L } }, + /* 23 << 168 */ + { { 0x98cb1ae9255c0980L,0x4bd863812b4a739fL,0x5a5c31e11e4a45a1L, + 0x1e5d55fe9cb0db2fL }, + { 0x74661b068ff5cc29L,0x026b389f0eb8a4f4L,0x536b21a458848c24L, + 0x2e5bf8ec81dc72b0L } }, + /* 24 << 168 */ + { { 0x03c187d0ad886aacL,0x5c16878ab771b645L,0xb07dfc6fc74045abL, + 0x2c6360bf7800caedL }, + { 0x24295bb5b9c972a3L,0xc9e6f88e7c9a6dbaL,0x90ffbf2492a79aa6L, + 0xde29d50a41c26ac2L } }, + /* 25 << 168 */ + { { 0x9f0af483d309cbe6L,0x5b020d8ae0bced4fL,0x606e986db38023e3L, + 0xad8f2c9d1abc6933L }, + { 0x19292e1de7400e93L,0xfe3e18a952be5e4dL,0xe8e9771d2e0680bfL, + 0x8c5bec98c54db063L } }, + /* 26 << 168 */ + { { 0x2af9662a74a55d1fL,0xe3fbf28f046f66d8L,0xa3a72ab4d4dc4794L, + 0x09779f455c7c2dd8L }, + { 0xd893bdafc3d19d8dL,0xd5a7509457d6a6dfL,0x8cf8fef9952e6255L, + 0x3da67cfbda9a8affL } }, + /* 27 << 168 */ + { { 0x4c23f62a2c160dcdL,0x34e6c5e38f90eaefL,0x35865519a9a65d5aL, + 0x07c48aae8fd38a3dL }, + { 0xb7e7aeda50068527L,0x2c09ef231c90936aL,0x31ecfeb6e879324cL, + 0xa0871f6bfb0ec938L } }, + /* 28 << 168 */ + { { 0xb1f0fb68d84d835dL,0xc90caf39861dc1e6L,0x12e5b0467594f8d7L, + 0x26897ae265012b92L }, + { 0xbcf68a08a4d6755dL,0x403ee41c0991fbdaL,0x733e343e3bbf17e8L, + 0xd2c7980d679b3d65L } }, + /* 29 << 168 */ + { { 0x33056232d2e11305L,0x966be492f3c07a6fL,0x6a8878ffbb15509dL, + 0xff2211010a9b59a4L }, + { 0x6c9f564aabe30129L,0xc6f2c940336e64cfL,0x0fe752628b0c8022L, + 0xbe0267e96ae8db87L } }, + /* 30 << 168 */ + { { 0x22e192f193bc042bL,0xf085b534b237c458L,0xa0d192bd832c4168L, + 0x7a76e9e3bdf6271dL }, + { 0x52a882fab88911b5L,0xc85345e4b4db0eb5L,0xa3be02a681a7c3ffL, + 0x51889c8cf0ec0469L } }, + /* 31 << 168 */ + { { 0x9d031369a5e829e5L,0xcbb4c6fc1607aa41L,0x75ac59a6241d84c1L, + 0xc043f2bf8829e0eeL }, + { 0x82a38f758ea5e185L,0x8bda40b9d87cbd9fL,0x9e65e75e2d8fc601L, + 0x3d515f74a35690b3L } }, + /* 32 << 168 */ + { { 0x534acf4fda79e5acL,0x68b83b3a8630215fL,0x5c748b2ed085756eL, + 0xb0317258e5d37cb2L }, + { 0x6735841ac5ccc2c4L,0x7d7dc96b3d9d5069L,0xa147e410fd1754bdL, + 0x65296e94d399ddd5L } }, + /* 33 << 168 */ + { { 0xf6b5b2d0bc8fa5bcL,0x8a5ead67500c277bL,0x214625e6dfa08a5dL, + 0x51fdfedc959cf047L }, + { 0x6bc9430b289fca32L,0xe36ff0cf9d9bdc3fL,0x2fe187cb58ea0edeL, + 0xed66af205a900b3fL } }, + /* 34 << 168 */ + { { 0x00e0968b5fa9f4d6L,0x2d4066ce37a362e7L,0xa99a9748bd07e772L, + 0x710989c006a4f1d0L }, + { 0xd5dedf35ce40cbd8L,0xab55c5f01743293dL,0x766f11448aa24e2cL, + 0x94d874f8605fbcb4L } }, + /* 35 << 168 */ + { { 0xa365f0e8a518001bL,0xee605eb69d04ef0fL,0x5a3915cdba8d4d25L, + 0x44c0e1b8b5113472L }, + { 0xcbb024e88b6740dcL,0x89087a53ee1d4f0cL,0xa88fa05c1fc4e372L, + 0x8bf395cbaf8b3af2L } }, + /* 36 << 168 */ + { { 0x1e71c9a1deb8568bL,0xa35daea080fb3d32L,0xe8b6f2662cf8fb81L, + 0x6d51afe89490696aL }, + { 0x81beac6e51803a19L,0xe3d24b7f86219080L,0x727cfd9ddf6f463cL, + 0x8c6865ca72284ee8L } }, + /* 37 << 168 */ + { { 0x32c88b7db743f4efL,0x3793909be7d11dceL,0xd398f9222ff2ebe8L, + 0x2c70ca44e5e49796L }, + { 0xdf4d9929cb1131b1L,0x7826f29825888e79L,0x4d3a112cf1d8740aL, + 0x00384cb6270afa8bL } }, + /* 38 << 168 */ + { { 0xcb64125b3ab48095L,0x3451c25662d05106L,0xd73d577da4955845L, + 0x39570c16bf9f4433L }, + { 0xd7dfaad3adecf263L,0xf1c3d8d1dc76e102L,0x5e774a5854c6a836L, + 0xdad4b6723e92d47bL } }, + /* 39 << 168 */ + { { 0xbe7e990ff0d796a0L,0x5fc62478df0e8b02L,0x8aae8bf4030c00adL, + 0x3d2db93b9004ba0fL }, + { 0xe48c8a79d85d5ddcL,0xe907caa76bb07f34L,0x58db343aa39eaed5L, + 0x0ea6e007adaf5724L } }, + /* 40 << 168 */ + { { 
0xe00df169d23233f3L,0x3e32279677cb637fL,0x1f897c0e1da0cf6cL, + 0xa651f5d831d6bbddL }, + { 0xdd61af191a230c76L,0xbd527272cdaa5e4aL,0xca753636d0abcd7eL, + 0x78bdd37c370bd8dcL } }, + /* 41 << 168 */ + { { 0xc23916c217cd93feL,0x65b97a4ddadce6e2L,0xe04ed4eb174e42f8L, + 0x1491ccaabb21480aL }, + { 0x145a828023196332L,0x3c3862d7587b479aL,0x9f4a88a301dcd0edL, + 0x4da2b7ef3ea12f1fL } }, + /* 42 << 168 */ + { { 0xf8e7ae33b126e48eL,0x404a0b32f494e237L,0x9beac474c55acadbL, + 0x4ee5cf3bcbec9fd9L }, + { 0x336b33b97df3c8c3L,0xbd905fe3b76808fdL,0x8f436981aa45c16aL, + 0x255c5bfa3dd27b62L } }, + /* 43 << 168 */ + { { 0x71965cbfc3dd9b4dL,0xce23edbffc068a87L,0xb78d4725745b029bL, + 0x74610713cefdd9bdL }, + { 0x7116f75f1266bf52L,0x0204672218e49bb6L,0xdf43df9f3d6f19e3L, + 0xef1bc7d0e685cb2fL } }, + /* 44 << 168 */ + { { 0xcddb27c17078c432L,0xe1961b9cb77fedb7L,0x1edc2f5cc2290570L, + 0x2c3fefca19cbd886L }, + { 0xcf880a36c2af389aL,0x96c610fdbda71ceaL,0xf03977a932aa8463L, + 0x8eb7763f8586d90aL } }, + /* 45 << 168 */ + { { 0x3f3424542a296e77L,0xc871868342837a35L,0x7dc710906a09c731L, + 0x54778ffb51b816dbL }, + { 0x6b33bfecaf06defdL,0xfe3c105f8592b70bL,0xf937fda461da6114L, + 0x3c13e6514c266ad7L } }, + /* 46 << 168 */ + { { 0xe363a829855938e8L,0x2eeb5d9e9de54b72L,0xbeb93b0e20ccfab9L, + 0x3dffbb5f25e61a25L }, + { 0x7f655e431acc093dL,0x0cb6cc3d3964ce61L,0x6ab283a1e5e9b460L, + 0x55d787c5a1c7e72dL } }, + /* 47 << 168 */ + { { 0x4d2efd47deadbf02L,0x11e80219ac459068L,0x810c762671f311f0L, + 0xfa17ef8d4ab6ef53L }, + { 0xaf47fd2593e43bffL,0x5cb5ff3f0be40632L,0x546871068ee61da3L, + 0x7764196eb08afd0fL } }, + /* 48 << 168 */ + { { 0x831ab3edf0290a8fL,0xcae81966cb47c387L,0xaad7dece184efb4fL, + 0xdcfc53b34749110eL }, + { 0x6698f23c4cb632f9L,0xc42a1ad6b91f8067L,0xb116a81d6284180aL, + 0xebedf5f8e901326fL } }, + /* 49 << 168 */ + { { 0xf2274c9f97e3e044L,0x4201852011d09fc9L,0x56a65f17d18e6e23L, + 0x2ea61e2a352b683cL }, + { 0x27d291bc575eaa94L,0x9e7bc721b8ff522dL,0x5f7268bfa7f04d6fL, + 0x5868c73faba41748L } }, + /* 50 << 168 */ + { { 0x9f85c2db7be0eeadL,0x511e7842ff719135L,0x5a06b1e9c5ea90d7L, + 0x0c19e28326fab631L }, + { 0x8af8f0cfe9206c55L,0x89389cb43553c06aL,0x39dbed97f65f8004L, + 0x0621b037c508991dL } }, + /* 51 << 168 */ + { { 0x1c52e63596e78cc4L,0x5385c8b20c06b4a8L,0xd84ddfdbb0e87d03L, + 0xc49dfb66934bafadL }, + { 0x7071e17059f70772L,0x3a073a843a1db56bL,0x034949033b8af190L, + 0x7d882de3d32920f0L } }, + /* 52 << 168 */ + { { 0x91633f0ab2cf8940L,0x72b0b1786f948f51L,0x2d28dc30782653c8L, + 0x88829849db903a05L }, + { 0xb8095d0c6a19d2bbL,0x4b9e7f0c86f782cbL,0x7af739882d907064L, + 0xd12be0fe8b32643cL } }, + /* 53 << 168 */ + { { 0x358ed23d0e165dc3L,0x3d47ce624e2378ceL,0x7e2bb0b9feb8a087L, + 0x3246e8aee29e10b9L }, + { 0x459f4ec703ce2b4dL,0xe9b4ca1bbbc077cfL,0x2613b4f20e9940c1L, + 0xfc598bb9047d1eb1L } }, + /* 54 << 168 */ + { { 0x9744c62b45036099L,0xa9dee742167c65d8L,0x0c511525dabe1943L, + 0xda11055493c6c624L }, + { 0xae00a52c651a3be2L,0xcda5111d884449a6L,0x063c06f4ff33bed1L, + 0x73baaf9a0d3d76b4L } }, + /* 55 << 168 */ + { { 0x52fb0c9d7fc63668L,0x6886c9dd0c039cdeL,0x602bd59955b22351L, + 0xb00cab02360c7c13L }, + { 0x8cb616bc81b69442L,0x41486700b55c3ceeL,0x71093281f49ba278L, + 0xad956d9c64a50710L } }, + /* 56 << 168 */ + { { 0x9561f28b638a7e81L,0x54155cdf5980ddc3L,0xb2db4a96d26f247aL, + 0x9d774e4e4787d100L }, + { 0x1a9e6e2e078637d2L,0x1c363e2d5e0ae06aL,0x7493483ee9cfa354L, + 0x76843cb37f74b98dL } }, + /* 57 << 168 */ + { { 0xbaca6591d4b66947L,0xb452ce9804460a8cL,0x6830d24643768f55L, + 0xf4197ed87dff12dfL }, + { 
0x6521b472400dd0f7L,0x59f5ca8f4b1e7093L,0x6feff11b080338aeL, + 0x0ada31f6a29ca3c6L } }, + /* 58 << 168 */ + { { 0x24794eb694a2c215L,0xd83a43ab05a57ab4L,0x264a543a2a6f89feL, + 0x2c2a3868dd5ec7c2L }, + { 0xd33739408439d9b2L,0x715ea6720acd1f11L,0x42c1d235e7e6cc19L, + 0x81ce6e96b990585cL } }, + /* 59 << 168 */ + { { 0x04e5dfe0d809c7bdL,0xd7b2580c8f1050abL,0x6d91ad78d8a4176fL, + 0x0af556ee4e2e897cL }, + { 0x162a8b73921de0acL,0x52ac9c227ea78400L,0xee2a4eeaefce2174L, + 0xbe61844e6d637f79L } }, + /* 60 << 168 */ + { { 0x0491f1bc789a283bL,0x72d3ac3d880836f4L,0xaa1c5ea388e5402dL, + 0x1b192421d5cc473dL }, + { 0x5c0b99989dc84cacL,0xb0a8482d9c6e75b8L,0x639961d03a191ce2L, + 0xda3bc8656d837930L } }, + /* 61 << 168 */ + { { 0xca990653056e6f8fL,0x84861c4164d133a7L,0x8b403276746abe40L, + 0xb7b4d51aebf8e303L }, + { 0x05b43211220a255dL,0xc997152c02419e6eL,0x76ff47b6630c2feaL, + 0x50518677281fdadeL } }, + /* 62 << 168 */ + { { 0x3283b8bacf902b0bL,0x8d4b4eb537db303bL,0xcc89f42d755011bcL, + 0xb43d74bbdd09d19bL }, + { 0x65746bc98adba350L,0x364eaf8cb51c1927L,0x13c7659610ad72ecL, + 0x30045121f8d40c20L } }, + /* 63 << 168 */ + { { 0x6d2d99b7ea7b979bL,0xcd78cd74e6fb3bcdL,0x11e45a9e86cffbfeL, + 0x78a61cf4637024f6L }, + { 0xd06bc8723d502295L,0xf1376854458cb288L,0xb9db26a1342f8586L, + 0xf33effcf4beee09eL } }, + /* 64 << 168 */ + { { 0xd7e0c4cdb30cfb3aL,0x6d09b8c16c9db4c8L,0x40ba1a4207c8d9dfL, + 0x6fd495f71c52c66dL }, + { 0xfb0e169f275264daL,0x80c2b746e57d8362L,0xedd987f749ad7222L, + 0xfdc229af4398ec7bL } }, + /* 0 << 175 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 175 */ + { { 0xb0d1ed8452666a58L,0x4bcb6e00e6a9c3c2L,0x3c57411c26906408L, + 0xcfc2075513556400L }, + { 0xa08b1c505294dba3L,0xa30ba2868b7dd31eL,0xd70ba90e991eca74L, + 0x094e142ce762c2b9L } }, + /* 2 << 175 */ + { { 0xb81d783e979f3925L,0x1efd130aaf4c89a7L,0x525c2144fd1bf7faL, + 0x4b2969041b265a9eL }, + { 0xed8e9634b9db65b6L,0x35c82e3203599d8aL,0xdaa7a54f403563f3L, + 0x9df088ad022c38abL } }, + /* 3 << 175 */ + { { 0xe5cfb066bb3fd30aL,0x429169daeff0354eL,0x809cf8523524e36cL, + 0x136f4fb30155be1dL }, + { 0x4826af011fbba712L,0x6ef0f0b4506ba1a1L,0xd9928b3177aea73eL, + 0xe2bf6af25eaa244eL } }, + /* 4 << 175 */ + { { 0x8d084f124237b64bL,0x688ebe99e3ecfd07L,0x57b8a70cf6845dd8L, + 0x808fc59c5da4a325L }, + { 0xa9032b2ba3585862L,0xb66825d5edf29386L,0xb5a5a8db431ec29bL, + 0xbb143a983a1e8dc8L } }, + /* 5 << 175 */ + { { 0x35ee94ce12ae381bL,0x3a7f176c86ccda90L,0xc63a657e4606eacaL, + 0x9ae5a38043cd04dfL }, + { 0x9bec8d15ed251b46L,0x1f5d6d30caca5e64L,0x347b3b359ff20f07L, + 0x4d65f034f7e4b286L } }, + /* 6 << 175 */ + { { 0x9e93ba24f111661eL,0xedced484b105eb04L,0x96dc9ba1f424b578L, + 0xbf8f66b7e83e9069L }, + { 0x872d4df4d7ed8216L,0xbf07f3778e2cbecfL,0x4281d89998e73754L, + 0xfec85fbb8aab8708L } }, + /* 7 << 175 */ + { { 0x9a3c0deea5ba5b0bL,0xe6a116ce42d05299L,0xae9775fee9b02d42L, + 0x72b05200a1545cb6L }, + { 0xbc506f7d31a3b4eaL,0xe58930788bbd9b32L,0xc8bc5f37e4b12a97L, + 0x6b000c064a73b671L } }, + /* 8 << 175 */ + { { 0x13b5bf22765fa7d0L,0x59805bf01d6a5370L,0x67a5e29d4280db98L, + 0x4f53916f776b1ce3L }, + { 0x714ff61f33ddf626L,0x4206238ea085d103L,0x1c50d4b7e5809ee3L, + 0x999f450d85f8eb1dL } }, + /* 9 << 175 */ + { { 0x658a6051e4c79e9bL,0x1394cb73c66a9feaL,0x27f31ed5c6be7b23L, + 0xf4c88f365aa6f8feL }, + { 0x0fb0721f4aaa499eL,0x68b3a7d5e3fb2a6bL,0xa788097d3a92851dL, + 0x060e7f8ae96f4913L } }, + /* 10 << 175 */ + { { 0x82eebe731a3a93bcL,0x42bbf465a21adc1aL,0xc10b6fa4ef030efdL, + 0x247aa4c787b097bbL }, + { 
0x8b8dc632f60c77daL,0x6ffbc26ac223523eL,0xa4f6ff11344579cfL, + 0x5825653c980250f6L } }, + /* 11 << 175 */ + { { 0xb2dd097ebc1aa2b9L,0x0788939337a0333aL,0x1cf55e7137a0db38L, + 0x2648487f792c1613L }, + { 0xdad013363fcef261L,0x6239c81d0eabf129L,0x8ee761de9d276be2L, + 0x406a7a341eda6ad3L } }, + /* 12 << 175 */ + { { 0x4bf367ba4a493b31L,0x54f20a529bf7f026L,0xb696e0629795914bL, + 0xcddab96d8bf236acL }, + { 0x4ff2c70aed25ea13L,0xfa1d09eb81cbbbe7L,0x88fc8c87468544c5L, + 0x847a670d696b3317L } }, + /* 13 << 175 */ + { { 0xf133421e64bcb626L,0xaea638c826dee0b5L,0xd6e7680bb310346cL, + 0xe06f4097d5d4ced3L }, + { 0x099614527512a30bL,0xf3d867fde589a59aL,0x2e73254f52d0c180L, + 0x9063d8a3333c74acL } }, + /* 14 << 175 */ + { { 0xeda6c595d314e7bcL,0x2ee7464b467899edL,0x1cef423c0a1ed5d3L, + 0x217e76ea69cc7613L }, + { 0x27ccce1fe7cda917L,0x12d8016b8a893f16L,0xbcd6de849fc74f6bL, + 0xfa5817e2f3144e61L } }, + /* 15 << 175 */ + { { 0x1f3541640821ee4cL,0x1583eab40bc61992L,0x7490caf61d72879fL, + 0x998ad9f3f76ae7b2L }, + { 0x1e181950a41157f7L,0xa9d7e1e6e8da3a7eL,0x963784eb8426b95fL, + 0x0ee4ed6e542e2a10L } }, + /* 16 << 175 */ + { { 0xb79d4cc5ac751e7bL,0x93f96472fd4211bdL,0x8c72d3d2c8de4fc6L, + 0x7b69cbf5df44f064L }, + { 0x3da90ca2f4bf94e1L,0x1a5325f8f12894e2L,0x0a437f6c7917d60bL, + 0x9be7048696c9cb5dL } }, + /* 17 << 175 */ + { { 0xb4d880bfe1dc5c05L,0xd738addaeebeeb57L,0x6f0119d3df0fe6a3L, + 0x5c686e5566eaaf5aL }, + { 0x9cb10b50dfd0b7ecL,0xbdd0264b6a497c21L,0xfc0935148c546c96L, + 0x58a947fa79dbf42aL } }, + /* 18 << 175 */ + { { 0xc0b48d4e49ccd6d7L,0xff8fb02c88bd5580L,0xc75235e907d473b2L, + 0x4fab1ac5a2188af3L }, + { 0x030fa3bc97576ec0L,0xe8c946e80b7e7d2fL,0x40a5c9cc70305600L, + 0x6d8260a9c8b013b4L } }, + /* 19 << 175 */ + { { 0x0368304f70bba85cL,0xad090da1a4a0d311L,0x7170e8702415eec1L, + 0xbfba35fe8461ea47L }, + { 0x6279019ac1e91938L,0xa47638f31afc415fL,0x36c65cbbbcba0e0fL, + 0x02160efb034e2c48L } }, + /* 20 << 175 */ + { { 0xe6c51073615cd9e4L,0x498ec047f1243c06L,0x3e5a8809b17b3d8cL, + 0x5cd99e610cc565f1L }, + { 0x81e312df7851dafeL,0xf156f5baa79061e2L,0x80d62b71880c590eL, + 0xbec9746f0a39faa1L } }, + /* 21 << 175 */ + { { 0x1d98a9c1c8ed1f7aL,0x09e43bb5a81d5ff2L,0xd5f00f680da0794aL, + 0x412050d9661aa836L }, + { 0xa89f7c4e90747e40L,0x6dc05ebbb62a3686L,0xdf4de847308e3353L, + 0x53868fbb9fb53bb9L } }, + /* 22 << 175 */ + { { 0x2b09d2c3cfdcf7ddL,0x41a9fce3723fcab4L,0x73d905f707f57ca3L, + 0x080f9fb1ac8e1555L }, + { 0x7c088e849ba7a531L,0x07d35586ed9a147fL,0x602846abaf48c336L, + 0x7320fd320ccf0e79L } }, + /* 23 << 175 */ + { { 0xaa780798b18bd1ffL,0x52c2e300afdd2905L,0xf27ea3d6434267cdL, + 0x8b96d16d15605b5fL }, + { 0x7bb310494b45706bL,0xe7f58b8e743d25f8L,0xe9b5e45b87f30076L, + 0xd19448d65d053d5aL } }, + /* 24 << 175 */ + { { 0x1ecc8cb9d3210a04L,0x6bc7d463dafb5269L,0x3e59b10a67c3489fL, + 0x1769788c65641e1bL }, + { 0x8a53b82dbd6cb838L,0x7066d6e6236d5f22L,0x03aa1c616908536eL, + 0xc971da0d66ae9809L } }, + /* 25 << 175 */ + { { 0x01b3a86bc49a2facL,0x3b8420c03092e77aL,0x020573007d6fb556L, + 0x6941b2a1bff40a87L }, + { 0x140b63080658ff2aL,0x878043633424ab36L,0x0253bd515751e299L, + 0xc75bcd76449c3e3aL } }, + /* 26 << 175 */ + { { 0x92eb40907f8f875dL,0x9c9d754e56c26bbfL,0x158cea618110bbe7L, + 0x62a6b802745f91eaL }, + { 0xa79c41aac6e7394bL,0x445b6a83ad57ef10L,0x0c5277eb6ea6f40cL, + 0x319fe96b88633365L } }, + /* 27 << 175 */ + { { 0x0b0fc61f385f63cbL,0x41250c8422bdd127L,0x67d153f109e942c2L, + 0x60920d08c021ad5dL }, + { 0x229f5746724d81a5L,0xb7ffb8925bba3299L,0x518c51a1de413032L, + 0x2a9bfe773c2fd94cL } }, + /* 28 << 175 */ + { { 
0xcbcde2393191f4fdL,0x43093e16d3d6ada1L,0x184579f358769606L, + 0x2c94a8b3d236625cL }, + { 0x6922b9c05c437d8eL,0x3d4ae423d8d9f3c8L,0xf72c31c12e7090a2L, + 0x4ac3f5f3d76a55bdL } }, + /* 29 << 175 */ + { { 0x342508fc6b6af991L,0x0d5271001b5cebbdL,0xb84740d0dd440dd7L, + 0x748ef841780162fdL }, + { 0xa8dbfe0edfc6fafbL,0xeadfdf05f7300f27L,0x7d06555ffeba4ec9L, + 0x12c56f839e25fa97L } }, + /* 30 << 175 */ + { { 0x77f84203d39b8c34L,0xed8b1be63125eddbL,0x5bbf2441f6e39dc5L, + 0xb00f6ee66a5d678aL }, + { 0xba456ecf57d0ea99L,0xdcae0f5817e06c43L,0x01643de40f5b4baaL, + 0x2c324341d161b9beL } }, + /* 31 << 175 */ + { { 0x80177f55e126d468L,0xed325f1f76748e09L,0x6116004acfa9bdc2L, + 0x2d8607e63a9fb468L }, + { 0x0e573e276009d660L,0x3a525d2e8d10c5a1L,0xd26cb45c3b9009a0L, + 0xb6b0cdc0de9d7448L } }, + /* 32 << 175 */ + { { 0x949c9976e1337c26L,0x6faadebdd73d68e5L,0x9e158614f1b768d9L, + 0x22dfa5579cc4f069L }, + { 0xccd6da17be93c6d6L,0x24866c61a504f5b9L,0x2121353c8d694da1L, + 0x1c6ca5800140b8c6L } }, + /* 33 << 175 */ + { { 0xc245ad8ce964021eL,0xb83bffba032b82b3L,0xfaa220c647ef9898L, + 0x7e8d3ac6982c948aL }, + { 0x1faa2091bc2d124aL,0xbd54c3dd05b15ff4L,0x386bf3abc87c6fb7L, + 0xfb2b0563fdeb6f66L } }, + /* 34 << 175 */ + { { 0x4e77c5575b45afb4L,0xe9ded649efb8912dL,0x7ec9bbf542f6e557L, + 0x2570dfff62671f00L }, + { 0x2b3bfb7888e084bdL,0xa024b238f37fe5b4L,0x44e7dc0495649aeeL, + 0x498ca2555e7ec1d8L } }, + /* 35 << 175 */ + { { 0x3bc766eaaaa07e86L,0x0db6facbf3608586L,0xbadd2549bdc259c8L, + 0x95af3c6e041c649fL }, + { 0xb36a928c02e30afbL,0x9b5356ad008a88b8L,0x4b67a5f1cf1d9e9dL, + 0xc6542e47a5d8d8ceL } }, + /* 36 << 175 */ + { { 0x73061fe87adfb6ccL,0xcc826fd398678141L,0x00e758b13c80515aL, + 0x6afe324741485083L }, + { 0x0fcb08b9b6ae8a75L,0xb8cf388d4acf51e1L,0x344a55606961b9d6L, + 0x1a6778b86a97fd0cL } }, + /* 37 << 175 */ + { { 0xd840fdc1ecc4c7e3L,0xde9fe47d16db68ccL,0xe95f89dea3e216aaL, + 0x84f1a6a49594a8beL }, + { 0x7ddc7d725a7b162bL,0xc5cfda19adc817a3L,0x80a5d35078b58d46L, + 0x93365b1382978f19L } }, + /* 38 << 175 */ + { { 0x2e44d22526a1fc90L,0x0d6d10d24d70705dL,0xd94b6b10d70c45f4L, + 0x0f201022b216c079L }, + { 0xcec966c5658fde41L,0xa8d2bc7d7e27601dL,0xbfcce3e1ff230be7L, + 0x3394ff6b0033ffb5L } }, + /* 39 << 175 */ + { { 0xd890c5098132c9afL,0xaac4b0eb361e7868L,0x5194ded3e82d15aaL, + 0x4550bd2e23ae6b7dL }, + { 0x3fda318eea5399d4L,0xd989bffa91638b80L,0x5ea124d0a14aa12dL, + 0x1fb1b8993667b944L } }, + /* 40 << 175 */ + { { 0x95ec796944c44d6aL,0x91df144a57e86137L,0x915fd62073adac44L, + 0x8f01732d59a83801L }, + { 0xec579d253aa0a633L,0x06de5e7cc9d6d59cL,0xc132f958b1ef8010L, + 0x29476f96e65c1a02L } }, + /* 41 << 175 */ + { { 0x336a77c0d34c3565L,0xef1105b21b9f1e9eL,0x63e6d08bf9e08002L, + 0x9aff2f21c613809eL }, + { 0xb5754f853a80e75dL,0xde71853e6bbda681L,0x86f041df8197fd7aL, + 0x8b332e08127817faL } }, + /* 42 << 175 */ + { { 0x05d99be8b9c20cdaL,0x89f7aad5d5cd0c98L,0x7ef936fe5bb94183L, + 0x92ca0753b05cd7f2L }, + { 0x9d65db1174a1e035L,0x02628cc813eaea92L,0xf2d9e24249e4fbf2L, + 0x94fdfd9be384f8b7L } }, + /* 43 << 175 */ + { { 0x65f5605463428c6bL,0x2f7205b290b409a5L,0xf778bb78ff45ae11L, + 0xa13045bec5ee53b2L }, + { 0xe00a14ff03ef77feL,0x689cd59fffef8befL,0x3578f0ed1e9ade22L, + 0xe99f3ec06268b6a8L } }, + /* 44 << 175 */ + { { 0xa2057d91ea1b3c3eL,0x2d1a7053b8823a4aL,0xabbb336a2cca451eL, + 0xcd2466e32218bb5dL }, + { 0x3ac1f42fc8cb762dL,0x7e312aae7690211fL,0xebb9bd7345d07450L, + 0x207c4b8246c2213fL } }, + /* 45 << 175 */ + { { 0x99d425c1375913ecL,0x94e45e9667908220L,0xc08f3087cd67dbf6L, + 0xa5670fbec0887056L }, + { 
0x6717b64a66f5b8fcL,0xd5a56aea786fec28L,0xa8c3f55fc0ff4952L, + 0xa77fefae457ac49bL } }, + /* 46 << 175 */ + { { 0x29882d7c98379d44L,0xd000bdfb509edc8aL,0xc6f95979e66fe464L, + 0x504a6115fa61bde0L }, + { 0x56b3b871effea31aL,0x2d3de26df0c21a54L,0x21dbff31834753bfL, + 0xe67ecf4969269d86L } }, + /* 47 << 175 */ + { { 0x7a176952151fe690L,0x035158047f2adb5fL,0xee794b15d1b62a8dL, + 0xf004ceecaae454e6L }, + { 0x0897ea7cf0386facL,0x3b62ff12d1fca751L,0x154181df1b7a04ecL, + 0x2008e04afb5847ecL } }, + /* 48 << 175 */ + { { 0xd147148e41dbd772L,0x2b419f7322942654L,0x669f30d3e9c544f7L, + 0x52a2c223c8540149L }, + { 0x5da9ee14634dfb02L,0x5f074ff0f47869f3L,0x74ee878da3933accL, + 0xe65106514fe35ed1L } }, + /* 49 << 175 */ + { { 0xb3eb9482f1012e7aL,0x51013cc0a8a566aeL,0xdd5e924347c00d3bL, + 0x7fde089d946bb0e5L }, + { 0x030754fec731b4b3L,0x12a136a499fda062L,0x7c1064b85a1a35bcL, + 0xbf1f5763446c84efL } }, + /* 50 << 175 */ + { { 0xed29a56da16d4b34L,0x7fba9d09dca21c4fL,0x66d7ac006d8de486L, + 0x6006198773a2a5e1L }, + { 0x8b400f869da28ff0L,0x3133f70843c4599cL,0x9911c9b8ee28cb0dL, + 0xcd7e28748e0af61dL } }, + /* 51 << 175 */ + { { 0x5a85f0f272ed91fcL,0x85214f319cd4a373L,0x881fe5be1925253cL, + 0xd8dc98e091e8bc76L }, + { 0x7120affe585cc3a2L,0x724952ed735bf97aL,0x5581e7dc3eb34581L, + 0x5cbff4f2e52ee57dL } }, + /* 52 << 175 */ + { { 0x8d320a0e87d8cc7bL,0x9beaa7f3f1d280d0L,0x7a0b95719beec704L, + 0x9126332e5b7f0057L }, + { 0x01fbc1b48ed3bd6dL,0x35bb2c12d945eb24L,0x6404694e9a8ae255L, + 0xb6092eec8d6abfb3L } }, + /* 53 << 175 */ + { { 0x4d76143fcc058865L,0x7b0a5af26e249922L,0x8aef94406a50d353L, + 0xe11e4bcc64f0e07aL }, + { 0x4472993aa14a90faL,0x7706e20cba0c51d4L,0xf403292f1532672dL, + 0x52573bfa21829382L } }, + /* 54 << 175 */ + { { 0x6a7bb6a93b5bdb83L,0x08da65c0a4a72318L,0xc58d22aa63eb065fL, + 0x1717596c1b15d685L }, + { 0x112df0d0b266d88bL,0xf688ae975941945aL,0x487386e37c292cacL, + 0x42f3b50d57d6985cL } }, + /* 55 << 175 */ + { { 0x6da4f9986a90fc34L,0xc8f257d365ca8a8dL,0xc2feabca6951f762L, + 0xe1bc81d074c323acL }, + { 0x1bc68f67251a2a12L,0x10d86587be8a70dcL,0xd648af7ff0f84d2eL, + 0xf0aa9ebc6a43ac92L } }, + /* 56 << 175 */ + { { 0x69e3be0427596893L,0xb6bb02a645bf452bL,0x0875c11af4c698c8L, + 0x6652b5c7bece3794L }, + { 0x7b3755fd4f5c0499L,0x6ea16558b5532b38L,0xd1c69889a2e96ef7L, + 0x9c773c3a61ed8f48L } }, + /* 57 << 175 */ + { { 0x2b653a409b323abcL,0xe26605e1f0e1d791L,0x45d410644a87157aL, + 0x8f9a78b7cbbce616L }, + { 0xcf1e44aac407edddL,0x81ddd1d8a35b964fL,0x473e339efd083999L, + 0x6c94bdde8e796802L } }, + /* 58 << 175 */ + { { 0x5a304ada8545d185L,0x82ae44ea738bb8cbL,0x628a35e3df87e10eL, + 0xd3624f3da15b9fe3L }, + { 0xcc44209b14be4254L,0x7d0efcbcbdbc2ea5L,0x1f60336204c37bbeL, + 0x21f363f556a5852cL } }, + /* 59 << 175 */ + { { 0xa1503d1ca8501550L,0x2251e0e1d8ab10bbL,0xde129c966961c51cL, + 0x1f7246a481910f68L }, + { 0x2eb744ee5f2591f2L,0x3c47d33f5e627157L,0x4d6d62c922f3bd68L, + 0x6120a64bcb8df856L } }, + /* 60 << 175 */ + { { 0x3a9ac6c07b5d07dfL,0xa92b95587ef39783L,0xe128a134ab3a9b4fL, + 0x41c18807b1252f05L }, + { 0xfc7ed08980ba9b1cL,0xac8dc6dec532a9ddL,0xbf829cef55246809L, + 0x101b784f5b4ee80fL } }, + /* 61 << 175 */ + { { 0xc09945bbb6f11603L,0x57b09dbe41d2801eL,0xfba5202fa97534a8L, + 0x7fd8ae5fc17b9614L }, + { 0xa50ba66678308435L,0x9572f77cd3868c4dL,0x0cef7bfd2dd7aab0L, + 0xe7958e082c7c79ffL } }, + /* 62 << 175 */ + { { 0x81262e4225346689L,0x716da290b07c7004L,0x35f911eab7950ee3L, + 0x6fd72969261d21b5L }, + { 0x5238980308b640d3L,0x5b0026ee887f12a1L,0x20e21660742e9311L, + 0x0ef6d5415ff77ff7L } }, + /* 63 << 175 */ + { { 
0x969127f0f9c41135L,0xf21d60c968a64993L,0x656e5d0ce541875cL, + 0xf1e0f84ea1d3c233L }, + { 0x9bcca35906002d60L,0xbe2da60c06191552L,0x5da8bbae61181ec3L, + 0x9f04b82365806f19L } }, + /* 64 << 175 */ + { { 0xf1604a7dd4b79bb8L,0xaee806fb52c878c8L,0x34144f118d47b8e8L, + 0x72edf52b949f9054L }, + { 0xebfca84e2127015aL,0x9051d0c09cb7cef3L,0x86e8fe58296deec8L, + 0x33b2818841010d74L } }, + /* 0 << 182 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 182 */ + { { 0x01079383171b445fL,0x9bcf21e38131ad4cL,0x8cdfe205c93987e8L, + 0xe63f4152c92e8c8fL }, + { 0x729462a930add43dL,0x62ebb143c980f05aL,0x4f3954e53b06e968L, + 0xfe1d75ad242cf6b1L } }, + /* 2 << 182 */ + { { 0x5f95c6c7af8685c8L,0xd4c1c8ce2f8f01aaL,0xc44bbe322574692aL, + 0xb8003478d4a4a068L }, + { 0x7c8fc6e52eca3cdbL,0xea1db16bec04d399L,0xb05bc82e8f2bc5cfL, + 0x763d517ff44793d2L } }, + /* 3 << 182 */ + { { 0x4451c1b808bd98d0L,0x644b1cd46575f240L,0x6907eb337375d270L, + 0x56c8bebdfa2286bdL }, + { 0xc713d2acc4632b46L,0x17da427aafd60242L,0x313065b7c95c7546L, + 0xf8239898bf17a3deL } }, + /* 4 << 182 */ + { { 0xf3b7963f4c830320L,0x842c7aa0903203e3L,0xaf22ca0ae7327afbL, + 0x38e13092967609b6L }, + { 0x73b8fb62757558f1L,0x3cc3e831f7eca8c1L,0xe4174474f6331627L, + 0xa77989cac3c40234L } }, + /* 5 << 182 */ + { { 0xe5fd17a144a081e0L,0xd797fb7db70e296aL,0x2b472b30481f719cL, + 0x0e632a98fe6f8c52L }, + { 0x89ccd116c5f0c284L,0xf51088af2d987c62L,0x2a2bccda4c2de6cfL, + 0x810f9efef679f0f9L } }, + /* 6 << 182 */ + { { 0xb0f394b97ffe4b3eL,0x0b691d21e5fa5d21L,0xb0bd77479dfbbc75L, + 0xd2830fdafaf78b00L }, + { 0xf78c249c52434f57L,0x4b1f754598096dabL,0x73bf6f948ff8c0b3L, + 0x34aef03d454e134cL } }, + /* 7 << 182 */ + { { 0xf8d151f4b7ac7ec5L,0xd6ceb95ae50da7d5L,0xa1b492b0dc3a0eb8L, + 0x75157b69b3dd2863L }, + { 0xe2c4c74ec5413d62L,0xbe329ff7bc5fc4c7L,0x835a2aea60fa9ddaL, + 0xf117f5ad7445cb87L } }, + /* 8 << 182 */ + { { 0xae8317f4b0166f7aL,0xfbd3e3f7ceec74e6L,0xfdb516ace0874bfdL, + 0x3d846019c681f3a3L }, + { 0x0b12ee5c7c1620b0L,0xba68b4dd2b63c501L,0xac03cd326668c51eL, + 0x2a6279f74e0bcb5bL } }, + /* 9 << 182 */ + { { 0x17bd69b06ae85c10L,0x729469791dfdd3a6L,0xd9a032682c078becL, + 0x41c6a658bfd68a52L }, + { 0xcdea10240e023900L,0xbaeec121b10d144dL,0x5a600e74058ab8dcL, + 0x1333af21bb89ccddL } }, + /* 10 << 182 */ + { { 0xdf25eae03aaba1f1L,0x2cada16e3b7144cfL,0x657ee27d71ab98bcL, + 0x99088b4c7a6fc96eL }, + { 0x05d5c0a03549dbd4L,0x42cbdf8ff158c3acL,0x3fb6b3b087edd685L, + 0x22071cf686f064d0L } }, + /* 11 << 182 */ + { { 0xd2d6721fff2811e5L,0xdb81b703fe7fae8cL,0x3cfb74efd3f1f7bbL, + 0x0cdbcd7616cdeb5dL }, + { 0x4f39642a566a808cL,0x02b74454340064d6L,0xfabbadca0528fa6fL, + 0xe4c3074cd3fc0bb6L } }, + /* 12 << 182 */ + { { 0xb32cb8b0b796d219L,0xc3e95f4f34741dd9L,0x8721212568edf6f5L, + 0x7a03aee4a2b9cb8eL }, + { 0x0cd3c376f53a89aaL,0x0d8af9b1948a28dcL,0xcf86a3f4902ab04fL, + 0x8aacb62a7f42002dL } }, + /* 13 << 182 */ + { { 0x106985ebf62ffd52L,0xe670b54e5797bf10L,0x4b405209c5e30aefL, + 0x12c97a204365b5e9L }, + { 0x104646ce1fe32093L,0x13cb4ff63907a8c9L,0x8b9f30d1d46e726bL, + 0xe1985e21aba0f499L } }, + /* 14 << 182 */ + { { 0xc573dea910a230cdL,0x24f46a93cd30f947L,0xf2623fcfabe2010aL, + 0x3f278cb273f00e4fL }, + { 0xed55c67d50b920ebL,0xf1cb9a2d8e760571L,0x7c50d1090895b709L, + 0x4207cf07190d4369L } }, + /* 15 << 182 */ + { { 0x3b027e81c4127fe1L,0xa9f8b9ad3ae9c566L,0x5ab10851acbfbba5L, + 0xa747d648569556f5L }, + { 0xcc172b5c2ba97bf7L,0x15e0f77dbcfa3324L,0xa345b7977686279dL, + 0x5a723480e38003d3L } }, + /* 16 << 182 */ + { { 
0xfd8e139f8f5fcda8L,0xf3e558c4bdee5bfdL,0xd76cbaf4e33f9f77L, + 0x3a4c97a471771969L }, + { 0xda27e84bf6dce6a7L,0xff373d9613e6c2d1L,0xf115193cd759a6e9L, + 0x3f9b702563d2262cL } }, + /* 17 << 182 */ + { { 0xd9764a31317cd062L,0x30779d8e199f8332L,0xd807410616b11b0bL, + 0x7917ab9f78aeaed8L }, + { 0xb67a9cbe28fb1d8eL,0x2e313563136eda33L,0x010b7069a371a86cL, + 0x44d90fa26744e6b7L } }, + /* 18 << 182 */ + { { 0x68190867d6b3e243L,0x9fe6cd9d59048c48L,0xb900b02895731538L, + 0xa012062f32cae04fL }, + { 0x8107c8bc9399d082L,0x47e8c54a41df12e2L,0x14ba5117b6ef3f73L, + 0x22260bea81362f0bL } }, + /* 19 << 182 */ + { { 0x90ea261e1a18cc20L,0x2192999f2321d636L,0xef64d314e311b6a0L, + 0xd7401e4c3b54a1f5L }, + { 0x190199836fbca2baL,0x46ad32938fbffc4bL,0xa142d3f63786bf40L, + 0xeb5cbc26b67039fcL } }, + /* 20 << 182 */ + { { 0x9cb0ae6c252bd479L,0x05e0f88a12b5848fL,0x78f6d2b2a5c97663L, + 0x6f6e149bc162225cL }, + { 0xe602235cde601a89L,0xd17bbe98f373be1fL,0xcaf49a5ba8471827L, + 0x7e1a0a8518aaa116L } }, + /* 21 << 182 */ + { { 0x6c833196270580c3L,0x1e233839f1c98a14L,0x67b2f7b4ae34e0a5L, + 0x47ac8745d8ce7289L }, + { 0x2b74779a100dd467L,0x274a43374ee50d09L,0x603dcf1383608bc9L, + 0xcd9da6c3c89e8388L } }, + /* 22 << 182 */ + { { 0x2660199f355116acL,0xcc38bb59b6d18eedL,0x3075f31f2f4bc071L, + 0x9774457f265dc57eL }, + { 0x06a6a9c8c6db88bbL,0x6429d07f4ec98e04L,0x8d05e57b05ecaa8bL, + 0x20f140b17872ea7bL } }, + /* 23 << 182 */ + { { 0xdf8c0f09ca494693L,0x48d3a020f252e909L,0x4c5c29af57b14b12L, + 0x7e6fa37dbf47ad1cL }, + { 0x66e7b50649a0c938L,0xb72c0d486be5f41fL,0x6a6242b8b2359412L, + 0xcd35c7748e859480L } }, + /* 24 << 182 */ + { { 0x12536fea87baa627L,0x58c1fec1f72aa680L,0x6c29b637601e5dc9L, + 0x9e3c3c1cde9e01b9L }, + { 0xefc8127b2bcfe0b0L,0x351071022a12f50dL,0x6ccd6cb14879b397L, + 0xf792f804f8a82f21L } }, + /* 25 << 182 */ + { { 0x509d4804a9b46402L,0xedddf85dc10f0850L,0x928410dc4b6208aaL, + 0xf6229c46391012dcL }, + { 0xc5a7c41e7727b9b6L,0x289e4e4baa444842L,0x049ba1d9e9a947eaL, + 0x44f9e47f83c8debcL } }, + /* 26 << 182 */ + { { 0xfa77a1fe611f8b8eL,0xfd2e416af518f427L,0xc5fffa70114ebac3L, + 0xfe57c4e95d89697bL }, + { 0xfdd053acb1aaf613L,0x31df210fea585a45L,0x318cc10e24985034L, + 0x1a38efd15f1d6130L } }, + /* 27 << 182 */ + { { 0xbf86f2370b1e9e21L,0xb258514d1dbe88aaL,0x1e38a58890c1baf9L, + 0x2936a01ebdb9b692L }, + { 0xd576de986dd5b20cL,0xb586bf7170f98ecfL,0xcccf0f12c42d2fd7L, + 0x8717e61cfb35bd7bL } }, + /* 28 << 182 */ + { { 0x8b1e572235e6fc06L,0x3477728f0b3e13d5L,0x150c294daa8a7372L, + 0xc0291d433bfa528aL }, + { 0xc6c8bc67cec5a196L,0xdeeb31e45c2e8a7cL,0xba93e244fb6e1c51L, + 0xb9f8b71b2e28e156L } }, + /* 29 << 182 */ + { { 0xce65a287968a2ab9L,0xe3c5ce6946bbcb1fL,0xf8c835b9e7ae3f30L, + 0x16bbee26ff72b82bL }, + { 0x665e2017fd42cd22L,0x1e139970f8b1d2a0L,0x125cda2979204932L, + 0x7aee94a549c3bee5L } }, + /* 30 << 182 */ + { { 0x68c7016089821a66L,0xf7c376788f981669L,0xd90829fc48cc3645L, + 0x346af049d70addfcL }, + { 0x2057b232370bf29cL,0xf90c73ce42e650eeL,0xe03386eaa126ab90L, + 0x0e266e7e975a087bL } }, + /* 31 << 182 */ + { { 0x80578eb90fca65d9L,0x7e2989ea16af45b8L,0x7438212dcac75a4eL, + 0x38c7ca394fef36b8L }, + { 0x8650c494d402676aL,0x26ab5a66f72c7c48L,0x4e6cb426ce3a464eL, + 0xf8f998962b72f841L } }, + /* 32 << 182 */ + { { 0x8c3184911a335cc8L,0x563459ba6a5913e4L,0x1b920d61c7b32919L, + 0x805ab8b6a02425adL }, + { 0x2ac512da8d006086L,0x6ca4846abcf5c0fdL,0xafea51d8ac2138d7L, + 0xcb647545344cd443L } }, + /* 33 << 182 */ + { { 0x0429ee8fbd7d9040L,0xee66a2de819b9c96L,0x54f9ec25dea7d744L, + 0x2ffea642671721bbL }, + { 
0x4f19dbd1114344eaL,0x04304536fd0dbc8bL,0x014b50aa29ec7f91L, + 0xb5fc22febb06014dL } }, + /* 34 << 182 */ + { { 0x60d963a91ee682e0L,0xdf48abc0fe85c727L,0x0cadba132e707c2dL, + 0xde608d3aa645aeffL }, + { 0x05f1c28bedafd883L,0x3c362edebd94de1fL,0x8dd0629d13593e41L, + 0x0a5e736f766d6eafL } }, + /* 35 << 182 */ + { { 0xbfa92311f68cf9d1L,0xa4f9ef87c1797556L,0x10d75a1f5601c209L, + 0x651c374c09b07361L }, + { 0x49950b5888b5ceadL,0x0ef000586fa9dbaaL,0xf51ddc264e15f33aL, + 0x1f8b5ca62ef46140L } }, + /* 36 << 182 */ + { { 0x343ac0a3ee9523f0L,0xbb75eab2975ea978L,0x1bccf332107387f4L, + 0x790f92599ab0062eL }, + { 0xf1a363ad1e4f6a5fL,0x06e08b8462519a50L,0x609151877265f1eeL, + 0x6a80ca3493ae985eL } }, + /* 37 << 182 */ + { { 0x81b29768aaba4864L,0xb13cabf28d52a7d6L,0xb5c363488ead03f1L, + 0xc932ad9581c7c1c0L }, + { 0x5452708ecae1e27bL,0x9dac42691b0df648L,0x233e3f0cdfcdb8bcL, + 0xe6ceccdfec540174L } }, + /* 38 << 182 */ + { { 0xbd0d845e95081181L,0xcc8a7920699355d5L,0x111c0f6dc3b375a8L, + 0xfd95bc6bfd51e0dcL }, + { 0x4a106a266888523aL,0x4d142bd6cb01a06dL,0x79bfd289adb9b397L, + 0x0bdbfb94e9863914L } }, + /* 39 << 182 */ + { { 0x29d8a2291660f6a6L,0x7f6abcd6551c042dL,0x13039deb0ac3ffe8L, + 0xa01be628ec8523fbL }, + { 0x6ea341030ca1c328L,0xc74114bdb903928eL,0x8aa4ff4e9e9144b0L, + 0x7064091f7f9a4b17L } }, + /* 40 << 182 */ + { { 0xa3f4f521e447f2c4L,0x81b8da7a604291f0L,0xd680bc467d5926deL, + 0x84f21fd534a1202fL }, + { 0x1d1e31814e9df3d8L,0x1ca4861a39ab8d34L,0x809ddeec5b19aa4aL, + 0x59f72f7e4d329366L } }, + /* 41 << 182 */ + { { 0xa2f93f41386d5087L,0x40bf739cdd67d64fL,0xb449420566702158L, + 0xc33c65be73b1e178L }, + { 0xcdcd657c38ca6153L,0x97f4519adc791976L,0xcc7c7f29cd6e1f39L, + 0x38de9cfb7e3c3932L } }, + /* 42 << 182 */ + { { 0xe448eba37b793f85L,0xe9f8dbf9f067e914L,0xc0390266f114ae87L, + 0x39ed75a7cd6a8e2aL }, + { 0xadb148487ffba390L,0x67f8cb8b6af9bc09L,0x322c38489c7476dbL, + 0xa320fecf52a538d6L } }, + /* 43 << 182 */ + { { 0xe0493002b2aced2bL,0xdfba1809616bd430L,0x531c4644c331be70L, + 0xbc04d32e90d2e450L }, + { 0x1805a0d10f9f142dL,0x2c44a0c547ee5a23L,0x31875a433989b4e3L, + 0x6b1949fd0c063481L } }, + /* 44 << 182 */ + { { 0x2dfb9e08be0f4492L,0x3ff0da03e9d5e517L,0x03dbe9a1f79466a8L, + 0x0b87bcd015ea9932L }, + { 0xeb64fc83ab1f58abL,0x6d9598da817edc8aL,0x699cff661d3b67e5L, + 0x645c0f2992635853L } }, + /* 45 << 182 */ + { { 0x253cdd82eabaf21cL,0x82b9602a2241659eL,0x2cae07ec2d9f7091L, + 0xbe4c720c8b48cd9bL }, + { 0x6ce5bc036f08d6c9L,0x36e8a997af10bf40L,0x83422d213e10ff12L, + 0x7b26d3ebbcc12494L } }, + /* 46 << 182 */ + { { 0xb240d2d0c9469ad6L,0xc4a11b4d30afa05bL,0x4b604acedd6ba286L, + 0x184866003ee2864cL }, + { 0x5869d6ba8d9ce5beL,0x0d8f68c5ff4bfb0dL,0xb69f210b5700cf73L, + 0x61f6653a6d37c135L } }, + /* 47 << 182 */ + { { 0xff3d432b5aff5a48L,0x0d81c4b972ba3a69L,0xee879ae9fa1899efL, + 0xbac7e2a02d6acafdL }, + { 0xd6d93f6c1c664399L,0x4c288de15bcb135dL,0x83031dab9dab7cbfL, + 0xfe23feb03abbf5f0L } }, + /* 48 << 182 */ + { { 0x9f1b2466cdedca85L,0x140bb7101a09538cL,0xac8ae8515e11115dL, + 0x0d63ff676f03f59eL }, + { 0x755e55517d234afbL,0x61c2db4e7e208fc1L,0xaa9859cef28a4b5dL, + 0xbdd6d4fc34af030fL } }, + /* 49 << 182 */ + { { 0xd1c4a26d3be01cb1L,0x9ba14ffc243aa07cL,0xf95cd3a9b2503502L, + 0xe379bc067d2a93abL }, + { 0x3efc18e9d4ca8d68L,0x083558ec80bb412aL,0xd903b9409645a968L, + 0xa499f0b69ba6054fL } }, + /* 50 << 182 */ + { { 0x208b573cb8349abeL,0x3baab3e530b4fc1cL,0x87e978bacb524990L, + 0x3524194eccdf0e80L }, + { 0x627117257d4bcc42L,0xe90a3d9bb90109baL,0x3b1bdd571323e1e0L, + 0xb78e9bd55eae1599L } }, + /* 51 << 182 */ + { { 
0x0794b7469e03d278L,0x80178605d70e6297L,0x171792f899c97855L, + 0x11b393eef5a86b5cL }, + { 0x48ef6582d8884f27L,0xbd44737abf19ba5fL,0x8698de4ca42062c6L, + 0x8975eb8061ce9c54L } }, + /* 52 << 182 */ + { { 0xd50e57c7d7fe71f3L,0x15342190bc97ce38L,0x51bda2de4df07b63L, + 0xba12aeae200eb87dL }, + { 0xabe135d2a9b4f8f6L,0x04619d65fad6d99cL,0x4a6683a77994937cL, + 0x7a778c8b6f94f09aL } }, + /* 53 << 182 */ + { { 0x8c50862320a71b89L,0x241a2aed1c229165L,0x352be595aaf83a99L, + 0x9fbfee7f1562bac8L }, + { 0xeaf658b95c4017e3L,0x1dc7f9e015120b86L,0xd84f13dd4c034d6fL, + 0x283dd737eaea3038L } }, + /* 54 << 182 */ + { { 0x197f2609cd85d6a2L,0x6ebbc345fae60177L,0xb80f031b4e12fedeL, + 0xde55d0c207a2186bL }, + { 0x1fb3e37f24dcdd5aL,0x8d602da57ed191fbL,0x108fb05676023e0dL, + 0x70178c71459c20c0L } }, + /* 55 << 182 */ + { { 0xfad5a3863fe54cf0L,0xa4a3ec4f02bbb475L,0x1aa5ec20919d94d7L, + 0x5d3b63b5a81e4ab3L }, + { 0x7fa733d85ad3d2afL,0xfbc586ddd1ac7a37L,0x282925de40779614L, + 0xfe0ffffbe74a242aL } }, + /* 56 << 182 */ + { { 0x3f39e67f906151e5L,0xcea27f5f55e10649L,0xdca1d4e1c17cf7b7L, + 0x0c326d122fe2362dL }, + { 0x05f7ac337dd35df3L,0x0c3b7639c396dbdfL,0x0912f5ac03b7db1cL, + 0x9dea4b705c9ed4a9L } }, + /* 57 << 182 */ + { { 0x475e6e53aae3f639L,0xfaba0e7cfc278bacL,0x16f9e2219490375fL, + 0xaebf9746a5a7ed0aL }, + { 0x45f9af3ff41ad5d6L,0x03c4623cb2e99224L,0x82c5bb5cb3cf56aaL, + 0x6431181934567ed3L } }, + /* 58 << 182 */ + { { 0xec57f2118be489acL,0x2821895db9a1104bL,0x610dc8756064e007L, + 0x8e526f3f5b20d0feL }, + { 0x6e71ca775b645aeeL,0x3d1dcb9f800e10ffL,0x36b51162189cf6deL, + 0x2c5a3e306bb17353L } }, + /* 59 << 182 */ + { { 0xc186cd3e2a6c6fbfL,0xa74516fa4bf97906L,0x5b4b8f4b279d6901L, + 0x0c4e57b42b573743L }, + { 0x75fdb229b6e386b6L,0xb46793fd99deac27L,0xeeec47eacf712629L, + 0xe965f3c4cbc3b2ddL } }, + /* 60 << 182 */ + { { 0x8dd1fb83425c6559L,0x7fc00ee60af06fdaL,0xe98c922533d956dfL, + 0x0f1ef3354fbdc8a2L }, + { 0x2abb5145b79b8ea2L,0x40fd2945bdbff288L,0x6a814ac4d7185db7L, + 0xc4329d6fc084609aL } }, + /* 61 << 182 */ + { { 0xc9ba7b52ed1be45dL,0x891dd20de4cd2c74L,0x5a4d4a7f824139b1L, + 0x66c17716b873c710L }, + { 0x5e5bc1412843c4e0L,0xd5ac4817b97eb5bfL,0xc0f8af54450c95c7L, + 0xc91b3fa0318406c5L } }, + /* 62 << 182 */ + { { 0x360c340aab9d97f8L,0xfb57bd0790a2d611L,0x4339ae3ca6a6f7e5L, + 0x9c1fcd2a2feb8a10L }, + { 0x972bcca9c7ea7432L,0x1b0b924c308076f6L,0x80b2814a2a5b4ca5L, + 0x2f78f55b61ef3b29L } }, + /* 63 << 182 */ + { { 0xf838744ac18a414fL,0xc611eaae903d0a86L,0x94dabc162a453f55L, + 0xe6f2e3da14efb279L }, + { 0x5b7a60179320dc3cL,0x692e382f8df6b5a4L,0x3f5e15e02d40fa90L, + 0xc87883ae643dd318L } }, + /* 64 << 182 */ + { { 0x511053e453544774L,0x834d0ecc3adba2bcL,0x4215d7f7bae371f5L, + 0xfcfd57bf6c8663bcL }, + { 0xded2383dd6901b1dL,0x3b49fbb4b5587dc3L,0xfd44a08d07625f62L, + 0x3ee4d65b9de9b762L } }, + /* 0 << 189 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 189 */ + { { 0x64e5137d0d63d1faL,0x658fc05202a9d89fL,0x4889487450436309L, + 0xe9ae30f8d598da61L }, + { 0x2ed710d1818baf91L,0xe27e9e068b6a0c20L,0x1e28dcfb1c1a6b44L, + 0x883acb64d6ac57dcL } }, + /* 2 << 189 */ + { { 0x8735728dc2c6ff70L,0x79d6122fc5dc2235L,0x23f5d00319e277f9L, + 0x7ee84e25dded8cc7L }, + { 0x91a8afb063cd880aL,0x3f3ea7c63574af60L,0x0cfcdc8402de7f42L, + 0x62d0792fb31aa152L } }, + /* 3 << 189 */ + { { 0x8e1b4e438a5807ceL,0xad283893e4109a7eL,0xc30cc9cbafd59ddaL, + 0xf65f36c63d8d8093L }, + { 0xdf31469ea60d32b2L,0xee93df4b3e8191c8L,0x9c1017c5355bdeb5L, + 0xd26231858616aa28L } }, + /* 4 << 189 */ + { { 
0xb02c83f9dec31a21L,0x988c8b236ad9d573L,0x53e983aea57be365L, + 0xe968734d646f834eL }, + { 0x9137ea8f5da6309bL,0x10f3a624c1f1ce16L,0x782a9ea2ca440921L, + 0xdf94739e5b46f1b5L } }, + /* 5 << 189 */ + { { 0x9f9be006cce85c9bL,0x360e70d6a4c7c2d3L,0x2cd5beeaaefa1e60L, + 0x64cf63c08c3d2b6dL }, + { 0xfb107fa3e1cf6f90L,0xb7e937c6d5e044e6L,0x74e8ca78ce34db9fL, + 0x4f8b36c13e210bd0L } }, + /* 6 << 189 */ + { { 0x1df165a434a35ea8L,0x3418e0f74d4412f6L,0x5af1f8af518836c3L, + 0x42ceef4d130e1965L }, + { 0x5560ca0b543a1957L,0xc33761e5886cb123L,0x66624b1ffe98ed30L, + 0xf772f4bf1090997dL } }, + /* 7 << 189 */ + { { 0xf4e540bb4885d410L,0x7287f8109ba5f8d7L,0x22d0d865de98dfb1L, + 0x49ff51a1bcfbb8a3L }, + { 0xb6b6fa536bc3012eL,0x3d31fd72170d541dL,0x8018724f4b0f4966L, + 0x79e7399f87dbde07L } }, + /* 8 << 189 */ + { { 0x56f8410ef4f8b16aL,0x97241afec47b266aL,0x0a406b8e6d9c87c1L, + 0x803f3e02cd42ab1bL }, + { 0x7f0309a804dbec69L,0xa83b85f73bbad05fL,0xc6097273ad8e197fL, + 0xc097440e5067adc1L } }, + /* 9 << 189 */ + { { 0x730eafb63524ff16L,0xd7f9b51e823fc6ceL,0x27bd0d32443e4ac0L, + 0x40c59ad94d66f217L }, + { 0x6c33136f17c387a4L,0x5043b8d5eb86804dL,0x74970312675a73c9L, + 0x838fdb31f16669b6L } }, + /* 10 << 189 */ + { { 0xc507b6dd418e7dddL,0x39888d93472f19d6L,0x7eae26be0c27eb4dL, + 0x17b53ed3fbabb884L }, + { 0xfc27021b2b01ae4fL,0x88462e87cf488682L,0xbee096ec215e2d87L, + 0xeb2fea9ad242e29bL } }, + /* 11 << 189 */ + { { 0x5d985b5fb821fc28L,0x89d2e197dc1e2ad2L,0x55b566b89030ba62L, + 0xe3fd41b54f41b1c6L }, + { 0xb738ac2eb9a96d61L,0x7f8567ca369443f4L,0x8698622df803a440L, + 0x2b5862368fe2f4dcL } }, + /* 12 << 189 */ + { { 0xbbcc00c756b95bceL,0x5ec03906616da680L,0x79162ee672214252L, + 0x43132b6386a892d2L }, + { 0x4bdd3ff22f3263bfL,0xd5b3733c9cd0a142L,0x592eaa8244415ccbL, + 0x663e89248d5474eaL } }, + /* 13 << 189 */ + { { 0x8058a25e5236344eL,0x82e8df9dbda76ee6L,0xdcf6efd811cc3d22L, + 0x00089cda3b4ab529L }, + { 0x91d3a071bd38a3dbL,0x4ea97fc0ef72b925L,0x0c9fc15bea3edf75L, + 0x5a6297cda4348ed3L } }, + /* 14 << 189 */ + { { 0x0d38ab35ce7c42d4L,0x9fd493ef82feab10L,0x46056b6d82111b45L, + 0xda11dae173efc5c3L }, + { 0xdc7402785545a7fbL,0xbdb2601c40d507e6L,0x121dfeeb7066fa58L, + 0x214369a839ae8c2aL } }, + /* 15 << 189 */ + { { 0x195709cb06e0956cL,0x4c9d254f010cd34bL,0xf51e13f70471a532L, + 0xe19d67911e73054dL }, + { 0xf702a628db5c7be3L,0xc7141218b24dde05L,0xdc18233cf29b2e2eL, + 0x3a6bd1e885342dbaL } }, + /* 16 << 189 */ + { { 0x3f747fa0b311898cL,0xe2a272e4cd0eac65L,0x4bba5851f914d0bcL, + 0x7a1a9660c4a43ee3L }, + { 0xe5a367cea1c8cde9L,0x9d958ba97271abe3L,0xf3ff7eb63d1615cdL, + 0xa2280dcef5ae20b0L } }, + /* 17 << 189 */ + { { 0x56dba5c1cf640147L,0xea5a2e3d5e83d118L,0x04cd6b6dda24c511L, + 0x1c0f4671e854d214L }, + { 0x91a6b7a969565381L,0xdc966240decf1f5bL,0x1b22d21cfcf5d009L, + 0x2a05f6419021dbd5L } }, + /* 18 << 189 */ + { { 0x8c0ed566d4312483L,0x5179a95d643e216fL,0xcc185fec17044493L, + 0xb306333954991a21L }, + { 0xd801ecdb0081a726L,0x0149b0c64fa89bbbL,0xafe9065a4391b6b9L, + 0xedc92786d633f3a3L } }, + /* 19 << 189 */ + { { 0xe408c24aae6a8e13L,0x85833fde9f3897abL,0x43800e7ed81a0715L, + 0xde08e346b44ffc5fL }, + { 0x7094184ccdeff2e0L,0x49f9387b165eaed1L,0x635d6129777c468aL, + 0x8c0dcfd1538c2dd8L } }, + /* 20 << 189 */ + { { 0xd6d9d9e37a6a308bL,0x623758304c2767d3L,0x874a8bc6f38cbeb6L, + 0xd94d3f1accb6fd9eL }, + { 0x92a9735bba21f248L,0x272ad0e56cd1efb0L,0x7437b69c05b03284L, + 0xe7f047026948c225L } }, + /* 21 << 189 */ + { { 0x8a56c04acba2ececL,0x0c181270e3a73e41L,0x6cb34e9d03e93725L, + 0xf77c8713496521a9L }, + { 
0x94569183fa7f9f90L,0xf2e7aa4c8c9707adL,0xced2c9ba26c1c9a3L, + 0x9109fe9640197507L } }, + /* 22 << 189 */ + { { 0x9ae868a9e9adfe1cL,0x3984403d314e39bbL,0xb5875720f2fe378fL, + 0x33f901e0ba44a628L }, + { 0xea1125fe3652438cL,0xae9ec4e69dd1f20bL,0x1e740d9ebebf7fbdL, + 0x6dbd3ddc42dbe79cL } }, + /* 23 << 189 */ + { { 0x62082aecedd36776L,0xf612c478e9859039L,0xa493b201032f7065L, + 0xebd4d8f24ff9b211L }, + { 0x3f23a0aaaac4cb32L,0xea3aadb715ed4005L,0xacf17ea4afa27e63L, + 0x56125c1ac11fd66cL } }, + /* 24 << 189 */ + { { 0x266344a43794f8dcL,0xdcca923a483c5c36L,0x2d6b6bbf3f9d10a0L, + 0xb320c5ca81d9bdf3L }, + { 0x620e28ff47b50a95L,0x933e3b01cef03371L,0xf081bf8599100153L, + 0x183be9a0c3a8c8d6L } }, + /* 25 << 189 */ + { { 0x4e3ddc5ad6bbe24dL,0xc6c7463053843795L,0x78193dd765ec2d4cL, + 0xb8df26cccd3c89b2L }, + { 0x98dbe3995a483f8dL,0x72d8a9577dd3313aL,0x65087294ab0bd375L, + 0xfcd892487c259d16L } }, + /* 26 << 189 */ + { { 0x8a9443d77613aa81L,0x8010080085fe6584L,0x70fc4dbc7fb10288L, + 0xf58280d3e86beee8L }, + { 0x14fdd82f7c978c38L,0xdf1204c10de44d7bL,0xa08a1c844160252fL, + 0x591554cac17646a5L } }, + /* 27 << 189 */ + { { 0x214a37d6a05bd525L,0x48d5f09b07957b3cL,0x0247cdcbd7109bc9L, + 0x40f9e4bb30599ce7L }, + { 0xc325fa03f46ad2ecL,0x00f766cfc3e3f9eeL,0xab556668d43a4577L, + 0x68d30a613ee03b93L } }, + /* 28 << 189 */ + { { 0x7ddc81ea77b46a08L,0xcf5a6477c7480699L,0x43a8cb346633f683L, + 0x1b867e6b92363c60L }, + { 0x439211141f60558eL,0xcdbcdd632f41450eL,0x7fc04601cc630e8bL, + 0xea7c66d597038b43L } }, + /* 29 << 189 */ + { { 0x7259b8a504e99fd8L,0x98a8dd124785549aL,0x0e459a7c840552e1L, + 0xcdfcf4d04bb0909eL }, + { 0x34a86db253758da7L,0xe643bb83eac997e1L,0x96400bd7530c5b7eL, + 0x9f97af87b41c8b52L } }, + /* 30 << 189 */ + { { 0x34fc8820fbeee3f9L,0x93e5349049091afdL,0x764b9be59a31f35cL, + 0x71f3786457e3d924L }, + { 0x02fb34e0943aa75eL,0xa18c9c58ab8ff6e4L,0x080f31b133cf0d19L, + 0x5c9682db083518a7L } }, + /* 31 << 189 */ + { { 0x873d4ca6b709c3deL,0x64a842623575b8f0L,0x6275da1f020154bbL, + 0x97678caad17cf1abL }, + { 0x8779795f951a95c3L,0xdd35b16350fccc08L,0x3270962733d8f031L, + 0x3c5ab10a498dd85cL } }, + /* 32 << 189 */ + { { 0xb6c185c341dca566L,0x7de7fedad8622aa3L,0x99e84d92901b6dfbL, + 0x30a02b0e7c4ad288L }, + { 0xc7c81daa2fd3cf36L,0xd1319547df89e59fL,0xb2be8184cd496733L, + 0xd5f449eb93d3412bL } }, + /* 33 << 189 */ + { { 0x7ea41b1b25fe531dL,0xf97974326a1d5646L,0x86067f722bde501aL, + 0xf91481c00c85e89cL }, + { 0xca8ee465f8b05bc6L,0x1844e1cf02e83cdaL,0xca82114ab4dbe33bL, + 0x0f9f87694eabfde2L } }, + /* 34 << 189 */ + { { 0x4936b1c038b27fe2L,0x63b6359baba402dfL,0x40c0ea2f656bdbabL, + 0x9c992a896580c39cL }, + { 0x600e8f152a60aed1L,0xeb089ca4e0bf49dfL,0x9c233d7d2d42d99aL, + 0x648d3f954c6bc2faL } }, + /* 35 << 189 */ + { { 0xdcc383a8e1add3f3L,0xf42c0c6a4f64a348L,0x2abd176f0030dbdbL, + 0x4de501a37d6c215eL }, + { 0x4a107c1f4b9a64bcL,0xa77f0ad32496cd59L,0xfb78ac627688dffbL, + 0x7025a2ca67937d8eL } }, + /* 36 << 189 */ + { { 0xfde8b2d1d1a8f4e7L,0xf5b3da477354927cL,0xe48606a3d9205735L, + 0xac477cc6e177b917L }, + { 0xfb1f73d2a883239aL,0xe12572f6cc8b8357L,0x9d355e9cfb1f4f86L, + 0x89b795f8d9f3ec6eL } }, + /* 37 << 189 */ + { { 0x27be56f1b54398dcL,0x1890efd73fedeed5L,0x62f77f1f9c6d0140L, + 0x7ef0e314596f0ee4L }, + { 0x50ca6631cc61dab3L,0x4a39801df4866e4fL,0x66c8d032ae363b39L, + 0x22c591e52ead66aaL } }, + /* 38 << 189 */ + { { 0x954ba308de02a53eL,0x2a6c060fd389f357L,0xe6cfcde8fbf40b66L, + 0x8e02fc56c6340ce1L }, + { 0xe495779573adb4baL,0x7b86122ca7b03805L,0x63f835120c8e6fa6L, + 0x83660ea0057d7804L } }, + /* 39 << 189 */ + { { 
0xbad7910521ba473cL,0xb6c50beeded5389dL,0xee2caf4daa7c9bc0L, + 0xd97b8de48c4e98a7L }, + { 0xa9f63e70ab3bbddbL,0x3898aabf2597815aL,0x7659af89ac15b3d9L, + 0xedf7725b703ce784L } }, + /* 40 << 189 */ + { { 0x25470fabe085116bL,0x04a4337587285310L,0x4e39187ee2bfd52fL, + 0x36166b447d9ebc74L }, + { 0x92ad433cfd4b322cL,0x726aa817ba79ab51L,0xf96eacd8c1db15ebL, + 0xfaf71e910476be63L } }, + /* 41 << 189 */ + { { 0xdd69a640641fad98L,0xb799591829622559L,0x03c6daa5de4199dcL, + 0x92cadc97ad545eb4L }, + { 0x1028238b256534e4L,0x73e80ce68595409aL,0x690d4c66d05dc59bL, + 0xc95f7b8f981dee80L } }, + /* 42 << 189 */ + { { 0xf4337014d856ac25L,0x441bd9ddac524dcaL,0x640b3d855f0499f5L, + 0x39cf84a9d5fda182L }, + { 0x04e7b055b2aa95a0L,0x29e33f0a0ddf1860L,0x082e74b5423f6b43L, + 0x217edeb90aaa2b0fL } }, + /* 43 << 189 */ + { { 0x58b83f3583cbea55L,0xc485ee4dbc185d70L,0x833ff03b1e5f6992L, + 0xb5b9b9cccf0c0dd5L }, + { 0x7caaee8e4e9e8a50L,0x462e907b6269dafdL,0x6ed5cee9fbe791c6L, + 0x68ca3259ed430790L } }, + /* 44 << 189 */ + { { 0x2b72bdf213b5ba88L,0x60294c8a35ef0ac4L,0x9c3230ed19b99b08L, + 0x560fff176c2589aaL }, + { 0x552b8487d6770374L,0xa373202d9a56f685L,0xd3e7f90745f175d9L, + 0x3c2f315fd080d810L } }, + /* 45 << 189 */ + { { 0x1130e9dd7b9520e8L,0xc078f9e20af037b5L,0x38cd2ec71e9c104cL, + 0x0f684368c472fe92L }, + { 0xd3f1b5ed6247e7efL,0xb32d33a9396dfe21L,0x46f59cf44a9aa2c2L, + 0x69cd5168ff0f7e41L } }, + /* 46 << 189 */ + { { 0x3f59da0f4b3234daL,0xcf0b0235b4579ebeL,0x6d1cbb256d2476c7L, + 0x4f0837e69dc30f08L }, + { 0x9a4075bb906f6e98L,0x253bb434c761e7d1L,0xde2e645f6e73af10L, + 0xb89a40600c5f131cL } }, + /* 47 << 189 */ + { { 0xd12840c5b8cc037fL,0x3d093a5b7405bb47L,0x6202c253206348b8L, + 0xbf5d57fcc55a3ca7L }, + { 0x89f6c90c8c3bef48L,0x23ac76235a0a960aL,0xdfbd3d6b552b42abL, + 0x3ef22458132061f6L } }, + /* 48 << 189 */ + { { 0xd74e9bdac97e6516L,0x88779360c230f49eL,0xa6ec1de31e74ea49L, + 0x581dcee53fb645a2L }, + { 0xbaef23918f483f14L,0x6d2dddfcd137d13bL,0x54cde50ed2743a42L, + 0x89a34fc5e4d97e67L } }, + /* 49 << 189 */ + { { 0x13f1f5b312e08ce5L,0xa80540b8a7f0b2caL,0x854bcf7701982805L, + 0xb8653ffd233bea04L }, + { 0x8e7b878702b0b4c9L,0x2675261f9acb170aL,0x061a9d90930c14e5L, + 0xb59b30e0def0abeaL } }, + /* 50 << 189 */ + { { 0x1dc19ea60200ec7dL,0xb6f4a3f90bce132bL,0xb8d5de90f13e27e0L, + 0xbaee5ef01fade16fL }, + { 0x6f406aaae4c6cf38L,0xab4cfe06d1369815L,0x0dcffe87efd550c6L, + 0x9d4f59c775ff7d39L } }, + /* 51 << 189 */ + { { 0xb02553b151deb6adL,0x812399a4b1877749L,0xce90f71fca6006e1L, + 0xc32363a6b02b6e77L }, + { 0x02284fbedc36c64dL,0x86c81e31a7e1ae61L,0x2576c7e5b909d94aL, + 0x8b6f7d02818b2bb0L } }, + /* 52 << 189 */ + { { 0xeca3ed0756faa38aL,0xa3790e6c9305bb54L,0xd784eeda7bc73061L, + 0xbd56d3696dd50614L }, + { 0xd6575949229a8aa9L,0xdcca8f474595ec28L,0x814305c106ab4fe6L, + 0xc8c3976824f43f16L } }, + /* 53 << 189 */ + { { 0xe2a45f36523f2b36L,0x995c6493920d93bbL,0xf8afdab790f1632bL, + 0x79ebbecd1c295954L }, + { 0xc7bb3ddb79592f48L,0x67216a7b5f88e998L,0xd91f098bbc01193eL, + 0xf7d928a5b1db83fcL } }, + /* 54 << 189 */ + { { 0x55e38417e991f600L,0x2a91113e2981a934L,0xcbc9d64806b13bdeL, + 0xb011b6ac0755ff44L }, + { 0x6f4cb518045ec613L,0x522d2d31c2f5930aL,0x5acae1af382e65deL, + 0x5764306727bc966fL } }, + /* 55 << 189 */ + { { 0x5e12705d1c7193f0L,0xf0f32f473be8858eL,0x785c3d7d96c6dfc7L, + 0xd75b4a20bf31795dL }, + { 0x91acf17b342659d4L,0xe596ea3444f0378fL,0x4515708fce52129dL, + 0x17387e1e79f2f585L } }, + /* 56 << 189 */ + { { 0x72cfd2e949dee168L,0x1ae052233e2af239L,0x009e75be1d94066aL, + 0x6cca31c738abf413L }, + { 
0xb50bd61d9bc49908L,0x4a9b4a8cf5e2bc1eL,0xeb6cc5f7946f83acL, + 0x27da93fcebffab28L } }, + /* 57 << 189 */ + { { 0xea314c964821c8c5L,0x8de49deda83c15f4L,0x7a64cf207af33004L, + 0x45f1bfebc9627e10L }, + { 0x878b062654b9df60L,0x5e4fdc3ca95c0b33L,0xe54a37cac2035d8eL, + 0x9087cda980f20b8cL } }, + /* 58 << 189 */ + { { 0x36f61c238319ade4L,0x766f287ade8cfdf8L,0x48821948346f3705L, + 0x49a7b85316e4f4a2L }, + { 0xb9b3f8a75cedadfdL,0x8f5628158db2a815L,0xc0b7d55401f68f95L, + 0x12971e27688a208eL } }, + /* 59 << 189 */ + { { 0xc9f8b696d0ff34fcL,0x20824de21222718cL,0x7213cf9f0c95284dL, + 0xe2ad741bdc158240L }, + { 0x0ee3a6df54043ccfL,0x16ff479bd84412b3L,0xf6c74ee0dfc98af0L, + 0xa78a169f52fcd2fbL } }, + /* 60 << 189 */ + { { 0xd8ae874699c930e9L,0x1d33e85849e117a5L,0x7581fcb46624759fL, + 0xde50644f5bedc01dL }, + { 0xbeec5d00caf3155eL,0x672d66acbc73e75fL,0x86b9d8c6270b01dbL, + 0xd249ef8350f55b79L } }, + /* 61 << 189 */ + { { 0x6131d6d473978fe3L,0xcc4e4542754b00a1L,0x4e05df0557dfcfe9L, + 0x94b29cdd51ef6bf0L }, + { 0xe4530cff9bc7edf2L,0x8ac236fdd3da65f3L,0x0faf7d5fc8eb0b48L, + 0x4d2de14c660eb039L } }, + /* 62 << 189 */ + { { 0xc006bba760430e54L,0x10a2d0d6da3289abL,0x9c037a5dd7979c59L, + 0x04d1f3d3a116d944L }, + { 0x9ff224738a0983cdL,0x28e25b38c883cabbL,0xe968dba547a58995L, + 0x2c80b505774eebdfL } }, + /* 63 << 189 */ + { { 0xee763b714a953bebL,0x502e223f1642e7f6L,0x6fe4b64161d5e722L, + 0x9d37c5b0dbef5316L }, + { 0x0115ed70f8330bc7L,0x139850e675a72789L,0x27d7faecffceccc2L, + 0x3016a8604fd9f7f6L } }, + /* 64 << 189 */ + { { 0xc492ec644cd8f64cL,0x58a2d790279d7b51L,0x0ced1fc51fc75256L, + 0x3e658aed8f433017L }, + { 0x0b61942e05da59ebL,0xba3d60a30ddc3722L,0x7c311cd1742e7f87L, + 0x6473ffeef6b01b6eL } }, + /* 0 << 196 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 196 */ + { { 0x8303604f692ac542L,0xf079ffe1227b91d3L,0x19f63e6315aaf9bdL, + 0xf99ee565f1f344fbL }, + { 0x8a1d661fd6219199L,0x8c883bc6d48ce41cL,0x1065118f3c74d904L, + 0x713889ee0faf8b1bL } }, + /* 2 << 196 */ + { { 0x972b3f8f81a1b3beL,0x4f3ce145ce2764a0L,0xe2d0f1cc28c4f5f7L, + 0xdeee0c0dc7f3985bL }, + { 0x7df4adc0d39e25c3L,0x40619820c467a080L,0x440ebc9361cf5a58L, + 0x527729a6422ad600L } }, + /* 3 << 196 */ + { { 0xca6c0937b1b76ba6L,0x1a2eab854d2026dcL,0xb1715e1519d9ae0aL, + 0xf1ad9199bac4a026L }, + { 0x35b3dfb807ea7b0eL,0xedf5496f3ed9eb89L,0x8932e5ff2d6d08abL, + 0xf314874e25bd2731L } }, + /* 4 << 196 */ + { { 0xefb26a753f73f449L,0x1d1c94f88d44fc79L,0x49f0fbc53bc0dc4dL, + 0xb747ea0b3698a0d0L }, + { 0x5218c3fe228d291eL,0x35b804b543c129d6L,0xfac859b8d1acc516L, + 0x6c10697d95d6e668L } }, + /* 5 << 196 */ + { { 0xc38e438f0876fd4eL,0x45f0c30783d2f383L,0x203cc2ecb10934cbL, + 0x6a8f24392c9d46eeL }, + { 0xf16b431b65ccde7bL,0x41e2cd1827e76a6fL,0xb9c8cf8f4e3484d7L, + 0x64426efd8315244aL } }, + /* 6 << 196 */ + { { 0x1c0a8e44fc94dea3L,0x34c8cdbfdad6a0b0L,0x919c384004113cefL, + 0xfd32fba415490ffaL }, + { 0x58d190f6795dcfb7L,0xfef01b0383588bafL,0x9e6d1d63ca1fc1c0L, + 0x53173f96f0a41ac9L } }, + /* 7 << 196 */ + { { 0x2b1d402aba16f73bL,0x2fb310148cf9b9fcL,0x2d51e60e446ef7bfL, + 0xc731021bb91e1745L }, + { 0x9d3b47244fee99d4L,0x4bca48b6fac5c1eaL,0x70f5f514bbea9af7L, + 0x751f55a5974c283aL } }, + /* 8 << 196 */ + { { 0x6e30251acb452fdbL,0x31ee696550f30650L,0xb0b3e508933548d9L, + 0xb8949a4ff4b0ef5bL }, + { 0x208b83263c88f3bdL,0xab147c30db1d9989L,0xed6515fd44d4df03L, + 0x17a12f75e72eb0c5L } }, + /* 9 << 196 */ + { { 0x3b59796d36cf69dbL,0x1219eee956670c18L,0xfe3341f77a070d8eL, + 0x9b70130ba327f90cL }, + { 
0x36a324620ae18e0eL,0x2021a62346c0a638L,0x251b5817c62eb0d4L, + 0x87bfbcdf4c762293L } }, + /* 10 << 196 */ + { { 0xf78ab505cdd61d64L,0x8c7a53fcc8c18857L,0xa653ce6f16147515L, + 0x9c923aa5ea7d52d5L }, + { 0xc24709cb5c18871fL,0x7d53bec873b3cc74L,0x59264afffdd1d4c4L, + 0x5555917e240da582L } }, + /* 11 << 196 */ + { { 0xcae8bbda548f5a0eL,0x1910eaba3bbfbbe1L,0xae5796857677afc3L, + 0x49ea61f173ff0b5cL }, + { 0x786554784f7c3922L,0x95d337cd20c68eefL,0x68f1e1e5df779ab9L, + 0x14b491b0b5cf69a8L } }, + /* 12 << 196 */ + { { 0x7a6cbbe028e3fe89L,0xe7e1fee4c5aac0ebL,0x7f47eda5697e5140L, + 0x4f450137b454921fL }, + { 0xdb625f8495cd8185L,0x74be0ba1cdb2e583L,0xaee4fd7cdd5e6de4L, + 0x4251437de8101739L } }, + /* 13 << 196 */ + { { 0x686d72a0ac620366L,0x4be3fb9cb6d59344L,0x6e8b44e7a1eb75b9L, + 0x84e39da391a5c10cL }, + { 0x37cc1490b38f0409L,0x029519432c2ade82L,0x9b6887831190a2d8L, + 0x25627d14231182baL } }, + /* 14 << 196 */ + { { 0x6eb550aa658a6d87L,0x1405aaa7cf9c7325L,0xd147142e5c8748c9L, + 0x7f637e4f53ede0e0L }, + { 0xf8ca277614ffad2cL,0xe58fb1bdbafb6791L,0x17158c23bf8f93fcL, + 0x7f15b3730a4a4655L } }, + /* 15 << 196 */ + { { 0x39d4add2d842ca72L,0xa71e43913ed96305L,0x5bb09cbe6700be14L, + 0x68d69d54d8befcf6L }, + { 0xa45f536737183bcfL,0x7152b7bb3370dff7L,0xcf887baabf12525bL, + 0xe7ac7bddd6d1e3cdL } }, + /* 16 << 196 */ + { { 0x25914f7881fdad90L,0xcf638f560d2cf6abL,0xb90bc03fcc054de5L, + 0x932811a718b06350L }, + { 0x2f00b3309bbd11ffL,0x76108a6fb4044974L,0x801bb9e0a851d266L, + 0x0dd099bebf8990c1L } }, + /* 17 << 196 */ + { { 0x58c5aaaaabe32986L,0x0fe9dd2a50d59c27L,0x84951ff48d307305L, + 0x6c23f82986529b78L }, + { 0x50bb22180b136a79L,0x7e2174de77a20996L,0x6f00a4b9c0bb4da6L, + 0x89a25a17efdde8daL } }, + /* 18 << 196 */ + { { 0xf728a27ec11ee01dL,0xf900553ae5f10dfbL,0x189a83c802ec893cL, + 0x3ca5bdc123f66d77L }, + { 0x9878153797eada9fL,0x59c50ab310256230L,0x346042d9323c69b3L, + 0x1b715a6d2c460449L } }, + /* 19 << 196 */ + { { 0xa41dd4766ae06e0bL,0xcdd7888e9d42e25fL,0x0f395f7456b25a20L, + 0xeadfe0ae8700e27eL }, + { 0xb09d52a969950093L,0x3525d9cb327f8d40L,0xb8235a9467df886aL, + 0x77e4b0dd035faec2L } }, + /* 20 << 196 */ + { { 0x115eb20a517d7061L,0x77fe34336c2df683L,0x6870ddc7cdc6fc67L, + 0xb16105880b87de83L }, + { 0x343584cad9c4ddbeL,0xb3164f1c3d754be2L,0x0731ed3ac1e6c894L, + 0x26327dec4f6b904cL } }, + /* 21 << 196 */ + { { 0x9d49c6de97b5cd32L,0x40835daeb5eceecdL,0xc66350edd9ded7feL, + 0x8aeebb5c7a678804L }, + { 0x51d42fb75b8ee9ecL,0xd7a17bdd8e3ca118L,0x40d7511a2ef4400eL, + 0xc48990ac875a66f4L } }, + /* 22 << 196 */ + { { 0x8de07d2a2199e347L,0xbee755562a39e051L,0x56918786916e51dcL, + 0xeb1913134a2d89ecL }, + { 0x6679610d37d341edL,0x434fbb4156d51c2bL,0xe54b7ee7d7492dbaL, + 0xaa33a79a59021493L } }, + /* 23 << 196 */ + { { 0x49fc5054e4bd6d3dL,0x09540f045ab551d0L,0x8acc90854942d3a6L, + 0x231af02f2d28323bL }, + { 0x93458cac0992c163L,0x1fef8e71888e3bb4L,0x27578da5be8c268cL, + 0xcc8be792e805ec00L } }, + /* 24 << 196 */ + { { 0x29267baec61c3855L,0xebff429d58c1fd3bL,0x22d886c08c0b93b8L, + 0xca5e00b22ddb8953L }, + { 0xcf330117c3fed8b7L,0xd49ac6fa819c01f6L,0x6ddaa6bd3c0fbd54L, + 0x917430688049a2cfL } }, + /* 25 << 196 */ + { { 0xd67f981eaff2ef81L,0xc3654d352818ae80L,0x81d050441b2aa892L, + 0x2db067bf3d099328L }, + { 0xe7c79e86703dcc97L,0xe66f9b37e133e215L,0xcdf119a6e39a7a5cL, + 0x47c60de3876f1b61L } }, + /* 26 << 196 */ + { { 0x6e405939d860f1b2L,0x3e9a1dbcf5ed4d4aL,0x3f23619ec9b6bcbdL, + 0x5ee790cf734e4497L }, + { 0xf0a834b15bdaf9bbL,0x02cedda74ca295f0L,0x4619aa2bcb8e378cL, + 0xe5613244cc987ea4L } }, + /* 27 << 196 */ + { { 
0x0bc022cc76b23a50L,0x4a2793ad0a6c21ceL,0x3832878089cac3f5L, + 0x29176f1bcba26d56L }, + { 0x062961874f6f59ebL,0x86e9bca98bdc658eL,0x2ca9c4d357e30402L, + 0x5438b216516a09bbL } }, + /* 28 << 196 */ + { { 0x0a6a063c7672765aL,0x37a3ce640547b9bfL,0x42c099c898b1a633L, + 0xb5ab800d05ee6961L }, + { 0xf1963f5911a5acd6L,0xbaee615746201063L,0x36d9a649a596210aL, + 0xaed043631ba7138cL } }, + /* 29 << 196 */ + { { 0xcf817d1ca4a82b76L,0x5586960ef3806be9L,0x7ab67c8909dc6bb5L, + 0x52ace7a0114fe7ebL }, + { 0xcd987618cbbc9b70L,0x4f06fd5a604ca5e1L,0x90af14ca6dbde133L, + 0x1afe4322948a3264L } }, + /* 30 << 196 */ + { { 0xa70d2ca6c44b2c6cL,0xab7267990ef87dfeL,0x310f64dc2e696377L, + 0x49b42e684c8126a0L }, + { 0x0ea444c3cea0b176L,0x53a8ddf7cb269182L,0xf3e674ebbbba9dcbL, + 0x0d2878a8d8669d33L } }, + /* 31 << 196 */ + { { 0x04b935d5d019b6a3L,0xbb5cf88e406f1e46L,0xa1912d165b57c111L, + 0x9803fc2119ebfd78L }, + { 0x4f231c9ec07764a9L,0xd93286eeb75bd055L,0x83a9457d8ee6c9deL, + 0x046959156087ec90L } }, + /* 32 << 196 */ + { { 0x14c6dd8a58d6cd46L,0x9cb633b58e6634d2L,0xc1305047f81bc328L, + 0x12ede0e226a177e5L }, + { 0x332cca62065a6f4fL,0xc3a47ecd67be487bL,0x741eb1870f47ed1cL, + 0x99e66e58e7598b14L } }, + /* 33 << 196 */ + { { 0x6f0544ca63d0ff12L,0xe5efc784b610a05fL,0xf72917b17cad7b47L, + 0x3ff6ea20f2cac0c0L }, + { 0xcc23791bf21db8b7L,0x7dac70b1d7d93565L,0x682cda1d694bdaadL, + 0xeb88bb8c1023516dL } }, + /* 34 << 196 */ + { { 0xc4c634b4dfdbeb1bL,0x22f5ca72b4ee4deaL,0x1045a368e6524821L, + 0xed9e8a3f052b18b2L }, + { 0x9b7f2cb1b961f49aL,0x7fee2ec17b009670L,0x350d875422507a6dL, + 0x561bd7114db55f1dL } }, + /* 35 << 196 */ + { { 0x4c189ccc320bbcafL,0x568434cfdf1de48cL,0x6af1b00e0fa8f128L, + 0xf0ba9d028907583cL }, + { 0x735a400432ff9f60L,0x3dd8e4b6c25dcf33L,0xf2230f1642c74cefL, + 0xd8117623013fa8adL } }, + /* 36 << 196 */ + { { 0x36822876f51fe76eL,0x8a6811cc11d62589L,0xc3fc7e6546225718L, + 0xb7df2c9fc82fdbcdL }, + { 0x3b1d4e52dd7b205bL,0xb695947847a2e414L,0x05e4d793efa91148L, + 0xb47ed446fd2e9675L } }, + /* 37 << 196 */ + { { 0x1a7098b904c9d9bfL,0x661e28811b793048L,0xb1a16966b01ee461L, + 0xbc5213082954746fL }, + { 0xc909a0fc2477de50L,0xd80bb41c7dbd51efL,0xa85be7ec53294905L, + 0x6d465b1883958f97L } }, + /* 38 << 196 */ + { { 0x16f6f330fb6840fdL,0xfaaeb2143401e6c8L,0xaf83d30fccb5b4f8L, + 0x22885739266dec4bL }, + { 0x51b4367c7bc467dfL,0x926562e3d842d27aL,0xdfcb66140fea14a6L, + 0xeb394daef2734cd9L } }, + /* 39 << 196 */ + { { 0x3eeae5d211c0be98L,0xb1e6ed11814e8165L,0x191086bce52bce1cL, + 0x14b74cc6a75a04daL }, + { 0x63cf11868c060985L,0x071047de2dbd7f7cL,0x4e433b8bce0942caL, + 0xecbac447d8fec61dL } }, + /* 40 << 196 */ + { { 0x8f0ed0e2ebf3232fL,0xfff80f9ec52a2eddL,0xad9ab43375b55fdbL, + 0x73ca7820e42e0c11L }, + { 0x6dace0a0e6251b46L,0x89bc6b5c4c0d932dL,0x3438cd77095da19aL, + 0x2f24a9398d48bdfbL } }, + /* 41 << 196 */ + { { 0x99b47e46766561b7L,0x736600e60ed0322aL,0x06a47cb1638e1865L, + 0x927c1c2dcb136000L }, + { 0x295423370cc5df69L,0x99b37c0209d649a9L,0xc5f0043c6aefdb27L, + 0x6cdd99871be95c27L } }, + /* 42 << 196 */ + { { 0x69850931390420d2L,0x299c40ac0983efa4L,0x3a05e778af39aeadL, + 0x8427440843a45193L }, + { 0x6bcd0fb991a711a0L,0x461592c89f52ab17L,0xb49302b4da3c6ed6L, + 0xc51fddc7330d7067L } }, + /* 43 << 196 */ + { { 0x94babeb6da50d531L,0x521b840da6a7b9daL,0x5305151e404bdc89L, + 0x1bcde201d0d07449L }, + { 0xf427a78b3b76a59aL,0xf84841ce07791a1bL,0xebd314bebf91ed1cL, + 0x8e61d34cbf172943L } }, + /* 44 << 196 */ + { { 0x1d5dc4515541b892L,0xb186ee41fc9d9e54L,0x9d9f345ed5bf610dL, + 0x3e7ba65df6acca9fL }, + { 
0x9dda787aa8369486L,0x09f9dab78eb5ba53L,0x5afb2033d6481bc3L, + 0x76f4ce30afa62104L } }, + /* 45 << 196 */ + { { 0xa8fa00cff4f066b5L,0x89ab5143461dafc2L,0x44339ed7a3389998L, + 0x2ff862f1bc214903L }, + { 0x2c88f985b05556e3L,0xcd96058e3467081eL,0x7d6a4176edc637eaL, + 0xe1743d0936a5acdcL } }, + /* 46 << 196 */ + { { 0x66fd72e27eb37726L,0xf7fa264e1481a037L,0x9fbd3bde45f4aa79L, + 0xed1e0147767c3e22L }, + { 0x7621f97982e7abe2L,0x19eedc7245f633f8L,0xe69b155e6137bf3aL, + 0xa0ad13ce414ee94eL } }, + /* 47 << 196 */ + { { 0x93e3d5241c0e651aL,0xab1a6e2a02ce227eL,0xe7af17974ab27ecaL, + 0x245446debd444f39L }, + { 0x59e22a2156c07613L,0x43deafcef4275498L,0x10834ccb67fd0946L, + 0xa75841e547406edfL } }, + /* 48 << 196 */ + { { 0xebd6a6777b0ac93dL,0xa6e37b0d78f5e0d7L,0x2516c09676f5492bL, + 0x1e4bf8889ac05f3aL }, + { 0xcdb42ce04df0ba2bL,0x935d5cfd5062341bL,0x8a30333382acac20L, + 0x429438c45198b00eL } }, + /* 49 << 196 */ + { { 0x1d083bc9049d33faL,0x58b82dda946f67ffL,0xac3e2db867a1d6a3L, + 0x62e6bead1798aac8L }, + { 0xfc85980fde46c58cL,0xa7f6937969c8d7beL,0x23557927837b35ecL, + 0x06a933d8e0790c0cL } }, + /* 50 << 196 */ + { { 0x827c0e9b077ff55dL,0x53977798bb26e680L,0x595308741d9cb54fL, + 0xcca3f4494aac53efL }, + { 0x11dc5c87a07eda0fL,0xc138bccffd6400c8L,0x549680d313e5da72L, + 0xc93eed824540617eL } }, + /* 51 << 196 */ + { { 0xfd3db1574d0b75c0L,0x9716eb426386075bL,0x0639605c817b2c16L, + 0x09915109f1e4f201L }, + { 0x35c9a9285cca6c3bL,0xb25f7d1a3505c900L,0xeb9f7d20630480c4L, + 0xc3c7b8c62a1a501cL } }, + /* 52 << 196 */ + { { 0x3f99183c5a1f8e24L,0xfdb118fa9dd255f0L,0xb9b18b90c27f62a6L, + 0xe8f732f7396ec191L }, + { 0x524a2d910be786abL,0x5d32adef0ac5a0f5L,0x9b53d4d69725f694L, + 0x032a76c60510ba89L } }, + /* 53 << 196 */ + { { 0x840391a3ebeb1544L,0x44b7b88c3ed73ac3L,0xd24bae7a256cb8b3L, + 0x7ceb151ae394cb12L }, + { 0xbd6b66d05bc1e6a8L,0xec70cecb090f07bfL,0x270644ed7d937589L, + 0xee9e1a3d5f1dccfeL } }, + /* 54 << 196 */ + { { 0xb0d40a84745b98d2L,0xda429a212556ed40L,0xf676eced85148cb9L, + 0x5a22d40cded18936L }, + { 0x3bc4b9e570e8a4ceL,0xbfd1445b9eae0379L,0xf23f2c0c1a0bd47eL, + 0xa9c0bb31e1845531L } }, + /* 55 << 196 */ + { { 0x9ddc4d600a4c3f6bL,0xbdfaad792c15ef44L,0xce55a2367f484accL, + 0x08653ca7055b1f15L }, + { 0x2efa8724538873a3L,0x09299e5dace1c7e7L,0x07afab66ade332baL, + 0x9be1fdf692dd71b7L } }, + /* 56 << 196 */ + { { 0xa49b5d595758b11cL,0x0b852893c8654f40L,0xb63ef6f452379447L, + 0xd4957d29105e690cL }, + { 0x7d484363646559b0L,0xf4a8273c49788a8eL,0xee406cb834ce54a9L, + 0x1e1c260ff86fda9bL } }, + /* 57 << 196 */ + { { 0xe150e228cf6a4a81L,0x1fa3b6a31b488772L,0x1e6ff110c5a9c15bL, + 0xc6133b918ad6aa47L }, + { 0x8ac5d55c9dffa978L,0xba1d1c1d5f3965f2L,0xf969f4e07732b52fL, + 0xfceecdb5a5172a07L } }, + /* 58 << 196 */ + { { 0xb0120a5f10f2b8f5L,0xc83a6cdf5c4c2f63L,0x4d47a491f8f9c213L, + 0xd9e1cce5d3f1bbd5L }, + { 0x0d91bc7caba7e372L,0xfcdc74c8dfd1a2dbL,0x05efa800374618e5L, + 0x1121696915a7925eL } }, + /* 59 << 196 */ + { { 0xd4c89823f6021c5dL,0x880d5e84eff14423L,0x6523bc5a6dcd1396L, + 0xd1acfdfc113c978bL }, + { 0xb0c164e8bbb66840L,0xf7f4301e72b58459L,0xc29ad4a6a638e8ecL, + 0xf5ab896146b78699L } }, + /* 60 << 196 */ + { { 0x9dbd79740e954750L,0x0121de8864f9d2c6L,0x2e597b42d985232eL, + 0x55b6c3c553451777L }, + { 0xbb53e547519cb9fbL,0xf134019f8428600dL,0x5a473176e081791aL, + 0x2f3e226335fb0c08L } }, + /* 61 << 196 */ + { { 0xb28c301773d273b0L,0xccd210767721ef9aL,0x054cc292b650dc39L, + 0x662246de6188045eL }, + { 0x904b52fa6b83c0d1L,0xa72df26797e9cd46L,0x886b43cd899725e4L, + 0x2b651688d849ff22L } }, + /* 62 << 196 */ + { { 
0x60479b7902f34533L,0x5e354c140c77c148L,0xb4bb7581a8537c78L, + 0x188043d7efe1495fL }, + { 0x9ba12f428c1d5026L,0x2e0c8a2693d4aaabL,0xbdba7b8baa57c450L, + 0x140c9ad69bbdafefL } }, + /* 63 << 196 */ + { { 0x2067aa4225ac0f18L,0xf7b1295b04d1fbf3L,0x14829111a4b04824L, + 0x2ce3f19233bd5e91L }, + { 0x9c7a1d558f2e1b72L,0xfe932286302aa243L,0x497ca7b4d4be9554L, + 0xb8e821b8e0547a6eL } }, + /* 64 << 196 */ + { { 0xfb2838be67e573e0L,0x05891db94084c44bL,0x9131137396c1c2c5L, + 0x6aebfa3fd958444bL }, + { 0xac9cdce9e56e55c1L,0x7148ced32caa46d0L,0x2e10c7efb61fe8ebL, + 0x9fd835daff97cf4dL } }, + /* 0 << 203 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 203 */ + { { 0xa36da109081e9387L,0xfb9780d78c935828L,0xd5940332e540b015L, + 0xc9d7b51be0f466faL }, + { 0xfaadcd41d6d9f671L,0xba6c1e28b1a2ac17L,0x066a7833ed201e5fL, + 0x19d99719f90f462bL } }, + /* 2 << 203 */ + { { 0xf431f462060b5f61L,0xa56f46b47bd057c2L,0x348dca6c47e1bf65L, + 0x9a38783e41bcf1ffL }, + { 0x7a5d33a9da710718L,0x5a7799872e0aeaf6L,0xca87314d2d29d187L, + 0xfa0edc3ec687d733L } }, + /* 3 << 203 */ + { { 0x9df336216a31e09bL,0xde89e44dc1350e35L,0x292148714ca0cf52L, + 0xdf3796720b88a538L }, + { 0xc92a510a2591d61bL,0x79aa87d7585b447bL,0xf67db604e5287f77L, + 0x1697c8bf5efe7a80L } }, + /* 4 << 203 */ + { { 0x1c894849cb198ac7L,0xa884a93d0f264665L,0x2da964ef9b200678L, + 0x3c351b87009834e6L }, + { 0xafb2ef9fe2c4b44bL,0x580f6c473326790cL,0xb84805210b02264aL, + 0x8ba6f9e242a194e2L } }, + /* 5 << 203 */ + { { 0xfc87975f8fb54738L,0x3516078827c3ead3L,0x834116d2b74a085aL, + 0x53c99a73a62fe996L }, + { 0x87585be05b81c51bL,0x925bafa8be0852b7L,0x76a4fafda84d19a7L, + 0x39a45982585206d4L } }, + /* 6 << 203 */ + { { 0x499b6ab65eb03c0eL,0xf19b795472bc3fdeL,0xa86b5b9c6e3a80d2L, + 0xe43775086d42819fL }, + { 0xc1663650bb3ee8a3L,0x75eb14fcb132075fL,0xa8ccc9067ad834f6L, + 0xea6a2474e6e92ffdL } }, + /* 7 << 203 */ + { { 0x9d72fd950f8d6758L,0xcb84e101408c07ddL,0xb9114bfda5e23221L, + 0x358b5fe2e94e742cL }, + { 0x1c0577ec95f40e75L,0xf01554513d73f3d6L,0x9d55cd67bd1b9b66L, + 0x63e86e78af8d63c7L } }, + /* 8 << 203 */ + { { 0x39d934abd3c095f1L,0x04b261bee4b76d71L,0x1d2e6970e73e6984L, + 0x879fb23b5e5fcb11L }, + { 0x11506c72dfd75490L,0x3a97d08561bcf1c1L,0x43201d82bf5e7007L, + 0x7f0ac52f798232a7L } }, + /* 9 << 203 */ + { { 0x2715cbc46eb564d4L,0x8d6c752c9e570e29L,0xf80247c89ef5fd5dL, + 0xc3c66b46d53eb514L }, + { 0x9666b4010f87de56L,0xce62c06fc6c603b5L,0xae7b4c607e4fc942L, + 0x38ac0b77663a9c19L } }, + /* 10 << 203 */ + { { 0xcb4d20ee4b049136L,0x8b63bf12356a4613L,0x1221aef670e08128L, + 0xe62d8c514acb6b16L }, + { 0x71f64a67379e7896L,0xb25237a2cafd7fa5L,0xf077bd983841ba6aL, + 0xc4ac02443cd16e7eL } }, + /* 11 << 203 */ + { { 0x548ba86921fea4caL,0xd36d0817f3dfdac1L,0x09d8d71ff4685fafL, + 0x8eff66bec52c459aL }, + { 0x182faee70b57235eL,0xee3c39b10106712bL,0x5107331fc0fcdcb0L, + 0x669fb9dca51054baL } }, + /* 12 << 203 */ + { { 0xb25101fb319d7682L,0xb02931290a982feeL,0x51c1c9b90261b344L, + 0x0e008c5bbfd371faL }, + { 0xd866dd1c0278ca33L,0x666f76a6e5aa53b1L,0xe5cfb7796013a2cfL, + 0x1d3a1aada3521836L } }, + /* 13 << 203 */ + { { 0xcedd253173faa485L,0xc8ee6c4fc0a76878L,0xddbccfc92a11667dL, + 0x1a418ea91c2f695aL }, + { 0xdb11bd9251f73971L,0x3e4b3c82da2ed89fL,0x9a44f3f4e73e0319L, + 0xd1e3de0f303431afL } }, + /* 14 << 203 */ + { { 0x3c5604ff50f75f9cL,0x1d8eddf37e752b22L,0x0ef074dd3c9a1118L, + 0xd0ffc172ccb86d7bL }, + { 0xabd1ece3037d90f2L,0xe3f307d66055856cL,0x422f93287e4c6dafL, + 0x902aac66334879a0L } }, + /* 15 << 203 */ + { { 
0xb6a1e7bf94cdfadeL,0x6c97e1ed7fc6d634L,0x662ad24da2fb63f8L, + 0xf81be1b9a5928405L }, + { 0x86d765e4d14b4206L,0xbecc2e0e8fa0db65L,0xa28838e0b17fc76cL, + 0xe49a602ae37cf24eL } }, + /* 16 << 203 */ + { { 0x76b4131a567193ecL,0xaf3c305ae5f6e70bL,0x9587bd39031eebddL, + 0x5709def871bbe831L }, + { 0x570599830eb2b669L,0x4d80ce1b875b7029L,0x838a7da80364ac16L, + 0x2f431d23be1c83abL } }, + /* 17 << 203 */ + { { 0xe56812a6f9294dd3L,0xb448d01f9b4b0d77L,0xf3ae606104e8305cL, + 0x2bead64594d8c63eL }, + { 0x0a85434d84fd8b07L,0x537b983ff7a9dee5L,0xedcc5f18ef55bd85L, + 0x2041af6221c6cf8bL } }, + /* 18 << 203 */ + { { 0x8e52874cb940c71eL,0x211935a9db5f4b3aL,0x94350492301b1dc3L, + 0x33d2646d29958620L }, + { 0x16b0d64bef911404L,0x9d1f25ea9a3c5ef4L,0x20f200eb4a352c78L, + 0x43929f2c4bd0b428L } }, + /* 19 << 203 */ + { { 0xa5656667c7196e29L,0x7992c2f09391be48L,0xaaa97cbd9ee0cd6eL, + 0x51b0310c3dc8c9bfL }, + { 0x237f8acfdd9f22cbL,0xbb1d81a1b585d584L,0x8d5d85f58c416388L, + 0x0d6e5a5a42fe474fL } }, + /* 20 << 203 */ + { { 0xe781276638235d4eL,0x1c62bd67496e3298L,0x8378660c3f175bc8L, + 0x4d04e18917afdd4dL }, + { 0x32a8160185a8068cL,0xdb58e4e192b29a85L,0xe8a65b86c70d8a3bL, + 0x5f0e6f4e98a0403bL } }, + /* 21 << 203 */ + { { 0x0812968469ed2370L,0x34dc30bd0871ee26L,0x3a5ce9487c9c5b05L, + 0x7d487b8043a90c87L }, + { 0x4089ba37dd0e7179L,0x45f80191b4041811L,0x1c3e105898747ba5L, + 0x98c4e13a6e1ae592L } }, + /* 22 << 203 */ + { { 0xd44636e6e82c9f9eL,0x711db87cc33a1043L,0x6f431263aa8aec05L, + 0x43ff120d2744a4aaL }, + { 0xd3bd892fae77779bL,0xf0fe0cc98cdc9f82L,0xca5f7fe6f1c5b1bcL, + 0xcc63a68244929a72L } }, + /* 23 << 203 */ + { { 0xc7eaba0c09dbe19aL,0x2f3585ad6b5c73c2L,0x8ab8924b0ae50c30L, + 0x17fcd27a638b30baL }, + { 0xaf414d3410b3d5a5L,0x09c107d22a9accf1L,0x15dac49f946a6242L, + 0xaec3df2ad707d642L } }, + /* 24 << 203 */ + { { 0x2c2492b73f894ae0L,0xf59df3e5b75f18ceL,0x7cb740d28f53cad0L, + 0x3eb585fbc4f01294L }, + { 0x17da0c8632c7f717L,0xeb8c795baf943f4cL,0x4ee23fb5f67c51d2L, + 0xef18757568889949L } }, + /* 25 << 203 */ + { { 0xa6b4bdb20389168bL,0xc4ecd258ea577d03L,0x3a63782b55743082L, + 0x6f678f4cc72f08cdL }, + { 0x553511cf65e58dd8L,0xd53b4e3ed402c0cdL,0x37de3e29a037c14cL, + 0x86b6c516c05712aaL } }, + /* 26 << 203 */ + { { 0x2834da3eb38dff6fL,0xbe012c52ea636be8L,0x292d238c61dd37f8L, + 0x0e54523f8f8142dbL }, + { 0xe31eb436036a05d8L,0x83e3cdff1e93c0ffL,0x3fd2fe0f50821ddfL, + 0xc8e19b0dff9eb33bL } }, + /* 27 << 203 */ + { { 0xc8cc943fb569a5feL,0xad0090d4d4342d75L,0x82090b4bcaeca000L, + 0xca39687f1bd410ebL }, + { 0xe7bb0df765959d77L,0x39d782189c964999L,0xd87f62e8b2415451L, + 0xe5efb774bed76108L } }, + /* 28 << 203 */ + { { 0x3ea011a4e822f0d0L,0xbc647ad15a8704f8L,0xbb315b3550c6820fL, + 0x863dec3db7e76becL }, + { 0x01ff5d3af017bfc7L,0x20054439976b8229L,0x067fca370bbd0d3bL, + 0xf63dde647f5e3d0fL } }, + /* 29 << 203 */ + { { 0x22dbefb32a4c94e9L,0xafbff0fe96f8278aL,0x80aea0b13503793dL, + 0xb22380295f06cd29L }, + { 0x65703e578ec3fecaL,0x06c38314393e7053L,0xa0b751eb7c6734c4L, + 0xd2e8a435c59f0f1eL } }, + /* 30 << 203 */ + { { 0x147d90525e9ca895L,0x2f4dd31e972072dfL,0xa16fda8ee6c6755cL, + 0xc66826ffcf196558L }, + { 0x1f1a76a30cf43895L,0xa9d604e083c3097bL,0xe190830966390e0eL, + 0xa50bf753b3c85effL } }, + /* 31 << 203 */ + { { 0x0696bddef6a70251L,0x548b801b3c6ab16aL,0x37fcf704a4d08762L, + 0x090b3defdff76c4eL }, + { 0x87e8cb8969cb9158L,0x44a90744995ece43L,0xf85395f40ad9fbf5L, + 0x49b0f6c54fb0c82dL } }, + /* 32 << 203 */ + { { 0x75d9bc15adf7cccfL,0x81a3e5d6dfa1e1b0L,0x8c39e444249bc17eL, + 0xf37dccb28ea7fd43L }, + { 
0xda654873907fba12L,0x35daa6da4a372904L,0x0564cfc66283a6c5L, + 0xd09fa4f64a9395bfL } }, + /* 33 << 203 */ + { { 0x688e9ec9aeb19a36L,0xd913f1cec7bfbfb4L,0x797b9a3c61c2faa6L, + 0x2f979bec6a0a9c12L }, + { 0xb5969d0f359679ecL,0xebcf523d079b0460L,0xfd6b000810fab870L, + 0x3f2edcda9373a39cL } }, + /* 34 << 203 */ + { { 0x0d64f9a76f568431L,0xf848c27c02f8898cL,0xf418ade1260b5bd5L, + 0xc1f3e3236973dee8L }, + { 0x46e9319c26c185ddL,0x6d85b7d8546f0ac4L,0x427965f2247f9d57L, + 0xb519b636b0035f48L } }, + /* 35 << 203 */ + { { 0x6b6163a9ab87d59cL,0xff9f58c339caaa11L,0x4ac39cde3177387bL, + 0x5f6557c2873e77f9L }, + { 0x6750400636a83041L,0x9b1c96ca75ef196cL,0xf34283deb08c7940L, + 0x7ea096441128c316L } }, + /* 36 << 203 */ + { { 0xb510b3b56aa39dffL,0x59b43da29f8e4d8cL,0xa8ce31fd9e4c4b9fL, + 0x0e20be26c1303c01L }, + { 0x18187182e8ee47c9L,0xd9687cdb7db98101L,0x7a520e4da1e14ff6L, + 0x429808ba8836d572L } }, + /* 37 << 203 */ + { { 0xa37ca60d4944b663L,0xf901f7a9a3f91ae5L,0xe4e3e76e9e36e3b1L, + 0x9aa219cf29d93250L }, + { 0x347fe275056a2512L,0xa4d643d9de65d95cL,0x9669d396699fc3edL, + 0xb598dee2cf8c6bbeL } }, + /* 38 << 203 */ + { { 0x682ac1e5dda9e5c6L,0x4e0d3c72caa9fc95L,0x17faaade772bea44L, + 0x5ef8428cab0009c8L }, + { 0xcc4ce47a460ff016L,0xda6d12bf725281cbL,0x44c678480223aad2L, + 0x6e342afa36256e28L } }, + /* 39 << 203 */ + { { 0x1400bb0b93a37c04L,0x62b1bc9bdd10bd96L,0x7251adeb0dac46b7L, + 0x7d33b92e7be4ef51L }, + { 0x28b2a94be61fa29aL,0x4b2be13f06422233L,0x36d6d062330d8d37L, + 0x5ef80e1eb28ca005L } }, + /* 40 << 203 */ + { { 0x174d46996d16768eL,0x9fc4ff6a628bf217L,0x77705a94154e490dL, + 0x9d96dd288d2d997aL }, + { 0x77e2d9d8ce5d72c4L,0x9d06c5a4c11c714fL,0x02aa513679e4a03eL, + 0x1386b3c2030ff28bL } }, + /* 41 << 203 */ + { { 0xfe82e8a6fb283f61L,0x7df203e5f3abc3fbL,0xeec7c3513a4d3622L, + 0xf7d17dbfdf762761L }, + { 0xc3956e44522055f0L,0xde3012db8fa748dbL,0xca9fcb63bf1dcc14L, + 0xa56d9dcfbe4e2f3aL } }, + /* 42 << 203 */ + { { 0xb86186b68bcec9c2L,0x7cf24df9680b9f06L,0xc46b45eac0d29281L, + 0xfff42bc507b10e12L }, + { 0x12263c404d289427L,0x3d5f1899b4848ec4L,0x11f97010d040800cL, + 0xb4c5f529300feb20L } }, + /* 43 << 203 */ + { { 0xcc543f8fde94fdcbL,0xe96af739c7c2f05eL,0xaa5e0036882692e1L, + 0x09c75b68950d4ae9L }, + { 0x62f63df2b5932a7aL,0x2658252ede0979adL,0x2a19343fb5e69631L, + 0x718c7501525b666bL } }, + /* 44 << 203 */ + { { 0x26a42d69ea40dc3aL,0xdc84ad22aecc018fL,0x25c36c7b3270f04aL, + 0x46ba6d4750fa72edL }, + { 0x6c37d1c593e58a8eL,0xa2394731120c088cL,0xc3be4263cb6e86daL, + 0x2c417d367126d038L } }, + /* 45 << 203 */ + { { 0x5b70f9c58b6f8efaL,0x671a2faa37718536L,0xd3ced3c6b539c92bL, + 0xe56f1bd9a31203c2L }, + { 0x8b096ec49ff3c8ebL,0x2deae43243491ceaL,0x2465c6eb17943794L, + 0x5d267e6620586843L } }, + /* 46 << 203 */ + { { 0x9d3d116db07159d0L,0xae07a67fc1896210L,0x8fc84d87bb961579L, + 0x30009e491c1f8dd6L }, + { 0x8a8caf22e3132819L,0xcffa197cf23ab4ffL,0x58103a44205dd687L, + 0x57b796c30ded67a2L } }, + /* 47 << 203 */ + { { 0x0b9c3a6ca1779ad7L,0xa33cfe2e357c09c5L,0x2ea293153db4a57eL, + 0x919596958ebeb52eL }, + { 0x118db9a6e546c879L,0x8e996df46295c8d6L,0xdd99048455ec806bL, + 0x24f291ca165c1035L } }, + /* 48 << 203 */ + { { 0xcca523bb440e2229L,0x324673a273ef4d04L,0xaf3adf343e11ec39L, + 0x6136d7f1dc5968d3L }, + { 0x7a7b2899b053a927L,0x3eaa2661ae067ecdL,0x8549b9c802779cd9L, + 0x061d7940c53385eaL } }, + /* 49 << 203 */ + { { 0x3e0ba883f06d18bdL,0x4ba6de53b2700843L,0xb966b668591a9e4dL, + 0x93f675677f4fa0edL }, + { 0x5a02711b4347237bL,0xbc041e2fe794608eL,0x55af10f570f73d8cL, + 0xd2d4d4f7bb7564f7L } }, + /* 50 << 203 */ + { { 
0xd7d27a89b3e93ce7L,0xf7b5a8755d3a2c1bL,0xb29e68a0255b218aL, + 0xb533837e8af76754L }, + { 0xd1b05a73579fab2eL,0xb41055a1ecd74385L,0xb2369274445e9115L, + 0x2972a7c4f520274eL } }, + /* 51 << 203 */ + { { 0x6c08334ef678e68aL,0x4e4160f099b057edL,0x3cfe11b852ccb69aL, + 0x2fd1823a21c8f772L }, + { 0xdf7f072f3298f055L,0x8c0566f9fec74a6eL,0xe549e0195bb4d041L, + 0x7c3930ba9208d850L } }, + /* 52 << 203 */ + { { 0xe07141fcaaa2902bL,0x539ad799e4f69ad3L,0xa6453f94813f9ffdL, + 0xc58d3c48375bc2f7L }, + { 0xb3326fad5dc64e96L,0x3aafcaa9b240e354L,0x1d1b0903aca1e7a9L, + 0x4ceb97671211b8a0L } }, + /* 53 << 203 */ + { { 0xeca83e49e32a858eL,0x4c32892eae907badL,0xd5b42ab62eb9b494L, + 0x7fde3ee21eabae1bL }, + { 0x13b5ab09caf54957L,0xbfb028bee5f5d5d5L,0x928a06502003e2c0L, + 0x90793aac67476843L } }, + /* 54 << 203 */ + { { 0x5e942e79c81710a0L,0x557e4a3627ccadd4L,0x72a2bc564bcf6d0cL, + 0x09ee5f4326d7b80cL }, + { 0x6b70dbe9d4292f19L,0x56f74c2663f16b18L,0xc23db0f735fbb42aL, + 0xb606bdf66ae10040L } }, + /* 55 << 203 */ + { { 0x1eb15d4d044573acL,0x7dc3cf86556b0ba4L,0x97af9a33c60df6f7L, + 0x0b1ef85ca716ce8cL }, + { 0x2922f884c96958beL,0x7c32fa9435690963L,0x2d7f667ceaa00061L, + 0xeaaf7c173547365cL } }, + /* 56 << 203 */ + { { 0x1eb4de4687032d58L,0xc54f3d835e2c79e0L,0x07818df45d04ef23L, + 0x55faa9c8673d41b4L }, + { 0xced64f6f89b95355L,0x4860d2eab7415c84L,0x5fdb9bd2050ebad3L, + 0xdb53e0cc6685a5bfL } }, + /* 57 << 203 */ + { { 0xb830c0319feb6593L,0xdd87f3106accff17L,0x2303ebab9f555c10L, + 0x94603695287e7065L }, + { 0xf88311c32e83358cL,0x508dd9b4eefb0178L,0x7ca237062dba8652L, + 0x62aac5a30047abe5L } }, + /* 58 << 203 */ + { { 0x9a61d2a08b1ea7b3L,0xd495ab63ae8b1485L,0x38740f8487052f99L, + 0x178ebe5bb2974eeaL }, + { 0x030bbcca5b36d17fL,0xb5e4cce3aaf86eeaL,0xb51a022068f8e9e0L, + 0xa434879609eb3e75L } }, + /* 59 << 203 */ + { { 0xbe592309eef1a752L,0x5d7162d76f2aa1edL,0xaebfb5ed0f007dd2L, + 0x255e14b2c89edd22L }, + { 0xba85e0720303b697L,0xc5d17e25f05720ffL,0x02b58d6e5128ebb6L, + 0x2c80242dd754e113L } }, + /* 60 << 203 */ + { { 0x919fca5fabfae1caL,0x937afaac1a21459bL,0x9e0ca91c1f66a4d2L, + 0x194cc7f323ec1331L }, + { 0xad25143a8aa11690L,0xbe40ad8d09b59e08L,0x37d60d9be750860aL, + 0x6c53b008c6bf434cL } }, + /* 61 << 203 */ + { { 0xb572415d1356eb80L,0xb8bf9da39578ded8L,0x22658e365e8fb38bL, + 0x9b70ce225af8cb22L }, + { 0x7c00018a829a8180L,0x84329f93b81ed295L,0x7c343ea25f3cea83L, + 0x38f8655f67586536L } }, + /* 62 << 203 */ + { { 0xa661a0d01d3ec517L,0x98744652512321aeL,0x084ca591eca92598L, + 0xa9bb9dc91dcb3febL }, + { 0x14c5435578b4c240L,0x5ed62a3b610cafdcL,0x07512f371b38846bL, + 0x571bb70ab0e38161L } }, + /* 63 << 203 */ + { { 0xb556b95b2da705d2L,0x3ef8ada6b1a08f98L,0x85302ca7ddecfbe5L, + 0x0e530573943105cdL }, + { 0x60554d5521a9255dL,0x63a32fa1f2f3802aL,0x35c8c5b0cd477875L, + 0x97f458ea6ad42da1L } }, + /* 64 << 203 */ + { { 0x832d7080eb6b242dL,0xd30bd0233b71e246L,0x7027991bbe31139dL, + 0x68797e91462e4e53L }, + { 0x423fe20a6b4e185aL,0x82f2c67e42d9b707L,0x25c817684cf7811bL, + 0xbd53005e045bb95dL } }, + /* 0 << 210 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 210 */ + { { 0xe5f649be9d8e68fdL,0xdb0f05331b044320L,0xf6fde9b3e0c33398L, + 0x92f4209b66c8cfaeL }, + { 0xe9d1afcc1a739d4bL,0x09aea75fa28ab8deL,0x14375fb5eac6f1d0L, + 0x6420b560708f7aa5L } }, + /* 2 << 210 */ + { { 0x9eae499c6254dc41L,0x7e2939247a837e7eL,0x74aec08c090524a7L, + 0xf82b92198d6f55f2L }, + { 0x493c962e1402cec5L,0x9f17ca17fa2f30e7L,0xbcd783e8e9b879cbL, + 0xea3d8c145a6f145fL } }, + /* 3 << 210 */ + { { 
0xdede15e75e0dee6eL,0x74f24872dc628aa2L,0xd3e9c4fe7861bb93L, + 0x56d4822a6187b2e0L }, + { 0xb66417cfc59826f9L,0xca2609692408169eL,0xedf69d06c79ef885L, + 0x00031f8adc7d138fL } }, + /* 4 << 210 */ + { { 0x103c46e60ebcf726L,0x4482b8316231470eL,0x6f6dfaca487c2109L, + 0x2e0ace9762e666efL }, + { 0x3246a9d31f8d1f42L,0x1b1e83f1574944d2L,0x13dfa63aa57f334bL, + 0x0cf8daed9f025d81L } }, + /* 5 << 210 */ + { { 0x30d78ea800ee11c1L,0xeb053cd4b5e3dd75L,0x9b65b13ed58c43c5L, + 0xc3ad49bdbd151663L }, + { 0x99fd8e41b6427990L,0x12cf15bd707eae1eL,0x29ad4f1b1aabb71eL, + 0x5143e74d07545d0eL } }, + /* 6 << 210 */ + { { 0x30266336c88bdee1L,0x25f293065876767cL,0x9c078571c6731996L, + 0xc88690b2ed552951L }, + { 0x274f2c2d852705b4L,0xb0bf8d444e09552dL,0x7628beeb986575d1L, + 0x407be2387f864651L } }, + /* 7 << 210 */ + { { 0x0e5e3049a639fc6bL,0xe75c35d986003625L,0x0cf35bd85dcc1646L, + 0x8bcaced26c26273aL }, + { 0xe22ecf1db5536742L,0x013dd8971a9e068bL,0x17f411cb8a7909c5L, + 0x5757ac98861dd506L } }, + /* 8 << 210 */ + { { 0x85de1f0d1e935abbL,0xdefd10b4154de37aL,0xb8d9e392369cebb5L, + 0x54d5ef9b761324beL }, + { 0x4d6341ba74f17e26L,0xc0a0e3c878c1dde4L,0xa6d7758187d918fdL, + 0x6687601502ca3a13L } }, + /* 9 << 210 */ + { { 0xc7313e9cf36658f0L,0xc433ef1c71f8057eL,0x853262461b6a835aL, + 0xc8f053987c86394cL }, + { 0xff398cdfe983c4a1L,0xbf5e816203b7b931L,0x93193c46b7b9045bL, + 0x1e4ebf5da4a6e46bL } }, + /* 10 << 210 */ + { { 0xf9942a6043a24fe7L,0x29c1191effb3492bL,0x9f662449902fde05L, + 0xc792a7ac6713c32dL }, + { 0x2fd88ad8b737982cL,0x7e3a0319a21e60e3L,0x09b0de447383591aL, + 0x6df141ee8310a456L } }, + /* 11 << 210 */ + { { 0xaec1a039e6d6f471L,0x14b2ba0f1198d12eL,0xebc1a1603aeee5acL, + 0x401f4836e0b964ceL }, + { 0x2ee437964fd03f66L,0x3fdb4e49dd8f3f12L,0x6ef267f629380f18L, + 0x3e8e96708da64d16L } }, + /* 12 << 210 */ + { { 0xbc19180c207674f1L,0x112e09a733ae8fdbL,0x996675546aaeb71eL, + 0x79432af1e101b1c7L }, + { 0xd5eb558fde2ddec6L,0x81392d1f5357753fL,0xa7a76b973ae1158aL, + 0x416fbbff4a899991L } }, + /* 13 << 210 */ + { { 0x9e65fdfd0d4a9dcfL,0x7bc29e48944ddf12L,0xbc1a92d93c856866L, + 0x273c69056e98dfe2L }, + { 0x69fce418cdfaa6b8L,0x606bd8235061c69fL,0x42d495a06af75e27L, + 0x8ed3d5056d873a1fL } }, + /* 14 << 210 */ + { { 0xaf5528416ab25b6aL,0xc6c0ffc72b1a4523L,0xab18827b21c99e03L, + 0x060e86489034691bL }, + { 0x5207f90f93c7f398L,0x9f4a96cb82f8d10bL,0xdd71cd793ad0f9e3L, + 0x84f435d2fc3a54f5L } }, + /* 15 << 210 */ + { { 0x4b03c55b8e33787fL,0xef42f975a6384673L,0xff7304f75051b9f0L, + 0x18aca1dc741c87c2L }, + { 0x56f120a72d4bfe80L,0xfd823b3d053e732cL,0x11bccfe47537ca16L, + 0xdf6c9c741b5a996bL } }, + /* 16 << 210 */ + { { 0xee7332c7904fc3faL,0x14a23f45c7e3636aL,0xc38659c3f091d9aaL, + 0x4a995e5db12d8540L }, + { 0x20a53becf3a5598aL,0x56534b17b1eaa995L,0x9ed3dca4bf04e03cL, + 0x716c563ad8d56268L } }, + /* 17 << 210 */ + { { 0x27ba77a41d6178e7L,0xe4c80c4068a1ff8eL,0x750110990a13f63dL, + 0x7bf33521a61d46f3L }, + { 0x0aff218e10b365bbL,0x810218040fd7ea75L,0x05a3fd8aa4b3a925L, + 0xb829e75f9b3db4e6L } }, + /* 18 << 210 */ + { { 0x6bdc75a54d53e5fbL,0x04a5dc02d52717e3L,0x86af502fe9a42ec2L, + 0x8867e8fb2630e382L }, + { 0xbf845c6ebec9889bL,0x54f491f2cb47c98dL,0xa3091fba790c2a12L, + 0xd7f6fd78c20f708bL } }, + /* 19 << 210 */ + { { 0xa569ac30acde5e17L,0xd0f996d06852b4d7L,0xe51d4bb54609ae54L, + 0x3fa37d170daed061L }, + { 0x62a8868434b8fb41L,0x99a2acbd9efb64f1L,0xb75c1a5e6448e1f2L, + 0xfa99951a42b5a069L } }, + /* 20 << 210 */ + { { 0x6d956e892f3b26e7L,0xf4709860da875247L,0x3ad151792482dda3L, + 0xd64110e3017d82f0L }, + { 
0x14928d2cfad414e4L,0x2b155f582ed02b24L,0x481a141bcb821bf1L, + 0x12e3c7704f81f5daL } }, + /* 21 << 210 */ + { { 0xe49c5de59fff8381L,0x110532325bbec894L,0xa0d051cc454d88c4L, + 0x4f6db89c1f8e531bL }, + { 0x34fe3fd6ca563a44L,0x7f5c221558da8ab9L,0x8445016d9474f0a1L, + 0x17d34d61cb7d8a0aL } }, + /* 22 << 210 */ + { { 0x8e9d39101c474019L,0xcaff2629d52ceefbL,0xf9cf3e32c1622c2bL, + 0xd4b95e3ce9071a05L }, + { 0xfbbca61f1594438cL,0x1eb6e6a604aadedfL,0x853027f468e14940L, + 0x221d322adfabda9cL } }, + /* 23 << 210 */ + { { 0xed8ea9f6b7cb179aL,0xdc7b764db7934dccL,0xfcb139405e09180dL, + 0x6629a6bfb47dc2ddL }, + { 0xbfc55e4e9f5a915eL,0xb1db9d376204441eL,0xf82d68cf930c5f53L, + 0x17d3a142cbb605b1L } }, + /* 24 << 210 */ + { { 0xdd5944ea308780f2L,0xdc8de7613845f5e4L,0x6beaba7d7624d7a3L, + 0x1e709afd304df11eL }, + { 0x9536437602170456L,0xbf204b3ac8f94b64L,0x4e53af7c5680ca68L, + 0x0526074ae0c67574L } }, + /* 25 << 210 */ + { { 0x95d8cef8ecd92af6L,0xe6b9fa7a6cd1745aL,0x3d546d3da325c3e4L, + 0x1f57691d9ae93aaeL }, + { 0xe891f3fe9d2e1a33L,0xd430093fac063d35L,0xeda59b125513a327L, + 0xdc2134f35536f18fL } }, + /* 26 << 210 */ + { { 0xaa51fe2c5c210286L,0x3f68aaee1cab658cL,0x5a23a00bf9357292L, + 0x9a626f397efdabedL }, + { 0xfe2b3bf3199d78e3L,0xb7a2af7771bbc345L,0x3d19827a1e59802cL, + 0x823bbc15b487a51cL } }, + /* 27 << 210 */ + { { 0x856139f299d0a422L,0x9ac3df65f456c6fbL,0xaddf65c6701f8bd6L, + 0x149f321e3758df87L }, + { 0xb1ecf714721b7ebaL,0xe17df09831a3312aL,0xdb2fd6ecd5c4d581L, + 0xfd02996f8fcea1b3L } }, + /* 28 << 210 */ + { { 0xe29fa63e7882f14fL,0xc9f6dc3507c6cadcL,0x46f22d6fb882bed0L, + 0x1a45755bd118e52cL }, + { 0x9f2c7c277c4608cfL,0x7ccbdf32568012c2L,0xfcb0aedd61729b0eL, + 0x7ca2ca9ef7d75dbfL } }, + /* 29 << 210 */ + { { 0xf58fecb16f640f62L,0xe274b92b39f51946L,0x7f4dfc046288af44L, + 0x0a91f32aeac329e5L }, + { 0x43ad274bd6aaba31L,0x719a16400f6884f9L,0x685d29f6daf91e20L, + 0x5ec1cc3327e49d52L } }, + /* 30 << 210 */ + { { 0x38f4de963b54a059L,0x0e0015e5efbcfdb3L,0x177d23d94dbb8da6L, + 0x98724aa297a617adL }, + { 0x30f0885bfdb6558eL,0xf9f7a28ac7899a96L,0xd2ae8ac8872dc112L, + 0xfa0642ca73c3c459L } }, + /* 31 << 210 */ + { { 0x15296981e7dfc8d6L,0x67cd44501fb5b94aL,0x0ec71cf10eddfd37L, + 0xc7e5eeb39a8eddc7L }, + { 0x02ac8e3d81d95028L,0x0088f17270b0e35dL,0xec041fabe1881fe3L, + 0x62cf71b8d99e7faaL } }, + /* 32 << 210 */ + { { 0x5043dea7e0f222c2L,0x309d42ac72e65142L,0x94fe9ddd9216cd30L, + 0xd6539c7d0f87feecL }, + { 0x03c5a57c432ac7d7L,0x72692cf0327fda10L,0xec28c85f280698deL, + 0x2331fb467ec283b1L } }, + /* 33 << 210 */ + { { 0xd34bfa322867e633L,0x78709a820a9cc815L,0xb7fe6964875e2fa5L, + 0x25cc064f9e98bfb5L }, + { 0x9eb0151c493a65c5L,0x5fb5d94153182464L,0x69e6f130f04618e2L, + 0xa8ecec22f89c8ab6L } }, + /* 34 << 210 */ + { { 0xcd6ac88bb96209bdL,0x65fa8cdbb3e1c9e0L,0xa47d22f54a8d8eacL, + 0x83895cdf8d33f963L }, + { 0xa8adca59b56cd3d1L,0x10c8350bdaf38232L,0x2b161fb3a5080a9fL, + 0xbe7f5c643af65b3aL } }, + /* 35 << 210 */ + { { 0x2c75403997403a11L,0x94626cf7121b96afL,0x431de7c46a983ec2L, + 0x3780dd3a52cc3df7L }, + { 0xe28a0e462baf8e3bL,0xabe68aad51d299aeL,0x603eb8f9647a2408L, + 0x14c61ed65c750981L } }, + /* 36 << 210 */ + { { 0x88b34414c53352e7L,0x5a34889c1337d46eL,0x612c1560f95f2bc8L, + 0x8a3f8441d4807a3aL }, + { 0x680d9e975224da68L,0x60cd6e88c3eb00e9L,0x3875a98e9a6bc375L, + 0xdc80f9244fd554c2L } }, + /* 37 << 210 */ + { { 0x6c4b34156ac77407L,0xa1e5ea8f25420681L,0x541bfa144607a458L, + 0x5dbc7e7a96d7fbf9L }, + { 0x646a851b31590a47L,0x039e85ba15ee6df8L,0xd19fa231d7b43fc0L, + 0x84bc8be8299a0e04L } }, + /* 38 << 210 */ + { { 
0x2b9d2936f20df03aL,0x240543828608d472L,0x76b6ba049149202aL, + 0xb21c38313670e7b7L }, + { 0xddd93059d6fdee10L,0x9da47ad378488e71L,0x99cc1dfda0fcfb25L, + 0x42abde1064696954L } }, + /* 39 << 210 */ + { { 0x14cc15fc17eab9feL,0xd6e863e4d3e70972L,0x29a7765c6432112cL, + 0x886600015b0774d8L }, + { 0x3729175a2c088eaeL,0x13afbcae8230b8d4L,0x44768151915f4379L, + 0xf086431ad8d22812L } }, + /* 40 << 210 */ + { { 0x37461955c298b974L,0x905fb5f0f8711e04L,0x787abf3afe969d18L, + 0x392167c26f6a494eL }, + { 0xfc7a0d2d28c511daL,0xf127c7dcb66a262dL,0xf9c4bb95fd63fdf0L, + 0x900165893913ef46L } }, + /* 41 << 210 */ + { { 0x74d2a73c11aa600dL,0x2f5379bd9fb5ab52L,0xe49e53a47fb70068L, + 0x68dd39e5404aa9a7L }, + { 0xb9b0cf572ecaa9c3L,0xba0e103be824826bL,0x60c2198b4631a3c4L, + 0xc5ff84abfa8966a2L } }, + /* 42 << 210 */ + { { 0x2d6ebe22ac95aff8L,0x1c9bb6dbb5a46d09L,0x419062da53ee4f8dL, + 0x7b9042d0bb97efefL }, + { 0x0f87f080830cf6bdL,0x4861d19a6ec8a6c6L,0xd3a0daa1202f01aaL, + 0xb0111674f25afbd5L } }, + /* 43 << 210 */ + { { 0x6d00d6cf1afb20d9L,0x1369500040671bc5L,0x913ab0dc2485ea9bL, + 0x1f2bed069eef61acL }, + { 0x850c82176d799e20L,0x93415f373271c2deL,0x5afb06e96c4f5910L, + 0x688a52dfc4e9e421L } }, + /* 44 << 210 */ + { { 0x30495ba3e2a9a6dbL,0x4601303d58f9268bL,0xbe3b0dad7eb0f04fL, + 0x4ea472504456936dL }, + { 0x8caf8798d33fd3e7L,0x1ccd8a89eb433708L,0x9effe3e887fd50adL, + 0xbe240a566b29c4dfL } }, + /* 45 << 210 */ + { { 0xec4ffd98ca0e7ebdL,0xf586783ae748616eL,0xa5b00d8fc77baa99L, + 0x0acada29b4f34c9cL }, + { 0x36dad67d0fe723acL,0x1d8e53a539c36c1eL,0xe4dd342d1f4bea41L, + 0x64fd5e35ebc9e4e0L } }, + /* 46 << 210 */ + { { 0x96f01f9057908805L,0xb5b9ea3d5ed480ddL,0x366c5dc23efd2dd0L, + 0xed2fe3056e9dfa27L }, + { 0x4575e8926e9197e2L,0x11719c09ab502a5dL,0x264c7bece81f213fL, + 0x741b924155f5c457L } }, + /* 47 << 210 */ + { { 0x78ac7b6849a5f4f4L,0xf91d70a29fc45b7dL,0x39b05544b0f5f355L, + 0x11f06bceeef930d9L }, + { 0xdb84d25d038d05e1L,0x04838ee5bacc1d51L,0x9da3ce869e8ee00bL, + 0xc3412057c36eda1fL } }, + /* 48 << 210 */ + { { 0xae80b91364d9c2f4L,0x7468bac3a010a8ffL,0xdfd2003737359d41L, + 0x1a0f5ab815efeaccL }, + { 0x7c25ad2f659d0ce0L,0x4011bcbb6785cff1L,0x128b99127e2192c7L, + 0xa549d8e113ccb0e8L } }, + /* 49 << 210 */ + { { 0x805588d8c85438b1L,0x5680332dbc25cb27L,0xdcd1bc961a4bfdf4L, + 0x779ff428706f6566L }, + { 0x8bbee998f059987aL,0xf6ce8cf2cc686de7L,0xf8ad3c4a953cfdb2L, + 0xd1d426d92205da36L } }, + /* 50 << 210 */ + { { 0xb3c0f13fc781a241L,0x3e89360ed75362a8L,0xccd05863c8a91184L, + 0x9bd0c9b7efa8a7f4L }, + { 0x97ee4d538a912a4bL,0xde5e15f8bcf518fdL,0x6a055bf8c467e1e0L, + 0x10be4b4b1587e256L } }, + /* 51 << 210 */ + { { 0xd90c14f2668621c9L,0xd5518f51ab9c92c1L,0x8e6a0100d6d47b3cL, + 0xcbe980dd66716175L }, + { 0x500d3f10ddd83683L,0x3b6cb35d99cac73cL,0x53730c8b6083d550L, + 0xcf159767df0a1987L } }, + /* 52 << 210 */ + { { 0x84bfcf5343ad73b3L,0x1b528c204f035a94L,0x4294edf733eeac69L, + 0xb6283e83817f3240L }, + { 0xc3fdc9590a5f25b1L,0xefaf8aa55844ee22L,0xde269ba5dbdde4deL, + 0xe3347160c56133bfL } }, + /* 53 << 210 */ + { { 0xc11842198d9ea9f8L,0x090de5dbf3fc1ab5L,0x404c37b10bf22cdaL, + 0x7de20ec8f5618894L }, + { 0x754c588eecdaecabL,0x6ca4b0ed88342743L,0x76f08bddf4a938ecL, + 0xd182de8991493ccbL } }, + /* 54 << 210 */ + { { 0xd652c53ec8a4186aL,0xb3e878db946d8e33L,0x088453c05f37663cL, + 0x5cd9daaab407748bL }, + { 0xa1f5197f586d5e72L,0x47500be8c443ca59L,0x78ef35b2e2652424L, + 0x09c5d26f6dd7767dL } }, + /* 55 << 210 */ + { { 0x7175a79aa74d3f7bL,0x0428fd8dcf5ea459L,0x511cb97ca5d1746dL, + 0x36363939e71d1278L }, + { 
0xcf2df95510350bf4L,0xb381743960aae782L,0xa748c0e43e688809L, + 0x98021fbfd7a5a006L } }, + /* 56 << 210 */ + { { 0x9076a70c0e367a98L,0xbea1bc150f62b7c2L,0x2645a68c30fe0343L, + 0xacaffa78699dc14fL }, + { 0xf4469964457bf9c4L,0x0db6407b0d2ead83L,0x68d56cadb2c6f3ebL, + 0x3b512e73f376356cL } }, + /* 57 << 210 */ + { { 0xe43b0e1ffce10408L,0x89ddc0035a5e257dL,0xb0ae0d120362e5b3L, + 0x07f983c7b0519161L }, + { 0xc2e94d155d5231e7L,0xcff22aed0b4f9513L,0xb02588dd6ad0b0b5L, + 0xb967d1ac11d0dcd5L } }, + /* 58 << 210 */ + { { 0x8dac6bc6cf777b6cL,0x0062bdbd4c6d1959L,0x53da71b50ef5cc85L, + 0x07012c7d4006f14fL }, + { 0x4617f962ac47800dL,0x53365f2bc102ed75L,0xb422efcb4ab8c9d3L, + 0x195cb26b34af31c9L } }, + /* 59 << 210 */ + { { 0x3a926e2905f2c4ceL,0xbd2bdecb9856966cL,0x5d16ab3a85527015L, + 0x9f81609e4486c231L }, + { 0xd8b96b2cda350002L,0xbd054690fa1b7d36L,0xdc90ebf5e71d79bcL, + 0xf241b6f908964e4eL } }, + /* 60 << 210 */ + { { 0x7c8386432fe3cd4cL,0xe0f33acbb4bc633cL,0xb4a9ecec3d139f1fL, + 0x05ce69cddc4a1f49L }, + { 0xa19d1b16f5f98aafL,0x45bb71d66f23e0efL,0x33789fcd46cdfdd3L, + 0x9b8e2978cee040caL } }, + /* 61 << 210 */ + { { 0x9c69b246ae0a6828L,0xba533d247078d5aaL,0x7a2e42c07bb4fbdbL, + 0xcfb4879a7035385cL }, + { 0x8c3dd30b3281705bL,0x7e361c6c404fe081L,0x7b21649c3f604edfL, + 0x5dbf6a3fe52ffe47L } }, + /* 62 << 210 */ + { { 0xc41b7c234b54d9bfL,0x1374e6813511c3d9L,0x1863bf16c1b2b758L, + 0x90e785071e9e6a96L }, + { 0xab4bf98d5d86f174L,0xd74e0bd385e96fe4L,0x8afde39fcac5d344L, + 0x90946dbcbd91b847L } }, + /* 63 << 210 */ + { { 0xf5b42358fe1a838cL,0x05aae6c5620ac9d8L,0x8e193bd8a1ce5a0bL, + 0x8f7105714dabfd72L }, + { 0x8d8fdd48182caaacL,0x8c4aeefa040745cfL,0x73c6c30af3b93e6dL, + 0x991241f316f42011L } }, + /* 64 << 210 */ + { { 0xa0158eeae457a477L,0xd19857dbee6ddc05L,0xb326522418c41671L, + 0x3ffdfc7e3c2c0d58L }, + { 0x3a3a525426ee7cdaL,0x341b0869df02c3a8L,0xa023bf42723bbfc8L, + 0x3d15002a14452691L } }, + /* 0 << 217 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 217 */ + { { 0x5ef7324c85edfa30L,0x2597655487d4f3daL,0x352f5bc0dcb50c86L, + 0x8f6927b04832a96cL }, + { 0xd08ee1ba55f2f94cL,0x6a996f99344b45faL,0xe133cb8da8aa455dL, + 0x5d0721ec758dc1f7L } }, + /* 2 << 217 */ + { { 0x6ba7a92079e5fb67L,0xe1331feb70aa725eL,0x5080ccf57df5d837L, + 0xe4cae01d7ff72e21L }, + { 0xd9243ee60412a77dL,0x06ff7cacdf449025L,0xbe75f7cd23ef5a31L, + 0xbc9578220ddef7a8L } }, + /* 3 << 217 */ + { { 0x8cf7230cb0ce1c55L,0x5b534d050bbfb607L,0xee1ef1130e16363bL, + 0x27e0aa7ab4999e82L }, + { 0xce1dac2d79362c41L,0x67920c9091bb6cb0L,0x1e648d632223df24L, + 0x0f7d9eefe32e8f28L } }, + /* 4 << 217 */ + { { 0x6943f39afa833834L,0x22951722a6328562L,0x81d63dd54170fc10L, + 0x9f5fa58faecc2e6dL }, + { 0xb66c8725e77d9a3bL,0x11235cea6384ebe0L,0x06a8c1185845e24aL, + 0x0137b286ebd093b1L } }, + /* 5 << 217 */ + { { 0xc589e1ce44ace150L,0xe0f8d3d94381e97cL,0x59e99b1162c5a4b8L, + 0x90d262f7fd0ec9f9L }, + { 0xfbc854c9283e13c9L,0x2d04fde7aedc7085L,0x057d776547dcbecbL, + 0x8dbdf5919a76fa5fL } }, + /* 6 << 217 */ + { { 0xd01506950de1e578L,0x2e1463e7e9f72bc6L,0xffa684411b39eca5L, + 0x673c85307c037f2fL }, + { 0xd0d6a600747f91daL,0xb08d43e1c9cb78e9L,0x0fc0c64427b5cef5L, + 0x5c1d160aa60a2fd6L } }, + /* 7 << 217 */ + { { 0xf98cae5328c8e13bL,0x375f10c4b2eddcd1L,0xd4eb8b7f5cce06adL, + 0xb4669f4580a2e1efL }, + { 0xd593f9d05bbd8699L,0x5528a4c9e7976d13L,0x3923e0951c7e28d3L, + 0xb92937903f6bb577L } }, + /* 8 << 217 */ + { { 0xdb567d6ac42bd6d2L,0x6df86468bb1f96aeL,0x0efe5b1a4843b28eL, + 0x961bbb056379b240L }, + { 
0xb6caf5f070a6a26bL,0x70686c0d328e6e39L,0x80da06cf895fc8d3L, + 0x804d8810b363fdc9L } }, + /* 9 << 217 */ + { { 0xbe22877b207f1670L,0x9b0dd1884e615291L,0x625ae8dc97a3c2bfL, + 0x08584ef7439b86e8L }, + { 0xde7190a5dcd898ffL,0x26286c402058ee3dL,0x3db0b2175f87b1c1L, + 0xcc334771102a6db5L } }, + /* 10 << 217 */ + { { 0xd99de9542f770fb1L,0x97c1c6204cd7535eL,0xd3b6c4483f09cefcL, + 0xd725af155a63b4f8L }, + { 0x0c95d24fc01e20ecL,0xdfd374949ae7121fL,0x7d6ddb72ec77b7ecL, + 0xfe079d3b0353a4aeL } }, + /* 11 << 217 */ + { { 0x3066e70a2e6ac8d2L,0x9c6b5a43106e5c05L,0x52d3c6f5ede59b8cL, + 0x30d6a5c3fccec9aeL }, + { 0xedec7c224fc0a9efL,0x190ff08395c16cedL,0xbe12ec8f94de0fdeL, + 0x0d131ab8852d3433L } }, + /* 12 << 217 */ + { { 0x42ace07e85701291L,0x94793ed9194061a8L,0x30e83ed6d7f4a485L, + 0x9eec7269f9eeff4dL }, + { 0x90acba590c9d8005L,0x5feca4581e79b9d1L,0x8fbe54271d506a1eL, + 0xa32b2c8e2439cfa7L } }, + /* 13 << 217 */ + { { 0x1671c17373dd0b4eL,0x37a2821444a054c6L,0x81760a1b4e8b53f1L, + 0xa6c04224f9f93b9eL }, + { 0x18784b34cf671e3cL,0x81bbecd2cda9b994L,0x38831979b2ab3848L, + 0xef54feb7f2e03c2dL } }, + /* 14 << 217 */ + { { 0xcf197ca7fb8088faL,0x014272474ddc96c5L,0xa2d2550a30777176L, + 0x534698984d0cf71dL }, + { 0x6ce937b83a2aaac6L,0xe9f91dc35af38d9bL,0x2598ad83c8bf2899L, + 0x8e706ac9b5536c16L } }, + /* 15 << 217 */ + { { 0x40dc7495f688dc98L,0x26490cd7124c4afcL,0xe651ec841f18775cL, + 0x393ea6c3b4fdaf4aL }, + { 0x1e1f33437f338e0dL,0x39fb832b6053e7b5L,0x46e702da619e14d5L, + 0x859cacd1cdeef6e0L } }, + /* 16 << 217 */ + { { 0x63b99ce74462007dL,0xb8ab48a54cb5f5b7L,0x9ec673d2f55edde7L, + 0xd1567f748cfaefdaL }, + { 0x46381b6b0887bcecL,0x694497cee178f3c2L,0x5e6525e31e6266cbL, + 0x5931de26697d6413L } }, + /* 17 << 217 */ + { { 0x87f8df7c0e58d493L,0xb1ae5ed058b73f12L,0xc368f784dea0c34dL, + 0x9bd0a120859a91a0L }, + { 0xb00d88b7cc863c68L,0x3a1cc11e3d1f4d65L,0xea38e0e70aa85593L, + 0x37f13e987dc4aee8L } }, + /* 18 << 217 */ + { { 0x10d38667bc947badL,0x738e07ce2a36ee2eL,0xc93470cdc577fcacL, + 0xdee1b6162782470dL }, + { 0x36a25e672e793d12L,0xd6aa6caee0f186daL,0x474d0fd980e07af7L, + 0xf7cdc47dba8a5cd4L } }, + /* 19 << 217 */ + { { 0x28af6d9dab15247fL,0x7c789c10493a537fL,0x7ac9b11023a334e7L, + 0x0236ac0912c9c277L }, + { 0xa7e5bd251d7a5144L,0x098b9c2af13ec4ecL,0x3639dacad3f0abcaL, + 0x642da81aa23960f9L } }, + /* 20 << 217 */ + { { 0x7d2e5c054f7269b1L,0xfcf30777e287c385L,0x10edc84ff2a46f21L, + 0x354417574f43fa36L }, + { 0xf1327899fd703431L,0xa438d7a616dd587aL,0x65c34c57e9c8352dL, + 0xa728edab5cc5a24eL } }, + /* 21 << 217 */ + { { 0xaed78abc42531689L,0x0a51a0e8010963efL,0x5776fa0ad717d9b3L, + 0xf356c2397dd3428bL }, + { 0x29903fff8d3a3dacL,0x409597fa3d94491fL,0x4cd7a5ffbf4a56a4L, + 0xe50964748adab462L } }, + /* 22 << 217 */ + { { 0xa97b51265c3427b0L,0x6401405cd282c9bdL,0x3629f8d7222c5c45L, + 0xb1c02c16e8d50aedL }, + { 0xbea2ed75d9635bc9L,0x226790c76e24552fL,0x3c33f2a365f1d066L, + 0x2a43463e6dfccc2eL } }, + /* 23 << 217 */ + { { 0x8cc3453adb483761L,0xe7cc608565d5672bL,0x277ed6cbde3efc87L, + 0x19f2f36869234eafL }, + { 0x9aaf43175c0b800bL,0x1f1e7c898b6da6e2L,0x6cfb4715b94ec75eL, + 0xd590dd5f453118c2L } }, + /* 24 << 217 */ + { { 0x14e49da11f17a34cL,0x5420ab39235a1456L,0xb76372412f50363bL, + 0x7b15d623c3fabb6eL }, + { 0xa0ef40b1e274e49cL,0x5cf5074496b1860aL,0xd6583fbf66afe5a4L, + 0x44240510f47e3e9aL } }, + /* 25 << 217 */ + { { 0x9925434311b2d595L,0xf1367499eec8df57L,0x3cb12c613e73dd05L, + 0xd248c0337dac102aL }, + { 0xcf154f13a77739f5L,0xbf4288cb23d2af42L,0xaa64c9b632e4a1cfL, + 0xee8c07a8c8a208f3L } }, + /* 26 << 217 */ + { { 
0xe10d49996fe8393fL,0x0f809a3fe91f3a32L,0x61096d1c802f63c8L, + 0x289e146257750d3dL }, + { 0xed06167e9889feeaL,0xd5c9c0e2e0993909L,0x46fca0d856508ac6L, + 0x918260474f1b8e83L } }, + /* 27 << 217 */ + { { 0x4f2c877a9a4a2751L,0x71bd0072cae6feadL,0x38df8dcc06aa1941L, + 0x5a074b4c63beeaa8L }, + { 0xd6d65934c1cec8edL,0xa6ecb49eaabc03bdL,0xaade91c2de8a8415L, + 0xcfb0efdf691136e0L } }, + /* 28 << 217 */ + { { 0x11af45ee23ab3495L,0xa132df880b77463dL,0x8923c15c815d06f4L, + 0xc3ceb3f50d61a436L }, + { 0xaf52291de88fb1daL,0xea0579741da12179L,0xb0d7218cd2fef720L, + 0x6c0899c98e1d8845L } }, + /* 29 << 217 */ + { { 0x98157504752ddad7L,0xd60bd74fa1a68a97L,0x7047a3a9f658fb99L, + 0x1f5d86d65f8511e4L }, + { 0xb8a4bc424b5a6d88L,0x69eb2c331abefa7dL,0x95bf39e813c9c510L, + 0xf571960ad48aab43L } }, + /* 30 << 217 */ + { { 0x7e8cfbcf704e23c6L,0xc71b7d2228aaa65bL,0xa041b2bd245e3c83L, + 0x69b98834d21854ffL }, + { 0x89d227a3963bfeecL,0x99947aaade7da7cbL,0x1d9ee9dbee68a9b1L, + 0x0a08f003698ec368L } }, + /* 31 << 217 */ + { { 0xe9ea409478ef2487L,0xc8d2d41502cfec26L,0xc52f9a6eb7dcf328L, + 0x0ed489e385b6a937L }, + { 0x9b94986bbef3366eL,0x0de59c70edddddb8L,0xffdb748ceadddbe2L, + 0x9b9784bb8266ea40L } }, + /* 32 << 217 */ + { { 0x142b55021a93507aL,0xb4cd11878d3c06cfL,0xdf70e76a91ec3f40L, + 0x484e81ad4e7553c2L }, + { 0x830f87b5272e9d6eL,0xea1c93e5c6ff514aL,0x67cc2adcc4192a8eL, + 0xc77e27e242f4535aL } }, + /* 33 << 217 */ + { { 0x9cdbab36d2b713c5L,0x86274ea0cf7b0cd3L,0x784680f309af826bL, + 0xbfcc837a0c72dea3L }, + { 0xa8bdfe9dd6529b73L,0x708aa22863a88002L,0x6c7a9a54c91d45b9L, + 0xdf1a38bbfd004f56L } }, + /* 34 << 217 */ + { { 0x2e8c9a26b8bad853L,0x2d52cea33723eae7L,0x054d6d8156ca2830L, + 0xa3317d149a8dc411L }, + { 0xa08662fefd4ddedaL,0xed2a153ab55d792bL,0x7035c16abfc6e944L, + 0xb6bc583400171cf3L } }, + /* 35 << 217 */ + { { 0xe27152b383d102b6L,0xfe695a470646b848L,0xa5bb09d8916e6d37L, + 0xb4269d640d17015eL }, + { 0x8d8156a10a1d2285L,0xfeef6c5146d26d72L,0x9dac57c84c5434a7L, + 0x0282e5be59d39e31L } }, + /* 36 << 217 */ + { { 0xedfff181721c486dL,0x301baf10bc58824eL,0x8136a6aa00570031L, + 0x55aaf78c1cddde68L }, + { 0x2682937159c63952L,0x3a3bd2748bc25bafL,0xecdf8657b7e52dc3L, + 0x2dd8c087fd78e6c8L } }, + /* 37 << 217 */ + { { 0x20553274f5531461L,0x8b4a12815d95499bL,0xe2c8763a1a80f9d2L, + 0xd1dbe32b4ddec758L }, + { 0xaf12210d30c34169L,0xba74a95378baa533L,0x3d133c6ea438f254L, + 0xa431531a201bef5bL } }, + /* 38 << 217 */ + { { 0x15295e22f669d7ecL,0xca374f64357fb515L,0x8a8406ffeaa3fdb3L, + 0x106ae448df3f2da8L }, + { 0x8f9b0a9033c8e9a1L,0x234645e271ad5885L,0x3d0832241c0aed14L, + 0xf10a7d3e7a942d46L } }, + /* 39 << 217 */ + { { 0x7c11deee40d5c9beL,0xb2bae7ffba84ed98L,0x93e97139aad58dddL, + 0x3d8727963f6d1fa3L }, + { 0x483aca818569ff13L,0x8b89a5fb9a600f72L,0x4cbc27c3c06f2b86L, + 0x2213071363ad9c0bL } }, + /* 40 << 217 */ + { { 0xb5358b1e48ac2840L,0x18311294ecba9477L,0xda58f990a6946b43L, + 0x3098baf99ab41819L }, + { 0x66c4c1584198da52L,0xab4fc17c146bfd1bL,0x2f0a4c3cbf36a908L, + 0x2ae9e34b58cf7838L } }, + /* 41 << 217 */ + { { 0xf411529e3fa11b1fL,0x21e43677974af2b4L,0x7c20958ec230793bL, + 0x710ea88516e840f3L }, + { 0xfc0b21fcc5dc67cfL,0x08d5164788405718L,0xd955c21fcfe49eb7L, + 0x9722a5d556dd4a1fL } }, + /* 42 << 217 */ + { { 0xc9ef50e2c861baa5L,0xc0c21a5d9505ac3eL,0xaf6b9a338b7c063fL, + 0xc63703392f4779c1L }, + { 0x22df99c7638167c3L,0xfe6ffe76795db30cL,0x2b822d33a4854989L, + 0xfef031dd30563aa5L } }, + /* 43 << 217 */ + { { 0x16b09f82d57c667fL,0xc70312cecc0b76f1L,0xbf04a9e6c9118aecL, + 0x82fcb4193409d133L }, + { 
0x1a8ab385ab45d44dL,0xfba07222617b83a3L,0xb05f50dd58e81b52L, + 0x1d8db55321ce5affL } }, + /* 44 << 217 */ + { { 0x3097b8d4e344a873L,0x7d8d116dfe36d53eL,0x6db22f587875e750L, + 0x2dc5e37343e144eaL }, + { 0xc05f32e6e799eb95L,0xe9e5f4df6899e6ecL,0xbdc3bd681fab23d5L, + 0xb72b8ab773af60e6L } }, + /* 45 << 217 */ + { { 0x8db27ae02cecc84aL,0x600016d87bdb871cL,0x42a44b13d7c46f58L, + 0xb8919727c3a77d39L }, + { 0xcfc6bbbddafd6088L,0x1a7401466bd20d39L,0x8c747abd98c41072L, + 0x4c91e765bdf68ea1L } }, + /* 46 << 217 */ + { { 0x7c95e5ca08819a78L,0xcf48b729c9587921L,0x091c7c5fdebbcc7dL, + 0x6f287404f0e05149L }, + { 0xf83b5ac226cd44ecL,0x88ae32a6cfea250eL,0x6ac5047a1d06ebc5L, + 0xc7e550b4d434f781L } }, + /* 47 << 217 */ + { { 0x61ab1cf25c727bd2L,0x2e4badb11cf915b0L,0x1b4dadecf69d3920L, + 0xe61b1ca6f14c1dfeL }, + { 0x90b479ccbd6bd51fL,0x8024e4018045ec30L,0xcab29ca325ef0e62L, + 0x4f2e941649e4ebc0L } }, + /* 48 << 217 */ + { { 0x45eb40ec0ccced58L,0x25cd4b9c0da44f98L,0x43e06458871812c6L, + 0x99f80d5516cef651L }, + { 0x571340c9ce6dc153L,0x138d5117d8665521L,0xacdb45bc4e07014dL, + 0x2f34bb3884b60b91L } }, + /* 49 << 217 */ + { { 0xf44a4fd22ae8921eL,0xb039288e892ba1e2L,0x9da50174b1c180b2L, + 0x6b70ab661693dc87L }, + { 0x7e9babc9e7057481L,0x4581ddef9c80dc41L,0x0c890da951294682L, + 0x0b5629d33f4736e5L } }, + /* 50 << 217 */ + { { 0x2340c79eb06f5b41L,0xa42e84ce4e243469L,0xf9a20135045a71a9L, + 0xefbfb415d27b6fb6L }, + { 0x25ebea239d33cd6fL,0x9caedb88aa6c0af8L,0x53dc7e9ad9ce6f96L, + 0x3897f9fd51e0b15aL } }, + /* 51 << 217 */ + { { 0xf51cb1f88e5d788eL,0x1aec7ba8e1d490eeL,0x265991e0cc58cb3cL, + 0x9f306e8c9fc3ad31L }, + { 0x5fed006e5040a0acL,0xca9d5043fb476f2eL,0xa19c06e8beea7a23L, + 0xd28658010edabb63L } }, + /* 52 << 217 */ + { { 0xdb92293f6967469aL,0x2894d8398d8a8ed8L,0x87c9e406bbc77122L, + 0x8671c6f12ea3a26aL }, + { 0xe42df8d6d7de9853L,0x2e3ce346b1f2bcc7L,0xda601dfc899d50cfL, + 0xbfc913defb1b598fL } }, + /* 53 << 217 */ + { { 0x81c4909fe61f7908L,0x192e304f9bbc7b29L,0xc3ed8738c104b338L, + 0xedbe9e47783f5d61L }, + { 0x0c06e9be2db30660L,0xda3e613fc0eb7d8eL,0xd8fa3e97322e096eL, + 0xfebd91e8d336e247L } }, + /* 54 << 217 */ + { { 0x8f13ccc4df655a49L,0xa9e00dfc5eb20210L,0x84631d0fc656b6eaL, + 0x93a058cdd8c0d947L }, + { 0x6846904a67bd3448L,0x4a3d4e1af394fd5cL,0xc102c1a5db225f52L, + 0xe3455bbafc4f5e9aL } }, + /* 55 << 217 */ + { { 0x6b36985b4b9ad1ceL,0xa98185365bb7f793L,0x6c25e1d048b1a416L, + 0x1381dd533c81bee7L }, + { 0xd2a30d617a4a7620L,0xc841292639b8944cL,0x3c1c6fbe7a97c33aL, + 0x941e541d938664e7L } }, + /* 56 << 217 */ + { { 0x417499e84a34f239L,0x15fdb83cb90402d5L,0xb75f46bf433aa832L, + 0xb61e15af63215db1L }, + { 0xaabe59d4a127f89aL,0x5d541e0c07e816daL,0xaaba0659a618b692L, + 0x5532773317266026L } }, + /* 57 << 217 */ + { { 0xaf53a0fc95f57552L,0x329476506cacb0c9L,0x253ff58dc821be01L, + 0xb0309531a06f1146L }, + { 0x59bbbdf505c2e54dL,0x158f27ad26e8dd22L,0xcc5b7ffb397e1e53L, + 0xae03f65b7fc1e50dL } }, + /* 58 << 217 */ + { { 0xa9784ebd9c95f0f9L,0x5ed9deb224640771L,0x31244af7035561c4L, + 0x87332f3a7ee857deL }, + { 0x09e16e9e2b9e0d88L,0x52d910f456a06049L,0x507ed477a9592f48L, + 0x85cb917b2365d678L } }, + /* 59 << 217 */ + { { 0xf8511c934c8998d1L,0x2186a3f1730ea58fL,0x50189626b2029db0L, + 0x9137a6d902ceb75aL }, + { 0x2fe17f37748bc82cL,0x87c2e93180469f8cL,0x850f71cdbf891aa2L, + 0x0ca1b89b75ec3d8dL } }, + /* 60 << 217 */ + { { 0x516c43aa5e1cd3cdL,0x893978089a887c28L,0x0059c699ddea1f9fL, + 0x7737d6fa8e6868f7L }, + { 0x6d93746a60f1524bL,0x36985e55ba052aa7L,0x41b1d322ed923ea5L, + 0x3429759f25852a11L } }, + /* 61 << 217 */ + { { 
0xbeca6ec3092e9f41L,0x3a238c6662256bbdL,0xd82958ea70ad487dL, + 0x4ac8aaf965610d93L }, + { 0x3fa101b15e4ccab0L,0x9bf430f29de14bfbL,0xa10f5cc66531899dL, + 0x590005fbea8ce17dL } }, + /* 62 << 217 */ + { { 0xc437912f24544cb6L,0x9987b71ad79ac2e3L,0x13e3d9ddc058a212L, + 0x00075aacd2de9606L }, + { 0x80ab508b6cac8369L,0x87842be7f54f6c89L,0xa7ad663d6bc532a4L, + 0x67813de778a91bc8L } }, + /* 63 << 217 */ + { { 0x5dcb61cec3427239L,0x5f3c7cf0c56934d9L,0xc079e0fbe3191591L, + 0xe40896bdb01aada7L }, + { 0x8d4667910492d25fL,0x8aeb30c9e7408276L,0xe94374959287aaccL, + 0x23d4708d79fe03d4L } }, + /* 64 << 217 */ + { { 0x8cda9cf2d0c05199L,0x502fbc22fae78454L,0xc0bda9dff572a182L, + 0x5f9b71b86158b372L }, + { 0xe0f33a592b82dd07L,0x763027359523032eL,0x7fe1a721c4505a32L, + 0x7b6e3e82f796409fL } }, + /* 0 << 224 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 224 */ + { { 0xe3417bc035d0b34aL,0x440b386b8327c0a7L,0x8fb7262dac0362d1L, + 0x2c41114ce0cdf943L }, + { 0x2ba5cef1ad95a0b1L,0xc09b37a867d54362L,0x26d6cdd201e486c9L, + 0x20477abf42ff9297L } }, + /* 2 << 224 */ + { { 0xa004dcb3292a9287L,0xddc15cf677b092c7L,0x083a8464806c0605L, + 0x4a68df703db997b0L }, + { 0x9c134e4505bf7dd0L,0xa4e63d398ccf7f8cL,0xa6e6517f41b5f8afL, + 0xaa8b9342ad7bc1ccL } }, + /* 3 << 224 */ + { { 0x126f35b51e706ad9L,0xb99cebb4c3a9ebdfL,0xa75389afbf608d90L, + 0x76113c4fc6c89858L }, + { 0x80de8eb097e2b5aaL,0x7e1022cc63b91304L,0x3bdab6056ccc066cL, + 0x33cbb144b2edf900L } }, + /* 4 << 224 */ + { { 0xc41764717af715d2L,0xe2f7f594d0134a96L,0x2c1873efa41ec956L, + 0xe4e7b4f677821304L }, + { 0xe5c8ff9788d5374aL,0x2b915e6380823d5bL,0xea6bc755b2ee8fe2L, + 0x6657624ce7112651L } }, + /* 5 << 224 */ + { { 0x157af101dace5acaL,0xc4fdbcf211a6a267L,0xdaddf340c49c8609L, + 0x97e49f52e9604a65L }, + { 0x9be8e790937e2ad5L,0x846e2508326e17f1L,0x3f38007a0bbbc0dcL, + 0xcf03603fb11e16d6L } }, + /* 6 << 224 */ + { { 0xd6f800e07442f1d5L,0x475607d166e0e3abL,0x82807f16b7c64047L, + 0x8858e1e3a749883dL }, + { 0x5859120b8231ee10L,0x1b80e7eb638a1eceL,0xcb72525ac6aa73a4L, + 0xa7cdea3d844423acL } }, + /* 7 << 224 */ + { { 0x5ed0c007f8ae7c38L,0x6db07a5c3d740192L,0xbe5e9c2a5fe36db3L, + 0xd5b9d57a76e95046L }, + { 0x54ac32e78eba20f2L,0xef11ca8f71b9a352L,0x305e373eff98a658L, + 0xffe5a100823eb667L } }, + /* 8 << 224 */ + { { 0x57477b11e51732d2L,0xdfd6eb282538fc0eL,0x5c43b0cc3b39eec5L, + 0x6af12778cb36cc57L }, + { 0x70b0852d06c425aeL,0x6df92f8c5c221b9bL,0x6c8d4f9ece826d9cL, + 0xf59aba7bb49359c3L } }, + /* 9 << 224 */ + { { 0x5c8ed8d5da64309dL,0x61a6de5691b30704L,0xd6b52f6a2f9b5808L, + 0x0eee419498c958a7L }, + { 0xcddd9aab771e4caaL,0x83965dfd78bc21beL,0x02affce3b3b504f5L, + 0x30847a21561c8291L } }, + /* 10 << 224 */ + { { 0xd2eb2cf152bfda05L,0xe0e4c4e96197b98cL,0x1d35076cf8a1726fL, + 0x6c06085b2db11e3dL }, + { 0x15c0c4d74463ba14L,0x9d292f830030238cL,0x1311ee8b3727536dL, + 0xfeea86efbeaedc1eL } }, + /* 11 << 224 */ + { { 0xb9d18cd366131e2eL,0xf31d974f80fe2682L,0xb6e49e0fe4160289L, + 0x7c48ec0b08e92799L }, + { 0x818111d8d1989aa7L,0xb34fa0aaebf926f9L,0xdb5fe2f5a245474aL, + 0xf80a6ebb3c7ca756L } }, + /* 12 << 224 */ + { { 0xa7f96054afa05dd8L,0x26dfcf21fcaf119eL,0xe20ef2e30564bb59L, + 0xef4dca5061cb02b8L }, + { 0xcda7838a65d30672L,0x8b08d534fd657e86L,0x4c5b439546d595c8L, + 0x39b58725425cb836L } }, + /* 13 << 224 */ + { { 0x8ea610593de9abe3L,0x404348819cdc03beL,0x9b261245cfedce8cL, + 0x78c318b4cf5234a1L }, + { 0x510bcf16fde24c99L,0x2a77cb75a2c2ff5dL,0x9c895c2b27960fb4L, + 0xd30ce975b0eda42bL } }, + /* 14 << 224 */ + { { 
0xfda853931a62cc26L,0x23c69b9650c0e052L,0xa227df15bfc633f3L, + 0x2ac788481bae7d48L }, + { 0x487878f9187d073dL,0x6c2be919967f807dL,0x765861d8336e6d8fL, + 0x88b8974cce528a43L } }, + /* 15 << 224 */ + { { 0x09521177ff57d051L,0x2ff38037fb6a1961L,0xfc0aba74a3d76ad4L, + 0x7c76480325a7ec17L }, + { 0x7532d75f48879bc8L,0xea7eacc058ce6bc1L,0xc82176b48e896c16L, + 0x9a30e0b22c750fedL } }, + /* 16 << 224 */ + { { 0xc37e2c2e421d3aa4L,0xf926407ce84fa840L,0x18abc03d1454e41cL, + 0x26605ecd3f7af644L }, + { 0x242341a6d6a5eabfL,0x1edb84f4216b668eL,0xd836edb804010102L, + 0x5b337ce7945e1d8cL } }, + /* 17 << 224 */ + { { 0xd2075c77c055dc14L,0x2a0ffa2581d89cdfL,0x8ce815ea6ffdcbafL, + 0xa3428878fb648867L }, + { 0x277699cf884655fbL,0xfa5b5bd6364d3e41L,0x01f680c6441e1cb7L, + 0x3fd61e66b70a7d67L } }, + /* 18 << 224 */ + { { 0x666ba2dccc78cf66L,0xb30181746fdbff77L,0x8d4dd0db168d4668L, + 0x259455d01dab3a2aL }, + { 0xf58564c5cde3acecL,0x7714192513adb276L,0x527d725d8a303f65L, + 0x55deb6c9e6f38f7bL } }, + /* 19 << 224 */ + { { 0xfd5bb657b1fa70fbL,0xfa07f50fd8073a00L,0xf72e3aa7bca02500L, + 0xf68f895d9975740dL }, + { 0x301120605cae2a6aL,0x01bd721802874842L,0x3d4238917ce47bd3L, + 0xa66663c1789544f6L } }, + /* 20 << 224 */ + { { 0x864d05d73272d838L,0xe22924f9fa6295c5L,0x8189593f6c2fda32L, + 0x330d7189b184b544L }, + { 0x79efa62cbde1f714L,0x35771c94e5cb1a63L,0x2f4826b8641c8332L, + 0x00a894fbc8cee854L } }, + /* 21 << 224 */ + { { 0xb4b9a39b36194d40L,0xe857a7c577612601L,0xf4209dd24ecf2f58L, + 0x82b9e66d5a033487L }, + { 0xc1e36934e4e8b9ddL,0xd2372c9da42377d7L,0x51dc94c70e3ae43bL, + 0x4c57761e04474f6fL } }, + /* 22 << 224 */ + { { 0xdcdacd0a1058a318L,0x369cf3f578053a9aL,0xc6c3de5031c68de2L, + 0x4653a5763c4b6d9fL }, + { 0x1688dd5aaa4e5c97L,0x5be80aa1b7ab3c74L,0x70cefe7cbc65c283L, + 0x57f95f1306867091L } }, + /* 23 << 224 */ + { { 0xa39114e24415503bL,0xc08ff7c64cbb17e9L,0x1eff674dd7dec966L, + 0x6d4690af53376f63L }, + { 0xff6fe32eea74237bL,0xc436d17ecd57508eL,0x15aa28e1edcc40feL, + 0x0d769c04581bbb44L } }, + /* 24 << 224 */ + { { 0xc240b6de34eaacdaL,0xd9e116e82ba0f1deL,0xcbe45ec779438e55L, + 0x91787c9d96f752d7L }, + { 0x897f532bf129ac2fL,0xd307b7c85a36e22cL,0x91940675749fb8f3L, + 0xd14f95d0157fdb28L } }, + /* 25 << 224 */ + { { 0xfe51d0296ae55043L,0x8931e98f44a87de1L,0xe57f1cc609e4fee2L, + 0x0d063b674e072d92L }, + { 0x70a998b9ed0e4316L,0xe74a736b306aca46L,0xecf0fbf24fda97c7L, + 0xa40f65cb3e178d93L } }, + /* 26 << 224 */ + { { 0x1625360416df4285L,0xb0c9babbd0c56ae2L,0x73032b19cfc5cfc3L, + 0xe497e5c309752056L }, + { 0x12096bb4164bda96L,0x1ee42419a0b74da1L,0x8fc36243403826baL, + 0x0c8f0069dc09e660L } }, + /* 27 << 224 */ + { { 0x8667e981c27253c9L,0x05a6aefb92b36a45L,0xa62c4b369cb7bb46L, + 0x8394f37511f7027bL }, + { 0x747bc79c5f109d0fL,0xcad88a765b8cc60aL,0x80c5a66b58f09e68L, + 0xe753d451f6127eacL } }, + /* 28 << 224 */ + { { 0xc44b74a15b0ec6f5L,0x47989fe45289b2b8L,0x745f848458d6fc73L, + 0xec362a6ff61c70abL }, + { 0x070c98a7b3a8ad41L,0x73a20fc07b63db51L,0xed2c2173f44c35f4L, + 0x8a56149d9acc9dcaL } }, + /* 29 << 224 */ + { { 0x98f178819ac6e0f4L,0x360fdeafa413b5edL,0x0625b8f4a300b0fdL, + 0xf1f4d76a5b3222d3L }, + { 0x9d6f5109587f76b8L,0x8b4ee08d2317fdb5L,0x88089bb78c68b095L, + 0x95570e9a5808d9b9L } }, + /* 30 << 224 */ + { { 0xa395c36f35d33ae7L,0x200ea12350bb5a94L,0x20c789bd0bafe84bL, + 0x243ef52d0919276aL }, + { 0x3934c577e23ae233L,0xb93807afa460d1ecL,0xb72a53b1f8fa76a4L, + 0xd8914cb0c3ca4491L } }, + /* 31 << 224 */ + { { 0x2e1284943fb42622L,0x3b2700ac500907d5L,0xf370fb091a95ec63L, + 0xf8f30be231b6dfbdL }, + { 
0xf2b2f8d269e55f15L,0x1fead851cc1323e9L,0xfa366010d9e5eef6L, + 0x64d487b0e316107eL } }, + /* 32 << 224 */ + { { 0x4c076b86d23ddc82L,0x03fd344c7e0143f0L,0xa95362ff317af2c5L, + 0x0add3db7e18b7a4fL }, + { 0x9c673e3f8260e01bL,0xfbeb49e554a1cc91L,0x91351bf292f2e433L, + 0xc755e7ec851141ebL } }, + /* 33 << 224 */ + { { 0xc9a9513929607745L,0x0ca07420a26f2b28L,0xcb2790e74bc6f9ddL, + 0x345bbb58adcaffc0L }, + { 0xc65ea38cbe0f27a2L,0x67c24d7c641fcb56L,0x2c25f0a7a9e2c757L, + 0x93f5cdb016f16c49L } }, + /* 34 << 224 */ + { { 0x2ca5a9d7c5ee30a1L,0xd1593635b909b729L,0x804ce9f3dadeff48L, + 0xec464751b07c30c3L }, + { 0x89d65ff39e49af6aL,0xf2d6238a6f3d01bcL,0x1095561e0bced843L, + 0x51789e12c8a13fd8L } }, + /* 35 << 224 */ + { { 0xd633f929763231dfL,0x46df9f7de7cbddefL,0x01c889c0cb265da8L, + 0xfce1ad10af4336d2L }, + { 0x8d110df6fc6a0a7eL,0xdd431b986da425dcL,0xcdc4aeab1834aabeL, + 0x84deb1248439b7fcL } }, + /* 36 << 224 */ + { { 0x8796f1693c2a5998L,0x9b9247b47947190dL,0x55b9d9a511597014L, + 0x7e9dd70d7b1566eeL }, + { 0x94ad78f7cbcd5e64L,0x0359ac179bd4c032L,0x3b11baaf7cc222aeL, + 0xa6a6e284ba78e812L } }, + /* 37 << 224 */ + { { 0x8392053f24cea1a0L,0xc97bce4a33621491L,0x7eb1db3435399ee9L, + 0x473f78efece81ad1L }, + { 0x41d72fe0f63d3d0dL,0xe620b880afab62fcL,0x92096bc993158383L, + 0x41a213578f896f6cL } }, + /* 38 << 224 */ + { { 0x1b5ee2fac7dcfcabL,0x650acfde9546e007L,0xc081b749b1b02e07L, + 0xda9e41a0f9eca03dL }, + { 0x013ba727175a54abL,0xca0cd190ea5d8d10L,0x85ea52c095fd96a9L, + 0x2c591b9fbc5c3940L } }, + /* 39 << 224 */ + { { 0x6fb4d4e42bad4d5fL,0xfa4c3590fef0059bL,0x6a10218af5122294L, + 0x9a78a81aa85751d1L }, + { 0x04f20579a98e84e7L,0xfe1242c04997e5b5L,0xe77a273bca21e1e4L, + 0xfcc8b1ef9411939dL } }, + /* 40 << 224 */ + { { 0xe20ea30292d0487aL,0x1442dbec294b91feL,0x1f7a4afebb6b0e8fL, + 0x1700ef746889c318L }, + { 0xf5bbffc370f1fc62L,0x3b31d4b669c79ccaL,0xe8bc2aaba7f6340dL, + 0xb0b08ab4a725e10aL } }, + /* 41 << 224 */ + { { 0x44f05701ae340050L,0xba4b30161cf0c569L,0x5aa29f83fbe19a51L, + 0x1b9ed428b71d752eL }, + { 0x1666e54eeb4819f5L,0x616cdfed9e18b75bL,0x112ed5be3ee27b0bL, + 0xfbf2831944c7de4dL } }, + /* 42 << 224 */ + { { 0xd685ec85e0e60d84L,0x68037e301db7ee78L,0x5b65bdcd003c4d6eL, + 0x33e7363a93e29a6aL }, + { 0x995b3a6108d0756cL,0xd727f85c2faf134bL,0xfac6edf71d337823L, + 0x99b9aa500439b8b4L } }, + /* 43 << 224 */ + { { 0x722eb104e2b4e075L,0x49987295437c4926L,0xb1e4c0e446a9b82dL, + 0xd0cb319757a006f5L }, + { 0xf3de0f7dd7808c56L,0xb5c54d8f51f89772L,0x500a114aadbd31aaL, + 0x9afaaaa6295f6cabL } }, + /* 44 << 224 */ + { { 0x94705e2104cf667aL,0xfc2a811b9d3935d7L,0x560b02806d09267cL, + 0xf19ed119f780e53bL }, + { 0xf0227c09067b6269L,0x967b85335caef599L,0x155b924368efeebcL, + 0xcd6d34f5c497bae6L } }, + /* 45 << 224 */ + { { 0x1dd8d5d36cceb370L,0x2aeac579a78d7bf9L,0x5d65017d70b67a62L, + 0x70c8e44f17c53f67L }, + { 0xd1fc095086a34d09L,0xe0fca256e7134907L,0xe24fa29c80fdd315L, + 0x2c4acd03d87499adL } }, + /* 46 << 224 */ + { { 0xbaaf75173b5a9ba6L,0xb9cbe1f612e51a51L,0xd88edae35e154897L, + 0xe4309c3c77b66ca0L }, + { 0xf5555805f67f3746L,0x85fc37baa36401ffL,0xdf86e2cad9499a53L, + 0x6270b2a3ecbc955bL } }, + /* 47 << 224 */ + { { 0xafae64f5974ad33bL,0x04d85977fe7b2df1L,0x2a3db3ff4ab03f73L, + 0x0b87878a8702740aL }, + { 0x6d263f015a061732L,0xc25430cea32a1901L,0xf7ebab3ddb155018L, + 0x3a86f69363a9b78eL } }, + /* 48 << 224 */ + { { 0x349ae368da9f3804L,0x470f07fea164349cL,0xd52f4cc98562baa5L, + 0xc74a9e862b290df3L }, + { 0xd3a1aa3543471a24L,0x239446beb8194511L,0xbec2dd0081dcd44dL, + 0xca3d7f0fc42ac82dL } }, + /* 49 << 224 */ + { { 
0x1f3db085fdaf4520L,0xbb6d3e804549daf2L,0xf5969d8a19ad5c42L, + 0x7052b13ddbfd1511L }, + { 0x11890d1b682b9060L,0xa71d3883ac34452cL,0xa438055b783805b4L, + 0x432412774725b23eL } }, + /* 50 << 224 */ + { { 0xf20cf96e4901bbedL,0x6419c710f432a2bbL,0x57a0fbb9dfa9cd7dL, + 0x589111e400daa249L }, + { 0x19809a337b60554eL,0xea5f8887ede283a4L,0x2d713802503bfd35L, + 0x151bb0af585d2a53L } }, + /* 51 << 224 */ + { { 0x40b08f7443b30ca8L,0xe10b5bbad9934583L,0xe8a546d6b51110adL, + 0x1dd50e6628e0b6c5L }, + { 0x292e9d54cff2b821L,0x3882555d47281760L,0x134838f83724d6e3L, + 0xf2c679e022ddcda1L } }, + /* 52 << 224 */ + { { 0x40ee88156d2a5768L,0x7f227bd21c1e7e2dL,0x487ba134d04ff443L, + 0x76e2ff3dc614e54bL }, + { 0x36b88d6fa3177ec7L,0xbf731d512328fff5L,0x758caea249ba158eL, + 0x5ab8ff4c02938188L } }, + /* 53 << 224 */ + { { 0x33e1605635edc56dL,0x5a69d3497e940d79L,0x6c4fd00103866dcbL, + 0x20a38f574893cdefL }, + { 0xfbf3e790fac3a15bL,0x6ed7ea2e7a4f8e6bL,0xa663eb4fbc3aca86L, + 0x22061ea5080d53f7L } }, + /* 54 << 224 */ + { { 0x2480dfe6f546783fL,0xd38bc6da5a0a641eL,0xfb093cd12ede8965L, + 0x89654db4acb455cfL }, + { 0x413cbf9a26e1adeeL,0x291f3764373294d4L,0x00797257648083feL, + 0x25f504d3208cc341L } }, + /* 55 << 224 */ + { { 0x635a8e5ec3a0ee43L,0x70aaebca679898ffL,0x9ee9f5475dc63d56L, + 0xce987966ffb34d00L }, + { 0xf9f86b195e26310aL,0x9e435484382a8ca8L,0x253bcb81c2352fe4L, + 0xa4eac8b04474b571L } }, + /* 56 << 224 */ + { { 0xc1b97512c1ad8cf8L,0x193b4e9e99e0b697L,0x939d271601e85df0L, + 0x4fb265b3cd44eafdL }, + { 0x321e7dcde51e1ae2L,0x8e3a8ca6e3d8b096L,0x8de46cb052604998L, + 0x91099ad839072aa7L } }, + /* 57 << 224 */ + { { 0x2617f91c93aa96b8L,0x0fc8716b7fca2e13L,0xa7106f5e95328723L, + 0xd1c9c40b262e6522L }, + { 0xb9bafe8642b7c094L,0x1873439d1543c021L,0xe1baa5de5cbefd5dL, + 0xa363fc5e521e8affL } }, + /* 58 << 224 */ + { { 0xefe6320df862eaacL,0x14419c6322c647dcL,0x0e06707c4e46d428L, + 0xcb6c834f4a178f8fL }, + { 0x0f993a45d30f917cL,0xd4c4b0499879afeeL,0xb6142a1e70500063L, + 0x7c9b41c3a5d9d605L } }, + /* 59 << 224 */ + { { 0xbc00fc2f2f8ba2c7L,0x0966eb2f7c67aa28L,0x13f7b5165a786972L, + 0x3bfb75578a2fbba0L }, + { 0x131c4f235a2b9620L,0xbff3ed276faf46beL,0x9b4473d17e172323L, + 0x421e8878339f6246L } }, + /* 60 << 224 */ + { { 0x0fa8587a25a41632L,0xc0814124a35b6c93L,0x2b18a9f559ebb8dbL, + 0x264e335776edb29cL }, + { 0xaf245ccdc87c51e2L,0x16b3015b501e6214L,0xbb31c5600a3882ceL, + 0x6961bb94fec11e04L } }, + /* 61 << 224 */ + { { 0x3b825b8deff7a3a0L,0xbec33738b1df7326L,0x68ad747c99604a1fL, + 0xd154c9349a3bd499L }, + { 0xac33506f1cc7a906L,0x73bb53926c560e8fL,0x6428fcbe263e3944L, + 0xc11828d51c387434L } }, + /* 62 << 224 */ + { { 0x3cd04be13e4b12ffL,0xc3aad9f92d88667cL,0xc52ddcf8248120cfL, + 0x985a892e2a389532L }, + { 0xfbb4b21b3bb85fa0L,0xf95375e08dfc6269L,0xfb4fb06c7ee2aceaL, + 0x6785426e309c4d1fL } }, + /* 63 << 224 */ + { { 0x659b17c8d8ceb147L,0x9b649eeeb70a5554L,0x6b7fa0b5ac6bc634L, + 0xd99fe2c71d6e732fL }, + { 0x30e6e7628d3abba2L,0x18fee6e7a797b799L,0x5c9d360dc696464dL, + 0xe3baeb4827bfde12L } }, + /* 64 << 224 */ + { { 0x2bf5db47f23206d5L,0x2f6d34201d260152L,0x17b876533f8ff89aL, + 0x5157c30c378fa458L }, + { 0x7517c5c52d4fb936L,0xef22f7ace6518cdcL,0xdeb483e6bf847a64L, + 0xf508455892e0fa89L } }, + /* 0 << 231 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 231 */ + { { 0xab9659d8df7304d4L,0xb71bcf1bff210e8eL,0xa9a2438bd73fbd60L, + 0x4595cd1f5d11b4deL }, + { 0x9c0d329a4835859dL,0x4a0f0d2d7dbb6e56L,0xc6038e5edf928a4eL, + 0xc94296218f5ad154L } }, + /* 2 << 231 */ + { { 
0x91213462f23f2d92L,0x6cab71bd60b94078L,0x6bdd0a63176cde20L, + 0x54c9b20cee4d54bcL }, + { 0x3cd2d8aa9f2ac02fL,0x03f8e617206eedb0L,0xc7f68e1693086434L, + 0x831469c592dd3db9L } }, + /* 3 << 231 */ + { { 0x8521df248f981354L,0x587e23ec3588a259L,0xcbedf281d7a0992cL, + 0x06930a5538961407L }, + { 0x09320debbe5bbe21L,0xa7ffa5b52491817fL,0xe6c8b4d909065160L, + 0xac4f3992fff6d2a9L } }, + /* 4 << 231 */ + { { 0x7aa7a1583ae9c1bdL,0xe0af6d98e37ce240L,0xe54342d928ab38b4L, + 0xe8b750070a1c98caL }, + { 0xefce86afe02358f2L,0x31b8b856ea921228L,0x052a19120a1c67fcL, + 0xb4069ea4e3aead59L } }, + /* 5 << 231 */ + { { 0x3232d6e27fa03cb3L,0xdb938e5b0fdd7d88L,0x04c1d2cd2ccbfc5dL, + 0xd2f45c12af3a580fL }, + { 0x592620b57883e614L,0x5fd27e68be7c5f26L,0x139e45a91567e1e3L, + 0x2cc71d2d44d8aaafL } }, + /* 6 << 231 */ + { { 0x4a9090cde36d0757L,0xf722d7b1d9a29382L,0xfb7fb04c04b48ddfL, + 0x628ad2a7ebe16f43L }, + { 0xcd3fbfb520226040L,0x6c34ecb15104b6c4L,0x30c0754ec903c188L, + 0xec336b082d23cab0L } }, + /* 7 << 231 */ + { { 0x473d62a21e206ee5L,0xf1e274808c49a633L,0x87ab956ce9f6b2c3L, + 0x61830b4862b606eaL }, + { 0x67cd6846e78e815fL,0xfe40139f4c02082aL,0x52bbbfcb952ec365L, + 0x74c116426b9836abL } }, + /* 8 << 231 */ + { { 0x9f51439e558df019L,0x230da4baac712b27L,0x518919e355185a24L, + 0x4dcefcdd84b78f50L }, + { 0xa7d90fb2a47d4c5aL,0x55ac9abfb30e009eL,0xfd2fc35974eed273L, + 0xb72d824cdbea8fafL } }, + /* 9 << 231 */ + { { 0xce721a744513e2caL,0x0b41861238240b2cL,0x05199968d5baa450L, + 0xeb1757ed2b0e8c25L }, + { 0x6ebc3e283dfac6d5L,0xb2431e2e48a237f5L,0x2acb5e2352f61499L, + 0x5558a2a7e06c936bL } }, + /* 10 << 231 */ + { { 0xd213f923cbb13d1bL,0x98799f425bfb9bfeL,0x1ae8ddc9701144a9L, + 0x0b8b3bb64c5595eeL }, + { 0x0ea9ef2e3ecebb21L,0x17cb6c4b3671f9a7L,0x47ef464f726f1d1fL, + 0x171b94846943a276L } }, + /* 11 << 231 */ + { { 0x51a4ae2d7ef0329cL,0x0850922291c4402aL,0x64a61d35afd45bbcL, + 0x38f096fe3035a851L }, + { 0xc7468b74a1dec027L,0xe8cf10e74fc7dcbaL,0xea35ff40f4a06353L, + 0x0b4c0dfa8b77dd66L } }, + /* 12 << 231 */ + { { 0x779b8552de7e5c19L,0xfab28609c1c0256cL,0x64f58eeeabd4743dL, + 0x4e8ef8387b6cc93bL }, + { 0xee650d264cb1bf3dL,0x4c1f9d0973dedf61L,0xaef7c9d7bfb70cedL, + 0x1ec0507e1641de1eL } }, + /* 13 << 231 */ + { { 0xcd7e5cc7cde45079L,0xde173c9a516ac9e4L,0x517a8494c170315cL, + 0x438fd90591d8e8fbL }, + { 0x5145c506c7d9630bL,0x6457a87bf47d4d75L,0xd31646bf0d9a80e8L, + 0x453add2bcef3aabeL } }, + /* 14 << 231 */ + { { 0xc9941109a607419dL,0xfaa71e62bb6bca80L,0x34158c1307c431f3L, + 0x594abebc992bc47aL }, + { 0x6dfea691eb78399fL,0x48aafb353f42cba4L,0xedcd65af077c04f0L, + 0x1a29a366e884491aL } }, + /* 15 << 231 */ + { { 0x023a40e51c21f2bfL,0xf99a513ca5057aeeL,0xa3fe7e25bcab072eL, + 0x8568d2e140e32bcfL }, + { 0x904594ebd3f69d9fL,0x181a973307affab1L,0xe4d68d76b6e330f4L, + 0x87a6dafbc75a7fc1L } }, + /* 16 << 231 */ + { { 0x549db2b5ef7d9289L,0x2480d4a8197f015aL,0x61d5590bc40493b6L, + 0x3a55b52e6f780331L }, + { 0x40eb8115309eadb0L,0xdea7de5a92e5c625L,0x64d631f0cc6a3d5aL, + 0x9d5e9d7c93e8dd61L } }, + /* 17 << 231 */ + { { 0xf297bef5206d3ffcL,0x23d5e0337d808bd4L,0x4a4f6912d24cf5baL, + 0xe4d8163b09cdaa8aL }, + { 0x0e0de9efd3082e8eL,0x4fe1246c0192f360L,0x1f9001504b8eee0aL, + 0x5219da81f1da391bL } }, + /* 18 << 231 */ + { { 0x7bf6a5c1f7ea25aaL,0xd165e6bffbb07d5fL,0xe353936189e78671L, + 0xa3fcac892bac4219L }, + { 0xdfab6fd4f0baa8abL,0x5a4adac1e2c1c2e5L,0x6cd75e3140d85849L, + 0xce263fea19b39181L } }, + /* 19 << 231 */ + { { 0xcb6803d307032c72L,0x7f40d5ce790968c8L,0xa6de86bddce978f0L, + 0x25547c4f368f751cL }, + { 
0xb1e685fd65fb2a9eL,0xce69336f1eb9179cL,0xb15d1c2712504442L, + 0xb7df465cb911a06bL } }, + /* 20 << 231 */ + { { 0xb8d804a3315980cdL,0x693bc492fa3bebf7L,0x3578aeee2253c504L, + 0x158de498cd2474a2L }, + { 0x1331f5c7cfda8368L,0xd2d7bbb378d7177eL,0xdf61133af3c1e46eL, + 0x5836ce7dd30e7be8L } }, + /* 21 << 231 */ + { { 0x83084f1994f834cbL,0xd35653d4429ed782L,0xa542f16f59e58243L, + 0xc2b52f650470a22dL }, + { 0xe3b6221b18f23d96L,0xcb05abac3f5252b4L,0xca00938b87d61402L, + 0x2f186cdd411933e4L } }, + /* 22 << 231 */ + { { 0xe042ece59a29a5c5L,0xb19b3c073b6c8402L,0xc97667c719d92684L, + 0xb5624622ebc66372L }, + { 0x0cb96e653c04fa02L,0x83a7176c8eaa39aaL,0x2033561deaa1633fL, + 0x45a9d0864533df73L } }, + /* 23 << 231 */ + { { 0xe0542c1d3dc090bcL,0x82c996efaa59c167L,0xe3f735e80ee7fc4dL, + 0x7b1793937c35db79L }, + { 0xb6419e25f8c5dbfdL,0x4d9d7a1e1f327b04L,0x979f6f9b298dfca8L, + 0xc7c5dff18de9366aL } }, + /* 24 << 231 */ + { { 0x1b7a588d04c82bddL,0x68005534f8319dfdL,0xde8a55b5d8eb9580L, + 0x5ea886da8d5bca81L }, + { 0xe8530a01252a0b4dL,0x1bffb4fe35eaa0a1L,0x2ad828b1d8e99563L, + 0x7de96ef595f9cd87L } }, + /* 25 << 231 */ + { { 0x4abb2d0cd77d970cL,0x03cfb933d33ef9cbL,0xb0547c018b211fe9L, + 0x2fe64809a56ed1c6L }, + { 0xcb7d5624c2ac98ccL,0x2a1372c01a393e33L,0xc8d1ec1c29660521L, + 0xf3d31b04b37ac3e9L } }, + /* 26 << 231 */ + { { 0xa29ae9df5ece6e7cL,0x0603ac8f0facfb55L,0xcfe85b7adda233a5L, + 0xe618919fbd75f0b8L }, + { 0xf555a3d299bf1603L,0x1f43afc9f184255aL,0xdcdaf341319a3e02L, + 0xd3b117ef03903a39L } }, + /* 27 << 231 */ + { { 0xe095da1365d1d131L,0x86f16367c37ad03eL,0x5f37389e462cd8ddL, + 0xc103fa04d67a60e6L }, + { 0x57c34344f4b478f0L,0xce91edd8e117c98dL,0x001777b0231fc12eL, + 0x11ae47f2b207bccbL } }, + /* 28 << 231 */ + { { 0xd983cf8d20f8a242L,0x7aff5b1df22e1ad8L,0x68fd11d07fc4feb3L, + 0x5d53ae90b0f1c3e1L }, + { 0x50fb7905ec041803L,0x85e3c97714404888L,0x0e67faedac628d8fL, + 0x2e8651506668532cL } }, + /* 29 << 231 */ + { { 0x15acaaa46a67a6b0L,0xf4cdee25b25cec41L,0x49ee565ae4c6701eL, + 0x2a04ca66fc7d63d8L }, + { 0xeb105018ef0543fbL,0xf709a4f5d1b0d81dL,0x5b906ee62915d333L, + 0xf4a8741296f1f0abL } }, + /* 30 << 231 */ + { { 0xb6b82fa74d82f4c2L,0x90725a606804efb3L,0xbc82ec46adc3425eL, + 0xb7b805812787843eL }, + { 0xdf46d91cdd1fc74cL,0xdc1c62cbe783a6c4L,0x59d1b9f31a04cbbaL, + 0xd87f6f7295e40764L } }, + /* 31 << 231 */ + { { 0x02b4cfc1317f4a76L,0x8d2703eb91036bceL,0x98206cc6a5e72a56L, + 0x57be9ed1cf53fb0fL }, + { 0x09374571ef0b17acL,0x74b2655ed9181b38L,0xc8f80ea889935d0eL, + 0xc0d9e94291529936L } }, + /* 32 << 231 */ + { { 0x196860411e84e0e5L,0xa5db84d3aea34c93L,0xf9d5bb197073a732L, + 0xb8d2fe566bcfd7c0L }, + { 0x45775f36f3eb82faL,0x8cb20cccfdff8b58L,0x1659b65f8374c110L, + 0xb8b4a422330c789aL } }, + /* 33 << 231 */ + { { 0x75e3c3ea6fe8208bL,0xbd74b9e4286e78feL,0x0be2e81bd7d93a1aL, + 0x7ed06e27dd0a5aaeL }, + { 0x721f5a586be8b800L,0x428299d1d846db28L,0x95cb8e6b5be88ed3L, + 0xc3186b231c034e11L } }, + /* 34 << 231 */ + { { 0xa6312c9e8977d99bL,0xbe94433183f531e7L,0x8232c0c218d3b1d4L, + 0x617aae8be1247b73L }, + { 0x40153fc4282aec3bL,0xc6063d2ff7b8f823L,0x68f10e583304f94cL, + 0x31efae74ee676346L } }, + /* 35 << 231 */ + { { 0xbadb6c6d40a9b97cL,0x14702c634f666256L,0xdeb954f15184b2e3L, + 0x5184a52694b6ca40L }, + { 0xfff05337003c32eaL,0x5aa374dd205974c7L,0x9a7638544b0dd71aL, + 0x459cd27fdeb947ecL } }, + /* 36 << 231 */ + { { 0xa6e28161459c2b92L,0x2f020fa875ee8ef5L,0xb132ec2d30b06310L, + 0xc3e15899bc6a4530L }, + { 0xdc5f53feaa3f451aL,0x3a3c7f23c2d9acacL,0x2ec2f8926b27e58bL, + 0x68466ee7d742799fL } }, + /* 37 << 231 */ + { { 
0x98324dd41fa26613L,0xa2dc6dabbdc29d63L,0xf9675faad712d657L, + 0x813994be21fd8d15L }, + { 0x5ccbb722fd4f7553L,0x5135ff8bf3a36b20L,0x44be28af69559df5L, + 0x40b65bed9d41bf30L } }, + /* 38 << 231 */ + { { 0xd98bf2a43734e520L,0x5e3abbe3209bdcbaL,0x77c76553bc945b35L, + 0x5331c093c6ef14aaL }, + { 0x518ffe2976b60c80L,0x2285593b7ace16f8L,0xab1f64ccbe2b9784L, + 0xe8f2c0d9ab2421b6L } }, + /* 39 << 231 */ + { { 0x617d7174c1df065cL,0xafeeb5ab5f6578faL,0x16ff1329263b54a8L, + 0x45c55808c990dce3L }, + { 0x42eab6c0ecc8c177L,0x799ea9b55982ecaaL,0xf65da244b607ef8eL, + 0x8ab226ce32a3fc2cL } }, + /* 40 << 231 */ + { { 0x745741e57ea973dcL,0x5c00ca7020888f2eL,0x7cdce3cf45fd9cf1L, + 0x8a741ef15507f872L }, + { 0x47c51c2f196b4cecL,0x70d08e43c97ea618L,0x930da15c15b18a2bL, + 0x33b6c6782f610514L } }, + /* 41 << 231 */ + { { 0xc662e4f807ac9794L,0x1eccf050ba06cb79L,0x1ff08623e7d954e5L, + 0x6ef2c5fb24cf71c3L }, + { 0xb2c063d267978453L,0xa0cf37961d654af8L,0x7cb242ea7ebdaa37L, + 0x206e0b10b86747e0L } }, + /* 42 << 231 */ + { { 0x481dae5fd5ecfefcL,0x07084fd8c2bff8fcL,0x8040a01aea324596L, + 0x4c646980d4de4036L }, + { 0x9eb8ab4ed65abfc3L,0xe01cb91f13541ec7L,0x8f029adbfd695012L, + 0x9ae284833c7569ecL } }, + /* 43 << 231 */ + { { 0xa5614c9ea66d80a1L,0x680a3e4475f5f911L,0x0c07b14dceba4fc1L, + 0x891c285ba13071c1L }, + { 0xcac67ceb799ece3cL,0x29b910a941e07e27L,0x66bdb409f2e43123L, + 0x06f8b1377ac9ecbeL } }, + /* 44 << 231 */ + { { 0x5981fafd38547090L,0x19ab8b9f85e3415dL,0xfc28c194c7e31b27L, + 0x843be0aa6fbcbb42L }, + { 0xf3b1ed43a6db836cL,0x2a1330e401a45c05L,0x4f19f3c595c1a377L, + 0xa85f39d044b5ee33L } }, + /* 45 << 231 */ + { { 0x3da18e6d4ae52834L,0x5a403b397423dcb0L,0xbb555e0af2374aefL, + 0x2ad599c41e8ca111L }, + { 0x1b3a2fb9014b3bf8L,0x73092684f66d5007L,0x079f1426c4340102L, + 0x1827cf818fddf4deL } }, + /* 46 << 231 */ + { { 0xc83605f6f10ff927L,0xd387145123739fc6L,0x6d163450cac1c2ccL, + 0x6b521296a2ec1ac5L }, + { 0x0606c4f96e3cb4a5L,0xe47d3f41778abff7L,0x425a8d5ebe8e3a45L, + 0x53ea9e97a6102160L } }, + /* 47 << 231 */ + { { 0x477a106e39cbb688L,0x532401d2f3386d32L,0x8e564f64b1b9b421L, + 0xca9b838881dad33fL }, + { 0xb1422b4e2093913eL,0x533d2f9269bc8112L,0x3fa017beebe7b2c7L, + 0xb2767c4acaf197c6L } }, + /* 48 << 231 */ + { { 0xc925ff87aedbae9fL,0x7daf0eb936880a54L,0x9284ddf59c4d0e71L, + 0x1581cf93316f8cf5L }, + { 0x3eeca8873ac1f452L,0xb417fce9fb6aeffeL,0xa5918046eefb8dc3L, + 0x73d318ac02209400L } }, + /* 49 << 231 */ + { { 0xe800400f728693e5L,0xe87d814b339927edL,0x93e94d3b57ea9910L, + 0xff8a35b62245fb69L }, + { 0x043853d77f200d34L,0x470f1e680f653ce1L,0x81ac05bd59a06379L, + 0xa14052c203930c29L } }, + /* 50 << 231 */ + { { 0x6b72fab526bc2797L,0x13670d1699f16771L,0x001700521e3e48d1L, + 0x978fe401b7adf678L }, + { 0x55ecfb92d41c5dd4L,0x5ff8e247c7b27da5L,0xe7518272013fb606L, + 0x5768d7e52f547a3cL } }, + /* 51 << 231 */ + { { 0xbb24eaa360017a5fL,0x6b18e6e49c64ce9bL,0xc225c655103dde07L, + 0xfc3672ae7592f7eaL }, + { 0x9606ad77d06283a1L,0x542fc650e4d59d99L,0xabb57c492a40e7c2L, + 0xac948f13a8db9f55L } }, + /* 52 << 231 */ + { { 0x6d4c9682b04465c3L,0xe3d062fa6468bd15L,0xa51729ac5f318d7eL, + 0x1fc87df69eb6fc95L }, + { 0x63d146a80591f652L,0xa861b8f7589621aaL,0x59f5f15ace31348cL, + 0x8f663391440da6daL } }, + /* 53 << 231 */ + { { 0xcfa778acb591ffa3L,0x027ca9c54cdfebceL,0xbe8e05a5444ea6b3L, + 0x8aab4e69a78d8254L }, + { 0x2437f04fb474d6b8L,0x6597ffd4045b3855L,0xbb0aea4eca47ecaaL, + 0x568aae8385c7ebfcL } }, + /* 54 << 231 */ + { { 0x0e966e64c73b2383L,0x49eb3447d17d8762L,0xde1078218da05dabL, + 0x443d8baa016b7236L }, + { 
0x163b63a5ea7610d6L,0xe47e4185ce1ca979L,0xae648b6580baa132L, + 0xebf53de20e0d5b64L } }, + /* 55 << 231 */ + { { 0x8d3bfcb4d3c8c1caL,0x0d914ef35d04b309L,0x55ef64153de7d395L, + 0xbde1666f26b850e8L }, + { 0xdbe1ca6ed449ab19L,0x8902b322e89a2672L,0xb1674b7edacb7a53L, + 0x8e9faf6ef52523ffL } }, + /* 56 << 231 */ + { { 0x6ba535da9a85788bL,0xd21f03aebd0626d4L,0x099f8c47e873dc64L, + 0xcda8564d018ec97eL }, + { 0x3e8d7a5cde92c68cL,0x78e035a173323cc4L,0x3ef26275f880ff7cL, + 0xa4ee3dff273eedaaL } }, + /* 57 << 231 */ + { { 0x58823507af4e18f8L,0x967ec9b50672f328L,0x9ded19d9559d3186L, + 0x5e2ab3de6cdce39cL }, + { 0xabad6e4d11c226dfL,0xf9783f4387723014L,0x9a49a0cf1a885719L, + 0xfc0c1a5a90da9dbfL } }, + /* 58 << 231 */ + { { 0x8bbaec49571d92acL,0x569e85fe4692517fL,0x8333b014a14ea4afL, + 0x32f2a62f12e5c5adL }, + { 0x98c2ce3a06d89b85L,0xb90741aa2ff77a08L,0x2530defc01f795a2L, + 0xd6e5ba0b84b3c199L } }, + /* 59 << 231 */ + { { 0x7d8e845112e4c936L,0xae419f7dbd0be17bL,0xa583fc8c22262bc9L, + 0x6b842ac791bfe2bdL }, + { 0x33cef4e9440d6827L,0x5f69f4deef81fb14L,0xf16cf6f6234fbb92L, + 0x76ae3fc3d9e7e158L } }, + /* 60 << 231 */ + { { 0x4e89f6c2e9740b33L,0x677bc85d4962d6a1L,0x6c6d8a7f68d10d15L, + 0x5f9a72240257b1cdL }, + { 0x7096b9164ad85961L,0x5f8c47f7e657ab4aL,0xde57d7d0f7461d7eL, + 0x7eb6094d80ce5ee2L } }, + /* 61 << 231 */ + { { 0x0b1e1dfd34190547L,0x8a394f43f05dd150L,0x0a9eb24d97df44e6L, + 0x78ca06bf87675719L }, + { 0x6f0b34626ffeec22L,0x9d91bcea36cdd8fbL,0xac83363ca105be47L, + 0x81ba76c1069710e3L } }, + /* 62 << 231 */ + { { 0x3d1b24cb28c682c6L,0x27f252288612575bL,0xb587c779e8e66e98L, + 0x7b0c03e9405eb1feL }, + { 0xfdf0d03015b548e7L,0xa8be76e038b36af7L,0x4cdab04a4f310c40L, + 0x6287223ef47ecaecL } }, + /* 63 << 231 */ + { { 0x678e60558b399320L,0x61fe3fa6c01e4646L,0xc482866b03261a5eL, + 0xdfcf45b85c2f244aL }, + { 0x8fab9a512f684b43L,0xf796c654c7220a66L,0x1d90707ef5afa58fL, + 0x2c421d974fdbe0deL } }, + /* 64 << 231 */ + { { 0xc4f4cda3af2ebc2fL,0xa0af843dcb4efe24L,0x53b857c19ccd10b1L, + 0xddc9d1eb914d3e04L }, + { 0x7bdec8bb62771debL,0x829277aa91c5aa81L,0x7af18dd6832391aeL, + 0x1740f316c71a84caL } }, + /* 0 << 238 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 238 */ + { { 0x8928e99aeeaf8c49L,0xee7aa73d6e24d728L,0x4c5007c2e72b156cL, + 0x5fcf57c5ed408a1dL }, + { 0x9f719e39b6057604L,0x7d343c01c2868bbfL,0x2cca254b7e103e2dL, + 0xe6eb38a9f131bea2L } }, + /* 2 << 238 */ + { { 0xb33e624f8be762b4L,0x2a9ee4d1058e3413L,0x968e636967d805faL, + 0x9848949b7db8bfd7L }, + { 0x5308d7e5d23a8417L,0x892f3b1df3e29da5L,0xc95c139e3dee471fL, + 0x8631594dd757e089L } }, + /* 3 << 238 */ + { { 0xe0c82a3cde918dccL,0x2e7b599426fdcf4bL,0x82c5024932cb1b2dL, + 0xea613a9d7657ae07L }, + { 0xc2eb5f6cf1fdc9f7L,0xb6eae8b8879fe682L,0x253dfee0591cbc7fL, + 0x000da7133e1290e6L } }, + /* 4 << 238 */ + { { 0x1083e2ea1f095615L,0x0a28ad7714e68c33L,0x6bfc02523d8818beL, + 0xb585113af35850cdL }, + { 0x7d935f0b30df8aa1L,0xaddda07c4ab7e3acL,0x92c34299552f00cbL, + 0xc33ed1de2909df6cL } }, + /* 5 << 238 */ + { { 0x22c2195d80e87766L,0x9e99e6d89ddf4ac0L,0x09642e4e65e74934L, + 0x2610ffa2ff1ff241L }, + { 0x4d1d47d4751c8159L,0x697b4985af3a9363L,0x0318ca4687477c33L, + 0xa90cb5659441eff3L } }, + /* 6 << 238 */ + { { 0x58bb384836f024cbL,0x85be1f7736016168L,0x6c59587cdc7e07f1L, + 0x191be071af1d8f02L }, + { 0xbf169fa5cca5e55cL,0x3864ba3cf7d04eacL,0x915e367f8d7d05dbL, + 0xb48a876da6549e5dL } }, + /* 7 << 238 */ + { { 0xef89c656580e40a2L,0xf194ed8c728068bcL,0x74528045a47990c9L, + 0xf53fc7d75e1a4649L }, + { 
0xbec5ae9b78593e7dL,0x2cac4ee341db65d7L,0xa8c1eb2404a3d39bL, + 0x53b7d63403f8f3efL } }, + /* 8 << 238 */ + { { 0x2dc40d483e07113cL,0x6e4a5d397d8b63aeL,0x5582a94b79684c2bL, + 0x932b33d4622da26cL }, + { 0xf534f6510dbbf08dL,0x211d07c964c23a52L,0x0eeece0fee5bdc9bL, + 0xdf178168f7015558L } }, + /* 9 << 238 */ + { { 0xd42946350a712229L,0x93cbe44809273f8cL,0x00b095ef8f13bc83L, + 0xbb7419728798978cL }, + { 0x9d7309a256dbe6e7L,0xe578ec565a5d39ecL,0x3961151b851f9a31L, + 0x2da7715de5709eb4L } }, + /* 10 << 238 */ + { { 0x867f301753dfabf0L,0x728d2078b8e39259L,0x5c75a0cd815d9958L, + 0xf84867a616603be1L }, + { 0xc865b13d70e35b1cL,0x0241446819b03e2cL,0xe46041daac1f3121L, + 0x7c9017ad6f028a7cL } }, + /* 11 << 238 */ + { { 0xabc96de90a482873L,0x4265d6b1b77e54d4L,0x68c38e79a57d88e7L, + 0xd461d7669ce82de3L }, + { 0x817a9ec564a7e489L,0xcc5675cda0def5f2L,0x9a00e785985d494eL, + 0xc626833f1b03514aL } }, + /* 12 << 238 */ + { { 0xabe7905a83cdd60eL,0x50602fb5a1170184L,0x689886cdb023642aL, + 0xd568d090a6e1fb00L }, + { 0x5b1922c70259217fL,0x93831cd9c43141e4L,0xdfca35870c95f86eL, + 0xdec2057a568ae828L } }, + /* 13 << 238 */ + { { 0xc44ea599f98a759aL,0x55a0a7a2f7c23c1dL,0xd5ffb6e694c4f687L, + 0x3563cce212848478L }, + { 0x812b3517e7b1fbe1L,0x8a7dc9794f7338e0L,0x211ecee952d048dbL, + 0x2eea4056c86ea3b8L } }, + /* 14 << 238 */ + { { 0xd8cb68a7ba772b34L,0xe16ed3415f4e2541L,0x9b32f6a60fec14dbL, + 0xeee376f7391698beL }, + { 0xe9a7aa1783674c02L,0x65832f975843022aL,0x29f3a8da5ba4990fL, + 0x79a59c3afb8e3216L } }, + /* 15 << 238 */ + { { 0x9cdc4d2ebd19bb16L,0xc6c7cfd0b3262d86L,0xd4ce14d0969c0b47L, + 0x1fa352b713e56128L }, + { 0x383d55b8973db6d3L,0x71836850e8e5b7bfL,0xc7714596e6bb571fL, + 0x259df31f2d5b2dd2L } }, + /* 16 << 238 */ + { { 0x568f8925913cc16dL,0x18bc5b6de1a26f5aL,0xdfa413bef5f499aeL, + 0xf8835decc3f0ae84L }, + { 0xb6e60bd865a40ab0L,0x65596439194b377eL,0xbcd8562592084a69L, + 0x5ce433b94f23ede0L } }, + /* 17 << 238 */ + { { 0xe8e8f04f6ad65143L,0x11511827d6e14af6L,0x3d390a108295c0c7L, + 0x71e29ee4621eba16L }, + { 0xa588fc0963717b46L,0x02be02fee06ad4a2L,0x931558c604c22b22L, + 0xbb4d4bd612f3c849L } }, + /* 18 << 238 */ + { { 0x54a4f49620efd662L,0x92ba6d20c5952d14L,0x2db8ea1ecc9784c2L, + 0x81cc10ca4b353644L }, + { 0x40b570ad4b4d7f6cL,0x5c9f1d9684a1dcd2L,0x01379f813147e797L, + 0xe5c6097b2bd499f5L } }, + /* 19 << 238 */ + { { 0x40dcafa6328e5e20L,0xf7b5244a54815550L,0xb9a4f11847bfc978L, + 0x0ea0e79fd25825b1L }, + { 0xa50f96eb646c7ecfL,0xeb811493446dea9dL,0x2af04677dfabcf69L, + 0xbe3a068fc713f6e8L } }, + /* 20 << 238 */ + { { 0x860d523d42e06189L,0xbf0779414e3aff13L,0x0b616dcac1b20650L, + 0xe66dd6d12131300dL }, + { 0xd4a0fd67ff99abdeL,0xc9903550c7aac50dL,0x022ecf8b7c46b2d7L, + 0x3333b1e83abf92afL } }, + /* 21 << 238 */ + { { 0x11cc113c6c491c14L,0x0597668880dd3f88L,0xf5b4d9e729d932edL, + 0xe982aad8a2c38b6dL }, + { 0x6f9253478be0dcf0L,0x700080ae65ca53f2L,0xd8131156443ca77fL, + 0xe92d6942ec51f984L } }, + /* 22 << 238 */ + { { 0xd2a08af885dfe9aeL,0xd825d9a54d2a86caL,0x2c53988d39dff020L, + 0xf38b135a430cdc40L }, + { 0x0c918ae062a7150bL,0xf31fd8de0c340e9bL,0xafa0e7ae4dbbf02eL, + 0x5847fb2a5eba6239L } }, + /* 23 << 238 */ + { { 0x6b1647dcdccbac8bL,0xb642aa7806f485c8L,0x873f37657038ecdfL, + 0x2ce5e865fa49d3feL }, + { 0xea223788c98c4400L,0x8104a8cdf1fa5279L,0xbcf7cc7a06becfd7L, + 0x49424316c8f974aeL } }, + /* 24 << 238 */ + { { 0xc0da65e784d6365dL,0xbcb7443f8f759fb8L,0x35c712b17ae81930L, + 0x80428dff4c6e08abL }, + { 0xf19dafefa4faf843L,0xced8538dffa9855fL,0x20ac409cbe3ac7ceL, + 0x358c1fb6882da71eL } }, + /* 25 << 238 */ + { { 
0xafa9c0e5fd349961L,0x2b2cfa518421c2fcL,0x2a80db17f3a28d38L, + 0xa8aba5395d138e7eL }, + { 0x52012d1d6e96eb8dL,0x65d8dea0cbaf9622L,0x57735447b264f56cL, + 0xbeebef3f1b6c8da2L } }, + /* 26 << 238 */ + { { 0xfc346d98ce785254L,0xd50e8d72bb64a161L,0xc03567c749794addL, + 0x15a76065752c7ef6L }, + { 0x59f3a222961f23d6L,0x378e443873ecc0b0L,0xc74be4345a82fde4L, + 0xae509af2d8b9cf34L } }, + /* 27 << 238 */ + { { 0x4a61ee46577f44a1L,0xe09b748cb611deebL,0xc0481b2cf5f7b884L, + 0x3562667861acfa6bL }, + { 0x37f4c518bf8d21e6L,0x22d96531b205a76dL,0x37fb85e1954073c0L, + 0xbceafe4f65b3a567L } }, + /* 28 << 238 */ + { { 0xefecdef7be42a582L,0xd3fc608065046be6L,0xc9af13c809e8dba9L, + 0x1e6c9847641491ffL }, + { 0x3b574925d30c31f7L,0xb7eb72baac2a2122L,0x776a0dacef0859e7L, + 0x06fec31421900942L } }, + /* 29 << 238 */ + { { 0x2464bc10f8c22049L,0x9bfbcce7875ebf69L,0xd7a88e2a4336326bL, + 0xda05261c5bc2acfaL }, + { 0xc29f5bdceba7efc8L,0x471237ca25dbbf2eL,0xa72773f22975f127L, + 0xdc744e8e04d0b326L } }, + /* 30 << 238 */ + { { 0x38a7ed16a56edb73L,0x64357e372c007e70L,0xa167d15b5080b400L, + 0x07b4116423de4be1L }, + { 0xb2d91e3274c89883L,0x3c1628212882e7edL,0xad6b36ba7503e482L, + 0x48434e8e0ea34331L } }, + /* 31 << 238 */ + { { 0x79f4f24f2c7ae0b9L,0xc46fbf811939b44aL,0x76fefae856595eb1L, + 0x417b66abcd5f29c7L }, + { 0x5f2332b2c5ceec20L,0xd69661ffe1a1cae2L,0x5ede7e529b0286e6L, + 0x9d062529e276b993L } }, + /* 32 << 238 */ + { { 0x324794b07e50122bL,0xdd744f8b4af07ca5L,0x30a12f08d63fc97bL, + 0x39650f1a76626d9dL }, + { 0x101b47f71fa38477L,0x3d815f19d4dc124fL,0x1569ae95b26eb58aL, + 0xc3cde18895fb1887L } }, + /* 33 << 238 */ + { { 0x54e9f37bf9539a48L,0xb0100e067408c1a5L,0x821d9811ea580cbbL, + 0x8af52d3586e50c56L }, + { 0xdfbd9d47dbbf698bL,0x2961a1ea03dc1c73L,0x203d38f8e76a5df8L, + 0x08a53a686def707aL } }, + /* 34 << 238 */ + { { 0x26eefb481bee45d4L,0xb3cee3463c688036L,0x463c5315c42f2469L, + 0x19d84d2e81378162L }, + { 0x22d7c3c51c4d349fL,0x65965844163d59c5L,0xcf198c56b8abceaeL, + 0x6fb1fb1b628559d5L } }, + /* 35 << 238 */ + { { 0x8bbffd0607bf8fe3L,0x46259c583467734bL,0xd8953cea35f7f0d3L, + 0x1f0bece2d65b0ff1L }, + { 0xf7d5b4b3f3c72914L,0x29e8ea953cb53389L,0x4a365626836b6d46L, + 0xe849f910ea174fdeL } }, + /* 36 << 238 */ + { { 0x7ec62fbbf4737f21L,0xd8dba5ab6209f5acL,0x24b5d7a9a5f9adbeL, + 0x707d28f7a61dc768L }, + { 0x7711460bcaa999eaL,0xba7b174d1c92e4ccL,0x3c4bab6618d4bf2dL, + 0xb8f0c980eb8bd279L } }, + /* 37 << 238 */ + { { 0x024bea9a324b4737L,0xfba9e42332a83bcaL,0x6e635643a232dcedL, + 0x996193672571c8baL }, + { 0xe8c9f35754b7032bL,0xf936b3ba2442d54aL,0x2263f0f08290c65aL, + 0x48989780ee2c7fdbL } }, + /* 38 << 238 */ + { { 0xadc5d55a13d4f95eL,0x737cff85ad9b8500L,0x271c557b8a73f43dL, + 0xbed617a4e18bc476L }, + { 0x662454017dfd8ab2L,0xae7b89ae3a2870aaL,0x1b555f5323a7e545L, + 0x6791e247be057e4cL } }, + /* 39 << 238 */ + { { 0x860136ad324fa34dL,0xea1114474cbeae28L,0x023a4270bedd3299L, + 0x3d5c3a7fc1c35c34L }, + { 0xb0f6db678d0412d2L,0xd92625e2fcdc6b9aL,0x92ae5ccc4e28a982L, + 0xea251c3647a3ce7eL } }, + /* 40 << 238 */ + { { 0x9d658932790691bfL,0xed61058906b736aeL,0x712c2f04c0d63b6eL, + 0x5cf06fd5c63d488fL }, + { 0x97363facd9588e41L,0x1f9bf7622b93257eL,0xa9d1ffc4667acaceL, + 0x1cf4a1aa0a061ecfL } }, + /* 41 << 238 */ + { { 0x40e48a49dc1818d0L,0x0643ff39a3621ab0L,0x5768640ce39ef639L, + 0x1fc099ea04d86854L }, + { 0x9130b9c3eccd28fdL,0xd743cbd27eec54abL,0x052b146fe5b475b6L, + 0x058d9a82900a7d1fL } }, + /* 42 << 238 */ + { { 0x65e0229291262b72L,0x96f924f9bb0edf03L,0x5cfa59c8fe206842L, + 0xf60370045eafa720L }, + { 
0x5f30699e18d7dd96L,0x381e8782cbab2495L,0x91669b46dd8be949L, + 0xb40606f526aae8efL } }, + /* 43 << 238 */ + { { 0x2812b839fc6751a4L,0x16196214fba800efL,0x4398d5ca4c1a2875L, + 0x720c00ee653d8349L }, + { 0xc2699eb0d820007cL,0x880ee660a39b5825L,0x70694694471f6984L, + 0xf7d16ea8e3dda99aL } }, + /* 44 << 238 */ + { { 0x28d675b2c0519a23L,0x9ebf94fe4f6952e3L,0xf28bb767a2294a8aL, + 0x85512b4dfe0af3f5L }, + { 0x18958ba899b16a0dL,0x95c2430cba7548a7L,0xb30d1b10a16be615L, + 0xe3ebbb9785bfb74cL } }, + /* 45 << 238 */ + { { 0xa3273cfe18549fdbL,0xf6e200bf4fcdb792L,0x54a76e1883aba56cL, + 0x73ec66f689ef6aa2L }, + { 0x8d17add7d1b9a305L,0xa959c5b9b7ae1b9dL,0x886435226bcc094aL, + 0xcc5616c4d7d429b9L } }, + /* 46 << 238 */ + { { 0xa6dada01e6a33f7cL,0xc6217a079d4e70adL,0xd619a81809c15b7cL, + 0xea06b3290e80c854L }, + { 0x174811cea5f5e7b9L,0x66dfc310787c65f4L,0x4ea7bd693316ab54L, + 0xc12c4acb1dcc0f70L } }, + /* 47 << 238 */ + { { 0xe4308d1a1e407dd9L,0xe8a3587c91afa997L,0xea296c12ab77b7a5L, + 0xb5ad49e4673c0d52L }, + { 0x40f9b2b27006085aL,0xa88ff34087bf6ec2L,0x978603b14e3066a6L, + 0xb3f99fc2b5e486e2L } }, + /* 48 << 238 */ + { { 0x07b53f5eb2e63645L,0xbe57e54784c84232L,0xd779c2167214d5cfL, + 0x617969cd029a3acaL }, + { 0xd17668cd8a7017a0L,0x77b4d19abe9b7ee8L,0x58fd0e939c161776L, + 0xa8c4f4efd5968a72L } }, + /* 49 << 238 */ + { { 0x296071cc67b3de77L,0xae3c0b8e634f7905L,0x67e440c28a7100c9L, + 0xbb8c3c1beb4b9b42L }, + { 0x6d71e8eac51b3583L,0x7591f5af9525e642L,0xf73a2f7b13f509f3L, + 0x618487aa5619ac9bL } }, + /* 50 << 238 */ + { { 0x3a72e5f79d61718aL,0x00413bcc7592d28cL,0x7d9b11d3963c35cfL, + 0x77623bcfb90a46edL }, + { 0xdeef273bdcdd2a50L,0x4a741f9b0601846eL,0x33b89e510ec6e929L, + 0xcb02319f8b7f22cdL } }, + /* 51 << 238 */ + { { 0xbbe1500d084bae24L,0x2f0ae8d7343d2693L,0xacffb5f27cdef811L, + 0xaa0c030a263fb94fL }, + { 0x6eef0d61a0f442deL,0xf92e181727b139d3L,0x1ae6deb70ad8bc28L, + 0xa89e38dcc0514130L } }, + /* 52 << 238 */ + { { 0x81eeb865d2fdca23L,0x5a15ee08cc8ef895L,0x768fa10a01905614L, + 0xeff5b8ef880ee19bL }, + { 0xf0c0cabbcb1c8a0eL,0x2e1ee9cdb8c838f9L,0x0587d8b88a4a14c0L, + 0xf6f278962ff698e5L } }, + /* 53 << 238 */ + { { 0xed38ef1c89ee6256L,0xf44ee1fe6b353b45L,0x9115c0c770e903b3L, + 0xc78ec0a1818f31dfL }, + { 0x6c003324b7dccbc6L,0xd96dd1f3163bbc25L,0x33aa82dd5cedd805L, + 0x123aae4f7f7eb2f1L } }, + /* 54 << 238 */ + { { 0x1723fcf5a26262cdL,0x1f7f4d5d0060ebd5L,0xf19c5c01b2eaa3afL, + 0x2ccb9b149790accfL }, + { 0x1f9c1cad52324aa6L,0x632005267247df54L,0x5732fe42bac96f82L, + 0x52fe771f01a1c384L } }, + /* 55 << 238 */ + { { 0x546ca13db1001684L,0xb56b4eeea1709f75L,0x266545a9d5db8672L, + 0xed971c901e8f3cfbL }, + { 0x4e7d8691e3a07b29L,0x7570d9ece4b696b9L,0xdc5fa0677bc7e9aeL, + 0x68b44cafc82c4844L } }, + /* 56 << 238 */ + { { 0x519d34b3bf44da80L,0x283834f95ab32e66L,0x6e6087976278a000L, + 0x1e62960e627312f6L }, + { 0x9b87b27be6901c55L,0x80e7853824fdbc1fL,0xbbbc09512facc27dL, + 0x06394239ac143b5aL } }, + /* 57 << 238 */ + { { 0x35bb4a40376c1944L,0x7cb6269463da1511L,0xafd29161b7148a3bL, + 0xa6f9d9ed4e2ea2eeL }, + { 0x15dc2ca2880dd212L,0x903c3813a61139a9L,0x2aa7b46d6c0f8785L, + 0x36ce2871901c60ffL } }, + /* 58 << 238 */ + { { 0xc683b028e10d9c12L,0x7573baa2032f33d3L,0x87a9b1f667a31b58L, + 0xfd3ed11af4ffae12L }, + { 0x83dcaa9a0cb2748eL,0x8239f0185d6fdf16L,0xba67b49c72753941L, + 0x2beec455c321cb36L } }, + /* 59 << 238 */ + { { 0x880156063f8b84ceL,0x764170838d38c86fL,0x054f1ca7598953ddL, + 0xc939e1104e8e7429L }, + { 0x9b1ac2b35a914f2fL,0x39e35ed3e74b8f9cL,0xd0debdb2781b2fb0L, + 0x1585638f2d997ba2L } }, + /* 60 << 238 */ + { { 
0x9c4b646e9e2fce99L,0x68a210811e80857fL,0x06d54e443643b52aL, + 0xde8d6d630d8eb843L }, + { 0x7032156342146a0aL,0x8ba826f25eaa3622L,0x227a58bd86138787L, + 0x43b6c03c10281d37L } }, + /* 61 << 238 */ + { { 0x6326afbbb54dde39L,0x744e5e8adb6f2d5fL,0x48b2a99acff158e1L, + 0xa93c8fa0ef87918fL }, + { 0x2182f956de058c5cL,0x216235d2936f9e7aL,0xace0c0dbd2e31e67L, + 0xc96449bff23ac3e7L } }, + /* 62 << 238 */ + { { 0x7e9a2874170693bdL,0xa28e14fda45e6335L,0x5757f6b356427344L, + 0x822e4556acf8edf9L }, + { 0x2b7a6ee2e6a285cdL,0x5866f211a9df3af0L,0x40dde2ddf845b844L, + 0x986c3726110e5e49L } }, + /* 63 << 238 */ + { { 0x73680c2af7172277L,0x57b94f0f0cccb244L,0xbdff72672d438ca7L, + 0xbad1ce11cf4663fdL }, + { 0x9813ed9dd8f71caeL,0xf43272a6961fdaa6L,0xbeff0119bd6d1637L, + 0xfebc4f9130361978L } }, + /* 64 << 238 */ + { { 0x02b37a952f41deffL,0x0e44a59ae63b89b7L,0x673257dc143ff951L, + 0x19c02205d752baf4L }, + { 0x46c23069c4b7d692L,0x2e6392c3fd1502acL,0x6057b1a21b220846L, + 0xe51ff9460c1b5b63L } }, + /* 0 << 245 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 245 */ + { { 0x6e85cb51566c5c43L,0xcff9c9193597f046L,0x9354e90c4994d94aL, + 0xe0a393322147927dL }, + { 0x8427fac10dc1eb2bL,0x88cfd8c22ff319faL,0xe2d4e68401965274L, + 0xfa2e067d67aaa746L } }, + /* 2 << 245 */ + { { 0xb6d92a7f3e5f9f11L,0x9afe153ad6cb3b8eL,0x4d1a6dd7ddf800bdL, + 0xf6c13cc0caf17e19L }, + { 0x15f6c58e325fc3eeL,0x71095400a31dc3b2L,0x168e7c07afa3d3e7L, + 0x3f8417a194c7ae2dL } }, + /* 3 << 245 */ + { { 0xec234772813b230dL,0x634d0f5f17344427L,0x11548ab1d77fc56aL, + 0x7fab1750ce06af77L }, + { 0xb62c10a74f7c4f83L,0xa7d2edc4220a67d9L,0x1c404170921209a0L, + 0x0b9815a0face59f0L } }, + /* 4 << 245 */ + { { 0x2842589b319540c3L,0x18490f59a283d6f8L,0xa2731f84daae9fcbL, + 0x3db6d960c3683ba0L }, + { 0xc85c63bb14611069L,0xb19436af0788bf05L,0x905459df347460d2L, + 0x73f6e094e11a7db1L } }, + /* 5 << 245 */ + { { 0xdc7f938eb6357f37L,0xc5d00f792bd8aa62L,0xc878dcb92ca979fcL, + 0x37e83ed9eb023a99L }, + { 0x6b23e2731560bf3dL,0x1086e4591d0fae61L,0x782483169a9414bdL, + 0x1b956bc0f0ea9ea1L } }, + /* 6 << 245 */ + { { 0x7b85bb91c31b9c38L,0x0c5aa90b48ef57b5L,0xdedeb169af3bab6fL, + 0xe610ad732d373685L }, + { 0xf13870df02ba8e15L,0x0337edb68ca7f771L,0xe4acf747b62c036cL, + 0xd921d576b6b94e81L } }, + /* 7 << 245 */ + { { 0xdbc864392c422f7aL,0xfb635362ed348898L,0x83084668c45bfcd1L, + 0xc357c9e32b315e11L }, + { 0xb173b5405b2e5b8cL,0x7e946931e102b9a4L,0x17c890eb7b0fb199L, + 0xec225a83d61b662bL } }, + /* 8 << 245 */ + { { 0xf306a3c8ee3c76cbL,0x3cf11623d32a1f6eL,0xe6d5ab646863e956L, + 0x3b8a4cbe5c005c26L }, + { 0xdcd529a59ce6bb27L,0xc4afaa5204d4b16fL,0xb0624a267923798dL, + 0x85e56df66b307fabL } }, + /* 9 << 245 */ + { { 0x0281893c2bf29698L,0x91fc19a4d7ce7603L,0x75a5dca3ad9a558fL, + 0x40ceb3fa4d50bf77L }, + { 0x1baf6060bc9ba369L,0x927e1037597888c2L,0xd936bf1986a34c07L, + 0xd4cf10c1c34ae980L } }, + /* 10 << 245 */ + { { 0x3a3e5334859dd614L,0x9c475b5b18d0c8eeL,0x63080d1f07cd51d5L, + 0xc9c0d0a6b88b4326L }, + { 0x1ac98691c234296fL,0x2a0a83a494887fb6L,0x565114270cea9cf2L, + 0x5230a6e8a24802f5L } }, + /* 11 << 245 */ + { { 0xf7a2bf0f72e3d5c1L,0x377174464f21439eL,0xfedcbf259ce30334L, + 0xe0030a787ce202f9L }, + { 0x6f2d9ebf1202e9caL,0xe79dde6c75e6e591L,0xf52072aff1dac4f8L, + 0x6c8d087ebb9b404dL } }, + /* 12 << 245 */ + { { 0xad0fc73dbce913afL,0x909e587b458a07cbL,0x1300da84d4f00c8aL, + 0x425cd048b54466acL }, + { 0xb59cb9be90e9d8bfL,0x991616db3e431b0eL,0xd3aa117a531aecffL, + 0x91af92d359f4dc3bL } }, + /* 13 << 245 */ + { { 
0x9b1ec292e93fda29L,0x76bb6c17e97d91bcL,0x7509d95faface1e6L, + 0x3653fe47be855ae3L }, + { 0x73180b280f680e75L,0x75eefd1beeb6c26cL,0xa4cdf29fb66d4236L, + 0x2d70a9976b5821d8L } }, + /* 14 << 245 */ + { { 0x7a3ee20720445c36L,0x71d1ac8259877174L,0x0fc539f7949f73e9L, + 0xd05cf3d7982e3081L }, + { 0x8758e20b7b1c7129L,0xffadcc20569e61f2L,0xb05d3a2f59544c2dL, + 0xbe16f5c19fff5e53L } }, + /* 15 << 245 */ + { { 0x73cf65b8aad58135L,0x622c2119037aa5beL,0x79373b3f646fd6a0L, + 0x0e029db50d3978cfL }, + { 0x8bdfc43794fba037L,0xaefbd687620797a6L,0x3fa5382bbd30d38eL, + 0x7627cfbf585d7464L } }, + /* 16 << 245 */ + { { 0xb2330fef4e4ca463L,0xbcef72873566cc63L,0xd161d2cacf780900L, + 0x135dc5395b54827dL }, + { 0x638f052e27bf1bc6L,0x10a224f007dfa06cL,0xe973586d6d3321daL, + 0x8b0c573826152c8fL } }, + /* 17 << 245 */ + { { 0x07ef4f2a34606074L,0x80fe7fe8a0f7047aL,0x3d1a8152e1a0e306L, + 0x32cf43d888da5222L }, + { 0xbf89a95f5f02ffe6L,0x3d9eb9a4806ad3eaL,0x012c17bb79c8e55eL, + 0xfdcd1a7499c81dacL } }, + /* 18 << 245 */ + { { 0x7043178bb9556098L,0x4090a1df801c3886L,0x759800ff9b67b912L, + 0x3e5c0304232620c8L }, + { 0x4b9d3c4b70dceecaL,0xbb2d3c15181f648eL,0xf981d8376e33345cL, + 0xb626289b0cf2297aL } }, + /* 19 << 245 */ + { { 0x766ac6598baebdcfL,0x1a28ae0975df01e5L,0xb71283da375876d8L, + 0x4865a96d607b9800L }, + { 0x25dd1bcd237936b2L,0x332f4f4b60417494L,0xd0923d68370a2147L, + 0x497f5dfbdc842203L } }, + /* 20 << 245 */ + { { 0x9dc74cbd32be5e0fL,0x7475bcb717a01375L,0x438477c950d872b1L, + 0xcec67879ffe1d63dL }, + { 0x9b006014d8578c70L,0xc9ad99a878bb6b8bL,0x6799008e11fb3806L, + 0xcfe81435cd44cab3L } }, + /* 21 << 245 */ + { { 0xa2ee15822f4fb344L,0xb8823450483fa6ebL,0x622d323d652c7749L, + 0xd8474a98beb0a15bL }, + { 0xe43c154d5d1c00d0L,0x7fd581d90e3e7aacL,0x2b44c6192525ddf8L, + 0x67a033ebb8ae9739L } }, + /* 22 << 245 */ + { { 0x113ffec19ef2d2e4L,0x1bf6767ed5a0ea7fL,0x57fff75e03714c0aL, + 0xa23c422e0a23e9eeL }, + { 0xdd5f6b2d540f83afL,0xc2c2c27e55ea46a7L,0xeb6b4246672a1208L, + 0xd13599f7ae634f7aL } }, + /* 23 << 245 */ + { { 0xcf914b5cd7b32c6eL,0x61a5a640eaf61814L,0x8dc3df8b208a1bbbL, + 0xef627fd6b6d79aa5L }, + { 0x44232ffcc4c86bc8L,0xe6f9231b061539feL,0x1d04f25a958b9533L, + 0x180cf93449e8c885L } }, + /* 24 << 245 */ + { { 0x896895959884aaf7L,0xb1959be307b348a6L,0x96250e573c147c87L, + 0xae0efb3add0c61f8L }, + { 0xed00745eca8c325eL,0x3c911696ecff3f70L,0x73acbc65319ad41dL, + 0x7b01a020f0b1c7efL } }, + /* 25 << 245 */ + { { 0xea32b29363a1483fL,0x89eabe717a248f96L,0x9c6231d3343157e5L, + 0x93a375e5df3c546dL }, + { 0xe76e93436a2afe69L,0xc4f89100e166c88eL,0x248efd0d4f872093L, + 0xae0eb3ea8fe0ea61L } }, + /* 26 << 245 */ + { { 0xaf89790d9d79046eL,0x4d650f2d6cee0976L,0xa3935d9a43071ecaL, + 0x66fcd2c9283b0bfeL }, + { 0x0e665eb5696605f1L,0xe77e5d07a54cd38dL,0x90ee050a43d950cfL, + 0x86ddebdad32e69b5L } }, + /* 27 << 245 */ + { { 0x6ad94a3dfddf7415L,0xf7fa13093f6e8d5aL,0xc4831d1de9957f75L, + 0x7de28501d5817447L }, + { 0x6f1d70789e2aeb6bL,0xba2b9ff4f67a53c2L,0x36963767df9defc3L, + 0x479deed30d38022cL } }, + /* 28 << 245 */ + { { 0xd2edb89b3a8631e8L,0x8de855de7a213746L,0xb2056cb7b00c5f11L, + 0xdeaefbd02c9b85e4L }, + { 0x03f39a8dd150892dL,0x37b84686218b7985L,0x36296dd8b7375f1aL, + 0x472cd4b1b78e898eL } }, + /* 29 << 245 */ + { { 0x15dff651e9f05de9L,0xd40450692ce98ba9L,0x8466a7ae9b38024cL, + 0xb910e700e5a6b5efL }, + { 0xae1c56eab3aa8f0dL,0xbab2a5077eee74a6L,0x0dca11e24b4c4620L, + 0xfd896e2e4c47d1f4L } }, + /* 30 << 245 */ + { { 0xeb45ae53308fbd93L,0x46cd5a2e02c36fdaL,0x6a3d4e90baa48385L, + 0xdd55e62e9dbe9960L }, + { 
0xa1406aa02a81ede7L,0x6860dd14f9274ea7L,0xcfdcb0c280414f86L, + 0xff410b1022f94327L } }, + /* 31 << 245 */ + { { 0x5a33cc3849ad467bL,0xefb48b6c0a7335f1L,0x14fb54a4b153a360L, + 0x604aa9d2b52469ccL }, + { 0x5e9dc486754e48e9L,0x693cb45537471e8eL,0xfb2fd7cd8d3b37b6L, + 0x63345e16cf09ff07L } }, + /* 32 << 245 */ + { { 0x9910ba6b23a5d896L,0x1fe19e357fe4364eL,0x6e1da8c39a33c677L, + 0x15b4488b29fd9fd0L }, + { 0x1f4392541a1f22bfL,0x920a8a70ab8163e8L,0x3fd1b24907e5658eL, + 0xf2c4f79cb6ec839bL } }, + /* 33 << 245 */ + { { 0x1abbc3d04aa38d1bL,0x3b0db35cb5d9510eL,0x1754ac783e60dec0L, + 0x53272fd7ea099b33L }, + { 0x5fb0494f07a8e107L,0x4a89e1376a8191faL,0xa113b7f63c4ad544L, + 0x88a2e9096cb9897bL } }, + /* 34 << 245 */ + { { 0x17d55de3b44a3f84L,0xacb2f34417c6c690L,0x3208816810232390L, + 0xf2e8a61f6c733bf7L }, + { 0xa774aab69c2d7652L,0xfb5307e3ed95c5bcL,0xa05c73c24981f110L, + 0x1baae31ca39458c9L } }, + /* 35 << 245 */ + { { 0x1def185bcbea62e7L,0xe8ac9eaeeaf63059L,0x098a8cfd9921851cL, + 0xd959c3f13abe2f5bL }, + { 0xa4f1952520e40ae5L,0x320789e307a24aa1L,0x259e69277392b2bcL, + 0x58f6c6671918668bL } }, + /* 36 << 245 */ + { { 0xce1db2bbc55d2d8bL,0x41d58bb7f4f6ca56L,0x7650b6808f877614L, + 0x905e16baf4c349edL }, + { 0xed415140f661acacL,0x3b8784f0cb2270afL,0x3bc280ac8a402cbaL, + 0xd53f71460937921aL } }, + /* 37 << 245 */ + { { 0xc03c8ee5e5681e83L,0x62126105f6ac9e4aL,0x9503a53f936b1a38L, + 0x3d45e2d4782fecbdL }, + { 0x69a5c43976e8ae98L,0xb53b2eebbfb4b00eL,0xf167471272386c89L, + 0x30ca34a24268bce4L } }, + /* 38 << 245 */ + { { 0x7f1ed86c78341730L,0x8ef5beb8b525e248L,0xbbc489fdb74fbf38L, + 0x38a92a0e91a0b382L }, + { 0x7a77ba3f22433ccfL,0xde8362d6a29f05a9L,0x7f6a30ea61189afcL, + 0x693b550559ef114fL } }, + /* 39 << 245 */ + { { 0x50266bc0cd1797a1L,0xea17b47ef4b7af2dL,0xd6c4025c3df9483eL, + 0x8cbb9d9fa37b18c9L }, + { 0x91cbfd9c4d8424cfL,0xdb7048f1ab1c3506L,0x9eaf641f028206a3L, + 0xf986f3f925bdf6ceL } }, + /* 40 << 245 */ + { { 0x262143b5224c08dcL,0x2bbb09b481b50c91L,0xc16ed709aca8c84fL, + 0xa6210d9db2850ca8L }, + { 0x6d8df67a09cb54d6L,0x91eef6e0500919a4L,0x90f613810f132857L, + 0x9acede47f8d5028bL } }, + /* 41 << 245 */ + { { 0x844d1b7190b771c3L,0x563b71e4ba6426beL,0x2efa2e83bdb802ffL, + 0x3410cbabab5b4a41L }, + { 0x555b2d2630da84ddL,0xd0711ae9ee1cc29aL,0xcf3e8c602f547792L, + 0x03d7d5dedc678b35L } }, + /* 42 << 245 */ + { { 0x071a2fa8ced806b8L,0x222e6134697f1478L,0xdc16fd5dabfcdbbfL, + 0x44912ebf121b53b8L }, + { 0xac9436742496c27cL,0x8ea3176c1ffc26b0L,0xb6e224ac13debf2cL, + 0x524cc235f372a832L } }, + /* 43 << 245 */ + { { 0xd706e1d89f6f1b18L,0x2552f00544cce35bL,0x8c8326c2a88e31fcL, + 0xb5468b2cf9552047L }, + { 0xce683e883ff90f2bL,0x77947bdf2f0a5423L,0xd0a1b28bed56e328L, + 0xaee35253c20134acL } }, + /* 44 << 245 */ + { { 0x7e98367d3567962fL,0x379ed61f8188bffbL,0x73bba348faf130a1L, + 0x6c1f75e1904ed734L }, + { 0x189566423b4a79fcL,0xf20bc83d54ef4493L,0x836d425d9111eca1L, + 0xe5b5c318009a8dcfL } }, + /* 45 << 245 */ + { { 0x3360b25d13221bc5L,0x707baad26b3eeaf7L,0xd7279ed8743a95a1L, + 0x7450a875969e809fL }, + { 0x32b6bd53e5d0338fL,0x1e77f7af2b883bbcL,0x90da12cc1063ecd0L, + 0xe2697b58c315be47L } }, + /* 46 << 245 */ + { { 0x2771a5bdda85d534L,0x53e78c1fff980eeaL,0xadf1cf84900385e7L, + 0x7d3b14f6c9387b62L }, + { 0x170e74b0cb8f2bd2L,0x2d50b486827fa993L,0xcdbe8c9af6f32babL, + 0x55e906b0c3b93ab8L } }, + /* 47 << 245 */ + { { 0x747f22fc8fe280d1L,0xcd8e0de5b2e114abL,0x5ab7dbebe10b68b0L, + 0x9dc63a9ca480d4b2L }, + { 0x78d4bc3b4be1495fL,0x25eb3db89359122dL,0x3f8ac05b0809cbdcL, + 0xbf4187bbd37c702fL } }, + /* 48 << 245 */ + { { 
0x84cea0691416a6a5L,0x8f860c7943ef881cL,0x41311f8a38038a5dL, + 0xe78c2ec0fc612067L }, + { 0x494d2e815ad73581L,0xb4cc9e0059604097L,0xff558aecf3612cbaL, + 0x35beef7a9e36c39eL } }, + /* 49 << 245 */ + { { 0x1845c7cfdbcf41b9L,0x5703662aaea997c0L,0x8b925afee402f6d8L, + 0xd0a1b1ae4dd72162L }, + { 0x9f47b37503c41c4bL,0xa023829b0391d042L,0x5f5045c3503b8b0aL, + 0x123c268898c010e5L } }, + /* 50 << 245 */ + { { 0x324ec0cc36ba06eeL,0xface31153dd2cc0cL,0xb364f3bef333e91fL, + 0xef8aff7328e832b0L }, + { 0x1e9bad042d05841bL,0x42f0e3df356a21e2L,0xa3270bcb4add627eL, + 0xb09a8158d322e711L } }, + /* 51 << 245 */ + { { 0x86e326a10fee104aL,0xad7788f83703f65dL,0x7e76543047bc4833L, + 0x6cee582b2b9b893aL }, + { 0x9cd2a167e8f55a7bL,0xefbee3c6d9e4190dL,0x33ee7185d40c2e9dL, + 0x844cc9c5a380b548L } }, + /* 52 << 245 */ + { { 0x323f8ecd66926e04L,0x0001e38f8110c1baL,0x8dbcac12fc6a7f07L, + 0xd65e1d580cec0827L }, + { 0xd2cd4141be76ca2dL,0x7895cf5ce892f33aL,0x956d230d367139d2L, + 0xa91abd3ed012c4c1L } }, + /* 53 << 245 */ + { { 0x34fa488387eb36bfL,0xc5f07102914b8fb4L,0x90f0e579adb9c95fL, + 0xfe6ea8cb28888195L }, + { 0x7b9b5065edfa9284L,0x6c510bd22b8c8d65L,0xd7b8ebefcbe8aafdL, + 0xedb3af9896b1da07L } }, + /* 54 << 245 */ + { { 0x28ff779d6295d426L,0x0c4f6ac73fa3ad7bL,0xec44d0548b8e2604L, + 0x9b32a66d8b0050e1L }, + { 0x1f943366f0476ce2L,0x7554d953a602c7b4L,0xbe35aca6524f2809L, + 0xb6881229fd4edbeaL } }, + /* 55 << 245 */ + { { 0xe8cd0c8f508efb63L,0x9eb5b5c86abcefc7L,0xf5621f5fb441ab4fL, + 0x79e6c046b76a2b22L }, + { 0x74a4792ce37a1f69L,0xcbd252cb03542b60L,0x785f65d5b3c20bd3L, + 0x8dea61434fabc60cL } }, + /* 56 << 245 */ + { { 0x45e21446de673629L,0x57f7aa1e703c2d21L,0xa0e99b7f98c868c7L, + 0x4e42f66d8b641676L }, + { 0x602884dc91077896L,0xa0d690cfc2c9885bL,0xfeb4da333b9a5187L, + 0x5f789598153c87eeL } }, + /* 57 << 245 */ + { { 0x2192dd4752b16dbaL,0xdeefc0e63524c1b1L,0x465ea76ee4383693L, + 0x79401711361b8d98L }, + { 0xa5f9ace9f21a15cbL,0x73d26163efee9aebL,0xcca844b3e677016cL, + 0x6c122b0757eaee06L } }, + /* 58 << 245 */ + { { 0xb782dce715f09690L,0x508b9b122dfc0fc9L,0x9015ab4b65d89fc6L, + 0x5e79dab7d6d5bb0fL }, + { 0x64f021f06c775aa2L,0xdf09d8cc37c7eca1L,0x9a761367ef2fa506L, + 0xed4ca4765b81eec6L } }, + /* 59 << 245 */ + { { 0x262ede3610bbb8b5L,0x0737ce830641ada3L,0x4c94288ae9831cccL, + 0x487fc1ce8065e635L }, + { 0xb13d7ab3b8bb3659L,0xdea5df3e855e4120L,0xb9a1857385eb0244L, + 0x1a1b8ea3a7cfe0a3L } }, + /* 60 << 245 */ + { { 0x3b83711967b0867cL,0x8d5e0d089d364520L,0x52dccc1ed930f0e3L, + 0xefbbcec7bf20bbafL }, + { 0x99cffcab0263ad10L,0xd8199e6dfcd18f8aL,0x64e2773fe9f10617L, + 0x0079e8e108704848L } }, + /* 61 << 245 */ + { { 0x1169989f8a342283L,0x8097799ca83012e6L,0xece966cb8a6a9001L, + 0x93b3afef072ac7fcL }, + { 0xe6893a2a2db3d5baL,0x263dc46289bf4fdcL,0x8852dfc9e0396673L, + 0x7ac708953af362b6L } }, + /* 62 << 245 */ + { { 0xbb9cce4d5c2f342bL,0xbf80907ab52d7aaeL,0x97f3d3cd2161bcd0L, + 0xb25b08340962744dL }, + { 0xc5b18ea56c3a1ddaL,0xfe4ec7eb06c92317L,0xb787b890ad1c4afeL, + 0xdccd9a920ede801aL } }, + /* 63 << 245 */ + { { 0x9ac6dddadb58da1fL,0x22bbc12fb8cae6eeL,0xc6f8bced815c4a43L, + 0x8105a92cf96480c7L }, + { 0x0dc3dbf37a859d51L,0xe3ec7ce63041196bL,0xd9f64b250d1067c9L, + 0xf23213213d1f8dd8L } }, + /* 64 << 245 */ + { { 0x8b5c619c76497ee8L,0x5d2b0ac6c717370eL,0x98204cb64fcf68e1L, + 0x0bdec21162bc6792L }, + { 0x6973ccefa63b1011L,0xf9e3fa97e0de1ac5L,0x5efb693e3d0e0c8bL, + 0x037248e9d2d4fcb4L } }, + /* 0 << 252 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 252 */ + { { 
0x80802dc91ec34f9eL,0xd8772d3533810603L,0x3f06d66c530cb4f3L, + 0x7be5ed0dc475c129L }, + { 0xcb9e3c1931e82b10L,0xc63d2857c9ff6b4cL,0xb92118c692a1b45eL, + 0x0aec44147285bbcaL } }, + /* 2 << 252 */ + { { 0xfc189ae71e29a3efL,0xcbe906f04c93302eL,0xd0107914ceaae10eL, + 0xb7a23f34b68e19f8L }, + { 0xe9d875c2efd2119dL,0x03198c6efcadc9c8L,0x65591bf64da17113L, + 0x3cf0bbf83d443038L } }, + /* 3 << 252 */ + { { 0xae485bb72b724759L,0x945353e1b2d4c63aL,0x82159d07de7d6f2cL, + 0x389caef34ec5b109L }, + { 0x4a8ebb53db65ef14L,0x2dc2cb7edd99de43L,0x816fa3ed83f2405fL, + 0x73429bb9c14208a3L } }, + /* 4 << 252 */ + { { 0xb618d590b01e6e27L,0x047e2ccde180b2dcL,0xd1b299b504aea4a9L, + 0x412c9e1e9fa403a4L }, + { 0x88d28a3679407552L,0x49c50136f332b8e3L,0x3a1b6fcce668de19L, + 0x178851bc75122b97L } }, + /* 5 << 252 */ + { { 0xb1e13752fb85fa4cL,0xd61257ce383c8ce9L,0xd43da670d2f74daeL, + 0xa35aa23fbf846bbbL }, + { 0x5e74235d4421fc83L,0xf6df8ee0c363473bL,0x34d7f52a3c4aa158L, + 0x50d05aab9bc6d22eL } }, + /* 6 << 252 */ + { { 0x8c56e735a64785f4L,0xbc56637b5f29cd07L,0x53b2bb803ee35067L, + 0x50235a0fdc919270L }, + { 0x191ab6d8f2c4aa65L,0xc34758318396023bL,0x80400ba5f0f805baL, + 0x8881065b5ec0f80fL } }, + /* 7 << 252 */ + { { 0xc370e522cc1b5e83L,0xde2d4ad1860b8bfbL,0xad364df067b256dfL, + 0x8f12502ee0138997L }, + { 0x503fa0dc7783920aL,0xe80014adc0bc866aL,0x3f89b744d3064ba6L, + 0x03511dcdcba5dba5L } }, + /* 8 << 252 */ + { { 0x197dd46d95a7b1a2L,0x9c4e7ad63c6341fbL,0x426eca29484c2eceL, + 0x9211e489de7f4f8aL }, + { 0x14997f6ec78ef1f4L,0x2b2c091006574586L,0x17286a6e1c3eede8L, + 0x25f92e470f60e018L } }, + /* 9 << 252 */ + { { 0x805c564631890a36L,0x703ef60057feea5bL,0x389f747caf3c3030L, + 0xe0e5daeb54dd3739L }, + { 0xfe24a4c3c9c9f155L,0x7e4bf176b5393962L,0x37183de2af20bf29L, + 0x4a1bd7b5f95a8c3bL } }, + /* 10 << 252 */ + { { 0xa83b969946191d3dL,0x281fc8dd7b87f257L,0xb18e2c1354107588L, + 0x6372def79b2bafe8L }, + { 0xdaf4bb480d8972caL,0x3f2dd4b756167a3fL,0x1eace32d84310cf4L, + 0xe3bcefafe42700aaL } }, + /* 11 << 252 */ + { { 0x5fe5691ed785e73dL,0xa5db5ab62ea60467L,0x02e23d41dfc6514aL, + 0x35e8048ee03c3665L }, + { 0x3f8b118f1adaa0f8L,0x28ec3b4584ce1a5aL,0xe8cacc6e2c6646b8L, + 0x1343d185dbd0e40fL } }, + /* 12 << 252 */ + { { 0xe5d7f844caaa358cL,0x1a1db7e49924182aL,0xd64cd42d9c875d9aL, + 0xb37b515f042eeec8L }, + { 0x4d4dd4097b165fbeL,0xfc322ed9e206eff3L,0x7dee410259b7e17eL, + 0x55a481c08236ca00L } }, + /* 13 << 252 */ + { { 0x8c885312c23fc975L,0x1571580605d6297bL,0xa078868ef78edd39L, + 0x956b31e003c45e52L }, + { 0x470275d5ff7b33a6L,0xc8d5dc3a0c7e673fL,0x419227b47e2f2598L, + 0x8b37b6344c14a975L } }, + /* 14 << 252 */ + { { 0xd0667ed68b11888cL,0x5e0e8c3e803e25dcL,0x34e5d0dcb987a24aL, + 0x9f40ac3bae920323L }, + { 0x5463de9534e0f63aL,0xa128bf926b6328f9L,0x491ccd7cda64f1b7L, + 0x7ef1ec27c47bde35L } }, + /* 15 << 252 */ + { { 0xa857240fa36a2737L,0x35dc136663621bc1L,0x7a3a6453d4fb6897L, + 0x80f1a439c929319dL }, + { 0xfc18274bf8cb0ba0L,0xb0b537668078c5ebL,0xfb0d49241e01d0efL, + 0x50d7c67d372ab09cL } }, + /* 16 << 252 */ + { { 0xb4e370af3aeac968L,0xe4f7fee9c4b63266L,0xb4acd4c2e3ac5664L, + 0xf8910bd2ceb38cbfL }, + { 0x1c3ae50cc9c0726eL,0x15309569d97b40bfL,0x70884b7ffd5a5a1bL, + 0x3890896aef8314cdL } }, + /* 17 << 252 */ + { { 0x58e1515ca5618c93L,0xe665432b77d942d1L,0xb32181bfb6f767a8L, + 0x753794e83a604110L }, + { 0x09afeb7ce8c0dbccL,0x31e02613598673a3L,0x5d98e5577d46db00L, + 0xfc21fb8c9d985b28L } }, + /* 18 << 252 */ + { { 0xc9040116b0843e0bL,0x53b1b3a869b04531L,0xdd1649f085d7d830L, + 0xbb3bcc87cb7427e8L }, + { 
0x77261100c93dce83L,0x7e79da61a1922a2aL,0x587a2b02f3149ce8L, + 0x147e1384de92ec83L } }, + /* 19 << 252 */ + { { 0x484c83d3af077f30L,0xea78f8440658b53aL,0x912076c2027aec53L, + 0xf34714e393c8177dL }, + { 0x37ef5d15c2376c84L,0x8315b6593d1aa783L,0x3a75c484ef852a90L, + 0x0ba0c58a16086bd4L } }, + /* 20 << 252 */ + { { 0x29688d7a529a6d48L,0x9c7f250dc2f19203L,0x123042fb682e2df9L, + 0x2b7587e7ad8121bcL }, + { 0x30fc0233e0182a65L,0xb82ecf87e3e1128aL,0x7168286193fb098fL, + 0x043e21ae85e9e6a7L } }, + /* 21 << 252 */ + { { 0xab5b49d666c834eaL,0x3be43e1847414287L,0xf40fb859219a2a47L, + 0x0e6559e9cc58df3cL }, + { 0xfe1dfe8e0c6615b4L,0x14abc8fd56459d70L,0x7be0fa8e05de0386L, + 0x8e63ef68e9035c7cL } }, + /* 22 << 252 */ + { { 0x116401b453b31e91L,0x0cba7ad44436b4d8L,0x9151f9a0107afd66L, + 0xafaca8d01f0ee4c4L }, + { 0x75fe5c1d9ee9761cL,0x3497a16bf0c0588fL,0x3ee2bebd0304804cL, + 0xa8fb9a60c2c990b9L } }, + /* 23 << 252 */ + { { 0xd14d32fe39251114L,0x36bf25bccac73366L,0xc9562c66dba7495cL, + 0x324d301b46ad348bL }, + { 0x9f46620cd670407eL,0x0ea8d4f1e3733a01L,0xd396d532b0c324e0L, + 0x5b211a0e03c317cdL } }, + /* 24 << 252 */ + { { 0x090d7d205ffe7b37L,0x3b7f3efb1747d2daL,0xa2cb525fb54fc519L, + 0x6e220932f66a971eL }, + { 0xddc160dfb486d440L,0x7fcfec463fe13465L,0x83da7e4e76e4c151L, + 0xd6fa48a1d8d302b5L } }, + /* 25 << 252 */ + { { 0xc6304f265872cd88L,0x806c1d3c278b90a1L,0x3553e725caf0bc1cL, + 0xff59e603bb9d8d5cL }, + { 0xa4550f327a0b85ddL,0xdec5720a93ecc217L,0x0b88b74169d62213L, + 0x7212f2455b365955L } }, + /* 26 << 252 */ + { { 0x20764111b5cae787L,0x13cb7f581dfd3124L,0x2dca77da1175aefbL, + 0xeb75466bffaae775L }, + { 0x74d76f3bdb6cff32L,0x7440f37a61fcda9aL,0x1bb3ac92b525028bL, + 0x20fbf8f7a1975f29L } }, + /* 27 << 252 */ + { { 0x982692e1df83097fL,0x28738f6c554b0800L,0xdc703717a2ce2f2fL, + 0x7913b93c40814194L }, + { 0x049245931fe89636L,0x7b98443ff78834a6L,0x11c6ab015114a5a1L, + 0x60deb383ffba5f4cL } }, + /* 28 << 252 */ + { { 0x4caa54c601a982e6L,0x1dd35e113491cd26L,0x973c315f7cbd6b05L, + 0xcab0077552494724L }, + { 0x04659b1f6565e15aL,0xbf30f5298c8fb026L,0xfc21641ba8a0de37L, + 0xe9c7a366fa5e5114L } }, + /* 29 << 252 */ + { { 0xdb849ca552f03ad8L,0xc7e8dbe9024e35c0L,0xa1a2bbaccfc3c789L, + 0xbf733e7d9c26f262L }, + { 0x882ffbf5b8444823L,0xb7224e886bf8483bL,0x53023b8b65bef640L, + 0xaabfec91d4d5f8cdL } }, + /* 30 << 252 */ + { { 0xa40e1510079ea1bdL,0x1ad9addcd05d5d26L,0xdb3f2eab13e68d4fL, + 0x1cff1ae2640f803fL }, + { 0xe0e7b749d4cee117L,0x8e9f275b4036d909L,0xce34e31d8f4d4c38L, + 0x22b37f69d75130fcL } }, + /* 31 << 252 */ + { { 0x83e0f1fdb4014604L,0xa8ce991989415078L,0x82375b7541792efeL, + 0x4f59bf5c97d4515bL }, + { 0xac4f324f923a277dL,0xd9bc9b7d650f3406L,0xc6fa87d18a39bc51L, + 0x825885305ccc108fL } }, + /* 32 << 252 */ + { { 0x5ced3c9f82e4c634L,0x8efb83143a4464f8L,0xe706381b7a1dca25L, + 0x6cd15a3c5a2a412bL }, + { 0x9347a8fdbfcd8fb5L,0x31db2eef6e54cd22L,0xc4aeb11ef8d8932fL, + 0x11e7c1ed344411afL } }, + /* 33 << 252 */ + { { 0x2653050cdc9a151eL,0x9edbfc083bb0a859L,0x926c81c7fd5691e7L, + 0x9c1b23426f39019aL }, + { 0x64a81c8b7f8474b9L,0x90657c0701761819L,0x390b333155e0375aL, + 0xc676c626b6ebc47dL } }, + /* 34 << 252 */ + { { 0x51623247b7d6dee8L,0x0948d92779659313L,0x99700161e9ab35edL, + 0x06cc32b48ddde408L }, + { 0x6f2fd664061ef338L,0x1606fa02c202e9edL,0x55388bc1929ba99bL, + 0xc4428c5e1e81df69L } }, + /* 35 << 252 */ + { { 0xce2028aef91b0b2aL,0xce870a23f03dfd3fL,0x66ec2c870affe8edL, + 0xb205fb46284d0c00L }, + { 0xbf5dffe744cefa48L,0xb6fc37a8a19876d7L,0xbecfa84c08b72863L, + 0xd7205ff52576374fL } }, + /* 36 << 252 */ + { { 
0x80330d328887de41L,0x5de0df0c869ea534L,0x13f427533c56ea17L, + 0xeb1f6069452b1a78L }, + { 0x50474396e30ea15cL,0x575816a1c1494125L,0xbe1ce55bfe6bb38fL, + 0xb901a94896ae30f7L } }, + /* 37 << 252 */ + { { 0xe5af0f08d8fc3548L,0x5010b5d0d73bfd08L,0x993d288053fe655aL, + 0x99f2630b1c1309fdL }, + { 0xd8677bafb4e3b76fL,0x14e51ddcb840784bL,0x326c750cbf0092ceL, + 0xc83d306bf528320fL } }, + /* 38 << 252 */ + { { 0xc445671577d4715cL,0xd30019f96b703235L,0x207ccb2ed669e986L, + 0x57c824aff6dbfc28L }, + { 0xf0eb532fd8f92a23L,0x4a557fd49bb98fd2L,0xa57acea7c1e6199aL, + 0x0c6638208b94b1edL } }, + /* 39 << 252 */ + { { 0x9b42be8ff83a9266L,0xc7741c970101bd45L,0x95770c1107bd9cebL, + 0x1f50250a8b2e0744L }, + { 0xf762eec81477b654L,0xc65b900e15efe59aL,0x88c961489546a897L, + 0x7e8025b3c30b4d7cL } }, + /* 40 << 252 */ + { { 0xae4065ef12045cf9L,0x6fcb2caf9ccce8bdL,0x1fa0ba4ef2cf6525L, + 0xf683125dcb72c312L }, + { 0xa01da4eae312410eL,0x67e286776cd8e830L,0xabd9575298fb3f07L, + 0x05f11e11eef649a5L } }, + /* 41 << 252 */ + { { 0xba47faef9d3472c2L,0x3adff697c77d1345L,0x4761fa04dd15afeeL, + 0x64f1f61ab9e69462L }, + { 0xfa691fab9bfb9093L,0x3df8ae8fa1133dfeL,0xcd5f896758cc710dL, + 0xfbb88d5016c7fe79L } }, + /* 42 << 252 */ + { { 0x8e011b4ce88c50d1L,0x7532e807a8771c4fL,0x64c78a48e2278ee4L, + 0x0b283e833845072aL }, + { 0x98a6f29149e69274L,0xb96e96681868b21cL,0x38f0adc2b1a8908eL, + 0x90afcff71feb829dL } }, + /* 43 << 252 */ + { { 0x9915a383210b0856L,0xa5a80602def04889L,0x800e9af97c64d509L, + 0x81382d0bb8996f6fL }, + { 0x490eba5381927e27L,0x46c63b324af50182L,0x784c5fd9d3ad62ceL, + 0xe4fa1870f8ae8736L } }, + /* 44 << 252 */ + { { 0x4ec9d0bcd7466b25L,0x84ddbe1adb235c65L,0x5e2645ee163c1688L, + 0x570bd00e00eba747L }, + { 0xfa51b629128bfa0fL,0x92fce1bd6c1d3b68L,0x3e7361dcb66778b1L, + 0x9c7d249d5561d2bbL } }, + /* 45 << 252 */ + { { 0xa40b28bf0bbc6229L,0x1c83c05edfd91497L,0x5f9f5154f083df05L, + 0xbac38b3ceee66c9dL }, + { 0xf71db7e3ec0dfcfdL,0xf2ecda8e8b0a8416L,0x52fddd867812aa66L, + 0x2896ef104e6f4272L } }, + /* 46 << 252 */ + { { 0xff27186a0fe9a745L,0x08249fcd49ca70dbL,0x7425a2e6441cac49L, + 0xf4a0885aece5ff57L }, + { 0x6e2cb7317d7ead58L,0xf96cf7d61898d104L,0xafe67c9d4f2c9a89L, + 0x89895a501c7bf5bcL } }, + /* 47 << 252 */ + { { 0xdc7cb8e5573cecfaL,0x66497eaed15f03e6L,0x6bc0de693f084420L, + 0x323b9b36acd532b0L }, + { 0xcfed390a0115a3c1L,0x9414c40b2d65ca0eL,0x641406bd2f530c78L, + 0x29369a44833438f2L } }, + /* 48 << 252 */ + { { 0x996884f5903fa271L,0xe6da0fd2b9da921eL,0xa6f2f2695db01e54L, + 0x1ee3e9bd6876214eL }, + { 0xa26e181ce27a9497L,0x36d254e48e215e04L,0x42f32a6c252cabcaL, + 0x9948148780b57614L } }, + /* 49 << 252 */ + { { 0x4c4dfe6940d9cae1L,0x0586958011a10f09L,0xca287b573491b64bL, + 0x77862d5d3fd4a53bL }, + { 0xbf94856e50349126L,0x2be30bd171c5268fL,0x10393f19cbb650a6L, + 0x639531fe778cf9fdL } }, + /* 50 << 252 */ + { { 0x02556a11b2935359L,0xda38aa96af8c126eL,0x47dbe6c20960167fL, + 0x37bbabb6501901cdL }, + { 0xb6e979e02c947778L,0xd69a51757a1a1dc6L,0xc3ed50959d9faf0cL, + 0x4dd9c0961d5fa5f0L } }, + /* 51 << 252 */ + { { 0xa0c4304d64f16ea8L,0x8b1cac167e718623L,0x0b5765467c67f03eL, + 0x559cf5adcbd88c01L }, + { 0x074877bb0e2af19aL,0x1f717ec1a1228c92L,0x70bcb800326e8920L, + 0xec6e2c5c4f312804L } }, + /* 52 << 252 */ + { { 0x426aea7d3fca4752L,0xf12c09492211f62aL,0x24beecd87be7b6b5L, + 0xb77eaf4c36d7a27dL }, + { 0x154c2781fda78fd3L,0x848a83b0264eeabeL,0x81287ef04ffe2bc4L, + 0x7b6d88c6b6b6fc2aL } }, + /* 53 << 252 */ + { { 0x805fb947ce417d99L,0x4b93dcc38b916cc4L,0x72e65bb321273323L, + 0xbcc1badd6ea9886eL }, + { 
0x0e2230114bc5ee85L,0xa561be74c18ee1e4L,0x762fd2d4a6bcf1f1L, + 0x50e6a5a495231489L } }, + /* 54 << 252 */ + { { 0xca96001fa00b500bL,0x5c098cfc5d7dcdf5L,0xa64e2d2e8c446a85L, + 0xbae9bcf1971f3c62L }, + { 0x4ec226838435a2c5L,0x8ceaed6c4bad4643L,0xe9f8fb47ccccf4e3L, + 0xbd4f3fa41ce3b21eL } }, + /* 55 << 252 */ + { { 0xd79fb110a3db3292L,0xe28a37dab536c66aL,0x279ce87b8e49e6a9L, + 0x70ccfe8dfdcec8e3L }, + { 0x2193e4e03ba464b2L,0x0f39d60eaca9a398L,0x7d7932aff82c12abL, + 0xd8ff50ed91e7e0f7L } }, + /* 56 << 252 */ + { { 0xea961058fa28a7e0L,0xc726cf250bf5ec74L,0xe74d55c8db229666L, + 0x0bd9abbfa57f5799L }, + { 0x7479ef074dfc47b3L,0xd9c65fc30c52f91dL,0x8e0283fe36a8bde2L, + 0xa32a8b5e7d4b7280L } }, + /* 57 << 252 */ + { { 0x6a677c6112e83233L,0x0fbb3512dcc9bf28L,0x562e8ea50d780f61L, + 0x0db8b22b1dc4e89cL }, + { 0x0a6fd1fb89be0144L,0x8c77d246ca57113bL,0x4639075dff09c91cL, + 0x5b47b17f5060824cL } }, + /* 58 << 252 */ + { { 0x58aea2b016287b52L,0xa1343520d0cd8eb0L,0x6148b4d0c5d58573L, + 0xdd2b6170291c68aeL }, + { 0xa61b39291da3b3b7L,0x5f946d7908c4ac10L,0x4105d4a57217d583L, + 0x5061da3d25e6de5eL } }, + /* 59 << 252 */ + { { 0x3113940dec1b4991L,0xf12195e136f485aeL,0xa7507fb2731a2ee0L, + 0x95057a8e6e9e196eL }, + { 0xa3c2c9112e130136L,0x97dfbb3633c60d15L,0xcaf3c581b300ee2bL, + 0x77f25d90f4bac8b8L } }, + /* 60 << 252 */ + { { 0xdb1c4f986d840cd6L,0x471d62c0e634288cL,0x8ec2f85ecec8a161L, + 0x41f37cbcfa6f4ae2L }, + { 0x6793a20f4b709985L,0x7a7bd33befa8985bL,0x2c6a3fbd938e6446L, + 0x190426192a8d47c1L } }, + /* 61 << 252 */ + { { 0x16848667cc36975fL,0x02acf1689d5f1dfbL,0x62d41ad4613baa94L, + 0xb56fbb929f684670L }, + { 0xce610d0de9e40569L,0x7b99c65f35489fefL,0x0c88ad1b3df18b97L, + 0x81b7d9be5d0e9edbL } }, + /* 62 << 252 */ + { { 0xd85218c0c716cc0aL,0xf4b5ff9085691c49L,0xa4fd666bce356ac6L, + 0x17c728954b327a7aL }, + { 0xf93d5085da6be7deL,0xff71530e3301d34eL,0x4cd96442d8f448e8L, + 0x9283d3312ed18ffaL } }, + /* 63 << 252 */ + { { 0x4d33dd992a849870L,0xa716964b41576335L,0xff5e3a9b179be0e5L, + 0x5b9d6b1b83b13632L }, + { 0x3b8bd7d4a52f313bL,0xc9dd95a0637a4660L,0x300359620b3e218fL, + 0xce1481a3c7b28a3cL } }, + /* 64 << 252 */ + { { 0xab41b43a43228d83L,0x24ae1c304ad63f99L,0x8e525f1a46a51229L, + 0x14af860fcd26d2b4L }, + { 0xd6baef613f714aa1L,0xf51865adeb78795eL,0xd3e21fcee6a9d694L, + 0x82ceb1dd8a37b527L } }, +}; + +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */
+static int sp_256_ecc_mulmod_add_only_4(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit tmpd[2 * 4 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* tmp;
+    sp_digit* negy;
+    int i;
+    ecc_recode_256 v[37];
+    int err;
+
+    (void)g;
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, rtd, rt);
+    if (err == MP_OKAY)
+        err = sp_256_point_new_4(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    tmp = tmpd;
+#endif
+    negy = tmp;
+
+    if (err == MP_OKAY) {
+        sp_256_ecc_recode_7_4(k, v);
+
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        i = 36;
+        XMEMCPY(rt->x, table[i * 65 + v[i].i].x, sizeof(table->x));
+        XMEMCPY(rt->y, table[i * 65 + v[i].i].y, sizeof(table->y));
+        rt->infinity = !v[i].i;
+        for (--i; i>=0; i--) {
+            XMEMCPY(p->x, table[i * 65 + v[i].i].x, sizeof(table->x));
+            XMEMCPY(p->y, table[i * 65 + v[i].i].y, sizeof(table->y));
+            p->infinity = !v[i].i;
+            sp_256_sub_4(negy, p256_mod, p->y);
+            sp_256_cond_copy_4(p->y, negy, 0 - v[i].neg);
+            sp_256_proj_point_add_qz1_4(rt, rt, p, tmp);
+        }
+        if (map != 0) {
+            sp_256_map_4(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 4 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(sp_digit) * 2 * 4 * 5);
+#endif
+    sp_256_point_free_4(p, 0, heap);
+    sp_256_point_free_4(rt, 0, heap);
+
+    return err;
+}
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_4(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_256_ecc_mulmod_add_only_4(r, NULL, p256_table,
+                                        k, map, heap);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
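+ *
+ * Usage sketch (illustrative only, not part of the original source;
+ * "scalar" is assumed to be an initialized mp_int):
+ *     ecc_point* pt = wc_ecc_new_point();
+ *     if (pt != NULL && sp_ecc_mulmod_base_256(scalar, pt, 1, NULL) == MP_OKAY) {
+ *         /* pt now holds scalar.G in affine form (map = 1) */
+ *     }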
+ */
+int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[4];
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_256_point_new_4(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 4, km);
+
+        err = sp_256_ecc_mulmod_base_4(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_4(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_4(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_256_iszero_4(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a  A single precision integer.
+ */
+static void sp_256_add_one_4(sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldp x1, x2, [%[a], 0]\n\t"
+        "adds x1, x1, #1\n\t"
+        "ldr x3, [%[a], 16]\n\t"
+        "adcs x2, x2, xzr\n\t"
+        "ldr x4, [%[a], 24]\n\t"
+        "adcs x3, x3, xzr\n\t"
+        "stp x1, x2, [%[a], 0]\n\t"
+        "adcs x4, x4, xzr\n\t"
+        "stp x3, x4, [%[a], 16]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "x1", "x2", "x3", "x4"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r     A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a     Byte array.
+ * n     Number of bytes in array to read.
+ */
+static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j;
+    byte* d;
+
+    for (i = n - 1, j = 0; i >= 7; i -= 8) {
+        r[j] = ((sp_digit)a[i - 0] <<  0) |
+               ((sp_digit)a[i - 1] <<  8) |
+               ((sp_digit)a[i - 2] << 16) |
+               ((sp_digit)a[i - 3] << 24) |
+               ((sp_digit)a[i - 4] << 32) |
+               ((sp_digit)a[i - 5] << 40) |
+               ((sp_digit)a[i - 6] << 48) |
+               ((sp_digit)a[i - 7] << 56);
+        j++;
+    }
+
+    if (i >= 0) {
+        r[j] = 0;
+
+        d = (byte*)r;
+        switch (i) {
+            case 6: d[n - 1 - 6] = a[6]; //fallthrough
+            case 5: d[n - 1 - 5] = a[5]; //fallthrough
+            case 4: d[n - 1 - 4] = a[4]; //fallthrough
+            case 3: d[n - 1 - 3] = a[3]; //fallthrough
+            case 2: d[n - 1 - 2] = a[2]; //fallthrough
+            case 1: d[n - 1 - 1] = a[1]; //fallthrough
+            case 0: d[n - 1 - 0] = a[0]; //fallthrough
+        }
+        j++;
+    }
+
+    for (; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_256_ecc_gen_k_4(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[32];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_256_from_bin(k, 4, buf, (int)sizeof(buf));
+            if (sp_256_cmp_4(k, p256_order2) < 0) {
+                sp_256_add_one_4(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
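+ * The private scalar is produced by rejection sampling in
+ * sp_256_ecc_gen_k_4 and the public point is computed as k.G with
+ * sp_256_ecc_mulmod_base_4.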
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[4];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256 inf;
+#endif
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_gen_k_4(rng, k);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_base_4(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_4(infinity, point, p256_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_256_iszero_4(point->x) == 0) || (sp_256_iszero_4(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_4(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_256_point_free_4(infinity, 1, heap);
+#endif
+    sp_256_point_free_4(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 32
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_256_to_bin(sp_digit* r, byte* a)
+{
+    int i, j;
+
+    for (i = 3, j = 0; i >= 0; i--) {
+        a[j++] = r[i] >> 56;
+        a[j++] = r[i] >> 48;
+        a[j++] = r[i] >> 40;
+        a[j++] = r[i] >> 32;
+        a[j++] = r[i] >> 24;
+        a[j++] = r[i] >> 16;
+        a[j++] = r[i] >> 8;
+        a[j++] = r[i] >> 0;
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
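+ *
+ * Usage sketch (illustrative only, not part of the original source;
+ * priv is the local private scalar and pub the peer's public point):
+ *     byte secret[32];
+ *     word32 secretLen = (word32)sizeof(secret);
+ *     int ret = sp_ecc_secret_gen_256(priv, pub, secret, &secretLen, NULL);
+ *     /* on MP_OKAY, secret holds the 32-byte shared X ordinate */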
+ */ +int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out, + word32* outLen, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 p; + sp_digit kd[4]; +#endif + sp_point_256* point = NULL; + sp_digit* k = NULL; + int err = MP_OKAY; + + if (*outLen < 32U) { + err = BUFFER_E; + } + + if (err == MP_OKAY) { + err = sp_256_point_new_4(heap, p, point); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(k, 4, priv); + sp_256_point_from_ecc_point_4(point, pub); + err = sp_256_ecc_mulmod_4(point, point, k, 1, heap); + } + if (err == MP_OKAY) { + sp_256_to_bin(point->x, out); + *outLen = 32; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_4(point, 0, heap); + + return err; +} +#endif /* HAVE_ECC_DHE */ + +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_256_add_4(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "adds x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return (sp_digit)r; +} + +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_256_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_digit tmp[8]; + + __asm__ __volatile__ ( + "mov x5, 0\n\t" + "mov x6, 0\n\t" + "mov x7, 0\n\t" + "mov x8, 0\n\t" + "\n1:\n\t" + "subs x3, x5, 24\n\t" + "csel x3, xzr, x3, cc\n\t" + "sub x4, x5, x3\n\t" + "\n2:\n\t" + "ldr x10, [%[a], x3]\n\t" + "ldr x11, [%[b], x4]\n\t" + "mul x9, x10, x11\n\t" + "umulh x10, x10, x11\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "add x3, x3, #8\n\t" + "sub x4, x4, #8\n\t" + "cmp x3, 32\n\t" + "b.eq 3f\n\t" + "cmp x3, x5\n\t" + "b.le 2b\n\t" + "\n3:\n\t" + "str x6, [%[r], x5]\n\t" + "mov x6, x7\n\t" + "mov x7, x8\n\t" + "mov x8, #0\n\t" + "add x5, x5, #8\n\t" + "cmp x5, 48\n\t" + "b.le 1b\n\t" + "str x6, [%[r], x5]\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +#else +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
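+ *
+ * The assembly below is a fully unrolled 4x4 schoolbook multiply: each
+ * 64-bit word pair is combined with mul/umulh into a 128-bit partial
+ * product A[i]*B[j] and accumulated with carries into words i+j and
+ * i+j+1 of the 8-word result. A reference model (sketch only):
+ *     for (i = 0; i < 4; i++)
+ *         for (j = 0; j < 4; j++)
+ *             add the 128-bit product a[i]*b[j] into (r[i+j+1]:r[i+j]);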
+ */ +static void sp_256_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_digit tmp[4]; + + __asm__ __volatile__ ( + "ldp x16, x17, [%[a], 0]\n\t" + "ldp x21, x22, [%[b], 0]\n\t" + "# A[0] * B[0]\n\t" + "mul x8, x16, x21\n\t" + "ldr x19, [%[a], 16]\n\t" + "umulh x9, x16, x21\n\t" + "ldr x23, [%[b], 16]\n\t" + "# A[0] * B[1]\n\t" + "mul x4, x16, x22\n\t" + "ldr x20, [%[a], 24]\n\t" + "umulh x5, x16, x22\n\t" + "ldr x24, [%[b], 24]\n\t" + "adds x9, x9, x4\n\t" + "# A[1] * B[0]\n\t" + "mul x4, x17, x21\n\t" + "adc x10, xzr, x5\n\t" + "umulh x5, x17, x21\n\t" + "adds x9, x9, x4\n\t" + "# A[0] * B[2]\n\t" + "mul x4, x16, x23\n\t" + "adcs x10, x10, x5\n\t" + "umulh x5, x16, x23\n\t" + "adc x11, xzr, xzr\n\t" + "adds x10, x10, x4\n\t" + "# A[1] * B[1]\n\t" + "mul x4, x17, x22\n\t" + "adc x11, x11, x5\n\t" + "umulh x5, x17, x22\n\t" + "adds x10, x10, x4\n\t" + "# A[2] * B[0]\n\t" + "mul x4, x19, x21\n\t" + "adcs x11, x11, x5\n\t" + "umulh x5, x19, x21\n\t" + "adc x12, xzr, xzr\n\t" + "adds x10, x10, x4\n\t" + "# A[0] * B[3]\n\t" + "mul x4, x16, x24\n\t" + "adcs x11, x11, x5\n\t" + "umulh x5, x16, x24\n\t" + "adc x12, x12, xzr\n\t" + "adds x11, x11, x4\n\t" + "# A[1] * B[2]\n\t" + "mul x4, x17, x23\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x17, x23\n\t" + "adc x13, xzr, xzr\n\t" + "adds x11, x11, x4\n\t" + "# A[2] * B[1]\n\t" + "mul x4, x19, x22\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x19, x22\n\t" + "adc x13, x13, xzr\n\t" + "adds x11, x11, x4\n\t" + "# A[3] * B[0]\n\t" + "mul x4, x20, x21\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x20, x21\n\t" + "adc x13, x13, xzr\n\t" + "adds x11, x11, x4\n\t" + "# A[1] * B[3]\n\t" + "mul x4, x17, x24\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x17, x24\n\t" + "adc x13, x13, xzr\n\t" + "adds x12, x12, x4\n\t" + "# A[2] * B[2]\n\t" + "mul x4, x19, x23\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x19, x23\n\t" + "adc x14, xzr, xzr\n\t" + "adds x12, x12, x4\n\t" + "# A[3] * B[1]\n\t" + "mul x4, x20, x22\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x20, x22\n\t" + "adc x14, x14, xzr\n\t" + "adds x12, x12, x4\n\t" + "# A[2] * B[3]\n\t" + "mul x4, x19, x24\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x19, x24\n\t" + "adc x14, x14, xzr\n\t" + "adds x13, x13, x4\n\t" + "# A[3] * B[2]\n\t" + "mul x4, x20, x23\n\t" + "adcs x14, x14, x5\n\t" + "umulh x5, x20, x23\n\t" + "adc x15, xzr, xzr\n\t" + "adds x13, x13, x4\n\t" + "# A[3] * B[3]\n\t" + "mul x4, x20, x24\n\t" + "adcs x14, x14, x5\n\t" + "umulh x5, x20, x24\n\t" + "adc x15, x15, xzr\n\t" + "adds x14, x14, x4\n\t" + "adc x15, x15, x5\n\t" + "stp x8, x9, [%[r], 0]\n\t" + "stp x10, x11, [%[r], 16]\n\t" + "stp x12, x13, [%[r], 32]\n\t" + "stp x14, x15, [%[r], 48]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp) + : "memory", "x4", "x5", "x6", "x7", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. 
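+ * returns 0 when there is no borrow and an all-ones mask when the
+ * subtraction borrows (csetm on carry-clear), so the result can be used
+ * directly as a conditional mask.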
+ */
+static sp_digit sp_256_sub_in_place_4(sp_digit* a, const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "ldp x2, x3, [%[a], 0]\n\t"
+        "ldp x6, x7, [%[b], 0]\n\t"
+        "subs x2, x2, x6\n\t"
+        "ldp x4, x5, [%[a], 16]\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "ldp x8, x9, [%[b], 16]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "stp x2, x3, [%[a], 0]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x4, x5, [%[a], 16]\n\t"
+        "csetm %[a], cc\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+
+    return (sp_digit)a;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+static void sp_256_mul_d_4(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldp x2, x3, [%[a]]\n\t"
+        "ldp x4, x5, [%[a], 16]\n\t"
+        "umulh x7, %[b], x2\n\t"
+        "mul x2, %[b], x2\n\t"
+        "# A[1] * B\n\t"
+        "mul x8, %[b], x3\n\t"
+        "umulh x9, %[b], x3\n\t"
+        "adds x3, x7, x8\n\t"
+        "# A[2] * B\n\t"
+        "mul x8, %[b], x4\n\t"
+        "adc x7, xzr, x9\n\t"
+        "umulh x9, %[b], x4\n\t"
+        "adds x4, x7, x8\n\t"
+        "# A[3] * B\n\t"
+        "mul x8, %[b], x5\n\t"
+        "adc x7, xzr, x9\n\t"
+        "umulh x9, %[b], x5\n\t"
+        "adds x5, x7, x8\n\t"
+        "str x2, [%[r]]\n\t"
+        "adc x6, xzr, x9\n\t"
+        "stp x3, x4, [%[r], 8]\n\t"
+        "stp x5, x6, [%[r], 24]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The divisor.
+ * returns the result of the division.
+ */
+static sp_digit div_256_word_4(sp_digit d1, sp_digit d0, sp_digit div)
+{
+    sp_digit r;
+
+    __asm__ __volatile__ (
+        "lsr x5, %[div], 32\n\t"
+        "add x5, x5, 1\n\t"
+
+        "udiv x3, %[d1], x5\n\t"
+        "lsl x6, x3, 32\n\t"
+        "mul x4, %[div], x6\n\t"
+        "umulh x3, %[div], x6\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "udiv x3, %[d1], x5\n\t"
+        "lsl x3, x3, 32\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "umulh x3, %[div], x3\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "lsr x3, %[d0], 32\n\t"
+        "orr x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv x3, x3, x5\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "umulh x3, %[div], x3\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "lsr x3, %[d0], 32\n\t"
+        "orr x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv x3, x3, x5\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "sub %[d0], %[d0], x4\n\t"
+
+        "udiv x3, %[d0], %[div]\n\t"
+        "add %[r], x6, x3\n\t"
+
+        : [r] "=r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "x3", "x4", "x5", "x6"
+    );
+
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_256_mask_4(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<4; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+#endif
+}
+
+/* Divide a by d and put the remainder into r. (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
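+ *
+ * Each iteration of the loop below estimates one quotient digit with
+ * div_256_word_4 (which uses only the top divisor word), subtracts the
+ * estimate times d, and then conditionally adds d back up to twice via
+ * sp_256_mask_4/sp_256_add_4 to correct any over-estimate without a
+ * data-dependent branch.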
+ */ +static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[8], t2[5]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[3]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 4); + for (i=3; i>=0; i--) { + r1 = div_256_word_4(t1[4 + i], t1[4 + i - 1], div); + + sp_256_mul_d_4(t2, d, r1); + t1[4 + i] += sp_256_sub_in_place_4(&t1[i], t2); + t1[4 + i] -= t2[4]; + sp_256_mask_4(t2, d, t1[4 + i]); + t1[4 + i] += sp_256_add_4(&t1[i], &t1[i], t2); + sp_256_mask_4(t2, d, t1[4 + i]); + t1[4 + i] += sp_256_add_4(&t1[i], &t1[i], t2); + } + + r1 = sp_256_cmp_4(t1, d) >= 0; + sp_256_cond_sub_4(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_256_mod_4(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_256_div_4(a, m, NULL, r); +} + +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +static void sp_256_sqr_4(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "ldp x16, x17, [%[a], 0]\n\t" + "# A[0] * A[1]\n\t" + "mul x9, x16, x17\n\t" + "ldr x19, [%[a], 16]\n\t" + "umulh x10, x16, x17\n\t" + "ldr x20, [%[a], 24]\n\t" + "# A[0] * A[2]\n\t" + "mul x4, x16, x19\n\t" + "umulh x5, x16, x19\n\t" + "adds x10, x10, x4\n\t" + "# A[0] * A[3]\n\t" + "mul x4, x16, x20\n\t" + "adc x11, xzr, x5\n\t" + "umulh x5, x16, x20\n\t" + "adds x11, x11, x4\n\t" + "# A[1] * A[2]\n\t" + "mul x4, x17, x19\n\t" + "adc x12, xzr, x5\n\t" + "umulh x5, x17, x19\n\t" + "adds x11, x11, x4\n\t" + "# A[1] * A[3]\n\t" + "mul x4, x17, x20\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x17, x20\n\t" + "adc x13, xzr, xzr\n\t" + "adds x12, x12, x4\n\t" + "# A[2] * A[3]\n\t" + "mul x4, x19, x20\n\t" + "adc x13, x13, x5\n\t" + "umulh x5, x19, x20\n\t" + "adds x13, x13, x4\n\t" + "adc x14, xzr, x5\n\t" + "# Double\n\t" + "adds x9, x9, x9\n\t" + "adcs x10, x10, x10\n\t" + "adcs x11, x11, x11\n\t" + "adcs x12, x12, x12\n\t" + "adcs x13, x13, x13\n\t" + "# A[0] * A[0]\n\t" + "mul x8, x16, x16\n\t" + "adcs x14, x14, x14\n\t" + "umulh x3, x16, x16\n\t" + "cset x15, cs\n\t" + "# A[1] * A[1]\n\t" + "mul x4, x17, x17\n\t" + "adds x9, x9, x3\n\t" + "umulh x5, x17, x17\n\t" + "adcs x10, x10, x4\n\t" + "# A[2] * A[2]\n\t" + "mul x6, x19, x19\n\t" + "adcs x11, x11, x5\n\t" + "umulh x7, x19, x19\n\t" + "adcs x12, x12, x6\n\t" + "# A[3] * A[3]\n\t" + "mul x16, x20, x20\n\t" + "adcs x13, x13, x7\n\t" + "umulh x17, x20, x20\n\t" + "adcs x14, x14, x16\n\t" + "adc x15, x15, x17\n\t" + "stp x8, x9, [%[r], 0]\n\t" + "stp x10, x11, [%[r], 16]\n\t" + "stp x12, x13, [%[r], 32]\n\t" + "stp x14, x15, [%[r], 48]\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20" + ); +} + +#ifdef WOLFSSL_SP_SMALL +/* Order-2 for the P256 curve. */ +static const uint64_t p256_order_minus_2[4] = { + 0xf3b9cac2fc63254fU,0xbce6faada7179e84U,0xffffffffffffffffU, + 0xffffffff00000000U +}; +#else +/* The low half of the order-2 of the P256 curve. 
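+ * The high half of order-2 (ffffffff00000000ffffffffffffffff) has a fixed
+ * bit pattern, so sp_256_mont_inv_order_4 handles it with dedicated
+ * squarings and multiplies and only scans the bits of this low half.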
 */
+static const uint64_t p256_order_low[2] = {
+    0xf3b9cac2fc63254fU,0xbce6faada7179e84U
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of the P256 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_256_mont_mul_order_4(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_256_mul_4(r, a, b);
+    sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
+}
+
+/* Square number mod the order of P256 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_256_mont_sqr_order_4(sp_digit* r, const sp_digit* a)
+{
+    sp_256_sqr_4(r, a);
+    sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P256 curve a number of times.
+ * (r = a ^ 2 ^ n mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ * n  Number of times to square.
+ */
+static void sp_256_mont_sqr_n_order_4(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_256_mont_sqr_order_4(r, a);
+    for (i=1; i<n; i++) {
+        sp_256_mont_sqr_order_4(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_order_4(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 4);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_order_4(t, t);
+        if ((p256_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_4(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 4U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 4;
+    sp_digit* t3 = td + 4 * 4;
+    int i;
+
+    /* t = a^2 */
+    sp_256_mont_sqr_order_4(t, a);
+    /* t = a^3 = t * a */
+    sp_256_mont_mul_order_4(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_256_mont_sqr_n_order_4(t2, t, 2);
+    /* t3= a^f = t2 * t */
+    sp_256_mont_mul_order_4(t3, t2, t);
+    /* t2= a^f0 = t3 ^ 2 ^ 4 */
+    sp_256_mont_sqr_n_order_4(t2, t3, 4);
+    /* t = a^ff = t2 * t3 */
+    sp_256_mont_mul_order_4(t, t2, t3);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_256_mont_sqr_n_order_4(t2, t, 8);
+    /* t = a^ffff = t2 * t */
+    sp_256_mont_mul_order_4(t, t2, t);
+    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+    sp_256_mont_sqr_n_order_4(t2, t, 16);
+    /* t = a^ffffffff = t2 * t */
+    sp_256_mont_mul_order_4(t, t2, t);
+    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
+    sp_256_mont_sqr_n_order_4(t2, t, 64);
+    /* t2= a^ffffffff00000000ffffffff = t2 * t */
+    sp_256_mont_mul_order_4(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
+    sp_256_mont_sqr_n_order_4(t2, t2, 32);
+    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+    sp_256_mont_mul_order_4(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
+    for (i=127; i>=112; i--) {
+        sp_256_mont_sqr_order_4(t2, t2);
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_4(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+    sp_256_mont_sqr_n_order_4(t2, t2, 4);
+    sp_256_mont_mul_order_4(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+    for (i=107; i>=64; i--) {
+        sp_256_mont_sqr_order_4(t2, t2);
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_4(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+    sp_256_mont_sqr_n_order_4(t2, t2, 4);
+    sp_256_mont_mul_order_4(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+    for (i=59; i>=32; i--) {
+        sp_256_mont_sqr_order_4(t2, t2);
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_4(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
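+    /* Each group of four squarings (sp_256_mont_sqr_n_order_4(.., 4))
+     * followed by a multiply by t3 = a^f appends the hex digit f to the
+     * exponent accumulated in t2; the bit-scan loops append the remaining
+     * digits of order-2. */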
+    sp_256_mont_sqr_n_order_4(t2, t2, 4);
+    sp_256_mont_mul_order_4(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+    for (i=27; i>=0; i--) {
+        sp_256_mont_sqr_order_4(t2, t2);
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_4(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+    sp_256_mont_sqr_n_order_4(t2, t2, 4);
+    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+    sp_256_mont_mul_order_4(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN  64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 256 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Second part of result as an mp_int.
+ * km       Optional scalar to use for k; when NULL or zero a random k is
+ *          generated (a supplied km is zeroed after use).
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*4];
+    sp_digit xd[2*4];
+    sp_digit kd[2*4];
+    sp_digit rd[2*4];
+    sp_digit td[3 * 2*4];
+    sp_point_256 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int64_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 4, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 4;
+        x = d + 2 * 4;
+        k = d + 4 * 4;
+        r = d + 6 * 4;
+        tmp = d + 8 * 4;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(e, 4, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_256_from_mp(x, 4, priv);
+
+        /* New random point.
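+         * Use the scalar supplied in km when it is non-zero (it is
+         * cleared after use); otherwise generate a fresh random k.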
+         */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_256_ecc_gen_k_4(rng, k);
+        }
+        else {
+            sp_256_from_mp(k, 4, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+            err = sp_256_ecc_mulmod_base_4(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 4U);
+            sp_256_norm_4(r);
+            c = sp_256_cmp_4(r, p256_order);
+            sp_256_cond_sub_4(r, r, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_4(r);
+
+            /* Convert k to Montgomery form (mod order) */
+            sp_256_mul_4(k, k, p256_norm_order);
+            err = sp_256_mod_4(k, k, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_4(k);
+            /* kInv = 1/k mod order */
+            sp_256_mont_inv_order_4(kInv, k, tmp);
+            sp_256_norm_4(kInv);
+
+            /* s = r * x + e */
+            sp_256_mul_4(x, x, r);
+            err = sp_256_mod_4(x, x, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_4(x);
+            carry = sp_256_add_4(s, e, x);
+            sp_256_cond_sub_4(s, s, p256_order, 0 - carry);
+            sp_256_norm_4(s);
+            c = sp_256_cmp_4(s, p256_order);
+            sp_256_cond_sub_4(s, s, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_4(s);
+
+            /* s = s * k^-1 mod order */
+            sp_256_mont_mul_order_4(s, s, kInv);
+            sp_256_norm_4(s);
+
+            /* Check that signature is usable. */
+            if (sp_256_iszero_4(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 7 * 2 * 4);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 4U);
+#endif
+    sp_256_point_free_4(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 256)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash     Hash to verify.
+ * hashLen  Length of the hash data.
+ * pX       X ordinate of the public key point Q.
+ * pY       Y ordinate of the public key point Q.
+ * pZ       Z ordinate of the public key point Q.
+ * r        First part of the signature as an mp_int.
+ * sm       Second part of the signature as an mp_int.
+ * res      Verification result: 1 when the signature matches, 0 otherwise.
+ * heap     Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
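+ *
+ * Usage sketch (illustrative only, not part of the original source;
+ * qx/qy/qz are the public key ordinates and sigR/sigS the signature
+ * parts, all caller-provided mp_ints):
+ *     int valid = 0;
+ *     int ret = sp_ecc_verify_256(hash, hashLen, qx, qy, qz,
+ *                                 sigR, sigS, &valid, NULL);
+ *     /* the signature is good only when ret == MP_OKAY and valid == 1 */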
+ */ +int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX, + mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d = NULL; +#else + sp_digit u1d[2*4]; + sp_digit u2d[2*4]; + sp_digit sd[2*4]; + sp_digit tmpd[2*4 * 5]; + sp_point_256 p1d; + sp_point_256 p2d; +#endif + sp_digit* u1 = NULL; + sp_digit* u2 = NULL; + sp_digit* s = NULL; + sp_digit* tmp = NULL; + sp_point_256* p1; + sp_point_256* p2 = NULL; + sp_digit carry; + int64_t c; + int err; + + err = sp_256_point_new_4(heap, p1d, p1); + if (err == MP_OKAY) { + err = sp_256_point_new_4(heap, p2d, p2); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 4, heap, + DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + u1 = d + 0 * 4; + u2 = d + 2 * 4; + s = d + 4 * 4; + tmp = d + 6 * 4; +#else + u1 = u1d; + u2 = u2d; + s = sd; + tmp = tmpd; +#endif + + if (hashLen > 32U) { + hashLen = 32U; + } + + sp_256_from_bin(u1, 4, hash, (int)hashLen); + sp_256_from_mp(u2, 4, r); + sp_256_from_mp(s, 4, sm); + sp_256_from_mp(p2->x, 4, pX); + sp_256_from_mp(p2->y, 4, pY); + sp_256_from_mp(p2->z, 4, pZ); + + { + sp_256_mul_4(s, s, p256_norm_order); + } + err = sp_256_mod_4(s, s, p256_order); + } + if (err == MP_OKAY) { + sp_256_norm_4(s); + { + sp_256_mont_inv_order_4(s, s, tmp); + sp_256_mont_mul_order_4(u1, u1, s); + sp_256_mont_mul_order_4(u2, u2, s); + } + + err = sp_256_ecc_mulmod_base_4(p1, u1, 0, heap); + } + if (err == MP_OKAY) { + err = sp_256_ecc_mulmod_4(p2, p2, u2, 0, heap); + } + + if (err == MP_OKAY) { + { + sp_256_proj_point_add_4(p1, p1, p2, tmp); + if (sp_256_iszero_4(p1->z)) { + if (sp_256_iszero_4(p1->x) && sp_256_iszero_4(p1->y)) { + sp_256_proj_point_dbl_4(p1, p2, tmp); + } + else { + /* Y ordinate is not used from here - don't set. */ + p1->x[0] = 0; + p1->x[1] = 0; + p1->x[2] = 0; + p1->x[3] = 0; + XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod)); + } + } + } + + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and convert to Montgomery form. */ + sp_256_from_mp(u2, 4, r); + err = sp_256_mod_mul_norm_4(u2, u2, p256_mod); + } + + if (err == MP_OKAY) { + /* u1 = r.z'.z' mod prime */ + sp_256_mont_sqr_4(p1->z, p1->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(u1, u2, p1->z, p256_mod, p256_mp_mod); + *res = (int)(sp_256_cmp_4(p1->x, u1) == 0); + if (*res == 0) { + /* Reload r and add order. */ + sp_256_from_mp(u2, 4, r); + carry = sp_256_add_4(u2, u2, p256_order); + /* Carry means result is greater than mod and is not valid. */ + if (carry == 0) { + sp_256_norm_4(u2); + + /* Compare with mod and if greater or equal then not valid. 
                 */
+                c = sp_256_cmp_4(u2, p256_mod);
+                if (c < 0) {
+                    /* Convert to Montgomery form */
+                    err = sp_256_mod_mul_norm_4(u2, u2, p256_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_256_mont_mul_4(u1, u2, p1->z, p256_mod,
+                                          p256_mp_mod);
+                        *res = (int)(sp_256_cmp_4(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_256_point_free_4(p1, 0, heap);
+    sp_256_point_free_4(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_256_ecc_is_point_4(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*4];
+    sp_digit t2d[2*4];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 4;
+        t2 = d + 2 * 4;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_256_sqr_4(t1, point->y);
+        (void)sp_256_mod_4(t1, t1, p256_mod);
+        sp_256_sqr_4(t2, point->x);
+        (void)sp_256_mod_4(t2, t2, p256_mod);
+        sp_256_mul_4(t2, t2, point->x);
+        (void)sp_256_mod_4(t2, t2, p256_mod);
+        (void)sp_256_sub_4(t2, p256_mod, t2);
+        sp_256_mont_add_4(t1, t1, t2, p256_mod);
+
+        sp_256_mont_add_4(t1, t1, point->x, p256_mod);
+        sp_256_mont_add_4(t1, t1, point->x, p256_mod);
+        sp_256_mont_add_4(t1, t1, point->x, p256_mod);
+
+        if (sp_256_cmp_4(t1, p256_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 pubd;
+#endif
+    sp_point_256* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_256_point_new_4(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_256_from_mp(pub->x, 4, pX);
+        sp_256_from_mp(pub->y, 4, pY);
+        sp_256_from_bin(pub->z, 4, one, (int)sizeof(one));
+
+        err = sp_256_ecc_is_point_4(pub, NULL);
+    }
+
+    sp_256_point_free_4(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
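+ * heap   Heap to use if dynamically allocating.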
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[4];
+    sp_point_256 pubd;
+    sp_point_256 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_256* pub;
+    sp_point_256* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_256_point_new_4(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+            DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_256_from_mp(pub->x, 4, pX);
+        sp_256_from_mp(pub->y, 4, pY);
+        sp_256_from_bin(pub->z, 4, one, (int)sizeof(one));
+        sp_256_from_mp(priv, 4, privm);
+
+        /* Check point at infinity. */
+        if ((sp_256_iszero_4(pub->x) != 0) &&
+            (sp_256_iszero_4(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_256_cmp_4(pub->x, p256_mod) >= 0 ||
+            sp_256_cmp_4(pub->y, p256_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_256_ecc_is_point_4(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+        err = sp_256_ecc_mulmod_4(p, pub, p256_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_256_iszero_4(p->x) == 0) ||
+            (sp_256_iszero_4(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+        err = sp_256_ecc_mulmod_base_4(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_256_cmp_4(p->x, pub->x) != 0 ||
+            sp_256_cmp_4(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_4(p, 0, heap);
+    sp_256_point_free_4(pub, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_CHECK_KEY */
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
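+ *
+ * Illustrative call (a sketch, not part of this patch; px..rz stand for
+ * previously initialised mp_ints):
+ *     err = sp_ecc_proj_add_point_256(px, py, pz, qx, qy, qz,
+ *                                     rx, ry, rz);
+ * On MP_OKAY, (rX, rY, rZ) holds the projective sum of the two inputs.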
+ */ +int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 4 * 5]; + sp_point_256 pd; + sp_point_256 qd; +#endif + sp_digit* tmp; + sp_point_256* p; + sp_point_256* q = NULL; + int err; + + err = sp_256_point_new_4(NULL, pd, p); + if (err == MP_OKAY) { + err = sp_256_point_new_4(NULL, qd, q); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 4, pX); + sp_256_from_mp(p->y, 4, pY); + sp_256_from_mp(p->z, 4, pZ); + sp_256_from_mp(q->x, 4, qX); + sp_256_from_mp(q->y, 4, qY); + sp_256_from_mp(q->z, 4, qZ); + + sp_256_proj_point_add_4(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_4(q, 0, NULL); + sp_256_point_free_4(p, 0, NULL); + + return err; +} + +/* Double a projective EC point. + * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 4 * 2]; + sp_point_256 pd; +#endif + sp_digit* tmp; + sp_point_256* p; + int err; + + err = sp_256_point_new_4(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 4, pX); + sp_256_from_mp(p->y, 4, pY); + sp_256_from_mp(p->z, 4, pZ); + + sp_256_proj_point_dbl_4(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_4(p, 0, NULL); + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
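+ *
+ * With Jacobian coordinates the affine values are x = X/Z^2 and
+ * y = Y/Z^3, so the mapping costs one modular inversion of Z plus a few
+ * multiplications.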
+ */ +int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 4 * 4]; + sp_point_256 pd; +#endif + sp_digit* tmp; + sp_point_256* p; + int err; + + err = sp_256_point_new_4(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 4, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 4, pX); + sp_256_from_mp(p->y, 4, pY); + sp_256_from_mp(p->z, 4, pZ); + + sp_256_map_4(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, pZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_4(p, 0, NULL); + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +static int sp_256_mont_sqrt_4(sp_digit* y) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit t1d[2 * 4]; + sp_digit t2d[2 * 4]; +#endif + sp_digit* t1; + sp_digit* t2; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = d + 0 * 4; + t2 = d + 2 * 4; +#else + t1 = t1d; + t2 = t2d; +#endif + + { + /* t2 = y ^ 0x2 */ + sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0x3 */ + sp_256_mont_mul_4(t1, t2, y, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xc */ + sp_256_mont_sqr_n_4(t2, t1, 2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xf */ + sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xf0 */ + sp_256_mont_sqr_n_4(t2, t1, 4, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xff */ + sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xff00 */ + sp_256_mont_sqr_n_4(t2, t1, 8, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffff */ + sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xffff0000 */ + sp_256_mont_sqr_n_4(t2, t1, 16, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff */ + sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000000 */ + sp_256_mont_sqr_n_4(t1, t1, 32, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001 */ + sp_256_mont_mul_4(t1, t1, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */ + sp_256_mont_sqr_n_4(t1, t1, 96, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */ + sp_256_mont_mul_4(t1, t1, y, p256_mod, p256_mp_mod); + sp_256_mont_sqr_n_4(y, t1, 94, p256_mod, p256_mp_mod); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } 
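+
+    /* y now holds y^((p + 1) / 4) mod the P256 prime. Since that prime is
+     * 3 mod 4, this is a square root of the original y whenever one
+     * exists. */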
+#endif + + return err; +} + + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit xd[2 * 4]; + sp_digit yd[2 * 4]; +#endif + sp_digit* x = NULL; + sp_digit* y = NULL; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + x = d + 0 * 4; + y = d + 2 * 4; +#else + x = xd; + y = yd; +#endif + + sp_256_from_mp(x, 4, xm); + err = sp_256_mod_mul_norm_4(x, x, p256_mod); + } + if (err == MP_OKAY) { + /* y = x^3 */ + { + sp_256_mont_sqr_4(y, x, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(y, y, x, p256_mod, p256_mp_mod); + } + /* y = x^3 - 3x */ + sp_256_mont_sub_4(y, y, x, p256_mod); + sp_256_mont_sub_4(y, y, x, p256_mod); + sp_256_mont_sub_4(y, y, x, p256_mod); + /* y = x^3 - 3x + b */ + err = sp_256_mod_mul_norm_4(x, p256_b, p256_mod); + } + if (err == MP_OKAY) { + sp_256_mont_add_4(y, y, x, p256_mod); + /* y = sqrt(x^3 - 3x + b) */ + err = sp_256_mont_sqrt_4(y); + } + if (err == MP_OKAY) { + XMEMSET(y + 4, 0, 4U * sizeof(sp_digit)); + sp_256_mont_reduce_4(y, p256_mod, p256_mp_mod); + if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) { + sp_256_mont_sub_4(y, p256_mod, y, p256_mod); + } + + err = sp_256_to_mp(y, ym); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} +#endif +#endif /* !WOLFSSL_SP_NO_256 */ +#ifdef WOLFSSL_SP_384 + +/* Point structure to use. */ +typedef struct sp_point_384 { + sp_digit x[2 * 6]; + sp_digit y[2 * 6]; + sp_digit z[2 * 6]; + int infinity; +} sp_point_384; + +/* The modulus (prime) of the curve P384. */ +static const sp_digit p384_mod[6] = { + 0x00000000ffffffffL,0xffffffff00000000L,0xfffffffffffffffeL, + 0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL +}; +/* The Montogmery normalizer for modulus of the curve P384. */ +static const sp_digit p384_norm_mod[6] = { + 0xffffffff00000001L,0x00000000ffffffffL,0x0000000000000001L, + 0x0000000000000000L,0x0000000000000000L,0x0000000000000000L +}; +/* The Montogmery multiplier for modulus of the curve P384. */ +static sp_digit p384_mp_mod = 0x0000000100000001; +#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ + defined(HAVE_ECC_VERIFY) +/* The order of the curve P384. */ +static const sp_digit p384_order[6] = { + 0xecec196accc52973L,0x581a0db248b0a77aL,0xc7634d81f4372ddfL, + 0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL +}; +#endif +/* The order of the curve P384 minus 2. */ +static const sp_digit p384_order2[6] = { + 0xecec196accc52971L,0x581a0db248b0a77aL,0xc7634d81f4372ddfL, + 0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL +}; +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montogmery normalizer for order of the curve P384. 
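+ * The normalizer is 2^384 minus the order; multiplying a value by it and
+ * reducing modulo the order converts the value into Montgomery form.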
*/ +static const sp_digit p384_norm_order[6] = { + 0x1313e695333ad68dL,0xa7e5f24db74f5885L,0x389cb27e0bc8d220L, + 0x0000000000000000L,0x0000000000000000L,0x0000000000000000L +}; +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montogmery multiplier for order of the curve P384. */ +static sp_digit p384_mp_order = 0x6ed46089e88fdc45l; +#endif +/* The base point of curve P384. */ +static const sp_point_384 p384_base = { + /* X ordinate */ + { + 0x3a545e3872760ab7L,0x5502f25dbf55296cL,0x59f741e082542a38L, + 0x6e1d3b628ba79b98L,0x8eb1c71ef320ad74L,0xaa87ca22be8b0537L, + 0L, 0L, 0L, 0L, 0L, 0L + }, + /* Y ordinate */ + { + 0x7a431d7c90ea0e5fL,0x0a60b1ce1d7e819dL,0xe9da3113b5f0b8c0L, + 0xf8f41dbd289a147cL,0x5d9e98bf9292dc29L,0x3617de4a96262c6fL, + 0L, 0L, 0L, 0L, 0L, 0L + }, + /* Z ordinate */ + { + 0x0000000000000001L,0x0000000000000000L,0x0000000000000000L, + 0x0000000000000000L,0x0000000000000000L,0x0000000000000000L, + 0L, 0L, 0L, 0L, 0L, 0L + }, + /* infinity */ + 0 +}; +#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY) +static const sp_digit p384_b[6] = { + 0x2a85c8edd3ec2aefL,0xc656398d8a2ed19dL,0x0314088f5013875aL, + 0x181d9c6efe814112L,0x988e056be3f82d19L,0xb3312fa7e23ee7e4L +}; +#endif + +static int sp_384_point_new_ex_6(void* heap, sp_point_384* sp, sp_point_384** p) +{ + int ret = MP_OKAY; + (void)heap; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + (void)sp; + *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); +#else + *p = sp; +#endif + if (*p == NULL) { + ret = MEMORY_E; + } + return ret; +} + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +/* Allocate memory for point and return error. */ +#define sp_384_point_new_6(heap, sp, p) sp_384_point_new_ex_6((heap), NULL, &(p)) +#else +/* Set pointer to data and return no error. */ +#define sp_384_point_new_6(heap, sp, p) sp_384_point_new_ex_6((heap), &(sp), &(p)) +#endif + + +static void sp_384_point_free_6(sp_point_384* p, int clear, void* heap) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +/* If valid pointer then clear point data if requested and free data. */ + if (p != NULL) { + if (clear != 0) { + XMEMSET(p, 0, sizeof(*p)); + } + XFREE(p, heap, DYNAMIC_TYPE_ECC); + } +#else +/* Clear point data if requested. */ + if (clear != 0) { + XMEMSET(p, 0, sizeof(*p)); + } +#endif + (void)heap; +} + +/* Multiply a number by Montogmery normalizer mod modulus (prime). + * + * r The resulting Montgomery form number. + * a The number to convert. + * m The modulus (prime). + * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise. 
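+ *
+ * The result is a * 2^384 mod m. The body splits a into twelve 32-bit
+ * words and applies the fixed coefficient rows of the fast NIST P384
+ * reduction, so no general division is required.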
+ */ +static int sp_384_mod_mul_norm_6(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + int64_t* td; +#else + int64_t td[12]; + int64_t a32d[12]; +#endif + int64_t* t; + int64_t* a32; + int64_t o; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 12, NULL, DYNAMIC_TYPE_ECC); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = td; + a32 = td + 12; +#else + t = td; + a32 = a32d; +#endif + + a32[0] = a[0] & 0xffffffff; + a32[1] = a[0] >> 32; + a32[2] = a[1] & 0xffffffff; + a32[3] = a[1] >> 32; + a32[4] = a[2] & 0xffffffff; + a32[5] = a[2] >> 32; + a32[6] = a[3] & 0xffffffff; + a32[7] = a[3] >> 32; + a32[8] = a[4] & 0xffffffff; + a32[9] = a[4] >> 32; + a32[10] = a[5] & 0xffffffff; + a32[11] = a[5] >> 32; + + /* 1 0 0 0 0 0 0 0 1 1 0 -1 */ + t[0] = 0 + a32[0] + a32[8] + a32[9] - a32[11]; + /* -1 1 0 0 0 0 0 0 -1 0 1 1 */ + t[1] = 0 - a32[0] + a32[1] - a32[8] + a32[10] + a32[11]; + /* 0 -1 1 0 0 0 0 0 0 -1 0 1 */ + t[2] = 0 - a32[1] + a32[2] - a32[9] + a32[11]; + /* 1 0 -1 1 0 0 0 0 1 1 -1 -1 */ + t[3] = 0 + a32[0] - a32[2] + a32[3] + a32[8] + a32[9] - a32[10] - a32[11]; + /* 1 1 0 -1 1 0 0 0 1 2 1 -2 */ + t[4] = 0 + a32[0] + a32[1] - a32[3] + a32[4] + a32[8] + 2 * a32[9] + a32[10] - 2 * a32[11]; + /* 0 1 1 0 -1 1 0 0 0 1 2 1 */ + t[5] = 0 + a32[1] + a32[2] - a32[4] + a32[5] + a32[9] + 2 * a32[10] + a32[11]; + /* 0 0 1 1 0 -1 1 0 0 0 1 2 */ + t[6] = 0 + a32[2] + a32[3] - a32[5] + a32[6] + a32[10] + 2 * a32[11]; + /* 0 0 0 1 1 0 -1 1 0 0 0 1 */ + t[7] = 0 + a32[3] + a32[4] - a32[6] + a32[7] + a32[11]; + /* 0 0 0 0 1 1 0 -1 1 0 0 0 */ + t[8] = 0 + a32[4] + a32[5] - a32[7] + a32[8]; + /* 0 0 0 0 0 1 1 0 -1 1 0 0 */ + t[9] = 0 + a32[5] + a32[6] - a32[8] + a32[9]; + /* 0 0 0 0 0 0 1 1 0 -1 1 0 */ + t[10] = 0 + a32[6] + a32[7] - a32[9] + a32[10]; + /* 0 0 0 0 0 0 0 1 1 0 -1 1 */ + t[11] = 0 + a32[7] + a32[8] - a32[10] + a32[11]; + + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + t[8] += t[7] >> 32; t[7] &= 0xffffffff; + t[9] += t[8] >> 32; t[8] &= 0xffffffff; + t[10] += t[9] >> 32; t[9] &= 0xffffffff; + t[11] += t[10] >> 32; t[10] &= 0xffffffff; + o = t[11] >> 32; t[11] &= 0xffffffff; + t[0] += o; + t[1] -= o; + t[3] += o; + t[4] += o; + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + t[8] += t[7] >> 32; t[7] &= 0xffffffff; + t[9] += t[8] >> 32; t[8] &= 0xffffffff; + t[10] += t[9] >> 32; t[9] &= 0xffffffff; + t[11] += t[10] >> 32; t[10] &= 0xffffffff; + + r[0] = (t[1] << 32) | t[0]; + r[1] = (t[3] << 32) | t[2]; + r[2] = (t[5] << 32) | t[4]; + r[3] = (t[7] << 32) | t[6]; + r[4] = (t[9] << 32) | t[8]; + r[5] = (t[11] << 32) | t[10]; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) + 
XFREE(td, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 64 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 64 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffffffffffffl; + s = 64U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 64U) <= (word32)DIGIT_BIT) { + s += 64U; + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 64) { + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + s = 64 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Convert a point of type ecc_point to type sp_point_384. + * + * p Point of type sp_point_384 (result). + * pm Point of type ecc_point. + */ +static void sp_384_point_from_ecc_point_6(sp_point_384* p, const ecc_point* pm) +{ + XMEMSET(p->x, 0, sizeof(p->x)); + XMEMSET(p->y, 0, sizeof(p->y)); + XMEMSET(p->z, 0, sizeof(p->z)); + sp_384_from_mp(p->x, 6, pm->x); + sp_384_from_mp(p->y, 6, pm->y); + sp_384_from_mp(p->z, 6, pm->z); + p->infinity = 0; +} + +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_384_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 64 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 6); + r->used = 6; + mp_clamp(r); +#elif DIGIT_BIT < 64 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 6; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 64) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 64 - s; + } + r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 6; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 64 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 64 - s; + } + else { + s += 64; + } + } + r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Convert a point of type sp_point_384 to type ecc_point. + * + * p Point of type sp_point_384. + * pm Point of type ecc_point (result). 
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise + * MP_OKAY. + */ +static int sp_384_point_to_ecc_point_6(const sp_point_384* p, ecc_point* pm) +{ + int err; + + err = sp_384_to_mp(p->x, pm->x); + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, pm->y); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, pm->z); + } + + return err; +} + +/* Conditionally copy a into r using the mask m. + * m is -1 to copy and 0 when not. + * + * r A single precision number to copy over. + * a A single precision number to copy. + * m Mask value to apply. + */ +static void sp_384_cond_copy_6(sp_digit* r, const sp_digit* a, sp_digit m) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[r], 0]\n\t" + "ldp x5, x6, [%[r], 16]\n\t" + "ldp x7, x8, [%[r], 32]\n\t" + "ldp x9, x10, [%[a], 0]\n\t" + "ldp x11, x12, [%[a], 16]\n\t" + "ldp x13, x14, [%[a], 32]\n\t" + "eor x9, x9, x3\n\t" + "eor x10, x10, x4\n\t" + "eor x11, x11, x5\n\t" + "eor x12, x12, x6\n\t" + "eor x13, x13, x7\n\t" + "eor x14, x14, x8\n\t" + "and x9, x9, %[m]\n\t" + "and x10, x10, %[m]\n\t" + "and x11, x11, %[m]\n\t" + "and x12, x12, %[m]\n\t" + "and x13, x13, %[m]\n\t" + "and x14, x14, %[m]\n\t" + "eor x3, x3, x9\n\t" + "eor x4, x4, x10\n\t" + "eor x5, x5, x11\n\t" + "eor x6, x6, x12\n\t" + "eor x7, x7, x13\n\t" + "eor x8, x8, x14\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "stp x7, x8, [%[r], 32]\n\t" + : + : [r] "r" (r), [a] "r" (a), [m] "r" (m) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14" + ); +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_384_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_digit tmp[12]; + + __asm__ __volatile__ ( + "mov x5, 0\n\t" + "mov x6, 0\n\t" + "mov x7, 0\n\t" + "mov x8, 0\n\t" + "\n1:\n\t" + "subs x3, x5, 40\n\t" + "csel x3, xzr, x3, cc\n\t" + "sub x4, x5, x3\n\t" + "\n2:\n\t" + "ldr x10, [%[a], x3]\n\t" + "ldr x11, [%[b], x4]\n\t" + "mul x9, x10, x11\n\t" + "umulh x10, x10, x11\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "add x3, x3, #8\n\t" + "sub x4, x4, #8\n\t" + "cmp x3, 48\n\t" + "b.eq 3f\n\t" + "cmp x3, x5\n\t" + "b.le 2b\n\t" + "\n3:\n\t" + "str x6, [%[r], x5]\n\t" + "mov x6, x7\n\t" + "mov x7, x8\n\t" + "mov x8, #0\n\t" + "add x5, x5, #8\n\t" + "cmp x5, 80\n\t" + "b.le 1b\n\t" + "str x6, [%[r], x5]\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +#else +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
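+ *
+ * This small-code variant forms the product with a word-by-word
+ * product-scanning loop in assembly and writes through a 12-word
+ * temporary, so r may alias a or b.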
+ */ +static void sp_384_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_digit tmp[6]; + + __asm__ __volatile__ ( + "ldp x9, x10, [%[a], 0]\n\t" + "ldp x11, x12, [%[a], 16]\n\t" + "ldp x13, x14, [%[a], 32]\n\t" + "ldp x15, x16, [%[b], 0]\n\t" + "ldp x17, x19, [%[b], 16]\n\t" + "ldp x20, x21, [%[b], 32]\n\t" + "# A[0] * B[0]\n\t" + "mul x4, x9, x15\n\t" + "umulh x5, x9, x15\n\t" + "str x4, [%[tmp]]\n\t" + "# A[0] * B[1]\n\t" + "mul x7, x9, x16\n\t" + "umulh x8, x9, x16\n\t" + "adds x5, x5, x7\n\t" + "# A[1] * B[0]\n\t" + "mul x7, x10, x15\n\t" + "adc x6, xzr, x8\n\t" + "umulh x8, x10, x15\n\t" + "adds x5, x5, x7\n\t" + "adcs x6, x6, x8\n\t" + "str x5, [%[tmp], 8]\n\t" + "adc x4, xzr, xzr\n\t" + "# A[0] * B[2]\n\t" + "mul x7, x9, x17\n\t" + "umulh x8, x9, x17\n\t" + "adds x6, x6, x7\n\t" + "# A[1] * B[1]\n\t" + "mul x7, x10, x16\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x10, x16\n\t" + "adc x5, xzr, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[2] * B[0]\n\t" + "mul x7, x11, x15\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x11, x15\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "adcs x4, x4, x8\n\t" + "str x6, [%[tmp], 16]\n\t" + "adc x5, x5, xzr\n\t" + "# A[0] * B[3]\n\t" + "mul x7, x9, x19\n\t" + "umulh x8, x9, x19\n\t" + "adds x4, x4, x7\n\t" + "# A[1] * B[2]\n\t" + "mul x7, x10, x17\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x10, x17\n\t" + "adc x6, xzr, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[2] * B[1]\n\t" + "mul x7, x11, x16\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x11, x16\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[3] * B[0]\n\t" + "mul x7, x12, x15\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x12, x15\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "str x4, [%[tmp], 24]\n\t" + "adc x6, x6, xzr\n\t" + "# A[0] * B[4]\n\t" + "mul x7, x9, x20\n\t" + "umulh x8, x9, x20\n\t" + "adds x5, x5, x7\n\t" + "# A[1] * B[3]\n\t" + "mul x7, x10, x19\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x10, x19\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[2] * B[2]\n\t" + "mul x7, x11, x17\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x11, x17\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[3] * B[1]\n\t" + "mul x7, x12, x16\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x12, x16\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[4] * B[0]\n\t" + "mul x7, x13, x15\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x13, x15\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "adcs x6, x6, x8\n\t" + "str x5, [%[tmp], 32]\n\t" + "adc x4, x4, xzr\n\t" + "# A[0] * B[5]\n\t" + "mul x7, x9, x21\n\t" + "umulh x8, x9, x21\n\t" + "adds x6, x6, x7\n\t" + "# A[1] * B[4]\n\t" + "mul x7, x10, x20\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x10, x20\n\t" + "adc x5, xzr, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[2] * B[3]\n\t" + "mul x7, x11, x19\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x11, x19\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[3] * B[2]\n\t" + "mul x7, x12, x17\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x12, x17\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[4] * B[1]\n\t" + "mul x7, x13, x16\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x13, x16\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[5] * B[0]\n\t" + "mul x7, x14, x15\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x14, x15\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "adcs x4, x4, x8\n\t" + "str x6, [%[tmp], 40]\n\t" + "adc x5, x5, xzr\n\t" + "# A[1] * B[5]\n\t" + "mul x7, x10, x21\n\t" + "umulh x8, x10, x21\n\t" + "adds x4, x4, 
x7\n\t" + "# A[2] * B[4]\n\t" + "mul x7, x11, x20\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x11, x20\n\t" + "adc x6, xzr, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[3] * B[3]\n\t" + "mul x7, x12, x19\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x12, x19\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[4] * B[2]\n\t" + "mul x7, x13, x17\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x13, x17\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "# A[5] * B[1]\n\t" + "mul x7, x14, x16\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x14, x16\n\t" + "adc x6, x6, xzr\n\t" + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "str x4, [%[r], 48]\n\t" + "adc x6, x6, xzr\n\t" + "# A[2] * B[5]\n\t" + "mul x7, x11, x21\n\t" + "umulh x8, x11, x21\n\t" + "adds x5, x5, x7\n\t" + "# A[3] * B[4]\n\t" + "mul x7, x12, x20\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x12, x20\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[4] * B[3]\n\t" + "mul x7, x13, x19\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x13, x19\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "# A[5] * B[2]\n\t" + "mul x7, x14, x17\n\t" + "adcs x6, x6, x8\n\t" + "umulh x8, x14, x17\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x7\n\t" + "adcs x6, x6, x8\n\t" + "str x5, [%[r], 56]\n\t" + "adc x4, x4, xzr\n\t" + "# A[3] * B[5]\n\t" + "mul x7, x12, x21\n\t" + "umulh x8, x12, x21\n\t" + "adds x6, x6, x7\n\t" + "# A[4] * B[4]\n\t" + "mul x7, x13, x20\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x13, x20\n\t" + "adc x5, xzr, xzr\n\t" + "adds x6, x6, x7\n\t" + "# A[5] * B[3]\n\t" + "mul x7, x14, x19\n\t" + "adcs x4, x4, x8\n\t" + "umulh x8, x14, x19\n\t" + "adc x5, x5, xzr\n\t" + "adds x6, x6, x7\n\t" + "adcs x4, x4, x8\n\t" + "str x6, [%[r], 64]\n\t" + "adc x5, x5, xzr\n\t" + "# A[4] * B[5]\n\t" + "mul x7, x13, x21\n\t" + "umulh x8, x13, x21\n\t" + "adds x4, x4, x7\n\t" + "# A[5] * B[4]\n\t" + "mul x7, x14, x20\n\t" + "adcs x5, x5, x8\n\t" + "umulh x8, x14, x20\n\t" + "adc x6, xzr, xzr\n\t" + "adds x4, x4, x7\n\t" + "adcs x5, x5, x8\n\t" + "str x4, [%[r], 72]\n\t" + "adc x6, x6, xzr\n\t" + "# A[5] * B[5]\n\t" + "mul x7, x14, x21\n\t" + "umulh x8, x14, x21\n\t" + "adds x5, x5, x7\n\t" + "adc x6, x6, x8\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x9, x10, [%[tmp], 0]\n\t" + "ldp x11, x12, [%[tmp], 16]\n\t" + "ldp x13, x14, [%[tmp], 32]\n\t" + "stp x9, x10, [%[r], 0]\n\t" + "stp x11, x12, [%[r], 16]\n\t" + "stp x13, x14, [%[r], 32]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
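+ *
+ * The mask is applied to each word of b before subtracting, so the
+ * operation is r = a - (b & m) with no data-dependent branches; the
+ * return value is non-zero when the subtraction borrowed.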
+ */ +static sp_digit sp_384_cond_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + __asm__ __volatile__ ( + + "ldp x5, x7, [%[b], 0]\n\t" + "ldp x11, x12, [%[b], 16]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" + "and x7, x7, %[m]\n\t" + "subs x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "sbcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "sbcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "sbcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 16]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "csetm %[r], cc\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return (sp_digit)r; +} + +#define sp_384_mont_reduce_order_6 sp_384_mont_reduce_6 + +/* Reduce the number back to 384 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +SP_NOINLINE static void sp_384_mont_reduce_6(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "ldp x14, x15, [%[m], 0]\n\t" + "ldp x16, x17, [%[m], 16]\n\t" + "ldp x19, x20, [%[m], 32]\n\t" + "# i = 6\n\t" + "mov x4, 6\n\t" + "ldp x12, x13, [%[a], 0]\n\t" + "\n1:\n\t" + "# mu = a[i] * mp\n\t" + "mul x9, %[mp], x12\n\t" + "# a[i+0] += m[0] * mu\n\t" + "mul x7, x14, x9\n\t" + "umulh x8, x14, x9\n\t" + "adds x12, x12, x7\n\t" + "# a[i+1] += m[1] * mu\n\t" + "mul x7, x15, x9\n\t" + "adc x6, x8, xzr\n\t" + "umulh x8, x15, x9\n\t" + "adds x12, x13, x7\n\t" + "# a[i+2] += m[2] * mu\n\t" + "ldr x13, [%[a], 16]\n\t" + "adc x5, x8, xzr\n\t" + "mul x7, x16, x9\n\t" + "adds x12, x12, x6\n\t" + "umulh x8, x16, x9\n\t" + "adc x5, x5, xzr\n\t" + "adds x13, x13, x7\n\t" + "# a[i+3] += m[3] * mu\n\t" + "ldr x10, [%[a], 24]\n\t" + "adc x6, x8, xzr\n\t" + "mul x7, x17, x9\n\t" + "adds x13, x13, x5\n\t" + "umulh x8, x17, x9\n\t" + "adc x6, x6, xzr\n\t" + "adds x10, x10, x7\n\t" + "# a[i+4] += m[4] * mu\n\t" + "ldr x11, [%[a], 32]\n\t" + "adc x5, x8, xzr\n\t" + "adds x10, x10, x6\n\t" + "mul x7, x19, x9\n\t" + "adc x5, x5, xzr\n\t" + "umulh x8, x19, x9\n\t" + "str x10, [%[a], 24]\n\t" + "adds x11, x11, x7\n\t" + "# a[i+5] += m[5] * mu\n\t" + "ldr x10, [%[a], 40]\n\t" + "adc x6, x8, xzr\n\t" + "adds x11, x11, x5\n\t" + "mul x7, x20, x9\n\t" + "adc x6, x6, xzr\n\t" + "umulh x8, x20, x9\n\t" + "adds x6, x6, x7\n\t" + "adcs x8, x8, %[ca]\n\t" + "str x11, [%[a], 32]\n\t" + "cset %[ca], cs\n\t" + "adds x10, x10, x6\n\t" + "ldr x11, [%[a], 48]\n\t" + "str x10, [%[a], 40]\n\t" + "adcs x11, x11, x8\n\t" + "str x11, [%[a], 48]\n\t" + "adc %[ca], %[ca], xzr\n\t" + "subs x4, x4, 1\n\t" + "add %[a], %[a], 8\n\t" + "bne 1b\n\t" + "stp x12, x13, [%[a], 0]\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20" + ); + + sp_384_cond_sub_6(a - 6, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. 
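+ *
+ * Computes a * b * 2^-384 mod m as a full 768-bit product followed by a
+ * single Montgomery reduction, i.e. (the actual function body below):
+ *     sp_384_mul_6(r, a, b);
+ *     sp_384_mont_reduce_6(r, m, mp);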
+ */ +static void sp_384_mont_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_384_mul_6(r, a, b); + sp_384_mont_reduce_6(r, m, mp); +} + +#ifdef WOLFSSL_SP_SMALL +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +static void sp_384_sqr_6(sp_digit* r, const sp_digit* a) +{ + sp_digit tmp[12]; + + __asm__ __volatile__ ( + "mov x6, 0\n\t" + "mov x7, 0\n\t" + "mov x8, 0\n\t" + "mov x5, 0\n\t" + "\n1:\n\t" + "subs x3, x5, 40\n\t" + "csel x3, xzr, x3, cc\n\t" + "sub x4, x5, x3\n\t" + "\n2:\n\t" + "cmp x4, x3\n\t" + "b.eq 4f\n\t" + "ldr x10, [%[a], x3]\n\t" + "ldr x11, [%[a], x4]\n\t" + "mul x9, x10, x11\n\t" + "umulh x10, x10, x11\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "b.al 5f\n\t" + "\n4:\n\t" + "ldr x10, [%[a], x3]\n\t" + "mul x9, x10, x10\n\t" + "umulh x10, x10, x10\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "\n5:\n\t" + "add x3, x3, #8\n\t" + "sub x4, x4, #8\n\t" + "cmp x3, 48\n\t" + "b.eq 3f\n\t" + "cmp x3, x4\n\t" + "b.gt 3f\n\t" + "cmp x3, x5\n\t" + "b.le 2b\n\t" + "\n3:\n\t" + "str x6, [%[r], x5]\n\t" + "mov x6, x7\n\t" + "mov x7, x8\n\t" + "mov x8, #0\n\t" + "add x5, x5, #8\n\t" + "cmp x5, 80\n\t" + "b.le 1b\n\t" + "str x6, [%[r], x5]\n\t" + : + : [r] "r" (tmp), [a] "r" (a) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +#else +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +static void sp_384_sqr_6(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "ldp x17, x19, [%[a], 0]\n\t" + "ldp x20, x21, [%[a], 16]\n\t" + "ldp x22, x23, [%[a], 32]\n\t" + "# A[0] * A[1]\n\t" + "mul x3, x17, x19\n\t" + "umulh x7, x17, x19\n\t" + "# A[0] * A[2]\n\t" + "mul x4, x17, x20\n\t" + "umulh x5, x17, x20\n\t" + "adds x7, x7, x4\n\t" + "# A[0] * A[3]\n\t" + "mul x4, x17, x21\n\t" + "adc x8, xzr, x5\n\t" + "umulh x5, x17, x21\n\t" + "adds x8, x8, x4\n\t" + "# A[1] * A[2]\n\t" + "mul x4, x19, x20\n\t" + "adc x9, xzr, x5\n\t" + "umulh x5, x19, x20\n\t" + "adds x8, x8, x4\n\t" + "# A[0] * A[4]\n\t" + "mul x4, x17, x22\n\t" + "adcs x9, x9, x5\n\t" + "umulh x5, x17, x22\n\t" + "adc x10, xzr, xzr\n\t" + "adds x9, x9, x4\n\t" + "# A[1] * A[3]\n\t" + "mul x4, x19, x21\n\t" + "adc x10, x10, x5\n\t" + "umulh x5, x19, x21\n\t" + "adds x9, x9, x4\n\t" + "# A[0] * A[5]\n\t" + "mul x4, x17, x23\n\t" + "adcs x10, x10, x5\n\t" + "umulh x5, x17, x23\n\t" + "adc x11, xzr, xzr\n\t" + "adds x10, x10, x4\n\t" + "# A[1] * A[4]\n\t" + "mul x4, x19, x22\n\t" + "adc x11, x11, x5\n\t" + "umulh x5, x19, x22\n\t" + "adds x10, x10, x4\n\t" + "# A[2] * A[3]\n\t" + "mul x4, x20, x21\n\t" + "adcs x11, x11, x5\n\t" + "umulh x5, x20, x21\n\t" + "adc x12, xzr, xzr\n\t" + "adds x10, x10, x4\n\t" + "# A[1] * A[5]\n\t" + "mul x4, x19, x23\n\t" + "adcs x11, x11, x5\n\t" + "umulh x5, x19, x23\n\t" + "adc x12, x12, xzr\n\t" + "adds x11, x11, x4\n\t" + "# A[2] * A[4]\n\t" + "mul x4, x20, x22\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x20, x22\n\t" + "adc x13, xzr, xzr\n\t" + "adds x11, x11, x4\n\t" + "# A[2] * A[5]\n\t" + "mul x4, x20, x23\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x20, x23\n\t" + "adc x13, x13, xzr\n\t" + "adds x12, x12, x4\n\t" + "# A[3] * A[4]\n\t" + "mul x4, x21, x22\n\t" + "adcs x13, x13, x5\n\t" + "umulh 
x5, x21, x22\n\t" + "adc x14, xzr, xzr\n\t" + "adds x12, x12, x4\n\t" + "# A[3] * A[5]\n\t" + "mul x4, x21, x23\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x21, x23\n\t" + "adc x14, x14, xzr\n\t" + "adds x13, x13, x4\n\t" + "# A[4] * A[5]\n\t" + "mul x4, x22, x23\n\t" + "adcs x14, x14, x5\n\t" + "umulh x5, x22, x23\n\t" + "adc x15, xzr, xzr\n\t" + "adds x14, x14, x4\n\t" + "adc x15, x15, x5\n\t" + "# Double\n\t" + "adds x3, x3, x3\n\t" + "adcs x7, x7, x7\n\t" + "adcs x8, x8, x8\n\t" + "adcs x9, x9, x9\n\t" + "adcs x10, x10, x10\n\t" + "adcs x11, x11, x11\n\t" + "adcs x12, x12, x12\n\t" + "adcs x13, x13, x13\n\t" + "adcs x14, x14, x14\n\t" + "# A[0] * A[0]\n\t" + "mul x2, x17, x17\n\t" + "adcs x15, x15, x15\n\t" + "umulh x4, x17, x17\n\t" + "cset x16, cs\n\t" + "# A[1] * A[1]\n\t" + "mul x5, x19, x19\n\t" + "adds x3, x3, x4\n\t" + "umulh x6, x19, x19\n\t" + "adcs x7, x7, x5\n\t" + "# A[2] * A[2]\n\t" + "mul x4, x20, x20\n\t" + "adcs x8, x8, x6\n\t" + "umulh x5, x20, x20\n\t" + "adcs x9, x9, x4\n\t" + "# A[3] * A[3]\n\t" + "mul x6, x21, x21\n\t" + "adcs x10, x10, x5\n\t" + "umulh x4, x21, x21\n\t" + "adcs x11, x11, x6\n\t" + "# A[4] * A[4]\n\t" + "mul x5, x22, x22\n\t" + "adcs x12, x12, x4\n\t" + "umulh x6, x22, x22\n\t" + "adcs x13, x13, x5\n\t" + "# A[5] * A[5]\n\t" + "mul x4, x23, x23\n\t" + "adcs x14, x14, x6\n\t" + "umulh x5, x23, x23\n\t" + "adcs x15, x15, x4\n\t" + "stp x2, x3, [%[r], 0]\n\t" + "adc x16, x16, x5\n\t" + "stp x7, x8, [%[r], 16]\n\t" + "stp x9, x10, [%[r], 32]\n\t" + "stp x11, x12, [%[r], 48]\n\t" + "stp x13, x14, [%[r], 64]\n\t" + "stp x15, x16, [%[r], 80]\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "x4", "x5", "x6", "x2", "x3", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_384_mont_sqr_6(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_384_sqr_6(r, a); + sp_384_mont_reduce_6(r, m, mp); +} + +#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY) +/* Square the Montgomery form number a number of times. (r = a ^ n mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * n Number of times to square. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_384_mont_sqr_n_6(sp_digit* r, const sp_digit* a, int n, + const sp_digit* m, sp_digit mp) +{ + sp_384_mont_sqr_6(r, a, m, mp); + for (; n > 1; n--) { + sp_384_mont_sqr_6(r, r, m, mp); + } +} + +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ +#ifdef WOLFSSL_SP_SMALL +/* Mod-2 for the P384 curve. */ +static const uint64_t p384_mod_minus_2[6] = { + 0x00000000fffffffdU,0xffffffff00000000U,0xfffffffffffffffeU, + 0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU +}; +#endif /* !WOLFSSL_SP_SMALL */ + +/* Invert the number, in Montgomery form, modulo the modulus (prime) of the + * P384 curve. (r = 1 / a mod m) + * + * r Inverse result. + * a Number to invert. + * td Temporary data. 
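+ *
+ * Inversion relies on Fermat's little theorem: r = a^(p-2) mod p. The
+ * small build walks the bits of p384_mod_minus_2 with square-and-multiply;
+ * the default build uses the fixed addition chain below.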
+ */ +static void sp_384_mont_inv_6(sp_digit* r, const sp_digit* a, sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 6); + for (i=382; i>=0; i--) { + sp_384_mont_sqr_6(t, t, p384_mod, p384_mp_mod); + if (p384_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64))) + sp_384_mont_mul_6(t, t, a, p384_mod, p384_mp_mod); + } + XMEMCPY(r, t, sizeof(sp_digit) * 6); +#else + sp_digit* t1 = td; + sp_digit* t2 = td + 2 * 6; + sp_digit* t3 = td + 4 * 6; + sp_digit* t4 = td + 6 * 6; + sp_digit* t5 = td + 8 * 6; + + /* 0x2 */ + sp_384_mont_sqr_6(t1, a, p384_mod, p384_mp_mod); + /* 0x3 */ + sp_384_mont_mul_6(t5, t1, a, p384_mod, p384_mp_mod); + /* 0xc */ + sp_384_mont_sqr_n_6(t1, t5, 2, p384_mod, p384_mp_mod); + /* 0xf */ + sp_384_mont_mul_6(t2, t5, t1, p384_mod, p384_mp_mod); + /* 0x1e */ + sp_384_mont_sqr_6(t1, t2, p384_mod, p384_mp_mod); + /* 0x1f */ + sp_384_mont_mul_6(t4, t1, a, p384_mod, p384_mp_mod); + /* 0x3e0 */ + sp_384_mont_sqr_n_6(t1, t4, 5, p384_mod, p384_mp_mod); + /* 0x3ff */ + sp_384_mont_mul_6(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0x7fe0 */ + sp_384_mont_sqr_n_6(t1, t2, 5, p384_mod, p384_mp_mod); + /* 0x7fff */ + sp_384_mont_mul_6(t4, t4, t1, p384_mod, p384_mp_mod); + /* 0x3fff8000 */ + sp_384_mont_sqr_n_6(t1, t4, 15, p384_mod, p384_mp_mod); + /* 0x3fffffff */ + sp_384_mont_mul_6(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0xfffffffc */ + sp_384_mont_sqr_n_6(t3, t2, 2, p384_mod, p384_mp_mod); + /* 0xfffffffd */ + sp_384_mont_mul_6(r, t3, a, p384_mod, p384_mp_mod); + /* 0xffffffff */ + sp_384_mont_mul_6(t3, t5, t3, p384_mod, p384_mp_mod); + /* 0xfffffffc0000000 */ + sp_384_mont_sqr_n_6(t1, t2, 30, p384_mod, p384_mp_mod); + /* 0xfffffffffffffff */ + sp_384_mont_mul_6(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffff000000000000000 */ + sp_384_mont_sqr_n_6(t1, t2, 60, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffff */ + sp_384_mont_mul_6(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */ + sp_384_mont_sqr_n_6(t1, t2, 120, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_6(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */ + sp_384_mont_sqr_n_6(t1, t2, 15, p384_mod, p384_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_6(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */ + sp_384_mont_sqr_n_6(t1, t2, 33, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */ + sp_384_mont_mul_6(t2, t3, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */ + sp_384_mont_sqr_n_6(t1, t2, 96, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */ + sp_384_mont_mul_6(r, r, t1, p384_mod, p384_mp_mod); + +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. 
+ */ +static int64_t sp_384_cmp_6(const sp_digit* a, const sp_digit* b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov x2, -1\n\t" + "mov x3, 1\n\t" + "mov x4, -1\n\t" + "mov x5, 40\n\t" + "1:\n\t" + "ldr x6, [%[a], x5]\n\t" + "ldr x7, [%[b], x5]\n\t" + "and x6, x6, x4\n\t" + "and x7, x7, x4\n\t" + "subs x6, x6, x7\n\t" + "csel x2, x3, x2, hi\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "subs x5, x5, #8\n\t" + "b.cs 1b\n\t" + "eor %[a], x2, x4\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16" + ); +#else + __asm__ __volatile__ ( + "mov x2, -1\n\t" + "mov x3, 1\n\t" + "mov x4, -1\n\t" + "ldp x5, x6, [%[a], 0]\n\t" + "ldp x7, x8, [%[a], 16]\n\t" + "ldp x9, x10, [%[a], 32]\n\t" + "ldp x11, x12, [%[b], 0]\n\t" + "ldp x13, x14, [%[b], 16]\n\t" + "ldp x15, x16, [%[b], 32]\n\t" + "and x10, x10, x4\n\t" + "and x16, x16, x4\n\t" + "subs x10, x10, x16\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x9, x9, x4\n\t" + "and x15, x15, x4\n\t" + "subs x9, x9, x15\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x8, x8, x4\n\t" + "and x14, x14, x4\n\t" + "subs x8, x8, x14\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x7, x7, x4\n\t" + "and x13, x13, x4\n\t" + "subs x7, x7, x13\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x6, x6, x4\n\t" + "and x12, x12, x4\n\t" + "subs x6, x6, x12\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "and x5, x5, x4\n\t" + "and x11, x11, x4\n\t" + "subs x5, x5, x11\n\t" + "csel x2, x4, x2, lo\n\t" + "csel x4, x4, xzr, eq\n\t" + "csel x2, x3, x2, hi\n\t" + "eor %[a], x2, x4\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16" + ); +#endif + + return (int64_t)a; +} + +/* Normalize the values in each word to 64. + * + * a Array of sp_digit to normalize. + */ +#define sp_384_norm_6(a) + +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. + * p Montgomery form projective coordinate point. + * t Temporary ordinate data. + */ +static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + int64_t n; + + sp_384_mont_inv_6(t1, p->z, t + 2*6); + + sp_384_mont_sqr_6(t2, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t1, t2, t1, p384_mod, p384_mp_mod); + + /* x /= z^2 */ + sp_384_mont_mul_6(r->x, p->x, t2, p384_mod, p384_mp_mod); + XMEMSET(r->x + 6, 0, sizeof(r->x) / 2U); + sp_384_mont_reduce_6(r->x, p384_mod, p384_mp_mod); + /* Reduce x to less than modulus */ + n = sp_384_cmp_6(r->x, p384_mod); + sp_384_cond_sub_6(r->x, r->x, p384_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_6(r->x); + + /* y /= z^3 */ + sp_384_mont_mul_6(r->y, p->y, t1, p384_mod, p384_mp_mod); + XMEMSET(r->y + 6, 0, sizeof(r->y) / 2U); + sp_384_mont_reduce_6(r->y, p384_mod, p384_mp_mod); + /* Reduce y to less than modulus */ + n = sp_384_cmp_6(r->y, p384_mod); + sp_384_cond_sub_6(r->y, r->y, p384_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_6(r->y); + + XMEMSET(r->z, 0, sizeof(r->z)); + r->z[0] = 1; + +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. 
+ * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_384_add_6(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "adds x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldr x3, [%[a], 32]\n\t" + "ldr x4, [%[a], 40]\n\t" + "ldr x7, [%[b], 32]\n\t" + "ldr x8, [%[b], 40]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "str x3, [%[r], 32]\n\t" + "str x4, [%[r], 40]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return (sp_digit)r; +} + +/* Add two Montgomery form numbers (r = a + b % m). + * + * r Result of addition. + * a First number to add in Montogmery form. + * b Second number to add in Montogmery form. + * m Modulus (prime). + */ +static void sp_384_mont_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + sp_digit o; + + o = sp_384_add_6(r, a, b); + sp_384_cond_sub_6(r, r, m, 0 - o); +} + +/* Double a Montgomery form number (r = a + a % m). + * + * r Result of doubling. + * a Number to double in Montogmery form. + * m Modulus (prime). + */ +static void sp_384_mont_dbl_6(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit o; + + o = sp_384_add_6(r, a, a); + sp_384_cond_sub_6(r, r, m, 0 - o); +} + +/* Triple a Montgomery form number (r = a + a + a % m). + * + * r Result of Tripling. + * a Number to triple in Montogmery form. + * m Modulus (prime). + */ +static void sp_384_mont_tpl_6(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit o; + + o = sp_384_add_6(r, a, a); + sp_384_cond_sub_6(r, r, m, 0 - o); + o = sp_384_add_6(r, r, a); + sp_384_cond_sub_6(r, r, m, 0 - o); +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_384_sub_6(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "subs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "sbcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldr x3, [%[a], 32]\n\t" + "ldr x4, [%[a], 40]\n\t" + "ldr x7, [%[b], 32]\n\t" + "ldr x8, [%[b], 40]\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "str x3, [%[r], 32]\n\t" + "str x4, [%[r], 40]\n\t" + "csetm %[r], cc\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return (sp_digit)r; +} + +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
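+ *
+ * Used by the modular subtraction and halving below: the mask (0 or all
+ * ones, typically a borrow) selects whether the modulus is added back to
+ * bring a result into range.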
+ */ +static sp_digit sp_384_cond_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov x8, #0\n\t" + "1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr x4, [%[a], x8]\n\t" + "ldr x5, [%[b], x8]\n\t" + "and x5, x5, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "cset %[c], cs\n\t" + "str x4, [%[r], x8]\n\t" + "add x8, x8, #8\n\t" + "cmp x8, 48\n\t" + "b.lt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return c; +#else + __asm__ __volatile__ ( + + "ldp x5, x7, [%[b], 0]\n\t" + "ldp x11, x12, [%[b], 16]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "and x5, x5, %[m]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" + "and x7, x7, %[m]\n\t" + "adds x4, x4, x5\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x7\n\t" + "and x12, x12, %[m]\n\t" + "adcs x9, x9, x11\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "adcs x10, x10, x12\n\t" + "stp x9, x10, [%[r], 16]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + ); + + return (sp_digit)r; +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Subtract two Montgomery form numbers (r = a - b % m). + * + * r Result of subtration. + * a Number to subtract from in Montogmery form. + * b Number to subtract with in Montogmery form. + * m Modulus (prime). + */ +static void sp_384_mont_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + sp_digit o; + + o = sp_384_sub_6(r, a, b); + sp_384_cond_add_6(r, r, m, o); +} + +static void sp_384_rshift1_6(sp_digit* r, sp_digit* a) +{ + __asm__ __volatile__ ( + "ldp x2, x3, [%[a]]\n\t" + "ldp x4, x5, [%[a], 16]\n\t" + "ldp x6, x7, [%[a], 32]\n\t" + "lsr x11, x6, 1\n\t" + "lsr x10, x5, 1\n\t" + "lsr x9, x4, 1\n\t" + "lsr x8, x3, 1\n\t" + "lsr x2, x2, 1\n\t" + "orr x2, x2, x3, lsl 63\n\t" + "orr x3, x8, x4, lsl 63\n\t" + "orr x4, x9, x5, lsl 63\n\t" + "orr x5, x10, x6, lsl 63\n\t" + "orr x6, x11, x7, lsl 63\n\t" + "lsr x7, x7, 1\n\t" + "stp x2, x3, [%[r]]\n\t" + "stp x4, x5, [%[r], 16]\n\t" + "stp x6, x7, [%[r], 32]\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); +} + +/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) + * + * r Result of division by 2. + * a Number to divide. + * m Modulus (prime). + */ +static void sp_384_div2_6(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit o; + + o = sp_384_cond_add_6(r, a, m, 0 - (a[0] & 1)); + sp_384_rshift1_6(r, r); + r[5] |= o << 63; +} + +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static void sp_384_proj_point_dbl_6(sp_point_384* r, const sp_point_384* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. 
*/ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_6(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_6(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_6(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_6(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_6(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_6(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_6(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_6(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_6(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_div2_6(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_6(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_6(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_6(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_6(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_6(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_6(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_6(y, y, t2, p384_mod); +} + +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. + */ +static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int n, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*6; + sp_digit* b = t + 4*6; + sp_digit* t1 = t + 6*6; + sp_digit* t2 = t + 8*6; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_384_mont_dbl_6(y, y, p384_mod); + /* W = Z^4 */ + sp_384_mont_sqr_6(w, z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(w, w, p384_mod, p384_mp_mod); + +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_384_mont_sqr_6(t1, x, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(t1, t1, w, p384_mod); + sp_384_mont_tpl_6(a, t1, p384_mod); + /* B = X*Y^2 */ + sp_384_mont_sqr_6(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(b, t1, x, p384_mod, p384_mp_mod); + /* X = A^2 - 2B */ + sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(t2, b, p384_mod); + sp_384_mont_sub_6(x, x, t2, p384_mod); + /* Z = Z*Y */ + sp_384_mont_mul_6(z, z, y, p384_mod, p384_mp_mod); + /* t2 = Y^4 */ + sp_384_mont_sqr_6(t1, t1, p384_mod, p384_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_384_mont_mul_6(w, w, t1, p384_mod, p384_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_384_mont_sub_6(y, b, x, p384_mod); + sp_384_mont_mul_6(y, y, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(y, y, p384_mod); + sp_384_mont_sub_6(y, y, t1, p384_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_384_mont_sqr_6(t1, x, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(t1, t1, w, p384_mod); + sp_384_mont_tpl_6(a, t1, p384_mod); + /* B = X*Y^2 */ + sp_384_mont_sqr_6(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(b, t1, x, p384_mod, p384_mp_mod); + /* X = A^2 - 2B */ + sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(t2, b, p384_mod); + sp_384_mont_sub_6(x, x, t2, p384_mod); + /* Z = Z*Y */ + sp_384_mont_mul_6(z, z, y, p384_mod, p384_mp_mod); + /* t2 = Y^4 */ + sp_384_mont_sqr_6(t1, t1, p384_mod, p384_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_384_mont_sub_6(y, b, x, p384_mod); + sp_384_mont_mul_6(y, y, 
a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(y, y, p384_mod); + sp_384_mont_sub_6(y, y, t1, p384_mod); +#endif + /* Y = Y/2 */ + sp_384_div2_6(y, y, p384_mod); +} + +/* Compare two numbers to determine if they are equal. + * Constant time implementation. + * + * a First number to compare. + * b Second number to compare. + * returns 1 when equal and 0 otherwise. + */ +static int sp_384_cmp_equal_6(const sp_digit* a, const sp_digit* b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) | + (a[4] ^ b[4]) | (a[5] ^ b[5])) == 0; +} + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_384_proj_point_add_6(sp_point_384* r, const sp_point_384* p, const sp_point_384* q, + sp_digit* t) +{ + const sp_point_384* ap[2]; + sp_point_384* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + sp_digit* t3 = t + 4*6; + sp_digit* t4 = t + 6*6; + sp_digit* t5 = t + 8*6; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Ensure only the first point is the same as the result. */ + if (q == r) { + const sp_point_384* a = p; + p = q; + q = a; + } + + /* Check double */ + (void)sp_384_sub_6(t1, p384_mod, q->y); + sp_384_norm_6(t1); + if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & + (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { + sp_384_proj_point_dbl_6(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_384)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<6; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<6; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<6; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t1, t1, x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_6(t3, t3, y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_6(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_6(t4, t4, t3, p384_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(x, x, t5, p384_mod); + sp_384_mont_dbl_6(t1, y, p384_mod); + sp_384_mont_sub_6(x, x, t1, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_6(y, y, x, p384_mod); + sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(y, y, t5, p384_mod); + } +} + +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. 
+ * p  Point to double.
+ * n  Number of times to double
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_n_store_6(sp_point_384* r, const sp_point_384* p,
+        int n, int m, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*6;
+    sp_digit* b = t + 4*6;
+    sp_digit* t1 = t + 6*6;
+    sp_digit* t2 = t + 8*6;
+    sp_digit* x = r[2*m].x;
+    sp_digit* y = r[(1<<n)*m].y;
+    sp_digit* z = r[2*m].z;
+    int i;
+
+    for (i=0; i<6; i++) {
+        x[i] = p->x[i];
+    }
+    for (i=0; i<6; i++) {
+        y[i] = p->y[i];
+    }
+    for (i=0; i<6; i++) {
+        z[i] = p->z[i];
+    }
+
+    /* Y = 2*Y */
+    sp_384_mont_dbl_6(y, y, p384_mod);
+    /* W = Z^4 */
+    sp_384_mont_sqr_6(w, z, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_6(w, w, p384_mod, p384_mp_mod);
+    for (i=1; i<=n; i++) {
+        /* A = 3*(X^2 - W) */
+        sp_384_mont_sqr_6(t1, x, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_6(t1, t1, w, p384_mod);
+        sp_384_mont_tpl_6(a, t1, p384_mod);
+        /* B = X*Y^2 */
+        sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(b, t2, x, p384_mod, p384_mp_mod);
+        x = r[(1<<i)*m].x;
+        /* X = A^2 - 2B */
+        sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_6(t1, b, p384_mod);
+        sp_384_mont_sub_6(x, x, t1, p384_mod);
+        /* Z = Z*Y */
+        sp_384_mont_mul_6(r[(1<<i)*m].z, z, y, p384_mod, p384_mp_mod);
+        z = r[(1<<i)*m].z;
+        /* t2 = Y^4 */
+        sp_384_mont_sqr_6(t2, t2, p384_mod, p384_mp_mod);
+        if (i != n) {
+            /* W = W*Y^4 */
+            sp_384_mont_mul_6(w, w, t2, p384_mod, p384_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_384_mont_sub_6(y, b, x, p384_mod);
+        sp_384_mont_mul_6(y, y, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_6(y, y, p384_mod);
+        sp_384_mont_sub_6(y, y, t2, p384_mod);
+
+        /* Y = Y/2 */
+        sp_384_div2_6(r[(1<<i)*m].y, y, p384_mod);
+        r[(1<<i)*m].infinity = 0;
+    }
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * ra  Result of addition.
+ * rs  Result of subtraction.
+ * p   First point to add.
+ * q   Second point to add.
+ * t   Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_sub_6(sp_point_384* ra, sp_point_384* rs,
+        const sp_point_384* p, const sp_point_384* q, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*6;
+    sp_digit* t3 = t + 4*6;
+    sp_digit* t4 = t + 6*6;
+    sp_digit* t5 = t + 8*6;
+    sp_digit* t6 = t + 10*6;
+    sp_digit* x = ra->x;
+    sp_digit* y = ra->y;
+    sp_digit* z = ra->z;
+    sp_digit* xs = rs->x;
+    sp_digit* ys = rs->y;
+    sp_digit* zs = rs->z;
+
+
+    XMEMCPY(x, p->x, sizeof(p->x) / 2);
+    XMEMCPY(y, p->y, sizeof(p->y) / 2);
+    XMEMCPY(z, p->z, sizeof(p->z) / 2);
+    ra->infinity = 0;
+    rs->infinity = 0;
+
+    /* U1 = X1*Z2^2 */
+    sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t1, t1, x, p384_mod, p384_mp_mod);
+    /* U2 = X2*Z1^2 */
+    sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod);
+    /* S1 = Y1*Z2^3 */
+    sp_384_mont_mul_6(t3, t3, y, p384_mod, p384_mp_mod);
+    /* S2 = Y2*Z1^3 */
+    sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod);
+    /* H = U2 - U1 */
+    sp_384_mont_sub_6(t2, t2, t1, p384_mod);
+    /* RS = S2 + S1 */
+    sp_384_mont_add_6(t6, t4, t3, p384_mod);
+    /* R = S2 - S1 */
+    sp_384_mont_sub_6(t4, t4, t3, p384_mod);
+    /* Z3 = H*Z1*Z2 */
+    /* ZS = H*Z1*Z2 */
+    sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod);
+    XMEMCPY(zs, z, sizeof(p->z)/2);
+    /* X3 = R^2 - H^3 - 2*U1*H^2 */
+    /* XS = RS^2 - H^3 - 2*U1*H^2 */
+    sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_6(xs, t6, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_6(x, x, t5, p384_mod);
+    sp_384_mont_sub_6(xs, xs, t5, p384_mod);
+    sp_384_mont_dbl_6(t1, y, p384_mod);
+    sp_384_mont_sub_6(x, x, t1, p384_mod);
+    sp_384_mont_sub_6(xs, xs, t1, p384_mod);
+    /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+    /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */
+    sp_384_mont_sub_6(ys, y, xs, p384_mod);
+    sp_384_mont_sub_6(y, y, x, p384_mod);
+    sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod);
+    sp_384_sub_6(t6, p384_mod, t6);
+    sp_384_mont_mul_6(ys, ys, t6, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_6(y, y, t5, p384_mod);
+    sp_384_mont_sub_6(ys, ys, t5, p384_mod);
+}
+
+/* Structure used to describe recoding of scalar multiplication. */
+typedef struct ecc_recode_384 {
+    /* Index into pre-computation table. */
+    uint8_t i;
+    /* Use the negative of the point. */
+    uint8_t neg;
+} ecc_recode_384;
+
+/* The index into pre-computation table to use.
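+ *
+ * A window value w of 0..31 selects table entry w directly; w of 32..63 is
+ * used as -(64 - w) with a carry of 1 into the next window (e.g. w = 35
+ * becomes -29, since 35 = 64 - 29), and w = 64 selects entry 0 with a carry
+ * of 1.  Negating a point only negates its y-ordinate, so the 33
+ * precomputed multiples 0..32 of the point cover all window values.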
*/ +static const uint8_t recode_index_6_6[66] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, + 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, + 0, 1, +}; + +/* Whether to negate y-ordinate. */ +static const uint8_t recode_neg_6_6[66] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, +}; + +/* Recode the scalar for multiplication using pre-computed values and + * subtraction. + * + * k Scalar to multiply by. + * v Vector of operations to perform. + */ +static void sp_384_ecc_recode_6_6(const sp_digit* k, ecc_recode_384* v) +{ + int i, j; + uint8_t y; + int carry = 0; + int o; + sp_digit n; + + j = 0; + n = k[j]; + o = 0; + for (i=0; i<65; i++) { + y = n; + if (o + 6 < 64) { + y &= 0x3f; + n >>= 6; + o += 6; + } + else if (o + 6 == 64) { + n >>= 6; + if (++j < 6) + n = k[j]; + o = 0; + } + else if (++j < 6) { + n = k[j]; + y |= (n << (64 - o)) & 0x3f; + o -= 58; + n >>= o; + } + + y += carry; + v[i].i = recode_index_6_6[y]; + v[i].neg = recode_neg_6_6[y]; + carry = (y >> 6) + v[i].neg; + } +} + +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_win_add_sub_6(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 td[33]; + sp_point_384 rtd, pd; + sp_digit tmpd[2 * 6 * 6]; +#endif + sp_point_384* t; + sp_point_384* rt; + sp_point_384* p = NULL; + sp_digit* tmp; + sp_digit* negy; + int i; + ecc_recode_384 v[65]; + int err; + + (void)heap; + + err = sp_384_point_new_6(heap, rtd, rt); + if (err == MP_OKAY) + err = sp_384_point_new_6(heap, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 33, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#else + t = td; + tmp = tmpd; +#endif + + + if (err == MP_OKAY) { + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_384_mod_mul_norm_6(t[1].x, g->x, p384_mod); + } + if (err == MP_OKAY) { + err = sp_384_mod_mul_norm_6(t[1].y, g->y, p384_mod); + } + if (err == MP_OKAY) { + err = sp_384_mod_mul_norm_6(t[1].z, g->z, p384_mod); + } + + if (err == MP_OKAY) { + t[1].infinity = 0; + /* t[2] ... 
t[32] */ + sp_384_proj_point_dbl_n_store_6(t, &t[ 1], 5, 1, tmp); + sp_384_proj_point_add_6(&t[ 3], &t[ 2], &t[ 1], tmp); + sp_384_proj_point_dbl_6(&t[ 6], &t[ 3], tmp); + sp_384_proj_point_add_sub_6(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp); + sp_384_proj_point_dbl_6(&t[10], &t[ 5], tmp); + sp_384_proj_point_add_sub_6(&t[11], &t[ 9], &t[10], &t[ 1], tmp); + sp_384_proj_point_dbl_6(&t[12], &t[ 6], tmp); + sp_384_proj_point_dbl_6(&t[14], &t[ 7], tmp); + sp_384_proj_point_add_sub_6(&t[15], &t[13], &t[14], &t[ 1], tmp); + sp_384_proj_point_dbl_6(&t[18], &t[ 9], tmp); + sp_384_proj_point_add_sub_6(&t[19], &t[17], &t[18], &t[ 1], tmp); + sp_384_proj_point_dbl_6(&t[20], &t[10], tmp); + sp_384_proj_point_dbl_6(&t[22], &t[11], tmp); + sp_384_proj_point_add_sub_6(&t[23], &t[21], &t[22], &t[ 1], tmp); + sp_384_proj_point_dbl_6(&t[24], &t[12], tmp); + sp_384_proj_point_dbl_6(&t[26], &t[13], tmp); + sp_384_proj_point_add_sub_6(&t[27], &t[25], &t[26], &t[ 1], tmp); + sp_384_proj_point_dbl_6(&t[28], &t[14], tmp); + sp_384_proj_point_dbl_6(&t[30], &t[15], tmp); + sp_384_proj_point_add_sub_6(&t[31], &t[29], &t[30], &t[ 1], tmp); + + negy = t[0].y; + + sp_384_ecc_recode_6_6(k, v); + + i = 64; + XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_384)); + for (--i; i>=0; i--) { + sp_384_proj_point_dbl_n_6(rt, 6, tmp); + + XMEMCPY(p, &t[v[i].i], sizeof(sp_point_384)); + sp_384_sub_6(negy, p384_mod, p->y); + sp_384_cond_copy_6(p->y, negy, (sp_digit)0 - v[i].neg); + sp_384_proj_point_add_6(rt, rt, p, tmp); + } + + if (map != 0) { + sp_384_map_6(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_384)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); + if (tmp != NULL) + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif + sp_384_point_free_6(p, 0, heap); + sp_384_point_free_6(rt, 0, heap); + + return err; +} + +/* A table entry for pre-computed points. */ +typedef struct sp_table_entry_384 { + sp_digit x[6]; + sp_digit y[6]; +} sp_table_entry_384; + +#ifdef FP_ECC +#endif /* FP_ECC */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
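+ *
+ * Because q->z is one (in Montgomery form), U1 = X1 and S1 = Y1 need no
+ * Z2 multiplications, which is what makes this "mixed" addition cheaper
+ * than sp_384_proj_point_add_6.  A sketch of the formulas computed below:
+ *   U2 = X2*Z1^2, S2 = Y2*Z1^3, H = U2 - X1, R = S2 - Y1
+ *   X3 = R^2 - H^3 - 2*X1*H^2
+ *   Y3 = R*(X1*H^2 - X3) - Y1*H^3
+ *   Z3 = H*Z1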
+ */ +static void sp_384_proj_point_add_qz1_6(sp_point_384* r, const sp_point_384* p, + const sp_point_384* q, sp_digit* t) +{ + const sp_point_384* ap[2]; + sp_point_384* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + sp_digit* t3 = t + 4*6; + sp_digit* t4 = t + 6*6; + sp_digit* t5 = t + 8*6; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Check double */ + (void)sp_384_sub_6(t1, p384_mod, q->y); + sp_384_norm_6(t1); + if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & + (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { + sp_384_proj_point_dbl_6(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_384)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<6; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<6; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<6; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); + /* H = U2 - X1 */ + sp_384_mont_sub_6(t2, t2, x, p384_mod); + /* R = S2 - Y1 */ + sp_384_mont_sub_6(t4, t4, y, p384_mod); + /* Z3 = H*Z1 */ + sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_384_mont_sqr_6(t1, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t3, x, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(x, t1, t5, p384_mod); + sp_384_mont_dbl_6(t1, t3, p384_mod); + sp_384_mont_sub_6(x, x, t1, p384_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_384_mont_sub_6(t3, t3, x, p384_mod); + sp_384_mont_mul_6(t3, t3, t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t5, t5, y, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(y, t3, t5, p384_mod); + } +} + +#ifdef FP_ECC +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_384_proj_to_affine_6(sp_point_384* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 6; + sp_digit* tmp = t + 4 * 6; + + sp_384_mont_inv_6(t1, a->z, tmp); + + sp_384_mont_sqr_6(t2, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t1, t2, t1, p384_mod, p384_mp_mod); + + sp_384_mont_mul_6(a->x, a->x, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(a->y, a->y, t1, p384_mod, p384_mp_mod); + XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod)); +} + +/* Generate the pre-computed table of points for the base point. + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. 
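+ *
+ * Layout sketch: bit b of a table index selects the point 2^(48*b) * a, so
+ * entry j holds the sum of 2^(48*b) * a over the set bits b of j (256
+ * affine entries in all).  Entry 2^i is obtained from entry 2^(i-1) by 48
+ * repeated doublings; every other entry is one mixed addition of two
+ * already-computed entries.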
+ */
+static int sp_384_gen_stripe_table_6(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_6(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_6(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_384_proj_point_dbl_n_6(t, 48, tmp);
+            sp_384_proj_to_affine_6(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_6(t, s1, s2, tmp);
+                sp_384_proj_to_affine_6(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_6(s2, 0, heap);
+    sp_384_point_free_6(s1, 0, heap);
+    sp_384_point_free_6( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
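+/* Illustrative sketch (not part of the generated wolfSSL code, compiled
+ * out): how sp_384_ecc_mulmod_stripe_6 below derives its table index.
+ * Bit (i + 48*j) of the 384-bit scalar contributes bit j of an 8-bit
+ * index, so each loop iteration handles eight scalar bits with one
+ * doubling and one table-point addition.  The helper name is hypothetical.
+ */
+#if 0
+static int sp_384_stripe_index_6(const sp_digit* k, int i)
+{
+    int j;
+    int x;
+    int y = 0;
+
+    /* Gather bit (i + 48*j) of the scalar, for j = 0..7, into bit j of y. */
+    for (j = 0, x = i; j < 8; j++, x += 48) {
+        y |= (int)((k[x / 64] >> (x % 64)) & 1) << j;
+    }
+    return y;
+}
+#endif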
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_6(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 6 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_6(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=47; j<8; j++,x+=48) {
+            y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=46; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=48) {
+                y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_6(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_6(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_6(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(p, 0, heap);
+    sp_384_point_free_6(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[6];
+    sp_digit y[6];
+    sp_table_entry_384 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_6(g->x, sp_cache_384[i].x) &
+                sp_384_cmp_equal_6(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
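+ *
+ * With FP_ECC defined, g is looked up in a small per-thread cache: the
+ * first multiplication by a given point falls back to the window add/sub
+ * method, the second generates that point's 256-entry stripe table, and
+ * from then on the faster fixed-point stripe method is used.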
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, const sp_digit* k, + int map, void* heap) +{ +#ifndef FP_ECC + return sp_384_ecc_mulmod_win_add_sub_6(r, g, k, map, heap); +#else + sp_digit tmp[2 * 6 * 7]; + sp_cache_384_t* cache; + int err = MP_OKAY; + +#ifndef HAVE_THREAD_LS + if (initCacheMutex_384 == 0) { + wc_InitMutex(&sp_cache_384_lock); + initCacheMutex_384 = 1; + } + if (wc_LockMutex(&sp_cache_384_lock) != 0) + err = BAD_MUTEX_E; +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_384(g, &cache); + if (cache->cnt == 2) + sp_384_gen_stripe_table_6(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_384_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_384_ecc_mulmod_win_add_sub_6(r, g, k, map, heap); + } + else { + err = sp_384_ecc_mulmod_stripe_6(r, g, cache->table, k, + map, heap); + } + } + + return err; +#endif +} + +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map, + void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 p; + sp_digit kd[6]; +#endif + sp_point_384* point; + sp_digit* k = NULL; + int err = MP_OKAY; + + err = sp_384_point_new_6(heap, p, point); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + if (err == MP_OKAY) { + sp_384_from_mp(k, 6, km); + sp_384_point_from_ecc_point_6(point, gm); + + err = sp_384_ecc_mulmod_6(point, point, k, map, heap); + } + if (err == MP_OKAY) { + err = sp_384_point_to_ecc_point_6(point, r); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_6(point, 0, heap); + + return err; +} + +static const sp_table_entry_384 p384_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x3dd0756649c0b528L,0x20e378e2a0d6ce38L,0x879c3afc541b4d6eL, + 0x6454868459a30effL,0x812ff723614ede2bL,0x4d3aadc2299e1513L }, + { 0x23043dad4b03a4feL,0xa1bfa8bf7bb4a9acL,0x8bade7562e83b050L, + 0xc6c3521968f4ffd9L,0xdd8002263969a840L,0x2b78abc25a15c5e9L } }, + /* 2 */ + { { 0x298647532b0c535bL,0x90dd695370506296L,0x038cd6b4216ab9acL, + 0x3df9b7b7be12d76aL,0x13f4d9785f347bdbL,0x222c5c9c13e94489L }, + { 0x5f8e796f2680dc64L,0x120e7cb758352417L,0x254b5d8ad10740b8L, + 0xc38b8efb5337dee6L,0xf688c2e194f02247L,0x7b5c75f36c25bc4cL } }, + /* 3 */ + { { 0xe26a3cc39edffea5L,0x35bbfd1c37d7e9fcL,0xf0e7700d9bde3ef6L, + 0x0380eb471a538f5aL,0x2e9da8bb05bf9eb3L,0xdbb93c731a460c3eL }, + { 0x37dba260f526b605L,0x95d4978efd785537L,0x24ed793aed72a04aL, + 0x2694837776005b1aL,0x99f557b99e681f82L,0xae5f9557d64954efL } }, + /* 4 */ + { { 0x24480c57f26feef9L,0xc31a26943a0e1240L,0x735002c3273e2bc7L, + 
0x8c42e9c53ef1ed4cL,0x028babf67f4948e8L,0x6a502f438a978632L }, + { 0xf5f13a46b74536feL,0x1d218babd8a9f0ebL,0x30f36bcc37232768L, + 0xc5317b31576e8c18L,0xef1d57a69bbcb766L,0x917c4930b3e3d4dcL } }, + /* 5 */ + { { 0x11426e2ee349ddd0L,0x9f117ef99b2fc250L,0xff36b480ec0174a6L, + 0x4f4bde7618458466L,0x2f2edb6d05806049L,0x8adc75d119dfca92L }, + { 0xa619d097b7d5a7ceL,0x874275e5a34411e9L,0x5403e0470da4b4efL, + 0x2ebaafd977901d8fL,0x5e63ebcea747170fL,0x12a369447f9d8036L } }, + /* 6 */ + { { 0x28f9c07a4fc52870L,0xce0b37481a53a961L,0xd550fa180e1828d9L, + 0xa24abaf76adb225aL,0xd11ed0a56e58a348L,0xf3d811e6948acb62L }, + { 0x8618dd774c61ed22L,0x0bb747f980b47c9dL,0x22bf796fde6b8559L, + 0xfdfd1c6d680a21e9L,0xc0db15772af2c9ddL,0xa09379e6c1e90f3dL } }, + /* 7 */ + { { 0x386c66efe085c629L,0x5fc2a461095bc89aL,0x1353d631203f4b41L, + 0x7ca1972b7e4bd8f5L,0xb077380aa7df8ce9L,0xd8a90389ee7e4ea3L }, + { 0x1bc74dc7e7b14461L,0xdc2cb0140c9c4f78L,0x52b4b3a684ef0a10L, + 0xbde6ea5d20327fe2L,0xb71ec435660f9615L,0xeede5a04b8ad8173L } }, + /* 8 */ + { { 0x5584cbb3893b9a2dL,0x820c660b00850c5dL,0x4126d8267df2d43dL, + 0xdd5bbbf00109e801L,0x85b92ee338172f1cL,0x609d4f93f31430d9L }, + { 0x1e059a07eadaf9d6L,0x70e6536c0f125fb0L,0xd6220751560f20e7L, + 0xa59489ae7aaf3a9aL,0x7b70e2f664bae14eL,0x0dd0370176d08249L } }, + /* 9 */ + { { 0x4cc13be88510521fL,0x87315ba9f724cc17L,0xb49d83bb353dc263L, + 0x8b677efe0c279257L,0x510a1c1cc93c9537L,0x33e30cd8a4702c99L }, + { 0xf0ffc89d2208353fL,0x0170fa8dced42b2bL,0x090851ed26e2a5f5L, + 0x81276455ecb52c96L,0x0646c4e17fe1adf4L,0x513f047eb0868eabL } }, + /* 10 */ + { { 0xc07611f4df5bdf53L,0x45d331a758b11a6dL,0x58965daf1c4ee394L, + 0xba8bebe75a5878d1L,0xaecc0a1882dd3025L,0xcf2a3899a923eb8bL }, + { 0xf98c9281d24fd048L,0x841bfb598bbb025dL,0xb8ddf8cec9ab9d53L, + 0x538a4cb67fef044eL,0x092ac21f23236662L,0xa919d3850b66f065L } }, + /* 11 */ + { { 0x3db03b4085d480d8L,0x8cd9f4791b287a7dL,0x8f24dc754a8f3baeL, + 0x482eb8003db41892L,0x38bf9eb39c56e0f5L,0x8b9773209a91dc6fL }, + { 0xa31b05b27209cfc2L,0x4c49bf8505b2db70L,0x56462498d619527bL, + 0x3fe510391fac51baL,0xfb04f55eab4b8342L,0xc07c10dc04c6eabfL } }, + /* 12 */ + { { 0xad22fe4cdb32f048L,0x5f23bf91475ed6dfL,0xa50ce0c0aa66b6cbL, + 0xdf627a89f03405c0L,0x3674837df95e2d6aL,0x081c95b6ba42e64eL }, + { 0xeba3e036e71d6cebL,0xb45bcccf6c6b0271L,0x67b47e630684701dL, + 0x60f8f942e712523fL,0x824234725cd47adcL,0x83027d7987649cbbL } }, + /* 13 */ + { { 0xb3929ea63615b0b8L,0xb41441fda54dac41L,0x8995d556b5b6a368L, + 0xa80d4529167ef05eL,0xf6bcb4a16d25a27fL,0x210d6a4c7bd55b68L }, + { 0xf3804abb25351130L,0x1d2df699903e37ebL,0x5f201efc084c25c8L, + 0x31a28c87a1c68e91L,0x81dad253563f62a5L,0x5dd6de70d6c415d4L } }, + /* 14 */ + { { 0x29f470fd846612ceL,0x986f3eecda18d997L,0x6b84c1612f34af86L, + 0x5ef0a40846ddaf8bL,0x14405a00e49e795fL,0x5f491b16aa2f7a37L }, + { 0xc7f07ae4db41b38dL,0xef7d119e18fbfcaaL,0x3a18e07614443b19L, + 0x4356841a79a19926L,0x91f4a91ce2226fbeL,0xdc77248c3cc88721L } }, + /* 15 */ + { { 0xd570ff1ae4b1ec9dL,0x21d23e0ee7eef706L,0x3cde40f4ca19e086L, + 0x7d6523c4cd4bb270L,0x16c1f06cbf13aa6cL,0x5aa7245ad14c4b60L }, + { 0x37f8146744b74de8L,0x839e7a17620a934eL,0xf74d14e8de8b1aa1L, + 0x8789fa51f30d75e2L,0x09b24052c81c261eL,0x654e267833c565eeL } }, + /* 16 */ + { { 0x378205de2f9fbe67L,0xc4afcb837f728e44L,0xdbcec06c682e00f1L, + 0xf2a145c3114d5423L,0xa01d98747a52463eL,0xfc0935b17d717b0aL }, + { 0x9653bc4fd4d01f95L,0x9aa83ea89560ad34L,0xf77943dcaf8e3f3fL, + 0x70774a10e86fe16eL,0x6b62e6f1bf9ffdcfL,0x8a72f39e588745c9L } }, + /* 17 */ + { { 
0x73ade4da2341c342L,0xdd326e54ea704422L,0x336c7d983741cef3L, + 0x1eafa00d59e61549L,0xcd3ed892bd9a3efdL,0x03faf26cc5c6c7e4L }, + { 0x087e2fcf3045f8acL,0x14a65532174f1e73L,0x2cf84f28fe0af9a7L, + 0xddfd7a842cdc935bL,0x4c0f117b6929c895L,0x356572d64c8bcfccL } }, + /* 18 */ + { { 0x7ecbac017d8c1bbaL,0x6058f9c390b0f3d5L,0xaee116e3f6197d0fL, + 0xc4dd70684033b128L,0xf084dba6c209b983L,0x97c7c2cf831dbc4aL }, + { 0x2f4e61ddf96010e8L,0xd97e4e20529faa17L,0x4ee6666069d37f20L, + 0xccc139ed3d366d72L,0x690b6ee213488e0fL,0x7cad1dc5f3a6d533L } }, + /* 19 */ + { { 0x660a9a81da57a41fL,0xe74a0412ec0039b6L,0x42343c6b5e1dad15L, + 0x284f3ff546681d4cL,0xb51087f163749e89L,0x070f23cc6f9f2f13L }, + { 0x542211da5d186e14L,0x84748f37fddb0dffL,0x41a3aab4db1f4180L, + 0x25ed667ba6402d0eL,0x2f2924a902f58355L,0x5844ee7cfa44a689L } }, + /* 20 */ + { { 0xfab086073f3b236fL,0x19e9d41d81e221daL,0xf3f6571e3927b428L, + 0x4348a9337550f1f6L,0x7167b996a85e62f0L,0x62d437597f5452bfL }, + { 0xd85feb9ef2955926L,0x440a561f6df78353L,0x389668ec9ca36b59L, + 0x052bf1a1a22da016L,0xbdfbff72f6093254L,0x94e50f28e22209f3L } }, + /* 21 */ + { { 0x90b2e5b33062e8afL,0xa8572375e8a3d369L,0x3fe1b00b201db7b1L, + 0xe926def0ee651aa2L,0x6542c9beb9b10ad7L,0x098e309ba2fcbe74L }, + { 0x779deeb3fff1d63fL,0x23d0e80a20bfd374L,0x8452bb3b8768f797L, + 0xcf75bb4d1f952856L,0x8fe6b40029ea3faaL,0x12bd3e4081373a53L } }, + /* 22 */ + { { 0xc023780d104cbba5L,0x6207e747fa35dd4cL,0x35c239281ca9b6a3L, + 0x4ff19be897987b10L,0xb8476bbf8022eee8L,0xaa0a4a14d3bbe74dL }, + { 0x20f94331187d4543L,0x3215387079f6e066L,0x83b0f74eac7e82e1L, + 0xa7748ba2828f06abL,0xc5f0298ac26ef35fL,0x0f0c50708e9a7dbdL } }, + /* 23 */ + { { 0x0c5c244cdef029ddL,0x3dabc687850661b8L,0x9992b865fe11d981L, + 0xe9801b8f6274dbadL,0xe54e6319098da242L,0x9929a91a91a53d08L }, + { 0x37bffd7235285887L,0xbc759425f1418102L,0x9280cc35fd2e6e20L, + 0x735c600cfbc42ee5L,0xb7ad28648837619aL,0xa3627231a778c57bL } }, + /* 24 */ + { { 0xae799b5c91361ed8L,0x47d71b756c63366cL,0x54cdd5211b265a6aL, + 0xe0215a5998d77b74L,0x4424d9b7bab29db0L,0x8b0ffacc7fd9e536L }, + { 0x46d85d1237b5d9efL,0x5b106d62bfa91747L,0xed0479f85f99ba2dL, + 0x0e6f39231d104de4L,0x83a84c8425e8983fL,0xa9507e0af8105a70L } }, + /* 25 */ + { { 0xf6c68a6e14cf381cL,0xaf9d27bdc22e31ccL,0x23568d4daa8a5ccbL, + 0xe431eec0e338e4d2L,0xf1a828fe8f52ad1fL,0xdb6a0579e86acd80L }, + { 0x2885672e4507832aL,0x73fc275f887e5289L,0x65f8027805610d08L, + 0x8d9b4554075ff5b0L,0x3a8e8fb109f712b5L,0x39f0ac862ebe9cf2L } }, + /* 26 */ + { { 0xd8fabf784c52edf5L,0xdcd737e5a589ae53L,0x94918bf0d791ab17L, + 0xb5fbd956bcff06c9L,0xf6d3032edca46d45L,0x2cdff7e141a3e486L }, + { 0x6674b3ba61f47ec8L,0x8a882163eef84608L,0xa257c7054c687f90L, + 0xe30cb2edf6cdf227L,0x2c4c64ca7f6ea846L,0x186fa17ccc6bcd3cL } }, + /* 27 */ + { { 0x48a3f5361dfcb91eL,0x83595e13646d358aL,0xbd15827b91128798L, + 0x3ce612b82187757aL,0x873150a161bd7372L,0xf4684530b662f568L }, + { 0x8833950b401896f6L,0xe11cb89a77f3e090L,0xb2f12cac48e7f4a5L, + 0x313dd769f606677eL,0xfdcf08b316579f93L,0x6429cec946b8f22bL } }, + /* 28 */ + { { 0x4984dd54bb75f9a4L,0x4aef06b929d3b570L,0xb5f84ca23d6e4c1eL, + 0x24c61c11b083ef35L,0xce4a7392392ca9ffL,0x865d65176730a800L }, + { 0xca3dfe76722b4a2bL,0x12c04bf97b083e0eL,0x803ce5b51b86b8a5L, + 0x3fc7632d6a7e3e0cL,0xc89970c2c81adbe4L,0x3cbcd3ad120e16b1L } }, + /* 29 */ + { { 0xfbfb4cc7ec30ce93L,0x10ed6c7db72720a2L,0xec675bf747b55500L, + 0x90725903333ff7c3L,0xc7c3973e5075bfc0L,0xb049ecb007acf31bL }, + { 0xb4076eaf4f58839cL,0x101896daa2b05e4fL,0x3f6033b0ab40c66eL, + 0x19ee9eebc8d864baL,0xeb6cf15547bf6d2aL,0x8e5a9663f826477dL } 
}, + /* 30 */ + { { 0x69e62fddf7fbd5e1L,0x38ecfe5476912b1dL,0x845a3d56d1da3bfbL, + 0x0494950e1c86f0d4L,0x83cadbf93bc36ce8L,0x41fce5724fccc8d1L }, + { 0x05f939c28332c144L,0xb17f248b0871e46eL,0x3d8534e266e8aff6L, + 0x1d06f1dc3b85c629L,0xdb06a32ea3131b73L,0xf295184d8b3f64e5L } }, + /* 31 */ + { { 0xd9653ff736ddc103L,0x25f43e3795ef606fL,0x09e301fcfe06dce8L, + 0x85af234130b6eebfL,0x79b12b530ff56b20L,0x9b4fb499fe9a3c6bL }, + { 0x0154f89251d27ac2L,0xd33167e356ca5389L,0x7828ec1fafc065a6L, + 0x0959a2587f746c9bL,0xb18f1be30c44f837L,0xa7946117c4132fdbL } }, + /* 32 */ + { { 0xc0426b775e3c647bL,0xbfcbd9398cf05348L,0x31d312e3172c0d3dL, + 0x5f49fde6ee754737L,0x895530f06da7ee61L,0xcf281b0ae8b3a5fbL }, + { 0xfd14973541b8a543L,0x41a625a73080dd30L,0xe2baae07653908cfL, + 0xc3d01436ba02a278L,0xa0d0222e7b21b8f8L,0xfdc270e9d7ec1297L } }, + /* 33 */ + { { 0x00873c0cbc7f41d6L,0xd976113e1b7ad641L,0x2a536ff4238443fbL, + 0x030d00e241e62e45L,0x532e98675f545fc6L,0xcd0331088e91208cL }, + { 0xd1a04c999797612cL,0xd4393e02eea674e2L,0xd56fa69ee19742a1L, + 0xdd2ab48085f0590eL,0xa5cefc5248a2243dL,0x48cc67b654383f41L } }, + /* 34 */ + { { 0x4e50430efc14ab48L,0x195b7f4f26706a74L,0x2fe8a228cc881ff6L, + 0xb1b968e2d945013dL,0x936aa5794b92162bL,0x4fb766b7364e754aL }, + { 0x13f93bca31e1ff7fL,0x696eb5cace4f2691L,0xff754bf8a2b09e02L, + 0x58f13c9ce58e3ff8L,0xb757346f1678c0b0L,0xd54200dba86692b3L } }, + /* 35 */ + { { 0x9a030bbd6dda1265L,0xf7b4f3fce89718ddL,0xa6a4931f936065b8L, + 0xbce72d875f72241cL,0x6cbb51cb65775857L,0xc71618154e993675L }, + { 0xe81a0f792ee32189L,0xef2fab26277dc0b2L,0x9e64f6feb71f469fL, + 0xb448ce33dfdaf859L,0x3f5c1c4cbe6b5df1L,0xfb8dfb001de45f7bL } }, + /* 36 */ + { { 0xc7345fa74d5bb921L,0x5c7e04be4d2b667eL,0x47ed3a80282d7a3eL, + 0x5c2777f87e47b2a4L,0x89b3b10008488e2eL,0x9aad77c2b2eb5b45L }, + { 0xd681bca7daac34aeL,0x2452e4e526afb326L,0x0c88792441a1ee14L, + 0x743b04d4c2407adeL,0xcb5e999bfc17a2acL,0x4dca2f824a701a06L } }, + /* 37 */ + { { 0x68e31ca61127bc1aL,0xa3edd59b17ead3beL,0x67b6b645e25f5a15L, + 0x76221794a420e15eL,0x794fd83b4b1e872eL,0x7cab3f03b2dece1bL }, + { 0x7119bf15ca9b3586L,0xa55459244d250bd7L,0x173633eacc6bcf24L, + 0x9bd308c2b1b6f884L,0x3bae06f5447d38c3L,0x54dcc135f341fe1cL } }, + /* 38 */ + { { 0x56d3598d943caf0dL,0xce044ea9225ff133L,0x9edf6a7c563fadeaL, + 0x632eb94473e8dc27L,0x814b467e3190dcabL,0x2d4f4f316dbb1e31L }, + { 0x8d69811ca143b7caL,0x4ec1ac32de7cf950L,0x223ab5fd37b5fe82L, + 0xe82616e49390f1d9L,0xabff4b2075804610L,0x11b9be15875b08f0L } }, + /* 39 */ + { { 0x4ae31a3d3bbe682cL,0xbc7c5d2674eef2ddL,0x92afd10a3c47dd40L, + 0xec7e0a3bc14ab9e1L,0x6a6c3dd1b2e495e4L,0x085ee5e9309bcd85L }, + { 0xf381a9088c2e67fdL,0x32083a80e261eaf2L,0x0fcd6a4996deee15L, + 0xe3b8fb035e524c79L,0x8dc360d91d5b08b9L,0x3a06e2c87f26719fL } }, + /* 40 */ + { { 0x5cd9f5a87237cac0L,0x93f0b59d43586794L,0x4384a764e94f6c4eL, + 0x8304ed2bb62782d3L,0x0b8db8b3cde06015L,0x4336dd535dbe190fL }, + { 0x5744355392ab473aL,0x031c7275be5ed046L,0x3e78678c21909aa4L, + 0x4ab7e04f99202ddbL,0x2648d2066977e635L,0xd427d184093198beL } }, + /* 41 */ + { { 0x822848f50f9b5a31L,0xbb003468baadb62aL,0x233a04723357559cL, + 0x49ef688079aee843L,0xa89867a0aeb9e1e3L,0xc151931b1f6f9a55L }, + { 0xd264eb0bad74251eL,0x37b9b2634abf295eL,0xb600921b04960d10L, + 0x0de53dbc4da77dc0L,0x01d9bab3d2b18697L,0xad54ec7af7156ddfL } }, + /* 42 */ + { { 0x8e74dc3579efdc58L,0x456bd3694ff68ddbL,0x724e74ccd32096a5L, + 0xe41cff42386783d0L,0xa04c7f217c70d8a4L,0x41199d2fe61a19a2L }, + { 0xd389a3e029c05dd2L,0x535f2a6be7e3fda9L,0x26ecf72d7c2b4df8L, + 
0x678275f4fe745294L,0x6319c9cc9d23f519L,0x1e05a02d88048fc4L } }, + /* 43 */ + { { 0x75cc8e2ed4d5ffe8L,0xf8bb4896dbea17f2L,0x35059790cee3cb4aL, + 0x4c06ee85a47c6165L,0xf98fff2592935d2fL,0x34c4a57232ffd7c7L }, + { 0xc4b14806ea0376a2L,0x2ea5e7504f115e02L,0x532d76e21e55d7c0L, + 0x68dc9411f31044daL,0x9272e46571b77993L,0xadaa38bb93a8cfd5L } }, + /* 44 */ + { { 0x4bf0c7127d4ed72aL,0xda0e9264ba1f79a3L,0x48c0258bf4c39ea4L, + 0xa5394ed82a715138L,0x4af511cebf06c660L,0xfcebceefec5c37cdL }, + { 0xf23b75aa779ae8c1L,0xdeff59ccad1e606eL,0xf3f526fd22755c82L, + 0x64c5ab44bb32cefdL,0xa96e11a2915bdefdL,0xab19746a1143813eL } }, + /* 45 */ + { { 0x43c78585ec837d7dL,0xca5b6fbcb8ee0ba4L,0x34e924d9d5dbb5eeL, + 0x3f4fa104bb4f1ca5L,0x15458b72398640f7L,0x4231faa9d7f407eaL }, + { 0x53e0661ef96e6896L,0x554e4c69d03b0f9dL,0xd4fcb07b9c7858d1L, + 0x7e95279352cb04faL,0x5f5f15748974e7f7L,0x2e3fa5586b6d57c8L } }, + /* 46 */ + { { 0x42cd48036a9951a8L,0xa8b15b8842792ad0L,0x18e8bcf9abb29a73L, + 0xbfd9a092409933e8L,0x760a3594efb88dc4L,0x1441886340724458L }, + { 0x162a56ee99caedc7L,0x8fb12ecd91d101c9L,0xea671967393202daL, + 0x1aac8c4aa4ccd796L,0x7db050361cf185a8L,0x0c9f86cd8cfd095aL } }, + /* 47 */ + { { 0x9a72814710b2a556L,0x767ca964327b70b2L,0x04ed9e125e3799b7L, + 0x6781d2dc22a3eb2aL,0x5bd116eb0d9450acL,0xeccac1fca7ebe08aL }, + { 0xde68444fdc2d6e94L,0x3621f42935ecf21bL,0x14e2d54329e03a2cL, + 0x53e42cd57d3e7f0aL,0xbba26c0973ed00b9L,0x00297c39c57d2272L } }, + /* 48 */ + { { 0x3aaaab10b8243a7dL,0x6eeef93e8fa58c5bL,0xf866fca39ae7f764L, + 0x64105a2661ab04d3L,0xa3578d8a03945d66L,0xb08cd3e4791b848cL }, + { 0x45edc5f8756d2411L,0xd4a790d9a755128cL,0xc2cf096349e5f6a0L, + 0xc66d267df649beaaL,0x3ce6d9688467039eL,0x50046c6b42f7816fL } }, + /* 49 */ + { { 0x92ae160266425043L,0x1ff66afdf08db890L,0x386f5a7f8f162ce5L, + 0x18d2dea0fcf5598fL,0x78372b3a1a8ca18eL,0xdf0d20eb8cd0e6f7L }, + { 0x7edd5e1d75bb4045L,0x252a47ceb96d94b7L,0xbdb293582c626776L, + 0x853c394340dd1031L,0x9dc9becf7d5f47fdL,0x27c2302fbae4044aL } }, + /* 50 */ + { { 0x2d1d208a8f2d49ceL,0x0d91aa02162df0a2L,0x9c5cce8709a07f65L, + 0xdf07238b84339012L,0x5028e2c8419442cdL,0x2dcbd35872062abaL }, + { 0xb5fbc3cbe4680967L,0x2a7bc6459f92d72cL,0x806c76e1116c369dL, + 0x5c50677a3177e8d8L,0x753739eb4569df57L,0x2d481ef636c3f40bL } }, + /* 51 */ + { { 0x1a2d39fdfea1103eL,0xeaae559295f81b17L,0xdbd0aa18f59b264aL, + 0x90c39c1acb592ee0L,0xdf62f80d9750cca3L,0xda4d8283df97cc6cL }, + { 0x0a6dd3461e201067L,0x1531f85969fb1f6bL,0x4895e5521d60121fL, + 0x0b21aab04c041c91L,0x9d896c46bcc1ccf8L,0xd24da3b33141bde7L } }, + /* 52 */ + { { 0x575a053753b0a354L,0x392ff2f40c6ddcd8L,0x0b8e8cff56157b94L, + 0x073e57bd3b1b80d1L,0x2a75e0f03fedee15L,0x752380e4aa8e6f19L }, + { 0x1f4e227c6558ffe9L,0x3a34861819ec5415L,0xab382d5ef7997085L, + 0x5e6deaffddc46ac2L,0xe5144078fc8d094cL,0xf674fe51f60e37c6L } }, + /* 53 */ + { { 0x6fb87ae5af63408fL,0xa39c36a9cd75a737L,0x7833313fcf4c618dL, + 0xfbcd4482f034c88dL,0x4469a76139b35288L,0x77a711c566b5d9c9L }, + { 0x4a695dc7944f8d65L,0xe6da5f65161aaba8L,0x8654e9c324601669L, + 0xbc8b93f528ae7491L,0x5f1d1e838f5580d8L,0x8ccf9a1acea32cc8L } }, + /* 54 */ + { { 0x28ab110c7196fee2L,0x75799d63874c8945L,0xa262934829aedaddL, + 0x9714cc7b2be88ff4L,0xf71293cfd58d60d6L,0xda6b6cb332a564e9L }, + { 0xf43fddb13dd821c2L,0xf2f2785f90dd323dL,0x91246419048489f8L, + 0x61660f26d24c6749L,0x961d9e8cc803c15cL,0x631c6158faadc4c9L } }, + /* 55 */ + { { 0xacf2ebe0fd752366L,0xb93c340e139be88bL,0x98f664850f20179eL, + 0x14820254ff1da785L,0x5278e2764f85c16eL,0xa246ee457aab1913L }, + { 
0x43861eb453763b33L,0xc49f03fc45c0bc0dL,0xafff16bcad6b1ea1L, + 0xce33908b6fd49c99L,0x5c51e9bff7fde8c3L,0x076a7a39ff142c5eL } }, + /* 56 */ + { { 0x04639dfe9e338d10L,0x8ee6996ff42b411bL,0x960461d1a875cef2L, + 0x1057b6d695b4d0baL,0x27639252a906e0bcL,0x2c19f09ae1c20f8aL }, + { 0x5b8fc3f0eef4c43dL,0xe2e1b1a807a84aa9L,0x5f455528835d2bdbL, + 0x0f4aee4d207132ddL,0xe9f8338c3907f675L,0x7a874dc90e0531f0L } }, + /* 57 */ + { { 0x84b22d4597c27050L,0xbd0b8df759e70bf8L,0xb4d6740579738b9bL, + 0x47f4d5f5cd917c4fL,0x9099c4ce13ce6e33L,0x942bfd39521d0f8bL }, + { 0x5028f0f6a43b566dL,0xaf6e866921bff7deL,0x83f6f856c44232cdL, + 0x65680579f915069aL,0xd12095a2ecfecb85L,0xcf7f06aedb01ba16L } }, + /* 58 */ + { { 0x0f56e3c48ef96c80L,0xd521f2b33ddb609cL,0x2be941027dc1450dL, + 0x2d21a07102a91fe2L,0x2e6f74fa1efa37deL,0x9a9a90b8156c28a1L }, + { 0xc54ea9ea9dc7dfcbL,0xc74e66fc2c2c1d62L,0x9f23f96749d3e067L, + 0x1c7c3a4654dd38adL,0xc70058845946cee3L,0x8985636845cc045dL } }, + /* 59 */ + { { 0x29da7cd4fce73946L,0x8f697db523168563L,0x8e235e9ccba92ec6L, + 0x55d4655f9f91d3eaL,0xf3689f23aa50a6cdL,0xdcf21c2621e6a1a0L }, + { 0xcffbc82e61b818bfL,0xc74a2f96da47a243L,0x234e980a8bc1a0cfL, + 0xf35fd6b57929cb6dL,0x81468e12efe17d6cL,0xddea6ae558b2dafbL } }, + /* 60 */ + { { 0x294de8877e787b2eL,0x258acc1f39a9310dL,0x92d9714aac14265dL, + 0x18b5591c708b48a0L,0x27cc6bb0e1abbf71L,0xc0581fa3568307b9L }, + { 0x9e0f58a3f24d4d58L,0xfebe9bb8e0ce2327L,0x91fd6a419d1be702L, + 0x9a7d8a45facac993L,0xabc0a08c9e50d66dL,0x02c342f706498201L } }, + /* 61 */ + { { 0xccd71407157bdbc2L,0x72fa89c6ad0e1605L,0xb1d3da2bb92a015fL, + 0x8ad9e7cda0a3fe56L,0x160edcbd24f06737L,0x79d4db3361275be6L }, + { 0xd3d31fd95f3497c4L,0x8cafeaee04192fb0L,0xe13ca74513a50af3L, + 0x188261678c85aae5L,0xce06cea89eb556ffL,0x2eef1995bdb549f3L } }, + /* 62 */ + { { 0x8ed7d3eb50596edcL,0xaa359362905243a2L,0xa212c2c2a4b6d02bL, + 0x611fd727c4fbec68L,0x8a0b8ff7b84f733dL,0xd85a6b905f0daf0eL }, + { 0x60e899f5d4091cf7L,0x4fef2b672eff2768L,0xc1f195cb10c33964L, + 0x8275d36993626a8fL,0xc77904f40d6c840aL,0x88d8b7fd7a868acdL } }, + /* 63 */ + { { 0x85f237237bd98425L,0xd4463992c70b154eL,0xcbb00ee296687a2eL, + 0x905fdbf7c83214fdL,0x2019d29313593684L,0x0428c393ef51218eL }, + { 0x40c7623f981e909aL,0x925133857be192daL,0x48fe480f4010907eL, + 0xdd7a187c3120b459L,0xc9d7702da1fd8f3cL,0x66e4753be358efc5L } }, + /* 64 */ + { { 0x070d34e116973cf4L,0x20aee08b7e4f34f7L,0x269af9b95eb8ad29L, + 0xdde0a036a6a45ddaL,0xa18b528e63df41e0L,0x03cc71b2a260df2aL }, + { 0x24a6770aa06b1dd7L,0x5bfa9c119d2675d3L,0x73c1e2a196844432L, + 0x3660558d131a6cf0L,0xb0289c832ee79454L,0xa6aefb01c6d8ddcdL } }, + /* 65 */ + { { 0xba1464b401ab5245L,0x9b8d0b6dc48d93ffL,0x939867dc93ad272cL, + 0xbebe085eae9fdc77L,0x73ae5103894ea8bdL,0x740fc89a39ac22e1L }, + { 0x5e28b0a328e23b23L,0x2352722ee13104d0L,0xf4667a18b0a2640dL, + 0xac74a72e49bb37c3L,0x79f734f0e81e183aL,0xbffe5b6c3fd9c0ebL } }, + /* 66 */ + { { 0xb1a358f5c6a2123fL,0x927b2d95fe28df6dL,0x89702753f199d2f9L, + 0x0a73754c1a3f82dcL,0x063d029d777affe1L,0x5439817edae6d34dL }, + { 0xf7979eef6b8b83c4L,0x615cb2149d945682L,0x8f0e4facc5e57eaeL, + 0x042b89b8113047ddL,0x888356dc93f36508L,0xbf008d185fd1f32fL } }, + /* 67 */ + { { 0x8012aa244e8068dbL,0xc72cc641a5729a47L,0x3c33df2c43f0691dL, + 0xfa0573471d92145fL,0xaefc0f2fb97f7946L,0x813d75cb2f8121bfL }, + { 0x05613c724383bba6L,0xa924ce70a4224b3fL,0xe59cecbe5f2179a6L, + 0x78e2e8aa79f62b61L,0x3ac2cc3b53ad8079L,0x55518d71d8f4fa96L } }, + /* 68 */ + { { 0x03cf292200623f3bL,0x095c71115f29ebffL,0x42d7224780aa6823L, + 
0x044c7ba17458c0b0L,0xca62f7ef0959ec20L,0x40ae2ab7f8ca929fL }, + { 0xb8c5377aa927b102L,0x398a86a0dc031771L,0x04908f9dc216a406L, + 0xb423a73a918d3300L,0x634b0ff1e0b94739L,0xe29de7252d69f697L } }, + /* 69 */ + { { 0x744d14008435af04L,0x5f255b1dfec192daL,0x1f17dc12336dc542L, + 0x5c90c2a7636a68a8L,0x960c9eb77704ca1eL,0x9de8cf1e6fb3d65aL }, + { 0xc60fee0d511d3d06L,0x466e2313f9eb52c7L,0x743c0f5f206b0914L, + 0x42f55bac2191aa4dL,0xcefc7c8fffebdbc2L,0xd4fa6081e6e8ed1cL } }, + /* 70 */ + { { 0xb5e405d3b0ab9645L,0xaeec7f98d5f1f711L,0x8ad42311585c2a6eL, + 0x045acb9e512c6944L,0xae106c4ea90db1c6L,0xb89f33d5898e6563L }, + { 0x43b07cd97fed2ce4L,0xf9934e17dd815b20L,0x6778d4d50a81a349L, + 0x9e616ade52918061L,0xfa06db06d7e67112L,0x1da23cf188488091L } }, + /* 71 */ + { { 0x821c46b342f2c4b5L,0x931513ef66059e47L,0x7030ae4366f50cd1L, + 0x43b536c943e7b127L,0x006258cf5fca5360L,0xe4e3ee796b557abfL }, + { 0xbb6b390024c8b22fL,0x2eb5e2c1fcbf1054L,0x937b18c9567492afL, + 0xf09432e4acf53957L,0x585f5a9d1dbf3a56L,0xf86751fdbe0887cfL } }, + /* 72 */ + { { 0x157399cb9d10e0b2L,0x1c0d595660dc51b7L,0x1d496b8a1f583090L, + 0x6658bc2688590484L,0x88c08ab703213f28L,0x8d2e0f737ae58de4L }, + { 0x9b79bc95486cfee6L,0x036a26c7e9e5bc57L,0x1ad03601cd8ae97aL, + 0x06907f87ff3a0494L,0x078f4bbf2c7eb584L,0xe3731bf57e8d0a5aL } }, + /* 73 */ + { { 0x72f2282be1cd0abeL,0xd4f9015e87efefa2L,0x9d1898066c3834bdL, + 0x9c8cdcc1b8a29cedL,0x0601b9f4fee82ebcL,0x371052bc7206a756L }, + { 0x76fa109246f32562L,0xdaad534c17351bb4L,0xc3d64c37b3636bb5L, + 0x038a8c5145d54e00L,0x301e618032c09e7cL,0x9764eae795735151L } }, + /* 74 */ + { { 0x8791b19fcbd5256aL,0x4007e0f26ca13a3bL,0x03b794604cf06904L, + 0xb18a9c22b6c17589L,0xa1cb7d7d81d45908L,0x6e13fa9d21bb68f1L }, + { 0x47183c62a71e6e16L,0x5cf0ef8ee18749edL,0x2c9c7f9b2e5ed409L, + 0x042eeacce6e117e1L,0xb86d481613fb5a7fL,0xea1cf0edc9e5feb1L } }, + /* 75 */ + { { 0x6e6573c9cea4cc9bL,0x5417961dafcec8f3L,0x804bf02aa438b6f6L, + 0xb894b03cdcd4ea88L,0xd0f807e93799571fL,0x3466a7f5862156e8L }, + { 0x51e59acd56515664L,0x55b0f93ca3c5eb0bL,0x84a06b026a4279dbL, + 0x5c850579c5fae08eL,0xcf07b8dba663a1a2L,0x49a36bbcf46ffc8dL } }, + /* 76 */ + { { 0xe47f5acc46d93106L,0x65b7ade0aa897c9cL,0x37cf4c9412d7e4beL, + 0xa2ae9b80d4b2caa9L,0x5e7ce09ce60357a3L,0x29f77667c8ecd5f9L }, + { 0xdf6868f5a8a0b1c5L,0x240858cf62978ad8L,0x0f7ac101dc0002a1L, + 0x1d28a9d7ffe9aa05L,0x744984d65b962c97L,0xa8a7c00b3d28c8b2L } }, + /* 77 */ + { { 0x7c58a852ae11a338L,0xa78613f1d1af96e7L,0x7e9767d25355cc73L, + 0x6ba37009792a2de6L,0x7d60f618124386b2L,0xab09b53111157674L }, + { 0x95a0484198eb9dd0L,0xe6c17acc15070328L,0xafc6da45489c6e49L, + 0xab45a60abb211530L,0xc58d65927d7ea933L,0xa3ef3c65095642c6L } }, + /* 78 */ + { { 0x89d420e9df010879L,0x9d25255d39576179L,0x9cdefd50e39513b6L, + 0xe4efe45bd5d1c313L,0xc0149de73f7af771L,0x55a6b4f4340ab06bL }, + { 0xf1325251ebeaf771L,0x2ab44128878d4288L,0xfcd5832e18e05afeL, + 0xef52a348cc1fb62bL,0x2bd08274c1c4792aL,0x345c5846877c6dc7L } }, + /* 79 */ + { { 0xde15ceb0bea65e90L,0x0987f72b2416d99cL,0x44db578dfd863decL, + 0xf617b74bac6a3578L,0x9e62bd7adb48e999L,0x877cae61eab1a1beL }, + { 0x23adddaa3a358610L,0x2fc4d6d1325e2b07L,0x897198f51585754eL, + 0xf741852cb392b584L,0x9927804cb55f7de1L,0xe9e6c4ed1aa8efaeL } }, + /* 80 */ + { { 0x867db63998683186L,0xfb5cf424ddcc4ea9L,0xcc9a7ffed4f0e7bdL, + 0x7c57f71c7a779f7eL,0x90774079d6b25ef2L,0x90eae903b4081680L }, + { 0xdf2aae5e0ee1fcebL,0x3ff1da24e86c1a1fL,0x80f587d6ca193edfL, + 0xa5695523dc9b9d6aL,0x7b84090085920303L,0x1efa4dfcba6dbdefL } }, + /* 81 */ + { { 
0xfbd838f9e0540015L,0x2c323946c39077dcL,0x8b1fb9e6ad619124L, + 0x9612440c0ca62ea8L,0x9ad9b52c2dbe00ffL,0xf52abaa1ae197643L }, + { 0xd0e898942cac32adL,0xdfb79e4262a98f91L,0x65452ecf276f55cbL, + 0xdb1ac0d27ad23e12L,0xf68c5f6ade4986f0L,0x389ac37b82ce327dL } }, + /* 82 */ + { { 0x511188b4f8e60f5bL,0x7fe6701548aa2adaL,0xdb333cb8381abca2L, + 0xb15e6d9ddaf3fc97L,0x4b24f6eb36aabc03L,0xc59789df72a748b4L }, + { 0x26fcb8a529cf5279L,0x7a3c6bfc01ad9a6cL,0x866cf88d4b8bac9bL, + 0xf4c899899c80d041L,0xf0a0424170add148L,0x5a02f47945d81a41L } }, + /* 83 */ + { { 0xfa5c877cc1c90202L,0xd099d440f8ac7570L,0x428a5b1bd17881f7L, + 0x61e267db5b2501d7L,0xf889bf04f2e4465bL,0x4da3ae0876aa4cb8L }, + { 0x3ef0fe26e3e66861L,0x5e7729533318b86dL,0xc3c35fbc747396dfL, + 0x5115a29c439ffd37L,0xbfc4bd97b2d70374L,0x088630ea56246b9dL } }, + /* 84 */ + { { 0xcd96866db8a9e8c9L,0xa11963b85bb8091eL,0xc7f90d53045b3cd2L, + 0x755a72b580f36504L,0x46f8b39921d3751cL,0x4bffdc9153c193deL }, + { 0xcd15c049b89554e7L,0x353c6754f7a26be6L,0x79602370bd41d970L, + 0xde16470b12b176c0L,0x56ba117540c8809dL,0xe2db35c3e435fb1eL } }, + /* 85 */ + { { 0xd71e4aab6328e33fL,0x5486782baf8136d1L,0x07a4995f86d57231L, + 0xf1f0a5bd1651a968L,0xa5dc5b2476803b6dL,0x5c587cbc42dda935L }, + { 0x2b6cdb32bae8b4c0L,0x66d1598bb1331138L,0x4a23b2d25d7e9614L, + 0x93e402a674a8c05dL,0x45ac94e6da7ce82eL,0xeb9f8281e463d465L } }, + /* 86 */ + { { 0x34e0f9d1fecf5b9bL,0xa115b12bf206966aL,0x5591cf3b1eaa0534L, + 0x5f0293cbfb1558f9L,0x1c8507a41bc703a5L,0x92e6b81c862c1f81L }, + { 0xcc9ebc66cdaf24e3L,0x68917ecd72fcfc70L,0x6dc9a9308157ba48L, + 0x5d425c08b06ab2b2L,0x362f8ce736e929c4L,0x09f6f57c62e89324L } }, + /* 87 */ + { { 0x1c7d6b78d29375fbL,0xfabd851ee35d1157L,0xf6f62dcd4243ea47L, + 0x1dd924608fe30b0fL,0x08166dfaffc6e709L,0xc6c4c6930881e6a7L }, + { 0x20368f87d6a53fb0L,0x38718e9f9eb4d1f9L,0x03f08acdafd7e790L, + 0x0835eb4472fe2a1cL,0x7e05090388076e5dL,0x538f765ea638e731L } }, + /* 88 */ + { { 0x0e0249d9c2663b4bL,0xe700ab5b47cd38ddL,0xb192559d2c46559fL, + 0x8f9f74a84bcde66dL,0xad1615233e2aced5L,0xc155c0473dd03a5bL }, + { 0x346a87993be454ebL,0x66ee94db83b7dccdL,0x1f6d8378ab9d2abeL, + 0x4a396dd27733f355L,0x419bd40af53553c2L,0xd0ead98d731dd943L } }, + /* 89 */ + { { 0x908e0b0eec142408L,0x98943cb94114b310L,0x03dbf7d81742b1d7L, + 0xd270df6b693412f4L,0xc50654948f69e20cL,0xa76a90c3697e43a1L }, + { 0xe0fa33844624825aL,0x82e48c0b8acc34c2L,0x7b24bd14e9a14f2bL, + 0x4f5dd5e24db30803L,0x0c77a9e7932da0a3L,0x20db90f274c653dcL } }, + /* 90 */ + { { 0x261179b70e6c5fd9L,0xf8bec1236c982eeaL,0x47683338d4957b7eL, + 0xcc47e6640a72f66aL,0xbd54bf6a1bad9350L,0xdfbf4c6af454e95aL }, + { 0x3f7a7afa6907f4faL,0x7311fae0865ca735L,0x24737ab82a496adaL, + 0x13e425f115feb79bL,0xe9e97c50a1b93c21L,0xb26b6eac4ddd3eb5L } }, + /* 91 */ + { { 0x81cab9f52a2e5f2bL,0xf93caf29bf385ac4L,0xf4bf35c3c909963aL, + 0x081e730074c9143cL,0x3ea57fa8c281b4c5L,0xe497905c9b340741L }, + { 0xf556dd8a55ab3cfbL,0xd444b96b518db6adL,0x34f5425a5ef4b955L, + 0xdda7a3acecd26aa3L,0xb57da11bda655e97L,0x02da3effc2024c70L } }, + /* 92 */ + { { 0xe24b00366481d0d9L,0x3740dbe5818fdfe2L,0xc1fc1f45190fda00L, + 0x329c92803cf27fdeL,0x7435cb536934f43eL,0x2b505a5d7884e8feL }, + { 0x6cfcc6a6711adcc9L,0xf034325c531e21e1L,0xa2f4a9679b2a8a99L, + 0x9d5f38423c21bdffL,0xb25c781131b57d66L,0xdb5344d80b8093b9L } }, + /* 93 */ + { { 0x0d72e667ae50a2f5L,0x9b7f8d8ae4a861d1L,0xa129f70f330df1cbL, + 0xe90aa5d7e04fefc3L,0xff561ecbe72c3ae1L,0x0d8fb428cdb955faL }, + { 0xd2235f73d7663784L,0xc05baec67e2c456aL,0xe5c292e42adbfcccL, + 0x4fd17988efb110d5L,0x27e57734d19d49f3L,0x188ac4ce84f679feL } 
}, + /* 94 */ + { { 0x7ee344cfa796c53eL,0xbbf6074d0868009bL,0x1f1594f7474a1295L, + 0x66776edcac11632dL,0x1862278b04e2fa5aL,0x52665cf2c854a89aL }, + { 0x7e3764648104ab58L,0x167759137204fd6dL,0x86ca06a544ea1199L, + 0xaa3f765b1c9240ddL,0x5f8501a924746149L,0x7b982e30dcd251d7L } }, + /* 95 */ + { { 0xe44e9efcc15f3060L,0x5ad62f2ea87ebbe6L,0x36499d41c79500d4L, + 0xa66d6dc0336fa9d1L,0xf8afc4955afd3b1fL,0x1d8ccb24e5c9822bL }, + { 0x4031422b79d7584bL,0xc54a0580ea3f20ddL,0x3f837c8f958468c5L, + 0x3d82f110fbea7735L,0x679a87787dffe2fcL,0x48eba63b20704803L } }, + /* 96 */ + { { 0x89b10d41df46e2f6L,0x13ab57f819514367L,0x067372b91d469c87L, + 0x0c195afa4f6c5798L,0xea43a12a272c9acfL,0x9dadd8cb678abdacL }, + { 0xcce56c6be182579aL,0x86febadb2d26c2d8L,0x1c668ee12a44745cL, + 0x580acd8698dc047aL,0x5a2b79cc51b9ec2dL,0x007da6084054f6a0L } }, + /* 97 */ + { { 0x9e3ca35217b00dd0L,0x046779cb0e81a7a6L,0xb999fef3d482d871L, + 0xe6f38134d9233fbcL,0x112c3001f48cd0e0L,0x934e75763c6c66aeL }, + { 0xb44d4fc3d73234dcL,0xfcae2062864eafc1L,0x843afe2526bef21aL, + 0x61355107f3b75fdfL,0x8367a5aa794c2e6bL,0x3d2629b18548a372L } }, + /* 98 */ + { { 0x6230618f437cfaf8L,0x5b8742cb2032c299L,0x949f72472293643aL, + 0xb8040f1a09464f79L,0x049462d24f254143L,0xabd6b522366c7e76L }, + { 0x119b392bd5338f55L,0x1a80a9ce01495a0cL,0xf3118ca7f8d7537eL, + 0xb715adc26bf4b762L,0x24506165a8482b6cL,0xd958d7c696a7c84dL } }, + /* 99 */ + { { 0x9ad8aa87bdc21f31L,0xadb3cab48063e58cL,0xefd86283b07dd7b8L, + 0xc7b9b7621be7c6b4L,0x2ef58741015582deL,0xc970c52e299addf3L }, + { 0x78f02e2a22f24d66L,0xefec1d1074cc100aL,0xaf2a6a3909316e1aL, + 0xce7c22055849dd49L,0x9c1fe75c96bffc4cL,0xcad98fd27ba06ec0L } }, + /* 100 */ + { { 0xed76e2d0b648b73eL,0xa9f92ce51cfd285eL,0xa8c86c062ed13de1L, + 0x1d3a574ea5191a93L,0x385cdf8b1ad1b8bfL,0xbbecc28a47d2cfe3L }, + { 0x98d326c069cec548L,0x4f5bc1ddf240a0b2L,0x241a706229057236L, + 0x0fc6e9c5c68294a4L,0x4d04838ba319f17aL,0x8b612cf19ffc1c6fL } }, + /* 101 */ + { { 0x9bb0b5014c3830ebL,0x3d08f83c8ee0d0c5L,0xa4a6264279ba9389L, + 0x5d5d40449cbc2914L,0xae9eb83e074c46f0L,0x63bb758f74ead7d6L }, + { 0x1c40d2eac6bb29e0L,0x95aa2d874b02f41eL,0x9298917553cb199aL, + 0xdd91bafe51584f6dL,0x3715efb931a1aaecL,0xc1b6ae5b46780f9eL } }, + /* 102 */ + { { 0xcded3e4b42772f41L,0x3a700d5d3bcb79d1L,0x4430d50e80feee60L, + 0x444ef1fcf5e5d4bbL,0xc660194fe6e358ffL,0xe68a2f326a91b43cL }, + { 0x5842775c977fe4d2L,0x78fdef5c7e2a41ebL,0x5f3bec02ff8df00eL, + 0xf4b840cd5852525dL,0x0870483a4e6988bdL,0x39499e39cc64b837L } }, + /* 103 */ + { { 0xfc05de80b08df5feL,0x0c12957c63ba0362L,0xea379414d5cf1428L, + 0xc559132a54ef6216L,0x33d5f12fb9e65cf8L,0x09c602781695d663L }, + { 0x3ac1ced461f7a2fbL,0xdd838444d4f5eeb8L,0x82a38c6c8318fcadL, + 0x315be2e5e9f1a864L,0x317b5771442daf47L,0x81b5904a95aa5f9eL } }, + /* 104 */ + { { 0x6b6b1c508b21d232L,0x87f3dbc08c2cba75L,0xa7e74b46ae9f0fafL, + 0x036a0985bb7b8079L,0x4f185b908d974a25L,0x5aa7cef0d9af5ec9L }, + { 0xe0566a7057dcfffcL,0x6ea311dab8453225L,0x72ea1a8d23368aa9L, + 0xed9b208348cd552dL,0xb987967cc80ea435L,0xad735c756c104173L } }, + /* 105 */ + { { 0xaea85ab3cee76ef4L,0x44997444af1d2b93L,0x0851929beacb923fL, + 0xb080b59051e3bc0cL,0xc4ee1d8659be68a2L,0xf00de21964b26cdaL }, + { 0x8d7fb5c0f2e90d4dL,0x00e219a777d9ec64L,0xc4e6febd5d1c491cL, + 0x080e37541a8f4585L,0x4a9b86c848d2af9cL,0x2ed70db6b6679851L } }, + /* 106 */ + { { 0xaee44116586f25cbL,0xf7b6861fa0fcf70fL,0x55d2cd2018a350e8L, + 0x861bf3e592dc286fL,0x9ab18ffa6226aba7L,0xd15827bea9857b03L }, + { 0x26c1f54792e6acefL,0x422c63c8ac1fbac3L,0xa2d8760dfcbfd71dL, + 
0x35f6a539b2511224L,0xbaa88fa1048d1a21L,0x49f1abe9ebf999dbL } }, + /* 107 */ + { { 0x16f9f4f4f7492b73L,0xcf28ec1ecb392b1aL,0x45b130d469ca6ffcL, + 0x28ba8d40b72efa58L,0xace987c75ca066f5L,0x3e3992464ad022ebL }, + { 0x63a2d84e752555bbL,0xaaa93b4a9c2ae394L,0xcd80424ec89539caL, + 0x6d6b5a6daa119a99L,0xbd50334c379f2629L,0x899e925eef3cc7d3L } }, + /* 108 */ + { { 0xb7ff3651bf825dc4L,0x0f741cc440b9c462L,0x771ff5a95cc4fb5bL, + 0xcb9e9c9b47fd56feL,0xbdf053db5626c0d3L,0xa97ce675f7e14098L }, + { 0x68afe5a36c934f5eL,0x6cd5e148ccefc46fL,0xc7758570d7a88586L, + 0x49978f5edd558d40L,0xa1d5088a64ae00c1L,0x58f2a720f1d65bb2L } }, + /* 109 */ + { { 0x66fdda4a3e4daedbL,0x38318c1265d1b052L,0x28d910a24c4bbf5cL, + 0x762fe5c478a9cd14L,0x08e5ebaad2cc0aeeL,0xd2cdf257ca0c654cL }, + { 0x48f7c58b08b717d2L,0x3807184a386cd07aL,0x3240f626ae7d0112L, + 0x03e9361bc43917b0L,0xf261a87620aea018L,0x53f556a47e1e6372L } }, + /* 110 */ + { { 0xc84cee562f512a90L,0x24b3c0041b0ea9f1L,0x0ee15d2de26cc1eaL, + 0xd848762cf0c9ef7dL,0x1026e9c5d5341435L,0x8f5b73dcfdb16b31L }, + { 0x1f69bef2d2c75d95L,0x8d33d581be064ddaL,0x8c024c1257ed35e6L, + 0xf8d435f9c309c281L,0xfd295061d6960193L,0x66618d78e9e49541L } }, + /* 111 */ + { { 0x571cfd458ce382deL,0x175806eede900ddeL,0x6184996534aba3b5L, + 0xe899778ade7aec95L,0xe8f00f6eff4aa97fL,0xae971cb5010b0c6dL }, + { 0x1827eebc3af788f1L,0xd46229ffe413fe2dL,0x8a15455b4741c9b4L, + 0x5f02e690f8e424ebL,0x40a1202edae87712L,0x49b3bda264944f6dL } }, + /* 112 */ + { { 0xd63c6067035b2d69L,0xb507150d6bed91b0L,0x1f35f82f7afb39b2L, + 0xb9bd9c0116012b66L,0x00d97960ed0a5f50L,0xed7054512716f7c9L }, + { 0x1576eff4127abdb4L,0x6850d698f01e701cL,0x9fa7d7493fc87e2fL, + 0x0b6bcc6fb0ce3e48L,0xf4fbe1f5f7d8c1c0L,0xcf75230e02719cc6L } }, + /* 113 */ + { { 0x6761d6c2722d94edL,0xd1ec3f213718820eL,0x65a40b7025d0e7c6L, + 0xd67f830ebaf3cf31L,0x633b3807b93ea430L,0x17faa0ea0bc96c69L }, + { 0xe6bf3482df866b98L,0x205c1ee9a9db52d4L,0x51ef9bbdff9ab869L, + 0x3863dad175eeb985L,0xef216c3bd3cf442aL,0x3fb228e3f9c8e321L } }, + /* 114 */ + { { 0x94f9b70c0760ac07L,0xf3c9ccae9d79bf4dL,0x73cea084c5ffc83dL, + 0xef50f943dc49c38eL,0xf467a2aebc9e7330L,0x5ee534b644ea7fbaL }, + { 0x20cb627203609e7fL,0x0984435562fdc9f0L,0xaf5c8e580f1457f7L, + 0xd1f50a6cb4b25941L,0x77cb247c2ec82395L,0xa5f3e1e5da3dca33L } }, + /* 115 */ + { { 0x023489d67d85fa94L,0x0ba405372db9ce47L,0x0fdf7a1faed7aad1L, + 0xa57b0d739a4ccb40L,0x48fcec995b18967cL,0xf30b5b6eb7274d24L }, + { 0x7ccb4773c81c5338L,0xb85639e6a3ed6bd0L,0x7d9df95f1d56eadaL, + 0xe256d57f0a1607adL,0x6da7ffdc957574d6L,0x65f8404601c7a8c4L } }, + /* 116 */ + { { 0x8d45d0cbcba1e7f1L,0xef0a08c002b55f64L,0x771ca31b17e19892L, + 0xe1843ecb4885907eL,0x67797ebc364ce16aL,0x816d2b2d8df4b338L }, + { 0xe870b0e539aa8671L,0x9f0db3e4c102b5f5L,0x342966591720c697L, + 0x0ad4c89e613c0d2aL,0x1af900b2418ddd61L,0xe087ca72d336e20eL } }, + /* 117 */ + { { 0x222831ffaba10079L,0x0dc5f87b6d64fff2L,0x445479073e8cb330L, + 0xe815aaa2702a33fbL,0x338d6b2e5fba3215L,0x0f7535cb79f549c8L }, + { 0x471ecd972ee95923L,0x1e868b37c6d1c09fL,0x2bc7b8ecc666ef4eL, + 0xf5416589808a4bfcL,0xf23e9ee23fbc4d2eL,0x4357236c2d75125bL } }, + /* 118 */ + { { 0xfe176d95ba9cdb1bL,0x45a1ca012f82791eL,0x97654af24de4cca2L, + 0xbdbf9d0e5cc4bcb9L,0xf6a7df50ad97ac0aL,0xc52112b061359fd6L }, + { 0x696d9ce34f05eae3L,0x903adc02e943ac2bL,0xa90753470848be17L, + 0x1e20f1702a3973e5L,0xe1aacc1c6feb67e9L,0x2ca0ac32e16bc6b9L } }, + /* 119 */ + { { 0xffea12e4ef871eb5L,0x94c2f25da8bf0a7aL,0x4d1e4c2a78134eaaL, + 0x11ed16fb0360fb10L,0x4029b6db85fc11beL,0x5e9f7ab7f4d390faL }, + { 
0x5076d72f30646612L,0xa0afed1ddda1d0d8L,0x2902225785a1d103L, + 0xcb499e174e276bcdL,0x16d1da7151246c3dL,0xc72d56d3589a0443L } }, + /* 120 */ + { { 0xdf5ffc74dae5bb45L,0x99068c4a261bd6dcL,0xdc0afa7aaa98ec7bL, + 0xedd2ee00f121e96dL,0x163cc7be1414045cL,0xb0b1bbce335af50eL }, + { 0xd440d78501a06293L,0xcdebab7c6552e644L,0x48cb8dbc8c757e46L, + 0x81f9cf783cabe3cbL,0xddd02611b123f59aL,0x3dc7b88eeeb3784dL } }, + /* 121 */ + { { 0xe1b8d398c4741456L,0xa9dfa9026032a121L,0x1cbfc86d1263245bL, + 0xf411c7625244718cL,0x96521d5405b0fc54L,0x1afab46edbaa4985L }, + { 0xa75902ba8674b4adL,0x486b43ad5ad87d12L,0x72b1c73636e0d099L, + 0x39890e07bb6cd6d6L,0x8128999c59bace4eL,0xd8da430b7b535e33L } }, + /* 122 */ + { { 0x39f65642c6b75791L,0x050947a621806bfbL,0x0ca3e3701362ef84L, + 0x9bc60aed8c3d2391L,0x9b488671732e1ddcL,0x12d10d9ea98ee077L }, + { 0xb6f2822d3651b7dcL,0x6345a5ba80abd138L,0x62033262472d3c84L, + 0xd54a1d40acc57527L,0x6ea46b3a424447cbL,0x5bc410572fb1a496L } }, + /* 123 */ + { { 0xe70c57a3a751cd0eL,0x190d8419eba3c7d6L,0xb1c3bee79d47d55aL, + 0xda941266f912c6d8L,0x12e9aacc407a6ad6L,0xd6ce5f116e838911L }, + { 0x063ca97b70e1f2ceL,0xa3e47c728213d434L,0xa016e24184df810aL, + 0x688ad7b0dfd881a4L,0xa37d99fca89bf0adL,0xd8e3f339a23c2d23L } }, + /* 124 */ + { { 0xbdf53163750bed6fL,0x808abc3283e68b0aL,0x85a366275bb08a33L, + 0xf72a3a0f6b0e4abeL,0xf7716d19faf0c6adL,0x22dcc0205379b25fL }, + { 0x7400bf8df9a56e11L,0x6cb8bad756a47f21L,0x7c97176f7a6eb644L, + 0xe8fd84f7d1f5b646L,0x98320a9444ddb054L,0x07071ba31dde86f5L } }, + /* 125 */ + { { 0x6fdfa0e598f8fcb9L,0x89cec8e094d0d70cL,0xa0899397106d20a8L, + 0x915bfb9aba8acc9cL,0x1370c94b5507e01cL,0x83246a608a821ffbL }, + { 0xa8273a9fbe3c378fL,0x7e54478935a25be9L,0x6cfa49724dd929d7L, + 0x987fed9d365bd878L,0x4982ac945c29a7aeL,0x4589a5d75ddd7ec5L } }, + /* 126 */ + { { 0x9fabb174a95540a9L,0x7cfb886f0162c5b0L,0x17be766bea3dee18L, + 0xff7da41fe88e624cL,0xad0b71eb8b919c38L,0x86a522e0f31ff9a9L }, + { 0xbc8e6f72868bc259L,0x6130c6383ccef9e4L,0x09f1f4549a466555L, + 0x8e6c0f0919b2bfb4L,0x945c46c90ca7bb22L,0xacd871684dafb67bL } }, + /* 127 */ + { { 0x090c72ca10c53841L,0xc20ae01b55a4fcedL,0x03f7ebd5e10234adL, + 0xb3f42a6a85892064L,0xbdbc30c0b4a14722L,0x971bc4378ca124ccL }, + { 0x6f79f46d517ff2ffL,0x6a9c96e2ecba947bL,0x5e79f2f462925122L, + 0x30a96bb16a4e91f1L,0x1147c9232d4c72daL,0x65bc311f5811e4dfL } }, + /* 128 */ + { { 0x87c7dd7d139b3239L,0x8b57824e4d833baeL,0xbcbc48789fff0015L, + 0x8ffcef8b909eaf1aL,0x9905f4eef1443a78L,0x020dd4a2e15cbfedL }, + { 0xca2969eca306d695L,0xdf940cadb93caf60L,0x67f7fab787ea6e39L, + 0x0d0ee10ff98c4fe5L,0xc646879ac19cb91eL,0x4b4ea50c7d1d7ab4L } }, + /* 129 */ + { { 0x19e409457a0db57eL,0xe6017cad9a8c9702L,0xdbf739e51be5cff9L, + 0x3646b3cda7a938a2L,0x0451108568350dfcL,0xad3bd6f356e098b5L }, + { 0x935ebabfee2e3e3eL,0xfbd01702473926cbL,0x7c735b029e9fb5aaL, + 0xc52a1b852e3feff0L,0x9199abd3046b405aL,0xe306fcec39039971L } }, + /* 130 */ + { { 0xd6d9aec823e4712cL,0x7ca8376cc3c198eeL,0xe6d8318731bebd8aL, + 0xed57aff3d88bfef3L,0x72a645eecf44edc7L,0xd4e63d0b5cbb1517L }, + { 0x98ce7a1cceee0ecfL,0x8f0126335383ee8eL,0x3b879078a6b455e8L, + 0xcbcd3d96c7658c06L,0x721d6fe70783336aL,0xf21a72635a677136L } }, + /* 131 */ + { { 0x19d8b3cd9586ba11L,0xd9e0aeb28a5c0480L,0xe4261dbf2230ef5cL, + 0x095a9dee02e6bf09L,0x8963723c80dc7784L,0x5c97dbaf145157b1L }, + { 0x97e744344bc4503eL,0x0fb1cb3185a6b370L,0x3e8df2becd205d4bL, + 0x497dd1bcf8f765daL,0x92ef95c76c988a1aL,0x3f924baa64dc4cfaL } }, + /* 132 */ + { { 0x6bf1b8dd7268b448L,0xd4c28ba1efd79b94L,0x2fa1f8c8e4e3551fL, + 
0x769e3ad45c9187a9L,0x28843b4d40326c0dL,0xfefc809450d5d669L }, + { 0x30c85bfd90339366L,0x4eeb56f15ccf6c3aL,0x0e72b14928ccd1dcL, + 0x73ee85b5f2ce978eL,0xcdeb2bf33165bb23L,0x8106c9234e410abfL } }, + /* 133 */ + { { 0xc8df01617d02f4eeL,0x8a78154718e21225L,0x4ea895eb6acf9e40L, + 0x8b000cb56e5a633dL,0xf31d86d57e981ffbL,0xf5c8029c4475bc32L }, + { 0x764561ce1b568973L,0x2f809b81a62996ecL,0x9e513d64da085408L, + 0xc27d815de61ce309L,0x0da6ff99272999e0L,0xbd284779fead73f7L } }, + /* 134 */ + { { 0x6033c2f99b1cdf2bL,0x2a99cf06bc5fa151L,0x7d27d25912177b3bL, + 0xb1f15273c4485483L,0x5fd57d81102e2297L,0x3d43e017c7f6acb7L }, + { 0x41a8bb0b3a70eb28L,0x67de2d8e3e80b06bL,0x09245a4170c28de5L, + 0xad7dbcb1a7b26023L,0x70b08a352cbc6c1eL,0xb504fb669b33041fL } }, + /* 135 */ + { { 0xa8e85ab5f97a27c2L,0x6ac5ec8bc10a011bL,0x55745533ffbcf161L, + 0x01780e8565790a60L,0xe451bf8599ee75b0L,0x8907a63b39c29881L }, + { 0x76d46738260189edL,0x284a443647bd35cbL,0xd74e8c4020cab61eL, + 0x6264bf8c416cf20aL,0xfa5a6c955fd820ceL,0xfa7154d0f24bb5fcL } }, + /* 136 */ + { { 0x18482cec9b3f5034L,0x962d445acd9e68fdL,0x266fb1d695746f23L, + 0xc66ade5a58c94a4bL,0xdbbda826ed68a5b6L,0x05664a4d7ab0d6aeL }, + { 0xbcd4fe51025e32fcL,0x61a5aebfa96df252L,0xd88a07e231592a31L, + 0x5d9d94de98905517L,0x96bb40105fd440e7L,0x1b0c47a2e807db4cL } }, + /* 137 */ + { { 0x5c2a6ac808223878L,0xba08c269e65a5558L,0xd22b1b9b9bbc27fdL, + 0x919171bf72b9607dL,0x9ab455f9e588dc58L,0x6d54916e23662d93L }, + { 0x8da8e9383b1de0c1L,0xa84d186a804f278fL,0xbf4988ccd3461695L, + 0xf5eae3bee10eb0cbL,0x1ff8b68fbf2a66edL,0xa68daf67c305b570L } }, + /* 138 */ + { { 0xc1004cff44b2e045L,0x91b5e1364b1c05d4L,0x53ae409088a48a07L, + 0x73fb2995ea11bb1aL,0x320485703d93a4eaL,0xcce45de83bfc8a5fL }, + { 0xaff4a97ec2b3106eL,0x9069c630b6848b4fL,0xeda837a6ed76241cL, + 0x8a0daf136cc3f6cfL,0x199d049d3da018a8L,0xf867c6b1d9093ba3L } }, + /* 139 */ + { { 0xe4d42a5656527296L,0xae26c73dce71178dL,0x70a0adac6c251664L, + 0x813483ae5dc0ae1dL,0x7574eacddaab2dafL,0xc56b52dcc2d55f4fL }, + { 0x872bc16795f32923L,0x4be175815bdd2a89L,0x9b57f1e7a7699f00L, + 0x5fcd9c723ac2de02L,0x83af3ba192377739L,0xa64d4e2bfc50b97fL } }, + /* 140 */ + { { 0x2172dae20e552b40L,0x62f49725d34d52e8L,0x7930ee4007958f98L, + 0x56da2a90751fdd74L,0xf1192834f53e48c3L,0x34d2ac268e53c343L }, + { 0x1073c21813111286L,0x201dac14da9d9827L,0xec2c29dbee95d378L, + 0x9316f1191f3ee0b1L,0x7890c9f0544ce71cL,0xd77138af27612127L } }, + /* 141 */ + { { 0x78045e6d3b4ad1cdL,0xcd86b94e4aa49bc1L,0x57e51f1dfd677a16L, + 0xd9290935fa613697L,0x7a3f959334f4d893L,0x8c9c248b5d5fcf9bL }, + { 0x9f23a4826f70d4e9L,0x1727345463190ae9L,0x4bdd7c135b081a48L, + 0x1e2de38928d65271L,0x0bbaaa25e5841d1fL,0xc4c18a79746772e5L } }, + /* 142 */ + { { 0x10ee2681593375acL,0x4f3288be7dd5e113L,0x9a97b2fb240f3538L, + 0xfa11089f1de6b1e2L,0x516da5621351bc58L,0x573b61192dfa85b5L }, + { 0x89e966836cba7df5L,0xf299be158c28ab40L,0xe91c9348ad43fcbfL, + 0xe9bbc7cc9a1cefb3L,0xc8add876738b2775L,0x6e3b1f2e775eaa01L } }, + /* 143 */ + { { 0x0365a888b677788bL,0x634ae8c43fd6173cL,0x304987619e498dbeL, + 0x08c43e6dc8f779abL,0x068ae3844c09aca9L,0x2380c70b2018d170L }, + { 0xcf77fbc3a297c5ecL,0xdacbc853ca457948L,0x3690de04336bec7eL, + 0x26bbac6414eec461L,0xd1c23c7e1f713abfL,0xf08bbfcde6fd569eL } }, + /* 144 */ + { { 0x5f8163f484770ee3L,0x0e0c7f94744a1706L,0x9c8f05f7e1b2d46dL, + 0x417eafe7d01fd99aL,0x2ba15df511440e5bL,0xdc5c552a91a6fbcfL }, + { 0x86271d74a270f721L,0x32c0a075a004485bL,0x9d1a87e38defa075L, + 0xb590a7acbf0d20feL,0x430c41c28feda1f5L,0x454d287958f6ec24L } }, + /* 145 */ + { { 
0x52b7a6357c525435L,0x3d9ef57f37c4bdbcL,0x2bb93e9edffcc475L, + 0xf7b8ba987710f3beL,0x42ee86da21b727deL,0x55ac3f192e490d01L }, + { 0x487e3a6ec0c1c390L,0x036fb345446cde7bL,0x089eb276496ae951L, + 0xedfed4d971ed1234L,0x661b0dd5900f0b46L,0x11bd6f1b8582f0d3L } }, + /* 146 */ + { { 0x5cf9350f076bc9d1L,0x15d903becf3cd2c3L,0x21cfc8c225af031cL, + 0xe0ad32488b1cc657L,0xdd9fb96370014e87L,0xf0f3a5a1297f1658L }, + { 0xbb908fbaf1f703aaL,0x2f9cc4202f6760baL,0x00ceec6666a38b51L, + 0x4deda33005d645daL,0xb9cf5c72f7de3394L,0xaeef65021ad4c906L } }, + /* 147 */ + { { 0x0583c8b17a19045dL,0xae7c3102d052824cL,0x2a234979ff6cfa58L, + 0xfe9dffc962c733c0L,0x3a7fa2509c0c4b09L,0x516437bb4fe21805L }, + { 0x9454e3d5c2a23ddbL,0x0726d887289c104eL,0x8977d9184fd15243L, + 0xc559e73f6d7790baL,0x8fd3e87d465af85fL,0xa2615c745feee46bL } }, + /* 148 */ + { { 0xc8d607a84335167dL,0x8b42d804e0f5c887L,0x5f9f13df398d11f9L, + 0x5aaa508720740c67L,0x83da9a6aa3d9234bL,0xbd3a5c4e2a54bad1L }, + { 0xdd13914c2db0f658L,0x29dcb66e5a3f373aL,0xbfd62df55245a72bL, + 0x19d1802391e40847L,0xd9df74dbb136b1aeL,0x72a06b6b3f93bc5bL } }, + /* 149 */ + { { 0x6da19ec3ad19d96fL,0xb342daa4fb2a4099L,0x0e61633a662271eaL, + 0x3bcece81ce8c054bL,0x7cc8e0618bd62dc6L,0xae189e19ee578d8bL }, + { 0x73e7a25ddced1eedL,0xc1257f0a7875d3abL,0x2cb2d5a21cfef026L, + 0xd98ef39bb1fdf61cL,0xcd8e6f6924e83e6cL,0xd71e7076c7b7088bL } }, + /* 150 */ + { { 0x339368309d4245bfL,0x22d962172ac2953bL,0xb3bf5a8256c3c3cdL, + 0x50c9be910d0699e8L,0xec0944638f366459L,0x6c056dba513b7c35L }, + { 0x687a6a83045ab0e3L,0x8d40b57f445c9295L,0x0f345048a16f5954L, + 0x64b5c6393d8f0a87L,0x106353a29f71c5e2L,0xdd58b475874f0dd4L } }, + /* 151 */ + { { 0x67ec084f62230c72L,0xf14f6cca481385e3L,0xf58bb4074cda7774L, + 0xe15011b1aa2dbb6bL,0xd488369d0c035ab1L,0xef83c24a8245f2fdL }, + { 0xfb57328f9fdc2538L,0x79808293191fe46aL,0xe28f5c4432ede548L, + 0x1b3cda99ea1a022cL,0x39e639b73df2ec7fL,0x77b6272b760e9a18L } }, + /* 152 */ + { { 0x2b1d51bda65d56d5L,0x3a9b71f97ea696e0L,0x95250ecc9904f4c4L, + 0x8bc4d6ebe75774b7L,0x0e343f8aeaeeb9aaL,0xc473c1d1930e04cbL }, + { 0x282321b1064cd8aeL,0xf4b4371e5562221cL,0xc1cc81ecd1bf1221L, + 0xa52a07a9e2c8082fL,0x350d8e59ba64a958L,0x29e4f3de6fb32c9aL } }, + /* 153 */ + { { 0x0aa9d56cba89aaa5L,0xf0208ac0c4c6059eL,0x7400d9c6bd6ddca4L, + 0xb384e475f2c2f74aL,0x4c1061fcb1562dd3L,0x3924e2482e153b8dL }, + { 0xf38b8d98849808abL,0x29bf3260a491aa36L,0x85159ada88220edeL, + 0x8b47915bbe5bc422L,0xa934d72ed7300967L,0xc4f303982e515d0dL } }, + /* 154 */ + { { 0xe3e9ee421b1de38bL,0xa124e25a42636760L,0x90bf73c090165b1aL, + 0x21802a34146434c5L,0x54aa83f22e1fa109L,0x1d4bd03ced9c51e9L }, + { 0xc2d96a38798751e6L,0xed27235f8c3507f5L,0xb5fb80e2c8c24f88L, + 0xf873eefad37f4f78L,0x7229fd74f224ba96L,0x9dcd91999edd7149L } }, + /* 155 */ + { { 0xee9f81a64e94f22aL,0xe5609892f71ec341L,0x6c818ddda998284eL, + 0x9fd472953b54b098L,0x47a6ac030e8a7cc9L,0xde684e5eb207a382L }, + { 0x4bdd1ecd2b6b956bL,0x09084414f01b3583L,0xe2f80b3255233b14L, + 0x5a0fec54ef5ebc5eL,0x74cf25e6bf8b29a2L,0x1c757fa07f29e014L } }, + /* 156 */ + { { 0x1bcb5c4aeb0fdfe4L,0xd7c649b3f0899367L,0xaef68e3f05bc083bL, + 0x57a06e46a78aa607L,0xa2136ecc21223a44L,0x89bd648452f5a50bL }, + { 0x724411b94455f15aL,0x23dfa97008a9c0fdL,0x7b0da4d16db63befL, + 0x6f8a7ec1fb162443L,0xc1ac9ceee98284fbL,0x085a582b33566022L } }, + /* 157 */ + { { 0x15cb61f9ec1f138aL,0x11c9a230668f0c28L,0xac829729df93f38fL, + 0xcef256984048848dL,0x3f686da02bba8fbfL,0xed5fea78111c619aL }, + { 0x9b4f73bcd6d1c833L,0x5095160686e7bf80L,0xa2a73508042b1d51L, + 
0x9ef6ea495fb89ec2L,0xf1008ce95ef8b892L,0x78a7e6849ae8568bL } }, + /* 158 */ + { { 0x3fe83a7c10470cd8L,0x92734682f86df000L,0xb5dac06bda9409b5L, + 0x1e7a966094939c5fL,0xdec6c1505cc116dcL,0x1a52b40866bac8ccL }, + { 0x5303a3656e864045L,0x45eae72a9139efc1L,0x83bec6466f31d54fL, + 0x2fb4a86f6e958a6dL,0x6760718e4ff44030L,0x008117e3e91ae0dfL } }, + /* 159 */ + { { 0x5d5833ba384310a2L,0xbdfb4edc1fd6c9fcL,0xb9a4f102849c4fb8L, + 0xe5fb239a581c1e1fL,0xba44b2e7d0a9746dL,0x78f7b7683bd942b9L }, + { 0x076c8ca1c87607aeL,0x82b23c2ed5caaa7eL,0x6a581f392763e461L, + 0xca8a5e4a3886df11L,0xc87e90cf264e7f22L,0x04f74870215cfcfcL } }, + /* 160 */ + { { 0x5285d116141d161cL,0x67cd2e0e93c4ed17L,0x12c62a647c36187eL, + 0xf5329539ed2584caL,0xc4c777c442fbbd69L,0x107de7761bdfc50aL }, + { 0x9976dcc5e96beebdL,0xbe2aff95a865a151L,0x0e0a9da19d8872afL, + 0x5e357a3da63c17ccL,0xd31fdfd8e15cc67cL,0xc44bbefd7970c6d8L } }, + /* 161 */ + { { 0x703f83e24c0c62f1L,0x9b1e28ee4e195572L,0x6a82858bfe26ccedL, + 0xd381c84bc43638faL,0x94f72867a5ba43d8L,0x3b4a783d10b82743L }, + { 0xee1ad7b57576451eL,0xc3d0b59714b6b5c8L,0x3dc30954fcacc1b8L, + 0x55df110e472c9d7bL,0x97c86ed702f8a328L,0xd043341388dc098fL } }, + /* 162 */ + { { 0x1a60d1522ca8f2feL,0x61640948491bd41fL,0x6dae29a558dfe035L, + 0x9a615bea278e4863L,0xbbdb44779ad7c8e5L,0x1c7066302ceac2fcL }, + { 0x5e2b54c699699b4bL,0xb509ca6d239e17e8L,0x728165feea063a82L, + 0x6b5e609db6a22e02L,0x12813905b26ee1dfL,0x07b9f722439491faL } }, + /* 163 */ + { { 0x1592ec1448ff4e49L,0x3e4e9f176d644129L,0x7acf82881156acc0L, + 0x5aa34ba8bb092b0bL,0xcd0f90227d38393dL,0x416724ddea4f8187L }, + { 0x3c4e641cc0139e73L,0xe0fe46cf91e4d87dL,0xedb3c792cab61f8aL, + 0x4cb46de4d3868753L,0xe449c21d20f1098aL,0x5e5fd059f5b8ea6eL } }, + /* 164 */ + { { 0x7fcadd4675856031L,0x89c7a4cdeaf2fbd0L,0x1af523ce7a87c480L, + 0xe5fc109561d9ae90L,0x3fb5864fbcdb95f5L,0xbeb5188ebb5b2c7dL }, + { 0x3d1563c33ae65825L,0x116854c40e57d641L,0x11f73d341942ebd3L, + 0x24dc5904c06955b3L,0x8a0d4c83995a0a62L,0xfb26b86d5d577b7dL } }, + /* 165 */ + { { 0xc53108e7c686ae17L,0x9090d739d1c1da56L,0x4583b0139aec50aeL, + 0xdd9a088ba49a6ab2L,0x28192eeaf382f850L,0xcc8df756f5fe910eL }, + { 0x877823a39cab7630L,0x64984a9afb8e7fc1L,0x5448ef9c364bfc16L, + 0xbbb4f871c44e2a9aL,0x901a41ab435c95e9L,0xc6c23e5faaa50a06L } }, + /* 166 */ + { { 0xb78016c19034d8ddL,0x856bb44b0b13e79bL,0x85c6409ab3241a05L, + 0x8d2fe19a2d78ed21L,0xdcc7c26d726eddf2L,0x3ccaff5f25104f04L }, + { 0x397d7edc6b21f843L,0xda88e4dde975de4cL,0x5273d3964f5ab69eL, + 0x537680e39aae6cc0L,0xf749cce53e6f9461L,0x021ddbd9957bffd3L } }, + /* 167 */ + { { 0x7b64585f777233cfL,0xfe6771f60942a6f0L,0x636aba7adfe6eef0L, + 0x63bbeb5686038029L,0xacee5842de8fcf36L,0x48d9aa99d4a20524L }, + { 0xcff7a74c0da5e57aL,0xc232593ce549d6c9L,0x68504bccf0f2287bL, + 0x6d7d098dbc8360b5L,0xeac5f1495b402f41L,0x61936f11b87d1bf1L } }, + /* 168 */ + { { 0xaa9da167b8153a9dL,0xa49fe3ac9e83ecf0L,0x14c18f8e1b661384L, + 0x61c24dab38434de1L,0x3d973c3a283dae96L,0xc99baa0182754fc9L }, + { 0x477d198f4c26b1e3L,0x12e8e186a7516202L,0x386e52f6362addfaL, + 0x31e8f695c3962853L,0xdec2af136aaedb60L,0xfcfdb4c629cf74acL } }, + /* 169 */ + { { 0x6b3ee958cca40298L,0xc3878153f2f5d195L,0x0c565630ed2eae5bL, + 0xd089b37e3a697cf2L,0xc2ed2ac7ad5029eaL,0x7e5cdfad0f0dda6aL }, + { 0xf98426dfd9b86202L,0xed1960b14335e054L,0x1fdb02463f14639eL, + 0x17f709c30db6c670L,0xbfc687ae773421e1L,0x13fefc4a26c1a8acL } }, + /* 170 */ + { { 0xe361a1987ffa0a5fL,0xf4b26102c63fe109L,0x264acbc56c74e111L, + 0x4af445fa77abebafL,0x448c4fdd24cddb75L,0x0b13157d44506eeaL }, + { 
0x22a6b15972e9993dL,0x2c3c57e485e5ecbeL,0xa673560bfd83e1a1L, + 0x6be23f82c3b8c83bL,0x40b13a9640bbe38eL,0x66eea033ad17399bL } }, + /* 171 */ + { { 0x49fc6e95b4c6c693L,0xefc735de36af7d38L,0xe053343d35fe42fcL, + 0xf0aa427c6a9ab7c3L,0xc79f04364a0fcb24L,0x1628724393ebbc50L }, + { 0x5c3d6bd016927e1eL,0x40158ed2673b984cL,0xa7f86fc84cd48b9aL, + 0x1643eda660ea282dL,0x45b393eae2a1beedL,0x664c839e19571a94L } }, + /* 172 */ + { { 0x5774575027eeaf94L,0x2875c925ea99e1e7L,0xc127e7ba5086adeaL, + 0x765252a086fe424fL,0x1143cc6c2b6c0281L,0xc9bb2989d671312dL }, + { 0x880c337c51acb0a5L,0xa3710915d3c60f78L,0x496113c09262b6edL, + 0x5d25d9f89ce48182L,0x53b6ad72b3813586L,0x0ea3bebc4c0e159cL } }, + /* 173 */ + { { 0xcaba450ac5e49beaL,0x684e54157c05da59L,0xa2e9cab9de7ac36cL, + 0x4ca79b5f2e6f957bL,0xef7b024709b817b1L,0xeb3049907d89df0fL }, + { 0x508f730746fe5096L,0x695810e82e04eaafL,0x88ef1bd93512f76cL, + 0x776613513ebca06bL,0xf7d4863accf158b7L,0xb2a81e4494ee57daL } }, + /* 174 */ + { { 0xff288e5b6d53e6baL,0xa90de1a914484ea2L,0x2fadb60ced33c8ecL, + 0x579d6ef328b66a40L,0x4f2dd6ddec24372dL,0xe9e33fc91d66ec7dL }, + { 0x110899d2039eab6eL,0xa31a667a3e97bb5eL,0x6200166dcfdce68eL, + 0xbe83ebae5137d54bL,0x085f7d874800acdfL,0xcf4ab1330c6f8c86L } }, + /* 175 */ + { { 0x03f65845931e08fbL,0x6438551e1506e2c0L,0x5791f0dc9c36961fL, + 0x68107b29e3dcc916L,0x83242374f495d2caL,0xd8cfb6636ee5895bL }, + { 0x525e0f16a0349b1bL,0x33cd2c6c4a0fab86L,0x46c12ee82af8dda9L, + 0x7cc424ba71e97ad3L,0x69766ddf37621eb0L,0x95565f56a5f0d390L } }, + /* 176 */ + { { 0xe0e7bbf21a0f5e94L,0xf771e1151d82d327L,0x10033e3dceb111faL, + 0xd269744dd3426638L,0xbdf2d9da00d01ef6L,0x1cb80c71a049ceafL }, + { 0x17f183289e21c677L,0x6452af0519c8f98bL,0x35b9c5f780b67997L, + 0x5c2e1cbe40f8f3d4L,0x43f9165666d667caL,0x9faaa059cf9d6e79L } }, + /* 177 */ + { { 0x8ad246180a078fe6L,0xf6cc73e6464fd1ddL,0x4d2ce34dc3e37448L, + 0x624950c5e3271b5fL,0x62910f5eefc5af72L,0x8b585bf8aa132bc6L }, + { 0x11723985a839327fL,0x34e2d27d4aac252fL,0x402f59ef6296cc4eL, + 0x00ae055c47053de9L,0xfc22a97228b4f09bL,0xa9e86264fa0c180eL } }, + /* 178 */ + { { 0x0b7b6224bc310eccL,0x8a1a74f167fa14edL,0x87dd09607214395cL, + 0xdf1b3d09f5c91128L,0x39ff23c686b264a8L,0xdc2d49d03e58d4c5L }, + { 0x2152b7d3a9d6f501L,0xf4c32e24c04094f7L,0xc6366596d938990fL, + 0x084d078f94fb207fL,0xfd99f1d7328594cbL,0x36defa64cb2d96b3L } }, + /* 179 */ + { { 0x4619b78113ed7cbeL,0x95e500159784bd0eL,0x2a32251c2c7705feL, + 0xa376af995f0dd083L,0x55425c6c0361a45bL,0x812d2cef1f291e7bL }, + { 0xccf581a05fd94972L,0x26e20e39e56dc383L,0x0093685d63dbfbf0L, + 0x1fc164cc36b8c575L,0xb9c5ab81390ef5e7L,0x40086beb26908c66L } }, + /* 180 */ + { { 0xe5e54f7937e3c115L,0x69b8ee8cc1445a8aL,0x79aedff2b7659709L, + 0xe288e1631b46fbe6L,0xdb4844f0d18d7bb7L,0xe0ea23d048aa6424L }, + { 0x714c0e4ef3d80a73L,0x87a0aa9e3bd64f98L,0x8844b8a82ec63080L, + 0xe0ac9c30255d81a3L,0x86151237455397fcL,0x0b9794642f820155L } }, + /* 181 */ + { { 0x127a255a4ae03080L,0x232306b4580a89fbL,0x04e8cd6a6416f539L, + 0xaeb70dee13b02a0eL,0xa3038cf84c09684aL,0xa710ec3c28e433eeL }, + { 0x77a72567681b1f7dL,0x86fbce952fc28170L,0xd3408683f5735ac8L, + 0x3a324e2a6bd68e93L,0x7ec74353c027d155L,0xab60354cd4427177L } }, + /* 182 */ + { { 0x32a5342aef4c209dL,0x2ba7527408d62704L,0x4bb4af6fc825d5feL, + 0x1c3919ced28e7ff1L,0x1dfc2fdcde0340f6L,0xc6580baf29f33ba9L }, + { 0xae121e7541d442cbL,0x4c7727fd3a4724e4L,0xe556d6a4524f3474L, + 0x87e13cc7785642a2L,0x182efbb1a17845fdL,0xdcec0cf14e144857L } }, + /* 183 */ + { { 0x1cb89541e9539819L,0xc8cb3b4f9d94dbf1L,0x1d353f63417da578L, + 
0xb7a697fb8053a09eL,0x8d841731c35d8b78L,0x85748d6fb656a7a9L }, + { 0x1fd03947c1859c5dL,0x6ce965c1535d22a2L,0x1966a13e0ca3aadcL, + 0x9802e41d4fb14effL,0xa9048cbb76dd3fcdL,0x89b182b5e9455bbaL } }, + /* 184 */ + { { 0xd777ad6a43360710L,0x841287ef55e9936bL,0xbaf5c67004a21b24L, + 0xf2c0725f35ad86f1L,0x338fa650c707e72eL,0x2bf8ed2ed8883e52L }, + { 0xb0212cf4b56e0d6aL,0x50537e126843290cL,0xd8b184a198b3dc6fL, + 0xd2be9a350210b722L,0x407406db559781eeL,0x5a78d5910bc18534L } }, + /* 185 */ + { { 0x4d57aa2ad748b02cL,0xbe5b3451a12b3b95L,0xadca7a4564711258L, + 0x597e091a322153dbL,0xf327100632eb1eabL,0xbd9adcba2873f301L }, + { 0xd1dc79d138543f7fL,0x00022092921b1fefL,0x86db3ef51e5df8edL, + 0x888cae049e6b944aL,0x71bd29ec791a32b4L,0xd3516206a6d1c13eL } }, + /* 186 */ + { { 0x2ef6b95255924f43L,0xd2f401ae4f9de8d5L,0xfc73e8d7adc68042L, + 0x627ea70c0d9d1bb4L,0xc3bb3e3ebbf35679L,0x7e8a254ad882dee4L }, + { 0x08906f50b5924407L,0xf14a0e61a1ad444aL,0xaa0efa2165f3738eL, + 0xd60c7dd6ae71f161L,0x9e8390faf175894dL,0xd115cd20149f4c00L } }, + /* 187 */ + { { 0x2f2e2c1da52abf77L,0xc2a0dca554232568L,0xed423ea254966dccL, + 0xe48c93c7cd0dd039L,0x1e54a225176405c7L,0x1efb5b1670d58f2eL }, + { 0xa751f9d994fb1471L,0xfdb31e1f67d2941dL,0xa6c74eb253733698L, + 0xd3155d1189a0f64aL,0x4414cfe4a4b8d2b6L,0x8d5a4be8f7a8e9e3L } }, + /* 188 */ + { { 0x5c96b4d452669e98L,0x4547f9228fd42a03L,0xcf5c1319d285174eL, + 0x805cd1ae064bffa0L,0x50e8bc4f246d27e7L,0xf89ef98fd5781e11L }, + { 0xb4ff95f6dee0b63fL,0xad850047222663a4L,0x026918604d23ce9cL, + 0x3e5309ce50019f59L,0x27e6f72269a508aeL,0xe9376652267ba52cL } }, + /* 189 */ + { { 0xa04d289cc0368708L,0xc458872f5e306e1dL,0x76fa23de33112feaL, + 0x718e39746efde42eL,0xf0c98cdc1d206091L,0x5fa3ca6214a71987L }, + { 0xeee8188bdcaa9f2aL,0x312cc732589a860dL,0xf9808dd6c63aeb1fL, + 0x70fd43db4ea62b53L,0x2c2bfe34890b6e97L,0x105f863cfa426aa6L } }, + /* 190 */ + { { 0x0b29795db38059adL,0x5686b77e90647ea0L,0xeff0470edb473a3eL, + 0x278d2340f9b6d1e2L,0xebbff95bbd594ec7L,0xf4b72334d3a7f23dL }, + { 0x2a285980a5a83f0bL,0x0786c41a9716a8b3L,0x138901bd22511812L, + 0xd1b55221e2fede6eL,0x0806e264df4eb590L,0x6c4c897e762e462eL } }, + /* 191 */ + { { 0xd10b905fb4b41d9dL,0x826ca4664523a65bL,0x535bbd13b699fa37L, + 0x5b9933d773bc8f90L,0x9332d61fcd2118adL,0x158c693ed4a65fd0L }, + { 0x4ddfb2a8e6806e63L,0xe31ed3ecb5de651bL,0xf9460e51819bc69aL, + 0x6229c0d62c76b1f8L,0xbb78f231901970a3L,0x31f3820f9cee72b8L } }, + /* 192 */ + { { 0xe931caf2c09e1c72L,0x0715f29812990cf4L,0x33aad81d943262d8L, + 0x5d292b7a73048d3fL,0xb152aaa4dc7415f6L,0xc3d10fd90fd19587L }, + { 0xf76b35c575ddadd0L,0x9f5f4a511e7b694cL,0x2f1ab7ebc0663025L, + 0x01c9cc87920260b0L,0xc4b1f61a05d39da6L,0x6dcd76c4eb4a9c4eL } }, + /* 193 */ + { { 0x0ba0916ffdc83f01L,0x354c8b449553e4f9L,0xa6cc511affc5e622L, + 0xb954726ae95be787L,0xcb04811575b41a62L,0xfa2ae6cdebfde989L }, + { 0x6376bbc70f24659aL,0x13a999fd4c289c43L,0xc7134184ec9abd8bL, + 0x28c02bf6a789ab04L,0xff841ebcd3e526ecL,0x442b191e640893a8L } }, + /* 194 */ + { { 0x4cac6c62fa2b6e20L,0x97f29e9bf6d69861L,0x228ab1dbbc96d12dL, + 0x6eb913275e8e108dL,0xd4b3d4d140771245L,0x61b20623ca8a803aL }, + { 0x2c2f3b41a6a560b1L,0x879e1d403859fcf4L,0x7cdb5145024dbfc3L, + 0x55d08f153bfa5315L,0x2f57d773aa93823aL,0xa97f259cc6a2c9a2L } }, + /* 195 */ + { { 0xc306317be58edbbbL,0x25ade51c79dfdf13L,0x6b5beaf116d83dd6L, + 0xe8038a441dd8f925L,0x7f00143cb2a87b6bL,0xa885d00df5b438deL }, + { 0xe9f76790cf9e48bdL,0xf0bdf9f0a5162768L,0x0436709fad7b57cbL, + 0x7e151c12f7c15db7L,0x3514f0225d90ee3bL,0x2e84e8032c361a8dL } }, + /* 196 */ + { { 
0x2277607d563ec8d8L,0xa661811fe3934cb7L,0x3ca72e7af58fd5deL, + 0x7989da0462294c6aL,0x88b3708bf6bbefe9L,0x0d524cf753ed7c82L }, + { 0x69f699ca2f30c073L,0xf0fa264b9dc1dcf3L,0x44ca456805f0aaf6L, + 0x0f5b23c7d19b9bafL,0x39193f41eabd1107L,0x9e3e10ad2a7c9b83L } }, + /* 197 */ + { { 0xa90824f0d4ae972fL,0x43eef02bc6e846e7L,0x7e46061229d2160aL, + 0x29a178acfe604e91L,0x23056f044eb184b2L,0x4fcad55feb54cdf4L }, + { 0xa0ff96f3ae728d15L,0x8a2680c6c6a00331L,0x5f84cae07ee52556L, + 0x5e462c3ac5a65dadL,0x5d2b81dfe2d23f4fL,0x6e47301bc5b1eb07L } }, + /* 198 */ + { { 0x77411d68af8219b9L,0xcb883ce651b1907aL,0x25c87e57101383b5L, + 0x9c7d9859982f970dL,0xaa6abca5118305d2L,0x725fed2f9013a5dbL }, + { 0x487cdbafababd109L,0xc0f8cf5687586528L,0xa02591e68ad58254L, + 0xc071b1d1debbd526L,0x927dfe8b961e7e31L,0x55f895f99263dfe1L } }, + /* 199 */ + { { 0xf899b00db175645bL,0x51f3a627b65b4b92L,0xa2f3ac8db67399efL, + 0xe717867fe400bc20L,0x42cc90201967b952L,0x3d5967513ecd1de1L }, + { 0xd41ebcdedb979775L,0x99ba61bc6a2e7e88L,0x039149a5321504f2L, + 0xe7dc231427ba2fadL,0x9f556308b57d8368L,0x2b6d16c957da80a7L } }, + /* 200 */ + { { 0x84af5e76279ad982L,0x9bb4c92d9c8b81a6L,0xd79ad44e0e698e67L, + 0xe8be9048265fc167L,0xf135f7e60c3a4cccL,0xa0a10d38b8863a33L }, + { 0xe197247cd386efd9L,0x0eefd3f9b52346c2L,0xc22415f978607bc8L, + 0xa2a8f862508674ceL,0xa72ad09ec8c9d607L,0xcd9f0ede50fa764fL } }, + /* 201 */ + { { 0x063391c7d1a46d4dL,0x2df51c119eb01693L,0xc5849800849e83deL, + 0x48fd09aa8ad08382L,0xa405d873aa742736L,0xee49e61ee1f9600cL }, + { 0xd76676be48c76f73L,0xd9c100f601274b2aL,0x110bb67c83f8718dL, + 0xec85a42002fc0d73L,0xc0449e1e744656adL,0x28ce737637d9939bL } }, + /* 202 */ + { { 0x97e9af7244544ac7L,0xf2c658d5ba010426L,0x732dec39fb3adfbdL, + 0xd12faf91a2df0b07L,0x8ac267252171e208L,0xf820cdc85b24fa54L }, + { 0x307a6eea94f4cf77L,0x18c783d2944a33c6L,0x4b939d4c0b741ac5L, + 0x1d7acd153ffbb6e4L,0x06a248587a255e44L,0x14fbc494ce336d50L } }, + /* 203 */ + { { 0x9b920c0c51584e3cL,0xc7733c59f7e54027L,0xe24ce13988422bbeL, + 0x11ada812523bd6abL,0xde068800b88e6defL,0x7b872671fe8c582dL }, + { 0x4e746f287de53510L,0x492f8b99f7971968L,0x1ec80bc77d928ac2L, + 0xb3913e48432eb1b5L,0xad08486632028f6eL,0x122bb8358fc2f38bL } }, + /* 204 */ + { { 0x0a9f3b1e3b0b29c3L,0x837b64324fa44151L,0xb9905c9217b28ea7L, + 0xf39bc93798451750L,0xcd383c24ce8b6da1L,0x299f57db010620b2L }, + { 0x7b6ac39658afdce3L,0xa15206b33d05ef47L,0xa0ae37e2b9bb02ffL, + 0x107760ab9db3964cL,0xe29de9a067954beaL,0x446a1ad8431c3f82L } }, + /* 205 */ + { { 0xc6fecea05c6b8195L,0xd744a7c5f49e71b9L,0xa8e96acc177a7ae7L, + 0x1a05746c358773a7L,0xa416214637567369L,0xaa0217f787d1c971L }, + { 0x61e9d15877fd3226L,0x0f6f2304e4f600beL,0xa9c4cebc7a6dff07L, + 0xd15afa0109f12a24L,0x2bbadb228c863ee9L,0xa28290e4e5eb8c78L } }, + /* 206 */ + { { 0x55b87fa03e9de330L,0x12b26066195c145bL,0xe08536e0a920bef0L, + 0x7bff6f2c4d195adcL,0x7f319e9d945f4187L,0xf9848863f892ce47L }, + { 0xd0efc1d34fe37657L,0x3c58de825cf0e45aL,0x626ad21a8b0ccbbeL, + 0xd2a31208af952fc5L,0x81791995eb437357L,0x5f19d30f98e95d4fL } }, + /* 207 */ + { { 0x72e83d9a0e6865bbL,0x22f5af3bf63456a6L,0x409e9c73463c8d9eL, + 0x40e9e578dfe6970eL,0x876b6efa711b91caL,0x895512cf942625a3L }, + { 0x84c8eda8cb4e462bL,0x84c0154a4412e7c8L,0x04325db1ceb7b71fL, + 0x1537dde366f70877L,0xf3a093991992b9acL,0xa7316606d498ae77L } }, + /* 208 */ + { { 0x13990d2fcad260f5L,0x76c3be29eec0e8c0L,0x7dc5bee00f7bd7d5L, + 0x9be167d2efebda4bL,0xcce3dde69122b87eL,0x75a28b0982b5415cL }, + { 0xf6810bcde84607a6L,0xc6d581286f4dbf0dL,0xfead577d1b4dafebL, + 
0x9bc440b2066b28ebL,0x53f1da978b17e84bL,0x0459504bcda9a575L } }, + /* 209 */ + { { 0x13e39a02329e5836L,0x2c9e7d51f717269dL,0xc5ac58d6f26c963bL, + 0x3b0c6c4379967bf5L,0x60bbea3f55908d9dL,0xd84811e7f07c9ad1L }, + { 0xfe7609a75bd20e4aL,0xe4325dd20a70baa8L,0x3711f370b3600386L, + 0x97f9562fd0924302L,0x040dc0c34acc4436L,0xfd6d725cde79cdd4L } }, + /* 210 */ + { { 0xb3efd0e3cf13eafbL,0x21009cbb5aa0ae5fL,0xe480c55379022279L, + 0x755cf334b2fc9a6dL,0x8564a5bf07096ae7L,0xddd649d0bd238139L }, + { 0xd0de10b18a045041L,0x6e05b413c957d572L,0x5c5ff8064e0fb25cL, + 0xd933179b641162fbL,0x42d48485e57439f9L,0x70c5bd0a8a8d72aaL } }, + /* 211 */ + { { 0xa767173897bdf646L,0xaa1485b4ab329f7cL,0xce3e11d6f8f25fdfL, + 0x76a3fc7ec6221824L,0x045f281ff3924740L,0x24557d4e96d13a9aL }, + { 0x875c804bdd4c27cdL,0x11c5f0f40f5c7feaL,0xac8c880bdc55ff7eL, + 0x2acddec51103f101L,0x38341a21f99faa89L,0xc7b67a2cce9d6b57L } }, + /* 212 */ + { { 0x9a0d724f8e357586L,0x1d7f4ff5df648da0L,0x9c3e6c9bfdee62a5L, + 0x0499cef00389b372L,0xe904050d98eab879L,0xe8eef1b66c051617L }, + { 0xebf5bfebc37e3ca9L,0x7c5e946da4e0b91dL,0x790973142c4bea28L, + 0x81f6c109ee67b2b7L,0xaf237d9bdafc5edeL,0xd2e602012abb04c7L } }, + /* 213 */ + { { 0x6156060c8a4f57bfL,0xf9758696ff11182aL,0x8336773c6296ef00L, + 0x9c054bceff666899L,0xd6a11611719cd11cL,0x9824a641dbe1acfaL }, + { 0x0b7b7a5fba89fd01L,0xf8d3b809889f79d8L,0xc5e1ea08f578285cL, + 0x7ac74536ae6d8288L,0x5d37a2007521ef5fL,0x5ecc4184b260a25dL } }, + /* 214 */ + { { 0xddcebb19a708c8d3L,0xe63ed04fc63f81ecL,0xd045f5a011873f95L, + 0x3b5ad54479f276d5L,0x81272a3d425ae5b3L,0x8bfeb50110ce1605L }, + { 0x4233809c888228bfL,0x4bd82acfb2aff7dfL,0x9c68f1800cbd4a7fL, + 0xfcd771246b44323dL,0x60c0fcf6891db957L,0xcfbb4d8904da8f7fL } }, + /* 215 */ + { { 0x9a6a5df93b26139aL,0x3e076a83b2cc7eb8L,0x47a8e82d5a964bcdL, + 0x8a4e2a39b9278d6bL,0x93506c98e4443549L,0x06497a8ff1e0d566L }, + { 0x3dee8d992b1efa05L,0x2da63ca845393e33L,0xa4af7277cf0579adL, + 0xaf4b46393236d8eaL,0x6ccad95b32b617f5L,0xce76d8b8b88bb124L } }, + /* 216 */ + { { 0x63d2537a083843dcL,0x89eb35141e4153b4L,0x5175ebc4ea9afc94L, + 0x7a6525808ed1aed7L,0x67295611d85e8297L,0x8dd2d68bb584b73dL }, + { 0x237139e60133c3a4L,0x9de838ab4bd278eaL,0xe829b072c062fcd9L, + 0x70730d4f63ba8706L,0x6080483fd3cd05ecL,0x872ab5b80c85f84dL } }, + /* 217 */ + { { 0xfc0776d3999d4d49L,0xa3eb59deec3f45e7L,0xbc990e440dae1fc1L, + 0x33596b1ea15371ffL,0xd447dcb29bc7ab25L,0xcd5b63e935979582L }, + { 0xae3366fa77d1ff11L,0x59f28f05edee6903L,0x6f43fed1a4433bf2L, + 0x15409c9bdf9ce00eL,0x21b5cdedaca9c5dcL,0xf9f3359582d7bdb4L } }, + /* 218 */ + { { 0x959443789422c792L,0x239ea923c958b8bfL,0x4b61a247df076541L, + 0x4d29ce85bb9fc544L,0x9a692a670b424559L,0x6e0ca5a00e486900L }, + { 0x6b79a78285b3beceL,0x41f35e39c61f9892L,0xff82099aae747f82L, + 0x58c8ae3fd0ca59d6L,0x4ac930e299406b5fL,0x2ce04eb99df24243L } }, + /* 219 */ + { { 0x4366b9941ac37b82L,0xff0c728d25b04d83L,0x1f55136119c47b7cL, + 0xdbf2d5edbeff13e7L,0xf78efd51e12a683dL,0x82cd85b9989cf9c4L }, + { 0xe23c6db6e0cb5d37L,0x818aeebd72ee1a15L,0x8212aafd28771b14L, + 0x7bc221d91def817dL,0xdac403a29445c51fL,0x711b051712c3746bL } }, + /* 220 */ + { { 0x0ed9ed485ea99eccL,0xf799500db8cab5e1L,0xa8ec87dcb570cbdcL, + 0x52cfb2c2d35dfaecL,0x8d31fae26e4d80a4L,0xe6a37dc9dcdeabe5L }, + { 0x5d365a341deca452L,0x09a5f8a50d68b44eL,0x59238ea5a60744b1L, + 0xf2fedc0dbb4249e9L,0xe395c74ea909b2e3L,0xe156d1a539388250L } }, + /* 221 */ + { { 0xd796b3d047181ae9L,0xbaf44ba844197808L,0xe693309434cf3facL, + 0x41aa6adec3bd5c46L,0x4fda75d8eed947c6L,0xacd9d4129ea5a525L }, + { 
0x65cc55a3d430301bL,0x3c9a5bcf7b52ea49L,0x22d319cf159507f0L, + 0x2ee0b9b5de74a8ddL,0x20c26a1e877ac2b6L,0x387d73da92e7c314L } }, + /* 222 */ + { { 0x13c4833e8cd3fdacL,0x76fcd473332e5b8eL,0xff671b4be2fe1fd3L, + 0x4d734e8b5d98d8ecL,0xb1ead3c6514bbc11L,0xd14ca8587b390494L }, + { 0x95a443af5d2d37e9L,0x73c6ea7300464622L,0xa44aeb4b15755044L, + 0xba3f8575fab58feeL,0x9779dbc9dc680a6fL,0xe1ee5f5a7b37ddfcL } }, + /* 223 */ + { { 0xcd0b464812d29f46L,0x93295b0b0ed53137L,0xbfe2609480bef6c9L, + 0xa656578854248b00L,0x69c43fca80e7f9c4L,0x2190837bbe141ea1L }, + { 0x875e159aa1b26cfbL,0x90ca9f877affe852L,0x15e6550d92ca598eL, + 0xe3e0945d1938ad11L,0xef7636bb366ef937L,0xb6034d0bb39869e5L } }, + /* 224 */ + { { 0x4d255e3026d8356eL,0xf83666edd314626fL,0x421ddf61d0c8ed64L, + 0x96e473c526677b61L,0xdad4af7e9e9b18b3L,0xfceffd4aa9393f75L }, + { 0x843138a111c731d5L,0x05bcb3a1b2f141d9L,0x20e1fa95617b7671L, + 0xbefce81288ccec7bL,0x582073dc90f1b568L,0xf572261a1f055cb7L } }, + /* 225 */ + { { 0xf314827736973088L,0xc008e70886a9f980L,0x1b795947e046c261L, + 0xdf1e6a7dca76bca0L,0xabafd88671acddf0L,0xff7054d91364d8f4L }, + { 0x2cf63547e2260594L,0x468a5372d73b277eL,0xc7419e24ef9bd35eL, + 0x2b4a1c2024043cc3L,0xa28f047a890b39cdL,0xdca2cea146f9a2e3L } }, + /* 226 */ + { { 0xab78873653277538L,0xa734e225cf697738L,0x66ee1d1e6b22e2c1L, + 0x2c615389ebe1d212L,0xf36cad4002bb0766L,0x120885c33e64f207L }, + { 0x59e77d5690fbfec2L,0xf9e781aad7a574aeL,0x801410b05d045e53L, + 0xd3b5f0aaa91b5f0eL,0xb3d1df007fbb3521L,0x11c4b33ec72bee9aL } }, + /* 227 */ + { { 0xd32b983283c3a7f3L,0x8083abcf88d8a354L,0xdeb1640450f4ec5aL, + 0x18d747f0641e2907L,0x4e8978aef1bbf03eL,0x932447dc88a0cd89L }, + { 0x561e0febcf3d5897L,0xfc3a682f13600e6dL,0xc78b9d73d16a6b73L, + 0xe713feded29bf580L,0x0a22522308d69e5cL,0x3a924a571ff7fda4L } }, + /* 228 */ + { { 0xfb64554cb4093beeL,0xa6d65a25a58c6ec0L,0x4126994d43d0ed37L, + 0xa5689a5155152d44L,0xb8e5ea8c284caa8dL,0x33f05d4fd1f25538L }, + { 0xe0fdfe091b615d6eL,0x2ded7e8f705507daL,0xdd5631e517bbcc80L, + 0x4f87453e267fd11fL,0xc6da723fff89d62dL,0x55cbcae2e3cda21dL } }, + /* 229 */ + { { 0x336bc94e6b4e84f3L,0x728630314ef72c35L,0x6d85fdeeeeb57f99L, + 0x7f4e3272a42ece1bL,0x7f86cbb536f0320aL,0xf09b6a2b923331e6L }, + { 0x21d3ecf156778435L,0x2977ba998323b2d2L,0x6a1b57fb1704bc0fL, + 0xd777cf8b389f048aL,0x9ce2174fac6b42cdL,0x404e2bff09e6c55aL } }, + /* 230 */ + { { 0x9b9b135e204c5ddbL,0x9dbfe0443eff550eL,0x35eab4bfec3be0f6L, + 0x8b4c3f0d0a43e56fL,0x4c1c66730e73f9b3L,0x92ed38bd2c78c905L }, + { 0xc7003f6aa386e27cL,0xb9c4f46faced8507L,0xea024ec859df5464L, + 0x4af96152429572eaL,0x279cd5e2e1fc1194L,0xaa376a03281e358cL } }, + /* 231 */ + { { 0x078592233cdbc95cL,0xaae1aa6aef2e337aL,0xc040108d472a8544L, + 0x80c853e68d037b7dL,0xd221315c8c7eee24L,0x195d38568ee47752L }, + { 0xd4b1ba03dacd7fbeL,0x4b5ac61ed3e0c52bL,0x68d3c0526aab7b52L, + 0xf0d7248c660e3feaL,0xafdb3f893145efb4L,0xa73fd9a38f40936dL } }, + /* 232 */ + { { 0x891b9ef3bb1b17ceL,0x14023667c6127f31L,0x12b2e58d305521fdL, + 0x3a47e449e3508088L,0xe49fc84bff751507L,0x4023f7225310d16eL }, + { 0xa608e5edb73399faL,0xf12632d8d532aa3eL,0x13a2758e845e8415L, + 0xae4b6f851fc2d861L,0x3879f5b1339d02f2L,0x446d22a680d99ebdL } }, + /* 233 */ + { { 0x0f5023024be164f1L,0x8d09d2d688b81920L,0x514056f1984aceffL, + 0xa5c4ddf075e9e80dL,0x38cb47e6df496a93L,0x899e1d6b38df6bf7L }, + { 0x69e87e88b59eb2a6L,0x280d9d639b47f38bL,0x599411ea3654e955L, + 0xcf8dd4fd969aa581L,0xff5c2baf530742a7L,0xa43915361a373085L } }, + /* 234 */ + { { 0x6ace72a3a8a4bdd2L,0xc656cdd1b68ef702L,0xd4a33e7e90c4dad8L, + 
0x4aece08a9d951c50L,0xea8005ae085d68e6L,0xfdd7a7d76f7502b8L }, + { 0xce6fb0a698d6fa45L,0x228f86721104eb8cL,0xd23d8787da09d7dcL, + 0x5521428b2ae93065L,0x95faba3dea56c366L,0xedbe50390a88aca5L } }, + /* 235 */ + { { 0xd64da0adbfb26c82L,0xe5d70b3c952c2f9cL,0xf5e8f365f7e77f68L, + 0x7234e00208f2d695L,0xfaf900eed12e7be6L,0x27dc69344acf734eL }, + { 0x80e4ff5ec260a46aL,0x7da5ebce2dc31c28L,0x485c5d73ca69f552L, + 0xcdfb6b2969cc84c2L,0x031c5afeed6d4ecaL,0xc7bbf4c822247637L } }, + /* 236 */ + { { 0x9d5b72c749fe01b2L,0x34785186793a91b8L,0xa3ba3c54cf460438L, + 0x73e8e43d3ab21b6fL,0x50cde8e0be57b8abL,0x6488b3a7dd204264L }, + { 0xa9e398b3dddc4582L,0x1698c1a95bec46feL,0x7f1446ef156d3843L, + 0x3fd25dd8770329a2L,0x05b1221a2c710668L,0x65b2dc2aa72ee6cfL } }, + /* 237 */ + { { 0x21a885f7cd021d63L,0x3f344b15fea61f08L,0xad5ba6ddc5cf73e6L, + 0x154d0d8f227a8b23L,0x9b74373cdc559311L,0x4feab71598620fa1L }, + { 0x5098938e7d9ec924L,0x84d54a5e6d47e550L,0x1a2d1bdc1b617506L, + 0x99fe1782615868a4L,0x171da7803005a924L,0xa70bf5ed7d8f79b6L } }, + /* 238 */ + { { 0x0bc1250dfe2216c5L,0x2c37e2507601b351L,0xb6300175d6f06b7eL, + 0x4dde8ca18bfeb9b7L,0x4f210432b82f843dL,0x8d70e2f9b1ac0afdL }, + { 0x25c73b78aae91abbL,0x0230dca3863028f2L,0x8b923ecfe5cf30b7L, + 0xed754ec25506f265L,0x8e41b88c729a5e39L,0xee67cec2babf889bL } }, + /* 239 */ + { { 0xe183acf51be46c65L,0x9789538fe7565d7aL,0x87873391d9627b4eL, + 0xbf4ac4c19f1d9187L,0x5db99f634691f5c8L,0xa68df80374a1fb98L }, + { 0x3c448ed1bf92b5faL,0xa098c8413e0bdc32L,0x8e74cd5579bf016cL, + 0x5df0d09c115e244dL,0x9418ad013410b66eL,0x8b6124cb17a02130L } }, + /* 240 */ + { { 0x425ec3afc26e3392L,0xc07f8470a1722e00L,0xdcc28190e2356b43L, + 0x4ed97dffb1ef59a6L,0xc22b3ad1c63028c1L,0x070723c268c18988L }, + { 0x70da302f4cf49e7dL,0xc5e87c933f12a522L,0x74acdd1d18594148L, + 0xad5f73abca74124cL,0xe72e4a3ed69fd478L,0x615938687b117cc3L } }, + /* 241 */ + { { 0x7b7b9577a9aa0486L,0x6e41fb35a063d557L,0xb017d5c7da9047d7L, + 0x8c74828068a87ba9L,0xab45fa5cdf08ad93L,0xcd9fb2174c288a28L }, + { 0x595446425747843dL,0x34d64c6ca56111e3L,0x12e47ea14bfce8d5L, + 0x17740e056169267fL,0x5c49438eeed03fb5L,0x9da30add4fc3f513L } }, + /* 242 */ + { { 0xc4e85282ccfa5200L,0x2707608f6a19b13dL,0xdcb9a53df5726e2fL, + 0x612407c9e9427de5L,0x3e5a17e1d54d582aL,0xb99877de655ae118L }, + { 0x6f0e972b015254deL,0x92a56db1f0a6f7c5L,0xd297e4e1a656f8b2L, + 0x99fe0052ad981983L,0xd3652d2f07cfed84L,0xc784352e843c1738L } }, + /* 243 */ + { { 0x6ee90af07e9b2d8aL,0xac8d701857cf1964L,0xf6ed903171f28efcL, + 0x7f70d5a96812b20eL,0x27b557f4f1c61eeeL,0xf1c9bd57c6263758L }, + { 0x5cf7d0142a1a6194L,0xdd614e0b1890ab84L,0x3ef9de100e93c2a6L, + 0xf98cf575e0cd91c5L,0x504ec0c614befc32L,0xd0513a666279d68cL } }, + /* 244 */ + { { 0xa8eadbada859fb6aL,0xcf8346e7db283666L,0x7b35e61a3e22e355L, + 0x293ece2c99639c6bL,0xfa0162e256f241c8L,0xd2e6c7b9bf7a1ddaL }, + { 0xd0de625340075e63L,0x2405aa61f9ec8286L,0x2237830a8fe45494L, + 0x4fd01ac7364e9c8cL,0x4d9c3d21904ba750L,0xd589be14af1b520bL } }, + /* 245 */ + { { 0x13576a4f4662e53bL,0x35ec2f51f9077676L,0x66297d1397c0af97L, + 0xed3201fe9e598b58L,0x49bc752a5e70f604L,0xb54af535bb12d951L }, + { 0x36ea4c2b212c1c76L,0x18f5bbc7eb250dfdL,0xa0d466cc9a0a1a46L, + 0x52564da4dac2d917L,0x206559f48e95fab5L,0x7487c1909ca67a33L } }, + /* 246 */ + { { 0x75abfe37dde98e9cL,0x99b90b262a411199L,0x1b410996dcdb1f7cL, + 0xab346f118b3b5675L,0x04852193f1f8ae1eL,0x1ec4d2276b8b98c1L }, + { 0xba3bc92645452baaL,0x387d1858acc4a572L,0x9478eff6e51f171eL, + 0xf357077d931e1c00L,0xffee77cde54c8ca8L,0xfb4892ff551dc9a4L } }, + /* 247 */ + { { 
0x5b1bdad02db8dff8L,0xd462f4fd5a2285a2L,0x1d6aad8eda00b461L, + 0x43fbefcf41306d1bL,0x428e86f36a13fe19L,0xc8b2f11817f89404L }, + { 0x762528aaf0d51afbL,0xa3e2fea4549b1d06L,0x86fad8f2ea3ddf66L, + 0x0d9ccc4b4fbdd206L,0xcde97d4cc189ff5aL,0xc36793d6199f19a6L } }, + /* 248 */ + { { 0xea38909b51b85197L,0xffb17dd0b4c92895L,0x0eb0878b1ddb3f3fL, + 0xb05d28ffc57cf0f2L,0xd8bde2e71abd57e2L,0x7f2be28dc40c1b20L }, + { 0x6554dca2299a2d48L,0x5130ba2e8377982dL,0x8863205f1071971aL, + 0x15ee62827cf2825dL,0xd4b6c57f03748f2bL,0xa9e3f4da430385a0L } }, + /* 249 */ + { { 0x33eb7cec83fbc9c6L,0x24a311c74541777eL,0xc81377f74f0767fcL, + 0x12adae364ab702daL,0xb7fcb6db2a779696L,0x4a6fb28401cea6adL }, + { 0x5e8b1d2acdfc73deL,0xd0efae8d1b02fd32L,0x3f99c190d81d8519L, + 0x3c18f7fafc808971L,0x41f713e751b7ae7bL,0x0a4b3435f07fc3f8L } }, + /* 250 */ + { { 0x7dda3c4c019b7d2eL,0x631c8d1ad4dc4b89L,0x5489cd6e1cdb313cL, + 0xd44aed104c07bb06L,0x8f97e13a75f000d1L,0x0e9ee64fdda5df4dL }, + { 0xeaa99f3b3e346910L,0x622f6921fa294ad7L,0x22aaa20d0d0b2fe9L, + 0x4fed2f991e5881baL,0x9af3b2d6c1571802L,0x919e67a8dc7ee17cL } }, + /* 251 */ + { { 0xc724fe4c76250533L,0x8a2080e57d817ef8L,0xa2afb0f4172c9751L, + 0x9b10cdeb17c0702eL,0xbf3975e3c9b7e3e9L,0x206117df1cd0cdc5L }, + { 0xfb049e61be05ebd5L,0xeb0bb55c16c782c0L,0x13a331b8ab7fed09L, + 0xf6c58b1d632863f0L,0x6264ef6e4d3b6195L,0x92c51b639a53f116L } }, + /* 252 */ + { { 0xa57c7bc8288b364dL,0x4a562e087b41e5c4L,0x699d21c6698a9a11L, + 0xa4ed9581f3f849b9L,0xa223eef39eb726baL,0x13159c23cc2884f9L }, + { 0x73931e583a3f4963L,0x965003890ada6a81L,0x3ee8a1c65ab2950bL, + 0xeedf4949775fab52L,0x63d652e14f2671b6L,0xfed4491c3c4e2f55L } }, + /* 253 */ + { { 0x335eadc3f4eb453eL,0x5ff74b63cadd1a5bL,0x6933d0d75d84a91aL, + 0x9ca3eeb9b49ba337L,0x1f6faccec04c15b8L,0x4ef19326dc09a7e4L }, + { 0x53d2d3243dca3233L,0x0ee40590a2259d4bL,0x18c22edb5546f002L, + 0x9242980109ea6b71L,0xaada0addb0e91e61L,0x5fe53ef499963c50L } }, + /* 254 */ + { { 0x372dd06b90c28c65L,0x1765242c119ce47dL,0xc041fb806b22fc82L, + 0x667edf07b0a7ccc1L,0xc79599e71261beceL,0xbc69d9ba19cff22aL }, + { 0x009d77cd13c06819L,0x635a66aee282b79dL,0x4edac4a6225b1be8L, + 0x57d4f4e4524008f9L,0xee299ac5b056af84L,0xcc38444c3a0bc386L } }, + /* 255 */ + { { 0x490643b1cd4c2356L,0x740a4851750547beL,0x643eaf29d4944c04L, + 0xba572479299a98a0L,0x48b29f16ee05fdf9L,0x33fb4f61089b2d7bL }, + { 0x86704902a950f955L,0x97e1034dfedc3ddfL,0x211320b605fbb6a2L, + 0x23d7b93f432299bbL,0x1fe1a0578590e4a3L,0x8e1d0586f58c0ce6L } }, +}; + +/* Multiply the base point of P384 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_base_6(sp_point_384* r, const sp_digit* k, + int map, void* heap) +{ + return sp_384_ecc_mulmod_stripe_6(r, &p384_base, p384_table, + k, map, heap); +} + +/* Multiply the base point of P384 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[6];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_6(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+            DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 6, km);
+
+        err = sp_384_ecc_mulmod_base_6(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_6(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_6(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a A single precision integer.
+ */
+static void sp_384_add_one_6(sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldp x1, x2, [%[a], 0]\n\t"
+        "adds x1, x1, #1\n\t"
+        "ldr x3, [%[a], 16]\n\t"
+        "adcs x2, x2, xzr\n\t"
+        "ldr x4, [%[a], 24]\n\t"
+        "adcs x3, x3, xzr\n\t"
+        "stp x1, x2, [%[a], 0]\n\t"
+        "adcs x4, x4, xzr\n\t"
+        "stp x3, x4, [%[a], 16]\n\t"
+        "ldp x1, x2, [%[a], 32]\n\t"
+        "adcs x1, x1, xzr\n\t"
+        "adcs x2, x2, xzr\n\t"
+        "stp x1, x2, [%[a], 32]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "x1", "x2", "x3", "x4"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of words to fill.
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j;
+    byte* d;
+
+    for (i = n - 1, j = 0; i >= 7; i -= 8) {
+        r[j] = ((sp_digit)a[i - 0] << 0) |
+               ((sp_digit)a[i - 1] << 8) |
+               ((sp_digit)a[i - 2] << 16) |
+               ((sp_digit)a[i - 3] << 24) |
+               ((sp_digit)a[i - 4] << 32) |
+               ((sp_digit)a[i - 5] << 40) |
+               ((sp_digit)a[i - 6] << 48) |
+               ((sp_digit)a[i - 7] << 56);
+        j++;
+    }
+
+    if (i >= 0) {
+        r[j] = 0;
+
+        d = (byte*)r;
+        switch (i) {
+            case 6: d[n - 1 - 6] = a[6]; //fallthrough
+            case 5: d[n - 1 - 5] = a[5]; //fallthrough
+            case 4: d[n - 1 - 4] = a[4]; //fallthrough
+            case 3: d[n - 1 - 3] = a[3]; //fallthrough
+            case 2: d[n - 1 - 2] = a[2]; //fallthrough
+            case 1: d[n - 1 - 1] = a[1]; //fallthrough
+            case 0: d[n - 1 - 0] = a[0]; //fallthrough
+        }
+        j++;
+    }
+
+    for (; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
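+/* Example of the conversion above: with size = 6 and a 48-byte big-endian
+ * input a[0..47], words are filled least significant first: r[0] is the
+ * big-endian 64-bit value of a[40..47] and r[5] that of a[0..7]; in general
+ * r[j] = be64(a + n - 8*(j + 1)) whenever n is a multiple of 8. */
+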
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng Random number generator.
+ * k Scalar value.
+ * returns RNG failures and MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_6(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[48];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_384_from_bin(k, 6, buf, (int)sizeof(buf));
+            if (sp_384_cmp_6(k, p384_order2) < 0) {
+                sp_384_add_one_6(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng Random number generator.
+ * priv Generated private value.
+ * pub Generated public point.
+ * heap Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[6];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_6(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+            DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_6(rng, k);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_base_6(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_6(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        /* point * order must be the point at infinity. */
+        if ((sp_384_iszero_6(infinity->x) == 0) || (sp_384_iszero_6(infinity->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_6(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_6(infinity, 1, heap);
+#endif
+    sp_384_point_free_6(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i, j;
+
+    for (i = 5, j = 0; i >= 0; i--) {
+        a[j++] = r[i] >> 56;
+        a[j++] = r[i] >> 48;
+        a[j++] = r[i] >> 40;
+        a[j++] = r[i] >> 32;
+        a[j++] = r[i] >> 24;
+        a[j++] = r[i] >> 16;
+        a[j++] = r[i] >> 8;
+        a[j++] = r[i] >> 0;
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv Scalar to multiply the point by.
+ * pub Point to multiply.
+ * out Buffer to hold X ordinate.
+ * outLen On entry, size of the buffer in bytes.
+ * On exit, length of data in buffer in bytes.
+ * heap Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for the output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */ +int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out, + word32* outLen, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 p; + sp_digit kd[6]; +#endif + sp_point_384* point = NULL; + sp_digit* k = NULL; + int err = MP_OKAY; + + if (*outLen < 48U) { + err = BUFFER_E; + } + + if (err == MP_OKAY) { + err = sp_384_point_new_6(heap, p, point); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + + if (err == MP_OKAY) { + sp_384_from_mp(k, 6, priv); + sp_384_point_from_ecc_point_6(point, pub); + err = sp_384_ecc_mulmod_6(point, point, k, 1, heap); + } + if (err == MP_OKAY) { + sp_384_to_bin(point->x, out); + *outLen = 48; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_6(point, 0, heap); + + return err; +} +#endif /* HAVE_ECC_DHE */ + +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. + */ +static sp_digit sp_384_sub_in_place_6(sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x2, x3, [%[a], 0]\n\t" + "ldp x6, x7, [%[b], 0]\n\t" + "subs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 16]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 16]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 0]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 16]\n\t" + "ldr x2, [%[a], 32]\n\t" + "ldr x3, [%[a], 40]\n\t" + "ldr x6, [%[b], 32]\n\t" + "ldr x7, [%[b], 40]\n\t" + "sbcs x2, x2, x6\n\t" + "sbcs x3, x3, x7\n\t" + "str x2, [%[a], 32]\n\t" + "str x3, [%[a], 40]\n\t" + "csetm %[a], cc\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); + + return (sp_digit)a; +} + +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
+ */
+static void sp_384_mul_d_6(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldr x8, [%[a]]\n\t"
+        "mul x5, %[b], x8\n\t"
+        "umulh x3, %[b], x8\n\t"
+        "mov x4, 0\n\t"
+        "str x5, [%[r]]\n\t"
+        "mov x5, 0\n\t"
+        "mov x9, #8\n\t"
+        "1:\n\t"
+        "ldr x8, [%[a], x9]\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], x9]\n\t"
+        "mov x3, x4\n\t"
+        "mov x4, x5\n\t"
+        "mov x5, #0\n\t"
+        "add x9, x9, #8\n\t"
+        "cmp x9, 48\n\t"
+        "b.lt 1b\n\t"
+        "str x3, [%[r], 48]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#else
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldp x8, x9, [%[a]]\n\t"
+        "mul x3, %[b], x8\n\t"
+        "umulh x4, %[b], x8\n\t"
+        "mov x5, 0\n\t"
+        "# A[1] * B\n\t"
+        "str x3, [%[r]]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[2] * B\n\t"
+        "ldp x8, x9, [%[a], 16]\n\t"
+        "str x4, [%[r], 8]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[3] * B\n\t"
+        "str x5, [%[r], 16]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[4] * B\n\t"
+        "ldp x8, x9, [%[a], 32]\n\t"
+        "str x3, [%[r], 24]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[5] * B\n\t"
+        "str x4, [%[r], 32]\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "adc x3, x3, x7\n\t"
+        "stp x5, x3, [%[r], 40]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#endif
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ */
+static sp_digit div_384_word_6(sp_digit d1, sp_digit d0, sp_digit div)
+{
+    sp_digit r;
+
+    __asm__ __volatile__ (
+        "lsr x5, %[div], 32\n\t"
+        "add x5, x5, 1\n\t"
+
+        "udiv x3, %[d1], x5\n\t"
+        "lsl x6, x3, 32\n\t"
+        "mul x4, %[div], x6\n\t"
+        "umulh x3, %[div], x6\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "udiv x3, %[d1], x5\n\t"
+        "lsl x3, x3, 32\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "umulh x3, %[div], x3\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "lsr x3, %[d0], 32\n\t"
+        "orr x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv x3, x3, x5\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "umulh x3, %[div], x3\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "lsr x3, %[d0], 32\n\t"
+        "orr x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv x3, x3, x5\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "sub %[d0], %[d0], x4\n\t"
+
+        "udiv x3, %[d0], %[div]\n\t"
+        "add %[r], x6, x3\n\t"
+
+        : [r] "=r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "x3", "x4", "x5", "x6"
+    );
+
+    return r;
+}
+
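+/* Behavioural sketch of div_384_word_6 (illustrative, not part of the
+ * build): assuming the quotient fits in 64 bits (d1 < div), the routine is
+ * equivalent to
+ *     return (sp_digit)((((unsigned __int128)d1 << 64) | d0) / div);
+ * AArch64 has no 128-by-64-bit divide instruction, so the assembly builds
+ * the quotient from udiv estimates against the top 32 bits of div plus one,
+ * subtracting each partial product back out of the remainder. */
+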
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_384_mask_6(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<6; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+#endif
+}
+
+/* Divide a by d and put the remainder into r. (a = m*d + r)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[12], t2[7];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[5];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 6);
+    for (i=5; i>=0; i--) {
+        r1 = div_384_word_6(t1[6 + i], t1[6 + i - 1], div);
+
+        sp_384_mul_d_6(t2, d, r1);
+        t1[6 + i] += sp_384_sub_in_place_6(&t1[i], t2);
+        t1[6 + i] -= t2[6];
+        sp_384_mask_6(t2, d, t1[6 + i]);
+        t1[6 + i] += sp_384_add_6(&t1[i], &t1[i], t2);
+        sp_384_mask_6(t2, d, t1[6 + i]);
+        t1[6 + i] += sp_384_add_6(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_384_cmp_6(t1, d) >= 0;
+    sp_384_cond_sub_6(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_mod_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_6(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve. */
+static const uint64_t p384_order_minus_2[6] = {
+    0xecec196accc52971U,0x581a0db248b0a77aU,0xc7634d81f4372ddfU,
+    0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve. */
+static const uint64_t p384_order_low[3] = {
+    0xecec196accc52971U,0x581a0db248b0a77aU,0xc7634d81f4372ddfU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of the P384 curve. (r = a * b mod order)
+ *
+ * r Result of the multiplication.
+ * a First operand of the multiplication.
+ * b Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_6(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_6(r, a, b);
+    sp_384_mont_reduce_order_6(r, p384_order, p384_mp_order);
+}
+
+/* Square a number mod the order of the P384 curve. (r = a * a mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_384_mont_sqr_order_6(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_6(r, a);
+    sp_384_mont_reduce_order_6(r, p384_order, p384_mp_order);
+}
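+
+/* Representation note: operands of the two functions above are kept in
+ * Montgomery form a' = a * 2^384 mod order. sp_384_mul_6/sp_384_sqr_6
+ * produce a'*b' = a*b*2^768 and sp_384_mont_reduce_order_6 divides out one
+ * factor of 2^384, so the result (a*b)' = a*b*2^384 is again in Montgomery
+ * form. */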
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square a number mod the order of the P384 curve a number of times.
+ * (r = a ^ 2 ^ n mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ * n Number of times to square.
+ */
+static void sp_384_mont_sqr_n_order_6(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_6(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_6(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data.
+ */
+static void sp_384_mont_inv_order_6(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 6);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_6(t, t);
+        if ((p384_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_384_mont_mul_order_6(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 6U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 6;
+    sp_digit* t3 = td + 4 * 6;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_6(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_6(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_6(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_6(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_6(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_6(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_6(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_6(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_6(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_6(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_6(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_6(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
+    sp_384_mont_sqr_n_order_6(t2, t, 48);
+    /* t = a^ffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_6(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_6(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_6(t2, t2, t);
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_6(t2, t2);
+        if (((sp_digit)p384_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_384_mont_mul_order_6(t2, t2, a);
+        }
+    }
+    sp_384_mont_sqr_order_6(t2, t2);
+    sp_384_mont_mul_order_6(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
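+/* Why sp_384_mont_inv_order_6 yields an inverse: the group order n is
+ * prime, so Fermat's little theorem gives a^(n-1) = 1 (mod n) for a != 0,
+ * hence a^(n-2) = a^(-1) (mod n). Both branches raise a to n-2; the
+ * exponent is a public constant, so the sequence of multiplications does
+ * not depend on any secret value. */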
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN  64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 384 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash Hash to sign.
+ * hashLen Length of the hash data.
+ * rng Random number generator.
+ * priv Private part of key - scalar.
+ * rm First part of result as an mp_int.
+ * sm Second part of result as an mp_int.
+ * km Scalar to use for k, or NULL/zero to generate a random k (a supplied
+ * km is zeroed once consumed).
+ * heap Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*6];
+    sp_digit xd[2*6];
+    sp_digit kd[2*6];
+    sp_digit rd[2*6];
+    sp_digit td[3 * 2*6];
+    sp_point_384 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int64_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_384_point_new_6(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 6, heap,
+            DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 6;
+        x = d + 2 * 6;
+        k = d + 4 * 6;
+        r = d + 6 * 6;
+        tmp = d + 8 * 6;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(e, 6, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_384_from_mp(x, 6, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_384_ecc_gen_k_6(rng, k);
+        }
+        else {
+            sp_384_from_mp(k, 6, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_base_6(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 6U);
+            sp_384_norm_6(r);
+            c = sp_384_cmp_6(r, p384_order);
+            sp_384_cond_sub_6(r, r, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_6(r);
+
+            /* Convert k to Montgomery form (mod order) */
+            sp_384_mul_6(k, k, p384_norm_order);
+            err = sp_384_mod_6(k, k, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_6(k);
+            /* kInv = 1/k mod order */
+            sp_384_mont_inv_order_6(kInv, k, tmp);
+            sp_384_norm_6(kInv);
+
+            /* s = r * x + e */
+            sp_384_mul_6(x, x, r);
+            err = sp_384_mod_6(x, x, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_6(x);
+            carry = sp_384_add_6(s, e, x);
+            sp_384_cond_sub_6(s, s, p384_order, 0 - carry);
+            sp_384_norm_6(s);
+            c = sp_384_cmp_6(s, p384_order);
+            sp_384_cond_sub_6(s, s, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_6(s);
+
+            /* s = s * k^-1 mod order */
+            sp_384_mont_mul_order_6(s, s, kInv);
+            sp_384_norm_6(s);
+
+            /* Check that signature is usable. */
+            if (sp_384_iszero_6(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 6);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 6U);
+#endif
+    sp_384_point_free_6(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
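+
+/* Usage sketch for sp_ecc_sign_384 (digest and key variable names are
+ * illustrative): passing km == NULL makes the function draw a fresh random
+ * k per attempt, retrying up to SP_ECC_MAX_SIG_GEN times until s is
+ * non-zero.
+ *
+ *     mp_int r, s;
+ *     mp_init(&r);
+ *     mp_init(&s);
+ *     ret = sp_ecc_sign_384(digest, 48, rng, privKey, &r, &s, NULL, NULL);
+ */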
                    && sp_384_iszero_6(p1->y)) {
+                    sp_384_proj_point_dbl_6(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 6, r);
+        err = sp_384_mod_mul_norm_6(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_6(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_6(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 6, r);
+            carry = sp_384_add_6(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_6(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_6(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montgomery form. */
+                    err = sp_384_mod_mul_norm_6(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_6(u1, u2, p1->z, p384_mod,
+                            p384_mp_mod);
+                        *res = (int)(sp_384_cmp_6(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_6(p1, 0, heap);
+    sp_384_point_free_6(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_384_ecc_is_point_6(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*6];
+    sp_digit t2d[2*6];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 6;
+        t2 = d + 2 * 6;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        /* t1 = y^2 - x^3 + 3x mod p, which equals b when on the curve. */
+        sp_384_sqr_6(t1, point->y);
+        (void)sp_384_mod_6(t1, t1, p384_mod);
+        sp_384_sqr_6(t2, point->x);
+        (void)sp_384_mod_6(t2, t2, p384_mod);
+        sp_384_mul_6(t2, t2, point->x);
+        (void)sp_384_mod_6(t2, t2, p384_mod);
+        (void)sp_384_sub_6(t2, p384_mod, t2);
+        sp_384_mont_add_6(t1, t1, t2, p384_mod);
+
+        sp_384_mont_add_6(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_6(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_6(t1, t1, point->x, p384_mod);
+
+        if (sp_384_cmp_6(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
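+ *
+ * [Editor's sketch] A hedged, self-contained toy version of the check that
+ * sp_384_ecc_is_point_6 above performs (y^2 == x^3 - 3x + b mod p); the
+ * small prime and constant are made up for illustration, not the P-384
+ * values, and the block is compiled out:
+ */
+#if 0
+#include <stdint.h>
+static int toy_is_point(uint64_t x, uint64_t y, uint64_t p, uint64_t b)
+{
+    uint64_t lhs = (y * y) % p;               /* y^2 mod p               */
+    uint64_t rhs = (((x * x) % p) * x) % p;   /* x^3 mod p               */
+    rhs = (rhs + 3 * (p - x)) % p;            /* - 3x, kept non-negative */
+    rhs = (rhs + b) % p;                      /* + curve constant b      */
+    return lhs == rhs;
+}
+/* Example: p = 23, b = 5 -> (1, 7) is on the curve: 49 % 23 == 1 - 3 + 5. */
+#endif
+/* (sp_ecc_is_point_384, continued)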
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 pubd;
+#endif
+    sp_point_384* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_6(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pub->x, 6, pX);
+        sp_384_from_mp(pub->y, 6, pY);
+        sp_384_from_bin(pub->z, 6, one, (int)sizeof(one));
+
+        err = sp_384_ecc_is_point_6(pub, NULL);
+    }
+
+    sp_384_point_free_6(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (pX, pY), the point
+ * is on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point is at infinity or does not have
+ * the correct order, ECC_OUT_OF_RANGE_E if pX or pY is not less than the
+ * prime, ECC_PRIV_KEY_E when the private scalar doesn't generate the EC
+ * point and MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[6];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_6(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+            DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_384_from_mp(pub->x, 6, pX);
+        sp_384_from_mp(pub->y, 6, pY);
+        sp_384_from_bin(pub->z, 6, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 6, privm);
+
+        /* Check point at infinity. */
+        if ((sp_384_iszero_6(pub->x) != 0) &&
+            (sp_384_iszero_6(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y. */
+        if (sp_384_cmp_6(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_6(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve. */
+        err = sp_384_ecc_is_point_6(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+        err = sp_384_ecc_mulmod_6(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_6(p->x) == 0) ||
+            (sp_384_iszero_6(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+        err = sp_384_ecc_mulmod_base_6(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_6(p->x, pub->x) != 0 ||
+            sp_384_cmp_6(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(p, 0, heap);
+    sp_384_point_free_6(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
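+ * The (X, Y, Z) triples here are Jacobian projective coordinates: they
+ * stand for the affine point (X/Z^2, Y/Z^3), which is what lets the add
+ * and double routines avoid a field inversion until sp_ecc_map_384 below
+ * is called.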
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ) + * + * pX First EC point's X ordinate. + * pY First EC point's Y ordinate. + * pZ First EC point's Z ordinate. + * qX Second EC point's X ordinate. + * qY Second EC point's Y ordinate. + * qZ Second EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 6 * 5]; + sp_point_384 pd; + sp_point_384 qd; +#endif + sp_digit* tmp; + sp_point_384* p; + sp_point_384* q = NULL; + int err; + + err = sp_384_point_new_6(NULL, pd, p); + if (err == MP_OKAY) { + err = sp_384_point_new_6(NULL, qd, q); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 6, pX); + sp_384_from_mp(p->y, 6, pY); + sp_384_from_mp(p->z, 6, pZ); + sp_384_from_mp(q->x, 6, qX); + sp_384_from_mp(q->y, 6, qY); + sp_384_from_mp(q->z, 6, qZ); + + sp_384_proj_point_add_6(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_6(q, 0, NULL); + sp_384_point_free_6(p, 0, NULL); + + return err; +} + +/* Double a projective EC point. + * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
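+ *
+ * [Editor's sketch] A hedged usage outline for the doubling/mapping pair
+ * (compiled out; example_dbl and the hex-string inputs are illustrative
+ * placeholders, not values from this file, and error handling of the
+ * mp_int setup calls is elided):
+ */
+#if 0
+static int example_dbl(const char* xHex, const char* yHex, const char* zHex)
+{
+    mp_int x, y, z;
+    int err;
+
+    (void)mp_init(&x); (void)mp_init(&y); (void)mp_init(&z);
+    (void)mp_read_radix(&x, xHex, 16);
+    (void)mp_read_radix(&y, yHex, 16);
+    (void)mp_read_radix(&z, zHex, 16);
+
+    /* (x, y, z) <- 2.(x, y, z), result still in projective form. */
+    err = sp_ecc_proj_dbl_point_384(&x, &y, &z, &x, &y, &z);
+    if (err == MP_OKAY) {
+        /* One field inversion maps back to affine; z becomes one. */
+        err = sp_ecc_map_384(&x, &y, &z);
+    }
+
+    mp_clear(&x); mp_clear(&y); mp_clear(&z);
+    return err;
+}
+#endif
+/* The generated routine follows.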
+ */ +int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 6 * 2]; + sp_point_384 pd; +#endif + sp_digit* tmp; + sp_point_384* p; + int err; + + err = sp_384_point_new_6(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 6, pX); + sp_384_from_mp(p->y, 6, pY); + sp_384_from_mp(p->z, 6, pZ); + + sp_384_proj_point_dbl_6(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_6(p, 0, NULL); + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 6 * 6]; + sp_point_384 pd; +#endif + sp_digit* tmp; + sp_point_384* p; + int err; + + err = sp_384_point_new_6(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 6, pX); + sp_384_from_mp(p->y, 6, pY); + sp_384_from_mp(p->z, 6, pZ); + + sp_384_map_6(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, pZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_6(p, 0, NULL); + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
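+ *
+ * [Editor's note] For a prime p with p = 3 (mod 4), a square root of y is
+ * y^((p+1)/4) mod p, and the fixed addition chain below evaluates exactly
+ * that exponent for the P-384 prime. A hedged toy rendering of the same
+ * idea on small numbers (compiled out):
+ */
+#if 0
+#include <stdint.h>
+static uint64_t toy_modpow(uint64_t b, uint64_t e, uint64_t m)
+{
+    uint64_t r = 1;
+    b %= m;
+    while (e != 0) {                 /* square-and-multiply */
+        if (e & 1) {
+            r = (r * b) % m;
+        }
+        b = (b * b) % m;
+        e >>= 1;
+    }
+    return r;
+}
+/* p = 23 (3 mod 4): toy_modpow(13, (23 + 1) / 4, 23) = 6, and 6*6 = 36 = 13 (mod 23). */
+#endif
+/* The generated routine follows.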
+ */ +static int sp_384_mont_sqrt_6(sp_digit* y) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit t1d[2 * 6]; + sp_digit t2d[2 * 6]; + sp_digit t3d[2 * 6]; + sp_digit t4d[2 * 6]; + sp_digit t5d[2 * 6]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* t3; + sp_digit* t4; + sp_digit* t5; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 6, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = d + 0 * 6; + t2 = d + 2 * 6; + t3 = d + 4 * 6; + t4 = d + 6 * 6; + t5 = d + 8 * 6; +#else + t1 = t1d; + t2 = t2d; + t3 = t3d; + t4 = t4d; + t5 = t5d; +#endif + + { + /* t2 = y ^ 0x2 */ + sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3 */ + sp_384_mont_mul_6(t1, t2, y, p384_mod, p384_mp_mod); + /* t5 = y ^ 0xc */ + sp_384_mont_sqr_n_6(t5, t1, 2, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xf */ + sp_384_mont_mul_6(t1, t1, t5, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x1e */ + sp_384_mont_sqr_6(t2, t1, p384_mod, p384_mp_mod); + /* t3 = y ^ 0x1f */ + sp_384_mont_mul_6(t3, t2, y, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3e0 */ + sp_384_mont_sqr_n_6(t2, t3, 5, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3ff */ + sp_384_mont_mul_6(t1, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x7fe0 */ + sp_384_mont_sqr_n_6(t2, t1, 5, p384_mod, p384_mp_mod); + /* t3 = y ^ 0x7fff */ + sp_384_mont_mul_6(t3, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fff800 */ + sp_384_mont_sqr_n_6(t2, t3, 15, p384_mod, p384_mp_mod); + /* t4 = y ^ 0x3ffffff */ + sp_384_mont_mul_6(t4, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xffffffc000000 */ + sp_384_mont_sqr_n_6(t2, t4, 30, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xfffffffffffff */ + sp_384_mont_mul_6(t1, t4, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xfffffffffffffff000000000000000 */ + sp_384_mont_sqr_n_6(t2, t1, 60, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xffffffffffffffffffffffffffffff */ + sp_384_mont_mul_6(t1, t1, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */ + sp_384_mont_sqr_n_6(t2, t1, 120, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_6(t1, t1, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */ + sp_384_mont_sqr_n_6(t2, t1, 15, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_6(t1, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */ + sp_384_mont_sqr_n_6(t2, t1, 31, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */ + sp_384_mont_mul_6(t1, t4, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */ + sp_384_mont_sqr_n_6(t2, t1, 4, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */ + sp_384_mont_mul_6(t1, t5, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */ + sp_384_mont_sqr_n_6(t2, t1, 62, 
p384_mod, p384_mp_mod); + /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */ + sp_384_mont_mul_6(t1, y, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */ + sp_384_mont_sqr_n_6(y, t1, 30, p384_mod, p384_mp_mod); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit xd[2 * 6]; + sp_digit yd[2 * 6]; +#endif + sp_digit* x = NULL; + sp_digit* y = NULL; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 6, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + x = d + 0 * 6; + y = d + 2 * 6; +#else + x = xd; + y = yd; +#endif + + sp_384_from_mp(x, 6, xm); + err = sp_384_mod_mul_norm_6(x, x, p384_mod); + } + if (err == MP_OKAY) { + /* y = x^3 */ + { + sp_384_mont_sqr_6(y, x, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(y, y, x, p384_mod, p384_mp_mod); + } + /* y = x^3 - 3x */ + sp_384_mont_sub_6(y, y, x, p384_mod); + sp_384_mont_sub_6(y, y, x, p384_mod); + sp_384_mont_sub_6(y, y, x, p384_mod); + /* y = x^3 - 3x + b */ + err = sp_384_mod_mul_norm_6(x, p384_b, p384_mod); + } + if (err == MP_OKAY) { + sp_384_mont_add_6(y, y, x, p384_mod); + /* y = sqrt(x^3 - 3x + b) */ + err = sp_384_mont_sqrt_6(y); + } + if (err == MP_OKAY) { + XMEMSET(y + 6, 0, 6U * sizeof(sp_digit)); + sp_384_mont_reduce_6(y, p384_mod, p384_mp_mod); + if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) { + sp_384_mont_sub_6(y, p384_mod, y, p384_mod); + } + + err = sp_384_to_mp(y, ym); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} +#endif +#endif /* WOLFSSL_SP_384 */ +#endif /* WOLFSSL_HAVE_SP_ECC */ +#endif /* WOLFSSL_SP_ARM64_ASM */ +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */ diff --git a/client/wolfssl/wolfcrypt/src/sp_armthumb.c b/client/wolfssl/wolfcrypt/src/sp_armthumb.c new file mode 100644 index 0000000..40cb431 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/sp_armthumb.c @@ -0,0 +1,27863 @@ +/* sp.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implementation by Sean Parkinson. */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
+    defined(WOLFSSL_HAVE_SP_ECC)
+
+#ifdef RSA_LOW_MEM
+#ifndef WOLFSSL_SP_SMALL
+#define WOLFSSL_SP_SMALL
+#endif
+#endif
+
+#include <wolfssl/wolfcrypt/sp.h>
+
+#ifdef WOLFSSL_SP_ARM_THUMB_ASM
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+/* Read big endian unsigned byte array into r.
+ *
+ * r     A single precision integer.
+ * size  Maximum number of digits to convert into.
+ * a     Byte array.
+ * n     Number of bytes in array to read.
+ */
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r     A single precision integer.
+ * size  Maximum number of digits to convert into.
+ * a     A multi-precision integer.
+ */
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 256
+ *
+ * r  A single precision integer.
+ * a  Byte array.
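+ *
+ * [Editor's sketch] sp_2048_from_bin above and sp_2048_to_bin below convert
+ * between big-endian bytes and little-endian 32-bit limbs. A hedged minimal
+ * version of the packing direction, without the partial-word handling the
+ * generated code needs (compiled out):
+ */
+#if 0
+#include <stdint.h>
+/* Pack n bytes (big endian, n a multiple of 4) into 32-bit limbs,
+ * least significant limb first. */
+static void toy_from_bin(uint32_t* r, const uint8_t* a, int n)
+{
+    int i;
+    for (i = 0; i < n / 4; i++) {
+        const uint8_t* p = a + n - 4 * (i + 1);    /* last 4 bytes first */
+        r[i] = ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
+               ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
+    }
+}
+#endif
+/* The generated routine follows.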
+ */ +static void sp_2048_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + j = 2048 / 8 - 1; + a[j] = 0; + for (i=0; i<64 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ + b += 8 - s; + if (j < 0) { + break; + } + while (b < 32) { + a[j--] = (byte)(r[i] >> b); + b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 32); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[8 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r9, %[a]\n\t" + "mov r10, %[b]\n\t" + "mov r6, #32\n\t" + "add r6, r9\n\t" + "mov r12, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #28\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov %[b], r8\n\t" + "sub %[b], %[a]\n\t" + "add %[a], r9\n\t" + "add %[b], r10\n\t" + "\n2:\n\t" + "# Multiply Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply Done\n\t" + "add %[a], #4\n\t" + "sub %[b], #4\n\t" + "cmp %[a], r12\n\t" + "beq 3f\n\t" + "mov r6, r8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #56\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[a], r9\n\t" + "mov %[b], r10\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
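+ *
+ * [Editor's note] Thumb-1 has no 32x32->64 multiply, which is why the
+ * assembly above builds every limb product from four 16x16->32 partial
+ * products. A hedged C rendering of that decomposition for a single limb
+ * pair (compiled out):
+ */
+#if 0
+#include <stdint.h>
+static uint64_t toy_mul32(uint32_t a, uint32_t b)
+{
+    uint32_t al = a & 0xffffU, ah = a >> 16;
+    uint32_t bl = b & 0xffffU, bh = b >> 16;
+    uint64_t lo  = (uint64_t)al * bl;                      /* bits  0..31 */
+    uint64_t mid = (uint64_t)al * bh + (uint64_t)ah * bl;  /* bits 16..48 */
+    uint64_t hi  = (uint64_t)ah * bh;                      /* bits 32..63 */
+    return lo + (mid << 16) + (hi << 32);   /* == (uint64_t)a * b */
+}
+#endif
+/* The generated routine follows.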
+ */ +SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r6, #64\n\t" + "neg r6, r6\n\t" + "add sp, r6\n\t" + "mov r10, sp\n\t" + "mov r9, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #28\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov r2, r8\n\t" + "sub r2, %[a]\n\t" + "add %[a], r9\n\t" + "add r2, r9\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + "# Multiply * 2: Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply * 2: Done\n\t" + "bal 5f\n\t" + "\n4:\n\t" + "# Square: Start\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r6\n\t" + "add r3, r6\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "mul r7, r7\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Square: Done\n\t" + "\n5:\n\t" + "add %[a], #4\n\t" + "sub r2, #4\n\t" + "mov r6, #32\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r7, r8\n\t" + "add r7, r9\n\t" + "cmp %[a], r7\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r10\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #56\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "mov %[a], r9\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov %[a], r10\n\t" + "mov r3, #60\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "sub r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #64\n\t" + "add sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
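+ *
+ * [Editor's sketch] A hedged portable reference for the add/adc carry chain
+ * the assembly implements (compiled out; sp_digit is a 32-bit word in this
+ * build):
+ */
+#if 0
+static sp_digit toy_add_n(sp_digit* r, const sp_digit* a, const sp_digit* b,
+    int n)
+{
+    sp_digit c = 0;                /* carry in/out, 0 or 1 */
+    int i;
+    for (i = 0; i < n; i++) {
+        sp_digit t = a[i] + c;     /* wraps only when a[i] is all ones */
+        sp_digit c1 = (sp_digit)(t < c);
+        r[i] = t + b[i];
+        c = c1 | (sp_digit)(r[i] < t);
+    }
+    return c;
+}
+#endif
+/* The generated routine follows.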
+ */ +SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "add r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5" + ); + + return c; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sub r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "sbc %[c], %[c]\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
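+ *
+ * [Editor's sketch] The matching borrow chain for the sub/sbc sequence in
+ * sp_2048_sub_in_place_16 above, as hedged portable C (compiled out):
+ */
+#if 0
+static sp_digit toy_sub_in_place_n(sp_digit* a, const sp_digit* b, int n)
+{
+    sp_digit c = 0;                /* borrow in/out, 0 or 1 */
+    int i;
+    for (i = 0; i < n; i++) {
+        sp_digit t = a[i] - b[i];
+        sp_digit c1 = (sp_digit)(t > a[i]);     /* borrow from a - b  */
+        a[i] = t - c;
+        c = c1 | (sp_digit)(a[i] > t);          /* borrow from carry  */
+    }
+    return (sp_digit)0 - c;        /* all-ones mask on borrow, like the asm */
+}
+#endif
+/* The generated routine follows.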
+ */ +SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "add r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<8; i++) { + r[i] = a[i] & m; + } +#else + r[0] = a[0] & m; + r[1] = a[1] & m; + r[2] = a[2] & m; + r[3] = a[3] & m; + r[4] = a[4] & m; + r[5] = a[5] & m; + r[6] = a[6] & m; + r[7] = a[7] & m; +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[16]; + sp_digit a1[8]; + sp_digit b1[8]; + sp_digit z2[16]; + sp_digit u, ca, cb; + + ca = sp_2048_add_8(a1, a, &a[8]); + cb = sp_2048_add_8(b1, b, &b[8]); + u = ca & cb; + sp_2048_mul_8(z1, a1, b1); + sp_2048_mul_8(z2, &a[8], &b[8]); + sp_2048_mul_8(z0, a, b); + sp_2048_mask_8(r + 16, a1, 0 - cb); + sp_2048_mask_8(b1, b1, 0 - ca); + u += sp_2048_add_8(r + 16, r + 16, b1); + u += sp_2048_sub_in_place_16(z1, z2); + u += sp_2048_sub_in_place_16(z1, z0); + u += sp_2048_add_16(r + 8, r + 8, z1); + r[24] = u; + XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1)); + (void)sp_2048_add_16(r + 16, r + 16, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
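+ *
+ * [Editor's note] sp_2048_mul_16 above is one Karatsuba step: writing
+ * a = a1*W + a0 and b = b1*W + b0 with W = 2^256, three half-size products
+ * suffice because
+ *   a*b = a1*b1*W^2 + ((a1 + a0)*(b1 + b0) - a1*b1 - a0*b0)*W + a0*b0,
+ * with the mask/add fix-ups accounting for the carries out of a1 + a0 and
+ * b1 + b0. A hedged toy rendering on 16-bit halves (compiled out):
+ */
+#if 0
+#include <stdint.h>
+static uint64_t toy_karatsuba(uint32_t a, uint32_t b)
+{
+    uint32_t a1 = a >> 16, a0 = a & 0xffffU;
+    uint32_t b1 = b >> 16, b0 = b & 0xffffU;
+    uint64_t z2 = (uint64_t)a1 * b1;
+    uint64_t z0 = (uint64_t)a0 * b0;
+    uint64_t z1 = (uint64_t)(a1 + a0) * (b1 + b0) - z2 - z0;
+    return (z2 << 32) + (z1 << 16) + z0;    /* == (uint64_t)a * b */
+}
+#endif
+/* The generated routine follows.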
+ */ +SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[16]; + sp_digit z1[16]; + sp_digit a1[8]; + sp_digit u; + + u = sp_2048_add_8(a1, a, &a[8]); + sp_2048_sqr_8(z1, a1); + sp_2048_sqr_8(z2, &a[8]); + sp_2048_sqr_8(z0, a); + sp_2048_mask_8(r + 16, a1, 0 - u); + u += sp_2048_add_8(r + 16, r + 16, r + 16); + u += sp_2048_sub_in_place_16(z1, z2); + u += sp_2048_sub_in_place_16(z1, z0); + u += sp_2048_add_16(r + 8, r + 8, z1); + r[24] = u; + XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1)); + (void)sp_2048_add_16(r + 16, r + 16, z2); +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sub r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], 
#100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c]\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "add r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], 
#96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<16; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 16; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[32]; + sp_digit a1[16]; + sp_digit b1[16]; + sp_digit z2[32]; + sp_digit u, ca, cb; + + ca = sp_2048_add_16(a1, a, &a[16]); + cb = sp_2048_add_16(b1, b, &b[16]); + u = ca & cb; + sp_2048_mul_16(z1, a1, b1); + sp_2048_mul_16(z2, &a[16], &b[16]); + sp_2048_mul_16(z0, a, b); + sp_2048_mask_16(r + 32, a1, 0 - cb); + sp_2048_mask_16(b1, b1, 0 - ca); + u += sp_2048_add_16(r + 32, r + 32, b1); + u += sp_2048_sub_in_place_32(z1, z2); + u += sp_2048_sub_in_place_32(z1, z0); + u += sp_2048_add_32(r + 16, r + 16, z1); + r[48] = u; + XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1)); + (void)sp_2048_add_32(r + 32, r + 32, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[32]; + sp_digit z1[32]; + sp_digit a1[16]; + sp_digit u; + + u = sp_2048_add_16(a1, a, &a[16]); + sp_2048_sqr_16(z1, a1); + sp_2048_sqr_16(z2, &a[16]); + sp_2048_sqr_16(z0, a); + sp_2048_mask_16(r + 32, a1, 0 - u); + u += sp_2048_add_16(r + 32, r + 32, r + 32); + u += sp_2048_sub_in_place_32(z1, z2); + u += sp_2048_sub_in_place_32(z1, z0); + u += sp_2048_add_32(r + 16, r + 16, z1); + r[48] = u; + XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1)); + (void)sp_2048_add_32(r + 32, r + 32, z2); +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sub r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "mov r5, #0\n\t" + "sub r5, 
%[c]\n\t" + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c]\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. 
+ * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mvn r7, r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "add r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], 
#124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "add %[r], #0x80\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" 
(b) + : + : "memory", "r4", "r5", "r7" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<32; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 32; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[64]; + sp_digit a1[32]; + sp_digit b1[32]; + sp_digit z2[64]; + sp_digit u, ca, cb; + + ca = sp_2048_add_32(a1, a, &a[32]); + cb = sp_2048_add_32(b1, b, &b[32]); + u = ca & cb; + sp_2048_mul_32(z1, a1, b1); + sp_2048_mul_32(z2, &a[32], &b[32]); + sp_2048_mul_32(z0, a, b); + sp_2048_mask_32(r + 64, a1, 0 - cb); + sp_2048_mask_32(b1, b1, 0 - ca); + u += sp_2048_add_32(r + 64, r + 64, b1); + u += sp_2048_sub_in_place_64(z1, z2); + u += sp_2048_sub_in_place_64(z1, z0); + u += sp_2048_add_64(r + 32, r + 32, z1); + r[96] = u; + XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); + (void)sp_2048_add_64(r + 64, r + 64, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[64]; + sp_digit z1[64]; + sp_digit a1[32]; + sp_digit u; + + u = sp_2048_add_32(a1, a, &a[32]); + sp_2048_sqr_32(z1, a1); + sp_2048_sqr_32(z2, &a[32]); + sp_2048_sqr_32(z0, a); + sp_2048_mask_32(r + 64, a1, 0 - u); + u += sp_2048_add_32(r + 64, r + 64, r + 64); + u += sp_2048_sub_in_place_64(z1, z2); + u += sp_2048_sub_in_place_64(z1, z0); + u += sp_2048_add_64(r + 32, r + 32, z1); + r[96] = u; + XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); + (void)sp_2048_add_64(r + 64, r + 64, z2); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "mov r7, #0\n\t" + "mov r4, #1\n\t" + "lsl r4, #8\n\t" + "sub r7, #1\n\t" + "add r6, r4\n\t" + "\n1:\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r]]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #4\n\t" + "add %[b], #4\n\t" + "add %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. 
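sp_2048_mul_64 above is one level of Karatsuba: a and b are split into 32-word halves (a = a1*B + a0 with B = 2^1024), three half-size products z0 = a0*b0, z2 = a1*b1 and z1 = (a0+a1)*(b0+b1) are formed, and the middle term is recovered as z1 - z2 - z0. The sp_2048_mask_32 calls fold the carry bits of the two half-sums back in without branching. A minimal sketch of the same identity on one 64-bit word, using the GCC/Clang __uint128_t extension (function name is mine, not from the patch):

    #include <stdint.h>

    /* One-level Karatsuba on the 32-bit halves of a 64-bit word: three
     * multiplies instead of four, recombined at base B = 2^32. */
    static __uint128_t karatsuba_mul64(uint64_t a, uint64_t b)
    {
        uint64_t a0 = (uint32_t)a, a1 = a >> 32;
        uint64_t b0 = (uint32_t)b, b1 = b >> 32;
        __uint128_t z0 = (__uint128_t)a0 * b0;                         /* low product  */
        __uint128_t z2 = (__uint128_t)a1 * b1;                         /* high product */
        __uint128_t z1 = (__uint128_t)(a0 + a1) * (b0 + b1) - z2 - z0; /* middle term  */
        return z0 + (z1 << 32) + (z2 << 64);
    }

The 2048-bit version has no wider type to lean on, so the same recombination is carried out with explicit carry words, which is where the u accumulator and the sp_2048_sub_in_place_64 calls come from.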
+ */ +SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + __asm__ __volatile__ ( + "mov r7, %[a]\n\t" + "mov r5, #1\n\t" + "lsl r5, #8\n\t" + "add r7, r5\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b]]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a]]\n\t" + "str r4, [%[a], #4]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #8\n\t" + "add %[b], #8\n\t" + "cmp %[a], r7\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[64 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r9, %[a]\n\t" + "mov r10, %[b]\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r9\n\t" + "mov r12, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #252\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov %[b], r8\n\t" + "sub %[b], %[a]\n\t" + "add %[a], r9\n\t" + "add %[b], r10\n\t" + "\n2:\n\t" + "# Multiply Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply Done\n\t" + "add %[a], #4\n\t" + "sub %[b], #4\n\t" + "cmp %[a], r12\n\t" + "beq 3f\n\t" + "mov r6, r8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #248\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[a], r9\n\t" + "mov %[b], r10\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
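The WOLFSSL_SP_SMALL multiply above is product scanning: r3/r4/r5 act as a three-word accumulator, each pass of the outer loop sums one anti-diagonal of partial products and emits one result word. The 16-bit shifting exists because Thumb-1 MUL returns only the low 32 bits of a product, so each 32x32 product has to be assembled from half-words. A C sketch of the same scan, assuming a 64-bit type is available (helper name is mine, not from the patch):

    #include <stdint.h>

    /* Product-scanning (comba) multiply: one output word per anti-diagonal,
     * carried in the 96-bit accumulator acc2:acc1:acc0. n is the word count
     * (64 for sp_2048_mul_64). */
    static void comba_mul(uint32_t* r, const uint32_t* a, const uint32_t* b, int n)
    {
        uint32_t acc0 = 0, acc1 = 0, acc2 = 0;
        int k, i;

        for (k = 0; k < 2 * n - 1; k++) {
            int lo = (k < n) ? 0 : k - n + 1;
            int hi = (k < n) ? k : n - 1;
            for (i = lo; i <= hi; i++) {
                uint64_t p = (uint64_t)a[i] * b[k - i];
                uint64_t t = (uint64_t)acc0 + (uint32_t)p;
                acc0 = (uint32_t)t;
                t = (uint64_t)acc1 + (uint32_t)(p >> 32) + (uint32_t)(t >> 32);
                acc1 = (uint32_t)t;
                acc2 += (uint32_t)(t >> 32);
            }
            r[k] = acc0;                        /* emit one word, then shift  */
            acc0 = acc1; acc1 = acc2; acc2 = 0; /* the accumulator down a word */
        }
        r[2 * n - 1] = acc0;
    }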
+ */ +SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "neg r6, r6\n\t" + "add sp, r6\n\t" + "mov r10, sp\n\t" + "mov r9, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #252\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov r2, r8\n\t" + "sub r2, %[a]\n\t" + "add %[a], r9\n\t" + "add r2, r9\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + "# Multiply * 2: Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply * 2: Done\n\t" + "bal 5f\n\t" + "\n4:\n\t" + "# Square: Start\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r6\n\t" + "add r3, r6\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "mul r7, r7\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Square: Done\n\t" + "\n5:\n\t" + "add %[a], #4\n\t" + "sub r2, #4\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r7, r8\n\t" + "add r7, r9\n\t" + "cmp %[a], r7\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r10\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #248\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "mov %[a], r9\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov %[a], r10\n\t" + "mov r3, #1\n\t" + "lsl r3, r3, #8\n\t" + "add r3, #252\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "sub r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#ifdef WOLFSSL_SP_SMALL +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. 
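The squaring loop above halves the work by computing each off-diagonal product once and adding it into the accumulator twice (the "Multiply * 2" sections), while each diagonal term is added once. The "lsr r7, r6, #15" / "lsl r6, r6, #17" pair in the "Square" section is the same doubling applied to the a0*a1 half-word cross term. The identity on one 32-bit word, as a sketch (helper name is mine):

    #include <stdint.h>

    /* (a1*2^16 + a0)^2 = a1^2*2^32 + 2*a0*a1*2^16 + a0^2; the cross term is
     * shifted by 17 = 16 + 1, the "+1" being the doubling. */
    static uint64_t sqr_word(uint32_t a)
    {
        uint64_t a0 = a & 0xffff;
        uint64_t a1 = a >> 16;
        return a0 * a0 + ((a0 * a1) << 17) + ((a1 * a1) << 32);
    }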
+ */ +static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) +{ + int i; + + for (i=0; i<32; i++) { + r[i] = a[i] & m; + } +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "mov r7, #0\n\t" + "add r6, #128\n\t" + "sub r7, #1\n\t" + "\n1:\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r]]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #4\n\t" + "add %[b], #4\n\t" + "add %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + __asm__ __volatile__ ( + "mov r7, %[a]\n\t" + "add r7, #128\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b]]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a]]\n\t" + "str r4, [%[a], #4]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #8\n\t" + "add %[b], #8\n\t" + "cmp %[a], r7\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
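The add and subtract loops above thread a single carry/borrow through the whole array: "add %[c], r7" with r7 = -1 re-arms the carry flag from the previous word, and "sbc %[c], %[c]" captures the borrow as 0 or -1. The same chains in C, as a sketch with uint32_t digits (names are mine):

    #include <stdint.h>

    /* Word-serial add with carry, mirroring sp_2048_add_32. */
    static uint32_t add_n(uint32_t* r, const uint32_t* a, const uint32_t* b, int n)
    {
        uint64_t c = 0;
        int i;
        for (i = 0; i < n; i++) {
            uint64_t t = (uint64_t)a[i] + b[i] + c;
            r[i] = (uint32_t)t;
            c = t >> 32;
        }
        return (uint32_t)c;                 /* 0 or 1 */
    }

    /* In-place subtract with borrow, mirroring sp_2048_sub_in_place_32. */
    static uint32_t sub_in_place_n(uint32_t* a, const uint32_t* b, int n)
    {
        uint64_t brw = 0;
        int i;
        for (i = 0; i < n; i++) {
            uint64_t t = (uint64_t)a[i] - b[i] - brw;
            a[i] = (uint32_t)t;
            brw = (t >> 32) & 1;            /* 1 when the subtraction wrapped */
        }
        return (uint32_t)(0 - brw);         /* 0 or 0xffffffff, like the asm */
    }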
+ */ +SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[32 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r9, %[a]\n\t" + "mov r10, %[b]\n\t" + "mov r6, #128\n\t" + "add r6, r9\n\t" + "mov r12, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #124\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov %[b], r8\n\t" + "sub %[b], %[a]\n\t" + "add %[a], r9\n\t" + "add %[b], r10\n\t" + "\n2:\n\t" + "# Multiply Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply Done\n\t" + "add %[a], #4\n\t" + "sub %[b], #4\n\t" + "cmp %[a], r12\n\t" + "beq 3f\n\t" + "mov r6, r8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #248\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[a], r9\n\t" + "mov %[b], r10\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "neg r6, r6\n\t" + "add sp, r6\n\t" + "mov r10, sp\n\t" + "mov r9, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #124\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov r2, r8\n\t" + "sub r2, %[a]\n\t" + "add %[a], r9\n\t" + "add r2, r9\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + "# Multiply * 2: Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply * 2: Done\n\t" + "bal 5f\n\t" + "\n4:\n\t" + "# Square: Start\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r6\n\t" + "add r3, r6\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "mul r7, r7\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Square: Done\n\t" + "\n5:\n\t" + "add %[a], #4\n\t" + "sub r2, #4\n\t" + "mov r6, #128\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r7, r8\n\t" + "add r7, r9\n\t" + "cmp %[a], r7\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r10\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #248\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "mov %[a], r9\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov %[a], r10\n\t" + "mov r3, #252\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "sub r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */ + +/* Caclulate the bottom digit of -1/a mod 2^n. + * + * a A single precision number. + * rho Bottom word of inverse. 
+ */ +static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho) +{ + sp_digit x, b; + + b = a[0]; + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ + + /* rho = -1/m mod b */ + *rho = -x; +} + +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, + sp_digit b) +{ + __asm__ __volatile__ ( + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, %[a]\n\t" + "mov r8, %[r]\n\t" + "mov r9, r6\n\t" + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "# A[] * B\n\t" + "ldr r6, [%[a]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, %[b], #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "lsr r7, %[b], #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, %[b], #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "lsl r7, %[b], #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# A[] * B - Done\n\t" + "mov %[r], r8\n\t" + "str r3, [%[r]]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add %[r], #4\n\t" + "add %[a], #4\n\t" + "mov r8, %[r]\n\t" + "cmp %[a], r9\n\t" + "blt 1b\n\t" + "str r3, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + ); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 2048 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 32); + + /* r = 2^n mod m */ + sp_2048_sub_in_place_32(r, m); +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #128\n\t" + "mov r8, r5\n\t" + "mov r7, #0\n\t" + "1:\n\t" + "ldr r6, [%[b], r7]\n\t" + "and r6, %[m]\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r5, [%[a], r7]\n\t" + "sbc r5, r6\n\t" + "sbc %[c], %[c]\n\t" + "str r5, [%[r], r7]\n\t" + "add r7, #4\n\t" + "cmp r7, r8\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r7", "r8" + ); + + return c; +} + +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
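sp_2048_mont_setup above is Hensel lifting (a 2-adic Newton iteration): for odd b the seed (((b + 2) & 4) << 1) + b is an exact inverse mod 2^4, and every x *= 2 - b*x step doubles the number of correct low bits, so three steps reach 2^32; rho is then the negation. A standalone sketch that can be compiled and checked (names are mine, not from the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Inverse of an odd word mod 2^32, exactly as sp_2048_mont_setup does it. */
    static uint32_t inv_mod_2_32(uint32_t b)        /* b must be odd */
    {
        uint32_t x = (((b + 2) & 4) << 1) + b;      /* x*b == 1 (mod 2^4)  */
        x *= 2 - b * x;                             /* ... (mod 2^8)       */
        x *= 2 - b * x;                             /* ... (mod 2^16)      */
        x *= 2 - b * x;                             /* ... (mod 2^32)      */
        return x;
    }

    int main(void)
    {
        uint32_t b = 0x87654321U;                   /* any odd word        */
        printf("%u\n", b * inv_mod_2_32(b));        /* prints 1            */
        return 0;
    }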
+ */ +SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "mov r8, %[mp]\n\t" + "mov r12, %[ca]\n\t" + "mov r14, %[m]\n\t" + "mov r9, %[a]\n\t" + "mov r4, #0\n\t" + "# i = 0\n\t" + "mov r11, r4\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "mov %[ca], #0\n\t" + "# mu = a[i] * mp\n\t" + "mov %[mp], r8\n\t" + "ldr %[a], [%[a]]\n\t" + "mul %[mp], %[a]\n\t" + "mov %[m], r14\n\t" + "mov r10, r9\n\t" + "\n2:\n\t" + "# a[i+j] += m[j] * mu\n\t" + "mov %[a], r10\n\t" + "ldr %[a], [%[a]]\n\t" + "mov %[ca], #0\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "# Multiply m[j] and mu - Start\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r6, %[mp], #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add %[a], r7\n\t" + "adc r5, %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add %[a], r6\n\t" + "adc r5, r7\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r6, %[mp], #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r5, r7\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add %[a], r6\n\t" + "adc r5, r7\n\t" + "# Multiply m[j] and mu - Done\n\t" + "add r4, %[a]\n\t" + "adc r5, %[ca]\n\t" + "mov %[a], r10\n\t" + "str r4, [%[a]]\n\t" + "mov r6, #4\n\t" + "add %[m], #4\n\t" + "add r10, r6\n\t" + "mov r4, #124\n\t" + "add r4, r9\n\t" + "cmp r10, r4\n\t" + "blt 2b\n\t" + "# a[i+31] += m[31] * mu\n\t" + "mov %[ca], #0\n\t" + "mov r4, r12\n\t" + "mov %[a], #0\n\t" + "# Multiply m[31] and mu - Start\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r6, %[mp], #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r5, r7\n\t" + "adc r4, %[ca]\n\t" + "adc %[a], %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r5, r6\n\t" + "adc r4, r7\n\t" + "adc %[a], %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r6, %[mp], #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc %[a], %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r5, r6\n\t" + "adc r4, r7\n\t" + "adc %[a], %[ca]\n\t" + "# Multiply m[31] and mu - Done\n\t" + "mov %[ca], %[a]\n\t" + "mov %[a], r10\n\t" + "ldr r7, [%[a], #4]\n\t" + "ldr %[a], [%[a]]\n\t" + "mov r6, #0\n\t" + "add r5, %[a]\n\t" + "adc r7, r4\n\t" + "adc %[ca], r6\n\t" + "mov %[a], r10\n\t" + "str r5, [%[a]]\n\t" + "str r7, [%[a], #4]\n\t" + "# i += 1\n\t" + "mov r6, #4\n\t" + "add r9, r6\n\t" + "add r11, r6\n\t" + "mov r12, %[ca]\n\t" + "mov %[a], r9\n\t" + "mov r4, #128\n\t" + "cmp r11, r4\n\t" + "blt 1b\n\t" + "mov %[m], r14\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. 
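The reduction loop above works word by word: each pass picks mu = a[i] * mp mod 2^32, where mp = -1/m[0] mod 2^32, so that adding mu*m clears the word a[i]; after 32 passes the low half is zero and the top half is a * R^-1 mod m (R = 2^1024 for these 32-word operands), up to one final masked subtraction. A C analogue, as a sketch rather than a faithful translation of the register scheduling:

    #include <stdint.h>

    /* Word-wise Montgomery reduction: a has 2n words, m has n words,
     * mp = -1/m[0] mod 2^32. The result lands in a[n..2n-1]; the returned
     * carry drives the final masked subtraction of m, as in the call
     * sp_2048_cond_sub_32(a - 32, a, m, 0 - ca) above. */
    static uint32_t mont_reduce(uint32_t* a, const uint32_t* m, uint32_t mp, int n)
    {
        uint32_t over = 0;
        int i, j;

        for (i = 0; i < n; i++) {
            uint32_t mu = a[i] * mp;        /* a[i] + mu*m[0] == 0 (mod 2^32) */
            uint64_t c = 0, t;
            for (j = 0; j < n; j++) {
                t = (uint64_t)a[i + j] + (uint64_t)mu * m[j] + c;
                a[i + j] = (uint32_t)t;
                c = t >> 32;
            }
            t = (uint64_t)a[i + n] + c + over;  /* fold the carry into the top */
            a[i + n] = (uint32_t)t;
            over = (uint32_t)(t >> 32);
        }
        return over;
    }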
+ */ +static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_2048_mul_32(r, a, b); + sp_2048_mont_reduce_32(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_2048_sqr_32(r, a); + sp_2048_mont_reduce_32(r, m, mp); +} + +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, + sp_digit b) +{ + __asm__ __volatile__ ( + "mov r6, #128\n\t" + "add r6, %[a]\n\t" + "mov r8, %[r]\n\t" + "mov r9, r6\n\t" + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "# A[] * B\n\t" + "ldr r6, [%[a]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, %[b], #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "lsr r7, %[b], #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, %[b], #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "lsl r7, %[b], #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# A[] * B - Done\n\t" + "mov %[r], r8\n\t" + "str r3, [%[r]]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add %[r], #4\n\t" + "add %[a], #4\n\t" + "mov r8, %[r]\n\t" + "cmp %[a], r9\n\t" + "blt 1b\n\t" + "str r3, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + ); +} + +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger.
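Both wrappers keep values in Montgomery form: with R = 2^1024 for these 32-word routines (2^2048 for the 64-word ones further down) and x' = x*R mod m, the reduction divides by R, so

    mont_mul(a*R, b*R) = (a*R)*(b*R)*R^-1 = (a*b)*R (mod m)

and the form is closed under multiplication and squaring. The mont_norm routines supply R mod m for entering the form, and one final mont_reduce of the bare value with its top words zeroed converts back out, which is exactly what the mod_exp routines below do after their last multiply.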
+ */ +SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, + sp_digit div) +{ + sp_digit r = 0; + + __asm__ __volatile__ ( + "lsr r5, %[div], #1\n\t" + "add r5, #1\n\t" + "mov r8, %[d0]\n\t" + "mov r9, %[d1]\n\t" + "# Do top 32\n\t" + "mov r6, r5\n\t" + "sub r6, %[d1]\n\t" + "sbc r6, r6\n\t" + "add %[r], %[r]\n\t" + "sub %[r], r6\n\t" + "and r6, r5\n\t" + "sub %[d1], r6\n\t" + "# Next 30 bits\n\t" + "mov r4, #29\n\t" + "1:\n\t" + "lsl %[d0], %[d0], #1\n\t" + "adc %[d1], %[d1]\n\t" + "mov r6, r5\n\t" + "sub r6, %[d1]\n\t" + "sbc r6, r6\n\t" + "add %[r], %[r]\n\t" + "sub %[r], r6\n\t" + "and r6, r5\n\t" + "sub %[d1], r6\n\t" + "sub r4, #1\n\t" + "bpl 1b\n\t" + "mov r7, #0\n\t" + "add %[r], %[r]\n\t" + "add %[r], #1\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "sub %[d1], r4\n\t" + "mov r4, %[d1]\n\t" + "mov %[d1], r9\n\t" + "sbc %[d1], r5\n\t" + "mov r5, %[d1]\n\t" + "add %[r], r5\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "mov r6, r9\n\t" + "sub r4, %[d1], r4\n\t" + "sbc r6, r5\n\t" + "mov r5, r6\n\t" + "add %[r], r5\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "mov r6, r9\n\t" + "sub r4, %[d1], r4\n\t" + "sbc r6, r5\n\t" + "mov r5, r6\n\t" + "add %[r], r5\n\t" + "mov r6, %[div]\n\t" + "sub r6, r4\n\t" + "sbc r6, r6\n\t" + "sub %[r], r6\n\t" + : [r] "+r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "r4", "r5", "r7", "r6", "r8", "r9" + ); + return r; +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. 
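The asm builds a bit-by-bit quotient estimate against div/2 + 1, doubles it, and then runs three multiply-back correction rounds; as the header comment says, the result may still be one too large, and the caller compensates. A plain C model of the contract (name is mine; Thumb-1 has no udiv instruction, which is why the asm exists at all):

    #include <stdint.h>

    /* Reference model for div_2048_word_32: quotient of the 64-bit value
     * d1:d0 by div, assuming d1 < div so the quotient fits in one word,
     * as the long-division callers arrange. */
    static uint32_t div_word_model(uint32_t d1, uint32_t d0, uint32_t div)
    {
        uint64_t n = ((uint64_t)d1 << 32) | d0;
        return (uint32_t)(n / div);
    }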
+ */ +SP_NOINLINE static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; + + + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mvn r3, r3\n\t" + "mov r6, #124\n\t" + "1:\n\t" + "ldr r7, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r7, r3\n\t" + "and r5, r3\n\t" + "mov r4, r7\n\t" + "sub r7, r5\n\t" + "sbc r7, r7\n\t" + "add %[r], r7\n\t" + "mvn r7, r7\n\t" + "and r3, r7\n\t" + "sub r5, r4\n\t" + "sbc r7, r7\n\t" + "sub %[r], r7\n\t" + "mvn r7, r7\n\t" + "and r3, r7\n\t" + "sub r6, #4\n\t" + "cmp r6, #0\n\t" + "bge 1b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "r3", "r4", "r5", "r6", "r7" + ); + + return r; +} + +/* Divide a by d and put the remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Number to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[64], t2[33]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[31]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 32); + for (i=31; i>=0; i--) { + r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div); + + sp_2048_mul_d_32(t2, d, r1); + t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2); + t1[32 + i] -= t2[32]; + sp_2048_mask_32(t2, d, t1[32 + i]); + t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2); + sp_2048_mask_32(t2, d, t1[32 + i]); + t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2); + } + + r1 = sp_2048_cmp_32(t1, d) >= 0; + sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_32(a, m, NULL, r); +} + +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
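sp_2048_cmp_32 above walks all 32 words from the top, so its timing does not depend on where the values first differ; the r3 mask freezes the result once a difference has been seen. The same idea in C, as a sketch (name is mine; it relies on the compiler emitting branch-free comparisons, an assumption the hand-written asm does not need):

    #include <stdint.h>

    /* Constant-time compare of two n-word little-endian values: every word
     * is visited, and the most significant difference decides the sign. */
    static int32_t ct_cmp(const uint32_t* a, const uint32_t* b, int n)
    {
        int32_t r = 0;
        uint32_t live = 0xffffffffU;   /* all-ones until a difference is seen */
        int i;
        for (i = n - 1; i >= 0; i--) {
            uint32_t eq = (uint32_t)0 - (uint32_t)(a[i] == b[i]);
            uint32_t gt = (uint32_t)0 - (uint32_t)(a[i] > b[i]);
            r += (int32_t)(live & gt & 1);         /* +1 on first a[i] > b[i] */
            r -= (int32_t)(live & ~eq & ~gt & 1);  /* -1 on first a[i] < b[i] */
            live &= eq;                            /* freeze after that word  */
        }
        return r;
    }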
+ */ +static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][64]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 64, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 64; + } +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_32(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 32U); + if (reduceA != 0) { + err = sp_2048_mod_32(t[1] + 32, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_32(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32); + err = sp_2048_mod_32(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_32(t[10], t[ 5], m, mp); + sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_32(t[12], t[ 6], m, mp); + sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_32(t[14], t[ 7], m, mp); + sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 32); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n <<= 4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + + sp_2048_mont_mul_32(r, r, t[y], m, mp); + } + + XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U); + sp_2048_mont_reduce_32(r, m, mp); + + mask = 0 - (sp_2048_cmp_32(r, m) >= 0); + sp_2048_cond_sub_32(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
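This variant is fixed-window exponentiation with 4-bit windows: t[k] holds a^k in Montgomery form, and each window costs four squarings plus one table multiply. The same walk on scalars, as an illustrative sketch with no side-channel hardening (names are mine):

    #include <stdint.h>

    /* Fixed 4-bit-window exponentiation: a^e mod m for 32-bit values. */
    static uint32_t pow_mod_win4(uint32_t a, uint32_t e, uint32_t m)
    {
        uint64_t t[16];
        uint64_t r;
        int i;

        t[0] = 1 % m;
        for (i = 1; i < 16; i++)
            t[i] = (t[i - 1] * (uint64_t)a) % m;   /* t[k] = a^k mod m   */

        r = t[(e >> 28) & 0xf];                    /* top window seeds r */
        for (i = 24; i >= 0; i -= 4) {
            r = (r * r) % m;                       /* four squarings     */
            r = (r * r) % m;                       /* shift the          */
            r = (r * r) % m;                       /* accumulator left   */
            r = (r * r) % m;                       /* by one window...   */
            r = (r * t[(e >> i) & 0xf]) % m;       /* ...then fold in a^w */
        }
        return (uint32_t)r;
    }

The non-small variant that follows uses 5-bit windows instead: for a 2048-bit exponent, roughly 510 table multiplies drop to about 410, paid for by doubling the precomputed table from 16 to 32 entries of 64 words each.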
+ */ +static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][64]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 64; + } +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_32(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 32U); + if (reduceA != 0) { + err = sp_2048_mod_32(t[1] + 32, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_32(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32); + err = sp_2048_mod_32(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_32(t[10], t[ 5], m, mp); + sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_32(t[12], t[ 6], m, mp); + sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_32(t[14], t[ 7], m, mp); + sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_32(t[16], t[ 8], m, mp); + sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_32(t[18], t[ 9], m, mp); + sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_32(t[20], t[10], m, mp); + sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_32(t[22], t[11], m, mp); + sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_32(t[24], t[12], m, mp); + sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_32(t[26], t[13], m, mp); + sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_32(t[28], t[14], m, mp); + sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_32(t[30], t[15], m, mp); + sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 32); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + + sp_2048_mont_mul_32(r, r, t[y], m, mp); + } + + XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U); + sp_2048_mont_reduce_32(r, m, mp); + + mask = 0 - (sp_2048_cmp_32(r, m) >= 0); + sp_2048_cond_sub_32(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ + +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && 
!WOLFSSL_RSA_PUBLIC_ONLY */ + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 2048 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 64); + + /* r = 2^n mod m */ + sp_2048_sub_in_place_64(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #1\n\t" + "lsl r5, r5, #8\n\t" + "mov r8, r5\n\t" + "mov r7, #0\n\t" + "1:\n\t" + "ldr r6, [%[b], r7]\n\t" + "and r6, %[m]\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r5, [%[a], r7]\n\t" + "sbc r5, r6\n\t" + "sbc %[c], %[c]\n\t" + "str r5, [%[r], r7]\n\t" + "add r7, #4\n\t" + "cmp r7, r8\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r7", "r8" + ); + + return c; +} + +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "mov r8, %[mp]\n\t" + "mov r12, %[ca]\n\t" + "mov r14, %[m]\n\t" + "mov r9, %[a]\n\t" + "mov r4, #0\n\t" + "# i = 0\n\t" + "mov r11, r4\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "mov %[ca], #0\n\t" + "# mu = a[i] * mp\n\t" + "mov %[mp], r8\n\t" + "ldr %[a], [%[a]]\n\t" + "mul %[mp], %[a]\n\t" + "mov %[m], r14\n\t" + "mov r10, r9\n\t" + "\n2:\n\t" + "# a[i+j] += m[j] * mu\n\t" + "mov %[a], r10\n\t" + "ldr %[a], [%[a]]\n\t" + "mov %[ca], #0\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "# Multiply m[j] and mu - Start\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r6, %[mp], #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add %[a], r7\n\t" + "adc r5, %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add %[a], r6\n\t" + "adc r5, r7\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r6, %[mp], #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r5, r7\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add %[a], r6\n\t" + "adc r5, r7\n\t" + "# Multiply m[j] and mu - Done\n\t" + "add r4, %[a]\n\t" + "adc r5, %[ca]\n\t" + "mov %[a], r10\n\t" + "str r4, [%[a]]\n\t" + "mov r6, #4\n\t" + "add %[m], #4\n\t" + "add r10, r6\n\t" + "mov r4, #252\n\t" + "add r4, r9\n\t" + "cmp r10, r4\n\t" + "blt 2b\n\t" + "# a[i+63] += m[63] * mu\n\t" + "mov %[ca], #0\n\t" + "mov r4, r12\n\t" + "mov %[a], #0\n\t" + "# Multiply m[63] and mu - Start\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r6, %[mp], #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r5, r7\n\t" + "adc 
r4, %[ca]\n\t" + "adc %[a], %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r5, r6\n\t" + "adc r4, r7\n\t" + "adc %[a], %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r6, %[mp], #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc %[a], %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r5, r6\n\t" + "adc r4, r7\n\t" + "adc %[a], %[ca]\n\t" + "# Multiply m[63] and mu - Done\n\t" + "mov %[ca], %[a]\n\t" + "mov %[a], r10\n\t" + "ldr r7, [%[a], #4]\n\t" + "ldr %[a], [%[a]]\n\t" + "mov r6, #0\n\t" + "add r5, %[a]\n\t" + "adc r7, r4\n\t" + "adc %[ca], r6\n\t" + "mov %[a], r10\n\t" + "str r5, [%[a]]\n\t" + "str r7, [%[a], #4]\n\t" + "# i += 1\n\t" + "mov r6, #4\n\t" + "add r9, r6\n\t" + "add r11, r6\n\t" + "mov r12, %[ca]\n\t" + "mov %[a], r9\n\t" + "mov r4, #1\n\t" + "lsl r4, r4, #8\n\t" + "cmp r11, r4\n\t" + "blt 1b\n\t" + "mov %[m], r14\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montgomery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montgomery form. + * b Second number to multiply in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_2048_mul_64(r, a, b); + sp_2048_mont_reduce_64(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_2048_sqr_64(r, a); + sp_2048_mont_reduce_64(r, m, mp); +} + +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger.
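sp_2048_mont_norm_64 above relies on the modulus having its top bit set: R mod m for R = 2^2048 is simply 2^2048 - m, i.e. the two's-complement negation of m across 64 words, which is why zeroing r and calling sp_2048_sub_in_place_64 suffices. The same computation in C, as a sketch (name is mine):

    #include <stdint.h>

    /* R mod m for R = 2^(32*n), valid when m's top bit is set: compute
     * 0 - m across the array, exactly what XMEMSET + sub_in_place does. */
    static void mont_norm(uint32_t* r, const uint32_t* m, int n)
    {
        uint64_t brw = 0;
        int i;
        for (i = 0; i < n; i++) {
            uint64_t t = 0 - (uint64_t)m[i] - brw;
            r[i] = (uint32_t)t;
            brw = (t >> 32) & 1;
        }
    }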
+ */ +SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, + sp_digit div) +{ + sp_digit r = 0; + + __asm__ __volatile__ ( + "lsr r5, %[div], #1\n\t" + "add r5, #1\n\t" + "mov r8, %[d0]\n\t" + "mov r9, %[d1]\n\t" + "# Do top 32\n\t" + "mov r6, r5\n\t" + "sub r6, %[d1]\n\t" + "sbc r6, r6\n\t" + "add %[r], %[r]\n\t" + "sub %[r], r6\n\t" + "and r6, r5\n\t" + "sub %[d1], r6\n\t" + "# Next 30 bits\n\t" + "mov r4, #29\n\t" + "1:\n\t" + "lsl %[d0], %[d0], #1\n\t" + "adc %[d1], %[d1]\n\t" + "mov r6, r5\n\t" + "sub r6, %[d1]\n\t" + "sbc r6, r6\n\t" + "add %[r], %[r]\n\t" + "sub %[r], r6\n\t" + "and r6, r5\n\t" + "sub %[d1], r6\n\t" + "sub r4, #1\n\t" + "bpl 1b\n\t" + "mov r7, #0\n\t" + "add %[r], %[r]\n\t" + "add %[r], #1\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "sub %[d1], r4\n\t" + "mov r4, %[d1]\n\t" + "mov %[d1], r9\n\t" + "sbc %[d1], r5\n\t" + "mov r5, %[d1]\n\t" + "add %[r], r5\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "mov r6, r9\n\t" + "sub r4, %[d1], r4\n\t" + "sbc r6, r5\n\t" + "mov r5, r6\n\t" + "add %[r], r5\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "mov r6, r9\n\t" + "sub r4, %[d1], r4\n\t" + "sbc r6, r5\n\t" + "mov r5, r6\n\t" + "add %[r], r5\n\t" + "mov r6, %[div]\n\t" + "sub r6, r4\n\t" + "sbc r6, r6\n\t" + "sub %[r], r6\n\t" + : [r] "+r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "r4", "r5", "r7", "r6", "r8", "r9" + ); + return r; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. 
+ */ +static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<64; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 64; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +SP_NOINLINE static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; + + + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mvn r3, r3\n\t" + "mov r6, #252\n\t" + "1:\n\t" + "ldr r7, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r7, r3\n\t" + "and r5, r3\n\t" + "mov r4, r7\n\t" + "sub r7, r5\n\t" + "sbc r7, r7\n\t" + "add %[r], r7\n\t" + "mvn r7, r7\n\t" + "and r3, r7\n\t" + "sub r5, r4\n\t" + "sbc r7, r7\n\t" + "sub %[r], r7\n\t" + "mvn r7, r7\n\t" + "and r3, r7\n\t" + "sub r6, #4\n\t" + "cmp r6, #0\n\t" + "bge 1b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "r3", "r4", "r5", "r6", "r7" + ); + + return r; +} + +/* Divide a by d and put the remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Number to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[128], t2[65]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[63]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 64); + for (i=63; i>=0; i--) { + r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div); + + sp_2048_mul_d_64(t2, d, r1); + t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2); + t1[64 + i] -= t2[64]; + sp_2048_mask_64(t2, d, t1[64 + i]); + t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2); + sp_2048_mask_64(t2, d, t1[64 + i]); + t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2); + } + + r1 = sp_2048_cmp_64(t1, d) >= 0; + sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_64(a, m, NULL, r); +} + +/* Divide a by d and put the remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Number to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success.
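Because the quotient estimate from div_2048_word_64 can overshoot by one, each step of sp_2048_div_64 may drive the running top word negative; the two masked add-backs of d (through sp_2048_mask_64) repair that without branching. sp_2048_div_64_cond below does the same repair with ordinary if tests, presumably acceptable because it is only used on the public path (sp_RsaPublic_2048 calls sp_2048_mod_64_cond), where the operands are not secret. A scalar model of the overshoot-and-repair step (names are mine):

    #include <stdint.h>

    /* One long-division step: q_est is exact or one too large; when
     * subtracting q_est*d would underflow, give one d back. */
    static uint32_t div_step(uint64_t rem, uint32_t d, uint32_t q_est,
                             uint64_t* rem_out)
    {
        uint64_t sub = (uint64_t)q_est * d;
        if (sub > rem) {            /* overshoot detected */
            q_est -= 1;
            sub -= d;
        }
        *rem_out = rem - sub;
        return q_est;
    }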
+ */ +static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[128], t2[65]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[63]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 64); + for (i=63; i>=0; i--) { + r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div); + + sp_2048_mul_d_64(t2, d, r1); + t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2); + t1[64 + i] -= t2[64]; + if (t1[64 + i] != 0) { + t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d); + if (t1[64 + i] != 0) + t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d); + } + } + + r1 = sp_2048_cmp_64(t1, d) >= 0; + sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_64_cond(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][128]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 128, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 128; + } +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_64(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 64U); + if (reduceA != 0) { + err = sp_2048_mod_64(t[1] + 64, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_64(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64); + err = sp_2048_mod_64(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_64(t[10], t[ 5], m, mp); + sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_64(t[12], t[ 6], m, mp); + sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_64(t[14], t[ 7], m, mp); + sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], 
sizeof(sp_digit) * 64); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n <<= 4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + + sp_2048_mont_mul_64(r, r, t[y], m, mp); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U); + sp_2048_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_2048_cmp_64(r, m) >= 0); + sp_2048_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][128]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 128; + } +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_64(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 64U); + if (reduceA != 0) { + err = sp_2048_mod_64(t[1] + 64, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_64(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64); + err = sp_2048_mod_64(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_64(t[10], t[ 5], m, mp); + sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_64(t[12], t[ 6], m, mp); + sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_64(t[14], t[ 7], m, mp); + sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_64(t[16], t[ 8], m, mp); + sp_2048_mont_mul_64(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_64(t[18], t[ 9], m, mp); + sp_2048_mont_mul_64(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_64(t[20], t[10], m, mp); + sp_2048_mont_mul_64(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_64(t[22], t[11], m, mp); + sp_2048_mont_mul_64(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_64(t[24], t[12], m, mp); + sp_2048_mont_mul_64(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_64(t[26], t[13], m, mp); + sp_2048_mont_mul_64(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_64(t[28], t[14], m, mp); + sp_2048_mont_mul_64(t[29], t[15], t[14], m, mp); + 
sp_2048_mont_sqr_64(t[30], t[15], m, mp); + sp_2048_mont_mul_64(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 64); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + + sp_2048_mont_mul_64(r, r, t[y], m, mp); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U); + sp_2048_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_2048_cmp_64(r, m) >= 0); + sp_2048_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[128], m[64], r[128]; +#else + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; +#endif + sp_digit *ah; + sp_digit e[1]; + int err = MP_OKAY; + + if (*outLen < 256) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 256 || + mp_count_bits(mm) != 2048)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 64 * 2; + m = r + 64 * 2; + } +#endif + + if (err == MP_OKAY) { + ah = a + 64; + + sp_2048_from_bin(ah, 64, in, inLen); +#if DIGIT_BIT >= 32 + e[0] = em->dp[0]; +#else + e[0] = em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_2048_from_mp(m, 64, mm); + + if (e[0] == 0x3) { + if (err == MP_OKAY) { + sp_2048_sqr_64(r, ah); + err = sp_2048_mod_64_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_2048_mul_64(r, ah, r); + err = sp_2048_mod_64_cond(r, r, m); + } + } + else { + int i; + sp_digit mp; + + sp_2048_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
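+             * The base was loaded into the top 64 words of a and the
+             * bottom 64 words are zeroed below, so a = in * 2^2048;
+             * reducing that mod m leaves the Montgomery representation
+             * in * R mod m (R = 2^2048).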
             */
+            XMEMSET(a, 0, sizeof(sp_digit) * 64);
+            err = sp_2048_mod_64_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 64);
+                for (i--; i>=0; i--) {
+                    sp_2048_mont_sqr_64(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_2048_mont_mul_64(r, r, a, m, mp);
+                    }
+                }
+                XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
+                sp_2048_mont_reduce_64(r, m, mp);
+
+                for (i = 63; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_2048_sub_in_place_64(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+/* RSA private key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 256U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 2048) {
+            err = MP_READ_E;
+        }
+        if (inLen > 256) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL,
+            DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 64;
+        m = a + 128;
+        r = a;
+
+        sp_2048_from_bin(a, 64, in, inLen);
+        sp_2048_from_mp(d, 64, dm);
+        sp_2048_from_mp(m, 64, mm);
+        err = sp_2048_mod_exp_64(r, a, d, 2048, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 64);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+}
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov r5, #128\n\t"
+        "mov r8, r5\n\t"
+        "mov r7, #0\n\t"
+        "1:\n\t"
+        "ldr r6, [%[b], r7]\n\t"
+        "and r6, %[m]\n\t"
+        "mov r5, #0\n\t"
+        "sub r5, #1\n\t"
+        "add r5, %[c]\n\t"
+        "ldr r5, [%[a], r7]\n\t"
+        "adc r5, r6\n\t"
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"
+        "str r5, [%[r], r7]\n\t"
+        "add r7, #4\n\t"
+        "cmp r7, r8\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* RSA private key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
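+ *
+ * The result is computed with the CRT: tmpa = in^dpm mod pm and
+ * tmpb = in^dqm mod qm are combined as
+ * out = tmpb + qm * (((tmpa - tmpb) * qim) mod pm).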
+ */ +int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[64 * 2]; + sp_digit p[32], q[32], dp[32]; + sp_digit tmpa[64], tmpb[64]; +#else + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* tmpa; + sp_digit* tmpb; +#endif + sp_digit* r; + sp_digit* qi; + sp_digit* dq; + sp_digit c; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 256) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 64 * 2; + q = p + 32; + qi = dq = dp = q + 32; + tmpa = qi + 32; + tmpb = tmpa + 64; + + r = t + 64; + } +#else +#endif + + if (err == MP_OKAY) { +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + r = a; + qi = dq = dp; +#endif + sp_2048_from_bin(a, 64, in, inLen); + sp_2048_from_mp(p, 32, pm); + sp_2048_from_mp(q, 32, qm); + sp_2048_from_mp(dp, 32, dpm); + + err = sp_2048_mod_exp_32(tmpa, a, dp, 1024, p, 1); + } + if (err == MP_OKAY) { + sp_2048_from_mp(dq, 32, dqm); + err = sp_2048_mod_exp_32(tmpb, a, dq, 1024, q, 1); + } + + if (err == MP_OKAY) { + c = sp_2048_sub_in_place_32(tmpa, tmpb); + c += sp_2048_cond_add_32(tmpa, tmpa, p, c); + sp_2048_cond_add_32(tmpa, tmpa, p, c); + + sp_2048_from_mp(qi, 32, qim); + sp_2048_mul_32(tmpa, tmpa, qi); + err = sp_2048_mod_32(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_2048_mul_32(tmpa, q, tmpa); + XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32); + sp_2048_add_64(r, tmpb, tmpa); + + sp_2048_to_bin(r, out); + *outLen = 256; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); +#endif + + return err; +} +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. 
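+ *
+ * The DIGIT_BIT cases below handle mp_digit words that are exactly,
+ * narrower than, or wider than the 32-bit sp_digit words being copied.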
+ */ +static int sp_2048_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 64); + r->used = 64; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 64; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 32 - s; + } + r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 64; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[128], e[64], m[64]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 64, base); + sp_2048_from_mp(e, 64, exp); + sp_2048_from_mp(m, 64, mod); + + err = sp_2048_mod_exp_64(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_2048_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_2048 +static void sp_2048_lshift_64(sp_digit* r, sp_digit* a, byte n) +{ + __asm__ __volatile__ ( + "mov r6, #31\n\t" + "sub r6, r6, %[n]\n\t" + "add %[a], %[a], #192\n\t" + "add %[r], %[r], #192\n\t" + "ldr r3, [%[a], #60]\n\t" + "lsr r4, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r4, r4, r6\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl 
r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, 
r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #56]\n\t" + "str r2, [%[r], #64]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], #56]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #44]\n\t" + "str r2, [%[r], #52]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #32]\n\t" + "str r2, [%[r], #40]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #20]\n\t" + "str r2, [%[r], #28]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #8]\n\t" + "str r2, [%[r], #16]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #0]\n\t" + "str r3, [%[r], #8]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r3, [%[a], #60]\n\t" + "str r2, [%[r], #68]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr 
r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "str r3, [%[r]]\n\t" + "str r4, [%[r], #4]\n\t" + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) + : "memory", "r2", "r3", "r4", "r5", "r6" + ); +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
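+ *
+ * With a fixed base of 2, each 5-bit window "multiply" is done by
+ * shifting the Montgomery value left by the window bits and folding the
+ * word shifted out at the top back in with a single sp_2048_mul_d_64 of
+ * the normalizer, instead of a full Montgomery multiplication.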
+ */ +static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[128]; + sp_digit td[65]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 128; +#else + norm = nd; + tmp = td; +#endif + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_64(norm, m); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + sp_2048_lshift_64(r, norm, y); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + + sp_2048_lshift_64(r, r, y); + sp_2048_mul_d_64(tmp, norm, r[64]); + r[64] = 0; + o = sp_2048_add_64(r, r, tmp); + sp_2048_cond_sub_64(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U); + sp_2048_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_2048_cmp_64(r, m) >= 0); + sp_2048_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* HAVE_FFDHE_2048 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ + int err = MP_OKAY; + sp_digit b[128], e[64], m[64]; + sp_digit* r = b; + word32 i; + + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 256) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 64, base); + sp_2048_from_bin(e, 64, exp, expLen); + sp_2048_from_mp(m, 64, mod); + + #ifdef HAVE_FFDHE_2048 + if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1) + err = sp_2048_mod_exp_2_64(r, e, expLen * 8, m); + else + #endif + err = sp_2048_mod_exp_64(r, b, e, expLen * 8, m, 0); + + } + + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + for (i=0; i<256 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. 
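+ * The modulus is only 1024 bits, so the 32-word helpers generated for
+ * the 2048-bit code (sp_2048_mod_exp_32 and friends) perform the work.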
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[64], e[32], m[32]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1024) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 1024) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 1024) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 32, base); + sp_2048_from_mp(e, 32, exp); + sp_2048_from_mp(m, 32, mod); + + err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 32, 0, sizeof(*r) * 32U); + err = sp_2048_to_mp(r, res); + res->used = mod->used; + mp_clamp(res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* !WOLFSSL_SP_NO_2048 */ + +#ifndef WOLFSSL_SP_NO_3072 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 24U) { + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 32 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 32 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 32U) <= (word32)DIGIT_BIT) { + s += 32U; + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 32) { + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + s = 32 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 384 + * + * r A single precision integer. + * a Byte array. 
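+ * Values with fewer than 3072 significant bits are zero-padded at the
+ * front so the output always occupies the full 384 bytes.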
+ */ +static void sp_3072_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + j = 3072 / 8 - 1; + a[j] = 0; + for (i=0; i<96 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ + b += 8 - s; + if (j < 0) { + break; + } + while (b < 32) { + a[j--] = (byte)(r[i] >> b); + b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 32); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[12 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r9, %[a]\n\t" + "mov r10, %[b]\n\t" + "mov r6, #48\n\t" + "add r6, r9\n\t" + "mov r12, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #44\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov %[b], r8\n\t" + "sub %[b], %[a]\n\t" + "add %[a], r9\n\t" + "add %[b], r10\n\t" + "\n2:\n\t" + "# Multiply Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply Done\n\t" + "add %[a], #4\n\t" + "sub %[b], #4\n\t" + "cmp %[a], r12\n\t" + "beq 3f\n\t" + "mov r6, r8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #88\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[a], r9\n\t" + "mov %[b], r10\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
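+ *
+ * Product scanning: each 32-bit column sum is kept in a three-word
+ * accumulator and off-diagonal products are added twice. Thumb-1 MUL
+ * returns only the low 32 bits, so the 32x32 partial products are built
+ * from 16x16 half-word multiplies.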
+ */ +SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r6, #96\n\t" + "neg r6, r6\n\t" + "add sp, r6\n\t" + "mov r10, sp\n\t" + "mov r9, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #44\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov r2, r8\n\t" + "sub r2, %[a]\n\t" + "add %[a], r9\n\t" + "add r2, r9\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + "# Multiply * 2: Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply * 2: Done\n\t" + "bal 5f\n\t" + "\n4:\n\t" + "# Square: Start\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r6\n\t" + "add r3, r6\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "mul r7, r7\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Square: Done\n\t" + "\n5:\n\t" + "add %[a], #4\n\t" + "sub r2, #4\n\t" + "mov r6, #48\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r7, r8\n\t" + "add r7, r9\n\t" + "cmp %[a], r7\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r10\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #88\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "mov %[a], r9\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov %[a], r10\n\t" + "mov r3, #92\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "sub r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #96\n\t" + "add sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
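+ * Returns the carry out of the most significant word: the first word
+ * pair is added with add, the rest chain through adc, and a final adc
+ * of a zeroed register captures the carry as 0 or 1.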
+ */ +SP_NOINLINE static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "add r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5" + ); + + return c; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_3072_sub_in_place_24(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sub r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, 
[%[b], #68]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "sbc %[c], %[c]\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "add r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, 
[%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<12; i++) { + r[i] = a[i] & m; + } +#else + r[0] = a[0] & m; + r[1] = a[1] & m; + r[2] = a[2] & m; + r[3] = a[3] & m; + r[4] = a[4] & m; + r[5] = a[5] & m; + r[6] = a[6] & m; + r[7] = a[7] & m; + r[8] = a[8] & m; + r[9] = a[9] & m; + r[10] = a[10] & m; + r[11] = a[11] & m; +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[24]; + sp_digit a1[12]; + sp_digit b1[12]; + sp_digit z2[24]; + sp_digit u, ca, cb; + + ca = sp_3072_add_12(a1, a, &a[12]); + cb = sp_3072_add_12(b1, b, &b[12]); + u = ca & cb; + sp_3072_mul_12(z1, a1, b1); + sp_3072_mul_12(z2, &a[12], &b[12]); + sp_3072_mul_12(z0, a, b); + sp_3072_mask_12(r + 24, a1, 0 - cb); + sp_3072_mask_12(b1, b1, 0 - ca); + u += sp_3072_add_12(r + 24, r + 24, b1); + u += sp_3072_sub_in_place_24(z1, z2); + u += sp_3072_sub_in_place_24(z1, z0); + u += sp_3072_add_24(r + 12, r + 12, z1); + r[36] = u; + XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1)); + (void)sp_3072_add_24(r + 24, r + 24, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[24]; + sp_digit z1[24]; + sp_digit a1[12]; + sp_digit u; + + u = sp_3072_add_12(a1, a, &a[12]); + sp_3072_sqr_12(z1, a1); + sp_3072_sqr_12(z2, &a[12]); + sp_3072_sqr_12(z0, a); + sp_3072_mask_12(r + 24, a1, 0 - u); + u += sp_3072_add_12(r + 24, r + 24, r + 24); + u += sp_3072_sub_in_place_24(z1, z2); + u += sp_3072_sub_in_place_24(z1, z0); + u += sp_3072_add_24(r + 12, r + 12, z1); + r[36] = u; + XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1)); + (void)sp_3072_add_24(r + 24, r + 24, z2); +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
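+ * a is updated in place. Words are subtracted in pairs with sub/sbc and
+ * the closing sbc of the return register with itself yields 0 when no
+ * borrow remains and all ones (-1) on underflow.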
+ */ +SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sub r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "mov r5, #0\n\t" + "sub r5, 
%[c]\n\t" + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "sbc %[c], %[c]\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
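+ * Offsets only reach 124 bytes per pass, so after the first 32 words the
+ * carry is saved with adc into the return register and re-created by
+ * adding 0xffffffff (held in r7) before the second half resumes with adc.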
+ */ +SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mvn r7, r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "add r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], 
#0x80\n\t" + "add %[b], #0x80\n\t" + "add %[r], #0x80\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r7" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<24; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 24; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[48]; + sp_digit a1[24]; + sp_digit b1[24]; + sp_digit z2[48]; + sp_digit u, ca, cb; + + ca = sp_3072_add_24(a1, a, &a[24]); + cb = sp_3072_add_24(b1, b, &b[24]); + u = ca & cb; + sp_3072_mul_24(z1, a1, b1); + sp_3072_mul_24(z2, &a[24], &b[24]); + sp_3072_mul_24(z0, a, b); + sp_3072_mask_24(r + 48, a1, 0 - cb); + sp_3072_mask_24(b1, b1, 0 - ca); + u += sp_3072_add_24(r + 48, r + 48, b1); + u += sp_3072_sub_in_place_48(z1, z2); + u += sp_3072_sub_in_place_48(z1, z0); + u += sp_3072_add_48(r + 24, r + 24, z1); + r[72] = u; + XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1)); + (void)sp_3072_add_48(r + 48, r + 48, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
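+ *
+ * One level of Karatsuba: with a = aH * 2^768 + aL, only the half-size
+ * squares aL^2, aH^2 and (aL + aH)^2 are computed, and
+ * (aL + aH)^2 - aL^2 - aH^2 supplies the middle 2 * aL * aH term.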
+ */
+SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[48];
+    sp_digit z1[48];
+    sp_digit a1[24];
+    sp_digit u;
+
+    u = sp_3072_add_24(a1, a, &a[24]);
+    sp_3072_sqr_24(z1, a1);
+    sp_3072_sqr_24(z2, &a[24]);
+    sp_3072_sqr_24(z0, a);
+    sp_3072_mask_24(r + 48, a1, 0 - u);
+    u += sp_3072_add_24(r + 48, r + 48, r + 48);
+    u += sp_3072_sub_in_place_48(z1, z2);
+    u += sp_3072_sub_in_place_48(z1, z0);
+    u += sp_3072_add_48(r + 24, r + 24, z1);
+    r[72] = u;
+    XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
+    (void)sp_3072_add_48(r + 48, r + 48, z2);
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr r3, [%[a], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sub r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #0]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        "ldr r3, [%[a], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "ldr r6, [%[b], #12]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #8]\n\t"
+        "str r4, [%[a], #12]\n\t"
+        "ldr r3, [%[a], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "ldr r6, [%[b], #20]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #16]\n\t"
+        "str r4, [%[a], #20]\n\t"
+        "ldr r3, [%[a], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "ldr r6, [%[b], #28]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #24]\n\t"
+        "str r4, [%[a], #28]\n\t"
+        "ldr r3, [%[a], #32]\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "ldr r6, [%[b], #36]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #32]\n\t"
+        "str r4, [%[a], #36]\n\t"
+        "ldr r3, [%[a], #40]\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "ldr r6, [%[b], #44]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #40]\n\t"
+        "str r4, [%[a], #44]\n\t"
+        "ldr r3, [%[a], #48]\n\t"
+        "ldr r4, [%[a], #52]\n\t"
+        "ldr r5, [%[b], #48]\n\t"
+        "ldr r6, [%[b], #52]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #48]\n\t"
+        "str r4, [%[a], #52]\n\t"
+        "ldr r3, [%[a], #56]\n\t"
+        "ldr r4, [%[a], #60]\n\t"
+        "ldr r5, [%[b], #56]\n\t"
+        "ldr r6, [%[b], #60]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #56]\n\t"
+        "str r4, [%[a], #60]\n\t"
+        "ldr r3, [%[a], #64]\n\t"
+        "ldr r4, [%[a], #68]\n\t"
+        "ldr r5, [%[b], #64]\n\t"
+        "ldr r6, [%[b], #68]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #64]\n\t"
+        "str r4, [%[a], #68]\n\t"
+        "ldr r3, [%[a], #72]\n\t"
+        "ldr r4, [%[a], #76]\n\t"
+        "ldr r5, [%[b], #72]\n\t"
+        "ldr r6, [%[b], #76]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #72]\n\t"
+        "str r4, [%[a], #76]\n\t"
+        "ldr r3, [%[a], #80]\n\t"
+        "ldr r4, [%[a], #84]\n\t"
+        "ldr r5, [%[b], #80]\n\t"
+        "ldr r6, [%[b], #84]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #80]\n\t"
+        "str r4, [%[a], #84]\n\t"
+        "ldr r3, [%[a], #88]\n\t"
+        "ldr r4, [%[a], #92]\n\t"
+        "ldr r5, [%[b], #88]\n\t"
+        "ldr r6, [%[b], #92]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #88]\n\t"
+        "str r4, [%[a], #92]\n\t"
+        "ldr r3, [%[a], #96]\n\t"
+        "ldr r4, [%[a], #100]\n\t"
+        "ldr r5, [%[b], #96]\n\t"
+        "ldr r6, [%[b], #100]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #96]\n\t"
+        "str r4,
[%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + 
"sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, 
[%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c]\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mvn r7, r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "add r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr 
r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "add %[r], #0x80\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, 
[%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "add %[r], #0x80\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, 
[%[b], #112]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #112]\n\t"
+        "ldr r4, [%[a], #116]\n\t"
+        "ldr r5, [%[b], #116]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #116]\n\t"
+        "ldr r4, [%[a], #120]\n\t"
+        "ldr r5, [%[b], #120]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #120]\n\t"
+        "ldr r4, [%[a], #124]\n\t"
+        "ldr r5, [%[b], #124]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #124]\n\t"
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r7"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<48; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 48; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[96];
+    sp_digit a1[48];
+    sp_digit b1[48];
+    sp_digit z2[96];
+    sp_digit u, ca, cb;
+
+    ca = sp_3072_add_48(a1, a, &a[48]);
+    cb = sp_3072_add_48(b1, b, &b[48]);
+    u = ca & cb;
+    sp_3072_mul_48(z1, a1, b1);
+    sp_3072_mul_48(z2, &a[48], &b[48]);
+    sp_3072_mul_48(z0, a, b);
+    sp_3072_mask_48(r + 96, a1, 0 - cb);
+    sp_3072_mask_48(b1, b1, 0 - ca);
+    u += sp_3072_add_48(r + 96, r + 96, b1);
+    u += sp_3072_sub_in_place_96(z1, z2);
+    u += sp_3072_sub_in_place_96(z1, z0);
+    u += sp_3072_add_96(r + 48, r + 48, z1);
+    r[144] = u;
+    XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
+    (void)sp_3072_add_96(r + 96, r + 96, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[96];
+    sp_digit z1[96];
+    sp_digit a1[48];
+    sp_digit u;
+
+    u = sp_3072_add_48(a1, a, &a[48]);
+    sp_3072_sqr_48(z1, a1);
+    sp_3072_sqr_48(z2, &a[48]);
+    sp_3072_sqr_48(z0, a);
+    sp_3072_mask_48(r + 96, a1, 0 - u);
+    u += sp_3072_add_48(r + 96, r + 96, r + 96);
+    u += sp_3072_sub_in_place_96(z1, z2);
+    u += sp_3072_sub_in_place_96(z1, z0);
+    u += sp_3072_add_96(r + 48, r + 48, z1);
+    r[144] = u;
+    XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
+    (void)sp_3072_add_96(r + 96, r + 96, z2);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
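sp_3072_mul_96() and sp_3072_sqr_96() above are one level of Karatsuba: the 96-word operands are split into 48-word halves, three half-width products z0 = al*bl, z2 = ah*bh and z1 = (al + ah)*(bl + bh) are formed, and the middle column is recovered as z1 - z0 - z2, so only three half-width multiplications are needed instead of four; the mask/add calls fold the one-bit carries ca and cb from the half-width additions back in. A minimal sketch of the same identity on 64-bit operands with 32-bit halves, assuming a compiler with unsigned __int128 (e.g. GCC or Clang); the helper name is hypothetical, not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* One level of Karatsuba: 64-bit a and b split into 32-bit halves.
 * a*b = z2*2^64 + (z1 - z0 - z2)*2^32 + z0, three half-width multiplies. */
static unsigned __int128 karatsuba64(uint64_t a, uint64_t b)
{
    uint64_t al = (uint32_t)a, ah = a >> 32;
    uint64_t bl = (uint32_t)b, bh = b >> 32;
    unsigned __int128 z0 = (unsigned __int128)al * bl;
    unsigned __int128 z2 = (unsigned __int128)ah * bh;
    /* al+ah and bl+bh are at most 33 bits; the fixed-size code tracks the
     * extra bits as the ca/cb carry words instead. */
    unsigned __int128 z1 = (unsigned __int128)(al + ah) * (bl + bh) - z0 - z2;
    return (z2 << 64) + (z1 << 32) + z0;
}

int main(void)
{
    uint64_t a = 0x123456789abcdef0ULL, b = 0xfedcba9876543210ULL;
    printf("match: %d\n", karatsuba64(a, b) == (unsigned __int128)a * b);
    return 0;
}

In the fixed-size code the same recursion continues through sp_3072_mul_48() into sp_3072_mul_24(), and the squaring variant needs only one half-width addition since both operands are equal.

+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.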
+ */ +SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "mov r7, #0\n\t" + "mov r4, #1\n\t" + "lsl r4, #8\n\t" + "add r4, #128\n\t" + "sub r7, #1\n\t" + "add r6, r4\n\t" + "\n1:\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r]]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #4\n\t" + "add %[b], #4\n\t" + "add %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + __asm__ __volatile__ ( + "mov r7, %[a]\n\t" + "mov r5, #1\n\t" + "lsl r5, #8\n\t" + "add r5, #128\n\t" + "add r7, r5\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b]]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a]]\n\t" + "str r4, [%[a], #4]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #8\n\t" + "add %[b], #8\n\t" + "cmp %[a], r7\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[96 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r9, %[a]\n\t" + "mov r10, %[b]\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #128\n\t" + "add r6, r9\n\t" + "mov r12, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #124\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov %[b], r8\n\t" + "sub %[b], %[a]\n\t" + "add %[a], r9\n\t" + "add %[b], r10\n\t" + "\n2:\n\t" + "# Multiply Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply Done\n\t" + "add %[a], #4\n\t" + "sub %[b], #4\n\t" + "cmp %[a], r12\n\t" + "beq 3f\n\t" + "mov r6, r8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #2\n\t" + 
"lsl r6, r6, #8\n\t" + "add r6, #248\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[a], r9\n\t" + "mov %[b], r10\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r6, #3\n\t" + "lsl r6, r6, #8\n\t" + "neg r6, r6\n\t" + "add sp, r6\n\t" + "mov r10, sp\n\t" + "mov r9, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #124\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov r2, r8\n\t" + "sub r2, %[a]\n\t" + "add %[a], r9\n\t" + "add r2, r9\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + "# Multiply * 2: Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply * 2: Done\n\t" + "bal 5f\n\t" + "\n4:\n\t" + "# Square: Start\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r6\n\t" + "add r3, r6\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "mul r7, r7\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Square: Done\n\t" + "\n5:\n\t" + "add %[a], #4\n\t" + "sub r2, #4\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #128\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r7, r8\n\t" + "add r7, r9\n\t" + "cmp %[a], r7\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r10\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #248\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "mov %[a], r9\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov %[a], r10\n\t" + "mov r3, #2\n\t" + "lsl r3, r3, #8\n\t" + "add r3, #252\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "sub r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #3\n\t" + "lsl r6, r6, #8\n\t" + "add sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", 
"r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#ifdef WOLFSSL_SP_SMALL +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m) +{ + int i; + + for (i=0; i<48; i++) { + r[i] = a[i] & m; + } +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "mov r7, #0\n\t" + "add r6, #192\n\t" + "sub r7, #1\n\t" + "\n1:\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r]]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #4\n\t" + "add %[b], #4\n\t" + "add %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + __asm__ __volatile__ ( + "mov r7, %[a]\n\t" + "add r7, #192\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b]]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a]]\n\t" + "str r4, [%[a], #4]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #8\n\t" + "add %[b], #8\n\t" + "cmp %[a], r7\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[48 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r9, %[a]\n\t" + "mov r10, %[b]\n\t" + "mov r6, #192\n\t" + "add r6, r9\n\t" + "mov r12, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #188\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov %[b], r8\n\t" + "sub %[b], %[a]\n\t" + "add %[a], r9\n\t" + "add %[b], r10\n\t" + "\n2:\n\t" + "# Multiply Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply Done\n\t" + "add %[a], #4\n\t" + "sub %[b], #4\n\t" + "cmp %[a], r12\n\t" + "beq 3f\n\t" + "mov r6, r8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #120\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[a], r9\n\t" + "mov %[b], r10\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */
+SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r8, r3\n\t"
+        "mov r11, %[r]\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, #128\n\t"
+        "neg r6, r6\n\t"
+        "add sp, r6\n\t"
+        "mov r10, sp\n\t"
+        "mov r9, %[a]\n\t"
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r6, #188\n\t"
+        "mov %[a], r8\n\t"
+        "sub %[a], r6\n\t"
+        "sbc r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], r6\n\t"
+        "mov r2, r8\n\t"
+        "sub r2, %[a]\n\t"
+        "add %[a], r9\n\t"
+        "add r2, r9\n\t"
+        "\n2:\n\t"
+        "cmp r2, %[a]\n\t"
+        "beq 4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal 5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r6\n\t"
+        "add r3, r6\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "mul r7, r7\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #15\n\t"
+        "lsl r6, r6, #17\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add %[a], #4\n\t"
+        "sub r2, #4\n\t"
+        "mov r6, #192\n\t"
+        "add r6, r9\n\t"
+        "cmp %[a], r6\n\t"
+        "beq 3f\n\t"
+        "cmp %[a], r2\n\t"
+        "bgt 3f\n\t"
+        "mov r7, r8\n\t"
+        "add r7, r9\n\t"
+        "cmp %[a], r7\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        "mov %[r], r10\n\t"
+        "mov r7, r8\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r7, #4\n\t"
+        "mov r8, r7\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, #120\n\t"
+        "cmp r7, r6\n\t"
+        "ble 1b\n\t"
+        "mov %[a], r9\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov %[r], r11\n\t"
+        "mov %[a], r10\n\t"
+        "mov r3, #1\n\t"
+        "lsl r3, r3, #8\n\t"
+        "add r3, #124\n\t"
+        "\n4:\n\t"
+        "ldr r6, [%[a], r3]\n\t"
+        "str r6, [%[r], r3]\n\t"
+        "sub r3, #4\n\t"
+        "bge 4b\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, #128\n\t"
+        "add sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
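All of the WOLFSSL_SP_SMALL loops above work around the Thumb-1 mul instruction, which only returns the low 32 bits of a product: each multiply block builds a full 32x32->64 product out of four 16x16->32 partial products, accumulated into the three-register column sum r3:r4:r5 (in the squaring loop the cross products are added twice, and the lsl #17/lsr #15 pair doubles the mixed half-word term directly). The same decomposition in portable C, as a sketch with a hypothetical helper name:

#include <stdint.h>
#include <stdio.h>

/* 32x32 -> 64 multiply built from 16x16 -> 32 partial products, mirroring
 * the "# Multiply Start"/"# Multiply Done" blocks in the Thumb-1 asm above. */
static uint64_t mul32x32(uint32_t a, uint32_t b)
{
    uint32_t al = a & 0xffff, ah = a >> 16;
    uint32_t bl = b & 0xffff, bh = b >> 16;
    uint64_t lo  = (uint64_t)al * bl;          /* bits  0..31 */
    uint64_t mid = (uint64_t)al * bh
                 + (uint64_t)ah * bl;          /* bits 16..48 */
    uint64_t hi  = (uint64_t)ah * bh;          /* bits 32..63 */
    return lo + (mid << 16) + (hi << 32);
}

int main(void)
{
    uint32_t a = 0xdeadbeefU, b = 0xcafef00dU;
    printf("match: %d\n", mul32x32(a, b) == (uint64_t)a * b);
    return 0;
}

+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.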
+ */ +static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho) +{ + sp_digit x, b; + + b = a[0]; + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ + + /* rho = -1/m mod b */ + *rho = -x; +} + +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, + sp_digit b) +{ + __asm__ __volatile__ ( + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #128\n\t" + "add r6, %[a]\n\t" + "mov r8, %[r]\n\t" + "mov r9, r6\n\t" + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "# A[] * B\n\t" + "ldr r6, [%[a]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, %[b], #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "lsr r7, %[b], #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, %[b], #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "lsl r7, %[b], #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# A[] * B - Done\n\t" + "mov %[r], r8\n\t" + "str r3, [%[r]]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add %[r], #4\n\t" + "add %[a], #4\n\t" + "mov r8, %[r]\n\t" + "cmp %[a], r9\n\t" + "blt 1b\n\t" + "str r3, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + ); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 3072 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 48); + + /* r = 2^n mod m */ + sp_3072_sub_in_place_48(r, m); +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #192\n\t" + "mov r8, r5\n\t" + "mov r7, #0\n\t" + "1:\n\t" + "ldr r6, [%[b], r7]\n\t" + "and r6, %[m]\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r5, [%[a], r7]\n\t" + "sbc r5, r6\n\t" + "sbc %[c], %[c]\n\t" + "str r5, [%[r], r7]\n\t" + "add r7, #4\n\t" + "cmp r7, r8\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r7", "r8" + ); + + return c; +} + +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */
+SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov r8, %[mp]\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov r14, %[m]\n\t"
+        "mov r9, %[a]\n\t"
+        "mov r4, #0\n\t"
+        "# i = 0\n\t"
+        "mov r11, r4\n\t"
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        "mov %[ca], #0\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mov %[mp], r8\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mul %[mp], %[a]\n\t"
+        "mov %[m], r14\n\t"
+        "mov r10, r9\n\t"
+        "\n2:\n\t"
+        "# a[i+j] += m[j] * mu\n\t"
+        "mov %[a], r10\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov %[ca], #0\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "# Multiply m[j] and mu - Start\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r6, %[mp], #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add %[a], r7\n\t"
+        "adc r5, %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add %[a], r6\n\t"
+        "adc r5, r7\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add %[a], r6\n\t"
+        "adc r5, r7\n\t"
+        "# Multiply m[j] and mu - Done\n\t"
+        "add r4, %[a]\n\t"
+        "adc r5, %[ca]\n\t"
+        "mov %[a], r10\n\t"
+        "str r4, [%[a]]\n\t"
+        "mov r6, #4\n\t"
+        "add %[m], #4\n\t"
+        "add r10, r6\n\t"
+        "mov r4, #188\n\t"
+        "add r4, r9\n\t"
+        "cmp r10, r4\n\t"
+        "blt 2b\n\t"
+        "# a[i+47] += m[47] * mu\n\t"
+        "mov %[ca], #0\n\t"
+        "mov r4, r12\n\t"
+        "mov %[a], #0\n\t"
+        "# Multiply m[47] and mu - Start\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r6, %[mp], #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "adc r4, %[ca]\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "# Multiply m[47] and mu - Done\n\t"
+        "mov %[ca], %[a]\n\t"
+        "mov %[a], r10\n\t"
+        "ldr r7, [%[a], #4]\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov r6, #0\n\t"
+        "add r5, %[a]\n\t"
+        "adc r7, r4\n\t"
+        "adc %[ca], r6\n\t"
+        "mov %[a], r10\n\t"
+        "str r5, [%[a]]\n\t"
+        "str r7, [%[a], #4]\n\t"
+        "# i += 1\n\t"
+        "mov r6, #4\n\t"
+        "add r9, r6\n\t"
+        "add r11, r6\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov %[a], r9\n\t"
+        "mov r4, #192\n\t"
+        "cmp r11, r4\n\t"
+        "blt 1b\n\t"
+        "mov %[m], r14\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca);
+}
+
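sp_3072_mont_reduce_48() above is word-serial Montgomery reduction: for each of the 48 low words it picks mu = a[i] * mp (mod 2^32), with mp = -1/m[0] mod 2^32 from sp_3072_mont_setup(), so that adding mu*m clears that word; after 48 rounds the value has in effect been divided by R = 2^1536, and the trailing sp_3072_cond_sub_48() call completes the reduction without a data-dependent branch. A word-level sketch in C, using unsigned __int128 (GCC/Clang) only for the self-check; the function and variable names are hypothetical:

#include <stdint.h>
#include <stdio.h>

/* Reduce the 2n-word t by R = 2^(32n): t[n..2n-1] plus the returned carry
 * is t * R^-1 mod m, up to one final subtraction of m. */
static uint32_t mont_reduce(uint32_t* t, const uint32_t* m, uint32_t mp, int n)
{
    uint32_t extra = 0;                       /* carry spilling past t[i+n] */
    for (int i = 0; i < n; i++) {
        uint32_t mu = t[i] * mp;              /* t[i] + mu*m[0] == 0 mod 2^32 */
        uint64_t c = 0;
        for (int j = 0; j < n; j++) {
            uint64_t v = (uint64_t)mu * m[j] + t[i + j] + c;
            t[i + j] = (uint32_t)v;
            c = v >> 32;
        }
        uint64_t v = (uint64_t)t[i + n] + extra + c;
        t[i + n] = (uint32_t)v;
        extra = (uint32_t)(v >> 32);
    }
    return extra;
}

int main(void)
{
    uint32_t m = 0xfffffffbU;                 /* odd single-word modulus */
    uint32_t x = (((m + 2) & 4) << 1) + m;    /* sp_3072_mont_setup()'s trick */
    x *= 2 - m * x;  x *= 2 - m * x;  x *= 2 - m * x;
    uint32_t mp = (uint32_t)0 - x;            /* -1/m mod 2^32 */
    uint32_t a = 0x12345678U, b = 0x9abcdef1U;
    uint64_t ab = (uint64_t)a * b;
    uint32_t t[2] = { (uint32_t)ab, (uint32_t)(ab >> 32) };
    uint64_t res = ((uint64_t)mont_reduce(t, &m, mp, 1) << 32) | t[1];
    if (res >= m) res -= m;                   /* the conditional subtract */
    /* res == a*b / 2^32 mod m, so res * 2^32 must match a*b mod m */
    printf("match: %d\n",
        (uint32_t)(((unsigned __int128)res << 32) % m) == (uint32_t)(ab % m));
    return 0;
}

+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.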
+ */
+static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_48(r, a, b);
+    sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_48(r, a);
+    sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "mov r6, #192\n\t"
+        "add r6, %[a]\n\t"
+        "mov r8, %[r]\n\t"
+        "mov r9, r6\n\t"
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r5, #0\n\t"
+        "# A[] * B\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, %[b], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "lsr r7, %[b], #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, %[b], #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "lsl r7, %[b], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# A[] * B - Done\n\t"
+        "mov %[r], r8\n\t"
+        "str r3, [%[r]]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add %[r], #4\n\t"
+        "add %[a], #4\n\t"
+        "mov r8, %[r]\n\t"
+        "cmp %[a], r9\n\t"
+        "blt 1b\n\t"
+        "str r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+}
+
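Because sp_3072_mont_mul_48() and sp_3072_mont_sqr_48() each fold in one Montgomery reduction, they return a*b*R^-1 mod m, so operands must first be put into Montgomery form (a*R mod m) and the extra factor taken back out at the end; that is exactly what sp_3072_mod_exp_48() below arranges around them. A one-word sketch of the full round trip with R = 2^32, reusing sp_3072_mont_setup()'s inversion; the names are hypothetical and the entry into Montgomery form uses a plain 64-bit modulo where the real code uses sp_3072_mod_48():

#include <stdint.h>
#include <stdio.h>

/* Montgomery multiply of single 32-bit words, R = 2^32: a*b*R^-1 mod m. */
static uint32_t montmul32(uint32_t a, uint32_t b, uint32_t m, uint32_t mp)
{
    uint64_t t  = (uint64_t)a * b;
    uint32_t mu = (uint32_t)t * mp;        /* t + mu*m == 0 mod 2^32 */
    uint64_t v  = t + (uint64_t)mu * m;    /* may wrap past 2^64 */
    uint64_t r  = (v >> 32) + ((v < t) ? ((uint64_t)1 << 32) : 0);
    return (uint32_t)(r >= m ? r - m : r); /* final conditional subtract */
}

int main(void)
{
    uint32_t m = 0xfffffffbU;              /* odd modulus (2^32 - 5) */
    uint32_t x = (((m + 2) & 4) << 1) + m; /* inverse mod 2^4, then lift */
    x *= 2 - m * x;  x *= 2 - m * x;  x *= 2 - m * x;
    uint32_t mp = (uint32_t)0 - x;         /* -1/m mod 2^32 */
    uint32_t a = 123456789U, b = 987654321U;
    uint32_t abar = (uint32_t)(((uint64_t)a << 32) % m);  /* a*R mod m */
    uint32_t bbar = (uint32_t)(((uint64_t)b << 32) % m);  /* b*R mod m */
    uint32_t cbar = montmul32(abar, bbar, m, mp);         /* a*b*R mod m */
    uint32_t c    = montmul32(cbar, 1, m, mp);            /* leave Montgomery form */
    printf("match: %d\n", c == (uint32_t)(((uint64_t)a * b) % m));
    return 0;
}

+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.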
+ */ +SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, + sp_digit div) +{ + sp_digit r = 0; + + __asm__ __volatile__ ( + "lsr r5, %[div], #1\n\t" + "add r5, #1\n\t" + "mov r8, %[d0]\n\t" + "mov r9, %[d1]\n\t" + "# Do top 32\n\t" + "mov r6, r5\n\t" + "sub r6, %[d1]\n\t" + "sbc r6, r6\n\t" + "add %[r], %[r]\n\t" + "sub %[r], r6\n\t" + "and r6, r5\n\t" + "sub %[d1], r6\n\t" + "# Next 30 bits\n\t" + "mov r4, #29\n\t" + "1:\n\t" + "lsl %[d0], %[d0], #1\n\t" + "adc %[d1], %[d1]\n\t" + "mov r6, r5\n\t" + "sub r6, %[d1]\n\t" + "sbc r6, r6\n\t" + "add %[r], %[r]\n\t" + "sub %[r], r6\n\t" + "and r6, r5\n\t" + "sub %[d1], r6\n\t" + "sub r4, #1\n\t" + "bpl 1b\n\t" + "mov r7, #0\n\t" + "add %[r], %[r]\n\t" + "add %[r], #1\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "sub %[d1], r4\n\t" + "mov r4, %[d1]\n\t" + "mov %[d1], r9\n\t" + "sbc %[d1], r5\n\t" + "mov r5, %[d1]\n\t" + "add %[r], r5\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "mov r6, r9\n\t" + "sub r4, %[d1], r4\n\t" + "sbc r6, r5\n\t" + "mov r5, r6\n\t" + "add %[r], r5\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "mov r6, r9\n\t" + "sub r4, %[d1], r4\n\t" + "sbc r6, r5\n\t" + "mov r5, r6\n\t" + "add %[r], r5\n\t" + "mov r6, %[div]\n\t" + "sub r6, r4\n\t" + "sbc r6, r6\n\t" + "sub %[r], r6\n\t" + : [r] "+r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "r4", "r5", "r7", "r6", "r8", "r9" + ); + return r; +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. 
+ */
+SP_NOINLINE static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mvn r3, r3\n\t"
+        "mov r6, #188\n\t"
+        "1:\n\t"
+        "ldr r7, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        "and r7, r3\n\t"
+        "and r5, r3\n\t"
+        "mov r4, r7\n\t"
+        "sub r7, r5\n\t"
+        "sbc r7, r7\n\t"
+        "add %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        "sub r5, r4\n\t"
+        "sbc r7, r7\n\t"
+        "sub %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        "sub r6, #4\n\t"
+        "cmp r6, #0\n\t"
+        "bge 1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return r;
+}
+
+/* Divide a by d and put the remainder into r. (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[96], t2[49];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[47];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
+    for (i=47; i>=0; i--) {
+        r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div);
+
+        sp_3072_mul_d_48(t2, d, r1);
+        t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
+        t1[48 + i] -= t2[48];
+        sp_3072_mask_48(t2, d, t1[48 + i]);
+        t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
+        sp_3072_mask_48(t2, d, t1[48 + i]);
+        t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_3072_cmp_48(t1, d) >= 0;
+    sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_48(a, m, NULL, r);
+}
+
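sp_3072_div_48() is schoolbook long division producing one 32-bit quotient digit per round: as its comment above notes, div_3072_word_48() can overestimate the digit, so after subtracting r1*d the borrow word t1[48 + i] is expanded into an all-ones mask with sp_3072_mask_48() and the divisor is conditionally added back, twice, so the repair never branches on the data. The same estimate-then-correct step on plain 64-bit values, with hypothetical demo numbers:

#include <stdint.h>
#include <stdio.h>

/* One division round as in sp_3072_div_48: subtract an (over)estimated
 * quotient digit times the divisor, then repair with a masked add-back. */
int main(void)
{
    uint64_t n = 1000003, d = 10;   /* true quotient 100000, remainder 3 */
    uint64_t q = n / d + 1;         /* simulate an estimate that is 1 too big */
    uint64_t r = n - q * d;         /* underflows: wraps to a huge value */
    /* borrow mask: all ones exactly when the subtraction underflowed */
    uint64_t mask = (uint64_t)0 - (r > n);
    r += d & mask;                  /* add the divisor back */
    q += mask;                      /* and drop the quotient digit by one */
    printf("q=%llu r=%llu\n", (unsigned long long)q, (unsigned long long)r);
    return 0;
}

+
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.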
+ */
+static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][96];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 96, NULL,
+            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 96;
+        }
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_48(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
+        if (reduceA != 0) {
+            err = sp_3072_mod_48(t[1] + 48, a, m);
+            if (err == MP_OKAY) {
+                err = sp_3072_mod_48(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
+            err = sp_3072_mod_48(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
+
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+
+            sp_3072_mont_mul_48(r, r, t[y], m, mp);
+        }
+
+        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
+        sp_3072_mont_reduce_48(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+        sp_3072_cond_sub_48(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
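The WOLFSSL_SP_SMALL exponentiation above uses a fixed 4-bit window: t[k] holds the Montgomery form of a^k for k = 0..15, the top window of the exponent is peeled off first, and every remaining window costs four Montgomery squarings plus one table multiply; the #else variant below is identical except for 5-bit windows and 32 table entries. The bit scheduling is easier to follow on plain 64-bit words. A sketch assuming unsigned __int128 (GCC/Clang) for the modular multiply, with hypothetical names and without the Montgomery form or constant-time behaviour of the real code:

#include <stdint.h>
#include <stdio.h>

static uint64_t mulmod(uint64_t a, uint64_t b, uint64_t m)
{
    return (uint64_t)(((unsigned __int128)a * b) % m);
}

/* Fixed 4-bit window exponentiation: base^e mod m, e having 'bits' bits. */
static uint64_t expmod_win4(uint64_t base, uint64_t e, uint64_t m, int bits)
{
    uint64_t t[16];
    t[0] = 1 % m;
    for (int i = 1; i < 16; i++)
        t[i] = mulmod(t[i - 1], base, m);      /* t[k] = base^k mod m */
    int c = (bits + 3) & ~3;                   /* round up to whole windows */
    uint64_t r = t[(e >> (c - 4)) & 0xf];      /* top window */
    for (c -= 8; c >= 0; c -= 4) {
        r = mulmod(r, r, m);                   /* four squarings ... */
        r = mulmod(r, r, m);
        r = mulmod(r, r, m);
        r = mulmod(r, r, m);
        r = mulmod(r, t[(e >> c) & 0xf], m);   /* ... then one table multiply */
    }
    return r;
}

int main(void)
{
    uint64_t m = 1000000007ULL, a = 123456789ULL, e = 0xdeadbeefULL;
    /* check against plain square-and-multiply */
    uint64_t chk = 1, base = a % m, x = e;
    while (x) { if (x & 1) chk = mulmod(chk, base, m);
                base = mulmod(base, base, m); x >>= 1; }
    printf("match: %d\n", chk == expmod_win4(a, e, m, 32));
    return 0;
}

+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.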
+ */ +static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][96]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 96; + } +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_48(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 48U); + if (reduceA != 0) { + err = sp_3072_mod_48(t[1] + 48, a, m); + if (err == MP_OKAY) { + err = sp_3072_mod_48(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48); + err = sp_3072_mod_48(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_48(t[10], t[ 5], m, mp); + sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_48(t[12], t[ 6], m, mp); + sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_48(t[14], t[ 7], m, mp); + sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_48(t[16], t[ 8], m, mp); + sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_48(t[18], t[ 9], m, mp); + sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_48(t[20], t[10], m, mp); + sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_48(t[22], t[11], m, mp); + sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_48(t[24], t[12], m, mp); + sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_48(t[26], t[13], m, mp); + sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_48(t[28], t[14], m, mp); + sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_48(t[30], t[15], m, mp); + sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 48); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + + sp_3072_mont_mul_48(r, r, t[y], m, mp); + } + + XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U); + sp_3072_mont_reduce_48(r, m, mp); + + mask = 0 - (sp_3072_cmp_48(r, m) >= 0); + sp_3072_cond_sub_48(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ + +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && 
!WOLFSSL_RSA_PUBLIC_ONLY */ + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 3072 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 96); + + /* r = 2^n mod m */ + sp_3072_sub_in_place_96(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #1\n\t" + "lsl r5, r5, #8\n\t" + "add r5, #128\n\t" + "mov r8, r5\n\t" + "mov r7, #0\n\t" + "1:\n\t" + "ldr r6, [%[b], r7]\n\t" + "and r6, %[m]\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r5, [%[a], r7]\n\t" + "sbc r5, r6\n\t" + "sbc %[c], %[c]\n\t" + "str r5, [%[r], r7]\n\t" + "add r7, #4\n\t" + "cmp r7, r8\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r7", "r8" + ); + + return c; +} + +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "mov r8, %[mp]\n\t" + "mov r12, %[ca]\n\t" + "mov r14, %[m]\n\t" + "mov r9, %[a]\n\t" + "mov r4, #0\n\t" + "# i = 0\n\t" + "mov r11, r4\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "mov %[ca], #0\n\t" + "# mu = a[i] * mp\n\t" + "mov %[mp], r8\n\t" + "ldr %[a], [%[a]]\n\t" + "mul %[mp], %[a]\n\t" + "mov %[m], r14\n\t" + "mov r10, r9\n\t" + "\n2:\n\t" + "# a[i+j] += m[j] * mu\n\t" + "mov %[a], r10\n\t" + "ldr %[a], [%[a]]\n\t" + "mov %[ca], #0\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "# Multiply m[j] and mu - Start\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r6, %[mp], #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add %[a], r7\n\t" + "adc r5, %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add %[a], r6\n\t" + "adc r5, r7\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r6, %[mp], #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r5, r7\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add %[a], r6\n\t" + "adc r5, r7\n\t" + "# Multiply m[j] and mu - Done\n\t" + "add r4, %[a]\n\t" + "adc r5, %[ca]\n\t" + "mov %[a], r10\n\t" + "str r4, [%[a]]\n\t" + "mov r6, #4\n\t" + "add %[m], #4\n\t" + "add r10, r6\n\t" + "mov r4, #1\n\t" + "lsl r4, r4, #8\n\t" + "add r4, #124\n\t" + "add r4, r9\n\t" + "cmp r10, r4\n\t" + "blt 2b\n\t" + "# a[i+95] += m[95] * mu\n\t" + "mov %[ca], #0\n\t" + "mov r4, r12\n\t" + "mov %[a], #0\n\t" + "# Multiply m[95] and mu - Start\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r6, %[mp], #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr 
r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "adc r4, %[ca]\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "# Multiply m[95] and mu - Done\n\t"
+        "mov %[ca], %[a]\n\t"
+        "mov %[a], r10\n\t"
+        "ldr r7, [%[a], #4]\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov r6, #0\n\t"
+        "add r5, %[a]\n\t"
+        "adc r7, r4\n\t"
+        "adc %[ca], r6\n\t"
+        "mov %[a], r10\n\t"
+        "str r5, [%[a]]\n\t"
+        "str r7, [%[a], #4]\n\t"
+        "# i += 1\n\t"
+        "mov r6, #4\n\t"
+        "add r9, r6\n\t"
+        "add r11, r6\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov %[a], r9\n\t"
+        "mov r4, #1\n\t"
+        "lsl r4, r4, #8\n\t"
+        "add r4, #128\n\t"
+        "cmp r11, r4\n\t"
+        "blt 1b\n\t"
+        "mov %[m], r14\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_96(r, a, b);
+    sp_3072_mont_reduce_96(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_96(r, a);
+    sp_3072_mont_reduce_96(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
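+ *
+ * Editor's note: the assembly below develops the quotient one bit at a
+ * time, which avoids a hardware divide instruction (absent on Thumb-1
+ * targets) and runs in constant time; the multiply-and-subtract rounds
+ * at the end refine the estimate. Semantically it approximates the plain
+ * 64/32-bit division, sketched here with a 64-bit temporary (illustrative
+ * only; this reference form is exact where the assembly may be 1 larger):
+ *
+ *   static sp_digit div_word_ref(sp_digit d1, sp_digit d0, sp_digit div)
+ *   {
+ *       return (sp_digit)((((word64)d1 << 32) | d0) / div);
+ *   }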
+ */ +SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, + sp_digit div) +{ + sp_digit r = 0; + + __asm__ __volatile__ ( + "lsr r5, %[div], #1\n\t" + "add r5, #1\n\t" + "mov r8, %[d0]\n\t" + "mov r9, %[d1]\n\t" + "# Do top 32\n\t" + "mov r6, r5\n\t" + "sub r6, %[d1]\n\t" + "sbc r6, r6\n\t" + "add %[r], %[r]\n\t" + "sub %[r], r6\n\t" + "and r6, r5\n\t" + "sub %[d1], r6\n\t" + "# Next 30 bits\n\t" + "mov r4, #29\n\t" + "1:\n\t" + "lsl %[d0], %[d0], #1\n\t" + "adc %[d1], %[d1]\n\t" + "mov r6, r5\n\t" + "sub r6, %[d1]\n\t" + "sbc r6, r6\n\t" + "add %[r], %[r]\n\t" + "sub %[r], r6\n\t" + "and r6, r5\n\t" + "sub %[d1], r6\n\t" + "sub r4, #1\n\t" + "bpl 1b\n\t" + "mov r7, #0\n\t" + "add %[r], %[r]\n\t" + "add %[r], #1\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "sub %[d1], r4\n\t" + "mov r4, %[d1]\n\t" + "mov %[d1], r9\n\t" + "sbc %[d1], r5\n\t" + "mov r5, %[d1]\n\t" + "add %[r], r5\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "mov r6, r9\n\t" + "sub r4, %[d1], r4\n\t" + "sbc r6, r5\n\t" + "mov r5, r6\n\t" + "add %[r], r5\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "mov r6, r9\n\t" + "sub r4, %[d1], r4\n\t" + "sbc r6, r5\n\t" + "mov r5, r6\n\t" + "add %[r], r5\n\t" + "mov r6, %[div]\n\t" + "sub r6, r4\n\t" + "sbc r6, r6\n\t" + "sub %[r], r6\n\t" + : [r] "+r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "r4", "r5", "r7", "r6", "r8", "r9" + ); + return r; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. 
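+ *
+ * Editor's note: the mask is expected to be all-ones or all-zero, so
+ * callers can select between d and 0 without branching, as in the
+ * division loop below:
+ *
+ *   sp_digit mask = (sp_digit)0 - (cond != 0);   all-ones when cond
+ *   sp_3072_mask_96(t2, d, mask);                t2 = cond ? d : 0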
+ */
+static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<96; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 96; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mvn r3, r3\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, #124\n\t"
+        "1:\n\t"
+        "ldr r7, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        "and r7, r3\n\t"
+        "and r5, r3\n\t"
+        "mov r4, r7\n\t"
+        "sub r7, r5\n\t"
+        "sbc r7, r7\n\t"
+        "add %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        "sub r5, r4\n\t"
+        "sbc r7, r7\n\t"
+        "sub %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        "sub r6, #4\n\t"
+        "cmp r6, #0\n\t"
+        "bge 1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return r;
+}
+
+/* Divide a by d and put the remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[192], t2[97];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[95];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
+    for (i=95; i>=0; i--) {
+        r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
+
+        sp_3072_mul_d_96(t2, d, r1);
+        t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
+        t1[96 + i] -= t2[96];
+        sp_3072_mask_96(t2, d, t1[96 + i]);
+        t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
+        sp_3072_mask_96(t2, d, t1[96 + i]);
+        t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_3072_cmp_96(t1, d) >= 0;
+    sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_96(a, m, NULL, r);
+}
+
+/* Divide a by d and put the remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
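+ *
+ * Editor's note: unlike sp_3072_div_96 above, which applies the masked
+ * additions unconditionally in constant time, this variant tests the
+ * carry word directly:
+ *
+ *   if (t1[96 + i] != 0)
+ *       t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
+ *
+ * The data-dependent branch is acceptable here because the function is
+ * only used on public values (the RSA public-key path below).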
+ */ +static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[192], t2[97]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[95]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 96); + for (i=95; i>=0; i--) { + r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div); + + sp_3072_mul_d_96(t2, d, r1); + t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2); + t1[96 + i] -= t2[96]; + if (t1[96 + i] != 0) { + t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d); + if (t1[96 + i] != 0) + t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d); + } + } + + r1 = sp_3072_cmp_96(t1, d) >= 0; + sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_3072_div_96_cond(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][192]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 192, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 192; + } +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_96(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 96U); + if (reduceA != 0) { + err = sp_3072_mod_96(t[1] + 96, a, m); + if (err == MP_OKAY) { + err = sp_3072_mod_96(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96); + err = sp_3072_mod_96(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_96(t[10], t[ 5], m, mp); + sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_96(t[12], t[ 6], m, mp); + sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_96(t[14], t[ 7], m, mp); + sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], 
sizeof(sp_digit) * 96); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n <<= 4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + + sp_3072_mont_mul_96(r, r, t[y], m, mp); + } + + XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U); + sp_3072_mont_reduce_96(r, m, mp); + + mask = 0 - (sp_3072_cmp_96(r, m) >= 0); + sp_3072_cond_sub_96(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][192]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 192, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 192; + } +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_96(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 96U); + if (reduceA != 0) { + err = sp_3072_mod_96(t[1] + 96, a, m); + if (err == MP_OKAY) { + err = sp_3072_mod_96(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96); + err = sp_3072_mod_96(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_96(t[10], t[ 5], m, mp); + sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_96(t[12], t[ 6], m, mp); + sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_96(t[14], t[ 7], m, mp); + sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_96(t[16], t[ 8], m, mp); + sp_3072_mont_mul_96(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_96(t[18], t[ 9], m, mp); + sp_3072_mont_mul_96(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_96(t[20], t[10], m, mp); + sp_3072_mont_mul_96(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_96(t[22], t[11], m, mp); + sp_3072_mont_mul_96(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_96(t[24], t[12], m, mp); + sp_3072_mont_mul_96(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_96(t[26], t[13], m, mp); + sp_3072_mont_mul_96(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_96(t[28], t[14], m, mp); + sp_3072_mont_mul_96(t[29], t[15], t[14], m, mp); + 
sp_3072_mont_sqr_96(t[30], t[15], m, mp); + sp_3072_mont_mul_96(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 96); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + + sp_3072_mont_mul_96(r, r, t[y], m, mp); + } + + XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U); + sp_3072_mont_reduce_96(r, m, mp); + + mask = 0 - (sp_3072_cmp_96(r, m) >= 0); + sp_3072_cond_sub_96(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[192], m[96], r[192]; +#else + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; +#endif + sp_digit *ah; + sp_digit e[1]; + int err = MP_OKAY; + + if (*outLen < 384) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 384 || + mp_count_bits(mm) != 3072)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 96 * 2; + m = r + 96 * 2; + } +#endif + + if (err == MP_OKAY) { + ah = a + 96; + + sp_3072_from_bin(ah, 96, in, inLen); +#if DIGIT_BIT >= 32 + e[0] = em->dp[0]; +#else + e[0] = em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_3072_from_mp(m, 96, mm); + + if (e[0] == 0x3) { + if (err == MP_OKAY) { + sp_3072_sqr_96(r, ah); + err = sp_3072_mod_96_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_3072_mul_96(r, ah, r); + err = sp_3072_mod_96_cond(r, r, m); + } + } + else { + int i; + sp_digit mp; + + sp_3072_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
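+             * Editor's note: ah (= a + 96) already holds the input, so
+             * zeroing the low 96 words makes a = in * 2^3072; the
+             * conditional reduction then leaves the Montgomery
+             * representation in_hat = in * R mod m with R = 2^3072.
+             * The Montgomery operations keep that factor invariant,
+             *
+             *   mont_mul(x_hat, y_hat) = x*y*R mod m,
+             *
+             * and the final mont_reduce divides the single R factor
+             * back out.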
*/
+            XMEMSET(a, 0, sizeof(sp_digit) * 96);
+            err = sp_3072_mod_96_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 96);
+                for (i--; i>=0; i--) {
+                    sp_3072_mont_sqr_96(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_3072_mont_mul_96(r, r, a, m, mp);
+                    }
+                }
+                XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
+                sp_3072_mont_reduce_96(r, m, mp);
+
+                for (i = 95; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_3072_sub_in_place_96(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+/* RSA private key operation using the private exponent d directly.
+ * Parameters are as for the CRT variant below.
+ */
+int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 384U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 3072) {
+            err = MP_READ_E;
+        }
+        if (inLen > 384) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 4, NULL,
+                DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 96;
+        m = a + 192;
+        r = a;
+
+        sp_3072_from_bin(a, 96, in, inLen);
+        sp_3072_from_mp(d, 96, dm);
+        sp_3072_from_mp(m, 96, mm);
+        err = sp_3072_mod_exp_96(r, a, d, 3072, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 96);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+}
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov r5, #192\n\t"
+        "mov r8, r5\n\t"
+        "mov r7, #0\n\t"
+        "1:\n\t"
+        "ldr r6, [%[b], r7]\n\t"
+        "and r6, %[m]\n\t"
+        "mov r5, #0\n\t"
+        "sub r5, #1\n\t"
+        "add r5, %[c]\n\t"
+        "ldr r5, [%[a], r7]\n\t"
+        "adc r5, r6\n\t"
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"
+        "str r5, [%[r], r7]\n\t"
+        "add r7, #4\n\t"
+        "cmp r7, r8\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* RSA private key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ *     Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
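+ *
+ * Editor's note: the body below is the usual CRT (Garner) recombination;
+ * with c denoting the input, the steps map onto the code as:
+ *
+ *   tmpa = c^dp mod p                  (dp = d mod (p-1))
+ *   tmpb = c^dq mod q                  (dq = d mod (q-1))
+ *   tmpa = qi * (tmpa - tmpb) mod p    (qi = q^-1 mod p)
+ *   r    = tmpb + q * tmpa
+ *
+ * The two sp_3072_cond_add_48 calls after the subtraction pull
+ * tmpa - tmpb back into [0, p) without branching.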
+ */ +int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[96 * 2]; + sp_digit p[48], q[48], dp[48]; + sp_digit tmpa[96], tmpb[96]; +#else + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* tmpa; + sp_digit* tmpb; +#endif + sp_digit* r; + sp_digit* qi; + sp_digit* dq; + sp_digit c; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 384) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 96 * 2; + q = p + 48; + qi = dq = dp = q + 48; + tmpa = qi + 48; + tmpb = tmpa + 96; + + r = t + 96; + } +#else +#endif + + if (err == MP_OKAY) { +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + r = a; + qi = dq = dp; +#endif + sp_3072_from_bin(a, 96, in, inLen); + sp_3072_from_mp(p, 48, pm); + sp_3072_from_mp(q, 48, qm); + sp_3072_from_mp(dp, 48, dpm); + + err = sp_3072_mod_exp_48(tmpa, a, dp, 1536, p, 1); + } + if (err == MP_OKAY) { + sp_3072_from_mp(dq, 48, dqm); + err = sp_3072_mod_exp_48(tmpb, a, dq, 1536, q, 1); + } + + if (err == MP_OKAY) { + c = sp_3072_sub_in_place_48(tmpa, tmpb); + c += sp_3072_cond_add_48(tmpa, tmpa, p, c); + sp_3072_cond_add_48(tmpa, tmpa, p, c); + + sp_3072_from_mp(qi, 48, qim); + sp_3072_mul_48(tmpa, tmpa, qi); + err = sp_3072_mod_48(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_3072_mul_48(tmpa, q, tmpa); + XMEMSET(&tmpb[48], 0, sizeof(sp_digit) * 48); + sp_3072_add_96(r, tmpb, tmpa); + + sp_3072_to_bin(r, out); + *outLen = 384; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 48 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); +#endif + + return err; +} +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. 
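+ *
+ * Editor's note: sp_digit is 32 bits here while an mp_digit carries
+ * DIGIT_BIT bits, so the branches below repack bit-exactly. For example
+ * with DIGIT_BIT == 28, word a[0] splits as:
+ *
+ *   r->dp[0] = a[0] & 0x0fffffff;   low 28 bits
+ *   r->dp[1] = a[0] >> 28;          top 4 bits, then a[1] shifts in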
+ */ +static int sp_3072_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 96); + r->used = 96; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 96; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 32 - s; + } + r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 96; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[192], e[96], m[96]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 96, base); + sp_3072_from_mp(e, 96, exp); + sp_3072_from_mp(m, 96, mod); + + err = sp_3072_mod_exp_96(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_3072_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_3072 +static void sp_3072_lshift_96(sp_digit* r, sp_digit* a, byte n) +{ + __asm__ __volatile__ ( + "mov r6, #31\n\t" + "sub r6, r6, %[n]\n\t" + "add %[a], %[a], #255\n\t" + "add %[r], %[r], #255\n\t" + "add %[a], %[a], #65\n\t" + "add %[r], %[r], #65\n\t" + "ldr r3, [%[a], #60]\n\t" + "lsr r4, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r4, r4, r6\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + 
"str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + 
"lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #56]\n\t" + "str r2, [%[r], #64]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], #56]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #44]\n\t" + "str r2, [%[r], #52]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #32]\n\t" + "str r2, [%[r], #40]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #20]\n\t" + "str r2, [%[r], #28]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #8]\n\t" + "str r2, [%[r], #16]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #0]\n\t" + "str r3, [%[r], #8]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r3, [%[a], #60]\n\t" + "str r2, [%[r], #68]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" 
+ "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + 
"orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #56]\n\t" + "str r2, [%[r], #64]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], #56]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #44]\n\t" + "str r2, [%[r], #52]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #32]\n\t" + "str r2, [%[r], #40]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #20]\n\t" + "str r2, [%[r], #28]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #8]\n\t" + "str r2, [%[r], #16]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #0]\n\t" + "str r3, [%[r], #8]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "str r4, [%[r]]\n\t" + "str r2, [%[r], #4]\n\t" + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) + : "memory", "r2", "r3", "r4", "r5", "r6" + ); +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[192]; + sp_digit td[97]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 289, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 192; +#else + norm = nd; + tmp = td; +#endif + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_96(norm, m); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + sp_3072_lshift_96(r, norm, y); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + + sp_3072_lshift_96(r, r, y); + sp_3072_mul_d_96(tmp, norm, r[96]); + r[96] = 0; + o = sp_3072_add_96(r, r, tmp); + sp_3072_cond_sub_96(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U); + sp_3072_mont_reduce_96(r, m, mp); + + mask = 0 - (sp_3072_cmp_96(r, m) >= 0); + sp_3072_cond_sub_96(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* HAVE_FFDHE_3072 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ + int err = MP_OKAY; + sp_digit b[192], e[96], m[96]; + sp_digit* r = b; + word32 i; + + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 384) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 96, base); + sp_3072_from_bin(e, 96, exp, expLen); + sp_3072_from_mp(m, 96, mod); + + #ifdef HAVE_FFDHE_3072 + if (base->used == 1 && base->dp[0] == 2 && m[95] == (sp_digit)-1) + err = sp_3072_mod_exp_2_96(r, e, expLen * 8, m); + else + #endif + err = sp_3072_mod_exp_96(r, b, e, expLen * 8, m, 0); + + } + + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + for (i=0; i<384 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. 
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[96], e[48], m[48]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1536) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 1536) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 1536) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 48, base); + sp_3072_from_mp(e, 48, exp); + sp_3072_from_mp(m, 48, mod); + + err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 48, 0, sizeof(*r) * 48U); + err = sp_3072_to_mp(r, res); + res->used = mod->used; + mp_clamp(res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* !WOLFSSL_SP_NO_3072 */ + +#ifdef WOLFSSL_SP_4096 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 24U) { + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 32 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 32 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 32U) <= (word32)DIGIT_BIT) { + s += 32U; + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 32) { + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + s = 32 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 512 + * + * r A single precision integer. + * a Byte array. 
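+ *
+ * Editor's note: the output is fixed-width big-endian, so the loop walks
+ * the little-endian word array from a[0] upward while filling the byte
+ * array from the end. With exactly 32-bit digits it is equivalent to
+ * this sketch (illustrative only):
+ *
+ *   for (i = 0; i < 128; i++) {
+ *       a[511 - 4*i] = (byte)(r[i]      );
+ *       a[510 - 4*i] = (byte)(r[i] >>  8);
+ *       a[509 - 4*i] = (byte)(r[i] >> 16);
+ *       a[508 - 4*i] = (byte)(r[i] >> 24);
+ *   }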
+ */ +static void sp_4096_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + j = 4096 / 8 - 1; + a[j] = 0; + for (i=0; i<128 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ + b += 8 - s; + if (j < 0) { + break; + } + while (b < 32) { + a[j--] = (byte)(r[i] >> b); + b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 32); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mvn r7, r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "add r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], 
#104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "add %[r], #0x80\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], 
#108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r7" + ); + + return c; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sub r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, 
[%[a], #96]\n\t" + "str r4, [%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + 
"ldr r6, [%[b], #108]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, 
[%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbc r3, r5\n\t" + "sbc r4, 
r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c]\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mvn r7, r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "add r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr 
r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "add %[r], #0x80\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, 
[%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "add %[r], #0x80\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, 
[%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "add %[r], #0x80\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, 
[%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r7" + ); + + return c; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[64 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r9, %[a]\n\t" + "mov r10, %[b]\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r9\n\t" + "mov r12, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #252\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov %[b], r8\n\t" + "sub %[b], %[a]\n\t" + "add %[a], r9\n\t" + "add %[b], r10\n\t" + "\n2:\n\t" + "# Multiply Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply Done\n\t" + "add %[a], #4\n\t" + "sub %[b], #4\n\t" + "cmp %[a], r12\n\t" + "beq 3f\n\t" + "mov r6, r8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #248\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[a], r9\n\t" + "mov %[b], r10\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<64; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 64; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
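+ *
+ * This is one level of Karatsuba over the 64-digit primitives: with
+ * B = 2^(32*64), split a = a1*B + a0 and b = b1*B + b0, then
+ *   z0 = a0*b0, z2 = a1*b1, z1 = (a0 + a1)*(b0 + b1) - z0 - z2
+ *   a*b = z2*B^2 + z1*B + z0
+ * so three 64-digit multiplies replace four. The mask and carry handling
+ * below folds in the one-bit overflows of the sums a0 + a1 and b0 + b1.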
+ */ +SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[128]; + sp_digit a1[64]; + sp_digit b1[64]; + sp_digit z2[128]; + sp_digit u, ca, cb; + + ca = sp_2048_add_64(a1, a, &a[64]); + cb = sp_2048_add_64(b1, b, &b[64]); + u = ca & cb; + sp_2048_mul_64(z1, a1, b1); + sp_2048_mul_64(z2, &a[64], &b[64]); + sp_2048_mul_64(z0, a, b); + sp_2048_mask_64(r + 128, a1, 0 - cb); + sp_2048_mask_64(b1, b1, 0 - ca); + u += sp_2048_add_64(r + 128, r + 128, b1); + u += sp_4096_sub_in_place_128(z1, z2); + u += sp_4096_sub_in_place_128(z1, z0); + u += sp_4096_add_128(r + 64, r + 64, z1); + r[192] = u; + XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1)); + (void)sp_4096_add_128(r + 128, r + 128, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "neg r6, r6\n\t" + "add sp, r6\n\t" + "mov r10, sp\n\t" + "mov r9, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #252\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov r2, r8\n\t" + "sub r2, %[a]\n\t" + "add %[a], r9\n\t" + "add r2, r9\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + "# Multiply * 2: Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply * 2: Done\n\t" + "bal 5f\n\t" + "\n4:\n\t" + "# Square: Start\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r6\n\t" + "add r3, r6\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "mul r7, r7\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Square: Done\n\t" + "\n5:\n\t" + "add %[a], #4\n\t" + "sub r2, #4\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r7, r8\n\t" + "add r7, r9\n\t" + "cmp %[a], r7\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r10\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #248\n\t" + 
"cmp r7, r6\n\t" + "ble 1b\n\t" + "mov %[a], r9\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov %[a], r10\n\t" + "mov r3, #1\n\t" + "lsl r3, r3, #8\n\t" + "add r3, #252\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "sub r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[128]; + sp_digit z1[128]; + sp_digit a1[64]; + sp_digit u; + + u = sp_2048_add_64(a1, a, &a[64]); + sp_2048_sqr_64(z1, a1); + sp_2048_sqr_64(z2, &a[64]); + sp_2048_sqr_64(z0, a); + sp_2048_mask_64(r + 128, a1, 0 - u); + u += sp_2048_add_64(r + 128, r + 128, r + 128); + u += sp_4096_sub_in_place_128(z1, z2); + u += sp_4096_sub_in_place_128(z1, z0); + u += sp_4096_add_128(r + 64, r + 64, z1); + r[192] = u; + XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1)); + (void)sp_4096_add_128(r + 128, r + 128, z2); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "mov r7, #0\n\t" + "mov r4, #2\n\t" + "lsl r4, #8\n\t" + "sub r7, #1\n\t" + "add r6, r4\n\t" + "\n1:\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r]]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #4\n\t" + "add %[b], #4\n\t" + "add %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + __asm__ __volatile__ ( + "mov r7, %[a]\n\t" + "mov r5, #2\n\t" + "lsl r5, #8\n\t" + "add r7, r5\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b]]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a]]\n\t" + "str r4, [%[a], #4]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #8\n\t" + "add %[b], #8\n\t" + "cmp %[a], r7\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[128 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r9, %[a]\n\t" + "mov r10, %[b]\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r9\n\t" + "mov r12, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #252\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov %[b], r8\n\t" + "sub %[b], %[a]\n\t" + "add %[a], r9\n\t" + "add %[b], r10\n\t" + "\n2:\n\t" + "# Multiply Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply Done\n\t" + "add %[a], #4\n\t" + "sub %[b], #4\n\t" + "cmp %[a], r12\n\t" + "beq 3f\n\t" + "mov r6, r8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #3\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #248\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[a], r9\n\t" + "mov %[b], r10\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
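+ *
+ * Squaring roughly halves the partial products of a general multiply:
+ * with B = 2^32,
+ *   a^2 = sum of a[i]^2 * B^(2i) + 2 * sum of a[i]*a[j] * B^(i+j), i < j
+ * so each cross product is formed once and added twice (the
+ * "Multiply * 2" path below) and the diagonal terms once.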
+ */ +SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r6, #4\n\t" + "lsl r6, r6, #8\n\t" + "neg r6, r6\n\t" + "add sp, r6\n\t" + "mov r10, sp\n\t" + "mov r9, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #252\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov r2, r8\n\t" + "sub r2, %[a]\n\t" + "add %[a], r9\n\t" + "add r2, r9\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + "# Multiply * 2: Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply * 2: Done\n\t" + "bal 5f\n\t" + "\n4:\n\t" + "# Square: Start\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r6\n\t" + "add r3, r6\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "mul r7, r7\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Square: Done\n\t" + "\n5:\n\t" + "add %[a], #4\n\t" + "sub r2, #4\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r7, r8\n\t" + "add r7, r9\n\t" + "cmp %[a], r7\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r10\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #3\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #248\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "mov %[a], r9\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov %[a], r10\n\t" + "mov r3, #3\n\t" + "lsl r3, r3, #8\n\t" + "add r3, #252\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "sub r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #4\n\t" + "lsl r6, r6, #8\n\t" + "add sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Caclulate the bottom digit of -1/a mod 2^n. + * + * a A single precision number. + * rho Bottom word of inverse. 
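+ *
+ * The inverse is built by Hensel lifting (a Newton iteration modulo a
+ * power of 2): from x*a == 1 mod 2^k, the update x *= 2 - a*x gives
+ * x*a == 1 mod 2^(2k), so starting at 2^4 three updates reach 2^32 and
+ * rho = -x mod 2^32. Worked example with a = 7: the seed is x = 7
+ * (7*7 = 49 == 1 mod 16), and lifting gives x = 0xb6db6db7, for which
+ * 7*x = 5*2^32 + 1; rho is then -x = 0x49249249.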
+ */ +static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho) +{ + sp_digit x, b; + + b = a[0]; + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ + + /* rho = -1/m mod b */ + *rho = -x; +} + +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, + sp_digit b) +{ + __asm__ __volatile__ ( + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add r6, %[a]\n\t" + "mov r8, %[r]\n\t" + "mov r9, r6\n\t" + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "# A[] * B\n\t" + "ldr r6, [%[a]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, %[b], #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "lsr r7, %[b], #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, %[b], #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "lsl r7, %[b], #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# A[] * B - Done\n\t" + "mov %[r], r8\n\t" + "str r3, [%[r]]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add %[r], #4\n\t" + "add %[a], #4\n\t" + "mov r8, %[r]\n\t" + "cmp %[a], r9\n\t" + "blt 1b\n\t" + "str r3, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + ); +} + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 4096 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 128); + + /* r = 2^n mod m */ + sp_4096_sub_in_place_128(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #2\n\t" + "lsl r5, r5, #8\n\t" + "mov r8, r5\n\t" + "mov r7, #0\n\t" + "1:\n\t" + "ldr r6, [%[b], r7]\n\t" + "and r6, %[m]\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r5, [%[a], r7]\n\t" + "sbc r5, r6\n\t" + "sbc %[c], %[c]\n\t" + "str r5, [%[r], r7]\n\t" + "add r7, #4\n\t" + "cmp r7, r8\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r7", "r8" + ); + + return c; +} + +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
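+ *
+ * Sketch of the reduction (B = 2^32, n = 128 digits): for i = 0..n-1,
+ * mu = a[i]*mp mod B is the digit that makes a[i] + mu*m[0] == 0 mod B,
+ * so adding mu*m at position i clears a[i]. After n passes the low n
+ * digits are zero and the result is the high half; a final masked
+ * subtract of m, keyed off the carry out rather than a data-dependent
+ * branch, brings the value back into range.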
+ */ +SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "mov r8, %[mp]\n\t" + "mov r12, %[ca]\n\t" + "mov r14, %[m]\n\t" + "mov r9, %[a]\n\t" + "mov r4, #0\n\t" + "# i = 0\n\t" + "mov r11, r4\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "mov %[ca], #0\n\t" + "# mu = a[i] * mp\n\t" + "mov %[mp], r8\n\t" + "ldr %[a], [%[a]]\n\t" + "mul %[mp], %[a]\n\t" + "mov %[m], r14\n\t" + "mov r10, r9\n\t" + "\n2:\n\t" + "# a[i+j] += m[j] * mu\n\t" + "mov %[a], r10\n\t" + "ldr %[a], [%[a]]\n\t" + "mov %[ca], #0\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "# Multiply m[j] and mu - Start\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r6, %[mp], #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add %[a], r7\n\t" + "adc r5, %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add %[a], r6\n\t" + "adc r5, r7\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r6, %[mp], #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r5, r7\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add %[a], r6\n\t" + "adc r5, r7\n\t" + "# Multiply m[j] and mu - Done\n\t" + "add r4, %[a]\n\t" + "adc r5, %[ca]\n\t" + "mov %[a], r10\n\t" + "str r4, [%[a]]\n\t" + "mov r6, #4\n\t" + "add %[m], #4\n\t" + "add r10, r6\n\t" + "mov r4, #1\n\t" + "lsl r4, r4, #8\n\t" + "add r4, #252\n\t" + "add r4, r9\n\t" + "cmp r10, r4\n\t" + "blt 2b\n\t" + "# a[i+127] += m[127] * mu\n\t" + "mov %[ca], #0\n\t" + "mov r4, r12\n\t" + "mov %[a], #0\n\t" + "# Multiply m[127] and mu - Start\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r6, %[mp], #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r5, r7\n\t" + "adc r4, %[ca]\n\t" + "adc %[a], %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r5, r6\n\t" + "adc r4, r7\n\t" + "adc %[a], %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r6, %[mp], #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc %[a], %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r5, r6\n\t" + "adc r4, r7\n\t" + "adc %[a], %[ca]\n\t" + "# Multiply m[127] and mu - Done\n\t" + "mov %[ca], %[a]\n\t" + "mov %[a], r10\n\t" + "ldr r7, [%[a], #4]\n\t" + "ldr %[a], [%[a]]\n\t" + "mov r6, #0\n\t" + "add r5, %[a]\n\t" + "adc r7, r4\n\t" + "adc %[ca], r6\n\t" + "mov %[a], r10\n\t" + "str r5, [%[a]]\n\t" + "str r7, [%[a], #4]\n\t" + "# i += 1\n\t" + "mov r6, #4\n\t" + "add r9, r6\n\t" + "add r11, r6\n\t" + "mov r12, %[ca]\n\t" + "mov %[a], r9\n\t" + "mov r4, #2\n\t" + "lsl r4, r4, #8\n\t" + "cmp r11, r4\n\t" + "blt 1b\n\t" + "mov %[m], r14\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. 
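+ *
+ * With R = 2^4096 and m the modulus, Montgomery form is a' = a*R mod m.
+ * Multiplying two numbers in form yields a*b*R^2, and the Montgomery
+ * reduction divides by R, so
+ *   mont_mul(a', b') = a*b*R mod m = (a*b)'
+ * and intermediate results stay in form with no true division by m.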
+ */
+static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b,
+    const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_128(r, a, b);
+    sp_4096_mont_reduce_128(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, const sp_digit* m,
+    sp_digit mp)
+{
+    sp_4096_sqr_128(r, a);
+    sp_4096_mont_reduce_128(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate division. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0,
+    sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr r5, %[div], #1\n\t"
+        "add r5, #1\n\t"
+        "mov r8, %[d0]\n\t"
+        "mov r9, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "mov r6, r5\n\t"
+        "sub r6, %[d1]\n\t"
+        "sbc r6, r6\n\t"
+        "add %[r], %[r]\n\t"
+        "sub %[r], r6\n\t"
+        "and r6, r5\n\t"
+        "sub %[d1], r6\n\t"
+        "# Next 30 bits\n\t"
+        "mov r4, #29\n\t"
+        "1:\n\t"
+        "lsl %[d0], %[d0], #1\n\t"
+        "adc %[d1], %[d1]\n\t"
+        "mov r6, r5\n\t"
+        "sub r6, %[d1]\n\t"
+        "sbc r6, r6\n\t"
+        "add %[r], %[r]\n\t"
+        "sub %[r], r6\n\t"
+        "and r6, r5\n\t"
+        "sub %[d1], r6\n\t"
+        "sub r4, #1\n\t"
+        "bpl 1b\n\t"
+        "mov r7, #0\n\t"
+        "add %[r], %[r]\n\t"
+        "add %[r], #1\n\t"
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "sub %[d1], r4\n\t"
+        "mov r4, %[d1]\n\t"
+        "mov %[d1], r9\n\t"
+        "sbc %[d1], r5\n\t"
+        "mov r5, %[d1]\n\t"
+        "add %[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "mov r6, r9\n\t"
+        "sub r4, %[d1], r4\n\t"
+        "sbc r6, r5\n\t"
+        "mov r5, r6\n\t"
+        "add %[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "mov r6, r9\n\t"
+        "sub r4, %[d1], r4\n\t"
+        "sbc r6, r5\n\t"
+        "mov r5, r6\n\t"
+        "add %[r], r5\n\t"
+        "mov r6, %[div]\n\t"
+        "sub r6, r4\n\t"
+        "sbc r6, r6\n\t"
+        "sub %[r], r6\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r7", "r6", "r8", "r9"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<128; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 128; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mvn r3, r3\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, #252\n\t"
+        "1:\n\t"
+        "ldr r7, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        "and r7, r3\n\t"
+        "and r5, r3\n\t"
+        "mov r4, r7\n\t"
+        "sub r7, r5\n\t"
+        "sbc r7, r7\n\t"
+        "add %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        "sub r5, r4\n\t"
+        "sbc r7, r7\n\t"
+        "sub %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        "sub r6, #4\n\t"
+        "cmp r6, #0\n\t"
+        "bge 1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return r;
+}
+
+/* Divide a by d and put the remainder into r. (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m,
+    sp_digit* r)
+{
+    sp_digit t1[256], t2[129];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[127];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+    for (i=127; i>=0; i--) {
+        r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+        sp_4096_mul_d_128(t2, d, r1);
+        t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+        t1[128 + i] -= t2[128];
+        sp_4096_mask_128(t2, d, t1[128 + i]);
+        t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+        sp_4096_mask_128(t2, d, t1[128 + i]);
+        t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_4096_cmp_128(t1, d) >= 0;
+    sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_128(a, m, NULL, r);
+}
+
+/* Divide a by d and put the remainder into r. (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[256], t2[129]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[127]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 128); + for (i=127; i>=0; i--) { + r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div); + + sp_4096_mul_d_128(t2, d, r1); + t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2); + t1[128 + i] -= t2[128]; + if (t1[128 + i] != 0) { + t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d); + if (t1[128 + i] != 0) + t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d); + } + } + + r1 = sp_4096_cmp_128(t1, d) >= 0; + sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_128_cond(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][256]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 256, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 256; + } +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 128U); + if (reduceA != 0) { + err = sp_4096_mod_128(t[1] + 128, a, m); + if (err == MP_OKAY) { + err = sp_4096_mod_128(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128); + err = sp_4096_mod_128(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_128(t[10], t[ 5], m, mp); + sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_128(t[12], t[ 6], m, mp); + sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_128(t[14], t[ 7], m, mp); + sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + 
c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 128); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n <<= 4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + + sp_4096_mont_mul_128(r, r, t[y], m, mp); + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][256]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 256, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 256; + } +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 128U); + if (reduceA != 0) { + err = sp_4096_mod_128(t[1] + 128, a, m); + if (err == MP_OKAY) { + err = sp_4096_mod_128(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128); + err = sp_4096_mod_128(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_128(t[10], t[ 5], m, mp); + sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_128(t[12], t[ 6], m, mp); + sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_128(t[14], t[ 7], m, mp); + sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_128(t[16], t[ 8], m, mp); + sp_4096_mont_mul_128(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_128(t[18], t[ 9], m, mp); + sp_4096_mont_mul_128(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_128(t[20], t[10], m, mp); + sp_4096_mont_mul_128(t[21], t[11], t[10], m, mp); + sp_4096_mont_sqr_128(t[22], t[11], m, mp); + sp_4096_mont_mul_128(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_128(t[24], t[12], m, mp); + sp_4096_mont_mul_128(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_128(t[26], 
t[13], m, mp); + sp_4096_mont_mul_128(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_128(t[28], t[14], m, mp); + sp_4096_mont_mul_128(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_128(t[30], t[15], m, mp); + sp_4096_mont_mul_128(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 128); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + + sp_4096_mont_mul_128(r, r, t[y], m, mp); + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[256], m[128], r[256]; +#else + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; +#endif + sp_digit *ah; + sp_digit e[1]; + int err = MP_OKAY; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 512 || + mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 128 * 2; + m = r + 128 * 2; + } +#endif + + if (err == MP_OKAY) { + ah = a + 128; + + sp_4096_from_bin(ah, 128, in, inLen); +#if DIGIT_BIT >= 32 + e[0] = em->dp[0]; +#else + e[0] = em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_4096_from_mp(m, 128, mm); + + if (e[0] == 0x3) { + if (err == MP_OKAY) { + sp_4096_sqr_128(r, ah); + err = sp_4096_mod_128_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_4096_mul_128(r, ah, r); + err = sp_4096_mod_128_cond(r, r, m); + } + } + else { + int i; + sp_digit mp; + + sp_4096_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
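+             * Zeroing the low 128 words makes the 256-word value in a equal
+             * to in * 2^4096, so the conditional reduction below yields
+             * in * R mod m directly, avoiding a Montgomery multiplication
+             * by R^2 mod m.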
+             */
+            XMEMSET(a, 0, sizeof(sp_digit) * 128);
+            err = sp_4096_mod_128_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 128);
+                for (i--; i>=0; i--) {
+                    sp_4096_mont_sqr_128(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_4096_mont_mul_128(r, r, a, m, mp);
+                    }
+                }
+                XMEMSET(&r[128], 0, sizeof(sp_digit) * 128);
+                sp_4096_mont_reduce_128(r, m, mp);
+
+                for (i = 127; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_4096_sub_in_place_128(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+/* RSA private key operation using the private exponent directly.
+ *
+ * See the CRT variant below for the parameter descriptions.
+ */
+int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 4096) {
+            err = MP_READ_E;
+        }
+        if (inLen > 512) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 4, NULL,
+            DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 128;
+        m = a + 256;
+        r = a;
+
+        sp_4096_from_bin(a, 128, in, inLen);
+        sp_4096_from_mp(d, 128, dm);
+        sp_4096_from_mp(m, 128, mm);
+        err = sp_4096_mod_exp_128(r, a, d, 4096, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 128);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+}
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing the conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+    sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov r5, #1\n\t"
+        "lsl r5, r5, #8\n\t"
+        "mov r8, r5\n\t"
+        "mov r7, #0\n\t"
+        "1:\n\t"
+        "ldr r6, [%[b], r7]\n\t"
+        "and r6, %[m]\n\t"
+        "mov r5, #0\n\t"
+        "sub r5, #1\n\t"
+        "add r5, %[c]\n\t"
+        "ldr r5, [%[a], r7]\n\t"
+        "adc r5, r6\n\t"
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"
+        "str r5, [%[r], r7]\n\t"
+        "add r7, #4\n\t"
+        "cmp r7, r8\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* RSA private key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * dm      Private exponent.
+ * pm      First prime.
+ * qm      Second prime.
+ * dpm     First prime's CRT exponent.
+ * dqm     Second prime's CRT exponent.
+ * qim     Inverse of second prime mod p.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
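+ *
+ * The body below recombines the two half-size exponentiations with the
+ * standard CRT (Garner) steps; as a sketch:
+ *     m1  = in^dpm mod pm
+ *     m2  = in^dqm mod qm
+ *     h   = qim * (m1 - m2) mod pm
+ *     out = m2 + h * qm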
+ */ +int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[128 * 2]; + sp_digit p[64], q[64], dp[64]; + sp_digit tmpa[128], tmpb[128]; +#else + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* tmpa; + sp_digit* tmpb; +#endif + sp_digit* r; + sp_digit* qi; + sp_digit* dq; + sp_digit c; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 128 * 2; + q = p + 64; + qi = dq = dp = q + 64; + tmpa = qi + 64; + tmpb = tmpa + 128; + + r = t + 128; + } +#else +#endif + + if (err == MP_OKAY) { +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + r = a; + qi = dq = dp; +#endif + sp_4096_from_bin(a, 128, in, inLen); + sp_4096_from_mp(p, 64, pm); + sp_4096_from_mp(q, 64, qm); + sp_4096_from_mp(dp, 64, dpm); + + err = sp_2048_mod_exp_64(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + sp_4096_from_mp(dq, 64, dqm); + err = sp_2048_mod_exp_64(tmpb, a, dq, 2048, q, 1); + } + + if (err == MP_OKAY) { + c = sp_2048_sub_in_place_64(tmpa, tmpb); + c += sp_4096_cond_add_64(tmpa, tmpa, p, c); + sp_4096_cond_add_64(tmpa, tmpa, p, c); + + sp_2048_from_mp(qi, 64, qim); + sp_2048_mul_64(tmpa, tmpa, qi); + err = sp_2048_mod_64(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_2048_mul_64(tmpa, q, tmpa); + XMEMSET(&tmpb[64], 0, sizeof(sp_digit) * 64); + sp_4096_add_128(r, tmpb, tmpa); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 64 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); +#endif + + return err; +} +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. 
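+ *
+ * The DIGIT_BIT branches below only differ in how the 32-bit sp_digit
+ * words are repacked into mp_digit words of DIGIT_BIT bits. For example,
+ * with DIGIT_BIT == 28 the first output digit is a[0] & 0xfffffff and the
+ * top 4 bits of a[0] spill into the next digit.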
+ */ +static int sp_4096_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 128); + r->used = 128; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 128; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 32 - s; + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 128; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[256], e[128], m[128]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 128, base); + sp_4096_from_mp(e, 128, exp); + sp_4096_from_mp(m, 128, mod); + + err = sp_4096_mod_exp_128(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_4096_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_4096 +static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n) +{ + __asm__ __volatile__ ( + "mov r6, #31\n\t" + "sub r6, r6, %[n]\n\t" + "add %[a], %[a], #255\n\t" + "add %[r], %[r], #255\n\t" + "add %[a], %[a], #193\n\t" + "add %[r], %[r], #193\n\t" + "ldr r3, [%[a], #60]\n\t" + "lsr r4, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r4, r4, r6\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], 
#32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, 
#1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #56]\n\t" + "str r2, [%[r], #64]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], #56]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #44]\n\t" + "str r2, [%[r], #52]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #32]\n\t" + "str r2, [%[r], #40]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #20]\n\t" + "str r2, [%[r], #28]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #8]\n\t" + "str r2, [%[r], #16]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #0]\n\t" + "str r3, [%[r], #8]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r3, [%[a], #60]\n\t" + "str r2, [%[r], #68]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, 
r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, 
r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #56]\n\t" + "str r2, [%[r], #64]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], #56]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #44]\n\t" + "str r2, [%[r], #52]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #32]\n\t" + "str r2, [%[r], #40]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #20]\n\t" + "str r2, [%[r], #28]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #8]\n\t" + "str r2, [%[r], #16]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #0]\n\t" + "str r3, [%[r], #8]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r3, [%[a], #60]\n\t" + "str r2, [%[r], #68]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, 
r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], 
#16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "str r2, [%[r]]\n\t" + "str r3, [%[r], #4]\n\t" + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) + : "memory", "r2", "r3", "r4", "r5", "r6" + ); +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[256]; + sp_digit td[129]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 256; +#else + norm = nd; + tmp = td; +#endif + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + sp_4096_lshift_128(r, norm, y); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + + sp_4096_lshift_128(r, r, y); + sp_4096_mul_d_128(tmp, norm, r[128]); + r[128] = 0; + o = sp_4096_add_128(r, r, tmp); + sp_4096_cond_sub_128(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* HAVE_FFDHE_4096 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Length, in bytes, of exponentiation result. 
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[256], e[128], m[128];
+    sp_digit* r = b;
+    word32 i;
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 512) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 128, base);
+        sp_4096_from_bin(e, 128, exp, expLen);
+        sp_4096_from_mp(m, 128, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        if (base->used == 1 && base->dp[0] == 2 && m[127] == (sp_digit)-1)
+            err = sp_4096_mod_exp_2_128(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_4096_mod_exp_128(r, b, e, expLen * 8, m, 0);
+
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        for (i=0; i<512 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
+/* Point structure to use. */
+typedef struct sp_point_256 {
+    sp_digit x[2 * 8];
+    sp_digit y[2 * 8];
+    sp_digit z[2 * 8];
+    int infinity;
+} sp_point_256;
+
+/* The modulus (prime) of the curve P256. */
+static const sp_digit p256_mod[8] = {
+    0xffffffff,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
+    0x00000001,0xffffffff
+};
+/* The Montgomery normalizer for modulus of the curve P256. */
+static const sp_digit p256_norm_mod[8] = {
+    0x00000001,0x00000000,0x00000000,0xffffffff,0xffffffff,0xffffffff,
+    0xfffffffe,0x00000000
+};
+/* The Montgomery multiplier for modulus of the curve P256. */
+static const sp_digit p256_mp_mod = 0x00000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* The order of the curve P256. */
+static const sp_digit p256_order[8] = {
+    0xfc632551,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
+    0x00000000,0xffffffff
+};
+#endif
+/* The order of the curve P256 minus 2. */
+static const sp_digit p256_order2[8] = {
+    0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
+    0x00000000,0xffffffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for order of the curve P256. */
+static const sp_digit p256_norm_order[8] = {
+    0x039cdaaf,0x0c46353d,0x58e8617b,0x43190552,0x00000000,0x00000000,
+    0xffffffff,0x00000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for order of the curve P256. */
+static const sp_digit p256_mp_order = 0xee00bc4f;
+#endif
+/* The base point of curve P256.
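+ * Coordinates are stored least-significant 32-bit word first; e.g. the X
+ * ordinate below is the SEC 2 value 0x6b17d1f2...d898c296 read from the
+ * bottom up.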
+ */
+static const sp_point_256 p256_base = {
+    /* X ordinate */
+    {
+        0xd898c296,0xf4a13945,0x2deb33a0,0x77037d81,0x63a440f2,0xf8bce6e5,
+        0xe12c4247,0x6b17d1f2,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x37bf51f5,0xcbb64068,0x6b315ece,0x2bce3357,0x7c0f9e16,0x8ee7eb4a,
+        0xfe1a7f9b,0x4fe342e2,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0x00000000,0x00000000,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p256_b[8] = {
+    0x27d2604b,0x3bce3c3e,0xcc53b0f6,0x651d06b0,0x769886bc,0xb3ebbd55,
+    0xaa3a93e7,0x5ac635d8
+};
+#endif
+
+static int sp_256_point_new_ex_8(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), &(sp), &(p))
+#endif
+
+
+static void sp_256_point_free_8(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by the Montgomery normalizer mod the modulus (prime).
+ *
+ * r  The resulting Montgomery form number.
+ * a  The number to convert.
+ * m  The modulus (prime).
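+ *
+ * Sketch of where the signed coefficient rows in the body come from: with
+ * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, each power 2^(256 + 32*i) reduces
+ * mod p to a small signed sum of 2^(32*j) terms, so a * 2^256 mod p can be
+ * accumulated column by column into t[0..7] from the words of a (the
+ * commented rows), then fixed up by carry propagation and the final
+ * correction with o, the overflow out of t[7].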
+ */ +static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + int64_t t[8]; + int64_t a64[8]; + int64_t o; + + (void)m; + + a64[0] = a[0]; + a64[1] = a[1]; + a64[2] = a[2]; + a64[3] = a[3]; + a64[4] = a[4]; + a64[5] = a[5]; + a64[6] = a[6]; + a64[7] = a[7]; + + /* 1 1 0 -1 -1 -1 -1 0 */ + t[0] = 0 + a64[0] + a64[1] - a64[3] - a64[4] - a64[5] - a64[6]; + /* 0 1 1 0 -1 -1 -1 -1 */ + t[1] = 0 + a64[1] + a64[2] - a64[4] - a64[5] - a64[6] - a64[7]; + /* 0 0 1 1 0 -1 -1 -1 */ + t[2] = 0 + a64[2] + a64[3] - a64[5] - a64[6] - a64[7]; + /* -1 -1 0 2 2 1 0 -1 */ + t[3] = 0 - a64[0] - a64[1] + 2 * a64[3] + 2 * a64[4] + a64[5] - a64[7]; + /* 0 -1 -1 0 2 2 1 0 */ + t[4] = 0 - a64[1] - a64[2] + 2 * a64[4] + 2 * a64[5] + a64[6]; + /* 0 0 -1 -1 0 2 2 1 */ + t[5] = 0 - a64[2] - a64[3] + 2 * a64[5] + 2 * a64[6] + a64[7]; + /* -1 -1 0 0 0 1 3 2 */ + t[6] = 0 - a64[0] - a64[1] + a64[5] + 3 * a64[6] + 2 * a64[7]; + /* 1 0 -1 -1 -1 -1 0 3 */ + t[7] = 0 + a64[0] - a64[2] - a64[3] - a64[4] - a64[5] + 3 * a64[7]; + + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + o = t[7] >> 32; t[7] &= 0xffffffff; + t[0] += o; + t[3] -= o; + t[6] -= o; + t[7] += o; + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + r[0] = t[0]; + r[1] = t[1]; + r[2] = t[2]; + r[3] = t[3]; + r[4] = t[4]; + r[5] = t[5]; + r[6] = t[6]; + r[7] = t[7]; + + return MP_OKAY; +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 32 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 32 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 32U) <= (word32)DIGIT_BIT) { + s += 32U; + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 32) { + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + s = 32 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Convert a point of type ecc_point to type sp_point_256. + * + * p Point of type sp_point_256 (result). + * pm Point of type ecc_point. 
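+ *
+ * Each coordinate array holds 2 * 8 words of scratch space; the XMEMSET
+ * calls below clear the upper halves that sp_256_from_mp (called with
+ * size 8) never writes.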
+ */ +static void sp_256_point_from_ecc_point_8(sp_point_256* p, const ecc_point* pm) +{ + XMEMSET(p->x, 0, sizeof(p->x)); + XMEMSET(p->y, 0, sizeof(p->y)); + XMEMSET(p->z, 0, sizeof(p->z)); + sp_256_from_mp(p->x, 8, pm->x); + sp_256_from_mp(p->y, 8, pm->y); + sp_256_from_mp(p->z, 8, pm->z); + p->infinity = 0; +} + +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_256_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 8); + r->used = 8; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 8; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 32 - s; + } + r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 8; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Convert a point of type sp_point_256 to type ecc_point. + * + * p Point of type sp_point_256. + * pm Point of type ecc_point (result). + * returns MEMORY_E when allocation of memory in ecc_point fails otherwise + * MP_OKAY. + */ +static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) +{ + int err; + + err = sp_256_to_mp(p->x, pm->x); + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, pm->y); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, pm->z); + } + + return err; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
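+ *
+ * The assembly below targets Thumb-1 style cores where MUL returns
+ * only the low 32 bits, so each 32x32->64 product is assembled from
+ * four 16x16->32 partial products. A sketch of one step in C, with
+ * illustrative variable names only:
+ *
+ *     al = a & 0xffff; ah = a >> 16;
+ *     bl = b & 0xffff; bh = b >> 16;
+ *     acc += (word64)(al * bl)              /* bits  0..31 */
+ *          + ((word64)(al * bh) << 16)      /* bits 16..47 */
+ *          + ((word64)(ah * bl) << 16)      /* bits 16..47 */
+ *          + ((word64)(ah * bh) << 32);     /* bits 32..63 */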
+ */ +SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[8 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r9, %[a]\n\t" + "mov r10, %[b]\n\t" + "mov r6, #32\n\t" + "add r6, r9\n\t" + "mov r12, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #28\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov %[b], r8\n\t" + "sub %[b], %[a]\n\t" + "add %[a], r9\n\t" + "add %[b], r10\n\t" + "\n2:\n\t" + "# Multiply Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply Done\n\t" + "add %[a], #4\n\t" + "sub %[b], #4\n\t" + "cmp %[a], r12\n\t" + "beq 3f\n\t" + "mov r6, r8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #56\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[a], r9\n\t" + "mov %[b], r10\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #32\n\t" + "mov r8, r5\n\t" + "mov r7, #0\n\t" + "1:\n\t" + "ldr r6, [%[b], r7]\n\t" + "and r6, %[m]\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r5, [%[a], r7]\n\t" + "sbc r5, r6\n\t" + "sbc %[c], %[c]\n\t" + "str r5, [%[r], r7]\n\t" + "add r7, #4\n\t" + "cmp r7, r8\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r7", "r8" + ); + + return c; +} + +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
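+ *
+ * For illustration, the generic word-by-word form this assembly
+ * specializes (for P-256, mp == 1 because m == -1 mod 2^32, so mu is
+ * just a[i], as the comments in the code note):
+ *
+ *     for (i = 0; i < 8; i++) {
+ *         mu = (sp_digit)(a[i] * mp);   /* low 32 bits only       */
+ *         a += (mu * m) << (32 * i);    /* multi-word; zeros a[i] */
+ *     }
+ *     a >>= 256;                        /* drop the zeroed words  */
+ *     if (a >= m)                       /* final conditional      */
+ *         a -= m;                       /* subtraction            */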
+ */ +SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + (void)mp; + (void)m; + + __asm__ __volatile__ ( + "mov r2, #0\n\t" + "mov r1, #0\n\t" + "# i = 0\n\t" + "mov r8, r2\n\t" + "\n1:\n\t" + "mov r4, #0\n\t" + "# mu = a[i] * 1 (mp) = a[i]\n\t" + "ldr r3, [%[a]]\n\t" + "# a[i+0] += -1 * mu\n\t" + "mov r5, r3\n\t" + "str r4, [%[a], #0]\n\t" + "# a[i+1] += -1 * mu\n\t" + "ldr r6, [%[a], #4]\n\t" + "mov r4, r3\n\t" + "sub r5, r3\n\t" + "sbc r4, r2\n\t" + "add r5, r6\n\t" + "adc r4, r2\n\t" + "str r5, [%[a], #4]\n\t" + "# a[i+2] += -1 * mu\n\t" + "ldr r6, [%[a], #8]\n\t" + "mov r5, r3\n\t" + "sub r4, r3\n\t" + "sbc r5, r2\n\t" + "add r4, r6\n\t" + "adc r5, r2\n\t" + "str r4, [%[a], #8]\n\t" + "# a[i+3] += 0 * mu\n\t" + "ldr r6, [%[a], #12]\n\t" + "mov r4, #0\n\t" + "add r5, r6\n\t" + "adc r4, r2\n\t" + "str r5, [%[a], #12]\n\t" + "# a[i+4] += 0 * mu\n\t" + "ldr r6, [%[a], #16]\n\t" + "mov r5, #0\n\t" + "add r4, r6\n\t" + "adc r5, r2\n\t" + "str r4, [%[a], #16]\n\t" + "# a[i+5] += 0 * mu\n\t" + "ldr r6, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "add r5, r6\n\t" + "adc r4, r2\n\t" + "str r5, [%[a], #20]\n\t" + "# a[i+6] += 1 * mu\n\t" + "ldr r6, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "add r4, r3\n\t" + "adc r5, r2\n\t" + "add r4, r6\n\t" + "adc r5, r2\n\t" + "str r4, [%[a], #24]\n\t" + "# a[i+7] += -1 * mu\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[a], #32]\n\t" + "add r4, r1, r3\n\t" + "mov r1, #0\n\t" + "adc r1, r2\n\t" + "sub r5, r3\n\t" + "sbc r4, r2\n\t" + "sbc r1, r2\n\t" + "add r5, r6\n\t" + "adc r4, r7\n\t" + "adc r1, r2\n\t" + "str r5, [%[a], #28]\n\t" + "str r4, [%[a], #32]\n\t" + "# i += 1\n\t" + "mov r6, #4\n\t" + "add r8, r6\n\t" + "add %[a], #4\n\t" + "mov r6, #32\n\t" + "cmp r8, r6\n\t" + "blt 1b\n\t" + "sub %[a], #32\n\t" + "mov r3, r1\n\t" + "sub r1, #1\n\t" + "mvn r1, r1\n\t" + "ldr r5, [%[a],#32]\n\t" + "ldr r4, [%[a],#36]\n\t" + "ldr r6, [%[a],#40]\n\t" + "ldr r7, [%[a],#44]\n\t" + "sub r5, r1\n\t" + "sbc r4, r1\n\t" + "sbc r6, r1\n\t" + "sbc r7, r2\n\t" + "str r5, [%[a],#0]\n\t" + "str r4, [%[a],#4]\n\t" + "str r6, [%[a],#8]\n\t" + "str r7, [%[a],#12]\n\t" + "ldr r5, [%[a],#48]\n\t" + "ldr r4, [%[a],#52]\n\t" + "ldr r6, [%[a],#56]\n\t" + "ldr r7, [%[a],#60]\n\t" + "sbc r5, r2\n\t" + "sbc r4, r2\n\t" + "sbc r6, r3\n\t" + "sbc r7, r1\n\t" + "str r5, [%[a],#16]\n\t" + "str r4, [%[a],#20]\n\t" + "str r6, [%[a],#24]\n\t" + "str r7, [%[a],#28]\n\t" + : [a] "+r" (a) + : + : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8" + ); + + + (void)m; + (void)mp; +} + +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */
+SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov r8, %[mp]\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov r14, %[m]\n\t"
+        "mov r9, %[a]\n\t"
+        "mov r4, #0\n\t"
+        "# i = 0\n\t"
+        "mov r11, r4\n\t"
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        "mov %[ca], #0\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mov %[mp], r8\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mul %[mp], %[a]\n\t"
+        "mov %[m], r14\n\t"
+        "mov r10, r9\n\t"
+        "\n2:\n\t"
+        "# a[i+j] += m[j] * mu\n\t"
+        "mov %[a], r10\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov %[ca], #0\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "# Multiply m[j] and mu - Start\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r6, %[mp], #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add %[a], r7\n\t"
+        "adc r5, %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add %[a], r6\n\t"
+        "adc r5, r7\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add %[a], r6\n\t"
+        "adc r5, r7\n\t"
+        "# Multiply m[j] and mu - Done\n\t"
+        "add r4, %[a]\n\t"
+        "adc r5, %[ca]\n\t"
+        "mov %[a], r10\n\t"
+        "str r4, [%[a]]\n\t"
+        "mov r6, #4\n\t"
+        "add %[m], #4\n\t"
+        "add r10, r6\n\t"
+        "mov r4, #28\n\t"
+        "add r4, r9\n\t"
+        "cmp r10, r4\n\t"
+        "blt 2b\n\t"
+        "# a[i+7] += m[7] * mu\n\t"
+        "mov %[ca], #0\n\t"
+        "mov r4, r12\n\t"
+        "mov %[a], #0\n\t"
+        "# Multiply m[7] and mu - Start\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r6, %[mp], #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "adc r4, %[ca]\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "# Multiply m[7] and mu - Done\n\t"
+        "mov %[ca], %[a]\n\t"
+        "mov %[a], r10\n\t"
+        "ldr r7, [%[a], #4]\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov r6, #0\n\t"
+        "add r5, %[a]\n\t"
+        "adc r7, r4\n\t"
+        "adc %[ca], r6\n\t"
+        "mov %[a], r10\n\t"
+        "str r5, [%[a]]\n\t"
+        "str r7, [%[a], #4]\n\t"
+        "# i += 1\n\t"
+        "mov r6, #4\n\t"
+        "add r9, r6\n\t"
+        "add r11, r6\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov %[a], r9\n\t"
+        "mov r4, #32\n\t"
+        "cmp r11, r4\n\t"
+        "blt 1b\n\t"
+        "mov %[m], r14\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
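+ *
+ * Note that this computes r = a * b * R^-1 mod m with R = 2^256, so
+ * two Montgomery form inputs give a Montgomery form output:
+ *
+ *     (aR) * (bR) * R^-1 == (a*b)R (mod m)
+ *
+ * which is why the point arithmetic below can chain these calls
+ * without converting in and out of Montgomery form.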
+ */
+static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mul_8(r, a, b);
+    sp_256_mont_reduce_8(r, m, mp);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r8, r3\n\t"
+        "mov r11, %[r]\n\t"
+        "mov r6, #64\n\t"
+        "neg r6, r6\n\t"
+        "add sp, r6\n\t"
+        "mov r10, sp\n\t"
+        "mov r9, %[a]\n\t"
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r6, #28\n\t"
+        "mov %[a], r8\n\t"
+        "sub %[a], r6\n\t"
+        "sbc r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], r6\n\t"
+        "mov r2, r8\n\t"
+        "sub r2, %[a]\n\t"
+        "add %[a], r9\n\t"
+        "add r2, r9\n\t"
+        "\n2:\n\t"
+        "cmp r2, %[a]\n\t"
+        "beq 4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal 5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r6\n\t"
+        "add r3, r6\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "mul r7, r7\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #15\n\t"
+        "lsl r6, r6, #17\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add %[a], #4\n\t"
+        "sub r2, #4\n\t"
+        "mov r6, #32\n\t"
+        "add r6, r9\n\t"
+        "cmp %[a], r6\n\t"
+        "beq 3f\n\t"
+        "cmp %[a], r2\n\t"
+        "bgt 3f\n\t"
+        "mov r7, r8\n\t"
+        "add r7, r9\n\t"
+        "cmp %[a], r7\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        "mov %[r], r10\n\t"
+        "mov r7, r8\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r7, #4\n\t"
+        "mov r8, r7\n\t"
+        "mov r6, #56\n\t"
+        "cmp r7, r6\n\t"
+        "ble 1b\n\t"
+        "mov %[a], r9\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov %[r], r11\n\t"
+        "mov %[a], r10\n\t"
+        "mov r3, #60\n\t"
+        "\n4:\n\t"
+        "ldr r6, [%[a], r3]\n\t"
+        "str r6, [%[r], r3]\n\t"
+        "sub r3, #4\n\t"
+        "bge 4b\n\t"
+        "mov r6, #64\n\t"
+        "add sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_256_sqr_8(r, a);
+    sp_256_mont_reduce_8(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times.
+ * (r = a ^ (2 ^ n) mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mont_sqr_8(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_256_mont_sqr_8(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* The P256 modulus minus 2, used to invert by exponentiation. */
+static const uint32_t p256_mod_minus_2[8] = {
+    0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U,
+    0x00000001U,0xffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 8);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_8(t, t, p256_mod, p256_mp_mod);
+        if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_256_mont_mul_8(t, t, a, p256_mod, p256_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 8);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 8;
+    sp_digit* t3 = td + 4 * 8;
+    /* 0x2 */
+    sp_256_mont_sqr_8(t1, a, p256_mod, p256_mp_mod);
+    /* 0x3 */
+    sp_256_mont_mul_8(t2, t1, a, p256_mod, p256_mp_mod);
+    /* 0xc */
+    sp_256_mont_sqr_n_8(t1, t2, 2, p256_mod, p256_mp_mod);
+    /* 0xd */
+    sp_256_mont_mul_8(t3, t1, a, p256_mod, p256_mp_mod);
+    /* 0xf */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xf0 */
+    sp_256_mont_sqr_n_8(t1, t2, 4, p256_mod, p256_mp_mod);
+    /* 0xfd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xff00 */
+    sp_256_mont_sqr_n_8(t1, t2, 8, p256_mod, p256_mp_mod);
+    /* 0xfffd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffff0000 */
+    sp_256_mont_sqr_n_8(t1, t2, 16, p256_mod, p256_mp_mod);
+    /* 0xfffffffd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000000 */
+    sp_256_mont_sqr_n_8(t1, t2, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffffffffffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001 */
+    sp_256_mont_mul_8(r, t1, a, p256_mod, p256_mp_mod);
+    /* 0xffffffff000000010000000000000000000000000000000000000000 */
+    sp_256_mont_sqr_n_8(r, r, 160, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+    sp_256_mont_mul_8(r, r, t2, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+    sp_256_mont_sqr_n_8(r, r, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+    sp_256_mont_mul_8(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +SP_NOINLINE static int32_t sp_256_cmp_8(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; + + + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mvn r3, r3\n\t" + "mov r6, #28\n\t" + "1:\n\t" + "ldr r7, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r7, r3\n\t" + "and r5, r3\n\t" + "mov r4, r7\n\t" + "sub r7, r5\n\t" + "sbc r7, r7\n\t" + "add %[r], r7\n\t" + "mvn r7, r7\n\t" + "and r3, r7\n\t" + "sub r5, r4\n\t" + "sbc r7, r7\n\t" + "sub %[r], r7\n\t" + "mvn r7, r7\n\t" + "and r3, r7\n\t" + "sub r6, #4\n\t" + "cmp r6, #0\n\t" + "bge 1b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "r3", "r4", "r5", "r6", "r7" + ); + + return r; +} + +/* Normalize the values in each word to 32. + * + * a Array of sp_digit to normalize. + */ +#define sp_256_norm_8(a) + +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. + * p Montgomery form projective coordinate point. + * t Temporary ordinate data. + */ +static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + int32_t n; + + sp_256_mont_inv_8(t1, p->z, t + 2*8); + + sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod); + + /* x /= z^2 */ + sp_256_mont_mul_8(r->x, p->x, t2, p256_mod, p256_mp_mod); + XMEMSET(r->x + 8, 0, sizeof(r->x) / 2U); + sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod); + /* Reduce x to less than modulus */ + n = sp_256_cmp_8(r->x, p256_mod); + sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_8(r->x); + + /* y /= z^3 */ + sp_256_mont_mul_8(r->y, p->y, t1, p256_mod, p256_mp_mod); + XMEMSET(r->y + 8, 0, sizeof(r->y) / 2U); + sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod); + /* Reduce y to less than modulus */ + n = sp_256_cmp_8(r->y, p256_mod); + sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_8(r->y); + + XMEMSET(r->z, 0, sizeof(r->z)); + r->z[0] = 1; + +} + +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "mov r7, #0\n\t" + "add r6, #32\n\t" + "sub r7, #1\n\t" + "\n1:\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r]]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #4\n\t" + "add %[b], #4\n\t" + "add %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#else +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */
+SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "add r4, r5\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #4]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #4]\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #12]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #20]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #20]\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #28]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #28]\n\t"
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r  Result of addition.
+ * a  First number to add in Montgomery form.
+ * b  Second number to add in Montgomery form.
+ * m  Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "ldr r4, [%[a],#0]\n\t"
+        "ldr r5, [%[a],#4]\n\t"
+        "ldr r6, [%[b],#0]\n\t"
+        "ldr r7, [%[b],#4]\n\t"
+        "add r4, r6\n\t"
+        "adc r5, r7\n\t"
+        "str r4, [%[r],#0]\n\t"
+        "str r5, [%[r],#4]\n\t"
+        "ldr r4, [%[a],#8]\n\t"
+        "ldr r5, [%[a],#12]\n\t"
+        "ldr r6, [%[b],#8]\n\t"
+        "ldr r7, [%[b],#12]\n\t"
+        "adc r4, r6\n\t"
+        "adc r5, r7\n\t"
+        "str r4, [%[r],#8]\n\t"
+        "str r5, [%[r],#12]\n\t"
+        "ldr r4, [%[a],#16]\n\t"
+        "ldr r5, [%[a],#20]\n\t"
+        "ldr r6, [%[b],#16]\n\t"
+        "ldr r7, [%[b],#20]\n\t"
+        "adc r4, r6\n\t"
+        "adc r5, r7\n\t"
+        "mov r8, r4\n\t"
+        "mov r9, r5\n\t"
+        "ldr r4, [%[a],#24]\n\t"
+        "ldr r5, [%[a],#28]\n\t"
+        "ldr r6, [%[b],#24]\n\t"
+        "ldr r7, [%[b],#28]\n\t"
+        "adc r4, r6\n\t"
+        "adc r5, r7\n\t"
+        "mov r10, r4\n\t"
+        "mov r11, r5\n\t"
+        "adc r3, r3\n\t"
+        "mov r6, r3\n\t"
+        "sub r3, #1\n\t"
+        "mvn r3, r3\n\t"
+        "mov r7, #0\n\t"
+        "ldr r4, [%[r],#0]\n\t"
+        "ldr r5, [%[r],#4]\n\t"
+        "sub r4, r3\n\t"
+        "sbc r5, r3\n\t"
+        "str r4, [%[r],#0]\n\t"
+        "str r5, [%[r],#4]\n\t"
+        "ldr r4, [%[r],#8]\n\t"
+        "ldr r5, [%[r],#12]\n\t"
+        "sbc r4, r3\n\t"
+        "sbc r5, r7\n\t"
+        "str r4, [%[r],#8]\n\t"
+        "str r5, [%[r],#12]\n\t"
+        "mov r4, r8\n\t"
+        "mov r5, r9\n\t"
+        "sbc r4, r7\n\t"
+        "sbc r5, r7\n\t"
+        "str r4, [%[r],#16]\n\t"
+        "str r5, [%[r],#20]\n\t"
+        "mov r4, r10\n\t"
+        "mov r5, r11\n\t"
+        "sbc r4, r6\n\t"
+        "sbc r5, r3\n\t"
+        "str r4, [%[r],#24]\n\t"
+        "str r5, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r  Result of doubling.
+ * a  Number to double in Montgomery form.
+ * m  Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "ldr r4, [%[a],#0]\n\t"
+        "ldr r5, [%[a],#4]\n\t"
+        "ldr r6, [%[a],#8]\n\t"
+        "ldr r7, [%[a],#12]\n\t"
+        "add r4, r4\n\t"
+        "adc r5, r5\n\t"
+        "adc r6, r6\n\t"
+        "adc r7, r7\n\t"
+        "str r4, [%[r],#0]\n\t"
+        "str r5, [%[r],#4]\n\t"
+        "str r6, [%[r],#8]\n\t"
+        "str r7, [%[r],#12]\n\t"
+        "ldr r4, [%[a],#16]\n\t"
+        "ldr r5, [%[a],#20]\n\t"
+        "ldr r6, [%[a],#24]\n\t"
+        "ldr r7, [%[a],#28]\n\t"
+        "adc r4, r4\n\t"
+        "adc r5, r5\n\t"
+        "adc r6, r6\n\t"
+        "adc r7, r7\n\t"
+        "mov r8, r4\n\t"
+        "mov r9, r5\n\t"
+        "mov r10, r6\n\t"
+        "mov r11, r7\n\t"
+        "mov r3, #0\n\t"
+        "mov r7, #0\n\t"
+        "adc r3, r3\n\t"
+        "mov r2, r3\n\t"
+        "sub r3, #1\n\t"
+        "mvn r3, r3\n\t"
+        "ldr r4, [%[r],#0]\n\t"
+        "ldr r5, [%[r],#4]\n\t"
+        "ldr r6, [%[r],#8]\n\t"
+        "sub r4, r3\n\t"
+        "sbc r5, r3\n\t"
+        "sbc r6, r3\n\t"
+        "str r4, [%[r],#0]\n\t"
+        "str r5, [%[r],#4]\n\t"
+        "str r6, [%[r],#8]\n\t"
+        "ldr r4, [%[r],#12]\n\t"
+        "mov r5, r8\n\t"
+        "mov r6, r9\n\t"
+        "sbc r4, r7\n\t"
+        "sbc r5, r7\n\t"
+        "sbc r6, r7\n\t"
+        "str r4, [%[r],#12]\n\t"
+        "str r5, [%[r],#16]\n\t"
+        "str r6, [%[r],#20]\n\t"
+        "mov r4, r10\n\t"
+        "mov r5, r11\n\t"
+        "sbc r4, r2\n\t"
+        "sbc r5, r3\n\t"
+        "str r4, [%[r],#24]\n\t"
+        "str r5, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r3", "r2", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r  Result of tripling.
+ * a  Number to triple in Montgomery form.
+ * m  Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "ldr r6, [%[a],#0]\n\t"
+        "ldr r7, [%[a],#4]\n\t"
+        "ldr r4, [%[a],#8]\n\t"
+        "ldr r5, [%[a],#12]\n\t"
+        "add r6, r6\n\t"
+        "adc r7, r7\n\t"
+        "adc r4, r4\n\t"
+        "adc r5, r5\n\t"
+        "mov r8, r4\n\t"
+        "mov r9, r5\n\t"
+        "ldr r2, [%[a],#16]\n\t"
+        "ldr r3, [%[a],#20]\n\t"
+        "ldr r4, [%[a],#24]\n\t"
+        "ldr r5, [%[a],#28]\n\t"
+        "adc r2, r2\n\t"
+        "adc r3, r3\n\t"
+        "adc r4, r4\n\t"
+        "adc r5, r5\n\t"
+        "mov r10, r2\n\t"
+        "mov r11, r3\n\t"
+        "mov r12, r4\n\t"
+        "mov r14, r5\n\t"
+        "mov r3, #0\n\t"
+        "mov r5, #0\n\t"
+        "adc r3, r3\n\t"
+        "mov r4, r3\n\t"
+        "sub r3, #1\n\t"
+        "mvn r3, r3\n\t"
+        "sub r6, r3\n\t"
+        "sbc r7, r3\n\t"
+        "mov r2, r8\n\t"
+        "sbc r2, r3\n\t"
+        "mov r8, r2\n\t"
+        "mov r2, r9\n\t"
+        "sbc r2, r5\n\t"
+        "mov r9, r2\n\t"
+        "mov r2, r10\n\t"
+        "sbc r2, r5\n\t"
+        "mov r10, r2\n\t"
+        "mov r2, r11\n\t"
+        "sbc r2, r5\n\t"
+        "mov r11, r2\n\t"
+        "mov r2, r12\n\t"
+        "sbc r2, r4\n\t"
+        "mov r12, r2\n\t"
+        "mov r2, r14\n\t"
+        "sbc r2, r3\n\t"
+        "mov r14, r2\n\t"
+        "ldr r2, [%[a],#0]\n\t"
+        "ldr r3, [%[a],#4]\n\t"
+        "add r6, r2\n\t"
+        "adc r7, r3\n\t"
+        "ldr r2, [%[a],#8]\n\t"
+        "ldr r3, [%[a],#12]\n\t"
+        "mov r4, r8\n\t"
+        "mov r5, r9\n\t"
+        "adc r2, r4\n\t"
+        "adc r3, r5\n\t"
+        "mov r8, r2\n\t"
+        "mov r9, r3\n\t"
+        "ldr r2, [%[a],#16]\n\t"
+        "ldr r3, [%[a],#20]\n\t"
+        "mov r4, r10\n\t"
+        "mov r5, r11\n\t"
+        "adc r2, r4\n\t"
+        "adc r3, r5\n\t"
+        "mov r10, r2\n\t"
+        "mov r11, r3\n\t"
+        "ldr r2, [%[a],#24]\n\t"
+        "ldr r3, [%[a],#28]\n\t"
+        "mov r4, r12\n\t"
+        "mov r5, r14\n\t"
+        "adc r2, r4\n\t"
+        "adc r3, r5\n\t"
+        "mov r12, r2\n\t"
+        "mov r14, r3\n\t"
+        "mov r3, #0\n\t"
+        "mov r5, #0\n\t"
+        "adc r3, r3\n\t"
+        "mov r4, r3\n\t"
+        "sub r3, #1\n\t"
+        "mvn r3, r3\n\t"
+        "sub r6, r3\n\t"
+        "str r6, [%[r],#0]\n\t"
+        "sbc r7, r3\n\t"
+        "str r7, [%[r],#4]\n\t"
"mov r2, r8\n\t" + "sbc r2, r3\n\t" + "str r2, [%[r],#8]\n\t" + "mov r2, r9\n\t" + "sbc r2, r5\n\t" + "str r2, [%[r],#12]\n\t" + "mov r2, r10\n\t" + "sbc r2, r5\n\t" + "str r2, [%[r],#16]\n\t" + "mov r2, r11\n\t" + "sbc r2, r5\n\t" + "str r2, [%[r],#20]\n\t" + "mov r2, r12\n\t" + "sbc r2, r4\n\t" + "str r2, [%[r],#24]\n\t" + "mov r2, r14\n\t" + "sbc r2, r3\n\t" + "str r2, [%[r],#28]\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + ); +} + +/* Subtract two Montgomery form numbers (r = a - b % m). + * + * r Result of subtration. + * a Number to subtract from in Montogmery form. + * b Number to subtract with in Montogmery form. + * m Modulus (prime). + */ +SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + (void)m; + + __asm__ __volatile__ ( + "ldr r4, [%[a],#0]\n\t" + "ldr r5, [%[a],#4]\n\t" + "ldr r6, [%[b],#0]\n\t" + "ldr r7, [%[b],#4]\n\t" + "sub r4, r6\n\t" + "sbc r5, r7\n\t" + "str r4, [%[r],#0]\n\t" + "str r5, [%[r],#4]\n\t" + "ldr r4, [%[a],#8]\n\t" + "ldr r5, [%[a],#12]\n\t" + "ldr r6, [%[b],#8]\n\t" + "ldr r7, [%[b],#12]\n\t" + "sbc r4, r6\n\t" + "sbc r5, r7\n\t" + "str r4, [%[r],#8]\n\t" + "str r5, [%[r],#12]\n\t" + "ldr r4, [%[a],#16]\n\t" + "ldr r5, [%[a],#20]\n\t" + "ldr r6, [%[b],#16]\n\t" + "ldr r7, [%[b],#20]\n\t" + "sbc r4, r6\n\t" + "sbc r5, r7\n\t" + "mov r8, r4\n\t" + "mov r9, r5\n\t" + "ldr r4, [%[a],#24]\n\t" + "ldr r5, [%[a],#28]\n\t" + "ldr r6, [%[b],#24]\n\t" + "ldr r7, [%[b],#28]\n\t" + "sbc r4, r6\n\t" + "sbc r5, r7\n\t" + "mov r10, r4\n\t" + "mov r11, r5\n\t" + "sbc r3, r3\n\t" + "lsr r7, r3, #31\n\t" + "mov r6, #0\n\t" + "ldr r4, [%[r],#0]\n\t" + "ldr r5, [%[r],#4]\n\t" + "add r4, r3\n\t" + "adc r5, r3\n\t" + "str r4, [%[r],#0]\n\t" + "str r5, [%[r],#4]\n\t" + "ldr r4, [%[r],#8]\n\t" + "ldr r5, [%[r],#12]\n\t" + "adc r4, r3\n\t" + "adc r5, r6\n\t" + "str r4, [%[r],#8]\n\t" + "str r5, [%[r],#12]\n\t" + "mov r4, r8\n\t" + "mov r5, r9\n\t" + "adc r4, r6\n\t" + "adc r5, r6\n\t" + "str r4, [%[r],#16]\n\t" + "str r5, [%[r],#20]\n\t" + "mov r4, r10\n\t" + "mov r5, r11\n\t" + "adc r4, r7\n\t" + "adc r5, r3\n\t" + "str r4, [%[r],#24]\n\t" + "str r5, [%[r],#28]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) + * + * r Result of division by 2. + * a Number to divide. + * m Modulus (prime). 
+ */ +SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + __asm__ __volatile__ ( + "ldr r7, [%[a], #0]\n\t" + "lsl r7, r7, #31\n\t" + "lsr r7, r7, #31\n\t" + "mov r5, #0\n\t" + "sub r5, r7\n\t" + "mov r7, #0\n\t" + "lsl r6, r5, #31\n\t" + "lsr r6, r6, #31\n\t" + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "add r3, r5\n\t" + "adc r4, r5\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "adc r3, r5\n\t" + "adc r4, r7\n\t" + "str r3, [%[r], #8]\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "adc r3, r7\n\t" + "adc r4, r7\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "adc r3, r6\n\t" + "adc r4, r5\n\t" + "adc r7, r7\n\t" + "lsl r7, r7, #31\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, #31\n\t" + "lsr r6, r4, #1\n\t" + "lsl r4, r4, #31\n\t" + "orr r5, r4\n\t" + "orr r6, r7\n\t" + "mov r7, r3\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, #31\n\t" + "lsr r6, r4, #1\n\t" + "lsl r4, r4, #31\n\t" + "orr r5, r4\n\t" + "orr r6, r7\n\t" + "mov r7, r3\n\t" + "str r5, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, #31\n\t" + "lsr r6, r4, #1\n\t" + "lsl r4, r4, #31\n\t" + "orr r5, r4\n\t" + "orr r6, r7\n\t" + "mov r7, r3\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r3, [%[r], #0]\n\t" + "ldr r4, [%[r], #4]\n\t" + "lsr r5, r3, #1\n\t" + "lsr r6, r4, #1\n\t" + "lsl r4, r4, #31\n\t" + "orr r5, r4\n\t" + "orr r6, r7\n\t" + "str r5, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + : + : [r] "r" (r), [a] "r" (a), [m] "r" (m) + : "memory", "r3", "r4", "r5", "r6", "r7" + ); +} + +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. 
*/ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_8(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_8(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_8(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_8(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_8(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_8(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_8(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_8(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_8(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_8(y, y, t2, p256_mod); +} + +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "add r6, #32\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "sbc r4, r5\n\t" + "str r4, [%[r]]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #4\n\t" + "add %[b], #4\n\t" + "add %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6" + ); + + return c; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "sub r4, r6\n\t" + "sbc r5, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "sbc r4, r6\n\t" + "sbc r5, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "sbc r4, r6\n\t" + "sbc r5, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "sbc r4, r6\n\t" + "sbc r5, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r5, [%[r], #28]\n\t" + "sbc %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Compare two numbers to determine if they are equal. + * Constant time implementation. + * + * a First number to compare. + * b Second number to compare. + * returns 1 when equal and 0 otherwise. 
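+ *
+ * Constant time follows from using only bitwise operations: the OR of
+ * the word-wise XORs is zero exactly when every word matches, so no
+ * branch depends on which words differ.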
+ */ +static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) | + (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7])) == 0; +} + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_add_8(sp_point_256* r, const sp_point_256* p, const sp_point_256* q, + sp_digit* t) +{ + const sp_point_256* ap[2]; + sp_point_256* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + sp_digit* t3 = t + 4*8; + sp_digit* t4 = t + 6*8; + sp_digit* t5 = t + 8*8; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Ensure only the first point is the same as the result. */ + if (q == r) { + const sp_point_256* a = p; + p = q; + q = a; + } + + /* Check double */ + (void)sp_256_sub_8(t1, p256_mod, q->y); + sp_256_norm_8(t1); + if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & + (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { + sp_256_proj_point_dbl_8(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_256)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<8; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<8; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<8; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t1, t1, x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_8(t3, t3, y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_8(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_8(t4, t4, t3, p256_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(x, x, t5, p256_mod); + sp_256_mont_dbl_8(t1, y, p256_mod); + sp_256_mont_sub_8(x, x, t1, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_8(y, y, x, p256_mod); + sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(y, y, t5, p256_mod); + } +} + +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
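+ *
+ * This is a fixed 4-bit window multiply. Roughly, with t[j] = j*g
+ * precomputed for j = 0..15:
+ *
+ *     acc = t[top nibble of k];
+ *     for each remaining nibble y of k:   (63 nibbles)
+ *         acc = 16*acc;                   (four point doublings)
+ *         acc = acc + t[y];
+ *
+ * i.e. about 252 doublings and 63 additions for a 256-bit scalar.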
+ */ +static int sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k, + int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 td[16]; + sp_point_256 rtd; + sp_digit tmpd[2 * 8 * 5]; +#endif + sp_point_256* t; + sp_point_256* rt; + sp_digit* tmp; + sp_digit n; + int i; + int c, y; + int err; + + (void)heap; + + err = sp_256_point_new_8(heap, rtd, rt); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#else + t = td; + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + (void)sp_256_mod_mul_norm_8(t[1].x, g->x, p256_mod); + (void)sp_256_mod_mul_norm_8(t[1].y, g->y, p256_mod); + (void)sp_256_mod_mul_norm_8(t[1].z, g->z, p256_mod); + t[1].infinity = 0; + sp_256_proj_point_dbl_8(&t[ 2], &t[ 1], tmp); + t[ 2].infinity = 0; + sp_256_proj_point_add_8(&t[ 3], &t[ 2], &t[ 1], tmp); + t[ 3].infinity = 0; + sp_256_proj_point_dbl_8(&t[ 4], &t[ 2], tmp); + t[ 4].infinity = 0; + sp_256_proj_point_add_8(&t[ 5], &t[ 3], &t[ 2], tmp); + t[ 5].infinity = 0; + sp_256_proj_point_dbl_8(&t[ 6], &t[ 3], tmp); + t[ 6].infinity = 0; + sp_256_proj_point_add_8(&t[ 7], &t[ 4], &t[ 3], tmp); + t[ 7].infinity = 0; + sp_256_proj_point_dbl_8(&t[ 8], &t[ 4], tmp); + t[ 8].infinity = 0; + sp_256_proj_point_add_8(&t[ 9], &t[ 5], &t[ 4], tmp); + t[ 9].infinity = 0; + sp_256_proj_point_dbl_8(&t[10], &t[ 5], tmp); + t[10].infinity = 0; + sp_256_proj_point_add_8(&t[11], &t[ 6], &t[ 5], tmp); + t[11].infinity = 0; + sp_256_proj_point_dbl_8(&t[12], &t[ 6], tmp); + t[12].infinity = 0; + sp_256_proj_point_add_8(&t[13], &t[ 7], &t[ 6], tmp); + t[13].infinity = 0; + sp_256_proj_point_dbl_8(&t[14], &t[ 7], tmp); + t[14].infinity = 0; + sp_256_proj_point_add_8(&t[15], &t[ 8], &t[ 7], tmp); + t[15].infinity = 0; + + i = 6; + n = k[i+1] << 0; + c = 28; + y = n >> 28; + XMEMCPY(rt, &t[y], sizeof(sp_point_256)); + n <<= 4; + for (; i>=0 || c>=4; ) { + if (c < 4) { + n |= k[i--]; + c += 32; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + + sp_256_proj_point_dbl_8(rt, rt, tmp); + sp_256_proj_point_dbl_8(rt, rt, tmp); + sp_256_proj_point_dbl_8(rt, rt, tmp); + sp_256_proj_point_dbl_8(rt, rt, tmp); + + sp_256_proj_point_add_8(rt, rt, &t[y], tmp); + } + + if (map != 0) { + sp_256_map_8(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_256)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 8 * 5); + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + } + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_point_256) * 16); + XFREE(t, heap, DYNAMIC_TYPE_ECC); + } +#else + ForceZero(tmpd, sizeof(tmpd)); + ForceZero(td, sizeof(td)); +#endif + sp_256_point_free_8(rt, 1, heap); + + return err; +} + +/* A table entry for pre-computed points. */ +typedef struct sp_table_entry_256 { + sp_digit x[8]; + sp_digit y[8]; +} sp_table_entry_256; + +#ifdef FP_ECC +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. 
+ * n Number of times to double + * t Temporary ordinate data. + */ +static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*8; + sp_digit* b = t + 4*8; + sp_digit* t1 = t + 6*8; + sp_digit* t2 = t + 8*8; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_256_mont_dbl_8(y, y, p256_mod); + /* W = Z^4 */ + sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod); + +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(t1, t1, w, p256_mod); + sp_256_mont_tpl_8(a, t1, p256_mod); + /* B = X*Y^2 */ + sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod); + /* X = A^2 - 2B */ + sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(t2, b, p256_mod); + sp_256_mont_sub_8(x, x, t2, p256_mod); + /* Z = Z*Y */ + sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod); + /* t2 = Y^4 */ + sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_256_mont_mul_8(w, w, t1, p256_mod, p256_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_256_mont_sub_8(y, b, x, p256_mod); + sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(y, y, p256_mod); + sp_256_mont_sub_8(y, y, t1, p256_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(t1, t1, w, p256_mod); + sp_256_mont_tpl_8(a, t1, p256_mod); + /* B = X*Y^2 */ + sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod); + /* X = A^2 - 2B */ + sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(t2, b, p256_mod); + sp_256_mont_sub_8(x, x, t2, p256_mod); + /* Z = Z*Y */ + sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod); + /* t2 = Y^4 */ + sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_256_mont_sub_8(y, b, x, p256_mod); + sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(y, y, p256_mod); + sp_256_mont_sub_8(y, y, t1, p256_mod); +#endif + /* Y = Y/2 */ + sp_256_div2_8(y, y, p256_mod); +} + +#endif /* FP_ECC */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
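+ *
+ * "q value of one" means the Z ordinate of q is one, i.e. q is an
+ * affine point. Then U1 = X1 and S1 = Y1 come for free, which saves
+ * one field squaring and four field multiplications compared with
+ * sp_256_proj_point_add_8 (the Z2^2, Z2^3 and Z1*Z2 terms drop out).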
+ */ +static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, + const sp_point_256* q, sp_digit* t) +{ + const sp_point_256* ap[2]; + sp_point_256* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + sp_digit* t3 = t + 4*8; + sp_digit* t4 = t + 6*8; + sp_digit* t5 = t + 8*8; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Check double */ + (void)sp_256_sub_8(t1, p256_mod, q->y); + sp_256_norm_8(t1); + if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & + (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { + sp_256_proj_point_dbl_8(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_256)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<8; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<8; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<8; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); + /* H = U2 - X1 */ + sp_256_mont_sub_8(t2, t2, x, p256_mod); + /* R = S2 - Y1 */ + sp_256_mont_sub_8(t4, t4, y, p256_mod); + /* Z3 = H*Z1 */ + sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_256_mont_sqr_8(t1, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t3, x, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(x, t1, t5, p256_mod); + sp_256_mont_dbl_8(t1, t3, p256_mod); + sp_256_mont_sub_8(x, x, t1, p256_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_256_mont_sub_8(t3, t3, x, p256_mod); + sp_256_mont_mul_8(t3, t3, t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, y, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(y, t3, t5, p256_mod); + } +} + +#ifdef WOLFSSL_SP_SMALL +#ifdef FP_ECC +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_256_proj_to_affine_8(sp_point_256* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 8; + sp_digit* tmp = t + 4 * 8; + + sp_256_mont_inv_8(t1, a->z, tmp); + + sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod); + + sp_256_mont_mul_8(a->x, a->x, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(a->y, a->y, t1, p256_mod, p256_mp_mod); + XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod)); +} + +/* Generate the pre-computed table of points for the base point. + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. 
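+ *
+ * Table layout (in this WOLFSSL_SP_SMALL build): entry y, with bits
+ * y = y3 y2 y1 y0, holds the affine point
+ *
+ *     table[y] = y0*a + y1*2^64*a + y2*2^128*a + y3*2^192*a
+ *
+ * so a scalar multiply can read one bit from each 64-bit quarter of
+ * the scalar per round and do a single double/add per bit position.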
+ */
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_8(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<4; i++) {
+            sp_256_proj_point_dbl_n_8(t, 64, tmp);
+            sp_256_proj_to_affine_8(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<4; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
+                sp_256_proj_to_affine_8(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_8(s2, 0, heap);
+    sp_256_point_free_8(s1, 0, heap);
+    sp_256_point_free_8( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
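+ *
+ * Each round gathers one bit from each 64-bit stripe of the scalar to
+ * index the table, conceptually (bit() is shorthand, not a helper):
+ *
+ *     y = k.bit(i) | (k.bit(i+64) << 1) | (k.bit(i+128) << 2)
+ *       | (k.bit(i+192) << 3);
+ *
+ * then one doubling and one add of table[y] per i, for i = 62..0.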
+ */
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit td[2 * 8 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_256_point_new_8(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        y = 0;
+        for (j=0,x=63; j<4; j++,x+=64) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=62; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<4; j++,x+=64) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_8(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_8(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_8(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, heap);
+    sp_256_point_free_8(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_256_t {
+    sp_digit x[8];
+    sp_digit y[8];
+    sp_table_entry_256 table[16];
+    uint32_t cnt;
+    int set;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_256_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_256[i].set)
+            continue;
+
+        if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+                   sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_256[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_256[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 8 * 5];
+    sp_cache_256_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_256 == 0) {
+        wc_InitMutex(&sp_cache_256_lock);
+        initCacheMutex_256 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_256_lock) != 0)
+        err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_256(g, &cache);
+        if (cache->cnt == 2)
+            sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+        }
+        else {
+            err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap   Heap to use for allocation.
+ */
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_8(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_256_proj_point_dbl_n_8(t, 32, tmp);
+            sp_256_proj_to_affine_8(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
+                sp_256_proj_to_affine_8(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_8(s2, 0, heap);
+    sp_256_point_free_8(s1, 0, heap);
+    sp_256_point_free_8( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r      Resulting point.
+ * g      Point to multiply (implicit in table; unused here).
+ * table  Pre-computed table of multiples of g.
+ * k      Scalar to multiply by.
+ * map    Indicates whether to convert result to affine.
+ * heap   Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit td[2 * 8 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        y = 0;
+        for (j=0,x=31; j<8; j++,x+=32) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=30; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=32) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_8(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_8(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_8(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, heap);
+    sp_256_point_free_8(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_256_t {
+    sp_digit x[8];
+    sp_digit y[8];
+    sp_table_entry_256 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_256_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_256[i].set)
+            continue;
+
+        if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+                sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_256[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_256[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
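+/* Dispatch note for sp_256_ecc_mulmod_8() below: stripe tables are built
+ * lazily. The first multiply against a point caches it (cnt == 1) and runs
+ * sp_256_ecc_mulmod_fast_8(); the second multiply (cnt == 2) pays the
+ * one-off sp_256_gen_stripe_table_8() cost and already takes the
+ * table-driven path, as does every multiply after that. A minimal usage
+ * sketch via the public wrapper sp_ecc_mulmod_256() further below, where
+ * k, g and res are illustrative names for an initialised mp_int scalar
+ * and ecc_points:
+ *
+ *     err = sp_ecc_mulmod_256(&k, g, res, 1, NULL);  1st use: generic path
+ *     err = sp_ecc_mulmod_256(&k, g, res, 1, NULL);  2nd use: builds table,
+ *                                                    then stripe path
+ */
+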
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 8 * 5];
+    sp_cache_256_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_256 == 0) {
+        wc_InitMutex(&sp_cache_256_lock);
+        initCacheMutex_256 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_256_lock) != 0)
+        err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_256(g, &cache);
+        if (cache->cnt == 2)
+            sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+        }
+        else {
+            err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * gm    Point to multiply.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+        void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 8, km);
+        sp_256_point_from_ecc_point_8(point, gm);
+
+        err = sp_256_ecc_mulmod_8(point, point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_8(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+static const sp_table_entry_256 p256_table[16] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
+        0xa53755c6,0x18905f76 },
+      { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
+        0x25885d85,0x8571ff18 } },
+    /* 2 */
+    { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
+        0xfd1b667f,0x2f5e6961 },
+      { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
+        0x8d6f0f7b,0xf648f916 } },
+    /* 3 */
+    { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
+        0x133d0015,0x5abe0285 },
+      { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
+        0x6b6f7383,0x94bb725b } },
+    /* 4 */
+    { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
+        0x21d324f6,0x61d587d4 },
+      { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
+        0x4621efbe,0xfa11fe12 } },
+    /* 5 */
+    { {
0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67, + 0x1f13bedc,0x586eb04c }, + { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0, + 0x70864f11,0x19d5ac08 } }, + /* 6 */ + { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a, + 0xc3b266b1,0xbb6de651 }, + { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1, + 0x5d18b99b,0x60b4619a } }, + /* 7 */ + { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014, + 0xaeebffcd,0x9d0f27b2 }, + { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0, + 0x356ec48d,0x244a566d } }, + /* 8 */ + { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e, + 0xcd42ab1b,0x803f3e02 }, + { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273, + 0x5067adc1,0xc097440e } }, + /* 9 */ + { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459, + 0x915f1f30,0xf1af32d5 }, + { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418, + 0xe2d41c8b,0x23d0f130 } }, + /* 10 */ + { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926, + 0x7990216a,0x50bbb4d9 }, + { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b, + 0x01fe49c3,0x2b100118 } }, + /* 11 */ + { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa, + 0x83fbae0c,0xdd558999 }, + { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf, + 0x149d6041,0xe6e4c551 } }, + /* 12 */ + { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07, + 0xdb7e63af,0xfad27148 }, + { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875, + 0x9f0e1a84,0x77387de3 } }, + /* 13 */ + { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408, + 0xbef0c47e,0xb37b85c0 }, + { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa, + 0xf9f628d5,0x9c135ac8 } }, + /* 14 */ + { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403, + 0x91ece900,0xc109f9cb }, + { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d, + 0x2eee1ee1,0x9bc3344f } }, + /* 15 */ + { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665, + 0x5f1a4cc1,0x29591d52 }, + { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496, + 0x18ef332c,0x6376551f } }, +}; + +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k, + int map, void* heap) +{ + return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table, + k, map, heap); +} + +#else +static const sp_table_entry_256 p256_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b, + 0xa53755c6,0x18905f76 }, + { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688, + 0x25885d85,0x8571ff18 } }, + /* 2 */ + { { 0x4147519a,0x20288602,0x26b372f0,0xd0981eac,0xa785ebc8,0xa9d4a7ca, + 0xdbdf58e9,0xd953c50d }, + { 0xfd590f8f,0x9d6361cc,0x44e6c917,0x72e9626b,0x22eb64cf,0x7fd96110, + 0x9eb288f3,0x863ebb7e } }, + /* 3 */ + { { 0x5cdb6485,0x7856b623,0x2f0a2f97,0x808f0ea2,0x4f7e300b,0x3e68d954, + 0xb5ff80a0,0x00076055 }, + { 0x838d2010,0x7634eb9b,0x3243708a,0x54014fbb,0x842a6606,0xe0e47d39, + 0x34373ee0,0x83087761 } }, + /* 4 */ + { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a, + 0xfd1b667f,0x2f5e6961 }, + { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37, + 0x8d6f0f7b,0xf648f916 } }, + /* 5 */ + { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761, + 0x133d0015,0x5abe0285 }, + { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562, + 0x6b6f7383,0x94bb725b } }, + /* 6 */ + { { 0x720f141c,0xbbf9b48f,0x2df5bc74,0x6199b3cd,0x411045c4,0xdc3f6129, + 0x2f7dc4ef,0xcdd6bbcb }, + { 0xeaf436fd,0xcca6700b,0xb99326be,0x6f647f6d,0x014f2522,0x0c0fa792, + 0x4bdae5f6,0xa361bebd } }, + /* 7 */ + { { 0x597c13c7,0x28aa2558,0x50b7c3e1,0xc38d635f,0xf3c09d1d,0x07039aec, + 0xc4b5292c,0xba12ca09 }, + { 0x59f91dfd,0x9e408fa4,0xceea07fb,0x3af43b66,0x9d780b29,0x1eceb089, + 0x701fef4b,0x53ebb99d } }, + /* 8 */ + { { 0xb0e63d34,0x4fe7ee31,0xa9e54fab,0xf4600572,0xd5e7b5a4,0xc0493334, + 0x06d54831,0x8589fb92 }, + { 0x6583553a,0xaa70f5cc,0xe25649e5,0x0879094a,0x10044652,0xcc904507, + 0x02541c4f,0xebb0696d } }, + /* 9 */ + { { 0xac1647c5,0x4616ca15,0xc4cf5799,0xb8127d47,0x764dfbac,0xdc666aa3, + 0xd1b27da3,0xeb2820cb }, + { 0x6a87e008,0x9406f8d8,0x922378f3,0xd87dfa9d,0x80ccecb2,0x56ed2e42, + 0x55a7da1d,0x1f28289b } }, + /* 10 */ + { { 0x3b89da99,0xabbaa0c0,0xb8284022,0xa6f2d79e,0xb81c05e8,0x27847862, + 0x05e54d63,0x337a4b59 }, + { 0x21f7794a,0x3c67500d,0x7d6d7f61,0x207005b7,0x04cfd6e8,0x0a5a3781, + 0xf4c2fbd6,0x0d65e0d5 } }, + /* 11 */ + { { 0xb5275d38,0xd9d09bbe,0x0be0a358,0x4268a745,0x973eb265,0xf0762ff4, + 0x52f4a232,0xc23da242 }, + { 0x0b94520c,0x5da1b84f,0xb05bd78e,0x09666763,0x94d29ea1,0x3a4dcb86, + 0xc790cff1,0x19de3b8c } }, + /* 12 */ + { { 0x26c5fe04,0x183a716c,0x3bba1bdb,0x3b28de0b,0xa4cb712c,0x7432c586, + 0x91fccbfd,0xe34dcbd4 }, + { 0xaaa58403,0xb408d46b,0x82e97a53,0x9a697486,0x36aaa8af,0x9e390127, + 0x7b4e0f7f,0xe7641f44 } }, + /* 13 */ + { { 0xdf64ba59,0x7d753941,0x0b0242fc,0xd33f10ec,0xa1581859,0x4f06dfc6, + 0x052a57bf,0x4a12df57 }, + { 0x9439dbd0,0xbfa6338f,0xbde53e1f,0xd3c24bd4,0x21f1b314,0xfd5e4ffa, + 0xbb5bea46,0x6af5aa93 } }, + /* 14 */ + { { 0x10c91999,0xda10b699,0x2a580491,0x0a24b440,0xb8cc2090,0x3e0094b4, + 0x66a44013,0x5fe3475a }, + { 0xf93e7b4b,0xb0f8cabd,0x7c23f91a,0x292b501a,0xcd1e6263,0x42e889ae, + 0xecfea916,0xb544e308 } }, + /* 15 */ + { { 0x16ddfdce,0x6478c6e9,0xf89179e6,0x2c329166,0x4d4e67e1,0x4e8d6e76, + 0xa6b0c20b,0xe0b6b2bd }, + { 0xbb7efb57,0x0d312df2,0x790c4007,0x1aac0dde,0x679bc944,0xf90336ad, + 0x25a63774,0x71c023de } }, + /* 16 */ + { { 
0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5, + 0x21d324f6,0x61d587d4 }, + { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e, + 0x4621efbe,0xfa11fe12 } }, + /* 17 */ + { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67, + 0x1f13bedc,0x586eb04c }, + { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0, + 0x70864f11,0x19d5ac08 } }, + /* 18 */ + { { 0x309a4e1f,0x1e99f581,0xe9270074,0xab7de71b,0xefd28d20,0x26a5ef0b, + 0x7f9c563f,0xe7c0073f }, + { 0x0ef59f76,0x1f6d663a,0x20fcb050,0x669b3b54,0x7a6602d4,0xc08c1f7a, + 0xc65b3c0a,0xe08504fe } }, + /* 19 */ + { { 0xa031b3ca,0xf098f68d,0xe6da6d66,0x6d1cab9e,0x94f246e8,0x5bfd81fa, + 0x5b0996b4,0x78f01882 }, + { 0x3a25787f,0xb7eefde4,0x1dccac9b,0x8016f80d,0xb35bfc36,0x0cea4877, + 0x7e94747a,0x43a773b8 } }, + /* 20 */ + { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a, + 0xc3b266b1,0xbb6de651 }, + { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1, + 0x5d18b99b,0x60b4619a } }, + /* 21 */ + { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014, + 0xaeebffcd,0x9d0f27b2 }, + { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0, + 0x356ec48d,0x244a566d } }, + /* 22 */ + { { 0xeacf1f96,0x6db0394a,0x024c271c,0x9f2122a9,0x82cbd3b9,0x2626ac1b, + 0x3581ef69,0x45e58c87 }, + { 0xa38f9dbc,0xd3ff479d,0xe888a040,0xa8aaf146,0x46e0bed7,0x945adfb2, + 0xc1e4b7a4,0xc040e21c } }, + /* 23 */ + { { 0x6f8117b6,0x847af000,0x73a35433,0x651969ff,0x1d9475eb,0x482b3576, + 0x682c6ec7,0x1cdf5c97 }, + { 0x11f04839,0x7db775b4,0x48de1698,0x7dbeacf4,0xb70b3219,0xb2921dd1, + 0xa92dff3d,0x046755f8 } }, + /* 24 */ + { { 0xbce8ffcd,0xcc8ac5d2,0x2fe61a82,0x0d53c48b,0x7202d6c7,0xf6f16172, + 0x3b83a5f3,0x046e5e11 }, + { 0xd8007f01,0xe7b8ff64,0x5af43183,0x7fb1ef12,0x35e1a03c,0x045c5ea6, + 0x303d005b,0x6e0106c3 } }, + /* 25 */ + { { 0x88dd73b1,0x48c73584,0x995ed0d9,0x7670708f,0xc56a2ab7,0x38385ea8, + 0xe901cf1f,0x442594ed }, + { 0x12d4b65b,0xf8faa2c9,0x96c90c37,0x94c2343b,0x5e978d1f,0xd326e4a1, + 0x4c2ee68e,0xa796fa51 } }, + /* 26 */ + { { 0x823addd7,0x359fb604,0xe56693b3,0x9e2a6183,0x3cbf3c80,0xf885b78e, + 0xc69766e9,0xe4ad2da9 }, + { 0x8e048a61,0x357f7f42,0xc092d9a0,0x082d198c,0xc03ed8ef,0xfc3a1af4, + 0xc37b5143,0xc5e94046 } }, + /* 27 */ + { { 0x2be75f9e,0x476a538c,0xcb123a78,0x6fd1a9e8,0xb109c04b,0xd85e4df0, + 0xdb464747,0x63283daf }, + { 0xbaf2df15,0xce728cf7,0x0ad9a7f4,0xe592c455,0xe834bcc3,0xfab226ad, + 0x1981a938,0x68bd19ab } }, + /* 28 */ + { { 0x1887d659,0xc08ead51,0xb359305a,0x3374d5f4,0xcfe74fe3,0x96986981, + 0x3c6fdfd6,0x495292f5 }, + { 0x1acec896,0x4a878c9e,0xec5b4484,0xd964b210,0x664d60a7,0x6696f7e2, + 0x26036837,0x0ec7530d } }, + /* 29 */ + { { 0xad2687bb,0x2da13a05,0xf32e21fa,0xa1f83b6a,0x1dd4607b,0x390f5ef5, + 0x64863f0b,0x0f6207a6 }, + { 0x0f138233,0xbd67e3bb,0x272aa718,0xdd66b96c,0x26ec88ae,0x8ed00407, + 0x08ed6dcf,0xff0db072 } }, + /* 30 */ + { { 0x4c95d553,0x749fa101,0x5d680a8a,0xa44052fd,0xff3b566f,0x183b4317, + 0x88740ea3,0x313b513c }, + { 0x08d11549,0xb402e2ac,0xb4dee21c,0x071ee10b,0x47f2320e,0x26b987dd, + 0x86f19f81,0x2d3abcf9 } }, + /* 31 */ + { { 0x815581a2,0x4c288501,0x632211af,0x9a0a6d56,0x0cab2e99,0x19ba7a0f, + 0xded98cdf,0xc036fa10 }, + { 0xc1fbd009,0x29ae08ba,0x06d15816,0x0b68b190,0x9b9e0d8f,0xc2eb3277, + 0xb6d40194,0xa6b2a2c4 } }, + /* 32 */ + { { 0x6d3549cf,0xd433e50f,0xfacd665e,0x6f33696f,0xce11fcb4,0x695bfdac, + 0xaf7c9860,0x810ee252 }, + { 0x7159bb2c,0x65450fe1,0x758b357b,0xf7dfbebe,0xd69fea72,0x2b057e74, + 
0x92731745,0xd485717a } }, + /* 33 */ + { { 0xf0cb5a98,0x11741a8a,0x1f3110bf,0xd3da8f93,0xab382adf,0x1994e2cb, + 0x2f9a604e,0x6a6045a7 }, + { 0xa2b2411d,0x170c0d3f,0x510e96e0,0xbe0eb83e,0x8865b3cc,0x3bcc9f73, + 0xf9e15790,0xd3e45cfa } }, + /* 34 */ + { { 0xe83f7669,0xce1f69bb,0x72877d6b,0x09f8ae82,0x3244278d,0x9548ae54, + 0xe3c2c19c,0x207755de }, + { 0x6fef1945,0x87bd61d9,0xb12d28c3,0x18813cef,0x72df64aa,0x9fbcd1d6, + 0x7154b00d,0x48dc5ee5 } }, + /* 35 */ + { { 0xf7e5a199,0x123790bf,0x989ccbb7,0xe0efb8cf,0x0a519c79,0xc27a2bfe, + 0xdff6f445,0xf2fb0aed }, + { 0xf0b5025f,0x41c09575,0x40fa9f22,0x550543d7,0x380bfbd0,0x8fa3c8ad, + 0xdb28d525,0xa13e9015 } }, + /* 36 */ + { { 0xa2b65cbc,0xf9f7a350,0x2a464226,0x0b04b972,0xe23f07a1,0x265ce241, + 0x1497526f,0x2bf0d6b0 }, + { 0x4b216fb7,0xd3d4dd3f,0xfbdda26a,0xf7d7b867,0x6708505c,0xaeb7b83f, + 0x162fe89f,0x42a94a5a } }, + /* 37 */ + { { 0xeaadf191,0x5846ad0b,0x25a268d7,0x0f8a4890,0x494dc1f6,0xe8603050, + 0xc65ede3d,0x2c2dd969 }, + { 0x93849c17,0x6d02171d,0x1da250dd,0x460488ba,0x3c3a5485,0x4810c706, + 0x42c56dbc,0xf437fa1f } }, + /* 38 */ + { { 0x4a0f7dab,0x6aa0d714,0x1776e9ac,0x0f049793,0xf5f39786,0x52c0a050, + 0x54707aa8,0xaaf45b33 }, + { 0xc18d364a,0x85e37c33,0x3e497165,0xd40b9b06,0x15ec5444,0xf4171681, + 0xf4f272bc,0xcdf6310d } }, + /* 39 */ + { { 0x8ea8b7ef,0x7473c623,0x85bc2287,0x08e93518,0x2bda8e34,0x41956772, + 0xda9e2ff2,0xf0d008ba }, + { 0x2414d3b1,0x2912671d,0xb019ea76,0xb3754985,0x453bcbdb,0x5c61b96d, + 0xca887b8b,0x5bd5c2f5 } }, + /* 40 */ + { { 0xf49a3154,0xef0f469e,0x6e2b2e9a,0x3e85a595,0xaa924a9c,0x45aaec1e, + 0xa09e4719,0xaa12dfc8 }, + { 0x4df69f1d,0x26f27227,0xa2ff5e73,0xe0e4c82c,0xb7a9dd44,0xb9d8ce73, + 0xe48ca901,0x6c036e73 } }, + /* 41 */ + { { 0x0f6e3138,0x5cfae12a,0x25ad345a,0x6966ef00,0x45672bc5,0x8993c64b, + 0x96afbe24,0x292ff658 }, + { 0x5e213402,0xd5250d44,0x4392c9fe,0xf6580e27,0xda1c72e8,0x097b397f, + 0x311b7276,0x644e0c90 } }, + /* 42 */ + { { 0xa47153f0,0xe1e421e1,0x920418c9,0xb86c3b79,0x705d7672,0x93bdce87, + 0xcab79a77,0xf25ae793 }, + { 0x6d869d0c,0x1f3194a3,0x4986c264,0x9d55c882,0x096e945e,0x49fb5ea3, + 0x13db0a3e,0x39b8e653 } }, + /* 43 */ + { { 0xb6fd2e59,0x37754200,0x9255c98f,0x35e2c066,0x0e2a5739,0xd9dab21a, + 0x0f19db06,0x39122f2f }, + { 0x03cad53c,0xcfbce1e0,0xe65c17e3,0x225b2c0f,0x9aa13877,0x72baf1d2, + 0xce80ff8d,0x8de80af8 } }, + /* 44 */ + { { 0x207bbb76,0xafbea8d9,0x21782758,0x921c7e7c,0x1c0436b1,0xdfa2b74b, + 0x2e368c04,0x87194906 }, + { 0xa3993df5,0xb5f928bb,0xf3b3d26a,0x639d75b5,0x85b55050,0x011aa78a, + 0x5b74fde1,0xfc315e6a } }, + /* 45 */ + { { 0xe8d6ecfa,0x561fd41a,0x1aec7f86,0x5f8c44f6,0x4924741d,0x98452a7b, + 0xee389088,0xe6d4a7ad }, + { 0x4593c75d,0x60552ed1,0xdd271162,0x70a70da4,0x7ba2c7db,0xd2aede93, + 0x9be2ae57,0x35dfaf9a } }, + /* 46 */ + { { 0xaa736636,0x6b956fcd,0xae2cab7e,0x09f51d97,0x0f349966,0xfb10bf41, + 0x1c830d2b,0x1da5c7d7 }, + { 0x3cce6825,0x5c41e483,0xf9573c3b,0x15ad118f,0xf23036b8,0xa28552c7, + 0xdbf4b9d6,0x7077c0fd } }, + /* 47 */ + { { 0x46b9661c,0xbf63ff8d,0x0d2cfd71,0xa1dfd36b,0xa847f8f7,0x0373e140, + 0xe50efe44,0x53a8632e }, + { 0x696d8051,0x0976ff68,0xc74f468a,0xdaec0c95,0x5e4e26bd,0x62994dc3, + 0x34e1fcc1,0x028ca76d } }, + /* 48 */ + { { 0xfc9877ee,0xd11d47dc,0x801d0002,0xc8b36210,0x54c260b6,0xd002c117, + 0x6962f046,0x04c17cd8 }, + { 0xb0daddf5,0x6d9bd094,0x24ce55c0,0xbea23575,0x72da03b5,0x663356e6, + 0xfed97474,0xf7ba4de9 } }, + /* 49 */ + { { 0xebe1263f,0xd0dbfa34,0x71ae7ce6,0x55763735,0x82a6f523,0xd2440553, + 0x52131c41,0xe31f9600 }, + { 
0xea6b6ec6,0xd1bb9216,0x73c2fc44,0x37a1d12e,0x89d0a294,0xc10e7eac, + 0xce34d47b,0xaa3a6259 } }, + /* 50 */ + { { 0x36f3dcd3,0xfbcf9df5,0xd2bf7360,0x6ceded50,0xdf504f5b,0x491710fa, + 0x7e79daee,0x2398dd62 }, + { 0x6d09569e,0xcf4705a3,0x5149f769,0xea0619bb,0x35f6034c,0xff9c0377, + 0x1c046210,0x5717f5b2 } }, + /* 51 */ + { { 0x21dd895e,0x9fe229c9,0x40c28451,0x8e518500,0x1d637ecd,0xfa13d239, + 0x0e3c28de,0x660a2c56 }, + { 0xd67fcbd0,0x9cca88ae,0x0ea9f096,0xc8472478,0x72e92b4d,0x32b2f481, + 0x4f522453,0x624ee54c } }, + /* 52 */ + { { 0xd897eccc,0x09549ce4,0x3f9880aa,0x4d49d1d9,0x043a7c20,0x723c2423, + 0x92bdfbc0,0x4f392afb }, + { 0x7de44fd9,0x6969f8fa,0x57b32156,0xb66cfbe4,0x368ebc3c,0xdb2fa803, + 0xccdb399c,0x8a3e7977 } }, + /* 53 */ + { { 0x06c4b125,0xdde1881f,0xf6e3ca8c,0xae34e300,0x5c7a13e9,0xef6999de, + 0x70c24404,0x3888d023 }, + { 0x44f91081,0x76280356,0x5f015504,0x3d9fcf61,0x632cd36e,0x1827edc8, + 0x18102336,0xa5e62e47 } }, + /* 54 */ + { { 0x2facd6c8,0x1a825ee3,0x54bcbc66,0x699c6354,0x98df9931,0x0ce3edf7, + 0x466a5adc,0x2c4768e6 }, + { 0x90a64bc9,0xb346ff8c,0xe4779f5c,0x630a6020,0xbc05e884,0xd949d064, + 0xf9e652a0,0x7b5e6441 } }, + /* 55 */ + { { 0x1d28444a,0x2169422c,0xbe136a39,0xe996c5d8,0xfb0c7fce,0x2387afe5, + 0x0c8d744a,0xb8af73cb }, + { 0x338b86fd,0x5fde83aa,0xa58a5cff,0xfee3f158,0x20ac9433,0xc9ee8f6f, + 0x7f3f0895,0xa036395f } }, + /* 56 */ + { { 0xa10f7770,0x8c73c6bb,0xa12a0e24,0xa6f16d81,0x51bc2b9f,0x100df682, + 0x875fb533,0x4be36b01 }, + { 0x9fb56dbb,0x9226086e,0x07e7a4f8,0x306fef8b,0x66d52f20,0xeeaccc05, + 0x1bdc00c0,0x8cbc9a87 } }, + /* 57 */ + { { 0xc0dac4ab,0xe131895c,0x712ff112,0xa874a440,0x6a1cee57,0x6332ae7c, + 0x0c0835f8,0x44e7553e }, + { 0x7734002d,0x6d503fff,0x0b34425c,0x9d35cb8b,0x0e8738b5,0x95f70276, + 0x5eb8fc18,0x470a683a } }, + /* 58 */ + { { 0x90513482,0x81b761dc,0x01e9276a,0x0287202a,0x0ce73083,0xcda441ee, + 0xc63dc6ef,0x16410690 }, + { 0x6d06a2ed,0xf5034a06,0x189b100b,0xdd4d7745,0xab8218c9,0xd914ae72, + 0x7abcbb4f,0xd73479fd } }, + /* 59 */ + { { 0x5ad4c6e5,0x7edefb16,0x5b06d04d,0x262cf08f,0x8575cb14,0x12ed5bb1, + 0x0771666b,0x816469e3 }, + { 0x561e291e,0xd7ab9d79,0xc1de1661,0xeb9daf22,0x135e0513,0xf49827eb, + 0xf0dd3f9c,0x0a36dd23 } }, + /* 60 */ + { { 0x41d5533c,0x098d32c7,0x8684628f,0x7c5f5a9e,0xe349bd11,0x39a228ad, + 0xfdbab118,0xe331dfd6 }, + { 0x6bcc6ed8,0x5100ab68,0xef7a260e,0x7160c3bd,0xbce850d7,0x9063d9a7, + 0x492e3389,0xd3b4782a } }, + /* 61 */ + { { 0xf3821f90,0xa149b6e8,0x66eb7aad,0x92edd9ed,0x1a013116,0x0bb66953, + 0x4c86a5bd,0x7281275a }, + { 0xd3ff47e5,0x503858f7,0x61016441,0x5e1616bc,0x7dfd9bb1,0x62b0f11a, + 0xce145059,0x2c062e7e } }, + /* 62 */ + { { 0x0159ac2e,0xa76f996f,0xcbdb2713,0x281e7736,0x08e46047,0x2ad6d288, + 0x2c4e7ef1,0x282a35f9 }, + { 0xc0ce5cd2,0x9c354b1e,0x1379c229,0xcf99efc9,0x3e82c11e,0x992caf38, + 0x554d2abd,0xc71cd513 } }, + /* 63 */ + { { 0x09b578f4,0x4885de9c,0xe3affa7a,0x1884e258,0x59182f1f,0x8f76b1b7, + 0xcf47f3a3,0xc50f6740 }, + { 0x374b68ea,0xa9c4adf3,0x69965fe2,0xa406f323,0x85a53050,0x2f86a222, + 0x212958dc,0xb9ecb3a7 } }, + /* 64 */ + { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e, + 0xcd42ab1b,0x803f3e02 }, + { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273, + 0x5067adc1,0xc097440e } }, + /* 65 */ + { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459, + 0x915f1f30,0xf1af32d5 }, + { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418, + 0xe2d41c8b,0x23d0f130 } }, + /* 66 */ + { { 
0xf41500d9,0x857ab6ed,0xfcbeada8,0x0d890ae5,0x89725951,0x52fe8648, + 0xc0a3fadd,0xb0288dd6 }, + { 0x650bcb08,0x85320f30,0x695d6e16,0x71af6313,0xb989aa76,0x31f520a7, + 0xf408c8d2,0xffd3724f } }, + /* 67 */ + { { 0xb458e6cb,0x53968e64,0x317a5d28,0x992dad20,0x7aa75f56,0x3814ae0b, + 0xd78c26df,0xf5590f4a }, + { 0xcf0ba55a,0x0fc24bd3,0x0c778bae,0x0fc4724a,0x683b674a,0x1ce9864f, + 0xf6f74a20,0x18d6da54 } }, + /* 68 */ + { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926, + 0x7990216a,0x50bbb4d9 }, + { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b, + 0x01fe49c3,0x2b100118 } }, + /* 69 */ + { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa, + 0x83fbae0c,0xdd558999 }, + { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf, + 0x149d6041,0xe6e4c551 } }, + /* 70 */ + { { 0x1e9af288,0x55f655bb,0xf7ada931,0x647e1a64,0xcb2820e5,0x43697e4b, + 0x07ed56ff,0x51e00db1 }, + { 0x771c327e,0x43d169b8,0x4a96c2ad,0x29cdb20b,0x3deb4779,0xc07d51f5, + 0x49829177,0xe22f4241 } }, + /* 71 */ + { { 0x635f1abb,0xcd45e8f4,0x68538874,0x7edc0cb5,0xb5a8034d,0xc9472c1f, + 0x52dc48c9,0xf709373d }, + { 0xa8af30d6,0x401966bb,0xf137b69c,0x95bf5f4a,0x9361c47e,0x3966162a, + 0xe7275b11,0xbd52d288 } }, + /* 72 */ + { { 0x9c5fa877,0xab155c7a,0x7d3a3d48,0x17dad672,0x73d189d8,0x43f43f9e, + 0xc8aa77a6,0xa0d0f8e4 }, + { 0xcc94f92d,0x0bbeafd8,0x0c4ddb3a,0xd818c8be,0xb82eba14,0x22cc65f8, + 0x946d6a00,0xa56c78c7 } }, + /* 73 */ + { { 0x0dd09529,0x2962391b,0x3daddfcf,0x803e0ea6,0x5b5bf481,0x2c77351f, + 0x731a367a,0xd8befdf8 }, + { 0xfc0157f4,0xab919d42,0xfec8e650,0xf51caed7,0x02d48b0a,0xcdf9cb40, + 0xce9f6478,0x854a68a5 } }, + /* 74 */ + { { 0x63506ea5,0xdc35f67b,0xa4fe0d66,0x9286c489,0xfe95cd4d,0x3f101d3b, + 0x98846a95,0x5cacea0b }, + { 0x9ceac44d,0xa90df60c,0x354d1c3a,0x3db29af4,0xad5dbabe,0x08dd3de8, + 0x35e4efa9,0xe4982d12 } }, + /* 75 */ + { { 0xc34cd55e,0x23104a22,0x2680d132,0x58695bb3,0x1fa1d943,0xfb345afa, + 0x16b20499,0x8046b7f6 }, + { 0x38e7d098,0xb533581e,0xf46f0b70,0xd7f61e8d,0x44cb78c4,0x30dea9ea, + 0x9082af55,0xeb17ca7b } }, + /* 76 */ + { { 0x76a145b9,0x1751b598,0xc1bc71ec,0xa5cf6b0f,0x392715bb,0xd3e03565, + 0xfab5e131,0x097b00ba }, + { 0x565f69e1,0xaa66c8e9,0xb5be5199,0x77e8f75a,0xda4fd984,0x6033ba11, + 0xafdbcc9e,0xf95c747b } }, + /* 77 */ + { { 0xbebae45e,0x558f01d3,0xc4bc6955,0xa8ebe9f0,0xdbc64fc6,0xaeb705b1, + 0x566ed837,0x3512601e }, + { 0xfa1161cd,0x9336f1e1,0x4c65ef87,0x328ab8d5,0x724f21e5,0x4757eee2, + 0x6068ab6b,0x0ef97123 } }, + /* 78 */ + { { 0x54ca4226,0x02598cf7,0xf8642c8e,0x5eede138,0x468e1790,0x48963f74, + 0x3b4fbc95,0xfc16d933 }, + { 0xe7c800ca,0xbe96fb31,0x2678adaa,0x13806331,0x6ff3e8b5,0x3d624497, + 0xb95d7a17,0x14ca4af1 } }, + /* 79 */ + { { 0xbd2f81d5,0x7a4771ba,0x01f7d196,0x1a5f9d69,0xcad9c907,0xd898bef7, + 0xf59c231d,0x4057b063 }, + { 0x89c05c0a,0xbffd82fe,0x1dc0df85,0xe4911c6f,0xa35a16db,0x3befccae, + 0xf1330b13,0x1c3b5d64 } }, + /* 80 */ + { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07, + 0xdb7e63af,0xfad27148 }, + { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875, + 0x9f0e1a84,0x77387de3 } }, + /* 81 */ + { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408, + 0xbef0c47e,0xb37b85c0 }, + { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa, + 0xf9f628d5,0x9c135ac8 } }, + /* 82 */ + { { 0x84e35743,0x32aa3202,0x85a3cdef,0x320d6ab1,0x1df19819,0xb821b176, + 0xc433851f,0x5721361f }, + { 0x71fc9168,0x1f0db36a,0x5e5c403c,0x5f98ba73,0x37bcd8f5,0xf64ca87e, + 
0xe6bb11bd,0xdcbac3c9 } }, + /* 83 */ + { { 0x4518cbe2,0xf01d9968,0x9c9eb04e,0xd242fc18,0xe47feebf,0x727663c7, + 0x2d626862,0xb8c1c89e }, + { 0xc8e1d569,0x51a58bdd,0xb7d88cd0,0x563809c8,0xf11f31eb,0x26c27fd9, + 0x2f9422d4,0x5d23bbda } }, + /* 84 */ + { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403, + 0x91ece900,0xc109f9cb }, + { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d, + 0x2eee1ee1,0x9bc3344f } }, + /* 85 */ + { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665, + 0x5f1a4cc1,0x29591d52 }, + { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496, + 0x18ef332c,0x6376551f } }, + /* 86 */ + { { 0x562976cc,0xbda5f14e,0x0ef12c38,0x22bca3e6,0x6cca9852,0xbbfa3064, + 0x08e2987a,0xbdb79dc8 }, + { 0xcb06a772,0xfd2cb5c9,0xfe536dce,0x38f475aa,0x7c2b5db8,0xc2a3e022, + 0xadd3c14a,0x8ee86001 } }, + /* 87 */ + { { 0xa4ade873,0xcbe96981,0xc4fba48c,0x7ee9aa4d,0x5a054ba5,0x2cee2899, + 0x6f77aa4b,0x92e51d7a }, + { 0x7190a34d,0x948bafa8,0xf6bd1ed1,0xd698f75b,0x0caf1144,0xd00ee6e3, + 0x0a56aaaa,0x5182f86f } }, + /* 88 */ + { { 0x7a4cc99c,0xfba6212c,0x3e6d9ca1,0xff609b68,0x5ac98c5a,0x5dbb27cb, + 0x4073a6f2,0x91dcab5d }, + { 0x5f575a70,0x01b6cc3d,0x6f8d87fa,0x0cb36139,0x89981736,0x165d4e8c, + 0x97974f2b,0x17a0cedb } }, + /* 89 */ + { { 0x076c8d3a,0x38861e2a,0x210f924b,0x701aad39,0x13a835d9,0x94d0eae4, + 0x7f4cdf41,0x2e8ce36c }, + { 0x037a862b,0x91273dab,0x60e4c8fa,0x01ba9bb7,0x33baf2dd,0xf9645388, + 0x34f668f3,0xf4ccc6cb } }, + /* 90 */ + { { 0xf1f79687,0x44ef525c,0x92efa815,0x7c595495,0xa5c78d29,0xe1231741, + 0x9a0df3c9,0xac0db488 }, + { 0xdf01747f,0x86bfc711,0xef17df13,0x592b9358,0x5ccb6bb5,0xe5880e4f, + 0x94c974a2,0x95a64a61 } }, + /* 91 */ + { { 0xc15a4c93,0x72c1efda,0x82585141,0x40269b73,0x16cb0bad,0x6a8dfb1c, + 0x29210677,0x231e54ba }, + { 0x8ae6d2dc,0xa70df917,0x39112918,0x4d6aa63f,0x5e5b7223,0xf627726b, + 0xd8a731e1,0xab0be032 } }, + /* 92 */ + { { 0x8d131f2d,0x097ad0e9,0x3b04f101,0x637f09e3,0xd5e9a748,0x1ac86196, + 0x2cf6a679,0xf1bcc880 }, + { 0xe8daacb4,0x25c69140,0x60f65009,0x3c4e4055,0x477937a6,0x591cc8fc, + 0x5aebb271,0x85169469 } }, + /* 93 */ + { { 0xf1dcf593,0xde35c143,0xb018be3b,0x78202b29,0x9bdd9d3d,0xe9cdadc2, + 0xdaad55d8,0x8f67d9d2 }, + { 0x7481ea5f,0x84111656,0xe34c590c,0xe7d2dde9,0x05053fa8,0xffdd43f4, + 0xc0728b5d,0xf84572b9 } }, + /* 94 */ + { { 0x97af71c9,0x5e1a7a71,0x7a736565,0xa1449444,0x0e1d5063,0xa1b4ae07, + 0x616b2c19,0xedee2710 }, + { 0x11734121,0xb2f034f5,0x4a25e9f0,0x1cac6e55,0xa40c2ecf,0x8dc148f3, + 0x44ebd7f4,0x9fd27e9b } }, + /* 95 */ + { { 0xf6e2cb16,0x3cc7658a,0xfe5919b6,0xe3eb7d2c,0x168d5583,0x5a8c5816, + 0x958ff387,0xa40c2fb6 }, + { 0xfedcc158,0x8c9ec560,0x55f23056,0x7ad804c6,0x9a307e12,0xd9396704, + 0x7dc6decf,0x99bc9bb8 } }, + /* 96 */ + { { 0x927dafc6,0x84a9521d,0x5c09cd19,0x52c1fb69,0xf9366dde,0x9d9581a0, + 0xa16d7e64,0x9abe210b }, + { 0x48915220,0x480af84a,0x4dd816c6,0xfa73176a,0x1681ca5a,0xc7d53987, + 0x87f344b0,0x7881c257 } }, + /* 97 */ + { { 0xe0bcf3ff,0x93399b51,0x127f74f6,0x0d02cbc5,0xdd01d968,0x8fb465a2, + 0xa30e8940,0x15e6e319 }, + { 0x3e0e05f4,0x646d6e0d,0x43588404,0xfad7bddc,0xc4f850d3,0xbe61c7d1, + 0x191172ce,0x0e55facf } }, + /* 98 */ + { { 0xf8787564,0x7e9d9806,0x31e85ce6,0x1a331721,0xb819e8d6,0x6b0158ca, + 0x6fe96577,0xd73d0976 }, + { 0x1eb7206e,0x42483425,0xc618bb42,0xa519290f,0x5e30a520,0x5dcbb859, + 0x8f15a50b,0x9250a374 } }, + /* 99 */ + { { 0xbe577410,0xcaff08f8,0x5077a8c6,0xfd408a03,0xec0a63a4,0xf1f63289, + 0xc1cc8c0b,0x77414082 }, + { 
0xeb0991cd,0x05a40fa6,0x49fdc296,0xc1ca0866,0xb324fd40,0x3a68a3c7, + 0x12eb20b9,0x8cb04f4d } }, + /* 100 */ + { { 0x6906171c,0xb1c2d055,0xb0240c3f,0x9073e9cd,0xd8906841,0xdb8e6b4f, + 0x47123b51,0xe4e429ef }, + { 0x38ec36f4,0x0b8dd53c,0xff4b6a27,0xf9d2dc01,0x879a9a48,0x5d066e07, + 0x3c6e6552,0x37bca2ff } }, + /* 101 */ + { { 0xdf562470,0x4cd2e3c7,0xc0964ac9,0x44f272a2,0x80c793be,0x7c6d5df9, + 0x3002b22a,0x59913edc }, + { 0x5750592a,0x7a139a83,0xe783de02,0x99e01d80,0xea05d64f,0xcf8c0375, + 0xb013e226,0x43786e4a } }, + /* 102 */ + { { 0x9e56b5a6,0xff32b0ed,0xd9fc68f9,0x0750d9a6,0x597846a7,0xec15e845, + 0xb7e79e7a,0x8638ca98 }, + { 0x0afc24b2,0x2f5ae096,0x4dace8f2,0x05398eaf,0xaecba78f,0x3b765dd0, + 0x7b3aa6f0,0x1ecdd36a } }, + /* 103 */ + { { 0x6c5ff2f3,0x5d3acd62,0x2873a978,0xa2d516c0,0xd2110d54,0xad94c9fa, + 0xd459f32d,0xd85d0f85 }, + { 0x10b11da3,0x9f700b8d,0xa78318c4,0xd2c22c30,0x9208decd,0x556988f4, + 0xb4ed3c62,0xa04f19c3 } }, + /* 104 */ + { { 0xed7f93bd,0x087924c8,0x392f51f6,0xcb64ac5d,0x821b71af,0x7cae330a, + 0x5c0950b0,0x92b2eeea }, + { 0x85b6e235,0x85ac4c94,0x2936c0f0,0xab2ca4a9,0xe0508891,0x80faa6b3, + 0x5834276c,0x1ee78221 } }, + /* 105 */ + { { 0xe63e79f7,0xa60a2e00,0xf399d906,0xf590e7b2,0x6607c09d,0x9021054a, + 0x57a6e150,0xf3f2ced8 }, + { 0xf10d9b55,0x200510f3,0xd8642648,0x9d2fcfac,0xe8bd0e7c,0xe5631aa7, + 0x3da3e210,0x0f56a454 } }, + /* 106 */ + { { 0x1043e0df,0x5b21bffa,0x9c007e6d,0x6c74b6cc,0xd4a8517a,0x1a656ec0, + 0x1969e263,0xbd8f1741 }, + { 0xbeb7494a,0x8a9bbb86,0x45f3b838,0x1567d46f,0xa4e5a79a,0xdf7a12a7, + 0x30ccfa09,0x2d1a1c35 } }, + /* 107 */ + { { 0x506508da,0x192e3813,0xa1d795a7,0x336180c4,0x7a9944b3,0xcddb5949, + 0xb91fba46,0xa107a65e }, + { 0x0f94d639,0xe6d1d1c5,0x8a58b7d7,0x8b4af375,0xbd37ca1c,0x1a7c5584, + 0xf87a9af2,0x183d760a } }, + /* 108 */ + { { 0x0dde59a4,0x29d69711,0x0e8bef87,0xf1ad8d07,0x4f2ebe78,0x229b4963, + 0xc269d754,0x1d44179d }, + { 0x8390d30e,0xb32dc0cf,0x0de8110c,0x0a3b2753,0x2bc0339a,0x31af1dc5, + 0x9606d262,0x771f9cc2 } }, + /* 109 */ + { { 0x85040739,0x99993e77,0x8026a939,0x44539db9,0xf5f8fc26,0xcf40f6f2, + 0x0362718e,0x64427a31 }, + { 0x85428aa8,0x4f4f2d87,0xebfb49a8,0x7b7adc3f,0xf23d01ac,0x201b2c6d, + 0x6ae90d6d,0x49d9b749 } }, + /* 110 */ + { { 0x435d1099,0xcc78d8bc,0x8e8d1a08,0x2adbcd4e,0x2cb68a41,0x02c2e2a0, + 0x3f605445,0x9037d81b }, + { 0x074c7b61,0x7cdbac27,0x57bfd72e,0xfe2031ab,0x596d5352,0x61ccec96, + 0x7cc0639c,0x08c3de6a } }, + /* 111 */ + { { 0xf6d552ab,0x20fdd020,0x05cd81f1,0x56baff98,0x91351291,0x06fb7c3e, + 0x45796b2f,0xc6909442 }, + { 0x41231bd1,0x17b3ae9c,0x5cc58205,0x1eac6e87,0xf9d6a122,0x208837ab, + 0xcafe3ac0,0x3fa3db02 } }, + /* 112 */ + { { 0x05058880,0xd75a3e65,0x643943f2,0x7da365ef,0xfab24925,0x4147861c, + 0xfdb808ff,0xc5c4bdb0 }, + { 0xb272b56b,0x73513e34,0x11b9043a,0xc8327e95,0xf8844969,0xfd8ce37d, + 0x46c2b6b5,0x2d56db94 } }, + /* 113 */ + { { 0xff46ac6b,0x2461782f,0x07a2e425,0xd19f7926,0x09a48de1,0xfafea3c4, + 0xe503ba42,0x0f56bd9d }, + { 0x345cda49,0x137d4ed1,0x816f299d,0x821158fc,0xaeb43402,0xe7c6a54a, + 0x1173b5f1,0x4003bb9d } }, + /* 114 */ + { { 0xa0803387,0x3b8e8189,0x39cbd404,0xece115f5,0xd2877f21,0x4297208d, + 0xa07f2f9e,0x53765522 }, + { 0xa8a4182d,0xa4980a21,0x3219df79,0xa2bbd07a,0x1a19a2d4,0x674d0a2e, + 0x6c5d4549,0x7a056f58 } }, + /* 115 */ + { { 0x9d8a2a47,0x646b2558,0xc3df2773,0x5b582948,0xabf0d539,0x51ec000e, + 0x7a1a2675,0x77d482f1 }, + { 0x87853948,0xb8a1bd95,0x6cfbffee,0xa6f817bd,0x80681e47,0xab6ec057, + 0x2b38b0e4,0x4115012b } }, + /* 116 */ + { { 
0x6de28ced,0x3c73f0f4,0x9b13ec47,0x1d5da760,0x6e5c6392,0x61b8ce9e, + 0xfbea0946,0xcdf04572 }, + { 0x6c53c3b0,0x1cb3c58b,0x447b843c,0x97fe3c10,0x2cb9780e,0xfb2b8ae1, + 0x97383109,0xee703dda } }, + /* 117 */ + { { 0xff57e43a,0x34515140,0xb1b811b8,0xd44660d3,0x8f42b986,0x2b3b5dff, + 0xa162ce21,0x2a0ad89d }, + { 0x6bc277ba,0x64e4a694,0xc141c276,0xc788c954,0xcabf6274,0x141aa64c, + 0xac2b4659,0xd62d0b67 } }, + /* 118 */ + { { 0x2c054ac4,0x39c5d87b,0xf27df788,0x57005859,0xb18128d6,0xedf7cbf3, + 0x991c2426,0xb39a23f2 }, + { 0xf0b16ae5,0x95284a15,0xa136f51b,0x0c6a05b1,0xf2700783,0x1d63c137, + 0xc0674cc5,0x04ed0092 } }, + /* 119 */ + { { 0x9ae90393,0x1f4185d1,0x4a3d64e6,0x3047b429,0x9854fc14,0xae0001a6, + 0x0177c387,0xa0a91fc1 }, + { 0xae2c831e,0xff0a3f01,0x2b727e16,0xbb76ae82,0x5a3075b4,0x8f12c8a1, + 0x9ed20c41,0x084cf988 } }, + /* 120 */ + { { 0xfca6becf,0xd98509de,0x7dffb328,0x2fceae80,0x4778e8b9,0x5d8a15c4, + 0x73abf77e,0xd57955b2 }, + { 0x31b5d4f1,0x210da79e,0x3cfa7a1c,0xaa52f04b,0xdc27c20b,0xd4d12089, + 0x02d141f1,0x8e14ea42 } }, + /* 121 */ + { { 0xf2897042,0xeed50345,0x43402c4a,0x8d05331f,0xc8bdfb21,0xc8d9c194, + 0x2aa4d158,0x597e1a37 }, + { 0xcf0bd68c,0x0327ec1a,0xab024945,0x6d4be0dc,0xc9fe3e84,0x5b9c8d7a, + 0x199b4dea,0xca3f0236 } }, + /* 122 */ + { { 0x6170bd20,0x592a10b5,0x6d3f5de7,0x0ea897f1,0x44b2ade2,0xa3363ff1, + 0x309c07e4,0xbde7fd7e }, + { 0xb8f5432c,0x516bb6d2,0xe043444b,0x210dc1cb,0xf8f95b5a,0x3db01e6f, + 0x0a7dd198,0xb623ad0e } }, + /* 123 */ + { { 0x60c7b65b,0xa75bd675,0x23a4a289,0xab8c5590,0xd7b26795,0xf8220fd0, + 0x58ec137b,0xd6aa2e46 }, + { 0x5138bb85,0x10abc00b,0xd833a95c,0x8c31d121,0x1702a32e,0xb24ff00b, + 0x2dcc513a,0x111662e0 } }, + /* 124 */ + { { 0xefb42b87,0x78114015,0x1b6c4dff,0xbd9f5d70,0xa7d7c129,0x66ecccd7, + 0x94b750f8,0xdb3ee1cb }, + { 0xf34837cf,0xb26f3db0,0xb9578d4f,0xe7eed18b,0x7c56657d,0x5d2cdf93, + 0x52206a59,0x886a6442 } }, + /* 125 */ + { { 0x65b569ea,0x3c234cfb,0xf72119c1,0x20011141,0xa15a619e,0x8badc85d, + 0x018a17bc,0xa70cf4eb }, + { 0x8c4a6a65,0x224f97ae,0x0134378f,0x36e5cf27,0x4f7e0960,0xbe3a609e, + 0xd1747b77,0xaa4772ab } }, + /* 126 */ + { { 0x7aa60cc0,0x67676131,0x0368115f,0xc7916361,0xbbc1bb5a,0xded98bb4, + 0x30faf974,0x611a6ddc }, + { 0xc15ee47a,0x30e78cbc,0x4e0d96a5,0x2e896282,0x3dd9ed88,0x36f35adf, + 0x16429c88,0x5cfffaf8 } }, + /* 127 */ + { { 0x9b7a99cd,0xc0d54cff,0x843c45a1,0x7bf3b99d,0x62c739e1,0x038a908f, + 0x7dc1994c,0x6e5a6b23 }, + { 0x0ba5db77,0xef8b454e,0xacf60d63,0xb7b8807f,0x76608378,0xe591c0c6, + 0x242dabcc,0x481a238d } }, + /* 128 */ + { { 0x35d0b34a,0xe3417bc0,0x8327c0a7,0x440b386b,0xac0362d1,0x8fb7262d, + 0xe0cdf943,0x2c41114c }, + { 0xad95a0b1,0x2ba5cef1,0x67d54362,0xc09b37a8,0x01e486c9,0x26d6cdd2, + 0x42ff9297,0x20477abf } }, + /* 129 */ + { { 0x18d65dbf,0x2f75173c,0x339edad8,0x77bf940e,0xdcf1001c,0x7022d26b, + 0xc77396b6,0xac66409a }, + { 0xc6261cc3,0x8b0bb36f,0x190e7e90,0x213f7bc9,0xa45e6c10,0x6541ceba, + 0xcc122f85,0xce8e6975 } }, + /* 130 */ + { { 0xbc0a67d2,0x0f121b41,0x444d248a,0x62d4760a,0x659b4737,0x0e044f1d, + 0x250bb4a8,0x08fde365 }, + { 0x848bf287,0xaceec3da,0xd3369d6e,0xc2a62182,0x92449482,0x3582dfdc, + 0x565d6cd7,0x2f7e2fd2 } }, + /* 131 */ + { { 0xc3770fa7,0xae4b92db,0x379043f9,0x095e8d5c,0x17761171,0x54f34e9d, + 0x907702ae,0xc65be92e }, + { 0xf6fd0a40,0x2758a303,0xbcce784b,0xe7d822e3,0x4f9767bf,0x7ae4f585, + 0xd1193b3a,0x4bff8e47 } }, + /* 132 */ + { { 0x00ff1480,0xcd41d21f,0x0754db16,0x2ab8fb7d,0xbbe0f3ea,0xac81d2ef, + 0x5772967d,0x3e4e4ae6 }, + { 
0x3c5303e6,0x7e18f36d,0x92262397,0x3bd9994b,0x1324c3c0,0x9ed70e26, + 0x58ec6028,0x5388aefd } }, + /* 133 */ + { { 0x5e5d7713,0xad1317eb,0x75de49da,0x09b985ee,0xc74fb261,0x32f5bc4f, + 0x4f75be0e,0x5cf908d1 }, + { 0x8e657b12,0x76043510,0xb96ed9e6,0xbfd421a5,0x8970ccc2,0x0e29f51f, + 0x60f00ce2,0xa698ba40 } }, + /* 134 */ + { { 0xef748fec,0x73db1686,0x7e9d2cf9,0xe6e755a2,0xce265eff,0x630b6544, + 0x7aebad8d,0xb142ef8a }, + { 0x17d5770a,0xad31af9f,0x2cb3412f,0x66af3b67,0xdf3359de,0x6bd60d1b, + 0x58515075,0xd1896a96 } }, + /* 135 */ + { { 0x33c41c08,0xec5957ab,0x5468e2e1,0x87de94ac,0xac472f6c,0x18816b73, + 0x7981da39,0x267b0e0b }, + { 0x8e62b988,0x6e554e5d,0x116d21e7,0xd8ddc755,0x3d2a6f99,0x4610faf0, + 0xa1119393,0xb54e287a } }, + /* 136 */ + { { 0x178a876b,0x0a0122b5,0x085104b4,0x51ff96ff,0x14f29f76,0x050b31ab, + 0x5f87d4e6,0x84abb28b }, + { 0x8270790a,0xd5ed439f,0x85e3f46b,0x2d6cb59d,0x6c1e2212,0x75f55c1b, + 0x17655640,0xe5436f67 } }, + /* 137 */ + { { 0x2286e8d5,0x53f9025e,0x864453be,0x353c95b4,0xe408e3a0,0xd832f5bd, + 0x5b9ce99e,0x0404f68b }, + { 0xa781e8e5,0xcad33bde,0x163c2f5b,0x3cdf5018,0x0119caa3,0x57576960, + 0x0ac1c701,0x3a4263df } }, + /* 138 */ + { { 0x9aeb596d,0xc2965ecc,0x023c92b4,0x01ea03e7,0x2e013961,0x4704b4b6, + 0x905ea367,0x0ca8fd3f }, + { 0x551b2b61,0x92523a42,0x390fcd06,0x1eb7a89c,0x0392a63e,0xe7f1d2be, + 0x4ddb0c33,0x96dca264 } }, + /* 139 */ + { { 0x387510af,0x203bb43a,0xa9a36a01,0x846feaa8,0x2f950378,0xd23a5770, + 0x3aad59dc,0x4363e212 }, + { 0x40246a47,0xca43a1c7,0xe55dd24d,0xb362b8d2,0x5d8faf96,0xf9b08604, + 0xd8bb98c4,0x840e115c } }, + /* 140 */ + { { 0x1023e8a7,0xf12205e2,0xd8dc7a0b,0xc808a8cd,0x163a5ddf,0xe292a272, + 0x30ded6d4,0x5e0d6abd }, + { 0x7cfc0f64,0x07a721c2,0x0e55ed88,0x42eec01d,0x1d1f9db2,0x26a7bef9, + 0x2945a25a,0x7dea48f4 } }, + /* 141 */ + { { 0xe5060a81,0xabdf6f1c,0xf8f95615,0xe79f9c72,0x06ac268b,0xcfd36c54, + 0xebfd16d1,0xabc2a2be }, + { 0xd3e2eac7,0x8ac66f91,0xd2dd0466,0x6f10ba63,0x0282d31b,0x6790e377, + 0x6c7eefc1,0x4ea35394 } }, + /* 142 */ + { { 0x5266309d,0xed8a2f8d,0x81945a3e,0x0a51c6c0,0x578c5dc1,0xcecaf45a, + 0x1c94ffc3,0x3a76e689 }, + { 0x7d7b0d0f,0x9aace8a4,0x8f584a5f,0x963ace96,0x4e697fbe,0x51a30c72, + 0x465e6464,0x8212a10a } }, + /* 143 */ + { { 0xcfab8caa,0xef7c61c3,0x0e142390,0x18eb8e84,0x7e9733ca,0xcd1dff67, + 0x599cb164,0xaa7cab71 }, + { 0xbc837bd1,0x02fc9273,0xc36af5d7,0xc06407d0,0xf423da49,0x17621292, + 0xfe0617c3,0x40e38073 } }, + /* 144 */ + { { 0xa7bf9b7c,0xf4f80824,0x3fbe30d0,0x365d2320,0x97cf9ce3,0xbfbe5320, + 0xb3055526,0xe3604700 }, + { 0x6cc6c2c7,0x4dcb9911,0xba4cbee6,0x72683708,0x637ad9ec,0xdcded434, + 0xa3dee15f,0x6542d677 } }, + /* 145 */ + { { 0x7b6c377a,0x3f32b6d0,0x903448be,0x6cb03847,0x20da8af7,0xd6fdd3a8, + 0x09bb6f21,0xa6534aee }, + { 0x1035facf,0x30a1780d,0x9dcb47e6,0x35e55a33,0xc447f393,0x6ea50fe1, + 0xdc9aef22,0xf3cb672f } }, + /* 146 */ + { { 0x3b55fd83,0xeb3719fe,0x875ddd10,0xe0d7a46c,0x05cea784,0x33ac9fa9, + 0xaae870e7,0x7cafaa2e }, + { 0x1d53b338,0x9b814d04,0xef87e6c6,0xe0acc0a0,0x11672b0f,0xfb93d108, + 0xb9bd522e,0x0aab13c1 } }, + /* 147 */ + { { 0xd2681297,0xddcce278,0xb509546a,0xcb350eb1,0x7661aaf2,0x2dc43173, + 0x847012e9,0x4b91a602 }, + { 0x72f8ddcf,0xdcff1095,0x9a911af4,0x08ebf61e,0xc372430e,0x48f4360a, + 0x72321cab,0x49534c53 } }, + /* 148 */ + { { 0xf07b7e9d,0x83df7d71,0x13cd516f,0xa478efa3,0x6c047ee3,0x78ef264b, + 0xd65ac5ee,0xcaf46c4f }, + { 0x92aa8266,0xa04d0c77,0x913684bb,0xedf45466,0xae4b16b0,0x56e65168, + 0x04c6770f,0x14ce9e57 } }, + /* 149 */ + { { 
0x965e8f91,0x99445e3e,0xcb0f2492,0xd3aca1ba,0x90c8a0a0,0xd31cc70f, + 0x3e4c9a71,0x1bb708a5 }, + { 0x558bdd7a,0xd5ca9e69,0x018a26b1,0x734a0508,0x4c9cf1ec,0xb093aa71, + 0xda300102,0xf9d126f2 } }, + /* 150 */ + { { 0xaff9563e,0x749bca7a,0xb49914a0,0xdd077afe,0xbf5f1671,0xe27a0311, + 0x729ecc69,0x807afcb9 }, + { 0xc9b08b77,0x7f8a9337,0x443c7e38,0x86c3a785,0x476fd8ba,0x85fafa59, + 0x6568cd8c,0x751adcd1 } }, + /* 151 */ + { { 0x10715c0d,0x8aea38b4,0x8f7697f7,0xd113ea71,0x93fbf06d,0x665eab14, + 0x2537743f,0x29ec4468 }, + { 0xb50bebbc,0x3d94719c,0xe4505422,0x399ee5bf,0x8d2dedb1,0x90cd5b3a, + 0x92a4077d,0xff9370e3 } }, + /* 152 */ + { { 0xc6b75b65,0x59a2d69b,0x266651c5,0x4188f8d5,0x3de9d7d2,0x28a9f33e, + 0xa2a9d01a,0x9776478b }, + { 0x929af2c7,0x8852622d,0x4e690923,0x334f5d6d,0xa89a51e9,0xce6cc7e5, + 0xac2f82fa,0x74a6313f } }, + /* 153 */ + { { 0xb75f079c,0xb2f4dfdd,0x18e36fbb,0x85b07c95,0xe7cd36dd,0x1b6cfcf0, + 0x0ff4863d,0xab75be15 }, + { 0x173fc9b7,0x81b367c0,0xd2594fd0,0xb90a7420,0xc4091236,0x15fdbf03, + 0x0b4459f6,0x4ebeac2e } }, + /* 154 */ + { { 0x5c9f2c53,0xeb6c5fe7,0x8eae9411,0xd2522011,0xf95ac5d8,0xc8887633, + 0x2c1baffc,0xdf99887b }, + { 0x850aaecb,0xbb78eed2,0x01d6a272,0x9d49181b,0xb1cdbcac,0x978dd511, + 0x779f4058,0x27b040a7 } }, + /* 155 */ + { { 0xf73b2eb2,0x90405db7,0x8e1b2118,0xe0df8508,0x5962327e,0x501b7152, + 0xe4cfa3f5,0xb393dd37 }, + { 0x3fd75165,0xa1230e7b,0xbcd33554,0xd66344c2,0x0f7b5022,0x6c36f1be, + 0xd0463419,0x09588c12 } }, + /* 156 */ + { { 0x02601c3b,0xe086093f,0xcf5c335f,0xfb0252f8,0x894aff28,0x955cf280, + 0xdb9f648b,0x81c879a9 }, + { 0xc6f56c51,0x040e687c,0x3f17618c,0xfed47169,0x9059353b,0x44f88a41, + 0x5fc11bc4,0xfa0d48f5 } }, + /* 157 */ + { { 0xe1608e4d,0xbc6e1c9d,0x3582822c,0x010dda11,0x157ec2d7,0xf6b7ddc1, + 0xb6a367d6,0x8ea0e156 }, + { 0x2383b3b4,0xa354e02f,0x3f01f53c,0x69966b94,0x2de03ca5,0x4ff6632b, + 0xfa00b5ac,0x3f5ab924 } }, + /* 158 */ + { { 0x59739efb,0x337bb0d9,0xe7ebec0d,0xc751b0f4,0x411a67d1,0x2da52dd6, + 0x2b74256e,0x8bc76887 }, + { 0x82d3d253,0xa5be3b72,0xf58d779f,0xa9f679a1,0xe16767bb,0xa1cac168, + 0x60fcf34f,0xb386f190 } }, + /* 159 */ + { { 0x2fedcfc2,0x31f3c135,0x62f8af0d,0x5396bf62,0xe57288c2,0x9a02b4ea, + 0x1b069c4d,0x4cb460f7 }, + { 0x5b8095ea,0xae67b4d3,0x6fc07603,0x92bbf859,0xb614a165,0xe1475f66, + 0x95ef5223,0x52c0d508 } }, + /* 160 */ + { { 0x15339848,0x231c210e,0x70778c8d,0xe87a28e8,0x6956e170,0x9d1de661, + 0x2bb09c0b,0x4ac3c938 }, + { 0x6998987d,0x19be0551,0xae09f4d6,0x8b2376c4,0x1a3f933d,0x1de0b765, + 0xe39705f4,0x380d94c7 } }, + /* 161 */ + { { 0x81542e75,0x01a355aa,0xee01b9b7,0x96c724a1,0x624d7087,0x6b3a2977, + 0xde2637af,0x2ce3e171 }, + { 0xf5d5bc1a,0xcfefeb49,0x2777e2b5,0xa655607e,0x9513756c,0x4feaac2f, + 0x0b624e4d,0x2e6cd852 } }, + /* 162 */ + { { 0x8c31c31d,0x3685954b,0x5bf21a0c,0x68533d00,0x75c79ec9,0x0bd7626e, + 0x42c69d54,0xca177547 }, + { 0xf6d2dbb2,0xcc6edaff,0x174a9d18,0xfd0d8cbd,0xaa4578e8,0x875e8793, + 0x9cab2ce6,0xa976a713 } }, + /* 163 */ + { { 0x93fb353d,0x0a651f1b,0x57fcfa72,0xd75cab8b,0x31b15281,0xaa88cfa7, + 0x0a1f4999,0x8720a717 }, + { 0x693e1b90,0x8c3e8d37,0x16f6dfc3,0xd345dc0b,0xb52a8742,0x8ea8d00a, + 0xc769893c,0x9719ef29 } }, + /* 164 */ + { { 0x58e35909,0x820eed8d,0x33ddc116,0x9366d8dc,0x6e205026,0xd7f999d0, + 0xe15704c1,0xa5072976 }, + { 0xc4e70b2e,0x002a37ea,0x6890aa8a,0x84dcf657,0x645b2a5c,0xcd71bf18, + 0xf7b77725,0x99389c9d } }, + /* 165 */ + { { 0x7ada7a4b,0x238c08f2,0xfd389366,0x3abe9d03,0x766f512c,0x6b672e89, + 0x202c82e4,0xa88806aa }, + { 
0xd380184e,0x6602044a,0x126a8b85,0xa8cb78c4,0xad844f17,0x79d670c0, + 0x4738dcfe,0x0043bffb } }, + /* 166 */ + { { 0x36d5192e,0x8d59b5dc,0x4590b2af,0xacf885d3,0x11601781,0x83566d0a, + 0xba6c4866,0x52f3ef01 }, + { 0x0edcb64d,0x3986732a,0x8068379f,0x0a482c23,0x7040f309,0x16cbe5fa, + 0x9ef27e75,0x3296bd89 } }, + /* 167 */ + { { 0x454d81d7,0x476aba89,0x51eb9b3c,0x9eade7ef,0x81c57986,0x619a21cd, + 0xaee571e9,0x3b90febf }, + { 0x5496f7cb,0x9393023e,0x7fb51bc4,0x55be41d8,0x99beb5ce,0x03f1dd48, + 0x9f810b18,0x6e88069d } }, + /* 168 */ + { { 0xb43ea1db,0xce37ab11,0x5259d292,0x0a7ff1a9,0x8f84f186,0x851b0221, + 0xdefaad13,0xa7222bea }, + { 0x2b0a9144,0xa2ac78ec,0xf2fa59c5,0x5a024051,0x6147ce38,0x91d1eca5, + 0xbc2ac690,0xbe94d523 } }, + /* 169 */ + { { 0x0b226ce7,0x72f4945e,0x967e8b70,0xb8afd747,0x85a6c63e,0xedea46f1, + 0x9be8c766,0x7782defe }, + { 0x3db38626,0x760d2aa4,0x76f67ad1,0x460ae787,0x54499cdb,0x341b86fc, + 0xa2892e4b,0x03838567 } }, + /* 170 */ + { { 0x79ec1a0f,0x2d8daefd,0xceb39c97,0x3bbcd6fd,0x58f61a95,0xf5575ffc, + 0xadf7b420,0xdbd986c4 }, + { 0x15f39eb7,0x81aa8814,0xb98d976c,0x6ee2fcf5,0xcf2f717d,0x5465475d, + 0x6860bbd0,0x8e24d3c4 } }, + /* 171 */ + { { 0x9a587390,0x749d8e54,0x0cbec588,0x12bb194f,0xb25983c6,0x46e07da4, + 0x407bafc8,0x541a99c4 }, + { 0x624c8842,0xdb241692,0xd86c05ff,0x6044c12a,0x4f7fcf62,0xc59d14b4, + 0xf57d35d1,0xc0092c49 } }, + /* 172 */ + { { 0xdf2e61ef,0xd3cc75c3,0x2e1b35ca,0x7e8841c8,0x909f29f4,0xc62d30d1, + 0x7286944d,0x75e40634 }, + { 0xbbc237d0,0xe7d41fc5,0xec4f01c9,0xc9537bf0,0x282bd534,0x91c51a16, + 0xc7848586,0x5b7cb658 } }, + /* 173 */ + { { 0x8a28ead1,0x964a7084,0xfd3b47f6,0x802dc508,0x767e5b39,0x9ae4bfd1, + 0x8df097a1,0x7ae13eba }, + { 0xeadd384e,0xfd216ef8,0xb6b2ff06,0x0361a2d9,0x4bcdb5f3,0x204b9878, + 0xe2a8e3fd,0x787d8074 } }, + /* 174 */ + { { 0x757fbb1c,0xc5e25d6b,0xca201deb,0xe47bddb2,0x6d2233ff,0x4a55e9a3, + 0x9ef28484,0x5c222819 }, + { 0x88315250,0x773d4a85,0x827097c1,0x21b21a2b,0xdef5d33f,0xab7c4ea1, + 0xbaf0f2b0,0xe45d37ab } }, + /* 175 */ + { { 0x28511c8a,0xd2df1e34,0xbdca6cd3,0xebb229c8,0x627c39a7,0x578a71a7, + 0x84dfb9d3,0xed7bc122 }, + { 0x93dea561,0xcf22a6df,0xd48f0ed1,0x5443f18d,0x5bad23e8,0xd8b86140, + 0x45ca6d27,0xaac97cc9 } }, + /* 176 */ + { { 0xa16bd00a,0xeb54ea74,0xf5c0bcc1,0xd839e9ad,0x1f9bfc06,0x092bb7f1, + 0x1163dc4e,0x318f97b3 }, + { 0xc30d7138,0xecc0c5be,0xabc30220,0x44e8df23,0xb0223606,0x2bb7972f, + 0x9a84ff4d,0xfa41faa1 } }, + /* 177 */ + { { 0xa6642269,0x4402d974,0x9bb783bd,0xc81814ce,0x7941e60b,0x398d38e4, + 0x1d26e9e2,0x38bb6b2c }, + { 0x6a577f87,0xc64e4a25,0xdc11fe1c,0x8b52d253,0x62280728,0xff336abf, + 0xce7601a5,0x94dd0905 } }, + /* 178 */ + { { 0xde93f92a,0x156cf7dc,0x89b5f315,0xa01333cb,0xc995e750,0x02404df9, + 0xd25c2ae9,0x92077867 }, + { 0x0bf39d44,0xe2471e01,0x96bb53d7,0x5f2c9020,0x5c9c3d8f,0x4c44b7b3, + 0xd29beb51,0x81e8428b } }, + /* 179 */ + { { 0xc477199f,0x6dd9c2ba,0x6b5ecdd9,0x8cb8eeee,0xee40fd0e,0x8af7db3f, + 0xdbbfa4b1,0x1b94ab62 }, + { 0xce47f143,0x44f0d8b3,0x63f46163,0x51e623fc,0xcc599383,0xf18f270f, + 0x055590ee,0x06a38e28 } }, + /* 180 */ + { { 0xb3355b49,0x2e5b0139,0xb4ebf99b,0x20e26560,0xd269f3dc,0xc08ffa6b, + 0x83d9d4f8,0xa7b36c20 }, + { 0x1b3e8830,0x64d15c3a,0xa89f9c0b,0xd5fceae1,0xe2d16930,0xcfeee4a2, + 0xa2822a20,0xbe54c6b4 } }, + /* 181 */ + { { 0x8d91167c,0xd6cdb3df,0xe7a6625e,0x517c3f79,0x346ac7f4,0x7105648f, + 0xeae022bb,0xbf30a5ab }, + { 0x93828a68,0x8e7785be,0x7f3ef036,0x5161c332,0x592146b2,0xe11b5feb, + 0x2732d13a,0xd1c820de } }, + /* 182 */ + { { 
0x9038b363,0x043e1347,0x6b05e519,0x58c11f54,0x6026cad1,0x4fe57abe, + 0x68a18da3,0xb7d17bed }, + { 0xe29c2559,0x44ca5891,0x5bfffd84,0x4f7a0376,0x74e46948,0x498de4af, + 0x6412cc64,0x3997fd5e } }, + /* 183 */ + { { 0x8bd61507,0xf2074682,0x34a64d2a,0x29e132d5,0x8a8a15e3,0xffeddfb0, + 0x3c6c13e8,0x0eeb8929 }, + { 0xa7e259f8,0xe9b69a3e,0xd13e7e67,0xce1db7e6,0xad1fa685,0x277318f6, + 0xc922b6ef,0x228916f8 } }, + /* 184 */ + { { 0x0a12ab5b,0x959ae25b,0x957bc136,0xcc11171f,0xd16e2b0c,0x8058429e, + 0x6e93097e,0xec05ad1d }, + { 0xac3f3708,0x157ba5be,0x30b59d77,0x31baf935,0x118234e5,0x47b55237, + 0x7ff11b37,0x7d314156 } }, + /* 185 */ + { { 0xf6dfefab,0x7bd9c05c,0xdcb37707,0xbe2f2268,0x3a38bb95,0xe53ead97, + 0x9bc1d7a3,0xe9ce66fc }, + { 0x6f6a02a1,0x75aa1576,0x60e600ed,0x38c087df,0x68cdc1b9,0xf8947f34, + 0x72280651,0xd9650b01 } }, + /* 186 */ + { { 0x5a057e60,0x504b4c4a,0x8def25e4,0xcbccc3be,0x17c1ccbd,0xa6353208, + 0x804eb7a2,0x14d6699a }, + { 0xdb1f411a,0x2c8a8415,0xf80d769c,0x09fbaf0b,0x1c2f77ad,0xb4deef90, + 0x0d43598a,0x6f4c6841 } }, + /* 187 */ + { { 0x96c24a96,0x8726df4e,0xfcbd99a3,0x534dbc85,0x8b2ae30a,0x3c466ef2, + 0x61189abb,0x4c4350fd }, + { 0xf855b8da,0x2967f716,0x463c38a1,0x41a42394,0xeae93343,0xc37e1413, + 0x5a3118b5,0xa726d242 } }, + /* 188 */ + { { 0x948c1086,0xdae6b3ee,0xcbd3a2e1,0xf1de503d,0x03d022f3,0x3f35ed3f, + 0xcc6cf392,0x13639e82 }, + { 0xcdafaa86,0x9ac938fb,0x2654a258,0xf45bc5fb,0x45051329,0x1963b26e, + 0xc1a335a3,0xca9365e1 } }, + /* 189 */ + { { 0x4c3b2d20,0x3615ac75,0x904e241b,0x742a5417,0xcc9d071d,0xb08521c4, + 0x970b72a5,0x9ce29c34 }, + { 0x6d3e0ad6,0x8cc81f73,0xf2f8434c,0x8060da9e,0x6ce862d9,0x35ed1d1a, + 0xab42af98,0x48c4abd7 } }, + /* 190 */ + { { 0x40c7485a,0xd221b0cc,0xe5274dbf,0xead455bb,0x9263d2e8,0x493c7698, + 0xf67b33cb,0x78017c32 }, + { 0x930cb5ee,0xb9d35769,0x0c408ed2,0xc0d14e94,0x272f1a4d,0xf8b7bf55, + 0xde5c1c04,0x53cd0454 } }, + /* 191 */ + { { 0x5d28ccac,0xbcd585fa,0x005b746e,0x5f823e56,0xcd0123aa,0x7c79f0a1, + 0xd3d7fa8f,0xeea465c1 }, + { 0x0551803b,0x7810659f,0x7ce6af70,0x6c0b599f,0x29288e70,0x4195a770, + 0x7ae69193,0x1b6e42a4 } }, + /* 192 */ + { { 0xf67d04c3,0x2e80937c,0x89eeb811,0x1e312be2,0x92594d60,0x56b5d887, + 0x187fbd3d,0x0224da14 }, + { 0x0c5fe36f,0x87abb863,0x4ef51f5f,0x580f3c60,0xb3b429ec,0x964fb1bf, + 0x42bfff33,0x60838ef0 } }, + /* 193 */ + { { 0x7e0bbe99,0x432cb2f2,0x04aa39ee,0x7bda44f3,0x9fa93903,0x5f497c7a, + 0x2d331643,0x636eb202 }, + { 0x93ae00aa,0xfcfd0e61,0x31ae6d2f,0x875a00fe,0x9f93901c,0xf43658a2, + 0x39218bac,0x8844eeb6 } }, + /* 194 */ + { { 0x6b3bae58,0x114171d2,0x17e39f3e,0x7db3df71,0x81a8eada,0xcd37bc7f, + 0x51fb789e,0x27ba83dc }, + { 0xfbf54de5,0xa7df439f,0xb5fe1a71,0x7277030b,0xdb297a48,0x42ee8e35, + 0x87f3a4ab,0xadb62d34 } }, + /* 195 */ + { { 0xa175df2a,0x9b1168a2,0x618c32e9,0x082aa04f,0x146b0916,0xc9e4f2e7, + 0x75e7c8b2,0xb990fd76 }, + { 0x4df37313,0x0829d96b,0xd0b40789,0x1c205579,0x78087711,0x66c9ae4a, + 0x4d10d18d,0x81707ef9 } }, + /* 196 */ + { { 0x03d6ff96,0x97d7cab2,0x0d843360,0x5b851bfc,0xd042db4b,0x268823c4, + 0xd5a8aa5c,0x3792daea }, + { 0x941afa0b,0x52818865,0x42d83671,0xf3e9e741,0x5be4e0a7,0x17c82527, + 0x94b001ba,0x5abd635e } }, + /* 197 */ + { { 0x0ac4927c,0x727fa84e,0xa7c8cf23,0xe3886035,0x4adca0df,0xa4bcd5ea, + 0x846ab610,0x5995bf21 }, + { 0x829dfa33,0xe90f860b,0x958fc18b,0xcaafe2ae,0x78630366,0x9b3baf44, + 0xd483411e,0x44c32ca2 } }, + /* 198 */ + { { 0xe40ed80c,0xa74a97f1,0x31d2ca82,0x5f938cb1,0x7c2d6ad9,0x53f2124b, + 0x8082a54c,0x1f2162fb }, + { 
0x720b173e,0x7e467cc5,0x085f12f9,0x40e8a666,0x4c9d65dc,0x8cebc20e, + 0xc3e907c9,0x8f1d402b } }, + /* 199 */ + { { 0xfbc4058a,0x4f592f9c,0x292f5670,0xb15e14b6,0xbc1d8c57,0xc55cfe37, + 0x926edbf9,0xb1980f43 }, + { 0x32c76b09,0x98c33e09,0x33b07f78,0x1df5279d,0x863bb461,0x6f08ead4, + 0x37448e45,0x2828ad9b } }, + /* 200 */ + { { 0xc4cf4ac5,0x696722c4,0xdde64afb,0xf5ac1a3f,0xe0890832,0x0551baa2, + 0x5a14b390,0x4973f127 }, + { 0x322eac5d,0xe59d8335,0x0bd9b568,0x5e07eef5,0xa2588393,0xab36720f, + 0xdb168ac7,0x6dac8ed0 } }, + /* 201 */ + { { 0xeda835ef,0xf7b545ae,0x1d10ed51,0x4aa113d2,0x13741b09,0x035a65e0, + 0x20b9de4c,0x4b23ef59 }, + { 0x3c4c7341,0xe82bb680,0x3f58bc37,0xd457706d,0xa51e3ee8,0x73527863, + 0xddf49a4e,0x4dd71534 } }, + /* 202 */ + { { 0x95476cd9,0xbf944672,0xe31a725b,0x648d072f,0xfc4b67e0,0x1441c8b8, + 0x2f4a4dbb,0xfd317000 }, + { 0x8995d0e1,0x1cb43ff4,0x0ef729aa,0x76e695d1,0x41798982,0xe0d5f976, + 0x9569f365,0x14fac58c } }, + /* 203 */ + { { 0xf312ae18,0xad9a0065,0xfcc93fc9,0x51958dc0,0x8a7d2846,0xd9a14240, + 0x36abda50,0xed7c7651 }, + { 0x25d4abbc,0x46270f1a,0xf1a113ea,0x9b5dd8f3,0x5b51952f,0xc609b075, + 0x4d2e9f53,0xfefcb7f7 } }, + /* 204 */ + { { 0xba119185,0xbd09497a,0xaac45ba4,0xd54e8c30,0xaa521179,0x492479de, + 0x87e0d80b,0x1801a57e }, + { 0xfcafffb0,0x073d3f8d,0xae255240,0x6cf33c0b,0x5b5fdfbc,0x781d763b, + 0x1ead1064,0x9f8fc11e } }, + /* 205 */ + { { 0x5e69544c,0x1583a171,0xf04b7813,0x0eaf8567,0x278a4c32,0x1e22a8fd, + 0x3d3a69a9,0xa9d3809d }, + { 0x59a2da3b,0x936c2c2c,0x1895c847,0x38ccbcf6,0x63d50869,0x5e65244e, + 0xe1178ef7,0x3006b9ae } }, + /* 206 */ + { { 0xc9eead28,0x0bb1f2b0,0x89f4dfbc,0x7eef635d,0xb2ce8939,0x074757fd, + 0x45f8f761,0x0ab85fd7 }, + { 0x3e5b4549,0xecda7c93,0x97922f21,0x4be2bb5c,0xb43b8040,0x261a1274, + 0x11e942c2,0xb122d675 } }, + /* 207 */ + { { 0x66a5ae7a,0x3be607be,0x76adcbe3,0x01e703fa,0x4eb6e5c5,0xaf904301, + 0x097dbaec,0x9f599dc1 }, + { 0x0ff250ed,0x6d75b718,0x349a20dc,0x8eb91574,0x10b227a3,0x425605a4, + 0x8a294b78,0x7d5528e0 } }, + /* 208 */ + { { 0x20c26def,0xf0f58f66,0x582b2d1e,0x025585ea,0x01ce3881,0xfbe7d79b, + 0x303f1730,0x28ccea01 }, + { 0x79644ba5,0xd1dabcd1,0x06fff0b8,0x1fc643e8,0x66b3e17b,0xa60a76fc, + 0xa1d013bf,0xc18baf48 } }, + /* 209 */ + { { 0x5dc4216d,0x34e638c8,0x206142ac,0x00c01067,0x95f5064a,0xd453a171, + 0xb7a9596b,0x9def809d }, + { 0x67ab8d2c,0x41e8642e,0x6237a2b6,0xb4240433,0x64c4218b,0x7d506a6d, + 0x68808ce5,0x0357f8b0 } }, + /* 210 */ + { { 0x4cd2cc88,0x8e9dbe64,0xf0b8f39d,0xcc61c28d,0xcd30a0c8,0x4a309874, + 0x1b489887,0xe4a01add }, + { 0xf57cd8f9,0x2ed1eeac,0xbd594c48,0x1b767d3e,0x7bd2f787,0xa7295c71, + 0xce10cc30,0x466d7d79 } }, + /* 211 */ + { { 0x9dada2c7,0x47d31892,0x8f9aa27d,0x4fa0a6c3,0x820a59e1,0x90e4fd28, + 0x451ead1a,0xc672a522 }, + { 0x5d86b655,0x30607cc8,0xf9ad4af1,0xf0235d3b,0x571172a6,0x99a08680, + 0xf2a67513,0x5e3d64fa } }, + /* 212 */ + { { 0x9b3b4416,0xaa6410c7,0xeab26d99,0xcd8fcf85,0xdb656a74,0x5ebff74a, + 0xeb8e42fc,0x6c8a7a95 }, + { 0xb02a63bd,0x10c60ba7,0x8b8f0047,0x6b2f2303,0x312d90b0,0x8c6c3738, + 0xad82ca91,0x348ae422 } }, + /* 213 */ + { { 0x5ccda2fb,0x7f474663,0x8e0726d2,0x22accaa1,0x492b1f20,0x85adf782, + 0xd9ef2d2e,0xc1074de0 }, + { 0xae9a65b3,0xfcf3ce44,0x05d7151b,0xfd71e4ac,0xce6a9788,0xd4711f50, + 0xc9e54ffc,0xfbadfbdb } }, + /* 214 */ + { { 0x20a99363,0x1713f1cd,0x6cf22775,0xb915658f,0x24d359b2,0x968175cd, + 0x83716fcd,0xb7f976b4 }, + { 0x5d6dbf74,0x5758e24d,0x71c3af36,0x8d23bafd,0x0243dfe3,0x48f47760, + 0xcafcc805,0xf4d41b2e } }, + /* 215 */ + { { 
0xfdabd48d,0x51f1cf28,0x32c078a4,0xce81be36,0x117146e9,0x6ace2974, + 0xe0160f10,0x180824ea }, + { 0x66e58358,0x0387698b,0xce6ca358,0x63568752,0x5e41e6c5,0x82380e34, + 0x83cf6d25,0x67e5f639 } }, + /* 216 */ + { { 0xcf4899ef,0xf89ccb8d,0x9ebb44c0,0x949015f0,0xb2598ec9,0x546f9276, + 0x04c11fc6,0x9fef789a }, + { 0x53d2a071,0x6d367ecf,0xa4519b09,0xb10e1a7f,0x611e2eef,0xca6b3fb0, + 0xa99c4e20,0xbc80c181 } }, + /* 217 */ + { { 0xe5eb82e6,0x972536f8,0xf56cb920,0x1a484fc7,0x50b5da5e,0xc78e2171, + 0x9f8cdf10,0x49270e62 }, + { 0xea6b50ad,0x1a39b7bb,0xa2388ffc,0x9a0284c1,0x8107197b,0x5403eb17, + 0x61372f7f,0xd2ee52f9 } }, + /* 218 */ + { { 0x88e0362a,0xd37cd285,0x8fa5d94d,0x442fa8a7,0xa434a526,0xaff836e5, + 0xe5abb733,0xdfb478be }, + { 0x673eede6,0xa91f1ce7,0x2b5b2f04,0xa5390ad4,0x5530da2f,0x5e66f7bf, + 0x08df473a,0xd9a140b4 } }, + /* 219 */ + { { 0x6e8ea498,0x0e0221b5,0x3563ee09,0x62347829,0x335d2ade,0xe06b8391, + 0x623f4b1a,0x760c058d }, + { 0xc198aa79,0x0b89b58c,0xf07aba7f,0xf74890d2,0xfde2556a,0x4e204110, + 0x8f190409,0x7141982d } }, + /* 220 */ + { { 0x4d4b0f45,0x6f0a0e33,0x392a94e1,0xd9280b38,0xb3c61d5e,0x3af324c6, + 0x89d54e47,0x3af9d1ce }, + { 0x20930371,0xfd8f7981,0x21c17097,0xeda2664c,0xdc42309b,0x0e9545dc, + 0x73957dd6,0xb1f815c3 } }, + /* 221 */ + { { 0x89fec44a,0x84faa78e,0x3caa4caf,0xc8c2ae47,0xc1b6a624,0x691c807d, + 0x1543f052,0xa41aed14 }, + { 0x7d5ffe04,0x42435399,0x625b6e20,0x8bacb2df,0x87817775,0x85d660be, + 0x86fb60ef,0xd6e9c1dd } }, + /* 222 */ + { { 0xc6853264,0x3aa2e97e,0xe2304a0b,0x771533b7,0xb8eae9be,0x1b912bb7, + 0xae9bf8c2,0x9c9c6e10 }, + { 0xe030b74c,0xa2309a59,0x6a631e90,0x4ed7494d,0xa49b79f2,0x89f44b23, + 0x40fa61b6,0x566bd596 } }, + /* 223 */ + { { 0xc18061f3,0x066c0118,0x7c83fc70,0x190b25d3,0x27273245,0xf05fc8e0, + 0xf525345e,0xcf2c7390 }, + { 0x10eb30cf,0xa09bceb4,0x0d77703a,0xcfd2ebba,0x150ff255,0xe842c43a, + 0x8aa20979,0x02f51755 } }, + /* 224 */ + { { 0xaddb7d07,0x396ef794,0x24455500,0x0b4fc742,0xc78aa3ce,0xfaff8eac, + 0xe8d4d97d,0x14e9ada5 }, + { 0x2f7079e2,0xdaa480a1,0xe4b0800e,0x45baa3cd,0x7838157d,0x01765e2d, + 0x8e9d9ae8,0xa0ad4fab } }, + /* 225 */ + { { 0x4a653618,0x0bfb7621,0x31eaaa5f,0x1872813c,0x44949d5e,0x1553e737, + 0x6e56ed1e,0xbcd530b8 }, + { 0x32e9c47b,0x169be853,0xb50059ab,0xdc2776fe,0x192bfbb4,0xcdba9761, + 0x6979341d,0x909283cf } }, + /* 226 */ + { { 0x76e81a13,0x67b00324,0x62171239,0x9bee1a99,0xd32e19d6,0x08ed361b, + 0xace1549a,0x35eeb7c9 }, + { 0x7e4e5bdc,0x1280ae5a,0xb6ceec6e,0x2dcd2cd3,0x6e266bc1,0x52e4224c, + 0x448ae864,0x9a8b2cf4 } }, + /* 227 */ + { { 0x09d03b59,0xf6471bf2,0xb65af2ab,0xc90e62a3,0xebd5eec9,0xff7ff168, + 0xd4491379,0x6bdb60f4 }, + { 0x8a55bc30,0xdadafebc,0x10097fe0,0xc79ead16,0x4c1e3bdd,0x42e19741, + 0x94ba08a9,0x01ec3cfd } }, + /* 228 */ + { { 0xdc9485c2,0xba6277eb,0x22fb10c7,0x48cc9a79,0x70a28d8a,0x4f61d60f, + 0x475464f6,0xd1acb1c0 }, + { 0x26f36612,0xd26902b1,0xe0618d8b,0x59c3a44e,0x308357ee,0x4df8a813, + 0x405626c2,0x7dcd079d } }, + /* 229 */ + { { 0xf05a4b48,0x5ce7d4d3,0x37230772,0xadcd2952,0x812a915a,0xd18f7971, + 0x377d19b8,0x0bf53589 }, + { 0x6c68ea73,0x35ecd95a,0x823a584d,0xc7f3bbca,0xf473a723,0x9fb674c6, + 0xe16686fc,0xd28be4d9 } }, + /* 230 */ + { { 0x38fa8e4b,0x5d2b9906,0x893fd8fc,0x559f186e,0x436fb6fc,0x3a6de2aa, + 0x510f88ce,0xd76007aa }, + { 0x523a4988,0x2d10aab6,0x74dd0273,0xb455cf44,0xa3407278,0x7f467082, + 0xb303bb01,0xf2b52f68 } }, + /* 231 */ + { { 0x9835b4ca,0x0d57eafa,0xbb669cbc,0x2d2232fc,0xc6643198,0x8eeeb680, + 0xcc5aed3a,0xd8dbe98e }, + { 
0xc5a02709,0xcba9be3f,0xf5ba1fa8,0x30be68e5,0xf10ea852,0xfebd43cd, + 0xee559705,0xe01593a3 } }, + /* 232 */ + { { 0xea75a0a6,0xd3e5af50,0x57858033,0x512226ac,0xd0176406,0x6fe6d50f, + 0xaeb8ef06,0xafec07b1 }, + { 0x80bb0a31,0x7fb99567,0x37309aae,0x6f1af3cc,0x01abf389,0x9153a15a, + 0x6e2dbfdd,0xa71b9354 } }, + /* 233 */ + { { 0x18f593d2,0xbf8e12e0,0xa078122b,0xd1a90428,0x0ba4f2ad,0x150505db, + 0x628523d9,0x53a2005c }, + { 0xe7f2b935,0x07c8b639,0xc182961a,0x2bff975a,0x7518ca2c,0x86bceea7, + 0x3d588e3d,0xbf47d19b } }, + /* 234 */ + { { 0xdd7665d5,0x672967a7,0x2f2f4de5,0x4e303057,0x80d4903f,0x144005ae, + 0x39c9a1b6,0x001c2c7f }, + { 0x69efc6d6,0x143a8014,0x7bc7a724,0xc810bdaa,0xa78150a4,0x5f65670b, + 0x86ffb99b,0xfdadf8e7 } }, + /* 235 */ + { { 0xffc00785,0xfd38cb88,0x3b48eb67,0x77fa7591,0xbf368fbc,0x0454d055, + 0x5aa43c94,0x3a838e4d }, + { 0x3e97bb9a,0x56166329,0x441d94d9,0x9eb93363,0x0adb2a83,0x515591a6, + 0x873e1da3,0x3cdb8257 } }, + /* 236 */ + { { 0x7de77eab,0x137140a9,0x41648109,0xf7e1c50d,0xceb1d0df,0x762dcad2, + 0xf1f57fba,0x5a60cc89 }, + { 0x40d45673,0x80b36382,0x5913c655,0x1b82be19,0xdd64b741,0x057284b8, + 0xdbfd8fc0,0x922ff56f } }, + /* 237 */ + { { 0xc9a129a1,0x1b265dee,0xcc284e04,0xa5b1ce57,0xcebfbe3c,0x04380c46, + 0xf6c5cd62,0x72919a7d }, + { 0x8fb90f9a,0x298f453a,0x88e4031b,0xd719c00b,0x796f1856,0xe32c0e77, + 0x3624089a,0x5e791780 } }, + /* 238 */ + { { 0x7f63cdfb,0x5c16ec55,0xf1cae4fd,0x8e6a3571,0x560597ca,0xfce26bea, + 0xe24c2fab,0x4e0a5371 }, + { 0xa5765357,0x276a40d3,0x0d73a2b4,0x3c89af44,0x41d11a32,0xb8f370ae, + 0xd56604ee,0xf5ff7818 } }, + /* 239 */ + { { 0x1a09df21,0xfbf3e3fe,0xe66e8e47,0x26d5d28e,0x29c89015,0x2096bd0a, + 0x533f5e64,0xe41df0e9 }, + { 0xb3ba9e3f,0x305fda40,0x2604d895,0xf2340ceb,0x7f0367c7,0x0866e192, + 0xac4f155f,0x8edd7d6e } }, + /* 240 */ + { { 0x0bfc8ff3,0xc9a1dc0e,0xe936f42f,0x14efd82b,0xcca381ef,0x67016f7c, + 0xed8aee96,0x1432c1ca }, + { 0x70b23c26,0xec684829,0x0735b273,0xa64fe873,0xeaef0f5a,0xe389f6e5, + 0x5ac8d2c6,0xcaef480b } }, + /* 241 */ + { { 0x75315922,0x5245c978,0x3063cca5,0xd8295171,0xb64ef2cb,0xf3ce60d0, + 0x8efae236,0xd0ba177e }, + { 0xb1b3af60,0x53a9ae8f,0x3d2da20e,0x1a796ae5,0xdf9eef28,0x01d63605, + 0x1c54ae16,0xf31c957c } }, + /* 242 */ + { { 0x49cc4597,0xc0f58d52,0xbae0a028,0xdc5015b0,0x734a814a,0xefc5fc55, + 0x96e17c3a,0x013404cb }, + { 0xc9a824bf,0xb29e2585,0x001eaed7,0xd593185e,0x61ef68ac,0x8d6ee682, + 0x91933e6c,0x6f377c4b } }, + /* 243 */ + { { 0xa8333fd2,0x9f93bad1,0x5a2a95b8,0xa8930202,0xeaf75ace,0x211e5037, + 0xd2d09506,0x6dba3e4e }, + { 0xd04399cd,0xa48ef98c,0xe6b73ade,0x1811c66e,0xc17ecaf3,0x72f60752, + 0x3becf4a7,0xf13cf342 } }, + /* 244 */ + { { 0xa919e2eb,0xceeb9ec0,0xf62c0f68,0x83a9a195,0x7aba2299,0xcfba3bb6, + 0x274bbad3,0xc83fa9a9 }, + { 0x62fa1ce0,0x0d7d1b0b,0x3418efbf,0xe58b60f5,0x52706f04,0xbfa8ef9e, + 0x5d702683,0xb49d70f4 } }, + /* 245 */ + { { 0xfad5513b,0x914c7510,0xb1751e2d,0x05f32eec,0xd9fb9d59,0x6d850418, + 0x0c30f1cf,0x59cfadbb }, + { 0x55cb7fd6,0xe167ac23,0x820426a3,0x249367b8,0x90a78864,0xeaeec58c, + 0x354a4b67,0x5babf362 } }, + /* 246 */ + { { 0xee424865,0x37c981d1,0xf2e5577f,0x8b002878,0xb9e0c058,0x702970f1, + 0x9026c8f0,0x6188c6a7 }, + { 0xd0f244da,0x06f9a19b,0xfb080873,0x1ecced5c,0x9f213637,0x35470f9b, + 0xdf50b9d9,0x993fe475 } }, + /* 247 */ + { { 0x9b2c3609,0x68e31cdf,0x2c46d4ea,0x84eb19c0,0x9a775101,0x7ac9ec1a, + 0x4c80616b,0x81f76466 }, + { 0x75fbe978,0x1d7c2a5a,0xf183b356,0x6743fed3,0x501dd2bf,0x838d1f04, + 0x5fe9060d,0x564a812a } }, + /* 248 */ + { { 
0xfa817d1d,0x7a5a64f4,0xbea82e0f,0x55f96844,0xcd57f9aa,0xb5ff5a0f, + 0x00e51d6c,0x226bf3cf }, + { 0x2f2833cf,0xd6d1a9f9,0x4f4f89a8,0x20a0a35a,0x8f3f7f77,0x11536c49, + 0xff257836,0x68779f47 } }, + /* 249 */ + { { 0x73043d08,0x79b0c1c1,0x1fc020fa,0xa5446774,0x9a6d26d0,0xd3767e28, + 0xeb092e0b,0x97bcb0d1 }, + { 0xf32ed3c3,0x2ab6eaa8,0xb281bc48,0xc8a4f151,0xbfa178f3,0x4d1bf4f3, + 0x0a784655,0xa872ffe8 } }, + /* 250 */ + { { 0xa32b2086,0xb1ab7935,0x8160f486,0xe1eb710e,0x3b6ae6be,0x9bd0cd91, + 0xb732a36a,0x02812bfc }, + { 0xcf605318,0xa63fd7ca,0xfdfd6d1d,0x646e5d50,0x2102d619,0xa1d68398, + 0xfe5396af,0x07391cc9 } }, + /* 251 */ + { { 0x8b80d02b,0xc50157f0,0x62877f7f,0x6b8333d1,0x78d542ae,0x7aca1af8, + 0x7e6d2a08,0x355d2adc }, + { 0x287386e1,0xb41f335a,0xf8e43275,0xfd272a94,0xe79989ea,0x286ca2cd, + 0x7c2a3a79,0x3dc2b1e3 } }, + /* 252 */ + { { 0x04581352,0xd689d21c,0x376782be,0x0a00c825,0x9fed701f,0x203bd590, + 0x3ccd846b,0xc4786910 }, + { 0x24c768ed,0x5dba7708,0x6841f657,0x72feea02,0x6accce0e,0x73313ed5, + 0xd5bb4d32,0xccc42968 } }, + /* 253 */ + { { 0x3d7620b9,0x94e50de1,0x5992a56a,0xd89a5c8a,0x675487c9,0xdc007640, + 0xaa4871cf,0xe147eb42 }, + { 0xacf3ae46,0x274ab4ee,0x50350fbe,0xfd4936fb,0x48c840ea,0xdf2afe47, + 0x080e96e3,0x239ac047 } }, + /* 254 */ + { { 0x2bfee8d4,0x481d1f35,0xfa7b0fec,0xce80b5cf,0x2ce9af3c,0x105c4c9e, + 0xf5f7e59d,0xc55fa1a3 }, + { 0x8257c227,0x3186f14e,0x342be00b,0xc5b1653f,0xaa904fb2,0x09afc998, + 0xd4f4b699,0x094cd99c } }, + /* 255 */ + { { 0xd703beba,0x8a981c84,0x32ceb291,0x8631d150,0xe3bd49ec,0xa445f2c9, + 0x42abad33,0xb90a30b6 }, + { 0xb4a5abf9,0xb465404f,0x75db7603,0x004750c3,0xca35d89f,0x6f9a42cc, + 0x1b7924f7,0x019f8b9a } }, +}; + +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k, + int map, void* heap) +{ + return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table, + k, map, heap); +} + +#endif + +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
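+ *
+ * Illustrative call (a sketch only; wc_ecc_new_point() and the scalar k
+ * are assumed to come from the caller's context):
+ *     ecc_point* pub = wc_ecc_new_point();
+ *     int ret = sp_ecc_mulmod_base_256(k, pub, 1, NULL);
+ *     // on MP_OKAY, pub holds k.G in affine form (map = 1)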
+ */
+int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 8, km);
+
+        err = sp_256_ecc_mulmod_base_8(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_8(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_256_iszero_8(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_256_add_one_8(sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov r2, #1\n\t"
+        "ldr r1, [%[a], #0]\n\t"
+        "add r1, r2\n\t"
+        "mov r2, #0\n\t"
+        "str r1, [%[a], #0]\n\t"
+        "ldr r1, [%[a], #4]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #4]\n\t"
+        "ldr r1, [%[a], #8]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #8]\n\t"
+        "ldr r1, [%[a], #12]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #12]\n\t"
+        "ldr r1, [%[a], #16]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #16]\n\t"
+        "ldr r1, [%[a], #20]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #20]\n\t"
+        "ldr r1, [%[a], #24]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #24]\n\t"
+        "ldr r1, [%[a], #28]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #28]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "r1", "r2"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r  A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a  Byte array.
+ * n  Number of bytes in array to read.
+ */
+static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k  Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_256_ecc_gen_k_8(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[32];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_256_from_bin(k, 8, buf, (int)sizeof(buf));
+            if (sp_256_cmp_8(k, p256_order2) < 0) {
+                sp_256_add_one_8(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256 inf;
+#endif
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_gen_k_8(rng, k);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_8(infinity, point, p256_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_256_iszero_8(point->x) == 0) || (sp_256_iszero_8(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_8(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_256_point_free_8(infinity, 1, heap);
+#endif
+    sp_256_point_free_8(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 32
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_256_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 256 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<8 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
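+ *
+ * Illustrative ECDH use (a sketch; priv and peerPub are hypothetical
+ * caller variables):
+ *     byte secret[32];
+ *     word32 secretLen = (word32)sizeof(secret);
+ *     err = sp_ecc_secret_gen_256(priv, peerPub, secret, &secretLen, NULL);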
+ */
+int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#endif
+    sp_point_256* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    if (*outLen < 32U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 8, priv);
+        sp_256_point_from_ecc_point_8(point, pub);
+        err = sp_256_ecc_mulmod_8(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_256_to_bin(point->x, out);
+        *outLen = 32;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov r7, %[a]\n\t"
+        "add r7, #32\n\t"
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        "sub r5, %[c]\n\t"
+        "ldr r3, [%[a]]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a]]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        "sbc %[c], %[c]\n\t"
+        "add %[a], #8\n\t"
+        "add %[b], #8\n\t"
+        "cmp %[a], r7\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr r3, [%[a], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sub r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #0]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        "ldr r3, [%[a], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "ldr r6, [%[b], #12]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #8]\n\t"
+        "str r4, [%[a], #12]\n\t"
+        "ldr r3, [%[a], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "ldr r6, [%[b], #20]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #16]\n\t"
+        "str r4, [%[a], #20]\n\t"
+        "ldr r3, [%[a], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "ldr r6, [%[b], #28]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #24]\n\t"
+        "str r4, [%[a], #28]\n\t"
+        "sbc %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
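+ *
+ * Note: the product of an 8 digit number and a digit needs one extra
+ * digit for the final carry, so r must have room for 9 digits.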
+ */
+SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "mov r6, #32\n\t"
+        "add r6, %[a]\n\t"
+        "mov r8, %[r]\n\t"
+        "mov r9, r6\n\t"
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r5, #0\n\t"
+        "# A[] * B\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, %[b], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "lsr r7, %[b], #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, %[b], #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "lsl r7, %[b], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# A[] * B - Done\n\t"
+        "mov %[r], r8\n\t"
+        "str r3, [%[r]]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add %[r], #4\n\t"
+        "add %[a], #4\n\t"
+        "mov r8, %[r]\n\t"
+        "cmp %[a], r9\n\t"
+        "blt 1b\n\t"
+        "str r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr r5, %[div], #1\n\t"
+        "add r5, #1\n\t"
+        "mov r8, %[d0]\n\t"
+        "mov r9, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "mov r6, r5\n\t"
+        "sub r6, %[d1]\n\t"
+        "sbc r6, r6\n\t"
+        "add %[r], %[r]\n\t"
+        "sub %[r], r6\n\t"
+        "and r6, r5\n\t"
+        "sub %[d1], r6\n\t"
+        "# Next 30 bits\n\t"
+        "mov r4, #29\n\t"
+        "1:\n\t"
+        "lsl %[d0], %[d0], #1\n\t"
+        "adc %[d1], %[d1]\n\t"
+        "mov r6, r5\n\t"
+        "sub r6, %[d1]\n\t"
+        "sbc r6, r6\n\t"
+        "add %[r], %[r]\n\t"
+        "sub %[r], r6\n\t"
+        "and r6, r5\n\t"
+        "sub %[d1], r6\n\t"
+        "sub r4, #1\n\t"
+        "bpl 1b\n\t"
+        "mov r7, #0\n\t"
+        "add %[r], %[r]\n\t"
+        "add %[r], #1\n\t"
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "sub %[d1], r4\n\t"
+        "mov r4, %[d1]\n\t"
+        "mov %[d1], r9\n\t"
+        "sbc %[d1], r5\n\t"
+        "mov r5, %[d1]\n\t"
+        "add %[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "mov r6, r9\n\t"
+        "sub r4, %[d1], r4\n\t"
+        "sbc r6, r5\n\t"
+        "mov r5, r6\n\t"
+        "add %[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "mov r6, r9\n\t"
+        "sub r4, %[d1], r4\n\t"
+        "sbc r6, r5\n\t"
+        "mov r5, r6\n\t"
+        "add %[r], r5\n\t"
+        "mov r6, %[div]\n\t"
+        "sub r6, r4\n\t"
+        "sbc r6, r6\n\t"
+        "sub %[r], r6\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r7", "r6", "r8", "r9"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<8; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+#endif
+}
+
+/* Divide a by d and put the remainder into r. (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[16], t2[9];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[7];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 8);
+    for (i=7; i>=0; i--) {
+        r1 = div_256_word_8(t1[8 + i], t1[8 + i - 1], div);
+
+        sp_256_mul_d_8(t2, d, r1);
+        t1[8 + i] += sp_256_sub_in_place_8(&t1[i], t2);
+        t1[8 + i] -= t2[8];
+        sp_256_mask_8(t2, d, t1[8 + i]);
+        t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
+        sp_256_mask_8(t2, d, t1[8 + i]);
+        t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_256_cmp_8(t1, d) >= 0;
+    sp_256_cond_sub_8(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_256_div_8(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P256 curve. */
+static const uint32_t p256_order_minus_2[8] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU,
+    0x00000000U,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P256 curve. */
+static const uint32_t p256_order_low[4] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of the P256 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_256_mont_mul_order_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_256_mul_8(r, a, b);
+    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
+}
+
+/* Square number mod the order of P256 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_256_mont_sqr_order_8(sp_digit* r, const sp_digit* a)
+{
+    sp_256_sqr_8(r, a);
+    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P256 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ * n  Number of times to square.
+ */
+static void sp_256_mont_sqr_n_order_8(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_256_mont_sqr_order_8(r, a);
+    for (i=1; i<n; i++) {
+        sp_256_mont_sqr_order_8(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_order_8(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 8);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_order_8(t, t);
+        if ((p256_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 8U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 8;
+    sp_digit* t3 = td + 4 * 8;
+    int i;
+
+    /* t = a^2 */
+    sp_256_mont_sqr_order_8(t, a);
+    /* t = a^3 = t * a */
+    sp_256_mont_mul_order_8(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_256_mont_sqr_n_order_8(t2, t, 2);
+    /* t3= a^f = t2 * t */
+    sp_256_mont_mul_order_8(t3, t2, t);
+    /* t2= a^f0 = t3 ^ 2 ^ 4 */
+    sp_256_mont_sqr_n_order_8(t2, t3, 4);
+    /* t = a^ff = t2 * t3 */
+    sp_256_mont_mul_order_8(t, t2, t3);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_256_mont_sqr_n_order_8(t2, t, 8);
+    /* t = a^ffff = t2 * t */
+    sp_256_mont_mul_order_8(t, t2, t);
+    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+    sp_256_mont_sqr_n_order_8(t2, t, 16);
+    /* t = a^ffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t, t2, t);
+    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
+    sp_256_mont_sqr_n_order_8(t2, t, 64);
+    /* t2= a^ffffffff00000000ffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
+    sp_256_mont_sqr_n_order_8(t2, t2, 32);
+    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
+    for (i=127; i>=112; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+    for (i=107; i>=64; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+    for (i=59; i>=32; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+    for (i=27; i>=0; i--) {
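+        /* Square-and-multiply over the low 28 bits of order-2: by Fermat's
+         * little theorem a^(order-2) = 1/a mod order, as the order of the
+         * curve is prime. */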
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+    sp_256_mont_mul_order_8(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 256 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Second part of result as an mp_int.
+ * km       Scalar to use for k, or NULL/zero to generate one (zeroed on use).
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*8];
+    sp_digit xd[2*8];
+    sp_digit kd[2*8];
+    sp_digit rd[2*8];
+    sp_digit td[3 * 2*8];
+    sp_point_256 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int32_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 8;
+        x = d + 2 * 8;
+        k = d + 4 * 8;
+        r = d + 6 * 8;
+        tmp = d + 8 * 8;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(e, 8, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_256_from_mp(x, 8, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_256_ecc_gen_k_8(rng, k);
+        }
+        else {
+            sp_256_from_mp(k, 8, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+            err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 8U);
+            sp_256_norm_8(r);
+            c = sp_256_cmp_8(r, p256_order);
+            sp_256_cond_sub_8(r, r, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_8(r);
+
+            /* Conv k to Montgomery form (mod order) */
+            sp_256_mul_8(k, k, p256_norm_order);
+            err = sp_256_mod_8(k, k, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_8(k);
+            /* kInv = 1/k mod order */
+            sp_256_mont_inv_order_8(kInv, k, tmp);
+            sp_256_norm_8(kInv);
+
+            /* s = r * x + e */
+            sp_256_mul_8(x, x, r);
+            err = sp_256_mod_8(x, x, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_8(x);
+            carry = sp_256_add_8(s, e, x);
+            sp_256_cond_sub_8(s, s, p256_order, 0 - carry);
+            sp_256_norm_8(s);
+            c = sp_256_cmp_8(s, p256_order);
+            sp_256_cond_sub_8(s, s, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_8(s);
+
+            /* s = s * k^-1 mod order */
+            sp_256_mont_mul_order_8(s, s, kInv);
+            sp_256_norm_8(s);
+
+            /* Check that signature is usable. */
+            if (sp_256_iszero_8(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 8);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 8U);
+#endif
+    sp_256_point_free_8(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 256)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash     Hash to verify.
+ * hashLen  Length of the hash data.
+ * pX       X ordinate of the public key point.
+ * pY       Y ordinate of the public key point.
+ * pZ       Z ordinate of the public key point.
+ * r        First part of the signature as an mp_int.
+ * sm       Second part of the signature as an mp_int.
+ * res      Set to 1 when the signature verifies and 0 otherwise.
+ * heap     Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
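+ *
+ * Illustrative verify call (a sketch; pubX, pubY, pubZ, sigR and sigS are
+ * hypothetical caller variables):
+ *     int res = 0;
+ *     err = sp_ecc_verify_256(hash, 32, pubX, pubY, pubZ, sigR, sigS,
+ *                             &res, NULL);
+ *     // the signature is valid only when err == MP_OKAY and res == 1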
+ */ +int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX, + mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d = NULL; +#else + sp_digit u1d[2*8]; + sp_digit u2d[2*8]; + sp_digit sd[2*8]; + sp_digit tmpd[2*8 * 5]; + sp_point_256 p1d; + sp_point_256 p2d; +#endif + sp_digit* u1 = NULL; + sp_digit* u2 = NULL; + sp_digit* s = NULL; + sp_digit* tmp = NULL; + sp_point_256* p1; + sp_point_256* p2 = NULL; + sp_digit carry; + int32_t c; + int err; + + err = sp_256_point_new_8(heap, p1d, p1); + if (err == MP_OKAY) { + err = sp_256_point_new_8(heap, p2d, p2); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 8, heap, + DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + u1 = d + 0 * 8; + u2 = d + 2 * 8; + s = d + 4 * 8; + tmp = d + 6 * 8; +#else + u1 = u1d; + u2 = u2d; + s = sd; + tmp = tmpd; +#endif + + if (hashLen > 32U) { + hashLen = 32U; + } + + sp_256_from_bin(u1, 8, hash, (int)hashLen); + sp_256_from_mp(u2, 8, r); + sp_256_from_mp(s, 8, sm); + sp_256_from_mp(p2->x, 8, pX); + sp_256_from_mp(p2->y, 8, pY); + sp_256_from_mp(p2->z, 8, pZ); + + { + sp_256_mul_8(s, s, p256_norm_order); + } + err = sp_256_mod_8(s, s, p256_order); + } + if (err == MP_OKAY) { + sp_256_norm_8(s); + { + sp_256_mont_inv_order_8(s, s, tmp); + sp_256_mont_mul_order_8(u1, u1, s); + sp_256_mont_mul_order_8(u2, u2, s); + } + + err = sp_256_ecc_mulmod_base_8(p1, u1, 0, heap); + } + if (err == MP_OKAY) { + err = sp_256_ecc_mulmod_8(p2, p2, u2, 0, heap); + } + + if (err == MP_OKAY) { + { + sp_256_proj_point_add_8(p1, p1, p2, tmp); + if (sp_256_iszero_8(p1->z)) { + if (sp_256_iszero_8(p1->x) && sp_256_iszero_8(p1->y)) { + sp_256_proj_point_dbl_8(p1, p2, tmp); + } + else { + /* Y ordinate is not used from here - don't set. */ + p1->x[0] = 0; + p1->x[1] = 0; + p1->x[2] = 0; + p1->x[3] = 0; + p1->x[4] = 0; + p1->x[5] = 0; + p1->x[6] = 0; + p1->x[7] = 0; + XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod)); + } + } + } + + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and convert to Montgomery form. */ + sp_256_from_mp(u2, 8, r); + err = sp_256_mod_mul_norm_8(u2, u2, p256_mod); + } + + if (err == MP_OKAY) { + /* u1 = r.z'.z' mod prime */ + sp_256_mont_sqr_8(p1->z, p1->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod); + *res = (int)(sp_256_cmp_8(p1->x, u1) == 0); + if (*res == 0) { + /* Reload r and add order. */ + sp_256_from_mp(u2, 8, r); + carry = sp_256_add_8(u2, u2, p256_order); + /* Carry means result is greater than mod and is not valid. */ + if (carry == 0) { + sp_256_norm_8(u2); + + /* Compare with mod and if greater or equal then not valid. 
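+                 * The prime is less than twice the order, so the X
+                 * ordinate wraps at most once when reduced mod order:
+                 * r and r + order are the only candidates to test.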
+                 */
+                c = sp_256_cmp_8(u2, p256_mod);
+                if (c < 0) {
+                    /* Convert to Montgomery form */
+                    err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_256_mont_mul_8(u1, u2, p1->z, p256_mod,
+                                          p256_mp_mod);
+                        *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_256_point_free_8(p1, 0, heap);
+    sp_256_point_free_8(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_256_ecc_is_point_8(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*8];
+    sp_digit t2d[2*8];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 8;
+        t2 = d + 2 * 8;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_256_sqr_8(t1, point->y);
+        (void)sp_256_mod_8(t1, t1, p256_mod);
+        sp_256_sqr_8(t2, point->x);
+        (void)sp_256_mod_8(t2, t2, p256_mod);
+        sp_256_mul_8(t2, t2, point->x);
+        (void)sp_256_mod_8(t2, t2, p256_mod);
+        (void)sp_256_sub_8(t2, p256_mod, t2);
+        sp_256_mont_add_8(t1, t1, t2, p256_mod);
+
+        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+
+        if (sp_256_cmp_8(t1, p256_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 pubd;
+#endif
+    sp_point_256* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_256_point_new_8(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_256_from_mp(pub->x, 8, pX);
+        sp_256_from_mp(pub->y, 8, pY);
+        sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
+
+        err = sp_256_ecc_is_point_8(pub, NULL);
+    }
+
+    sp_256_point_free_8(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
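+ * heap   Heap to use for allocation.
+ *
+ * Illustrative call (a sketch; pubX, pubY and priv are hypothetical
+ * caller variables):
+ *     err = sp_ecc_check_key_256(pubX, pubY, priv, NULL);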
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[8];
+    sp_point_256 pubd;
+    sp_point_256 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_256* pub;
+    sp_point_256* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_256_point_new_8(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                  DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_256_from_mp(pub->x, 8, pX);
+        sp_256_from_mp(pub->y, 8, pY);
+        sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
+        sp_256_from_mp(priv, 8, privm);
+
+        /* Check point at infinity. */
+        if ((sp_256_iszero_8(pub->x) != 0) &&
+            (sp_256_iszero_8(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_256_cmp_8(pub->x, p256_mod) >= 0 ||
+            sp_256_cmp_8(pub->y, p256_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_256_ecc_is_point_8(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+        err = sp_256_ecc_mulmod_8(p, pub, p256_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_256_iszero_8(p->x) == 0) ||
+            (sp_256_iszero_8(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+        err = sp_256_ecc_mulmod_base_8(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_256_cmp_8(p->x, pub->x) != 0 ||
+            sp_256_cmp_8(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, heap);
+    sp_256_point_free_8(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX  First EC point's X ordinate.
+ * pY  First EC point's Y ordinate.
+ * pZ  First EC point's Z ordinate.
+ * qX  Second EC point's X ordinate.
+ * qY  Second EC point's Y ordinate.
+ * qZ  Second EC point's Z ordinate.
+ * rX  Resultant EC point's X ordinate.
+ * rY  Resultant EC point's Y ordinate.
+ * rZ  Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
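+ *
+ * Ordinates are Jacobian projective: the affine point represented by
+ * (X, Y, Z) is (X / Z^2, Y / Z^3) mod the prime.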
+ */ +int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 8 * 5]; + sp_point_256 pd; + sp_point_256 qd; +#endif + sp_digit* tmp; + sp_point_256* p; + sp_point_256* q = NULL; + int err; + + err = sp_256_point_new_8(NULL, pd, p); + if (err == MP_OKAY) { + err = sp_256_point_new_8(NULL, qd, q); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 8, pX); + sp_256_from_mp(p->y, 8, pY); + sp_256_from_mp(p->z, 8, pZ); + sp_256_from_mp(q->x, 8, qX); + sp_256_from_mp(q->y, 8, qY); + sp_256_from_mp(q->z, 8, qZ); + + sp_256_proj_point_add_8(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_8(q, 0, NULL); + sp_256_point_free_8(p, 0, NULL); + + return err; +} + +/* Double a projective EC point. + * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 8 * 2]; + sp_point_256 pd; +#endif + sp_digit* tmp; + sp_point_256* p; + int err; + + err = sp_256_point_new_8(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 8, pX); + sp_256_from_mp(p->y, 8, pY); + sp_256_from_mp(p->z, 8, pZ); + + sp_256_proj_point_dbl_8(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_8(p, 0, NULL); + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
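+ *
+ * The conversion computes x = X / Z^2 and y = Y / Z^3 (mod the prime) and
+ * then sets Z to one.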
+ */ +int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 8 * 4]; + sp_point_256 pd; +#endif + sp_digit* tmp; + sp_point_256* p; + int err; + + err = sp_256_point_new_8(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 4, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 8, pX); + sp_256_from_mp(p->y, 8, pY); + sp_256_from_mp(p->z, 8, pZ); + + sp_256_map_8(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, pZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_8(p, 0, NULL); + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +static int sp_256_mont_sqrt_8(sp_digit* y) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit t1d[2 * 8]; + sp_digit t2d[2 * 8]; +#endif + sp_digit* t1; + sp_digit* t2; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = d + 0 * 8; + t2 = d + 2 * 8; +#else + t1 = t1d; + t2 = t2d; +#endif + + { + /* t2 = y ^ 0x2 */ + sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0x3 */ + sp_256_mont_mul_8(t1, t2, y, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xc */ + sp_256_mont_sqr_n_8(t2, t1, 2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xf */ + sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xf0 */ + sp_256_mont_sqr_n_8(t2, t1, 4, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xff */ + sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xff00 */ + sp_256_mont_sqr_n_8(t2, t1, 8, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffff */ + sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xffff0000 */ + sp_256_mont_sqr_n_8(t2, t1, 16, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff */ + sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000000 */ + sp_256_mont_sqr_n_8(t1, t1, 32, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001 */ + sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */ + sp_256_mont_sqr_n_8(t1, t1, 96, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */ + sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod); + sp_256_mont_sqr_n_8(y, t1, 94, p256_mod, p256_mp_mod); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } 
+#endif
+
+    return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm   X ordinate.
+ * odd  Whether the Y ordinate is odd.
+ * ym   Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 8];
+    sp_digit yd[2 * 8];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        x = d + 0 * 8;
+        y = d + 2 * 8;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        sp_256_from_mp(x, 8, xm);
+        err = sp_256_mod_mul_norm_8(x, x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+        {
+            sp_256_mont_sqr_8(y, x, p256_mod, p256_mp_mod);
+            sp_256_mont_mul_8(y, y, x, p256_mod, p256_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_256_mont_sub_8(y, y, x, p256_mod);
+        sp_256_mont_sub_8(y, y, x, p256_mod);
+        sp_256_mont_sub_8(y, y, x, p256_mod);
+        /* y = x^3 - 3x + b */
+        err = sp_256_mod_mul_norm_8(x, p256_b, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_256_mont_add_8(y, y, x, p256_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_256_mont_sqrt_8(y);
+    }
+    if (err == MP_OKAY) {
+        XMEMSET(y + 8, 0, 8U * sizeof(sp_digit));
+        sp_256_mont_reduce_8(y, p256_mod, p256_mp_mod);
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_256_mont_sub_8(y, p256_mod, y, p256_mod);
+        }
+
+        err = sp_256_to_mp(y, ym);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+
+/* Point structure to use. */
+typedef struct sp_point_384 {
+    sp_digit x[2 * 12];
+    sp_digit y[2 * 12];
+    sp_digit z[2 * 12];
+    int infinity;
+} sp_point_384;
+
+/* The modulus (prime) of the curve P384. */
+static const sp_digit p384_mod[12] = {
+    0xffffffff,0x00000000,0x00000000,0xffffffff,0xfffffffe,0xffffffff,
+    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+/* The Montgomery normalizer for modulus of the curve P384. */
+static const sp_digit p384_norm_mod[12] = {
+    0x00000001,0xffffffff,0xffffffff,0x00000000,0x00000001,0x00000000,
+    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
+};
+/* The Montgomery multiplier for modulus of the curve P384. */
+static sp_digit p384_mp_mod = 0x00000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* The order of the curve P384. */
+static const sp_digit p384_order[12] = {
+    0xccc52973,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
+    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+#endif
+/* The order of the curve P384 minus 2. */
+static const sp_digit p384_order2[12] = {
+    0xccc52971,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
+    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for order of the curve P384.
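+ * This is 2^384 - order; multiplying by it and reducing mod the order is
+ * equivalent to multiplying by 2^384, which converts a value into
+ * Montgomery form for the order.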
+ */
+static const sp_digit p384_norm_order[12] = {
+    0x333ad68d,0x1313e695,0xb74f5885,0xa7e5f24d,0x0bc8d220,0x389cb27e,
+    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for order of the curve P384. */
+static sp_digit p384_mp_order = 0xe88fdc45;
+#endif
+/* The base point of curve P384. */
+static const sp_point_384 p384_base = {
+    /* X ordinate */
+    {
+        0x72760ab7,0x3a545e38,0xbf55296c,0x5502f25d,0x82542a38,0x59f741e0,
+        0x8ba79b98,0x6e1d3b62,0xf320ad74,0x8eb1c71e,0xbe8b0537,0xaa87ca22,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x90ea0e5f,0x7a431d7c,0x1d7e819d,0x0a60b1ce,0xb5f0b8c0,0xe9da3113,
+        0x289a147c,0xf8f41dbd,0x9292dc29,0x5d9e98bf,0x96262c6f,0x3617de4a,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p384_b[12] = {
+    0xd3ec2aef,0x2a85c8ed,0x8a2ed19d,0xc656398d,0x5013875a,0x0314088f,
+    0xfe814112,0x181d9c6e,0xe3f82d19,0x988e056b,0xe23ee7e4,0xb3312fa7
+};
+#endif
+
+static int sp_384_point_new_ex_12(void* heap, sp_point_384* sp, sp_point_384** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), &(sp), &(p))
+#endif
+
+
+static void sp_384_point_free_12(sp_point_384* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by the Montgomery normalizer mod modulus (prime).
+ *
+ * r  The resulting Montgomery form number.
+ * a  The number to convert.
+ * m  The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
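+ *
+ * In effect r = (a * 2^384) mod m, i.e. a converted into Montgomery
+ * representation for the prime.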
+ */ +static int sp_384_mod_mul_norm_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + int64_t* t; +#else + int64_t t[12]; +#endif + int64_t o; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (int64_t*)XMALLOC(sizeof(int64_t) * 12, NULL, DYNAMIC_TYPE_ECC); + if (t == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + /* 1 0 0 0 0 0 0 0 1 1 0 -1 */ + t[0] = 0 + (uint64_t)a[0] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[11]; + /* -1 1 0 0 0 0 0 0 -1 0 1 1 */ + t[1] = 0 - (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[8] + (uint64_t)a[10] + (uint64_t)a[11]; + /* 0 -1 1 0 0 0 0 0 0 -1 0 1 */ + t[2] = 0 - (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[9] + (uint64_t)a[11]; + /* 1 0 -1 1 0 0 0 0 1 1 -1 -1 */ + t[3] = 0 + (uint64_t)a[0] - (uint64_t)a[2] + (uint64_t)a[3] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[10] - (uint64_t)a[11]; + /* 1 1 0 -1 1 0 0 0 1 2 1 -2 */ + t[4] = 0 + (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[3] + (uint64_t)a[4] + (uint64_t)a[8] + 2 * (uint64_t)a[9] + (uint64_t)a[10] - 2 * (uint64_t)a[11]; + /* 0 1 1 0 -1 1 0 0 0 1 2 1 */ + t[5] = 0 + (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[4] + (uint64_t)a[5] + (uint64_t)a[9] + 2 * (uint64_t)a[10] + (uint64_t)a[11]; + /* 0 0 1 1 0 -1 1 0 0 0 1 2 */ + t[6] = 0 + (uint64_t)a[2] + (uint64_t)a[3] - (uint64_t)a[5] + (uint64_t)a[6] + (uint64_t)a[10] + 2 * (uint64_t)a[11]; + /* 0 0 0 1 1 0 -1 1 0 0 0 1 */ + t[7] = 0 + (uint64_t)a[3] + (uint64_t)a[4] - (uint64_t)a[6] + (uint64_t)a[7] + (uint64_t)a[11]; + /* 0 0 0 0 1 1 0 -1 1 0 0 0 */ + t[8] = 0 + (uint64_t)a[4] + (uint64_t)a[5] - (uint64_t)a[7] + (uint64_t)a[8]; + /* 0 0 0 0 0 1 1 0 -1 1 0 0 */ + t[9] = 0 + (uint64_t)a[5] + (uint64_t)a[6] - (uint64_t)a[8] + (uint64_t)a[9]; + /* 0 0 0 0 0 0 1 1 0 -1 1 0 */ + t[10] = 0 + (uint64_t)a[6] + (uint64_t)a[7] - (uint64_t)a[9] + (uint64_t)a[10]; + /* 0 0 0 0 0 0 0 1 1 0 -1 1 */ + t[11] = 0 + (uint64_t)a[7] + (uint64_t)a[8] - (uint64_t)a[10] + (uint64_t)a[11]; + + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + t[8] += t[7] >> 32; t[7] &= 0xffffffff; + t[9] += t[8] >> 32; t[8] &= 0xffffffff; + t[10] += t[9] >> 32; t[9] &= 0xffffffff; + t[11] += t[10] >> 32; t[10] &= 0xffffffff; + o = t[11] >> 32; t[11] &= 0xffffffff; + t[0] += o; + t[1] -= o; + t[3] += o; + t[4] += o; + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + t[8] += t[7] >> 32; t[7] &= 0xffffffff; + t[9] += t[8] >> 32; t[8] &= 0xffffffff; + t[10] += t[9] >> 32; t[9] &= 0xffffffff; + t[11] += t[10] >> 32; t[10] &= 0xffffffff; + + r[0] = t[0]; + r[1] = t[1]; + r[2] = t[2]; + r[3] = t[3]; + r[4] = t[4]; + r[5] = t[5]; + r[6] = t[6]; + r[7] = t[7]; + r[8] = t[8]; + r[9] = t[9]; + r[10] = t[10]; + r[11] = t[11]; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) + XFREE(t, NULL, DYNAMIC_TYPE_ECC); +#endif + + return 
err; +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 32 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 32 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 32U) <= (word32)DIGIT_BIT) { + s += 32U; + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 32) { + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + s = 32 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Convert a point of type ecc_point to type sp_point_384. + * + * p Point of type sp_point_384 (result). + * pm Point of type ecc_point. + */ +static void sp_384_point_from_ecc_point_12(sp_point_384* p, const ecc_point* pm) +{ + XMEMSET(p->x, 0, sizeof(p->x)); + XMEMSET(p->y, 0, sizeof(p->y)); + XMEMSET(p->z, 0, sizeof(p->z)); + sp_384_from_mp(p->x, 12, pm->x); + sp_384_from_mp(p->y, 12, pm->y); + sp_384_from_mp(p->z, 12, pm->z); + p->infinity = 0; +} + +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_384_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 12); + r->used = 12; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 12; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 32 - s; + } + r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 12; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Convert a point of type sp_point_384 to type ecc_point. + * + * p Point of type sp_point_384. + * pm Point of type ecc_point (result). + * returns MEMORY_E when allocation of memory in ecc_point fails otherwise + * MP_OKAY. 
+ */ +static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm) +{ + int err; + + err = sp_384_to_mp(p->x, pm->x); + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, pm->y); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, pm->z); + } + + return err; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[12 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r9, %[a]\n\t" + "mov r10, %[b]\n\t" + "mov r6, #48\n\t" + "add r6, r9\n\t" + "mov r12, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #44\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov %[b], r8\n\t" + "sub %[b], %[a]\n\t" + "add %[a], r9\n\t" + "add %[b], r10\n\t" + "\n2:\n\t" + "# Multiply Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply Done\n\t" + "add %[a], #4\n\t" + "sub %[b], #4\n\t" + "cmp %[a], r12\n\t" + "beq 3f\n\t" + "mov r6, r8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #88\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[a], r9\n\t" + "mov %[b], r10\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #48\n\t" + "mov r8, r5\n\t" + "mov r7, #0\n\t" + "1:\n\t" + "ldr r6, [%[b], r7]\n\t" + "and r6, %[m]\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r5, [%[a], r7]\n\t" + "sbc r5, r6\n\t" + "sbc %[c], %[c]\n\t" + "str r5, [%[r], r7]\n\t" + "add r7, #4\n\t" + "cmp r7, r8\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r7", "r8" + ); + + return c; +} + +#define sp_384_mont_reduce_order_12 sp_384_mont_reduce_12 + +/* Reduce the number back to 384 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. 
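+ *   Each pass chooses mu = a[i] * mp so that adding mu * m zeroes the
+ *   word a[i]; after 12 passes the high half of a, less a conditional
+ *   subtraction of m, is the reduced result.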
+ * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "mov r8, %[mp]\n\t" + "mov r12, %[ca]\n\t" + "mov r14, %[m]\n\t" + "mov r9, %[a]\n\t" + "mov r4, #0\n\t" + "# i = 0\n\t" + "mov r11, r4\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "mov %[ca], #0\n\t" + "# mu = a[i] * mp\n\t" + "mov %[mp], r8\n\t" + "ldr %[a], [%[a]]\n\t" + "mul %[mp], %[a]\n\t" + "mov %[m], r14\n\t" + "mov r10, r9\n\t" + "\n2:\n\t" + "# a[i+j] += m[j] * mu\n\t" + "mov %[a], r10\n\t" + "ldr %[a], [%[a]]\n\t" + "mov %[ca], #0\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "# Multiply m[j] and mu - Start\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r6, %[mp], #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add %[a], r7\n\t" + "adc r5, %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add %[a], r6\n\t" + "adc r5, r7\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r6, %[mp], #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r5, r7\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add %[a], r6\n\t" + "adc r5, r7\n\t" + "# Multiply m[j] and mu - Done\n\t" + "add r4, %[a]\n\t" + "adc r5, %[ca]\n\t" + "mov %[a], r10\n\t" + "str r4, [%[a]]\n\t" + "mov r6, #4\n\t" + "add %[m], #4\n\t" + "add r10, r6\n\t" + "mov r4, #44\n\t" + "add r4, r9\n\t" + "cmp r10, r4\n\t" + "blt 2b\n\t" + "# a[i+11] += m[11] * mu\n\t" + "mov %[ca], #0\n\t" + "mov r4, r12\n\t" + "mov %[a], #0\n\t" + "# Multiply m[11] and mu - Start\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r6, %[mp], #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r5, r7\n\t" + "adc r4, %[ca]\n\t" + "adc %[a], %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r5, r6\n\t" + "adc r4, r7\n\t" + "adc %[a], %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r6, %[mp], #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc %[a], %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r5, r6\n\t" + "adc r4, r7\n\t" + "adc %[a], %[ca]\n\t" + "# Multiply m[11] and mu - Done\n\t" + "mov %[ca], %[a]\n\t" + "mov %[a], r10\n\t" + "ldr r7, [%[a], #4]\n\t" + "ldr %[a], [%[a]]\n\t" + "mov r6, #0\n\t" + "add r5, %[a]\n\t" + "adc r7, r4\n\t" + "adc %[ca], r6\n\t" + "mov %[a], r10\n\t" + "str r5, [%[a]]\n\t" + "str r7, [%[a], #4]\n\t" + "# i += 1\n\t" + "mov r6, #4\n\t" + "add r9, r6\n\t" + "add r11, r6\n\t" + "mov r12, %[ca]\n\t" + "mov %[a], r9\n\t" + "mov r4, #48\n\t" + "cmp r11, r4\n\t" + "blt 1b\n\t" + "mov %[m], r14\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. 
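+ *
+ * Note: with R = 2^384, the value computed is a * b * R^-1 mod m, so the
+ * product remains in Montgomery form.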
+ */ +static void sp_384_mont_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_384_mul_12(r, a, b); + sp_384_mont_reduce_12(r, m, mp); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r6, #96\n\t" + "neg r6, r6\n\t" + "add sp, r6\n\t" + "mov r10, sp\n\t" + "mov r9, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #44\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov r2, r8\n\t" + "sub r2, %[a]\n\t" + "add %[a], r9\n\t" + "add r2, r9\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + "# Multiply * 2: Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply * 2: Done\n\t" + "bal 5f\n\t" + "\n4:\n\t" + "# Square: Start\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r6\n\t" + "add r3, r6\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "mul r7, r7\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Square: Done\n\t" + "\n5:\n\t" + "add %[a], #4\n\t" + "sub r2, #4\n\t" + "mov r6, #48\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r7, r8\n\t" + "add r7, r9\n\t" + "cmp %[a], r7\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r10\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #88\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "mov %[a], r9\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov %[a], r10\n\t" + "mov r3, #92\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "sub r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #96\n\t" + "add sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. 
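+ *
+ * Note: with R = 2^384, the value computed is a * a * R^-1 mod m.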
+ */ +static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_384_sqr_12(r, a); + sp_384_mont_reduce_12(r, m, mp); +} + +#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY) +/* Square the Montgomery form number a number of times. (r = a ^ n mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * n Number of times to square. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n, + const sp_digit* m, sp_digit mp) +{ + sp_384_mont_sqr_12(r, a, m, mp); + for (; n > 1; n--) { + sp_384_mont_sqr_12(r, r, m, mp); + } +} + +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ +#ifdef WOLFSSL_SP_SMALL +/* Mod-2 for the P384 curve. */ +static const uint32_t p384_mod_minus_2[12] = { + 0xfffffffdU,0x00000000U,0x00000000U,0xffffffffU,0xfffffffeU,0xffffffffU, + 0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU +}; +#endif /* !WOLFSSL_SP_SMALL */ + +/* Invert the number, in Montgomery form, modulo the modulus (prime) of the + * P384 curve. (r = 1 / a mod m) + * + * r Inverse result. + * a Number to invert. + * td Temporary data. + */ +static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 12); + for (i=382; i>=0; i--) { + sp_384_mont_sqr_12(t, t, p384_mod, p384_mp_mod); + if (p384_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32))) + sp_384_mont_mul_12(t, t, a, p384_mod, p384_mp_mod); + } + XMEMCPY(r, t, sizeof(sp_digit) * 12); +#else + sp_digit* t1 = td; + sp_digit* t2 = td + 2 * 12; + sp_digit* t3 = td + 4 * 12; + sp_digit* t4 = td + 6 * 12; + sp_digit* t5 = td + 8 * 12; + + /* 0x2 */ + sp_384_mont_sqr_12(t1, a, p384_mod, p384_mp_mod); + /* 0x3 */ + sp_384_mont_mul_12(t5, t1, a, p384_mod, p384_mp_mod); + /* 0xc */ + sp_384_mont_sqr_n_12(t1, t5, 2, p384_mod, p384_mp_mod); + /* 0xf */ + sp_384_mont_mul_12(t2, t5, t1, p384_mod, p384_mp_mod); + /* 0x1e */ + sp_384_mont_sqr_12(t1, t2, p384_mod, p384_mp_mod); + /* 0x1f */ + sp_384_mont_mul_12(t4, t1, a, p384_mod, p384_mp_mod); + /* 0x3e0 */ + sp_384_mont_sqr_n_12(t1, t4, 5, p384_mod, p384_mp_mod); + /* 0x3ff */ + sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0x7fe0 */ + sp_384_mont_sqr_n_12(t1, t2, 5, p384_mod, p384_mp_mod); + /* 0x7fff */ + sp_384_mont_mul_12(t4, t4, t1, p384_mod, p384_mp_mod); + /* 0x3fff8000 */ + sp_384_mont_sqr_n_12(t1, t4, 15, p384_mod, p384_mp_mod); + /* 0x3fffffff */ + sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0xfffffffc */ + sp_384_mont_sqr_n_12(t3, t2, 2, p384_mod, p384_mp_mod); + /* 0xfffffffd */ + sp_384_mont_mul_12(r, t3, a, p384_mod, p384_mp_mod); + /* 0xffffffff */ + sp_384_mont_mul_12(t3, t5, t3, p384_mod, p384_mp_mod); + /* 0xfffffffc0000000 */ + sp_384_mont_sqr_n_12(t1, t2, 30, p384_mod, p384_mp_mod); + /* 0xfffffffffffffff */ + sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffff000000000000000 */ + sp_384_mont_sqr_n_12(t1, t2, 60, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffff */ + sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */ + sp_384_mont_sqr_n_12(t1, t2, 120, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */ + 
sp_384_mont_sqr_n_12(t1, t2, 15, p384_mod, p384_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */ + sp_384_mont_sqr_n_12(t1, t2, 33, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */ + sp_384_mont_mul_12(t2, t3, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */ + sp_384_mont_sqr_n_12(t1, t2, 96, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */ + sp_384_mont_mul_12(r, r, t1, p384_mod, p384_mp_mod); + +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +SP_NOINLINE static int32_t sp_384_cmp_12(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; + + + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mvn r3, r3\n\t" + "mov r6, #44\n\t" + "1:\n\t" + "ldr r7, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r7, r3\n\t" + "and r5, r3\n\t" + "mov r4, r7\n\t" + "sub r7, r5\n\t" + "sbc r7, r7\n\t" + "add %[r], r7\n\t" + "mvn r7, r7\n\t" + "and r3, r7\n\t" + "sub r5, r4\n\t" + "sbc r7, r7\n\t" + "sub %[r], r7\n\t" + "mvn r7, r7\n\t" + "and r3, r7\n\t" + "sub r6, #4\n\t" + "cmp r6, #0\n\t" + "bge 1b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "r3", "r4", "r5", "r6", "r7" + ); + + return r; +} + +/* Normalize the values in each word to 32. + * + * a Array of sp_digit to normalize. + */ +#define sp_384_norm_12(a) + +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. + * p Montgomery form projective coordinate point. + * t Temporary ordinate data. + */ +static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + int32_t n; + + sp_384_mont_inv_12(t1, p->z, t + 2*12); + + sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod); + + /* x /= z^2 */ + sp_384_mont_mul_12(r->x, p->x, t2, p384_mod, p384_mp_mod); + XMEMSET(r->x + 12, 0, sizeof(r->x) / 2U); + sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod); + /* Reduce x to less than modulus */ + n = sp_384_cmp_12(r->x, p384_mod); + sp_384_cond_sub_12(r->x, r->x, p384_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_12(r->x); + + /* y /= z^3 */ + sp_384_mont_mul_12(r->y, p->y, t1, p384_mod, p384_mp_mod); + XMEMSET(r->y + 12, 0, sizeof(r->y) / 2U); + sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod); + /* Reduce y to less than modulus */ + n = sp_384_cmp_12(r->y, p384_mod); + sp_384_cond_sub_12(r->y, r->y, p384_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_12(r->y); + + XMEMSET(r->z, 0, sizeof(r->z)); + r->z[0] = 1; + +} + +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
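+ * returns the carry out of the addition (0 or 1).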
+ */ +SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "mov r7, #0\n\t" + "add r6, #48\n\t" + "sub r7, #1\n\t" + "\n1:\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r]]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #4\n\t" + "add %[b], #4\n\t" + "add %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#else +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "add r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Add two Montgomery form numbers (r = a + b % m). + * + * r Result of addition. + * a First number to add in Montogmery form. + * b Second number to add in Montogmery form. + * m Modulus (prime). + */ +SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + sp_digit o; + + o = sp_384_add_12(r, a, b); + sp_384_cond_sub_12(r, r, m, 0 - o); +} + +/* Double a Montgomery form number (r = a + a % m). + * + * r Result of doubling. + * a Number to double in Montogmery form. + * m Modulus (prime). + */ +SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit o; + + o = sp_384_add_12(r, a, a); + sp_384_cond_sub_12(r, r, m, 0 - o); +} + +/* Triple a Montgomery form number (r = a + a + a % m). + * + * r Result of Tripling. + * a Number to triple in Montogmery form. + * m Modulus (prime). + */ +SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit o; + + o = sp_384_add_12(r, a, a); + sp_384_cond_sub_12(r, r, m, 0 - o); + o = sp_384_add_12(r, r, a); + sp_384_cond_sub_12(r, r, m, 0 - o); +} + +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. 
(r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "add r6, #48\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "sbc r4, r5\n\t" + "str r4, [%[r]]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #4\n\t" + "add %[b], #4\n\t" + "add %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6" + ); + + return c; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "sub r4, r6\n\t" + "sbc r5, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "sbc r4, r6\n\t" + "sbc r5, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "sbc r4, r6\n\t" + "sbc r5, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "sbc r4, r6\n\t" + "sbc r5, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r5, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "sbc r4, r6\n\t" + "sbc r5, r7\n\t" + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" + "sbc r4, r6\n\t" + "sbc r5, r7\n\t" + "str r4, [%[r], #40]\n\t" + "str r5, [%[r], #44]\n\t" + "sbc %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #48\n\t" + "mov r8, r5\n\t" + "mov r7, #0\n\t" + "1:\n\t" + "ldr r6, [%[b], r7]\n\t" + "and r6, %[m]\n\t" + "mov r5, #0\n\t" + "sub r5, #1\n\t" + "add r5, %[c]\n\t" + "ldr r5, [%[a], r7]\n\t" + "adc r5, r6\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "str r5, [%[r], r7]\n\t" + "add r7, #4\n\t" + "cmp r7, r8\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r7", "r8" + ); + + return c; +} + +/* Subtract two Montgomery form numbers (r = a - b % m). + * + * r Result of subtration. + * a Number to subtract from in Montogmery form. + * b Number to subtract with in Montogmery form. + * m Modulus (prime). 
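+ *
+ * Note: a borrow out of the raw subtraction produces an all-ones mask
+ * that selects a conditional add of m, keeping the result in [0, m).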
+ */ +SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + sp_digit o; + + o = sp_384_sub_12(r, a, b); + sp_384_cond_add_12(r, r, m, o); +} + +static void sp_384_rshift1_12(sp_digit* r, sp_digit* a) +{ + __asm__ __volatile__ ( + "ldr r2, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" + "lsr r2, r2, #1\n\t" + "lsl r5, r3, #31\n\t" + "lsr r3, r3, #1\n\t" + "orr r2, r2, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r2, [%[r], #0]\n\t" + "lsl r5, r4, #31\n\t" + "lsr r4, r4, #1\n\t" + "orr r3, r3, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r3, [%[r], #4]\n\t" + "lsl r5, r2, #31\n\t" + "lsr r2, r2, #1\n\t" + "orr r4, r4, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r4, [%[r], #8]\n\t" + "lsl r5, r3, #31\n\t" + "lsr r3, r3, #1\n\t" + "orr r2, r2, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r2, [%[r], #12]\n\t" + "lsl r5, r4, #31\n\t" + "lsr r4, r4, #1\n\t" + "orr r3, r3, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r3, [%[r], #16]\n\t" + "lsl r5, r2, #31\n\t" + "lsr r2, r2, #1\n\t" + "orr r4, r4, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r4, [%[r], #20]\n\t" + "lsl r5, r3, #31\n\t" + "lsr r3, r3, #1\n\t" + "orr r2, r2, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r2, [%[r], #24]\n\t" + "lsl r5, r4, #31\n\t" + "lsr r4, r4, #1\n\t" + "orr r3, r3, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r3, [%[r], #28]\n\t" + "lsl r5, r2, #31\n\t" + "lsr r2, r2, #1\n\t" + "orr r4, r4, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r4, [%[r], #32]\n\t" + "lsl r5, r3, #31\n\t" + "lsr r3, r3, #1\n\t" + "orr r2, r2, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r2, [%[r], #36]\n\t" + "lsl r5, r4, #31\n\t" + "lsr r4, r4, #1\n\t" + "orr r3, r3, r5\n\t" + "str r3, [%[r], #40]\n\t" + "str r4, [%[r], #44]\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5" + ); +} + +/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) + * + * r Result of division by 2. + * a Number to divide. + * m Modulus (prime). + */ +SP_NOINLINE static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit o; + + o = sp_384_cond_add_12(r, a, m, 0 - (a[0] & 1)); + sp_384_rshift1_12(r, r); + r[11] |= o << 31; +} + +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. 
*/ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_12(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_12(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_12(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_12(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_12(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_div2_12(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_12(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_12(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_12(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_12(y, y, t2, p384_mod); +} + +/* Compare two numbers to determine if they are equal. + * Constant time implementation. + * + * a First number to compare. + * b Second number to compare. + * returns 1 when equal and 0 otherwise. + */ +static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) | + (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) | + (a[8] ^ b[8]) | (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11])) == 0; +} + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_384_proj_point_add_12(sp_point_384* r, const sp_point_384* p, const sp_point_384* q, + sp_digit* t) +{ + const sp_point_384* ap[2]; + sp_point_384* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + sp_digit* t3 = t + 4*12; + sp_digit* t4 = t + 6*12; + sp_digit* t5 = t + 8*12; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Ensure only the first point is the same as the result. 
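+     * When q aliases r, p and q are swapped below so that only the first
+     * point aliases the result.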
*/ + if (q == r) { + const sp_point_384* a = p; + p = q; + q = a; + } + + /* Check double */ + (void)sp_384_sub_12(t1, p384_mod, q->y); + sp_384_norm_12(t1); + if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & + (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { + sp_384_proj_point_dbl_12(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_384)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<12; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<12; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<12; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t1, t1, x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_12(t3, t3, y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_12(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_12(t4, t4, t3, p384_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(x, x, t5, p384_mod); + sp_384_mont_dbl_12(t1, y, p384_mod); + sp_384_mont_sub_12(x, x, t1, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_12(y, y, x, p384_mod); + sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(y, y, t5, p384_mod); + } +} + +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
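+ *
+ * Note: a fixed 4-bit window is used: the 16 small multiples 0..15 of g
+ * are pre-computed, then each nibble of the scalar costs four doublings
+ * and one table addition.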
+ */ +static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k, + int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 td[16]; + sp_point_384 rtd; + sp_digit tmpd[2 * 12 * 6]; +#endif + sp_point_384* t; + sp_point_384* rt; + sp_digit* tmp; + sp_digit n; + int i; + int c, y; + int err; + + (void)heap; + + err = sp_384_point_new_12(heap, rtd, rt); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#else + t = td; + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + (void)sp_384_mod_mul_norm_12(t[1].x, g->x, p384_mod); + (void)sp_384_mod_mul_norm_12(t[1].y, g->y, p384_mod); + (void)sp_384_mod_mul_norm_12(t[1].z, g->z, p384_mod); + t[1].infinity = 0; + sp_384_proj_point_dbl_12(&t[ 2], &t[ 1], tmp); + t[ 2].infinity = 0; + sp_384_proj_point_add_12(&t[ 3], &t[ 2], &t[ 1], tmp); + t[ 3].infinity = 0; + sp_384_proj_point_dbl_12(&t[ 4], &t[ 2], tmp); + t[ 4].infinity = 0; + sp_384_proj_point_add_12(&t[ 5], &t[ 3], &t[ 2], tmp); + t[ 5].infinity = 0; + sp_384_proj_point_dbl_12(&t[ 6], &t[ 3], tmp); + t[ 6].infinity = 0; + sp_384_proj_point_add_12(&t[ 7], &t[ 4], &t[ 3], tmp); + t[ 7].infinity = 0; + sp_384_proj_point_dbl_12(&t[ 8], &t[ 4], tmp); + t[ 8].infinity = 0; + sp_384_proj_point_add_12(&t[ 9], &t[ 5], &t[ 4], tmp); + t[ 9].infinity = 0; + sp_384_proj_point_dbl_12(&t[10], &t[ 5], tmp); + t[10].infinity = 0; + sp_384_proj_point_add_12(&t[11], &t[ 6], &t[ 5], tmp); + t[11].infinity = 0; + sp_384_proj_point_dbl_12(&t[12], &t[ 6], tmp); + t[12].infinity = 0; + sp_384_proj_point_add_12(&t[13], &t[ 7], &t[ 6], tmp); + t[13].infinity = 0; + sp_384_proj_point_dbl_12(&t[14], &t[ 7], tmp); + t[14].infinity = 0; + sp_384_proj_point_add_12(&t[15], &t[ 8], &t[ 7], tmp); + t[15].infinity = 0; + + i = 10; + n = k[i+1] << 0; + c = 28; + y = n >> 28; + XMEMCPY(rt, &t[y], sizeof(sp_point_384)); + n <<= 4; + for (; i>=0 || c>=4; ) { + if (c < 4) { + n |= k[i--]; + c += 32; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + + sp_384_proj_point_dbl_12(rt, rt, tmp); + sp_384_proj_point_dbl_12(rt, rt, tmp); + sp_384_proj_point_dbl_12(rt, rt, tmp); + sp_384_proj_point_dbl_12(rt, rt, tmp); + + sp_384_proj_point_add_12(rt, rt, &t[y], tmp); + } + + if (map != 0) { + sp_384_map_12(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_384)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 12 * 6); + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + } + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_point_384) * 16); + XFREE(t, heap, DYNAMIC_TYPE_ECC); + } +#else + ForceZero(tmpd, sizeof(tmpd)); + ForceZero(td, sizeof(td)); +#endif + sp_384_point_free_12(rt, 1, heap); + + return err; +} + +/* A table entry for pre-computed points. */ +typedef struct sp_table_entry_384 { + sp_digit x[12]; + sp_digit y[12]; +} sp_table_entry_384; + +#ifdef FP_ECC +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. 
+ * n Number of times to double + * t Temporary ordinate data. + */ +static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*12; + sp_digit* b = t + 4*12; + sp_digit* t1 = t + 6*12; + sp_digit* t2 = t + 8*12; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_384_mont_dbl_12(y, y, p384_mod); + /* W = Z^4 */ + sp_384_mont_sqr_12(w, z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(w, w, p384_mod, p384_mp_mod); + +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(t1, t1, w, p384_mod); + sp_384_mont_tpl_12(a, t1, p384_mod); + /* B = X*Y^2 */ + sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod); + /* X = A^2 - 2B */ + sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(t2, b, p384_mod); + sp_384_mont_sub_12(x, x, t2, p384_mod); + /* Z = Z*Y */ + sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod); + /* t2 = Y^4 */ + sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_384_mont_mul_12(w, w, t1, p384_mod, p384_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_384_mont_sub_12(y, b, x, p384_mod); + sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(y, y, p384_mod); + sp_384_mont_sub_12(y, y, t1, p384_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(t1, t1, w, p384_mod); + sp_384_mont_tpl_12(a, t1, p384_mod); + /* B = X*Y^2 */ + sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod); + /* X = A^2 - 2B */ + sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(t2, b, p384_mod); + sp_384_mont_sub_12(x, x, t2, p384_mod); + /* Z = Z*Y */ + sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod); + /* t2 = Y^4 */ + sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_384_mont_sub_12(y, b, x, p384_mod); + sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(y, y, p384_mod); + sp_384_mont_sub_12(y, y, t1, p384_mod); +#endif + /* Y = Y/2 */ + sp_384_div2_12(y, y, p384_mod); +} + +#endif /* FP_ECC */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
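+ *
+ * Note: since q->z is one in Montgomery form, U1 = X1 and S1 = Y1 are
+ * used directly, saving four multiplications and a squaring over the
+ * general addition.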
+ */ +static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p, + const sp_point_384* q, sp_digit* t) +{ + const sp_point_384* ap[2]; + sp_point_384* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + sp_digit* t3 = t + 4*12; + sp_digit* t4 = t + 6*12; + sp_digit* t5 = t + 8*12; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Check double */ + (void)sp_384_sub_12(t1, p384_mod, q->y); + sp_384_norm_12(t1); + if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & + (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { + sp_384_proj_point_dbl_12(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_384)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<12; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<12; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<12; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); + /* H = U2 - X1 */ + sp_384_mont_sub_12(t2, t2, x, p384_mod); + /* R = S2 - Y1 */ + sp_384_mont_sub_12(t4, t4, y, p384_mod); + /* Z3 = H*Z1 */ + sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_384_mont_sqr_12(t1, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t3, x, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(x, t1, t5, p384_mod); + sp_384_mont_dbl_12(t1, t3, p384_mod); + sp_384_mont_sub_12(x, x, t1, p384_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_384_mont_sub_12(t3, t3, x, p384_mod); + sp_384_mont_mul_12(t3, t3, t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, y, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(y, t3, t5, p384_mod); + } +} + +#ifdef WOLFSSL_SP_SMALL +#ifdef FP_ECC +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_384_proj_to_affine_12(sp_point_384* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 12; + sp_digit* tmp = t + 4 * 12; + + sp_384_mont_inv_12(t1, a->z, tmp); + + sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod); + + sp_384_mont_mul_12(a->x, a->x, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(a->y, a->y, t1, p384_mod, p384_mp_mod); + XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod)); +} + +/* Generate the pre-computed table of points for the base point. + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. 
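+ *
+ * Note: the table is filled with all 16 combinations of the four points
+ * a*2^0, a*2^96, a*2^192 and a*2^288, stored affine in Montgomery form.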
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_12(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<4; i++) {
+            sp_384_proj_point_dbl_n_12(t, 96, tmp);
+            sp_384_proj_to_affine_12(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<4; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+                sp_384_proj_to_affine_12(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_12(s2, 0, heap);
+    sp_384_point_free_12(s1, 0, heap);
+    sp_384_point_free_12( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
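+ *
+ * Note: each step takes one scalar bit from each of the four 96-bit
+ * strips to form the table index, so the main loop covers 96 bit
+ * positions rather than 384.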
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 12 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=95; j<4; j++,x+=96) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=94; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<4; j++,x+=96) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_12(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[12];
+    sp_digit y[12];
+    sp_table_entry_384 table[16];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+                sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
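+ *
+ * Note: with FP_ECC, a point's stripe table is generated on its second
+ * use; earlier uses take the windowed (fast) path.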
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 12 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_384 == 0) {
+        wc_InitMutex(&sp_cache_384_lock);
+        initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+        err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a The base point.
+ * table Place to store generated point data.
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_12(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_384_proj_point_dbl_n_12(t, 48, tmp);
+            sp_384_proj_to_affine_12(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+                sp_384_proj_to_affine_12(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_12(s2, 0, heap);
+    sp_384_point_free_12(s1, 0, heap);
+    sp_384_point_free_12( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
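+ *
+ * Note: this variant uses eight 48-bit strips and a 256-entry table,
+ * halving the main loop length of the small build at the cost of a much
+ * larger table.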
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 12 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=47; j<8; j++,x+=48) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=46; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=48) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_12(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[12];
+    sp_digit y[12];
+    sp_table_entry_384 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+                sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k, + int map, void* heap) +{ +#ifndef FP_ECC + return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap); +#else + sp_digit tmp[2 * 12 * 7]; + sp_cache_384_t* cache; + int err = MP_OKAY; + +#ifndef HAVE_THREAD_LS + if (initCacheMutex_384 == 0) { + wc_InitMutex(&sp_cache_384_lock); + initCacheMutex_384 = 1; + } + if (wc_LockMutex(&sp_cache_384_lock) != 0) + err = BAD_MUTEX_E; +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_384(g, &cache); + if (cache->cnt == 2) + sp_384_gen_stripe_table_12(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_384_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap); + } + else { + err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k, + map, heap); + } + } + + return err; +#endif +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map, + void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 p; + sp_digit kd[12]; +#endif + sp_point_384* point; + sp_digit* k = NULL; + int err = MP_OKAY; + + err = sp_384_point_new_12(heap, p, point); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + if (err == MP_OKAY) { + sp_384_from_mp(k, 12, km); + sp_384_point_from_ecc_point_12(point, gm); + + err = sp_384_ecc_mulmod_12(point, point, k, map, heap); + } + if (err == MP_OKAY) { + err = sp_384_point_to_ecc_point_12(point, r); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_12(point, 0, heap); + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +static const sp_table_entry_384 p384_table[16] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc, + 0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 }, + { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756, + 0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } }, + /* 2 */ + { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3, + 0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 }, + { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc, + 0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } }, + /* 3 */ + { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480, + 0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 }, + { 
0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047, + 0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } }, + /* 4 */ + { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c, + 0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 }, + { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc, + 0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } }, + /* 5 */ + { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98, + 0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c }, + { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28, + 0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } }, + /* 6 */ + { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e, + 0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 }, + { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec, + 0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } }, + /* 7 */ + { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b, + 0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b }, + { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b, + 0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } }, + /* 8 */ + { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9, + 0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 }, + { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1, + 0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } }, + /* 9 */ + { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc, + 0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a }, + { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18, + 0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } }, + /* 10 */ + { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247, + 0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 }, + { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d, + 0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } }, + /* 11 */ + { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12, + 0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e }, + { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f, + 0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } }, + /* 12 */ + { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe, + 0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 }, + { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6, + 0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } }, + /* 13 */ + { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6, + 0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 }, + { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf, + 0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } }, + /* 14 */ + { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53, + 0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 }, + { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370, + 0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } }, + /* 15 */ + { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f, + 
0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc }, + { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2, + 0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } }, +}; + +/* Multiply the base point of P384 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k, + int map, void* heap) +{ + return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table, + k, map, heap); +} + +#else +static const sp_table_entry_384 p384_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc, + 0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 }, + { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756, + 0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } }, + /* 2 */ + { { 0x2b0c535b,0x29864753,0x70506296,0x90dd6953,0x216ab9ac,0x038cd6b4, + 0xbe12d76a,0x3df9b7b7,0x5f347bdb,0x13f4d978,0x13e94489,0x222c5c9c }, + { 0x2680dc64,0x5f8e796f,0x58352417,0x120e7cb7,0xd10740b8,0x254b5d8a, + 0x5337dee6,0xc38b8efb,0x94f02247,0xf688c2e1,0x6c25bc4c,0x7b5c75f3 } }, + /* 3 */ + { { 0x9edffea5,0xe26a3cc3,0x37d7e9fc,0x35bbfd1c,0x9bde3ef6,0xf0e7700d, + 0x1a538f5a,0x0380eb47,0x05bf9eb3,0x2e9da8bb,0x1a460c3e,0xdbb93c73 }, + { 0xf526b605,0x37dba260,0xfd785537,0x95d4978e,0xed72a04a,0x24ed793a, + 0x76005b1a,0x26948377,0x9e681f82,0x99f557b9,0xd64954ef,0xae5f9557 } }, + /* 4 */ + { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3, + 0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 }, + { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc, + 0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } }, + /* 5 */ + { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480, + 0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 }, + { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047, + 0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } }, + /* 6 */ + { { 0x4fc52870,0x28f9c07a,0x1a53a961,0xce0b3748,0x0e1828d9,0xd550fa18, + 0x6adb225a,0xa24abaf7,0x6e58a348,0xd11ed0a5,0x948acb62,0xf3d811e6 }, + { 0x4c61ed22,0x8618dd77,0x80b47c9d,0x0bb747f9,0xde6b8559,0x22bf796f, + 0x680a21e9,0xfdfd1c6d,0x2af2c9dd,0xc0db1577,0xc1e90f3d,0xa09379e6 } }, + /* 7 */ + { { 0xe085c629,0x386c66ef,0x095bc89a,0x5fc2a461,0x203f4b41,0x1353d631, + 0x7e4bd8f5,0x7ca1972b,0xa7df8ce9,0xb077380a,0xee7e4ea3,0xd8a90389 }, + { 0xe7b14461,0x1bc74dc7,0x0c9c4f78,0xdc2cb014,0x84ef0a10,0x52b4b3a6, + 0x20327fe2,0xbde6ea5d,0x660f9615,0xb71ec435,0xb8ad8173,0xeede5a04 } }, + /* 8 */ + { { 0x893b9a2d,0x5584cbb3,0x00850c5d,0x820c660b,0x7df2d43d,0x4126d826, + 0x0109e801,0xdd5bbbf0,0x38172f1c,0x85b92ee3,0xf31430d9,0x609d4f93 }, + { 0xeadaf9d6,0x1e059a07,0x0f125fb0,0x70e6536c,0x560f20e7,0xd6220751, + 0x7aaf3a9a,0xa59489ae,0x64bae14e,0x7b70e2f6,0x76d08249,0x0dd03701 } }, + /* 9 */ + { { 0x8510521f,0x4cc13be8,0xf724cc17,0x87315ba9,0x353dc263,0xb49d83bb, + 0x0c279257,0x8b677efe,0xc93c9537,0x510a1c1c,0xa4702c99,0x33e30cd8 }, + { 
0x2208353f,0xf0ffc89d,0xced42b2b,0x0170fa8d,0x26e2a5f5,0x090851ed, + 0xecb52c96,0x81276455,0x7fe1adf4,0x0646c4e1,0xb0868eab,0x513f047e } }, + /* 10 */ + { { 0xdf5bdf53,0xc07611f4,0x58b11a6d,0x45d331a7,0x1c4ee394,0x58965daf, + 0x5a5878d1,0xba8bebe7,0x82dd3025,0xaecc0a18,0xa923eb8b,0xcf2a3899 }, + { 0xd24fd048,0xf98c9281,0x8bbb025d,0x841bfb59,0xc9ab9d53,0xb8ddf8ce, + 0x7fef044e,0x538a4cb6,0x23236662,0x092ac21f,0x0b66f065,0xa919d385 } }, + /* 11 */ + { { 0x85d480d8,0x3db03b40,0x1b287a7d,0x8cd9f479,0x4a8f3bae,0x8f24dc75, + 0x3db41892,0x482eb800,0x9c56e0f5,0x38bf9eb3,0x9a91dc6f,0x8b977320 }, + { 0x7209cfc2,0xa31b05b2,0x05b2db70,0x4c49bf85,0xd619527b,0x56462498, + 0x1fac51ba,0x3fe51039,0xab4b8342,0xfb04f55e,0x04c6eabf,0xc07c10dc } }, + /* 12 */ + { { 0xdb32f048,0xad22fe4c,0x475ed6df,0x5f23bf91,0xaa66b6cb,0xa50ce0c0, + 0xf03405c0,0xdf627a89,0xf95e2d6a,0x3674837d,0xba42e64e,0x081c95b6 }, + { 0xe71d6ceb,0xeba3e036,0x6c6b0271,0xb45bcccf,0x0684701d,0x67b47e63, + 0xe712523f,0x60f8f942,0x5cd47adc,0x82423472,0x87649cbb,0x83027d79 } }, + /* 13 */ + { { 0x3615b0b8,0xb3929ea6,0xa54dac41,0xb41441fd,0xb5b6a368,0x8995d556, + 0x167ef05e,0xa80d4529,0x6d25a27f,0xf6bcb4a1,0x7bd55b68,0x210d6a4c }, + { 0x25351130,0xf3804abb,0x903e37eb,0x1d2df699,0x084c25c8,0x5f201efc, + 0xa1c68e91,0x31a28c87,0x563f62a5,0x81dad253,0xd6c415d4,0x5dd6de70 } }, + /* 14 */ + { { 0x846612ce,0x29f470fd,0xda18d997,0x986f3eec,0x2f34af86,0x6b84c161, + 0x46ddaf8b,0x5ef0a408,0xe49e795f,0x14405a00,0xaa2f7a37,0x5f491b16 }, + { 0xdb41b38d,0xc7f07ae4,0x18fbfcaa,0xef7d119e,0x14443b19,0x3a18e076, + 0x79a19926,0x4356841a,0xe2226fbe,0x91f4a91c,0x3cc88721,0xdc77248c } }, + /* 15 */ + { { 0xe4b1ec9d,0xd570ff1a,0xe7eef706,0x21d23e0e,0xca19e086,0x3cde40f4, + 0xcd4bb270,0x7d6523c4,0xbf13aa6c,0x16c1f06c,0xd14c4b60,0x5aa7245a }, + { 0x44b74de8,0x37f81467,0x620a934e,0x839e7a17,0xde8b1aa1,0xf74d14e8, + 0xf30d75e2,0x8789fa51,0xc81c261e,0x09b24052,0x33c565ee,0x654e2678 } }, + /* 16 */ + { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c, + 0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 }, + { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc, + 0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } }, + /* 17 */ + { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98, + 0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c }, + { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28, + 0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } }, + /* 18 */ + { { 0x7d8c1bba,0x7ecbac01,0x90b0f3d5,0x6058f9c3,0xf6197d0f,0xaee116e3, + 0x4033b128,0xc4dd7068,0xc209b983,0xf084dba6,0x831dbc4a,0x97c7c2cf }, + { 0xf96010e8,0x2f4e61dd,0x529faa17,0xd97e4e20,0x69d37f20,0x4ee66660, + 0x3d366d72,0xccc139ed,0x13488e0f,0x690b6ee2,0xf3a6d533,0x7cad1dc5 } }, + /* 19 */ + { { 0xda57a41f,0x660a9a81,0xec0039b6,0xe74a0412,0x5e1dad15,0x42343c6b, + 0x46681d4c,0x284f3ff5,0x63749e89,0xb51087f1,0x6f9f2f13,0x070f23cc }, + { 0x5d186e14,0x542211da,0xfddb0dff,0x84748f37,0xdb1f4180,0x41a3aab4, + 0xa6402d0e,0x25ed667b,0x02f58355,0x2f2924a9,0xfa44a689,0x5844ee7c } }, + /* 20 */ + { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e, + 0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 }, + { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec, + 0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } }, + /* 21 */ + { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b, + 
0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b }, + { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b, + 0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } }, + /* 22 */ + { { 0x104cbba5,0xc023780d,0xfa35dd4c,0x6207e747,0x1ca9b6a3,0x35c23928, + 0x97987b10,0x4ff19be8,0x8022eee8,0xb8476bbf,0xd3bbe74d,0xaa0a4a14 }, + { 0x187d4543,0x20f94331,0x79f6e066,0x32153870,0xac7e82e1,0x83b0f74e, + 0x828f06ab,0xa7748ba2,0xc26ef35f,0xc5f0298a,0x8e9a7dbd,0x0f0c5070 } }, + /* 23 */ + { { 0xdef029dd,0x0c5c244c,0x850661b8,0x3dabc687,0xfe11d981,0x9992b865, + 0x6274dbad,0xe9801b8f,0x098da242,0xe54e6319,0x91a53d08,0x9929a91a }, + { 0x35285887,0x37bffd72,0xf1418102,0xbc759425,0xfd2e6e20,0x9280cc35, + 0xfbc42ee5,0x735c600c,0x8837619a,0xb7ad2864,0xa778c57b,0xa3627231 } }, + /* 24 */ + { { 0x91361ed8,0xae799b5c,0x6c63366c,0x47d71b75,0x1b265a6a,0x54cdd521, + 0x98d77b74,0xe0215a59,0xbab29db0,0x4424d9b7,0x7fd9e536,0x8b0ffacc }, + { 0x37b5d9ef,0x46d85d12,0xbfa91747,0x5b106d62,0x5f99ba2d,0xed0479f8, + 0x1d104de4,0x0e6f3923,0x25e8983f,0x83a84c84,0xf8105a70,0xa9507e0a } }, + /* 25 */ + { { 0x14cf381c,0xf6c68a6e,0xc22e31cc,0xaf9d27bd,0xaa8a5ccb,0x23568d4d, + 0xe338e4d2,0xe431eec0,0x8f52ad1f,0xf1a828fe,0xe86acd80,0xdb6a0579 }, + { 0x4507832a,0x2885672e,0x887e5289,0x73fc275f,0x05610d08,0x65f80278, + 0x075ff5b0,0x8d9b4554,0x09f712b5,0x3a8e8fb1,0x2ebe9cf2,0x39f0ac86 } }, + /* 26 */ + { { 0x4c52edf5,0xd8fabf78,0xa589ae53,0xdcd737e5,0xd791ab17,0x94918bf0, + 0xbcff06c9,0xb5fbd956,0xdca46d45,0xf6d3032e,0x41a3e486,0x2cdff7e1 }, + { 0x61f47ec8,0x6674b3ba,0xeef84608,0x8a882163,0x4c687f90,0xa257c705, + 0xf6cdf227,0xe30cb2ed,0x7f6ea846,0x2c4c64ca,0xcc6bcd3c,0x186fa17c } }, + /* 27 */ + { { 0x1dfcb91e,0x48a3f536,0x646d358a,0x83595e13,0x91128798,0xbd15827b, + 0x2187757a,0x3ce612b8,0x61bd7372,0x873150a1,0xb662f568,0xf4684530 }, + { 0x401896f6,0x8833950b,0x77f3e090,0xe11cb89a,0x48e7f4a5,0xb2f12cac, + 0xf606677e,0x313dd769,0x16579f93,0xfdcf08b3,0x46b8f22b,0x6429cec9 } }, + /* 28 */ + { { 0xbb75f9a4,0x4984dd54,0x29d3b570,0x4aef06b9,0x3d6e4c1e,0xb5f84ca2, + 0xb083ef35,0x24c61c11,0x392ca9ff,0xce4a7392,0x6730a800,0x865d6517 }, + { 0x722b4a2b,0xca3dfe76,0x7b083e0e,0x12c04bf9,0x1b86b8a5,0x803ce5b5, + 0x6a7e3e0c,0x3fc7632d,0xc81adbe4,0xc89970c2,0x120e16b1,0x3cbcd3ad } }, + /* 29 */ + { { 0xec30ce93,0xfbfb4cc7,0xb72720a2,0x10ed6c7d,0x47b55500,0xec675bf7, + 0x333ff7c3,0x90725903,0x5075bfc0,0xc7c3973e,0x07acf31b,0xb049ecb0 }, + { 0x4f58839c,0xb4076eaf,0xa2b05e4f,0x101896da,0xab40c66e,0x3f6033b0, + 0xc8d864ba,0x19ee9eeb,0x47bf6d2a,0xeb6cf155,0xf826477d,0x8e5a9663 } }, + /* 30 */ + { { 0xf7fbd5e1,0x69e62fdd,0x76912b1d,0x38ecfe54,0xd1da3bfb,0x845a3d56, + 0x1c86f0d4,0x0494950e,0x3bc36ce8,0x83cadbf9,0x4fccc8d1,0x41fce572 }, + { 0x8332c144,0x05f939c2,0x0871e46e,0xb17f248b,0x66e8aff6,0x3d8534e2, + 0x3b85c629,0x1d06f1dc,0xa3131b73,0xdb06a32e,0x8b3f64e5,0xf295184d } }, + /* 31 */ + { { 0x36ddc103,0xd9653ff7,0x95ef606f,0x25f43e37,0xfe06dce8,0x09e301fc, + 0x30b6eebf,0x85af2341,0x0ff56b20,0x79b12b53,0xfe9a3c6b,0x9b4fb499 }, + { 0x51d27ac2,0x0154f892,0x56ca5389,0xd33167e3,0xafc065a6,0x7828ec1f, + 0x7f746c9b,0x0959a258,0x0c44f837,0xb18f1be3,0xc4132fdb,0xa7946117 } }, + /* 32 */ + { { 0x5e3c647b,0xc0426b77,0x8cf05348,0xbfcbd939,0x172c0d3d,0x31d312e3, + 0xee754737,0x5f49fde6,0x6da7ee61,0x895530f0,0xe8b3a5fb,0xcf281b0a }, + { 0x41b8a543,0xfd149735,0x3080dd30,0x41a625a7,0x653908cf,0xe2baae07, + 0xba02a278,0xc3d01436,0x7b21b8f8,0xa0d0222e,0xd7ec1297,0xfdc270e9 } }, + /* 33 */ + { { 
0xbc7f41d6,0x00873c0c,0x1b7ad641,0xd976113e,0x238443fb,0x2a536ff4, + 0x41e62e45,0x030d00e2,0x5f545fc6,0x532e9867,0x8e91208c,0xcd033108 }, + { 0x9797612c,0xd1a04c99,0xeea674e2,0xd4393e02,0xe19742a1,0xd56fa69e, + 0x85f0590e,0xdd2ab480,0x48a2243d,0xa5cefc52,0x54383f41,0x48cc67b6 } }, + /* 34 */ + { { 0xfc14ab48,0x4e50430e,0x26706a74,0x195b7f4f,0xcc881ff6,0x2fe8a228, + 0xd945013d,0xb1b968e2,0x4b92162b,0x936aa579,0x364e754a,0x4fb766b7 }, + { 0x31e1ff7f,0x13f93bca,0xce4f2691,0x696eb5ca,0xa2b09e02,0xff754bf8, + 0xe58e3ff8,0x58f13c9c,0x1678c0b0,0xb757346f,0xa86692b3,0xd54200db } }, + /* 35 */ + { { 0x6dda1265,0x9a030bbd,0xe89718dd,0xf7b4f3fc,0x936065b8,0xa6a4931f, + 0x5f72241c,0xbce72d87,0x65775857,0x6cbb51cb,0x4e993675,0xc7161815 }, + { 0x2ee32189,0xe81a0f79,0x277dc0b2,0xef2fab26,0xb71f469f,0x9e64f6fe, + 0xdfdaf859,0xb448ce33,0xbe6b5df1,0x3f5c1c4c,0x1de45f7b,0xfb8dfb00 } }, + /* 36 */ + { { 0x4d5bb921,0xc7345fa7,0x4d2b667e,0x5c7e04be,0x282d7a3e,0x47ed3a80, + 0x7e47b2a4,0x5c2777f8,0x08488e2e,0x89b3b100,0xb2eb5b45,0x9aad77c2 }, + { 0xdaac34ae,0xd681bca7,0x26afb326,0x2452e4e5,0x41a1ee14,0x0c887924, + 0xc2407ade,0x743b04d4,0xfc17a2ac,0xcb5e999b,0x4a701a06,0x4dca2f82 } }, + /* 37 */ + { { 0x1127bc1a,0x68e31ca6,0x17ead3be,0xa3edd59b,0xe25f5a15,0x67b6b645, + 0xa420e15e,0x76221794,0x4b1e872e,0x794fd83b,0xb2dece1b,0x7cab3f03 }, + { 0xca9b3586,0x7119bf15,0x4d250bd7,0xa5545924,0xcc6bcf24,0x173633ea, + 0xb1b6f884,0x9bd308c2,0x447d38c3,0x3bae06f5,0xf341fe1c,0x54dcc135 } }, + /* 38 */ + { { 0x943caf0d,0x56d3598d,0x225ff133,0xce044ea9,0x563fadea,0x9edf6a7c, + 0x73e8dc27,0x632eb944,0x3190dcab,0x814b467e,0x6dbb1e31,0x2d4f4f31 }, + { 0xa143b7ca,0x8d69811c,0xde7cf950,0x4ec1ac32,0x37b5fe82,0x223ab5fd, + 0x9390f1d9,0xe82616e4,0x75804610,0xabff4b20,0x875b08f0,0x11b9be15 } }, + /* 39 */ + { { 0x3bbe682c,0x4ae31a3d,0x74eef2dd,0xbc7c5d26,0x3c47dd40,0x92afd10a, + 0xc14ab9e1,0xec7e0a3b,0xb2e495e4,0x6a6c3dd1,0x309bcd85,0x085ee5e9 }, + { 0x8c2e67fd,0xf381a908,0xe261eaf2,0x32083a80,0x96deee15,0x0fcd6a49, + 0x5e524c79,0xe3b8fb03,0x1d5b08b9,0x8dc360d9,0x7f26719f,0x3a06e2c8 } }, + /* 40 */ + { { 0x7237cac0,0x5cd9f5a8,0x43586794,0x93f0b59d,0xe94f6c4e,0x4384a764, + 0xb62782d3,0x8304ed2b,0xcde06015,0x0b8db8b3,0x5dbe190f,0x4336dd53 }, + { 0x92ab473a,0x57443553,0xbe5ed046,0x031c7275,0x21909aa4,0x3e78678c, + 0x99202ddb,0x4ab7e04f,0x6977e635,0x2648d206,0x093198be,0xd427d184 } }, + /* 41 */ + { { 0x0f9b5a31,0x822848f5,0xbaadb62a,0xbb003468,0x3357559c,0x233a0472, + 0x79aee843,0x49ef6880,0xaeb9e1e3,0xa89867a0,0x1f6f9a55,0xc151931b }, + { 0xad74251e,0xd264eb0b,0x4abf295e,0x37b9b263,0x04960d10,0xb600921b, + 0x4da77dc0,0x0de53dbc,0xd2b18697,0x01d9bab3,0xf7156ddf,0xad54ec7a } }, + /* 42 */ + { { 0x79efdc58,0x8e74dc35,0x4ff68ddb,0x456bd369,0xd32096a5,0x724e74cc, + 0x386783d0,0xe41cff42,0x7c70d8a4,0xa04c7f21,0xe61a19a2,0x41199d2f }, + { 0x29c05dd2,0xd389a3e0,0xe7e3fda9,0x535f2a6b,0x7c2b4df8,0x26ecf72d, + 0xfe745294,0x678275f4,0x9d23f519,0x6319c9cc,0x88048fc4,0x1e05a02d } }, + /* 43 */ + { { 0xd4d5ffe8,0x75cc8e2e,0xdbea17f2,0xf8bb4896,0xcee3cb4a,0x35059790, + 0xa47c6165,0x4c06ee85,0x92935d2f,0xf98fff25,0x32ffd7c7,0x34c4a572 }, + { 0xea0376a2,0xc4b14806,0x4f115e02,0x2ea5e750,0x1e55d7c0,0x532d76e2, + 0xf31044da,0x68dc9411,0x71b77993,0x9272e465,0x93a8cfd5,0xadaa38bb } }, + /* 44 */ + { { 0x7d4ed72a,0x4bf0c712,0xba1f79a3,0xda0e9264,0xf4c39ea4,0x48c0258b, + 0x2a715138,0xa5394ed8,0xbf06c660,0x4af511ce,0xec5c37cd,0xfcebceef }, + { 0x779ae8c1,0xf23b75aa,0xad1e606e,0xdeff59cc,0x22755c82,0xf3f526fd, + 
0xbb32cefd,0x64c5ab44,0x915bdefd,0xa96e11a2,0x1143813e,0xab19746a } }, + /* 45 */ + { { 0xec837d7d,0x43c78585,0xb8ee0ba4,0xca5b6fbc,0xd5dbb5ee,0x34e924d9, + 0xbb4f1ca5,0x3f4fa104,0x398640f7,0x15458b72,0xd7f407ea,0x4231faa9 }, + { 0xf96e6896,0x53e0661e,0xd03b0f9d,0x554e4c69,0x9c7858d1,0xd4fcb07b, + 0x52cb04fa,0x7e952793,0x8974e7f7,0x5f5f1574,0x6b6d57c8,0x2e3fa558 } }, + /* 46 */ + { { 0x6a9951a8,0x42cd4803,0x42792ad0,0xa8b15b88,0xabb29a73,0x18e8bcf9, + 0x409933e8,0xbfd9a092,0xefb88dc4,0x760a3594,0x40724458,0x14418863 }, + { 0x99caedc7,0x162a56ee,0x91d101c9,0x8fb12ecd,0x393202da,0xea671967, + 0xa4ccd796,0x1aac8c4a,0x1cf185a8,0x7db05036,0x8cfd095a,0x0c9f86cd } }, + /* 47 */ + { { 0x10b2a556,0x9a728147,0x327b70b2,0x767ca964,0x5e3799b7,0x04ed9e12, + 0x22a3eb2a,0x6781d2dc,0x0d9450ac,0x5bd116eb,0xa7ebe08a,0xeccac1fc }, + { 0xdc2d6e94,0xde68444f,0x35ecf21b,0x3621f429,0x29e03a2c,0x14e2d543, + 0x7d3e7f0a,0x53e42cd5,0x73ed00b9,0xbba26c09,0xc57d2272,0x00297c39 } }, + /* 48 */ + { { 0xb8243a7d,0x3aaaab10,0x8fa58c5b,0x6eeef93e,0x9ae7f764,0xf866fca3, + 0x61ab04d3,0x64105a26,0x03945d66,0xa3578d8a,0x791b848c,0xb08cd3e4 }, + { 0x756d2411,0x45edc5f8,0xa755128c,0xd4a790d9,0x49e5f6a0,0xc2cf0963, + 0xf649beaa,0xc66d267d,0x8467039e,0x3ce6d968,0x42f7816f,0x50046c6b } }, + /* 49 */ + { { 0x66425043,0x92ae1602,0xf08db890,0x1ff66afd,0x8f162ce5,0x386f5a7f, + 0xfcf5598f,0x18d2dea0,0x1a8ca18e,0x78372b3a,0x8cd0e6f7,0xdf0d20eb }, + { 0x75bb4045,0x7edd5e1d,0xb96d94b7,0x252a47ce,0x2c626776,0xbdb29358, + 0x40dd1031,0x853c3943,0x7d5f47fd,0x9dc9becf,0xbae4044a,0x27c2302f } }, + /* 50 */ + { { 0x8f2d49ce,0x2d1d208a,0x162df0a2,0x0d91aa02,0x09a07f65,0x9c5cce87, + 0x84339012,0xdf07238b,0x419442cd,0x5028e2c8,0x72062aba,0x2dcbd358 }, + { 0xe4680967,0xb5fbc3cb,0x9f92d72c,0x2a7bc645,0x116c369d,0x806c76e1, + 0x3177e8d8,0x5c50677a,0x4569df57,0x753739eb,0x36c3f40b,0x2d481ef6 } }, + /* 51 */ + { { 0xfea1103e,0x1a2d39fd,0x95f81b17,0xeaae5592,0xf59b264a,0xdbd0aa18, + 0xcb592ee0,0x90c39c1a,0x9750cca3,0xdf62f80d,0xdf97cc6c,0xda4d8283 }, + { 0x1e201067,0x0a6dd346,0x69fb1f6b,0x1531f859,0x1d60121f,0x4895e552, + 0x4c041c91,0x0b21aab0,0xbcc1ccf8,0x9d896c46,0x3141bde7,0xd24da3b3 } }, + /* 52 */ + { { 0x53b0a354,0x575a0537,0x0c6ddcd8,0x392ff2f4,0x56157b94,0x0b8e8cff, + 0x3b1b80d1,0x073e57bd,0x3fedee15,0x2a75e0f0,0xaa8e6f19,0x752380e4 }, + { 0x6558ffe9,0x1f4e227c,0x19ec5415,0x3a348618,0xf7997085,0xab382d5e, + 0xddc46ac2,0x5e6deaff,0xfc8d094c,0xe5144078,0xf60e37c6,0xf674fe51 } }, + /* 53 */ + { { 0xaf63408f,0x6fb87ae5,0xcd75a737,0xa39c36a9,0xcf4c618d,0x7833313f, + 0xf034c88d,0xfbcd4482,0x39b35288,0x4469a761,0x66b5d9c9,0x77a711c5 }, + { 0x944f8d65,0x4a695dc7,0x161aaba8,0xe6da5f65,0x24601669,0x8654e9c3, + 0x28ae7491,0xbc8b93f5,0x8f5580d8,0x5f1d1e83,0xcea32cc8,0x8ccf9a1a } }, + /* 54 */ + { { 0x7196fee2,0x28ab110c,0x874c8945,0x75799d63,0x29aedadd,0xa2629348, + 0x2be88ff4,0x9714cc7b,0xd58d60d6,0xf71293cf,0x32a564e9,0xda6b6cb3 }, + { 0x3dd821c2,0xf43fddb1,0x90dd323d,0xf2f2785f,0x048489f8,0x91246419, + 0xd24c6749,0x61660f26,0xc803c15c,0x961d9e8c,0xfaadc4c9,0x631c6158 } }, + /* 55 */ + { { 0xfd752366,0xacf2ebe0,0x139be88b,0xb93c340e,0x0f20179e,0x98f66485, + 0xff1da785,0x14820254,0x4f85c16e,0x5278e276,0x7aab1913,0xa246ee45 }, + { 0x53763b33,0x43861eb4,0x45c0bc0d,0xc49f03fc,0xad6b1ea1,0xafff16bc, + 0x6fd49c99,0xce33908b,0xf7fde8c3,0x5c51e9bf,0xff142c5e,0x076a7a39 } }, + /* 56 */ + { { 0x9e338d10,0x04639dfe,0xf42b411b,0x8ee6996f,0xa875cef2,0x960461d1, + 0x95b4d0ba,0x1057b6d6,0xa906e0bc,0x27639252,0xe1c20f8a,0x2c19f09a }, + { 
0xeef4c43d,0x5b8fc3f0,0x07a84aa9,0xe2e1b1a8,0x835d2bdb,0x5f455528, + 0x207132dd,0x0f4aee4d,0x3907f675,0xe9f8338c,0x0e0531f0,0x7a874dc9 } }, + /* 57 */ + { { 0x97c27050,0x84b22d45,0x59e70bf8,0xbd0b8df7,0x79738b9b,0xb4d67405, + 0xcd917c4f,0x47f4d5f5,0x13ce6e33,0x9099c4ce,0x521d0f8b,0x942bfd39 }, + { 0xa43b566d,0x5028f0f6,0x21bff7de,0xaf6e8669,0xc44232cd,0x83f6f856, + 0xf915069a,0x65680579,0xecfecb85,0xd12095a2,0xdb01ba16,0xcf7f06ae } }, + /* 58 */ + { { 0x8ef96c80,0x0f56e3c4,0x3ddb609c,0xd521f2b3,0x7dc1450d,0x2be94102, + 0x02a91fe2,0x2d21a071,0x1efa37de,0x2e6f74fa,0x156c28a1,0x9a9a90b8 }, + { 0x9dc7dfcb,0xc54ea9ea,0x2c2c1d62,0xc74e66fc,0x49d3e067,0x9f23f967, + 0x54dd38ad,0x1c7c3a46,0x5946cee3,0xc7005884,0x45cc045d,0x89856368 } }, + /* 59 */ + { { 0xfce73946,0x29da7cd4,0x23168563,0x8f697db5,0xcba92ec6,0x8e235e9c, + 0x9f91d3ea,0x55d4655f,0xaa50a6cd,0xf3689f23,0x21e6a1a0,0xdcf21c26 }, + { 0x61b818bf,0xcffbc82e,0xda47a243,0xc74a2f96,0x8bc1a0cf,0x234e980a, + 0x7929cb6d,0xf35fd6b5,0xefe17d6c,0x81468e12,0x58b2dafb,0xddea6ae5 } }, + /* 60 */ + { { 0x7e787b2e,0x294de887,0x39a9310d,0x258acc1f,0xac14265d,0x92d9714a, + 0x708b48a0,0x18b5591c,0xe1abbf71,0x27cc6bb0,0x568307b9,0xc0581fa3 }, + { 0xf24d4d58,0x9e0f58a3,0xe0ce2327,0xfebe9bb8,0x9d1be702,0x91fd6a41, + 0xfacac993,0x9a7d8a45,0x9e50d66d,0xabc0a08c,0x06498201,0x02c342f7 } }, + /* 61 */ + { { 0x157bdbc2,0xccd71407,0xad0e1605,0x72fa89c6,0xb92a015f,0xb1d3da2b, + 0xa0a3fe56,0x8ad9e7cd,0x24f06737,0x160edcbd,0x61275be6,0x79d4db33 }, + { 0x5f3497c4,0xd3d31fd9,0x04192fb0,0x8cafeaee,0x13a50af3,0xe13ca745, + 0x8c85aae5,0x18826167,0x9eb556ff,0xce06cea8,0xbdb549f3,0x2eef1995 } }, + /* 62 */ + { { 0x50596edc,0x8ed7d3eb,0x905243a2,0xaa359362,0xa4b6d02b,0xa212c2c2, + 0xc4fbec68,0x611fd727,0xb84f733d,0x8a0b8ff7,0x5f0daf0e,0xd85a6b90 }, + { 0xd4091cf7,0x60e899f5,0x2eff2768,0x4fef2b67,0x10c33964,0xc1f195cb, + 0x93626a8f,0x8275d369,0x0d6c840a,0xc77904f4,0x7a868acd,0x88d8b7fd } }, + /* 63 */ + { { 0x7bd98425,0x85f23723,0xc70b154e,0xd4463992,0x96687a2e,0xcbb00ee2, + 0xc83214fd,0x905fdbf7,0x13593684,0x2019d293,0xef51218e,0x0428c393 }, + { 0x981e909a,0x40c7623f,0x7be192da,0x92513385,0x4010907e,0x48fe480f, + 0x3120b459,0xdd7a187c,0xa1fd8f3c,0xc9d7702d,0xe358efc5,0x66e4753b } }, + /* 64 */ + { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9, + 0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 }, + { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1, + 0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } }, + /* 65 */ + { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc, + 0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a }, + { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18, + 0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } }, + /* 66 */ + { { 0xc6a2123f,0xb1a358f5,0xfe28df6d,0x927b2d95,0xf199d2f9,0x89702753, + 0x1a3f82dc,0x0a73754c,0x777affe1,0x063d029d,0xdae6d34d,0x5439817e }, + { 0x6b8b83c4,0xf7979eef,0x9d945682,0x615cb214,0xc5e57eae,0x8f0e4fac, + 0x113047dd,0x042b89b8,0x93f36508,0x888356dc,0x5fd1f32f,0xbf008d18 } }, + /* 67 */ + { { 0x4e8068db,0x8012aa24,0xa5729a47,0xc72cc641,0x43f0691d,0x3c33df2c, + 0x1d92145f,0xfa057347,0xb97f7946,0xaefc0f2f,0x2f8121bf,0x813d75cb }, + { 0x4383bba6,0x05613c72,0xa4224b3f,0xa924ce70,0x5f2179a6,0xe59cecbe, + 0x79f62b61,0x78e2e8aa,0x53ad8079,0x3ac2cc3b,0xd8f4fa96,0x55518d71 } }, + /* 68 */ + { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247, + 
0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 }, + { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d, + 0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } }, + /* 69 */ + { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12, + 0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e }, + { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f, + 0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } }, + /* 70 */ + { { 0xb0ab9645,0xb5e405d3,0xd5f1f711,0xaeec7f98,0x585c2a6e,0x8ad42311, + 0x512c6944,0x045acb9e,0xa90db1c6,0xae106c4e,0x898e6563,0xb89f33d5 }, + { 0x7fed2ce4,0x43b07cd9,0xdd815b20,0xf9934e17,0x0a81a349,0x6778d4d5, + 0x52918061,0x9e616ade,0xd7e67112,0xfa06db06,0x88488091,0x1da23cf1 } }, + /* 71 */ + { { 0x42f2c4b5,0x821c46b3,0x66059e47,0x931513ef,0x66f50cd1,0x7030ae43, + 0x43e7b127,0x43b536c9,0x5fca5360,0x006258cf,0x6b557abf,0xe4e3ee79 }, + { 0x24c8b22f,0xbb6b3900,0xfcbf1054,0x2eb5e2c1,0x567492af,0x937b18c9, + 0xacf53957,0xf09432e4,0x1dbf3a56,0x585f5a9d,0xbe0887cf,0xf86751fd } }, + /* 72 */ + { { 0x9d10e0b2,0x157399cb,0x60dc51b7,0x1c0d5956,0x1f583090,0x1d496b8a, + 0x88590484,0x6658bc26,0x03213f28,0x88c08ab7,0x7ae58de4,0x8d2e0f73 }, + { 0x486cfee6,0x9b79bc95,0xe9e5bc57,0x036a26c7,0xcd8ae97a,0x1ad03601, + 0xff3a0494,0x06907f87,0x2c7eb584,0x078f4bbf,0x7e8d0a5a,0xe3731bf5 } }, + /* 73 */ + { { 0xe1cd0abe,0x72f2282b,0x87efefa2,0xd4f9015e,0x6c3834bd,0x9d189806, + 0xb8a29ced,0x9c8cdcc1,0xfee82ebc,0x0601b9f4,0x7206a756,0x371052bc }, + { 0x46f32562,0x76fa1092,0x17351bb4,0xdaad534c,0xb3636bb5,0xc3d64c37, + 0x45d54e00,0x038a8c51,0x32c09e7c,0x301e6180,0x95735151,0x9764eae7 } }, + /* 74 */ + { { 0xcbd5256a,0x8791b19f,0x6ca13a3b,0x4007e0f2,0x4cf06904,0x03b79460, + 0xb6c17589,0xb18a9c22,0x81d45908,0xa1cb7d7d,0x21bb68f1,0x6e13fa9d }, + { 0xa71e6e16,0x47183c62,0xe18749ed,0x5cf0ef8e,0x2e5ed409,0x2c9c7f9b, + 0xe6e117e1,0x042eeacc,0x13fb5a7f,0xb86d4816,0xc9e5feb1,0xea1cf0ed } }, + /* 75 */ + { { 0xcea4cc9b,0x6e6573c9,0xafcec8f3,0x5417961d,0xa438b6f6,0x804bf02a, + 0xdcd4ea88,0xb894b03c,0x3799571f,0xd0f807e9,0x862156e8,0x3466a7f5 }, + { 0x56515664,0x51e59acd,0xa3c5eb0b,0x55b0f93c,0x6a4279db,0x84a06b02, + 0xc5fae08e,0x5c850579,0xa663a1a2,0xcf07b8db,0xf46ffc8d,0x49a36bbc } }, + /* 76 */ + { { 0x46d93106,0xe47f5acc,0xaa897c9c,0x65b7ade0,0x12d7e4be,0x37cf4c94, + 0xd4b2caa9,0xa2ae9b80,0xe60357a3,0x5e7ce09c,0xc8ecd5f9,0x29f77667 }, + { 0xa8a0b1c5,0xdf6868f5,0x62978ad8,0x240858cf,0xdc0002a1,0x0f7ac101, + 0xffe9aa05,0x1d28a9d7,0x5b962c97,0x744984d6,0x3d28c8b2,0xa8a7c00b } }, + /* 77 */ + { { 0xae11a338,0x7c58a852,0xd1af96e7,0xa78613f1,0x5355cc73,0x7e9767d2, + 0x792a2de6,0x6ba37009,0x124386b2,0x7d60f618,0x11157674,0xab09b531 }, + { 0x98eb9dd0,0x95a04841,0x15070328,0xe6c17acc,0x489c6e49,0xafc6da45, + 0xbb211530,0xab45a60a,0x7d7ea933,0xc58d6592,0x095642c6,0xa3ef3c65 } }, + /* 78 */ + { { 0xdf010879,0x89d420e9,0x39576179,0x9d25255d,0xe39513b6,0x9cdefd50, + 0xd5d1c313,0xe4efe45b,0x3f7af771,0xc0149de7,0x340ab06b,0x55a6b4f4 }, + { 0xebeaf771,0xf1325251,0x878d4288,0x2ab44128,0x18e05afe,0xfcd5832e, + 0xcc1fb62b,0xef52a348,0xc1c4792a,0x2bd08274,0x877c6dc7,0x345c5846 } }, + /* 79 */ + { { 0xbea65e90,0xde15ceb0,0x2416d99c,0x0987f72b,0xfd863dec,0x44db578d, + 0xac6a3578,0xf617b74b,0xdb48e999,0x9e62bd7a,0xeab1a1be,0x877cae61 }, + { 0x3a358610,0x23adddaa,0x325e2b07,0x2fc4d6d1,0x1585754e,0x897198f5, + 0xb392b584,0xf741852c,0xb55f7de1,0x9927804c,0x1aa8efae,0xe9e6c4ed } }, + /* 80 */ + { { 
0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe, + 0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 }, + { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6, + 0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } }, + /* 81 */ + { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6, + 0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 }, + { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf, + 0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } }, + /* 82 */ + { { 0xf8e60f5b,0x511188b4,0x48aa2ada,0x7fe67015,0x381abca2,0xdb333cb8, + 0xdaf3fc97,0xb15e6d9d,0x36aabc03,0x4b24f6eb,0x72a748b4,0xc59789df }, + { 0x29cf5279,0x26fcb8a5,0x01ad9a6c,0x7a3c6bfc,0x4b8bac9b,0x866cf88d, + 0x9c80d041,0xf4c89989,0x70add148,0xf0a04241,0x45d81a41,0x5a02f479 } }, + /* 83 */ + { { 0xc1c90202,0xfa5c877c,0xf8ac7570,0xd099d440,0xd17881f7,0x428a5b1b, + 0x5b2501d7,0x61e267db,0xf2e4465b,0xf889bf04,0x76aa4cb8,0x4da3ae08 }, + { 0xe3e66861,0x3ef0fe26,0x3318b86d,0x5e772953,0x747396df,0xc3c35fbc, + 0x439ffd37,0x5115a29c,0xb2d70374,0xbfc4bd97,0x56246b9d,0x088630ea } }, + /* 84 */ + { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53, + 0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 }, + { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370, + 0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } }, + /* 85 */ + { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f, + 0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc }, + { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2, + 0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } }, + /* 86 */ + { { 0xfecf5b9b,0x34e0f9d1,0xf206966a,0xa115b12b,0x1eaa0534,0x5591cf3b, + 0xfb1558f9,0x5f0293cb,0x1bc703a5,0x1c8507a4,0x862c1f81,0x92e6b81c }, + { 0xcdaf24e3,0xcc9ebc66,0x72fcfc70,0x68917ecd,0x8157ba48,0x6dc9a930, + 0xb06ab2b2,0x5d425c08,0x36e929c4,0x362f8ce7,0x62e89324,0x09f6f57c } }, + /* 87 */ + { { 0xd29375fb,0x1c7d6b78,0xe35d1157,0xfabd851e,0x4243ea47,0xf6f62dcd, + 0x8fe30b0f,0x1dd92460,0xffc6e709,0x08166dfa,0x0881e6a7,0xc6c4c693 }, + { 0xd6a53fb0,0x20368f87,0x9eb4d1f9,0x38718e9f,0xafd7e790,0x03f08acd, + 0x72fe2a1c,0x0835eb44,0x88076e5d,0x7e050903,0xa638e731,0x538f765e } }, + /* 88 */ + { { 0xc2663b4b,0x0e0249d9,0x47cd38dd,0xe700ab5b,0x2c46559f,0xb192559d, + 0x4bcde66d,0x8f9f74a8,0x3e2aced5,0xad161523,0x3dd03a5b,0xc155c047 }, + { 0x3be454eb,0x346a8799,0x83b7dccd,0x66ee94db,0xab9d2abe,0x1f6d8378, + 0x7733f355,0x4a396dd2,0xf53553c2,0x419bd40a,0x731dd943,0xd0ead98d } }, + /* 89 */ + { { 0xec142408,0x908e0b0e,0x4114b310,0x98943cb9,0x1742b1d7,0x03dbf7d8, + 0x693412f4,0xd270df6b,0x8f69e20c,0xc5065494,0x697e43a1,0xa76a90c3 }, + { 0x4624825a,0xe0fa3384,0x8acc34c2,0x82e48c0b,0xe9a14f2b,0x7b24bd14, + 0x4db30803,0x4f5dd5e2,0x932da0a3,0x0c77a9e7,0x74c653dc,0x20db90f2 } }, + /* 90 */ + { { 0x0e6c5fd9,0x261179b7,0x6c982eea,0xf8bec123,0xd4957b7e,0x47683338, + 0x0a72f66a,0xcc47e664,0x1bad9350,0xbd54bf6a,0xf454e95a,0xdfbf4c6a }, + { 0x6907f4fa,0x3f7a7afa,0x865ca735,0x7311fae0,0x2a496ada,0x24737ab8, + 0x15feb79b,0x13e425f1,0xa1b93c21,0xe9e97c50,0x4ddd3eb5,0xb26b6eac } }, + /* 91 */ + { { 0x2a2e5f2b,0x81cab9f5,0xbf385ac4,0xf93caf29,0xc909963a,0xf4bf35c3, + 0x74c9143c,0x081e7300,0xc281b4c5,0x3ea57fa8,0x9b340741,0xe497905c }, + { 0x55ab3cfb,0xf556dd8a,0x518db6ad,0xd444b96b,0x5ef4b955,0x34f5425a, + 
0xecd26aa3,0xdda7a3ac,0xda655e97,0xb57da11b,0xc2024c70,0x02da3eff } }, + /* 92 */ + { { 0x6481d0d9,0xe24b0036,0x818fdfe2,0x3740dbe5,0x190fda00,0xc1fc1f45, + 0x3cf27fde,0x329c9280,0x6934f43e,0x7435cb53,0x7884e8fe,0x2b505a5d }, + { 0x711adcc9,0x6cfcc6a6,0x531e21e1,0xf034325c,0x9b2a8a99,0xa2f4a967, + 0x3c21bdff,0x9d5f3842,0x31b57d66,0xb25c7811,0x0b8093b9,0xdb5344d8 } }, + /* 93 */ + { { 0xae50a2f5,0x0d72e667,0xe4a861d1,0x9b7f8d8a,0x330df1cb,0xa129f70f, + 0xe04fefc3,0xe90aa5d7,0xe72c3ae1,0xff561ecb,0xcdb955fa,0x0d8fb428 }, + { 0xd7663784,0xd2235f73,0x7e2c456a,0xc05baec6,0x2adbfccc,0xe5c292e4, + 0xefb110d5,0x4fd17988,0xd19d49f3,0x27e57734,0x84f679fe,0x188ac4ce } }, + /* 94 */ + { { 0xa796c53e,0x7ee344cf,0x0868009b,0xbbf6074d,0x474a1295,0x1f1594f7, + 0xac11632d,0x66776edc,0x04e2fa5a,0x1862278b,0xc854a89a,0x52665cf2 }, + { 0x8104ab58,0x7e376464,0x7204fd6d,0x16775913,0x44ea1199,0x86ca06a5, + 0x1c9240dd,0xaa3f765b,0x24746149,0x5f8501a9,0xdcd251d7,0x7b982e30 } }, + /* 95 */ + { { 0xc15f3060,0xe44e9efc,0xa87ebbe6,0x5ad62f2e,0xc79500d4,0x36499d41, + 0x336fa9d1,0xa66d6dc0,0x5afd3b1f,0xf8afc495,0xe5c9822b,0x1d8ccb24 }, + { 0x79d7584b,0x4031422b,0xea3f20dd,0xc54a0580,0x958468c5,0x3f837c8f, + 0xfbea7735,0x3d82f110,0x7dffe2fc,0x679a8778,0x20704803,0x48eba63b } }, + /* 96 */ + { { 0xdf46e2f6,0x89b10d41,0x19514367,0x13ab57f8,0x1d469c87,0x067372b9, + 0x4f6c5798,0x0c195afa,0x272c9acf,0xea43a12a,0x678abdac,0x9dadd8cb }, + { 0xe182579a,0xcce56c6b,0x2d26c2d8,0x86febadb,0x2a44745c,0x1c668ee1, + 0x98dc047a,0x580acd86,0x51b9ec2d,0x5a2b79cc,0x4054f6a0,0x007da608 } }, + /* 97 */ + { { 0x17b00dd0,0x9e3ca352,0x0e81a7a6,0x046779cb,0xd482d871,0xb999fef3, + 0xd9233fbc,0xe6f38134,0xf48cd0e0,0x112c3001,0x3c6c66ae,0x934e7576 }, + { 0xd73234dc,0xb44d4fc3,0x864eafc1,0xfcae2062,0x26bef21a,0x843afe25, + 0xf3b75fdf,0x61355107,0x794c2e6b,0x8367a5aa,0x8548a372,0x3d2629b1 } }, + /* 98 */ + { { 0x437cfaf8,0x6230618f,0x2032c299,0x5b8742cb,0x2293643a,0x949f7247, + 0x09464f79,0xb8040f1a,0x4f254143,0x049462d2,0x366c7e76,0xabd6b522 }, + { 0xd5338f55,0x119b392b,0x01495a0c,0x1a80a9ce,0xf8d7537e,0xf3118ca7, + 0x6bf4b762,0xb715adc2,0xa8482b6c,0x24506165,0x96a7c84d,0xd958d7c6 } }, + /* 99 */ + { { 0xbdc21f31,0x9ad8aa87,0x8063e58c,0xadb3cab4,0xb07dd7b8,0xefd86283, + 0x1be7c6b4,0xc7b9b762,0x015582de,0x2ef58741,0x299addf3,0xc970c52e }, + { 0x22f24d66,0x78f02e2a,0x74cc100a,0xefec1d10,0x09316e1a,0xaf2a6a39, + 0x5849dd49,0xce7c2205,0x96bffc4c,0x9c1fe75c,0x7ba06ec0,0xcad98fd2 } }, + /* 100 */ + { { 0xb648b73e,0xed76e2d0,0x1cfd285e,0xa9f92ce5,0x2ed13de1,0xa8c86c06, + 0xa5191a93,0x1d3a574e,0x1ad1b8bf,0x385cdf8b,0x47d2cfe3,0xbbecc28a }, + { 0x69cec548,0x98d326c0,0xf240a0b2,0x4f5bc1dd,0x29057236,0x241a7062, + 0xc68294a4,0x0fc6e9c5,0xa319f17a,0x4d04838b,0x9ffc1c6f,0x8b612cf1 } }, + /* 101 */ + { { 0x4c3830eb,0x9bb0b501,0x8ee0d0c5,0x3d08f83c,0x79ba9389,0xa4a62642, + 0x9cbc2914,0x5d5d4044,0x074c46f0,0xae9eb83e,0x74ead7d6,0x63bb758f }, + { 0xc6bb29e0,0x1c40d2ea,0x4b02f41e,0x95aa2d87,0x53cb199a,0x92989175, + 0x51584f6d,0xdd91bafe,0x31a1aaec,0x3715efb9,0x46780f9e,0xc1b6ae5b } }, + /* 102 */ + { { 0x42772f41,0xcded3e4b,0x3bcb79d1,0x3a700d5d,0x80feee60,0x4430d50e, + 0xf5e5d4bb,0x444ef1fc,0xe6e358ff,0xc660194f,0x6a91b43c,0xe68a2f32 }, + { 0x977fe4d2,0x5842775c,0x7e2a41eb,0x78fdef5c,0xff8df00e,0x5f3bec02, + 0x5852525d,0xf4b840cd,0x4e6988bd,0x0870483a,0xcc64b837,0x39499e39 } }, + /* 103 */ + { { 0xb08df5fe,0xfc05de80,0x63ba0362,0x0c12957c,0xd5cf1428,0xea379414, + 0x54ef6216,0xc559132a,0xb9e65cf8,0x33d5f12f,0x1695d663,0x09c60278 }, + { 
0x61f7a2fb,0x3ac1ced4,0xd4f5eeb8,0xdd838444,0x8318fcad,0x82a38c6c, + 0xe9f1a864,0x315be2e5,0x442daf47,0x317b5771,0x95aa5f9e,0x81b5904a } }, + /* 104 */ + { { 0x8b21d232,0x6b6b1c50,0x8c2cba75,0x87f3dbc0,0xae9f0faf,0xa7e74b46, + 0xbb7b8079,0x036a0985,0x8d974a25,0x4f185b90,0xd9af5ec9,0x5aa7cef0 }, + { 0x57dcfffc,0xe0566a70,0xb8453225,0x6ea311da,0x23368aa9,0x72ea1a8d, + 0x48cd552d,0xed9b2083,0xc80ea435,0xb987967c,0x6c104173,0xad735c75 } }, + /* 105 */ + { { 0xcee76ef4,0xaea85ab3,0xaf1d2b93,0x44997444,0xeacb923f,0x0851929b, + 0x51e3bc0c,0xb080b590,0x59be68a2,0xc4ee1d86,0x64b26cda,0xf00de219 }, + { 0xf2e90d4d,0x8d7fb5c0,0x77d9ec64,0x00e219a7,0x5d1c491c,0xc4e6febd, + 0x1a8f4585,0x080e3754,0x48d2af9c,0x4a9b86c8,0xb6679851,0x2ed70db6 } }, + /* 106 */ + { { 0x586f25cb,0xaee44116,0xa0fcf70f,0xf7b6861f,0x18a350e8,0x55d2cd20, + 0x92dc286f,0x861bf3e5,0x6226aba7,0x9ab18ffa,0xa9857b03,0xd15827be }, + { 0x92e6acef,0x26c1f547,0xac1fbac3,0x422c63c8,0xfcbfd71d,0xa2d8760d, + 0xb2511224,0x35f6a539,0x048d1a21,0xbaa88fa1,0xebf999db,0x49f1abe9 } }, + /* 107 */ + { { 0xf7492b73,0x16f9f4f4,0xcb392b1a,0xcf28ec1e,0x69ca6ffc,0x45b130d4, + 0xb72efa58,0x28ba8d40,0x5ca066f5,0xace987c7,0x4ad022eb,0x3e399246 }, + { 0x752555bb,0x63a2d84e,0x9c2ae394,0xaaa93b4a,0xc89539ca,0xcd80424e, + 0xaa119a99,0x6d6b5a6d,0x379f2629,0xbd50334c,0xef3cc7d3,0x899e925e } }, + /* 108 */ + { { 0xbf825dc4,0xb7ff3651,0x40b9c462,0x0f741cc4,0x5cc4fb5b,0x771ff5a9, + 0x47fd56fe,0xcb9e9c9b,0x5626c0d3,0xbdf053db,0xf7e14098,0xa97ce675 }, + { 0x6c934f5e,0x68afe5a3,0xccefc46f,0x6cd5e148,0xd7a88586,0xc7758570, + 0xdd558d40,0x49978f5e,0x64ae00c1,0xa1d5088a,0xf1d65bb2,0x58f2a720 } }, + /* 109 */ + { { 0x3e4daedb,0x66fdda4a,0x65d1b052,0x38318c12,0x4c4bbf5c,0x28d910a2, + 0x78a9cd14,0x762fe5c4,0xd2cc0aee,0x08e5ebaa,0xca0c654c,0xd2cdf257 }, + { 0x08b717d2,0x48f7c58b,0x386cd07a,0x3807184a,0xae7d0112,0x3240f626, + 0xc43917b0,0x03e9361b,0x20aea018,0xf261a876,0x7e1e6372,0x53f556a4 } }, + /* 110 */ + { { 0x2f512a90,0xc84cee56,0x1b0ea9f1,0x24b3c004,0xe26cc1ea,0x0ee15d2d, + 0xf0c9ef7d,0xd848762c,0xd5341435,0x1026e9c5,0xfdb16b31,0x8f5b73dc }, + { 0xd2c75d95,0x1f69bef2,0xbe064dda,0x8d33d581,0x57ed35e6,0x8c024c12, + 0xc309c281,0xf8d435f9,0xd6960193,0xfd295061,0xe9e49541,0x66618d78 } }, + /* 111 */ + { { 0x8ce382de,0x571cfd45,0xde900dde,0x175806ee,0x34aba3b5,0x61849965, + 0xde7aec95,0xe899778a,0xff4aa97f,0xe8f00f6e,0x010b0c6d,0xae971cb5 }, + { 0x3af788f1,0x1827eebc,0xe413fe2d,0xd46229ff,0x4741c9b4,0x8a15455b, + 0xf8e424eb,0x5f02e690,0xdae87712,0x40a1202e,0x64944f6d,0x49b3bda2 } }, + /* 112 */ + { { 0x035b2d69,0xd63c6067,0x6bed91b0,0xb507150d,0x7afb39b2,0x1f35f82f, + 0x16012b66,0xb9bd9c01,0xed0a5f50,0x00d97960,0x2716f7c9,0xed705451 }, + { 0x127abdb4,0x1576eff4,0xf01e701c,0x6850d698,0x3fc87e2f,0x9fa7d749, + 0xb0ce3e48,0x0b6bcc6f,0xf7d8c1c0,0xf4fbe1f5,0x02719cc6,0xcf75230e } }, + /* 113 */ + { { 0x722d94ed,0x6761d6c2,0x3718820e,0xd1ec3f21,0x25d0e7c6,0x65a40b70, + 0xbaf3cf31,0xd67f830e,0xb93ea430,0x633b3807,0x0bc96c69,0x17faa0ea }, + { 0xdf866b98,0xe6bf3482,0xa9db52d4,0x205c1ee9,0xff9ab869,0x51ef9bbd, + 0x75eeb985,0x3863dad1,0xd3cf442a,0xef216c3b,0xf9c8e321,0x3fb228e3 } }, + /* 114 */ + { { 0x0760ac07,0x94f9b70c,0x9d79bf4d,0xf3c9ccae,0xc5ffc83d,0x73cea084, + 0xdc49c38e,0xef50f943,0xbc9e7330,0xf467a2ae,0x44ea7fba,0x5ee534b6 }, + { 0x03609e7f,0x20cb6272,0x62fdc9f0,0x09844355,0x0f1457f7,0xaf5c8e58, + 0xb4b25941,0xd1f50a6c,0x2ec82395,0x77cb247c,0xda3dca33,0xa5f3e1e5 } }, + /* 115 */ + { { 0x7d85fa94,0x023489d6,0x2db9ce47,0x0ba40537,0xaed7aad1,0x0fdf7a1f, + 
0x9a4ccb40,0xa57b0d73,0x5b18967c,0x48fcec99,0xb7274d24,0xf30b5b6e }, + { 0xc81c5338,0x7ccb4773,0xa3ed6bd0,0xb85639e6,0x1d56eada,0x7d9df95f, + 0x0a1607ad,0xe256d57f,0x957574d6,0x6da7ffdc,0x01c7a8c4,0x65f84046 } }, + /* 116 */ + { { 0xcba1e7f1,0x8d45d0cb,0x02b55f64,0xef0a08c0,0x17e19892,0x771ca31b, + 0x4885907e,0xe1843ecb,0x364ce16a,0x67797ebc,0x8df4b338,0x816d2b2d }, + { 0x39aa8671,0xe870b0e5,0xc102b5f5,0x9f0db3e4,0x1720c697,0x34296659, + 0x613c0d2a,0x0ad4c89e,0x418ddd61,0x1af900b2,0xd336e20e,0xe087ca72 } }, + /* 117 */ + { { 0xaba10079,0x222831ff,0x6d64fff2,0x0dc5f87b,0x3e8cb330,0x44547907, + 0x702a33fb,0xe815aaa2,0x5fba3215,0x338d6b2e,0x79f549c8,0x0f7535cb }, + { 0x2ee95923,0x471ecd97,0xc6d1c09f,0x1e868b37,0xc666ef4e,0x2bc7b8ec, + 0x808a4bfc,0xf5416589,0x3fbc4d2e,0xf23e9ee2,0x2d75125b,0x4357236c } }, + /* 118 */ + { { 0xba9cdb1b,0xfe176d95,0x2f82791e,0x45a1ca01,0x4de4cca2,0x97654af2, + 0x5cc4bcb9,0xbdbf9d0e,0xad97ac0a,0xf6a7df50,0x61359fd6,0xc52112b0 }, + { 0x4f05eae3,0x696d9ce3,0xe943ac2b,0x903adc02,0x0848be17,0xa9075347, + 0x2a3973e5,0x1e20f170,0x6feb67e9,0xe1aacc1c,0xe16bc6b9,0x2ca0ac32 } }, + /* 119 */ + { { 0xef871eb5,0xffea12e4,0xa8bf0a7a,0x94c2f25d,0x78134eaa,0x4d1e4c2a, + 0x0360fb10,0x11ed16fb,0x85fc11be,0x4029b6db,0xf4d390fa,0x5e9f7ab7 }, + { 0x30646612,0x5076d72f,0xdda1d0d8,0xa0afed1d,0x85a1d103,0x29022257, + 0x4e276bcd,0xcb499e17,0x51246c3d,0x16d1da71,0x589a0443,0xc72d56d3 } }, + /* 120 */ + { { 0xdae5bb45,0xdf5ffc74,0x261bd6dc,0x99068c4a,0xaa98ec7b,0xdc0afa7a, + 0xf121e96d,0xedd2ee00,0x1414045c,0x163cc7be,0x335af50e,0xb0b1bbce }, + { 0x01a06293,0xd440d785,0x6552e644,0xcdebab7c,0x8c757e46,0x48cb8dbc, + 0x3cabe3cb,0x81f9cf78,0xb123f59a,0xddd02611,0xeeb3784d,0x3dc7b88e } }, + /* 121 */ + { { 0xc4741456,0xe1b8d398,0x6032a121,0xa9dfa902,0x1263245b,0x1cbfc86d, + 0x5244718c,0xf411c762,0x05b0fc54,0x96521d54,0xdbaa4985,0x1afab46e }, + { 0x8674b4ad,0xa75902ba,0x5ad87d12,0x486b43ad,0x36e0d099,0x72b1c736, + 0xbb6cd6d6,0x39890e07,0x59bace4e,0x8128999c,0x7b535e33,0xd8da430b } }, + /* 122 */ + { { 0xc6b75791,0x39f65642,0x21806bfb,0x050947a6,0x1362ef84,0x0ca3e370, + 0x8c3d2391,0x9bc60aed,0x732e1ddc,0x9b488671,0xa98ee077,0x12d10d9e }, + { 0x3651b7dc,0xb6f2822d,0x80abd138,0x6345a5ba,0x472d3c84,0x62033262, + 0xacc57527,0xd54a1d40,0x424447cb,0x6ea46b3a,0x2fb1a496,0x5bc41057 } }, + /* 123 */ + { { 0xa751cd0e,0xe70c57a3,0xeba3c7d6,0x190d8419,0x9d47d55a,0xb1c3bee7, + 0xf912c6d8,0xda941266,0x407a6ad6,0x12e9aacc,0x6e838911,0xd6ce5f11 }, + { 0x70e1f2ce,0x063ca97b,0x8213d434,0xa3e47c72,0x84df810a,0xa016e241, + 0xdfd881a4,0x688ad7b0,0xa89bf0ad,0xa37d99fc,0xa23c2d23,0xd8e3f339 } }, + /* 124 */ + { { 0x750bed6f,0xbdf53163,0x83e68b0a,0x808abc32,0x5bb08a33,0x85a36627, + 0x6b0e4abe,0xf72a3a0f,0xfaf0c6ad,0xf7716d19,0x5379b25f,0x22dcc020 }, + { 0xf9a56e11,0x7400bf8d,0x56a47f21,0x6cb8bad7,0x7a6eb644,0x7c97176f, + 0xd1f5b646,0xe8fd84f7,0x44ddb054,0x98320a94,0x1dde86f5,0x07071ba3 } }, + /* 125 */ + { { 0x98f8fcb9,0x6fdfa0e5,0x94d0d70c,0x89cec8e0,0x106d20a8,0xa0899397, + 0xba8acc9c,0x915bfb9a,0x5507e01c,0x1370c94b,0x8a821ffb,0x83246a60 }, + { 0xbe3c378f,0xa8273a9f,0x35a25be9,0x7e544789,0x4dd929d7,0x6cfa4972, + 0x365bd878,0x987fed9d,0x5c29a7ae,0x4982ac94,0x5ddd7ec5,0x4589a5d7 } }, + /* 126 */ + { { 0xa95540a9,0x9fabb174,0x0162c5b0,0x7cfb886f,0xea3dee18,0x17be766b, + 0xe88e624c,0xff7da41f,0x8b919c38,0xad0b71eb,0xf31ff9a9,0x86a522e0 }, + { 0x868bc259,0xbc8e6f72,0x3ccef9e4,0x6130c638,0x9a466555,0x09f1f454, + 0x19b2bfb4,0x8e6c0f09,0x0ca7bb22,0x945c46c9,0x4dafb67b,0xacd87168 } }, + /* 127 */ + { { 
0x10c53841,0x090c72ca,0x55a4fced,0xc20ae01b,0xe10234ad,0x03f7ebd5, + 0x85892064,0xb3f42a6a,0xb4a14722,0xbdbc30c0,0x8ca124cc,0x971bc437 }, + { 0x517ff2ff,0x6f79f46d,0xecba947b,0x6a9c96e2,0x62925122,0x5e79f2f4, + 0x6a4e91f1,0x30a96bb1,0x2d4c72da,0x1147c923,0x5811e4df,0x65bc311f } }, + /* 128 */ + { { 0x139b3239,0x87c7dd7d,0x4d833bae,0x8b57824e,0x9fff0015,0xbcbc4878, + 0x909eaf1a,0x8ffcef8b,0xf1443a78,0x9905f4ee,0xe15cbfed,0x020dd4a2 }, + { 0xa306d695,0xca2969ec,0xb93caf60,0xdf940cad,0x87ea6e39,0x67f7fab7, + 0xf98c4fe5,0x0d0ee10f,0xc19cb91e,0xc646879a,0x7d1d7ab4,0x4b4ea50c } }, + /* 129 */ + { { 0x7a0db57e,0x19e40945,0x9a8c9702,0xe6017cad,0x1be5cff9,0xdbf739e5, + 0xa7a938a2,0x3646b3cd,0x68350dfc,0x04511085,0x56e098b5,0xad3bd6f3 }, + { 0xee2e3e3e,0x935ebabf,0x473926cb,0xfbd01702,0x9e9fb5aa,0x7c735b02, + 0x2e3feff0,0xc52a1b85,0x046b405a,0x9199abd3,0x39039971,0xe306fcec } }, + /* 130 */ + { { 0x23e4712c,0xd6d9aec8,0xc3c198ee,0x7ca8376c,0x31bebd8a,0xe6d83187, + 0xd88bfef3,0xed57aff3,0xcf44edc7,0x72a645ee,0x5cbb1517,0xd4e63d0b }, + { 0xceee0ecf,0x98ce7a1c,0x5383ee8e,0x8f012633,0xa6b455e8,0x3b879078, + 0xc7658c06,0xcbcd3d96,0x0783336a,0x721d6fe7,0x5a677136,0xf21a7263 } }, + /* 131 */ + { { 0x9586ba11,0x19d8b3cd,0x8a5c0480,0xd9e0aeb2,0x2230ef5c,0xe4261dbf, + 0x02e6bf09,0x095a9dee,0x80dc7784,0x8963723c,0x145157b1,0x5c97dbaf }, + { 0x4bc4503e,0x97e74434,0x85a6b370,0x0fb1cb31,0xcd205d4b,0x3e8df2be, + 0xf8f765da,0x497dd1bc,0x6c988a1a,0x92ef95c7,0x64dc4cfa,0x3f924baa } }, + /* 132 */ + { { 0x7268b448,0x6bf1b8dd,0xefd79b94,0xd4c28ba1,0xe4e3551f,0x2fa1f8c8, + 0x5c9187a9,0x769e3ad4,0x40326c0d,0x28843b4d,0x50d5d669,0xfefc8094 }, + { 0x90339366,0x30c85bfd,0x5ccf6c3a,0x4eeb56f1,0x28ccd1dc,0x0e72b149, + 0xf2ce978e,0x73ee85b5,0x3165bb23,0xcdeb2bf3,0x4e410abf,0x8106c923 } }, + /* 133 */ + { { 0x7d02f4ee,0xc8df0161,0x18e21225,0x8a781547,0x6acf9e40,0x4ea895eb, + 0x6e5a633d,0x8b000cb5,0x7e981ffb,0xf31d86d5,0x4475bc32,0xf5c8029c }, + { 0x1b568973,0x764561ce,0xa62996ec,0x2f809b81,0xda085408,0x9e513d64, + 0xe61ce309,0xc27d815d,0x272999e0,0x0da6ff99,0xfead73f7,0xbd284779 } }, + /* 134 */ + { { 0x9b1cdf2b,0x6033c2f9,0xbc5fa151,0x2a99cf06,0x12177b3b,0x7d27d259, + 0xc4485483,0xb1f15273,0x102e2297,0x5fd57d81,0xc7f6acb7,0x3d43e017 }, + { 0x3a70eb28,0x41a8bb0b,0x3e80b06b,0x67de2d8e,0x70c28de5,0x09245a41, + 0xa7b26023,0xad7dbcb1,0x2cbc6c1e,0x70b08a35,0x9b33041f,0xb504fb66 } }, + /* 135 */ + { { 0xf97a27c2,0xa8e85ab5,0xc10a011b,0x6ac5ec8b,0xffbcf161,0x55745533, + 0x65790a60,0x01780e85,0x99ee75b0,0xe451bf85,0x39c29881,0x8907a63b }, + { 0x260189ed,0x76d46738,0x47bd35cb,0x284a4436,0x20cab61e,0xd74e8c40, + 0x416cf20a,0x6264bf8c,0x5fd820ce,0xfa5a6c95,0xf24bb5fc,0xfa7154d0 } }, + /* 136 */ + { { 0x9b3f5034,0x18482cec,0xcd9e68fd,0x962d445a,0x95746f23,0x266fb1d6, + 0x58c94a4b,0xc66ade5a,0xed68a5b6,0xdbbda826,0x7ab0d6ae,0x05664a4d }, + { 0x025e32fc,0xbcd4fe51,0xa96df252,0x61a5aebf,0x31592a31,0xd88a07e2, + 0x98905517,0x5d9d94de,0x5fd440e7,0x96bb4010,0xe807db4c,0x1b0c47a2 } }, + /* 137 */ + { { 0x08223878,0x5c2a6ac8,0xe65a5558,0xba08c269,0x9bbc27fd,0xd22b1b9b, + 0x72b9607d,0x919171bf,0xe588dc58,0x9ab455f9,0x23662d93,0x6d54916e }, + { 0x3b1de0c1,0x8da8e938,0x804f278f,0xa84d186a,0xd3461695,0xbf4988cc, + 0xe10eb0cb,0xf5eae3be,0xbf2a66ed,0x1ff8b68f,0xc305b570,0xa68daf67 } }, + /* 138 */ + { { 0x44b2e045,0xc1004cff,0x4b1c05d4,0x91b5e136,0x88a48a07,0x53ae4090, + 0xea11bb1a,0x73fb2995,0x3d93a4ea,0x32048570,0x3bfc8a5f,0xcce45de8 }, + { 0xc2b3106e,0xaff4a97e,0xb6848b4f,0x9069c630,0xed76241c,0xeda837a6, + 
0x6cc3f6cf,0x8a0daf13,0x3da018a8,0x199d049d,0xd9093ba3,0xf867c6b1 } }, + /* 139 */ + { { 0x56527296,0xe4d42a56,0xce71178d,0xae26c73d,0x6c251664,0x70a0adac, + 0x5dc0ae1d,0x813483ae,0xdaab2daf,0x7574eacd,0xc2d55f4f,0xc56b52dc }, + { 0x95f32923,0x872bc167,0x5bdd2a89,0x4be17581,0xa7699f00,0x9b57f1e7, + 0x3ac2de02,0x5fcd9c72,0x92377739,0x83af3ba1,0xfc50b97f,0xa64d4e2b } }, + /* 140 */ + { { 0x0e552b40,0x2172dae2,0xd34d52e8,0x62f49725,0x07958f98,0x7930ee40, + 0x751fdd74,0x56da2a90,0xf53e48c3,0xf1192834,0x8e53c343,0x34d2ac26 }, + { 0x13111286,0x1073c218,0xda9d9827,0x201dac14,0xee95d378,0xec2c29db, + 0x1f3ee0b1,0x9316f119,0x544ce71c,0x7890c9f0,0x27612127,0xd77138af } }, + /* 141 */ + { { 0x3b4ad1cd,0x78045e6d,0x4aa49bc1,0xcd86b94e,0xfd677a16,0x57e51f1d, + 0xfa613697,0xd9290935,0x34f4d893,0x7a3f9593,0x5d5fcf9b,0x8c9c248b }, + { 0x6f70d4e9,0x9f23a482,0x63190ae9,0x17273454,0x5b081a48,0x4bdd7c13, + 0x28d65271,0x1e2de389,0xe5841d1f,0x0bbaaa25,0x746772e5,0xc4c18a79 } }, + /* 142 */ + { { 0x593375ac,0x10ee2681,0x7dd5e113,0x4f3288be,0x240f3538,0x9a97b2fb, + 0x1de6b1e2,0xfa11089f,0x1351bc58,0x516da562,0x2dfa85b5,0x573b6119 }, + { 0x6cba7df5,0x89e96683,0x8c28ab40,0xf299be15,0xad43fcbf,0xe91c9348, + 0x9a1cefb3,0xe9bbc7cc,0x738b2775,0xc8add876,0x775eaa01,0x6e3b1f2e } }, + /* 143 */ + { { 0xb677788b,0x0365a888,0x3fd6173c,0x634ae8c4,0x9e498dbe,0x30498761, + 0xc8f779ab,0x08c43e6d,0x4c09aca9,0x068ae384,0x2018d170,0x2380c70b }, + { 0xa297c5ec,0xcf77fbc3,0xca457948,0xdacbc853,0x336bec7e,0x3690de04, + 0x14eec461,0x26bbac64,0x1f713abf,0xd1c23c7e,0xe6fd569e,0xf08bbfcd } }, + /* 144 */ + { { 0x84770ee3,0x5f8163f4,0x744a1706,0x0e0c7f94,0xe1b2d46d,0x9c8f05f7, + 0xd01fd99a,0x417eafe7,0x11440e5b,0x2ba15df5,0x91a6fbcf,0xdc5c552a }, + { 0xa270f721,0x86271d74,0xa004485b,0x32c0a075,0x8defa075,0x9d1a87e3, + 0xbf0d20fe,0xb590a7ac,0x8feda1f5,0x430c41c2,0x58f6ec24,0x454d2879 } }, + /* 145 */ + { { 0x7c525435,0x52b7a635,0x37c4bdbc,0x3d9ef57f,0xdffcc475,0x2bb93e9e, + 0x7710f3be,0xf7b8ba98,0x21b727de,0x42ee86da,0x2e490d01,0x55ac3f19 }, + { 0xc0c1c390,0x487e3a6e,0x446cde7b,0x036fb345,0x496ae951,0x089eb276, + 0x71ed1234,0xedfed4d9,0x900f0b46,0x661b0dd5,0x8582f0d3,0x11bd6f1b } }, + /* 146 */ + { { 0x076bc9d1,0x5cf9350f,0xcf3cd2c3,0x15d903be,0x25af031c,0x21cfc8c2, + 0x8b1cc657,0xe0ad3248,0x70014e87,0xdd9fb963,0x297f1658,0xf0f3a5a1 }, + { 0xf1f703aa,0xbb908fba,0x2f6760ba,0x2f9cc420,0x66a38b51,0x00ceec66, + 0x05d645da,0x4deda330,0xf7de3394,0xb9cf5c72,0x1ad4c906,0xaeef6502 } }, + /* 147 */ + { { 0x7a19045d,0x0583c8b1,0xd052824c,0xae7c3102,0xff6cfa58,0x2a234979, + 0x62c733c0,0xfe9dffc9,0x9c0c4b09,0x3a7fa250,0x4fe21805,0x516437bb }, + { 0xc2a23ddb,0x9454e3d5,0x289c104e,0x0726d887,0x4fd15243,0x8977d918, + 0x6d7790ba,0xc559e73f,0x465af85f,0x8fd3e87d,0x5feee46b,0xa2615c74 } }, + /* 148 */ + { { 0x4335167d,0xc8d607a8,0xe0f5c887,0x8b42d804,0x398d11f9,0x5f9f13df, + 0x20740c67,0x5aaa5087,0xa3d9234b,0x83da9a6a,0x2a54bad1,0xbd3a5c4e }, + { 0x2db0f658,0xdd13914c,0x5a3f373a,0x29dcb66e,0x5245a72b,0xbfd62df5, + 0x91e40847,0x19d18023,0xb136b1ae,0xd9df74db,0x3f93bc5b,0x72a06b6b } }, + /* 149 */ + { { 0xad19d96f,0x6da19ec3,0xfb2a4099,0xb342daa4,0x662271ea,0x0e61633a, + 0xce8c054b,0x3bcece81,0x8bd62dc6,0x7cc8e061,0xee578d8b,0xae189e19 }, + { 0xdced1eed,0x73e7a25d,0x7875d3ab,0xc1257f0a,0x1cfef026,0x2cb2d5a2, + 0xb1fdf61c,0xd98ef39b,0x24e83e6c,0xcd8e6f69,0xc7b7088b,0xd71e7076 } }, + /* 150 */ + { { 0x9d4245bf,0x33936830,0x2ac2953b,0x22d96217,0x56c3c3cd,0xb3bf5a82, + 0x0d0699e8,0x50c9be91,0x8f366459,0xec094463,0x513b7c35,0x6c056dba }, + { 
0x045ab0e3,0x687a6a83,0x445c9295,0x8d40b57f,0xa16f5954,0x0f345048, + 0x3d8f0a87,0x64b5c639,0x9f71c5e2,0x106353a2,0x874f0dd4,0xdd58b475 } }, + /* 151 */ + { { 0x62230c72,0x67ec084f,0x481385e3,0xf14f6cca,0x4cda7774,0xf58bb407, + 0xaa2dbb6b,0xe15011b1,0x0c035ab1,0xd488369d,0x8245f2fd,0xef83c24a }, + { 0x9fdc2538,0xfb57328f,0x191fe46a,0x79808293,0x32ede548,0xe28f5c44, + 0xea1a022c,0x1b3cda99,0x3df2ec7f,0x39e639b7,0x760e9a18,0x77b6272b } }, + /* 152 */ + { { 0xa65d56d5,0x2b1d51bd,0x7ea696e0,0x3a9b71f9,0x9904f4c4,0x95250ecc, + 0xe75774b7,0x8bc4d6eb,0xeaeeb9aa,0x0e343f8a,0x930e04cb,0xc473c1d1 }, + { 0x064cd8ae,0x282321b1,0x5562221c,0xf4b4371e,0xd1bf1221,0xc1cc81ec, + 0xe2c8082f,0xa52a07a9,0xba64a958,0x350d8e59,0x6fb32c9a,0x29e4f3de } }, + /* 153 */ + { { 0xba89aaa5,0x0aa9d56c,0xc4c6059e,0xf0208ac0,0xbd6ddca4,0x7400d9c6, + 0xf2c2f74a,0xb384e475,0xb1562dd3,0x4c1061fc,0x2e153b8d,0x3924e248 }, + { 0x849808ab,0xf38b8d98,0xa491aa36,0x29bf3260,0x88220ede,0x85159ada, + 0xbe5bc422,0x8b47915b,0xd7300967,0xa934d72e,0x2e515d0d,0xc4f30398 } }, + /* 154 */ + { { 0x1b1de38b,0xe3e9ee42,0x42636760,0xa124e25a,0x90165b1a,0x90bf73c0, + 0x146434c5,0x21802a34,0x2e1fa109,0x54aa83f2,0xed9c51e9,0x1d4bd03c }, + { 0x798751e6,0xc2d96a38,0x8c3507f5,0xed27235f,0xc8c24f88,0xb5fb80e2, + 0xd37f4f78,0xf873eefa,0xf224ba96,0x7229fd74,0x9edd7149,0x9dcd9199 } }, + /* 155 */ + { { 0x4e94f22a,0xee9f81a6,0xf71ec341,0xe5609892,0xa998284e,0x6c818ddd, + 0x3b54b098,0x9fd47295,0x0e8a7cc9,0x47a6ac03,0xb207a382,0xde684e5e }, + { 0x2b6b956b,0x4bdd1ecd,0xf01b3583,0x09084414,0x55233b14,0xe2f80b32, + 0xef5ebc5e,0x5a0fec54,0xbf8b29a2,0x74cf25e6,0x7f29e014,0x1c757fa0 } }, + /* 156 */ + { { 0xeb0fdfe4,0x1bcb5c4a,0xf0899367,0xd7c649b3,0x05bc083b,0xaef68e3f, + 0xa78aa607,0x57a06e46,0x21223a44,0xa2136ecc,0x52f5a50b,0x89bd6484 }, + { 0x4455f15a,0x724411b9,0x08a9c0fd,0x23dfa970,0x6db63bef,0x7b0da4d1, + 0xfb162443,0x6f8a7ec1,0xe98284fb,0xc1ac9cee,0x33566022,0x085a582b } }, + /* 157 */ + { { 0xec1f138a,0x15cb61f9,0x668f0c28,0x11c9a230,0xdf93f38f,0xac829729, + 0x4048848d,0xcef25698,0x2bba8fbf,0x3f686da0,0x111c619a,0xed5fea78 }, + { 0xd6d1c833,0x9b4f73bc,0x86e7bf80,0x50951606,0x042b1d51,0xa2a73508, + 0x5fb89ec2,0x9ef6ea49,0x5ef8b892,0xf1008ce9,0x9ae8568b,0x78a7e684 } }, + /* 158 */ + { { 0x10470cd8,0x3fe83a7c,0xf86df000,0x92734682,0xda9409b5,0xb5dac06b, + 0x94939c5f,0x1e7a9660,0x5cc116dc,0xdec6c150,0x66bac8cc,0x1a52b408 }, + { 0x6e864045,0x5303a365,0x9139efc1,0x45eae72a,0x6f31d54f,0x83bec646, + 0x6e958a6d,0x2fb4a86f,0x4ff44030,0x6760718e,0xe91ae0df,0x008117e3 } }, + /* 159 */ + { { 0x384310a2,0x5d5833ba,0x1fd6c9fc,0xbdfb4edc,0x849c4fb8,0xb9a4f102, + 0x581c1e1f,0xe5fb239a,0xd0a9746d,0xba44b2e7,0x3bd942b9,0x78f7b768 }, + { 0xc87607ae,0x076c8ca1,0xd5caaa7e,0x82b23c2e,0x2763e461,0x6a581f39, + 0x3886df11,0xca8a5e4a,0x264e7f22,0xc87e90cf,0x215cfcfc,0x04f74870 } }, + /* 160 */ + { { 0x141d161c,0x5285d116,0x93c4ed17,0x67cd2e0e,0x7c36187e,0x12c62a64, + 0xed2584ca,0xf5329539,0x42fbbd69,0xc4c777c4,0x1bdfc50a,0x107de776 }, + { 0xe96beebd,0x9976dcc5,0xa865a151,0xbe2aff95,0x9d8872af,0x0e0a9da1, + 0xa63c17cc,0x5e357a3d,0xe15cc67c,0xd31fdfd8,0x7970c6d8,0xc44bbefd } }, + /* 161 */ + { { 0x4c0c62f1,0x703f83e2,0x4e195572,0x9b1e28ee,0xfe26cced,0x6a82858b, + 0xc43638fa,0xd381c84b,0xa5ba43d8,0x94f72867,0x10b82743,0x3b4a783d }, + { 0x7576451e,0xee1ad7b5,0x14b6b5c8,0xc3d0b597,0xfcacc1b8,0x3dc30954, + 0x472c9d7b,0x55df110e,0x02f8a328,0x97c86ed7,0x88dc098f,0xd0433413 } }, + /* 162 */ + { { 0x2ca8f2fe,0x1a60d152,0x491bd41f,0x61640948,0x58dfe035,0x6dae29a5, + 
0x278e4863,0x9a615bea,0x9ad7c8e5,0xbbdb4477,0x2ceac2fc,0x1c706630 }, + { 0x99699b4b,0x5e2b54c6,0x239e17e8,0xb509ca6d,0xea063a82,0x728165fe, + 0xb6a22e02,0x6b5e609d,0xb26ee1df,0x12813905,0x439491fa,0x07b9f722 } }, + /* 163 */ + { { 0x48ff4e49,0x1592ec14,0x6d644129,0x3e4e9f17,0x1156acc0,0x7acf8288, + 0xbb092b0b,0x5aa34ba8,0x7d38393d,0xcd0f9022,0xea4f8187,0x416724dd }, + { 0xc0139e73,0x3c4e641c,0x91e4d87d,0xe0fe46cf,0xcab61f8a,0xedb3c792, + 0xd3868753,0x4cb46de4,0x20f1098a,0xe449c21d,0xf5b8ea6e,0x5e5fd059 } }, + /* 164 */ + { { 0x75856031,0x7fcadd46,0xeaf2fbd0,0x89c7a4cd,0x7a87c480,0x1af523ce, + 0x61d9ae90,0xe5fc1095,0xbcdb95f5,0x3fb5864f,0xbb5b2c7d,0xbeb5188e }, + { 0x3ae65825,0x3d1563c3,0x0e57d641,0x116854c4,0x1942ebd3,0x11f73d34, + 0xc06955b3,0x24dc5904,0x995a0a62,0x8a0d4c83,0x5d577b7d,0xfb26b86d } }, + /* 165 */ + { { 0xc686ae17,0xc53108e7,0xd1c1da56,0x9090d739,0x9aec50ae,0x4583b013, + 0xa49a6ab2,0xdd9a088b,0xf382f850,0x28192eea,0xf5fe910e,0xcc8df756 }, + { 0x9cab7630,0x877823a3,0xfb8e7fc1,0x64984a9a,0x364bfc16,0x5448ef9c, + 0xc44e2a9a,0xbbb4f871,0x435c95e9,0x901a41ab,0xaaa50a06,0xc6c23e5f } }, + /* 166 */ + { { 0x9034d8dd,0xb78016c1,0x0b13e79b,0x856bb44b,0xb3241a05,0x85c6409a, + 0x2d78ed21,0x8d2fe19a,0x726eddf2,0xdcc7c26d,0x25104f04,0x3ccaff5f }, + { 0x6b21f843,0x397d7edc,0xe975de4c,0xda88e4dd,0x4f5ab69e,0x5273d396, + 0x9aae6cc0,0x537680e3,0x3e6f9461,0xf749cce5,0x957bffd3,0x021ddbd9 } }, + /* 167 */ + { { 0x777233cf,0x7b64585f,0x0942a6f0,0xfe6771f6,0xdfe6eef0,0x636aba7a, + 0x86038029,0x63bbeb56,0xde8fcf36,0xacee5842,0xd4a20524,0x48d9aa99 }, + { 0x0da5e57a,0xcff7a74c,0xe549d6c9,0xc232593c,0xf0f2287b,0x68504bcc, + 0xbc8360b5,0x6d7d098d,0x5b402f41,0xeac5f149,0xb87d1bf1,0x61936f11 } }, + /* 168 */ + { { 0xb8153a9d,0xaa9da167,0x9e83ecf0,0xa49fe3ac,0x1b661384,0x14c18f8e, + 0x38434de1,0x61c24dab,0x283dae96,0x3d973c3a,0x82754fc9,0xc99baa01 }, + { 0x4c26b1e3,0x477d198f,0xa7516202,0x12e8e186,0x362addfa,0x386e52f6, + 0xc3962853,0x31e8f695,0x6aaedb60,0xdec2af13,0x29cf74ac,0xfcfdb4c6 } }, + /* 169 */ + { { 0xcca40298,0x6b3ee958,0xf2f5d195,0xc3878153,0xed2eae5b,0x0c565630, + 0x3a697cf2,0xd089b37e,0xad5029ea,0xc2ed2ac7,0x0f0dda6a,0x7e5cdfad }, + { 0xd9b86202,0xf98426df,0x4335e054,0xed1960b1,0x3f14639e,0x1fdb0246, + 0x0db6c670,0x17f709c3,0x773421e1,0xbfc687ae,0x26c1a8ac,0x13fefc4a } }, + /* 170 */ + { { 0x7ffa0a5f,0xe361a198,0xc63fe109,0xf4b26102,0x6c74e111,0x264acbc5, + 0x77abebaf,0x4af445fa,0x24cddb75,0x448c4fdd,0x44506eea,0x0b13157d }, + { 0x72e9993d,0x22a6b159,0x85e5ecbe,0x2c3c57e4,0xfd83e1a1,0xa673560b, + 0xc3b8c83b,0x6be23f82,0x40bbe38e,0x40b13a96,0xad17399b,0x66eea033 } }, + /* 171 */ + { { 0xb4c6c693,0x49fc6e95,0x36af7d38,0xefc735de,0x35fe42fc,0xe053343d, + 0x6a9ab7c3,0xf0aa427c,0x4a0fcb24,0xc79f0436,0x93ebbc50,0x16287243 }, + { 0x16927e1e,0x5c3d6bd0,0x673b984c,0x40158ed2,0x4cd48b9a,0xa7f86fc8, + 0x60ea282d,0x1643eda6,0xe2a1beed,0x45b393ea,0x19571a94,0x664c839e } }, + /* 172 */ + { { 0x27eeaf94,0x57745750,0xea99e1e7,0x2875c925,0x5086adea,0xc127e7ba, + 0x86fe424f,0x765252a0,0x2b6c0281,0x1143cc6c,0xd671312d,0xc9bb2989 }, + { 0x51acb0a5,0x880c337c,0xd3c60f78,0xa3710915,0x9262b6ed,0x496113c0, + 0x9ce48182,0x5d25d9f8,0xb3813586,0x53b6ad72,0x4c0e159c,0x0ea3bebc } }, + /* 173 */ + { { 0xc5e49bea,0xcaba450a,0x7c05da59,0x684e5415,0xde7ac36c,0xa2e9cab9, + 0x2e6f957b,0x4ca79b5f,0x09b817b1,0xef7b0247,0x7d89df0f,0xeb304990 }, + { 0x46fe5096,0x508f7307,0x2e04eaaf,0x695810e8,0x3512f76c,0x88ef1bd9, + 0x3ebca06b,0x77661351,0xccf158b7,0xf7d4863a,0x94ee57da,0xb2a81e44 } }, + /* 174 */ + { { 
0x6d53e6ba,0xff288e5b,0x14484ea2,0xa90de1a9,0xed33c8ec,0x2fadb60c, + 0x28b66a40,0x579d6ef3,0xec24372d,0x4f2dd6dd,0x1d66ec7d,0xe9e33fc9 }, + { 0x039eab6e,0x110899d2,0x3e97bb5e,0xa31a667a,0xcfdce68e,0x6200166d, + 0x5137d54b,0xbe83ebae,0x4800acdf,0x085f7d87,0x0c6f8c86,0xcf4ab133 } }, + /* 175 */ + { { 0x931e08fb,0x03f65845,0x1506e2c0,0x6438551e,0x9c36961f,0x5791f0dc, + 0xe3dcc916,0x68107b29,0xf495d2ca,0x83242374,0x6ee5895b,0xd8cfb663 }, + { 0xa0349b1b,0x525e0f16,0x4a0fab86,0x33cd2c6c,0x2af8dda9,0x46c12ee8, + 0x71e97ad3,0x7cc424ba,0x37621eb0,0x69766ddf,0xa5f0d390,0x95565f56 } }, + /* 176 */ + { { 0x1a0f5e94,0xe0e7bbf2,0x1d82d327,0xf771e115,0xceb111fa,0x10033e3d, + 0xd3426638,0xd269744d,0x00d01ef6,0xbdf2d9da,0xa049ceaf,0x1cb80c71 }, + { 0x9e21c677,0x17f18328,0x19c8f98b,0x6452af05,0x80b67997,0x35b9c5f7, + 0x40f8f3d4,0x5c2e1cbe,0x66d667ca,0x43f91656,0xcf9d6e79,0x9faaa059 } }, + /* 177 */ + { { 0x0a078fe6,0x8ad24618,0x464fd1dd,0xf6cc73e6,0xc3e37448,0x4d2ce34d, + 0xe3271b5f,0x624950c5,0xefc5af72,0x62910f5e,0xaa132bc6,0x8b585bf8 }, + { 0xa839327f,0x11723985,0x4aac252f,0x34e2d27d,0x6296cc4e,0x402f59ef, + 0x47053de9,0x00ae055c,0x28b4f09b,0xfc22a972,0xfa0c180e,0xa9e86264 } }, + /* 178 */ + { { 0xbc310ecc,0x0b7b6224,0x67fa14ed,0x8a1a74f1,0x7214395c,0x87dd0960, + 0xf5c91128,0xdf1b3d09,0x86b264a8,0x39ff23c6,0x3e58d4c5,0xdc2d49d0 }, + { 0xa9d6f501,0x2152b7d3,0xc04094f7,0xf4c32e24,0xd938990f,0xc6366596, + 0x94fb207f,0x084d078f,0x328594cb,0xfd99f1d7,0xcb2d96b3,0x36defa64 } }, + /* 179 */ + { { 0x13ed7cbe,0x4619b781,0x9784bd0e,0x95e50015,0x2c7705fe,0x2a32251c, + 0x5f0dd083,0xa376af99,0x0361a45b,0x55425c6c,0x1f291e7b,0x812d2cef }, + { 0x5fd94972,0xccf581a0,0xe56dc383,0x26e20e39,0x63dbfbf0,0x0093685d, + 0x36b8c575,0x1fc164cc,0x390ef5e7,0xb9c5ab81,0x26908c66,0x40086beb } }, + /* 180 */ + { { 0x37e3c115,0xe5e54f79,0xc1445a8a,0x69b8ee8c,0xb7659709,0x79aedff2, + 0x1b46fbe6,0xe288e163,0xd18d7bb7,0xdb4844f0,0x48aa6424,0xe0ea23d0 }, + { 0xf3d80a73,0x714c0e4e,0x3bd64f98,0x87a0aa9e,0x2ec63080,0x8844b8a8, + 0x255d81a3,0xe0ac9c30,0x455397fc,0x86151237,0x2f820155,0x0b979464 } }, + /* 181 */ + { { 0x4ae03080,0x127a255a,0x580a89fb,0x232306b4,0x6416f539,0x04e8cd6a, + 0x13b02a0e,0xaeb70dee,0x4c09684a,0xa3038cf8,0x28e433ee,0xa710ec3c }, + { 0x681b1f7d,0x77a72567,0x2fc28170,0x86fbce95,0xf5735ac8,0xd3408683, + 0x6bd68e93,0x3a324e2a,0xc027d155,0x7ec74353,0xd4427177,0xab60354c } }, + /* 182 */ + { { 0xef4c209d,0x32a5342a,0x08d62704,0x2ba75274,0xc825d5fe,0x4bb4af6f, + 0xd28e7ff1,0x1c3919ce,0xde0340f6,0x1dfc2fdc,0x29f33ba9,0xc6580baf }, + { 0x41d442cb,0xae121e75,0x3a4724e4,0x4c7727fd,0x524f3474,0xe556d6a4, + 0x785642a2,0x87e13cc7,0xa17845fd,0x182efbb1,0x4e144857,0xdcec0cf1 } }, + /* 183 */ + { { 0xe9539819,0x1cb89541,0x9d94dbf1,0xc8cb3b4f,0x417da578,0x1d353f63, + 0x8053a09e,0xb7a697fb,0xc35d8b78,0x8d841731,0xb656a7a9,0x85748d6f }, + { 0xc1859c5d,0x1fd03947,0x535d22a2,0x6ce965c1,0x0ca3aadc,0x1966a13e, + 0x4fb14eff,0x9802e41d,0x76dd3fcd,0xa9048cbb,0xe9455bba,0x89b182b5 } }, + /* 184 */ + { { 0x43360710,0xd777ad6a,0x55e9936b,0x841287ef,0x04a21b24,0xbaf5c670, + 0x35ad86f1,0xf2c0725f,0xc707e72e,0x338fa650,0xd8883e52,0x2bf8ed2e }, + { 0xb56e0d6a,0xb0212cf4,0x6843290c,0x50537e12,0x98b3dc6f,0xd8b184a1, + 0x0210b722,0xd2be9a35,0x559781ee,0x407406db,0x0bc18534,0x5a78d591 } }, + /* 185 */ + { { 0xd748b02c,0x4d57aa2a,0xa12b3b95,0xbe5b3451,0x64711258,0xadca7a45, + 0x322153db,0x597e091a,0x32eb1eab,0xf3271006,0x2873f301,0xbd9adcba }, + { 0x38543f7f,0xd1dc79d1,0x921b1fef,0x00022092,0x1e5df8ed,0x86db3ef5, + 
0x9e6b944a,0x888cae04,0x791a32b4,0x71bd29ec,0xa6d1c13e,0xd3516206 } }, + /* 186 */ + { { 0x55924f43,0x2ef6b952,0x4f9de8d5,0xd2f401ae,0xadc68042,0xfc73e8d7, + 0x0d9d1bb4,0x627ea70c,0xbbf35679,0xc3bb3e3e,0xd882dee4,0x7e8a254a }, + { 0xb5924407,0x08906f50,0xa1ad444a,0xf14a0e61,0x65f3738e,0xaa0efa21, + 0xae71f161,0xd60c7dd6,0xf175894d,0x9e8390fa,0x149f4c00,0xd115cd20 } }, + /* 187 */ + { { 0xa52abf77,0x2f2e2c1d,0x54232568,0xc2a0dca5,0x54966dcc,0xed423ea2, + 0xcd0dd039,0xe48c93c7,0x176405c7,0x1e54a225,0x70d58f2e,0x1efb5b16 }, + { 0x94fb1471,0xa751f9d9,0x67d2941d,0xfdb31e1f,0x53733698,0xa6c74eb2, + 0x89a0f64a,0xd3155d11,0xa4b8d2b6,0x4414cfe4,0xf7a8e9e3,0x8d5a4be8 } }, + /* 188 */ + { { 0x52669e98,0x5c96b4d4,0x8fd42a03,0x4547f922,0xd285174e,0xcf5c1319, + 0x064bffa0,0x805cd1ae,0x246d27e7,0x50e8bc4f,0xd5781e11,0xf89ef98f }, + { 0xdee0b63f,0xb4ff95f6,0x222663a4,0xad850047,0x4d23ce9c,0x02691860, + 0x50019f59,0x3e5309ce,0x69a508ae,0x27e6f722,0x267ba52c,0xe9376652 } }, + /* 189 */ + { { 0xc0368708,0xa04d289c,0x5e306e1d,0xc458872f,0x33112fea,0x76fa23de, + 0x6efde42e,0x718e3974,0x1d206091,0xf0c98cdc,0x14a71987,0x5fa3ca62 }, + { 0xdcaa9f2a,0xeee8188b,0x589a860d,0x312cc732,0xc63aeb1f,0xf9808dd6, + 0x4ea62b53,0x70fd43db,0x890b6e97,0x2c2bfe34,0xfa426aa6,0x105f863c } }, + /* 190 */ + { { 0xb38059ad,0x0b29795d,0x90647ea0,0x5686b77e,0xdb473a3e,0xeff0470e, + 0xf9b6d1e2,0x278d2340,0xbd594ec7,0xebbff95b,0xd3a7f23d,0xf4b72334 }, + { 0xa5a83f0b,0x2a285980,0x9716a8b3,0x0786c41a,0x22511812,0x138901bd, + 0xe2fede6e,0xd1b55221,0xdf4eb590,0x0806e264,0x762e462e,0x6c4c897e } }, + /* 191 */ + { { 0xb4b41d9d,0xd10b905f,0x4523a65b,0x826ca466,0xb699fa37,0x535bbd13, + 0x73bc8f90,0x5b9933d7,0xcd2118ad,0x9332d61f,0xd4a65fd0,0x158c693e }, + { 0xe6806e63,0x4ddfb2a8,0xb5de651b,0xe31ed3ec,0x819bc69a,0xf9460e51, + 0x2c76b1f8,0x6229c0d6,0x901970a3,0xbb78f231,0x9cee72b8,0x31f3820f } }, + /* 192 */ + { { 0xc09e1c72,0xe931caf2,0x12990cf4,0x0715f298,0x943262d8,0x33aad81d, + 0x73048d3f,0x5d292b7a,0xdc7415f6,0xb152aaa4,0x0fd19587,0xc3d10fd9 }, + { 0x75ddadd0,0xf76b35c5,0x1e7b694c,0x9f5f4a51,0xc0663025,0x2f1ab7eb, + 0x920260b0,0x01c9cc87,0x05d39da6,0xc4b1f61a,0xeb4a9c4e,0x6dcd76c4 } }, + /* 193 */ + { { 0xfdc83f01,0x0ba0916f,0x9553e4f9,0x354c8b44,0xffc5e622,0xa6cc511a, + 0xe95be787,0xb954726a,0x75b41a62,0xcb048115,0xebfde989,0xfa2ae6cd }, + { 0x0f24659a,0x6376bbc7,0x4c289c43,0x13a999fd,0xec9abd8b,0xc7134184, + 0xa789ab04,0x28c02bf6,0xd3e526ec,0xff841ebc,0x640893a8,0x442b191e } }, + /* 194 */ + { { 0xfa2b6e20,0x4cac6c62,0xf6d69861,0x97f29e9b,0xbc96d12d,0x228ab1db, + 0x5e8e108d,0x6eb91327,0x40771245,0xd4b3d4d1,0xca8a803a,0x61b20623 }, + { 0xa6a560b1,0x2c2f3b41,0x3859fcf4,0x879e1d40,0x024dbfc3,0x7cdb5145, + 0x3bfa5315,0x55d08f15,0xaa93823a,0x2f57d773,0xc6a2c9a2,0xa97f259c } }, + /* 195 */ + { { 0xe58edbbb,0xc306317b,0x79dfdf13,0x25ade51c,0x16d83dd6,0x6b5beaf1, + 0x1dd8f925,0xe8038a44,0xb2a87b6b,0x7f00143c,0xf5b438de,0xa885d00d }, + { 0xcf9e48bd,0xe9f76790,0xa5162768,0xf0bdf9f0,0xad7b57cb,0x0436709f, + 0xf7c15db7,0x7e151c12,0x5d90ee3b,0x3514f022,0x2c361a8d,0x2e84e803 } }, + /* 196 */ + { { 0x563ec8d8,0x2277607d,0xe3934cb7,0xa661811f,0xf58fd5de,0x3ca72e7a, + 0x62294c6a,0x7989da04,0xf6bbefe9,0x88b3708b,0x53ed7c82,0x0d524cf7 }, + { 0x2f30c073,0x69f699ca,0x9dc1dcf3,0xf0fa264b,0x05f0aaf6,0x44ca4568, + 0xd19b9baf,0x0f5b23c7,0xeabd1107,0x39193f41,0x2a7c9b83,0x9e3e10ad } }, + /* 197 */ + { { 0xd4ae972f,0xa90824f0,0xc6e846e7,0x43eef02b,0x29d2160a,0x7e460612, + 0xfe604e91,0x29a178ac,0x4eb184b2,0x23056f04,0xeb54cdf4,0x4fcad55f }, + { 
0xae728d15,0xa0ff96f3,0xc6a00331,0x8a2680c6,0x7ee52556,0x5f84cae0, + 0xc5a65dad,0x5e462c3a,0xe2d23f4f,0x5d2b81df,0xc5b1eb07,0x6e47301b } }, + /* 198 */ + { { 0xaf8219b9,0x77411d68,0x51b1907a,0xcb883ce6,0x101383b5,0x25c87e57, + 0x982f970d,0x9c7d9859,0x118305d2,0xaa6abca5,0x9013a5db,0x725fed2f }, + { 0xababd109,0x487cdbaf,0x87586528,0xc0f8cf56,0x8ad58254,0xa02591e6, + 0xdebbd526,0xc071b1d1,0x961e7e31,0x927dfe8b,0x9263dfe1,0x55f895f9 } }, + /* 199 */ + { { 0xb175645b,0xf899b00d,0xb65b4b92,0x51f3a627,0xb67399ef,0xa2f3ac8d, + 0xe400bc20,0xe717867f,0x1967b952,0x42cc9020,0x3ecd1de1,0x3d596751 }, + { 0xdb979775,0xd41ebcde,0x6a2e7e88,0x99ba61bc,0x321504f2,0x039149a5, + 0x27ba2fad,0xe7dc2314,0xb57d8368,0x9f556308,0x57da80a7,0x2b6d16c9 } }, + /* 200 */ + { { 0x279ad982,0x84af5e76,0x9c8b81a6,0x9bb4c92d,0x0e698e67,0xd79ad44e, + 0x265fc167,0xe8be9048,0x0c3a4ccc,0xf135f7e6,0xb8863a33,0xa0a10d38 }, + { 0xd386efd9,0xe197247c,0xb52346c2,0x0eefd3f9,0x78607bc8,0xc22415f9, + 0x508674ce,0xa2a8f862,0xc8c9d607,0xa72ad09e,0x50fa764f,0xcd9f0ede } }, + /* 201 */ + { { 0xd1a46d4d,0x063391c7,0x9eb01693,0x2df51c11,0x849e83de,0xc5849800, + 0x8ad08382,0x48fd09aa,0xaa742736,0xa405d873,0xe1f9600c,0xee49e61e }, + { 0x48c76f73,0xd76676be,0x01274b2a,0xd9c100f6,0x83f8718d,0x110bb67c, + 0x02fc0d73,0xec85a420,0x744656ad,0xc0449e1e,0x37d9939b,0x28ce7376 } }, + /* 202 */ + { { 0x44544ac7,0x97e9af72,0xba010426,0xf2c658d5,0xfb3adfbd,0x732dec39, + 0xa2df0b07,0xd12faf91,0x2171e208,0x8ac26725,0x5b24fa54,0xf820cdc8 }, + { 0x94f4cf77,0x307a6eea,0x944a33c6,0x18c783d2,0x0b741ac5,0x4b939d4c, + 0x3ffbb6e4,0x1d7acd15,0x7a255e44,0x06a24858,0xce336d50,0x14fbc494 } }, + /* 203 */ + { { 0x51584e3c,0x9b920c0c,0xf7e54027,0xc7733c59,0x88422bbe,0xe24ce139, + 0x523bd6ab,0x11ada812,0xb88e6def,0xde068800,0xfe8c582d,0x7b872671 }, + { 0x7de53510,0x4e746f28,0xf7971968,0x492f8b99,0x7d928ac2,0x1ec80bc7, + 0x432eb1b5,0xb3913e48,0x32028f6e,0xad084866,0x8fc2f38b,0x122bb835 } }, + /* 204 */ + { { 0x3b0b29c3,0x0a9f3b1e,0x4fa44151,0x837b6432,0x17b28ea7,0xb9905c92, + 0x98451750,0xf39bc937,0xce8b6da1,0xcd383c24,0x010620b2,0x299f57db }, + { 0x58afdce3,0x7b6ac396,0x3d05ef47,0xa15206b3,0xb9bb02ff,0xa0ae37e2, + 0x9db3964c,0x107760ab,0x67954bea,0xe29de9a0,0x431c3f82,0x446a1ad8 } }, + /* 205 */ + { { 0x5c6b8195,0xc6fecea0,0xf49e71b9,0xd744a7c5,0x177a7ae7,0xa8e96acc, + 0x358773a7,0x1a05746c,0x37567369,0xa4162146,0x87d1c971,0xaa0217f7 }, + { 0x77fd3226,0x61e9d158,0xe4f600be,0x0f6f2304,0x7a6dff07,0xa9c4cebc, + 0x09f12a24,0xd15afa01,0x8c863ee9,0x2bbadb22,0xe5eb8c78,0xa28290e4 } }, + /* 206 */ + { { 0x3e9de330,0x55b87fa0,0x195c145b,0x12b26066,0xa920bef0,0xe08536e0, + 0x4d195adc,0x7bff6f2c,0x945f4187,0x7f319e9d,0xf892ce47,0xf9848863 }, + { 0x4fe37657,0xd0efc1d3,0x5cf0e45a,0x3c58de82,0x8b0ccbbe,0x626ad21a, + 0xaf952fc5,0xd2a31208,0xeb437357,0x81791995,0x98e95d4f,0x5f19d30f } }, + /* 207 */ + { { 0x0e6865bb,0x72e83d9a,0xf63456a6,0x22f5af3b,0x463c8d9e,0x409e9c73, + 0xdfe6970e,0x40e9e578,0x711b91ca,0x876b6efa,0x942625a3,0x895512cf }, + { 0xcb4e462b,0x84c8eda8,0x4412e7c8,0x84c0154a,0xceb7b71f,0x04325db1, + 0x66f70877,0x1537dde3,0x1992b9ac,0xf3a09399,0xd498ae77,0xa7316606 } }, + /* 208 */ + { { 0xcad260f5,0x13990d2f,0xeec0e8c0,0x76c3be29,0x0f7bd7d5,0x7dc5bee0, + 0xefebda4b,0x9be167d2,0x9122b87e,0xcce3dde6,0x82b5415c,0x75a28b09 }, + { 0xe84607a6,0xf6810bcd,0x6f4dbf0d,0xc6d58128,0x1b4dafeb,0xfead577d, + 0x066b28eb,0x9bc440b2,0x8b17e84b,0x53f1da97,0xcda9a575,0x0459504b } }, + /* 209 */ + { { 0x329e5836,0x13e39a02,0xf717269d,0x2c9e7d51,0xf26c963b,0xc5ac58d6, + 
0x79967bf5,0x3b0c6c43,0x55908d9d,0x60bbea3f,0xf07c9ad1,0xd84811e7 }, + { 0x5bd20e4a,0xfe7609a7,0x0a70baa8,0xe4325dd2,0xb3600386,0x3711f370, + 0xd0924302,0x97f9562f,0x4acc4436,0x040dc0c3,0xde79cdd4,0xfd6d725c } }, + /* 210 */ + { { 0xcf13eafb,0xb3efd0e3,0x5aa0ae5f,0x21009cbb,0x79022279,0xe480c553, + 0xb2fc9a6d,0x755cf334,0x07096ae7,0x8564a5bf,0xbd238139,0xddd649d0 }, + { 0x8a045041,0xd0de10b1,0xc957d572,0x6e05b413,0x4e0fb25c,0x5c5ff806, + 0x641162fb,0xd933179b,0xe57439f9,0x42d48485,0x8a8d72aa,0x70c5bd0a } }, + /* 211 */ + { { 0x97bdf646,0xa7671738,0xab329f7c,0xaa1485b4,0xf8f25fdf,0xce3e11d6, + 0xc6221824,0x76a3fc7e,0xf3924740,0x045f281f,0x96d13a9a,0x24557d4e }, + { 0xdd4c27cd,0x875c804b,0x0f5c7fea,0x11c5f0f4,0xdc55ff7e,0xac8c880b, + 0x1103f101,0x2acddec5,0xf99faa89,0x38341a21,0xce9d6b57,0xc7b67a2c } }, + /* 212 */ + { { 0x8e357586,0x9a0d724f,0xdf648da0,0x1d7f4ff5,0xfdee62a5,0x9c3e6c9b, + 0x0389b372,0x0499cef0,0x98eab879,0xe904050d,0x6c051617,0xe8eef1b6 }, + { 0xc37e3ca9,0xebf5bfeb,0xa4e0b91d,0x7c5e946d,0x2c4bea28,0x79097314, + 0xee67b2b7,0x81f6c109,0xdafc5ede,0xaf237d9b,0x2abb04c7,0xd2e60201 } }, + /* 213 */ + { { 0x8a4f57bf,0x6156060c,0xff11182a,0xf9758696,0x6296ef00,0x8336773c, + 0xff666899,0x9c054bce,0x719cd11c,0xd6a11611,0xdbe1acfa,0x9824a641 }, + { 0xba89fd01,0x0b7b7a5f,0x889f79d8,0xf8d3b809,0xf578285c,0xc5e1ea08, + 0xae6d8288,0x7ac74536,0x7521ef5f,0x5d37a200,0xb260a25d,0x5ecc4184 } }, + /* 214 */ + { { 0xa708c8d3,0xddcebb19,0xc63f81ec,0xe63ed04f,0x11873f95,0xd045f5a0, + 0x79f276d5,0x3b5ad544,0x425ae5b3,0x81272a3d,0x10ce1605,0x8bfeb501 }, + { 0x888228bf,0x4233809c,0xb2aff7df,0x4bd82acf,0x0cbd4a7f,0x9c68f180, + 0x6b44323d,0xfcd77124,0x891db957,0x60c0fcf6,0x04da8f7f,0xcfbb4d89 } }, + /* 215 */ + { { 0x3b26139a,0x9a6a5df9,0xb2cc7eb8,0x3e076a83,0x5a964bcd,0x47a8e82d, + 0xb9278d6b,0x8a4e2a39,0xe4443549,0x93506c98,0xf1e0d566,0x06497a8f }, + { 0x2b1efa05,0x3dee8d99,0x45393e33,0x2da63ca8,0xcf0579ad,0xa4af7277, + 0x3236d8ea,0xaf4b4639,0x32b617f5,0x6ccad95b,0xb88bb124,0xce76d8b8 } }, + /* 216 */ + { { 0x083843dc,0x63d2537a,0x1e4153b4,0x89eb3514,0xea9afc94,0x5175ebc4, + 0x8ed1aed7,0x7a652580,0xd85e8297,0x67295611,0xb584b73d,0x8dd2d68b }, + { 0x0133c3a4,0x237139e6,0x4bd278ea,0x9de838ab,0xc062fcd9,0xe829b072, + 0x63ba8706,0x70730d4f,0xd3cd05ec,0x6080483f,0x0c85f84d,0x872ab5b8 } }, + /* 217 */ + { { 0x999d4d49,0xfc0776d3,0xec3f45e7,0xa3eb59de,0x0dae1fc1,0xbc990e44, + 0xa15371ff,0x33596b1e,0x9bc7ab25,0xd447dcb2,0x35979582,0xcd5b63e9 }, + { 0x77d1ff11,0xae3366fa,0xedee6903,0x59f28f05,0xa4433bf2,0x6f43fed1, + 0xdf9ce00e,0x15409c9b,0xaca9c5dc,0x21b5cded,0x82d7bdb4,0xf9f33595 } }, + /* 218 */ + { { 0x9422c792,0x95944378,0xc958b8bf,0x239ea923,0xdf076541,0x4b61a247, + 0xbb9fc544,0x4d29ce85,0x0b424559,0x9a692a67,0x0e486900,0x6e0ca5a0 }, + { 0x85b3bece,0x6b79a782,0xc61f9892,0x41f35e39,0xae747f82,0xff82099a, + 0xd0ca59d6,0x58c8ae3f,0x99406b5f,0x4ac930e2,0x9df24243,0x2ce04eb9 } }, + /* 219 */ + { { 0x1ac37b82,0x4366b994,0x25b04d83,0xff0c728d,0x19c47b7c,0x1f551361, + 0xbeff13e7,0xdbf2d5ed,0xe12a683d,0xf78efd51,0x989cf9c4,0x82cd85b9 }, + { 0xe0cb5d37,0xe23c6db6,0x72ee1a15,0x818aeebd,0x28771b14,0x8212aafd, + 0x1def817d,0x7bc221d9,0x9445c51f,0xdac403a2,0x12c3746b,0x711b0517 } }, + /* 220 */ + { { 0x5ea99ecc,0x0ed9ed48,0xb8cab5e1,0xf799500d,0xb570cbdc,0xa8ec87dc, + 0xd35dfaec,0x52cfb2c2,0x6e4d80a4,0x8d31fae2,0xdcdeabe5,0xe6a37dc9 }, + { 0x1deca452,0x5d365a34,0x0d68b44e,0x09a5f8a5,0xa60744b1,0x59238ea5, + 0xbb4249e9,0xf2fedc0d,0xa909b2e3,0xe395c74e,0x39388250,0xe156d1a5 } }, + /* 221 */ + { { 
0x47181ae9,0xd796b3d0,0x44197808,0xbaf44ba8,0x34cf3fac,0xe6933094, + 0xc3bd5c46,0x41aa6ade,0xeed947c6,0x4fda75d8,0x9ea5a525,0xacd9d412 }, + { 0xd430301b,0x65cc55a3,0x7b52ea49,0x3c9a5bcf,0x159507f0,0x22d319cf, + 0xde74a8dd,0x2ee0b9b5,0x877ac2b6,0x20c26a1e,0x92e7c314,0x387d73da } }, + /* 222 */ + { { 0x8cd3fdac,0x13c4833e,0x332e5b8e,0x76fcd473,0xe2fe1fd3,0xff671b4b, + 0x5d98d8ec,0x4d734e8b,0x514bbc11,0xb1ead3c6,0x7b390494,0xd14ca858 }, + { 0x5d2d37e9,0x95a443af,0x00464622,0x73c6ea73,0x15755044,0xa44aeb4b, + 0xfab58fee,0xba3f8575,0xdc680a6f,0x9779dbc9,0x7b37ddfc,0xe1ee5f5a } }, + /* 223 */ + { { 0x12d29f46,0xcd0b4648,0x0ed53137,0x93295b0b,0x80bef6c9,0xbfe26094, + 0x54248b00,0xa6565788,0x80e7f9c4,0x69c43fca,0xbe141ea1,0x2190837b }, + { 0xa1b26cfb,0x875e159a,0x7affe852,0x90ca9f87,0x92ca598e,0x15e6550d, + 0x1938ad11,0xe3e0945d,0x366ef937,0xef7636bb,0xb39869e5,0xb6034d0b } }, + /* 224 */ + { { 0x26d8356e,0x4d255e30,0xd314626f,0xf83666ed,0xd0c8ed64,0x421ddf61, + 0x26677b61,0x96e473c5,0x9e9b18b3,0xdad4af7e,0xa9393f75,0xfceffd4a }, + { 0x11c731d5,0x843138a1,0xb2f141d9,0x05bcb3a1,0x617b7671,0x20e1fa95, + 0x88ccec7b,0xbefce812,0x90f1b568,0x582073dc,0x1f055cb7,0xf572261a } }, + /* 225 */ + { { 0x36973088,0xf3148277,0x86a9f980,0xc008e708,0xe046c261,0x1b795947, + 0xca76bca0,0xdf1e6a7d,0x71acddf0,0xabafd886,0x1364d8f4,0xff7054d9 }, + { 0xe2260594,0x2cf63547,0xd73b277e,0x468a5372,0xef9bd35e,0xc7419e24, + 0x24043cc3,0x2b4a1c20,0x890b39cd,0xa28f047a,0x46f9a2e3,0xdca2cea1 } }, + /* 226 */ + { { 0x53277538,0xab788736,0xcf697738,0xa734e225,0x6b22e2c1,0x66ee1d1e, + 0xebe1d212,0x2c615389,0x02bb0766,0xf36cad40,0x3e64f207,0x120885c3 }, + { 0x90fbfec2,0x59e77d56,0xd7a574ae,0xf9e781aa,0x5d045e53,0x801410b0, + 0xa91b5f0e,0xd3b5f0aa,0x7fbb3521,0xb3d1df00,0xc72bee9a,0x11c4b33e } }, + /* 227 */ + { { 0x83c3a7f3,0xd32b9832,0x88d8a354,0x8083abcf,0x50f4ec5a,0xdeb16404, + 0x641e2907,0x18d747f0,0xf1bbf03e,0x4e8978ae,0x88a0cd89,0x932447dc }, + { 0xcf3d5897,0x561e0feb,0x13600e6d,0xfc3a682f,0xd16a6b73,0xc78b9d73, + 0xd29bf580,0xe713fede,0x08d69e5c,0x0a225223,0x1ff7fda4,0x3a924a57 } }, + /* 228 */ + { { 0xb4093bee,0xfb64554c,0xa58c6ec0,0xa6d65a25,0x43d0ed37,0x4126994d, + 0x55152d44,0xa5689a51,0x284caa8d,0xb8e5ea8c,0xd1f25538,0x33f05d4f }, + { 0x1b615d6e,0xe0fdfe09,0x705507da,0x2ded7e8f,0x17bbcc80,0xdd5631e5, + 0x267fd11f,0x4f87453e,0xff89d62d,0xc6da723f,0xe3cda21d,0x55cbcae2 } }, + /* 229 */ + { { 0x6b4e84f3,0x336bc94e,0x4ef72c35,0x72863031,0xeeb57f99,0x6d85fdee, + 0xa42ece1b,0x7f4e3272,0x36f0320a,0x7f86cbb5,0x923331e6,0xf09b6a2b }, + { 0x56778435,0x21d3ecf1,0x8323b2d2,0x2977ba99,0x1704bc0f,0x6a1b57fb, + 0x389f048a,0xd777cf8b,0xac6b42cd,0x9ce2174f,0x09e6c55a,0x404e2bff } }, + /* 230 */ + { { 0x204c5ddb,0x9b9b135e,0x3eff550e,0x9dbfe044,0xec3be0f6,0x35eab4bf, + 0x0a43e56f,0x8b4c3f0d,0x0e73f9b3,0x4c1c6673,0x2c78c905,0x92ed38bd }, + { 0xa386e27c,0xc7003f6a,0xaced8507,0xb9c4f46f,0x59df5464,0xea024ec8, + 0x429572ea,0x4af96152,0xe1fc1194,0x279cd5e2,0x281e358c,0xaa376a03 } }, + /* 231 */ + { { 0x3cdbc95c,0x07859223,0xef2e337a,0xaae1aa6a,0x472a8544,0xc040108d, + 0x8d037b7d,0x80c853e6,0x8c7eee24,0xd221315c,0x8ee47752,0x195d3856 }, + { 0xdacd7fbe,0xd4b1ba03,0xd3e0c52b,0x4b5ac61e,0x6aab7b52,0x68d3c052, + 0x660e3fea,0xf0d7248c,0x3145efb4,0xafdb3f89,0x8f40936d,0xa73fd9a3 } }, + /* 232 */ + { { 0xbb1b17ce,0x891b9ef3,0xc6127f31,0x14023667,0x305521fd,0x12b2e58d, + 0xe3508088,0x3a47e449,0xff751507,0xe49fc84b,0x5310d16e,0x4023f722 }, + { 0xb73399fa,0xa608e5ed,0xd532aa3e,0xf12632d8,0x845e8415,0x13a2758e, + 
0x1fc2d861,0xae4b6f85,0x339d02f2,0x3879f5b1,0x80d99ebd,0x446d22a6 } }, + /* 233 */ + { { 0x4be164f1,0x0f502302,0x88b81920,0x8d09d2d6,0x984aceff,0x514056f1, + 0x75e9e80d,0xa5c4ddf0,0xdf496a93,0x38cb47e6,0x38df6bf7,0x899e1d6b }, + { 0xb59eb2a6,0x69e87e88,0x9b47f38b,0x280d9d63,0x3654e955,0x599411ea, + 0x969aa581,0xcf8dd4fd,0x530742a7,0xff5c2baf,0x1a373085,0xa4391536 } }, + /* 234 */ + { { 0xa8a4bdd2,0x6ace72a3,0xb68ef702,0xc656cdd1,0x90c4dad8,0xd4a33e7e, + 0x9d951c50,0x4aece08a,0x085d68e6,0xea8005ae,0x6f7502b8,0xfdd7a7d7 }, + { 0x98d6fa45,0xce6fb0a6,0x1104eb8c,0x228f8672,0xda09d7dc,0xd23d8787, + 0x2ae93065,0x5521428b,0xea56c366,0x95faba3d,0x0a88aca5,0xedbe5039 } }, + /* 235 */ + { { 0xbfb26c82,0xd64da0ad,0x952c2f9c,0xe5d70b3c,0xf7e77f68,0xf5e8f365, + 0x08f2d695,0x7234e002,0xd12e7be6,0xfaf900ee,0x4acf734e,0x27dc6934 }, + { 0xc260a46a,0x80e4ff5e,0x2dc31c28,0x7da5ebce,0xca69f552,0x485c5d73, + 0x69cc84c2,0xcdfb6b29,0xed6d4eca,0x031c5afe,0x22247637,0xc7bbf4c8 } }, + /* 236 */ + { { 0x49fe01b2,0x9d5b72c7,0x793a91b8,0x34785186,0xcf460438,0xa3ba3c54, + 0x3ab21b6f,0x73e8e43d,0xbe57b8ab,0x50cde8e0,0xdd204264,0x6488b3a7 }, + { 0xdddc4582,0xa9e398b3,0x5bec46fe,0x1698c1a9,0x156d3843,0x7f1446ef, + 0x770329a2,0x3fd25dd8,0x2c710668,0x05b1221a,0xa72ee6cf,0x65b2dc2a } }, + /* 237 */ + { { 0xcd021d63,0x21a885f7,0xfea61f08,0x3f344b15,0xc5cf73e6,0xad5ba6dd, + 0x227a8b23,0x154d0d8f,0xdc559311,0x9b74373c,0x98620fa1,0x4feab715 }, + { 0x7d9ec924,0x5098938e,0x6d47e550,0x84d54a5e,0x1b617506,0x1a2d1bdc, + 0x615868a4,0x99fe1782,0x3005a924,0x171da780,0x7d8f79b6,0xa70bf5ed } }, + /* 238 */ + { { 0xfe2216c5,0x0bc1250d,0x7601b351,0x2c37e250,0xd6f06b7e,0xb6300175, + 0x8bfeb9b7,0x4dde8ca1,0xb82f843d,0x4f210432,0xb1ac0afd,0x8d70e2f9 }, + { 0xaae91abb,0x25c73b78,0x863028f2,0x0230dca3,0xe5cf30b7,0x8b923ecf, + 0x5506f265,0xed754ec2,0x729a5e39,0x8e41b88c,0xbabf889b,0xee67cec2 } }, + /* 239 */ + { { 0x1be46c65,0xe183acf5,0xe7565d7a,0x9789538f,0xd9627b4e,0x87873391, + 0x9f1d9187,0xbf4ac4c1,0x4691f5c8,0x5db99f63,0x74a1fb98,0xa68df803 }, + { 0xbf92b5fa,0x3c448ed1,0x3e0bdc32,0xa098c841,0x79bf016c,0x8e74cd55, + 0x115e244d,0x5df0d09c,0x3410b66e,0x9418ad01,0x17a02130,0x8b6124cb } }, + /* 240 */ + { { 0xc26e3392,0x425ec3af,0xa1722e00,0xc07f8470,0xe2356b43,0xdcc28190, + 0xb1ef59a6,0x4ed97dff,0xc63028c1,0xc22b3ad1,0x68c18988,0x070723c2 }, + { 0x4cf49e7d,0x70da302f,0x3f12a522,0xc5e87c93,0x18594148,0x74acdd1d, + 0xca74124c,0xad5f73ab,0xd69fd478,0xe72e4a3e,0x7b117cc3,0x61593868 } }, + /* 241 */ + { { 0xa9aa0486,0x7b7b9577,0xa063d557,0x6e41fb35,0xda9047d7,0xb017d5c7, + 0x68a87ba9,0x8c748280,0xdf08ad93,0xab45fa5c,0x4c288a28,0xcd9fb217 }, + { 0x5747843d,0x59544642,0xa56111e3,0x34d64c6c,0x4bfce8d5,0x12e47ea1, + 0x6169267f,0x17740e05,0xeed03fb5,0x5c49438e,0x4fc3f513,0x9da30add } }, + /* 242 */ + { { 0xccfa5200,0xc4e85282,0x6a19b13d,0x2707608f,0xf5726e2f,0xdcb9a53d, + 0xe9427de5,0x612407c9,0xd54d582a,0x3e5a17e1,0x655ae118,0xb99877de }, + { 0x015254de,0x6f0e972b,0xf0a6f7c5,0x92a56db1,0xa656f8b2,0xd297e4e1, + 0xad981983,0x99fe0052,0x07cfed84,0xd3652d2f,0x843c1738,0xc784352e } }, + /* 243 */ + { { 0x7e9b2d8a,0x6ee90af0,0x57cf1964,0xac8d7018,0x71f28efc,0xf6ed9031, + 0x6812b20e,0x7f70d5a9,0xf1c61eee,0x27b557f4,0xc6263758,0xf1c9bd57 }, + { 0x2a1a6194,0x5cf7d014,0x1890ab84,0xdd614e0b,0x0e93c2a6,0x3ef9de10, + 0xe0cd91c5,0xf98cf575,0x14befc32,0x504ec0c6,0x6279d68c,0xd0513a66 } }, + /* 244 */ + { { 0xa859fb6a,0xa8eadbad,0xdb283666,0xcf8346e7,0x3e22e355,0x7b35e61a, + 0x99639c6b,0x293ece2c,0x56f241c8,0xfa0162e2,0xbf7a1dda,0xd2e6c7b9 }, + { 
0x40075e63,0xd0de6253,0xf9ec8286,0x2405aa61,0x8fe45494,0x2237830a, + 0x364e9c8c,0x4fd01ac7,0x904ba750,0x4d9c3d21,0xaf1b520b,0xd589be14 } }, + /* 245 */ + { { 0x4662e53b,0x13576a4f,0xf9077676,0x35ec2f51,0x97c0af97,0x66297d13, + 0x9e598b58,0xed3201fe,0x5e70f604,0x49bc752a,0xbb12d951,0xb54af535 }, + { 0x212c1c76,0x36ea4c2b,0xeb250dfd,0x18f5bbc7,0x9a0a1a46,0xa0d466cc, + 0xdac2d917,0x52564da4,0x8e95fab5,0x206559f4,0x9ca67a33,0x7487c190 } }, + /* 246 */ + { { 0xdde98e9c,0x75abfe37,0x2a411199,0x99b90b26,0xdcdb1f7c,0x1b410996, + 0x8b3b5675,0xab346f11,0xf1f8ae1e,0x04852193,0x6b8b98c1,0x1ec4d227 }, + { 0x45452baa,0xba3bc926,0xacc4a572,0x387d1858,0xe51f171e,0x9478eff6, + 0x931e1c00,0xf357077d,0xe54c8ca8,0xffee77cd,0x551dc9a4,0xfb4892ff } }, + /* 247 */ + { { 0x2db8dff8,0x5b1bdad0,0x5a2285a2,0xd462f4fd,0xda00b461,0x1d6aad8e, + 0x41306d1b,0x43fbefcf,0x6a13fe19,0x428e86f3,0x17f89404,0xc8b2f118 }, + { 0xf0d51afb,0x762528aa,0x549b1d06,0xa3e2fea4,0xea3ddf66,0x86fad8f2, + 0x4fbdd206,0x0d9ccc4b,0xc189ff5a,0xcde97d4c,0x199f19a6,0xc36793d6 } }, + /* 248 */ + { { 0x51b85197,0xea38909b,0xb4c92895,0xffb17dd0,0x1ddb3f3f,0x0eb0878b, + 0xc57cf0f2,0xb05d28ff,0x1abd57e2,0xd8bde2e7,0xc40c1b20,0x7f2be28d }, + { 0x299a2d48,0x6554dca2,0x8377982d,0x5130ba2e,0x1071971a,0x8863205f, + 0x7cf2825d,0x15ee6282,0x03748f2b,0xd4b6c57f,0x430385a0,0xa9e3f4da } }, + /* 249 */ + { { 0x83fbc9c6,0x33eb7cec,0x4541777e,0x24a311c7,0x4f0767fc,0xc81377f7, + 0x4ab702da,0x12adae36,0x2a779696,0xb7fcb6db,0x01cea6ad,0x4a6fb284 }, + { 0xcdfc73de,0x5e8b1d2a,0x1b02fd32,0xd0efae8d,0xd81d8519,0x3f99c190, + 0xfc808971,0x3c18f7fa,0x51b7ae7b,0x41f713e7,0xf07fc3f8,0x0a4b3435 } }, + /* 250 */ + { { 0x019b7d2e,0x7dda3c4c,0xd4dc4b89,0x631c8d1a,0x1cdb313c,0x5489cd6e, + 0x4c07bb06,0xd44aed10,0x75f000d1,0x8f97e13a,0xdda5df4d,0x0e9ee64f }, + { 0x3e346910,0xeaa99f3b,0xfa294ad7,0x622f6921,0x0d0b2fe9,0x22aaa20d, + 0x1e5881ba,0x4fed2f99,0xc1571802,0x9af3b2d6,0xdc7ee17c,0x919e67a8 } }, + /* 251 */ + { { 0x76250533,0xc724fe4c,0x7d817ef8,0x8a2080e5,0x172c9751,0xa2afb0f4, + 0x17c0702e,0x9b10cdeb,0xc9b7e3e9,0xbf3975e3,0x1cd0cdc5,0x206117df }, + { 0xbe05ebd5,0xfb049e61,0x16c782c0,0xeb0bb55c,0xab7fed09,0x13a331b8, + 0x632863f0,0xf6c58b1d,0x4d3b6195,0x6264ef6e,0x9a53f116,0x92c51b63 } }, + /* 252 */ + { { 0x288b364d,0xa57c7bc8,0x7b41e5c4,0x4a562e08,0x698a9a11,0x699d21c6, + 0xf3f849b9,0xa4ed9581,0x9eb726ba,0xa223eef3,0xcc2884f9,0x13159c23 }, + { 0x3a3f4963,0x73931e58,0x0ada6a81,0x96500389,0x5ab2950b,0x3ee8a1c6, + 0x775fab52,0xeedf4949,0x4f2671b6,0x63d652e1,0x3c4e2f55,0xfed4491c } }, + /* 253 */ + { { 0xf4eb453e,0x335eadc3,0xcadd1a5b,0x5ff74b63,0x5d84a91a,0x6933d0d7, + 0xb49ba337,0x9ca3eeb9,0xc04c15b8,0x1f6facce,0xdc09a7e4,0x4ef19326 }, + { 0x3dca3233,0x53d2d324,0xa2259d4b,0x0ee40590,0x5546f002,0x18c22edb, + 0x09ea6b71,0x92429801,0xb0e91e61,0xaada0add,0x99963c50,0x5fe53ef4 } }, + /* 254 */ + { { 0x90c28c65,0x372dd06b,0x119ce47d,0x1765242c,0x6b22fc82,0xc041fb80, + 0xb0a7ccc1,0x667edf07,0x1261bece,0xc79599e7,0x19cff22a,0xbc69d9ba }, + { 0x13c06819,0x009d77cd,0xe282b79d,0x635a66ae,0x225b1be8,0x4edac4a6, + 0x524008f9,0x57d4f4e4,0xb056af84,0xee299ac5,0x3a0bc386,0xcc38444c } }, + /* 255 */ + { { 0xcd4c2356,0x490643b1,0x750547be,0x740a4851,0xd4944c04,0x643eaf29, + 0x299a98a0,0xba572479,0xee05fdf9,0x48b29f16,0x089b2d7b,0x33fb4f61 }, + { 0xa950f955,0x86704902,0xfedc3ddf,0x97e1034d,0x05fbb6a2,0x211320b6, + 0x432299bb,0x23d7b93f,0x8590e4a3,0x1fe1a057,0xf58c0ce6,0x8e1d0586 } }, +}; + +/* Multiply the base point of P384 by the scalar and return the result. 
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
+                                      k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 12, km);
+
+        err = sp_384_ecc_mulmod_base_12(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_12(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
+            a[8] | a[9] | a[10] | a[11]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a  A single precision integer.
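+ * The constant 1 is added to the least significant word only; the carry out
+ * of each word is propagated into the next with adc.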
+ */ +SP_NOINLINE static void sp_384_add_one_12(sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r2, #1\n\t" + "ldr r1, [%[a], #0]\n\t" + "add r1, r2\n\t" + "mov r2, #0\n\t" + "str r1, [%[a], #0]\n\t" + "ldr r1, [%[a], #4]\n\t" + "adc r1, r2\n\t" + "str r1, [%[a], #4]\n\t" + "ldr r1, [%[a], #8]\n\t" + "adc r1, r2\n\t" + "str r1, [%[a], #8]\n\t" + "ldr r1, [%[a], #12]\n\t" + "adc r1, r2\n\t" + "str r1, [%[a], #12]\n\t" + "ldr r1, [%[a], #16]\n\t" + "adc r1, r2\n\t" + "str r1, [%[a], #16]\n\t" + "ldr r1, [%[a], #20]\n\t" + "adc r1, r2\n\t" + "str r1, [%[a], #20]\n\t" + "ldr r1, [%[a], #24]\n\t" + "adc r1, r2\n\t" + "str r1, [%[a], #24]\n\t" + "ldr r1, [%[a], #28]\n\t" + "adc r1, r2\n\t" + "str r1, [%[a], #28]\n\t" + "ldr r1, [%[a], #32]\n\t" + "adc r1, r2\n\t" + "str r1, [%[a], #32]\n\t" + "ldr r1, [%[a], #36]\n\t" + "adc r1, r2\n\t" + "str r1, [%[a], #36]\n\t" + "ldr r1, [%[a], #40]\n\t" + "adc r1, r2\n\t" + "str r1, [%[a], #40]\n\t" + "ldr r1, [%[a], #44]\n\t" + "adc r1, r2\n\t" + "str r1, [%[a], #44]\n\t" + : + : [a] "r" (a) + : "memory", "r1", "r2" + ); +} + +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 24U) { + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +} + +/* Generates a scalar that is in the range 1..order-1. + * + * rng Random number generator. + * k Scalar value. + * returns RNG failures, MEMORY_E when memory allocation fails and + * MP_OKAY on success. + */ +static int sp_384_ecc_gen_k_12(WC_RNG* rng, sp_digit* k) +{ + int err; + byte buf[48]; + + do { + err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf)); + if (err == 0) { + sp_384_from_bin(k, 12, buf, (int)sizeof(buf)); + if (sp_384_cmp_12(k, p384_order2) < 0) { + sp_384_add_one_12(k); + break; + } + } + } + while (err == 0); + + return err; +} + +/* Makes a random EC key pair. + * + * rng Random number generator. + * priv Generated private value. + * pub Generated public point. + * heap Heap to use for allocation. + * returns ECC_INF_E when the point does not have the correct order, RNG + * failures, MEMORY_E when memory allocation fails and MP_OKAY on success. 
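+ * The private scalar is generated with sp_384_ecc_gen_k_12 and the public
+ * point is the base point multiplied by that scalar.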
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_12(rng, k);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_12(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_384_iszero_12(point->x) == 0) || (sp_384_iszero_12(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_12(infinity, 1, heap);
+#endif
+    sp_384_point_free_12(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 384 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<12 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
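+ * The output is the 48-byte big-endian encoding of the X ordinate of
+ * priv * pub, i.e. the ECDH shared secret.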
+ */
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    if (*outLen < 48U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 12, priv);
+        sp_384_point_from_ecc_point_12(point, pub);
+        err = sp_384_ecc_mulmod_12(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_384_to_bin(point->x, out);
+        *outLen = 48;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov   r7, %[a]\n\t"
+        "add   r7, #48\n\t"
+        "\n1:\n\t"
+        "mov   r5, #0\n\t"
+        "sub   r5, %[c]\n\t"
+        "ldr   r3, [%[a]]\n\t"
+        "ldr   r4, [%[a], #4]\n\t"
+        "ldr   r5, [%[b]]\n\t"
+        "ldr   r6, [%[b], #4]\n\t"
+        "sbc   r3, r5\n\t"
+        "sbc   r4, r6\n\t"
+        "str   r3, [%[a]]\n\t"
+        "str   r4, [%[a], #4]\n\t"
+        "sbc   %[c], %[c]\n\t"
+        "add   %[a], #8\n\t"
+        "add   %[b], #8\n\t"
+        "cmp   %[a], r7\n\t"
+        "bne   1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr   r3, [%[a], #0]\n\t"
+        "ldr   r4, [%[a], #4]\n\t"
+        "ldr   r5, [%[b], #0]\n\t"
+        "ldr   r6, [%[b], #4]\n\t"
+        "sub   r3, r5\n\t"
+        "sbc   r4, r6\n\t"
+        "str   r3, [%[a], #0]\n\t"
+        "str   r4, [%[a], #4]\n\t"
+        "ldr   r3, [%[a], #8]\n\t"
+        "ldr   r4, [%[a], #12]\n\t"
+        "ldr   r5, [%[b], #8]\n\t"
+        "ldr   r6, [%[b], #12]\n\t"
+        "sbc   r3, r5\n\t"
+        "sbc   r4, r6\n\t"
+        "str   r3, [%[a], #8]\n\t"
+        "str   r4, [%[a], #12]\n\t"
+        "ldr   r3, [%[a], #16]\n\t"
+        "ldr   r4, [%[a], #20]\n\t"
+        "ldr   r5, [%[b], #16]\n\t"
+        "ldr   r6, [%[b], #20]\n\t"
+        "sbc   r3, r5\n\t"
+        "sbc   r4, r6\n\t"
+        "str   r3, [%[a], #16]\n\t"
+        "str   r4, [%[a], #20]\n\t"
+        "ldr   r3, [%[a], #24]\n\t"
+        "ldr   r4, [%[a], #28]\n\t"
+        "ldr   r5, [%[b], #24]\n\t"
+        "ldr   r6, [%[b], #28]\n\t"
+        "sbc   r3, r5\n\t"
+        "sbc   r4, r6\n\t"
+        "str   r3, [%[a], #24]\n\t"
+        "str   r4, [%[a], #28]\n\t"
+        "ldr   r3, [%[a], #32]\n\t"
+        "ldr   r4, [%[a], #36]\n\t"
+        "ldr   r5, [%[b], #32]\n\t"
+        "ldr   r6, [%[b], #36]\n\t"
+        "sbc   r3, r5\n\t"
+        "sbc   r4, r6\n\t"
+        "str   r3, [%[a], #32]\n\t"
+        "str   r4, [%[a], #36]\n\t"
+        "ldr   r3, [%[a], #40]\n\t"
+        "ldr   r4, [%[a], #44]\n\t"
+        "ldr   r5, [%[b], #40]\n\t"
+        "ldr   r6, [%[b], #44]\n\t"
+        "sbc   r3, r5\n\t"
+        "sbc   r4, r6\n\t"
+        "str   r3, [%[a], #40]\n\t"
+        "str   r4, [%[a], #44]\n\t"
+        "sbc   %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "mov   r6, #48\n\t"
+        "add   r6, %[a]\n\t"
+        "mov   r8, %[r]\n\t"
+        "mov   r9, r6\n\t"
+        "mov   r3, #0\n\t"
+        "mov   r4, #0\n\t"
+        "1:\n\t"
+        "mov   %[r], #0\n\t"
+        "mov   r5, #0\n\t"
+        "# A[] * B\n\t"
+        "ldr   r6, [%[a]]\n\t"
+        "lsl   r6, r6, #16\n\t"
+        "lsl   r7, %[b], #16\n\t"
+        "lsr   r6, r6, #16\n\t"
+        "lsr   r7, r7, #16\n\t"
+        "mul   r7, r6\n\t"
+        "add   r3, r7\n\t"
+        "adc   r4, %[r]\n\t"
+        "adc   r5, %[r]\n\t"
+        "lsr   r7, %[b], #16\n\t"
+        "mul   r6, r7\n\t"
+        "lsr   r7, r6, #16\n\t"
+        "lsl   r6, r6, #16\n\t"
+        "add   r3, r6\n\t"
+        "adc   r4, r7\n\t"
+        "adc   r5, %[r]\n\t"
+        "ldr   r6, [%[a]]\n\t"
+        "lsr   r6, r6, #16\n\t"
+        "lsr   r7, %[b], #16\n\t"
+        "mul   r7, r6\n\t"
+        "add   r4, r7\n\t"
+        "adc   r5, %[r]\n\t"
+        "lsl   r7, %[b], #16\n\t"
+        "lsr   r7, r7, #16\n\t"
+        "mul   r6, r7\n\t"
+        "lsr   r7, r6, #16\n\t"
+        "lsl   r6, r6, #16\n\t"
+        "add   r3, r6\n\t"
+        "adc   r4, r7\n\t"
+        "adc   r5, %[r]\n\t"
+        "# A[] * B - Done\n\t"
+        "mov   %[r], r8\n\t"
+        "str   r3, [%[r]]\n\t"
+        "mov   r3, r4\n\t"
+        "mov   r4, r5\n\t"
+        "add   %[r], #4\n\t"
+        "add   %[a], #4\n\t"
+        "mov   r8, %[r]\n\t"
+        "cmp   %[a], r9\n\t"
+        "blt   1b\n\t"
+        "str   r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
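+ * The quotient is estimated one bit at a time from the top bits and is then
+ * refined with three multiply-and-subtract correction rounds.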
+ */ +SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, + sp_digit div) +{ + sp_digit r = 0; + + __asm__ __volatile__ ( + "lsr r5, %[div], #1\n\t" + "add r5, #1\n\t" + "mov r8, %[d0]\n\t" + "mov r9, %[d1]\n\t" + "# Do top 32\n\t" + "mov r6, r5\n\t" + "sub r6, %[d1]\n\t" + "sbc r6, r6\n\t" + "add %[r], %[r]\n\t" + "sub %[r], r6\n\t" + "and r6, r5\n\t" + "sub %[d1], r6\n\t" + "# Next 30 bits\n\t" + "mov r4, #29\n\t" + "1:\n\t" + "lsl %[d0], %[d0], #1\n\t" + "adc %[d1], %[d1]\n\t" + "mov r6, r5\n\t" + "sub r6, %[d1]\n\t" + "sbc r6, r6\n\t" + "add %[r], %[r]\n\t" + "sub %[r], r6\n\t" + "and r6, r5\n\t" + "sub %[d1], r6\n\t" + "sub r4, #1\n\t" + "bpl 1b\n\t" + "mov r7, #0\n\t" + "add %[r], %[r]\n\t" + "add %[r], #1\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "sub %[d1], r4\n\t" + "mov r4, %[d1]\n\t" + "mov %[d1], r9\n\t" + "sbc %[d1], r5\n\t" + "mov r5, %[d1]\n\t" + "add %[r], r5\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "mov r6, r9\n\t" + "sub r4, %[d1], r4\n\t" + "sbc r6, r5\n\t" + "mov r5, r6\n\t" + "add %[r], r5\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "mov r6, r9\n\t" + "sub r4, %[d1], r4\n\t" + "sbc r6, r5\n\t" + "mov r5, r6\n\t" + "add %[r], r5\n\t" + "mov r6, %[div]\n\t" + "sub r6, r4\n\t" + "sbc r6, r6\n\t" + "sub %[r], r6\n\t" + : [r] "+r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "r4", "r5", "r7", "r6", "r8", "r9" + ); + return r; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. 
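+ *    The mask is expected to be all ones or all zeros, so that a is either
+ *    copied to r or r is set to zero.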
+ */
+static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<12; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+    r[8] = a[8] & m;
+    r[9] = a[9] & m;
+    r[10] = a[10] & m;
+    r[11] = a[11] & m;
+#endif
+}
+
+/* Divide a by d and put the remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[24], t2[13];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[11];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 12);
+    for (i=11; i>=0; i--) {
+        r1 = div_384_word_12(t1[12 + i], t1[12 + i - 1], div);
+
+        sp_384_mul_d_12(t2, d, r1);
+        t1[12 + i] += sp_384_sub_in_place_12(&t1[i], t2);
+        t1[12 + i] -= t2[12];
+        sp_384_mask_12(t2, d, t1[12 + i]);
+        t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+        sp_384_mask_12(t2, d, t1[12 + i]);
+        t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_384_cmp_12(t1, d) >= 0;
+    sp_384_cond_sub_12(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_12(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve. */
+static const uint32_t p384_order_minus_2[12] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve. */
+static const uint32_t p384_order_low[6] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of P384 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_12(r, a, b);
+    sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_order_12(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_12(r, a);
+    sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P384 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
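+ * n  Number of times to square.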
+ */
+static void sp_384_mont_sqr_n_order_12(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_12(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_12(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_order_12(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 12);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_12(t, t);
+        if ((p384_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_12(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 12U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 12;
+    sp_digit* t3 = td + 4 * 12;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_12(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_12(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_12(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_12(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_12(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_12(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_12(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_12(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_12(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_12(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
+    sp_384_mont_sqr_n_order_12(t2, t, 48);
+    /* t= a^ffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_12(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_12(t2, t2, t);
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_12(t2, t2);
+        if (((sp_digit)p384_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_12(t2, t2, a);
+        }
+    }
+    sp_384_mont_sqr_order_12(t2, t2);
+    sp_384_mont_mul_order_12(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 384 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Second part of result as an mp_int.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
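+ * km       Scalar to use for k when not NULL and not zero; it is zeroed
+ *          after use. When NULL or zero a random k is generated.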
+ */ +int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv, + mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d = NULL; +#else + sp_digit ed[2*12]; + sp_digit xd[2*12]; + sp_digit kd[2*12]; + sp_digit rd[2*12]; + sp_digit td[3 * 2*12]; + sp_point_384 p; +#endif + sp_digit* e = NULL; + sp_digit* x = NULL; + sp_digit* k = NULL; + sp_digit* r = NULL; + sp_digit* tmp = NULL; + sp_point_384* point = NULL; + sp_digit carry; + sp_digit* s = NULL; + sp_digit* kInv = NULL; + int err = MP_OKAY; + int32_t c; + int i; + + (void)heap; + + err = sp_384_point_new_12(heap, p, point); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 12, heap, + DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + e = d + 0 * 12; + x = d + 2 * 12; + k = d + 4 * 12; + r = d + 6 * 12; + tmp = d + 8 * 12; +#else + e = ed; + x = xd; + k = kd; + r = rd; + tmp = td; +#endif + s = e; + kInv = k; + + if (hashLen > 48U) { + hashLen = 48U; + } + + sp_384_from_bin(e, 12, hash, (int)hashLen); + } + + for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { + sp_384_from_mp(x, 12, priv); + + /* New random point. */ + if (km == NULL || mp_iszero(km)) { + err = sp_384_ecc_gen_k_12(rng, k); + } + else { + sp_384_from_mp(k, 12, km); + mp_zero(km); + } + if (err == MP_OKAY) { + err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL); + } + + if (err == MP_OKAY) { + /* r = point->x mod order */ + XMEMCPY(r, point->x, sizeof(sp_digit) * 12U); + sp_384_norm_12(r); + c = sp_384_cmp_12(r, p384_order); + sp_384_cond_sub_12(r, r, p384_order, 0L - (sp_digit)(c >= 0)); + sp_384_norm_12(r); + + /* Conv k to Montgomery form (mod order) */ + sp_384_mul_12(k, k, p384_norm_order); + err = sp_384_mod_12(k, k, p384_order); + } + if (err == MP_OKAY) { + sp_384_norm_12(k); + /* kInv = 1/k mod order */ + sp_384_mont_inv_order_12(kInv, k, tmp); + sp_384_norm_12(kInv); + + /* s = r * x + e */ + sp_384_mul_12(x, x, r); + err = sp_384_mod_12(x, x, p384_order); + } + if (err == MP_OKAY) { + sp_384_norm_12(x); + carry = sp_384_add_12(s, e, x); + sp_384_cond_sub_12(s, s, p384_order, 0 - carry); + sp_384_norm_12(s); + c = sp_384_cmp_12(s, p384_order); + sp_384_cond_sub_12(s, s, p384_order, 0L - (sp_digit)(c >= 0)); + sp_384_norm_12(s); + + /* s = s * k^-1 mod order */ + sp_384_mont_mul_order_12(s, s, kInv); + sp_384_norm_12(s); + + /* Check that signature is usable. 
*/
+            if (sp_384_iszero_12(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 12);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 12U);
+#endif
+    sp_384_point_free_12(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 384)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash of the data to verify.
+ * hashLen  Length of the hash data.
+ * pX       X ordinate of the public key point.
+ * pY       Y ordinate of the public key point.
+ * pZ       Z ordinate of the public key point.
+ * r        First part of the signature as an mp_int.
+ * sm       Second part of the signature as an mp_int.
+ * res      Set to 1 when the signature verifies and 0 otherwise.
+ * heap     Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*12];
+    sp_digit u2d[2*12];
+    sp_digit sd[2*12];
+    sp_digit tmpd[2*12 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+
+    err = sp_384_point_new_12(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 12, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1 = d + 0 * 12;
+        u2 = d + 2 * 12;
+        s = d + 4 * 12;
+        tmp = d + 6 * 12;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(u1, 12, hash, (int)hashLen);
+        sp_384_from_mp(u2, 12, r);
+        sp_384_from_mp(s, 12, sm);
+        sp_384_from_mp(p2->x, 12, pX);
+        sp_384_from_mp(p2->y, 12, pY);
+        sp_384_from_mp(p2->z, 12, pZ);
+
+        {
+            sp_384_mul_12(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_12(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_12(s);
+        {
+            sp_384_mont_inv_order_12(s, s, tmp);
+            sp_384_mont_mul_order_12(u1, u1, s);
+            sp_384_mont_mul_order_12(u2, u2, s);
+        }
+
+        err = sp_384_ecc_mulmod_base_12(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_12(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_384_proj_point_add_12(p1, p1, p2, tmp);
+            if
(sp_384_iszero_12(p1->z)) {
+                if (sp_384_iszero_12(p1->x) && sp_384_iszero_12(p1->y)) {
+                    sp_384_proj_point_dbl_12(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    p1->x[7] = 0;
+                    p1->x[8] = 0;
+                    p1->x[9] = 0;
+                    p1->x[10] = 0;
+                    p1->x[11] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 12, r);
+        err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_12(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 12, r);
+            carry = sp_384_add_12(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_12(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_12(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montgomery form */
+                    err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod,
+                            p384_mp_mod);
+                        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_12(p1, 0, heap);
+    sp_384_point_free_12(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
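+ * The check performed is y^2 == x^3 - 3x + b (mod p).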
+ */
+static int sp_384_ecc_is_point_12(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*12];
+    sp_digit t2d[2*12];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 12;
+        t2 = d + 2 * 12;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_384_sqr_12(t1, point->y);
+        (void)sp_384_mod_12(t1, t1, p384_mod);
+        sp_384_sqr_12(t2, point->x);
+        (void)sp_384_mod_12(t2, t2, p384_mod);
+        sp_384_mul_12(t2, t2, point->x);
+        (void)sp_384_mod_12(t2, t2, p384_mod);
+        (void)sp_384_sub_12(t2, p384_mod, t2);
+        sp_384_mont_add_12(t1, t1, t2, p384_mod);
+
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+
+        if (sp_384_cmp_12(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 pubd;
+#endif
+    sp_point_384* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_12(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pub->x, 12, pX);
+        sp_384_from_mp(pub->y, 12, pY);
+        sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+
+        err = sp_384_ecc_is_point_12(pub, NULL);
+    }
+
+    sp_384_point_free_12(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
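+ * heap   Heap to use if dynamically allocating.
+ * ECC_OUT_OF_RANGE_E is also returned when pX or pY is greater than or
+ * equal to the modulus.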
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[12];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_12(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+            DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_384_from_mp(pub->x, 12, pX);
+        sp_384_from_mp(pub->y, 12, pY);
+        sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 12, privm);
+
+        /* Check point at infinity. */
+        if ((sp_384_iszero_12(pub->x) != 0) &&
+            (sp_384_iszero_12(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_384_cmp_12(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_12(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_12(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+        err = sp_384_ecc_mulmod_12(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_12(p->x) == 0) ||
+            (sp_384_iszero_12(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+        err = sp_384_ecc_mulmod_base_12(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_12(p->x, pub->x) != 0 ||
+            sp_384_cmp_12(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX  First EC point's X ordinate.
+ * pY  First EC point's Y ordinate.
+ * pZ  First EC point's Z ordinate.
+ * qX  Second EC point's X ordinate.
+ * qY  Second EC point's Y ordinate.
+ * qZ  Second EC point's Z ordinate.
+ * rX  Resultant EC point's X ordinate.
+ * rY  Resultant EC point's Y ordinate.
+ * rZ  Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
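sp_ecc_check_key_384 above chains five checks: the point is not infinity, both ordinates are below the prime, the point satisfies the curve equation, order times the point gives infinity, and the private scalar regenerates the public point. A toy sketch of the last two checks, using brute-force affine arithmetic over an illustrative curve (not P-384, and not the projective code path the real function uses):

```c
/* Toy key check on y^2 = x^3 - 3x + 3 over F_97. Illustrative only. */
#include <stdio.h>
#include <stdint.h>

#define P 97
#define A (-3)

typedef struct { int64_t x, y; int inf; } pt;

static int64_t md(int64_t v) { return ((v % P) + P) % P; }

static int64_t inv(int64_t v)          /* Fermat inverse: v^(P-2) mod P */
{
    int64_t r = 1, e = P - 2;
    v = md(v);
    while (e) { if (e & 1) r = md(r * v); v = md(v * v); e >>= 1; }
    return r;
}

static pt ec_add(pt p, pt q)
{
    pt r = { 0, 0, 1 };
    int64_t l;
    if (p.inf) return q;
    if (q.inf) return p;
    if (p.x == q.x && md(p.y + q.y) == 0) return r;   /* P + (-P) = O */
    if (p.x == q.x) l = md(md(3 * p.x * p.x + A) * inv(2 * p.y));
    else            l = md(md(q.y - p.y) * inv(q.x - p.x));
    r.x = md(l * l - p.x - q.x);
    r.y = md(l * (p.x - r.x) - p.y);
    r.inf = 0;
    return r;
}

static pt ec_mul(int64_t k, pt g)      /* brute force is fine for a toy */
{
    pt r = { 0, 0, 1 };
    while (k-- > 0) r = ec_add(r, g);
    return r;
}

int main(void)
{
    pt g = { 0, 10, 0 }, pub, rec;
    int64_t n = 1, priv = 5;
    pt q = g;

    while (!q.inf) { q = ec_add(q, g); n++; }   /* n = order of g */
    pub = ec_mul(priv, g);                      /* the "public key" */

    printf("order(G)      = %lld\n", (long long)n);
    printf("n*pub == O    : %d\n", ec_mul(n, pub).inf);   /* 1 */
    rec = ec_mul(priv, g);                      /* check_key's last step */
    printf("priv*G == pub : %d\n",
           !rec.inf && rec.x == pub.x && rec.y == pub.y); /* 1 */
    return 0;
}
```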
+ */ +int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 12 * 5]; + sp_point_384 pd; + sp_point_384 qd; +#endif + sp_digit* tmp; + sp_point_384* p; + sp_point_384* q = NULL; + int err; + + err = sp_384_point_new_12(NULL, pd, p); + if (err == MP_OKAY) { + err = sp_384_point_new_12(NULL, qd, q); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 12, pX); + sp_384_from_mp(p->y, 12, pY); + sp_384_from_mp(p->z, 12, pZ); + sp_384_from_mp(q->x, 12, qX); + sp_384_from_mp(q->y, 12, qY); + sp_384_from_mp(q->z, 12, qZ); + + sp_384_proj_point_add_12(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_12(q, 0, NULL); + sp_384_point_free_12(p, 0, NULL); + + return err; +} + +/* Double a projective EC point. + * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 12 * 2]; + sp_point_384 pd; +#endif + sp_digit* tmp; + sp_point_384* p; + int err; + + err = sp_384_point_new_12(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 12, pX); + sp_384_from_mp(p->y, 12, pY); + sp_384_from_mp(p->z, 12, pZ); + + sp_384_proj_point_dbl_12(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_12(p, 0, NULL); + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
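The wrappers above deliberately leave their results projective, so a chain of adds and doubles pays for only one field inversion, at sp_ecc_map_384 time, where the Jacobian triple maps back via x = X/Z^2 and y = Y/Z^3. A small sketch of that relation over a toy prime (illustrative numbers only):

```c
/* Jacobian-to-affine mapping x = X/Z^2, y = Y/Z^3 over a toy prime. */
#include <stdio.h>
#include <stdint.h>

#define P 97

static int64_t md(int64_t v) { return ((v % P) + P) % P; }

static int64_t inv(int64_t v)          /* Fermat inverse */
{
    int64_t r = 1, e = P - 2;
    v = md(v);
    while (e) { if (e & 1) r = md(r * v); v = md(v * v); e >>= 1; }
    return r;
}

int main(void)
{
    int64_t x = 3, y = 6, z = 5;
    /* lift to Jacobian: X = x*Z^2, Y = y*Z^3 */
    int64_t X = md(x * z * z), Y = md(md(y * z * z) * z);
    /* map back with a single inversion */
    int64_t zi = inv(z);
    printf("%lld %lld\n",
           (long long)md(md(X * zi) * zi),            /* 3 */
           (long long)md(md(md(Y * zi) * zi) * zi));  /* 6 */
    return 0;
}
```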
+ */ +int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 12 * 6]; + sp_point_384 pd; +#endif + sp_digit* tmp; + sp_point_384* p; + int err; + + err = sp_384_point_new_12(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 12, pX); + sp_384_from_mp(p->y, 12, pY); + sp_384_from_mp(p->z, 12, pZ); + + sp_384_map_12(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, pZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_12(p, 0, NULL); + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +static int sp_384_mont_sqrt_12(sp_digit* y) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit t1d[2 * 12]; + sp_digit t2d[2 * 12]; + sp_digit t3d[2 * 12]; + sp_digit t4d[2 * 12]; + sp_digit t5d[2 * 12]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* t3; + sp_digit* t4; + sp_digit* t5; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 12, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = d + 0 * 12; + t2 = d + 2 * 12; + t3 = d + 4 * 12; + t4 = d + 6 * 12; + t5 = d + 8 * 12; +#else + t1 = t1d; + t2 = t2d; + t3 = t3d; + t4 = t4d; + t5 = t5d; +#endif + + { + /* t2 = y ^ 0x2 */ + sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3 */ + sp_384_mont_mul_12(t1, t2, y, p384_mod, p384_mp_mod); + /* t5 = y ^ 0xc */ + sp_384_mont_sqr_n_12(t5, t1, 2, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xf */ + sp_384_mont_mul_12(t1, t1, t5, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x1e */ + sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod); + /* t3 = y ^ 0x1f */ + sp_384_mont_mul_12(t3, t2, y, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3e0 */ + sp_384_mont_sqr_n_12(t2, t3, 5, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3ff */ + sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x7fe0 */ + sp_384_mont_sqr_n_12(t2, t1, 5, p384_mod, p384_mp_mod); + /* t3 = y ^ 0x7fff */ + sp_384_mont_mul_12(t3, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fff800 */ + sp_384_mont_sqr_n_12(t2, t3, 15, p384_mod, p384_mp_mod); + /* t4 = y ^ 0x3ffffff */ + sp_384_mont_mul_12(t4, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xffffffc000000 */ + sp_384_mont_sqr_n_12(t2, t4, 30, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xfffffffffffff */ + sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xfffffffffffffff000000000000000 */ + 
sp_384_mont_sqr_n_12(t2, t1, 60, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xffffffffffffffffffffffffffffff */ + sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */ + sp_384_mont_sqr_n_12(t2, t1, 120, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */ + sp_384_mont_sqr_n_12(t2, t1, 15, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */ + sp_384_mont_sqr_n_12(t2, t1, 31, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */ + sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */ + sp_384_mont_sqr_n_12(t2, t1, 4, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */ + sp_384_mont_mul_12(t1, t5, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */ + sp_384_mont_sqr_n_12(t2, t1, 62, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */ + sp_384_mont_mul_12(t1, y, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */ + sp_384_mont_sqr_n_12(y, t1, 30, p384_mod, p384_mp_mod); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
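The long squaring/multiplication chain above is a fixed addition chain for the exponent (p+1)/4: the P-384 prime satisfies p ≡ 3 (mod 4), so a^((p+1)/4) mod p is a square root of any quadratic residue a. The underlying identity on a toy prime:

```c
/* Square roots mod p when p % 4 == 3: r = a^((p+1)/4) mod p. */
#include <stdio.h>
#include <stdint.h>

static int64_t pw(int64_t x, int64_t e, int64_t p)
{
    int64_t r = 1;
    x %= p;
    while (e) { if (e & 1) r = r * x % p; x = x * x % p; e >>= 1; }
    return r;
}

int main(void)
{
    int64_t p = 103;           /* 103 % 4 == 3, like the P-384 prime */
    int64_t a = 36;            /* 6^2: a known quadratic residue     */
    int64_t r = pw(a, (p + 1) / 4, p);
    printf("r = %lld, r^2 mod p = %lld\n",
           (long long)r, (long long)(r * r % p));  /* r^2 mod p == 36 */
    return 0;
}
```

The chain exists so the code never has to store the 384-bit exponent: the comments track which power of y each temporary holds.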
+ */ +int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit xd[2 * 12]; + sp_digit yd[2 * 12]; +#endif + sp_digit* x = NULL; + sp_digit* y = NULL; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 12, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + x = d + 0 * 12; + y = d + 2 * 12; +#else + x = xd; + y = yd; +#endif + + sp_384_from_mp(x, 12, xm); + err = sp_384_mod_mul_norm_12(x, x, p384_mod); + } + if (err == MP_OKAY) { + /* y = x^3 */ + { + sp_384_mont_sqr_12(y, x, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(y, y, x, p384_mod, p384_mp_mod); + } + /* y = x^3 - 3x */ + sp_384_mont_sub_12(y, y, x, p384_mod); + sp_384_mont_sub_12(y, y, x, p384_mod); + sp_384_mont_sub_12(y, y, x, p384_mod); + /* y = x^3 - 3x + b */ + err = sp_384_mod_mul_norm_12(x, p384_b, p384_mod); + } + if (err == MP_OKAY) { + sp_384_mont_add_12(y, y, x, p384_mod); + /* y = sqrt(x^3 - 3x + b) */ + err = sp_384_mont_sqrt_12(y); + } + if (err == MP_OKAY) { + XMEMSET(y + 12, 0, 12U * sizeof(sp_digit)); + sp_384_mont_reduce_12(y, p384_mod, p384_mp_mod); + if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) { + sp_384_mont_sub_12(y, p384_mod, y, p384_mod); + } + + err = sp_384_to_mp(y, ym); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} +#endif +#endif /* WOLFSSL_SP_384 */ +#endif /* WOLFSSL_HAVE_SP_ECC */ +#endif /* WOLFSSL_SP_ARM_THUMB_ASM */ +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */ diff --git a/client/wolfssl/wolfcrypt/src/sp_c32.c b/client/wolfssl/wolfcrypt/src/sp_c32.c new file mode 100644 index 0000000..4b9596d --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/sp_c32.c @@ -0,0 +1,23857 @@ +/* sp.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Implementation by Sean Parkinson. 
 */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
+    defined(WOLFSSL_HAVE_SP_ECC)
+
+#ifdef RSA_LOW_MEM
+#ifndef SP_RSA_PRIVATE_EXP_D
+#define SP_RSA_PRIVATE_EXP_D
+#endif
+
+#ifndef WOLFSSL_SP_SMALL
+#define WOLFSSL_SP_SMALL
+#endif
+#endif
+
+#include <wolfssl/wolfcrypt/sp.h>
+
+#ifndef WOLFSSL_SP_ASM
+#if SP_WORD_SIZE == 32
+#if (defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL)) && (defined(WOLFSSL_HAVE_SP_ECC) || !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Mask for address to obfuscate which of the two address will be used. */
+static const size_t addr_mask[2] = { 0, (size_t)-1 };
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+/* Read big endian unsigned byte array into r.
+ *
+ * r     A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a     Byte array.
+ * n     Number of bytes in array to read.
+ */
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 15U) {
+            r[j] &= 0x7fffff;
+            s = 23U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r     A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a     A multi-precision integer.
+ */
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 23
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 23
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0x7fffff;
+        s = 23U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 23U) <= (word32)DIGIT_BIT) {
+            s += 23U;
+            r[j] &= 0x7fffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 23) {
+            r[j] &= 0x7fffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 23 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 256
+ *
+ * r  A single precision integer.
+ * a  Byte array.
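sp_2048_from_bin above packs big-endian bytes into little-endian limbs that each hold only 23 bits, leaving headroom so the 64-bit column accumulators in the multipliers further down cannot overflow. A small sketch of the packing at 3-limb size:

```c
/* Pack big-endian bytes into little-endian 23-bit limbs, the layout
 * sp_2048_from_bin builds (toy 5-byte input, not a 2048-bit value). */
#include <stdio.h>
#include <stdint.h>

#define MASK23 0x7fffff

int main(void)
{
    const uint8_t a[] = { 0x01, 0x23, 0x45, 0x67, 0x89 };  /* 0x0123456789 */
    uint32_t r[3] = { 0, 0, 0 };
    uint32_t s = 0;
    int i, j = 0;

    for (i = (int)sizeof(a) - 1; i >= 0; i--) {
        r[j] |= (uint32_t)a[i] << s;
        s += 8;
        if (s >= 23) {                    /* limb full: spill high bits up */
            r[j] &= MASK23;
            s -= 23;
            r[++j] = (uint32_t)a[i] >> (8 - s);
        }
    }
    for (i = 0; i < 3; i++) {
        printf("r[%d] = 0x%06x\n", i, r[i]);  /* 0x456789, 0x000246, 0 */
    }
    return 0;
}
```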
+ */ +static void sp_2048_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + for (i=0; i<89; i++) { + r[i+1] += r[i] >> 23; + r[i] &= 0x7fffff; + } + j = 2048 / 8 - 1; + a[j] = 0; + for (i=0; i<90 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ + b += 8 - s; + if (j < 0) { + break; + } + while (b < 23) { + a[j--] = (byte)(r[i] >> b); + b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 23); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_15(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int64_t t0 = ((int64_t)a[ 0]) * b[ 0]; + int64_t t1 = ((int64_t)a[ 0]) * b[ 1] + + ((int64_t)a[ 1]) * b[ 0]; + int64_t t2 = ((int64_t)a[ 0]) * b[ 2] + + ((int64_t)a[ 1]) * b[ 1] + + ((int64_t)a[ 2]) * b[ 0]; + int64_t t3 = ((int64_t)a[ 0]) * b[ 3] + + ((int64_t)a[ 1]) * b[ 2] + + ((int64_t)a[ 2]) * b[ 1] + + ((int64_t)a[ 3]) * b[ 0]; + int64_t t4 = ((int64_t)a[ 0]) * b[ 4] + + ((int64_t)a[ 1]) * b[ 3] + + ((int64_t)a[ 2]) * b[ 2] + + ((int64_t)a[ 3]) * b[ 1] + + ((int64_t)a[ 4]) * b[ 0]; + int64_t t5 = ((int64_t)a[ 0]) * b[ 5] + + ((int64_t)a[ 1]) * b[ 4] + + ((int64_t)a[ 2]) * b[ 3] + + ((int64_t)a[ 3]) * b[ 2] + + ((int64_t)a[ 4]) * b[ 1] + + ((int64_t)a[ 5]) * b[ 0]; + int64_t t6 = ((int64_t)a[ 0]) * b[ 6] + + ((int64_t)a[ 1]) * b[ 5] + + ((int64_t)a[ 2]) * b[ 4] + + ((int64_t)a[ 3]) * b[ 3] + + ((int64_t)a[ 4]) * b[ 2] + + ((int64_t)a[ 5]) * b[ 1] + + ((int64_t)a[ 6]) * b[ 0]; + int64_t t7 = ((int64_t)a[ 0]) * b[ 7] + + ((int64_t)a[ 1]) * b[ 6] + + ((int64_t)a[ 2]) * b[ 5] + + ((int64_t)a[ 3]) * b[ 4] + + ((int64_t)a[ 4]) * b[ 3] + + ((int64_t)a[ 5]) * b[ 2] + + ((int64_t)a[ 6]) * b[ 1] + + ((int64_t)a[ 7]) * b[ 0]; + int64_t t8 = ((int64_t)a[ 0]) * b[ 8] + + ((int64_t)a[ 1]) * b[ 7] + + ((int64_t)a[ 2]) * b[ 6] + + ((int64_t)a[ 3]) * b[ 5] + + ((int64_t)a[ 4]) * b[ 4] + + ((int64_t)a[ 5]) * b[ 3] + + ((int64_t)a[ 6]) * b[ 2] + + ((int64_t)a[ 7]) * b[ 1] + + ((int64_t)a[ 8]) * b[ 0]; + int64_t t9 = ((int64_t)a[ 0]) * b[ 9] + + ((int64_t)a[ 1]) * b[ 8] + + ((int64_t)a[ 2]) * b[ 7] + + ((int64_t)a[ 3]) * b[ 6] + + ((int64_t)a[ 4]) * b[ 5] + + ((int64_t)a[ 5]) * b[ 4] + + ((int64_t)a[ 6]) * b[ 3] + + ((int64_t)a[ 7]) * b[ 2] + + ((int64_t)a[ 8]) * b[ 1] + + ((int64_t)a[ 9]) * b[ 0]; + int64_t t10 = ((int64_t)a[ 0]) * b[10] + + ((int64_t)a[ 1]) * b[ 9] + + ((int64_t)a[ 2]) * b[ 8] + + ((int64_t)a[ 3]) * b[ 7] + + ((int64_t)a[ 4]) * b[ 6] + + ((int64_t)a[ 5]) * b[ 5] + + ((int64_t)a[ 6]) * b[ 4] + + ((int64_t)a[ 7]) * b[ 3] + + ((int64_t)a[ 8]) * b[ 2] + + ((int64_t)a[ 9]) * b[ 1] + + ((int64_t)a[10]) * b[ 0]; + int64_t t11 = ((int64_t)a[ 0]) * b[11] + + ((int64_t)a[ 1]) * b[10] + + ((int64_t)a[ 2]) * b[ 9] + + ((int64_t)a[ 3]) * b[ 8] + + ((int64_t)a[ 4]) * b[ 7] + + ((int64_t)a[ 5]) * b[ 6] + + ((int64_t)a[ 6]) * b[ 5] + + ((int64_t)a[ 7]) * b[ 4] + + ((int64_t)a[ 8]) * b[ 3] + + ((int64_t)a[ 9]) * b[ 2] + + ((int64_t)a[10]) * b[ 1] + + ((int64_t)a[11]) * b[ 0]; + int64_t t12 = ((int64_t)a[ 0]) * b[12] + + ((int64_t)a[ 1]) * b[11] + + ((int64_t)a[ 2]) * b[10] + + ((int64_t)a[ 3]) * b[ 9] + + ((int64_t)a[ 4]) * b[ 8] + + ((int64_t)a[ 5]) * b[ 7] + + ((int64_t)a[ 6]) * b[ 6] + + ((int64_t)a[ 7]) * b[ 5] + + ((int64_t)a[ 8]) * b[ 4] + + ((int64_t)a[ 9]) * b[ 3] + + ((int64_t)a[10]) 
* b[ 2] + + ((int64_t)a[11]) * b[ 1] + + ((int64_t)a[12]) * b[ 0]; + int64_t t13 = ((int64_t)a[ 0]) * b[13] + + ((int64_t)a[ 1]) * b[12] + + ((int64_t)a[ 2]) * b[11] + + ((int64_t)a[ 3]) * b[10] + + ((int64_t)a[ 4]) * b[ 9] + + ((int64_t)a[ 5]) * b[ 8] + + ((int64_t)a[ 6]) * b[ 7] + + ((int64_t)a[ 7]) * b[ 6] + + ((int64_t)a[ 8]) * b[ 5] + + ((int64_t)a[ 9]) * b[ 4] + + ((int64_t)a[10]) * b[ 3] + + ((int64_t)a[11]) * b[ 2] + + ((int64_t)a[12]) * b[ 1] + + ((int64_t)a[13]) * b[ 0]; + int64_t t14 = ((int64_t)a[ 0]) * b[14] + + ((int64_t)a[ 1]) * b[13] + + ((int64_t)a[ 2]) * b[12] + + ((int64_t)a[ 3]) * b[11] + + ((int64_t)a[ 4]) * b[10] + + ((int64_t)a[ 5]) * b[ 9] + + ((int64_t)a[ 6]) * b[ 8] + + ((int64_t)a[ 7]) * b[ 7] + + ((int64_t)a[ 8]) * b[ 6] + + ((int64_t)a[ 9]) * b[ 5] + + ((int64_t)a[10]) * b[ 4] + + ((int64_t)a[11]) * b[ 3] + + ((int64_t)a[12]) * b[ 2] + + ((int64_t)a[13]) * b[ 1] + + ((int64_t)a[14]) * b[ 0]; + int64_t t15 = ((int64_t)a[ 1]) * b[14] + + ((int64_t)a[ 2]) * b[13] + + ((int64_t)a[ 3]) * b[12] + + ((int64_t)a[ 4]) * b[11] + + ((int64_t)a[ 5]) * b[10] + + ((int64_t)a[ 6]) * b[ 9] + + ((int64_t)a[ 7]) * b[ 8] + + ((int64_t)a[ 8]) * b[ 7] + + ((int64_t)a[ 9]) * b[ 6] + + ((int64_t)a[10]) * b[ 5] + + ((int64_t)a[11]) * b[ 4] + + ((int64_t)a[12]) * b[ 3] + + ((int64_t)a[13]) * b[ 2] + + ((int64_t)a[14]) * b[ 1]; + int64_t t16 = ((int64_t)a[ 2]) * b[14] + + ((int64_t)a[ 3]) * b[13] + + ((int64_t)a[ 4]) * b[12] + + ((int64_t)a[ 5]) * b[11] + + ((int64_t)a[ 6]) * b[10] + + ((int64_t)a[ 7]) * b[ 9] + + ((int64_t)a[ 8]) * b[ 8] + + ((int64_t)a[ 9]) * b[ 7] + + ((int64_t)a[10]) * b[ 6] + + ((int64_t)a[11]) * b[ 5] + + ((int64_t)a[12]) * b[ 4] + + ((int64_t)a[13]) * b[ 3] + + ((int64_t)a[14]) * b[ 2]; + int64_t t17 = ((int64_t)a[ 3]) * b[14] + + ((int64_t)a[ 4]) * b[13] + + ((int64_t)a[ 5]) * b[12] + + ((int64_t)a[ 6]) * b[11] + + ((int64_t)a[ 7]) * b[10] + + ((int64_t)a[ 8]) * b[ 9] + + ((int64_t)a[ 9]) * b[ 8] + + ((int64_t)a[10]) * b[ 7] + + ((int64_t)a[11]) * b[ 6] + + ((int64_t)a[12]) * b[ 5] + + ((int64_t)a[13]) * b[ 4] + + ((int64_t)a[14]) * b[ 3]; + int64_t t18 = ((int64_t)a[ 4]) * b[14] + + ((int64_t)a[ 5]) * b[13] + + ((int64_t)a[ 6]) * b[12] + + ((int64_t)a[ 7]) * b[11] + + ((int64_t)a[ 8]) * b[10] + + ((int64_t)a[ 9]) * b[ 9] + + ((int64_t)a[10]) * b[ 8] + + ((int64_t)a[11]) * b[ 7] + + ((int64_t)a[12]) * b[ 6] + + ((int64_t)a[13]) * b[ 5] + + ((int64_t)a[14]) * b[ 4]; + int64_t t19 = ((int64_t)a[ 5]) * b[14] + + ((int64_t)a[ 6]) * b[13] + + ((int64_t)a[ 7]) * b[12] + + ((int64_t)a[ 8]) * b[11] + + ((int64_t)a[ 9]) * b[10] + + ((int64_t)a[10]) * b[ 9] + + ((int64_t)a[11]) * b[ 8] + + ((int64_t)a[12]) * b[ 7] + + ((int64_t)a[13]) * b[ 6] + + ((int64_t)a[14]) * b[ 5]; + int64_t t20 = ((int64_t)a[ 6]) * b[14] + + ((int64_t)a[ 7]) * b[13] + + ((int64_t)a[ 8]) * b[12] + + ((int64_t)a[ 9]) * b[11] + + ((int64_t)a[10]) * b[10] + + ((int64_t)a[11]) * b[ 9] + + ((int64_t)a[12]) * b[ 8] + + ((int64_t)a[13]) * b[ 7] + + ((int64_t)a[14]) * b[ 6]; + int64_t t21 = ((int64_t)a[ 7]) * b[14] + + ((int64_t)a[ 8]) * b[13] + + ((int64_t)a[ 9]) * b[12] + + ((int64_t)a[10]) * b[11] + + ((int64_t)a[11]) * b[10] + + ((int64_t)a[12]) * b[ 9] + + ((int64_t)a[13]) * b[ 8] + + ((int64_t)a[14]) * b[ 7]; + int64_t t22 = ((int64_t)a[ 8]) * b[14] + + ((int64_t)a[ 9]) * b[13] + + ((int64_t)a[10]) * b[12] + + ((int64_t)a[11]) * b[11] + + ((int64_t)a[12]) * b[10] + + ((int64_t)a[13]) * b[ 9] + + ((int64_t)a[14]) * b[ 8]; + int64_t t23 = ((int64_t)a[ 9]) * b[14] + + ((int64_t)a[10]) * b[13] + + 
((int64_t)a[11]) * b[12] + + ((int64_t)a[12]) * b[11] + + ((int64_t)a[13]) * b[10] + + ((int64_t)a[14]) * b[ 9]; + int64_t t24 = ((int64_t)a[10]) * b[14] + + ((int64_t)a[11]) * b[13] + + ((int64_t)a[12]) * b[12] + + ((int64_t)a[13]) * b[11] + + ((int64_t)a[14]) * b[10]; + int64_t t25 = ((int64_t)a[11]) * b[14] + + ((int64_t)a[12]) * b[13] + + ((int64_t)a[13]) * b[12] + + ((int64_t)a[14]) * b[11]; + int64_t t26 = ((int64_t)a[12]) * b[14] + + ((int64_t)a[13]) * b[13] + + ((int64_t)a[14]) * b[12]; + int64_t t27 = ((int64_t)a[13]) * b[14] + + ((int64_t)a[14]) * b[13]; + int64_t t28 = ((int64_t)a[14]) * b[14]; + + t1 += t0 >> 23; r[ 0] = t0 & 0x7fffff; + t2 += t1 >> 23; r[ 1] = t1 & 0x7fffff; + t3 += t2 >> 23; r[ 2] = t2 & 0x7fffff; + t4 += t3 >> 23; r[ 3] = t3 & 0x7fffff; + t5 += t4 >> 23; r[ 4] = t4 & 0x7fffff; + t6 += t5 >> 23; r[ 5] = t5 & 0x7fffff; + t7 += t6 >> 23; r[ 6] = t6 & 0x7fffff; + t8 += t7 >> 23; r[ 7] = t7 & 0x7fffff; + t9 += t8 >> 23; r[ 8] = t8 & 0x7fffff; + t10 += t9 >> 23; r[ 9] = t9 & 0x7fffff; + t11 += t10 >> 23; r[10] = t10 & 0x7fffff; + t12 += t11 >> 23; r[11] = t11 & 0x7fffff; + t13 += t12 >> 23; r[12] = t12 & 0x7fffff; + t14 += t13 >> 23; r[13] = t13 & 0x7fffff; + t15 += t14 >> 23; r[14] = t14 & 0x7fffff; + t16 += t15 >> 23; r[15] = t15 & 0x7fffff; + t17 += t16 >> 23; r[16] = t16 & 0x7fffff; + t18 += t17 >> 23; r[17] = t17 & 0x7fffff; + t19 += t18 >> 23; r[18] = t18 & 0x7fffff; + t20 += t19 >> 23; r[19] = t19 & 0x7fffff; + t21 += t20 >> 23; r[20] = t20 & 0x7fffff; + t22 += t21 >> 23; r[21] = t21 & 0x7fffff; + t23 += t22 >> 23; r[22] = t22 & 0x7fffff; + t24 += t23 >> 23; r[23] = t23 & 0x7fffff; + t25 += t24 >> 23; r[24] = t24 & 0x7fffff; + t26 += t25 >> 23; r[25] = t25 & 0x7fffff; + t27 += t26 >> 23; r[26] = t26 & 0x7fffff; + t28 += t27 >> 23; r[27] = t27 & 0x7fffff; + r[29] = (sp_digit)(t28 >> 23); + r[28] = t28 & 0x7fffff; +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
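sp_2048_mul_15 above is a fully unrolled column ("comba") multiply: one 64-bit accumulator per result column, followed by a single carry-normalization pass. The same scheme at 3-limb size:

```c
/* Column multiply of two 3-limb, 23-bit-limb numbers: accumulate each
 * result column in 64 bits, then normalize carries, exactly the shape
 * of sp_2048_mul_15 at a smaller size. */
#include <stdio.h>
#include <stdint.h>

#define MASK23 0x7fffff

int main(void)
{
    uint32_t a[3] = { 0x7ffffe, 0x000003, 0x000001 };
    uint32_t b[3] = { 0x000005, 0x7fffff, 0x000000 };
    int64_t  t[5];
    uint32_t r[6];
    int i;

    t[0] = (int64_t)a[0] * b[0];
    t[1] = (int64_t)a[0] * b[1] + (int64_t)a[1] * b[0];
    t[2] = (int64_t)a[0] * b[2] + (int64_t)a[1] * b[1] + (int64_t)a[2] * b[0];
    t[3] = (int64_t)a[1] * b[2] + (int64_t)a[2] * b[1];
    t[4] = (int64_t)a[2] * b[2];

    for (i = 0; i < 4; i++) {             /* push carries up the columns */
        t[i + 1] += t[i] >> 23;
        r[i] = (uint32_t)t[i] & MASK23;
    }
    r[5] = (uint32_t)(t[4] >> 23);
    r[4] = (uint32_t)t[4] & MASK23;

    for (i = 5; i >= 0; i--) {
        printf("%06x%s", r[i], i ? " " : "\n");
    }
    return 0;
}
```

The 23-bit limb choice is what makes this safe: 15 products of 23-bit values plus carries stay comfortably below 2^63.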
+ */ +SP_NOINLINE static void sp_2048_sqr_15(sp_digit* r, const sp_digit* a) +{ + int64_t t0 = ((int64_t)a[ 0]) * a[ 0]; + int64_t t1 = (((int64_t)a[ 0]) * a[ 1]) * 2; + int64_t t2 = (((int64_t)a[ 0]) * a[ 2]) * 2 + + ((int64_t)a[ 1]) * a[ 1]; + int64_t t3 = (((int64_t)a[ 0]) * a[ 3] + + ((int64_t)a[ 1]) * a[ 2]) * 2; + int64_t t4 = (((int64_t)a[ 0]) * a[ 4] + + ((int64_t)a[ 1]) * a[ 3]) * 2 + + ((int64_t)a[ 2]) * a[ 2]; + int64_t t5 = (((int64_t)a[ 0]) * a[ 5] + + ((int64_t)a[ 1]) * a[ 4] + + ((int64_t)a[ 2]) * a[ 3]) * 2; + int64_t t6 = (((int64_t)a[ 0]) * a[ 6] + + ((int64_t)a[ 1]) * a[ 5] + + ((int64_t)a[ 2]) * a[ 4]) * 2 + + ((int64_t)a[ 3]) * a[ 3]; + int64_t t7 = (((int64_t)a[ 0]) * a[ 7] + + ((int64_t)a[ 1]) * a[ 6] + + ((int64_t)a[ 2]) * a[ 5] + + ((int64_t)a[ 3]) * a[ 4]) * 2; + int64_t t8 = (((int64_t)a[ 0]) * a[ 8] + + ((int64_t)a[ 1]) * a[ 7] + + ((int64_t)a[ 2]) * a[ 6] + + ((int64_t)a[ 3]) * a[ 5]) * 2 + + ((int64_t)a[ 4]) * a[ 4]; + int64_t t9 = (((int64_t)a[ 0]) * a[ 9] + + ((int64_t)a[ 1]) * a[ 8] + + ((int64_t)a[ 2]) * a[ 7] + + ((int64_t)a[ 3]) * a[ 6] + + ((int64_t)a[ 4]) * a[ 5]) * 2; + int64_t t10 = (((int64_t)a[ 0]) * a[10] + + ((int64_t)a[ 1]) * a[ 9] + + ((int64_t)a[ 2]) * a[ 8] + + ((int64_t)a[ 3]) * a[ 7] + + ((int64_t)a[ 4]) * a[ 6]) * 2 + + ((int64_t)a[ 5]) * a[ 5]; + int64_t t11 = (((int64_t)a[ 0]) * a[11] + + ((int64_t)a[ 1]) * a[10] + + ((int64_t)a[ 2]) * a[ 9] + + ((int64_t)a[ 3]) * a[ 8] + + ((int64_t)a[ 4]) * a[ 7] + + ((int64_t)a[ 5]) * a[ 6]) * 2; + int64_t t12 = (((int64_t)a[ 0]) * a[12] + + ((int64_t)a[ 1]) * a[11] + + ((int64_t)a[ 2]) * a[10] + + ((int64_t)a[ 3]) * a[ 9] + + ((int64_t)a[ 4]) * a[ 8] + + ((int64_t)a[ 5]) * a[ 7]) * 2 + + ((int64_t)a[ 6]) * a[ 6]; + int64_t t13 = (((int64_t)a[ 0]) * a[13] + + ((int64_t)a[ 1]) * a[12] + + ((int64_t)a[ 2]) * a[11] + + ((int64_t)a[ 3]) * a[10] + + ((int64_t)a[ 4]) * a[ 9] + + ((int64_t)a[ 5]) * a[ 8] + + ((int64_t)a[ 6]) * a[ 7]) * 2; + int64_t t14 = (((int64_t)a[ 0]) * a[14] + + ((int64_t)a[ 1]) * a[13] + + ((int64_t)a[ 2]) * a[12] + + ((int64_t)a[ 3]) * a[11] + + ((int64_t)a[ 4]) * a[10] + + ((int64_t)a[ 5]) * a[ 9] + + ((int64_t)a[ 6]) * a[ 8]) * 2 + + ((int64_t)a[ 7]) * a[ 7]; + int64_t t15 = (((int64_t)a[ 1]) * a[14] + + ((int64_t)a[ 2]) * a[13] + + ((int64_t)a[ 3]) * a[12] + + ((int64_t)a[ 4]) * a[11] + + ((int64_t)a[ 5]) * a[10] + + ((int64_t)a[ 6]) * a[ 9] + + ((int64_t)a[ 7]) * a[ 8]) * 2; + int64_t t16 = (((int64_t)a[ 2]) * a[14] + + ((int64_t)a[ 3]) * a[13] + + ((int64_t)a[ 4]) * a[12] + + ((int64_t)a[ 5]) * a[11] + + ((int64_t)a[ 6]) * a[10] + + ((int64_t)a[ 7]) * a[ 9]) * 2 + + ((int64_t)a[ 8]) * a[ 8]; + int64_t t17 = (((int64_t)a[ 3]) * a[14] + + ((int64_t)a[ 4]) * a[13] + + ((int64_t)a[ 5]) * a[12] + + ((int64_t)a[ 6]) * a[11] + + ((int64_t)a[ 7]) * a[10] + + ((int64_t)a[ 8]) * a[ 9]) * 2; + int64_t t18 = (((int64_t)a[ 4]) * a[14] + + ((int64_t)a[ 5]) * a[13] + + ((int64_t)a[ 6]) * a[12] + + ((int64_t)a[ 7]) * a[11] + + ((int64_t)a[ 8]) * a[10]) * 2 + + ((int64_t)a[ 9]) * a[ 9]; + int64_t t19 = (((int64_t)a[ 5]) * a[14] + + ((int64_t)a[ 6]) * a[13] + + ((int64_t)a[ 7]) * a[12] + + ((int64_t)a[ 8]) * a[11] + + ((int64_t)a[ 9]) * a[10]) * 2; + int64_t t20 = (((int64_t)a[ 6]) * a[14] + + ((int64_t)a[ 7]) * a[13] + + ((int64_t)a[ 8]) * a[12] + + ((int64_t)a[ 9]) * a[11]) * 2 + + ((int64_t)a[10]) * a[10]; + int64_t t21 = (((int64_t)a[ 7]) * a[14] + + ((int64_t)a[ 8]) * a[13] + + ((int64_t)a[ 9]) * a[12] + + ((int64_t)a[10]) * a[11]) * 2; + int64_t t22 = (((int64_t)a[ 8]) * a[14] + + 
((int64_t)a[ 9]) * a[13] + + ((int64_t)a[10]) * a[12]) * 2 + + ((int64_t)a[11]) * a[11]; + int64_t t23 = (((int64_t)a[ 9]) * a[14] + + ((int64_t)a[10]) * a[13] + + ((int64_t)a[11]) * a[12]) * 2; + int64_t t24 = (((int64_t)a[10]) * a[14] + + ((int64_t)a[11]) * a[13]) * 2 + + ((int64_t)a[12]) * a[12]; + int64_t t25 = (((int64_t)a[11]) * a[14] + + ((int64_t)a[12]) * a[13]) * 2; + int64_t t26 = (((int64_t)a[12]) * a[14]) * 2 + + ((int64_t)a[13]) * a[13]; + int64_t t27 = (((int64_t)a[13]) * a[14]) * 2; + int64_t t28 = ((int64_t)a[14]) * a[14]; + + t1 += t0 >> 23; r[ 0] = t0 & 0x7fffff; + t2 += t1 >> 23; r[ 1] = t1 & 0x7fffff; + t3 += t2 >> 23; r[ 2] = t2 & 0x7fffff; + t4 += t3 >> 23; r[ 3] = t3 & 0x7fffff; + t5 += t4 >> 23; r[ 4] = t4 & 0x7fffff; + t6 += t5 >> 23; r[ 5] = t5 & 0x7fffff; + t7 += t6 >> 23; r[ 6] = t6 & 0x7fffff; + t8 += t7 >> 23; r[ 7] = t7 & 0x7fffff; + t9 += t8 >> 23; r[ 8] = t8 & 0x7fffff; + t10 += t9 >> 23; r[ 9] = t9 & 0x7fffff; + t11 += t10 >> 23; r[10] = t10 & 0x7fffff; + t12 += t11 >> 23; r[11] = t11 & 0x7fffff; + t13 += t12 >> 23; r[12] = t12 & 0x7fffff; + t14 += t13 >> 23; r[13] = t13 & 0x7fffff; + t15 += t14 >> 23; r[14] = t14 & 0x7fffff; + t16 += t15 >> 23; r[15] = t15 & 0x7fffff; + t17 += t16 >> 23; r[16] = t16 & 0x7fffff; + t18 += t17 >> 23; r[17] = t17 & 0x7fffff; + t19 += t18 >> 23; r[18] = t18 & 0x7fffff; + t20 += t19 >> 23; r[19] = t19 & 0x7fffff; + t21 += t20 >> 23; r[20] = t20 & 0x7fffff; + t22 += t21 >> 23; r[21] = t21 & 0x7fffff; + t23 += t22 >> 23; r[22] = t22 & 0x7fffff; + t24 += t23 >> 23; r[23] = t23 & 0x7fffff; + t25 += t24 >> 23; r[24] = t24 & 0x7fffff; + t26 += t25 >> 23; r[25] = t25 & 0x7fffff; + t27 += t26 >> 23; r[26] = t26 & 0x7fffff; + t28 += t27 >> 23; r[27] = t27 & 0x7fffff; + r[29] = (sp_digit)(t28 >> 23); + r[28] = t28 & 0x7fffff; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_add_15(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + r[ 0] = a[ 0] + b[ 0]; + r[ 1] = a[ 1] + b[ 1]; + r[ 2] = a[ 2] + b[ 2]; + r[ 3] = a[ 3] + b[ 3]; + r[ 4] = a[ 4] + b[ 4]; + r[ 5] = a[ 5] + b[ 5]; + r[ 6] = a[ 6] + b[ 6]; + r[ 7] = a[ 7] + b[ 7]; + r[ 8] = a[ 8] + b[ 8]; + r[ 9] = a[ 9] + b[ 9]; + r[10] = a[10] + b[10]; + r[11] = a[11] + b[11]; + r[12] = a[12] + b[12]; + r[13] = a[13] + b[13]; + r[14] = a[14] + b[14]; + + return 0; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_sub_30(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 24; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[24] = a[24] - b[24]; + r[25] = a[25] - b[25]; + r[26] = a[26] - b[26]; + r[27] = a[27] - b[27]; + r[28] = a[28] - b[28]; + r[29] = a[29] - b[29]; + + return 0; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static int sp_2048_add_30(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 24; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[24] = a[24] + b[24]; + r[25] = a[25] + b[25]; + r[26] = a[26] + b[26]; + r[27] = a[27] + b[27]; + r[28] = a[28] + b[28]; + r[29] = a[29] + b[29]; + + return 0; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_45(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit p0[30]; + sp_digit p1[30]; + sp_digit p2[30]; + sp_digit p3[30]; + sp_digit p4[30]; + sp_digit p5[30]; + sp_digit t0[30]; + sp_digit t1[30]; + sp_digit t2[30]; + sp_digit a0[15]; + sp_digit a1[15]; + sp_digit a2[15]; + sp_digit b0[15]; + sp_digit b1[15]; + sp_digit b2[15]; + (void)sp_2048_add_15(a0, a, &a[15]); + (void)sp_2048_add_15(b0, b, &b[15]); + (void)sp_2048_add_15(a1, &a[15], &a[30]); + (void)sp_2048_add_15(b1, &b[15], &b[30]); + (void)sp_2048_add_15(a2, a0, &a[30]); + (void)sp_2048_add_15(b2, b0, &b[30]); + sp_2048_mul_15(p0, a, b); + sp_2048_mul_15(p2, &a[15], &b[15]); + sp_2048_mul_15(p4, &a[30], &b[30]); + sp_2048_mul_15(p1, a0, b0); + sp_2048_mul_15(p3, a1, b1); + sp_2048_mul_15(p5, a2, b2); + XMEMSET(r, 0, sizeof(*r)*2U*45U); + (void)sp_2048_sub_30(t0, p3, p2); + (void)sp_2048_sub_30(t1, p1, p2); + (void)sp_2048_sub_30(t2, p5, t0); + (void)sp_2048_sub_30(t2, t2, t1); + (void)sp_2048_sub_30(t0, t0, p4); + (void)sp_2048_sub_30(t1, t1, p0); + (void)sp_2048_add_30(r, r, p0); + (void)sp_2048_add_30(&r[15], &r[15], t1); + (void)sp_2048_add_30(&r[30], &r[30], t2); + (void)sp_2048_add_30(&r[45], &r[45], t0); + (void)sp_2048_add_30(&r[60], &r[60], p4); +} + +/* Square a into r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_45(sp_digit* r, const sp_digit* a) +{ + sp_digit p0[30]; + sp_digit p1[30]; + sp_digit p2[30]; + sp_digit p3[30]; + sp_digit p4[30]; + sp_digit p5[30]; + sp_digit t0[30]; + sp_digit t1[30]; + sp_digit t2[30]; + sp_digit a0[15]; + sp_digit a1[15]; + sp_digit a2[15]; + (void)sp_2048_add_15(a0, a, &a[15]); + (void)sp_2048_add_15(a1, &a[15], &a[30]); + (void)sp_2048_add_15(a2, a0, &a[30]); + sp_2048_sqr_15(p0, a); + sp_2048_sqr_15(p2, &a[15]); + sp_2048_sqr_15(p4, &a[30]); + sp_2048_sqr_15(p1, a0); + sp_2048_sqr_15(p3, a1); + sp_2048_sqr_15(p5, a2); + XMEMSET(r, 0, sizeof(*r)*2U*45U); + (void)sp_2048_sub_30(t0, p3, p2); + (void)sp_2048_sub_30(t1, p1, p2); + (void)sp_2048_sub_30(t2, p5, t0); + (void)sp_2048_sub_30(t2, t2, t1); + (void)sp_2048_sub_30(t0, t0, p4); + (void)sp_2048_sub_30(t1, t1, p0); + (void)sp_2048_add_30(r, r, p0); + (void)sp_2048_add_30(&r[15], &r[15], t1); + (void)sp_2048_add_30(&r[30], &r[30], t2); + (void)sp_2048_add_30(&r[45], &r[45], t0); + (void)sp_2048_add_30(&r[60], &r[60], p4); +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static int sp_2048_add_45(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 40; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[40] = a[40] + b[40]; + r[41] = a[41] + b[41]; + r[42] = a[42] + b[42]; + r[43] = a[43] + b[43]; + r[44] = a[44] + b[44]; + + return 0; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_add_90(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 88; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[88] = a[88] + b[88]; + r[89] = a[89] + b[89]; + + return 0; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_sub_90(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 88; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[88] = a[88] - b[88]; + r[89] = a[89] - b[89]; + + return 0; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_90(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[90]; + sp_digit* a1 = z1; + sp_digit b1[45]; + sp_digit* z2 = r + 90; + (void)sp_2048_add_45(a1, a, &a[45]); + (void)sp_2048_add_45(b1, b, &b[45]); + sp_2048_mul_45(z2, &a[45], &b[45]); + sp_2048_mul_45(z0, a, b); + sp_2048_mul_45(z1, a1, b1); + (void)sp_2048_sub_90(z1, z1, z2); + (void)sp_2048_sub_90(z1, z1, z0); + (void)sp_2048_add_90(r + 45, r + 45, z1); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_90(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z1[90]; + sp_digit* a1 = z1; + sp_digit* z2 = r + 90; + (void)sp_2048_add_45(a1, a, &a[45]); + sp_2048_sqr_45(z2, &a[45]); + sp_2048_sqr_45(z0, a); + sp_2048_sqr_45(z1, a1); + (void)sp_2048_sub_90(z1, z1, z2); + (void)sp_2048_sub_90(z1, z1, z0); + (void)sp_2048_add_90(r + 45, r + 45, z1); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_add_90(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 90; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. 
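sp_2048_mul_90 above is one level of Karatsuba: two half-width products plus one product of the half-sums replace four half-width products (sp_2048_mul_45 applies the analogous three-way split). The identity on 64-bit words; this sketch requires a compiler with __int128, e.g. GCC or Clang:

```c
/* One Karatsuba level: a*b from three half-width products. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t a = 0x123456789abcdef0ULL, b = 0x0fedcba987654321ULL;
    uint64_t a0 = (uint32_t)a, a1 = a >> 32;
    uint64_t b0 = (uint32_t)b, b1 = b >> 32;

    unsigned __int128 z0 = (unsigned __int128)a0 * b0;        /* low half  */
    unsigned __int128 z2 = (unsigned __int128)a1 * b1;        /* high half */
    unsigned __int128 z1 = (unsigned __int128)(a0 + a1) * (b0 + b1)
                         - z0 - z2;                           /* cross terms */

    unsigned __int128 k   = (z2 << 64) + (z1 << 32) + z0;
    unsigned __int128 ref = (unsigned __int128)a * b;
    printf("karatsuba == schoolbook: %d\n", k == ref);        /* 1 */
    return 0;
}
```

In the limb code the subtractions can go transiently negative per limb, which is why the Karatsuba paths defer normalization until after the additions back into r.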
+ * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_sub_90(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 90; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_90(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j, k; + int64_t c; + + c = ((int64_t)a[89]) * b[89]; + r[179] = (sp_digit)(c >> 23); + c = (c & 0x7fffff) << 23; + for (k = 177; k >= 0; k--) { + for (i = 89; i >= 0; i--) { + j = k - i; + if (j >= 90) { + break; + } + if (j < 0) { + continue; + } + + c += ((int64_t)a[i]) * b[j]; + } + r[k + 2] += c >> 46; + r[k + 1] = (c >> 23) & 0x7fffff; + c = (c & 0x7fffff) << 23; + } + r[0] = (sp_digit)(c >> 23); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_90(sp_digit* r, const sp_digit* a) +{ + int i, j, k; + int64_t c; + + c = ((int64_t)a[89]) * a[89]; + r[179] = (sp_digit)(c >> 23); + c = (c & 0x7fffff) << 23; + for (k = 177; k >= 0; k--) { + for (i = 89; i >= 0; i--) { + j = k - i; + if (j >= 90 || i <= j) { + break; + } + if (j < 0) { + continue; + } + + c += ((int64_t)a[i]) * a[j] * 2; + } + if (i == j) { + c += ((int64_t)a[i]) * a[i]; + } + + r[k + 2] += c >> 46; + r[k + 1] = (c >> 23) & 0x7fffff; + c = (c & 0x7fffff) << 23; + } + r[0] = (sp_digit)(c >> 23); +} + +#endif /* WOLFSSL_SP_SMALL */ +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_add_45(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 45; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_sub_45(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 45; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_sub_45(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 40; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[40] = a[40] - b[40]; + r[41] = a[41] - b[41]; + r[42] = a[42] - b[42]; + r[43] = a[43] - b[43]; + r[44] = a[44] - b[44]; + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */
+SP_NOINLINE static void sp_2048_mul_45(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[44]) * b[44];
+    r[89] = (sp_digit)(c >> 23);
+    c = (c & 0x7fffff) << 23;
+    for (k = 87; k >= 0; k--) {
+        for (i = 44; i >= 0; i--) {
+            j = k - i;
+            if (j >= 45) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 46;
+        r[k + 1] = (c >> 23) & 0x7fffff;
+        c = (c & 0x7fffff) << 23;
+    }
+    r[0] = (sp_digit)(c >> 23);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_45(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[44]) * a[44];
+    r[89] = (sp_digit)(c >> 23);
+    c = (c & 0x7fffff) << 23;
+    for (k = 87; k >= 0; k--) {
+        for (i = 44; i >= 0; i--) {
+            j = k - i;
+            if (j >= 45 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+            c += ((int64_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 46;
+        r[k + 1] = (c >> 23) & 0x7fffff;
+        c = (c & 0x7fffff) << 23;
+    }
+    r[0] = (sp_digit)(c >> 23);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a    A single precision number.
+ * rho  Bottom word of inverse.
+ */
+static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+    x &= 0x7fffff;
+
+    /* rho = -1/m mod b */
+    *rho = (1L << 23) - x;
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_2048_mul_d_90(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 90; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x7fffff;
+        t >>= 23;
+    }
+    r[90] = (sp_digit)t;
+#else
+    int64_t tb = b;
+    int64_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
+    for (i = 0; i < 88; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
+    }
+    t[1] = tb * a[89];
+    r[89] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+    r[90] = (sp_digit)(t[1] >> 23);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 1024 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_2048_mont_norm_45(sp_digit* r, const sp_digit* m)
+{
+    /* Set r = 2^n - 1.
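sp_2048_mont_setup above seeds an inverse of the (odd) low limb mod 2^4 and then applies Newton's iteration x <- x*(2 - b*x), which doubles the number of correct low bits per step, so three multiplies reach 2^32 before masking back to the 23-bit limb. The same computation stand-alone:

```c
/* Newton's iteration for rho = -1/b mod 2^23, as in sp_2048_mont_setup. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t b = 0x123457;                  /* odd low limb of a modulus */
    uint32_t x = (((b + 2) & 4) << 1) + b;  /* x*b == 1 (mod 2^4)  */
    uint32_t rho;

    x *= 2 - b * x;                         /* x*b == 1 (mod 2^8)  */
    x *= 2 - b * x;                         /* x*b == 1 (mod 2^16) */
    x *= 2 - b * x;                         /* x*b == 1 (mod 2^32) */
    x &= 0x7fffff;

    rho = ((uint32_t)1 << 23) - x;          /* -x == -1/b mod 2^23 */
    printf("0x%06x\n", (b * rho) & 0x7fffff);  /* 0x7fffff, i.e. -1 */
    return 0;
}
```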
*/ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<44; i++) { + r[i] = 0x7fffff; + } +#else + int i; + + for (i = 0; i < 40; i += 8) { + r[i + 0] = 0x7fffff; + r[i + 1] = 0x7fffff; + r[i + 2] = 0x7fffff; + r[i + 3] = 0x7fffff; + r[i + 4] = 0x7fffff; + r[i + 5] = 0x7fffff; + r[i + 6] = 0x7fffff; + r[i + 7] = 0x7fffff; + } + r[40] = 0x7fffff; + r[41] = 0x7fffff; + r[42] = 0x7fffff; + r[43] = 0x7fffff; +#endif + r[44] = 0xfffL; + + /* r = (2^n - 1) mod n */ + (void)sp_2048_sub_45(r, r, m); + + /* Add one so r = 2^n mod m */ + r[0] += 1; +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static sp_digit sp_2048_cmp_45(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=44; i>=0; i--) { + r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#else + int i; + + r |= (a[44] - b[44]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[43] - b[43]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[42] - b[42]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[41] - b[41]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[40] - b[40]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + for (i = 32; i >= 0; i -= 8) { + r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static void sp_2048_cond_sub_45(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 45; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + int i; + + for (i = 0; i < 40; i += 8) { + r[i + 0] = a[i + 0] - (b[i + 0] & m); + r[i + 1] = a[i + 1] - (b[i + 1] & m); + r[i + 2] = a[i + 2] - (b[i + 2] & m); + r[i + 3] = a[i + 3] - (b[i + 3] & m); + r[i + 4] = a[i + 4] - (b[i + 4] & m); + r[i + 5] = a[i + 5] - (b[i + 5] & m); + r[i + 6] = a[i + 6] - (b[i + 6] & m); + r[i + 7] = a[i + 7] - (b[i + 7] & m); + } + r[40] = a[40] - (b[40] & m); + r[41] = a[41] - (b[41] & m); + r[42] = a[42] - (b[42] & m); + r[43] = a[43] - (b[43] & m); + r[44] = a[44] - (b[44] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
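sp_2048_cmp_45 and sp_2048_cond_sub_45 above avoid data-dependent branches: the comparison accumulates limb differences under a mask that freezes once a difference has been seen, and the conditional subtract applies an all-ones or all-zeros mask instead of an if. The masking idiom in isolation:

```c
/* Branch-free conditional subtract: m is all-ones to subtract, zero to
 * leave a unchanged, the pattern behind sp_2048_cond_sub_45. */
#include <stdio.h>
#include <stdint.h>

static void cond_sub(uint32_t *r, const uint32_t *a, const uint32_t *b,
                     uint32_t m, int n)
{
    int i;
    for (i = 0; i < n; i++) {
        r[i] = a[i] - (b[i] & m);   /* b[i] & 0 == 0: a no-op, no branch */
    }
}

int main(void)
{
    uint32_t a[2] = { 10, 20 }, b[2] = { 1, 2 }, r[2];

    cond_sub(r, a, b, 0xffffffffu, 2);   /* mask -1: subtract */
    printf("%u %u\n", r[0], r[1]);       /* 9 18 */
    cond_sub(r, a, b, 0u, 2);            /* mask  0: copy a   */
    printf("%u %u\n", r[0], r[1]);       /* 10 20 */
    return 0;
}
```

Both code paths execute the same instruction sequence regardless of the secret-dependent condition, which is the point for RSA private-key operations.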
+ */ +SP_NOINLINE static void sp_2048_mul_add_45(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 45; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x7fffff; + t >>= 23; + } + r[45] += t; +#else + int64_t tb = b; + int64_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff); + for (i = 0; i < 40; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); + t[2] = tb * a[i+2]; + r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); + t[3] = tb * a[i+3]; + r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); + t[4] = tb * a[i+4]; + r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); + t[5] = tb * a[i+5]; + r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff)); + t[6] = tb * a[i+6]; + r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff)); + t[7] = tb * a[i+7]; + r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff)); + t[0] = tb * a[i+8]; + r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff)); + } + t[1] = tb * a[41]; r[41] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); + t[2] = tb * a[42]; r[42] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); + t[3] = tb * a[43]; r[43] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); + t[4] = tb * a[44]; r[44] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); + r[45] += (sp_digit)(t[4] >> 23); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 23. + * + * a Array of sp_digit to normalize. + */ +static void sp_2048_norm_45(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 44; i++) { + a[i+1] += a[i] >> 23; + a[i] &= 0x7fffff; + } +#else + int i; + for (i = 0; i < 40; i += 8) { + a[i+1] += a[i+0] >> 23; a[i+0] &= 0x7fffff; + a[i+2] += a[i+1] >> 23; a[i+1] &= 0x7fffff; + a[i+3] += a[i+2] >> 23; a[i+2] &= 0x7fffff; + a[i+4] += a[i+3] >> 23; a[i+3] &= 0x7fffff; + a[i+5] += a[i+4] >> 23; a[i+4] &= 0x7fffff; + a[i+6] += a[i+5] >> 23; a[i+5] &= 0x7fffff; + a[i+7] += a[i+6] >> 23; a[i+6] &= 0x7fffff; + a[i+8] += a[i+7] >> 23; a[i+7] &= 0x7fffff; + a[i+9] += a[i+8] >> 23; a[i+8] &= 0x7fffff; + } + a[40+1] += a[40] >> 23; + a[40] &= 0x7fffff; + a[41+1] += a[41] >> 23; + a[41] &= 0x7fffff; + a[42+1] += a[42] >> 23; + a[42] &= 0x7fffff; + a[43+1] += a[43] >> 23; + a[43] &= 0x7fffff; +#endif +} + +/* Shift the result in the high 1024 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. 
+ */
+static void sp_2048_mont_shift_45(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int64_t n = a[44] >> 12;
+    n += ((int64_t)a[45]) << 11;
+
+    for (i = 0; i < 44; i++) {
+        r[i] = n & 0x7fffff;
+        n >>= 23;
+        n += ((int64_t)a[46 + i]) << 11;
+    }
+    r[44] = (sp_digit)n;
+#else
+    int i;
+    int64_t n = a[44] >> 12;
+    n += ((int64_t)a[45]) << 11;
+    for (i = 0; i < 40; i += 8) {
+        r[i + 0] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 46]) << 11;
+        r[i + 1] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 47]) << 11;
+        r[i + 2] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 48]) << 11;
+        r[i + 3] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 49]) << 11;
+        r[i + 4] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 50]) << 11;
+        r[i + 5] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 51]) << 11;
+        r[i + 6] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 52]) << 11;
+        r[i + 7] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 53]) << 11;
+    }
+    r[40] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[86]) << 11;
+    r[41] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[87]) << 11;
+    r[42] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[88]) << 11;
+    r[43] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[89]) << 11;
+    r[44] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[45], 0, sizeof(*r) * 45U);
+}
+
+/* Reduce the number back to 1024 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_2048_mont_reduce_45(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_2048_norm_45(a + 45);
+
+    for (i=0; i<44; i++) {
+        mu = (a[i] * mp) & 0x7fffff;
+        sp_2048_mul_add_45(a+i, m, mu);
+        a[i+1] += a[i] >> 23;
+    }
+    mu = (a[i] * mp) & 0xfffL;
+    sp_2048_mul_add_45(a+i, m, mu);
+    a[i+1] += a[i] >> 23;
+    a[i] &= 0x7fffff;
+
+    sp_2048_mont_shift_45(a, a);
+    sp_2048_cond_sub_45(a, a, m, 0 - (((a[44] >> 12) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_2048_norm_45(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_2048_mont_mul_45(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_2048_mul_45(r, a, b);
+    sp_2048_mont_reduce_45(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_2048_mont_sqr_45(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_2048_sqr_45(r, a);
+    sp_2048_mont_reduce_45(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
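sp_2048_mont_reduce_45 above performs word-by-word Montgomery reduction: each step adds the multiple of m that zeroes the current low limb, and the final shift divides by R. A one-word REDC sketch with R = 2^16, the single-step analogue of the 23-bit-per-iteration loop above:

```c
/* One-word Montgomery reduction (REDC): computes t * R^-1 mod m. */
#include <stdio.h>
#include <stdint.h>

static uint32_t redc(uint32_t t, uint32_t m, uint32_t mp)
{
    uint32_t u = (t * mp) & 0xffff;  /* u*m cancels the low 16 bits of t */
    uint32_t r = (uint32_t)(((uint64_t)t + (uint64_t)u * m) >> 16);
    return (r >= m) ? (r - m) : r;   /* one conditional subtract */
}

int main(void)
{
    uint32_t m = 65521, mp = 0, a = 12345, r;
    uint32_t i;

    /* mp = -1/m mod 2^16, found by brute force here for clarity */
    for (i = 1; i < 0x10000; i++) {
        if (((m * i) & 0xffff) == 0xffff) { mp = i; break; }
    }
    r = redc(a, m, mp);              /* r == a * 2^-16 mod m */
    printf("%u\n", (uint32_t)(((uint64_t)r << 16) % m));  /* 12345 */
    return 0;
}
```

Working in Montgomery form makes every modular multiply a plain multiply plus this cheap reduction, at the cost of converting in and out once per exponentiation.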
+ */ +SP_NOINLINE static void sp_2048_mul_d_45(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 45; i++) { + t += tb * a[i]; + r[i] = t & 0x7fffff; + t >>= 23; + } + r[45] = (sp_digit)t; +#else + int64_t tb = b; + int64_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff; + for (i = 0; i < 40; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff); + } + t[1] = tb * a[41]; + r[41] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); + t[2] = tb * a[42]; + r[42] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); + t[3] = tb * a[43]; + r[43] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); + t[4] = tb * a[44]; + r[44] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff); + r[45] = (sp_digit)(t[4] >> 23); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_2048_cond_add_45(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 45; i++) { + r[i] = a[i] + (b[i] & m); + } +#else + int i; + + for (i = 0; i < 40; i += 8) { + r[i + 0] = a[i + 0] + (b[i + 0] & m); + r[i + 1] = a[i + 1] + (b[i + 1] & m); + r[i + 2] = a[i + 2] + (b[i + 2] & m); + r[i + 3] = a[i + 3] + (b[i + 3] & m); + r[i + 4] = a[i + 4] + (b[i + 4] & m); + r[i + 5] = a[i + 5] + (b[i + 5] & m); + r[i + 6] = a[i + 6] + (b[i + 6] & m); + r[i + 7] = a[i + 7] + (b[i + 7] & m); + } + r[40] = a[40] + (b[40] & m); + r[41] = a[41] + (b[41] & m); + r[42] = a[42] + (b[42] & m); + r[43] = a[43] + (b[43] & m); + r[44] = a[44] + (b[44] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifdef WOLFSSL_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
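sp_2048_mul_d_45 above multiplies a multi-limb value by a single scalar limb, carrying between limbs through a 64-bit accumulator; the unrolled branch just pipelines the same recurrence. The small-build variant of that loop at 3-limb size:

```c
/* Scalar-times-multiprecision with a running 64-bit carry, the shape of
 * sp_2048_mul_d_45's WOLFSSL_SP_SMALL branch at 3-limb size. */
#include <stdio.h>
#include <stdint.h>

#define MASK23 0x7fffff

int main(void)
{
    uint32_t a[3] = { 0x7fffff, 0x7fffff, 0x000001 };
    uint32_t r[4];
    int64_t  t = 0;
    int i;

    for (i = 0; i < 3; i++) {
        t += (int64_t)5 * a[i];          /* scalar b = 5 */
        r[i] = (uint32_t)t & MASK23;     /* keep 23 bits */
        t >>= 23;                        /* carry to the next limb */
    }
    r[3] = (uint32_t)t;

    for (i = 3; i >= 0; i--) {
        printf("%06x%s", r[i], i ? " " : "\n");
    }
    return 0;
}
```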
+ */
+SP_NOINLINE static int sp_2048_add_45(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 45; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif
+SP_NOINLINE static void sp_2048_rshift_45(sp_digit* r, sp_digit* a, byte n)
+{
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<44; i++) {
+        r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff;
+    }
+#else
+    for (i=0; i<40; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (23 - n))) & 0x7fffff;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (23 - n))) & 0x7fffff;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (23 - n))) & 0x7fffff;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (23 - n))) & 0x7fffff;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (23 - n))) & 0x7fffff;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (23 - n))) & 0x7fffff;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (23 - n))) & 0x7fffff;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (23 - n))) & 0x7fffff;
+    }
+    r[40] = ((a[40] >> n) | (a[41] << (23 - n))) & 0x7fffff;
+    r[41] = ((a[41] >> n) | (a[42] << (23 - n))) & 0x7fffff;
+    r[42] = ((a[42] >> n) | (a[43] << (23 - n))) & 0x7fffff;
+    r[43] = ((a[43] >> n) | (a[44] << (23 - n))) & 0x7fffff;
+#endif
+    r[44] = a[44] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_2048_div_word_45(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 23 bits from d1 and top 8 bits from d0. */
+    d = (d1 << 8) | (d0 >> 15);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 9 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 7) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 17 bits in r */
+    /* Remaining 7 bits from d0. */
+    r <<= 7;
+    d <<= 7;
+    d |= d0 & ((1 << 7) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
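+
+/* Illustrative aside, not from the wolfSSL sources: a hypothetical harness
+ * showing what sp_2048_div_word_45() above computes (it is compiled only
+ * when WOLFSSL_SP_DIV_32 is defined). The staged 8/8/7-bit division is
+ * exact provided dv is normalised (top bit of the 23 set, which the << 11
+ * scaling in sp_2048_div_45() below guarantees), d1 < dv and d0 < 2^23, so
+ * no intermediate quotient or remainder overflows 32 bits.
+ */
+#include <assert.h>
+#include <stdint.h>
+
+static void toy_check_div_word(uint32_t d1, uint32_t d0, uint32_t dv)
+{
+    uint64_t wide = ((uint64_t)d1 << 23) | d0;   /* the 46-bit dividend */
+
+    assert(d1 < dv && d0 < 0x800000 && (dv >> 22) != 0);
+    assert((uint64_t)sp_2048_div_word_45((sp_digit)d1, (sp_digit)d0,
+                                         (sp_digit)dv) == wide / dv);
+}
+
+/* Divide a by d and put the remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.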
+ */ +static int sp_2048_div_45(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_32 + int64_t d1; +#endif + sp_digit dv, r1; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* td; +#else + sp_digit t1d[90 + 1], t2d[45 + 1], sdd[45 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* sd; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 45 + 3), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + (void)m; + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = td; + t2 = td + 90 + 1; + sd = t2 + 45 + 1; +#else + t1 = t1d; + t2 = t2d; + sd = sdd; +#endif + + sp_2048_mul_d_45(sd, d, 1L << 11); + sp_2048_mul_d_90(t1, a, 1L << 11); + dv = sd[44]; + for (i=45; i>=0; i--) { + t1[45 + i] += t1[45 + i - 1] >> 23; + t1[45 + i - 1] &= 0x7fffff; +#ifndef WOLFSSL_SP_DIV_32 + d1 = t1[45 + i]; + d1 <<= 23; + d1 += t1[45 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_2048_div_word_45(t1[45 + i], t1[45 + i - 1], dv); +#endif + + sp_2048_mul_d_45(t2, sd, r1); + (void)sp_2048_sub_45(&t1[i], &t1[i], t2); + t1[45 + i] -= t2[45]; + t1[45 + i] += t1[45 + i - 1] >> 23; + t1[45 + i - 1] &= 0x7fffff; + r1 = (((-t1[45 + i]) << 23) - t1[45 + i - 1]) / dv; + r1 -= t1[45 + i]; + sp_2048_mul_d_45(t2, sd, r1); + (void)sp_2048_add_45(&t1[i], &t1[i], t2); + t1[45 + i] += t1[45 + i - 1] >> 23; + t1[45 + i - 1] &= 0x7fffff; + } + t1[45 - 1] += t1[45 - 2] >> 23; + t1[45 - 2] &= 0x7fffff; + r1 = t1[45 - 1] / dv; + + sp_2048_mul_d_45(t2, sd, r1); + sp_2048_sub_45(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 45U); + for (i=0; i<43; i++) { + r[i+1] += r[i] >> 23; + r[i] &= 0x7fffff; + } + sp_2048_cond_add_45(r, r, sd, 0 - ((r[44] < 0) ? + (sp_digit)1 : (sp_digit)0)); + + sp_2048_norm_45(r); + sp_2048_rshift_45(r, r, 11); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_2048_mod_45(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_45(a, m, NULL, r); +} + +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_2048_mod_exp_45(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* td; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 45 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3U * 45U * 2U); + + norm = t[0] = td; + t[1] = &td[45 * 2]; + t[2] = &td[2 * 45 * 2]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_45(norm, m); + + if (reduceA != 0) { + err = sp_2048_mod_45(t[1], a, m); + } + else { + XMEMCPY(t[1], a, sizeof(sp_digit) * 45U); + } + } + if (err == MP_OKAY) { + sp_2048_mul_45(t[1], t[1], norm); + err = sp_2048_mod_45(t[1], t[1], m); + } + + if (err == MP_OKAY) { + i = bits / 23; + c = bits % 23; + n = e[i--] << (23 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 23; + } + + y = (n >> 22) & 1; + n <<= 1; + + sp_2048_mont_mul_45(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(*t[2]) * 45 * 2); + sp_2048_mont_sqr_45(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(*t[2]) * 45 * 2); + } + + sp_2048_mont_reduce_45(t[0], m, mp); + n = sp_2048_cmp_45(t[0], m); + sp_2048_cond_sub_45(t[0], t[0], m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 45 * 2); + + } + + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + + return err; +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[3][90]; +#else + sp_digit* td; + sp_digit* t[3]; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 45 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + t[0] = td; + t[1] = &td[45 * 2]; + t[2] = &td[2 * 45 * 2]; +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_45(norm, m); + + if (reduceA != 0) { + err = sp_2048_mod_45(t[1], a, m); + if (err == MP_OKAY) { + sp_2048_mul_45(t[1], t[1], norm); + err = sp_2048_mod_45(t[1], t[1], m); + } + } + else { + sp_2048_mul_45(t[1], a, norm); + err = sp_2048_mod_45(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + i = bits / 23; + c = bits % 23; + n = e[i--] << (23 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 23; + } + + y = (n >> 22) & 1; + n <<= 1; + + sp_2048_mont_mul_45(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); + sp_2048_mont_sqr_45(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); + } + + sp_2048_mont_reduce_45(t[0], m, mp); + n = sp_2048_cmp_45(t[0], m); + sp_2048_cond_sub_45(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(t[0])); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][90]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit rt[90]; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 90, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 90; +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_45(norm, m); + + if (reduceA != 0) { + err = sp_2048_mod_45(t[1], a, m); + if (err == MP_OKAY) { + sp_2048_mul_45(t[1], t[1], norm); + err = sp_2048_mod_45(t[1], t[1], m); + } + } + else { + sp_2048_mul_45(t[1], a, norm); + err = sp_2048_mod_45(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_45(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_45(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_45(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_45(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_45(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_45(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_45(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_45(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_45(t[10], t[ 5], m, mp); + sp_2048_mont_mul_45(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_45(t[12], t[ 6], m, mp); + sp_2048_mont_mul_45(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_45(t[14], t[ 7], m, mp); + sp_2048_mont_mul_45(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_45(t[16], t[ 8], m, mp); + sp_2048_mont_mul_45(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_45(t[18], t[ 9], m, mp); + sp_2048_mont_mul_45(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_45(t[20], t[10], m, mp); + sp_2048_mont_mul_45(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_45(t[22], t[11], m, mp); + sp_2048_mont_mul_45(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_45(t[24], t[12], m, mp); + sp_2048_mont_mul_45(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_45(t[26], t[13], m, mp); + sp_2048_mont_mul_45(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_45(t[28], t[14], m, mp); + sp_2048_mont_mul_45(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_45(t[30], t[15], m, mp); + sp_2048_mont_mul_45(t[31], t[16], t[15], m, mp); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 22) / 23) - 1; + c = bits % 23; + if (c == 0) { + c = 23; + } + if (i < 45) { + n = e[i--] << (32 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (9 - c); + c += 23; + } + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + XMEMCPY(rt, t[y], sizeof(rt)); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (9 - c); + c += 23; + } + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + + sp_2048_mont_sqr_45(rt, rt, m, mp); + sp_2048_mont_sqr_45(rt, rt, m, mp); + sp_2048_mont_sqr_45(rt, rt, m, mp); + sp_2048_mont_sqr_45(rt, rt, m, mp); + sp_2048_mont_sqr_45(rt, rt, m, mp); + + sp_2048_mont_mul_45(rt, rt, t[y], m, mp); + } + + sp_2048_mont_reduce_45(rt, m, mp); + n = sp_2048_cmp_45(rt, m); + sp_2048_cond_sub_45(rt, rt, m, ((n < 0) ? 
+            (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, rt, sizeof(rt));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Since m is 2048 bits, only a subtraction is needed.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_2048_mont_norm_90(sp_digit* r, const sp_digit* m)
+{
+    /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<89; i++) {
+        r[i] = 0x7fffff;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 88; i += 8) {
+        r[i + 0] = 0x7fffff;
+        r[i + 1] = 0x7fffff;
+        r[i + 2] = 0x7fffff;
+        r[i + 3] = 0x7fffff;
+        r[i + 4] = 0x7fffff;
+        r[i + 5] = 0x7fffff;
+        r[i + 6] = 0x7fffff;
+        r[i + 7] = 0x7fffff;
+    }
+    r[88] = 0x7fffff;
+#endif
+    r[89] = 0x1L;
+
+    /* r = (2^n - 1) mod m */
+    (void)sp_2048_sub_90(r, r, m);
+
+    /* Add one so r = 2^n mod m */
+    r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_2048_cmp_90(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=89; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[89] - b[89]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[88] - b[88]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    for (i = 80; i >= 0; i -= 8) {
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r  A single precision number representing the conditional subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+static void sp_2048_cond_sub_90(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 90; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 88; i += 8) {
+        r[i + 0] = a[i + 0] - (b[i + 0] & m);
+        r[i + 1] = a[i + 1] - (b[i + 1] & m);
+        r[i + 2] = a[i + 2] - (b[i + 2] & m);
+        r[i + 3] = a[i + 3] - (b[i + 3] & m);
+        r[i + 4] = a[i + 4] - (b[i + 4] & m);
+        r[i + 5] = a[i + 5] - (b[i + 5] & m);
+        r[i + 6] = a[i + 6] - (b[i + 6] & m);
+        r[i + 7] = a[i + 7] - (b[i + 7] & m);
+    }
+    r[88] = a[88] - (b[88] & m);
+    r[89] = a[89] - (b[89] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
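+
+/* Illustrative aside, not from the wolfSSL sources: the branch-free masking
+ * idiom that sp_2048_cmp_90() and sp_2048_cond_sub_90() above are built on.
+ * The expression 0 - (cond ? 1 : 0) yields an all-ones word when cond holds
+ * and zero otherwise, so data can be kept or subtracted without branching
+ * on secret values. toy_cond_sub_2() is a hypothetical two-digit version.
+ */
+#include <stdint.h>
+
+static void toy_cond_sub_2(int32_t r[2], const int32_t a[2],
+                           const int32_t b[2], int sub)
+{
+    int32_t m = 0 - (sub ? 1 : 0);  /* -1 (all ones) to subtract, 0 to copy */
+
+    r[0] = a[0] - (b[0] & m);       /* b[i] & 0 == 0, so a is kept as-is */
+    r[1] = a[1] - (b[1] & m);
+}
+
+/* Multiply a by scalar b and add into r. (r += a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.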
+ */
+SP_NOINLINE static void sp_2048_mul_add_90(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 90; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x7fffff;
+        t >>= 23;
+    }
+    r[90] += t;
+#else
+    int64_t tb = b;
+    int64_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff);
+    for (i = 0; i < 88; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
+        t[2] = tb * a[i+2];
+        r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
+        t[3] = tb * a[i+3];
+        r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
+        t[4] = tb * a[i+4];
+        r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
+        t[5] = tb * a[i+5];
+        r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff));
+        t[6] = tb * a[i+6];
+        r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff));
+        t[7] = tb * a[i+7];
+        r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff));
+        t[0] = tb * a[i+8];
+        r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff));
+    }
+    t[1] = tb * a[89]; r[89] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
+    r[90] += (sp_digit)(t[1] >> 23);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 23 bits.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+static void sp_2048_norm_90(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 89; i++) {
+        a[i+1] += a[i] >> 23;
+        a[i] &= 0x7fffff;
+    }
+#else
+    int i;
+    for (i = 0; i < 88; i += 8) {
+        a[i+1] += a[i+0] >> 23; a[i+0] &= 0x7fffff;
+        a[i+2] += a[i+1] >> 23; a[i+1] &= 0x7fffff;
+        a[i+3] += a[i+2] >> 23; a[i+2] &= 0x7fffff;
+        a[i+4] += a[i+3] >> 23; a[i+3] &= 0x7fffff;
+        a[i+5] += a[i+4] >> 23; a[i+4] &= 0x7fffff;
+        a[i+6] += a[i+5] >> 23; a[i+5] &= 0x7fffff;
+        a[i+7] += a[i+6] >> 23; a[i+6] &= 0x7fffff;
+        a[i+8] += a[i+7] >> 23; a[i+7] &= 0x7fffff;
+        a[i+9] += a[i+8] >> 23; a[i+8] &= 0x7fffff;
+    }
+    a[88+1] += a[88] >> 23;
+    a[88] &= 0x7fffff;
+#endif
+}
+
+/* Shift the result in the high 2048 bits down to the bottom.
+ *
+ * r  A single precision number.
+ * a  A single precision number.
+ */
+static void sp_2048_mont_shift_90(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int64_t n = a[89] >> 1;
+    n += ((int64_t)a[90]) << 22;
+
+    for (i = 0; i < 89; i++) {
+        r[i] = n & 0x7fffff;
+        n >>= 23;
+        n += ((int64_t)a[91 + i]) << 22;
+    }
+    r[89] = (sp_digit)n;
+#else
+    int i;
+    int64_t n = a[89] >> 1;
+    n += ((int64_t)a[90]) << 22;
+    for (i = 0; i < 88; i += 8) {
+        r[i + 0] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 91]) << 22;
+        r[i + 1] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 92]) << 22;
+        r[i + 2] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 93]) << 22;
+        r[i + 3] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 94]) << 22;
+        r[i + 4] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 95]) << 22;
+        r[i + 5] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 96]) << 22;
+        r[i + 6] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 97]) << 22;
+        r[i + 7] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 98]) << 22;
+    }
+    r[88] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[179]) << 22;
+    r[89] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[90], 0, sizeof(*r) * 90U);
+}
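+
+/* Illustrative aside, not from the wolfSSL sources: where the constants in
+ * sp_2048_mont_shift_90() above come from. With 23-bit digits, bit 2047 is
+ * bit 0 of digit 89 (89 * 23 = 2047), so shifting down by 2048 bits starts
+ * at bit 1 of digit 89: each output digit is the top 22 bits of one input
+ * digit plus bits of the next, shifted in at position 22. The toy_ names
+ * are hypothetical.
+ */
+#include <stdint.h>
+
+typedef int32_t toy_digit;                      /* stand-in for sp_digit */
+
+static toy_digit toy_shift_digit(toy_digit lo, toy_digit hi)
+{
+    /* One output digit: 22 remaining bits of lo topped up from hi. */
+    return ((lo >> 1) | (hi << 22)) & 0x7fffff;
+}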
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_2048_mont_reduce_90(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_2048_norm_90(a + 90);
+
+#ifdef WOLFSSL_SP_DH
+    if (mp != 1) {
+        for (i=0; i<89; i++) {
+            mu = (a[i] * mp) & 0x7fffff;
+            sp_2048_mul_add_90(a+i, m, mu);
+            a[i+1] += a[i] >> 23;
+        }
+        mu = (a[i] * mp) & 0x1L;
+        sp_2048_mul_add_90(a+i, m, mu);
+        a[i+1] += a[i] >> 23;
+        a[i] &= 0x7fffff;
+    }
+    else {
+        for (i=0; i<89; i++) {
+            mu = a[i] & 0x7fffff;
+            sp_2048_mul_add_90(a+i, m, mu);
+            a[i+1] += a[i] >> 23;
+        }
+        mu = a[i] & 0x1L;
+        sp_2048_mul_add_90(a+i, m, mu);
+        a[i+1] += a[i] >> 23;
+        a[i] &= 0x7fffff;
+    }
+#else
+    for (i=0; i<89; i++) {
+        mu = (a[i] * mp) & 0x7fffff;
+        sp_2048_mul_add_90(a+i, m, mu);
+        a[i+1] += a[i] >> 23;
+    }
+    mu = (a[i] * mp) & 0x1L;
+    sp_2048_mul_add_90(a+i, m, mu);
+    a[i+1] += a[i] >> 23;
+    a[i] &= 0x7fffff;
+#endif
+
+    sp_2048_mont_shift_90(a, a);
+    sp_2048_cond_sub_90(a, a, m, 0 - (((a[89] >> 1) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_2048_norm_90(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_2048_mont_mul_90(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_2048_mul_90(r, a, b);
+    sp_2048_mont_reduce_90(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_2048_mont_sqr_90(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_2048_sqr_90(r, a);
+    sp_2048_mont_reduce_90(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_2048_mul_d_180(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 180; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x7fffff;
+        t >>= 23;
+    }
+    r[180] = (sp_digit)t;
+#else
+    int64_t tb = b;
+    int64_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
+    for (i = 0; i < 176; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
+    }
+    t[1] = tb * a[177];
+    r[177] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+    t[2] = tb * a[178];
+    r[178] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+    t[3] = tb * a[179];
+    r[179] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+    r[180] = (sp_digit)(t[3] >> 23);
+#endif /* WOLFSSL_SP_SMALL */
+}
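+
+/* Illustrative aside, not from the wolfSSL sources: the unrolled scalar
+ * multiplies above (sp_2048_mul_d_180() and friends) keep the most recent
+ * partial products in a ring t[8]; each output digit is the low 23 bits of
+ * one product plus the carry (high bits) of the previous one, and digits
+ * may temporarily exceed 23 bits until a later normalisation pass. A
+ * hypothetical two-digit version of the same carry chain:
+ */
+#include <stdint.h>
+
+static void toy_mul_d_2(int32_t r[3], const int32_t a[2], int32_t b)
+{
+    int64_t t0 = (int64_t)b * a[0];
+    int64_t t1 = (int64_t)b * a[1];
+
+    r[0] = (int32_t)(t0 & 0x7fffff);                        /* low digit   */
+    r[1] = (int32_t)(t0 >> 23) + (int32_t)(t1 & 0x7fffff);  /* carry + low */
+    r[2] = (int32_t)(t1 >> 23);                             /* final carry */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing the conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.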
+ * m  Mask value to apply.
+ */
+static void sp_2048_cond_add_90(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 90; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 88; i += 8) {
+        r[i + 0] = a[i + 0] + (b[i + 0] & m);
+        r[i + 1] = a[i + 1] + (b[i + 1] & m);
+        r[i + 2] = a[i + 2] + (b[i + 2] & m);
+        r[i + 3] = a[i + 3] + (b[i + 3] & m);
+        r[i + 4] = a[i + 4] + (b[i + 4] & m);
+        r[i + 5] = a[i + 5] + (b[i + 5] & m);
+        r[i + 6] = a[i + 6] + (b[i + 6] & m);
+        r[i + 7] = a[i + 7] + (b[i + 7] & m);
+    }
+    r[88] = a[88] + (b[88] & m);
+    r[89] = a[89] + (b[89] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_sub_90(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 90; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#endif
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_add_90(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 90; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif
+SP_NOINLINE static void sp_2048_rshift_90(sp_digit* r, sp_digit* a, byte n)
+{
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<89; i++) {
+        r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff;
+    }
+#else
+    for (i=0; i<88; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (23 - n))) & 0x7fffff;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (23 - n))) & 0x7fffff;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (23 - n))) & 0x7fffff;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (23 - n))) & 0x7fffff;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (23 - n))) & 0x7fffff;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (23 - n))) & 0x7fffff;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (23 - n))) & 0x7fffff;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (23 - n))) & 0x7fffff;
+    }
+    r[88] = ((a[88] >> n) | (a[89] << (23 - n))) & 0x7fffff;
+#endif
+    r[89] = a[89] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_2048_div_word_90(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 23 bits from d1 and top 8 bits from d0. */
+    d = (d1 << 8) | (d0 >> 15);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 9 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 7) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 17 bits in r */
+    /* Remaining 7 bits from d0. */
+    r <<= 7;
+    d <<= 7;
+    d |= d0 & ((1 << 7) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide a by d and put the remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */ +static int sp_2048_div_90(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_32 + int64_t d1; +#endif + sp_digit dv, r1; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* td; +#else + sp_digit t1d[180 + 1], t2d[90 + 1], sdd[90 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* sd; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 90 + 3), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + (void)m; + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = td; + t2 = td + 180 + 1; + sd = t2 + 90 + 1; +#else + t1 = t1d; + t2 = t2d; + sd = sdd; +#endif + + sp_2048_mul_d_90(sd, d, 1L << 22); + sp_2048_mul_d_180(t1, a, 1L << 22); + dv = sd[89]; + for (i=90; i>=0; i--) { + t1[90 + i] += t1[90 + i - 1] >> 23; + t1[90 + i - 1] &= 0x7fffff; +#ifndef WOLFSSL_SP_DIV_32 + d1 = t1[90 + i]; + d1 <<= 23; + d1 += t1[90 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_2048_div_word_90(t1[90 + i], t1[90 + i - 1], dv); +#endif + + sp_2048_mul_d_90(t2, sd, r1); + (void)sp_2048_sub_90(&t1[i], &t1[i], t2); + t1[90 + i] -= t2[90]; + t1[90 + i] += t1[90 + i - 1] >> 23; + t1[90 + i - 1] &= 0x7fffff; + r1 = (((-t1[90 + i]) << 23) - t1[90 + i - 1]) / dv; + r1 -= t1[90 + i]; + sp_2048_mul_d_90(t2, sd, r1); + (void)sp_2048_add_90(&t1[i], &t1[i], t2); + t1[90 + i] += t1[90 + i - 1] >> 23; + t1[90 + i - 1] &= 0x7fffff; + } + t1[90 - 1] += t1[90 - 2] >> 23; + t1[90 - 2] &= 0x7fffff; + r1 = t1[90 - 1] / dv; + + sp_2048_mul_d_90(t2, sd, r1); + sp_2048_sub_90(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 90U); + for (i=0; i<88; i++) { + r[i+1] += r[i] >> 23; + r[i] &= 0x7fffff; + } + sp_2048_cond_add_90(r, r, sd, 0 - ((r[89] < 0) ? + (sp_digit)1 : (sp_digit)0)); + + sp_2048_norm_90(r); + sp_2048_rshift_90(r, r, 22); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_2048_mod_90(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_90(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_2048_mod_exp_90(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* td; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 90 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3U * 90U * 2U); + + norm = t[0] = td; + t[1] = &td[90 * 2]; + t[2] = &td[2 * 90 * 2]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_90(norm, m); + + if (reduceA != 0) { + err = sp_2048_mod_90(t[1], a, m); + } + else { + XMEMCPY(t[1], a, sizeof(sp_digit) * 90U); + } + } + if (err == MP_OKAY) { + sp_2048_mul_90(t[1], t[1], norm); + err = sp_2048_mod_90(t[1], t[1], m); + } + + if (err == MP_OKAY) { + i = bits / 23; + c = bits % 23; + n = e[i--] << (23 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 23; + } + + y = (n >> 22) & 1; + n <<= 1; + + sp_2048_mont_mul_90(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(*t[2]) * 90 * 2); + sp_2048_mont_sqr_90(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(*t[2]) * 90 * 2); + } + + sp_2048_mont_reduce_90(t[0], m, mp); + n = sp_2048_cmp_90(t[0], m); + sp_2048_cond_sub_90(t[0], t[0], m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 90 * 2); + + } + + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + + return err; +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[3][180]; +#else + sp_digit* td; + sp_digit* t[3]; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 90 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + t[0] = td; + t[1] = &td[90 * 2]; + t[2] = &td[2 * 90 * 2]; +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_90(norm, m); + + if (reduceA != 0) { + err = sp_2048_mod_90(t[1], a, m); + if (err == MP_OKAY) { + sp_2048_mul_90(t[1], t[1], norm); + err = sp_2048_mod_90(t[1], t[1], m); + } + } + else { + sp_2048_mul_90(t[1], a, norm); + err = sp_2048_mod_90(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + i = bits / 23; + c = bits % 23; + n = e[i--] << (23 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 23; + } + + y = (n >> 22) & 1; + n <<= 1; + + sp_2048_mont_mul_90(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); + sp_2048_mont_sqr_90(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); + } + + sp_2048_mont_reduce_90(t[0], m, mp); + n = sp_2048_cmp_90(t[0], m); + sp_2048_cond_sub_90(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(t[0])); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][180]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit rt[180]; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 180, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 180; +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_90(norm, m); + + if (reduceA != 0) { + err = sp_2048_mod_90(t[1], a, m); + if (err == MP_OKAY) { + sp_2048_mul_90(t[1], t[1], norm); + err = sp_2048_mod_90(t[1], t[1], m); + } + } + else { + sp_2048_mul_90(t[1], a, norm); + err = sp_2048_mod_90(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_90(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_90(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_90(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_90(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_90(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_90(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_90(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_90(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_90(t[10], t[ 5], m, mp); + sp_2048_mont_mul_90(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_90(t[12], t[ 6], m, mp); + sp_2048_mont_mul_90(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_90(t[14], t[ 7], m, mp); + sp_2048_mont_mul_90(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_90(t[16], t[ 8], m, mp); + sp_2048_mont_mul_90(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_90(t[18], t[ 9], m, mp); + sp_2048_mont_mul_90(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_90(t[20], t[10], m, mp); + sp_2048_mont_mul_90(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_90(t[22], t[11], m, mp); + sp_2048_mont_mul_90(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_90(t[24], t[12], m, mp); + sp_2048_mont_mul_90(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_90(t[26], t[13], m, mp); + sp_2048_mont_mul_90(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_90(t[28], t[14], m, mp); + sp_2048_mont_mul_90(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_90(t[30], t[15], m, mp); + sp_2048_mont_mul_90(t[31], t[16], t[15], m, mp); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 22) / 23) - 1; + c = bits % 23; + if (c == 0) { + c = 23; + } + if (i < 90) { + n = e[i--] << (32 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (9 - c); + c += 23; + } + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + XMEMCPY(rt, t[y], sizeof(rt)); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (9 - c); + c += 23; + } + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + + sp_2048_mont_sqr_90(rt, rt, m, mp); + sp_2048_mont_sqr_90(rt, rt, m, mp); + sp_2048_mont_sqr_90(rt, rt, m, mp); + sp_2048_mont_sqr_90(rt, rt, m, mp); + sp_2048_mont_sqr_90(rt, rt, m, mp); + + sp_2048_mont_mul_90(rt, rt, t[y], m, mp); + } + + sp_2048_mont_reduce_90(rt, m, mp); + n = sp_2048_cmp_90(rt, m); + sp_2048_cond_sub_90(rt, rt, m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, rt, sizeof(rt)); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#endif +} +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */ + /* WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; + sp_digit* norm; + sp_digit e[1] = {0}; + sp_digit mp; + int i; + int err = MP_OKAY; + + if (*outLen < 256U) { + err = MP_TO_E; + } + + if (err == MP_OKAY) { + if (mp_count_bits(em) > 23) { + err = MP_READ_E; + } + if (inLen > 256U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 90 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 90 * 2; + m = r + 90 * 2; + norm = r; + + sp_2048_from_bin(a, 90, in, inLen); +#if DIGIT_BIT >= 23 + e[0] = (sp_digit)em->dp[0]; +#else + e[0] = (sp_digit)em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_mp(m, 90, mm); + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_90(norm, m); + } + if (err == MP_OKAY) { + sp_2048_mul_90(a, a, norm); + err = sp_2048_mod_90(a, a, m); + } + if (err == MP_OKAY) { + for (i=22; i>=0; i--) { + if ((e[0] >> i) != 0) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 90 * 2); + for (i--; i>=0; i--) { + sp_2048_mont_sqr_90(r, r, m, mp); + + if (((e[0] >> i) & 1) == 1) { + sp_2048_mont_mul_90(r, r, a, m, mp); + } + } + sp_2048_mont_reduce_90(r, m, mp); + mp = sp_2048_cmp_90(r, m); + sp_2048_cond_sub_90(r, r, m, ((mp < 0) ? 
+ (sp_digit)1 : (sp_digit)0)- 1); + + sp_2048_to_bin(r, out); + *outLen = 256; + } + + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit ad[180], md[90], rd[180]; +#else + sp_digit* d = NULL; +#endif + sp_digit* a; + sp_digit* m; + sp_digit* r; + sp_digit e[1] = {0}; + int err = MP_OKAY; + + if (*outLen < 256U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(em) > 23) { + err = MP_READ_E; + } + if (inLen > 256U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 2048) { + err = MP_READ_E; + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 90 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + a = d; + r = a + 90 * 2; + m = r + 90 * 2; + } +#else + a = ad; + m = md; + r = rd; +#endif + + if (err == MP_OKAY) { + sp_2048_from_bin(a, 90, in, inLen); +#if DIGIT_BIT >= 23 + e[0] = (sp_digit)em->dp[0]; +#else + e[0] = (sp_digit)em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_2048_from_mp(m, 90, mm); + + if (e[0] == 0x3) { + sp_2048_sqr_90(r, a); + err = sp_2048_mod_90(r, r, m); + if (err == MP_OKAY) { + sp_2048_mul_90(r, a, r); + err = sp_2048_mod_90(r, r, m); + } + } + else { + sp_digit* norm = r; + int i; + sp_digit mp; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_90(norm, m); + + sp_2048_mul_90(a, a, norm); + err = sp_2048_mod_90(a, a, m); + + if (err == MP_OKAY) { + for (i=22; i>=0; i--) { + if ((e[0] >> i) != 0) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 180U); + for (i--; i>=0; i--) { + sp_2048_mont_sqr_90(r, r, m, mp); + + if (((e[0] >> i) & 1) == 1) { + sp_2048_mont_mul_90(r, r, a, m, mp); + } + } + sp_2048_mont_reduce_90(r, m, mp); + mp = sp_2048_cmp_90(r, m); + sp_2048_cond_sub_90(r, r, m, ((mp < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + } + } + + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } +#endif + + return err; +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) +#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */ +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
+ */ +int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* a; + sp_digit* d = NULL; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 256U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 2048) { + err = MP_READ_E; + } + if (inLen > 256) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 90 * 4, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = d + 90; + m = a + 180; + r = a; + + sp_2048_from_bin(a, 90, in, inLen); + sp_2048_from_mp(d, 90, dm); + sp_2048_from_mp(m, 90, mm); + err = sp_2048_mod_exp_90(r, a, d, 2048, m, 0); + } + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + } + + if (d != NULL) { + XMEMSET(d, 0, sizeof(sp_digit) * 90); + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else + sp_digit a[180], d[90], m[90]; + sp_digit* r = a; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 256U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 2048) { + err = MP_READ_E; + } + if (inLen > 256U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_bin(a, 90, in, inLen); + sp_2048_from_mp(d, 90, dm); + sp_2048_from_mp(m, 90, mm); + err = sp_2048_mod_exp_90(r, a, d, 2048, m, 0); + } + + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + } + + XMEMSET(d, 0, sizeof(sp_digit) * 90); + + return err; +#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ +#else +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* dq; + sp_digit* qi; + sp_digit* tmpa; + sp_digit* tmpb; + sp_digit* r; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 256U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (inLen > 256) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 45 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = t; + p = a + 90 * 2; + q = p + 45; + qi = dq = dp = q + 45; + tmpa = qi + 45; + tmpb = tmpa + 90; + + r = t + 90; + + sp_2048_from_bin(a, 90, in, inLen); + sp_2048_from_mp(p, 45, pm); + sp_2048_from_mp(q, 45, qm); + sp_2048_from_mp(dp, 45, dpm); + err = sp_2048_mod_exp_45(tmpa, a, dp, 1024, p, 1); + } + if (err == MP_OKAY) { + sp_2048_from_mp(dq, 45, dqm); + err = sp_2048_mod_exp_45(tmpb, a, dq, 1024, q, 1); + } + if (err == MP_OKAY) { + (void)sp_2048_sub_45(tmpa, tmpa, tmpb); + sp_2048_cond_add_45(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[44] >> 31)); + sp_2048_cond_add_45(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[44] >> 31)); + + sp_2048_from_mp(qi, 45, qim); + sp_2048_mul_45(tmpa, tmpa, qi); + err = sp_2048_mod_45(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_2048_mul_45(tmpa, q, tmpa); + (void)sp_2048_add_90(r, tmpb, tmpa); + 
sp_2048_norm_90(r); + + sp_2048_to_bin(r, out); + *outLen = 256; + } + + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 45 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else + sp_digit a[90 * 2]; + sp_digit p[45], q[45], dp[45], dq[45], qi[45]; + sp_digit tmpa[90], tmpb[90]; + sp_digit* r = a; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 256U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (inLen > 256U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_bin(a, 90, in, inLen); + sp_2048_from_mp(p, 45, pm); + sp_2048_from_mp(q, 45, qm); + sp_2048_from_mp(dp, 45, dpm); + sp_2048_from_mp(dq, 45, dqm); + sp_2048_from_mp(qi, 45, qim); + + err = sp_2048_mod_exp_45(tmpa, a, dp, 1024, p, 1); + } + if (err == MP_OKAY) { + err = sp_2048_mod_exp_45(tmpb, a, dq, 1024, q, 1); + } + + if (err == MP_OKAY) { + (void)sp_2048_sub_45(tmpa, tmpa, tmpb); + sp_2048_cond_add_45(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[44] >> 31)); + sp_2048_cond_add_45(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[44] >> 31)); + sp_2048_mul_45(tmpa, tmpa, qi); + err = sp_2048_mod_45(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_2048_mul_45(tmpa, tmpa, q); + (void)sp_2048_add_90(r, tmpb, tmpa); + sp_2048_norm_90(r); + + sp_2048_to_bin(r, out); + *outLen = 256; + } + + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); + XMEMSET(dq, 0, sizeof(dq)); + XMEMSET(qi, 0, sizeof(qi)); + + return err; +#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +} + +#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_2048_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 23 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 90); + r->used = 90; + mp_clamp(r); +#elif DIGIT_BIT < 23 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 90; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 23) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 23 - s; + } + r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 90; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 23 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 23 - s; + } + else { + s += 23; + } + } + r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
+ */ +int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 90 * 2; + m = e + 90; + r = b; + + sp_2048_from_mp(b, 90, base); + sp_2048_from_mp(e, 90, exp); + sp_2048_from_mp(m, 90, mod); + + err = sp_2048_mod_exp_90(r, b, e, mp_count_bits(exp), m, 0); + } + + if (err == MP_OKAY) { + err = sp_2048_to_mp(r, res); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 90U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[180], ed[90], md[90]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + } + +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 90 * 2; + m = e + 90; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 90, base); + sp_2048_from_mp(e, 90, exp); + sp_2048_from_mp(m, 90, mod); + + err = sp_2048_mod_exp_90(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_2048_to_mp(r, res); + } + + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 90U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +#else + XMEMSET(e, 0, sizeof(sp_digit) * 90U); +#endif + + return err; +#endif +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_2048 +SP_NOINLINE static void sp_2048_lshift_90(sp_digit* r, sp_digit* a, byte n) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + r[90] = a[89] >> (23 - n); + for (i=89; i>0; i--) { + r[i] = ((a[i] << n) | (a[i-1] >> (23 - n))) & 0x7fffff; + } +#else + sp_int_digit s, t; + + s = (sp_int_digit)a[89]; + r[90] = s >> (23U - n); + s = (sp_int_digit)(a[89]); t = (sp_int_digit)(a[88]); + r[89] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[88]); t = (sp_int_digit)(a[87]); + r[88] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[87]); t = (sp_int_digit)(a[86]); + r[87] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[86]); t = (sp_int_digit)(a[85]); + r[86] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[85]); t = (sp_int_digit)(a[84]); + r[85] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[84]); t = (sp_int_digit)(a[83]); + r[84] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[83]); t = (sp_int_digit)(a[82]); + r[83] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[82]); t = (sp_int_digit)(a[81]); + r[82] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[81]); t = (sp_int_digit)(a[80]); + r[81] = ((s << n) | (t >> (23U - 
n))) & 0x7fffff; + s = (sp_int_digit)(a[80]); t = (sp_int_digit)(a[79]); + r[80] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[79]); t = (sp_int_digit)(a[78]); + r[79] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[78]); t = (sp_int_digit)(a[77]); + r[78] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[77]); t = (sp_int_digit)(a[76]); + r[77] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[76]); t = (sp_int_digit)(a[75]); + r[76] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[75]); t = (sp_int_digit)(a[74]); + r[75] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[74]); t = (sp_int_digit)(a[73]); + r[74] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[73]); t = (sp_int_digit)(a[72]); + r[73] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[72]); t = (sp_int_digit)(a[71]); + r[72] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[71]); t = (sp_int_digit)(a[70]); + r[71] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[70]); t = (sp_int_digit)(a[69]); + r[70] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[69]); t = (sp_int_digit)(a[68]); + r[69] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[68]); t = (sp_int_digit)(a[67]); + r[68] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[67]); t = (sp_int_digit)(a[66]); + r[67] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[66]); t = (sp_int_digit)(a[65]); + r[66] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[65]); t = (sp_int_digit)(a[64]); + r[65] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[64]); t = (sp_int_digit)(a[63]); + r[64] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[63]); t = (sp_int_digit)(a[62]); + r[63] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[62]); t = (sp_int_digit)(a[61]); + r[62] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[61]); t = (sp_int_digit)(a[60]); + r[61] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[60]); t = (sp_int_digit)(a[59]); + r[60] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[59]); t = (sp_int_digit)(a[58]); + r[59] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[58]); t = (sp_int_digit)(a[57]); + r[58] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[57]); t = (sp_int_digit)(a[56]); + r[57] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[56]); t = (sp_int_digit)(a[55]); + r[56] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[55]); t = (sp_int_digit)(a[54]); + r[55] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[54]); t = (sp_int_digit)(a[53]); + r[54] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[53]); t = (sp_int_digit)(a[52]); + r[53] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[52]); t = (sp_int_digit)(a[51]); + r[52] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[51]); t = (sp_int_digit)(a[50]); + r[51] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[50]); t = (sp_int_digit)(a[49]); + r[50] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[49]); t = (sp_int_digit)(a[48]); + r[49] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[48]); t = (sp_int_digit)(a[47]); + r[48] = ((s << n) | (t 
>> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[47]); t = (sp_int_digit)(a[46]); + r[47] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[46]); t = (sp_int_digit)(a[45]); + r[46] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[45]); t = (sp_int_digit)(a[44]); + r[45] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[44]); t = (sp_int_digit)(a[43]); + r[44] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[43]); t = (sp_int_digit)(a[42]); + r[43] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[42]); t = (sp_int_digit)(a[41]); + r[42] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]); + r[41] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]); + r[40] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]); + r[39] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]); + r[38] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]); + r[37] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]); + r[36] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]); + r[35] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]); + r[34] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]); + r[33] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]); + r[32] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]); + r[31] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]); + r[30] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]); + r[29] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]); + r[28] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]); + r[27] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]); + r[26] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]); + r[25] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]); + r[24] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]); + r[23] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]); + r[22] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]); + r[21] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]); + r[20] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]); + r[19] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]); + r[18] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]); + r[17] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]); + r[16] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]); + r[15] = ((s 
<< n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]); + r[14] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]); + r[13] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]); + r[12] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]); + r[11] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]); + r[10] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]); + r[9] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]); + r[8] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]); + r[7] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]); + r[6] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]); + r[5] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]); + r[4] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]); + r[3] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]); + r[2] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]); + r[1] = ((s << n) | (t >> (23U - n))) & 0x7fffff; +#endif + r[0] = (a[0] << n) & 0x7fffff; +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_2048_mod_exp_2_90(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[180]; + sp_digit td[91]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 271, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 180; + XMEMSET(td, 0, sizeof(sp_digit) * 271); +#else + norm = nd; + tmp = td; + XMEMSET(td, 0, sizeof(td)); +#endif + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_90(norm, m); + + bits = ((bits + 3) / 4) * 4; + i = ((bits + 22) / 23) - 1; + c = bits % 23; + if (c == 0) { + c = 23; + } + if (i < 90) { + n = e[i--] << (32 - c); + } + else { + n = 0; + i--; + } + if (c < 4) { + n |= e[i--] << (9 - c); + c += 23; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + sp_2048_lshift_90(r, norm, y); + for (; i>=0 || c>=4; ) { + if (c < 4) { + n |= e[i--] << (9 - c); + c += 23; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + + sp_2048_mont_sqr_90(r, r, m, mp); + sp_2048_mont_sqr_90(r, r, m, mp); + sp_2048_mont_sqr_90(r, r, m, mp); + sp_2048_mont_sqr_90(r, r, m, mp); + + sp_2048_lshift_90(r, r, y); + sp_2048_mul_d_90(tmp, norm, (r[90] << 22) + (r[89] >> 1)); + r[90] = 0; + r[89] &= 0x1L; + (void)sp_2048_add_90(r, r, tmp); + sp_2048_norm_90(r); + o = sp_2048_cmp_90(r, m); + sp_2048_cond_sub_90(r, r, m, ((o < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + } + + sp_2048_mont_reduce_90(r, m, mp); + n = sp_2048_cmp_90(r, m); + sp_2048_cond_sub_90(r, r, m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +#endif /* HAVE_FFDHE_2048 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + word32 i; + + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 256) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 90 * 2; + m = e + 90; + r = b; + + sp_2048_from_mp(b, 90, base); + sp_2048_from_bin(e, 90, exp, expLen); + sp_2048_from_mp(m, 90, mod); + + #ifdef HAVE_FFDHE_2048 + if (base->used == 1 && base->dp[0] == 2 && + ((m[89] << 15) | (m[88] >> 8)) == 0xffffL) { + err = sp_2048_mod_exp_2_90(r, e, expLen * 8, m); + } + else + #endif + err = sp_2048_mod_exp_90(r, b, e, expLen * 8, m, 0); + } + + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + for (i=0; i<256 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 90U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[180], ed[90], md[90]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + word32 i; + int err = MP_OKAY; + + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 256U) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + } +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 90 * 2; + m = e + 90; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 90, base); + sp_2048_from_bin(e, 90, exp, expLen); + sp_2048_from_mp(m, 90, mod); + + #ifdef HAVE_FFDHE_2048 + if (base->used == 1 && base->dp[0] == 2U && + ((m[89] << 15) | (m[88] >> 8)) == 0xffffL) { + err = sp_2048_mod_exp_2_90(r, e, expLen * 8U, m); + } + else { + #endif + err = sp_2048_mod_exp_90(r, b, e, expLen * 8U, m, 0); + #ifdef HAVE_FFDHE_2048 + } + #endif + } + + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + for (i=0; i<256U && out[i] == 0U; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 90U); + 
XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +#else + XMEMSET(e, 0, sizeof(sp_digit) * 90U); +#endif + + return err; +#endif +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1024) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 1024) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 1024) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 45 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 45 * 2; + m = e + 45; + r = b; + + sp_2048_from_mp(b, 45, base); + sp_2048_from_mp(e, 45, exp); + sp_2048_from_mp(m, 45, mod); + + err = sp_2048_mod_exp_45(r, b, e, mp_count_bits(exp), m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 45, 0, sizeof(*r) * 45U); + err = sp_2048_to_mp(r, res); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 45U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[90], ed[45], md[45]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1024) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 1024) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 1024) { + err = MP_READ_E; + } + } + +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 45 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 45 * 2; + m = e + 45; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 45, base); + sp_2048_from_mp(e, 45, exp); + sp_2048_from_mp(m, 45, mod); + + err = sp_2048_mod_exp_45(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 45, 0, sizeof(*r) * 45U); + err = sp_2048_to_mp(r, res); + } + + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 45U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +#else + XMEMSET(e, 0, sizeof(sp_digit) * 45U); +#endif + + return err; +#endif +} + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* !WOLFSSL_SP_NO_2048 */ + +#ifndef WOLFSSL_SP_NO_3072 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. 
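+ *
+ * The bytes are consumed from the end of the big-endian array (least
+ * significant byte first) and packed into 23-bit limbs; s tracks the bit
+ * offset inside the current limb, and once s >= 15 the next byte straddles
+ * a limb boundary, so its high bits spill into the following limb. A
+ * bit-at-a-time sketch of the same packing (illustrative only; the helper
+ * name is not part of this file and assumes <stdint.h> and <string.h>):
+ *
+ *     void pack23(uint32_t* limb, int limbs, const uint8_t* be, int n)
+ *     {
+ *         int i;
+ *         memset(limb, 0, (size_t)limbs * sizeof(uint32_t));
+ *         for (i = 0; i < 8 * n && i / 23 < limbs; i++) {
+ *             uint32_t bit = (be[n - 1 - i / 8] >> (i % 8)) & 1U;
+ *             limb[i / 23] |= bit << (i % 23);
+ *         }
+ *     }
+ *
+ * The routine below is a byte-at-a-time version of this loop.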
+ */
+static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 15U) {
+            r[j] &= 0x7fffff;
+            s = 23U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of digits to convert
+ * a A multi-precision integer.
+ */
+static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 23
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 23
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0x7fffff;
+        s = 23U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 23U) <= (word32)DIGIT_BIT) {
+            s += 23U;
+            r[j] &= 0x7fffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 23) {
+            r[j] &= 0x7fffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 23 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 384
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_3072_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    for (i=0; i<133; i++) {
+        r[i+1] += r[i] >> 23;
+        r[i] &= 0x7fffff;
+    }
+    j = 3072 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<134 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 23) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 23);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_67(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j;
+    int64_t t[134];
+
+    XMEMSET(t, 0, sizeof(t));
+    for (i=0; i<67; i++) {
+        for (j=0; j<67; j++) {
+            t[i+j] += ((int64_t)a[i]) * b[j];
+        }
+    }
+    for (i=0; i<133; i++) {
+        r[i] = t[i] & 0x7fffff;
+        t[i+1] += t[i] >> 23;
+    }
+    r[133] = (sp_digit)t[133];
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_67(sp_digit* r, const sp_digit* a)
+{
+    int i, j;
+    int64_t t[134];
+
+    XMEMSET(t, 0, sizeof(t));
+    for (i=0; i<67; i++) {
+        for (j=0; j<i; j++) {
+            t[i+j] += (((int64_t)a[i]) * a[j]) * 2;
+        }
+        t[i+i] += ((int64_t)a[i]) * a[i];
+    }
+    for (i=0; i<133; i++) {
+        r[i] = t[i] & 0x7fffff;
+        t[i+1] += t[i] >> 23;
+    }
+    r[133] = (sp_digit)t[133];
+}
+
+/* Add b to a into r.
(r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_add_67(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 64; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[64] = a[64] + b[64]; + r[65] = a[65] + b[65]; + r[66] = a[66] + b[66]; + + return 0; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_add_134(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 128; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[128] = a[128] + b[128]; + r[129] = a[129] + b[129]; + r[130] = a[130] + b[130]; + r[131] = a[131] + b[131]; + r[132] = a[132] + b[132]; + r[133] = a[133] + b[133]; + + return 0; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_sub_134(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 128; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[128] = a[128] - b[128]; + r[129] = a[129] - b[129]; + r[130] = a[130] - b[130]; + r[131] = a[131] - b[131]; + r[132] = a[132] - b[132]; + r[133] = a[133] - b[133]; + + return 0; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_134(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[134]; + sp_digit* a1 = z1; + sp_digit b1[67]; + sp_digit* z2 = r + 134; + (void)sp_3072_add_67(a1, a, &a[67]); + (void)sp_3072_add_67(b1, b, &b[67]); + sp_3072_mul_67(z2, &a[67], &b[67]); + sp_3072_mul_67(z0, a, b); + sp_3072_mul_67(z1, a1, b1); + (void)sp_3072_sub_134(z1, z1, z2); + (void)sp_3072_sub_134(z1, z1, z0); + (void)sp_3072_add_134(r + 67, r + 67, z1); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_134(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z1[134]; + sp_digit* a1 = z1; + sp_digit* z2 = r + 134; + (void)sp_3072_add_67(a1, a, &a[67]); + sp_3072_sqr_67(z2, &a[67]); + sp_3072_sqr_67(z0, a); + sp_3072_sqr_67(z1, a1); + (void)sp_3072_sub_134(z1, z1, z2); + (void)sp_3072_sub_134(z1, z1, z0); + (void)sp_3072_add_134(r + 67, r + 67, z1); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
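+ *
+ * Because each 32-bit sp_digit holds only 23 significant bits, digit sums
+ * cannot overflow the word, so addition needs no carry chain here; carries
+ * are pushed up later by the normalization step (sp_3072_norm_134). These
+ * add/sub primitives also feed the Karatsuba split used by
+ * sp_3072_mul_134() above: with B = 2^(23*67), a = a1*B + a0 and
+ * b = b1*B + b0,
+ *
+ *     a*b = z2*B^2 + (z1 - z2 - z0)*B + z0, where
+ *     z0 = a0*b0, z2 = a1*b1, z1 = (a1 + a0)*(b1 + b0)
+ *
+ * which trades one of the four half-size multiplications for a handful of
+ * cheap additions and subtractions.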
+ */ +SP_NOINLINE static int sp_3072_add_134(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 134; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_sub_134(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 134; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_134(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j, k; + int64_t c; + + c = ((int64_t)a[133]) * b[133]; + r[267] = (sp_digit)(c >> 23); + c = (c & 0x7fffff) << 23; + for (k = 265; k >= 0; k--) { + for (i = 133; i >= 0; i--) { + j = k - i; + if (j >= 134) { + break; + } + if (j < 0) { + continue; + } + + c += ((int64_t)a[i]) * b[j]; + } + r[k + 2] += c >> 46; + r[k + 1] = (c >> 23) & 0x7fffff; + c = (c & 0x7fffff) << 23; + } + r[0] = (sp_digit)(c >> 23); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_134(sp_digit* r, const sp_digit* a) +{ + int i, j, k; + int64_t c; + + c = ((int64_t)a[133]) * a[133]; + r[267] = (sp_digit)(c >> 23); + c = (c & 0x7fffff) << 23; + for (k = 265; k >= 0; k--) { + for (i = 133; i >= 0; i--) { + j = k - i; + if (j >= 134 || i <= j) { + break; + } + if (j < 0) { + continue; + } + + c += ((int64_t)a[i]) * a[j] * 2; + } + if (i == j) { + c += ((int64_t)a[i]) * a[i]; + } + + r[k + 2] += c >> 46; + r[k + 1] = (c >> 23) & 0x7fffff; + c = (c & 0x7fffff) << 23; + } + r[0] = (sp_digit)(c >> 23); +} + +#endif /* WOLFSSL_SP_SMALL */ +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_add_67(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 67; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_sub_67(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 67; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */
+SP_NOINLINE static int sp_3072_sub_67(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 64; i += 8) {
+        r[i + 0] = a[i + 0] - b[i + 0];
+        r[i + 1] = a[i + 1] - b[i + 1];
+        r[i + 2] = a[i + 2] - b[i + 2];
+        r[i + 3] = a[i + 3] - b[i + 3];
+        r[i + 4] = a[i + 4] - b[i + 4];
+        r[i + 5] = a[i + 5] - b[i + 5];
+        r[i + 6] = a[i + 6] - b[i + 6];
+        r[i + 7] = a[i + 7] - b[i + 7];
+    }
+    r[64] = a[64] - b[64];
+    r[65] = a[65] - b[65];
+    r[66] = a[66] - b[66];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_67(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[66]) * b[66];
+    r[133] = (sp_digit)(c >> 23);
+    c = (c & 0x7fffff) << 23;
+    for (k = 131; k >= 0; k--) {
+        for (i = 66; i >= 0; i--) {
+            j = k - i;
+            if (j >= 67) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 46;
+        r[k + 1] = (c >> 23) & 0x7fffff;
+        c = (c & 0x7fffff) << 23;
+    }
+    r[0] = (sp_digit)(c >> 23);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_67(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[66]) * a[66];
+    r[133] = (sp_digit)(c >> 23);
+    c = (c & 0x7fffff) << 23;
+    for (k = 131; k >= 0; k--) {
+        for (i = 66; i >= 0; i--) {
+            j = k - i;
+            if (j >= 67 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+            c += ((int64_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 46;
+        r[k + 1] = (c >> 23) & 0x7fffff;
+        c = (c & 0x7fffff) << 23;
+    }
+    r[0] = (sp_digit)(c >> 23);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+    x &= 0x7fffff;
+
+    /* rho = -1/m mod b */
+    *rho = (1L << 23) - x;
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
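+ *
+ * A 23-bit limb times a 23-bit scalar is at most 46 bits, so the running
+ * int64_t accumulator below has ample headroom: each step keeps the low
+ * 23 bits and shifts the rest into the next limb. A small worked example
+ * with b = 3 and a = { 0x7fffff }:
+ *
+ *     t = 3 * 0x7fffff = 0x17ffffd
+ *     r[0] = t & 0x7fffff = 0x7ffffd
+ *     r[1] = t >> 23      = 2        (2*2^23 + 0x7ffffd == 0x17ffffd)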
+ */ +SP_NOINLINE static void sp_3072_mul_d_134(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 134; i++) { + t += tb * a[i]; + r[i] = t & 0x7fffff; + t >>= 23; + } + r[134] = (sp_digit)t; +#else + int64_t tb = b; + int64_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff; + for (i = 0; i < 128; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff); + } + t[1] = tb * a[129]; + r[129] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); + t[2] = tb * a[130]; + r[130] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); + t[3] = tb * a[131]; + r[131] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); + t[4] = tb * a[132]; + r[132] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff); + t[5] = tb * a[133]; + r[133] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff); + r[134] = (sp_digit)(t[5] >> 23); +#endif /* WOLFSSL_SP_SMALL */ +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 3072 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_3072_mont_norm_67(sp_digit* r, const sp_digit* m) +{ + /* Set r = 2^n - 1. */ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<66; i++) { + r[i] = 0x7fffff; + } +#else + int i; + + for (i = 0; i < 64; i += 8) { + r[i + 0] = 0x7fffff; + r[i + 1] = 0x7fffff; + r[i + 2] = 0x7fffff; + r[i + 3] = 0x7fffff; + r[i + 4] = 0x7fffff; + r[i + 5] = 0x7fffff; + r[i + 6] = 0x7fffff; + r[i + 7] = 0x7fffff; + } + r[64] = 0x7fffff; + r[65] = 0x7fffff; +#endif + r[66] = 0x3ffffL; + + /* r = (2^n - 1) mod n */ + (void)sp_3072_sub_67(r, r, m); + + /* Add one so r = 2^n mod m */ + r[0] += 1; +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static sp_digit sp_3072_cmp_67(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=66; i>=0; i--) { + r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#else + int i; + + r |= (a[66] - b[66]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[65] - b[65]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[64] - b[64]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + for (i = 56; i >= 0; i -= 8) { + r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? 
(sp_digit)1 : (sp_digit)0)); + r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static void sp_3072_cond_sub_67(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 67; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + int i; + + for (i = 0; i < 64; i += 8) { + r[i + 0] = a[i + 0] - (b[i + 0] & m); + r[i + 1] = a[i + 1] - (b[i + 1] & m); + r[i + 2] = a[i + 2] - (b[i + 2] & m); + r[i + 3] = a[i + 3] - (b[i + 3] & m); + r[i + 4] = a[i + 4] - (b[i + 4] & m); + r[i + 5] = a[i + 5] - (b[i + 5] & m); + r[i + 6] = a[i + 6] - (b[i + 6] & m); + r[i + 7] = a[i + 7] - (b[i + 7] & m); + } + r[64] = a[64] - (b[64] & m); + r[65] = a[65] - (b[65] & m); + r[66] = a[66] - (b[66] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_3072_mul_add_67(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 67; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x7fffff; + t >>= 23; + } + r[67] += t; +#else + int64_t tb = b; + int64_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff); + for (i = 0; i < 64; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); + t[2] = tb * a[i+2]; + r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); + t[3] = tb * a[i+3]; + r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); + t[4] = tb * a[i+4]; + r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); + t[5] = tb * a[i+5]; + r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff)); + t[6] = tb * a[i+6]; + r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff)); + t[7] = tb * a[i+7]; + r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff)); + t[0] = tb * a[i+8]; + r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff)); + } + t[1] = tb * a[65]; r[65] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); + t[2] = tb * a[66]; r[66] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); + r[67] += (sp_digit)(t[2] >> 23); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 23. + * + * a Array of sp_digit to normalize. 
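+ *
+ * After the lazy additions and multiply-accumulate steps above, a limb may
+ * temporarily hold more than 23 significant bits; normalization restores
+ * the canonical base-2^23 form by carrying the excess upwards:
+ *
+ *     a[i + 1] += a[i] >> 23;   a[i] &= 0x7fffff;
+ *
+ * e.g. { 0x800001, 5 } normalizes to { 0x000001, 6 }, the same value since
+ * 0x800001 = 2^23 + 1.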
+ */
+static void sp_3072_norm_67(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 66; i++) {
+        a[i+1] += a[i] >> 23;
+        a[i] &= 0x7fffff;
+    }
+#else
+    int i;
+    for (i = 0; i < 64; i += 8) {
+        a[i+1] += a[i+0] >> 23; a[i+0] &= 0x7fffff;
+        a[i+2] += a[i+1] >> 23; a[i+1] &= 0x7fffff;
+        a[i+3] += a[i+2] >> 23; a[i+2] &= 0x7fffff;
+        a[i+4] += a[i+3] >> 23; a[i+3] &= 0x7fffff;
+        a[i+5] += a[i+4] >> 23; a[i+4] &= 0x7fffff;
+        a[i+6] += a[i+5] >> 23; a[i+5] &= 0x7fffff;
+        a[i+7] += a[i+6] >> 23; a[i+6] &= 0x7fffff;
+        a[i+8] += a[i+7] >> 23; a[i+7] &= 0x7fffff;
+        a[i+9] += a[i+8] >> 23; a[i+8] &= 0x7fffff;
+    }
+    a[64+1] += a[64] >> 23;
+    a[64] &= 0x7fffff;
+    a[65+1] += a[65] >> 23;
+    a[65] &= 0x7fffff;
+#endif
+}
+
+/* Shift the result in the high 1536 bits down to the bottom.
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
+static void sp_3072_mont_shift_67(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    sp_digit n, s;
+
+    s = a[67];
+    n = a[66] >> 18;
+    for (i = 0; i < 66; i++) {
+        n += (s & 0x7fffff) << 5;
+        r[i] = n & 0x7fffff;
+        n >>= 23;
+        s = a[68 + i] + (s >> 23);
+    }
+    n += s << 5;
+    r[66] = n;
+#else
+    sp_digit n, s;
+    int i;
+
+    s = a[67]; n = a[66] >> 18;
+    for (i = 0; i < 64; i += 8) {
+        n += (s & 0x7fffff) << 5; r[i+0] = n & 0x7fffff;
+        n >>= 23; s = a[i+68] + (s >> 23);
+        n += (s & 0x7fffff) << 5; r[i+1] = n & 0x7fffff;
+        n >>= 23; s = a[i+69] + (s >> 23);
+        n += (s & 0x7fffff) << 5; r[i+2] = n & 0x7fffff;
+        n >>= 23; s = a[i+70] + (s >> 23);
+        n += (s & 0x7fffff) << 5; r[i+3] = n & 0x7fffff;
+        n >>= 23; s = a[i+71] + (s >> 23);
+        n += (s & 0x7fffff) << 5; r[i+4] = n & 0x7fffff;
+        n >>= 23; s = a[i+72] + (s >> 23);
+        n += (s & 0x7fffff) << 5; r[i+5] = n & 0x7fffff;
+        n >>= 23; s = a[i+73] + (s >> 23);
+        n += (s & 0x7fffff) << 5; r[i+6] = n & 0x7fffff;
+        n >>= 23; s = a[i+74] + (s >> 23);
+        n += (s & 0x7fffff) << 5; r[i+7] = n & 0x7fffff;
+        n >>= 23; s = a[i+75] + (s >> 23);
+    }
+    n += (s & 0x7fffff) << 5; r[64] = n & 0x7fffff;
+    n >>= 23; s = a[132] + (s >> 23);
+    n += (s & 0x7fffff) << 5; r[65] = n & 0x7fffff;
+    n >>= 23; s = a[133] + (s >> 23);
+    n += s << 5; r[66] = n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[67], 0, sizeof(*r) * 67U);
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_3072_mont_reduce_67(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_3072_norm_67(a + 67);
+
+    for (i=0; i<66; i++) {
+        mu = (a[i] * mp) & 0x7fffff;
+        sp_3072_mul_add_67(a+i, m, mu);
+        a[i+1] += a[i] >> 23;
+    }
+    mu = (a[i] * mp) & 0x3ffffL;
+    sp_3072_mul_add_67(a+i, m, mu);
+    a[i+1] += a[i] >> 23;
+    a[i] &= 0x7fffff;
+
+    sp_3072_mont_shift_67(a, a);
+    sp_3072_cond_sub_67(a, a, m, 0 - (((a[66] >> 18) > 0) ?
+        (sp_digit)1 : (sp_digit)0));
+    sp_3072_norm_67(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_mul_67(sp_digit* r, const sp_digit* a, const sp_digit* b,
+    const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_67(r, a, b);
+    sp_3072_mont_reduce_67(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_sqr_67(sp_digit* r, const sp_digit* a, const sp_digit* m,
+    sp_digit mp)
+{
+    sp_3072_sqr_67(r, a);
+    sp_3072_mont_reduce_67(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_3072_mul_d_67(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 67; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x7fffff;
+        t >>= 23;
+    }
+    r[67] = (sp_digit)t;
+#else
+    int64_t tb = b;
+    int64_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
+    for (i = 0; i < 64; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
+    }
+    t[1] = tb * a[65];
+    r[65] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+    t[2] = tb * a[66];
+    r[66] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+    r[67] = (sp_digit)(t[2] >> 23);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_3072_cond_add_67(sp_digit* r, const sp_digit* a,
+    const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 67; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 64; i += 8) {
+        r[i + 0] = a[i + 0] + (b[i + 0] & m);
+        r[i + 1] = a[i + 1] + (b[i + 1] & m);
+        r[i + 2] = a[i + 2] + (b[i + 2] & m);
+        r[i + 3] = a[i + 3] + (b[i + 3] & m);
+        r[i + 4] = a[i + 4] + (b[i + 4] & m);
+        r[i + 5] = a[i + 5] + (b[i + 5] & m);
+        r[i + 6] = a[i + 6] + (b[i + 6] & m);
+        r[i + 7] = a[i + 7] + (b[i + 7] & m);
+    }
+    r[64] = a[64] + (b[64] & m);
+    r[65] = a[65] + (b[65] & m);
+    r[66] = a[66] + (b[66] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_67(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 67; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_3072_div_word_67(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 23 bits from d1 and top 8 bits from d0. */
+    d = (d1 << 8) | (d0 >> 15);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 9 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 7) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 17 bits in r */
+    /* Remaining 7 bits from d0.
*/ + r <<= 7; + d <<= 7; + d |= d0 & ((1 << 7) - 1); + t = d / dv; + r += t; + + return r; +} +#endif /* WOLFSSL_SP_DIV_32 */ + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Number to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_3072_div_67(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_32 + int64_t d1; +#endif + sp_digit dv, r1; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* td; +#else + sp_digit t1d[134], t2d[67 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 67 + 1), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = td; + t2 = td + 2 * 67; +#else + t1 = t1d; + t2 = t2d; +#endif + + dv = d[66]; + XMEMCPY(t1, a, sizeof(*t1) * 2U * 67U); + for (i=66; i>=0; i--) { + t1[67 + i] += t1[67 + i - 1] >> 23; + t1[67 + i - 1] &= 0x7fffff; +#ifndef WOLFSSL_SP_DIV_32 + d1 = t1[67 + i]; + d1 <<= 23; + d1 += t1[67 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_3072_div_word_67(t1[67 + i], t1[67 + i - 1], dv); +#endif + + sp_3072_mul_d_67(t2, d, r1); + (void)sp_3072_sub_67(&t1[i], &t1[i], t2); + t1[67 + i] -= t2[67]; + t1[67 + i] += t1[67 + i - 1] >> 23; + t1[67 + i - 1] &= 0x7fffff; + r1 = (((-t1[67 + i]) << 23) - t1[67 + i - 1]) / dv; + r1++; + sp_3072_mul_d_67(t2, d, r1); + (void)sp_3072_add_67(&t1[i], &t1[i], t2); + t1[67 + i] += t1[67 + i - 1] >> 23; + t1[67 + i - 1] &= 0x7fffff; + } + t1[67 - 1] += t1[67 - 2] >> 23; + t1[67 - 2] &= 0x7fffff; + r1 = t1[67 - 1] / dv; + + sp_3072_mul_d_67(t2, d, r1); + (void)sp_3072_sub_67(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 67U); + for (i=0; i<65; i++) { + r[i+1] += r[i] >> 23; + r[i] &= 0x7fffff; + } + sp_3072_cond_add_67(r, r, d, 0 - ((r[66] < 0) ? + (sp_digit)1 : (sp_digit)0)); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_3072_mod_67(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_3072_div_67(a, m, NULL, r); +} + +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
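+ *
+ * The shape is standard Montgomery exponentiation: the base is moved into
+ * the Montgomery domain (a multiply by the norm value, 2^1536 mod m), the
+ * exponent is scanned from its top bit, and one final reduction drops the
+ * extra factor. A textbook square-and-multiply sketch of the same
+ * computation (hypothetical helper names; the constant-time loop below
+ * computes the same thing, but uses the addr_mask[] pointer-masking trick
+ * so no branch ever depends on a secret exponent bit):
+ *
+ *     mont_mul(t1, a, norm, m, mp);      // abar = a * R mod m
+ *     copy(acc, t1);                     // start at the top set bit
+ *     for (i = bits - 2; i >= 0; i--) {
+ *         mont_sqr(acc, acc, m, mp);     // acc = acc^2 * R^-1 mod m
+ *         if (exp_bit(e, i))
+ *             mont_mul(acc, acc, t1, m, mp);
+ *     }
+ *     mont_reduce(acc, m, mp);           // acc = a^e mod m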
+ */ +static int sp_3072_mod_exp_67(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* td; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 67 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3U * 67U * 2U); + + norm = t[0] = td; + t[1] = &td[67 * 2]; + t[2] = &td[2 * 67 * 2]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_67(norm, m); + + if (reduceA != 0) { + err = sp_3072_mod_67(t[1], a, m); + } + else { + XMEMCPY(t[1], a, sizeof(sp_digit) * 67U); + } + } + if (err == MP_OKAY) { + sp_3072_mul_67(t[1], t[1], norm); + err = sp_3072_mod_67(t[1], t[1], m); + } + + if (err == MP_OKAY) { + i = bits / 23; + c = bits % 23; + n = e[i--] << (23 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 23; + } + + y = (n >> 22) & 1; + n <<= 1; + + sp_3072_mont_mul_67(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(*t[2]) * 67 * 2); + sp_3072_mont_sqr_67(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(*t[2]) * 67 * 2); + } + + sp_3072_mont_reduce_67(t[0], m, mp); + n = sp_3072_cmp_67(t[0], m); + sp_3072_cond_sub_67(t[0], t[0], m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 67 * 2); + + } + + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + + return err; +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[3][134]; +#else + sp_digit* td; + sp_digit* t[3]; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 67 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + t[0] = td; + t[1] = &td[67 * 2]; + t[2] = &td[2 * 67 * 2]; +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_67(norm, m); + + if (reduceA != 0) { + err = sp_3072_mod_67(t[1], a, m); + if (err == MP_OKAY) { + sp_3072_mul_67(t[1], t[1], norm); + err = sp_3072_mod_67(t[1], t[1], m); + } + } + else { + sp_3072_mul_67(t[1], a, norm); + err = sp_3072_mod_67(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + i = bits / 23; + c = bits % 23; + n = e[i--] << (23 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 23; + } + + y = (n >> 22) & 1; + n <<= 1; + + sp_3072_mont_mul_67(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); + sp_3072_mont_sqr_67(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); + } + + sp_3072_mont_reduce_67(t[0], m, mp); + n = sp_3072_cmp_67(t[0], m); + sp_3072_cond_sub_67(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(t[0])); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][134]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit rt[134]; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 134, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 134; +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_67(norm, m); + + if (reduceA != 0) { + err = sp_3072_mod_67(t[1], a, m); + if (err == MP_OKAY) { + sp_3072_mul_67(t[1], t[1], norm); + err = sp_3072_mod_67(t[1], t[1], m); + } + } + else { + sp_3072_mul_67(t[1], a, norm); + err = sp_3072_mod_67(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_67(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_67(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_67(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_67(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_67(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_67(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_67(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_67(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_67(t[10], t[ 5], m, mp); + sp_3072_mont_mul_67(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_67(t[12], t[ 6], m, mp); + sp_3072_mont_mul_67(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_67(t[14], t[ 7], m, mp); + sp_3072_mont_mul_67(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_67(t[16], t[ 8], m, mp); + sp_3072_mont_mul_67(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_67(t[18], t[ 9], m, mp); + sp_3072_mont_mul_67(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_67(t[20], t[10], m, mp); + sp_3072_mont_mul_67(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_67(t[22], t[11], m, mp); + sp_3072_mont_mul_67(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_67(t[24], t[12], m, mp); + sp_3072_mont_mul_67(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_67(t[26], t[13], m, mp); + sp_3072_mont_mul_67(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_67(t[28], t[14], m, mp); + sp_3072_mont_mul_67(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_67(t[30], t[15], m, mp); + sp_3072_mont_mul_67(t[31], t[16], t[15], m, mp); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 22) / 23) - 1; + c = bits % 23; + if (c == 0) { + c = 23; + } + if (i < 67) { + n = e[i--] << (32 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (9 - c); + c += 23; + } + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + XMEMCPY(rt, t[y], sizeof(rt)); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (9 - c); + c += 23; + } + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + + sp_3072_mont_sqr_67(rt, rt, m, mp); + sp_3072_mont_sqr_67(rt, rt, m, mp); + sp_3072_mont_sqr_67(rt, rt, m, mp); + sp_3072_mont_sqr_67(rt, rt, m, mp); + sp_3072_mont_sqr_67(rt, rt, m, mp); + + sp_3072_mont_mul_67(rt, rt, t[y], m, mp); + } + + sp_3072_mont_reduce_67(rt, m, mp); + n = sp_3072_cmp_67(rt, m); + sp_3072_cond_sub_67(rt, rt, m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, rt, sizeof(rt)); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#endif +} + +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */ + +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 3072 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_3072_mont_norm_134(sp_digit* r, const sp_digit* m) +{ + /* Set r = 2^n - 1. */ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<133; i++) { + r[i] = 0x7fffff; + } +#else + int i; + + for (i = 0; i < 128; i += 8) { + r[i + 0] = 0x7fffff; + r[i + 1] = 0x7fffff; + r[i + 2] = 0x7fffff; + r[i + 3] = 0x7fffff; + r[i + 4] = 0x7fffff; + r[i + 5] = 0x7fffff; + r[i + 6] = 0x7fffff; + r[i + 7] = 0x7fffff; + } + r[128] = 0x7fffff; + r[129] = 0x7fffff; + r[130] = 0x7fffff; + r[131] = 0x7fffff; + r[132] = 0x7fffff; +#endif + r[133] = 0x1fffL; + + /* r = (2^n - 1) mod n */ + (void)sp_3072_sub_134(r, r, m); + + /* Add one so r = 2^n mod m */ + r[0] += 1; +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static sp_digit sp_3072_cmp_134(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=133; i>=0; i--) { + r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#else + int i; + + r |= (a[133] - b[133]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[132] - b[132]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[131] - b[131]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[130] - b[130]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[129] - b[129]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[128] - b[128]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + for (i = 120; i >= 0; i -= 8) { + r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
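+ *
+ * The mask makes the subtraction branch-free: r = a - (b & m) subtracts b
+ * exactly when m is all ones, yet performs the same loads and stores in
+ * both cases. Call sites derive the mask from a constant-time compare,
+ * for example:
+ *
+ *     sp_digit n = sp_3072_cmp_134(r, m);
+ *     // ((n < 0) ? 1 : 0) - 1 is 0 when r < m and -1 (all ones) when
+ *     // r >= m, so the modulus is subtracted only when it is needed.
+ *     sp_3072_cond_sub_134(r, r, m, ((n < 0) ? (sp_digit)1 : (sp_digit)0) - 1);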
+ */ +static void sp_3072_cond_sub_134(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 134; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + int i; + + for (i = 0; i < 128; i += 8) { + r[i + 0] = a[i + 0] - (b[i + 0] & m); + r[i + 1] = a[i + 1] - (b[i + 1] & m); + r[i + 2] = a[i + 2] - (b[i + 2] & m); + r[i + 3] = a[i + 3] - (b[i + 3] & m); + r[i + 4] = a[i + 4] - (b[i + 4] & m); + r[i + 5] = a[i + 5] - (b[i + 5] & m); + r[i + 6] = a[i + 6] - (b[i + 6] & m); + r[i + 7] = a[i + 7] - (b[i + 7] & m); + } + r[128] = a[128] - (b[128] & m); + r[129] = a[129] - (b[129] & m); + r[130] = a[130] - (b[130] & m); + r[131] = a[131] - (b[131] & m); + r[132] = a[132] - (b[132] & m); + r[133] = a[133] - (b[133] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_3072_mul_add_134(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 134; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x7fffff; + t >>= 23; + } + r[134] += t; +#else + int64_t tb = b; + int64_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff); + for (i = 0; i < 128; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); + t[2] = tb * a[i+2]; + r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); + t[3] = tb * a[i+3]; + r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); + t[4] = tb * a[i+4]; + r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); + t[5] = tb * a[i+5]; + r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff)); + t[6] = tb * a[i+6]; + r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff)); + t[7] = tb * a[i+7]; + r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff)); + t[0] = tb * a[i+8]; + r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff)); + } + t[1] = tb * a[129]; r[129] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); + t[2] = tb * a[130]; r[130] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); + t[3] = tb * a[131]; r[131] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); + t[4] = tb * a[132]; r[132] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); + t[5] = tb * a[133]; r[133] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff)); + r[134] += (sp_digit)(t[5] >> 23); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 23. + * + * a Array of sp_digit to normalize. + */ +static void sp_3072_norm_134(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 133; i++) { + a[i+1] += a[i] >> 23; + a[i] &= 0x7fffff; + } +#else + int i; + for (i = 0; i < 128; i += 8) { + a[i+1] += a[i+0] >> 23; a[i+0] &= 0x7fffff; + a[i+2] += a[i+1] >> 23; a[i+1] &= 0x7fffff; + a[i+3] += a[i+2] >> 23; a[i+2] &= 0x7fffff; + a[i+4] += a[i+3] >> 23; a[i+3] &= 0x7fffff; + a[i+5] += a[i+4] >> 23; a[i+4] &= 0x7fffff; + a[i+6] += a[i+5] >> 23; a[i+5] &= 0x7fffff; + a[i+7] += a[i+6] >> 23; a[i+6] &= 0x7fffff; + a[i+8] += a[i+7] >> 23; a[i+7] &= 0x7fffff; + a[i+9] += a[i+8] >> 23; a[i+8] &= 0x7fffff; + } + a[128+1] += a[128] >> 23; + a[128] &= 0x7fffff; + a[129+1] += a[129] >> 23; + a[129] &= 0x7fffff; + a[130+1] += a[130] >> 23; + a[130] &= 0x7fffff; + a[131+1] += a[131] >> 23; + a[131] &= 0x7fffff; + a[132+1] += a[132] >> 23; + a[132] &= 0x7fffff; +#endif +} + +/* Shift the result in the high 3072 bits down to the bottom. 
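+ * With 23-bit limbs, bit 3072 begins 13 bits into limb 133 (133*23 = 3059,
+ * 3072 - 3059 = 13), so limb k of (a >> 3072) is
+ *
+ *     r[k] = ((a[133 + k] >> 13) | (a[134 + k] << 10)) & 0x7fffff
+ *
+ * since 23 - 13 = 10; that is exactly the carry value n threaded through
+ * the loop below.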
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
+static void sp_3072_mont_shift_134(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int64_t n = a[133] >> 13;
+    n += ((int64_t)a[134]) << 10;
+
+    for (i = 0; i < 133; i++) {
+        r[i] = n & 0x7fffff;
+        n >>= 23;
+        n += ((int64_t)a[135 + i]) << 10;
+    }
+    r[133] = (sp_digit)n;
+#else
+    int i;
+    int64_t n = a[133] >> 13;
+    n += ((int64_t)a[134]) << 10;
+    for (i = 0; i < 128; i += 8) {
+        r[i + 0] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 135]) << 10;
+        r[i + 1] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 136]) << 10;
+        r[i + 2] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 137]) << 10;
+        r[i + 3] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 138]) << 10;
+        r[i + 4] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 139]) << 10;
+        r[i + 5] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 140]) << 10;
+        r[i + 6] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 141]) << 10;
+        r[i + 7] = n & 0x7fffff;
+        n >>= 23; n += ((int64_t)a[i + 142]) << 10;
+    }
+    r[128] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[263]) << 10;
+    r[129] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[264]) << 10;
+    r[130] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[265]) << 10;
+    r[131] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[266]) << 10;
+    r[132] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[267]) << 10;
+    r[133] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[134], 0, sizeof(*r) * 134U);
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_3072_mont_reduce_134(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_3072_norm_134(a + 134);
+
+#ifdef WOLFSSL_SP_DH
+    if (mp != 1) {
+        for (i=0; i<133; i++) {
+            mu = (a[i] * mp) & 0x7fffff;
+            sp_3072_mul_add_134(a+i, m, mu);
+            a[i+1] += a[i] >> 23;
+        }
+        mu = (a[i] * mp) & 0x1fffL;
+        sp_3072_mul_add_134(a+i, m, mu);
+        a[i+1] += a[i] >> 23;
+        a[i] &= 0x7fffff;
+    }
+    else {
+        for (i=0; i<133; i++) {
+            mu = a[i] & 0x7fffff;
+            sp_3072_mul_add_134(a+i, m, mu);
+            a[i+1] += a[i] >> 23;
+        }
+        mu = a[i] & 0x1fffL;
+        sp_3072_mul_add_134(a+i, m, mu);
+        a[i+1] += a[i] >> 23;
+        a[i] &= 0x7fffff;
+    }
+#else
+    for (i=0; i<133; i++) {
+        mu = (a[i] * mp) & 0x7fffff;
+        sp_3072_mul_add_134(a+i, m, mu);
+        a[i+1] += a[i] >> 23;
+    }
+    mu = (a[i] * mp) & 0x1fffL;
+    sp_3072_mul_add_134(a+i, m, mu);
+    a[i+1] += a[i] >> 23;
+    a[i] &= 0x7fffff;
+#endif
+
+    sp_3072_mont_shift_134(a, a);
+    sp_3072_cond_sub_134(a, a, m, 0 - (((a[133] >> 13) > 0) ?
+        (sp_digit)1 : (sp_digit)0));
+    sp_3072_norm_134(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_mul_134(sp_digit* r, const sp_digit* a, const sp_digit* b,
+    const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_134(r, a, b);
+    sp_3072_mont_reduce_134(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
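+ *
+ * With R = 2^3072, the reduction computes x * R^-1 mod m, so Montgomery
+ * form is preserved across multiplies and squares:
+ *
+ *     MontMul(a*R, b*R) = (a*R)(b*R)*R^-1 = (a*b)*R  (mod m)
+ *
+ * Values enter the domain by a plain multiply with 2^3072 mod m (the
+ * mont_norm value) and leave it through one final Montgomery reduction.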
+ */ +static void sp_3072_mont_sqr_134(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_3072_sqr_134(r, a); + sp_3072_mont_reduce_134(r, m, mp); +} + +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_3072_mul_d_268(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 268; i++) { + t += tb * a[i]; + r[i] = t & 0x7fffff; + t >>= 23; + } + r[268] = (sp_digit)t; +#else + int64_t tb = b; + int64_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff; + for (i = 0; i < 264; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff); + } + t[1] = tb * a[265]; + r[265] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); + t[2] = tb * a[266]; + r[266] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); + t[3] = tb * a[267]; + r[267] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); + r[268] = (sp_digit)(t[3] >> 23); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_3072_cond_add_134(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 134; i++) { + r[i] = a[i] + (b[i] & m); + } +#else + int i; + + for (i = 0; i < 128; i += 8) { + r[i + 0] = a[i + 0] + (b[i + 0] & m); + r[i + 1] = a[i + 1] + (b[i + 1] & m); + r[i + 2] = a[i + 2] + (b[i + 2] & m); + r[i + 3] = a[i + 3] + (b[i + 3] & m); + r[i + 4] = a[i + 4] + (b[i + 4] & m); + r[i + 5] = a[i + 5] + (b[i + 5] & m); + r[i + 6] = a[i + 6] + (b[i + 6] & m); + r[i + 7] = a[i + 7] + (b[i + 7] & m); + } + r[128] = a[128] + (b[128] & m); + r[129] = a[129] + (b[129] & m); + r[130] = a[130] + (b[130] & m); + r[131] = a[131] + (b[131] & m); + r[132] = a[132] + (b[132] & m); + r[133] = a[133] + (b[133] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifdef WOLFSSL_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_sub_134(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 134; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif +#ifdef WOLFSSL_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */
+SP_NOINLINE static int sp_3072_add_134(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 134; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif
+/* Shift the number right by n bits. (r = a >> n)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * n Number of bits to shift by (0 < n < 23).
+ */
+SP_NOINLINE static void sp_3072_rshift_134(sp_digit* r, sp_digit* a, byte n)
+{
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<133; i++) {
+        r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff;
+    }
+#else
+    for (i=0; i<128; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (23 - n))) & 0x7fffff;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (23 - n))) & 0x7fffff;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (23 - n))) & 0x7fffff;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (23 - n))) & 0x7fffff;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (23 - n))) & 0x7fffff;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (23 - n))) & 0x7fffff;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (23 - n))) & 0x7fffff;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (23 - n))) & 0x7fffff;
+    }
+    r[128] = ((a[128] >> n) | (a[129] << (23 - n))) & 0x7fffff;
+    r[129] = ((a[129] >> n) | (a[130] << (23 - n))) & 0x7fffff;
+    r[130] = ((a[130] >> n) | (a[131] << (23 - n))) & 0x7fffff;
+    r[131] = ((a[131] >> n) | (a[132] << (23 - n))) & 0x7fffff;
+    r[132] = ((a[132] >> n) | (a[133] << (23 - n))) & 0x7fffff;
+#endif
+    r[133] = a[133] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_3072_div_word_134(sp_digit d1, sp_digit d0,
+        sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 23 bits from d1 and top 8 bits from d0. */
+    d = (d1 << 8) | (d0 >> 15);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 9 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 7) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 17 bits in r */
+    /* Remaining 7 bits from d0. */
+    r <<= 7;
+    d <<= 7;
+    d |= d0 & ((1 << 7) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
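+ *
+ * As a sketch of one schoolbook step below (illustrative only): both
+ * operands are first scaled by 2^10 so that the top divisor digit dv
+ * uses all 23 bits, then each quotient digit is estimated from the top
+ * two digits of the running remainder,
+ *
+ *     r1 = (sp_digit)((((int64_t)t1[134 + i] << 23) + t1[134 + i - 1]) / dv);
+ *
+ * followed by t1 -= r1 * d and a small correction pass for any over- or
+ * under-estimate. The pre-scaling keeps the estimate within one unit of
+ * the true digit.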
+ */ +static int sp_3072_div_134(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_32 + int64_t d1; +#endif + sp_digit dv, r1; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* td; +#else + sp_digit t1d[268 + 1], t2d[134 + 1], sdd[134 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* sd; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 134 + 3), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + (void)m; + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = td; + t2 = td + 268 + 1; + sd = t2 + 134 + 1; +#else + t1 = t1d; + t2 = t2d; + sd = sdd; +#endif + + sp_3072_mul_d_134(sd, d, 1L << 10); + sp_3072_mul_d_268(t1, a, 1L << 10); + dv = sd[133]; + for (i=134; i>=0; i--) { + t1[134 + i] += t1[134 + i - 1] >> 23; + t1[134 + i - 1] &= 0x7fffff; +#ifndef WOLFSSL_SP_DIV_32 + d1 = t1[134 + i]; + d1 <<= 23; + d1 += t1[134 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_3072_div_word_134(t1[134 + i], t1[134 + i - 1], dv); +#endif + + sp_3072_mul_d_134(t2, sd, r1); + (void)sp_3072_sub_134(&t1[i], &t1[i], t2); + t1[134 + i] -= t2[134]; + t1[134 + i] += t1[134 + i - 1] >> 23; + t1[134 + i - 1] &= 0x7fffff; + r1 = (((-t1[134 + i]) << 23) - t1[134 + i - 1]) / dv; + r1 -= t1[134 + i]; + sp_3072_mul_d_134(t2, sd, r1); + (void)sp_3072_add_134(&t1[i], &t1[i], t2); + t1[134 + i] += t1[134 + i - 1] >> 23; + t1[134 + i - 1] &= 0x7fffff; + } + t1[134 - 1] += t1[134 - 2] >> 23; + t1[134 - 2] &= 0x7fffff; + r1 = t1[134 - 1] / dv; + + sp_3072_mul_d_134(t2, sd, r1); + sp_3072_sub_134(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 134U); + for (i=0; i<132; i++) { + r[i+1] += r[i] >> 23; + r[i] &= 0x7fffff; + } + sp_3072_cond_add_134(r, r, sd, 0 - ((r[133] < 0) ? + (sp_digit)1 : (sp_digit)0)); + + sp_3072_norm_134(r); + sp_3072_rshift_134(r, r, 10); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_3072_mod_134(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_3072_div_134(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
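+ *
+ * In sketch form (not additional API), the small build below runs a
+ * bit-at-a-time square-and-multiply over Montgomery-form values:
+ *
+ *     y = next exponent bit (most significant first)
+ *     t[y ^ 1] = t[0] * t[1]      (one Montgomery multiply)
+ *     t[y]     = t[y] * t[y]      (one Montgomery square)
+ *
+ * with addr_mask[] used to select t[0] or t[1] arithmetically, so the
+ * memory access pattern does not branch on secret exponent bits. The
+ * non-small builds use the same ladder with 5-bit windows and 32
+ * precomputed powers.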
+ */ +static int sp_3072_mod_exp_134(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* td; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 134 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3U * 134U * 2U); + + norm = t[0] = td; + t[1] = &td[134 * 2]; + t[2] = &td[2 * 134 * 2]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_134(norm, m); + + if (reduceA != 0) { + err = sp_3072_mod_134(t[1], a, m); + } + else { + XMEMCPY(t[1], a, sizeof(sp_digit) * 134U); + } + } + if (err == MP_OKAY) { + sp_3072_mul_134(t[1], t[1], norm); + err = sp_3072_mod_134(t[1], t[1], m); + } + + if (err == MP_OKAY) { + i = bits / 23; + c = bits % 23; + n = e[i--] << (23 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 23; + } + + y = (n >> 22) & 1; + n <<= 1; + + sp_3072_mont_mul_134(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(*t[2]) * 134 * 2); + sp_3072_mont_sqr_134(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(*t[2]) * 134 * 2); + } + + sp_3072_mont_reduce_134(t[0], m, mp); + n = sp_3072_cmp_134(t[0], m); + sp_3072_cond_sub_134(t[0], t[0], m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 134 * 2); + + } + + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + + return err; +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[3][268]; +#else + sp_digit* td; + sp_digit* t[3]; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 134 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + t[0] = td; + t[1] = &td[134 * 2]; + t[2] = &td[2 * 134 * 2]; +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_134(norm, m); + + if (reduceA != 0) { + err = sp_3072_mod_134(t[1], a, m); + if (err == MP_OKAY) { + sp_3072_mul_134(t[1], t[1], norm); + err = sp_3072_mod_134(t[1], t[1], m); + } + } + else { + sp_3072_mul_134(t[1], a, norm); + err = sp_3072_mod_134(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + i = bits / 23; + c = bits % 23; + n = e[i--] << (23 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 23; + } + + y = (n >> 22) & 1; + n <<= 1; + + sp_3072_mont_mul_134(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); + sp_3072_mont_sqr_134(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); + } + + sp_3072_mont_reduce_134(t[0], m, mp); + n = sp_3072_cmp_134(t[0], m); + sp_3072_cond_sub_134(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(t[0])); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][268]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit rt[268]; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 268, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 268; +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_134(norm, m); + + if (reduceA != 0) { + err = sp_3072_mod_134(t[1], a, m); + if (err == MP_OKAY) { + sp_3072_mul_134(t[1], t[1], norm); + err = sp_3072_mod_134(t[1], t[1], m); + } + } + else { + sp_3072_mul_134(t[1], a, norm); + err = sp_3072_mod_134(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_134(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_134(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_134(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_134(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_134(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_134(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_134(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_134(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_134(t[10], t[ 5], m, mp); + sp_3072_mont_mul_134(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_134(t[12], t[ 6], m, mp); + sp_3072_mont_mul_134(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_134(t[14], t[ 7], m, mp); + sp_3072_mont_mul_134(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_134(t[16], t[ 8], m, mp); + sp_3072_mont_mul_134(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_134(t[18], t[ 9], m, mp); + sp_3072_mont_mul_134(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_134(t[20], t[10], m, mp); + sp_3072_mont_mul_134(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_134(t[22], t[11], m, mp); + sp_3072_mont_mul_134(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_134(t[24], t[12], m, mp); + sp_3072_mont_mul_134(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_134(t[26], t[13], m, mp); + sp_3072_mont_mul_134(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_134(t[28], t[14], m, mp); + sp_3072_mont_mul_134(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_134(t[30], t[15], m, mp); + sp_3072_mont_mul_134(t[31], t[16], t[15], m, mp); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 22) / 23) - 1; + c = bits % 23; + if (c == 0) { + c = 23; + } + if (i < 134) { + n = e[i--] << (32 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (9 - c); + c += 23; + } + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + XMEMCPY(rt, t[y], sizeof(rt)); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (9 - c); + c += 23; + } + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + + sp_3072_mont_sqr_134(rt, rt, m, mp); + sp_3072_mont_sqr_134(rt, rt, m, mp); + sp_3072_mont_sqr_134(rt, rt, m, mp); + sp_3072_mont_sqr_134(rt, rt, m, mp); + sp_3072_mont_sqr_134(rt, rt, m, mp); + + sp_3072_mont_mul_134(rt, rt, t[y], m, mp); + } + + sp_3072_mont_reduce_134(rt, m, mp); + n = sp_3072_cmp_134(rt, m); + sp_3072_cond_sub_134(rt, rt, m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, rt, sizeof(rt)); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#endif +} +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */ + /* WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; + sp_digit* norm; + sp_digit e[1] = {0}; + sp_digit mp; + int i; + int err = MP_OKAY; + + if (*outLen < 384U) { + err = MP_TO_E; + } + + if (err == MP_OKAY) { + if (mp_count_bits(em) > 23) { + err = MP_READ_E; + } + if (inLen > 384U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 134 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 134 * 2; + m = r + 134 * 2; + norm = r; + + sp_3072_from_bin(a, 134, in, inLen); +#if DIGIT_BIT >= 23 + e[0] = (sp_digit)em->dp[0]; +#else + e[0] = (sp_digit)em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_mp(m, 134, mm); + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_134(norm, m); + } + if (err == MP_OKAY) { + sp_3072_mul_134(a, a, norm); + err = sp_3072_mod_134(a, a, m); + } + if (err == MP_OKAY) { + for (i=22; i>=0; i--) { + if ((e[0] >> i) != 0) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 134 * 2); + for (i--; i>=0; i--) { + sp_3072_mont_sqr_134(r, r, m, mp); + + if (((e[0] >> i) & 1) == 1) { + sp_3072_mont_mul_134(r, r, a, m, mp); + } + } + sp_3072_mont_reduce_134(r, m, mp); + mp = sp_3072_cmp_134(r, m); + sp_3072_cond_sub_134(r, r, m, ((mp < 0) ? 
+ (sp_digit)1 : (sp_digit)0)- 1); + + sp_3072_to_bin(r, out); + *outLen = 384; + } + + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit ad[268], md[134], rd[268]; +#else + sp_digit* d = NULL; +#endif + sp_digit* a; + sp_digit* m; + sp_digit* r; + sp_digit e[1] = {0}; + int err = MP_OKAY; + + if (*outLen < 384U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(em) > 23) { + err = MP_READ_E; + } + if (inLen > 384U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 134 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + a = d; + r = a + 134 * 2; + m = r + 134 * 2; + } +#else + a = ad; + m = md; + r = rd; +#endif + + if (err == MP_OKAY) { + sp_3072_from_bin(a, 134, in, inLen); +#if DIGIT_BIT >= 23 + e[0] = (sp_digit)em->dp[0]; +#else + e[0] = (sp_digit)em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_3072_from_mp(m, 134, mm); + + if (e[0] == 0x3) { + sp_3072_sqr_134(r, a); + err = sp_3072_mod_134(r, r, m); + if (err == MP_OKAY) { + sp_3072_mul_134(r, a, r); + err = sp_3072_mod_134(r, r, m); + } + } + else { + sp_digit* norm = r; + int i; + sp_digit mp; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_134(norm, m); + + sp_3072_mul_134(a, a, norm); + err = sp_3072_mod_134(a, a, m); + + if (err == MP_OKAY) { + for (i=22; i>=0; i--) { + if ((e[0] >> i) != 0) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 268U); + for (i--; i>=0; i--) { + sp_3072_mont_sqr_134(r, r, m, mp); + + if (((e[0] >> i) & 1) == 1) { + sp_3072_mont_mul_134(r, r, a, m, mp); + } + } + sp_3072_mont_reduce_134(r, m, mp); + mp = sp_3072_cmp_134(r, m); + sp_3072_cond_sub_134(r, r, m, ((mp < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + } + } + + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } +#endif + + return err; +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) +#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */ +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
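+ *
+ * For reference, a worked sketch of the CRT identity the code below
+ * implements (parameter names as above):
+ *
+ *     tmpa = in^dpm mod pm
+ *     tmpb = in^dqm mod qm
+ *     h    = qim * (tmpa - tmpb) mod pm
+ *     out  = tmpb + h * qm
+ *
+ * so two half-size (1536-bit) exponentiations replace one full 3072-bit
+ * exponentiation.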
+ */ +int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* a; + sp_digit* d = NULL; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 384U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 3072) { + err = MP_READ_E; + } + if (inLen > 384) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 134 * 4, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = d + 134; + m = a + 268; + r = a; + + sp_3072_from_bin(a, 134, in, inLen); + sp_3072_from_mp(d, 134, dm); + sp_3072_from_mp(m, 134, mm); + err = sp_3072_mod_exp_134(r, a, d, 3072, m, 0); + } + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + } + + if (d != NULL) { + XMEMSET(d, 0, sizeof(sp_digit) * 134); + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else + sp_digit a[268], d[134], m[134]; + sp_digit* r = a; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 384U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 3072) { + err = MP_READ_E; + } + if (inLen > 384U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_bin(a, 134, in, inLen); + sp_3072_from_mp(d, 134, dm); + sp_3072_from_mp(m, 134, mm); + err = sp_3072_mod_exp_134(r, a, d, 3072, m, 0); + } + + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + } + + XMEMSET(d, 0, sizeof(sp_digit) * 134); + + return err; +#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ +#else +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* dq; + sp_digit* qi; + sp_digit* tmpa; + sp_digit* tmpb; + sp_digit* r; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 384U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (inLen > 384) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 67 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = t; + p = a + 134 * 2; + q = p + 67; + qi = dq = dp = q + 67; + tmpa = qi + 67; + tmpb = tmpa + 134; + + r = t + 134; + + sp_3072_from_bin(a, 134, in, inLen); + sp_3072_from_mp(p, 67, pm); + sp_3072_from_mp(q, 67, qm); + sp_3072_from_mp(dp, 67, dpm); + err = sp_3072_mod_exp_67(tmpa, a, dp, 1536, p, 1); + } + if (err == MP_OKAY) { + sp_3072_from_mp(dq, 67, dqm); + err = sp_3072_mod_exp_67(tmpb, a, dq, 1536, q, 1); + } + if (err == MP_OKAY) { + (void)sp_3072_sub_67(tmpa, tmpa, tmpb); + sp_3072_cond_add_67(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[66] >> 31)); + sp_3072_cond_add_67(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[66] >> 31)); + + sp_3072_from_mp(qi, 67, qim); + sp_3072_mul_67(tmpa, tmpa, qi); + err = sp_3072_mod_67(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_3072_mul_67(tmpa, q, tmpa); + (void)sp_3072_add_134(r, 
tmpb, tmpa); + sp_3072_norm_134(r); + + sp_3072_to_bin(r, out); + *outLen = 384; + } + + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 67 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else + sp_digit a[134 * 2]; + sp_digit p[67], q[67], dp[67], dq[67], qi[67]; + sp_digit tmpa[134], tmpb[134]; + sp_digit* r = a; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 384U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (inLen > 384U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_bin(a, 134, in, inLen); + sp_3072_from_mp(p, 67, pm); + sp_3072_from_mp(q, 67, qm); + sp_3072_from_mp(dp, 67, dpm); + sp_3072_from_mp(dq, 67, dqm); + sp_3072_from_mp(qi, 67, qim); + + err = sp_3072_mod_exp_67(tmpa, a, dp, 1536, p, 1); + } + if (err == MP_OKAY) { + err = sp_3072_mod_exp_67(tmpb, a, dq, 1536, q, 1); + } + + if (err == MP_OKAY) { + (void)sp_3072_sub_67(tmpa, tmpa, tmpb); + sp_3072_cond_add_67(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[66] >> 31)); + sp_3072_cond_add_67(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[66] >> 31)); + sp_3072_mul_67(tmpa, tmpa, qi); + err = sp_3072_mod_67(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_3072_mul_67(tmpa, tmpa, q); + (void)sp_3072_add_134(r, tmpb, tmpa); + sp_3072_norm_134(r); + + sp_3072_to_bin(r, out); + *outLen = 384; + } + + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); + XMEMSET(dq, 0, sizeof(dq)); + XMEMSET(qi, 0, sizeof(qi)); + + return err; +#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +} + +#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_3072_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 23 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 134); + r->used = 134; + mp_clamp(r); +#elif DIGIT_BIT < 23 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 134; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 23) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 23 - s; + } + r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 134; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 23 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 23 - s; + } + else { + s += 23; + } + } + r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
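+ *
+ * A minimal usage sketch (hypothetical buffers; assumes the standard
+ * wolfSSL mp_int helpers):
+ *
+ *     mp_int b, e, m, r;
+ *     mp_init_multi(&b, &e, &m, &r, NULL, NULL);
+ *     mp_read_unsigned_bin(&b, base_buf, base_len);
+ *     mp_read_unsigned_bin(&e, exp_buf, exp_len);
+ *     mp_read_unsigned_bin(&m, mod_buf, mod_len);
+ *     int ret = sp_ModExp_3072(&b, &e, &m, &r);
+ *
+ * where mod_buf holds a full 384-byte (3072-bit) modulus.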
+ */ +int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 134 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 134 * 2; + m = e + 134; + r = b; + + sp_3072_from_mp(b, 134, base); + sp_3072_from_mp(e, 134, exp); + sp_3072_from_mp(m, 134, mod); + + err = sp_3072_mod_exp_134(r, b, e, mp_count_bits(exp), m, 0); + } + + if (err == MP_OKAY) { + err = sp_3072_to_mp(r, res); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 134U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[268], ed[134], md[134]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + } + +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 134 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 134 * 2; + m = e + 134; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 134, base); + sp_3072_from_mp(e, 134, exp); + sp_3072_from_mp(m, 134, mod); + + err = sp_3072_mod_exp_134(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_3072_to_mp(r, res); + } + + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 134U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +#else + XMEMSET(e, 0, sizeof(sp_digit) * 134U); +#endif + + return err; +#endif +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_3072 +SP_NOINLINE static void sp_3072_lshift_134(sp_digit* r, sp_digit* a, byte n) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + r[134] = a[133] >> (23 - n); + for (i=133; i>0; i--) { + r[i] = ((a[i] << n) | (a[i-1] >> (23 - n))) & 0x7fffff; + } +#else + sp_int_digit s, t; + + s = (sp_int_digit)a[133]; + r[134] = s >> (23U - n); + s = (sp_int_digit)(a[133]); t = (sp_int_digit)(a[132]); + r[133] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[132]); t = (sp_int_digit)(a[131]); + r[132] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[131]); t = (sp_int_digit)(a[130]); + r[131] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[130]); t = (sp_int_digit)(a[129]); + r[130] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[129]); t = (sp_int_digit)(a[128]); + r[129] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[128]); t = (sp_int_digit)(a[127]); + r[128] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[127]); t = (sp_int_digit)(a[126]); + r[127] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[126]); t = (sp_int_digit)(a[125]); + r[126] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[125]); t = 
(sp_int_digit)(a[124]); + r[125] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[124]); t = (sp_int_digit)(a[123]); + r[124] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[123]); t = (sp_int_digit)(a[122]); + r[123] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[122]); t = (sp_int_digit)(a[121]); + r[122] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[121]); t = (sp_int_digit)(a[120]); + r[121] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[120]); t = (sp_int_digit)(a[119]); + r[120] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[119]); t = (sp_int_digit)(a[118]); + r[119] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[118]); t = (sp_int_digit)(a[117]); + r[118] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[117]); t = (sp_int_digit)(a[116]); + r[117] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[116]); t = (sp_int_digit)(a[115]); + r[116] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[115]); t = (sp_int_digit)(a[114]); + r[115] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[114]); t = (sp_int_digit)(a[113]); + r[114] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[113]); t = (sp_int_digit)(a[112]); + r[113] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[112]); t = (sp_int_digit)(a[111]); + r[112] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[111]); t = (sp_int_digit)(a[110]); + r[111] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[110]); t = (sp_int_digit)(a[109]); + r[110] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[109]); t = (sp_int_digit)(a[108]); + r[109] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[108]); t = (sp_int_digit)(a[107]); + r[108] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[107]); t = (sp_int_digit)(a[106]); + r[107] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[106]); t = (sp_int_digit)(a[105]); + r[106] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[105]); t = (sp_int_digit)(a[104]); + r[105] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[104]); t = (sp_int_digit)(a[103]); + r[104] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[103]); t = (sp_int_digit)(a[102]); + r[103] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[102]); t = (sp_int_digit)(a[101]); + r[102] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[101]); t = (sp_int_digit)(a[100]); + r[101] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[100]); t = (sp_int_digit)(a[99]); + r[100] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[99]); t = (sp_int_digit)(a[98]); + r[99] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[98]); t = (sp_int_digit)(a[97]); + r[98] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[97]); t = (sp_int_digit)(a[96]); + r[97] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[96]); t = (sp_int_digit)(a[95]); + r[96] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[95]); t = (sp_int_digit)(a[94]); + r[95] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[94]); t = (sp_int_digit)(a[93]); + r[94] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[93]); t = (sp_int_digit)(a[92]); 
+ r[93] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[92]); t = (sp_int_digit)(a[91]); + r[92] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[91]); t = (sp_int_digit)(a[90]); + r[91] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[90]); t = (sp_int_digit)(a[89]); + r[90] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[89]); t = (sp_int_digit)(a[88]); + r[89] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[88]); t = (sp_int_digit)(a[87]); + r[88] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[87]); t = (sp_int_digit)(a[86]); + r[87] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[86]); t = (sp_int_digit)(a[85]); + r[86] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[85]); t = (sp_int_digit)(a[84]); + r[85] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[84]); t = (sp_int_digit)(a[83]); + r[84] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[83]); t = (sp_int_digit)(a[82]); + r[83] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[82]); t = (sp_int_digit)(a[81]); + r[82] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[81]); t = (sp_int_digit)(a[80]); + r[81] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[80]); t = (sp_int_digit)(a[79]); + r[80] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[79]); t = (sp_int_digit)(a[78]); + r[79] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[78]); t = (sp_int_digit)(a[77]); + r[78] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[77]); t = (sp_int_digit)(a[76]); + r[77] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[76]); t = (sp_int_digit)(a[75]); + r[76] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[75]); t = (sp_int_digit)(a[74]); + r[75] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[74]); t = (sp_int_digit)(a[73]); + r[74] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[73]); t = (sp_int_digit)(a[72]); + r[73] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[72]); t = (sp_int_digit)(a[71]); + r[72] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[71]); t = (sp_int_digit)(a[70]); + r[71] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[70]); t = (sp_int_digit)(a[69]); + r[70] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[69]); t = (sp_int_digit)(a[68]); + r[69] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[68]); t = (sp_int_digit)(a[67]); + r[68] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[67]); t = (sp_int_digit)(a[66]); + r[67] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[66]); t = (sp_int_digit)(a[65]); + r[66] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[65]); t = (sp_int_digit)(a[64]); + r[65] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[64]); t = (sp_int_digit)(a[63]); + r[64] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[63]); t = (sp_int_digit)(a[62]); + r[63] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[62]); t = (sp_int_digit)(a[61]); + r[62] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[61]); t = (sp_int_digit)(a[60]); + r[61] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[60]); t = 
(sp_int_digit)(a[59]); + r[60] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[59]); t = (sp_int_digit)(a[58]); + r[59] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[58]); t = (sp_int_digit)(a[57]); + r[58] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[57]); t = (sp_int_digit)(a[56]); + r[57] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[56]); t = (sp_int_digit)(a[55]); + r[56] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[55]); t = (sp_int_digit)(a[54]); + r[55] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[54]); t = (sp_int_digit)(a[53]); + r[54] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[53]); t = (sp_int_digit)(a[52]); + r[53] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[52]); t = (sp_int_digit)(a[51]); + r[52] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[51]); t = (sp_int_digit)(a[50]); + r[51] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[50]); t = (sp_int_digit)(a[49]); + r[50] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[49]); t = (sp_int_digit)(a[48]); + r[49] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[48]); t = (sp_int_digit)(a[47]); + r[48] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[47]); t = (sp_int_digit)(a[46]); + r[47] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[46]); t = (sp_int_digit)(a[45]); + r[46] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[45]); t = (sp_int_digit)(a[44]); + r[45] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[44]); t = (sp_int_digit)(a[43]); + r[44] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[43]); t = (sp_int_digit)(a[42]); + r[43] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[42]); t = (sp_int_digit)(a[41]); + r[42] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]); + r[41] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]); + r[40] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]); + r[39] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]); + r[38] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]); + r[37] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]); + r[36] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]); + r[35] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]); + r[34] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]); + r[33] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]); + r[32] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]); + r[31] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]); + r[30] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]); + r[29] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]); + r[28] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = 
(sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]); + r[27] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]); + r[26] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]); + r[25] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]); + r[24] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]); + r[23] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]); + r[22] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]); + r[21] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]); + r[20] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]); + r[19] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]); + r[18] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]); + r[17] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]); + r[16] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]); + r[15] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]); + r[14] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]); + r[13] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]); + r[12] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]); + r[11] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]); + r[10] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]); + r[9] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]); + r[8] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]); + r[7] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]); + r[6] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]); + r[5] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]); + r[4] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]); + r[3] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]); + r[2] = ((s << n) | (t >> (23U - n))) & 0x7fffff; + s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]); + r[1] = ((s << n) | (t >> (23U - n))) & 0x7fffff; +#endif + r[0] = (a[0] << n) & 0x7fffff; +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
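+ *
+ * Because the base is 2, each 4-bit window below is a plain left shift
+ * instead of a multiply by a stored power. In sketch form:
+ *
+ *     r <<= y;                                   (sp_3072_lshift_134)
+ *     over = (r[134] << 10) + (r[133] >> 13);    (bits shifted past 3072)
+ *     r += over * norm;       (norm = 2^3072 - m, i.e. 2^3072 mod m for
+ *                              a full-size modulus)
+ *
+ * which folds the overflow back under 2^3072 before the usual
+ * conditional subtraction of m.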
+ */ +static int sp_3072_mod_exp_2_134(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[268]; + sp_digit td[135]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 403, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 268; + XMEMSET(td, 0, sizeof(sp_digit) * 403); +#else + norm = nd; + tmp = td; + XMEMSET(td, 0, sizeof(td)); +#endif + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_134(norm, m); + + bits = ((bits + 3) / 4) * 4; + i = ((bits + 22) / 23) - 1; + c = bits % 23; + if (c == 0) { + c = 23; + } + if (i < 134) { + n = e[i--] << (32 - c); + } + else { + n = 0; + i--; + } + if (c < 4) { + n |= e[i--] << (9 - c); + c += 23; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + sp_3072_lshift_134(r, norm, y); + for (; i>=0 || c>=4; ) { + if (c < 4) { + n |= e[i--] << (9 - c); + c += 23; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + + sp_3072_mont_sqr_134(r, r, m, mp); + sp_3072_mont_sqr_134(r, r, m, mp); + sp_3072_mont_sqr_134(r, r, m, mp); + sp_3072_mont_sqr_134(r, r, m, mp); + + sp_3072_lshift_134(r, r, y); + sp_3072_mul_d_134(tmp, norm, (r[134] << 10) + (r[133] >> 13)); + r[134] = 0; + r[133] &= 0x1fffL; + (void)sp_3072_add_134(r, r, tmp); + sp_3072_norm_134(r); + o = sp_3072_cmp_134(r, m); + sp_3072_cond_sub_134(r, r, m, ((o < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + + sp_3072_mont_reduce_134(r, m, mp); + n = sp_3072_cmp_134(r, m); + sp_3072_cond_sub_134(r, r, m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +#endif /* HAVE_FFDHE_3072 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
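+ *
+ * A minimal usage sketch (hypothetical buffers; base and mod already
+ * loaded as mp_ints):
+ *
+ *     byte shared[384];
+ *     word32 sharedLen = sizeof(shared);
+ *     int ret = sp_DhExp_3072(&base, priv, privLen, &mod,
+ *                             shared, &sharedLen);
+ *
+ * Leading zero bytes are stripped from the result, so sharedLen can come
+ * back smaller than 384.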
+ */ +int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + word32 i; + + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 384) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 134 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 134 * 2; + m = e + 134; + r = b; + + sp_3072_from_mp(b, 134, base); + sp_3072_from_bin(e, 134, exp, expLen); + sp_3072_from_mp(m, 134, mod); + + #ifdef HAVE_FFDHE_3072 + if (base->used == 1 && base->dp[0] == 2 && + ((m[133] << 3) | (m[132] >> 20)) == 0xffffL) { + err = sp_3072_mod_exp_2_134(r, e, expLen * 8, m); + } + else + #endif + err = sp_3072_mod_exp_134(r, b, e, expLen * 8, m, 0); + } + + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + for (i=0; i<384 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 134U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[268], ed[134], md[134]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + word32 i; + int err = MP_OKAY; + + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 384U) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + } +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 134 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 134 * 2; + m = e + 134; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 134, base); + sp_3072_from_bin(e, 134, exp, expLen); + sp_3072_from_mp(m, 134, mod); + + #ifdef HAVE_FFDHE_3072 + if (base->used == 1 && base->dp[0] == 2U && + ((m[133] << 3) | (m[132] >> 20)) == 0xffffL) { + err = sp_3072_mod_exp_2_134(r, e, expLen * 8U, m); + } + else { + #endif + err = sp_3072_mod_exp_134(r, b, e, expLen * 8U, m, 0); + #ifdef HAVE_FFDHE_3072 + } + #endif + } + + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + for (i=0; i<384U && out[i] == 0U; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 134U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +#else + XMEMSET(e, 0, sizeof(sp_digit) * 134U); +#endif + + return err; +#endif +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
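+ *
+ * This 1536-bit entry point reuses the half-size (67-digit) primitives
+ * from the 3072-bit CRT code; since sp_3072_to_mp always reads 134
+ * digits, the upper half of r is cleared before conversion:
+ *
+ *     XMEMSET(r + 67, 0, sizeof(*r) * 67U);
+ *     err = sp_3072_to_mp(r, res);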
+ */ +int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1536) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 1536) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 1536) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 67 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 67 * 2; + m = e + 67; + r = b; + + sp_3072_from_mp(b, 67, base); + sp_3072_from_mp(e, 67, exp); + sp_3072_from_mp(m, 67, mod); + + err = sp_3072_mod_exp_67(r, b, e, mp_count_bits(exp), m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 67, 0, sizeof(*r) * 67U); + err = sp_3072_to_mp(r, res); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 67U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[134], ed[67], md[67]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1536) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 1536) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 1536) { + err = MP_READ_E; + } + } + +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 67 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 67 * 2; + m = e + 67; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 67, base); + sp_3072_from_mp(e, 67, exp); + sp_3072_from_mp(m, 67, mod); + + err = sp_3072_mod_exp_67(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 67, 0, sizeof(*r) * 67U); + err = sp_3072_to_mp(r, res); + } + + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 67U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +#else + XMEMSET(e, 0, sizeof(sp_digit) * 67U); +#endif + + return err; +#endif +} + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* !WOLFSSL_SP_NO_3072 */ + +#ifdef WOLFSSL_SP_4096 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 13U) { + r[j] &= 0x1fffff; + s = 21U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. 
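+ *
+ * A concrete (illustrative) example of the repacking below: with
+ * DIGIT_BIT == 28, each mp_digit carries 28 bits, so the first 21-bit
+ * digit takes the low 21 bits and the leftover 7 bits seed the next:
+ *
+ *     r[0] = a->dp[0] & 0x1fffff;
+ *     r[1] = (a->dp[0] >> 21) | ((a->dp[1] << 7) & 0x1fffff);
+ *
+ * with s tracking how many bits of the current mp_digit have been
+ * consumed.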
+ */
+static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 21
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 21
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0x1fffff;
+        s = 21U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 21U) <= (word32)DIGIT_BIT) {
+            s += 21U;
+            r[j] &= 0x1fffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 21) {
+            r[j] &= 0x1fffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 21 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_4096_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    for (i=0; i<195; i++) {
+        r[i+1] += r[i] >> 21;
+        r[i] &= 0x1fffff;
+    }
+    j = 4096 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<196 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 21) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 21);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_49(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i, j;
+    int64_t t[98];
+
+    XMEMSET(t, 0, sizeof(t));
+    for (i=0; i<49; i++) {
+        for (j=0; j<49; j++) {
+            t[i+j] += ((int64_t)a[i]) * b[j];
+        }
+    }
+    for (i=0; i<97; i++) {
+        r[i] = t[i] & 0x1fffff;
+        t[i+1] += t[i] >> 21;
+    }
+    r[97] = (sp_digit)t[97];
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_49(sp_digit* r, const sp_digit* a)
+{
+    int i, j;
+    int64_t t[98];
+
+    XMEMSET(t, 0, sizeof(t));
+    for (i=0; i<49; i++) {
+        for (j=0; j<i; j++) {
+            t[i+j] += (((int64_t)a[i]) * a[j]) * 2;
+        }
+        t[i+i] += ((int64_t)a[i]) * a[i];
+    }
+    for (i=0; i<97; i++) {
+        r[i] = t[i] & 0x1fffff;
+        t[i+1] += t[i] >> 21;
+    }
+    r[97] = (sp_digit)t[97];
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_49(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 48; i += 8) {
+        r[i + 0] = a[i + 0] + b[i + 0];
+        r[i + 1] = a[i + 1] + b[i + 1];
+        r[i + 2] = a[i + 2] + b[i + 2];
+        r[i + 3] = a[i + 3] + b[i + 3];
+        r[i + 4] = a[i + 4] + b[i + 4];
+        r[i + 5] = a[i + 5] + b[i + 5];
+        r[i + 6] = a[i + 6] + b[i + 6];
+        r[i + 7] = a[i + 7] + b[i + 7];
+    }
+    r[48] = a[48] + b[48];
+
+    return 0;
+}
+
+/* Add b to a into r.
(r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_add_98(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 96; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[96] = a[96] + b[96]; + r[97] = a[97] + b[97]; + + return 0; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_sub_98(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 96; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[96] = a[96] - b[96]; + r[97] = a[97] - b[97]; + + return 0; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_4096_mul_98(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[98]; + sp_digit* a1 = z1; + sp_digit b1[49]; + sp_digit* z2 = r + 98; + (void)sp_4096_add_49(a1, a, &a[49]); + (void)sp_4096_add_49(b1, b, &b[49]); + sp_4096_mul_49(z2, &a[49], &b[49]); + sp_4096_mul_49(z0, a, b); + sp_4096_mul_49(z1, a1, b1); + (void)sp_4096_sub_98(z1, z1, z2); + (void)sp_4096_sub_98(z1, z1, z0); + (void)sp_4096_add_98(r + 49, r + 49, z1); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_98(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z1[98]; + sp_digit* a1 = z1; + sp_digit* z2 = r + 98; + (void)sp_4096_add_49(a1, a, &a[49]); + sp_4096_sqr_49(z2, &a[49]); + sp_4096_sqr_49(z0, a); + sp_4096_sqr_49(z1, a1); + (void)sp_4096_sub_98(z1, z1, z2); + (void)sp_4096_sub_98(z1, z1, z0); + (void)sp_4096_add_98(r + 49, r + 49, z1); +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_add_196(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 192; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[192] = a[192] + b[192]; + r[193] = a[193] + b[193]; + r[194] = a[194] + b[194]; + r[195] = a[195] + b[195]; + + return 0; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
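+ *
+ * (For orientation: the surrounding 98- and 196-digit multiplies use one
+ * level of Karatsuba. With X = 2^(21*98), the 196-digit case is
+ *
+ *     a = a1*X + a0,  b = b1*X + b0
+ *     z0 = a0*b0,  z2 = a1*b1
+ *     z1 = (a0 + a1)*(b0 + b1) - z0 - z2
+ *     a*b = z2*X^2 + z1*X + z0
+ *
+ * so three half-size products plus the add/sub helpers above and below.)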
+ */ +SP_NOINLINE static int sp_4096_sub_196(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 192; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[192] = a[192] - b[192]; + r[193] = a[193] - b[193]; + r[194] = a[194] - b[194]; + r[195] = a[195] - b[195]; + + return 0; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_4096_mul_196(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[196]; + sp_digit* a1 = z1; + sp_digit b1[98]; + sp_digit* z2 = r + 196; + (void)sp_4096_add_98(a1, a, &a[98]); + (void)sp_4096_add_98(b1, b, &b[98]); + sp_4096_mul_98(z2, &a[98], &b[98]); + sp_4096_mul_98(z0, a, b); + sp_4096_mul_98(z1, a1, b1); + (void)sp_4096_sub_196(z1, z1, z2); + (void)sp_4096_sub_196(z1, z1, z0); + (void)sp_4096_add_196(r + 98, r + 98, z1); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_196(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z1[196]; + sp_digit* a1 = z1; + sp_digit* z2 = r + 196; + (void)sp_4096_add_98(a1, a, &a[98]); + sp_4096_sqr_98(z2, &a[98]); + sp_4096_sqr_98(z0, a); + sp_4096_sqr_98(z1, a1); + (void)sp_4096_sub_196(z1, z1, z2); + (void)sp_4096_sub_196(z1, z1, z0); + (void)sp_4096_add_196(r + 98, r + 98, z1); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_add_196(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 196; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_sub_196(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 196; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_4096_mul_196(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j, k; + int64_t c; + + c = ((int64_t)a[195]) * b[195]; + r[391] = (sp_digit)(c >> 21); + c = (c & 0x1fffff) << 21; + for (k = 389; k >= 0; k--) { + for (i = 195; i >= 0; i--) { + j = k - i; + if (j >= 196) { + break; + } + if (j < 0) { + continue; + } + + c += ((int64_t)a[i]) * b[j]; + } + r[k + 2] += c >> 42; + r[k + 1] = (c >> 21) & 0x1fffff; + c = (c & 0x1fffff) << 21; + } + r[0] = (sp_digit)(c >> 21); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */
+SP_NOINLINE static void sp_4096_sqr_196(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[195]) * a[195];
+    r[391] = (sp_digit)(c >> 21);
+    c = (c & 0x1fffff) << 21;
+    for (k = 389; k >= 0; k--) {
+        for (i = 195; i >= 0; i--) {
+            j = k - i;
+            if (j >= 196 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+            c += ((int64_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 42;
+        r[k + 1] = (c >> 21) & 0x1fffff;
+        c = (c & 0x1fffff) << 21;
+    }
+    r[0] = (sp_digit)(c >> 21);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D)
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_98(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 98; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_98(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 98; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_98(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[97]) * b[97];
+    r[195] = (sp_digit)(c >> 21);
+    c = (c & 0x1fffff) << 21;
+    for (k = 193; k >= 0; k--) {
+        for (i = 97; i >= 0; i--) {
+            j = k - i;
+            if (j >= 98) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 42;
+        r[k + 1] = (c >> 21) & 0x1fffff;
+        c = (c & 0x1fffff) << 21;
+    }
+    r[0] = (sp_digit)(c >> 21);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_98(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[97]) * a[97];
+    r[195] = (sp_digit)(c >> 21);
+    c = (c & 0x1fffff) << 21;
+    for (k = 193; k >= 0; k--) {
+        for (i = 97; i >= 0; i--) {
+            j = k - i;
+            if (j >= 98 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+            c += ((int64_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 42;
+        r[k + 1] = (c >> 21) & 0x1fffff;
+        c = (c & 0x1fffff) << 21;
+    }
+    r[0] = (sp_digit)(c >> 21);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* WOLFSSL_HAVE_SP_RSA && !SP_RSA_PRIVATE_EXP_D */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
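+ *
+ * The digit is found by Hensel lifting: for odd b = a[0] the seed
+ * x = (((b + 2) & 4) << 1) + b already satisfies x*b == 1 mod 2^4, and
+ * each step x *= 2 - b*x doubles the number of correct low-order bits
+ * (4 -> 8 -> 16 -> 32). Truncating to the 21-bit digit size and
+ * negating, rho = 2^21 - x, gives -1/a mod 2^21 as used by the
+ * Montgomery reduction routines below.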
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+    x &= 0x1fffff;
+
+    /* rho = -1/m mod b */
+    *rho = (1L << 21) - x;
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_d_196(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 196; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1fffff;
+        t >>= 21;
+    }
+    r[196] = (sp_digit)t;
+#else
+    int64_t tb = b;
+    int64_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1fffff;
+    for (i = 0; i < 192; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 21) + (t[2] & 0x1fffff);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 21) + (t[3] & 0x1fffff);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 21) + (t[4] & 0x1fffff);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 21) + (t[5] & 0x1fffff);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 21) + (t[6] & 0x1fffff);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 21) + (t[7] & 0x1fffff);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 21) + (t[0] & 0x1fffff);
+    }
+    t[1] = tb * a[193];
+    r[193] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff);
+    t[2] = tb * a[194];
+    r[194] = (sp_digit)(t[1] >> 21) + (t[2] & 0x1fffff);
+    t[3] = tb * a[195];
+    r[195] = (sp_digit)(t[2] >> 21) + (t[3] & 0x1fffff);
+    r[196] = (sp_digit)(t[3] >> 21);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_4096_mont_norm_98(sp_digit* r, const sp_digit* m)
+{
+    /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<97; i++) {
+        r[i] = 0x1fffff;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 96; i += 8) {
+        r[i + 0] = 0x1fffff;
+        r[i + 1] = 0x1fffff;
+        r[i + 2] = 0x1fffff;
+        r[i + 3] = 0x1fffff;
+        r[i + 4] = 0x1fffff;
+        r[i + 5] = 0x1fffff;
+        r[i + 6] = 0x1fffff;
+        r[i + 7] = 0x1fffff;
+    }
+    r[96] = 0x1fffff;
+#endif
+    r[97] = 0x7ffL;
+
+    /* r = (2^n - 1) mod m */
+    (void)sp_4096_sub_98(r, r, m);
+
+    /* Add one so r = 2^n mod m */
+    r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_4096_cmp_98(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=97; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[97] - b[97]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[96] - b[96]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    for (i = 88; i >= 0; i -= 8) {
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ?
(sp_digit)1 : (sp_digit)0)); + r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static void sp_4096_cond_sub_98(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 98; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + int i; + + for (i = 0; i < 96; i += 8) { + r[i + 0] = a[i + 0] - (b[i + 0] & m); + r[i + 1] = a[i + 1] - (b[i + 1] & m); + r[i + 2] = a[i + 2] - (b[i + 2] & m); + r[i + 3] = a[i + 3] - (b[i + 3] & m); + r[i + 4] = a[i + 4] - (b[i + 4] & m); + r[i + 5] = a[i + 5] - (b[i + 5] & m); + r[i + 6] = a[i + 6] - (b[i + 6] & m); + r[i + 7] = a[i + 7] - (b[i + 7] & m); + } + r[96] = a[96] - (b[96] & m); + r[97] = a[97] - (b[97] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_4096_mul_add_98(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 98; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x1fffff; + t >>= 21; + } + r[98] += t; +#else + int64_t tb = b; + int64_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffff); + for (i = 0; i < 96; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff)); + t[2] = tb * a[i+2]; + r[i+2] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff)); + t[3] = tb * a[i+3]; + r[i+3] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff)); + t[4] = tb * a[i+4]; + r[i+4] += (sp_digit)((t[3] >> 21) + (t[4] & 0x1fffff)); + t[5] = tb * a[i+5]; + r[i+5] += (sp_digit)((t[4] >> 21) + (t[5] & 0x1fffff)); + t[6] = tb * a[i+6]; + r[i+6] += (sp_digit)((t[5] >> 21) + (t[6] & 0x1fffff)); + t[7] = tb * a[i+7]; + r[i+7] += (sp_digit)((t[6] >> 21) + (t[7] & 0x1fffff)); + t[0] = tb * a[i+8]; + r[i+8] += (sp_digit)((t[7] >> 21) + (t[0] & 0x1fffff)); + } + t[1] = tb * a[97]; r[97] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff)); + r[98] += (sp_digit)(t[1] >> 21); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 21. + * + * a Array of sp_digit to normalize. 
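+ *
+ * A digit nominally holds 21 bits, but unnormalized adds and
+ * sp_4096_mul_add_98 can leave larger values behind. One pass here
+ * pushes the excess (a[i] >> 21) into the next digit and masks each
+ * digit back to 21 bits, restoring the canonical form the comparison
+ * and shift routines expect.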
+ */
+static void sp_4096_norm_98(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 97; i++) {
+        a[i+1] += a[i] >> 21;
+        a[i] &= 0x1fffff;
+    }
+#else
+    int i;
+    for (i = 0; i < 96; i += 8) {
+        a[i+1] += a[i+0] >> 21; a[i+0] &= 0x1fffff;
+        a[i+2] += a[i+1] >> 21; a[i+1] &= 0x1fffff;
+        a[i+3] += a[i+2] >> 21; a[i+2] &= 0x1fffff;
+        a[i+4] += a[i+3] >> 21; a[i+3] &= 0x1fffff;
+        a[i+5] += a[i+4] >> 21; a[i+4] &= 0x1fffff;
+        a[i+6] += a[i+5] >> 21; a[i+5] &= 0x1fffff;
+        a[i+7] += a[i+6] >> 21; a[i+6] &= 0x1fffff;
+        a[i+8] += a[i+7] >> 21; a[i+7] &= 0x1fffff;
+        a[i+9] += a[i+8] >> 21; a[i+8] &= 0x1fffff;
+    }
+    a[96+1] += a[96] >> 21;
+    a[96] &= 0x1fffff;
+#endif
+}
+
+/* Shift the result in the high 2048 bits down to the bottom.
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
+static void sp_4096_mont_shift_98(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int64_t n = a[97] >> 11;
+    n += ((int64_t)a[98]) << 10;
+
+    for (i = 0; i < 97; i++) {
+        r[i] = n & 0x1fffff;
+        n >>= 21;
+        n += ((int64_t)a[99 + i]) << 10;
+    }
+    r[97] = (sp_digit)n;
+#else
+    int i;
+    int64_t n = a[97] >> 11;
+    n += ((int64_t)a[98]) << 10;
+    for (i = 0; i < 96; i += 8) {
+        r[i + 0] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 99]) << 10;
+        r[i + 1] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 100]) << 10;
+        r[i + 2] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 101]) << 10;
+        r[i + 3] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 102]) << 10;
+        r[i + 4] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 103]) << 10;
+        r[i + 5] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 104]) << 10;
+        r[i + 6] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 105]) << 10;
+        r[i + 7] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 106]) << 10;
+    }
+    r[96] = n & 0x1fffff; n >>= 21; n += ((int64_t)a[195]) << 10;
+    r[97] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[98], 0, sizeof(*r) * 98U);
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_4096_mont_reduce_98(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_4096_norm_98(a + 98);
+
+    for (i=0; i<97; i++) {
+        mu = (a[i] * mp) & 0x1fffff;
+        sp_4096_mul_add_98(a+i, m, mu);
+        a[i+1] += a[i] >> 21;
+    }
+    mu = (a[i] * mp) & 0x7ffL;
+    sp_4096_mul_add_98(a+i, m, mu);
+    a[i+1] += a[i] >> 21;
+    a[i] &= 0x1fffff;
+
+    sp_4096_mont_shift_98(a, a);
+    sp_4096_cond_sub_98(a, a, m, 0 - (((a[97] >> 11) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_4096_norm_98(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_4096_mont_mul_98(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_98(r, a, b);
+    sp_4096_mont_reduce_98(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
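+ *
+ * Like sp_4096_mont_mul_98 above, this is the full double-width
+ * product followed by one Montgomery reduction, so inputs and output
+ * are all 98-digit values in Montgomery form; squaring is cheaper only
+ * because sp_4096_sqr_98 computes each cross product once.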
+ */ +static void sp_4096_mont_sqr_98(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_4096_sqr_98(r, a); + sp_4096_mont_reduce_98(r, m, mp); +} + +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_4096_mul_d_98(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 98; i++) { + t += tb * a[i]; + r[i] = t & 0x1fffff; + t >>= 21; + } + r[98] = (sp_digit)t; +#else + int64_t tb = b; + int64_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x1fffff; + for (i = 0; i < 96; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 21) + (t[2] & 0x1fffff); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 21) + (t[3] & 0x1fffff); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 21) + (t[4] & 0x1fffff); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 21) + (t[5] & 0x1fffff); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 21) + (t[6] & 0x1fffff); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 21) + (t[7] & 0x1fffff); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 21) + (t[0] & 0x1fffff); + } + t[1] = tb * a[97]; + r[97] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff); + r[98] = (sp_digit)(t[1] >> 21); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_4096_cond_add_98(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 98; i++) { + r[i] = a[i] + (b[i] & m); + } +#else + int i; + + for (i = 0; i < 96; i += 8) { + r[i + 0] = a[i + 0] + (b[i + 0] & m); + r[i + 1] = a[i + 1] + (b[i + 1] & m); + r[i + 2] = a[i + 2] + (b[i + 2] & m); + r[i + 3] = a[i + 3] + (b[i + 3] & m); + r[i + 4] = a[i + 4] + (b[i + 4] & m); + r[i + 5] = a[i + 5] + (b[i + 5] & m); + r[i + 6] = a[i + 6] + (b[i + 6] & m); + r[i + 7] = a[i + 7] + (b[i + 7] & m); + } + r[96] = a[96] + (b[96] & m); + r[97] = a[97] + (b[97] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifdef WOLFSSL_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_sub_98(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 98; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif +#ifdef WOLFSSL_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
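+ *
+ * The return value is always 0; the int signature appears to mirror
+ * assembly implementations that can report a carry out. This plain
+ * loop keeps sp_4096_div_98 linkable in configurations where the
+ * unrolled adder earlier in the file is compiled out.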
+ */
+SP_NOINLINE static int sp_4096_add_98(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 98; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif
+SP_NOINLINE static void sp_4096_rshift_98(sp_digit* r, sp_digit* a, byte n)
+{
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<97; i++) {
+        r[i] = ((a[i] >> n) | (a[i + 1] << (21 - n))) & 0x1fffff;
+    }
+#else
+    for (i=0; i<96; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (21 - n))) & 0x1fffff;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (21 - n))) & 0x1fffff;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (21 - n))) & 0x1fffff;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (21 - n))) & 0x1fffff;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (21 - n))) & 0x1fffff;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (21 - n))) & 0x1fffff;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (21 - n))) & 0x1fffff;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (21 - n))) & 0x1fffff;
+    }
+    r[96] = ((a[96] >> n) | (a[97] << (21 - n))) & 0x1fffff;
+#endif
+    r[97] = a[97] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_4096_div_word_98(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 21 bits from d1 and top 10 bits from d0. */
+    d = (d1 << 10) | (d0 >> 11);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 11 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 1) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 21 bits in r */
+    /* Remaining 1 bits from d0. */
+    r <<= 1;
+    d <<= 1;
+    d |= d0 & ((1 << 1) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide a by d and put the remainder into r (m*d + r = a).
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
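+ *
+ * The divisor is scaled by 2^10 first so that its top digit dv uses
+ * all 21 bits, which keeps the two-digit quotient estimate close; the
+ * dividend gets the same scaling and the final remainder is unscaled
+ * again with sp_4096_rshift_98(r, r, 10). Each iteration estimates
+ * one quotient digit, subtracts that multiple of the divisor, then
+ * applies a signed correction so the partial remainder stays in range.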
+ */ +static int sp_4096_div_98(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_32 + int64_t d1; +#endif + sp_digit dv, r1; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* td; +#else + sp_digit t1d[196 + 1], t2d[98 + 1], sdd[98 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* sd; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 98 + 3), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + (void)m; + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = td; + t2 = td + 196 + 1; + sd = t2 + 98 + 1; +#else + t1 = t1d; + t2 = t2d; + sd = sdd; +#endif + + sp_4096_mul_d_98(sd, d, 1L << 10); + sp_4096_mul_d_196(t1, a, 1L << 10); + dv = sd[97]; + for (i=98; i>=0; i--) { + t1[98 + i] += t1[98 + i - 1] >> 21; + t1[98 + i - 1] &= 0x1fffff; +#ifndef WOLFSSL_SP_DIV_32 + d1 = t1[98 + i]; + d1 <<= 21; + d1 += t1[98 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_4096_div_word_98(t1[98 + i], t1[98 + i - 1], dv); +#endif + + sp_4096_mul_d_98(t2, sd, r1); + (void)sp_4096_sub_98(&t1[i], &t1[i], t2); + t1[98 + i] -= t2[98]; + t1[98 + i] += t1[98 + i - 1] >> 21; + t1[98 + i - 1] &= 0x1fffff; + r1 = (((-t1[98 + i]) << 21) - t1[98 + i - 1]) / dv; + r1 -= t1[98 + i]; + sp_4096_mul_d_98(t2, sd, r1); + (void)sp_4096_add_98(&t1[i], &t1[i], t2); + t1[98 + i] += t1[98 + i - 1] >> 21; + t1[98 + i - 1] &= 0x1fffff; + } + t1[98 - 1] += t1[98 - 2] >> 21; + t1[98 - 2] &= 0x1fffff; + r1 = t1[98 - 1] / dv; + + sp_4096_mul_d_98(t2, sd, r1); + sp_4096_sub_98(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 98U); + for (i=0; i<96; i++) { + r[i+1] += r[i] >> 21; + r[i] &= 0x1fffff; + } + sp_4096_cond_add_98(r, r, sd, 0 - ((r[97] < 0) ? + (sp_digit)1 : (sp_digit)0)); + + sp_4096_norm_98(r); + sp_4096_rshift_98(r, r, 10); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_4096_mod_98(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_98(a, m, NULL, r); +} + +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
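+ *
+ * Three variants are selected at compile time: for WOLFSSL_SP_SMALL a
+ * bit-at-a-time ladder that selects operands through addr_mask rather
+ * than secret-dependent branches, a cache-resistant build of the same
+ * idea, and otherwise a fixed 5-bit window over 32 precomputed powers
+ * t[k] = a^k, doing five Montgomery squarings per window before each
+ * multiply.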
+ */ +static int sp_4096_mod_exp_98(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* td; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 98 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3U * 98U * 2U); + + norm = t[0] = td; + t[1] = &td[98 * 2]; + t[2] = &td[2 * 98 * 2]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_98(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_98(t[1], a, m); + } + else { + XMEMCPY(t[1], a, sizeof(sp_digit) * 98U); + } + } + if (err == MP_OKAY) { + sp_4096_mul_98(t[1], t[1], norm); + err = sp_4096_mod_98(t[1], t[1], m); + } + + if (err == MP_OKAY) { + i = bits / 21; + c = bits % 21; + n = e[i--] << (21 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 21; + } + + y = (n >> 20) & 1; + n <<= 1; + + sp_4096_mont_mul_98(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(*t[2]) * 98 * 2); + sp_4096_mont_sqr_98(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(*t[2]) * 98 * 2); + } + + sp_4096_mont_reduce_98(t[0], m, mp); + n = sp_4096_cmp_98(t[0], m); + sp_4096_cond_sub_98(t[0], t[0], m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 98 * 2); + + } + + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + + return err; +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[3][196]; +#else + sp_digit* td; + sp_digit* t[3]; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 98 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + t[0] = td; + t[1] = &td[98 * 2]; + t[2] = &td[2 * 98 * 2]; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_98(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_98(t[1], a, m); + if (err == MP_OKAY) { + sp_4096_mul_98(t[1], t[1], norm); + err = sp_4096_mod_98(t[1], t[1], m); + } + } + else { + sp_4096_mul_98(t[1], a, norm); + err = sp_4096_mod_98(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + i = bits / 21; + c = bits % 21; + n = e[i--] << (21 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 21; + } + + y = (n >> 20) & 1; + n <<= 1; + + sp_4096_mont_mul_98(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); + sp_4096_mont_sqr_98(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); + } + + sp_4096_mont_reduce_98(t[0], m, mp); + n = sp_4096_cmp_98(t[0], m); + sp_4096_cond_sub_98(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(t[0])); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][196]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit rt[196]; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 196, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 196; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_98(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_98(t[1], a, m); + if (err == MP_OKAY) { + sp_4096_mul_98(t[1], t[1], norm); + err = sp_4096_mod_98(t[1], t[1], m); + } + } + else { + sp_4096_mul_98(t[1], a, norm); + err = sp_4096_mod_98(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_98(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_98(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_98(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_98(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_98(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_98(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_98(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_98(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_98(t[10], t[ 5], m, mp); + sp_4096_mont_mul_98(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_98(t[12], t[ 6], m, mp); + sp_4096_mont_mul_98(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_98(t[14], t[ 7], m, mp); + sp_4096_mont_mul_98(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_98(t[16], t[ 8], m, mp); + sp_4096_mont_mul_98(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_98(t[18], t[ 9], m, mp); + sp_4096_mont_mul_98(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_98(t[20], t[10], m, mp); + sp_4096_mont_mul_98(t[21], t[11], t[10], m, mp); + sp_4096_mont_sqr_98(t[22], t[11], m, mp); + sp_4096_mont_mul_98(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_98(t[24], t[12], m, mp); + sp_4096_mont_mul_98(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_98(t[26], t[13], m, mp); + sp_4096_mont_mul_98(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_98(t[28], t[14], m, mp); + sp_4096_mont_mul_98(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_98(t[30], t[15], m, mp); + sp_4096_mont_mul_98(t[31], t[16], t[15], m, mp); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 20) / 21) - 1; + c = bits % 21; + if (c == 0) { + c = 21; + } + if (i < 98) { + n = e[i--] << (32 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (11 - c); + c += 21; + } + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + XMEMCPY(rt, t[y], sizeof(rt)); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (11 - c); + c += 21; + } + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + + sp_4096_mont_sqr_98(rt, rt, m, mp); + sp_4096_mont_sqr_98(rt, rt, m, mp); + sp_4096_mont_sqr_98(rt, rt, m, mp); + sp_4096_mont_sqr_98(rt, rt, m, mp); + sp_4096_mont_sqr_98(rt, rt, m, mp); + + sp_4096_mont_mul_98(rt, rt, t[y], m, mp); + } + + sp_4096_mont_reduce_98(rt, m, mp); + n = sp_4096_cmp_98(rt, m); + sp_4096_cond_sub_98(rt, rt, m, ((n < 0) ? 
+            (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, rt, sizeof(rt));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA && !SP_RSA_PRIVATE_EXP_D */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_4096_mont_norm_196(sp_digit* r, const sp_digit* m)
+{
+    /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<195; i++) {
+        r[i] = 0x1fffff;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 192; i += 8) {
+        r[i + 0] = 0x1fffff;
+        r[i + 1] = 0x1fffff;
+        r[i + 2] = 0x1fffff;
+        r[i + 3] = 0x1fffff;
+        r[i + 4] = 0x1fffff;
+        r[i + 5] = 0x1fffff;
+        r[i + 6] = 0x1fffff;
+        r[i + 7] = 0x1fffff;
+    }
+    r[192] = 0x1fffff;
+    r[193] = 0x1fffff;
+    r[194] = 0x1fffff;
+#endif
+    r[195] = 0x1L;
+
+    /* r = (2^n - 1) mod m */
+    (void)sp_4096_sub_196(r, r, m);
+
+    /* Add one so r = 2^n mod m */
+    r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_4096_cmp_196(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=195; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[195] - b[195]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[194] - b[194]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[193] - b[193]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[192] - b[192]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    for (i = 184; i >= 0; i -= 8) {
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r A single precision number representing conditional subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
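+ *
+ * Because m is either 0 or all ones, (b[i] & m) is b[i] or 0 and the
+ * same instruction sequence runs in both cases, so the final reduction
+ * step does not branch on secret data.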
+ */ +static void sp_4096_cond_sub_196(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 196; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + int i; + + for (i = 0; i < 192; i += 8) { + r[i + 0] = a[i + 0] - (b[i + 0] & m); + r[i + 1] = a[i + 1] - (b[i + 1] & m); + r[i + 2] = a[i + 2] - (b[i + 2] & m); + r[i + 3] = a[i + 3] - (b[i + 3] & m); + r[i + 4] = a[i + 4] - (b[i + 4] & m); + r[i + 5] = a[i + 5] - (b[i + 5] & m); + r[i + 6] = a[i + 6] - (b[i + 6] & m); + r[i + 7] = a[i + 7] - (b[i + 7] & m); + } + r[192] = a[192] - (b[192] & m); + r[193] = a[193] - (b[193] & m); + r[194] = a[194] - (b[194] & m); + r[195] = a[195] - (b[195] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_4096_mul_add_196(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 196; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x1fffff; + t >>= 21; + } + r[196] += t; +#else + int64_t tb = b; + int64_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffff); + for (i = 0; i < 192; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff)); + t[2] = tb * a[i+2]; + r[i+2] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff)); + t[3] = tb * a[i+3]; + r[i+3] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff)); + t[4] = tb * a[i+4]; + r[i+4] += (sp_digit)((t[3] >> 21) + (t[4] & 0x1fffff)); + t[5] = tb * a[i+5]; + r[i+5] += (sp_digit)((t[4] >> 21) + (t[5] & 0x1fffff)); + t[6] = tb * a[i+6]; + r[i+6] += (sp_digit)((t[5] >> 21) + (t[6] & 0x1fffff)); + t[7] = tb * a[i+7]; + r[i+7] += (sp_digit)((t[6] >> 21) + (t[7] & 0x1fffff)); + t[0] = tb * a[i+8]; + r[i+8] += (sp_digit)((t[7] >> 21) + (t[0] & 0x1fffff)); + } + t[1] = tb * a[193]; r[193] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff)); + t[2] = tb * a[194]; r[194] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff)); + t[3] = tb * a[195]; r[195] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff)); + r[196] += (sp_digit)(t[3] >> 21); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 21. + * + * a Array of sp_digit to normalize. + */ +static void sp_4096_norm_196(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 195; i++) { + a[i+1] += a[i] >> 21; + a[i] &= 0x1fffff; + } +#else + int i; + for (i = 0; i < 192; i += 8) { + a[i+1] += a[i+0] >> 21; a[i+0] &= 0x1fffff; + a[i+2] += a[i+1] >> 21; a[i+1] &= 0x1fffff; + a[i+3] += a[i+2] >> 21; a[i+2] &= 0x1fffff; + a[i+4] += a[i+3] >> 21; a[i+3] &= 0x1fffff; + a[i+5] += a[i+4] >> 21; a[i+4] &= 0x1fffff; + a[i+6] += a[i+5] >> 21; a[i+5] &= 0x1fffff; + a[i+7] += a[i+6] >> 21; a[i+6] &= 0x1fffff; + a[i+8] += a[i+7] >> 21; a[i+7] &= 0x1fffff; + a[i+9] += a[i+8] >> 21; a[i+8] &= 0x1fffff; + } + a[192+1] += a[192] >> 21; + a[192] &= 0x1fffff; + a[193+1] += a[193] >> 21; + a[193] &= 0x1fffff; + a[194+1] += a[194] >> 21; + a[194] &= 0x1fffff; +#endif +} + +/* Shift the result in the high 4096 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. 
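+ *
+ * 4096 bits is 195 full digits plus one bit, so the shifted value
+ * starts at bit 1 of a[195] and every following digit contributes its
+ * 21 bits at an offset of 20 (((int64_t)a[196 + i]) << 20); the
+ * vacated upper half of the number is cleared afterwards.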
+ */
+static void sp_4096_mont_shift_196(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int64_t n = a[195] >> 1;
+    n += ((int64_t)a[196]) << 20;
+
+    for (i = 0; i < 195; i++) {
+        r[i] = n & 0x1fffff;
+        n >>= 21;
+        n += ((int64_t)a[197 + i]) << 20;
+    }
+    r[195] = (sp_digit)n;
+#else
+    int i;
+    int64_t n = a[195] >> 1;
+    n += ((int64_t)a[196]) << 20;
+    for (i = 0; i < 192; i += 8) {
+        r[i + 0] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 197]) << 20;
+        r[i + 1] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 198]) << 20;
+        r[i + 2] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 199]) << 20;
+        r[i + 3] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 200]) << 20;
+        r[i + 4] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 201]) << 20;
+        r[i + 5] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 202]) << 20;
+        r[i + 6] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 203]) << 20;
+        r[i + 7] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 204]) << 20;
+    }
+    r[192] = n & 0x1fffff; n >>= 21; n += ((int64_t)a[389]) << 20;
+    r[193] = n & 0x1fffff; n >>= 21; n += ((int64_t)a[390]) << 20;
+    r[194] = n & 0x1fffff; n >>= 21; n += ((int64_t)a[391]) << 20;
+    r[195] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[196], 0, sizeof(*r) * 196U);
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_4096_mont_reduce_196(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_4096_norm_196(a + 196);
+
+#ifdef WOLFSSL_SP_DH
+    if (mp != 1) {
+        for (i=0; i<195; i++) {
+            mu = (a[i] * mp) & 0x1fffff;
+            sp_4096_mul_add_196(a+i, m, mu);
+            a[i+1] += a[i] >> 21;
+        }
+        mu = (a[i] * mp) & 0x1L;
+        sp_4096_mul_add_196(a+i, m, mu);
+        a[i+1] += a[i] >> 21;
+        a[i] &= 0x1fffff;
+    }
+    else {
+        for (i=0; i<195; i++) {
+            mu = a[i] & 0x1fffff;
+            sp_4096_mul_add_196(a+i, m, mu);
+            a[i+1] += a[i] >> 21;
+        }
+        mu = a[i] & 0x1L;
+        sp_4096_mul_add_196(a+i, m, mu);
+        a[i+1] += a[i] >> 21;
+        a[i] &= 0x1fffff;
+    }
+#else
+    for (i=0; i<195; i++) {
+        mu = (a[i] * mp) & 0x1fffff;
+        sp_4096_mul_add_196(a+i, m, mu);
+        a[i+1] += a[i] >> 21;
+    }
+    mu = (a[i] * mp) & 0x1L;
+    sp_4096_mul_add_196(a+i, m, mu);
+    a[i+1] += a[i] >> 21;
+    a[i] &= 0x1fffff;
+#endif
+
+    sp_4096_mont_shift_196(a, a);
+    sp_4096_cond_sub_196(a, a, m, 0 - (((a[195] >> 1) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_4096_norm_196(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_4096_mont_mul_196(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_196(r, a, b);
+    sp_4096_mont_reduce_196(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_4096_mont_sqr_196(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_4096_sqr_196(r, a);
+    sp_4096_mont_reduce_196(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
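+ *
+ * A 21-bit digit times a 21-bit scalar fits in 42 bits, so each
+ * 64-bit accumulator entry absorbs the product plus carried bits
+ * without overflow. The 392-digit width exists for sp_4096_div_196,
+ * which scales the full double-width dividend before estimating
+ * quotient digits.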
+ */ +SP_NOINLINE static void sp_4096_mul_d_392(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 392; i++) { + t += tb * a[i]; + r[i] = t & 0x1fffff; + t >>= 21; + } + r[392] = (sp_digit)t; +#else + int64_t tb = b; + int64_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x1fffff; + for (i = 0; i < 392; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 21) + (t[2] & 0x1fffff); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 21) + (t[3] & 0x1fffff); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 21) + (t[4] & 0x1fffff); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 21) + (t[5] & 0x1fffff); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 21) + (t[6] & 0x1fffff); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 21) + (t[7] & 0x1fffff); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 21) + (t[0] & 0x1fffff); + } + r[392] = (sp_digit)(t[7] >> 21); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_4096_cond_add_196(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 196; i++) { + r[i] = a[i] + (b[i] & m); + } +#else + int i; + + for (i = 0; i < 192; i += 8) { + r[i + 0] = a[i + 0] + (b[i + 0] & m); + r[i + 1] = a[i + 1] + (b[i + 1] & m); + r[i + 2] = a[i + 2] + (b[i + 2] & m); + r[i + 3] = a[i + 3] + (b[i + 3] & m); + r[i + 4] = a[i + 4] + (b[i + 4] & m); + r[i + 5] = a[i + 5] + (b[i + 5] & m); + r[i + 6] = a[i + 6] + (b[i + 6] & m); + r[i + 7] = a[i + 7] + (b[i + 7] & m); + } + r[192] = a[192] + (b[192] & m); + r[193] = a[193] + (b[193] & m); + r[194] = a[194] + (b[194] & m); + r[195] = a[195] + (b[195] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifdef WOLFSSL_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_sub_196(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 196; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif +#ifdef WOLFSSL_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
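+ *
+ * As with the 98-digit small-loop variant earlier, this is a plain
+ * digit-wise sum with no carry propagation; callers normalize the
+ * result afterwards and the return value is always 0.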
+ */
+SP_NOINLINE static int sp_4096_add_196(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 196; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif
+SP_NOINLINE static void sp_4096_rshift_196(sp_digit* r, sp_digit* a, byte n)
+{
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<195; i++) {
+        r[i] = ((a[i] >> n) | (a[i + 1] << (21 - n))) & 0x1fffff;
+    }
+#else
+    for (i=0; i<192; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (21 - n))) & 0x1fffff;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (21 - n))) & 0x1fffff;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (21 - n))) & 0x1fffff;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (21 - n))) & 0x1fffff;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (21 - n))) & 0x1fffff;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (21 - n))) & 0x1fffff;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (21 - n))) & 0x1fffff;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (21 - n))) & 0x1fffff;
+    }
+    r[192] = ((a[192] >> n) | (a[193] << (21 - n))) & 0x1fffff;
+    r[193] = ((a[193] >> n) | (a[194] << (21 - n))) & 0x1fffff;
+    r[194] = ((a[194] >> n) | (a[195] << (21 - n))) & 0x1fffff;
+#endif
+    r[195] = a[195] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_4096_div_word_196(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 21 bits from d1 and top 10 bits from d0. */
+    d = (d1 << 10) | (d0 >> 11);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 11 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 1) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 21 bits in r */
+    /* Remaining 1 bits from d0. */
+    r <<= 1;
+    d <<= 1;
+    d |= d0 & ((1 << 1) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide a by d and put the remainder into r (m*d + r = a).
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
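+ *
+ * Only the remainder is produced; sp_4096_mod_196 below calls this
+ * with m == NULL to get the plain modular reduction used on operands
+ * before exponentiation.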
+ */ +static int sp_4096_div_196(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_32 + int64_t d1; +#endif + sp_digit dv, r1; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* td; +#else + sp_digit t1d[392 + 1], t2d[196 + 1], sdd[196 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* sd; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 196 + 3), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + (void)m; + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = td; + t2 = td + 392 + 1; + sd = t2 + 196 + 1; +#else + t1 = t1d; + t2 = t2d; + sd = sdd; +#endif + + sp_4096_mul_d_196(sd, d, 1L << 20); + sp_4096_mul_d_392(t1, a, 1L << 20); + dv = sd[195]; + for (i=196; i>=0; i--) { + t1[196 + i] += t1[196 + i - 1] >> 21; + t1[196 + i - 1] &= 0x1fffff; +#ifndef WOLFSSL_SP_DIV_32 + d1 = t1[196 + i]; + d1 <<= 21; + d1 += t1[196 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_4096_div_word_196(t1[196 + i], t1[196 + i - 1], dv); +#endif + + sp_4096_mul_d_196(t2, sd, r1); + (void)sp_4096_sub_196(&t1[i], &t1[i], t2); + t1[196 + i] -= t2[196]; + t1[196 + i] += t1[196 + i - 1] >> 21; + t1[196 + i - 1] &= 0x1fffff; + r1 = (((-t1[196 + i]) << 21) - t1[196 + i - 1]) / dv; + r1 -= t1[196 + i]; + sp_4096_mul_d_196(t2, sd, r1); + (void)sp_4096_add_196(&t1[i], &t1[i], t2); + t1[196 + i] += t1[196 + i - 1] >> 21; + t1[196 + i - 1] &= 0x1fffff; + } + t1[196 - 1] += t1[196 - 2] >> 21; + t1[196 - 2] &= 0x1fffff; + r1 = t1[196 - 1] / dv; + + sp_4096_mul_d_196(t2, sd, r1); + sp_4096_sub_196(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 196U); + for (i=0; i<194; i++) { + r[i+1] += r[i] >> 21; + r[i] &= 0x1fffff; + } + sp_4096_cond_add_196(r, r, sd, 0 - ((r[195] < 0) ? + (sp_digit)1 : (sp_digit)0)); + + sp_4096_norm_196(r); + sp_4096_rshift_196(r, r, 20); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_4096_mod_196(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_196(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
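+ *
+ * In the windowed variant the exponent is consumed five bits at a
+ * time from the top: bits is rounded up to a multiple of 5, a first
+ * (possibly short) window is loaded from the top digits, and each
+ * iteration performs five Montgomery squarings followed by one
+ * multiply with the precomputed power t[y]. A final constant-time
+ * compare and conditional subtract brings the result below m.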
+ */ +static int sp_4096_mod_exp_196(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* td; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 196 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3U * 196U * 2U); + + norm = t[0] = td; + t[1] = &td[196 * 2]; + t[2] = &td[2 * 196 * 2]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_196(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_196(t[1], a, m); + } + else { + XMEMCPY(t[1], a, sizeof(sp_digit) * 196U); + } + } + if (err == MP_OKAY) { + sp_4096_mul_196(t[1], t[1], norm); + err = sp_4096_mod_196(t[1], t[1], m); + } + + if (err == MP_OKAY) { + i = bits / 21; + c = bits % 21; + n = e[i--] << (21 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 21; + } + + y = (n >> 20) & 1; + n <<= 1; + + sp_4096_mont_mul_196(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(*t[2]) * 196 * 2); + sp_4096_mont_sqr_196(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(*t[2]) * 196 * 2); + } + + sp_4096_mont_reduce_196(t[0], m, mp); + n = sp_4096_cmp_196(t[0], m); + sp_4096_cond_sub_196(t[0], t[0], m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 196 * 2); + + } + + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + + return err; +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[3][392]; +#else + sp_digit* td; + sp_digit* t[3]; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 196 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + t[0] = td; + t[1] = &td[196 * 2]; + t[2] = &td[2 * 196 * 2]; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_196(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_196(t[1], a, m); + if (err == MP_OKAY) { + sp_4096_mul_196(t[1], t[1], norm); + err = sp_4096_mod_196(t[1], t[1], m); + } + } + else { + sp_4096_mul_196(t[1], a, norm); + err = sp_4096_mod_196(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + i = bits / 21; + c = bits % 21; + n = e[i--] << (21 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 21; + } + + y = (n >> 20) & 1; + n <<= 1; + + sp_4096_mont_mul_196(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); + sp_4096_mont_sqr_196(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); + } + + sp_4096_mont_reduce_196(t[0], m, mp); + n = sp_4096_cmp_196(t[0], m); + sp_4096_cond_sub_196(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(t[0])); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][392]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit rt[392]; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 392, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 392; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_196(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_196(t[1], a, m); + if (err == MP_OKAY) { + sp_4096_mul_196(t[1], t[1], norm); + err = sp_4096_mod_196(t[1], t[1], m); + } + } + else { + sp_4096_mul_196(t[1], a, norm); + err = sp_4096_mod_196(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_196(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_196(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_196(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_196(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_196(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_196(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_196(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_196(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_196(t[10], t[ 5], m, mp); + sp_4096_mont_mul_196(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_196(t[12], t[ 6], m, mp); + sp_4096_mont_mul_196(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_196(t[14], t[ 7], m, mp); + sp_4096_mont_mul_196(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_196(t[16], t[ 8], m, mp); + sp_4096_mont_mul_196(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_196(t[18], t[ 9], m, mp); + sp_4096_mont_mul_196(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_196(t[20], t[10], m, mp); + sp_4096_mont_mul_196(t[21], t[11], t[10], m, mp); + sp_4096_mont_sqr_196(t[22], t[11], m, mp); + sp_4096_mont_mul_196(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_196(t[24], t[12], m, mp); + sp_4096_mont_mul_196(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_196(t[26], t[13], m, mp); + sp_4096_mont_mul_196(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_196(t[28], t[14], m, mp); + sp_4096_mont_mul_196(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_196(t[30], t[15], m, mp); + sp_4096_mont_mul_196(t[31], t[16], t[15], m, mp); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 20) / 21) - 1; + c = bits % 21; + if (c == 0) { + c = 21; + } + if (i < 196) { + n = e[i--] << (32 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (11 - c); + c += 21; + } + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + XMEMCPY(rt, t[y], sizeof(rt)); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (11 - c); + c += 21; + } + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + + sp_4096_mont_sqr_196(rt, rt, m, mp); + sp_4096_mont_sqr_196(rt, rt, m, mp); + sp_4096_mont_sqr_196(rt, rt, m, mp); + sp_4096_mont_sqr_196(rt, rt, m, mp); + sp_4096_mont_sqr_196(rt, rt, m, mp); + + sp_4096_mont_mul_196(rt, rt, t[y], m, mp); + } + + sp_4096_mont_reduce_196(rt, m, mp); + n = sp_4096_cmp_196(rt, m); + sp_4096_cond_sub_196(rt, rt, m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, rt, sizeof(rt)); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#endif +} +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */ + /* WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; + sp_digit* norm; + sp_digit e[1] = {0}; + sp_digit mp; + int i; + int err = MP_OKAY; + + if (*outLen < 512U) { + err = MP_TO_E; + } + + if (err == MP_OKAY) { + if (mp_count_bits(em) > 21) { + err = MP_READ_E; + } + if (inLen > 512U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 196 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 196 * 2; + m = r + 196 * 2; + norm = r; + + sp_4096_from_bin(a, 196, in, inLen); +#if DIGIT_BIT >= 21 + e[0] = (sp_digit)em->dp[0]; +#else + e[0] = (sp_digit)em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_mp(m, 196, mm); + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_196(norm, m); + } + if (err == MP_OKAY) { + sp_4096_mul_196(a, a, norm); + err = sp_4096_mod_196(a, a, m); + } + if (err == MP_OKAY) { + for (i=20; i>=0; i--) { + if ((e[0] >> i) != 0) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 196 * 2); + for (i--; i>=0; i--) { + sp_4096_mont_sqr_196(r, r, m, mp); + + if (((e[0] >> i) & 1) == 1) { + sp_4096_mont_mul_196(r, r, a, m, mp); + } + } + sp_4096_mont_reduce_196(r, m, mp); + mp = sp_4096_cmp_196(r, m); + sp_4096_cond_sub_196(r, r, m, ((mp < 0) ? 
+ (sp_digit)1 : (sp_digit)0)- 1); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit ad[392], md[196], rd[392]; +#else + sp_digit* d = NULL; +#endif + sp_digit* a; + sp_digit* m; + sp_digit* r; + sp_digit e[1] = {0}; + int err = MP_OKAY; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(em) > 21) { + err = MP_READ_E; + } + if (inLen > 512U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 196 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + a = d; + r = a + 196 * 2; + m = r + 196 * 2; + } +#else + a = ad; + m = md; + r = rd; +#endif + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 196, in, inLen); +#if DIGIT_BIT >= 21 + e[0] = (sp_digit)em->dp[0]; +#else + e[0] = (sp_digit)em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_4096_from_mp(m, 196, mm); + + if (e[0] == 0x3) { + sp_4096_sqr_196(r, a); + err = sp_4096_mod_196(r, r, m); + if (err == MP_OKAY) { + sp_4096_mul_196(r, a, r); + err = sp_4096_mod_196(r, r, m); + } + } + else { + sp_digit* norm = r; + int i; + sp_digit mp; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_196(norm, m); + + sp_4096_mul_196(a, a, norm); + err = sp_4096_mod_196(a, a, m); + + if (err == MP_OKAY) { + for (i=20; i>=0; i--) { + if ((e[0] >> i) != 0) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 392U); + for (i--; i>=0; i--) { + sp_4096_mont_sqr_196(r, r, m, mp); + + if (((e[0] >> i) & 1) == 1) { + sp_4096_mont_mul_196(r, r, a, m, mp); + } + } + sp_4096_mont_reduce_196(r, m, mp); + mp = sp_4096_cmp_196(r, m); + sp_4096_cond_sub_196(r, r, m, ((mp < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + } + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } +#endif + + return err; +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) +#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */ +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
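+ *
+ * In the default build (no SP_RSA_PRIVATE_EXP_D and no RSA_LOW_MEM) the
+ * code below uses the CRT parameters, i.e. Garner's recombination. A
+ * sketch of the math, with c the input and M the recovered value
+ * (illustrative names, not identifiers from this file):
+ *   m1 = c^dP mod p
+ *   m2 = c^dQ mod q
+ *   h  = qInv * (m1 - m2) mod p
+ *   M  = m2 + h * q
+ * Two half-size (2048-bit) exponentiations replace one full 4096-bit one.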
+ */ +int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* a; + sp_digit* d = NULL; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 4096) { + err = MP_READ_E; + } + if (inLen > 512) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 196 * 4, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = d + 196; + m = a + 392; + r = a; + + sp_4096_from_bin(a, 196, in, inLen); + sp_4096_from_mp(d, 196, dm); + sp_4096_from_mp(m, 196, mm); + err = sp_4096_mod_exp_196(r, a, d, 4096, m, 0); + } + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + + if (d != NULL) { + XMEMSET(d, 0, sizeof(sp_digit) * 196); + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else + sp_digit a[392], d[196], m[196]; + sp_digit* r = a; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 4096) { + err = MP_READ_E; + } + if (inLen > 512U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 196, in, inLen); + sp_4096_from_mp(d, 196, dm); + sp_4096_from_mp(m, 196, mm); + err = sp_4096_mod_exp_196(r, a, d, 4096, m, 0); + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + + XMEMSET(d, 0, sizeof(sp_digit) * 196); + + return err; +#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ +#else +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* dq; + sp_digit* qi; + sp_digit* tmpa; + sp_digit* tmpb; + sp_digit* r; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (inLen > 512) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 98 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = t; + p = a + 196 * 2; + q = p + 98; + qi = dq = dp = q + 98; + tmpa = qi + 98; + tmpb = tmpa + 196; + + r = t + 196; + + sp_4096_from_bin(a, 196, in, inLen); + sp_4096_from_mp(p, 98, pm); + sp_4096_from_mp(q, 98, qm); + sp_4096_from_mp(dp, 98, dpm); + err = sp_4096_mod_exp_98(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + sp_4096_from_mp(dq, 98, dqm); + err = sp_4096_mod_exp_98(tmpb, a, dq, 2048, q, 1); + } + if (err == MP_OKAY) { + (void)sp_4096_sub_98(tmpa, tmpa, tmpb); + sp_4096_cond_add_98(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[97] >> 31)); + sp_4096_cond_add_98(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[97] >> 31)); + + sp_4096_from_mp(qi, 98, qim); + sp_4096_mul_98(tmpa, tmpa, qi); + err = sp_4096_mod_98(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_4096_mul_98(tmpa, q, tmpa); + (void)sp_4096_add_196(r, 
tmpb, tmpa); + sp_4096_norm_196(r); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 98 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else + sp_digit a[196 * 2]; + sp_digit p[98], q[98], dp[98], dq[98], qi[98]; + sp_digit tmpa[196], tmpb[196]; + sp_digit* r = a; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (inLen > 512U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 196, in, inLen); + sp_4096_from_mp(p, 98, pm); + sp_4096_from_mp(q, 98, qm); + sp_4096_from_mp(dp, 98, dpm); + sp_4096_from_mp(dq, 98, dqm); + sp_4096_from_mp(qi, 98, qim); + + err = sp_4096_mod_exp_98(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + err = sp_4096_mod_exp_98(tmpb, a, dq, 2048, q, 1); + } + + if (err == MP_OKAY) { + (void)sp_4096_sub_98(tmpa, tmpa, tmpb); + sp_4096_cond_add_98(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[97] >> 31)); + sp_4096_cond_add_98(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[97] >> 31)); + sp_4096_mul_98(tmpa, tmpa, qi); + err = sp_4096_mod_98(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_4096_mul_98(tmpa, tmpa, q); + (void)sp_4096_add_196(r, tmpb, tmpa); + sp_4096_norm_196(r); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); + XMEMSET(dq, 0, sizeof(dq)); + XMEMSET(qi, 0, sizeof(qi)); + + return err; +#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +} + +#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_4096_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 21 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 196); + r->used = 196; + mp_clamp(r); +#elif DIGIT_BIT < 21 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 196; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 21) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 21 - s; + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 196; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 21 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 21 - s; + } + else { + s += 21; + } + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
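+ *
+ * A minimal usage sketch (error checking elided; assumes base, exp, mod
+ * and res are already initialized mp_ints and mod is exactly 4096 bits):
+ *   int ret = sp_ModExp_4096(&base, &exp, &mod, &res);
+ *   // on success (ret == 0), res = base ^ exp (mod mod)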
+ */ +int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 196 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 196 * 2; + m = e + 196; + r = b; + + sp_4096_from_mp(b, 196, base); + sp_4096_from_mp(e, 196, exp); + sp_4096_from_mp(m, 196, mod); + + err = sp_4096_mod_exp_196(r, b, e, mp_count_bits(exp), m, 0); + } + + if (err == MP_OKAY) { + err = sp_4096_to_mp(r, res); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 196U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[392], ed[196], md[196]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 196 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 196 * 2; + m = e + 196; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 196, base); + sp_4096_from_mp(e, 196, exp); + sp_4096_from_mp(m, 196, mod); + + err = sp_4096_mod_exp_196(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_4096_to_mp(r, res); + } + + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 196U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +#else + XMEMSET(e, 0, sizeof(sp_digit) * 196U); +#endif + + return err; +#endif +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_4096 +SP_NOINLINE static void sp_4096_lshift_196(sp_digit* r, sp_digit* a, byte n) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + r[196] = a[195] >> (21 - n); + for (i=195; i>0; i--) { + r[i] = ((a[i] << n) | (a[i-1] >> (21 - n))) & 0x1fffff; + } +#else + sp_int_digit s, t; + + s = (sp_int_digit)a[195]; + r[196] = s >> (21U - n); + s = (sp_int_digit)(a[195]); t = (sp_int_digit)(a[194]); + r[195] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[194]); t = (sp_int_digit)(a[193]); + r[194] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[193]); t = (sp_int_digit)(a[192]); + r[193] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[192]); t = (sp_int_digit)(a[191]); + r[192] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[191]); t = (sp_int_digit)(a[190]); + r[191] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[190]); t = (sp_int_digit)(a[189]); + r[190] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[189]); t = (sp_int_digit)(a[188]); + r[189] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[188]); t = (sp_int_digit)(a[187]); + r[188] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[187]); t = 
(sp_int_digit)(a[186]); + r[187] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[186]); t = (sp_int_digit)(a[185]); + r[186] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[185]); t = (sp_int_digit)(a[184]); + r[185] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[184]); t = (sp_int_digit)(a[183]); + r[184] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[183]); t = (sp_int_digit)(a[182]); + r[183] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[182]); t = (sp_int_digit)(a[181]); + r[182] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[181]); t = (sp_int_digit)(a[180]); + r[181] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[180]); t = (sp_int_digit)(a[179]); + r[180] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[179]); t = (sp_int_digit)(a[178]); + r[179] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[178]); t = (sp_int_digit)(a[177]); + r[178] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[177]); t = (sp_int_digit)(a[176]); + r[177] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[176]); t = (sp_int_digit)(a[175]); + r[176] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[175]); t = (sp_int_digit)(a[174]); + r[175] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[174]); t = (sp_int_digit)(a[173]); + r[174] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[173]); t = (sp_int_digit)(a[172]); + r[173] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[172]); t = (sp_int_digit)(a[171]); + r[172] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[171]); t = (sp_int_digit)(a[170]); + r[171] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[170]); t = (sp_int_digit)(a[169]); + r[170] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[169]); t = (sp_int_digit)(a[168]); + r[169] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[168]); t = (sp_int_digit)(a[167]); + r[168] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[167]); t = (sp_int_digit)(a[166]); + r[167] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[166]); t = (sp_int_digit)(a[165]); + r[166] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[165]); t = (sp_int_digit)(a[164]); + r[165] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[164]); t = (sp_int_digit)(a[163]); + r[164] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[163]); t = (sp_int_digit)(a[162]); + r[163] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[162]); t = (sp_int_digit)(a[161]); + r[162] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[161]); t = (sp_int_digit)(a[160]); + r[161] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[160]); t = (sp_int_digit)(a[159]); + r[160] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[159]); t = (sp_int_digit)(a[158]); + r[159] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[158]); t = (sp_int_digit)(a[157]); + r[158] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[157]); t = (sp_int_digit)(a[156]); + r[157] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[156]); t = (sp_int_digit)(a[155]); + r[156] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[155]); t = 
(sp_int_digit)(a[154]); + r[155] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[154]); t = (sp_int_digit)(a[153]); + r[154] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[153]); t = (sp_int_digit)(a[152]); + r[153] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[152]); t = (sp_int_digit)(a[151]); + r[152] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[151]); t = (sp_int_digit)(a[150]); + r[151] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[150]); t = (sp_int_digit)(a[149]); + r[150] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[149]); t = (sp_int_digit)(a[148]); + r[149] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[148]); t = (sp_int_digit)(a[147]); + r[148] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[147]); t = (sp_int_digit)(a[146]); + r[147] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[146]); t = (sp_int_digit)(a[145]); + r[146] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[145]); t = (sp_int_digit)(a[144]); + r[145] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[144]); t = (sp_int_digit)(a[143]); + r[144] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[143]); t = (sp_int_digit)(a[142]); + r[143] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[142]); t = (sp_int_digit)(a[141]); + r[142] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[141]); t = (sp_int_digit)(a[140]); + r[141] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[140]); t = (sp_int_digit)(a[139]); + r[140] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[139]); t = (sp_int_digit)(a[138]); + r[139] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[138]); t = (sp_int_digit)(a[137]); + r[138] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[137]); t = (sp_int_digit)(a[136]); + r[137] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[136]); t = (sp_int_digit)(a[135]); + r[136] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[135]); t = (sp_int_digit)(a[134]); + r[135] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[134]); t = (sp_int_digit)(a[133]); + r[134] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[133]); t = (sp_int_digit)(a[132]); + r[133] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[132]); t = (sp_int_digit)(a[131]); + r[132] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[131]); t = (sp_int_digit)(a[130]); + r[131] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[130]); t = (sp_int_digit)(a[129]); + r[130] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[129]); t = (sp_int_digit)(a[128]); + r[129] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[128]); t = (sp_int_digit)(a[127]); + r[128] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[127]); t = (sp_int_digit)(a[126]); + r[127] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[126]); t = (sp_int_digit)(a[125]); + r[126] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[125]); t = (sp_int_digit)(a[124]); + r[125] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[124]); t = (sp_int_digit)(a[123]); + r[124] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[123]); t = 
(sp_int_digit)(a[122]); + r[123] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[122]); t = (sp_int_digit)(a[121]); + r[122] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[121]); t = (sp_int_digit)(a[120]); + r[121] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[120]); t = (sp_int_digit)(a[119]); + r[120] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[119]); t = (sp_int_digit)(a[118]); + r[119] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[118]); t = (sp_int_digit)(a[117]); + r[118] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[117]); t = (sp_int_digit)(a[116]); + r[117] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[116]); t = (sp_int_digit)(a[115]); + r[116] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[115]); t = (sp_int_digit)(a[114]); + r[115] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[114]); t = (sp_int_digit)(a[113]); + r[114] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[113]); t = (sp_int_digit)(a[112]); + r[113] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[112]); t = (sp_int_digit)(a[111]); + r[112] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[111]); t = (sp_int_digit)(a[110]); + r[111] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[110]); t = (sp_int_digit)(a[109]); + r[110] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[109]); t = (sp_int_digit)(a[108]); + r[109] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[108]); t = (sp_int_digit)(a[107]); + r[108] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[107]); t = (sp_int_digit)(a[106]); + r[107] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[106]); t = (sp_int_digit)(a[105]); + r[106] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[105]); t = (sp_int_digit)(a[104]); + r[105] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[104]); t = (sp_int_digit)(a[103]); + r[104] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[103]); t = (sp_int_digit)(a[102]); + r[103] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[102]); t = (sp_int_digit)(a[101]); + r[102] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[101]); t = (sp_int_digit)(a[100]); + r[101] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[100]); t = (sp_int_digit)(a[99]); + r[100] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[99]); t = (sp_int_digit)(a[98]); + r[99] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[98]); t = (sp_int_digit)(a[97]); + r[98] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[97]); t = (sp_int_digit)(a[96]); + r[97] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[96]); t = (sp_int_digit)(a[95]); + r[96] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[95]); t = (sp_int_digit)(a[94]); + r[95] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[94]); t = (sp_int_digit)(a[93]); + r[94] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[93]); t = (sp_int_digit)(a[92]); + r[93] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[92]); t = (sp_int_digit)(a[91]); + r[92] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[91]); t = (sp_int_digit)(a[90]); + 
r[91] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[90]); t = (sp_int_digit)(a[89]); + r[90] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[89]); t = (sp_int_digit)(a[88]); + r[89] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[88]); t = (sp_int_digit)(a[87]); + r[88] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[87]); t = (sp_int_digit)(a[86]); + r[87] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[86]); t = (sp_int_digit)(a[85]); + r[86] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[85]); t = (sp_int_digit)(a[84]); + r[85] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[84]); t = (sp_int_digit)(a[83]); + r[84] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[83]); t = (sp_int_digit)(a[82]); + r[83] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[82]); t = (sp_int_digit)(a[81]); + r[82] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[81]); t = (sp_int_digit)(a[80]); + r[81] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[80]); t = (sp_int_digit)(a[79]); + r[80] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[79]); t = (sp_int_digit)(a[78]); + r[79] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[78]); t = (sp_int_digit)(a[77]); + r[78] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[77]); t = (sp_int_digit)(a[76]); + r[77] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[76]); t = (sp_int_digit)(a[75]); + r[76] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[75]); t = (sp_int_digit)(a[74]); + r[75] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[74]); t = (sp_int_digit)(a[73]); + r[74] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[73]); t = (sp_int_digit)(a[72]); + r[73] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[72]); t = (sp_int_digit)(a[71]); + r[72] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[71]); t = (sp_int_digit)(a[70]); + r[71] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[70]); t = (sp_int_digit)(a[69]); + r[70] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[69]); t = (sp_int_digit)(a[68]); + r[69] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[68]); t = (sp_int_digit)(a[67]); + r[68] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[67]); t = (sp_int_digit)(a[66]); + r[67] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[66]); t = (sp_int_digit)(a[65]); + r[66] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[65]); t = (sp_int_digit)(a[64]); + r[65] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[64]); t = (sp_int_digit)(a[63]); + r[64] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[63]); t = (sp_int_digit)(a[62]); + r[63] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[62]); t = (sp_int_digit)(a[61]); + r[62] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[61]); t = (sp_int_digit)(a[60]); + r[61] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[60]); t = (sp_int_digit)(a[59]); + r[60] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[59]); t = (sp_int_digit)(a[58]); + r[59] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[58]); t = 
(sp_int_digit)(a[57]); + r[58] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[57]); t = (sp_int_digit)(a[56]); + r[57] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[56]); t = (sp_int_digit)(a[55]); + r[56] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[55]); t = (sp_int_digit)(a[54]); + r[55] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[54]); t = (sp_int_digit)(a[53]); + r[54] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[53]); t = (sp_int_digit)(a[52]); + r[53] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[52]); t = (sp_int_digit)(a[51]); + r[52] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[51]); t = (sp_int_digit)(a[50]); + r[51] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[50]); t = (sp_int_digit)(a[49]); + r[50] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[49]); t = (sp_int_digit)(a[48]); + r[49] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[48]); t = (sp_int_digit)(a[47]); + r[48] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[47]); t = (sp_int_digit)(a[46]); + r[47] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[46]); t = (sp_int_digit)(a[45]); + r[46] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[45]); t = (sp_int_digit)(a[44]); + r[45] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[44]); t = (sp_int_digit)(a[43]); + r[44] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[43]); t = (sp_int_digit)(a[42]); + r[43] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[42]); t = (sp_int_digit)(a[41]); + r[42] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]); + r[41] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]); + r[40] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]); + r[39] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]); + r[38] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]); + r[37] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]); + r[36] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]); + r[35] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]); + r[34] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]); + r[33] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]); + r[32] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]); + r[31] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]); + r[30] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]); + r[29] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]); + r[28] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]); + r[27] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]); + r[26] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = 
(sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]); + r[25] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]); + r[24] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]); + r[23] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]); + r[22] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]); + r[21] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]); + r[20] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]); + r[19] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]); + r[18] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]); + r[17] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]); + r[16] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]); + r[15] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]); + r[14] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]); + r[13] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]); + r[12] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]); + r[11] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]); + r[10] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]); + r[9] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]); + r[8] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]); + r[7] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]); + r[6] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]); + r[5] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]); + r[4] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]); + r[3] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]); + r[2] = ((s << n) | (t >> (21U - n))) & 0x1fffff; + s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]); + r[1] = ((s << n) | (t >> (21U - n))) & 0x1fffff; +#endif + r[0] = (a[0] << n) & 0x1fffff; +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
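+ *
+ * Since the base is the constant 2, multiplying by 2^y in the windowed
+ * loop below is a plain left shift instead of a Montgomery multiply.
+ * Sketch of one 4-bit window y:
+ *   r = r^16 mod m       (four Montgomery squarings)
+ *   r = r << y           (sp_4096_lshift_196)
+ *   r += excess * norm   (the bits shifted above 2^4096, multiplied by
+ *                         norm = 2^4096 mod m, folded back in)
+ * followed by a conditional subtract to keep r below m.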
+ */ +static int sp_4096_mod_exp_2_196(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[392]; + sp_digit td[197]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 589, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 392; + XMEMSET(td, 0, sizeof(sp_digit) * 589); +#else + norm = nd; + tmp = td; + XMEMSET(td, 0, sizeof(td)); +#endif + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_196(norm, m); + + bits = ((bits + 3) / 4) * 4; + i = ((bits + 20) / 21) - 1; + c = bits % 21; + if (c == 0) { + c = 21; + } + if (i < 196) { + n = e[i--] << (32 - c); + } + else { + n = 0; + i--; + } + if (c < 4) { + n |= e[i--] << (11 - c); + c += 21; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + sp_4096_lshift_196(r, norm, y); + for (; i>=0 || c>=4; ) { + if (c < 4) { + n |= e[i--] << (11 - c); + c += 21; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + + sp_4096_mont_sqr_196(r, r, m, mp); + sp_4096_mont_sqr_196(r, r, m, mp); + sp_4096_mont_sqr_196(r, r, m, mp); + sp_4096_mont_sqr_196(r, r, m, mp); + + sp_4096_lshift_196(r, r, y); + sp_4096_mul_d_196(tmp, norm, (r[196] << 20) + (r[195] >> 1)); + r[196] = 0; + r[195] &= 0x1L; + (void)sp_4096_add_196(r, r, tmp); + sp_4096_norm_196(r); + o = sp_4096_cmp_196(r, m); + sp_4096_cond_sub_196(r, r, m, ((o < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + + sp_4096_mont_reduce_196(r, m, mp); + n = sp_4096_cmp_196(r, m); + sp_4096_cond_sub_196(r, r, m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +#endif /* HAVE_FFDHE_4096 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
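+ *
+ * A minimal usage sketch (error checking elided; base and mod are
+ * initialized mp_ints, priv/privSz are illustrative names for the
+ * big-endian private value):
+ *   byte shared[512];
+ *   word32 sharedSz = (word32)sizeof(shared);
+ *   int ret = sp_DhExp_4096(&base, priv, privSz, &mod, shared, &sharedSz);
+ *   // on success: sharedSz big-endian bytes, leading zeros stripped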
+ */ +int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + word32 i; + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 512) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 196 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 196 * 2; + m = e + 196; + r = b; + + sp_4096_from_mp(b, 196, base); + sp_4096_from_bin(e, 196, exp, expLen); + sp_4096_from_mp(m, 196, mod); + + #ifdef HAVE_FFDHE_4096 + if (base->used == 1 && base->dp[0] == 2 && + ((m[195] << 15) | (m[194] >> 6)) == 0xffffL) { + err = sp_4096_mod_exp_2_196(r, e, expLen * 8, m); + } + else + #endif + err = sp_4096_mod_exp_196(r, b, e, expLen * 8, m, 0); + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + for (i=0; i<512 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 196U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[392], ed[196], md[196]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + word32 i; + int err = MP_OKAY; + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 512U) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 196 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 196 * 2; + m = e + 196; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 196, base); + sp_4096_from_bin(e, 196, exp, expLen); + sp_4096_from_mp(m, 196, mod); + + #ifdef HAVE_FFDHE_4096 + if (base->used == 1 && base->dp[0] == 2U && + ((m[195] << 15) | (m[194] >> 6)) == 0xffffL) { + err = sp_4096_mod_exp_2_196(r, e, expLen * 8U, m); + } + else { + #endif + err = sp_4096_mod_exp_196(r, b, e, expLen * 8U, m, 0); + #ifdef HAVE_FFDHE_4096 + } + #endif + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + for (i=0; i<512U && out[i] == 0U; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 196U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +#else + XMEMSET(e, 0, sizeof(sp_digit) * 196U); +#endif + + return err; +#endif +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* WOLFSSL_SP_4096 */ + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +#ifdef WOLFSSL_HAVE_SP_ECC +#ifndef WOLFSSL_SP_NO_256 + +/* Point structure to use. */ +typedef struct sp_point_256 { + sp_digit x[2 * 10]; + sp_digit y[2 * 10]; + sp_digit z[2 * 10]; + int infinity; +} sp_point_256; + +/* The modulus (prime) of the curve P256. 
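+ * That is p = 2^256 - 2^224 + 2^192 + 2^96 - 1, stored little-endian in
+ * ten 26-bit limbs; the top limb only carries 22 bits, which is why the
+ * reduction code below tests a[9] >> 22 for overflow.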
+ */
+static const sp_digit p256_mod[10] = {
+    0x3ffffff,0x3ffffff,0x3ffffff,0x003ffff,0x0000000,0x0000000,0x0000000,
+    0x0000400,0x3ff0000,0x03fffff
+};
+/* The Montgomery normalizer for modulus of the curve P256. */
+static const sp_digit p256_norm_mod[10] = {
+    0x0000001,0x0000000,0x0000000,0x3fc0000,0x3ffffff,0x3ffffff,0x3ffffff,
+    0x3fffbff,0x000ffff,0x0000000
+};
+/* The Montgomery multiplier for modulus of the curve P256. */
+static const sp_digit p256_mp_mod = 0x000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                            defined(HAVE_ECC_VERIFY)
+/* The order of the curve P256. */
+static const sp_digit p256_order[10] = {
+    0x0632551,0x272b0bf,0x1e84f3b,0x2b69c5e,0x3bce6fa,0x3ffffff,0x3ffffff,
+    0x00003ff,0x3ff0000,0x03fffff
+};
+#endif
+/* The order of the curve P256 minus 2. */
+static const sp_digit p256_order2[10] = {
+    0x063254f,0x272b0bf,0x1e84f3b,0x2b69c5e,0x3bce6fa,0x3ffffff,0x3ffffff,
+    0x00003ff,0x3ff0000,0x03fffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for order of the curve P256. */
+static const sp_digit p256_norm_order[10] = {
+    0x39cdaaf,0x18d4f40,0x217b0c4,0x14963a1,0x0431905,0x0000000,0x0000000,
+    0x3fffc00,0x000ffff,0x0000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for order of the curve P256. */
+static const sp_digit p256_mp_order = 0x200bc4f;
+#endif
+/* The base point of curve P256. */
+static const sp_point_256 p256_base = {
+    /* X coordinate */
+    {
+        0x098c296,0x04e5176,0x33a0f4a,0x204b7ac,0x277037d,0x0e9103c,0x3ce6e56,
+        0x1091fe2,0x1f2e12c,0x01ac5f4,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y coordinate */
+    {
+        0x3bf51f5,0x1901a0d,0x1ececbb,0x15dacc5,0x22bce33,0x303e785,0x27eb4a7,
+        0x1fe6e3b,0x2e2fe1a,0x013f8d0,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z coordinate */
+    {
+        0x0000001,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,
+        0x0000000,0x0000000,0x0000000,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p256_b[10] = {
+    0x3d2604b,0x38f0f89,0x30f63bc,0x2c3314e,0x0651d06,0x1a621af,0x2bbd557,
+    0x24f9ecf,0x1d8aa3a,0x016b18d
+};
+#endif
+
+static int sp_256_point_new_ex_10(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_256_point_new_10(heap, sp, p) sp_256_point_new_ex_10((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_256_point_new_10(heap, sp, p) sp_256_point_new_ex_10((heap), &(sp), &(p))
+#endif
+
+
+static void sp_256_point_free_10(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested.
+ */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by the Montgomery normalizer mod the modulus (prime).
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_256_mod_mul_norm_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    int64_t* td;
+#else
+    int64_t td[8];
+    int64_t a32d[8];
+#endif
+    int64_t* t;
+    int64_t* a32;
+    int64_t o;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 8, NULL, DYNAMIC_TYPE_ECC);
+    if (td == NULL) {
+        return MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t = td;
+        a32 = td + 8;
+#else
+        t = td;
+        a32 = a32d;
+#endif
+
+        a32[0] = a[0];
+        a32[0] |= a[1] << 26U;
+        a32[0] &= 0xffffffffL;
+        a32[1] = (sp_digit)(a[1] >> 6);
+        a32[1] |= a[2] << 20U;
+        a32[1] &= 0xffffffffL;
+        a32[2] = (sp_digit)(a[2] >> 12);
+        a32[2] |= a[3] << 14U;
+        a32[2] &= 0xffffffffL;
+        a32[3] = (sp_digit)(a[3] >> 18);
+        a32[3] |= a[4] << 8U;
+        a32[3] &= 0xffffffffL;
+        a32[4] = (sp_digit)(a[4] >> 24);
+        a32[4] |= a[5] << 2U;
+        a32[4] |= a[6] << 28U;
+        a32[4] &= 0xffffffffL;
+        a32[5] = (sp_digit)(a[6] >> 4);
+        a32[5] |= a[7] << 22U;
+        a32[5] &= 0xffffffffL;
+        a32[6] = (sp_digit)(a[7] >> 10);
+        a32[6] |= a[8] << 16U;
+        a32[6] &= 0xffffffffL;
+        a32[7] = (sp_digit)(a[8] >> 16);
+        a32[7] |= a[9] << 10U;
+        a32[7] &= 0xffffffffL;
+
+        /* 1 1 0 -1 -1 -1 -1 0 */
+        t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6];
+        /* 0 1 1 0 -1 -1 -1 -1 */
+        t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7];
+        /* 0 0 1 1 0 -1 -1 -1 */
+        t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7];
+        /* -1 -1 0 2 2 1 0 -1 */
+        t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7];
+        /* 0 -1 -1 0 2 2 1 0 */
+        t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6];
+        /* 0 0 -1 -1 0 2 2 1 */
+        t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7];
+        /* -1 -1 0 0 0 1 3 2 */
+        t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7];
+        /* 1 0 -1 -1 -1 -1 0 3 */
+        t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7];
+
+        t[1] += t[0] >> 32U; t[0] &= 0xffffffffL;
+        t[2] += t[1] >> 32U; t[1] &= 0xffffffffL;
+        t[3] += t[2] >> 32U; t[2] &= 0xffffffffL;
+        t[4] += t[3] >> 32U; t[3] &= 0xffffffffL;
+        t[5] += t[4] >> 32U; t[4] &= 0xffffffffL;
+        t[6] += t[5] >> 32U; t[5] &= 0xffffffffL;
+        t[7] += t[6] >> 32U; t[6] &= 0xffffffffL;
+        o = t[7] >> 32U; t[7] &= 0xffffffffL;
+        t[0] += o;
+        t[3] -= o;
+        t[6] -= o;
+        t[7] += o;
+        t[1] += t[0] >> 32U; t[0] &= 0xffffffffL;
+        t[2] += t[1] >> 32U; t[1] &= 0xffffffffL;
+        t[3] += t[2] >> 32U; t[2] &= 0xffffffffL;
+        t[4] += t[3] >> 32U; t[3] &= 0xffffffffL;
+        t[5] += t[4] >> 32U; t[4] &= 0xffffffffL;
+        t[6] += t[5] >> 32U; t[5] &= 0xffffffffL;
+        t[7] += t[6] >> 32U; t[6] &= 0xffffffffL;
+
+        r[0] = (sp_digit)(t[0]) & 0x3ffffffL;
+        r[1] = (sp_digit)(t[0] >> 26U);
+        r[1] |= t[1] << 6U;
+        r[1] &= 0x3ffffffL;
+        r[2] = (sp_digit)(t[1] >> 20U);
+        r[2] |= t[2] << 12U;
+        r[2] &= 0x3ffffffL;
+        r[3] = (sp_digit)(t[2] >> 14U);
+        r[3] |= t[3] << 18U;
+        r[3] &= 0x3ffffffL;
+        r[4] = (sp_digit)(t[3] >> 8U);
+        r[4] |= t[4] <<
24U; + r[4] &= 0x3ffffffL; + r[5] = (sp_digit)(t[4] >> 2U) & 0x3ffffffL; + r[6] = (sp_digit)(t[4] >> 28U); + r[6] |= t[5] << 4U; + r[6] &= 0x3ffffffL; + r[7] = (sp_digit)(t[5] >> 22U); + r[7] |= t[6] << 10U; + r[7] &= 0x3ffffffL; + r[8] = (sp_digit)(t[6] >> 16U); + r[8] |= t[7] << 16U; + r[8] &= 0x3ffffffL; + r[9] = (sp_digit)(t[7] >> 10U); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 26 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 26 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0x3ffffff; + s = 26U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 26U) <= (word32)DIGIT_BIT) { + s += 26U; + r[j] &= 0x3ffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 26) { + r[j] &= 0x3ffffff; + if (j + 1 >= size) { + break; + } + s = 26 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Convert a point of type ecc_point to type sp_point_256. + * + * p Point of type sp_point_256 (result). + * pm Point of type ecc_point. + */ +static void sp_256_point_from_ecc_point_10(sp_point_256* p, const ecc_point* pm) +{ + XMEMSET(p->x, 0, sizeof(p->x)); + XMEMSET(p->y, 0, sizeof(p->y)); + XMEMSET(p->z, 0, sizeof(p->z)); + sp_256_from_mp(p->x, 10, pm->x); + sp_256_from_mp(p->y, 10, pm->y); + sp_256_from_mp(p->z, 10, pm->z); + p->infinity = 0; +} + +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. 
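+ *
+ * The conversion repacks ten 26-bit limbs into mp_digits of DIGIT_BIT
+ * bits. For example, with DIGIT_BIT == 32 the first output word becomes
+ *   r->dp[0] = (a[0] | (a[1] << 26)) & 0xffffffff
+ * and the remaining bits of a[1] carry into r->dp[1], and so on.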
+ */ +static int sp_256_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 26 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 10); + r->used = 10; + mp_clamp(r); +#elif DIGIT_BIT < 26 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 10; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 26) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 26 - s; + } + r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 10; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 26 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 26 - s; + } + else { + s += 26; + } + } + r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Convert a point of type sp_point_256 to type ecc_point. + * + * p Point of type sp_point_256. + * pm Point of type ecc_point (result). + * returns MEMORY_E when allocation of memory in ecc_point fails otherwise + * MP_OKAY. + */ +static int sp_256_point_to_ecc_point_10(const sp_point_256* p, ecc_point* pm) +{ + int err; + + err = sp_256_to_mp(p->x, pm->x); + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, pm->y); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, pm->z); + } + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_256_mul_10(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j, k; + int64_t c; + + c = ((int64_t)a[9]) * b[9]; + r[19] = (sp_digit)(c >> 26); + c = (c & 0x3ffffff) << 26; + for (k = 17; k >= 0; k--) { + for (i = 9; i >= 0; i--) { + j = k - i; + if (j >= 10) { + break; + } + if (j < 0) { + continue; + } + + c += ((int64_t)a[i]) * b[j]; + } + r[k + 2] += c >> 52; + r[k + 1] = (c >> 26) & 0x3ffffff; + c = (c & 0x3ffffff) << 26; + } + r[0] = (sp_digit)(c >> 26); +} + +#else +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
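+ *
+ * Fully unrolled schoolbook multiply: column k gathers every product
+ * a[i] * b[j] with i + j == k into a 64-bit accumulator (each product
+ * is at most 52 bits, so a column of at most ten products cannot
+ * overflow), then carries are propagated 26 bits at a time into r[0..19].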
+ */ +SP_NOINLINE static void sp_256_mul_10(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int64_t t0 = ((int64_t)a[ 0]) * b[ 0]; + int64_t t1 = ((int64_t)a[ 0]) * b[ 1] + + ((int64_t)a[ 1]) * b[ 0]; + int64_t t2 = ((int64_t)a[ 0]) * b[ 2] + + ((int64_t)a[ 1]) * b[ 1] + + ((int64_t)a[ 2]) * b[ 0]; + int64_t t3 = ((int64_t)a[ 0]) * b[ 3] + + ((int64_t)a[ 1]) * b[ 2] + + ((int64_t)a[ 2]) * b[ 1] + + ((int64_t)a[ 3]) * b[ 0]; + int64_t t4 = ((int64_t)a[ 0]) * b[ 4] + + ((int64_t)a[ 1]) * b[ 3] + + ((int64_t)a[ 2]) * b[ 2] + + ((int64_t)a[ 3]) * b[ 1] + + ((int64_t)a[ 4]) * b[ 0]; + int64_t t5 = ((int64_t)a[ 0]) * b[ 5] + + ((int64_t)a[ 1]) * b[ 4] + + ((int64_t)a[ 2]) * b[ 3] + + ((int64_t)a[ 3]) * b[ 2] + + ((int64_t)a[ 4]) * b[ 1] + + ((int64_t)a[ 5]) * b[ 0]; + int64_t t6 = ((int64_t)a[ 0]) * b[ 6] + + ((int64_t)a[ 1]) * b[ 5] + + ((int64_t)a[ 2]) * b[ 4] + + ((int64_t)a[ 3]) * b[ 3] + + ((int64_t)a[ 4]) * b[ 2] + + ((int64_t)a[ 5]) * b[ 1] + + ((int64_t)a[ 6]) * b[ 0]; + int64_t t7 = ((int64_t)a[ 0]) * b[ 7] + + ((int64_t)a[ 1]) * b[ 6] + + ((int64_t)a[ 2]) * b[ 5] + + ((int64_t)a[ 3]) * b[ 4] + + ((int64_t)a[ 4]) * b[ 3] + + ((int64_t)a[ 5]) * b[ 2] + + ((int64_t)a[ 6]) * b[ 1] + + ((int64_t)a[ 7]) * b[ 0]; + int64_t t8 = ((int64_t)a[ 0]) * b[ 8] + + ((int64_t)a[ 1]) * b[ 7] + + ((int64_t)a[ 2]) * b[ 6] + + ((int64_t)a[ 3]) * b[ 5] + + ((int64_t)a[ 4]) * b[ 4] + + ((int64_t)a[ 5]) * b[ 3] + + ((int64_t)a[ 6]) * b[ 2] + + ((int64_t)a[ 7]) * b[ 1] + + ((int64_t)a[ 8]) * b[ 0]; + int64_t t9 = ((int64_t)a[ 0]) * b[ 9] + + ((int64_t)a[ 1]) * b[ 8] + + ((int64_t)a[ 2]) * b[ 7] + + ((int64_t)a[ 3]) * b[ 6] + + ((int64_t)a[ 4]) * b[ 5] + + ((int64_t)a[ 5]) * b[ 4] + + ((int64_t)a[ 6]) * b[ 3] + + ((int64_t)a[ 7]) * b[ 2] + + ((int64_t)a[ 8]) * b[ 1] + + ((int64_t)a[ 9]) * b[ 0]; + int64_t t10 = ((int64_t)a[ 1]) * b[ 9] + + ((int64_t)a[ 2]) * b[ 8] + + ((int64_t)a[ 3]) * b[ 7] + + ((int64_t)a[ 4]) * b[ 6] + + ((int64_t)a[ 5]) * b[ 5] + + ((int64_t)a[ 6]) * b[ 4] + + ((int64_t)a[ 7]) * b[ 3] + + ((int64_t)a[ 8]) * b[ 2] + + ((int64_t)a[ 9]) * b[ 1]; + int64_t t11 = ((int64_t)a[ 2]) * b[ 9] + + ((int64_t)a[ 3]) * b[ 8] + + ((int64_t)a[ 4]) * b[ 7] + + ((int64_t)a[ 5]) * b[ 6] + + ((int64_t)a[ 6]) * b[ 5] + + ((int64_t)a[ 7]) * b[ 4] + + ((int64_t)a[ 8]) * b[ 3] + + ((int64_t)a[ 9]) * b[ 2]; + int64_t t12 = ((int64_t)a[ 3]) * b[ 9] + + ((int64_t)a[ 4]) * b[ 8] + + ((int64_t)a[ 5]) * b[ 7] + + ((int64_t)a[ 6]) * b[ 6] + + ((int64_t)a[ 7]) * b[ 5] + + ((int64_t)a[ 8]) * b[ 4] + + ((int64_t)a[ 9]) * b[ 3]; + int64_t t13 = ((int64_t)a[ 4]) * b[ 9] + + ((int64_t)a[ 5]) * b[ 8] + + ((int64_t)a[ 6]) * b[ 7] + + ((int64_t)a[ 7]) * b[ 6] + + ((int64_t)a[ 8]) * b[ 5] + + ((int64_t)a[ 9]) * b[ 4]; + int64_t t14 = ((int64_t)a[ 5]) * b[ 9] + + ((int64_t)a[ 6]) * b[ 8] + + ((int64_t)a[ 7]) * b[ 7] + + ((int64_t)a[ 8]) * b[ 6] + + ((int64_t)a[ 9]) * b[ 5]; + int64_t t15 = ((int64_t)a[ 6]) * b[ 9] + + ((int64_t)a[ 7]) * b[ 8] + + ((int64_t)a[ 8]) * b[ 7] + + ((int64_t)a[ 9]) * b[ 6]; + int64_t t16 = ((int64_t)a[ 7]) * b[ 9] + + ((int64_t)a[ 8]) * b[ 8] + + ((int64_t)a[ 9]) * b[ 7]; + int64_t t17 = ((int64_t)a[ 8]) * b[ 9] + + ((int64_t)a[ 9]) * b[ 8]; + int64_t t18 = ((int64_t)a[ 9]) * b[ 9]; + + t1 += t0 >> 26; r[ 0] = t0 & 0x3ffffff; + t2 += t1 >> 26; r[ 1] = t1 & 0x3ffffff; + t3 += t2 >> 26; r[ 2] = t2 & 0x3ffffff; + t4 += t3 >> 26; r[ 3] = t3 & 0x3ffffff; + t5 += t4 >> 26; r[ 4] = t4 & 0x3ffffff; + t6 += t5 >> 26; r[ 5] = t5 & 0x3ffffff; + t7 += t6 >> 26; r[ 6] = t6 & 0x3ffffff; + t8 += t7 >> 26; r[ 
7] = t7 & 0x3ffffff; + t9 += t8 >> 26; r[ 8] = t8 & 0x3ffffff; + t10 += t9 >> 26; r[ 9] = t9 & 0x3ffffff; + t11 += t10 >> 26; r[10] = t10 & 0x3ffffff; + t12 += t11 >> 26; r[11] = t11 & 0x3ffffff; + t13 += t12 >> 26; r[12] = t12 & 0x3ffffff; + t14 += t13 >> 26; r[13] = t13 & 0x3ffffff; + t15 += t14 >> 26; r[14] = t14 & 0x3ffffff; + t16 += t15 >> 26; r[15] = t15 & 0x3ffffff; + t17 += t16 >> 26; r[16] = t16 & 0x3ffffff; + t18 += t17 >> 26; r[17] = t17 & 0x3ffffff; + r[19] = (sp_digit)(t18 >> 26); + r[18] = t18 & 0x3ffffff; +} + +#endif /* WOLFSSL_SP_SMALL */ +#define sp_256_mont_reduce_order_10 sp_256_mont_reduce_10 + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static sp_digit sp_256_cmp_10(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=9; i>=0; i--) { + r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#else + r |= (a[ 9] - b[ 9]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 8] - b[ 8]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 7] - b[ 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 6] - b[ 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 5] - b[ 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 4] - b[ 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 3] - b[ 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 2] - b[ 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 1] - b[ 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 0] - b[ 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static void sp_256_cond_sub_10(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 10; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + r[ 0] = a[ 0] - (b[ 0] & m); + r[ 1] = a[ 1] - (b[ 1] & m); + r[ 2] = a[ 2] - (b[ 2] & m); + r[ 3] = a[ 3] - (b[ 3] & m); + r[ 4] = a[ 4] - (b[ 4] & m); + r[ 5] = a[ 5] - (b[ 5] & m); + r[ 6] = a[ 6] - (b[ 6] & m); + r[ 7] = a[ 7] - (b[ 7] & m); + r[ 8] = a[ 8] - (b[ 8] & m); + r[ 9] = a[ 9] - (b[ 9] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
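+ *
+ * This is the inner step of sp_256_mont_reduce_10() below: with b set to
+ * the Montgomery quotient digit mu, r += mu * m makes the bottom 26-bit
+ * limb of r zero mod 2^26 so it can be shifted away.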
+ */ +SP_NOINLINE static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 10; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x3ffffff; + t >>= 26; + } + r[10] += t; +#else + int64_t tb = b; + int64_t t[10]; + + t[ 0] = tb * a[ 0]; + t[ 1] = tb * a[ 1]; + t[ 2] = tb * a[ 2]; + t[ 3] = tb * a[ 3]; + t[ 4] = tb * a[ 4]; + t[ 5] = tb * a[ 5]; + t[ 6] = tb * a[ 6]; + t[ 7] = tb * a[ 7]; + t[ 8] = tb * a[ 8]; + t[ 9] = tb * a[ 9]; + r[ 0] += (sp_digit) (t[ 0] & 0x3ffffff); + r[ 1] += (sp_digit)((t[ 0] >> 26) + (t[ 1] & 0x3ffffff)); + r[ 2] += (sp_digit)((t[ 1] >> 26) + (t[ 2] & 0x3ffffff)); + r[ 3] += (sp_digit)((t[ 2] >> 26) + (t[ 3] & 0x3ffffff)); + r[ 4] += (sp_digit)((t[ 3] >> 26) + (t[ 4] & 0x3ffffff)); + r[ 5] += (sp_digit)((t[ 4] >> 26) + (t[ 5] & 0x3ffffff)); + r[ 6] += (sp_digit)((t[ 5] >> 26) + (t[ 6] & 0x3ffffff)); + r[ 7] += (sp_digit)((t[ 6] >> 26) + (t[ 7] & 0x3ffffff)); + r[ 8] += (sp_digit)((t[ 7] >> 26) + (t[ 8] & 0x3ffffff)); + r[ 9] += (sp_digit)((t[ 8] >> 26) + (t[ 9] & 0x3ffffff)); + r[10] += (sp_digit) (t[ 9] >> 26); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 26. + * + * a Array of sp_digit to normalize. + */ +static void sp_256_norm_10(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 9; i++) { + a[i+1] += a[i] >> 26; + a[i] &= 0x3ffffff; + } +#else + a[1] += a[0] >> 26; a[0] &= 0x3ffffff; + a[2] += a[1] >> 26; a[1] &= 0x3ffffff; + a[3] += a[2] >> 26; a[2] &= 0x3ffffff; + a[4] += a[3] >> 26; a[3] &= 0x3ffffff; + a[5] += a[4] >> 26; a[4] &= 0x3ffffff; + a[6] += a[5] >> 26; a[5] &= 0x3ffffff; + a[7] += a[6] >> 26; a[6] &= 0x3ffffff; + a[8] += a[7] >> 26; a[7] &= 0x3ffffff; + a[9] += a[8] >> 26; a[8] &= 0x3ffffff; +#endif +} + +/* Shift the result in the high 256 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. + */ +static void sp_256_mont_shift_10(sp_digit* r, const sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + sp_digit n, s; + + s = a[10]; + n = a[9] >> 22; + for (i = 0; i < 9; i++) { + n += (s & 0x3ffffff) << 4; + r[i] = n & 0x3ffffff; + n >>= 26; + s = a[11 + i] + (s >> 26); + } + n += s << 4; + r[9] = n; +#else + sp_digit n, s; + + s = a[10]; n = a[9] >> 22; + n += (s & 0x3ffffff) << 4; r[ 0] = n & 0x3ffffff; + n >>= 26; s = a[11] + (s >> 26); + n += (s & 0x3ffffff) << 4; r[ 1] = n & 0x3ffffff; + n >>= 26; s = a[12] + (s >> 26); + n += (s & 0x3ffffff) << 4; r[ 2] = n & 0x3ffffff; + n >>= 26; s = a[13] + (s >> 26); + n += (s & 0x3ffffff) << 4; r[ 3] = n & 0x3ffffff; + n >>= 26; s = a[14] + (s >> 26); + n += (s & 0x3ffffff) << 4; r[ 4] = n & 0x3ffffff; + n >>= 26; s = a[15] + (s >> 26); + n += (s & 0x3ffffff) << 4; r[ 5] = n & 0x3ffffff; + n >>= 26; s = a[16] + (s >> 26); + n += (s & 0x3ffffff) << 4; r[ 6] = n & 0x3ffffff; + n >>= 26; s = a[17] + (s >> 26); + n += (s & 0x3ffffff) << 4; r[ 7] = n & 0x3ffffff; + n >>= 26; s = a[18] + (s >> 26); + n += (s & 0x3ffffff) << 4; r[ 8] = n & 0x3ffffff; + n >>= 26; s = a[19] + (s >> 26); + n += s << 4; r[ 9] = n; +#endif /* WOLFSSL_SP_SMALL */ + XMEMSET(&r[10], 0, sizeof(*r) * 10U); +} + +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
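+ *
+ * Each round picks mu = a[i] * mp (mod 2^26; mod 2^22 for the last word)
+ * so that adding mu * m zeroes digit i.  Once the low 256 bits are
+ * cleared, sp_256_mont_shift_10() divides by 2^256, giving a result
+ * congruent to a * R^-1 mod m with R = 2^256; a conditional subtract of m
+ * finishes the reduction.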
+ */
+static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    if (mp != 1) {
+        for (i=0; i<9; i++) {
+            mu = (a[i] * mp) & 0x3ffffff;
+            sp_256_mul_add_10(a+i, m, mu);
+            a[i+1] += a[i] >> 26;
+        }
+        mu = (a[i] * mp) & 0x3fffffL;
+        sp_256_mul_add_10(a+i, m, mu);
+        a[i+1] += a[i] >> 26;
+        a[i] &= 0x3ffffff;
+    }
+    else {
+        for (i=0; i<9; i++) {
+            mu = a[i] & 0x3ffffff;
+            sp_256_mul_add_10(a+i, p256_mod, mu);
+            a[i+1] += a[i] >> 26;
+        }
+        mu = a[i] & 0x3fffffL;
+        sp_256_mul_add_10(a+i, p256_mod, mu);
+        a[i+1] += a[i] >> 26;
+        a[i] &= 0x3ffffff;
+    }
+
+    sp_256_mont_shift_10(a, a);
+    sp_256_cond_sub_10(a, a, m, 0 - (((a[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_256_mont_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mul_10(r, a, b);
+    sp_256_mont_reduce_10(r, m, mp);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_256_sqr_10(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[9]) * a[9];
+    r[19] = (sp_digit)(c >> 26);
+    c = (c & 0x3ffffff) << 26;
+    for (k = 17; k >= 0; k--) {
+        for (i = 9; i >= 0; i--) {
+            j = k - i;
+            if (j >= 10 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+            c += ((int64_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 52;
+        r[k + 1] = (c >> 26) & 0x3ffffff;
+        c = (c & 0x3ffffff) << 26;
+    }
+    r[0] = (sp_digit)(c >> 26);
+}
+
+#else
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
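+ *
+ * Squaring exploits symmetry: each cross product a[i] * a[j] with i != j
+ * appears twice, so it is computed once and doubled, e.g.
+ * t3 = (a[0]*a[3] + a[1]*a[2]) * 2, roughly halving the multiply count
+ * relative to sp_256_mul_10().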
+ */
+SP_NOINLINE static void sp_256_sqr_10(sp_digit* r, const sp_digit* a)
+{
+    int64_t t0  =  ((int64_t)a[ 0]) * a[ 0];
+    int64_t t1  = (((int64_t)a[ 0]) * a[ 1]) * 2;
+    int64_t t2  = (((int64_t)a[ 0]) * a[ 2]) * 2
+                +  ((int64_t)a[ 1]) * a[ 1];
+    int64_t t3  = (((int64_t)a[ 0]) * a[ 3]
+                +  ((int64_t)a[ 1]) * a[ 2]) * 2;
+    int64_t t4  = (((int64_t)a[ 0]) * a[ 4]
+                +  ((int64_t)a[ 1]) * a[ 3]) * 2
+                +  ((int64_t)a[ 2]) * a[ 2];
+    int64_t t5  = (((int64_t)a[ 0]) * a[ 5]
+                +  ((int64_t)a[ 1]) * a[ 4]
+                +  ((int64_t)a[ 2]) * a[ 3]) * 2;
+    int64_t t6  = (((int64_t)a[ 0]) * a[ 6]
+                +  ((int64_t)a[ 1]) * a[ 5]
+                +  ((int64_t)a[ 2]) * a[ 4]) * 2
+                +  ((int64_t)a[ 3]) * a[ 3];
+    int64_t t7  = (((int64_t)a[ 0]) * a[ 7]
+                +  ((int64_t)a[ 1]) * a[ 6]
+                +  ((int64_t)a[ 2]) * a[ 5]
+                +  ((int64_t)a[ 3]) * a[ 4]) * 2;
+    int64_t t8  = (((int64_t)a[ 0]) * a[ 8]
+                +  ((int64_t)a[ 1]) * a[ 7]
+                +  ((int64_t)a[ 2]) * a[ 6]
+                +  ((int64_t)a[ 3]) * a[ 5]) * 2
+                +  ((int64_t)a[ 4]) * a[ 4];
+    int64_t t9  = (((int64_t)a[ 0]) * a[ 9]
+                +  ((int64_t)a[ 1]) * a[ 8]
+                +  ((int64_t)a[ 2]) * a[ 7]
+                +  ((int64_t)a[ 3]) * a[ 6]
+                +  ((int64_t)a[ 4]) * a[ 5]) * 2;
+    int64_t t10 = (((int64_t)a[ 1]) * a[ 9]
+                +  ((int64_t)a[ 2]) * a[ 8]
+                +  ((int64_t)a[ 3]) * a[ 7]
+                +  ((int64_t)a[ 4]) * a[ 6]) * 2
+                +  ((int64_t)a[ 5]) * a[ 5];
+    int64_t t11 = (((int64_t)a[ 2]) * a[ 9]
+                +  ((int64_t)a[ 3]) * a[ 8]
+                +  ((int64_t)a[ 4]) * a[ 7]
+                +  ((int64_t)a[ 5]) * a[ 6]) * 2;
+    int64_t t12 = (((int64_t)a[ 3]) * a[ 9]
+                +  ((int64_t)a[ 4]) * a[ 8]
+                +  ((int64_t)a[ 5]) * a[ 7]) * 2
+                +  ((int64_t)a[ 6]) * a[ 6];
+    int64_t t13 = (((int64_t)a[ 4]) * a[ 9]
+                +  ((int64_t)a[ 5]) * a[ 8]
+                +  ((int64_t)a[ 6]) * a[ 7]) * 2;
+    int64_t t14 = (((int64_t)a[ 5]) * a[ 9]
+                +  ((int64_t)a[ 6]) * a[ 8]) * 2
+                +  ((int64_t)a[ 7]) * a[ 7];
+    int64_t t15 = (((int64_t)a[ 6]) * a[ 9]
+                +  ((int64_t)a[ 7]) * a[ 8]) * 2;
+    int64_t t16 = (((int64_t)a[ 7]) * a[ 9]) * 2
+                +  ((int64_t)a[ 8]) * a[ 8];
+    int64_t t17 = (((int64_t)a[ 8]) * a[ 9]) * 2;
+    int64_t t18 =  ((int64_t)a[ 9]) * a[ 9];
+
+    t1  += t0  >> 26; r[ 0] = t0  & 0x3ffffff;
+    t2  += t1  >> 26; r[ 1] = t1  & 0x3ffffff;
+    t3  += t2  >> 26; r[ 2] = t2  & 0x3ffffff;
+    t4  += t3  >> 26; r[ 3] = t3  & 0x3ffffff;
+    t5  += t4  >> 26; r[ 4] = t4  & 0x3ffffff;
+    t6  += t5  >> 26; r[ 5] = t5  & 0x3ffffff;
+    t7  += t6  >> 26; r[ 6] = t6  & 0x3ffffff;
+    t8  += t7  >> 26; r[ 7] = t7  & 0x3ffffff;
+    t9  += t8  >> 26; r[ 8] = t8  & 0x3ffffff;
+    t10 += t9  >> 26; r[ 9] = t9  & 0x3ffffff;
+    t11 += t10 >> 26; r[10] = t10 & 0x3ffffff;
+    t12 += t11 >> 26; r[11] = t11 & 0x3ffffff;
+    t13 += t12 >> 26; r[12] = t12 & 0x3ffffff;
+    t14 += t13 >> 26; r[13] = t13 & 0x3ffffff;
+    t15 += t14 >> 26; r[14] = t14 & 0x3ffffff;
+    t16 += t15 >> 26; r[15] = t15 & 0x3ffffff;
+    t17 += t16 >> 26; r[16] = t16 & 0x3ffffff;
+    t18 += t17 >> 26; r[17] = t17 & 0x3ffffff;
+    r[19] = (sp_digit)(t18 >> 26);
+                      r[18] = t18 & 0x3ffffff;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_10(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_256_sqr_10(r, a);
+    sp_256_mont_reduce_10(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
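+ *
+ * Squaring n times raises a to the power 2^n mod m; the inversion chain
+ * below relies on this, e.g. sp_256_mont_sqr_n_10(t1, t2, 32, ...) shifts
+ * the exponent pattern of t2 up by 32 bits.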
+ */
+static void sp_256_mont_sqr_n_10(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mont_sqr_10(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_256_mont_sqr_10(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P256 curve. */
+static const uint32_t p256_mod_minus_2[8] = {
+    0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U,
+    0x00000001U,0xffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_10(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 10);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_10(t, t, p256_mod, p256_mp_mod);
+        if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 10);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 10;
+    sp_digit* t3 = td + 4 * 10;
+    /* 0x2 */
+    sp_256_mont_sqr_10(t1, a, p256_mod, p256_mp_mod);
+    /* 0x3 */
+    sp_256_mont_mul_10(t2, t1, a, p256_mod, p256_mp_mod);
+    /* 0xc */
+    sp_256_mont_sqr_n_10(t1, t2, 2, p256_mod, p256_mp_mod);
+    /* 0xd */
+    sp_256_mont_mul_10(t3, t1, a, p256_mod, p256_mp_mod);
+    /* 0xf */
+    sp_256_mont_mul_10(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xf0 */
+    sp_256_mont_sqr_n_10(t1, t2, 4, p256_mod, p256_mp_mod);
+    /* 0xfd */
+    sp_256_mont_mul_10(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xff */
+    sp_256_mont_mul_10(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xff00 */
+    sp_256_mont_sqr_n_10(t1, t2, 8, p256_mod, p256_mp_mod);
+    /* 0xfffd */
+    sp_256_mont_mul_10(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffff */
+    sp_256_mont_mul_10(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffff0000 */
+    sp_256_mont_sqr_n_10(t1, t2, 16, p256_mod, p256_mp_mod);
+    /* 0xfffffffd */
+    sp_256_mont_mul_10(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff */
+    sp_256_mont_mul_10(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000000 */
+    sp_256_mont_sqr_n_10(t1, t2, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffffffffffff */
+    sp_256_mont_mul_10(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001 */
+    sp_256_mont_mul_10(r, t1, a, p256_mod, p256_mp_mod);
+    /* 0xffffffff000000010000000000000000000000000000000000000000 */
+    sp_256_mont_sqr_n_10(r, r, 160, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+    sp_256_mont_mul_10(r, r, t2, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+    sp_256_mont_sqr_n_10(r, r, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+    sp_256_mont_mul_10(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
+ * t  Temporary ordinate data.
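+ *
+ * Affine coordinates are x = X / Z^2 and y = Y / Z^3; a single inversion
+ * of Z (computed as Z^(p-2) mod p by sp_256_mont_inv_10()) is shared by
+ * both divisions.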
+ */
+static void sp_256_map_10(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*10;
+    int32_t n;
+
+    sp_256_mont_inv_10(t1, p->z, t + 2*10);
+
+    sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    /* x /= z^2 */
+    sp_256_mont_mul_10(r->x, p->x, t2, p256_mod, p256_mp_mod);
+    XMEMSET(r->x + 10, 0, sizeof(r->x) / 2U);
+    sp_256_mont_reduce_10(r->x, p256_mod, p256_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_256_cmp_10(r->x, p256_mod);
+    sp_256_cond_sub_10(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r->x);
+
+    /* y /= z^3 */
+    sp_256_mont_mul_10(r->y, p->y, t1, p256_mod, p256_mp_mod);
+    XMEMSET(r->y + 10, 0, sizeof(r->y) / 2U);
+    sp_256_mont_reduce_10(r->y, p256_mod, p256_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_256_cmp_10(r->y, p256_mod);
+    sp_256_cond_sub_10(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r->y);
+
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_256_add_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_256_add_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[ 0] = a[ 0] + b[ 0];
+    r[ 1] = a[ 1] + b[ 1];
+    r[ 2] = a[ 2] + b[ 2];
+    r[ 3] = a[ 3] + b[ 3];
+    r[ 4] = a[ 4] + b[ 4];
+    r[ 5] = a[ 5] + b[ 5];
+    r[ 6] = a[ 6] + b[ 6];
+    r[ 7] = a[ 7] + b[ 7];
+    r[ 8] = a[ 8] + b[ 8];
+    r[ 9] = a[ 9] + b[ 9];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r  Result of addition.
+ * a  First number to add in Montgomery form.
+ * b  Second number to add in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_256_mont_add_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_256_add_10(r, a, b);
+    sp_256_norm_10(r);
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r);
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r  Result of doubling.
+ * a  Number to double in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_256_mont_dbl_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_256_add_10(r, a, a);
+    sp_256_norm_10(r);
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r);
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r  Result of tripling.
+ * a  Number to triple in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_256_mont_tpl_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_256_add_10(r, a, a);
+    sp_256_norm_10(r);
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r);
+    (void)sp_256_add_10(r, r, a);
+    sp_256_norm_10(r);
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_256_sub_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_256_sub_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[ 0] = a[ 0] - b[ 0];
+    r[ 1] = a[ 1] - b[ 1];
+    r[ 2] = a[ 2] - b[ 2];
+    r[ 3] = a[ 3] - b[ 3];
+    r[ 4] = a[ 4] - b[ 4];
+    r[ 5] = a[ 5] - b[ 5];
+    r[ 6] = a[ 6] - b[ 6];
+    r[ 7] = a[ 7] - b[ 7];
+    r[ 8] = a[ 8] - b[ 8];
+    r[ 9] = a[ 9] - b[ 9];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static void sp_256_cond_add_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    r[ 0] = a[ 0] + (b[ 0] & m);
+    r[ 1] = a[ 1] + (b[ 1] & m);
+    r[ 2] = a[ 2] + (b[ 2] & m);
+    r[ 3] = a[ 3] + (b[ 3] & m);
+    r[ 4] = a[ 4] + (b[ 4] & m);
+    r[ 5] = a[ 5] + (b[ 5] & m);
+    r[ 6] = a[ 6] + (b[ 6] & m);
+    r[ 7] = a[ 7] + (b[ 7] & m);
+    r[ 8] = a[ 8] + (b[ 8] & m);
+    r[ 9] = a[ 9] + (b[ 9] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r  Result of subtraction.
+ * a  Number to subtract from in Montgomery form.
+ * b  Number to subtract with in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_256_mont_sub_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_256_sub_10(r, a, b);
+    sp_256_cond_add_10(r, r, m, r[9] >> 22);
+    sp_256_norm_10(r);
+}
+
+/* Shift number right one bit.
+ * Bottom bit is lost.
+ *
+ * r  Result of shift.
+ * a  Number to shift.
+ */
+SP_NOINLINE static void sp_256_rshift1_10(sp_digit* r, sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<9; i++) {
+        r[i] = ((a[i] >> 1) | (a[i + 1] << 25)) & 0x3ffffff;
+    }
+#else
+    r[0] = ((a[0] >> 1) | (a[1] << 25)) & 0x3ffffff;
+    r[1] = ((a[1] >> 1) | (a[2] << 25)) & 0x3ffffff;
+    r[2] = ((a[2] >> 1) | (a[3] << 25)) & 0x3ffffff;
+    r[3] = ((a[3] >> 1) | (a[4] << 25)) & 0x3ffffff;
+    r[4] = ((a[4] >> 1) | (a[5] << 25)) & 0x3ffffff;
+    r[5] = ((a[5] >> 1) | (a[6] << 25)) & 0x3ffffff;
+    r[6] = ((a[6] >> 1) | (a[7] << 25)) & 0x3ffffff;
+    r[7] = ((a[7] >> 1) | (a[8] << 25)) & 0x3ffffff;
+    r[8] = ((a[8] >> 1) | (a[9] << 25)) & 0x3ffffff;
+#endif
+    r[9] = a[9] >> 1;
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r  Result of division by 2.
+ * a  Number to divide.
+ * m  Modulus (prime).
+ */
+static void sp_256_div2_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_256_cond_add_10(r, a, m, 0 - (a[0] & 1));
+    sp_256_norm_10(r);
+    sp_256_rshift1_10(r, r);
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data.
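+ *
+ * Uses the doubling formulas for a = -3 curves:
+ * M = 3*(X - Z^2)*(X + Z^2), S = 4*X*Y^2,
+ * X' = M^2 - 2*S, Y' = M*(S - X') - 8*Y^4, Z' = 2*Y*Z.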
+ */ +static void sp_256_proj_point_dbl_10(sp_point_256* r, const sp_point_256* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*10; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_10(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_10(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_10(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_10(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_10(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_10(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_10(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_10(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_10(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_10(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_10(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_10(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_10(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_10(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_10(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_10(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_10(y, y, t2, p256_mod); +} + +/* Compare two numbers to determine if they are equal. + * Constant time implementation. + * + * a First number to compare. + * b Second number to compare. + * returns 1 when equal and 0 otherwise. + */ +static int sp_256_cmp_equal_10(const sp_digit* a, const sp_digit* b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) | + (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) | + (a[8] ^ b[8]) | (a[9] ^ b[9])) == 0; +} + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_add_10(sp_point_256* r, const sp_point_256* p, const sp_point_256* q, + sp_digit* t) +{ + const sp_point_256* ap[2]; + sp_point_256* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*10; + sp_digit* t3 = t + 4*10; + sp_digit* t4 = t + 6*10; + sp_digit* t5 = t + 8*10; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Ensure only the first point is the same as the result. 
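+     * The addition below writes through r while q is still read, so when
+     * the caller passes q == r the inputs are swapped first; point
+     * addition is commutative so the result is unchanged.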
*/ + if (q == r) { + const sp_point_256* a = p; + p = q; + q = a; + } + + /* Check double */ + (void)sp_256_sub_10(t1, p256_mod, q->y); + sp_256_norm_10(t1); + if ((sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) & + (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) != 0) { + sp_256_proj_point_dbl_10(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_256)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<10; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<10; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<10; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_10(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t1, t1, x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_10(t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_10(t3, t3, y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_10(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_10(t4, t4, t3, p256_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_10(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(z, z, t2, p256_mod, p256_mp_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_10(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_sub_10(x, x, t5, p256_mod); + sp_256_mont_dbl_10(t1, y, p256_mod); + sp_256_mont_sub_10(x, x, t1, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_10(y, y, x, p256_mod); + sp_256_mont_mul_10(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_sub_10(y, y, t5, p256_mod); + } +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
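+ *
+ * In effect a Montgomery ladder: for each scalar bit one projective add
+ * and one double are performed, and the addr_mask copies select between
+ * t[0] and t[1] without a secret-dependent branch.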
+ */ +static int sp_256_ecc_mulmod_10(sp_point_256* r, const sp_point_256* g, const sp_digit* k, + int map, void* heap) +{ +#ifdef WOLFSSL_SP_NO_MALLOC + sp_point_256 t[3]; + sp_digit tmp[2 * 10 * 5]; +#else + sp_point_256* t; + sp_digit* tmp; +#endif + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + (void)heap; + +#ifndef WOLFSSL_SP_NO_MALLOC + t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + XMEMSET(t, 0, sizeof(sp_point_256) * 3); + + /* t[0] = {0, 0, 1} * norm */ + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod); + } + if (err == MP_OKAY) + err = sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod); + if (err == MP_OKAY) + err = sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod); + + if (err == MP_OKAY) { + i = 9; + c = 22; + n = k[i--] << (26 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) + break; + + n = k[i--]; + c = 26; + } + + y = (n >> 25) & 1; + n <<= 1; + + sp_256_proj_point_add_10(&t[y^1], &t[0], &t[1], tmp); + + XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), + sizeof(sp_point_256)); + sp_256_proj_point_dbl_10(&t[2], &t[2], tmp); + XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), &t[2], + sizeof(sp_point_256)); + } + + if (map != 0) { + sp_256_map_10(r, &t[0], tmp); + } + else { + XMEMCPY(r, &t[0], sizeof(sp_point_256)); + } + } + +#ifndef WOLFSSL_SP_NO_MALLOC + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5); + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_point_256) * 3); + XFREE(t, NULL, DYNAMIC_TYPE_ECC); + } +#else + ForceZero(tmp, sizeof(tmp)); + ForceZero(t, sizeof(t)); +#endif + + return err; +} + +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
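+ *
+ * Same ladder as the small variant above; the WOLFSSL_SMALL_STACK and
+ * WOLFSSL_SP_NO_MALLOC defines only decide whether the ladder state t[]
+ * and tmp[] live on the stack or the heap.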
+ */
+static int sp_256_ecc_mulmod_10(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 t[3];
+    sp_digit tmp[2 * 10 * 5];
+#else
+    sp_point_256* t;
+    sp_digit* tmp;
+#endif
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_256*)XMALLOC(sizeof(*t) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        t[1].infinity = 0;
+        err = sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod);
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod);
+
+    if (err == MP_OKAY) {
+        i = 9;
+        c = 22;
+        n = k[i--] << (26 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 26;
+            }
+
+            y = (n >> 25) & 1;
+            n <<= 1;
+
+            sp_256_proj_point_add_10(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                   ((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_256_proj_point_dbl_10(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                            ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 3);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
+#endif
+
+    return err;
+}
+
+#else
+/* A table entry for pre-computed points. */
+typedef struct sp_table_entry_256 {
+    sp_digit x[10];
+    sp_digit y[10];
+} sp_table_entry_256;
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
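+ *
+ * Fixed-window method: t[0..15] hold the multiples 0*g through 15*g, and
+ * the scalar is consumed four bits at a time as four doublings followed
+ * by one table addition.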
+ */ +static int sp_256_ecc_mulmod_fast_10(sp_point_256* r, const sp_point_256* g, const sp_digit* k, + int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 td[16]; + sp_point_256 rtd; + sp_digit tmpd[2 * 10 * 5]; +#endif + sp_point_256* t; + sp_point_256* rt; + sp_digit* tmp; + sp_digit n; + int i; + int c, y; + int err; + + (void)heap; + + err = sp_256_point_new_10(heap, rtd, rt); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#else + t = td; + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + (void)sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod); + (void)sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod); + (void)sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod); + t[1].infinity = 0; + sp_256_proj_point_dbl_10(&t[ 2], &t[ 1], tmp); + t[ 2].infinity = 0; + sp_256_proj_point_add_10(&t[ 3], &t[ 2], &t[ 1], tmp); + t[ 3].infinity = 0; + sp_256_proj_point_dbl_10(&t[ 4], &t[ 2], tmp); + t[ 4].infinity = 0; + sp_256_proj_point_add_10(&t[ 5], &t[ 3], &t[ 2], tmp); + t[ 5].infinity = 0; + sp_256_proj_point_dbl_10(&t[ 6], &t[ 3], tmp); + t[ 6].infinity = 0; + sp_256_proj_point_add_10(&t[ 7], &t[ 4], &t[ 3], tmp); + t[ 7].infinity = 0; + sp_256_proj_point_dbl_10(&t[ 8], &t[ 4], tmp); + t[ 8].infinity = 0; + sp_256_proj_point_add_10(&t[ 9], &t[ 5], &t[ 4], tmp); + t[ 9].infinity = 0; + sp_256_proj_point_dbl_10(&t[10], &t[ 5], tmp); + t[10].infinity = 0; + sp_256_proj_point_add_10(&t[11], &t[ 6], &t[ 5], tmp); + t[11].infinity = 0; + sp_256_proj_point_dbl_10(&t[12], &t[ 6], tmp); + t[12].infinity = 0; + sp_256_proj_point_add_10(&t[13], &t[ 7], &t[ 6], tmp); + t[13].infinity = 0; + sp_256_proj_point_dbl_10(&t[14], &t[ 7], tmp); + t[14].infinity = 0; + sp_256_proj_point_add_10(&t[15], &t[ 8], &t[ 7], tmp); + t[15].infinity = 0; + + i = 8; + n = k[i+1] << 6; + c = 18; + y = n >> 24; + XMEMCPY(rt, &t[y], sizeof(sp_point_256)); + n <<= 8; + for (; i>=0 || c>=4; ) { + if (c < 4) { + n |= k[i--] << (6 - c); + c += 26; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + + sp_256_proj_point_dbl_10(rt, rt, tmp); + sp_256_proj_point_dbl_10(rt, rt, tmp); + sp_256_proj_point_dbl_10(rt, rt, tmp); + sp_256_proj_point_dbl_10(rt, rt, tmp); + + sp_256_proj_point_add_10(rt, rt, &t[y], tmp); + } + + if (map != 0) { + sp_256_map_10(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_256)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5); + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + } + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_point_256) * 16); + XFREE(t, heap, DYNAMIC_TYPE_ECC); + } +#else + ForceZero(tmpd, sizeof(tmpd)); + ForceZero(td, sizeof(td)); +#endif + sp_256_point_free_10(rt, 1, heap); + + return err; +} + +#ifdef FP_ECC +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. 
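+ *
+ * Cheaper than n separate calls to sp_256_proj_point_dbl_10(): the value
+ * W = Z^4 is carried across iterations and updated as W *= Y^4 instead of
+ * being recomputed from Z each time.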
+ */ +static void sp_256_proj_point_dbl_n_10(sp_point_256* p, int n, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*10; + sp_digit* b = t + 4*10; + sp_digit* t1 = t + 6*10; + sp_digit* t2 = t + 8*10; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_256_mont_dbl_10(y, y, p256_mod); + /* W = Z^4 */ + sp_256_mont_sqr_10(w, z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_10(w, w, p256_mod, p256_mp_mod); + +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_256_mont_sqr_10(t1, x, p256_mod, p256_mp_mod); + sp_256_mont_sub_10(t1, t1, w, p256_mod); + sp_256_mont_tpl_10(a, t1, p256_mod); + /* B = X*Y^2 */ + sp_256_mont_sqr_10(t1, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(b, t1, x, p256_mod, p256_mp_mod); + /* X = A^2 - 2B */ + sp_256_mont_sqr_10(x, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_10(t2, b, p256_mod); + sp_256_mont_sub_10(x, x, t2, p256_mod); + /* Z = Z*Y */ + sp_256_mont_mul_10(z, z, y, p256_mod, p256_mp_mod); + /* t2 = Y^4 */ + sp_256_mont_sqr_10(t1, t1, p256_mod, p256_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_256_mont_mul_10(w, w, t1, p256_mod, p256_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_256_mont_sub_10(y, b, x, p256_mod); + sp_256_mont_mul_10(y, y, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_10(y, y, p256_mod); + sp_256_mont_sub_10(y, y, t1, p256_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_256_mont_sqr_10(t1, x, p256_mod, p256_mp_mod); + sp_256_mont_sub_10(t1, t1, w, p256_mod); + sp_256_mont_tpl_10(a, t1, p256_mod); + /* B = X*Y^2 */ + sp_256_mont_sqr_10(t1, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(b, t1, x, p256_mod, p256_mp_mod); + /* X = A^2 - 2B */ + sp_256_mont_sqr_10(x, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_10(t2, b, p256_mod); + sp_256_mont_sub_10(x, x, t2, p256_mod); + /* Z = Z*Y */ + sp_256_mont_mul_10(z, z, y, p256_mod, p256_mp_mod); + /* t2 = Y^4 */ + sp_256_mont_sqr_10(t1, t1, p256_mod, p256_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_256_mont_sub_10(y, b, x, p256_mod); + sp_256_mont_mul_10(y, y, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_10(y, y, p256_mod); + sp_256_mont_sub_10(y, y, t1, p256_mod); +#endif + /* Y = Y/2 */ + sp_256_div2_10(y, y, p256_mod); +} + +#endif /* FP_ECC */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
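+ *
+ * With Z2 = 1, U1 = X1 and S1 = Y1 directly, saving the Z2^2 and Z2^3
+ * multiplications needed by the general sp_256_proj_point_add_10().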
+ */ +static void sp_256_proj_point_add_qz1_10(sp_point_256* r, const sp_point_256* p, + const sp_point_256* q, sp_digit* t) +{ + const sp_point_256* ap[2]; + sp_point_256* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*10; + sp_digit* t3 = t + 4*10; + sp_digit* t4 = t + 6*10; + sp_digit* t5 = t + 8*10; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Check double */ + (void)sp_256_sub_10(t1, p256_mod, q->y); + sp_256_norm_10(t1); + if ((sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) & + (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) != 0) { + sp_256_proj_point_dbl_10(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_256)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<10; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<10; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<10; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_10(t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod); + /* H = U2 - X1 */ + sp_256_mont_sub_10(t2, t2, x, p256_mod); + /* R = S2 - Y1 */ + sp_256_mont_sub_10(t4, t4, y, p256_mod); + /* Z3 = H*Z1 */ + sp_256_mont_mul_10(z, z, t2, p256_mod, p256_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_256_mont_sqr_10(t1, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t3, x, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_sub_10(x, t1, t5, p256_mod); + sp_256_mont_dbl_10(t1, t3, p256_mod); + sp_256_mont_sub_10(x, x, t1, p256_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_256_mont_sub_10(t3, t3, x, p256_mod); + sp_256_mont_mul_10(t3, t3, t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t5, t5, y, p256_mod, p256_mp_mod); + sp_256_mont_sub_10(y, t3, t5, p256_mod); + } +} + +#ifdef FP_ECC +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_256_proj_to_affine_10(sp_point_256* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 10; + sp_digit* tmp = t + 4 * 10; + + sp_256_mont_inv_10(t1, a->z, tmp); + + sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod); + + sp_256_mont_mul_10(a->x, a->x, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(a->y, a->y, t1, p256_mod, p256_mp_mod); + XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod)); +} + +/* Generate the pre-computed table of points for the base point. + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. 
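+ *
+ * Entry j of the table holds the sum over the set bits b of j of
+ * 2^(32*b) * a in affine form; table[0] is the point at infinity and
+ * table[1 << i] comes from 32 repeated doublings of the previous power.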
+ */
+static int sp_256_gen_stripe_table_10(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_10(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_10(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_10(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_10(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_10(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_10(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_10(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_256_proj_point_dbl_n_10(t, 32, tmp);
+            sp_256_proj_to_affine_10(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_10(t, s1, s2, tmp);
+                sp_256_proj_to_affine_10(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_10(s2, 0, heap);
+    sp_256_point_free_10(s1, 0, heap);
+    sp_256_point_free_10( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
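+ *
+ * Comb evaluation: column i gathers scalar bits i, i+32, ..., i+224 into
+ * a table index, so one initial load plus 31 double-and-add steps cover
+ * all 256 bits, eight bits per column.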
+ */
+static int sp_256_ecc_mulmod_stripe_10(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit td[2 * 10 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+    err = sp_256_point_new_10(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_10(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        y = 0;
+        for (j=0,x=31; j<8; j++,x+=32) {
+            y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=30; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=32) {
+                y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_10(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_10(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_10(p, 0, heap);
+    sp_256_point_free_10(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_256_t {
+    sp_digit x[10];
+    sp_digit y[10];
+    sp_table_entry_256 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_256_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_256[i].set)
+            continue;
+
+        if (sp_256_cmp_equal_10(g->x, sp_cache_256[i].x) &
+                sp_256_cmp_equal_10(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_256[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_256[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_256_ecc_mulmod_10(sp_point_256* r, const sp_point_256* g, const sp_digit* k, + int map, void* heap) +{ +#ifndef FP_ECC + return sp_256_ecc_mulmod_fast_10(r, g, k, map, heap); +#else + sp_digit tmp[2 * 10 * 5]; + sp_cache_256_t* cache; + int err = MP_OKAY; + +#ifndef HAVE_THREAD_LS + if (initCacheMutex_256 == 0) { + wc_InitMutex(&sp_cache_256_lock); + initCacheMutex_256 = 1; + } + if (wc_LockMutex(&sp_cache_256_lock) != 0) + err = BAD_MUTEX_E; +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_256(g, &cache); + if (cache->cnt == 2) + sp_256_gen_stripe_table_10(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_256_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_256_ecc_mulmod_fast_10(r, g, k, map, heap); + } + else { + err = sp_256_ecc_mulmod_stripe_10(r, g, cache->table, k, + map, heap); + } + } + + return err; +#endif +} + +#endif +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map, + void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 p; + sp_digit kd[10]; +#endif + sp_point_256* point; + sp_digit* k = NULL; + int err = MP_OKAY; + + err = sp_256_point_new_10(heap, p, point); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + if (err == MP_OKAY) { + sp_256_from_mp(k, 10, km); + sp_256_point_from_ecc_point_10(point, gm); + + err = sp_256_ecc_mulmod_10(point, point, k, map, heap); + } + if (err == MP_OKAY) { + err = sp_256_point_to_ecc_point_10(point, r); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_10(point, 0, heap); + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_256_ecc_mulmod_base_10(sp_point_256* r, const sp_digit* k, + int map, void* heap) +{ + /* No pre-computed values. */ + return sp_256_ecc_mulmod_10(r, &p256_base, k, map, heap); +} + +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +static int sp_256_ecc_mulmod_base_10(sp_point_256* r, const sp_digit* k, + int map, void* heap) +{ + /* No pre-computed values. */ + return sp_256_ecc_mulmod_10(r, &p256_base, k, map, heap); +} + +#else +static const sp_table_entry_256 p256_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x0a9143c,0x1cc3506,0x360179e,0x3f17fb6,0x075ba95,0x1d88944, + 0x3b732b7,0x15719e7,0x376a537,0x0062417 }, + { 0x295560a,0x094d5f3,0x245cddf,0x392e867,0x18b4ab8,0x3487cc9, + 0x288688d,0x176174b,0x3182588,0x0215c7f } }, + /* 2 */ + { { 0x147519a,0x2218090,0x32f0202,0x2b09acd,0x0d0981e,0x1e17af2, + 0x14a7caa,0x163a6a7,0x10ddbdf,0x03654f1 }, + { 0x1590f8f,0x0d8733f,0x09179d6,0x1ad139b,0x372e962,0x0bad933, + 0x1961102,0x223cdff,0x37e9eb2,0x0218fae } }, + /* 3 */ + { { 0x0db6485,0x1ad88d7,0x2f97785,0x288bc28,0x3808f0e,0x3df8c02, + 0x28d9544,0x20280f9,0x055b5ff,0x00001d8 }, + { 0x38d2010,0x13ae6e0,0x308a763,0x2ecc90d,0x254014f,0x10a9981, + 0x247d398,0x0fb8383,0x3613437,0x020c21d } }, + /* 4 */ + { { 0x2a0d2bb,0x08bf145,0x34994f9,0x1b06988,0x30d5cc1,0x1f18b22, + 0x01cf3a5,0x199fe49,0x161fd1b,0x00bd79a }, + { 0x1a01797,0x171c2fd,0x21925c1,0x1358255,0x23d20b4,0x1c7f6d4, + 0x111b370,0x03dec12,0x1168d6f,0x03d923e } }, + /* 5 */ + { { 0x137bbbc,0x19a11f8,0x0bec9e5,0x27a29a8,0x3e43446,0x275cd18, + 0x0427617,0x00056c7,0x285133d,0x016af80 }, + { 0x04c7dab,0x2a0df30,0x0c0792a,0x1310c98,0x3573d9f,0x239b30d, + 0x1315627,0x1ce0c32,0x25b6b6f,0x0252edc } }, + /* 6 */ + { { 0x20f141c,0x26d23dc,0x3c74bbf,0x334b7d6,0x06199b3,0x0441171, + 0x3f61294,0x313bf70,0x3cb2f7d,0x03375ae }, + { 0x2f436fd,0x19c02fa,0x26becca,0x1b6e64c,0x26f647f,0x053c948, + 0x0fa7920,0x397d830,0x2bd4bda,0x028d86f } }, + /* 7 */ + { { 0x17c13c7,0x2895616,0x03e128a,0x17d42df,0x1c38d63,0x0f02747, + 0x039aecf,0x0a4b01c,0x209c4b5,0x02e84b2 }, + { 0x1f91dfd,0x023e916,0x07fb9e4,0x19b3ba8,0x13af43b,0x35e02ca, + 0x0eb0899,0x3bd2c7b,0x19d701f,0x014faee } }, + /* 8 */ + { { 0x0e63d34,0x1fb8c6c,0x0fab4fe,0x1caa795,0x0f46005,0x179ed69, + 0x093334d,0x120c701,0x39206d5,0x021627e }, + { 0x183553a,0x03d7319,0x09e5aa7,0x12b8959,0x2087909,0x0011194, + 0x1045071,0x0713f32,0x16d0254,0x03aec1a } }, + /* 9 */ + { { 0x01647c5,0x1b2856b,0x1799461,0x11f133d,0x0b8127d,0x1937eeb, + 0x266aa37,0x1f68f71,0x0cbd1b2,0x03aca08 }, + { 0x287e008,0x1be361a,0x38f3940,0x276488d,0x2d87dfa,0x0333b2c, + 0x2d2e428,0x368755b,0x09b55a7,0x007ca0a } }, + /* 10 */ + { { 0x389da99,0x2a8300e,0x0022abb,0x27ae0a1,0x0a6f2d7,0x207017a, + 0x047862b,0x1358c9e,0x35905e5,0x00cde92 }, + { 0x1f7794a,0x1d40348,0x3f613c6,0x2ddf5b5,0x0207005,0x133f5ba, + 0x1a37810,0x3ef5829,0x0d5f4c2,0x0035978 } }, + /* 11 */ + { { 0x1275d38,0x026efad,0x2358d9d,0x1142f82,0x14268a7,0x1cfac99, + 0x362ff49,0x288cbc1,0x24252f4,0x0308f68 }, + { 0x394520c,0x06e13c2,0x178e5da,0x18ec16f,0x1096667,0x134a7a8, + 0x0dcb869,0x33fc4e9,0x38cc790,0x006778e } }, + /* 12 */ + { { 0x2c5fe04,0x29c5b09,0x1bdb183,0x02ceee8,0x03b28de,0x132dc4b, + 0x32c586a,0x32ff5d0,0x3d491fc,0x038d372 }, + { 0x2a58403,0x2351aea,0x3a53b40,0x21a0ba5,0x39a6974,0x1aaaa2b, + 0x3901273,0x03dfe78,0x3447b4e,0x039d907 } }, + /* 13 */ + { { 0x364ba59,0x14e5077,0x02fc7d7,0x3b02c09,0x1d33f10,0x0560616, + 0x06dfc6a,0x15efd3c,0x357052a,0x01284b7 }, + { 0x039dbd0,0x18ce3e5,0x3e1fbfa,0x352f794,0x0d3c24b,0x07c6cc5, + 0x1e4ffa2,0x3a91bf5,0x293bb5b,0x01abd6a } }, + /* 14 */ + { { 0x0c91999,0x02da644,0x0491da1,0x100a960,0x00a24b4,0x2330824, + 
0x0094b4b,0x1004cf8,0x35a66a4,0x017f8d1 }, + { 0x13e7b4b,0x232af7e,0x391ab0f,0x069f08f,0x3292b50,0x3479898, + 0x2889aec,0x2a4590b,0x308ecfe,0x02d5138 } }, + /* 15 */ + { { 0x2ddfdce,0x231ba45,0x39e6647,0x19be245,0x12c3291,0x35399f8, + 0x0d6e764,0x3082d3a,0x2bda6b0,0x0382dac }, + { 0x37efb57,0x04b7cae,0x00070d3,0x379e431,0x01aac0d,0x1e6f251, + 0x0336ad6,0x0ddd3e4,0x3de25a6,0x01c7008 } }, + /* 16 */ + { { 0x3e20925,0x230912f,0x286762a,0x30e3f73,0x391c19a,0x34e1c18, + 0x16a5d5d,0x093d96a,0x3d421d3,0x0187561 }, + { 0x37173ea,0x19ce8a8,0x0b65e87,0x0214dde,0x2238480,0x16ead0f, + 0x38441e0,0x3bef843,0x2124621,0x03e847f } }, + /* 17 */ + { { 0x0b19ffd,0x247cacb,0x3c231c8,0x16ec648,0x201ba8d,0x2b172a3, + 0x103d678,0x2fb72db,0x04c1f13,0x0161bac }, + { 0x3e8ed09,0x171b949,0x2de20c3,0x0f06067,0x21e81a3,0x1b194be, + 0x0fd6c05,0x13c449e,0x0087086,0x006756b } }, + /* 18 */ + { { 0x09a4e1f,0x27d604c,0x00741e9,0x06fa49c,0x0ab7de7,0x3f4a348, + 0x25ef0be,0x158fc9a,0x33f7f9c,0x039f001 }, + { 0x2f59f76,0x3598e83,0x30501f6,0x15083f2,0x0669b3b,0x29980b5, + 0x0c1f7a7,0x0f02b02,0x0fec65b,0x0382141 } }, + /* 19 */ + { { 0x031b3ca,0x23da368,0x2d66f09,0x27b9b69,0x06d1cab,0x13c91ba, + 0x3d81fa9,0x25ad16f,0x0825b09,0x01e3c06 }, + { 0x225787f,0x3bf790e,0x2c9bb7e,0x0347732,0x28016f8,0x0d6ff0d, + 0x2a4877b,0x1d1e833,0x3b87e94,0x010e9dc } }, + /* 20 */ + { { 0x2b533d5,0x1ddcd34,0x1dc0625,0x3da86f7,0x3673b8a,0x1e7b0a4, + 0x3e7c9aa,0x19ac55d,0x251c3b2,0x02edb79 }, + { 0x25259b3,0x24c0ead,0x3480e7e,0x34f40e9,0x3d6a0af,0x2cf3f09, + 0x2c83d19,0x2e66f16,0x19a5d18,0x0182d18 } }, + /* 21 */ + { { 0x2e5aa1c,0x28e3846,0x3658bd6,0x0ad279c,0x1b8b765,0x397e1fb, + 0x130014e,0x3ff342c,0x3b2aeeb,0x02743c9 }, + { 0x2730a55,0x0918c5e,0x083aca9,0x0bf76ef,0x19c955b,0x300669c, + 0x01dfe0a,0x312341f,0x26d356e,0x0091295 } }, + /* 22 */ + { { 0x2cf1f96,0x00e52ba,0x271c6db,0x2a40930,0x19f2122,0x0b2f4ee, + 0x26ac1b8,0x3bda498,0x0873581,0x0117963 }, + { 0x38f9dbc,0x3d1e768,0x2040d3f,0x11ba222,0x3a8aaf1,0x1b82fb5, + 0x1adfb24,0x2de9251,0x21cc1e4,0x0301038 } }, + /* 23 */ + { { 0x38117b6,0x2bc001b,0x1433847,0x3fdce8d,0x3651969,0x3651d7a, + 0x2b35761,0x1bb1d20,0x097682c,0x00737d7 }, + { 0x1f04839,0x1dd6d04,0x16987db,0x3d12378,0x17dbeac,0x1c2cc86, + 0x121dd1b,0x3fcf6ca,0x1f8a92d,0x00119d5 } }, + /* 24 */ + { { 0x0e8ffcd,0x2b174af,0x1a82cc8,0x22cbf98,0x30d53c4,0x080b5b1, + 0x3161727,0x297cfdb,0x2113b83,0x0011b97 }, + { 0x0007f01,0x23fd936,0x3183e7b,0x0496bd0,0x07fb1ef,0x178680f, + 0x1c5ea63,0x0016c11,0x2c3303d,0x01b8041 } }, + /* 25 */ + { { 0x0dd73b1,0x1cd6122,0x10d948c,0x23e657b,0x3767070,0x15a8aad, + 0x385ea8c,0x33c7ce0,0x0ede901,0x0110965 }, + { 0x2d4b65b,0x2a8b244,0x0c37f8f,0x0ee5b24,0x394c234,0x3a5e347, + 0x26e4a15,0x39a3b4c,0x2514c2e,0x029e5be } }, + /* 26 */ + { { 0x23addd7,0x3ed8120,0x13b3359,0x20f959a,0x09e2a61,0x32fcf20, + 0x05b78e3,0x19ba7e2,0x1a9c697,0x0392b4b }, + { 0x2048a61,0x3dfd0a3,0x19a0357,0x233024b,0x3082d19,0x00fb63b, + 0x3a1af4c,0x1450ff0,0x046c37b,0x0317a50 } }, + /* 27 */ + { { 0x3e75f9e,0x294e30a,0x3a78476,0x3a32c48,0x36fd1a9,0x0427012, + 0x1e4df0b,0x11d1f61,0x1afdb46,0x018ca0f }, + { 0x2f2df15,0x0a33dee,0x27f4ce7,0x1542b66,0x3e592c4,0x20d2f30, + 0x3226ade,0x2a4e3ea,0x1ab1981,0x01a2f46 } }, + /* 28 */ + { { 0x087d659,0x3ab5446,0x305ac08,0x3d2cd64,0x33374d5,0x3f9d3f8, + 0x186981c,0x37f5a5a,0x2f53c6f,0x01254a4 }, + { 0x2cec896,0x1e32786,0x04844a8,0x043b16d,0x3d964b2,0x1935829, + 0x16f7e26,0x1a0dd9a,0x30d2603,0x003b1d4 } }, + /* 29 */ + { { 0x12687bb,0x04e816b,0x21fa2da,0x1abccb8,0x3a1f83b,0x375181e, + 
0x0f5ef51,0x0fc2ce4,0x3a66486,0x003d881 }, + { 0x3138233,0x1f8eec3,0x2718bd6,0x1b09caa,0x2dd66b9,0x1bb222b, + 0x1004072,0x1b73e3b,0x07208ed,0x03fc36c } }, + /* 30 */ + { { 0x095d553,0x3e84053,0x0a8a749,0x3f575a0,0x3a44052,0x3ced59b, + 0x3b4317f,0x03a8c60,0x13c8874,0x00c4ed4 }, + { 0x0d11549,0x0b8ab02,0x221cb40,0x02ed37b,0x2071ee1,0x1fc8c83, + 0x3987dd4,0x27e049a,0x0f986f1,0x00b4eaf } }, + /* 31 */ + { { 0x15581a2,0x2214060,0x11af4c2,0x1598c88,0x19a0a6d,0x32acba6, + 0x3a7a0f0,0x2337c66,0x210ded9,0x0300dbe }, + { 0x1fbd009,0x3822eb0,0x181629a,0x2401b45,0x30b68b1,0x2e78363, + 0x2b32779,0x006530b,0x2c4b6d4,0x029aca8 } }, + /* 32 */ + { { 0x13549cf,0x0f943db,0x265ed43,0x1bfeb35,0x06f3369,0x3847f2d, + 0x1bfdacc,0x26181a5,0x252af7c,0x02043b8 }, + { 0x159bb2c,0x143f85c,0x357b654,0x2f9d62c,0x2f7dfbe,0x1a7fa9c, + 0x057e74d,0x05d14ac,0x17a9273,0x035215c } }, + /* 33 */ + { { 0x0cb5a98,0x106a2bc,0x10bf117,0x24c7cc4,0x3d3da8f,0x2ce0ab7, + 0x14e2cba,0x1813866,0x1a72f9a,0x01a9811 }, + { 0x2b2411d,0x3034fe8,0x16e0170,0x0f9443a,0x0be0eb8,0x2196cf3, + 0x0c9f738,0x15e40ef,0x0faf9e1,0x034f917 } }, + /* 34 */ + { { 0x03f7669,0x3da6efa,0x3d6bce1,0x209ca1d,0x109f8ae,0x09109e3, + 0x08ae543,0x3067255,0x1dee3c2,0x0081dd5 }, + { 0x3ef1945,0x358765b,0x28c387b,0x3bec4b4,0x218813c,0x0b7d92a, + 0x3cd1d67,0x2c0367e,0x2e57154,0x0123717 } }, + /* 35 */ + { { 0x3e5a199,0x1e42ffd,0x0bb7123,0x33e6273,0x1e0efb8,0x294671e, + 0x3a2bfe0,0x3d11709,0x2eddff6,0x03cbec2 }, + { 0x0b5025f,0x0255d7c,0x1f2241c,0x35d03ea,0x0550543,0x202fef4, + 0x23c8ad3,0x354963e,0x015db28,0x0284fa4 } }, + /* 36 */ + { { 0x2b65cbc,0x1e8d428,0x0226f9f,0x1c8a919,0x10b04b9,0x08fc1e8, + 0x1ce241e,0x149bc99,0x2b01497,0x00afc35 }, + { 0x3216fb7,0x1374fd2,0x226ad3d,0x19fef76,0x0f7d7b8,0x1c21417, + 0x37b83f6,0x3a27eba,0x25a162f,0x010aa52 } }, + /* 37 */ + { { 0x2adf191,0x1ab42fa,0x28d7584,0x2409689,0x20f8a48,0x253707d, + 0x2030504,0x378f7a1,0x169c65e,0x00b0b76 }, + { 0x3849c17,0x085c764,0x10dd6d0,0x2e87689,0x1460488,0x30e9521, + 0x10c7063,0x1b6f120,0x21f42c5,0x03d0dfe } }, + /* 38 */ + { { 0x20f7dab,0x035c512,0x29ac6aa,0x24c5ddb,0x20f0497,0x17ce5e1, + 0x00a050f,0x1eaa14b,0x3335470,0x02abd16 }, + { 0x18d364a,0x0df0cf0,0x316585e,0x018f925,0x0d40b9b,0x17b1511, + 0x1716811,0x1caf3d0,0x10df4f2,0x0337d8c } }, + /* 39 */ + { { 0x2a8b7ef,0x0f188e3,0x2287747,0x06216f0,0x008e935,0x2f6a38d, + 0x1567722,0x0bfc906,0x0bada9e,0x03c3402 }, + { 0x014d3b1,0x099c749,0x2a76291,0x216c067,0x3b37549,0x14ef2f6, + 0x21b96d4,0x1ee2d71,0x2f5ca88,0x016f570 } }, + /* 40 */ + { { 0x09a3154,0x3d1a7bd,0x2e9aef0,0x255b8ac,0x03e85a5,0x2a492a7, + 0x2aec1ea,0x11c6516,0x3c8a09e,0x02a84b7 }, + { 0x1f69f1d,0x09c89d3,0x1e7326f,0x0b28bfd,0x0e0e4c8,0x1ea7751, + 0x18ce73b,0x2a406e7,0x273e48c,0x01b00db } }, + /* 41 */ + { { 0x36e3138,0x2b84a83,0x345a5cf,0x00096b4,0x16966ef,0x159caf1, + 0x13c64b4,0x2f89226,0x25896af,0x00a4bfd }, + { 0x2213402,0x1435117,0x09fed52,0x09d0e4b,0x0f6580e,0x2871cba, + 0x3b397fd,0x1c9d825,0x090311b,0x0191383 } }, + /* 42 */ + { { 0x07153f0,0x1087869,0x18c9e1e,0x1e64810,0x2b86c3b,0x0175d9c, + 0x3dce877,0x269de4e,0x393cab7,0x03c96b9 }, + { 0x1869d0c,0x06528db,0x02641f3,0x209261b,0x29d55c8,0x25ba517, + 0x3b5ea30,0x028f927,0x25313db,0x00e6e39 } }, + /* 43 */ + { { 0x2fd2e59,0x150802d,0x098f377,0x19a4957,0x135e2c0,0x38a95ce, + 0x1ab21a0,0x36c1b67,0x32f0f19,0x00e448b }, + { 0x3cad53c,0x3387800,0x17e3cfb,0x03f9970,0x3225b2c,0x2a84e1d, + 0x3af1d29,0x3fe35ca,0x2f8ce80,0x0237a02 } }, + /* 44 */ + { { 0x07bbb76,0x3aa3648,0x2758afb,0x1f085e0,0x1921c7e,0x3010dac, + 
0x22b74b1,0x230137e,0x1062e36,0x021c652 }, + { 0x3993df5,0x24a2ee8,0x126ab5f,0x2d7cecf,0x0639d75,0x16d5414, + 0x1aa78a8,0x3f78404,0x26a5b74,0x03f0c57 } }, + /* 45 */ + { { 0x0d6ecfa,0x3f506ba,0x3f86561,0x3d86bb1,0x15f8c44,0x2491d07, + 0x052a7b4,0x2422261,0x3adee38,0x039b529 }, + { 0x193c75d,0x14bb451,0x1162605,0x293749c,0x370a70d,0x2e8b1f6, + 0x2ede937,0x2b95f4a,0x39a9be2,0x00d77eb } }, + /* 46 */ + { { 0x2736636,0x15bf36a,0x2b7e6b9,0x25eb8b2,0x209f51d,0x3cd2659, + 0x10bf410,0x034afec,0x3d71c83,0x0076971 }, + { 0x0ce6825,0x07920cf,0x3c3b5c4,0x23fe55c,0x015ad11,0x08c0dae, + 0x0552c7f,0x2e75a8a,0x0fddbf4,0x01c1df0 } }, + /* 47 */ + { { 0x2b9661c,0x0ffe351,0x3d71bf6,0x1ac34b3,0x3a1dfd3,0x211fe3d, + 0x33e140a,0x3f9100d,0x32ee50e,0x014ea18 }, + { 0x16d8051,0x1bfda1a,0x068a097,0x2571d3d,0x1daec0c,0x39389af, + 0x194dc35,0x3f3058a,0x36d34e1,0x000a329 } }, + /* 48 */ + { { 0x09877ee,0x351f73f,0x0002d11,0x0420074,0x2c8b362,0x130982d, + 0x02c1175,0x3c11b40,0x0d86962,0x001305f }, + { 0x0daddf5,0x2f4252c,0x15c06d9,0x1d49339,0x1bea235,0x0b680ed, + 0x3356e67,0x1d1d198,0x1e9fed9,0x03dee93 } }, + /* 49 */ + { { 0x3e1263f,0x2fe8d3a,0x3ce6d0d,0x0d5c6b9,0x3557637,0x0a9bd48, + 0x0405538,0x0710749,0x2005213,0x038c7e5 }, + { 0x26b6ec6,0x2e485ba,0x3c44d1b,0x0b9cf0b,0x037a1d1,0x27428a5, + 0x0e7eac8,0x351ef04,0x259ce34,0x02a8e98 } }, + /* 50 */ + { { 0x2f3dcd3,0x3e77d4d,0x3360fbc,0x1434afd,0x36ceded,0x3d413d6, + 0x1710fad,0x36bb924,0x1627e79,0x008e637 }, + { 0x109569e,0x1c168db,0x3769cf4,0x2ed4527,0x0ea0619,0x17d80d3, + 0x1c03773,0x18843fe,0x1b21c04,0x015c5fd } }, + /* 51 */ + { { 0x1dd895e,0x08a7248,0x04519fe,0x001030a,0x18e5185,0x358dfb3, + 0x13d2391,0x0a37be8,0x0560e3c,0x019828b }, + { 0x27fcbd0,0x2a22bb5,0x30969cc,0x1e03aa7,0x1c84724,0x0ba4ad3, + 0x32f4817,0x0914cca,0x14c4f52,0x01893b9 } }, + /* 52 */ + { { 0x097eccc,0x1273936,0x00aa095,0x364fe62,0x04d49d1,0x10e9f08, + 0x3c24230,0x3ef01c8,0x2fb92bd,0x013ce4a }, + { 0x1e44fd9,0x27e3e9f,0x2156696,0x3915ecc,0x0b66cfb,0x1a3af0f, + 0x2fa8033,0x0e6736c,0x177ccdb,0x0228f9e } }, + /* 53 */ + { { 0x2c4b125,0x06207c1,0x0a8cdde,0x003db8f,0x1ae34e3,0x31e84fa, + 0x2999de5,0x11013bd,0x02370c2,0x00e2234 }, + { 0x0f91081,0x200d591,0x1504762,0x1857c05,0x23d9fcf,0x0cb34db, + 0x27edc86,0x08cd860,0x2471810,0x029798b } }, + /* 54 */ + { { 0x3acd6c8,0x097b8cb,0x3c661a8,0x15152f2,0x1699c63,0x237e64c, + 0x23edf79,0x16b7033,0x0e6466a,0x00b11da }, + { 0x0a64bc9,0x1bfe324,0x1f5cb34,0x08391de,0x0630a60,0x3017a21, + 0x09d064b,0x14a8365,0x041f9e6,0x01ed799 } }, + /* 55 */ + { { 0x128444a,0x2508b07,0x2a39216,0x362f84d,0x2e996c5,0x2c31ff3, + 0x07afe5f,0x1d1288e,0x3cb0c8d,0x02e2bdc }, + { 0x38b86fd,0x3a0ea8c,0x1cff5fd,0x1629629,0x3fee3f1,0x02b250c, + 0x2e8f6f2,0x0225727,0x15f7f3f,0x0280d8e } }, + /* 56 */ + { { 0x10f7770,0x0f1aee8,0x0e248c7,0x20684a8,0x3a6f16d,0x06f0ae7, + 0x0df6825,0x2d4cc40,0x301875f,0x012f8da }, + { 0x3b56dbb,0x1821ba7,0x24f8922,0x22c1f9e,0x0306fef,0x1b54bc8, + 0x2ccc056,0x00303ba,0x2871bdc,0x0232f26 } }, + /* 57 */ + { { 0x0dac4ab,0x0625730,0x3112e13,0x101c4bf,0x3a874a4,0x2873b95, + 0x32ae7c6,0x0d7e18c,0x13e0c08,0x01139d5 }, + { 0x334002d,0x00fffdd,0x025c6d5,0x22c2cd1,0x19d35cb,0x3a1ce2d, + 0x3702760,0x3f06257,0x03a5eb8,0x011c29a } }, + /* 58 */ + { { 0x0513482,0x1d87724,0x276a81b,0x0a807a4,0x3028720,0x339cc20, + 0x2441ee0,0x31bbf36,0x290c63d,0x0059041 }, + { 0x106a2ed,0x0d2819b,0x100bf50,0x114626c,0x1dd4d77,0x2e08632, + 0x14ae72a,0x2ed3f64,0x1fd7abc,0x035cd1e } }, + /* 59 */ + { { 0x2d4c6e5,0x3bec596,0x104d7ed,0x23d6c1b,0x0262cf0,0x15d72c5, + 
0x2d5bb18,0x199ac4b,0x1e30771,0x020591a }, + { 0x21e291e,0x2e75e55,0x1661d7a,0x08b0778,0x3eb9daf,0x0d78144, + 0x1827eb1,0x0fe73d2,0x123f0dd,0x0028db7 } }, + /* 60 */ + { { 0x1d5533c,0x34cb1d0,0x228f098,0x27a1a11,0x17c5f5a,0x0d26f44, + 0x2228ade,0x2c460e6,0x3d6fdba,0x038cc77 }, + { 0x3cc6ed8,0x02ada1a,0x260e510,0x2f7bde8,0x37160c3,0x33a1435, + 0x23d9a7b,0x0ce2641,0x02a492e,0x034ed1e } }, + /* 61 */ + { { 0x3821f90,0x26dba3c,0x3aada14,0x3b59bad,0x292edd9,0x2804c45, + 0x3669531,0x296f42e,0x35a4c86,0x01ca049 }, + { 0x3ff47e5,0x2163df4,0x2441503,0x2f18405,0x15e1616,0x37f66ec, + 0x30f11a7,0x141658a,0x27ece14,0x00b018b } }, + /* 62 */ + { { 0x159ac2e,0x3e65bc0,0x2713a76,0x0db2f6c,0x3281e77,0x2391811, + 0x16d2880,0x1fbc4ab,0x1f92c4e,0x00a0a8d }, + { 0x0ce5cd2,0x152c7b0,0x02299c3,0x3244de7,0x2cf99ef,0x3a0b047, + 0x2caf383,0x0aaf664,0x113554d,0x031c735 } }, + /* 63 */ + { { 0x1b578f4,0x177a702,0x3a7a488,0x1638ebf,0x31884e2,0x2460bc7, + 0x36b1b75,0x3ce8e3d,0x340cf47,0x03143d9 }, + { 0x34b68ea,0x12b7ccd,0x1fe2a9c,0x08da659,0x0a406f3,0x1694c14, + 0x06a2228,0x16370be,0x3a72129,0x02e7b2c } }, + /* 64 */ + { { 0x0f8b16a,0x21043bd,0x266a56f,0x3fb11ec,0x197241a,0x36721f0, + 0x006b8e6,0x2ac6c29,0x202cd42,0x0200fcf }, + { 0x0dbec69,0x0c26a01,0x105f7f0,0x3dceeeb,0x3a83b85,0x363865f, + 0x097273a,0x2b70718,0x00e5067,0x03025d1 } }, + /* 65 */ + { { 0x379ab34,0x295bcb0,0x38d1846,0x22e1077,0x3a8ee06,0x1db1a3b, + 0x3144591,0x07cc080,0x2d5915f,0x03c6bcc }, + { 0x175bd50,0x0dd4c57,0x27bc99c,0x2ebdcbd,0x3837cff,0x235dc8f, + 0x13a4184,0x0722c18,0x130e2d4,0x008f43c } }, + /* 66 */ + { { 0x01500d9,0x2adbb7d,0x2da8857,0x397f2fa,0x10d890a,0x25c9654, + 0x3e86488,0x3eb754b,0x1d6c0a3,0x02c0a23 }, + { 0x10bcb08,0x083cc19,0x2e16853,0x04da575,0x271af63,0x2626a9d, + 0x3520a7b,0x32348c7,0x24ff408,0x03ff4dc } }, + /* 67 */ + { { 0x058e6cb,0x1a3992d,0x1d28539,0x080c5e9,0x2992dad,0x2a9d7d5, + 0x14ae0b7,0x09b7ce0,0x34ad78c,0x03d5643 }, + { 0x30ba55a,0x092f4f3,0x0bae0fc,0x12831de,0x20fc472,0x20ed9d2, + 0x29864f6,0x1288073,0x254f6f7,0x00635b6 } }, + /* 68 */ + { { 0x1be5a2b,0x0f88975,0x33c6ed9,0x20d64d3,0x06fe799,0x0989bff, + 0x1409262,0x085a90c,0x0d97990,0x0142eed }, + { 0x17ec63e,0x06471b9,0x0db2378,0x1006077,0x265422c,0x08db83d, + 0x28099b0,0x1270d06,0x11801fe,0x00ac400 } }, + /* 69 */ + { { 0x3391593,0x22d7166,0x30fcfc6,0x2896609,0x3c385f5,0x066b72e, + 0x04f3aad,0x2b831c5,0x19983fb,0x0375562 }, + { 0x0b82ff4,0x222e39d,0x34c993b,0x101c79c,0x2d2e03c,0x0f00c8a, + 0x3a9eaf4,0x1810669,0x151149d,0x039b931 } }, + /* 70 */ + { { 0x29af288,0x1956ec7,0x293155f,0x193deb6,0x1647e1a,0x2ca0839, + 0x297e4bc,0x15bfd0d,0x1b107ed,0x0147803 }, + { 0x31c327e,0x05a6e1d,0x02ad43d,0x02d2a5b,0x129cdb2,0x37ad1de, + 0x3d51f53,0x245df01,0x2414982,0x0388bd0 } }, + /* 71 */ + { { 0x35f1abb,0x17a3d18,0x0874cd4,0x2d5a14e,0x17edc0c,0x16a00d3, + 0x072c1fb,0x1232725,0x33d52dc,0x03dc24d }, + { 0x0af30d6,0x259aeea,0x369c401,0x12bc4de,0x295bf5f,0x0d8711f, + 0x26162a9,0x16c44e5,0x288e727,0x02f54b4 } }, + /* 72 */ + { { 0x05fa877,0x1571ea7,0x3d48ab1,0x1c9f4e8,0x017dad6,0x0f46276, + 0x343f9e7,0x1de990f,0x0e4c8aa,0x028343e }, + { 0x094f92d,0x3abf633,0x1b3a0bb,0x2f83137,0x0d818c8,0x20bae85, + 0x0c65f8b,0x1a8008b,0x0c7946d,0x0295b1e } }, + /* 73 */ + { { 0x1d09529,0x08e46c3,0x1fcf296,0x298f6b7,0x1803e0e,0x2d6fd20, + 0x37351f5,0x0d9e8b1,0x1f8731a,0x0362fbf }, + { 0x00157f4,0x06750bf,0x2650ab9,0x35ffb23,0x2f51cae,0x0b522c2, + 0x39cb400,0x191e337,0x0a5ce9f,0x021529a } }, + /* 74 */ + { { 0x3506ea5,0x17d9ed8,0x0d66dc3,0x22693f8,0x19286c4,0x3a57353, + 
0x101d3bf,0x1aa54fc,0x20b9884,0x0172b3a }, + { 0x0eac44d,0x37d8327,0x1c3aa90,0x3d0d534,0x23db29a,0x3576eaf, + 0x1d3de8a,0x3bea423,0x11235e4,0x039260b } }, + /* 75 */ + { { 0x34cd55e,0x01288b0,0x1132231,0x2cc9a03,0x358695b,0x3e87650, + 0x345afa1,0x01267ec,0x3f616b2,0x02011ad }, + { 0x0e7d098,0x0d6078e,0x0b70b53,0x237d1bc,0x0d7f61e,0x132de31, + 0x1ea9ea4,0x2bd54c3,0x27b9082,0x03ac5f2 } }, + /* 76 */ + { { 0x2a145b9,0x06d661d,0x31ec175,0x03f06f1,0x3a5cf6b,0x249c56e, + 0x2035653,0x384c74f,0x0bafab5,0x0025ec0 }, + { 0x25f69e1,0x1b23a55,0x1199aa6,0x16ad6f9,0x077e8f7,0x293f661, + 0x33ba11d,0x3327980,0x07bafdb,0x03e571d } }, + /* 77 */ + { { 0x2bae45e,0x3c074ef,0x2955558,0x3c312f1,0x2a8ebe9,0x2f193f1, + 0x3705b1d,0x360deba,0x01e566e,0x00d4498 }, + { 0x21161cd,0x1bc787e,0x2f87933,0x3553197,0x1328ab8,0x093c879, + 0x17eee27,0x2adad1d,0x1236068,0x003be5c } }, + /* 78 */ + { { 0x0ca4226,0x2633dd5,0x2c8e025,0x0e3e190,0x05eede1,0x1a385e4, + 0x163f744,0x2f25522,0x1333b4f,0x03f05b6 }, + { 0x3c800ca,0x1becc79,0x2daabe9,0x0c499e2,0x1138063,0x3fcfa2d, + 0x2244976,0x1e85cf5,0x2f1b95d,0x0053292 } }, + /* 79 */ + { { 0x12f81d5,0x1dc6eaf,0x11967a4,0x1a407df,0x31a5f9d,0x2b67241, + 0x18bef7c,0x08c7762,0x063f59c,0x01015ec }, + { 0x1c05c0a,0x360bfa2,0x1f85bff,0x1bc7703,0x3e4911c,0x0d685b6, + 0x2fccaea,0x02c4cef,0x164f133,0x0070ed7 } }, + /* 80 */ + { { 0x0ec21fe,0x052ffa0,0x3e825fe,0x1ab0956,0x3f6ce11,0x3d29759, + 0x3c5a072,0x18ebe62,0x148db7e,0x03eb49c }, + { 0x1ab05b3,0x02dab0a,0x1ae690c,0x0f13894,0x137a9a8,0x0aab79f, + 0x3dc875c,0x06a1029,0x1e39f0e,0x01dce1f } }, + /* 81 */ + { { 0x16c0dd7,0x3b31269,0x2c741e9,0x3611821,0x2a5cffc,0x1416bb3, + 0x3a1408f,0x311fa3d,0x1c0bef0,0x02cdee1 }, + { 0x00e6a8f,0x1adb933,0x0f23359,0x2fdace2,0x2fd6d4b,0x0e73bd3, + 0x2453fac,0x0a356ae,0x2c8f9f6,0x02704d6 } }, + /* 82 */ + { { 0x0e35743,0x28c80a1,0x0def32a,0x2c6168f,0x1320d6a,0x37c6606, + 0x21b1761,0x2147ee0,0x21fc433,0x015c84d }, + { 0x1fc9168,0x36cda9c,0x003c1f0,0x1cd7971,0x15f98ba,0x1ef363d, + 0x0ca87e3,0x046f7d9,0x3c9e6bb,0x0372eb0 } }, + /* 83 */ + { { 0x118cbe2,0x3665a11,0x304ef01,0x062727a,0x3d242fc,0x11ffbaf, + 0x3663c7e,0x1a189c9,0x09e2d62,0x02e3072 }, + { 0x0e1d569,0x162f772,0x0cd051a,0x322df62,0x3563809,0x047cc7a, + 0x027fd9f,0x08b509b,0x3da2f94,0x01748ee } }, + /* 84 */ + { { 0x1c8f8be,0x31ca525,0x22bf0a1,0x200efcd,0x02961c4,0x3d8f52b, + 0x018403d,0x3a40279,0x1cb91ec,0x030427e }, + { 0x0945705,0x0257416,0x05c0c2d,0x25b77ae,0x3b9083d,0x2901126, + 0x292b8d7,0x07b8611,0x04f2eee,0x026f0cd } }, + /* 85 */ + { { 0x2913074,0x2b8d590,0x02b10d5,0x09d2295,0x255491b,0x0c41cca, + 0x1ca665b,0x133051a,0x1525f1a,0x00a5647 }, + { 0x04f983f,0x3d6daee,0x04e1e76,0x1067d7e,0x1be7eef,0x02ea862, + 0x00d4968,0x0ccb048,0x11f18ef,0x018dd95 } }, + /* 86 */ + { { 0x22976cc,0x17c5395,0x2c38bda,0x3983bc4,0x222bca3,0x332a614, + 0x3a30646,0x261eaef,0x1c808e2,0x02f6de7 }, + { 0x306a772,0x32d7272,0x2dcefd2,0x2abf94d,0x038f475,0x30ad76e, + 0x23e0227,0x3052b0a,0x001add3,0x023ba18 } }, + /* 87 */ + { { 0x0ade873,0x25a6069,0x248ccbe,0x13713ee,0x17ee9aa,0x28152e9, + 0x2e28995,0x2a92cb3,0x17a6f77,0x024b947 }, + { 0x190a34d,0x2ebea1c,0x1ed1948,0x16fdaf4,0x0d698f7,0x32bc451, + 0x0ee6e30,0x2aaab40,0x06f0a56,0x01460be } }, + /* 88 */ + { { 0x24cc99c,0x1884b1e,0x1ca1fba,0x1a0f9b6,0x2ff609b,0x2b26316, + 0x3b27cb5,0x29bc976,0x35d4073,0x024772a }, + { 0x3575a70,0x1b30f57,0x07fa01b,0x0e5be36,0x20cb361,0x26605cd, + 0x1d4e8c8,0x13cac59,0x2db9797,0x005e833 } }, + /* 89 */ + { { 0x36c8d3a,0x1878a81,0x124b388,0x0e4843e,0x1701aad,0x0ea0d76, + 
0x10eae41,0x37d0653,0x36c7f4c,0x00ba338 }, + { 0x37a862b,0x1cf6ac0,0x08fa912,0x2dd8393,0x101ba9b,0x0eebcb7, + 0x2453883,0x1a3cfe5,0x2cb34f6,0x03d3331 } }, + /* 90 */ + { { 0x1f79687,0x3d4973c,0x281544e,0x2564bbe,0x17c5954,0x171e34a, + 0x231741a,0x3cf2784,0x0889a0d,0x02b036d }, + { 0x301747f,0x3f1c477,0x1f1386b,0x163bc5f,0x1592b93,0x332daed, + 0x080e4f5,0x1d28b96,0x26194c9,0x0256992 } }, + /* 91 */ + { { 0x15a4c93,0x07bf6b0,0x114172c,0x1ce0961,0x140269b,0x1b2c2eb, + 0x0dfb1c1,0x019ddaa,0x0ba2921,0x008c795 }, + { 0x2e6d2dc,0x37e45e2,0x2918a70,0x0fce444,0x34d6aa6,0x396dc88, + 0x27726b5,0x0c787d8,0x032d8a7,0x02ac2f8 } }, + /* 92 */ + { { 0x1131f2d,0x2b43a63,0x3101097,0x38cec13,0x0637f09,0x17a69d2, + 0x086196d,0x299e46b,0x0802cf6,0x03c6f32 }, + { 0x0daacb4,0x1a4503a,0x100925c,0x15583d9,0x23c4e40,0x1de4de9, + 0x1cc8fc4,0x2c9c564,0x0695aeb,0x02145a5 } }, + /* 93 */ + { { 0x1dcf593,0x17050fc,0x3e3bde3,0x0a6c062,0x178202b,0x2f7674f, + 0x0dadc29,0x15763a7,0x1d2daad,0x023d9f6 }, + { 0x081ea5f,0x045959d,0x190c841,0x3a78d31,0x0e7d2dd,0x1414fea, + 0x1d43f40,0x22d77ff,0x2b9c072,0x03e115c } }, + /* 94 */ + { { 0x3af71c9,0x29e9c65,0x25655e1,0x111e9cd,0x3a14494,0x3875418, + 0x34ae070,0x0b06686,0x310616b,0x03b7b89 }, + { 0x1734121,0x00d3d44,0x29f0b2f,0x1552897,0x31cac6e,0x1030bb3, + 0x0148f3a,0x35fd237,0x29b44eb,0x027f49f } }, + /* 95 */ + { { 0x2e2cb16,0x1d962bd,0x19b63cc,0x0b3f964,0x3e3eb7d,0x1a35560, + 0x0c58161,0x3ce1d6a,0x3b6958f,0x029030b }, + { 0x2dcc158,0x3b1583f,0x30568c9,0x31957c8,0x27ad804,0x28c1f84, + 0x3967049,0x37b3f64,0x3b87dc6,0x0266f26 } }, + /* 96 */ + { { 0x27dafc6,0x2548764,0x0d1984a,0x1a57027,0x252c1fb,0x24d9b77, + 0x1581a0f,0x1f99276,0x10ba16d,0x026af88 }, + { 0x0915220,0x2be1292,0x16c6480,0x1a93760,0x2fa7317,0x1a07296, + 0x1539871,0x112c31f,0x25787f3,0x01e2070 } }, + /* 97 */ + { { 0x0bcf3ff,0x266d478,0x34f6933,0x31449fd,0x00d02cb,0x340765a, + 0x3465a2d,0x225023e,0x319a30e,0x00579b8 }, + { 0x20e05f4,0x35b834f,0x0404646,0x3710d62,0x3fad7bd,0x13e1434, + 0x21c7d1c,0x1cb3af9,0x2cf1911,0x003957e } }, + /* 98 */ + { { 0x0787564,0x36601be,0x1ce67e9,0x084c7a1,0x21a3317,0x2067a35, + 0x0158cab,0x195ddac,0x1766fe9,0x035cf42 }, + { 0x2b7206e,0x20d0947,0x3b42424,0x03f1862,0x0a51929,0x38c2948, + 0x0bb8595,0x2942d77,0x3748f15,0x0249428 } }, + /* 99 */ + { { 0x2577410,0x3c23e2f,0x28c6caf,0x00d41de,0x0fd408a,0x30298e9, + 0x363289e,0x2302fc7,0x082c1cc,0x01dd050 }, + { 0x30991cd,0x103e9ba,0x029605a,0x19927f7,0x0c1ca08,0x0c93f50, + 0x28a3c7b,0x082e4e9,0x34d12eb,0x0232c13 } }, + /* 100 */ + { { 0x106171c,0x0b4155a,0x0c3fb1c,0x336c090,0x19073e9,0x2241a10, + 0x0e6b4fd,0x0ed476e,0x1ef4712,0x039390a }, + { 0x0ec36f4,0x3754f0e,0x2a270b8,0x007fd2d,0x0f9d2dc,0x1e6a692, + 0x066e078,0x1954974,0x2ff3c6e,0x00def28 } }, + /* 101 */ + { { 0x3562470,0x0b8f1f7,0x0ac94cd,0x28b0259,0x244f272,0x031e4ef, + 0x2d5df98,0x2c8a9f1,0x2dc3002,0x016644f }, + { 0x350592a,0x0e6a0d5,0x1e027a1,0x2039e0f,0x399e01d,0x2817593, + 0x0c0375e,0x3889b3e,0x24ab013,0x010de1b } }, + /* 102 */ + { { 0x256b5a6,0x0ac3b67,0x28f9ff3,0x29b67f1,0x30750d9,0x25e11a9, + 0x15e8455,0x279ebb0,0x298b7e7,0x0218e32 }, + { 0x2fc24b2,0x2b82582,0x28f22f5,0x2bd36b3,0x305398e,0x3b2e9e3, + 0x365dd0a,0x29bc0ed,0x36a7b3a,0x007b374 } }, + /* 103 */ + { { 0x05ff2f3,0x2b3589b,0x29785d3,0x300a1ce,0x0a2d516,0x0844355, + 0x14c9fad,0x3ccb6b6,0x385d459,0x0361743 }, + { 0x0b11da3,0x002e344,0x18c49f7,0x0c29e0c,0x1d2c22c,0x08237b3, + 0x2988f49,0x0f18955,0x1c3b4ed,0x02813c6 } }, + /* 104 */ + { { 0x17f93bd,0x249323b,0x11f6087,0x174e4bd,0x3cb64ac,0x086dc6b, + 
0x2e330a8,0x142c1f2,0x2ea5c09,0x024acbb }, + { 0x1b6e235,0x3132521,0x00f085a,0x2a4a4db,0x1ab2ca4,0x0142224, + 0x3aa6b3e,0x09db203,0x2215834,0x007b9e0 } }, + /* 105 */ + { { 0x23e79f7,0x28b8039,0x1906a60,0x2cbce67,0x1f590e7,0x181f027, + 0x21054a6,0x3854240,0x2d857a6,0x03cfcb3 }, + { 0x10d9b55,0x1443cfc,0x2648200,0x2b36190,0x09d2fcf,0x22f439f, + 0x231aa7e,0x3884395,0x0543da3,0x003d5a9 } }, + /* 106 */ + { { 0x043e0df,0x06ffe84,0x3e6d5b2,0x3327001,0x26c74b6,0x12a145e, + 0x256ec0d,0x3898c69,0x3411969,0x02f63c5 }, + { 0x2b7494a,0x2eee1af,0x38388a9,0x1bd17ce,0x21567d4,0x13969e6, + 0x3a12a7a,0x3e8277d,0x03530cc,0x00b4687 } }, + /* 107 */ + { { 0x06508da,0x38e04d4,0x15a7192,0x312875e,0x3336180,0x2a6512c, + 0x1b59497,0x2e91b37,0x25eb91f,0x02841e9 }, + { 0x394d639,0x0747143,0x37d7e6d,0x1d62962,0x08b4af3,0x34df287, + 0x3c5584b,0x26bc869,0x20af87a,0x0060f5d } }, + /* 108 */ + { { 0x1de59a4,0x1a5c443,0x2f8729d,0x01c3a2f,0x0f1ad8d,0x3cbaf9e, + 0x1b49634,0x35d508a,0x39dc269,0x0075105 }, + { 0x390d30e,0x37033e0,0x110cb32,0x14c37a0,0x20a3b27,0x2f00ce6, + 0x2f1dc52,0x34988c6,0x0c29606,0x01dc7e7 } }, + /* 109 */ + { { 0x1040739,0x24f9de1,0x2939999,0x2e6009a,0x244539d,0x17e3f09, + 0x00f6f2f,0x1c63b3d,0x2310362,0x019109e }, + { 0x1428aa8,0x3cb61e1,0x09a84f4,0x0ffafed,0x07b7adc,0x08f406b, + 0x1b2c6df,0x035b480,0x3496ae9,0x012766d } }, + /* 110 */ + { { 0x35d1099,0x2362f10,0x1a08cc7,0x13a3a34,0x12adbcd,0x32da290, + 0x02e2a02,0x151140b,0x01b3f60,0x0240df6 }, + { 0x34c7b61,0x2eb09c1,0x172e7cd,0x2ad5eff,0x2fe2031,0x25b54d4, + 0x0cec965,0x18e7187,0x26a7cc0,0x00230f7 } }, + /* 111 */ + { { 0x2d552ab,0x374083d,0x01f120f,0x2601736,0x156baff,0x04d44a4, + 0x3b7c3e9,0x1acbc1b,0x0424579,0x031a425 }, + { 0x1231bd1,0x0eba710,0x020517b,0x21d7316,0x21eac6e,0x275a848, + 0x0837abf,0x0eb0082,0x302cafe,0x00fe8f6 } }, + /* 112 */ + { { 0x1058880,0x28f9941,0x03f2d75,0x3bd90e5,0x17da365,0x2ac9249, + 0x07861cf,0x023fd05,0x1b0fdb8,0x031712f }, + { 0x272b56b,0x04f8d2c,0x043a735,0x25446e4,0x1c8327e,0x221125a, + 0x0ce37df,0x2dad7f6,0x39446c2,0x00b55b6 } }, + /* 113 */ + { { 0x346ac6b,0x05e0bff,0x2425246,0x0981e8b,0x1d19f79,0x2692378, + 0x3ea3c40,0x2e90beb,0x19de503,0x003d5af }, + { 0x05cda49,0x353b44d,0x299d137,0x3f205bc,0x2821158,0x3ad0d00, + 0x06a54aa,0x2d7c79f,0x39d1173,0x01000ee } }, + /* 114 */ + { { 0x0803387,0x3a06268,0x14043b8,0x3d4e72f,0x1ece115,0x0a1dfc8, + 0x17208dd,0x0be790a,0x122a07f,0x014dd95 }, + { 0x0a4182d,0x202886a,0x1f79a49,0x1e8c867,0x0a2bbd0,0x28668b5, + 0x0d0a2e1,0x115259d,0x3586c5d,0x01e815b } }, + /* 115 */ + { { 0x18a2a47,0x2c95627,0x2773646,0x1230f7c,0x15b5829,0x2fc354e, + 0x2c000ea,0x099d547,0x2f17a1a,0x01df520 }, + { 0x3853948,0x06f6561,0x3feeb8a,0x2f5b3ef,0x3a6f817,0x01a0791, + 0x2ec0578,0x2c392ad,0x12b2b38,0x0104540 } }, + /* 116 */ + { { 0x1e28ced,0x0fc3d1b,0x2c473c7,0x1826c4f,0x21d5da7,0x39718e4, + 0x38ce9e6,0x0251986,0x172fbea,0x0337c11 }, + { 0x053c3b0,0x0f162db,0x043c1cb,0x04111ee,0x297fe3c,0x32e5e03, + 0x2b8ae12,0x0c427ec,0x1da9738,0x03b9c0f } }, + /* 117 */ + { { 0x357e43a,0x054503f,0x11b8345,0x34ec6e0,0x2d44660,0x3d0ae61, + 0x3b5dff8,0x33884ac,0x09da162,0x00a82b6 }, + { 0x3c277ba,0x129a51a,0x027664e,0x1530507,0x0c788c9,0x2afd89d, + 0x1aa64cc,0x1196450,0x367ac2b,0x0358b42 } }, + /* 118 */ + { { 0x0054ac4,0x1761ecb,0x378839c,0x167c9f7,0x2570058,0x0604a35, + 0x37cbf3b,0x0909bb7,0x3f2991c,0x02ce688 }, + { 0x0b16ae5,0x212857c,0x351b952,0x2c684db,0x30c6a05,0x09c01e0, + 0x23c137f,0x1331475,0x092c067,0x0013b40 } }, + /* 119 */ + { { 0x2e90393,0x0617466,0x24e61f4,0x0a528f5,0x03047b4,0x2153f05, + 
0x0001a69,0x30e1eb8,0x3c10177,0x0282a47 }, + { 0x22c831e,0x28fc06b,0x3e16ff0,0x208adc9,0x0bb76ae,0x28c1d6d, + 0x12c8a15,0x031063c,0x1889ed2,0x002133e } }, + /* 120 */ + { { 0x0a6becf,0x14277bf,0x3328d98,0x201f7fe,0x12fceae,0x1de3a2e, + 0x0a15c44,0x3ddf976,0x1b273ab,0x0355e55 }, + { 0x1b5d4f1,0x369e78c,0x3a1c210,0x12cf3e9,0x3aa52f0,0x309f082, + 0x112089d,0x107c753,0x24202d1,0x023853a } }, + /* 121 */ + { { 0x2897042,0x140d17c,0x2c4aeed,0x07d0d00,0x18d0533,0x22f7ec8, + 0x19c194c,0x3456323,0x2372aa4,0x0165f86 }, + { 0x30bd68c,0x1fb06b3,0x0945032,0x372ac09,0x06d4be0,0x27f8fa1, + 0x1c8d7ac,0x137a96e,0x236199b,0x0328fc0 } }, + /* 122 */ + { { 0x170bd20,0x2842d58,0x1de7592,0x3c5b4fd,0x20ea897,0x12cab78, + 0x363ff14,0x01f928c,0x17e309c,0x02f79ff }, + { 0x0f5432c,0x2edb4ae,0x044b516,0x32f810d,0x2210dc1,0x23e56d6, + 0x301e6ff,0x34660f6,0x10e0a7d,0x02d88eb } }, + /* 123 */ + { { 0x0c7b65b,0x2f59d58,0x2289a75,0x2408e92,0x1ab8c55,0x1ec99e5, + 0x220fd0d,0x04defe0,0x24658ec,0x035aa8b }, + { 0x138bb85,0x2f002d4,0x295c10a,0x08760ce,0x28c31d1,0x1c0a8cb, + 0x0ff00b1,0x144eac9,0x2e02dcc,0x0044598 } }, + /* 124 */ + { { 0x3b42b87,0x050057b,0x0dff781,0x1c06db1,0x1bd9f5d,0x1f5f04a, + 0x2cccd7a,0x143e19b,0x1cb94b7,0x036cfb8 }, + { 0x34837cf,0x3cf6c3c,0x0d4fb26,0x22ee55e,0x1e7eed1,0x315995f, + 0x2cdf937,0x1a96574,0x0425220,0x0221a99 } }, + /* 125 */ + { { 0x1b569ea,0x0d33ed9,0x19c13c2,0x107dc84,0x2200111,0x0569867, + 0x2dc85da,0x05ef22e,0x0eb018a,0x029c33d }, + { 0x04a6a65,0x3e5eba3,0x378f224,0x09c04d0,0x036e5cf,0x3df8258, + 0x3a609e4,0x1eddef8,0x2abd174,0x02a91dc } }, + /* 126 */ + { { 0x2a60cc0,0x1d84c5e,0x115f676,0x1840da0,0x2c79163,0x2f06ed6, + 0x198bb4b,0x3e5d37b,0x1dc30fa,0x018469b }, + { 0x15ee47a,0x1e32f30,0x16a530e,0x2093836,0x02e8962,0x3767b62, + 0x335adf3,0x27220db,0x2f81642,0x0173ffe } }, + /* 127 */ + { { 0x37a99cd,0x1533fe6,0x05a1c0d,0x27610f1,0x17bf3b9,0x0b1ce78, + 0x0a908f6,0x265300e,0x3237dc1,0x01b969a }, + { 0x3a5db77,0x2d15382,0x0d63ef8,0x1feb3d8,0x0b7b880,0x19820de, + 0x11c0c67,0x2af3396,0x38d242d,0x0120688 } }, + /* 128 */ + { { 0x1d0b34a,0x05ef00d,0x00a7e34,0x1ae0c9f,0x1440b38,0x300d8b4, + 0x37262da,0x3e50e3e,0x14ce0cd,0x00b1044 }, + { 0x195a0b1,0x173bc6b,0x03622ba,0x2a19f55,0x1c09b37,0x07921b2, + 0x16cdd20,0x24a5c9b,0x2bf42ff,0x00811de } }, + /* 129 */ + { { 0x0d65dbf,0x145cf06,0x1ad82f7,0x038ce7b,0x077bf94,0x33c4007, + 0x22d26bd,0x25ad9c0,0x09ac773,0x02b1990 }, + { 0x2261cc3,0x2ecdbf1,0x3e908b0,0x3246439,0x0213f7b,0x1179b04, + 0x01cebaa,0x0be1595,0x175cc12,0x033a39a } }, + /* 130 */ + { { 0x00a67d2,0x086d06f,0x248a0f1,0x0291134,0x362d476,0x166d1cd, + 0x044f1d6,0x2d2a038,0x365250b,0x0023f78 }, + { 0x08bf287,0x3b0f6a1,0x1d6eace,0x20b4cda,0x2c2a621,0x0912520, + 0x02dfdc9,0x1b35cd6,0x3d2565d,0x00bdf8b } }, + /* 131 */ + { { 0x3770fa7,0x2e4b6f0,0x03f9ae4,0x170de41,0x1095e8d,0x1dd845c, + 0x334e9d1,0x00ab953,0x12e9077,0x03196fa }, + { 0x2fd0a40,0x228c0fd,0x384b275,0x38ef339,0x3e7d822,0x3e5d9ef, + 0x24f5854,0x0ece9eb,0x247d119,0x012ffe3 } }, + /* 132 */ + { { 0x0ff1480,0x07487c0,0x1b16cd4,0x1f41d53,0x22ab8fb,0x2f83cfa, + 0x01d2efb,0x259f6b2,0x2e65772,0x00f9392 }, + { 0x05303e6,0x23cdb4f,0x23977e1,0x12e4898,0x03bd999,0x0c930f0, + 0x170e261,0x180a27b,0x2fd58ec,0x014e22b } }, + /* 133 */ + { { 0x25d7713,0x0c5fad7,0x09daad1,0x3b9d779,0x109b985,0x1d3ec98, + 0x35bc4fc,0x2f838cb,0x0d14f75,0x0173e42 }, + { 0x2657b12,0x10d4423,0x19e6760,0x296e5bb,0x2bfd421,0x25c3330, + 0x29f51f8,0x0338838,0x24060f0,0x029a62e } }, + /* 134 */ + { { 0x3748fec,0x2c5a1bb,0x2cf973d,0x289fa74,0x3e6e755,0x38997bf, + 
0x0b6544c,0x2b6358c,0x38a7aeb,0x02c50bb }, + { 0x3d5770a,0x06be7c5,0x012fad3,0x19cb2cd,0x266af3b,0x3ccd677, + 0x160d1bd,0x141d5af,0x2965851,0x034625a } }, + /* 135 */ + { { 0x3c41c08,0x255eacc,0x22e1ec5,0x2b151a3,0x087de94,0x311cbdb, + 0x016b73a,0x368e462,0x20b7981,0x0099ec3 }, + { 0x262b988,0x1539763,0x21e76e5,0x15445b4,0x1d8ddc7,0x34a9be6, + 0x10faf03,0x24e4d18,0x07aa111,0x02d538a } }, + /* 136 */ + { { 0x38a876b,0x048ad45,0x04b40a0,0x3fc2144,0x251ff96,0x13ca7dd, + 0x0b31ab1,0x3539814,0x28b5f87,0x0212aec }, + { 0x270790a,0x350e7e0,0x346bd5e,0x276178f,0x22d6cb5,0x3078884, + 0x355c1b6,0x15901d7,0x3671765,0x03950db } }, + /* 137 */ + { { 0x286e8d5,0x2409788,0x13be53f,0x2d21911,0x0353c95,0x10238e8, + 0x32f5bde,0x3a67b60,0x28b5b9c,0x001013d }, + { 0x381e8e5,0x0cef7a9,0x2f5bcad,0x06058f0,0x33cdf50,0x04672a8, + 0x1769600,0x31c055d,0x3df0ac1,0x00e9098 } }, + /* 138 */ + { { 0x2eb596d,0x197b326,0x12b4c29,0x39c08f2,0x101ea03,0x3804e58, + 0x04b4b62,0x28d9d1c,0x13f905e,0x0032a3f }, + { 0x11b2b61,0x08e9095,0x0d06925,0x270e43f,0x21eb7a8,0x0e4a98f, + 0x31d2be0,0x030cf9f,0x2644ddb,0x025b728 } }, + /* 139 */ + { { 0x07510af,0x2ed0e8e,0x2a01203,0x2a2a68d,0x0846fea,0x3e540de, + 0x3a57702,0x1677348,0x2123aad,0x010d8f8 }, + { 0x0246a47,0x0e871d0,0x124dca4,0x34b9577,0x2b362b8,0x363ebe5, + 0x3086045,0x26313e6,0x15cd8bb,0x0210384 } }, + /* 140 */ + { { 0x023e8a7,0x0817884,0x3a0bf12,0x3376371,0x3c808a8,0x18e9777, + 0x12a2721,0x35b538a,0x2bd30de,0x017835a }, + { 0x0fc0f64,0x1c8709f,0x2d8807a,0x0743957,0x242eec0,0x347e76c, + 0x27bef91,0x289689a,0x0f42945,0x01f7a92 } }, + /* 141 */ + { { 0x1060a81,0x3dbc739,0x1615abd,0x1cbe3e5,0x3e79f9c,0x1ab09a2, + 0x136c540,0x05b473f,0x2beebfd,0x02af0a8 }, + { 0x3e2eac7,0x19be474,0x04668ac,0x18f4b74,0x36f10ba,0x0a0b4c6, + 0x10e3770,0x3bf059e,0x3946c7e,0x013a8d4 } }, + /* 142 */ + { { 0x266309d,0x28be354,0x1a3eed8,0x3020651,0x10a51c6,0x1e31770, + 0x0af45a5,0x3ff0f3b,0x2891c94,0x00e9db9 }, + { 0x17b0d0f,0x33a291f,0x0a5f9aa,0x25a3d61,0x2963ace,0x39a5fef, + 0x230c724,0x1919146,0x10a465e,0x02084a8 } }, + /* 143 */ + { { 0x3ab8caa,0x31870f3,0x2390ef7,0x2103850,0x218eb8e,0x3a5ccf2, + 0x1dff677,0x2c59334,0x371599c,0x02a9f2a }, + { 0x0837bd1,0x3249cef,0x35d702f,0x3430dab,0x1c06407,0x108f692, + 0x221292f,0x05f0c5d,0x073fe06,0x01038e0 } }, + /* 144 */ + { { 0x3bf9b7c,0x2020929,0x30d0f4f,0x080fef8,0x3365d23,0x1f3e738, + 0x3e53209,0x1549afe,0x300b305,0x038d811 }, + { 0x0c6c2c7,0x2e6445b,0x3ee64dc,0x022e932,0x0726837,0x0deb67b, + 0x1ed4346,0x3857f73,0x277a3de,0x01950b5 } }, + /* 145 */ + { { 0x36c377a,0x0adb41e,0x08be3f3,0x11e40d1,0x36cb038,0x036a2bd, + 0x3dd3a82,0x1bc875b,0x2ee09bb,0x02994d2 }, + { 0x035facf,0x05e0344,0x07e630a,0x0ce772d,0x335e55a,0x111fce4, + 0x250fe1c,0x3bc89ba,0x32fdc9a,0x03cf2d9 } }, + /* 146 */ + { { 0x355fd83,0x1c67f8e,0x1d10eb3,0x1b21d77,0x0e0d7a4,0x173a9e1, + 0x2c9fa90,0x1c39cce,0x22eaae8,0x01f2bea }, + { 0x153b338,0x0534107,0x26c69b8,0x283be1f,0x3e0acc0,0x059cac3, + 0x13d1081,0x148bbee,0x3c1b9bd,0x002aac4 } }, + /* 147 */ + { { 0x2681297,0x3389e34,0x146addc,0x2c6d425,0x2cb350e,0x1986abc, + 0x0431737,0x04ba4b7,0x2028470,0x012e469 }, + { 0x2f8ddcf,0x3c4255c,0x1af4dcf,0x07a6a44,0x208ebf6,0x0dc90c3, + 0x34360ac,0x072ad23,0x0537232,0x01254d3 } }, + /* 148 */ + { { 0x07b7e9d,0x3df5c7c,0x116f83d,0x28c4f35,0x3a478ef,0x3011fb8, + 0x2f264b6,0x317b9e3,0x04fd65a,0x032bd1b }, + { 0x2aa8266,0x3431de4,0x04bba04,0x19a44da,0x0edf454,0x392c5ac, + 0x265168a,0x1dc3d5b,0x25704c6,0x00533a7 } }, + /* 149 */ + { { 0x25e8f91,0x1178fa5,0x2492994,0x2eb2c3c,0x0d3aca1,0x0322828, + 
0x1cc70f9,0x269c74c,0x0a53e4c,0x006edc2 }, + { 0x18bdd7a,0x2a79a55,0x26b1d5c,0x0200628,0x0734a05,0x3273c7b, + 0x13aa714,0x0040ac2,0x2f2da30,0x03e7449 } }, + /* 150 */ + { { 0x3f9563e,0x2f29eab,0x14a0749,0x3fad264,0x1dd077a,0x3d7c59c, + 0x3a0311b,0x331a789,0x0b9729e,0x0201ebf }, + { 0x1b08b77,0x2a4cdf2,0x3e387f8,0x21510f1,0x286c3a7,0x1dbf62e, + 0x3afa594,0x3363217,0x0d16568,0x01d46b7 } }, + /* 151 */ + { { 0x0715c0d,0x28e2d04,0x17f78ae,0x1c63dda,0x1d113ea,0x0fefc1b, + 0x1eab149,0x1d0fd99,0x0682537,0x00a7b11 }, + { 0x10bebbc,0x11c672d,0x14223d9,0x2ff9141,0x1399ee5,0x34b7b6c, + 0x0d5b3a8,0x01df643,0x0e392a4,0x03fe4dc } }, + /* 152 */ + { { 0x2b75b65,0x0b5a6f1,0x11c559a,0x3549999,0x24188f8,0x37a75f4, + 0x29f33e3,0x34068a2,0x38ba2a9,0x025dd91 }, + { 0x29af2c7,0x0988b64,0x0923885,0x1b539a4,0x1334f5d,0x226947a, + 0x2cc7e5a,0x20beb39,0x13fac2f,0x01d298c } }, + /* 153 */ + { { 0x35f079c,0x137f76d,0x2fbbb2f,0x254638d,0x185b07c,0x1f34db7, + 0x2cfcf0e,0x218f46d,0x2150ff4,0x02add6f }, + { 0x33fc9b7,0x0d9f005,0x0fd081b,0x0834965,0x2b90a74,0x102448d, + 0x3dbf03c,0x167d857,0x02e0b44,0x013afab } }, + /* 154 */ + { { 0x09f2c53,0x317f9d7,0x1411eb6,0x0463aba,0x0d25220,0x256b176, + 0x087633f,0x2bff322,0x07b2c1b,0x037e662 }, + { 0x10aaecb,0x23bb4a1,0x2272bb7,0x06c075a,0x09d4918,0x0736f2b, + 0x0dd511b,0x101625e,0x0a7779f,0x009ec10 } }, + /* 155 */ + { { 0x33b2eb2,0x0176dfd,0x2118904,0x022386c,0x2e0df85,0x2588c9f, + 0x1b71525,0x28fd540,0x137e4cf,0x02ce4f7 }, + { 0x3d75165,0x0c39ecf,0x3554a12,0x30af34c,0x2d66344,0x3ded408, + 0x36f1be0,0x0d065b0,0x012d046,0x0025623 } }, + /* 156 */ + { { 0x2601c3b,0x1824fc0,0x335fe08,0x3e33d70,0x0fb0252,0x252bfca, + 0x1cf2808,0x1922e55,0x1a9db9f,0x020721e }, + { 0x2f56c51,0x39a1f31,0x218c040,0x1a4fc5d,0x3fed471,0x0164d4e, + 0x388a419,0x06f1113,0x0f55fc1,0x03e8352 } }, + /* 157 */ + { { 0x1608e4d,0x3872778,0x022cbc6,0x044d60a,0x3010dda,0x15fb0b5, + 0x37ddc11,0x19f5bda,0x156b6a3,0x023a838 }, + { 0x383b3b4,0x1380bc8,0x353ca35,0x250fc07,0x169966b,0x3780f29, + 0x36632b2,0x2d6b13f,0x124fa00,0x00fd6ae } }, + /* 158 */ + { { 0x1739efb,0x2ec3656,0x2c0d337,0x3d39faf,0x1c751b0,0x04699f4, + 0x252dd64,0x095b8b6,0x0872b74,0x022f1da }, + { 0x2d3d253,0x38edca0,0x379fa5b,0x287d635,0x3a9f679,0x059d9ee, + 0x0ac168e,0x3cd3e87,0x19060fc,0x02ce1bc } }, + /* 159 */ + { { 0x3edcfc2,0x0f04d4b,0x2f0d31f,0x1898be2,0x25396bf,0x15ca230, + 0x02b4eae,0x2713668,0x0f71b06,0x0132d18 }, + { 0x38095ea,0x1ed34d6,0x3603ae6,0x165bf01,0x192bbf8,0x1852859, + 0x075f66b,0x1488f85,0x10895ef,0x014b035 } }, + /* 160 */ + { { 0x1339848,0x3084385,0x0c8d231,0x3a1c1de,0x0e87a28,0x255b85c, + 0x1de6616,0x2702e74,0x1382bb0,0x012b0f2 }, + { 0x198987d,0x381545a,0x34d619b,0x312b827,0x18b2376,0x28fe4cf, + 0x20b7651,0x017d077,0x0c7e397,0x00e0365 } }, + /* 161 */ + { { 0x1542e75,0x0d56aa0,0x39b701a,0x287b806,0x396c724,0x0935c21, + 0x3a29776,0x0debdac,0x171de26,0x00b38f8 }, + { 0x1d5bc1a,0x3fad27d,0x22b5cfe,0x1f89ddf,0x0a65560,0x144dd5b, + 0x2aac2f9,0x139353f,0x0520b62,0x00b9b36 } }, + /* 162 */ + { { 0x031c31d,0x16552e3,0x1a0c368,0x0016fc8,0x168533d,0x171e7b2, + 0x17626e7,0x275502f,0x14742c6,0x03285dd }, + { 0x2d2dbb2,0x3b6bffd,0x1d18cc6,0x2f45d2a,0x0fd0d8c,0x2915e3a, + 0x1e8793a,0x0b39a1d,0x3139cab,0x02a5da9 } }, + /* 163 */ + { { 0x3fb353d,0x147c6e4,0x3a720a6,0x22d5ff3,0x1d75cab,0x06c54a0, + 0x08cfa73,0x12666aa,0x3170a1f,0x021c829 }, + { 0x13e1b90,0x3a34dda,0x1fc38c3,0x02c5bdb,0x2d345dc,0x14aa1d0, + 0x28d00ab,0x224f23a,0x329c769,0x025c67b } }, + /* 164 */ + { { 0x0e35909,0x3bb6356,0x0116820,0x370cf77,0x29366d8,0x3881409, + 
0x3999d06,0x013075f,0x176e157,0x02941ca }, + { 0x0e70b2e,0x28dfab1,0x2a8a002,0x15da242,0x084dcf6,0x116ca97, + 0x31bf186,0x1dc9735,0x09df7b7,0x0264e27 } }, + /* 165 */ + { { 0x2da7a4b,0x3023c9e,0x1366238,0x00ff4e2,0x03abe9d,0x19bd44b, + 0x272e897,0x20b91ad,0x2aa202c,0x02a2201 }, + { 0x380184e,0x08112b4,0x0b85660,0x31049aa,0x3a8cb78,0x36113c5, + 0x1670c0a,0x373f9e7,0x3fb4738,0x00010ef } }, + /* 166 */ + { { 0x2d5192e,0x26d770d,0x32af8d5,0x34d1642,0x1acf885,0x05805e0, + 0x166d0a1,0x1219a0d,0x301ba6c,0x014bcfb }, + { 0x2dcb64d,0x19cca83,0x379f398,0x08e01a0,0x10a482c,0x0103cc2, + 0x0be5fa7,0x1f9d45b,0x1899ef2,0x00ca5af } }, + /* 167 */ + { { 0x14d81d7,0x2aea251,0x1b3c476,0x3bd47ae,0x29eade7,0x0715e61, + 0x1a21cd8,0x1c7a586,0x2bfaee5,0x00ee43f }, + { 0x096f7cb,0x0c08f95,0x1bc4939,0x361fed4,0x255be41,0x26fad73, + 0x31dd489,0x02c600f,0x29d9f81,0x01ba201 } }, + /* 168 */ + { { 0x03ea1db,0x1eac46d,0x1292ce3,0x2a54967,0x20a7ff1,0x3e13c61, + 0x1b02218,0x2b44e14,0x3eadefa,0x029c88a }, + { 0x30a9144,0x31e3b0a,0x19c5a2a,0x147cbe9,0x05a0240,0x051f38e, + 0x11eca56,0x31a4247,0x123bc2a,0x02fa535 } }, + /* 169 */ + { { 0x3226ce7,0x1251782,0x0b7072f,0x11e59fa,0x2b8afd7,0x169b18f, + 0x2a46f18,0x31d9bb7,0x2fe9be8,0x01de0b7 }, + { 0x1b38626,0x34aa90f,0x3ad1760,0x21ddbd9,0x3460ae7,0x1126736, + 0x1b86fc5,0x0b92cd0,0x167a289,0x000e0e1 } }, + /* 170 */ + { { 0x1ec1a0f,0x36bbf5e,0x1c972d8,0x3f73ace,0x13bbcd6,0x23d86a5, + 0x175ffc5,0x2d083d5,0x2c4adf7,0x036f661 }, + { 0x1f39eb7,0x2a20505,0x176c81a,0x3d6e636,0x16ee2fc,0x3cbdc5f, + 0x25475dc,0x2ef4151,0x3c46860,0x0238934 } }, + /* 171 */ + { { 0x2587390,0x3639526,0x0588749,0x13c32fb,0x212bb19,0x09660f1, + 0x207da4b,0x2bf211b,0x1c4407b,0x01506a6 }, + { 0x24c8842,0x105a498,0x05ffdb2,0x0ab61b0,0x26044c1,0x3dff3d8, + 0x1d14b44,0x0d74716,0x049f57d,0x030024b } }, + /* 172 */ + { { 0x32e61ef,0x31d70f7,0x35cad3c,0x320b86c,0x07e8841,0x027ca7d, + 0x2d30d19,0x2513718,0x2347286,0x01d7901 }, + { 0x3c237d0,0x107f16e,0x01c9e7d,0x3c3b13c,0x0c9537b,0x20af54d, + 0x051a162,0x2161a47,0x258c784,0x016df2d } }, + /* 173 */ + { { 0x228ead1,0x29c2122,0x07f6964,0x023f4ed,0x1802dc5,0x19f96ce, + 0x24bfd17,0x25e866b,0x2ba8df0,0x01eb84f }, + { 0x2dd384e,0x05bbe3a,0x3f06fd2,0x366dacb,0x30361a2,0x2f36d7c, + 0x0b98784,0x38ff481,0x074e2a8,0x01e1f60 } }, + /* 174 */ + { { 0x17fbb1c,0x0975add,0x1debc5e,0x2cb2880,0x3e47bdd,0x3488cff, + 0x15e9a36,0x2121129,0x0199ef2,0x017088a }, + { 0x0315250,0x352a162,0x17c1773,0x0ae09c2,0x321b21a,0x3bd74cf, + 0x3c4ea1d,0x3cac2ad,0x3abbaf0,0x039174d } }, + /* 175 */ + { { 0x0511c8a,0x3c78d0a,0x2cd3d2d,0x322f729,0x3ebb229,0x09f0e69, + 0x0a71a76,0x2e74d5e,0x12284df,0x03b5ef0 }, + { 0x3dea561,0x0a9b7e4,0x0ed1cf2,0x237523c,0x05443f1,0x2eb48fa, + 0x3861405,0x1b49f62,0x0c945ca,0x02ab25f } }, + /* 176 */ + { { 0x16bd00a,0x13a9d28,0x3cc1eb5,0x2b7d702,0x2d839e9,0x3e6ff01, + 0x2bb7f11,0x3713824,0x3b31163,0x00c63e5 }, + { 0x30d7138,0x0316fb0,0x0220ecc,0x08eaf0c,0x244e8df,0x0088d81, + 0x37972fb,0x3fd34ae,0x2a19a84,0x03e907e } }, + /* 177 */ + { { 0x2642269,0x0b65d29,0x03bd440,0x33a6ede,0x3c81814,0x2507982, + 0x0d38e47,0x3a788e6,0x32c1d26,0x00e2eda }, + { 0x2577f87,0x392895a,0x3e1cc64,0x14f7047,0x08b52d2,0x08a01ca, + 0x336abf6,0x00697fc,0x105ce76,0x0253742 } }, + /* 178 */ + { { 0x293f92a,0x33df737,0x3315156,0x32e26d7,0x0a01333,0x26579d4, + 0x004df9c,0x0aba409,0x067d25c,0x02481de }, + { 0x3f39d44,0x1c78042,0x13d7e24,0x0825aed,0x35f2c90,0x3270f63, + 0x04b7b35,0x3ad4531,0x28bd29b,0x0207a10 } }, + /* 179 */ + { { 0x077199f,0x270aeb1,0x0dd96dd,0x3b9ad7b,0x28cb8ee,0x3903f43, + 
0x37db3fe,0x292c62b,0x362dbbf,0x006e52a }, + { 0x247f143,0x0362cf3,0x216344f,0x3f18fd1,0x351e623,0x31664e0, + 0x0f270fc,0x243bbc6,0x2280555,0x001a8e3 } }, + /* 180 */ + { { 0x3355b49,0x2c04e6c,0x399b2e5,0x182d3af,0x020e265,0x09a7cf7, + 0x0ffa6bd,0x353e302,0x02083d9,0x029ecdb }, + { 0x33e8830,0x0570e86,0x1c0b64d,0x386a27e,0x0d5fcea,0x0b45a4c, + 0x2ee4a2e,0x0a8833f,0x2b4a282,0x02f9531 } }, + /* 181 */ + { { 0x191167c,0x36cf7e3,0x225ed6c,0x1e79e99,0x0517c3f,0x11ab1fd, + 0x05648f3,0x08aedc4,0x1abeae0,0x02fcc29 }, + { 0x3828a68,0x1e16fa4,0x30368e7,0x0c9fcfb,0x25161c3,0x24851ac, + 0x1b5feb5,0x344eb84,0x0de2732,0x0347208 } }, + /* 182 */ + { { 0x038b363,0x384d1e4,0x2519043,0x151ac17,0x158c11f,0x009b2b4, + 0x257abe6,0x2368d3f,0x3ed68a1,0x02df45e }, + { 0x29c2559,0x2962478,0x3d8444c,0x1d96fff,0x04f7a03,0x1391a52, + 0x0de4af7,0x3319126,0x15e6412,0x00e65ff } }, + /* 183 */ + { { 0x3d61507,0x1d1a0a2,0x0d2af20,0x354d299,0x329e132,0x2a28578, + 0x2ddfb08,0x04fa3ff,0x1293c6c,0x003bae2 }, + { 0x3e259f8,0x1a68fa9,0x3e67e9b,0x39b44f9,0x1ce1db7,0x347e9a1, + 0x3318f6a,0x2dbbc9d,0x2f8c922,0x008a245 } }, + /* 184 */ + { { 0x212ab5b,0x2b896c2,0x0136959,0x07e55ef,0x0cc1117,0x05b8ac3, + 0x18429ed,0x025fa01,0x11d6e93,0x03b016b }, + { 0x03f3708,0x2e96fab,0x1d77157,0x0d4c2d6,0x131baf9,0x0608d39, + 0x3552371,0x06cdd1e,0x1567ff1,0x01f4c50 } }, + /* 185 */ + { { 0x2dfefab,0x270173d,0x37077bd,0x1a372cd,0x1be2f22,0x28e2ee5, + 0x3ead973,0x35e8f94,0x2fc9bc1,0x03a7399 }, + { 0x36a02a1,0x2855d9b,0x00ed75a,0x37d8398,0x138c087,0x233706e, + 0x147f346,0x01947e2,0x3017228,0x0365942 } }, + /* 186 */ + { { 0x2057e60,0x2d31296,0x25e4504,0x2fa37bc,0x1cbccc3,0x1f0732f, + 0x3532081,0x2de8a98,0x19a804e,0x005359a }, + { 0x31f411a,0x2a10576,0x369c2c8,0x02fe035,0x109fbaf,0x30bddeb, + 0x1eef901,0x1662ad3,0x0410d43,0x01bd31a } }, + /* 187 */ + { { 0x2c24a96,0x1b7d3a5,0x19a3872,0x217f2f6,0x2534dbc,0x2cab8c2, + 0x066ef28,0x26aecf1,0x0fd6118,0x01310d4 }, + { 0x055b8da,0x1fdc5be,0x38a1296,0x25118f0,0x341a423,0x2ba4cd0, + 0x3e1413e,0x062d70d,0x2425a31,0x029c9b4 } }, + /* 188 */ + { { 0x08c1086,0x1acfba5,0x22e1dae,0x0f72f4e,0x3f1de50,0x0f408bc, + 0x35ed3f0,0x3ce48fc,0x282cc6c,0x004d8e7 }, + { 0x1afaa86,0x24e3ef3,0x22589ac,0x3ec9952,0x1f45bc5,0x14144ca, + 0x23b26e4,0x0d68c65,0x1e1c1a3,0x032a4d9 } }, + /* 189 */ + { { 0x03b2d20,0x16b1d53,0x241b361,0x05e4138,0x1742a54,0x32741c7, + 0x0521c4c,0x1ca96c2,0x034970b,0x02738a7 }, + { 0x13e0ad6,0x207dcdb,0x034c8cc,0x27bcbe1,0x18060da,0x33a18b6, + 0x2d1d1a6,0x2be60d7,0x3d7ab42,0x012312a } }, + /* 190 */ + { { 0x0c7485a,0x06c3310,0x0dbfd22,0x2ef949d,0x0ead455,0x098f4ba, + 0x3c76989,0x0cf2d24,0x032f67b,0x01e005f }, + { 0x30cb5ee,0x0d5da64,0x0ed2b9d,0x2503102,0x1c0d14e,0x1cbc693, + 0x37bf552,0x07013e2,0x054de5c,0x014f341 } }, + /* 191 */ + { { 0x128ccac,0x1617e97,0x346ebcd,0x158016d,0x25f823e,0x34048ea, + 0x39f0a1c,0x3ea3df1,0x1c1d3d7,0x03ba919 }, + { 0x151803b,0x01967c1,0x2f70781,0x27df39a,0x06c0b59,0x24a239c, + 0x15a7702,0x2464d06,0x2a47ae6,0x006db90 } }, + /* 192 */ + { { 0x27d04c3,0x024df3d,0x38112e8,0x38a27ba,0x01e312b,0x0965358, + 0x35d8879,0x2f4f55a,0x214187f,0x0008936 }, + { 0x05fe36f,0x2ee18c3,0x1f5f87a,0x1813bd4,0x0580f3c,0x0ed0a7b, + 0x0fb1bfb,0x3fcce59,0x2f042bf,0x01820e3 } }, + /* 193 */ + { { 0x20bbe99,0x32cbc9f,0x39ee432,0x3cc12a8,0x37bda44,0x3ea4e40, + 0x097c7a9,0x0590d7d,0x2022d33,0x018dbac }, + { 0x3ae00aa,0x3439864,0x2d2ffcf,0x3f8c6b9,0x0875a00,0x3e4e407, + 0x3658a29,0x22eb3d0,0x2b63921,0x022113b } }, + /* 194 */ + { { 0x33bae58,0x05c749a,0x1f3e114,0x1c45f8e,0x27db3df,0x06a3ab6, + 
0x37bc7f8,0x1e27b34,0x3dc51fb,0x009eea0 }, + { 0x3f54de5,0x3d0e7fe,0x1a71a7d,0x02ed7f8,0x0727703,0x2ca5e92, + 0x2e8e35d,0x292ad0b,0x13487f3,0x02b6d8b } }, + /* 195 */ + { { 0x175df2a,0x05a28a8,0x32e99b1,0x13d8630,0x2082aa0,0x11ac245, + 0x24f2e71,0x322cb27,0x17675e7,0x02e643f }, + { 0x1f37313,0x2765ad3,0x0789082,0x1e742d0,0x11c2055,0x2021dc4, + 0x09ae4a7,0x346359b,0x2f94d10,0x0205c1f } }, + /* 196 */ + { { 0x3d6ff96,0x1f2ac80,0x336097d,0x3f03610,0x35b851b,0x010b6d2, + 0x0823c4d,0x2a9709a,0x2ead5a8,0x00de4b6 }, + { 0x01afa0b,0x0621965,0x3671528,0x1050b60,0x3f3e9e7,0x2f93829, + 0x0825275,0x006e85f,0x35e94b0,0x016af58 } }, + /* 197 */ + { { 0x2c4927c,0x3ea1382,0x0f23727,0x0d69f23,0x3e38860,0x2b72837, + 0x3cd5ea4,0x2d84292,0x321846a,0x016656f }, + { 0x29dfa33,0x3e182e0,0x018be90,0x2ba563f,0x2caafe2,0x218c0d9, + 0x3baf447,0x1047a6c,0x0a2d483,0x01130cb } }, + /* 198 */ + { { 0x00ed80c,0x2a5fc79,0x0a82a74,0x2c4c74b,0x15f938c,0x30b5ab6, + 0x32124b7,0x295314f,0x2fb8082,0x007c858 }, + { 0x20b173e,0x19f315c,0x12f97e4,0x198217c,0x040e8a6,0x3275977, + 0x2bc20e4,0x01f2633,0x02bc3e9,0x023c750 } }, + /* 199 */ + { { 0x3c4058a,0x24be73e,0x16704f5,0x2d8a4bd,0x3b15e14,0x3076315, + 0x1cfe37b,0x36fe715,0x343926e,0x02c6603 }, + { 0x2c76b09,0x0cf824c,0x3f7898c,0x274cec1,0x11df527,0x18eed18, + 0x08ead48,0x23915bc,0x19b3744,0x00a0a2b } }, + /* 200 */ + { { 0x0cf4ac5,0x1c8b131,0x0afb696,0x0ff7799,0x2f5ac1a,0x022420c, + 0x11baa2e,0x2ce4015,0x1275a14,0x0125cfc }, + { 0x22eac5d,0x360cd4c,0x3568e59,0x3d42f66,0x35e07ee,0x09620e4, + 0x36720fa,0x22b1eac,0x2d0db16,0x01b6b23 } }, + /* 201 */ + { { 0x1a835ef,0x1516bbb,0x2d51f7b,0x3487443,0x14aa113,0x0dd06c2, + 0x1a65e01,0x379300d,0x35920b9,0x012c8fb }, + { 0x04c7341,0x2eda00f,0x3c37e82,0x1b4fd62,0x0d45770,0x1478fba, + 0x127863a,0x26939cd,0x134ddf4,0x01375c5 } }, + /* 202 */ + { { 0x1476cd9,0x1119ca5,0x325bbf9,0x0bf8c69,0x0648d07,0x312d9f8, + 0x01c8b8f,0x136ec51,0x0002f4a,0x03f4c5c }, + { 0x195d0e1,0x10ffd22,0x29aa1cb,0x3443bdc,0x276e695,0x05e6260, + 0x15f9764,0x3cd9783,0x18c9569,0x0053eb1 } }, + /* 203 */ + { { 0x312ae18,0x280197c,0x3fc9ad9,0x303f324,0x251958d,0x29f4a11, + 0x2142408,0x3694366,0x25136ab,0x03b5f1d }, + { 0x1d4abbc,0x1c3c689,0x13ea462,0x3cfc684,0x39b5dd8,0x2d4654b, + 0x09b0755,0x27d4f18,0x3f74d2e,0x03fbf2d } }, + /* 204 */ + { { 0x2119185,0x2525eae,0x1ba4bd0,0x0c2ab11,0x1d54e8c,0x294845e, + 0x2479dea,0x3602d24,0x17e87e0,0x0060069 }, + { 0x0afffb0,0x34fe37f,0x1240073,0x02eb895,0x06cf33c,0x2d7f7ef, + 0x1d763b5,0x04191e0,0x11e1ead,0x027e3f0 } }, + /* 205 */ + { { 0x269544c,0x0e85c57,0x3813158,0x19fc12d,0x20eaf85,0x1e2930c, + 0x22a8fd2,0x1a6a478,0x09d3d3a,0x02a74e0 }, + { 0x1a2da3b,0x30b0b16,0x0847936,0x3d86257,0x138ccbc,0x0f5421a, + 0x25244e6,0x23bdd79,0x1aee117,0x00c01ae } }, + /* 206 */ + { { 0x1eead28,0x07cac32,0x1fbc0bb,0x17627d3,0x17eef63,0x0b3a24e, + 0x0757fdb,0x3dd841d,0x3d745f8,0x002ae17 }, + { 0x25b4549,0x29f24cf,0x2f21ecd,0x1725e48,0x04be2bb,0x10ee010, + 0x1a1274b,0x10b0898,0x27511e9,0x02c48b5 } }, + /* 207 */ + { { 0x2a5ae7a,0x181ef99,0x0be33be,0x3e9dab7,0x101e703,0x3adb971, + 0x1043014,0x2ebb2be,0x1c1097d,0x027d667 }, + { 0x3f250ed,0x16dc603,0x20dc6d7,0x1d0d268,0x38eb915,0x02c89e8, + 0x1605a41,0x12de109,0x0e08a29,0x01f554a } }, + /* 208 */ + { { 0x0c26def,0x163d988,0x2d1ef0f,0x3a960ac,0x1025585,0x0738e20, + 0x27d79b0,0x05cc3ef,0x201303f,0x00a333a }, + { 0x1644ba5,0x2af345e,0x30b8d1d,0x3a01bff,0x31fc643,0x1acf85e, + 0x0a76fc6,0x04efe98,0x348a1d0,0x03062eb } }, + /* 209 */ + { { 0x1c4216d,0x18e3217,0x02ac34e,0x19c8185,0x200c010,0x17d4192, + 
0x13a1719,0x165af51,0x09db7a9,0x0277be0 }, + { 0x3ab8d2c,0x2190b99,0x22b641e,0x0cd88de,0x3b42404,0x1310862, + 0x106a6d6,0x23395f5,0x0b06880,0x000d5fe } }, + /* 210 */ + { { 0x0d2cc88,0x36f9913,0x339d8e9,0x237c2e3,0x0cc61c2,0x34c2832, + 0x309874c,0x2621d28,0x2dd1b48,0x0392806 }, + { 0x17cd8f9,0x07bab3d,0x0c482ed,0x0faf565,0x31b767d,0x2f4bde1, + 0x295c717,0x330c29c,0x179ce10,0x0119b5f } }, + /* 211 */ + { { 0x1ada2c7,0x0c624a7,0x227d47d,0x30e3e6a,0x14fa0a6,0x0829678, + 0x24fd288,0x2b46a43,0x122451e,0x0319ca9 }, + { 0x186b655,0x01f3217,0x0af1306,0x0efe6b5,0x2f0235d,0x1c45ca9, + 0x2086805,0x1d44e66,0x0faf2a6,0x0178f59 } }, + /* 212 */ + { { 0x33b4416,0x10431e6,0x2d99aa6,0x217aac9,0x0cd8fcf,0x2d95a9d, + 0x3ff74ad,0x10bf17a,0x295eb8e,0x01b229e }, + { 0x02a63bd,0x182e9ec,0x004710c,0x00e2e3c,0x06b2f23,0x04b642c, + 0x2c37383,0x32a4631,0x022ad82,0x00d22b9 } }, + /* 213 */ + { { 0x0cda2fb,0x1d198d7,0x26d27f4,0x286381c,0x022acca,0x24ac7c8, + 0x2df7824,0x0b4ba16,0x1e0d9ef,0x03041d3 }, + { 0x29a65b3,0x0f3912b,0x151bfcf,0x2b0175c,0x0fd71e4,0x39aa5e2, + 0x311f50c,0x13ff351,0x3dbc9e5,0x03eeb7e } }, + /* 214 */ + { { 0x0a99363,0x0fc7348,0x2775171,0x23db3c8,0x2b91565,0x134d66c, + 0x0175cd2,0x1bf365a,0x2b48371,0x02dfe5d }, + { 0x16dbf74,0x2389357,0x2f36575,0x3f5c70e,0x38d23ba,0x090f7f8, + 0x3477600,0x3201523,0x32ecafc,0x03d3506 } }, + /* 215 */ + { { 0x1abd48d,0x073ca3f,0x38a451f,0x0d8cb01,0x1ce81be,0x05c51ba, + 0x0e29741,0x03c41ab,0x0eae016,0x0060209 }, + { 0x2e58358,0x1da62d9,0x2358038,0x14b39b2,0x1635687,0x39079b1, + 0x380e345,0x1b49608,0x23983cf,0x019f97d } }, + /* 216 */ + { { 0x34899ef,0x332e373,0x04c0f89,0x3c27aed,0x1949015,0x09663b2, + 0x2f9276b,0x07f1951,0x09a04c1,0x027fbde }, + { 0x3d2a071,0x19fb3d4,0x1b096d3,0x1fe9146,0x3b10e1a,0x0478bbb, + 0x2b3fb06,0x1388329,0x181a99c,0x02f2030 } }, + /* 217 */ + { { 0x1eb82e6,0x14dbe39,0x3920972,0x31fd5b2,0x21a484f,0x02d7697, + 0x0e21715,0x37c431e,0x2629f8c,0x01249c3 }, + { 0x26b50ad,0x26deefa,0x0ffc1a3,0x30688e2,0x39a0284,0x041c65e, + 0x03eb178,0x0bdfd50,0x2f96137,0x034bb94 } }, + /* 218 */ + { { 0x0e0362a,0x334a162,0x194dd37,0x29e3e97,0x2442fa8,0x10d2949, + 0x3836e5a,0x2dccebf,0x0bee5ab,0x037ed1e }, + { 0x33eede6,0x3c739d9,0x2f04a91,0x350ad6c,0x3a5390a,0x14c368b, + 0x26f7bf5,0x11ce979,0x0b408df,0x0366850 } }, + /* 219 */ + { { 0x28ea498,0x0886d5b,0x2e090e0,0x0a4d58f,0x2623478,0x0d74ab7, + 0x2b83913,0x12c6b81,0x18d623f,0x01d8301 }, + { 0x198aa79,0x26d6330,0x3a7f0b8,0x34bc1ea,0x2f74890,0x378955a, + 0x204110f,0x0102538,0x02d8f19,0x01c5066 } }, + /* 220 */ + { { 0x14b0f45,0x2838cd3,0x14e16f0,0x0e0e4aa,0x2d9280b,0x0f18757, + 0x3324c6b,0x1391ceb,0x1ce89d5,0x00ebe74 }, + { 0x0930371,0x3de6048,0x3097fd8,0x1308705,0x3eda266,0x3108c26, + 0x1545dcd,0x1f7583a,0x1c37395,0x02c7e05 } }, + /* 221 */ + { { 0x1fec44a,0x2a9e3a2,0x0caf84f,0x11cf2a9,0x0c8c2ae,0x06da989, + 0x1c807dc,0x3c149a4,0x1141543,0x02906bb }, + { 0x15ffe04,0x0d4e65f,0x2e20424,0x37d896d,0x18bacb2,0x1e05ddd, + 0x1660be8,0x183be17,0x1dd86fb,0x035ba70 } }, + /* 222 */ + { { 0x2853264,0x0ba5fb1,0x0a0b3aa,0x2df88c1,0x2771533,0x23aba6f, + 0x112bb7b,0x3e3086e,0x210ae9b,0x027271b }, + { 0x030b74c,0x0269678,0x1e90a23,0x135a98c,0x24ed749,0x126de7c, + 0x344b23a,0x186da27,0x19640fa,0x0159af5 } }, + /* 223 */ + { { 0x18061f3,0x3004630,0x3c70066,0x34df20f,0x1190b25,0x1c9cc91, + 0x1fc8e02,0x0d17bc1,0x390f525,0x033cb1c }, + { 0x0eb30cf,0x2f3ad04,0x303aa09,0x2e835dd,0x1cfd2eb,0x143fc95, + 0x02c43a1,0x025e7a1,0x3558aa2,0x000bd45 } }, + /* 224 */ + { { 0x1db7d07,0x3bde52b,0x1500396,0x1089115,0x20b4fc7,0x1e2a8f3, + 
0x3f8eacc,0x365f7eb,0x1a5e8d4,0x0053a6b }, + { 0x37079e2,0x120284b,0x000edaa,0x33792c2,0x145baa3,0x20e055f, + 0x365e2d7,0x26ba005,0x3ab8e9d,0x0282b53 } }, + /* 225 */ + { { 0x2653618,0x2dd8852,0x2a5f0bf,0x0f0c7aa,0x2187281,0x1252757, + 0x13e7374,0x3b47855,0x0b86e56,0x02f354c }, + { 0x2e9c47b,0x2fa14cc,0x19ab169,0x3fad401,0x0dc2776,0x24afeed, + 0x3a97611,0x0d07736,0x3cf6979,0x02424a0 } }, + /* 226 */ + { { 0x2e81a13,0x000c91d,0x123967b,0x265885c,0x29bee1a,0x0cb8675, + 0x2d361bd,0x1526823,0x3c9ace1,0x00d7bad }, + { 0x24e5bdc,0x02b969f,0x2c6e128,0x34edb3b,0x12dcd2c,0x3899af0, + 0x24224c6,0x3a1914b,0x0f4448a,0x026a2cb } }, + /* 227 */ + { { 0x1d03b59,0x1c6fc82,0x32abf64,0x28ed96b,0x1c90e62,0x2f57bb2, + 0x3ff168e,0x04de7fd,0x0f4d449,0x01af6d8 }, + { 0x255bc30,0x2bfaf22,0x3fe0dad,0x0584025,0x1c79ead,0x3078ef7, + 0x2197414,0x022a50b,0x0fd94ba,0x0007b0f } }, + /* 228 */ + { { 0x09485c2,0x09dfaf7,0x10c7ba6,0x1e48bec,0x248cc9a,0x028a362, + 0x21d60f7,0x193d93d,0x1c04754,0x0346b2c }, + { 0x2f36612,0x240ac49,0x0d8bd26,0x13b8186,0x259c3a4,0x020d5fb, + 0x38a8133,0x09b0937,0x39d4056,0x01f7341 } }, + /* 229 */ + { { 0x05a4b48,0x1f534fc,0x07725ce,0x148dc8c,0x2adcd29,0x04aa456, + 0x0f79718,0x066e346,0x189377d,0x002fd4d }, + { 0x068ea73,0x336569b,0x184d35e,0x32a08e9,0x3c7f3bb,0x11ce9c8, + 0x3674c6f,0x21bf27e,0x0d9e166,0x034a2f9 } }, + /* 230 */ + { { 0x0fa8e4b,0x2e6418e,0x18fc5d2,0x1ba24ff,0x0559f18,0x0dbedbf, + 0x2de2aa4,0x22338e9,0x3aa510f,0x035d801 }, + { 0x23a4988,0x02aad94,0x02732d1,0x111d374,0x0b455cf,0x0d01c9e, + 0x067082a,0x2ec05fd,0x368b303,0x03cad4b } }, + /* 231 */ + { { 0x035b4ca,0x1fabea6,0x1cbc0d5,0x3f2ed9a,0x02d2232,0x1990c66, + 0x2eb680c,0x3b4ea3b,0x18ecc5a,0x03636fa }, + { 0x1a02709,0x26f8ff1,0x1fa8cba,0x397d6e8,0x230be68,0x043aa14, + 0x3d43cdf,0x25c17fa,0x3a3ee55,0x0380564 } }, + /* 232 */ + { { 0x275a0a6,0x16bd43a,0x0033d3e,0x2b15e16,0x2512226,0x005d901, + 0x26d50fd,0x3bc19bf,0x3b1aeb8,0x02bfb01 }, + { 0x0bb0a31,0x26559e0,0x1aae7fb,0x330dcc2,0x16f1af3,0x06afce2, + 0x13a15a0,0x2ff7645,0x3546e2d,0x029c6e4 } }, + /* 233 */ + { { 0x0f593d2,0x384b806,0x122bbf8,0x0a281e0,0x1d1a904,0x2e93cab, + 0x0505db0,0x08f6454,0x05c6285,0x014e880 }, + { 0x3f2b935,0x22d8e79,0x161a07c,0x16b060a,0x02bff97,0x146328b, + 0x3ceea77,0x238f61a,0x19b3d58,0x02fd1f4 } }, + /* 234 */ + { { 0x17665d5,0x259e9f7,0x0de5672,0x15cbcbd,0x34e3030,0x035240f, + 0x0005ae8,0x286d851,0x07f39c9,0x000070b }, + { 0x1efc6d6,0x2a0051a,0x2724143,0x2a9ef1e,0x0c810bd,0x1e05429, + 0x25670ba,0x2e66d7d,0x0e786ff,0x03f6b7e } }, + /* 235 */ + { { 0x3c00785,0x232e23f,0x2b67fd3,0x244ed23,0x077fa75,0x3cda3ef, + 0x14d055b,0x0f25011,0x24d5aa4,0x00ea0e3 }, + { 0x297bb9a,0x198ca4f,0x14d9561,0x18d1076,0x39eb933,0x2b6caa0, + 0x1591a60,0x0768d45,0x257873e,0x00f36e0 } }, + /* 236 */ + { { 0x1e77eab,0x0502a5f,0x0109137,0x0350592,0x3f7e1c5,0x3ac7437, + 0x2dcad2c,0x1fee9d8,0x089f1f5,0x0169833 }, + { 0x0d45673,0x0d8e090,0x065580b,0x065644f,0x11b82be,0x3592dd0, + 0x3284b8d,0x23f0015,0x16fdbfd,0x0248bfd } }, + /* 237 */ + { { 0x1a129a1,0x1977bb2,0x0e041b2,0x15f30a1,0x0a5b1ce,0x3afef8f, + 0x380c46c,0x3358810,0x27df6c5,0x01ca466 }, + { 0x3b90f9a,0x3d14ea3,0x031b298,0x02e2390,0x2d719c0,0x25bc615, + 0x2c0e777,0x0226b8c,0x3803624,0x0179e45 } }, + /* 238 */ + { { 0x363cdfb,0x1bb155f,0x24fd5c1,0x1c7c72b,0x28e6a35,0x18165f2, + 0x226bea5,0x0beaff3,0x371e24c,0x0138294 }, + { 0x1765357,0x29034e9,0x22b4276,0x11035ce,0x23c89af,0x074468c, + 0x3370ae4,0x013bae3,0x018d566,0x03d7fde } }, + /* 239 */ + { { 0x209df21,0x0f8ff86,0x0e47fbf,0x23b99ba,0x126d5d2,0x2722405, + 
0x16bd0a2,0x1799082,0x0e9533f,0x039077c }, + { 0x3ba9e3f,0x3f6902c,0x1895305,0x3ac9813,0x3f2340c,0x3c0d9f1, + 0x26e1927,0x0557c21,0x16eac4f,0x023b75f } }, + /* 240 */ + { { 0x3fc8ff3,0x0770382,0x342fc9a,0x0afa4db,0x314efd8,0x328e07b, + 0x016f7cc,0x3ba599c,0x1caed8a,0x0050cb0 }, + { 0x0b23c26,0x2120a5c,0x3273ec6,0x1cc1cd6,0x2a64fe8,0x2bbc3d6, + 0x09f6e5e,0x34b1b8e,0x00b5ac8,0x032bbd2 } }, + /* 241 */ + { { 0x1315922,0x1725e1d,0x0ca5524,0x1c4c18f,0x3d82951,0x193bcb2, + 0x0e60d0b,0x388dbcf,0x37e8efa,0x0342e85 }, + { 0x1b3af60,0x26ba3ec,0x220e53a,0x394f4b6,0x01a796a,0x3e7bbca, + 0x163605d,0x2b85807,0x17c1c54,0x03cc725 } }, + /* 242 */ + { { 0x1cc4597,0x1635492,0x2028c0f,0x2c2eb82,0x2dc5015,0x0d2a052, + 0x05fc557,0x1f0ebbf,0x0cb96e1,0x0004d01 }, + { 0x1a824bf,0x3896172,0x2ed7b29,0x178007a,0x0d59318,0x07bda2b, + 0x2ee6826,0x0f9b235,0x04b9193,0x01bcddf } }, + /* 243 */ + { { 0x0333fd2,0x0eeb46a,0x15b89f9,0x00968aa,0x2a89302,0x2bdd6b3, + 0x1e5037e,0x2541884,0x24ed2d0,0x01b6e8f }, + { 0x04399cd,0x3be6334,0x3adea48,0x1bb9adc,0x31811c6,0x05fb2bc, + 0x360752c,0x3d29dcb,0x3423bec,0x03c4f3c } }, + /* 244 */ + { { 0x119e2eb,0x2e7b02a,0x0f68cee,0x257d8b0,0x183a9a1,0x2ae88a6, + 0x3a3bb67,0x2eb4f3e,0x1a9274b,0x0320fea }, + { 0x2fa1ce0,0x346c2d8,0x2fbf0d7,0x3d4d063,0x0e58b60,0x09c1bc1, + 0x28ef9e5,0x09a0efe,0x0f45d70,0x02d275c } }, + /* 245 */ + { { 0x2d5513b,0x31d443e,0x1e2d914,0x3b2c5d4,0x105f32e,0x27ee756, + 0x050418d,0x3c73db6,0x1bb0c30,0x01673eb }, + { 0x1cb7fd6,0x1eb08d5,0x26a3e16,0x2e20810,0x0249367,0x029e219, + 0x2ec58c9,0x12d9fab,0x362354a,0x016eafc } }, + /* 246 */ + { { 0x2424865,0x260747b,0x177f37c,0x1e3cb95,0x08b0028,0x2783016, + 0x2970f1b,0x323c1c0,0x2a79026,0x0186231 }, + { 0x0f244da,0x26866f4,0x087306f,0x173ec20,0x31ecced,0x3c84d8d, + 0x070f9b9,0x2e764d5,0x075df50,0x0264ff9 } }, + /* 247 */ + { { 0x32c3609,0x0c737e6,0x14ea68e,0x300b11b,0x184eb19,0x29dd440, + 0x09ec1a9,0x185adeb,0x0664c80,0x0207dd9 }, + { 0x1fbe978,0x30a969d,0x33561d7,0x34fc60e,0x36743fe,0x00774af, + 0x0d1f045,0x018360e,0x12a5fe9,0x01592a0 } }, + /* 248 */ + { { 0x2817d1d,0x2993d3e,0x2e0f7a5,0x112faa0,0x255f968,0x355fe6a, + 0x3f5a0fc,0x075b2d7,0x3cf00e5,0x0089afc }, + { 0x32833cf,0x06a7e4b,0x09a8d6d,0x1693d3e,0x320a0a3,0x3cfdfdd, + 0x136c498,0x1e0d845,0x347ff25,0x01a1de7 } }, + /* 249 */ + { { 0x3043d08,0x030705c,0x20fa79b,0x1d07f00,0x0a54467,0x29b49b4, + 0x367e289,0x0b82f4d,0x0d1eb09,0x025ef2c }, + { 0x32ed3c3,0x1baaa3c,0x3c482ab,0x146ca06,0x3c8a4f1,0x3e85e3c, + 0x1bf4f3b,0x1195534,0x3e80a78,0x02a1cbf } }, + /* 250 */ + { { 0x32b2086,0x2de4d68,0x3486b1a,0x03a0583,0x2e1eb71,0x2dab9af, + 0x10cd913,0x28daa6f,0x3fcb732,0x000a04a }, + { 0x3605318,0x3f5f2b3,0x2d1da63,0x143f7f5,0x1646e5d,0x040b586, + 0x1683982,0x25abe87,0x0c9fe53,0x001ce47 } }, + /* 251 */ + { { 0x380d02b,0x055fc22,0x3f7fc50,0x3458a1d,0x26b8333,0x23550ab, + 0x0a1af87,0x0a821eb,0x2dc7e6d,0x00d574a }, + { 0x07386e1,0x3ccd68a,0x3275b41,0x253e390,0x2fd272a,0x1e6627a, + 0x2ca2cde,0x0e9e4a1,0x1e37c2a,0x00f70ac } }, + /* 252 */ + { { 0x0581352,0x2748701,0x02bed68,0x094dd9e,0x30a00c8,0x3fb5c07, + 0x3bd5909,0x211ac80,0x1103ccd,0x0311e1a }, + { 0x0c768ed,0x29dc209,0x36575db,0x009a107,0x272feea,0x2b33383, + 0x313ed56,0x134c9cc,0x168d5bb,0x033310a } }, + /* 253 */ + { { 0x17620b9,0x143784f,0x256a94e,0x229664a,0x1d89a5c,0x1d521f2, + 0x0076406,0x1c73f70,0x342aa48,0x03851fa }, + { 0x0f3ae46,0x2ad3bab,0x0fbe274,0x3ed40d4,0x2fd4936,0x232103a, + 0x2afe474,0x25b8f7c,0x047080e,0x008e6b0 } }, + /* 254 */ + { { 0x3fee8d4,0x347cd4a,0x0fec481,0x33fe9ec,0x0ce80b5,0x33a6bcf, + 
0x1c4c9e2,0x3967441,0x1a3f5f7,0x03157e8 },
+      { 0x257c227,0x1bc53a0,0x200b318,0x0fcd0af,0x2c5b165,0x2a413ec,
+        0x2fc998a,0x2da6426,0x19cd4f4,0x0025336 } },
+    /* 255 */
+    { { 0x303beba,0x2072135,0x32918a9,0x140cb3a,0x08631d1,0x0ef527b,
+        0x05f2c9e,0x2b4ce91,0x0b642ab,0x02e428c },
+      { 0x0a5abf9,0x15013ed,0x3603b46,0x30dd76d,0x3004750,0x28d7627,
+        0x1a42ccc,0x093ddbe,0x39a1b79,0x00067e2 } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_10(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_256_ecc_mulmod_stripe_10(r, &p256_base, p256_table,
+                                       k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[10];
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_256_point_new_10(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 10, km);
+
+        err = sp_256_ecc_mulmod_base_10(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_10(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_10(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_256_iszero_10(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
+            a[8] | a[9]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_256_add_one_10(sp_digit* a)
+{
+    a[0]++;
+    sp_256_norm_10(a);
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r     A single precision integer.
+ * size  Maximum number of bytes to convert.
+ * a     Byte array.
+ * n     Number of bytes in array to read.
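+ *
+ * A usage sketch (hypothetical buffer names, assuming a 32-byte
+ * big-endian scalar):
+ *
+ *     byte buf[32];
+ *     sp_digit k[10];
+ *     sp_256_from_bin(k, 10, buf, 32);
+ *
+ * Bytes are consumed from the end of the array, least significant
+ * first, so buf[31] fills bits 0..7 of k[0]; each limb is masked to
+ * 26 bits with 0x3ffffff and the overflow is carried into the next
+ * limb.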
+ */ +static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 18U) { + r[j] &= 0x3ffffff; + s = 26U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +} + +/* Generates a scalar that is in the range 1..order-1. + * + * rng Random number generator. + * k Scalar value. + * returns RNG failures, MEMORY_E when memory allocation fails and + * MP_OKAY on success. + */ +static int sp_256_ecc_gen_k_10(WC_RNG* rng, sp_digit* k) +{ + int err; + byte buf[32]; + + do { + err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf)); + if (err == 0) { + sp_256_from_bin(k, 10, buf, (int)sizeof(buf)); + if (sp_256_cmp_10(k, p256_order2) < 0) { + sp_256_add_one_10(k); + break; + } + } + } + while (err == 0); + + return err; +} + +/* Makes a random EC key pair. + * + * rng Random number generator. + * priv Generated private value. + * pub Generated public point. + * heap Heap to use for allocation. + * returns ECC_INF_E when the point does not have the correct order, RNG + * failures, MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 p; + sp_digit kd[10]; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_256 inf; +#endif +#endif + sp_point_256* point; + sp_digit* k = NULL; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_256* infinity; +#endif + int err; + + (void)heap; + + err = sp_256_point_new_10(heap, p, point); +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + if (err == MP_OKAY) { + err = sp_256_point_new_10(heap, inf, infinity); + } +#endif +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) { + err = MEMORY_E; + } + } +#else + k = kd; +#endif + + if (err == MP_OKAY) { + err = sp_256_ecc_gen_k_10(rng, k); + } + if (err == MP_OKAY) { + err = sp_256_ecc_mulmod_base_10(point, k, 1, NULL); + } + +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + if (err == MP_OKAY) { + err = sp_256_ecc_mulmod_10(infinity, point, p256_order, 1, NULL); + } + if (err == MP_OKAY) { + if ((sp_256_iszero_10(point->x) == 0) || (sp_256_iszero_10(point->y) == 0)) { + err = ECC_INF_E; + } + } +#endif + + if (err == MP_OKAY) { + err = sp_256_to_mp(k, priv); + } + if (err == MP_OKAY) { + err = sp_256_point_to_ecc_point_10(point, pub); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_256_point_free_10(infinity, 1, heap); +#endif + sp_256_point_free_10(point, 1, heap); + + return err; +} + +#ifdef HAVE_ECC_DHE +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 32 + * + * r A single precision integer. + * a Byte array. 
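+ *
+ * A round-trip sketch (hypothetical names) pairing this with
+ * sp_256_from_bin() above:
+ *
+ *     byte out[32];
+ *     sp_256_to_bin(point->x, out);
+ *
+ * The limbs of r are normalized in place (carries propagated, each
+ * limb masked with 0x3ffffff) and then written most significant byte
+ * first; the 10 x 26 = 260 limb bits exceed the 256-bit output, so
+ * the top 4 bits never reach the buffer.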
*/
+static void sp_256_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    for (i=0; i<9; i++) {
+        r[i+1] += r[i] >> 26;
+        r[i] &= 0x3ffffff;
+    }
+    j = 256 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<10 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 26) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 26);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for the output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[10];
+#endif
+    sp_point_256* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    if (*outLen < 32U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_10(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 10, priv);
+        sp_256_point_from_ecc_point_10(point, pub);
+        err = sp_256_ecc_mulmod_10(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_256_to_bin(point->x, out);
+        *outLen = 32;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_10(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
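+ *
+ * With 26-bit limbs and a scalar of at most 26 bits, each partial
+ * product tb * a[i] fits in 52 bits of the int64_t accumulator, so
+ * the carries fold in with plain additions and the final carry lands
+ * in r[10]: the result is one limb longer than the input. A call
+ * sketch (hypothetical names; prod must have room for 11 limbs):
+ *
+ *     sp_digit prod[11];
+ *     sp_256_mul_d_10(prod, a, dv);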
+ */
+SP_NOINLINE static void sp_256_mul_d_10(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int64_t tb = b;
+ int64_t t = 0;
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ t += tb * a[i];
+ r[i] = t & 0x3ffffff;
+ t >>= 26;
+ }
+ r[10] = (sp_digit)t;
+#else
+ int64_t tb = b;
+ int64_t t[10];
+
+ t[ 0] = tb * a[ 0];
+ t[ 1] = tb * a[ 1];
+ t[ 2] = tb * a[ 2];
+ t[ 3] = tb * a[ 3];
+ t[ 4] = tb * a[ 4];
+ t[ 5] = tb * a[ 5];
+ t[ 6] = tb * a[ 6];
+ t[ 7] = tb * a[ 7];
+ t[ 8] = tb * a[ 8];
+ t[ 9] = tb * a[ 9];
+ r[ 0] = (t[ 0] & 0x3ffffff);
+ r[ 1] = (sp_digit)(t[ 0] >> 26) + (t[ 1] & 0x3ffffff);
+ r[ 2] = (sp_digit)(t[ 1] >> 26) + (t[ 2] & 0x3ffffff);
+ r[ 3] = (sp_digit)(t[ 2] >> 26) + (t[ 3] & 0x3ffffff);
+ r[ 4] = (sp_digit)(t[ 3] >> 26) + (t[ 4] & 0x3ffffff);
+ r[ 5] = (sp_digit)(t[ 4] >> 26) + (t[ 5] & 0x3ffffff);
+ r[ 6] = (sp_digit)(t[ 5] >> 26) + (t[ 6] & 0x3ffffff);
+ r[ 7] = (sp_digit)(t[ 6] >> 26) + (t[ 7] & 0x3ffffff);
+ r[ 8] = (sp_digit)(t[ 7] >> 26) + (t[ 8] & 0x3ffffff);
+ r[ 9] = (sp_digit)(t[ 8] >> 26) + (t[ 9] & 0x3ffffff);
+ r[10] = (sp_digit)(t[ 9] >> 26);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_256_div_word_10(sp_digit d1, sp_digit d0,
+ sp_digit dv)
+{
+ sp_digit d, r, t;
+
+ /* All 26 bits from d1 and top 5 bits from d0. */
+ d = (d1 << 5) | (d0 >> 21);
+ r = d / dv;
+ d -= r * dv;
+ /* Up to 6 bits in r */
+ /* Next 5 bits from d0. */
+ r <<= 5;
+ d <<= 5;
+ d |= (d0 >> 16) & ((1 << 5) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 11 bits in r */
+ /* Next 5 bits from d0. */
+ r <<= 5;
+ d <<= 5;
+ d |= (d0 >> 11) & ((1 << 5) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 16 bits in r */
+ /* Next 5 bits from d0. */
+ r <<= 5;
+ d <<= 5;
+ d |= (d0 >> 6) & ((1 << 5) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 21 bits in r */
+ /* Next 5 bits from d0. */
+ r <<= 5;
+ d <<= 5;
+ d |= (d0 >> 1) & ((1 << 5) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 26 bits in r */
+ /* Remaining 1 bit from d0. */
+ r <<= 1;
+ d <<= 1;
+ d |= d0 & ((1 << 1) - 1);
+ t = d / dv;
+ r += t;
+
+ return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide a by d and put the remainder into r (a = m*d + r).
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
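+ *
+ * Note (added): schoolbook long division; each quotient digit is
+ * estimated from the top two limbs divided by dv = d[9] and then
+ * corrected, with sp_256_div_word_10 doing the estimate when
+ * WOLFSSL_SP_DIV_32 is defined.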
+ */ +static int sp_256_div_10(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_32 + int64_t d1; +#endif + sp_digit dv, r1; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* td; +#else + sp_digit t1d[20], t2d[10 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 10 + 1), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = td; + t2 = td + 2 * 10; +#else + t1 = t1d; + t2 = t2d; +#endif + + dv = d[9]; + XMEMCPY(t1, a, sizeof(*t1) * 2U * 10U); + for (i=9; i>=0; i--) { + t1[10 + i] += t1[10 + i - 1] >> 26; + t1[10 + i - 1] &= 0x3ffffff; +#ifndef WOLFSSL_SP_DIV_32 + d1 = t1[10 + i]; + d1 <<= 26; + d1 += t1[10 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_256_div_word_10(t1[10 + i], t1[10 + i - 1], dv); +#endif + + sp_256_mul_d_10(t2, d, r1); + (void)sp_256_sub_10(&t1[i], &t1[i], t2); + t1[10 + i] -= t2[10]; + t1[10 + i] += t1[10 + i - 1] >> 26; + t1[10 + i - 1] &= 0x3ffffff; + r1 = (((-t1[10 + i]) << 26) - t1[10 + i - 1]) / dv; + r1++; + sp_256_mul_d_10(t2, d, r1); + (void)sp_256_add_10(&t1[i], &t1[i], t2); + t1[10 + i] += t1[10 + i - 1] >> 26; + t1[10 + i - 1] &= 0x3ffffff; + } + t1[10 - 1] += t1[10 - 2] >> 26; + t1[10 - 2] &= 0x3ffffff; + r1 = t1[10 - 1] / dv; + + sp_256_mul_d_10(t2, d, r1); + (void)sp_256_sub_10(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 10U); + for (i=0; i<8; i++) { + r[i+1] += r[i] >> 26; + r[i] &= 0x3ffffff; + } + sp_256_cond_add_10(r, r, d, 0 - ((r[9] < 0) ? + (sp_digit)1 : (sp_digit)0)); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_256_mod_10(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_256_div_10(a, m, NULL, r); +} + +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#ifdef WOLFSSL_SP_SMALL +/* Order-2 for the P256 curve. */ +static const uint32_t p256_order_minus_2[8] = { + 0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU, + 0x00000000U,0xffffffffU +}; +#else +/* The low half of the order-2 of the P256 curve. */ +static const uint32_t p256_order_low[4] = { + 0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU +}; +#endif /* WOLFSSL_SP_SMALL */ + +/* Multiply two number mod the order of P256 curve. (r = a * b mod order) + * + * r Result of the multiplication. + * a First operand of the multiplication. + * b Second operand of the multiplication. + */ +static void sp_256_mont_mul_order_10(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_256_mul_10(r, a, b); + sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order); +} + +/* Square number mod the order of P256 curve. (r = a * a mod order) + * + * r Result of the squaring. + * a Number to square. 
+ */
+static void sp_256_mont_sqr_order_10(sp_digit* r, const sp_digit* a)
+{
+ sp_256_sqr_10(r, a);
+ sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P256 curve a number of times.
+ * (r = a ^ 2^n mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_256_mont_sqr_n_order_10(sp_digit* r, const sp_digit* a, int n)
+{
+ int i;
+
+ sp_256_mont_sqr_order_10(r, a);
+ for (i=1; i<n; i++) {
+ sp_256_mont_sqr_order_10(r, r);
+ }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data.
+ */
+static void sp_256_mont_inv_order_10(sp_digit* r, const sp_digit* a,
+ sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* t = td;
+ int i;
+
+ XMEMCPY(t, a, sizeof(sp_digit) * 10);
+ for (i=254; i>=0; i--) {
+ sp_256_mont_sqr_order_10(t, t);
+ if ((p256_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_256_mont_mul_order_10(t, t, a);
+ }
+ }
+ XMEMCPY(r, t, sizeof(sp_digit) * 10U);
+#else
+ sp_digit* t = td;
+ sp_digit* t2 = td + 2 * 10;
+ sp_digit* t3 = td + 4 * 10;
+ int i;
+
+ /* t = a^2 */
+ sp_256_mont_sqr_order_10(t, a);
+ /* t = a^3 = t * a */
+ sp_256_mont_mul_order_10(t, t, a);
+ /* t2= a^c = t ^ 2 ^ 2 */
+ sp_256_mont_sqr_n_order_10(t2, t, 2);
+ /* t3= a^f = t2 * t */
+ sp_256_mont_mul_order_10(t3, t2, t);
+ /* t2= a^f0 = t3 ^ 2 ^ 4 */
+ sp_256_mont_sqr_n_order_10(t2, t3, 4);
+ /* t = a^ff = t2 * t3 */
+ sp_256_mont_mul_order_10(t, t2, t3);
+ /* t2= a^ff00 = t ^ 2 ^ 8 */
+ sp_256_mont_sqr_n_order_10(t2, t, 8);
+ /* t = a^ffff = t2 * t */
+ sp_256_mont_mul_order_10(t, t2, t);
+ /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+ sp_256_mont_sqr_n_order_10(t2, t, 16);
+ /* t = a^ffffffff = t2 * t */
+ sp_256_mont_mul_order_10(t, t2, t);
+ /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
+ sp_256_mont_sqr_n_order_10(t2, t, 64);
+ /* t2= a^ffffffff00000000ffffffff = t2 * t */
+ sp_256_mont_mul_order_10(t2, t2, t);
+ /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
+ sp_256_mont_sqr_n_order_10(t2, t2, 32);
+ /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+ sp_256_mont_mul_order_10(t2, t2, t);
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
+ for (i=127; i>=112; i--) {
+ sp_256_mont_sqr_order_10(t2, t2);
+ if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_256_mont_mul_order_10(t2, t2, a);
+ }
+ }
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+ sp_256_mont_sqr_n_order_10(t2, t2, 4);
+ sp_256_mont_mul_order_10(t2, t2, t3);
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+ for (i=107; i>=64; i--) {
+ sp_256_mont_sqr_order_10(t2, t2);
+ if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_256_mont_mul_order_10(t2, t2, a);
+ }
+ }
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+ sp_256_mont_sqr_n_order_10(t2, t2, 4);
+ sp_256_mont_mul_order_10(t2, t2, t3);
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+ for (i=59; i>=32; i--) {
+ sp_256_mont_sqr_order_10(t2, t2);
+ if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_256_mont_mul_order_10(t2, t2, a);
+ }
+ }
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
+ sp_256_mont_sqr_n_order_10(t2, t2, 4);
+ sp_256_mont_mul_order_10(t2, t2, t3);
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+ for (i=27; i>=0; i--) {
+ sp_256_mont_sqr_order_10(t2, t2);
+ if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_256_mont_mul_order_10(t2, t2, a);
+ }
+ }
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+ sp_256_mont_sqr_n_order_10(t2, t2, 4);
+ /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+ sp_256_mont_mul_order_10(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 256 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash Hash to sign.
+ * hashLen Length of the hash data.
+ * rng Random number generator.
+ * priv Private part of key - scalar.
+ * rm First part of result as an mp_int.
+ * sm Second part of result as an mp_int.
+ * km Scalar k to use when non-zero (zeroed after use); a random k is
+ * generated when km is NULL or zero.
+ * heap Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+ mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit ed[2*10];
+ sp_digit xd[2*10];
+ sp_digit kd[2*10];
+ sp_digit rd[2*10];
+ sp_digit td[3 * 2*10];
+ sp_point_256 p;
+#endif
+ sp_digit* e = NULL;
+ sp_digit* x = NULL;
+ sp_digit* k = NULL;
+ sp_digit* r = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_256* point = NULL;
+ sp_digit carry;
+ sp_digit* s = NULL;
+ sp_digit* kInv = NULL;
+ int err = MP_OKAY;
+ int32_t c;
+ int i;
+
+ (void)heap;
+
+ err = sp_256_point_new_10(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 10, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ e = d + 0 * 10;
+ x = d + 2 * 10;
+ k = d + 4 * 10;
+ r = d + 6 * 10;
+ tmp = d + 8 * 10;
+#else
+ e = ed;
+ x = xd;
+ k = kd;
+ r = rd;
+ tmp = td;
+#endif
+ s = e;
+ kInv = k;
+
+ if (hashLen > 32U) {
+ hashLen = 32U;
+ }
+
+ sp_256_from_bin(e, 10, hash, (int)hashLen);
+ }
+
+ for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+ sp_256_from_mp(x, 10, priv);
+
+ /* New random point. */
+ if (km == NULL || mp_iszero(km)) {
+ err = sp_256_ecc_gen_k_10(rng, k);
+ }
+ else {
+ sp_256_from_mp(k, 10, km);
+ mp_zero(km);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_mulmod_base_10(point, k, 1, NULL);
+ }
+
+ if (err == MP_OKAY) {
+ /* r = point->x mod order */
+ XMEMCPY(r, point->x, sizeof(sp_digit) * 10U);
+ sp_256_norm_10(r);
+ c = sp_256_cmp_10(r, p256_order);
+ sp_256_cond_sub_10(r, r, p256_order, 0L - (sp_digit)(c >= 0));
+ sp_256_norm_10(r);
+
+ /* Conv k to Montgomery form (mod order) */
+ sp_256_mul_10(k, k, p256_norm_order);
+ err = sp_256_mod_10(k, k, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_10(k);
+ /* kInv = 1/k mod order */
+ sp_256_mont_inv_order_10(kInv, k, tmp);
+ sp_256_norm_10(kInv);
+
+ /* s = r * x + e */
+ sp_256_mul_10(x, x, r);
+ err = sp_256_mod_10(x, x, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_10(x);
+ carry = sp_256_add_10(s, e, x);
+ sp_256_cond_sub_10(s, s, p256_order, 0 - carry);
+ sp_256_norm_10(s);
+ c = sp_256_cmp_10(s, p256_order);
+ sp_256_cond_sub_10(s, s, p256_order, 0L - (sp_digit)(c >= 0));
+ sp_256_norm_10(s);
+
+ /* s = s * k^-1 mod order */
+ sp_256_mont_mul_order_10(s, s, kInv);
+ sp_256_norm_10(s);
+
+ /* Check that signature is usable. */
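+ /* Note (added): s must be non-zero to be usable; when s is zero
+ * another k is generated and the loop retries. */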
+ if (sp_256_iszero_10(s) == 0) {
+ break;
+ }
+ }
+ }
+
+ if (i == 0) {
+ err = RNG_FAILURE_E;
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(r, rm);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(s, sm);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XMEMSET(d, 0, sizeof(sp_digit) * 8 * 10);
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+ XMEMSET(e, 0, sizeof(sp_digit) * 2U * 10U);
+ XMEMSET(x, 0, sizeof(sp_digit) * 2U * 10U);
+ XMEMSET(k, 0, sizeof(sp_digit) * 2U * 10U);
+ XMEMSET(r, 0, sizeof(sp_digit) * 2U * 10U);
+ XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 10U);
+#endif
+ sp_256_point_free_10(point, 1, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 256)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash Hash to verify.
+ * hashLen Length of the hash data.
+ * pX X ordinate of the public key point.
+ * pY Y ordinate of the public key point.
+ * pZ Z ordinate of the public key point.
+ * r First part of the signature as an mp_int.
+ * sm Second part of the signature as an mp_int.
+ * res Result of verification: 1 when the signature matches, 0 otherwise.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
+ mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit u1d[2*10];
+ sp_digit u2d[2*10];
+ sp_digit sd[2*10];
+ sp_digit tmpd[2*10 * 5];
+ sp_point_256 p1d;
+ sp_point_256 p2d;
+#endif
+ sp_digit* u1 = NULL;
+ sp_digit* u2 = NULL;
+ sp_digit* s = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_256* p1;
+ sp_point_256* p2 = NULL;
+ sp_digit carry;
+ int32_t c;
+ int err;
+
+ err = sp_256_point_new_10(heap, p1d, p1);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_10(heap, p2d, p2);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 10, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ u1 = d + 0 * 10;
+ u2 = d + 2 * 10;
+ s = d + 4 * 10;
+ tmp = d + 6 * 10;
+#else
+ u1 = u1d;
+ u2 = u2d;
+ s = sd;
+ tmp = tmpd;
+#endif
+
+ if (hashLen > 32U) {
+ hashLen = 32U;
+ }
+
+ sp_256_from_bin(u1, 10, hash, (int)hashLen);
+ sp_256_from_mp(u2, 10, r);
+ sp_256_from_mp(s, 10, sm);
+ sp_256_from_mp(p2->x, 10, pX);
+ sp_256_from_mp(p2->y, 10, pY);
+ sp_256_from_mp(p2->z, 10, pZ);
+
+ {
+ sp_256_mul_10(s, s, p256_norm_order);
+ }
+ err = sp_256_mod_10(s, s, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_10(s);
+ {
+ sp_256_mont_inv_order_10(s, s, tmp);
+ sp_256_mont_mul_order_10(u1, u1, s);
+ sp_256_mont_mul_order_10(u2, u2, s);
+ }
+
+ err = sp_256_ecc_mulmod_base_10(p1, u1, 0, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_mulmod_10(p2, p2, u2, 0, heap);
+ }
+
+ if (err == MP_OKAY) {
+ {
+ sp_256_proj_point_add_10(p1, p1, p2, tmp);
+ if (sp_256_iszero_10(p1->z)) {
+ if (sp_256_iszero_10(p1->x) && sp_256_iszero_10(p1->y)) {
+ sp_256_proj_point_dbl_10(p1, p2, tmp);
+ }
+ else {
+ /* Y ordinate is not used from here - don't set. */
+ p1->x[0] = 0;
+ p1->x[1] = 0;
+ p1->x[2] = 0;
+ p1->x[3] = 0;
+ p1->x[4] = 0;
+ p1->x[5] = 0;
+ p1->x[6] = 0;
+ p1->x[7] = 0;
+ p1->x[8] = 0;
+ p1->x[9] = 0;
+ XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ }
+ }
+ }
+
+ /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+ /* Reload r and convert to Montgomery form. */
+ sp_256_from_mp(u2, 10, r);
+ err = sp_256_mod_mul_norm_10(u2, u2, p256_mod);
+ }
+
+ if (err == MP_OKAY) {
+ /* u1 = r.z'.z' mod prime */
+ sp_256_mont_sqr_10(p1->z, p1->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_10(u1, u2, p1->z, p256_mod, p256_mp_mod);
+ *res = (int)(sp_256_cmp_10(p1->x, u1) == 0);
+ if (*res == 0) {
+ /* Reload r and add order. */
+ sp_256_from_mp(u2, 10, r);
+ carry = sp_256_add_10(u2, u2, p256_order);
+ /* Carry means result is greater than mod and is not valid. */
+ if (carry == 0) {
+ sp_256_norm_10(u2);
+
+ /* Compare with mod and if greater or equal then not valid. */
+ c = sp_256_cmp_10(u2, p256_mod);
+ if (c < 0) {
+ /* Convert to Montgomery form */
+ err = sp_256_mod_mul_norm_10(u2, u2, p256_mod);
+ if (err == MP_OKAY) {
+ /* u1 = (r + 1*order).z'.z' mod prime */
+ sp_256_mont_mul_10(u1, u2, p1->z, p256_mod,
+ p256_mp_mod);
+ *res = (int)(sp_256_cmp_10(p1->x, u1) == 0);
+ }
+ }
+ }
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL)
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+ sp_256_point_free_10(p1, 0, heap);
+ sp_256_point_free_10(p2, 0, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point EC point.
+ * heap Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
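+ *
+ * Note (added): the test is the short Weierstrass equation
+ * y^2 = x^3 - 3*x + b (mod p): t1 is computed as y^2 - x^3 + 3*x and
+ * compared against p256_b.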
+ */
+static int sp_256_ecc_is_point_10(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit t1d[2*10];
+ sp_digit t2d[2*10];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10 * 4, heap, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = d + 0 * 10;
+ t2 = d + 2 * 10;
+#else
+ (void)heap;
+
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ sp_256_sqr_10(t1, point->y);
+ (void)sp_256_mod_10(t1, t1, p256_mod);
+ sp_256_sqr_10(t2, point->x);
+ (void)sp_256_mod_10(t2, t2, p256_mod);
+ sp_256_mul_10(t2, t2, point->x);
+ (void)sp_256_mod_10(t2, t2, p256_mod);
+ (void)sp_256_sub_10(t2, p256_mod, t2);
+ sp_256_mont_add_10(t1, t1, t2, p256_mod);
+
+ sp_256_mont_add_10(t1, t1, point->x, p256_mod);
+ sp_256_mont_add_10(t1, t1, point->x, p256_mod);
+ sp_256_mont_add_10(t1, t1, point->x, p256_mod);
+
+ if (sp_256_cmp_10(t1, p256_b) != 0) {
+ err = MP_VAL;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 pubd;
+#endif
+ sp_point_256* pub;
+ byte one[1] = { 1 };
+ int err;
+
+ err = sp_256_point_new_10(NULL, pubd, pub);
+ if (err == MP_OKAY) {
+ sp_256_from_mp(pub->x, 10, pX);
+ sp_256_from_mp(pub->y, 10, pY);
+ sp_256_from_bin(pub->z, 10, one, (int)sizeof(one));
+
+ err = sp_256_ecc_is_point_10(pub, NULL);
+ }
+
+ sp_256_point_free_10(pub, 0, NULL);
+
+ return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
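+ *
+ * Note (added): the checks run in order: point not at infinity, x and y
+ * in range [0, p-1], point on the curve, order * point == infinity, and
+ * base * priv == point.
+ *
+ * Example (hypothetical caller, illustration only; pub is an ecc_point
+ * and key an ecc_key):
+ * err = sp_ecc_check_key_256(pub->x, pub->y, &key->k, NULL);
+ * err == MP_OKAY means the pair passed all of the above checks.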
+ */
+int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit privd[10];
+ sp_point_256 pubd;
+ sp_point_256 pd;
+#endif
+ sp_digit* priv = NULL;
+ sp_point_256* pub;
+ sp_point_256* p = NULL;
+ byte one[1] = { 1 };
+ int err;
+
+ err = sp_256_point_new_10(heap, pubd, pub);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_10(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10, heap,
+ DYNAMIC_TYPE_ECC);
+ if (priv == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ priv = privd;
+#endif
+
+ sp_256_from_mp(pub->x, 10, pX);
+ sp_256_from_mp(pub->y, 10, pY);
+ sp_256_from_bin(pub->z, 10, one, (int)sizeof(one));
+ sp_256_from_mp(priv, 10, privm);
+
+ /* Check point at infinity. */
+ if ((sp_256_iszero_10(pub->x) != 0) &&
+ (sp_256_iszero_10(pub->y) != 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check range of X and Y */
+ if (sp_256_cmp_10(pub->x, p256_mod) >= 0 ||
+ sp_256_cmp_10(pub->y, p256_mod) >= 0) {
+ err = ECC_OUT_OF_RANGE_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check point is on curve */
+ err = sp_256_ecc_is_point_10(pub, heap);
+ }
+
+ if (err == MP_OKAY) {
+ /* Point * order = infinity */
+ err = sp_256_ecc_mulmod_10(p, pub, p256_order, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is infinity */
+ if ((sp_256_iszero_10(p->x) == 0) ||
+ (sp_256_iszero_10(p->y) == 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Base * private = point */
+ err = sp_256_ecc_mulmod_base_10(p, priv, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is public key */
+ if (sp_256_cmp_10(p->x, pub->x) != 0 ||
+ sp_256_cmp_10(p->y, pub->y) != 0) {
+ err = ECC_PRIV_KEY_E;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (priv != NULL) {
+ XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_10(p, 0, heap);
+ sp_256_point_free_10(pub, 0, heap);
+
+ return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
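+ *
+ * Note (added): coordinates are Jacobian projective, i.e. the affine
+ * point is (X / Z^2, Y / Z^3) as in the verify comment above.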
+ */ +int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 10 * 5]; + sp_point_256 pd; + sp_point_256 qd; +#endif + sp_digit* tmp; + sp_point_256* p; + sp_point_256* q = NULL; + int err; + + err = sp_256_point_new_10(NULL, pd, p); + if (err == MP_OKAY) { + err = sp_256_point_new_10(NULL, qd, q); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 10, pX); + sp_256_from_mp(p->y, 10, pY); + sp_256_from_mp(p->z, 10, pZ); + sp_256_from_mp(q->x, 10, qX); + sp_256_from_mp(q->y, 10, qY); + sp_256_from_mp(q->z, 10, qZ); + + sp_256_proj_point_add_10(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_10(q, 0, NULL); + sp_256_point_free_10(p, 0, NULL); + + return err; +} + +/* Double a projective EC point. + * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 10 * 2]; + sp_point_256 pd; +#endif + sp_digit* tmp; + sp_point_256* p; + int err; + + err = sp_256_point_new_10(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 10, pX); + sp_256_from_mp(p->y, 10, pY); + sp_256_from_mp(p->z, 10, pZ); + + sp_256_proj_point_dbl_10(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_10(p, 0, NULL); + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
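+ *
+ * Note (added): mapping computes X / Z^2 and Y / Z^3 modulo the prime,
+ * after which Z is 1.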
+ */ +int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 10 * 4]; + sp_point_256 pd; +#endif + sp_digit* tmp; + sp_point_256* p; + int err; + + err = sp_256_point_new_10(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 4, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 10, pX); + sp_256_from_mp(p->y, 10, pY); + sp_256_from_mp(p->z, 10, pZ); + + sp_256_map_10(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, pZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_10(p, 0, NULL); + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +static int sp_256_mont_sqrt_10(sp_digit* y) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit t1d[2 * 10]; + sp_digit t2d[2 * 10]; +#endif + sp_digit* t1; + sp_digit* t2; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = d + 0 * 10; + t2 = d + 2 * 10; +#else + t1 = t1d; + t2 = t2d; +#endif + + { + /* t2 = y ^ 0x2 */ + sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0x3 */ + sp_256_mont_mul_10(t1, t2, y, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xc */ + sp_256_mont_sqr_n_10(t2, t1, 2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xf */ + sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xf0 */ + sp_256_mont_sqr_n_10(t2, t1, 4, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xff */ + sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xff00 */ + sp_256_mont_sqr_n_10(t2, t1, 8, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffff */ + sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xffff0000 */ + sp_256_mont_sqr_n_10(t2, t1, 16, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff */ + sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000000 */ + sp_256_mont_sqr_n_10(t1, t1, 32, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001 */ + sp_256_mont_mul_10(t1, t1, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */ + sp_256_mont_sqr_n_10(t1, t1, 96, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */ + sp_256_mont_mul_10(t1, t1, y, p256_mod, p256_mp_mod); + sp_256_mont_sqr_n_10(y, t1, 94, p256_mod, p256_mp_mod); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, 
+ NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm X ordinate.
+ * odd Whether the Y ordinate is odd.
+ * ym Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d;
+#else
+ sp_digit xd[2 * 10];
+ sp_digit yd[2 * 10];
+#endif
+ sp_digit* x = NULL;
+ sp_digit* y = NULL;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ x = d + 0 * 10;
+ y = d + 2 * 10;
+#else
+ x = xd;
+ y = yd;
+#endif
+
+ sp_256_from_mp(x, 10, xm);
+ err = sp_256_mod_mul_norm_10(x, x, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ /* y = x^3 */
+ {
+ sp_256_mont_sqr_10(y, x, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod);
+ }
+ /* y = x^3 - 3x */
+ sp_256_mont_sub_10(y, y, x, p256_mod);
+ sp_256_mont_sub_10(y, y, x, p256_mod);
+ sp_256_mont_sub_10(y, y, x, p256_mod);
+ /* y = x^3 - 3x + b */
+ err = sp_256_mod_mul_norm_10(x, p256_b, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ sp_256_mont_add_10(y, y, x, p256_mod);
+ /* y = sqrt(x^3 - 3x + b) */
+ err = sp_256_mont_sqrt_10(y);
+ }
+ if (err == MP_OKAY) {
+ XMEMSET(y + 10, 0, 10U * sizeof(sp_digit));
+ sp_256_mont_reduce_10(y, p256_mod, p256_mp_mod);
+ if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+ sp_256_mont_sub_10(y, p256_mod, y, p256_mod);
+ }
+
+ err = sp_256_to_mp(y, ym);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+
+/* Point structure to use. */
+typedef struct sp_point_384 {
+ sp_digit x[2 * 15];
+ sp_digit y[2 * 15];
+ sp_digit z[2 * 15];
+ int infinity;
+} sp_point_384;
+
+/* The modulus (prime) of the curve P384. */
+static const sp_digit p384_mod[15] = {
+ 0x3ffffff,0x000003f,0x0000000,0x3fc0000,0x2ffffff,0x3ffffff,0x3ffffff,
+ 0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x00fffff
+
+};
+/* The Montgomery normalizer for modulus of the curve P384. */
+static const sp_digit p384_norm_mod[15] = {
+ 0x0000001,0x3ffffc0,0x3ffffff,0x003ffff,0x1000000,0x0000000,0x0000000,
+ 0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000
+
+};
+/* The Montgomery multiplier for modulus of the curve P384. */
+static sp_digit p384_mp_mod = 0x000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* The order of the curve P384. */
+static const sp_digit p384_order[15] = {
+ 0x0c52973,0x3065ab3,0x277aece,0x2c922c2,0x3581a0d,0x10dcb77,0x234d81f,
+ 0x3ffff1d,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x00fffff
+
+};
+#endif
+/* The order of the curve P384 minus 2. */
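+/* Note (added, assuming the P384 scalar generation mirrors
+ * sp_256_ecc_gen_k_10 above): order-2 is the acceptance bound when
+ * generating a random scalar k; candidates below it are accepted and
+ * incremented by one, yielding a value in the range 1..order-1. */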
+static const sp_digit p384_order2[15] = {
+ 0x0c52971,0x3065ab3,0x277aece,0x2c922c2,0x3581a0d,0x10dcb77,0x234d81f,
+ 0x3ffff1d,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x00fffff
+
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for order of the curve P384. */
+static const sp_digit p384_norm_order[15] = {
+ 0x33ad68d,0x0f9a54c,0x1885131,0x136dd3d,0x0a7e5f2,0x2f23488,0x1cb27e0,
+ 0x00000e2,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000
+
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for order of the curve P384. */
+static sp_digit p384_mp_order = 0x8fdc45;
+#endif
+/* The base point of curve P384. */
+static const sp_point_384 p384_base = {
+ /* X ordinate */
+ {
+ 0x2760ab7,0x1178e1c,0x296c3a5,0x176fd54,0x05502f2,0x0950a8e,0x3741e08,
+ 0x26e6167,0x3628ba7,0x11b874e,0x3320ad7,0x2c71c7b,0x305378e,0x288afa2,0x00aa87c,
+
+ 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* Y ordinate */
+ {
+ 0x0ea0e5f,0x0c75f24,0x019d7a4,0x33875fa,0x00a60b1,0x17c2e30,0x1a3113b,
+ 0x051f3a7,0x1bd289a,0x27e3d07,0x1292dc2,0x27a62fe,0x22c6f5d,0x392a589,0x003617d,
+
+ 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* Z ordinate */
+ {
+ 0x0000001,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,
+ 0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,
+
+ 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* infinity */
+ 0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p384_b[15] = {
+ 0x3ec2aef,0x1723b74,0x119d2a8,0x23628bb,0x2c65639,0x004e1d6,0x14088f5,
+ 0x104480c,0x06efe81,0x2460767,0x23f82d1,0x23815af,0x2e7e498,0x3e9f88f,0x00b3312
+
+};
+#endif
+
+static int sp_384_point_new_ex_15(void* heap, sp_point_384* sp, sp_point_384** p)
+{
+ int ret = MP_OKAY;
+ (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ (void)sp;
+ *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
+#else
+ *p = sp;
+#endif
+ if (*p == NULL) {
+ ret = MEMORY_E;
+ }
+ return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_384_point_new_15(heap, sp, p) sp_384_point_new_ex_15((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_384_point_new_15(heap, sp, p) sp_384_point_new_ex_15((heap), &(sp), &(p))
+#endif
+
+
+static void sp_384_point_free_15(sp_point_384* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+ if (p != NULL) {
+ if (clear != 0) {
+ XMEMSET(p, 0, sizeof(*p));
+ }
+ XFREE(p, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+/* Clear point data if requested. */
+ if (clear != 0) {
+ XMEMSET(p, 0, sizeof(*p));
+ }
+#endif
+ (void)heap;
+}
+
+/* Multiply a number by Montgomery normalizer mod modulus (prime).
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
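+ *
+ * Note (added): this is the conversion into Montgomery form,
+ * r = a * 2^384 mod m, specialised for the P384 prime.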
+ */ +static int sp_384_mod_mul_norm_15(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + int64_t* td; +#else + int64_t td[12]; + int64_t a32d[12]; +#endif + int64_t* t; + int64_t* a32; + int64_t o; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 12, NULL, DYNAMIC_TYPE_ECC); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = td; + a32 = td + 12; +#else + t = td; + a32 = a32d; +#endif + + a32[0] = a[0]; + a32[0] |= a[1] << 26U; + a32[0] &= 0xffffffffL; + a32[1] = (sp_digit)(a[1] >> 6); + a32[1] |= a[2] << 20U; + a32[1] &= 0xffffffffL; + a32[2] = (sp_digit)(a[2] >> 12); + a32[2] |= a[3] << 14U; + a32[2] &= 0xffffffffL; + a32[3] = (sp_digit)(a[3] >> 18); + a32[3] |= a[4] << 8U; + a32[3] &= 0xffffffffL; + a32[4] = (sp_digit)(a[4] >> 24); + a32[4] |= a[5] << 2U; + a32[4] |= a[6] << 28U; + a32[4] &= 0xffffffffL; + a32[5] = (sp_digit)(a[6] >> 4); + a32[5] |= a[7] << 22U; + a32[5] &= 0xffffffffL; + a32[6] = (sp_digit)(a[7] >> 10); + a32[6] |= a[8] << 16U; + a32[6] &= 0xffffffffL; + a32[7] = (sp_digit)(a[8] >> 16); + a32[7] |= a[9] << 10U; + a32[7] &= 0xffffffffL; + a32[8] = (sp_digit)(a[9] >> 22); + a32[8] |= a[10] << 4U; + a32[8] |= a[11] << 30U; + a32[8] &= 0xffffffffL; + a32[9] = (sp_digit)(a[11] >> 2); + a32[9] |= a[12] << 24U; + a32[9] &= 0xffffffffL; + a32[10] = (sp_digit)(a[12] >> 8); + a32[10] |= a[13] << 18U; + a32[10] &= 0xffffffffL; + a32[11] = (sp_digit)(a[13] >> 14); + a32[11] |= a[14] << 12U; + a32[11] &= 0xffffffffL; + + /* 1 0 0 0 0 0 0 0 1 1 0 -1 */ + t[0] = 0 + a32[0] + a32[8] + a32[9] - a32[11]; + /* -1 1 0 0 0 0 0 0 -1 0 1 1 */ + t[1] = 0 - a32[0] + a32[1] - a32[8] + a32[10] + a32[11]; + /* 0 -1 1 0 0 0 0 0 0 -1 0 1 */ + t[2] = 0 - a32[1] + a32[2] - a32[9] + a32[11]; + /* 1 0 -1 1 0 0 0 0 1 1 -1 -1 */ + t[3] = 0 + a32[0] - a32[2] + a32[3] + a32[8] + a32[9] - a32[10] - a32[11]; + /* 1 1 0 -1 1 0 0 0 1 2 1 -2 */ + t[4] = 0 + a32[0] + a32[1] - a32[3] + a32[4] + a32[8] + 2 * a32[9] + a32[10] - 2 * a32[11]; + /* 0 1 1 0 -1 1 0 0 0 1 2 1 */ + t[5] = 0 + a32[1] + a32[2] - a32[4] + a32[5] + a32[9] + 2 * a32[10] + a32[11]; + /* 0 0 1 1 0 -1 1 0 0 0 1 2 */ + t[6] = 0 + a32[2] + a32[3] - a32[5] + a32[6] + a32[10] + 2 * a32[11]; + /* 0 0 0 1 1 0 -1 1 0 0 0 1 */ + t[7] = 0 + a32[3] + a32[4] - a32[6] + a32[7] + a32[11]; + /* 0 0 0 0 1 1 0 -1 1 0 0 0 */ + t[8] = 0 + a32[4] + a32[5] - a32[7] + a32[8]; + /* 0 0 0 0 0 1 1 0 -1 1 0 0 */ + t[9] = 0 + a32[5] + a32[6] - a32[8] + a32[9]; + /* 0 0 0 0 0 0 1 1 0 -1 1 0 */ + t[10] = 0 + a32[6] + a32[7] - a32[9] + a32[10]; + /* 0 0 0 0 0 0 0 1 1 0 -1 1 */ + t[11] = 0 + a32[7] + a32[8] - a32[10] + a32[11]; + + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + t[8] += t[7] >> 32; t[7] &= 0xffffffff; + t[9] += t[8] >> 32; t[8] &= 0xffffffff; + t[10] += t[9] >> 32; t[9] &= 0xffffffff; + t[11] += t[10] >> 32; t[10] &= 0xffffffff; + o = t[11] >> 32; t[11] &= 0xffffffff; + t[0] += o; + t[1] -= o; + t[3] += o; + t[4] += o; + t[1] += t[0] >> 32; t[0] &= 
0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + t[8] += t[7] >> 32; t[7] &= 0xffffffff; + t[9] += t[8] >> 32; t[8] &= 0xffffffff; + t[10] += t[9] >> 32; t[9] &= 0xffffffff; + t[11] += t[10] >> 32; t[10] &= 0xffffffff; + + r[0] = (sp_digit)(t[0]) & 0x3ffffffL; + r[1] = (sp_digit)(t[0] >> 26U); + r[1] |= t[1] << 6U; + r[1] &= 0x3ffffffL; + r[2] = (sp_digit)(t[1] >> 20U); + r[2] |= t[2] << 12U; + r[2] &= 0x3ffffffL; + r[3] = (sp_digit)(t[2] >> 14U); + r[3] |= t[3] << 18U; + r[3] &= 0x3ffffffL; + r[4] = (sp_digit)(t[3] >> 8U); + r[4] |= t[4] << 24U; + r[4] &= 0x3ffffffL; + r[5] = (sp_digit)(t[4] >> 2U) & 0x3ffffffL; + r[6] = (sp_digit)(t[4] >> 28U); + r[6] |= t[5] << 4U; + r[6] &= 0x3ffffffL; + r[7] = (sp_digit)(t[5] >> 22U); + r[7] |= t[6] << 10U; + r[7] &= 0x3ffffffL; + r[8] = (sp_digit)(t[6] >> 16U); + r[8] |= t[7] << 16U; + r[8] &= 0x3ffffffL; + r[9] = (sp_digit)(t[7] >> 10U); + r[9] |= t[8] << 22U; + r[9] &= 0x3ffffffL; + r[10] = (sp_digit)(t[8] >> 4U) & 0x3ffffffL; + r[11] = (sp_digit)(t[8] >> 30U); + r[11] |= t[9] << 2U; + r[11] &= 0x3ffffffL; + r[12] = (sp_digit)(t[9] >> 24U); + r[12] |= t[10] << 8U; + r[12] &= 0x3ffffffL; + r[13] = (sp_digit)(t[10] >> 18U); + r[13] |= t[11] << 14U; + r[13] &= 0x3ffffffL; + r[14] = (sp_digit)(t[11] >> 12U); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 26 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 26 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0x3ffffff; + s = 26U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 26U) <= (word32)DIGIT_BIT) { + s += 26U; + r[j] &= 0x3ffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 26) { + r[j] &= 0x3ffffff; + if (j + 1 >= size) { + break; + } + s = 26 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Convert a point of type ecc_point to type sp_point_384. + * + * p Point of type sp_point_384 (result). + * pm Point of type ecc_point. 
+ */ +static void sp_384_point_from_ecc_point_15(sp_point_384* p, const ecc_point* pm) +{ + XMEMSET(p->x, 0, sizeof(p->x)); + XMEMSET(p->y, 0, sizeof(p->y)); + XMEMSET(p->z, 0, sizeof(p->z)); + sp_384_from_mp(p->x, 15, pm->x); + sp_384_from_mp(p->y, 15, pm->y); + sp_384_from_mp(p->z, 15, pm->z); + p->infinity = 0; +} + +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_384_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 26 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 15); + r->used = 15; + mp_clamp(r); +#elif DIGIT_BIT < 26 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 15; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 26) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 26 - s; + } + r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 15; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 26 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 26 - s; + } + else { + s += 26; + } + } + r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Convert a point of type sp_point_384 to type ecc_point. + * + * p Point of type sp_point_384. + * pm Point of type ecc_point (result). + * returns MEMORY_E when allocation of memory in ecc_point fails otherwise + * MP_OKAY. + */ +static int sp_384_point_to_ecc_point_15(const sp_point_384* p, ecc_point* pm) +{ + int err; + + err = sp_384_to_mp(p->x, pm->x); + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, pm->y); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, pm->z); + } + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_384_mul_15(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j, k; + int64_t c; + + c = ((int64_t)a[14]) * b[14]; + r[29] = (sp_digit)(c >> 26); + c = (c & 0x3ffffff) << 26; + for (k = 27; k >= 0; k--) { + for (i = 14; i >= 0; i--) { + j = k - i; + if (j >= 15) { + break; + } + if (j < 0) { + continue; + } + + c += ((int64_t)a[i]) * b[j]; + } + r[k + 2] += c >> 52; + r[k + 1] = (c >> 26) & 0x3ffffff; + c = (c & 0x3ffffff) << 26; + } + r[0] = (sp_digit)(c >> 26); +} + +#else +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
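+ *
+ * Note (added): fully unrolled schoolbook multiplication; each column
+ * sum t0..t28 accumulates 52-bit partial products and the carries are
+ * propagated 26 bits at a time at the end.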
+ */ +SP_NOINLINE static void sp_384_mul_15(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int64_t t0 = ((int64_t)a[ 0]) * b[ 0]; + int64_t t1 = ((int64_t)a[ 0]) * b[ 1] + + ((int64_t)a[ 1]) * b[ 0]; + int64_t t2 = ((int64_t)a[ 0]) * b[ 2] + + ((int64_t)a[ 1]) * b[ 1] + + ((int64_t)a[ 2]) * b[ 0]; + int64_t t3 = ((int64_t)a[ 0]) * b[ 3] + + ((int64_t)a[ 1]) * b[ 2] + + ((int64_t)a[ 2]) * b[ 1] + + ((int64_t)a[ 3]) * b[ 0]; + int64_t t4 = ((int64_t)a[ 0]) * b[ 4] + + ((int64_t)a[ 1]) * b[ 3] + + ((int64_t)a[ 2]) * b[ 2] + + ((int64_t)a[ 3]) * b[ 1] + + ((int64_t)a[ 4]) * b[ 0]; + int64_t t5 = ((int64_t)a[ 0]) * b[ 5] + + ((int64_t)a[ 1]) * b[ 4] + + ((int64_t)a[ 2]) * b[ 3] + + ((int64_t)a[ 3]) * b[ 2] + + ((int64_t)a[ 4]) * b[ 1] + + ((int64_t)a[ 5]) * b[ 0]; + int64_t t6 = ((int64_t)a[ 0]) * b[ 6] + + ((int64_t)a[ 1]) * b[ 5] + + ((int64_t)a[ 2]) * b[ 4] + + ((int64_t)a[ 3]) * b[ 3] + + ((int64_t)a[ 4]) * b[ 2] + + ((int64_t)a[ 5]) * b[ 1] + + ((int64_t)a[ 6]) * b[ 0]; + int64_t t7 = ((int64_t)a[ 0]) * b[ 7] + + ((int64_t)a[ 1]) * b[ 6] + + ((int64_t)a[ 2]) * b[ 5] + + ((int64_t)a[ 3]) * b[ 4] + + ((int64_t)a[ 4]) * b[ 3] + + ((int64_t)a[ 5]) * b[ 2] + + ((int64_t)a[ 6]) * b[ 1] + + ((int64_t)a[ 7]) * b[ 0]; + int64_t t8 = ((int64_t)a[ 0]) * b[ 8] + + ((int64_t)a[ 1]) * b[ 7] + + ((int64_t)a[ 2]) * b[ 6] + + ((int64_t)a[ 3]) * b[ 5] + + ((int64_t)a[ 4]) * b[ 4] + + ((int64_t)a[ 5]) * b[ 3] + + ((int64_t)a[ 6]) * b[ 2] + + ((int64_t)a[ 7]) * b[ 1] + + ((int64_t)a[ 8]) * b[ 0]; + int64_t t9 = ((int64_t)a[ 0]) * b[ 9] + + ((int64_t)a[ 1]) * b[ 8] + + ((int64_t)a[ 2]) * b[ 7] + + ((int64_t)a[ 3]) * b[ 6] + + ((int64_t)a[ 4]) * b[ 5] + + ((int64_t)a[ 5]) * b[ 4] + + ((int64_t)a[ 6]) * b[ 3] + + ((int64_t)a[ 7]) * b[ 2] + + ((int64_t)a[ 8]) * b[ 1] + + ((int64_t)a[ 9]) * b[ 0]; + int64_t t10 = ((int64_t)a[ 0]) * b[10] + + ((int64_t)a[ 1]) * b[ 9] + + ((int64_t)a[ 2]) * b[ 8] + + ((int64_t)a[ 3]) * b[ 7] + + ((int64_t)a[ 4]) * b[ 6] + + ((int64_t)a[ 5]) * b[ 5] + + ((int64_t)a[ 6]) * b[ 4] + + ((int64_t)a[ 7]) * b[ 3] + + ((int64_t)a[ 8]) * b[ 2] + + ((int64_t)a[ 9]) * b[ 1] + + ((int64_t)a[10]) * b[ 0]; + int64_t t11 = ((int64_t)a[ 0]) * b[11] + + ((int64_t)a[ 1]) * b[10] + + ((int64_t)a[ 2]) * b[ 9] + + ((int64_t)a[ 3]) * b[ 8] + + ((int64_t)a[ 4]) * b[ 7] + + ((int64_t)a[ 5]) * b[ 6] + + ((int64_t)a[ 6]) * b[ 5] + + ((int64_t)a[ 7]) * b[ 4] + + ((int64_t)a[ 8]) * b[ 3] + + ((int64_t)a[ 9]) * b[ 2] + + ((int64_t)a[10]) * b[ 1] + + ((int64_t)a[11]) * b[ 0]; + int64_t t12 = ((int64_t)a[ 0]) * b[12] + + ((int64_t)a[ 1]) * b[11] + + ((int64_t)a[ 2]) * b[10] + + ((int64_t)a[ 3]) * b[ 9] + + ((int64_t)a[ 4]) * b[ 8] + + ((int64_t)a[ 5]) * b[ 7] + + ((int64_t)a[ 6]) * b[ 6] + + ((int64_t)a[ 7]) * b[ 5] + + ((int64_t)a[ 8]) * b[ 4] + + ((int64_t)a[ 9]) * b[ 3] + + ((int64_t)a[10]) * b[ 2] + + ((int64_t)a[11]) * b[ 1] + + ((int64_t)a[12]) * b[ 0]; + int64_t t13 = ((int64_t)a[ 0]) * b[13] + + ((int64_t)a[ 1]) * b[12] + + ((int64_t)a[ 2]) * b[11] + + ((int64_t)a[ 3]) * b[10] + + ((int64_t)a[ 4]) * b[ 9] + + ((int64_t)a[ 5]) * b[ 8] + + ((int64_t)a[ 6]) * b[ 7] + + ((int64_t)a[ 7]) * b[ 6] + + ((int64_t)a[ 8]) * b[ 5] + + ((int64_t)a[ 9]) * b[ 4] + + ((int64_t)a[10]) * b[ 3] + + ((int64_t)a[11]) * b[ 2] + + ((int64_t)a[12]) * b[ 1] + + ((int64_t)a[13]) * b[ 0]; + int64_t t14 = ((int64_t)a[ 0]) * b[14] + + ((int64_t)a[ 1]) * b[13] + + ((int64_t)a[ 2]) * b[12] + + ((int64_t)a[ 3]) * b[11] + + ((int64_t)a[ 4]) * b[10] + + ((int64_t)a[ 5]) * b[ 9] + + ((int64_t)a[ 6]) * b[ 8] + + ((int64_t)a[ 7]) * 
b[ 7] + + ((int64_t)a[ 8]) * b[ 6] + + ((int64_t)a[ 9]) * b[ 5] + + ((int64_t)a[10]) * b[ 4] + + ((int64_t)a[11]) * b[ 3] + + ((int64_t)a[12]) * b[ 2] + + ((int64_t)a[13]) * b[ 1] + + ((int64_t)a[14]) * b[ 0]; + int64_t t15 = ((int64_t)a[ 1]) * b[14] + + ((int64_t)a[ 2]) * b[13] + + ((int64_t)a[ 3]) * b[12] + + ((int64_t)a[ 4]) * b[11] + + ((int64_t)a[ 5]) * b[10] + + ((int64_t)a[ 6]) * b[ 9] + + ((int64_t)a[ 7]) * b[ 8] + + ((int64_t)a[ 8]) * b[ 7] + + ((int64_t)a[ 9]) * b[ 6] + + ((int64_t)a[10]) * b[ 5] + + ((int64_t)a[11]) * b[ 4] + + ((int64_t)a[12]) * b[ 3] + + ((int64_t)a[13]) * b[ 2] + + ((int64_t)a[14]) * b[ 1]; + int64_t t16 = ((int64_t)a[ 2]) * b[14] + + ((int64_t)a[ 3]) * b[13] + + ((int64_t)a[ 4]) * b[12] + + ((int64_t)a[ 5]) * b[11] + + ((int64_t)a[ 6]) * b[10] + + ((int64_t)a[ 7]) * b[ 9] + + ((int64_t)a[ 8]) * b[ 8] + + ((int64_t)a[ 9]) * b[ 7] + + ((int64_t)a[10]) * b[ 6] + + ((int64_t)a[11]) * b[ 5] + + ((int64_t)a[12]) * b[ 4] + + ((int64_t)a[13]) * b[ 3] + + ((int64_t)a[14]) * b[ 2]; + int64_t t17 = ((int64_t)a[ 3]) * b[14] + + ((int64_t)a[ 4]) * b[13] + + ((int64_t)a[ 5]) * b[12] + + ((int64_t)a[ 6]) * b[11] + + ((int64_t)a[ 7]) * b[10] + + ((int64_t)a[ 8]) * b[ 9] + + ((int64_t)a[ 9]) * b[ 8] + + ((int64_t)a[10]) * b[ 7] + + ((int64_t)a[11]) * b[ 6] + + ((int64_t)a[12]) * b[ 5] + + ((int64_t)a[13]) * b[ 4] + + ((int64_t)a[14]) * b[ 3]; + int64_t t18 = ((int64_t)a[ 4]) * b[14] + + ((int64_t)a[ 5]) * b[13] + + ((int64_t)a[ 6]) * b[12] + + ((int64_t)a[ 7]) * b[11] + + ((int64_t)a[ 8]) * b[10] + + ((int64_t)a[ 9]) * b[ 9] + + ((int64_t)a[10]) * b[ 8] + + ((int64_t)a[11]) * b[ 7] + + ((int64_t)a[12]) * b[ 6] + + ((int64_t)a[13]) * b[ 5] + + ((int64_t)a[14]) * b[ 4]; + int64_t t19 = ((int64_t)a[ 5]) * b[14] + + ((int64_t)a[ 6]) * b[13] + + ((int64_t)a[ 7]) * b[12] + + ((int64_t)a[ 8]) * b[11] + + ((int64_t)a[ 9]) * b[10] + + ((int64_t)a[10]) * b[ 9] + + ((int64_t)a[11]) * b[ 8] + + ((int64_t)a[12]) * b[ 7] + + ((int64_t)a[13]) * b[ 6] + + ((int64_t)a[14]) * b[ 5]; + int64_t t20 = ((int64_t)a[ 6]) * b[14] + + ((int64_t)a[ 7]) * b[13] + + ((int64_t)a[ 8]) * b[12] + + ((int64_t)a[ 9]) * b[11] + + ((int64_t)a[10]) * b[10] + + ((int64_t)a[11]) * b[ 9] + + ((int64_t)a[12]) * b[ 8] + + ((int64_t)a[13]) * b[ 7] + + ((int64_t)a[14]) * b[ 6]; + int64_t t21 = ((int64_t)a[ 7]) * b[14] + + ((int64_t)a[ 8]) * b[13] + + ((int64_t)a[ 9]) * b[12] + + ((int64_t)a[10]) * b[11] + + ((int64_t)a[11]) * b[10] + + ((int64_t)a[12]) * b[ 9] + + ((int64_t)a[13]) * b[ 8] + + ((int64_t)a[14]) * b[ 7]; + int64_t t22 = ((int64_t)a[ 8]) * b[14] + + ((int64_t)a[ 9]) * b[13] + + ((int64_t)a[10]) * b[12] + + ((int64_t)a[11]) * b[11] + + ((int64_t)a[12]) * b[10] + + ((int64_t)a[13]) * b[ 9] + + ((int64_t)a[14]) * b[ 8]; + int64_t t23 = ((int64_t)a[ 9]) * b[14] + + ((int64_t)a[10]) * b[13] + + ((int64_t)a[11]) * b[12] + + ((int64_t)a[12]) * b[11] + + ((int64_t)a[13]) * b[10] + + ((int64_t)a[14]) * b[ 9]; + int64_t t24 = ((int64_t)a[10]) * b[14] + + ((int64_t)a[11]) * b[13] + + ((int64_t)a[12]) * b[12] + + ((int64_t)a[13]) * b[11] + + ((int64_t)a[14]) * b[10]; + int64_t t25 = ((int64_t)a[11]) * b[14] + + ((int64_t)a[12]) * b[13] + + ((int64_t)a[13]) * b[12] + + ((int64_t)a[14]) * b[11]; + int64_t t26 = ((int64_t)a[12]) * b[14] + + ((int64_t)a[13]) * b[13] + + ((int64_t)a[14]) * b[12]; + int64_t t27 = ((int64_t)a[13]) * b[14] + + ((int64_t)a[14]) * b[13]; + int64_t t28 = ((int64_t)a[14]) * b[14]; + + t1 += t0 >> 26; r[ 0] = t0 & 0x3ffffff; + t2 += t1 >> 26; r[ 1] = t1 & 0x3ffffff; + t3 += t2 >> 26; r[ 2] = t2 & 
0x3ffffff; + t4 += t3 >> 26; r[ 3] = t3 & 0x3ffffff; + t5 += t4 >> 26; r[ 4] = t4 & 0x3ffffff; + t6 += t5 >> 26; r[ 5] = t5 & 0x3ffffff; + t7 += t6 >> 26; r[ 6] = t6 & 0x3ffffff; + t8 += t7 >> 26; r[ 7] = t7 & 0x3ffffff; + t9 += t8 >> 26; r[ 8] = t8 & 0x3ffffff; + t10 += t9 >> 26; r[ 9] = t9 & 0x3ffffff; + t11 += t10 >> 26; r[10] = t10 & 0x3ffffff; + t12 += t11 >> 26; r[11] = t11 & 0x3ffffff; + t13 += t12 >> 26; r[12] = t12 & 0x3ffffff; + t14 += t13 >> 26; r[13] = t13 & 0x3ffffff; + t15 += t14 >> 26; r[14] = t14 & 0x3ffffff; + t16 += t15 >> 26; r[15] = t15 & 0x3ffffff; + t17 += t16 >> 26; r[16] = t16 & 0x3ffffff; + t18 += t17 >> 26; r[17] = t17 & 0x3ffffff; + t19 += t18 >> 26; r[18] = t18 & 0x3ffffff; + t20 += t19 >> 26; r[19] = t19 & 0x3ffffff; + t21 += t20 >> 26; r[20] = t20 & 0x3ffffff; + t22 += t21 >> 26; r[21] = t21 & 0x3ffffff; + t23 += t22 >> 26; r[22] = t22 & 0x3ffffff; + t24 += t23 >> 26; r[23] = t23 & 0x3ffffff; + t25 += t24 >> 26; r[24] = t24 & 0x3ffffff; + t26 += t25 >> 26; r[25] = t25 & 0x3ffffff; + t27 += t26 >> 26; r[26] = t26 & 0x3ffffff; + t28 += t27 >> 26; r[27] = t27 & 0x3ffffff; + r[29] = (sp_digit)(t28 >> 26); + r[28] = t28 & 0x3ffffff; +} + +#endif /* WOLFSSL_SP_SMALL */ +#define sp_384_mont_reduce_order_15 sp_384_mont_reduce_15 + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static sp_digit sp_384_cmp_15(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=14; i>=0; i--) { + r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#else + r |= (a[14] - b[14]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[13] - b[13]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[12] - b[12]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[11] - b[11]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[10] - b[10]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 9] - b[ 9]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 8] - b[ 8]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 7] - b[ 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 6] - b[ 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 5] - b[ 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 4] - b[ 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 3] - b[ 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 2] - b[ 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 1] - b[ 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 0] - b[ 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
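+ *
+ * Note (added): applying the mask with AND instead of branching on m
+ * keeps the subtraction constant time.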
+/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing the conditional subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static void sp_384_cond_sub_15(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 15; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + r[ 0] = a[ 0] - (b[ 0] & m); + r[ 1] = a[ 1] - (b[ 1] & m); + r[ 2] = a[ 2] - (b[ 2] & m); + r[ 3] = a[ 3] - (b[ 3] & m); + r[ 4] = a[ 4] - (b[ 4] & m); + r[ 5] = a[ 5] - (b[ 5] & m); + r[ 6] = a[ 6] - (b[ 6] & m); + r[ 7] = a[ 7] - (b[ 7] & m); + r[ 8] = a[ 8] - (b[ 8] & m); + r[ 9] = a[ 9] - (b[ 9] & m); + r[10] = a[10] - (b[10] & m); + r[11] = a[11] - (b[11] & m); + r[12] = a[12] - (b[12] & m); + r[13] = a[13] - (b[13] & m); + r[14] = a[14] - (b[14] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_384_mul_add_15(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 15; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x3ffffff; + t >>= 26; + } + r[15] += t; +#else + int64_t tb = b; + int64_t t[15]; + + t[ 0] = tb * a[ 0]; + t[ 1] = tb * a[ 1]; + t[ 2] = tb * a[ 2]; + t[ 3] = tb * a[ 3]; + t[ 4] = tb * a[ 4]; + t[ 5] = tb * a[ 5]; + t[ 6] = tb * a[ 6]; + t[ 7] = tb * a[ 7]; + t[ 8] = tb * a[ 8]; + t[ 9] = tb * a[ 9]; + t[10] = tb * a[10]; + t[11] = tb * a[11]; + t[12] = tb * a[12]; + t[13] = tb * a[13]; + t[14] = tb * a[14]; + r[ 0] += (sp_digit) (t[ 0] & 0x3ffffff); + r[ 1] += (sp_digit)((t[ 0] >> 26) + (t[ 1] & 0x3ffffff)); + r[ 2] += (sp_digit)((t[ 1] >> 26) + (t[ 2] & 0x3ffffff)); + r[ 3] += (sp_digit)((t[ 2] >> 26) + (t[ 3] & 0x3ffffff)); + r[ 4] += (sp_digit)((t[ 3] >> 26) + (t[ 4] & 0x3ffffff)); + r[ 5] += (sp_digit)((t[ 4] >> 26) + (t[ 5] & 0x3ffffff)); + r[ 6] += (sp_digit)((t[ 5] >> 26) + (t[ 6] & 0x3ffffff)); + r[ 7] += (sp_digit)((t[ 6] >> 26) + (t[ 7] & 0x3ffffff)); + r[ 8] += (sp_digit)((t[ 7] >> 26) + (t[ 8] & 0x3ffffff)); + r[ 9] += (sp_digit)((t[ 8] >> 26) + (t[ 9] & 0x3ffffff)); + r[10] += (sp_digit)((t[ 9] >> 26) + (t[10] & 0x3ffffff)); + r[11] += (sp_digit)((t[10] >> 26) + (t[11] & 0x3ffffff)); + r[12] += (sp_digit)((t[11] >> 26) + (t[12] & 0x3ffffff)); + r[13] += (sp_digit)((t[12] >> 26) + (t[13] & 0x3ffffff)); + r[14] += (sp_digit)((t[13] >> 26) + (t[14] & 0x3ffffff)); + r[15] += (sp_digit) (t[14] >> 26); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 26 bits. + * + * a Array of sp_digit to normalize. + */ +static void sp_384_norm_15(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 14; i++) { + a[i+1] += a[i] >> 26; + a[i] &= 0x3ffffff; + } +#else + a[1] += a[0] >> 26; a[0] &= 0x3ffffff; + a[2] += a[1] >> 26; a[1] &= 0x3ffffff; + a[3] += a[2] >> 26; a[2] &= 0x3ffffff; + a[4] += a[3] >> 26; a[3] &= 0x3ffffff; + a[5] += a[4] >> 26; a[4] &= 0x3ffffff; + a[6] += a[5] >> 26; a[5] &= 0x3ffffff; + a[7] += a[6] >> 26; a[6] &= 0x3ffffff; + a[8] += a[7] >> 26; a[7] &= 0x3ffffff; + a[9] += a[8] >> 26; a[8] &= 0x3ffffff; + a[10] += a[9] >> 26; a[9] &= 0x3ffffff; + a[11] += a[10] >> 26; a[10] &= 0x3ffffff; + a[12] += a[11] >> 26; a[11] &= 0x3ffffff; + a[13] += a[12] >> 26; a[12] &= 0x3ffffff; + a[14] += a[13] >> 26; a[13] &= 0x3ffffff; +#endif +} +
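/* [Editorial sketch, not part of the patch] This file stores 384-bit values
 * as 15 limbs of 26 bits each (15 * 26 = 390 bits), so a 32-bit sp_digit has
 * headroom for carries and the product of two limbs fits in an int64_t.
 * sp_384_norm_15 above pushes the accumulated carries up one limb at a time,
 * exactly like this standalone 3-limb toy: */
#include <stdint.h>
#include <stdio.h>

#define LIMB_BITS 26
#define LIMB_MASK 0x3ffffff

static void toy_norm_3(int32_t* a)
{
    int i;
    for (i = 0; i < 2; i++) {
        a[i + 1] += a[i] >> LIMB_BITS; /* carry into the next limb */
        a[i]     &= LIMB_MASK;         /* keep the low 26 bits */
    }
}

int main(void)
{
    /* 0x4000001 = 2^26 + 1: one bit too wide for a single limb */
    int32_t a[3] = { 0x4000001, LIMB_MASK, 0 };
    toy_norm_3(a);
    /* limb 0 keeps 1; its carry makes limb 1 overflow into limb 2 */
    printf("%d %d %d\n", a[0], a[1], a[2]); /* prints: 1 0 1 */
    return 0;
}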
+/* Shift the result in the high 384 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. + */ +static void sp_384_mont_shift_15(sp_digit* r, const sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + int64_t n = a[14] >> 20; + n += ((int64_t)a[15]) << 6; + + for (i = 0; i < 14; i++) { + r[i] = n & 0x3ffffff; + n >>= 26; + n += ((int64_t)a[16 + i]) << 6; + } + r[14] = (sp_digit)n; +#else + int64_t n = a[14] >> 20; + n += ((int64_t)a[15]) << 6; + r[ 0] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[16]) << 6; + r[ 1] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[17]) << 6; + r[ 2] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[18]) << 6; + r[ 3] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[19]) << 6; + r[ 4] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[20]) << 6; + r[ 5] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[21]) << 6; + r[ 6] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[22]) << 6; + r[ 7] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[23]) << 6; + r[ 8] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[24]) << 6; + r[ 9] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[25]) << 6; + r[10] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[26]) << 6; + r[11] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[27]) << 6; + r[12] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[28]) << 6; + r[13] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[29]) << 6; + r[14] = (sp_digit)n; +#endif /* WOLFSSL_SP_SMALL */ + XMEMSET(&r[15], 0, sizeof(*r) * 15U); +} + +/* Reduce the number back to 384 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static void sp_384_mont_reduce_15(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + int i; + sp_digit mu; + + sp_384_norm_15(a + 15); + + for (i=0; i<14; i++) { + mu = (a[i] * mp) & 0x3ffffff; + sp_384_mul_add_15(a+i, m, mu); + a[i+1] += a[i] >> 26; + } + mu = (a[i] * mp) & 0xfffffL; + sp_384_mul_add_15(a+i, m, mu); + a[i+1] += a[i] >> 26; + a[i] &= 0x3ffffff; + + sp_384_mont_shift_15(a, a); + sp_384_cond_sub_15(a, a, m, 0 - (((a[14] >> 20) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_15(a); +} + +/* Multiply two Montgomery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montgomery form. + * b Second number to multiply in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static void sp_384_mont_mul_15(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_384_mul_15(r, a, b); + sp_384_mont_reduce_15(r, m, mp); +} + +#ifdef WOLFSSL_SP_SMALL +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_384_sqr_15(sp_digit* r, const sp_digit* a) +{ + int i, j, k; + int64_t c; + + c = ((int64_t)a[14]) * a[14]; + r[29] = (sp_digit)(c >> 26); + c = (c & 0x3ffffff) << 26; + for (k = 27; k >= 0; k--) { + for (i = 14; i >= 0; i--) { + j = k - i; + if (j >= 15 || i <= j) { + break; + } + if (j < 0) { + continue; + } + + c += ((int64_t)a[i]) * a[j] * 2; + } + if (i == j) { + c += ((int64_t)a[i]) * a[i]; + } + + r[k + 2] += c >> 52; + r[k + 1] = (c >> 26) & 0x3ffffff; + c = (c & 0x3ffffff) << 26; + } + r[0] = (sp_digit)(c >> 26); +} + +#else +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer.
+ */ +SP_NOINLINE static void sp_384_sqr_15(sp_digit* r, const sp_digit* a) +{ + int64_t t0 = ((int64_t)a[ 0]) * a[ 0]; + int64_t t1 = (((int64_t)a[ 0]) * a[ 1]) * 2; + int64_t t2 = (((int64_t)a[ 0]) * a[ 2]) * 2 + + ((int64_t)a[ 1]) * a[ 1]; + int64_t t3 = (((int64_t)a[ 0]) * a[ 3] + + ((int64_t)a[ 1]) * a[ 2]) * 2; + int64_t t4 = (((int64_t)a[ 0]) * a[ 4] + + ((int64_t)a[ 1]) * a[ 3]) * 2 + + ((int64_t)a[ 2]) * a[ 2]; + int64_t t5 = (((int64_t)a[ 0]) * a[ 5] + + ((int64_t)a[ 1]) * a[ 4] + + ((int64_t)a[ 2]) * a[ 3]) * 2; + int64_t t6 = (((int64_t)a[ 0]) * a[ 6] + + ((int64_t)a[ 1]) * a[ 5] + + ((int64_t)a[ 2]) * a[ 4]) * 2 + + ((int64_t)a[ 3]) * a[ 3]; + int64_t t7 = (((int64_t)a[ 0]) * a[ 7] + + ((int64_t)a[ 1]) * a[ 6] + + ((int64_t)a[ 2]) * a[ 5] + + ((int64_t)a[ 3]) * a[ 4]) * 2; + int64_t t8 = (((int64_t)a[ 0]) * a[ 8] + + ((int64_t)a[ 1]) * a[ 7] + + ((int64_t)a[ 2]) * a[ 6] + + ((int64_t)a[ 3]) * a[ 5]) * 2 + + ((int64_t)a[ 4]) * a[ 4]; + int64_t t9 = (((int64_t)a[ 0]) * a[ 9] + + ((int64_t)a[ 1]) * a[ 8] + + ((int64_t)a[ 2]) * a[ 7] + + ((int64_t)a[ 3]) * a[ 6] + + ((int64_t)a[ 4]) * a[ 5]) * 2; + int64_t t10 = (((int64_t)a[ 0]) * a[10] + + ((int64_t)a[ 1]) * a[ 9] + + ((int64_t)a[ 2]) * a[ 8] + + ((int64_t)a[ 3]) * a[ 7] + + ((int64_t)a[ 4]) * a[ 6]) * 2 + + ((int64_t)a[ 5]) * a[ 5]; + int64_t t11 = (((int64_t)a[ 0]) * a[11] + + ((int64_t)a[ 1]) * a[10] + + ((int64_t)a[ 2]) * a[ 9] + + ((int64_t)a[ 3]) * a[ 8] + + ((int64_t)a[ 4]) * a[ 7] + + ((int64_t)a[ 5]) * a[ 6]) * 2; + int64_t t12 = (((int64_t)a[ 0]) * a[12] + + ((int64_t)a[ 1]) * a[11] + + ((int64_t)a[ 2]) * a[10] + + ((int64_t)a[ 3]) * a[ 9] + + ((int64_t)a[ 4]) * a[ 8] + + ((int64_t)a[ 5]) * a[ 7]) * 2 + + ((int64_t)a[ 6]) * a[ 6]; + int64_t t13 = (((int64_t)a[ 0]) * a[13] + + ((int64_t)a[ 1]) * a[12] + + ((int64_t)a[ 2]) * a[11] + + ((int64_t)a[ 3]) * a[10] + + ((int64_t)a[ 4]) * a[ 9] + + ((int64_t)a[ 5]) * a[ 8] + + ((int64_t)a[ 6]) * a[ 7]) * 2; + int64_t t14 = (((int64_t)a[ 0]) * a[14] + + ((int64_t)a[ 1]) * a[13] + + ((int64_t)a[ 2]) * a[12] + + ((int64_t)a[ 3]) * a[11] + + ((int64_t)a[ 4]) * a[10] + + ((int64_t)a[ 5]) * a[ 9] + + ((int64_t)a[ 6]) * a[ 8]) * 2 + + ((int64_t)a[ 7]) * a[ 7]; + int64_t t15 = (((int64_t)a[ 1]) * a[14] + + ((int64_t)a[ 2]) * a[13] + + ((int64_t)a[ 3]) * a[12] + + ((int64_t)a[ 4]) * a[11] + + ((int64_t)a[ 5]) * a[10] + + ((int64_t)a[ 6]) * a[ 9] + + ((int64_t)a[ 7]) * a[ 8]) * 2; + int64_t t16 = (((int64_t)a[ 2]) * a[14] + + ((int64_t)a[ 3]) * a[13] + + ((int64_t)a[ 4]) * a[12] + + ((int64_t)a[ 5]) * a[11] + + ((int64_t)a[ 6]) * a[10] + + ((int64_t)a[ 7]) * a[ 9]) * 2 + + ((int64_t)a[ 8]) * a[ 8]; + int64_t t17 = (((int64_t)a[ 3]) * a[14] + + ((int64_t)a[ 4]) * a[13] + + ((int64_t)a[ 5]) * a[12] + + ((int64_t)a[ 6]) * a[11] + + ((int64_t)a[ 7]) * a[10] + + ((int64_t)a[ 8]) * a[ 9]) * 2; + int64_t t18 = (((int64_t)a[ 4]) * a[14] + + ((int64_t)a[ 5]) * a[13] + + ((int64_t)a[ 6]) * a[12] + + ((int64_t)a[ 7]) * a[11] + + ((int64_t)a[ 8]) * a[10]) * 2 + + ((int64_t)a[ 9]) * a[ 9]; + int64_t t19 = (((int64_t)a[ 5]) * a[14] + + ((int64_t)a[ 6]) * a[13] + + ((int64_t)a[ 7]) * a[12] + + ((int64_t)a[ 8]) * a[11] + + ((int64_t)a[ 9]) * a[10]) * 2; + int64_t t20 = (((int64_t)a[ 6]) * a[14] + + ((int64_t)a[ 7]) * a[13] + + ((int64_t)a[ 8]) * a[12] + + ((int64_t)a[ 9]) * a[11]) * 2 + + ((int64_t)a[10]) * a[10]; + int64_t t21 = (((int64_t)a[ 7]) * a[14] + + ((int64_t)a[ 8]) * a[13] + + ((int64_t)a[ 9]) * a[12] + + ((int64_t)a[10]) * a[11]) * 2; + int64_t t22 = (((int64_t)a[ 8]) * a[14] + + 
((int64_t)a[ 9]) * a[13] + + ((int64_t)a[10]) * a[12]) * 2 + + ((int64_t)a[11]) * a[11]; + int64_t t23 = (((int64_t)a[ 9]) * a[14] + + ((int64_t)a[10]) * a[13] + + ((int64_t)a[11]) * a[12]) * 2; + int64_t t24 = (((int64_t)a[10]) * a[14] + + ((int64_t)a[11]) * a[13]) * 2 + + ((int64_t)a[12]) * a[12]; + int64_t t25 = (((int64_t)a[11]) * a[14] + + ((int64_t)a[12]) * a[13]) * 2; + int64_t t26 = (((int64_t)a[12]) * a[14]) * 2 + + ((int64_t)a[13]) * a[13]; + int64_t t27 = (((int64_t)a[13]) * a[14]) * 2; + int64_t t28 = ((int64_t)a[14]) * a[14]; + + t1 += t0 >> 26; r[ 0] = t0 & 0x3ffffff; + t2 += t1 >> 26; r[ 1] = t1 & 0x3ffffff; + t3 += t2 >> 26; r[ 2] = t2 & 0x3ffffff; + t4 += t3 >> 26; r[ 3] = t3 & 0x3ffffff; + t5 += t4 >> 26; r[ 4] = t4 & 0x3ffffff; + t6 += t5 >> 26; r[ 5] = t5 & 0x3ffffff; + t7 += t6 >> 26; r[ 6] = t6 & 0x3ffffff; + t8 += t7 >> 26; r[ 7] = t7 & 0x3ffffff; + t9 += t8 >> 26; r[ 8] = t8 & 0x3ffffff; + t10 += t9 >> 26; r[ 9] = t9 & 0x3ffffff; + t11 += t10 >> 26; r[10] = t10 & 0x3ffffff; + t12 += t11 >> 26; r[11] = t11 & 0x3ffffff; + t13 += t12 >> 26; r[12] = t12 & 0x3ffffff; + t14 += t13 >> 26; r[13] = t13 & 0x3ffffff; + t15 += t14 >> 26; r[14] = t14 & 0x3ffffff; + t16 += t15 >> 26; r[15] = t15 & 0x3ffffff; + t17 += t16 >> 26; r[16] = t16 & 0x3ffffff; + t18 += t17 >> 26; r[17] = t17 & 0x3ffffff; + t19 += t18 >> 26; r[18] = t18 & 0x3ffffff; + t20 += t19 >> 26; r[19] = t19 & 0x3ffffff; + t21 += t20 >> 26; r[20] = t20 & 0x3ffffff; + t22 += t21 >> 26; r[21] = t21 & 0x3ffffff; + t23 += t22 >> 26; r[22] = t22 & 0x3ffffff; + t24 += t23 >> 26; r[23] = t23 & 0x3ffffff; + t25 += t24 >> 26; r[24] = t24 & 0x3ffffff; + t26 += t25 >> 26; r[25] = t25 & 0x3ffffff; + t27 += t26 >> 26; r[26] = t26 & 0x3ffffff; + t28 += t27 >> 26; r[27] = t27 & 0x3ffffff; + r[29] = (sp_digit)(t28 >> 26); + r[28] = t28 & 0x3ffffff; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static void sp_384_mont_sqr_15(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_384_sqr_15(r, a); + sp_384_mont_reduce_15(r, m, mp); +} + +#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY) +/* Square the Montgomery form number a number of times. (r = a ^ (2 ^ n) mod m) + * + * r Result of squaring. + * a Number to square in Montgomery form. + * n Number of times to square. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static void sp_384_mont_sqr_n_15(sp_digit* r, const sp_digit* a, int n, + const sp_digit* m, sp_digit mp) +{ + sp_384_mont_sqr_15(r, a, m, mp); + for (; n > 1; n--) { + sp_384_mont_sqr_15(r, r, m, mp); + } +} + +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ +#ifdef WOLFSSL_SP_SMALL +/* Mod-2 for the P384 curve. */ +static const uint32_t p384_mod_minus_2[12] = { + 0xfffffffdU,0x00000000U,0x00000000U,0xffffffffU,0xfffffffeU,0xffffffffU, + 0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU +}; +#endif /* WOLFSSL_SP_SMALL */ + +/* Invert the number, in Montgomery form, modulo the modulus (prime) of the + * P384 curve. (r = 1 / a mod m) + * + * r Inverse result. + * a Number to invert. + * td Temporary data.
+ */ +static void sp_384_mont_inv_15(sp_digit* r, const sp_digit* a, sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 15); + for (i=382; i>=0; i--) { + sp_384_mont_sqr_15(t, t, p384_mod, p384_mp_mod); + if (p384_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32))) + sp_384_mont_mul_15(t, t, a, p384_mod, p384_mp_mod); + } + XMEMCPY(r, t, sizeof(sp_digit) * 15); +#else + sp_digit* t1 = td; + sp_digit* t2 = td + 2 * 15; + sp_digit* t3 = td + 4 * 15; + sp_digit* t4 = td + 6 * 15; + sp_digit* t5 = td + 8 * 15; + + /* 0x2 */ + sp_384_mont_sqr_15(t1, a, p384_mod, p384_mp_mod); + /* 0x3 */ + sp_384_mont_mul_15(t5, t1, a, p384_mod, p384_mp_mod); + /* 0xc */ + sp_384_mont_sqr_n_15(t1, t5, 2, p384_mod, p384_mp_mod); + /* 0xf */ + sp_384_mont_mul_15(t2, t5, t1, p384_mod, p384_mp_mod); + /* 0x1e */ + sp_384_mont_sqr_15(t1, t2, p384_mod, p384_mp_mod); + /* 0x1f */ + sp_384_mont_mul_15(t4, t1, a, p384_mod, p384_mp_mod); + /* 0x3e0 */ + sp_384_mont_sqr_n_15(t1, t4, 5, p384_mod, p384_mp_mod); + /* 0x3ff */ + sp_384_mont_mul_15(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0x7fe0 */ + sp_384_mont_sqr_n_15(t1, t2, 5, p384_mod, p384_mp_mod); + /* 0x7fff */ + sp_384_mont_mul_15(t4, t4, t1, p384_mod, p384_mp_mod); + /* 0x3fff8000 */ + sp_384_mont_sqr_n_15(t1, t4, 15, p384_mod, p384_mp_mod); + /* 0x3fffffff */ + sp_384_mont_mul_15(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0xfffffffc */ + sp_384_mont_sqr_n_15(t3, t2, 2, p384_mod, p384_mp_mod); + /* 0xfffffffd */ + sp_384_mont_mul_15(r, t3, a, p384_mod, p384_mp_mod); + /* 0xffffffff */ + sp_384_mont_mul_15(t3, t5, t3, p384_mod, p384_mp_mod); + /* 0xfffffffc0000000 */ + sp_384_mont_sqr_n_15(t1, t2, 30, p384_mod, p384_mp_mod); + /* 0xfffffffffffffff */ + sp_384_mont_mul_15(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffff000000000000000 */ + sp_384_mont_sqr_n_15(t1, t2, 60, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffff */ + sp_384_mont_mul_15(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */ + sp_384_mont_sqr_n_15(t1, t2, 120, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_15(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */ + sp_384_mont_sqr_n_15(t1, t2, 15, p384_mod, p384_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_15(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */ + sp_384_mont_sqr_n_15(t1, t2, 33, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */ + sp_384_mont_mul_15(t2, t3, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */ + sp_384_mont_sqr_n_15(t1, t2, 96, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */ + sp_384_mont_mul_15(r, r, t1, p384_mod, p384_mp_mod); + +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. + * p Montgomery form projective coordinate point. + * t Temporary ordinate data. 
+ */ +static void sp_384_map_15(sp_point_384* r, const sp_point_384* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*15; + int32_t n; + + sp_384_mont_inv_15(t1, p->z, t + 2*15); + + sp_384_mont_sqr_15(t2, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t1, t2, t1, p384_mod, p384_mp_mod); + + /* x /= z^2 */ + sp_384_mont_mul_15(r->x, p->x, t2, p384_mod, p384_mp_mod); + XMEMSET(r->x + 15, 0, sizeof(r->x) / 2U); + sp_384_mont_reduce_15(r->x, p384_mod, p384_mp_mod); + /* Reduce x to less than modulus */ + n = sp_384_cmp_15(r->x, p384_mod); + sp_384_cond_sub_15(r->x, r->x, p384_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_15(r->x); + + /* y /= z^3 */ + sp_384_mont_mul_15(r->y, p->y, t1, p384_mod, p384_mp_mod); + XMEMSET(r->y + 15, 0, sizeof(r->y) / 2U); + sp_384_mont_reduce_15(r->y, p384_mod, p384_mp_mod); + /* Reduce y to less than modulus */ + n = sp_384_cmp_15(r->y, p384_mod); + sp_384_cond_sub_15(r->y, r->y, p384_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_15(r->y); + + XMEMSET(r->z, 0, sizeof(r->z)); + r->z[0] = 1; + +} + +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_384_add_15(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 15; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#else +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_384_add_15(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + r[ 0] = a[ 0] + b[ 0]; + r[ 1] = a[ 1] + b[ 1]; + r[ 2] = a[ 2] + b[ 2]; + r[ 3] = a[ 3] + b[ 3]; + r[ 4] = a[ 4] + b[ 4]; + r[ 5] = a[ 5] + b[ 5]; + r[ 6] = a[ 6] + b[ 6]; + r[ 7] = a[ 7] + b[ 7]; + r[ 8] = a[ 8] + b[ 8]; + r[ 9] = a[ 9] + b[ 9]; + r[10] = a[10] + b[10]; + r[11] = a[11] + b[11]; + r[12] = a[12] + b[12]; + r[13] = a[13] + b[13]; + r[14] = a[14] + b[14]; + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Add two Montgomery form numbers (r = a + b % m). + * + * r Result of addition. + * a First number to add in Montgomery form. + * b Second number to add in Montgomery form. + * m Modulus (prime). + */ +static void sp_384_mont_add_15(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + (void)sp_384_add_15(r, a, b); + sp_384_norm_15(r); + sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_15(r); +} + +/* Double a Montgomery form number (r = a + a % m). + * + * r Result of doubling. + * a Number to double in Montgomery form. + * m Modulus (prime). + */ +static void sp_384_mont_dbl_15(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + (void)sp_384_add_15(r, a, a); + sp_384_norm_15(r); + sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_15(r); +} + +/* Triple a Montgomery form number (r = a + a + a % m). + * + * r Result of tripling. + * a Number to triple in Montgomery form. + * m Modulus (prime). + */ +static void sp_384_mont_tpl_15(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + (void)sp_384_add_15(r, a, a); + sp_384_norm_15(r); + sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_15(r); + (void)sp_384_add_15(r, r, a); + sp_384_norm_15(r); + sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ?
+ (sp_digit)1 : (sp_digit)0)); + sp_384_norm_15(r); +} + +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_384_sub_15(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 15; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_384_sub_15(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + r[ 0] = a[ 0] - b[ 0]; + r[ 1] = a[ 1] - b[ 1]; + r[ 2] = a[ 2] - b[ 2]; + r[ 3] = a[ 3] - b[ 3]; + r[ 4] = a[ 4] - b[ 4]; + r[ 5] = a[ 5] - b[ 5]; + r[ 6] = a[ 6] - b[ 6]; + r[ 7] = a[ 7] - b[ 7]; + r[ 8] = a[ 8] - b[ 8]; + r[ 9] = a[ 9] - b[ 9]; + r[10] = a[10] - b[10]; + r[11] = a[11] - b[11]; + r[12] = a[12] - b[12]; + r[13] = a[13] - b[13]; + r[14] = a[14] - b[14]; + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_384_cond_add_15(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 15; i++) { + r[i] = a[i] + (b[i] & m); + } +#else + r[ 0] = a[ 0] + (b[ 0] & m); + r[ 1] = a[ 1] + (b[ 1] & m); + r[ 2] = a[ 2] + (b[ 2] & m); + r[ 3] = a[ 3] + (b[ 3] & m); + r[ 4] = a[ 4] + (b[ 4] & m); + r[ 5] = a[ 5] + (b[ 5] & m); + r[ 6] = a[ 6] + (b[ 6] & m); + r[ 7] = a[ 7] + (b[ 7] & m); + r[ 8] = a[ 8] + (b[ 8] & m); + r[ 9] = a[ 9] + (b[ 9] & m); + r[10] = a[10] + (b[10] & m); + r[11] = a[11] + (b[11] & m); + r[12] = a[12] + (b[12] & m); + r[13] = a[13] + (b[13] & m); + r[14] = a[14] + (b[14] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Subtract two Montgomery form numbers (r = a - b % m). + * + * r Result of subtraction. + * a Number to subtract from in Montgomery form. + * b Number to subtract with in Montgomery form. + * m Modulus (prime). + */ +static void sp_384_mont_sub_15(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + (void)sp_384_sub_15(r, a, b); + sp_384_cond_add_15(r, r, m, r[14] >> 20); + sp_384_norm_15(r); +} +
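/* [Editorial sketch, not part of the patch] sp_384_mont_sub_15 above computes
 * a - b, then uses r[14] >> 20 (the sign bits that spill past the 384-bit
 * boundary of the top limb) as the mask for sp_384_cond_add_15, so the
 * modulus is added back only when the subtraction went negative and there is
 * no data-dependent branch. The same pattern on ordinary integers, assuming
 * a toy modulus and the usual arithmetic right shift of negatives: */
#include <stdint.h>
#include <stdio.h>

/* (a - b) mod m without branching on the sign of the difference. */
static uint32_t toy_mod_sub(uint32_t a, uint32_t b, uint32_t m)
{
    int32_t d = (int32_t)(a - b);
    /* mask = 0xffffffff when d < 0, 0 otherwise (arithmetic shift) */
    uint32_t mask = (uint32_t)(d >> 31);
    return (uint32_t)d + (m & mask);
}

int main(void)
{
    printf("%u\n", toy_mod_sub(3, 5, 7)); /* prints 5: (3 - 5) mod 7 */
    return 0;
}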
+/* Shift number right one bit. + * Bottom bit is lost. + * + * r Result of shift. + * a Number to shift. + */ +SP_NOINLINE static void sp_384_rshift1_15(sp_digit* r, sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<14; i++) { + r[i] = ((a[i] >> 1) | (a[i + 1] << 25)) & 0x3ffffff; + } +#else + r[0] = ((a[0] >> 1) | (a[1] << 25)) & 0x3ffffff; + r[1] = ((a[1] >> 1) | (a[2] << 25)) & 0x3ffffff; + r[2] = ((a[2] >> 1) | (a[3] << 25)) & 0x3ffffff; + r[3] = ((a[3] >> 1) | (a[4] << 25)) & 0x3ffffff; + r[4] = ((a[4] >> 1) | (a[5] << 25)) & 0x3ffffff; + r[5] = ((a[5] >> 1) | (a[6] << 25)) & 0x3ffffff; + r[6] = ((a[6] >> 1) | (a[7] << 25)) & 0x3ffffff; + r[7] = ((a[7] >> 1) | (a[8] << 25)) & 0x3ffffff; + r[8] = ((a[8] >> 1) | (a[9] << 25)) & 0x3ffffff; + r[9] = ((a[9] >> 1) | (a[10] << 25)) & 0x3ffffff; + r[10] = ((a[10] >> 1) | (a[11] << 25)) & 0x3ffffff; + r[11] = ((a[11] >> 1) | (a[12] << 25)) & 0x3ffffff; + r[12] = ((a[12] >> 1) | (a[13] << 25)) & 0x3ffffff; + r[13] = ((a[13] >> 1) | (a[14] << 25)) & 0x3ffffff; +#endif + r[14] = a[14] >> 1; +} + +/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) + * + * r Result of division by 2. + * a Number to divide. + * m Modulus (prime). + */ +static void sp_384_div2_15(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_384_cond_add_15(r, a, m, 0 - (a[0] & 1)); + sp_384_norm_15(r); + sp_384_rshift1_15(r, r); +} + +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static void sp_384_proj_point_dbl_15(sp_point_384* r, const sp_point_384* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*15; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_15(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_15(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_15(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_15(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_15(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_15(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_15(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_15(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_15(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_15(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_div2_15(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_15(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_15(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_15(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_15(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_15(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_15(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_15(y, y, t2, p384_mod); +} + +/* Compare two numbers to determine if they are equal. + * Constant time implementation. + * + * a First number to compare. + * b Second number to compare. + * returns 1 when equal and 0 otherwise. + */ +static int sp_384_cmp_equal_15(const sp_digit* a, const sp_digit* b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) | + (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) | + (a[8] ^ b[8]) | (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11]) | + (a[12] ^ b[12]) | (a[13] ^ b[13]) | (a[14] ^ b[14])) == 0; +} + +/* Add two Montgomery form projective points.
+ * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_384_proj_point_add_15(sp_point_384* r, const sp_point_384* p, const sp_point_384* q, + sp_digit* t) +{ + const sp_point_384* ap[2]; + sp_point_384* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*15; + sp_digit* t3 = t + 4*15; + sp_digit* t4 = t + 6*15; + sp_digit* t5 = t + 8*15; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Ensure only the first point is the same as the result. */ + if (q == r) { + const sp_point_384* a = p; + p = q; + q = a; + } + + /* Check double */ + (void)sp_384_sub_15(t1, p384_mod, q->y); + sp_384_norm_15(t1); + if ((sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) & + (sp_384_cmp_equal_15(p->y, q->y) | sp_384_cmp_equal_15(p->y, t1))) != 0) { + sp_384_proj_point_dbl_15(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_384)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<15; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<15; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<15; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_15(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t1, t1, x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_15(t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_15(t3, t3, y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_15(t4, t4, q->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_15(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_15(t4, t4, t3, p384_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_15(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(z, z, t2, p384_mod, p384_mp_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_15(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_15(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t5, t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(x, x, t5, p384_mod); + sp_384_mont_dbl_15(t1, y, p384_mod); + sp_384_mont_sub_15(x, x, t1, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_15(y, y, x, p384_mod); + sp_384_mont_mul_15(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(y, y, t5, p384_mod); + } +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, const sp_digit* k, + int map, void* heap) +{ +#ifdef WOLFSSL_SP_NO_MALLOC + sp_point_384 t[3]; + sp_digit tmp[2 * 15 * 6]; +#else + sp_point_384* t; + sp_digit* tmp; +#endif + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + (void)heap; + +#ifndef WOLFSSL_SP_NO_MALLOC + t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + XMEMSET(t, 0, sizeof(sp_point_384) * 3); + + /* t[0] = {0, 0, 1} * norm */ + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_384_mod_mul_norm_15(t[1].x, g->x, p384_mod); + } + if (err == MP_OKAY) + err = sp_384_mod_mul_norm_15(t[1].y, g->y, p384_mod); + if (err == MP_OKAY) + err = sp_384_mod_mul_norm_15(t[1].z, g->z, p384_mod); + + if (err == MP_OKAY) { + i = 14; + c = 20; + n = k[i--] << (26 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) + break; + + n = k[i--]; + c = 26; + } + + y = (n >> 25) & 1; + n <<= 1; + + sp_384_proj_point_add_15(&t[y^1], &t[0], &t[1], tmp); + + XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), + sizeof(sp_point_384)); + sp_384_proj_point_dbl_15(&t[2], &t[2], tmp); + XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), &t[2], + sizeof(sp_point_384)); + } + + if (map != 0) { + sp_384_map_15(r, &t[0], tmp); + } + else { + XMEMCPY(r, &t[0], sizeof(sp_point_384)); + } + } + +#ifndef WOLFSSL_SP_NO_MALLOC + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 15 * 6); + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_point_384) * 3); + XFREE(t, NULL, DYNAMIC_TYPE_ECC); + } +#else + ForceZero(tmp, sizeof(tmp)); + ForceZero(t, sizeof(t)); +#endif + + return err; +} +
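/* [Editorial sketch, not part of the patch] The XMEMCPY calls in the ladder
 * above select between &t[0] and &t[1] by masking the two addresses with
 * addr_mask[y^1] / addr_mask[y] and adding, so the secret bit y never drives
 * a branch or a secret-dependent address pattern. A minimal illustration of
 * the address trick, with a local mask table of the same shape (the size_t
 * casts mirror the file's own usage): */
#include <stddef.h>
#include <stdio.h>

static const size_t mask[2] = { 0, (size_t)-1 }; /* bit -> all-zero/all-one */

int main(void)
{
    int t0 = 10, t1 = 20;
    int y = 1; /* secret selector bit */
    /* exactly one of the two masked addresses survives the addition */
    int* p = (int*)(((size_t)&t0 & mask[y ^ 1]) + ((size_t)&t1 & mask[y]));
    printf("%d\n", *p); /* prints 20 */
    return 0;
}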
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, const sp_digit* k, + int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 t[3]; + sp_digit tmp[2 * 15 * 6]; +#else + sp_point_384* t; + sp_digit* tmp; +#endif + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + (void)heap; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_point_384*)XMALLOC(sizeof(*t) * 3, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + t[1].infinity = 0; + err = sp_384_mod_mul_norm_15(t[1].x, g->x, p384_mod); + } + if (err == MP_OKAY) + err = sp_384_mod_mul_norm_15(t[1].y, g->y, p384_mod); + if (err == MP_OKAY) + err = sp_384_mod_mul_norm_15(t[1].z, g->z, p384_mod); + + if (err == MP_OKAY) { + i = 14; + c = 20; + n = k[i--] << (26 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) + break; + + n = k[i--]; + c = 26; + } + + y = (n >> 25) & 1; + n <<= 1; + + sp_384_proj_point_add_15(&t[y^1], &t[0], &t[1], tmp); + + XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), sizeof(t[2])); + sp_384_proj_point_dbl_15(&t[2], &t[2], tmp); + XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2])); + } + + if (map != 0) { + sp_384_map_15(r, &t[0], tmp); + } + else { + XMEMCPY(r, &t[0], sizeof(sp_point_384)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 15 * 6); + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + } + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_point_384) * 3); + XFREE(t, heap, DYNAMIC_TYPE_ECC); + } +#else + ForceZero(tmp, sizeof(tmp)); + ForceZero(t, sizeof(t)); +#endif + + return err; +} + +#else +/* A table entry for pre-computed points. */ +typedef struct sp_table_entry_384 { + sp_digit x[15]; + sp_digit y[15]; +} sp_table_entry_384; +
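/* [Editorial sketch, not part of the patch] sp_384_ecc_mulmod_fast_15 below
 * precomputes t[0..15] = 0*G .. 15*G and then consumes the scalar four bits
 * at a time: four doublings followed by one table addition per window. The
 * window walk in isolation, using plain integers in place of points (shift
 * left 4 stands in for the four doublings): */
#include <stdio.h>

/* Computes k * g by the fixed-window method (window width 4). */
static unsigned win_mul(unsigned k, unsigned g)
{
    unsigned t[16];
    unsigned r;
    int i;

    for (i = 0; i < 16; i++)        /* t[i] = i*g, the "point" table */
        t[i] = (unsigned)i * g;

    r = t[(k >> 28) & 0xf];         /* top window first */
    for (i = 24; i >= 0; i -= 4) {
        r <<= 4;                    /* four "doublings" */
        r += t[(k >> i) & 0xf];     /* one table "addition" */
    }
    return r;
}

int main(void)
{
    printf("%u\n", win_mul(1234567u, 3u)); /* prints 3703701 */
    return 0;
}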
+/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_fast_15(sp_point_384* r, const sp_point_384* g, const sp_digit* k, + int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 td[16]; + sp_point_384 rtd; + sp_digit tmpd[2 * 15 * 6]; +#endif + sp_point_384* t; + sp_point_384* rt; + sp_digit* tmp; + sp_digit n; + int i; + int c, y; + int err; + + (void)heap; + + err = sp_384_point_new_15(heap, rtd, rt); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#else + t = td; + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + (void)sp_384_mod_mul_norm_15(t[1].x, g->x, p384_mod); + (void)sp_384_mod_mul_norm_15(t[1].y, g->y, p384_mod); + (void)sp_384_mod_mul_norm_15(t[1].z, g->z, p384_mod); + t[1].infinity = 0; + sp_384_proj_point_dbl_15(&t[ 2], &t[ 1], tmp); + t[ 2].infinity = 0; + sp_384_proj_point_add_15(&t[ 3], &t[ 2], &t[ 1], tmp); + t[ 3].infinity = 0; + sp_384_proj_point_dbl_15(&t[ 4], &t[ 2], tmp); + t[ 4].infinity = 0; + sp_384_proj_point_add_15(&t[ 5], &t[ 3], &t[ 2], tmp); + t[ 5].infinity = 0; + sp_384_proj_point_dbl_15(&t[ 6], &t[ 3], tmp); + t[ 6].infinity = 0; + sp_384_proj_point_add_15(&t[ 7], &t[ 4], &t[ 3], tmp); + t[ 7].infinity = 0; + sp_384_proj_point_dbl_15(&t[ 8], &t[ 4], tmp); + t[ 8].infinity = 0; + sp_384_proj_point_add_15(&t[ 9], &t[ 5], &t[ 4], tmp); + t[ 9].infinity = 0; + sp_384_proj_point_dbl_15(&t[10], &t[ 5], tmp); + t[10].infinity = 0; + sp_384_proj_point_add_15(&t[11], &t[ 6], &t[ 5], tmp); + t[11].infinity = 0; + sp_384_proj_point_dbl_15(&t[12], &t[ 6], tmp); + t[12].infinity = 0; + sp_384_proj_point_add_15(&t[13], &t[ 7], &t[ 6], tmp); + t[13].infinity = 0; + sp_384_proj_point_dbl_15(&t[14], &t[ 7], tmp); + t[14].infinity = 0; + sp_384_proj_point_add_15(&t[15], &t[ 8], &t[ 7], tmp); + t[15].infinity = 0; + + i = 13; + n = k[i+1] << 6; + c = 16; + y = n >> 22; + XMEMCPY(rt, &t[y], sizeof(sp_point_384)); + n <<= 10; + for (; i>=0 || c>=4; ) { + if (c < 4) { + n |= k[i--] << (6 - c); + c += 26; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + + sp_384_proj_point_dbl_15(rt, rt, tmp); + sp_384_proj_point_dbl_15(rt, rt, tmp); + sp_384_proj_point_dbl_15(rt, rt, tmp); + sp_384_proj_point_dbl_15(rt, rt, tmp); + + sp_384_proj_point_add_15(rt, rt, &t[y], tmp); + } + + if (map != 0) { + sp_384_map_15(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_384)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 15 * 6); + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + } + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_point_384) * 16); + XFREE(t, heap, DYNAMIC_TYPE_ECC); + } +#else + ForceZero(tmpd, sizeof(tmpd)); + ForceZero(td, sizeof(td)); +#endif + sp_384_point_free_15(rt, 1, heap); + + return err; +} + +#ifdef FP_ECC +/* Double the Montgomery form projective point p a number of times. + * + * p Point to double in place. + * n Number of times to double. + * t Temporary ordinate data.
+ */ +static void sp_384_proj_point_dbl_n_15(sp_point_384* p, int n, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*15; + sp_digit* b = t + 4*15; + sp_digit* t1 = t + 6*15; + sp_digit* t2 = t + 8*15; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_384_mont_dbl_15(y, y, p384_mod); + /* W = Z^4 */ + sp_384_mont_sqr_15(w, z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_15(w, w, p384_mod, p384_mp_mod); + +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_384_mont_sqr_15(t1, x, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(t1, t1, w, p384_mod); + sp_384_mont_tpl_15(a, t1, p384_mod); + /* B = X*Y^2 */ + sp_384_mont_sqr_15(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(b, t1, x, p384_mod, p384_mp_mod); + /* X = A^2 - 2B */ + sp_384_mont_sqr_15(x, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_15(t2, b, p384_mod); + sp_384_mont_sub_15(x, x, t2, p384_mod); + /* Z = Z*Y */ + sp_384_mont_mul_15(z, z, y, p384_mod, p384_mp_mod); + /* t2 = Y^4 */ + sp_384_mont_sqr_15(t1, t1, p384_mod, p384_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_384_mont_mul_15(w, w, t1, p384_mod, p384_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_384_mont_sub_15(y, b, x, p384_mod); + sp_384_mont_mul_15(y, y, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_15(y, y, p384_mod); + sp_384_mont_sub_15(y, y, t1, p384_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_384_mont_sqr_15(t1, x, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(t1, t1, w, p384_mod); + sp_384_mont_tpl_15(a, t1, p384_mod); + /* B = X*Y^2 */ + sp_384_mont_sqr_15(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(b, t1, x, p384_mod, p384_mp_mod); + /* X = A^2 - 2B */ + sp_384_mont_sqr_15(x, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_15(t2, b, p384_mod); + sp_384_mont_sub_15(x, x, t2, p384_mod); + /* Z = Z*Y */ + sp_384_mont_mul_15(z, z, y, p384_mod, p384_mp_mod); + /* t2 = Y^4 */ + sp_384_mont_sqr_15(t1, t1, p384_mod, p384_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_384_mont_sub_15(y, b, x, p384_mod); + sp_384_mont_mul_15(y, y, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_15(y, y, p384_mod); + sp_384_mont_sub_15(y, y, t1, p384_mod); +#endif + /* Y = Y/2 */ + sp_384_div2_15(y, y, p384_mod); +} + +#endif /* FP_ECC */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
+ */ +static void sp_384_proj_point_add_qz1_15(sp_point_384* r, const sp_point_384* p, + const sp_point_384* q, sp_digit* t) +{ + const sp_point_384* ap[2]; + sp_point_384* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*15; + sp_digit* t3 = t + 4*15; + sp_digit* t4 = t + 6*15; + sp_digit* t5 = t + 8*15; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Check double */ + (void)sp_384_sub_15(t1, p384_mod, q->y); + sp_384_norm_15(t1); + if ((sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) & + (sp_384_cmp_equal_15(p->y, q->y) | sp_384_cmp_equal_15(p->y, t1))) != 0) { + sp_384_proj_point_dbl_15(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_384)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<15; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<15; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<15; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_15(t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_15(t4, t4, q->y, p384_mod, p384_mp_mod); + /* H = U2 - X1 */ + sp_384_mont_sub_15(t2, t2, x, p384_mod); + /* R = S2 - Y1 */ + sp_384_mont_sub_15(t4, t4, y, p384_mod); + /* Z3 = H*Z1 */ + sp_384_mont_mul_15(z, z, t2, p384_mod, p384_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_384_mont_sqr_15(t1, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_15(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t3, x, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t5, t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(x, t1, t5, p384_mod); + sp_384_mont_dbl_15(t1, t3, p384_mod); + sp_384_mont_sub_15(x, x, t1, p384_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_384_mont_sub_15(t3, t3, x, p384_mod); + sp_384_mont_mul_15(t3, t3, t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t5, t5, y, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(y, t3, t5, p384_mod); + } +} + +#ifdef FP_ECC +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_384_proj_to_affine_15(sp_point_384* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 15; + sp_digit* tmp = t + 4 * 15; + + sp_384_mont_inv_15(t1, a->z, tmp); + + sp_384_mont_sqr_15(t2, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t1, t2, t1, p384_mod, p384_mp_mod); + + sp_384_mont_mul_15(a->x, a->x, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(a->y, a->y, t1, p384_mod, p384_mp_mod); + XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod)); +} + +/* Generate the pre-computed table of points for the base point. + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. 
+ */ +static int sp_384_gen_stripe_table_15(const sp_point_384* a, + sp_table_entry_384* table, sp_digit* tmp, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 td, s1d, s2d; +#endif + sp_point_384* t; + sp_point_384* s1 = NULL; + sp_point_384* s2 = NULL; + int i, j; + int err; + + (void)heap; + + err = sp_384_point_new_15(heap, td, t); + if (err == MP_OKAY) { + err = sp_384_point_new_15(heap, s1d, s1); + } + if (err == MP_OKAY) { + err = sp_384_point_new_15(heap, s2d, s2); + } + + if (err == MP_OKAY) { + err = sp_384_mod_mul_norm_15(t->x, a->x, p384_mod); + } + if (err == MP_OKAY) { + err = sp_384_mod_mul_norm_15(t->y, a->y, p384_mod); + } + if (err == MP_OKAY) { + err = sp_384_mod_mul_norm_15(t->z, a->z, p384_mod); + } + if (err == MP_OKAY) { + t->infinity = 0; + sp_384_proj_to_affine_15(t, tmp); + + XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod)); + s1->infinity = 0; + XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod)); + s2->infinity = 0; + + /* table[0] = {0, 0, infinity} */ + XMEMSET(&table[0], 0, sizeof(sp_table_entry_384)); + /* table[1] = Affine version of 'a' in Montgomery form */ + XMEMCPY(table[1].x, t->x, sizeof(table->x)); + XMEMCPY(table[1].y, t->y, sizeof(table->y)); + + for (i=1; i<8; i++) { + sp_384_proj_point_dbl_n_15(t, 48, tmp); + sp_384_proj_to_affine_15(t, tmp); + XMEMCPY(table[1<<i].x, t->x, sizeof(table->x)); + XMEMCPY(table[1<<i].y, t->y, sizeof(table->y)); + } + + for (i=1; i<8; i++) { + XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x)); + XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y)); + for (j=(1<<i)+1; j<(1<<(i+1)); j++) { + XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x)); + XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y)); + sp_384_proj_point_add_qz1_15(t, s1, s2, tmp); + sp_384_proj_to_affine_15(t, tmp); + XMEMCPY(table[j].x, t->x, sizeof(table->x)); + XMEMCPY(table[j].y, t->y, sizeof(table->y)); + } + } + } + + sp_384_point_free_15(s2, 0, heap); + sp_384_point_free_15(s1, 0, heap); + sp_384_point_free_15( t, 0, heap); + + return err; +} + +#endif /* FP_ECC */
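/* [Editorial sketch, not part of the patch] The stripe table built above is
 * indexed with one bit taken from each 48-bit "stripe" of the 384-bit
 * scalar: at column i, bit j*48 + i of k contributes bit j of the table
 * index y (8 stripes -> 256 entries). The extraction below mirrors the
 * y-building loop in sp_384_ecc_mulmod_stripe_15 that follows, assuming the
 * scalar is given as 26-bit limbs: */
#include <stdio.h>

typedef int sp_digit_toy;

/* Gather bit j*48 + i of k (26-bit limbs) for j = 0..7 into one byte. */
static int stripe_index(const sp_digit_toy* k, int i)
{
    int j, x, y = 0;
    for (j = 0, x = i; j < 8; j++, x += 48) {
        y |= ((k[x / 26] >> (x % 26)) & 1) << j;
    }
    return y;
}

int main(void)
{
    sp_digit_toy k[15] = { 0 };
    k[0] = 1;               /* bit 0 of the scalar set ... */
    k[1] = 1 << (48 - 26);  /* ... and bit 48 set (limb 1, offset 22) */
    printf("%d\n", stripe_index(k, 0)); /* prints 3: stripes j=0 and j=1 */
    return 0;
}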
+/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_stripe_15(sp_point_384* r, const sp_point_384* g, + const sp_table_entry_384* table, const sp_digit* k, int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 rtd; + sp_point_384 pd; + sp_digit td[2 * 15 * 6]; +#endif + sp_point_384* rt; + sp_point_384* p = NULL; + sp_digit* t; + int i, j; + int y, x; + int err; + + (void)g; + (void)heap; + + + err = sp_384_point_new_15(heap, rtd, rt); + if (err == MP_OKAY) { + err = sp_384_point_new_15(heap, pd, p); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) { + err = MEMORY_E; + } +#else + t = td; +#endif + + if (err == MP_OKAY) { + XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod)); + XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod)); + + y = 0; + for (j=0,x=47; j<8; j++,x+=48) { + y |= ((k[x / 26] >> (x % 26)) & 1) << j; + } + XMEMCPY(rt->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(rt->y, table[y].y, sizeof(table[y].y)); + rt->infinity = !y; + for (i=46; i>=0; i--) { + y = 0; + for (j=0,x=i; j<8; j++,x+=48) { + y |= ((k[x / 26] >> (x % 26)) & 1) << j; + } + + sp_384_proj_point_dbl_15(rt, rt, t); + XMEMCPY(p->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(p->y, table[y].y, sizeof(table[y].y)); + p->infinity = !y; + sp_384_proj_point_add_qz1_15(rt, rt, p, t); + } + + if (map != 0) { + sp_384_map_15(r, rt, t); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_384)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XFREE(t, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_15(p, 0, heap); + sp_384_point_free_15(rt, 0, heap); + + return err; +} + +#ifdef FP_ECC +#ifndef FP_ENTRIES + #define FP_ENTRIES 16 +#endif + +typedef struct sp_cache_384_t { + sp_digit x[15]; + sp_digit y[15]; + sp_table_entry_384 table[256]; + uint32_t cnt; + int set; +} sp_cache_384_t; + +static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES]; +static THREAD_LS_T int sp_cache_384_last = -1; +static THREAD_LS_T int sp_cache_384_inited = 0; + +#ifndef HAVE_THREAD_LS + static volatile int initCacheMutex_384 = 0; + static wolfSSL_Mutex sp_cache_384_lock; +#endif + +static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) +{ + int i, j; + uint32_t least; + + if (sp_cache_384_inited == 0) { + for (i=0; i<FP_ENTRIES; i++) { + sp_cache_384[i].set = 0; + } + sp_cache_384_inited = 1; + } + + /* Compare point with those in cache. */ + for (i=0; i<FP_ENTRIES; i++) { + if (!sp_cache_384[i].set) + continue; + + if (sp_384_cmp_equal_15(g->x, sp_cache_384[i].x) & + sp_384_cmp_equal_15(g->y, sp_cache_384[i].y)) { + sp_cache_384[i].cnt++; + break; + } + } + + /* No match. */ + if (i == FP_ENTRIES) { + /* Find empty entry. */ + i = (sp_cache_384_last + 1) % FP_ENTRIES; + for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) { + if (!sp_cache_384[i].set) { + break; + } + } + + /* Evict least used. */ + if (i == sp_cache_384_last) { + least = sp_cache_384[0].cnt; + for (j=1; j<FP_ENTRIES; j++) { + if (sp_cache_384[j].cnt < least) { + i = j; + least = sp_cache_384[i].cnt; + } + } + } + + XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x)); + XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y)); + sp_cache_384[i].set = 1; + sp_cache_384[i].cnt = 1; + } + + *cache = &sp_cache_384[i]; + sp_cache_384_last = i; +} +#endif /* FP_ECC */ +
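/* [Editorial sketch, not part of the patch] sp_ecc_get_cache_384 above keeps
 * FP_ENTRIES slots: reuse a slot when the point matches, otherwise take the
 * first unset slot after the last one handed out, and only if all slots are
 * in use evict the entry with the smallest hit count. The slot choice in
 * isolation, with a toy cache: */
#include <stdio.h>

#define ENTRIES 4

struct slot { int set; unsigned cnt; };

static int pick_slot(struct slot* c, int last)
{
    int i, j;
    unsigned least;

    /* first unset slot after the most recently used index */
    i = (last + 1) % ENTRIES;
    for (; i != last; i = (i + 1) % ENTRIES) {
        if (!c[i].set)
            return i;
    }
    /* cache full: evict the least used entry */
    least = c[0].cnt;
    for (j = 1; j < ENTRIES; j++) {
        if (c[j].cnt < least) {
            i = j;
            least = c[i].cnt;
        }
    }
    return i;
}

int main(void)
{
    struct slot c[ENTRIES] = { {1, 5}, {1, 2}, {1, 9}, {1, 7} };
    printf("%d\n", pick_slot(c, 0)); /* prints 1: smallest cnt */
    return 0;
}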
+/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, const sp_digit* k, + int map, void* heap) +{ +#ifndef FP_ECC + return sp_384_ecc_mulmod_fast_15(r, g, k, map, heap); +#else + sp_digit tmp[2 * 15 * 7]; + sp_cache_384_t* cache; + int err = MP_OKAY; + +#ifndef HAVE_THREAD_LS + if (initCacheMutex_384 == 0) { + wc_InitMutex(&sp_cache_384_lock); + initCacheMutex_384 = 1; + } + if (wc_LockMutex(&sp_cache_384_lock) != 0) + err = BAD_MUTEX_E; +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_384(g, &cache); + if (cache->cnt == 2) + sp_384_gen_stripe_table_15(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_384_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_384_ecc_mulmod_fast_15(r, g, k, map, heap); + } + else { + err = sp_384_ecc_mulmod_stripe_15(r, g, cache->table, k, + map, heap); + } + } + + return err; +#endif +} + +#endif +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * gm Point to multiply. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map, + void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 p; + sp_digit kd[15]; +#endif + sp_point_384* point; + sp_digit* k = NULL; + int err = MP_OKAY; + + err = sp_384_point_new_15(heap, p, point); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + if (err == MP_OKAY) { + sp_384_from_mp(k, 15, km); + sp_384_point_from_ecc_point_15(point, gm); + + err = sp_384_ecc_mulmod_15(point, point, k, map, heap); + } + if (err == MP_OKAY) { + err = sp_384_point_to_ecc_point_15(point, r); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_15(point, 0, heap); + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply the base point of P384 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_base_15(sp_point_384* r, const sp_digit* k, + int map, void* heap) +{ + /* No pre-computed values. */ + return sp_384_ecc_mulmod_15(r, &p384_base, k, map, heap); +} + +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +/* Multiply the base point of P384 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */ +static int sp_384_ecc_mulmod_base_15(sp_point_384* r, const sp_digit* k, + int map, void* heap) +{ + /* No pre-computed values. */ + return sp_384_ecc_mulmod_15(r, &p384_base, k, map, heap); +} + +#else +static const sp_table_entry_384 p384_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x1c0b528,0x01d5992,0x0e383dd,0x38a835b,0x220e378,0x106d35b, + 0x1c3afc5,0x03bfe1e,0x28459a3,0x2d91521,0x214ede2,0x0bfdc8d, + 0x2151381,0x3708a67,0x004d3aa }, + { 0x303a4fe,0x10f6b52,0x29ac230,0x2fdeed2,0x0a1bfa8,0x3a0ec14, + 0x2de7562,0x3ff662e,0x21968f4,0x031b0d4,0x3969a84,0x2000898, + 0x1c5e9dd,0x2f09685,0x002b78a } }, + /* 2 */ + { { 0x30c535b,0x191d4ca,0x2296298,0x14dc141,0x090dd69,0x05aae6b, + 0x0cd6b42,0x35da80e,0x3b7be12,0x2cf7e6d,0x1f347bd,0x3d365e1, + 0x1448913,0x32704fa,0x00222c5 }, + { 0x280dc64,0x39e5bc9,0x24175f8,0x2dd60d4,0x0120e7c,0x041d02e, + 0x0b5d8ad,0x37b9895,0x2fb5337,0x1f0e2e3,0x14f0224,0x2230b86, + 0x1bc4cf6,0x17cdb09,0x007b5c7 } }, + /* 3 */ + { { 0x2dffea5,0x28f30e7,0x29fce26,0x070df5f,0x235bbfd,0x2f78fbd, + 0x27700d9,0x23d6bc3,0x3471a53,0x0c0e03a,0x05bf9eb,0x276a2ec, + 0x20c3e2e,0x31cc691,0x00dbb93 }, + { 0x126b605,0x2e8983d,0x153737d,0x23bf5e1,0x295d497,0x35ca812, + 0x2d793ae,0x16c6893,0x3777600,0x089a520,0x1e681f8,0x3d55ee6, + 0x154ef99,0x155f592,0x00ae5f9 } }, + /* 4 */ + { { 0x26feef9,0x20315fc,0x1240244,0x250e838,0x3c31a26,0x1cf8af1, + 0x1002c32,0x3b531cd,0x1c53ef1,0x22310ba,0x3f4948e,0x22eafd9, + 0x3863202,0x3d0e2a5,0x006a502 }, + { 0x34536fe,0x04e91ad,0x30ebf5f,0x2af62a7,0x01d218b,0x1c8c9da, + 0x336bcc3,0x23060c3,0x331576e,0x1b14c5e,0x1bbcb76,0x0755e9a, + 0x3d4dcef,0x24c2cf8,0x00917c4 } }, + /* 5 */ + { { 0x349ddd0,0x09b8bb8,0x0250114,0x3e66cbf,0x29f117e,0x3005d29, + 0x36b480e,0x2119bfc,0x2761845,0x253d2f7,0x0580604,0x0bb6db4, + 0x3ca922f,0x1744677,0x008adc7 }, + { 0x3d5a7ce,0x27425ed,0x11e9a61,0x3968d10,0x3874275,0x3692d3b, + 0x03e0470,0x0763d50,0x3d97790,0x3cbaeab,0x2747170,0x18faf3a, + 0x180365e,0x2511fe7,0x0012a36 } }, + /* 6 */ + { { 0x3c52870,0x2701e93,0x296128f,0x120694e,0x1ce0b37,0x3860a36, + 0x10fa180,0x0896b55,0x2f76adb,0x22892ae,0x2e58a34,0x07b4295, + 0x2cb62d1,0x079a522,0x00f3d81 }, + { 0x061ed22,0x2375dd3,0x3c9d861,0x3e602d1,0x10bb747,0x39ae156, + 0x3f796fd,0x087a48a,0x06d680a,0x37f7f47,0x2af2c9d,0x36c55dc, + 0x10f3dc0,0x279b07a,0x00a0937 } }, + /* 7 */ + { { 0x085c629,0x319bbf8,0x089a386,0x184256f,0x15fc2a4,0x00fd2d0, + 0x13d6312,0x363d44d,0x32b7e4b,0x25f2865,0x27df8ce,0x1dce02a, + 0x24ea3b0,0x0e27b9f,0x00d8a90 }, + { 0x3b14461,0x1d371f9,0x0f781bc,0x0503271,0x0dc2cb0,0x13bc284, + 0x34b3a68,0x1ff894a,0x25d2032,0x16f79ba,0x260f961,0x07b10d5, + 0x18173b7,0x2812e2b,0x00eede5 } }, + /* 8 */ + { { 0x13b9a2d,0x132ece2,0x0c5d558,0x02c0214,0x1820c66,0x37cb50f, + 0x26d8267,0x3a00504,0x3f00109,0x33756ee,0x38172f1,0x2e4bb8c, + 0x030d985,0x3e4fcc5,0x00609d4 }, + { 0x2daf9d6,0x16681fa,0x1fb01e0,0x1b03c49,0x370e653,0x183c839, + 0x2207515,0x0ea6b58,0x1ae7aaf,0x3a96522,0x24bae14,0x1c38bd9, + 0x082497b,0x1c05db4,0x000dd03 } }, + /* 9 */ + { { 0x110521f,0x04efa21,0x0c174cc,0x2a7dc93,0x387315b,0x14f7098, + 0x1d83bb3,0x2495ed2,0x2fe0c27,0x1e2d9df,0x093c953,0x0287073, + 0x02c9951,0x336291c,0x0033e30 }, + { 0x208353f,0x3f22748,0x2b2bf0f,0x2373b50,0x10170fa,0x1b8a97d, + 0x0851ed2,0x0b25824,0x055ecb5,0x12049d9,0x3fe1adf,0x11b1385, + 0x28eab06,0x11fac21,0x00513f0 } }, + /* 10 */ + { { 
0x35bdf53,0x1847d37,0x1a6dc07,0x29d62c4,0x045d331,0x313b8e5, + 0x165daf1,0x1e34562,0x3e75a58,0x16ea2fa,0x02dd302,0x3302862, + 0x3eb8bae,0x2266a48,0x00cf2a3 }, + { 0x24fd048,0x324a074,0x025df98,0x1662eec,0x3841bfb,0x26ae754, + 0x1df8cec,0x0113ae3,0x0b67fef,0x094e293,0x2323666,0x0ab087c, + 0x2f06509,0x0e142d9,0x00a919d } }, + /* 11 */ + { { 0x1d480d8,0x00ed021,0x3a7d3db,0x1e46ca1,0x28cd9f4,0x2a3ceeb, + 0x24dc754,0x0624a3c,0x0003db4,0x1520bae,0x1c56e0f,0x2fe7ace, + 0x1dc6f38,0x0c826a4,0x008b977 }, + { 0x209cfc2,0x2c16c9c,0x1b70a31,0x21416cb,0x34c49bf,0x186549e, + 0x062498d,0x146e959,0x0391fac,0x08ff944,0x2b4b834,0x013d57a, + 0x2eabffb,0x0370131,0x00c07c1 } }, + /* 12 */ + { { 0x332f048,0x0bf9336,0x16dfad2,0x2451d7b,0x35f23bf,0x299adb2, + 0x0ce0c0a,0x0170294,0x289f034,0x2b7d89e,0x395e2d6,0x1d20df7, + 0x2e64e36,0x16dae90,0x00081c9 }, + { 0x31d6ceb,0x0f80db9,0x0271eba,0x33db1ac,0x1b45bcc,0x1a11c07, + 0x347e630,0x148fd9e,0x142e712,0x3183e3e,0x1cd47ad,0x108d1c9, + 0x09cbb82,0x35e61d9,0x0083027 } }, + /* 13 */ + { { 0x215b0b8,0x0a7a98d,0x2c41b39,0x3f69536,0x0b41441,0x16da8da, + 0x15d556b,0x3c17a26,0x129167e,0x3ea0351,0x2d25a27,0x2f2d285, + 0x15b68f6,0x2931ef5,0x00210d6 }, + { 0x1351130,0x012aec9,0x37ebf38,0x26640f8,0x01d2df6,0x2130972, + 0x201efc0,0x23a457c,0x087a1c6,0x14c68a3,0x163f62a,0x36b494d, + 0x015d481,0x39c35b1,0x005dd6d } }, + /* 14 */ + { { 0x06612ce,0x11c3f61,0x199729f,0x3b36863,0x2986f3e,0x3cd2be1, + 0x04c1612,0x2be2dae,0x00846dd,0x3d7bc29,0x249e795,0x1016803, + 0x37a3714,0x2c5aa8b,0x005f491 }, + { 0x341b38d,0x01eb936,0x3caac7f,0x27863ef,0x1ef7d11,0x1110ec6, + 0x18e0761,0x26498e8,0x01a79a1,0x390d5a1,0x22226fb,0x3d2a473, + 0x0872191,0x1230f32,0x00dc772 } }, + /* 15 */ + { { 0x0b1ec9d,0x03fc6b9,0x3706d57,0x03b9fbb,0x221d23e,0x2867821, + 0x1e40f4c,0x2c9c0f3,0x3c4cd4b,0x31f5948,0x3f13aa6,0x307c1b2, + 0x04b6016,0x116b453,0x005aa72 }, + { 0x0b74de8,0x20519d1,0x134e37f,0x05d882a,0x1839e7a,0x3a2c6a8, + 0x0d14e8d,0x1d78bdd,0x251f30d,0x3a1e27e,0x081c261,0x2c9014b, + 0x165ee09,0x19e0cf1,0x00654e2 } }, + /* 16 */ + { { 0x39fbe67,0x081778b,0x0e44378,0x20dfdca,0x1c4afcb,0x20b803c, + 0x0ec06c6,0x1508f6f,0x1c3114d,0x3bca851,0x3a52463,0x07661d1, + 0x17b0aa0,0x16c5f5c,0x00fc093 }, + { 0x0d01f95,0x0ef13f5,0x2d34965,0x2a25582,0x39aa83e,0x3e38fcf, + 0x3943dca,0x385bbdd,0x210e86f,0x3dc1dd2,0x3f9ffdc,0x18b9bc6, + 0x345c96b,0x0e79621,0x008a72f } }, + /* 17 */ + { { 0x341c342,0x3793688,0x042273a,0x153a9c1,0x3dd326e,0x1d073bc, + 0x2c7d983,0x05524cd,0x00d59e6,0x347abe8,0x3d9a3ef,0x0fb624a, + 0x2c7e4cd,0x09b3171,0x0003faf }, + { 0x045f8ac,0x38bf3cc,0x1e73087,0x0c85d3c,0x314a655,0x382be69, + 0x384f28f,0x24d6cb3,0x2842cdc,0x1777f5e,0x2929c89,0x03c45ed, + 0x3cfcc4c,0x0b59322,0x0035657 } }, + /* 18 */ + { { 0x18c1bba,0x2eb005f,0x33d57ec,0x30e42c3,0x36058f9,0x1865f43, + 0x2116e3f,0x2c4a2bb,0x0684033,0x0f1375c,0x0209b98,0x2136e9b, + 0x1bc4af0,0x0b3e0c7,0x0097c7c }, + { 0x16010e8,0x398777e,0x2a172f4,0x0814a7e,0x0d97e4e,0x274dfc8, + 0x2666606,0x1b5c93b,0x1ed3d36,0x3f3304e,0x13488e0,0x02dbb88, + 0x2d53369,0x3717ce9,0x007cad1 } }, + /* 19 */ + { { 0x257a41f,0x2a6a076,0x39b6660,0x04bb000,0x1e74a04,0x3876b45, + 0x343c6b5,0x0753108,0x3f54668,0x24a13cf,0x23749e8,0x0421fc5, + 0x32f13b5,0x0f31be7,0x00070f2 }, + { 0x1186e14,0x0847697,0x0dff542,0x0dff76c,0x084748f,0x2c7d060, + 0x23aab4d,0x0b43906,0x27ba640,0x1497b59,0x02f5835,0x0a492a4, + 0x0a6892f,0x39f3e91,0x005844e } }, + /* 20 */ + { { 0x33b236f,0x02181cf,0x21dafab,0x0760788,0x019e9d4,0x249ed0a, + 0x36571e3,0x3c7dbcf,0x1337550,0x010d22a,0x285e62f,0x19ee65a, + 
0x052bf71,0x1d65fd5,0x0062d43 }, + { 0x2955926,0x3fae7bc,0x0353d85,0x07db7de,0x1440a56,0x328dad6, + 0x1668ec9,0x28058e2,0x1a1a22d,0x1014afc,0x3609325,0x3effdcb, + 0x209f3bd,0x3ca3888,0x0094e50 } }, + /* 21 */ + { { 0x062e8af,0x0b96ccc,0x136990b,0x1d7a28f,0x1a85723,0x0076dec, + 0x21b00b2,0x06a88ff,0x2f0ee65,0x1fa49b7,0x39b10ad,0x10b26fa, + 0x0be7465,0x026e8bf,0x00098e3 }, + { 0x3f1d63f,0x37bacff,0x1374779,0x02882ff,0x323d0e8,0x1da3de5, + 0x12bb3b8,0x0a15a11,0x34d1f95,0x2b3dd6e,0x29ea3fa,0x39ad000, + 0x33a538f,0x390204d,0x0012bd3 } }, + /* 22 */ + { { 0x04cbba5,0x0de0344,0x1d4cc02,0x11fe8d7,0x36207e7,0x32a6da8, + 0x0239281,0x1ec40d7,0x3e89798,0x213fc66,0x0022eee,0x11daefe, + 0x3e74db8,0x28534ee,0x00aa0a4 }, + { 0x07d4543,0x250cc46,0x206620f,0x1c1e7db,0x1321538,0x31fa0b8, + 0x30f74ea,0x01aae0e,0x3a2828f,0x3e9dd22,0x026ef35,0x3c0a62b, + 0x27dbdc5,0x01c23a6,0x000f0c5 } }, + /* 23 */ + { { 0x2f029dd,0x3091337,0x21b80c5,0x21e1419,0x13dabc6,0x3847660, + 0x12b865f,0x36eb666,0x38f6274,0x0ba6006,0x098da24,0x1398c64, + 0x13d08e5,0x246a469,0x009929a }, + { 0x1285887,0x3ff5c8d,0x010237b,0x097c506,0x0bc7594,0x34b9b88, + 0x00cc35f,0x0bb964a,0x00cfbc4,0x29cd718,0x0837619,0x2b4a192, + 0x0c57bb7,0x08c69de,0x00a3627 } }, + /* 24 */ + { { 0x1361ed8,0x266d724,0x366cae7,0x1d5b18c,0x247d71b,0x2c9969a, + 0x0dd5211,0x1edd153,0x25998d7,0x0380856,0x3ab29db,0x09366de, + 0x1e53644,0x2b31ff6,0x008b0ff }, + { 0x3b5d9ef,0x217448d,0x174746d,0x18afea4,0x15b106d,0x3e66e8b, + 0x0479f85,0x13793b4,0x1231d10,0x3c39bce,0x25e8983,0x2a13210, + 0x05a7083,0x382be04,0x00a9507 } }, + /* 25 */ + { { 0x0cf381c,0x1a29b85,0x31ccf6c,0x2f708b8,0x3af9d27,0x2a29732, + 0x168d4da,0x393488d,0x2c0e338,0x3f90c7b,0x0f52ad1,0x2a0a3fa, + 0x2cd80f1,0x15e7a1a,0x00db6a0 }, + { 0x107832a,0x159cb91,0x1289288,0x17e21f9,0x073fc27,0x1584342, + 0x3802780,0x3d6c197,0x154075f,0x16366d1,0x09f712b,0x23a3ec4, + 0x29cf23a,0x3218baf,0x0039f0a } }, + /* 26 */ + { { 0x052edf5,0x2afde13,0x2e53d8f,0x3969626,0x3dcd737,0x1e46ac5, + 0x118bf0d,0x01b2652,0x156bcff,0x16d7ef6,0x1ca46d4,0x34c0cbb, + 0x3e486f6,0x1f85068,0x002cdff }, + { 0x1f47ec8,0x12cee98,0x0608667,0x18fbbe1,0x08a8821,0x31a1fe4, + 0x17c7054,0x3c89e89,0x2edf6cd,0x1b8c32c,0x3f6ea84,0x1319329, + 0x3cd3c2c,0x05f331a,0x00186fa } }, + /* 27 */ + { { 0x1fcb91e,0x0fd4d87,0x358a48a,0x04d91b4,0x083595e,0x044a1e6, + 0x15827b9,0x1d5eaf4,0x2b82187,0x08f3984,0x21bd737,0x0c54285, + 0x2f56887,0x14c2d98,0x00f4684 }, + { 0x01896f6,0x0e542d0,0x2090883,0x269dfcf,0x1e11cb8,0x239fd29, + 0x312cac4,0x19dfacb,0x369f606,0x0cc4f75,0x16579f9,0x33c22cc, + 0x0f22bfd,0x3b251ae,0x006429c } }, + /* 28 */ + { { 0x375f9a4,0x137552e,0x3570498,0x2e4a74e,0x24aef06,0x35b9307, + 0x384ca23,0x3bcd6d7,0x011b083,0x3c93187,0x392ca9f,0x129ce48, + 0x0a800ce,0x145d9cc,0x00865d6 }, + { 0x22b4a2b,0x37f9d9c,0x3e0eca3,0x3e5ec20,0x112c04b,0x2e1ae29, + 0x3ce5b51,0x0f83200,0x32d6a7e,0x10ff1d8,0x081adbe,0x265c30b, + 0x216b1c8,0x0eb4483,0x003cbcd } }, + /* 29 */ + { { 0x030ce93,0x2d331fb,0x20a2fbf,0x1f6dc9c,0x010ed6c,0x1ed5540, + 0x275bf74,0x3df0fb1,0x103333f,0x0241c96,0x1075bfc,0x30e5cf9, + 0x0f31bc7,0x32c01eb,0x00b049e }, + { 0x358839c,0x1dbabd3,0x1e4fb40,0x36a8ac1,0x2101896,0x2d0319b, + 0x2033b0a,0x192e8fd,0x2ebc8d8,0x2867ba7,0x07bf6d2,0x1b3c555, + 0x2477deb,0x198fe09,0x008e5a9 } }, + /* 30 */ + { { 0x3fbd5e1,0x18bf77d,0x2b1d69e,0x151da44,0x338ecfe,0x0768efe, + 0x1a3d56d,0x3c35211,0x10e1c86,0x2012525,0x3bc36ce,0x32b6fe4, + 0x0c8d183,0x15c93f3,0x0041fce }, + { 0x332c144,0x24e70a0,0x246e05f,0x22c21c7,0x2b17f24,0x1ba2bfd, + 
0x0534e26,0x318a4f6,0x1dc3b85,0x0c741bc,0x23131b7,0x01a8cba, + 0x364e5db,0x21362cf,0x00f2951 } }, + /* 31 */ + { { 0x2ddc103,0x14ffdcd,0x206fd96,0x0de57bd,0x025f43e,0x381b73a, + 0x2301fcf,0x3bafc27,0x34130b6,0x0216bc8,0x0ff56b2,0x2c4ad4c, + 0x23c6b79,0x1267fa6,0x009b4fb }, + { 0x1d27ac2,0x13e2494,0x1389015,0x38d5b29,0x2d33167,0x3f01969, + 0x28ec1fa,0x1b26de0,0x2587f74,0x1c25668,0x0c44f83,0x23c6f8c, + 0x32fdbb1,0x045f104,0x00a7946 } }, + /* 32 */ + { { 0x23c647b,0x09addd7,0x1348c04,0x0e633c1,0x1bfcbd9,0x1cb034f, + 0x1312e31,0x11cdcc7,0x1e6ee75,0x057d27f,0x2da7ee6,0x154c3c1, + 0x3a5fb89,0x2c2ba2c,0x00cf281 }, + { 0x1b8a543,0x125cd50,0x1d30fd1,0x29cc203,0x341a625,0x14e4233, + 0x3aae076,0x289e38a,0x036ba02,0x230f405,0x3b21b8f,0x34088b9, + 0x01297a0,0x03a75fb,0x00fdc27 } }, + /* 33 */ + { { 0x07f41d6,0x1cf032f,0x1641008,0x0f86deb,0x3d97611,0x0e110fe, + 0x136ff42,0x0b914a9,0x0e241e6,0x180c340,0x1f545fc,0x0ba619d, + 0x1208c53,0x04223a4,0x00cd033 }, + { 0x397612c,0x0132665,0x34e2d1a,0x00bba99,0x1d4393e,0x065d0a8, + 0x2fa69ee,0x1643b55,0x08085f0,0x3774aad,0x08a2243,0x33bf149, + 0x03f41a5,0x1ed950e,0x0048cc6 } }, + /* 34 */ + { { 0x014ab48,0x010c3bf,0x2a744e5,0x13c99c1,0x2195b7f,0x32207fd, + 0x28a228c,0x004f4bf,0x0e2d945,0x2ec6e5a,0x0b92162,0x1aa95e5, + 0x2754a93,0x1adcd93,0x004fb76 }, + { 0x1e1ff7f,0x24ef28c,0x269113f,0x32b393c,0x2696eb5,0x0ac2780, + 0x354bf8a,0x0ffe3fd,0x09ce58e,0x0163c4f,0x1678c0b,0x15cd1bc, + 0x292b3b7,0x036ea19,0x00d5420 } }, + /* 35 */ + { { 0x1da1265,0x0c2ef5b,0x18dd9a0,0x3f3a25c,0x0f7b4f3,0x0d8196e, + 0x24931f9,0x090729a,0x1875f72,0x1ef39cb,0x2577585,0x2ed472d, + 0x136756c,0x20553a6,0x00c7161 }, + { 0x2e32189,0x283de4b,0x00b2e81,0x0989df7,0x3ef2fab,0x1c7d1a7, + 0x24f6feb,0x3e16679,0x233dfda,0x06d1233,0x3e6b5df,0x1707132, + 0x05f7b3f,0x2c00779,0x00fb8df } }, + /* 36 */ + { { 0x15bb921,0x117e9d3,0x267ec73,0x2f934ad,0x25c7e04,0x20b5e8f, + 0x2d3a802,0x2ca911f,0x3f87e47,0x39709dd,0x08488e2,0x2cec400, + 0x35b4589,0x1f0acba,0x009aad7 }, + { 0x2ac34ae,0x06f29f6,0x3326d68,0x3949abe,0x02452e4,0x0687b85, + 0x0879244,0x1eb7832,0x0d4c240,0x31d0ec1,0x3c17a2a,0x17a666f, + 0x01a06cb,0x3e0929c,0x004dca2 } }, + /* 37 */ + { { 0x127bc1a,0x0c72984,0x13be68e,0x26c5fab,0x1a3edd5,0x097d685, + 0x36b645e,0x385799e,0x394a420,0x39d8885,0x0b1e872,0x13f60ed, + 0x2ce1b79,0x3c0ecb7,0x007cab3 }, + { 0x29b3586,0x26fc572,0x0bd7711,0x0913494,0x0a55459,0x31af3c9, + 0x3633eac,0x3e2105c,0x0c2b1b6,0x0e6f4c2,0x047d38c,0x2b81bd5, + 0x1fe1c3b,0x04d7cd0,0x0054dcc } }, + /* 38 */ + { { 0x03caf0d,0x0d66365,0x313356d,0x2a4897f,0x2ce044e,0x18feb7a, + 0x1f6a7c5,0x3709e7b,0x14473e8,0x2d8cbae,0x3190dca,0x12d19f8, + 0x31e3181,0x3cc5b6e,0x002d4f4 }, + { 0x143b7ca,0x2604728,0x39508d6,0x0cb79f3,0x24ec1ac,0x1ed7fa0, + 0x3ab5fd3,0x3c76488,0x2e49390,0x03a0985,0x3580461,0x3fd2c81, + 0x308f0ab,0x38561d6,0x0011b9b } }, + /* 39 */ + { { 0x3be682c,0x0c68f4e,0x32dd4ae,0x099d3bb,0x0bc7c5d,0x311f750, + 0x2fd10a3,0x2e7864a,0x23bc14a,0x13b1f82,0x32e495e,0x1b0f746, + 0x3cd856a,0x17a4c26,0x00085ee }, + { 0x02e67fd,0x06a4223,0x2af2f38,0x2038987,0x132083a,0x1b7bb85, + 0x0d6a499,0x131e43f,0x3035e52,0x278ee3e,0x1d5b08b,0x30d8364, + 0x2719f8d,0x0b21fc9,0x003a06e } }, + /* 40 */ + { { 0x237cac0,0x27d6a1c,0x27945cd,0x2750d61,0x293f0b5,0x253db13, + 0x04a764e,0x20b4d0e,0x12bb627,0x160c13b,0x0de0601,0x236e2cf, + 0x2190f0b,0x354d76f,0x004336d }, + { 0x2ab473a,0x10d54e4,0x1046574,0x1d6f97b,0x0031c72,0x06426a9, + 0x38678c2,0x0b76cf9,0x04f9920,0x152adf8,0x2977e63,0x1234819, + 0x198be26,0x061024c,0x00d427d } }, + /* 41 */ + { { 
0x39b5a31,0x2123d43,0x362a822,0x1a2eab6,0x0bb0034,0x0d5d567, + 0x3a04723,0x3a10c8c,0x08079ae,0x0d27bda,0x2eb9e1e,0x2619e82, + 0x39a55a8,0x0c6c7db,0x00c1519 }, + { 0x174251e,0x13ac2eb,0x295ed26,0x18d2afc,0x037b9b2,0x1258344, + 0x00921b0,0x1f702d8,0x1bc4da7,0x1c3794f,0x12b1869,0x366eacf, + 0x16ddf01,0x31ebdc5,0x00ad54e } }, + /* 42 */ + { { 0x1efdc58,0x1370d5e,0x0ddb8e7,0x1a53fda,0x1456bd3,0x0c825a9, + 0x0e74ccd,0x20f41c9,0x3423867,0x139073f,0x3c70d8a,0x131fc85, + 0x219a2a0,0x34bf986,0x0041199 }, + { 0x1c05dd2,0x268f80a,0x3da9d38,0x1af9f8f,0x0535f2a,0x30ad37e, + 0x2cf72d7,0x14a509b,0x1f4fe74,0x259e09d,0x1d23f51,0x0672732, + 0x08fc463,0x00b6201,0x001e05a } }, + /* 43 */ + { { 0x0d5ffe8,0x3238bb5,0x17f275c,0x25b6fa8,0x2f8bb48,0x3b8f2d2, + 0x059790c,0x18594d4,0x285a47c,0x3d301bb,0x12935d2,0x23ffc96, + 0x3d7c7f9,0x15c8cbf,0x0034c4a }, + { 0x20376a2,0x05201ba,0x1e02c4b,0x1413c45,0x02ea5e7,0x39575f0, + 0x2d76e21,0x113694c,0x011f310,0x0da3725,0x31b7799,0x1cb9195, + 0x0cfd592,0x22ee4ea,0x00adaa3 } }, + /* 44 */ + { { 0x14ed72a,0x031c49f,0x39a34bf,0x192e87d,0x0da0e92,0x130e7a9, + 0x00258bf,0x144e123,0x2d82a71,0x0294e53,0x3f06c66,0x3d4473a, + 0x037cd4a,0x3bbfb17,0x00fcebc }, + { 0x39ae8c1,0x2dd6a9d,0x206ef23,0x332b479,0x2deff59,0x09d5720, + 0x3526fd2,0x33bf7cf,0x344bb32,0x359316a,0x115bdef,0x1b8468a, + 0x3813ea9,0x11a8450,0x00ab197 } }, + /* 45 */ + { { 0x0837d7d,0x1e1617b,0x0ba443c,0x2f2e3b8,0x2ca5b6f,0x176ed7b, + 0x2924d9d,0x07294d3,0x104bb4f,0x1cfd3e8,0x398640f,0x1162dc8, + 0x007ea15,0x2aa75fd,0x004231f }, + { 0x16e6896,0x01987be,0x0f9d53e,0x1a740ec,0x1554e4c,0x31e1634, + 0x3cb07b9,0x013eb53,0x39352cb,0x1dfa549,0x0974e7f,0x17c55d2, + 0x157c85f,0x1561adb,0x002e3fa } }, + /* 46 */ + { { 0x29951a8,0x35200da,0x2ad042c,0x22109e4,0x3a8b15b,0x2eca69c, + 0x28bcf9a,0x0cfa063,0x0924099,0x12ff668,0x2fb88dc,0x028d653, + 0x2445876,0x218d01c,0x0014418 }, + { 0x1caedc7,0x295bba6,0x01c9162,0x3364744,0x28fb12e,0x24c80b6, + 0x2719673,0x35e5ba9,0x04aa4cc,0x206ab23,0x1cf185a,0x2c140d8, + 0x1095a7d,0x1b3633f,0x000c9f8 } }, + /* 47 */ + { { 0x0b2a556,0x0a051c4,0x30b29a7,0x190c9ed,0x3767ca9,0x38de66d, + 0x2d9e125,0x3aca813,0x2dc22a3,0x319e074,0x0d9450a,0x3445bac, + 0x3e08a5b,0x07f29fa,0x00eccac }, + { 0x02d6e94,0x21113f7,0x321bde6,0x0a4d7b3,0x03621f4,0x2780e8b, + 0x22d5432,0x1fc2853,0x0d57d3e,0x254f90b,0x33ed00b,0x289b025, + 0x12272bb,0x30e715f,0x0000297 } }, + /* 48 */ + { { 0x0243a7d,0x2aac42e,0x0c5b3aa,0x0fa3e96,0x06eeef9,0x2b9fdd9, + 0x26fca39,0x0134fe1,0x22661ab,0x1990416,0x03945d6,0x15e3628, + 0x3848ca3,0x0f91e46,0x00b08cd }, + { 0x16d2411,0x3717e1d,0x128c45e,0x3669d54,0x0d4a790,0x2797da8, + 0x0f09634,0x2faab0b,0x27df649,0x3b19b49,0x0467039,0x39b65a2, + 0x3816f3c,0x31ad0bd,0x0050046 } }, + /* 49 */ + { { 0x2425043,0x3858099,0x389092a,0x3f7c236,0x11ff66a,0x3c58b39, + 0x2f5a7f8,0x1663ce1,0x2a0fcf5,0x38634b7,0x1a8ca18,0x0dcace8, + 0x0e6f778,0x03ae334,0x00df0d2 }, + { 0x1bb4045,0x357875d,0x14b77ed,0x33ae5b6,0x2252a47,0x31899dd, + 0x3293582,0x040c6f6,0x14340dd,0x3614f0e,0x3d5f47f,0x326fb3d, + 0x0044a9d,0x00beeb9,0x0027c23 } }, + /* 50 */ + { { 0x32d49ce,0x34822a3,0x30a22d1,0x00858b7,0x10d91aa,0x2681fd9, + 0x1cce870,0x2404a71,0x38b8433,0x377c1c8,0x019442c,0x0a38b21, + 0x22aba50,0x0d61c81,0x002dcbd }, + { 0x0680967,0x2f0f2f9,0x172cb5f,0x1167e4b,0x12a7bc6,0x05b0da7, + 0x2c76e11,0x3a36201,0x37a3177,0x1d71419,0x0569df5,0x0dce7ad, + 0x3f40b75,0x3bd8db0,0x002d481 } }, + /* 51 */ + { { 0x2a1103e,0x34e7f7f,0x1b171a2,0x24a57e0,0x2eaae55,0x166c992, + 0x10aa18f,0x0bb836f,0x01acb59,0x0e430e7,0x1750cca,0x18be036, + 
0x3cc6cdf,0x0a0f7e5,0x00da4d8 }, + { 0x2201067,0x374d187,0x1f6b0a6,0x165a7ec,0x31531f8,0x3580487, + 0x15e5521,0x0724522,0x2b04c04,0x202c86a,0x3cc1ccf,0x225b11a, + 0x1bde79d,0x0eccc50,0x00d24da } }, + /* 52 */ + { { 0x3b0a354,0x2814dd4,0x1cd8575,0x3d031b7,0x0392ff2,0x1855ee5, + 0x0e8cff5,0x203442e,0x3bd3b1b,0x141cf95,0x3fedee1,0x1d783c0, + 0x26f192a,0x0392aa3,0x0075238 }, + { 0x158ffe9,0x3889f19,0x14151f4,0x06067b1,0x13a3486,0x1e65c21, + 0x382d5ef,0x1ab0aac,0x2ffddc4,0x3179b7a,0x3c8d094,0x05101e3, + 0x237c6e5,0x3947d83,0x00f674f } }, + /* 53 */ + { { 0x363408f,0x21eb96b,0x27376fb,0x2a735d6,0x1a39c36,0x3d31863, + 0x33313fc,0x32235e0,0x082f034,0x23ef351,0x39b3528,0x1a69d84, + 0x1d9c944,0x07159ad,0x0077a71 }, + { 0x04f8d65,0x25771e5,0x2ba84a6,0x194586a,0x1e6da5f,0x118059a, + 0x14e9c32,0x1d24619,0x3f528ae,0x22f22e4,0x0f5580d,0x0747a0e, + 0x32cc85f,0x286b3a8,0x008ccf9 } }, + /* 54 */ + { { 0x196fee2,0x2c4431c,0x094528a,0x18e1d32,0x175799d,0x26bb6b7, + 0x2293482,0x23fd289,0x07b2be8,0x1a5c533,0x158d60d,0x04a4f3f, + 0x164e9f7,0x32ccca9,0x00da6b6 }, + { 0x1d821c2,0x3f76c4f,0x323df43,0x17e4374,0x0f2f278,0x121227e, + 0x2464190,0x19d2644,0x326d24c,0x3185983,0x0803c15,0x0767a33, + 0x1c4c996,0x0563eab,0x00631c6 } }, + /* 55 */ + { { 0x1752366,0x0baf83f,0x288bacf,0x0384e6f,0x2b93c34,0x3c805e7, + 0x3664850,0x29e1663,0x254ff1d,0x3852080,0x0f85c16,0x1e389d9, + 0x3191352,0x3915eaa,0x00a246e }, + { 0x3763b33,0x187ad14,0x3c0d438,0x3f11702,0x1c49f03,0x35ac7a8, + 0x3f16bca,0x27266bf,0x08b6fd4,0x0f38ce4,0x37fde8c,0x147a6ff, + 0x02c5e5c,0x28e7fc5,0x00076a7 } }, + /* 56 */ + { { 0x2338d10,0x0e77fa7,0x011b046,0x1bfd0ad,0x28ee699,0x21d73bc, + 0x0461d1a,0x342ea58,0x2d695b4,0x30415ed,0x2906e0b,0x18e494a, + 0x20f8a27,0x026b870,0x002c19f }, + { 0x2f4c43d,0x3f0fc3b,0x0aa95b8,0x2a01ea1,0x3e2e1b1,0x0d74af6, + 0x0555288,0x0cb757d,0x24d2071,0x143d2bb,0x3907f67,0x3e0ce30, + 0x131f0e9,0x3724381,0x007a874 } }, + /* 57 */ + { { 0x3c27050,0x08b5165,0x0bf884b,0x3dd679c,0x3bd0b8d,0x25ce2e6, + 0x1674057,0x1f13ed3,0x1f5cd91,0x0d1fd35,0x13ce6e3,0x2671338, + 0x10f8b90,0x34e5487,0x00942bf }, + { 0x03b566d,0x23c3da9,0x37de502,0x1a486ff,0x1af6e86,0x1108cb3, + 0x36f856c,0x01a6a0f,0x179f915,0x1595a01,0x2cfecb8,0x082568b, + 0x1ba16d1,0x1abb6c0,0x00cf7f0 } }, + /* 58 */ + { { 0x2f96c80,0x1b8f123,0x209c0f5,0x2ccf76d,0x1d521f2,0x3705143, + 0x2941027,0x07f88af,0x07102a9,0x38b4868,0x1efa37d,0x1bdd3e8, + 0x028a12e,0x02e055b,0x009a9a9 }, + { 0x1c7dfcb,0x3aa7aa7,0x1d62c54,0x3f0b0b0,0x3c74e66,0x274f819, + 0x23f9674,0x0e2b67c,0x24654dd,0x0c71f0e,0x1946cee,0x0016211, + 0x0045dc7,0x0da1173,0x0089856 } }, + /* 59 */ + { { 0x0e73946,0x29f353f,0x056329d,0x2d48c5a,0x28f697d,0x2ea4bb1, + 0x235e9cc,0x34faa38,0x15f9f91,0x3557519,0x2a50a6c,0x1a27c8e, + 0x2a1a0f3,0x3098879,0x00dcf21 }, + { 0x1b818bf,0x2f20b98,0x2243cff,0x25b691e,0x3c74a2f,0x2f06833, + 0x0e980a8,0x32db48d,0x2b57929,0x33cd7f5,0x2fe17d6,0x11a384b, + 0x2dafb81,0x2b9562c,0x00ddea6 } }, + /* 60 */ + { { 0x2787b2e,0x37a21df,0x310d294,0x07ce6a4,0x1258acc,0x3050997, + 0x19714aa,0x122824b,0x11c708b,0x0462d56,0x21abbf7,0x331aec3, + 0x307b927,0x3e8d5a0,0x00c0581 }, + { 0x24d4d58,0x3d628fc,0x23279e0,0x2e38338,0x2febe9b,0x346f9c0, + 0x3d6a419,0x3264e47,0x245faca,0x3669f62,0x1e50d66,0x3028232, + 0x18201ab,0x0bdc192,0x0002c34 } }, + /* 61 */ + { { 0x17bdbc2,0x1c501c5,0x1605ccd,0x31ab438,0x372fa89,0x24a8057, + 0x13da2bb,0x3f95ac7,0x3cda0a3,0x1e2b679,0x24f0673,0x03b72f4, + 0x35be616,0x2ccd849,0x0079d4d }, + { 0x33497c4,0x0c7f657,0x2fb0d3d,0x3b81064,0x38cafea,0x0e942bc, + 
0x3ca7451,0x2ab9784,0x1678c85,0x3c62098,0x1eb556f,0x01b3aa2, + 0x149f3ce,0x2656f6d,0x002eef1 } }, + /* 62 */ + { { 0x0596edc,0x1f4fad4,0x03a28ed,0x18a4149,0x3aa3593,0x12db40a, + 0x12c2c2a,0x3b1a288,0x327c4fb,0x35847f5,0x384f733,0x02e3fde, + 0x1af0e8a,0x2e417c3,0x00d85a6 }, + { 0x0091cf7,0x2267d75,0x276860e,0x19cbbfc,0x04fef2b,0x030ce59, + 0x3195cb1,0x1aa3f07,0x3699362,0x2a09d74,0x0d6c840,0x1e413d0, + 0x28acdc7,0x1ff5ea1,0x0088d8b } }, + /* 63 */ + { { 0x3d98425,0x08dc8de,0x154e85f,0x24b1c2c,0x2d44639,0x19a1e8b, + 0x300ee29,0x053f72e,0x3f7c832,0x12417f6,0x1359368,0x0674a4c, + 0x1218e20,0x0e4fbd4,0x000428c }, + { 0x01e909a,0x1d88fe6,0x12da40c,0x215ef86,0x2925133,0x004241f, + 0x3e480f4,0x2d16523,0x07c3120,0x3375e86,0x21fd8f3,0x35dc0b6, + 0x0efc5c9,0x14ef8d6,0x0066e47 } }, + /* 64 */ + { { 0x2973cf4,0x34d3845,0x34f7070,0x22df93c,0x120aee0,0x3ae2b4a, + 0x1af9b95,0x177689a,0x036a6a4,0x0377828,0x23df41e,0x22d4a39, + 0x0df2aa1,0x06ca898,0x0003cc7 }, + { 0x06b1dd7,0x19dc2a8,0x35d324a,0x0467499,0x25bfa9c,0x1a1110c, + 0x01e2a19,0x1b3c1cf,0x18d131a,0x10d9815,0x2ee7945,0x0a2720c, + 0x0ddcdb0,0x2c071b6,0x00a6aef } }, + /* 65 */ + { { 0x1ab5245,0x1192d00,0x13ffba1,0x1b71236,0x09b8d0b,0x0eb49cb, + 0x1867dc9,0x371de4e,0x05eae9f,0x36faf82,0x094ea8b,0x2b9440e, + 0x022e173,0x2268e6b,0x00740fc }, + { 0x0e23b23,0x22c28ca,0x04d05e2,0x0bb84c4,0x1235272,0x0289903, + 0x267a18b,0x0df0fd1,0x32e49bb,0x2ab1d29,0x281e183,0x3dcd3c3, + 0x1c0eb79,0x2db0ff6,0x00bffe5 } }, + /* 66 */ + { { 0x2a2123f,0x0d63d71,0x1f6db1a,0x257f8a3,0x1927b2d,0x06674be, + 0x302753f,0x20b7225,0x14c1a3f,0x0429cdd,0x377affe,0x0f40a75, + 0x2d34d06,0x05fb6b9,0x0054398 }, + { 0x38b83c4,0x1e7bbda,0x1682f79,0x0527651,0x2615cb2,0x1795fab, + 0x0e4facc,0x11f763c,0x1b81130,0x2010ae2,0x13f3650,0x20d5b72, + 0x1f32f88,0x34617f4,0x00bf008 } }, + /* 67 */ + { { 0x28068db,0x0aa8913,0x1a47801,0x10695ca,0x1c72cc6,0x0fc1a47, + 0x33df2c4,0x0517cf0,0x3471d92,0x1be815c,0x397f794,0x3f03cbe, + 0x121bfae,0x172cbe0,0x00813d7 }, + { 0x383bba6,0x04f1c90,0x0b3f056,0x1c29089,0x2a924ce,0x3c85e69, + 0x1cecbe5,0x0ad8796,0x0aa79f6,0x25e38ba,0x13ad807,0x30b30ed, + 0x0fa963a,0x35c763d,0x0055518 } }, + /* 68 */ + { { 0x0623f3b,0x3ca4880,0x2bff03c,0x0457ca7,0x3095c71,0x02a9a08, + 0x1722478,0x302c10b,0x3a17458,0x001131e,0x0959ec2,0x18bdfbc, + 0x2929fca,0x2adfe32,0x0040ae2 }, + { 0x127b102,0x14ddeaa,0x1771b8c,0x283700c,0x2398a86,0x085a901, + 0x108f9dc,0x0cc0012,0x33a918d,0x26d08e9,0x20b9473,0x12c3fc7, + 0x1f69763,0x1c94b5a,0x00e29de } }, + /* 69 */ + { { 0x035af04,0x3450021,0x12da744,0x077fb06,0x25f255b,0x0db7150, + 0x17dc123,0x1a2a07c,0x2a7636a,0x3972430,0x3704ca1,0x0327add, + 0x3d65a96,0x3c79bec,0x009de8c }, + { 0x11d3d06,0x3fb8354,0x12c7c60,0x04fe7ad,0x0466e23,0x01ac245, + 0x3c0f5f2,0x2a935d0,0x3ac2191,0x090bd56,0x3febdbc,0x3f1f23f, + 0x0ed1cce,0x02079ba,0x00d4fa6 } }, + /* 70 */ + { { 0x0ab9645,0x10174ec,0x3711b5e,0x26357c7,0x2aeec7f,0x2170a9b, + 0x1423115,0x1a5122b,0x39e512c,0x18116b2,0x290db1c,0x041b13a, + 0x26563ae,0x0f56263,0x00b89f3 }, + { 0x3ed2ce4,0x01f365f,0x1b2043b,0x05f7605,0x1f9934e,0x2a068d2, + 0x38d4d50,0x201859d,0x2de5291,0x0a7985a,0x17e6711,0x01b6c1b, + 0x08091fa,0x33c6212,0x001da23 } }, + /* 71 */ + { { 0x2f2c4b5,0x311acd0,0x1e47821,0x3bd9816,0x1931513,0x1bd4334, + 0x30ae436,0x2c49dc0,0x2c943e7,0x010ed4d,0x1fca536,0x189633d, + 0x17abf00,0x39e5ad5,0x00e4e3e }, + { 0x0c8b22f,0x2ce4009,0x1054bb6,0x307f2fc,0x32eb5e2,0x19d24ab, + 0x3b18c95,0x0e55e4d,0x2e4acf5,0x1bc250c,0x1dbf3a5,0x17d6a74, + 0x087cf58,0x07f6f82,0x00f8675 } }, + /* 72 */ + { { 
0x110e0b2,0x0e672e7,0x11b7157,0x1598371,0x01c0d59,0x3d60c24, + 0x096b8a1,0x0121075,0x0268859,0x219962f,0x03213f2,0x3022adc, + 0x18de488,0x3dcdeb9,0x008d2e0 }, + { 0x06cfee6,0x26f2552,0x3c579b7,0x31fa796,0x2036a26,0x362ba5e, + 0x103601c,0x012506b,0x387ff3a,0x101a41f,0x2c7eb58,0x23d2efc, + 0x10a5a07,0x2fd5fa3,0x00e3731 } }, + /* 73 */ + { { 0x1cd0abe,0x08a0af8,0x2fa272f,0x17a1fbf,0x1d4f901,0x30e0d2f, + 0x1898066,0x273b674,0x0c1b8a2,0x3272337,0x3ee82eb,0x006e7d3, + 0x2a75606,0x0af1c81,0x0037105 }, + { 0x2f32562,0x2842491,0x1bb476f,0x1305cd4,0x1daad53,0x0d8daed, + 0x164c37b,0x138030f,0x05145d5,0x300e2a3,0x32c09e7,0x0798600, + 0x3515130,0x2b9e55c,0x009764e } }, + /* 74 */ + { { 0x3d5256a,0x06c67f2,0x3a3b879,0x3c9b284,0x04007e0,0x33c1a41, + 0x3794604,0x1d6240e,0x022b6c1,0x22c62a7,0x01d4590,0x32df5f6, + 0x368f1a1,0x2a7486e,0x006e13f }, + { 0x31e6e16,0x20f18a9,0x09ed471,0x23b861d,0x15cf0ef,0x397b502, + 0x1c7f9b2,0x05f84b2,0x2cce6e1,0x3c10bba,0x13fb5a7,0x1b52058, + 0x1feb1b8,0x03b7279,0x00ea1cf } }, + /* 75 */ + { { 0x2a4cc9b,0x15cf273,0x08f36e6,0x076bf3b,0x2541796,0x10e2dbd, + 0x0bf02aa,0x3aa2201,0x03cdcd4,0x3ee252c,0x3799571,0x3e01fa4, + 0x156e8d0,0x1fd6188,0x003466a }, + { 0x2515664,0x166b355,0x2b0b51e,0x0f28f17,0x355b0f9,0x2909e76, + 0x206b026,0x3823a12,0x179c5fa,0x0972141,0x2663a1a,0x01ee36e, + 0x3fc8dcf,0x2ef3d1b,0x0049a36 } }, + /* 76 */ + { { 0x2d93106,0x3d6b311,0x3c9ce47,0x382aa25,0x265b7ad,0x0b5f92f, + 0x0f4c941,0x32aa4df,0x380d4b2,0x0e8aba6,0x260357a,0x1f38273, + 0x0d5f95e,0x199f23b,0x0029f77 }, + { 0x0a0b1c5,0x21a3d6a,0x0ad8df6,0x33d8a5e,0x1240858,0x30000a8, + 0x3ac101d,0x2a8143d,0x1d7ffe9,0x1c74a2a,0x1b962c9,0x1261359, + 0x0c8b274,0x002cf4a,0x00a8a7c } }, + /* 77 */ + { { 0x211a338,0x22a14ab,0x16e77c5,0x3c746be,0x3a78613,0x0d5731c, + 0x1767d25,0x0b799fa,0x009792a,0x09ae8dc,0x124386b,0x183d860, + 0x176747d,0x14c4445,0x00ab09b }, + { 0x0eb9dd0,0x0121066,0x032895a,0x330541c,0x1e6c17a,0x2271b92, + 0x06da454,0x054c2bf,0x20abb21,0x0ead169,0x3d7ea93,0x2359649, + 0x242c6c5,0x3194255,0x00a3ef3 } }, + /* 78 */ + { { 0x3010879,0x1083a77,0x217989d,0x174e55d,0x29d2525,0x0e544ed, + 0x1efd50e,0x30c4e73,0x05bd5d1,0x0793bf9,0x3f7af77,0x052779c, + 0x2b06bc0,0x13d0d02,0x0055a6b }, + { 0x3eaf771,0x094947a,0x0288f13,0x0a21e35,0x22ab441,0x23816bf, + 0x15832e1,0x2d8aff3,0x348cc1f,0x2bbd4a8,0x01c4792,0x34209d3, + 0x06dc72b,0x211a1df,0x00345c5 } }, + /* 79 */ + { { 0x2a65e90,0x173ac2f,0x199cde1,0x0ac905b,0x00987f7,0x3618f7b, + 0x1b578df,0x0d5e113,0x34bac6a,0x27d85ed,0x1b48e99,0x18af5eb, + 0x1a1be9e,0x3987aac,0x00877ca }, + { 0x2358610,0x3776a8e,0x2b0723a,0x344c978,0x22fc4d6,0x1615d53, + 0x3198f51,0x2d61225,0x12cb392,0x07dd061,0x355f7de,0x09e0132, + 0x0efae99,0x13b46aa,0x00e9e6c } }, + /* 80 */ + { { 0x0683186,0x36d8e66,0x0ea9867,0x0937731,0x1fb5cf4,0x13c39ef, + 0x1a7ffed,0x27dfb32,0x31c7a77,0x09f15fd,0x16b25ef,0x1dd01e7, + 0x0168090,0x240ed02,0x0090eae }, + { 0x2e1fceb,0x2ab9783,0x1a1fdf2,0x093a1b0,0x33ff1da,0x2864fb7, + 0x3587d6c,0x275aa03,0x123dc9b,0x0e95a55,0x0592030,0x2102402, + 0x1bdef7b,0x37f2e9b,0x001efa4 } }, + /* 81 */ + { { 0x0540015,0x20e3e78,0x37dcfbd,0x11b0e41,0x02c3239,0x3586449, + 0x1fb9e6a,0x0baa22c,0x00c0ca6,0x3e58491,0x2dbe00f,0x366d4b0, + 0x176439a,0x2a86b86,0x00f52ab }, + { 0x0ac32ad,0x226250b,0x0f91d0e,0x1098aa6,0x3dfb79e,0x1dbd572, + 0x052ecf2,0x0f84995,0x0d27ad2,0x036c6b0,0x1e4986f,0x2317dab, + 0x2327df6,0x0dee0b3,0x00389ac } }, + /* 82 */ + { { 0x0e60f5b,0x0622d3e,0x2ada511,0x05522a8,0x27fe670,0x206af28, + 0x333cb83,0x3f25f6c,0x19ddaf3,0x0ec579b,0x36aabc0,0x093dbac, + 
0x348b44b,0x277dca9,0x00c5978 }, + { 0x1cf5279,0x32e294a,0x1a6c26f,0x3f006b6,0x37a3c6b,0x2e2eb26, + 0x2cf88d4,0x3410619,0x1899c80,0x23d3226,0x30add14,0x2810905, + 0x01a41f0,0x11e5176,0x005a02f } }, + /* 83 */ + { { 0x1c90202,0x321df30,0x3570fa5,0x103e2b1,0x3d099d4,0x05e207d, + 0x0a5b1bd,0x0075d0a,0x3db5b25,0x2d87899,0x32e4465,0x226fc13, + 0x24cb8f8,0x3821daa,0x004da3a }, + { 0x3e66861,0x03f89b8,0x386d3ef,0x14ccc62,0x35e7729,0x11ce5b7, + 0x035fbc7,0x3f4df0f,0x29c439f,0x1144568,0x32d7037,0x312f65e, + 0x06b9dbf,0x03a9589,0x0008863 } }, + /* 84 */ + { { 0x0a9e8c9,0x1a19b6e,0x091ecd9,0x2e16ee0,0x2a11963,0x116cf34, + 0x390d530,0x194131f,0x2b580f3,0x31d569c,0x21d3751,0x3e2ce64, + 0x193de46,0x32454f0,0x004bffd }, + { 0x09554e7,0x170126e,0x2be6cd1,0x153de89,0x0353c67,0x350765c, + 0x202370b,0x1db01e5,0x30b12b1,0x3778591,0x00c8809,0x2e845d5, + 0x1fb1e56,0x170f90d,0x00e2db3 } }, + /* 85 */ + { { 0x328e33f,0x392aad8,0x36d1d71,0x0aebe04,0x1548678,0x1b55c8c, + 0x24995f8,0x2a5a01e,0x1bd1651,0x37c7c29,0x36803b6,0x3716c91, + 0x1a935a5,0x32f10b7,0x005c587 }, + { 0x2e8b4c0,0x336ccae,0x11382b6,0x22ec4cc,0x066d159,0x35fa585, + 0x23b2d25,0x3017528,0x2a674a8,0x3a4f900,0x1a7ce82,0x2b2539b, + 0x3d46545,0x0a07918,0x00eb9f8 } }, + /* 86 */ + { { 0x2cf5b9b,0x03e747f,0x166a34e,0x0afc81a,0x0a115b1,0x3aa814d, + 0x11cf3b1,0x163e556,0x3cbfb15,0x157c0a4,0x1bc703a,0x2141e90, + 0x01f811c,0x207218b,0x0092e6b }, + { 0x1af24e3,0x3af19b3,0x3c70cc9,0x335cbf3,0x068917e,0x055ee92, + 0x09a9308,0x2cac9b7,0x008b06a,0x1175097,0x36e929c,0x0be339c, + 0x0932436,0x15f18ba,0x0009f6f } }, + /* 87 */ + { { 0x29375fb,0x35ade34,0x11571c7,0x07b8d74,0x3fabd85,0x090fa91, + 0x362dcd4,0x02c3fdb,0x0608fe3,0x2477649,0x3fc6e70,0x059b7eb, + 0x1e6a708,0x1a4c220,0x00c6c4c }, + { 0x2a53fb0,0x1a3e1f5,0x11f9203,0x27e7ad3,0x038718e,0x3f5f9e4, + 0x308acda,0x0a8700f,0x34472fe,0x3420d7a,0x08076e5,0x014240e, + 0x0e7317e,0x197a98e,0x00538f7 } }, + /* 88 */ + { { 0x2663b4b,0x0927670,0x38dd0e0,0x16d1f34,0x3e700ab,0x3119567, + 0x12559d2,0x399b6c6,0x0a84bcd,0x163e7dd,0x3e2aced,0x058548c, + 0x03a5bad,0x011cf74,0x00c155c }, + { 0x3e454eb,0x2a1e64e,0x1ccd346,0x36e0edf,0x266ee94,0x2e74aaf, + 0x2d8378a,0x3cd547d,0x1d27733,0x0928e5b,0x353553c,0x26f502b, + 0x1d94341,0x2635cc7,0x00d0ead } }, + /* 89 */ + { { 0x0142408,0x382c3bb,0x3310908,0x2e50452,0x398943c,0x1d0ac75, + 0x1bf7d81,0x04bd00f,0x36b6934,0x3349c37,0x0f69e20,0x0195252, + 0x243a1c5,0x030da5f,0x00a76a9 }, + { 0x224825a,0x28ce111,0x34c2e0f,0x02e2b30,0x382e48c,0x26853ca, + 0x24bd14e,0x0200dec,0x1e24db3,0x0d3d775,0x132da0a,0x1dea79e, + 0x253dc0c,0x03c9d31,0x0020db9 } }, + /* 90 */ + { { 0x26c5fd9,0x05e6dc3,0x2eea261,0x08db260,0x2f8bec1,0x1255edf, + 0x283338d,0x3d9a91d,0x2640a72,0x03311f9,0x1bad935,0x152fda8, + 0x0e95abd,0x31abd15,0x00dfbf4 }, + { 0x107f4fa,0x29ebe9a,0x27353f7,0x3821972,0x27311fa,0x2925ab6, + 0x337ab82,0x2de6c91,0x1f115fe,0x044f909,0x21b93c2,0x3a5f142, + 0x13eb5e9,0x3ab1377,0x00b26b6 } }, + /* 91 */ + { { 0x22e5f2b,0x2ae7d4a,0x1ac481c,0x0a6fce1,0x2f93caf,0x242658e, + 0x3f35c3c,0x050f3d2,0x30074c9,0x142079c,0x0281b4c,0x295fea3, + 0x007413e,0x01726cd,0x00e4979 }, + { 0x1ab3cfb,0x1b76295,0x36adf55,0x1ad4636,0x1d444b9,0x3bd2e55, + 0x35425a5,0x1aa8cd3,0x3acecd2,0x1f769e8,0x1a655e9,0x1f6846f, + 0x24c70b5,0x3bff080,0x0002da3 } }, + /* 92 */ + { { 0x081d0d9,0x2c00d99,0x1fe2e24,0x396063f,0x03740db,0x243f680, + 0x3c1f451,0x1ff7b07,0x2803cf2,0x38ca724,0x2934f43,0x0d72d4d, + 0x0e8fe74,0x2975e21,0x002b505 }, + { 0x11adcc9,0x331a99c,0x21e16cf,0x1714c78,0x1f03432,0x2caa2a6, + 
0x34a9679,0x2f7fe8b,0x0423c21,0x1a757ce,0x31b57d6,0x171e044, + 0x093b9b2,0x13602e0,0x00db534 } }, + /* 93 */ + { { 0x250a2f5,0x0b999eb,0x21d10d7,0x22b92a1,0x39b7f8d,0x0c37c72, + 0x29f70f3,0x3bf0e84,0x1d7e04f,0x07a42a9,0x272c3ae,0x1587b2f, + 0x155faff,0x10a336e,0x000d8fb }, + { 0x3663784,0x0d7dcf5,0x056ad22,0x319f8b1,0x0c05bae,0x2b6ff33, + 0x0292e42,0x0435797,0x188efb1,0x0d3f45e,0x119d49f,0x395dcd3, + 0x279fe27,0x133a13d,0x00188ac } }, + /* 94 */ + { { 0x396c53e,0x0d133e9,0x009b7ee,0x13421a0,0x1bbf607,0x1d284a5, + 0x1594f74,0x18cb47c,0x2dcac11,0x2999ddb,0x04e2fa5,0x1889e2c, + 0x0a89a18,0x33cb215,0x0052665 }, + { 0x104ab58,0x1d91920,0x3d6d7e3,0x04dc813,0x1167759,0x13a8466, + 0x0a06a54,0x103761b,0x25b1c92,0x26a8fdd,0x2474614,0x21406a4, + 0x251d75f,0x38c3734,0x007b982 } }, + /* 95 */ + { { 0x15f3060,0x3a7bf30,0x3be6e44,0x0baa1fa,0x05ad62f,0x1e54035, + 0x099d41c,0x2a744d9,0x1c0336f,0x3e99b5b,0x1afd3b1,0x2bf1255, + 0x1822bf8,0x2c93972,0x001d8cc }, + { 0x1d7584b,0x0508ade,0x20dd403,0x203a8fc,0x1c54a05,0x1611a31, + 0x037c8f9,0x1dcd4fe,0x110fbea,0x30f60bc,0x3dffe2f,0x26a1de1, + 0x0480367,0x18ec81c,0x0048eba } }, + /* 96 */ + { { 0x346e2f6,0x0435077,0x036789b,0x3e06545,0x313ab57,0x351a721, + 0x3372b91,0x15e6019,0x2fa4f6c,0x3c30656,0x272c9ac,0x10e84a8, + 0x2bdacea,0x232d9e2,0x009dadd }, + { 0x182579a,0x15b1af8,0x02d8cce,0x36cb49b,0x086feba,0x2911d17, + 0x268ee12,0x011e871,0x18698dc,0x35602b3,0x11b9ec2,0x0ade731, + 0x0f6a05a,0x1821015,0x00007da } }, + /* 97 */ + { { 0x3b00dd0,0x328d485,0x27a69e3,0x32c3a06,0x1046779,0x120b61c, + 0x19fef3d,0x0fef2e6,0x134d923,0x039bce0,0x348cd0e,0x0b0c007, + 0x066ae11,0x15d8f1b,0x00934e7 }, + { 0x33234dc,0x353f0f5,0x2fc1b44,0x18a193a,0x2fcae20,0x1afbc86, + 0x3afe252,0x17f7e10,0x107f3b7,0x2d84d54,0x394c2e6,0x19e96a9, + 0x0a37283,0x26c6152,0x003d262 } }, + /* 98 */ + { { 0x37cfaf8,0x01863d0,0x0299623,0x32c80cb,0x25b8742,0x0a4d90e, + 0x1f72472,0x13de652,0x31a0946,0x0ee0103,0x0f25414,0x2518b49, + 0x07e7604,0x1488d9b,0x00abd6b }, + { 0x1338f55,0x2ce4af5,0x1a0c119,0x3380525,0x21a80a9,0x235d4df, + 0x118ca7f,0x2dd8bcc,0x1c26bf4,0x32dc56b,0x28482b6,0x1418596, + 0x3c84d24,0x1f1a5a9,0x00d958d } }, + /* 99 */ + { { 0x1c21f31,0x22aa1ef,0x258c9ad,0x2d2018f,0x0adb3ca,0x01f75ee, + 0x186283b,0x31ad3bf,0x3621be7,0x3b1ee6d,0x015582d,0x3d61d04, + 0x2ddf32e,0x14b8a66,0x00c970c }, + { 0x2f24d66,0x00b8a88,0x100a78f,0x041d330,0x2efec1d,0x24c5b86, + 0x2a6a390,0x37526bc,0x2055849,0x3339f08,0x16bffc4,0x07f9d72, + 0x06ec09c,0x3f49ee8,0x00cad98 } }, + /* 100 */ + { { 0x248b73e,0x1b8b42d,0x285eed7,0x39473f4,0x1a9f92c,0x3b44f78, + 0x086c062,0x06a4ea3,0x34ea519,0x3c74e95,0x1ad1b8b,0x1737e2c, + 0x2cfe338,0x0a291f4,0x00bbecc }, + { 0x1cec548,0x0c9b01a,0x20b298d,0x377c902,0x24f5bc1,0x2415c8d, + 0x1a70622,0x2529090,0x1c5c682,0x283f1ba,0x2319f17,0x0120e2e, + 0x01c6f4d,0x33c67ff,0x008b612 } }, + /* 101 */ + { { 0x03830eb,0x02d4053,0x10c59bb,0x0f23b83,0x13d08f8,0x26ea4e2, + 0x2626427,0x0a45292,0x0449cbc,0x0175750,0x074c46f,0x27ae0f8, + 0x2d7d6ae,0x163dd3a,0x0063bb7 }, + { 0x2bb29e0,0x034bab1,0x341e1c4,0x21d2c0b,0x295aa2d,0x0f2c666, + 0x1891755,0x13db64a,0x2fe5158,0x337646e,0x31a1aae,0x057bee4, + 0x00f9e37,0x396d19e,0x00c1b6a } }, + /* 102 */ + { { 0x2772f41,0x34f92d0,0x39d1cde,0x174ef2d,0x03a700d,0x03fbb98, + 0x30d50e8,0x352ed10,0x1fcf5e5,0x3d113bc,0x26e358f,0x180653f, + 0x1b43cc6,0x3cc9aa4,0x00e68a2 }, + { 0x37fe4d2,0x09dd725,0x01eb584,0x171f8a9,0x278fdef,0x3e37c03, + 0x3bec02f,0x149757c,0x0cd5852,0x37d2e10,0x0e6988b,0x1c120e9, + 0x0b83708,0x38e7319,0x0039499 } }, + /* 103 */ + { { 
0x08df5fe,0x177a02c,0x0362fc0,0x1f18ee8,0x00c1295,0x173c50a, + 0x379414d,0x1885ba8,0x32a54ef,0x2315644,0x39e65cf,0x357c4be, + 0x1d66333,0x09e05a5,0x0009c60 }, + { 0x1f7a2fb,0x073b518,0x2eb83ac,0x11353d7,0x1dd8384,0x0c63f2b, + 0x238c6c8,0x2a1920a,0x2e5e9f1,0x1cc56f8,0x042daf4,0x1ed5dc5, + 0x25f9e31,0x012a56a,0x0081b59 } }, + /* 104 */ + { { 0x321d232,0x2c71422,0x3a756b6,0x30230b2,0x387f3db,0x3a7c3eb, + 0x274b46a,0x201e69f,0x185bb7b,0x140da82,0x0d974a2,0x0616e42, + 0x35ec94f,0x3bc366b,0x005aa7c }, + { 0x3dcfffc,0x19a9c15,0x3225e05,0x36ae114,0x16ea311,0x0cda2aa, + 0x2a1a8d2,0x154b5cb,0x08348cd,0x17b66c8,0x080ea43,0x21e59f3, + 0x04173b9,0x31d5b04,0x00ad735 } }, + /* 105 */ + { { 0x2e76ef4,0x216acf3,0x2b93aea,0x112bc74,0x3449974,0x2b2e48f, + 0x11929be,0x2f03021,0x19051e3,0x0ac202d,0x19be68a,0x3b87619, + 0x26cdac4,0x086592c,0x00f00de }, + { 0x2e90d4d,0x3ed703c,0x2c648d7,0x29ddf67,0x000e219,0x3471247, + 0x26febd5,0x1161713,0x3541a8f,0x302038d,0x08d2af9,0x26e1b21, + 0x398514a,0x36dad99,0x002ed70 } }, + /* 106 */ + { { 0x06f25cb,0x1104596,0x370faee,0x07e83f3,0x0f7b686,0x228d43a, + 0x12cd201,0x0a1bd57,0x3e592dc,0x1e186fc,0x2226aba,0x2c63fe9, + 0x17b039a,0x1efaa61,0x00d1582 }, + { 0x2e6acef,0x07d51e4,0x3ac326c,0x322b07e,0x1422c63,0x32ff5c7, + 0x18760df,0x048928b,0x139b251,0x04d7da9,0x048d1a2,0x2a23e84, + 0x199dbba,0x2fa7afe,0x0049f1a } }, + /* 107 */ + { { 0x3492b73,0x27d3d3d,0x2b1a16f,0x07b2ce4,0x0cf28ec,0x2729bff, + 0x3130d46,0x3e96116,0x140b72e,0x14a2ea3,0x1ca066f,0x3a61f1d, + 0x022ebac,0x09192b4,0x003e399 }, + { 0x12555bb,0x0b6139d,0x239463a,0x12a70ab,0x2aaa93b,0x2254e72, + 0x00424ec,0x26a6736,0x26daa11,0x25b5ad6,0x379f262,0x140cd30, + 0x0c7d3bd,0x097bbcf,0x00899e9 } }, + /* 108 */ + { { 0x3825dc4,0x3cd946f,0x0462b7f,0x31102e7,0x30f741c,0x3313ed6, + 0x1ff5a95,0x15bf9dc,0x09b47fd,0x0f2e7a7,0x1626c0d,0x3c14f6d, + 0x14098bd,0x19d7df8,0x00a97ce }, + { 0x0934f5e,0x3f968db,0x046f68a,0x12333bf,0x26cd5e1,0x1ea2161, + 0x358570d,0x235031d,0x35edd55,0x05265e3,0x24ae00c,0x3542229, + 0x25bb2a1,0x1c83c75,0x0058f2a } }, + /* 109 */ + { { 0x24daedb,0x376928f,0x305266f,0x0499746,0x038318c,0x312efd7, + 0x1910a24,0x33450a3,0x1c478a9,0x39d8bf9,0x12cc0ae,0x397aeab, + 0x0654c08,0x095f283,0x00d2cdf }, + { 0x0b717d2,0x1f162c2,0x107a48f,0x128e1b3,0x2380718,0x39f4044, + 0x00f626a,0x05ec0c9,0x21bc439,0x200fa4d,0x20aea01,0x186a1d8, + 0x26372f2,0x1a91f87,0x0053f55 } }, + /* 110 */ + { { 0x3512a90,0x33b958b,0x29f1c84,0x0106c3a,0x224b3c0,0x09b307a, + 0x215d2de,0x3bdf43b,0x22cf0c9,0x176121d,0x1534143,0x09ba717, + 0x16b3110,0x0f73f6c,0x008f5b7 }, + { 0x2c75d95,0x26fbcb4,0x0dda1f6,0x206f819,0x28d33d5,0x1fb4d79, + 0x024c125,0x30a0630,0x1f9c309,0x0fe350d,0x1696019,0x0a54187, + 0x09541fd,0x35e3a79,0x0066618 } }, + /* 111 */ + { { 0x0e382de,0x33f5163,0x0dde571,0x3bb7a40,0x1175806,0x12ae8ed, + 0x0499653,0x3b25586,0x38ade7a,0x3fa265d,0x3f4aa97,0x3c03dbb, + 0x30c6de8,0x32d4042,0x00ae971 }, + { 0x2f788f1,0x1fbaf0e,0x3e2d182,0x3ff904f,0x0d46229,0x1d0726d, + 0x15455b4,0x093ae28,0x290f8e4,0x097c0b9,0x1ae8771,0x28480bb, + 0x04f6d40,0x3689925,0x0049b3b } }, + /* 112 */ + { { 0x35b2d69,0x31819c0,0x11b0d63,0x035afb6,0x2b50715,0x2bece6c, + 0x35f82f7,0x0ad987c,0x0011601,0x02e6f67,0x2d0a5f5,0x365e583, + 0x2f7c900,0x11449c5,0x00ed705 }, + { 0x27abdb4,0x1bbfd04,0x301c157,0x263c079,0x36850d6,0x3f21f8b, + 0x27d7493,0x0f9227e,0x06fb0ce,0x002daf3,0x37d8c1c,0x3ef87d7, + 0x19cc6f4,0x0c3809c,0x00cf752 } }, + /* 113 */ + { { 0x22d94ed,0x075b09c,0x020e676,0x084dc62,0x2d1ec3f,0x17439f1, + 0x240b702,0x33cc596,0x30ebaf3,0x0359fe0,0x393ea43,0x0ece01e, + 
0x16c6963,0x03a82f2,0x0017faa }, + { 0x3866b98,0x3cd20b7,0x12d4e6b,0x3a6a76d,0x1205c1e,0x3e6ae1a, + 0x2f9bbdf,0x2e61547,0x2d175ee,0x28e18f6,0x13cf442,0x085b0ef, + 0x0e321ef,0x238fe72,0x003fb22 } }, + /* 114 */ + { { 0x360ac07,0x26dc301,0x3f4d94f,0x2ba75e6,0x1f3c9cc,0x17ff20f, + 0x0ea084c,0x30e39cf,0x143dc49,0x03bd43e,0x3c9e733,0x19e8aba, + 0x27fbaf4,0x12d913a,0x005ee53 }, + { 0x3609e7f,0x2d89c80,0x09f020c,0x1558bf7,0x3098443,0x3c515fd, + 0x1c8e580,0x16506bd,0x26cb4b2,0x1747d42,0x2ec8239,0x32c91f0, + 0x1ca3377,0x079768f,0x00a5f3e } }, + /* 115 */ + { { 0x185fa94,0x122759f,0x0e47023,0x0dcb6e7,0x10ba405,0x3b5eab4, + 0x1f7a1fa,0x32d003f,0x1739a4c,0x3295ec3,0x1b18967,0x3f3b265, + 0x34d2448,0x2dbadc9,0x00f30b5 }, + { 0x01c5338,0x2d1dcf2,0x2bd07cc,0x39a8fb5,0x2b85639,0x355bab6, + 0x1df95f1,0x01eb5f6,0x17f0a16,0x1b895b5,0x157574d,0x29fff72, + 0x3a8c46d,0x0118071,0x0065f84 } }, + /* 116 */ + { { 0x3a1e7f1,0x17432f2,0x1f648d4,0x3000ad5,0x2ef0a08,0x1f86624, + 0x1ca31b1,0x241f9dc,0x2cb4885,0x2b8610f,0x364ce16,0x1e5faf0, + 0x0b33867,0x2cb637d,0x00816d2 }, + { 0x1aa8671,0x02c394e,0x35f5e87,0x393040a,0x39f0db3,0x1c831a5, + 0x2966591,0x034a8d0,0x09e613c,0x042b532,0x018ddd6,0x3e402c9, + 0x2e20e1a,0x29cb4cd,0x00e087c } }, + /* 117 */ + { { 0x3a10079,0x20c7fea,0x3ff2222,0x1edb593,0x00dc5f8,0x3a32ccc, + 0x1479073,0x0cfed11,0x2a2702a,0x17a056a,0x1fba321,0x235acb9, + 0x149c833,0x172de7d,0x000f753 }, + { 0x2e95923,0x3b365cb,0x009f471,0x0df1b47,0x21e868b,0x199bbd3, + 0x07b8ecc,0x12ff0af,0x189808a,0x3bd5059,0x3fbc4d2,0x0fa7b88, + 0x1125bf2,0x0db0b5d,0x0043572 } }, + /* 118 */ + { { 0x29cdb1b,0x1db656e,0x391efe1,0x004be09,0x245a1ca,0x3793328, + 0x254af24,0x2f2e65d,0x10e5cc4,0x2af6fe7,0x2d97ac0,0x29f7d42, + 0x19fd6f6,0x0ac184d,0x00c5211 }, + { 0x305eae3,0x36738d3,0x2c2b696,0x00ba50e,0x3903adc,0x2122f85, + 0x0753470,0x1cf96a4,0x1702a39,0x247883c,0x2feb67e,0x2ab3071, + 0x3c6b9e1,0x30cb85a,0x002ca0a } }, + /* 119 */ + { { 0x3871eb5,0x284b93b,0x0a7affe,0x176a2fc,0x294c2f2,0x204d3aa, + 0x1e4c2a7,0x3ec4134,0x2fb0360,0x3847b45,0x05fc11b,0x0a6db6e, + 0x390fa40,0x2adfd34,0x005e9f7 }, + { 0x0646612,0x1b5cbcc,0x10d8507,0x0777687,0x3a0afed,0x1687440, + 0x0222578,0x1af34a4,0x2174e27,0x372d267,0x11246c3,0x34769c5, + 0x2044316,0x1b4d626,0x00c72d5 } }, + /* 120 */ + { { 0x2e5bb45,0x3ff1d36,0x16dcdf5,0x128986f,0x399068c,0x2a63b1e, + 0x0afa7aa,0x3a5b770,0x200f121,0x33b74bb,0x1414045,0x0f31ef8, + 0x2f50e16,0x2f38cd6,0x00b0b1b }, + { 0x1a06293,0x035e140,0x2644d44,0x1f1954b,0x2cdebab,0x31d5f91, + 0x0b8dbc8,0x38f2d23,0x3783cab,0x2a07e73,0x3123f59,0x3409846, + 0x3784ddd,0x223bbac,0x003dc7b } }, + /* 121 */ + { { 0x0741456,0x234e631,0x2121e1b,0x00980ca,0x3a9dfa9,0x098c916, + 0x3fc86d1,0x1c63072,0x3625244,0x13d0471,0x05b0fc5,0x1487550, + 0x2498596,0x11bb6ea,0x001afab }, + { 0x274b4ad,0x240aea1,0x3d12a75,0x2b56b61,0x1486b43,0x1b83426, + 0x31c7363,0x35b59ca,0x207bb6c,0x38e6243,0x19bace4,0x0a26671, + 0x35e3381,0x0c2ded4,0x00d8da4 } }, + /* 122 */ + { { 0x2b75791,0x19590b1,0x2bfb39f,0x2988601,0x0050947,0x0d8bbe1, + 0x23e3701,0x08e4432,0x2ed8c3d,0x326f182,0x332e1dd,0x12219c5, + 0x2e0779b,0x367aa63,0x0012d10 }, + { 0x251b7dc,0x0a08b4d,0x1138b6f,0x2ea02af,0x06345a5,0x1cb4f21, + 0x0332624,0x1d49d88,0x140acc5,0x2f55287,0x024447c,0x291ace9, + 0x1a4966e,0x015cbec,0x005bc41 } }, + /* 123 */ + { { 0x351cd0e,0x315e8e9,0x07d6e70,0x067ae8f,0x2190d84,0x351f556, + 0x03bee79,0x31b62c7,0x266f912,0x1b6a504,0x007a6ad,0x3a6ab31, + 0x3891112,0x3c45ba0,0x00d6ce5 }, + { 0x0e1f2ce,0x32a5edc,0x1434063,0x1ca084f,0x2a3e47c,0x137e042, + 
0x16e2418,0x2069280,0x3b0dfd8,0x35a22b5,0x289bf0a,0x1f667f2, + 0x02d23a3,0x0ce688f,0x00d8e3f } }, + /* 124 */ + { { 0x10bed6f,0x14c58dd,0x0b0abdf,0x0ca0f9a,0x3808abc,0x2ec228c, + 0x2366275,0x12afa16,0x20f6b0e,0x37dca8e,0x3af0c6a,0x1c5b467, + 0x1b25ff7,0x00814de,0x0022dcc }, + { 0x1a56e11,0x02fe37e,0x3f21740,0x35d5a91,0x06cb8ba,0x29bad91, + 0x17176f7,0x2d919f2,0x0f7d1f5,0x13a3f61,0x04ddb05,0x0c82a51, + 0x286f598,0x2e8c777,0x0007071 } }, + /* 125 */ + { { 0x0f8fcb9,0x3e83966,0x170c6fd,0x3825343,0x089cec8,0x01b482a, + 0x0993971,0x3327282,0x39aba8a,0x32456fe,0x1507e01,0x1c3252d, + 0x21ffb13,0x29822a0,0x0083246 }, + { 0x23c378f,0x1cea7ef,0x1be9a82,0x224d689,0x37e5447,0x3764a75, + 0x3a49724,0x361e1b3,0x19d365b,0x3a61ffb,0x1c29a7a,0x20ab251, + 0x17ec549,0x175d777,0x004589a } }, + /* 126 */ + { { 0x15540a9,0x2ec5d2a,0x05b09fa,0x1bc058b,0x07cfb88,0x28f7b86, + 0x3e766be,0x189305e,0x01fe88e,0x23fdf69,0x0b919c3,0x02dc7ae, + 0x3f9a9ad,0x0b83cc7,0x0086a52 }, + { 0x28bc259,0x39bdca1,0x39e4bc8,0x0e0f33b,0x16130c6,0x2919955, + 0x31f4549,0x2fed027,0x30919b2,0x0a39b03,0x0ca7bb2,0x1711b24, + 0x3b67b94,0x05a136b,0x00acd87 } }, + /* 127 */ + { { 0x0c53841,0x31cb284,0x3ced090,0x06d5693,0x1c20ae0,0x0408d2b, + 0x37ebd5e,0x081900f,0x26a8589,0x0acfd0a,0x34a1472,0x2f0c302, + 0x124ccbd,0x10de328,0x00971bc }, + { 0x17ff2ff,0x27d1b54,0x147b6f7,0x38bb2ea,0x26a9c96,0x0a49448, + 0x39f2f46,0x247c579,0x3b16a4e,0x28c2a5a,0x2d4c72d,0x11f248c, + 0x1e4df11,0x047d604,0x0065bc3 } }, + /* 128 */ + { { 0x39b3239,0x1f75f44,0x3bae87c,0x139360c,0x18b5782,0x3ffc005, + 0x3c48789,0x2bc6af2,0x38b909e,0x223ff3b,0x31443a7,0x017d3bb, + 0x0bfed99,0x128b857,0x00020dd }, + { 0x306d695,0x25a7b28,0x2f60ca2,0x2b6e4f2,0x1df940c,0x1fa9b8e, + 0x37fab78,0x13f959f,0x10ff98c,0x38343b8,0x019cb91,0x11a1e6b, + 0x17ab4c6,0x1431f47,0x004b4ea } }, + /* 129 */ + { { 0x20db57e,0x102515e,0x170219e,0x2b66a32,0x1e6017c,0x2f973fe, + 0x3739e51,0x0e28b6f,0x3cda7a9,0x30d91ac,0x28350df,0x1444215, + 0x098b504,0x1bcd5b8,0x00ad3bd }, + { 0x22e3e3e,0x3aeaffb,0x26cb935,0x0091ce4,0x2fbd017,0x3a7ed6a, + 0x335b029,0x3bfc1f1,0x3852e3f,0x2b14a86,0x046b405,0x266af4c, + 0x3997191,0x33b0e40,0x00e306f } }, + /* 130 */ + { { 0x3e4712c,0x26bb208,0x18eed6d,0x1b30f06,0x27ca837,0x06faf62, + 0x1831873,0x3fbcf9b,0x3f3d88b,0x1fb55eb,0x0f44edc,0x29917bb, + 0x3151772,0x342d72e,0x00d4e63 }, + { 0x2ee0ecf,0x39e8733,0x2e8e98c,0x0cd4e0f,0x08f0126,0x1ad157a, + 0x079078a,0x23018ee,0x196c765,0x2b2f34f,0x0783336,0x075bf9c, + 0x3713672,0x098d699,0x00f21a7 } }, + /* 131 */ + { { 0x186ba11,0x22cf365,0x048019d,0x2ca2970,0x0d9e0ae,0x08c3bd7, + 0x261dbf2,0x2fc2790,0x1ee02e6,0x10256a7,0x00dc778,0x18dc8f2, + 0x157b189,0x2ebc514,0x005c97d }, + { 0x3c4503e,0x1d10d12,0x337097e,0x0c6169a,0x30fb1cb,0x3481752, + 0x0df2bec,0x19768fa,0x1bcf8f7,0x2925f74,0x2c988a1,0x3be571d, + 0x04cfa92,0x2ea9937,0x003f924 } }, + /* 132 */ + { { 0x268b448,0x06e375c,0x1b946bf,0x287bf5e,0x3d4c28b,0x138d547, + 0x21f8c8e,0x21ea4be,0x2d45c91,0x35da78e,0x00326c0,0x210ed35, + 0x1d66928,0x0251435,0x00fefc8 }, + { 0x0339366,0x216ff64,0x2c3a30c,0x3c5733d,0x04eeb56,0x2333477, + 0x32b1492,0x25e3839,0x1b5f2ce,0x0dcfba1,0x3165bb2,0x3acafcc, + 0x10abfcd,0x248d390,0x008106c } }, + /* 133 */ + { { 0x102f4ee,0x3c0585f,0x1225c8d,0x11c6388,0x08a7815,0x2b3e790, + 0x2895eb6,0x18cf53a,0x0b56e5a,0x2e2c003,0x3e981ff,0x0761b55, + 0x1bc32f3,0x0a7111d,0x00f5c80 }, + { 0x3568973,0x1587386,0x16ec764,0x20698a6,0x02f809b,0x2821502, + 0x113d64d,0x38c2679,0x15de61c,0x0309f60,0x272999e,0x29bfe64, + 0x173f70d,0x1de7fab,0x00bd284 } }, + /* 134 */ + { { 
0x31cdf2b,0x0f0be66,0x2151603,0x01af17e,0x32a99cf,0x085dece, + 0x27d2591,0x1520df4,0x273c448,0x1ec7c54,0x102e229,0x355f604, + 0x2acb75f,0x005f1fd,0x003d43e }, + { 0x270eb28,0x22ec2ce,0x306b41a,0x238fa02,0x167de2d,0x030a379, + 0x245a417,0x1808c24,0x0b1a7b2,0x3ab5f6f,0x2cbc6c1,0x2c228d4, + 0x3041f70,0x2d9a6cc,0x00b504f } }, + /* 135 */ + { { 0x17a27c2,0x216ad7e,0x011ba8e,0x22f0428,0x16ac5ec,0x3ef3c58, + 0x345533f,0x0298155,0x2856579,0x0005e03,0x19ee75b,0x146fe16, + 0x29881e4,0x18ece70,0x008907a }, + { 0x20189ed,0x119ce09,0x35cb76d,0x0d91ef4,0x2284a44,0x032ad87, + 0x0e8c402,0x3c82b5d,0x38c416c,0x398992f,0x1fd820c,0x169b255, + 0x3b5fcfa,0x1343c92,0x00fa715 } }, + /* 136 */ + { { 0x33f5034,0x20b3b26,0x28fd184,0x16b3679,0x3962d44,0x15d1bc8, + 0x2fb1d69,0x1292c99,0x25a58c9,0x1b19ab7,0x2d68a5b,0x2f6a09b, + 0x0d6aedb,0x2935eac,0x0005664 }, + { 0x25e32fc,0x13f9440,0x3252bcd,0x2fea5b7,0x161a5ae,0x0564a8c, + 0x0a07e23,0x1545f62,0x0de9890,0x1d76765,0x1fd440e,0x2ed0041, + 0x3db4c96,0x1e8ba01,0x001b0c4 } }, + /* 137 */ + { { 0x0223878,0x29ab202,0x15585c2,0x1a79969,0x1ba08c2,0x2ef09ff, + 0x2b1b9b9,0x181f748,0x1bf72b9,0x224645c,0x2588dc5,0x2d157e7, + 0x22d939a,0x05b88d9,0x006d549 }, + { 0x31de0c1,0x23a4e0e,0x278f8da,0x1aa013c,0x1a84d18,0x0d185a5, + 0x0988ccd,0x2c32efd,0x3bee10e,0x37d7ab8,0x3f2a66e,0x3e2da3e, + 0x1b5701f,0x3d9f0c1,0x00a68da } }, + /* 138 */ + { { 0x0b2e045,0x0133fd1,0x05d4c10,0x0d92c70,0x391b5e1,0x2292281, + 0x2e40908,0x2ec694e,0x195ea11,0x29cfeca,0x3d93a4e,0x01215c0, + 0x08a5f32,0x37a0eff,0x00cce45 }, + { 0x2b3106e,0x12a5fb0,0x0b4faff,0x0c2da12,0x09069c6,0x35d8907, + 0x2837a6e,0x3db3fb6,0x3136cc3,0x222836b,0x3da018a,0x2741274, + 0x13ba319,0x1ac7642,0x00f867c } }, + /* 139 */ + { { 0x2527296,0x10a9595,0x178de4d,0x0f739c4,0x0ae26c7,0x3094599, + 0x20adac6,0x2b875c2,0x3ae5dc0,0x3e04d20,0x1aab2da,0x1d3ab37, + 0x15f4f75,0x0b730b5,0x00c56b5 }, + { 0x1f32923,0x2f059e5,0x2a89872,0x2056f74,0x04be175,0x1da67c0, + 0x17f1e7a,0x3780a6d,0x0723ac2,0x257f367,0x1237773,0x2bcee86, + 0x0b97f83,0x38aff14,0x00a64d4 } }, + /* 140 */ + { { 0x2552b40,0x0b6b883,0x12e8217,0x0974d35,0x062f497,0x1e563e6, + 0x30ee400,0x375d1e4,0x290751f,0x0d5b68a,0x353e48c,0x064a0d3, + 0x3c343f1,0x309a394,0x0034d2a }, + { 0x3111286,0x0f08604,0x1827107,0x0536a76,0x0201dac,0x3a574de, + 0x2c29dbe,0x382c7b0,0x1191f3e,0x324c5bc,0x144ce71,0x24327c1, + 0x1212778,0x22bc9d8,0x00d7713 } }, + /* 141 */ + { { 0x34ad1cd,0x1179b4e,0x1bc1780,0x1392a92,0x2cd86b9,0x359de85, + 0x251f1df,0x0da5d5f,0x135fa61,0x0f64a42,0x34f4d89,0x0fe564c, + 0x3cf9b7a,0x122d757,0x008c9c2 }, + { 0x370d4e9,0x0e9209b,0x0ae99f2,0x1518c64,0x0172734,0x2c20692, + 0x1d7c135,0x149c52f,0x38928d6,0x3c78b78,0x25841d1,0x2eaa897, + 0x372e50b,0x29e5d19,0x00c4c18 } }, + /* 142 */ + { { 0x13375ac,0x389a056,0x211310e,0x2f9f757,0x04f3288,0x103cd4e, + 0x17b2fb2,0x2c78a6a,0x09f1de6,0x23e8442,0x1351bc5,0x1b69588, + 0x285b551,0x0464b7e,0x00573b6 }, + { 0x0ba7df5,0x259a0db,0x2b4089e,0x05630a2,0x3f299be,0x350ff2f, + 0x1c9348a,0x3becfa4,0x3cc9a1c,0x17a6ef1,0x338b277,0x2b761d9, + 0x2aa01c8,0x3cb9dd7,0x006e3b1 } }, + /* 143 */ + { { 0x277788b,0x16a222d,0x173c036,0x310ff58,0x2634ae8,0x392636f, + 0x0987619,0x1e6acc1,0x26dc8f7,0x242310f,0x0c09aca,0x22b8e11, + 0x0d17006,0x1c2c806,0x002380c }, + { 0x297c5ec,0x1fef0e8,0x3948cf7,0x14f2915,0x2dacbc8,0x0dafb1f, + 0x10de043,0x31184da,0x06414ee,0x3c9aeeb,0x1f713ab,0x308f1f8, + 0x1569ed1,0x3f379bf,0x00f08bb } }, + /* 144 */ + { { 0x0770ee3,0x058fd21,0x17065f8,0x251d128,0x10e0c7f,0x06cb51b, + 0x0f05f7e,0x3666a72,0x3e7d01f,0x2d05fab,0x11440e5,0x28577d4, + 
0x2fbcf2b,0x14aa469,0x00dc5c5 }, + { 0x270f721,0x1c75d28,0x085b862,0x1d68011,0x132c0a0,0x37be81d, + 0x1a87e38,0x083fa74,0x3acbf0d,0x16d6429,0x0feda1f,0x031070a, + 0x2ec2443,0x21e563d,0x00454d2 } }, + /* 145 */ + { { 0x0525435,0x1e98d5f,0x3dbc52b,0x1fcdf12,0x13d9ef5,0x3ff311d, + 0x393e9ed,0x3cef8ae,0x2987710,0x3bdee2e,0x21b727d,0x3ba1b68, + 0x10d0142,0x3c64b92,0x0055ac3 }, + { 0x0c1c390,0x38e9bb0,0x1e7b487,0x11511b3,0x1036fb3,0x25aba54, + 0x1eb2764,0x048d022,0x0d971ed,0x1bb7fb5,0x100f0b4,0x06c3756, + 0x2f0d366,0x3c6e160,0x0011bd6 } }, + /* 146 */ + { { 0x36bc9d1,0x24d43c1,0x12c35cf,0x2fb3cf3,0x015d903,0x16bc0c7, + 0x0fc8c22,0x3195c87,0x2488b1c,0x1f82b4c,0x30014e8,0x27ee58d, + 0x31658dd,0x1684a5f,0x00f0f3a }, + { 0x1f703aa,0x023eebc,0x20babb9,0x080bd9d,0x12f9cc4,0x1a8e2d4, + 0x0eec666,0x1176803,0x33005d6,0x1137b68,0x37de339,0x33d71cb, + 0x0c906b9,0x14086b5,0x00aeef6 } }, + /* 147 */ + { { 0x219045d,0x0f22c5e,0x024c058,0x00b414a,0x0ae7c31,0x3db3e96, + 0x234979f,0x0cf00a8,0x3c962c7,0x27fa77f,0x1c0c4b0,0x1fe8942, + 0x218053a,0x1eed3f8,0x0051643 }, + { 0x2a23ddb,0x138f570,0x104e945,0x21ca270,0x30726d8,0x3f45490, + 0x37d9184,0x242ea25,0x33f6d77,0x3f15679,0x065af85,0x34fa1f5, + 0x2e46b8f,0x31d17fb,0x00a2615 } }, + /* 148 */ + { { 0x335167d,0x181ea10,0x0887c8d,0x01383d7,0x18b42d8,0x263447e, + 0x1f13df3,0x0319d7e,0x0872074,0x2d6aa94,0x23d9234,0x36a69aa, + 0x0bad183,0x3138a95,0x00bd3a5 }, + { 0x1b0f658,0x0e4530b,0x373add1,0x1b968fc,0x329dcb6,0x09169ca, + 0x162df55,0x0211eff,0x02391e4,0x3867460,0x3136b1a,0x37dd36e, + 0x3bc5bd9,0x2dacfe4,0x0072a06 } }, + /* 149 */ + { { 0x119d96f,0x067b0eb,0x00996da,0x293eca9,0x2b342da,0x1889c7a, + 0x21633a6,0x0152c39,0x281ce8c,0x18ef3b3,0x0bd62dc,0x3238186, + 0x38d8b7c,0x3867b95,0x00ae189 }, + { 0x0ed1eed,0x1e89777,0x13ab73e,0x029e1d7,0x2c1257f,0x33fbc09, + 0x32d5a21,0x3d870b2,0x39bb1fd,0x33663bc,0x24e83e6,0x239bda4, + 0x3088bcd,0x01db1ed,0x00d71e7 } }, + /* 150 */ + { { 0x14245bf,0x0da0c27,0x153b339,0x05cab0a,0x122d962,0x1b0f0f3, + 0x3f5a825,0x267a2ce,0x2910d06,0x254326f,0x0f36645,0x025118e, + 0x37c35ec,0x36e944e,0x006c056 }, + { 0x05ab0e3,0x29aa0c1,0x1295687,0x1fd1172,0x08d40b5,0x05bd655, + 0x345048a,0x02a1c3c,0x2393d8f,0x0992d71,0x1f71c5e,0x18d4e8a, + 0x30dd410,0x11d61d3,0x00dd58b } }, + /* 151 */ + { { 0x2230c72,0x30213d8,0x05e367e,0x329204e,0x0f14f6c,0x3369ddd, + 0x0bb4074,0x2edafd6,0x1b1aa2d,0x0785404,0x0c035ab,0x220da74, + 0x1f2fdd4,0x092a091,0x00ef83c }, + { 0x3dc2538,0x1cca3e7,0x246afb5,0x24c647f,0x0798082,0x0bb7952, + 0x0f5c443,0x008b38a,0x299ea1a,0x3c6cf36,0x3df2ec7,0x398e6dc, + 0x29a1839,0x1cadd83,0x0077b62 } }, + /* 152 */ + { { 0x25d56d5,0x3546f69,0x16e02b1,0x3e5fa9a,0x03a9b71,0x2413d31, + 0x250ecc9,0x1d2de54,0x2ebe757,0x2a2f135,0x2aeeb9a,0x0d0fe2b, + 0x204cb0e,0x07464c3,0x00c473c }, + { 0x24cd8ae,0x0c86c41,0x221c282,0x0795588,0x1f4b437,0x06fc488, + 0x0c81ecd,0x020bf07,0x3a9e2c8,0x2294a81,0x3a64a95,0x0363966, + 0x32c9a35,0x0f79bec,0x0029e4f } }, + /* 153 */ + { { 0x289aaa5,0x2755b2e,0x059e0aa,0x3031318,0x0f0208a,0x35b7729, + 0x00d9c6b,0x3dd29d0,0x075f2c2,0x0ece139,0x31562dd,0x04187f2, + 0x13b8d4c,0x0920b85,0x003924e }, + { 0x09808ab,0x2e36621,0x2a36f38,0x1829246,0x229bf32,0x20883b7, + 0x159ada8,0x3108a14,0x15bbe5b,0x1e2d1e4,0x1730096,0x0d35cbb, + 0x15d0da9,0x0e60b94,0x00c4f30 } }, + /* 154 */ + { { 0x31de38b,0x27b9086,0x2760e3e,0x169098d,0x2a124e2,0x00596c6, + 0x3f73c09,0x0d31642,0x2341464,0x248600a,0x2e1fa10,0x2aa0fc8, + 0x051e954,0x00f3b67,0x001d4bd }, + { 0x18751e6,0x25a8e1e,0x07f5c2d,0x17e30d4,0x0ed2723,0x23093e2, + 
0x3b80e2c,0x13de2d7,0x2fad37f,0x1be1cfb,0x3224ba9,0x0a7f5d3, + 0x1714972,0x06667b7,0x009dcd9 } }, + /* 155 */ + { { 0x294f22a,0x3e06993,0x0341ee9,0x24bdc7b,0x2e56098,0x2660a13, + 0x018ddda,0x2c261b2,0x2953b54,0x267f51c,0x0e8a7cc,0x29ab00c, + 0x3a38247,0x397ac81,0x00de684 }, + { 0x36b956b,0x347b34a,0x35834bd,0x053c06c,0x0090844,0x148cec5, + 0x380b325,0x2f17b8b,0x054ef5e,0x09683fb,0x3f8b29a,0x33c979a, + 0x1e01474,0x3e81fca,0x001c757 } }, + /* 156 */ + { { 0x30fdfe4,0x2d712ba,0x13671bc,0x2cfc226,0x3d7c649,0x16f020e, + 0x368e3f0,0x2981ebb,0x246a78a,0x115e81b,0x21223a4,0x04dbb30, + 0x1a50ba2,0x12114bd,0x0089bd6 }, + { 0x055f15a,0x1046e51,0x00fd724,0x1c022a7,0x323dfa9,0x36d8efb, + 0x0da4d16,0x0910dec,0x2c1fb16,0x2dbe29f,0x298284f,0x2b273bb, + 0x26022c1,0x20accd5,0x00085a5 } }, + /* 157 */ + { { 0x01f138a,0x2d87e7b,0x0c2815c,0x0c19a3c,0x311c9a2,0x3e4fce3, + 0x029729d,0x21236b2,0x2984048,0x3f3bc95,0x2bba8fb,0x1a1b680, + 0x0619a3f,0x29e0447,0x00ed5fe }, + { 0x2d1c833,0x3dcef35,0x3f809b4,0x01a1b9e,0x1509516,0x10ac754, + 0x2735080,0x27b0a8a,0x2495fb8,0x0a7bdba,0x1ef8b89,0x00233a5, + 0x0568bf1,0x1a126ba,0x0078a7e } }, + /* 158 */ + { { 0x0470cd8,0x20e9f04,0x30003fe,0x20be1b7,0x1927346,0x2a5026d, + 0x1ac06bd,0x2717ed7,0x2609493,0x3079ea5,0x1cc116d,0x31b0541, + 0x2c8ccde,0x10219ae,0x001a52b }, + { 0x2864045,0x0e8d95b,0x2fc1530,0x0aa44e7,0x345eae7,0x3cc7553, + 0x3ec6466,0x229b60e,0x06f6e95,0x00bed2a,0x0ff4403,0x181c639, + 0x2e0df67,0x1f8fa46,0x0000811 } }, + /* 159 */ + { { 0x04310a2,0x20cee8e,0x09fc5d5,0x3707f5b,0x0bdfb4e,0x12713ee, + 0x24f1028,0x0787ee6,0x39a581c,0x3797ec8,0x10a9746,0x112cb9f, + 0x142b9ba,0x1da0ef6,0x0078f7b }, + { 0x07607ae,0x3232872,0x2a7e076,0x0bb572a,0x182b23c,0x1d8f918, + 0x181f392,0x37c45a9,0x24a3886,0x0b2a297,0x264e7f2,0x1fa433c, + 0x0fcfcc8,0x21c0857,0x0004f74 } }, + /* 160 */ + { { 0x01d161c,0x1744585,0x2d17528,0x03a4f13,0x267cd2e,0x30d861f, + 0x062a647,0x213284b,0x139ed25,0x27d4ca5,0x02fbbd6,0x31ddf11, + 0x3c50ac4,0x1dd86f7,0x00107de }, + { 0x16beebd,0x1b7317a,0x2151997,0x256a196,0x3be2aff,0x3621cab, + 0x0a9da19,0x05f3038,0x23da63c,0x3178d5e,0x215cc67,0x07f7f63, + 0x0c6d8d3,0x3bf5e5c,0x00c44bb } }, + /* 161 */ + { { 0x00c62f1,0x3e0f893,0x1572703,0x3b93865,0x19b1e28,0x389b33b, + 0x02858bf,0x0e3e9aa,0x04bc436,0x234e072,0x25ba43d,0x3dca19e, + 0x0274394,0x20f442e,0x003b4a7 }, + { 0x176451e,0x2b5ed5d,0x35c8ee1,0x25c52da,0x0c3d0b5,0x32b306e, + 0x030954f,0x275ecf7,0x10e472c,0x21577c4,0x02f8a32,0x321bb5c, + 0x0098f97,0x104e237,0x00d0433 } }, + /* 162 */ + { { 0x0a8f2fe,0x034548b,0x141f1a6,0x121246f,0x1616409,0x237f80d, + 0x2e29a55,0x1218db6,0x3ea278e,0x1669856,0x1ad7c8e,0x36d11de, + 0x2c2fcbb,0x18c0b3a,0x001c706 }, + { 0x1699b4b,0x2d531a6,0x17e85e2,0x1b48e78,0x2b509ca,0x2818ea0, + 0x0165fee,0x0b809ca,0x09db6a2,0x3dad798,0x326ee1d,0x204e416, + 0x091fa12,0x1c890e5,0x0007b9f } }, + /* 163 */ + { { 0x0ff4e49,0x0bb0512,0x0129159,0x05db591,0x03e4e9f,0x055ab30, + 0x0f82881,0x0ac2deb,0x3a8bb09,0x356a8d2,0x3d38393,0x03e4089, + 0x38187cd,0x1377a93,0x0041672 }, + { 0x0139e73,0x3990730,0x187d3c4,0x33e4793,0x2e0fe46,0x2ad87e2, + 0x33c792c,0x21d4fb6,0x1e4d386,0x2932d1b,0x20f1098,0x1270874, + 0x0ea6ee4,0x0167d6e,0x005e5fd } }, + /* 164 */ + { { 0x1856031,0x2b7519d,0x3bd07fc,0x337abcb,0x089c7a4,0x2a1f120, + 0x3523ce7,0x2ba406b,0x09561d9,0x1797f04,0x3cdb95f,0x2d6193e, + 0x32c7d3f,0x223aed6,0x00beb51 }, + { 0x2e65825,0x158f0ce,0x16413d1,0x310395f,0x3116854,0x250baf4, + 0x373d341,0x156cc47,0x104c069,0x0893716,0x195a0a6,0x035320e, + 0x37b7d8a,0x21b5755,0x00fb26b } }, + /* 165 */ + { { 
0x286ae17,0x04239f1,0x1a56c53,0x0e74707,0x29090d7,0x2bb142b, + 0x03b0139,0x1aac916,0x08ba49a,0x0376682,0x3382f85,0x064bbab, + 0x2910e28,0x1d5bd7f,0x00cc8df }, + { 0x0ab7630,0x208e8e7,0x3fc1877,0x26bee39,0x264984a,0x192ff05, + 0x08ef9c3,0x0aa6951,0x071c44e,0x26eed3e,0x035c95e,0x06906ad, + 0x10a0690,0x397eaa9,0x00c6c23 } }, + /* 166 */ + { { 0x034d8dd,0x005b064,0x279bb78,0x12c2c4f,0x1856bb4,0x0c90681, + 0x06409ab,0x3b48617,0x19a2d78,0x0a34bf8,0x326eddf,0x31f09b5, + 0x04f04dc,0x3d7c944,0x003ccaf }, + { 0x321f843,0x35fb71a,0x1e4c397,0x377a5d7,0x2da88e4,0x3d6ada7, + 0x33d3964,0x1b30149,0x0e39aae,0x054dda0,0x3e6f946,0x1273394, + 0x3ffd3f7,0x2f6655e,0x00021dd } }, + /* 167 */ + { { 0x37233cf,0x11617dd,0x26f07b6,0x3d8250a,0x0fe6771,0x3f9bbbc, + 0x2aba7ad,0x200a58d,0x3568603,0x198eefa,0x1e8fcf3,0x3b9610b, + 0x20524ac,0x2a67528,0x0048d9a }, + { 0x1a5e57a,0x1e9d303,0x16c9cff,0x0f39527,0x3c23259,0x03c8a1e, + 0x104bccf,0x182d5a1,0x18dbc83,0x05b5f42,0x1b402f4,0x317c525, + 0x11bf1ea,0x3c46e1f,0x0061936 } }, + /* 168 */ + { { 0x0153a9d,0x36859ee,0x2cf0aa9,0x2b27a0f,0x0a49fe3,0x2d984e1, + 0x018f8e1,0x1378453,0x1ab3843,0x1987093,0x283dae9,0x25cf0e8, + 0x14fc93d,0x280609d,0x00c99ba }, + { 0x026b1e3,0x34663d3,0x2202477,0x21a9d45,0x212e8e1,0x18ab77e, + 0x2e52f63,0x0a14ce1,0x295c396,0x00c7a3d,0x2aaedb6,0x30abc4d, + 0x374acde,0x1318a73,0x00fcfdb } }, + /* 169 */ + { { 0x0a40298,0x3ba5633,0x11956b3,0x14fcbd7,0x3c38781,0x34bab96, + 0x165630e,0x1f3c831,0x37e3a69,0x2b4226c,0x2d5029e,0x3b4ab1e, + 0x1da6ac2,0x3eb43c3,0x007e5cd }, + { 0x1b86202,0x109b7f6,0x2054f98,0x2c50cd7,0x2ed1960,0x3c518e7, + 0x1b02463,0x319c07f,0x1c30db6,0x045fdc2,0x373421e,0x31a1eb9, + 0x1a8acbf,0x31289b0,0x0013fef } }, + /* 170 */ + { { 0x3fa0a5f,0x068661f,0x2109e36,0x00b18ff,0x1f4b261,0x31d3844, + 0x0acbc56,0x3aebc99,0x1fa77ab,0x152bd11,0x24cddb7,0x2313f74, + 0x06eea44,0x15f5114,0x000b131 }, + { 0x2e9993d,0x1ac565c,0x2cbe22a,0x3921797,0x12c3c57,0x360f868, + 0x33560bf,0x320ee99,0x382c3b8,0x39af88f,0x00bbe38,0x2c4ea59, + 0x3399b40,0x00ceb45,0x0066eea } }, + /* 171 */ + { { 0x0c6c693,0x31ba56d,0x3d3849f,0x378dabd,0x0efc735,0x17f90bf, + 0x13343d3,0x2df0f81,0x27c6a9a,0x13c2a90,0x0a0fcb2,0x27c10d9, + 0x3bc50c7,0x090e4fa,0x0016287 }, + { 0x2927e1e,0x35af405,0x184c5c3,0x3499cee,0x240158e,0x33522e6, + 0x386fc84,0x0a0b69f,0x1a660ea,0x34590fb,0x22a1bee,0x2ce4fab, + 0x31a9445,0x0e78655,0x00664c8 } }, + /* 172 */ + { { 0x3eeaf94,0x115d409,0x21e7577,0x097aa67,0x22875c9,0x021ab7a, + 0x27e7ba5,0x1093f04,0x2a086fe,0x05d9494,0x2b6c028,0x10f31b0, + 0x1312d11,0x262759c,0x00c9bb2 }, + { 0x1acb0a5,0x30cdf14,0x0f78880,0x0574f18,0x1a37109,0x098adbb, + 0x2113c09,0x2060925,0x1f89ce4,0x1974976,0x3381358,0x2dab5ca, + 0x2159c53,0x3af1303,0x000ea3b } }, + /* 173 */ + { { 0x1e49bea,0x29142b1,0x1a59cab,0x055f017,0x0684e54,0x39eb0db, + 0x29cab9d,0x255ee8b,0x35f2e6f,0x05329e6,0x09b817b,0x1ec091c, + 0x1df0fef,0x2641f62,0x00eb304 }, + { 0x2fe5096,0x3dcc1d1,0x2aaf508,0x3a0b813,0x0695810,0x144bddb, + 0x2f1bd93,0x281ae23,0x3513ebc,0x1ddd984,0x0cf158b,0x35218eb, + 0x257daf7,0x391253b,0x00b2a81 } }, + /* 174 */ + { { 0x153e6ba,0x22396db,0x0ea2ff2,0x2a45121,0x0a90de1,0x34cf23b, + 0x2db60ce,0x1a900be,0x2f328b6,0x355e75b,0x2c24372,0x0b75b77, + 0x2ec7d4f,0x3f24759,0x00e9e33 }, + { 0x39eab6e,0x2267480,0x3b5e110,0x1e8fa5e,0x2a31a66,0x3f739a3, + 0x00166dc,0x3552d88,0x3ae5137,0x3efa0fa,0x0800acd,0x17df61d, + 0x38c8608,0x04cc31b,0x00cf4ab } }, + /* 175 */ + { { 0x31e08fb,0x1961164,0x22c003f,0x078541b,0x3643855,0x30da587, + 0x11f0dc9,0x324595e,0x329e3dc,0x29a041e,0x3495d2c,0x0908dd3, + 
0x1895b83,0x198dbb9,0x00d8cfb }, + { 0x0349b1b,0x383c5a8,0x2b86525,0x1b1283e,0x133cd2c,0x2be376a, + 0x012ee82,0x1eb4d1b,0x0ba71e9,0x01f3109,0x37621eb,0x1d9b77c, + 0x0d39069,0x3d5a97c,0x0095565 } }, + /* 176 */ + { { 0x20f5e94,0x1eefc86,0x1327e0e,0x054760b,0x2f771e1,0x3ac447e, + 0x033e3dc,0x198e040,0x04dd342,0x1b49a5d,0x00d01ef,0x3cb6768, + 0x1ceafbd,0x31c6812,0x001cb80 }, + { 0x221c677,0x060ca27,0x398b17f,0x0146723,0x36452af,0x02d9e65, + 0x39c5f78,0x3cf50d6,0x0be40f8,0x2970b87,0x26d667c,0x3e45959, + 0x16e7943,0x01673e7,0x009faaa } }, + /* 177 */ + { { 0x2078fe6,0x0918602,0x11dd8ad,0x399193f,0x0f6cc73,0x0f8dd12, + 0x2ce34dc,0x06d7d34,0x0c5e327,0x0989254,0x2fc5af7,0x2443d7b, + 0x32bc662,0x2fe2a84,0x008b585 }, + { 0x039327f,0x08e616a,0x252f117,0x1f52ab0,0x234e2d2,0x0a5b313, + 0x2f59ef6,0x0f7a500,0x15c4705,0x2c02b81,0x28b4f09,0x08aa5c8, + 0x0180efc,0x0993e83,0x00a9e86 } }, + /* 178 */ + { { 0x0310ecc,0x2d8892f,0x14ed0b7,0x3c59fe8,0x08a1a74,0x0850e57, + 0x1d09607,0x044a21f,0x109f5c9,0x237c6cf,0x06b264a,0x3fc8f1a, + 0x0d4c539,0x2740f96,0x00dc2d4 }, + { 0x1d6f501,0x0adf4ea,0x14f7215,0x0930102,0x3f4c32e,0x24e2643, + 0x366596d,0x081ff18,0x38f94fb,0x2c21341,0x328594c,0x267c75c, + 0x196b3fd,0x29932cb,0x0036def } }, + /* 179 */ + { { 0x3ed7cbe,0x26de044,0x3d0e461,0x0565e12,0x295e500,0x31dc17f, + 0x32251c2,0x3420ca8,0x3995f0d,0x2e8ddab,0x0361a45,0x10971b0, + 0x11e7b55,0x33bc7ca,0x00812d2 }, + { 0x3d94972,0x1606817,0x0383ccf,0x0e795b7,0x026e20e,0x0f6fefc, + 0x13685d6,0x315d402,0x0cc36b8,0x1c7f059,0x390ef5e,0x316ae04, + 0x08c66b9,0x2fac9a4,0x0040086 } }, + /* 180 */ + { { 0x3e3c115,0x153de4d,0x1a8ae5e,0x2330511,0x169b8ee,0x1d965c2, + 0x2edff2b,0x3ef99e6,0x1631b46,0x1f8a238,0x118d7bb,0x12113c3, + 0x26424db,0x0f4122a,0x00e0ea2 }, + { 0x3d80a73,0x30393bc,0x0f98714,0x278ef59,0x087a0aa,0x3b18c20, + 0x04b8a82,0x2068e21,0x030255d,0x3382b27,0x055397f,0x05448dd, + 0x2015586,0x1190be0,0x000b979 } }, + /* 181 */ + { { 0x2e03080,0x2895692,0x09fb127,0x2d1602a,0x1232306,0x105bd4e, + 0x28cd6a6,0x0a83813,0x1ee13b0,0x2abadc3,0x0c09684,0x00e33e1, + 0x033eea3,0x30f0a39,0x00a710e }, + { 0x01b1f7d,0x1c959da,0x017077a,0x254bf0a,0x086fbce,0x15cd6b2, + 0x008683f,0x23a4f4d,0x22a6bd6,0x14e8c93,0x0027d15,0x31d0d4f, + 0x271777e,0x1533510,0x00ab603 } }, + /* 182 */ + { { 0x34c209d,0x14d0abb,0x270432a,0x1d02358,0x22ba752,0x209757f, + 0x34af6fc,0x1ffc52e,0x1ced28e,0x1870e46,0x1e0340f,0x3f0bf73, + 0x33ba91d,0x2ebca7c,0x00c6580 }, + { 0x1d442cb,0x0879d50,0x24e4ae1,0x3f4e91c,0x04c7727,0x093cd1d, + 0x16d6a45,0x10a8b95,0x0c77856,0x361f84f,0x217845f,0x0bbeec6, + 0x0485718,0x33c5385,0x00dcec0 } }, + /* 183 */ + { { 0x1539819,0x225507a,0x1bf11cb,0x13e7653,0x0c8cb3b,0x05f695e, + 0x353f634,0x2827874,0x3fb8053,0x22de9a5,0x035d8b7,0x2105cc7, + 0x2a7a98d,0x35bed95,0x0085748 }, + { 0x1859c5d,0x00e51f0,0x22a21fd,0x3054d74,0x06ce965,0x328eab7, + 0x26a13e0,0x13bfc65,0x01d4fb1,0x36600b9,0x36dd3fc,0x01232ed, + 0x15bbaa9,0x0ad7a51,0x0089b18 } }, + /* 184 */ + { { 0x3360710,0x1eb5a90,0x136bd77,0x3bd57a6,0x0841287,0x12886c9, + 0x35c6700,0x21bc6eb,0x25f35ad,0x3bcb01c,0x0707e72,0x23e9943, + 0x03e5233,0x34bb622,0x002bf8e }, + { 0x16e0d6a,0x04b3d2d,0x290cb02,0x049a10c,0x350537e,0x22cf71b, + 0x3184a19,0x2dc8b62,0x2350210,0x3b4afa6,0x159781e,0x1d01b6d, + 0x1853440,0x16442f0,0x005a78d } }, + /* 185 */ + { { 0x348b02c,0x1ea8ab5,0x3b954d5,0x14684ac,0x0be5b34,0x11c4496, + 0x0a7a456,0x14f6eb7,0x11a3221,0x2d65f82,0x32eb1ea,0x09c4018, + 0x3f301f3,0x32e8a1c,0x00bd9ad }, + { 0x0543f7f,0x31e744e,0x1fefd1d,0x24a486c,0x1000220,0x3977e3b, + 
0x1b3ef51,0x2512a1b,0x2049e6b,0x122232b,0x391a32b,0x2f4a7b1, + 0x1c13e71,0x081a9b4,0x00d3516 } }, + /* 186 */ + { { 0x1924f43,0x1ae5495,0x28d52ef,0x2b93e77,0x2d2f401,0x371a010, + 0x33e8d7a,0x06ed3f1,0x30c0d9d,0x2589fa9,0x3bf3567,0x2ecf8fa, + 0x2dee4c3,0x152b620,0x007e8a2 }, + { 0x1924407,0x01bd42d,0x044a089,0x18686b5,0x2f14a0e,0x17cdce3, + 0x0efa216,0x3c586a8,0x1d6ae71,0x375831f,0x3175894,0x20e43eb, + 0x34c009e,0x3480527,0x00d115c } }, + /* 187 */ + { { 0x12abf77,0x38b0769,0x25682f2,0x295508c,0x0c2a0dc,0x1259b73, + 0x023ea25,0x340e7b5,0x3c7cd0d,0x1f92324,0x176405c,0x1528894, + 0x18f2e1e,0x2c59c35,0x001efb5 }, + { 0x0fb1471,0x07e7665,0x141da75,0x07d9f4a,0x0fdb31e,0x0dccda6, + 0x074eb25,0x3d92a9b,0x11189a0,0x1b4c557,0x24b8d2b,0x0533f92, + 0x0e9e344,0x2fa3dea,0x008d5a4 } }, + /* 188 */ + { { 0x2669e98,0x1ad3514,0x2a035c9,0x08a3f50,0x24547f9,0x0a145d3, + 0x1c1319d,0x3fe833d,0x1ae064b,0x1e01734,0x246d27e,0x3a2f13c, + 0x01e1150,0x263f55e,0x00f89ef }, + { 0x2e0b63f,0x3e57db7,0x23a4b4f,0x11c8899,0x0ad8500,0x348f3a7, + 0x2918604,0x27d6409,0x1ce5001,0x38f94c2,0x29a508a,0x39bdc89, + 0x3a52c27,0x194899e,0x00e9376 } }, + /* 189 */ + { { 0x0368708,0x34a2730,0x2e1da04,0x0bd78c1,0x2c45887,0x0c44bfa, + 0x3a23de3,0x390b9db,0x1746efd,0x05c638e,0x1d20609,0x3263370, + 0x31987f0,0x2988529,0x005fa3c }, + { 0x0aa9f2a,0x20622f7,0x060deee,0x0c9626a,0x3312cc7,0x18ebac7, + 0x008dd6c,0x0ad4fe6,0x3db4ea6,0x1dc3f50,0x090b6e9,0x0aff8d2, + 0x26aa62c,0x18f3e90,0x00105f8 } }, + /* 190 */ + { { 0x38059ad,0x25e576c,0x3ea00b2,0x1fa4191,0x25686b7,0x2d1ce8f, + 0x30470ed,0x3478bbf,0x340f9b6,0x1c9e348,0x3d594ec,0x2ffe56e, + 0x3f23deb,0x0cd34e9,0x00f4b72 }, + { 0x1a83f0b,0x2166029,0x28b32a2,0x06a5c5a,0x20786c4,0x0944604, + 0x0901bd2,0x379b84e,0x221e2fe,0x0346d54,0x1f4eb59,0x01b8993, + 0x2462e08,0x25f9d8b,0x006c4c8 } }, + /* 191 */ + { { 0x0b41d9d,0x2e417ed,0x265bd10,0x199148e,0x3826ca4,0x1a67e8d, + 0x1bbd13b,0x23e414d,0x3d773bc,0x356e64c,0x0d2118a,0x0cb587f, + 0x25fd093,0x24fb529,0x00158c6 }, + { 0x2806e63,0x3ecaa39,0x251b4dd,0x3b2d779,0x2e31ed3,0x066f1a6, + 0x060e518,0x2c7e3e5,0x0d62c76,0x0d88a70,0x101970a,0x1e3c8c6, + 0x272b8bb,0x083e73b,0x0031f38 } }, + /* 192 */ + { { 0x09e1c72,0x072bcb0,0x0cf4e93,0x2604a64,0x00715f2,0x10c98b6, + 0x2ad81d9,0x234fcce,0x37a7304,0x1974a4a,0x1c7415f,0x14aaa93, + 0x19587b1,0x3f643f4,0x00c3d10 }, + { 0x1ddadd0,0x2cd715d,0x294cf76,0x14479ed,0x19f5f4a,0x0198c09, + 0x1ab7ebc,0x182c0bc,0x0879202,0x1807273,0x05d39da,0x2c7d868, + 0x29c4ec4,0x1b13ad2,0x006dcd7 } }, + /* 193 */ + { { 0x1c83f01,0x0245bff,0x24f90ba,0x112554f,0x2354c8b,0x3f17988, + 0x0c511af,0x39e1e9b,0x26ae95b,0x0ae551c,0x35b41a6,0x0120455, + 0x1e989cb,0x1b37aff,0x00fa2ae }, + { 0x324659a,0x1aef1c3,0x1c43637,0x3f530a2,0x313a999,0x326af62, + 0x134184e,0x2ac131c,0x3f6a789,0x30a300a,0x13e526e,0x2107af3, + 0x093a8ff,0x2479902,0x00442b1 } }, + /* 194 */ + { { 0x22b6e20,0x31b18be,0x18614ca,0x26fdb5a,0x197f29e,0x325b44b, + 0x0ab1dbb,0x042348a,0x3275e8e,0x15bae44,0x0077124,0x2cf5345, + 0x2803ad4,0x188f2a2,0x0061b20 }, + { 0x2a560b1,0x3ced069,0x3cf42c2,0x100e167,0x3879e1d,0x0936ff0, + 0x1b51450,0x14c55f3,0x3153bfa,0x2957423,0x2a93823,0x15f5dce, + 0x2c9a22f,0x16731a8,0x00a97f2 } }, + /* 195 */ + { { 0x18edbbb,0x18c5ef9,0x1f13c30,0x071e77f,0x225ade5,0x1b60f75, + 0x1beaf11,0x3e495ad,0x2441dd8,0x2fa00e2,0x32a87b6,0x00050f2, + 0x038de7f,0x0037d6d,0x00a885d }, + { 0x39e48bd,0x1d9e433,0x2768e9f,0x3c29458,0x3f0bdf9,0x35ed5f2, + 0x36709fa,0x176dc10,0x012f7c1,0x2df8547,0x1d90ee3,0x053c089, + 0x21a8d35,0x200cb0d,0x002e84e } }, + /* 196 */ + { { 
0x23ec8d8,0x1d81f55,0x0cb7227,0x07f8e4d,0x2a66181,0x163f577, + 0x272e7af,0x131a8f2,0x2046229,0x25e6276,0x36bbefe,0x2cdc22f, + 0x17c8288,0x33dd4fb,0x000d524 }, + { 0x330c073,0x1a6728b,0x1cf369f,0x12e7707,0x2f0fa26,0x17c2abd, + 0x0a45680,0x26ebd13,0x3c7d19b,0x1c3d6c8,0x2abd110,0x064fd07, + 0x09b8339,0x02b4a9f,0x009e3e1 } }, + /* 197 */ + { { 0x0ae972f,0x2093c35,0x06e7a90,0x0af1ba1,0x243eef0,0x2748582, + 0x0606122,0x13a45f9,0x0acfe60,0x08a685e,0x0eb184b,0x015bc11, + 0x0cdf423,0x157fad5,0x004fcad }, + { 0x2728d15,0x3e5bceb,0x0331a0f,0x31b1a80,0x28a2680,0x3b94955, + 0x04cae07,0x176b57e,0x03ac5a6,0x3d7918b,0x22d23f4,0x0ae077f, + 0x1eb075d,0x006f16c,0x006e473 } }, + /* 198 */ + { { 0x38219b9,0x0475a2b,0x107a774,0x39946c6,0x1cb883c,0x004e0ed, + 0x087e571,0x25c3497,0x059982f,0x0a71f66,0x118305d,0x1aaf294, + 0x3a5dbaa,0x34be404,0x00725fe }, + { 0x3abd109,0x336ebea,0x2528487,0x15a1d61,0x0c0f8cf,0x2b56095, + 0x2591e68,0x3549a80,0x1d1debb,0x0701c6c,0x161e7e3,0x1f7fa2e, + 0x3dfe192,0x17e6498,0x0055f89 } }, + /* 199 */ + { { 0x175645b,0x26c036c,0x0b92f89,0x09ed96d,0x351f3a6,0x19ce67b, + 0x33ac8db,0x2f0828b,0x27fe400,0x0b9c5e1,0x1967b95,0x3324080, + 0x11de142,0x1d44fb3,0x003d596 }, + { 0x3979775,0x3af37b6,0x3e88d41,0x2f1a8b9,0x299ba61,0x085413c, + 0x1149a53,0x0beb40e,0x31427ba,0x239f708,0x357d836,0x1558c22, + 0x280a79f,0x1b255f6,0x002b6d1 } }, + /* 200 */ + { { 0x39ad982,0x3d79d89,0x01a684a,0x0b6722e,0x39bb4c9,0x39a6399, + 0x1ad44e0,0x3059f5e,0x048265f,0x33a2fa4,0x0c3a4cc,0x0d7df98, + 0x23a33f1,0x34e2e21,0x00a0a10 }, + { 0x386efd9,0x1c91f34,0x06c2e19,0x3e6d48d,0x00eefd3,0x2181ef2, + 0x2415f97,0x1d33b08,0x0625086,0x1e8aa3e,0x08c9d60,0x0ab427b, + 0x2764fa7,0x3b7943e,0x00cd9f0 } }, + /* 201 */ + { { 0x1a46d4d,0x0e471f4,0x1693063,0x0467ac0,0x22df51c,0x127a0f7, + 0x0498008,0x20e0b16,0x1aa8ad0,0x1923f42,0x2a74273,0x01761ce, + 0x1600ca4,0x187b87e,0x00ee49e }, + { 0x0c76f73,0x19daf92,0x0b2ad76,0x3d8049d,0x1d9c100,0x0fe1c63, + 0x0bb67c8,0x035cc44,0x02002fc,0x37b2169,0x344656a,0x1127879, + 0x1939bc0,0x0dd8df6,0x0028ce7 } }, + /* 202 */ + { { 0x0544ac7,0x26bdc91,0x042697e,0x356e804,0x1f2c658,0x2ceb7ef, + 0x2dec39f,0x02c1dcc,0x391a2df,0x2344beb,0x2171e20,0x3099c94, + 0x0fa548a,0x37216c9,0x00f820c }, + { 0x0f4cf77,0x29bbaa5,0x33c6307,0x34a5128,0x118c783,0x2dd06b1, + 0x139d4c0,0x2db912e,0x1153ffb,0x1075eb3,0x3a255e4,0x2892161, + 0x36d5006,0x125338c,0x0014fbc } }, + /* 203 */ + { { 0x1584e3c,0x0830314,0x00279b9,0x167df95,0x2c7733c,0x2108aef, + 0x0ce1398,0x35aaf89,0x012523b,0x3c46b6a,0x388e6de,0x01a2002, + 0x0582dde,0x19c7fa3,0x007b872 }, + { 0x1e53510,0x11bca1f,0x19684e7,0x267de5c,0x2492f8b,0x364a2b0, + 0x080bc77,0x2c6d47b,0x248432e,0x3ace44f,0x32028f6,0x0212198, + 0x2f38bad,0x20d63f0,0x00122bb } }, + /* 204 */ + { { 0x30b29c3,0x3cec78e,0x01510a9,0x0c93e91,0x3837b64,0x1eca3a9, + 0x105c921,0x05d42e6,0x1379845,0x07ce6f2,0x0e8b6da,0x0e0f093, + 0x220b2cd,0x1f6c041,0x00299f5 }, + { 0x0afdce3,0x2b0e596,0x2f477b6,0x2ccf417,0x3a15206,0x26ec0bf, + 0x2e37e2b,0x2593282,0x0ab9db3,0x2841dd8,0x27954be,0x277a681, + 0x03f82e2,0x2b610c7,0x00446a1 } }, + /* 205 */ + { { 0x06b8195,0x3b3a817,0x31b9c6f,0x317d279,0x3d744a7,0x1de9eb9, + 0x296acc1,0x1ce9ea3,0x06c3587,0x246815d,0x3756736,0x0588518, + 0x1c971a4,0x1fde1f4,0x00aa021 }, + { 0x3fd3226,0x274561d,0x00be61e,0x01393d8,0x30f6f23,0x29b7fc1, + 0x04cebc7,0x0a892a7,0x20109f1,0x27456be,0x0c863ee,0x2eb6c8a, + 0x38c782b,0x039397a,0x00a2829 } }, + /* 206 */ + { { 0x29de330,0x21fe80f,0x145b55b,0x1986570,0x012b260,0x2482fbc, + 0x0536e0a,0x16b7382,0x32c4d19,0x1deffdb,0x145f418,0x0c67a76, + 
0x2ce477f,0x218fe24,0x00f9848 }, + { 0x3e37657,0x3f074d3,0x245ad0e,0x20973c3,0x23c58de,0x2c332ef, + 0x2ad21a8,0x0bf1589,0x208af95,0x1f4a8c4,0x2b43735,0x1e46657, + 0x15d4f81,0x0c3e63a,0x005f19d } }, + /* 207 */ + { { 0x26865bb,0x20f6683,0x16a672e,0x0efd8d1,0x222f5af,0x18f2367, + 0x1e9c734,0x25c3902,0x178dfe6,0x2903a79,0x311b91c,0x1adbbe9, + 0x225a387,0x0b3e509,0x0089551 }, + { 0x34e462b,0x23b6a32,0x27c884c,0x129104b,0x384c015,0x3adedc7, + 0x325db1c,0x021dc10,0x1e366f7,0x3054df7,0x1992b9a,0x2824e64, + 0x0ae77f3,0x181b526,0x00a7316 } }, + /* 208 */ + { { 0x2d260f5,0x2434bf2,0x28c0139,0x0a7bb03,0x176c3be,0x3def5f5, + 0x05bee00,0x3692df7,0x3d2efeb,0x3a6f859,0x1122b87,0x38f779a, + 0x1415ccc,0x2c260ad,0x0075a28 }, + { 0x04607a6,0x042f37a,0x3f0df68,0x0a1bd36,0x3c6d581,0x2d36bfa, + 0x2d577d1,0x0a3affa,0x0b2066b,0x2e6f110,0x0b17e84,0x3c76a5e, + 0x1a57553,0x012f36a,0x0004595 } }, + /* 209 */ + { { 0x29e5836,0x0e6808c,0x269d13e,0x147dc5c,0x32c9e7d,0x09b258e, + 0x2c58d6f,0x1efd716,0x0437996,0x34ec31b,0x15908d9,0x2efa8fd, + 0x09ad160,0x079fc1f,0x00d8481 }, + { 0x3d20e4a,0x18269d6,0x3aa8fe7,0x34829c2,0x2e4325d,0x0d800e1, + 0x11f370b,0x10c08dc,0x22fd092,0x1a5fe55,0x0acc443,0x037030d, + 0x1cdd404,0x097379e,0x00fd6d7 } }, + /* 210 */ + { { 0x313eafb,0x3f438f3,0x2e5fb3e,0x2ed6a82,0x121009c,0x240889e, + 0x00c5537,0x269b792,0x334b2fc,0x1dd573c,0x07096ae,0x19296fc, + 0x3813985,0x2742f48,0x00ddd64 }, + { 0x2045041,0x3842c62,0x1572d0d,0x04f255f,0x06e05b4,0x383ec97, + 0x1ff8064,0x18bed71,0x39b6411,0x2764cc5,0x257439f,0x3521217, + 0x172aa42,0x342a2a3,0x0070c5b } }, + /* 211 */ + { { 0x3bdf646,0x1c5ce25,0x1f7ca76,0x2d2acca,0x3aa1485,0x23c97f7, + 0x3e11d6f,0x0609338,0x07ec622,0x01da8ff,0x3392474,0x17ca07f, + 0x13a9a04,0x353a5b4,0x0024557 }, + { 0x14c27cd,0x32012f7,0x3fea875,0x3d03d71,0x211c5f0,0x3157fdf, + 0x0c880bd,0x3c406b2,0x2c51103,0x24ab377,0x399faa8,0x0d06887, + 0x16b5738,0x28b33a7,0x00c7b67 } }, + /* 212 */ + { { 0x2357586,0x35c93e3,0x0da09a0,0x3d77d92,0x11d7f4f,0x37b98a9, + 0x3e6c9bf,0x2cdca70,0x2f00389,0x2412673,0x18eab87,0x0101436, + 0x11617e9,0x06d9b01,0x00e8eef }, + { 0x37e3ca9,0x16ffaf0,0x391debf,0x1b69382,0x07c5e94,0x312fa8a, + 0x0973142,0x2cadde4,0x109ee67,0x3a07db0,0x1afc5ed,0x08df66f, + 0x304c7af,0x0804aae,0x00d2e60 } }, + /* 213 */ + { { 0x24f57bf,0x1818322,0x182a615,0x25bfc44,0x0f97586,0x0a5bbc0, + 0x36773c6,0x1a2660c,0x3ceff66,0x3270152,0x319cd11,0x2845845, + 0x1acfad6,0x19076f8,0x009824a }, + { 0x289fd01,0x2de97ee,0x39d80b7,0x026227d,0x0f8d3b8,0x15e0a17, + 0x21ea08f,0x20a2317,0x136ae6d,0x3deb1d1,0x3521ef5,0x0de8801, + 0x0a25d5d,0x0612c98,0x005ecc4 } }, + /* 214 */ + { { 0x308c8d3,0x3aec669,0x01ecddc,0x13f18fe,0x1e63ed0,0x061cfe5, + 0x05f5a01,0x1db5741,0x14479f2,0x0ced6b5,0x025ae5b,0x09ca8f5, + 0x2160581,0x1404433,0x008bfeb }, + { 0x08228bf,0x0e02722,0x37df423,0x33ecabf,0x34bd82a,0x32f529f, + 0x28f1800,0x0c8f671,0x1246b44,0x1ff35dc,0x091db95,0x303f3da, + 0x28f7f60,0x3624136,0x00cfbb4 } }, + /* 215 */ + { { 0x326139a,0x2977e4e,0x3eb89a6,0x20ecb31,0x13e076a,0x2a592f3, + 0x28e82d5,0x235ad1e,0x239b927,0x262938a,0x2444354,0x141b263, + 0x0d56693,0x2a3fc78,0x0006497 }, + { 0x31efa05,0x3a3664a,0x3e333de,0x2a114e4,0x12da63c,0x3c15e6b, + 0x2f7277c,0x363aa92,0x2393236,0x16bd2d1,0x32b617f,0x32b656c, + 0x3b1246c,0x22e2e22,0x00ce76d } }, + /* 216 */ + { { 0x03843dc,0x094de82,0x13b463d,0x0507905,0x089eb35,0x2a6bf25, + 0x35ebc4e,0x2bb5d45,0x1808ed1,0x1de9949,0x185e829,0x0a55847, + 0x0b73d67,0x1a2ed61,0x008dd2d }, + { 0x133c3a4,0x04e7980,0x38ea237,0x2ad2f49,0x19de838,0x018bf36, + 
0x29b072c,0x21c1ba0,0x14f63ba,0x31c1cc3,0x13cd05e,0x20120ff, + 0x1f84d60,0x16e0321,0x00872ab } }, + /* 217 */ + { { 0x19d4d49,0x1ddb4e6,0x05e7fc0,0x37bb0fd,0x1a3eb59,0x36b87f0, + 0x190e440,0x1c7fef2,0x31ea153,0x14cd65a,0x1bc7ab2,0x11f72ca, + 0x39582d4,0x0fa4d65,0x00cd5b6 }, + { 0x3d1ff11,0x0d9be9d,0x2903ae3,0x017b7b9,0x259f28f,0x110cefc, + 0x03fed1a,0x38039bd,0x09bdf9c,0x3055027,0x2ca9c5d,0x2d737b6, + 0x3bdb421,0x16560b5,0x00f9f33 } }, + /* 218 */ + { { 0x022c792,0x110de25,0x38bf959,0x08f2562,0x1239ea9,0x3c1d950, + 0x21a247d,0x315112d,0x285bb9f,0x2534a73,0x0b42455,0x1a4a99c, + 0x069009a,0x1680392,0x006e0ca }, + { 0x1b3bece,0x269e0a1,0x18926b7,0x0e7187e,0x241f35e,0x39d1fe0, + 0x02099aa,0x1675bfe,0x23fd0ca,0x3d6322b,0x19406b5,0x324c38a, + 0x242434a,0x3ae677c,0x002ce04 } }, + /* 219 */ + { { 0x2c37b82,0x1ae6506,0x0d83436,0x23496c1,0x0ff0c72,0x2711edf, + 0x1513611,0x04f9c7d,0x1edbeff,0x376fcb5,0x212a683,0x23bf547, + 0x0f9c4f7,0x16e6627,0x0082cd8 }, + { 0x0cb5d37,0x31b6db8,0x1a15e23,0x2f5cbb8,0x0818aee,0x21dc6c5, + 0x12aafd2,0x205f608,0x1d91def,0x3def088,0x1445c51,0x3100e8a, + 0x3746bda,0x145c4b0,0x00711b0 } }, + /* 220 */ + { { 0x2a99ecc,0x27b5217,0x35e10ed,0x036e32a,0x0f79950,0x15c32f7, + 0x2c87dcb,0x3ebb2a3,0x2c2d35d,0x114b3ec,0x2e4d80a,0x0c7eb89, + 0x2abe58d,0x3727737,0x00e6a37 }, + { 0x1eca452,0x1968d07,0x344e5d3,0x29435a2,0x109a5f8,0x181d12c, + 0x238ea5a,0x127a564,0x00dbb42,0x0fcbfb7,0x2909b2e,0x2571d3a, + 0x08250e3,0x0694e4e,0x00e156d } }, + /* 221 */ + { { 0x3181ae9,0x1acf411,0x3808d79,0x2a11065,0x0baf44b,0x133cfeb, + 0x1330943,0x1711b9a,0x2dec3bd,0x1906a9a,0x2ed947c,0x369d763, + 0x1a5254f,0x104a7a9,0x00acd9d }, + { 0x030301b,0x31568f5,0x2a4965c,0x33ded4b,0x03c9a5b,0x16541fc, + 0x1319cf1,0x2a3748b,0x1b5de74,0x18bb82e,0x077ac2b,0x309a87a, + 0x3c31420,0x0f6a4b9,0x00387d7 } }, + /* 222 */ + { { 0x0d3fdac,0x120cfa3,0x1b8e13c,0x1ccccb9,0x376fcd4,0x0bf87f4, + 0x271b4be,0x363b3fd,0x28b5d98,0x0535cd3,0x114bbc1,0x3ab4f19, + 0x10494b1,0x2161ece,0x00d14ca }, + { 0x12d37e9,0x110ebd7,0x062295a,0x1cc0119,0x073c6ea,0x15d5411, + 0x0aeb4b1,0x23fba91,0x175fab5,0x3ee8fe1,0x1c680a6,0x1e76f27, + 0x3ddfc97,0x3d69ecd,0x00e1ee5 } }, + /* 223 */ + { { 0x2d29f46,0x2d19204,0x3137cd0,0x02c3b54,0x193295b,0x02fbdb2, + 0x2260948,0x22c02ff,0x3885424,0x1299595,0x00e7f9c,0x310ff2a, + 0x01ea169,0x0deef85,0x0021908 }, + { 0x1b26cfb,0x38566a8,0x2852875,0x21debff,0x290ca9f,0x0b29663, + 0x26550d9,0x2b44457,0x05d1938,0x1f8f825,0x366ef93,0x1d8daec, + 0x069e5ef,0x342ece6,0x00b6034 } }, + /* 224 */ + { { 0x2d8356e,0x1578c09,0x226f4d2,0x3b74c51,0x0f83666,0x0323b59, + 0x1ddf61d,0x1ed8508,0x3c52667,0x0e5b91c,0x1e9b18b,0x352bdfa, + 0x13f75da,0x352aa4e,0x00fceff }, + { 0x1c731d5,0x04e2844,0x01d9843,0x286cbc5,0x105bcb3,0x05edd9c, + 0x21fa956,0x3b1ec83,0x01288cc,0x22fbf3a,0x10f1b56,0x081cf72, + 0x15cb758,0x18687c1,0x00f5722 } }, + /* 225 */ + { { 0x2973088,0x1209dcd,0x3980f31,0x0221aa7,0x1c008e7,0x011b098, + 0x395947e,0x2f2806d,0x27dca76,0x037c79a,0x31acddf,0x2bf6219, + 0x0d8f4ab,0x13644d9,0x00ff705 }, + { 0x2260594,0x18d51f8,0x277e2cf,0x1cb5cec,0x2468a53,0x3e6f4d7, + 0x019e24e,0x0f30f1d,0x0202404,0x34ad287,0x090b39c,0x23c11ea, + 0x1a2e3a2,0x3a851be,0x00dca2c } }, + /* 226 */ + { { 0x3277538,0x221cd94,0x3738ab7,0x0973da5,0x1a734e2,0x2c8b8b0, + 0x2e1d1e6,0x348499b,0x389ebe1,0x18b1854,0x02bb076,0x1b2b500, + 0x0f207f3,0x170cf99,0x0012088 }, + { 0x0fbfec2,0x1df55a4,0x34ae59e,0x2ab5e95,0x3f9e781,0x3411794, + 0x1410b05,0x17c3a00,0x0aaa91b,0x074ed7c,0x3fbb352,0x3477c01, + 0x3ee9ab3,0x0cfb1ca,0x0011c4b } }, + /* 227 */ + { { 
0x3c3a7f3,0x2e60ca0,0x2354d32,0x33e2362,0x28083ab,0x03d3b16, + 0x3164045,0x0a41f7a,0x3f0641e,0x38635d1,0x31bbf03,0x225e2bb, + 0x0cd894e,0x1f72228,0x0093244 }, + { 0x33d5897,0x383faf3,0x0e6d561,0x0bc4d80,0x3fc3a68,0x05a9adc, + 0x0b9d73d,0x3d6031e,0x2ded29b,0x339c4ff,0x08d69e5,0x089488c, + 0x3fda40a,0x295c7fd,0x003a924 } }, + /* 228 */ + { { 0x0093bee,0x115532d,0x2ec0fb6,0x0969631,0x3a6d65a,0x0f43b4d, + 0x26994d4,0x0b51104,0x2515515,0x3695a26,0x284caa8,0x397aa30, + 0x25538b8,0x353f47c,0x0033f05 }, + { 0x3615d6e,0x37f8246,0x07dae0f,0x23dc154,0x02ded7e,0x1eef320, + 0x1631e51,0x3447f75,0x13e267f,0x353e1d1,0x3f89d62,0x369c8ff, + 0x1a21dc6,0x2b8b8f3,0x0055cbc } }, + /* 229 */ + { { 0x34e84f3,0x2f2539a,0x2c35336,0x0c53bdc,0x1728630,0x3ad5fe6, + 0x05fdeee,0x3386db6,0x272a42e,0x29fd38c,0x36f0320,0x21b2ed4, + 0x331e67f,0x28ae48c,0x00f09b6 }, + { 0x2778435,0x0fb3c55,0x32d221d,0x2660c8e,0x32977ba,0x1c12f03, + 0x1b57fb1,0x01229a8,0x38b389f,0x375ddf3,0x2c6b42c,0x3885d3e, + 0x2c55a9c,0x2ffc279,0x00404e2 } }, + /* 230 */ + { { 0x04c5ddb,0x2c4d788,0x150e9b9,0x110fbfd,0x29dbfe0,0x30ef83d, + 0x2ab4bfe,0x395bcd7,0x30d0a43,0x0e2d30f,0x0e73f9b,0x07199cc, + 0x0c9054c,0x22f4b1e,0x0092ed3 }, + { 0x386e27c,0x00fdaa8,0x0507c70,0x1beb3b6,0x0b9c4f4,0x277d519, + 0x024ec85,0x1cbaba8,0x1524295,0x112be58,0x21fc119,0x273578b, + 0x2358c27,0x280ca07,0x00aa376 } }, + /* 231 */ + { { 0x0dbc95c,0x16488cf,0x337a078,0x1abbcb8,0x0aae1aa,0x1caa151, + 0x00108d4,0x1edf701,0x3e68d03,0x1203214,0x0c7eee2,0x084c572, + 0x07752d2,0x215a3b9,0x00195d3 }, + { 0x2cd7fbe,0x06e80f6,0x052bd4b,0x07b4f83,0x24b5ac6,0x2aaded4, + 0x13c0526,0x0ffa9a3,0x08c660e,0x13c35c9,0x3145efb,0x36cfe24, + 0x0936daf,0x268e3d0,0x00a73fd } }, + /* 232 */ + { { 0x31b17ce,0x2e7bcee,0x3f31891,0x19f1849,0x1140236,0x015487f, + 0x32e58d3,0x202204a,0x049e350,0x1ce91f9,0x3f75150,0x27f212f, + 0x0d16ee4,0x1c894c4,0x004023f }, + { 0x33399fa,0x2397b6d,0x2a3ea60,0x36354ca,0x1f12632,0x117a105, + 0x22758e8,0x361844e,0x3851fc2,0x0ab92db,0x339d02f,0x1e7d6c4, + 0x19ebd38,0x0a9a036,0x00446d2 } }, + /* 233 */ + { { 0x3e164f1,0x008c092,0x19200f5,0x35a22e0,0x38d09d2,0x212b3bf, + 0x0056f19,0x3a03545,0x1f075e9,0x0e97137,0x1f496a9,0x32d1f9b, + 0x36bf738,0x35ace37,0x00899e1 }, + { 0x19eb2a6,0x21fa22d,0x338b69e,0x18e6d1f,0x1280d9d,0x1953a55, + 0x1411ea3,0x2960566,0x0fd969a,0x1f3e375,0x130742a,0x170aebd, + 0x33085ff,0x14d868d,0x00a4391 } }, + /* 234 */ + { { 0x0a4bdd2,0x39ca8ea,0x37026ac,0x346da3b,0x0c656cd,0x03136b6, + 0x233e7e9,0x0714352,0x08a9d95,0x192bb38,0x085d68e,0x20016b8, + 0x102b8ea,0x1f5dbdd,0x00fdd7a }, + { 0x0d6fa45,0x3ec29a6,0x2b8cce6,0x1c84413,0x0228f86,0x28275f7, + 0x3d8787d,0x0c19748,0x28b2ae9,0x1954850,0x2a56c36,0x3eae8f7, + 0x0aca595,0x00e42a2,0x00edbe5 } }, + /* 235 */ + { { 0x3b26c82,0x3682b6f,0x2f9cd64,0x0f254b0,0x0e5d70b,0x1f9dfda, + 0x28f365f,0x35a57d7,0x00208f2,0x19c8d38,0x112e7be,0x3e403bb, + 0x3734efa,0x24d12b3,0x0027dc6 }, + { 0x260a46a,0x13fd7b0,0x1c2880e,0x338b70c,0x27da5eb,0x29a7d54, + 0x1c5d73c,0x2130921,0x32969cc,0x2b37eda,0x2d6d4ec,0x0716bfb, + 0x0763703,0x1320889,0x00c7bbf } }, + /* 236 */ + { { 0x1fe01b2,0x2dcb1d2,0x11b89d5,0x219e4ea,0x0347851,0x3d1810e, + 0x3a3c54c,0x06dbe8e,0x03d3ab2,0x2dcfa39,0x3e57b8a,0x337a382, + 0x0426450,0x0e9f748,0x006488b }, + { 0x1dc4582,0x0e62cf7,0x06fea9e,0x2a56fb1,0x31698c1,0x15b4e10, + 0x1446ef1,0x0a689fc,0x1d87703,0x20ff497,0x2c71066,0x2c48868, + 0x2e6cf05,0x30aa9cb,0x0065b2d } }, + /* 237 */ + { { 0x1021d63,0x2217df3,0x1f0821a,0x057fa98,0x23f344b,0x173dcf9, + 0x1ba6ddc,0x22c8eb5,0x18f227a,0x0455343,0x1c55931,0x1d0dcf3, + 
0x20fa19b,0x1c56618,0x004feab }, + { 0x19ec924,0x224e39f,0x2550509,0x179b51f,0x284d54a,0x2d85d41, + 0x2d1bdc1,0x1a29068,0x3826158,0x1267f85,0x3005a92,0x0769e00, + 0x379b617,0x17b5f63,0x00a70bf } }, + /* 238 */ + { { 0x22216c5,0x049437f,0x33510bc,0x141d806,0x22c37e2,0x1bc1adf, + 0x300175d,0x2e6ded8,0x0a18bfe,0x35377a3,0x382f843,0x08410ca, + 0x00afd4f,0x0be6c6b,0x008d70e }, + { 0x2e91abb,0x1cede2a,0x28f225c,0x28e18c0,0x30230dc,0x173cc2d, + 0x123ecfe,0x3c9962e,0x2c25506,0x27b5d53,0x329a5e3,0x106e231, + 0x3889b8e,0x3b0aeaf,0x00ee67c } }, + /* 239 */ + { { 0x3e46c65,0x0eb3d46,0x1d7ae18,0x23f9d59,0x2978953,0x2589ed3, + 0x073391d,0x2461e1e,0x0c19f1d,0x22fd2b1,0x0691f5c,0x2e67d8d, + 0x1fb985d,0x200dd28,0x00a68df }, + { 0x392b5fa,0x123b46f,0x1c323c4,0x104f82f,0x0a098c8,0x26fc05b, + 0x34cd557,0x0913639,0x09c115e,0x3977c34,0x3410b66,0x062b404, + 0x0213094,0x132c5e8,0x008b612 } }, + /* 240 */ + { { 0x26e3392,0x3b0ebf0,0x2e00425,0x1c285c8,0x3c07f84,0x08d5ad0, + 0x028190e,0x1669b73,0x1ffb1ef,0x053b65f,0x063028c,0x0aceb47, + 0x18988c2,0x0f09a30,0x0007072 }, + { 0x0f49e7d,0x28c0bd3,0x252270d,0x24cfc4a,0x0c5e87c,0x2165052, + 0x2cdd1d1,0x04931d2,0x3abca74,0x22b57dc,0x169fd47,0x0b928fb, + 0x17cc3e7,0x21a1ec4,0x0061593 } }, + /* 241 */ + { { 0x1aa0486,0x2e55dea,0x15577b7,0x0d6818f,0x36e41fb,0x2a411f5, + 0x17d5c7d,0x1eea6c0,0x28068a8,0x0e31d20,0x1f08ad9,0x117e973, + 0x08a28ab,0x085d30a,0x00cd9fb }, + { 0x347843d,0x1119095,0x11e3595,0x1b29584,0x134d64c,0x2ff3a35, + 0x247ea14,0x099fc4b,0x2056169,0x145dd03,0x2ed03fb,0x1250e3b, + 0x3f5135c,0x2b753f0,0x009da30 } }, + /* 242 */ + { { 0x0fa5200,0x214a0b3,0x313dc4e,0x23da866,0x3270760,0x15c9b8b, + 0x39a53df,0x1f79772,0x3c9e942,0x2984901,0x154d582,0x1685f87, + 0x2e1183e,0x1f79956,0x00b9987 }, + { 0x15254de,0x3a5cac0,0x37c56f0,0x2c7c29b,0x292a56d,0x195be2c, + 0x17e4e1a,0x0660f4a,0x052ad98,0x1267f80,0x07cfed8,0x194b4bc, + 0x01738d3,0x14ba10f,0x00c7843 } }, + /* 243 */ + { { 0x29b2d8a,0x242bc1f,0x19646ee,0x0615f3c,0x0ac8d70,0x07ca3bf, + 0x2d90317,0x2c83bdb,0x1a96812,0x39fdc35,0x31c61ee,0x2d55fd3, + 0x2375827,0x355f189,0x00f1c9b }, + { 0x21a6194,0x1f4050a,0x2b845cf,0x02c6242,0x2dd614e,0x3a4f0a9, + 0x39de100,0x24714fb,0x175e0cd,0x0be633d,0x14befc3,0x13b0318, + 0x1d68c50,0x299989e,0x00d0513 } }, + /* 244 */ + { { 0x059fb6a,0x2b6eb6a,0x3666a8e,0x39f6ca0,0x1cf8346,0x388b8d5, + 0x35e61a3,0x271adec,0x22c9963,0x20a4fb3,0x16f241c,0x0058b89, + 0x21ddafa,0x1ee6fde,0x00d2e6c }, + { 0x0075e63,0x39894d0,0x0286d0d,0x187e7b2,0x02405aa,0x3f91525, + 0x37830a8,0x2723088,0x2c7364e,0x013f406,0x104ba75,0x270f486, + 0x3520b4d,0x3852bc6,0x00d589b } }, + /* 245 */ + { { 0x262e53b,0x1da93d1,0x3676135,0x147e41d,0x335ec2f,0x1f02be5, + 0x297d139,0x22d6198,0x1fe9e59,0x13b4c80,0x1e70f60,0x2f1d4a9, + 0x2d95149,0x14d6ec4,0x00b54af }, + { 0x12c1c76,0x2930ac8,0x0dfd36e,0x31fac94,0x218f5bb,0x2828691, + 0x1466cc9,0x3645e83,0x1a4dac2,0x1549593,0x0e95fab,0x19567d2, + 0x27a3320,0x0642729,0x007487c } }, + /* 246 */ + { { 0x1e98e9c,0x2ff8df7,0x119975a,0x098a904,0x099b90b,0x336c7df, + 0x010996d,0x159d46d,0x3118b3b,0x3aacd1b,0x31f8ae1,0x214864f, + 0x398c104,0x089dae2,0x001ec4d }, + { 0x1452baa,0x2f24991,0x2572ba3,0x162b312,0x2387d18,0x147c5c7, + 0x38eff6e,0x0700251,0x37d931e,0x23cd5c1,0x254c8ca,0x3b9df37, + 0x1c9a4ff,0x0bfd547,0x00fb489 } }, + /* 247 */ + { { 0x1b8dff8,0x2f6b40b,0x05a25b1,0x3f5688a,0x1d462f4,0x2802d18, + 0x2aad8ed,0x1b46c75,0x3cf4130,0x250fefb,0x2a13fe1,0x23a1bcd, + 0x0940442,0x04605fe,0x00c8b2f }, + { 0x0d51afb,0x14a2abc,0x1d06762,0x291526c,0x2a3e2fe,0x28f77d9, + 
0x3ad8f2e,0x3481a1b,0x04b4fbd,0x2836733,0x0189ff5,0x3a5f533, + 0x319a6cd,0x0f58667,0x00c3679 } }, + /* 248 */ + { { 0x1b85197,0x22426d4,0x2895ea3,0x342d324,0x3ffb17d,0x376cfcf, + 0x30878b1,0x3c3c83a,0x0ffc57c,0x0ac174a,0x1abd57e,0x2f78b9c, + 0x01b20d8,0x0a37103,0x007f2be }, + { 0x19a2d48,0x137288a,0x182d655,0x0ba0dde,0x25130ba,0x01c65c6, + 0x23205f1,0x2097621,0x2827cf2,0x2c57b98,0x03748f2,0x2db15fc, + 0x385a0d4,0x13690c0,0x00a9e3f } }, + /* 249 */ + { { 0x3fbc9c6,0x2df3b20,0x377e33e,0x31d1505,0x024a311,0x3c1d9ff, + 0x1377f74,0x00b6b20,0x2364ab7,0x184ab6b,0x2a77969,0x3f2db6c, + 0x2a6adb7,0x0a10073,0x004a6fb }, + { 0x1fc73de,0x2c74ab3,0x3d325e8,0x2346c0b,0x1d0efae,0x2076146, + 0x19c190d,0x225c4fe,0x3fafc80,0x2cf063d,0x11b7ae7,0x3dc4f9d, + 0x3c3f841,0x10d7c1f,0x000a4b3 } }, + /* 250 */ + { { 0x19b7d2e,0x28f1300,0x0b897dd,0x06b5371,0x0631c8d,0x336cc4f, + 0x09cd6e1,0x2ec1952,0x1104c07,0x07512bb,0x35f000d,0x25f84e9, + 0x1df4d8f,0x193f769,0x000e9ee }, + { 0x2346910,0x267cecf,0x0ad7eaa,0x087e8a5,0x1622f69,0x342cbfa, + 0x2aa20d0,0x206e88a,0x3991e58,0x093fb4b,0x0157180,0x3cecb5b, + 0x2e17c9a,0x1ea371f,0x00919e6 } }, + /* 251 */ + { { 0x2250533,0x13f931d,0x3ef8c72,0x395f605,0x18a2080,0x1cb25d4, + 0x2fb0f41,0x1c0ba8a,0x1eb17c0,0x266c433,0x09b7e3e,0x0e5d78f, + 0x0cdc5bf,0x1f7c734,0x0020611 }, + { 0x205ebd5,0x127986f,0x02c0fb0,0x1705b1e,0x1eb0bb5,0x2dffb42, + 0x2331b8a,0x18fc04e,0x31d6328,0x17db162,0x0d3b619,0x193bdb9, + 0x3f11662,0x2d8e694,0x0092c51 } }, + /* 252 */ + { { 0x08b364d,0x31ef20a,0x25c4a57,0x021ed07,0x14a562e,0x262a684, + 0x1d21c66,0x126e5a6,0x181f3f8,0x2a93b65,0x1eb726b,0x08fbbce, + 0x084f9a2,0x308f30a,0x0013159 }, + { 0x23f4963,0x0c7960e,0x2a81739,0x2242b69,0x3965003,0x2aca542, + 0x28a1c65,0x2ad48fb,0x149775f,0x1bbb7d2,0x0f2671b,0x3594b85, + 0x22f5563,0x2470f13,0x00fed44 } }, + /* 253 */ + { { 0x0eb453e,0x3ab70fd,0x1a5b335,0x18f2b74,0x25ff74b,0x3612a46, + 0x33d0d75,0x28cdda4,0x2b9b49b,0x22728fb,0x004c15b,0x1beb33b, + 0x1a7e41f,0x0c9b702,0x004ef19 }, + { 0x1ca3233,0x0b4c90f,0x1d4b53d,0x2428896,0x20ee405,0x151bc00, + 0x022edb5,0x1adc463,0x00109ea,0x06490a6,0x30e91e6,0x3682b76, + 0x23c50aa,0x3bd2665,0x005fe53 } }, + /* 254 */ + { { 0x0c28c65,0x3741ae4,0x247d372,0x0b04673,0x2176524,0x2c8bf20, + 0x01fb806,0x3330701,0x307b0a7,0x3999fb7,0x1261bec,0x256679c, + 0x3f22ac7,0x26e8673,0x00bc69d }, + { 0x3c06819,0x35df344,0x379d009,0x2bb8a0a,0x0635a66,0x096c6fa, + 0x1ac4a62,0x023e53b,0x0e45240,0x115f53d,0x3056af8,0x0a66b16, + 0x3c386ee,0x1130e82,0x00cc384 } }, + /* 255 */ + { { 0x14c2356,0x190ec73,0x07be490,0x145d415,0x0740a48,0x1251301, + 0x3eaf29d,0x2628190,0x079299a,0x26e95c9,0x2e05fdf,0x2ca7c5b, + 0x32d7b48,0x3d84226,0x0033fb4 }, + { 0x150f955,0x01240aa,0x3ddf867,0x137fb70,0x297e103,0x17eeda8, + 0x1320b60,0x266ec84,0x13f4322,0x0c8f5ee,0x0590e4a,0x386815e, + 0x00ce61f,0x161bd63,0x008e1d0 } }, +}; + +/* Multiply the base point of P384 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_base_15(sp_point_384* r, const sp_digit* k, + int map, void* heap) +{ + return sp_384_ecc_mulmod_stripe_15(r, &p384_base, p384_table, + k, map, heap); +} + +#endif + +/* Multiply the base point of P384 by the scalar and return the result. + * If map is true then convert result to affine coordinates. 
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[15];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_15(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 15, km);
+
+        err = sp_384_ecc_mulmod_base_15(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_15(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_15(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_15(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
+            a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_add_one_15(sp_digit* a)
+{
+    a[0]++;
+    sp_384_norm_15(a);
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r     A single precision integer.
+ * size  Maximum number of digits to convert into.
+ * a     Byte array.
+ * n     Number of bytes in array to read.
+ */
+static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 18U) {
+            r[j] &= 0x3ffffff;
+            s = 26U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_15(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[48];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_384_from_bin(k, 15, buf, (int)sizeof(buf));
+            if (sp_384_cmp_15(k, p384_order2) < 0) {
+                sp_384_add_one_15(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
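+ *
+ * Usage sketch (illustrative only; the initialization shown is an
+ * assumption for the example, not code from this file):
+ *
+ *     WC_RNG rng;
+ *     mp_int priv;
+ *     ecc_point* pub = wc_ecc_new_point();
+ *     if (wc_InitRng(&rng) == 0 && mp_init(&priv) == MP_OKAY &&
+ *         pub != NULL &&
+ *         sp_ecc_make_key_384(&rng, &priv, pub, NULL) == MP_OKAY) {
+ *         (priv now holds the private scalar, pub the public point)
+ *     }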
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[15];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_15(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_15(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_15(rng, k);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_base_15(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_15(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_384_iszero_15(point->x) == 0) || (sp_384_iszero_15(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_15(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_15(infinity, 1, heap);
+#endif
+    sp_384_point_free_15(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    for (i=0; i<14; i++) {
+        r[i+1] += r[i] >> 26;
+        r[i] &= 0x3ffffff;
+    }
+    j = 384 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<15 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 26) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 26);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for the output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
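+ *
+ * Sketch of a typical call (illustrative only; "priv" and "peerPub" are
+ * assumed to be a loaded scalar and the peer's public point):
+ *
+ *     byte secret[48];
+ *     word32 outLen = sizeof(secret);
+ *     if (sp_ecc_secret_gen_384(&priv, peerPub, secret, &outLen,
+ *             NULL) == MP_OKAY) {
+ *         (outLen is now 48 and secret holds the shared X ordinate)
+ *     }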
+ */ +int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out, + word32* outLen, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 p; + sp_digit kd[15]; +#endif + sp_point_384* point = NULL; + sp_digit* k = NULL; + int err = MP_OKAY; + + if (*outLen < 48U) { + err = BUFFER_E; + } + + if (err == MP_OKAY) { + err = sp_384_point_new_15(heap, p, point); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + + if (err == MP_OKAY) { + sp_384_from_mp(k, 15, priv); + sp_384_point_from_ecc_point_15(point, pub); + err = sp_384_ecc_mulmod_15(point, point, k, 1, heap); + } + if (err == MP_OKAY) { + sp_384_to_bin(point->x, out); + *outLen = 48; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_15(point, 0, heap); + + return err; +} +#endif /* HAVE_ECC_DHE */ + +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_384_mul_d_15(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 15; i++) { + t += tb * a[i]; + r[i] = t & 0x3ffffff; + t >>= 26; + } + r[15] = (sp_digit)t; +#else + int64_t tb = b; + int64_t t[15]; + + t[ 0] = tb * a[ 0]; + t[ 1] = tb * a[ 1]; + t[ 2] = tb * a[ 2]; + t[ 3] = tb * a[ 3]; + t[ 4] = tb * a[ 4]; + t[ 5] = tb * a[ 5]; + t[ 6] = tb * a[ 6]; + t[ 7] = tb * a[ 7]; + t[ 8] = tb * a[ 8]; + t[ 9] = tb * a[ 9]; + t[10] = tb * a[10]; + t[11] = tb * a[11]; + t[12] = tb * a[12]; + t[13] = tb * a[13]; + t[14] = tb * a[14]; + r[ 0] = (t[ 0] & 0x3ffffff); + r[ 1] = (sp_digit)(t[ 0] >> 26) + (t[ 1] & 0x3ffffff); + r[ 2] = (sp_digit)(t[ 1] >> 26) + (t[ 2] & 0x3ffffff); + r[ 3] = (sp_digit)(t[ 2] >> 26) + (t[ 3] & 0x3ffffff); + r[ 4] = (sp_digit)(t[ 3] >> 26) + (t[ 4] & 0x3ffffff); + r[ 5] = (sp_digit)(t[ 4] >> 26) + (t[ 5] & 0x3ffffff); + r[ 6] = (sp_digit)(t[ 5] >> 26) + (t[ 6] & 0x3ffffff); + r[ 7] = (sp_digit)(t[ 6] >> 26) + (t[ 7] & 0x3ffffff); + r[ 8] = (sp_digit)(t[ 7] >> 26) + (t[ 8] & 0x3ffffff); + r[ 9] = (sp_digit)(t[ 8] >> 26) + (t[ 9] & 0x3ffffff); + r[10] = (sp_digit)(t[ 9] >> 26) + (t[10] & 0x3ffffff); + r[11] = (sp_digit)(t[10] >> 26) + (t[11] & 0x3ffffff); + r[12] = (sp_digit)(t[11] >> 26) + (t[12] & 0x3ffffff); + r[13] = (sp_digit)(t[12] >> 26) + (t[13] & 0x3ffffff); + r[14] = (sp_digit)(t[13] >> 26) + (t[14] & 0x3ffffff); + r[15] = (sp_digit)(t[14] >> 26); +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifdef WOLFSSL_SP_DIV_32 +static WC_INLINE sp_digit sp_384_div_word_15(sp_digit d1, sp_digit d0, + sp_digit dv) +{ + sp_digit d, r, t; + + /* All 26 bits from d1 and top 5 bits from d0. */ + d = (d1 << 5) | (d0 >> 21); + r = d / dv; + d -= r * dv; + /* Up to 6 bits in r */ + /* Next 5 bits from d0. */ + r <<= 5; + d <<= 5; + d |= (d0 >> 16) & ((1 << 5) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 11 bits in r */ + /* Next 5 bits from d0. 
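+       (Each of the following steps repeats the same pattern: shift the
+        partial quotient r and working remainder d left by 5 bits, fold
+        in the next 5 dividend bits, divide by dv and accumulate the
+        partial quotient into r.)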
+     */
+    r <<= 5;
+    d <<= 5;
+    d |= (d0 >> 11) & ((1 << 5) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 16 bits in r */
+    /* Next 5 bits from d0. */
+    r <<= 5;
+    d <<= 5;
+    d |= (d0 >> 6) & ((1 << 5) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 21 bits in r */
+    /* Next 5 bits from d0. */
+    r <<= 5;
+    d <<= 5;
+    d |= (d0 >> 1) & ((1 << 5) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 26 bits in r */
+    /* Remaining 1 bit from d0. */
+    r <<= 1;
+    d <<= 1;
+    d |= d0 & ((1 << 1) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide a by d and put the remainder into r. (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Quotient result (not calculated).
+ * r  Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_384_div_15(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_32
+    int64_t d1;
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* td;
+#else
+    sp_digit t1d[30], t2d[15 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 15 + 1), NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 2 * 15;
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        dv = d[14];
+        XMEMCPY(t1, a, sizeof(*t1) * 2U * 15U);
+        for (i=14; i>=0; i--) {
+            t1[15 + i] += t1[15 + i - 1] >> 26;
+            t1[15 + i - 1] &= 0x3ffffff;
+#ifndef WOLFSSL_SP_DIV_32
+            d1 = t1[15 + i];
+            d1 <<= 26;
+            d1 += t1[15 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_384_div_word_15(t1[15 + i], t1[15 + i - 1], dv);
+#endif
+
+            sp_384_mul_d_15(t2, d, r1);
+            (void)sp_384_sub_15(&t1[i], &t1[i], t2);
+            t1[15 + i] -= t2[15];
+            t1[15 + i] += t1[15 + i - 1] >> 26;
+            t1[15 + i - 1] &= 0x3ffffff;
+            r1 = (((-t1[15 + i]) << 26) - t1[15 + i - 1]) / dv;
+            r1++;
+            sp_384_mul_d_15(t2, d, r1);
+            (void)sp_384_add_15(&t1[i], &t1[i], t2);
+            t1[15 + i] += t1[15 + i - 1] >> 26;
+            t1[15 + i - 1] &= 0x3ffffff;
+        }
+        t1[15 - 1] += t1[15 - 2] >> 26;
+        t1[15 - 2] &= 0x3ffffff;
+        r1 = t1[15 - 1] / dv;
+
+        sp_384_mul_d_15(t2, d, r1);
+        (void)sp_384_sub_15(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 15U);
+        for (i=0; i<13; i++) {
+            r[i+1] += r[i] >> 26;
+            r[i] &= 0x3ffffff;
+        }
+        sp_384_cond_add_15(r, r, d, 0 - ((r[14] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
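+ *
+ * (For intuition with ordinary integers: a = 23 and m = 7 give r = 2,
+ *  since 3*7 + 2 = 23; here the same relation is computed on 26-bit
+ *  limbs by sp_384_div_15() above.)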
+ */
+static int sp_384_mod_15(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_15(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve. */
+static const uint32_t p384_order_minus_2[12] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve. */
+static const uint32_t p384_order_low[6] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of the P384 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_15(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_15(r, a, b);
+    sp_384_mont_reduce_order_15(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of the P384 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_order_15(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_15(r, a);
+    sp_384_mont_reduce_order_15(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of the P384 curve a number of times.
+ * (r = a ^ (2 ^ n) mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ * n  Number of times to square.
+ */
+static void sp_384_mont_sqr_n_order_15(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_15(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_15(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_order_15(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 15);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_15(t, t);
+        if ((p384_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_15(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 15U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 15;
+    sp_digit* t3 = td + 4 * 15;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_15(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_15(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_15(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_15(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_15(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_15(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_15(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_15(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_15(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_15(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_15(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_15(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
+    sp_384_mont_sqr_n_order_15(t2, t, 48);
+    /* t = a^ffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_15(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_15(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_15(t2, t2, t);
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_15(t2, t2);
+        if (((sp_digit)p384_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_15(t2, t2, a);
+        }
+    }
+    sp_384_mont_sqr_order_15(t2, t2);
+    sp_384_mont_mul_order_15(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN  64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 384 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Second part of result as an mp_int.
+ * km       Optional scalar k to use; when NULL or zero a random k is
+ *          generated, otherwise km is consumed and zeroed.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*15];
+    sp_digit xd[2*15];
+    sp_digit kd[2*15];
+    sp_digit rd[2*15];
+    sp_digit td[3 * 2*15];
+    sp_point_384 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int32_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_384_point_new_15(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 15, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 15;
+        x = d + 2 * 15;
+        k = d + 4 * 15;
+        r = d + 6 * 15;
+        tmp = d + 8 * 15;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(e, 15, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_384_from_mp(x, 15, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_384_ecc_gen_k_15(rng, k);
+        }
+        else {
+            sp_384_from_mp(k, 15, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_base_15(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 15U);
+            sp_384_norm_15(r);
+            c = sp_384_cmp_15(r, p384_order);
+            sp_384_cond_sub_15(r, r, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_15(r);
+
+            /* Conv k to Montgomery form (mod order) */
+            sp_384_mul_15(k, k, p384_norm_order);
+            err = sp_384_mod_15(k, k, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_15(k);
+            /* kInv = 1/k mod order */
+            sp_384_mont_inv_order_15(kInv, k, tmp);
+            sp_384_norm_15(kInv);
+
+            /* s = r * x + e */
+            sp_384_mul_15(x, x, r);
+            err = sp_384_mod_15(x, x, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_15(x);
+            carry = sp_384_add_15(s, e, x);
+            sp_384_cond_sub_15(s, s, p384_order, 0 - carry);
+            sp_384_norm_15(s);
+            c = sp_384_cmp_15(s, p384_order);
+            sp_384_cond_sub_15(s, s, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_15(s);
+
+            /* s = s * k^-1 mod order */
+            sp_384_mont_mul_order_15(s, s, kInv);
+            sp_384_norm_15(s);
+
+            /* Check that signature is usable. */
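+            /* (An s of zero would make the signature unverifiable, so
+             * the surrounding loop discards such a result and retries
+             * with a fresh random k.) */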
+            if (sp_384_iszero_15(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 15);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 15U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 15U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 15U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 15U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 15U);
+#endif
+    sp_384_point_free_15(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 384)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to verify.
+ * hashLen  Length of the hash data.
+ * pX       X ordinate of the public point.
+ * pY       Y ordinate of the public point.
+ * pZ       Z ordinate of the public point.
+ * r        First part of the signature as an mp_int.
+ * sm       Second part of the signature as an mp_int.
+ * res      Set to 1 when the signature verifies and 0 otherwise.
+ * heap     Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*15];
+    sp_digit u2d[2*15];
+    sp_digit sd[2*15];
+    sp_digit tmpd[2*15 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+
+    err = sp_384_point_new_15(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_15(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 15, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1 = d + 0 * 15;
+        u2 = d + 2 * 15;
+        s = d + 4 * 15;
+        tmp = d + 6 * 15;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(u1, 15, hash, (int)hashLen);
+        sp_384_from_mp(u2, 15, r);
+        sp_384_from_mp(s, 15, sm);
+        sp_384_from_mp(p2->x, 15, pX);
+        sp_384_from_mp(p2->y, 15, pY);
+        sp_384_from_mp(p2->z, 15, pZ);
+
+        {
+            sp_384_mul_15(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_15(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_15(s);
+        {
+            sp_384_mont_inv_order_15(s, s, tmp);
+            sp_384_mont_mul_order_15(u1, u1, s);
+            sp_384_mont_mul_order_15(u2, u2, s);
+        }
+
+        err = sp_384_ecc_mulmod_base_15(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_15(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_384_proj_point_add_15(p1, p1, p2, tmp);
+            if (sp_384_iszero_15(p1->z)) {
+                if (sp_384_iszero_15(p1->x) && sp_384_iszero_15(p1->y)) {
+                    sp_384_proj_point_dbl_15(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    p1->x[7] = 0;
+                    p1->x[8] = 0;
+                    p1->x[9] = 0;
+                    p1->x[10] = 0;
+                    p1->x[11] = 0;
+                    p1->x[12] = 0;
+                    p1->x[13] = 0;
+                    p1->x[14] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 15, r);
+        err = sp_384_mod_mul_norm_15(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_15(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_15(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 15, r);
+            carry = sp_384_add_15(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_15(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_15(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montgomery form. */
+                    err = sp_384_mod_mul_norm_15(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_15(u1, u2, p1->z, p384_mod,
+                            p384_mp_mod);
+                        *res = (int)(sp_384_cmp_15(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_15(p1, 0, heap);
+    sp_384_point_free_15(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
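+ *
+ * The test below checks the P384 short-Weierstrass equation
+ * y^2 = x^3 - 3x + b (mod p), rearranged as y^2 - x^3 + 3x == b.
+ * A plain-integer sketch of the same check (illustrative only; small
+ * moduli, no overflow handling):
+ *
+ *     int on_curve(long x, long y, long b, long p)
+ *     {
+ *         long lhs = (y * y) % p;
+ *         long rhs = ((x * x % p) * x % p + p - (3 * x % p) + b) % p;
+ *         return lhs == rhs;
+ *     }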
+ */
+static int sp_384_ecc_is_point_15(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*15];
+    sp_digit t2d[2*15];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 15;
+        t2 = d + 2 * 15;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_384_sqr_15(t1, point->y);
+        (void)sp_384_mod_15(t1, t1, p384_mod);
+        sp_384_sqr_15(t2, point->x);
+        (void)sp_384_mod_15(t2, t2, p384_mod);
+        sp_384_mul_15(t2, t2, point->x);
+        (void)sp_384_mod_15(t2, t2, p384_mod);
+        (void)sp_384_sub_15(t2, p384_mod, t2);
+        sp_384_mont_add_15(t1, t1, t2, p384_mod);
+
+        sp_384_mont_add_15(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_15(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_15(t1, t1, point->x, p384_mod);
+
+        if (sp_384_cmp_15(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 pubd;
+#endif
+    sp_point_384* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_15(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pub->x, 15, pX);
+        sp_384_from_mp(pub->y, 15, pY);
+        sp_384_from_bin(pub->z, 15, one, (int)sizeof(one));
+
+        err = sp_384_ecc_is_point_15(pub, NULL);
+    }
+
+    sp_384_point_free_15(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
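+ *
+ * The checks performed below, in order:
+ *   1. the point is not the point at infinity,
+ *   2. x and y are in range (less than the prime),
+ *   3. the point satisfies the curve equation,
+ *   4. order * point gives infinity, and
+ *   5. base point * private scalar reproduces the public point.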
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[15];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_15(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_15(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15, heap,
+                                  DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_384_from_mp(pub->x, 15, pX);
+        sp_384_from_mp(pub->y, 15, pY);
+        sp_384_from_bin(pub->z, 15, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 15, privm);
+
+        /* Check point at infinity. */
+        if ((sp_384_iszero_15(pub->x) != 0) &&
+            (sp_384_iszero_15(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_384_cmp_15(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_15(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_15(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+        err = sp_384_ecc_mulmod_15(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_15(p->x) == 0) ||
+            (sp_384_iszero_15(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+        err = sp_384_ecc_mulmod_base_15(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_15(p->x, pub->x) != 0 ||
+            sp_384_cmp_15(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_15(p, 0, heap);
+    sp_384_point_free_15(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX  First EC point's X ordinate.
+ * pY  First EC point's Y ordinate.
+ * pZ  First EC point's Z ordinate.
+ * qX  Second EC point's X ordinate.
+ * qY  Second EC point's Y ordinate.
+ * qZ  Second EC point's Z ordinate.
+ * rX  Resultant EC point's X ordinate.
+ * rY  Resultant EC point's Y ordinate.
+ * rZ  Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
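+ *
+ * Usage sketch (illustrative only; all nine mp_ints are assumed
+ * initialized and the inputs loaded with projective coordinates):
+ *
+ *     if (sp_ecc_proj_add_point_384(&pX, &pY, &pZ, &qX, &qY, &qZ,
+ *             &rX, &rY, &rZ) == MP_OKAY) {
+ *         (result is projective; use sp_ecc_map_384() for affine)
+ *     }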
+ */ +int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 15 * 5]; + sp_point_384 pd; + sp_point_384 qd; +#endif + sp_digit* tmp; + sp_point_384* p; + sp_point_384* q = NULL; + int err; + + err = sp_384_point_new_15(NULL, pd, p); + if (err == MP_OKAY) { + err = sp_384_point_new_15(NULL, qd, q); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 15, pX); + sp_384_from_mp(p->y, 15, pY); + sp_384_from_mp(p->z, 15, pZ); + sp_384_from_mp(q->x, 15, qX); + sp_384_from_mp(q->y, 15, qY); + sp_384_from_mp(q->z, 15, qZ); + + sp_384_proj_point_add_15(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_15(q, 0, NULL); + sp_384_point_free_15(p, 0, NULL); + + return err; +} + +/* Double a projective EC point. + * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 15 * 2]; + sp_point_384 pd; +#endif + sp_digit* tmp; + sp_point_384* p; + int err; + + err = sp_384_point_new_15(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 15, pX); + sp_384_from_mp(p->y, 15, pY); + sp_384_from_mp(p->z, 15, pZ); + + sp_384_proj_point_dbl_15(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_15(p, 0, NULL); + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
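+ *
+ * (The mapping uses the Jacobian relations x = X / Z^2 and y = Y / Z^3
+ *  mod p noted in the verify comment above, so only a single modular
+ *  inversion of Z is needed.)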
+ */ +int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 15 * 6]; + sp_point_384 pd; +#endif + sp_digit* tmp; + sp_point_384* p; + int err; + + err = sp_384_point_new_15(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 15, pX); + sp_384_from_mp(p->y, 15, pY); + sp_384_from_mp(p->z, 15, pZ); + + sp_384_map_15(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, pZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_15(p, 0, NULL); + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +static int sp_384_mont_sqrt_15(sp_digit* y) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit t1d[2 * 15]; + sp_digit t2d[2 * 15]; + sp_digit t3d[2 * 15]; + sp_digit t4d[2 * 15]; + sp_digit t5d[2 * 15]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* t3; + sp_digit* t4; + sp_digit* t5; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 15, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = d + 0 * 15; + t2 = d + 2 * 15; + t3 = d + 4 * 15; + t4 = d + 6 * 15; + t5 = d + 8 * 15; +#else + t1 = t1d; + t2 = t2d; + t3 = t3d; + t4 = t4d; + t5 = t5d; +#endif + + { + /* t2 = y ^ 0x2 */ + sp_384_mont_sqr_15(t2, y, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3 */ + sp_384_mont_mul_15(t1, t2, y, p384_mod, p384_mp_mod); + /* t5 = y ^ 0xc */ + sp_384_mont_sqr_n_15(t5, t1, 2, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xf */ + sp_384_mont_mul_15(t1, t1, t5, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x1e */ + sp_384_mont_sqr_15(t2, t1, p384_mod, p384_mp_mod); + /* t3 = y ^ 0x1f */ + sp_384_mont_mul_15(t3, t2, y, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3e0 */ + sp_384_mont_sqr_n_15(t2, t3, 5, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3ff */ + sp_384_mont_mul_15(t1, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x7fe0 */ + sp_384_mont_sqr_n_15(t2, t1, 5, p384_mod, p384_mp_mod); + /* t3 = y ^ 0x7fff */ + sp_384_mont_mul_15(t3, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fff800 */ + sp_384_mont_sqr_n_15(t2, t3, 15, p384_mod, p384_mp_mod); + /* t4 = y ^ 0x3ffffff */ + sp_384_mont_mul_15(t4, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xffffffc000000 */ + sp_384_mont_sqr_n_15(t2, t4, 30, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xfffffffffffff */ + sp_384_mont_mul_15(t1, t4, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xfffffffffffffff000000000000000 */ + 
sp_384_mont_sqr_n_15(t2, t1, 60, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xffffffffffffffffffffffffffffff */ + sp_384_mont_mul_15(t1, t1, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */ + sp_384_mont_sqr_n_15(t2, t1, 120, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_15(t1, t1, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */ + sp_384_mont_sqr_n_15(t2, t1, 15, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_15(t1, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */ + sp_384_mont_sqr_n_15(t2, t1, 31, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */ + sp_384_mont_mul_15(t1, t4, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */ + sp_384_mont_sqr_n_15(t2, t1, 4, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */ + sp_384_mont_mul_15(t1, t5, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */ + sp_384_mont_sqr_n_15(t2, t1, 62, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */ + sp_384_mont_mul_15(t1, y, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */ + sp_384_mont_sqr_n_15(y, t1, 30, p384_mod, p384_mp_mod); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
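The square/multiply chain above is a hard-wired addition chain raising y to (p + 1)/4 for the P-384 prime. For any prime p with p mod 4 == 3 (the P-384 prime is one), y^((p+1)/4) is a square root of y whenever one exists. A toy-prime illustration of that identity (helper and variable names are ours):

    #include <stdio.h>
    #include <stdint.h>

    /* Square root mod p for p % 4 == 3: sqrt(y) = y^((p+1)/4) mod p. */
    static uint64_t powmod(uint64_t b, uint64_t e, uint64_t m) {
        uint64_t r = 1; b %= m;
        while (e) { if (e & 1) r = r * b % m; b = b * b % m; e >>= 1; }
        return r;
    }

    int main(void) {
        uint64_t p = 1000003;          /* toy prime, p % 4 == 3 */
        uint64_t y = 123456 % p;
        uint64_t s = y * y % p;        /* s is a guaranteed square */
        uint64_t r = powmod(s, (p + 1) / 4, p);
        /* The root is y or p - y; the caller picks by parity. */
        printf("sqrt(%llu) = %llu (expect %llu or %llu)\n",
               (unsigned long long)s, (unsigned long long)r,
               (unsigned long long)y, (unsigned long long)(p - y));
        return 0;
    }

sp_ecc_uncompress_384 below does the same, then replaces the root by p - y when its low bit disagrees with the requested odd flag.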
+ */ +int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit xd[2 * 15]; + sp_digit yd[2 * 15]; +#endif + sp_digit* x = NULL; + sp_digit* y = NULL; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 15, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + x = d + 0 * 15; + y = d + 2 * 15; +#else + x = xd; + y = yd; +#endif + + sp_384_from_mp(x, 15, xm); + err = sp_384_mod_mul_norm_15(x, x, p384_mod); + } + if (err == MP_OKAY) { + /* y = x^3 */ + { + sp_384_mont_sqr_15(y, x, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(y, y, x, p384_mod, p384_mp_mod); + } + /* y = x^3 - 3x */ + sp_384_mont_sub_15(y, y, x, p384_mod); + sp_384_mont_sub_15(y, y, x, p384_mod); + sp_384_mont_sub_15(y, y, x, p384_mod); + /* y = x^3 - 3x + b */ + err = sp_384_mod_mul_norm_15(x, p384_b, p384_mod); + } + if (err == MP_OKAY) { + sp_384_mont_add_15(y, y, x, p384_mod); + /* y = sqrt(x^3 - 3x + b) */ + err = sp_384_mont_sqrt_15(y); + } + if (err == MP_OKAY) { + XMEMSET(y + 15, 0, 15U * sizeof(sp_digit)); + sp_384_mont_reduce_15(y, p384_mod, p384_mp_mod); + if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) { + sp_384_mont_sub_15(y, p384_mod, y, p384_mod); + } + + err = sp_384_to_mp(y, ym); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} +#endif +#endif /* WOLFSSL_SP_384 */ +#endif /* WOLFSSL_HAVE_SP_ECC */ +#endif /* SP_WORD_SIZE == 32 */ +#endif /* !WOLFSSL_SP_ASM */ +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */ diff --git a/client/wolfssl/wolfcrypt/src/sp_c64.c b/client/wolfssl/wolfcrypt/src/sp_c64.c new file mode 100644 index 0000000..9038173 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/sp_c64.c @@ -0,0 +1,23220 @@ +/* sp.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Implementation by Sean Parkinson. 
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
+    defined(WOLFSSL_HAVE_SP_ECC)
+
+#ifdef RSA_LOW_MEM
+#ifndef SP_RSA_PRIVATE_EXP_D
+#define SP_RSA_PRIVATE_EXP_D
+#endif
+
+#ifndef WOLFSSL_SP_SMALL
+#define WOLFSSL_SP_SMALL
+#endif
+#endif
+
+#include <wolfssl/wolfcrypt/sp.h>
+
+#ifndef WOLFSSL_SP_ASM
+#if SP_WORD_SIZE == 64
+#if (defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL)) && (defined(WOLFSSL_HAVE_SP_ECC) || !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Mask for address to obfuscate which of the two addresses will be used. */
+static const size_t addr_mask[2] = { 0, (size_t)-1 };
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+/* Read big endian unsigned byte array into r.
+ *
+ * r     A single precision integer.
+ * size  Maximum number of bytes to convert.
+ * a     Byte array.
+ * n     Number of bytes in array to read.
+ */
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 49U) {
+            r[j] &= 0x1ffffffffffffffL;
+            s = 57U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r     A single precision integer.
+ * size  Maximum number of bytes to convert.
+ * a     A multi-precision integer.
+ */
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 57
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 57
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0x1ffffffffffffffL;
+        s = 57U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 57U) <= (word32)DIGIT_BIT) {
+            s += 57U;
+            r[j] &= 0x1ffffffffffffffL;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 57) {
+            r[j] &= 0x1ffffffffffffffL;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 57 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 256
+ *
+ * r  A single precision integer.
+ * a  Byte array.
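sp_2048_from_bin above streams big-endian bytes into 57-bit limbs, masking each filled limb and pushing the bits of a straddling byte into the next one. The same walk reduced to two limbs, small enough to check the round trip by eye (a sketch using our own names; only the 57-bit limb layout is taken from the file):

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    #define LIMB_BITS 57
    #define LIMB_MASK 0x1ffffffffffffffULL

    /* Same walk as sp_2048_from_bin: stream big-endian bytes into 57-bit
     * limbs, splitting any byte that straddles a limb boundary. */
    static void from_bin(uint64_t *r, int size, const unsigned char *a, int n) {
        int i, j = 0;
        unsigned s = 0;
        memset(r, 0, (size_t)size * sizeof(*r));  /* stands in for the
                                                     trailing-zero loop */
        for (i = n - 1; i >= 0; i--) {
            r[j] |= (uint64_t)a[i] << s;
            if (s >= LIMB_BITS - 8) {        /* byte crosses into next limb */
                r[j] &= LIMB_MASK;
                s = LIMB_BITS - s;           /* bits the low limb consumed  */
                if (j + 1 >= size) break;
                r[++j] = (uint64_t)a[i] >> s;
                s = 8 - s;
            } else {
                s += 8;
            }
        }
    }

    int main(void) {
        unsigned char be[8] = {0xfe,0x23,0x45,0x67,0x89,0xab,0xcd,0xef};
        uint64_t r[2];
        from_bin(r, 2, be, 8);
        /* low 57 bits land in r[0], the top 7 in r[1]; recombine to check */
        uint64_t v = (r[1] << LIMB_BITS) | r[0];
        printf("%016llx (expect fe23456789abcdef)\n", (unsigned long long)v);
        return 0;
    }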
+ */ +static void sp_2048_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + for (i=0; i<35; i++) { + r[i+1] += r[i] >> 57; + r[i] &= 0x1ffffffffffffffL; + } + j = 2048 / 8 - 1; + a[j] = 0; + for (i=0; i<36 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ + b += 8 - s; + if (j < 0) { + break; + } + while (b < 57) { + a[j--] = (byte)(r[i] >> b); + b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 57); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_9(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int128_t t0 = ((int128_t)a[ 0]) * b[ 0]; + int128_t t1 = ((int128_t)a[ 0]) * b[ 1] + + ((int128_t)a[ 1]) * b[ 0]; + int128_t t2 = ((int128_t)a[ 0]) * b[ 2] + + ((int128_t)a[ 1]) * b[ 1] + + ((int128_t)a[ 2]) * b[ 0]; + int128_t t3 = ((int128_t)a[ 0]) * b[ 3] + + ((int128_t)a[ 1]) * b[ 2] + + ((int128_t)a[ 2]) * b[ 1] + + ((int128_t)a[ 3]) * b[ 0]; + int128_t t4 = ((int128_t)a[ 0]) * b[ 4] + + ((int128_t)a[ 1]) * b[ 3] + + ((int128_t)a[ 2]) * b[ 2] + + ((int128_t)a[ 3]) * b[ 1] + + ((int128_t)a[ 4]) * b[ 0]; + int128_t t5 = ((int128_t)a[ 0]) * b[ 5] + + ((int128_t)a[ 1]) * b[ 4] + + ((int128_t)a[ 2]) * b[ 3] + + ((int128_t)a[ 3]) * b[ 2] + + ((int128_t)a[ 4]) * b[ 1] + + ((int128_t)a[ 5]) * b[ 0]; + int128_t t6 = ((int128_t)a[ 0]) * b[ 6] + + ((int128_t)a[ 1]) * b[ 5] + + ((int128_t)a[ 2]) * b[ 4] + + ((int128_t)a[ 3]) * b[ 3] + + ((int128_t)a[ 4]) * b[ 2] + + ((int128_t)a[ 5]) * b[ 1] + + ((int128_t)a[ 6]) * b[ 0]; + int128_t t7 = ((int128_t)a[ 0]) * b[ 7] + + ((int128_t)a[ 1]) * b[ 6] + + ((int128_t)a[ 2]) * b[ 5] + + ((int128_t)a[ 3]) * b[ 4] + + ((int128_t)a[ 4]) * b[ 3] + + ((int128_t)a[ 5]) * b[ 2] + + ((int128_t)a[ 6]) * b[ 1] + + ((int128_t)a[ 7]) * b[ 0]; + int128_t t8 = ((int128_t)a[ 0]) * b[ 8] + + ((int128_t)a[ 1]) * b[ 7] + + ((int128_t)a[ 2]) * b[ 6] + + ((int128_t)a[ 3]) * b[ 5] + + ((int128_t)a[ 4]) * b[ 4] + + ((int128_t)a[ 5]) * b[ 3] + + ((int128_t)a[ 6]) * b[ 2] + + ((int128_t)a[ 7]) * b[ 1] + + ((int128_t)a[ 8]) * b[ 0]; + int128_t t9 = ((int128_t)a[ 1]) * b[ 8] + + ((int128_t)a[ 2]) * b[ 7] + + ((int128_t)a[ 3]) * b[ 6] + + ((int128_t)a[ 4]) * b[ 5] + + ((int128_t)a[ 5]) * b[ 4] + + ((int128_t)a[ 6]) * b[ 3] + + ((int128_t)a[ 7]) * b[ 2] + + ((int128_t)a[ 8]) * b[ 1]; + int128_t t10 = ((int128_t)a[ 2]) * b[ 8] + + ((int128_t)a[ 3]) * b[ 7] + + ((int128_t)a[ 4]) * b[ 6] + + ((int128_t)a[ 5]) * b[ 5] + + ((int128_t)a[ 6]) * b[ 4] + + ((int128_t)a[ 7]) * b[ 3] + + ((int128_t)a[ 8]) * b[ 2]; + int128_t t11 = ((int128_t)a[ 3]) * b[ 8] + + ((int128_t)a[ 4]) * b[ 7] + + ((int128_t)a[ 5]) * b[ 6] + + ((int128_t)a[ 6]) * b[ 5] + + ((int128_t)a[ 7]) * b[ 4] + + ((int128_t)a[ 8]) * b[ 3]; + int128_t t12 = ((int128_t)a[ 4]) * b[ 8] + + ((int128_t)a[ 5]) * b[ 7] + + ((int128_t)a[ 6]) * b[ 6] + + ((int128_t)a[ 7]) * b[ 5] + + ((int128_t)a[ 8]) * b[ 4]; + int128_t t13 = ((int128_t)a[ 5]) * b[ 8] + + ((int128_t)a[ 6]) * b[ 7] + + ((int128_t)a[ 7]) * b[ 6] + + ((int128_t)a[ 8]) * b[ 5]; + int128_t t14 = ((int128_t)a[ 6]) * b[ 8] + + ((int128_t)a[ 7]) * b[ 7] + + ((int128_t)a[ 8]) * b[ 6]; + int128_t t15 = ((int128_t)a[ 7]) * b[ 8] + + ((int128_t)a[ 8]) * b[ 7]; + int128_t t16 = ((int128_t)a[ 8]) * b[ 8]; + + t1 += t0 >> 57; r[ 0] = t0 & 0x1ffffffffffffffL; + t2 += t1 
>> 57; r[ 1] = t1 & 0x1ffffffffffffffL; + t3 += t2 >> 57; r[ 2] = t2 & 0x1ffffffffffffffL; + t4 += t3 >> 57; r[ 3] = t3 & 0x1ffffffffffffffL; + t5 += t4 >> 57; r[ 4] = t4 & 0x1ffffffffffffffL; + t6 += t5 >> 57; r[ 5] = t5 & 0x1ffffffffffffffL; + t7 += t6 >> 57; r[ 6] = t6 & 0x1ffffffffffffffL; + t8 += t7 >> 57; r[ 7] = t7 & 0x1ffffffffffffffL; + t9 += t8 >> 57; r[ 8] = t8 & 0x1ffffffffffffffL; + t10 += t9 >> 57; r[ 9] = t9 & 0x1ffffffffffffffL; + t11 += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL; + t12 += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL; + t13 += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL; + t14 += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL; + t15 += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL; + t16 += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL; + r[17] = (sp_digit)(t16 >> 57); + r[16] = t16 & 0x1ffffffffffffffL; +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_9(sp_digit* r, const sp_digit* a) +{ + int128_t t0 = ((int128_t)a[ 0]) * a[ 0]; + int128_t t1 = (((int128_t)a[ 0]) * a[ 1]) * 2; + int128_t t2 = (((int128_t)a[ 0]) * a[ 2]) * 2 + + ((int128_t)a[ 1]) * a[ 1]; + int128_t t3 = (((int128_t)a[ 0]) * a[ 3] + + ((int128_t)a[ 1]) * a[ 2]) * 2; + int128_t t4 = (((int128_t)a[ 0]) * a[ 4] + + ((int128_t)a[ 1]) * a[ 3]) * 2 + + ((int128_t)a[ 2]) * a[ 2]; + int128_t t5 = (((int128_t)a[ 0]) * a[ 5] + + ((int128_t)a[ 1]) * a[ 4] + + ((int128_t)a[ 2]) * a[ 3]) * 2; + int128_t t6 = (((int128_t)a[ 0]) * a[ 6] + + ((int128_t)a[ 1]) * a[ 5] + + ((int128_t)a[ 2]) * a[ 4]) * 2 + + ((int128_t)a[ 3]) * a[ 3]; + int128_t t7 = (((int128_t)a[ 0]) * a[ 7] + + ((int128_t)a[ 1]) * a[ 6] + + ((int128_t)a[ 2]) * a[ 5] + + ((int128_t)a[ 3]) * a[ 4]) * 2; + int128_t t8 = (((int128_t)a[ 0]) * a[ 8] + + ((int128_t)a[ 1]) * a[ 7] + + ((int128_t)a[ 2]) * a[ 6] + + ((int128_t)a[ 3]) * a[ 5]) * 2 + + ((int128_t)a[ 4]) * a[ 4]; + int128_t t9 = (((int128_t)a[ 1]) * a[ 8] + + ((int128_t)a[ 2]) * a[ 7] + + ((int128_t)a[ 3]) * a[ 6] + + ((int128_t)a[ 4]) * a[ 5]) * 2; + int128_t t10 = (((int128_t)a[ 2]) * a[ 8] + + ((int128_t)a[ 3]) * a[ 7] + + ((int128_t)a[ 4]) * a[ 6]) * 2 + + ((int128_t)a[ 5]) * a[ 5]; + int128_t t11 = (((int128_t)a[ 3]) * a[ 8] + + ((int128_t)a[ 4]) * a[ 7] + + ((int128_t)a[ 5]) * a[ 6]) * 2; + int128_t t12 = (((int128_t)a[ 4]) * a[ 8] + + ((int128_t)a[ 5]) * a[ 7]) * 2 + + ((int128_t)a[ 6]) * a[ 6]; + int128_t t13 = (((int128_t)a[ 5]) * a[ 8] + + ((int128_t)a[ 6]) * a[ 7]) * 2; + int128_t t14 = (((int128_t)a[ 6]) * a[ 8]) * 2 + + ((int128_t)a[ 7]) * a[ 7]; + int128_t t15 = (((int128_t)a[ 7]) * a[ 8]) * 2; + int128_t t16 = ((int128_t)a[ 8]) * a[ 8]; + + t1 += t0 >> 57; r[ 0] = t0 & 0x1ffffffffffffffL; + t2 += t1 >> 57; r[ 1] = t1 & 0x1ffffffffffffffL; + t3 += t2 >> 57; r[ 2] = t2 & 0x1ffffffffffffffL; + t4 += t3 >> 57; r[ 3] = t3 & 0x1ffffffffffffffL; + t5 += t4 >> 57; r[ 4] = t4 & 0x1ffffffffffffffL; + t6 += t5 >> 57; r[ 5] = t5 & 0x1ffffffffffffffL; + t7 += t6 >> 57; r[ 6] = t6 & 0x1ffffffffffffffL; + t8 += t7 >> 57; r[ 7] = t7 & 0x1ffffffffffffffL; + t9 += t8 >> 57; r[ 8] = t8 & 0x1ffffffffffffffL; + t10 += t9 >> 57; r[ 9] = t9 & 0x1ffffffffffffffL; + t11 += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL; + t12 += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL; + t13 += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL; + t14 += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL; + t15 += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL; + t16 += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL; + 
r[17] = (sp_digit)(t16 >> 57); + r[16] = t16 & 0x1ffffffffffffffL; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_add_9(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + r[ 0] = a[ 0] + b[ 0]; + r[ 1] = a[ 1] + b[ 1]; + r[ 2] = a[ 2] + b[ 2]; + r[ 3] = a[ 3] + b[ 3]; + r[ 4] = a[ 4] + b[ 4]; + r[ 5] = a[ 5] + b[ 5]; + r[ 6] = a[ 6] + b[ 6]; + r[ 7] = a[ 7] + b[ 7]; + r[ 8] = a[ 8] + b[ 8]; + + return 0; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_add_18(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 16; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[16] = a[16] + b[16]; + r[17] = a[17] + b[17]; + + return 0; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_sub_18(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 16; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[16] = a[16] - b[16]; + r[17] = a[17] - b[17]; + + return 0; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_18(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[18]; + sp_digit* a1 = z1; + sp_digit b1[9]; + sp_digit* z2 = r + 18; + (void)sp_2048_add_9(a1, a, &a[9]); + (void)sp_2048_add_9(b1, b, &b[9]); + sp_2048_mul_9(z2, &a[9], &b[9]); + sp_2048_mul_9(z0, a, b); + sp_2048_mul_9(z1, a1, b1); + (void)sp_2048_sub_18(z1, z1, z2); + (void)sp_2048_sub_18(z1, z1, z0); + (void)sp_2048_add_18(r + 9, r + 9, z1); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_18(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z1[18]; + sp_digit* a1 = z1; + sp_digit* z2 = r + 18; + (void)sp_2048_add_9(a1, a, &a[9]); + sp_2048_sqr_9(z2, &a[9]); + sp_2048_sqr_9(z0, a); + sp_2048_sqr_9(z1, a1); + (void)sp_2048_sub_18(z1, z1, z2); + (void)sp_2048_sub_18(z1, z1, z0); + (void)sp_2048_add_18(r + 9, r + 9, z1); +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
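sp_2048_mul_18 and sp_2048_sqr_18 above are one Karatsuba level each: three 9-limb products stand in for four, via z1 = (a0 + a1)(b0 + b1) - z0 - z2, and sp_2048_mul_36/sp_2048_sqr_36 repeat the split one level up. The identity on a 64-bit word cut into 32-bit halves (standalone sketch; assumes the gcc/clang unsigned __int128 this file already depends on):

    #include <stdio.h>
    #include <stdint.h>

    /* One Karatsuba level: a 64x64 product from three 32x32 products,
     * mirroring how sp_2048_mul_18 builds 18 limbs from 9-limb pieces. */
    static void karatsuba64(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo) {
        uint64_t a0 = (uint32_t)a, a1 = a >> 32;
        uint64_t b0 = (uint32_t)b, b1 = b >> 32;
        uint64_t z0 = a0 * b0;               /* low  half product */
        uint64_t z2 = a1 * b1;               /* high half product */
        uint64_t as = a0 + a1, bs = b0 + b1; /* 33-bit sums       */
        /* (a0+a1)(b0+b1) can reach 66 bits; use the wide type. */
        unsigned __int128 z1 = (unsigned __int128)as * bs - z0 - z2;
        unsigned __int128 full = ((unsigned __int128)z2 << 64)
                               + (z1 << 32) + z0;
        *hi = (uint64_t)(full >> 64);
        *lo = (uint64_t)full;
    }

    int main(void) {
        uint64_t a = 0xfedcba9876543210ULL, b = 0x0123456789abcdefULL;
        uint64_t hi, lo;
        karatsuba64(a, b, &hi, &lo);
        unsigned __int128 ref = (unsigned __int128)a * b;   /* check */
        printf("karatsuba %s\n",
               (hi == (uint64_t)(ref >> 64) && lo == (uint64_t)ref)
                   ? "ok" : "BAD");
        return 0;
    }

Replacing four recursive products with three is what turns the quadratic schoolbook cost into roughly O(n^1.585) as the operand grows.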
+ */ +SP_NOINLINE static int sp_2048_add_36(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 32; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[32] = a[32] + b[32]; + r[33] = a[33] + b[33]; + r[34] = a[34] + b[34]; + r[35] = a[35] + b[35]; + + return 0; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_sub_36(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 32; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[32] = a[32] - b[32]; + r[33] = a[33] - b[33]; + r[34] = a[34] - b[34]; + r[35] = a[35] - b[35]; + + return 0; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_36(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[36]; + sp_digit* a1 = z1; + sp_digit b1[18]; + sp_digit* z2 = r + 36; + (void)sp_2048_add_18(a1, a, &a[18]); + (void)sp_2048_add_18(b1, b, &b[18]); + sp_2048_mul_18(z2, &a[18], &b[18]); + sp_2048_mul_18(z0, a, b); + sp_2048_mul_18(z1, a1, b1); + (void)sp_2048_sub_36(z1, z1, z2); + (void)sp_2048_sub_36(z1, z1, z0); + (void)sp_2048_add_36(r + 18, r + 18, z1); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_36(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z1[36]; + sp_digit* a1 = z1; + sp_digit* z2 = r + 36; + (void)sp_2048_add_18(a1, a, &a[18]); + sp_2048_sqr_18(z2, &a[18]); + sp_2048_sqr_18(z0, a); + sp_2048_sqr_18(z1, a1); + (void)sp_2048_sub_36(z1, z1, z2); + (void)sp_2048_sub_36(z1, z1, z0); + (void)sp_2048_add_36(r + 18, r + 18, z1); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_add_36(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 36; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_sub_36(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 36; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static void sp_2048_mul_36(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j, k; + int128_t c; + + c = ((int128_t)a[35]) * b[35]; + r[71] = (sp_digit)(c >> 57); + c = (c & 0x1ffffffffffffffL) << 57; + for (k = 69; k >= 0; k--) { + for (i = 35; i >= 0; i--) { + j = k - i; + if (j >= 36) { + break; + } + if (j < 0) { + continue; + } + + c += ((int128_t)a[i]) * b[j]; + } + r[k + 2] += c >> 114; + r[k + 1] = (c >> 57) & 0x1ffffffffffffffL; + c = (c & 0x1ffffffffffffffL) << 57; + } + r[0] = (sp_digit)(c >> 57); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_36(sp_digit* r, const sp_digit* a) +{ + int i, j, k; + int128_t c; + + c = ((int128_t)a[35]) * a[35]; + r[71] = (sp_digit)(c >> 57); + c = (c & 0x1ffffffffffffffL) << 57; + for (k = 69; k >= 0; k--) { + for (i = 35; i >= 0; i--) { + j = k - i; + if (j >= 36 || i <= j) { + break; + } + if (j < 0) { + continue; + } + + c += ((int128_t)a[i]) * a[j] * 2; + } + if (i == j) { + c += ((int128_t)a[i]) * a[i]; + } + + r[k + 2] += c >> 114; + r[k + 1] = (c >> 57) & 0x1ffffffffffffffL; + c = (c & 0x1ffffffffffffffL) << 57; + } + r[0] = (sp_digit)(c >> 57); +} + +#endif /* WOLFSSL_SP_SMALL */ +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_add_18(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 18; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_sub_18(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 18; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_18(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j, k; + int128_t c; + + c = ((int128_t)a[17]) * b[17]; + r[35] = (sp_digit)(c >> 57); + c = (c & 0x1ffffffffffffffL) << 57; + for (k = 33; k >= 0; k--) { + for (i = 17; i >= 0; i--) { + j = k - i; + if (j >= 18) { + break; + } + if (j < 0) { + continue; + } + + c += ((int128_t)a[i]) * b[j]; + } + r[k + 2] += c >> 114; + r[k + 1] = (c >> 57) & 0x1ffffffffffffffL; + c = (c & 0x1ffffffffffffffL) << 57; + } + r[0] = (sp_digit)(c >> 57); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
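The WOLFSSL_SP_SMALL variants above use product scanning instead: a single 128-bit accumulator visits each output column k, summing every a[i] * b[k - i] before 57 result bits are peeled off. A reduced column scan (ours; it walks columns low-to-high, while the in-tree loop walks high-to-low with the same per-column arithmetic):

    #include <stdio.h>
    #include <stdint.h>

    #define LIMB_BITS 57
    #define LIMB_MASK 0x1ffffffffffffffULL

    /* Product scanning: column k collects every a[i]*b[k-i]. */
    static void mul_ps(uint64_t *r, const uint64_t *a, const uint64_t *b,
                       int n) {
        unsigned __int128 acc = 0;
        for (int k = 0; k < 2 * n - 1; k++) {
            int lo = k - (n - 1); if (lo < 0) lo = 0;
            for (int i = lo; i <= k && i < n; i++)
                acc += (unsigned __int128)a[i] * b[k - i];
            r[k] = (uint64_t)acc & LIMB_MASK;
            acc >>= LIMB_BITS;
        }
        r[2 * n - 1] = (uint64_t)acc;
    }

    int main(void) {
        /* Small enough that the full product fits __int128 for checking. */
        uint64_t a[2] = { 0x1ffffffffffffffULL, 0x7f };
        uint64_t b[2] = { 0x0123456789abcdeULL, 0x55 };
        uint64_t r[4];
        mul_ps(r, a, b, 2);
        unsigned __int128 av = ((unsigned __int128)a[1] << LIMB_BITS) | a[0];
        unsigned __int128 bv = ((unsigned __int128)b[1] << LIMB_BITS) | b[0];
        unsigned __int128 rv = ((unsigned __int128)r[2] << (2 * LIMB_BITS))
                             | ((unsigned __int128)r[1] << LIMB_BITS) | r[0];
        printf("product-scan %s\n",
               (rv == av * bv && r[3] == 0) ? "ok" : "BAD");
        return 0;
    }

With 57-bit limbs even a 36-term column stays far below 128 bits, which is exactly the carry headroom the 57-of-64 split buys.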
+ */
+SP_NOINLINE static void sp_2048_sqr_18(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int128_t c;
+
+    c = ((int128_t)a[17]) * a[17];
+    r[35] = (sp_digit)(c >> 57);
+    c = (c & 0x1ffffffffffffffL) << 57;
+    for (k = 33; k >= 0; k--) {
+        for (i = 17; i >= 0; i--) {
+            j = k - i;
+            if (j >= 18 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int128_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+            c += ((int128_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 114;
+        r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
+        c = (c & 0x1ffffffffffffffL) << 57;
+    }
+    r[0] = (sp_digit)(c >> 57);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a    A single precision number.
+ * rho  Bottom word of inverse.
+ */
+static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b;   /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;                 /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;                 /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;                 /* here x*a==1 mod 2**32 */
+    x *= 2 - b * x;                 /* here x*a==1 mod 2**64 */
+    x &= 0x1ffffffffffffffL;
+
+    /* rho = -1/m mod b */
+    *rho = (1L << 57) - x;
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_2048_mul_d_36(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 36; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1ffffffffffffffL;
+        t >>= 57;
+    }
+    r[36] = (sp_digit)t;
+#else
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffL;
+    for (i = 0; i < 32; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffL);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffL);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffL);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffL);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffL);
+    }
+    t[1] = tb * a[33];
+    r[33] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
+    t[2] = tb * a[34];
+    r[34] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
+    t[3] = tb * a[35];
+    r[35] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL);
+    r[36] = (sp_digit)(t[3] >> 57);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_2048_mont_norm_18(sp_digit* r, const sp_digit* m)
+{
+    /* Set r = 2^n - 1.
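sp_2048_mont_setup above finds rho = -1/m mod 2^57 by Hensel lifting: a bit-trick seed that is already correct mod 2^4, then four x *= 2 - m*x steps, each doubling the number of valid low bits. The same iteration run out to a full 64-bit word, with the result checked both ways (standalone sketch, our names):

    #include <stdio.h>
    #include <stdint.h>

    /* Invert an odd word modulo 2^64: each step doubles the correct bits. */
    static uint64_t inv_pow2(uint64_t m) {
        uint64_t x = (((m + 2) & 4) << 1) + m;  /* x*m == 1 (mod 2^4) */
        x *= 2 - m * x;                         /* ... mod 2^8        */
        x *= 2 - m * x;                         /* ... mod 2^16       */
        x *= 2 - m * x;                         /* ... mod 2^32       */
        x *= 2 - m * x;                         /* ... mod 2^64       */
        return x;
    }

    int main(void) {
        uint64_t m = 0xfffffffffffffff1ULL;     /* any odd word works  */
        uint64_t x = inv_pow2(m);
        uint64_t rho = 0 - x;                   /* rho = -1/m mod 2^64 */
        printf("m*x     = %llu (expect 1)\n", (unsigned long long)(m * x));
        printf("m*rho+1 = %llu (expect 0)\n", (unsigned long long)(m * rho + 1));
        return 0;
    }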
*/ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<17; i++) { + r[i] = 0x1ffffffffffffffL; + } +#else + int i; + + for (i = 0; i < 16; i += 8) { + r[i + 0] = 0x1ffffffffffffffL; + r[i + 1] = 0x1ffffffffffffffL; + r[i + 2] = 0x1ffffffffffffffL; + r[i + 3] = 0x1ffffffffffffffL; + r[i + 4] = 0x1ffffffffffffffL; + r[i + 5] = 0x1ffffffffffffffL; + r[i + 6] = 0x1ffffffffffffffL; + r[i + 7] = 0x1ffffffffffffffL; + } + r[16] = 0x1ffffffffffffffL; +#endif + r[17] = 0x7fffffffffffffL; + + /* r = (2^n - 1) mod n */ + (void)sp_2048_sub_18(r, r, m); + + /* Add one so r = 2^n mod m */ + r[0] += 1; +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static sp_digit sp_2048_cmp_18(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=17; i>=0; i--) { + r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#else + int i; + + r |= (a[17] - b[17]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[16] - b[16]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + for (i = 8; i >= 0; i -= 8) { + r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static void sp_2048_cond_sub_18(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 18; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + int i; + + for (i = 0; i < 16; i += 8) { + r[i + 0] = a[i + 0] - (b[i + 0] & m); + r[i + 1] = a[i + 1] - (b[i + 1] & m); + r[i + 2] = a[i + 2] - (b[i + 2] & m); + r[i + 3] = a[i + 3] - (b[i + 3] & m); + r[i + 4] = a[i + 4] - (b[i + 4] & m); + r[i + 5] = a[i + 5] - (b[i + 5] & m); + r[i + 6] = a[i + 6] - (b[i + 6] & m); + r[i + 7] = a[i + 7] - (b[i + 7] & m); + } + r[16] = a[16] - (b[16] & m); + r[17] = a[17] - (b[17] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
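sp_2048_cmp_18 above compares without early exit: every limb difference is folded in under a mask that is all-ones only while the running result is still zero, so the time taken does not depend on where a and b first differ; sp_2048_cond_sub_18 likewise applies its result as a mask instead of a branch. A compact sketch of both idioms (4 limbs, our names; small limb values keep the differences from overflowing, just as the 57-bit limbs do in the real code):

    #include <stdio.h>
    #include <stdint.h>

    #define N 4

    /* Constant-time compare: no early exit, no data-dependent branch. */
    static int64_t ct_cmp(const int64_t *a, const int64_t *b) {
        int64_t r = 0;
        for (int i = N - 1; i >= 0; i--)
            r |= (a[i] - b[i]) & (0 - ((r == 0) ? (int64_t)1 : (int64_t)0));
        return r;  /* <0, 0 or >0 like memcmp, but fixed-time */
    }

    /* Conditional subtract under a mask: m is 0 (keep a) or -1 (a - b). */
    static void ct_cond_sub(int64_t *r, const int64_t *a, const int64_t *b,
                            int64_t m) {
        for (int i = 0; i < N; i++)
            r[i] = a[i] - (b[i] & m);
    }

    int main(void) {
        int64_t a[N] = {5, 0, 0, 1}, b[N] = {7, 0, 0, 1}, r[N];
        int64_t c = ct_cmp(a, b);                   /* a < b: negative    */
        ct_cond_sub(r, a, b, 0 - (int64_t)(c > 0)); /* subtract iff a > b */
        printf("cmp=%lld r0=%lld (unchanged)\n", (long long)c, (long long)r[0]);
        return 0;
    }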
+ */ +SP_NOINLINE static void sp_2048_mul_add_18(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 18; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x1ffffffffffffffL; + t >>= 57; + } + r[18] += t; +#else + int128_t tb = b; + int128_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL); + for (i = 0; i < 16; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); + t[2] = tb * a[i+2]; + r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); + t[3] = tb * a[i+3]; + r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); + t[4] = tb * a[i+4]; + r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL)); + t[5] = tb * a[i+5]; + r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL)); + t[6] = tb * a[i+6]; + r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL)); + t[7] = tb * a[i+7]; + r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL)); + t[0] = tb * a[i+8]; + r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL)); + } + t[1] = tb * a[17]; r[17] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); + r[18] += (sp_digit)(t[1] >> 57); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 57. + * + * a Array of sp_digit to normalize. + */ +static void sp_2048_norm_18(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 17; i++) { + a[i+1] += a[i] >> 57; + a[i] &= 0x1ffffffffffffffL; + } +#else + int i; + for (i = 0; i < 16; i += 8) { + a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffL; + a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffL; + a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffL; + a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffL; + a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffL; + a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffL; + a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffL; + a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffL; + a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffL; + } + a[16+1] += a[16] >> 57; + a[16] &= 0x1ffffffffffffffL; +#endif +} + +/* Shift the result in the high 1024 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. 
+ */
+static void sp_2048_mont_shift_18(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    word64 n;
+
+    n = a[17] >> 55;
+    for (i = 0; i < 17; i++) {
+        n += (word64)a[18 + i] << 2;
+        r[i] = n & 0x1ffffffffffffffL;
+        n >>= 57;
+    }
+    n += (word64)a[35] << 2;
+    r[17] = n;
+#else
+    word64 n;
+    int i;
+
+    n = (word64)a[17];
+    n = n >> 55U;
+    for (i = 0; i < 16; i += 8) {
+        n += (word64)a[i+18] << 2U; r[i+0] = n & 0x1ffffffffffffffUL; n >>= 57U;
+        n += (word64)a[i+19] << 2U; r[i+1] = n & 0x1ffffffffffffffUL; n >>= 57U;
+        n += (word64)a[i+20] << 2U; r[i+2] = n & 0x1ffffffffffffffUL; n >>= 57U;
+        n += (word64)a[i+21] << 2U; r[i+3] = n & 0x1ffffffffffffffUL; n >>= 57U;
+        n += (word64)a[i+22] << 2U; r[i+4] = n & 0x1ffffffffffffffUL; n >>= 57U;
+        n += (word64)a[i+23] << 2U; r[i+5] = n & 0x1ffffffffffffffUL; n >>= 57U;
+        n += (word64)a[i+24] << 2U; r[i+6] = n & 0x1ffffffffffffffUL; n >>= 57U;
+        n += (word64)a[i+25] << 2U; r[i+7] = n & 0x1ffffffffffffffUL; n >>= 57U;
+    }
+    n += (word64)a[34] << 2U; r[16] = n & 0x1ffffffffffffffUL; n >>= 57U;
+    n += (word64)a[35] << 2U; r[17] = n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[18], 0, sizeof(*r) * 18U);
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_2048_mont_reduce_18(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_2048_norm_18(a + 18);
+
+    for (i=0; i<17; i++) {
+        mu = (a[i] * mp) & 0x1ffffffffffffffL;
+        sp_2048_mul_add_18(a+i, m, mu);
+        a[i+1] += a[i] >> 57;
+    }
+    mu = (a[i] * mp) & 0x7fffffffffffffL;
+    sp_2048_mul_add_18(a+i, m, mu);
+    a[i+1] += a[i] >> 57;
+    a[i] &= 0x1ffffffffffffffL;
+
+    sp_2048_mont_shift_18(a, a);
+    sp_2048_cond_sub_18(a, a, m, 0 - (((a[17] >> 55) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_2048_norm_18(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_2048_mont_mul_18(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_2048_mul_18(r, a, b);
+    sp_2048_mont_reduce_18(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_2048_mont_sqr_18(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_2048_sqr_18(r, a);
+    sp_2048_mont_reduce_18(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
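sp_2048_mont_reduce_18 above is word-serial Montgomery reduction: for each low limb it chooses mu = a[i] * rho (mod 2^57) so that adding mu * m zeroes that limb, then mont_shift discards the zeroed half; mont_mul and mont_sqr are just multiply-then-reduce. The one-word version of the same REDC step, verified against plain modular arithmetic (standalone sketch; assumes the gcc/clang unsigned __int128):

    #include <stdio.h>
    #include <stdint.h>

    typedef unsigned __int128 u128;

    /* rho = -1/m mod 2^64, Hensel-lifted like sp_2048_mont_setup. */
    static uint64_t neg_inv(uint64_t m) {
        uint64_t x = (((m + 2) & 4) << 1) + m;   /* inverse mod 2^4 */
        for (int i = 0; i < 4; i++) x *= 2 - m * x;
        return 0 - x;
    }

    /* One-word REDC: for t < m*2^64, returns t * 2^-64 mod m.
     * Adding mu*m zeroes the low word, so the shift loses nothing. */
    static uint64_t redc(u128 t, uint64_t m, uint64_t rho) {
        uint64_t mu = (uint64_t)t * rho;
        t = (t + (u128)mu * m) >> 64;
        return (t >= m) ? (uint64_t)(t - m) : (uint64_t)t;
    }

    int main(void) {
        /* m < 2^63 keeps t + mu*m inside 128 bits, the same kind of
         * slack the 57-bit limbs leave in this file's 64-bit words. */
        uint64_t m = 0x7fffffffffffffe7ULL;
        uint64_t rho = neg_inv(m);
        uint64_t R = (uint64_t)(((u128)1 << 64) % m);  /* 2^64 mod m */
        uint64_t R2 = (uint64_t)((u128)R * R % m);     /* 2^128 mod m */
        uint64_t a = 0x0123456789abcdefULL % m;
        uint64_t b = 0x6543210fedcba987ULL % m;
        uint64_t aR = redc((u128)a * R2, m, rho);   /* to Montgomery form */
        uint64_t ab = redc((u128)aR * b, m, rho);   /* aR*b/R = a*b mod m */
        printf("redc %s\n",
               ab == (uint64_t)(((u128)a * b) % m) ? "ok" : "BAD");
        return 0;
    }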
+ */ +SP_NOINLINE static void sp_2048_mul_d_18(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 18; i++) { + t += tb * a[i]; + r[i] = t & 0x1ffffffffffffffL; + t >>= 57; + } + r[18] = (sp_digit)t; +#else + int128_t tb = b; + int128_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffL; + for (i = 0; i < 16; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffL); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffL); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffL); + } + t[1] = tb * a[17]; + r[17] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); + r[18] = (sp_digit)(t[1] >> 57); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_2048_cond_add_18(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 18; i++) { + r[i] = a[i] + (b[i] & m); + } +#else + int i; + + for (i = 0; i < 16; i += 8) { + r[i + 0] = a[i + 0] + (b[i + 0] & m); + r[i + 1] = a[i + 1] + (b[i + 1] & m); + r[i + 2] = a[i + 2] + (b[i + 2] & m); + r[i + 3] = a[i + 3] + (b[i + 3] & m); + r[i + 4] = a[i + 4] + (b[i + 4] & m); + r[i + 5] = a[i + 5] + (b[i + 5] & m); + r[i + 6] = a[i + 6] + (b[i + 6] & m); + r[i + 7] = a[i + 7] + (b[i + 7] & m); + } + r[16] = a[16] + (b[16] & m); + r[17] = a[17] + (b[17] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifdef WOLFSSL_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_sub_18(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 18; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif +#ifdef WOLFSSL_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_2048_add_18(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 18; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif +#ifdef WOLFSSL_SP_DIV_64 +static WC_INLINE sp_digit sp_2048_div_word_18(sp_digit d1, sp_digit d0, + sp_digit dv) +{ + sp_digit d, r, t; + + /* All 57 bits from d1 and top 6 bits from d0. */ + d = (d1 << 6) | (d0 >> 51); + r = d / dv; + d -= r * dv; + /* Up to 7 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 45) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 13 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 39) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 19 bits in r */ + /* Next 6 bits from d0. 
*/ + r <<= 6; + d <<= 6; + d |= (d0 >> 33) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 25 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 27) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 31 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 21) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 37 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 15) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 43 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 9) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 49 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 3) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 55 bits in r */ + /* Remaining 3 bits from d0. */ + r <<= 3; + d <<= 3; + d |= d0 & ((1 << 3) - 1); + t = d / dv; + r += t; + + return r; +} +#endif /* WOLFSSL_SP_DIV_64 */ + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Number to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_2048_div_18(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_64 + int128_t d1; +#endif + sp_digit dv, r1; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* td; +#else + sp_digit t1d[36], t2d[18 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 18 + 1), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = td; + t2 = td + 2 * 18; +#else + t1 = t1d; + t2 = t2d; +#endif + + dv = d[17]; + XMEMCPY(t1, a, sizeof(*t1) * 2U * 18U); + for (i=17; i>=0; i--) { + t1[18 + i] += t1[18 + i - 1] >> 57; + t1[18 + i - 1] &= 0x1ffffffffffffffL; +#ifndef WOLFSSL_SP_DIV_64 + d1 = t1[18 + i]; + d1 <<= 57; + d1 += t1[18 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_2048_div_word_18(t1[18 + i], t1[18 + i - 1], dv); +#endif + + sp_2048_mul_d_18(t2, d, r1); + (void)sp_2048_sub_18(&t1[i], &t1[i], t2); + t1[18 + i] -= t2[18]; + t1[18 + i] += t1[18 + i - 1] >> 57; + t1[18 + i - 1] &= 0x1ffffffffffffffL; + r1 = (((-t1[18 + i]) << 57) - t1[18 + i - 1]) / dv; + r1++; + sp_2048_mul_d_18(t2, d, r1); + (void)sp_2048_add_18(&t1[i], &t1[i], t2); + t1[18 + i] += t1[18 + i - 1] >> 57; + t1[18 + i - 1] &= 0x1ffffffffffffffL; + } + t1[18 - 1] += t1[18 - 2] >> 57; + t1[18 - 2] &= 0x1ffffffffffffffL; + r1 = t1[18 - 1] / dv; + + sp_2048_mul_d_18(t2, d, r1); + (void)sp_2048_sub_18(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 18U); + for (i=0; i<16; i++) { + r[i+1] += r[i] >> 57; + r[i] &= 0x1ffffffffffffffL; + } + sp_2048_cond_add_18(r, r, d, 0 - ((r[17] < 0) ? + (sp_digit)1 : (sp_digit)0)); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. 
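sp_2048_div_word_18 above recovers the quotient of a two-limb value by a one-limb divisor six bits at a time, so every partial divide fits a native 64-by-64 division and no 128-by-64 divide instruction is needed; sp_2048_div_18 then uses it for the schoolbook trial-quotient loop. A generic rendering of the chunked divide, checked against a 128-bit reference (our names):

    #include <stdio.h>
    #include <stdint.h>

    #define WBITS 57   /* limb width, as in this file */
    #define CHUNK 6    /* quotient bits recovered per step */

    /* Divide d1*2^57 + d0 by dv, CHUNK bits at a time. Assumes d1 < dv,
     * as the trial-quotient caller arranges, so the quotient fits a limb. */
    static uint64_t div_word(uint64_t d1, uint64_t d0, uint64_t dv) {
        uint64_t d = (d1 << CHUNK) | (d0 >> (WBITS - CHUNK));
        uint64_t r = d / dv;
        d -= r * dv;
        for (int s = WBITS - 2 * CHUNK; s >= 0; s -= CHUNK) {
            d = (d << CHUNK) | ((d0 >> s) & ((1u << CHUNK) - 1));
            uint64_t t = d / dv;
            d -= t * dv;
            r = (r << CHUNK) + t;
        }
        /* 57 = 6 + 8*6 + 3: finish with the three leftover low bits,
         * exactly as the in-tree version does. */
        d = (d << 3) | (d0 & 7);
        r = (r << 3) + d / dv;
        return r;
    }

    int main(void) {
        uint64_t dv = 0x1234567890abcdULL;              /* divisor < 2^57 */
        uint64_t d1 = dv - 1;                           /* keeps q < 2^57 */
        uint64_t d0 = 0x0fedcba987654321ULL & 0x1ffffffffffffffULL;
        unsigned __int128 n = ((unsigned __int128)d1 << WBITS) | d0;
        uint64_t q = div_word(d1, d0, dv);
        printf("div %s\n", q == (uint64_t)(n / dv) ? "ok" : "BAD");
        return 0;
    }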
(r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_2048_mod_18(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_18(a, m, NULL, r); +} + +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* td; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 18 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3U * 18U * 2U); + + norm = t[0] = td; + t[1] = &td[18 * 2]; + t[2] = &td[2 * 18 * 2]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_18(norm, m); + + if (reduceA != 0) { + err = sp_2048_mod_18(t[1], a, m); + } + else { + XMEMCPY(t[1], a, sizeof(sp_digit) * 18U); + } + } + if (err == MP_OKAY) { + sp_2048_mul_18(t[1], t[1], norm); + err = sp_2048_mod_18(t[1], t[1], m); + } + + if (err == MP_OKAY) { + i = bits / 57; + c = bits % 57; + n = e[i--] << (57 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 57; + } + + y = (n >> 56) & 1; + n <<= 1; + + sp_2048_mont_mul_18(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(*t[2]) * 18 * 2); + sp_2048_mont_sqr_18(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(*t[2]) * 18 * 2); + } + + sp_2048_mont_reduce_18(t[0], m, mp); + n = sp_2048_cmp_18(t[0], m); + sp_2048_cond_sub_18(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 18 * 2); + + } + + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + + return err; +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[3][36]; +#else + sp_digit* td; + sp_digit* t[3]; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 18 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + t[0] = td; + t[1] = &td[18 * 2]; + t[2] = &td[2 * 18 * 2]; +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_18(norm, m); + + if (reduceA != 0) { + err = sp_2048_mod_18(t[1], a, m); + if (err == MP_OKAY) { + sp_2048_mul_18(t[1], t[1], norm); + err = sp_2048_mod_18(t[1], t[1], m); + } + } + else { + sp_2048_mul_18(t[1], a, norm); + err = sp_2048_mod_18(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + i = bits / 57; + c = bits % 57; + n = e[i--] << (57 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 57; + } + + y = (n >> 56) & 1; + n <<= 1; + + sp_2048_mont_mul_18(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); + sp_2048_mont_sqr_18(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); + } + + sp_2048_mont_reduce_18(t[0], m, mp); + n = sp_2048_cmp_18(t[0], m); + sp_2048_cond_sub_18(t[0], t[0], m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(t[0])); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][36]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit rt[36]; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 36, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 36; +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_18(norm, m); + + if (reduceA != 0) { + err = sp_2048_mod_18(t[1], a, m); + if (err == MP_OKAY) { + sp_2048_mul_18(t[1], t[1], norm); + err = sp_2048_mod_18(t[1], t[1], m); + } + } + else { + sp_2048_mul_18(t[1], a, norm); + err = sp_2048_mod_18(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_18(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_18(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_18(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_18(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_18(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_18(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_18(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_18(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_18(t[10], t[ 5], m, mp); + sp_2048_mont_mul_18(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_18(t[12], t[ 6], m, mp); + sp_2048_mont_mul_18(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_18(t[14], t[ 7], m, mp); + sp_2048_mont_mul_18(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_18(t[16], t[ 8], m, mp); + sp_2048_mont_mul_18(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_18(t[18], t[ 9], m, mp); + 
sp_2048_mont_mul_18(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_18(t[20], t[10], m, mp); + sp_2048_mont_mul_18(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_18(t[22], t[11], m, mp); + sp_2048_mont_mul_18(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_18(t[24], t[12], m, mp); + sp_2048_mont_mul_18(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_18(t[26], t[13], m, mp); + sp_2048_mont_mul_18(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_18(t[28], t[14], m, mp); + sp_2048_mont_mul_18(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_18(t[30], t[15], m, mp); + sp_2048_mont_mul_18(t[31], t[16], t[15], m, mp); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 56) / 57) - 1; + c = bits % 57; + if (c == 0) { + c = 57; + } + if (i < 18) { + n = e[i--] << (64 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (7 - c); + c += 57; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + XMEMCPY(rt, t[y], sizeof(rt)); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (7 - c); + c += 57; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + + sp_2048_mont_sqr_18(rt, rt, m, mp); + sp_2048_mont_sqr_18(rt, rt, m, mp); + sp_2048_mont_sqr_18(rt, rt, m, mp); + sp_2048_mont_sqr_18(rt, rt, m, mp); + sp_2048_mont_sqr_18(rt, rt, m, mp); + + sp_2048_mont_mul_18(rt, rt, t[y], m, mp); + } + + sp_2048_mont_reduce_18(rt, m, mp); + n = sp_2048_cmp_18(rt, m); + sp_2048_cond_sub_18(rt, rt, m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, rt, sizeof(rt)); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#endif +} + +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */ + +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 2048 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_2048_mont_norm_36(sp_digit* r, const sp_digit* m) +{ + /* Set r = 2^n - 1. */ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<35; i++) { + r[i] = 0x1ffffffffffffffL; + } +#else + int i; + + for (i = 0; i < 32; i += 8) { + r[i + 0] = 0x1ffffffffffffffL; + r[i + 1] = 0x1ffffffffffffffL; + r[i + 2] = 0x1ffffffffffffffL; + r[i + 3] = 0x1ffffffffffffffL; + r[i + 4] = 0x1ffffffffffffffL; + r[i + 5] = 0x1ffffffffffffffL; + r[i + 6] = 0x1ffffffffffffffL; + r[i + 7] = 0x1ffffffffffffffL; + } + r[32] = 0x1ffffffffffffffL; + r[33] = 0x1ffffffffffffffL; + r[34] = 0x1ffffffffffffffL; +#endif + r[35] = 0x1fffffffffffffL; + + /* r = (2^n - 1) mod n */ + (void)sp_2048_sub_36(r, r, m); + + /* Add one so r = 2^n mod m */ + r[0] += 1; +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static sp_digit sp_2048_cmp_36(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=35; i>=0; i--) { + r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#else + int i; + + r |= (a[35] - b[35]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[34] - b[34]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[33] - b[33]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[32] - b[32]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + for (i = 24; i >= 0; i -= 8) { + r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? 
(sp_digit)1 : (sp_digit)0)); + r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static void sp_2048_cond_sub_36(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 36; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + int i; + + for (i = 0; i < 32; i += 8) { + r[i + 0] = a[i + 0] - (b[i + 0] & m); + r[i + 1] = a[i + 1] - (b[i + 1] & m); + r[i + 2] = a[i + 2] - (b[i + 2] & m); + r[i + 3] = a[i + 3] - (b[i + 3] & m); + r[i + 4] = a[i + 4] - (b[i + 4] & m); + r[i + 5] = a[i + 5] - (b[i + 5] & m); + r[i + 6] = a[i + 6] - (b[i + 6] & m); + r[i + 7] = a[i + 7] - (b[i + 7] & m); + } + r[32] = a[32] - (b[32] & m); + r[33] = a[33] - (b[33] & m); + r[34] = a[34] - (b[34] & m); + r[35] = a[35] - (b[35] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_2048_mul_add_36(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 36; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x1ffffffffffffffL; + t >>= 57; + } + r[36] += t; +#else + int128_t tb = b; + int128_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL); + for (i = 0; i < 32; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); + t[2] = tb * a[i+2]; + r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); + t[3] = tb * a[i+3]; + r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); + t[4] = tb * a[i+4]; + r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL)); + t[5] = tb * a[i+5]; + r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL)); + t[6] = tb * a[i+6]; + r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL)); + t[7] = tb * a[i+7]; + r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL)); + t[0] = tb * a[i+8]; + r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL)); + } + t[1] = tb * a[33]; r[33] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); + t[2] = tb * a[34]; r[34] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); + t[3] = tb * a[35]; r[35] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); + r[36] += (sp_digit)(t[3] >> 57); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 57. + * + * a Array of sp_digit to normalize. 
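The widest branch of sp_2048_mod_exp_18 earlier precomputes t[0..31] = a^0..a^31 in Montgomery form and then spends five squarings plus one table multiply per 5-bit slice of the exponent, multiplying even for a zero window so the operation sequence does not depend on the bits. The same fixed-window shape on ordinary words, window size 4, checked against bit-at-a-time square-and-multiply (a sketch, our names):

    #include <stdio.h>
    #include <stdint.h>

    typedef unsigned __int128 u128;

    static uint64_t mulmod(uint64_t a, uint64_t b, uint64_t m) {
        return (uint64_t)((u128)a * b % m);
    }

    /* Fixed 4-bit windows, most significant first: four squarings then
     * one multiply by the precomputed power the window selects. */
    static uint64_t powmod_win4(uint64_t a, uint64_t e, uint64_t m) {
        uint64_t t[16];
        t[0] = 1 % m;
        for (int i = 1; i < 16; i++) t[i] = mulmod(t[i - 1], a, m);
        uint64_t r = t[(e >> 60) & 0xf];
        for (int s = 56; s >= 0; s -= 4) {
            for (int i = 0; i < 4; i++) r = mulmod(r, r, m);
            r = mulmod(r, t[(e >> s) & 0xf], m);  /* always multiplies */
        }
        return r;
    }

    int main(void) {
        uint64_t m = 0xffffffffffffffc5ULL;   /* any modulus > 1 works */
        uint64_t a = 0x123456789abcdefULL, e = 0xfedcba9876543210ULL;
        /* reference: plain square-and-multiply */
        uint64_t r = 1 % m, b = a % m, x = e;
        while (x) { if (x & 1) r = mulmod(r, b, m); b = mulmod(b, b, m); x >>= 1; }
        printf("window exp %s\n", powmod_win4(a, e, m) == r ? "ok" : "BAD");
        return 0;
    }

The constant-shape table walk is the throughput option; the WOLFSSL_SP_CACHE_RESISTANT branch instead trades speed for the masked-address copies seen above.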
+ */ +static void sp_2048_norm_36(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 35; i++) { + a[i+1] += a[i] >> 57; + a[i] &= 0x1ffffffffffffffL; + } +#else + int i; + for (i = 0; i < 32; i += 8) { + a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffL; + a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffL; + a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffL; + a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffL; + a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffL; + a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffL; + a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffL; + a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffL; + a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffL; + } + a[32+1] += a[32] >> 57; + a[32] &= 0x1ffffffffffffffL; + a[33+1] += a[33] >> 57; + a[33] &= 0x1ffffffffffffffL; + a[34+1] += a[34] >> 57; + a[34] &= 0x1ffffffffffffffL; +#endif +} + +/* Shift the result in the high 2048 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. + */ +static void sp_2048_mont_shift_36(sp_digit* r, const sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + sp_digit n, s; + + s = a[36]; + n = a[35] >> 53; + for (i = 0; i < 35; i++) { + n += (s & 0x1ffffffffffffffL) << 4; + r[i] = n & 0x1ffffffffffffffL; + n >>= 57; + s = a[37 + i] + (s >> 57); + } + n += s << 4; + r[35] = n; +#else + sp_digit n, s; + int i; + + s = a[36]; n = a[35] >> 53; + for (i = 0; i < 32; i += 8) { + n += (s & 0x1ffffffffffffffL) << 4; r[i+0] = n & 0x1ffffffffffffffL; + n >>= 57; s = a[i+37] + (s >> 57); + n += (s & 0x1ffffffffffffffL) << 4; r[i+1] = n & 0x1ffffffffffffffL; + n >>= 57; s = a[i+38] + (s >> 57); + n += (s & 0x1ffffffffffffffL) << 4; r[i+2] = n & 0x1ffffffffffffffL; + n >>= 57; s = a[i+39] + (s >> 57); + n += (s & 0x1ffffffffffffffL) << 4; r[i+3] = n & 0x1ffffffffffffffL; + n >>= 57; s = a[i+40] + (s >> 57); + n += (s & 0x1ffffffffffffffL) << 4; r[i+4] = n & 0x1ffffffffffffffL; + n >>= 57; s = a[i+41] + (s >> 57); + n += (s & 0x1ffffffffffffffL) << 4; r[i+5] = n & 0x1ffffffffffffffL; + n >>= 57; s = a[i+42] + (s >> 57); + n += (s & 0x1ffffffffffffffL) << 4; r[i+6] = n & 0x1ffffffffffffffL; + n >>= 57; s = a[i+43] + (s >> 57); + n += (s & 0x1ffffffffffffffL) << 4; r[i+7] = n & 0x1ffffffffffffffL; + n >>= 57; s = a[i+44] + (s >> 57); + } + n += (s & 0x1ffffffffffffffL) << 4; r[32] = n & 0x1ffffffffffffffL; + n >>= 57; s = a[69] + (s >> 57); + n += (s & 0x1ffffffffffffffL) << 4; r[33] = n & 0x1ffffffffffffffL; + n >>= 57; s = a[70] + (s >> 57); + n += (s & 0x1ffffffffffffffL) << 4; r[34] = n & 0x1ffffffffffffffL; + n >>= 57; s = a[71] + (s >> 57); + n += s << 4; r[35] = n; +#endif /* WOLFSSL_SP_SMALL */ + XMEMSET(&r[36], 0, sizeof(*r) * 36U); +} + +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
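+ *
+ * Each pass chooses mu = a[i] * mp (mod 2^57) so that adding mu * m
+ * clears word i of a; once all 36 low words are cleared the value is
+ * shifted down 2048 bits and a single conditional subtract of m brings
+ * the result into range.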
+ */
+static void sp_2048_mont_reduce_36(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_2048_norm_36(a + 36);
+
+#ifdef WOLFSSL_SP_DH
+    if (mp != 1) {
+        for (i=0; i<35; i++) {
+            mu = (a[i] * mp) & 0x1ffffffffffffffL;
+            sp_2048_mul_add_36(a+i, m, mu);
+            a[i+1] += a[i] >> 57;
+        }
+        mu = (a[i] * mp) & 0x1fffffffffffffL;
+        sp_2048_mul_add_36(a+i, m, mu);
+        a[i+1] += a[i] >> 57;
+        a[i] &= 0x1ffffffffffffffL;
+    }
+    else {
+        for (i=0; i<35; i++) {
+            mu = a[i] & 0x1ffffffffffffffL;
+            sp_2048_mul_add_36(a+i, m, mu);
+            a[i+1] += a[i] >> 57;
+        }
+        mu = a[i] & 0x1fffffffffffffL;
+        sp_2048_mul_add_36(a+i, m, mu);
+        a[i+1] += a[i] >> 57;
+        a[i] &= 0x1ffffffffffffffL;
+    }
+#else
+    for (i=0; i<35; i++) {
+        mu = (a[i] * mp) & 0x1ffffffffffffffL;
+        sp_2048_mul_add_36(a+i, m, mu);
+        a[i+1] += a[i] >> 57;
+    }
+    mu = (a[i] * mp) & 0x1fffffffffffffL;
+    sp_2048_mul_add_36(a+i, m, mu);
+    a[i+1] += a[i] >> 57;
+    a[i] &= 0x1ffffffffffffffL;
+#endif
+
+    sp_2048_mont_shift_36(a, a);
+    sp_2048_cond_sub_36(a, a, m, 0 - (((a[35] >> 53) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_2048_norm_36(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_2048_mont_mul_36(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_2048_mul_36(r, a, b);
+    sp_2048_mont_reduce_36(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_2048_mont_sqr_36(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_2048_sqr_36(r, a);
+    sp_2048_mont_reduce_36(r, m, mp);
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static void sp_2048_cond_add_36(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 36; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r[i + 0] = a[i + 0] + (b[i + 0] & m);
+        r[i + 1] = a[i + 1] + (b[i + 1] & m);
+        r[i + 2] = a[i + 2] + (b[i + 2] & m);
+        r[i + 3] = a[i + 3] + (b[i + 3] & m);
+        r[i + 4] = a[i + 4] + (b[i + 4] & m);
+        r[i + 5] = a[i + 5] + (b[i + 5] & m);
+        r[i + 6] = a[i + 6] + (b[i + 6] & m);
+        r[i + 7] = a[i + 7] + (b[i + 7] & m);
+    }
+    r[32] = a[32] + (b[32] & m);
+    r[33] = a[33] + (b[33] & m);
+    r[34] = a[34] + (b[34] & m);
+    r[35] = a[35] + (b[35] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_sub_36(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 36; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#endif
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
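+ *
+ * With only 57 value bits per 64-bit word the word-wise sums cannot
+ * overflow; carry propagation is left to a later normalization step.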
+ */ +SP_NOINLINE static int sp_2048_add_36(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 36; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif +#ifdef WOLFSSL_SP_DIV_64 +static WC_INLINE sp_digit sp_2048_div_word_36(sp_digit d1, sp_digit d0, + sp_digit dv) +{ + sp_digit d, r, t; + + /* All 57 bits from d1 and top 6 bits from d0. */ + d = (d1 << 6) | (d0 >> 51); + r = d / dv; + d -= r * dv; + /* Up to 7 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 45) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 13 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 39) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 19 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 33) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 25 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 27) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 31 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 21) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 37 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 15) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 43 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 9) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 49 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 3) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 55 bits in r */ + /* Remaining 3 bits from d0. */ + r <<= 3; + d <<= 3; + d |= d0 & ((1 << 3) - 1); + t = d / dv; + r += t; + + return r; +} +#endif /* WOLFSSL_SP_DIV_64 */ + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Number to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. 
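+ *
+ * Schoolbook division by the top word of d: each step estimates a
+ * quotient word from the leading two words of the partial remainder
+ * (using sp_2048_div_word_36() when 128-bit division is not used),
+ * subtracts r1 * d, and then applies a small correction so the
+ * remainder stays in range.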
+ */ +static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_64 + int128_t d1; +#endif + sp_digit dv, r1; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* td; +#else + sp_digit t1d[72], t2d[36 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 36 + 1), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = td; + t2 = td + 2 * 36; +#else + t1 = t1d; + t2 = t2d; +#endif + + dv = d[35]; + XMEMCPY(t1, a, sizeof(*t1) * 2U * 36U); + for (i=35; i>=0; i--) { + t1[36 + i] += t1[36 + i - 1] >> 57; + t1[36 + i - 1] &= 0x1ffffffffffffffL; +#ifndef WOLFSSL_SP_DIV_64 + d1 = t1[36 + i]; + d1 <<= 57; + d1 += t1[36 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_2048_div_word_36(t1[36 + i], t1[36 + i - 1], dv); +#endif + + sp_2048_mul_d_36(t2, d, r1); + (void)sp_2048_sub_36(&t1[i], &t1[i], t2); + t1[36 + i] -= t2[36]; + t1[36 + i] += t1[36 + i - 1] >> 57; + t1[36 + i - 1] &= 0x1ffffffffffffffL; + r1 = (((-t1[36 + i]) << 57) - t1[36 + i - 1]) / dv; + r1++; + sp_2048_mul_d_36(t2, d, r1); + (void)sp_2048_add_36(&t1[i], &t1[i], t2); + t1[36 + i] += t1[36 + i - 1] >> 57; + t1[36 + i - 1] &= 0x1ffffffffffffffL; + } + t1[36 - 1] += t1[36 - 2] >> 57; + t1[36 - 2] &= 0x1ffffffffffffffL; + r1 = t1[36 - 1] / dv; + + sp_2048_mul_d_36(t2, d, r1); + (void)sp_2048_sub_36(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 36U); + for (i=0; i<34; i++) { + r[i+1] += r[i] >> 57; + r[i] &= 0x1ffffffffffffffL; + } + sp_2048_cond_add_36(r, r, d, 0 - ((r[35] < 0) ? + (sp_digit)1 : (sp_digit)0)); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_2048_mod_36(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_36(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
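+ *
+ * Three variants are compiled in: a two-operand Montgomery ladder for
+ * WOLFSSL_SP_SMALL, a masked-pointer version for
+ * WOLFSSL_SP_CACHE_RESISTANT, and otherwise a 5-bit fixed window over
+ * 32 precomputed Montgomery-form powers of a.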
+ */ +static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* td; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 36 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3U * 36U * 2U); + + norm = t[0] = td; + t[1] = &td[36 * 2]; + t[2] = &td[2 * 36 * 2]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_36(norm, m); + + if (reduceA != 0) { + err = sp_2048_mod_36(t[1], a, m); + } + else { + XMEMCPY(t[1], a, sizeof(sp_digit) * 36U); + } + } + if (err == MP_OKAY) { + sp_2048_mul_36(t[1], t[1], norm); + err = sp_2048_mod_36(t[1], t[1], m); + } + + if (err == MP_OKAY) { + i = bits / 57; + c = bits % 57; + n = e[i--] << (57 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 57; + } + + y = (n >> 56) & 1; + n <<= 1; + + sp_2048_mont_mul_36(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(*t[2]) * 36 * 2); + sp_2048_mont_sqr_36(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(*t[2]) * 36 * 2); + } + + sp_2048_mont_reduce_36(t[0], m, mp); + n = sp_2048_cmp_36(t[0], m); + sp_2048_cond_sub_36(t[0], t[0], m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 36 * 2); + + } + + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + + return err; +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[3][72]; +#else + sp_digit* td; + sp_digit* t[3]; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 36 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + t[0] = td; + t[1] = &td[36 * 2]; + t[2] = &td[2 * 36 * 2]; +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_36(norm, m); + + if (reduceA != 0) { + err = sp_2048_mod_36(t[1], a, m); + if (err == MP_OKAY) { + sp_2048_mul_36(t[1], t[1], norm); + err = sp_2048_mod_36(t[1], t[1], m); + } + } + else { + sp_2048_mul_36(t[1], a, norm); + err = sp_2048_mod_36(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + i = bits / 57; + c = bits % 57; + n = e[i--] << (57 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 57; + } + + y = (n >> 56) & 1; + n <<= 1; + + sp_2048_mont_mul_36(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); + sp_2048_mont_sqr_36(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); + } + + sp_2048_mont_reduce_36(t[0], m, mp); + n = sp_2048_cmp_36(t[0], m); + sp_2048_cond_sub_36(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(t[0])); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][72]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit rt[72]; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 72, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 72; +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_36(norm, m); + + if (reduceA != 0) { + err = sp_2048_mod_36(t[1], a, m); + if (err == MP_OKAY) { + sp_2048_mul_36(t[1], t[1], norm); + err = sp_2048_mod_36(t[1], t[1], m); + } + } + else { + sp_2048_mul_36(t[1], a, norm); + err = sp_2048_mod_36(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_36(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_36(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_36(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_36(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_36(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_36(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_36(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_36(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_36(t[10], t[ 5], m, mp); + sp_2048_mont_mul_36(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_36(t[12], t[ 6], m, mp); + sp_2048_mont_mul_36(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_36(t[14], t[ 7], m, mp); + sp_2048_mont_mul_36(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_36(t[16], t[ 8], m, mp); + sp_2048_mont_mul_36(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_36(t[18], t[ 9], m, mp); + sp_2048_mont_mul_36(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_36(t[20], t[10], m, mp); + sp_2048_mont_mul_36(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_36(t[22], t[11], m, mp); + sp_2048_mont_mul_36(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_36(t[24], t[12], m, mp); + sp_2048_mont_mul_36(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_36(t[26], t[13], m, mp); + sp_2048_mont_mul_36(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_36(t[28], t[14], m, mp); + sp_2048_mont_mul_36(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_36(t[30], t[15], m, mp); + sp_2048_mont_mul_36(t[31], t[16], t[15], m, mp); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 56) / 57) - 1; + c = bits % 57; + if (c == 0) { + c = 57; + } + if (i < 36) { + n = e[i--] << (64 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (7 - c); + c += 57; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + XMEMCPY(rt, t[y], sizeof(rt)); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (7 - c); + c += 57; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + + sp_2048_mont_sqr_36(rt, rt, m, mp); + sp_2048_mont_sqr_36(rt, rt, m, mp); + sp_2048_mont_sqr_36(rt, rt, m, mp); + sp_2048_mont_sqr_36(rt, rt, m, mp); + sp_2048_mont_sqr_36(rt, rt, m, mp); + + sp_2048_mont_mul_36(rt, rt, t[y], m, mp); + } + + sp_2048_mont_reduce_36(rt, m, mp); + n = sp_2048_cmp_36(rt, m); + sp_2048_cond_sub_36(rt, rt, m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, rt, sizeof(rt)); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#endif +} +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */ + /* WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; + sp_digit* norm; + sp_digit e[1] = {0}; + sp_digit mp; + int i; + int err = MP_OKAY; + + if (*outLen < 256U) { + err = MP_TO_E; + } + + if (err == MP_OKAY) { + if (mp_count_bits(em) > 57) { + err = MP_READ_E; + } + if (inLen > 256U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 36 * 2; + m = r + 36 * 2; + norm = r; + + sp_2048_from_bin(a, 36, in, inLen); +#if DIGIT_BIT >= 57 + e[0] = (sp_digit)em->dp[0]; +#else + e[0] = (sp_digit)em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_mp(m, 36, mm); + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_36(norm, m); + } + if (err == MP_OKAY) { + sp_2048_mul_36(a, a, norm); + err = sp_2048_mod_36(a, a, m); + } + if (err == MP_OKAY) { + for (i=56; i>=0; i--) { + if ((e[0] >> i) != 0) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 36 * 2); + for (i--; i>=0; i--) { + sp_2048_mont_sqr_36(r, r, m, mp); + + if (((e[0] >> i) & 1) == 1) { + sp_2048_mont_mul_36(r, r, a, m, mp); + } + } + sp_2048_mont_reduce_36(r, m, mp); + mp = sp_2048_cmp_36(r, m); + sp_2048_cond_sub_36(r, r, m, ((mp < 0) ? 
+ (sp_digit)1 : (sp_digit)0)- 1); + + sp_2048_to_bin(r, out); + *outLen = 256; + } + + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit ad[72], md[36], rd[72]; +#else + sp_digit* d = NULL; +#endif + sp_digit* a; + sp_digit* m; + sp_digit* r; + sp_digit e[1] = {0}; + int err = MP_OKAY; + + if (*outLen < 256U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(em) > 57) { + err = MP_READ_E; + } + if (inLen > 256U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 2048) { + err = MP_READ_E; + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + a = d; + r = a + 36 * 2; + m = r + 36 * 2; + } +#else + a = ad; + m = md; + r = rd; +#endif + + if (err == MP_OKAY) { + sp_2048_from_bin(a, 36, in, inLen); +#if DIGIT_BIT >= 57 + e[0] = (sp_digit)em->dp[0]; +#else + e[0] = (sp_digit)em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_2048_from_mp(m, 36, mm); + + if (e[0] == 0x3) { + sp_2048_sqr_36(r, a); + err = sp_2048_mod_36(r, r, m); + if (err == MP_OKAY) { + sp_2048_mul_36(r, a, r); + err = sp_2048_mod_36(r, r, m); + } + } + else { + sp_digit* norm = r; + int i; + sp_digit mp; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_36(norm, m); + + sp_2048_mul_36(a, a, norm); + err = sp_2048_mod_36(a, a, m); + + if (err == MP_OKAY) { + for (i=56; i>=0; i--) { + if ((e[0] >> i) != 0) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 72U); + for (i--; i>=0; i--) { + sp_2048_mont_sqr_36(r, r, m, mp); + + if (((e[0] >> i) & 1) == 1) { + sp_2048_mont_mul_36(r, r, a, m, mp); + } + } + sp_2048_mont_reduce_36(r, m, mp); + mp = sp_2048_cmp_36(r, m); + sp_2048_cond_sub_36(r, r, m, ((mp < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + } + } + + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } +#endif + + return err; +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) +#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */ +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
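+ *
+ * Unless SP_RSA_PRIVATE_EXP_D or RSA_LOW_MEM forces use of d, the CRT
+ * path runs two half-size exponentiations with the 18-word routines:
+ * tmpa = in^dpm mod pm and tmpb = in^dqm mod qm, and the result is
+ * tmpb + qm * ((qim * (tmpa - tmpb)) mod pm).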
+ */ +int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* a; + sp_digit* d = NULL; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 256U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 2048) { + err = MP_READ_E; + } + if (inLen > 256) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 4, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = d + 36; + m = a + 72; + r = a; + + sp_2048_from_bin(a, 36, in, inLen); + sp_2048_from_mp(d, 36, dm); + sp_2048_from_mp(m, 36, mm); + err = sp_2048_mod_exp_36(r, a, d, 2048, m, 0); + } + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + } + + if (d != NULL) { + XMEMSET(d, 0, sizeof(sp_digit) * 36); + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else + sp_digit a[72], d[36], m[36]; + sp_digit* r = a; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 256U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 2048) { + err = MP_READ_E; + } + if (inLen > 256U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_bin(a, 36, in, inLen); + sp_2048_from_mp(d, 36, dm); + sp_2048_from_mp(m, 36, mm); + err = sp_2048_mod_exp_36(r, a, d, 2048, m, 0); + } + + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + } + + XMEMSET(d, 0, sizeof(sp_digit) * 36); + + return err; +#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ +#else +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* dq; + sp_digit* qi; + sp_digit* tmpa; + sp_digit* tmpb; + sp_digit* r; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 256U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (inLen > 256) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = t; + p = a + 36 * 2; + q = p + 18; + qi = dq = dp = q + 18; + tmpa = qi + 18; + tmpb = tmpa + 36; + + r = t + 36; + + sp_2048_from_bin(a, 36, in, inLen); + sp_2048_from_mp(p, 18, pm); + sp_2048_from_mp(q, 18, qm); + sp_2048_from_mp(dp, 18, dpm); + err = sp_2048_mod_exp_18(tmpa, a, dp, 1024, p, 1); + } + if (err == MP_OKAY) { + sp_2048_from_mp(dq, 18, dqm); + err = sp_2048_mod_exp_18(tmpb, a, dq, 1024, q, 1); + } + if (err == MP_OKAY) { + (void)sp_2048_sub_18(tmpa, tmpa, tmpb); + sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63)); + sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63)); + + sp_2048_from_mp(qi, 18, qim); + sp_2048_mul_18(tmpa, tmpa, qi); + err = sp_2048_mod_18(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_2048_mul_18(tmpa, q, tmpa); + (void)sp_2048_add_36(r, tmpb, tmpa); + 
sp_2048_norm_36(r); + + sp_2048_to_bin(r, out); + *outLen = 256; + } + + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 18 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else + sp_digit a[36 * 2]; + sp_digit p[18], q[18], dp[18], dq[18], qi[18]; + sp_digit tmpa[36], tmpb[36]; + sp_digit* r = a; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 256U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (inLen > 256U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_bin(a, 36, in, inLen); + sp_2048_from_mp(p, 18, pm); + sp_2048_from_mp(q, 18, qm); + sp_2048_from_mp(dp, 18, dpm); + sp_2048_from_mp(dq, 18, dqm); + sp_2048_from_mp(qi, 18, qim); + + err = sp_2048_mod_exp_18(tmpa, a, dp, 1024, p, 1); + } + if (err == MP_OKAY) { + err = sp_2048_mod_exp_18(tmpb, a, dq, 1024, q, 1); + } + + if (err == MP_OKAY) { + (void)sp_2048_sub_18(tmpa, tmpa, tmpb); + sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63)); + sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63)); + sp_2048_mul_18(tmpa, tmpa, qi); + err = sp_2048_mod_18(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_2048_mul_18(tmpa, tmpa, q); + (void)sp_2048_add_36(r, tmpb, tmpa); + sp_2048_norm_36(r); + + sp_2048_to_bin(r, out); + *outLen = 256; + } + + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); + XMEMSET(dq, 0, sizeof(dq)); + XMEMSET(qi, 0, sizeof(qi)); + + return err; +#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +} + +#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_2048_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 57 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 36); + r->used = 36; + mp_clamp(r); +#elif DIGIT_BIT < 57 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 36; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 57) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 57 - s; + } + r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 36; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 57 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 57 - s; + } + else { + s += 57; + } + } + r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
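+ *
+ * Operands are converted with sp_2048_from_mp(), the exponentiation is
+ * delegated to sp_2048_mod_exp_36(), and the result is returned via
+ * sp_2048_to_mp(); the exponent copy is zeroed before release.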
+ */ +int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 36 * 2; + m = e + 36; + r = b; + + sp_2048_from_mp(b, 36, base); + sp_2048_from_mp(e, 36, exp); + sp_2048_from_mp(m, 36, mod); + + err = sp_2048_mod_exp_36(r, b, e, mp_count_bits(exp), m, 0); + } + + if (err == MP_OKAY) { + err = sp_2048_to_mp(r, res); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 36U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[72], ed[36], md[36]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + } + +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 36 * 2; + m = e + 36; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 36, base); + sp_2048_from_mp(e, 36, exp); + sp_2048_from_mp(m, 36, mod); + + err = sp_2048_mod_exp_36(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_2048_to_mp(r, res); + } + + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 36U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +#else + XMEMSET(e, 0, sizeof(sp_digit) * 36U); +#endif + + return err; +#endif +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_2048 +SP_NOINLINE static void sp_2048_lshift_36(sp_digit* r, sp_digit* a, byte n) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + r[36] = a[35] >> (57 - n); + for (i=35; i>0; i--) { + r[i] = ((a[i] << n) | (a[i-1] >> (57 - n))) & 0x1ffffffffffffffL; + } +#else + sp_int_digit s, t; + + s = (sp_int_digit)a[35]; + r[36] = s >> (57U - n); + s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]); + r[35] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]); + r[34] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]); + r[33] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]); + r[32] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]); + r[31] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]); + r[30] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]); + r[29] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]); + r[28] = ((s << n) | (t >> (57U - n))) & 
0x1ffffffffffffffUL; + s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]); + r[27] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]); + r[26] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]); + r[25] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]); + r[24] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]); + r[23] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]); + r[22] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]); + r[21] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]); + r[20] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]); + r[19] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]); + r[18] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]); + r[17] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]); + r[16] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]); + r[15] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]); + r[14] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]); + r[13] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]); + r[12] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]); + r[11] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]); + r[10] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]); + r[9] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]); + r[8] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]); + r[7] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]); + r[6] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]); + r[5] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]); + r[4] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]); + r[3] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]); + r[2] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]); + r[1] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; +#endif + r[0] = (a[0] << n) & 0x1ffffffffffffffL; +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. 
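+ * With base 2 each 5-bit window becomes a left shift: after the five
+ * squarings, r is shifted by y bits and any overflow above 2048 bits
+ * is reduced back in using a multiple of the Montgomery normalizer.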
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_2048_mod_exp_2_36(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[72]; + sp_digit td[37]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 109, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 72; + XMEMSET(td, 0, sizeof(sp_digit) * 109); +#else + norm = nd; + tmp = td; + XMEMSET(td, 0, sizeof(td)); +#endif + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_36(norm, m); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 56) / 57) - 1; + c = bits % 57; + if (c == 0) { + c = 57; + } + if (i < 36) { + n = e[i--] << (64 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (7 - c); + c += 57; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + sp_2048_lshift_36(r, norm, y); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (7 - c); + c += 57; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + + sp_2048_mont_sqr_36(r, r, m, mp); + sp_2048_mont_sqr_36(r, r, m, mp); + sp_2048_mont_sqr_36(r, r, m, mp); + sp_2048_mont_sqr_36(r, r, m, mp); + sp_2048_mont_sqr_36(r, r, m, mp); + + sp_2048_lshift_36(r, r, y); + sp_2048_mul_d_36(tmp, norm, (r[36] << 4) + (r[35] >> 53)); + r[36] = 0; + r[35] &= 0x1fffffffffffffL; + (void)sp_2048_add_36(r, r, tmp); + sp_2048_norm_36(r); + o = sp_2048_cmp_36(r, m); + sp_2048_cond_sub_36(r, r, m, ((o < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + + sp_2048_mont_reduce_36(r, m, mp); + n = sp_2048_cmp_36(r, m); + sp_2048_cond_sub_36(r, r, m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +#endif /* HAVE_FFDHE_2048 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
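+ *
+ * When the base is 2 and the top words of the modulus match the FFDHE
+ * 2048 prime, the sp_2048_mod_exp_2_36() shortcut is taken. Leading
+ * zero bytes are stripped from the output, so *outLen may come back
+ * smaller than 256.
+ * Illustrative call (variable names here are examples, not from this
+ * patch):
+ *     byte shared[256]; word32 sharedSz = (word32)sizeof(shared);
+ *     ret = sp_DhExp_2048(&base, priv, privSz, &prime, shared, &sharedSz);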
+ */ +int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + word32 i; + + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 256) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 36 * 2; + m = e + 36; + r = b; + + sp_2048_from_mp(b, 36, base); + sp_2048_from_bin(e, 36, exp, expLen); + sp_2048_from_mp(m, 36, mod); + + #ifdef HAVE_FFDHE_2048 + if (base->used == 1 && base->dp[0] == 2 && + (m[35] >> 21) == 0xffffffffL) { + err = sp_2048_mod_exp_2_36(r, e, expLen * 8, m); + } + else + #endif + err = sp_2048_mod_exp_36(r, b, e, expLen * 8, m, 0); + } + + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + for (i=0; i<256 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 36U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[72], ed[36], md[36]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + word32 i; + int err = MP_OKAY; + + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 256U) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + } +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 36 * 2; + m = e + 36; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 36, base); + sp_2048_from_bin(e, 36, exp, expLen); + sp_2048_from_mp(m, 36, mod); + + #ifdef HAVE_FFDHE_2048 + if (base->used == 1 && base->dp[0] == 2U && + (m[35] >> 21) == 0xffffffffL) { + err = sp_2048_mod_exp_2_36(r, e, expLen * 8U, m); + } + else { + #endif + err = sp_2048_mod_exp_36(r, b, e, expLen * 8U, m, 0); + #ifdef HAVE_FFDHE_2048 + } + #endif + } + + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + for (i=0; i<256U && out[i] == 0U; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 36U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +#else + XMEMSET(e, 0, sizeof(sp_digit) * 36U); +#endif + + return err; +#endif +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
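+ *
+ * A 1024-bit value fits in 18 of the 57-bit words, so the half-size
+ * sp_2048_mod_exp_18() is used and the top 18 words of the result are
+ * zeroed before conversion back to an mp_int.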
+ */ +int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1024) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 1024) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 1024) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 18 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 18 * 2; + m = e + 18; + r = b; + + sp_2048_from_mp(b, 18, base); + sp_2048_from_mp(e, 18, exp); + sp_2048_from_mp(m, 18, mod); + + err = sp_2048_mod_exp_18(r, b, e, mp_count_bits(exp), m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 18, 0, sizeof(*r) * 18U); + err = sp_2048_to_mp(r, res); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 18U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[36], ed[18], md[18]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1024) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 1024) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 1024) { + err = MP_READ_E; + } + } + +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 18 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 18 * 2; + m = e + 18; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 18, base); + sp_2048_from_mp(e, 18, exp); + sp_2048_from_mp(m, 18, mod); + + err = sp_2048_mod_exp_18(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 18, 0, sizeof(*r) * 18U); + err = sp_2048_to_mp(r, res); + } + + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 18U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +#else + XMEMSET(e, 0, sizeof(sp_digit) * 18U); +#endif + + return err; +#endif +} + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* !WOLFSSL_SP_NO_2048 */ + +#ifndef WOLFSSL_SP_NO_3072 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 49U) { + r[j] &= 0x1ffffffffffffffL; + s = 57U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. 
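+ *
+ * Handled per DIGIT_BIT: 57-bit mp_int digits are copied straight
+ * across, wider digits are split over several 57-bit words, and
+ * narrower digits are packed together with carries across word
+ * boundaries.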
+ */ +static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 57 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 57 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0x1ffffffffffffffL; + s = 57U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 57U) <= (word32)DIGIT_BIT) { + s += 57U; + r[j] &= 0x1ffffffffffffffL; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 57) { + r[j] &= 0x1ffffffffffffffL; + if (j + 1 >= size) { + break; + } + s = 57 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 384 + * + * r A single precision integer. + * a Byte array. + */ +static void sp_3072_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + for (i=0; i<53; i++) { + r[i+1] += r[i] >> 57; + r[i] &= 0x1ffffffffffffffL; + } + j = 3072 / 8 - 1; + a[j] = 0; + for (i=0; i<54 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ + b += 8 - s; + if (j < 0) { + break; + } + while (b < 57) { + a[j--] = (byte)(r[i] >> b); + b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 57); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
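+ *
+ * Product scanning (Comba): each 128-bit column total t0..t16 sums the
+ * partial products a[i] * b[j] with i + j == k, and carries are
+ * propagated once at the end into 57-bit result words.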
+ */ +SP_NOINLINE static void sp_3072_mul_9(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int128_t t0 = ((int128_t)a[ 0]) * b[ 0]; + int128_t t1 = ((int128_t)a[ 0]) * b[ 1] + + ((int128_t)a[ 1]) * b[ 0]; + int128_t t2 = ((int128_t)a[ 0]) * b[ 2] + + ((int128_t)a[ 1]) * b[ 1] + + ((int128_t)a[ 2]) * b[ 0]; + int128_t t3 = ((int128_t)a[ 0]) * b[ 3] + + ((int128_t)a[ 1]) * b[ 2] + + ((int128_t)a[ 2]) * b[ 1] + + ((int128_t)a[ 3]) * b[ 0]; + int128_t t4 = ((int128_t)a[ 0]) * b[ 4] + + ((int128_t)a[ 1]) * b[ 3] + + ((int128_t)a[ 2]) * b[ 2] + + ((int128_t)a[ 3]) * b[ 1] + + ((int128_t)a[ 4]) * b[ 0]; + int128_t t5 = ((int128_t)a[ 0]) * b[ 5] + + ((int128_t)a[ 1]) * b[ 4] + + ((int128_t)a[ 2]) * b[ 3] + + ((int128_t)a[ 3]) * b[ 2] + + ((int128_t)a[ 4]) * b[ 1] + + ((int128_t)a[ 5]) * b[ 0]; + int128_t t6 = ((int128_t)a[ 0]) * b[ 6] + + ((int128_t)a[ 1]) * b[ 5] + + ((int128_t)a[ 2]) * b[ 4] + + ((int128_t)a[ 3]) * b[ 3] + + ((int128_t)a[ 4]) * b[ 2] + + ((int128_t)a[ 5]) * b[ 1] + + ((int128_t)a[ 6]) * b[ 0]; + int128_t t7 = ((int128_t)a[ 0]) * b[ 7] + + ((int128_t)a[ 1]) * b[ 6] + + ((int128_t)a[ 2]) * b[ 5] + + ((int128_t)a[ 3]) * b[ 4] + + ((int128_t)a[ 4]) * b[ 3] + + ((int128_t)a[ 5]) * b[ 2] + + ((int128_t)a[ 6]) * b[ 1] + + ((int128_t)a[ 7]) * b[ 0]; + int128_t t8 = ((int128_t)a[ 0]) * b[ 8] + + ((int128_t)a[ 1]) * b[ 7] + + ((int128_t)a[ 2]) * b[ 6] + + ((int128_t)a[ 3]) * b[ 5] + + ((int128_t)a[ 4]) * b[ 4] + + ((int128_t)a[ 5]) * b[ 3] + + ((int128_t)a[ 6]) * b[ 2] + + ((int128_t)a[ 7]) * b[ 1] + + ((int128_t)a[ 8]) * b[ 0]; + int128_t t9 = ((int128_t)a[ 1]) * b[ 8] + + ((int128_t)a[ 2]) * b[ 7] + + ((int128_t)a[ 3]) * b[ 6] + + ((int128_t)a[ 4]) * b[ 5] + + ((int128_t)a[ 5]) * b[ 4] + + ((int128_t)a[ 6]) * b[ 3] + + ((int128_t)a[ 7]) * b[ 2] + + ((int128_t)a[ 8]) * b[ 1]; + int128_t t10 = ((int128_t)a[ 2]) * b[ 8] + + ((int128_t)a[ 3]) * b[ 7] + + ((int128_t)a[ 4]) * b[ 6] + + ((int128_t)a[ 5]) * b[ 5] + + ((int128_t)a[ 6]) * b[ 4] + + ((int128_t)a[ 7]) * b[ 3] + + ((int128_t)a[ 8]) * b[ 2]; + int128_t t11 = ((int128_t)a[ 3]) * b[ 8] + + ((int128_t)a[ 4]) * b[ 7] + + ((int128_t)a[ 5]) * b[ 6] + + ((int128_t)a[ 6]) * b[ 5] + + ((int128_t)a[ 7]) * b[ 4] + + ((int128_t)a[ 8]) * b[ 3]; + int128_t t12 = ((int128_t)a[ 4]) * b[ 8] + + ((int128_t)a[ 5]) * b[ 7] + + ((int128_t)a[ 6]) * b[ 6] + + ((int128_t)a[ 7]) * b[ 5] + + ((int128_t)a[ 8]) * b[ 4]; + int128_t t13 = ((int128_t)a[ 5]) * b[ 8] + + ((int128_t)a[ 6]) * b[ 7] + + ((int128_t)a[ 7]) * b[ 6] + + ((int128_t)a[ 8]) * b[ 5]; + int128_t t14 = ((int128_t)a[ 6]) * b[ 8] + + ((int128_t)a[ 7]) * b[ 7] + + ((int128_t)a[ 8]) * b[ 6]; + int128_t t15 = ((int128_t)a[ 7]) * b[ 8] + + ((int128_t)a[ 8]) * b[ 7]; + int128_t t16 = ((int128_t)a[ 8]) * b[ 8]; + + t1 += t0 >> 57; r[ 0] = t0 & 0x1ffffffffffffffL; + t2 += t1 >> 57; r[ 1] = t1 & 0x1ffffffffffffffL; + t3 += t2 >> 57; r[ 2] = t2 & 0x1ffffffffffffffL; + t4 += t3 >> 57; r[ 3] = t3 & 0x1ffffffffffffffL; + t5 += t4 >> 57; r[ 4] = t4 & 0x1ffffffffffffffL; + t6 += t5 >> 57; r[ 5] = t5 & 0x1ffffffffffffffL; + t7 += t6 >> 57; r[ 6] = t6 & 0x1ffffffffffffffL; + t8 += t7 >> 57; r[ 7] = t7 & 0x1ffffffffffffffL; + t9 += t8 >> 57; r[ 8] = t8 & 0x1ffffffffffffffL; + t10 += t9 >> 57; r[ 9] = t9 & 0x1ffffffffffffffL; + t11 += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL; + t12 += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL; + t13 += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL; + t14 += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL; + t15 += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL; + t16 += t15 
>> 57; r[15] = t15 & 0x1ffffffffffffffL; + r[17] = (sp_digit)(t16 >> 57); + r[16] = t16 & 0x1ffffffffffffffL; +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_9(sp_digit* r, const sp_digit* a) +{ + int128_t t0 = ((int128_t)a[ 0]) * a[ 0]; + int128_t t1 = (((int128_t)a[ 0]) * a[ 1]) * 2; + int128_t t2 = (((int128_t)a[ 0]) * a[ 2]) * 2 + + ((int128_t)a[ 1]) * a[ 1]; + int128_t t3 = (((int128_t)a[ 0]) * a[ 3] + + ((int128_t)a[ 1]) * a[ 2]) * 2; + int128_t t4 = (((int128_t)a[ 0]) * a[ 4] + + ((int128_t)a[ 1]) * a[ 3]) * 2 + + ((int128_t)a[ 2]) * a[ 2]; + int128_t t5 = (((int128_t)a[ 0]) * a[ 5] + + ((int128_t)a[ 1]) * a[ 4] + + ((int128_t)a[ 2]) * a[ 3]) * 2; + int128_t t6 = (((int128_t)a[ 0]) * a[ 6] + + ((int128_t)a[ 1]) * a[ 5] + + ((int128_t)a[ 2]) * a[ 4]) * 2 + + ((int128_t)a[ 3]) * a[ 3]; + int128_t t7 = (((int128_t)a[ 0]) * a[ 7] + + ((int128_t)a[ 1]) * a[ 6] + + ((int128_t)a[ 2]) * a[ 5] + + ((int128_t)a[ 3]) * a[ 4]) * 2; + int128_t t8 = (((int128_t)a[ 0]) * a[ 8] + + ((int128_t)a[ 1]) * a[ 7] + + ((int128_t)a[ 2]) * a[ 6] + + ((int128_t)a[ 3]) * a[ 5]) * 2 + + ((int128_t)a[ 4]) * a[ 4]; + int128_t t9 = (((int128_t)a[ 1]) * a[ 8] + + ((int128_t)a[ 2]) * a[ 7] + + ((int128_t)a[ 3]) * a[ 6] + + ((int128_t)a[ 4]) * a[ 5]) * 2; + int128_t t10 = (((int128_t)a[ 2]) * a[ 8] + + ((int128_t)a[ 3]) * a[ 7] + + ((int128_t)a[ 4]) * a[ 6]) * 2 + + ((int128_t)a[ 5]) * a[ 5]; + int128_t t11 = (((int128_t)a[ 3]) * a[ 8] + + ((int128_t)a[ 4]) * a[ 7] + + ((int128_t)a[ 5]) * a[ 6]) * 2; + int128_t t12 = (((int128_t)a[ 4]) * a[ 8] + + ((int128_t)a[ 5]) * a[ 7]) * 2 + + ((int128_t)a[ 6]) * a[ 6]; + int128_t t13 = (((int128_t)a[ 5]) * a[ 8] + + ((int128_t)a[ 6]) * a[ 7]) * 2; + int128_t t14 = (((int128_t)a[ 6]) * a[ 8]) * 2 + + ((int128_t)a[ 7]) * a[ 7]; + int128_t t15 = (((int128_t)a[ 7]) * a[ 8]) * 2; + int128_t t16 = ((int128_t)a[ 8]) * a[ 8]; + + t1 += t0 >> 57; r[ 0] = t0 & 0x1ffffffffffffffL; + t2 += t1 >> 57; r[ 1] = t1 & 0x1ffffffffffffffL; + t3 += t2 >> 57; r[ 2] = t2 & 0x1ffffffffffffffL; + t4 += t3 >> 57; r[ 3] = t3 & 0x1ffffffffffffffL; + t5 += t4 >> 57; r[ 4] = t4 & 0x1ffffffffffffffL; + t6 += t5 >> 57; r[ 5] = t5 & 0x1ffffffffffffffL; + t7 += t6 >> 57; r[ 6] = t6 & 0x1ffffffffffffffL; + t8 += t7 >> 57; r[ 7] = t7 & 0x1ffffffffffffffL; + t9 += t8 >> 57; r[ 8] = t8 & 0x1ffffffffffffffL; + t10 += t9 >> 57; r[ 9] = t9 & 0x1ffffffffffffffL; + t11 += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL; + t12 += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL; + t13 += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL; + t14 += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL; + t15 += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL; + t16 += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL; + r[17] = (sp_digit)(t16 >> 57); + r[16] = t16 & 0x1ffffffffffffffL; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_add_9(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + r[ 0] = a[ 0] + b[ 0]; + r[ 1] = a[ 1] + b[ 1]; + r[ 2] = a[ 2] + b[ 2]; + r[ 3] = a[ 3] + b[ 3]; + r[ 4] = a[ 4] + b[ 4]; + r[ 5] = a[ 5] + b[ 5]; + r[ 6] = a[ 6] + b[ 6]; + r[ 7] = a[ 7] + b[ 7]; + r[ 8] = a[ 8] + b[ 8]; + + return 0; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static int sp_3072_add_18(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 16; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[16] = a[16] + b[16]; + r[17] = a[17] + b[17]; + + return 0; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_sub_18(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 16; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[16] = a[16] - b[16]; + r[17] = a[17] - b[17]; + + return 0; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_18(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[18]; + sp_digit* a1 = z1; + sp_digit b1[9]; + sp_digit* z2 = r + 18; + (void)sp_3072_add_9(a1, a, &a[9]); + (void)sp_3072_add_9(b1, b, &b[9]); + sp_3072_mul_9(z2, &a[9], &b[9]); + sp_3072_mul_9(z0, a, b); + sp_3072_mul_9(z1, a1, b1); + (void)sp_3072_sub_18(z1, z1, z2); + (void)sp_3072_sub_18(z1, z1, z0); + (void)sp_3072_add_18(r + 9, r + 9, z1); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_18(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z1[18]; + sp_digit* a1 = z1; + sp_digit* z2 = r + 18; + (void)sp_3072_add_9(a1, a, &a[9]); + sp_3072_sqr_9(z2, &a[9]); + sp_3072_sqr_9(z0, a); + sp_3072_sqr_9(z1, a1); + (void)sp_3072_sub_18(z1, z1, z2); + (void)sp_3072_sub_18(z1, z1, z0); + (void)sp_3072_add_18(r + 9, r + 9, z1); +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_sub_36(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 32; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[32] = a[32] - b[32]; + r[33] = a[33] - b[33]; + r[34] = a[34] - b[34]; + r[35] = a[35] - b[35]; + + return 0; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static int sp_3072_add_36(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 32; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[32] = a[32] + b[32]; + r[33] = a[33] + b[33]; + r[34] = a[34] + b[34]; + r[35] = a[35] + b[35]; + + return 0; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_54(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit p0[36]; + sp_digit p1[36]; + sp_digit p2[36]; + sp_digit p3[36]; + sp_digit p4[36]; + sp_digit p5[36]; + sp_digit t0[36]; + sp_digit t1[36]; + sp_digit t2[36]; + sp_digit a0[18]; + sp_digit a1[18]; + sp_digit a2[18]; + sp_digit b0[18]; + sp_digit b1[18]; + sp_digit b2[18]; + (void)sp_3072_add_18(a0, a, &a[18]); + (void)sp_3072_add_18(b0, b, &b[18]); + (void)sp_3072_add_18(a1, &a[18], &a[36]); + (void)sp_3072_add_18(b1, &b[18], &b[36]); + (void)sp_3072_add_18(a2, a0, &a[36]); + (void)sp_3072_add_18(b2, b0, &b[36]); + sp_3072_mul_18(p0, a, b); + sp_3072_mul_18(p2, &a[18], &b[18]); + sp_3072_mul_18(p4, &a[36], &b[36]); + sp_3072_mul_18(p1, a0, b0); + sp_3072_mul_18(p3, a1, b1); + sp_3072_mul_18(p5, a2, b2); + XMEMSET(r, 0, sizeof(*r)*2U*54U); + (void)sp_3072_sub_36(t0, p3, p2); + (void)sp_3072_sub_36(t1, p1, p2); + (void)sp_3072_sub_36(t2, p5, t0); + (void)sp_3072_sub_36(t2, t2, t1); + (void)sp_3072_sub_36(t0, t0, p4); + (void)sp_3072_sub_36(t1, t1, p0); + (void)sp_3072_add_36(r, r, p0); + (void)sp_3072_add_36(&r[18], &r[18], t1); + (void)sp_3072_add_36(&r[36], &r[36], t2); + (void)sp_3072_add_36(&r[54], &r[54], t0); + (void)sp_3072_add_36(&r[72], &r[72], p4); +} + +/* Square a into r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_54(sp_digit* r, const sp_digit* a) +{ + sp_digit p0[36]; + sp_digit p1[36]; + sp_digit p2[36]; + sp_digit p3[36]; + sp_digit p4[36]; + sp_digit p5[36]; + sp_digit t0[36]; + sp_digit t1[36]; + sp_digit t2[36]; + sp_digit a0[18]; + sp_digit a1[18]; + sp_digit a2[18]; + (void)sp_3072_add_18(a0, a, &a[18]); + (void)sp_3072_add_18(a1, &a[18], &a[36]); + (void)sp_3072_add_18(a2, a0, &a[36]); + sp_3072_sqr_18(p0, a); + sp_3072_sqr_18(p2, &a[18]); + sp_3072_sqr_18(p4, &a[36]); + sp_3072_sqr_18(p1, a0); + sp_3072_sqr_18(p3, a1); + sp_3072_sqr_18(p5, a2); + XMEMSET(r, 0, sizeof(*r)*2U*54U); + (void)sp_3072_sub_36(t0, p3, p2); + (void)sp_3072_sub_36(t1, p1, p2); + (void)sp_3072_sub_36(t2, p5, t0); + (void)sp_3072_sub_36(t2, t2, t1); + (void)sp_3072_sub_36(t0, t0, p4); + (void)sp_3072_sub_36(t1, t1, p0); + (void)sp_3072_add_36(r, r, p0); + (void)sp_3072_add_36(&r[18], &r[18], t1); + (void)sp_3072_add_36(&r[36], &r[36], t2); + (void)sp_3072_add_36(&r[54], &r[54], t0); + (void)sp_3072_add_36(&r[72], &r[72], p4); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_add_54(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 54; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#else +/* Add b to a into r. 
(r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_add_54(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 48; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[48] = a[48] + b[48]; + r[49] = a[49] + b[49]; + r[50] = a[50] + b[50]; + r[51] = a[51] + b[51]; + r[52] = a[52] + b[52]; + r[53] = a[53] + b[53]; + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_sub_54(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 54; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_sub_54(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 48; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[48] = a[48] - b[48]; + r[49] = a[49] - b[49]; + r[50] = a[50] - b[50]; + r[51] = a[51] - b[51]; + r[52] = a[52] - b[52]; + r[53] = a[53] - b[53]; + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_54(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j, k; + int128_t c; + + c = ((int128_t)a[53]) * b[53]; + r[107] = (sp_digit)(c >> 57); + c = (c & 0x1ffffffffffffffL) << 57; + for (k = 105; k >= 0; k--) { + for (i = 53; i >= 0; i--) { + j = k - i; + if (j >= 54) { + break; + } + if (j < 0) { + continue; + } + + c += ((int128_t)a[i]) * b[j]; + } + r[k + 2] += c >> 114; + r[k + 1] = (c >> 57) & 0x1ffffffffffffffL; + c = (c & 0x1ffffffffffffffL) << 57; + } + r[0] = (sp_digit)(c >> 57); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_54(sp_digit* r, const sp_digit* a) +{ + int i, j, k; + int128_t c; + + c = ((int128_t)a[53]) * a[53]; + r[107] = (sp_digit)(c >> 57); + c = (c & 0x1ffffffffffffffL) << 57; + for (k = 105; k >= 0; k--) { + for (i = 53; i >= 0; i--) { + j = k - i; + if (j >= 54 || i <= j) { + break; + } + if (j < 0) { + continue; + } + + c += ((int128_t)a[i]) * a[j] * 2; + } + if (i == j) { + c += ((int128_t)a[i]) * a[i]; + } + + r[k + 2] += c >> 114; + r[k + 1] = (c >> 57) & 0x1ffffffffffffffL; + c = (c & 0x1ffffffffffffffL) << 57; + } + r[0] = (sp_digit)(c >> 57); +} + +#endif /* WOLFSSL_SP_SMALL */ +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. 
+ * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_add_27(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 27; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#else +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_add_27(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 24; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[24] = a[24] + b[24]; + r[25] = a[25] + b[25]; + r[26] = a[26] + b[26]; + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_sub_27(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 27; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_3072_sub_27(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 24; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[24] = a[24] - b[24]; + r[25] = a[25] - b[25]; + r[26] = a[26] - b[26]; + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_27(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j, k; + int128_t c; + + c = ((int128_t)a[26]) * b[26]; + r[53] = (sp_digit)(c >> 57); + c = (c & 0x1ffffffffffffffL) << 57; + for (k = 51; k >= 0; k--) { + for (i = 26; i >= 0; i--) { + j = k - i; + if (j >= 27) { + break; + } + if (j < 0) { + continue; + } + + c += ((int128_t)a[i]) * b[j]; + } + r[k + 2] += c >> 114; + r[k + 1] = (c >> 57) & 0x1ffffffffffffffL; + c = (c & 0x1ffffffffffffffL) << 57; + } + r[0] = (sp_digit)(c >> 57); +} + +#else +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_27(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j; + int128_t t[54]; + + XMEMSET(t, 0, sizeof(t)); + for (i=0; i<27; i++) { + for (j=0; j<27; j++) { + t[i+j] += ((int128_t)a[i]) * b[j]; + } + } + for (i=0; i<53; i++) { + r[i] = t[i] & 0x1ffffffffffffffL; + t[i+1] += t[i] >> 57; + } + r[53] = (sp_digit)t[53]; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
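+ *
+ * Cross products a[i]*a[j] with i > j are computed once and doubled;
+ * the squares a[i]*a[i] are added once.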
+ */
+SP_NOINLINE static void sp_3072_sqr_27(sp_digit* r, const sp_digit* a)
+{
+ int i, j, k;
+ int128_t c;
+
+ c = ((int128_t)a[26]) * a[26];
+ r[53] = (sp_digit)(c >> 57);
+ c = (c & 0x1ffffffffffffffL) << 57;
+ for (k = 51; k >= 0; k--) {
+ for (i = 26; i >= 0; i--) {
+ j = k - i;
+ if (j >= 27 || i <= j) {
+ break;
+ }
+ if (j < 0) {
+ continue;
+ }
+
+ c += ((int128_t)a[i]) * a[j] * 2;
+ }
+ if (i == j) {
+ c += ((int128_t)a[i]) * a[i];
+ }
+
+ r[k + 2] += c >> 114;
+ r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
+ c = (c & 0x1ffffffffffffffL) << 57;
+ }
+ r[0] = (sp_digit)(c >> 57);
+}
+
+#else
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_27(sp_digit* r, const sp_digit* a)
+{
+ int i, j;
+ int128_t t[54];
+
+ XMEMSET(t, 0, sizeof(t));
+ for (i=0; i<27; i++) {
+ /* Double each cross product; add each square once. */
+ for (j=0; j<i; j++) {
+ t[i+j] += (((int128_t)a[i]) * a[j]) * 2;
+ }
+ t[i+i] += ((int128_t)a[i]) * a[i];
+ }
+ /* Propagate carries between 57-bit words. */
+ for (i=0; i<53; i++) {
+ r[i] = t[i] & 0x1ffffffffffffffL;
+ t[i+1] += t[i] >> 57;
+ }
+ r[53] = (sp_digit)t[53];
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+ sp_digit x, b;
+
+ b = a[0];
+ x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**8 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**16 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**32 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**64 */
+ x &= 0x1ffffffffffffffL;
+
+ /* rho = -1/m mod b */
+ *rho = (1L << 57) - x;
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_3072_mul_d_54(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int128_t tb = b;
+ int128_t t = 0;
+ int i;
+
+ for (i = 0; i < 54; i++) {
+ t += tb * a[i];
+ r[i] = t & 0x1ffffffffffffffL;
+ t >>= 57;
+ }
+ r[54] = (sp_digit)t;
+#else
+ int128_t tb = b;
+ int128_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffL;
+ for (i = 0; i < 48; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
+ t[2] = tb * a[i+2];
+ r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
+ t[3] = tb * a[i+3];
+ r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL);
+ t[4] = tb * a[i+4];
+ r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffL);
+ t[5] = tb * a[i+5];
+ r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffL);
+ t[6] = tb * a[i+6];
+ r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffL);
+ t[7] = tb * a[i+7];
+ r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffL);
+ t[0] = tb * a[i+8];
+ r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffL);
+ }
+ t[1] = tb * a[49];
+ r[49] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
+ t[2] = tb * a[50];
+ r[50] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
+ t[3] = tb * a[51];
+ r[51] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL);
+ t[4] = tb * a[52];
+ r[52] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffL);
+ t[5] = tb * a[53];
+ r[53] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffL);
+ r[54] = (sp_digit)(t[5] >> 57);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
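+/* The 27-word (1536-bit) functions below operate on the half-size values
+ * used by the RSA CRT private-key code path. */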
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 1536 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_3072_mont_norm_27(sp_digit* r, const sp_digit* m)
+{
+ /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<26; i++) {
+ r[i] = 0x1ffffffffffffffL;
+ }
+#else
+ int i;
+
+ for (i = 0; i < 24; i += 8) {
+ r[i + 0] = 0x1ffffffffffffffL;
+ r[i + 1] = 0x1ffffffffffffffL;
+ r[i + 2] = 0x1ffffffffffffffL;
+ r[i + 3] = 0x1ffffffffffffffL;
+ r[i + 4] = 0x1ffffffffffffffL;
+ r[i + 5] = 0x1ffffffffffffffL;
+ r[i + 6] = 0x1ffffffffffffffL;
+ r[i + 7] = 0x1ffffffffffffffL;
+ }
+ r[24] = 0x1ffffffffffffffL;
+ r[25] = 0x1ffffffffffffffL;
+#endif
+ r[26] = 0x3fffffffffffffL;
+
+ /* r = (2^n - 1) mod m */
+ (void)sp_3072_sub_27(r, r, m);
+
+ /* Add one so r = 2^n mod m */
+ r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_3072_cmp_27(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=26; i>=0; i--) {
+ r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#else
+ int i;
+
+ r |= (a[26] - b[26]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[25] - b[25]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[24] - b[24]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ for (i = 16; i >= 0; i -= 8) {
+ r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#endif /* WOLFSSL_SP_SMALL */
+
+ return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static void sp_3072_cond_sub_27(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i = 0; i < 27; i++) {
+ r[i] = a[i] - (b[i] & m);
+ }
+#else
+ int i;
+
+ for (i = 0; i < 24; i += 8) {
+ r[i + 0] = a[i + 0] - (b[i + 0] & m);
+ r[i + 1] = a[i + 1] - (b[i + 1] & m);
+ r[i + 2] = a[i + 2] - (b[i + 2] & m);
+ r[i + 3] = a[i + 3] - (b[i + 3] & m);
+ r[i + 4] = a[i + 4] - (b[i + 4] & m);
+ r[i + 5] = a[i + 5] - (b[i + 5] & m);
+ r[i + 6] = a[i + 6] - (b[i + 6] & m);
+ r[i + 7] = a[i + 7] - (b[i + 7] & m);
+ }
+ r[24] = a[24] - (b[24] & m);
+ r[25] = a[25] - (b[25] & m);
+ r[26] = a[26] - (b[26] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
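+ *
+ * This is the inner step of Montgomery reduction: mu times the modulus
+ * is accumulated into the working value.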
+ */ +SP_NOINLINE static void sp_3072_mul_add_27(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 27; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x1ffffffffffffffL; + t >>= 57; + } + r[27] += t; +#else + int128_t tb = b; + int128_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL); + for (i = 0; i < 24; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); + t[2] = tb * a[i+2]; + r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); + t[3] = tb * a[i+3]; + r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); + t[4] = tb * a[i+4]; + r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL)); + t[5] = tb * a[i+5]; + r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL)); + t[6] = tb * a[i+6]; + r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL)); + t[7] = tb * a[i+7]; + r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL)); + t[0] = tb * a[i+8]; + r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL)); + } + t[1] = tb * a[25]; r[25] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); + t[2] = tb * a[26]; r[26] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); + r[27] += (sp_digit)(t[2] >> 57); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 57. + * + * a Array of sp_digit to normalize. + */ +static void sp_3072_norm_27(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 26; i++) { + a[i+1] += a[i] >> 57; + a[i] &= 0x1ffffffffffffffL; + } +#else + int i; + for (i = 0; i < 24; i += 8) { + a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffL; + a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffL; + a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffL; + a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffL; + a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffL; + a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffL; + a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffL; + a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffL; + a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffL; + } + a[24+1] += a[24] >> 57; + a[24] &= 0x1ffffffffffffffL; + a[25+1] += a[25] >> 57; + a[25] &= 0x1ffffffffffffffL; +#endif +} + +/* Shift the result in the high 1536 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. 
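+ *
+ * The shift is by 1536 bits = 26 * 57 + 54, hence the top word is taken
+ * from bit 54 and the following words are offset by 3 bits.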
+ */
+static void sp_3072_mont_shift_27(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+ sp_digit n, s;
+
+ s = a[27];
+ n = a[26] >> 54;
+ for (i = 0; i < 26; i++) {
+ n += (s & 0x1ffffffffffffffL) << 3;
+ r[i] = n & 0x1ffffffffffffffL;
+ n >>= 57;
+ s = a[28 + i] + (s >> 57);
+ }
+ n += s << 3;
+ r[26] = n;
+#else
+ sp_digit n, s;
+ int i;
+
+ s = a[27]; n = a[26] >> 54;
+ for (i = 0; i < 24; i += 8) {
+ n += (s & 0x1ffffffffffffffL) << 3; r[i+0] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[i+28] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 3; r[i+1] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[i+29] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 3; r[i+2] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[i+30] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 3; r[i+3] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[i+31] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 3; r[i+4] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[i+32] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 3; r[i+5] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[i+33] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 3; r[i+6] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[i+34] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 3; r[i+7] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[i+35] + (s >> 57);
+ }
+ n += (s & 0x1ffffffffffffffL) << 3; r[24] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[52] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 3; r[25] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[53] + (s >> 57);
+ n += s << 3; r[26] = n;
+#endif /* WOLFSSL_SP_SMALL */
+ XMEMSET(&r[27], 0, sizeof(*r) * 27U);
+}
+
+/* Reduce the number back to 1536 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_3072_mont_reduce_27(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+ int i;
+ sp_digit mu;
+
+ sp_3072_norm_27(a + 27);
+
+ for (i=0; i<26; i++) {
+ mu = (a[i] * mp) & 0x1ffffffffffffffL;
+ sp_3072_mul_add_27(a+i, m, mu);
+ a[i+1] += a[i] >> 57;
+ }
+ mu = (a[i] * mp) & 0x3fffffffffffffL;
+ sp_3072_mul_add_27(a+i, m, mu);
+ a[i+1] += a[i] >> 57;
+ a[i] &= 0x1ffffffffffffffL;
+
+ sp_3072_mont_shift_27(a, a);
+ sp_3072_cond_sub_27(a, a, m, 0 - (((a[26] >> 54) > 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ sp_3072_norm_27(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_mul_27(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_3072_mul_27(r, a, b);
+ sp_3072_mont_reduce_27(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_sqr_27(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_3072_sqr_27(r, a);
+ sp_3072_mont_reduce_27(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
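+ *
+ * The result is 28 words; r[27] receives the final carry.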
+ */ +SP_NOINLINE static void sp_3072_mul_d_27(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 27; i++) { + t += tb * a[i]; + r[i] = t & 0x1ffffffffffffffL; + t >>= 57; + } + r[27] = (sp_digit)t; +#else + int128_t tb = b; + int128_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffL; + for (i = 0; i < 24; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffL); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffL); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffL); + } + t[1] = tb * a[25]; + r[25] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); + t[2] = tb * a[26]; + r[26] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); + r[27] = (sp_digit)(t[2] >> 57); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_3072_cond_add_27(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 27; i++) { + r[i] = a[i] + (b[i] & m); + } +#else + int i; + + for (i = 0; i < 24; i += 8) { + r[i + 0] = a[i + 0] + (b[i + 0] & m); + r[i + 1] = a[i + 1] + (b[i + 1] & m); + r[i + 2] = a[i + 2] + (b[i + 2] & m); + r[i + 3] = a[i + 3] + (b[i + 3] & m); + r[i + 4] = a[i + 4] + (b[i + 4] & m); + r[i + 5] = a[i + 5] + (b[i + 5] & m); + r[i + 6] = a[i + 6] + (b[i + 6] & m); + r[i + 7] = a[i + 7] + (b[i + 7] & m); + } + r[24] = a[24] + (b[24] & m); + r[25] = a[25] + (b[25] & m); + r[26] = a[26] + (b[26] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifdef WOLFSSL_SP_DIV_64 +static WC_INLINE sp_digit sp_3072_div_word_27(sp_digit d1, sp_digit d0, + sp_digit dv) +{ + sp_digit d, r, t; + + /* All 57 bits from d1 and top 6 bits from d0. */ + d = (d1 << 6) | (d0 >> 51); + r = d / dv; + d -= r * dv; + /* Up to 7 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 45) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 13 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 39) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 19 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 33) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 25 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 27) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 31 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 21) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 37 bits in r */ + /* Next 6 bits from d0. 
*/ + r <<= 6; + d <<= 6; + d |= (d0 >> 15) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 43 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 9) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 49 bits in r */ + /* Next 6 bits from d0. */ + r <<= 6; + d <<= 6; + d |= (d0 >> 3) & ((1 << 6) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 55 bits in r */ + /* Remaining 3 bits from d0. */ + r <<= 3; + d <<= 3; + d |= d0 & ((1 << 3) - 1); + t = d / dv; + r += t; + + return r; +} +#endif /* WOLFSSL_SP_DIV_64 */ + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Number to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_3072_div_27(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_64 + int128_t d1; +#endif + sp_digit dv, r1; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* td; +#else + sp_digit t1d[54], t2d[27 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 27 + 1), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = td; + t2 = td + 2 * 27; +#else + t1 = t1d; + t2 = t2d; +#endif + + dv = d[26]; + XMEMCPY(t1, a, sizeof(*t1) * 2U * 27U); + for (i=26; i>=0; i--) { + t1[27 + i] += t1[27 + i - 1] >> 57; + t1[27 + i - 1] &= 0x1ffffffffffffffL; +#ifndef WOLFSSL_SP_DIV_64 + d1 = t1[27 + i]; + d1 <<= 57; + d1 += t1[27 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_3072_div_word_27(t1[27 + i], t1[27 + i - 1], dv); +#endif + + sp_3072_mul_d_27(t2, d, r1); + (void)sp_3072_sub_27(&t1[i], &t1[i], t2); + t1[27 + i] -= t2[27]; + t1[27 + i] += t1[27 + i - 1] >> 57; + t1[27 + i - 1] &= 0x1ffffffffffffffL; + r1 = (((-t1[27 + i]) << 57) - t1[27 + i - 1]) / dv; + r1++; + sp_3072_mul_d_27(t2, d, r1); + (void)sp_3072_add_27(&t1[i], &t1[i], t2); + t1[27 + i] += t1[27 + i - 1] >> 57; + t1[27 + i - 1] &= 0x1ffffffffffffffL; + } + t1[27 - 1] += t1[27 - 2] >> 57; + t1[27 - 2] &= 0x1ffffffffffffffL; + r1 = t1[27 - 1] / dv; + + sp_3072_mul_d_27(t2, d, r1); + (void)sp_3072_sub_27(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 27U); + for (i=0; i<25; i++) { + r[i+1] += r[i] >> 57; + r[i] &= 0x1ffffffffffffffL; + } + sp_3072_cond_add_27(r, r, d, 0 - ((r[26] < 0) ? + (sp_digit)1 : (sp_digit)0)); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_3072_mod_27(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_3072_div_27(a, m, NULL, r); +} + +/* Modular exponentiate a to the e mod m. 
(r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* td; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 27 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3U * 27U * 2U); + + norm = t[0] = td; + t[1] = &td[27 * 2]; + t[2] = &td[2 * 27 * 2]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_27(norm, m); + + if (reduceA != 0) { + err = sp_3072_mod_27(t[1], a, m); + } + else { + XMEMCPY(t[1], a, sizeof(sp_digit) * 27U); + } + } + if (err == MP_OKAY) { + sp_3072_mul_27(t[1], t[1], norm); + err = sp_3072_mod_27(t[1], t[1], m); + } + + if (err == MP_OKAY) { + i = bits / 57; + c = bits % 57; + n = e[i--] << (57 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 57; + } + + y = (n >> 56) & 1; + n <<= 1; + + sp_3072_mont_mul_27(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(*t[2]) * 27 * 2); + sp_3072_mont_sqr_27(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(*t[2]) * 27 * 2); + } + + sp_3072_mont_reduce_27(t[0], m, mp); + n = sp_3072_cmp_27(t[0], m); + sp_3072_cond_sub_27(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 27 * 2); + + } + + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + + return err; +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[3][54]; +#else + sp_digit* td; + sp_digit* t[3]; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 27 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + t[0] = td; + t[1] = &td[27 * 2]; + t[2] = &td[2 * 27 * 2]; +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_27(norm, m); + + if (reduceA != 0) { + err = sp_3072_mod_27(t[1], a, m); + if (err == MP_OKAY) { + sp_3072_mul_27(t[1], t[1], norm); + err = sp_3072_mod_27(t[1], t[1], m); + } + } + else { + sp_3072_mul_27(t[1], a, norm); + err = sp_3072_mod_27(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + i = bits / 57; + c = bits % 57; + n = e[i--] << (57 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 57; + } + + y = (n >> 56) & 1; + n <<= 1; + + sp_3072_mont_mul_27(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); + sp_3072_mont_sqr_27(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); + } + + sp_3072_mont_reduce_27(t[0], m, mp); + n = sp_3072_cmp_27(t[0], m); + sp_3072_cond_sub_27(t[0], t[0], m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(t[0])); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][54]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit rt[54]; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 54, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 54; +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_27(norm, m); + + if (reduceA != 0) { + err = sp_3072_mod_27(t[1], a, m); + if (err == MP_OKAY) { + sp_3072_mul_27(t[1], t[1], norm); + err = sp_3072_mod_27(t[1], t[1], m); + } + } + else { + sp_3072_mul_27(t[1], a, norm); + err = sp_3072_mod_27(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_27(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_27(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_27(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_27(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_27(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_27(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_27(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_27(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_27(t[10], t[ 5], m, mp); + sp_3072_mont_mul_27(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_27(t[12], t[ 6], m, mp); + sp_3072_mont_mul_27(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_27(t[14], t[ 7], m, mp); + sp_3072_mont_mul_27(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_27(t[16], t[ 8], m, mp); + sp_3072_mont_mul_27(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_27(t[18], t[ 9], m, mp); + 
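/* t[2]..t[31] now hold a^2..a^31 in Montgomery form for the 5-bit window:
+ even powers by squaring, odd powers by one extra multiply. */
+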
sp_3072_mont_mul_27(t[19], t[10], t[ 9], m, mp);
+ sp_3072_mont_sqr_27(t[20], t[10], m, mp);
+ sp_3072_mont_mul_27(t[21], t[11], t[10], m, mp);
+ sp_3072_mont_sqr_27(t[22], t[11], m, mp);
+ sp_3072_mont_mul_27(t[23], t[12], t[11], m, mp);
+ sp_3072_mont_sqr_27(t[24], t[12], m, mp);
+ sp_3072_mont_mul_27(t[25], t[13], t[12], m, mp);
+ sp_3072_mont_sqr_27(t[26], t[13], m, mp);
+ sp_3072_mont_mul_27(t[27], t[14], t[13], m, mp);
+ sp_3072_mont_sqr_27(t[28], t[14], m, mp);
+ sp_3072_mont_mul_27(t[29], t[15], t[14], m, mp);
+ sp_3072_mont_sqr_27(t[30], t[15], m, mp);
+ sp_3072_mont_mul_27(t[31], t[16], t[15], m, mp);
+
+ bits = ((bits + 4) / 5) * 5;
+ i = ((bits + 56) / 57) - 1;
+ c = bits % 57;
+ if (c == 0) {
+ c = 57;
+ }
+ if (i < 27) {
+ n = e[i--] << (64 - c);
+ }
+ else {
+ n = 0;
+ i--;
+ }
+ if (c < 5) {
+ n |= e[i--] << (7 - c);
+ c += 57;
+ }
+ y = (n >> 59) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ XMEMCPY(rt, t[y], sizeof(rt));
+ for (; i>=0 || c>=5; ) {
+ if (c < 5) {
+ n |= e[i--] << (7 - c);
+ c += 57;
+ }
+ y = (n >> 59) & 0x1f;
+ n <<= 5;
+ c -= 5;
+
+ sp_3072_mont_sqr_27(rt, rt, m, mp);
+ sp_3072_mont_sqr_27(rt, rt, m, mp);
+ sp_3072_mont_sqr_27(rt, rt, m, mp);
+ sp_3072_mont_sqr_27(rt, rt, m, mp);
+ sp_3072_mont_sqr_27(rt, rt, m, mp);
+
+ sp_3072_mont_mul_27(rt, rt, t[y], m, mp);
+ }
+
+ sp_3072_mont_reduce_27(rt, m, mp);
+ n = sp_3072_cmp_27(rt, m);
+ sp_3072_cond_sub_27(rt, rt, m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, rt, sizeof(rt));
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+#endif
+}
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 3072 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_3072_mont_norm_54(sp_digit* r, const sp_digit* m)
+{
+ /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<53; i++) {
+ r[i] = 0x1ffffffffffffffL;
+ }
+#else
+ int i;
+
+ for (i = 0; i < 48; i += 8) {
+ r[i + 0] = 0x1ffffffffffffffL;
+ r[i + 1] = 0x1ffffffffffffffL;
+ r[i + 2] = 0x1ffffffffffffffL;
+ r[i + 3] = 0x1ffffffffffffffL;
+ r[i + 4] = 0x1ffffffffffffffL;
+ r[i + 5] = 0x1ffffffffffffffL;
+ r[i + 6] = 0x1ffffffffffffffL;
+ r[i + 7] = 0x1ffffffffffffffL;
+ }
+ r[48] = 0x1ffffffffffffffL;
+ r[49] = 0x1ffffffffffffffL;
+ r[50] = 0x1ffffffffffffffL;
+ r[51] = 0x1ffffffffffffffL;
+ r[52] = 0x1ffffffffffffffL;
+#endif
+ r[53] = 0x7ffffffffffffL;
+
+ /* r = (2^n - 1) mod m */
+ (void)sp_3072_sub_54(r, r, m);
+
+ /* Add one so r = 2^n mod m */
+ r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_3072_cmp_54(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=53; i>=0; i--) {
+ r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#else
+ int i;
+
+ r |= (a[53] - b[53]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[52] - b[52]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[51] - b[51]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[50] - b[50]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[49] - b[49]) & (0 - ((r == 0) ?
(sp_digit)1 : (sp_digit)0)); + r |= (a[48] - b[48]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + for (i = 40; i >= 0; i -= 8) { + r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static void sp_3072_cond_sub_54(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 54; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + int i; + + for (i = 0; i < 48; i += 8) { + r[i + 0] = a[i + 0] - (b[i + 0] & m); + r[i + 1] = a[i + 1] - (b[i + 1] & m); + r[i + 2] = a[i + 2] - (b[i + 2] & m); + r[i + 3] = a[i + 3] - (b[i + 3] & m); + r[i + 4] = a[i + 4] - (b[i + 4] & m); + r[i + 5] = a[i + 5] - (b[i + 5] & m); + r[i + 6] = a[i + 6] - (b[i + 6] & m); + r[i + 7] = a[i + 7] - (b[i + 7] & m); + } + r[48] = a[48] - (b[48] & m); + r[49] = a[49] - (b[49] & m); + r[50] = a[50] - (b[50] & m); + r[51] = a[51] - (b[51] & m); + r[52] = a[52] - (b[52] & m); + r[53] = a[53] - (b[53] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
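+ *
+ * This is the inner step of Montgomery reduction for the full 54-word
+ * (3072-bit) values.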
+ */ +SP_NOINLINE static void sp_3072_mul_add_54(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 54; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x1ffffffffffffffL; + t >>= 57; + } + r[54] += t; +#else + int128_t tb = b; + int128_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL); + for (i = 0; i < 48; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); + t[2] = tb * a[i+2]; + r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); + t[3] = tb * a[i+3]; + r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); + t[4] = tb * a[i+4]; + r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL)); + t[5] = tb * a[i+5]; + r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL)); + t[6] = tb * a[i+6]; + r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL)); + t[7] = tb * a[i+7]; + r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL)); + t[0] = tb * a[i+8]; + r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL)); + } + t[1] = tb * a[49]; r[49] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); + t[2] = tb * a[50]; r[50] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); + t[3] = tb * a[51]; r[51] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); + t[4] = tb * a[52]; r[52] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL)); + t[5] = tb * a[53]; r[53] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL)); + r[54] += (sp_digit)(t[5] >> 57); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 57. + * + * a Array of sp_digit to normalize. + */ +static void sp_3072_norm_54(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 53; i++) { + a[i+1] += a[i] >> 57; + a[i] &= 0x1ffffffffffffffL; + } +#else + int i; + for (i = 0; i < 48; i += 8) { + a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffL; + a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffL; + a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffL; + a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffL; + a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffL; + a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffL; + a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffL; + a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffL; + a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffL; + } + a[48+1] += a[48] >> 57; + a[48] &= 0x1ffffffffffffffL; + a[49+1] += a[49] >> 57; + a[49] &= 0x1ffffffffffffffL; + a[50+1] += a[50] >> 57; + a[50] &= 0x1ffffffffffffffL; + a[51+1] += a[51] >> 57; + a[51] &= 0x1ffffffffffffffL; + a[52+1] += a[52] >> 57; + a[52] &= 0x1ffffffffffffffL; +#endif +} + +/* Shift the result in the high 3072 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. 
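+ *
+ * The shift is by 3072 bits = 53 * 57 + 51, hence the top word is taken
+ * from bit 51 and the following words enter shifted left by 6 bits.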
+ */
+static void sp_3072_mont_shift_54(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+ int128_t n = a[53] >> 51;
+ n += ((int128_t)a[54]) << 6;
+
+ for (i = 0; i < 53; i++) {
+ r[i] = n & 0x1ffffffffffffffL;
+ n >>= 57;
+ n += ((int128_t)a[55 + i]) << 6;
+ }
+ r[53] = (sp_digit)n;
+#else
+ int i;
+ int128_t n = a[53] >> 51;
+ n += ((int128_t)a[54]) << 6;
+ for (i = 0; i < 48; i += 8) {
+ r[i + 0] = n & 0x1ffffffffffffffL;
+ n >>= 57; n += ((int128_t)a[i + 55]) << 6;
+ r[i + 1] = n & 0x1ffffffffffffffL;
+ n >>= 57; n += ((int128_t)a[i + 56]) << 6;
+ r[i + 2] = n & 0x1ffffffffffffffL;
+ n >>= 57; n += ((int128_t)a[i + 57]) << 6;
+ r[i + 3] = n & 0x1ffffffffffffffL;
+ n >>= 57; n += ((int128_t)a[i + 58]) << 6;
+ r[i + 4] = n & 0x1ffffffffffffffL;
+ n >>= 57; n += ((int128_t)a[i + 59]) << 6;
+ r[i + 5] = n & 0x1ffffffffffffffL;
+ n >>= 57; n += ((int128_t)a[i + 60]) << 6;
+ r[i + 6] = n & 0x1ffffffffffffffL;
+ n >>= 57; n += ((int128_t)a[i + 61]) << 6;
+ r[i + 7] = n & 0x1ffffffffffffffL;
+ n >>= 57; n += ((int128_t)a[i + 62]) << 6;
+ }
+ r[48] = n & 0x1ffffffffffffffL; n >>= 57; n += ((int128_t)a[103]) << 6;
+ r[49] = n & 0x1ffffffffffffffL; n >>= 57; n += ((int128_t)a[104]) << 6;
+ r[50] = n & 0x1ffffffffffffffL; n >>= 57; n += ((int128_t)a[105]) << 6;
+ r[51] = n & 0x1ffffffffffffffL; n >>= 57; n += ((int128_t)a[106]) << 6;
+ r[52] = n & 0x1ffffffffffffffL; n >>= 57; n += ((int128_t)a[107]) << 6;
+ r[53] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+ XMEMSET(&r[54], 0, sizeof(*r) * 54U);
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_3072_mont_reduce_54(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+ int i;
+ sp_digit mu;
+
+ sp_3072_norm_54(a + 54);
+
+#ifdef WOLFSSL_SP_DH
+ if (mp != 1) {
+ for (i=0; i<53; i++) {
+ mu = (a[i] * mp) & 0x1ffffffffffffffL;
+ sp_3072_mul_add_54(a+i, m, mu);
+ a[i+1] += a[i] >> 57;
+ }
+ mu = (a[i] * mp) & 0x7ffffffffffffL;
+ sp_3072_mul_add_54(a+i, m, mu);
+ a[i+1] += a[i] >> 57;
+ a[i] &= 0x1ffffffffffffffL;
+ }
+ else {
+ for (i=0; i<53; i++) {
+ mu = a[i] & 0x1ffffffffffffffL;
+ sp_3072_mul_add_54(a+i, m, mu);
+ a[i+1] += a[i] >> 57;
+ }
+ mu = a[i] & 0x7ffffffffffffL;
+ sp_3072_mul_add_54(a+i, m, mu);
+ a[i+1] += a[i] >> 57;
+ a[i] &= 0x1ffffffffffffffL;
+ }
+#else
+ for (i=0; i<53; i++) {
+ mu = (a[i] * mp) & 0x1ffffffffffffffL;
+ sp_3072_mul_add_54(a+i, m, mu);
+ a[i+1] += a[i] >> 57;
+ }
+ mu = (a[i] * mp) & 0x7ffffffffffffL;
+ sp_3072_mul_add_54(a+i, m, mu);
+ a[i+1] += a[i] >> 57;
+ a[i] &= 0x1ffffffffffffffL;
+#endif
+
+ sp_3072_mont_shift_54(a, a);
+ sp_3072_cond_sub_54(a, a, m, 0 - (((a[53] >> 51) > 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ sp_3072_norm_54(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_mul_54(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_3072_mul_54(r, a, b);
+ sp_3072_mont_reduce_54(r, m, mp);
+}
+
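+/* Squaring gets its own wrapper so the dedicated sp_3072_sqr_54, which
+ * needs roughly half the partial products of a general multiply, is used. */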
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_sqr_54(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_3072_sqr_54(r, a);
+ sp_3072_mont_reduce_54(r, m, mp);
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_3072_cond_add_54(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i = 0; i < 54; i++) {
+ r[i] = a[i] + (b[i] & m);
+ }
+#else
+ int i;
+
+ for (i = 0; i < 48; i += 8) {
+ r[i + 0] = a[i + 0] + (b[i + 0] & m);
+ r[i + 1] = a[i + 1] + (b[i + 1] & m);
+ r[i + 2] = a[i + 2] + (b[i + 2] & m);
+ r[i + 3] = a[i + 3] + (b[i + 3] & m);
+ r[i + 4] = a[i + 4] + (b[i + 4] & m);
+ r[i + 5] = a[i + 5] + (b[i + 5] & m);
+ r[i + 6] = a[i + 6] + (b[i + 6] & m);
+ r[i + 7] = a[i + 7] + (b[i + 7] & m);
+ }
+ r[48] = a[48] + (b[48] & m);
+ r[49] = a[49] + (b[49] & m);
+ r[50] = a[50] + (b[50] & m);
+ r[51] = a[51] + (b[51] & m);
+ r[52] = a[52] + (b[52] & m);
+ r[53] = a[53] + (b[53] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SP_DIV_64
+static WC_INLINE sp_digit sp_3072_div_word_54(sp_digit d1, sp_digit d0,
+ sp_digit dv)
+{
+ sp_digit d, r, t;
+
+ /* All 57 bits from d1 and top 6 bits from d0. */
+ d = (d1 << 6) | (d0 >> 51);
+ r = d / dv;
+ d -= r * dv;
+ /* Up to 7 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 45) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 13 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 39) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 19 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 33) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 25 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 27) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 31 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 21) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 37 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 15) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 43 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 9) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 49 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 3) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 55 bits in r */
+ /* Remaining 3 bits from d0. */
+ r <<= 3;
+ d <<= 3;
+ d |= d0 & ((1 << 3) - 1);
+ t = d / dv;
+ r += t;
+
+ return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
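+ *
+ * This is schoolbook long division on 57-bit words: each estimated
+ * quotient word is refined by a subtract, a second estimate and an add.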
+ */ +static int sp_3072_div_54(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_64 + int128_t d1; +#endif + sp_digit dv, r1; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* td; +#else + sp_digit t1d[108], t2d[54 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 54 + 1), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = td; + t2 = td + 2 * 54; +#else + t1 = t1d; + t2 = t2d; +#endif + + dv = d[53]; + XMEMCPY(t1, a, sizeof(*t1) * 2U * 54U); + for (i=53; i>=0; i--) { + t1[54 + i] += t1[54 + i - 1] >> 57; + t1[54 + i - 1] &= 0x1ffffffffffffffL; +#ifndef WOLFSSL_SP_DIV_64 + d1 = t1[54 + i]; + d1 <<= 57; + d1 += t1[54 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_3072_div_word_54(t1[54 + i], t1[54 + i - 1], dv); +#endif + + sp_3072_mul_d_54(t2, d, r1); + (void)sp_3072_sub_54(&t1[i], &t1[i], t2); + t1[54 + i] -= t2[54]; + t1[54 + i] += t1[54 + i - 1] >> 57; + t1[54 + i - 1] &= 0x1ffffffffffffffL; + r1 = (((-t1[54 + i]) << 57) - t1[54 + i - 1]) / dv; + r1++; + sp_3072_mul_d_54(t2, d, r1); + (void)sp_3072_add_54(&t1[i], &t1[i], t2); + t1[54 + i] += t1[54 + i - 1] >> 57; + t1[54 + i - 1] &= 0x1ffffffffffffffL; + } + t1[54 - 1] += t1[54 - 2] >> 57; + t1[54 - 2] &= 0x1ffffffffffffffL; + r1 = t1[54 - 1] / dv; + + sp_3072_mul_d_54(t2, d, r1); + (void)sp_3072_sub_54(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 54U); + for (i=0; i<52; i++) { + r[i+1] += r[i] >> 57; + r[i] &= 0x1ffffffffffffffL; + } + sp_3072_cond_add_54(r, r, d, 0 - ((r[53] < 0) ? + (sp_digit)1 : (sp_digit)0)); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_3072_mod_54(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_3072_div_54(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
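+ *
+ * Three variants follow: a binary Montgomery ladder (WOLFSSL_SP_SMALL),
+ * a cache-resistant variant of it, and a 5-bit window with 32 precomputed
+ * powers.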
+ */ +static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* td; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 54 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3U * 54U * 2U); + + norm = t[0] = td; + t[1] = &td[54 * 2]; + t[2] = &td[2 * 54 * 2]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_54(norm, m); + + if (reduceA != 0) { + err = sp_3072_mod_54(t[1], a, m); + } + else { + XMEMCPY(t[1], a, sizeof(sp_digit) * 54U); + } + } + if (err == MP_OKAY) { + sp_3072_mul_54(t[1], t[1], norm); + err = sp_3072_mod_54(t[1], t[1], m); + } + + if (err == MP_OKAY) { + i = bits / 57; + c = bits % 57; + n = e[i--] << (57 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 57; + } + + y = (n >> 56) & 1; + n <<= 1; + + sp_3072_mont_mul_54(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(*t[2]) * 54 * 2); + sp_3072_mont_sqr_54(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(*t[2]) * 54 * 2); + } + + sp_3072_mont_reduce_54(t[0], m, mp); + n = sp_3072_cmp_54(t[0], m); + sp_3072_cond_sub_54(t[0], t[0], m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 54 * 2); + + } + + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + + return err; +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[3][108]; +#else + sp_digit* td; + sp_digit* t[3]; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 54 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + t[0] = td; + t[1] = &td[54 * 2]; + t[2] = &td[2 * 54 * 2]; +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_54(norm, m); + + if (reduceA != 0) { + err = sp_3072_mod_54(t[1], a, m); + if (err == MP_OKAY) { + sp_3072_mul_54(t[1], t[1], norm); + err = sp_3072_mod_54(t[1], t[1], m); + } + } + else { + sp_3072_mul_54(t[1], a, norm); + err = sp_3072_mod_54(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + i = bits / 57; + c = bits % 57; + n = e[i--] << (57 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 57; + } + + y = (n >> 56) & 1; + n <<= 1; + + sp_3072_mont_mul_54(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); + sp_3072_mont_sqr_54(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); + } + + sp_3072_mont_reduce_54(t[0], m, mp); + n = sp_3072_cmp_54(t[0], m); + sp_3072_cond_sub_54(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(t[0])); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][108]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit rt[108]; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 108, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 108; +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_54(norm, m); + + if (reduceA != 0) { + err = sp_3072_mod_54(t[1], a, m); + if (err == MP_OKAY) { + sp_3072_mul_54(t[1], t[1], norm); + err = sp_3072_mod_54(t[1], t[1], m); + } + } + else { + sp_3072_mul_54(t[1], a, norm); + err = sp_3072_mod_54(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_54(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_54(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_54(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_54(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_54(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_54(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_54(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_54(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_54(t[10], t[ 5], m, mp); + sp_3072_mont_mul_54(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_54(t[12], t[ 6], m, mp); + sp_3072_mont_mul_54(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_54(t[14], t[ 7], m, mp); + sp_3072_mont_mul_54(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_54(t[16], t[ 8], m, mp); + sp_3072_mont_mul_54(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_54(t[18], t[ 9], m, mp); + sp_3072_mont_mul_54(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_54(t[20], t[10], m, mp); + sp_3072_mont_mul_54(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_54(t[22], t[11], m, mp); + sp_3072_mont_mul_54(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_54(t[24], t[12], m, mp); + sp_3072_mont_mul_54(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_54(t[26], t[13], m, mp); + sp_3072_mont_mul_54(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_54(t[28], t[14], m, mp); + sp_3072_mont_mul_54(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_54(t[30], t[15], m, mp); + sp_3072_mont_mul_54(t[31], t[16], t[15], m, mp); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 56) / 57) - 1; + c = bits % 57; + if (c == 0) { + c = 57; + } + if (i < 54) { + n = e[i--] << (64 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (7 - c); + c += 57; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + XMEMCPY(rt, t[y], sizeof(rt)); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (7 - c); + c += 57; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + + sp_3072_mont_sqr_54(rt, rt, m, mp); + sp_3072_mont_sqr_54(rt, rt, m, mp); + sp_3072_mont_sqr_54(rt, rt, m, mp); + sp_3072_mont_sqr_54(rt, rt, m, mp); + sp_3072_mont_sqr_54(rt, rt, m, mp); + + sp_3072_mont_mul_54(rt, rt, t[y], m, mp); + } + + sp_3072_mont_reduce_54(rt, m, mp); + n = sp_3072_cmp_54(rt, m); + sp_3072_cond_sub_54(rt, rt, m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, rt, sizeof(rt)); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#endif +} +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */ + /* WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; + sp_digit* norm; + sp_digit e[1] = {0}; + sp_digit mp; + int i; + int err = MP_OKAY; + + if (*outLen < 384U) { + err = MP_TO_E; + } + + if (err == MP_OKAY) { + if (mp_count_bits(em) > 57) { + err = MP_READ_E; + } + if (inLen > 384U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 54 * 2; + m = r + 54 * 2; + norm = r; + + sp_3072_from_bin(a, 54, in, inLen); +#if DIGIT_BIT >= 57 + e[0] = (sp_digit)em->dp[0]; +#else + e[0] = (sp_digit)em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_mp(m, 54, mm); + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_54(norm, m); + } + if (err == MP_OKAY) { + sp_3072_mul_54(a, a, norm); + err = sp_3072_mod_54(a, a, m); + } + if (err == MP_OKAY) { + for (i=56; i>=0; i--) { + if ((e[0] >> i) != 0) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 54 * 2); + for (i--; i>=0; i--) { + sp_3072_mont_sqr_54(r, r, m, mp); + + if (((e[0] >> i) & 1) == 1) { + sp_3072_mont_mul_54(r, r, a, m, mp); + } + } + sp_3072_mont_reduce_54(r, m, mp); + mp = sp_3072_cmp_54(r, m); + sp_3072_cond_sub_54(r, r, m, ((mp < 0) ? 
+ (sp_digit)1 : (sp_digit)0)- 1); + + sp_3072_to_bin(r, out); + *outLen = 384; + } + + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit ad[108], md[54], rd[108]; +#else + sp_digit* d = NULL; +#endif + sp_digit* a; + sp_digit* m; + sp_digit* r; + sp_digit e[1] = {0}; + int err = MP_OKAY; + + if (*outLen < 384U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(em) > 57) { + err = MP_READ_E; + } + if (inLen > 384U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + a = d; + r = a + 54 * 2; + m = r + 54 * 2; + } +#else + a = ad; + m = md; + r = rd; +#endif + + if (err == MP_OKAY) { + sp_3072_from_bin(a, 54, in, inLen); +#if DIGIT_BIT >= 57 + e[0] = (sp_digit)em->dp[0]; +#else + e[0] = (sp_digit)em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_3072_from_mp(m, 54, mm); + + if (e[0] == 0x3) { + sp_3072_sqr_54(r, a); + err = sp_3072_mod_54(r, r, m); + if (err == MP_OKAY) { + sp_3072_mul_54(r, a, r); + err = sp_3072_mod_54(r, r, m); + } + } + else { + sp_digit* norm = r; + int i; + sp_digit mp; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_54(norm, m); + + sp_3072_mul_54(a, a, norm); + err = sp_3072_mod_54(a, a, m); + + if (err == MP_OKAY) { + for (i=56; i>=0; i--) { + if ((e[0] >> i) != 0) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 108U); + for (i--; i>=0; i--) { + sp_3072_mont_sqr_54(r, r, m, mp); + + if (((e[0] >> i) & 1) == 1) { + sp_3072_mont_mul_54(r, r, a, m, mp); + } + } + sp_3072_mont_reduce_54(r, m, mp); + mp = sp_3072_cmp_54(r, m); + sp_3072_cond_sub_54(r, r, m, ((mp < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + } + } + + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } +#endif + + return err; +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) +#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */ +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
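+ *
+ * When SP_RSA_PRIVATE_EXP_D or RSA_LOW_MEM is defined only dm and mm are
+ * used; otherwise the CRT parameters (pm, qm, dpm, dqm, qim) are used and
+ * dm is ignored.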
+ */ +int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* a; + sp_digit* d = NULL; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 384U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 3072) { + err = MP_READ_E; + } + if (inLen > 384) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 4, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = d + 54; + m = a + 108; + r = a; + + sp_3072_from_bin(a, 54, in, inLen); + sp_3072_from_mp(d, 54, dm); + sp_3072_from_mp(m, 54, mm); + err = sp_3072_mod_exp_54(r, a, d, 3072, m, 0); + } + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + } + + if (d != NULL) { + XMEMSET(d, 0, sizeof(sp_digit) * 54); + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else + sp_digit a[108], d[54], m[54]; + sp_digit* r = a; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 384U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 3072) { + err = MP_READ_E; + } + if (inLen > 384U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_bin(a, 54, in, inLen); + sp_3072_from_mp(d, 54, dm); + sp_3072_from_mp(m, 54, mm); + err = sp_3072_mod_exp_54(r, a, d, 3072, m, 0); + } + + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + } + + XMEMSET(d, 0, sizeof(sp_digit) * 54); + + return err; +#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ +#else +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* dq; + sp_digit* qi; + sp_digit* tmpa; + sp_digit* tmpb; + sp_digit* r; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 384U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (inLen > 384) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 27 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = t; + p = a + 54 * 2; + q = p + 27; + qi = dq = dp = q + 27; + tmpa = qi + 27; + tmpb = tmpa + 54; + + r = t + 54; + + sp_3072_from_bin(a, 54, in, inLen); + sp_3072_from_mp(p, 27, pm); + sp_3072_from_mp(q, 27, qm); + sp_3072_from_mp(dp, 27, dpm); + err = sp_3072_mod_exp_27(tmpa, a, dp, 1536, p, 1); + } + if (err == MP_OKAY) { + sp_3072_from_mp(dq, 27, dqm); + err = sp_3072_mod_exp_27(tmpb, a, dq, 1536, q, 1); + } + if (err == MP_OKAY) { + (void)sp_3072_sub_27(tmpa, tmpa, tmpb); + sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63)); + sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63)); + + sp_3072_from_mp(qi, 27, qim); + sp_3072_mul_27(tmpa, tmpa, qi); + err = sp_3072_mod_27(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_3072_mul_27(tmpa, q, tmpa); + (void)sp_3072_add_54(r, tmpb, tmpa); + 
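/* r = tmpb + q * ((tmpa - tmpb) * qi mod p): Garner's CRT recombination. */ +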
sp_3072_norm_54(r); + + sp_3072_to_bin(r, out); + *outLen = 384; + } + + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 27 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else + sp_digit a[54 * 2]; + sp_digit p[27], q[27], dp[27], dq[27], qi[27]; + sp_digit tmpa[54], tmpb[54]; + sp_digit* r = a; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 384U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (inLen > 384U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_bin(a, 54, in, inLen); + sp_3072_from_mp(p, 27, pm); + sp_3072_from_mp(q, 27, qm); + sp_3072_from_mp(dp, 27, dpm); + sp_3072_from_mp(dq, 27, dqm); + sp_3072_from_mp(qi, 27, qim); + + err = sp_3072_mod_exp_27(tmpa, a, dp, 1536, p, 1); + } + if (err == MP_OKAY) { + err = sp_3072_mod_exp_27(tmpb, a, dq, 1536, q, 1); + } + + if (err == MP_OKAY) { + (void)sp_3072_sub_27(tmpa, tmpa, tmpb); + sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63)); + sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63)); + sp_3072_mul_27(tmpa, tmpa, qi); + err = sp_3072_mod_27(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_3072_mul_27(tmpa, tmpa, q); + (void)sp_3072_add_54(r, tmpb, tmpa); + sp_3072_norm_54(r); + + sp_3072_to_bin(r, out); + *outLen = 384; + } + + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); + XMEMSET(dq, 0, sizeof(dq)); + XMEMSET(qi, 0, sizeof(qi)); + + return err; +#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +} + +#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_3072_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 57 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 54); + r->used = 54; + mp_clamp(r); +#elif DIGIT_BIT < 57 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 54; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 57) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 57 - s; + } + r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 54; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 57 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 57 - s; + } + else { + s += 57; + } + } + r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
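+ *
+ * base and exp must be no more than 3072 bits and mod must be exactly
+ * 3072 bits; otherwise MP_READ_E is returned.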
+ */ +int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 54 * 2; + m = e + 54; + r = b; + + sp_3072_from_mp(b, 54, base); + sp_3072_from_mp(e, 54, exp); + sp_3072_from_mp(m, 54, mod); + + err = sp_3072_mod_exp_54(r, b, e, mp_count_bits(exp), m, 0); + } + + if (err == MP_OKAY) { + err = sp_3072_to_mp(r, res); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 54U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[108], ed[54], md[54]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + } + +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 54 * 2; + m = e + 54; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 54, base); + sp_3072_from_mp(e, 54, exp); + sp_3072_from_mp(m, 54, mod); + + err = sp_3072_mod_exp_54(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_3072_to_mp(r, res); + } + + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 54U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +#else + XMEMSET(e, 0, sizeof(sp_digit) * 54U); +#endif + + return err; +#endif +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_3072 +SP_NOINLINE static void sp_3072_lshift_54(sp_digit* r, sp_digit* a, byte n) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + r[54] = a[53] >> (57 - n); + for (i=53; i>0; i--) { + r[i] = ((a[i] << n) | (a[i-1] >> (57 - n))) & 0x1ffffffffffffffL; + } +#else + sp_int_digit s, t; + + s = (sp_int_digit)a[53]; + r[54] = s >> (57U - n); + s = (sp_int_digit)(a[53]); t = (sp_int_digit)(a[52]); + r[53] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[52]); t = (sp_int_digit)(a[51]); + r[52] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[51]); t = (sp_int_digit)(a[50]); + r[51] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[50]); t = (sp_int_digit)(a[49]); + r[50] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[49]); t = (sp_int_digit)(a[48]); + r[49] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[48]); t = (sp_int_digit)(a[47]); + r[48] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[47]); t = (sp_int_digit)(a[46]); + r[47] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[46]); t = (sp_int_digit)(a[45]); + r[46] = ((s << n) | (t >> (57U - n))) & 
0x1ffffffffffffffUL; + s = (sp_int_digit)(a[45]); t = (sp_int_digit)(a[44]); + r[45] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[44]); t = (sp_int_digit)(a[43]); + r[44] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[43]); t = (sp_int_digit)(a[42]); + r[43] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[42]); t = (sp_int_digit)(a[41]); + r[42] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]); + r[41] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]); + r[40] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]); + r[39] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]); + r[38] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]); + r[37] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]); + r[36] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]); + r[35] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]); + r[34] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]); + r[33] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]); + r[32] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]); + r[31] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]); + r[30] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]); + r[29] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]); + r[28] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]); + r[27] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]); + r[26] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]); + r[25] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]); + r[24] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]); + r[23] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]); + r[22] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]); + r[21] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]); + r[20] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]); + r[19] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]); + r[18] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]); + r[17] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]); + r[16] = ((s << n) | (t 
>> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]); + r[15] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]); + r[14] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]); + r[13] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]); + r[12] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]); + r[11] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]); + r[10] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]); + r[9] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]); + r[8] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]); + r[7] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]); + r[6] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]); + r[5] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]); + r[4] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]); + r[3] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]); + r[2] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; + s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]); + r[1] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL; +#endif + r[0] = (a[0] << n) & 0x1ffffffffffffffL; +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
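+ *
+ * With a base of 2 each multiply step becomes a left shift of the
+ * Montgomery accumulator followed by a single-word reduction of the bits
+ * shifted out of the top.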
+ */ +static int sp_3072_mod_exp_2_54(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[108]; + sp_digit td[55]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 163, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 108; + XMEMSET(td, 0, sizeof(sp_digit) * 163); +#else + norm = nd; + tmp = td; + XMEMSET(td, 0, sizeof(td)); +#endif + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_54(norm, m); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 56) / 57) - 1; + c = bits % 57; + if (c == 0) { + c = 57; + } + if (i < 54) { + n = e[i--] << (64 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (7 - c); + c += 57; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + sp_3072_lshift_54(r, norm, y); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (7 - c); + c += 57; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + + sp_3072_mont_sqr_54(r, r, m, mp); + sp_3072_mont_sqr_54(r, r, m, mp); + sp_3072_mont_sqr_54(r, r, m, mp); + sp_3072_mont_sqr_54(r, r, m, mp); + sp_3072_mont_sqr_54(r, r, m, mp); + + sp_3072_lshift_54(r, r, y); + sp_3072_mul_d_54(tmp, norm, (r[54] << 6) + (r[53] >> 51)); + r[54] = 0; + r[53] &= 0x7ffffffffffffL; + (void)sp_3072_add_54(r, r, tmp); + sp_3072_norm_54(r); + o = sp_3072_cmp_54(r, m); + sp_3072_cond_sub_54(r, r, m, ((o < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + + sp_3072_mont_reduce_54(r, m, mp); + n = sp_3072_cmp_54(r, m); + sp_3072_cond_sub_54(r, r, m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +#endif /* HAVE_FFDHE_3072 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
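+ *
+ * Leading zero bytes are stripped from the result, so outLen may be less
+ * than 384 on return. Minimal usage sketch (names are illustrative; the
+ * mp_ints are assumed initialized and set):
+ *
+ *     byte out[384]; word32 outLen = sizeof(out);
+ *     int ret = sp_DhExp_3072(&base, priv, privSz, &prime, out, &outLen);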
+ */ +int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + word32 i; + + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 384) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 54 * 2; + m = e + 54; + r = b; + + sp_3072_from_mp(b, 54, base); + sp_3072_from_bin(e, 54, exp, expLen); + sp_3072_from_mp(m, 54, mod); + + #ifdef HAVE_FFDHE_3072 + if (base->used == 1 && base->dp[0] == 2 && + (m[53] >> 19) == 0xffffffffL) { + err = sp_3072_mod_exp_2_54(r, e, expLen * 8, m); + } + else + #endif + err = sp_3072_mod_exp_54(r, b, e, expLen * 8, m, 0); + } + + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + for (i=0; i<384 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 54U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[108], ed[54], md[54]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + word32 i; + int err = MP_OKAY; + + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 384U) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + } +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 54 * 2; + m = e + 54; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 54, base); + sp_3072_from_bin(e, 54, exp, expLen); + sp_3072_from_mp(m, 54, mod); + + #ifdef HAVE_FFDHE_3072 + if (base->used == 1 && base->dp[0] == 2U && + (m[53] >> 19) == 0xffffffffL) { + err = sp_3072_mod_exp_2_54(r, e, expLen * 8U, m); + } + else { + #endif + err = sp_3072_mod_exp_54(r, b, e, expLen * 8U, m, 0); + #ifdef HAVE_FFDHE_3072 + } + #endif + } + + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + for (i=0; i<384U && out[i] == 0U; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 54U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +#else + XMEMSET(e, 0, sizeof(sp_digit) * 54U); +#endif + + return err; +#endif +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
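+ *
+ * Operands are at most 1536 bits; the half-size (27 digit) routines of the
+ * 3072-bit implementation are reused and the upper half of the result is
+ * zeroed.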
+ */ +int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1536) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 1536) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 1536) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 27 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 27 * 2; + m = e + 27; + r = b; + + sp_3072_from_mp(b, 27, base); + sp_3072_from_mp(e, 27, exp); + sp_3072_from_mp(m, 27, mod); + + err = sp_3072_mod_exp_27(r, b, e, mp_count_bits(exp), m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 27, 0, sizeof(*r) * 27U); + err = sp_3072_to_mp(r, res); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 27U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[54], ed[27], md[27]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1536) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 1536) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 1536) { + err = MP_READ_E; + } + } + +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 27 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 27 * 2; + m = e + 27; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 27, base); + sp_3072_from_mp(e, 27, exp); + sp_3072_from_mp(m, 27, mod); + + err = sp_3072_mod_exp_27(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 27, 0, sizeof(*r) * 27U); + err = sp_3072_to_mp(r, res); + } + + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 27U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +#else + XMEMSET(e, 0, sizeof(sp_digit) * 27U); +#endif + + return err; +#endif +} + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* !WOLFSSL_SP_NO_3072 */ + +#ifdef WOLFSSL_SP_4096 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 45U) { + r[j] &= 0x1fffffffffffffL; + s = 53U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. 
+ */ +static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 53 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 53 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0x1fffffffffffffL; + s = 53U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 53U) <= (word32)DIGIT_BIT) { + s += 53U; + r[j] &= 0x1fffffffffffffL; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 53) { + r[j] &= 0x1fffffffffffffL; + if (j + 1 >= size) { + break; + } + s = 53 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 512 + * + * r A single precision integer. + * a Byte array. + */ +static void sp_4096_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + for (i=0; i<77; i++) { + r[i+1] += r[i] >> 53; + r[i] &= 0x1fffffffffffffL; + } + j = 4096 / 8 - 1; + a[j] = 0; + for (i=0; i<78 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ + b += 8 - s; + if (j < 0) { + break; + } + while (b < 53) { + a[j--] = (byte)(r[i] >> b); + b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 53); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
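+ *
+ * Products are accumulated in 128-bit integers; with 53-bit digits a
+ * column of thirteen 106-bit products fits without overflow.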
+ */ +SP_NOINLINE static void sp_4096_mul_13(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int128_t t0 = ((int128_t)a[ 0]) * b[ 0]; + int128_t t1 = ((int128_t)a[ 0]) * b[ 1] + + ((int128_t)a[ 1]) * b[ 0]; + int128_t t2 = ((int128_t)a[ 0]) * b[ 2] + + ((int128_t)a[ 1]) * b[ 1] + + ((int128_t)a[ 2]) * b[ 0]; + int128_t t3 = ((int128_t)a[ 0]) * b[ 3] + + ((int128_t)a[ 1]) * b[ 2] + + ((int128_t)a[ 2]) * b[ 1] + + ((int128_t)a[ 3]) * b[ 0]; + int128_t t4 = ((int128_t)a[ 0]) * b[ 4] + + ((int128_t)a[ 1]) * b[ 3] + + ((int128_t)a[ 2]) * b[ 2] + + ((int128_t)a[ 3]) * b[ 1] + + ((int128_t)a[ 4]) * b[ 0]; + int128_t t5 = ((int128_t)a[ 0]) * b[ 5] + + ((int128_t)a[ 1]) * b[ 4] + + ((int128_t)a[ 2]) * b[ 3] + + ((int128_t)a[ 3]) * b[ 2] + + ((int128_t)a[ 4]) * b[ 1] + + ((int128_t)a[ 5]) * b[ 0]; + int128_t t6 = ((int128_t)a[ 0]) * b[ 6] + + ((int128_t)a[ 1]) * b[ 5] + + ((int128_t)a[ 2]) * b[ 4] + + ((int128_t)a[ 3]) * b[ 3] + + ((int128_t)a[ 4]) * b[ 2] + + ((int128_t)a[ 5]) * b[ 1] + + ((int128_t)a[ 6]) * b[ 0]; + int128_t t7 = ((int128_t)a[ 0]) * b[ 7] + + ((int128_t)a[ 1]) * b[ 6] + + ((int128_t)a[ 2]) * b[ 5] + + ((int128_t)a[ 3]) * b[ 4] + + ((int128_t)a[ 4]) * b[ 3] + + ((int128_t)a[ 5]) * b[ 2] + + ((int128_t)a[ 6]) * b[ 1] + + ((int128_t)a[ 7]) * b[ 0]; + int128_t t8 = ((int128_t)a[ 0]) * b[ 8] + + ((int128_t)a[ 1]) * b[ 7] + + ((int128_t)a[ 2]) * b[ 6] + + ((int128_t)a[ 3]) * b[ 5] + + ((int128_t)a[ 4]) * b[ 4] + + ((int128_t)a[ 5]) * b[ 3] + + ((int128_t)a[ 6]) * b[ 2] + + ((int128_t)a[ 7]) * b[ 1] + + ((int128_t)a[ 8]) * b[ 0]; + int128_t t9 = ((int128_t)a[ 0]) * b[ 9] + + ((int128_t)a[ 1]) * b[ 8] + + ((int128_t)a[ 2]) * b[ 7] + + ((int128_t)a[ 3]) * b[ 6] + + ((int128_t)a[ 4]) * b[ 5] + + ((int128_t)a[ 5]) * b[ 4] + + ((int128_t)a[ 6]) * b[ 3] + + ((int128_t)a[ 7]) * b[ 2] + + ((int128_t)a[ 8]) * b[ 1] + + ((int128_t)a[ 9]) * b[ 0]; + int128_t t10 = ((int128_t)a[ 0]) * b[10] + + ((int128_t)a[ 1]) * b[ 9] + + ((int128_t)a[ 2]) * b[ 8] + + ((int128_t)a[ 3]) * b[ 7] + + ((int128_t)a[ 4]) * b[ 6] + + ((int128_t)a[ 5]) * b[ 5] + + ((int128_t)a[ 6]) * b[ 4] + + ((int128_t)a[ 7]) * b[ 3] + + ((int128_t)a[ 8]) * b[ 2] + + ((int128_t)a[ 9]) * b[ 1] + + ((int128_t)a[10]) * b[ 0]; + int128_t t11 = ((int128_t)a[ 0]) * b[11] + + ((int128_t)a[ 1]) * b[10] + + ((int128_t)a[ 2]) * b[ 9] + + ((int128_t)a[ 3]) * b[ 8] + + ((int128_t)a[ 4]) * b[ 7] + + ((int128_t)a[ 5]) * b[ 6] + + ((int128_t)a[ 6]) * b[ 5] + + ((int128_t)a[ 7]) * b[ 4] + + ((int128_t)a[ 8]) * b[ 3] + + ((int128_t)a[ 9]) * b[ 2] + + ((int128_t)a[10]) * b[ 1] + + ((int128_t)a[11]) * b[ 0]; + int128_t t12 = ((int128_t)a[ 0]) * b[12] + + ((int128_t)a[ 1]) * b[11] + + ((int128_t)a[ 2]) * b[10] + + ((int128_t)a[ 3]) * b[ 9] + + ((int128_t)a[ 4]) * b[ 8] + + ((int128_t)a[ 5]) * b[ 7] + + ((int128_t)a[ 6]) * b[ 6] + + ((int128_t)a[ 7]) * b[ 5] + + ((int128_t)a[ 8]) * b[ 4] + + ((int128_t)a[ 9]) * b[ 3] + + ((int128_t)a[10]) * b[ 2] + + ((int128_t)a[11]) * b[ 1] + + ((int128_t)a[12]) * b[ 0]; + int128_t t13 = ((int128_t)a[ 1]) * b[12] + + ((int128_t)a[ 2]) * b[11] + + ((int128_t)a[ 3]) * b[10] + + ((int128_t)a[ 4]) * b[ 9] + + ((int128_t)a[ 5]) * b[ 8] + + ((int128_t)a[ 6]) * b[ 7] + + ((int128_t)a[ 7]) * b[ 6] + + ((int128_t)a[ 8]) * b[ 5] + + ((int128_t)a[ 9]) * b[ 4] + + ((int128_t)a[10]) * b[ 3] + + ((int128_t)a[11]) * b[ 2] + + ((int128_t)a[12]) * b[ 1]; + int128_t t14 = ((int128_t)a[ 2]) * b[12] + + ((int128_t)a[ 3]) * b[11] + + ((int128_t)a[ 4]) * b[10] + + ((int128_t)a[ 5]) * b[ 9] + + ((int128_t)a[ 6]) * b[ 8] + + 
((int128_t)a[ 7]) * b[ 7] + + ((int128_t)a[ 8]) * b[ 6] + + ((int128_t)a[ 9]) * b[ 5] + + ((int128_t)a[10]) * b[ 4] + + ((int128_t)a[11]) * b[ 3] + + ((int128_t)a[12]) * b[ 2]; + int128_t t15 = ((int128_t)a[ 3]) * b[12] + + ((int128_t)a[ 4]) * b[11] + + ((int128_t)a[ 5]) * b[10] + + ((int128_t)a[ 6]) * b[ 9] + + ((int128_t)a[ 7]) * b[ 8] + + ((int128_t)a[ 8]) * b[ 7] + + ((int128_t)a[ 9]) * b[ 6] + + ((int128_t)a[10]) * b[ 5] + + ((int128_t)a[11]) * b[ 4] + + ((int128_t)a[12]) * b[ 3]; + int128_t t16 = ((int128_t)a[ 4]) * b[12] + + ((int128_t)a[ 5]) * b[11] + + ((int128_t)a[ 6]) * b[10] + + ((int128_t)a[ 7]) * b[ 9] + + ((int128_t)a[ 8]) * b[ 8] + + ((int128_t)a[ 9]) * b[ 7] + + ((int128_t)a[10]) * b[ 6] + + ((int128_t)a[11]) * b[ 5] + + ((int128_t)a[12]) * b[ 4]; + int128_t t17 = ((int128_t)a[ 5]) * b[12] + + ((int128_t)a[ 6]) * b[11] + + ((int128_t)a[ 7]) * b[10] + + ((int128_t)a[ 8]) * b[ 9] + + ((int128_t)a[ 9]) * b[ 8] + + ((int128_t)a[10]) * b[ 7] + + ((int128_t)a[11]) * b[ 6] + + ((int128_t)a[12]) * b[ 5]; + int128_t t18 = ((int128_t)a[ 6]) * b[12] + + ((int128_t)a[ 7]) * b[11] + + ((int128_t)a[ 8]) * b[10] + + ((int128_t)a[ 9]) * b[ 9] + + ((int128_t)a[10]) * b[ 8] + + ((int128_t)a[11]) * b[ 7] + + ((int128_t)a[12]) * b[ 6]; + int128_t t19 = ((int128_t)a[ 7]) * b[12] + + ((int128_t)a[ 8]) * b[11] + + ((int128_t)a[ 9]) * b[10] + + ((int128_t)a[10]) * b[ 9] + + ((int128_t)a[11]) * b[ 8] + + ((int128_t)a[12]) * b[ 7]; + int128_t t20 = ((int128_t)a[ 8]) * b[12] + + ((int128_t)a[ 9]) * b[11] + + ((int128_t)a[10]) * b[10] + + ((int128_t)a[11]) * b[ 9] + + ((int128_t)a[12]) * b[ 8]; + int128_t t21 = ((int128_t)a[ 9]) * b[12] + + ((int128_t)a[10]) * b[11] + + ((int128_t)a[11]) * b[10] + + ((int128_t)a[12]) * b[ 9]; + int128_t t22 = ((int128_t)a[10]) * b[12] + + ((int128_t)a[11]) * b[11] + + ((int128_t)a[12]) * b[10]; + int128_t t23 = ((int128_t)a[11]) * b[12] + + ((int128_t)a[12]) * b[11]; + int128_t t24 = ((int128_t)a[12]) * b[12]; + + t1 += t0 >> 53; r[ 0] = t0 & 0x1fffffffffffffL; + t2 += t1 >> 53; r[ 1] = t1 & 0x1fffffffffffffL; + t3 += t2 >> 53; r[ 2] = t2 & 0x1fffffffffffffL; + t4 += t3 >> 53; r[ 3] = t3 & 0x1fffffffffffffL; + t5 += t4 >> 53; r[ 4] = t4 & 0x1fffffffffffffL; + t6 += t5 >> 53; r[ 5] = t5 & 0x1fffffffffffffL; + t7 += t6 >> 53; r[ 6] = t6 & 0x1fffffffffffffL; + t8 += t7 >> 53; r[ 7] = t7 & 0x1fffffffffffffL; + t9 += t8 >> 53; r[ 8] = t8 & 0x1fffffffffffffL; + t10 += t9 >> 53; r[ 9] = t9 & 0x1fffffffffffffL; + t11 += t10 >> 53; r[10] = t10 & 0x1fffffffffffffL; + t12 += t11 >> 53; r[11] = t11 & 0x1fffffffffffffL; + t13 += t12 >> 53; r[12] = t12 & 0x1fffffffffffffL; + t14 += t13 >> 53; r[13] = t13 & 0x1fffffffffffffL; + t15 += t14 >> 53; r[14] = t14 & 0x1fffffffffffffL; + t16 += t15 >> 53; r[15] = t15 & 0x1fffffffffffffL; + t17 += t16 >> 53; r[16] = t16 & 0x1fffffffffffffL; + t18 += t17 >> 53; r[17] = t17 & 0x1fffffffffffffL; + t19 += t18 >> 53; r[18] = t18 & 0x1fffffffffffffL; + t20 += t19 >> 53; r[19] = t19 & 0x1fffffffffffffL; + t21 += t20 >> 53; r[20] = t20 & 0x1fffffffffffffL; + t22 += t21 >> 53; r[21] = t21 & 0x1fffffffffffffL; + t23 += t22 >> 53; r[22] = t22 & 0x1fffffffffffffL; + t24 += t23 >> 53; r[23] = t23 & 0x1fffffffffffffL; + r[25] = (sp_digit)(t24 >> 53); + r[24] = t24 & 0x1fffffffffffffL; +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
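+ *
+ * Off-diagonal products are computed once and doubled; only the squares
+ * a[i]*a[i] enter each column undoubled.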
+ */ +SP_NOINLINE static void sp_4096_sqr_13(sp_digit* r, const sp_digit* a) +{ + int128_t t0 = ((int128_t)a[ 0]) * a[ 0]; + int128_t t1 = (((int128_t)a[ 0]) * a[ 1]) * 2; + int128_t t2 = (((int128_t)a[ 0]) * a[ 2]) * 2 + + ((int128_t)a[ 1]) * a[ 1]; + int128_t t3 = (((int128_t)a[ 0]) * a[ 3] + + ((int128_t)a[ 1]) * a[ 2]) * 2; + int128_t t4 = (((int128_t)a[ 0]) * a[ 4] + + ((int128_t)a[ 1]) * a[ 3]) * 2 + + ((int128_t)a[ 2]) * a[ 2]; + int128_t t5 = (((int128_t)a[ 0]) * a[ 5] + + ((int128_t)a[ 1]) * a[ 4] + + ((int128_t)a[ 2]) * a[ 3]) * 2; + int128_t t6 = (((int128_t)a[ 0]) * a[ 6] + + ((int128_t)a[ 1]) * a[ 5] + + ((int128_t)a[ 2]) * a[ 4]) * 2 + + ((int128_t)a[ 3]) * a[ 3]; + int128_t t7 = (((int128_t)a[ 0]) * a[ 7] + + ((int128_t)a[ 1]) * a[ 6] + + ((int128_t)a[ 2]) * a[ 5] + + ((int128_t)a[ 3]) * a[ 4]) * 2; + int128_t t8 = (((int128_t)a[ 0]) * a[ 8] + + ((int128_t)a[ 1]) * a[ 7] + + ((int128_t)a[ 2]) * a[ 6] + + ((int128_t)a[ 3]) * a[ 5]) * 2 + + ((int128_t)a[ 4]) * a[ 4]; + int128_t t9 = (((int128_t)a[ 0]) * a[ 9] + + ((int128_t)a[ 1]) * a[ 8] + + ((int128_t)a[ 2]) * a[ 7] + + ((int128_t)a[ 3]) * a[ 6] + + ((int128_t)a[ 4]) * a[ 5]) * 2; + int128_t t10 = (((int128_t)a[ 0]) * a[10] + + ((int128_t)a[ 1]) * a[ 9] + + ((int128_t)a[ 2]) * a[ 8] + + ((int128_t)a[ 3]) * a[ 7] + + ((int128_t)a[ 4]) * a[ 6]) * 2 + + ((int128_t)a[ 5]) * a[ 5]; + int128_t t11 = (((int128_t)a[ 0]) * a[11] + + ((int128_t)a[ 1]) * a[10] + + ((int128_t)a[ 2]) * a[ 9] + + ((int128_t)a[ 3]) * a[ 8] + + ((int128_t)a[ 4]) * a[ 7] + + ((int128_t)a[ 5]) * a[ 6]) * 2; + int128_t t12 = (((int128_t)a[ 0]) * a[12] + + ((int128_t)a[ 1]) * a[11] + + ((int128_t)a[ 2]) * a[10] + + ((int128_t)a[ 3]) * a[ 9] + + ((int128_t)a[ 4]) * a[ 8] + + ((int128_t)a[ 5]) * a[ 7]) * 2 + + ((int128_t)a[ 6]) * a[ 6]; + int128_t t13 = (((int128_t)a[ 1]) * a[12] + + ((int128_t)a[ 2]) * a[11] + + ((int128_t)a[ 3]) * a[10] + + ((int128_t)a[ 4]) * a[ 9] + + ((int128_t)a[ 5]) * a[ 8] + + ((int128_t)a[ 6]) * a[ 7]) * 2; + int128_t t14 = (((int128_t)a[ 2]) * a[12] + + ((int128_t)a[ 3]) * a[11] + + ((int128_t)a[ 4]) * a[10] + + ((int128_t)a[ 5]) * a[ 9] + + ((int128_t)a[ 6]) * a[ 8]) * 2 + + ((int128_t)a[ 7]) * a[ 7]; + int128_t t15 = (((int128_t)a[ 3]) * a[12] + + ((int128_t)a[ 4]) * a[11] + + ((int128_t)a[ 5]) * a[10] + + ((int128_t)a[ 6]) * a[ 9] + + ((int128_t)a[ 7]) * a[ 8]) * 2; + int128_t t16 = (((int128_t)a[ 4]) * a[12] + + ((int128_t)a[ 5]) * a[11] + + ((int128_t)a[ 6]) * a[10] + + ((int128_t)a[ 7]) * a[ 9]) * 2 + + ((int128_t)a[ 8]) * a[ 8]; + int128_t t17 = (((int128_t)a[ 5]) * a[12] + + ((int128_t)a[ 6]) * a[11] + + ((int128_t)a[ 7]) * a[10] + + ((int128_t)a[ 8]) * a[ 9]) * 2; + int128_t t18 = (((int128_t)a[ 6]) * a[12] + + ((int128_t)a[ 7]) * a[11] + + ((int128_t)a[ 8]) * a[10]) * 2 + + ((int128_t)a[ 9]) * a[ 9]; + int128_t t19 = (((int128_t)a[ 7]) * a[12] + + ((int128_t)a[ 8]) * a[11] + + ((int128_t)a[ 9]) * a[10]) * 2; + int128_t t20 = (((int128_t)a[ 8]) * a[12] + + ((int128_t)a[ 9]) * a[11]) * 2 + + ((int128_t)a[10]) * a[10]; + int128_t t21 = (((int128_t)a[ 9]) * a[12] + + ((int128_t)a[10]) * a[11]) * 2; + int128_t t22 = (((int128_t)a[10]) * a[12]) * 2 + + ((int128_t)a[11]) * a[11]; + int128_t t23 = (((int128_t)a[11]) * a[12]) * 2; + int128_t t24 = ((int128_t)a[12]) * a[12]; + + t1 += t0 >> 53; r[ 0] = t0 & 0x1fffffffffffffL; + t2 += t1 >> 53; r[ 1] = t1 & 0x1fffffffffffffL; + t3 += t2 >> 53; r[ 2] = t2 & 0x1fffffffffffffL; + t4 += t3 >> 53; r[ 3] = t3 & 0x1fffffffffffffL; + t5 += t4 >> 53; r[ 4] = t4 & 0x1fffffffffffffL; + t6 += t5 >> 
53; r[ 5] = t5 & 0x1fffffffffffffL; + t7 += t6 >> 53; r[ 6] = t6 & 0x1fffffffffffffL; + t8 += t7 >> 53; r[ 7] = t7 & 0x1fffffffffffffL; + t9 += t8 >> 53; r[ 8] = t8 & 0x1fffffffffffffL; + t10 += t9 >> 53; r[ 9] = t9 & 0x1fffffffffffffL; + t11 += t10 >> 53; r[10] = t10 & 0x1fffffffffffffL; + t12 += t11 >> 53; r[11] = t11 & 0x1fffffffffffffL; + t13 += t12 >> 53; r[12] = t12 & 0x1fffffffffffffL; + t14 += t13 >> 53; r[13] = t13 & 0x1fffffffffffffL; + t15 += t14 >> 53; r[14] = t14 & 0x1fffffffffffffL; + t16 += t15 >> 53; r[15] = t15 & 0x1fffffffffffffL; + t17 += t16 >> 53; r[16] = t16 & 0x1fffffffffffffL; + t18 += t17 >> 53; r[17] = t17 & 0x1fffffffffffffL; + t19 += t18 >> 53; r[18] = t18 & 0x1fffffffffffffL; + t20 += t19 >> 53; r[19] = t19 & 0x1fffffffffffffL; + t21 += t20 >> 53; r[20] = t20 & 0x1fffffffffffffL; + t22 += t21 >> 53; r[21] = t21 & 0x1fffffffffffffL; + t23 += t22 >> 53; r[22] = t22 & 0x1fffffffffffffL; + t24 += t23 >> 53; r[23] = t23 & 0x1fffffffffffffL; + r[25] = (sp_digit)(t24 >> 53); + r[24] = t24 & 0x1fffffffffffffL; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_add_13(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + r[ 0] = a[ 0] + b[ 0]; + r[ 1] = a[ 1] + b[ 1]; + r[ 2] = a[ 2] + b[ 2]; + r[ 3] = a[ 3] + b[ 3]; + r[ 4] = a[ 4] + b[ 4]; + r[ 5] = a[ 5] + b[ 5]; + r[ 6] = a[ 6] + b[ 6]; + r[ 7] = a[ 7] + b[ 7]; + r[ 8] = a[ 8] + b[ 8]; + r[ 9] = a[ 9] + b[ 9]; + r[10] = a[10] + b[10]; + r[11] = a[11] + b[11]; + r[12] = a[12] + b[12]; + + return 0; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_sub_26(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 24; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[24] = a[24] - b[24]; + r[25] = a[25] - b[25]; + + return 0; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_add_26(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 24; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[24] = a[24] + b[24]; + r[25] = a[25] + b[25]; + + return 0; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
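+ *
+ * Three-way Karatsuba: each 39-digit operand is split into three 13-digit
+ * parts and the product is assembled from six 13-digit multiplications.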
+ */ +SP_NOINLINE static void sp_4096_mul_39(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit p0[26]; + sp_digit p1[26]; + sp_digit p2[26]; + sp_digit p3[26]; + sp_digit p4[26]; + sp_digit p5[26]; + sp_digit t0[26]; + sp_digit t1[26]; + sp_digit t2[26]; + sp_digit a0[13]; + sp_digit a1[13]; + sp_digit a2[13]; + sp_digit b0[13]; + sp_digit b1[13]; + sp_digit b2[13]; + (void)sp_4096_add_13(a0, a, &a[13]); + (void)sp_4096_add_13(b0, b, &b[13]); + (void)sp_4096_add_13(a1, &a[13], &a[26]); + (void)sp_4096_add_13(b1, &b[13], &b[26]); + (void)sp_4096_add_13(a2, a0, &a[26]); + (void)sp_4096_add_13(b2, b0, &b[26]); + sp_4096_mul_13(p0, a, b); + sp_4096_mul_13(p2, &a[13], &b[13]); + sp_4096_mul_13(p4, &a[26], &b[26]); + sp_4096_mul_13(p1, a0, b0); + sp_4096_mul_13(p3, a1, b1); + sp_4096_mul_13(p5, a2, b2); + XMEMSET(r, 0, sizeof(*r)*2U*39U); + (void)sp_4096_sub_26(t0, p3, p2); + (void)sp_4096_sub_26(t1, p1, p2); + (void)sp_4096_sub_26(t2, p5, t0); + (void)sp_4096_sub_26(t2, t2, t1); + (void)sp_4096_sub_26(t0, t0, p4); + (void)sp_4096_sub_26(t1, t1, p0); + (void)sp_4096_add_26(r, r, p0); + (void)sp_4096_add_26(&r[13], &r[13], t1); + (void)sp_4096_add_26(&r[26], &r[26], t2); + (void)sp_4096_add_26(&r[39], &r[39], t0); + (void)sp_4096_add_26(&r[52], &r[52], p4); +} + +/* Square a into r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_39(sp_digit* r, const sp_digit* a) +{ + sp_digit p0[26]; + sp_digit p1[26]; + sp_digit p2[26]; + sp_digit p3[26]; + sp_digit p4[26]; + sp_digit p5[26]; + sp_digit t0[26]; + sp_digit t1[26]; + sp_digit t2[26]; + sp_digit a0[13]; + sp_digit a1[13]; + sp_digit a2[13]; + (void)sp_4096_add_13(a0, a, &a[13]); + (void)sp_4096_add_13(a1, &a[13], &a[26]); + (void)sp_4096_add_13(a2, a0, &a[26]); + sp_4096_sqr_13(p0, a); + sp_4096_sqr_13(p2, &a[13]); + sp_4096_sqr_13(p4, &a[26]); + sp_4096_sqr_13(p1, a0); + sp_4096_sqr_13(p3, a1); + sp_4096_sqr_13(p5, a2); + XMEMSET(r, 0, sizeof(*r)*2U*39U); + (void)sp_4096_sub_26(t0, p3, p2); + (void)sp_4096_sub_26(t1, p1, p2); + (void)sp_4096_sub_26(t2, p5, t0); + (void)sp_4096_sub_26(t2, t2, t1); + (void)sp_4096_sub_26(t0, t0, p4); + (void)sp_4096_sub_26(t1, t1, p0); + (void)sp_4096_add_26(r, r, p0); + (void)sp_4096_add_26(&r[13], &r[13], t1); + (void)sp_4096_add_26(&r[26], &r[26], t2); + (void)sp_4096_add_26(&r[39], &r[39], t0); + (void)sp_4096_add_26(&r[52], &r[52], p4); +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_add_39(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 32; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[32] = a[32] + b[32]; + r[33] = a[33] + b[33]; + r[34] = a[34] + b[34]; + r[35] = a[35] + b[35]; + r[36] = a[36] + b[36]; + r[37] = a[37] + b[37]; + r[38] = a[38] + b[38]; + + return 0; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static int sp_4096_add_78(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 72; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[72] = a[72] + b[72]; + r[73] = a[73] + b[73]; + r[74] = a[74] + b[74]; + r[75] = a[75] + b[75]; + r[76] = a[76] + b[76]; + r[77] = a[77] + b[77]; + + return 0; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_sub_78(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 72; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[72] = a[72] - b[72]; + r[73] = a[73] - b[73]; + r[74] = a[74] - b[74]; + r[75] = a[75] - b[75]; + r[76] = a[76] - b[76]; + r[77] = a[77] - b[77]; + + return 0; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_4096_mul_78(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[78]; + sp_digit* a1 = z1; + sp_digit b1[39]; + sp_digit* z2 = r + 78; + (void)sp_4096_add_39(a1, a, &a[39]); + (void)sp_4096_add_39(b1, b, &b[39]); + sp_4096_mul_39(z2, &a[39], &b[39]); + sp_4096_mul_39(z0, a, b); + sp_4096_mul_39(z1, a1, b1); + (void)sp_4096_sub_78(z1, z1, z2); + (void)sp_4096_sub_78(z1, z1, z0); + (void)sp_4096_add_78(r + 39, r + 39, z1); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_78(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z1[78]; + sp_digit* a1 = z1; + sp_digit* z2 = r + 78; + (void)sp_4096_add_39(a1, a, &a[39]); + sp_4096_sqr_39(z2, &a[39]); + sp_4096_sqr_39(z0, a); + sp_4096_sqr_39(z1, a1); + (void)sp_4096_sub_78(z1, z1, z2); + (void)sp_4096_sub_78(z1, z1, z0); + (void)sp_4096_add_78(r + 39, r + 39, z1); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_add_78(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 78; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_sub_78(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 78; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
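+ *
+ * Product scanning: result digits are generated one column at a time, from
+ * the most significant column down, in a single 128-bit accumulator.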
+ */ +SP_NOINLINE static void sp_4096_mul_78(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j, k; + int128_t c; + + c = ((int128_t)a[77]) * b[77]; + r[155] = (sp_digit)(c >> 53); + c = (c & 0x1fffffffffffffL) << 53; + for (k = 153; k >= 0; k--) { + for (i = 77; i >= 0; i--) { + j = k - i; + if (j >= 78) { + break; + } + if (j < 0) { + continue; + } + + c += ((int128_t)a[i]) * b[j]; + } + r[k + 2] += c >> 106; + r[k + 1] = (c >> 53) & 0x1fffffffffffffL; + c = (c & 0x1fffffffffffffL) << 53; + } + r[0] = (sp_digit)(c >> 53); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_78(sp_digit* r, const sp_digit* a) +{ + int i, j, k; + int128_t c; + + c = ((int128_t)a[77]) * a[77]; + r[155] = (sp_digit)(c >> 53); + c = (c & 0x1fffffffffffffL) << 53; + for (k = 153; k >= 0; k--) { + for (i = 77; i >= 0; i--) { + j = k - i; + if (j >= 78 || i <= j) { + break; + } + if (j < 0) { + continue; + } + + c += ((int128_t)a[i]) * a[j] * 2; + } + if (i == j) { + c += ((int128_t)a[i]) * a[i]; + } + + r[k + 2] += c >> 106; + r[k + 1] = (c >> 53) & 0x1fffffffffffffL; + c = (c & 0x1fffffffffffffL) << 53; + } + r[0] = (sp_digit)(c >> 53); +} + +#endif /* WOLFSSL_SP_SMALL */ +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_add_39(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 39; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_sub_39(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 39; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_sub_39(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 32; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[32] = a[32] - b[32]; + r[33] = a[33] - b[33]; + r[34] = a[34] - b[34]; + r[35] = a[35] - b[35]; + r[36] = a[36] - b[36]; + r[37] = a[37] - b[37]; + r[38] = a[38] - b[38]; + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */
+SP_NOINLINE static void sp_4096_mul_39(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j, k;
+    int128_t c;
+
+    c = ((int128_t)a[38]) * b[38];
+    r[77] = (sp_digit)(c >> 53);
+    c = (c & 0x1fffffffffffffL) << 53;
+    for (k = 75; k >= 0; k--) {
+        for (i = 38; i >= 0; i--) {
+            j = k - i;
+            if (j >= 39) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int128_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 106;
+        r[k + 1] = (c >> 53) & 0x1fffffffffffffL;
+        c = (c & 0x1fffffffffffffL) << 53;
+    }
+    r[0] = (sp_digit)(c >> 53);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_39(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int128_t c;
+
+    c = ((int128_t)a[38]) * a[38];
+    r[77] = (sp_digit)(c >> 53);
+    c = (c & 0x1fffffffffffffL) << 53;
+    for (k = 75; k >= 0; k--) {
+        for (i = 38; i >= 0; i--) {
+            j = k - i;
+            if (j >= 39 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int128_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+            c += ((int128_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 106;
+        r[k + 1] = (c >> 53) & 0x1fffffffffffffL;
+        c = (c & 0x1fffffffffffffL) << 53;
+    }
+    r[0] = (sp_digit)(c >> 53);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* WOLFSSL_HAVE_SP_RSA && !SP_RSA_PRIVATE_EXP_D */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a    A single precision number.
+ * rho  Bottom word of inverse.
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
+    x &= 0x1fffffffffffffL;
+
+    /* rho = -1/m mod 2^53 */
+    *rho = (1L << 53) - x;
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
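+ *
+ * Each step multiplies one 53-bit digit by the 53-bit scalar into an
+ * int128_t, so the product (at most 106 bits) plus a 53-bit carry can
+ * never overflow. A minimal sketch of one step (illustrative only, not
+ * part of the generated code):
+ *
+ *     int128_t t = (int128_t)b * a[0];
+ *     r[0] = (sp_digit)(t & 0x1fffffffffffffL);   low 53 bits
+ *     r[1] = (sp_digit)(t >> 53);                 carry digit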
+ */
+SP_NOINLINE static void sp_4096_mul_d_78(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1fffffffffffffL;
+        t >>= 53;
+    }
+    r[78] = (sp_digit)t;
+#else
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1fffffffffffffL;
+    for (i = 0; i < 72; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 53) + (t[6] & 0x1fffffffffffffL);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 53) + (t[7] & 0x1fffffffffffffL);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 53) + (t[0] & 0x1fffffffffffffL);
+    }
+    t[1] = tb * a[73];
+    r[73] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL);
+    t[2] = tb * a[74];
+    r[74] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL);
+    t[3] = tb * a[75];
+    r[75] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL);
+    t[4] = tb * a[76];
+    r[76] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL);
+    t[5] = tb * a[77];
+    r[77] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL);
+    r[78] = (sp_digit)(t[5] >> 53);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_4096_mont_norm_39(sp_digit* r, const sp_digit* m)
+{
+    /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<38; i++) {
+        r[i] = 0x1fffffffffffffL;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r[i + 0] = 0x1fffffffffffffL;
+        r[i + 1] = 0x1fffffffffffffL;
+        r[i + 2] = 0x1fffffffffffffL;
+        r[i + 3] = 0x1fffffffffffffL;
+        r[i + 4] = 0x1fffffffffffffL;
+        r[i + 5] = 0x1fffffffffffffL;
+        r[i + 6] = 0x1fffffffffffffL;
+        r[i + 7] = 0x1fffffffffffffL;
+    }
+    r[32] = 0x1fffffffffffffL;
+    r[33] = 0x1fffffffffffffL;
+    r[34] = 0x1fffffffffffffL;
+    r[35] = 0x1fffffffffffffL;
+    r[36] = 0x1fffffffffffffL;
+    r[37] = 0x1fffffffffffffL;
+#endif
+    r[38] = 0x3ffffffffL;
+
+    /* r = (2^n - 1) mod m */
+    (void)sp_4096_sub_39(r, r, m);
+
+    /* Add one so r = 2^n mod m */
+    r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_4096_cmp_39(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=38; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[38] - b[38]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[37] - b[37]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[36] - b[36]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[35] - b[35]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[34] - b[34]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[33] - b[33]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[32] - b[32]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    for (i = 24; i >= 0; i -= 8) {
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r  A single precision number representing the conditional subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+static void sp_4096_cond_sub_39(sp_digit* r, const sp_digit* a,
+    const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r[i + 0] = a[i + 0] - (b[i + 0] & m);
+        r[i + 1] = a[i + 1] - (b[i + 1] & m);
+        r[i + 2] = a[i + 2] - (b[i + 2] & m);
+        r[i + 3] = a[i + 3] - (b[i + 3] & m);
+        r[i + 4] = a[i + 4] - (b[i + 4] & m);
+        r[i + 5] = a[i + 5] - (b[i + 5] & m);
+        r[i + 6] = a[i + 6] - (b[i + 6] & m);
+        r[i + 7] = a[i + 7] - (b[i + 7] & m);
+    }
+    r[32] = a[32] - (b[32] & m);
+    r[33] = a[33] - (b[33] & m);
+    r[34] = a[34] - (b[34] & m);
+    r[35] = a[35] - (b[35] & m);
+    r[36] = a[36] - (b[36] & m);
+    r[37] = a[37] - (b[37] & m);
+    r[38] = a[38] - (b[38] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
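+ *
+ * This is the inner step of the Montgomery reduction below: each pass
+ * accumulates a multiple of the modulus so that the current low digit
+ * becomes divisible by 2^53. Sketch of how it is driven (mirrors
+ * sp_4096_mont_reduce_39() further down; illustrative only):
+ *
+ *     mu = (a[i] * mp) & 0x1fffffffffffffL;
+ *     sp_4096_mul_add_39(a + i, m, mu);     now a[i] == 0 mod 2^53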
+ */ +SP_NOINLINE static void sp_4096_mul_add_39(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 39; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x1fffffffffffffL; + t >>= 53; + } + r[39] += t; +#else + int128_t tb = b; + int128_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffffffffffffL); + for (i = 0; i < 32; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL)); + t[2] = tb * a[i+2]; + r[i+2] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL)); + t[3] = tb * a[i+3]; + r[i+3] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL)); + t[4] = tb * a[i+4]; + r[i+4] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL)); + t[5] = tb * a[i+5]; + r[i+5] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL)); + t[6] = tb * a[i+6]; + r[i+6] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL)); + t[7] = tb * a[i+7]; + r[i+7] += (sp_digit)((t[6] >> 53) + (t[7] & 0x1fffffffffffffL)); + t[0] = tb * a[i+8]; + r[i+8] += (sp_digit)((t[7] >> 53) + (t[0] & 0x1fffffffffffffL)); + } + t[1] = tb * a[33]; r[33] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL)); + t[2] = tb * a[34]; r[34] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL)); + t[3] = tb * a[35]; r[35] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL)); + t[4] = tb * a[36]; r[36] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL)); + t[5] = tb * a[37]; r[37] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL)); + t[6] = tb * a[38]; r[38] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL)); + r[39] += (sp_digit)(t[6] >> 53); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 53. + * + * a Array of sp_digit to normalize. + */ +static void sp_4096_norm_39(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 38; i++) { + a[i+1] += a[i] >> 53; + a[i] &= 0x1fffffffffffffL; + } +#else + int i; + for (i = 0; i < 32; i += 8) { + a[i+1] += a[i+0] >> 53; a[i+0] &= 0x1fffffffffffffL; + a[i+2] += a[i+1] >> 53; a[i+1] &= 0x1fffffffffffffL; + a[i+3] += a[i+2] >> 53; a[i+2] &= 0x1fffffffffffffL; + a[i+4] += a[i+3] >> 53; a[i+3] &= 0x1fffffffffffffL; + a[i+5] += a[i+4] >> 53; a[i+4] &= 0x1fffffffffffffL; + a[i+6] += a[i+5] >> 53; a[i+5] &= 0x1fffffffffffffL; + a[i+7] += a[i+6] >> 53; a[i+6] &= 0x1fffffffffffffL; + a[i+8] += a[i+7] >> 53; a[i+7] &= 0x1fffffffffffffL; + a[i+9] += a[i+8] >> 53; a[i+8] &= 0x1fffffffffffffL; + } + a[32+1] += a[32] >> 53; + a[32] &= 0x1fffffffffffffL; + a[33+1] += a[33] >> 53; + a[33] &= 0x1fffffffffffffL; + a[34+1] += a[34] >> 53; + a[34] &= 0x1fffffffffffffL; + a[35+1] += a[35] >> 53; + a[35] &= 0x1fffffffffffffL; + a[36+1] += a[36] >> 53; + a[36] &= 0x1fffffffffffffL; + a[37+1] += a[37] >> 53; + a[37] &= 0x1fffffffffffffL; +#endif +} + +/* Shift the result in the high 2048 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. 
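+ *
+ * 39 digits of 53 bits hold 2067 bits, so 2048 is not a whole number of
+ * digits: 2048 = 38*53 + 34. The shift therefore starts from the top 19
+ * bits of digit 38 and pulls 19 bits of each following digit in above
+ * them (sketch of the first step; illustrative only):
+ *
+ *     n  = a[38] >> 34;                 top 19 bits of digit 38
+ *     n += ((int128_t)a[39]) << 19;     next digit aligned above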
+ */
+static void sp_4096_mont_shift_39(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int128_t n = a[38] >> 34;
+    n += ((int128_t)a[39]) << 19;
+
+    for (i = 0; i < 38; i++) {
+        r[i] = n & 0x1fffffffffffffL;
+        n >>= 53;
+        n += ((int128_t)a[40 + i]) << 19;
+    }
+    r[38] = (sp_digit)n;
+#else
+    int i;
+    int128_t n = a[38] >> 34;
+    n += ((int128_t)a[39]) << 19;
+    for (i = 0; i < 32; i += 8) {
+        r[i + 0] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 40]) << 19;
+        r[i + 1] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 41]) << 19;
+        r[i + 2] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 42]) << 19;
+        r[i + 3] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 43]) << 19;
+        r[i + 4] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 44]) << 19;
+        r[i + 5] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 45]) << 19;
+        r[i + 6] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 46]) << 19;
+        r[i + 7] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 47]) << 19;
+    }
+    r[32] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[72]) << 19;
+    r[33] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[73]) << 19;
+    r[34] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[74]) << 19;
+    r[35] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[75]) << 19;
+    r[36] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[76]) << 19;
+    r[37] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[77]) << 19;
+    r[38] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[39], 0, sizeof(*r) * 39U);
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_4096_mont_reduce_39(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_4096_norm_39(a + 39);
+
+    for (i=0; i<38; i++) {
+        mu = (a[i] * mp) & 0x1fffffffffffffL;
+        sp_4096_mul_add_39(a+i, m, mu);
+        a[i+1] += a[i] >> 53;
+    }
+    mu = (a[i] * mp) & 0x3ffffffffL;
+    sp_4096_mul_add_39(a+i, m, mu);
+    a[i+1] += a[i] >> 53;
+    a[i] &= 0x1fffffffffffffL;
+
+    sp_4096_mont_shift_39(a, a);
+    sp_4096_cond_sub_39(a, a, m, 0 - (((a[38] >> 34) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_4096_norm_39(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_4096_mont_mul_39(sp_digit* r, const sp_digit* a, const sp_digit* b,
+    const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_39(r, a, b);
+    sp_4096_mont_reduce_39(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_4096_mont_sqr_39(sp_digit* r, const sp_digit* a, const sp_digit* m,
+    sp_digit mp)
+{
+    sp_4096_sqr_39(r, a);
+    sp_4096_mont_reduce_39(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */ +SP_NOINLINE static void sp_4096_mul_d_39(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 39; i++) { + t += tb * a[i]; + r[i] = t & 0x1fffffffffffffL; + t >>= 53; + } + r[39] = (sp_digit)t; +#else + int128_t tb = b; + int128_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x1fffffffffffffL; + for (i = 0; i < 32; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 53) + (t[6] & 0x1fffffffffffffL); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 53) + (t[7] & 0x1fffffffffffffL); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 53) + (t[0] & 0x1fffffffffffffL); + } + t[1] = tb * a[33]; + r[33] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL); + t[2] = tb * a[34]; + r[34] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL); + t[3] = tb * a[35]; + r[35] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL); + t[4] = tb * a[36]; + r[36] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL); + t[5] = tb * a[37]; + r[37] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL); + t[6] = tb * a[38]; + r[38] = (sp_digit)(t[5] >> 53) + (t[6] & 0x1fffffffffffffL); + r[39] = (sp_digit)(t[6] >> 53); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_4096_cond_add_39(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 39; i++) { + r[i] = a[i] + (b[i] & m); + } +#else + int i; + + for (i = 0; i < 32; i += 8) { + r[i + 0] = a[i + 0] + (b[i + 0] & m); + r[i + 1] = a[i + 1] + (b[i + 1] & m); + r[i + 2] = a[i + 2] + (b[i + 2] & m); + r[i + 3] = a[i + 3] + (b[i + 3] & m); + r[i + 4] = a[i + 4] + (b[i + 4] & m); + r[i + 5] = a[i + 5] + (b[i + 5] & m); + r[i + 6] = a[i + 6] + (b[i + 6] & m); + r[i + 7] = a[i + 7] + (b[i + 7] & m); + } + r[32] = a[32] + (b[32] & m); + r[33] = a[33] + (b[33] & m); + r[34] = a[34] + (b[34] & m); + r[35] = a[35] + (b[35] & m); + r[36] = a[36] + (b[36] & m); + r[37] = a[37] + (b[37] & m); + r[38] = a[38] + (b[38] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifdef WOLFSSL_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */
+SP_NOINLINE static int sp_4096_add_39(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif
+/* Shift a right by n bits into r. (r = a >> n)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * n  Number of bits to shift (less than the 53-bit digit size).
+ */
+SP_NOINLINE static void sp_4096_rshift_39(sp_digit* r, sp_digit* a, byte n)
+{
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<38; i++) {
+        r[i] = ((a[i] >> n) | (a[i + 1] << (53 - n))) & 0x1fffffffffffffL;
+    }
+#else
+    for (i=0; i<32; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (53 - n))) & 0x1fffffffffffffL;
+    }
+    r[32] = ((a[32] >> n) | (a[33] << (53 - n))) & 0x1fffffffffffffL;
+    r[33] = ((a[33] >> n) | (a[34] << (53 - n))) & 0x1fffffffffffffL;
+    r[34] = ((a[34] >> n) | (a[35] << (53 - n))) & 0x1fffffffffffffL;
+    r[35] = ((a[35] >> n) | (a[36] << (53 - n))) & 0x1fffffffffffffL;
+    r[36] = ((a[36] >> n) | (a[37] << (53 - n))) & 0x1fffffffffffffL;
+    r[37] = ((a[37] >> n) | (a[38] << (53 - n))) & 0x1fffffffffffffL;
+#endif
+    r[38] = a[38] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_64
+static WC_INLINE sp_digit sp_4096_div_word_39(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 53 bits from d1 and top 10 bits from d0. */
+    d = (d1 << 10) | (d0 >> 43);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 11 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 33) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 21 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 23) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 31 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 13) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 41 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 3) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 51 bits in r */
+    /* Remaining 3 bits from d0. */
+    r <<= 3;
+    d <<= 3;
+    d |= d0 & ((1 << 3) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
+/* Divide a by d and put the remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
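+ *
+ * The divisor is normalized first: the modulus has 2048 bits while 39
+ * digits hold 39*53 = 2067, so both operands are scaled by 2^19 to make
+ * the divisor's top digit full. Each pass then estimates one quotient
+ * digit from the top two dividend digits (sketch; illustrative only):
+ *
+ *     d1 = ((int128_t)t1[39 + i] << 53) + t1[39 + i - 1];
+ *     r1 = (sp_digit)(d1 / dv);      dv is the scaled top divisor digit
+ *
+ * and the remainder is shifted back down by 19 bits at the end.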
+ */ +static int sp_4096_div_39(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_64 + int128_t d1; +#endif + sp_digit dv, r1; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* td; +#else + sp_digit t1d[78 + 1], t2d[39 + 1], sdd[39 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* sd; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 39 + 3), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + (void)m; + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = td; + t2 = td + 78 + 1; + sd = t2 + 39 + 1; +#else + t1 = t1d; + t2 = t2d; + sd = sdd; +#endif + + sp_4096_mul_d_39(sd, d, 1L << 19); + sp_4096_mul_d_78(t1, a, 1L << 19); + dv = sd[38]; + for (i=39; i>=0; i--) { + t1[39 + i] += t1[39 + i - 1] >> 53; + t1[39 + i - 1] &= 0x1fffffffffffffL; +#ifndef WOLFSSL_SP_DIV_64 + d1 = t1[39 + i]; + d1 <<= 53; + d1 += t1[39 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_4096_div_word_39(t1[39 + i], t1[39 + i - 1], dv); +#endif + + sp_4096_mul_d_39(t2, sd, r1); + (void)sp_4096_sub_39(&t1[i], &t1[i], t2); + t1[39 + i] -= t2[39]; + t1[39 + i] += t1[39 + i - 1] >> 53; + t1[39 + i - 1] &= 0x1fffffffffffffL; + r1 = (((-t1[39 + i]) << 53) - t1[39 + i - 1]) / dv; + r1 -= t1[39 + i]; + sp_4096_mul_d_39(t2, sd, r1); + (void)sp_4096_add_39(&t1[i], &t1[i], t2); + t1[39 + i] += t1[39 + i - 1] >> 53; + t1[39 + i - 1] &= 0x1fffffffffffffL; + } + t1[39 - 1] += t1[39 - 2] >> 53; + t1[39 - 2] &= 0x1fffffffffffffL; + r1 = t1[39 - 1] / dv; + + sp_4096_mul_d_39(t2, sd, r1); + sp_4096_sub_39(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 39U); + for (i=0; i<37; i++) { + r[i+1] += r[i] >> 53; + r[i] &= 0x1fffffffffffffL; + } + sp_4096_cond_add_39(r, r, sd, 0 - ((r[38] < 0) ? + (sp_digit)1 : (sp_digit)0)); + + sp_4096_norm_39(r); + sp_4096_rshift_39(r, r, 19); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_4096_mod_39(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_39(a, m, NULL, r); +} + +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
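+ *
+ * The WOLFSSL_SP_SMALL variant below walks the exponent bits left to
+ * right in Montgomery form. For each bit y it performs both a multiply
+ * and a square, selecting the destinations with masked pointers instead
+ * of branches so the access pattern is independent of e (sketch of one
+ * step; illustrative only):
+ *
+ *     t[y^1] = t[0] * t[1] mod m;    Montgomery multiply
+ *     t[y]   = t[y] * t[y] mod m;    Montgomery square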
+ */ +static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* td; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 39 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3U * 39U * 2U); + + norm = t[0] = td; + t[1] = &td[39 * 2]; + t[2] = &td[2 * 39 * 2]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_39(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_39(t[1], a, m); + } + else { + XMEMCPY(t[1], a, sizeof(sp_digit) * 39U); + } + } + if (err == MP_OKAY) { + sp_4096_mul_39(t[1], t[1], norm); + err = sp_4096_mod_39(t[1], t[1], m); + } + + if (err == MP_OKAY) { + i = bits / 53; + c = bits % 53; + n = e[i--] << (53 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 53; + } + + y = (n >> 52) & 1; + n <<= 1; + + sp_4096_mont_mul_39(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(*t[2]) * 39 * 2); + sp_4096_mont_sqr_39(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(*t[2]) * 39 * 2); + } + + sp_4096_mont_reduce_39(t[0], m, mp); + n = sp_4096_cmp_39(t[0], m); + sp_4096_cond_sub_39(t[0], t[0], m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 39 * 2); + + } + + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + + return err; +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[3][78]; +#else + sp_digit* td; + sp_digit* t[3]; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 39 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + t[0] = td; + t[1] = &td[39 * 2]; + t[2] = &td[2 * 39 * 2]; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_39(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_39(t[1], a, m); + if (err == MP_OKAY) { + sp_4096_mul_39(t[1], t[1], norm); + err = sp_4096_mod_39(t[1], t[1], m); + } + } + else { + sp_4096_mul_39(t[1], a, norm); + err = sp_4096_mod_39(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + i = bits / 53; + c = bits % 53; + n = e[i--] << (53 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 53; + } + + y = (n >> 52) & 1; + n <<= 1; + + sp_4096_mont_mul_39(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); + sp_4096_mont_sqr_39(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); + } + + sp_4096_mont_reduce_39(t[0], m, mp); + n = sp_4096_cmp_39(t[0], m); + sp_4096_cond_sub_39(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(t[0])); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][78]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit rt[78]; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 78, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 78; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_39(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_39(t[1], a, m); + if (err == MP_OKAY) { + sp_4096_mul_39(t[1], t[1], norm); + err = sp_4096_mod_39(t[1], t[1], m); + } + } + else { + sp_4096_mul_39(t[1], a, norm); + err = sp_4096_mod_39(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_39(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_39(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_39(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_39(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_39(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_39(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_39(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_39(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_39(t[10], t[ 5], m, mp); + sp_4096_mont_mul_39(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_39(t[12], t[ 6], m, mp); + sp_4096_mont_mul_39(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_39(t[14], t[ 7], m, mp); + sp_4096_mont_mul_39(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_39(t[16], t[ 8], m, mp); + sp_4096_mont_mul_39(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_39(t[18], t[ 9], m, mp); + sp_4096_mont_mul_39(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_39(t[20], t[10], m, mp); + sp_4096_mont_mul_39(t[21], t[11], t[10], m, mp); + sp_4096_mont_sqr_39(t[22], t[11], m, mp); + sp_4096_mont_mul_39(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_39(t[24], t[12], m, mp); + sp_4096_mont_mul_39(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_39(t[26], t[13], m, mp); + sp_4096_mont_mul_39(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_39(t[28], t[14], m, mp); + sp_4096_mont_mul_39(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_39(t[30], t[15], m, mp); + sp_4096_mont_mul_39(t[31], t[16], t[15], m, mp); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 52) / 53) - 1; + c = bits % 53; + if (c == 0) { + c = 53; + } + if (i < 39) { + n = e[i--] << (64 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (11 - c); + c += 53; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + XMEMCPY(rt, t[y], sizeof(rt)); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (11 - c); + c += 53; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + + sp_4096_mont_sqr_39(rt, rt, m, mp); + sp_4096_mont_sqr_39(rt, rt, m, mp); + sp_4096_mont_sqr_39(rt, rt, m, mp); + sp_4096_mont_sqr_39(rt, rt, m, mp); + sp_4096_mont_sqr_39(rt, rt, m, mp); + + sp_4096_mont_mul_39(rt, rt, t[y], m, mp); + } + + sp_4096_mont_reduce_39(rt, m, mp); + n = sp_4096_cmp_39(rt, m); + sp_4096_cond_sub_39(rt, rt, m, ((n < 0) ? 
+            (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, rt, sizeof(rt));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA && !SP_RSA_PRIVATE_EXP_D */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_4096_mont_norm_78(sp_digit* r, const sp_digit* m)
+{
+    /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<77; i++) {
+        r[i] = 0x1fffffffffffffL;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 72; i += 8) {
+        r[i + 0] = 0x1fffffffffffffL;
+        r[i + 1] = 0x1fffffffffffffL;
+        r[i + 2] = 0x1fffffffffffffL;
+        r[i + 3] = 0x1fffffffffffffL;
+        r[i + 4] = 0x1fffffffffffffL;
+        r[i + 5] = 0x1fffffffffffffL;
+        r[i + 6] = 0x1fffffffffffffL;
+        r[i + 7] = 0x1fffffffffffffL;
+    }
+    r[72] = 0x1fffffffffffffL;
+    r[73] = 0x1fffffffffffffL;
+    r[74] = 0x1fffffffffffffL;
+    r[75] = 0x1fffffffffffffL;
+    r[76] = 0x1fffffffffffffL;
+#endif
+    r[77] = 0x7fffL;
+
+    /* r = (2^n - 1) mod m */
+    (void)sp_4096_sub_78(r, r, m);
+
+    /* Add one so r = 2^n mod m */
+    r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_4096_cmp_78(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=77; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[77] - b[77]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[76] - b[76]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[75] - b[75]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[74] - b[74]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[73] - b[73]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[72] - b[72]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    for (i = 64; i >= 0; i -= 8) {
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r  A single precision number representing the conditional subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
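+ *
+ * Because m is either all ones or all zero, (b[i] & m) is b[i] or 0 and
+ * the same instructions execute either way, keeping the operation
+ * constant time (sketch of building such a mask; illustrative only):
+ *
+ *     mask = (sp_digit)0 - (flag ? (sp_digit)1 : (sp_digit)0);
+ *     r[i] = a[i] - (b[i] & mask);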
+ */ +static void sp_4096_cond_sub_78(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 78; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + int i; + + for (i = 0; i < 72; i += 8) { + r[i + 0] = a[i + 0] - (b[i + 0] & m); + r[i + 1] = a[i + 1] - (b[i + 1] & m); + r[i + 2] = a[i + 2] - (b[i + 2] & m); + r[i + 3] = a[i + 3] - (b[i + 3] & m); + r[i + 4] = a[i + 4] - (b[i + 4] & m); + r[i + 5] = a[i + 5] - (b[i + 5] & m); + r[i + 6] = a[i + 6] - (b[i + 6] & m); + r[i + 7] = a[i + 7] - (b[i + 7] & m); + } + r[72] = a[72] - (b[72] & m); + r[73] = a[73] - (b[73] & m); + r[74] = a[74] - (b[74] & m); + r[75] = a[75] - (b[75] & m); + r[76] = a[76] - (b[76] & m); + r[77] = a[77] - (b[77] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_4096_mul_add_78(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 78; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x1fffffffffffffL; + t >>= 53; + } + r[78] += t; +#else + int128_t tb = b; + int128_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffffffffffffL); + for (i = 0; i < 72; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL)); + t[2] = tb * a[i+2]; + r[i+2] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL)); + t[3] = tb * a[i+3]; + r[i+3] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL)); + t[4] = tb * a[i+4]; + r[i+4] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL)); + t[5] = tb * a[i+5]; + r[i+5] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL)); + t[6] = tb * a[i+6]; + r[i+6] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL)); + t[7] = tb * a[i+7]; + r[i+7] += (sp_digit)((t[6] >> 53) + (t[7] & 0x1fffffffffffffL)); + t[0] = tb * a[i+8]; + r[i+8] += (sp_digit)((t[7] >> 53) + (t[0] & 0x1fffffffffffffL)); + } + t[1] = tb * a[73]; r[73] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL)); + t[2] = tb * a[74]; r[74] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL)); + t[3] = tb * a[75]; r[75] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL)); + t[4] = tb * a[76]; r[76] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL)); + t[5] = tb * a[77]; r[77] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL)); + r[78] += (sp_digit)(t[5] >> 53); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 53. + * + * a Array of sp_digit to normalize. 
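+ *
+ * Digits may temporarily grow past 53 bits after word-wise adds; each
+ * step pushes the excess into the next digit as a carry (sketch of one
+ * step; illustrative only):
+ *
+ *     a[i+1] += a[i] >> 53;           propagate bits 53 and up
+ *     a[i]   &= 0x1fffffffffffffL;    keep the low 53 bits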
+ */ +static void sp_4096_norm_78(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 77; i++) { + a[i+1] += a[i] >> 53; + a[i] &= 0x1fffffffffffffL; + } +#else + int i; + for (i = 0; i < 72; i += 8) { + a[i+1] += a[i+0] >> 53; a[i+0] &= 0x1fffffffffffffL; + a[i+2] += a[i+1] >> 53; a[i+1] &= 0x1fffffffffffffL; + a[i+3] += a[i+2] >> 53; a[i+2] &= 0x1fffffffffffffL; + a[i+4] += a[i+3] >> 53; a[i+3] &= 0x1fffffffffffffL; + a[i+5] += a[i+4] >> 53; a[i+4] &= 0x1fffffffffffffL; + a[i+6] += a[i+5] >> 53; a[i+5] &= 0x1fffffffffffffL; + a[i+7] += a[i+6] >> 53; a[i+6] &= 0x1fffffffffffffL; + a[i+8] += a[i+7] >> 53; a[i+7] &= 0x1fffffffffffffL; + a[i+9] += a[i+8] >> 53; a[i+8] &= 0x1fffffffffffffL; + } + a[72+1] += a[72] >> 53; + a[72] &= 0x1fffffffffffffL; + a[73+1] += a[73] >> 53; + a[73] &= 0x1fffffffffffffL; + a[74+1] += a[74] >> 53; + a[74] &= 0x1fffffffffffffL; + a[75+1] += a[75] >> 53; + a[75] &= 0x1fffffffffffffL; + a[76+1] += a[76] >> 53; + a[76] &= 0x1fffffffffffffL; +#endif +} + +/* Shift the result in the high 4096 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. + */ +static void sp_4096_mont_shift_78(sp_digit* r, const sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + int128_t n = a[77] >> 15; + n += ((int128_t)a[78]) << 38; + + for (i = 0; i < 77; i++) { + r[i] = n & 0x1fffffffffffffL; + n >>= 53; + n += ((int128_t)a[79 + i]) << 38; + } + r[77] = (sp_digit)n; +#else + int i; + int128_t n = a[77] >> 15; + n += ((int128_t)a[78]) << 38; + for (i = 0; i < 72; i += 8) { + r[i + 0] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 79]) << 38; + r[i + 1] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 80]) << 38; + r[i + 2] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 81]) << 38; + r[i + 3] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 82]) << 38; + r[i + 4] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 83]) << 38; + r[i + 5] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 84]) << 38; + r[i + 6] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 85]) << 38; + r[i + 7] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 86]) << 38; + } + r[72] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[151]) << 38; + r[73] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[152]) << 38; + r[74] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[153]) << 38; + r[75] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[154]) << 38; + r[76] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[155]) << 38; + r[77] = (sp_digit)n; +#endif /* WOLFSSL_SP_SMALL */ + XMEMSET(&r[78], 0, sizeof(*r) * 78U); +} + +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
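+ *
+ * One pass per digit adds mu*m so the current low digit becomes zero
+ * mod 2^53; 77 full passes plus a final 15-bit pass clear exactly
+ * 77*53 + 15 = 4096 low bits, which sp_4096_mont_shift_78() then divides
+ * out. Per-digit step (sketch; illustrative only):
+ *
+ *     mu = (a[i] * mp) & 0x1fffffffffffffL;    mp = -1/m mod 2^53
+ *     sp_4096_mul_add_78(a + i, m, mu);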
+ */
+static void sp_4096_mont_reduce_78(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_4096_norm_78(a + 78);
+
+#ifdef WOLFSSL_SP_DH
+    if (mp != 1) {
+        for (i=0; i<77; i++) {
+            mu = (a[i] * mp) & 0x1fffffffffffffL;
+            sp_4096_mul_add_78(a+i, m, mu);
+            a[i+1] += a[i] >> 53;
+        }
+        mu = (a[i] * mp) & 0x7fffL;
+        sp_4096_mul_add_78(a+i, m, mu);
+        a[i+1] += a[i] >> 53;
+        a[i] &= 0x1fffffffffffffL;
+    }
+    else {
+        for (i=0; i<77; i++) {
+            mu = a[i] & 0x1fffffffffffffL;
+            sp_4096_mul_add_78(a+i, m, mu);
+            a[i+1] += a[i] >> 53;
+        }
+        mu = a[i] & 0x7fffL;
+        sp_4096_mul_add_78(a+i, m, mu);
+        a[i+1] += a[i] >> 53;
+        a[i] &= 0x1fffffffffffffL;
+    }
+#else
+    for (i=0; i<77; i++) {
+        mu = (a[i] * mp) & 0x1fffffffffffffL;
+        sp_4096_mul_add_78(a+i, m, mu);
+        a[i+1] += a[i] >> 53;
+    }
+    mu = (a[i] * mp) & 0x7fffL;
+    sp_4096_mul_add_78(a+i, m, mu);
+    a[i+1] += a[i] >> 53;
+    a[i] &= 0x1fffffffffffffL;
+#endif
+
+    sp_4096_mont_shift_78(a, a);
+    sp_4096_cond_sub_78(a, a, m, 0 - (((a[77] >> 15) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_4096_norm_78(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_4096_mont_mul_78(sp_digit* r, const sp_digit* a, const sp_digit* b,
+    const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_78(r, a, b);
+    sp_4096_mont_reduce_78(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_4096_mont_sqr_78(sp_digit* r, const sp_digit* a, const sp_digit* m,
+    sp_digit mp)
+{
+    sp_4096_sqr_78(r, a);
+    sp_4096_mont_reduce_78(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_d_156(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 156; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1fffffffffffffL;
+        t >>= 53;
+    }
+    r[156] = (sp_digit)t;
+#else
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1fffffffffffffL;
+    for (i = 0; i < 152; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 53) + (t[6] & 0x1fffffffffffffL);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 53) + (t[7] & 0x1fffffffffffffL);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 53) + (t[0] & 0x1fffffffffffffL);
+    }
+    t[1] = tb * a[153];
+    r[153] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL);
+    t[2] = tb * a[154];
+    r[154] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL);
+    t[3] = tb * a[155];
+    r[155] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL);
+    r[156] = (sp_digit)(t[3] >> 53);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_4096_cond_add_78(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 78; i++) { + r[i] = a[i] + (b[i] & m); + } +#else + int i; + + for (i = 0; i < 72; i += 8) { + r[i + 0] = a[i + 0] + (b[i + 0] & m); + r[i + 1] = a[i + 1] + (b[i + 1] & m); + r[i + 2] = a[i + 2] + (b[i + 2] & m); + r[i + 3] = a[i + 3] + (b[i + 3] & m); + r[i + 4] = a[i + 4] + (b[i + 4] & m); + r[i + 5] = a[i + 5] + (b[i + 5] & m); + r[i + 6] = a[i + 6] + (b[i + 6] & m); + r[i + 7] = a[i + 7] + (b[i + 7] & m); + } + r[72] = a[72] + (b[72] & m); + r[73] = a[73] + (b[73] & m); + r[74] = a[74] + (b[74] & m); + r[75] = a[75] + (b[75] & m); + r[76] = a[76] + (b[76] & m); + r[77] = a[77] + (b[77] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifdef WOLFSSL_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_sub_78(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 78; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif +#ifdef WOLFSSL_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_add_78(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 78; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif +SP_NOINLINE static void sp_4096_rshift_78(sp_digit* r, sp_digit* a, byte n) +{ + int i; + +#ifdef WOLFSSL_SP_SMALL + for (i=0; i<77; i++) { + r[i] = ((a[i] >> n) | (a[i + 1] << (53 - n))) & 0x1fffffffffffffL; + } +#else + for (i=0; i<72; i += 8) { + r[i+0] = ((a[i+0] >> n) | (a[i+1] << (53 - n))) & 0x1fffffffffffffL; + r[i+1] = ((a[i+1] >> n) | (a[i+2] << (53 - n))) & 0x1fffffffffffffL; + r[i+2] = ((a[i+2] >> n) | (a[i+3] << (53 - n))) & 0x1fffffffffffffL; + r[i+3] = ((a[i+3] >> n) | (a[i+4] << (53 - n))) & 0x1fffffffffffffL; + r[i+4] = ((a[i+4] >> n) | (a[i+5] << (53 - n))) & 0x1fffffffffffffL; + r[i+5] = ((a[i+5] >> n) | (a[i+6] << (53 - n))) & 0x1fffffffffffffL; + r[i+6] = ((a[i+6] >> n) | (a[i+7] << (53 - n))) & 0x1fffffffffffffL; + r[i+7] = ((a[i+7] >> n) | (a[i+8] << (53 - n))) & 0x1fffffffffffffL; + } + r[72] = ((a[72] >> n) | (a[73] << (53 - n))) & 0x1fffffffffffffL; + r[73] = ((a[73] >> n) | (a[74] << (53 - n))) & 0x1fffffffffffffL; + r[74] = ((a[74] >> n) | (a[75] << (53 - n))) & 0x1fffffffffffffL; + r[75] = ((a[75] >> n) | (a[76] << (53 - n))) & 0x1fffffffffffffL; + r[76] = ((a[76] >> n) | (a[77] << (53 - n))) & 0x1fffffffffffffL; +#endif + r[77] = a[77] >> n; +} + +#ifdef WOLFSSL_SP_DIV_64 +static WC_INLINE sp_digit sp_4096_div_word_78(sp_digit d1, sp_digit d0, + sp_digit dv) +{ + sp_digit d, r, t; + + /* All 53 bits from d1 and top 10 bits from d0. */ + d = (d1 << 10) | (d0 >> 43); + r = d / dv; + d -= r * dv; + /* Up to 11 bits in r */ + /* Next 10 bits from d0. */ + r <<= 10; + d <<= 10; + d |= (d0 >> 33) & ((1 << 10) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 21 bits in r */ + /* Next 10 bits from d0. */ + r <<= 10; + d <<= 10; + d |= (d0 >> 23) & ((1 << 10) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 31 bits in r */ + /* Next 10 bits from d0. 
 */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 13) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 41 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 3) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 51 bits in r */
+    /* Remaining 3 bits from d0. */
+    r <<= 3;
+    d <<= 3;
+    d |= d0 & ((1 << 3) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
+/* Divide a by d and put the remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_div_78(const sp_digit* a, const sp_digit* d, sp_digit* m,
+    sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_64
+    int128_t d1;
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* td;
+#else
+    sp_digit t1d[156 + 1], t2d[78 + 1], sdd[78 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* sd;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 78 + 3), NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    (void)m;
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 156 + 1;
+        sd = t2 + 78 + 1;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        sd = sdd;
+#endif
+
+        sp_4096_mul_d_78(sd, d, 1L << 38);
+        sp_4096_mul_d_156(t1, a, 1L << 38);
+        dv = sd[77];
+        for (i=78; i>=0; i--) {
+            t1[78 + i] += t1[78 + i - 1] >> 53;
+            t1[78 + i - 1] &= 0x1fffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
+            d1 = t1[78 + i];
+            d1 <<= 53;
+            d1 += t1[78 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_4096_div_word_78(t1[78 + i], t1[78 + i - 1], dv);
+#endif
+
+            sp_4096_mul_d_78(t2, sd, r1);
+            (void)sp_4096_sub_78(&t1[i], &t1[i], t2);
+            t1[78 + i] -= t2[78];
+            t1[78 + i] += t1[78 + i - 1] >> 53;
+            t1[78 + i - 1] &= 0x1fffffffffffffL;
+            r1 = (((-t1[78 + i]) << 53) - t1[78 + i - 1]) / dv;
+            r1 -= t1[78 + i];
+            sp_4096_mul_d_78(t2, sd, r1);
+            (void)sp_4096_add_78(&t1[i], &t1[i], t2);
+            t1[78 + i] += t1[78 + i - 1] >> 53;
+            t1[78 + i - 1] &= 0x1fffffffffffffL;
+        }
+        t1[78 - 1] += t1[78 - 2] >> 53;
+        t1[78 - 2] &= 0x1fffffffffffffL;
+        r1 = t1[78 - 1] / dv;
+
+        sp_4096_mul_d_78(t2, sd, r1);
+        sp_4096_sub_78(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 78U);
+        for (i=0; i<76; i++) {
+            r[i+1] += r[i] >> 53;
+            r[i] &= 0x1fffffffffffffL;
+        }
+        sp_4096_cond_add_78(r, r, sd, 0 - ((r[77] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+
+        sp_4096_norm_78(r);
+        sp_4096_rshift_78(r, r, 38);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */ +static int sp_4096_mod_78(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_78(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* td; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 78 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3U * 78U * 2U); + + norm = t[0] = td; + t[1] = &td[78 * 2]; + t[2] = &td[2 * 78 * 2]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_78(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_78(t[1], a, m); + } + else { + XMEMCPY(t[1], a, sizeof(sp_digit) * 78U); + } + } + if (err == MP_OKAY) { + sp_4096_mul_78(t[1], t[1], norm); + err = sp_4096_mod_78(t[1], t[1], m); + } + + if (err == MP_OKAY) { + i = bits / 53; + c = bits % 53; + n = e[i--] << (53 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 53; + } + + y = (n >> 52) & 1; + n <<= 1; + + sp_4096_mont_mul_78(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(*t[2]) * 78 * 2); + sp_4096_mont_sqr_78(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(*t[2]) * 78 * 2); + } + + sp_4096_mont_reduce_78(t[0], m, mp); + n = sp_4096_cmp_78(t[0], m); + sp_4096_cond_sub_78(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 78 * 2); + + } + + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + + return err; +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[3][156]; +#else + sp_digit* td; + sp_digit* t[3]; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 78 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + t[0] = td; + t[1] = &td[78 * 2]; + t[2] = &td[2 * 78 * 2]; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_78(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_78(t[1], a, m); + if (err == MP_OKAY) { + sp_4096_mul_78(t[1], t[1], norm); + err = sp_4096_mod_78(t[1], t[1], m); + } + } + else { + sp_4096_mul_78(t[1], a, norm); + err = sp_4096_mod_78(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + i = bits / 53; + c = bits % 53; + n = e[i--] << (53 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 53; + } + + y = (n >> 52) & 1; + n <<= 1; + + sp_4096_mont_mul_78(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); + sp_4096_mont_sqr_78(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); + } + + sp_4096_mont_reduce_78(t[0], m, mp); + n = sp_4096_cmp_78(t[0], m); + sp_4096_cond_sub_78(t[0], t[0], m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(t[0])); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][156]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit rt[156]; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 156, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 156; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_78(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_78(t[1], a, m); + if (err == MP_OKAY) { + sp_4096_mul_78(t[1], t[1], norm); + err = sp_4096_mod_78(t[1], t[1], m); + } + } + else { + sp_4096_mul_78(t[1], a, norm); + err = sp_4096_mod_78(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_78(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_78(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_78(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_78(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_78(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_78(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_78(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_78(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_78(t[10], t[ 5], m, mp); + sp_4096_mont_mul_78(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_78(t[12], t[ 6], m, mp); + sp_4096_mont_mul_78(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_78(t[14], t[ 7], m, mp); + sp_4096_mont_mul_78(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_78(t[16], t[ 8], m, mp); + sp_4096_mont_mul_78(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_78(t[18], t[ 9], m, mp); + 
sp_4096_mont_mul_78(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_78(t[20], t[10], m, mp); + sp_4096_mont_mul_78(t[21], t[11], t[10], m, mp); + sp_4096_mont_sqr_78(t[22], t[11], m, mp); + sp_4096_mont_mul_78(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_78(t[24], t[12], m, mp); + sp_4096_mont_mul_78(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_78(t[26], t[13], m, mp); + sp_4096_mont_mul_78(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_78(t[28], t[14], m, mp); + sp_4096_mont_mul_78(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_78(t[30], t[15], m, mp); + sp_4096_mont_mul_78(t[31], t[16], t[15], m, mp); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 52) / 53) - 1; + c = bits % 53; + if (c == 0) { + c = 53; + } + if (i < 78) { + n = e[i--] << (64 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (11 - c); + c += 53; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + XMEMCPY(rt, t[y], sizeof(rt)); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (11 - c); + c += 53; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + + sp_4096_mont_sqr_78(rt, rt, m, mp); + sp_4096_mont_sqr_78(rt, rt, m, mp); + sp_4096_mont_sqr_78(rt, rt, m, mp); + sp_4096_mont_sqr_78(rt, rt, m, mp); + sp_4096_mont_sqr_78(rt, rt, m, mp); + + sp_4096_mont_mul_78(rt, rt, t[y], m, mp); + } + + sp_4096_mont_reduce_78(rt, m, mp); + n = sp_4096_cmp_78(rt, m); + sp_4096_cond_sub_78(rt, rt, m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, rt, sizeof(rt)); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#endif +} +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */ + /* WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
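+ *
+ * Typical use (sketch only; field names follow wolfSSL's RsaKey, error
+ * handling elided):
+ *
+ *     byte sig[512];
+ *     word32 sigLen = sizeof(sig);
+ *     ret = sp_RsaPublic_4096(in, inLen, &key->e, &key->n, sig, &sigLen);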
+ */ +int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; + sp_digit* norm; + sp_digit e[1] = {0}; + sp_digit mp; + int i; + int err = MP_OKAY; + + if (*outLen < 512U) { + err = MP_TO_E; + } + + if (err == MP_OKAY) { + if (mp_count_bits(em) > 53) { + err = MP_READ_E; + } + if (inLen > 512U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 78 * 2; + m = r + 78 * 2; + norm = r; + + sp_4096_from_bin(a, 78, in, inLen); +#if DIGIT_BIT >= 53 + e[0] = (sp_digit)em->dp[0]; +#else + e[0] = (sp_digit)em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_mp(m, 78, mm); + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_78(norm, m); + } + if (err == MP_OKAY) { + sp_4096_mul_78(a, a, norm); + err = sp_4096_mod_78(a, a, m); + } + if (err == MP_OKAY) { + for (i=52; i>=0; i--) { + if ((e[0] >> i) != 0) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 78 * 2); + for (i--; i>=0; i--) { + sp_4096_mont_sqr_78(r, r, m, mp); + + if (((e[0] >> i) & 1) == 1) { + sp_4096_mont_mul_78(r, r, a, m, mp); + } + } + sp_4096_mont_reduce_78(r, m, mp); + mp = sp_4096_cmp_78(r, m); + sp_4096_cond_sub_78(r, r, m, ((mp < 0) ? + (sp_digit)1 : (sp_digit)0)- 1); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit ad[156], md[78], rd[156]; +#else + sp_digit* d = NULL; +#endif + sp_digit* a; + sp_digit* m; + sp_digit* r; + sp_digit e[1] = {0}; + int err = MP_OKAY; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(em) > 53) { + err = MP_READ_E; + } + if (inLen > 512U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + a = d; + r = a + 78 * 2; + m = r + 78 * 2; + } +#else + a = ad; + m = md; + r = rd; +#endif + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 78, in, inLen); +#if DIGIT_BIT >= 53 + e[0] = (sp_digit)em->dp[0]; +#else + e[0] = (sp_digit)em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_4096_from_mp(m, 78, mm); + + if (e[0] == 0x3) { + sp_4096_sqr_78(r, a); + err = sp_4096_mod_78(r, r, m); + if (err == MP_OKAY) { + sp_4096_mul_78(r, a, r); + err = sp_4096_mod_78(r, r, m); + } + } + else { + sp_digit* norm = r; + int i; + sp_digit mp; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_78(norm, m); + + sp_4096_mul_78(a, a, norm); + err = sp_4096_mod_78(a, a, m); + + if (err == MP_OKAY) { + for (i=52; i>=0; i--) { + if ((e[0] >> i) != 0) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 156U); + for (i--; i>=0; i--) { + sp_4096_mont_sqr_78(r, r, m, mp); + + if (((e[0] 
>> i) & 1) == 1) { + sp_4096_mont_mul_78(r, r, a, m, mp); + } + } + sp_4096_mont_reduce_78(r, m, mp); + mp = sp_4096_cmp_78(r, m); + sp_4096_cond_sub_78(r, r, m, ((mp < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + } + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } +#endif + + return err; +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) +#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */ +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* a; + sp_digit* d = NULL; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 4096) { + err = MP_READ_E; + } + if (inLen > 512) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 4, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = d + 78; + m = a + 156; + r = a; + + sp_4096_from_bin(a, 78, in, inLen); + sp_4096_from_mp(d, 78, dm); + sp_4096_from_mp(m, 78, mm); + err = sp_4096_mod_exp_78(r, a, d, 4096, m, 0); + } + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + + if (d != NULL) { + XMEMSET(d, 0, sizeof(sp_digit) * 78); + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else + sp_digit a[156], d[78], m[78]; + sp_digit* r = a; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 4096) { + err = MP_READ_E; + } + if (inLen > 512U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 78, in, inLen); + sp_4096_from_mp(d, 78, dm); + sp_4096_from_mp(m, 78, mm); + err = sp_4096_mod_exp_78(r, a, d, 4096, m, 0); + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + + XMEMSET(d, 0, sizeof(sp_digit) * 78); + + return err; +#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ +#else +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* dq; + sp_digit* qi; + sp_digit* 
tmpa; + sp_digit* tmpb; + sp_digit* r; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (inLen > 512) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 39 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = t; + p = a + 78 * 2; + q = p + 39; + qi = dq = dp = q + 39; + tmpa = qi + 39; + tmpb = tmpa + 78; + + r = t + 78; + + sp_4096_from_bin(a, 78, in, inLen); + sp_4096_from_mp(p, 39, pm); + sp_4096_from_mp(q, 39, qm); + sp_4096_from_mp(dp, 39, dpm); + err = sp_4096_mod_exp_39(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + sp_4096_from_mp(dq, 39, dqm); + err = sp_4096_mod_exp_39(tmpb, a, dq, 2048, q, 1); + } + if (err == MP_OKAY) { + (void)sp_4096_sub_39(tmpa, tmpa, tmpb); + sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63)); + sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63)); + + sp_4096_from_mp(qi, 39, qim); + sp_4096_mul_39(tmpa, tmpa, qi); + err = sp_4096_mod_39(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_4096_mul_39(tmpa, q, tmpa); + (void)sp_4096_add_78(r, tmpb, tmpa); + sp_4096_norm_78(r); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 39 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else + sp_digit a[78 * 2]; + sp_digit p[39], q[39], dp[39], dq[39], qi[39]; + sp_digit tmpa[78], tmpb[78]; + sp_digit* r = a; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (inLen > 512U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 78, in, inLen); + sp_4096_from_mp(p, 39, pm); + sp_4096_from_mp(q, 39, qm); + sp_4096_from_mp(dp, 39, dpm); + sp_4096_from_mp(dq, 39, dqm); + sp_4096_from_mp(qi, 39, qim); + + err = sp_4096_mod_exp_39(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + err = sp_4096_mod_exp_39(tmpb, a, dq, 2048, q, 1); + } + + if (err == MP_OKAY) { + (void)sp_4096_sub_39(tmpa, tmpa, tmpb); + sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63)); + sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63)); + sp_4096_mul_39(tmpa, tmpa, qi); + err = sp_4096_mod_39(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_4096_mul_39(tmpa, tmpa, q); + (void)sp_4096_add_78(r, tmpb, tmpa); + sp_4096_norm_78(r); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); + XMEMSET(dq, 0, sizeof(dq)); + XMEMSET(qi, 0, sizeof(qi)); + + return err; +#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +} + +#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. 
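+ *
+ * The 78 limbs of 53 bits each encode the value
+ *
+ *     a = sum(i = 0..77) a[i] * 2^(53*i)
+ *
+ * which is repacked into DIGIT_BIT-sized mp_digit words: a straight copy
+ * when DIGIT_BIT == 53, otherwise bits are shifted across word
+ * boundaries before mp_clamp() trims the used count.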
+ */ +static int sp_4096_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 53 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 78); + r->used = 78; + mp_clamp(r); +#elif DIGIT_BIT < 53 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 78; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 53) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 53 - s; + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 78; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 53 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 53 - s; + } + else { + s += 53; + } + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 78 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 78 * 2; + m = e + 78; + r = b; + + sp_4096_from_mp(b, 78, base); + sp_4096_from_mp(e, 78, exp); + sp_4096_from_mp(m, 78, mod); + + err = sp_4096_mod_exp_78(r, b, e, mp_count_bits(exp), m, 0); + } + + if (err == MP_OKAY) { + err = sp_4096_to_mp(r, res); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 78U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[156], ed[78], md[78]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 78 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 78 * 2; + m = e + 78; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 78, base); + sp_4096_from_mp(e, 78, exp); + sp_4096_from_mp(m, 78, mod); + + err = sp_4096_mod_exp_78(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_4096_to_mp(r, res); + } + + +#ifdef 
WOLFSSL_SMALL_STACK + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 78U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +#else + XMEMSET(e, 0, sizeof(sp_digit) * 78U); +#endif + + return err; +#endif +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_4096 +SP_NOINLINE static void sp_4096_lshift_78(sp_digit* r, sp_digit* a, byte n) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + r[78] = a[77] >> (53 - n); + for (i=77; i>0; i--) { + r[i] = ((a[i] << n) | (a[i-1] >> (53 - n))) & 0x1fffffffffffffL; + } +#else + sp_int_digit s, t; + + s = (sp_int_digit)a[77]; + r[78] = s >> (53U - n); + s = (sp_int_digit)(a[77]); t = (sp_int_digit)(a[76]); + r[77] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[76]); t = (sp_int_digit)(a[75]); + r[76] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[75]); t = (sp_int_digit)(a[74]); + r[75] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[74]); t = (sp_int_digit)(a[73]); + r[74] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[73]); t = (sp_int_digit)(a[72]); + r[73] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[72]); t = (sp_int_digit)(a[71]); + r[72] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[71]); t = (sp_int_digit)(a[70]); + r[71] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[70]); t = (sp_int_digit)(a[69]); + r[70] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[69]); t = (sp_int_digit)(a[68]); + r[69] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[68]); t = (sp_int_digit)(a[67]); + r[68] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[67]); t = (sp_int_digit)(a[66]); + r[67] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[66]); t = (sp_int_digit)(a[65]); + r[66] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[65]); t = (sp_int_digit)(a[64]); + r[65] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[64]); t = (sp_int_digit)(a[63]); + r[64] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[63]); t = (sp_int_digit)(a[62]); + r[63] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[62]); t = (sp_int_digit)(a[61]); + r[62] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[61]); t = (sp_int_digit)(a[60]); + r[61] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[60]); t = (sp_int_digit)(a[59]); + r[60] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[59]); t = (sp_int_digit)(a[58]); + r[59] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[58]); t = (sp_int_digit)(a[57]); + r[58] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[57]); t = (sp_int_digit)(a[56]); + r[57] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[56]); t = (sp_int_digit)(a[55]); + r[56] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[55]); t = (sp_int_digit)(a[54]); + r[55] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[54]); t = (sp_int_digit)(a[53]); + r[54] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[53]); t = (sp_int_digit)(a[52]); + r[53] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = 
(sp_int_digit)(a[52]); t = (sp_int_digit)(a[51]); + r[52] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[51]); t = (sp_int_digit)(a[50]); + r[51] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[50]); t = (sp_int_digit)(a[49]); + r[50] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[49]); t = (sp_int_digit)(a[48]); + r[49] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[48]); t = (sp_int_digit)(a[47]); + r[48] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[47]); t = (sp_int_digit)(a[46]); + r[47] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[46]); t = (sp_int_digit)(a[45]); + r[46] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[45]); t = (sp_int_digit)(a[44]); + r[45] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[44]); t = (sp_int_digit)(a[43]); + r[44] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[43]); t = (sp_int_digit)(a[42]); + r[43] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[42]); t = (sp_int_digit)(a[41]); + r[42] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]); + r[41] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]); + r[40] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]); + r[39] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]); + r[38] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]); + r[37] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]); + r[36] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]); + r[35] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]); + r[34] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]); + r[33] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]); + r[32] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]); + r[31] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]); + r[30] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]); + r[29] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]); + r[28] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]); + r[27] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]); + r[26] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]); + r[25] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]); + r[24] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]); + r[23] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = 
(sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]); + r[22] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]); + r[21] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]); + r[20] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]); + r[19] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]); + r[18] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]); + r[17] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]); + r[16] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]); + r[15] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]); + r[14] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]); + r[13] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]); + r[12] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]); + r[11] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]); + r[10] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]); + r[9] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]); + r[8] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]); + r[7] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]); + r[6] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]); + r[5] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]); + r[4] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]); + r[3] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]); + r[2] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]); + r[1] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; +#endif + r[0] = (a[0] << n) & 0x1fffffffffffffL; +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
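+ *
+ * Because the base is fixed at 2, no precomputed table is needed: after
+ * the five Montgomery squarings of each window step, the 5-bit window
+ * value y is applied as a left shift,
+ *
+ *     r = (r * 2^y) mod m
+ *
+ * and the bits shifted past the 4096-bit boundary are folded back in
+ * with one sp_4096_mul_d_78() against the Montgomery normalizer and a
+ * conditional subtract of m.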
+ */ +static int sp_4096_mod_exp_2_78(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[156]; + sp_digit td[79]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 235, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 156; + XMEMSET(td, 0, sizeof(sp_digit) * 235); +#else + norm = nd; + tmp = td; + XMEMSET(td, 0, sizeof(td)); +#endif + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_78(norm, m); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 52) / 53) - 1; + c = bits % 53; + if (c == 0) { + c = 53; + } + if (i < 78) { + n = e[i--] << (64 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (11 - c); + c += 53; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + sp_4096_lshift_78(r, norm, y); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (11 - c); + c += 53; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + + sp_4096_mont_sqr_78(r, r, m, mp); + sp_4096_mont_sqr_78(r, r, m, mp); + sp_4096_mont_sqr_78(r, r, m, mp); + sp_4096_mont_sqr_78(r, r, m, mp); + sp_4096_mont_sqr_78(r, r, m, mp); + + sp_4096_lshift_78(r, r, y); + sp_4096_mul_d_78(tmp, norm, (r[78] << 38) + (r[77] >> 15)); + r[78] = 0; + r[77] &= 0x7fffL; + (void)sp_4096_add_78(r, r, tmp); + sp_4096_norm_78(r); + o = sp_4096_cmp_78(r, m); + sp_4096_cond_sub_78(r, r, m, ((o < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + + sp_4096_mont_reduce_78(r, m, mp); + n = sp_4096_cmp_78(r, m); + sp_4096_cond_sub_78(r, r, m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +#endif /* HAVE_FFDHE_4096 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
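+ *
+ * A minimal usage sketch (hypothetical names, error handling elided; not
+ * part of this patch):
+ *
+ *     byte shared[512];
+ *     word32 sharedLen = sizeof(shared);
+ *     int ret = sp_DhExp_4096(&pub, priv, privLen, &prime,
+ *                             shared, &sharedLen);
+ *     // leading zero bytes are stripped, so sharedLen may be under 512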
+ */ +int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + word32 i; + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 512) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 78 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 78 * 2; + m = e + 78; + r = b; + + sp_4096_from_mp(b, 78, base); + sp_4096_from_bin(e, 78, exp, expLen); + sp_4096_from_mp(m, 78, mod); + + #ifdef HAVE_FFDHE_4096 + if (base->used == 1 && base->dp[0] == 2 && + ((m[77] << 17) | (m[76] >> 36)) == 0xffffffffL) { + err = sp_4096_mod_exp_2_78(r, e, expLen * 8, m); + } + else + #endif + err = sp_4096_mod_exp_78(r, b, e, expLen * 8, m, 0); + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + for (i=0; i<512 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 78U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[156], ed[78], md[78]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + word32 i; + int err = MP_OKAY; + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 512U) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 78 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 78 * 2; + m = e + 78; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 78, base); + sp_4096_from_bin(e, 78, exp, expLen); + sp_4096_from_mp(m, 78, mod); + + #ifdef HAVE_FFDHE_4096 + if (base->used == 1 && base->dp[0] == 2U && + ((m[77] << 17) | (m[76] >> 36)) == 0xffffffffL) { + err = sp_4096_mod_exp_2_78(r, e, expLen * 8U, m); + } + else { + #endif + err = sp_4096_mod_exp_78(r, b, e, expLen * 8U, m, 0); + #ifdef HAVE_FFDHE_4096 + } + #endif + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + for (i=0; i<512U && out[i] == 0U; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 78U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +#else + XMEMSET(e, 0, sizeof(sp_digit) * 78U); +#endif + + return err; +#endif +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* WOLFSSL_SP_4096 */ + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +#ifdef WOLFSSL_HAVE_SP_ECC +#ifndef WOLFSSL_SP_NO_256 + +/* Point structure to use. */ +typedef struct sp_point_256 { + sp_digit x[2 * 5]; + sp_digit y[2 * 5]; + sp_digit z[2 * 5]; + int infinity; +} sp_point_256; + +/* The modulus (prime) of the curve P256. */ +static const sp_digit p256_mod[5] = { + 0xfffffffffffffL,0x00fffffffffffL,0x0000000000000L,0x0001000000000L, + 0x0ffffffff0000L +}; +/* The Montogmery normalizer for modulus of the curve P256. 
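This equals 2^256 - p, i.e. the Montgomery radix R = 2^256 used by this implementation reduced modulo the prime.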
*/ +static const sp_digit p256_norm_mod[5] = { + 0x0000000000001L,0xff00000000000L,0xfffffffffffffL,0xfffefffffffffL, + 0x000000000ffffL +}; +/* The Montogmery multiplier for modulus of the curve P256. */ +static const sp_digit p256_mp_mod = 0x0000000000001; +#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ + defined(HAVE_ECC_VERIFY) +/* The order of the curve P256. */ +static const sp_digit p256_order[5] = { + 0x9cac2fc632551L,0xada7179e84f3bL,0xfffffffbce6faL,0x0000fffffffffL, + 0x0ffffffff0000L +}; +#endif +/* The order of the curve P256 minus 2. */ +static const sp_digit p256_order2[5] = { + 0x9cac2fc63254fL,0xada7179e84f3bL,0xfffffffbce6faL,0x0000fffffffffL, + 0x0ffffffff0000L +}; +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montogmery normalizer for order of the curve P256. */ +static const sp_digit p256_norm_order[5] = { + 0x6353d039cdaafL,0x5258e8617b0c4L,0x0000000431905L,0xffff000000000L, + 0x000000000ffffL +}; +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montogmery multiplier for order of the curve P256. */ +static const sp_digit p256_mp_order = 0x1c8aaee00bc4fL; +#endif +/* The base point of curve P256. */ +static const sp_point_256 p256_base = { + /* X ordinate */ + { + 0x13945d898c296L,0x812deb33a0f4aL,0x3a440f277037dL,0x4247f8bce6e56L, + 0x06b17d1f2e12cL, + 0L, 0L, 0L, 0L, 0L + }, + /* Y ordinate */ + { + 0x6406837bf51f5L,0x576b315ececbbL,0xc0f9e162bce33L,0x7f9b8ee7eb4a7L, + 0x04fe342e2fe1aL, + 0L, 0L, 0L, 0L, 0L + }, + /* Z ordinate */ + { + 0x0000000000001L,0x0000000000000L,0x0000000000000L,0x0000000000000L, + 0x0000000000000L, + 0L, 0L, 0L, 0L, 0L + }, + /* infinity */ + 0 +}; +#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY) +static const sp_digit p256_b[5] = { + 0xe3c3e27d2604bL,0xb0cc53b0f63bcL,0x69886bc651d06L,0x93e7b3ebbd557L, + 0x05ac635d8aa3aL +}; +#endif + +static int sp_256_point_new_ex_5(void* heap, sp_point_256* sp, sp_point_256** p) +{ + int ret = MP_OKAY; + (void)heap; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + (void)sp; + *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); +#else + *p = sp; +#endif + if (*p == NULL) { + ret = MEMORY_E; + } + return ret; +} + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +/* Allocate memory for point and return error. */ +#define sp_256_point_new_5(heap, sp, p) sp_256_point_new_ex_5((heap), NULL, &(p)) +#else +/* Set pointer to data and return no error. */ +#define sp_256_point_new_5(heap, sp, p) sp_256_point_new_ex_5((heap), &(sp), &(p)) +#endif + + +static void sp_256_point_free_5(sp_point_256* p, int clear, void* heap) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +/* If valid pointer then clear point data if requested and free data. */ + if (p != NULL) { + if (clear != 0) { + XMEMSET(p, 0, sizeof(*p)); + } + XFREE(p, heap, DYNAMIC_TYPE_ECC); + } +#else +/* Clear point data if requested. */ + if (clear != 0) { + XMEMSET(p, 0, sizeof(*p)); + } +#endif + (void)heap; +} + +/* Multiply a number by Montogmery normalizer mod modulus (prime). + * + * r The resulting Montgomery form number. + * a The number to convert. + * m The modulus (prime). + * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise. 
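+ *
+ * The conversion is fused with the fast NIST P-256 reduction: a is split
+ * into eight 32-bit words and recombined through the signed coefficient
+ * rows in the body, which follow from the identity
+ *
+ *     2^256 = 2^224 - 2^192 - 2^96 + 1   (mod p256)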
+ */ +static int sp_256_mod_mul_norm_5(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + int64_t* td; +#else + int64_t td[8]; + int64_t a32d[8]; +#endif + int64_t* t; + int64_t* a32; + int64_t o; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 8, NULL, DYNAMIC_TYPE_ECC); + if (td == NULL) { + return MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = td; + a32 = td + 8; +#else + t = td; + a32 = a32d; +#endif + + a32[0] = (sp_digit)(a[0]) & 0xffffffffL; + a32[1] = (sp_digit)(a[0] >> 32U); + a32[1] |= a[1] << 20U; + a32[1] &= 0xffffffffL; + a32[2] = (sp_digit)(a[1] >> 12U) & 0xffffffffL; + a32[3] = (sp_digit)(a[1] >> 44U); + a32[3] |= a[2] << 8U; + a32[3] &= 0xffffffffL; + a32[4] = (sp_digit)(a[2] >> 24U); + a32[4] |= a[3] << 28U; + a32[4] &= 0xffffffffL; + a32[5] = (sp_digit)(a[3] >> 4U) & 0xffffffffL; + a32[6] = (sp_digit)(a[3] >> 36U); + a32[6] |= a[4] << 16U; + a32[6] &= 0xffffffffL; + a32[7] = (sp_digit)(a[4] >> 16U) & 0xffffffffL; + + /* 1 1 0 -1 -1 -1 -1 0 */ + t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6]; + /* 0 1 1 0 -1 -1 -1 -1 */ + t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7]; + /* 0 0 1 1 0 -1 -1 -1 */ + t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7]; + /* -1 -1 0 2 2 1 0 -1 */ + t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7]; + /* 0 -1 -1 0 2 2 1 0 */ + t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6]; + /* 0 0 -1 -1 0 2 2 1 */ + t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7]; + /* -1 -1 0 0 0 1 3 2 */ + t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7]; + /* 1 0 -1 -1 -1 -1 0 3 */ + t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7]; + + t[1] += t[0] >> 32U; t[0] &= 0xffffffffL; + t[2] += t[1] >> 32U; t[1] &= 0xffffffffL; + t[3] += t[2] >> 32U; t[2] &= 0xffffffffL; + t[4] += t[3] >> 32U; t[3] &= 0xffffffffL; + t[5] += t[4] >> 32U; t[4] &= 0xffffffffL; + t[6] += t[5] >> 32U; t[5] &= 0xffffffffL; + t[7] += t[6] >> 32U; t[6] &= 0xffffffffL; + o = t[7] >> 32U; t[7] &= 0xffffffffL; + t[0] += o; + t[3] -= o; + t[6] -= o; + t[7] += o; + t[1] += t[0] >> 32U; t[0] &= 0xffffffffL; + t[2] += t[1] >> 32U; t[1] &= 0xffffffffL; + t[3] += t[2] >> 32U; t[2] &= 0xffffffffL; + t[4] += t[3] >> 32U; t[3] &= 0xffffffffL; + t[5] += t[4] >> 32U; t[4] &= 0xffffffffL; + t[6] += t[5] >> 32U; t[5] &= 0xffffffffL; + t[7] += t[6] >> 32U; t[6] &= 0xffffffffL; + + r[0] = t[0]; + r[0] |= t[1] << 32U; + r[0] &= 0xfffffffffffffLL; + r[1] = (sp_digit)(t[1] >> 20); + r[1] |= t[2] << 12U; + r[1] |= t[3] << 44U; + r[1] &= 0xfffffffffffffLL; + r[2] = (sp_digit)(t[3] >> 8); + r[2] |= t[4] << 24U; + r[2] &= 0xfffffffffffffLL; + r[3] = (sp_digit)(t[4] >> 28); + r[3] |= t[5] << 4U; + r[3] |= t[6] << 36U; + r[3] &= 0xfffffffffffffLL; + r[4] = (sp_digit)(t[6] >> 16); + r[4] |= t[7] << 16U; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. 
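+ *
+ * Note that size counts sp_digit limbs (5 for P-256), not bytes; limbs
+ * beyond a->used are zero-filled. A typical call, as used just below:
+ *
+ *     sp_256_from_mp(p->x, 5, pm->x);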
+ */ +static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 52 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 52 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xfffffffffffffL; + s = 52U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 52U) <= (word32)DIGIT_BIT) { + s += 52U; + r[j] &= 0xfffffffffffffL; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 52) { + r[j] &= 0xfffffffffffffL; + if (j + 1 >= size) { + break; + } + s = 52 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Convert a point of type ecc_point to type sp_point_256. + * + * p Point of type sp_point_256 (result). + * pm Point of type ecc_point. + */ +static void sp_256_point_from_ecc_point_5(sp_point_256* p, const ecc_point* pm) +{ + XMEMSET(p->x, 0, sizeof(p->x)); + XMEMSET(p->y, 0, sizeof(p->y)); + XMEMSET(p->z, 0, sizeof(p->z)); + sp_256_from_mp(p->x, 5, pm->x); + sp_256_from_mp(p->y, 5, pm->y); + sp_256_from_mp(p->z, 5, pm->z); + p->infinity = 0; +} + +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_256_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 52 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 5); + r->used = 5; + mp_clamp(r); +#elif DIGIT_BIT < 52 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 5; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 52) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 52 - s; + } + r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 5; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 52 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 52 - s; + } + else { + s += 52; + } + } + r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Convert a point of type sp_point_256 to type ecc_point. + * + * p Point of type sp_point_256. + * pm Point of type ecc_point (result). + * returns MEMORY_E when allocation of memory in ecc_point fails otherwise + * MP_OKAY. 
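+ *
+ * Each of x, y and z is converted in turn, stopping at the first error.
+ * This is typically called after sp_256_map_5(), so the coordinates are
+ * already affine (z == 1).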
+ */ +static int sp_256_point_to_ecc_point_5(const sp_point_256* p, ecc_point* pm) +{ + int err; + + err = sp_256_to_mp(p->x, pm->x); + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, pm->y); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, pm->z); + } + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_256_mul_5(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j, k; + int128_t c; + + c = ((int128_t)a[4]) * b[4]; + r[9] = (sp_digit)(c >> 52); + c = (c & 0xfffffffffffffL) << 52; + for (k = 7; k >= 0; k--) { + for (i = 4; i >= 0; i--) { + j = k - i; + if (j >= 5) { + break; + } + if (j < 0) { + continue; + } + + c += ((int128_t)a[i]) * b[j]; + } + r[k + 2] += c >> 104; + r[k + 1] = (c >> 52) & 0xfffffffffffffL; + c = (c & 0xfffffffffffffL) << 52; + } + r[0] = (sp_digit)(c >> 52); +} + +#else +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_256_mul_5(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int128_t t0 = ((int128_t)a[ 0]) * b[ 0]; + int128_t t1 = ((int128_t)a[ 0]) * b[ 1] + + ((int128_t)a[ 1]) * b[ 0]; + int128_t t2 = ((int128_t)a[ 0]) * b[ 2] + + ((int128_t)a[ 1]) * b[ 1] + + ((int128_t)a[ 2]) * b[ 0]; + int128_t t3 = ((int128_t)a[ 0]) * b[ 3] + + ((int128_t)a[ 1]) * b[ 2] + + ((int128_t)a[ 2]) * b[ 1] + + ((int128_t)a[ 3]) * b[ 0]; + int128_t t4 = ((int128_t)a[ 0]) * b[ 4] + + ((int128_t)a[ 1]) * b[ 3] + + ((int128_t)a[ 2]) * b[ 2] + + ((int128_t)a[ 3]) * b[ 1] + + ((int128_t)a[ 4]) * b[ 0]; + int128_t t5 = ((int128_t)a[ 1]) * b[ 4] + + ((int128_t)a[ 2]) * b[ 3] + + ((int128_t)a[ 3]) * b[ 2] + + ((int128_t)a[ 4]) * b[ 1]; + int128_t t6 = ((int128_t)a[ 2]) * b[ 4] + + ((int128_t)a[ 3]) * b[ 3] + + ((int128_t)a[ 4]) * b[ 2]; + int128_t t7 = ((int128_t)a[ 3]) * b[ 4] + + ((int128_t)a[ 4]) * b[ 3]; + int128_t t8 = ((int128_t)a[ 4]) * b[ 4]; + + t1 += t0 >> 52; r[ 0] = t0 & 0xfffffffffffffL; + t2 += t1 >> 52; r[ 1] = t1 & 0xfffffffffffffL; + t3 += t2 >> 52; r[ 2] = t2 & 0xfffffffffffffL; + t4 += t3 >> 52; r[ 3] = t3 & 0xfffffffffffffL; + t5 += t4 >> 52; r[ 4] = t4 & 0xfffffffffffffL; + t6 += t5 >> 52; r[ 5] = t5 & 0xfffffffffffffL; + t7 += t6 >> 52; r[ 6] = t6 & 0xfffffffffffffL; + t8 += t7 >> 52; r[ 7] = t7 & 0xfffffffffffffL; + r[9] = (sp_digit)(t8 >> 52); + r[8] = t8 & 0xfffffffffffffL; +} + +#endif /* WOLFSSL_SP_SMALL */ +#define sp_256_mont_reduce_order_5 sp_256_mont_reduce_5 + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static sp_digit sp_256_cmp_5(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=4; i>=0; i--) { + r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#else + r |= (a[ 4] - b[ 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 3] - b[ 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 2] - b[ 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 1] - b[ 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 0] - b[ 0]) & (0 - ((r == 0) ? 
(sp_digit)1 : (sp_digit)0)); +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static void sp_256_cond_sub_5(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 5; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + r[ 0] = a[ 0] - (b[ 0] & m); + r[ 1] = a[ 1] - (b[ 1] & m); + r[ 2] = a[ 2] - (b[ 2] & m); + r[ 3] = a[ 3] - (b[ 3] & m); + r[ 4] = a[ 4] - (b[ 4] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_256_mul_add_5(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 5; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0xfffffffffffffL; + t >>= 52; + } + r[5] += t; +#else + int128_t tb = b; + int128_t t[5]; + + t[ 0] = tb * a[ 0]; + t[ 1] = tb * a[ 1]; + t[ 2] = tb * a[ 2]; + t[ 3] = tb * a[ 3]; + t[ 4] = tb * a[ 4]; + r[ 0] += (sp_digit) (t[ 0] & 0xfffffffffffffL); + r[ 1] += (sp_digit)((t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffL)); + r[ 2] += (sp_digit)((t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffL)); + r[ 3] += (sp_digit)((t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffL)); + r[ 4] += (sp_digit)((t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffL)); + r[ 5] += (sp_digit) (t[ 4] >> 52); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 52. + * + * a Array of sp_digit to normalize. + */ +static void sp_256_norm_5(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 4; i++) { + a[i+1] += a[i] >> 52; + a[i] &= 0xfffffffffffffL; + } +#else + a[1] += a[0] >> 52; a[0] &= 0xfffffffffffffL; + a[2] += a[1] >> 52; a[1] &= 0xfffffffffffffL; + a[3] += a[2] >> 52; a[2] &= 0xfffffffffffffL; + a[4] += a[3] >> 52; a[3] &= 0xfffffffffffffL; +#endif +} + +/* Shift the result in the high 256 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. + */ +static void sp_256_mont_shift_5(sp_digit* r, const sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + word64 n; + + n = a[4] >> 48; + for (i = 0; i < 4; i++) { + n += (word64)a[5 + i] << 4; + r[i] = n & 0xfffffffffffffL; + n >>= 52; + } + n += (word64)a[9] << 4; + r[4] = n; +#else + word64 n; + + n = a[4] >> 48; + n += (word64)a[ 5] << 4U; r[ 0] = n & 0xfffffffffffffUL; n >>= 52U; + n += (word64)a[ 6] << 4U; r[ 1] = n & 0xfffffffffffffUL; n >>= 52U; + n += (word64)a[ 7] << 4U; r[ 2] = n & 0xfffffffffffffUL; n >>= 52U; + n += (word64)a[ 8] << 4U; r[ 3] = n & 0xfffffffffffffUL; n >>= 52U; + n += (word64)a[ 9] << 4U; r[ 4] = n; +#endif /* WOLFSSL_SP_SMALL */ + XMEMSET(&r[5], 0, sizeof(*r) * 5U); +} + +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
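+ *
+ * Word-by-word Montgomery reduction over 52-bit limbs: for each low limb
+ *
+ *     mu = (a[i] * mp) & 0xfffffffffffff
+ *     a += (mu * m) << (52 * i)
+ *
+ * which clears limb i; the top half is then shifted down 256 bits and a
+ * conditional subtract of m keeps the result below 2^256.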
+ */ +static void sp_256_mont_reduce_5(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + int i; + sp_digit mu; + + if (mp != 1) { + for (i=0; i<4; i++) { + mu = (a[i] * mp) & 0xfffffffffffffL; + sp_256_mul_add_5(a+i, m, mu); + a[i+1] += a[i] >> 52; + } + mu = (a[i] * mp) & 0xffffffffffffL; + sp_256_mul_add_5(a+i, m, mu); + a[i+1] += a[i] >> 52; + a[i] &= 0xfffffffffffffL; + } + else { + for (i=0; i<4; i++) { + mu = a[i] & 0xfffffffffffffL; + sp_256_mul_add_5(a+i, p256_mod, mu); + a[i+1] += a[i] >> 52; + } + mu = a[i] & 0xffffffffffffL; + sp_256_mul_add_5(a+i, p256_mod, mu); + a[i+1] += a[i] >> 52; + a[i] &= 0xfffffffffffffL; + } + + sp_256_mont_shift_5(a, a); + sp_256_cond_sub_5(a, a, m, 0 - (((a[4] >> 48) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_5(a); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_256_mont_mul_5(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_256_mul_5(r, a, b); + sp_256_mont_reduce_5(r, m, mp); +} + +#ifdef WOLFSSL_SP_SMALL +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_256_sqr_5(sp_digit* r, const sp_digit* a) +{ + int i, j, k; + int128_t c; + + c = ((int128_t)a[4]) * a[4]; + r[9] = (sp_digit)(c >> 52); + c = (c & 0xfffffffffffffL) << 52; + for (k = 7; k >= 0; k--) { + for (i = 4; i >= 0; i--) { + j = k - i; + if (j >= 5 || i <= j) { + break; + } + if (j < 0) { + continue; + } + + c += ((int128_t)a[i]) * a[j] * 2; + } + if (i == j) { + c += ((int128_t)a[i]) * a[i]; + } + + r[k + 2] += c >> 104; + r[k + 1] = (c >> 52) & 0xfffffffffffffL; + c = (c & 0xfffffffffffffL) << 52; + } + r[0] = (sp_digit)(c >> 52); +} + +#else +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_256_sqr_5(sp_digit* r, const sp_digit* a) +{ + int128_t t0 = ((int128_t)a[ 0]) * a[ 0]; + int128_t t1 = (((int128_t)a[ 0]) * a[ 1]) * 2; + int128_t t2 = (((int128_t)a[ 0]) * a[ 2]) * 2 + + ((int128_t)a[ 1]) * a[ 1]; + int128_t t3 = (((int128_t)a[ 0]) * a[ 3] + + ((int128_t)a[ 1]) * a[ 2]) * 2; + int128_t t4 = (((int128_t)a[ 0]) * a[ 4] + + ((int128_t)a[ 1]) * a[ 3]) * 2 + + ((int128_t)a[ 2]) * a[ 2]; + int128_t t5 = (((int128_t)a[ 1]) * a[ 4] + + ((int128_t)a[ 2]) * a[ 3]) * 2; + int128_t t6 = (((int128_t)a[ 2]) * a[ 4]) * 2 + + ((int128_t)a[ 3]) * a[ 3]; + int128_t t7 = (((int128_t)a[ 3]) * a[ 4]) * 2; + int128_t t8 = ((int128_t)a[ 4]) * a[ 4]; + + t1 += t0 >> 52; r[ 0] = t0 & 0xfffffffffffffL; + t2 += t1 >> 52; r[ 1] = t1 & 0xfffffffffffffL; + t3 += t2 >> 52; r[ 2] = t2 & 0xfffffffffffffL; + t4 += t3 >> 52; r[ 3] = t3 & 0xfffffffffffffL; + t5 += t4 >> 52; r[ 4] = t4 & 0xfffffffffffffL; + t6 += t5 >> 52; r[ 5] = t5 & 0xfffffffffffffL; + t7 += t6 >> 52; r[ 6] = t6 & 0xfffffffffffffL; + t8 += t7 >> 52; r[ 7] = t7 & 0xfffffffffffffL; + r[9] = (sp_digit)(t8 >> 52); + r[8] = t8 & 0xfffffffffffffL; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. 
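+ *
+ * A dedicated squaring needs roughly half the partial products of a
+ * general multiply, since the cross terms pair up:
+ *
+ *     a[i]*a[j] + a[j]*a[i] = 2*a[i]*a[j]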
+ */ +static void sp_256_mont_sqr_5(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_256_sqr_5(r, a); + sp_256_mont_reduce_5(r, m, mp); +} + +#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY) +/* Square the Montgomery form number a number of times. (r = a ^ n mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * n Number of times to square. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_256_mont_sqr_n_5(sp_digit* r, const sp_digit* a, int n, + const sp_digit* m, sp_digit mp) +{ + sp_256_mont_sqr_5(r, a, m, mp); + for (; n > 1; n--) { + sp_256_mont_sqr_5(r, r, m, mp); + } +} + +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ +#ifdef WOLFSSL_SP_SMALL +/* Mod-2 for the P256 curve. */ +static const uint64_t p256_mod_minus_2[4] = { + 0xfffffffffffffffdU,0x00000000ffffffffU,0x0000000000000000U, + 0xffffffff00000001U +}; +#endif /* !WOLFSSL_SP_SMALL */ + +/* Invert the number, in Montgomery form, modulo the modulus (prime) of the + * P256 curve. (r = 1 / a mod m) + * + * r Inverse result. + * a Number to invert. + * td Temporary data. + */ +static void sp_256_mont_inv_5(sp_digit* r, const sp_digit* a, sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 5); + for (i=254; i>=0; i--) { + sp_256_mont_sqr_5(t, t, p256_mod, p256_mp_mod); + if (p256_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64))) + sp_256_mont_mul_5(t, t, a, p256_mod, p256_mp_mod); + } + XMEMCPY(r, t, sizeof(sp_digit) * 5); +#else + sp_digit* t1 = td; + sp_digit* t2 = td + 2 * 5; + sp_digit* t3 = td + 4 * 5; + /* 0x2 */ + sp_256_mont_sqr_5(t1, a, p256_mod, p256_mp_mod); + /* 0x3 */ + sp_256_mont_mul_5(t2, t1, a, p256_mod, p256_mp_mod); + /* 0xc */ + sp_256_mont_sqr_n_5(t1, t2, 2, p256_mod, p256_mp_mod); + /* 0xd */ + sp_256_mont_mul_5(t3, t1, a, p256_mod, p256_mp_mod); + /* 0xf */ + sp_256_mont_mul_5(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xf0 */ + sp_256_mont_sqr_n_5(t1, t2, 4, p256_mod, p256_mp_mod); + /* 0xfd */ + sp_256_mont_mul_5(t3, t3, t1, p256_mod, p256_mp_mod); + /* 0xff */ + sp_256_mont_mul_5(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xff00 */ + sp_256_mont_sqr_n_5(t1, t2, 8, p256_mod, p256_mp_mod); + /* 0xfffd */ + sp_256_mont_mul_5(t3, t3, t1, p256_mod, p256_mp_mod); + /* 0xffff */ + sp_256_mont_mul_5(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xffff0000 */ + sp_256_mont_sqr_n_5(t1, t2, 16, p256_mod, p256_mp_mod); + /* 0xfffffffd */ + sp_256_mont_mul_5(t3, t3, t1, p256_mod, p256_mp_mod); + /* 0xffffffff */ + sp_256_mont_mul_5(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xffffffff00000000 */ + sp_256_mont_sqr_n_5(t1, t2, 32, p256_mod, p256_mp_mod); + /* 0xffffffffffffffff */ + sp_256_mont_mul_5(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xffffffff00000001 */ + sp_256_mont_mul_5(r, t1, a, p256_mod, p256_mp_mod); + /* 0xffffffff000000010000000000000000000000000000000000000000 */ + sp_256_mont_sqr_n_5(r, r, 160, p256_mod, p256_mp_mod); + /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */ + sp_256_mont_mul_5(r, r, t2, p256_mod, p256_mp_mod); + /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */ + sp_256_mont_sqr_n_5(r, r, 32, p256_mod, p256_mp_mod); + /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */ + sp_256_mont_mul_5(r, r, t3, p256_mod, p256_mp_mod); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. 
+ * p Montgomery form projective coordinate point. + * t Temporary ordinate data. + */ +static void sp_256_map_5(sp_point_256* r, const sp_point_256* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*5; + int64_t n; + + sp_256_mont_inv_5(t1, p->z, t + 2*5); + + sp_256_mont_sqr_5(t2, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t1, t2, t1, p256_mod, p256_mp_mod); + + /* x /= z^2 */ + sp_256_mont_mul_5(r->x, p->x, t2, p256_mod, p256_mp_mod); + XMEMSET(r->x + 5, 0, sizeof(r->x) / 2U); + sp_256_mont_reduce_5(r->x, p256_mod, p256_mp_mod); + /* Reduce x to less than modulus */ + n = sp_256_cmp_5(r->x, p256_mod); + sp_256_cond_sub_5(r->x, r->x, p256_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_5(r->x); + + /* y /= z^3 */ + sp_256_mont_mul_5(r->y, p->y, t1, p256_mod, p256_mp_mod); + XMEMSET(r->y + 5, 0, sizeof(r->y) / 2U); + sp_256_mont_reduce_5(r->y, p256_mod, p256_mp_mod); + /* Reduce y to less than modulus */ + n = sp_256_cmp_5(r->y, p256_mod); + sp_256_cond_sub_5(r->y, r->y, p256_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_5(r->y); + + XMEMSET(r->z, 0, sizeof(r->z)); + r->z[0] = 1; + +} + +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_256_add_5(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 5; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#else +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_256_add_5(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + r[ 0] = a[ 0] + b[ 0]; + r[ 1] = a[ 1] + b[ 1]; + r[ 2] = a[ 2] + b[ 2]; + r[ 3] = a[ 3] + b[ 3]; + r[ 4] = a[ 4] + b[ 4]; + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Add two Montgomery form numbers (r = a + b % m). + * + * r Result of addition. + * a First number to add in Montogmery form. + * b Second number to add in Montogmery form. + * m Modulus (prime). + */ +static void sp_256_mont_add_5(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + (void)sp_256_add_5(r, a, b); + sp_256_norm_5(r); + sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_5(r); +} + +/* Double a Montgomery form number (r = a + a % m). + * + * r Result of doubling. + * a Number to double in Montogmery form. + * m Modulus (prime). + */ +static void sp_256_mont_dbl_5(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + (void)sp_256_add_5(r, a, a); + sp_256_norm_5(r); + sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_5(r); +} + +/* Triple a Montgomery form number (r = a + a + a % m). + * + * r Result of Tripling. + * a Number to triple in Montogmery form. + * m Modulus (prime). + */ +static void sp_256_mont_tpl_5(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + (void)sp_256_add_5(r, a, a); + sp_256_norm_5(r); + sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_5(r); + (void)sp_256_add_5(r, r, a); + sp_256_norm_5(r); + sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_5(r); +} + +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
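+ *
+ * With 52-bit limbs held in 64-bit words there is headroom for the
+ * borrows, so no per-limb carry handling is needed; callers fix up and
+ * renormalize afterwards, as sp_256_mont_sub_5() does below:
+ *
+ *     (void)sp_256_sub_5(r, a, b);
+ *     sp_256_cond_add_5(r, r, m, r[4] >> 48);
+ *     sp_256_norm_5(r);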
+ */
+SP_NOINLINE static int sp_256_sub_5(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 5; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static int sp_256_sub_5(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[ 0] = a[ 0] - b[ 0];
+    r[ 1] = a[ 1] - b[ 1];
+    r[ 2] = a[ 2] - b[ 2];
+    r[ 3] = a[ 3] - b[ 3];
+    r[ 4] = a[ 4] - b[ 4];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static void sp_256_cond_add_5(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 5; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    r[ 0] = a[ 0] + (b[ 0] & m);
+    r[ 1] = a[ 1] + (b[ 1] & m);
+    r[ 2] = a[ 2] + (b[ 2] & m);
+    r[ 3] = a[ 3] + (b[ 3] & m);
+    r[ 4] = a[ 4] + (b[ 4] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r  Result of subtraction.
+ * a  Number to subtract from in Montgomery form.
+ * b  Number to subtract with in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_256_mont_sub_5(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_256_sub_5(r, a, b);
+    sp_256_cond_add_5(r, r, m, r[4] >> 48);
+    sp_256_norm_5(r);
+}
+
+/* Shift number right one bit.
+ * Bottom bit is lost.
+ *
+ * r  Result of shift.
+ * a  Number to shift.
+ */
+SP_NOINLINE static void sp_256_rshift1_5(sp_digit* r, sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<4; i++) {
+        r[i] = ((a[i] >> 1) | (a[i + 1] << 51)) & 0xfffffffffffffL;
+    }
+#else
+    r[0] = ((a[0] >> 1) | (a[1] << 51)) & 0xfffffffffffffL;
+    r[1] = ((a[1] >> 1) | (a[2] << 51)) & 0xfffffffffffffL;
+    r[2] = ((a[2] >> 1) | (a[3] << 51)) & 0xfffffffffffffL;
+    r[3] = ((a[3] >> 1) | (a[4] << 51)) & 0xfffffffffffffL;
+#endif
+    r[4] = a[4] >> 1;
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r  Result of division by 2.
+ * a  Number to divide.
+ * m  Modulus (prime).
+ */
+static void sp_256_div2_5(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_256_cond_add_5(r, a, m, 0 - (a[0] & 1));
+    sp_256_norm_5(r);
+    sp_256_rshift1_5(r, r);
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_5(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*5;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result.
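+     * That is, copy p's infinity flag across when r and p are distinct
+     * buffers; the ordinates are then written through the x/y/z aliases
+     * set up above.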
*/ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_5(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_5(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_5(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_5(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_5(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_5(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_5(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_5(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_5(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_5(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_5(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_5(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_5(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_5(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_5(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_5(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_5(y, y, t2, p256_mod); +} + +/* Compare two numbers to determine if they are equal. + * Constant time implementation. + * + * a First number to compare. + * b Second number to compare. + * returns 1 when equal and 0 otherwise. + */ +static int sp_256_cmp_equal_5(const sp_digit* a, const sp_digit* b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) | + (a[4] ^ b[4])) == 0; +} + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_add_5(sp_point_256* r, const sp_point_256* p, const sp_point_256* q, + sp_digit* t) +{ + const sp_point_256* ap[2]; + sp_point_256* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*5; + sp_digit* t3 = t + 4*5; + sp_digit* t4 = t + 6*5; + sp_digit* t5 = t + 8*5; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Ensure only the first point is the same as the result. 
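+     * If q aliases r, swap the operands so that any aliasing of the
+     * result is through p, which the ordering of reads below tolerates.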
*/ + if (q == r) { + const sp_point_256* a = p; + p = q; + q = a; + } + + /* Check double */ + (void)sp_256_sub_5(t1, p256_mod, q->y); + sp_256_norm_5(t1); + if ((sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) & + (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, t1))) != 0) { + sp_256_proj_point_dbl_5(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_256)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<5; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<5; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<5; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_5(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t1, t1, x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_5(t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_5(t3, t3, y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_5(t4, t4, q->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_5(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_5(t4, t4, t3, p256_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_5(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(z, z, t2, p256_mod, p256_mp_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_5(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_5(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t5, t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(x, x, t5, p256_mod); + sp_256_mont_dbl_5(t1, y, p256_mod); + sp_256_mont_sub_5(x, x, t1, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_5(y, y, x, p256_mod); + sp_256_mont_mul_5(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(y, y, t5, p256_mod); + } +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
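+ *
+ * The scalar is consumed one bit at a time, most significant bit first,
+ * out of 5 x 52-bit digits (the top digit carries 256 - 4*52 = 48 bits).
+ * A stand-alone sketch of just that bit scan, mirroring the (i, c, n)
+ * loop in the body (names as below, comments illustrative):
+ *
+ *     int i = 4, c = 48;
+ *     sp_digit n = k[i--] << (52 - c);
+ *     for (; ; c--) {
+ *         if (c == 0) {
+ *             if (i == -1) break;
+ *             n = k[i--]; c = 52;
+ *         }
+ *         int y = (n >> 51) & 1;    // next scalar bit
+ *         n <<= 1;
+ *         // ladder step: one point add and one double for every bit
+ *     }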
+ */ +static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, const sp_digit* k, + int map, void* heap) +{ +#ifdef WOLFSSL_SP_NO_MALLOC + sp_point_256 t[3]; + sp_digit tmp[2 * 5 * 5]; +#else + sp_point_256* t; + sp_digit* tmp; +#endif + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + (void)heap; + +#ifndef WOLFSSL_SP_NO_MALLOC + t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + XMEMSET(t, 0, sizeof(sp_point_256) * 3); + + /* t[0] = {0, 0, 1} * norm */ + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_256_mod_mul_norm_5(t[1].x, g->x, p256_mod); + } + if (err == MP_OKAY) + err = sp_256_mod_mul_norm_5(t[1].y, g->y, p256_mod); + if (err == MP_OKAY) + err = sp_256_mod_mul_norm_5(t[1].z, g->z, p256_mod); + + if (err == MP_OKAY) { + i = 4; + c = 48; + n = k[i--] << (52 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) + break; + + n = k[i--]; + c = 52; + } + + y = (n >> 51) & 1; + n <<= 1; + + sp_256_proj_point_add_5(&t[y^1], &t[0], &t[1], tmp); + + XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), + sizeof(sp_point_256)); + sp_256_proj_point_dbl_5(&t[2], &t[2], tmp); + XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), &t[2], + sizeof(sp_point_256)); + } + + if (map != 0) { + sp_256_map_5(r, &t[0], tmp); + } + else { + XMEMCPY(r, &t[0], sizeof(sp_point_256)); + } + } + +#ifndef WOLFSSL_SP_NO_MALLOC + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 5 * 5); + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_point_256) * 3); + XFREE(t, NULL, DYNAMIC_TYPE_ECC); + } +#else + ForceZero(tmp, sizeof(tmp)); + ForceZero(t, sizeof(t)); +#endif + + return err; +} + +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
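+ *
+ * As in the small build above, the point to double is selected with an
+ * address mask rather than an array index, so the memory access pattern
+ * does not depend on the secret bit y. A minimal sketch of that
+ * branch-free select (assuming addr_mask[] = { 0, all-one bits }, as set
+ * up earlier in this file):
+ *
+ *     sp_point_256* sel = (sp_point_256*)
+ *             (((size_t)&t[0] & addr_mask[y ^ 1]) +
+ *              ((size_t)&t[1] & addr_mask[y]));   // &t[y], no branch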
+ */
+static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 t[3];
+    sp_digit tmp[2 * 5 * 5];
+#else
+    sp_point_256* t;
+    sp_digit* tmp;
+#endif
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_256*)XMALLOC(sizeof(*t) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        t[1].infinity = 0;
+        err = sp_256_mod_mul_norm_5(t[1].x, g->x, p256_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_5(t[1].y, g->y, p256_mod);
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_5(t[1].z, g->z, p256_mod);
+
+    if (err == MP_OKAY) {
+        i = 4;
+        c = 48;
+        n = k[i--] << (52 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 52;
+            }
+
+            y = (n >> 51) & 1;
+            n <<= 1;
+
+            sp_256_proj_point_add_5(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                   ((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_256_proj_point_dbl_5(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                            ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
+        }
+
+        if (map != 0) {
+            sp_256_map_5(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 5 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 3);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
+#endif
+
+    return err;
+}
+
+#else
+/* A table entry for pre-computed points. */
+typedef struct sp_table_entry_256 {
+    sp_digit x[5];
+    sp_digit y[5];
+} sp_table_entry_256;
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
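+ *
+ * This build precomputes t[j] = j*g for j = 0..15 with alternating
+ * doubles and adds, then consumes the scalar in fixed 4-bit windows:
+ * four point doublings and one table addition per window, so a 256-bit
+ * scalar costs roughly 256 doubles plus 64 adds rather than an addition
+ * for every set bit.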
+ */ +static int sp_256_ecc_mulmod_fast_5(sp_point_256* r, const sp_point_256* g, const sp_digit* k, + int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 td[16]; + sp_point_256 rtd; + sp_digit tmpd[2 * 5 * 5]; +#endif + sp_point_256* t; + sp_point_256* rt; + sp_digit* tmp; + sp_digit n; + int i; + int c, y; + int err; + + (void)heap; + + err = sp_256_point_new_5(heap, rtd, rt); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#else + t = td; + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + (void)sp_256_mod_mul_norm_5(t[1].x, g->x, p256_mod); + (void)sp_256_mod_mul_norm_5(t[1].y, g->y, p256_mod); + (void)sp_256_mod_mul_norm_5(t[1].z, g->z, p256_mod); + t[1].infinity = 0; + sp_256_proj_point_dbl_5(&t[ 2], &t[ 1], tmp); + t[ 2].infinity = 0; + sp_256_proj_point_add_5(&t[ 3], &t[ 2], &t[ 1], tmp); + t[ 3].infinity = 0; + sp_256_proj_point_dbl_5(&t[ 4], &t[ 2], tmp); + t[ 4].infinity = 0; + sp_256_proj_point_add_5(&t[ 5], &t[ 3], &t[ 2], tmp); + t[ 5].infinity = 0; + sp_256_proj_point_dbl_5(&t[ 6], &t[ 3], tmp); + t[ 6].infinity = 0; + sp_256_proj_point_add_5(&t[ 7], &t[ 4], &t[ 3], tmp); + t[ 7].infinity = 0; + sp_256_proj_point_dbl_5(&t[ 8], &t[ 4], tmp); + t[ 8].infinity = 0; + sp_256_proj_point_add_5(&t[ 9], &t[ 5], &t[ 4], tmp); + t[ 9].infinity = 0; + sp_256_proj_point_dbl_5(&t[10], &t[ 5], tmp); + t[10].infinity = 0; + sp_256_proj_point_add_5(&t[11], &t[ 6], &t[ 5], tmp); + t[11].infinity = 0; + sp_256_proj_point_dbl_5(&t[12], &t[ 6], tmp); + t[12].infinity = 0; + sp_256_proj_point_add_5(&t[13], &t[ 7], &t[ 6], tmp); + t[13].infinity = 0; + sp_256_proj_point_dbl_5(&t[14], &t[ 7], tmp); + t[14].infinity = 0; + sp_256_proj_point_add_5(&t[15], &t[ 8], &t[ 7], tmp); + t[15].infinity = 0; + + i = 3; + n = k[i+1] << 12; + c = 44; + y = n >> 56; + XMEMCPY(rt, &t[y], sizeof(sp_point_256)); + n <<= 8; + for (; i>=0 || c>=4; ) { + if (c < 4) { + n |= k[i--] << (12 - c); + c += 52; + } + y = (n >> 60) & 0xf; + n <<= 4; + c -= 4; + + sp_256_proj_point_dbl_5(rt, rt, tmp); + sp_256_proj_point_dbl_5(rt, rt, tmp); + sp_256_proj_point_dbl_5(rt, rt, tmp); + sp_256_proj_point_dbl_5(rt, rt, tmp); + + sp_256_proj_point_add_5(rt, rt, &t[y], tmp); + } + + if (map != 0) { + sp_256_map_5(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_256)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 5 * 5); + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + } + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_point_256) * 16); + XFREE(t, heap, DYNAMIC_TYPE_ECC); + } +#else + ForceZero(tmpd, sizeof(tmpd)); + ForceZero(td, sizeof(td)); +#endif + sp_256_point_free_5(rt, 1, heap); + + return err; +} + +#ifdef FP_ECC +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. 
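+ *
+ * The saving over calling sp_256_proj_point_dbl_5 n times is that
+ * W = Z^4 is computed once up front and then maintained incrementally
+ * as W = W * Y^4 at the end of each iteration.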
+ */ +static void sp_256_proj_point_dbl_n_5(sp_point_256* p, int n, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*5; + sp_digit* b = t + 4*5; + sp_digit* t1 = t + 6*5; + sp_digit* t2 = t + 8*5; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_256_mont_dbl_5(y, y, p256_mod); + /* W = Z^4 */ + sp_256_mont_sqr_5(w, z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_5(w, w, p256_mod, p256_mp_mod); + +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_256_mont_sqr_5(t1, x, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(t1, t1, w, p256_mod); + sp_256_mont_tpl_5(a, t1, p256_mod); + /* B = X*Y^2 */ + sp_256_mont_sqr_5(t1, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(b, t1, x, p256_mod, p256_mp_mod); + /* X = A^2 - 2B */ + sp_256_mont_sqr_5(x, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_5(t2, b, p256_mod); + sp_256_mont_sub_5(x, x, t2, p256_mod); + /* Z = Z*Y */ + sp_256_mont_mul_5(z, z, y, p256_mod, p256_mp_mod); + /* t2 = Y^4 */ + sp_256_mont_sqr_5(t1, t1, p256_mod, p256_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_256_mont_mul_5(w, w, t1, p256_mod, p256_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_256_mont_sub_5(y, b, x, p256_mod); + sp_256_mont_mul_5(y, y, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_5(y, y, p256_mod); + sp_256_mont_sub_5(y, y, t1, p256_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_256_mont_sqr_5(t1, x, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(t1, t1, w, p256_mod); + sp_256_mont_tpl_5(a, t1, p256_mod); + /* B = X*Y^2 */ + sp_256_mont_sqr_5(t1, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(b, t1, x, p256_mod, p256_mp_mod); + /* X = A^2 - 2B */ + sp_256_mont_sqr_5(x, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_5(t2, b, p256_mod); + sp_256_mont_sub_5(x, x, t2, p256_mod); + /* Z = Z*Y */ + sp_256_mont_mul_5(z, z, y, p256_mod, p256_mp_mod); + /* t2 = Y^4 */ + sp_256_mont_sqr_5(t1, t1, p256_mod, p256_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_256_mont_sub_5(y, b, x, p256_mod); + sp_256_mont_mul_5(y, y, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_5(y, y, p256_mod); + sp_256_mont_sub_5(y, y, t1, p256_mod); +#endif + /* Y = Y/2 */ + sp_256_div2_5(y, y, p256_mod); +} + +#endif /* FP_ECC */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
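+ *
+ * With Z2 known to be one, U1 = X1 and S1 = Y1 need no computation and
+ * Z3 is just H*Z1, saving a field squaring and several multiplications
+ * over the general sp_256_proj_point_add_5 above.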
+ */ +static void sp_256_proj_point_add_qz1_5(sp_point_256* r, const sp_point_256* p, + const sp_point_256* q, sp_digit* t) +{ + const sp_point_256* ap[2]; + sp_point_256* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*5; + sp_digit* t3 = t + 4*5; + sp_digit* t4 = t + 6*5; + sp_digit* t5 = t + 8*5; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Check double */ + (void)sp_256_sub_5(t1, p256_mod, q->y); + sp_256_norm_5(t1); + if ((sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) & + (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, t1))) != 0) { + sp_256_proj_point_dbl_5(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_256)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<5; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<5; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<5; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_5(t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_5(t4, t4, q->y, p256_mod, p256_mp_mod); + /* H = U2 - X1 */ + sp_256_mont_sub_5(t2, t2, x, p256_mod); + /* R = S2 - Y1 */ + sp_256_mont_sub_5(t4, t4, y, p256_mod); + /* Z3 = H*Z1 */ + sp_256_mont_mul_5(z, z, t2, p256_mod, p256_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_256_mont_sqr_5(t1, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_5(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t3, x, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t5, t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(x, t1, t5, p256_mod); + sp_256_mont_dbl_5(t1, t3, p256_mod); + sp_256_mont_sub_5(x, x, t1, p256_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_256_mont_sub_5(t3, t3, x, p256_mod); + sp_256_mont_mul_5(t3, t3, t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t5, t5, y, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(y, t3, t5, p256_mod); + } +} + +#ifdef FP_ECC +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_256_proj_to_affine_5(sp_point_256* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 5; + sp_digit* tmp = t + 4 * 5; + + sp_256_mont_inv_5(t1, a->z, tmp); + + sp_256_mont_sqr_5(t2, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t1, t2, t1, p256_mod, p256_mp_mod); + + sp_256_mont_mul_5(a->x, a->x, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(a->y, a->y, t1, p256_mod, p256_mp_mod); + XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod)); +} + +/* Generate the pre-computed table of points for the base point. + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. 
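+ *
+ * Table layout: entry y (a byte) holds the affine sum, over the set bits
+ * j of y, of 2^(32*j) * a - so table[1] is a itself and table[2^j] is a
+ * doubled 32*j times. sp_256_ecc_mulmod_stripe_5 below indexes it with
+ * one scalar bit taken from each 32-bit stripe.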
+ */
+static int sp_256_gen_stripe_table_5(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_5(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_5(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_5(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_5(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_5(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_5(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_5(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_256_proj_point_dbl_n_5(t, 32, tmp);
+            sp_256_proj_to_affine_5(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_5(t, s1, s2, tmp);
+                sp_256_proj_to_affine_5(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_5(s2, 0, heap);
+    sp_256_point_free_5(s1, 0, heap);
+    sp_256_point_free_5( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
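+ *
+ * Each step gathers one scalar bit from each of the eight 32-bit stripes
+ * into a table index; a sketch of that gather, as used in the body (x is
+ * the bit position within the 5 x 52-bit scalar):
+ *
+ *     y = 0;
+ *     for (j = 0, x = i; j < 8; j++, x += 32) {
+ *         y |= ((k[x / 52] >> (x % 52)) & 1) << j;   // bit x of k
+ *     }
+ *
+ * so a 256-bit scalar needs only 31 doublings and 31 table additions.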
+ */
+static int sp_256_ecc_mulmod_stripe_5(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit td[2 * 5 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+    err = sp_256_point_new_5(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_5(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        y = 0;
+        for (j=0,x=31; j<8; j++,x+=32) {
+            y |= ((k[x / 52] >> (x % 52)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=30; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=32) {
+                y |= ((k[x / 52] >> (x % 52)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_5(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_5(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_5(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_5(p, 0, heap);
+    sp_256_point_free_5(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_256_t {
+    sp_digit x[5];
+    sp_digit y[5];
+    sp_table_entry_256 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_256_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_256[i].set)
+            continue;
+
+        if (sp_256_cmp_equal_5(g->x, sp_cache_256[i].x) &
+                sp_256_cmp_equal_5(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_256[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_256[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, const sp_digit* k, + int map, void* heap) +{ +#ifndef FP_ECC + return sp_256_ecc_mulmod_fast_5(r, g, k, map, heap); +#else + sp_digit tmp[2 * 5 * 5]; + sp_cache_256_t* cache; + int err = MP_OKAY; + +#ifndef HAVE_THREAD_LS + if (initCacheMutex_256 == 0) { + wc_InitMutex(&sp_cache_256_lock); + initCacheMutex_256 = 1; + } + if (wc_LockMutex(&sp_cache_256_lock) != 0) + err = BAD_MUTEX_E; +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_256(g, &cache); + if (cache->cnt == 2) + sp_256_gen_stripe_table_5(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_256_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_256_ecc_mulmod_fast_5(r, g, k, map, heap); + } + else { + err = sp_256_ecc_mulmod_stripe_5(r, g, cache->table, k, + map, heap); + } + } + + return err; +#endif +} + +#endif +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map, + void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 p; + sp_digit kd[5]; +#endif + sp_point_256* point; + sp_digit* k = NULL; + int err = MP_OKAY; + + err = sp_256_point_new_5(heap, p, point); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + if (err == MP_OKAY) { + sp_256_from_mp(k, 5, km); + sp_256_point_from_ecc_point_5(point, gm); + + err = sp_256_ecc_mulmod_5(point, point, k, map, heap); + } + if (err == MP_OKAY) { + err = sp_256_point_to_ecc_point_5(point, r); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_5(point, 0, heap); + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_256_ecc_mulmod_base_5(sp_point_256* r, const sp_digit* k, + int map, void* heap) +{ + /* No pre-computed values. */ + return sp_256_ecc_mulmod_5(r, &p256_base, k, map, heap); +} + +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
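+ *
+ * With no pre-computed table for the base point, the small and
+ * cache-resistant builds simply forward to sp_256_ecc_mulmod_5 with
+ * p256_base; the build below instead walks the 256-entry p256_table
+ * generated for the base point.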
+ */ +static int sp_256_ecc_mulmod_base_5(sp_point_256* r, const sp_digit* k, + int map, void* heap) +{ + /* No pre-computed values. */ + return sp_256_ecc_mulmod_5(r, &p256_base, k, map, heap); +} + +#else +static const sp_table_entry_256 p256_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x730d418a9143cL,0xfc5fedb60179eL,0x762251075ba95L,0x55c679fb732b7L, + 0x018905f76a537L }, + { 0x25357ce95560aL,0xe4ba19e45cddfL,0xd21f3258b4ab8L,0x5d85d2e88688dL, + 0x08571ff182588L } }, + /* 2 */ + { { 0x886024147519aL,0xac26b372f0202L,0x785ebc8d0981eL,0x58e9a9d4a7caaL, + 0x0d953c50ddbdfL }, + { 0x361ccfd590f8fL,0x6b44e6c9179d6L,0x2eb64cf72e962L,0x88f37fd961102L, + 0x0863ebb7e9eb2L } }, + /* 3 */ + { { 0x6b6235cdb6485L,0xa22f0a2f97785L,0xf7e300b808f0eL,0x80a03e68d9544L, + 0x000076055b5ffL }, + { 0x4eb9b838d2010L,0xbb3243708a763L,0x42a660654014fL,0x3ee0e0e47d398L, + 0x0830877613437L } }, + /* 4 */ + { { 0x22fc516a0d2bbL,0x6c1a6234994f9L,0x7c62c8b0d5cc1L,0x667f9241cf3a5L, + 0x02f5e6961fd1bL }, + { 0x5c70bf5a01797L,0x4d609561925c1L,0x71fdb523d20b4L,0x0f7b04911b370L, + 0x0f648f9168d6fL } }, + /* 5 */ + { { 0x66847e137bbbcL,0x9e8a6a0bec9e5L,0x9d73463e43446L,0x0015b1c427617L, + 0x05abe0285133dL }, + { 0xa837cc04c7dabL,0x4c43260c0792aL,0x8e6cc37573d9fL,0x73830c9315627L, + 0x094bb725b6b6fL } }, + /* 6 */ + { { 0x9b48f720f141cL,0xcd2df5bc74bbfL,0x11045c46199b3L,0xc4efdc3f61294L, + 0x0cdd6bbcb2f7dL }, + { 0x6700beaf436fdL,0x6db99326beccaL,0x14f25226f647fL,0xe5f60c0fa7920L, + 0x0a361bebd4bdaL } }, + /* 7 */ + { { 0xa2558597c13c7L,0x5f50b7c3e128aL,0x3c09d1dc38d63L,0x292c07039aecfL, + 0x0ba12ca09c4b5L }, + { 0x08fa459f91dfdL,0x66ceea07fb9e4L,0xd780b293af43bL,0xef4b1eceb0899L, + 0x053ebb99d701fL } }, + /* 8 */ + { { 0x7ee31b0e63d34L,0x72a9e54fab4feL,0x5e7b5a4f46005L,0x4831c0493334dL, + 0x08589fb9206d5L }, + { 0x0f5cc6583553aL,0x4ae25649e5aa7L,0x0044652087909L,0x1c4fcc9045071L, + 0x0ebb0696d0254L } }, + /* 9 */ + { { 0x6ca15ac1647c5L,0x47c4cf5799461L,0x64dfbacb8127dL,0x7da3dc666aa37L, + 0x0eb2820cbd1b2L }, + { 0x6f8d86a87e008L,0x9d922378f3940L,0x0ccecb2d87dfaL,0xda1d56ed2e428L, + 0x01f28289b55a7L } }, + /* 10 */ + { { 0xaa0c03b89da99L,0x9eb8284022abbL,0x81c05e8a6f2d7L,0x4d6327847862bL, + 0x0337a4b5905e5L }, + { 0x7500d21f7794aL,0xb77d6d7f613c6L,0x4cfd6e8207005L,0xfbd60a5a37810L, + 0x00d65e0d5f4c2L } }, + /* 11 */ + { { 0x09bbeb5275d38L,0x450be0a358d9dL,0x73eb2654268a7L,0xa232f0762ff49L, + 0x0c23da24252f4L }, + { 0x1b84f0b94520cL,0x63b05bd78e5daL,0x4d29ea1096667L,0xcff13a4dcb869L, + 0x019de3b8cc790L } }, + /* 12 */ + { { 0xa716c26c5fe04L,0x0b3bba1bdb183L,0x4cb712c3b28deL,0xcbfd7432c586aL, + 0x0e34dcbd491fcL }, + { 0x8d46baaa58403L,0x8682e97a53b40L,0x6aaa8af9a6974L,0x0f7f9e3901273L, + 0x0e7641f447b4eL } }, + /* 13 */ + { { 0x53941df64ba59L,0xec0b0242fc7d7L,0x1581859d33f10L,0x57bf4f06dfc6aL, + 0x04a12df57052aL }, + { 0x6338f9439dbd0L,0xd4bde53e1fbfaL,0x1f1b314d3c24bL,0xea46fd5e4ffa2L, + 0x06af5aa93bb5bL } }, + /* 14 */ + { { 0x0b69910c91999L,0x402a580491da1L,0x8cc20900a24b4L,0x40133e0094b4bL, + 0x05fe3475a66a4L }, + { 0x8cabdf93e7b4bL,0x1a7c23f91ab0fL,0xd1e6263292b50L,0xa91642e889aecL, + 0x0b544e308ecfeL } }, + /* 15 */ + { { 0x8c6e916ddfdceL,0x66f89179e6647L,0xd4e67e12c3291L,0xc20b4e8d6e764L, + 0x0e0b6b2bda6b0L }, + { 0x12df2bb7efb57L,0xde790c40070d3L,0x79bc9441aac0dL,0x3774f90336ad6L, + 0x071c023de25a6L } }, + /* 16 */ + { { 0x8c244bfe20925L,0xc38fdce86762aL,0xd38706391c19aL,0x24f65a96a5d5dL, + 0x061d587d421d3L }, + { 
0x673a2a37173eaL,0x0853778b65e87L,0x5bab43e238480L,0xefbe10f8441e0L, + 0x0fa11fe124621L } }, + /* 17 */ + { { 0x91f2b2cb19ffdL,0x5bb1923c231c8L,0xac5ca8e01ba8dL,0xbedcb6d03d678L, + 0x0586eb04c1f13L }, + { 0x5c6e527e8ed09L,0x3c1819ede20c3L,0x6c652fa1e81a3L,0x4f11278fd6c05L, + 0x019d5ac087086L } }, + /* 18 */ + { { 0x9f581309a4e1fL,0x1be92700741e9L,0xfd28d20ab7de7L,0x563f26a5ef0beL, + 0x0e7c0073f7f9cL }, + { 0xd663a0ef59f76L,0x5420fcb0501f6L,0xa6602d4669b3bL,0x3c0ac08c1f7a7L, + 0x0e08504fec65bL } }, + /* 19 */ + { { 0x8f68da031b3caL,0x9ee6da6d66f09L,0x4f246e86d1cabL,0x96b45bfd81fa9L, + 0x078f018825b09L }, + { 0xefde43a25787fL,0x0d1dccac9bb7eL,0x35bfc368016f8L,0x747a0cea4877bL, + 0x043a773b87e94L } }, + /* 20 */ + { { 0x77734d2b533d5L,0xf6a1bdddc0625L,0x79ec293673b8aL,0x66b1577e7c9aaL, + 0x0bb6de651c3b2L }, + { 0x9303ab65259b3L,0xd3d03a7480e7eL,0xb3cfc27d6a0afL,0xb99bc5ac83d19L, + 0x060b4619a5d18L } }, + /* 21 */ + { { 0xa38e11ae5aa1cL,0x2b49e73658bd6L,0xe5f87edb8b765L,0xffcd0b130014eL, + 0x09d0f27b2aeebL }, + { 0x246317a730a55L,0x2fddbbc83aca9L,0xc019a719c955bL,0xc48d07c1dfe0aL, + 0x0244a566d356eL } }, + /* 22 */ + { { 0x0394aeacf1f96L,0xa9024c271c6dbL,0x2cbd3b99f2122L,0xef692626ac1b8L, + 0x045e58c873581L }, + { 0xf479da38f9dbcL,0x46e888a040d3fL,0x6e0bed7a8aaf1L,0xb7a4945adfb24L, + 0x0c040e21cc1e4L } }, + /* 23 */ + { { 0xaf0006f8117b6L,0xff73a35433847L,0xd9475eb651969L,0x6ec7482b35761L, + 0x01cdf5c97682cL }, + { 0x775b411f04839L,0xf448de16987dbL,0x70b32197dbeacL,0xff3db2921dd1bL, + 0x0046755f8a92dL } }, + /* 24 */ + { { 0xac5d2bce8ffcdL,0x8b2fe61a82cc8L,0x202d6c70d53c4L,0xa5f3f6f161727L, + 0x0046e5e113b83L }, + { 0x8ff64d8007f01L,0x125af43183e7bL,0x5e1a03c7fb1efL,0x005b045c5ea63L, + 0x06e0106c3303dL } }, + /* 25 */ + { { 0x7358488dd73b1L,0x8f995ed0d948cL,0x56a2ab7767070L,0xcf1f38385ea8cL, + 0x0442594ede901L }, + { 0xaa2c912d4b65bL,0x3b96c90c37f8fL,0xe978d1f94c234L,0xe68ed326e4a15L, + 0x0a796fa514c2eL } }, + /* 26 */ + { { 0xfb604823addd7L,0x83e56693b3359L,0xcbf3c809e2a61L,0x66e9f885b78e3L, + 0x0e4ad2da9c697L }, + { 0xf7f428e048a61L,0x8cc092d9a0357L,0x03ed8ef082d19L,0x5143fc3a1af4cL, + 0x0c5e94046c37bL } }, + /* 27 */ + { { 0xa538c2be75f9eL,0xe8cb123a78476L,0x109c04b6fd1a9L,0x4747d85e4df0bL, + 0x063283dafdb46L }, + { 0x28cf7baf2df15L,0x550ad9a7f4ce7L,0x834bcc3e592c4L,0xa938fab226adeL, + 0x068bd19ab1981L } }, + /* 28 */ + { { 0xead511887d659L,0xf4b359305ac08L,0xfe74fe33374d5L,0xdfd696986981cL, + 0x0495292f53c6fL }, + { 0x78c9e1acec896L,0x10ec5b44844a8L,0x64d60a7d964b2L,0x68376696f7e26L, + 0x00ec7530d2603L } }, + /* 29 */ + { { 0x13a05ad2687bbL,0x6af32e21fa2daL,0xdd4607ba1f83bL,0x3f0b390f5ef51L, + 0x00f6207a66486L }, + { 0x7e3bb0f138233L,0x6c272aa718bd6L,0x6ec88aedd66b9L,0x6dcf8ed004072L, + 0x0ff0db07208edL } }, + /* 30 */ + { { 0xfa1014c95d553L,0xfd5d680a8a749L,0xf3b566fa44052L,0x0ea3183b4317fL, + 0x0313b513c8874L }, + { 0x2e2ac08d11549L,0x0bb4dee21cb40L,0x7f2320e071ee1L,0x9f8126b987dd4L, + 0x02d3abcf986f1L } }, + /* 31 */ + { { 0x88501815581a2L,0x56632211af4c2L,0xcab2e999a0a6dL,0x8cdf19ba7a0f0L, + 0x0c036fa10ded9L }, + { 0xe08bac1fbd009L,0x9006d1581629aL,0xb9e0d8f0b68b1L,0x0194c2eb32779L, + 0x0a6b2a2c4b6d4L } }, + /* 32 */ + { { 0x3e50f6d3549cfL,0x6ffacd665ed43L,0xe11fcb46f3369L,0x9860695bfdaccL, + 0x0810ee252af7cL }, + { 0x50fe17159bb2cL,0xbe758b357b654L,0x69fea72f7dfbeL,0x17452b057e74dL, + 0x0d485717a9273L } }, + /* 33 */ + { { 0x41a8af0cb5a98L,0x931f3110bf117L,0xb382adfd3da8fL,0x604e1994e2cbaL, + 0x06a6045a72f9aL }, + { 0xc0d3fa2b2411dL,0x3e510e96e0170L,0x865b3ccbe0eb8L,0x57903bcc9f738L, 
+ 0x0d3e45cfaf9e1L } }, + /* 34 */ + { { 0xf69bbe83f7669L,0x8272877d6bce1L,0x244278d09f8aeL,0xc19c9548ae543L, + 0x0207755dee3c2L }, + { 0xd61d96fef1945L,0xefb12d28c387bL,0x2df64aa18813cL,0xb00d9fbcd1d67L, + 0x048dc5ee57154L } }, + /* 35 */ + { { 0x790bff7e5a199L,0xcf989ccbb7123L,0xa519c79e0efb8L,0xf445c27a2bfe0L, + 0x0f2fb0aeddff6L }, + { 0x09575f0b5025fL,0xd740fa9f2241cL,0x80bfbd0550543L,0xd5258fa3c8ad3L, + 0x0a13e9015db28L } }, + /* 36 */ + { { 0x7a350a2b65cbcL,0x722a464226f9fL,0x23f07a10b04b9L,0x526f265ce241eL, + 0x02bf0d6b01497L }, + { 0x4dd3f4b216fb7L,0x67fbdda26ad3dL,0x708505cf7d7b8L,0xe89faeb7b83f6L, + 0x042a94a5a162fL } }, + /* 37 */ + { { 0x6ad0beaadf191L,0x9025a268d7584L,0x94dc1f60f8a48L,0xde3de86030504L, + 0x02c2dd969c65eL }, + { 0x2171d93849c17L,0xba1da250dd6d0L,0xc3a5485460488L,0x6dbc4810c7063L, + 0x0f437fa1f42c5L } }, + /* 38 */ + { { 0x0d7144a0f7dabL,0x931776e9ac6aaL,0x5f397860f0497L,0x7aa852c0a050fL, + 0x0aaf45b335470L }, + { 0x37c33c18d364aL,0x063e49716585eL,0x5ec5444d40b9bL,0x72bcf41716811L, + 0x0cdf6310df4f2L } }, + /* 39 */ + { { 0x3c6238ea8b7efL,0x1885bc2287747L,0xbda8e3408e935L,0x2ff2419567722L, + 0x0f0d008bada9eL }, + { 0x2671d2414d3b1L,0x85b019ea76291L,0x53bcbdbb37549L,0x7b8b5c61b96d4L, + 0x05bd5c2f5ca88L } }, + /* 40 */ + { { 0xf469ef49a3154L,0x956e2b2e9aef0L,0xa924a9c3e85a5L,0x471945aaec1eaL, + 0x0aa12dfc8a09eL }, + { 0x272274df69f1dL,0x2ca2ff5e7326fL,0x7a9dd44e0e4c8L,0xa901b9d8ce73bL, + 0x06c036e73e48cL } }, + /* 41 */ + { { 0xae12a0f6e3138L,0x0025ad345a5cfL,0x5672bc56966efL,0xbe248993c64b4L, + 0x0292ff65896afL }, + { 0x50d445e213402L,0x274392c9fed52L,0xa1c72e8f6580eL,0x7276097b397fdL, + 0x0644e0c90311bL } }, + /* 42 */ + { { 0x421e1a47153f0L,0x79920418c9e1eL,0x05d7672b86c3bL,0x9a7793bdce877L, + 0x0f25ae793cab7L }, + { 0x194a36d869d0cL,0x824986c2641f3L,0x96e945e9d55c8L,0x0a3e49fb5ea30L, + 0x039b8e65313dbL } }, + /* 43 */ + { { 0x54200b6fd2e59L,0x669255c98f377L,0xe2a573935e2c0L,0xdb06d9dab21a0L, + 0x039122f2f0f19L }, + { 0xce1e003cad53cL,0x0fe65c17e3cfbL,0xaa13877225b2cL,0xff8d72baf1d29L, + 0x08de80af8ce80L } }, + /* 44 */ + { { 0xea8d9207bbb76L,0x7c21782758afbL,0xc0436b1921c7eL,0x8c04dfa2b74b1L, + 0x0871949062e36L }, + { 0x928bba3993df5L,0xb5f3b3d26ab5fL,0x5b55050639d75L,0xfde1011aa78a8L, + 0x0fc315e6a5b74L } }, + /* 45 */ + { { 0xfd41ae8d6ecfaL,0xf61aec7f86561L,0x924741d5f8c44L,0x908898452a7b4L, + 0x0e6d4a7adee38L }, + { 0x52ed14593c75dL,0xa4dd271162605L,0xba2c7db70a70dL,0xae57d2aede937L, + 0x035dfaf9a9be2L } }, + /* 46 */ + { { 0x56fcdaa736636L,0x97ae2cab7e6b9L,0xf34996609f51dL,0x0d2bfb10bf410L, + 0x01da5c7d71c83L }, + { 0x1e4833cce6825L,0x8ff9573c3b5c4L,0x23036b815ad11L,0xb9d6a28552c7fL, + 0x07077c0fddbf4L } }, + /* 47 */ + { { 0x3ff8d46b9661cL,0x6b0d2cfd71bf6L,0x847f8f7a1dfd3L,0xfe440373e140aL, + 0x053a8632ee50eL }, + { 0x6ff68696d8051L,0x95c74f468a097L,0xe4e26bddaec0cL,0xfcc162994dc35L, + 0x0028ca76d34e1L } }, + /* 48 */ + { { 0xd47dcfc9877eeL,0x10801d0002d11L,0x4c260b6c8b362L,0xf046d002c1175L, + 0x004c17cd86962L }, + { 0xbd094b0daddf5L,0x7524ce55c06d9L,0x2da03b5bea235L,0x7474663356e67L, + 0x0f7ba4de9fed9L } }, + /* 49 */ + { { 0xbfa34ebe1263fL,0x3571ae7ce6d0dL,0x2a6f523557637L,0x1c41d24405538L, + 0x0e31f96005213L }, + { 0xb9216ea6b6ec6L,0x2e73c2fc44d1bL,0x9d0a29437a1d1L,0xd47bc10e7eac8L, + 0x0aa3a6259ce34L } }, + /* 50 */ + { { 0xf9df536f3dcd3L,0x50d2bf7360fbcL,0xf504f5b6cededL,0xdaee491710fadL, + 0x02398dd627e79L }, + { 0x705a36d09569eL,0xbb5149f769cf4L,0x5f6034cea0619L,0x6210ff9c03773L, + 0x05717f5b21c04L } }, + /* 51 */ + { { 
0x229c921dd895eL,0x0040c284519feL,0xd637ecd8e5185L,0x28defa13d2391L, + 0x0660a2c560e3cL }, + { 0xa88aed67fcbd0L,0x780ea9f0969ccL,0x2e92b4dc84724L,0x245332b2f4817L, + 0x0624ee54c4f52L } }, + /* 52 */ + { { 0x49ce4d897ecccL,0xd93f9880aa095L,0x43a7c204d49d1L,0xfbc0723c24230L, + 0x04f392afb92bdL }, + { 0x9f8fa7de44fd9L,0xe457b32156696L,0x68ebc3cb66cfbL,0x399cdb2fa8033L, + 0x08a3e7977ccdbL } }, + /* 53 */ + { { 0x1881f06c4b125L,0x00f6e3ca8cddeL,0xc7a13e9ae34e3L,0x4404ef6999de5L, + 0x03888d02370c2L }, + { 0x8035644f91081L,0x615f015504762L,0x32cd36e3d9fcfL,0x23361827edc86L, + 0x0a5e62e471810L } }, + /* 54 */ + { { 0x25ee32facd6c8L,0x5454bcbc661a8L,0x8df9931699c63L,0x5adc0ce3edf79L, + 0x02c4768e6466aL }, + { 0x6ff8c90a64bc9L,0x20e4779f5cb34L,0xc05e884630a60L,0x52a0d949d064bL, + 0x07b5e6441f9e6L } }, + /* 55 */ + { { 0x9422c1d28444aL,0xd8be136a39216L,0xb0c7fcee996c5L,0x744a2387afe5fL, + 0x0b8af73cb0c8dL }, + { 0xe83aa338b86fdL,0x58a58a5cff5fdL,0x0ac9433fee3f1L,0x0895c9ee8f6f2L, + 0x0a036395f7f3fL } }, + /* 56 */ + { { 0x3c6bba10f7770L,0x81a12a0e248c7L,0x1bc2b9fa6f16dL,0xb533100df6825L, + 0x04be36b01875fL }, + { 0x6086e9fb56dbbL,0x8b07e7a4f8922L,0x6d52f20306fefL,0x00c0eeaccc056L, + 0x08cbc9a871bdcL } }, + /* 57 */ + { { 0x1895cc0dac4abL,0x40712ff112e13L,0xa1cee57a874a4L,0x35f86332ae7c6L, + 0x044e7553e0c08L }, + { 0x03fff7734002dL,0x8b0b34425c6d5L,0xe8738b59d35cbL,0xfc1895f702760L, + 0x0470a683a5eb8L } }, + /* 58 */ + { { 0x761dc90513482L,0x2a01e9276a81bL,0xce73083028720L,0xc6efcda441ee0L, + 0x016410690c63dL }, + { 0x34a066d06a2edL,0x45189b100bf50L,0xb8218c9dd4d77L,0xbb4fd914ae72aL, + 0x0d73479fd7abcL } }, + /* 59 */ + { { 0xefb165ad4c6e5L,0x8f5b06d04d7edL,0x575cb14262cf0L,0x666b12ed5bb18L, + 0x0816469e30771L }, + { 0xb9d79561e291eL,0x22c1de1661d7aL,0x35e0513eb9dafL,0x3f9cf49827eb1L, + 0x00a36dd23f0ddL } }, + /* 60 */ + { { 0xd32c741d5533cL,0x9e8684628f098L,0x349bd117c5f5aL,0xb11839a228adeL, + 0x0e331dfd6fdbaL }, + { 0x0ab686bcc6ed8L,0xbdef7a260e510L,0xce850d77160c3L,0x33899063d9a7bL, + 0x0d3b4782a492eL } }, + /* 61 */ + { { 0x9b6e8f3821f90L,0xed66eb7aada14L,0xa01311692edd9L,0xa5bd0bb669531L, + 0x07281275a4c86L }, + { 0x858f7d3ff47e5L,0xbc61016441503L,0xdfd9bb15e1616L,0x505962b0f11a7L, + 0x02c062e7ece14L } }, + /* 62 */ + { { 0xf996f0159ac2eL,0x36cbdb2713a76L,0x8e46047281e77L,0x7ef12ad6d2880L, + 0x0282a35f92c4eL }, + { 0x54b1ec0ce5cd2L,0xc91379c2299c3L,0xe82c11ecf99efL,0x2abd992caf383L, + 0x0c71cd513554dL } }, + /* 63 */ + { { 0x5de9c09b578f4L,0x58e3affa7a488L,0x9182f1f1884e2L,0xf3a38f76b1b75L, + 0x0c50f6740cf47L }, + { 0x4adf3374b68eaL,0x2369965fe2a9cL,0x5a53050a406f3L,0x58dc2f86a2228L, + 0x0b9ecb3a72129L } }, + /* 64 */ + { { 0x8410ef4f8b16aL,0xfec47b266a56fL,0xd9c87c197241aL,0xab1b0a406b8e6L, + 0x0803f3e02cd42L }, + { 0x309a804dbec69L,0xf73bbad05f7f0L,0xd8e197fa83b85L,0xadc1c6097273aL, + 0x0c097440e5067L } }, + /* 65 */ + { { 0xa56f2c379ab34L,0x8b841df8d1846L,0x76c68efa8ee06L,0x1f30203144591L, + 0x0f1af32d5915fL }, + { 0x375315d75bd50L,0xbaf72f67bc99cL,0x8d7723f837cffL,0x1c8b0613a4184L, + 0x023d0f130e2d4L } }, + /* 66 */ + { { 0xab6edf41500d9L,0xe5fcbeada8857L,0x97259510d890aL,0xfadd52fe86488L, + 0x0b0288dd6c0a3L }, + { 0x20f30650bcb08L,0x13695d6e16853L,0x989aa7671af63L,0xc8d231f520a7bL, + 0x0ffd3724ff408L } }, + /* 67 */ + { { 0x68e64b458e6cbL,0x20317a5d28539L,0xaa75f56992dadL,0x26df3814ae0b7L, + 0x0f5590f4ad78cL }, + { 0x24bd3cf0ba55aL,0x4a0c778bae0fcL,0x83b674a0fc472L,0x4a201ce9864f6L, + 0x018d6da54f6f7L } }, + /* 68 */ + { { 0x3e225d5be5a2bL,0x835934f3c6ed9L,0x2626ffc6fe799L,0x216a431409262L, 
+ 0x050bbb4d97990L }, + { 0x191c6e57ec63eL,0x40181dcdb2378L,0x236e0f665422cL,0x49c341a8099b0L, + 0x02b10011801feL } }, + /* 69 */ + { { 0x8b5c59b391593L,0xa2598270fcfc6L,0x19adcbbc385f5L,0xae0c7144f3aadL, + 0x0dd55899983fbL }, + { 0x88b8e74b82ff4L,0x4071e734c993bL,0x3c0322ad2e03cL,0x60419a7a9eaf4L, + 0x0e6e4c551149dL } }, + /* 70 */ + { { 0x655bb1e9af288L,0x64f7ada93155fL,0xb2820e5647e1aL,0x56ff43697e4bcL, + 0x051e00db107edL }, + { 0x169b8771c327eL,0x0b4a96c2ad43dL,0xdeb477929cdb2L,0x9177c07d51f53L, + 0x0e22f42414982L } }, + /* 71 */ + { { 0x5e8f4635f1abbL,0xb568538874cd4L,0x5a8034d7edc0cL,0x48c9c9472c1fbL, + 0x0f709373d52dcL }, + { 0x966bba8af30d6L,0x4af137b69c401L,0x361c47e95bf5fL,0x5b113966162a9L, + 0x0bd52d288e727L } }, + /* 72 */ + { { 0x55c7a9c5fa877L,0x727d3a3d48ab1L,0x3d189d817dad6L,0x77a643f43f9e7L, + 0x0a0d0f8e4c8aaL }, + { 0xeafd8cc94f92dL,0xbe0c4ddb3a0bbL,0x82eba14d818c8L,0x6a0022cc65f8bL, + 0x0a56c78c7946dL } }, + /* 73 */ + { { 0x2391b0dd09529L,0xa63daddfcf296L,0xb5bf481803e0eL,0x367a2c77351f5L, + 0x0d8befdf8731aL }, + { 0x19d42fc0157f4L,0xd7fec8e650ab9L,0x2d48b0af51caeL,0x6478cdf9cb400L, + 0x0854a68a5ce9fL } }, + /* 74 */ + { { 0x5f67b63506ea5L,0x89a4fe0d66dc3L,0xe95cd4d9286c4L,0x6a953f101d3bfL, + 0x05cacea0b9884L }, + { 0xdf60c9ceac44dL,0xf4354d1c3aa90L,0xd5dbabe3db29aL,0xefa908dd3de8aL, + 0x0e4982d1235e4L } }, + /* 75 */ + { { 0x04a22c34cd55eL,0xb32680d132231L,0xfa1d94358695bL,0x0499fb345afa1L, + 0x08046b7f616b2L }, + { 0x3581e38e7d098L,0x8df46f0b70b53L,0x4cb78c4d7f61eL,0xaf5530dea9ea4L, + 0x0eb17ca7b9082L } }, + /* 76 */ + { { 0x1b59876a145b9L,0x0fc1bc71ec175L,0x92715bba5cf6bL,0xe131d3e035653L, + 0x0097b00bafab5L }, + { 0x6c8e9565f69e1L,0x5ab5be5199aa6L,0xa4fd98477e8f7L,0xcc9e6033ba11dL, + 0x0f95c747bafdbL } }, + /* 77 */ + { { 0xf01d3bebae45eL,0xf0c4bc6955558L,0xbc64fc6a8ebe9L,0xd837aeb705b1dL, + 0x03512601e566eL }, + { 0x6f1e1fa1161cdL,0xd54c65ef87933L,0x24f21e5328ab8L,0xab6b4757eee27L, + 0x00ef971236068L } }, + /* 78 */ + { { 0x98cf754ca4226L,0x38f8642c8e025L,0x68e17905eede1L,0xbc9548963f744L, + 0x0fc16d9333b4fL }, + { 0x6fb31e7c800caL,0x312678adaabe9L,0xff3e8b5138063L,0x7a173d6244976L, + 0x014ca4af1b95dL } }, + /* 79 */ + { { 0x771babd2f81d5L,0x6901f7d1967a4L,0xad9c9071a5f9dL,0x231dd898bef7cL, + 0x04057b063f59cL }, + { 0xd82fe89c05c0aL,0x6f1dc0df85bffL,0x35a16dbe4911cL,0x0b133befccaeaL, + 0x01c3b5d64f133L } }, + /* 80 */ + { { 0x14bfe80ec21feL,0x6ac255be825feL,0xf4a5d67f6ce11L,0x63af98bc5a072L, + 0x0fad27148db7eL }, + { 0x0b6ac29ab05b3L,0x3c4e251ae690cL,0x2aade7d37a9a8L,0x1a840a7dc875cL, + 0x077387de39f0eL } }, + /* 81 */ + { { 0xecc49a56c0dd7L,0xd846086c741e9L,0x505aecea5cffcL,0xc47e8f7a1408fL, + 0x0b37b85c0bef0L }, + { 0x6b6e4cc0e6a8fL,0xbf6b388f23359L,0x39cef4efd6d4bL,0x28d5aba453facL, + 0x09c135ac8f9f6L } }, + /* 82 */ + { { 0xa320284e35743L,0xb185a3cdef32aL,0xdf19819320d6aL,0x851fb821b1761L, + 0x05721361fc433L }, + { 0xdb36a71fc9168L,0x735e5c403c1f0L,0x7bcd8f55f98baL,0x11bdf64ca87e3L, + 0x0dcbac3c9e6bbL } }, + /* 83 */ + { { 0xd99684518cbe2L,0x189c9eb04ef01L,0x47feebfd242fcL,0x6862727663c7eL, + 0x0b8c1c89e2d62L }, + { 0x58bddc8e1d569L,0xc8b7d88cd051aL,0x11f31eb563809L,0x22d426c27fd9fL, + 0x05d23bbda2f94L } }, + /* 84 */ + { { 0xc729495c8f8beL,0x803bf362bf0a1L,0xf63d4ac2961c4L,0xe9009e418403dL, + 0x0c109f9cb91ecL }, + { 0x095d058945705L,0x96ddeb85c0c2dL,0xa40449bb9083dL,0x1ee184692b8d7L, + 0x09bc3344f2eeeL } }, + /* 85 */ + { { 0xae35642913074L,0x2748a542b10d5L,0x310732a55491bL,0x4cc1469ca665bL, + 0x029591d525f1aL }, + { 
0xf5b6bb84f983fL,0x419f5f84e1e76L,0x0baa189be7eefL,0x332c1200d4968L, + 0x06376551f18efL } }, + /* 86 */ + { { 0x5f14e562976ccL,0xe60ef12c38bdaL,0xcca985222bca3L,0x987abbfa30646L, + 0x0bdb79dc808e2L }, + { 0xcb5c9cb06a772L,0xaafe536dcefd2L,0xc2b5db838f475L,0xc14ac2a3e0227L, + 0x08ee86001add3L } }, + /* 87 */ + { { 0x96981a4ade873L,0x4dc4fba48ccbeL,0xa054ba57ee9aaL,0xaa4b2cee28995L, + 0x092e51d7a6f77L }, + { 0xbafa87190a34dL,0x5bf6bd1ed1948L,0xcaf1144d698f7L,0xaaaad00ee6e30L, + 0x05182f86f0a56L } }, + /* 88 */ + { { 0x6212c7a4cc99cL,0x683e6d9ca1fbaL,0xac98c5aff609bL,0xa6f25dbb27cb5L, + 0x091dcab5d4073L }, + { 0x6cc3d5f575a70L,0x396f8d87fa01bL,0x99817360cb361L,0x4f2b165d4e8c8L, + 0x017a0cedb9797L } }, + /* 89 */ + { { 0x61e2a076c8d3aL,0x39210f924b388L,0x3a835d9701aadL,0xdf4194d0eae41L, + 0x02e8ce36c7f4cL }, + { 0x73dab037a862bL,0xb760e4c8fa912L,0x3baf2dd01ba9bL,0x68f3f96453883L, + 0x0f4ccc6cb34f6L } }, + /* 90 */ + { { 0xf525cf1f79687L,0x9592efa81544eL,0x5c78d297c5954L,0xf3c9e1231741aL, + 0x0ac0db4889a0dL }, + { 0xfc711df01747fL,0x58ef17df1386bL,0xccb6bb5592b93L,0x74a2e5880e4f5L, + 0x095a64a6194c9L } }, + /* 91 */ + { { 0x1efdac15a4c93L,0x738258514172cL,0x6cb0bad40269bL,0x06776a8dfb1c1L, + 0x0231e54ba2921L }, + { 0xdf9178ae6d2dcL,0x3f39112918a70L,0xe5b72234d6aa6L,0x31e1f627726b5L, + 0x0ab0be032d8a7L } }, + /* 92 */ + { { 0xad0e98d131f2dL,0xe33b04f101097L,0x5e9a748637f09L,0xa6791ac86196dL, + 0x0f1bcc8802cf6L }, + { 0x69140e8daacb4L,0x5560f6500925cL,0x77937a63c4e40L,0xb271591cc8fc4L, + 0x0851694695aebL } }, + /* 93 */ + { { 0x5c143f1dcf593L,0x29b018be3bde3L,0xbdd9d3d78202bL,0x55d8e9cdadc29L, + 0x08f67d9d2daadL }, + { 0x116567481ea5fL,0xe9e34c590c841L,0x5053fa8e7d2ddL,0x8b5dffdd43f40L, + 0x0f84572b9c072L } }, + /* 94 */ + { { 0xa7a7197af71c9L,0x447a7365655e1L,0xe1d5063a14494L,0x2c19a1b4ae070L, + 0x0edee2710616bL }, + { 0x034f511734121L,0x554a25e9f0b2fL,0x40c2ecf1cac6eL,0xd7f48dc148f3aL, + 0x09fd27e9b44ebL } }, + /* 95 */ + { { 0x7658af6e2cb16L,0x2cfe5919b63ccL,0x68d5583e3eb7dL,0xf3875a8c58161L, + 0x0a40c2fb6958fL }, + { 0xec560fedcc158L,0xc655f230568c9L,0xa307e127ad804L,0xdecfd93967049L, + 0x099bc9bb87dc6L } }, + /* 96 */ + { { 0x9521d927dafc6L,0x695c09cd1984aL,0x9366dde52c1fbL,0x7e649d9581a0fL, + 0x09abe210ba16dL }, + { 0xaf84a48915220L,0x6a4dd816c6480L,0x681ca5afa7317L,0x44b0c7d539871L, + 0x07881c25787f3L } }, + /* 97 */ + { { 0x99b51e0bcf3ffL,0xc5127f74f6933L,0xd01d9680d02cbL,0x89408fb465a2dL, + 0x015e6e319a30eL }, + { 0xd6e0d3e0e05f4L,0xdc43588404646L,0x4f850d3fad7bdL,0x72cebe61c7d1cL, + 0x00e55facf1911L } }, + /* 98 */ + { { 0xd9806f8787564L,0x2131e85ce67e9L,0x819e8d61a3317L,0x65776b0158cabL, + 0x0d73d09766fe9L }, + { 0x834251eb7206eL,0x0fc618bb42424L,0xe30a520a51929L,0xa50b5dcbb8595L, + 0x09250a3748f15L } }, + /* 99 */ + { { 0xf08f8be577410L,0x035077a8c6cafL,0xc0a63a4fd408aL,0x8c0bf1f63289eL, + 0x077414082c1ccL }, + { 0x40fa6eb0991cdL,0x6649fdc29605aL,0x324fd40c1ca08L,0x20b93a68a3c7bL, + 0x08cb04f4d12ebL } }, + /* 100 */ + { { 0x2d0556906171cL,0xcdb0240c3fb1cL,0x89068419073e9L,0x3b51db8e6b4fdL, + 0x0e4e429ef4712L }, + { 0xdd53c38ec36f4L,0x01ff4b6a270b8L,0x79a9a48f9d2dcL,0x65525d066e078L, + 0x037bca2ff3c6eL } }, + /* 101 */ + { { 0x2e3c7df562470L,0xa2c0964ac94cdL,0x0c793be44f272L,0xb22a7c6d5df98L, + 0x059913edc3002L }, + { 0x39a835750592aL,0x80e783de027a1L,0xa05d64f99e01dL,0xe226cf8c0375eL, + 0x043786e4ab013L } }, + /* 102 */ + { { 0x2b0ed9e56b5a6L,0xa6d9fc68f9ff3L,0x97846a70750d9L,0x9e7aec15e8455L, + 0x08638ca98b7e7L }, + { 
0xae0960afc24b2L,0xaf4dace8f22f5L,0xecba78f05398eL,0xa6f03b765dd0aL, + 0x01ecdd36a7b3aL } }, + /* 103 */ + { { 0xacd626c5ff2f3L,0xc02873a9785d3L,0x2110d54a2d516L,0xf32dad94c9fadL, + 0x0d85d0f85d459L }, + { 0x00b8d10b11da3L,0x30a78318c49f7L,0x208decdd2c22cL,0x3c62556988f49L, + 0x0a04f19c3b4edL } }, + /* 104 */ + { { 0x924c8ed7f93bdL,0x5d392f51f6087L,0x21b71afcb64acL,0x50b07cae330a8L, + 0x092b2eeea5c09L }, + { 0xc4c9485b6e235L,0xa92936c0f085aL,0x0508891ab2ca4L,0x276c80faa6b3eL, + 0x01ee782215834L } }, + /* 105 */ + { { 0xa2e00e63e79f7L,0xb2f399d906a60L,0x607c09df590e7L,0xe1509021054a6L, + 0x0f3f2ced857a6L }, + { 0x510f3f10d9b55L,0xacd8642648200L,0x8bd0e7c9d2fcfL,0xe210e5631aa7eL, + 0x00f56a4543da3L } }, + /* 106 */ + { { 0x1bffa1043e0dfL,0xcc9c007e6d5b2L,0x4a8517a6c74b6L,0xe2631a656ec0dL, + 0x0bd8f17411969L }, + { 0xbbb86beb7494aL,0x6f45f3b8388a9L,0x4e5a79a1567d4L,0xfa09df7a12a7aL, + 0x02d1a1c3530ccL } }, + /* 107 */ + { { 0xe3813506508daL,0xc4a1d795a7192L,0xa9944b3336180L,0xba46cddb59497L, + 0x0a107a65eb91fL }, + { 0x1d1c50f94d639L,0x758a58b7d7e6dL,0xd37ca1c8b4af3L,0x9af21a7c5584bL, + 0x0183d760af87aL } }, + /* 108 */ + { { 0x697110dde59a4L,0x070e8bef8729dL,0xf2ebe78f1ad8dL,0xd754229b49634L, + 0x01d44179dc269L }, + { 0xdc0cf8390d30eL,0x530de8110cb32L,0xbc0339a0a3b27L,0xd26231af1dc52L, + 0x0771f9cc29606L } }, + /* 109 */ + { { 0x93e7785040739L,0xb98026a939999L,0x5f8fc2644539dL,0x718ecf40f6f2fL, + 0x064427a310362L }, + { 0xf2d8785428aa8L,0x3febfb49a84f4L,0x23d01ac7b7adcL,0x0d6d201b2c6dfL, + 0x049d9b7496ae9L } }, + /* 110 */ + { { 0x8d8bc435d1099L,0x4e8e8d1a08cc7L,0xcb68a412adbcdL,0x544502c2e2a02L, + 0x09037d81b3f60L }, + { 0xbac27074c7b61L,0xab57bfd72e7cdL,0x96d5352fe2031L,0x639c61ccec965L, + 0x008c3de6a7cc0L } }, + /* 111 */ + { { 0xdd020f6d552abL,0x9805cd81f120fL,0x135129156baffL,0x6b2f06fb7c3e9L, + 0x0c69094424579L }, + { 0x3ae9c41231bd1L,0x875cc5820517bL,0x9d6a1221eac6eL,0x3ac0208837abfL, + 0x03fa3db02cafeL } }, + /* 112 */ + { { 0xa3e6505058880L,0xef643943f2d75L,0xab249257da365L,0x08ff4147861cfL, + 0x0c5c4bdb0fdb8L }, + { 0x13e34b272b56bL,0x9511b9043a735L,0x8844969c8327eL,0xb6b5fd8ce37dfL, + 0x02d56db9446c2L } }, + /* 113 */ + { { 0x1782fff46ac6bL,0x2607a2e425246L,0x9a48de1d19f79L,0xba42fafea3c40L, + 0x00f56bd9de503L }, + { 0xd4ed1345cda49L,0xfc816f299d137L,0xeb43402821158L,0xb5f1e7c6a54aaL, + 0x04003bb9d1173L } }, + /* 114 */ + { { 0xe8189a0803387L,0xf539cbd4043b8L,0x2877f21ece115L,0x2f9e4297208ddL, + 0x053765522a07fL }, + { 0x80a21a8a4182dL,0x7a3219df79a49L,0xa19a2d4a2bbd0L,0x4549674d0a2e1L, + 0x07a056f586c5dL } }, + /* 115 */ + { { 0xb25589d8a2a47L,0x48c3df2773646L,0xbf0d5395b5829L,0x267551ec000eaL, + 0x077d482f17a1aL }, + { 0x1bd9587853948L,0xbd6cfbffeeb8aL,0x0681e47a6f817L,0xb0e4ab6ec0578L, + 0x04115012b2b38L } }, + /* 116 */ + { { 0x3f0f46de28cedL,0x609b13ec473c7L,0xe5c63921d5da7L,0x094661b8ce9e6L, + 0x0cdf04572fbeaL }, + { 0x3c58b6c53c3b0L,0x10447b843c1cbL,0xcb9780e97fe3cL,0x3109fb2b8ae12L, + 0x0ee703dda9738L } }, + /* 117 */ + { { 0x15140ff57e43aL,0xd3b1b811b8345L,0xf42b986d44660L,0xce212b3b5dff8L, + 0x02a0ad89da162L }, + { 0x4a6946bc277baL,0x54c141c27664eL,0xabf6274c788c9L,0x4659141aa64ccL, + 0x0d62d0b67ac2bL } }, + /* 118 */ + { { 0x5d87b2c054ac4L,0x59f27df78839cL,0x18128d6570058L,0x2426edf7cbf3bL, + 0x0b39a23f2991cL }, + { 0x84a15f0b16ae5L,0xb1a136f51b952L,0x27007830c6a05L,0x4cc51d63c137fL, + 0x004ed0092c067L } }, + /* 119 */ + { { 0x185d19ae90393L,0x294a3d64e61f4L,0x854fc143047b4L,0xc387ae0001a69L, + 0x0a0a91fc10177L }, + { 
0xa3f01ae2c831eL,0x822b727e16ff0L,0xa3075b4bb76aeL,0x0c418f12c8a15L, + 0x0084cf9889ed2L } }, + /* 120 */ + { { 0x509defca6becfL,0x807dffb328d98L,0x778e8b92fceaeL,0xf77e5d8a15c44L, + 0x0d57955b273abL }, + { 0xda79e31b5d4f1L,0x4b3cfa7a1c210L,0xc27c20baa52f0L,0x41f1d4d12089dL, + 0x08e14ea4202d1L } }, + /* 121 */ + { { 0x50345f2897042L,0x1f43402c4aeedL,0x8bdfb218d0533L,0xd158c8d9c194cL, + 0x0597e1a372aa4L }, + { 0x7ec1acf0bd68cL,0xdcab024945032L,0x9fe3e846d4be0L,0x4dea5b9c8d7acL, + 0x0ca3f0236199bL } }, + /* 122 */ + { { 0xa10b56170bd20L,0xf16d3f5de7592L,0x4b2ade20ea897L,0x07e4a3363ff14L, + 0x0bde7fd7e309cL }, + { 0xbb6d2b8f5432cL,0xcbe043444b516L,0x8f95b5a210dc1L,0xd1983db01e6ffL, + 0x0b623ad0e0a7dL } }, + /* 123 */ + { { 0xbd67560c7b65bL,0x9023a4a289a75L,0x7b26795ab8c55L,0x137bf8220fd0dL, + 0x0d6aa2e4658ecL }, + { 0xbc00b5138bb85L,0x21d833a95c10aL,0x702a32e8c31d1L,0x513ab24ff00b1L, + 0x0111662e02dccL } }, + /* 124 */ + { { 0x14015efb42b87L,0x701b6c4dff781L,0x7d7c129bd9f5dL,0x50f866ecccd7aL, + 0x0db3ee1cb94b7L }, + { 0xf3db0f34837cfL,0x8bb9578d4fb26L,0xc56657de7eed1L,0x6a595d2cdf937L, + 0x0886a64425220L } }, + /* 125 */ + { { 0x34cfb65b569eaL,0x41f72119c13c2L,0x15a619e200111L,0x17bc8badc85daL, + 0x0a70cf4eb018aL }, + { 0xf97ae8c4a6a65L,0x270134378f224L,0xf7e096036e5cfL,0x7b77be3a609e4L, + 0x0aa4772abd174L } }, + /* 126 */ + { { 0x761317aa60cc0L,0x610368115f676L,0xbc1bb5ac79163L,0xf974ded98bb4bL, + 0x0611a6ddc30faL }, + { 0x78cbcc15ee47aL,0x824e0d96a530eL,0xdd9ed882e8962L,0x9c8836f35adf3L, + 0x05cfffaf81642L } }, + /* 127 */ + { { 0x54cff9b7a99cdL,0x9d843c45a1c0dL,0x2c739e17bf3b9L,0x994c038a908f6L, + 0x06e5a6b237dc1L }, + { 0xb454e0ba5db77L,0x7facf60d63ef8L,0x6608378b7b880L,0xabcce591c0c67L, + 0x0481a238d242dL } }, + /* 128 */ + { { 0x17bc035d0b34aL,0x6b8327c0a7e34L,0xc0362d1440b38L,0xf9438fb7262daL, + 0x02c41114ce0cdL }, + { 0x5cef1ad95a0b1L,0xa867d543622baL,0x1e486c9c09b37L,0x929726d6cdd20L, + 0x020477abf42ffL } }, + /* 129 */ + { { 0x5173c18d65dbfL,0x0e339edad82f7L,0xcf1001c77bf94L,0x96b67022d26bdL, + 0x0ac66409ac773L }, + { 0xbb36fc6261cc3L,0xc9190e7e908b0L,0x45e6c10213f7bL,0x2f856541cebaaL, + 0x0ce8e6975cc12L } }, + /* 130 */ + { { 0x21b41bc0a67d2L,0x0a444d248a0f1L,0x59b473762d476L,0xb4a80e044f1d6L, + 0x008fde365250bL }, + { 0xec3da848bf287L,0x82d3369d6eaceL,0x2449482c2a621L,0x6cd73582dfdc9L, + 0x02f7e2fd2565dL } }, + /* 131 */ + { { 0xb92dbc3770fa7L,0x5c379043f9ae4L,0x7761171095e8dL,0x02ae54f34e9d1L, + 0x0c65be92e9077L }, + { 0x8a303f6fd0a40L,0xe3bcce784b275L,0xf9767bfe7d822L,0x3b3a7ae4f5854L, + 0x04bff8e47d119L } }, + /* 132 */ + { { 0x1d21f00ff1480L,0x7d0754db16cd4L,0xbe0f3ea2ab8fbL,0x967dac81d2efbL, + 0x03e4e4ae65772L }, + { 0x8f36d3c5303e6L,0x4b922623977e1L,0x324c3c03bd999L,0x60289ed70e261L, + 0x05388aefd58ecL } }, + /* 133 */ + { { 0x317eb5e5d7713L,0xee75de49daad1L,0x74fb26109b985L,0xbe0e32f5bc4fcL, + 0x05cf908d14f75L }, + { 0x435108e657b12L,0xa5b96ed9e6760L,0x970ccc2bfd421L,0x0ce20e29f51f8L, + 0x0a698ba4060f0L } }, + /* 134 */ + { { 0xb1686ef748fecL,0xa27e9d2cf973dL,0xe265effe6e755L,0xad8d630b6544cL, + 0x0b142ef8a7aebL }, + { 0x1af9f17d5770aL,0x672cb3412fad3L,0xf3359de66af3bL,0x50756bd60d1bdL, + 0x0d1896a965851L } }, + /* 135 */ + { { 0x957ab33c41c08L,0xac5468e2e1ec5L,0xc472f6c87de94L,0xda3918816b73aL, + 0x0267b0e0b7981L }, + { 0x54e5d8e62b988L,0x55116d21e76e5L,0xd2a6f99d8ddc7L,0x93934610faf03L, + 0x0b54e287aa111L } }, + /* 136 */ + { { 0x122b5178a876bL,0xff085104b40a0L,0x4f29f7651ff96L,0xd4e6050b31ab1L, + 0x084abb28b5f87L }, + { 
0xd439f8270790aL,0x9d85e3f46bd5eL,0xc1e22122d6cb5L,0x564075f55c1b6L, + 0x0e5436f671765L } }, + /* 137 */ + { { 0x9025e2286e8d5L,0xb4864453be53fL,0x408e3a0353c95L,0xe99ed832f5bdeL, + 0x00404f68b5b9cL }, + { 0x33bdea781e8e5L,0x18163c2f5bcadL,0x119caa33cdf50L,0xc701575769600L, + 0x03a4263df0ac1L } }, + /* 138 */ + { { 0x65ecc9aeb596dL,0xe7023c92b4c29L,0xe01396101ea03L,0xa3674704b4b62L, + 0x00ca8fd3f905eL }, + { 0x23a42551b2b61L,0x9c390fcd06925L,0x392a63e1eb7a8L,0x0c33e7f1d2be0L, + 0x096dca2644ddbL } }, + /* 139 */ + { { 0xbb43a387510afL,0xa8a9a36a01203L,0xf950378846feaL,0x59dcd23a57702L, + 0x04363e2123aadL }, + { 0x3a1c740246a47L,0xd2e55dd24dca4L,0xd8faf96b362b8L,0x98c4f9b086045L, + 0x0840e115cd8bbL } }, + /* 140 */ + { { 0x205e21023e8a7L,0xcdd8dc7a0bf12L,0x63a5ddfc808a8L,0xd6d4e292a2721L, + 0x05e0d6abd30deL }, + { 0x721c27cfc0f64L,0x1d0e55ed8807aL,0xd1f9db242eec0L,0xa25a26a7bef91L, + 0x07dea48f42945L } }, + /* 141 */ + { { 0xf6f1ce5060a81L,0x72f8f95615abdL,0x6ac268be79f9cL,0x16d1cfd36c540L, + 0x0abc2a2beebfdL }, + { 0x66f91d3e2eac7L,0x63d2dd04668acL,0x282d31b6f10baL,0xefc16790e3770L, + 0x04ea353946c7eL } }, + /* 142 */ + { { 0xa2f8d5266309dL,0xc081945a3eed8L,0x78c5dc10a51c6L,0xffc3cecaf45a5L, + 0x03a76e6891c94L }, + { 0xce8a47d7b0d0fL,0x968f584a5f9aaL,0xe697fbe963aceL,0x646451a30c724L, + 0x08212a10a465eL } }, + /* 143 */ + { { 0xc61c3cfab8caaL,0x840e142390ef7L,0xe9733ca18eb8eL,0xb164cd1dff677L, + 0x0aa7cab71599cL }, + { 0xc9273bc837bd1L,0xd0c36af5d702fL,0x423da49c06407L,0x17c317621292fL, + 0x040e38073fe06L } }, + /* 144 */ + { { 0x80824a7bf9b7cL,0x203fbe30d0f4fL,0x7cf9ce3365d23L,0x5526bfbe53209L, + 0x0e3604700b305L }, + { 0xb99116cc6c2c7L,0x08ba4cbee64dcL,0x37ad9ec726837L,0xe15fdcded4346L, + 0x06542d677a3deL } }, + /* 145 */ + { { 0x2b6d07b6c377aL,0x47903448be3f3L,0x0da8af76cb038L,0x6f21d6fdd3a82L, + 0x0a6534aee09bbL }, + { 0x1780d1035facfL,0x339dcb47e630aL,0x447f39335e55aL,0xef226ea50fe1cL, + 0x0f3cb672fdc9aL } }, + /* 146 */ + { { 0x719fe3b55fd83L,0x6c875ddd10eb3L,0x5cea784e0d7a4L,0x70e733ac9fa90L, + 0x07cafaa2eaae8L }, + { 0x14d041d53b338L,0xa0ef87e6c69b8L,0x1672b0fe0acc0L,0x522efb93d1081L, + 0x00aab13c1b9bdL } }, + /* 147 */ + { { 0xce278d2681297L,0xb1b509546addcL,0x661aaf2cb350eL,0x12e92dc431737L, + 0x04b91a6028470L }, + { 0xf109572f8ddcfL,0x1e9a911af4dcfL,0x372430e08ebf6L,0x1cab48f4360acL, + 0x049534c537232L } }, + /* 148 */ + { { 0xf7d71f07b7e9dL,0xa313cd516f83dL,0xc047ee3a478efL,0xc5ee78ef264b6L, + 0x0caf46c4fd65aL }, + { 0xd0c7792aa8266L,0x66913684bba04L,0xe4b16b0edf454L,0x770f56e65168aL, + 0x014ce9e5704c6L } }, + /* 149 */ + { { 0x45e3e965e8f91L,0xbacb0f2492994L,0x0c8a0a0d3aca1L,0x9a71d31cc70f9L, + 0x01bb708a53e4cL }, + { 0xa9e69558bdd7aL,0x08018a26b1d5cL,0xc9cf1ec734a05L,0x0102b093aa714L, + 0x0f9d126f2da30L } }, + /* 150 */ + { { 0xbca7aaff9563eL,0xfeb49914a0749L,0xf5f1671dd077aL,0xcc69e27a0311bL, + 0x0807afcb9729eL }, + { 0xa9337c9b08b77L,0x85443c7e387f8L,0x76fd8ba86c3a7L,0xcd8c85fafa594L, + 0x0751adcd16568L } }, + /* 151 */ + { { 0xa38b410715c0dL,0x718f7697f78aeL,0x3fbf06dd113eaL,0x743f665eab149L, + 0x029ec44682537L }, + { 0x4719cb50bebbcL,0xbfe45054223d9L,0xd2dedb1399ee5L,0x077d90cd5b3a8L, + 0x0ff9370e392a4L } }, + /* 152 */ + { { 0x2d69bc6b75b65L,0xd5266651c559aL,0xde9d7d24188f8L,0xd01a28a9f33e3L, + 0x09776478ba2a9L }, + { 0x2622d929af2c7L,0x6d4e690923885L,0x89a51e9334f5dL,0x82face6cc7e5aL, + 0x074a6313fac2fL } }, + /* 153 */ + { { 0x4dfddb75f079cL,0x9518e36fbbb2fL,0x7cd36dd85b07cL,0x863d1b6cfcf0eL, + 0x0ab75be150ff4L }, + { 
0x367c0173fc9b7L,0x20d2594fd081bL,0x4091236b90a74L,0x59f615fdbf03cL, + 0x04ebeac2e0b44L } }, + /* 154 */ + { { 0xc5fe75c9f2c53L,0x118eae9411eb6L,0x95ac5d8d25220L,0xaffcc8887633fL, + 0x0df99887b2c1bL }, + { 0x8eed2850aaecbL,0x1b01d6a272bb7L,0x1cdbcac9d4918L,0x4058978dd511bL, + 0x027b040a7779fL } }, + /* 155 */ + { { 0x05db7f73b2eb2L,0x088e1b2118904L,0x962327ee0df85L,0xa3f5501b71525L, + 0x0b393dd37e4cfL }, + { 0x30e7b3fd75165L,0xc2bcd33554a12L,0xf7b5022d66344L,0x34196c36f1be0L, + 0x009588c12d046L } }, + /* 156 */ + { { 0x6093f02601c3bL,0xf8cf5c335fe08L,0x94aff28fb0252L,0x648b955cf2808L, + 0x081c879a9db9fL }, + { 0xe687cc6f56c51L,0x693f17618c040L,0x059353bfed471L,0x1bc444f88a419L, + 0x0fa0d48f55fc1L } }, + /* 157 */ + { { 0xe1c9de1608e4dL,0x113582822cbc6L,0x57ec2d7010ddaL,0x67d6f6b7ddc11L, + 0x08ea0e156b6a3L }, + { 0x4e02f2383b3b4L,0x943f01f53ca35L,0xde03ca569966bL,0xb5ac4ff6632b2L, + 0x03f5ab924fa00L } }, + /* 158 */ + { { 0xbb0d959739efbL,0xf4e7ebec0d337L,0x11a67d1c751b0L,0x256e2da52dd64L, + 0x08bc768872b74L }, + { 0xe3b7282d3d253L,0xa1f58d779fa5bL,0x16767bba9f679L,0xf34fa1cac168eL, + 0x0b386f19060fcL } }, + /* 159 */ + { { 0x3c1352fedcfc2L,0x6262f8af0d31fL,0x57288c25396bfL,0x9c4d9a02b4eaeL, + 0x04cb460f71b06L }, + { 0x7b4d35b8095eaL,0x596fc07603ae6L,0x614a16592bbf8L,0x5223e1475f66bL, + 0x052c0d50895efL } }, + /* 160 */ + { { 0xc210e15339848L,0xe870778c8d231L,0x956e170e87a28L,0x9c0b9d1de6616L, + 0x04ac3c9382bb0L }, + { 0xe05516998987dL,0xc4ae09f4d619bL,0xa3f933d8b2376L,0x05f41de0b7651L, + 0x0380d94c7e397L } }, + /* 161 */ + { { 0x355aa81542e75L,0xa1ee01b9b701aL,0x24d708796c724L,0x37af6b3a29776L, + 0x02ce3e171de26L }, + { 0xfeb49f5d5bc1aL,0x7e2777e2b5cfeL,0x513756ca65560L,0x4e4d4feaac2f9L, + 0x02e6cd8520b62L } }, + /* 162 */ + { { 0x5954b8c31c31dL,0x005bf21a0c368L,0x5c79ec968533dL,0x9d540bd7626e7L, + 0x0ca17754742c6L }, + { 0xedafff6d2dbb2L,0xbd174a9d18cc6L,0xa4578e8fd0d8cL,0x2ce6875e8793aL, + 0x0a976a7139cabL } }, + /* 163 */ + { { 0x51f1b93fb353dL,0x8b57fcfa720a6L,0x1b15281d75cabL,0x4999aa88cfa73L, + 0x08720a7170a1fL }, + { 0xe8d37693e1b90L,0x0b16f6dfc38c3L,0x52a8742d345dcL,0x893c8ea8d00abL, + 0x09719ef29c769L } }, + /* 164 */ + { { 0xeed8d58e35909L,0xdc33ddc116820L,0xe2050269366d8L,0x04c1d7f999d06L, + 0x0a5072976e157L }, + { 0xa37eac4e70b2eL,0x576890aa8a002L,0x45b2a5c84dcf6L,0x7725cd71bf186L, + 0x099389c9df7b7L } }, + /* 165 */ + { { 0xc08f27ada7a4bL,0x03fd389366238L,0x66f512c3abe9dL,0x82e46b672e897L, + 0x0a88806aa202cL }, + { 0x2044ad380184eL,0xc4126a8b85660L,0xd844f17a8cb78L,0xdcfe79d670c0aL, + 0x00043bffb4738L } }, + /* 166 */ + { { 0x9b5dc36d5192eL,0xd34590b2af8d5L,0x1601781acf885L,0x486683566d0a1L, + 0x052f3ef01ba6cL }, + { 0x6732a0edcb64dL,0x238068379f398L,0x040f3090a482cL,0x7e7516cbe5fa7L, + 0x03296bd899ef2L } }, + /* 167 */ + { { 0xaba89454d81d7L,0xef51eb9b3c476L,0x1c579869eade7L,0x71e9619a21cd8L, + 0x03b90febfaee5L }, + { 0x3023e5496f7cbL,0xd87fb51bc4939L,0x9beb5ce55be41L,0x0b1803f1dd489L, + 0x06e88069d9f81L } }, + /* 168 */ + { { 0x7ab11b43ea1dbL,0xa95259d292ce3L,0xf84f1860a7ff1L,0xad13851b02218L, + 0x0a7222beadefaL }, + { 0xc78ec2b0a9144L,0x51f2fa59c5a2aL,0x147ce385a0240L,0xc69091d1eca56L, + 0x0be94d523bc2aL } }, + /* 169 */ + { { 0x4945e0b226ce7L,0x47967e8b7072fL,0x5a6c63eb8afd7L,0xc766edea46f18L, + 0x07782defe9be8L }, + { 0xd2aa43db38626L,0x8776f67ad1760L,0x4499cdb460ae7L,0x2e4b341b86fc5L, + 0x003838567a289L } }, + /* 170 */ + { { 0xdaefd79ec1a0fL,0xfdceb39c972d8L,0x8f61a953bbcd6L,0xb420f5575ffc5L, + 0x0dbd986c4adf7L }, + { 
0xa881415f39eb7L,0xf5b98d976c81aL,0xf2f717d6ee2fcL,0xbbd05465475dcL, + 0x08e24d3c46860L } }, + /* 171 */ + { { 0xd8e549a587390L,0x4f0cbec588749L,0x25983c612bb19L,0xafc846e07da4bL, + 0x0541a99c4407bL }, + { 0x41692624c8842L,0x2ad86c05ffdb2L,0xf7fcf626044c1L,0x35d1c59d14b44L, + 0x0c0092c49f57dL } }, + /* 172 */ + { { 0xc75c3df2e61efL,0xc82e1b35cad3cL,0x09f29f47e8841L,0x944dc62d30d19L, + 0x075e406347286L }, + { 0x41fc5bbc237d0L,0xf0ec4f01c9e7dL,0x82bd534c9537bL,0x858691c51a162L, + 0x05b7cb658c784L } }, + /* 173 */ + { { 0xa70848a28ead1L,0x08fd3b47f6964L,0x67e5b39802dc5L,0x97a19ae4bfd17L, + 0x07ae13eba8df0L }, + { 0x16ef8eadd384eL,0xd9b6b2ff06fd2L,0xbcdb5f30361a2L,0xe3fd204b98784L, + 0x0787d8074e2a8L } }, + /* 174 */ + { { 0x25d6b757fbb1cL,0xb2ca201debc5eL,0xd2233ffe47bddL,0x84844a55e9a36L, + 0x05c2228199ef2L }, + { 0xd4a8588315250L,0x2b827097c1773L,0xef5d33f21b21aL,0xf2b0ab7c4ea1dL, + 0x0e45d37abbaf0L } }, + /* 175 */ + { { 0xf1e3428511c8aL,0xc8bdca6cd3d2dL,0x27c39a7ebb229L,0xb9d3578a71a76L, + 0x0ed7bc12284dfL }, + { 0x2a6df93dea561L,0x8dd48f0ed1cf2L,0xbad23e85443f1L,0x6d27d8b861405L, + 0x0aac97cc945caL } }, + /* 176 */ + { { 0x4ea74a16bd00aL,0xadf5c0bcc1eb5L,0xf9bfc06d839e9L,0xdc4e092bb7f11L, + 0x0318f97b31163L }, + { 0x0c5bec30d7138L,0x23abc30220eccL,0x022360644e8dfL,0xff4d2bb7972fbL, + 0x0fa41faa19a84L } }, + /* 177 */ + { { 0x2d974a6642269L,0xce9bb783bd440L,0x941e60bc81814L,0xe9e2398d38e47L, + 0x038bb6b2c1d26L }, + { 0xe4a256a577f87L,0x53dc11fe1cc64L,0x22807288b52d2L,0x01a5ff336abf6L, + 0x094dd0905ce76L } }, + /* 178 */ + { { 0xcf7dcde93f92aL,0xcb89b5f315156L,0x995e750a01333L,0x2ae902404df9cL, + 0x092077867d25cL }, + { 0x71e010bf39d44L,0x2096bb53d7e24L,0xc9c3d8f5f2c90L,0xeb514c44b7b35L, + 0x081e8428bd29bL } }, + /* 179 */ + { { 0x9c2bac477199fL,0xee6b5ecdd96ddL,0xe40fd0e8cb8eeL,0xa4b18af7db3feL, + 0x01b94ab62dbbfL }, + { 0x0d8b3ce47f143L,0xfc63f4616344fL,0xc59938351e623L,0x90eef18f270fcL, + 0x006a38e280555L } }, + /* 180 */ + { { 0xb0139b3355b49L,0x60b4ebf99b2e5L,0x269f3dc20e265L,0xd4f8c08ffa6bdL, + 0x0a7b36c2083d9L }, + { 0x15c3a1b3e8830L,0xe1a89f9c0b64dL,0x2d16930d5fceaL,0x2a20cfeee4a2eL, + 0x0be54c6b4a282L } }, + /* 181 */ + { { 0xdb3df8d91167cL,0x79e7a6625ed6cL,0x46ac7f4517c3fL,0x22bb7105648f3L, + 0x0bf30a5abeae0L }, + { 0x785be93828a68L,0x327f3ef0368e7L,0x92146b25161c3L,0xd13ae11b5feb5L, + 0x0d1c820de2732L } }, + /* 182 */ + { { 0xe13479038b363L,0x546b05e519043L,0x026cad158c11fL,0x8da34fe57abe6L, + 0x0b7d17bed68a1L }, + { 0xa5891e29c2559L,0x765bfffd8444cL,0x4e469484f7a03L,0xcc64498de4af7L, + 0x03997fd5e6412L } }, + /* 183 */ + { { 0x746828bd61507L,0xd534a64d2af20L,0xa8a15e329e132L,0x13e8ffeddfb08L, + 0x00eeb89293c6cL }, + { 0x69a3ea7e259f8L,0xe6d13e7e67e9bL,0xd1fa685ce1db7L,0xb6ef277318f6aL, + 0x0228916f8c922L } }, + /* 184 */ + { { 0xae25b0a12ab5bL,0x1f957bc136959L,0x16e2b0ccc1117L,0x097e8058429edL, + 0x0ec05ad1d6e93L }, + { 0xba5beac3f3708L,0x3530b59d77157L,0x18234e531baf9L,0x1b3747b552371L, + 0x07d3141567ff1L } }, + /* 185 */ + { { 0x9c05cf6dfefabL,0x68dcb377077bdL,0xa38bb95be2f22L,0xd7a3e53ead973L, + 0x0e9ce66fc9bc1L }, + { 0xa15766f6a02a1L,0xdf60e600ed75aL,0x8cdc1b938c087L,0x0651f8947f346L, + 0x0d9650b017228L } }, + /* 186 */ + { { 0xb4c4a5a057e60L,0xbe8def25e4504L,0x7c1ccbdcbccc3L,0xb7a2a63532081L, + 0x014d6699a804eL }, + { 0xa8415db1f411aL,0x0bf80d769c2c8L,0xc2f77ad09fbafL,0x598ab4deef901L, + 0x06f4c68410d43L } }, + /* 187 */ + { { 0x6df4e96c24a96L,0x85fcbd99a3872L,0xb2ae30a534dbcL,0x9abb3c466ef28L, + 0x04c4350fd6118L }, + { 
0x7f716f855b8daL,0x94463c38a1296L,0xae9334341a423L,0x18b5c37e1413eL, + 0x0a726d2425a31L } }, + /* 188 */ + { { 0x6b3ee948c1086L,0x3dcbd3a2e1daeL,0x3d022f3f1de50L,0xf3923f35ed3f0L, + 0x013639e82cc6cL }, + { 0x938fbcdafaa86L,0xfb2654a2589acL,0x5051329f45bc5L,0x35a31963b26e4L, + 0x0ca9365e1c1a3L } }, + /* 189 */ + { { 0x5ac754c3b2d20L,0x17904e241b361L,0xc9d071d742a54L,0x72a5b08521c4cL, + 0x09ce29c34970bL }, + { 0x81f736d3e0ad6L,0x9ef2f8434c8ccL,0xce862d98060daL,0xaf9835ed1d1a6L, + 0x048c4abd7ab42L } }, + /* 190 */ + { { 0x1b0cc40c7485aL,0xbbe5274dbfd22L,0x263d2e8ead455L,0x33cb493c76989L, + 0x078017c32f67bL }, + { 0x35769930cb5eeL,0x940c408ed2b9dL,0x72f1a4dc0d14eL,0x1c04f8b7bf552L, + 0x053cd0454de5cL } }, + /* 191 */ + { { 0x585fa5d28ccacL,0x56005b746ebcdL,0xd0123aa5f823eL,0xfa8f7c79f0a1cL, + 0x0eea465c1d3d7L }, + { 0x0659f0551803bL,0x9f7ce6af70781L,0x9288e706c0b59L,0x91934195a7702L, + 0x01b6e42a47ae6L } }, + /* 192 */ + { { 0x0937cf67d04c3L,0xe289eeb8112e8L,0x2594d601e312bL,0xbd3d56b5d8879L, + 0x00224da14187fL }, + { 0xbb8630c5fe36fL,0x604ef51f5f87aL,0x3b429ec580f3cL,0xff33964fb1bfbL, + 0x060838ef042bfL } }, + /* 193 */ + { { 0xcb2f27e0bbe99L,0xf304aa39ee432L,0xfa939037bda44L,0x16435f497c7a9L, + 0x0636eb2022d33L }, + { 0xd0e6193ae00aaL,0xfe31ae6d2ffcfL,0xf93901c875a00L,0x8bacf43658a29L, + 0x08844eeb63921L } }, + /* 194 */ + { { 0x171d26b3bae58L,0x7117e39f3e114L,0x1a8eada7db3dfL,0x789ecd37bc7f8L, + 0x027ba83dc51fbL }, + { 0xf439ffbf54de5L,0x0bb5fe1a71a7dL,0xb297a48727703L,0xa4ab42ee8e35dL, + 0x0adb62d3487f3L } }, + /* 195 */ + { { 0x168a2a175df2aL,0x4f618c32e99b1L,0x46b0916082aa0L,0xc8b2c9e4f2e71L, + 0x0b990fd7675e7L }, + { 0x9d96b4df37313L,0x79d0b40789082L,0x80877111c2055L,0xd18d66c9ae4a7L, + 0x081707ef94d10L } }, + /* 196 */ + { { 0x7cab203d6ff96L,0xfc0d84336097dL,0x042db4b5b851bL,0xaa5c268823c4dL, + 0x03792daead5a8L }, + { 0x18865941afa0bL,0x4142d83671528L,0xbe4e0a7f3e9e7L,0x01ba17c825275L, + 0x05abd635e94b0L } }, + /* 197 */ + { { 0xfa84e0ac4927cL,0x35a7c8cf23727L,0xadca0dfe38860L,0xb610a4bcd5ea4L, + 0x05995bf21846aL }, + { 0xf860b829dfa33L,0xae958fc18be90L,0x8630366caafe2L,0x411e9b3baf447L, + 0x044c32ca2d483L } }, + /* 198 */ + { { 0xa97f1e40ed80cL,0xb131d2ca82a74L,0xc2d6ad95f938cL,0xa54c53f2124b7L, + 0x01f2162fb8082L }, + { 0x67cc5720b173eL,0x66085f12f97e4L,0xc9d65dc40e8a6L,0x07c98cebc20e4L, + 0x08f1d402bc3e9L } }, + /* 199 */ + { { 0x92f9cfbc4058aL,0xb6292f56704f5L,0xc1d8c57b15e14L,0xdbf9c55cfe37bL, + 0x0b1980f43926eL }, + { 0x33e0932c76b09L,0x9d33b07f7898cL,0x63bb4611df527L,0x8e456f08ead48L, + 0x02828ad9b3744L } }, + /* 200 */ + { { 0x722c4c4cf4ac5L,0x3fdde64afb696L,0x0890832f5ac1aL,0xb3900551baa2eL, + 0x04973f1275a14L }, + { 0xd8335322eac5dL,0xf50bd9b568e59L,0x25883935e07eeL,0x8ac7ab36720faL, + 0x06dac8ed0db16L } }, + /* 201 */ + { { 0x545aeeda835efL,0xd21d10ed51f7bL,0x3741b094aa113L,0xde4c035a65e01L, + 0x04b23ef5920b9L }, + { 0xbb6803c4c7341L,0x6d3f58bc37e82L,0x51e3ee8d45770L,0x9a4e73527863aL, + 0x04dd71534ddf4L } }, + /* 202 */ + { { 0x4467295476cd9L,0x2fe31a725bbf9L,0xc4b67e0648d07L,0x4dbb1441c8b8fL, + 0x0fd3170002f4aL }, + { 0x43ff48995d0e1L,0xd10ef729aa1cbL,0x179898276e695L,0xf365e0d5f9764L, + 0x014fac58c9569L } }, + /* 203 */ + { { 0xa0065f312ae18L,0xc0fcc93fc9ad9L,0xa7d284651958dL,0xda50d9a142408L, + 0x0ed7c765136abL }, + { 0x70f1a25d4abbcL,0xf3f1a113ea462L,0xb51952f9b5dd8L,0x9f53c609b0755L, + 0x0fefcb7f74d2eL } }, + /* 204 */ + { { 0x9497aba119185L,0x30aac45ba4bd0L,0xa521179d54e8cL,0xd80b492479deaL, + 0x01801a57e87e0L }, + { 
0xd3f8dfcafffb0L,0x0bae255240073L,0xb5fdfbc6cf33cL,0x1064781d763b5L, + 0x09f8fc11e1eadL } }, + /* 205 */ + { { 0x3a1715e69544cL,0x67f04b7813158L,0x78a4c320eaf85L,0x69a91e22a8fd2L, + 0x0a9d3809d3d3aL }, + { 0xc2c2c59a2da3bL,0xf61895c847936L,0x3d5086938ccbcL,0x8ef75e65244e6L, + 0x03006b9aee117L } }, + /* 206 */ + { { 0x1f2b0c9eead28L,0x5d89f4dfbc0bbL,0x2ce89397eef63L,0xf761074757fdbL, + 0x00ab85fd745f8L }, + { 0xa7c933e5b4549L,0x5c97922f21ecdL,0x43b80404be2bbL,0x42c2261a1274bL, + 0x0b122d67511e9L } }, + /* 207 */ + { { 0x607be66a5ae7aL,0xfa76adcbe33beL,0xeb6e5c501e703L,0xbaecaf9043014L, + 0x09f599dc1097dL }, + { 0x5b7180ff250edL,0x74349a20dc6d7L,0x0b227a38eb915L,0x4b78425605a41L, + 0x07d5528e08a29L } }, + /* 208 */ + { { 0x58f6620c26defL,0xea582b2d1ef0fL,0x1ce3881025585L,0x1730fbe7d79b0L, + 0x028ccea01303fL }, + { 0xabcd179644ba5L,0xe806fff0b8d1dL,0x6b3e17b1fc643L,0x13bfa60a76fc6L, + 0x0c18baf48a1d0L } }, + /* 209 */ + { { 0x638c85dc4216dL,0x67206142ac34eL,0x5f5064a00c010L,0x596bd453a1719L, + 0x09def809db7a9L }, + { 0x8642e67ab8d2cL,0x336237a2b641eL,0x4c4218bb42404L,0x8ce57d506a6d6L, + 0x00357f8b06880L } }, + /* 210 */ + { { 0xdbe644cd2cc88L,0x8df0b8f39d8e9L,0xd30a0c8cc61c2L,0x98874a309874cL, + 0x0e4a01add1b48L }, + { 0x1eeacf57cd8f9L,0x3ebd594c482edL,0xbd2f7871b767dL,0xcc30a7295c717L, + 0x0466d7d79ce10L } }, + /* 211 */ + { { 0x318929dada2c7L,0xc38f9aa27d47dL,0x20a59e14fa0a6L,0xad1a90e4fd288L, + 0x0c672a522451eL }, + { 0x07cc85d86b655L,0x3bf9ad4af1306L,0x71172a6f0235dL,0x751399a086805L, + 0x05e3d64faf2a6L } }, + /* 212 */ + { { 0x410c79b3b4416L,0x85eab26d99aa6L,0xb656a74cd8fcfL,0x42fc5ebff74adL, + 0x06c8a7a95eb8eL }, + { 0x60ba7b02a63bdL,0x038b8f004710cL,0x12d90b06b2f23L,0xca918c6c37383L, + 0x0348ae422ad82L } }, + /* 213 */ + { { 0x746635ccda2fbL,0xa18e0726d27f4L,0x92b1f2022accaL,0x2d2e85adf7824L, + 0x0c1074de0d9efL }, + { 0x3ce44ae9a65b3L,0xac05d7151bfcfL,0xe6a9788fd71e4L,0x4ffcd4711f50cL, + 0x0fbadfbdbc9e5L } }, + /* 214 */ + { { 0x3f1cd20a99363L,0x8f6cf22775171L,0x4d359b2b91565L,0x6fcd968175cd2L, + 0x0b7f976b48371L }, + { 0x8e24d5d6dbf74L,0xfd71c3af36575L,0x243dfe38d23baL,0xc80548f477600L, + 0x0f4d41b2ecafcL } }, + /* 215 */ + { { 0x1cf28fdabd48dL,0x3632c078a451fL,0x17146e9ce81beL,0x0f106ace29741L, + 0x0180824eae016L }, + { 0x7698b66e58358L,0x52ce6ca358038L,0xe41e6c5635687L,0x6d2582380e345L, + 0x067e5f63983cfL } }, + /* 216 */ + { { 0xccb8dcf4899efL,0xf09ebb44c0f89L,0x2598ec9949015L,0x1fc6546f9276bL, + 0x09fef789a04c1L }, + { 0x67ecf53d2a071L,0x7fa4519b096d3L,0x11e2eefb10e1aL,0x4e20ca6b3fb06L, + 0x0bc80c181a99cL } }, + /* 217 */ + { { 0x536f8e5eb82e6L,0xc7f56cb920972L,0x0b5da5e1a484fL,0xdf10c78e21715L, + 0x049270e629f8cL }, + { 0x9b7bbea6b50adL,0xc1a2388ffc1a3L,0x107197b9a0284L,0x2f7f5403eb178L, + 0x0d2ee52f96137L } }, + /* 218 */ + { { 0xcd28588e0362aL,0xa78fa5d94dd37L,0x434a526442fa8L,0xb733aff836e5aL, + 0x0dfb478bee5abL }, + { 0xf1ce7673eede6L,0xd42b5b2f04a91L,0x530da2fa5390aL,0x473a5e66f7bf5L, + 0x0d9a140b408dfL } }, + /* 219 */ + { { 0x221b56e8ea498L,0x293563ee090e0L,0x35d2ade623478L,0x4b1ae06b83913L, + 0x0760c058d623fL }, + { 0x9b58cc198aa79L,0xd2f07aba7f0b8L,0xde2556af74890L,0x04094e204110fL, + 0x07141982d8f19L } }, + /* 220 */ + { { 0xa0e334d4b0f45L,0x38392a94e16f0L,0x3c61d5ed9280bL,0x4e473af324c6bL, + 0x03af9d1ce89d5L }, + { 0xf798120930371L,0x4c21c17097fd8L,0xc42309beda266L,0x7dd60e9545dcdL, + 0x0b1f815c37395L } }, + /* 221 */ + { { 0xaa78e89fec44aL,0x473caa4caf84fL,0x1b6a624c8c2aeL,0xf052691c807dcL, + 0x0a41aed141543L }, + { 
0x353997d5ffe04L,0xdf625b6e20424L,0x78177758bacb2L,0x60ef85d660be8L, + 0x0d6e9c1dd86fbL } }, + /* 222 */ + { { 0x2e97ec6853264L,0xb7e2304a0b3aaL,0x8eae9be771533L,0xf8c21b912bb7bL, + 0x09c9c6e10ae9bL }, + { 0x09a59e030b74cL,0x4d6a631e90a23L,0x49b79f24ed749L,0x61b689f44b23aL, + 0x0566bd59640faL } }, + /* 223 */ + { { 0xc0118c18061f3L,0xd37c83fc70066L,0x7273245190b25L,0x345ef05fc8e02L, + 0x0cf2c7390f525L }, + { 0xbceb410eb30cfL,0xba0d77703aa09L,0x50ff255cfd2ebL,0x0979e842c43a1L, + 0x002f517558aa2L } }, + /* 224 */ + { { 0xef794addb7d07L,0x4224455500396L,0x78aa3ce0b4fc7L,0xd97dfaff8eaccL, + 0x014e9ada5e8d4L }, + { 0x480a12f7079e2L,0xcde4b0800edaaL,0x838157d45baa3L,0x9ae801765e2d7L, + 0x0a0ad4fab8e9dL } }, + /* 225 */ + { { 0xb76214a653618L,0x3c31eaaa5f0bfL,0x4949d5e187281L,0xed1e1553e7374L, + 0x0bcd530b86e56L }, + { 0xbe85332e9c47bL,0xfeb50059ab169L,0x92bfbb4dc2776L,0x341dcdba97611L, + 0x0909283cf6979L } }, + /* 226 */ + { { 0x0032476e81a13L,0x996217123967bL,0x32e19d69bee1aL,0x549a08ed361bdL, + 0x035eeb7c9ace1L }, + { 0x0ae5a7e4e5bdcL,0xd3b6ceec6e128L,0xe266bc12dcd2cL,0xe86452e4224c6L, + 0x09a8b2cf4448aL } }, + /* 227 */ + { { 0x71bf209d03b59L,0xa3b65af2abf64L,0xbd5eec9c90e62L,0x1379ff7ff168eL, + 0x06bdb60f4d449L }, + { 0xafebc8a55bc30L,0x1610097fe0dadL,0xc1e3bddc79eadL,0x08a942e197414L, + 0x001ec3cfd94baL } }, + /* 228 */ + { { 0x277ebdc9485c2L,0x7922fb10c7ba6L,0x0a28d8a48cc9aL,0x64f64f61d60f7L, + 0x0d1acb1c04754L }, + { 0x902b126f36612L,0x4ee0618d8bd26L,0x08357ee59c3a4L,0x26c24df8a8133L, + 0x07dcd079d4056L } }, + /* 229 */ + { { 0x7d4d3f05a4b48L,0x52372307725ceL,0x12a915aadcd29L,0x19b8d18f79718L, + 0x00bf53589377dL }, + { 0xcd95a6c68ea73L,0xca823a584d35eL,0x473a723c7f3bbL,0x86fc9fb674c6fL, + 0x0d28be4d9e166L } }, + /* 230 */ + { { 0xb990638fa8e4bL,0x6e893fd8fc5d2L,0x36fb6fc559f18L,0x88ce3a6de2aa4L, + 0x0d76007aa510fL }, + { 0x0aab6523a4988L,0x4474dd02732d1L,0x3407278b455cfL,0xbb017f467082aL, + 0x0f2b52f68b303L } }, + /* 231 */ + { { 0x7eafa9835b4caL,0xfcbb669cbc0d5L,0x66431982d2232L,0xed3a8eeeb680cL, + 0x0d8dbe98ecc5aL }, + { 0x9be3fc5a02709L,0xe5f5ba1fa8cbaL,0x10ea85230be68L,0x9705febd43cdfL, + 0x0e01593a3ee55L } }, + /* 232 */ + { { 0x5af50ea75a0a6L,0xac57858033d3eL,0x0176406512226L,0xef066fe6d50fdL, + 0x0afec07b1aeb8L }, + { 0x9956780bb0a31L,0xcc37309aae7fbL,0x1abf3896f1af3L,0xbfdd9153a15a0L, + 0x0a71b93546e2dL } }, + /* 233 */ + { { 0xe12e018f593d2L,0x28a078122bbf8L,0xba4f2add1a904L,0x23d9150505db0L, + 0x053a2005c6285L }, + { 0x8b639e7f2b935L,0x5ac182961a07cL,0x518ca2c2bff97L,0x8e3d86bceea77L, + 0x0bf47d19b3d58L } }, + /* 234 */ + { { 0x967a7dd7665d5L,0x572f2f4de5672L,0x0d4903f4e3030L,0xa1b6144005ae8L, + 0x0001c2c7f39c9L }, + { 0xa801469efc6d6L,0xaa7bc7a724143L,0x78150a4c810bdL,0xb99b5f65670baL, + 0x0fdadf8e786ffL } }, + /* 235 */ + { { 0x8cb88ffc00785L,0x913b48eb67fd3L,0xf368fbc77fa75L,0x3c940454d055bL, + 0x03a838e4d5aa4L }, + { 0x663293e97bb9aL,0x63441d94d9561L,0xadb2a839eb933L,0x1da3515591a60L, + 0x03cdb8257873eL } }, + /* 236 */ + { { 0x140a97de77eabL,0x0d41648109137L,0xeb1d0dff7e1c5L,0x7fba762dcad2cL, + 0x05a60cc89f1f5L }, + { 0x3638240d45673L,0x195913c65580bL,0xd64b7411b82beL,0x8fc0057284b8dL, + 0x0922ff56fdbfdL } }, + /* 237 */ + { { 0x65deec9a129a1L,0x57cc284e041b2L,0xebfbe3ca5b1ceL,0xcd6204380c46cL, + 0x072919a7df6c5L }, + { 0xf453a8fb90f9aL,0x0b88e4031b298L,0x96f1856d719c0L,0x089ae32c0e777L, + 0x05e7917803624L } }, + /* 238 */ + { { 0x6ec557f63cdfbL,0x71f1cae4fd5c1L,0x60597ca8e6a35L,0x2fabfce26bea5L, + 0x04e0a5371e24cL }, + { 
0xa40d3a5765357L,0x440d73a2b4276L,0x1d11a323c89afL,0x04eeb8f370ae4L, + 0x0f5ff7818d566L } }, + /* 239 */ + { { 0x3e3fe1a09df21L,0x8ee66e8e47fbfL,0x9c8901526d5d2L,0x5e642096bd0a2L, + 0x0e41df0e9533fL }, + { 0xfda40b3ba9e3fL,0xeb2604d895305L,0xf0367c7f2340cL,0x155f0866e1927L, + 0x08edd7d6eac4fL } }, + /* 240 */ + { { 0x1dc0e0bfc8ff3L,0x2be936f42fc9aL,0xca381ef14efd8L,0xee9667016f7ccL, + 0x01432c1caed8aL }, + { 0x8482970b23c26L,0x730735b273ec6L,0xaef0f5aa64fe8L,0xd2c6e389f6e5eL, + 0x0caef480b5ac8L } }, + /* 241 */ + { { 0x5c97875315922L,0x713063cca5524L,0x64ef2cbd82951L,0xe236f3ce60d0bL, + 0x0d0ba177e8efaL }, + { 0x9ae8fb1b3af60L,0xe53d2da20e53aL,0xf9eef281a796aL,0xae1601d63605dL, + 0x0f31c957c1c54L } }, + /* 242 */ + { { 0x58d5249cc4597L,0xb0bae0a028c0fL,0x34a814adc5015L,0x7c3aefc5fc557L, + 0x0013404cb96e1L }, + { 0xe2585c9a824bfL,0x5e001eaed7b29L,0x1ef68acd59318L,0x3e6c8d6ee6826L, + 0x06f377c4b9193L } }, + /* 243 */ + { { 0x3bad1a8333fd2L,0x025a2a95b89f9L,0xaf75acea89302L,0x9506211e5037eL, + 0x06dba3e4ed2d0L }, + { 0xef98cd04399cdL,0x6ee6b73adea48L,0x17ecaf31811c6L,0xf4a772f60752cL, + 0x0f13cf3423becL } }, + /* 244 */ + { { 0xb9ec0a919e2ebL,0x95f62c0f68ceeL,0xaba229983a9a1L,0xbad3cfba3bb67L, + 0x0c83fa9a9274bL }, + { 0xd1b0b62fa1ce0L,0xf53418efbf0d7L,0x2706f04e58b60L,0x2683bfa8ef9e5L, + 0x0b49d70f45d70L } }, + /* 245 */ + { { 0xc7510fad5513bL,0xecb1751e2d914L,0x9fb9d5905f32eL,0xf1cf6d850418dL, + 0x059cfadbb0c30L }, + { 0x7ac2355cb7fd6L,0xb8820426a3e16L,0x0a78864249367L,0x4b67eaeec58c9L, + 0x05babf362354aL } }, + /* 246 */ + { { 0x981d1ee424865L,0x78f2e5577f37cL,0x9e0c0588b0028L,0xc8f0702970f1bL, + 0x06188c6a79026L }, + { 0x9a19bd0f244daL,0x5cfb08087306fL,0xf2136371eccedL,0xb9d935470f9b9L, + 0x0993fe475df50L } }, + /* 247 */ + { { 0x31cdf9b2c3609L,0xc02c46d4ea68eL,0xa77510184eb19L,0x616b7ac9ec1a9L, + 0x081f764664c80L }, + { 0xc2a5a75fbe978L,0xd3f183b3561d7L,0x01dd2bf6743feL,0x060d838d1f045L, + 0x0564a812a5fe9L } }, + /* 248 */ + { { 0xa64f4fa817d1dL,0x44bea82e0f7a5L,0xd57f9aa55f968L,0x1d6cb5ff5a0fcL, + 0x0226bf3cf00e5L }, + { 0x1a9f92f2833cfL,0x5a4f4f89a8d6dL,0xf3f7f7720a0a3L,0x783611536c498L, + 0x068779f47ff25L } }, + /* 249 */ + { { 0x0c1c173043d08L,0x741fc020fa79bL,0xa6d26d0a54467L,0x2e0bd3767e289L, + 0x097bcb0d1eb09L }, + { 0x6eaa8f32ed3c3L,0x51b281bc482abL,0xfa178f3c8a4f1L,0x46554d1bf4f3bL, + 0x0a872ffe80a78L } }, + /* 250 */ + { { 0xb7935a32b2086L,0x0e8160f486b1aL,0xb6ae6bee1eb71L,0xa36a9bd0cd913L, + 0x002812bfcb732L }, + { 0xfd7cacf605318L,0x50fdfd6d1da63L,0x102d619646e5dL,0x96afa1d683982L, + 0x007391cc9fe53L } }, + /* 251 */ + { { 0x157f08b80d02bL,0xd162877f7fc50L,0x8d542ae6b8333L,0x2a087aca1af87L, + 0x0355d2adc7e6dL }, + { 0xf335a287386e1L,0x94f8e43275b41L,0x79989eafd272aL,0x3a79286ca2cdeL, + 0x03dc2b1e37c2aL } }, + /* 252 */ + { { 0x9d21c04581352L,0x25376782bed68L,0xfed701f0a00c8L,0x846b203bd5909L, + 0x0c47869103ccdL }, + { 0xa770824c768edL,0x026841f6575dbL,0xaccce0e72feeaL,0x4d3273313ed56L, + 0x0ccc42968d5bbL } }, + /* 253 */ + { { 0x50de13d7620b9L,0x8a5992a56a94eL,0x75487c9d89a5cL,0x71cfdc0076406L, + 0x0e147eb42aa48L }, + { 0xab4eeacf3ae46L,0xfb50350fbe274L,0x8c840eafd4936L,0x96e3df2afe474L, + 0x0239ac047080eL } }, + /* 254 */ + { { 0xd1f352bfee8d4L,0xcffa7b0fec481L,0xce9af3cce80b5L,0xe59d105c4c9e2L, + 0x0c55fa1a3f5f7L }, + { 0x6f14e8257c227L,0x3f342be00b318L,0xa904fb2c5b165L,0xb69909afc998aL, + 0x0094cd99cd4f4L } }, + /* 255 */ + { { 0x81c84d703bebaL,0x5032ceb2918a9L,0x3bd49ec8631d1L,0xad33a445f2c9eL, + 0x0b90a30b642abL }, + { 
0x5404fb4a5abf9L,0xc375db7603b46L,0xa35d89f004750L,0x24f76f9a42cccL,
+ 0x0019f8b9a1b79L } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_5(sp_point_256* r, const sp_digit* k,
+ int map, void* heap)
+{
+ return sp_256_ecc_mulmod_stripe_5(r, &p256_base, p256_table,
+ k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km Scalar to multiply by.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 p;
+ sp_digit kd[5];
+#endif
+ sp_point_256* point;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ err = sp_256_point_new_5(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ k = kd;
+#endif
+ if (err == MP_OKAY) {
+ sp_256_from_mp(k, 5, km);
+
+ err = sp_256_ecc_mulmod_base_5(point, k, map, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_to_ecc_point_5(point, r);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_5(point, 0, heap);
+
+ return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_256_iszero_5(const sp_digit* a)
+{
+ return (a[0] | a[1] | a[2] | a[3] | a[4]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_256_add_one_5(sp_digit* a)
+{
+ a[0]++;
+ sp_256_norm_5(a);
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert.
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = n-1; i >= 0; i--) {
+ r[j] |= (((sp_digit)a[i]) << s);
+ if (s >= 44U) {
+ r[j] &= 0xfffffffffffffL;
+ s = 52U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ r[++j] = (sp_digit)a[i] >> s;
+ s = 8U - s;
+ }
+ else {
+ s += 8U;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng Random number generator.
+ * k Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
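+ *
+ * The scalar is produced by rejection sampling: 32-byte random blocks
+ * are drawn until one compares below order-2, then 1 is added, giving
+ * a value in 1..order-1 without introducing modular bias.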
+ */ +static int sp_256_ecc_gen_k_5(WC_RNG* rng, sp_digit* k) +{ + int err; + byte buf[32]; + + do { + err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf)); + if (err == 0) { + sp_256_from_bin(k, 5, buf, (int)sizeof(buf)); + if (sp_256_cmp_5(k, p256_order2) < 0) { + sp_256_add_one_5(k); + break; + } + } + } + while (err == 0); + + return err; +} + +/* Makes a random EC key pair. + * + * rng Random number generator. + * priv Generated private value. + * pub Generated public point. + * heap Heap to use for allocation. + * returns ECC_INF_E when the point does not have the correct order, RNG + * failures, MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 p; + sp_digit kd[5]; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_256 inf; +#endif +#endif + sp_point_256* point; + sp_digit* k = NULL; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_256* infinity; +#endif + int err; + + (void)heap; + + err = sp_256_point_new_5(heap, p, point); +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + if (err == MP_OKAY) { + err = sp_256_point_new_5(heap, inf, infinity); + } +#endif +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) { + err = MEMORY_E; + } + } +#else + k = kd; +#endif + + if (err == MP_OKAY) { + err = sp_256_ecc_gen_k_5(rng, k); + } + if (err == MP_OKAY) { + err = sp_256_ecc_mulmod_base_5(point, k, 1, NULL); + } + +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + if (err == MP_OKAY) { + err = sp_256_ecc_mulmod_5(infinity, point, p256_order, 1, NULL); + } + if (err == MP_OKAY) { + if ((sp_256_iszero_5(point->x) == 0) || (sp_256_iszero_5(point->y) == 0)) { + err = ECC_INF_E; + } + } +#endif + + if (err == MP_OKAY) { + err = sp_256_to_mp(k, priv); + } + if (err == MP_OKAY) { + err = sp_256_point_to_ecc_point_5(point, pub); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_256_point_free_5(infinity, 1, heap); +#endif + sp_256_point_free_5(point, 1, heap); + + return err; +} + +#ifdef HAVE_ECC_DHE +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 32 + * + * r A single precision integer. + * a Byte array. + */ +static void sp_256_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + for (i=0; i<4; i++) { + r[i+1] += r[i] >> 52; + r[i] &= 0xfffffffffffffL; + } + j = 256 / 8 - 1; + a[j] = 0; + for (i=0; i<5 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ + b += 8 - s; + if (j < 0) { + break; + } + while (b < 52) { + a[j--] = (byte)(r[i] >> b); + b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 52); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +/* Multiply the point by the scalar and serialize the X ordinate. + * The number is 0 padded to maximum size on output. + * + * priv Scalar to multiply the point by. + * pub Point to multiply. + * out Buffer to hold X ordinate. + * outLen On entry, size of the buffer in bytes. + * On exit, length of data in buffer in bytes. + * heap Heap to use for allocation. 
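+ * The secret written to out is the X ordinate of priv.pub serialized
+ * big endian; it is always exactly 32 bytes, with leading zero bytes
+ * kept so the length never varies.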
+ * returns BUFFER_E if the buffer is too small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
+ word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 p;
+ sp_digit kd[5];
+#endif
+ sp_point_256* point = NULL;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ if (*outLen < 32U) {
+ err = BUFFER_E;
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_5(heap, p, point);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL)
+ err = MEMORY_E;
+ }
+#else
+ k = kd;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_256_from_mp(k, 5, priv);
+ sp_256_point_from_ecc_point_5(point, pub);
+ err = sp_256_ecc_mulmod_5(point, point, k, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ sp_256_to_bin(point->x, out);
+ *outLen = 32;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_5(point, 0, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_256_mul_d_5(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int128_t tb = b;
+ int128_t t = 0;
+ int i;
+
+ for (i = 0; i < 5; i++) {
+ t += tb * a[i];
+ r[i] = t & 0xfffffffffffffL;
+ t >>= 52;
+ }
+ r[5] = (sp_digit)t;
+#else
+ int128_t tb = b;
+ int128_t t[5];
+
+ t[ 0] = tb * a[ 0];
+ t[ 1] = tb * a[ 1];
+ t[ 2] = tb * a[ 2];
+ t[ 3] = tb * a[ 3];
+ t[ 4] = tb * a[ 4];
+ r[ 0] = (t[ 0] & 0xfffffffffffffL);
+ r[ 1] = (sp_digit)(t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffL);
+ r[ 2] = (sp_digit)(t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffL);
+ r[ 3] = (sp_digit)(t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffL);
+ r[ 4] = (sp_digit)(t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffL);
+ r[ 5] = (sp_digit)(t[ 4] >> 52);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SP_DIV_64
+static WC_INLINE sp_digit sp_256_div_word_5(sp_digit d1, sp_digit d0,
+ sp_digit dv)
+{
+ sp_digit d, r, t;
+
+ /* All 52 bits from d1 and top 11 bits from d0. */
+ d = (d1 << 11) | (d0 >> 41);
+ r = d / dv;
+ d -= r * dv;
+ /* Up to 12 bits in r */
+ /* Next 11 bits from d0. */
+ r <<= 11;
+ d <<= 11;
+ d |= (d0 >> 30) & ((1 << 11) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 23 bits in r */
+ /* Next 11 bits from d0. */
+ r <<= 11;
+ d <<= 11;
+ d |= (d0 >> 19) & ((1 << 11) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 34 bits in r */
+ /* Next 11 bits from d0. */
+ r <<= 11;
+ d <<= 11;
+ d |= (d0 >> 8) & ((1 << 11) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 45 bits in r */
+ /* Remaining 8 bits from d0. */
+ r <<= 8;
+ d <<= 8;
+ d |= d0 & ((1 << 8) - 1);
+ t = d / dv;
+ r += t;
+
+ return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
+/* Divide a by d and put the remainder into r. (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
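+ * Division is schoolbook long division on 52-bit words: each quotient
+ * word is estimated from the top word of the divisor, multiplied back
+ * and subtracted, with a correction pass to handle any overshoot.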
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_256_div_5(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ int i;
+#ifndef WOLFSSL_SP_DIV_64
+ int128_t d1;
+#endif
+ sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* td;
+#else
+ sp_digit t1d[10], t2d[5 + 1];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+ (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 5 + 1), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = td;
+ t2 = td + 2 * 5;
+#else
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ dv = d[4];
+ XMEMCPY(t1, a, sizeof(*t1) * 2U * 5U);
+ for (i=4; i>=0; i--) {
+ t1[5 + i] += t1[5 + i - 1] >> 52;
+ t1[5 + i - 1] &= 0xfffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
+ d1 = t1[5 + i];
+ d1 <<= 52;
+ d1 += t1[5 + i - 1];
+ r1 = (sp_digit)(d1 / dv);
+#else
+ r1 = sp_256_div_word_5(t1[5 + i], t1[5 + i - 1], dv);
+#endif
+
+ sp_256_mul_d_5(t2, d, r1);
+ (void)sp_256_sub_5(&t1[i], &t1[i], t2);
+ t1[5 + i] -= t2[5];
+ t1[5 + i] += t1[5 + i - 1] >> 52;
+ t1[5 + i - 1] &= 0xfffffffffffffL;
+ r1 = (((-t1[5 + i]) << 52) - t1[5 + i - 1]) / dv;
+ r1++;
+ sp_256_mul_d_5(t2, d, r1);
+ (void)sp_256_add_5(&t1[i], &t1[i], t2);
+ t1[5 + i] += t1[5 + i - 1] >> 52;
+ t1[5 + i - 1] &= 0xfffffffffffffL;
+ }
+ t1[5 - 1] += t1[5 - 2] >> 52;
+ t1[5 - 2] &= 0xfffffffffffffL;
+ r1 = t1[5 - 1] / dv;
+
+ sp_256_mul_d_5(t2, d, r1);
+ (void)sp_256_sub_5(t1, t1, t2);
+ XMEMCPY(r, t1, sizeof(*r) * 2U * 5U);
+ for (i=0; i<3; i++) {
+ r[i+1] += r[i] >> 52;
+ r[i] &= 0xfffffffffffffL;
+ }
+ sp_256_cond_add_5(r, r, d, 0 - ((r[4] < 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_256_mod_5(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_256_div_5(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P256 curve. */
+static const uint64_t p256_order_minus_2[4] = {
+ 0xf3b9cac2fc63254fU,0xbce6faada7179e84U,0xffffffffffffffffU,
+ 0xffffffff00000000U
+};
+#else
+/* The low half of the order-2 of the P256 curve. */
+static const uint64_t p256_order_low[2] = {
+ 0xf3b9cac2fc63254fU,0xbce6faada7179e84U
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of P256 curve. (r = a * b mod order)
+ *
+ * r Result of the multiplication.
+ * a First operand of the multiplication.
+ * b Second operand of the multiplication.
+ */
+static void sp_256_mont_mul_order_5(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ sp_256_mul_5(r, a, b);
+ sp_256_mont_reduce_order_5(r, p256_order, p256_mp_order);
+}
+
+/* Square number mod the order of P256 curve. (r = a * a mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_256_mont_sqr_order_5(sp_digit* r, const sp_digit* a)
+{
+ sp_256_sqr_5(r, a);
+ sp_256_mont_reduce_order_5(r, p256_order, p256_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P256 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_256_mont_sqr_n_order_5(sp_digit* r, const sp_digit* a, int n)
+{
+ int i;
+
+ sp_256_mont_sqr_order_5(r, a);
+ for (i=1; i<n; i++) {
+ sp_256_mont_sqr_order_5(r, r);
+ }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data.
+ */
+static void sp_256_mont_inv_order_5(sp_digit* r, const sp_digit* a,
+ sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* t = td;
+ int i;
+
+ XMEMCPY(t, a, sizeof(sp_digit) * 5);
+ for (i=254; i>=0; i--) {
+ sp_256_mont_sqr_order_5(t, t);
+ if ((p256_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+ sp_256_mont_mul_order_5(t, t, a);
+ }
+ }
+ XMEMCPY(r, t, sizeof(sp_digit) * 5U);
+#else
+ sp_digit* t = td;
+ sp_digit* t2 = td + 2 * 5;
+ sp_digit* t3 = td + 4 * 5;
+ int i;
+
+ /* t = a^2 */
+ sp_256_mont_sqr_order_5(t, a);
+ /* t = a^3 = t * a */
+ sp_256_mont_mul_order_5(t, t, a);
+ /* t2= a^c = t ^ 2 ^ 2 */
+ sp_256_mont_sqr_n_order_5(t2, t, 2);
+ /* t3= a^f = t2 * t */
+ sp_256_mont_mul_order_5(t3, t2, t);
+ /* t2= a^f0 = t3 ^ 2 ^ 4 */
+ sp_256_mont_sqr_n_order_5(t2, t3, 4);
+ /* t = a^ff = t2 * t3 */
+ sp_256_mont_mul_order_5(t, t2, t3);
+ /* t2= a^ff00 = t ^ 2 ^ 8 */
+ sp_256_mont_sqr_n_order_5(t2, t, 8);
+ /* t = a^ffff = t2 * t */
+ sp_256_mont_mul_order_5(t, t2, t);
+ /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+ sp_256_mont_sqr_n_order_5(t2, t, 16);
+ /* t = a^ffffffff = t2 * t */
+ sp_256_mont_mul_order_5(t, t2, t);
+ /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
+ sp_256_mont_sqr_n_order_5(t2, t, 64);
+ /* t2= a^ffffffff00000000ffffffff = t2 * t */
+ sp_256_mont_mul_order_5(t2, t2, t);
+ /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
+ sp_256_mont_sqr_n_order_5(t2, t2, 32);
+ /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+ sp_256_mont_mul_order_5(t2, t2, t);
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
+ for (i=127; i>=112; i--) {
+ sp_256_mont_sqr_order_5(t2, t2);
+ if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+ sp_256_mont_mul_order_5(t2, t2, a);
+ }
+ }
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+ sp_256_mont_sqr_n_order_5(t2, t2, 4);
+ sp_256_mont_mul_order_5(t2, t2, t3);
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+ for (i=107; i>=64; i--) {
+ sp_256_mont_sqr_order_5(t2, t2);
+ if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+ sp_256_mont_mul_order_5(t2, t2, a);
+ }
+ }
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+ sp_256_mont_sqr_n_order_5(t2, t2, 4);
+ sp_256_mont_mul_order_5(t2, t2, t3);
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+ for (i=59; i>=32; i--) {
+ sp_256_mont_sqr_order_5(t2, t2);
+ if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+ sp_256_mont_mul_order_5(t2, t2, a);
+ }
+ }
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
+ sp_256_mont_sqr_n_order_5(t2, t2, 4);
+ sp_256_mont_mul_order_5(t2, t2, t3);
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+ for (i=27; i>=0; i--) {
+ sp_256_mont_sqr_order_5(t2, t2);
+ if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+ sp_256_mont_mul_order_5(t2, t2, a);
+ }
+ }
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+ sp_256_mont_sqr_n_order_5(t2, t2, 4);
+ /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+ sp_256_mont_mul_order_5(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
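+/* Note: sp_256_mont_inv_order_5 computes 1/a as a^(order-2) mod order,
+ * which holds because the order of P256 is prime (Fermat's little
+ * theorem). The small build square-and-multiplies over all 255 bits of
+ * p256_order_minus_2; the large build shortens the all-ones top half
+ * with a fixed addition chain and only scans bits of p256_order_low.
+ * The exponent is a public constant, so the sequence of operations is
+ * independent of the secret value a.
+ */
+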
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 256 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash Hash to sign.
+ * hashLen Length of the hash data.
+ * rng Random number generator.
+ * priv Private part of key - scalar.
+ * rm First part of result as an mp_int.
+ * sm Second part of result as an mp_int.
+ * km Scalar to use as k; when NULL or zero a random k is generated,
+ * otherwise km is used and then zeroed.
+ * heap Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+ mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit ed[2*5];
+ sp_digit xd[2*5];
+ sp_digit kd[2*5];
+ sp_digit rd[2*5];
+ sp_digit td[3 * 2*5];
+ sp_point_256 p;
+#endif
+ sp_digit* e = NULL;
+ sp_digit* x = NULL;
+ sp_digit* k = NULL;
+ sp_digit* r = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_256* point = NULL;
+ sp_digit carry;
+ sp_digit* s = NULL;
+ sp_digit* kInv = NULL;
+ int err = MP_OKAY;
+ int64_t c;
+ int i;
+
+ (void)heap;
+
+ err = sp_256_point_new_5(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ e = d + 0 * 5;
+ x = d + 2 * 5;
+ k = d + 4 * 5;
+ r = d + 6 * 5;
+ tmp = d + 8 * 5;
+#else
+ e = ed;
+ x = xd;
+ k = kd;
+ r = rd;
+ tmp = td;
+#endif
+ s = e;
+ kInv = k;
+
+ if (hashLen > 32U) {
+ hashLen = 32U;
+ }
+
+ sp_256_from_bin(e, 5, hash, (int)hashLen);
+ }
+
+ for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+ sp_256_from_mp(x, 5, priv);
+
+ /* New random point. */
+ if (km == NULL || mp_iszero(km)) {
+ err = sp_256_ecc_gen_k_5(rng, k);
+ }
+ else {
+ sp_256_from_mp(k, 5, km);
+ mp_zero(km);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_mulmod_base_5(point, k, 1, NULL);
+ }
+
+ if (err == MP_OKAY) {
+ /* r = point->x mod order */
+ XMEMCPY(r, point->x, sizeof(sp_digit) * 5U);
+ sp_256_norm_5(r);
+ c = sp_256_cmp_5(r, p256_order);
+ sp_256_cond_sub_5(r, r, p256_order, 0L - (sp_digit)(c >= 0));
+ sp_256_norm_5(r);
+
+ /* Conv k to Montgomery form (mod order) */
+ sp_256_mul_5(k, k, p256_norm_order);
+ err = sp_256_mod_5(k, k, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_5(k);
+ /* kInv = 1/k mod order */
+ sp_256_mont_inv_order_5(kInv, k, tmp);
+ sp_256_norm_5(kInv);
+
+ /* s = r * x + e */
+ sp_256_mul_5(x, x, r);
+ err = sp_256_mod_5(x, x, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_5(x);
+ carry = sp_256_add_5(s, e, x);
+ sp_256_cond_sub_5(s, s, p256_order, 0 - carry);
+ sp_256_norm_5(s);
+ c = sp_256_cmp_5(s, p256_order);
+ sp_256_cond_sub_5(s, s, p256_order, 0L - (sp_digit)(c >= 0));
+ sp_256_norm_5(s);
+
+ /* s = s * k^-1 mod order */
+ sp_256_mont_mul_order_5(s, s, kInv);
+ sp_256_norm_5(s);
+
+ /* Check that signature is usable.
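+ * A zero s cannot form a valid ECDSA signature; in that case a
+ * new random k is generated and the computation is retried, up
+ * to SP_ECC_MAX_SIG_GEN times.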
*/
+ if (sp_256_iszero_5(s) == 0) {
+ break;
+ }
+ }
+
+ if (i == 0) {
+ err = RNG_FAILURE_E;
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(r, rm);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(s, sm);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XMEMSET(d, 0, sizeof(sp_digit) * 8 * 5);
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+ XMEMSET(e, 0, sizeof(sp_digit) * 2U * 5U);
+ XMEMSET(x, 0, sizeof(sp_digit) * 2U * 5U);
+ XMEMSET(k, 0, sizeof(sp_digit) * 2U * 5U);
+ XMEMSET(r, 0, sizeof(sp_digit) * 2U * 5U);
+ XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 5U);
+#endif
+ sp_256_point_free_5(point, 1, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 256)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash Hash to verify.
+ * hashLen Length of the hash data.
+ * pX X ordinate of the public point.
+ * pY Y ordinate of the public point.
+ * pZ Z ordinate of the public point.
+ * r First part of the signature as an mp_int.
+ * sm Second part of the signature as an mp_int.
+ * res Result of verification: 1 when the signature matches.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
+ mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit u1d[2*5];
+ sp_digit u2d[2*5];
+ sp_digit sd[2*5];
+ sp_digit tmpd[2*5 * 5];
+ sp_point_256 p1d;
+ sp_point_256 p2d;
+#endif
+ sp_digit* u1 = NULL;
+ sp_digit* u2 = NULL;
+ sp_digit* s = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_256* p1;
+ sp_point_256* p2 = NULL;
+ sp_digit carry;
+ int64_t c;
+ int err;
+
+ err = sp_256_point_new_5(heap, p1d, p1);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_5(heap, p2d, p2);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ u1 = d + 0 * 5;
+ u2 = d + 2 * 5;
+ s = d + 4 * 5;
+ tmp = d + 6 * 5;
+#else
+ u1 = u1d;
+ u2 = u2d;
+ s = sd;
+ tmp = tmpd;
+#endif
+
+ if (hashLen > 32U) {
+ hashLen = 32U;
+ }
+
+ sp_256_from_bin(u1, 5, hash, (int)hashLen);
+ sp_256_from_mp(u2, 5, r);
+ sp_256_from_mp(s, 5, sm);
+ sp_256_from_mp(p2->x, 5, pX);
+ sp_256_from_mp(p2->y, 5, pY);
+ sp_256_from_mp(p2->z, 5, pZ);
+
+ {
+ sp_256_mul_5(s, s, p256_norm_order);
+ }
+ err = sp_256_mod_5(s, s, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_5(s);
+ {
+ sp_256_mont_inv_order_5(s, s, tmp);
+ sp_256_mont_mul_order_5(u1, u1, s);
+ sp_256_mont_mul_order_5(u2, u2, s);
+ }
+
+ err = sp_256_ecc_mulmod_base_5(p1, u1, 0, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_mulmod_5(p2, p2, u2, 0, heap);
+ }
+
+ if (err == MP_OKAY) {
+ {
+ sp_256_proj_point_add_5(p1, p1, p2, tmp);
+ if (sp_256_iszero_5(p1->z)) {
+ if (sp_256_iszero_5(p1->x)
&& sp_256_iszero_5(p1->y)) {
+ sp_256_proj_point_dbl_5(p1, p2, tmp);
+ }
+ else {
+ /* Y ordinate is not used from here - don't set. */
+ p1->x[0] = 0;
+ p1->x[1] = 0;
+ p1->x[2] = 0;
+ p1->x[3] = 0;
+ p1->x[4] = 0;
+ XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ }
+ }
+ }
+
+ /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+ /* Reload r and convert to Montgomery form. */
+ sp_256_from_mp(u2, 5, r);
+ err = sp_256_mod_mul_norm_5(u2, u2, p256_mod);
+ }
+
+ if (err == MP_OKAY) {
+ /* u1 = r.z'.z' mod prime */
+ sp_256_mont_sqr_5(p1->z, p1->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_5(u1, u2, p1->z, p256_mod, p256_mp_mod);
+ *res = (int)(sp_256_cmp_5(p1->x, u1) == 0);
+ if (*res == 0) {
+ /* Reload r and add order. */
+ sp_256_from_mp(u2, 5, r);
+ carry = sp_256_add_5(u2, u2, p256_order);
+ /* Carry means result is greater than mod and is not valid. */
+ if (carry == 0) {
+ sp_256_norm_5(u2);
+
+ /* Compare with mod and if greater or equal then not valid. */
+ c = sp_256_cmp_5(u2, p256_mod);
+ if (c < 0) {
+ /* Convert to Montgomery form */
+ err = sp_256_mod_mul_norm_5(u2, u2, p256_mod);
+ if (err == MP_OKAY) {
+ /* u1 = (r + 1*order).z'.z' mod prime */
+ sp_256_mont_mul_5(u1, u2, p1->z, p256_mod,
+ p256_mp_mod);
+ *res = (int)(sp_256_cmp_5(p1->x, u1) == 0);
+ }
+ }
+ }
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL)
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+ sp_256_point_free_5(p1, 0, heap);
+ sp_256_point_free_5(p2, 0, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point EC point.
+ * heap Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_256_ecc_is_point_5(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit t1d[2*5];
+ sp_digit t2d[2*5];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 4, heap, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = d + 0 * 5;
+ t2 = d + 2 * 5;
+#else
+ (void)heap;
+
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ sp_256_sqr_5(t1, point->y);
+ (void)sp_256_mod_5(t1, t1, p256_mod);
+ sp_256_sqr_5(t2, point->x);
+ (void)sp_256_mod_5(t2, t2, p256_mod);
+ sp_256_mul_5(t2, t2, point->x);
+ (void)sp_256_mod_5(t2, t2, p256_mod);
+ (void)sp_256_sub_5(t2, p256_mod, t2);
+ sp_256_mont_add_5(t1, t1, t2, p256_mod);
+
+ sp_256_mont_add_5(t1, t1, point->x, p256_mod);
+ sp_256_mont_add_5(t1, t1, point->x, p256_mod);
+ sp_256_mont_add_5(t1, t1, point->x, p256_mod);
+
+ if (sp_256_cmp_5(t1, p256_b) != 0) {
+ err = MP_VAL;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
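+ * The Z ordinate is implicitly 1. The check evaluates
+ * y^2 - x^3 + 3.x and compares it with b, i.e. it tests the affine
+ * curve equation y^2 = x^3 - 3.x + b (mod p).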
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 pubd;
+#endif
+ sp_point_256* pub;
+ byte one[1] = { 1 };
+ int err;
+
+ err = sp_256_point_new_5(NULL, pubd, pub);
+ if (err == MP_OKAY) {
+ sp_256_from_mp(pub->x, 5, pX);
+ sp_256_from_mp(pub->y, 5, pY);
+ sp_256_from_bin(pub->z, 5, one, (int)sizeof(one));
+
+ err = sp_256_ecc_is_point_5(pub, NULL);
+ }
+
+ sp_256_point_free_5(pub, 0, NULL);
+
+ return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit privd[5];
+ sp_point_256 pubd;
+ sp_point_256 pd;
+#endif
+ sp_digit* priv = NULL;
+ sp_point_256* pub;
+ sp_point_256* p = NULL;
+ byte one[1] = { 1 };
+ int err;
+
+ err = sp_256_point_new_5(heap, pubd, pub);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_5(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (priv == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ priv = privd;
+#endif
+
+ sp_256_from_mp(pub->x, 5, pX);
+ sp_256_from_mp(pub->y, 5, pY);
+ sp_256_from_bin(pub->z, 5, one, (int)sizeof(one));
+ sp_256_from_mp(priv, 5, privm);
+
+ /* Check point at infinity. */
+ if ((sp_256_iszero_5(pub->x) != 0) &&
+ (sp_256_iszero_5(pub->y) != 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check range of X and Y */
+ if (sp_256_cmp_5(pub->x, p256_mod) >= 0 ||
+ sp_256_cmp_5(pub->y, p256_mod) >= 0) {
+ err = ECC_OUT_OF_RANGE_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check point is on curve */
+ err = sp_256_ecc_is_point_5(pub, heap);
+ }
+
+ if (err == MP_OKAY) {
+ /* Point * order = infinity */
+ err = sp_256_ecc_mulmod_5(p, pub, p256_order, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is infinity */
+ if ((sp_256_iszero_5(p->x) == 0) ||
+ (sp_256_iszero_5(p->y) == 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Base * private = point */
+ err = sp_256_ecc_mulmod_base_5(p, priv, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is public key */
+ if (sp_256_cmp_5(p->x, pub->x) != 0 ||
+ sp_256_cmp_5(p->y, pub->y) != 0) {
+ err = ECC_PRIV_KEY_E;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (priv != NULL) {
+ XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_5(p, 0, heap);
+ sp_256_point_free_5(pub, 0, heap);
+
+ return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
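+ * Points are in Jacobian projective form, where (X, Y, Z) represents
+ * the affine point (X/Z^2, Y/Z^3).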
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ) + * + * pX First EC point's X ordinate. + * pY First EC point's Y ordinate. + * pZ First EC point's Z ordinate. + * qX Second EC point's X ordinate. + * qY Second EC point's Y ordinate. + * qZ Second EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 5 * 5]; + sp_point_256 pd; + sp_point_256 qd; +#endif + sp_digit* tmp; + sp_point_256* p; + sp_point_256* q = NULL; + int err; + + err = sp_256_point_new_5(NULL, pd, p); + if (err == MP_OKAY) { + err = sp_256_point_new_5(NULL, qd, q); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 5, pX); + sp_256_from_mp(p->y, 5, pY); + sp_256_from_mp(p->z, 5, pZ); + sp_256_from_mp(q->x, 5, qX); + sp_256_from_mp(q->y, 5, qY); + sp_256_from_mp(q->z, 5, qZ); + + sp_256_proj_point_add_5(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_5(q, 0, NULL); + sp_256_point_free_5(p, 0, NULL); + + return err; +} + +/* Double a projective EC point. + * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
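+ * The result stays in projective form; use sp_ecc_map_256 to convert
+ * it back to affine coordinates.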
+ */ +int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 5 * 2]; + sp_point_256 pd; +#endif + sp_digit* tmp; + sp_point_256* p; + int err; + + err = sp_256_point_new_5(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 5, pX); + sp_256_from_mp(p->y, 5, pY); + sp_256_from_mp(p->z, 5, pZ); + + sp_256_proj_point_dbl_5(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_5(p, 0, NULL); + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 5 * 4]; + sp_point_256 pd; +#endif + sp_digit* tmp; + sp_point_256* p; + int err; + + err = sp_256_point_new_5(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 4, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 5, pX); + sp_256_from_mp(p->y, 5, pY); + sp_256_from_mp(p->z, 5, pZ); + + sp_256_map_5(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, pZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_5(p, 0, NULL); + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
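+ * The P256 prime p is congruent to 3 mod 4, so the root is the fixed
+ * power y^((p+1)/4) mod p; the chain of Montgomery squarings and
+ * multiplications below evaluates exactly that exponent.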
+ */ +static int sp_256_mont_sqrt_5(sp_digit* y) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit t1d[2 * 5]; + sp_digit t2d[2 * 5]; +#endif + sp_digit* t1; + sp_digit* t2; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 5, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = d + 0 * 5; + t2 = d + 2 * 5; +#else + t1 = t1d; + t2 = t2d; +#endif + + { + /* t2 = y ^ 0x2 */ + sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0x3 */ + sp_256_mont_mul_5(t1, t2, y, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xc */ + sp_256_mont_sqr_n_5(t2, t1, 2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xf */ + sp_256_mont_mul_5(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xf0 */ + sp_256_mont_sqr_n_5(t2, t1, 4, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xff */ + sp_256_mont_mul_5(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xff00 */ + sp_256_mont_sqr_n_5(t2, t1, 8, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffff */ + sp_256_mont_mul_5(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xffff0000 */ + sp_256_mont_sqr_n_5(t2, t1, 16, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff */ + sp_256_mont_mul_5(t1, t1, t2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000000 */ + sp_256_mont_sqr_n_5(t1, t1, 32, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001 */ + sp_256_mont_mul_5(t1, t1, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */ + sp_256_mont_sqr_n_5(t1, t1, 96, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */ + sp_256_mont_mul_5(t1, t1, y, p256_mod, p256_mp_mod); + sp_256_mont_sqr_n_5(y, t1, 94, p256_mod, p256_mp_mod); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
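+ * The Y ordinate is recovered by evaluating the curve equation
+ * y^2 = x^3 - 3x + b (mod p), taking a modular square root and negating
+ * the result when its parity does not match odd.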
+ */ +int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit xd[2 * 5]; + sp_digit yd[2 * 5]; +#endif + sp_digit* x = NULL; + sp_digit* y = NULL; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 5, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + x = d + 0 * 5; + y = d + 2 * 5; +#else + x = xd; + y = yd; +#endif + + sp_256_from_mp(x, 5, xm); + err = sp_256_mod_mul_norm_5(x, x, p256_mod); + } + if (err == MP_OKAY) { + /* y = x^3 */ + { + sp_256_mont_sqr_5(y, x, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(y, y, x, p256_mod, p256_mp_mod); + } + /* y = x^3 - 3x */ + sp_256_mont_sub_5(y, y, x, p256_mod); + sp_256_mont_sub_5(y, y, x, p256_mod); + sp_256_mont_sub_5(y, y, x, p256_mod); + /* y = x^3 - 3x + b */ + err = sp_256_mod_mul_norm_5(x, p256_b, p256_mod); + } + if (err == MP_OKAY) { + sp_256_mont_add_5(y, y, x, p256_mod); + /* y = sqrt(x^3 - 3x + b) */ + err = sp_256_mont_sqrt_5(y); + } + if (err == MP_OKAY) { + XMEMSET(y + 5, 0, 5U * sizeof(sp_digit)); + sp_256_mont_reduce_5(y, p256_mod, p256_mp_mod); + if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) { + sp_256_mont_sub_5(y, p256_mod, y, p256_mod); + } + + err = sp_256_to_mp(y, ym); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} +#endif +#endif /* !WOLFSSL_SP_NO_256 */ +#ifdef WOLFSSL_SP_384 + +/* Point structure to use. */ +typedef struct sp_point_384 { + sp_digit x[2 * 7]; + sp_digit y[2 * 7]; + sp_digit z[2 * 7]; + int infinity; +} sp_point_384; + +/* The modulus (prime) of the curve P384. */ +static const sp_digit p384_mod[7] = { + 0x000000ffffffffL,0x7ffe0000000000L,0x7ffffffffbffffL,0x7fffffffffffffL, + 0x7fffffffffffffL,0x7fffffffffffffL,0x3fffffffffffffL +}; +/* The Montogmery normalizer for modulus of the curve P384. */ +static const sp_digit p384_norm_mod[7] = { + 0x7fffff00000001L,0x0001ffffffffffL,0x00000000040000L,0x00000000000000L, + 0x00000000000000L,0x00000000000000L,0x00000000000000L +}; +/* The Montogmery multiplier for modulus of the curve P384. */ +static sp_digit p384_mp_mod = 0x0000100000001; +#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ + defined(HAVE_ECC_VERIFY) +/* The order of the curve P384. */ +static const sp_digit p384_order[7] = { + 0x6c196accc52973L,0x1b6491614ef5d9L,0x07d0dcb77d6068L,0x7ffffffe3b1a6cL, + 0x7fffffffffffffL,0x7fffffffffffffL,0x3fffffffffffffL +}; +#endif +/* The order of the curve P384 minus 2. */ +static const sp_digit p384_order2[7] = { + 0x6c196accc52971L,0x1b6491614ef5d9L,0x07d0dcb77d6068L,0x7ffffffe3b1a6cL, + 0x7fffffffffffffL,0x7fffffffffffffL,0x3fffffffffffffL +}; +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montogmery normalizer for order of the curve P384. */ +static const sp_digit p384_norm_order[7] = { + 0x13e695333ad68dL,0x649b6e9eb10a26L,0x782f2348829f97L,0x00000001c4e593L, + 0x00000000000000L,0x00000000000000L,0x00000000000000L +}; +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montogmery multiplier for order of the curve P384. 
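+ * Like p384_mp_mod, this is the negated inverse of the modulus (here
+ * the order) mod 2^55, matching the 55-bit digits used by this
+ * implementation.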
*/ +static sp_digit p384_mp_order = 0x546089e88fdc45l; +#endif +/* The base point of curve P384. */ +static const sp_point_384 p384_base = { + /* X ordinate */ + { + 0x545e3872760ab7L,0x64bb7eaa52d874L,0x020950a8e1540bL, + 0x5d3cdcc2cfba0fL,0x0ad746e1d3b628L,0x26f1d638e3de64L,0x2aa1f288afa2c1L, + 0L, 0L, 0L, 0L, 0L, 0L, 0L + }, + /* Y ordinate */ + { + 0x431d7c90ea0e5fL,0x639c3afd033af4L,0x4ed7c2e3002982L, + 0x44d0a3e74ed188L,0x2dc29f8f41dbd2L,0x0debb3d317f252L,0x0d85f792a5898bL, + 0L, 0L, 0L, 0L, 0L, 0L, 0L + }, + /* Z ordinate */ + { + 0x00000000000001L,0x00000000000000L,0x00000000000000L, + 0x00000000000000L,0x00000000000000L,0x00000000000000L,0x00000000000000L, + 0L, 0L, 0L, 0L, 0L, 0L, 0L + }, + /* infinity */ + 0 +}; +#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY) +static const sp_digit p384_b[7] = { + 0x05c8edd3ec2aefL,0x731b145da33a55L,0x3d404e1d6b1958L,0x740a089018a044L, + 0x02d19181d9c6efL,0x7c9311c0ad7c7fL,0x2ccc4be9f88fb9L +}; +#endif + +static int sp_384_point_new_ex_7(void* heap, sp_point_384* sp, sp_point_384** p) +{ + int ret = MP_OKAY; + (void)heap; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + (void)sp; + *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); +#else + *p = sp; +#endif + if (*p == NULL) { + ret = MEMORY_E; + } + return ret; +} + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +/* Allocate memory for point and return error. */ +#define sp_384_point_new_7(heap, sp, p) sp_384_point_new_ex_7((heap), NULL, &(p)) +#else +/* Set pointer to data and return no error. */ +#define sp_384_point_new_7(heap, sp, p) sp_384_point_new_ex_7((heap), &(sp), &(p)) +#endif + + +static void sp_384_point_free_7(sp_point_384* p, int clear, void* heap) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +/* If valid pointer then clear point data if requested and free data. */ + if (p != NULL) { + if (clear != 0) { + XMEMSET(p, 0, sizeof(*p)); + } + XFREE(p, heap, DYNAMIC_TYPE_ECC); + } +#else +/* Clear point data if requested. */ + if (clear != 0) { + XMEMSET(p, 0, sizeof(*p)); + } +#endif + (void)heap; +} + +/* Multiply a number by Montogmery normalizer mod modulus (prime). + * + * r The resulting Montgomery form number. + * a The number to convert. + * m The modulus (prime). + * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise. 
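+ * The normalizer is 2^384 mod m, so the result is the Montgomery
+ * representation a * 2^384 mod m; the word-level reduction below relies
+ * on the sparse form of the P384 prime.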
+ */ +static int sp_384_mod_mul_norm_7(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + int64_t* td; +#else + int64_t td[12]; + int64_t a32d[12]; +#endif + int64_t* t; + int64_t* a32; + int64_t o; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 12, NULL, DYNAMIC_TYPE_ECC); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = td; + a32 = td + 12; +#else + t = td; + a32 = a32d; +#endif + + a32[0] = (sp_digit)(a[0]) & 0xffffffffL; + a32[1] = (sp_digit)(a[0] >> 32U); + a32[1] |= a[1] << 23U; + a32[1] &= 0xffffffffL; + a32[2] = (sp_digit)(a[1] >> 9U) & 0xffffffffL; + a32[3] = (sp_digit)(a[1] >> 41U); + a32[3] |= a[2] << 14U; + a32[3] &= 0xffffffffL; + a32[4] = (sp_digit)(a[2] >> 18U) & 0xffffffffL; + a32[5] = (sp_digit)(a[2] >> 50U); + a32[5] |= a[3] << 5U; + a32[5] &= 0xffffffffL; + a32[6] = (sp_digit)(a[3] >> 27U); + a32[6] |= a[4] << 28U; + a32[6] &= 0xffffffffL; + a32[7] = (sp_digit)(a[4] >> 4U) & 0xffffffffL; + a32[8] = (sp_digit)(a[4] >> 36U); + a32[8] |= a[5] << 19U; + a32[8] &= 0xffffffffL; + a32[9] = (sp_digit)(a[5] >> 13U) & 0xffffffffL; + a32[10] = (sp_digit)(a[5] >> 45U); + a32[10] |= a[6] << 10U; + a32[10] &= 0xffffffffL; + a32[11] = (sp_digit)(a[6] >> 22U) & 0xffffffffL; + + /* 1 0 0 0 0 0 0 0 1 1 0 -1 */ + t[0] = 0 + a32[0] + a32[8] + a32[9] - a32[11]; + /* -1 1 0 0 0 0 0 0 -1 0 1 1 */ + t[1] = 0 - a32[0] + a32[1] - a32[8] + a32[10] + a32[11]; + /* 0 -1 1 0 0 0 0 0 0 -1 0 1 */ + t[2] = 0 - a32[1] + a32[2] - a32[9] + a32[11]; + /* 1 0 -1 1 0 0 0 0 1 1 -1 -1 */ + t[3] = 0 + a32[0] - a32[2] + a32[3] + a32[8] + a32[9] - a32[10] - a32[11]; + /* 1 1 0 -1 1 0 0 0 1 2 1 -2 */ + t[4] = 0 + a32[0] + a32[1] - a32[3] + a32[4] + a32[8] + 2 * a32[9] + a32[10] - 2 * a32[11]; + /* 0 1 1 0 -1 1 0 0 0 1 2 1 */ + t[5] = 0 + a32[1] + a32[2] - a32[4] + a32[5] + a32[9] + 2 * a32[10] + a32[11]; + /* 0 0 1 1 0 -1 1 0 0 0 1 2 */ + t[6] = 0 + a32[2] + a32[3] - a32[5] + a32[6] + a32[10] + 2 * a32[11]; + /* 0 0 0 1 1 0 -1 1 0 0 0 1 */ + t[7] = 0 + a32[3] + a32[4] - a32[6] + a32[7] + a32[11]; + /* 0 0 0 0 1 1 0 -1 1 0 0 0 */ + t[8] = 0 + a32[4] + a32[5] - a32[7] + a32[8]; + /* 0 0 0 0 0 1 1 0 -1 1 0 0 */ + t[9] = 0 + a32[5] + a32[6] - a32[8] + a32[9]; + /* 0 0 0 0 0 0 1 1 0 -1 1 0 */ + t[10] = 0 + a32[6] + a32[7] - a32[9] + a32[10]; + /* 0 0 0 0 0 0 0 1 1 0 -1 1 */ + t[11] = 0 + a32[7] + a32[8] - a32[10] + a32[11]; + + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + t[8] += t[7] >> 32; t[7] &= 0xffffffff; + t[9] += t[8] >> 32; t[8] &= 0xffffffff; + t[10] += t[9] >> 32; t[9] &= 0xffffffff; + t[11] += t[10] >> 32; t[10] &= 0xffffffff; + o = t[11] >> 32; t[11] &= 0xffffffff; + t[0] += o; + t[1] -= o; + t[3] += o; + t[4] += o; + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 
0xffffffff; + t[8] += t[7] >> 32; t[7] &= 0xffffffff; + t[9] += t[8] >> 32; t[8] &= 0xffffffff; + t[10] += t[9] >> 32; t[9] &= 0xffffffff; + t[11] += t[10] >> 32; t[10] &= 0xffffffff; + + r[0] = t[0]; + r[0] |= t[1] << 32U; + r[0] &= 0x7fffffffffffffLL; + r[1] = (sp_digit)(t[1] >> 23); + r[1] |= t[2] << 9U; + r[1] |= t[3] << 41U; + r[1] &= 0x7fffffffffffffLL; + r[2] = (sp_digit)(t[3] >> 14); + r[2] |= t[4] << 18U; + r[2] |= t[5] << 50U; + r[2] &= 0x7fffffffffffffLL; + r[3] = (sp_digit)(t[5] >> 5); + r[3] |= t[6] << 27U; + r[3] &= 0x7fffffffffffffLL; + r[4] = (sp_digit)(t[6] >> 28); + r[4] |= t[7] << 4U; + r[4] |= t[8] << 36U; + r[4] &= 0x7fffffffffffffLL; + r[5] = (sp_digit)(t[8] >> 19); + r[5] |= t[9] << 13U; + r[5] |= t[10] << 45U; + r[5] &= 0x7fffffffffffffLL; + r[6] = (sp_digit)(t[10] >> 10); + r[6] |= t[11] << 22U; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 55 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 55 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0x7fffffffffffffL; + s = 55U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 55U) <= (word32)DIGIT_BIT) { + s += 55U; + r[j] &= 0x7fffffffffffffL; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 55) { + r[j] &= 0x7fffffffffffffL; + if (j + 1 >= size) { + break; + } + s = 55 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Convert a point of type ecc_point to type sp_point_384. + * + * p Point of type sp_point_384 (result). + * pm Point of type ecc_point. + */ +static void sp_384_point_from_ecc_point_7(sp_point_384* p, const ecc_point* pm) +{ + XMEMSET(p->x, 0, sizeof(p->x)); + XMEMSET(p->y, 0, sizeof(p->y)); + XMEMSET(p->z, 0, sizeof(p->z)); + sp_384_from_mp(p->x, 7, pm->x); + sp_384_from_mp(p->y, 7, pm->y); + sp_384_from_mp(p->z, 7, pm->z); + p->infinity = 0; +} + +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. 
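+ * returns MP_OKAY on success and an error when growing the
+ * multi-precision integer fails.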
+ */ +static int sp_384_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 55 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 7); + r->used = 7; + mp_clamp(r); +#elif DIGIT_BIT < 55 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 7; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 55) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 55 - s; + } + r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 7; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 55 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 55 - s; + } + else { + s += 55; + } + } + r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Convert a point of type sp_point_384 to type ecc_point. + * + * p Point of type sp_point_384. + * pm Point of type ecc_point (result). + * returns MEMORY_E when allocation of memory in ecc_point fails otherwise + * MP_OKAY. + */ +static int sp_384_point_to_ecc_point_7(const sp_point_384* p, ecc_point* pm) +{ + int err; + + err = sp_384_to_mp(p->x, pm->x); + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, pm->y); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, pm->z); + } + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_384_mul_7(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j, k; + int128_t c; + + c = ((int128_t)a[6]) * b[6]; + r[13] = (sp_digit)(c >> 55); + c = (c & 0x7fffffffffffffL) << 55; + for (k = 11; k >= 0; k--) { + for (i = 6; i >= 0; i--) { + j = k - i; + if (j >= 7) { + break; + } + if (j < 0) { + continue; + } + + c += ((int128_t)a[i]) * b[j]; + } + r[k + 2] += c >> 110; + r[k + 1] = (c >> 55) & 0x7fffffffffffffL; + c = (c & 0x7fffffffffffffL) << 55; + } + r[0] = (sp_digit)(c >> 55); +} + +#else +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
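+ * All digit products for one output column are accumulated in an
+ * int128_t before a single carry propagation pass, as 2*55-bit products
+ * plus carries fit comfortably in 128 bits.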
+ */ +SP_NOINLINE static void sp_384_mul_7(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int128_t t0 = ((int128_t)a[ 0]) * b[ 0]; + int128_t t1 = ((int128_t)a[ 0]) * b[ 1] + + ((int128_t)a[ 1]) * b[ 0]; + int128_t t2 = ((int128_t)a[ 0]) * b[ 2] + + ((int128_t)a[ 1]) * b[ 1] + + ((int128_t)a[ 2]) * b[ 0]; + int128_t t3 = ((int128_t)a[ 0]) * b[ 3] + + ((int128_t)a[ 1]) * b[ 2] + + ((int128_t)a[ 2]) * b[ 1] + + ((int128_t)a[ 3]) * b[ 0]; + int128_t t4 = ((int128_t)a[ 0]) * b[ 4] + + ((int128_t)a[ 1]) * b[ 3] + + ((int128_t)a[ 2]) * b[ 2] + + ((int128_t)a[ 3]) * b[ 1] + + ((int128_t)a[ 4]) * b[ 0]; + int128_t t5 = ((int128_t)a[ 0]) * b[ 5] + + ((int128_t)a[ 1]) * b[ 4] + + ((int128_t)a[ 2]) * b[ 3] + + ((int128_t)a[ 3]) * b[ 2] + + ((int128_t)a[ 4]) * b[ 1] + + ((int128_t)a[ 5]) * b[ 0]; + int128_t t6 = ((int128_t)a[ 0]) * b[ 6] + + ((int128_t)a[ 1]) * b[ 5] + + ((int128_t)a[ 2]) * b[ 4] + + ((int128_t)a[ 3]) * b[ 3] + + ((int128_t)a[ 4]) * b[ 2] + + ((int128_t)a[ 5]) * b[ 1] + + ((int128_t)a[ 6]) * b[ 0]; + int128_t t7 = ((int128_t)a[ 1]) * b[ 6] + + ((int128_t)a[ 2]) * b[ 5] + + ((int128_t)a[ 3]) * b[ 4] + + ((int128_t)a[ 4]) * b[ 3] + + ((int128_t)a[ 5]) * b[ 2] + + ((int128_t)a[ 6]) * b[ 1]; + int128_t t8 = ((int128_t)a[ 2]) * b[ 6] + + ((int128_t)a[ 3]) * b[ 5] + + ((int128_t)a[ 4]) * b[ 4] + + ((int128_t)a[ 5]) * b[ 3] + + ((int128_t)a[ 6]) * b[ 2]; + int128_t t9 = ((int128_t)a[ 3]) * b[ 6] + + ((int128_t)a[ 4]) * b[ 5] + + ((int128_t)a[ 5]) * b[ 4] + + ((int128_t)a[ 6]) * b[ 3]; + int128_t t10 = ((int128_t)a[ 4]) * b[ 6] + + ((int128_t)a[ 5]) * b[ 5] + + ((int128_t)a[ 6]) * b[ 4]; + int128_t t11 = ((int128_t)a[ 5]) * b[ 6] + + ((int128_t)a[ 6]) * b[ 5]; + int128_t t12 = ((int128_t)a[ 6]) * b[ 6]; + + t1 += t0 >> 55; r[ 0] = t0 & 0x7fffffffffffffL; + t2 += t1 >> 55; r[ 1] = t1 & 0x7fffffffffffffL; + t3 += t2 >> 55; r[ 2] = t2 & 0x7fffffffffffffL; + t4 += t3 >> 55; r[ 3] = t3 & 0x7fffffffffffffL; + t5 += t4 >> 55; r[ 4] = t4 & 0x7fffffffffffffL; + t6 += t5 >> 55; r[ 5] = t5 & 0x7fffffffffffffL; + t7 += t6 >> 55; r[ 6] = t6 & 0x7fffffffffffffL; + t8 += t7 >> 55; r[ 7] = t7 & 0x7fffffffffffffL; + t9 += t8 >> 55; r[ 8] = t8 & 0x7fffffffffffffL; + t10 += t9 >> 55; r[ 9] = t9 & 0x7fffffffffffffL; + t11 += t10 >> 55; r[10] = t10 & 0x7fffffffffffffL; + t12 += t11 >> 55; r[11] = t11 & 0x7fffffffffffffL; + r[13] = (sp_digit)(t12 >> 55); + r[12] = t12 & 0x7fffffffffffffL; +} + +#endif /* WOLFSSL_SP_SMALL */ +#define sp_384_mont_reduce_order_7 sp_384_mont_reduce_7 + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static sp_digit sp_384_cmp_7(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=6; i>=0; i--) { + r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#else + r |= (a[ 6] - b[ 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 5] - b[ 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 4] - b[ 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 3] - b[ 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 2] - b[ 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 1] - b[ 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 0] - b[ 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Conditionally subtract b from a using the mask m. 
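+ * Using a mask instead of a branch keeps the operation constant time.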
+ * m is -1 to subtract and 0 when not. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static void sp_384_cond_sub_7(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 7; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + r[ 0] = a[ 0] - (b[ 0] & m); + r[ 1] = a[ 1] - (b[ 1] & m); + r[ 2] = a[ 2] - (b[ 2] & m); + r[ 3] = a[ 3] - (b[ 3] & m); + r[ 4] = a[ 4] - (b[ 4] & m); + r[ 5] = a[ 5] - (b[ 5] & m); + r[ 6] = a[ 6] - (b[ 6] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_384_mul_add_7(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 7; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x7fffffffffffffL; + t >>= 55; + } + r[7] += t; +#else + int128_t tb = b; + int128_t t[7]; + + t[ 0] = tb * a[ 0]; + t[ 1] = tb * a[ 1]; + t[ 2] = tb * a[ 2]; + t[ 3] = tb * a[ 3]; + t[ 4] = tb * a[ 4]; + t[ 5] = tb * a[ 5]; + t[ 6] = tb * a[ 6]; + r[ 0] += (sp_digit) (t[ 0] & 0x7fffffffffffffL); + r[ 1] += (sp_digit)((t[ 0] >> 55) + (t[ 1] & 0x7fffffffffffffL)); + r[ 2] += (sp_digit)((t[ 1] >> 55) + (t[ 2] & 0x7fffffffffffffL)); + r[ 3] += (sp_digit)((t[ 2] >> 55) + (t[ 3] & 0x7fffffffffffffL)); + r[ 4] += (sp_digit)((t[ 3] >> 55) + (t[ 4] & 0x7fffffffffffffL)); + r[ 5] += (sp_digit)((t[ 4] >> 55) + (t[ 5] & 0x7fffffffffffffL)); + r[ 6] += (sp_digit)((t[ 5] >> 55) + (t[ 6] & 0x7fffffffffffffL)); + r[ 7] += (sp_digit) (t[ 6] >> 55); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 55. + * + * a Array of sp_digit to normalize. + */ +static void sp_384_norm_7(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 6; i++) { + a[i+1] += a[i] >> 55; + a[i] &= 0x7fffffffffffffL; + } +#else + a[1] += a[0] >> 55; a[0] &= 0x7fffffffffffffL; + a[2] += a[1] >> 55; a[1] &= 0x7fffffffffffffL; + a[3] += a[2] >> 55; a[2] &= 0x7fffffffffffffL; + a[4] += a[3] >> 55; a[3] &= 0x7fffffffffffffL; + a[5] += a[4] >> 55; a[4] &= 0x7fffffffffffffL; + a[6] += a[5] >> 55; a[5] &= 0x7fffffffffffffL; +#endif +} + +/* Shift the result in the high 384 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. + */ +static void sp_384_mont_shift_7(sp_digit* r, const sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + word64 n; + + n = a[6] >> 54; + for (i = 0; i < 6; i++) { + n += (word64)a[7 + i] << 1; + r[i] = n & 0x7fffffffffffffL; + n >>= 55; + } + n += (word64)a[13] << 1; + r[6] = n; +#else + word64 n; + + n = a[6] >> 54; + n += (word64)a[ 7] << 1U; r[ 0] = n & 0x7fffffffffffffUL; n >>= 55U; + n += (word64)a[ 8] << 1U; r[ 1] = n & 0x7fffffffffffffUL; n >>= 55U; + n += (word64)a[ 9] << 1U; r[ 2] = n & 0x7fffffffffffffUL; n >>= 55U; + n += (word64)a[10] << 1U; r[ 3] = n & 0x7fffffffffffffUL; n >>= 55U; + n += (word64)a[11] << 1U; r[ 4] = n & 0x7fffffffffffffUL; n >>= 55U; + n += (word64)a[12] << 1U; r[ 5] = n & 0x7fffffffffffffUL; n >>= 55U; + n += (word64)a[13] << 1U; r[ 6] = n; +#endif /* WOLFSSL_SP_SMALL */ + XMEMSET(&r[7], 0, sizeof(*r) * 7U); +} + +/* Reduce the number back to 384 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. 
+ * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static void sp_384_mont_reduce_7(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + int i; + sp_digit mu; + + sp_384_norm_7(a + 7); + + for (i=0; i<6; i++) { + mu = (a[i] * mp) & 0x7fffffffffffffL; + sp_384_mul_add_7(a+i, m, mu); + a[i+1] += a[i] >> 55; + } + mu = (a[i] * mp) & 0x3fffffffffffffL; + sp_384_mul_add_7(a+i, m, mu); + a[i+1] += a[i] >> 55; + a[i] &= 0x7fffffffffffffL; + + sp_384_mont_shift_7(a, a); + sp_384_cond_sub_7(a, a, m, 0 - (((a[6] >> 54) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_7(a); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_384_mont_mul_7(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_384_mul_7(r, a, b); + sp_384_mont_reduce_7(r, m, mp); +} + +#ifdef WOLFSSL_SP_SMALL +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_384_sqr_7(sp_digit* r, const sp_digit* a) +{ + int i, j, k; + int128_t c; + + c = ((int128_t)a[6]) * a[6]; + r[13] = (sp_digit)(c >> 55); + c = (c & 0x7fffffffffffffL) << 55; + for (k = 11; k >= 0; k--) { + for (i = 6; i >= 0; i--) { + j = k - i; + if (j >= 7 || i <= j) { + break; + } + if (j < 0) { + continue; + } + + c += ((int128_t)a[i]) * a[j] * 2; + } + if (i == j) { + c += ((int128_t)a[i]) * a[i]; + } + + r[k + 2] += c >> 110; + r[k + 1] = (c >> 55) & 0x7fffffffffffffL; + c = (c & 0x7fffffffffffffL) << 55; + } + r[0] = (sp_digit)(c >> 55); +} + +#else +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
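+ * Each cross product a[i]*a[j] with i != j appears twice in a square,
+ * so it is computed once and doubled, roughly halving the
+ * multiplications compared with a general product.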
+ */ +SP_NOINLINE static void sp_384_sqr_7(sp_digit* r, const sp_digit* a) +{ + int128_t t0 = ((int128_t)a[ 0]) * a[ 0]; + int128_t t1 = (((int128_t)a[ 0]) * a[ 1]) * 2; + int128_t t2 = (((int128_t)a[ 0]) * a[ 2]) * 2 + + ((int128_t)a[ 1]) * a[ 1]; + int128_t t3 = (((int128_t)a[ 0]) * a[ 3] + + ((int128_t)a[ 1]) * a[ 2]) * 2; + int128_t t4 = (((int128_t)a[ 0]) * a[ 4] + + ((int128_t)a[ 1]) * a[ 3]) * 2 + + ((int128_t)a[ 2]) * a[ 2]; + int128_t t5 = (((int128_t)a[ 0]) * a[ 5] + + ((int128_t)a[ 1]) * a[ 4] + + ((int128_t)a[ 2]) * a[ 3]) * 2; + int128_t t6 = (((int128_t)a[ 0]) * a[ 6] + + ((int128_t)a[ 1]) * a[ 5] + + ((int128_t)a[ 2]) * a[ 4]) * 2 + + ((int128_t)a[ 3]) * a[ 3]; + int128_t t7 = (((int128_t)a[ 1]) * a[ 6] + + ((int128_t)a[ 2]) * a[ 5] + + ((int128_t)a[ 3]) * a[ 4]) * 2; + int128_t t8 = (((int128_t)a[ 2]) * a[ 6] + + ((int128_t)a[ 3]) * a[ 5]) * 2 + + ((int128_t)a[ 4]) * a[ 4]; + int128_t t9 = (((int128_t)a[ 3]) * a[ 6] + + ((int128_t)a[ 4]) * a[ 5]) * 2; + int128_t t10 = (((int128_t)a[ 4]) * a[ 6]) * 2 + + ((int128_t)a[ 5]) * a[ 5]; + int128_t t11 = (((int128_t)a[ 5]) * a[ 6]) * 2; + int128_t t12 = ((int128_t)a[ 6]) * a[ 6]; + + t1 += t0 >> 55; r[ 0] = t0 & 0x7fffffffffffffL; + t2 += t1 >> 55; r[ 1] = t1 & 0x7fffffffffffffL; + t3 += t2 >> 55; r[ 2] = t2 & 0x7fffffffffffffL; + t4 += t3 >> 55; r[ 3] = t3 & 0x7fffffffffffffL; + t5 += t4 >> 55; r[ 4] = t4 & 0x7fffffffffffffL; + t6 += t5 >> 55; r[ 5] = t5 & 0x7fffffffffffffL; + t7 += t6 >> 55; r[ 6] = t6 & 0x7fffffffffffffL; + t8 += t7 >> 55; r[ 7] = t7 & 0x7fffffffffffffL; + t9 += t8 >> 55; r[ 8] = t8 & 0x7fffffffffffffL; + t10 += t9 >> 55; r[ 9] = t9 & 0x7fffffffffffffL; + t11 += t10 >> 55; r[10] = t10 & 0x7fffffffffffffL; + t12 += t11 >> 55; r[11] = t11 & 0x7fffffffffffffL; + r[13] = (sp_digit)(t12 >> 55); + r[12] = t12 & 0x7fffffffffffffL; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_384_mont_sqr_7(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_384_sqr_7(r, a); + sp_384_mont_reduce_7(r, m, mp); +} + +#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY) +/* Square the Montgomery form number a number of times. (r = a ^ n mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * n Number of times to square. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_384_mont_sqr_n_7(sp_digit* r, const sp_digit* a, int n, + const sp_digit* m, sp_digit mp) +{ + sp_384_mont_sqr_7(r, a, m, mp); + for (; n > 1; n--) { + sp_384_mont_sqr_7(r, r, m, mp); + } +} + +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ +#ifdef WOLFSSL_SP_SMALL +/* Mod-2 for the P384 curve. */ +static const uint64_t p384_mod_minus_2[6] = { + 0x00000000fffffffdU,0xffffffff00000000U,0xfffffffffffffffeU, + 0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU +}; +#endif /* !WOLFSSL_SP_SMALL */ + +/* Invert the number, in Montgomery form, modulo the modulus (prime) of the + * P384 curve. (r = 1 / a mod m) + * + * r Inverse result. + * a Number to invert. + * td Temporary data. 
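+ * By Fermat's little theorem 1/a = a^(p-2) mod p: the small build walks
+ * the bits of p384_mod_minus_2, while the large build uses the fixed
+ * chain of squarings and multiplications below.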
+ */ +static void sp_384_mont_inv_7(sp_digit* r, const sp_digit* a, sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 7); + for (i=382; i>=0; i--) { + sp_384_mont_sqr_7(t, t, p384_mod, p384_mp_mod); + if (p384_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64))) + sp_384_mont_mul_7(t, t, a, p384_mod, p384_mp_mod); + } + XMEMCPY(r, t, sizeof(sp_digit) * 7); +#else + sp_digit* t1 = td; + sp_digit* t2 = td + 2 * 7; + sp_digit* t3 = td + 4 * 7; + sp_digit* t4 = td + 6 * 7; + sp_digit* t5 = td + 8 * 7; + + /* 0x2 */ + sp_384_mont_sqr_7(t1, a, p384_mod, p384_mp_mod); + /* 0x3 */ + sp_384_mont_mul_7(t5, t1, a, p384_mod, p384_mp_mod); + /* 0xc */ + sp_384_mont_sqr_n_7(t1, t5, 2, p384_mod, p384_mp_mod); + /* 0xf */ + sp_384_mont_mul_7(t2, t5, t1, p384_mod, p384_mp_mod); + /* 0x1e */ + sp_384_mont_sqr_7(t1, t2, p384_mod, p384_mp_mod); + /* 0x1f */ + sp_384_mont_mul_7(t4, t1, a, p384_mod, p384_mp_mod); + /* 0x3e0 */ + sp_384_mont_sqr_n_7(t1, t4, 5, p384_mod, p384_mp_mod); + /* 0x3ff */ + sp_384_mont_mul_7(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0x7fe0 */ + sp_384_mont_sqr_n_7(t1, t2, 5, p384_mod, p384_mp_mod); + /* 0x7fff */ + sp_384_mont_mul_7(t4, t4, t1, p384_mod, p384_mp_mod); + /* 0x3fff8000 */ + sp_384_mont_sqr_n_7(t1, t4, 15, p384_mod, p384_mp_mod); + /* 0x3fffffff */ + sp_384_mont_mul_7(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0xfffffffc */ + sp_384_mont_sqr_n_7(t3, t2, 2, p384_mod, p384_mp_mod); + /* 0xfffffffd */ + sp_384_mont_mul_7(r, t3, a, p384_mod, p384_mp_mod); + /* 0xffffffff */ + sp_384_mont_mul_7(t3, t5, t3, p384_mod, p384_mp_mod); + /* 0xfffffffc0000000 */ + sp_384_mont_sqr_n_7(t1, t2, 30, p384_mod, p384_mp_mod); + /* 0xfffffffffffffff */ + sp_384_mont_mul_7(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffff000000000000000 */ + sp_384_mont_sqr_n_7(t1, t2, 60, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffff */ + sp_384_mont_mul_7(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */ + sp_384_mont_sqr_n_7(t1, t2, 120, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_7(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */ + sp_384_mont_sqr_n_7(t1, t2, 15, p384_mod, p384_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_7(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */ + sp_384_mont_sqr_n_7(t1, t2, 33, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */ + sp_384_mont_mul_7(t2, t3, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */ + sp_384_mont_sqr_n_7(t1, t2, 96, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */ + sp_384_mont_mul_7(r, r, t1, p384_mod, p384_mp_mod); + +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. + * p Montgomery form projective coordinate point. + * t Temporary ordinate data. 
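+ * The affine values are x = X/Z^2 and y = Y/Z^3, costing one field
+ * inversion of Z; Z is set to one in the result.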
+ */ +static void sp_384_map_7(sp_point_384* r, const sp_point_384* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*7; + int64_t n; + + sp_384_mont_inv_7(t1, p->z, t + 2*7); + + sp_384_mont_sqr_7(t2, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t1, t2, t1, p384_mod, p384_mp_mod); + + /* x /= z^2 */ + sp_384_mont_mul_7(r->x, p->x, t2, p384_mod, p384_mp_mod); + XMEMSET(r->x + 7, 0, sizeof(r->x) / 2U); + sp_384_mont_reduce_7(r->x, p384_mod, p384_mp_mod); + /* Reduce x to less than modulus */ + n = sp_384_cmp_7(r->x, p384_mod); + sp_384_cond_sub_7(r->x, r->x, p384_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_7(r->x); + + /* y /= z^3 */ + sp_384_mont_mul_7(r->y, p->y, t1, p384_mod, p384_mp_mod); + XMEMSET(r->y + 7, 0, sizeof(r->y) / 2U); + sp_384_mont_reduce_7(r->y, p384_mod, p384_mp_mod); + /* Reduce y to less than modulus */ + n = sp_384_cmp_7(r->y, p384_mod); + sp_384_cond_sub_7(r->y, r->y, p384_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_7(r->y); + + XMEMSET(r->z, 0, sizeof(r->z)); + r->z[0] = 1; + +} + +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_384_add_7(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 7; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#else +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_384_add_7(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + r[ 0] = a[ 0] + b[ 0]; + r[ 1] = a[ 1] + b[ 1]; + r[ 2] = a[ 2] + b[ 2]; + r[ 3] = a[ 3] + b[ 3]; + r[ 4] = a[ 4] + b[ 4]; + r[ 5] = a[ 5] + b[ 5]; + r[ 6] = a[ 6] + b[ 6]; + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Add two Montgomery form numbers (r = a + b % m). + * + * r Result of addition. + * a First number to add in Montogmery form. + * b Second number to add in Montogmery form. + * m Modulus (prime). + */ +static void sp_384_mont_add_7(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + (void)sp_384_add_7(r, a, b); + sp_384_norm_7(r); + sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_7(r); +} + +/* Double a Montgomery form number (r = a + a % m). + * + * r Result of doubling. + * a Number to double in Montogmery form. + * m Modulus (prime). + */ +static void sp_384_mont_dbl_7(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + (void)sp_384_add_7(r, a, a); + sp_384_norm_7(r); + sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_7(r); +} + +/* Triple a Montgomery form number (r = a + a + a % m). + * + * r Result of Tripling. + * a Number to triple in Montogmery form. + * m Modulus (prime). + */ +static void sp_384_mont_tpl_7(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + (void)sp_384_add_7(r, a, a); + sp_384_norm_7(r); + sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_7(r); + (void)sp_384_add_7(r, r, a); + sp_384_norm_7(r); + sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_7(r); +} + +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
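+ * Digits are subtracted without borrow propagation; callers such as
+ * sp_384_mont_sub_7 renormalize the result afterwards.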
+ */
+SP_NOINLINE static int sp_384_sub_7(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 7; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_384_sub_7(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[ 0] = a[ 0] - b[ 0];
+    r[ 1] = a[ 1] - b[ 1];
+    r[ 2] = a[ 2] - b[ 2];
+    r[ 3] = a[ 3] - b[ 3];
+    r[ 4] = a[ 4] - b[ 4];
+    r[ 5] = a[ 5] - b[ 5];
+    r[ 6] = a[ 6] - b[ 6];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_384_cond_add_7(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 7; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    r[ 0] = a[ 0] + (b[ 0] & m);
+    r[ 1] = a[ 1] + (b[ 1] & m);
+    r[ 2] = a[ 2] + (b[ 2] & m);
+    r[ 3] = a[ 3] + (b[ 3] & m);
+    r[ 4] = a[ 4] + (b[ 4] & m);
+    r[ 5] = a[ 5] + (b[ 5] & m);
+    r[ 6] = a[ 6] + (b[ 6] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r Result of subtraction.
+ * a Number to subtract from in Montogmery form.
+ * b Number to subtract with in Montogmery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_sub_7(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_384_sub_7(r, a, b);
+    sp_384_cond_add_7(r, r, m, r[6] >> 54);
+    sp_384_norm_7(r);
+}
+
+/* Shift number right one bit.
+ * Bottom bit is lost.
+ *
+ * r Result of shift.
+ * a Number to shift.
+ */
+SP_NOINLINE static void sp_384_rshift1_7(sp_digit* r, sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<6; i++) {
+        r[i] = ((a[i] >> 1) | (a[i + 1] << 54)) & 0x7fffffffffffffL;
+    }
+#else
+    r[0] = ((a[0] >> 1) | (a[1] << 54)) & 0x7fffffffffffffL;
+    r[1] = ((a[1] >> 1) | (a[2] << 54)) & 0x7fffffffffffffL;
+    r[2] = ((a[2] >> 1) | (a[3] << 54)) & 0x7fffffffffffffL;
+    r[3] = ((a[3] >> 1) | (a[4] << 54)) & 0x7fffffffffffffL;
+    r[4] = ((a[4] >> 1) | (a[5] << 54)) & 0x7fffffffffffffL;
+    r[5] = ((a[5] >> 1) | (a[6] << 54)) & 0x7fffffffffffffL;
+#endif
+    r[6] = a[6] >> 1;
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r Result of division by 2.
+ * a Number to divide.
+ * m Modulus (prime).
+ */
+static void sp_384_div2_7(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_384_cond_add_7(r, a, m, 0 - (a[0] & 1));
+    sp_384_norm_7(r);
+    sp_384_rshift1_7(r, r);
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * r Result of doubling point.
+ * p Point to double.
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_7(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*7;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result.
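+     * Doubling the point at infinity yields infinity, so the infinity
+     * flag is simply carried over to the result.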
*/ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_7(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_7(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_7(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_7(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_7(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_7(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_7(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_7(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_7(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_7(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_div2_7(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_7(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_7(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_7(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_7(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_7(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_7(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_7(y, y, t2, p384_mod); +} + +/* Compare two numbers to determine if they are equal. + * Constant time implementation. + * + * a First number to compare. + * b Second number to compare. + * returns 1 when equal and 0 otherwise. + */ +static int sp_384_cmp_equal_7(const sp_digit* a, const sp_digit* b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) | + (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6])) == 0; +} + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_384_proj_point_add_7(sp_point_384* r, const sp_point_384* p, const sp_point_384* q, + sp_digit* t) +{ + const sp_point_384* ap[2]; + sp_point_384* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*7; + sp_digit* t3 = t + 4*7; + sp_digit* t4 = t + 6*7; + sp_digit* t5 = t + 8*7; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Ensure only the first point is the same as the result. 
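+     * If q aliases r the operands are swapped first; point addition is
+     * commutative so the sum is unchanged.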
*/ + if (q == r) { + const sp_point_384* a = p; + p = q; + q = a; + } + + /* Check double */ + (void)sp_384_sub_7(t1, p384_mod, q->y); + sp_384_norm_7(t1); + if ((sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) & + (sp_384_cmp_equal_7(p->y, q->y) | sp_384_cmp_equal_7(p->y, t1))) != 0) { + sp_384_proj_point_dbl_7(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_384)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<7; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<7; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<7; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_7(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t1, t1, x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_7(t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_7(t3, t3, y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_7(t4, t4, q->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_7(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_7(t4, t4, t3, p384_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_7(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(z, z, t2, p384_mod, p384_mp_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_7(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_7(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t5, t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(x, x, t5, p384_mod); + sp_384_mont_dbl_7(t1, y, p384_mod); + sp_384_mont_sub_7(x, x, t1, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_7(y, y, x, p384_mod); + sp_384_mont_mul_7(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(y, y, t5, p384_mod); + } +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
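+ * Each scalar bit triggers one point addition and one doubling into
+ * mask-selected targets, so the sequence of operations does not depend
+ * on the secret scalar.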
+ */ +static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, const sp_digit* k, + int map, void* heap) +{ +#ifdef WOLFSSL_SP_NO_MALLOC + sp_point_384 t[3]; + sp_digit tmp[2 * 7 * 6]; +#else + sp_point_384* t; + sp_digit* tmp; +#endif + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + (void)heap; + +#ifndef WOLFSSL_SP_NO_MALLOC + t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + XMEMSET(t, 0, sizeof(sp_point_384) * 3); + + /* t[0] = {0, 0, 1} * norm */ + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_384_mod_mul_norm_7(t[1].x, g->x, p384_mod); + } + if (err == MP_OKAY) + err = sp_384_mod_mul_norm_7(t[1].y, g->y, p384_mod); + if (err == MP_OKAY) + err = sp_384_mod_mul_norm_7(t[1].z, g->z, p384_mod); + + if (err == MP_OKAY) { + i = 6; + c = 54; + n = k[i--] << (55 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) + break; + + n = k[i--]; + c = 55; + } + + y = (n >> 54) & 1; + n <<= 1; + + sp_384_proj_point_add_7(&t[y^1], &t[0], &t[1], tmp); + + XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), + sizeof(sp_point_384)); + sp_384_proj_point_dbl_7(&t[2], &t[2], tmp); + XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), &t[2], + sizeof(sp_point_384)); + } + + if (map != 0) { + sp_384_map_7(r, &t[0], tmp); + } + else { + XMEMCPY(r, &t[0], sizeof(sp_point_384)); + } + } + +#ifndef WOLFSSL_SP_NO_MALLOC + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 7 * 6); + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_point_384) * 3); + XFREE(t, NULL, DYNAMIC_TYPE_ECC); + } +#else + ForceZero(tmp, sizeof(tmp)); + ForceZero(t, sizeof(t)); +#endif + + return err; +} + +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
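+ * Working points are selected through masked address arithmetic
+ * (addr_mask) rather than secret-dependent indexing, keeping the memory
+ * access pattern independent of the scalar.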
+ */
+static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 t[3];
+    sp_digit tmp[2 * 7 * 6];
+#else
+    sp_point_384* t;
+    sp_digit* tmp;
+#endif
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_384*)XMALLOC(sizeof(*t) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        t[1].infinity = 0;
+        err = sp_384_mod_mul_norm_7(t[1].x, g->x, p384_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_384_mod_mul_norm_7(t[1].y, g->y, p384_mod);
+    if (err == MP_OKAY)
+        err = sp_384_mod_mul_norm_7(t[1].z, g->z, p384_mod);
+
+    if (err == MP_OKAY) {
+        i = 6;
+        c = 54;
+        n = k[i--] << (55 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 55;
+            }
+
+            y = (n >> 54) & 1;
+            n <<= 1;
+
+            sp_384_proj_point_add_7(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                   ((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_384_proj_point_dbl_7(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                            ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
+        }
+
+        if (map != 0) {
+            sp_384_map_7(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 7 * 6);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 3);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
+#endif
+
+    return err;
+}
+
+#else
+/* A table entry for pre-computed points. */
+typedef struct sp_table_entry_384 {
+    sp_digit x[7];
+    sp_digit y[7];
+} sp_table_entry_384;
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
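+ * A fixed 4-bit window is used: the 16 small multiples 0*g..15*g are
+ * precomputed and each iteration performs four doublings and one table
+ * addition.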
+ */ +static int sp_384_ecc_mulmod_fast_7(sp_point_384* r, const sp_point_384* g, const sp_digit* k, + int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 td[16]; + sp_point_384 rtd; + sp_digit tmpd[2 * 7 * 6]; +#endif + sp_point_384* t; + sp_point_384* rt; + sp_digit* tmp; + sp_digit n; + int i; + int c, y; + int err; + + (void)heap; + + err = sp_384_point_new_7(heap, rtd, rt); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#else + t = td; + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + (void)sp_384_mod_mul_norm_7(t[1].x, g->x, p384_mod); + (void)sp_384_mod_mul_norm_7(t[1].y, g->y, p384_mod); + (void)sp_384_mod_mul_norm_7(t[1].z, g->z, p384_mod); + t[1].infinity = 0; + sp_384_proj_point_dbl_7(&t[ 2], &t[ 1], tmp); + t[ 2].infinity = 0; + sp_384_proj_point_add_7(&t[ 3], &t[ 2], &t[ 1], tmp); + t[ 3].infinity = 0; + sp_384_proj_point_dbl_7(&t[ 4], &t[ 2], tmp); + t[ 4].infinity = 0; + sp_384_proj_point_add_7(&t[ 5], &t[ 3], &t[ 2], tmp); + t[ 5].infinity = 0; + sp_384_proj_point_dbl_7(&t[ 6], &t[ 3], tmp); + t[ 6].infinity = 0; + sp_384_proj_point_add_7(&t[ 7], &t[ 4], &t[ 3], tmp); + t[ 7].infinity = 0; + sp_384_proj_point_dbl_7(&t[ 8], &t[ 4], tmp); + t[ 8].infinity = 0; + sp_384_proj_point_add_7(&t[ 9], &t[ 5], &t[ 4], tmp); + t[ 9].infinity = 0; + sp_384_proj_point_dbl_7(&t[10], &t[ 5], tmp); + t[10].infinity = 0; + sp_384_proj_point_add_7(&t[11], &t[ 6], &t[ 5], tmp); + t[11].infinity = 0; + sp_384_proj_point_dbl_7(&t[12], &t[ 6], tmp); + t[12].infinity = 0; + sp_384_proj_point_add_7(&t[13], &t[ 7], &t[ 6], tmp); + t[13].infinity = 0; + sp_384_proj_point_dbl_7(&t[14], &t[ 7], tmp); + t[14].infinity = 0; + sp_384_proj_point_add_7(&t[15], &t[ 8], &t[ 7], tmp); + t[15].infinity = 0; + + i = 5; + n = k[i+1] << 9; + c = 50; + y = n >> 59; + XMEMCPY(rt, &t[y], sizeof(sp_point_384)); + n <<= 5; + for (; i>=0 || c>=4; ) { + if (c < 4) { + n |= k[i--] << (9 - c); + c += 55; + } + y = (n >> 60) & 0xf; + n <<= 4; + c -= 4; + + sp_384_proj_point_dbl_7(rt, rt, tmp); + sp_384_proj_point_dbl_7(rt, rt, tmp); + sp_384_proj_point_dbl_7(rt, rt, tmp); + sp_384_proj_point_dbl_7(rt, rt, tmp); + + sp_384_proj_point_add_7(rt, rt, &t[y], tmp); + } + + if (map != 0) { + sp_384_map_7(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_384)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 7 * 6); + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + } + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_point_384) * 16); + XFREE(t, heap, DYNAMIC_TYPE_ECC); + } +#else + ForceZero(tmpd, sizeof(tmpd)); + ForceZero(td, sizeof(td)); +#endif + sp_384_point_free_7(rt, 1, heap); + + return err; +} + +#ifdef FP_ECC +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. 
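+ * The W = Z^4 value is carried from one iteration to the next, making n
+ * chained doublings cheaper than n separate calls to the doubling
+ * routine.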
+ */ +static void sp_384_proj_point_dbl_n_7(sp_point_384* p, int n, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*7; + sp_digit* b = t + 4*7; + sp_digit* t1 = t + 6*7; + sp_digit* t2 = t + 8*7; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_384_mont_dbl_7(y, y, p384_mod); + /* W = Z^4 */ + sp_384_mont_sqr_7(w, z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_7(w, w, p384_mod, p384_mp_mod); + +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_384_mont_sqr_7(t1, x, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(t1, t1, w, p384_mod); + sp_384_mont_tpl_7(a, t1, p384_mod); + /* B = X*Y^2 */ + sp_384_mont_sqr_7(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(b, t1, x, p384_mod, p384_mp_mod); + /* X = A^2 - 2B */ + sp_384_mont_sqr_7(x, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_7(t2, b, p384_mod); + sp_384_mont_sub_7(x, x, t2, p384_mod); + /* Z = Z*Y */ + sp_384_mont_mul_7(z, z, y, p384_mod, p384_mp_mod); + /* t2 = Y^4 */ + sp_384_mont_sqr_7(t1, t1, p384_mod, p384_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_384_mont_mul_7(w, w, t1, p384_mod, p384_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_384_mont_sub_7(y, b, x, p384_mod); + sp_384_mont_mul_7(y, y, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_7(y, y, p384_mod); + sp_384_mont_sub_7(y, y, t1, p384_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_384_mont_sqr_7(t1, x, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(t1, t1, w, p384_mod); + sp_384_mont_tpl_7(a, t1, p384_mod); + /* B = X*Y^2 */ + sp_384_mont_sqr_7(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(b, t1, x, p384_mod, p384_mp_mod); + /* X = A^2 - 2B */ + sp_384_mont_sqr_7(x, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_7(t2, b, p384_mod); + sp_384_mont_sub_7(x, x, t2, p384_mod); + /* Z = Z*Y */ + sp_384_mont_mul_7(z, z, y, p384_mod, p384_mp_mod); + /* t2 = Y^4 */ + sp_384_mont_sqr_7(t1, t1, p384_mod, p384_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_384_mont_sub_7(y, b, x, p384_mod); + sp_384_mont_mul_7(y, y, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_7(y, y, p384_mod); + sp_384_mont_sub_7(y, y, t1, p384_mod); +#endif + /* Y = Y/2 */ + sp_384_div2_7(y, y, p384_mod); +} + +#endif /* FP_ECC */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
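+ * With Z2 == 1 the U1 and S1 terms reduce to X1 and Y1, saving a field
+ * squaring and several multiplications over the general addition.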
+ */ +static void sp_384_proj_point_add_qz1_7(sp_point_384* r, const sp_point_384* p, + const sp_point_384* q, sp_digit* t) +{ + const sp_point_384* ap[2]; + sp_point_384* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*7; + sp_digit* t3 = t + 4*7; + sp_digit* t4 = t + 6*7; + sp_digit* t5 = t + 8*7; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Check double */ + (void)sp_384_sub_7(t1, p384_mod, q->y); + sp_384_norm_7(t1); + if ((sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) & + (sp_384_cmp_equal_7(p->y, q->y) | sp_384_cmp_equal_7(p->y, t1))) != 0) { + sp_384_proj_point_dbl_7(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_384)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<7; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<7; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<7; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_7(t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_7(t4, t4, q->y, p384_mod, p384_mp_mod); + /* H = U2 - X1 */ + sp_384_mont_sub_7(t2, t2, x, p384_mod); + /* R = S2 - Y1 */ + sp_384_mont_sub_7(t4, t4, y, p384_mod); + /* Z3 = H*Z1 */ + sp_384_mont_mul_7(z, z, t2, p384_mod, p384_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_384_mont_sqr_7(t1, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_7(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t3, x, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t5, t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(x, t1, t5, p384_mod); + sp_384_mont_dbl_7(t1, t3, p384_mod); + sp_384_mont_sub_7(x, x, t1, p384_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_384_mont_sub_7(t3, t3, x, p384_mod); + sp_384_mont_mul_7(t3, t3, t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t5, t5, y, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(y, t3, t5, p384_mod); + } +} + +#ifdef FP_ECC +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_384_proj_to_affine_7(sp_point_384* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 7; + sp_digit* tmp = t + 4 * 7; + + sp_384_mont_inv_7(t1, a->z, tmp); + + sp_384_mont_sqr_7(t2, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t1, t2, t1, p384_mod, p384_mp_mod); + + sp_384_mont_mul_7(a->x, a->x, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(a->y, a->y, t1, p384_mod, p384_mp_mod); + XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod)); +} + +/* Generate the pre-computed table of points for the base point. + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. 
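+ *
+ * Layout note (descriptive only): the 384-bit scalar is treated as 8
+ * stripes of 48 bits, and entry b of the 256-entry table holds the
+ * affine point
+ *   table[b] = sum over the set bits i of b of 2^(48*i) * a
+ * so, for example, table[5] = table[4] + table[1] = 2^96*a + a. The
+ * first loop below fills the powers table[1<<i] with 48 doublings each;
+ * the second fills every remaining entry with one Z==1 addition.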
+ */
+static int sp_384_gen_stripe_table_7(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_7(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_7(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_7(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_7(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_7(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_384_proj_point_dbl_n_7(t, 48, tmp);
+            sp_384_proj_to_affine_7(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_7(t, s1, s2, tmp);
+                sp_384_proj_to_affine_7(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_7(s2, 0, heap);
+    sp_384_point_free_7(s1, 0, heap);
+    sp_384_point_free_7( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
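+ *
+ * How the scalar is consumed (descriptive only): window i gathers one
+ * bit from each 48-bit stripe of k, i.e. bits i, i+48, ..., i+336, into
+ * an 8-bit table index y. k is held in 55-bit digits, so bit x is read
+ * as
+ *   (k[x / 55] >> (x % 55)) & 1
+ * The top window (i == 47) just loads table[y]; each of the remaining 47
+ * windows costs one point double plus one Z==1 addition with table[y].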
+ */
+static int sp_384_ecc_mulmod_stripe_7(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 7 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_7(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=47; j<8; j++,x+=48) {
+            y |= ((k[x / 55] >> (x % 55)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=46; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=48) {
+                y |= ((k[x / 55] >> (x % 55)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_7(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_7(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_7(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(p, 0, heap);
+    sp_384_point_free_7(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[7];
+    sp_digit y[7];
+    sp_table_entry_384 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_7(g->x, sp_cache_384[i].x) &
+                   sp_384_cmp_equal_7(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
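+ *
+ * Note on the FP_ECC path below: the first multiplication by a given
+ * base g takes sp_384_ecc_mulmod_fast_7(); on the second use of the same
+ * g its cached 256-entry stripe table is generated and that call, and
+ * all later ones, go through sp_384_ecc_mulmod_stripe_7(). The table
+ * cost is thus only paid for points that are actually reused (e.g. a
+ * public key verified against repeatedly).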
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, const sp_digit* k, + int map, void* heap) +{ +#ifndef FP_ECC + return sp_384_ecc_mulmod_fast_7(r, g, k, map, heap); +#else + sp_digit tmp[2 * 7 * 7]; + sp_cache_384_t* cache; + int err = MP_OKAY; + +#ifndef HAVE_THREAD_LS + if (initCacheMutex_384 == 0) { + wc_InitMutex(&sp_cache_384_lock); + initCacheMutex_384 = 1; + } + if (wc_LockMutex(&sp_cache_384_lock) != 0) + err = BAD_MUTEX_E; +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_384(g, &cache); + if (cache->cnt == 2) + sp_384_gen_stripe_table_7(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_384_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_384_ecc_mulmod_fast_7(r, g, k, map, heap); + } + else { + err = sp_384_ecc_mulmod_stripe_7(r, g, cache->table, k, + map, heap); + } + } + + return err; +#endif +} + +#endif +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map, + void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 p; + sp_digit kd[7]; +#endif + sp_point_384* point; + sp_digit* k = NULL; + int err = MP_OKAY; + + err = sp_384_point_new_7(heap, p, point); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + if (err == MP_OKAY) { + sp_384_from_mp(k, 7, km); + sp_384_point_from_ecc_point_7(point, gm); + + err = sp_384_ecc_mulmod_7(point, point, k, map, heap); + } + if (err == MP_OKAY) { + err = sp_384_point_to_ecc_point_7(point, r); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_7(point, 0, heap); + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply the base point of P384 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_base_7(sp_point_384* r, const sp_digit* k, + int map, void* heap) +{ + /* No pre-computed values. */ + return sp_384_ecc_mulmod_7(r, &p384_base, k, map, heap); +} + +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +/* Multiply the base point of P384 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
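+ *
+ * In this cache-resistant build no fixed-base table is consulted; the
+ * base point is simply passed to the generic sp_384_ecc_mulmod_7(), the
+ * intent (as the option name suggests) being to avoid secret-dependent
+ * lookups into a large precomputed table.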
+ */ +static int sp_384_ecc_mulmod_base_7(sp_point_384* r, const sp_digit* k, + int map, void* heap) +{ + /* No pre-computed values. */ + return sp_384_ecc_mulmod_7(r, &p384_base, k, map, heap); +} + +#else +static const sp_table_entry_384 p384_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x50756649c0b528L,0x71c541ad9c707bL,0x71506d35b8838dL, + 0x4d1877fc3ce1d7L,0x6de2b645486845L,0x227025fee46c29L, + 0x134eab708a6785L }, + { 0x043dad4b03a4feL,0x517ef769535846L,0x58ba0ec14286feL, + 0x47a7fecc5d6f3aL,0x1a840c6c352196L,0x3d3bb00044c72dL, + 0x0ade2af0968571L } }, + /* 2 */ + { { 0x0647532b0c535bL,0x52a6e0a0c52c53L,0x5085aae6b24375L, + 0x7096bb501c66b5L,0x47bdb3df9b7b7bL,0x11227e9b2f0be6L, + 0x088b172704fa51L }, + { 0x0e796f2680dc64L,0x796eb06a482ebfL,0x2b441d02e04839L, + 0x19bef7312a5aecL,0x02247c38b8efb5L,0x099ed1185c329eL, + 0x1ed71d7cdb096fL } }, + /* 3 */ + { { 0x6a3cc39edffea5L,0x7a386fafd3f9c4L,0x366f78fbd8d6efL, + 0x529c7ad7873b80L,0x79eb30380eb471L,0x07c5d3b51760b7L, + 0x36ee4f1cc69183L }, + { 0x5ba260f526b605L,0x2f1dfaf0aa6e6fL,0x6bb5ca812a5752L, + 0x3002d8d1276bc9L,0x01f82269483777L,0x1df33eaaf733cdL, + 0x2b97e555f59255L } }, + /* 4 */ + { { 0x480c57f26feef9L,0x4d28741c248048L,0x0c9cf8af1f0c68L, + 0x778f6a639a8016L,0x148e88c42e9c53L,0x464051757ecfe9L, + 0x1a940bd0e2a5e1L }, + { 0x713a46b74536feL,0x1757b153e1d7ebL,0x30dc8c9da07486L, + 0x3b7460c1879b5eL,0x4b766c5317b315L,0x1b9de3aaf4d377L, + 0x245f124c2cf8f5L } }, + /* 5 */ + { { 0x426e2ee349ddd0L,0x7df3365f84a022L,0x03b005d29a7c45L, + 0x422c2337f9b5a4L,0x060494f4bde761L,0x5245e5db6da0b0L, + 0x22b71d744677f2L }, + { 0x19d097b7d5a7ceL,0x6bcb468823d34cL,0x1c3692d3be1d09L, + 0x3c80ec7aa01f02L,0x7170f2ebaafd97L,0x06cbcc7d79d4e8L, + 0x04a8da511fe760L } }, + /* 6 */ + { { 0x79c07a4fc52870L,0x6e9034a752c251L,0x603860a367382cL, + 0x56d912d6aa87d0L,0x0a348a24abaf76L,0x6c5a23da14adcbL, + 0x3cf60479a522b2L }, + { 0x18dd774c61ed22L,0x0ff30168f93b0cL,0x3f79ae15642eddL, + 0x40510f4915fbcbL,0x2c9ddfdfd1c6d6L,0x67b81b62aee55eL, + 0x2824de79b07a43L } }, + /* 7 */ + { { 0x6c66efe085c629L,0x48c212b7913470L,0x4480fd2d057f0aL, + 0x725ec7a89a9eb1L,0x78ce97ca1972b7L,0x54760ee70154fbL, + 0x362a40e27b9f93L }, + { 0x474dc7e7b14461L,0x602819389ef037L,0x1a13bc284370b2L, + 0x0193ff1295a59dL,0x79615bde6ea5d2L,0x2e76e3d886acc1L, + 0x3bb796812e2b60L } }, + /* 8 */ + { { 0x04cbb3893b9a2dL,0x4c16010a18baabL,0x19f7cb50f60831L, + 0x084f400a0936c1L,0x72f1cdd5bbbf00L,0x1b30b725dc6702L, + 0x182753e4fcc50cL }, + { 0x059a07eadaf9d6L,0x26d81e24bf603cL,0x45583c839dc399L, + 0x5579d4d6b1103aL,0x2e14ea59489ae7L,0x492f6e1c5ecc97L, + 0x03740dc05db420L } }, + /* 9 */ + { { 0x413be88510521fL,0x3753ee49982e99L,0x6cd4f7098e1cc5L, + 0x613c92bda4ec1dL,0x495378b677efe0L,0x132a2143839927L, + 0x0cf8c336291c0bL }, + { 0x7fc89d2208353fL,0x751b9da85657e1L,0x349b8a97d405c3L, + 0x65a964b048428fL,0x1adf481276455eL,0x5560c8d89c2ffcL, + 0x144fc11fac21a3L } }, + /* 10 */ + { { 0x7611f4df5bdf53L,0x634eb16234db80L,0x3c713b8e51174cL, + 0x52c3c68ac4b2edL,0x53025ba8bebe75L,0x7175d98143105bL, + 0x33ca8e266a48faL }, + { 0x0c9281d24fd048L,0x76b3177604bbf3L,0x3b26ae754e106fL, + 0x7f782275c6efc6L,0x36662538a4cb67L,0x0ca1255843e464L, + 0x2a4674e142d9bcL } }, + /* 11 */ + { { 0x303b4085d480d8L,0x68f23650f4fa7bL,0x552a3ceeba3367L, + 0x6da0c4947926e3L,0x6e0f5482eb8003L,0x0de717f3d6738aL, + 0x22e5dcc826a477L }, + { 0x1b05b27209cfc2L,0x7f0a0b65b6e146L,0x63586549ed3126L, + 
0x7d628dd2b23124L,0x383423fe510391L,0x57ff609eabd569L, + 0x301f04370131baL } }, + /* 12 */ + { { 0x22fe4cdb32f048L,0x7f228ebdadbf5aL,0x02a99adb2d7c8eL, + 0x01a02e05286706L,0x62d6adf627a89fL,0x49c6ce906fbf2bL, + 0x0207256dae90b9L }, + { 0x23e036e71d6cebL,0x199ed8d604e3d7L,0x0c1a11c076d16fL, + 0x389291fb3da3f3L,0x47adc60f8f942eL,0x177048468e4b9aL, + 0x20c09f5e61d927L } }, + /* 13 */ + { { 0x129ea63615b0b8L,0x03fb4a9b588367L,0x5ad6da8da2d051L, + 0x33f782f44caeaaL,0x5a27fa80d45291L,0x6d1ed796942da4L, + 0x08435a931ef556L }, + { 0x004abb25351130L,0x6d33207c6fd7e7L,0x702130972074b7L, + 0x0e34748af900f7L,0x762a531a28c87aL,0x3a903b5a4a6ac7L, + 0x1775b79c35b105L } }, + /* 14 */ + { { 0x7470fd846612ceL,0x7dd9b431b32e53L,0x04bcd2be1a61bcL, + 0x36ed7c5b5c260bL,0x6795f5ef0a4084L,0x46e2880b401c93L, + 0x17d246c5aa8bdeL }, + { 0x707ae4db41b38dL,0x233c31f7f9558fL,0x585110ec67bdf4L, + 0x4d0cc931d0c703L,0x26fbe4356841a7L,0x64323e95239c44L, + 0x371dc9230f3221L } }, + /* 15 */ + { { 0x70ff1ae4b1ec9dL,0x7c1dcfddee0daaL,0x53286782188748L, + 0x6a5d9381e6f207L,0x3aa6c7d6523c4cL,0x6c02d83e0d97e2L, + 0x16a9c916b45312L }, + { 0x78146744b74de8L,0x742ec415269c6fL,0x237a2c6a860e79L, + 0x186baf17ba68a7L,0x4261e8789fa51fL,0x3dc136480a5903L, + 0x1953899e0cf159L } }, + /* 16 */ + { { 0x0205de2f9fbe67L,0x1706fee51c886fL,0x31a0b803c712bfL, + 0x0a6aa11ede7603L,0x2463ef2a145c31L,0x615403b30e8f4aL, + 0x3f024d6c5f5c5eL }, + { 0x53bc4fd4d01f95L,0x7d512ac15a692cL,0x72be38fcfe6aa0L, + 0x437f0b77bbca1eL,0x7fdcf70774a10eL,0x392d6c5cde37f3L, + 0x229cbce79621d1L } }, + /* 17 */ + { { 0x2de4da2341c342L,0x5ca9d4e08844e7L,0x60dd073bcf74c9L, + 0x4f30aa499b63ecL,0x23efd1eafa00d5L,0x7c99a7db1257b3L, + 0x00febc9b3171b1L }, + { 0x7e2fcf3045f8acL,0x2a642e9e3ce610L,0x23f82be69c5299L, + 0x66e49ad967c279L,0x1c895ddfd7a842L,0x798981e22f6d25L, + 0x0d595cb59322f3L } }, + /* 18 */ + { { 0x4bac017d8c1bbaL,0x73872161e7aafdL,0x0fd865f43d8163L, + 0x019d89457708b7L,0x1b983c4dd70684L,0x095e109b74d841L, + 0x25f1f0b3e0c76fL }, + { 0x4e61ddf96010e8L,0x1c40a53f542e5eL,0x01a74dfc8365f9L, + 0x69b36b92773333L,0x08e0fccc139ed3L,0x266d216ddc4269L, + 0x1f2b47717ce9b5L } }, + /* 19 */ + { { 0x0a9a81da57a41fL,0x0825d800736cccL,0x2d7876b4579d28L, + 0x3340ea6211a1e3L,0x49e89284f3ff54L,0x6276a210fe2c6eL, + 0x01c3c8f31be7cbL }, + { 0x2211da5d186e14L,0x1e6ffbb61bfea8L,0x536c7d060211d2L, + 0x320168720d1d55L,0x5835525ed667baL,0x5125e52495205eL, + 0x16113b9f3e9129L } }, + /* 20 */ + { { 0x3086073f3b236fL,0x283b03c443b5f5L,0x78e49ed0a067a7L, + 0x2a878fb79fb2b8L,0x662f04348a9337L,0x57ee2cf732d50bL, + 0x18b50dd65fd514L }, + { 0x5feb9ef2955926L,0x2c3edbef06a7b0L,0x32728dad651029L, + 0x116d00b1c4b347L,0x13254052bf1a1aL,0x3e77bf7fee5ec1L, + 0x253943ca388882L } }, + /* 21 */ + { { 0x32e5b33062e8afL,0x46ebd147a6d321L,0x2c8076dec6a15cL, + 0x7328d511ff0d80L,0x10ad7e926def0eL,0x4e8ca85937d736L, + 0x02638c26e8bf2fL }, + { 0x1deeb3fff1d63fL,0x5014417fa6e8efL,0x6e1da3de5c8f43L, + 0x7ca942b42295d9L,0x23faacf75bb4d1L,0x4a71fcd680053dL, + 0x04af4f90204dceL } }, + /* 22 */ + { { 0x23780d104cbba5L,0x4e8ff46bba9980L,0x2072a6da8d881fL, + 0x3cc3d881ae11c9L,0x2eee84ff19be89L,0x69b708ed77f004L, + 0x2a82928534eef9L }, + { 0x794331187d4543L,0x70e0f3edc0cc41L,0x3ab1fa0b84c854L, + 0x1478355c1d87baL,0x6f35fa7748ba28L,0x37b8be0531584dL, + 0x03c3141c23a69fL } }, + /* 23 */ + { { 0x5c244cdef029ddL,0x0d0f0a0cc37018L,0x17f8476604f6afL, + 0x13a6dd6ccc95c3L,0x5a242e9801b8f6L,0x211ca9cc632131L, + 0x264a6a46a4694fL }, + { 0x3ffd7235285887L,0x284be28302046fL,0x57f4b9b882f1d6L, + 
0x5e21772c940661L,0x7619a735c600cfL,0x2f76f5a50c9106L, + 0x28d89c8c69de31L } }, + /* 24 */ + { { 0x799b5c91361ed8L,0x36ead8c66cd95cL,0x046c9969a91f5cL, + 0x46bbdba2a66ea9L,0x29db0e0215a599L,0x26c8849b36f756L, + 0x22c3feb31ff679L }, + { 0x585d1237b5d9efL,0x5ac57f522e8e8dL,0x617e66e8b56c41L, + 0x68826f276823cfL,0x0983f0e6f39231L,0x4e1075099084bdL, + 0x2a541f82be0416L } }, + /* 25 */ + { { 0x468a6e14cf381cL,0x4f7b845c6399edL,0x36aa29732ebe74L, + 0x19c726911ab46aL,0x2ad1fe431eec0eL,0x301e35051fd1eaL, + 0x36da815e7a1ab3L }, + { 0x05672e4507832aL,0x4ebf10fca51251L,0x6015843421cff0L, + 0x3affad832fc013L,0x712b58d9b45540L,0x1e4751d1f6213eL, + 0x0e7c2b218bafa7L } }, + /* 26 */ + { { 0x7abf784c52edf5L,0x6fcb4b135ca7b1L,0x435e46ac5f735cL, + 0x67f8364ca48c5fL,0x46d45b5fbd956bL,0x10deda6065db94L, + 0x0b37fdf85068f9L }, + { 0x74b3ba61f47ec8L,0x42c7ddf08c10ccL,0x1531a1fe422a20L, + 0x366f913d12be38L,0x6a846e30cb2edfL,0x2785898c994fedL, + 0x061be85f331af3L } }, + /* 27 */ + { { 0x23f5361dfcb91eL,0x3c26c8da6b1491L,0x6e444a1e620d65L, + 0x0c3babd5e8ac13L,0x573723ce612b82L,0x2d10e62a142c37L, + 0x3d1a114c2d98bdL }, + { 0x33950b401896f6L,0x7134efe7c12110L,0x31239fd2978472L, + 0x30333bf5978965L,0x79f93313dd769fL,0x457fb9e11662caL, + 0x190a73b251ae3cL } }, + /* 28 */ + { { 0x04dd54bb75f9a4L,0x0d7253a76ae093L,0x08f5b930792bbcL, + 0x041f79adafc265L,0x4a9ff24c61c11bL,0x0019c94e724725L, + 0x21975945d9cc2aL }, + { 0x3dfe76722b4a2bL,0x17f2f6107c1d94L,0x546e1ae2944b01L, + 0x53f1f06401e72dL,0x2dbe43fc7632d6L,0x5639132e185903L, + 0x0f2f34eb448385L } }, + /* 29 */ + { { 0x7b4cc7ec30ce93L,0x58fb6e4e4145f7L,0x5d1ed5540043b5L, + 0x19ffbe1f633adfL,0x5bfc0907259033L,0x6378f872e7ca0eL, + 0x2c127b2c01eb3cL }, + { 0x076eaf4f58839cL,0x2db54560bc9f68L,0x42ad0319b84062L, + 0x46c325d1fb019dL,0x76d2a19ee9eebcL,0x6fbd6d9e2aa8f7L, + 0x2396a598fe0991L } }, + /* 30 */ + { { 0x662fddf7fbd5e1L,0x7ca8ed22563ad3L,0x5b4768efece3b3L, + 0x643786a422d1eaL,0x36ce80494950e1L,0x1a30795b7f2778L, + 0x107f395c93f332L }, + { 0x7939c28332c144L,0x491610e3c8dc0bL,0x099ba2bfdac5fcL, + 0x5c2e3149ec29a7L,0x31b731d06f1dc3L,0x1cbb60d465d462L, + 0x3ca5461362cfd9L } }, + /* 31 */ + { { 0x653ff736ddc103L,0x7c6f2bdec0dfb2L,0x73f81b73a097d0L, + 0x05b775f84f180fL,0x56b2085af23413L,0x0d6f36256a61feL, + 0x26d3ed267fa68fL }, + { 0x54f89251d27ac2L,0x4fc6ad94a71202L,0x7ebf01969b4cc5L, + 0x7ba364dbc14760L,0x4f8370959a2587L,0x7b7631e37c6188L, + 0x29e51845f104cbL } }, + /* 32 */ + { { 0x426b775e3c647bL,0x327319e0a69180L,0x0c5cb034f6ff2fL, + 0x73aa39b98e9897L,0x7ee615f49fde6eL,0x3f712aa61e0db4L, + 0x33ca06c2ba2ce9L }, + { 0x14973541b8a543L,0x4b4e6101ba61faL,0x1d94e4233d0698L, + 0x501513c715d570L,0x1b8f8c3d01436bL,0x52f41a0445cf64L, + 0x3f709c3a75fb04L } }, + /* 33 */ + { { 0x073c0cbc7f41d6L,0x227c36f5ac8201L,0x508e110fef65d8L, + 0x0f317229529b7fL,0x45fc6030d00e24L,0x118a65d30cebeaL, + 0x3340cc4223a448L }, + { 0x204c999797612cL,0x7c05dd4ce9c5a3L,0x7b865d0a8750e4L, + 0x2f82c876ab7d34L,0x2243ddd2ab4808L,0x6834b9df8a4914L, + 0x123319ed950e0fL } }, + /* 34 */ + { { 0x50430efc14ab48L,0x7e9e4ce0d4e89cL,0x2332207fd8656dL, + 0x4a2809e97f4511L,0x2162bb1b968e2dL,0x29526d54af2972L, + 0x13edd9adcd939dL }, + { 0x793bca31e1ff7fL,0x6b959c9e4d2227L,0x628ac27809a5baL, + 0x2c71ffc7fbaa5fL,0x0c0b058f13c9ceL,0x5676eae68de2cfL, + 0x35508036ea19a4L } }, + /* 35 */ + { { 0x030bbd6dda1265L,0x67f9d12e31bb34L,0x7e4d8196e3ded3L, + 0x7b9120e5352498L,0x75857bce72d875L,0x4ead976a396caeL, + 0x31c5860553a64dL }, + { 0x1a0f792ee32189L,0x564c4efb8165d0L,0x7adc7d1a7fbcbeL, + 
0x7ed7c2ccf327b7L,0x35df1b448ce33dL,0x6f67eb838997cdL, + 0x3ee37ec0077917L } }, + /* 36 */ + { { 0x345fa74d5bb921L,0x097c9a56ccfd8eL,0x00a0b5e8f971f8L, + 0x723d95223f69d4L,0x08e2e5c2777f87L,0x68b13676200109L, + 0x26ab5df0acbad6L }, + { 0x01bca7daac34aeL,0x49ca4d5f664dadL,0x110687b850914bL, + 0x1203d6f06443c9L,0x7a2ac743b04d4cL,0x40d96bd3337f82L, + 0x13728be0929c06L } }, + /* 37 */ + { { 0x631ca61127bc1aL,0x2b362fd5a77cd1L,0x17897d68568fb7L, + 0x21070af33db5b2L,0x6872e76221794aL,0x436f29fb076963L, + 0x1f2acfc0ecb7b3L }, + { 0x19bf15ca9b3586L,0x32489a4a17aee2L,0x2b31af3c929551L, + 0x0db7c420b9b19fL,0x538c39bd308c2bL,0x438775c0dea88fL, + 0x1537304d7cd07fL } }, + /* 38 */ + { { 0x53598d943caf0dL,0x1d5244bfe266adL,0x7158feb7ab3811L, + 0x1f46e13cf6fb53L,0x0dcab632eb9447L,0x46302968cfc632L, + 0x0b53d3cc5b6ec7L }, + { 0x69811ca143b7caL,0x5865bcf9f2a11aL,0x74ded7fa093b06L, + 0x1c878ec911d5afL,0x04610e82616e49L,0x1e157fe9640eb0L, + 0x046e6f8561d6c2L } }, + /* 39 */ + { { 0x631a3d3bbe682cL,0x3a4ce9dde5ba95L,0x28f11f7502f1f1L, + 0x0a55cf0c957e88L,0x495e4ec7e0a3bcL,0x30ad4d87ba365cL, + 0x0217b97a4c26f3L }, + { 0x01a9088c2e67fdL,0x7501c4c3d5e5e7L,0x265b7bb854c820L, + 0x729263c87e6b52L,0x308b9e3b8fb035L,0x33f1b86c1b23abL, + 0x0e81b8b21fc99cL } }, + /* 40 */ + { { 0x59f5a87237cac0L,0x6b3a86b0cf28b9L,0x13a53db13a4fc2L, + 0x313c169a1c253bL,0x060158304ed2bbL,0x21e171b71679bcL, + 0x10cdb754d76f86L }, + { 0x44355392ab473aL,0x64eb7cbda08caeL,0x3086426a900c71L, + 0x49016ed9f3c33cL,0x7e6354ab7e04f9L,0x17c4c91a40cd2eL, + 0x3509f461024c66L } }, + /* 41 */ + { { 0x2848f50f9b5a31L,0x68d1755b6c5504L,0x48cd5d5672ec00L, + 0x4d77421919d023L,0x1e1e349ef68807L,0x4ab5130cf415d7L, + 0x305464c6c7dbe6L }, + { 0x64eb0bad74251eL,0x64c6957e52bda4L,0x6c12583440dee6L, + 0x6d3bee05b00490L,0x186970de53dbc4L,0x3be03b37567a56L, + 0x2b553b1ebdc55bL } }, + /* 42 */ + { { 0x74dc3579efdc58L,0x26d29fed1bb71cL,0x334c825a9515afL, + 0x433c1e839273a6L,0x0d8a4e41cff423L,0x3454098fe42f8eL, + 0x1046674bf98686L }, + { 0x09a3e029c05dd2L,0x54d7cfc7fb53a7L,0x35f0ad37e14d7cL, + 0x73a294a13767b9L,0x3f519678275f4fL,0x788c63393993a4L, + 0x0781680b620123L } }, + /* 43 */ + { { 0x4c8e2ed4d5ffe8L,0x112db7d42fe4ebL,0x433b8f2d2be2edL, + 0x23e30b29a82cbcL,0x35d2f4c06ee85aL,0x78ff31ffe4b252L, + 0x0d31295c8cbff5L }, + { 0x314806ea0376a2L,0x4ea09e22bc0589L,0x0879575f00ba97L, + 0x188226d2996bb7L,0x7799368dc9411fL,0x7ab24e5c8cae36L, + 0x2b6a8e2ee4ea33L } }, + /* 44 */ + { { 0x70c7127d4ed72aL,0x24c9743ef34697L,0x2fd30e7a93683aL, + 0x538a89c246012cL,0x6c660a5394ed82L,0x79a95ea239d7e0L, + 0x3f3af3bbfb170dL }, + { 0x3b75aa779ae8c1L,0x33995a3cc0dde4L,0x7489d5720b7bfdL, + 0x599677ef9fa937L,0x3defd64c5ab44bL,0x27d52dc234522bL, + 0x2ac65d1a8450e0L } }, + /* 45 */ + { { 0x478585ec837d7dL,0x5f7971dc174887L,0x67576ed7bb296dL, + 0x5a78e529a74926L,0x640f73f4fa104bL,0x7d42a8b16e4730L, + 0x108c7eaa75fd01L }, + { 0x60661ef96e6896L,0x18d3a0761f3aa7L,0x6e71e163455539L, + 0x165827d6a7e583L,0x4e7f77e9527935L,0x790bebe2ae912eL, + 0x0b8fe9561adb55L } }, + /* 46 */ + { { 0x4d48036a9951a8L,0x371084f255a085L,0x66aeca69cea2c5L, + 0x04c99f40c745e7L,0x08dc4bfd9a0924L,0x0b0ec146b29df7L, + 0x05106218d01c91L }, + { 0x2a56ee99caedc7L,0x5d9b23a203922cL,0x1ce4c80b6a3ec4L, + 0x2666bcb75338cbL,0x185a81aac8c4aaL,0x2b4fb60a06c39eL, + 0x0327e1b3633f42L } }, + /* 47 */ + { { 0x72814710b2a556L,0x52c864f6e16534L,0x4978de66ddd9f2L, + 0x151f5950276cf0L,0x450ac6781d2dc2L,0x114b7a22dd61b2L, + 0x3b32b07f29faf8L }, + { 0x68444fdc2d6e94L,0x68526bd9e437bcL,0x0ca780e8b0d887L, + 
0x69f3f850a716aaL,0x500b953e42cd57L,0x4e57744d812e7dL, + 0x000a5f0e715f48L } }, + /* 48 */ + { { 0x2aab10b8243a7dL,0x727d1f4b18b675L,0x0e6b9fdd91bbbbL, + 0x0d58269fc337e5L,0x45d6664105a266L,0x11946af1b14072L, + 0x2c2334f91e46e1L }, + { 0x6dc5f8756d2411L,0x21b34eaa25188bL,0x0d2797da83529eL, + 0x324df55616784bL,0x7039ec66d267dfL,0x2de79cdb2d108cL, + 0x14011b1ad0bde0L } }, + /* 49 */ + { { 0x2e160266425043L,0x55fbe11b712125L,0x7e3c58b3947fd9L, + 0x67aacc79c37ad3L,0x4a18e18d2dea0fL,0x5eef06e5674351L, + 0x37c3483ae33439L }, + { 0x5d5e1d75bb4045L,0x0f9d72db296efdL,0x60b1899dd894a9L, + 0x06e8818ded949aL,0x747fd853c39434L,0x0953b937d9efabL, + 0x09f08c0beeb901L } }, + /* 50 */ + { { 0x1d208a8f2d49ceL,0x54042c5be1445aL,0x1c2681fd943646L, + 0x219c8094e2e674L,0x442cddf07238b8L,0x574a051c590832L, + 0x0b72f4d61c818aL }, + { 0x7bc3cbe4680967L,0x0c8b3f25ae596bL,0x0445b0da74a9efL, + 0x0bbf46c40363b7L,0x1df575c50677a3L,0x016ea6e73d68adL, + 0x0b5207bd8db0fdL } }, + /* 51 */ + { { 0x2d39fdfea1103eL,0x2b252bf0362e34L,0x63d66c992baab9L, + 0x5ac97706de8550L,0x0cca390c39c1acL,0x0d9bec5f01b2eaL, + 0x369360a0f7e5f3L }, + { 0x6dd3461e201067L,0x70b2d3f63ed614L,0x487580487c54c7L, + 0x6020e48a44af2aL,0x1ccf80b21aab04L,0x3cf3b12d88d798L, + 0x349368eccc506fL } }, + /* 52 */ + { { 0x5a053753b0a354L,0x65e818dbb9b0aeL,0x7d5855ee50e4bfL, + 0x58dc06885c7467L,0x5ee15073e57bd3L,0x63254ebc1e07fdL, + 0x1d48e0392aa39bL }, + { 0x4e227c6558ffe9L,0x0c3033d8a82a3eL,0x7bde65c214e8d2L, + 0x6e23561559c16aL,0x5094c5e6deaffdL,0x78dca2880f1f91L, + 0x3d9d3f947d838dL } }, + /* 53 */ + { { 0x387ae5af63408fL,0x6d539aeb4e6edfL,0x7f3d3186368e70L, + 0x01a6446bc19989L,0x35288fbcd4482fL,0x39288d34ec2736L, + 0x1de9c47159ad76L }, + { 0x695dc7944f8d65L,0x3eca2c35575094L,0x0c918059a79b69L, + 0x4573a48c32a74eL,0x580d8bc8b93f52L,0x190be3a3d071eaL, + 0x2333e686b3a8cbL } }, + /* 54 */ + { { 0x2b110c7196fee2L,0x3ac70e99128a51L,0x20a6bb6b75d5e6L, + 0x5f447fa513149aL,0x560d69714cc7b2L,0x1d3ee25279fab1L, + 0x369adb2ccca959L }, + { 0x3fddb13dd821c2L,0x70bf21ba647be8L,0x64121227e3cbc9L, + 0x12633a4c892320L,0x3c15c61660f26dL,0x1932c3b3d19900L, + 0x18c718563eab71L } }, + /* 55 */ + { { 0x72ebe0fd752366L,0x681c2737d11759L,0x143c805e7ae4f0L, + 0x78ed3c2cc7b324L,0x5c16e14820254fL,0x226a4f1c4ec9f0L, + 0x2891bb915eaac6L }, + { 0x061eb453763b33L,0x07f88b81781a87L,0x72b5ac7a87127cL, + 0x7ea4e4cd7ff8b5L,0x5e8c3ce33908b6L,0x0bcb8a3d37feffL, + 0x01da9e8e7fc50bL } }, + /* 56 */ + { { 0x639dfe9e338d10L,0x32dfe856823608L,0x46a1d73bca3b9aL, + 0x2da685d4b0230eL,0x6e0bc1057b6d69L,0x7144ec724a5520L, + 0x0b067c26b87083L }, + { 0x0fc3f0eef4c43dL,0x63500f509552b7L,0x220d74af6f8b86L, + 0x038996eafa2aa9L,0x7f6750f4aee4d2L,0x3e1d3f06718720L, + 0x1ea1d37243814cL } }, + /* 57 */ + { { 0x322d4597c27050L,0x1beeb3ce17f109L,0x15e5ce2e6ef42eL, + 0x6c8be27da6b3a0L,0x66e3347f4d5f5cL,0x7172133899c279L, + 0x250aff4e548743L }, + { 0x28f0f6a43b566dL,0x0cd2437fefbca0L,0x5b1108cb36bdbaL, + 0x48a834d41fb7c2L,0x6cb8565680579fL,0x42da2412b45d9fL, + 0x33dfc1abb6c06eL } }, + /* 58 */ + { { 0x56e3c48ef96c80L,0x65667bb6c1381eL,0x09f70514375487L, + 0x1548ff115f4a08L,0x237de2d21a0710L,0x1425cdee9f43dfL, + 0x26a6a42e055b0aL }, + { 0x4ea9ea9dc7dfcbL,0x4df858583ac58aL,0x1d274f819f1d39L, + 0x26e9c56cf91fcbL,0x6cee31c7c3a465L,0x0bb8e00b108b28L, + 0x226158da117301L } }, + /* 59 */ + { { 0x5a7cd4fce73946L,0x7b6a462d0ac653L,0x732ea4bb1a3da5L, + 0x7c8e9f54711af4L,0x0a6cd55d4655f9L,0x341e6d13e4754aL, + 0x373c87098879a8L }, + { 0x7bc82e61b818bfL,0x5f2db48f44879fL,0x2a2f06833f1d28L, + 
0x494e5b691a74c0L,0x17d6cf35fd6b57L,0x5f7028d1c25dfcL, + 0x377a9ab9562cb6L } }, + /* 60 */ + { { 0x4de8877e787b2eL,0x183e7352621a52L,0x2ab0509974962bL, + 0x045a450496cb8aL,0x3bf7118b5591c7L,0x7724f98d761c35L, + 0x301607e8d5a0c1L }, + { 0x0f58a3f24d4d58L,0x3771c19c464f3cL,0x06746f9c0bfafaL, + 0x56564c9c8feb52L,0x0d66d9a7d8a45fL,0x403578141193caL, + 0x00b0d0bdc19260L } }, + /* 61 */ + { { 0x571407157bdbc2L,0x138d5a1c2c0b99L,0x2ee4a8057dcbeaL, + 0x051ff2b58e9ed1L,0x067378ad9e7cdaL,0x7cc2c1db97a49eL, + 0x1e7536ccd849d6L }, + { 0x531fd95f3497c4L,0x55dc08325f61a7L,0x144e942bce32bfL, + 0x642d572f09e53aL,0x556ff188261678L,0x3e79c0d9d513d6L, + 0x0bbbc6656f6d52L } }, + /* 62 */ + { { 0x57d3eb50596edcL,0x26c520a487451dL,0x0a92db40aea8d6L, + 0x27df6345109616L,0x7733d611fd727cL,0x61d14171fef709L, + 0x36169ae417c36bL }, + { 0x6899f5d4091cf7L,0x56ce5dfe4ed0c1L,0x2c430ce5913fbcL, + 0x1b13547e0f8caeL,0x4840a8275d3699L,0x59b8ef209e81adL, + 0x22362dff5ea1a2L } }, + /* 63 */ + { { 0x7237237bd98425L,0x73258e162a9d0bL,0x0a59a1e8bb5118L, + 0x4190a7ee5d8077L,0x13684905fdbf7cL,0x31c4033a52626bL, + 0x010a30e4fbd448L }, + { 0x47623f981e909aL,0x670af7c325b481L,0x3d004241fa4944L, + 0x0905a2ca47f240L,0x58f3cdd7a187c3L,0x78b93aee05b43fL, + 0x19b91d4ef8d63bL } }, + /* 64 */ + { { 0x0d34e116973cf4L,0x4116fc9e69ee0eL,0x657ae2b4a482bbL, + 0x3522eed134d7cdL,0x741e0dde0a036aL,0x6554316a51cc7bL, + 0x00f31c6ca89837L }, + { 0x26770aa06b1dd7L,0x38233a4ceba649L,0x065a1110c96feaL, + 0x18d367839e0f15L,0x794543660558d1L,0x39b605139065dcL, + 0x29abbec071b637L } }, + /* 65 */ + { { 0x1464b401ab5245L,0x16db891b27ff74L,0x724eb49cb26e34L, + 0x74fee3bc9cc33eL,0x6a8bdbebe085eaL,0x5c2e75ca207129L, + 0x1d03f2268e6b08L }, + { 0x28b0a328e23b23L,0x645dc26209a0bcL,0x62c28990348d49L, + 0x4dd9be1fa333d0L,0x6183aac74a72e4L,0x1d6f3ee69e1d03L, + 0x2fff96db0ff670L } }, + /* 66 */ + { { 0x2358f5c6a2123fL,0x5b2bfc51bedb63L,0x4fc6674be649ecL, + 0x51fc16e44b813aL,0x2ffe10a73754c1L,0x69a0c7a053aeefL, + 0x150e605fb6b9b4L }, + { 0x179eef6b8b83c4L,0x64293b28ad05efL,0x331795fab98572L, + 0x09823eec78727dL,0x36508042b89b81L,0x65f1106adb927eL, + 0x2fc0234617f47cL } }, + /* 67 */ + { { 0x12aa244e8068dbL,0x0c834ae5348f00L,0x310fc1a4771cb3L, + 0x6c90a2f9e19ef9L,0x77946fa0573471L,0x37f5df81e5f72fL, + 0x204f5d72cbe048L }, + { 0x613c724383bba6L,0x1ce14844967e0aL,0x797c85e69aa493L, + 0x4fb15b0f2ce765L,0x5807978e2e8aa7L,0x52c75859876a75L, + 0x1554635c763d3eL } }, + /* 68 */ + { { 0x4f292200623f3bL,0x6222be53d7fe07L,0x1e02a9a08c2571L, + 0x22c6058216b912L,0x1ec20044c7ba17L,0x53f94c5efde12bL, + 0x102b8aadfe32a4L }, + { 0x45377aa927b102L,0x0d41b8062ee371L,0x77085a9018e62aL, + 0x0c69980024847cL,0x14739b423a73a9L,0x52ec6961fe3c17L, + 0x38a779c94b5a7dL } }, + /* 69 */ + { { 0x4d14008435af04L,0x363bfd8325b4e8L,0x48cdb715097c95L, + 0x1b534540f8bee0L,0x4ca1e5c90c2a76L,0x4b52c193d6eee0L, + 0x277a33c79becf5L }, + { 0x0fee0d511d3d06L,0x4627f3d6a58f8cL,0x7c81ac245119b8L, + 0x0c8d526ba1e07aL,0x3dbc242f55bac2L,0x2399df8f91fffdL, + 0x353e982079ba3bL } }, + /* 70 */ + { { 0x6405d3b0ab9645L,0x7f31abe3ee236bL,0x456170a9babbb1L, + 0x09634a2456a118L,0x5b1c6045acb9e5L,0x2c75c20d89d521L, + 0x2e27ccf5626399L }, + { 0x307cd97fed2ce4L,0x1c2fbb02b64087L,0x542a068d27e64dL, + 0x148c030b3bc6a6L,0x671129e616ade5L,0x123f40db60dafcL, + 0x07688f3c621220L } }, + /* 71 */ + { { 0x1c46b342f2c4b5L,0x27decc0b3c8f04L,0x0d9bd433464c54L, + 0x1f3d893b818572L,0x2536043b536c94L,0x57e00c4b19ebf9L, + 0x3938fb9e5ad55eL }, + { 0x6b390024c8b22fL,0x4583f97e20a976L,0x2559d24abcbad7L, + 
0x67a9cabc9bd8c6L,0x73a56f09432e4aL,0x79eb0beb53a3b7L, + 0x3e19d47f6f8221L } }, + /* 72 */ + { { 0x7399cb9d10e0b2L,0x32acc1b8a36e2aL,0x287d60c2407035L, + 0x42c82420ea4b5cL,0x13f286658bc268L,0x3c91181156e064L, + 0x234b83dcdeb963L }, + { 0x79bc95486cfee6L,0x4d8fd3cb78af36L,0x07362ba5e80da8L, + 0x79d024a0d681b0L,0x6b58406907f87fL,0x4b40f1e977e58fL, + 0x38dcc6fd5fa342L } }, + /* 73 */ + { { 0x72282be1cd0abeL,0x02bd0fdfdf44e5L,0x19b0e0d2f753e4L, + 0x4514e76ce8c4c0L,0x02ebc9c8cdcc1bL,0x6ac0c0373e9fddL, + 0x0dc414af1c81a9L }, + { 0x7a109246f32562L,0x26982e6a3768edL,0x5ecd8daed76ab5L, + 0x2eaa70061eb261L,0x09e7c038a8c514L,0x2a2603cc300658L, + 0x25d93ab9e55cd4L } }, + /* 74 */ + { { 0x11b19fcbd5256aL,0x41e4d94274770fL,0x0133c1a411001fL, + 0x360bac481dbca3L,0x45908b18a9c22bL,0x1e34396fafb03aL, + 0x1b84fea7486edaL }, + { 0x183c62a71e6e16L,0x5f1dc30e93da8eL,0x6cb97b502573c3L, + 0x3708bf0964e3fcL,0x35a7f042eeacceL,0x56370da902c27fL, + 0x3a873c3b72797fL } }, + /* 75 */ + { { 0x6573c9cea4cc9bL,0x2c3b5f9d91e6dcL,0x2a90e2dbd9505eL, + 0x66a75444025f81L,0x1571fb894b03cdL,0x5d1a1f00fd26f3L, + 0x0d19a9fd618855L }, + { 0x659acd56515664L,0x7279478bd616a3L,0x09a909e76d56c3L, + 0x2fd70474250358L,0x3a1a25c850579cL,0x11b9e0f71b74ccL, + 0x1268daef3d1bffL } }, + /* 76 */ + { { 0x7f5acc46d93106L,0x5bc15512f939c8L,0x504b5f92f996deL, + 0x25965549be7a64L,0x357a3a2ae9b80dL,0x3f2bcf9c139cc0L, + 0x0a7ddd99f23b35L }, + { 0x6868f5a8a0b1c5L,0x319ec52f15b1beL,0x0770000a849021L, + 0x7f4d50287bd608L,0x62c971d28a9d7fL,0x164e89309acb72L, + 0x2a29f002cf4a32L } }, + /* 77 */ + { { 0x58a852ae11a338L,0x27e3a35f2dcef8L,0x494d5731ce9e18L, + 0x49516f33f4bb3eL,0x386b26ba370097L,0x4e8fac1ec30248L, + 0x2ac26d4c44455dL }, + { 0x20484198eb9dd0L,0x75982a0e06512bL,0x152271b9279b05L, + 0x5908a9857e36d2L,0x6a933ab45a60abL,0x58d8b1acb24fafL, + 0x28fbcf19425590L } }, + /* 78 */ + { { 0x5420e9df010879L,0x4aba72aec2f313L,0x438e544eda7494L, + 0x2e8e189ce6f7eaL,0x2f771e4efe45bdL,0x0d780293bce7efL, + 0x1569ad3d0d02acL }, + { 0x325251ebeaf771L,0x02510f1a8511e2L,0x3863816bf8aad1L, + 0x60fdb15fe6ac19L,0x4792aef52a348cL,0x38e57a104e9838L, + 0x0d171611a1df1bL } }, + /* 79 */ + { { 0x15ceb0bea65e90L,0x6e56482db339bcL,0x37f618f7b0261fL, + 0x6351abc226dabcL,0x0e999f617b74baL,0x37d3cc57af5b69L, + 0x21df2b987aac68L }, + { 0x2dddaa3a358610L,0x2da264bc560e47L,0x545615d538bf13L, + 0x1c95ac244b8cc7L,0x77de1f741852cbL,0x75d324f00996abL, + 0x3a79b13b46aa3bL } }, + /* 80 */ + { { 0x7db63998683186L,0x6849bb989d530cL,0x7b53c39ef7ed73L, + 0x53bcfbf664d3ffL,0x25ef27c57f71c7L,0x50120ee80f3ad6L, + 0x243aba40ed0205L }, + { 0x2aae5e0ee1fcebL,0x3449d0d8343fbeL,0x5b2864fb7cffc7L, + 0x64dceb5407ac3eL,0x20303a5695523dL,0x3def70812010b2L, + 0x07be937f2e9b6fL } }, + /* 81 */ + { { 0x5838f9e0540015L,0x728d8720efb9f7L,0x1ab5864490b0c8L, + 0x6531754458fdcfL,0x600ff9612440c0L,0x48735b36a585b7L, + 0x3d4aaea86b865dL }, + { 0x6898942cac32adL,0x3c84c5531f23a1L,0x3c9dbd572f7edeL, + 0x5691f0932a2976L,0x186f0db1ac0d27L,0x4fbed18bed5bc9L, + 0x0e26b0dee0b38cL } }, + /* 82 */ + { { 0x1188b4f8e60f5bL,0x602a915455b4a2L,0x60e06af289ff99L, + 0x579fe4bed999e5L,0x2bc03b15e6d9ddL,0x1689649edd66d5L, + 0x3165e277dca9d2L }, + { 0x7cb8a529cf5279L,0x57f8035b34d84dL,0x352e2eb26de8f1L, + 0x6406820c3367c4L,0x5d148f4c899899L,0x483e1408482e15L, + 0x1680bd1e517606L } }, + /* 83 */ + { { 0x5c877cc1c90202L,0x2881f158eae1f4L,0x6f45e207df4267L, + 0x59280eba1452d8L,0x4465b61e267db5L,0x171f1137e09e5cL, + 0x1368eb821daa93L }, + { 0x70fe26e3e66861L,0x52a6663170da7dL,0x71d1ce5b7d79dcL, + 
0x1cffe9be1e1afdL,0x703745115a29c4L,0x73b7f897b2f65aL, + 0x02218c3a95891aL } }, + /* 84 */ + { { 0x16866db8a9e8c9L,0x4770b770123d9bL,0x4c116cf34a8465L, + 0x079b28263fc86aL,0x3751c755a72b58L,0x7bc8df1673243aL, + 0x12fff72454f064L }, + { 0x15c049b89554e7L,0x4ea9ef44d7cd9aL,0x42f50765c0d4f1L, + 0x158bb603cb011bL,0x0809dde16470b1L,0x63cad7422ea819L, + 0x38b6cd70f90d7eL } }, + /* 85 */ + { { 0x1e4aab6328e33fL,0x70575f026da3aeL,0x7e1b55c8c55219L, + 0x328d4b403d24caL,0x03b6df1f0a5bd1L,0x26b4bb8b648ed0L, + 0x17161f2f10b76aL }, + { 0x6cdb32bae8b4c0L,0x33176266227056L,0x4975fa58519b45L, + 0x254602ea511d96L,0x4e82e93e402a67L,0x0ca8b5929cdb4fL, + 0x3ae7e0a07918f5L } }, + /* 86 */ + { { 0x60f9d1fecf5b9bL,0x6257e40d2cd469L,0x6c7aa814d28456L, + 0x58aac7caac8e79L,0x703a55f0293cbfL,0x702390a0f48378L, + 0x24b9ae07218b07L }, + { 0x1ebc66cdaf24e3L,0x7d9ae5f9f8e199L,0x42055ee921a245L, + 0x035595936e4d49L,0x129c45d425c08bL,0x6486c5f19ce6ddL, + 0x027dbd5f18ba24L } }, + /* 87 */ + { { 0x7d6b78d29375fbL,0x0a3dc6ba22ae38L,0x35090fa91feaf6L, + 0x7f18587fb7b16eL,0x6e7091dd924608L,0x54e102cdbf5ff8L, + 0x31b131a4c22079L }, + { 0x368f87d6a53fb0L,0x1d3f3d69a3f240L,0x36bf5f9e40e1c6L, + 0x17f150e01f8456L,0x76e5d0835eb447L,0x662fc0a1207100L, + 0x14e3dd97a98e39L } }, + /* 88 */ + { { 0x0249d9c2663b4bL,0x56b68f9a71ba1cL,0x74b119567f9c02L, + 0x5e6f336d8c92acL,0x2ced58f9f74a84L,0x4b75a2c2a467c5L, + 0x30557011cf740eL }, + { 0x6a87993be454ebL,0x29b7076fb99a68L,0x62ae74aaf99bbaL, + 0x399f9aa8fb6c1bL,0x553c24a396dd27L,0x2868337a815ea6L, + 0x343ab6635cc776L } }, + /* 89 */ + { { 0x0e0b0eec142408L,0x79728229662121L,0x605d0ac75e6250L, + 0x49a097a01edfbeL,0x1e20cd270df6b6L,0x7438a0ca9291edL, + 0x29daa430da5f90L }, + { 0x7a33844624825aL,0x181715986985c1L,0x53a6853cae0b92L, + 0x6d98401bd925e8L,0x5a0a34f5dd5e24L,0x7b818ef53cf265L, + 0x0836e43c9d3194L } }, + /* 90 */ + { { 0x1179b70e6c5fd9L,0x0246d9305dd44cL,0x635255edfbe2fbL, + 0x5397b3523b4199L,0x59350cc47e6640L,0x2b57aa97ed4375L, + 0x37efd31abd153aL }, + { 0x7a7afa6907f4faL,0x75c10cb94e6a7eL,0x60a925ab69cc47L, + 0x2ff5bcd9239bd5L,0x13c2113e425f11L,0x56bd3d2f8a1437L, + 0x2c9adbab13774fL } }, + /* 91 */ + { { 0x4ab9f52a2e5f2bL,0x5e537e70b58903L,0x0f242658ebe4f2L, + 0x2648a1e7a5f9aeL,0x1b4c5081e73007L,0x6827d4aff51850L, + 0x3925e41726cd01L }, + { 0x56dd8a55ab3cfbL,0x72d6a31b6d5beaL,0x697bd2e5575112L, + 0x66935519a7aa12L,0x55e97dda7a3aceL,0x0e16afb4237b4cL, + 0x00b68fbff08093L } }, + /* 92 */ + { { 0x4b00366481d0d9L,0x37cb031fbfc5c4L,0x14643f6800dd03L, + 0x6793fef60fe0faL,0x4f43e329c92803L,0x1fce86b96a6d26L, + 0x0ad416975e213aL }, + { 0x7cc6a6711adcc9L,0x64b8a63c43c2d9L,0x1e6caa2a67c0d0L, + 0x610deffd17a54bL,0x57d669d5f38423L,0x77364b8f022636L, + 0x36d4d13602e024L } }, + /* 93 */ + { { 0x72e667ae50a2f5L,0x1b15c950c3a21aL,0x3ccc37c72e6dfeL, + 0x027f7e1d094fb8L,0x43ae1e90aa5d7eL,0x3f5feac3d97ce5L, + 0x0363ed0a336e55L }, + { 0x235f73d7663784L,0x5d8cfc588ad5a4L,0x10ab6ff333016eL, + 0x7d8886af2e1497L,0x549f34fd17988eL,0x3fc4fcaee69a33L, + 0x0622b133a13d9eL } }, + /* 94 */ + { { 0x6344cfa796c53eL,0x0e9a10d00136fdL,0x5d1d284a56efd8L, + 0x608b1968f8aca7L,0x2fa5a66776edcaL,0x13430c44f1609cL, + 0x1499973cb2152aL }, + { 0x3764648104ab58L,0x3226e409fadafcL,0x1513a8466459ddL, + 0x649206ec365035L,0x46149aa3f765b1L,0x3aebf0a035248eL, + 0x1ee60b8c373494L } }, + /* 95 */ + { { 0x4e9efcc15f3060L,0x5e5d50fd77cdc8L,0x071e5403516b58L, + 0x1b7d4e89b24ceaL,0x53b1fa66d6dc03L,0x457f15f892ab5fL, + 0x076332c9397260L }, + { 0x31422b79d7584bL,0x0b01d47e41ba80L,0x3e5611a3171528L, + 
0x5f53b9a9fc1be4L,0x7e2fc3d82f110fL,0x006cf350ef0fbfL, + 0x123ae98ec81c12L } }, + /* 96 */ + { { 0x310d41df46e2f6L,0x2ff032a286cf13L,0x64751a721c4eadL, + 0x7b62bcc0339b95L,0x49acf0c195afa4L,0x359d48742544e5L, + 0x276b7632d9e2afL }, + { 0x656c6be182579aL,0x75b65a4d85b199L,0x04a911d1721bfaL, + 0x46e023d0e33477L,0x1ec2d580acd869L,0x540b456f398a37L, + 0x001f698210153dL } }, + /* 97 */ + { { 0x3ca35217b00dd0L,0x73961d034f4d3cL,0x4f520b61c4119dL, + 0x4919fde5cccff7L,0x4d0e0e6f38134dL,0x55c22586003e91L, + 0x24d39d5d8f1b19L }, + { 0x4d4fc3d73234dcL,0x40c50c9d5f8368L,0x149afbc86bf2b8L, + 0x1dbafefc21d7f1L,0x42e6b61355107fL,0x6e506cf4b54f29L, + 0x0f498a6c615228L } }, + /* 98 */ + { { 0x30618f437cfaf8L,0x059640658532c4L,0x1c8a4d90e96e1dL, + 0x4a327bcca4fb92L,0x54143b8040f1a0L,0x4ec0928c5a49e4L, + 0x2af5ad488d9b1fL }, + { 0x1b392bd5338f55L,0x539c0292b41823L,0x1fe35d4df86a02L, + 0x5fa5bb17988c65L,0x02b6cb715adc26L,0x09a48a0c2cb509L, + 0x365635f1a5a9f2L } }, + /* 99 */ + { { 0x58aa87bdc21f31L,0x156900c7cb1935L,0x0ec1f75ee2b6cfL, + 0x5f3e35a77ec314L,0x582dec7b9b7621L,0x3e65deb0e8202aL, + 0x325c314b8a66b7L }, + { 0x702e2a22f24d66L,0x3a20e9982014f1L,0x6424c5b86bbfb0L, + 0x424eea4d795351L,0x7fc4cce7c22055L,0x581383fceb92d7L, + 0x32b663f49ee81bL } }, + /* 100 */ + { { 0x76e2d0b648b73eL,0x59ca39fa50bddaL,0x18bb44f786a7e4L, + 0x28c8d49d464360L,0x1b8bf1d3a574eaL,0x7c670b9bf1635aL, + 0x2efb30a291f4b3L }, + { 0x5326c069cec548L,0x03bbe481416531L,0x08a415c8d93d6fL, + 0x3414a52120d383L,0x1f17a0fc6e9c5cL,0x0de9a090717463L, + 0x22d84b3c67ff07L } }, + /* 101 */ + { { 0x30b5014c3830ebL,0x70791dc1a18b37L,0x09e6ea4e24f423L, + 0x65e148a5253132L,0x446f05d5d40449L,0x7ad5d3d707c0e9L, + 0x18eedd63dd3ab5L }, + { 0x40d2eac6bb29e0L,0x5b0e9605e83c38L,0x554f2c666a56a8L, + 0x0ac27b6c94c48bL,0x1aaecdd91bafe5L,0x73c6e2bdf72634L, + 0x306dab96d19e03L } }, + /* 102 */ + { { 0x6d3e4b42772f41L,0x1aba7796f3a39bL,0x3a03fbb980e9c0L, + 0x2f2ea5da2186a8L,0x358ff444ef1fcfL,0x0798cc0329fcdcL, + 0x39a28bcc9aa46dL }, + { 0x42775c977fe4d2L,0x5eb8fc5483d6b0L,0x0bfe37c039e3f7L, + 0x429292eaf9df60L,0x188bdf4b840cd5L,0x06e10e090749cdL, + 0x0e52678e73192eL } }, + /* 103 */ + { { 0x05de80b08df5feL,0x2af8c77406c5f8L,0x53573c50a0304aL, + 0x277b10b751bca0L,0x65cf8c559132a5L,0x4c667abe25f73cL, + 0x0271809e05a575L }, + { 0x41ced461f7a2fbL,0x0889a9ebdd7075L,0x320c63f2b7760eL, + 0x4f8d4324151c63L,0x5af47315be2e5eL,0x73c62f6aee2885L, + 0x206d6412a56a97L } }, + /* 104 */ + { { 0x6b1c508b21d232L,0x3781185974ead6L,0x1aba7c3ebe1fcfL, + 0x5bdc03cd3f3a5aL,0x74a25036a0985bL,0x5929e30b7211b2L, + 0x16a9f3bc366bd7L }, + { 0x566a7057dcfffcL,0x23b5708a644bc0L,0x348cda2aa5ba8cL, + 0x466aa96b9750d4L,0x6a435ed9b20834L,0x2e7730f2cf9901L, + 0x2b5cd71d5b0410L } }, + /* 105 */ + { { 0x285ab3cee76ef4L,0x68895e3a57275dL,0x6fab2e48fd1265L, + 0x0f1de060428c94L,0x668a2b080b5905L,0x1b589dc3b0cb37L, + 0x3c037886592c9bL }, + { 0x7fb5c0f2e90d4dL,0x334eefb3d8c91aL,0x75747124700388L, + 0x547a2c2e2737f5L,0x2af9c080e37541L,0x0a295370d9091aL, + 0x0bb5c36dad99e6L } }, + /* 106 */ + { { 0x644116586f25cbL,0x0c3f41f9ee1f5dL,0x00628d43a3dedaL, + 0x16e1437aae9669L,0x6aba7861bf3e59L,0x60735631ff4c44L, + 0x345609efaa615eL }, + { 0x41f54792e6acefL,0x4791583f75864dL,0x37f2ff5c7508b1L, + 0x1288912516c3b0L,0x51a2135f6a539bL,0x3b775511f42091L, + 0x127c6afa7afe66L } }, + /* 107 */ + { { 0x79f4f4f7492b73L,0x583d967256342dL,0x51a729bff33ca3L, + 0x3977d2c22d8986L,0x066f528ba8d40bL,0x5d759d30f8eb94L, + 0x0f8e649192b408L }, + { 0x22d84e752555bbL,0x76953855c728c7L,0x3b2254e72aaaa4L, + 
0x508cd4ce6c0212L,0x726296d6b5a6daL,0x7a77aa066986f3L, + 0x2267a497bbcf31L } }, + /* 108 */ + { { 0x7f3651bf825dc4L,0x3988817388c56fL,0x257313ed6c3dd0L, + 0x3feab7f3b8ffadL,0x6c0d3cb9e9c9b4L,0x1317be0a7b6ac4L, + 0x2a5f399d7df850L }, + { 0x2fe5a36c934f5eL,0x429199df88ded1L,0x435ea21619b357L, + 0x6aac6a063bac2bL,0x600c149978f5edL,0x76543aa1114c95L, + 0x163ca9c83c7596L } }, + /* 109 */ + { { 0x7dda4a3e4daedbL,0x1824cba360a4cdL,0x09312efd70e0c6L, + 0x454e68a146c885L,0x40aee762fe5c47L,0x29811cbd755a59L, + 0x34b37c95f28319L }, + { 0x77c58b08b717d2L,0x309470d9a0f491L,0x1ab9f40448e01cL, + 0x21c8bd819207b1L,0x6a01803e9361bcL,0x6e5e4c350ec415L, + 0x14fd55a91f8798L } }, + /* 110 */ + { { 0x4cee562f512a90L,0x0008361d53e390L,0x3789b307a892cfL, + 0x064f7be8770ae9L,0x41435d848762cfL,0x662204dd38baa6L, + 0x23d6dcf73f6c5aL }, + { 0x69bef2d2c75d95L,0x2b037c0c9bb43eL,0x495fb4d79a34cfL, + 0x184e140c601260L,0x60193f8d435f9cL,0x283fa52a0c3ad2L, + 0x1998635e3a7925L } }, + /* 111 */ + { { 0x1cfd458ce382deL,0x0dddbd201bbcaeL,0x14d2ae8ed45d60L, + 0x73d764ab0c24cbL,0x2a97fe899778adL,0x0dbd1e01eddfe9L, + 0x2ba5c72d4042c3L }, + { 0x27eebc3af788f1L,0x53ffc827fc5a30L,0x6d1d0726d35188L, + 0x4721275c50aa2aL,0x077125f02e690fL,0x6da8142405db5dL, + 0x126cef68992513L } }, + /* 112 */ + { { 0x3c6067035b2d69L,0x2a1ad7db2361acL,0x3debece6cad41cL, + 0x30095b30f9afc1L,0x25f50b9bd9c011L,0x79201b2f2c1da1L, + 0x3b5c151449c5bdL }, + { 0x76eff4127abdb4L,0x2d31e03ce0382aL,0x24ff21f8bda143L, + 0x0671f244fd3ebaL,0x0c1c00b6bcc6fbL,0x18de9f7c3ebefbL, + 0x33dd48c3809c67L } }, + /* 113 */ + { { 0x61d6c2722d94edL,0x7e426e31041cceL,0x4097439f1b47b0L, + 0x579e798b2d205bL,0x6a430d67f830ebL,0x0d2c676700f727L, + 0x05fea83a82f25bL }, + { 0x3f3482df866b98L,0x3dd353b6a5a9cdL,0x77fe6ae1a48170L, + 0x2f75cc2a8f7cddL,0x7442a3863dad17L,0x643de42d877a79L, + 0x0fec8a38fe7238L } }, + /* 114 */ + { { 0x79b70c0760ac07L,0x195d3af37e9b29L,0x1317ff20f7cf27L, + 0x624e1c739e7504L,0x67330ef50f943dL,0x775e8cf455d793L, + 0x17b94d2d913a9fL }, + { 0x4b627203609e7fL,0x06aac5fb93e041L,0x603c515fdc2611L, + 0x2592ca0d7ae472L,0x02395d1f50a6cbL,0x466ef9648f85d9L, + 0x297cf879768f72L } }, + /* 115 */ + { { 0x3489d67d85fa94L,0x0a6e5b739c8e04L,0x7ebb5eab442e90L, + 0x52665a007efbd0L,0x0967ca57b0d739L,0x24891f9d932b63L, + 0x3cc2d6dbadc9d3L }, + { 0x4b4773c81c5338L,0x73cd47dad7a0f9L,0x7c755bab6ae158L, + 0x50b03d6becefcaL,0x574d6e256d57f0L,0x188db4fffb92aeL, + 0x197e10118071eaL } }, + /* 116 */ + { { 0x45d0cbcba1e7f1L,0x1180056abec91aL,0x6c5f86624bbc28L, + 0x442c83f3b8e518L,0x4e16ae1843ecb4L,0x670cef2fd786c9L, + 0x205b4acb637d2cL }, + { 0x70b0e539aa8671L,0x67c982056bebd0L,0x645c831a5e7c36L, + 0x09e06951a14b32L,0x5dd610ad4c89e6L,0x41c35f20164831L, + 0x3821f29cb4cdb8L } }, + /* 117 */ + { { 0x2831ffaba10079L,0x70f6dac9ffe444L,0x1cfa32ccc03717L, + 0x01519fda22a3c8L,0x23215e815aaa27L,0x390671ad65cbf7L, + 0x03dd4d72de7d52L }, + { 0x1ecd972ee95923L,0x166f8da3813e8eL,0x33199bbd387a1aL, + 0x04525fe15e3dc7L,0x44d2ef54165898L,0x4b7e47d3dc47f7L, + 0x10d5c8db0b5d44L } }, + /* 118 */ + { { 0x176d95ba9cdb1bL,0x14025f04f23dfcL,0x49379332891687L, + 0x6625e5ccbb2a57L,0x7ac0abdbf9d0e5L,0x7aded4fbea15b2L, + 0x314844ac184d67L }, + { 0x6d9ce34f05eae3L,0x3805d2875856d2L,0x1c2122f85e40ebL, + 0x51cb9f2d483a9aL,0x367e91e20f1702L,0x573c3559838dfdL, + 0x0b282b0cb85af1L } }, + /* 119 */ + { { 0x6a12e4ef871eb5L,0x64bb517e14f5ffL,0x29e04d3aaa530bL, + 0x1b07d88268f261L,0x411be11ed16fb0L,0x1f480536db70bfL, + 0x17a7deadfd34e4L }, + { 0x76d72f30646612L,0x5a3bbb43a1b0a0L,0x5e1687440e82bfL, + 
0x713b5e69481112L,0x46c3dcb499e174L,0x0862da3b4e2a24L, + 0x31cb55b4d62681L } }, + /* 120 */ + { { 0x5ffc74dae5bb45L,0x18944c37adb9beL,0x6aaa63b1ee641aL, + 0x090f4b6ee057d3L,0x4045cedd2ee00fL,0x21c2c798f7c282L, + 0x2c2c6ef38cd6bdL }, + { 0x40d78501a06293L,0x56f8caa5cc89a8L,0x7231d5f91b37aeL, + 0x655f1e5a465c6dL,0x3f59a81f9cf783L,0x09bbba04c23624L, + 0x0f71ee23bbacdeL } }, + /* 121 */ + { { 0x38d398c4741456L,0x5204c0654243c3L,0x34498c916ea77eL, + 0x12238c60e5fe43L,0x0fc54f411c7625L,0x30b2ca43aa80b6L, + 0x06bead1bb6ea92L }, + { 0x5902ba8674b4adL,0x075ab5b0fa254eL,0x58db83426521adL, + 0x5b66b6b3958e39L,0x2ce4e39890e07bL,0x46702513338b37L, + 0x363690c2ded4d7L } }, + /* 122 */ + { { 0x765642c6b75791L,0x0f4c4300d7f673L,0x404d8bbe101425L, + 0x61e91c88651f1bL,0x61ddc9bc60aed8L,0x0ef36910ce2e65L, + 0x04b44367aa63b8L }, + { 0x72822d3651b7dcL,0x4b750157a2716dL,0x091cb4f2118d16L, + 0x662ba93b101993L,0x447cbd54a1d40aL,0x12cdd48d674848L, + 0x16f10415cbec69L } }, + /* 123 */ + { { 0x0c57a3a751cd0eL,0x0833d7478fadceL,0x1e751f55686436L, + 0x489636c58e1df7L,0x26ad6da941266fL,0x22225d3559880fL, + 0x35b397c45ba0e2L }, + { 0x3ca97b70e1f2ceL,0x78e50427a8680cL,0x06137e042a8f91L, + 0x7ec40d2500b712L,0x3f0ad688ad7b0dL,0x24746fb33f9513L, + 0x3638fcce688f0bL } }, + /* 124 */ + { { 0x753163750bed6fL,0x786507cd16157bL,0x1d6ec228ce022aL, + 0x587255f42d1b31L,0x0c6adf72a3a0f6L,0x4bfeee2da33f5eL, + 0x08b7300814de6cL }, + { 0x00bf8df9a56e11L,0x75aead48fe42e8L,0x3de9bad911b2e2L, + 0x0fadb233e4b8bbL,0x5b054e8fd84f7dL,0x5eb3064152889bL, + 0x01c1c6e8c777a1L } }, + /* 125 */ + { { 0x5fa0e598f8fcb9L,0x11c129a1ae18dfL,0x5c41b482a2273bL, + 0x545664e5044c9cL,0x7e01c915bfb9abL,0x7f626e19296aa0L, + 0x20c91a9822a087L }, + { 0x273a9fbe3c378fL,0x0f126b44b7d350L,0x493764a75df951L, + 0x32dec3c367d24bL,0x1a7ae987fed9d3L,0x58a93055928b85L, + 0x11626975d7775fL } }, + /* 126 */ + { { 0x2bb174a95540a9L,0x10de02c58b613fL,0x2fa8f7b861f3eeL, + 0x44731260bdf3b3L,0x19c38ff7da41feL,0x3535a16e3d7172L, + 0x21a948b83cc7feL }, + { 0x0e6f72868bc259L,0x0c70799df3c979L,0x526919955584c3L, + 0x4d95fda04f8fa2L,0x7bb228e6c0f091L,0x4f728b88d92194L, + 0x2b361c5a136bedL } }, + /* 127 */ + { { 0x0c72ca10c53841L,0x4036ab49f9da12L,0x578408d2b7082bL, + 0x2c4903201fbf5eL,0x14722b3f42a6a8L,0x1997b786181694L, + 0x25c6f10de32849L }, + { 0x79f46d517ff2ffL,0x2dc5d97528f6deL,0x518a494489aa72L, + 0x52748f8af3cf97L,0x472da30a96bb16L,0x1be228f92465a9L, + 0x196f0c47d60479L } }, + /* 128 */ + { { 0x47dd7d139b3239L,0x049c9b06775d0fL,0x627ffc00562d5eL, + 0x04f578d5e5e243L,0x43a788ffcef8b9L,0x7db320be9dde28L, + 0x00837528b8572fL }, + { 0x2969eca306d695L,0x195b72795ec194L,0x5e1fa9b8e77e50L, + 0x4c627f2b3fbfd5L,0x4b91e0d0ee10ffL,0x5698c8d0f35833L, + 0x12d3a9431f475eL } }, + /* 129 */ + { { 0x6409457a0db57eL,0x795b35192e0433L,0x146f973fe79805L, + 0x3d49c516dfb9cfL,0x50dfc3646b3cdaL,0x16a08a2210ad06L, + 0x2b4ef5bcd5b826L }, + { 0x5ebabfee2e3e3eL,0x2e048e724d9726L,0x0a7a7ed6abef40L, + 0x71ff7f83e39ad8L,0x3405ac52a1b852L,0x2e3233357a608dL, + 0x38c1bf3b0e40e6L } }, + /* 130 */ + { { 0x59aec823e4712cL,0x6ed9878331ddadL,0x1cc6faf629f2a0L, + 0x445ff79f36c18cL,0x4edc7ed57aff3dL,0x22ee54c8bdd9e8L, + 0x35398f42d72ec5L }, + { 0x4e7a1cceee0ecfL,0x4c66a707dd1d31L,0x629ad157a23c04L, + 0x3b2c6031dc3c83L,0x3336acbcd3d96cL,0x26ce43adfce0f0L, + 0x3c869c98d699dcL } }, + /* 131 */ + { { 0x58b3cd9586ba11L,0x5d6514b8090033L,0x7c88c3bd736782L, + 0x1735f84f2130edL,0x47784095a9dee0L,0x76312c6e47901bL, + 0x1725f6ebc51455L }, + { 0x6744344bc4503eL,0x16630b4d66e12fL,0x7b3481752c3ec7L, + 
0x47bb2ed1f46f95L,0x08a1a497dd1bcfL,0x1f525df2b8ed93L, + 0x0fe492ea993713L } }, + /* 132 */ + { { 0x71b8dd7268b448L,0x1743dfaf3728d7L,0x23938d547f530aL, + 0x648c3d497d0fc6L,0x26c0d769e3ad45L,0x4d25108769a806L, + 0x3fbf2025143575L }, + { 0x485bfd90339366L,0x2de2b99ed87461L,0x24a33347713badL, + 0x1674bc7073958aL,0x5bb2373ee85b5fL,0x57f9bd657e662cL, + 0x2041b248d39042L } }, + /* 133 */ + { { 0x5f01617d02f4eeL,0x2a8e31c4244b91L,0x2dab3e790229e0L, + 0x72d319ea7544afL,0x01ffb8b000cb56L,0x065e63b0daafd3L, + 0x3d7200a7111d6fL }, + { 0x4561ce1b568973L,0x37034c532dd8ecL,0x1368215020be02L, + 0x30e7184cf289ebL,0x199e0c27d815deL,0x7ee1b4dff324e5L, + 0x2f4a11de7fab5cL } }, + /* 134 */ + { { 0x33c2f99b1cdf2bL,0x1e0d78bf42a2c0L,0x64485dececaa67L, + 0x2242a41be93e92L,0x62297b1f15273cL,0x16ebfaafb02205L, + 0x0f50f805f1fdabL }, + { 0x28bb0b3a70eb28L,0x5b1c7d0160d683L,0x05c30a37959f78L, + 0x3d9301184922d2L,0x46c1ead7dbcb1aL,0x03ee161146a597L, + 0x2d413ed9a6ccc1L } }, + /* 135 */ + { { 0x685ab5f97a27c2L,0x59178214023751L,0x4ffef3c585ab17L, + 0x2bc85302aba2a9L,0x675b001780e856L,0x103c8a37f0b33dL, + 0x2241e98ece70a6L }, + { 0x546738260189edL,0x086c8f7a6b96edL,0x00832ad878a129L, + 0x0b679056ba7462L,0x020ce6264bf8c4L,0x3f9f4b4d92abfbL, + 0x3e9c55343c92edL } }, + /* 136 */ + { { 0x482cec9b3f5034L,0x08b59b3cd1fa30L,0x5a55d1bc8e58b5L, + 0x464a5259337d8eL,0x0a5b6c66ade5a5L,0x55db77b504ddadL, + 0x015992935eac35L }, + { 0x54fe51025e32fcL,0x5d7f52dbe4a579L,0x08c564a8c58696L, + 0x4482a8bec4503fL,0x440e75d9d94de9L,0x6992d768020bfaL, + 0x06c311e8ba01f6L } }, + /* 137 */ + { { 0x2a6ac808223878L,0x04d3ccb4aab0b8L,0x6e6ef09ff6e823L, + 0x15cb03ee9158dcL,0x0dc58919171bf7L,0x3273568abf3cb1L, + 0x1b55245b88d98bL }, + { 0x28e9383b1de0c1L,0x30d5009e4f1f1bL,0x334d185a56a134L, + 0x0875865dfa4c46L,0x266edf5eae3beeL,0x2e03ff16d1f7e5L, + 0x29a36bd9f0c16dL } }, + /* 138 */ + { { 0x004cff44b2e045L,0x426c96380ba982L,0x422292281e46d7L, + 0x508dd8d29d7204L,0x3a4ea73fb2995eL,0x4be64090ae07b2L, + 0x3339177a0eff22L }, + { 0x74a97ec2b3106eL,0x0c616d09169f5fL,0x1bb5d8907241a7L, + 0x661fb67f6d41bdL,0x018a88a0daf136L,0x746333a093a7b4L, + 0x3e19f1ac76424eL } }, + /* 139 */ + { { 0x542a5656527296L,0x0e7b9ce22f1bc9L,0x31b0945992b89bL, + 0x6e0570eb85056dL,0x32daf813483ae5L,0x69eeae9d59bb55L, + 0x315ad4b730b557L }, + { 0x2bc16795f32923L,0x6b02b7ba55130eL,0x1e9da67c012f85L, + 0x5616f014dabf8fL,0x777395fcd9c723L,0x2ff075e7743246L, + 0x2993538aff142eL } }, + /* 140 */ + { { 0x72dae20e552b40L,0x2e4ba69aa5d042L,0x001e563e618bd2L, + 0x28feeba3c98772L,0x648c356da2a907L,0x687e2325069ea7L, + 0x0d34ab09a394f0L }, + { 0x73c21813111286L,0x5829b53b304e20L,0x6fba574de08076L, + 0x79f7058f61614eL,0x4e71c9316f1191L,0x24ef12193e0a89L, + 0x35dc4e2bc9d848L } }, + /* 141 */ + { { 0x045e6d3b4ad1cdL,0x729c95493782f0L,0x77f59de85b361aL, + 0x5309b4babf28f8L,0x4d893d9290935fL,0x736f47f2b2669eL, + 0x23270922d757f3L }, + { 0x23a4826f70d4e9L,0x68a8c63215d33eL,0x4d6c2069205c9cL, + 0x46b2938a5eebe0L,0x41d1f1e2de3892L,0x5ca1775544bcb0L, + 0x3130629e5d19dcL } }, + /* 142 */ + { { 0x6e2681593375acL,0x117cfbabc22621L,0x6c903cd4e13ccaL, + 0x6f358f14d4bd97L,0x1bc58fa11089f1L,0x36aa2db4ac426aL, + 0x15ced8464b7ea1L }, + { 0x6966836cba7df5L,0x7c2b1851568113L,0x22b50ff2ffca66L, + 0x50e77d9f48e49aL,0x32775e9bbc7cc9L,0x403915bb0ece71L, + 0x1b8ec7cb9dd7aaL } }, + /* 143 */ + { { 0x65a888b677788bL,0x51887fac2e7806L,0x06792636f98d2bL, + 0x47bbcd59824c3bL,0x1aca908c43e6dcL,0x2e00d15c708981L, + 0x08e031c2c80634L }, + { 0x77fbc3a297c5ecL,0x10a7948af2919eL,0x10cdafb1fb6b2fL, + 
0x27762309b486f0L,0x13abf26bbac641L,0x53da38478fc3eeL, + 0x3c22eff379bf55L } }, + /* 144 */ + { { 0x0163f484770ee3L,0x7f28e8942e0cbfL,0x5f86cb51b43831L, + 0x00feccd4e4782fL,0x40e5b417eafe7dL,0x79e5742bbea228L, + 0x3717154aa469beL }, + { 0x271d74a270f721L,0x40eb400890b70cL,0x0e37be81d4cb02L, + 0x786907f4e8d43fL,0x5a1f5b590a7acbL,0x048861883851fdL, + 0x11534a1e563dbbL } }, + /* 145 */ + { { 0x37a6357c525435L,0x6afe6f897b78a5L,0x7b7ff311d4f67bL, + 0x38879df15dc9f4L,0x727def7b8ba987L,0x20285dd0db4436L, + 0x156b0fc64b9243L }, + { 0x7e3a6ec0c1c390L,0x668a88d9bcf690L,0x5925aba5440dbeL, + 0x0f6891a044f593L,0x70b46edfed4d97L,0x1a6cc361bab201L, + 0x046f5bc6e160bcL } }, + /* 146 */ + { { 0x79350f076bc9d1L,0x077d9e79a586b9L,0x0896bc0c705764L, + 0x58e632b90e7e46L,0x14e87e0ad32488L,0x4b1bb3f72c6e00L, + 0x3c3ce9684a5fc5L }, + { 0x108fbaf1f703aaL,0x08405ecec17577L,0x199a8e2d44be73L, + 0x2eb22ed0067763L,0x633944deda3300L,0x20d739eb8e5efbL, + 0x2bbbd94086b532L } }, + /* 147 */ + { { 0x03c8b17a19045dL,0x6205a0a504980bL,0x67fdb3e962b9f0L, + 0x16399e01511a4bL,0x44b09fe9dffc96L,0x00a74ff44a1381L, + 0x14590deed3f886L }, + { 0x54e3d5c2a23ddbL,0x310e5138209d28L,0x613f45490c1c9bL, + 0x6bbc85d44bbec8L,0x2f85fc559e73f6L,0x0d71fa7d0fa8cbL, + 0x2898571d17fbb9L } }, + /* 148 */ + { { 0x5607a84335167dL,0x3009c1eb910f91L,0x7ce63447e62d0bL, + 0x03a0633afcf89eL,0x1234b5aaa50872L,0x5a307b534d547bL, + 0x2f4e97138a952eL }, + { 0x13914c2db0f658L,0x6cdcb47e6e75baL,0x5549169caca772L, + 0x0f20423dfeb16fL,0x6b1ae19d180239L,0x0b7b3bee9b7626L, + 0x1ca81adacfe4efL } }, + /* 149 */ + { { 0x219ec3ad19d96fL,0x3549f6548132dbL,0x699889c7aacd0bL, + 0x74602a58730b19L,0x62dc63bcece81cL,0x316f991c0c317aL, + 0x2b8627867b95e3L }, + { 0x67a25ddced1eedL,0x7e14f0eba756e7L,0x0873fbc09b0495L, + 0x0fefb0e16596adL,0x03e6cd98ef39bbL,0x1179b1cded249dL, + 0x35c79c1db1edc2L } }, + /* 150 */ + { { 0x1368309d4245bfL,0x442e55852a7667L,0x095b0f0f348b65L, + 0x6834cf459dfad4L,0x6645950c9be910L,0x06bd81288c71e6L, + 0x1b015b6e944edfL }, + { 0x7a6a83045ab0e3L,0x6afe88b9252ad0L,0x2285bd65523502L, + 0x6c78543879a282L,0x1c5e264b5c6393L,0x3a820c6a7453eeL, + 0x37562d1d61d3c3L } }, + /* 151 */ + { { 0x6c084f62230c72L,0x599490270bc6cfL,0x1d3369ddd3c53dL, + 0x516ddb5fac5da0L,0x35ab1e15011b1aL,0x5fba9106d3a180L, + 0x3be0f092a0917cL }, + { 0x57328f9fdc2538L,0x0526323fc8d5f6L,0x10cbb79521e602L, + 0x50d01167147ae2L,0x2ec7f1b3cda99eL,0x43073cc736e7beL, + 0x1ded89cadd83a6L } }, + /* 152 */ + { { 0x1d51bda65d56d5L,0x63f2fd4d2dc056L,0x326413d310ea6dL, + 0x3abba5bca92876L,0x6b9aa8bc4d6ebeL,0x1961c687f15d5dL, + 0x311cf07464c381L }, + { 0x2321b1064cd8aeL,0x6e3caac4443850L,0x3346fc4887d2d0L, + 0x1640417e0e640fL,0x4a958a52a07a9eL,0x1346a1b1cb374cL, + 0x0a793cf79beccbL } }, + /* 153 */ + { { 0x29d56cba89aaa5L,0x1581898c0b3c15L,0x1af5b77293c082L, + 0x1617ba53a006ceL,0x62dd3b384e475fL,0x71a9820c3f962aL, + 0x0e4938920b854eL }, + { 0x0b8d98849808abL,0x64c14923546de7L,0x6a20883b78a6fcL, + 0x72de211428acd6L,0x009678b47915bbL,0x21b5269ae5dae6L, + 0x313cc0e60b9457L } }, + /* 154 */ + { { 0x69ee421b1de38bL,0x44b484c6cec1c7L,0x0240596c6a8493L, + 0x2321a62c85fb9eL,0x7a10921802a341L,0x3d2a95507e45c3L, + 0x0752f40f3b6714L }, + { 0x596a38798751e6L,0x46bf186a0feb85L,0x0b23093e23b49cL, + 0x1bfa7bc5afdc07L,0x4ba96f873eefadL,0x292e453fae9e44L, + 0x2773646667b75cL } }, + /* 155 */ + { { 0x1f81a64e94f22aL,0x3125ee3d8683ddL,0x76a660a13b9582L, + 0x5aa584c3640c6eL,0x27cc99fd472953L,0x7048f4d58061d1L, + 0x379a1397ac81e8L }, + { 0x5d1ecd2b6b956bL,0x0829e0366b0697L,0x49548cec502421L, + 
0x7af5e2f717c059L,0x329a25a0fec54eL,0x028e99e4bcd7f1L, + 0x071d5fe81fca78L } }, + /* 156 */ + { { 0x4b5c4aeb0fdfe4L,0x1367e11326ce37L,0x7c16f020ef5f19L, + 0x3c55303d77b471L,0x23a4457a06e46aL,0x2174426dd98424L, + 0x226f592114bd69L }, + { 0x4411b94455f15aL,0x52e0115381fae4L,0x45b6d8efbc8f7eL, + 0x58b1221bd86d26L,0x284fb6f8a7ec1fL,0x045835939ddd30L, + 0x0216960accd598L } }, + /* 157 */ + { { 0x4b61f9ec1f138aL,0x4460cd1e18502bL,0x277e4fce3c4726L, + 0x0244246d6414b9L,0x28fbfcef256984L,0x3347ed0db40577L, + 0x3b57fa9e044718L }, + { 0x4f73bcd6d1c833L,0x2c0d0dcf7f0136L,0x2010ac75454254L, + 0x7dc4f6151539a8L,0x0b8929ef6ea495L,0x517e20119d2bdfL, + 0x1e29f9a126ba15L } }, + /* 158 */ + { { 0x683a7c10470cd8L,0x0d05f0dbe0007fL,0x2f6a5026d649cdL, + 0x249ce2fdaed603L,0x116dc1e7a96609L,0x199bd8d82a0b98L, + 0x0694ad0219aeb2L }, + { 0x03a3656e864045L,0x4e552273df82a6L,0x19bcc7553d17abL, + 0x74ac536c1df632L,0x440302fb4a86f6L,0x1becec0e31c9feL, + 0x002045f8fa46b8L } }, + /* 159 */ + { { 0x5833ba384310a2L,0x1db83fad93f8baL,0x0a12713ee2f7edL, + 0x40e0f0fdcd2788L,0x1746de5fb239a5L,0x573748965cfa15L, + 0x1e3dedda0ef650L }, + { 0x6c8ca1c87607aeL,0x785dab9554fc0eL,0x649d8f91860ac8L, + 0x4436f88b52c0f9L,0x67f22ca8a5e4a3L,0x1f990fd219e4c9L, + 0x013dd21c08573fL } }, + /* 160 */ + { { 0x05d116141d161cL,0x5c1d2789da2ea5L,0x11f0d861f99f34L, + 0x692c2650963153L,0x3bd69f5329539eL,0x215898eef8885fL, + 0x041f79dd86f7f1L }, + { 0x76dcc5e96beebdL,0x7f2b50cb42a332L,0x067621cabef8abL, + 0x31e0be607054edL,0x4c67c5e357a3daL,0x5b1a63fbfb1c2bL, + 0x3112efbf5e5c31L } }, + /* 161 */ + { { 0x3f83e24c0c62f1L,0x51dc9c32aae4e0L,0x2ff89b33b66c78L, + 0x21b1c7d354142cL,0x243d8d381c84bcL,0x68729ee50cf4b7L, + 0x0ed29e0f442e09L }, + { 0x1ad7b57576451eL,0x6b2e296d6b91dcL,0x53f2b306e30f42L, + 0x3964ebd9ee184aL,0x0a32855df110e4L,0x31f2f90ddae05fL, + 0x3410cd04e23702L } }, + /* 162 */ + { { 0x60d1522ca8f2feL,0x12909237a83e34L,0x15637f80d58590L, + 0x3c72431b6d714dL,0x7c8e59a615bea2L,0x5f977b688ef35aL, + 0x071c198c0b3ab0L }, + { 0x2b54c699699b4bL,0x14da473c2fd0bcL,0x7ba818ea0ad427L, + 0x35117013940b2fL,0x6e1df6b5e609dbL,0x3f42502720b64dL, + 0x01ee7dc890e524L } }, + /* 163 */ + { { 0x12ec1448ff4e49L,0x3e2edac882522bL,0x20455ab300f93aL, + 0x5849585bd67c14L,0x0393d5aa34ba8bL,0x30f9a1f2044fa7L, + 0x1059c9377a93e0L }, + { 0x4e641cc0139e73L,0x0d9f23c9b0fa78L,0x4b2ad87e2b83f9L, + 0x1c343a9f6d9e3cL,0x1098a4cb46de4dL,0x4ddc893843a41eL, + 0x1797f4167d6e3aL } }, + /* 164 */ + { { 0x4add4675856031L,0x499bd5e5f7a0ffL,0x39ea1f1202271eL, + 0x0ecd7480d7a91eL,0x395f5e5fc10956L,0x0fa7f6b0c9f79bL, + 0x2fad4623aed6cbL }, + { 0x1563c33ae65825L,0x29881cafac827aL,0x50650baf4c45a1L, + 0x034aad988fb9e9L,0x20a6224dc5904cL,0x6fb141a990732bL, + 0x3ec9ae1b5755deL } }, + /* 165 */ + { { 0x3108e7c686ae17L,0x2e73a383b4ad8aL,0x4e6bb142ba4243L, + 0x24d355922c1d80L,0x2f850dd9a088baL,0x21c50325dd5e70L, + 0x33237dd5bd7fa4L }, + { 0x7823a39cab7630L,0x1535f71cff830eL,0x70d92ff0599261L, + 0x227154d2a2477cL,0x495e9bbb4f871cL,0x40d2034835686bL, + 0x31b08f97eaa942L } }, + /* 166 */ + { { 0x0016c19034d8ddL,0x68961627cf376fL,0x6acc90681615aeL, + 0x6bc7690c2e3204L,0x6ddf28d2fe19a2L,0x609b98f84dae4dL, + 0x0f32bfd7c94413L }, + { 0x7d7edc6b21f843L,0x49bbd2ebbc9872L,0x593d6ada7b6a23L, + 0x55736602939e9cL,0x79461537680e39L,0x7a7ee9399ca7cdL, + 0x008776f6655effL } }, + /* 167 */ + { { 0x64585f777233cfL,0x63ec12854de0f6L,0x6b7f9bbbc3f99dL, + 0x301c014b1b55d3L,0x7cf3663bbeb568L,0x24959dcb085bd1L, + 0x12366aa6752881L }, + { 0x77a74c0da5e57aL,0x3279ca93ad939fL,0x33c3c8a1ef08c9L, + 
0x641b05ab42825eL,0x02f416d7d098dbL,0x7e3d58be292b68L, + 0x1864dbc46e1f46L } }, + /* 168 */ + { { 0x1da167b8153a9dL,0x47593d07d9e155L,0x386d984e12927fL, + 0x421a6f08a60c7cL,0x5ae9661c24dab3L,0x7927b2e7874507L, + 0x3266ea80609d53L }, + { 0x7d198f4c26b1e3L,0x430d4ea2c4048eL,0x58d8ab77e84ba3L, + 0x1cb14299c37297L,0x6db6031e8f695cL,0x159bd855e26d55L, + 0x3f3f6d318a73ddL } }, + /* 169 */ + { { 0x3ee958cca40298L,0x02a7e5eba32ad6L,0x43b4bab96f0e1eL, + 0x534be79062b2b1L,0x029ead089b37e3L,0x4d585da558f5aaL, + 0x1f9737eb43c376L }, + { 0x0426dfd9b86202L,0x4162866bc0a9f3L,0x18fc518e7bb465L, + 0x6db63380fed812L,0x421e117f709c30L,0x1597f8d0f5cee6L, + 0x04ffbf1289b06aL } }, + /* 170 */ + { { 0x61a1987ffa0a5fL,0x42058c7fc213c6L,0x15b1d38447d2c9L, + 0x3d5f5d7932565eL,0x5db754af445fa7L,0x5d489189fba499L, + 0x02c4c55f51141bL }, + { 0x26b15972e9993dL,0x2fc90bcbd97c45L,0x2ff60f8684b0f1L, + 0x1dc641dd339ab0L,0x3e38e6be23f82cL,0x3368162752c817L, + 0x19bba80ceb45ceL } }, + /* 171 */ + { { 0x7c6e95b4c6c693L,0x6bbc6d5efa7093L,0x74d7f90bf3bf1cL, + 0x54d5be1f0299a1L,0x7cb24f0aa427c6L,0x0a18f3e086c941L, + 0x058a1c90e4faefL }, + { 0x3d6bd016927e1eL,0x1da4ce773098b8L,0x2133522e690056L, + 0x0751416d3fc37eL,0x1beed1643eda66L,0x5288b6727d5c54L, + 0x199320e78655c6L } }, + /* 172 */ + { { 0x74575027eeaf94L,0x124bd533c3ceaeL,0x69421ab7a8a1d7L, + 0x37f2127e093f3dL,0x40281765252a08L,0x25a228798d856dL, + 0x326eca62759c4cL }, + { 0x0c337c51acb0a5L,0x122ba78c1ef110L,0x02498adbb68dc4L, + 0x67240c124b089eL,0x135865d25d9f89L,0x338a76d5ae5670L, + 0x03a8efaf130385L } }, + /* 173 */ + { { 0x3a450ac5e49beaL,0x282af80bb4b395L,0x6779eb0db1a139L, + 0x737cabdd174e55L,0x017b14ca79b5f2L,0x61fdef6048e137L, + 0x3acc12641f6277L }, + { 0x0f730746fe5096L,0x21d05c09d55ea1L,0x64d44bddb1a560L, + 0x75e5035c4778deL,0x158b7776613513L,0x7b5efa90c7599eL, + 0x2caa0791253b95L } }, + /* 174 */ + { { 0x288e5b6d53e6baL,0x435228909d45feL,0x33b4cf23b2a437L, + 0x45b352017d6db0L,0x4372d579d6ef32L,0x0fa9e5badbbd84L, + 0x3a78cff24759bbL }, + { 0x0899d2039eab6eL,0x4cf47d2f76bc22L,0x373f739a3a8c69L, + 0x09beaa5b1000b3L,0x0acdfbe83ebae5L,0x10c10befb0e900L, + 0x33d2ac4cc31be3L } }, + /* 175 */ + { { 0x765845931e08fbL,0x2a3c2a0dc58007L,0x7270da587d90e1L, + 0x1ee648b2bc8f86L,0x5d2ca68107b29eL,0x2b7064846e9e92L, + 0x3633ed98dbb962L }, + { 0x5e0f16a0349b1bL,0x58d8941f570ca4L,0x20abe376a4cf34L, + 0x0f4bd69a360977L,0x21eb07cc424ba7L,0x720d2ecdbbe6ecL, + 0x255597d5a97c34L } }, + /* 176 */ + { { 0x67bbf21a0f5e94L,0x422a3b05a64fc1L,0x773ac447ebddc7L, + 0x1a1331c08019f1L,0x01ef6d269744ddL,0x55f7be5b3b401aL, + 0x072e031c681273L }, + { 0x7183289e21c677L,0x5e0a3391f3162fL,0x5e02d9e65d914aL, + 0x07c79ea1adce2fL,0x667ca5c2e1cbe4L,0x4f287f22caccdaL, + 0x27eaa81673e75bL } }, + /* 177 */ + { { 0x5246180a078fe6L,0x67cc8c9fa3bb15L,0x370f8dd123db31L, + 0x1938dafa69671aL,0x5af72624950c5eL,0x78cc5221ebddf8L, + 0x22d616fe2a84caL }, + { 0x723985a839327fL,0x24fa95584a5e22L,0x3d8a5b3138d38bL, + 0x3829ef4a017acfL,0x4f09b00ae055c4L,0x01df84552e4516L, + 0x2a7a18993e8306L } }, + /* 178 */ + { { 0x7b6224bc310eccL,0x69e2cff429da16L,0x01c850e5722869L, + 0x2e4889443ee84bL,0x264a8df1b3d09fL,0x18a73fe478d0d6L, + 0x370b52740f9635L }, + { 0x52b7d3a9d6f501L,0x5c49808129ee42L,0x5b64e2643fd30cL, + 0x27d903fe31b32cL,0x594cb084d078f9L,0x567fb33e3ae650L, + 0x0db7be9932cb65L } }, + /* 179 */ + { { 0x19b78113ed7cbeL,0x002b2f097a1c8cL,0x70b1dc17fa5794L, + 0x786e8419519128L,0x1a45ba376af995L,0x4f6aa84b8d806cL, + 0x204b4b3bc7ca47L }, + { 0x7581a05fd94972L,0x1c73cadb870799L,0x758f6fefc09b88L, + 
0x35c62ba8049b42L,0x6f5e71fc164cc3L,0x0cd738b5702721L, + 0x10021afac9a423L } }, + /* 180 */ + { { 0x654f7937e3c115L,0x5d198288b515cbL,0x4add965c25a6e3L, + 0x5a37df33cd76ffL,0x57bb7e288e1631L,0x049b69089e1a31L, + 0x383a88f4122a99L }, + { 0x4c0e4ef3d80a73L,0x553c77ac9f30e2L,0x20bb18c2021e82L, + 0x2aec0d1c4225c5L,0x397fce0ac9c302L,0x2ab0c2a246e8aaL, + 0x02e5e5190be080L } }, + /* 181 */ + { { 0x7a255a4ae03080L,0x0d68b01513f624L,0x29905bd4e48c8cL, + 0x1d81507027466bL,0x1684aaeb70dee1L,0x7dd460719f0981L, + 0x29c43b0f0a390cL }, + { 0x272567681b1f7dL,0x1d2a5f8502e0efL,0x0fd5cd6b221befL, + 0x5eb4749e9a0434L,0x7d1553a324e2a6L,0x2eefd8e86a7804L, + 0x2ad80d5335109cL } }, + /* 182 */ + { { 0x25342aef4c209dL,0x24e811ac4e0865L,0x3f209757f8ae9dL, + 0x1473ff8a5da57bL,0x340f61c3919cedL,0x7523bf85fb9bc0L, + 0x319602ebca7cceL }, + { 0x121e7541d442cbL,0x4ffa748e49c95cL,0x11493cd1d131dcL, + 0x42b215172ab6b5L,0x045fd87e13cc77L,0x0ae305df76342fL, + 0x373b033c538512L } }, + /* 183 */ + { { 0x389541e9539819L,0x769f3b29b7e239L,0x0d05f695e3232cL, + 0x029d04f0e9a9fbL,0x58b78b7a697fb8L,0x7531b082e6386bL, + 0x215d235bed95a9L }, + { 0x503947c1859c5dL,0x4b82a6ba45443fL,0x78328eab71b3a5L, + 0x7d8a77f8cb3509L,0x53fcd9802e41d4L,0x77552091976edbL, + 0x226c60ad7a5156L } }, + /* 184 */ + { { 0x77ad6a43360710L,0x0fdeabd326d7aeL,0x4012886c92104aL, + 0x2d6c378dd7ae33L,0x7e72ef2c0725f3L,0x4a4671f4ca18e0L, + 0x0afe3b4bb6220fL }, + { 0x212cf4b56e0d6aL,0x7c24d086521960L,0x0662cf71bd414dL, + 0x1085b916c58c25L,0x781eed2be9a350L,0x26880e80db6ab2L, + 0x169e356442f061L } }, + /* 185 */ + { { 0x57aa2ad748b02cL,0x68a34256772a9aL,0x1591c44962f96cL, + 0x110a9edd6e53d2L,0x31eab597e091a3L,0x603e64e200c65dL, + 0x2f66b72e8a1cfcL }, + { 0x5c79d138543f7fL,0x412524363fdfa3L,0x547977e3b40008L, + 0x735ca25436d9f7L,0x232b4888cae049L,0x27ce37a53d8f23L, + 0x34d45881a9b470L } }, + /* 186 */ + { { 0x76b95255924f43L,0x035c9f3bd1aa5dL,0x5eb71a010b4bd0L, + 0x6ce8dda7e39f46L,0x35679627ea70c0L,0x5c987767c7d77eL, + 0x1fa28952b620b7L }, + { 0x106f50b5924407L,0x1cc3435a889411L,0x0597cdce3bc528L, + 0x738f8b0d5077d1L,0x5894dd60c7dd6aL,0x0013d0721f5e2eL, + 0x344573480527d3L } }, + /* 187 */ + { { 0x2e2c1da52abf77L,0x394aa8464ad05eL,0x095259b7330a83L, + 0x686e81cf6a11f5L,0x405c7e48c93c7cL,0x65c3ca9444a2ecL, + 0x07bed6c59c3563L }, + { 0x51f9d994fb1471L,0x3c3ecfa5283b4eL,0x494dccda63f6ccL, + 0x4d07b255363a75L,0x0d2b6d3155d118L,0x3c688299fc9497L, + 0x235692fa3dea3aL } }, + /* 188 */ + { { 0x16b4d452669e98L,0x72451fa85406b9L,0x674a145d39151fL, + 0x325ffd067ae098L,0x527e7805cd1ae0L,0x422a1d1789e48dL, + 0x3e27be63f55e07L }, + { 0x7f95f6dee0b63fL,0x008e444cc74969L,0x01348f3a72b614L, + 0x000cfac81348c3L,0x508ae3e5309ce5L,0x2584fcdee44d34L, + 0x3a4dd994899ee9L } }, + /* 189 */ + { { 0x4d289cc0368708L,0x0e5ebc60dc3b40L,0x78cc44bfab1162L, + 0x77ef2173b7d11eL,0x06091718e39746L,0x30fe19319b83a4L, + 0x17e8f2988529c6L }, + { 0x68188bdcaa9f2aL,0x0e64b1350c1bddL,0x5b18ebac7cc4b3L, + 0x75315a9fcc046eL,0x36e9770fd43db4L,0x54c5857fc69121L, + 0x0417e18f3e909aL } }, + /* 190 */ + { { 0x29795db38059adL,0x6efd20c8fd4016L,0x3b6d1ce8f95a1aL, + 0x4db68f177f8238L,0x14ec7278d2340fL,0x47bd77ff2b77abL, + 0x3d2dc8cd34e9fcL }, + { 0x285980a5a83f0bL,0x08352e2d516654L,0x74894460481e1bL, + 0x17f6f3709c480dL,0x6b590d1b55221eL,0x45c100dc4c9be9L, + 0x1b13225f9d8b91L } }, + /* 191 */ + { { 0x0b905fb4b41d9dL,0x48cc8a474cb7a2L,0x4eda67e8de09b2L, + 0x1de47c829adde8L,0x118ad5b9933d77L,0x7a12665ac3f9a4L, + 0x05631a4fb52997L }, + { 0x5fb2a8e6806e63L,0x27d96bbcca369bL,0x46066f1a6b8c7bL, + 
0x63b58fc7ca3072L,0x170a36229c0d62L,0x57176f1e463203L, + 0x0c7ce083e73b9cL } }, + /* 192 */ + { { 0x31caf2c09e1c72L,0x6530253219e9d2L,0x7650c98b601c57L, + 0x182469f99d56c0L,0x415f65d292b7a7L,0x30f62a55549b8eL, + 0x30f443f643f465L }, + { 0x6b35c575ddadd0L,0x14a23cf6d299eeL,0x2f0198c0967d7dL, + 0x1013058178d5bfL,0x39da601c9cc879L,0x09d8963ec340baL, + 0x1b735db13ad2a7L } }, + /* 193 */ + { { 0x20916ffdc83f01L,0x16892aa7c9f217L,0x6bff179888d532L, + 0x4adf3c3d366288L,0x41a62b954726aeL,0x3139609022aeb6L, + 0x3e8ab9b37aff7aL }, + { 0x76bbc70f24659aL,0x33fa98513886c6L,0x13b26af62c4ea6L, + 0x3c4d5826389a0cL,0x526ec28c02bf6aL,0x751ff083d79a7cL, + 0x110ac647990224L } }, + /* 194 */ + { { 0x2c6c62fa2b6e20L,0x3d37edad30c299L,0x6ef25b44b65fcaL, + 0x7470846914558eL,0x712456eb913275L,0x075a967a9a280eL, + 0x186c8188f2a2a0L }, + { 0x2f3b41a6a560b1L,0x3a8070b3f9e858L,0x140936ff0e1e78L, + 0x5fd298abe6da8aL,0x3823a55d08f153L,0x3445eafaee7552L, + 0x2a5fc96731a8b2L } }, + /* 195 */ + { { 0x06317be58edbbbL,0x4a38f3bfbe2786L,0x445b60f75896b7L, + 0x6ec7c92b5adf57L,0x07b6be8038a441L,0x1bcfe002879655L, + 0x2a2174037d6d0eL }, + { 0x776790cf9e48bdL,0x73e14a2c4ed1d3L,0x7eb5ed5f2fc2f7L, + 0x3e0aedb821b384L,0x0ee3b7e151c12fL,0x51a6a29e044bb2L, + 0x0ba13a00cb0d86L } }, + /* 196 */ + { { 0x77607d563ec8d8L,0x023fc726996e44L,0x6bd63f577a9986L, + 0x114a6351e53973L,0x3efe97989da046L,0x1051166e117ed7L, + 0x0354933dd4fb5fL }, + { 0x7699ca2f30c073L,0x4c973b83b9e6d3L,0x2017c2abdbc3e8L, + 0x0cdcdd7a26522bL,0x511070f5b23c7dL,0x70672327e83d57L, + 0x278f842b4a9f26L } }, + /* 197 */ + { { 0x0824f0d4ae972fL,0x60578dd08dcf52L,0x48a74858290fbbL, + 0x7302748bf23030L,0x184b229a178acfL,0x3e8460ade089d6L, + 0x13f2b557fad533L }, + { 0x7f96f3ae728d15L,0x018d8d40066341L,0x01fb94955a289aL, + 0x2d32ed6afc2657L,0x23f4f5e462c3acL,0x60eba5703bfc5aL, + 0x1b91cc06f16c7aL } }, + /* 198 */ + { { 0x411d68af8219b9L,0x79cca36320f4eeL,0x5c404e0ed72e20L, + 0x417cb8692e43f2L,0x305d29c7d98599L,0x3b754d5794a230L, + 0x1c97fb4be404e9L }, + { 0x7cdbafababd109L,0x1ead0eb0ca5090L,0x1a2b56095303e3L, + 0x75dea935012c8fL,0x67e31c071b1d1dL,0x7c324fbfd172c3L, + 0x157e257e6498f7L } }, + /* 199 */ + { { 0x19b00db175645bL,0x4c4f6cb69725f1L,0x36d9ce67bd47ceL, + 0x2005e105179d64L,0x7b952e717867feL,0x3c28599204032cL, + 0x0f5659d44fb347L }, + { 0x1ebcdedb979775L,0x4378d45cfd11a8L,0x14c85413ca66e9L, + 0x3dd17d681c8a4dL,0x58368e7dc23142L,0x14f3eaac6116afL, + 0x0adb45b255f6a0L } }, + /* 200 */ + { { 0x2f5e76279ad982L,0x125b3917034d09L,0x3839a6399e6ed3L, + 0x32fe0b3ebcd6a2L,0x24ccce8be90482L,0x467e26befcc187L, + 0x2828434e2e218eL }, + { 0x17247cd386efd9L,0x27f36a468d85c3L,0x65e181ef203bbfL, + 0x0433a6761120afL,0x1d607a2a8f8625L,0x49f4e55a13d919L, + 0x3367c3b7943e9dL } }, + /* 201 */ + { { 0x3391c7d1a46d4dL,0x38233d602d260cL,0x02127a0f78b7d4L, + 0x56841c162c24c0L,0x4273648fd09aa8L,0x019480bb0e754eL, + 0x3b927987b87e58L }, + { 0x6676be48c76f73L,0x01ec024e9655aeL,0x720fe1c6376704L, + 0x17e06b98885db3L,0x656adec85a4200L,0x73780893c3ce88L, + 0x0a339cdd8df664L } }, + /* 202 */ + { { 0x69af7244544ac7L,0x31ab7402084d2fL,0x67eceb7ef7cb19L, + 0x16f8583b996f61L,0x1e208d12faf91aL,0x4a91584ce4a42eL, + 0x3e08337216c93eL }, + { 0x7a6eea94f4cf77L,0x07a52894678c60L,0x302dd06b14631eL, + 0x7fddb7225c9ceaL,0x55e441d7acd153L,0x2a00d4490b0f44L, + 0x053ef125338cdbL } }, + /* 203 */ + { { 0x120c0c51584e3cL,0x78b3efca804f37L,0x662108aefb1dccL, + 0x11deb55f126709L,0x66def11ada8125L,0x05bbc0d1001711L, + 0x1ee1c99c7fa316L }, + { 0x746f287de53510L,0x1733ef2e32d09cL,0x1df64a2b0924beL, + 
0x19758da8f6405eL,0x28f6eb3913e484L,0x7175a1090cc640L, + 0x048aee0d63f0bcL } }, + /* 204 */ + { { 0x1f3b1e3b0b29c3L,0x48649f4882a215L,0x485eca3a9e0dedL, + 0x4228ba85cc82e4L,0x36da1f39bc9379L,0x1659a7078499d1L, + 0x0a67d5f6c04188L }, + { 0x6ac39658afdce3L,0x0d667a0bde8ef6L,0x0ae6ec0bfe8548L, + 0x6d9cb2650571bfL,0x54bea107760ab9L,0x705c53bd340cf2L, + 0x111a86b610c70fL } }, + /* 205 */ + { { 0x7ecea05c6b8195L,0x4f8be93ce3738dL,0x305de9eb9f5d12L, + 0x2c3b9d3d474b56L,0x673691a05746c3L,0x2e3482c428c6eaL, + 0x2a8085fde1f472L }, + { 0x69d15877fd3226L,0x4609c9ec017cc3L,0x71e9b7fc1c3dbcL, + 0x4f8951254e2675L,0x63ee9d15afa010L,0x0f05775b645190L, + 0x28a0a439397ae3L } }, + /* 206 */ + { { 0x387fa03e9de330L,0x40cc32b828b6abL,0x02a482fbc04ac9L, + 0x68cad6e70429b7L,0x741877bff6f2c4L,0x48efe633d3b28bL, + 0x3e612218fe24b3L }, + { 0x6fc1d34fe37657L,0x3d04b9e1c8b5a1L,0x6a2c332ef8f163L, + 0x7ca97e2b135690L,0x37357d2a31208aL,0x29f02f2332bd68L, + 0x17c674c3e63a57L } }, + /* 207 */ + { { 0x683d9a0e6865bbL,0x5e77ec68ad4ce5L,0x4d18f236788bd6L, + 0x7f34b87204f4e3L,0x391ca40e9e578dL,0x3470ed6ddf4e23L, + 0x225544b3e50989L }, + { 0x48eda8cb4e462bL,0x2a948825cf9109L,0x473adedc7e1300L, + 0x37b843b82192edL,0x2b9ac1537dde36L,0x4efe7412732332L, + 0x29cc5981b5262bL } }, + /* 208 */ + { { 0x190d2fcad260f5L,0x7c53dd81d18027L,0x003def5f55db0eL, + 0x7f5ed25bee2df7L,0x2b87e9be167d2eL,0x2b999c7bbcd224L, + 0x1d68a2c260ad50L }, + { 0x010bcde84607a6L,0x0250de9b7e1bedL,0x746d36bfaf1b56L, + 0x3359475ff56abbL,0x7e84b9bc440b20L,0x2eaa7e3b52f162L, + 0x01165412f36a69L } }, + /* 209 */ + { { 0x639a02329e5836L,0x7aa3ee2e4d3a27L,0x5bc9b258ecb279L, + 0x4cb3dfae2d62c6L,0x08d9d3b0c6c437L,0x5a2c177d47eab2L, + 0x36120479fc1f26L }, + { 0x7609a75bd20e4aL,0x3ba414e17551fcL,0x42cd800e1b90c9L, + 0x04921811b88f9bL,0x4443697f9562fdL,0x3a8081b8186959L, + 0x3f5b5c97379e73L } }, + /* 210 */ + { { 0x6fd0e3cf13eafbL,0x3976b5415cbf67L,0x4de40889e48402L, + 0x17e4d36f24062aL,0x16ae7755cf334bL,0x2730ac94b7e0e1L, + 0x377592742f48e0L }, + { 0x5e10b18a045041L,0x682792afaae5a1L,0x19383ec971b816L, + 0x208b17dae2ffc0L,0x439f9d933179b6L,0x55485a9090bcaeL, + 0x1c316f42a2a35cL } }, + /* 211 */ + { { 0x67173897bdf646L,0x0b6956653ef94eL,0x5be3c97f7ea852L, + 0x3110c12671f08eL,0x2474076a3fc7ecL,0x53408be503fe72L, + 0x09155f53a5b44eL }, + { 0x5c804bdd4c27cdL,0x61e81eb8ffd50eL,0x2f7157fdf84717L, + 0x081f880d646440L,0x7aa892acddec51L,0x6ae70683443f33L, + 0x31ed9e8b33a75aL } }, + /* 212 */ + { { 0x0d724f8e357586L,0x1febbec91b4134L,0x6ff7b98a9475fdL, + 0x1c4d9b94e1f364L,0x2b8790499cef00L,0x42fd2080a1b31dL, + 0x3a3bbc6d9b0145L }, + { 0x75bfebc37e3ca9L,0x28db49c1723bd7L,0x50b12fa8a1f17aL, + 0x733d95bbc84b98L,0x45ede81f6c109eL,0x18f5e46fb37b5fL, + 0x34b980804aaec1L } }, + /* 213 */ + { { 0x56060c8a4f57bfL,0x0d2dfe223054c2L,0x718a5bbc03e5d6L, + 0x7b3344cc19b3b9L,0x4d11c9c054bcefL,0x1f5ad422c22e33L, + 0x2609299076f86bL }, + { 0x7b7a5fba89fd01L,0x7013113ef3b016L,0x23d5e0a173e34eL, + 0x736c14462f0f50L,0x1ef5f7ac74536aL,0x4baba6f4400ea4L, + 0x17b310612c9828L } }, + /* 214 */ + { { 0x4ebb19a708c8d3L,0x209f8c7f03d9bbL,0x00461cfe5798fbL, + 0x4f93b6ae822fadL,0x2e5b33b5ad5447L,0x40b024e547a84bL, + 0x22ffad40443385L }, + { 0x33809c888228bfL,0x559f655fefbe84L,0x0032f529fd2f60L, + 0x5a2191ece3478cL,0x5b957fcd771246L,0x6fec181f9ed123L, + 0x33eed3624136a3L } }, + /* 215 */ + { { 0x6a5df93b26139aL,0x55076598fd7134L,0x356a592f34f81dL, + 0x493c6b5a3d4741L,0x435498a4e2a39bL,0x2cd26a0d931c88L, + 0x01925ea3fc7835L }, + { 0x6e8d992b1efa05L,0x79508a727c667bL,0x5f3c15e6b4b698L, + 
0x11b6c755257b93L,0x617f5af4b46393L,0x248d995b2b6656L, + 0x339db62e2e22ecL } }, + /* 216 */ + { { 0x52537a083843dcL,0x6a283c82a768c7L,0x13aa6bf25227acL, + 0x768d76ba8baf5eL,0x682977a6525808L,0x67ace52ac23b0bL, + 0x2374b5a2ed612dL }, + { 0x7139e60133c3a4L,0x715697a4f1d446L,0x4b018bf36677a0L, + 0x1dd43837414d83L,0x505ec70730d4f6L,0x09ac100907fa79L, + 0x21caad6e03217eL } }, + /* 217 */ + { { 0x0776d3999d4d49L,0x33bdd87e8bcff8L,0x1036b87f068fadL, + 0x0a9b8ffde4c872L,0x7ab2533596b1eaL,0x305a88fb965378L, + 0x3356d8fa4d65e5L }, + { 0x3366fa77d1ff11L,0x1e0bdbdcd2075cL,0x46910cefc967caL, + 0x7ce700737a1ff6L,0x1c5dc15409c9bdL,0x368436b9bdb595L, + 0x3e7ccd6560b5efL } }, + /* 218 */ + { { 0x1443789422c792L,0x524792b1717f2bL,0x1f7c1d95048e7aL, + 0x5cfe2a225b0d12L,0x245594d29ce85bL,0x20134d254ce168L, + 0x1b83296803921aL }, + { 0x79a78285b3beceL,0x3c738c3f3124d6L,0x6ab9d1fe0907cdL, + 0x0652ceb7fc104cL,0x06b5f58c8ae3fdL,0x486959261c5328L, + 0x0b3813ae677c90L } }, + /* 219 */ + { { 0x66b9941ac37b82L,0x651a4b609b0686L,0x046711edf3fc31L, + 0x77f89f38faa89bL,0x2683ddbf2d5edbL,0x389ef1dfaa3c25L, + 0x20b3616e66273eL }, + { 0x3c6db6e0cb5d37L,0x5d7ae5dc342bc4L,0x74a1dc6c52062bL, + 0x6f7c0bec109557L,0x5c51f7bc221d91L,0x0d7b5880745288L, + 0x1c46c145c4b0ddL } }, + /* 220 */ + { { 0x59ed485ea99eccL,0x201b71956bc21dL,0x72d5c32f73de65L, + 0x1aefd76547643eL,0x580a452cfb2c2dL,0x7cb1a63f5c4dc9L, + 0x39a8df727737aaL }, + { 0x365a341deca452L,0x714a1ad1689cbaL,0x16981d12c42697L, + 0x5a124f4ac91c75L,0x1b2e3f2fedc0dbL,0x4a1c72b8e9d521L, + 0x3855b4694e4e20L } }, + /* 221 */ + { { 0x16b3d047181ae9L,0x17508832f011afL,0x50d33cfeb2ebd1L, + 0x1deae237349984L,0x147c641aa6adecL,0x24a9fb4ebb1ddbL, + 0x2b367504a7a969L }, + { 0x4c55a3d430301bL,0x379ef6a5d492cbL,0x3c56541fc0f269L, + 0x73a546e91698ceL,0x2c2b62ee0b9b5dL,0x6284184d43d0efL, + 0x0e1f5cf6a4b9f0L } }, + /* 222 */ + { { 0x44833e8cd3fdacL,0x28e6665cb71c27L,0x2f8bf87f4ddbf3L, + 0x6cc6c767fb38daL,0x3bc114d734e8b5L,0x12963d5a78ca29L, + 0x34532a161ece41L }, + { 0x2443af5d2d37e9L,0x54e6008c8c452bL,0x2c55d54111cf1bL, + 0x55ac7f7522575aL,0x00a6fba3f8575fL,0x3f92ef3b793b8dL, + 0x387b97d69ecdf7L } }, + /* 223 */ + { { 0x0b464812d29f46L,0x36161daa626f9aL,0x5202fbdb264ca5L, + 0x21245805ff1304L,0x7f9c4a65657885L,0x542d3887f9501cL, + 0x086420deef8507L }, + { 0x5e159aa1b26cfbL,0x3f0ef5ffd0a50eL,0x364b29663a432aL, + 0x49c56888af32a8L,0x6f937e3e0945d1L,0x3cbdeec6d766cdL, + 0x2d80d342ece61aL } }, + /* 224 */ + { { 0x255e3026d8356eL,0x4ddba628c4de9aL,0x074323b593e0d9L, + 0x333bdb0a10eefbL,0x318b396e473c52L,0x6ebb5a95efd3d3L, + 0x3f3bff52aa4e4fL }, + { 0x3138a111c731d5L,0x674365e283b308L,0x5585edd9c416f2L, + 0x466763d9070fd4L,0x1b568befce8128L,0x16eb040e7b921eL, + 0x3d5c898687c157L } }, + /* 225 */ + { { 0x14827736973088L,0x4e110d53f301e6L,0x1f811b09870023L, + 0x53b5e500dbcacaL,0x4ddf0df1e6a7dcL,0x1e9575fb10ce35L, + 0x3fdc153644d936L }, + { 0x763547e2260594L,0x26e5ae764efc59L,0x13be6f4d791a29L, + 0x2021e61e3a0cf1L,0x339cd2b4a1c202L,0x5c7451e08f5121L, + 0x3728b3a851be68L } }, + /* 226 */ + { { 0x78873653277538L,0x444b9ed2ee7156L,0x79ac8b8b069cd3L, + 0x5f0e90933770e8L,0x307662c615389eL,0x40fe6d95a80057L, + 0x04822170cf993cL }, + { 0x677d5690fbfec2L,0x0355af4ae95cb3L,0x417411794fe79eL, + 0x48daf87400a085L,0x33521d3b5f0aaaL,0x53567a3be00ff7L, + 0x04712ccfb1cafbL } }, + /* 227 */ + { { 0x2b983283c3a7f3L,0x579f11b146a9a6L,0x1143d3b16a020eL, + 0x20f1483ef58b20L,0x3f03e18d747f06L,0x3129d12f15de37L, + 0x24c911f7222833L }, + { 0x1e0febcf3d5897L,0x505e26c01cdaacL,0x4f45a9adcff0e9L, + 
0x14dfac063c5cebL,0x69e5ce713fededL,0x3481444a44611aL, + 0x0ea49295c7fdffL } }, + /* 228 */ + { { 0x64554cb4093beeL,0x344b4b18dd81f6L,0x350f43b4de9b59L, + 0x28a96a220934caL,0x4aa8da5689a515L,0x27171cbd518509L, + 0x0cfc1753f47c95L }, + { 0x7dfe091b615d6eL,0x7d1ee0aa0fb5c1L,0x145eef3200b7b5L, + 0x33fe88feeab18fL,0x1d62d4f87453e2L,0x43b8db4e47fff1L, + 0x1572f2b8b8f368L } }, + /* 229 */ + { { 0x6bc94e6b4e84f3L,0x60629dee586a66L,0x3bbad5fe65ca18L, + 0x217670db6c2fefL,0x0320a7f4e3272aL,0x3ccff0d976a6deL, + 0x3c26da8ae48cccL }, + { 0x53ecf156778435L,0x7533064765a443L,0x6c5c12f03ca5deL, + 0x44f8245350dabfL,0x342cdd777cf8b3L,0x2b539c42e9f58dL, + 0x10138affc279b1L } }, + /* 230 */ + { { 0x1b135e204c5ddbL,0x40887dfeaa1d37L,0x7fb0ef83da76ffL, + 0x521f2b79af55a5L,0x3f9b38b4c3f0d0L,0x20a9838cce61ceL, + 0x24bb4e2f4b1e32L }, + { 0x003f6aa386e27cL,0x68df59db0a0f8eL,0x21677d5192e713L, + 0x14ab9757501276L,0x411944af961524L,0x3184f39abc5c3fL, + 0x2a8dda80ca078dL } }, + /* 231 */ + { { 0x0592233cdbc95cL,0x54d5de5c66f40fL,0x351caa1512ab86L, + 0x681bdbee020084L,0x6ee2480c853e68L,0x6a5a44262b918fL, + 0x06574e15a3b91dL }, + { 0x31ba03dacd7fbeL,0x0c3da7c18a57a9L,0x49aaaded492d6bL, + 0x3071ff53469e02L,0x5efb4f0d7248c6L,0x6db5fb67f12628L, + 0x29cff668e3d024L } }, + /* 232 */ + { { 0x1b9ef3bb1b17ceL,0x6ccf8c24fe6312L,0x34c15487f45008L, + 0x1a84044095972cL,0x515073a47e449eL,0x2ddc93f9097feeL, + 0x1008fdc894c434L }, + { 0x08e5edb73399faL,0x65b1aa65547d4cL,0x3a117a1057c498L, + 0x7e16c3089d13acL,0x502f2ae4b6f851L,0x57a70f3eb62673L, + 0x111b48a9a03667L } }, + /* 233 */ + { { 0x5023024be164f1L,0x25ad117032401eL,0x46612b3bfe3427L, + 0x2f4f406a8a02b7L,0x16a93a5c4ddf07L,0x7ee71968fcdbe9L, + 0x2267875ace37daL }, + { 0x687e88b59eb2a6L,0x3ac7368fe716d3L,0x28d953a554a036L, + 0x34d52c0acca08fL,0x742a7cf8dd4fd9L,0x10bfeb8575ea60L, + 0x290e454d868dccL } }, + /* 234 */ + { { 0x4e72a3a8a4bdd2L,0x1ba36d1dee04d5L,0x7a43136b63195bL, + 0x6ca8e286a519f3L,0x568e64aece08a9L,0x571d5000b5c10bL, + 0x3f75e9f5dbdd40L }, + { 0x6fb0a698d6fa45L,0x0ce42209d7199cL,0x1f68275f708a3eL, + 0x5749832e91ec3cL,0x6c3665521428b2L,0x14b2bf5747bd4aL, + 0x3b6f940e42a22bL } }, + /* 235 */ + { { 0x4da0adbfb26c82L,0x16792a585f39acL,0x17df9dfda3975cL, + 0x4796b4afaf479bL,0x67be67234e0020L,0x69df5f201dda25L, + 0x09f71a4d12b3dcL }, + { 0x64ff5ec260a46aL,0x579c5b86385101L,0x4f29a7d549f697L, + 0x4e64261242e2ebL,0x54ecacdfb6b296L,0x46e0638b5fddadL, + 0x31eefd3208891dL } }, + /* 236 */ + { { 0x5b72c749fe01b2L,0x230cf27523713aL,0x533d1810e0d1e1L, + 0x5590db7d1dd1e2L,0x7b8ab73e8e43d3L,0x4c8a19bd1c17caL, + 0x19222ce9f74810L }, + { 0x6398b3dddc4582L,0x0352b7d88dfd53L,0x3c55b4e10c5a63L, + 0x38194d13f8a237L,0x106683fd25dd87L,0x59e0b62443458eL, + 0x196cb70aa9cbb9L } }, + /* 237 */ + { { 0x2885f7cd021d63L,0x162bfd4c3e1043L,0x77173dcf98fcd1L, + 0x13d4591d6add36L,0x59311154d0d8f2L,0x74336e86e79b8aL, + 0x13faadc5661883L }, + { 0x18938e7d9ec924L,0x14bcda8fcaa0a1L,0x706d85d41a1355L, + 0x0ac34520d168deL,0x5a92499fe17826L,0x36c2e3b4f00600L, + 0x29c2fd7b5f63deL } }, + /* 238 */ + { { 0x41250dfe2216c5L,0x44a0ec0366a217L,0x575bc1adf8b0dfL, + 0x5ff5cdbdb1800bL,0x7843d4dde8ca18L,0x5fa9e420865705L, + 0x235c38be6c6b02L }, + { 0x473b78aae91abbL,0x39470c6051e44bL,0x3f973cc2dc08c3L, + 0x2837932c5c91f6L,0x25e39ed754ec25L,0x1371c837118e53L, + 0x3b99f3b0aeafe2L } }, + /* 239 */ + { { 0x03acf51be46c65L,0x271fceacbaf5c3L,0x476589ed3a5e25L, + 0x78ec8c3c3c399cL,0x1f5c8bf4ac4c19L,0x730bb733ec68d2L, + 0x29a37e00dd287eL }, + { 0x448ed1bf92b5faL,0x10827c17b86478L,0x55e6fc05b28263L, + 
0x0af1226c73a66aL,0x0b66e5df0d09c1L,0x26128315a02682L, + 0x22d84932c5e808L } }, + /* 240 */ + { { 0x5ec3afc26e3392L,0x08e142e45c0084L,0x4388d5ad0f01feL, + 0x0f7acd36e6140cL,0x028c14ed97dffbL,0x311845675a38c6L, + 0x01c1c8f09a3062L }, + { 0x5a302f4cf49e7dL,0x79267e254a44e1L,0x746165052317a1L, + 0x53a09263a566e8L,0x7d478ad5f73abcL,0x187ce5c947dad3L, + 0x18564e1a1ec45fL } }, + /* 241 */ + { { 0x7b9577a9aa0486L,0x766b40c7aaaef6L,0x1f6a411f5db907L, + 0x4543dd4d80beaeL,0x0ad938c7482806L,0x451568bf4b9be1L, + 0x3367ec85d30a22L }, + { 0x5446425747843dL,0x18d94ac223c6b2L,0x052ff3a354d359L, + 0x0b4933f89723f5L,0x03fb517740e056L,0x226b892871dddaL, + 0x2768c2b753f0fdL } }, + /* 242 */ + { { 0x685282ccfa5200L,0x411ed433627b89L,0x77d5c9b8bc9c1dL, + 0x4a13ef2ee5cd29L,0x5582a612407c9eL,0x2307cb42fc3aa9L, + 0x2e661df79956b8L }, + { 0x0e972b015254deL,0x5b63e14def8adeL,0x06995be2ca4a95L, + 0x6cc0cc1e94bf27L,0x7ed8499fe0052aL,0x671a6ca5a5e0f9L, + 0x31e10d4ba10f05L } }, + /* 243 */ + { { 0x690af07e9b2d8aL,0x6030af9e32c8ddL,0x45c7ca3bf2b235L, + 0x40959077b76c81L,0x61eee7f70d5a96L,0x6b04f6aafe9e38L, + 0x3c726f55f1898dL }, + { 0x77d0142a1a6194L,0x1c1631215708b9L,0x403a4f0a9b7585L, + 0x066c8e29f7cef0L,0x6fc32f98cf575eL,0x518a09d818c297L, + 0x34144e99989e75L } }, + /* 244 */ + { { 0x6adbada859fb6aL,0x0dcfb6506ccd51L,0x68f88b8d573e0dL, + 0x4b1ce35bd9af30L,0x241c8293ece2c9L,0x3b5f402c5c4adeL, + 0x34b9b1ee6fde87L }, + { 0x5e625340075e63L,0x54c3f3d9050da1L,0x2a3f9152509016L, + 0x3274e46111bc18L,0x3a7504fd01ac73L,0x4169b387a43209L, + 0x35626f852bc6d4L } }, + /* 245 */ + { { 0x576a4f4662e53bL,0x5ea3f20eecec26L,0x4e5f02be5cd7b0L, + 0x72cc5ac3314be8L,0x0f604ed3201fe9L,0x2a29378ea54bceL, + 0x2d52bd4d6ec4b6L }, + { 0x6a4c2b212c1c76L,0x778fd64a1bfa6dL,0x326828691863d6L, + 0x5616c8bd06a336L,0x5fab552564da4dL,0x46640cab3e91d2L, + 0x1d21f06427299eL } }, + /* 246 */ + { { 0x2bfe37dde98e9cL,0x164c54822332ebL,0x5b736c7df266e4L, + 0x59dab3a8da084cL,0x0ae1eab346f118L,0x182090a4327e3fL, + 0x07b13489dae2e6L }, + { 0x3bc92645452baaL,0x30b159894ae574L,0x5b947c5c78e1f4L, + 0x18f0e004a3c77fL,0x48ca8f357077d9L,0x349ffdcef9bca9L, + 0x3ed224bfd54772L } }, + /* 247 */ + { { 0x1bdad02db8dff8L,0x69fab4450b44b6L,0x3b6802d187518bL, + 0x098368d8eb556cL,0x3fe1943fbefcf4L,0x008851d0de6d42L, + 0x322cbc4605fe25L }, + { 0x2528aaf0d51afbL,0x7d48a9363a0cecL,0x4ba8f77d9a8f8bL, + 0x7dee903437d6c7L,0x1ff5a0d9ccc4b4L,0x34d9bd2fa99831L, + 0x30d9e4f58667c6L } }, + /* 248 */ + { { 0x38909b51b85197L,0x7ba16992512bd4L,0x2c776cfcfffec5L, + 0x2be7879075843cL,0x557e2b05d28ffcL,0x641b17bc5ce357L, + 0x1fcaf8a3710306L }, + { 0x54dca2299a2d48L,0x745d06ef305acaL,0x7c41c65c6944c2L, + 0x679412ec431902L,0x48f2b15ee62827L,0x341a96d8afe06eL, + 0x2a78fd3690c0e1L } }, + /* 249 */ + { { 0x6b7cec83fbc9c6L,0x238e8a82eefc67L,0x5d3c1d9ff0928cL, + 0x55b816d6409bbfL,0x7969612adae364L,0x55b6ff96db654eL, + 0x129beca10073a9L }, + { 0x0b1d2acdfc73deL,0x5d1a3605fa64bdL,0x436076146743beL, + 0x64044b89fcce0cL,0x7ae7b3c18f7fafL,0x7f083ee27cea36L, + 0x0292cd0d7c1ff0L } }, + /* 250 */ + { { 0x5a3c4c019b7d2eL,0x1a35a9b89712fbL,0x38736cc4f18c72L, + 0x603dd832a44e6bL,0x000d1d44aed104L,0x69b1f2fc274ebeL, + 0x03a7b993f76977L }, + { 0x299f3b3e346910L,0x5243f45295afd5L,0x34342cbfa588bdL, + 0x72c40dd1155510L,0x718024fed2f991L,0x2f935e765ad82aL, + 0x246799ea371fb8L } }, + /* 251 */ + { { 0x24fe4c76250533L,0x01cafb02fdf18eL,0x505cb25d462882L, + 0x3e038175157d87L,0x7e3e99b10cdeb1L,0x38b7e72ebc7936L, + 0x081845f7c73433L }, + { 0x049e61be05ebd5L,0x6ab82d8f0581f6L,0x62adffb427ac2eL, + 
+        0x19431f809d198dL,0x36195f6c58b1d6L,0x22cc4c9dedc9a7L,
+        0x24b146d8e694fcL } },
+    /* 252 */
+    { { 0x7c7bc8288b364dL,0x5c10f683cb894aL,0x19a62a68452958L,
+        0x1fc24dcb4ce90eL,0x726baa4ed9581fL,0x1f34447dde73d6L,
+        0x04c56708f30a21L },
+      { 0x131e583a3f4963L,0x071215b4d502e7L,0x196aca542e5940L,
+        0x3afd5a91f7450eL,0x671b6eedf49497L,0x6aac7aca5c29e4L,
+        0x3fb512470f138bL } },
+    /* 253 */
+    { { 0x5eadc3f4eb453eL,0x16c795ba34b666L,0x5d7612a4697fddL,
+        0x24dd19bb499e86L,0x415b89ca3eeb9bL,0x7c83edf599d809L,
+        0x13bc64c9b70269L },
+      { 0x52d3243dca3233L,0x0b21444b3a96a7L,0x6d551bc0083b90L,
+        0x4f535b88c61176L,0x11e61924298010L,0x0a155b415bb61dL,
+        0x17f94fbd26658fL } },
+    /* 254 */
+    { { 0x2dd06b90c28c65L,0x48582339c8fa6eL,0x01ac8bf2085d94L,
+        0x053e660e020fdcL,0x1bece667edf07bL,0x4558f2b33ce24cL,
+        0x2f1a766e8673fcL },
+      { 0x1d77cd13c06819L,0x4d5dc5056f3a01L,0x18896c6fa18d69L,
+        0x120047ca76d625L,0x6af8457d4f4e45L,0x70ddc53358b60aL,
+        0x330e11130e82f0L } },
+    /* 255 */
+    { { 0x0643b1cd4c2356L,0x10a2ea0a8f7c92L,0x2752513011d029L,
+        0x4cd4c50321f579L,0x5fdf9ba5724792L,0x2f691653e2ddc0L,
+        0x0cfed3d84226cbL },
+      { 0x704902a950f955L,0x069bfdb87bbf0cL,0x5817eeda8a5f84L,
+        0x1914cdd9089905L,0x0e4a323d7b93f4L,0x1cc3fc340af0b2L,
+        0x23874161bd6303L } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_7(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_384_ecc_mulmod_stripe_7(r, &p384_base, p384_table,
+                                      k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[7];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_7(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 7, km);
+
+        err = sp_384_ecc_mulmod_base_7(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_7(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
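+ * Note (added): all seven limbs are OR-ed together and compared once, so
+ * the check does not branch on the value being tested.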
+ */
+static int sp_384_iszero_7(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_384_add_one_7(sp_digit* a)
+{
+    a[0]++;
+    sp_384_norm_7(a);
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r     A single precision integer.
+ * size  Maximum number of digits of r to fill.
+ * a     Byte array.
+ * n     Number of bytes in array to read.
+ */
+static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 47U) {
+            r[j] &= 0x7fffffffffffffL;
+            s = 55U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_7(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[48];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_384_from_bin(k, 7, buf, (int)sizeof(buf));
+            if (sp_384_cmp_7(k, p384_order2) < 0) {
+                sp_384_add_one_7(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
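+ *
+ * A minimal usage sketch (added, illustrative only; this function is
+ * normally reached through wc_ecc_make_key when SP math is enabled, and
+ * the rng/priv names here are hypothetical caller variables):
+ *
+ *     WC_RNG rng;
+ *     mp_int priv;
+ *     ecc_point* pub = wc_ecc_new_point();
+ *     wc_InitRng(&rng);
+ *     mp_init(&priv);
+ *     if (sp_ecc_make_key_384(&rng, &priv, pub, NULL) == MP_OKAY) {
+ *         /* priv holds the private scalar, pub the matching public point */
+ *     }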
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[7];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_7(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_7(rng, k);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_base_7(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_7(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_384_iszero_7(point->x) == 0) || (sp_384_iszero_7(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_7(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_7(infinity, 1, heap);
+#endif
+    sp_384_point_free_7(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    for (i=0; i<6; i++) {
+        r[i+1] += r[i] >> 55;
+        r[i] &= 0x7fffffffffffffL;
+    }
+    j = 384 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<7 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 55) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 55);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
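+ *
+ * A minimal ECDH sketch (added, illustrative only; priv is this side's
+ * private scalar and peerPub the other party's public point, both
+ * hypothetical caller variables assumed already initialized):
+ *
+ *     byte secret[48];
+ *     word32 secretLen = (word32)sizeof(secret);
+ *     int ret = sp_ecc_secret_gen_384(&priv, peerPub, secret,
+ *                                     &secretLen, NULL);
+ *     /* on MP_OKAY, secret holds the 48-byte shared X ordinate */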
+ */ +int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out, + word32* outLen, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 p; + sp_digit kd[7]; +#endif + sp_point_384* point = NULL; + sp_digit* k = NULL; + int err = MP_OKAY; + + if (*outLen < 48U) { + err = BUFFER_E; + } + + if (err == MP_OKAY) { + err = sp_384_point_new_7(heap, p, point); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + + if (err == MP_OKAY) { + sp_384_from_mp(k, 7, priv); + sp_384_point_from_ecc_point_7(point, pub); + err = sp_384_ecc_mulmod_7(point, point, k, 1, heap); + } + if (err == MP_OKAY) { + sp_384_to_bin(point->x, out); + *outLen = 48; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_7(point, 0, heap); + + return err; +} +#endif /* HAVE_ECC_DHE */ + +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_384_mul_d_7(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 7; i++) { + t += tb * a[i]; + r[i] = t & 0x7fffffffffffffL; + t >>= 55; + } + r[7] = (sp_digit)t; +#else + int128_t tb = b; + int128_t t[7]; + + t[ 0] = tb * a[ 0]; + t[ 1] = tb * a[ 1]; + t[ 2] = tb * a[ 2]; + t[ 3] = tb * a[ 3]; + t[ 4] = tb * a[ 4]; + t[ 5] = tb * a[ 5]; + t[ 6] = tb * a[ 6]; + r[ 0] = (t[ 0] & 0x7fffffffffffffL); + r[ 1] = (sp_digit)(t[ 0] >> 55) + (t[ 1] & 0x7fffffffffffffL); + r[ 2] = (sp_digit)(t[ 1] >> 55) + (t[ 2] & 0x7fffffffffffffL); + r[ 3] = (sp_digit)(t[ 2] >> 55) + (t[ 3] & 0x7fffffffffffffL); + r[ 4] = (sp_digit)(t[ 3] >> 55) + (t[ 4] & 0x7fffffffffffffL); + r[ 5] = (sp_digit)(t[ 4] >> 55) + (t[ 5] & 0x7fffffffffffffL); + r[ 6] = (sp_digit)(t[ 5] >> 55) + (t[ 6] & 0x7fffffffffffffL); + r[ 7] = (sp_digit)(t[ 6] >> 55); +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifdef WOLFSSL_SP_DIV_64 +static WC_INLINE sp_digit sp_384_div_word_7(sp_digit d1, sp_digit d0, + sp_digit dv) +{ + sp_digit d, r, t; + + /* All 55 bits from d1 and top 8 bits from d0. */ + d = (d1 << 8) | (d0 >> 47); + r = d / dv; + d -= r * dv; + /* Up to 9 bits in r */ + /* Next 8 bits from d0. */ + r <<= 8; + d <<= 8; + d |= (d0 >> 39) & ((1 << 8) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 17 bits in r */ + /* Next 8 bits from d0. */ + r <<= 8; + d <<= 8; + d |= (d0 >> 31) & ((1 << 8) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 25 bits in r */ + /* Next 8 bits from d0. */ + r <<= 8; + d <<= 8; + d |= (d0 >> 23) & ((1 << 8) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 33 bits in r */ + /* Next 8 bits from d0. */ + r <<= 8; + d <<= 8; + d |= (d0 >> 15) & ((1 << 8) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 41 bits in r */ + /* Next 8 bits from d0. */ + r <<= 8; + d <<= 8; + d |= (d0 >> 7) & ((1 << 8) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 49 bits in r */ + /* Remaining 7 bits from d0. 
*/
+    r <<= 7;
+    d <<= 7;
+    d |= d0 & ((1 << 7) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
+/* Divide a by d and put the remainder into r (a = m*d + r).
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_384_div_7(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_64
+    int128_t d1;
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* td;
+#else
+    sp_digit t1d[14], t2d[7 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 7 + 1), NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 2 * 7;
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        dv = d[6];
+        XMEMCPY(t1, a, sizeof(*t1) * 2U * 7U);
+        for (i=6; i>=0; i--) {
+            t1[7 + i] += t1[7 + i - 1] >> 55;
+            t1[7 + i - 1] &= 0x7fffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
+            d1 = t1[7 + i];
+            d1 <<= 55;
+            d1 += t1[7 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_384_div_word_7(t1[7 + i], t1[7 + i - 1], dv);
+#endif
+
+            sp_384_mul_d_7(t2, d, r1);
+            (void)sp_384_sub_7(&t1[i], &t1[i], t2);
+            t1[7 + i] -= t2[7];
+            t1[7 + i] += t1[7 + i - 1] >> 55;
+            t1[7 + i - 1] &= 0x7fffffffffffffL;
+            r1 = (((-t1[7 + i]) << 55) - t1[7 + i - 1]) / dv;
+            r1++;
+            sp_384_mul_d_7(t2, d, r1);
+            (void)sp_384_add_7(&t1[i], &t1[i], t2);
+            t1[7 + i] += t1[7 + i - 1] >> 55;
+            t1[7 + i - 1] &= 0x7fffffffffffffL;
+        }
+        t1[7 - 1] += t1[7 - 2] >> 55;
+        t1[7 - 2] &= 0x7fffffffffffffL;
+        r1 = t1[7 - 1] / dv;
+
+        sp_384_mul_d_7(t2, d, r1);
+        (void)sp_384_sub_7(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 7U);
+        for (i=0; i<6; i++) {
+            r[i+1] += r[i] >> 55;
+            r[i] &= 0x7fffffffffffffL;
+        }
+        sp_384_cond_add_7(r, r, d, 0 - ((r[6] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_384_mod_7(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_7(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve. */
+static const uint64_t p384_order_minus_2[6] = {
+    0xecec196accc52971U,0x581a0db248b0a77aU,0xc7634d81f4372ddfU,
+    0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve. */
+static const uint64_t p384_order_low[3] = {
+    0xecec196accc52971U,0x581a0db248b0a77aU,0xc7634d81f4372ddfU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of P384 curve.
+ * (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_7(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_7(r, a, b);
+    sp_384_mont_reduce_order_7(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_order_7(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_7(r, a);
+    sp_384_mont_reduce_order_7(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P384 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_n_order_7(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_7(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_7(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_order_7(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 7);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_7(t, t);
+        if ((p384_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_384_mont_mul_order_7(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 7U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 7;
+    sp_digit* t3 = td + 4 * 7;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_7(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_7(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_7(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_7(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_7(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_7(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_7(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_7(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_7(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_7(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_7(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_7(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
+    sp_384_mont_sqr_n_order_7(t2, t, 48);
+    /* t = a^ffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_7(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_7(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_7(t2, t2, t);
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_7(t2, t2);
+        if (((sp_digit)p384_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_384_mont_mul_order_7(t2, t2, a);
+        }
+    }
+    sp_384_mont_sqr_order_7(t2, t2);
+    sp_384_mont_mul_order_7(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 384 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Second part of result as an mp_int.
+ * km       Scalar to use as k when non-NULL and non-zero (consumed and
+ *          zeroed); otherwise a random k is generated.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
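+ *
+ * A minimal signing sketch (added, illustrative only; assumes the SHA-384
+ * digest is already in hash[48] and that rng/priv are hypothetical,
+ * already-initialized caller variables):
+ *
+ *     mp_int r, s;
+ *     mp_init(&r);
+ *     mp_init(&s);
+ *     if (sp_ecc_sign_384(hash, 48, &rng, &priv, &r, &s, NULL, NULL)
+ *             == MP_OKAY) {
+ *         /* (r, s) is the ECDSA signature over the digest */
+ *     }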
+ */ +int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv, + mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d = NULL; +#else + sp_digit ed[2*7]; + sp_digit xd[2*7]; + sp_digit kd[2*7]; + sp_digit rd[2*7]; + sp_digit td[3 * 2*7]; + sp_point_384 p; +#endif + sp_digit* e = NULL; + sp_digit* x = NULL; + sp_digit* k = NULL; + sp_digit* r = NULL; + sp_digit* tmp = NULL; + sp_point_384* point = NULL; + sp_digit carry; + sp_digit* s = NULL; + sp_digit* kInv = NULL; + int err = MP_OKAY; + int64_t c; + int i; + + (void)heap; + + err = sp_384_point_new_7(heap, p, point); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 7, heap, + DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + e = d + 0 * 7; + x = d + 2 * 7; + k = d + 4 * 7; + r = d + 6 * 7; + tmp = d + 8 * 7; +#else + e = ed; + x = xd; + k = kd; + r = rd; + tmp = td; +#endif + s = e; + kInv = k; + + if (hashLen > 48U) { + hashLen = 48U; + } + + sp_384_from_bin(e, 7, hash, (int)hashLen); + } + + for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { + sp_384_from_mp(x, 7, priv); + + /* New random point. */ + if (km == NULL || mp_iszero(km)) { + err = sp_384_ecc_gen_k_7(rng, k); + } + else { + sp_384_from_mp(k, 7, km); + mp_zero(km); + } + if (err == MP_OKAY) { + err = sp_384_ecc_mulmod_base_7(point, k, 1, NULL); + } + + if (err == MP_OKAY) { + /* r = point->x mod order */ + XMEMCPY(r, point->x, sizeof(sp_digit) * 7U); + sp_384_norm_7(r); + c = sp_384_cmp_7(r, p384_order); + sp_384_cond_sub_7(r, r, p384_order, 0L - (sp_digit)(c >= 0)); + sp_384_norm_7(r); + + /* Conv k to Montgomery form (mod order) */ + sp_384_mul_7(k, k, p384_norm_order); + err = sp_384_mod_7(k, k, p384_order); + } + if (err == MP_OKAY) { + sp_384_norm_7(k); + /* kInv = 1/k mod order */ + sp_384_mont_inv_order_7(kInv, k, tmp); + sp_384_norm_7(kInv); + + /* s = r * x + e */ + sp_384_mul_7(x, x, r); + err = sp_384_mod_7(x, x, p384_order); + } + if (err == MP_OKAY) { + sp_384_norm_7(x); + carry = sp_384_add_7(s, e, x); + sp_384_cond_sub_7(s, s, p384_order, 0 - carry); + sp_384_norm_7(s); + c = sp_384_cmp_7(s, p384_order); + sp_384_cond_sub_7(s, s, p384_order, 0L - (sp_digit)(c >= 0)); + sp_384_norm_7(s); + + /* s = s * k^-1 mod order */ + sp_384_mont_mul_order_7(s, s, kInv); + sp_384_norm_7(s); + + /* Check that signature is usable. 
*/
+            if (sp_384_iszero_7(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 7);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 7U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 7U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 7U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 7U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 7U);
+#endif
+    sp_384_point_free_7(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 384)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to verify.
+ * hashLen  Length of the hash data.
+ * pX       X ordinate of the public key point Q.
+ * pY       Y ordinate of the public key point Q.
+ * pZ       Z ordinate of the public key point Q.
+ * r        First part of the signature as an mp_int.
+ * sm       Second part of the signature as an mp_int.
+ * res      Set to 1 when the signature verifies, 0 otherwise.
+ * heap     Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*7];
+    sp_digit u2d[2*7];
+    sp_digit sd[2*7];
+    sp_digit tmpd[2*7 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int64_t c;
+    int err;
+
+    err = sp_384_point_new_7(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 7, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1 = d + 0 * 7;
+        u2 = d + 2 * 7;
+        s = d + 4 * 7;
+        tmp = d + 6 * 7;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(u1, 7, hash, (int)hashLen);
+        sp_384_from_mp(u2, 7, r);
+        sp_384_from_mp(s, 7, sm);
+        sp_384_from_mp(p2->x, 7, pX);
+        sp_384_from_mp(p2->y, 7, pY);
+        sp_384_from_mp(p2->z, 7, pZ);
+
+        {
+            sp_384_mul_7(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_7(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_7(s);
+        {
+            sp_384_mont_inv_order_7(s, s, tmp);
+            sp_384_mont_mul_order_7(u1, u1, s);
+            sp_384_mont_mul_order_7(u2, u2, s);
+        }
+
+        err = sp_384_ecc_mulmod_base_7(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_7(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_384_proj_point_add_7(p1, p1, p2, tmp);
+            if (sp_384_iszero_7(p1->z)) {
+                if (sp_384_iszero_7(p1->x) &&
+                    sp_384_iszero_7(p1->y)) {
+                    sp_384_proj_point_dbl_7(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 7, r);
+        err = sp_384_mod_mul_norm_7(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_7(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_7(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 7, r);
+            carry = sp_384_add_7(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_7(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_7(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montgomery form */
+                    err = sp_384_mod_mul_norm_7(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_7(u1, u2, p1->z, p384_mod,
+                                          p384_mp_mod);
+                        *res = (int)(sp_384_cmp_7(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_7(p1, 0, heap);
+    sp_384_point_free_7(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_384_ecc_is_point_7(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*7];
+    sp_digit t2d[2*7];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 7;
+        t2 = d + 2 * 7;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_384_sqr_7(t1, point->y);
+        (void)sp_384_mod_7(t1, t1, p384_mod);
+        sp_384_sqr_7(t2, point->x);
+        (void)sp_384_mod_7(t2, t2, p384_mod);
+        sp_384_mul_7(t2, t2, point->x);
+        (void)sp_384_mod_7(t2, t2, p384_mod);
+        (void)sp_384_sub_7(t2, p384_mod, t2);
+        sp_384_mont_add_7(t1, t1, t2, p384_mod);
+
+        sp_384_mont_add_7(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_7(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_7(t1, t1, point->x, p384_mod);
+
+        if (sp_384_cmp_7(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
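+ *
+ * Usage sketch (added, illustrative only; px and py are hypothetical
+ * mp_ints holding the affine coordinates):
+ *
+ *     int onCurve = (sp_ecc_is_point_384(&px, &py) == MP_OKAY);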
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 pubd;
+#endif
+    sp_point_384* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_7(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pub->x, 7, pX);
+        sp_384_from_mp(pub->y, 7, pY);
+        sp_384_from_bin(pub->z, 7, one, (int)sizeof(one));
+
+        err = sp_384_ecc_is_point_7(pub, NULL);
+    }
+
+    sp_384_point_free_7(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[7];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_7(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7, heap,
+                                  DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_384_from_mp(pub->x, 7, pX);
+        sp_384_from_mp(pub->y, 7, pY);
+        sp_384_from_bin(pub->z, 7, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 7, privm);
+
+        /* Check point at infinity. */
+        if ((sp_384_iszero_7(pub->x) != 0) &&
+            (sp_384_iszero_7(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_384_cmp_7(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_7(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_7(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+        err = sp_384_ecc_mulmod_7(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_7(p->x) == 0) ||
+            (sp_384_iszero_7(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+        err = sp_384_ecc_mulmod_base_7(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_7(p->x, pub->x) != 0 ||
+            sp_384_cmp_7(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(p, 0, heap);
+    sp_384_point_free_7(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ) + * + * pX First EC point's X ordinate. + * pY First EC point's Y ordinate. + * pZ First EC point's Z ordinate. + * qX Second EC point's X ordinate. + * qY Second EC point's Y ordinate. + * qZ Second EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 7 * 5]; + sp_point_384 pd; + sp_point_384 qd; +#endif + sp_digit* tmp; + sp_point_384* p; + sp_point_384* q = NULL; + int err; + + err = sp_384_point_new_7(NULL, pd, p); + if (err == MP_OKAY) { + err = sp_384_point_new_7(NULL, qd, q); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 7, pX); + sp_384_from_mp(p->y, 7, pY); + sp_384_from_mp(p->z, 7, pZ); + sp_384_from_mp(q->x, 7, qX); + sp_384_from_mp(q->y, 7, qY); + sp_384_from_mp(q->z, 7, qZ); + + sp_384_proj_point_add_7(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_7(q, 0, NULL); + sp_384_point_free_7(p, 0, NULL); + + return err; +} + +/* Double a projective EC point. + * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
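+ *
+ * (Added note: the generic point-add formula degenerates when both inputs
+ * are the same point - its internal difference terms become zero - so
+ * doubling gets its own routine; the verify code earlier in this file
+ * switches to sp_384_proj_point_dbl_7 when the two computed points
+ * coincide.)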
+ */ +int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 7 * 2]; + sp_point_384 pd; +#endif + sp_digit* tmp; + sp_point_384* p; + int err; + + err = sp_384_point_new_7(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 7, pX); + sp_384_from_mp(p->y, 7, pY); + sp_384_from_mp(p->z, 7, pZ); + + sp_384_proj_point_dbl_7(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_7(p, 0, NULL); + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 7 * 6]; + sp_point_384 pd; +#endif + sp_digit* tmp; + sp_point_384* p; + int err; + + err = sp_384_point_new_7(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 7, pX); + sp_384_from_mp(p->y, 7, pY); + sp_384_from_mp(p->z, 7, pZ); + + sp_384_map_7(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, pZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_7(p, 0, NULL); + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
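+ *
+ * (Added note: p384 is congruent to 3 mod 4, so when y is a quadratic
+ * residue a square root is y^((p384+1)/4) mod p384; the fixed chain of
+ * Montgomery squarings and multiplications below evaluates exactly that
+ * exponent.)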
+ */ +static int sp_384_mont_sqrt_7(sp_digit* y) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit t1d[2 * 7]; + sp_digit t2d[2 * 7]; + sp_digit t3d[2 * 7]; + sp_digit t4d[2 * 7]; + sp_digit t5d[2 * 7]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* t3; + sp_digit* t4; + sp_digit* t5; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 7, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = d + 0 * 7; + t2 = d + 2 * 7; + t3 = d + 4 * 7; + t4 = d + 6 * 7; + t5 = d + 8 * 7; +#else + t1 = t1d; + t2 = t2d; + t3 = t3d; + t4 = t4d; + t5 = t5d; +#endif + + { + /* t2 = y ^ 0x2 */ + sp_384_mont_sqr_7(t2, y, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3 */ + sp_384_mont_mul_7(t1, t2, y, p384_mod, p384_mp_mod); + /* t5 = y ^ 0xc */ + sp_384_mont_sqr_n_7(t5, t1, 2, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xf */ + sp_384_mont_mul_7(t1, t1, t5, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x1e */ + sp_384_mont_sqr_7(t2, t1, p384_mod, p384_mp_mod); + /* t3 = y ^ 0x1f */ + sp_384_mont_mul_7(t3, t2, y, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3e0 */ + sp_384_mont_sqr_n_7(t2, t3, 5, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3ff */ + sp_384_mont_mul_7(t1, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x7fe0 */ + sp_384_mont_sqr_n_7(t2, t1, 5, p384_mod, p384_mp_mod); + /* t3 = y ^ 0x7fff */ + sp_384_mont_mul_7(t3, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fff800 */ + sp_384_mont_sqr_n_7(t2, t3, 15, p384_mod, p384_mp_mod); + /* t4 = y ^ 0x3ffffff */ + sp_384_mont_mul_7(t4, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xffffffc000000 */ + sp_384_mont_sqr_n_7(t2, t4, 30, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xfffffffffffff */ + sp_384_mont_mul_7(t1, t4, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xfffffffffffffff000000000000000 */ + sp_384_mont_sqr_n_7(t2, t1, 60, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xffffffffffffffffffffffffffffff */ + sp_384_mont_mul_7(t1, t1, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */ + sp_384_mont_sqr_n_7(t2, t1, 120, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_7(t1, t1, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */ + sp_384_mont_sqr_n_7(t2, t1, 15, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_7(t1, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */ + sp_384_mont_sqr_n_7(t2, t1, 31, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */ + sp_384_mont_mul_7(t1, t4, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */ + sp_384_mont_sqr_n_7(t2, t1, 4, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */ + sp_384_mont_mul_7(t1, t5, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */ + sp_384_mont_sqr_n_7(t2, t1, 62, 
p384_mod, p384_mp_mod); + /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */ + sp_384_mont_mul_7(t1, y, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */ + sp_384_mont_sqr_n_7(y, t1, 30, p384_mod, p384_mp_mod); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit xd[2 * 7]; + sp_digit yd[2 * 7]; +#endif + sp_digit* x = NULL; + sp_digit* y = NULL; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 7, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + x = d + 0 * 7; + y = d + 2 * 7; +#else + x = xd; + y = yd; +#endif + + sp_384_from_mp(x, 7, xm); + err = sp_384_mod_mul_norm_7(x, x, p384_mod); + } + if (err == MP_OKAY) { + /* y = x^3 */ + { + sp_384_mont_sqr_7(y, x, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(y, y, x, p384_mod, p384_mp_mod); + } + /* y = x^3 - 3x */ + sp_384_mont_sub_7(y, y, x, p384_mod); + sp_384_mont_sub_7(y, y, x, p384_mod); + sp_384_mont_sub_7(y, y, x, p384_mod); + /* y = x^3 - 3x + b */ + err = sp_384_mod_mul_norm_7(x, p384_b, p384_mod); + } + if (err == MP_OKAY) { + sp_384_mont_add_7(y, y, x, p384_mod); + /* y = sqrt(x^3 - 3x + b) */ + err = sp_384_mont_sqrt_7(y); + } + if (err == MP_OKAY) { + XMEMSET(y + 7, 0, 7U * sizeof(sp_digit)); + sp_384_mont_reduce_7(y, p384_mod, p384_mp_mod); + if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) { + sp_384_mont_sub_7(y, p384_mod, y, p384_mod); + } + + err = sp_384_to_mp(y, ym); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} +#endif +#endif /* WOLFSSL_SP_384 */ +#endif /* WOLFSSL_HAVE_SP_ECC */ +#endif /* SP_WORD_SIZE == 64 */ +#endif /* !WOLFSSL_SP_ASM */ +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */ diff --git a/client/wolfssl/wolfcrypt/src/sp_cortexm.c b/client/wolfssl/wolfcrypt/src/sp_cortexm.c new file mode 100644 index 0000000..b03de8a --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/sp_cortexm.c @@ -0,0 +1,25687 @@ +/* sp.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implementation by Sean Parkinson. */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
+    defined(WOLFSSL_HAVE_SP_ECC)
+
+#ifdef RSA_LOW_MEM
+#ifndef WOLFSSL_SP_SMALL
+#define WOLFSSL_SP_SMALL
+#endif
+#endif
+
+#include <wolfssl/wolfcrypt/sp.h>
+
+#ifdef __IAR_SYSTEMS_ICC__
+#define __asm__        asm
+#define __volatile__   volatile
+#endif /* __IAR_SYSTEMS_ICC__ */
+#ifdef __KEIL__
+#define __asm__        __asm
+#define __volatile__   volatile
+#endif
+
+#ifdef WOLFSSL_SP_ARM_CORTEX_M_ASM
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 256
+ *
+ * r A single precision integer.
+ * a Byte array.
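+ *
+ * (Added note: with 32-bit digits the 2048-bit value occupies 64 words;
+ * r[0], the least significant digit, supplies the last four bytes
+ * a[252..255] and r[63] supplies a[0..3].)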
+ */ +static void sp_2048_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + j = 2048 / 8 - 1; + a[j] = 0; + for (i=0; i<64 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ + b += 8 - s; + if (j < 0) { + break; + } + while (b < 32) { + a[j--] = (byte)(r[i] >> b); + b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 32); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[8]; + + __asm__ __volatile__ ( + /* A[0] * B[0] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r3, r4, r6, r8\n\t" + "mov r5, #0\n\t" + "str r3, [%[tmp], #0]\n\t" + "mov r3, #0\n\t" + /* A[0] * B[1] */ + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r8\n\t" + /* A[1] * B[0] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[tmp], #4]\n\t" + "mov r4, #0\n\t" + /* A[0] * B[2] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * B[1] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[2] * B[0] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[tmp], #8]\n\t" + "mov r5, #0\n\t" + /* A[0] * B[3] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[1] * B[2] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[2] * B[1] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[3] * B[0] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[tmp], #12]\n\t" + "mov r3, #0\n\t" + /* A[0] * B[4] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[1] * B[3] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[2] * B[2] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[3] * B[1] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[4] * B[0] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[tmp], #16]\n\t" + "mov r4, #0\n\t" + 
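/* (Added note: this is product scanning: each result column k accumulates
+         * every partial product A[i]*B[j] with i + j == k in a rolling
+         * three-register window, carrying into the next column.) */
+        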
/* A[0] * B[5] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * B[4] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[2] * B[3] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[3] * B[2] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[4] * B[1] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[5] * B[0] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[tmp], #20]\n\t" + "mov r5, #0\n\t" + /* A[0] * B[6] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[1] * B[5] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[2] * B[4] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[3] * B[3] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[4] * B[2] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[5] * B[1] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[6] * B[0] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[tmp], #24]\n\t" + "mov r3, #0\n\t" + /* A[0] * B[7] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[1] * B[6] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[2] * B[5] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[3] * B[4] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[4] * B[3] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[5] * B[2] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[6] * B[1] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + 
"adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[7] * B[0] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[tmp], #28]\n\t" + "mov r4, #0\n\t" + /* A[1] * B[7] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[2] * B[6] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[3] * B[5] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[4] * B[4] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[5] * B[3] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[6] * B[2] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[7] * B[1] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[r], #32]\n\t" + "mov r5, #0\n\t" + /* A[2] * B[7] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[3] * B[6] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[4] * B[5] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[5] * B[4] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[6] * B[3] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[7] * B[2] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[r], #36]\n\t" + "mov r3, #0\n\t" + /* A[3] * B[7] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[4] * B[6] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[5] * B[5] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[6] * B[4] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[7] * B[3] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[r], #40]\n\t" + 
"mov r4, #0\n\t" + /* A[4] * B[7] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[5] * B[6] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[6] * B[5] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[7] * B[4] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[r], #44]\n\t" + "mov r5, #0\n\t" + /* A[5] * B[7] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[6] * B[6] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[7] * B[5] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[r], #48]\n\t" + "mov r3, #0\n\t" + /* A[6] * B[7] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[7] * B[6] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[r], #52]\n\t" + "mov r4, #0\n\t" + /* A[7] * B[7] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r8\n\t" + "str r5, [%[r], #56]\n\t" + "str r3, [%[r], #60]\n\t" + /* Transfer tmp to r */ + "ldr r3, [%[tmp], #0]\n\t" + "ldr r4, [%[tmp], #4]\n\t" + "ldr r5, [%[tmp], #8]\n\t" + "ldr r6, [%[tmp], #12]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r3, [%[tmp], #16]\n\t" + "ldr r4, [%[tmp], #20]\n\t" + "ldr r5, [%[tmp], #24]\n\t" + "ldr r6, [%[tmp], #28]\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp) + : "memory", "r3", "r4", "r5", "r6", "r8" + ); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) +{ + sp_digit tmp[8]; + __asm__ __volatile__ ( + /* A[0] * A[0] */ + "ldr r6, [%[a], #0]\n\t" + "umull r3, r4, r6, r6\n\t" + "mov r5, #0\n\t" + "str r3, [%[tmp], #0]\n\t" + "mov r3, #0\n\t" + /* A[0] * A[1] */ + "ldr r8, [%[a], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[tmp], #4]\n\t" + "mov r4, #0\n\t" + /* A[0] * A[2] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * A[1] */ + "ldr r6, [%[a], #4]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[tmp], #8]\n\t" + "mov r5, #0\n\t" + /* A[0] * A[3] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" + /* A[1] * A[2] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, r10\n\t" + "adc r5, r5, r11\n\t" + "str r3, [%[tmp], #12]\n\t" + "mov r3, #0\n\t" + /* A[0] * A[4] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" + /* A[1] * A[3] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[2] * A[2] */ + "ldr r6, [%[a], #8]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r4, r4, r9\n\t" + "adcs r5, r5, r10\n\t" + "adc r3, r3, r11\n\t" + "str r4, [%[tmp], #16]\n\t" + "mov r4, #0\n\t" + /* A[0] * A[5] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" + /* A[1] * A[4] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[2] * A[3] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r5, r5, r9\n\t" + "adcs r3, r3, r10\n\t" + "adc r4, r4, r11\n\t" + "str r5, [%[tmp], #20]\n\t" + "mov r5, #0\n\t" + /* A[0] * A[6] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" + /* A[1] * A[5] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[2] * A[4] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[3] * A[3] */ + "ldr r6, [%[a], #12]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, r10\n\t" + "adc r5, r5, r11\n\t" + "str r3, 
[%[tmp], #24]\n\t" + "mov r3, #0\n\t" + /* A[0] * A[7] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" + /* A[1] * A[6] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[2] * A[5] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[3] * A[4] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r4, r4, r9\n\t" + "adcs r5, r5, r10\n\t" + "adc r3, r3, r11\n\t" + "str r4, [%[tmp], #28]\n\t" + "mov r4, #0\n\t" + /* A[1] * A[7] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" + /* A[2] * A[6] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[3] * A[5] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[4] * A[4] */ + "ldr r6, [%[a], #16]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r5, r5, r9\n\t" + "adcs r3, r3, r10\n\t" + "adc r4, r4, r11\n\t" + "str r5, [%[r], #32]\n\t" + "mov r5, #0\n\t" + /* A[2] * A[7] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" + /* A[3] * A[6] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[4] * A[5] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, r10\n\t" + "adc r5, r5, r11\n\t" + "str r3, [%[r], #36]\n\t" + "mov r3, #0\n\t" + /* A[3] * A[7] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" + /* A[4] * A[6] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[5] * A[5] */ + "ldr r6, [%[a], #20]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r4, r4, r9\n\t" + "adcs r5, r5, r10\n\t" + "adc r3, r3, r11\n\t" + "str r4, [%[r], #40]\n\t" + "mov r4, #0\n\t" + /* A[4] * A[7] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[5] * A[6] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, 
#0\n\t" + "str r5, [%[r], #44]\n\t" + "mov r5, #0\n\t" + /* A[5] * A[7] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[6] * A[6] */ + "ldr r6, [%[a], #24]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[r], #48]\n\t" + "mov r3, #0\n\t" + /* A[6] * A[7] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[r], #52]\n\t" + "mov r4, #0\n\t" + /* A[7] * A[7] */ + "ldr r6, [%[a], #28]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r8\n\t" + "str r5, [%[r], #56]\n\t" + "str r3, [%[r], #60]\n\t" + /* Transfer tmp to r */ + "ldr r3, [%[tmp], #0]\n\t" + "ldr r4, [%[tmp], #4]\n\t" + "ldr r5, [%[tmp], #8]\n\t" + "ldr r6, [%[tmp], #12]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r3, [%[tmp], #16]\n\t" + "ldr r4, [%[tmp], #20]\n\t" + "ldr r5, [%[tmp], #24]\n\t" + "ldr r6, [%[tmp], #28]\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + : + : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp) + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11" + ); +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "subs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "sbc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<8; i++) { + r[i] = a[i] & m; + } +#else + r[0] = a[0] & m; + r[1] = a[1] & m; + r[2] = a[2] & m; + r[3] = a[3] & m; + r[4] = a[4] & m; + r[5] = a[5] & m; + r[6] = a[6] & m; + r[7] = a[7] & m; +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
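+ *
+ * (Added note: this is one level of Karatsuba: splitting a = a1*2^256 + a0
+ * and b = b1*2^256 + b0 into 8-word halves gives
+ * r = z2*2^512 + z1*2^256 + z0 with z0 = a0*b0, z2 = a1*b1 and
+ * z1 = (a0 + a1)*(b0 + b1) - z0 - z2; the 0 - ca and 0 - cb masks plus the
+ * carry word u fold the ninth-word carries of the half sums back in.)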
+ */
+SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[16];
+    sp_digit a1[8];
+    sp_digit b1[8];
+    sp_digit z2[16];
+    sp_digit u, ca, cb;
+
+    ca = sp_2048_add_8(a1, a, &a[8]);
+    cb = sp_2048_add_8(b1, b, &b[8]);
+    u = ca & cb;
+    sp_2048_mul_8(z1, a1, b1);
+    sp_2048_mul_8(z2, &a[8], &b[8]);
+    sp_2048_mul_8(z0, a, b);
+    sp_2048_mask_8(r + 16, a1, 0 - cb);
+    sp_2048_mask_8(b1, b1, 0 - ca);
+    u += sp_2048_add_8(r + 16, r + 16, b1);
+    u += sp_2048_sub_in_place_16(z1, z2);
+    u += sp_2048_sub_in_place_16(z1, z0);
+    u += sp_2048_add_16(r + 8, r + 8, z1);
+    r[24] = u;
+    XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
+    (void)sp_2048_add_16(r + 16, r + 16, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[16];
+    sp_digit z1[16];
+    sp_digit a1[8];
+    sp_digit u;
+
+    u = sp_2048_add_8(a1, a, &a[8]);
+    sp_2048_sqr_8(z1, a1);
+    sp_2048_sqr_8(z2, &a[8]);
+    sp_2048_sqr_8(z0, a);
+    sp_2048_mask_8(r + 16, a1, 0 - u);
+    u += sp_2048_add_8(r + 16, r + 16, r + 16);
+    u += sp_2048_sub_in_place_16(z1, z2);
+    u += sp_2048_sub_in_place_16(z1, z0);
+    u += sp_2048_add_16(r + 8, r + 8, z1);
+    r[24] = u;
+    XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
+    (void)sp_2048_add_16(r + 16, r + 16, z2);
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "subs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4,
r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "sbc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. 
+ */
+static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<16; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 16; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[32];
+    sp_digit a1[16];
+    sp_digit b1[16];
+    sp_digit z2[32];
+    sp_digit u, ca, cb;
+
+    ca = sp_2048_add_16(a1, a, &a[16]);
+    cb = sp_2048_add_16(b1, b, &b[16]);
+    u = ca & cb;
+    sp_2048_mul_16(z1, a1, b1);
+    sp_2048_mul_16(z2, &a[16], &b[16]);
+    sp_2048_mul_16(z0, a, b);
+    sp_2048_mask_16(r + 32, a1, 0 - cb);
+    sp_2048_mask_16(b1, b1, 0 - ca);
+    u += sp_2048_add_16(r + 32, r + 32, b1);
+    u += sp_2048_sub_in_place_32(z1, z2);
+    u += sp_2048_sub_in_place_32(z1, z0);
+    u += sp_2048_add_32(r + 16, r + 16, z1);
+    r[48] = u;
+    XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
+    (void)sp_2048_add_32(r + 32, r + 32, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[32];
+    sp_digit z1[32];
+    sp_digit a1[16];
+    sp_digit u;
+
+    u = sp_2048_add_16(a1, a, &a[16]);
+    sp_2048_sqr_16(z1, a1);
+    sp_2048_sqr_16(z2, &a[16]);
+    sp_2048_sqr_16(z0, a);
+    sp_2048_mask_16(r + 32, a1, 0 - u);
+    u += sp_2048_add_16(r + 32, r + 32, r + 32);
+    u += sp_2048_sub_in_place_32(z1, z2);
+    u += sp_2048_sub_in_place_32(z1, z0);
+    u += sp_2048_add_32(r + 16, r + 16, z1);
+    r[48] = u;
+    XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
+    (void)sp_2048_add_32(r + 32, r + 32, z2);
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ */ +SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "subs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm 
%[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "sbc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, 
r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<32; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 32; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[64]; + sp_digit a1[32]; + sp_digit b1[32]; + sp_digit z2[64]; + sp_digit u, ca, cb; + + ca = sp_2048_add_32(a1, a, &a[32]); + cb = sp_2048_add_32(b1, b, &b[32]); + u = ca & cb; + sp_2048_mul_32(z1, a1, b1); + sp_2048_mul_32(z2, &a[32], &b[32]); + sp_2048_mul_32(z0, a, b); + sp_2048_mask_32(r + 64, a1, 0 - cb); + sp_2048_mask_32(b1, b1, 0 - ca); + u += sp_2048_add_32(r + 64, r + 64, b1); + u += sp_2048_sub_in_place_64(z1, z2); + u += sp_2048_sub_in_place_64(z1, z0); + u += sp_2048_add_64(r + 32, r + 32, z1); + r[96] = u; + XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); + (void)sp_2048_add_64(r + 64, r + 64, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[64]; + sp_digit z1[64]; + sp_digit a1[32]; + sp_digit u; + + u = sp_2048_add_32(a1, a, &a[32]); + sp_2048_sqr_32(z1, a1); + sp_2048_sqr_32(z2, &a[32]); + sp_2048_sqr_32(z0, a); + sp_2048_mask_32(r + 64, a1, 0 - u); + u += sp_2048_add_32(r + 64, r + 64, r + 64); + u += sp_2048_sub_in_place_64(z1, z2); + u += sp_2048_sub_in_place_64(z1, z0); + u += sp_2048_add_64(r + 32, r + 32, z1); + r[96] = u; + XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); + (void)sp_2048_add_64(r + 64, r + 64, z2); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "mov r8, #0\n\t" + "add r6, r6, #256\n\t" + "sub r8, r8, #1\n\t" + "\n1:\n\t" + "adds %[c], %[c], r8\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r]]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #4\n\t" + "add %[b], %[b], #4\n\t" + "add %[r], %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + __asm__ __volatile__ ( + "mov r8, %[a]\n\t" + "add r8, r8, #256\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "subs r5, r5, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b]]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a]]\n\t" + "str r4, [%[a], #4]\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #8\n\t" + "add %[b], %[b], #8\n\t" + "cmp %[a], r8\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r8" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
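+ *
+ * Implementation note: the WOLFSSL_SP_SMALL variant below computes the
+ * product column by column (product scanning): all partial products
+ * a[i] * b[k - i] of column k are summed into a three-word accumulator
+ * before one result word is stored. A rough portable sketch of the same
+ * idea (hypothetical, not the generated asm; word64 is wolfCrypt's
+ * 64-bit type):
+ *
+ *   sp_digit t[128], c0 = 0, c1 = 0, c2 = 0;
+ *   int i, k;
+ *   for (k = 0; k <= 126; k++) {               /* one column per word */
+ *       for (i = (k < 64) ? 0 : k - 63; i <= k && i < 64; i++) {
+ *           word64 p = (word64)a[i] * b[k - i];
+ *           sp_digit pl = (sp_digit)p;
+ *           sp_digit ph = (sp_digit)(p >> 32);
+ *           c0 += pl;
+ *           ph += (c0 < pl);                   /* cannot overflow */
+ *           c1 += ph;
+ *           c2 += (c1 < ph);
+ *       }
+ *       t[k] = c0; c0 = c1; c1 = c2; c2 = 0;   /* shift the window */
+ *   }
+ *   t[127] = c0;
+ *   /* then copy t into r, as the XMEMCPY below does */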
+ */ +SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[64 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r10, %[a]\n\t" + "mov r11, %[b]\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, r10\n\t" + "mov r14, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #252\n\t" + "mov %[a], r9\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov %[b], r9\n\t" + "sub %[b], %[b], %[a]\n\t" + "add %[a], %[a], r10\n\t" + "add %[b], %[b], r11\n\t" + "\n2:\n\t" + /* Multiply Start */ + "ldr r6, [%[a]]\n\t" + "ldr r8, [%[b]]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply Done */ + "add %[a], %[a], #4\n\t" + "sub %[b], %[b], #4\n\t" + "cmp %[a], r14\n\t" + "beq 3f\n\t" + "mov r6, r9\n\t" + "add r6, r6, r10\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r12\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #248\n\t" + "cmp r8, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[a], r10\n\t" + "mov %[b], r11\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "neg r6, r6\n\t" + "add sp, sp, r6\n\t" + "mov r11, sp\n\t" + "mov r10, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #252\n\t" + "mov %[a], r9\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov r2, r9\n\t" + "sub r2, r2, %[a]\n\t" + "add %[a], %[a], r10\n\t" + "add r2, r2, r10\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + /* Multiply * 2: Start */ + "ldr r6, [%[a]]\n\t" + "ldr r8, [r2]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply * 2: Done */ + "bal 5f\n\t" + "\n4:\n\t" + /* Square: Start */ + "ldr r6, [%[a]]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Square: Done */ + "\n5:\n\t" + "add %[a], %[a], #4\n\t" + "sub r2, r2, #4\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, r10\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r8, r9\n\t" + "add r8, r8, r10\n\t" + "cmp %[a], r8\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #248\n\t" + "cmp r8, r6\n\t" + "ble 1b\n\t" + "mov %[a], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[r], r12\n\t" + "mov %[a], r11\n\t" + "mov r3, #1\n\t" + "lsl r3, r3, #8\n\t" + "add r3, r3, #252\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], 
r3]\n\t" + "subs r3, r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add sp, sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#ifdef WOLFSSL_SP_SMALL +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) +{ + int i; + + for (i=0; i<32; i++) { + r[i] = a[i] & m; + } +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "mov r8, #0\n\t" + "add r6, r6, #128\n\t" + "sub r8, r8, #1\n\t" + "\n1:\n\t" + "adds %[c], %[c], r8\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r]]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #4\n\t" + "add %[b], %[b], #4\n\t" + "add %[r], %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + __asm__ __volatile__ ( + "mov r8, %[a]\n\t" + "add r8, r8, #128\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "subs r5, r5, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b]]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a]]\n\t" + "str r4, [%[a], #4]\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #8\n\t" + "add %[b], %[b], #8\n\t" + "cmp %[a], r8\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r8" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[32 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r10, %[a]\n\t" + "mov r11, %[b]\n\t" + "mov r6, #128\n\t" + "add r6, r6, r10\n\t" + "mov r14, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #124\n\t" + "mov %[a], r9\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov %[b], r9\n\t" + "sub %[b], %[b], %[a]\n\t" + "add %[a], %[a], r10\n\t" + "add %[b], %[b], r11\n\t" + "\n2:\n\t" + /* Multiply Start */ + "ldr r6, [%[a]]\n\t" + "ldr r8, [%[b]]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply Done */ + "add %[a], %[a], #4\n\t" + "sub %[b], %[b], #4\n\t" + "cmp %[a], r14\n\t" + "beq 3f\n\t" + "mov r6, r9\n\t" + "add r6, r6, r10\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r12\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" + "mov r6, #248\n\t" + "cmp r8, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[a], r10\n\t" + "mov %[b], r11\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "neg r6, r6\n\t" + "add sp, sp, r6\n\t" + "mov r11, sp\n\t" + "mov r10, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #124\n\t" + "mov %[a], r9\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov r2, r9\n\t" + "sub r2, r2, %[a]\n\t" + "add %[a], %[a], r10\n\t" + "add r2, r2, r10\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + /* Multiply * 2: Start */ + "ldr r6, [%[a]]\n\t" + "ldr r8, [r2]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply * 2: Done */ + "bal 5f\n\t" + "\n4:\n\t" + /* Square: Start */ + "ldr r6, [%[a]]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Square: Done */ + "\n5:\n\t" + "add %[a], %[a], #4\n\t" + "sub r2, r2, #4\n\t" + "mov r6, #128\n\t" + "add r6, r6, r10\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r8, r9\n\t" + "add r8, r8, r10\n\t" + "cmp %[a], r8\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" + "mov r6, #248\n\t" + "cmp r8, r6\n\t" + "ble 1b\n\t" + "mov %[a], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[r], r12\n\t" + "mov %[a], r11\n\t" + "mov r3, #252\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "subs r3, r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add sp, sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", 
"r6", "r8", "r9", "r10", "r11", "r12" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */ + +/* Caclulate the bottom digit of -1/a mod 2^n. + * + * a A single precision number. + * rho Bottom word of inverse. + */ +static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho) +{ + sp_digit x, b; + + b = a[0]; + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ + + /* rho = -1/m mod b */ + *rho = -x; +} + +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, + sp_digit b) +{ + __asm__ __volatile__ ( + "add r9, %[a], #256\n\t" + /* A[0] * B */ + "ldr r6, [%[a]], #4\n\t" + "umull r5, r3, r6, %[b]\n\t" + "mov r4, #0\n\t" + "str r5, [%[r]], #4\n\t" + /* A[0] * B - Done */ + "\n1:\n\t" + "mov r5, #0\n\t" + /* A[] * B */ + "ldr r6, [%[a]], #4\n\t" + "umull r6, r8, r6, %[b]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[] * B - Done */ + "str r3, [%[r]], #4\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "cmp %[a], r9\n\t" + "blt 1b\n\t" + "str r3, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r8", "r9" + ); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 2048 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 32); + + /* r = 2^n mod m */ + sp_2048_sub_in_place_32(r, m); +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #128\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "ldr r6, [%[b], r8]\n\t" + "and r6, r6, %[m]\n\t" + "mov r5, #0\n\t" + "subs r5, r5, %[c]\n\t" + "ldr r5, [%[a], r8]\n\t" + "sbcs r5, r5, r6\n\t" + "sbcs %[c], %[c], %[c]\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r8", "r9" + ); + + return c; +} + +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "mov r9, %[mp]\n\t" + "mov r12, %[m]\n\t" + "mov r10, %[a]\n\t" + "mov r4, #0\n\t" + "add r11, r10, #128\n\t" + "\n1:\n\t" + /* mu = a[i] * mp */ + "mov %[mp], r9\n\t" + "ldr %[a], [r10]\n\t" + "mul %[mp], %[mp], %[a]\n\t" + "mov %[m], r12\n\t" + "add r14, r10, #120\n\t" + "\n2:\n\t" + /* a[i+j] += m[j] * mu */ + "ldr %[a], [r10]\n\t" + "mov r5, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" + "adds %[a], %[a], r6\n\t" + "adc r5, r5, r8\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "ldr %[a], [r10]\n\t" + "mov r4, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" + "adds %[a], %[a], r6\n\t" + "adc r4, r4, r8\n\t" + /* Multiply m[j] and mu - Done */ + "adds r5, r5, %[a]\n\t" + "adc r4, r4, #0\n\t" + "str r5, [r10], #4\n\t" + "cmp r10, r14\n\t" + "blt 2b\n\t" + /* a[i+30] += m[30] * mu */ + "ldr %[a], [r10]\n\t" + "mov r5, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" + "adds %[a], %[a], r6\n\t" + "adc r5, r5, r8\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" + /* a[i+31] += m[31] * mu */ + "mov r4, %[ca]\n\t" + "mov %[ca], #0\n\t" + /* Multiply m[31] and mu - Start */ + "ldr r8, [%[m]]\n\t" + "umull r6, r8, %[mp], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc %[ca], %[ca], #0\n\t" + /* Multiply m[31] and mu - Done */ + "ldr r6, [r10]\n\t" + "ldr r8, [r10, #4]\n\t" + "adds r6, r6, r5\n\t" + "adcs r8, r8, r4\n\t" + "adc %[ca], %[ca], #0\n\t" + "str r6, [r10]\n\t" + "str r8, [r10, #4]\n\t" + /* Next word in a */ + "sub r10, r10, #120\n\t" + "cmp r10, r11\n\t" + "blt 1b\n\t" + "mov %[a], r10\n\t" + "mov %[m], r12\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_2048_mul_32(r, a, b); + sp_2048_mont_reduce_32(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_2048_sqr_32(r, a); + sp_2048_mont_reduce_32(r, m, mp); +} + +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
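+ *
+ * Implementation note: this mirrors sp_2048_mul_d_64() above. A rough
+ * portable equivalent of the asm below (hypothetical, not the generated
+ * code), which also stores the final carry as the extra word r[32], the
+ * reason the quotient-digit buffer in sp_2048_div_32() is 33 words:
+ *
+ *   word64 t = 0;
+ *   int i;
+ *   for (i = 0; i < 32; i++) {
+ *       t += (word64)a[i] * b;
+ *       r[i] = (sp_digit)t;
+ *       t >>= 32;
+ *   }
+ *   r[32] = (sp_digit)t;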
+ */ +SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, + sp_digit b) +{ + __asm__ __volatile__ ( + "add r9, %[a], #128\n\t" + /* A[0] * B */ + "ldr r6, [%[a]], #4\n\t" + "umull r5, r3, r6, %[b]\n\t" + "mov r4, #0\n\t" + "str r5, [%[r]], #4\n\t" + /* A[0] * B - Done */ + "\n1:\n\t" + "mov r5, #0\n\t" + /* A[] * B */ + "ldr r6, [%[a]], #4\n\t" + "umull r6, r8, r6, %[b]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[] * B - Done */ + "str r3, [%[r]], #4\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "cmp %[a], r9\n\t" + "blt 1b\n\t" + "str r3, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r8", "r9" + ); +} + +/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The dividend. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. + */ +SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, + sp_digit div) +{ + sp_digit r = 0; + + __asm__ __volatile__ ( + "lsr r6, %[div], #16\n\t" + "add r6, r6, #1\n\t" + "udiv r4, %[d1], r6\n\t" + "lsl r8, r4, #16\n\t" + "umull r4, r5, %[div], r8\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "udiv r5, %[d1], r6\n\t" + "lsl r4, r5, #16\n\t" + "add r8, r8, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "lsl r4, %[d1], #16\n\t" + "orr r4, r4, %[d0], lsr #16\n\t" + "udiv r4, r4, r6\n\t" + "add r8, r8, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "lsl r4, %[d1], #16\n\t" + "orr r4, r4, %[d0], lsr #16\n\t" + "udiv r4, r4, r6\n\t" + "add r8, r8, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "udiv r4, %[d0], %[div]\n\t" + "add r8, r8, r4\n\t" + "mov %[r], r8\n\t" + : [r] "+r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "r4", "r5", "r6", "r8" + ); + return r; +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +SP_NOINLINE static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; + + + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mvn r3, r3\n\t" + "mov r6, #124\n\t" + "\n1:\n\t" + "ldr r8, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r8, r8, r3\n\t" + "and r5, r5, r3\n\t" + "mov r4, r8\n\t" + "subs r8, r8, r5\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" + "subs r5, r5, r4\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" + "sub r6, r6, #4\n\t" + "cmp r6, #0\n\t" + "bge 1b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "r3", "r4", "r5", "r6", "r8" + ); + + return r; +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. 
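+ *
+ * Implementation note: this is schoolbook long division, one 32-bit
+ * quotient digit per iteration, with div_2048_word_32() supplying an
+ * estimate that may be too large. After t2 = d * r1 is subtracted, an
+ * over-estimate leaves the running top word t1[32 + i] at (sp_digit)-1,
+ * the all-ones pattern, so it doubles as an AND mask for a branch-free
+ * add-back of d:
+ *
+ *   sp_2048_mask_32(t2, d, t1[32 + i]);  /* t2 = d, or 0 if no overshoot */
+ *   t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
+ *
+ * The add-back is performed twice to cover the worst-case estimate error.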
+ */ +static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[64], t2[33]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[31]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 32); + for (i=31; i>=0; i--) { + r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div); + + sp_2048_mul_d_32(t2, d, r1); + t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2); + t1[32 + i] -= t2[32]; + sp_2048_mask_32(t2, d, t1[32 + i]); + t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2); + sp_2048_mask_32(t2, d, t1[32 + i]); + t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2); + } + + r1 = sp_2048_cmp_32(t1, d) >= 0; + sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_32(a, m, NULL, r); +} + +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][64]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 64, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 64; + } +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_32(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 32U); + if (reduceA != 0) { + err = sp_2048_mod_32(t[1] + 32, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_32(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32); + err = sp_2048_mod_32(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_32(t[10], t[ 5], m, mp); + sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_32(t[12], t[ 6], m, mp); + sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_32(t[14], t[ 7], m, mp); + sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 32); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n <<= 
4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + + sp_2048_mont_mul_32(r, r, t[y], m, mp); + } + + XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U); + sp_2048_mont_reduce_32(r, m, mp); + + mask = 0 - (sp_2048_cmp_32(r, m) >= 0); + sp_2048_cond_sub_32(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][64]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 64; + } +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_32(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 32U); + if (reduceA != 0) { + err = sp_2048_mod_32(t[1] + 32, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_32(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32); + err = sp_2048_mod_32(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_32(t[10], t[ 5], m, mp); + sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_32(t[12], t[ 6], m, mp); + sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_32(t[14], t[ 7], m, mp); + sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_32(t[16], t[ 8], m, mp); + sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_32(t[18], t[ 9], m, mp); + sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_32(t[20], t[10], m, mp); + sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_32(t[22], t[11], m, mp); + sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_32(t[24], t[12], m, mp); + sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_32(t[26], t[13], m, mp); + sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_32(t[28], t[14], m, mp); + sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_32(t[30], t[15], m, mp); + sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp); + + i = (bits - 
1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 32); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + + sp_2048_mont_mul_32(r, r, t[y], m, mp); + } + + XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U); + sp_2048_mont_reduce_32(r, m, mp); + + mask = 0 - (sp_2048_cmp_32(r, m) >= 0); + sp_2048_cond_sub_32(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ + +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */ + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 2048 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 64); + + /* r = 2^n mod m */ + sp_2048_sub_in_place_64(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #1\n\t" + "lsl r5, r5, #8\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "ldr r6, [%[b], r8]\n\t" + "and r6, r6, %[m]\n\t" + "mov r5, #0\n\t" + "subs r5, r5, %[c]\n\t" + "ldr r5, [%[a], r8]\n\t" + "sbcs r5, r5, r6\n\t" + "sbcs %[c], %[c], %[c]\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r8", "r9" + ); + + return c; +} + +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "mov r9, %[mp]\n\t" + "mov r12, %[m]\n\t" + "mov r10, %[a]\n\t" + "mov r4, #0\n\t" + "add r11, r10, #256\n\t" + "\n1:\n\t" + /* mu = a[i] * mp */ + "mov %[mp], r9\n\t" + "ldr %[a], [r10]\n\t" + "mul %[mp], %[mp], %[a]\n\t" + "mov %[m], r12\n\t" + "add r14, r10, #248\n\t" + "\n2:\n\t" + /* a[i+j] += m[j] * mu */ + "ldr %[a], [r10]\n\t" + "mov r5, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" + "adds %[a], %[a], r6\n\t" + "adc r5, r5, r8\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "ldr %[a], [r10]\n\t" + "mov r4, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" + "adds %[a], %[a], r6\n\t" + "adc r4, r4, r8\n\t" + /* Multiply m[j] and mu - Done */ + "adds r5, r5, %[a]\n\t" + "adc r4, r4, #0\n\t" + "str r5, [r10], #4\n\t" + "cmp r10, r14\n\t" + "blt 2b\n\t" + /* a[i+62] += m[62] * mu */ + "ldr %[a], [r10]\n\t" + "mov r5, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" + "adds %[a], %[a], r6\n\t" + "adc r5, r5, r8\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" + /* a[i+63] += m[63] * mu */ + "mov r4, %[ca]\n\t" + "mov %[ca], #0\n\t" + /* Multiply m[63] and mu - Start */ + "ldr r8, [%[m]]\n\t" + "umull r6, r8, %[mp], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc %[ca], %[ca], #0\n\t" + /* Multiply m[63] and mu - Done */ + "ldr r6, [r10]\n\t" + "ldr r8, [r10, #4]\n\t" + "adds r6, r6, r5\n\t" + "adcs r8, r8, r4\n\t" + "adc %[ca], %[ca], #0\n\t" + "str r6, [r10]\n\t" + "str r8, [r10, #4]\n\t" + /* Next word in a */ + "sub r10, r10, #248\n\t" + "cmp r10, r11\n\t" + "blt 1b\n\t" + "mov %[a], r10\n\t" + "mov %[m], r12\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_2048_mul_64(r, a, b); + sp_2048_mont_reduce_64(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_2048_sqr_64(r, a); + sp_2048_mont_reduce_64(r, m, mp); +} + +/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The dividend. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. 
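+ *
+ * Implementation note: the quotient is built 16 bits at a time with
+ * r6 = (div >> 16) + 1 as the trial divisor; since r6 is never smaller
+ * than div / 2^16, no partial quotient overshoots and the running
+ * remainder stays non-negative. A rough sketch of one round
+ * (hypothetical, not the generated asm; the real sequence runs several
+ * rounds and finishes with a udiv by div itself):
+ *
+ *   word64 n = ((word64)d1 << 32) | d0;
+ *   sp_digit d16 = (div >> 16) + 1;
+ *   sp_digit q = (sp_digit)((n >> 32) / d16) << 16;
+ *   n -= (word64)q * div;          /* narrowed remainder, still >= 0 */
+ *   /* ... further rounds, then q += (sp_digit)(n / div); */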
+ */ +SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, + sp_digit div) +{ + sp_digit r = 0; + + __asm__ __volatile__ ( + "lsr r6, %[div], #16\n\t" + "add r6, r6, #1\n\t" + "udiv r4, %[d1], r6\n\t" + "lsl r8, r4, #16\n\t" + "umull r4, r5, %[div], r8\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "udiv r5, %[d1], r6\n\t" + "lsl r4, r5, #16\n\t" + "add r8, r8, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "lsl r4, %[d1], #16\n\t" + "orr r4, r4, %[d0], lsr #16\n\t" + "udiv r4, r4, r6\n\t" + "add r8, r8, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "lsl r4, %[d1], #16\n\t" + "orr r4, r4, %[d0], lsr #16\n\t" + "udiv r4, r4, r6\n\t" + "add r8, r8, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "udiv r4, %[d0], %[div]\n\t" + "add r8, r8, r4\n\t" + "mov %[r], r8\n\t" + : [r] "+r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "r4", "r5", "r6", "r8" + ); + return r; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<64; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 64; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +SP_NOINLINE static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; + + + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mvn r3, r3\n\t" + "mov r6, #252\n\t" + "\n1:\n\t" + "ldr r8, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r8, r8, r3\n\t" + "and r5, r5, r3\n\t" + "mov r4, r8\n\t" + "subs r8, r8, r5\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" + "subs r5, r5, r4\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" + "sub r6, r6, #4\n\t" + "cmp r6, #0\n\t" + "bge 1b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "r3", "r4", "r5", "r6", "r8" + ); + + return r; +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. 
+ */ +static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[128], t2[65]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[63]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 64); + for (i=63; i>=0; i--) { + r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div); + + sp_2048_mul_d_64(t2, d, r1); + t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2); + t1[64 + i] -= t2[64]; + sp_2048_mask_64(t2, d, t1[64 + i]); + t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2); + sp_2048_mask_64(t2, d, t1[64 + i]); + t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2); + } + + r1 = sp_2048_cmp_64(t1, d) >= 0; + sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_64(a, m, NULL, r); +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[128], t2[65]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[63]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 64); + for (i=63; i>=0; i--) { + r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div); + + sp_2048_mul_d_64(t2, d, r1); + t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2); + t1[64 + i] -= t2[64]; + if (t1[64 + i] != 0) { + t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d); + if (t1[64 + i] != 0) + t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d); + } + } + + r1 = sp_2048_cmp_64(t1, d) >= 0; + sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_64_cond(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
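+ *
+ * Implementation note: this is left-to-right fixed-window exponentiation
+ * in the Montgomery domain with a 4-bit window. t[0] is R mod m (the
+ * Montgomery representation of 1), t[k] is a^k in Montgomery form for
+ * k = 1..15, and t[1] enters Montgomery form by reducing the zero-padded
+ * a * 2^2048 mod m rather than multiplying by a precomputed R^2. The
+ * main loop then consumes four exponent bits per step:
+ *
+ *   sp_2048_mont_sqr_64(r, r, m, mp);        /* four squarings shift  */
+ *   sp_2048_mont_sqr_64(r, r, m, mp);        /* the accumulator left  */
+ *   sp_2048_mont_sqr_64(r, r, m, mp);        /* by one 4-bit window   */
+ *   sp_2048_mont_sqr_64(r, r, m, mp);
+ *   sp_2048_mont_mul_64(r, r, t[y], m, mp);  /* multiply in window y  */
+ *
+ * The final XMEMSET plus sp_2048_mont_reduce_64() converts r back out of
+ * Montgomery form, and the masked sp_2048_cond_sub_64() canonicalises
+ * the result without branching on secret data.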
+ */ +static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][128]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 128, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 128; + } +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_64(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 64U); + if (reduceA != 0) { + err = sp_2048_mod_64(t[1] + 64, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_64(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64); + err = sp_2048_mod_64(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_64(t[10], t[ 5], m, mp); + sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_64(t[12], t[ 6], m, mp); + sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_64(t[14], t[ 7], m, mp); + sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 64); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n <<= 4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + + sp_2048_mont_mul_64(r, r, t[y], m, mp); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U); + sp_2048_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_2048_cmp_64(r, m) >= 0); + sp_2048_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
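+ *
+ * Implementation note: identical in structure to the WOLFSSL_SP_SMALL
+ * variant above, but with a 5-bit window: 32 table entries and five
+ * squarings per window multiply. For a 2048-bit exponent that is roughly
+ * 2048 / 5 = 410 window multiplies instead of 2048 / 4 = 512, in
+ * exchange for a precomputation table twice the size.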
+ */ +static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][128]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 128; + } +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_64(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 64U); + if (reduceA != 0) { + err = sp_2048_mod_64(t[1] + 64, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_64(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64); + err = sp_2048_mod_64(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_64(t[10], t[ 5], m, mp); + sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_64(t[12], t[ 6], m, mp); + sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_64(t[14], t[ 7], m, mp); + sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_64(t[16], t[ 8], m, mp); + sp_2048_mont_mul_64(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_64(t[18], t[ 9], m, mp); + sp_2048_mont_mul_64(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_64(t[20], t[10], m, mp); + sp_2048_mont_mul_64(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_64(t[22], t[11], m, mp); + sp_2048_mont_mul_64(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_64(t[24], t[12], m, mp); + sp_2048_mont_mul_64(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_64(t[26], t[13], m, mp); + sp_2048_mont_mul_64(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_64(t[28], t[14], m, mp); + sp_2048_mont_mul_64(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_64(t[30], t[15], m, mp); + sp_2048_mont_mul_64(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 64); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + + sp_2048_mont_mul_64(r, r, t[y], m, mp); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U); + sp_2048_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_2048_cmp_64(r, m) >= 0); + sp_2048_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || 
WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[128], m[64], r[128]; +#else + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; +#endif + sp_digit *ah; + sp_digit e[1]; + int err = MP_OKAY; + + if (*outLen < 256) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 256 || + mp_count_bits(mm) != 2048)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 64 * 2; + m = r + 64 * 2; + } +#endif + + if (err == MP_OKAY) { + ah = a + 64; + + sp_2048_from_bin(ah, 64, in, inLen); +#if DIGIT_BIT >= 32 + e[0] = em->dp[0]; +#else + e[0] = em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_2048_from_mp(m, 64, mm); + + if (e[0] == 0x3) { + if (err == MP_OKAY) { + sp_2048_sqr_64(r, ah); + err = sp_2048_mod_64_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_2048_mul_64(r, ah, r); + err = sp_2048_mod_64_cond(r, r, m); + } + } + else { + int i; + sp_digit mp; + + sp_2048_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
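+ * The base was loaded into the top half (ah) of the 128-word
+ * buffer, so once the low half is zeroed the buffer holds
+ * ah * 2^2048; reducing that value mod m yields ah * R mod m,
+ * i.e. ah in Montgomery form, with no precomputed R^2 needed.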
*/ + XMEMSET(a, 0, sizeof(sp_digit) * 64); + err = sp_2048_mod_64_cond(a, a, m); + + if (err == MP_OKAY) { + for (i = 31; i >= 0; i--) { + if (e[0] >> i) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 64); + for (i--; i>=0; i--) { + sp_2048_mont_sqr_64(r, r, m, mp); + if (((e[0] >> i) & 1) == 1) { + sp_2048_mont_mul_64(r, r, a, m, mp); + } + } + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64); + sp_2048_mont_reduce_64(r, m, mp); + + for (i = 63; i > 0; i--) { + if (r[i] != m[i]) { + break; + } + } + if (r[i] >= m[i]) { + sp_2048_sub_in_place_64(r, m); + } + } + } + } + + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } +#endif + + return err; +} + +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) + sp_digit* a; + sp_digit* d = NULL; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 256U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 2048) { + err = MP_READ_E; + } + if (inLen > 256) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = d + 64; + m = a + 128; + r = a; + + sp_2048_from_bin(a, 64, in, inLen); + sp_2048_from_mp(d, 64, dm); + sp_2048_from_mp(m, 64, mm); + err = sp_2048_mod_exp_64(r, a, d, 2048, m, 0); + } + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + } + + if (d != NULL) { + XMEMSET(d, 0, sizeof(sp_digit) * 64); + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #128\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "ldr r6, [%[b], r8]\n\t" + "and r6, r6, %[m]\n\t" + "adds r5, %[c], #-1\n\t" + "ldr r5, [%[a], r8]\n\t" + "adcs r5, r5, r6\n\t" + "mov %[c], #0\n\t" + "adcs %[c], %[c], %[c]\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r8", "r9" + ); + + return c; +} + +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
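+ *
+ * Implementation note: this build uses the CRT form of the private
+ * operation, roughly Garner's recombination:
+ *
+ *   tmpa = in^dp mod p               (dp = d mod (p-1))
+ *   tmpb = in^dq mod q               (dq = d mod (q-1))
+ *   h    = qi * (tmpa - tmpb) mod p  (qi = q^-1 mod p)
+ *   out  = tmpb + q * h
+ *
+ * The pair of sp_2048_cond_add_32() calls after the subtraction folds a
+ * negative (tmpa - tmpb) back into [0, p) without branching on secret
+ * data, which is why that helper exists only in this build.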
+ */ +int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[64 * 2]; + sp_digit p[32], q[32], dp[32]; + sp_digit tmpa[64], tmpb[64]; +#else + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* tmpa; + sp_digit* tmpb; +#endif + sp_digit* r; + sp_digit* qi; + sp_digit* dq; + sp_digit c; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 256) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 64 * 2; + q = p + 32; + qi = dq = dp = q + 32; + tmpa = qi + 32; + tmpb = tmpa + 64; + + r = t + 64; + } +#else +#endif + + if (err == MP_OKAY) { +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + r = a; + qi = dq = dp; +#endif + sp_2048_from_bin(a, 64, in, inLen); + sp_2048_from_mp(p, 32, pm); + sp_2048_from_mp(q, 32, qm); + sp_2048_from_mp(dp, 32, dpm); + + err = sp_2048_mod_exp_32(tmpa, a, dp, 1024, p, 1); + } + if (err == MP_OKAY) { + sp_2048_from_mp(dq, 32, dqm); + err = sp_2048_mod_exp_32(tmpb, a, dq, 1024, q, 1); + } + + if (err == MP_OKAY) { + c = sp_2048_sub_in_place_32(tmpa, tmpb); + c += sp_2048_cond_add_32(tmpa, tmpa, p, c); + sp_2048_cond_add_32(tmpa, tmpa, p, c); + + sp_2048_from_mp(qi, 32, qim); + sp_2048_mul_32(tmpa, tmpa, qi); + err = sp_2048_mod_32(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_2048_mul_32(tmpa, q, tmpa); + XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32); + sp_2048_add_64(r, tmpb, tmpa); + + sp_2048_to_bin(r, out); + *outLen = 256; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); +#endif + + return err; +} +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. 
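+ *
+ * When DIGIT_BIT is 32 the 64 words are copied straight into r->dp;
+ * otherwise the bits are repacked into digits of DIGIT_BIT bits each,
+ * as the three branches below show.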
+ */ +static int sp_2048_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 64); + r->used = 64; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 64; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 32 - s; + } + r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 64; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[128], e[64], m[64]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 64, base); + sp_2048_from_mp(e, 64, exp); + sp_2048_from_mp(m, 64, mod); + + err = sp_2048_mod_exp_64(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_2048_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_2048 +static void sp_2048_lshift_64(sp_digit* r, sp_digit* a, byte n) +{ + __asm__ __volatile__ ( + "mov r6, #31\n\t" + "sub r6, r6, %[n]\n\t" + "add %[a], %[a], #192\n\t" + "add %[r], %[r], #192\n\t" + "ldr r3, [%[a], #60]\n\t" + "lsr r4, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r4, r4, r6\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl 
r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, 
r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #56]\n\t" + "str r2, [%[r], #64]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], #56]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #44]\n\t" + "str r2, [%[r], #52]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #32]\n\t" + "str r2, [%[r], #40]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #20]\n\t" + "str r2, [%[r], #28]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #8]\n\t" + "str r2, [%[r], #16]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #0]\n\t" + "str r3, [%[r], #8]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r3, [%[a], #60]\n\t" + "str r2, [%[r], #68]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr 
r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "str r3, [%[r]]\n\t" + "str r4, [%[r], #4]\n\t" + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) + : "memory", "r2", "r3", "r4", "r5", "r6" + ); +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
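+ *
+ * A fixed 5-bit window is used; because the base is 2, multiplying by
+ * the window value is a left shift (sp_2048_lshift_64) rather than a
+ * table lookup, i.e. each step computes r = r^(2^5) * 2^y mod m.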
+ */ +static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[128]; + sp_digit td[65]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 128; +#else + norm = nd; + tmp = td; +#endif + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_64(norm, m); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + sp_2048_lshift_64(r, norm, y); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + sp_2048_mont_sqr_64(r, r, m, mp); + + sp_2048_lshift_64(r, r, y); + sp_2048_mul_d_64(tmp, norm, r[64]); + r[64] = 0; + o = sp_2048_add_64(r, r, tmp); + sp_2048_cond_sub_64(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U); + sp_2048_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_2048_cmp_64(r, m) >= 0); + sp_2048_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* HAVE_FFDHE_2048 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ + int err = MP_OKAY; + sp_digit b[128], e[64], m[64]; + sp_digit* r = b; + word32 i; + + if (mp_count_bits(base) > 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 256) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 64, base); + sp_2048_from_bin(e, 64, exp, expLen); + sp_2048_from_mp(m, 64, mod); + + #ifdef HAVE_FFDHE_2048 + if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1) + err = sp_2048_mod_exp_2_64(r, e, expLen * 8, m); + else + #endif + err = sp_2048_mod_exp_64(r, b, e, expLen * 8, m, 0); + + } + + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + for (i=0; i<256 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. 
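+ * Operands are 1024 bits; the 2048-bit helpers are reused at half
+ * width (32 words of 32 bits).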
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[64], e[32], m[32]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1024) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 1024) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 1024) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 32, base); + sp_2048_from_mp(e, 32, exp); + sp_2048_from_mp(m, 32, mod); + + err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 32, 0, sizeof(*r) * 32U); + err = sp_2048_to_mp(r, res); + res->used = mod->used; + mp_clamp(res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* !WOLFSSL_SP_NO_2048 */ + +#ifndef WOLFSSL_SP_NO_3072 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 24U) { + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 32 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 32 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 32U) <= (word32)DIGIT_BIT) { + s += 32U; + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 32) { + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + s = 32 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 384 + * + * r A single precision integer. + * a Byte array. 
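+ *
+ * a[0] receives the most significant byte; r[0], the least significant
+ * word, fills the final bytes a[380..383].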
+ */ +static void sp_3072_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + j = 3072 / 8 - 1; + a[j] = 0; + for (i=0; i<96 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ + b += 8 - s; + if (j < 0) { + break; + } + while (b < 32) { + a[j--] = (byte)(r[i] >> b); + b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 32); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[12 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r10, %[a]\n\t" + "mov r11, %[b]\n\t" + "mov r6, #48\n\t" + "add r6, r6, r10\n\t" + "mov r14, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #44\n\t" + "mov %[a], r9\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov %[b], r9\n\t" + "sub %[b], %[b], %[a]\n\t" + "add %[a], %[a], r10\n\t" + "add %[b], %[b], r11\n\t" + "\n2:\n\t" + /* Multiply Start */ + "ldr r6, [%[a]]\n\t" + "ldr r8, [%[b]]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply Done */ + "add %[a], %[a], #4\n\t" + "sub %[b], %[b], #4\n\t" + "cmp %[a], r14\n\t" + "beq 3f\n\t" + "mov r6, r9\n\t" + "add r6, r6, r10\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r12\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" + "mov r6, #88\n\t" + "cmp r8, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[a], r10\n\t" + "mov %[b], r11\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
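+ *
+ * Squaring needs roughly half the word products of a general multiply:
+ * each cross product a[i]*a[j] (i != j) is computed once and added
+ * twice (the "Multiply * 2" path in the assembly).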
+ */ +SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r6, #96\n\t" + "neg r6, r6\n\t" + "add sp, sp, r6\n\t" + "mov r11, sp\n\t" + "mov r10, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #44\n\t" + "mov %[a], r9\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov r2, r9\n\t" + "sub r2, r2, %[a]\n\t" + "add %[a], %[a], r10\n\t" + "add r2, r2, r10\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + /* Multiply * 2: Start */ + "ldr r6, [%[a]]\n\t" + "ldr r8, [r2]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply * 2: Done */ + "bal 5f\n\t" + "\n4:\n\t" + /* Square: Start */ + "ldr r6, [%[a]]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Square: Done */ + "\n5:\n\t" + "add %[a], %[a], #4\n\t" + "sub r2, r2, #4\n\t" + "mov r6, #48\n\t" + "add r6, r6, r10\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r8, r9\n\t" + "add r8, r8, r10\n\t" + "cmp %[a], r8\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" + "mov r6, #88\n\t" + "cmp r8, r6\n\t" + "ble 1b\n\t" + "mov %[a], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[r], r12\n\t" + "mov %[a], r11\n\t" + "mov r3, #92\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "subs r3, r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #96\n\t" + "add sp, sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" + ); +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
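+ *
+ * In-place variant: the difference is written back to a and the borrow
+ * (0 or -1, from the final sbc) is returned.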
+ */ +SP_NOINLINE static sp_digit sp_3072_sub_in_place_24(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "subs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "sbc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
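+ *
+ * Returns the carry out of the most significant word (0 or 1).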
+ */ +SP_NOINLINE static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<12; i++) { + r[i] = a[i] & m; + } +#else + r[0] = a[0] & m; + r[1] = a[1] & m; + r[2] = a[2] & m; + r[3] = a[3] & m; + r[4] = a[4] & m; + r[5] = a[5] & m; + r[6] = a[6] & m; + r[7] = a[7] & m; + r[8] = a[8] & m; + r[9] = a[9] & m; + r[10] = a[10] & m; + r[11] = a[11] & m; +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[24]; + sp_digit a1[12]; + sp_digit b1[12]; + sp_digit z2[24]; + sp_digit u, ca, cb; + + ca = sp_3072_add_12(a1, a, &a[12]); + cb = sp_3072_add_12(b1, b, &b[12]); + u = ca & cb; + sp_3072_mul_12(z1, a1, b1); + sp_3072_mul_12(z2, &a[12], &b[12]); + sp_3072_mul_12(z0, a, b); + sp_3072_mask_12(r + 24, a1, 0 - cb); + sp_3072_mask_12(b1, b1, 0 - ca); + u += sp_3072_add_12(r + 24, r + 24, b1); + u += sp_3072_sub_in_place_24(z1, z2); + u += sp_3072_sub_in_place_24(z1, z0); + u += sp_3072_add_24(r + 12, r + 12, z1); + r[36] = u; + XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1)); + (void)sp_3072_add_24(r + 24, r + 24, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
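+ *
+ * One level of Karatsuba: splitting a = a1*2^384 + a0, only three
+ * half-size squarings are needed since
+ * a^2 = a1^2*2^768 + ((a1+a0)^2 - a1^2 - a0^2)*2^384 + a0^2.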
+ */ +SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[24]; + sp_digit z1[24]; + sp_digit a1[12]; + sp_digit u; + + u = sp_3072_add_12(a1, a, &a[12]); + sp_3072_sqr_12(z1, a1); + sp_3072_sqr_12(z2, &a[12]); + sp_3072_sqr_12(z0, a); + sp_3072_mask_12(r + 24, a1, 0 - u); + u += sp_3072_add_12(r + 24, r + 24, r + 24); + u += sp_3072_sub_in_place_24(z1, z2); + u += sp_3072_sub_in_place_24(z1, z0); + u += sp_3072_add_24(r + 12, r + 12, z1); + r[36] = u; + XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1)); + (void)sp_3072_add_24(r + 24, r + 24, z2); +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "subs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm 
%[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "sbc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, 
r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<24; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 24; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[48]; + sp_digit a1[24]; + sp_digit b1[24]; + sp_digit z2[48]; + sp_digit u, ca, cb; + + ca = sp_3072_add_24(a1, a, &a[24]); + cb = sp_3072_add_24(b1, b, &b[24]); + u = ca & cb; + sp_3072_mul_24(z1, a1, b1); + sp_3072_mul_24(z2, &a[24], &b[24]); + sp_3072_mul_24(z0, a, b); + sp_3072_mask_24(r + 48, a1, 0 - cb); + sp_3072_mask_24(b1, b1, 0 - ca); + u += sp_3072_add_24(r + 48, r + 48, b1); + u += sp_3072_sub_in_place_48(z1, z2); + u += sp_3072_sub_in_place_48(z1, z0); + u += sp_3072_add_48(r + 24, r + 24, z1); + r[72] = u; + XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1)); + (void)sp_3072_add_48(r + 48, r + 48, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[48]; + sp_digit z1[48]; + sp_digit a1[24]; + sp_digit u; + + u = sp_3072_add_24(a1, a, &a[24]); + sp_3072_sqr_24(z1, a1); + sp_3072_sqr_24(z2, &a[24]); + sp_3072_sqr_24(z0, a); + sp_3072_mask_24(r + 48, a1, 0 - u); + u += sp_3072_add_24(r + 48, r + 48, r + 48); + u += sp_3072_sub_in_place_48(z1, z2); + u += sp_3072_sub_in_place_48(z1, z0); + u += sp_3072_add_48(r + 24, r + 24, z1); + r[72] = u; + XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1)); + (void)sp_3072_add_48(r + 48, r + 48, z2); +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "subs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm 
%[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "sbc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
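+ *
+ * Unrolled counterpart of the subtraction above; the returned carry
+ * (0 or 1) is folded into u during Karatsuba recombination.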
+ */ +SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" 
+ "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. 
+ */ +static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<48; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 48; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[96]; + sp_digit a1[48]; + sp_digit b1[48]; + sp_digit z2[96]; + sp_digit u, ca, cb; + + ca = sp_3072_add_48(a1, a, &a[48]); + cb = sp_3072_add_48(b1, b, &b[48]); + u = ca & cb; + sp_3072_mul_48(z1, a1, b1); + sp_3072_mul_48(z2, &a[48], &b[48]); + sp_3072_mul_48(z0, a, b); + sp_3072_mask_48(r + 96, a1, 0 - cb); + sp_3072_mask_48(b1, b1, 0 - ca); + u += sp_3072_add_48(r + 96, r + 96, b1); + u += sp_3072_sub_in_place_96(z1, z2); + u += sp_3072_sub_in_place_96(z1, z0); + u += sp_3072_add_96(r + 48, r + 48, z1); + r[144] = u; + XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1)); + (void)sp_3072_add_96(r + 96, r + 96, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[96]; + sp_digit z1[96]; + sp_digit a1[48]; + sp_digit u; + + u = sp_3072_add_48(a1, a, &a[48]); + sp_3072_sqr_48(z1, a1); + sp_3072_sqr_48(z2, &a[48]); + sp_3072_sqr_48(z0, a); + sp_3072_mask_48(r + 96, a1, 0 - u); + u += sp_3072_add_48(r + 96, r + 96, r + 96); + u += sp_3072_sub_in_place_96(z1, z2); + u += sp_3072_sub_in_place_96(z1, z0); + u += sp_3072_add_96(r + 48, r + 48, z1); + r[144] = u; + XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1)); + (void)sp_3072_add_96(r + 96, r + 96, z2); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "mov r8, #0\n\t" + "add r6, r6, #384\n\t" + "sub r8, r8, #1\n\t" + "\n1:\n\t" + "adds %[c], %[c], r8\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r]]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #4\n\t" + "add %[b], %[b], #4\n\t" + "add %[r], %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. 
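+ *
+ * Returns the borrow out of the top word: 0 when no borrow occurred and
+ * all ones otherwise (callers fold it straight into a carry accumulator).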
+ */ +SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + __asm__ __volatile__ ( + "mov r8, %[a]\n\t" + "add r8, r8, #384\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "subs r5, r5, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b]]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a]]\n\t" + "str r4, [%[a], #4]\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #8\n\t" + "add %[b], %[b], #8\n\t" + "cmp %[a], r8\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r8" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[96 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r10, %[a]\n\t" + "mov r11, %[b]\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #128\n\t" + "add r6, r6, r10\n\t" + "mov r14, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #124\n\t" + "mov %[a], r9\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov %[b], r9\n\t" + "sub %[b], %[b], %[a]\n\t" + "add %[a], %[a], r10\n\t" + "add %[b], %[b], r11\n\t" + "\n2:\n\t" + /* Multiply Start */ + "ldr r6, [%[a]]\n\t" + "ldr r8, [%[b]]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply Done */ + "add %[a], %[a], #4\n\t" + "sub %[b], %[b], #4\n\t" + "cmp %[a], r14\n\t" + "beq 3f\n\t" + "mov r6, r9\n\t" + "add r6, r6, r10\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r12\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #248\n\t" + "cmp r8, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[a], r10\n\t" + "mov %[b], r11\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r6, #3\n\t" + "lsl r6, r6, #8\n\t" + "neg r6, r6\n\t" + "add sp, sp, r6\n\t" + "mov r11, sp\n\t" + "mov r10, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #124\n\t" + "mov %[a], r9\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov r2, r9\n\t" + "sub r2, r2, %[a]\n\t" + "add %[a], %[a], r10\n\t" + "add r2, r2, r10\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + /* Multiply * 2: Start */ + "ldr r6, [%[a]]\n\t" + "ldr r8, [r2]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply * 2: Done */ + "bal 5f\n\t" + "\n4:\n\t" + /* Square: Start */ + "ldr r6, [%[a]]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Square: Done */ + "\n5:\n\t" + "add %[a], %[a], #4\n\t" + "sub r2, r2, #4\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #128\n\t" + "add r6, r6, r10\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r8, r9\n\t" + "add r8, r8, r10\n\t" + "cmp %[a], r8\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #248\n\t" + "cmp r8, r6\n\t" + "ble 1b\n\t" + "mov %[a], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[r], r12\n\t" + "mov %[a], r11\n\t" + "mov r3, #2\n\t" + "lsl r3, r3, #8\n\t" + "add r3, r3, #252\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "subs r3, r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #3\n\t" + "lsl r6, r6, #8\n\t" + "add sp, sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#ifdef WOLFSSL_SP_SMALL +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m) +{ + int i; + + for (i=0; i<48; i++) { + r[i] = a[i] & m; + } +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "mov r8, #0\n\t" + "add r6, r6, #192\n\t" + "sub r8, r8, #1\n\t" + "\n1:\n\t" + "adds %[c], %[c], r8\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r]]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #4\n\t" + "add %[b], %[b], #4\n\t" + "add %[r], %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + __asm__ __volatile__ ( + "mov r8, %[a]\n\t" + "add r8, r8, #192\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "subs r5, r5, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b]]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a]]\n\t" + "str r4, [%[a], #4]\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #8\n\t" + "add %[b], %[b], #8\n\t" + "cmp %[a], r8\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r8" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[48 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r10, %[a]\n\t" + "mov r11, %[b]\n\t" + "mov r6, #192\n\t" + "add r6, r6, r10\n\t" + "mov r14, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #188\n\t" + "mov %[a], r9\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov %[b], r9\n\t" + "sub %[b], %[b], %[a]\n\t" + "add %[a], %[a], r10\n\t" + "add %[b], %[b], r11\n\t" + "\n2:\n\t" + /* Multiply Start */ + "ldr r6, [%[a]]\n\t" + "ldr r8, [%[b]]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply Done */ + "add %[a], %[a], #4\n\t" + "sub %[b], %[b], #4\n\t" + "cmp %[a], r14\n\t" + "beq 3f\n\t" + "mov r6, r9\n\t" + "add r6, r6, r10\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r12\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #120\n\t" + "cmp r8, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[a], r10\n\t" + "mov %[b], r11\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */
+SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r9, r3\n\t"
+        "mov r12, %[r]\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, #128\n\t"
+        "neg r6, r6\n\t"
+        "add sp, sp, r6\n\t"
+        "mov r11, sp\n\t"
+        "mov r10, %[a]\n\t"
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r6, #188\n\t"
+        "mov %[a], r9\n\t"
+        "subs %[a], %[a], r6\n\t"
+        "sbc r6, r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], %[a], r6\n\t"
+        "mov r2, r9\n\t"
+        "sub r2, r2, %[a]\n\t"
+        "add %[a], %[a], r10\n\t"
+        "add r2, r2, r10\n\t"
+        "\n2:\n\t"
+        "cmp r2, %[a]\n\t"
+        "beq 4f\n\t"
+        /* Multiply * 2: Start */
+        "ldr r6, [%[a]]\n\t"
+        "ldr r8, [r2]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        /* Multiply * 2: Done */
+        "bal 5f\n\t"
+        "\n4:\n\t"
+        /* Square: Start */
+        "ldr r6, [%[a]]\n\t"
+        "umull r6, r8, r6, r6\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        /* Square: Done */
+        "\n5:\n\t"
+        "add %[a], %[a], #4\n\t"
+        "sub r2, r2, #4\n\t"
+        "mov r6, #192\n\t"
+        "add r6, r6, r10\n\t"
+        "cmp %[a], r6\n\t"
+        "beq 3f\n\t"
+        "cmp %[a], r2\n\t"
+        "bgt 3f\n\t"
+        "mov r8, r9\n\t"
+        "add r8, r8, r10\n\t"
+        "cmp %[a], r8\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        "mov %[r], r11\n\t"
+        "mov r8, r9\n\t"
+        "str r3, [%[r], r8]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r8, r8, #4\n\t"
+        "mov r9, r8\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, #120\n\t"
+        "cmp r8, r6\n\t"
+        "ble 1b\n\t"
+        "mov %[a], r10\n\t"
+        "str r3, [%[r], r8]\n\t"
+        "mov %[r], r12\n\t"
+        "mov %[a], r11\n\t"
+        "mov r3, #1\n\t"
+        "lsl r3, r3, #8\n\t"
+        "add r3, r3, #124\n\t"
+        "\n4:\n\t"
+        "ldr r6, [%[a], r3]\n\t"
+        "str r6, [%[r], r3]\n\t"
+        "subs r3, r3, #4\n\t"
+        "bge 4b\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, #128\n\t"
+        "add sp, sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+
+    /* rho = -1/m mod 2^32 */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
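+ *
+ * The result is 97 words: 96 product words plus a final carry word, so r
+ * must have room for one digit more than a.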
+ */ +SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, + sp_digit b) +{ + __asm__ __volatile__ ( + "add r9, %[a], #384\n\t" + /* A[0] * B */ + "ldr r6, [%[a]], #4\n\t" + "umull r5, r3, r6, %[b]\n\t" + "mov r4, #0\n\t" + "str r5, [%[r]], #4\n\t" + /* A[0] * B - Done */ + "\n1:\n\t" + "mov r5, #0\n\t" + /* A[] * B */ + "ldr r6, [%[a]], #4\n\t" + "umull r6, r8, r6, %[b]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[] * B - Done */ + "str r3, [%[r]], #4\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "cmp %[a], r9\n\t" + "blt 1b\n\t" + "str r3, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r8", "r9" + ); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 3072 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 48); + + /* r = 2^n mod m */ + sp_3072_sub_in_place_48(r, m); +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #192\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "ldr r6, [%[b], r8]\n\t" + "and r6, r6, %[m]\n\t" + "mov r5, #0\n\t" + "subs r5, r5, %[c]\n\t" + "ldr r5, [%[a], r8]\n\t" + "sbcs r5, r5, r6\n\t" + "sbcs %[c], %[c], %[c]\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r8", "r9" + ); + + return c; +} + +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
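+ *
+ * Word-by-word Montgomery reduction, roughly: for each of the 48 low words,
+ * mu = a[i] * mp (mod 2^32) is chosen so that adding mu * m * 2^(32*i)
+ * zeroes word i; after 48 rounds the low half is zero and the high half,
+ * shifted down, is the result, up to one final conditional subtraction of m.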
+ */
+SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov r9, %[mp]\n\t"
+        "mov r12, %[m]\n\t"
+        "mov r10, %[a]\n\t"
+        "mov r4, #0\n\t"
+        "add r11, r10, #192\n\t"
+        "\n1:\n\t"
+        /* mu = a[i] * mp */
+        "mov %[mp], r9\n\t"
+        "ldr %[a], [r10]\n\t"
+        "mul %[mp], %[mp], %[a]\n\t"
+        "mov %[m], r12\n\t"
+        "add r14, r10, #184\n\t"
+        "\n2:\n\t"
+        /* a[i+j] += m[j] * mu */
+        "ldr %[a], [r10]\n\t"
+        "mov r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr r8, [%[m]], #4\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds %[a], %[a], r6\n\t"
+        "adc r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds r4, r4, %[a]\n\t"
+        "adc r5, r5, #0\n\t"
+        "str r4, [r10], #4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "ldr %[a], [r10]\n\t"
+        "mov r4, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr r8, [%[m]], #4\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds %[a], %[a], r6\n\t"
+        "adc r4, r4, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds r5, r5, %[a]\n\t"
+        "adc r4, r4, #0\n\t"
+        "str r5, [r10], #4\n\t"
+        "cmp r10, r14\n\t"
+        "blt 2b\n\t"
+        /* a[i+46] += m[46] * mu */
+        "ldr %[a], [r10]\n\t"
+        "mov r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr r8, [%[m]], #4\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds %[a], %[a], r6\n\t"
+        "adc r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds r4, r4, %[a]\n\t"
+        "adc r5, r5, #0\n\t"
+        "str r4, [r10], #4\n\t"
+        /* a[i+47] += m[47] * mu */
+        "mov r4, %[ca]\n\t"
+        "mov %[ca], #0\n\t"
+        /* Multiply m[47] and mu - Start */
+        "ldr r8, [%[m]]\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc %[ca], %[ca], #0\n\t"
+        /* Multiply m[47] and mu - Done */
+        "ldr r6, [r10]\n\t"
+        "ldr r8, [r10, #4]\n\t"
+        "adds r6, r6, r5\n\t"
+        "adcs r8, r8, r4\n\t"
+        "adc %[ca], %[ca], #0\n\t"
+        "str r6, [r10]\n\t"
+        "str r8, [r10, #4]\n\t"
+        /* Next word in a */
+        "sub r10, r10, #184\n\t"
+        "cmp r10, r11\n\t"
+        "blt 1b\n\t"
+        "mov %[a], r10\n\t"
+        "mov %[m], r12\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_48(r, a, b);
+    sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_48(r, a);
+    sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "add r9, %[a], #192\n\t"
+        /* A[0] * B */
+        "ldr r6, [%[a]], #4\n\t"
+        "umull r5, r3, r6, %[b]\n\t"
+        "mov r4, #0\n\t"
+        "str r5, [%[r]], #4\n\t"
+        /* A[0] * B - Done */
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        /* A[] * B */
+        "ldr r6, [%[a]], #4\n\t"
+        "umull r6, r8, r6, %[b]\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[] * B - Done */
+        "str r3, [%[r]], #4\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "cmp %[a], r9\n\t"
+        "blt 1b\n\t"
+        "str r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr r6, %[div], #16\n\t"
+        "add r6, r6, #1\n\t"
+        "udiv r4, %[d1], r6\n\t"
+        "lsl r8, r4, #16\n\t"
+        "umull r4, r5, %[div], r8\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "udiv r5, %[d1], r6\n\t"
+        "lsl r4, r5, #16\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "lsl r4, %[d1], #16\n\t"
+        "orr r4, r4, %[d0], lsr #16\n\t"
+        "udiv r4, r4, r6\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "lsl r4, %[d1], #16\n\t"
+        "orr r4, r4, %[d0], lsr #16\n\t"
+        "udiv r4, r4, r6\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "udiv r4, %[d0], %[div]\n\t"
+        "add r8, r8, r4\n\t"
+        "mov %[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r8"
+    );
+    return r;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mvn r3, r3\n\t"
+        "mov r6, #188\n\t"
+        "\n1:\n\t"
+        "ldr r8, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        "and r8, r8, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "mov r4, r8\n\t"
+        "subs r8, r8, r5\n\t"
+        "sbc r8, r8, r8\n\t"
+        "add %[r], %[r], r8\n\t"
+        "mvn r8, r8\n\t"
+        "and r3, r3, r8\n\t"
+        "subs r5, r5, r4\n\t"
+        "sbc r8, r8, r8\n\t"
+        "sub %[r], %[r], r8\n\t"
+        "mvn r8, r8\n\t"
+        "and r3, r3, r8\n\t"
+        "sub r6, r6, #4\n\t"
+        "cmp r6, #0\n\t"
+        "bge 1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return r;
+}
+
+/* Divide a by d and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
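+ *
+ * Each estimate from div_3072_word_48() may overshoot by one, which the two
+ * masked add-backs of d below correct while the top word t1[48 + i] is
+ * negative.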
+ */ +static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[96], t2[49]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[47]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 48); + for (i=47; i>=0; i--) { + r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div); + + sp_3072_mul_d_48(t2, d, r1); + t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2); + t1[48 + i] -= t2[48]; + sp_3072_mask_48(t2, d, t1[48 + i]); + t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2); + sp_3072_mask_48(t2, d, t1[48 + i]); + t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2); + } + + r1 = sp_3072_cmp_48(t1, d) >= 0; + sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_3072_div_48(a, m, NULL, r); +} + +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][96]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 96, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 96; + } +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_48(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 48U); + if (reduceA != 0) { + err = sp_3072_mod_48(t[1] + 48, a, m); + if (err == MP_OKAY) { + err = sp_3072_mod_48(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48); + err = sp_3072_mod_48(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_48(t[10], t[ 5], m, mp); + sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_48(t[12], t[ 6], m, mp); + sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_48(t[14], t[ 7], m, mp); + sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 48); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n <<= 
4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + + sp_3072_mont_mul_48(r, r, t[y], m, mp); + } + + XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U); + sp_3072_mont_reduce_48(r, m, mp); + + mask = 0 - (sp_3072_cmp_48(r, m) >= 0); + sp_3072_cond_sub_48(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][96]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 96; + } +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_48(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 48U); + if (reduceA != 0) { + err = sp_3072_mod_48(t[1] + 48, a, m); + if (err == MP_OKAY) { + err = sp_3072_mod_48(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48); + err = sp_3072_mod_48(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_48(t[10], t[ 5], m, mp); + sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_48(t[12], t[ 6], m, mp); + sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_48(t[14], t[ 7], m, mp); + sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_48(t[16], t[ 8], m, mp); + sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_48(t[18], t[ 9], m, mp); + sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_48(t[20], t[10], m, mp); + sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_48(t[22], t[11], m, mp); + sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_48(t[24], t[12], m, mp); + sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_48(t[26], t[13], m, mp); + sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_48(t[28], t[14], m, mp); + sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_48(t[30], t[15], m, mp); + sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp); + + i = (bits - 
1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 48); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + + sp_3072_mont_mul_48(r, r, t[y], m, mp); + } + + XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U); + sp_3072_mont_reduce_48(r, m, mp); + + mask = 0 - (sp_3072_cmp_48(r, m) >= 0); + sp_3072_cond_sub_48(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ + +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */ + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 3072 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 96); + + /* r = 2^n mod m */ + sp_3072_sub_in_place_96(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #1\n\t" + "lsl r5, r5, #8\n\t" + "add r5, r5, #128\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "ldr r6, [%[b], r8]\n\t" + "and r6, r6, %[m]\n\t" + "mov r5, #0\n\t" + "subs r5, r5, %[c]\n\t" + "ldr r5, [%[a], r8]\n\t" + "sbcs r5, r5, r6\n\t" + "sbcs %[c], %[c], %[c]\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r8", "r9" + ); + + return c; +} + +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */
+SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov r9, %[mp]\n\t"
+        "mov r12, %[m]\n\t"
+        "mov r10, %[a]\n\t"
+        "mov r4, #0\n\t"
+        "add r11, r10, #384\n\t"
+        "\n1:\n\t"
+        /* mu = a[i] * mp */
+        "mov %[mp], r9\n\t"
+        "ldr %[a], [r10]\n\t"
+        "mul %[mp], %[mp], %[a]\n\t"
+        "mov %[m], r12\n\t"
+        "add r14, r10, #376\n\t"
+        "\n2:\n\t"
+        /* a[i+j] += m[j] * mu */
+        "ldr %[a], [r10]\n\t"
+        "mov r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr r8, [%[m]], #4\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds %[a], %[a], r6\n\t"
+        "adc r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds r4, r4, %[a]\n\t"
+        "adc r5, r5, #0\n\t"
+        "str r4, [r10], #4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "ldr %[a], [r10]\n\t"
+        "mov r4, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr r8, [%[m]], #4\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds %[a], %[a], r6\n\t"
+        "adc r4, r4, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds r5, r5, %[a]\n\t"
+        "adc r4, r4, #0\n\t"
+        "str r5, [r10], #4\n\t"
+        "cmp r10, r14\n\t"
+        "blt 2b\n\t"
+        /* a[i+94] += m[94] * mu */
+        "ldr %[a], [r10]\n\t"
+        "mov r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr r8, [%[m]], #4\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds %[a], %[a], r6\n\t"
+        "adc r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds r4, r4, %[a]\n\t"
+        "adc r5, r5, #0\n\t"
+        "str r4, [r10], #4\n\t"
+        /* a[i+95] += m[95] * mu */
+        "mov r4, %[ca]\n\t"
+        "mov %[ca], #0\n\t"
+        /* Multiply m[95] and mu - Start */
+        "ldr r8, [%[m]]\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc %[ca], %[ca], #0\n\t"
+        /* Multiply m[95] and mu - Done */
+        "ldr r6, [r10]\n\t"
+        "ldr r8, [r10, #4]\n\t"
+        "adds r6, r6, r5\n\t"
+        "adcs r8, r8, r4\n\t"
+        "adc %[ca], %[ca], #0\n\t"
+        "str r6, [r10]\n\t"
+        "str r8, [r10, #4]\n\t"
+        /* Next word in a */
+        "sub r10, r10, #376\n\t"
+        "cmp r10, r11\n\t"
+        "blt 1b\n\t"
+        "mov %[a], r10\n\t"
+        "mov %[m], r12\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_96(r, a, b);
+    sp_3072_mont_reduce_96(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_96(r, a);
+    sp_3072_mont_reduce_96(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
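+ * The divisor is approximated by (div >> 16) + 1; partial quotient
+ * estimates from udiv are accumulated while their products are subtracted
+ * out, and a final udiv of the remainder tops up the result.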
+ */
+SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr r6, %[div], #16\n\t"
+        "add r6, r6, #1\n\t"
+        "udiv r4, %[d1], r6\n\t"
+        "lsl r8, r4, #16\n\t"
+        "umull r4, r5, %[div], r8\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "udiv r5, %[d1], r6\n\t"
+        "lsl r4, r5, #16\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "lsl r4, %[d1], #16\n\t"
+        "orr r4, r4, %[d0], lsr #16\n\t"
+        "udiv r4, r4, r6\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "lsl r4, %[d1], #16\n\t"
+        "orr r4, r4, %[d0], lsr #16\n\t"
+        "udiv r4, r4, r6\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "udiv r4, %[d0], %[div]\n\t"
+        "add r8, r8, r4\n\t"
+        "mov %[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r8"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<96; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 96; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mvn r3, r3\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, #124\n\t"
+        "\n1:\n\t"
+        "ldr r8, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        "and r8, r8, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "mov r4, r8\n\t"
+        "subs r8, r8, r5\n\t"
+        "sbc r8, r8, r8\n\t"
+        "add %[r], %[r], r8\n\t"
+        "mvn r8, r8\n\t"
+        "and r3, r3, r8\n\t"
+        "subs r5, r5, r4\n\t"
+        "sbc r8, r8, r8\n\t"
+        "sub %[r], %[r], r8\n\t"
+        "mvn r8, r8\n\t"
+        "and r3, r3, r8\n\t"
+        "sub r6, r6, #4\n\t"
+        "cmp r6, #0\n\t"
+        "bge 1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return r;
+}
+
+/* Divide a by d and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[192], t2[97];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[95];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
+    for (i=95; i>=0; i--) {
+        r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
+
+        sp_3072_mul_d_96(t2, d, r1);
+        t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
+        t1[96 + i] -= t2[96];
+        sp_3072_mask_96(t2, d, t1[96 + i]);
+        t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
+        sp_3072_mask_96(t2, d, t1[96 + i]);
+        t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_3072_cmp_96(t1, d) >= 0;
+    sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_96(a, m, NULL, r);
+}
+
+/* Divide a by d and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[192], t2[97];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[95];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
+    for (i=95; i>=0; i--) {
+        r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
+
+        sp_3072_mul_d_96(t2, d, r1);
+        t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
+        t1[96 + i] -= t2[96];
+        if (t1[96 + i] != 0) {
+            t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
+            if (t1[96 + i] != 0)
+                t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
+        }
+    }
+
+    r1 = sp_3072_cmp_96(t1, d) >= 0;
+    sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_96_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+    defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
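+ *
+ * Uses a fixed 4-bit window: t[0..15] hold the Montgomery-form powers
+ * a^0..a^15, and each window costs four modular squarings plus one
+ * multiplication by the selected table entry.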
+ */ +static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][192]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 192, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 192; + } +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_96(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 96U); + if (reduceA != 0) { + err = sp_3072_mod_96(t[1] + 96, a, m); + if (err == MP_OKAY) { + err = sp_3072_mod_96(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96); + err = sp_3072_mod_96(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_96(t[10], t[ 5], m, mp); + sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_96(t[12], t[ 6], m, mp); + sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_96(t[14], t[ 7], m, mp); + sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 96); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n <<= 4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + + sp_3072_mont_mul_96(r, r, t[y], m, mp); + } + + XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U); + sp_3072_mont_reduce_96(r, m, mp); + + mask = 0 - (sp_3072_cmp_96(r, m) >= 0); + sp_3072_cond_sub_96(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
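+ *
+ * Same scheme as the small variant above but with a 5-bit window:
+ * 32 precomputed powers, five squarings per window, one table multiply.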
+ */ +static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][192]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 192, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 192; + } +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_96(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 96U); + if (reduceA != 0) { + err = sp_3072_mod_96(t[1] + 96, a, m); + if (err == MP_OKAY) { + err = sp_3072_mod_96(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96); + err = sp_3072_mod_96(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_96(t[10], t[ 5], m, mp); + sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_96(t[12], t[ 6], m, mp); + sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_96(t[14], t[ 7], m, mp); + sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_96(t[16], t[ 8], m, mp); + sp_3072_mont_mul_96(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_96(t[18], t[ 9], m, mp); + sp_3072_mont_mul_96(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_96(t[20], t[10], m, mp); + sp_3072_mont_mul_96(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_96(t[22], t[11], m, mp); + sp_3072_mont_mul_96(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_96(t[24], t[12], m, mp); + sp_3072_mont_mul_96(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_96(t[26], t[13], m, mp); + sp_3072_mont_mul_96(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_96(t[28], t[14], m, mp); + sp_3072_mont_mul_96(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_96(t[30], t[15], m, mp); + sp_3072_mont_mul_96(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 96); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + + sp_3072_mont_mul_96(r, r, t[y], m, mp); + } + + XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U); + sp_3072_mont_reduce_96(r, m, mp); + + mask = 0 - (sp_3072_cmp_96(r, m) >= 0); + sp_3072_cond_sub_96(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || 
WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[192], m[96], r[192]; +#else + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; +#endif + sp_digit *ah; + sp_digit e[1]; + int err = MP_OKAY; + + if (*outLen < 384) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 384 || + mp_count_bits(mm) != 3072)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 96 * 2; + m = r + 96 * 2; + } +#endif + + if (err == MP_OKAY) { + ah = a + 96; + + sp_3072_from_bin(ah, 96, in, inLen); +#if DIGIT_BIT >= 32 + e[0] = em->dp[0]; +#else + e[0] = em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_3072_from_mp(m, 96, mm); + + if (e[0] == 0x3) { + if (err == MP_OKAY) { + sp_3072_sqr_96(r, ah); + err = sp_3072_mod_96_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_3072_mul_96(r, ah, r); + err = sp_3072_mod_96_cond(r, r, m); + } + } + else { + int i; + sp_digit mp; + + sp_3072_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
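+           Montgomery form is a*R mod m with R = 2^3072: the input already
+           sits in the top half (ah), the bottom 96 words are zeroed below,
+           so the conditional mod computes (in * 2^3072) mod m.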
*/
+            XMEMSET(a, 0, sizeof(sp_digit) * 96);
+            err = sp_3072_mod_96_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 96);
+                for (i--; i>=0; i--) {
+                    sp_3072_mont_sqr_96(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_3072_mont_mul_96(r, r, a, m, mp);
+                    }
+                }
+                XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
+                sp_3072_mont_reduce_96(r, m, mp);
+
+                for (i = 95; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_3072_sub_in_place_96(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+/* RSA private key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 384U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 3072) {
+            err = MP_READ_E;
+        }
+        if (inLen > 384) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 4, NULL,
+            DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 96;
+        m = a + 192;
+        r = a;
+
+        sp_3072_from_bin(a, 96, in, inLen);
+        sp_3072_from_mp(d, 96, dm);
+        sp_3072_from_mp(m, 96, mm);
+        err = sp_3072_mod_exp_96(r, a, d, 3072, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 96);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+}
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov r5, #192\n\t"
+        "mov r9, r5\n\t"
+        "mov r8, #0\n\t"
+        "\n1:\n\t"
+        "ldr r6, [%[b], r8]\n\t"
+        "and r6, r6, %[m]\n\t"
+        "adds r5, %[c], #-1\n\t"
+        "ldr r5, [%[a], r8]\n\t"
+        "adcs r5, r5, r6\n\t"
+        "mov %[c], #0\n\t"
+        "adcs %[c], %[c], %[c]\n\t"
+        "str r5, [%[r], r8]\n\t"
+        "add r8, r8, #4\n\t"
+        "cmp r8, r9\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* RSA private key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
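+ *
+ * The body follows textbook RSA-CRT: tmpa = in^dP mod p, tmpb = in^dQ
+ * mod q, then tmpa = qInv * (tmpa - tmpb) mod p and the result is
+ * tmpb + tmpa * q.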
+ */ +int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[96 * 2]; + sp_digit p[48], q[48], dp[48]; + sp_digit tmpa[96], tmpb[96]; +#else + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* tmpa; + sp_digit* tmpb; +#endif + sp_digit* r; + sp_digit* qi; + sp_digit* dq; + sp_digit c; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 384) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 96 * 2; + q = p + 48; + qi = dq = dp = q + 48; + tmpa = qi + 48; + tmpb = tmpa + 96; + + r = t + 96; + } +#else +#endif + + if (err == MP_OKAY) { +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + r = a; + qi = dq = dp; +#endif + sp_3072_from_bin(a, 96, in, inLen); + sp_3072_from_mp(p, 48, pm); + sp_3072_from_mp(q, 48, qm); + sp_3072_from_mp(dp, 48, dpm); + + err = sp_3072_mod_exp_48(tmpa, a, dp, 1536, p, 1); + } + if (err == MP_OKAY) { + sp_3072_from_mp(dq, 48, dqm); + err = sp_3072_mod_exp_48(tmpb, a, dq, 1536, q, 1); + } + + if (err == MP_OKAY) { + c = sp_3072_sub_in_place_48(tmpa, tmpb); + c += sp_3072_cond_add_48(tmpa, tmpa, p, c); + sp_3072_cond_add_48(tmpa, tmpa, p, c); + + sp_3072_from_mp(qi, 48, qim); + sp_3072_mul_48(tmpa, tmpa, qi); + err = sp_3072_mod_48(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_3072_mul_48(tmpa, q, tmpa); + XMEMSET(&tmpb[48], 0, sizeof(sp_digit) * 48); + sp_3072_add_96(r, tmpb, tmpa); + + sp_3072_to_bin(r, out); + *outLen = 384; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 48 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); +#endif + + return err; +} +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. 
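+ *
+ * When DIGIT_BIT is 32 this is a straight copy; otherwise the 32-bit
+ * words are repacked into DIGIT_BIT-sized mp digits.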
+ */ +static int sp_3072_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 96); + r->used = 96; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 96; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 32 - s; + } + r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 96; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[192], e[96], m[96]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 96, base); + sp_3072_from_mp(e, 96, exp); + sp_3072_from_mp(m, 96, mod); + + err = sp_3072_mod_exp_96(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_3072_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_3072 +static void sp_3072_lshift_96(sp_digit* r, sp_digit* a, byte n) +{ + __asm__ __volatile__ ( + "mov r6, #31\n\t" + "sub r6, r6, %[n]\n\t" + "add %[a], %[a], #320\n\t" + "add %[r], %[r], #320\n\t" + "ldr r3, [%[a], #60]\n\t" + "lsr r4, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r4, r4, r6\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl 
r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, 
r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #56]\n\t" + "str r2, [%[r], #64]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], #56]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #44]\n\t" + "str r2, [%[r], #52]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #32]\n\t" + "str r2, [%[r], #40]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #20]\n\t" + "str r2, [%[r], #28]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #8]\n\t" + "str r2, [%[r], #16]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #0]\n\t" + "str r3, [%[r], #8]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r3, [%[a], #60]\n\t" + "str r2, [%[r], #68]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr 
r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str 
r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #56]\n\t" + "str r2, [%[r], #64]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], #56]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #44]\n\t" + "str r2, [%[r], #52]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #32]\n\t" + "str r2, [%[r], #40]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #20]\n\t" + "str r2, [%[r], #28]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #8]\n\t" + "str r2, [%[r], #16]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #0]\n\t" + "str r3, [%[r], #8]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "str r4, [%[r]]\n\t" + "str r2, [%[r], #4]\n\t" + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) + : "memory", "r2", "r3", "r4", "r5", "r6" + ); +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
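`sp_3072_lshift_96` above shifts a 96-word value left by n bits, working word by word from the top down; the `lsr #1` followed by a variable `lsr` computes `w >> (32 - n)` in two steps so the shift amount stays in 0..31 (a single shift by 32 is undefined in C). A rolled-up portable rendering (hypothetical helper, not part of the file):

    #include <stdint.h>

    /* r must have words+1 entries; 0 <= n <= 31. */
    static void lshift_words(uint32_t* r, const uint32_t* a, int words, int n)
    {
        uint32_t carry = 0;
        int i;

        for (i = 0; i < words; i++) {
            uint32_t w = a[i];
            r[i] = (w << n) | carry;
            /* (w >> 1) >> (31 - n) == w >> (32 - n), but stays defined
             * when n == 0 */
            carry = (w >> 1) >> (31 - n);
        }
        r[words] = carry;   /* top bits land in the extra word, r[96] above */
    }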
+ */ +static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[192]; + sp_digit td[97]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 289, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 192; +#else + norm = nd; + tmp = td; +#endif + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_96(norm, m); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + sp_3072_lshift_96(r, norm, y); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + sp_3072_mont_sqr_96(r, r, m, mp); + + sp_3072_lshift_96(r, r, y); + sp_3072_mul_d_96(tmp, norm, r[96]); + r[96] = 0; + o = sp_3072_add_96(r, r, tmp); + sp_3072_cond_sub_96(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U); + sp_3072_mont_reduce_96(r, m, mp); + + mask = 0 - (sp_3072_cmp_96(r, m) >= 0); + sp_3072_cond_sub_96(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* HAVE_FFDHE_3072 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ + int err = MP_OKAY; + sp_digit b[192], e[96], m[96]; + sp_digit* r = b; + word32 i; + + if (mp_count_bits(base) > 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 384) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 96, base); + sp_3072_from_bin(e, 96, exp, expLen); + sp_3072_from_mp(m, 96, mod); + + #ifdef HAVE_FFDHE_3072 + if (base->used == 1 && base->dp[0] == 2 && m[95] == (sp_digit)-1) + err = sp_3072_mod_exp_2_96(r, e, expLen * 8, m); + else + #endif + err = sp_3072_mod_exp_96(r, b, e, expLen * 8, m, 0); + + } + + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + for (i=0; i<384 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. 
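The FFDHE fast path `sp_3072_mod_exp_2_96` above exploits the fixed base 2: "multiply by 2^y" degenerates to the `sp_3072_lshift_96` call, with the word shifted out the top folded back in via `sp_3072_mul_d_96` and a conditional subtract, so only the squarings pay for full Montgomery multiplications. The bit-at-a-time analogue on machine words (illustrative only):

    #include <stdint.h>

    /* Compute 2^e mod m using shifts in place of multiplications. */
    static uint32_t two_pow_mod(uint32_t e, uint32_t m)
    {
        uint64_t r = 1 % m;
        int i;

        for (i = 31; i >= 0; i--) {
            r = (r * r) % m;          /* square for every exponent bit */
            if ((e >> i) & 1)
                r = (r << 1) % m;     /* "multiply by 2" is just a shift */
        }
        return (uint32_t)r;
    }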
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[96], e[48], m[48]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1536) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 1536) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 1536) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 48, base); + sp_3072_from_mp(e, 48, exp); + sp_3072_from_mp(m, 48, mod); + + err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 48, 0, sizeof(*r) * 48U); + err = sp_3072_to_mp(r, res); + res->used = mod->used; + mp_clamp(res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* !WOLFSSL_SP_NO_3072 */ + +#ifdef WOLFSSL_SP_4096 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 24U) { + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 32 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 32 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 32U) <= (word32)DIGIT_BIT) { + s += 32U; + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 32) { + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + s = 32 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 512 + * + * r A single precision integer. + * a Byte array. 
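`sp_4096_from_bin` above converts the big-endian byte string into little-endian 32-bit digits by walking the bytes from the end. With the bounds and partial-word handling stripped away, the core mapping is (a simplified sketch, assuming n <= 4 * size):

    #include <stdint.h>

    static void from_bin(uint32_t* r, int size, const uint8_t* a, int n)
    {
        int i;

        for (i = 0; i < size; i++)
            r[i] = 0;
        for (i = 0; i < n; i++)
            /* a[n-1-i] is the i-th least significant byte of the number */
            r[i / 4] |= (uint32_t)a[n - 1 - i] << (8 * (i % 4));
    }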
+ */ +static void sp_4096_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + j = 4096 / 8 - 1; + a[j] = 0; + for (i=0; i<128 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ + b += 8 - s; + if (j < 0) { + break; + } + while (b < 32) { + a[j--] = (byte)(r[i] >> b); + b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 32); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, 
{r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "subs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm 
%[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, 
r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "stm %[a]!, {r3, r4}\n\t" + "sbc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm 
%[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, 
r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[64 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r10, %[a]\n\t" + "mov r11, %[b]\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, r10\n\t" + "mov r14, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #252\n\t" + "mov %[a], r9\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov %[b], r9\n\t" + "sub %[b], %[b], %[a]\n\t" + "add %[a], %[a], r10\n\t" + "add %[b], %[b], r11\n\t" + "\n2:\n\t" + /* Multiply Start */ + "ldr r6, [%[a]]\n\t" + "ldr r8, [%[b]]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply Done */ + "add %[a], %[a], #4\n\t" + "sub %[b], %[b], #4\n\t" + "cmp %[a], r14\n\t" + "beq 3f\n\t" + "mov r6, r9\n\t" + "add r6, r6, r10\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r12\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #248\n\t" + "cmp r8, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[a], r10\n\t" + "mov %[b], r11\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<64; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 64; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[128]; + sp_digit a1[64]; + sp_digit b1[64]; + sp_digit z2[128]; + sp_digit u, ca, cb; + + ca = sp_2048_add_64(a1, a, &a[64]); + cb = sp_2048_add_64(b1, b, &b[64]); + u = ca & cb; + sp_2048_mul_64(z1, a1, b1); + sp_2048_mul_64(z2, &a[64], &b[64]); + sp_2048_mul_64(z0, a, b); + sp_2048_mask_64(r + 128, a1, 0 - cb); + sp_2048_mask_64(b1, b1, 0 - ca); + u += sp_2048_add_64(r + 128, r + 128, b1); + u += sp_4096_sub_in_place_128(z1, z2); + u += sp_4096_sub_in_place_128(z1, z0); + u += sp_4096_add_128(r + 64, r + 64, z1); + r[192] = u; + XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1)); + (void)sp_4096_add_128(r + 128, r + 128, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
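`sp_4096_mul_128` above is one level of Karatsuba over the 2048-bit half-size routines: three 64-digit multiplies (z0, z1, z2) replace four, and the masked additions patch up the carries out of a1 and b1. The identity in miniature on 64-bit words (uses the GCC/Clang `unsigned __int128` extension):

    #include <stdint.h>

    typedef unsigned __int128 u128;

    static u128 karatsuba_mul64(uint64_t a, uint64_t b)
    {
        uint32_t a0 = (uint32_t)a, a1 = (uint32_t)(a >> 32);
        uint32_t b0 = (uint32_t)b, b1 = (uint32_t)(b >> 32);

        u128 z0 = (u128)a0 * b0;                 /* low  half * low  half */
        u128 z2 = (u128)a1 * b1;                 /* high half * high half */
        /* (a0+a1)*(b0+b1) - z0 - z2 == a0*b1 + a1*b0, the middle term */
        u128 z1 = (u128)((uint64_t)a0 + a1) * ((uint64_t)b0 + b1) - z0 - z2;

        return z0 + (z1 << 32) + (z2 << 64);     /* == (u128)a * b */
    }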
+ */ +SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "neg r6, r6\n\t" + "add sp, sp, r6\n\t" + "mov r11, sp\n\t" + "mov r10, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #252\n\t" + "mov %[a], r9\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov r2, r9\n\t" + "sub r2, r2, %[a]\n\t" + "add %[a], %[a], r10\n\t" + "add r2, r2, r10\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + /* Multiply * 2: Start */ + "ldr r6, [%[a]]\n\t" + "ldr r8, [r2]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply * 2: Done */ + "bal 5f\n\t" + "\n4:\n\t" + /* Square: Start */ + "ldr r6, [%[a]]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Square: Done */ + "\n5:\n\t" + "add %[a], %[a], #4\n\t" + "sub r2, r2, #4\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, r10\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r8, r9\n\t" + "add r8, r8, r10\n\t" + "cmp %[a], r8\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #248\n\t" + "cmp r8, r6\n\t" + "ble 1b\n\t" + "mov %[a], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[r], r12\n\t" + "mov %[a], r11\n\t" + "mov r3, #1\n\t" + "lsl r3, r3, #8\n\t" + "add r3, r3, #252\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "subs r3, r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add sp, sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" + ); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[128]; + sp_digit z1[128]; + sp_digit a1[64]; + sp_digit u; + + u = sp_2048_add_64(a1, a, &a[64]); + sp_2048_sqr_64(z1, a1); + sp_2048_sqr_64(z2, &a[64]); + sp_2048_sqr_64(z0, a); + sp_2048_mask_64(r + 128, a1, 0 - u); + u += sp_2048_add_64(r + 128, r + 128, r + 128); + u += sp_4096_sub_in_place_128(z1, z2); + u += sp_4096_sub_in_place_128(z1, z0); + u += sp_4096_add_128(r + 64, r + 64, z1); + r[192] = u; + XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1)); + (void)sp_4096_add_128(r + 128, r + 128, z2); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
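`sp_4096_sqr_128` above is the squaring form of the same split: since (a0 + a1)^2 - a0^2 - a1^2 = 2 * a0 * a1, the middle term costs only a third half-size squaring rather than a general multiply. In miniature:

    #include <stdint.h>

    typedef unsigned __int128 u128;

    static u128 karatsuba_sqr64(uint64_t a)
    {
        uint32_t a0 = (uint32_t)a, a1 = (uint32_t)(a >> 32);
        uint64_t s  = (uint64_t)a0 + a1;

        u128 z0 = (u128)a0 * a0;
        u128 z2 = (u128)a1 * a1;
        u128 z1 = (u128)s * s - z0 - z2;    /* == 2 * a0 * a1 */

        return z0 + (z1 << 32) + (z2 << 64);
    }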
+ */ +SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "mov r8, #0\n\t" + "add r6, r6, #512\n\t" + "sub r8, r8, #1\n\t" + "\n1:\n\t" + "adds %[c], %[c], r8\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r]]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #4\n\t" + "add %[b], %[b], #4\n\t" + "add %[r], %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + __asm__ __volatile__ ( + "mov r8, %[a]\n\t" + "add r8, r8, #512\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "subs r5, r5, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b]]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a]]\n\t" + "str r4, [%[a], #4]\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #8\n\t" + "add %[b], %[b], #8\n\t" + "cmp %[a], r8\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r8" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[128 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r10, %[a]\n\t" + "mov r11, %[b]\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, r10\n\t" + "mov r14, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #252\n\t" + "mov %[a], r9\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov %[b], r9\n\t" + "sub %[b], %[b], %[a]\n\t" + "add %[a], %[a], r10\n\t" + "add %[b], %[b], r11\n\t" + "\n2:\n\t" + /* Multiply Start */ + "ldr r6, [%[a]]\n\t" + "ldr r8, [%[b]]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply Done */ + "add %[a], %[a], #4\n\t" + "sub %[b], %[b], #4\n\t" + "cmp %[a], r14\n\t" + "beq 3f\n\t" + "mov r6, r9\n\t" + "add r6, r6, r10\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r12\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" + "mov r6, #3\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #248\n\t" + "cmp r8, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[a], r10\n\t" + "mov %[b], r11\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
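The `WOLFSSL_SP_SMALL` squaring below keeps the comba structure but takes the asm's "Multiply * 2" path for off-diagonal products and the "Square" path on the diagonal, roughly halving the partial products compared to a general multiply. A sketch of that bookkeeping (illustrative):

    #include <stdint.h>

    static void sqr_comba(uint32_t* r, const uint32_t* a, int n)
    {
        uint64_t acc = 0;
        uint32_t top = 0;
        int i, k;

        for (k = 0; k < 2 * n - 1; k++) {
            int lo = (k < n) ? 0 : k - n + 1;
            for (i = lo; i <= k - i; i++) {
                uint64_t p = (uint64_t)a[i] * a[k - i];
                int twice = (i < k - i) ? 2 : 1;  /* double cross terms */
                while (twice--) {
                    acc += p;
                    if (acc < p)
                        top++;
                }
            }
            r[k] = (uint32_t)acc;
            acc = (acc >> 32) | ((uint64_t)top << 32);
            top = 0;
        }
        r[2 * n - 1] = (uint32_t)acc;
    }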
+ */
+SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r9, r3\n\t"
+        "mov r12, %[r]\n\t"
+        "mov r6, #4\n\t"
+        "lsl r6, r6, #8\n\t"
+        "neg r6, r6\n\t"
+        "add sp, sp, r6\n\t"
+        "mov r11, sp\n\t"
+        "mov r10, %[a]\n\t"
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, #252\n\t"
+        "mov %[a], r9\n\t"
+        "subs %[a], %[a], r6\n\t"
+        "sbc r6, r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], %[a], r6\n\t"
+        "mov r2, r9\n\t"
+        "sub r2, r2, %[a]\n\t"
+        "add %[a], %[a], r10\n\t"
+        "add r2, r2, r10\n\t"
+        "\n2:\n\t"
+        "cmp r2, %[a]\n\t"
+        "beq 4f\n\t"
+        /* Multiply * 2: Start */
+        "ldr r6, [%[a]]\n\t"
+        "ldr r8, [r2]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        /* Multiply * 2: Done */
+        "bal 5f\n\t"
+        "\n4:\n\t"
+        /* Square: Start */
+        "ldr r6, [%[a]]\n\t"
+        "umull r6, r8, r6, r6\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        /* Square: Done */
+        "\n5:\n\t"
+        "add %[a], %[a], #4\n\t"
+        "sub r2, r2, #4\n\t"
+        "mov r6, #2\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, r10\n\t"
+        "cmp %[a], r6\n\t"
+        "beq 3f\n\t"
+        "cmp %[a], r2\n\t"
+        "bgt 3f\n\t"
+        "mov r8, r9\n\t"
+        "add r8, r8, r10\n\t"
+        "cmp %[a], r8\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        "mov %[r], r11\n\t"
+        "mov r8, r9\n\t"
+        "str r3, [%[r], r8]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r8, r8, #4\n\t"
+        "mov r9, r8\n\t"
+        "mov r6, #3\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, #248\n\t"
+        "cmp r8, r6\n\t"
+        "ble 1b\n\t"
+        "mov %[a], r10\n\t"
+        "str r3, [%[r], r8]\n\t"
+        "mov %[r], r12\n\t"
+        "mov %[a], r11\n\t"
+        "mov r3, #3\n\t"
+        "lsl r3, r3, #8\n\t"
+        "add r3, r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr r6, [%[a], r3]\n\t"
+        "str r6, [%[r], r3]\n\t"
+        "subs r3, r3, #4\n\t"
+        "bge 4b\n\t"
+        "mov r6, #4\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add sp, sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a    A single precision number.
+ * rho  Bottom word of inverse.
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+
+    /* rho = -1/m mod 2^32 */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
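+ *
+ * A portable-C sketch of the same loop ("dw" again being a hypothetical
+ * 64-bit unsigned type; the assembly instead keeps the running
+ * accumulator in registers):
+ *
+ *   sp_digit c = 0;
+ *   for (i = 0; i < 128; i++) {
+ *       dw t = (dw)a[i] * b + c;
+ *       r[i] = (sp_digit)t;
+ *       c    = (sp_digit)(t >> 32);
+ *   }
+ *   r[128] = c;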
+ */
+SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "add r9, %[a], #512\n\t"
+        /* A[0] * B */
+        "ldr r6, [%[a]], #4\n\t"
+        "umull r5, r3, r6, %[b]\n\t"
+        "mov r4, #0\n\t"
+        "str r5, [%[r]], #4\n\t"
+        /* A[0] * B - Done */
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        /* A[] * B */
+        "ldr r6, [%[a]], #4\n\t"
+        "umull r6, r8, r6, %[b]\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[] * B - Done */
+        "str r3, [%[r]], #4\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "cmp %[a], r9\n\t"
+        "blt 1b\n\t"
+        "str r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+    );
+}
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * r  A single precision number.
+ * m  A single precision number.
+ */
+static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 128);
+
+    /* r = 2^n mod m */
+    sp_4096_sub_in_place_128(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r  A single precision number representing the conditional subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov r5, #2\n\t"
+        "lsl r5, r5, #8\n\t"
+        "mov r9, r5\n\t"
+        "mov r8, #0\n\t"
+        "\n1:\n\t"
+        "ldr r6, [%[b], r8]\n\t"
+        "and r6, r6, %[m]\n\t"
+        "mov r5, #0\n\t"
+        "subs r5, r5, %[c]\n\t"
+        "ldr r5, [%[a], r8]\n\t"
+        "sbcs r5, r5, r6\n\t"
+        "sbcs %[c], %[c], %[c]\n\t"
+        "str r5, [%[r], r8]\n\t"
+        "add r8, r8, #4\n\t"
+        "cmp r8, r9\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
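+ *
+ * One step of word-serial Montgomery reduction (sketch): for each i in
+ * turn,
+ *
+ *   mu = a[i] * mp mod 2^32
+ *   a += mu * m * 2^(32*i)
+ *
+ * which zeroes a[i].  After 128 steps the bottom half of a is zero and
+ * the top half holds (a / 2^4096) mod m, up to one final conditional
+ * subtraction of m.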
+ */
+SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov r9, %[mp]\n\t"
+        "mov r12, %[m]\n\t"
+        "mov r10, %[a]\n\t"
+        "mov r4, #0\n\t"
+        "add r11, r10, #512\n\t"
+        "\n1:\n\t"
+        /* mu = a[i] * mp */
+        "mov %[mp], r9\n\t"
+        "ldr %[a], [r10]\n\t"
+        "mul %[mp], %[mp], %[a]\n\t"
+        "mov %[m], r12\n\t"
+        "add r14, r10, #504\n\t"
+        "\n2:\n\t"
+        /* a[i+j] += m[j] * mu */
+        "ldr %[a], [r10]\n\t"
+        "mov r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr r8, [%[m]], #4\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds %[a], %[a], r6\n\t"
+        "adc r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds r4, r4, %[a]\n\t"
+        "adc r5, r5, #0\n\t"
+        "str r4, [r10], #4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "ldr %[a], [r10]\n\t"
+        "mov r4, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr r8, [%[m]], #4\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds %[a], %[a], r6\n\t"
+        "adc r4, r4, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds r5, r5, %[a]\n\t"
+        "adc r4, r4, #0\n\t"
+        "str r5, [r10], #4\n\t"
+        "cmp r10, r14\n\t"
+        "blt 2b\n\t"
+        /* a[i+126] += m[126] * mu */
+        "ldr %[a], [r10]\n\t"
+        "mov r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr r8, [%[m]], #4\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds %[a], %[a], r6\n\t"
+        "adc r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds r4, r4, %[a]\n\t"
+        "adc r5, r5, #0\n\t"
+        "str r4, [r10], #4\n\t"
+        /* a[i+127] += m[127] * mu */
+        "mov r4, %[ca]\n\t"
+        "mov %[ca], #0\n\t"
+        /* Multiply m[127] and mu - Start */
+        "ldr r8, [%[m]]\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc %[ca], %[ca], #0\n\t"
+        /* Multiply m[127] and mu - Done */
+        "ldr r6, [r10]\n\t"
+        "ldr r8, [r10, #4]\n\t"
+        "adds r6, r6, r5\n\t"
+        "adcs r8, r8, r4\n\t"
+        "adc %[ca], %[ca], #0\n\t"
+        "str r6, [r10]\n\t"
+        "str r8, [r10, #4]\n\t"
+        /* Next word in a */
+        "sub r10, r10, #504\n\t"
+        "cmp r10, r11\n\t"
+        "blt 1b\n\t"
+        "mov %[a], r10\n\t"
+        "mov %[m], r12\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_128(r, a, b);
+    sp_4096_mont_reduce_128(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_4096_sqr_128(r, a);
+    sp_4096_mont_reduce_128(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
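+ *
+ * The quotient is estimated 16 bits at a time against (div >> 16) + 1,
+ * so each partial quotient is an under-estimate and the running
+ * remainder stays non-negative; the closing udiv by div itself picks up
+ * the remaining low bits.  That estimation strategy is what bounds the
+ * error at one.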
+ */
+SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr r6, %[div], #16\n\t"
+        "add r6, r6, #1\n\t"
+        "udiv r4, %[d1], r6\n\t"
+        "lsl r8, r4, #16\n\t"
+        "umull r4, r5, %[div], r8\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "udiv r5, %[d1], r6\n\t"
+        "lsl r4, r5, #16\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "lsl r4, %[d1], #16\n\t"
+        "orr r4, r4, %[d0], lsr #16\n\t"
+        "udiv r4, r4, r6\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "lsl r4, %[d1], #16\n\t"
+        "orr r4, r4, %[d0], lsr #16\n\t"
+        "udiv r4, r4, r6\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "udiv r4, %[d0], %[div]\n\t"
+        "add r8, r8, r4\n\t"
+        "mov %[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r8"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<128; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 128; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mvn r3, r3\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, #252\n\t"
+        "\n1:\n\t"
+        "ldr r8, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        "and r8, r8, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "mov r4, r8\n\t"
+        "subs r8, r8, r5\n\t"
+        "sbc r8, r8, r8\n\t"
+        "add %[r], %[r], r8\n\t"
+        "mvn r8, r8\n\t"
+        "and r3, r3, r8\n\t"
+        "subs r5, r5, r4\n\t"
+        "sbc r8, r8, r8\n\t"
+        "sub %[r], %[r], r8\n\t"
+        "mvn r8, r8\n\t"
+        "and r3, r3, r8\n\t"
+        "sub r6, r6, #4\n\t"
+        "cmp r6, #0\n\t"
+        "bge 1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return r;
+}
+
+/* Divide a by d and put the remainder into r. (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
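+ *
+ * The body below is schoolbook long division, one 32-bit quotient digit
+ * per iteration (a sketch, not exact code):
+ *
+ *   for (i = 127; i >= 0; i--) {
+ *       r1 = estimate of t1 / (d << (32*i));   via div_4096_word_128
+ *       t1 -= (r1 * d) << (32*i);              mul_d + in-place subtract
+ *       add d << (32*i) back while the top digit shows a borrow
+ *   }
+ *
+ * then one conditional subtract brings the remainder into [0, d).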
+ */
+static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[256], t2[129];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[127];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+    for (i=127; i>=0; i--) {
+        r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+        sp_4096_mul_d_128(t2, d, r1);
+        t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+        t1[128 + i] -= t2[128];
+        sp_4096_mask_128(t2, d, t1[128 + i]);
+        t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+        sp_4096_mask_128(t2, d, t1[128 + i]);
+        t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_4096_cmp_128(t1, d) >= 0;
+    sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_128(a, m, NULL, r);
+}
+
+/* Divide a by d and put the remainder into r. (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[256], t2[129];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[127];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+    for (i=127; i>=0; i--) {
+        r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+        sp_4096_mul_d_128(t2, d, r1);
+        t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+        t1[128 + i] -= t2[128];
+        if (t1[128 + i] != 0) {
+            t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
+            if (t1[128 + i] != 0)
+                t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
+        }
+    }
+
+    r1 = sp_4096_cmp_128(t1, d) >= 0;
+    sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_128_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+    defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
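+ *
+ * Fixed 4-bit window exponentiation (sketch): with t[k] = a^k in
+ * Montgomery form for k = 0..15, each 4-bit chunk y of e, scanned from
+ * the most significant end, costs
+ *
+ *   r = r^16 mod m      (four Montgomery squarings)
+ *   r = r * t[y] mod m  (one Montgomery multiplication)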
+ */ +static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][256]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 256, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 256; + } +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 128U); + if (reduceA != 0) { + err = sp_4096_mod_128(t[1] + 128, a, m); + if (err == MP_OKAY) { + err = sp_4096_mod_128(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128); + err = sp_4096_mod_128(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_128(t[10], t[ 5], m, mp); + sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_128(t[12], t[ 6], m, mp); + sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_128(t[14], t[ 7], m, mp); + sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 128); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n <<= 4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + + sp_4096_mont_mul_128(r, r, t[y], m, mp); + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
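+ *
+ * Same fixed-window method as above but with a 5-bit window: 32
+ * precomputed powers, five squarings and one multiplication per window,
+ * trading a larger table for roughly 20% fewer multiplications.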
+ */ +static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][256]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 256, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 256; + } +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 128U); + if (reduceA != 0) { + err = sp_4096_mod_128(t[1] + 128, a, m); + if (err == MP_OKAY) { + err = sp_4096_mod_128(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128); + err = sp_4096_mod_128(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_128(t[10], t[ 5], m, mp); + sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_128(t[12], t[ 6], m, mp); + sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_128(t[14], t[ 7], m, mp); + sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_128(t[16], t[ 8], m, mp); + sp_4096_mont_mul_128(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_128(t[18], t[ 9], m, mp); + sp_4096_mont_mul_128(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_128(t[20], t[10], m, mp); + sp_4096_mont_mul_128(t[21], t[11], t[10], m, mp); + sp_4096_mont_sqr_128(t[22], t[11], m, mp); + sp_4096_mont_mul_128(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_128(t[24], t[12], m, mp); + sp_4096_mont_mul_128(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_128(t[26], t[13], m, mp); + sp_4096_mont_mul_128(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_128(t[28], t[14], m, mp); + sp_4096_mont_mul_128(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_128(t[30], t[15], m, mp); + sp_4096_mont_mul_128(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 128); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + + sp_4096_mont_mul_128(r, r, t[y], m, mp); + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ +#endif /* 
(WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[256], m[128], r[256]; +#else + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; +#endif + sp_digit *ah; + sp_digit e[1]; + int err = MP_OKAY; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 512 || + mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 128 * 2; + m = r + 128 * 2; + } +#endif + + if (err == MP_OKAY) { + ah = a + 128; + + sp_4096_from_bin(ah, 128, in, inLen); +#if DIGIT_BIT >= 32 + e[0] = em->dp[0]; +#else + e[0] = em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_4096_from_mp(m, 128, mm); + + if (e[0] == 0x3) { + if (err == MP_OKAY) { + sp_4096_sqr_128(r, ah); + err = sp_4096_mod_128_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_4096_mul_128(r, ah, r); + err = sp_4096_mod_128_cond(r, r, m); + } + } + else { + int i; + sp_digit mp; + + sp_4096_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
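+             * The base already sits in the top half of the double-width
+             * buffer (ah = a + 128), so zeroing the bottom half and
+             * reducing mod m yields a * 2^4096 mod m, i.e. the
+             * Montgomery representation of the base.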
+             */
+            XMEMSET(a, 0, sizeof(sp_digit) * 128);
+            err = sp_4096_mod_128_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 128);
+                for (i--; i>=0; i--) {
+                    sp_4096_mont_sqr_128(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_4096_mont_mul_128(r, r, a, m, mp);
+                    }
+                }
+                XMEMSET(&r[128], 0, sizeof(sp_digit) * 128);
+                sp_4096_mont_reduce_128(r, m, mp);
+
+                for (i = 127; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_4096_sub_in_place_128(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+/* RSA private key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * dm      Private exponent.
+ * pm      First prime.
+ * qm      Second prime.
+ * dpm     First prime's CRT exponent.
+ * dqm     Second prime's CRT exponent.
+ * qim     Inverse of second prime mod p.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 4096) {
+            err = MP_READ_E;
+        }
+        if (inLen > 512) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 4, NULL,
+                DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 128;
+        m = a + 256;
+        r = a;
+
+        sp_4096_from_bin(a, 128, in, inLen);
+        sp_4096_from_mp(d, 128, dm);
+        sp_4096_from_mp(m, 128, mm);
+        err = sp_4096_mod_exp_128(r, a, d, 4096, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 128);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+}
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing the conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov r5, #1\n\t"
+        "lsl r5, r5, #8\n\t"
+        "mov r9, r5\n\t"
+        "mov r8, #0\n\t"
+        "\n1:\n\t"
+        "ldr r6, [%[b], r8]\n\t"
+        "and r6, r6, %[m]\n\t"
+        "adds r5, %[c], #-1\n\t"
+        "ldr r5, [%[a], r8]\n\t"
+        "adcs r5, r5, r6\n\t"
+        "mov %[c], #0\n\t"
+        "adcs %[c], %[c], %[c]\n\t"
+        "str r5, [%[r], r8]\n\t"
+        "add r8, r8, #4\n\t"
+        "cmp r8, r9\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* RSA private key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * dm      Private exponent.
+ * pm      First prime.
+ * qm      Second prime.
+ * dpm     First prime's CRT exponent.
+ * dqm     Second prime's CRT exponent.
+ * qim     Inverse of second prime mod p.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
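+ *
+ * This path recombines the two half-size exponentiations with Garner's
+ * form of the CRT (sketch):
+ *
+ *   tmpa = in^dpm mod pm
+ *   tmpb = in^dqm mod qm
+ *   tmpa = qim * (tmpa - tmpb) mod pm
+ *   out  = tmpb + tmpa * qm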
+ */ +int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit a[128 * 2]; + sp_digit p[64], q[64], dp[64]; + sp_digit tmpa[128], tmpb[128]; +#else + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* tmpa; + sp_digit* tmpb; +#endif + sp_digit* r; + sp_digit* qi; + sp_digit* dq; + sp_digit c; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 128 * 2; + q = p + 64; + qi = dq = dp = q + 64; + tmpa = qi + 64; + tmpb = tmpa + 128; + + r = t + 128; + } +#else +#endif + + if (err == MP_OKAY) { +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + r = a; + qi = dq = dp; +#endif + sp_4096_from_bin(a, 128, in, inLen); + sp_4096_from_mp(p, 64, pm); + sp_4096_from_mp(q, 64, qm); + sp_4096_from_mp(dp, 64, dpm); + + err = sp_2048_mod_exp_64(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + sp_4096_from_mp(dq, 64, dqm); + err = sp_2048_mod_exp_64(tmpb, a, dq, 2048, q, 1); + } + + if (err == MP_OKAY) { + c = sp_2048_sub_in_place_64(tmpa, tmpb); + c += sp_4096_cond_add_64(tmpa, tmpa, p, c); + sp_4096_cond_add_64(tmpa, tmpa, p, c); + + sp_2048_from_mp(qi, 64, qim); + sp_2048_mul_64(tmpa, tmpa, qi); + err = sp_2048_mod_64(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_2048_mul_64(tmpa, q, tmpa); + XMEMSET(&tmpb[64], 0, sizeof(sp_digit) * 64); + sp_4096_add_128(r, tmpb, tmpa); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 64 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); +#endif + + return err; +} +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. 
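+ *
+ * When DIGIT_BIT is 32 this is a straight copy; otherwise the 32-bit
+ * words are repacked into DIGIT_BIT-sized mp_int digits a shift at a
+ * time, with mp_clamp() trimming leading zero digits at the end.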
+ */ +static int sp_4096_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 128); + r->used = 128; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 128; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 32 - s; + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 128; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[256], e[128], m[128]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 128, base); + sp_4096_from_mp(e, 128, exp); + sp_4096_from_mp(m, 128, mod); + + err = sp_4096_mod_exp_128(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_4096_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_4096 +static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n) +{ + __asm__ __volatile__ ( + "mov r6, #31\n\t" + "sub r6, r6, %[n]\n\t" + "add %[a], %[a], #448\n\t" + "add %[r], %[r], #448\n\t" + "ldr r3, [%[a], #60]\n\t" + "lsr r4, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r4, r4, r6\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, 
#1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, 
r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #56]\n\t" + "str r2, [%[r], #64]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], #56]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #44]\n\t" + "str r2, [%[r], #52]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #32]\n\t" + "str r2, [%[r], #40]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #20]\n\t" + "str r2, [%[r], #28]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #8]\n\t" + "str r2, [%[r], #16]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #0]\n\t" + "str r3, [%[r], #8]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r3, [%[a], #60]\n\t" + "str r2, [%[r], #68]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, 
r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], 
#8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #56]\n\t" + "str r2, [%[r], #64]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], #56]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #44]\n\t" + "str r2, [%[r], #52]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #32]\n\t" + "str r2, [%[r], #40]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #20]\n\t" + "str r2, [%[r], #28]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #8]\n\t" + "str r2, [%[r], #16]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #0]\n\t" + "str r3, [%[r], #8]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r3, [%[a], #60]\n\t" + "str r2, [%[r], #68]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], 
#44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, 
#1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "str r2, [%[r]]\n\t" + "str r3, [%[r], #4]\n\t" + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) + : "memory", "r2", "r3", "r4", "r5", "r6" + ); +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[256]; + sp_digit td[129]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 256; +#else + norm = nd; + tmp = td; +#endif + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + sp_4096_lshift_128(r, norm, y); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + + sp_4096_lshift_128(r, r, y); + sp_4096_mul_d_128(tmp, norm, r[128]); + r[128] = 0; + o = sp_4096_add_128(r, r, tmp); + sp_4096_cond_sub_128(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* HAVE_FFDHE_4096 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
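+ *
+ * For the FFDHE 4096 group (base 2, top word of the modulus all ones)
+ * the generic window method is swapped for sp_4096_mod_exp_2_128, where
+ * each window step multiplies by 2^y as a left shift plus a single-digit
+ * reduction instead of a full Montgomery multiplication.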
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+        mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[256], e[128], m[128];
+    sp_digit* r = b;
+    word32 i;
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 512) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 128, base);
+        sp_4096_from_bin(e, 128, exp, expLen);
+        sp_4096_from_mp(m, 128, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        if (base->used == 1 && base->dp[0] == 2 && m[127] == (sp_digit)-1)
+            err = sp_4096_mod_exp_2_128(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_4096_mod_exp_128(r, b, e, expLen * 8, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        for (i=0; i<512 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
+/* Point structure to use. */
+typedef struct sp_point_256 {
+    sp_digit x[2 * 8];
+    sp_digit y[2 * 8];
+    sp_digit z[2 * 8];
+    int infinity;
+} sp_point_256;
+
+/* The modulus (prime) of the curve P256. */
+static const sp_digit p256_mod[8] = {
+    0xffffffff,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
+    0x00000001,0xffffffff
+};
+/* The Montgomery normalizer for modulus of the curve P256. */
+static const sp_digit p256_norm_mod[8] = {
+    0x00000001,0x00000000,0x00000000,0xffffffff,0xffffffff,0xffffffff,
+    0xfffffffe,0x00000000
+};
+/* The Montgomery multiplier for modulus of the curve P256. */
+static const sp_digit p256_mp_mod = 0x00000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* The order of the curve P256. */
+static const sp_digit p256_order[8] = {
+    0xfc632551,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
+    0x00000000,0xffffffff
+};
+#endif
+/* The order of the curve P256 minus 2. */
+static const sp_digit p256_order2[8] = {
+    0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
+    0x00000000,0xffffffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for order of the curve P256. */
+static const sp_digit p256_norm_order[8] = {
+    0x039cdaaf,0x0c46353d,0x58e8617b,0x43190552,0x00000000,0x00000000,
+    0xffffffff,0x00000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for order of the curve P256. */
+static const sp_digit p256_mp_order = 0xee00bc4f;
+#endif
+/* The base point of curve P256.
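+ * Coordinates are stored as little-endian 32-bit words, each padded to
+ * the double-width arrays used by the field arithmetic.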
+ */
+static const sp_point_256 p256_base = {
+    /* X ordinate */
+    {
+        0xd898c296,0xf4a13945,0x2deb33a0,0x77037d81,0x63a440f2,0xf8bce6e5,
+        0xe12c4247,0x6b17d1f2,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x37bf51f5,0xcbb64068,0x6b315ece,0x2bce3357,0x7c0f9e16,0x8ee7eb4a,
+        0xfe1a7f9b,0x4fe342e2,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0x00000000,0x00000000,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p256_b[8] = {
+    0x27d2604b,0x3bce3c3e,0xcc53b0f6,0x651d06b0,0x769886bc,0xb3ebbd55,
+    0xaa3a93e7,0x5ac635d8
+};
+#endif
+
+static int sp_256_point_new_ex_8(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), &(sp), &(p))
+#endif
+
+
+static void sp_256_point_free_8(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by the Montgomery normalizer mod the modulus (prime).
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime).
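+ *
+ * The normalizer is 2^256 mod p256, so this computes the Montgomery
+ * form r = a * 2^256 mod p256. Instead of a full multiply and reduce,
+ * the body exploits the special form of p256: each result word is a
+ * small signed combination of the words of a (the coefficient
+ * patterns in the comments below), followed by carry propagation and
+ * one correction step.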
+ */ +static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + int64_t t[8]; + int64_t a64[8]; + int64_t o; + + (void)m; + + a64[0] = a[0]; + a64[1] = a[1]; + a64[2] = a[2]; + a64[3] = a[3]; + a64[4] = a[4]; + a64[5] = a[5]; + a64[6] = a[6]; + a64[7] = a[7]; + + /* 1 1 0 -1 -1 -1 -1 0 */ + t[0] = 0 + a64[0] + a64[1] - a64[3] - a64[4] - a64[5] - a64[6]; + /* 0 1 1 0 -1 -1 -1 -1 */ + t[1] = 0 + a64[1] + a64[2] - a64[4] - a64[5] - a64[6] - a64[7]; + /* 0 0 1 1 0 -1 -1 -1 */ + t[2] = 0 + a64[2] + a64[3] - a64[5] - a64[6] - a64[7]; + /* -1 -1 0 2 2 1 0 -1 */ + t[3] = 0 - a64[0] - a64[1] + 2 * a64[3] + 2 * a64[4] + a64[5] - a64[7]; + /* 0 -1 -1 0 2 2 1 0 */ + t[4] = 0 - a64[1] - a64[2] + 2 * a64[4] + 2 * a64[5] + a64[6]; + /* 0 0 -1 -1 0 2 2 1 */ + t[5] = 0 - a64[2] - a64[3] + 2 * a64[5] + 2 * a64[6] + a64[7]; + /* -1 -1 0 0 0 1 3 2 */ + t[6] = 0 - a64[0] - a64[1] + a64[5] + 3 * a64[6] + 2 * a64[7]; + /* 1 0 -1 -1 -1 -1 0 3 */ + t[7] = 0 + a64[0] - a64[2] - a64[3] - a64[4] - a64[5] + 3 * a64[7]; + + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + o = t[7] >> 32; t[7] &= 0xffffffff; + t[0] += o; + t[3] -= o; + t[6] -= o; + t[7] += o; + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + r[0] = t[0]; + r[1] = t[1]; + r[2] = t[2]; + r[3] = t[3]; + r[4] = t[4]; + r[5] = t[5]; + r[6] = t[6]; + r[7] = t[7]; + + return MP_OKAY; +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 32 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 32 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 32U) <= (word32)DIGIT_BIT) { + s += 32U; + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 32) { + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + s = 32 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Convert a point of type ecc_point to type sp_point_256. + * + * p Point of type sp_point_256 (result). + * pm Point of type ecc_point. 
+ */ +static void sp_256_point_from_ecc_point_8(sp_point_256* p, const ecc_point* pm) +{ + XMEMSET(p->x, 0, sizeof(p->x)); + XMEMSET(p->y, 0, sizeof(p->y)); + XMEMSET(p->z, 0, sizeof(p->z)); + sp_256_from_mp(p->x, 8, pm->x); + sp_256_from_mp(p->y, 8, pm->y); + sp_256_from_mp(p->z, 8, pm->z); + p->infinity = 0; +} + +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_256_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 8); + r->used = 8; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 8; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 32 - s; + } + r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 8; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Convert a point of type sp_point_256 to type ecc_point. + * + * p Point of type sp_point_256. + * pm Point of type ecc_point (result). + * returns MEMORY_E when allocation of memory in ecc_point fails otherwise + * MP_OKAY. + */ +static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) +{ + int err; + + err = sp_256_to_mp(p->x, pm->x); + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, pm->y); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, pm->z); + } + + return err; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
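+ *
+ * The assembly below is an 8x8 Comba (product-scanning) multiply
+ * producing the full 16-word result. An equivalent operand-scanning
+ * sketch in plain C, assuming 32-bit digits and r not aliasing a or b:
+ *   int i, j;
+ *   uint64_t t;
+ *   XMEMSET(r, 0, 8 * sizeof(sp_digit));
+ *   for (i = 0; i < 8; i++) {
+ *       t = 0;
+ *       for (j = 0; j < 8; j++) {
+ *           t += (uint64_t)a[i] * b[j] + r[i + j];
+ *           r[i + j] = (sp_digit)t;
+ *           t >>= 32;
+ *       }
+ *       r[i + 8] = (sp_digit)t;
+ *   }
+ * (The assembly writes the low half through tmp so r may alias a or b.)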
+ */ +SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[8]; + + __asm__ __volatile__ ( + /* A[0] * B[0] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r3, r4, r6, r8\n\t" + "mov r5, #0\n\t" + "str r3, [%[tmp], #0]\n\t" + "mov r3, #0\n\t" + /* A[0] * B[1] */ + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r8\n\t" + /* A[1] * B[0] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[tmp], #4]\n\t" + "mov r4, #0\n\t" + /* A[0] * B[2] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * B[1] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[2] * B[0] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[tmp], #8]\n\t" + "mov r5, #0\n\t" + /* A[0] * B[3] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[1] * B[2] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[2] * B[1] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[3] * B[0] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[tmp], #12]\n\t" + "mov r3, #0\n\t" + /* A[0] * B[4] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[1] * B[3] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[2] * B[2] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[3] * B[1] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[4] * B[0] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[tmp], #16]\n\t" + "mov r4, #0\n\t" + /* A[0] * B[5] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * B[4] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[2] * B[3] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[3] * B[2] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[4] * B[1] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[5] * B[0] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[tmp], #20]\n\t" + "mov r5, #0\n\t" + /* A[0] * B[6] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[1] * B[5] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[2] * B[4] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[3] * B[3] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[4] * B[2] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[5] * B[1] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[6] * B[0] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[tmp], #24]\n\t" + "mov r3, #0\n\t" + /* A[0] * B[7] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[1] * B[6] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[2] * B[5] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[3] * B[4] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[4] * B[3] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[5] * B[2] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[6] * B[1] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[7] * B[0] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[tmp], #28]\n\t" + "mov r4, #0\n\t" + /* A[1] * B[7] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[2] * B[6] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[3] * B[5] */ + "ldr r6, 
[%[a], #12]\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[4] * B[4] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[5] * B[3] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[6] * B[2] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[7] * B[1] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[r], #32]\n\t" + "mov r5, #0\n\t" + /* A[2] * B[7] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[3] * B[6] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[4] * B[5] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[5] * B[4] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[6] * B[3] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[7] * B[2] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[r], #36]\n\t" + "mov r3, #0\n\t" + /* A[3] * B[7] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[4] * B[6] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[5] * B[5] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[6] * B[4] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[7] * B[3] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[r], #40]\n\t" + "mov r4, #0\n\t" + /* A[4] * B[7] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[5] * B[6] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[6] * B[5] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[7] * B[4] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" 
+ "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[r], #44]\n\t" + "mov r5, #0\n\t" + /* A[5] * B[7] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[6] * B[6] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[7] * B[5] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[r], #48]\n\t" + "mov r3, #0\n\t" + /* A[6] * B[7] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + /* A[7] * B[6] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[r], #52]\n\t" + "mov r4, #0\n\t" + /* A[7] * B[7] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r8\n\t" + "str r5, [%[r], #56]\n\t" + "str r3, [%[r], #60]\n\t" + /* Transfer tmp to r */ + "ldr r3, [%[tmp], #0]\n\t" + "ldr r4, [%[tmp], #4]\n\t" + "ldr r5, [%[tmp], #8]\n\t" + "ldr r6, [%[tmp], #12]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r3, [%[tmp], #16]\n\t" + "ldr r4, [%[tmp], #20]\n\t" + "ldr r5, [%[tmp], #24]\n\t" + "ldr r6, [%[tmp], #28]\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp) + : "memory", "r3", "r4", "r5", "r6", "r8" + ); +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #32\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "ldr r6, [%[b], r8]\n\t" + "and r6, r6, %[m]\n\t" + "mov r5, #0\n\t" + "subs r5, r5, %[c]\n\t" + "ldr r5, [%[a], r8]\n\t" + "sbcs r5, r5, r6\n\t" + "sbcs %[c], %[c], %[c]\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r8", "r9" + ); + + return c; +} + +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + (void)mp; + (void)m; + + __asm__ __volatile__ ( + "mov r2, #0\n\t" + "mov r1, #0\n\t" + /* i = 0 */ + "mov r9, r2\n\t" + "\n1:\n\t" + "mov r4, #0\n\t" + /* mu = a[i] * 1 (mp) = a[i] */ + "ldr r3, [%[a]]\n\t" + /* a[i] += -1 * mu = -1 * a[i] => a[i] = 0 no carry */ + /* a[i+1] += -1 * mu */ + "ldr r6, [%[a], #4]\n\t" + "mov r5, #0\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r2\n\t" + "str r4, [%[a], #4]\n\t" + /* a[i+2] += -1 * mu */ + "ldr r6, [%[a], #8]\n\t" + "mov r4, #0\n\t" + "adds r5, r5, r6\n\t" + "adc r4, r4, r2\n\t" + "str r5, [%[a], #8]\n\t" + /* a[i+3] += 0 * mu */ + "ldr r6, [%[a], #12]\n\t" + "mov r5, #0\n\t" + "adds r4, r4, r3\n\t" + "adc r5, r5, r2\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r2\n\t" + "str r4, [%[a], #12]\n\t" + /* a[i+4] += 0 * mu */ + "ldr r6, [%[a], #16]\n\t" + "mov r4, #0\n\t" + "adds r5, r5, r6\n\t" + "adc r4, r4, r2\n\t" + "str r5, [%[a], #16]\n\t" + /* a[i+5] += 0 * mu */ + "ldr r6, [%[a], #20]\n\t" + "mov r5, #0\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r2\n\t" + "str r4, [%[a], #20]\n\t" + /* a[i+6] += 1 * mu */ + "ldr r6, [%[a], #24]\n\t" + "mov r4, #0\n\t" + "adds r5, r5, r3\n\t" + "adc r4, r4, r2\n\t" + "adds r5, r5, r6\n\t" + "adc r4, r4, r2\n\t" + "str r5, [%[a], #24]\n\t" + /* a[i+7] += -1 * mu */ + "ldr r6, [%[a], #28]\n\t" + "ldr r8, [%[a], #32]\n\t" + "adds r5, r1, r3\n\t" + "mov r1, #0\n\t" + "adc r1, r1, r2\n\t" + "subs r4, r4, r3\n\t" + "sbcs r5, r5, r2\n\t" + "sbc r1, r1, r2\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r1, r1, r2\n\t" + "str r4, [%[a], #28]\n\t" + "str r5, [%[a], #32]\n\t" + /* i += 1 */ + "add r9, r9, #1\n\t" + "add %[a], %[a], #4\n\t" + "mov r6, #8\n\t" + "cmp r9, r6\n\t" + "blt 1b\n\t" + "sub %[a], %[a], #32\n\t" + "mov r3, r1\n\t" + "sub r1, r1, #1\n\t" + "mvn r1, r1\n\t" + "ldr r4, [%[a],#32]\n\t" + "ldr r5, [%[a],#36]\n\t" + "ldr r6, [%[a],#40]\n\t" + "ldr r8, [%[a],#44]\n\t" + "subs r4, r4, r1\n\t" + "sbcs r5, r5, r1\n\t" + "sbcs r6, r6, r1\n\t" + "sbcs r8, r8, r2\n\t" + "str r4, [%[a],#0]\n\t" + "str r5, [%[a],#4]\n\t" + "str r6, [%[a],#8]\n\t" + "str r8, [%[a],#12]\n\t" + "ldr r4, [%[a],#48]\n\t" + "ldr r5, [%[a],#52]\n\t" + "ldr r6, [%[a],#56]\n\t" + "ldr r8, [%[a],#60]\n\t" + "sbcs r4, r4, r2\n\t" + "sbcs r5, r5, r2\n\t" + "sbcs r6, r6, r3\n\t" + "sbc r8, r8, r1\n\t" + "str r4, [%[a],#16]\n\t" + "str r5, [%[a],#20]\n\t" + "str r6, [%[a],#24]\n\t" + "str r8, [%[a],#28]\n\t" + : [a] "+r" (a) + : + : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r8", "r9" + ); + + + (void)m; + (void)mp; +} + +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
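+ *
+ * This is the generic word-by-word Montgomery reduction, in pseudo-C
+ * (32-bit digits assumed):
+ *   for (i = 0; i < 8; i++) {
+ *       mu = a[i] * mp;               (mod 2^32)
+ *       a += (mu * m) << (32 * i);    (zeroes a[i])
+ *   }
+ *   a >>= 256;
+ *   subtract m once if needed        (done branch-free via cond_sub)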
+ */ +SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "mov r9, %[mp]\n\t" + "mov r12, %[m]\n\t" + "mov r10, %[a]\n\t" + "mov r4, #0\n\t" + "add r11, r10, #32\n\t" + "\n1:\n\t" + /* mu = a[i] * mp */ + "mov %[mp], r9\n\t" + "ldr %[a], [r10]\n\t" + "mul %[mp], %[mp], %[a]\n\t" + "mov %[m], r12\n\t" + "add r14, r10, #24\n\t" + "\n2:\n\t" + /* a[i+j] += m[j] * mu */ + "ldr %[a], [r10]\n\t" + "mov r5, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" + "adds %[a], %[a], r6\n\t" + "adc r5, r5, r8\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "ldr %[a], [r10]\n\t" + "mov r4, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" + "adds %[a], %[a], r6\n\t" + "adc r4, r4, r8\n\t" + /* Multiply m[j] and mu - Done */ + "adds r5, r5, %[a]\n\t" + "adc r4, r4, #0\n\t" + "str r5, [r10], #4\n\t" + "cmp r10, r14\n\t" + "blt 2b\n\t" + /* a[i+6] += m[6] * mu */ + "ldr %[a], [r10]\n\t" + "mov r5, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" + "adds %[a], %[a], r6\n\t" + "adc r5, r5, r8\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" + /* a[i+7] += m[7] * mu */ + "mov r4, %[ca]\n\t" + "mov %[ca], #0\n\t" + /* Multiply m[7] and mu - Start */ + "ldr r8, [%[m]]\n\t" + "umull r6, r8, %[mp], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc %[ca], %[ca], #0\n\t" + /* Multiply m[7] and mu - Done */ + "ldr r6, [r10]\n\t" + "ldr r8, [r10, #4]\n\t" + "adds r6, r6, r5\n\t" + "adcs r8, r8, r4\n\t" + "adc %[ca], %[ca], #0\n\t" + "str r6, [r10]\n\t" + "str r8, [r10, #4]\n\t" + /* Next word in a */ + "sub r10, r10, #24\n\t" + "cmp r10, r11\n\t" + "blt 1b\n\t" + "mov %[a], r10\n\t" + "mov %[m], r12\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_256_mul_8(r, a, b); + sp_256_mont_reduce_8(r, m, mp); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
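+ *
+ * Squaring needs roughly half the word multiplies of sp_256_mul_8:
+ * each cross product appears twice, so the assembly computes it once
+ * and doubles it, using
+ *   a^2 = sum_i a[i]^2 * 2^(64*i)
+ *       + 2 * sum_{i<j} a[i]*a[j] * 2^(32*(i+j))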
+ */ +SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +{ + sp_digit tmp[8]; + __asm__ __volatile__ ( + /* A[0] * A[0] */ + "ldr r6, [%[a], #0]\n\t" + "umull r3, r4, r6, r6\n\t" + "mov r5, #0\n\t" + "str r3, [%[tmp], #0]\n\t" + "mov r3, #0\n\t" + /* A[0] * A[1] */ + "ldr r8, [%[a], #4]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[tmp], #4]\n\t" + "mov r4, #0\n\t" + /* A[0] * A[2] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * A[1] */ + "ldr r6, [%[a], #4]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[tmp], #8]\n\t" + "mov r5, #0\n\t" + /* A[0] * A[3] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" + /* A[1] * A[2] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, r10\n\t" + "adc r5, r5, r11\n\t" + "str r3, [%[tmp], #12]\n\t" + "mov r3, #0\n\t" + /* A[0] * A[4] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" + /* A[1] * A[3] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[2] * A[2] */ + "ldr r6, [%[a], #8]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r4, r4, r9\n\t" + "adcs r5, r5, r10\n\t" + "adc r3, r3, r11\n\t" + "str r4, [%[tmp], #16]\n\t" + "mov r4, #0\n\t" + /* A[0] * A[5] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" + /* A[1] * A[4] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[2] * A[3] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r5, r5, r9\n\t" + "adcs r3, r3, r10\n\t" + "adc r4, r4, r11\n\t" + "str r5, [%[tmp], #20]\n\t" + "mov r5, #0\n\t" + /* A[0] * A[6] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" + /* A[1] * A[5] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[2] * A[4] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[3] * A[3] */ + "ldr r6, [%[a], #12]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, r10\n\t" + "adc r5, r5, r11\n\t" + "str r3, 
[%[tmp], #24]\n\t" + "mov r3, #0\n\t" + /* A[0] * A[7] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" + /* A[1] * A[6] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[2] * A[5] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[3] * A[4] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r4, r4, r9\n\t" + "adcs r5, r5, r10\n\t" + "adc r3, r3, r11\n\t" + "str r4, [%[tmp], #28]\n\t" + "mov r4, #0\n\t" + /* A[1] * A[7] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" + /* A[2] * A[6] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[3] * A[5] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[4] * A[4] */ + "ldr r6, [%[a], #16]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r5, r5, r9\n\t" + "adcs r3, r3, r10\n\t" + "adc r4, r4, r11\n\t" + "str r5, [%[r], #32]\n\t" + "mov r5, #0\n\t" + /* A[2] * A[7] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" + /* A[3] * A[6] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[4] * A[5] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, r10\n\t" + "adc r5, r5, r11\n\t" + "str r3, [%[r], #36]\n\t" + "mov r3, #0\n\t" + /* A[3] * A[7] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" + /* A[4] * A[6] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + /* A[5] * A[5] */ + "ldr r6, [%[a], #20]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r4, r4, r9\n\t" + "adcs r5, r5, r10\n\t" + "adc r3, r3, r11\n\t" + "str r4, [%[r], #40]\n\t" + "mov r4, #0\n\t" + /* A[4] * A[7] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + /* A[5] * A[6] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r8\n\t" + "adc r4, r4, 
#0\n\t" + "str r5, [%[r], #44]\n\t" + "mov r5, #0\n\t" + /* A[5] * A[7] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + /* A[6] * A[6] */ + "ldr r6, [%[a], #24]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[r], #48]\n\t" + "mov r3, #0\n\t" + /* A[6] * A[7] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[r], #52]\n\t" + "mov r4, #0\n\t" + /* A[7] * A[7] */ + "ldr r6, [%[a], #28]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r8\n\t" + "str r5, [%[r], #56]\n\t" + "str r3, [%[r], #60]\n\t" + /* Transfer tmp to r */ + "ldr r3, [%[tmp], #0]\n\t" + "ldr r4, [%[tmp], #4]\n\t" + "ldr r5, [%[tmp], #8]\n\t" + "ldr r6, [%[tmp], #12]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r3, [%[tmp], #16]\n\t" + "ldr r4, [%[tmp], #20]\n\t" + "ldr r5, [%[tmp], #24]\n\t" + "ldr r6, [%[tmp], #28]\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + : + : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp) + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11" + ); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_256_sqr_8(r, a); + sp_256_mont_reduce_8(r, m, mp); +} + +#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY) +/* Square the Montgomery form number a number of times. (r = a ^ n mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * n Number of times to square. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n, + const sp_digit* m, sp_digit mp) +{ + sp_256_mont_sqr_8(r, a, m, mp); + for (; n > 1; n--) { + sp_256_mont_sqr_8(r, r, m, mp); + } +} + +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ +#ifdef WOLFSSL_SP_SMALL +/* Mod-2 for the P256 curve. */ +static const uint32_t p256_mod_minus_2[8] = { + 0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U, + 0x00000001U,0xffffffffU +}; +#endif /* !WOLFSSL_SP_SMALL */ + +/* Invert the number, in Montgomery form, modulo the modulus (prime) of the + * P256 curve. (r = 1 / a mod m) + * + * r Inverse result. + * a Number to invert. + * td Temporary data. 
+ */ +static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 8); + for (i=254; i>=0; i--) { + sp_256_mont_sqr_8(t, t, p256_mod, p256_mp_mod); + if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32))) + sp_256_mont_mul_8(t, t, a, p256_mod, p256_mp_mod); + } + XMEMCPY(r, t, sizeof(sp_digit) * 8); +#else + sp_digit* t1 = td; + sp_digit* t2 = td + 2 * 8; + sp_digit* t3 = td + 4 * 8; + /* 0x2 */ + sp_256_mont_sqr_8(t1, a, p256_mod, p256_mp_mod); + /* 0x3 */ + sp_256_mont_mul_8(t2, t1, a, p256_mod, p256_mp_mod); + /* 0xc */ + sp_256_mont_sqr_n_8(t1, t2, 2, p256_mod, p256_mp_mod); + /* 0xd */ + sp_256_mont_mul_8(t3, t1, a, p256_mod, p256_mp_mod); + /* 0xf */ + sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xf0 */ + sp_256_mont_sqr_n_8(t1, t2, 4, p256_mod, p256_mp_mod); + /* 0xfd */ + sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod); + /* 0xff */ + sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xff00 */ + sp_256_mont_sqr_n_8(t1, t2, 8, p256_mod, p256_mp_mod); + /* 0xfffd */ + sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod); + /* 0xffff */ + sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xffff0000 */ + sp_256_mont_sqr_n_8(t1, t2, 16, p256_mod, p256_mp_mod); + /* 0xfffffffd */ + sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod); + /* 0xffffffff */ + sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xffffffff00000000 */ + sp_256_mont_sqr_n_8(t1, t2, 32, p256_mod, p256_mp_mod); + /* 0xffffffffffffffff */ + sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xffffffff00000001 */ + sp_256_mont_mul_8(r, t1, a, p256_mod, p256_mp_mod); + /* 0xffffffff000000010000000000000000000000000000000000000000 */ + sp_256_mont_sqr_n_8(r, r, 160, p256_mod, p256_mp_mod); + /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */ + sp_256_mont_mul_8(r, r, t2, p256_mod, p256_mp_mod); + /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */ + sp_256_mont_sqr_n_8(r, r, 32, p256_mod, p256_mp_mod); + /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */ + sp_256_mont_mul_8(r, r, t3, p256_mod, p256_mp_mod); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +SP_NOINLINE static int32_t sp_256_cmp_8(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; + + + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mvn r3, r3\n\t" + "mov r6, #28\n\t" + "\n1:\n\t" + "ldr r8, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r8, r8, r3\n\t" + "and r5, r5, r3\n\t" + "mov r4, r8\n\t" + "subs r8, r8, r5\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" + "subs r5, r5, r4\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" + "sub r6, r6, #4\n\t" + "cmp r6, #0\n\t" + "bge 1b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "r3", "r4", "r5", "r6", "r8" + ); + + return r; +} + +/* Normalize the values in each word to 32. + * + * a Array of sp_digit to normalize. + */ +#define sp_256_norm_8(a) + +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. + * p Montgomery form projective coordinate point. + * t Temporary ordinate data. 
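+ *
+ * Points are held in Jacobian projective form where (X, Y, Z)
+ * represents the affine point (X / Z^2, Y / Z^3). One inversion and a
+ * few multiplies therefore recover the affine ordinates:
+ *   x = X * (1/Z)^2 mod p,   y = Y * (1/Z)^3 mod p
+ * with a final conditional subtraction to bring each below p256.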
+ */ +static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + int32_t n; + + sp_256_mont_inv_8(t1, p->z, t + 2*8); + + sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod); + + /* x /= z^2 */ + sp_256_mont_mul_8(r->x, p->x, t2, p256_mod, p256_mp_mod); + XMEMSET(r->x + 8, 0, sizeof(r->x) / 2U); + sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod); + /* Reduce x to less than modulus */ + n = sp_256_cmp_8(r->x, p256_mod); + sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_8(r->x); + + /* y /= z^3 */ + sp_256_mont_mul_8(r->y, p->y, t1, p256_mod, p256_mp_mod); + XMEMSET(r->y + 8, 0, sizeof(r->y) / 2U); + sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod); + /* Reduce y to less than modulus */ + n = sp_256_cmp_8(r->y, p256_mod); + sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_8(r->y); + + XMEMSET(r->z, 0, sizeof(r->z)); + r->z[0] = 1; + +} + +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "mov r8, #0\n\t" + "add r6, r6, #32\n\t" + "sub r8, r8, #1\n\t" + "\n1:\n\t" + "adds %[c], %[c], r8\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r]]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #4\n\t" + "add %[b], %[b], #4\n\t" + "add %[r], %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +#else +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Add two Montgomery form numbers (r = a + b % m). + * + * r Result of addition. + * a First number to add in Montogmery form. + * b Second number to add in Montogmery form. + * m Modulus (prime). 
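+ *
+ * The addition is branch-free: add the two 256-bit values, turn the
+ * carry of the 256-bit add into an all-ones/all-zeros mask, and
+ * subtract the masked words of p256. No data-dependent branch or
+ * lookup of m is needed (m itself is unused thanks to p256's fixed
+ * word pattern).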
+ */ +SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + (void)m; + + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "ldr r4, [%[a],#0]\n\t" + "ldr r5, [%[a],#4]\n\t" + "ldr r6, [%[b],#0]\n\t" + "ldr r8, [%[b],#4]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "str r4, [%[r],#0]\n\t" + "str r5, [%[r],#4]\n\t" + "ldr r4, [%[a],#8]\n\t" + "ldr r5, [%[a],#12]\n\t" + "ldr r6, [%[b],#8]\n\t" + "ldr r8, [%[b],#12]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "str r4, [%[r],#8]\n\t" + "str r5, [%[r],#12]\n\t" + "ldr r4, [%[a],#16]\n\t" + "ldr r5, [%[a],#20]\n\t" + "ldr r6, [%[b],#16]\n\t" + "ldr r8, [%[b],#20]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "mov r9, r4\n\t" + "mov r10, r5\n\t" + "ldr r4, [%[a],#24]\n\t" + "ldr r5, [%[a],#28]\n\t" + "ldr r6, [%[b],#24]\n\t" + "ldr r8, [%[b],#28]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "mov r11, r4\n\t" + "mov r12, r5\n\t" + "adc r3, r3, r3\n\t" + "mov r6, r3\n\t" + "sub r3, r3, #1\n\t" + "mvn r3, r3\n\t" + "mov r8, #0\n\t" + "ldr r4, [%[r],#0]\n\t" + "ldr r5, [%[r],#4]\n\t" + "subs r4, r4, r3\n\t" + "sbcs r5, r5, r3\n\t" + "str r4, [%[r],#0]\n\t" + "str r5, [%[r],#4]\n\t" + "ldr r4, [%[r],#8]\n\t" + "ldr r5, [%[r],#12]\n\t" + "sbcs r4, r4, r3\n\t" + "sbcs r5, r5, r8\n\t" + "str r4, [%[r],#8]\n\t" + "str r5, [%[r],#12]\n\t" + "mov r4, r9\n\t" + "mov r5, r10\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r8\n\t" + "str r4, [%[r],#16]\n\t" + "str r5, [%[r],#20]\n\t" + "mov r4, r11\n\t" + "mov r5, r12\n\t" + "sbcs r4, r4, r6\n\t" + "sbc r5, r5, r3\n\t" + "str r4, [%[r],#24]\n\t" + "str r5, [%[r],#28]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" + ); +} + +/* Double a Montgomery form number (r = a + a % m). + * + * r Result of doubling. + * a Number to double in Montogmery form. + * m Modulus (prime). 
+ */ +SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + (void)m; + + __asm__ __volatile__ ( + "ldr r4, [%[a],#0]\n\t" + "ldr r5, [%[a],#4]\n\t" + "ldr r6, [%[a],#8]\n\t" + "ldr r8, [%[a],#12]\n\t" + "adds r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r8, r8, r8\n\t" + "str r4, [%[r],#0]\n\t" + "str r5, [%[r],#4]\n\t" + "str r6, [%[r],#8]\n\t" + "str r8, [%[r],#12]\n\t" + "ldr r4, [%[a],#16]\n\t" + "ldr r5, [%[a],#20]\n\t" + "ldr r6, [%[a],#24]\n\t" + "ldr r8, [%[a],#28]\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r8, r8, r8\n\t" + "mov r9, r4\n\t" + "mov r10, r5\n\t" + "mov r11, r6\n\t" + "mov r12, r8\n\t" + "mov r3, #0\n\t" + "mov r8, #0\n\t" + "adc r3, r3, r3\n\t" + "mov r2, r3\n\t" + "sub r3, r3, #1\n\t" + "mvn r3, r3\n\t" + "ldr r4, [%[r],#0]\n\t" + "ldr r5, [%[r],#4]\n\t" + "ldr r6, [%[r],#8]\n\t" + "subs r4, r4, r3\n\t" + "sbcs r5, r5, r3\n\t" + "sbcs r6, r6, r3\n\t" + "str r4, [%[r],#0]\n\t" + "str r5, [%[r],#4]\n\t" + "str r6, [%[r],#8]\n\t" + "ldr r4, [%[r],#12]\n\t" + "mov r5, r9\n\t" + "mov r6, r10\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r8\n\t" + "sbcs r6, r6, r8\n\t" + "str r4, [%[r],#12]\n\t" + "str r5, [%[r],#16]\n\t" + "str r6, [%[r],#20]\n\t" + "mov r4, r11\n\t" + "mov r5, r12\n\t" + "sbcs r4, r4, r2\n\t" + "sbc r5, r5, r3\n\t" + "str r4, [%[r],#24]\n\t" + "str r5, [%[r],#28]\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r3", "r2", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" + ); +} + +/* Triple a Montgomery form number (r = a + a + a % m). + * + * r Result of Tripling. + * a Number to triple in Montogmery form. + * m Modulus (prime). + */ +SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + (void)m; + + __asm__ __volatile__ ( + "ldr r2, [%[a],#0]\n\t" + "ldr r3, [%[a],#4]\n\t" + "ldr r4, [%[a],#8]\n\t" + "ldr r5, [%[a],#12]\n\t" + "ldr r6, [%[a],#16]\n\t" + "ldr r8, [%[a],#20]\n\t" + "ldr r9, [%[a],#24]\n\t" + "ldr r10, [%[a],#28]\n\t" + "adds r2, r2, r2\n\t" + "adcs r3, r3, r3\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "mov r11, #0\n\t" + "mov r14, #0\n\t" + "adc r11, r11, r11\n\t" + "mov r12, r11\n\t" + "sub r11, r11, #1\n\t" + "mvn r11, r11\n\t" + "subs r2, r2, r11\n\t" + "sbcs r3, r3, r11\n\t" + "sbcs r4, r4, r11\n\t" + "sbcs r5, r5, r14\n\t" + "sbcs r6, r6, r14\n\t" + "sbcs r8, r8, r14\n\t" + "sbcs r9, r9, r12\n\t" + "sbc r10, r10, r11\n\t" + "ldr r12, [%[a],#0]\n\t" + "ldr r14, [%[a],#4]\n\t" + "adds r2, r2, r12\n\t" + "adcs r3, r3, r14\n\t" + "ldr r12, [%[a],#8]\n\t" + "ldr r14, [%[a],#12]\n\t" + "adcs r4, r4, r12\n\t" + "adcs r5, r5, r14\n\t" + "ldr r12, [%[a],#16]\n\t" + "ldr r14, [%[a],#20]\n\t" + "adcs r6, r6, r12\n\t" + "adcs r8, r8, r14\n\t" + "ldr r12, [%[a],#24]\n\t" + "ldr r14, [%[a],#28]\n\t" + "adcs r9, r9, r12\n\t" + "adcs r10, r10, r14\n\t" + "mov r11, #0\n\t" + "mov r14, #0\n\t" + "adc r11, r11, r11\n\t" + "mov r12, r11\n\t" + "sub r11, r11, #1\n\t" + "mvn r11, r11\n\t" + "subs r2, r2, r11\n\t" + "str r2, [%[r],#0]\n\t" + "sbcs r3, r3, r11\n\t" + "str r3, [%[r],#4]\n\t" + "sbcs r4, r4, r11\n\t" + "str r4, [%[r],#8]\n\t" + "sbcs r5, r5, r14\n\t" + "str r5, [%[r],#12]\n\t" + "sbcs r6, r6, r14\n\t" + "str r6, [%[r],#16]\n\t" + "sbcs r8, r8, r14\n\t" + "str r8, [%[r],#20]\n\t" + "sbcs r9, r9, r12\n\t" + "str r9, [%[r],#24]\n\t" + "sbc r10, r10, r11\n\t" + "str r10, 
[%[r],#28]\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r11", "r12", "r14", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10" + ); +} + +/* Subtract two Montgomery form numbers (r = a - b % m). + * + * r Result of subtration. + * a Number to subtract from in Montogmery form. + * b Number to subtract with in Montogmery form. + * m Modulus (prime). + */ +SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + (void)m; + + __asm__ __volatile__ ( + "ldr r4, [%[a],#0]\n\t" + "ldr r5, [%[a],#4]\n\t" + "ldr r6, [%[b],#0]\n\t" + "ldr r8, [%[b],#4]\n\t" + "subs r4, r4, r6\n\t" + "sbcs r5, r5, r8\n\t" + "str r4, [%[r],#0]\n\t" + "str r5, [%[r],#4]\n\t" + "ldr r4, [%[a],#8]\n\t" + "ldr r5, [%[a],#12]\n\t" + "ldr r6, [%[b],#8]\n\t" + "ldr r8, [%[b],#12]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r8\n\t" + "str r4, [%[r],#8]\n\t" + "str r5, [%[r],#12]\n\t" + "ldr r4, [%[a],#16]\n\t" + "ldr r5, [%[a],#20]\n\t" + "ldr r6, [%[b],#16]\n\t" + "ldr r8, [%[b],#20]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r8\n\t" + "mov r9, r4\n\t" + "mov r10, r5\n\t" + "ldr r4, [%[a],#24]\n\t" + "ldr r5, [%[a],#28]\n\t" + "ldr r6, [%[b],#24]\n\t" + "ldr r8, [%[b],#28]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r8\n\t" + "mov r11, r4\n\t" + "mov r12, r5\n\t" + "sbc r3, r3, r3\n\t" + "lsr r8, r3, #31\n\t" + "mov r6, #0\n\t" + "ldr r4, [%[r],#0]\n\t" + "ldr r5, [%[r],#4]\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r3\n\t" + "str r4, [%[r],#0]\n\t" + "str r5, [%[r],#4]\n\t" + "ldr r4, [%[r],#8]\n\t" + "ldr r5, [%[r],#12]\n\t" + "adcs r4, r4, r3\n\t" + "adcs r5, r5, r6\n\t" + "str r4, [%[r],#8]\n\t" + "str r5, [%[r],#12]\n\t" + "mov r4, r9\n\t" + "mov r5, r10\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r6\n\t" + "str r4, [%[r],#16]\n\t" + "str r5, [%[r],#20]\n\t" + "mov r4, r11\n\t" + "mov r5, r12\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, r3\n\t" + "str r4, [%[r],#24]\n\t" + "str r5, [%[r],#28]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" + ); +} + +/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) + * + * r Result of division by 2. + * a Number to divide. + * m Modulus (prime). 
+ */ +SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + __asm__ __volatile__ ( + "ldr r8, [%[a], #0]\n\t" + "lsl r8, r8, #31\n\t" + "lsr r8, r8, #31\n\t" + "mov r5, #0\n\t" + "sub r5, r5, r8\n\t" + "mov r8, #0\n\t" + "lsl r6, r5, #31\n\t" + "lsr r6, r6, #31\n\t" + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r5\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "adcs r3, r3, r5\n\t" + "adcs r4, r4, r8\n\t" + "str r3, [%[r], #8]\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r8\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "adcs r3, r3, r6\n\t" + "adcs r4, r4, r5\n\t" + "adc r8, r8, r8\n\t" + "lsl r8, r8, #31\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, #31\n\t" + "lsr r6, r4, #1\n\t" + "lsl r4, r4, #31\n\t" + "orr r5, r5, r4\n\t" + "orr r6, r6, r8\n\t" + "mov r8, r3\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, #31\n\t" + "lsr r6, r4, #1\n\t" + "lsl r4, r4, #31\n\t" + "orr r5, r5, r4\n\t" + "orr r6, r6, r8\n\t" + "mov r8, r3\n\t" + "str r5, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, #31\n\t" + "lsr r6, r4, #1\n\t" + "lsl r4, r4, #31\n\t" + "orr r5, r5, r4\n\t" + "orr r6, r6, r8\n\t" + "mov r8, r3\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r3, [%[r], #0]\n\t" + "ldr r4, [%[r], #4]\n\t" + "lsr r5, r3, #1\n\t" + "lsr r6, r4, #1\n\t" + "lsl r4, r4, #31\n\t" + "orr r5, r5, r4\n\t" + "orr r6, r6, r8\n\t" + "str r5, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + : + : [r] "r" (r), [a] "r" (a), [m] "r" (m) + : "memory", "r3", "r4", "r5", "r6", "r8" + ); +} + +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. 
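+     * (the infinity flag is simply carried through). The sequence
+     * below implements standard Jacobian doubling for a = -3 curves:
+     *   M = 3(X - Z^2)(X + Z^2), S = 4XY^2, Z' = 2YZ,
+     *   X' = M^2 - 2S, Y' = M(S - X') - 8Y^4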
*/ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_8(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_8(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_8(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_8(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_8(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_8(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_8(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_8(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_8(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_8(y, y, t2, p256_mod); +} + +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "add r6, r6, #32\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "subs r5, r5, %[c]\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "sbcs r4, r4, r5\n\t" + "str r4, [%[r]]\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #4\n\t" + "add %[b], %[b], #4\n\t" + "add %[r], %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6" + ); + + return c; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r8, [%[b], #4]\n\t" + "subs r4, r4, r6\n\t" + "sbcs r5, r5, r8\n\t" + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #8]\n\t" + "ldr r8, [%[b], #12]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r8\n\t" + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r8, [%[b], #20]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r8\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #24]\n\t" + "ldr r8, [%[b], #28]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r8\n\t" + "str r4, [%[r], #24]\n\t" + "str r5, [%[r], #28]\n\t" + "sbc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Compare two numbers to determine if they are equal. + * Constant time implementation. + * + * a First number to compare. + * b Second number to compare. + * returns 1 when equal and 0 otherwise. 
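+ *
+ * The XOR of equal words is zero, so OR-ing the eight word
+ * differences gives zero exactly when a == b; the final compare with
+ * zero turns that into 1 (equal) or 0 without a data-dependent branch.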
+ */ +static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) | + (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7])) == 0; +} + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_add_8(sp_point_256* r, const sp_point_256* p, const sp_point_256* q, + sp_digit* t) +{ + const sp_point_256* ap[2]; + sp_point_256* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + sp_digit* t3 = t + 4*8; + sp_digit* t4 = t + 6*8; + sp_digit* t5 = t + 8*8; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Ensure only the first point is the same as the result. */ + if (q == r) { + const sp_point_256* a = p; + p = q; + q = a; + } + + /* Check double */ + (void)sp_256_sub_8(t1, p256_mod, q->y); + sp_256_norm_8(t1); + if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & + (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { + sp_256_proj_point_dbl_8(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_256)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<8; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<8; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<8; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t1, t1, x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_8(t3, t3, y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_8(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_8(t4, t4, t3, p256_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(x, x, t5, p256_mod); + sp_256_mont_dbl_8(t1, y, p256_mod); + sp_256_mont_sub_8(x, x, t1, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_8(y, y, x, p256_mod); + sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(y, y, t5, p256_mod); + } +} + +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
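+ *
+ * Note: uses a fixed 4-bit window. The 16 multiples 0*g..15*g are
+ * precomputed into t[], then for each 4-bit window of the scalar the
+ * accumulator is doubled four times and the matching table entry added.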
+ */ +static int sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k, + int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 td[16]; + sp_point_256 rtd; + sp_digit tmpd[2 * 8 * 5]; +#endif + sp_point_256* t; + sp_point_256* rt; + sp_digit* tmp; + sp_digit n; + int i; + int c, y; + int err; + + (void)heap; + + err = sp_256_point_new_8(heap, rtd, rt); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#else + t = td; + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + (void)sp_256_mod_mul_norm_8(t[1].x, g->x, p256_mod); + (void)sp_256_mod_mul_norm_8(t[1].y, g->y, p256_mod); + (void)sp_256_mod_mul_norm_8(t[1].z, g->z, p256_mod); + t[1].infinity = 0; + sp_256_proj_point_dbl_8(&t[ 2], &t[ 1], tmp); + t[ 2].infinity = 0; + sp_256_proj_point_add_8(&t[ 3], &t[ 2], &t[ 1], tmp); + t[ 3].infinity = 0; + sp_256_proj_point_dbl_8(&t[ 4], &t[ 2], tmp); + t[ 4].infinity = 0; + sp_256_proj_point_add_8(&t[ 5], &t[ 3], &t[ 2], tmp); + t[ 5].infinity = 0; + sp_256_proj_point_dbl_8(&t[ 6], &t[ 3], tmp); + t[ 6].infinity = 0; + sp_256_proj_point_add_8(&t[ 7], &t[ 4], &t[ 3], tmp); + t[ 7].infinity = 0; + sp_256_proj_point_dbl_8(&t[ 8], &t[ 4], tmp); + t[ 8].infinity = 0; + sp_256_proj_point_add_8(&t[ 9], &t[ 5], &t[ 4], tmp); + t[ 9].infinity = 0; + sp_256_proj_point_dbl_8(&t[10], &t[ 5], tmp); + t[10].infinity = 0; + sp_256_proj_point_add_8(&t[11], &t[ 6], &t[ 5], tmp); + t[11].infinity = 0; + sp_256_proj_point_dbl_8(&t[12], &t[ 6], tmp); + t[12].infinity = 0; + sp_256_proj_point_add_8(&t[13], &t[ 7], &t[ 6], tmp); + t[13].infinity = 0; + sp_256_proj_point_dbl_8(&t[14], &t[ 7], tmp); + t[14].infinity = 0; + sp_256_proj_point_add_8(&t[15], &t[ 8], &t[ 7], tmp); + t[15].infinity = 0; + + i = 6; + n = k[i+1] << 0; + c = 28; + y = n >> 28; + XMEMCPY(rt, &t[y], sizeof(sp_point_256)); + n <<= 4; + for (; i>=0 || c>=4; ) { + if (c < 4) { + n |= k[i--]; + c += 32; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + + sp_256_proj_point_dbl_8(rt, rt, tmp); + sp_256_proj_point_dbl_8(rt, rt, tmp); + sp_256_proj_point_dbl_8(rt, rt, tmp); + sp_256_proj_point_dbl_8(rt, rt, tmp); + + sp_256_proj_point_add_8(rt, rt, &t[y], tmp); + } + + if (map != 0) { + sp_256_map_8(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_256)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 8 * 5); + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + } + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_point_256) * 16); + XFREE(t, heap, DYNAMIC_TYPE_ECC); + } +#else + ForceZero(tmpd, sizeof(tmpd)); + ForceZero(td, sizeof(td)); +#endif + sp_256_point_free_8(rt, 1, heap); + + return err; +} + +/* A table entry for pre-computed points. */ +typedef struct sp_table_entry_256 { + sp_digit x[8]; + sp_digit y[8]; +} sp_table_entry_256; + +#ifdef FP_ECC +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. 
+ * n Number of times to double + * t Temporary ordinate data. + */ +static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*8; + sp_digit* b = t + 4*8; + sp_digit* t1 = t + 6*8; + sp_digit* t2 = t + 8*8; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_256_mont_dbl_8(y, y, p256_mod); + /* W = Z^4 */ + sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod); + +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(t1, t1, w, p256_mod); + sp_256_mont_tpl_8(a, t1, p256_mod); + /* B = X*Y^2 */ + sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod); + /* X = A^2 - 2B */ + sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(t2, b, p256_mod); + sp_256_mont_sub_8(x, x, t2, p256_mod); + /* Z = Z*Y */ + sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod); + /* t2 = Y^4 */ + sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_256_mont_mul_8(w, w, t1, p256_mod, p256_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_256_mont_sub_8(y, b, x, p256_mod); + sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(y, y, p256_mod); + sp_256_mont_sub_8(y, y, t1, p256_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(t1, t1, w, p256_mod); + sp_256_mont_tpl_8(a, t1, p256_mod); + /* B = X*Y^2 */ + sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod); + /* X = A^2 - 2B */ + sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(t2, b, p256_mod); + sp_256_mont_sub_8(x, x, t2, p256_mod); + /* Z = Z*Y */ + sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod); + /* t2 = Y^4 */ + sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_256_mont_sub_8(y, b, x, p256_mod); + sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(y, y, p256_mod); + sp_256_mont_sub_8(y, y, t1, p256_mod); +#endif + /* Y = Y/2 */ + sp_256_div2_8(y, y, p256_mod); +} + +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_256_proj_to_affine_8(sp_point_256* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 8; + sp_digit* tmp = t + 4 * 8; + + sp_256_mont_inv_8(t1, a->z, tmp); + + sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod); + + sp_256_mont_mul_8(a->x, a->x, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(a->y, a->y, t1, p256_mod, p256_mp_mod); + XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod)); +} + +#endif /* FP_ECC */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
+ */ +static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, + const sp_point_256* q, sp_digit* t) +{ + const sp_point_256* ap[2]; + sp_point_256* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + sp_digit* t3 = t + 4*8; + sp_digit* t4 = t + 6*8; + sp_digit* t5 = t + 8*8; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Check double */ + (void)sp_256_sub_8(t1, p256_mod, q->y); + sp_256_norm_8(t1); + if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & + (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { + sp_256_proj_point_dbl_8(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_256)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<8; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<8; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<8; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); + /* H = U2 - X1 */ + sp_256_mont_sub_8(t2, t2, x, p256_mod); + /* R = S2 - Y1 */ + sp_256_mont_sub_8(t4, t4, y, p256_mod); + /* Z3 = H*Z1 */ + sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_256_mont_sqr_8(t1, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t3, x, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(x, t1, t5, p256_mod); + sp_256_mont_dbl_8(t1, t3, p256_mod); + sp_256_mont_sub_8(x, x, t1, p256_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_256_mont_sub_8(t3, t3, x, p256_mod); + sp_256_mont_mul_8(t3, t3, t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, y, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(y, t3, t5, p256_mod); + } +} + +#ifdef WOLFSSL_SP_SMALL +#ifdef FP_ECC +/* Generate the pre-computed table of points for the base point. + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. 
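+ *
+ * Note: entry i of the 16-entry table is the sum of 2^(64*j)*a over the
+ * bits j set in i, i.e. one candidate value per 4-bit column of the comb
+ * used by the stripe multiplication below.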
+ */
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+ sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 td, s1d, s2d;
+#endif
+ sp_point_256* t;
+ sp_point_256* s1 = NULL;
+ sp_point_256* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ err = sp_256_point_new_8(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, s2d, s2);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ sp_256_proj_to_affine_8(t, tmp);
+
+ XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ for (i=1; i<4; i++) {
+ sp_256_proj_point_dbl_n_8(t, 64, tmp);
+ sp_256_proj_to_affine_8(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ for (i=1; i<4; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
+ sp_256_proj_to_affine_8(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_256_point_free_8(s2, 0, heap);
+ sp_256_point_free_8(s1, 0, heap);
+ sp_256_point_free_8( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
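+/* Illustrative sketch (not part of the wolfSSL sources; compiled out):
+ * the invariant behind the table generated above. Entry i holds the
+ * affine point sum of 2^(64*j)*a over every set bit j of i, so one 4-bit
+ * column of the scalar, gathered one bit per 64-bit stripe, selects its
+ * partial product with a single table lookup.
+ */
+#if 0
+static int sp_256_stripe_index_example(const sp_digit k[8], int col)
+{
+ /* Gather bit 'col' of each of the four 64-bit stripes of the 256-bit
+ * scalar k (little-endian 32-bit words) into a 4-bit table index,
+ * exactly as sp_256_ecc_mulmod_stripe_8 below does. */
+ int j, x, y = 0;
+ for (j = 0, x = col; j < 4; j++, x += 64) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+ return y;
+}
+#endif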
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply (the table is pre-computed for this point).
+ * table Pre-computed table of point multiples.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+ const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 rtd;
+ sp_point_256 pd;
+ sp_digit td[2 * 8 * 5];
+#endif
+ sp_point_256* rt;
+ sp_point_256* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_256_point_new_8(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+ XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+ y = 0;
+ for (j=0,x=63; j<4; j++,x+=64) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ rt->infinity = !y;
+ for (i=62; i>=0; i--) {
+ y = 0;
+ for (j=0,x=i; j<4; j++,x+=64) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+
+ sp_256_proj_point_dbl_8(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_256_proj_point_add_qz1_8(rt, rt, p, t);
+ }
+
+ if (map != 0) {
+ sp_256_map_8(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_256));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(p, 0, heap);
+ sp_256_point_free_8(rt, 0, heap);
+
+ return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_256_t {
+ sp_digit x[8];
+ sp_digit y[8];
+ sp_table_entry_256 table[16];
+ uint32_t cnt;
+ int set;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+ static volatile int initCacheMutex_256 = 0;
+ static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ if (sp_cache_256_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache_256[i].set = 0;
+ }
+ sp_cache_256_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache_256[i].set)
+ continue;
+
+ if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+ sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+ sp_cache_256[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry. */
+ i = (sp_cache_256_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache_256[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used. */
+ if (i == sp_cache_256_last) {
+ least = sp_cache_256[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache_256[j].cnt < least) {
+ i = j;
+ least = sp_cache_256[i].cnt;
+ }
+ }
+ }
+
+ XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+ XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+ sp_cache_256[i].set = 1;
+ sp_cache_256[i].cnt = 1;
+ }
+
+ *cache = &sp_cache_256[i];
+ sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
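+/* Illustrative sketch (not part of the wolfSSL sources; compiled out):
+ * the slot-selection policy of sp_ecc_get_cache_256 above, reduced to
+ * its core. On a miss the first unset slot after the last-used index is
+ * taken; if every slot is set, the least-used entry is evicted.
+ */
+#if 0
+static int sp_cache_pick_slot_example(const uint32_t cnt[FP_ENTRIES],
+ const int set[FP_ENTRIES], int last)
+{
+ int i, j;
+ uint32_t least;
+
+ /* Prefer the first unset slot after 'last'. */
+ i = (last + 1) % FP_ENTRIES;
+ for (; i != last; i = (i + 1) % FP_ENTRIES) {
+ if (!set[i])
+ return i;
+ }
+ /* All slots set (i == last here): evict the least-used entry. */
+ least = cnt[0];
+ for (j = 1; j < FP_ENTRIES; j++) {
+ if (cnt[j] < least) {
+ i = j;
+ least = cnt[i];
+ }
+ }
+ return i;
+}
+#endif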
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+#else
+ sp_digit tmp[2 * 8 * 5];
+ sp_cache_256_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ if (initCacheMutex_256 == 0) {
+ wc_InitMutex(&sp_cache_256_lock);
+ initCacheMutex_256 = 1;
+ }
+ if (wc_LockMutex(&sp_cache_256_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ sp_ecc_get_cache_256(g, &cache);
+ if (cache->cnt == 2)
+ sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+ if (cache->cnt < 2) {
+ err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+ }
+ else {
+ err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a The base point.
+ * table Place to store generated point data.
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ */
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+ sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 td, s1d, s2d;
+#endif
+ sp_point_256* t;
+ sp_point_256* s1 = NULL;
+ sp_point_256* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ err = sp_256_point_new_8(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, s2d, s2);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ sp_256_proj_to_affine_8(t, tmp);
+
+ XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ for (i=1; i<8; i++) {
+ sp_256_proj_point_dbl_n_8(t, 32, tmp);
+ sp_256_proj_to_affine_8(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ for (i=1; i<8; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
+ sp_256_proj_to_affine_8(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_256_point_free_8(s2, 0, heap);
+ sp_256_point_free_8(s1, 0, heap);
+ sp_256_point_free_8( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
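+/* Illustrative sketch (not part of the wolfSSL sources; compiled out):
+ * in this non-small build the comb uses 8 stripes of 32 bits, so the
+ * table above holds 2^8 = 256 entries and an index gathers one bit from
+ * each 32-bit stripe of the scalar.
+ */
+#if 0
+static int sp_256_stripe_index_256_example(const sp_digit k[8], int col)
+{
+ int j, x, y = 0;
+ for (j = 0, x = col; j < 8; j++, x += 32) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+ return y;
+}
+#endif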
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply (the table is pre-computed for this point).
+ * table Pre-computed table of point multiples.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+ const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 rtd;
+ sp_point_256 pd;
+ sp_digit td[2 * 8 * 5];
+#endif
+ sp_point_256* rt;
+ sp_point_256* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_256_point_new_8(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+ XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+ y = 0;
+ for (j=0,x=31; j<8; j++,x+=32) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ rt->infinity = !y;
+ for (i=30; i>=0; i--) {
+ y = 0;
+ for (j=0,x=i; j<8; j++,x+=32) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+
+ sp_256_proj_point_dbl_8(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_256_proj_point_add_qz1_8(rt, rt, p, t);
+ }
+
+ if (map != 0) {
+ sp_256_map_8(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_256));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(p, 0, heap);
+ sp_256_point_free_8(rt, 0, heap);
+
+ return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_256_t {
+ sp_digit x[8];
+ sp_digit y[8];
+ sp_table_entry_256 table[256];
+ uint32_t cnt;
+ int set;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+ static volatile int initCacheMutex_256 = 0;
+ static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ if (sp_cache_256_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache_256[i].set = 0;
+ }
+ sp_cache_256_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache_256[i].set)
+ continue;
+
+ if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+ sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+ sp_cache_256[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry. */
+ i = (sp_cache_256_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache_256[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used. */
+ if (i == sp_cache_256_last) {
+ least = sp_cache_256[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache_256[j].cnt < least) {
+ i = j;
+ least = sp_cache_256[i].cnt;
+ }
+ }
+ }
+
+ XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+ XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+ sp_cache_256[i].set = 1;
+ sp_cache_256[i].cnt = 1;
+ }
+
+ *cache = &sp_cache_256[i];
+ sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k, + int map, void* heap) +{ +#ifndef FP_ECC + return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap); +#else + sp_digit tmp[2 * 8 * 5]; + sp_cache_256_t* cache; + int err = MP_OKAY; + +#ifndef HAVE_THREAD_LS + if (initCacheMutex_256 == 0) { + wc_InitMutex(&sp_cache_256_lock); + initCacheMutex_256 = 1; + } + if (wc_LockMutex(&sp_cache_256_lock) != 0) + err = BAD_MUTEX_E; +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_256(g, &cache); + if (cache->cnt == 2) + sp_256_gen_stripe_table_8(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_256_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap); + } + else { + err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k, + map, heap); + } + } + + return err; +#endif +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map, + void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 p; + sp_digit kd[8]; +#endif + sp_point_256* point; + sp_digit* k = NULL; + int err = MP_OKAY; + + err = sp_256_point_new_8(heap, p, point); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + if (err == MP_OKAY) { + sp_256_from_mp(k, 8, km); + sp_256_point_from_ecc_point_8(point, gm); + + err = sp_256_ecc_mulmod_8(point, point, k, map, heap); + } + if (err == MP_OKAY) { + err = sp_256_point_to_ecc_point_8(point, r); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_8(point, 0, heap); + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +static const sp_table_entry_256 p256_table[16] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b, + 0xa53755c6,0x18905f76 }, + { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688, + 0x25885d85,0x8571ff18 } }, + /* 2 */ + { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a, + 0xfd1b667f,0x2f5e6961 }, + { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37, + 0x8d6f0f7b,0xf648f916 } }, + /* 3 */ + { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761, + 0x133d0015,0x5abe0285 }, + { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562, + 0x6b6f7383,0x94bb725b } }, + /* 4 */ + { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5, + 0x21d324f6,0x61d587d4 }, + { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e, + 0x4621efbe,0xfa11fe12 } }, + /* 5 */ + { { 
0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67, + 0x1f13bedc,0x586eb04c }, + { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0, + 0x70864f11,0x19d5ac08 } }, + /* 6 */ + { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a, + 0xc3b266b1,0xbb6de651 }, + { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1, + 0x5d18b99b,0x60b4619a } }, + /* 7 */ + { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014, + 0xaeebffcd,0x9d0f27b2 }, + { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0, + 0x356ec48d,0x244a566d } }, + /* 8 */ + { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e, + 0xcd42ab1b,0x803f3e02 }, + { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273, + 0x5067adc1,0xc097440e } }, + /* 9 */ + { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459, + 0x915f1f30,0xf1af32d5 }, + { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418, + 0xe2d41c8b,0x23d0f130 } }, + /* 10 */ + { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926, + 0x7990216a,0x50bbb4d9 }, + { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b, + 0x01fe49c3,0x2b100118 } }, + /* 11 */ + { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa, + 0x83fbae0c,0xdd558999 }, + { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf, + 0x149d6041,0xe6e4c551 } }, + /* 12 */ + { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07, + 0xdb7e63af,0xfad27148 }, + { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875, + 0x9f0e1a84,0x77387de3 } }, + /* 13 */ + { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408, + 0xbef0c47e,0xb37b85c0 }, + { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa, + 0xf9f628d5,0x9c135ac8 } }, + /* 14 */ + { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403, + 0x91ece900,0xc109f9cb }, + { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d, + 0x2eee1ee1,0x9bc3344f } }, + /* 15 */ + { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665, + 0x5f1a4cc1,0x29591d52 }, + { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496, + 0x18ef332c,0x6376551f } }, +}; + +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
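+ *
+ * Note: the base point is fixed, so the stripe table is the constant
+ * p256_table above rather than being generated at runtime.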
+ */ +static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k, + int map, void* heap) +{ + return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table, + k, map, heap); +} + +#else +static const sp_table_entry_256 p256_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b, + 0xa53755c6,0x18905f76 }, + { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688, + 0x25885d85,0x8571ff18 } }, + /* 2 */ + { { 0x4147519a,0x20288602,0x26b372f0,0xd0981eac,0xa785ebc8,0xa9d4a7ca, + 0xdbdf58e9,0xd953c50d }, + { 0xfd590f8f,0x9d6361cc,0x44e6c917,0x72e9626b,0x22eb64cf,0x7fd96110, + 0x9eb288f3,0x863ebb7e } }, + /* 3 */ + { { 0x5cdb6485,0x7856b623,0x2f0a2f97,0x808f0ea2,0x4f7e300b,0x3e68d954, + 0xb5ff80a0,0x00076055 }, + { 0x838d2010,0x7634eb9b,0x3243708a,0x54014fbb,0x842a6606,0xe0e47d39, + 0x34373ee0,0x83087761 } }, + /* 4 */ + { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a, + 0xfd1b667f,0x2f5e6961 }, + { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37, + 0x8d6f0f7b,0xf648f916 } }, + /* 5 */ + { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761, + 0x133d0015,0x5abe0285 }, + { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562, + 0x6b6f7383,0x94bb725b } }, + /* 6 */ + { { 0x720f141c,0xbbf9b48f,0x2df5bc74,0x6199b3cd,0x411045c4,0xdc3f6129, + 0x2f7dc4ef,0xcdd6bbcb }, + { 0xeaf436fd,0xcca6700b,0xb99326be,0x6f647f6d,0x014f2522,0x0c0fa792, + 0x4bdae5f6,0xa361bebd } }, + /* 7 */ + { { 0x597c13c7,0x28aa2558,0x50b7c3e1,0xc38d635f,0xf3c09d1d,0x07039aec, + 0xc4b5292c,0xba12ca09 }, + { 0x59f91dfd,0x9e408fa4,0xceea07fb,0x3af43b66,0x9d780b29,0x1eceb089, + 0x701fef4b,0x53ebb99d } }, + /* 8 */ + { { 0xb0e63d34,0x4fe7ee31,0xa9e54fab,0xf4600572,0xd5e7b5a4,0xc0493334, + 0x06d54831,0x8589fb92 }, + { 0x6583553a,0xaa70f5cc,0xe25649e5,0x0879094a,0x10044652,0xcc904507, + 0x02541c4f,0xebb0696d } }, + /* 9 */ + { { 0xac1647c5,0x4616ca15,0xc4cf5799,0xb8127d47,0x764dfbac,0xdc666aa3, + 0xd1b27da3,0xeb2820cb }, + { 0x6a87e008,0x9406f8d8,0x922378f3,0xd87dfa9d,0x80ccecb2,0x56ed2e42, + 0x55a7da1d,0x1f28289b } }, + /* 10 */ + { { 0x3b89da99,0xabbaa0c0,0xb8284022,0xa6f2d79e,0xb81c05e8,0x27847862, + 0x05e54d63,0x337a4b59 }, + { 0x21f7794a,0x3c67500d,0x7d6d7f61,0x207005b7,0x04cfd6e8,0x0a5a3781, + 0xf4c2fbd6,0x0d65e0d5 } }, + /* 11 */ + { { 0xb5275d38,0xd9d09bbe,0x0be0a358,0x4268a745,0x973eb265,0xf0762ff4, + 0x52f4a232,0xc23da242 }, + { 0x0b94520c,0x5da1b84f,0xb05bd78e,0x09666763,0x94d29ea1,0x3a4dcb86, + 0xc790cff1,0x19de3b8c } }, + /* 12 */ + { { 0x26c5fe04,0x183a716c,0x3bba1bdb,0x3b28de0b,0xa4cb712c,0x7432c586, + 0x91fccbfd,0xe34dcbd4 }, + { 0xaaa58403,0xb408d46b,0x82e97a53,0x9a697486,0x36aaa8af,0x9e390127, + 0x7b4e0f7f,0xe7641f44 } }, + /* 13 */ + { { 0xdf64ba59,0x7d753941,0x0b0242fc,0xd33f10ec,0xa1581859,0x4f06dfc6, + 0x052a57bf,0x4a12df57 }, + { 0x9439dbd0,0xbfa6338f,0xbde53e1f,0xd3c24bd4,0x21f1b314,0xfd5e4ffa, + 0xbb5bea46,0x6af5aa93 } }, + /* 14 */ + { { 0x10c91999,0xda10b699,0x2a580491,0x0a24b440,0xb8cc2090,0x3e0094b4, + 0x66a44013,0x5fe3475a }, + { 0xf93e7b4b,0xb0f8cabd,0x7c23f91a,0x292b501a,0xcd1e6263,0x42e889ae, + 0xecfea916,0xb544e308 } }, + /* 15 */ + { { 0x16ddfdce,0x6478c6e9,0xf89179e6,0x2c329166,0x4d4e67e1,0x4e8d6e76, + 0xa6b0c20b,0xe0b6b2bd }, + { 0xbb7efb57,0x0d312df2,0x790c4007,0x1aac0dde,0x679bc944,0xf90336ad, + 0x25a63774,0x71c023de } }, + /* 16 */ + { { 
0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5, + 0x21d324f6,0x61d587d4 }, + { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e, + 0x4621efbe,0xfa11fe12 } }, + /* 17 */ + { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67, + 0x1f13bedc,0x586eb04c }, + { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0, + 0x70864f11,0x19d5ac08 } }, + /* 18 */ + { { 0x309a4e1f,0x1e99f581,0xe9270074,0xab7de71b,0xefd28d20,0x26a5ef0b, + 0x7f9c563f,0xe7c0073f }, + { 0x0ef59f76,0x1f6d663a,0x20fcb050,0x669b3b54,0x7a6602d4,0xc08c1f7a, + 0xc65b3c0a,0xe08504fe } }, + /* 19 */ + { { 0xa031b3ca,0xf098f68d,0xe6da6d66,0x6d1cab9e,0x94f246e8,0x5bfd81fa, + 0x5b0996b4,0x78f01882 }, + { 0x3a25787f,0xb7eefde4,0x1dccac9b,0x8016f80d,0xb35bfc36,0x0cea4877, + 0x7e94747a,0x43a773b8 } }, + /* 20 */ + { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a, + 0xc3b266b1,0xbb6de651 }, + { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1, + 0x5d18b99b,0x60b4619a } }, + /* 21 */ + { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014, + 0xaeebffcd,0x9d0f27b2 }, + { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0, + 0x356ec48d,0x244a566d } }, + /* 22 */ + { { 0xeacf1f96,0x6db0394a,0x024c271c,0x9f2122a9,0x82cbd3b9,0x2626ac1b, + 0x3581ef69,0x45e58c87 }, + { 0xa38f9dbc,0xd3ff479d,0xe888a040,0xa8aaf146,0x46e0bed7,0x945adfb2, + 0xc1e4b7a4,0xc040e21c } }, + /* 23 */ + { { 0x6f8117b6,0x847af000,0x73a35433,0x651969ff,0x1d9475eb,0x482b3576, + 0x682c6ec7,0x1cdf5c97 }, + { 0x11f04839,0x7db775b4,0x48de1698,0x7dbeacf4,0xb70b3219,0xb2921dd1, + 0xa92dff3d,0x046755f8 } }, + /* 24 */ + { { 0xbce8ffcd,0xcc8ac5d2,0x2fe61a82,0x0d53c48b,0x7202d6c7,0xf6f16172, + 0x3b83a5f3,0x046e5e11 }, + { 0xd8007f01,0xe7b8ff64,0x5af43183,0x7fb1ef12,0x35e1a03c,0x045c5ea6, + 0x303d005b,0x6e0106c3 } }, + /* 25 */ + { { 0x88dd73b1,0x48c73584,0x995ed0d9,0x7670708f,0xc56a2ab7,0x38385ea8, + 0xe901cf1f,0x442594ed }, + { 0x12d4b65b,0xf8faa2c9,0x96c90c37,0x94c2343b,0x5e978d1f,0xd326e4a1, + 0x4c2ee68e,0xa796fa51 } }, + /* 26 */ + { { 0x823addd7,0x359fb604,0xe56693b3,0x9e2a6183,0x3cbf3c80,0xf885b78e, + 0xc69766e9,0xe4ad2da9 }, + { 0x8e048a61,0x357f7f42,0xc092d9a0,0x082d198c,0xc03ed8ef,0xfc3a1af4, + 0xc37b5143,0xc5e94046 } }, + /* 27 */ + { { 0x2be75f9e,0x476a538c,0xcb123a78,0x6fd1a9e8,0xb109c04b,0xd85e4df0, + 0xdb464747,0x63283daf }, + { 0xbaf2df15,0xce728cf7,0x0ad9a7f4,0xe592c455,0xe834bcc3,0xfab226ad, + 0x1981a938,0x68bd19ab } }, + /* 28 */ + { { 0x1887d659,0xc08ead51,0xb359305a,0x3374d5f4,0xcfe74fe3,0x96986981, + 0x3c6fdfd6,0x495292f5 }, + { 0x1acec896,0x4a878c9e,0xec5b4484,0xd964b210,0x664d60a7,0x6696f7e2, + 0x26036837,0x0ec7530d } }, + /* 29 */ + { { 0xad2687bb,0x2da13a05,0xf32e21fa,0xa1f83b6a,0x1dd4607b,0x390f5ef5, + 0x64863f0b,0x0f6207a6 }, + { 0x0f138233,0xbd67e3bb,0x272aa718,0xdd66b96c,0x26ec88ae,0x8ed00407, + 0x08ed6dcf,0xff0db072 } }, + /* 30 */ + { { 0x4c95d553,0x749fa101,0x5d680a8a,0xa44052fd,0xff3b566f,0x183b4317, + 0x88740ea3,0x313b513c }, + { 0x08d11549,0xb402e2ac,0xb4dee21c,0x071ee10b,0x47f2320e,0x26b987dd, + 0x86f19f81,0x2d3abcf9 } }, + /* 31 */ + { { 0x815581a2,0x4c288501,0x632211af,0x9a0a6d56,0x0cab2e99,0x19ba7a0f, + 0xded98cdf,0xc036fa10 }, + { 0xc1fbd009,0x29ae08ba,0x06d15816,0x0b68b190,0x9b9e0d8f,0xc2eb3277, + 0xb6d40194,0xa6b2a2c4 } }, + /* 32 */ + { { 0x6d3549cf,0xd433e50f,0xfacd665e,0x6f33696f,0xce11fcb4,0x695bfdac, + 0xaf7c9860,0x810ee252 }, + { 0x7159bb2c,0x65450fe1,0x758b357b,0xf7dfbebe,0xd69fea72,0x2b057e74, + 
0x92731745,0xd485717a } }, + /* 33 */ + { { 0xf0cb5a98,0x11741a8a,0x1f3110bf,0xd3da8f93,0xab382adf,0x1994e2cb, + 0x2f9a604e,0x6a6045a7 }, + { 0xa2b2411d,0x170c0d3f,0x510e96e0,0xbe0eb83e,0x8865b3cc,0x3bcc9f73, + 0xf9e15790,0xd3e45cfa } }, + /* 34 */ + { { 0xe83f7669,0xce1f69bb,0x72877d6b,0x09f8ae82,0x3244278d,0x9548ae54, + 0xe3c2c19c,0x207755de }, + { 0x6fef1945,0x87bd61d9,0xb12d28c3,0x18813cef,0x72df64aa,0x9fbcd1d6, + 0x7154b00d,0x48dc5ee5 } }, + /* 35 */ + { { 0xf7e5a199,0x123790bf,0x989ccbb7,0xe0efb8cf,0x0a519c79,0xc27a2bfe, + 0xdff6f445,0xf2fb0aed }, + { 0xf0b5025f,0x41c09575,0x40fa9f22,0x550543d7,0x380bfbd0,0x8fa3c8ad, + 0xdb28d525,0xa13e9015 } }, + /* 36 */ + { { 0xa2b65cbc,0xf9f7a350,0x2a464226,0x0b04b972,0xe23f07a1,0x265ce241, + 0x1497526f,0x2bf0d6b0 }, + { 0x4b216fb7,0xd3d4dd3f,0xfbdda26a,0xf7d7b867,0x6708505c,0xaeb7b83f, + 0x162fe89f,0x42a94a5a } }, + /* 37 */ + { { 0xeaadf191,0x5846ad0b,0x25a268d7,0x0f8a4890,0x494dc1f6,0xe8603050, + 0xc65ede3d,0x2c2dd969 }, + { 0x93849c17,0x6d02171d,0x1da250dd,0x460488ba,0x3c3a5485,0x4810c706, + 0x42c56dbc,0xf437fa1f } }, + /* 38 */ + { { 0x4a0f7dab,0x6aa0d714,0x1776e9ac,0x0f049793,0xf5f39786,0x52c0a050, + 0x54707aa8,0xaaf45b33 }, + { 0xc18d364a,0x85e37c33,0x3e497165,0xd40b9b06,0x15ec5444,0xf4171681, + 0xf4f272bc,0xcdf6310d } }, + /* 39 */ + { { 0x8ea8b7ef,0x7473c623,0x85bc2287,0x08e93518,0x2bda8e34,0x41956772, + 0xda9e2ff2,0xf0d008ba }, + { 0x2414d3b1,0x2912671d,0xb019ea76,0xb3754985,0x453bcbdb,0x5c61b96d, + 0xca887b8b,0x5bd5c2f5 } }, + /* 40 */ + { { 0xf49a3154,0xef0f469e,0x6e2b2e9a,0x3e85a595,0xaa924a9c,0x45aaec1e, + 0xa09e4719,0xaa12dfc8 }, + { 0x4df69f1d,0x26f27227,0xa2ff5e73,0xe0e4c82c,0xb7a9dd44,0xb9d8ce73, + 0xe48ca901,0x6c036e73 } }, + /* 41 */ + { { 0x0f6e3138,0x5cfae12a,0x25ad345a,0x6966ef00,0x45672bc5,0x8993c64b, + 0x96afbe24,0x292ff658 }, + { 0x5e213402,0xd5250d44,0x4392c9fe,0xf6580e27,0xda1c72e8,0x097b397f, + 0x311b7276,0x644e0c90 } }, + /* 42 */ + { { 0xa47153f0,0xe1e421e1,0x920418c9,0xb86c3b79,0x705d7672,0x93bdce87, + 0xcab79a77,0xf25ae793 }, + { 0x6d869d0c,0x1f3194a3,0x4986c264,0x9d55c882,0x096e945e,0x49fb5ea3, + 0x13db0a3e,0x39b8e653 } }, + /* 43 */ + { { 0xb6fd2e59,0x37754200,0x9255c98f,0x35e2c066,0x0e2a5739,0xd9dab21a, + 0x0f19db06,0x39122f2f }, + { 0x03cad53c,0xcfbce1e0,0xe65c17e3,0x225b2c0f,0x9aa13877,0x72baf1d2, + 0xce80ff8d,0x8de80af8 } }, + /* 44 */ + { { 0x207bbb76,0xafbea8d9,0x21782758,0x921c7e7c,0x1c0436b1,0xdfa2b74b, + 0x2e368c04,0x87194906 }, + { 0xa3993df5,0xb5f928bb,0xf3b3d26a,0x639d75b5,0x85b55050,0x011aa78a, + 0x5b74fde1,0xfc315e6a } }, + /* 45 */ + { { 0xe8d6ecfa,0x561fd41a,0x1aec7f86,0x5f8c44f6,0x4924741d,0x98452a7b, + 0xee389088,0xe6d4a7ad }, + { 0x4593c75d,0x60552ed1,0xdd271162,0x70a70da4,0x7ba2c7db,0xd2aede93, + 0x9be2ae57,0x35dfaf9a } }, + /* 46 */ + { { 0xaa736636,0x6b956fcd,0xae2cab7e,0x09f51d97,0x0f349966,0xfb10bf41, + 0x1c830d2b,0x1da5c7d7 }, + { 0x3cce6825,0x5c41e483,0xf9573c3b,0x15ad118f,0xf23036b8,0xa28552c7, + 0xdbf4b9d6,0x7077c0fd } }, + /* 47 */ + { { 0x46b9661c,0xbf63ff8d,0x0d2cfd71,0xa1dfd36b,0xa847f8f7,0x0373e140, + 0xe50efe44,0x53a8632e }, + { 0x696d8051,0x0976ff68,0xc74f468a,0xdaec0c95,0x5e4e26bd,0x62994dc3, + 0x34e1fcc1,0x028ca76d } }, + /* 48 */ + { { 0xfc9877ee,0xd11d47dc,0x801d0002,0xc8b36210,0x54c260b6,0xd002c117, + 0x6962f046,0x04c17cd8 }, + { 0xb0daddf5,0x6d9bd094,0x24ce55c0,0xbea23575,0x72da03b5,0x663356e6, + 0xfed97474,0xf7ba4de9 } }, + /* 49 */ + { { 0xebe1263f,0xd0dbfa34,0x71ae7ce6,0x55763735,0x82a6f523,0xd2440553, + 0x52131c41,0xe31f9600 }, + { 
0xea6b6ec6,0xd1bb9216,0x73c2fc44,0x37a1d12e,0x89d0a294,0xc10e7eac, + 0xce34d47b,0xaa3a6259 } }, + /* 50 */ + { { 0x36f3dcd3,0xfbcf9df5,0xd2bf7360,0x6ceded50,0xdf504f5b,0x491710fa, + 0x7e79daee,0x2398dd62 }, + { 0x6d09569e,0xcf4705a3,0x5149f769,0xea0619bb,0x35f6034c,0xff9c0377, + 0x1c046210,0x5717f5b2 } }, + /* 51 */ + { { 0x21dd895e,0x9fe229c9,0x40c28451,0x8e518500,0x1d637ecd,0xfa13d239, + 0x0e3c28de,0x660a2c56 }, + { 0xd67fcbd0,0x9cca88ae,0x0ea9f096,0xc8472478,0x72e92b4d,0x32b2f481, + 0x4f522453,0x624ee54c } }, + /* 52 */ + { { 0xd897eccc,0x09549ce4,0x3f9880aa,0x4d49d1d9,0x043a7c20,0x723c2423, + 0x92bdfbc0,0x4f392afb }, + { 0x7de44fd9,0x6969f8fa,0x57b32156,0xb66cfbe4,0x368ebc3c,0xdb2fa803, + 0xccdb399c,0x8a3e7977 } }, + /* 53 */ + { { 0x06c4b125,0xdde1881f,0xf6e3ca8c,0xae34e300,0x5c7a13e9,0xef6999de, + 0x70c24404,0x3888d023 }, + { 0x44f91081,0x76280356,0x5f015504,0x3d9fcf61,0x632cd36e,0x1827edc8, + 0x18102336,0xa5e62e47 } }, + /* 54 */ + { { 0x2facd6c8,0x1a825ee3,0x54bcbc66,0x699c6354,0x98df9931,0x0ce3edf7, + 0x466a5adc,0x2c4768e6 }, + { 0x90a64bc9,0xb346ff8c,0xe4779f5c,0x630a6020,0xbc05e884,0xd949d064, + 0xf9e652a0,0x7b5e6441 } }, + /* 55 */ + { { 0x1d28444a,0x2169422c,0xbe136a39,0xe996c5d8,0xfb0c7fce,0x2387afe5, + 0x0c8d744a,0xb8af73cb }, + { 0x338b86fd,0x5fde83aa,0xa58a5cff,0xfee3f158,0x20ac9433,0xc9ee8f6f, + 0x7f3f0895,0xa036395f } }, + /* 56 */ + { { 0xa10f7770,0x8c73c6bb,0xa12a0e24,0xa6f16d81,0x51bc2b9f,0x100df682, + 0x875fb533,0x4be36b01 }, + { 0x9fb56dbb,0x9226086e,0x07e7a4f8,0x306fef8b,0x66d52f20,0xeeaccc05, + 0x1bdc00c0,0x8cbc9a87 } }, + /* 57 */ + { { 0xc0dac4ab,0xe131895c,0x712ff112,0xa874a440,0x6a1cee57,0x6332ae7c, + 0x0c0835f8,0x44e7553e }, + { 0x7734002d,0x6d503fff,0x0b34425c,0x9d35cb8b,0x0e8738b5,0x95f70276, + 0x5eb8fc18,0x470a683a } }, + /* 58 */ + { { 0x90513482,0x81b761dc,0x01e9276a,0x0287202a,0x0ce73083,0xcda441ee, + 0xc63dc6ef,0x16410690 }, + { 0x6d06a2ed,0xf5034a06,0x189b100b,0xdd4d7745,0xab8218c9,0xd914ae72, + 0x7abcbb4f,0xd73479fd } }, + /* 59 */ + { { 0x5ad4c6e5,0x7edefb16,0x5b06d04d,0x262cf08f,0x8575cb14,0x12ed5bb1, + 0x0771666b,0x816469e3 }, + { 0x561e291e,0xd7ab9d79,0xc1de1661,0xeb9daf22,0x135e0513,0xf49827eb, + 0xf0dd3f9c,0x0a36dd23 } }, + /* 60 */ + { { 0x41d5533c,0x098d32c7,0x8684628f,0x7c5f5a9e,0xe349bd11,0x39a228ad, + 0xfdbab118,0xe331dfd6 }, + { 0x6bcc6ed8,0x5100ab68,0xef7a260e,0x7160c3bd,0xbce850d7,0x9063d9a7, + 0x492e3389,0xd3b4782a } }, + /* 61 */ + { { 0xf3821f90,0xa149b6e8,0x66eb7aad,0x92edd9ed,0x1a013116,0x0bb66953, + 0x4c86a5bd,0x7281275a }, + { 0xd3ff47e5,0x503858f7,0x61016441,0x5e1616bc,0x7dfd9bb1,0x62b0f11a, + 0xce145059,0x2c062e7e } }, + /* 62 */ + { { 0x0159ac2e,0xa76f996f,0xcbdb2713,0x281e7736,0x08e46047,0x2ad6d288, + 0x2c4e7ef1,0x282a35f9 }, + { 0xc0ce5cd2,0x9c354b1e,0x1379c229,0xcf99efc9,0x3e82c11e,0x992caf38, + 0x554d2abd,0xc71cd513 } }, + /* 63 */ + { { 0x09b578f4,0x4885de9c,0xe3affa7a,0x1884e258,0x59182f1f,0x8f76b1b7, + 0xcf47f3a3,0xc50f6740 }, + { 0x374b68ea,0xa9c4adf3,0x69965fe2,0xa406f323,0x85a53050,0x2f86a222, + 0x212958dc,0xb9ecb3a7 } }, + /* 64 */ + { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e, + 0xcd42ab1b,0x803f3e02 }, + { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273, + 0x5067adc1,0xc097440e } }, + /* 65 */ + { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459, + 0x915f1f30,0xf1af32d5 }, + { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418, + 0xe2d41c8b,0x23d0f130 } }, + /* 66 */ + { { 
0xf41500d9,0x857ab6ed,0xfcbeada8,0x0d890ae5,0x89725951,0x52fe8648, + 0xc0a3fadd,0xb0288dd6 }, + { 0x650bcb08,0x85320f30,0x695d6e16,0x71af6313,0xb989aa76,0x31f520a7, + 0xf408c8d2,0xffd3724f } }, + /* 67 */ + { { 0xb458e6cb,0x53968e64,0x317a5d28,0x992dad20,0x7aa75f56,0x3814ae0b, + 0xd78c26df,0xf5590f4a }, + { 0xcf0ba55a,0x0fc24bd3,0x0c778bae,0x0fc4724a,0x683b674a,0x1ce9864f, + 0xf6f74a20,0x18d6da54 } }, + /* 68 */ + { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926, + 0x7990216a,0x50bbb4d9 }, + { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b, + 0x01fe49c3,0x2b100118 } }, + /* 69 */ + { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa, + 0x83fbae0c,0xdd558999 }, + { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf, + 0x149d6041,0xe6e4c551 } }, + /* 70 */ + { { 0x1e9af288,0x55f655bb,0xf7ada931,0x647e1a64,0xcb2820e5,0x43697e4b, + 0x07ed56ff,0x51e00db1 }, + { 0x771c327e,0x43d169b8,0x4a96c2ad,0x29cdb20b,0x3deb4779,0xc07d51f5, + 0x49829177,0xe22f4241 } }, + /* 71 */ + { { 0x635f1abb,0xcd45e8f4,0x68538874,0x7edc0cb5,0xb5a8034d,0xc9472c1f, + 0x52dc48c9,0xf709373d }, + { 0xa8af30d6,0x401966bb,0xf137b69c,0x95bf5f4a,0x9361c47e,0x3966162a, + 0xe7275b11,0xbd52d288 } }, + /* 72 */ + { { 0x9c5fa877,0xab155c7a,0x7d3a3d48,0x17dad672,0x73d189d8,0x43f43f9e, + 0xc8aa77a6,0xa0d0f8e4 }, + { 0xcc94f92d,0x0bbeafd8,0x0c4ddb3a,0xd818c8be,0xb82eba14,0x22cc65f8, + 0x946d6a00,0xa56c78c7 } }, + /* 73 */ + { { 0x0dd09529,0x2962391b,0x3daddfcf,0x803e0ea6,0x5b5bf481,0x2c77351f, + 0x731a367a,0xd8befdf8 }, + { 0xfc0157f4,0xab919d42,0xfec8e650,0xf51caed7,0x02d48b0a,0xcdf9cb40, + 0xce9f6478,0x854a68a5 } }, + /* 74 */ + { { 0x63506ea5,0xdc35f67b,0xa4fe0d66,0x9286c489,0xfe95cd4d,0x3f101d3b, + 0x98846a95,0x5cacea0b }, + { 0x9ceac44d,0xa90df60c,0x354d1c3a,0x3db29af4,0xad5dbabe,0x08dd3de8, + 0x35e4efa9,0xe4982d12 } }, + /* 75 */ + { { 0xc34cd55e,0x23104a22,0x2680d132,0x58695bb3,0x1fa1d943,0xfb345afa, + 0x16b20499,0x8046b7f6 }, + { 0x38e7d098,0xb533581e,0xf46f0b70,0xd7f61e8d,0x44cb78c4,0x30dea9ea, + 0x9082af55,0xeb17ca7b } }, + /* 76 */ + { { 0x76a145b9,0x1751b598,0xc1bc71ec,0xa5cf6b0f,0x392715bb,0xd3e03565, + 0xfab5e131,0x097b00ba }, + { 0x565f69e1,0xaa66c8e9,0xb5be5199,0x77e8f75a,0xda4fd984,0x6033ba11, + 0xafdbcc9e,0xf95c747b } }, + /* 77 */ + { { 0xbebae45e,0x558f01d3,0xc4bc6955,0xa8ebe9f0,0xdbc64fc6,0xaeb705b1, + 0x566ed837,0x3512601e }, + { 0xfa1161cd,0x9336f1e1,0x4c65ef87,0x328ab8d5,0x724f21e5,0x4757eee2, + 0x6068ab6b,0x0ef97123 } }, + /* 78 */ + { { 0x54ca4226,0x02598cf7,0xf8642c8e,0x5eede138,0x468e1790,0x48963f74, + 0x3b4fbc95,0xfc16d933 }, + { 0xe7c800ca,0xbe96fb31,0x2678adaa,0x13806331,0x6ff3e8b5,0x3d624497, + 0xb95d7a17,0x14ca4af1 } }, + /* 79 */ + { { 0xbd2f81d5,0x7a4771ba,0x01f7d196,0x1a5f9d69,0xcad9c907,0xd898bef7, + 0xf59c231d,0x4057b063 }, + { 0x89c05c0a,0xbffd82fe,0x1dc0df85,0xe4911c6f,0xa35a16db,0x3befccae, + 0xf1330b13,0x1c3b5d64 } }, + /* 80 */ + { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07, + 0xdb7e63af,0xfad27148 }, + { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875, + 0x9f0e1a84,0x77387de3 } }, + /* 81 */ + { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408, + 0xbef0c47e,0xb37b85c0 }, + { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa, + 0xf9f628d5,0x9c135ac8 } }, + /* 82 */ + { { 0x84e35743,0x32aa3202,0x85a3cdef,0x320d6ab1,0x1df19819,0xb821b176, + 0xc433851f,0x5721361f }, + { 0x71fc9168,0x1f0db36a,0x5e5c403c,0x5f98ba73,0x37bcd8f5,0xf64ca87e, + 
0xe6bb11bd,0xdcbac3c9 } }, + /* 83 */ + { { 0x4518cbe2,0xf01d9968,0x9c9eb04e,0xd242fc18,0xe47feebf,0x727663c7, + 0x2d626862,0xb8c1c89e }, + { 0xc8e1d569,0x51a58bdd,0xb7d88cd0,0x563809c8,0xf11f31eb,0x26c27fd9, + 0x2f9422d4,0x5d23bbda } }, + /* 84 */ + { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403, + 0x91ece900,0xc109f9cb }, + { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d, + 0x2eee1ee1,0x9bc3344f } }, + /* 85 */ + { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665, + 0x5f1a4cc1,0x29591d52 }, + { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496, + 0x18ef332c,0x6376551f } }, + /* 86 */ + { { 0x562976cc,0xbda5f14e,0x0ef12c38,0x22bca3e6,0x6cca9852,0xbbfa3064, + 0x08e2987a,0xbdb79dc8 }, + { 0xcb06a772,0xfd2cb5c9,0xfe536dce,0x38f475aa,0x7c2b5db8,0xc2a3e022, + 0xadd3c14a,0x8ee86001 } }, + /* 87 */ + { { 0xa4ade873,0xcbe96981,0xc4fba48c,0x7ee9aa4d,0x5a054ba5,0x2cee2899, + 0x6f77aa4b,0x92e51d7a }, + { 0x7190a34d,0x948bafa8,0xf6bd1ed1,0xd698f75b,0x0caf1144,0xd00ee6e3, + 0x0a56aaaa,0x5182f86f } }, + /* 88 */ + { { 0x7a4cc99c,0xfba6212c,0x3e6d9ca1,0xff609b68,0x5ac98c5a,0x5dbb27cb, + 0x4073a6f2,0x91dcab5d }, + { 0x5f575a70,0x01b6cc3d,0x6f8d87fa,0x0cb36139,0x89981736,0x165d4e8c, + 0x97974f2b,0x17a0cedb } }, + /* 89 */ + { { 0x076c8d3a,0x38861e2a,0x210f924b,0x701aad39,0x13a835d9,0x94d0eae4, + 0x7f4cdf41,0x2e8ce36c }, + { 0x037a862b,0x91273dab,0x60e4c8fa,0x01ba9bb7,0x33baf2dd,0xf9645388, + 0x34f668f3,0xf4ccc6cb } }, + /* 90 */ + { { 0xf1f79687,0x44ef525c,0x92efa815,0x7c595495,0xa5c78d29,0xe1231741, + 0x9a0df3c9,0xac0db488 }, + { 0xdf01747f,0x86bfc711,0xef17df13,0x592b9358,0x5ccb6bb5,0xe5880e4f, + 0x94c974a2,0x95a64a61 } }, + /* 91 */ + { { 0xc15a4c93,0x72c1efda,0x82585141,0x40269b73,0x16cb0bad,0x6a8dfb1c, + 0x29210677,0x231e54ba }, + { 0x8ae6d2dc,0xa70df917,0x39112918,0x4d6aa63f,0x5e5b7223,0xf627726b, + 0xd8a731e1,0xab0be032 } }, + /* 92 */ + { { 0x8d131f2d,0x097ad0e9,0x3b04f101,0x637f09e3,0xd5e9a748,0x1ac86196, + 0x2cf6a679,0xf1bcc880 }, + { 0xe8daacb4,0x25c69140,0x60f65009,0x3c4e4055,0x477937a6,0x591cc8fc, + 0x5aebb271,0x85169469 } }, + /* 93 */ + { { 0xf1dcf593,0xde35c143,0xb018be3b,0x78202b29,0x9bdd9d3d,0xe9cdadc2, + 0xdaad55d8,0x8f67d9d2 }, + { 0x7481ea5f,0x84111656,0xe34c590c,0xe7d2dde9,0x05053fa8,0xffdd43f4, + 0xc0728b5d,0xf84572b9 } }, + /* 94 */ + { { 0x97af71c9,0x5e1a7a71,0x7a736565,0xa1449444,0x0e1d5063,0xa1b4ae07, + 0x616b2c19,0xedee2710 }, + { 0x11734121,0xb2f034f5,0x4a25e9f0,0x1cac6e55,0xa40c2ecf,0x8dc148f3, + 0x44ebd7f4,0x9fd27e9b } }, + /* 95 */ + { { 0xf6e2cb16,0x3cc7658a,0xfe5919b6,0xe3eb7d2c,0x168d5583,0x5a8c5816, + 0x958ff387,0xa40c2fb6 }, + { 0xfedcc158,0x8c9ec560,0x55f23056,0x7ad804c6,0x9a307e12,0xd9396704, + 0x7dc6decf,0x99bc9bb8 } }, + /* 96 */ + { { 0x927dafc6,0x84a9521d,0x5c09cd19,0x52c1fb69,0xf9366dde,0x9d9581a0, + 0xa16d7e64,0x9abe210b }, + { 0x48915220,0x480af84a,0x4dd816c6,0xfa73176a,0x1681ca5a,0xc7d53987, + 0x87f344b0,0x7881c257 } }, + /* 97 */ + { { 0xe0bcf3ff,0x93399b51,0x127f74f6,0x0d02cbc5,0xdd01d968,0x8fb465a2, + 0xa30e8940,0x15e6e319 }, + { 0x3e0e05f4,0x646d6e0d,0x43588404,0xfad7bddc,0xc4f850d3,0xbe61c7d1, + 0x191172ce,0x0e55facf } }, + /* 98 */ + { { 0xf8787564,0x7e9d9806,0x31e85ce6,0x1a331721,0xb819e8d6,0x6b0158ca, + 0x6fe96577,0xd73d0976 }, + { 0x1eb7206e,0x42483425,0xc618bb42,0xa519290f,0x5e30a520,0x5dcbb859, + 0x8f15a50b,0x9250a374 } }, + /* 99 */ + { { 0xbe577410,0xcaff08f8,0x5077a8c6,0xfd408a03,0xec0a63a4,0xf1f63289, + 0xc1cc8c0b,0x77414082 }, + { 
0xeb0991cd,0x05a40fa6,0x49fdc296,0xc1ca0866,0xb324fd40,0x3a68a3c7, + 0x12eb20b9,0x8cb04f4d } }, + /* 100 */ + { { 0x6906171c,0xb1c2d055,0xb0240c3f,0x9073e9cd,0xd8906841,0xdb8e6b4f, + 0x47123b51,0xe4e429ef }, + { 0x38ec36f4,0x0b8dd53c,0xff4b6a27,0xf9d2dc01,0x879a9a48,0x5d066e07, + 0x3c6e6552,0x37bca2ff } }, + /* 101 */ + { { 0xdf562470,0x4cd2e3c7,0xc0964ac9,0x44f272a2,0x80c793be,0x7c6d5df9, + 0x3002b22a,0x59913edc }, + { 0x5750592a,0x7a139a83,0xe783de02,0x99e01d80,0xea05d64f,0xcf8c0375, + 0xb013e226,0x43786e4a } }, + /* 102 */ + { { 0x9e56b5a6,0xff32b0ed,0xd9fc68f9,0x0750d9a6,0x597846a7,0xec15e845, + 0xb7e79e7a,0x8638ca98 }, + { 0x0afc24b2,0x2f5ae096,0x4dace8f2,0x05398eaf,0xaecba78f,0x3b765dd0, + 0x7b3aa6f0,0x1ecdd36a } }, + /* 103 */ + { { 0x6c5ff2f3,0x5d3acd62,0x2873a978,0xa2d516c0,0xd2110d54,0xad94c9fa, + 0xd459f32d,0xd85d0f85 }, + { 0x10b11da3,0x9f700b8d,0xa78318c4,0xd2c22c30,0x9208decd,0x556988f4, + 0xb4ed3c62,0xa04f19c3 } }, + /* 104 */ + { { 0xed7f93bd,0x087924c8,0x392f51f6,0xcb64ac5d,0x821b71af,0x7cae330a, + 0x5c0950b0,0x92b2eeea }, + { 0x85b6e235,0x85ac4c94,0x2936c0f0,0xab2ca4a9,0xe0508891,0x80faa6b3, + 0x5834276c,0x1ee78221 } }, + /* 105 */ + { { 0xe63e79f7,0xa60a2e00,0xf399d906,0xf590e7b2,0x6607c09d,0x9021054a, + 0x57a6e150,0xf3f2ced8 }, + { 0xf10d9b55,0x200510f3,0xd8642648,0x9d2fcfac,0xe8bd0e7c,0xe5631aa7, + 0x3da3e210,0x0f56a454 } }, + /* 106 */ + { { 0x1043e0df,0x5b21bffa,0x9c007e6d,0x6c74b6cc,0xd4a8517a,0x1a656ec0, + 0x1969e263,0xbd8f1741 }, + { 0xbeb7494a,0x8a9bbb86,0x45f3b838,0x1567d46f,0xa4e5a79a,0xdf7a12a7, + 0x30ccfa09,0x2d1a1c35 } }, + /* 107 */ + { { 0x506508da,0x192e3813,0xa1d795a7,0x336180c4,0x7a9944b3,0xcddb5949, + 0xb91fba46,0xa107a65e }, + { 0x0f94d639,0xe6d1d1c5,0x8a58b7d7,0x8b4af375,0xbd37ca1c,0x1a7c5584, + 0xf87a9af2,0x183d760a } }, + /* 108 */ + { { 0x0dde59a4,0x29d69711,0x0e8bef87,0xf1ad8d07,0x4f2ebe78,0x229b4963, + 0xc269d754,0x1d44179d }, + { 0x8390d30e,0xb32dc0cf,0x0de8110c,0x0a3b2753,0x2bc0339a,0x31af1dc5, + 0x9606d262,0x771f9cc2 } }, + /* 109 */ + { { 0x85040739,0x99993e77,0x8026a939,0x44539db9,0xf5f8fc26,0xcf40f6f2, + 0x0362718e,0x64427a31 }, + { 0x85428aa8,0x4f4f2d87,0xebfb49a8,0x7b7adc3f,0xf23d01ac,0x201b2c6d, + 0x6ae90d6d,0x49d9b749 } }, + /* 110 */ + { { 0x435d1099,0xcc78d8bc,0x8e8d1a08,0x2adbcd4e,0x2cb68a41,0x02c2e2a0, + 0x3f605445,0x9037d81b }, + { 0x074c7b61,0x7cdbac27,0x57bfd72e,0xfe2031ab,0x596d5352,0x61ccec96, + 0x7cc0639c,0x08c3de6a } }, + /* 111 */ + { { 0xf6d552ab,0x20fdd020,0x05cd81f1,0x56baff98,0x91351291,0x06fb7c3e, + 0x45796b2f,0xc6909442 }, + { 0x41231bd1,0x17b3ae9c,0x5cc58205,0x1eac6e87,0xf9d6a122,0x208837ab, + 0xcafe3ac0,0x3fa3db02 } }, + /* 112 */ + { { 0x05058880,0xd75a3e65,0x643943f2,0x7da365ef,0xfab24925,0x4147861c, + 0xfdb808ff,0xc5c4bdb0 }, + { 0xb272b56b,0x73513e34,0x11b9043a,0xc8327e95,0xf8844969,0xfd8ce37d, + 0x46c2b6b5,0x2d56db94 } }, + /* 113 */ + { { 0xff46ac6b,0x2461782f,0x07a2e425,0xd19f7926,0x09a48de1,0xfafea3c4, + 0xe503ba42,0x0f56bd9d }, + { 0x345cda49,0x137d4ed1,0x816f299d,0x821158fc,0xaeb43402,0xe7c6a54a, + 0x1173b5f1,0x4003bb9d } }, + /* 114 */ + { { 0xa0803387,0x3b8e8189,0x39cbd404,0xece115f5,0xd2877f21,0x4297208d, + 0xa07f2f9e,0x53765522 }, + { 0xa8a4182d,0xa4980a21,0x3219df79,0xa2bbd07a,0x1a19a2d4,0x674d0a2e, + 0x6c5d4549,0x7a056f58 } }, + /* 115 */ + { { 0x9d8a2a47,0x646b2558,0xc3df2773,0x5b582948,0xabf0d539,0x51ec000e, + 0x7a1a2675,0x77d482f1 }, + { 0x87853948,0xb8a1bd95,0x6cfbffee,0xa6f817bd,0x80681e47,0xab6ec057, + 0x2b38b0e4,0x4115012b } }, + /* 116 */ + { { 
0x6de28ced,0x3c73f0f4,0x9b13ec47,0x1d5da760,0x6e5c6392,0x61b8ce9e, + 0xfbea0946,0xcdf04572 }, + { 0x6c53c3b0,0x1cb3c58b,0x447b843c,0x97fe3c10,0x2cb9780e,0xfb2b8ae1, + 0x97383109,0xee703dda } }, + /* 117 */ + { { 0xff57e43a,0x34515140,0xb1b811b8,0xd44660d3,0x8f42b986,0x2b3b5dff, + 0xa162ce21,0x2a0ad89d }, + { 0x6bc277ba,0x64e4a694,0xc141c276,0xc788c954,0xcabf6274,0x141aa64c, + 0xac2b4659,0xd62d0b67 } }, + /* 118 */ + { { 0x2c054ac4,0x39c5d87b,0xf27df788,0x57005859,0xb18128d6,0xedf7cbf3, + 0x991c2426,0xb39a23f2 }, + { 0xf0b16ae5,0x95284a15,0xa136f51b,0x0c6a05b1,0xf2700783,0x1d63c137, + 0xc0674cc5,0x04ed0092 } }, + /* 119 */ + { { 0x9ae90393,0x1f4185d1,0x4a3d64e6,0x3047b429,0x9854fc14,0xae0001a6, + 0x0177c387,0xa0a91fc1 }, + { 0xae2c831e,0xff0a3f01,0x2b727e16,0xbb76ae82,0x5a3075b4,0x8f12c8a1, + 0x9ed20c41,0x084cf988 } }, + /* 120 */ + { { 0xfca6becf,0xd98509de,0x7dffb328,0x2fceae80,0x4778e8b9,0x5d8a15c4, + 0x73abf77e,0xd57955b2 }, + { 0x31b5d4f1,0x210da79e,0x3cfa7a1c,0xaa52f04b,0xdc27c20b,0xd4d12089, + 0x02d141f1,0x8e14ea42 } }, + /* 121 */ + { { 0xf2897042,0xeed50345,0x43402c4a,0x8d05331f,0xc8bdfb21,0xc8d9c194, + 0x2aa4d158,0x597e1a37 }, + { 0xcf0bd68c,0x0327ec1a,0xab024945,0x6d4be0dc,0xc9fe3e84,0x5b9c8d7a, + 0x199b4dea,0xca3f0236 } }, + /* 122 */ + { { 0x6170bd20,0x592a10b5,0x6d3f5de7,0x0ea897f1,0x44b2ade2,0xa3363ff1, + 0x309c07e4,0xbde7fd7e }, + { 0xb8f5432c,0x516bb6d2,0xe043444b,0x210dc1cb,0xf8f95b5a,0x3db01e6f, + 0x0a7dd198,0xb623ad0e } }, + /* 123 */ + { { 0x60c7b65b,0xa75bd675,0x23a4a289,0xab8c5590,0xd7b26795,0xf8220fd0, + 0x58ec137b,0xd6aa2e46 }, + { 0x5138bb85,0x10abc00b,0xd833a95c,0x8c31d121,0x1702a32e,0xb24ff00b, + 0x2dcc513a,0x111662e0 } }, + /* 124 */ + { { 0xefb42b87,0x78114015,0x1b6c4dff,0xbd9f5d70,0xa7d7c129,0x66ecccd7, + 0x94b750f8,0xdb3ee1cb }, + { 0xf34837cf,0xb26f3db0,0xb9578d4f,0xe7eed18b,0x7c56657d,0x5d2cdf93, + 0x52206a59,0x886a6442 } }, + /* 125 */ + { { 0x65b569ea,0x3c234cfb,0xf72119c1,0x20011141,0xa15a619e,0x8badc85d, + 0x018a17bc,0xa70cf4eb }, + { 0x8c4a6a65,0x224f97ae,0x0134378f,0x36e5cf27,0x4f7e0960,0xbe3a609e, + 0xd1747b77,0xaa4772ab } }, + /* 126 */ + { { 0x7aa60cc0,0x67676131,0x0368115f,0xc7916361,0xbbc1bb5a,0xded98bb4, + 0x30faf974,0x611a6ddc }, + { 0xc15ee47a,0x30e78cbc,0x4e0d96a5,0x2e896282,0x3dd9ed88,0x36f35adf, + 0x16429c88,0x5cfffaf8 } }, + /* 127 */ + { { 0x9b7a99cd,0xc0d54cff,0x843c45a1,0x7bf3b99d,0x62c739e1,0x038a908f, + 0x7dc1994c,0x6e5a6b23 }, + { 0x0ba5db77,0xef8b454e,0xacf60d63,0xb7b8807f,0x76608378,0xe591c0c6, + 0x242dabcc,0x481a238d } }, + /* 128 */ + { { 0x35d0b34a,0xe3417bc0,0x8327c0a7,0x440b386b,0xac0362d1,0x8fb7262d, + 0xe0cdf943,0x2c41114c }, + { 0xad95a0b1,0x2ba5cef1,0x67d54362,0xc09b37a8,0x01e486c9,0x26d6cdd2, + 0x42ff9297,0x20477abf } }, + /* 129 */ + { { 0x18d65dbf,0x2f75173c,0x339edad8,0x77bf940e,0xdcf1001c,0x7022d26b, + 0xc77396b6,0xac66409a }, + { 0xc6261cc3,0x8b0bb36f,0x190e7e90,0x213f7bc9,0xa45e6c10,0x6541ceba, + 0xcc122f85,0xce8e6975 } }, + /* 130 */ + { { 0xbc0a67d2,0x0f121b41,0x444d248a,0x62d4760a,0x659b4737,0x0e044f1d, + 0x250bb4a8,0x08fde365 }, + { 0x848bf287,0xaceec3da,0xd3369d6e,0xc2a62182,0x92449482,0x3582dfdc, + 0x565d6cd7,0x2f7e2fd2 } }, + /* 131 */ + { { 0xc3770fa7,0xae4b92db,0x379043f9,0x095e8d5c,0x17761171,0x54f34e9d, + 0x907702ae,0xc65be92e }, + { 0xf6fd0a40,0x2758a303,0xbcce784b,0xe7d822e3,0x4f9767bf,0x7ae4f585, + 0xd1193b3a,0x4bff8e47 } }, + /* 132 */ + { { 0x00ff1480,0xcd41d21f,0x0754db16,0x2ab8fb7d,0xbbe0f3ea,0xac81d2ef, + 0x5772967d,0x3e4e4ae6 }, + { 
0x3c5303e6,0x7e18f36d,0x92262397,0x3bd9994b,0x1324c3c0,0x9ed70e26, + 0x58ec6028,0x5388aefd } }, + /* 133 */ + { { 0x5e5d7713,0xad1317eb,0x75de49da,0x09b985ee,0xc74fb261,0x32f5bc4f, + 0x4f75be0e,0x5cf908d1 }, + { 0x8e657b12,0x76043510,0xb96ed9e6,0xbfd421a5,0x8970ccc2,0x0e29f51f, + 0x60f00ce2,0xa698ba40 } }, + /* 134 */ + { { 0xef748fec,0x73db1686,0x7e9d2cf9,0xe6e755a2,0xce265eff,0x630b6544, + 0x7aebad8d,0xb142ef8a }, + { 0x17d5770a,0xad31af9f,0x2cb3412f,0x66af3b67,0xdf3359de,0x6bd60d1b, + 0x58515075,0xd1896a96 } }, + /* 135 */ + { { 0x33c41c08,0xec5957ab,0x5468e2e1,0x87de94ac,0xac472f6c,0x18816b73, + 0x7981da39,0x267b0e0b }, + { 0x8e62b988,0x6e554e5d,0x116d21e7,0xd8ddc755,0x3d2a6f99,0x4610faf0, + 0xa1119393,0xb54e287a } }, + /* 136 */ + { { 0x178a876b,0x0a0122b5,0x085104b4,0x51ff96ff,0x14f29f76,0x050b31ab, + 0x5f87d4e6,0x84abb28b }, + { 0x8270790a,0xd5ed439f,0x85e3f46b,0x2d6cb59d,0x6c1e2212,0x75f55c1b, + 0x17655640,0xe5436f67 } }, + /* 137 */ + { { 0x2286e8d5,0x53f9025e,0x864453be,0x353c95b4,0xe408e3a0,0xd832f5bd, + 0x5b9ce99e,0x0404f68b }, + { 0xa781e8e5,0xcad33bde,0x163c2f5b,0x3cdf5018,0x0119caa3,0x57576960, + 0x0ac1c701,0x3a4263df } }, + /* 138 */ + { { 0x9aeb596d,0xc2965ecc,0x023c92b4,0x01ea03e7,0x2e013961,0x4704b4b6, + 0x905ea367,0x0ca8fd3f }, + { 0x551b2b61,0x92523a42,0x390fcd06,0x1eb7a89c,0x0392a63e,0xe7f1d2be, + 0x4ddb0c33,0x96dca264 } }, + /* 139 */ + { { 0x387510af,0x203bb43a,0xa9a36a01,0x846feaa8,0x2f950378,0xd23a5770, + 0x3aad59dc,0x4363e212 }, + { 0x40246a47,0xca43a1c7,0xe55dd24d,0xb362b8d2,0x5d8faf96,0xf9b08604, + 0xd8bb98c4,0x840e115c } }, + /* 140 */ + { { 0x1023e8a7,0xf12205e2,0xd8dc7a0b,0xc808a8cd,0x163a5ddf,0xe292a272, + 0x30ded6d4,0x5e0d6abd }, + { 0x7cfc0f64,0x07a721c2,0x0e55ed88,0x42eec01d,0x1d1f9db2,0x26a7bef9, + 0x2945a25a,0x7dea48f4 } }, + /* 141 */ + { { 0xe5060a81,0xabdf6f1c,0xf8f95615,0xe79f9c72,0x06ac268b,0xcfd36c54, + 0xebfd16d1,0xabc2a2be }, + { 0xd3e2eac7,0x8ac66f91,0xd2dd0466,0x6f10ba63,0x0282d31b,0x6790e377, + 0x6c7eefc1,0x4ea35394 } }, + /* 142 */ + { { 0x5266309d,0xed8a2f8d,0x81945a3e,0x0a51c6c0,0x578c5dc1,0xcecaf45a, + 0x1c94ffc3,0x3a76e689 }, + { 0x7d7b0d0f,0x9aace8a4,0x8f584a5f,0x963ace96,0x4e697fbe,0x51a30c72, + 0x465e6464,0x8212a10a } }, + /* 143 */ + { { 0xcfab8caa,0xef7c61c3,0x0e142390,0x18eb8e84,0x7e9733ca,0xcd1dff67, + 0x599cb164,0xaa7cab71 }, + { 0xbc837bd1,0x02fc9273,0xc36af5d7,0xc06407d0,0xf423da49,0x17621292, + 0xfe0617c3,0x40e38073 } }, + /* 144 */ + { { 0xa7bf9b7c,0xf4f80824,0x3fbe30d0,0x365d2320,0x97cf9ce3,0xbfbe5320, + 0xb3055526,0xe3604700 }, + { 0x6cc6c2c7,0x4dcb9911,0xba4cbee6,0x72683708,0x637ad9ec,0xdcded434, + 0xa3dee15f,0x6542d677 } }, + /* 145 */ + { { 0x7b6c377a,0x3f32b6d0,0x903448be,0x6cb03847,0x20da8af7,0xd6fdd3a8, + 0x09bb6f21,0xa6534aee }, + { 0x1035facf,0x30a1780d,0x9dcb47e6,0x35e55a33,0xc447f393,0x6ea50fe1, + 0xdc9aef22,0xf3cb672f } }, + /* 146 */ + { { 0x3b55fd83,0xeb3719fe,0x875ddd10,0xe0d7a46c,0x05cea784,0x33ac9fa9, + 0xaae870e7,0x7cafaa2e }, + { 0x1d53b338,0x9b814d04,0xef87e6c6,0xe0acc0a0,0x11672b0f,0xfb93d108, + 0xb9bd522e,0x0aab13c1 } }, + /* 147 */ + { { 0xd2681297,0xddcce278,0xb509546a,0xcb350eb1,0x7661aaf2,0x2dc43173, + 0x847012e9,0x4b91a602 }, + { 0x72f8ddcf,0xdcff1095,0x9a911af4,0x08ebf61e,0xc372430e,0x48f4360a, + 0x72321cab,0x49534c53 } }, + /* 148 */ + { { 0xf07b7e9d,0x83df7d71,0x13cd516f,0xa478efa3,0x6c047ee3,0x78ef264b, + 0xd65ac5ee,0xcaf46c4f }, + { 0x92aa8266,0xa04d0c77,0x913684bb,0xedf45466,0xae4b16b0,0x56e65168, + 0x04c6770f,0x14ce9e57 } }, + /* 149 */ + { { 
0x965e8f91,0x99445e3e,0xcb0f2492,0xd3aca1ba,0x90c8a0a0,0xd31cc70f, + 0x3e4c9a71,0x1bb708a5 }, + { 0x558bdd7a,0xd5ca9e69,0x018a26b1,0x734a0508,0x4c9cf1ec,0xb093aa71, + 0xda300102,0xf9d126f2 } }, + /* 150 */ + { { 0xaff9563e,0x749bca7a,0xb49914a0,0xdd077afe,0xbf5f1671,0xe27a0311, + 0x729ecc69,0x807afcb9 }, + { 0xc9b08b77,0x7f8a9337,0x443c7e38,0x86c3a785,0x476fd8ba,0x85fafa59, + 0x6568cd8c,0x751adcd1 } }, + /* 151 */ + { { 0x10715c0d,0x8aea38b4,0x8f7697f7,0xd113ea71,0x93fbf06d,0x665eab14, + 0x2537743f,0x29ec4468 }, + { 0xb50bebbc,0x3d94719c,0xe4505422,0x399ee5bf,0x8d2dedb1,0x90cd5b3a, + 0x92a4077d,0xff9370e3 } }, + /* 152 */ + { { 0xc6b75b65,0x59a2d69b,0x266651c5,0x4188f8d5,0x3de9d7d2,0x28a9f33e, + 0xa2a9d01a,0x9776478b }, + { 0x929af2c7,0x8852622d,0x4e690923,0x334f5d6d,0xa89a51e9,0xce6cc7e5, + 0xac2f82fa,0x74a6313f } }, + /* 153 */ + { { 0xb75f079c,0xb2f4dfdd,0x18e36fbb,0x85b07c95,0xe7cd36dd,0x1b6cfcf0, + 0x0ff4863d,0xab75be15 }, + { 0x173fc9b7,0x81b367c0,0xd2594fd0,0xb90a7420,0xc4091236,0x15fdbf03, + 0x0b4459f6,0x4ebeac2e } }, + /* 154 */ + { { 0x5c9f2c53,0xeb6c5fe7,0x8eae9411,0xd2522011,0xf95ac5d8,0xc8887633, + 0x2c1baffc,0xdf99887b }, + { 0x850aaecb,0xbb78eed2,0x01d6a272,0x9d49181b,0xb1cdbcac,0x978dd511, + 0x779f4058,0x27b040a7 } }, + /* 155 */ + { { 0xf73b2eb2,0x90405db7,0x8e1b2118,0xe0df8508,0x5962327e,0x501b7152, + 0xe4cfa3f5,0xb393dd37 }, + { 0x3fd75165,0xa1230e7b,0xbcd33554,0xd66344c2,0x0f7b5022,0x6c36f1be, + 0xd0463419,0x09588c12 } }, + /* 156 */ + { { 0x02601c3b,0xe086093f,0xcf5c335f,0xfb0252f8,0x894aff28,0x955cf280, + 0xdb9f648b,0x81c879a9 }, + { 0xc6f56c51,0x040e687c,0x3f17618c,0xfed47169,0x9059353b,0x44f88a41, + 0x5fc11bc4,0xfa0d48f5 } }, + /* 157 */ + { { 0xe1608e4d,0xbc6e1c9d,0x3582822c,0x010dda11,0x157ec2d7,0xf6b7ddc1, + 0xb6a367d6,0x8ea0e156 }, + { 0x2383b3b4,0xa354e02f,0x3f01f53c,0x69966b94,0x2de03ca5,0x4ff6632b, + 0xfa00b5ac,0x3f5ab924 } }, + /* 158 */ + { { 0x59739efb,0x337bb0d9,0xe7ebec0d,0xc751b0f4,0x411a67d1,0x2da52dd6, + 0x2b74256e,0x8bc76887 }, + { 0x82d3d253,0xa5be3b72,0xf58d779f,0xa9f679a1,0xe16767bb,0xa1cac168, + 0x60fcf34f,0xb386f190 } }, + /* 159 */ + { { 0x2fedcfc2,0x31f3c135,0x62f8af0d,0x5396bf62,0xe57288c2,0x9a02b4ea, + 0x1b069c4d,0x4cb460f7 }, + { 0x5b8095ea,0xae67b4d3,0x6fc07603,0x92bbf859,0xb614a165,0xe1475f66, + 0x95ef5223,0x52c0d508 } }, + /* 160 */ + { { 0x15339848,0x231c210e,0x70778c8d,0xe87a28e8,0x6956e170,0x9d1de661, + 0x2bb09c0b,0x4ac3c938 }, + { 0x6998987d,0x19be0551,0xae09f4d6,0x8b2376c4,0x1a3f933d,0x1de0b765, + 0xe39705f4,0x380d94c7 } }, + /* 161 */ + { { 0x81542e75,0x01a355aa,0xee01b9b7,0x96c724a1,0x624d7087,0x6b3a2977, + 0xde2637af,0x2ce3e171 }, + { 0xf5d5bc1a,0xcfefeb49,0x2777e2b5,0xa655607e,0x9513756c,0x4feaac2f, + 0x0b624e4d,0x2e6cd852 } }, + /* 162 */ + { { 0x8c31c31d,0x3685954b,0x5bf21a0c,0x68533d00,0x75c79ec9,0x0bd7626e, + 0x42c69d54,0xca177547 }, + { 0xf6d2dbb2,0xcc6edaff,0x174a9d18,0xfd0d8cbd,0xaa4578e8,0x875e8793, + 0x9cab2ce6,0xa976a713 } }, + /* 163 */ + { { 0x93fb353d,0x0a651f1b,0x57fcfa72,0xd75cab8b,0x31b15281,0xaa88cfa7, + 0x0a1f4999,0x8720a717 }, + { 0x693e1b90,0x8c3e8d37,0x16f6dfc3,0xd345dc0b,0xb52a8742,0x8ea8d00a, + 0xc769893c,0x9719ef29 } }, + /* 164 */ + { { 0x58e35909,0x820eed8d,0x33ddc116,0x9366d8dc,0x6e205026,0xd7f999d0, + 0xe15704c1,0xa5072976 }, + { 0xc4e70b2e,0x002a37ea,0x6890aa8a,0x84dcf657,0x645b2a5c,0xcd71bf18, + 0xf7b77725,0x99389c9d } }, + /* 165 */ + { { 0x7ada7a4b,0x238c08f2,0xfd389366,0x3abe9d03,0x766f512c,0x6b672e89, + 0x202c82e4,0xa88806aa }, + { 
0xd380184e,0x6602044a,0x126a8b85,0xa8cb78c4,0xad844f17,0x79d670c0, + 0x4738dcfe,0x0043bffb } }, + /* 166 */ + { { 0x36d5192e,0x8d59b5dc,0x4590b2af,0xacf885d3,0x11601781,0x83566d0a, + 0xba6c4866,0x52f3ef01 }, + { 0x0edcb64d,0x3986732a,0x8068379f,0x0a482c23,0x7040f309,0x16cbe5fa, + 0x9ef27e75,0x3296bd89 } }, + /* 167 */ + { { 0x454d81d7,0x476aba89,0x51eb9b3c,0x9eade7ef,0x81c57986,0x619a21cd, + 0xaee571e9,0x3b90febf }, + { 0x5496f7cb,0x9393023e,0x7fb51bc4,0x55be41d8,0x99beb5ce,0x03f1dd48, + 0x9f810b18,0x6e88069d } }, + /* 168 */ + { { 0xb43ea1db,0xce37ab11,0x5259d292,0x0a7ff1a9,0x8f84f186,0x851b0221, + 0xdefaad13,0xa7222bea }, + { 0x2b0a9144,0xa2ac78ec,0xf2fa59c5,0x5a024051,0x6147ce38,0x91d1eca5, + 0xbc2ac690,0xbe94d523 } }, + /* 169 */ + { { 0x0b226ce7,0x72f4945e,0x967e8b70,0xb8afd747,0x85a6c63e,0xedea46f1, + 0x9be8c766,0x7782defe }, + { 0x3db38626,0x760d2aa4,0x76f67ad1,0x460ae787,0x54499cdb,0x341b86fc, + 0xa2892e4b,0x03838567 } }, + /* 170 */ + { { 0x79ec1a0f,0x2d8daefd,0xceb39c97,0x3bbcd6fd,0x58f61a95,0xf5575ffc, + 0xadf7b420,0xdbd986c4 }, + { 0x15f39eb7,0x81aa8814,0xb98d976c,0x6ee2fcf5,0xcf2f717d,0x5465475d, + 0x6860bbd0,0x8e24d3c4 } }, + /* 171 */ + { { 0x9a587390,0x749d8e54,0x0cbec588,0x12bb194f,0xb25983c6,0x46e07da4, + 0x407bafc8,0x541a99c4 }, + { 0x624c8842,0xdb241692,0xd86c05ff,0x6044c12a,0x4f7fcf62,0xc59d14b4, + 0xf57d35d1,0xc0092c49 } }, + /* 172 */ + { { 0xdf2e61ef,0xd3cc75c3,0x2e1b35ca,0x7e8841c8,0x909f29f4,0xc62d30d1, + 0x7286944d,0x75e40634 }, + { 0xbbc237d0,0xe7d41fc5,0xec4f01c9,0xc9537bf0,0x282bd534,0x91c51a16, + 0xc7848586,0x5b7cb658 } }, + /* 173 */ + { { 0x8a28ead1,0x964a7084,0xfd3b47f6,0x802dc508,0x767e5b39,0x9ae4bfd1, + 0x8df097a1,0x7ae13eba }, + { 0xeadd384e,0xfd216ef8,0xb6b2ff06,0x0361a2d9,0x4bcdb5f3,0x204b9878, + 0xe2a8e3fd,0x787d8074 } }, + /* 174 */ + { { 0x757fbb1c,0xc5e25d6b,0xca201deb,0xe47bddb2,0x6d2233ff,0x4a55e9a3, + 0x9ef28484,0x5c222819 }, + { 0x88315250,0x773d4a85,0x827097c1,0x21b21a2b,0xdef5d33f,0xab7c4ea1, + 0xbaf0f2b0,0xe45d37ab } }, + /* 175 */ + { { 0x28511c8a,0xd2df1e34,0xbdca6cd3,0xebb229c8,0x627c39a7,0x578a71a7, + 0x84dfb9d3,0xed7bc122 }, + { 0x93dea561,0xcf22a6df,0xd48f0ed1,0x5443f18d,0x5bad23e8,0xd8b86140, + 0x45ca6d27,0xaac97cc9 } }, + /* 176 */ + { { 0xa16bd00a,0xeb54ea74,0xf5c0bcc1,0xd839e9ad,0x1f9bfc06,0x092bb7f1, + 0x1163dc4e,0x318f97b3 }, + { 0xc30d7138,0xecc0c5be,0xabc30220,0x44e8df23,0xb0223606,0x2bb7972f, + 0x9a84ff4d,0xfa41faa1 } }, + /* 177 */ + { { 0xa6642269,0x4402d974,0x9bb783bd,0xc81814ce,0x7941e60b,0x398d38e4, + 0x1d26e9e2,0x38bb6b2c }, + { 0x6a577f87,0xc64e4a25,0xdc11fe1c,0x8b52d253,0x62280728,0xff336abf, + 0xce7601a5,0x94dd0905 } }, + /* 178 */ + { { 0xde93f92a,0x156cf7dc,0x89b5f315,0xa01333cb,0xc995e750,0x02404df9, + 0xd25c2ae9,0x92077867 }, + { 0x0bf39d44,0xe2471e01,0x96bb53d7,0x5f2c9020,0x5c9c3d8f,0x4c44b7b3, + 0xd29beb51,0x81e8428b } }, + /* 179 */ + { { 0xc477199f,0x6dd9c2ba,0x6b5ecdd9,0x8cb8eeee,0xee40fd0e,0x8af7db3f, + 0xdbbfa4b1,0x1b94ab62 }, + { 0xce47f143,0x44f0d8b3,0x63f46163,0x51e623fc,0xcc599383,0xf18f270f, + 0x055590ee,0x06a38e28 } }, + /* 180 */ + { { 0xb3355b49,0x2e5b0139,0xb4ebf99b,0x20e26560,0xd269f3dc,0xc08ffa6b, + 0x83d9d4f8,0xa7b36c20 }, + { 0x1b3e8830,0x64d15c3a,0xa89f9c0b,0xd5fceae1,0xe2d16930,0xcfeee4a2, + 0xa2822a20,0xbe54c6b4 } }, + /* 181 */ + { { 0x8d91167c,0xd6cdb3df,0xe7a6625e,0x517c3f79,0x346ac7f4,0x7105648f, + 0xeae022bb,0xbf30a5ab }, + { 0x93828a68,0x8e7785be,0x7f3ef036,0x5161c332,0x592146b2,0xe11b5feb, + 0x2732d13a,0xd1c820de } }, + /* 182 */ + { { 
0x9038b363,0x043e1347,0x6b05e519,0x58c11f54,0x6026cad1,0x4fe57abe, + 0x68a18da3,0xb7d17bed }, + { 0xe29c2559,0x44ca5891,0x5bfffd84,0x4f7a0376,0x74e46948,0x498de4af, + 0x6412cc64,0x3997fd5e } }, + /* 183 */ + { { 0x8bd61507,0xf2074682,0x34a64d2a,0x29e132d5,0x8a8a15e3,0xffeddfb0, + 0x3c6c13e8,0x0eeb8929 }, + { 0xa7e259f8,0xe9b69a3e,0xd13e7e67,0xce1db7e6,0xad1fa685,0x277318f6, + 0xc922b6ef,0x228916f8 } }, + /* 184 */ + { { 0x0a12ab5b,0x959ae25b,0x957bc136,0xcc11171f,0xd16e2b0c,0x8058429e, + 0x6e93097e,0xec05ad1d }, + { 0xac3f3708,0x157ba5be,0x30b59d77,0x31baf935,0x118234e5,0x47b55237, + 0x7ff11b37,0x7d314156 } }, + /* 185 */ + { { 0xf6dfefab,0x7bd9c05c,0xdcb37707,0xbe2f2268,0x3a38bb95,0xe53ead97, + 0x9bc1d7a3,0xe9ce66fc }, + { 0x6f6a02a1,0x75aa1576,0x60e600ed,0x38c087df,0x68cdc1b9,0xf8947f34, + 0x72280651,0xd9650b01 } }, + /* 186 */ + { { 0x5a057e60,0x504b4c4a,0x8def25e4,0xcbccc3be,0x17c1ccbd,0xa6353208, + 0x804eb7a2,0x14d6699a }, + { 0xdb1f411a,0x2c8a8415,0xf80d769c,0x09fbaf0b,0x1c2f77ad,0xb4deef90, + 0x0d43598a,0x6f4c6841 } }, + /* 187 */ + { { 0x96c24a96,0x8726df4e,0xfcbd99a3,0x534dbc85,0x8b2ae30a,0x3c466ef2, + 0x61189abb,0x4c4350fd }, + { 0xf855b8da,0x2967f716,0x463c38a1,0x41a42394,0xeae93343,0xc37e1413, + 0x5a3118b5,0xa726d242 } }, + /* 188 */ + { { 0x948c1086,0xdae6b3ee,0xcbd3a2e1,0xf1de503d,0x03d022f3,0x3f35ed3f, + 0xcc6cf392,0x13639e82 }, + { 0xcdafaa86,0x9ac938fb,0x2654a258,0xf45bc5fb,0x45051329,0x1963b26e, + 0xc1a335a3,0xca9365e1 } }, + /* 189 */ + { { 0x4c3b2d20,0x3615ac75,0x904e241b,0x742a5417,0xcc9d071d,0xb08521c4, + 0x970b72a5,0x9ce29c34 }, + { 0x6d3e0ad6,0x8cc81f73,0xf2f8434c,0x8060da9e,0x6ce862d9,0x35ed1d1a, + 0xab42af98,0x48c4abd7 } }, + /* 190 */ + { { 0x40c7485a,0xd221b0cc,0xe5274dbf,0xead455bb,0x9263d2e8,0x493c7698, + 0xf67b33cb,0x78017c32 }, + { 0x930cb5ee,0xb9d35769,0x0c408ed2,0xc0d14e94,0x272f1a4d,0xf8b7bf55, + 0xde5c1c04,0x53cd0454 } }, + /* 191 */ + { { 0x5d28ccac,0xbcd585fa,0x005b746e,0x5f823e56,0xcd0123aa,0x7c79f0a1, + 0xd3d7fa8f,0xeea465c1 }, + { 0x0551803b,0x7810659f,0x7ce6af70,0x6c0b599f,0x29288e70,0x4195a770, + 0x7ae69193,0x1b6e42a4 } }, + /* 192 */ + { { 0xf67d04c3,0x2e80937c,0x89eeb811,0x1e312be2,0x92594d60,0x56b5d887, + 0x187fbd3d,0x0224da14 }, + { 0x0c5fe36f,0x87abb863,0x4ef51f5f,0x580f3c60,0xb3b429ec,0x964fb1bf, + 0x42bfff33,0x60838ef0 } }, + /* 193 */ + { { 0x7e0bbe99,0x432cb2f2,0x04aa39ee,0x7bda44f3,0x9fa93903,0x5f497c7a, + 0x2d331643,0x636eb202 }, + { 0x93ae00aa,0xfcfd0e61,0x31ae6d2f,0x875a00fe,0x9f93901c,0xf43658a2, + 0x39218bac,0x8844eeb6 } }, + /* 194 */ + { { 0x6b3bae58,0x114171d2,0x17e39f3e,0x7db3df71,0x81a8eada,0xcd37bc7f, + 0x51fb789e,0x27ba83dc }, + { 0xfbf54de5,0xa7df439f,0xb5fe1a71,0x7277030b,0xdb297a48,0x42ee8e35, + 0x87f3a4ab,0xadb62d34 } }, + /* 195 */ + { { 0xa175df2a,0x9b1168a2,0x618c32e9,0x082aa04f,0x146b0916,0xc9e4f2e7, + 0x75e7c8b2,0xb990fd76 }, + { 0x4df37313,0x0829d96b,0xd0b40789,0x1c205579,0x78087711,0x66c9ae4a, + 0x4d10d18d,0x81707ef9 } }, + /* 196 */ + { { 0x03d6ff96,0x97d7cab2,0x0d843360,0x5b851bfc,0xd042db4b,0x268823c4, + 0xd5a8aa5c,0x3792daea }, + { 0x941afa0b,0x52818865,0x42d83671,0xf3e9e741,0x5be4e0a7,0x17c82527, + 0x94b001ba,0x5abd635e } }, + /* 197 */ + { { 0x0ac4927c,0x727fa84e,0xa7c8cf23,0xe3886035,0x4adca0df,0xa4bcd5ea, + 0x846ab610,0x5995bf21 }, + { 0x829dfa33,0xe90f860b,0x958fc18b,0xcaafe2ae,0x78630366,0x9b3baf44, + 0xd483411e,0x44c32ca2 } }, + /* 198 */ + { { 0xe40ed80c,0xa74a97f1,0x31d2ca82,0x5f938cb1,0x7c2d6ad9,0x53f2124b, + 0x8082a54c,0x1f2162fb }, + { 
0x720b173e,0x7e467cc5,0x085f12f9,0x40e8a666,0x4c9d65dc,0x8cebc20e, + 0xc3e907c9,0x8f1d402b } }, + /* 199 */ + { { 0xfbc4058a,0x4f592f9c,0x292f5670,0xb15e14b6,0xbc1d8c57,0xc55cfe37, + 0x926edbf9,0xb1980f43 }, + { 0x32c76b09,0x98c33e09,0x33b07f78,0x1df5279d,0x863bb461,0x6f08ead4, + 0x37448e45,0x2828ad9b } }, + /* 200 */ + { { 0xc4cf4ac5,0x696722c4,0xdde64afb,0xf5ac1a3f,0xe0890832,0x0551baa2, + 0x5a14b390,0x4973f127 }, + { 0x322eac5d,0xe59d8335,0x0bd9b568,0x5e07eef5,0xa2588393,0xab36720f, + 0xdb168ac7,0x6dac8ed0 } }, + /* 201 */ + { { 0xeda835ef,0xf7b545ae,0x1d10ed51,0x4aa113d2,0x13741b09,0x035a65e0, + 0x20b9de4c,0x4b23ef59 }, + { 0x3c4c7341,0xe82bb680,0x3f58bc37,0xd457706d,0xa51e3ee8,0x73527863, + 0xddf49a4e,0x4dd71534 } }, + /* 202 */ + { { 0x95476cd9,0xbf944672,0xe31a725b,0x648d072f,0xfc4b67e0,0x1441c8b8, + 0x2f4a4dbb,0xfd317000 }, + { 0x8995d0e1,0x1cb43ff4,0x0ef729aa,0x76e695d1,0x41798982,0xe0d5f976, + 0x9569f365,0x14fac58c } }, + /* 203 */ + { { 0xf312ae18,0xad9a0065,0xfcc93fc9,0x51958dc0,0x8a7d2846,0xd9a14240, + 0x36abda50,0xed7c7651 }, + { 0x25d4abbc,0x46270f1a,0xf1a113ea,0x9b5dd8f3,0x5b51952f,0xc609b075, + 0x4d2e9f53,0xfefcb7f7 } }, + /* 204 */ + { { 0xba119185,0xbd09497a,0xaac45ba4,0xd54e8c30,0xaa521179,0x492479de, + 0x87e0d80b,0x1801a57e }, + { 0xfcafffb0,0x073d3f8d,0xae255240,0x6cf33c0b,0x5b5fdfbc,0x781d763b, + 0x1ead1064,0x9f8fc11e } }, + /* 205 */ + { { 0x5e69544c,0x1583a171,0xf04b7813,0x0eaf8567,0x278a4c32,0x1e22a8fd, + 0x3d3a69a9,0xa9d3809d }, + { 0x59a2da3b,0x936c2c2c,0x1895c847,0x38ccbcf6,0x63d50869,0x5e65244e, + 0xe1178ef7,0x3006b9ae } }, + /* 206 */ + { { 0xc9eead28,0x0bb1f2b0,0x89f4dfbc,0x7eef635d,0xb2ce8939,0x074757fd, + 0x45f8f761,0x0ab85fd7 }, + { 0x3e5b4549,0xecda7c93,0x97922f21,0x4be2bb5c,0xb43b8040,0x261a1274, + 0x11e942c2,0xb122d675 } }, + /* 207 */ + { { 0x66a5ae7a,0x3be607be,0x76adcbe3,0x01e703fa,0x4eb6e5c5,0xaf904301, + 0x097dbaec,0x9f599dc1 }, + { 0x0ff250ed,0x6d75b718,0x349a20dc,0x8eb91574,0x10b227a3,0x425605a4, + 0x8a294b78,0x7d5528e0 } }, + /* 208 */ + { { 0x20c26def,0xf0f58f66,0x582b2d1e,0x025585ea,0x01ce3881,0xfbe7d79b, + 0x303f1730,0x28ccea01 }, + { 0x79644ba5,0xd1dabcd1,0x06fff0b8,0x1fc643e8,0x66b3e17b,0xa60a76fc, + 0xa1d013bf,0xc18baf48 } }, + /* 209 */ + { { 0x5dc4216d,0x34e638c8,0x206142ac,0x00c01067,0x95f5064a,0xd453a171, + 0xb7a9596b,0x9def809d }, + { 0x67ab8d2c,0x41e8642e,0x6237a2b6,0xb4240433,0x64c4218b,0x7d506a6d, + 0x68808ce5,0x0357f8b0 } }, + /* 210 */ + { { 0x4cd2cc88,0x8e9dbe64,0xf0b8f39d,0xcc61c28d,0xcd30a0c8,0x4a309874, + 0x1b489887,0xe4a01add }, + { 0xf57cd8f9,0x2ed1eeac,0xbd594c48,0x1b767d3e,0x7bd2f787,0xa7295c71, + 0xce10cc30,0x466d7d79 } }, + /* 211 */ + { { 0x9dada2c7,0x47d31892,0x8f9aa27d,0x4fa0a6c3,0x820a59e1,0x90e4fd28, + 0x451ead1a,0xc672a522 }, + { 0x5d86b655,0x30607cc8,0xf9ad4af1,0xf0235d3b,0x571172a6,0x99a08680, + 0xf2a67513,0x5e3d64fa } }, + /* 212 */ + { { 0x9b3b4416,0xaa6410c7,0xeab26d99,0xcd8fcf85,0xdb656a74,0x5ebff74a, + 0xeb8e42fc,0x6c8a7a95 }, + { 0xb02a63bd,0x10c60ba7,0x8b8f0047,0x6b2f2303,0x312d90b0,0x8c6c3738, + 0xad82ca91,0x348ae422 } }, + /* 213 */ + { { 0x5ccda2fb,0x7f474663,0x8e0726d2,0x22accaa1,0x492b1f20,0x85adf782, + 0xd9ef2d2e,0xc1074de0 }, + { 0xae9a65b3,0xfcf3ce44,0x05d7151b,0xfd71e4ac,0xce6a9788,0xd4711f50, + 0xc9e54ffc,0xfbadfbdb } }, + /* 214 */ + { { 0x20a99363,0x1713f1cd,0x6cf22775,0xb915658f,0x24d359b2,0x968175cd, + 0x83716fcd,0xb7f976b4 }, + { 0x5d6dbf74,0x5758e24d,0x71c3af36,0x8d23bafd,0x0243dfe3,0x48f47760, + 0xcafcc805,0xf4d41b2e } }, + /* 215 */ + { { 
0xfdabd48d,0x51f1cf28,0x32c078a4,0xce81be36,0x117146e9,0x6ace2974, + 0xe0160f10,0x180824ea }, + { 0x66e58358,0x0387698b,0xce6ca358,0x63568752,0x5e41e6c5,0x82380e34, + 0x83cf6d25,0x67e5f639 } }, + /* 216 */ + { { 0xcf4899ef,0xf89ccb8d,0x9ebb44c0,0x949015f0,0xb2598ec9,0x546f9276, + 0x04c11fc6,0x9fef789a }, + { 0x53d2a071,0x6d367ecf,0xa4519b09,0xb10e1a7f,0x611e2eef,0xca6b3fb0, + 0xa99c4e20,0xbc80c181 } }, + /* 217 */ + { { 0xe5eb82e6,0x972536f8,0xf56cb920,0x1a484fc7,0x50b5da5e,0xc78e2171, + 0x9f8cdf10,0x49270e62 }, + { 0xea6b50ad,0x1a39b7bb,0xa2388ffc,0x9a0284c1,0x8107197b,0x5403eb17, + 0x61372f7f,0xd2ee52f9 } }, + /* 218 */ + { { 0x88e0362a,0xd37cd285,0x8fa5d94d,0x442fa8a7,0xa434a526,0xaff836e5, + 0xe5abb733,0xdfb478be }, + { 0x673eede6,0xa91f1ce7,0x2b5b2f04,0xa5390ad4,0x5530da2f,0x5e66f7bf, + 0x08df473a,0xd9a140b4 } }, + /* 219 */ + { { 0x6e8ea498,0x0e0221b5,0x3563ee09,0x62347829,0x335d2ade,0xe06b8391, + 0x623f4b1a,0x760c058d }, + { 0xc198aa79,0x0b89b58c,0xf07aba7f,0xf74890d2,0xfde2556a,0x4e204110, + 0x8f190409,0x7141982d } }, + /* 220 */ + { { 0x4d4b0f45,0x6f0a0e33,0x392a94e1,0xd9280b38,0xb3c61d5e,0x3af324c6, + 0x89d54e47,0x3af9d1ce }, + { 0x20930371,0xfd8f7981,0x21c17097,0xeda2664c,0xdc42309b,0x0e9545dc, + 0x73957dd6,0xb1f815c3 } }, + /* 221 */ + { { 0x89fec44a,0x84faa78e,0x3caa4caf,0xc8c2ae47,0xc1b6a624,0x691c807d, + 0x1543f052,0xa41aed14 }, + { 0x7d5ffe04,0x42435399,0x625b6e20,0x8bacb2df,0x87817775,0x85d660be, + 0x86fb60ef,0xd6e9c1dd } }, + /* 222 */ + { { 0xc6853264,0x3aa2e97e,0xe2304a0b,0x771533b7,0xb8eae9be,0x1b912bb7, + 0xae9bf8c2,0x9c9c6e10 }, + { 0xe030b74c,0xa2309a59,0x6a631e90,0x4ed7494d,0xa49b79f2,0x89f44b23, + 0x40fa61b6,0x566bd596 } }, + /* 223 */ + { { 0xc18061f3,0x066c0118,0x7c83fc70,0x190b25d3,0x27273245,0xf05fc8e0, + 0xf525345e,0xcf2c7390 }, + { 0x10eb30cf,0xa09bceb4,0x0d77703a,0xcfd2ebba,0x150ff255,0xe842c43a, + 0x8aa20979,0x02f51755 } }, + /* 224 */ + { { 0xaddb7d07,0x396ef794,0x24455500,0x0b4fc742,0xc78aa3ce,0xfaff8eac, + 0xe8d4d97d,0x14e9ada5 }, + { 0x2f7079e2,0xdaa480a1,0xe4b0800e,0x45baa3cd,0x7838157d,0x01765e2d, + 0x8e9d9ae8,0xa0ad4fab } }, + /* 225 */ + { { 0x4a653618,0x0bfb7621,0x31eaaa5f,0x1872813c,0x44949d5e,0x1553e737, + 0x6e56ed1e,0xbcd530b8 }, + { 0x32e9c47b,0x169be853,0xb50059ab,0xdc2776fe,0x192bfbb4,0xcdba9761, + 0x6979341d,0x909283cf } }, + /* 226 */ + { { 0x76e81a13,0x67b00324,0x62171239,0x9bee1a99,0xd32e19d6,0x08ed361b, + 0xace1549a,0x35eeb7c9 }, + { 0x7e4e5bdc,0x1280ae5a,0xb6ceec6e,0x2dcd2cd3,0x6e266bc1,0x52e4224c, + 0x448ae864,0x9a8b2cf4 } }, + /* 227 */ + { { 0x09d03b59,0xf6471bf2,0xb65af2ab,0xc90e62a3,0xebd5eec9,0xff7ff168, + 0xd4491379,0x6bdb60f4 }, + { 0x8a55bc30,0xdadafebc,0x10097fe0,0xc79ead16,0x4c1e3bdd,0x42e19741, + 0x94ba08a9,0x01ec3cfd } }, + /* 228 */ + { { 0xdc9485c2,0xba6277eb,0x22fb10c7,0x48cc9a79,0x70a28d8a,0x4f61d60f, + 0x475464f6,0xd1acb1c0 }, + { 0x26f36612,0xd26902b1,0xe0618d8b,0x59c3a44e,0x308357ee,0x4df8a813, + 0x405626c2,0x7dcd079d } }, + /* 229 */ + { { 0xf05a4b48,0x5ce7d4d3,0x37230772,0xadcd2952,0x812a915a,0xd18f7971, + 0x377d19b8,0x0bf53589 }, + { 0x6c68ea73,0x35ecd95a,0x823a584d,0xc7f3bbca,0xf473a723,0x9fb674c6, + 0xe16686fc,0xd28be4d9 } }, + /* 230 */ + { { 0x38fa8e4b,0x5d2b9906,0x893fd8fc,0x559f186e,0x436fb6fc,0x3a6de2aa, + 0x510f88ce,0xd76007aa }, + { 0x523a4988,0x2d10aab6,0x74dd0273,0xb455cf44,0xa3407278,0x7f467082, + 0xb303bb01,0xf2b52f68 } }, + /* 231 */ + { { 0x9835b4ca,0x0d57eafa,0xbb669cbc,0x2d2232fc,0xc6643198,0x8eeeb680, + 0xcc5aed3a,0xd8dbe98e }, + { 
0xc5a02709,0xcba9be3f,0xf5ba1fa8,0x30be68e5,0xf10ea852,0xfebd43cd, + 0xee559705,0xe01593a3 } }, + /* 232 */ + { { 0xea75a0a6,0xd3e5af50,0x57858033,0x512226ac,0xd0176406,0x6fe6d50f, + 0xaeb8ef06,0xafec07b1 }, + { 0x80bb0a31,0x7fb99567,0x37309aae,0x6f1af3cc,0x01abf389,0x9153a15a, + 0x6e2dbfdd,0xa71b9354 } }, + /* 233 */ + { { 0x18f593d2,0xbf8e12e0,0xa078122b,0xd1a90428,0x0ba4f2ad,0x150505db, + 0x628523d9,0x53a2005c }, + { 0xe7f2b935,0x07c8b639,0xc182961a,0x2bff975a,0x7518ca2c,0x86bceea7, + 0x3d588e3d,0xbf47d19b } }, + /* 234 */ + { { 0xdd7665d5,0x672967a7,0x2f2f4de5,0x4e303057,0x80d4903f,0x144005ae, + 0x39c9a1b6,0x001c2c7f }, + { 0x69efc6d6,0x143a8014,0x7bc7a724,0xc810bdaa,0xa78150a4,0x5f65670b, + 0x86ffb99b,0xfdadf8e7 } }, + /* 235 */ + { { 0xffc00785,0xfd38cb88,0x3b48eb67,0x77fa7591,0xbf368fbc,0x0454d055, + 0x5aa43c94,0x3a838e4d }, + { 0x3e97bb9a,0x56166329,0x441d94d9,0x9eb93363,0x0adb2a83,0x515591a6, + 0x873e1da3,0x3cdb8257 } }, + /* 236 */ + { { 0x7de77eab,0x137140a9,0x41648109,0xf7e1c50d,0xceb1d0df,0x762dcad2, + 0xf1f57fba,0x5a60cc89 }, + { 0x40d45673,0x80b36382,0x5913c655,0x1b82be19,0xdd64b741,0x057284b8, + 0xdbfd8fc0,0x922ff56f } }, + /* 237 */ + { { 0xc9a129a1,0x1b265dee,0xcc284e04,0xa5b1ce57,0xcebfbe3c,0x04380c46, + 0xf6c5cd62,0x72919a7d }, + { 0x8fb90f9a,0x298f453a,0x88e4031b,0xd719c00b,0x796f1856,0xe32c0e77, + 0x3624089a,0x5e791780 } }, + /* 238 */ + { { 0x7f63cdfb,0x5c16ec55,0xf1cae4fd,0x8e6a3571,0x560597ca,0xfce26bea, + 0xe24c2fab,0x4e0a5371 }, + { 0xa5765357,0x276a40d3,0x0d73a2b4,0x3c89af44,0x41d11a32,0xb8f370ae, + 0xd56604ee,0xf5ff7818 } }, + /* 239 */ + { { 0x1a09df21,0xfbf3e3fe,0xe66e8e47,0x26d5d28e,0x29c89015,0x2096bd0a, + 0x533f5e64,0xe41df0e9 }, + { 0xb3ba9e3f,0x305fda40,0x2604d895,0xf2340ceb,0x7f0367c7,0x0866e192, + 0xac4f155f,0x8edd7d6e } }, + /* 240 */ + { { 0x0bfc8ff3,0xc9a1dc0e,0xe936f42f,0x14efd82b,0xcca381ef,0x67016f7c, + 0xed8aee96,0x1432c1ca }, + { 0x70b23c26,0xec684829,0x0735b273,0xa64fe873,0xeaef0f5a,0xe389f6e5, + 0x5ac8d2c6,0xcaef480b } }, + /* 241 */ + { { 0x75315922,0x5245c978,0x3063cca5,0xd8295171,0xb64ef2cb,0xf3ce60d0, + 0x8efae236,0xd0ba177e }, + { 0xb1b3af60,0x53a9ae8f,0x3d2da20e,0x1a796ae5,0xdf9eef28,0x01d63605, + 0x1c54ae16,0xf31c957c } }, + /* 242 */ + { { 0x49cc4597,0xc0f58d52,0xbae0a028,0xdc5015b0,0x734a814a,0xefc5fc55, + 0x96e17c3a,0x013404cb }, + { 0xc9a824bf,0xb29e2585,0x001eaed7,0xd593185e,0x61ef68ac,0x8d6ee682, + 0x91933e6c,0x6f377c4b } }, + /* 243 */ + { { 0xa8333fd2,0x9f93bad1,0x5a2a95b8,0xa8930202,0xeaf75ace,0x211e5037, + 0xd2d09506,0x6dba3e4e }, + { 0xd04399cd,0xa48ef98c,0xe6b73ade,0x1811c66e,0xc17ecaf3,0x72f60752, + 0x3becf4a7,0xf13cf342 } }, + /* 244 */ + { { 0xa919e2eb,0xceeb9ec0,0xf62c0f68,0x83a9a195,0x7aba2299,0xcfba3bb6, + 0x274bbad3,0xc83fa9a9 }, + { 0x62fa1ce0,0x0d7d1b0b,0x3418efbf,0xe58b60f5,0x52706f04,0xbfa8ef9e, + 0x5d702683,0xb49d70f4 } }, + /* 245 */ + { { 0xfad5513b,0x914c7510,0xb1751e2d,0x05f32eec,0xd9fb9d59,0x6d850418, + 0x0c30f1cf,0x59cfadbb }, + { 0x55cb7fd6,0xe167ac23,0x820426a3,0x249367b8,0x90a78864,0xeaeec58c, + 0x354a4b67,0x5babf362 } }, + /* 246 */ + { { 0xee424865,0x37c981d1,0xf2e5577f,0x8b002878,0xb9e0c058,0x702970f1, + 0x9026c8f0,0x6188c6a7 }, + { 0xd0f244da,0x06f9a19b,0xfb080873,0x1ecced5c,0x9f213637,0x35470f9b, + 0xdf50b9d9,0x993fe475 } }, + /* 247 */ + { { 0x9b2c3609,0x68e31cdf,0x2c46d4ea,0x84eb19c0,0x9a775101,0x7ac9ec1a, + 0x4c80616b,0x81f76466 }, + { 0x75fbe978,0x1d7c2a5a,0xf183b356,0x6743fed3,0x501dd2bf,0x838d1f04, + 0x5fe9060d,0x564a812a } }, + /* 248 */ + { { 
0xfa817d1d,0x7a5a64f4,0xbea82e0f,0x55f96844,0xcd57f9aa,0xb5ff5a0f, + 0x00e51d6c,0x226bf3cf }, + { 0x2f2833cf,0xd6d1a9f9,0x4f4f89a8,0x20a0a35a,0x8f3f7f77,0x11536c49, + 0xff257836,0x68779f47 } }, + /* 249 */ + { { 0x73043d08,0x79b0c1c1,0x1fc020fa,0xa5446774,0x9a6d26d0,0xd3767e28, + 0xeb092e0b,0x97bcb0d1 }, + { 0xf32ed3c3,0x2ab6eaa8,0xb281bc48,0xc8a4f151,0xbfa178f3,0x4d1bf4f3, + 0x0a784655,0xa872ffe8 } }, + /* 250 */ + { { 0xa32b2086,0xb1ab7935,0x8160f486,0xe1eb710e,0x3b6ae6be,0x9bd0cd91, + 0xb732a36a,0x02812bfc }, + { 0xcf605318,0xa63fd7ca,0xfdfd6d1d,0x646e5d50,0x2102d619,0xa1d68398, + 0xfe5396af,0x07391cc9 } }, + /* 251 */ + { { 0x8b80d02b,0xc50157f0,0x62877f7f,0x6b8333d1,0x78d542ae,0x7aca1af8, + 0x7e6d2a08,0x355d2adc }, + { 0x287386e1,0xb41f335a,0xf8e43275,0xfd272a94,0xe79989ea,0x286ca2cd, + 0x7c2a3a79,0x3dc2b1e3 } }, + /* 252 */ + { { 0x04581352,0xd689d21c,0x376782be,0x0a00c825,0x9fed701f,0x203bd590, + 0x3ccd846b,0xc4786910 }, + { 0x24c768ed,0x5dba7708,0x6841f657,0x72feea02,0x6accce0e,0x73313ed5, + 0xd5bb4d32,0xccc42968 } }, + /* 253 */ + { { 0x3d7620b9,0x94e50de1,0x5992a56a,0xd89a5c8a,0x675487c9,0xdc007640, + 0xaa4871cf,0xe147eb42 }, + { 0xacf3ae46,0x274ab4ee,0x50350fbe,0xfd4936fb,0x48c840ea,0xdf2afe47, + 0x080e96e3,0x239ac047 } }, + /* 254 */ + { { 0x2bfee8d4,0x481d1f35,0xfa7b0fec,0xce80b5cf,0x2ce9af3c,0x105c4c9e, + 0xf5f7e59d,0xc55fa1a3 }, + { 0x8257c227,0x3186f14e,0x342be00b,0xc5b1653f,0xaa904fb2,0x09afc998, + 0xd4f4b699,0x094cd99c } }, + /* 255 */ + { { 0xd703beba,0x8a981c84,0x32ceb291,0x8631d150,0xe3bd49ec,0xa445f2c9, + 0x42abad33,0xb90a30b6 }, + { 0xb4a5abf9,0xb465404f,0x75db7603,0x004750c3,0xca35d89f,0x6f9a42cc, + 0x1b7924f7,0x019f8b9a } }, +}; + +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k, + int map, void* heap) +{ + return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table, + k, map, heap); +} + +#endif + +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */
+int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 8, km);
+
+        err = sp_256_ecc_mulmod_base_8(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_8(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_256_iszero_8(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_256_add_one_8(sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov r2, #1\n\t"
+        "ldr r1, [%[a], #0]\n\t"
+        "adds r1, r1, r2\n\t"
+        "mov r2, #0\n\t"
+        "str r1, [%[a], #0]\n\t"
+        "ldr r1, [%[a], #4]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #4]\n\t"
+        "ldr r1, [%[a], #8]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #8]\n\t"
+        "ldr r1, [%[a], #12]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #12]\n\t"
+        "ldr r1, [%[a], #16]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #16]\n\t"
+        "ldr r1, [%[a], #20]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #20]\n\t"
+        "ldr r1, [%[a], #24]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #24]\n\t"
+        "ldr r1, [%[a], #28]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #28]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "r1", "r2"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r     A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a     Byte array.
+ * n     Number of bytes in array to read.
+ */
+static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_256_ecc_gen_k_8(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[32];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_256_from_bin(k, 8, buf, (int)sizeof(buf));
+            if (sp_256_cmp_8(k, p256_order2) < 0) {
+                sp_256_add_one_8(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
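+ * The private scalar is generated with sp_256_ecc_gen_k_8() and the public
+ * point is computed by multiplying the base point by that scalar. When
+ * WOLFSSL_VALIDATE_ECC_KEYGEN is defined the pair is also validated by
+ * checking that point * order is the point at infinity.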
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256 inf;
+#endif
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_gen_k_8(rng, k);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_8(infinity, point, p256_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        /* point * order must be the point at infinity: (0, 0) after map. */
+        if ((sp_256_iszero_8(infinity->x) == 0) ||
+            (sp_256_iszero_8(infinity->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_8(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_256_point_free_8(infinity, 1, heap);
+#endif
+    sp_256_point_free_8(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 32
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_256_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 256 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<8 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
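+ *
+ * A minimal ECDH sketch (hypothetical names, error handling elided; priv
+ * is this side's private scalar, pub the peer's public point):
+ *
+ *     byte secret[32];
+ *     word32 secretLen = (word32)sizeof(secret);
+ *     err = sp_ecc_secret_gen_256(priv, pub, secret, &secretLen, NULL);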
+ */
+int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#endif
+    sp_point_256* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    if (*outLen < 32U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 8, priv);
+        sp_256_point_from_ecc_point_8(point, pub);
+        err = sp_256_ecc_mulmod_8(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_256_to_bin(point->x, out);
+        *outLen = 32;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov r8, %[a]\n\t"
+        "add r8, r8, #32\n\t"
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        "subs r5, r5, %[c]\n\t"
+        "ldr r3, [%[a]]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "str r3, [%[a]]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        "sbc %[c], %[c], %[c]\n\t"
+        "add %[a], %[a], #8\n\t"
+        "add %[b], %[b], #8\n\t"
+        "cmp %[a], r8\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "subs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "sbc %[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
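+ *
+ * The product is one digit longer than the input: r[0..8] = a[0..7] * b.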
+ */
+SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "add r9, %[a], #32\n\t"
+        /* A[0] * B */
+        "ldr r6, [%[a]], #4\n\t"
+        "umull r5, r3, r6, %[b]\n\t"
+        "mov r4, #0\n\t"
+        "str r5, [%[r]], #4\n\t"
+        /* A[0] * B - Done */
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        /* A[] * B */
+        "ldr r6, [%[a]], #4\n\t"
+        "umull r6, r8, r6, %[b]\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[] * B - Done */
+        "str r3, [%[r]], #4\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "cmp %[a], r9\n\t"
+        "blt 1b\n\t"
+        "str r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr r6, %[div], #16\n\t"
+        "add r6, r6, #1\n\t"
+        "udiv r4, %[d1], r6\n\t"
+        "lsl r8, r4, #16\n\t"
+        "umull r4, r5, %[div], r8\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "udiv r5, %[d1], r6\n\t"
+        "lsl r4, r5, #16\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "lsl r4, %[d1], #16\n\t"
+        "orr r4, r4, %[d0], lsr #16\n\t"
+        "udiv r4, r4, r6\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "lsl r4, %[d1], #16\n\t"
+        "orr r4, r4, %[d0], lsr #16\n\t"
+        "udiv r4, r4, r6\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "udiv r4, %[d0], %[div]\n\t"
+        "add r8, r8, r4\n\t"
+        "mov %[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r8"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<8; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+#endif
+}
+
+/* Divide a by d and put the remainder into r. (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
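+ *
+ * Schoolbook long division: each iteration estimates one 32-bit quotient
+ * digit with div_256_word_8() and subtracts d times that digit; since the
+ * estimate may be one too large, the masked adds conditionally put d back
+ * while the partial remainder is negative.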
+ */
+static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[16], t2[9];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[7];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 8);
+    for (i=7; i>=0; i--) {
+        r1 = div_256_word_8(t1[8 + i], t1[8 + i - 1], div);
+
+        sp_256_mul_d_8(t2, d, r1);
+        t1[8 + i] += sp_256_sub_in_place_8(&t1[i], t2);
+        t1[8 + i] -= t2[8];
+        sp_256_mask_8(t2, d, t1[8 + i]);
+        t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
+        sp_256_mask_8(t2, d, t1[8 + i]);
+        t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_256_cmp_8(t1, d) >= 0;
+    sp_256_cond_sub_8(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_256_div_8(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P256 curve. */
+static const uint32_t p256_order_minus_2[8] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU,
+    0x00000000U,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P256 curve. */
+static const uint32_t p256_order_low[4] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of P256 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_256_mont_mul_order_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_256_mul_8(r, a, b);
+    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
+}
+
+/* Square number mod the order of P256 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_256_mont_sqr_order_8(sp_digit* r, const sp_digit* a)
+{
+    sp_256_sqr_8(r, a);
+    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P256 curve a number of times.
+ * (r = a ^ (2 ^ n) mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ * n  Number of times to square.
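+ *
+ * e.g. sp_256_mont_sqr_n_order_8(r, a, 4) computes r = a^(2^4) = a^16 mod
+ * order; the inversion below chains these to raise a to the power order-2.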
+ */
+static void sp_256_mont_sqr_n_order_8(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_256_mont_sqr_order_8(r, a);
+    for (i=1; i<n; i++) {
+        sp_256_mont_sqr_order_8(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_order_8(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 8);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_order_8(t, t);
+        if ((p256_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 8U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 8;
+    sp_digit* t3 = td + 4 * 8;
+    int i;
+
+    /* t = a^2 */
+    sp_256_mont_sqr_order_8(t, a);
+    /* t = a^3 = t * a */
+    sp_256_mont_mul_order_8(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_256_mont_sqr_n_order_8(t2, t, 2);
+    /* t3= a^f = t2 * t */
+    sp_256_mont_mul_order_8(t3, t2, t);
+    /* t2= a^f0 = t3 ^ 2 ^ 4 */
+    sp_256_mont_sqr_n_order_8(t2, t3, 4);
+    /* t = a^ff = t2 * t3 */
+    sp_256_mont_mul_order_8(t, t2, t3);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_256_mont_sqr_n_order_8(t2, t, 8);
+    /* t = a^ffff = t2 * t */
+    sp_256_mont_mul_order_8(t, t2, t);
+    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+    sp_256_mont_sqr_n_order_8(t2, t, 16);
+    /* t = a^ffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t, t2, t);
+    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
+    sp_256_mont_sqr_n_order_8(t2, t, 64);
+    /* t2= a^ffffffff00000000ffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
+    sp_256_mont_sqr_n_order_8(t2, t2, 32);
+    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
+    for (i=127; i>=112; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+    for (i=107; i>=64; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+    for (i=59; i>=32; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+    for (i=27; i>=0; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+    sp_256_mont_mul_order_8(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 256 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Second part of result as an mp_int.
+ * km       Scalar k to use; when NULL or zero a random k is generated.
+ *          Cleared after use.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*8];
+    sp_digit xd[2*8];
+    sp_digit kd[2*8];
+    sp_digit rd[2*8];
+    sp_digit td[3 * 2*8];
+    sp_point_256 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int32_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 8;
+        x = d + 2 * 8;
+        k = d + 4 * 8;
+        r = d + 6 * 8;
+        tmp = d + 8 * 8;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(e, 8, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_256_from_mp(x, 8, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_256_ecc_gen_k_8(rng, k);
+        }
+        else {
+            sp_256_from_mp(k, 8, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+            err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 8U);
+            sp_256_norm_8(r);
+            c = sp_256_cmp_8(r, p256_order);
+            sp_256_cond_sub_8(r, r, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_8(r);
+
+            /* Conv k to Montgomery form (mod order) */
+            sp_256_mul_8(k, k, p256_norm_order);
+            err = sp_256_mod_8(k, k, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_8(k);
+            /* kInv = 1/k mod order */
+            sp_256_mont_inv_order_8(kInv, k, tmp);
+            sp_256_norm_8(kInv);
+
+            /* s = r * x + e */
+            sp_256_mul_8(x, x, r);
+            err = sp_256_mod_8(x, x, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_8(x);
+            carry = sp_256_add_8(s, e, x);
+            sp_256_cond_sub_8(s, s, p256_order, 0 - carry);
+            sp_256_norm_8(s);
+            c = sp_256_cmp_8(s, p256_order);
+            sp_256_cond_sub_8(s, s, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_8(s);
+
+            /* s = s * k^-1 mod order */
+            sp_256_mont_mul_order_8(s, s, kInv);
+            sp_256_norm_8(s);
+
+            /* Check that signature is usable.
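+             * A zero value for s is not a valid signature component, so
+             * when s is zero another random k is tried, at most
+             * SP_ECC_MAX_SIG_GEN times.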
+             */
+            if (sp_256_iszero_8(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 8);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 8U);
+#endif
+    sp_256_point_free_8(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 256)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash     Hash that was signed.
+ * hashLen  Length of the hash data.
+ * pX       X ordinate of the public key point Q.
+ * pY       Y ordinate of the public key point Q.
+ * pZ       Z ordinate of the public key point Q.
+ * r        First part of the signature as an mp_int.
+ * sm       Second part of the signature as an mp_int.
+ * res      Set to 1 when the signature verifies and 0 otherwise.
+ * heap     Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*8];
+    sp_digit u2d[2*8];
+    sp_digit sd[2*8];
+    sp_digit tmpd[2*8 * 5];
+    sp_point_256 p1d;
+    sp_point_256 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* p1;
+    sp_point_256* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+
+    err = sp_256_point_new_8(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 8, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1 = d + 0 * 8;
+        u2 = d + 2 * 8;
+        s = d + 4 * 8;
+        tmp = d + 6 * 8;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(u1, 8, hash, (int)hashLen);
+        sp_256_from_mp(u2, 8, r);
+        sp_256_from_mp(s, 8, sm);
+        sp_256_from_mp(p2->x, 8, pX);
+        sp_256_from_mp(p2->y, 8, pY);
+        sp_256_from_mp(p2->z, 8, pZ);
+
+        {
+            sp_256_mul_8(s, s, p256_norm_order);
+        }
+        err = sp_256_mod_8(s, s, p256_order);
+    }
+    if (err == MP_OKAY) {
+        sp_256_norm_8(s);
+        {
+            sp_256_mont_inv_order_8(s, s, tmp);
+            sp_256_mont_mul_order_8(u1, u1, s);
+            sp_256_mont_mul_order_8(u2, u2, s);
+        }
+
+        err = sp_256_ecc_mulmod_base_8(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_8(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_256_proj_point_add_8(p1, p1, p2, tmp);
+            if (sp_256_iszero_8(p1->z)) {
+                if (sp_256_iszero_8(p1->x) && sp_256_iszero_8(p1->y)) {
+                    sp_256_proj_point_dbl_8(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    p1->x[7] = 0;
+                    XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_256_from_mp(u2, 8, r);
+        err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_256_mont_sqr_8(p1->z, p1->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod);
+        *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_256_from_mp(u2, 8, r);
+            carry = sp_256_add_8(u2, u2, p256_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_256_norm_8(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_256_cmp_8(u2, p256_mod);
+                if (c < 0) {
+                    /* Convert to Montgomery form */
+                    err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_256_mont_mul_8(u1, u2, p1->z, p256_mod,
+                            p256_mp_mod);
+                        *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_256_point_free_8(p1, 0, heap);
+    sp_256_point_free_8(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_256_ecc_is_point_8(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*8];
+    sp_digit t2d[2*8];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 8;
+        t2 = d + 2 * 8;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        /* Check y^2 == x^3 - 3x + b (mod p). */
+        sp_256_sqr_8(t1, point->y);
+        (void)sp_256_mod_8(t1, t1, p256_mod);
+        sp_256_sqr_8(t2, point->x);
+        (void)sp_256_mod_8(t2, t2, p256_mod);
+        sp_256_mul_8(t2, t2, point->x);
+        (void)sp_256_mod_8(t2, t2, p256_mod);
+        (void)sp_256_sub_8(t2, p256_mod, t2);
+        sp_256_mont_add_8(t1, t1, t2, p256_mod);
+
+        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+
+        if (sp_256_cmp_8(t1, p256_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 pubd;
+#endif
+    sp_point_256* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_256_point_new_8(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_256_from_mp(pub->x, 8, pX);
+        sp_256_from_mp(pub->y, 8, pY);
+        sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
+
+        err = sp_256_ecc_is_point_8(pub, NULL);
+    }
+
+    sp_256_point_free_8(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[8];
+    sp_point_256 pubd;
+    sp_point_256 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_256* pub;
+    sp_point_256* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_256_point_new_8(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                  DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_256_from_mp(pub->x, 8, pX);
+        sp_256_from_mp(pub->y, 8, pY);
+        sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
+        sp_256_from_mp(priv, 8, privm);
+
+        /* Check point at infinity. */
+        if ((sp_256_iszero_8(pub->x) != 0) &&
+            (sp_256_iszero_8(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_256_cmp_8(pub->x, p256_mod) >= 0 ||
+            sp_256_cmp_8(pub->y, p256_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_256_ecc_is_point_8(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+        err = sp_256_ecc_mulmod_8(p, pub, p256_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_256_iszero_8(p->x) == 0) ||
+            (sp_256_iszero_8(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+        err = sp_256_ecc_mulmod_base_8(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_256_cmp_8(p->x, pub->x) != 0 ||
+            sp_256_cmp_8(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, heap);
+    sp_256_point_free_8(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
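+ * Points are in Jacobian projective coordinates, where (X, Y, Z) represents
+ * the affine point (X/Z^2, Y/Z^3).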
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ) + * + * pX First EC point's X ordinate. + * pY First EC point's Y ordinate. + * pZ First EC point's Z ordinate. + * qX Second EC point's X ordinate. + * qY Second EC point's Y ordinate. + * qZ Second EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 8 * 5]; + sp_point_256 pd; + sp_point_256 qd; +#endif + sp_digit* tmp; + sp_point_256* p; + sp_point_256* q = NULL; + int err; + + err = sp_256_point_new_8(NULL, pd, p); + if (err == MP_OKAY) { + err = sp_256_point_new_8(NULL, qd, q); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 8, pX); + sp_256_from_mp(p->y, 8, pY); + sp_256_from_mp(p->z, 8, pZ); + sp_256_from_mp(q->x, 8, qX); + sp_256_from_mp(q->y, 8, qY); + sp_256_from_mp(q->z, 8, qZ); + + sp_256_proj_point_add_8(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_8(q, 0, NULL); + sp_256_point_free_8(p, 0, NULL); + + return err; +} + +/* Double a projective EC point. + * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
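+ * Ordinates are Jacobian projective coordinates: (X, Y, Z) represents
+ * the affine point (X/Z^2, Y/Z^3).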
+ */ +int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 8 * 2]; + sp_point_256 pd; +#endif + sp_digit* tmp; + sp_point_256* p; + int err; + + err = sp_256_point_new_8(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 8, pX); + sp_256_from_mp(p->y, 8, pY); + sp_256_from_mp(p->z, 8, pZ); + + sp_256_proj_point_dbl_8(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_8(p, 0, NULL); + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 8 * 4]; + sp_point_256 pd; +#endif + sp_digit* tmp; + sp_point_256* p; + int err; + + err = sp_256_point_new_8(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 4, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 8, pX); + sp_256_from_mp(p->y, 8, pY); + sp_256_from_mp(p->z, 8, pZ); + + sp_256_map_8(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, pZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_8(p, 0, NULL); + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
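+ * Since the P256 prime satisfies p = 3 (mod 4), the root is computed as
+ * y^((p+1)/4) mod p using the fixed sequence of squarings and
+ * multiplications below.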
+ */ +static int sp_256_mont_sqrt_8(sp_digit* y) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit t1d[2 * 8]; + sp_digit t2d[2 * 8]; +#endif + sp_digit* t1; + sp_digit* t2; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = d + 0 * 8; + t2 = d + 2 * 8; +#else + t1 = t1d; + t2 = t2d; +#endif + + { + /* t2 = y ^ 0x2 */ + sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0x3 */ + sp_256_mont_mul_8(t1, t2, y, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xc */ + sp_256_mont_sqr_n_8(t2, t1, 2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xf */ + sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xf0 */ + sp_256_mont_sqr_n_8(t2, t1, 4, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xff */ + sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xff00 */ + sp_256_mont_sqr_n_8(t2, t1, 8, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffff */ + sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xffff0000 */ + sp_256_mont_sqr_n_8(t2, t1, 16, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff */ + sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000000 */ + sp_256_mont_sqr_n_8(t1, t1, 32, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001 */ + sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */ + sp_256_mont_sqr_n_8(t1, t1, 96, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */ + sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod); + sp_256_mont_sqr_n_8(y, t1, 94, p256_mod, p256_mp_mod); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
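+ * y is recovered as sqrt(x^3 - 3x + b) mod p, and of the two roots the
+ * one whose least significant bit matches 'odd' is returned.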
+ */ +int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit xd[2 * 8]; + sp_digit yd[2 * 8]; +#endif + sp_digit* x = NULL; + sp_digit* y = NULL; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + x = d + 0 * 8; + y = d + 2 * 8; +#else + x = xd; + y = yd; +#endif + + sp_256_from_mp(x, 8, xm); + err = sp_256_mod_mul_norm_8(x, x, p256_mod); + } + if (err == MP_OKAY) { + /* y = x^3 */ + { + sp_256_mont_sqr_8(y, x, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(y, y, x, p256_mod, p256_mp_mod); + } + /* y = x^3 - 3x */ + sp_256_mont_sub_8(y, y, x, p256_mod); + sp_256_mont_sub_8(y, y, x, p256_mod); + sp_256_mont_sub_8(y, y, x, p256_mod); + /* y = x^3 - 3x + b */ + err = sp_256_mod_mul_norm_8(x, p256_b, p256_mod); + } + if (err == MP_OKAY) { + sp_256_mont_add_8(y, y, x, p256_mod); + /* y = sqrt(x^3 - 3x + b) */ + err = sp_256_mont_sqrt_8(y); + } + if (err == MP_OKAY) { + XMEMSET(y + 8, 0, 8U * sizeof(sp_digit)); + sp_256_mont_reduce_8(y, p256_mod, p256_mp_mod); + if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) { + sp_256_mont_sub_8(y, p256_mod, y, p256_mod); + } + + err = sp_256_to_mp(y, ym); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} +#endif +#endif /* !WOLFSSL_SP_NO_256 */ +#ifdef WOLFSSL_SP_384 + +/* Point structure to use. */ +typedef struct sp_point_384 { + sp_digit x[2 * 12]; + sp_digit y[2 * 12]; + sp_digit z[2 * 12]; + int infinity; +} sp_point_384; + +/* The modulus (prime) of the curve P384. */ +static const sp_digit p384_mod[12] = { + 0xffffffff,0x00000000,0x00000000,0xffffffff,0xfffffffe,0xffffffff, + 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff +}; +/* The Montogmery normalizer for modulus of the curve P384. */ +static const sp_digit p384_norm_mod[12] = { + 0x00000001,0xffffffff,0xffffffff,0x00000000,0x00000001,0x00000000, + 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000 +}; +/* The Montogmery multiplier for modulus of the curve P384. */ +static sp_digit p384_mp_mod = 0x00000001; +#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ + defined(HAVE_ECC_VERIFY) +/* The order of the curve P384. */ +static const sp_digit p384_order[12] = { + 0xccc52973,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81, + 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff +}; +#endif +/* The order of the curve P384 minus 2. */ +static const sp_digit p384_order2[12] = { + 0xccc52971,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81, + 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff +}; +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montogmery normalizer for order of the curve P384. */ +static const sp_digit p384_norm_order[12] = { + 0x333ad68d,0x1313e695,0xb74f5885,0xa7e5f24d,0x0bc8d220,0x389cb27e, + 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000 +}; +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montogmery multiplier for order of the curve P384. 
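+ * It satisfies p384_mp_order * p384_order = -1 (mod 2^32), i.e. it is
+ * the negative inverse of the order's least significant word, as needed
+ * for Montgomery reduction modulo the order.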
*/ +static sp_digit p384_mp_order = 0xe88fdc45; +#endif +/* The base point of curve P384. */ +static const sp_point_384 p384_base = { + /* X ordinate */ + { + 0x72760ab7,0x3a545e38,0xbf55296c,0x5502f25d,0x82542a38,0x59f741e0, + 0x8ba79b98,0x6e1d3b62,0xf320ad74,0x8eb1c71e,0xbe8b0537,0xaa87ca22, + 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L + }, + /* Y ordinate */ + { + 0x90ea0e5f,0x7a431d7c,0x1d7e819d,0x0a60b1ce,0xb5f0b8c0,0xe9da3113, + 0x289a147c,0xf8f41dbd,0x9292dc29,0x5d9e98bf,0x96262c6f,0x3617de4a, + 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L + }, + /* Z ordinate */ + { + 0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000, + 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000, + 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L + }, + /* infinity */ + 0 +}; +#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY) +static const sp_digit p384_b[12] = { + 0xd3ec2aef,0x2a85c8ed,0x8a2ed19d,0xc656398d,0x5013875a,0x0314088f, + 0xfe814112,0x181d9c6e,0xe3f82d19,0x988e056b,0xe23ee7e4,0xb3312fa7 +}; +#endif + +static int sp_384_point_new_ex_12(void* heap, sp_point_384* sp, sp_point_384** p) +{ + int ret = MP_OKAY; + (void)heap; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + (void)sp; + *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); +#else + *p = sp; +#endif + if (*p == NULL) { + ret = MEMORY_E; + } + return ret; +} + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +/* Allocate memory for point and return error. */ +#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), NULL, &(p)) +#else +/* Set pointer to data and return no error. */ +#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), &(sp), &(p)) +#endif + + +static void sp_384_point_free_12(sp_point_384* p, int clear, void* heap) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +/* If valid pointer then clear point data if requested and free data. */ + if (p != NULL) { + if (clear != 0) { + XMEMSET(p, 0, sizeof(*p)); + } + XFREE(p, heap, DYNAMIC_TYPE_ECC); + } +#else +/* Clear point data if requested. */ + if (clear != 0) { + XMEMSET(p, 0, sizeof(*p)); + } +#endif + (void)heap; +} + +/* Multiply a number by Montogmery normalizer mod modulus (prime). + * + * r The resulting Montgomery form number. + * a The number to convert. + * m The modulus (prime). + * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise. 
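+ * The row coefficients listed in the body implement fast reduction via
+ * the identity 2^384 = 2^128 + 2^96 - 2^32 + 1 (mod p384).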
+ */ +static int sp_384_mod_mul_norm_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + int64_t* t; +#else + int64_t t[12]; +#endif + int64_t o; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (int64_t*)XMALLOC(sizeof(int64_t) * 12, NULL, DYNAMIC_TYPE_ECC); + if (t == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + /* 1 0 0 0 0 0 0 0 1 1 0 -1 */ + t[0] = 0 + (uint64_t)a[0] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[11]; + /* -1 1 0 0 0 0 0 0 -1 0 1 1 */ + t[1] = 0 - (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[8] + (uint64_t)a[10] + (uint64_t)a[11]; + /* 0 -1 1 0 0 0 0 0 0 -1 0 1 */ + t[2] = 0 - (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[9] + (uint64_t)a[11]; + /* 1 0 -1 1 0 0 0 0 1 1 -1 -1 */ + t[3] = 0 + (uint64_t)a[0] - (uint64_t)a[2] + (uint64_t)a[3] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[10] - (uint64_t)a[11]; + /* 1 1 0 -1 1 0 0 0 1 2 1 -2 */ + t[4] = 0 + (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[3] + (uint64_t)a[4] + (uint64_t)a[8] + 2 * (uint64_t)a[9] + (uint64_t)a[10] - 2 * (uint64_t)a[11]; + /* 0 1 1 0 -1 1 0 0 0 1 2 1 */ + t[5] = 0 + (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[4] + (uint64_t)a[5] + (uint64_t)a[9] + 2 * (uint64_t)a[10] + (uint64_t)a[11]; + /* 0 0 1 1 0 -1 1 0 0 0 1 2 */ + t[6] = 0 + (uint64_t)a[2] + (uint64_t)a[3] - (uint64_t)a[5] + (uint64_t)a[6] + (uint64_t)a[10] + 2 * (uint64_t)a[11]; + /* 0 0 0 1 1 0 -1 1 0 0 0 1 */ + t[7] = 0 + (uint64_t)a[3] + (uint64_t)a[4] - (uint64_t)a[6] + (uint64_t)a[7] + (uint64_t)a[11]; + /* 0 0 0 0 1 1 0 -1 1 0 0 0 */ + t[8] = 0 + (uint64_t)a[4] + (uint64_t)a[5] - (uint64_t)a[7] + (uint64_t)a[8]; + /* 0 0 0 0 0 1 1 0 -1 1 0 0 */ + t[9] = 0 + (uint64_t)a[5] + (uint64_t)a[6] - (uint64_t)a[8] + (uint64_t)a[9]; + /* 0 0 0 0 0 0 1 1 0 -1 1 0 */ + t[10] = 0 + (uint64_t)a[6] + (uint64_t)a[7] - (uint64_t)a[9] + (uint64_t)a[10]; + /* 0 0 0 0 0 0 0 1 1 0 -1 1 */ + t[11] = 0 + (uint64_t)a[7] + (uint64_t)a[8] - (uint64_t)a[10] + (uint64_t)a[11]; + + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + t[8] += t[7] >> 32; t[7] &= 0xffffffff; + t[9] += t[8] >> 32; t[8] &= 0xffffffff; + t[10] += t[9] >> 32; t[9] &= 0xffffffff; + t[11] += t[10] >> 32; t[10] &= 0xffffffff; + o = t[11] >> 32; t[11] &= 0xffffffff; + t[0] += o; + t[1] -= o; + t[3] += o; + t[4] += o; + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + t[8] += t[7] >> 32; t[7] &= 0xffffffff; + t[9] += t[8] >> 32; t[8] &= 0xffffffff; + t[10] += t[9] >> 32; t[9] &= 0xffffffff; + t[11] += t[10] >> 32; t[10] &= 0xffffffff; + + r[0] = t[0]; + r[1] = t[1]; + r[2] = t[2]; + r[3] = t[3]; + r[4] = t[4]; + r[5] = t[5]; + r[6] = t[6]; + r[7] = t[7]; + r[8] = t[8]; + r[9] = t[9]; + r[10] = t[10]; + r[11] = t[11]; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) + XFREE(t, NULL, DYNAMIC_TYPE_ECC); +#endif + + return 
err; +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 32 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 32 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 32U) <= (word32)DIGIT_BIT) { + s += 32U; + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 32) { + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + s = 32 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Convert a point of type ecc_point to type sp_point_384. + * + * p Point of type sp_point_384 (result). + * pm Point of type ecc_point. + */ +static void sp_384_point_from_ecc_point_12(sp_point_384* p, const ecc_point* pm) +{ + XMEMSET(p->x, 0, sizeof(p->x)); + XMEMSET(p->y, 0, sizeof(p->y)); + XMEMSET(p->z, 0, sizeof(p->z)); + sp_384_from_mp(p->x, 12, pm->x); + sp_384_from_mp(p->y, 12, pm->y); + sp_384_from_mp(p->z, 12, pm->z); + p->infinity = 0; +} + +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_384_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 12); + r->used = 12; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 12; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 32 - s; + } + r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 12; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Convert a point of type sp_point_384 to type ecc_point. + * + * p Point of type sp_point_384. + * pm Point of type ecc_point (result). + * returns MEMORY_E when allocation of memory in ecc_point fails otherwise + * MP_OKAY. 
+ */ +static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm) +{ + int err; + + err = sp_384_to_mp(p->x, pm->x); + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, pm->y); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, pm->z); + } + + return err; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[12 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r10, %[a]\n\t" + "mov r11, %[b]\n\t" + "mov r6, #48\n\t" + "add r6, r6, r10\n\t" + "mov r14, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #44\n\t" + "mov %[a], r9\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov %[b], r9\n\t" + "sub %[b], %[b], %[a]\n\t" + "add %[a], %[a], r10\n\t" + "add %[b], %[b], r11\n\t" + "\n2:\n\t" + /* Multiply Start */ + "ldr r6, [%[a]]\n\t" + "ldr r8, [%[b]]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply Done */ + "add %[a], %[a], #4\n\t" + "sub %[b], %[b], #4\n\t" + "cmp %[a], r14\n\t" + "beq 3f\n\t" + "mov r6, r9\n\t" + "add r6, r6, r10\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r12\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" + "mov r6, #88\n\t" + "cmp r8, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[a], r10\n\t" + "mov %[b], r11\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #48\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "ldr r6, [%[b], r8]\n\t" + "and r6, r6, %[m]\n\t" + "mov r5, #0\n\t" + "subs r5, r5, %[c]\n\t" + "ldr r5, [%[a], r8]\n\t" + "sbcs r5, r5, r6\n\t" + "sbcs %[c], %[c], %[c]\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r8", "r9" + ); + + return c; +} + +#define sp_384_mont_reduce_order_12 sp_384_mont_reduce_12 + +/* Reduce the number back to 384 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
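+ * Each outer pass computes mu = a[i] * mp (mod 2^32) and accumulates
+ * mu * m so that word a[i] is zeroed; after all 12 passes the result is
+ * taken from the upper words and a conditional subtract of m keeps it
+ * below the modulus.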
+ */ +SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "mov r9, %[mp]\n\t" + "mov r12, %[m]\n\t" + "mov r10, %[a]\n\t" + "mov r4, #0\n\t" + "add r11, r10, #48\n\t" + "\n1:\n\t" + /* mu = a[i] * mp */ + "mov %[mp], r9\n\t" + "ldr %[a], [r10]\n\t" + "mul %[mp], %[mp], %[a]\n\t" + "mov %[m], r12\n\t" + "add r14, r10, #40\n\t" + "\n2:\n\t" + /* a[i+j] += m[j] * mu */ + "ldr %[a], [r10]\n\t" + "mov r5, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" + "adds %[a], %[a], r6\n\t" + "adc r5, r5, r8\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "ldr %[a], [r10]\n\t" + "mov r4, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" + "adds %[a], %[a], r6\n\t" + "adc r4, r4, r8\n\t" + /* Multiply m[j] and mu - Done */ + "adds r5, r5, %[a]\n\t" + "adc r4, r4, #0\n\t" + "str r5, [r10], #4\n\t" + "cmp r10, r14\n\t" + "blt 2b\n\t" + /* a[i+10] += m[10] * mu */ + "ldr %[a], [r10]\n\t" + "mov r5, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" + "adds %[a], %[a], r6\n\t" + "adc r5, r5, r8\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" + /* a[i+11] += m[11] * mu */ + "mov r4, %[ca]\n\t" + "mov %[ca], #0\n\t" + /* Multiply m[11] and mu - Start */ + "ldr r8, [%[m]]\n\t" + "umull r6, r8, %[mp], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc %[ca], %[ca], #0\n\t" + /* Multiply m[11] and mu - Done */ + "ldr r6, [r10]\n\t" + "ldr r8, [r10, #4]\n\t" + "adds r6, r6, r5\n\t" + "adcs r8, r8, r4\n\t" + "adc %[ca], %[ca], #0\n\t" + "str r6, [r10]\n\t" + "str r8, [r10, #4]\n\t" + /* Next word in a */ + "sub r10, r10, #40\n\t" + "cmp r10, r11\n\t" + "blt 1b\n\t" + "mov %[a], r10\n\t" + "mov %[m], r12\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_384_mont_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_384_mul_12(r, a, b); + sp_384_mont_reduce_12(r, m, mp); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
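+ * Uses product scanning (Comba): off-diagonal products a[i]*a[j] are
+ * accumulated twice and diagonal squares once, one output column at a
+ * time.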
+ */ +SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r6, #96\n\t" + "neg r6, r6\n\t" + "add sp, sp, r6\n\t" + "mov r11, sp\n\t" + "mov r10, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #44\n\t" + "mov %[a], r9\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov r2, r9\n\t" + "sub r2, r2, %[a]\n\t" + "add %[a], %[a], r10\n\t" + "add r2, r2, r10\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + /* Multiply * 2: Start */ + "ldr r6, [%[a]]\n\t" + "ldr r8, [r2]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply * 2: Done */ + "bal 5f\n\t" + "\n4:\n\t" + /* Square: Start */ + "ldr r6, [%[a]]\n\t" + "umull r6, r8, r6, r6\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r8\n\t" + "adc r5, r5, %[r]\n\t" + /* Square: Done */ + "\n5:\n\t" + "add %[a], %[a], #4\n\t" + "sub r2, r2, #4\n\t" + "mov r6, #48\n\t" + "add r6, r6, r10\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r8, r9\n\t" + "add r8, r8, r10\n\t" + "cmp %[a], r8\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" + "mov r6, #88\n\t" + "cmp r8, r6\n\t" + "ble 1b\n\t" + "mov %[a], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[r], r12\n\t" + "mov %[a], r11\n\t" + "mov r3, #92\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "subs r3, r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #96\n\t" + "add sp, sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" + ); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_384_sqr_12(r, a); + sp_384_mont_reduce_12(r, m, mp); +} + +#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY) +/* Square the Montgomery form number a number of times. (r = a ^ n mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * n Number of times to square. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n, + const sp_digit* m, sp_digit mp) +{ + sp_384_mont_sqr_12(r, a, m, mp); + for (; n > 1; n--) { + sp_384_mont_sqr_12(r, r, m, mp); + } +} + +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ +#ifdef WOLFSSL_SP_SMALL +/* Mod-2 for the P384 curve. */ +static const uint32_t p384_mod_minus_2[12] = { + 0xfffffffdU,0x00000000U,0x00000000U,0xffffffffU,0xfffffffeU,0xffffffffU, + 0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU +}; +#endif /* !WOLFSSL_SP_SMALL */ + +/* Invert the number, in Montgomery form, modulo the modulus (prime) of the + * P384 curve. (r = 1 / a mod m) + * + * r Inverse result. + * a Number to invert. + * td Temporary data. 
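+ * By Fermat's little theorem 1/a = a^(p-2) (mod p): the small build
+ * walks the bits of p384_mod_minus_2, while the large build uses the
+ * fixed addition chain over t1..t5 below.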
+ */ +static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 12); + for (i=382; i>=0; i--) { + sp_384_mont_sqr_12(t, t, p384_mod, p384_mp_mod); + if (p384_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32))) + sp_384_mont_mul_12(t, t, a, p384_mod, p384_mp_mod); + } + XMEMCPY(r, t, sizeof(sp_digit) * 12); +#else + sp_digit* t1 = td; + sp_digit* t2 = td + 2 * 12; + sp_digit* t3 = td + 4 * 12; + sp_digit* t4 = td + 6 * 12; + sp_digit* t5 = td + 8 * 12; + + /* 0x2 */ + sp_384_mont_sqr_12(t1, a, p384_mod, p384_mp_mod); + /* 0x3 */ + sp_384_mont_mul_12(t5, t1, a, p384_mod, p384_mp_mod); + /* 0xc */ + sp_384_mont_sqr_n_12(t1, t5, 2, p384_mod, p384_mp_mod); + /* 0xf */ + sp_384_mont_mul_12(t2, t5, t1, p384_mod, p384_mp_mod); + /* 0x1e */ + sp_384_mont_sqr_12(t1, t2, p384_mod, p384_mp_mod); + /* 0x1f */ + sp_384_mont_mul_12(t4, t1, a, p384_mod, p384_mp_mod); + /* 0x3e0 */ + sp_384_mont_sqr_n_12(t1, t4, 5, p384_mod, p384_mp_mod); + /* 0x3ff */ + sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0x7fe0 */ + sp_384_mont_sqr_n_12(t1, t2, 5, p384_mod, p384_mp_mod); + /* 0x7fff */ + sp_384_mont_mul_12(t4, t4, t1, p384_mod, p384_mp_mod); + /* 0x3fff8000 */ + sp_384_mont_sqr_n_12(t1, t4, 15, p384_mod, p384_mp_mod); + /* 0x3fffffff */ + sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0xfffffffc */ + sp_384_mont_sqr_n_12(t3, t2, 2, p384_mod, p384_mp_mod); + /* 0xfffffffd */ + sp_384_mont_mul_12(r, t3, a, p384_mod, p384_mp_mod); + /* 0xffffffff */ + sp_384_mont_mul_12(t3, t5, t3, p384_mod, p384_mp_mod); + /* 0xfffffffc0000000 */ + sp_384_mont_sqr_n_12(t1, t2, 30, p384_mod, p384_mp_mod); + /* 0xfffffffffffffff */ + sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffff000000000000000 */ + sp_384_mont_sqr_n_12(t1, t2, 60, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffff */ + sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */ + sp_384_mont_sqr_n_12(t1, t2, 120, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */ + sp_384_mont_sqr_n_12(t1, t2, 15, p384_mod, p384_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */ + sp_384_mont_sqr_n_12(t1, t2, 33, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */ + sp_384_mont_mul_12(t2, t3, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */ + sp_384_mont_sqr_n_12(t1, t2, 96, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */ + sp_384_mont_mul_12(r, r, t1, p384_mod, p384_mp_mod); + +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. 
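+ * Words are compared from most to least significant; a running mask
+ * blanks out later words once a difference is seen, so the running time
+ * does not depend on where the operands differ.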
+ */ +SP_NOINLINE static int32_t sp_384_cmp_12(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; + + + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mvn r3, r3\n\t" + "mov r6, #44\n\t" + "\n1:\n\t" + "ldr r8, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r8, r8, r3\n\t" + "and r5, r5, r3\n\t" + "mov r4, r8\n\t" + "subs r8, r8, r5\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" + "subs r5, r5, r4\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" + "sub r6, r6, #4\n\t" + "cmp r6, #0\n\t" + "bge 1b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "r3", "r4", "r5", "r6", "r8" + ); + + return r; +} + +/* Normalize the values in each word to 32. + * + * a Array of sp_digit to normalize. + */ +#define sp_384_norm_12(a) + +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. + * p Montgomery form projective coordinate point. + * t Temporary ordinate data. + */ +static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + int32_t n; + + sp_384_mont_inv_12(t1, p->z, t + 2*12); + + sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod); + + /* x /= z^2 */ + sp_384_mont_mul_12(r->x, p->x, t2, p384_mod, p384_mp_mod); + XMEMSET(r->x + 12, 0, sizeof(r->x) / 2U); + sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod); + /* Reduce x to less than modulus */ + n = sp_384_cmp_12(r->x, p384_mod); + sp_384_cond_sub_12(r->x, r->x, p384_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_12(r->x); + + /* y /= z^3 */ + sp_384_mont_mul_12(r->y, p->y, t1, p384_mod, p384_mp_mod); + XMEMSET(r->y + 12, 0, sizeof(r->y) / 2U); + sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod); + /* Reduce y to less than modulus */ + n = sp_384_cmp_12(r->y, p384_mod); + sp_384_cond_sub_12(r->y, r->y, p384_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_12(r->y); + + XMEMSET(r->z, 0, sizeof(r->z)); + r->z[0] = 1; + +} + +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "mov r8, #0\n\t" + "add r6, r6, #48\n\t" + "sub r8, r8, #1\n\t" + "\n1:\n\t" + "adds %[c], %[c], r8\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r]]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #4\n\t" + "add %[b], %[b], #4\n\t" + "add %[r], %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +#else +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
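+ * Fully unrolled: the twelve words are loaded and added in ldm/stm
+ * pairs with the carry chained through adcs; the final carry is the
+ * return value.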
+ */ +SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r8}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r8\n\t" + "stm %[r]!, {r4, r5}\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Add two Montgomery form numbers (r = a + b % m). + * + * r Result of addition. + * a First number to add in Montogmery form. + * b Second number to add in Montogmery form. + * m Modulus (prime). + */ +SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + sp_digit o; + + o = sp_384_add_12(r, a, b); + sp_384_cond_sub_12(r, r, m, 0 - o); +} + +/* Double a Montgomery form number (r = a + a % m). + * + * r Result of doubling. + * a Number to double in Montogmery form. + * m Modulus (prime). + */ +SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit o; + + o = sp_384_add_12(r, a, a); + sp_384_cond_sub_12(r, r, m, 0 - o); +} + +/* Triple a Montgomery form number (r = a + a + a % m). + * + * r Result of Tripling. + * a Number to triple in Montogmery form. + * m Modulus (prime). + */ +SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit o; + + o = sp_384_add_12(r, a, a); + sp_384_cond_sub_12(r, r, m, 0 - o); + o = sp_384_add_12(r, r, a); + sp_384_cond_sub_12(r, r, m, 0 - o); +} + +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "add r6, r6, #48\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "subs r5, r5, %[c]\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "sbcs r4, r4, r5\n\t" + "str r4, [%[r]]\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #4\n\t" + "add %[b], %[b], #4\n\t" + "add %[r], %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6" + ); + + return c; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
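+ * Fully unrolled: borrows chain through sbcs and the return value is 0
+ * when no borrow occurred and all ones otherwise.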
+ */ +SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r8, [%[b], #4]\n\t" + "subs r4, r4, r6\n\t" + "sbcs r5, r5, r8\n\t" + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #8]\n\t" + "ldr r8, [%[b], #12]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r8\n\t" + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r8, [%[b], #20]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r8\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #24]\n\t" + "ldr r8, [%[b], #28]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r8\n\t" + "str r4, [%[r], #24]\n\t" + "str r5, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[b], #32]\n\t" + "ldr r8, [%[b], #36]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r8\n\t" + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[b], #40]\n\t" + "ldr r8, [%[b], #44]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r8\n\t" + "str r4, [%[r], #40]\n\t" + "str r5, [%[r], #44]\n\t" + "sbc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r8" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #48\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "ldr r6, [%[b], r8]\n\t" + "and r6, r6, %[m]\n\t" + "adds r5, %[c], #-1\n\t" + "ldr r5, [%[a], r8]\n\t" + "adcs r5, r5, r6\n\t" + "mov %[c], #0\n\t" + "adcs %[c], %[c], %[c]\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r8", "r9" + ); + + return c; +} + +/* Subtract two Montgomery form numbers (r = a - b % m). + * + * r Result of subtration. + * a Number to subtract from in Montogmery form. + * b Number to subtract with in Montogmery form. + * m Modulus (prime). 
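+ * A borrow from the subtraction produces the mask -1, which triggers a
+ * conditional add of m to bring the result back into [0, m).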
+ */ +SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + sp_digit o; + + o = sp_384_sub_12(r, a, b); + sp_384_cond_add_12(r, r, m, o); +} + +static void sp_384_rshift1_12(sp_digit* r, sp_digit* a) +{ + __asm__ __volatile__ ( + "ldr r2, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" + "lsr r2, r2, #1\n\t" + "lsl r5, r3, #31\n\t" + "lsr r3, r3, #1\n\t" + "orr r2, r2, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r2, [%[r], #0]\n\t" + "lsl r5, r4, #31\n\t" + "lsr r4, r4, #1\n\t" + "orr r3, r3, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r3, [%[r], #4]\n\t" + "lsl r5, r2, #31\n\t" + "lsr r2, r2, #1\n\t" + "orr r4, r4, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r4, [%[r], #8]\n\t" + "lsl r5, r3, #31\n\t" + "lsr r3, r3, #1\n\t" + "orr r2, r2, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r2, [%[r], #12]\n\t" + "lsl r5, r4, #31\n\t" + "lsr r4, r4, #1\n\t" + "orr r3, r3, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r3, [%[r], #16]\n\t" + "lsl r5, r2, #31\n\t" + "lsr r2, r2, #1\n\t" + "orr r4, r4, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r4, [%[r], #20]\n\t" + "lsl r5, r3, #31\n\t" + "lsr r3, r3, #1\n\t" + "orr r2, r2, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r2, [%[r], #24]\n\t" + "lsl r5, r4, #31\n\t" + "lsr r4, r4, #1\n\t" + "orr r3, r3, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r3, [%[r], #28]\n\t" + "lsl r5, r2, #31\n\t" + "lsr r2, r2, #1\n\t" + "orr r4, r4, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r4, [%[r], #32]\n\t" + "lsl r5, r3, #31\n\t" + "lsr r3, r3, #1\n\t" + "orr r2, r2, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r2, [%[r], #36]\n\t" + "lsl r5, r4, #31\n\t" + "lsr r4, r4, #1\n\t" + "orr r3, r3, r5\n\t" + "str r3, [%[r], #40]\n\t" + "str r4, [%[r], #44]\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5" + ); +} + +/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) + * + * r Result of division by 2. + * a Number to divide. + * m Modulus (prime). + */ +SP_NOINLINE static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit o; + + o = sp_384_cond_add_12(r, a, m, 0 - (a[0] & 1)); + sp_384_rshift1_12(r, r); + r[11] |= o << 31; +} + +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. 
*/ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_12(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_12(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_12(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_12(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_12(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_div2_12(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_12(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_12(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_12(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_12(y, y, t2, p384_mod); +} + +/* Compare two numbers to determine if they are equal. + * Constant time implementation. + * + * a First number to compare. + * b Second number to compare. + * returns 1 when equal and 0 otherwise. + */ +static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) | + (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) | + (a[8] ^ b[8]) | (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11])) == 0; +} + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_384_proj_point_add_12(sp_point_384* r, const sp_point_384* p, const sp_point_384* q, + sp_digit* t) +{ + const sp_point_384* ap[2]; + sp_point_384* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + sp_digit* t3 = t + 4*12; + sp_digit* t4 = t + 6*12; + sp_digit* t5 = t + 8*12; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Ensure only the first point is the same as the result. 
*/ + if (q == r) { + const sp_point_384* a = p; + p = q; + q = a; + } + + /* Check double */ + (void)sp_384_sub_12(t1, p384_mod, q->y); + sp_384_norm_12(t1); + if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & + (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { + sp_384_proj_point_dbl_12(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_384)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<12; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<12; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<12; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t1, t1, x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_12(t3, t3, y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_12(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_12(t4, t4, t3, p384_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(x, x, t5, p384_mod); + sp_384_mont_dbl_12(t1, y, p384_mod); + sp_384_mont_sub_12(x, x, t1, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_12(y, y, x, p384_mod); + sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(y, y, t5, p384_mod); + } +} + +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
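+ * Fixed 4-bit window method: the sixteen small multiples 0..15 of g are
+ * pre-computed, then each window of the scalar costs four point
+ * doublings and one table addition.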
+ */ +static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k, + int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 td[16]; + sp_point_384 rtd; + sp_digit tmpd[2 * 12 * 6]; +#endif + sp_point_384* t; + sp_point_384* rt; + sp_digit* tmp; + sp_digit n; + int i; + int c, y; + int err; + + (void)heap; + + err = sp_384_point_new_12(heap, rtd, rt); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#else + t = td; + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + (void)sp_384_mod_mul_norm_12(t[1].x, g->x, p384_mod); + (void)sp_384_mod_mul_norm_12(t[1].y, g->y, p384_mod); + (void)sp_384_mod_mul_norm_12(t[1].z, g->z, p384_mod); + t[1].infinity = 0; + sp_384_proj_point_dbl_12(&t[ 2], &t[ 1], tmp); + t[ 2].infinity = 0; + sp_384_proj_point_add_12(&t[ 3], &t[ 2], &t[ 1], tmp); + t[ 3].infinity = 0; + sp_384_proj_point_dbl_12(&t[ 4], &t[ 2], tmp); + t[ 4].infinity = 0; + sp_384_proj_point_add_12(&t[ 5], &t[ 3], &t[ 2], tmp); + t[ 5].infinity = 0; + sp_384_proj_point_dbl_12(&t[ 6], &t[ 3], tmp); + t[ 6].infinity = 0; + sp_384_proj_point_add_12(&t[ 7], &t[ 4], &t[ 3], tmp); + t[ 7].infinity = 0; + sp_384_proj_point_dbl_12(&t[ 8], &t[ 4], tmp); + t[ 8].infinity = 0; + sp_384_proj_point_add_12(&t[ 9], &t[ 5], &t[ 4], tmp); + t[ 9].infinity = 0; + sp_384_proj_point_dbl_12(&t[10], &t[ 5], tmp); + t[10].infinity = 0; + sp_384_proj_point_add_12(&t[11], &t[ 6], &t[ 5], tmp); + t[11].infinity = 0; + sp_384_proj_point_dbl_12(&t[12], &t[ 6], tmp); + t[12].infinity = 0; + sp_384_proj_point_add_12(&t[13], &t[ 7], &t[ 6], tmp); + t[13].infinity = 0; + sp_384_proj_point_dbl_12(&t[14], &t[ 7], tmp); + t[14].infinity = 0; + sp_384_proj_point_add_12(&t[15], &t[ 8], &t[ 7], tmp); + t[15].infinity = 0; + + i = 10; + n = k[i+1] << 0; + c = 28; + y = n >> 28; + XMEMCPY(rt, &t[y], sizeof(sp_point_384)); + n <<= 4; + for (; i>=0 || c>=4; ) { + if (c < 4) { + n |= k[i--]; + c += 32; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + + sp_384_proj_point_dbl_12(rt, rt, tmp); + sp_384_proj_point_dbl_12(rt, rt, tmp); + sp_384_proj_point_dbl_12(rt, rt, tmp); + sp_384_proj_point_dbl_12(rt, rt, tmp); + + sp_384_proj_point_add_12(rt, rt, &t[y], tmp); + } + + if (map != 0) { + sp_384_map_12(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_384)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 12 * 6); + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + } + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_point_384) * 16); + XFREE(t, heap, DYNAMIC_TYPE_ECC); + } +#else + ForceZero(tmpd, sizeof(tmpd)); + ForceZero(td, sizeof(td)); +#endif + sp_384_point_free_12(rt, 1, heap); + + return err; +} + +/* A table entry for pre-computed points. */ +typedef struct sp_table_entry_384 { + sp_digit x[12]; + sp_digit y[12]; +} sp_table_entry_384; + +#ifdef FP_ECC +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. 
+ * n Number of times to double + * t Temporary ordinate data. + */ +static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*12; + sp_digit* b = t + 4*12; + sp_digit* t1 = t + 6*12; + sp_digit* t2 = t + 8*12; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_384_mont_dbl_12(y, y, p384_mod); + /* W = Z^4 */ + sp_384_mont_sqr_12(w, z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(w, w, p384_mod, p384_mp_mod); + +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(t1, t1, w, p384_mod); + sp_384_mont_tpl_12(a, t1, p384_mod); + /* B = X*Y^2 */ + sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod); + /* X = A^2 - 2B */ + sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(t2, b, p384_mod); + sp_384_mont_sub_12(x, x, t2, p384_mod); + /* Z = Z*Y */ + sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod); + /* t2 = Y^4 */ + sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_384_mont_mul_12(w, w, t1, p384_mod, p384_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_384_mont_sub_12(y, b, x, p384_mod); + sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(y, y, p384_mod); + sp_384_mont_sub_12(y, y, t1, p384_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(t1, t1, w, p384_mod); + sp_384_mont_tpl_12(a, t1, p384_mod); + /* B = X*Y^2 */ + sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod); + /* X = A^2 - 2B */ + sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(t2, b, p384_mod); + sp_384_mont_sub_12(x, x, t2, p384_mod); + /* Z = Z*Y */ + sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod); + /* t2 = Y^4 */ + sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_384_mont_sub_12(y, b, x, p384_mod); + sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(y, y, p384_mod); + sp_384_mont_sub_12(y, y, t1, p384_mod); +#endif + /* Y = Y/2 */ + sp_384_div2_12(y, y, p384_mod); +} + +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_384_proj_to_affine_12(sp_point_384* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 12; + sp_digit* tmp = t + 4 * 12; + + sp_384_mont_inv_12(t1, a->z, tmp); + + sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod); + + sp_384_mont_mul_12(a->x, a->x, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(a->y, a->y, t1, p384_mod, p384_mp_mod); + XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod)); +} + +#endif /* FP_ECC */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
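+ * With Z2 = 1 (in Montgomery form) U1 = X1 and S1 = Y1 need no
+ * computation, saving one field squaring and four multiplications
+ * compared to the general addition.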
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p,
+        const sp_point_384* q, sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* t3 = t + 4*12;
+    sp_digit* t4 = t + 6*12;
+    sp_digit* t5 = t + 8*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double */
+    (void)sp_384_sub_12(t1, p384_mod, q->y);
+    sp_384_norm_12(t1);
+    if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
+        (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_12(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<12; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<12; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<12; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - X1 */
+        sp_384_mont_sub_12(t2, t2, x, p384_mod);
+        /* R = S2 - Y1 */
+        sp_384_mont_sub_12(t4, t4, y, p384_mod);
+        /* Z3 = H*Z1 */
+        sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_384_mont_sqr_12(t1, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t3, x, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(x, t1, t5, p384_mod);
+        sp_384_mont_dbl_12(t1, t3, p384_mod);
+        sp_384_mont_sub_12(x, x, t1, p384_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_384_mont_sub_12(t3, t3, x, p384_mod);
+        sp_384_mont_mul_12(t3, t3, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, y, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(y, t3, t5, p384_mod);
+    }
+}
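+
+/* Editor's note: this "qz1" variant is a mixed Jacobian+affine addition.
+ * Because q->z == 1, U1 = X1 and S1 = Y1 need no computation, so the body
+ * above gets away with 8 Montgomery multiplications and 3 squarings where
+ * a general projective addition needs roughly 12 and 4.  That is exactly
+ * why the stripe tables below store their entries in affine form. */
+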
+#ifdef WOLFSSL_SP_SMALL
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap   Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_12(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<4; i++) {
+            sp_384_proj_point_dbl_n_12(t, 96, tmp);
+            sp_384_proj_to_affine_12(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<4; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+                sp_384_proj_to_affine_12(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_12(s2, 0, heap);
+    sp_384_point_free_12(s1, 0, heap);
+    sp_384_point_free_12( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
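+
+/* Editor's note: the stripe table built above is easiest to read as a
+ * positional system: for a 4-bit index y, table[y] holds the affine point
+ *     (bit 0 of y)*A + (bit 1 of y)*(2^96)A
+ *         + (bit 2 of y)*(2^192)A + (bit 3 of y)*(2^288)A
+ * so one table lookup stands in for up to three point additions.  The
+ * sketch below is illustrative only (not part of wolfSSL); it shows how
+ * the multiply that follows assembles such an index, taking one bit from
+ * each 96-bit stripe of the scalar at the same column position.
+ */
+#if 0
+static int stripe_index_sketch(const sp_digit* k, int col)
+{
+    int j;
+    int y = 0;
+    for (j = 0; j < 4; j++) {
+        int x = col + j * 96;                   /* same column in each stripe */
+        y |= (int)((k[x / 32] >> (x % 32)) & 1) << j;
+    }
+    return y;                                   /* index into table[0..15] */
+}
+#endif
+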
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r      Resulting point.
+ * g      Point to multiply (unused here; the table already encodes it).
+ * table  Pre-computed table of multiples of the point.
+ * k      Scalar to multiply by.
+ * map    Indicates whether to convert result to affine.
+ * heap   Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 12 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=95; j<4; j++,x+=96) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=94; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<4; j++,x+=96) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_12(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[12];
+    sp_digit y[12];
+    sp_table_entry_384 table[16];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+                sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
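+
+/* Editor's note: the point comparisons in sp_ecc_get_cache_384() (and in
+ * sp_384_proj_point_add_qz1_12() above) combine their results with the
+ * bitwise '&' rather than '&&': sp_384_cmp_equal_12() is branch-free, so
+ * the combined test avoids short-circuit branches on the compared limbs.
+ * A minimal sketch of that comparison style, assuming 12 limbs
+ * (illustrative only; the real routine is defined earlier in this file):
+ */
+#if 0
+static int cmp_equal_sketch(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit d = 0;
+    int i;
+    for (i = 0; i < 12; i++) {
+        d |= a[i] ^ b[i];   /* accumulate differing bits, no early exit */
+    }
+    return d == 0;          /* single data-independent test at the end */
+}
+#endif
+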
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r      Resulting point.
+ * g      Point to multiply.
+ * k      Scalar to multiply by.
+ * map    Indicates whether to convert result to affine.
+ * heap   Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 12 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_384 == 0) {
+        wc_InitMutex(&sp_cache_384_lock);
+        initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+        err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap   Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_12(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_384_proj_point_dbl_n_12(t, 48, tmp);
+            sp_384_proj_to_affine_12(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+                sp_384_proj_to_affine_12(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_12(s2, 0, heap);
+    sp_384_point_free_12(s1, 0, heap);
+    sp_384_point_free_12( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
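+
+/* Editor's note: this non-WOLFSSL_SP_SMALL build trades memory for speed.
+ * Worked out from the sizes above: 8 stripes of 48 bits give a table of
+ * 2^8 = 256 affine entries, each storing x[12] and y[12] 32-bit digits,
+ * i.e. 256 * 2 * 12 * 4 = 24576 bytes per cached point, and the multiply
+ * below then needs only 47 double/add iterations after the initial
+ * lookup.  The small build above uses 4 stripes of 96 bits: 2^4 = 16
+ * entries (1536 bytes) but 95 iterations. */
+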
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r      Resulting point.
+ * g      Point to multiply (unused here; the table already encodes it).
+ * table  Pre-computed table of multiples of the point.
+ * k      Scalar to multiply by.
+ * map    Indicates whether to convert result to affine.
+ * heap   Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 12 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=47; j<8; j++,x+=48) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=46; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=48) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_12(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[12];
+    sp_digit y[12];
+    sp_table_entry_384 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+                sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
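+
+/* Editor's note: everything above is internal; callers reach P-384 point
+ * multiplication through the public sp_ecc_mulmod_384() defined below.
+ * A minimal usage sketch (illustrative only; error handling trimmed, and
+ * wc_ecc_new_point()/wc_ecc_del_point() are the standard wolfCrypt
+ * helpers for allocating and freeing an ecc_point):
+ */
+#if 0
+static int mulmod_usage_sketch(mp_int* k, ecc_point* base, void* heap)
+{
+    int err;
+    ecc_point* res = wc_ecc_new_point();
+    if (res == NULL)
+        return MEMORY_E;
+    /* map = 1: convert the result back to affine coordinates */
+    err = sp_ecc_mulmod_384(k, base, res, 1, heap);
+    wc_ecc_del_point(res);
+    return err;
+}
+#endif
+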
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r      Resulting point.
+ * g      Point to multiply.
+ * k      Scalar to multiply by.
+ * map    Indicates whether to convert result to affine.
+ * heap   Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 12 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_384 == 0) {
+        wc_InitMutex(&sp_cache_384_lock);
+        initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+        err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km     Scalar to multiply by.
+ * gm     Point to multiply.
+ * r      Resulting point.
+ * map    Indicates whether to convert result to affine.
+ * heap   Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+        void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 12, km);
+        sp_384_point_from_ecc_point_12(point, gm);
+
+        err = sp_384_ecc_mulmod_12(point, point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+static const sp_table_entry_384 p384_table[16] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
+        0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
+      { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
+        0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
+    /* 2 */
+    { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
+        0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
+      { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
+        0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
+    /* 3 */
+    { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
+        0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
+      { 
0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047, + 0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } }, + /* 4 */ + { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c, + 0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 }, + { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc, + 0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } }, + /* 5 */ + { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98, + 0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c }, + { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28, + 0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } }, + /* 6 */ + { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e, + 0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 }, + { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec, + 0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } }, + /* 7 */ + { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b, + 0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b }, + { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b, + 0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } }, + /* 8 */ + { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9, + 0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 }, + { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1, + 0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } }, + /* 9 */ + { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc, + 0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a }, + { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18, + 0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } }, + /* 10 */ + { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247, + 0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 }, + { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d, + 0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } }, + /* 11 */ + { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12, + 0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e }, + { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f, + 0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } }, + /* 12 */ + { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe, + 0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 }, + { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6, + 0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } }, + /* 13 */ + { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6, + 0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 }, + { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf, + 0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } }, + /* 14 */ + { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53, + 0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 }, + { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370, + 0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } }, + /* 15 */ + { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f, + 
0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc }, + { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2, + 0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } }, +}; + +/* Multiply the base point of P384 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k, + int map, void* heap) +{ + return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table, + k, map, heap); +} + +#else +static const sp_table_entry_384 p384_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc, + 0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 }, + { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756, + 0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } }, + /* 2 */ + { { 0x2b0c535b,0x29864753,0x70506296,0x90dd6953,0x216ab9ac,0x038cd6b4, + 0xbe12d76a,0x3df9b7b7,0x5f347bdb,0x13f4d978,0x13e94489,0x222c5c9c }, + { 0x2680dc64,0x5f8e796f,0x58352417,0x120e7cb7,0xd10740b8,0x254b5d8a, + 0x5337dee6,0xc38b8efb,0x94f02247,0xf688c2e1,0x6c25bc4c,0x7b5c75f3 } }, + /* 3 */ + { { 0x9edffea5,0xe26a3cc3,0x37d7e9fc,0x35bbfd1c,0x9bde3ef6,0xf0e7700d, + 0x1a538f5a,0x0380eb47,0x05bf9eb3,0x2e9da8bb,0x1a460c3e,0xdbb93c73 }, + { 0xf526b605,0x37dba260,0xfd785537,0x95d4978e,0xed72a04a,0x24ed793a, + 0x76005b1a,0x26948377,0x9e681f82,0x99f557b9,0xd64954ef,0xae5f9557 } }, + /* 4 */ + { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3, + 0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 }, + { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc, + 0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } }, + /* 5 */ + { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480, + 0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 }, + { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047, + 0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } }, + /* 6 */ + { { 0x4fc52870,0x28f9c07a,0x1a53a961,0xce0b3748,0x0e1828d9,0xd550fa18, + 0x6adb225a,0xa24abaf7,0x6e58a348,0xd11ed0a5,0x948acb62,0xf3d811e6 }, + { 0x4c61ed22,0x8618dd77,0x80b47c9d,0x0bb747f9,0xde6b8559,0x22bf796f, + 0x680a21e9,0xfdfd1c6d,0x2af2c9dd,0xc0db1577,0xc1e90f3d,0xa09379e6 } }, + /* 7 */ + { { 0xe085c629,0x386c66ef,0x095bc89a,0x5fc2a461,0x203f4b41,0x1353d631, + 0x7e4bd8f5,0x7ca1972b,0xa7df8ce9,0xb077380a,0xee7e4ea3,0xd8a90389 }, + { 0xe7b14461,0x1bc74dc7,0x0c9c4f78,0xdc2cb014,0x84ef0a10,0x52b4b3a6, + 0x20327fe2,0xbde6ea5d,0x660f9615,0xb71ec435,0xb8ad8173,0xeede5a04 } }, + /* 8 */ + { { 0x893b9a2d,0x5584cbb3,0x00850c5d,0x820c660b,0x7df2d43d,0x4126d826, + 0x0109e801,0xdd5bbbf0,0x38172f1c,0x85b92ee3,0xf31430d9,0x609d4f93 }, + { 0xeadaf9d6,0x1e059a07,0x0f125fb0,0x70e6536c,0x560f20e7,0xd6220751, + 0x7aaf3a9a,0xa59489ae,0x64bae14e,0x7b70e2f6,0x76d08249,0x0dd03701 } }, + /* 9 */ + { { 0x8510521f,0x4cc13be8,0xf724cc17,0x87315ba9,0x353dc263,0xb49d83bb, + 0x0c279257,0x8b677efe,0xc93c9537,0x510a1c1c,0xa4702c99,0x33e30cd8 }, + { 
0x2208353f,0xf0ffc89d,0xced42b2b,0x0170fa8d,0x26e2a5f5,0x090851ed, + 0xecb52c96,0x81276455,0x7fe1adf4,0x0646c4e1,0xb0868eab,0x513f047e } }, + /* 10 */ + { { 0xdf5bdf53,0xc07611f4,0x58b11a6d,0x45d331a7,0x1c4ee394,0x58965daf, + 0x5a5878d1,0xba8bebe7,0x82dd3025,0xaecc0a18,0xa923eb8b,0xcf2a3899 }, + { 0xd24fd048,0xf98c9281,0x8bbb025d,0x841bfb59,0xc9ab9d53,0xb8ddf8ce, + 0x7fef044e,0x538a4cb6,0x23236662,0x092ac21f,0x0b66f065,0xa919d385 } }, + /* 11 */ + { { 0x85d480d8,0x3db03b40,0x1b287a7d,0x8cd9f479,0x4a8f3bae,0x8f24dc75, + 0x3db41892,0x482eb800,0x9c56e0f5,0x38bf9eb3,0x9a91dc6f,0x8b977320 }, + { 0x7209cfc2,0xa31b05b2,0x05b2db70,0x4c49bf85,0xd619527b,0x56462498, + 0x1fac51ba,0x3fe51039,0xab4b8342,0xfb04f55e,0x04c6eabf,0xc07c10dc } }, + /* 12 */ + { { 0xdb32f048,0xad22fe4c,0x475ed6df,0x5f23bf91,0xaa66b6cb,0xa50ce0c0, + 0xf03405c0,0xdf627a89,0xf95e2d6a,0x3674837d,0xba42e64e,0x081c95b6 }, + { 0xe71d6ceb,0xeba3e036,0x6c6b0271,0xb45bcccf,0x0684701d,0x67b47e63, + 0xe712523f,0x60f8f942,0x5cd47adc,0x82423472,0x87649cbb,0x83027d79 } }, + /* 13 */ + { { 0x3615b0b8,0xb3929ea6,0xa54dac41,0xb41441fd,0xb5b6a368,0x8995d556, + 0x167ef05e,0xa80d4529,0x6d25a27f,0xf6bcb4a1,0x7bd55b68,0x210d6a4c }, + { 0x25351130,0xf3804abb,0x903e37eb,0x1d2df699,0x084c25c8,0x5f201efc, + 0xa1c68e91,0x31a28c87,0x563f62a5,0x81dad253,0xd6c415d4,0x5dd6de70 } }, + /* 14 */ + { { 0x846612ce,0x29f470fd,0xda18d997,0x986f3eec,0x2f34af86,0x6b84c161, + 0x46ddaf8b,0x5ef0a408,0xe49e795f,0x14405a00,0xaa2f7a37,0x5f491b16 }, + { 0xdb41b38d,0xc7f07ae4,0x18fbfcaa,0xef7d119e,0x14443b19,0x3a18e076, + 0x79a19926,0x4356841a,0xe2226fbe,0x91f4a91c,0x3cc88721,0xdc77248c } }, + /* 15 */ + { { 0xe4b1ec9d,0xd570ff1a,0xe7eef706,0x21d23e0e,0xca19e086,0x3cde40f4, + 0xcd4bb270,0x7d6523c4,0xbf13aa6c,0x16c1f06c,0xd14c4b60,0x5aa7245a }, + { 0x44b74de8,0x37f81467,0x620a934e,0x839e7a17,0xde8b1aa1,0xf74d14e8, + 0xf30d75e2,0x8789fa51,0xc81c261e,0x09b24052,0x33c565ee,0x654e2678 } }, + /* 16 */ + { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c, + 0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 }, + { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc, + 0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } }, + /* 17 */ + { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98, + 0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c }, + { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28, + 0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } }, + /* 18 */ + { { 0x7d8c1bba,0x7ecbac01,0x90b0f3d5,0x6058f9c3,0xf6197d0f,0xaee116e3, + 0x4033b128,0xc4dd7068,0xc209b983,0xf084dba6,0x831dbc4a,0x97c7c2cf }, + { 0xf96010e8,0x2f4e61dd,0x529faa17,0xd97e4e20,0x69d37f20,0x4ee66660, + 0x3d366d72,0xccc139ed,0x13488e0f,0x690b6ee2,0xf3a6d533,0x7cad1dc5 } }, + /* 19 */ + { { 0xda57a41f,0x660a9a81,0xec0039b6,0xe74a0412,0x5e1dad15,0x42343c6b, + 0x46681d4c,0x284f3ff5,0x63749e89,0xb51087f1,0x6f9f2f13,0x070f23cc }, + { 0x5d186e14,0x542211da,0xfddb0dff,0x84748f37,0xdb1f4180,0x41a3aab4, + 0xa6402d0e,0x25ed667b,0x02f58355,0x2f2924a9,0xfa44a689,0x5844ee7c } }, + /* 20 */ + { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e, + 0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 }, + { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec, + 0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } }, + /* 21 */ + { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b, + 
0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b }, + { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b, + 0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } }, + /* 22 */ + { { 0x104cbba5,0xc023780d,0xfa35dd4c,0x6207e747,0x1ca9b6a3,0x35c23928, + 0x97987b10,0x4ff19be8,0x8022eee8,0xb8476bbf,0xd3bbe74d,0xaa0a4a14 }, + { 0x187d4543,0x20f94331,0x79f6e066,0x32153870,0xac7e82e1,0x83b0f74e, + 0x828f06ab,0xa7748ba2,0xc26ef35f,0xc5f0298a,0x8e9a7dbd,0x0f0c5070 } }, + /* 23 */ + { { 0xdef029dd,0x0c5c244c,0x850661b8,0x3dabc687,0xfe11d981,0x9992b865, + 0x6274dbad,0xe9801b8f,0x098da242,0xe54e6319,0x91a53d08,0x9929a91a }, + { 0x35285887,0x37bffd72,0xf1418102,0xbc759425,0xfd2e6e20,0x9280cc35, + 0xfbc42ee5,0x735c600c,0x8837619a,0xb7ad2864,0xa778c57b,0xa3627231 } }, + /* 24 */ + { { 0x91361ed8,0xae799b5c,0x6c63366c,0x47d71b75,0x1b265a6a,0x54cdd521, + 0x98d77b74,0xe0215a59,0xbab29db0,0x4424d9b7,0x7fd9e536,0x8b0ffacc }, + { 0x37b5d9ef,0x46d85d12,0xbfa91747,0x5b106d62,0x5f99ba2d,0xed0479f8, + 0x1d104de4,0x0e6f3923,0x25e8983f,0x83a84c84,0xf8105a70,0xa9507e0a } }, + /* 25 */ + { { 0x14cf381c,0xf6c68a6e,0xc22e31cc,0xaf9d27bd,0xaa8a5ccb,0x23568d4d, + 0xe338e4d2,0xe431eec0,0x8f52ad1f,0xf1a828fe,0xe86acd80,0xdb6a0579 }, + { 0x4507832a,0x2885672e,0x887e5289,0x73fc275f,0x05610d08,0x65f80278, + 0x075ff5b0,0x8d9b4554,0x09f712b5,0x3a8e8fb1,0x2ebe9cf2,0x39f0ac86 } }, + /* 26 */ + { { 0x4c52edf5,0xd8fabf78,0xa589ae53,0xdcd737e5,0xd791ab17,0x94918bf0, + 0xbcff06c9,0xb5fbd956,0xdca46d45,0xf6d3032e,0x41a3e486,0x2cdff7e1 }, + { 0x61f47ec8,0x6674b3ba,0xeef84608,0x8a882163,0x4c687f90,0xa257c705, + 0xf6cdf227,0xe30cb2ed,0x7f6ea846,0x2c4c64ca,0xcc6bcd3c,0x186fa17c } }, + /* 27 */ + { { 0x1dfcb91e,0x48a3f536,0x646d358a,0x83595e13,0x91128798,0xbd15827b, + 0x2187757a,0x3ce612b8,0x61bd7372,0x873150a1,0xb662f568,0xf4684530 }, + { 0x401896f6,0x8833950b,0x77f3e090,0xe11cb89a,0x48e7f4a5,0xb2f12cac, + 0xf606677e,0x313dd769,0x16579f93,0xfdcf08b3,0x46b8f22b,0x6429cec9 } }, + /* 28 */ + { { 0xbb75f9a4,0x4984dd54,0x29d3b570,0x4aef06b9,0x3d6e4c1e,0xb5f84ca2, + 0xb083ef35,0x24c61c11,0x392ca9ff,0xce4a7392,0x6730a800,0x865d6517 }, + { 0x722b4a2b,0xca3dfe76,0x7b083e0e,0x12c04bf9,0x1b86b8a5,0x803ce5b5, + 0x6a7e3e0c,0x3fc7632d,0xc81adbe4,0xc89970c2,0x120e16b1,0x3cbcd3ad } }, + /* 29 */ + { { 0xec30ce93,0xfbfb4cc7,0xb72720a2,0x10ed6c7d,0x47b55500,0xec675bf7, + 0x333ff7c3,0x90725903,0x5075bfc0,0xc7c3973e,0x07acf31b,0xb049ecb0 }, + { 0x4f58839c,0xb4076eaf,0xa2b05e4f,0x101896da,0xab40c66e,0x3f6033b0, + 0xc8d864ba,0x19ee9eeb,0x47bf6d2a,0xeb6cf155,0xf826477d,0x8e5a9663 } }, + /* 30 */ + { { 0xf7fbd5e1,0x69e62fdd,0x76912b1d,0x38ecfe54,0xd1da3bfb,0x845a3d56, + 0x1c86f0d4,0x0494950e,0x3bc36ce8,0x83cadbf9,0x4fccc8d1,0x41fce572 }, + { 0x8332c144,0x05f939c2,0x0871e46e,0xb17f248b,0x66e8aff6,0x3d8534e2, + 0x3b85c629,0x1d06f1dc,0xa3131b73,0xdb06a32e,0x8b3f64e5,0xf295184d } }, + /* 31 */ + { { 0x36ddc103,0xd9653ff7,0x95ef606f,0x25f43e37,0xfe06dce8,0x09e301fc, + 0x30b6eebf,0x85af2341,0x0ff56b20,0x79b12b53,0xfe9a3c6b,0x9b4fb499 }, + { 0x51d27ac2,0x0154f892,0x56ca5389,0xd33167e3,0xafc065a6,0x7828ec1f, + 0x7f746c9b,0x0959a258,0x0c44f837,0xb18f1be3,0xc4132fdb,0xa7946117 } }, + /* 32 */ + { { 0x5e3c647b,0xc0426b77,0x8cf05348,0xbfcbd939,0x172c0d3d,0x31d312e3, + 0xee754737,0x5f49fde6,0x6da7ee61,0x895530f0,0xe8b3a5fb,0xcf281b0a }, + { 0x41b8a543,0xfd149735,0x3080dd30,0x41a625a7,0x653908cf,0xe2baae07, + 0xba02a278,0xc3d01436,0x7b21b8f8,0xa0d0222e,0xd7ec1297,0xfdc270e9 } }, + /* 33 */ + { { 
0xbc7f41d6,0x00873c0c,0x1b7ad641,0xd976113e,0x238443fb,0x2a536ff4, + 0x41e62e45,0x030d00e2,0x5f545fc6,0x532e9867,0x8e91208c,0xcd033108 }, + { 0x9797612c,0xd1a04c99,0xeea674e2,0xd4393e02,0xe19742a1,0xd56fa69e, + 0x85f0590e,0xdd2ab480,0x48a2243d,0xa5cefc52,0x54383f41,0x48cc67b6 } }, + /* 34 */ + { { 0xfc14ab48,0x4e50430e,0x26706a74,0x195b7f4f,0xcc881ff6,0x2fe8a228, + 0xd945013d,0xb1b968e2,0x4b92162b,0x936aa579,0x364e754a,0x4fb766b7 }, + { 0x31e1ff7f,0x13f93bca,0xce4f2691,0x696eb5ca,0xa2b09e02,0xff754bf8, + 0xe58e3ff8,0x58f13c9c,0x1678c0b0,0xb757346f,0xa86692b3,0xd54200db } }, + /* 35 */ + { { 0x6dda1265,0x9a030bbd,0xe89718dd,0xf7b4f3fc,0x936065b8,0xa6a4931f, + 0x5f72241c,0xbce72d87,0x65775857,0x6cbb51cb,0x4e993675,0xc7161815 }, + { 0x2ee32189,0xe81a0f79,0x277dc0b2,0xef2fab26,0xb71f469f,0x9e64f6fe, + 0xdfdaf859,0xb448ce33,0xbe6b5df1,0x3f5c1c4c,0x1de45f7b,0xfb8dfb00 } }, + /* 36 */ + { { 0x4d5bb921,0xc7345fa7,0x4d2b667e,0x5c7e04be,0x282d7a3e,0x47ed3a80, + 0x7e47b2a4,0x5c2777f8,0x08488e2e,0x89b3b100,0xb2eb5b45,0x9aad77c2 }, + { 0xdaac34ae,0xd681bca7,0x26afb326,0x2452e4e5,0x41a1ee14,0x0c887924, + 0xc2407ade,0x743b04d4,0xfc17a2ac,0xcb5e999b,0x4a701a06,0x4dca2f82 } }, + /* 37 */ + { { 0x1127bc1a,0x68e31ca6,0x17ead3be,0xa3edd59b,0xe25f5a15,0x67b6b645, + 0xa420e15e,0x76221794,0x4b1e872e,0x794fd83b,0xb2dece1b,0x7cab3f03 }, + { 0xca9b3586,0x7119bf15,0x4d250bd7,0xa5545924,0xcc6bcf24,0x173633ea, + 0xb1b6f884,0x9bd308c2,0x447d38c3,0x3bae06f5,0xf341fe1c,0x54dcc135 } }, + /* 38 */ + { { 0x943caf0d,0x56d3598d,0x225ff133,0xce044ea9,0x563fadea,0x9edf6a7c, + 0x73e8dc27,0x632eb944,0x3190dcab,0x814b467e,0x6dbb1e31,0x2d4f4f31 }, + { 0xa143b7ca,0x8d69811c,0xde7cf950,0x4ec1ac32,0x37b5fe82,0x223ab5fd, + 0x9390f1d9,0xe82616e4,0x75804610,0xabff4b20,0x875b08f0,0x11b9be15 } }, + /* 39 */ + { { 0x3bbe682c,0x4ae31a3d,0x74eef2dd,0xbc7c5d26,0x3c47dd40,0x92afd10a, + 0xc14ab9e1,0xec7e0a3b,0xb2e495e4,0x6a6c3dd1,0x309bcd85,0x085ee5e9 }, + { 0x8c2e67fd,0xf381a908,0xe261eaf2,0x32083a80,0x96deee15,0x0fcd6a49, + 0x5e524c79,0xe3b8fb03,0x1d5b08b9,0x8dc360d9,0x7f26719f,0x3a06e2c8 } }, + /* 40 */ + { { 0x7237cac0,0x5cd9f5a8,0x43586794,0x93f0b59d,0xe94f6c4e,0x4384a764, + 0xb62782d3,0x8304ed2b,0xcde06015,0x0b8db8b3,0x5dbe190f,0x4336dd53 }, + { 0x92ab473a,0x57443553,0xbe5ed046,0x031c7275,0x21909aa4,0x3e78678c, + 0x99202ddb,0x4ab7e04f,0x6977e635,0x2648d206,0x093198be,0xd427d184 } }, + /* 41 */ + { { 0x0f9b5a31,0x822848f5,0xbaadb62a,0xbb003468,0x3357559c,0x233a0472, + 0x79aee843,0x49ef6880,0xaeb9e1e3,0xa89867a0,0x1f6f9a55,0xc151931b }, + { 0xad74251e,0xd264eb0b,0x4abf295e,0x37b9b263,0x04960d10,0xb600921b, + 0x4da77dc0,0x0de53dbc,0xd2b18697,0x01d9bab3,0xf7156ddf,0xad54ec7a } }, + /* 42 */ + { { 0x79efdc58,0x8e74dc35,0x4ff68ddb,0x456bd369,0xd32096a5,0x724e74cc, + 0x386783d0,0xe41cff42,0x7c70d8a4,0xa04c7f21,0xe61a19a2,0x41199d2f }, + { 0x29c05dd2,0xd389a3e0,0xe7e3fda9,0x535f2a6b,0x7c2b4df8,0x26ecf72d, + 0xfe745294,0x678275f4,0x9d23f519,0x6319c9cc,0x88048fc4,0x1e05a02d } }, + /* 43 */ + { { 0xd4d5ffe8,0x75cc8e2e,0xdbea17f2,0xf8bb4896,0xcee3cb4a,0x35059790, + 0xa47c6165,0x4c06ee85,0x92935d2f,0xf98fff25,0x32ffd7c7,0x34c4a572 }, + { 0xea0376a2,0xc4b14806,0x4f115e02,0x2ea5e750,0x1e55d7c0,0x532d76e2, + 0xf31044da,0x68dc9411,0x71b77993,0x9272e465,0x93a8cfd5,0xadaa38bb } }, + /* 44 */ + { { 0x7d4ed72a,0x4bf0c712,0xba1f79a3,0xda0e9264,0xf4c39ea4,0x48c0258b, + 0x2a715138,0xa5394ed8,0xbf06c660,0x4af511ce,0xec5c37cd,0xfcebceef }, + { 0x779ae8c1,0xf23b75aa,0xad1e606e,0xdeff59cc,0x22755c82,0xf3f526fd, + 
0xbb32cefd,0x64c5ab44,0x915bdefd,0xa96e11a2,0x1143813e,0xab19746a } }, + /* 45 */ + { { 0xec837d7d,0x43c78585,0xb8ee0ba4,0xca5b6fbc,0xd5dbb5ee,0x34e924d9, + 0xbb4f1ca5,0x3f4fa104,0x398640f7,0x15458b72,0xd7f407ea,0x4231faa9 }, + { 0xf96e6896,0x53e0661e,0xd03b0f9d,0x554e4c69,0x9c7858d1,0xd4fcb07b, + 0x52cb04fa,0x7e952793,0x8974e7f7,0x5f5f1574,0x6b6d57c8,0x2e3fa558 } }, + /* 46 */ + { { 0x6a9951a8,0x42cd4803,0x42792ad0,0xa8b15b88,0xabb29a73,0x18e8bcf9, + 0x409933e8,0xbfd9a092,0xefb88dc4,0x760a3594,0x40724458,0x14418863 }, + { 0x99caedc7,0x162a56ee,0x91d101c9,0x8fb12ecd,0x393202da,0xea671967, + 0xa4ccd796,0x1aac8c4a,0x1cf185a8,0x7db05036,0x8cfd095a,0x0c9f86cd } }, + /* 47 */ + { { 0x10b2a556,0x9a728147,0x327b70b2,0x767ca964,0x5e3799b7,0x04ed9e12, + 0x22a3eb2a,0x6781d2dc,0x0d9450ac,0x5bd116eb,0xa7ebe08a,0xeccac1fc }, + { 0xdc2d6e94,0xde68444f,0x35ecf21b,0x3621f429,0x29e03a2c,0x14e2d543, + 0x7d3e7f0a,0x53e42cd5,0x73ed00b9,0xbba26c09,0xc57d2272,0x00297c39 } }, + /* 48 */ + { { 0xb8243a7d,0x3aaaab10,0x8fa58c5b,0x6eeef93e,0x9ae7f764,0xf866fca3, + 0x61ab04d3,0x64105a26,0x03945d66,0xa3578d8a,0x791b848c,0xb08cd3e4 }, + { 0x756d2411,0x45edc5f8,0xa755128c,0xd4a790d9,0x49e5f6a0,0xc2cf0963, + 0xf649beaa,0xc66d267d,0x8467039e,0x3ce6d968,0x42f7816f,0x50046c6b } }, + /* 49 */ + { { 0x66425043,0x92ae1602,0xf08db890,0x1ff66afd,0x8f162ce5,0x386f5a7f, + 0xfcf5598f,0x18d2dea0,0x1a8ca18e,0x78372b3a,0x8cd0e6f7,0xdf0d20eb }, + { 0x75bb4045,0x7edd5e1d,0xb96d94b7,0x252a47ce,0x2c626776,0xbdb29358, + 0x40dd1031,0x853c3943,0x7d5f47fd,0x9dc9becf,0xbae4044a,0x27c2302f } }, + /* 50 */ + { { 0x8f2d49ce,0x2d1d208a,0x162df0a2,0x0d91aa02,0x09a07f65,0x9c5cce87, + 0x84339012,0xdf07238b,0x419442cd,0x5028e2c8,0x72062aba,0x2dcbd358 }, + { 0xe4680967,0xb5fbc3cb,0x9f92d72c,0x2a7bc645,0x116c369d,0x806c76e1, + 0x3177e8d8,0x5c50677a,0x4569df57,0x753739eb,0x36c3f40b,0x2d481ef6 } }, + /* 51 */ + { { 0xfea1103e,0x1a2d39fd,0x95f81b17,0xeaae5592,0xf59b264a,0xdbd0aa18, + 0xcb592ee0,0x90c39c1a,0x9750cca3,0xdf62f80d,0xdf97cc6c,0xda4d8283 }, + { 0x1e201067,0x0a6dd346,0x69fb1f6b,0x1531f859,0x1d60121f,0x4895e552, + 0x4c041c91,0x0b21aab0,0xbcc1ccf8,0x9d896c46,0x3141bde7,0xd24da3b3 } }, + /* 52 */ + { { 0x53b0a354,0x575a0537,0x0c6ddcd8,0x392ff2f4,0x56157b94,0x0b8e8cff, + 0x3b1b80d1,0x073e57bd,0x3fedee15,0x2a75e0f0,0xaa8e6f19,0x752380e4 }, + { 0x6558ffe9,0x1f4e227c,0x19ec5415,0x3a348618,0xf7997085,0xab382d5e, + 0xddc46ac2,0x5e6deaff,0xfc8d094c,0xe5144078,0xf60e37c6,0xf674fe51 } }, + /* 53 */ + { { 0xaf63408f,0x6fb87ae5,0xcd75a737,0xa39c36a9,0xcf4c618d,0x7833313f, + 0xf034c88d,0xfbcd4482,0x39b35288,0x4469a761,0x66b5d9c9,0x77a711c5 }, + { 0x944f8d65,0x4a695dc7,0x161aaba8,0xe6da5f65,0x24601669,0x8654e9c3, + 0x28ae7491,0xbc8b93f5,0x8f5580d8,0x5f1d1e83,0xcea32cc8,0x8ccf9a1a } }, + /* 54 */ + { { 0x7196fee2,0x28ab110c,0x874c8945,0x75799d63,0x29aedadd,0xa2629348, + 0x2be88ff4,0x9714cc7b,0xd58d60d6,0xf71293cf,0x32a564e9,0xda6b6cb3 }, + { 0x3dd821c2,0xf43fddb1,0x90dd323d,0xf2f2785f,0x048489f8,0x91246419, + 0xd24c6749,0x61660f26,0xc803c15c,0x961d9e8c,0xfaadc4c9,0x631c6158 } }, + /* 55 */ + { { 0xfd752366,0xacf2ebe0,0x139be88b,0xb93c340e,0x0f20179e,0x98f66485, + 0xff1da785,0x14820254,0x4f85c16e,0x5278e276,0x7aab1913,0xa246ee45 }, + { 0x53763b33,0x43861eb4,0x45c0bc0d,0xc49f03fc,0xad6b1ea1,0xafff16bc, + 0x6fd49c99,0xce33908b,0xf7fde8c3,0x5c51e9bf,0xff142c5e,0x076a7a39 } }, + /* 56 */ + { { 0x9e338d10,0x04639dfe,0xf42b411b,0x8ee6996f,0xa875cef2,0x960461d1, + 0x95b4d0ba,0x1057b6d6,0xa906e0bc,0x27639252,0xe1c20f8a,0x2c19f09a }, + { 
0xeef4c43d,0x5b8fc3f0,0x07a84aa9,0xe2e1b1a8,0x835d2bdb,0x5f455528, + 0x207132dd,0x0f4aee4d,0x3907f675,0xe9f8338c,0x0e0531f0,0x7a874dc9 } }, + /* 57 */ + { { 0x97c27050,0x84b22d45,0x59e70bf8,0xbd0b8df7,0x79738b9b,0xb4d67405, + 0xcd917c4f,0x47f4d5f5,0x13ce6e33,0x9099c4ce,0x521d0f8b,0x942bfd39 }, + { 0xa43b566d,0x5028f0f6,0x21bff7de,0xaf6e8669,0xc44232cd,0x83f6f856, + 0xf915069a,0x65680579,0xecfecb85,0xd12095a2,0xdb01ba16,0xcf7f06ae } }, + /* 58 */ + { { 0x8ef96c80,0x0f56e3c4,0x3ddb609c,0xd521f2b3,0x7dc1450d,0x2be94102, + 0x02a91fe2,0x2d21a071,0x1efa37de,0x2e6f74fa,0x156c28a1,0x9a9a90b8 }, + { 0x9dc7dfcb,0xc54ea9ea,0x2c2c1d62,0xc74e66fc,0x49d3e067,0x9f23f967, + 0x54dd38ad,0x1c7c3a46,0x5946cee3,0xc7005884,0x45cc045d,0x89856368 } }, + /* 59 */ + { { 0xfce73946,0x29da7cd4,0x23168563,0x8f697db5,0xcba92ec6,0x8e235e9c, + 0x9f91d3ea,0x55d4655f,0xaa50a6cd,0xf3689f23,0x21e6a1a0,0xdcf21c26 }, + { 0x61b818bf,0xcffbc82e,0xda47a243,0xc74a2f96,0x8bc1a0cf,0x234e980a, + 0x7929cb6d,0xf35fd6b5,0xefe17d6c,0x81468e12,0x58b2dafb,0xddea6ae5 } }, + /* 60 */ + { { 0x7e787b2e,0x294de887,0x39a9310d,0x258acc1f,0xac14265d,0x92d9714a, + 0x708b48a0,0x18b5591c,0xe1abbf71,0x27cc6bb0,0x568307b9,0xc0581fa3 }, + { 0xf24d4d58,0x9e0f58a3,0xe0ce2327,0xfebe9bb8,0x9d1be702,0x91fd6a41, + 0xfacac993,0x9a7d8a45,0x9e50d66d,0xabc0a08c,0x06498201,0x02c342f7 } }, + /* 61 */ + { { 0x157bdbc2,0xccd71407,0xad0e1605,0x72fa89c6,0xb92a015f,0xb1d3da2b, + 0xa0a3fe56,0x8ad9e7cd,0x24f06737,0x160edcbd,0x61275be6,0x79d4db33 }, + { 0x5f3497c4,0xd3d31fd9,0x04192fb0,0x8cafeaee,0x13a50af3,0xe13ca745, + 0x8c85aae5,0x18826167,0x9eb556ff,0xce06cea8,0xbdb549f3,0x2eef1995 } }, + /* 62 */ + { { 0x50596edc,0x8ed7d3eb,0x905243a2,0xaa359362,0xa4b6d02b,0xa212c2c2, + 0xc4fbec68,0x611fd727,0xb84f733d,0x8a0b8ff7,0x5f0daf0e,0xd85a6b90 }, + { 0xd4091cf7,0x60e899f5,0x2eff2768,0x4fef2b67,0x10c33964,0xc1f195cb, + 0x93626a8f,0x8275d369,0x0d6c840a,0xc77904f4,0x7a868acd,0x88d8b7fd } }, + /* 63 */ + { { 0x7bd98425,0x85f23723,0xc70b154e,0xd4463992,0x96687a2e,0xcbb00ee2, + 0xc83214fd,0x905fdbf7,0x13593684,0x2019d293,0xef51218e,0x0428c393 }, + { 0x981e909a,0x40c7623f,0x7be192da,0x92513385,0x4010907e,0x48fe480f, + 0x3120b459,0xdd7a187c,0xa1fd8f3c,0xc9d7702d,0xe358efc5,0x66e4753b } }, + /* 64 */ + { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9, + 0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 }, + { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1, + 0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } }, + /* 65 */ + { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc, + 0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a }, + { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18, + 0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } }, + /* 66 */ + { { 0xc6a2123f,0xb1a358f5,0xfe28df6d,0x927b2d95,0xf199d2f9,0x89702753, + 0x1a3f82dc,0x0a73754c,0x777affe1,0x063d029d,0xdae6d34d,0x5439817e }, + { 0x6b8b83c4,0xf7979eef,0x9d945682,0x615cb214,0xc5e57eae,0x8f0e4fac, + 0x113047dd,0x042b89b8,0x93f36508,0x888356dc,0x5fd1f32f,0xbf008d18 } }, + /* 67 */ + { { 0x4e8068db,0x8012aa24,0xa5729a47,0xc72cc641,0x43f0691d,0x3c33df2c, + 0x1d92145f,0xfa057347,0xb97f7946,0xaefc0f2f,0x2f8121bf,0x813d75cb }, + { 0x4383bba6,0x05613c72,0xa4224b3f,0xa924ce70,0x5f2179a6,0xe59cecbe, + 0x79f62b61,0x78e2e8aa,0x53ad8079,0x3ac2cc3b,0xd8f4fa96,0x55518d71 } }, + /* 68 */ + { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247, + 
0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 }, + { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d, + 0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } }, + /* 69 */ + { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12, + 0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e }, + { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f, + 0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } }, + /* 70 */ + { { 0xb0ab9645,0xb5e405d3,0xd5f1f711,0xaeec7f98,0x585c2a6e,0x8ad42311, + 0x512c6944,0x045acb9e,0xa90db1c6,0xae106c4e,0x898e6563,0xb89f33d5 }, + { 0x7fed2ce4,0x43b07cd9,0xdd815b20,0xf9934e17,0x0a81a349,0x6778d4d5, + 0x52918061,0x9e616ade,0xd7e67112,0xfa06db06,0x88488091,0x1da23cf1 } }, + /* 71 */ + { { 0x42f2c4b5,0x821c46b3,0x66059e47,0x931513ef,0x66f50cd1,0x7030ae43, + 0x43e7b127,0x43b536c9,0x5fca5360,0x006258cf,0x6b557abf,0xe4e3ee79 }, + { 0x24c8b22f,0xbb6b3900,0xfcbf1054,0x2eb5e2c1,0x567492af,0x937b18c9, + 0xacf53957,0xf09432e4,0x1dbf3a56,0x585f5a9d,0xbe0887cf,0xf86751fd } }, + /* 72 */ + { { 0x9d10e0b2,0x157399cb,0x60dc51b7,0x1c0d5956,0x1f583090,0x1d496b8a, + 0x88590484,0x6658bc26,0x03213f28,0x88c08ab7,0x7ae58de4,0x8d2e0f73 }, + { 0x486cfee6,0x9b79bc95,0xe9e5bc57,0x036a26c7,0xcd8ae97a,0x1ad03601, + 0xff3a0494,0x06907f87,0x2c7eb584,0x078f4bbf,0x7e8d0a5a,0xe3731bf5 } }, + /* 73 */ + { { 0xe1cd0abe,0x72f2282b,0x87efefa2,0xd4f9015e,0x6c3834bd,0x9d189806, + 0xb8a29ced,0x9c8cdcc1,0xfee82ebc,0x0601b9f4,0x7206a756,0x371052bc }, + { 0x46f32562,0x76fa1092,0x17351bb4,0xdaad534c,0xb3636bb5,0xc3d64c37, + 0x45d54e00,0x038a8c51,0x32c09e7c,0x301e6180,0x95735151,0x9764eae7 } }, + /* 74 */ + { { 0xcbd5256a,0x8791b19f,0x6ca13a3b,0x4007e0f2,0x4cf06904,0x03b79460, + 0xb6c17589,0xb18a9c22,0x81d45908,0xa1cb7d7d,0x21bb68f1,0x6e13fa9d }, + { 0xa71e6e16,0x47183c62,0xe18749ed,0x5cf0ef8e,0x2e5ed409,0x2c9c7f9b, + 0xe6e117e1,0x042eeacc,0x13fb5a7f,0xb86d4816,0xc9e5feb1,0xea1cf0ed } }, + /* 75 */ + { { 0xcea4cc9b,0x6e6573c9,0xafcec8f3,0x5417961d,0xa438b6f6,0x804bf02a, + 0xdcd4ea88,0xb894b03c,0x3799571f,0xd0f807e9,0x862156e8,0x3466a7f5 }, + { 0x56515664,0x51e59acd,0xa3c5eb0b,0x55b0f93c,0x6a4279db,0x84a06b02, + 0xc5fae08e,0x5c850579,0xa663a1a2,0xcf07b8db,0xf46ffc8d,0x49a36bbc } }, + /* 76 */ + { { 0x46d93106,0xe47f5acc,0xaa897c9c,0x65b7ade0,0x12d7e4be,0x37cf4c94, + 0xd4b2caa9,0xa2ae9b80,0xe60357a3,0x5e7ce09c,0xc8ecd5f9,0x29f77667 }, + { 0xa8a0b1c5,0xdf6868f5,0x62978ad8,0x240858cf,0xdc0002a1,0x0f7ac101, + 0xffe9aa05,0x1d28a9d7,0x5b962c97,0x744984d6,0x3d28c8b2,0xa8a7c00b } }, + /* 77 */ + { { 0xae11a338,0x7c58a852,0xd1af96e7,0xa78613f1,0x5355cc73,0x7e9767d2, + 0x792a2de6,0x6ba37009,0x124386b2,0x7d60f618,0x11157674,0xab09b531 }, + { 0x98eb9dd0,0x95a04841,0x15070328,0xe6c17acc,0x489c6e49,0xafc6da45, + 0xbb211530,0xab45a60a,0x7d7ea933,0xc58d6592,0x095642c6,0xa3ef3c65 } }, + /* 78 */ + { { 0xdf010879,0x89d420e9,0x39576179,0x9d25255d,0xe39513b6,0x9cdefd50, + 0xd5d1c313,0xe4efe45b,0x3f7af771,0xc0149de7,0x340ab06b,0x55a6b4f4 }, + { 0xebeaf771,0xf1325251,0x878d4288,0x2ab44128,0x18e05afe,0xfcd5832e, + 0xcc1fb62b,0xef52a348,0xc1c4792a,0x2bd08274,0x877c6dc7,0x345c5846 } }, + /* 79 */ + { { 0xbea65e90,0xde15ceb0,0x2416d99c,0x0987f72b,0xfd863dec,0x44db578d, + 0xac6a3578,0xf617b74b,0xdb48e999,0x9e62bd7a,0xeab1a1be,0x877cae61 }, + { 0x3a358610,0x23adddaa,0x325e2b07,0x2fc4d6d1,0x1585754e,0x897198f5, + 0xb392b584,0xf741852c,0xb55f7de1,0x9927804c,0x1aa8efae,0xe9e6c4ed } }, + /* 80 */ + { { 
0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe, + 0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 }, + { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6, + 0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } }, + /* 81 */ + { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6, + 0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 }, + { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf, + 0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } }, + /* 82 */ + { { 0xf8e60f5b,0x511188b4,0x48aa2ada,0x7fe67015,0x381abca2,0xdb333cb8, + 0xdaf3fc97,0xb15e6d9d,0x36aabc03,0x4b24f6eb,0x72a748b4,0xc59789df }, + { 0x29cf5279,0x26fcb8a5,0x01ad9a6c,0x7a3c6bfc,0x4b8bac9b,0x866cf88d, + 0x9c80d041,0xf4c89989,0x70add148,0xf0a04241,0x45d81a41,0x5a02f479 } }, + /* 83 */ + { { 0xc1c90202,0xfa5c877c,0xf8ac7570,0xd099d440,0xd17881f7,0x428a5b1b, + 0x5b2501d7,0x61e267db,0xf2e4465b,0xf889bf04,0x76aa4cb8,0x4da3ae08 }, + { 0xe3e66861,0x3ef0fe26,0x3318b86d,0x5e772953,0x747396df,0xc3c35fbc, + 0x439ffd37,0x5115a29c,0xb2d70374,0xbfc4bd97,0x56246b9d,0x088630ea } }, + /* 84 */ + { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53, + 0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 }, + { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370, + 0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } }, + /* 85 */ + { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f, + 0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc }, + { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2, + 0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } }, + /* 86 */ + { { 0xfecf5b9b,0x34e0f9d1,0xf206966a,0xa115b12b,0x1eaa0534,0x5591cf3b, + 0xfb1558f9,0x5f0293cb,0x1bc703a5,0x1c8507a4,0x862c1f81,0x92e6b81c }, + { 0xcdaf24e3,0xcc9ebc66,0x72fcfc70,0x68917ecd,0x8157ba48,0x6dc9a930, + 0xb06ab2b2,0x5d425c08,0x36e929c4,0x362f8ce7,0x62e89324,0x09f6f57c } }, + /* 87 */ + { { 0xd29375fb,0x1c7d6b78,0xe35d1157,0xfabd851e,0x4243ea47,0xf6f62dcd, + 0x8fe30b0f,0x1dd92460,0xffc6e709,0x08166dfa,0x0881e6a7,0xc6c4c693 }, + { 0xd6a53fb0,0x20368f87,0x9eb4d1f9,0x38718e9f,0xafd7e790,0x03f08acd, + 0x72fe2a1c,0x0835eb44,0x88076e5d,0x7e050903,0xa638e731,0x538f765e } }, + /* 88 */ + { { 0xc2663b4b,0x0e0249d9,0x47cd38dd,0xe700ab5b,0x2c46559f,0xb192559d, + 0x4bcde66d,0x8f9f74a8,0x3e2aced5,0xad161523,0x3dd03a5b,0xc155c047 }, + { 0x3be454eb,0x346a8799,0x83b7dccd,0x66ee94db,0xab9d2abe,0x1f6d8378, + 0x7733f355,0x4a396dd2,0xf53553c2,0x419bd40a,0x731dd943,0xd0ead98d } }, + /* 89 */ + { { 0xec142408,0x908e0b0e,0x4114b310,0x98943cb9,0x1742b1d7,0x03dbf7d8, + 0x693412f4,0xd270df6b,0x8f69e20c,0xc5065494,0x697e43a1,0xa76a90c3 }, + { 0x4624825a,0xe0fa3384,0x8acc34c2,0x82e48c0b,0xe9a14f2b,0x7b24bd14, + 0x4db30803,0x4f5dd5e2,0x932da0a3,0x0c77a9e7,0x74c653dc,0x20db90f2 } }, + /* 90 */ + { { 0x0e6c5fd9,0x261179b7,0x6c982eea,0xf8bec123,0xd4957b7e,0x47683338, + 0x0a72f66a,0xcc47e664,0x1bad9350,0xbd54bf6a,0xf454e95a,0xdfbf4c6a }, + { 0x6907f4fa,0x3f7a7afa,0x865ca735,0x7311fae0,0x2a496ada,0x24737ab8, + 0x15feb79b,0x13e425f1,0xa1b93c21,0xe9e97c50,0x4ddd3eb5,0xb26b6eac } }, + /* 91 */ + { { 0x2a2e5f2b,0x81cab9f5,0xbf385ac4,0xf93caf29,0xc909963a,0xf4bf35c3, + 0x74c9143c,0x081e7300,0xc281b4c5,0x3ea57fa8,0x9b340741,0xe497905c }, + { 0x55ab3cfb,0xf556dd8a,0x518db6ad,0xd444b96b,0x5ef4b955,0x34f5425a, + 
0xecd26aa3,0xdda7a3ac,0xda655e97,0xb57da11b,0xc2024c70,0x02da3eff } }, + /* 92 */ + { { 0x6481d0d9,0xe24b0036,0x818fdfe2,0x3740dbe5,0x190fda00,0xc1fc1f45, + 0x3cf27fde,0x329c9280,0x6934f43e,0x7435cb53,0x7884e8fe,0x2b505a5d }, + { 0x711adcc9,0x6cfcc6a6,0x531e21e1,0xf034325c,0x9b2a8a99,0xa2f4a967, + 0x3c21bdff,0x9d5f3842,0x31b57d66,0xb25c7811,0x0b8093b9,0xdb5344d8 } }, + /* 93 */ + { { 0xae50a2f5,0x0d72e667,0xe4a861d1,0x9b7f8d8a,0x330df1cb,0xa129f70f, + 0xe04fefc3,0xe90aa5d7,0xe72c3ae1,0xff561ecb,0xcdb955fa,0x0d8fb428 }, + { 0xd7663784,0xd2235f73,0x7e2c456a,0xc05baec6,0x2adbfccc,0xe5c292e4, + 0xefb110d5,0x4fd17988,0xd19d49f3,0x27e57734,0x84f679fe,0x188ac4ce } }, + /* 94 */ + { { 0xa796c53e,0x7ee344cf,0x0868009b,0xbbf6074d,0x474a1295,0x1f1594f7, + 0xac11632d,0x66776edc,0x04e2fa5a,0x1862278b,0xc854a89a,0x52665cf2 }, + { 0x8104ab58,0x7e376464,0x7204fd6d,0x16775913,0x44ea1199,0x86ca06a5, + 0x1c9240dd,0xaa3f765b,0x24746149,0x5f8501a9,0xdcd251d7,0x7b982e30 } }, + /* 95 */ + { { 0xc15f3060,0xe44e9efc,0xa87ebbe6,0x5ad62f2e,0xc79500d4,0x36499d41, + 0x336fa9d1,0xa66d6dc0,0x5afd3b1f,0xf8afc495,0xe5c9822b,0x1d8ccb24 }, + { 0x79d7584b,0x4031422b,0xea3f20dd,0xc54a0580,0x958468c5,0x3f837c8f, + 0xfbea7735,0x3d82f110,0x7dffe2fc,0x679a8778,0x20704803,0x48eba63b } }, + /* 96 */ + { { 0xdf46e2f6,0x89b10d41,0x19514367,0x13ab57f8,0x1d469c87,0x067372b9, + 0x4f6c5798,0x0c195afa,0x272c9acf,0xea43a12a,0x678abdac,0x9dadd8cb }, + { 0xe182579a,0xcce56c6b,0x2d26c2d8,0x86febadb,0x2a44745c,0x1c668ee1, + 0x98dc047a,0x580acd86,0x51b9ec2d,0x5a2b79cc,0x4054f6a0,0x007da608 } }, + /* 97 */ + { { 0x17b00dd0,0x9e3ca352,0x0e81a7a6,0x046779cb,0xd482d871,0xb999fef3, + 0xd9233fbc,0xe6f38134,0xf48cd0e0,0x112c3001,0x3c6c66ae,0x934e7576 }, + { 0xd73234dc,0xb44d4fc3,0x864eafc1,0xfcae2062,0x26bef21a,0x843afe25, + 0xf3b75fdf,0x61355107,0x794c2e6b,0x8367a5aa,0x8548a372,0x3d2629b1 } }, + /* 98 */ + { { 0x437cfaf8,0x6230618f,0x2032c299,0x5b8742cb,0x2293643a,0x949f7247, + 0x09464f79,0xb8040f1a,0x4f254143,0x049462d2,0x366c7e76,0xabd6b522 }, + { 0xd5338f55,0x119b392b,0x01495a0c,0x1a80a9ce,0xf8d7537e,0xf3118ca7, + 0x6bf4b762,0xb715adc2,0xa8482b6c,0x24506165,0x96a7c84d,0xd958d7c6 } }, + /* 99 */ + { { 0xbdc21f31,0x9ad8aa87,0x8063e58c,0xadb3cab4,0xb07dd7b8,0xefd86283, + 0x1be7c6b4,0xc7b9b762,0x015582de,0x2ef58741,0x299addf3,0xc970c52e }, + { 0x22f24d66,0x78f02e2a,0x74cc100a,0xefec1d10,0x09316e1a,0xaf2a6a39, + 0x5849dd49,0xce7c2205,0x96bffc4c,0x9c1fe75c,0x7ba06ec0,0xcad98fd2 } }, + /* 100 */ + { { 0xb648b73e,0xed76e2d0,0x1cfd285e,0xa9f92ce5,0x2ed13de1,0xa8c86c06, + 0xa5191a93,0x1d3a574e,0x1ad1b8bf,0x385cdf8b,0x47d2cfe3,0xbbecc28a }, + { 0x69cec548,0x98d326c0,0xf240a0b2,0x4f5bc1dd,0x29057236,0x241a7062, + 0xc68294a4,0x0fc6e9c5,0xa319f17a,0x4d04838b,0x9ffc1c6f,0x8b612cf1 } }, + /* 101 */ + { { 0x4c3830eb,0x9bb0b501,0x8ee0d0c5,0x3d08f83c,0x79ba9389,0xa4a62642, + 0x9cbc2914,0x5d5d4044,0x074c46f0,0xae9eb83e,0x74ead7d6,0x63bb758f }, + { 0xc6bb29e0,0x1c40d2ea,0x4b02f41e,0x95aa2d87,0x53cb199a,0x92989175, + 0x51584f6d,0xdd91bafe,0x31a1aaec,0x3715efb9,0x46780f9e,0xc1b6ae5b } }, + /* 102 */ + { { 0x42772f41,0xcded3e4b,0x3bcb79d1,0x3a700d5d,0x80feee60,0x4430d50e, + 0xf5e5d4bb,0x444ef1fc,0xe6e358ff,0xc660194f,0x6a91b43c,0xe68a2f32 }, + { 0x977fe4d2,0x5842775c,0x7e2a41eb,0x78fdef5c,0xff8df00e,0x5f3bec02, + 0x5852525d,0xf4b840cd,0x4e6988bd,0x0870483a,0xcc64b837,0x39499e39 } }, + /* 103 */ + { { 0xb08df5fe,0xfc05de80,0x63ba0362,0x0c12957c,0xd5cf1428,0xea379414, + 0x54ef6216,0xc559132a,0xb9e65cf8,0x33d5f12f,0x1695d663,0x09c60278 }, + { 
0x61f7a2fb,0x3ac1ced4,0xd4f5eeb8,0xdd838444,0x8318fcad,0x82a38c6c, + 0xe9f1a864,0x315be2e5,0x442daf47,0x317b5771,0x95aa5f9e,0x81b5904a } }, + /* 104 */ + { { 0x8b21d232,0x6b6b1c50,0x8c2cba75,0x87f3dbc0,0xae9f0faf,0xa7e74b46, + 0xbb7b8079,0x036a0985,0x8d974a25,0x4f185b90,0xd9af5ec9,0x5aa7cef0 }, + { 0x57dcfffc,0xe0566a70,0xb8453225,0x6ea311da,0x23368aa9,0x72ea1a8d, + 0x48cd552d,0xed9b2083,0xc80ea435,0xb987967c,0x6c104173,0xad735c75 } }, + /* 105 */ + { { 0xcee76ef4,0xaea85ab3,0xaf1d2b93,0x44997444,0xeacb923f,0x0851929b, + 0x51e3bc0c,0xb080b590,0x59be68a2,0xc4ee1d86,0x64b26cda,0xf00de219 }, + { 0xf2e90d4d,0x8d7fb5c0,0x77d9ec64,0x00e219a7,0x5d1c491c,0xc4e6febd, + 0x1a8f4585,0x080e3754,0x48d2af9c,0x4a9b86c8,0xb6679851,0x2ed70db6 } }, + /* 106 */ + { { 0x586f25cb,0xaee44116,0xa0fcf70f,0xf7b6861f,0x18a350e8,0x55d2cd20, + 0x92dc286f,0x861bf3e5,0x6226aba7,0x9ab18ffa,0xa9857b03,0xd15827be }, + { 0x92e6acef,0x26c1f547,0xac1fbac3,0x422c63c8,0xfcbfd71d,0xa2d8760d, + 0xb2511224,0x35f6a539,0x048d1a21,0xbaa88fa1,0xebf999db,0x49f1abe9 } }, + /* 107 */ + { { 0xf7492b73,0x16f9f4f4,0xcb392b1a,0xcf28ec1e,0x69ca6ffc,0x45b130d4, + 0xb72efa58,0x28ba8d40,0x5ca066f5,0xace987c7,0x4ad022eb,0x3e399246 }, + { 0x752555bb,0x63a2d84e,0x9c2ae394,0xaaa93b4a,0xc89539ca,0xcd80424e, + 0xaa119a99,0x6d6b5a6d,0x379f2629,0xbd50334c,0xef3cc7d3,0x899e925e } }, + /* 108 */ + { { 0xbf825dc4,0xb7ff3651,0x40b9c462,0x0f741cc4,0x5cc4fb5b,0x771ff5a9, + 0x47fd56fe,0xcb9e9c9b,0x5626c0d3,0xbdf053db,0xf7e14098,0xa97ce675 }, + { 0x6c934f5e,0x68afe5a3,0xccefc46f,0x6cd5e148,0xd7a88586,0xc7758570, + 0xdd558d40,0x49978f5e,0x64ae00c1,0xa1d5088a,0xf1d65bb2,0x58f2a720 } }, + /* 109 */ + { { 0x3e4daedb,0x66fdda4a,0x65d1b052,0x38318c12,0x4c4bbf5c,0x28d910a2, + 0x78a9cd14,0x762fe5c4,0xd2cc0aee,0x08e5ebaa,0xca0c654c,0xd2cdf257 }, + { 0x08b717d2,0x48f7c58b,0x386cd07a,0x3807184a,0xae7d0112,0x3240f626, + 0xc43917b0,0x03e9361b,0x20aea018,0xf261a876,0x7e1e6372,0x53f556a4 } }, + /* 110 */ + { { 0x2f512a90,0xc84cee56,0x1b0ea9f1,0x24b3c004,0xe26cc1ea,0x0ee15d2d, + 0xf0c9ef7d,0xd848762c,0xd5341435,0x1026e9c5,0xfdb16b31,0x8f5b73dc }, + { 0xd2c75d95,0x1f69bef2,0xbe064dda,0x8d33d581,0x57ed35e6,0x8c024c12, + 0xc309c281,0xf8d435f9,0xd6960193,0xfd295061,0xe9e49541,0x66618d78 } }, + /* 111 */ + { { 0x8ce382de,0x571cfd45,0xde900dde,0x175806ee,0x34aba3b5,0x61849965, + 0xde7aec95,0xe899778a,0xff4aa97f,0xe8f00f6e,0x010b0c6d,0xae971cb5 }, + { 0x3af788f1,0x1827eebc,0xe413fe2d,0xd46229ff,0x4741c9b4,0x8a15455b, + 0xf8e424eb,0x5f02e690,0xdae87712,0x40a1202e,0x64944f6d,0x49b3bda2 } }, + /* 112 */ + { { 0x035b2d69,0xd63c6067,0x6bed91b0,0xb507150d,0x7afb39b2,0x1f35f82f, + 0x16012b66,0xb9bd9c01,0xed0a5f50,0x00d97960,0x2716f7c9,0xed705451 }, + { 0x127abdb4,0x1576eff4,0xf01e701c,0x6850d698,0x3fc87e2f,0x9fa7d749, + 0xb0ce3e48,0x0b6bcc6f,0xf7d8c1c0,0xf4fbe1f5,0x02719cc6,0xcf75230e } }, + /* 113 */ + { { 0x722d94ed,0x6761d6c2,0x3718820e,0xd1ec3f21,0x25d0e7c6,0x65a40b70, + 0xbaf3cf31,0xd67f830e,0xb93ea430,0x633b3807,0x0bc96c69,0x17faa0ea }, + { 0xdf866b98,0xe6bf3482,0xa9db52d4,0x205c1ee9,0xff9ab869,0x51ef9bbd, + 0x75eeb985,0x3863dad1,0xd3cf442a,0xef216c3b,0xf9c8e321,0x3fb228e3 } }, + /* 114 */ + { { 0x0760ac07,0x94f9b70c,0x9d79bf4d,0xf3c9ccae,0xc5ffc83d,0x73cea084, + 0xdc49c38e,0xef50f943,0xbc9e7330,0xf467a2ae,0x44ea7fba,0x5ee534b6 }, + { 0x03609e7f,0x20cb6272,0x62fdc9f0,0x09844355,0x0f1457f7,0xaf5c8e58, + 0xb4b25941,0xd1f50a6c,0x2ec82395,0x77cb247c,0xda3dca33,0xa5f3e1e5 } }, + /* 115 */ + { { 0x7d85fa94,0x023489d6,0x2db9ce47,0x0ba40537,0xaed7aad1,0x0fdf7a1f, + 
0x9a4ccb40,0xa57b0d73,0x5b18967c,0x48fcec99,0xb7274d24,0xf30b5b6e }, + { 0xc81c5338,0x7ccb4773,0xa3ed6bd0,0xb85639e6,0x1d56eada,0x7d9df95f, + 0x0a1607ad,0xe256d57f,0x957574d6,0x6da7ffdc,0x01c7a8c4,0x65f84046 } }, + /* 116 */ + { { 0xcba1e7f1,0x8d45d0cb,0x02b55f64,0xef0a08c0,0x17e19892,0x771ca31b, + 0x4885907e,0xe1843ecb,0x364ce16a,0x67797ebc,0x8df4b338,0x816d2b2d }, + { 0x39aa8671,0xe870b0e5,0xc102b5f5,0x9f0db3e4,0x1720c697,0x34296659, + 0x613c0d2a,0x0ad4c89e,0x418ddd61,0x1af900b2,0xd336e20e,0xe087ca72 } }, + /* 117 */ + { { 0xaba10079,0x222831ff,0x6d64fff2,0x0dc5f87b,0x3e8cb330,0x44547907, + 0x702a33fb,0xe815aaa2,0x5fba3215,0x338d6b2e,0x79f549c8,0x0f7535cb }, + { 0x2ee95923,0x471ecd97,0xc6d1c09f,0x1e868b37,0xc666ef4e,0x2bc7b8ec, + 0x808a4bfc,0xf5416589,0x3fbc4d2e,0xf23e9ee2,0x2d75125b,0x4357236c } }, + /* 118 */ + { { 0xba9cdb1b,0xfe176d95,0x2f82791e,0x45a1ca01,0x4de4cca2,0x97654af2, + 0x5cc4bcb9,0xbdbf9d0e,0xad97ac0a,0xf6a7df50,0x61359fd6,0xc52112b0 }, + { 0x4f05eae3,0x696d9ce3,0xe943ac2b,0x903adc02,0x0848be17,0xa9075347, + 0x2a3973e5,0x1e20f170,0x6feb67e9,0xe1aacc1c,0xe16bc6b9,0x2ca0ac32 } }, + /* 119 */ + { { 0xef871eb5,0xffea12e4,0xa8bf0a7a,0x94c2f25d,0x78134eaa,0x4d1e4c2a, + 0x0360fb10,0x11ed16fb,0x85fc11be,0x4029b6db,0xf4d390fa,0x5e9f7ab7 }, + { 0x30646612,0x5076d72f,0xdda1d0d8,0xa0afed1d,0x85a1d103,0x29022257, + 0x4e276bcd,0xcb499e17,0x51246c3d,0x16d1da71,0x589a0443,0xc72d56d3 } }, + /* 120 */ + { { 0xdae5bb45,0xdf5ffc74,0x261bd6dc,0x99068c4a,0xaa98ec7b,0xdc0afa7a, + 0xf121e96d,0xedd2ee00,0x1414045c,0x163cc7be,0x335af50e,0xb0b1bbce }, + { 0x01a06293,0xd440d785,0x6552e644,0xcdebab7c,0x8c757e46,0x48cb8dbc, + 0x3cabe3cb,0x81f9cf78,0xb123f59a,0xddd02611,0xeeb3784d,0x3dc7b88e } }, + /* 121 */ + { { 0xc4741456,0xe1b8d398,0x6032a121,0xa9dfa902,0x1263245b,0x1cbfc86d, + 0x5244718c,0xf411c762,0x05b0fc54,0x96521d54,0xdbaa4985,0x1afab46e }, + { 0x8674b4ad,0xa75902ba,0x5ad87d12,0x486b43ad,0x36e0d099,0x72b1c736, + 0xbb6cd6d6,0x39890e07,0x59bace4e,0x8128999c,0x7b535e33,0xd8da430b } }, + /* 122 */ + { { 0xc6b75791,0x39f65642,0x21806bfb,0x050947a6,0x1362ef84,0x0ca3e370, + 0x8c3d2391,0x9bc60aed,0x732e1ddc,0x9b488671,0xa98ee077,0x12d10d9e }, + { 0x3651b7dc,0xb6f2822d,0x80abd138,0x6345a5ba,0x472d3c84,0x62033262, + 0xacc57527,0xd54a1d40,0x424447cb,0x6ea46b3a,0x2fb1a496,0x5bc41057 } }, + /* 123 */ + { { 0xa751cd0e,0xe70c57a3,0xeba3c7d6,0x190d8419,0x9d47d55a,0xb1c3bee7, + 0xf912c6d8,0xda941266,0x407a6ad6,0x12e9aacc,0x6e838911,0xd6ce5f11 }, + { 0x70e1f2ce,0x063ca97b,0x8213d434,0xa3e47c72,0x84df810a,0xa016e241, + 0xdfd881a4,0x688ad7b0,0xa89bf0ad,0xa37d99fc,0xa23c2d23,0xd8e3f339 } }, + /* 124 */ + { { 0x750bed6f,0xbdf53163,0x83e68b0a,0x808abc32,0x5bb08a33,0x85a36627, + 0x6b0e4abe,0xf72a3a0f,0xfaf0c6ad,0xf7716d19,0x5379b25f,0x22dcc020 }, + { 0xf9a56e11,0x7400bf8d,0x56a47f21,0x6cb8bad7,0x7a6eb644,0x7c97176f, + 0xd1f5b646,0xe8fd84f7,0x44ddb054,0x98320a94,0x1dde86f5,0x07071ba3 } }, + /* 125 */ + { { 0x98f8fcb9,0x6fdfa0e5,0x94d0d70c,0x89cec8e0,0x106d20a8,0xa0899397, + 0xba8acc9c,0x915bfb9a,0x5507e01c,0x1370c94b,0x8a821ffb,0x83246a60 }, + { 0xbe3c378f,0xa8273a9f,0x35a25be9,0x7e544789,0x4dd929d7,0x6cfa4972, + 0x365bd878,0x987fed9d,0x5c29a7ae,0x4982ac94,0x5ddd7ec5,0x4589a5d7 } }, + /* 126 */ + { { 0xa95540a9,0x9fabb174,0x0162c5b0,0x7cfb886f,0xea3dee18,0x17be766b, + 0xe88e624c,0xff7da41f,0x8b919c38,0xad0b71eb,0xf31ff9a9,0x86a522e0 }, + { 0x868bc259,0xbc8e6f72,0x3ccef9e4,0x6130c638,0x9a466555,0x09f1f454, + 0x19b2bfb4,0x8e6c0f09,0x0ca7bb22,0x945c46c9,0x4dafb67b,0xacd87168 } }, + /* 127 */ + { { 
0x10c53841,0x090c72ca,0x55a4fced,0xc20ae01b,0xe10234ad,0x03f7ebd5, + 0x85892064,0xb3f42a6a,0xb4a14722,0xbdbc30c0,0x8ca124cc,0x971bc437 }, + { 0x517ff2ff,0x6f79f46d,0xecba947b,0x6a9c96e2,0x62925122,0x5e79f2f4, + 0x6a4e91f1,0x30a96bb1,0x2d4c72da,0x1147c923,0x5811e4df,0x65bc311f } }, + /* 128 */ + { { 0x139b3239,0x87c7dd7d,0x4d833bae,0x8b57824e,0x9fff0015,0xbcbc4878, + 0x909eaf1a,0x8ffcef8b,0xf1443a78,0x9905f4ee,0xe15cbfed,0x020dd4a2 }, + { 0xa306d695,0xca2969ec,0xb93caf60,0xdf940cad,0x87ea6e39,0x67f7fab7, + 0xf98c4fe5,0x0d0ee10f,0xc19cb91e,0xc646879a,0x7d1d7ab4,0x4b4ea50c } }, + /* 129 */ + { { 0x7a0db57e,0x19e40945,0x9a8c9702,0xe6017cad,0x1be5cff9,0xdbf739e5, + 0xa7a938a2,0x3646b3cd,0x68350dfc,0x04511085,0x56e098b5,0xad3bd6f3 }, + { 0xee2e3e3e,0x935ebabf,0x473926cb,0xfbd01702,0x9e9fb5aa,0x7c735b02, + 0x2e3feff0,0xc52a1b85,0x046b405a,0x9199abd3,0x39039971,0xe306fcec } }, + /* 130 */ + { { 0x23e4712c,0xd6d9aec8,0xc3c198ee,0x7ca8376c,0x31bebd8a,0xe6d83187, + 0xd88bfef3,0xed57aff3,0xcf44edc7,0x72a645ee,0x5cbb1517,0xd4e63d0b }, + { 0xceee0ecf,0x98ce7a1c,0x5383ee8e,0x8f012633,0xa6b455e8,0x3b879078, + 0xc7658c06,0xcbcd3d96,0x0783336a,0x721d6fe7,0x5a677136,0xf21a7263 } }, + /* 131 */ + { { 0x9586ba11,0x19d8b3cd,0x8a5c0480,0xd9e0aeb2,0x2230ef5c,0xe4261dbf, + 0x02e6bf09,0x095a9dee,0x80dc7784,0x8963723c,0x145157b1,0x5c97dbaf }, + { 0x4bc4503e,0x97e74434,0x85a6b370,0x0fb1cb31,0xcd205d4b,0x3e8df2be, + 0xf8f765da,0x497dd1bc,0x6c988a1a,0x92ef95c7,0x64dc4cfa,0x3f924baa } }, + /* 132 */ + { { 0x7268b448,0x6bf1b8dd,0xefd79b94,0xd4c28ba1,0xe4e3551f,0x2fa1f8c8, + 0x5c9187a9,0x769e3ad4,0x40326c0d,0x28843b4d,0x50d5d669,0xfefc8094 }, + { 0x90339366,0x30c85bfd,0x5ccf6c3a,0x4eeb56f1,0x28ccd1dc,0x0e72b149, + 0xf2ce978e,0x73ee85b5,0x3165bb23,0xcdeb2bf3,0x4e410abf,0x8106c923 } }, + /* 133 */ + { { 0x7d02f4ee,0xc8df0161,0x18e21225,0x8a781547,0x6acf9e40,0x4ea895eb, + 0x6e5a633d,0x8b000cb5,0x7e981ffb,0xf31d86d5,0x4475bc32,0xf5c8029c }, + { 0x1b568973,0x764561ce,0xa62996ec,0x2f809b81,0xda085408,0x9e513d64, + 0xe61ce309,0xc27d815d,0x272999e0,0x0da6ff99,0xfead73f7,0xbd284779 } }, + /* 134 */ + { { 0x9b1cdf2b,0x6033c2f9,0xbc5fa151,0x2a99cf06,0x12177b3b,0x7d27d259, + 0xc4485483,0xb1f15273,0x102e2297,0x5fd57d81,0xc7f6acb7,0x3d43e017 }, + { 0x3a70eb28,0x41a8bb0b,0x3e80b06b,0x67de2d8e,0x70c28de5,0x09245a41, + 0xa7b26023,0xad7dbcb1,0x2cbc6c1e,0x70b08a35,0x9b33041f,0xb504fb66 } }, + /* 135 */ + { { 0xf97a27c2,0xa8e85ab5,0xc10a011b,0x6ac5ec8b,0xffbcf161,0x55745533, + 0x65790a60,0x01780e85,0x99ee75b0,0xe451bf85,0x39c29881,0x8907a63b }, + { 0x260189ed,0x76d46738,0x47bd35cb,0x284a4436,0x20cab61e,0xd74e8c40, + 0x416cf20a,0x6264bf8c,0x5fd820ce,0xfa5a6c95,0xf24bb5fc,0xfa7154d0 } }, + /* 136 */ + { { 0x9b3f5034,0x18482cec,0xcd9e68fd,0x962d445a,0x95746f23,0x266fb1d6, + 0x58c94a4b,0xc66ade5a,0xed68a5b6,0xdbbda826,0x7ab0d6ae,0x05664a4d }, + { 0x025e32fc,0xbcd4fe51,0xa96df252,0x61a5aebf,0x31592a31,0xd88a07e2, + 0x98905517,0x5d9d94de,0x5fd440e7,0x96bb4010,0xe807db4c,0x1b0c47a2 } }, + /* 137 */ + { { 0x08223878,0x5c2a6ac8,0xe65a5558,0xba08c269,0x9bbc27fd,0xd22b1b9b, + 0x72b9607d,0x919171bf,0xe588dc58,0x9ab455f9,0x23662d93,0x6d54916e }, + { 0x3b1de0c1,0x8da8e938,0x804f278f,0xa84d186a,0xd3461695,0xbf4988cc, + 0xe10eb0cb,0xf5eae3be,0xbf2a66ed,0x1ff8b68f,0xc305b570,0xa68daf67 } }, + /* 138 */ + { { 0x44b2e045,0xc1004cff,0x4b1c05d4,0x91b5e136,0x88a48a07,0x53ae4090, + 0xea11bb1a,0x73fb2995,0x3d93a4ea,0x32048570,0x3bfc8a5f,0xcce45de8 }, + { 0xc2b3106e,0xaff4a97e,0xb6848b4f,0x9069c630,0xed76241c,0xeda837a6, + 
0x6cc3f6cf,0x8a0daf13,0x3da018a8,0x199d049d,0xd9093ba3,0xf867c6b1 } }, + /* 139 */ + { { 0x56527296,0xe4d42a56,0xce71178d,0xae26c73d,0x6c251664,0x70a0adac, + 0x5dc0ae1d,0x813483ae,0xdaab2daf,0x7574eacd,0xc2d55f4f,0xc56b52dc }, + { 0x95f32923,0x872bc167,0x5bdd2a89,0x4be17581,0xa7699f00,0x9b57f1e7, + 0x3ac2de02,0x5fcd9c72,0x92377739,0x83af3ba1,0xfc50b97f,0xa64d4e2b } }, + /* 140 */ + { { 0x0e552b40,0x2172dae2,0xd34d52e8,0x62f49725,0x07958f98,0x7930ee40, + 0x751fdd74,0x56da2a90,0xf53e48c3,0xf1192834,0x8e53c343,0x34d2ac26 }, + { 0x13111286,0x1073c218,0xda9d9827,0x201dac14,0xee95d378,0xec2c29db, + 0x1f3ee0b1,0x9316f119,0x544ce71c,0x7890c9f0,0x27612127,0xd77138af } }, + /* 141 */ + { { 0x3b4ad1cd,0x78045e6d,0x4aa49bc1,0xcd86b94e,0xfd677a16,0x57e51f1d, + 0xfa613697,0xd9290935,0x34f4d893,0x7a3f9593,0x5d5fcf9b,0x8c9c248b }, + { 0x6f70d4e9,0x9f23a482,0x63190ae9,0x17273454,0x5b081a48,0x4bdd7c13, + 0x28d65271,0x1e2de389,0xe5841d1f,0x0bbaaa25,0x746772e5,0xc4c18a79 } }, + /* 142 */ + { { 0x593375ac,0x10ee2681,0x7dd5e113,0x4f3288be,0x240f3538,0x9a97b2fb, + 0x1de6b1e2,0xfa11089f,0x1351bc58,0x516da562,0x2dfa85b5,0x573b6119 }, + { 0x6cba7df5,0x89e96683,0x8c28ab40,0xf299be15,0xad43fcbf,0xe91c9348, + 0x9a1cefb3,0xe9bbc7cc,0x738b2775,0xc8add876,0x775eaa01,0x6e3b1f2e } }, + /* 143 */ + { { 0xb677788b,0x0365a888,0x3fd6173c,0x634ae8c4,0x9e498dbe,0x30498761, + 0xc8f779ab,0x08c43e6d,0x4c09aca9,0x068ae384,0x2018d170,0x2380c70b }, + { 0xa297c5ec,0xcf77fbc3,0xca457948,0xdacbc853,0x336bec7e,0x3690de04, + 0x14eec461,0x26bbac64,0x1f713abf,0xd1c23c7e,0xe6fd569e,0xf08bbfcd } }, + /* 144 */ + { { 0x84770ee3,0x5f8163f4,0x744a1706,0x0e0c7f94,0xe1b2d46d,0x9c8f05f7, + 0xd01fd99a,0x417eafe7,0x11440e5b,0x2ba15df5,0x91a6fbcf,0xdc5c552a }, + { 0xa270f721,0x86271d74,0xa004485b,0x32c0a075,0x8defa075,0x9d1a87e3, + 0xbf0d20fe,0xb590a7ac,0x8feda1f5,0x430c41c2,0x58f6ec24,0x454d2879 } }, + /* 145 */ + { { 0x7c525435,0x52b7a635,0x37c4bdbc,0x3d9ef57f,0xdffcc475,0x2bb93e9e, + 0x7710f3be,0xf7b8ba98,0x21b727de,0x42ee86da,0x2e490d01,0x55ac3f19 }, + { 0xc0c1c390,0x487e3a6e,0x446cde7b,0x036fb345,0x496ae951,0x089eb276, + 0x71ed1234,0xedfed4d9,0x900f0b46,0x661b0dd5,0x8582f0d3,0x11bd6f1b } }, + /* 146 */ + { { 0x076bc9d1,0x5cf9350f,0xcf3cd2c3,0x15d903be,0x25af031c,0x21cfc8c2, + 0x8b1cc657,0xe0ad3248,0x70014e87,0xdd9fb963,0x297f1658,0xf0f3a5a1 }, + { 0xf1f703aa,0xbb908fba,0x2f6760ba,0x2f9cc420,0x66a38b51,0x00ceec66, + 0x05d645da,0x4deda330,0xf7de3394,0xb9cf5c72,0x1ad4c906,0xaeef6502 } }, + /* 147 */ + { { 0x7a19045d,0x0583c8b1,0xd052824c,0xae7c3102,0xff6cfa58,0x2a234979, + 0x62c733c0,0xfe9dffc9,0x9c0c4b09,0x3a7fa250,0x4fe21805,0x516437bb }, + { 0xc2a23ddb,0x9454e3d5,0x289c104e,0x0726d887,0x4fd15243,0x8977d918, + 0x6d7790ba,0xc559e73f,0x465af85f,0x8fd3e87d,0x5feee46b,0xa2615c74 } }, + /* 148 */ + { { 0x4335167d,0xc8d607a8,0xe0f5c887,0x8b42d804,0x398d11f9,0x5f9f13df, + 0x20740c67,0x5aaa5087,0xa3d9234b,0x83da9a6a,0x2a54bad1,0xbd3a5c4e }, + { 0x2db0f658,0xdd13914c,0x5a3f373a,0x29dcb66e,0x5245a72b,0xbfd62df5, + 0x91e40847,0x19d18023,0xb136b1ae,0xd9df74db,0x3f93bc5b,0x72a06b6b } }, + /* 149 */ + { { 0xad19d96f,0x6da19ec3,0xfb2a4099,0xb342daa4,0x662271ea,0x0e61633a, + 0xce8c054b,0x3bcece81,0x8bd62dc6,0x7cc8e061,0xee578d8b,0xae189e19 }, + { 0xdced1eed,0x73e7a25d,0x7875d3ab,0xc1257f0a,0x1cfef026,0x2cb2d5a2, + 0xb1fdf61c,0xd98ef39b,0x24e83e6c,0xcd8e6f69,0xc7b7088b,0xd71e7076 } }, + /* 150 */ + { { 0x9d4245bf,0x33936830,0x2ac2953b,0x22d96217,0x56c3c3cd,0xb3bf5a82, + 0x0d0699e8,0x50c9be91,0x8f366459,0xec094463,0x513b7c35,0x6c056dba }, + { 
0x045ab0e3,0x687a6a83,0x445c9295,0x8d40b57f,0xa16f5954,0x0f345048, + 0x3d8f0a87,0x64b5c639,0x9f71c5e2,0x106353a2,0x874f0dd4,0xdd58b475 } }, + /* 151 */ + { { 0x62230c72,0x67ec084f,0x481385e3,0xf14f6cca,0x4cda7774,0xf58bb407, + 0xaa2dbb6b,0xe15011b1,0x0c035ab1,0xd488369d,0x8245f2fd,0xef83c24a }, + { 0x9fdc2538,0xfb57328f,0x191fe46a,0x79808293,0x32ede548,0xe28f5c44, + 0xea1a022c,0x1b3cda99,0x3df2ec7f,0x39e639b7,0x760e9a18,0x77b6272b } }, + /* 152 */ + { { 0xa65d56d5,0x2b1d51bd,0x7ea696e0,0x3a9b71f9,0x9904f4c4,0x95250ecc, + 0xe75774b7,0x8bc4d6eb,0xeaeeb9aa,0x0e343f8a,0x930e04cb,0xc473c1d1 }, + { 0x064cd8ae,0x282321b1,0x5562221c,0xf4b4371e,0xd1bf1221,0xc1cc81ec, + 0xe2c8082f,0xa52a07a9,0xba64a958,0x350d8e59,0x6fb32c9a,0x29e4f3de } }, + /* 153 */ + { { 0xba89aaa5,0x0aa9d56c,0xc4c6059e,0xf0208ac0,0xbd6ddca4,0x7400d9c6, + 0xf2c2f74a,0xb384e475,0xb1562dd3,0x4c1061fc,0x2e153b8d,0x3924e248 }, + { 0x849808ab,0xf38b8d98,0xa491aa36,0x29bf3260,0x88220ede,0x85159ada, + 0xbe5bc422,0x8b47915b,0xd7300967,0xa934d72e,0x2e515d0d,0xc4f30398 } }, + /* 154 */ + { { 0x1b1de38b,0xe3e9ee42,0x42636760,0xa124e25a,0x90165b1a,0x90bf73c0, + 0x146434c5,0x21802a34,0x2e1fa109,0x54aa83f2,0xed9c51e9,0x1d4bd03c }, + { 0x798751e6,0xc2d96a38,0x8c3507f5,0xed27235f,0xc8c24f88,0xb5fb80e2, + 0xd37f4f78,0xf873eefa,0xf224ba96,0x7229fd74,0x9edd7149,0x9dcd9199 } }, + /* 155 */ + { { 0x4e94f22a,0xee9f81a6,0xf71ec341,0xe5609892,0xa998284e,0x6c818ddd, + 0x3b54b098,0x9fd47295,0x0e8a7cc9,0x47a6ac03,0xb207a382,0xde684e5e }, + { 0x2b6b956b,0x4bdd1ecd,0xf01b3583,0x09084414,0x55233b14,0xe2f80b32, + 0xef5ebc5e,0x5a0fec54,0xbf8b29a2,0x74cf25e6,0x7f29e014,0x1c757fa0 } }, + /* 156 */ + { { 0xeb0fdfe4,0x1bcb5c4a,0xf0899367,0xd7c649b3,0x05bc083b,0xaef68e3f, + 0xa78aa607,0x57a06e46,0x21223a44,0xa2136ecc,0x52f5a50b,0x89bd6484 }, + { 0x4455f15a,0x724411b9,0x08a9c0fd,0x23dfa970,0x6db63bef,0x7b0da4d1, + 0xfb162443,0x6f8a7ec1,0xe98284fb,0xc1ac9cee,0x33566022,0x085a582b } }, + /* 157 */ + { { 0xec1f138a,0x15cb61f9,0x668f0c28,0x11c9a230,0xdf93f38f,0xac829729, + 0x4048848d,0xcef25698,0x2bba8fbf,0x3f686da0,0x111c619a,0xed5fea78 }, + { 0xd6d1c833,0x9b4f73bc,0x86e7bf80,0x50951606,0x042b1d51,0xa2a73508, + 0x5fb89ec2,0x9ef6ea49,0x5ef8b892,0xf1008ce9,0x9ae8568b,0x78a7e684 } }, + /* 158 */ + { { 0x10470cd8,0x3fe83a7c,0xf86df000,0x92734682,0xda9409b5,0xb5dac06b, + 0x94939c5f,0x1e7a9660,0x5cc116dc,0xdec6c150,0x66bac8cc,0x1a52b408 }, + { 0x6e864045,0x5303a365,0x9139efc1,0x45eae72a,0x6f31d54f,0x83bec646, + 0x6e958a6d,0x2fb4a86f,0x4ff44030,0x6760718e,0xe91ae0df,0x008117e3 } }, + /* 159 */ + { { 0x384310a2,0x5d5833ba,0x1fd6c9fc,0xbdfb4edc,0x849c4fb8,0xb9a4f102, + 0x581c1e1f,0xe5fb239a,0xd0a9746d,0xba44b2e7,0x3bd942b9,0x78f7b768 }, + { 0xc87607ae,0x076c8ca1,0xd5caaa7e,0x82b23c2e,0x2763e461,0x6a581f39, + 0x3886df11,0xca8a5e4a,0x264e7f22,0xc87e90cf,0x215cfcfc,0x04f74870 } }, + /* 160 */ + { { 0x141d161c,0x5285d116,0x93c4ed17,0x67cd2e0e,0x7c36187e,0x12c62a64, + 0xed2584ca,0xf5329539,0x42fbbd69,0xc4c777c4,0x1bdfc50a,0x107de776 }, + { 0xe96beebd,0x9976dcc5,0xa865a151,0xbe2aff95,0x9d8872af,0x0e0a9da1, + 0xa63c17cc,0x5e357a3d,0xe15cc67c,0xd31fdfd8,0x7970c6d8,0xc44bbefd } }, + /* 161 */ + { { 0x4c0c62f1,0x703f83e2,0x4e195572,0x9b1e28ee,0xfe26cced,0x6a82858b, + 0xc43638fa,0xd381c84b,0xa5ba43d8,0x94f72867,0x10b82743,0x3b4a783d }, + { 0x7576451e,0xee1ad7b5,0x14b6b5c8,0xc3d0b597,0xfcacc1b8,0x3dc30954, + 0x472c9d7b,0x55df110e,0x02f8a328,0x97c86ed7,0x88dc098f,0xd0433413 } }, + /* 162 */ + { { 0x2ca8f2fe,0x1a60d152,0x491bd41f,0x61640948,0x58dfe035,0x6dae29a5, + 
0x278e4863,0x9a615bea,0x9ad7c8e5,0xbbdb4477,0x2ceac2fc,0x1c706630 }, + { 0x99699b4b,0x5e2b54c6,0x239e17e8,0xb509ca6d,0xea063a82,0x728165fe, + 0xb6a22e02,0x6b5e609d,0xb26ee1df,0x12813905,0x439491fa,0x07b9f722 } }, + /* 163 */ + { { 0x48ff4e49,0x1592ec14,0x6d644129,0x3e4e9f17,0x1156acc0,0x7acf8288, + 0xbb092b0b,0x5aa34ba8,0x7d38393d,0xcd0f9022,0xea4f8187,0x416724dd }, + { 0xc0139e73,0x3c4e641c,0x91e4d87d,0xe0fe46cf,0xcab61f8a,0xedb3c792, + 0xd3868753,0x4cb46de4,0x20f1098a,0xe449c21d,0xf5b8ea6e,0x5e5fd059 } }, + /* 164 */ + { { 0x75856031,0x7fcadd46,0xeaf2fbd0,0x89c7a4cd,0x7a87c480,0x1af523ce, + 0x61d9ae90,0xe5fc1095,0xbcdb95f5,0x3fb5864f,0xbb5b2c7d,0xbeb5188e }, + { 0x3ae65825,0x3d1563c3,0x0e57d641,0x116854c4,0x1942ebd3,0x11f73d34, + 0xc06955b3,0x24dc5904,0x995a0a62,0x8a0d4c83,0x5d577b7d,0xfb26b86d } }, + /* 165 */ + { { 0xc686ae17,0xc53108e7,0xd1c1da56,0x9090d739,0x9aec50ae,0x4583b013, + 0xa49a6ab2,0xdd9a088b,0xf382f850,0x28192eea,0xf5fe910e,0xcc8df756 }, + { 0x9cab7630,0x877823a3,0xfb8e7fc1,0x64984a9a,0x364bfc16,0x5448ef9c, + 0xc44e2a9a,0xbbb4f871,0x435c95e9,0x901a41ab,0xaaa50a06,0xc6c23e5f } }, + /* 166 */ + { { 0x9034d8dd,0xb78016c1,0x0b13e79b,0x856bb44b,0xb3241a05,0x85c6409a, + 0x2d78ed21,0x8d2fe19a,0x726eddf2,0xdcc7c26d,0x25104f04,0x3ccaff5f }, + { 0x6b21f843,0x397d7edc,0xe975de4c,0xda88e4dd,0x4f5ab69e,0x5273d396, + 0x9aae6cc0,0x537680e3,0x3e6f9461,0xf749cce5,0x957bffd3,0x021ddbd9 } }, + /* 167 */ + { { 0x777233cf,0x7b64585f,0x0942a6f0,0xfe6771f6,0xdfe6eef0,0x636aba7a, + 0x86038029,0x63bbeb56,0xde8fcf36,0xacee5842,0xd4a20524,0x48d9aa99 }, + { 0x0da5e57a,0xcff7a74c,0xe549d6c9,0xc232593c,0xf0f2287b,0x68504bcc, + 0xbc8360b5,0x6d7d098d,0x5b402f41,0xeac5f149,0xb87d1bf1,0x61936f11 } }, + /* 168 */ + { { 0xb8153a9d,0xaa9da167,0x9e83ecf0,0xa49fe3ac,0x1b661384,0x14c18f8e, + 0x38434de1,0x61c24dab,0x283dae96,0x3d973c3a,0x82754fc9,0xc99baa01 }, + { 0x4c26b1e3,0x477d198f,0xa7516202,0x12e8e186,0x362addfa,0x386e52f6, + 0xc3962853,0x31e8f695,0x6aaedb60,0xdec2af13,0x29cf74ac,0xfcfdb4c6 } }, + /* 169 */ + { { 0xcca40298,0x6b3ee958,0xf2f5d195,0xc3878153,0xed2eae5b,0x0c565630, + 0x3a697cf2,0xd089b37e,0xad5029ea,0xc2ed2ac7,0x0f0dda6a,0x7e5cdfad }, + { 0xd9b86202,0xf98426df,0x4335e054,0xed1960b1,0x3f14639e,0x1fdb0246, + 0x0db6c670,0x17f709c3,0x773421e1,0xbfc687ae,0x26c1a8ac,0x13fefc4a } }, + /* 170 */ + { { 0x7ffa0a5f,0xe361a198,0xc63fe109,0xf4b26102,0x6c74e111,0x264acbc5, + 0x77abebaf,0x4af445fa,0x24cddb75,0x448c4fdd,0x44506eea,0x0b13157d }, + { 0x72e9993d,0x22a6b159,0x85e5ecbe,0x2c3c57e4,0xfd83e1a1,0xa673560b, + 0xc3b8c83b,0x6be23f82,0x40bbe38e,0x40b13a96,0xad17399b,0x66eea033 } }, + /* 171 */ + { { 0xb4c6c693,0x49fc6e95,0x36af7d38,0xefc735de,0x35fe42fc,0xe053343d, + 0x6a9ab7c3,0xf0aa427c,0x4a0fcb24,0xc79f0436,0x93ebbc50,0x16287243 }, + { 0x16927e1e,0x5c3d6bd0,0x673b984c,0x40158ed2,0x4cd48b9a,0xa7f86fc8, + 0x60ea282d,0x1643eda6,0xe2a1beed,0x45b393ea,0x19571a94,0x664c839e } }, + /* 172 */ + { { 0x27eeaf94,0x57745750,0xea99e1e7,0x2875c925,0x5086adea,0xc127e7ba, + 0x86fe424f,0x765252a0,0x2b6c0281,0x1143cc6c,0xd671312d,0xc9bb2989 }, + { 0x51acb0a5,0x880c337c,0xd3c60f78,0xa3710915,0x9262b6ed,0x496113c0, + 0x9ce48182,0x5d25d9f8,0xb3813586,0x53b6ad72,0x4c0e159c,0x0ea3bebc } }, + /* 173 */ + { { 0xc5e49bea,0xcaba450a,0x7c05da59,0x684e5415,0xde7ac36c,0xa2e9cab9, + 0x2e6f957b,0x4ca79b5f,0x09b817b1,0xef7b0247,0x7d89df0f,0xeb304990 }, + { 0x46fe5096,0x508f7307,0x2e04eaaf,0x695810e8,0x3512f76c,0x88ef1bd9, + 0x3ebca06b,0x77661351,0xccf158b7,0xf7d4863a,0x94ee57da,0xb2a81e44 } }, + /* 174 */ + { { 
0x6d53e6ba,0xff288e5b,0x14484ea2,0xa90de1a9,0xed33c8ec,0x2fadb60c, + 0x28b66a40,0x579d6ef3,0xec24372d,0x4f2dd6dd,0x1d66ec7d,0xe9e33fc9 }, + { 0x039eab6e,0x110899d2,0x3e97bb5e,0xa31a667a,0xcfdce68e,0x6200166d, + 0x5137d54b,0xbe83ebae,0x4800acdf,0x085f7d87,0x0c6f8c86,0xcf4ab133 } }, + /* 175 */ + { { 0x931e08fb,0x03f65845,0x1506e2c0,0x6438551e,0x9c36961f,0x5791f0dc, + 0xe3dcc916,0x68107b29,0xf495d2ca,0x83242374,0x6ee5895b,0xd8cfb663 }, + { 0xa0349b1b,0x525e0f16,0x4a0fab86,0x33cd2c6c,0x2af8dda9,0x46c12ee8, + 0x71e97ad3,0x7cc424ba,0x37621eb0,0x69766ddf,0xa5f0d390,0x95565f56 } }, + /* 176 */ + { { 0x1a0f5e94,0xe0e7bbf2,0x1d82d327,0xf771e115,0xceb111fa,0x10033e3d, + 0xd3426638,0xd269744d,0x00d01ef6,0xbdf2d9da,0xa049ceaf,0x1cb80c71 }, + { 0x9e21c677,0x17f18328,0x19c8f98b,0x6452af05,0x80b67997,0x35b9c5f7, + 0x40f8f3d4,0x5c2e1cbe,0x66d667ca,0x43f91656,0xcf9d6e79,0x9faaa059 } }, + /* 177 */ + { { 0x0a078fe6,0x8ad24618,0x464fd1dd,0xf6cc73e6,0xc3e37448,0x4d2ce34d, + 0xe3271b5f,0x624950c5,0xefc5af72,0x62910f5e,0xaa132bc6,0x8b585bf8 }, + { 0xa839327f,0x11723985,0x4aac252f,0x34e2d27d,0x6296cc4e,0x402f59ef, + 0x47053de9,0x00ae055c,0x28b4f09b,0xfc22a972,0xfa0c180e,0xa9e86264 } }, + /* 178 */ + { { 0xbc310ecc,0x0b7b6224,0x67fa14ed,0x8a1a74f1,0x7214395c,0x87dd0960, + 0xf5c91128,0xdf1b3d09,0x86b264a8,0x39ff23c6,0x3e58d4c5,0xdc2d49d0 }, + { 0xa9d6f501,0x2152b7d3,0xc04094f7,0xf4c32e24,0xd938990f,0xc6366596, + 0x94fb207f,0x084d078f,0x328594cb,0xfd99f1d7,0xcb2d96b3,0x36defa64 } }, + /* 179 */ + { { 0x13ed7cbe,0x4619b781,0x9784bd0e,0x95e50015,0x2c7705fe,0x2a32251c, + 0x5f0dd083,0xa376af99,0x0361a45b,0x55425c6c,0x1f291e7b,0x812d2cef }, + { 0x5fd94972,0xccf581a0,0xe56dc383,0x26e20e39,0x63dbfbf0,0x0093685d, + 0x36b8c575,0x1fc164cc,0x390ef5e7,0xb9c5ab81,0x26908c66,0x40086beb } }, + /* 180 */ + { { 0x37e3c115,0xe5e54f79,0xc1445a8a,0x69b8ee8c,0xb7659709,0x79aedff2, + 0x1b46fbe6,0xe288e163,0xd18d7bb7,0xdb4844f0,0x48aa6424,0xe0ea23d0 }, + { 0xf3d80a73,0x714c0e4e,0x3bd64f98,0x87a0aa9e,0x2ec63080,0x8844b8a8, + 0x255d81a3,0xe0ac9c30,0x455397fc,0x86151237,0x2f820155,0x0b979464 } }, + /* 181 */ + { { 0x4ae03080,0x127a255a,0x580a89fb,0x232306b4,0x6416f539,0x04e8cd6a, + 0x13b02a0e,0xaeb70dee,0x4c09684a,0xa3038cf8,0x28e433ee,0xa710ec3c }, + { 0x681b1f7d,0x77a72567,0x2fc28170,0x86fbce95,0xf5735ac8,0xd3408683, + 0x6bd68e93,0x3a324e2a,0xc027d155,0x7ec74353,0xd4427177,0xab60354c } }, + /* 182 */ + { { 0xef4c209d,0x32a5342a,0x08d62704,0x2ba75274,0xc825d5fe,0x4bb4af6f, + 0xd28e7ff1,0x1c3919ce,0xde0340f6,0x1dfc2fdc,0x29f33ba9,0xc6580baf }, + { 0x41d442cb,0xae121e75,0x3a4724e4,0x4c7727fd,0x524f3474,0xe556d6a4, + 0x785642a2,0x87e13cc7,0xa17845fd,0x182efbb1,0x4e144857,0xdcec0cf1 } }, + /* 183 */ + { { 0xe9539819,0x1cb89541,0x9d94dbf1,0xc8cb3b4f,0x417da578,0x1d353f63, + 0x8053a09e,0xb7a697fb,0xc35d8b78,0x8d841731,0xb656a7a9,0x85748d6f }, + { 0xc1859c5d,0x1fd03947,0x535d22a2,0x6ce965c1,0x0ca3aadc,0x1966a13e, + 0x4fb14eff,0x9802e41d,0x76dd3fcd,0xa9048cbb,0xe9455bba,0x89b182b5 } }, + /* 184 */ + { { 0x43360710,0xd777ad6a,0x55e9936b,0x841287ef,0x04a21b24,0xbaf5c670, + 0x35ad86f1,0xf2c0725f,0xc707e72e,0x338fa650,0xd8883e52,0x2bf8ed2e }, + { 0xb56e0d6a,0xb0212cf4,0x6843290c,0x50537e12,0x98b3dc6f,0xd8b184a1, + 0x0210b722,0xd2be9a35,0x559781ee,0x407406db,0x0bc18534,0x5a78d591 } }, + /* 185 */ + { { 0xd748b02c,0x4d57aa2a,0xa12b3b95,0xbe5b3451,0x64711258,0xadca7a45, + 0x322153db,0x597e091a,0x32eb1eab,0xf3271006,0x2873f301,0xbd9adcba }, + { 0x38543f7f,0xd1dc79d1,0x921b1fef,0x00022092,0x1e5df8ed,0x86db3ef5, + 
0x9e6b944a,0x888cae04,0x791a32b4,0x71bd29ec,0xa6d1c13e,0xd3516206 } }, + /* 186 */ + { { 0x55924f43,0x2ef6b952,0x4f9de8d5,0xd2f401ae,0xadc68042,0xfc73e8d7, + 0x0d9d1bb4,0x627ea70c,0xbbf35679,0xc3bb3e3e,0xd882dee4,0x7e8a254a }, + { 0xb5924407,0x08906f50,0xa1ad444a,0xf14a0e61,0x65f3738e,0xaa0efa21, + 0xae71f161,0xd60c7dd6,0xf175894d,0x9e8390fa,0x149f4c00,0xd115cd20 } }, + /* 187 */ + { { 0xa52abf77,0x2f2e2c1d,0x54232568,0xc2a0dca5,0x54966dcc,0xed423ea2, + 0xcd0dd039,0xe48c93c7,0x176405c7,0x1e54a225,0x70d58f2e,0x1efb5b16 }, + { 0x94fb1471,0xa751f9d9,0x67d2941d,0xfdb31e1f,0x53733698,0xa6c74eb2, + 0x89a0f64a,0xd3155d11,0xa4b8d2b6,0x4414cfe4,0xf7a8e9e3,0x8d5a4be8 } }, + /* 188 */ + { { 0x52669e98,0x5c96b4d4,0x8fd42a03,0x4547f922,0xd285174e,0xcf5c1319, + 0x064bffa0,0x805cd1ae,0x246d27e7,0x50e8bc4f,0xd5781e11,0xf89ef98f }, + { 0xdee0b63f,0xb4ff95f6,0x222663a4,0xad850047,0x4d23ce9c,0x02691860, + 0x50019f59,0x3e5309ce,0x69a508ae,0x27e6f722,0x267ba52c,0xe9376652 } }, + /* 189 */ + { { 0xc0368708,0xa04d289c,0x5e306e1d,0xc458872f,0x33112fea,0x76fa23de, + 0x6efde42e,0x718e3974,0x1d206091,0xf0c98cdc,0x14a71987,0x5fa3ca62 }, + { 0xdcaa9f2a,0xeee8188b,0x589a860d,0x312cc732,0xc63aeb1f,0xf9808dd6, + 0x4ea62b53,0x70fd43db,0x890b6e97,0x2c2bfe34,0xfa426aa6,0x105f863c } }, + /* 190 */ + { { 0xb38059ad,0x0b29795d,0x90647ea0,0x5686b77e,0xdb473a3e,0xeff0470e, + 0xf9b6d1e2,0x278d2340,0xbd594ec7,0xebbff95b,0xd3a7f23d,0xf4b72334 }, + { 0xa5a83f0b,0x2a285980,0x9716a8b3,0x0786c41a,0x22511812,0x138901bd, + 0xe2fede6e,0xd1b55221,0xdf4eb590,0x0806e264,0x762e462e,0x6c4c897e } }, + /* 191 */ + { { 0xb4b41d9d,0xd10b905f,0x4523a65b,0x826ca466,0xb699fa37,0x535bbd13, + 0x73bc8f90,0x5b9933d7,0xcd2118ad,0x9332d61f,0xd4a65fd0,0x158c693e }, + { 0xe6806e63,0x4ddfb2a8,0xb5de651b,0xe31ed3ec,0x819bc69a,0xf9460e51, + 0x2c76b1f8,0x6229c0d6,0x901970a3,0xbb78f231,0x9cee72b8,0x31f3820f } }, + /* 192 */ + { { 0xc09e1c72,0xe931caf2,0x12990cf4,0x0715f298,0x943262d8,0x33aad81d, + 0x73048d3f,0x5d292b7a,0xdc7415f6,0xb152aaa4,0x0fd19587,0xc3d10fd9 }, + { 0x75ddadd0,0xf76b35c5,0x1e7b694c,0x9f5f4a51,0xc0663025,0x2f1ab7eb, + 0x920260b0,0x01c9cc87,0x05d39da6,0xc4b1f61a,0xeb4a9c4e,0x6dcd76c4 } }, + /* 193 */ + { { 0xfdc83f01,0x0ba0916f,0x9553e4f9,0x354c8b44,0xffc5e622,0xa6cc511a, + 0xe95be787,0xb954726a,0x75b41a62,0xcb048115,0xebfde989,0xfa2ae6cd }, + { 0x0f24659a,0x6376bbc7,0x4c289c43,0x13a999fd,0xec9abd8b,0xc7134184, + 0xa789ab04,0x28c02bf6,0xd3e526ec,0xff841ebc,0x640893a8,0x442b191e } }, + /* 194 */ + { { 0xfa2b6e20,0x4cac6c62,0xf6d69861,0x97f29e9b,0xbc96d12d,0x228ab1db, + 0x5e8e108d,0x6eb91327,0x40771245,0xd4b3d4d1,0xca8a803a,0x61b20623 }, + { 0xa6a560b1,0x2c2f3b41,0x3859fcf4,0x879e1d40,0x024dbfc3,0x7cdb5145, + 0x3bfa5315,0x55d08f15,0xaa93823a,0x2f57d773,0xc6a2c9a2,0xa97f259c } }, + /* 195 */ + { { 0xe58edbbb,0xc306317b,0x79dfdf13,0x25ade51c,0x16d83dd6,0x6b5beaf1, + 0x1dd8f925,0xe8038a44,0xb2a87b6b,0x7f00143c,0xf5b438de,0xa885d00d }, + { 0xcf9e48bd,0xe9f76790,0xa5162768,0xf0bdf9f0,0xad7b57cb,0x0436709f, + 0xf7c15db7,0x7e151c12,0x5d90ee3b,0x3514f022,0x2c361a8d,0x2e84e803 } }, + /* 196 */ + { { 0x563ec8d8,0x2277607d,0xe3934cb7,0xa661811f,0xf58fd5de,0x3ca72e7a, + 0x62294c6a,0x7989da04,0xf6bbefe9,0x88b3708b,0x53ed7c82,0x0d524cf7 }, + { 0x2f30c073,0x69f699ca,0x9dc1dcf3,0xf0fa264b,0x05f0aaf6,0x44ca4568, + 0xd19b9baf,0x0f5b23c7,0xeabd1107,0x39193f41,0x2a7c9b83,0x9e3e10ad } }, + /* 197 */ + { { 0xd4ae972f,0xa90824f0,0xc6e846e7,0x43eef02b,0x29d2160a,0x7e460612, + 0xfe604e91,0x29a178ac,0x4eb184b2,0x23056f04,0xeb54cdf4,0x4fcad55f }, + { 
0xae728d15,0xa0ff96f3,0xc6a00331,0x8a2680c6,0x7ee52556,0x5f84cae0, + 0xc5a65dad,0x5e462c3a,0xe2d23f4f,0x5d2b81df,0xc5b1eb07,0x6e47301b } }, + /* 198 */ + { { 0xaf8219b9,0x77411d68,0x51b1907a,0xcb883ce6,0x101383b5,0x25c87e57, + 0x982f970d,0x9c7d9859,0x118305d2,0xaa6abca5,0x9013a5db,0x725fed2f }, + { 0xababd109,0x487cdbaf,0x87586528,0xc0f8cf56,0x8ad58254,0xa02591e6, + 0xdebbd526,0xc071b1d1,0x961e7e31,0x927dfe8b,0x9263dfe1,0x55f895f9 } }, + /* 199 */ + { { 0xb175645b,0xf899b00d,0xb65b4b92,0x51f3a627,0xb67399ef,0xa2f3ac8d, + 0xe400bc20,0xe717867f,0x1967b952,0x42cc9020,0x3ecd1de1,0x3d596751 }, + { 0xdb979775,0xd41ebcde,0x6a2e7e88,0x99ba61bc,0x321504f2,0x039149a5, + 0x27ba2fad,0xe7dc2314,0xb57d8368,0x9f556308,0x57da80a7,0x2b6d16c9 } }, + /* 200 */ + { { 0x279ad982,0x84af5e76,0x9c8b81a6,0x9bb4c92d,0x0e698e67,0xd79ad44e, + 0x265fc167,0xe8be9048,0x0c3a4ccc,0xf135f7e6,0xb8863a33,0xa0a10d38 }, + { 0xd386efd9,0xe197247c,0xb52346c2,0x0eefd3f9,0x78607bc8,0xc22415f9, + 0x508674ce,0xa2a8f862,0xc8c9d607,0xa72ad09e,0x50fa764f,0xcd9f0ede } }, + /* 201 */ + { { 0xd1a46d4d,0x063391c7,0x9eb01693,0x2df51c11,0x849e83de,0xc5849800, + 0x8ad08382,0x48fd09aa,0xaa742736,0xa405d873,0xe1f9600c,0xee49e61e }, + { 0x48c76f73,0xd76676be,0x01274b2a,0xd9c100f6,0x83f8718d,0x110bb67c, + 0x02fc0d73,0xec85a420,0x744656ad,0xc0449e1e,0x37d9939b,0x28ce7376 } }, + /* 202 */ + { { 0x44544ac7,0x97e9af72,0xba010426,0xf2c658d5,0xfb3adfbd,0x732dec39, + 0xa2df0b07,0xd12faf91,0x2171e208,0x8ac26725,0x5b24fa54,0xf820cdc8 }, + { 0x94f4cf77,0x307a6eea,0x944a33c6,0x18c783d2,0x0b741ac5,0x4b939d4c, + 0x3ffbb6e4,0x1d7acd15,0x7a255e44,0x06a24858,0xce336d50,0x14fbc494 } }, + /* 203 */ + { { 0x51584e3c,0x9b920c0c,0xf7e54027,0xc7733c59,0x88422bbe,0xe24ce139, + 0x523bd6ab,0x11ada812,0xb88e6def,0xde068800,0xfe8c582d,0x7b872671 }, + { 0x7de53510,0x4e746f28,0xf7971968,0x492f8b99,0x7d928ac2,0x1ec80bc7, + 0x432eb1b5,0xb3913e48,0x32028f6e,0xad084866,0x8fc2f38b,0x122bb835 } }, + /* 204 */ + { { 0x3b0b29c3,0x0a9f3b1e,0x4fa44151,0x837b6432,0x17b28ea7,0xb9905c92, + 0x98451750,0xf39bc937,0xce8b6da1,0xcd383c24,0x010620b2,0x299f57db }, + { 0x58afdce3,0x7b6ac396,0x3d05ef47,0xa15206b3,0xb9bb02ff,0xa0ae37e2, + 0x9db3964c,0x107760ab,0x67954bea,0xe29de9a0,0x431c3f82,0x446a1ad8 } }, + /* 205 */ + { { 0x5c6b8195,0xc6fecea0,0xf49e71b9,0xd744a7c5,0x177a7ae7,0xa8e96acc, + 0x358773a7,0x1a05746c,0x37567369,0xa4162146,0x87d1c971,0xaa0217f7 }, + { 0x77fd3226,0x61e9d158,0xe4f600be,0x0f6f2304,0x7a6dff07,0xa9c4cebc, + 0x09f12a24,0xd15afa01,0x8c863ee9,0x2bbadb22,0xe5eb8c78,0xa28290e4 } }, + /* 206 */ + { { 0x3e9de330,0x55b87fa0,0x195c145b,0x12b26066,0xa920bef0,0xe08536e0, + 0x4d195adc,0x7bff6f2c,0x945f4187,0x7f319e9d,0xf892ce47,0xf9848863 }, + { 0x4fe37657,0xd0efc1d3,0x5cf0e45a,0x3c58de82,0x8b0ccbbe,0x626ad21a, + 0xaf952fc5,0xd2a31208,0xeb437357,0x81791995,0x98e95d4f,0x5f19d30f } }, + /* 207 */ + { { 0x0e6865bb,0x72e83d9a,0xf63456a6,0x22f5af3b,0x463c8d9e,0x409e9c73, + 0xdfe6970e,0x40e9e578,0x711b91ca,0x876b6efa,0x942625a3,0x895512cf }, + { 0xcb4e462b,0x84c8eda8,0x4412e7c8,0x84c0154a,0xceb7b71f,0x04325db1, + 0x66f70877,0x1537dde3,0x1992b9ac,0xf3a09399,0xd498ae77,0xa7316606 } }, + /* 208 */ + { { 0xcad260f5,0x13990d2f,0xeec0e8c0,0x76c3be29,0x0f7bd7d5,0x7dc5bee0, + 0xefebda4b,0x9be167d2,0x9122b87e,0xcce3dde6,0x82b5415c,0x75a28b09 }, + { 0xe84607a6,0xf6810bcd,0x6f4dbf0d,0xc6d58128,0x1b4dafeb,0xfead577d, + 0x066b28eb,0x9bc440b2,0x8b17e84b,0x53f1da97,0xcda9a575,0x0459504b } }, + /* 209 */ + { { 0x329e5836,0x13e39a02,0xf717269d,0x2c9e7d51,0xf26c963b,0xc5ac58d6, + 
0x79967bf5,0x3b0c6c43,0x55908d9d,0x60bbea3f,0xf07c9ad1,0xd84811e7 }, + { 0x5bd20e4a,0xfe7609a7,0x0a70baa8,0xe4325dd2,0xb3600386,0x3711f370, + 0xd0924302,0x97f9562f,0x4acc4436,0x040dc0c3,0xde79cdd4,0xfd6d725c } }, + /* 210 */ + { { 0xcf13eafb,0xb3efd0e3,0x5aa0ae5f,0x21009cbb,0x79022279,0xe480c553, + 0xb2fc9a6d,0x755cf334,0x07096ae7,0x8564a5bf,0xbd238139,0xddd649d0 }, + { 0x8a045041,0xd0de10b1,0xc957d572,0x6e05b413,0x4e0fb25c,0x5c5ff806, + 0x641162fb,0xd933179b,0xe57439f9,0x42d48485,0x8a8d72aa,0x70c5bd0a } }, + /* 211 */ + { { 0x97bdf646,0xa7671738,0xab329f7c,0xaa1485b4,0xf8f25fdf,0xce3e11d6, + 0xc6221824,0x76a3fc7e,0xf3924740,0x045f281f,0x96d13a9a,0x24557d4e }, + { 0xdd4c27cd,0x875c804b,0x0f5c7fea,0x11c5f0f4,0xdc55ff7e,0xac8c880b, + 0x1103f101,0x2acddec5,0xf99faa89,0x38341a21,0xce9d6b57,0xc7b67a2c } }, + /* 212 */ + { { 0x8e357586,0x9a0d724f,0xdf648da0,0x1d7f4ff5,0xfdee62a5,0x9c3e6c9b, + 0x0389b372,0x0499cef0,0x98eab879,0xe904050d,0x6c051617,0xe8eef1b6 }, + { 0xc37e3ca9,0xebf5bfeb,0xa4e0b91d,0x7c5e946d,0x2c4bea28,0x79097314, + 0xee67b2b7,0x81f6c109,0xdafc5ede,0xaf237d9b,0x2abb04c7,0xd2e60201 } }, + /* 213 */ + { { 0x8a4f57bf,0x6156060c,0xff11182a,0xf9758696,0x6296ef00,0x8336773c, + 0xff666899,0x9c054bce,0x719cd11c,0xd6a11611,0xdbe1acfa,0x9824a641 }, + { 0xba89fd01,0x0b7b7a5f,0x889f79d8,0xf8d3b809,0xf578285c,0xc5e1ea08, + 0xae6d8288,0x7ac74536,0x7521ef5f,0x5d37a200,0xb260a25d,0x5ecc4184 } }, + /* 214 */ + { { 0xa708c8d3,0xddcebb19,0xc63f81ec,0xe63ed04f,0x11873f95,0xd045f5a0, + 0x79f276d5,0x3b5ad544,0x425ae5b3,0x81272a3d,0x10ce1605,0x8bfeb501 }, + { 0x888228bf,0x4233809c,0xb2aff7df,0x4bd82acf,0x0cbd4a7f,0x9c68f180, + 0x6b44323d,0xfcd77124,0x891db957,0x60c0fcf6,0x04da8f7f,0xcfbb4d89 } }, + /* 215 */ + { { 0x3b26139a,0x9a6a5df9,0xb2cc7eb8,0x3e076a83,0x5a964bcd,0x47a8e82d, + 0xb9278d6b,0x8a4e2a39,0xe4443549,0x93506c98,0xf1e0d566,0x06497a8f }, + { 0x2b1efa05,0x3dee8d99,0x45393e33,0x2da63ca8,0xcf0579ad,0xa4af7277, + 0x3236d8ea,0xaf4b4639,0x32b617f5,0x6ccad95b,0xb88bb124,0xce76d8b8 } }, + /* 216 */ + { { 0x083843dc,0x63d2537a,0x1e4153b4,0x89eb3514,0xea9afc94,0x5175ebc4, + 0x8ed1aed7,0x7a652580,0xd85e8297,0x67295611,0xb584b73d,0x8dd2d68b }, + { 0x0133c3a4,0x237139e6,0x4bd278ea,0x9de838ab,0xc062fcd9,0xe829b072, + 0x63ba8706,0x70730d4f,0xd3cd05ec,0x6080483f,0x0c85f84d,0x872ab5b8 } }, + /* 217 */ + { { 0x999d4d49,0xfc0776d3,0xec3f45e7,0xa3eb59de,0x0dae1fc1,0xbc990e44, + 0xa15371ff,0x33596b1e,0x9bc7ab25,0xd447dcb2,0x35979582,0xcd5b63e9 }, + { 0x77d1ff11,0xae3366fa,0xedee6903,0x59f28f05,0xa4433bf2,0x6f43fed1, + 0xdf9ce00e,0x15409c9b,0xaca9c5dc,0x21b5cded,0x82d7bdb4,0xf9f33595 } }, + /* 218 */ + { { 0x9422c792,0x95944378,0xc958b8bf,0x239ea923,0xdf076541,0x4b61a247, + 0xbb9fc544,0x4d29ce85,0x0b424559,0x9a692a67,0x0e486900,0x6e0ca5a0 }, + { 0x85b3bece,0x6b79a782,0xc61f9892,0x41f35e39,0xae747f82,0xff82099a, + 0xd0ca59d6,0x58c8ae3f,0x99406b5f,0x4ac930e2,0x9df24243,0x2ce04eb9 } }, + /* 219 */ + { { 0x1ac37b82,0x4366b994,0x25b04d83,0xff0c728d,0x19c47b7c,0x1f551361, + 0xbeff13e7,0xdbf2d5ed,0xe12a683d,0xf78efd51,0x989cf9c4,0x82cd85b9 }, + { 0xe0cb5d37,0xe23c6db6,0x72ee1a15,0x818aeebd,0x28771b14,0x8212aafd, + 0x1def817d,0x7bc221d9,0x9445c51f,0xdac403a2,0x12c3746b,0x711b0517 } }, + /* 220 */ + { { 0x5ea99ecc,0x0ed9ed48,0xb8cab5e1,0xf799500d,0xb570cbdc,0xa8ec87dc, + 0xd35dfaec,0x52cfb2c2,0x6e4d80a4,0x8d31fae2,0xdcdeabe5,0xe6a37dc9 }, + { 0x1deca452,0x5d365a34,0x0d68b44e,0x09a5f8a5,0xa60744b1,0x59238ea5, + 0xbb4249e9,0xf2fedc0d,0xa909b2e3,0xe395c74e,0x39388250,0xe156d1a5 } }, + /* 221 */ + { { 
0x47181ae9,0xd796b3d0,0x44197808,0xbaf44ba8,0x34cf3fac,0xe6933094, + 0xc3bd5c46,0x41aa6ade,0xeed947c6,0x4fda75d8,0x9ea5a525,0xacd9d412 }, + { 0xd430301b,0x65cc55a3,0x7b52ea49,0x3c9a5bcf,0x159507f0,0x22d319cf, + 0xde74a8dd,0x2ee0b9b5,0x877ac2b6,0x20c26a1e,0x92e7c314,0x387d73da } }, + /* 222 */ + { { 0x8cd3fdac,0x13c4833e,0x332e5b8e,0x76fcd473,0xe2fe1fd3,0xff671b4b, + 0x5d98d8ec,0x4d734e8b,0x514bbc11,0xb1ead3c6,0x7b390494,0xd14ca858 }, + { 0x5d2d37e9,0x95a443af,0x00464622,0x73c6ea73,0x15755044,0xa44aeb4b, + 0xfab58fee,0xba3f8575,0xdc680a6f,0x9779dbc9,0x7b37ddfc,0xe1ee5f5a } }, + /* 223 */ + { { 0x12d29f46,0xcd0b4648,0x0ed53137,0x93295b0b,0x80bef6c9,0xbfe26094, + 0x54248b00,0xa6565788,0x80e7f9c4,0x69c43fca,0xbe141ea1,0x2190837b }, + { 0xa1b26cfb,0x875e159a,0x7affe852,0x90ca9f87,0x92ca598e,0x15e6550d, + 0x1938ad11,0xe3e0945d,0x366ef937,0xef7636bb,0xb39869e5,0xb6034d0b } }, + /* 224 */ + { { 0x26d8356e,0x4d255e30,0xd314626f,0xf83666ed,0xd0c8ed64,0x421ddf61, + 0x26677b61,0x96e473c5,0x9e9b18b3,0xdad4af7e,0xa9393f75,0xfceffd4a }, + { 0x11c731d5,0x843138a1,0xb2f141d9,0x05bcb3a1,0x617b7671,0x20e1fa95, + 0x88ccec7b,0xbefce812,0x90f1b568,0x582073dc,0x1f055cb7,0xf572261a } }, + /* 225 */ + { { 0x36973088,0xf3148277,0x86a9f980,0xc008e708,0xe046c261,0x1b795947, + 0xca76bca0,0xdf1e6a7d,0x71acddf0,0xabafd886,0x1364d8f4,0xff7054d9 }, + { 0xe2260594,0x2cf63547,0xd73b277e,0x468a5372,0xef9bd35e,0xc7419e24, + 0x24043cc3,0x2b4a1c20,0x890b39cd,0xa28f047a,0x46f9a2e3,0xdca2cea1 } }, + /* 226 */ + { { 0x53277538,0xab788736,0xcf697738,0xa734e225,0x6b22e2c1,0x66ee1d1e, + 0xebe1d212,0x2c615389,0x02bb0766,0xf36cad40,0x3e64f207,0x120885c3 }, + { 0x90fbfec2,0x59e77d56,0xd7a574ae,0xf9e781aa,0x5d045e53,0x801410b0, + 0xa91b5f0e,0xd3b5f0aa,0x7fbb3521,0xb3d1df00,0xc72bee9a,0x11c4b33e } }, + /* 227 */ + { { 0x83c3a7f3,0xd32b9832,0x88d8a354,0x8083abcf,0x50f4ec5a,0xdeb16404, + 0x641e2907,0x18d747f0,0xf1bbf03e,0x4e8978ae,0x88a0cd89,0x932447dc }, + { 0xcf3d5897,0x561e0feb,0x13600e6d,0xfc3a682f,0xd16a6b73,0xc78b9d73, + 0xd29bf580,0xe713fede,0x08d69e5c,0x0a225223,0x1ff7fda4,0x3a924a57 } }, + /* 228 */ + { { 0xb4093bee,0xfb64554c,0xa58c6ec0,0xa6d65a25,0x43d0ed37,0x4126994d, + 0x55152d44,0xa5689a51,0x284caa8d,0xb8e5ea8c,0xd1f25538,0x33f05d4f }, + { 0x1b615d6e,0xe0fdfe09,0x705507da,0x2ded7e8f,0x17bbcc80,0xdd5631e5, + 0x267fd11f,0x4f87453e,0xff89d62d,0xc6da723f,0xe3cda21d,0x55cbcae2 } }, + /* 229 */ + { { 0x6b4e84f3,0x336bc94e,0x4ef72c35,0x72863031,0xeeb57f99,0x6d85fdee, + 0xa42ece1b,0x7f4e3272,0x36f0320a,0x7f86cbb5,0x923331e6,0xf09b6a2b }, + { 0x56778435,0x21d3ecf1,0x8323b2d2,0x2977ba99,0x1704bc0f,0x6a1b57fb, + 0x389f048a,0xd777cf8b,0xac6b42cd,0x9ce2174f,0x09e6c55a,0x404e2bff } }, + /* 230 */ + { { 0x204c5ddb,0x9b9b135e,0x3eff550e,0x9dbfe044,0xec3be0f6,0x35eab4bf, + 0x0a43e56f,0x8b4c3f0d,0x0e73f9b3,0x4c1c6673,0x2c78c905,0x92ed38bd }, + { 0xa386e27c,0xc7003f6a,0xaced8507,0xb9c4f46f,0x59df5464,0xea024ec8, + 0x429572ea,0x4af96152,0xe1fc1194,0x279cd5e2,0x281e358c,0xaa376a03 } }, + /* 231 */ + { { 0x3cdbc95c,0x07859223,0xef2e337a,0xaae1aa6a,0x472a8544,0xc040108d, + 0x8d037b7d,0x80c853e6,0x8c7eee24,0xd221315c,0x8ee47752,0x195d3856 }, + { 0xdacd7fbe,0xd4b1ba03,0xd3e0c52b,0x4b5ac61e,0x6aab7b52,0x68d3c052, + 0x660e3fea,0xf0d7248c,0x3145efb4,0xafdb3f89,0x8f40936d,0xa73fd9a3 } }, + /* 232 */ + { { 0xbb1b17ce,0x891b9ef3,0xc6127f31,0x14023667,0x305521fd,0x12b2e58d, + 0xe3508088,0x3a47e449,0xff751507,0xe49fc84b,0x5310d16e,0x4023f722 }, + { 0xb73399fa,0xa608e5ed,0xd532aa3e,0xf12632d8,0x845e8415,0x13a2758e, + 
0x1fc2d861,0xae4b6f85,0x339d02f2,0x3879f5b1,0x80d99ebd,0x446d22a6 } }, + /* 233 */ + { { 0x4be164f1,0x0f502302,0x88b81920,0x8d09d2d6,0x984aceff,0x514056f1, + 0x75e9e80d,0xa5c4ddf0,0xdf496a93,0x38cb47e6,0x38df6bf7,0x899e1d6b }, + { 0xb59eb2a6,0x69e87e88,0x9b47f38b,0x280d9d63,0x3654e955,0x599411ea, + 0x969aa581,0xcf8dd4fd,0x530742a7,0xff5c2baf,0x1a373085,0xa4391536 } }, + /* 234 */ + { { 0xa8a4bdd2,0x6ace72a3,0xb68ef702,0xc656cdd1,0x90c4dad8,0xd4a33e7e, + 0x9d951c50,0x4aece08a,0x085d68e6,0xea8005ae,0x6f7502b8,0xfdd7a7d7 }, + { 0x98d6fa45,0xce6fb0a6,0x1104eb8c,0x228f8672,0xda09d7dc,0xd23d8787, + 0x2ae93065,0x5521428b,0xea56c366,0x95faba3d,0x0a88aca5,0xedbe5039 } }, + /* 235 */ + { { 0xbfb26c82,0xd64da0ad,0x952c2f9c,0xe5d70b3c,0xf7e77f68,0xf5e8f365, + 0x08f2d695,0x7234e002,0xd12e7be6,0xfaf900ee,0x4acf734e,0x27dc6934 }, + { 0xc260a46a,0x80e4ff5e,0x2dc31c28,0x7da5ebce,0xca69f552,0x485c5d73, + 0x69cc84c2,0xcdfb6b29,0xed6d4eca,0x031c5afe,0x22247637,0xc7bbf4c8 } }, + /* 236 */ + { { 0x49fe01b2,0x9d5b72c7,0x793a91b8,0x34785186,0xcf460438,0xa3ba3c54, + 0x3ab21b6f,0x73e8e43d,0xbe57b8ab,0x50cde8e0,0xdd204264,0x6488b3a7 }, + { 0xdddc4582,0xa9e398b3,0x5bec46fe,0x1698c1a9,0x156d3843,0x7f1446ef, + 0x770329a2,0x3fd25dd8,0x2c710668,0x05b1221a,0xa72ee6cf,0x65b2dc2a } }, + /* 237 */ + { { 0xcd021d63,0x21a885f7,0xfea61f08,0x3f344b15,0xc5cf73e6,0xad5ba6dd, + 0x227a8b23,0x154d0d8f,0xdc559311,0x9b74373c,0x98620fa1,0x4feab715 }, + { 0x7d9ec924,0x5098938e,0x6d47e550,0x84d54a5e,0x1b617506,0x1a2d1bdc, + 0x615868a4,0x99fe1782,0x3005a924,0x171da780,0x7d8f79b6,0xa70bf5ed } }, + /* 238 */ + { { 0xfe2216c5,0x0bc1250d,0x7601b351,0x2c37e250,0xd6f06b7e,0xb6300175, + 0x8bfeb9b7,0x4dde8ca1,0xb82f843d,0x4f210432,0xb1ac0afd,0x8d70e2f9 }, + { 0xaae91abb,0x25c73b78,0x863028f2,0x0230dca3,0xe5cf30b7,0x8b923ecf, + 0x5506f265,0xed754ec2,0x729a5e39,0x8e41b88c,0xbabf889b,0xee67cec2 } }, + /* 239 */ + { { 0x1be46c65,0xe183acf5,0xe7565d7a,0x9789538f,0xd9627b4e,0x87873391, + 0x9f1d9187,0xbf4ac4c1,0x4691f5c8,0x5db99f63,0x74a1fb98,0xa68df803 }, + { 0xbf92b5fa,0x3c448ed1,0x3e0bdc32,0xa098c841,0x79bf016c,0x8e74cd55, + 0x115e244d,0x5df0d09c,0x3410b66e,0x9418ad01,0x17a02130,0x8b6124cb } }, + /* 240 */ + { { 0xc26e3392,0x425ec3af,0xa1722e00,0xc07f8470,0xe2356b43,0xdcc28190, + 0xb1ef59a6,0x4ed97dff,0xc63028c1,0xc22b3ad1,0x68c18988,0x070723c2 }, + { 0x4cf49e7d,0x70da302f,0x3f12a522,0xc5e87c93,0x18594148,0x74acdd1d, + 0xca74124c,0xad5f73ab,0xd69fd478,0xe72e4a3e,0x7b117cc3,0x61593868 } }, + /* 241 */ + { { 0xa9aa0486,0x7b7b9577,0xa063d557,0x6e41fb35,0xda9047d7,0xb017d5c7, + 0x68a87ba9,0x8c748280,0xdf08ad93,0xab45fa5c,0x4c288a28,0xcd9fb217 }, + { 0x5747843d,0x59544642,0xa56111e3,0x34d64c6c,0x4bfce8d5,0x12e47ea1, + 0x6169267f,0x17740e05,0xeed03fb5,0x5c49438e,0x4fc3f513,0x9da30add } }, + /* 242 */ + { { 0xccfa5200,0xc4e85282,0x6a19b13d,0x2707608f,0xf5726e2f,0xdcb9a53d, + 0xe9427de5,0x612407c9,0xd54d582a,0x3e5a17e1,0x655ae118,0xb99877de }, + { 0x015254de,0x6f0e972b,0xf0a6f7c5,0x92a56db1,0xa656f8b2,0xd297e4e1, + 0xad981983,0x99fe0052,0x07cfed84,0xd3652d2f,0x843c1738,0xc784352e } }, + /* 243 */ + { { 0x7e9b2d8a,0x6ee90af0,0x57cf1964,0xac8d7018,0x71f28efc,0xf6ed9031, + 0x6812b20e,0x7f70d5a9,0xf1c61eee,0x27b557f4,0xc6263758,0xf1c9bd57 }, + { 0x2a1a6194,0x5cf7d014,0x1890ab84,0xdd614e0b,0x0e93c2a6,0x3ef9de10, + 0xe0cd91c5,0xf98cf575,0x14befc32,0x504ec0c6,0x6279d68c,0xd0513a66 } }, + /* 244 */ + { { 0xa859fb6a,0xa8eadbad,0xdb283666,0xcf8346e7,0x3e22e355,0x7b35e61a, + 0x99639c6b,0x293ece2c,0x56f241c8,0xfa0162e2,0xbf7a1dda,0xd2e6c7b9 }, + { 
0x40075e63,0xd0de6253,0xf9ec8286,0x2405aa61,0x8fe45494,0x2237830a, + 0x364e9c8c,0x4fd01ac7,0x904ba750,0x4d9c3d21,0xaf1b520b,0xd589be14 } }, + /* 245 */ + { { 0x4662e53b,0x13576a4f,0xf9077676,0x35ec2f51,0x97c0af97,0x66297d13, + 0x9e598b58,0xed3201fe,0x5e70f604,0x49bc752a,0xbb12d951,0xb54af535 }, + { 0x212c1c76,0x36ea4c2b,0xeb250dfd,0x18f5bbc7,0x9a0a1a46,0xa0d466cc, + 0xdac2d917,0x52564da4,0x8e95fab5,0x206559f4,0x9ca67a33,0x7487c190 } }, + /* 246 */ + { { 0xdde98e9c,0x75abfe37,0x2a411199,0x99b90b26,0xdcdb1f7c,0x1b410996, + 0x8b3b5675,0xab346f11,0xf1f8ae1e,0x04852193,0x6b8b98c1,0x1ec4d227 }, + { 0x45452baa,0xba3bc926,0xacc4a572,0x387d1858,0xe51f171e,0x9478eff6, + 0x931e1c00,0xf357077d,0xe54c8ca8,0xffee77cd,0x551dc9a4,0xfb4892ff } }, + /* 247 */ + { { 0x2db8dff8,0x5b1bdad0,0x5a2285a2,0xd462f4fd,0xda00b461,0x1d6aad8e, + 0x41306d1b,0x43fbefcf,0x6a13fe19,0x428e86f3,0x17f89404,0xc8b2f118 }, + { 0xf0d51afb,0x762528aa,0x549b1d06,0xa3e2fea4,0xea3ddf66,0x86fad8f2, + 0x4fbdd206,0x0d9ccc4b,0xc189ff5a,0xcde97d4c,0x199f19a6,0xc36793d6 } }, + /* 248 */ + { { 0x51b85197,0xea38909b,0xb4c92895,0xffb17dd0,0x1ddb3f3f,0x0eb0878b, + 0xc57cf0f2,0xb05d28ff,0x1abd57e2,0xd8bde2e7,0xc40c1b20,0x7f2be28d }, + { 0x299a2d48,0x6554dca2,0x8377982d,0x5130ba2e,0x1071971a,0x8863205f, + 0x7cf2825d,0x15ee6282,0x03748f2b,0xd4b6c57f,0x430385a0,0xa9e3f4da } }, + /* 249 */ + { { 0x83fbc9c6,0x33eb7cec,0x4541777e,0x24a311c7,0x4f0767fc,0xc81377f7, + 0x4ab702da,0x12adae36,0x2a779696,0xb7fcb6db,0x01cea6ad,0x4a6fb284 }, + { 0xcdfc73de,0x5e8b1d2a,0x1b02fd32,0xd0efae8d,0xd81d8519,0x3f99c190, + 0xfc808971,0x3c18f7fa,0x51b7ae7b,0x41f713e7,0xf07fc3f8,0x0a4b3435 } }, + /* 250 */ + { { 0x019b7d2e,0x7dda3c4c,0xd4dc4b89,0x631c8d1a,0x1cdb313c,0x5489cd6e, + 0x4c07bb06,0xd44aed10,0x75f000d1,0x8f97e13a,0xdda5df4d,0x0e9ee64f }, + { 0x3e346910,0xeaa99f3b,0xfa294ad7,0x622f6921,0x0d0b2fe9,0x22aaa20d, + 0x1e5881ba,0x4fed2f99,0xc1571802,0x9af3b2d6,0xdc7ee17c,0x919e67a8 } }, + /* 251 */ + { { 0x76250533,0xc724fe4c,0x7d817ef8,0x8a2080e5,0x172c9751,0xa2afb0f4, + 0x17c0702e,0x9b10cdeb,0xc9b7e3e9,0xbf3975e3,0x1cd0cdc5,0x206117df }, + { 0xbe05ebd5,0xfb049e61,0x16c782c0,0xeb0bb55c,0xab7fed09,0x13a331b8, + 0x632863f0,0xf6c58b1d,0x4d3b6195,0x6264ef6e,0x9a53f116,0x92c51b63 } }, + /* 252 */ + { { 0x288b364d,0xa57c7bc8,0x7b41e5c4,0x4a562e08,0x698a9a11,0x699d21c6, + 0xf3f849b9,0xa4ed9581,0x9eb726ba,0xa223eef3,0xcc2884f9,0x13159c23 }, + { 0x3a3f4963,0x73931e58,0x0ada6a81,0x96500389,0x5ab2950b,0x3ee8a1c6, + 0x775fab52,0xeedf4949,0x4f2671b6,0x63d652e1,0x3c4e2f55,0xfed4491c } }, + /* 253 */ + { { 0xf4eb453e,0x335eadc3,0xcadd1a5b,0x5ff74b63,0x5d84a91a,0x6933d0d7, + 0xb49ba337,0x9ca3eeb9,0xc04c15b8,0x1f6facce,0xdc09a7e4,0x4ef19326 }, + { 0x3dca3233,0x53d2d324,0xa2259d4b,0x0ee40590,0x5546f002,0x18c22edb, + 0x09ea6b71,0x92429801,0xb0e91e61,0xaada0add,0x99963c50,0x5fe53ef4 } }, + /* 254 */ + { { 0x90c28c65,0x372dd06b,0x119ce47d,0x1765242c,0x6b22fc82,0xc041fb80, + 0xb0a7ccc1,0x667edf07,0x1261bece,0xc79599e7,0x19cff22a,0xbc69d9ba }, + { 0x13c06819,0x009d77cd,0xe282b79d,0x635a66ae,0x225b1be8,0x4edac4a6, + 0x524008f9,0x57d4f4e4,0xb056af84,0xee299ac5,0x3a0bc386,0xcc38444c } }, + /* 255 */ + { { 0xcd4c2356,0x490643b1,0x750547be,0x740a4851,0xd4944c04,0x643eaf29, + 0x299a98a0,0xba572479,0xee05fdf9,0x48b29f16,0x089b2d7b,0x33fb4f61 }, + { 0xa950f955,0x86704902,0xfedc3ddf,0x97e1034d,0x05fbb6a2,0x211320b6, + 0x432299bb,0x23d7b93f,0x8590e4a3,0x1fe1a057,0xf58c0ce6,0x8e1d0586 } }, +}; + +/* Multiply the base point of P384 by the scalar and return the result. 
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
+                                      k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 12, km);
+
+        err = sp_384_ecc_mulmod_base_12(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_12(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
+            a[8] | a[9] | a[10] | a[11]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a  A single precision integer.
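+ *
+ * Note: the increment is rippled through all twelve 32-bit words with an
+ * adds/adcs carry chain, so the full 384-bit value is updated.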
+ */
+SP_NOINLINE static void sp_384_add_one_12(sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov r2, #1\n\t"
+        "ldr r1, [%[a], #0]\n\t"
+        "adds r1, r1, r2\n\t"
+        "mov r2, #0\n\t"
+        "str r1, [%[a], #0]\n\t"
+        "ldr r1, [%[a], #4]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #4]\n\t"
+        "ldr r1, [%[a], #8]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #8]\n\t"
+        "ldr r1, [%[a], #12]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #12]\n\t"
+        "ldr r1, [%[a], #16]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #16]\n\t"
+        "ldr r1, [%[a], #20]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #20]\n\t"
+        "ldr r1, [%[a], #24]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #24]\n\t"
+        "ldr r1, [%[a], #28]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #28]\n\t"
+        "ldr r1, [%[a], #32]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #32]\n\t"
+        "ldr r1, [%[a], #36]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #36]\n\t"
+        "ldr r1, [%[a], #40]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #40]\n\t"
+        "ldr r1, [%[a], #44]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #44]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "r1", "r2"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r     A single precision integer.
+ * size  Maximum number of digits to fill (length of r in sp_digit words).
+ * a     Byte array.
+ * n     Number of bytes in array to read.
+ */
+static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_12(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[48];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_384_from_bin(k, 12, buf, (int)sizeof(buf));
+            if (sp_384_cmp_12(k, p384_order2) < 0) {
+                sp_384_add_one_12(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
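+ *
+ * A minimal usage sketch (hypothetical caller, error handling omitted):
+ *
+ *     WC_RNG rng;
+ *     mp_int priv;
+ *     ecc_point* pub = wc_ecc_new_point();
+ *     wc_InitRng(&rng);
+ *     mp_init(&priv);
+ *     err = sp_ecc_make_key_384(&rng, &priv, pub, NULL);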
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_12(rng, k);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_12(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_384_iszero_12(infinity->x) == 0) || (sp_384_iszero_12(infinity->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_12(infinity, 1, heap);
+#endif
+    sp_384_point_free_12(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 384 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<12 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for the output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
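+ *
+ * A minimal usage sketch (hypothetical caller and variable names, error
+ * handling omitted):
+ *
+ *     byte secret[48];
+ *     word32 secretLen = (word32)sizeof(secret);
+ *     err = sp_ecc_secret_gen_384(&myPriv, peerPub, secret, &secretLen, NULL);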
+ */
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    if (*outLen < 48U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 12, priv);
+        sp_384_point_from_ecc_point_12(point, pub);
+        err = sp_384_ecc_mulmod_12(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_384_to_bin(point->x, out);
+        *outLen = 48;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov r8, %[a]\n\t"
+        "add r8, r8, #48\n\t"
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        "subs r5, r5, %[c]\n\t"
+        "ldr r3, [%[a]]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "str r3, [%[a]]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        "sbc %[c], %[c], %[c]\n\t"
+        "add %[a], %[a], #8\n\t"
+        "add %[b], %[b], #8\n\t"
+        "cmp %[a], r8\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "subs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "sbc %[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
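+ *
+ * Used by sp_384_div_12 below to multiply the divisor by an estimated
+ * quotient word before it is subtracted from the running remainder.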
+ */
+SP_NOINLINE static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "add r9, %[a], #48\n\t"
+        /* A[0] * B */
+        "ldr r6, [%[a]], #4\n\t"
+        "umull r5, r3, r6, %[b]\n\t"
+        "mov r4, #0\n\t"
+        "str r5, [%[r]], #4\n\t"
+        /* A[0] * B - Done */
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        /* A[] * B */
+        "ldr r6, [%[a]], #4\n\t"
+        "umull r6, r8, r6, %[b]\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[] * B - Done */
+        "str r3, [%[r]], #4\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "cmp %[a], r9\n\t"
+        "blt 1b\n\t"
+        "str r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate division; the result may be one larger
+ * than the true quotient.
+ */
+SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr r6, %[div], #16\n\t"
+        "add r6, r6, #1\n\t"
+        "udiv r4, %[d1], r6\n\t"
+        "lsl r8, r4, #16\n\t"
+        "umull r4, r5, %[div], r8\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "udiv r5, %[d1], r6\n\t"
+        "lsl r4, r5, #16\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "lsl r4, %[d1], #16\n\t"
+        "orr r4, r4, %[d0], lsr #16\n\t"
+        "udiv r4, r4, r6\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "lsl r4, %[d1], #16\n\t"
+        "orr r4, r4, %[d0], lsr #16\n\t"
+        "udiv r4, r4, r6\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "udiv r4, %[d0], %[div]\n\t"
+        "add r8, r8, r4\n\t"
+        "mov %[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r8"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<12; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+    r[8] = a[8] & m;
+    r[9] = a[9] & m;
+    r[10] = a[10] & m;
+    r[11] = a[11] & m;
+#endif
+}
+
+/* Divide a by d and put the remainder into r. (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
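+ *
+ * This is schoolbook long division over 32-bit words: each quotient word is
+ * estimated with div_384_word_12 (which may be one too large), and the two
+ * conditional sp_384_add_12 calls add the divisor back to correct any
+ * over-subtraction.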
+ */
+static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[24], t2[13];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[11];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 12);
+    for (i=11; i>=0; i--) {
+        r1 = div_384_word_12(t1[12 + i], t1[12 + i - 1], div);
+
+        sp_384_mul_d_12(t2, d, r1);
+        t1[12 + i] += sp_384_sub_in_place_12(&t1[i], t2);
+        t1[12 + i] -= t2[12];
+        sp_384_mask_12(t2, d, t1[12 + i]);
+        t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+        sp_384_mask_12(t2, d, t1[12 + i]);
+        t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_384_cmp_12(t1, d) >= 0;
+    sp_384_cond_sub_12(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_12(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve. */
+static const uint32_t p384_order_minus_2[12] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve. */
+static const uint32_t p384_order_low[6] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of the P384 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_12(r, a, b);
+    sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of the P384 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_order_12(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_12(r, a);
+    sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of the P384 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
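+ * n  Number of times to square.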
+#ifndef WOLFSSL_SP_SMALL
+/* Square a number mod the order of the P384 curve a number of times.
+ * (r = a ^ (2^n) mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_384_mont_sqr_n_order_12(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_12(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_12(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data.
+ */
+static void sp_384_mont_inv_order_12(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 12);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_12(t, t);
+        if ((p384_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_12(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 12U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 12;
+    sp_digit* t3 = td + 4 * 12;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_12(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_12(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_12(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_12(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_12(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_12(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_12(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_12(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_12(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_12(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
+    sp_384_mont_sqr_n_order_12(t2, t, 48);
+    /* t= a^ffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_12(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_12(t2, t2, t);
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_12(t2, t2);
+        if (((sp_digit)p384_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_12(t2, t2, a);
+        }
+    }
+    sp_384_mont_sqr_order_12(t2, t2);
+    sp_384_mont_mul_order_12(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
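Both branches of the inversion above are Fermat inversions: the order is prime, so 1/a = a^(order-2), computed with one square per exponent bit and a multiply where the bit is set. The non-small branch merely replaces the generic bit walk with a fixed addition chain tuned to the bit pattern of the P384 order. A single-word toy with the same loop shape follows; mod_inv_fermat and the small prime are illustrative only.

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t mod_mul(uint32_t a, uint32_t b, uint32_t m)
    {
        return (uint32_t)(((uint64_t)a * b) % m);
    }

    /* 1/a mod m for prime m, by square-and-multiply over the bits of
     * m - 2, scanning from the bit below the most significant set bit,
     * just like the i-- loops above. */
    static uint32_t mod_inv_fermat(uint32_t a, uint32_t m)
    {
        uint32_t e = m - 2;
        uint32_t t = a;
        int i;

        for (i = 30; i >= 0 && ((e >> i) & 1) == 0; i--)
            ;                           /* find the highest set bit */
        for (i--; i >= 0; i--) {
            t = mod_mul(t, t, m);       /* always square */
            if ((e >> i) & 1)
                t = mod_mul(t, a, m);   /* multiply on set bits */
        }
        return t;
    }

    int main(void)
    {
        uint32_t m = 65521;             /* a small prime */
        uint32_t inv = mod_inv_fermat(3, m);
        printf("3 * %u mod %u = %u\n", (unsigned)inv, (unsigned)m,
            (unsigned)mod_mul(3, inv, m));  /* prints ... = 1 */
        return 0;
    }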
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 384 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash Hash to sign.
+ * hashLen Length of the hash data.
+ * rng Random number generator.
+ * priv Private part of key - scalar.
+ * rm First part of result as an mp_int.
+ * sm Second part of result as an mp_int.
+ * km Scalar to use as k when not NULL and not zero; zeroed after use.
+ * heap Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*12];
+    sp_digit xd[2*12];
+    sp_digit kd[2*12];
+    sp_digit rd[2*12];
+    sp_digit td[3 * 2*12];
+    sp_point_384 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int32_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 12, heap,
+            DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 12;
+        x = d + 2 * 12;
+        k = d + 4 * 12;
+        r = d + 6 * 12;
+        tmp = d + 8 * 12;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(e, 12, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_384_from_mp(x, 12, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_384_ecc_gen_k_12(rng, k);
+        }
+        else {
+            sp_384_from_mp(k, 12, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 12U);
+            sp_384_norm_12(r);
+            c = sp_384_cmp_12(r, p384_order);
+            sp_384_cond_sub_12(r, r, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_12(r);
+
+            /* Convert k to Montgomery form (mod order) */
+            sp_384_mul_12(k, k, p384_norm_order);
+            err = sp_384_mod_12(k, k, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_12(k);
+            /* kInv = 1/k mod order */
+            sp_384_mont_inv_order_12(kInv, k, tmp);
+            sp_384_norm_12(kInv);
+
+            /* s = r * x + e */
+            sp_384_mul_12(x, x, r);
+            err = sp_384_mod_12(x, x, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_12(x);
+            carry = sp_384_add_12(s, e, x);
+            sp_384_cond_sub_12(s, s, p384_order, 0 - carry);
+            sp_384_norm_12(s);
+            c = sp_384_cmp_12(s, p384_order);
+            sp_384_cond_sub_12(s, s, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_12(s);
+
+            /* s = s * k^-1 mod order */
+            sp_384_mont_mul_order_12(s, s, kInv);
+            sp_384_norm_12(s);
+
+            /* Check that signature is usable. */
+            if (sp_384_iszero_12(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 12);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 12U);
+#endif
+    sp_384_point_free_12(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 384)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash Hash to verify.
+ * hashLen Length of the hash data.
+ * pX X ordinate of the public key point.
+ * pY Y ordinate of the public key point.
+ * pZ Z ordinate of the public key point.
+ * r First part of the signature as an mp_int.
+ * sm Second part of the signature as an mp_int.
+ * res Result of verification: 1 when the signature matches, 0 otherwise.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*12];
+    sp_digit u2d[2*12];
+    sp_digit sd[2*12];
+    sp_digit tmpd[2*12 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+
+    err = sp_384_point_new_12(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 12, heap,
+            DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1 = d + 0 * 12;
+        u2 = d + 2 * 12;
+        s = d + 4 * 12;
+        tmp = d + 6 * 12;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(u1, 12, hash, (int)hashLen);
+        sp_384_from_mp(u2, 12, r);
+        sp_384_from_mp(s, 12, sm);
+        sp_384_from_mp(p2->x, 12, pX);
+        sp_384_from_mp(p2->y, 12, pY);
+        sp_384_from_mp(p2->z, 12, pZ);
+
+        {
+            sp_384_mul_12(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_12(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_12(s);
+        {
+            sp_384_mont_inv_order_12(s, s, tmp);
+            sp_384_mont_mul_order_12(u1, u1, s);
+            sp_384_mont_mul_order_12(u2, u2, s);
+        }
+
+        err = sp_384_ecc_mulmod_base_12(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_12(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_384_proj_point_add_12(p1, p1, p2, tmp);
+            if (sp_384_iszero_12(p1->z)) {
+                if (sp_384_iszero_12(p1->x) && sp_384_iszero_12(p1->y)) {
+                    sp_384_proj_point_dbl_12(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    p1->x[7] = 0;
+                    p1->x[8] = 0;
+                    p1->x[9] = 0;
+                    p1->x[10] = 0;
+                    p1->x[11] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 12, r);
+        err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_12(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 12, r);
+            carry = sp_384_add_12(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_12(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_12(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montgomery form */
+                    err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod,
+                            p384_mp_mod);
+                        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_12(p1, 0, heap);
+    sp_384_point_free_12(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
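The verification above never leaves Jacobian coordinates: rather than divide by z'^2 to recover an affine x for the x == r test, it checks X' == r * z'^2 (mod p), and retries with r + order because r was reduced mod the order, so two field values can map onto the same r. The identity itself can be shown with small made-up integers; the values below are arbitrary.

    #include <stdio.h>

    int main(void)
    {
        unsigned p = 23, z = 5, x = 7;     /* affine x = X / z^2 mod p */
        unsigned X = (x * z * z) % p;      /* Jacobian X coordinate */
        unsigned r = 7;                    /* signature r to check */

        /* The affine test x == r and the division-free projective test
         * X == r * z^2 (mod p) agree; both print 1 here. */
        printf("affine:     %d\n", x == r);
        printf("projective: %d\n", X == (r * z * z) % p);
        return 0;
    }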
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point EC point.
+ * heap Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_384_ecc_is_point_12(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*12];
+    sp_digit t2d[2*12];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 12;
+        t2 = d + 2 * 12;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_384_sqr_12(t1, point->y);
+        (void)sp_384_mod_12(t1, t1, p384_mod);
+        sp_384_sqr_12(t2, point->x);
+        (void)sp_384_mod_12(t2, t2, p384_mod);
+        sp_384_mul_12(t2, t2, point->x);
+        (void)sp_384_mod_12(t2, t2, p384_mod);
+        (void)sp_384_sub_12(t2, p384_mod, t2);
+        sp_384_mont_add_12(t1, t1, t2, p384_mod);
+
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+
+        if (sp_384_cmp_12(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 pubd;
+#endif
+    sp_point_384* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_12(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pub->x, 12, pX);
+        sp_384_from_mp(pub->y, 12, pY);
+        sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+
+        err = sp_384_ecc_is_point_12(pub, NULL);
+    }
+
+    sp_384_point_free_12(pub, 0, NULL);
+
+    return err;
+}
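sp_384_ecc_is_point_12 rearranges the curve equation y^2 = x^3 - 3x + b into the residue test y^2 - x^3 + 3x == b (mod p): compute the left side once and compare against the stored constant p384_b. The same test on a toy a = -3 curve is below; p = 97, b = 3 and the points are made-up values chosen so that (1, 1) satisfies the equation.

    #include <stdio.h>

    /* On curve iff y^2 - x^3 + 3x mod p equals b; the extra p keeps the
     * intermediate sum non-negative, like the sub-then-add sequence above. */
    static int is_point(unsigned x, unsigned y, unsigned p, unsigned b)
    {
        unsigned t = ((y * y) % p + p - (x * x * x) % p + (3 * x) % p) % p;
        return t == b % p;
    }

    int main(void)
    {
        printf("(1,1) on curve: %d\n", is_point(1, 1, 97, 3));   /* 1 */
        printf("(2,1) on curve: %d\n", is_point(2, 1, 97, 3));   /* 0 */
        return 0;
    }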
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_OUT_OF_RANGE_E if either ordinate is not less than the modulus,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[12];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_12(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+            DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_384_from_mp(pub->x, 12, pX);
+        sp_384_from_mp(pub->y, 12, pY);
+        sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 12, privm);
+
+        /* Check point at infinity. */
+        if ((sp_384_iszero_12(pub->x) != 0) &&
+            (sp_384_iszero_12(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_384_cmp_12(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_12(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_12(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+        err = sp_384_ecc_mulmod_12(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_12(p->x) == 0) ||
+            (sp_384_iszero_12(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+        err = sp_384_ecc_mulmod_base_12(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_12(p->x, pub->x) != 0 ||
+            sp_384_cmp_12(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(pub, 0, heap);
+
+    return err;
+}
+#endif
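The WOLFSSL_PUBLIC_ECC_ADD_DBL block that follows exposes raw projective add, double and map entry points. A hypothetical caller-side sketch of how they chain is below; the declarations are assumed to come from <wolfssl/wolfcrypt/sp.h>, mp_int initialization and teardown are elided, and double_then_add is an invented name, not part of the API.

    #include <wolfssl/wolfcrypt/sp.h>

    /* r = 2*P + Q in affine form, via the projective entry points below.
     * All coordinates are mp_ints already initialized by the caller. */
    static int double_then_add(mp_int* pX, mp_int* pY, mp_int* pZ,
                               mp_int* qX, mp_int* qY, mp_int* qZ,
                               mp_int* rX, mp_int* rY, mp_int* rZ)
    {
        /* r = 2*P, still in projective (Jacobian) coordinates. */
        int err = sp_ecc_proj_dbl_point_384(pX, pY, pZ, rX, rY, rZ);

        /* r = 2*P + Q, still projective. */
        if (err == MP_OKAY)
            err = sp_ecc_proj_add_point_384(rX, rY, rZ, qX, qY, qZ,
                                            rX, rY, rZ);
        /* Map in place so rZ becomes one and rX/rY are affine. */
        if (err == MP_OKAY)
            err = sp_ecc_map_384(rX, rY, rZ);
        return err;
    }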
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+    mp_int* qX, mp_int* qY, mp_int* qZ,
+    mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 5];
+    sp_point_384 pd;
+    sp_point_384 qd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    sp_point_384* q = NULL;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 5, NULL,
+            DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+        sp_384_from_mp(q->x, 12, qX);
+        sp_384_from_mp(q->y, 12, qY);
+        sp_384_from_mp(q->z, 12, qZ);
+
+        sp_384_proj_point_add_12(p, p, q, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(q, 0, NULL);
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+
+/* Double a projective EC point.
+ * 2 * (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+    mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 2];
+    sp_point_384 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 2, NULL,
+            DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+
+        sp_384_proj_point_dbl_12(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */ +int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 12 * 6]; + sp_point_384 pd; +#endif + sp_digit* tmp; + sp_point_384* p; + int err; + + err = sp_384_point_new_12(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 12, pX); + sp_384_from_mp(p->y, 12, pY); + sp_384_from_mp(p->z, 12, pZ); + + sp_384_map_12(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, pZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_12(p, 0, NULL); + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +static int sp_384_mont_sqrt_12(sp_digit* y) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit t1d[2 * 12]; + sp_digit t2d[2 * 12]; + sp_digit t3d[2 * 12]; + sp_digit t4d[2 * 12]; + sp_digit t5d[2 * 12]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* t3; + sp_digit* t4; + sp_digit* t5; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 12, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = d + 0 * 12; + t2 = d + 2 * 12; + t3 = d + 4 * 12; + t4 = d + 6 * 12; + t5 = d + 8 * 12; +#else + t1 = t1d; + t2 = t2d; + t3 = t3d; + t4 = t4d; + t5 = t5d; +#endif + + { + /* t2 = y ^ 0x2 */ + sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3 */ + sp_384_mont_mul_12(t1, t2, y, p384_mod, p384_mp_mod); + /* t5 = y ^ 0xc */ + sp_384_mont_sqr_n_12(t5, t1, 2, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xf */ + sp_384_mont_mul_12(t1, t1, t5, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x1e */ + sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod); + /* t3 = y ^ 0x1f */ + sp_384_mont_mul_12(t3, t2, y, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3e0 */ + sp_384_mont_sqr_n_12(t2, t3, 5, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3ff */ + sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x7fe0 */ + sp_384_mont_sqr_n_12(t2, t1, 5, p384_mod, p384_mp_mod); + /* t3 = y ^ 0x7fff */ + sp_384_mont_mul_12(t3, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fff800 */ + sp_384_mont_sqr_n_12(t2, t3, 15, p384_mod, p384_mp_mod); + /* t4 = y ^ 0x3ffffff */ + sp_384_mont_mul_12(t4, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xffffffc000000 */ + sp_384_mont_sqr_n_12(t2, t4, 30, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xfffffffffffff */ + sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xfffffffffffffff000000000000000 */ + 
sp_384_mont_sqr_n_12(t2, t1, 60, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xffffffffffffffffffffffffffffff */ + sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */ + sp_384_mont_sqr_n_12(t2, t1, 120, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */ + sp_384_mont_sqr_n_12(t2, t1, 15, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */ + sp_384_mont_sqr_n_12(t2, t1, 31, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */ + sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */ + sp_384_mont_sqr_n_12(t2, t1, 4, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */ + sp_384_mont_mul_12(t1, t5, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */ + sp_384_mont_sqr_n_12(t2, t1, 62, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */ + sp_384_mont_mul_12(t1, y, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */ + sp_384_mont_sqr_n_12(y, t1, 30, p384_mod, p384_mp_mod); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
+ */ +int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit xd[2 * 12]; + sp_digit yd[2 * 12]; +#endif + sp_digit* x = NULL; + sp_digit* y = NULL; + int err = MP_OKAY; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 12, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + x = d + 0 * 12; + y = d + 2 * 12; +#else + x = xd; + y = yd; +#endif + + sp_384_from_mp(x, 12, xm); + err = sp_384_mod_mul_norm_12(x, x, p384_mod); + } + if (err == MP_OKAY) { + /* y = x^3 */ + { + sp_384_mont_sqr_12(y, x, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(y, y, x, p384_mod, p384_mp_mod); + } + /* y = x^3 - 3x */ + sp_384_mont_sub_12(y, y, x, p384_mod); + sp_384_mont_sub_12(y, y, x, p384_mod); + sp_384_mont_sub_12(y, y, x, p384_mod); + /* y = x^3 - 3x + b */ + err = sp_384_mod_mul_norm_12(x, p384_b, p384_mod); + } + if (err == MP_OKAY) { + sp_384_mont_add_12(y, y, x, p384_mod); + /* y = sqrt(x^3 - 3x + b) */ + err = sp_384_mont_sqrt_12(y); + } + if (err == MP_OKAY) { + XMEMSET(y + 12, 0, 12U * sizeof(sp_digit)); + sp_384_mont_reduce_12(y, p384_mod, p384_mp_mod); + if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) { + sp_384_mont_sub_12(y, p384_mod, y, p384_mod); + } + + err = sp_384_to_mp(y, ym); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} +#endif +#endif /* WOLFSSL_SP_384 */ +#endif /* WOLFSSL_HAVE_SP_ECC */ +#endif /* WOLFSSL_SP_ARM_CORTEX_M_ASM */ +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */ diff --git a/client/wolfssl/wolfcrypt/src/sp_dsp32.c b/client/wolfssl/wolfcrypt/src/sp_dsp32.c new file mode 100644 index 0000000..ef95c06 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/sp_dsp32.c @@ -0,0 +1,4908 @@ +/* sp_cdsp_signed.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* from wolfcrypt/src/sp_c32.c */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_ECC)
+#ifdef WOLFSSL_DSP
+
+#include <wolfssl/wolfcrypt/sp.h>
+#include "remote.h"
+#include "hexagon_protos.h"
+#include "hexagon_types.h"
+
+#if (defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL)) && (defined(WOLFSSL_HAVE_SP_ECC) || !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Mask for address to obfuscate which of the two addresses will be used. */
+static const size_t addr_mask[2] = { 0, (size_t)-1 };
+#endif
+
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
+/* Point structure to use. */
+typedef struct sp_point {
+    sp_digit x[2 * 10] __attribute__((aligned(128)));
+    sp_digit y[2 * 10] __attribute__((aligned(128)));
+    sp_digit z[2 * 10] __attribute__((aligned(128)));
+    int infinity;
+} sp_point;
+
+/* The modulus (prime) of the curve P256. */
+static const sp_digit p256_mod[10] __attribute__((aligned(128))) = {
+    0x3ffffff,0x3ffffff,0x3ffffff,0x003ffff,0x0000000,0x0000000,0x0000000,
+    0x0000400,0x3ff0000,0x03fffff
+};
+#ifndef WOLFSSL_SP_SMALL
+/* The Montgomery normalizer for the modulus of the curve P256. */
+static const sp_digit p256_norm_mod[10] __attribute__((aligned(128))) = {
+    0x0000001,0x0000000,0x0000000,0x3fc0000,0x3ffffff,0x3ffffff,0x3ffffff,
+    0x3fffbff,0x000ffff,0x0000000
+};
+#endif /* WOLFSSL_SP_SMALL */
+/* The Montgomery multiplier for the modulus of the curve P256. */
+static const sp_digit p256_mp_mod __attribute__((aligned(128))) = 0x000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* The order of the curve P256. */
+static const sp_digit p256_order[10] __attribute__((aligned(128))) = {
+    0x0632551,0x272b0bf,0x1e84f3b,0x2b69c5e,0x3bce6fa,0x3ffffff,0x3ffffff,
+    0x00003ff,0x3ff0000,0x03fffff
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for the order of the curve P256. */
+static const sp_digit p256_norm_order[10] __attribute__((aligned(128))) = {
+    0x39cdaaf,0x18d4f40,0x217b0c4,0x14963a1,0x0431905,0x0000000,0x0000000,
+    0x3fffc00,0x000ffff,0x0000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for the order of the curve P256. */
+static const sp_digit p256_mp_order __attribute__((aligned(128))) = 0x200bc4f;
+#endif
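Everything in this file works on ten 26-bit digits per 256-bit value (10 x 26 = 260 bits), so a 64-bit product of two digits leaves headroom for deferring carries, which the multiplication routines below exploit. The sketch that follows (to_limbs26 is an invented helper) packs the eight 32-bit words of the P256 prime into that form and reproduces the p256_mod table above.

    #include <stdint.h>
    #include <stdio.h>

    /* Pack eight 32-bit words (little-endian) into ten 26-bit digits. */
    static void to_limbs26(uint32_t r[10], const uint32_t a[8])
    {
        uint64_t acc = 0;   /* bit accumulator */
        int bits = 0;       /* bits currently held in acc */
        int i = 0, j;

        for (j = 0; j < 10; j++) {
            if (bits < 26 && i < 8) {
                acc |= (uint64_t)a[i++] << bits;
                bits += 32;
            }
            r[j] = (uint32_t)(acc & 0x3ffffff);
            acc >>= 26;
            bits -= 26;
        }
    }

    int main(void)
    {
        /* 2^256 - 2^224 + 2^192 + 2^96 - 1, the P256 prime. */
        uint32_t a[8] = { 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
                          0x00000000, 0x00000000, 0x00000001, 0xffffffff };
        uint32_t r[10];
        int j;

        to_limbs26(r, a);
        for (j = 0; j < 10; j++)
            printf("0x%07x%s", (unsigned)r[j], j == 9 ? "\n" : ",");
        return 0;
    }

The output matches the p256_mod initializer above digit for digit, which is a handy way to sanity-check the other constant tables in this file.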
+/* The base point of curve P256. */
+static const sp_point p256_base __attribute__((aligned(128))) = {
+    /* X ordinate */
+    {
+        0x098c296,0x04e5176,0x33a0f4a,0x204b7ac,0x277037d,0x0e9103c,0x3ce6e56,
+        0x1091fe2,0x1f2e12c,0x01ac5f4, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x3bf51f5,0x1901a0d,0x1ececbb,0x15dacc5,0x22bce33,0x303e785,0x27eb4a7,
+        0x1fe6e3b,0x2e2fe1a,0x013f8d0, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x0000001,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,
+        0x0000000,0x0000000,0x0000000, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+
+static int sp_ecc_point_new_ex(void* heap, sp_point* sp, sp_point** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    (void)sp;
+    *p = (sp_point*)XMALLOC(sizeof(sp_point), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+/* Allocate memory for point and return error. */
+#define sp_ecc_point_new(heap, sp, p) sp_ecc_point_new_ex((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_ecc_point_new(heap, sp, p) sp_ecc_point_new_ex((heap), &(sp), &(p))
+#endif
+
+
+static void sp_ecc_point_free(sp_point* p, int clear, void* heap)
+{
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    /* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    /* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by the Montgomery normalizer mod the modulus (prime).
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */ +static int sp_256_mod_mul_norm_10(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + int64_t* td; +#else + int64_t td[8]; + int64_t a32d[8]; +#endif + int64_t* t; + int64_t* a32; + int64_t o; + int err = MP_OKAY; + + (void)m; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 8, NULL, DYNAMIC_TYPE_ECC); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + t = td; + a32 = td + 8; +#else + t = td; + a32 = a32d; +#endif + + a32[0] = a[0]; + a32[0] |= a[1] << 26U; + a32[0] &= 0xffffffffL; + a32[1] = (sp_digit)(a[1] >> 6); + a32[1] |= a[2] << 20U; + a32[1] &= 0xffffffffL; + a32[2] = (sp_digit)(a[2] >> 12); + a32[2] |= a[3] << 14U; + a32[2] &= 0xffffffffL; + a32[3] = (sp_digit)(a[3] >> 18); + a32[3] |= a[4] << 8U; + a32[3] &= 0xffffffffL; + a32[4] = (sp_digit)(a[4] >> 24); + a32[4] |= a[5] << 2U; + a32[4] |= a[6] << 28U; + a32[4] &= 0xffffffffL; + a32[5] = (sp_digit)(a[6] >> 4); + a32[5] |= a[7] << 22U; + a32[5] &= 0xffffffffL; + a32[6] = (sp_digit)(a[7] >> 10); + a32[6] |= a[8] << 16U; + a32[6] &= 0xffffffffL; + a32[7] = (sp_digit)(a[8] >> 16); + a32[7] |= a[9] << 10U; + a32[7] &= 0xffffffffL; + + /* 1 1 0 -1 -1 -1 -1 0 */ + t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6]; + /* 0 1 1 0 -1 -1 -1 -1 */ + t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7]; + /* 0 0 1 1 0 -1 -1 -1 */ + t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7]; + /* -1 -1 0 2 2 1 0 -1 */ + t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7]; + /* 0 -1 -1 0 2 2 1 0 */ + t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6]; + /* 0 0 -1 -1 0 2 2 1 */ + t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7]; + /* -1 -1 0 0 0 1 3 2 */ + t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7]; + /* 1 0 -1 -1 -1 -1 0 3 */ + t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7]; + + t[1] += t[0] >> 32U; t[0] &= 0xffffffffL; + t[2] += t[1] >> 32U; t[1] &= 0xffffffffL; + t[3] += t[2] >> 32U; t[2] &= 0xffffffffL; + t[4] += t[3] >> 32U; t[3] &= 0xffffffffL; + t[5] += t[4] >> 32U; t[4] &= 0xffffffffL; + t[6] += t[5] >> 32U; t[5] &= 0xffffffffL; + t[7] += t[6] >> 32U; t[6] &= 0xffffffffL; + o = t[7] >> 32U; t[7] &= 0xffffffffL; + t[0] += o; + t[3] -= o; + t[6] -= o; + t[7] += o; + t[1] += t[0] >> 32U; t[0] &= 0xffffffffL; + t[2] += t[1] >> 32U; t[1] &= 0xffffffffL; + t[3] += t[2] >> 32U; t[2] &= 0xffffffffL; + t[4] += t[3] >> 32U; t[3] &= 0xffffffffL; + t[5] += t[4] >> 32U; t[4] &= 0xffffffffL; + t[6] += t[5] >> 32U; t[5] &= 0xffffffffL; + t[7] += t[6] >> 32U; t[6] &= 0xffffffffL; + + r[0] = (sp_digit)(t[0]) & 0x3ffffffL; + r[1] = (sp_digit)(t[0] >> 26U); + r[1] |= t[1] << 6U; + r[1] &= 0x3ffffffL; + r[2] = (sp_digit)(t[1] >> 20U); + r[2] |= t[2] << 12U; + r[2] &= 0x3ffffffL; + r[3] = (sp_digit)(t[2] >> 14U); + r[3] |= t[3] << 18U; + r[3] &= 0x3ffffffL; + r[4] = (sp_digit)(t[3] >> 8U); + r[4] |= t[4] << 24U; + r[4] &= 0x3ffffffL; + r[5] = (sp_digit)(t[4] >> 2U) & 0x3ffffffL; + r[6] = (sp_digit)(t[4] >> 28U); + r[6] |= t[5] << 4U; + r[6] &= 0x3ffffffL; + r[7] = (sp_digit)(t[5] >> 22U); + r[7] |= t[6] << 10U; + r[7] &= 0x3ffffffL; + r[8] = (sp_digit)(t[6] >> 16U); + r[8] |= t[7] << 16U; + r[8] &= 0x3ffffffL; + r[9] = (sp_digit)(t[7] >> 10U); + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (td != NULL) { + 
XFREE(td, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static sp_digit sp_256_cmp_10(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=9; i>=0; i--) { + r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#else + r |= (a[ 9] - b[ 9]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 8] - b[ 8]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 7] - b[ 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 6] - b[ 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 5] - b[ 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 4] - b[ 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 3] - b[ 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 2] - b[ 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 1] - b[ 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[ 0] - b[ 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Normalize the values in each word to 26. + * + * a Array of sp_digit to normalize. + */ +static void sp_256_norm_10(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 9; i++) { + a[i+1] += a[i] >> 26; + a[i] &= 0x3ffffff; + } +#else + a[1] += a[0] >> 26; a[0] = Q6_R_and_RR(a[0], 0x3ffffff); + a[2] += a[1] >> 26; a[1] = Q6_R_and_RR(a[1], 0x3ffffff); + a[3] += a[2] >> 26; a[2] = Q6_R_and_RR(a[2], 0x3ffffff); + a[4] += a[3] >> 26; a[3] = Q6_R_and_RR(a[3], 0x3ffffff); + a[5] += a[4] >> 26; a[4] = Q6_R_and_RR(a[4], 0x3ffffff); + a[6] += a[5] >> 26; a[5] = Q6_R_and_RR(a[5], 0x3ffffff); + a[7] += a[6] >> 26; a[6] = Q6_R_and_RR(a[6], 0x3ffffff); + a[8] += a[7] >> 26; a[7] = Q6_R_and_RR(a[7], 0x3ffffff); + a[9] += a[8] >> 26; a[8] = Q6_R_and_RR(a[8], 0x3ffffff); +#endif +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static void sp_256_cond_sub_10(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 10; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + r[ 0] = Q6_R_sub_RR(a[ 0], Q6_R_and_RR(b[ 0], m)); + r[ 1] = Q6_R_sub_RR(a[ 1], Q6_R_and_RR(b[ 1], m)); + r[ 2] = Q6_R_sub_RR(a[ 2], Q6_R_and_RR(b[ 2], m)); + r[ 3] = Q6_R_sub_RR(a[ 3], Q6_R_and_RR(b[ 3], m)); + r[ 4] = Q6_R_sub_RR(a[ 4], Q6_R_and_RR(b[ 4], m)); + r[ 5] = Q6_R_sub_RR(a[ 5], Q6_R_and_RR(b[ 5], m)); + r[ 6] = Q6_R_sub_RR(a[ 6], Q6_R_and_RR(b[ 6], m)); + r[ 7] = Q6_R_sub_RR(a[ 7], Q6_R_and_RR(b[ 7], m)); + r[ 8] = Q6_R_sub_RR(a[ 8], Q6_R_and_RR(b[ 8], m)); + r[ 9] = Q6_R_sub_RR(a[ 9], Q6_R_and_RR(b[ 9], m)); +#endif /* WOLFSSL_SP_SMALL */ +} + +#define sp_256_mont_reduce_order_10 sp_256_mont_reduce_10 + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
+ */ +SP_NOINLINE static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 10; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x3ffffff; + t >>= 26; + } + r[10] += t; +#else + int64_t tb = b; + int64_t t[10]; + + t[ 0] = Q6_P_mpy_RR(tb, a[ 0]); + t[ 1] = Q6_P_mpy_RR(tb, a[ 1]); + t[ 2] = Q6_P_mpy_RR(tb, a[ 2]); + t[ 3] = Q6_P_mpy_RR(tb, a[ 3]); + t[ 4] = Q6_P_mpy_RR(tb, a[ 4]); + t[ 5] = Q6_P_mpy_RR(tb, a[ 5]); + t[ 6] = Q6_P_mpy_RR(tb, a[ 6]); + t[ 7] = Q6_P_mpy_RR(tb, a[ 7]); + t[ 8] = Q6_P_mpy_RR(tb, a[ 8]); + t[ 9] = Q6_P_mpy_RR(tb, a[ 9]); + r[ 0] += (t[ 0] & 0x3ffffff); + r[ 1] += (t[ 0] >> 26) + (t[ 1] & 0x3ffffff); + r[ 2] += (t[ 1] >> 26) + (t[ 2] & 0x3ffffff); + r[ 3] += (t[ 2] >> 26) + (t[ 3] & 0x3ffffff); + r[ 4] += (t[ 3] >> 26) + (t[ 4] & 0x3ffffff); + r[ 5] += (t[ 4] >> 26) + (t[ 5] & 0x3ffffff); + r[ 6] += (t[ 5] >> 26) + (t[ 6] & 0x3ffffff); + r[ 7] += (t[ 6] >> 26) + (t[ 7] & 0x3ffffff); + r[ 8] += (t[ 7] >> 26) + (t[ 8] & 0x3ffffff); + r[ 9] += (t[ 8] >> 26) + (t[ 9] & 0x3ffffff); + r[10] += t[ 9] >> 26; +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Shift the result in the high 256 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. + */ +static void sp_256_mont_shift_10(sp_digit* r, const sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + sp_digit n, s; + + s = a[10]; + n = a[9] >> 22; + for (i = 0; i < 9; i++) { + n += (s & 0x3ffffff) << 4; + r[i] = n & 0x3ffffff; + n >>= 26; + s = a[11 + i] + (s >> 26); + } + n += s << 4; + r[9] = n; +#else + sp_digit n, s; + + s = a[10]; n = a[9] >> 22; + n += (s & 0x3ffffff) << 4; r[ 0] = Q6_R_and_RR(n, 0x3ffffff); + n >>= 26; s = a[11] + (s >> 26); + n += (s & 0x3ffffff) << 4; r[ 1] = Q6_R_and_RR(n, 0x3ffffff); + n >>= 26; s = a[12] + (s >> 26); + n += (s & 0x3ffffff) << 4; r[ 2] = Q6_R_and_RR(n, 0x3ffffff); + n >>= 26; s = a[13] + (s >> 26); + n += (s & 0x3ffffff) << 4; r[ 3] = Q6_R_and_RR(n, 0x3ffffff); + n >>= 26; s = a[14] + (s >> 26); + n += (s & 0x3ffffff) << 4; r[ 4] = Q6_R_and_RR(n, 0x3ffffff); + n >>= 26; s = a[15] + (s >> 26); + n += (s & 0x3ffffff) << 4; r[ 5] = Q6_R_and_RR(n, 0x3ffffff); + n >>= 26; s = a[16] + (s >> 26); + n += (s & 0x3ffffff) << 4; r[ 6] = Q6_R_and_RR(n, 0x3ffffff); + n >>= 26; s = a[17] + (s >> 26); + n += (s & 0x3ffffff) << 4; r[ 7] = Q6_R_and_RR(n, 0x3ffffff); + n >>= 26; s = a[18] + (s >> 26); + n += (s & 0x3ffffff) << 4; r[ 8] = Q6_R_and_RR(n, 0x3ffffff); + n >>= 26; s = a[19] + (s >> 26); + n += s << 4; r[ 9] = n; +#endif /* WOLFSSL_SP_SMALL */ + XMEMSET(&r[10], 0, sizeof(*r) * 10U); +} + + +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + sp_digit mu; + + + /* unrolled for loops due to unexpected behavior with -O optimizations */ + if (mp != 1) { + mu = Q6_P_mpy_RR(a[0], mp) & 0x3ffffff; + sp_256_mul_add_10(a+0, m, mu); + a[0+1] += a[0] >> 26; + + mu = Q6_P_mpy_RR(a[1], mp) & 0x3ffffff; + sp_256_mul_add_10(a+1, m, mu); + a[1+1] += a[1] >> 26; + + mu = Q6_P_mpy_RR(a[2], mp) & 0x3ffffff; + sp_256_mul_add_10(a+2, m, mu); + a[2+1] += a[2] >> 26; + + mu = Q6_P_mpy_RR(a[3], mp) & 0x3ffffff; + sp_256_mul_add_10(a+3, m, mu); + a[3+1] += a[3] >> 26; + + mu = Q6_P_mpy_RR(a[4], mp) & 0x3ffffff; + sp_256_mul_add_10(a+4, m, mu); + a[4+1] += a[4] >> 26; + + mu = Q6_P_mpy_RR(a[5], mp) & 0x3ffffff; + sp_256_mul_add_10(a+5, m, mu); + a[5+1] += a[5] >> 26; + + mu = Q6_P_mpy_RR(a[6], mp) & 0x3ffffff; + sp_256_mul_add_10(a+6, m, mu); + a[6+1] += a[6] >> 26; + + mu = Q6_P_mpy_RR(a[7], mp) & 0x3ffffff; + sp_256_mul_add_10(a+7, m, mu); + a[7+1] += a[7] >> 26; + + mu = Q6_P_mpy_RR(a[8], mp) & 0x3ffffff; + sp_256_mul_add_10(a+8, m, mu); + a[8+1] += a[8] >> 26; + + mu = Q6_P_mpy_RR(a[9], mp) & 0x3fffffL; + sp_256_mul_add_10(a+9, m, mu); + a[9+1] += a[9] >> 26; + a[9] &= 0x3ffffff; + } + else { + mu = Q6_P_mpy_RR(a[0], mp) & 0x3ffffff; + sp_256_mul_add_10(a+0, p256_mod, mu); + a[0+1] += a[0] >> 26; + + mu = Q6_P_mpy_RR(a[1], mp) & 0x3ffffff; + sp_256_mul_add_10(a+1, p256_mod, mu); + a[1+1] += a[1] >> 26; + + mu = Q6_P_mpy_RR(a[2], mp) & 0x3ffffff; + sp_256_mul_add_10(a+2, p256_mod, mu); + a[2+1] += a[2] >> 26; + + mu = Q6_P_mpy_RR(a[3], mp) & 0x3ffffff; + sp_256_mul_add_10(a+3, p256_mod, mu); + a[3+1] += a[3] >> 26; + + mu = Q6_P_mpy_RR(a[4], mp) & 0x3ffffff; + sp_256_mul_add_10(a+4, p256_mod, mu); + a[4+1] += a[4] >> 26; + + mu = Q6_P_mpy_RR(a[5], mp) & 0x3ffffff; + sp_256_mul_add_10(a+5, p256_mod, mu); + a[5+1] += a[5] >> 26; + + mu = Q6_P_mpy_RR(a[6], mp) & 0x3ffffff; + sp_256_mul_add_10(a+6, p256_mod, mu); + a[6+1] += a[6] >> 26; + + mu = Q6_P_mpy_RR(a[7], mp) & 0x3ffffff; + sp_256_mul_add_10(a+7, p256_mod, mu); + a[7+1] += a[7] >> 26; + + mu = Q6_P_mpy_RR(a[8], mp) & 0x3ffffff; + sp_256_mul_add_10(a+8, p256_mod, mu); + a[8+1] += a[8] >> 26; + + mu = Q6_P_mpy_RR(a[9], mp) & 0x3fffffL; + sp_256_mul_add_10(a+9, p256_mod, mu); + a[9+1] += a[9] >> 26; + a[9] &= 0x3ffffff; + } + + + sp_256_mont_shift_10(a, a); + sp_256_cond_sub_10(a, a, m, 0 - (((a[9] >> 22) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_10(a); +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static void sp_256_mul_10(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ +#if 1 + int64_t t0 = Q6_P_mpy_RR(a[0], b[0]); + int64_t t1 = Q6_P_mpy_RR(a[0], b[1]) + + Q6_P_mpy_RR(a[1], b[0]); + int64_t t2 = Q6_P_mpy_RR(a[0], b[2]) + + Q6_P_mpy_RR(a[1], b[1]) + + Q6_P_mpy_RR(a[2], b[0]); + int64_t t3 = Q6_P_mpy_RR(a[0], b[3]) + + Q6_P_mpy_RR(a[1], b[2]) + + Q6_P_mpy_RR(a[2], b[1]) + + Q6_P_mpy_RR(a[3], b[0]); + int64_t t4 = Q6_P_mpy_RR(a[0], b[4]) + + Q6_P_mpy_RR(a[1], b[3]) + + Q6_P_mpy_RR(a[2], b[2]) + + Q6_P_mpy_RR(a[3], b[1]) + + Q6_P_mpy_RR(a[4], b[0]); + int64_t t5 = Q6_P_mpy_RR(a[0], b[5]) + + Q6_P_mpy_RR(a[1], b[4]) + + Q6_P_mpy_RR(a[2], b[3]) + + Q6_P_mpy_RR(a[3], b[2]) + + Q6_P_mpy_RR(a[4], b[1]) + + Q6_P_mpy_RR(a[5], b[0]); + int64_t t6 = Q6_P_mpy_RR(a[0], b[6]) + + Q6_P_mpy_RR(a[1], b[5]) + + Q6_P_mpy_RR(a[2], b[4]) + + Q6_P_mpy_RR(a[3], b[3]) + + Q6_P_mpy_RR(a[4], b[2]) + + Q6_P_mpy_RR(a[5], b[1]) + + Q6_P_mpy_RR(a[6], b[0]); + int64_t t7 = Q6_P_mpy_RR(a[0], b[7]) + + Q6_P_mpy_RR(a[1], b[6]) + + Q6_P_mpy_RR(a[2], b[5]) + + Q6_P_mpy_RR(a[3], b[4]) + + Q6_P_mpy_RR(a[4], b[3]) + + Q6_P_mpy_RR(a[5], b[2]) + + Q6_P_mpy_RR(a[6], b[1]) + + Q6_P_mpy_RR(a[7], b[0]); + int64_t t8 = Q6_P_mpy_RR(a[0], b[8]) + + Q6_P_mpy_RR(a[1], b[7]) + + Q6_P_mpy_RR(a[2], b[6]) + + Q6_P_mpy_RR(a[3], b[5]) + + Q6_P_mpy_RR(a[4], b[4]) + + Q6_P_mpy_RR(a[5], b[3]) + + Q6_P_mpy_RR(a[6], b[2]) + + Q6_P_mpy_RR(a[7], b[1]) + + Q6_P_mpy_RR(a[8], b[0]); + int64_t t9 = Q6_P_mpy_RR(a[0], b[9]) + + Q6_P_mpy_RR(a[1], b[8]) + + Q6_P_mpy_RR(a[2], b[7]) + + Q6_P_mpy_RR(a[3], b[6]) + + Q6_P_mpy_RR(a[4], b[5]) + + Q6_P_mpy_RR(a[5], b[4]) + + Q6_P_mpy_RR(a[6], b[3]) + + Q6_P_mpy_RR(a[7], b[2]) + + Q6_P_mpy_RR(a[8], b[1]) + + Q6_P_mpy_RR(a[9], b[0]); + int64_t t10 = Q6_P_mpy_RR(a[1], b[9]) + + Q6_P_mpy_RR(a[2], b[8]) + + Q6_P_mpy_RR(a[3], b[7]) + + Q6_P_mpy_RR(a[4], b[6]) + + Q6_P_mpy_RR(a[5], b[5]) + + Q6_P_mpy_RR(a[6], b[4]) + + Q6_P_mpy_RR(a[7], b[3]) + + Q6_P_mpy_RR(a[8], b[2]) + + Q6_P_mpy_RR(a[9], b[1]); + int64_t t11 = Q6_P_mpy_RR(a[2], b[9]) + + Q6_P_mpy_RR(a[3], b[8]) + + Q6_P_mpy_RR(a[4], b[7]) + + Q6_P_mpy_RR(a[5], b[6]) + + Q6_P_mpy_RR(a[6], b[5]) + + Q6_P_mpy_RR(a[7], b[4]) + + Q6_P_mpy_RR(a[8], b[3]) + + Q6_P_mpy_RR(a[9], b[2]); + int64_t t12 = Q6_P_mpy_RR(a[3], b[9]) + + Q6_P_mpy_RR(a[4], b[8]) + + Q6_P_mpy_RR(a[5], b[7]) + + Q6_P_mpy_RR(a[6], b[6]) + + Q6_P_mpy_RR(a[7], b[5]) + + Q6_P_mpy_RR(a[8], b[4]) + + Q6_P_mpy_RR(a[9], b[3]); + int64_t t13 = Q6_P_mpy_RR(a[4], b[9]) + + Q6_P_mpy_RR(a[5], b[8]) + + Q6_P_mpy_RR(a[6], b[7]) + + Q6_P_mpy_RR(a[7], b[6]) + + Q6_P_mpy_RR(a[8], b[5]) + + Q6_P_mpy_RR(a[9], b[4]); + int64_t t14 = Q6_P_mpy_RR(a[5], b[9]) + + Q6_P_mpy_RR(a[6], b[8]) + + Q6_P_mpy_RR(a[7], b[7]) + + Q6_P_mpy_RR(a[8], b[6]) + + Q6_P_mpy_RR(a[9], b[5]); + int64_t t15 = Q6_P_mpy_RR(a[6], b[9]) + + Q6_P_mpy_RR(a[7], b[8]) + + Q6_P_mpy_RR(a[8], b[7]) + + Q6_P_mpy_RR(a[9], b[6]); + int64_t t16 = Q6_P_mpy_RR(a[7], b[9]) + + Q6_P_mpy_RR(a[8], b[8]) + + Q6_P_mpy_RR(a[9], b[7]); + int64_t t17 = Q6_P_mpy_RR(a[8], b[9]) + + Q6_P_mpy_RR(a[9], b[8]); + int64_t t18 = Q6_P_mpy_RR(a[9], b[9]); + + + t1 += t0 >> 26; r[ 0] = t0 & 0x3ffffff; + t2 += t1 >> 26; r[ 1] = t1 & 0x3ffffff; + t3 += t2 >> 26; r[ 2] = t2 & 0x3ffffff; + t4 += t3 >> 26; r[ 3] = t3 & 0x3ffffff; + t5 += t4 >> 26; r[ 4] = t4 & 0x3ffffff; + t6 += t5 >> 26; r[ 5] = t5 & 0x3ffffff; + t7 += t6 >> 26; r[ 6] = t6 & 0x3ffffff; + t8 += t7 >> 26; r[ 7] = t7 & 0x3ffffff; + t9 += t8 >> 26; r[ 8] = t8 & 0x3ffffff; + t10 += t9 >> 26; r[ 9] = 
t9 & 0x3ffffff; + t11 += t10 >> 26; r[10] = t10 & 0x3ffffff; + t12 += t11 >> 26; r[11] = t11 & 0x3ffffff; + t13 += t12 >> 26; r[12] = t12 & 0x3ffffff; + t14 += t13 >> 26; r[13] = t13 & 0x3ffffff; + t15 += t14 >> 26; r[14] = t14 & 0x3ffffff; + t16 += t15 >> 26; r[15] = t15 & 0x3ffffff; + t17 += t16 >> 26; r[16] = t16 & 0x3ffffff; + t18 += t17 >> 26; r[17] = t17 & 0x3ffffff; + r[19] = (sp_digit)(t18 >> 26); + r[18] = t18 & 0x3ffffff; +#endif +#if 0 + /* Testing speeds with using HVX_Vectors */ + { + int64_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18; + HVX_Vector av, splat; + HVX_Vector vlow, vhi; + + av = Q6_V_vzero(); + vlow = Q6_V_vzero(); + vhi = Q6_V_vzero(); + + XMEMCPY((byte*)&av, (byte*)a, 40); + + splat = Q6_V_vsplat_R(b[0]); + vlow = Q6_Vw_vmpyieo_VhVh(av, splat); + vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat); + + vhi = Q6_Vw_vmpye_VwVuh(av, splat); + vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat); + unsigned int* loi = (unsigned int*)&vlow; + int* hii = (int*)&vhi; + + /* a[0] * b[0] */ + t0 = loi[0] | ((int64_t)hii[0] << 31); + + /* a[1] * b[0] */ + t1 = loi[1] | ((int64_t)hii[1] << 31); + + /* a[2] * b[0] */ + t2 = loi[2] | ((int64_t)hii[2] << 31); + + /* a[3] * b[0] */ + t3 = loi[3] | ((int64_t)hii[3] << 31); + + /* a[4] * b[0] */ + t4 = loi[4] | ((int64_t)hii[4] << 31); + + /* a[5] * b[0] */ + t5 = loi[5] | ((int64_t)hii[5] << 31); + + /* a[6] * b[0] */ + t6 = loi[6] | ((int64_t)hii[6] << 31); + + /* a[7] * b[0] */ + t7 = loi[7] | ((int64_t)hii[7] << 31); + + /* a[8] * b[0] */ + t8 = loi[8] | ((int64_t)hii[8] << 31); + + /* a[9] * b[0] */ + t9 = loi[9] | ((int64_t)hii[9] << 31); + + /* a[*] * b[1] */ + splat = Q6_V_vsplat_R(b[1]); + vlow = Q6_Vw_vmpyieo_VhVh(av, splat); + vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat); + vhi = Q6_Vw_vmpye_VwVuh(av, splat); + vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat); + loi = (unsigned int*)&vlow; + hii = (int*)&vhi; + + /* a[0] * b[1] */ + t1 += (loi[0] | ((int64_t)hii[0] << 31)); + + /* a[1] * b[1] */ + t2 += (loi[1] | ((int64_t)hii[1] << 31)); + + /* a[2] * b[1] */ + t3 += (loi[2] | ((int64_t)hii[2] << 31)); + + /* a[3] * b[1] */ + t4 += (loi[3] | ((int64_t)hii[3] << 31)); + + /* a[4] * b[1] */ + t5 += (loi[4] | ((int64_t)hii[4] << 31)); + + /* a[5] * b[1] */ + t6 += (loi[5] | ((int64_t)hii[5] << 31)); + + /* a[6] * b[1] */ + t7 += (loi[6] | ((int64_t)hii[6] << 31)); + + /* a[7] * b[1] */ + t8 += (loi[7] | ((int64_t)hii[7] << 31)); + + /* a[8] * b[1] */ + t9 += (loi[8] | ((int64_t)hii[8] << 31)); + + /* a[9] * b[1] */ + t10 = (loi[9] | ((int64_t)hii[9] << 31)); + + /* a[*] * b[2] */ + splat = Q6_V_vsplat_R(b[2]); + vlow = Q6_Vw_vmpyieo_VhVh(av, splat); + vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat); + vhi = Q6_Vw_vmpye_VwVuh(av, splat); + vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat); + loi = (unsigned int*)&vlow; + hii = (int*)&vhi; + + + /* a[0] * b[2] */ + t2 += (loi[0] | ((int64_t)hii[0] << 31)); + + /* a[1] * b[2] */ + t3 += (loi[1] | ((int64_t)hii[1] << 31)); + + /* a[2] * b[2] */ + t4 += (loi[2] | ((int64_t)hii[2] << 31)); + + /* a[3] * b[2] */ + t5 += (loi[3] | ((int64_t)hii[3] << 31)); + + /* a[4] * b[2] */ + t6 += (loi[4] | ((int64_t)hii[4] << 31)); + + /* a[5] * b[2] */ + t7 += (loi[5] | ((int64_t)hii[5] << 31)); + + /* a[6] * b[2] */ + t8 += (loi[6] | ((int64_t)hii[6] << 31)); + + /* a[7] * b[2] */ + t9 += (loi[7] | ((int64_t)hii[7] << 31)); + + /* a[8] * b[2] */ + t10 += (loi[8] | ((int64_t)hii[8] << 31)); + + /* a[9] * b[2] */ + t11 = (loi[9] | 
((int64_t)hii[9] << 31)); + + + /* a[*] * b[3] */ + splat = Q6_V_vsplat_R(b[3]); + vlow = Q6_Vw_vmpyieo_VhVh(av, splat); + vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat); + vhi = Q6_Vw_vmpye_VwVuh(av, splat); + vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat); + loi = (unsigned int*)&vlow; + hii = (int*)&vhi; + + + /* a[0] * b[3] */ + t3 += (loi[0] | ((int64_t)hii[0] << 31)); + + /* a[1] * b[3] */ + t4 += (loi[1] | ((int64_t)hii[1] << 31)); + + /* a[2] * b[3] */ + t5 += (loi[2] | ((int64_t)hii[2] << 31)); + + /* a[3] * b[3] */ + t6 += (loi[3] | ((int64_t)hii[3] << 31)); + + /* a[4] * b[3] */ + t7 += (loi[4] | ((int64_t)hii[4] << 31)); + + /* a[5] * b[3] */ + t8 += (loi[5] | ((int64_t)hii[5] << 31)); + + /* a[6] * b[3] */ + t9 += (loi[6] | ((int64_t)hii[6] << 31)); + + /* a[7] * b[3] */ + t10 += (loi[7] | ((int64_t)hii[7] << 31)); + + /* a[8] * b[3] */ + t11 += (loi[8] | ((int64_t)hii[8] << 31)); + + /* a[9] * b[3] */ + t12 = (loi[9] | ((int64_t)hii[9] << 31)); + + + /* a[*] * b[4] */ + splat = Q6_V_vsplat_R(b[4]); + vlow = Q6_Vw_vmpyieo_VhVh(av, splat); + vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat); + vhi = Q6_Vw_vmpye_VwVuh(av, splat); + vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat); + loi = (unsigned int*)&vlow; + hii = (int*)&vhi; + + + /* a[0] * b[4] */ + t4 += (loi[0] | ((int64_t)hii[0] << 31)); + + /* a[1] * b[4] */ + t5 += (loi[1] | ((int64_t)hii[1] << 31)); + + /* a[2] * b[4] */ + t6 += (loi[2] | ((int64_t)hii[2] << 31)); + + /* a[3] * b[4] */ + t7 += (loi[3] | ((int64_t)hii[3] << 31)); + + /* a[4] * b[4] */ + t8 += (loi[4] | ((int64_t)hii[4] << 31)); + + /* a[5] * b[4] */ + t9 += (loi[5] | ((int64_t)hii[5] << 31)); + + /* a[6] * b[4] */ + t10 += (loi[6] | ((int64_t)hii[6] << 31)); + + /* a[7] * b[4] */ + t11 += (loi[7] | ((int64_t)hii[7] << 31)); + + /* a[8] * b[4] */ + t12 += (loi[8] | ((int64_t)hii[8] << 31)); + + /* a[9] * b[4] */ + t13 = (loi[9] | ((int64_t)hii[9] << 31)); + + + /* a[*] * b[5] */ + splat = Q6_V_vsplat_R(b[5]); + vlow = Q6_Vw_vmpyieo_VhVh(av, splat); + vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat); + vhi = Q6_Vw_vmpye_VwVuh(av, splat); + vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat); + loi = (unsigned int*)&vlow; + hii = (int*)&vhi; + + + /* a[0] * b[5] */ + t5 += (loi[0] | ((int64_t)hii[0] << 31)); + + /* a[1] * b[5] */ + t6 += (loi[1] | ((int64_t)hii[1] << 31)); + + /* a[2] * b[5] */ + t7 += (loi[2] | ((int64_t)hii[2] << 31)); + + /* a[3] * b[5] */ + t8 += (loi[3] | ((int64_t)hii[3] << 31)); + + /* a[4] * b[5] */ + t9 += (loi[4] | ((int64_t)hii[4] << 31)); + + /* a[5] * b[5] */ + t10 += (loi[5] | ((int64_t)hii[5] << 31)); + + /* a[6] * b[5] */ + t11 += (loi[6] | ((int64_t)hii[6] << 31)); + + /* a[7] * b[5] */ + t12 += (loi[7] | ((int64_t)hii[7] << 31)); + + /* a[8] * b[5] */ + t13 += (loi[8] | ((int64_t)hii[8] << 31)); + + /* a[9] * b[5] */ + t14 = (loi[9] | ((int64_t)hii[9] << 31)); + + + /* a[*] * b[6] */ + splat = Q6_V_vsplat_R(b[6]); + vlow = Q6_Vw_vmpyieo_VhVh(av, splat); + vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat); + vhi = Q6_Vw_vmpye_VwVuh(av, splat); + vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat); + loi = (unsigned int*)&vlow; + hii = (int*)&vhi; + + + /* a[0] * b[6] */ + t6 += (loi[0] | ((int64_t)hii[0] << 31)); + + /* a[1] * b[6] */ + t7 += (loi[1] | ((int64_t)hii[1] << 31)); + + /* a[2] * b[6] */ + t8 += (loi[2] | ((int64_t)hii[2] << 31)); + + /* a[3] * b[6] */ + t9 += (loi[3] | ((int64_t)hii[3] << 31)); + + /* a[4] * b[6] */ + t10 += (loi[4] | ((int64_t)hii[4] << 31)); + + /* a[5] * b[6] */ + 
t11 += (loi[5] | ((int64_t)hii[5] << 31)); + + /* a[6] * b[6] */ + t12 += (loi[6] | ((int64_t)hii[6] << 31)); + + /* a[7] * b[6] */ + t13 += (loi[7] | ((int64_t)hii[7] << 31)); + + /* a[8] * b[6] */ + t14 += (loi[8] | ((int64_t)hii[8] << 31)); + + /* a[9] * b[6] */ + t15 = (loi[9] | ((int64_t)hii[9] << 31)); + + + + /* a[*] * b[7] */ + splat = Q6_V_vsplat_R(b[7]); + vlow = Q6_Vw_vmpyieo_VhVh(av, splat); + vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat); + vhi = Q6_Vw_vmpye_VwVuh(av, splat); + vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat); + loi = (unsigned int*)&vlow; + hii = (int*)&vhi; + + + /* a[0] * b[7] */ + t7 += (loi[0] | ((int64_t)hii[0] << 31)); + + /* a[1] * b[7] */ + t8 += (loi[1] | ((int64_t)hii[1] << 31)); + + /* a[2] * b[7] */ + t9 += (loi[2] | ((int64_t)hii[2] << 31)); + + /* a[3] * b[7] */ + t10 += (loi[3] | ((int64_t)hii[3] << 31)); + + /* a[4] * b[7] */ + t11 += (loi[4] | ((int64_t)hii[4] << 31)); + + /* a[5] * b[7] */ + t12 += (loi[5] | ((int64_t)hii[5] << 31)); + + /* a[6] * b[7] */ + t13 += (loi[6] | ((int64_t)hii[6] << 31)); + + /* a[7] * b[7] */ + t14 += (loi[7] | ((int64_t)hii[7] << 31)); + + /* a[8] * b[7] */ + t15 += (loi[8] | ((int64_t)hii[8] << 31)); + + /* a[9] * b[7] */ + t16 = (loi[9] | ((int64_t)hii[9] << 31)); + + + /* a[*] * b[8] */ + splat = Q6_V_vsplat_R(b[8]); + vlow = Q6_Vw_vmpyieo_VhVh(av, splat); + vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat); + vhi = Q6_Vw_vmpye_VwVuh(av, splat); + vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat); + loi = (unsigned int*)&vlow; + hii = (int*)&vhi; + + + /* a[0] * b[8] */ + t8 += (loi[0] | ((int64_t)hii[0] << 31)); + + /* a[1] * b[8] */ + t9 += (loi[1] | ((int64_t)hii[1] << 31)); + + /* a[2] * b[8] */ + t10 += (loi[2] | ((int64_t)hii[2] << 31)); + + /* a[3] * b[8] */ + t11 += (loi[3] | ((int64_t)hii[3] << 31)); + + /* a[4] * b[8] */ + t12 += (loi[4] | ((int64_t)hii[4] << 31)); + + /* a[5] * b[8] */ + t13 += (loi[5] | ((int64_t)hii[5] << 31)); + + /* a[6] * b[8] */ + t14 += (loi[6] | ((int64_t)hii[6] << 31)); + + /* a[7] * b[8] */ + t15 += (loi[7] | ((int64_t)hii[7] << 31)); + + /* a[8] * b[8] */ + t16 += (loi[8] | ((int64_t)hii[8] << 31)); + + /* a[9] * b[8] */ + t17 = (loi[9] | ((int64_t)hii[9] << 31)); + + + /* a[*] * b[9] */ + splat = Q6_V_vsplat_R(b[9]); + vlow = Q6_Vw_vmpyieo_VhVh(av, splat); + vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat); + vhi = Q6_Vw_vmpye_VwVuh(av, splat); + vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat); + loi = (unsigned int*)&vlow; + hii = (int*)&vhi; + + + /* a[0] * b[9] */ + t9 += (loi[0] | ((int64_t)hii[0] << 31)); + + /* a[1] * b[9] */ + t10 += (loi[1] | ((int64_t)hii[1] << 31)); + + /* a[2] * b[9] */ + t11 += (loi[2] | ((int64_t)hii[2] << 31)); + + /* a[3] * b[9] */ + t12 += (loi[3] | ((int64_t)hii[3] << 31)); + + /* a[4] * b[9] */ + t13 += (loi[4] | ((int64_t)hii[4] << 31)); + + /* a[5] * b[9] */ + t14 += (loi[5] | ((int64_t)hii[5] << 31)); + + /* a[6] * b[9] */ + t15 += (loi[6] | ((int64_t)hii[6] << 31)); + + /* a[7] * b[9] */ + t16 += (loi[7] | ((int64_t)hii[7] << 31)); + + /* a[8] * b[9] */ + t17 += (loi[8] | ((int64_t)hii[8] << 31)); + + /* a[9] * b[9] */ + t18 = (loi[9] | ((int64_t)hii[9] << 31)); + + t1 += t0 >> 26; r[ 0] = t0 & 0x3ffffff; + t2 += t1 >> 26; r[ 1] = t1 & 0x3ffffff; + t3 += t2 >> 26; r[ 2] = t2 & 0x3ffffff; + t4 += t3 >> 26; r[ 3] = t3 & 0x3ffffff; + t5 += t4 >> 26; r[ 4] = t4 & 0x3ffffff; + t6 += t5 >> 26; r[ 5] = t5 & 0x3ffffff; + t7 += t6 >> 26; r[ 6] = t6 & 0x3ffffff; + t8 += t7 >> 26; r[ 7] = t7 & 0x3ffffff; + t9 += t8 >> 
26; r[ 8] = t8 & 0x3ffffff;
+    t10 += t9 >> 26; r[ 9] = t9 & 0x3ffffff;
+    t11 += t10 >> 26; r[10] = t10 & 0x3ffffff;
+    t12 += t11 >> 26; r[11] = t11 & 0x3ffffff;
+    t13 += t12 >> 26; r[12] = t12 & 0x3ffffff;
+    t14 += t13 >> 26; r[13] = t13 & 0x3ffffff;
+    t15 += t14 >> 26; r[14] = t14 & 0x3ffffff;
+    t16 += t15 >> 26; r[15] = t15 & 0x3ffffff;
+    t17 += t16 >> 26; r[16] = t16 & 0x3ffffff;
+    t18 += t17 >> 26; r[17] = t17 & 0x3ffffff;
+    r[19] = (sp_digit)(t18 >> 26);
+    r[18] = t18 & 0x3ffffff;
+    }
+#endif
+}
+
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_256_mont_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mul_10(r, a, b);
+    sp_256_mont_reduce_10(r, m, mp);
+}
+
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */
+SP_NOINLINE static void sp_256_sqr_10(sp_digit* r, const sp_digit* a)
+{
+    int64_t t0 = Q6_P_mpy_RR(a[0], a[0]);
+    int64_t t1 = Q6_P_mpy_RR(a[0], a[1]) * 2;
+    int64_t t2 = Q6_P_mpy_RR(a[0], a[2]) * 2 +
+                 Q6_P_mpy_RR(a[1], a[1]);
+    int64_t t3 = (Q6_P_mpy_RR(a[0], a[3]) +
+                  Q6_P_mpy_RR(a[1], a[2])) * 2;
+    int64_t t4 = (Q6_P_mpy_RR(a[ 0], a[ 4]) +
+                  Q6_P_mpy_RR(a[ 1], a[ 3])) * 2 +
+                  Q6_P_mpy_RR(a[ 2], a[ 2]);
+    int64_t t5 = (Q6_P_mpy_RR(a[ 0], a[ 5]) +
+                  Q6_P_mpy_RR(a[ 1], a[ 4]) +
+                  Q6_P_mpy_RR(a[ 2], a[ 3])) * 2;
+    int64_t t6 = (Q6_P_mpy_RR(a[ 0], a[ 6]) +
+                  Q6_P_mpy_RR(a[ 1], a[ 5]) +
+                  Q6_P_mpy_RR(a[ 2], a[ 4])) * 2 +
+                  Q6_P_mpy_RR(a[ 3], a[ 3]);
+    int64_t t7 = (Q6_P_mpy_RR(a[ 0], a[ 7]) +
+                  Q6_P_mpy_RR(a[ 1], a[ 6]) +
+                  Q6_P_mpy_RR(a[ 2], a[ 5]) +
+                  Q6_P_mpy_RR(a[ 3], a[ 4])) * 2;
+    int64_t t8 = (Q6_P_mpy_RR(a[ 0], a[ 8]) +
+                  Q6_P_mpy_RR(a[ 1], a[ 7]) +
+                  Q6_P_mpy_RR(a[ 2], a[ 6]) +
+                  Q6_P_mpy_RR(a[ 3], a[ 5])) * 2 +
+                  Q6_P_mpy_RR(a[ 4], a[ 4]);
+    int64_t t9 = (Q6_P_mpy_RR(a[ 0], a[ 9]) +
+                  Q6_P_mpy_RR(a[ 1], a[ 8]) +
+                  Q6_P_mpy_RR(a[ 2], a[ 7]) +
+                  Q6_P_mpy_RR(a[ 3], a[ 6]) +
+                  Q6_P_mpy_RR(a[ 4], a[ 5])) * 2;
+    int64_t t10 = (Q6_P_mpy_RR(a[ 1], a[ 9]) +
+                   Q6_P_mpy_RR(a[ 2], a[ 8]) +
+                   Q6_P_mpy_RR(a[ 3], a[ 7]) +
+                   Q6_P_mpy_RR(a[ 4], a[ 6])) * 2 +
+                   Q6_P_mpy_RR(a[ 5], a[ 5]);
+    int64_t t11 = (Q6_P_mpy_RR(a[ 2], a[ 9]) +
+                   Q6_P_mpy_RR(a[ 3], a[ 8]) +
+                   Q6_P_mpy_RR(a[ 4], a[ 7]) +
+                   Q6_P_mpy_RR(a[ 5], a[ 6])) * 2;
+    int64_t t12 = (Q6_P_mpy_RR(a[ 3], a[ 9]) +
+                   Q6_P_mpy_RR(a[ 4], a[ 8]) +
+                   Q6_P_mpy_RR(a[ 5], a[ 7])) * 2 +
+                   Q6_P_mpy_RR(a[ 6], a[ 6]);
+    int64_t t13 = (Q6_P_mpy_RR(a[ 4], a[ 9]) +
+                   Q6_P_mpy_RR(a[ 5], a[ 8]) +
+                   Q6_P_mpy_RR(a[ 6], a[ 7])) * 2;
+    int64_t t14 = (Q6_P_mpy_RR(a[ 5], a[ 9]) +
+                   Q6_P_mpy_RR(a[ 6], a[ 8])) * 2 +
+                   Q6_P_mpy_RR(a[ 7], a[ 7]);
+    int64_t t15 = (Q6_P_mpy_RR(a[ 6], a[ 9]) +
+                   Q6_P_mpy_RR(a[ 7], a[ 8])) * 2;
+    int64_t t16 = Q6_P_mpy_RR(a[ 7], a[ 9]) * 2 +
+                  Q6_P_mpy_RR(a[ 8], a[ 8]);
+    int64_t t17 = Q6_P_mpy_RR(a[ 8], a[ 9]) * 2;
+    int64_t t18 = Q6_P_mpy_RR(a[ 9], a[ 9]);
+
+    t1 += t0 >> 26; r[ 0] = t0 & 0x3ffffff;
+    t2 += t1 >> 26; r[ 1] = t1 & 0x3ffffff;
+    t3 += t2 >> 26; r[ 2] = t2 & 0x3ffffff;
+    t4 += t3 >> 26; r[ 3] = t3 & 0x3ffffff;
+    t5 += t4 >> 26; r[ 4] = t4 & 0x3ffffff;
+    t6 += t5 >> 26; r[ 5] = t5 & 0x3ffffff;
+    t7 += t6 >> 26; r[ 6] = t6 & 0x3ffffff;
+    t8 += t7 >> 26; r[ 7] = t7 & 0x3ffffff;
+    t9 += t8 >> 26; r[ 8] = t8 & 0x3ffffff;
+    t10 += t9 >> 26; r[ 9] = t9 & 0x3ffffff;
+    t11 += t10 >> 26; r[10] = t10 & 0x3ffffff;
+    t12 += t11 >> 26; r[11] = t11 & 0x3ffffff;
+    t13 += t12 >> 26; r[12] = t12 & 0x3ffffff;
+    t14 += t13 >> 26; r[13] = t13 & 0x3ffffff;
+    t15 += t14 >> 26; r[14] = t14 & 0x3ffffff;
+    t16 += t15 >> 26; r[15] = t15 & 0x3ffffff;
+    t17 += t16 >> 26; r[16] = t16 & 0x3ffffff;
+    t18 += t17 >> 26; r[17] = t17 & 0x3ffffff;
+    r[19] = (sp_digit)(t18 >> 26);
+    r[18] = t18 & 0x3ffffff;
+}
+
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_10(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_256_sqr_10(r, a);
+    sp_256_mont_reduce_10(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ 2^n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_n_10(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mont_sqr_10(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_256_mont_sqr_10(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P256 curve. */
+static const uint32_t p256_mod_2[8] = {
+    0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U,
+    0x00000001U,0xffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_10(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 10);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_10(t, t, p256_mod, p256_mp_mod);
+        if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 10);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + Q6_P_mpy_RR(2, 10);
+    sp_digit* t3 = td + Q6_P_mpy_RR(4, 10);
+
+    /* t = a^2 */
+    sp_256_mont_sqr_10(t, a, p256_mod, p256_mp_mod);
+    /* t = a^3 = t * a */
+    sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_256_mont_sqr_n_10(t2, t, 2, p256_mod, p256_mp_mod);
+    /* t3= a^d = t2 * a */
+    sp_256_mont_mul_10(t3, t2, a, p256_mod, p256_mp_mod);
+    /* t = a^f = t2 * t */
+    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_256_mont_sqr_n_10(t2, t, 4, p256_mod, p256_mp_mod);
+    /* t3= a^fd = t2 * t3 */
+    sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
+    /* t = a^ff = t2 * t */
+    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_256_mont_sqr_n_10(t2, t, 8, p256_mod, p256_mp_mod);
+    /* t3= a^fffd = t2 * t3 */
+    sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
+    /* t = a^ffff = t2 * t */
+    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+    sp_256_mont_sqr_n_10(t2, t, 16, p256_mod, p256_mp_mod);
+    /* t3= a^fffffffd = t2 * t3 */
+    sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
+    /* t = a^ffffffff = t2 * t */
+    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^ffffffff00000000 = t ^ 2 ^ 32 */
+    sp_256_mont_sqr_n_10(t2, t, 32,
+            p256_mod, p256_mp_mod);
+    /* t = a^ffffffffffffffff = t2 * t */
+    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^ffffffff00000001 = t2 * a */
+    sp_256_mont_mul_10(t2, t2, a, p256_mod, p256_mp_mod);
+    /* t2= a^ffffffff000000010000000000000000000000000000000000000000
+     *   = t2 ^ 2 ^ 160 */
+    sp_256_mont_sqr_n_10(t2, t2, 160, p256_mod, p256_mp_mod);
+    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff
+     *   = t2 * t */
+    sp_256_mont_mul_10(t2, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff00000000
+     *   = t2 ^ 2 ^ 32 */
+    sp_256_mont_sqr_n_10(t2, t2, 32, p256_mod, p256_mp_mod);
+    /* r = a^ffffffff00000001000000000000000000000000fffffffffffffffffffffffd
+     *   = t2 * t3 */
+    sp_256_mont_mul_10(r, t2, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+
+/* Map the Montgomery form projective co-ordinate point to an affine point.
+ *
+ * r  Resulting affine co-ordinate point.
+ * p  Montgomery form projective co-ordinate point.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_map_10(sp_point* r, const sp_point* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + Q6_P_mpy_RR(2, 10);
+    int32_t n;
+
+    sp_256_mont_inv_10(t1, p->z, t + 2*10);
+
+    sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    /* x /= z^2 */
+    sp_256_mont_mul_10(r->x, p->x, t2, p256_mod, p256_mp_mod);
+    XMEMSET(r->x + 10, 0, sizeof(r->x) / 2U);
+    sp_256_mont_reduce_10(r->x, p256_mod, p256_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_256_cmp_10(r->x, p256_mod);
+    sp_256_cond_sub_10(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r->x);
+
+    /* y /= z^3 */
+    sp_256_mont_mul_10(r->y, p->y, t1, p256_mod, p256_mp_mod);
+    XMEMSET(r->y + 10, 0, sizeof(r->y) / 2U);
+    sp_256_mont_reduce_10(r->y, p256_mod, p256_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_256_cmp_10(r->y, p256_mod);
+    sp_256_cond_sub_10(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r->y);
+
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+}
+
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
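+ *
+ * Editorial note: limbs are 26 bits wide (masked with 0x3ffffff throughout
+ * this file), so the plain 32-bit per-limb additions below cannot overflow:
+ * for normalised inputs each sum is at most 2 * (2^26 - 1), which fits in
+ * 32 bits. Carries are deliberately not propagated here; callers such as
+ * sp_256_mont_add_10 follow the add with sp_256_norm_10.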
+ */
+SP_NOINLINE static int sp_256_add_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+#if 0
+    r[ 0] = Q6_R_add_RR(a[0], b[0]);
+    r[ 1] = Q6_R_add_RR(a[1], b[1]);
+    r[ 2] = Q6_R_add_RR(a[2], b[2]);
+    r[ 3] = Q6_R_add_RR(a[3], b[3]);
+    r[ 4] = Q6_R_add_RR(a[4], b[4]);
+    r[ 5] = Q6_R_add_RR(a[5], b[5]);
+    r[ 6] = Q6_R_add_RR(a[6], b[6]);
+    r[ 7] = Q6_R_add_RR(a[7], b[7]);
+    r[ 8] = Q6_R_add_RR(a[8], b[8]);
+    r[ 9] = Q6_R_add_RR(a[9], b[9]);
+#endif
+#if 1
+    __asm__ __volatile__ (
+        "{ r1 = memw(%[a]+#0) \n"
+        "  r2 = memw(%[b]+#0) }\n"
+        "{ r3 = memw(%[a]+#4) \n"
+        "  r19 = add(r1,r2) \n"
+        "  r4 = memw(%[b]+#4) }\n"
+        "{ r5 = memw(%[a]+#8) \n"
+        "  r20 = add(r3,r4) \n"
+        "  r6 = memw(%[b]+#8) }\n"
+        "{ memw(%[r]+#0) = r19 }\n"
+        "{ r7 = memw(%[a]+#12) \n"
+        "  r21 = add(r5,r6) \n"
+        "  r8 = memw(%[b]+#12) }\n"
+        "{ memw(%[r]+#4) = r20 }\n"
+        "{ r9 = memw(%[a]+#16) \n"
+        "  r22 = add(r7,r8) \n"
+        "  r10 = memw(%[b]+#16) }\n"
+        "{ memw(%[r]+#8) = r21 }\n"
+        "{ r11 = memw(%[a]+#20) \n"
+        "  r23 = add(r9,r10) \n"
+        "  r12 = memw(%[b]+#20) }\n"
+        "{ memw(%[r]+#12) = r22 }\n"
+        "{ r13 = memw(%[a]+#24) \n"
+        "  r24 = add(r11,r12) \n"
+        "  r14 = memw(%[b]+#24) }\n"
+        "{ memw(%[r]+#16) = r23 }\n"
+        "{ r15 = memw(%[a]+#28) \n"
+        "  r25 = add(r13,r14) \n"
+        "  r16 = memw(%[b]+#28) }\n"
+        "{ memw(%[r]+#20) = r24 }\n"
+        "{ r17 = memw(%[a]+#32) \n"
+        "  r26 = add(r15,r16) \n"
+        "  r18 = memw(%[b]+#32) }\n"
+        "{ memw(%[r]+#24) = r25 }\n"
+        "{ r5 = memw(%[a]+#36) \n"
+        "  r19 = add(r17,r18) \n"
+        "  r6 = memw(%[b]+#36) }\n"
+        "{ memw(%[r]+#28) = r26 }\n"
+        "{ r20 = add(r5,r6) \n"
+        "  memw(%[r]+#32) = r19 }\n"
+        "{ memw(%[r]+#36) = r20 }\n"
+        : [r] "+r" (r)
+        : [a] "r"(a), [b] "r"(b)
+        : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9",
+          "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18",
+          "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26"
+    );
+#endif
+    return 0;
+}
+
+
+/* Add two Montgomery form numbers (r = (a + b) % m).
+ *
+ * r  Result of addition.
+ * a  First number to add in Montgomery form.
+ * b  Second number to add in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_256_mont_add_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_256_add_10(r, a, b);
+    sp_256_norm_10(r);
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r);
+}
+
+
+/* Double a Montgomery form number (r = (a + a) % m).
+ *
+ * r  Result of doubling.
+ * a  Number to double in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_256_mont_dbl_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_256_add_10(r, a, a);
+    sp_256_norm_10(r);
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r);
+}
+
+
+/* Triple a Montgomery form number (r = (a + a + a) % m).
+ *
+ * r  Result of tripling.
+ * a  Number to triple in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_256_mont_tpl_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_256_add_10(r, a, a);
+    sp_256_norm_10(r);
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r);
+    (void)sp_256_add_10(r, r, a);
+    sp_256_norm_10(r);
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r);
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
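+ *
+ * Editorial note: as with sp_256_add_10, no borrow is propagated here. A
+ * negative result shows up as set high bits in the top limb (a 256-bit
+ * value only uses 22 of r[9]'s 26 bits), which is why sp_256_mont_sub_10
+ * below passes r[9] >> 22 as the mask for a conditional add of the modulus.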
+ */
+SP_NOINLINE static int sp_256_sub_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+#if 0
+    r[ 0] = Q6_R_sub_RR(a[0], b[0]);
+    r[ 1] = Q6_R_sub_RR(a[1], b[1]);
+    r[ 2] = Q6_R_sub_RR(a[2], b[2]);
+    r[ 3] = Q6_R_sub_RR(a[3], b[3]);
+    r[ 4] = Q6_R_sub_RR(a[4], b[4]);
+    r[ 5] = Q6_R_sub_RR(a[5], b[5]);
+    r[ 6] = Q6_R_sub_RR(a[6], b[6]);
+    r[ 7] = Q6_R_sub_RR(a[7], b[7]);
+    r[ 8] = Q6_R_sub_RR(a[8], b[8]);
+    r[ 9] = Q6_R_sub_RR(a[9], b[9]);
+#endif
+#if 1
+    __asm__ __volatile__ (
+        "{ r1 = memw(%[a]+#0) \n"
+        "  r2 = memw(%[b]+#0) }\n"
+        "{ r3 = memw(%[a]+#4) \n"
+        "  r19 = sub(r1,r2) \n"
+        "  r4 = memw(%[b]+#4) }\n"
+        "{ r5 = memw(%[a]+#8) \n"
+        "  r20 = sub(r3,r4) \n"
+        "  r6 = memw(%[b]+#8) }\n"
+        "{ memw(%[r]+#0) = r19 }\n"
+        "{ r7 = memw(%[a]+#12) \n"
+        "  r21 = sub(r5,r6) \n"
+        "  r8 = memw(%[b]+#12) }\n"
+        "{ memw(%[r]+#4) = r20 }\n"
+        "{ r9 = memw(%[a]+#16) \n"
+        "  r22 = sub(r7,r8) \n"
+        "  r10 = memw(%[b]+#16) }\n"
+        "{ memw(%[r]+#8) = r21 }\n"
+        "{ r11 = memw(%[a]+#20) \n"
+        "  r23 = sub(r9,r10) \n"
+        "  r12 = memw(%[b]+#20) }\n"
+        "{ memw(%[r]+#12) = r22 }\n"
+        "{ r13 = memw(%[a]+#24) \n"
+        "  r24 = sub(r11,r12) \n"
+        "  r14 = memw(%[b]+#24) }\n"
+        "{ memw(%[r]+#16) = r23 }\n"
+        "{ r15 = memw(%[a]+#28) \n"
+        "  r25 = sub(r13,r14) \n"
+        "  r16 = memw(%[b]+#28) }\n"
+        "{ memw(%[r]+#20) = r24 }\n"
+        "{ r17 = memw(%[a]+#32) \n"
+        "  r26 = sub(r15,r16) \n"
+        "  r18 = memw(%[b]+#32) }\n"
+        "{ memw(%[r]+#24) = r25 }\n"
+        "{ r5 = memw(%[a]+#36) \n"
+        "  r19 = sub(r17,r18) \n"
+        "  r6 = memw(%[b]+#36) }\n"
+        "{ memw(%[r]+#28) = r26 }\n"
+        "{ r20 = sub(r5,r6) \n"
+        "  memw(%[r]+#32) = r19 }\n"
+        "{ memw(%[r]+#36) = r20 }\n"
+        : [r] "+r" (r)
+        : [a] "r"(a), [b] "r"(b)
+        : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9",
+          "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18",
+          "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26"
+    );
+#endif
+    return 0;
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static void sp_256_cond_add_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    r[ 0] = Q6_R_add_RR(a[ 0], Q6_R_and_RR(b[ 0], m));
+    r[ 1] = Q6_R_add_RR(a[ 1], Q6_R_and_RR(b[ 1], m));
+    r[ 2] = Q6_R_add_RR(a[ 2], Q6_R_and_RR(b[ 2], m));
+    r[ 3] = Q6_R_add_RR(a[ 3], Q6_R_and_RR(b[ 3], m));
+    r[ 4] = Q6_R_add_RR(a[ 4], Q6_R_and_RR(b[ 4], m));
+    r[ 5] = Q6_R_add_RR(a[ 5], Q6_R_and_RR(b[ 5], m));
+    r[ 6] = Q6_R_add_RR(a[ 6], Q6_R_and_RR(b[ 6], m));
+    r[ 7] = Q6_R_add_RR(a[ 7], Q6_R_and_RR(b[ 7], m));
+    r[ 8] = Q6_R_add_RR(a[ 8], Q6_R_and_RR(b[ 8], m));
+    r[ 9] = Q6_R_add_RR(a[ 9], Q6_R_and_RR(b[ 9], m));
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+
+/* Subtract two Montgomery form numbers (r = (a - b) % m).
+ *
+ * r  Result of subtraction.
+ * a  Number to subtract from in Montgomery form.
+ * b  Number to subtract with in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_256_mont_sub_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_256_sub_10(r, a, b);
+    sp_256_cond_add_10(r, r, m, r[9] >> 22);
+    sp_256_norm_10(r);
+}
+
+
+/* Shift number right one bit.
+ * Bottom bit is lost.
+ *
+ * r  Result of shift.
+ * a  Number to shift.
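+ *
+ * Editorial sketch (illustrative values only): with 26-bit limbs, bit 0 of
+ * limb i+1 must become bit 25 of limb i, hence the (a[i + 1] << 25) terms:
+ *
+ *     a = { 0x0000003, 0x0000001 }              (the value 0x4000003)
+ *     r[0] = (0x3 >> 1) | ((0x1 << 25) & 0x3ffffff) = 0x2000001
+ *     r[1] =  0x1 >> 1                              = 0x0000000
+ *
+ * giving { 0x2000001, 0x0000000 }, i.e. 0x2000001 = 0x4000003 >> 1.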
+ */ +SP_NOINLINE static void sp_256_rshift1_10(sp_digit* r, sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<9; i++) { + r[i] = ((a[i] >> 1) | (a[i + 1] << 25)) & 0x3ffffff; + } +#else + r[0] = ((a[0] >> 1) | Q6_R_and_RR((a[1] << 25), 0x3ffffff)); + r[1] = ((a[1] >> 1) | Q6_R_and_RR((a[2] << 25), 0x3ffffff)); + r[2] = ((a[2] >> 1) | Q6_R_and_RR((a[3] << 25), 0x3ffffff)); + r[3] = ((a[3] >> 1) | Q6_R_and_RR((a[4] << 25), 0x3ffffff)); + r[4] = ((a[4] >> 1) | Q6_R_and_RR((a[5] << 25), 0x3ffffff)); + r[5] = ((a[5] >> 1) | Q6_R_and_RR((a[6] << 25), 0x3ffffff)); + r[6] = ((a[6] >> 1) | Q6_R_and_RR((a[7] << 25), 0x3ffffff)); + r[7] = ((a[7] >> 1) | Q6_R_and_RR((a[8] << 25), 0x3ffffff)); + r[8] = ((a[8] >> 1) | Q6_R_and_RR((a[9] << 25), 0x3ffffff)); +#endif + r[9] = a[9] >> 1; +} + + +/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) + * + * r Result of division by 2. + * a Number to divide. + * m Modulus (prime). + */ +static void sp_256_div2_10(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_256_cond_add_10(r, a, m, 0 - (a[0] & 1)); + sp_256_norm_10(r); + sp_256_rshift1_10(r, r); +} + + +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_dbl_10(sp_point* r, const sp_point* p, sp_digit* t) +{ + sp_point* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*10; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* When infinity don't double point passed in - constant time. */ + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point)); + x = rp[p->infinity]->x; + y = rp[p->infinity]->y; + z = rp[p->infinity]->z; + /* Put point to double into result - good for infinity. */ + if (r != p) { + for (i=0; i<10; i++) { + r->x[i] = p->x[i]; + } + for (i=0; i<10; i++) { + r->y[i] = p->y[i]; + } + for (i=0; i<10; i++) { + r->z[i] = p->z[i]; + } + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_10(t1, z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_10(z, y, z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_10(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_10(t2, x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_10(t1, x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_10(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_10(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_10(y, y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_10(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_10(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_mul_10(x, t1, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_10(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_10(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_10(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_10(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_10(y, y, t2, p256_mod); + +} + + +/* Compare two numbers to determine if they are equal. + * Constant time implementation. + * + * a First number to compare. + * b Second number to compare. + * returns 1 when equal and 0 otherwise. 
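+ *
+ * Editorial note: the XOR/OR accumulation below is branch-free, so the
+ * comparison takes the same time whether the inputs differ in the first
+ * limb, the last limb, or not at all - this is what the constant-time
+ * claim above refers to.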
+ */ +static int sp_256_cmp_equal_10(const sp_digit* a, const sp_digit* b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) | + (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) | + (a[8] ^ b[8]) | (a[9] ^ b[9])) == 0; +} + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_add_10(sp_point* r, const sp_point* p, const sp_point* q, + sp_digit* t) +{ + const sp_point* ap[2]; + sp_point* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*10; + sp_digit* t3 = t + 4*10; + sp_digit* t4 = t + 6*10; + sp_digit* t5 = t + 8*10; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Ensure only the first point is the same as the result. */ + if (q == r) { + const sp_point* a = p; + p = q; + q = a; + } + + /* Check double */ + (void)sp_256_sub_10(t1, p256_mod, q->y); + sp_256_norm_10(t1); + if ((sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) & + (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) != 0) { + sp_256_proj_point_dbl_10(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<10; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<10; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<10; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_10(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t1, t1, x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_10(t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_10(t3, t3, y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_10(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_10(t4, t4, t3, p256_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_10(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(z, z, t2, p256_mod, p256_mp_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_10(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_sub_10(x, x, t5, p256_mod); + sp_256_mont_dbl_10(t1, y, p256_mod); + sp_256_mont_sub_10(x, x, t1, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_10(y, y, x, p256_mod); + sp_256_mont_mul_10(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_sub_10(y, y, t5, p256_mod); + } +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine co-ordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
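+ *
+ * Editorial note: the scalar k is held as 10 x 26-bit limbs with only 22
+ * bits used in the top limb (10 * 26 = 260 > 256), which is why the bit
+ * scan below starts at i = 9, c = 22. Every iteration performs one point
+ * add and one point double regardless of the key bit, with the destination
+ * selected through the addr_mask[] table rather than a secret-dependent
+ * branch.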
+ */ +static int sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit* k, + int map, void* heap) +{ + sp_point* td; + sp_point* t[3]; + sp_digit* tmp; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + (void)heap; + + td = (sp_point*)XMALLOC(sizeof(sp_point) * 3, heap, DYNAMIC_TYPE_ECC); + if (td == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3); + + t[0] = &td[0]; + t[1] = &td[1]; + t[2] = &td[2]; + + /* t[0] = {0, 0, 1} * norm */ + t[0]->infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_256_mod_mul_norm_10(t[1]->x, g->x, p256_mod); + } + if (err == MP_OKAY) + err = sp_256_mod_mul_norm_10(t[1]->y, g->y, p256_mod); + if (err == MP_OKAY) + err = sp_256_mod_mul_norm_10(t[1]->z, g->z, p256_mod); + + if (err == MP_OKAY) { + i = 9; + c = 22; + n = k[i--] << (26 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) + break; + + n = k[i--]; + c = 26; + } + + y = (n >> 25) & 1; + n <<= 1; + + sp_256_proj_point_add_10(t[y^1], t[0], t[1], tmp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(sp_point)); + sp_256_proj_point_dbl_10(t[2], t[2], tmp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(sp_point)); + } + + if (map != 0) { + sp_256_map_10(r, t[0], tmp); + } + else { + XMEMCPY(r, t[0], sizeof(sp_point)); + } + } + + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5); + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } + if (td != NULL) { + XMEMSET(td, 0, sizeof(sp_point) * 3); + XFREE(td, NULL, DYNAMIC_TYPE_ECC); + } + + return err; +} + +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine co-ordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
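+ *
+ * Editorial sketch: addr_mask[] (defined earlier in this file) is assumed
+ * to hold 0 for index 0 and all-ones for index 1, so an expression such as
+ *
+ *     ((size_t)&t[0] & addr_mask[y^1]) + ((size_t)&t[1] & addr_mask[y])
+ *
+ * evaluates to the address of t[y] without branching on the key bit y; the
+ * XMEMCPYs in the loop below rely on this to keep the memory access
+ * pattern independent of the scalar.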
+ */
+static int sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_point td[3];
+    sp_digit tmpd[2 * 10 * 5];
+#endif
+    sp_point* t;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    t = (sp_point*)XMALLOC(sizeof(sp_point) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        err = sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod);
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod);
+
+    if (err == MP_OKAY) {
+        i = 9;
+        c = 22;
+        n = k[i--] << (26 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 26;
+            }
+
+            y = (n >> 25) & 1;
+            n <<= 1;
+
+            sp_256_proj_point_add_10(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                    ((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_256_proj_point_dbl_10(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                    ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point));
+        }
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point) * 3);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+
+    return err;
+}
+
+#else
+/* A table entry for pre-computed points. */
+typedef struct sp_table_entry {
+    sp_digit x[10] __attribute__((aligned(128)));
+    sp_digit y[10] __attribute__((aligned(128)));
+} sp_table_entry;
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
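+ *
+ * Editorial note: this is a fixed-window method. t[0..15] are first filled
+ * with 0*g .. 15*g in projective form, then the 256-bit scalar is consumed
+ * four bits at a time: each window costs four doublings plus one table
+ * add, roughly 256 doubles and 64 adds in total instead of an add per set
+ * bit.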
+ */ +static int sp_256_ecc_mulmod_fast_10(sp_point* r, const sp_point* g, const sp_digit* k, + int map, void* heap) +{ +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_point td[16]; + sp_point rtd; + sp_digit tmpd[2 * 10 * 5]; +#endif + sp_point* t; + sp_point* rt; + sp_digit* tmp; + sp_digit n; + int i; + int c, y; + int err; + + (void)heap; + + err = sp_ecc_point_new(heap, rtd, rt); +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + t = (sp_point*)XMALLOC(sizeof(sp_point) * 16, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#else + t = td; + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + (void)sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod); + (void)sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod); + (void)sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod); + t[1].infinity = 0; + sp_256_proj_point_dbl_10(&t[ 2], &t[ 1], tmp); + t[ 2].infinity = 0; + sp_256_proj_point_add_10(&t[ 3], &t[ 2], &t[ 1], tmp); + t[ 3].infinity = 0; + sp_256_proj_point_dbl_10(&t[ 4], &t[ 2], tmp); + t[ 4].infinity = 0; + sp_256_proj_point_add_10(&t[ 5], &t[ 3], &t[ 2], tmp); + t[ 5].infinity = 0; + sp_256_proj_point_dbl_10(&t[ 6], &t[ 3], tmp); + t[ 6].infinity = 0; + sp_256_proj_point_add_10(&t[ 7], &t[ 4], &t[ 3], tmp); + t[ 7].infinity = 0; + sp_256_proj_point_dbl_10(&t[ 8], &t[ 4], tmp); + t[ 8].infinity = 0; + sp_256_proj_point_add_10(&t[ 9], &t[ 5], &t[ 4], tmp); + t[ 9].infinity = 0; + sp_256_proj_point_dbl_10(&t[10], &t[ 5], tmp); + t[10].infinity = 0; + sp_256_proj_point_add_10(&t[11], &t[ 6], &t[ 5], tmp); + t[11].infinity = 0; + sp_256_proj_point_dbl_10(&t[12], &t[ 6], tmp); + t[12].infinity = 0; + sp_256_proj_point_add_10(&t[13], &t[ 7], &t[ 6], tmp); + t[13].infinity = 0; + sp_256_proj_point_dbl_10(&t[14], &t[ 7], tmp); + t[14].infinity = 0; + sp_256_proj_point_add_10(&t[15], &t[ 8], &t[ 7], tmp); + t[15].infinity = 0; + + i = 8; + n = k[i+1] << 6; + c = 18; + y = n >> 24; + XMEMCPY(rt, &t[y], sizeof(sp_point)); + n <<= 8; + for (; i>=0 || c>=4; ) { + if (c < 4) { + n |= k[i--] << (6 - c); + c += 26; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + + sp_256_proj_point_dbl_10(rt, rt, tmp); + sp_256_proj_point_dbl_10(rt, rt, tmp); + sp_256_proj_point_dbl_10(rt, rt, tmp); + sp_256_proj_point_dbl_10(rt, rt, tmp); + + sp_256_proj_point_add_10(rt, rt, &t[y], tmp); + } + + if (map != 0) { + sp_256_map_10(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point)); + } + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (tmp != NULL) { + XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5); + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + } + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_point) * 16); + XFREE(t, heap, DYNAMIC_TYPE_ECC); + } +#else + ForceZero(tmpd, sizeof(tmpd)); + ForceZero(td, sizeof(td)); +#endif + sp_ecc_point_free(rt, 1, heap); + + return err; +} + +#ifdef FP_ECC +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. 
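+ *
+ * Editorial note: batching the doublings lets the loop below carry
+ * W = Z^4 across iterations (updated as W *= Y^4) instead of recomputing
+ * it from Z each time - the standard repeated-doubling optimisation for
+ * Jacobian co-ordinates.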
+ */ +static void sp_256_proj_point_dbl_n_10(sp_point* r, const sp_point* p, int n, + sp_digit* t) +{ + sp_point* rp[2]; + sp_digit* w = t; + sp_digit* a = t + 2*10; + sp_digit* b = t + 4*10; + sp_digit* t1 = t + 6*10; + sp_digit* t2 = t + 8*10; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point)); + x = rp[p->infinity]->x; + y = rp[p->infinity]->y; + z = rp[p->infinity]->z; + if (r != p) { + for (i=0; i<10; i++) { + r->x[i] = p->x[i]; + } + for (i=0; i<10; i++) { + r->y[i] = p->y[i]; + } + for (i=0; i<10; i++) { + r->z[i] = p->z[i]; + } + r->infinity = p->infinity; + } + + /* Y = 2*Y */ + sp_256_mont_dbl_10(y, y, p256_mod); + /* W = Z^4 */ + sp_256_mont_sqr_10(w, z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_10(w, w, p256_mod, p256_mp_mod); + while (n-- > 0) { + /* A = 3*(X^2 - W) */ + sp_256_mont_sqr_10(t1, x, p256_mod, p256_mp_mod); + sp_256_mont_sub_10(t1, t1, w, p256_mod); + sp_256_mont_tpl_10(a, t1, p256_mod); + /* B = X*Y^2 */ + sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(b, t2, x, p256_mod, p256_mp_mod); + /* X = A^2 - 2B */ + sp_256_mont_sqr_10(x, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_10(t1, b, p256_mod); + sp_256_mont_sub_10(x, x, t1, p256_mod); + /* Z = Z*Y */ + sp_256_mont_mul_10(z, z, y, p256_mod, p256_mp_mod); + /* t2 = Y^4 */ + sp_256_mont_sqr_10(t2, t2, p256_mod, p256_mp_mod); + if (n != 0) { + /* W = W*Y^4 */ + sp_256_mont_mul_10(w, w, t2, p256_mod, p256_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_256_mont_sub_10(y, b, x, p256_mod); + sp_256_mont_mul_10(y, y, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_10(y, y, p256_mod); + sp_256_mont_sub_10(y, y, t2, p256_mod); + } + /* Y = Y/2 */ + sp_256_div2_10(y, y, p256_mod); +} + +#endif /* FP_ECC */ + + +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
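+ *
+ * Editorial note: because q is affine here (Z2 == 1), U1 = X1 and S1 = Y1
+ * come for free - compare sp_256_proj_point_add_10 above, which must also
+ * compute X1*Z2^2 and Y1*Z2^3 and fold Z2 into Z3. That saves a squaring
+ * and several multiplications per mixed addition.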
+ */ +static void sp_256_proj_point_add_qz1_10(sp_point* r, const sp_point* p, + const sp_point* q, sp_digit* t) +{ + const sp_point* ap[2]; + sp_point* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*10; + sp_digit* t3 = t + 4*10; + sp_digit* t4 = t + 6*10; + sp_digit* t5 = t + 8*10; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Check double */ + (void)sp_256_sub_10(t1, p256_mod, q->y); + sp_256_norm_10(t1); + if ((sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) & + (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) != 0) { + sp_256_proj_point_dbl_10(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<10; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<10; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<10; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_10(t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod); + /* H = U2 - X1 */ + sp_256_mont_sub_10(t2, t2, x, p256_mod); + /* R = S2 - Y1 */ + sp_256_mont_sub_10(t4, t4, y, p256_mod); + /* Z3 = H*Z1 */ + sp_256_mont_mul_10(z, z, t2, p256_mod, p256_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_256_mont_sqr_10(t1, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t3, x, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_sub_10(x, t1, t5, p256_mod); + sp_256_mont_dbl_10(t1, t3, p256_mod); + sp_256_mont_sub_10(x, x, t1, p256_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_256_mont_sub_10(t3, t3, x, p256_mod); + sp_256_mont_mul_10(t3, t3, t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t5, t5, y, p256_mod, p256_mp_mod); + sp_256_mont_sub_10(y, t3, t5, p256_mod); + } +} + +#ifdef FP_ECC +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_256_proj_to_affine_10(sp_point* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 10; + sp_digit* tmp = t + 4 * 10; + + sp_256_mont_inv_10(t1, a->z, tmp); + + sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod); + + sp_256_mont_mul_10(a->x, a->x, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(a->y, a->y, t1, p256_mod, p256_mp_mod); + XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod)); +} + + +/* Generate the pre-computed table of points for the base point. + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. 
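+ *
+ * Editorial note: entry 2^i of the table holds 2^(32*i) * a in affine
+ * form, and a general entry with bit pattern b7..b0 holds
+ * (sum of b_i * 2^(32*i)) * a, so a 256-bit scalar can later be consumed
+ * one bit per 32-bit stripe (see sp_256_ecc_mulmod_stripe_10 below).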
+ */
+static int sp_256_gen_stripe_table_10(const sp_point* a,
+        sp_table_entry* table, sp_digit* tmp, void* heap)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_point td, s1d, s2d;
+#endif
+    sp_point* t;
+    sp_point* s1 = NULL;
+    sp_point* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_ecc_point_new(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_ecc_point_new(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_ecc_point_new(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_10(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_10(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_10(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_10(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_256_proj_point_dbl_n_10(t, t, 32, tmp);
+            sp_256_proj_to_affine_10(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_10(t, s1, s2, tmp);
+                sp_256_proj_to_affine_10(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_ecc_point_free(s2, 0, heap);
+    sp_ecc_point_free(s1, 0, heap);
+    sp_ecc_point_free( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
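+ *
+ * Editorial sketch of the index computation used below: for bit position
+ * x = i + 32*j, bit j of the table index y is bit x of the scalar,
+ *
+ *     y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+ *
+ * so each of the 31 loop iterations needs one doubling and one mixed add
+ * with a looked-up table entry.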
+ */
+static int sp_256_ecc_mulmod_stripe_10(sp_point* r, const sp_point* g,
+        const sp_table_entry* table, const sp_digit* k, int map, void* heap)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_point rtd;
+    sp_point pd;
+    sp_digit td[2 * 10 * 5];
+#endif
+    sp_point* rt;
+    sp_point* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+    err = sp_ecc_point_new(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_ecc_point_new(heap, pd, p);
+    }
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        y = 0;
+        for (j=0,x=31; j<8; j++,x+=32) {
+            y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=30; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=32) {
+                y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_10(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_10(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point));
+        }
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_ecc_point_free(p, 0, heap);
+    sp_ecc_point_free(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_t {
+    sp_digit x[10] __attribute__((aligned(128)));
+    sp_digit y[10] __attribute__((aligned(128)));
+    sp_table_entry table[256] __attribute__((aligned(128)));
+    uint32_t cnt;
+    int set;
+} sp_cache_t;
+
+static THREAD_LS_T sp_cache_t sp_cache[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_last = -1;
+static THREAD_LS_T int sp_cache_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex = 0;
+    static wolfSSL_Mutex sp_cache_lock;
+#endif
+
+static void sp_ecc_get_cache(const sp_point* g, sp_cache_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache[i].set = 0;
+        }
+        sp_cache_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache[i].set)
+            continue;
+
+        if (sp_256_cmp_equal_10(g->x, sp_cache[i].x) &
+                sp_256_cmp_equal_10(g->y, sp_cache[i].y)) {
+            sp_cache[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_last) {
+            least = sp_cache[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache[j].cnt < least) {
+                    i = j;
+                    least = sp_cache[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache[i].x, g->x, sizeof(sp_cache[i].x));
+        XMEMCPY(sp_cache[i].y, g->y, sizeof(sp_cache[i].y));
+        sp_cache[i].set = 1;
+        sp_cache[i].cnt = 1;
+    }
+
+    *cache = &sp_cache[i];
+    sp_cache_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
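+ *
+ * Editorial note: with FP_ECC enabled, sp_ecc_get_cache counts how often a
+ * point is used. The first use goes through the plain window method; on
+ * the second use (cache->cnt == 2) a 256-entry stripe table is generated,
+ * and that call and all later ones take the faster stripe path.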
+ */ +static int sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit* k, + int map, void* heap) +{ +#ifndef FP_ECC + return sp_256_ecc_mulmod_fast_10(r, g, k, map, heap); +#else + sp_digit tmp[2 * 10 * 5]; + sp_cache_t* cache; + int err = MP_OKAY; + +#ifndef HAVE_THREAD_LS + if (initCacheMutex == 0) { + wc_InitMutex(&sp_cache_lock); + initCacheMutex = 1; + } + if (wc_LockMutex(&sp_cache_lock) != 0) + err = BAD_MUTEX_E; +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache(g, &cache); + if (cache->cnt == 2) + sp_256_gen_stripe_table_10(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_256_ecc_mulmod_fast_10(r, g, k, map, heap); + } + else { + err = sp_256_ecc_mulmod_stripe_10(r, g, cache->table, k, + map, heap); + } + } + + return err; +#endif +} + +#endif + +#ifdef WOLFSSL_SP_SMALL +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine co-ordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_256_ecc_mulmod_base_10(sp_point* r, const sp_digit* k, + int map, void* heap) +{ + /* No pre-computed values. */ + return sp_256_ecc_mulmod_10(r, &p256_base, k, map, heap); +} + +#else +static const sp_table_entry p256_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x0a9143c,0x1cc3506,0x360179e,0x3f17fb6,0x075ba95,0x1d88944, + 0x3b732b7,0x15719e7,0x376a537,0x0062417 }, + { 0x295560a,0x094d5f3,0x245cddf,0x392e867,0x18b4ab8,0x3487cc9, + 0x288688d,0x176174b,0x3182588,0x0215c7f } }, + /* 2 */ + { { 0x147519a,0x2218090,0x32f0202,0x2b09acd,0x0d0981e,0x1e17af2, + 0x14a7caa,0x163a6a7,0x10ddbdf,0x03654f1 }, + { 0x1590f8f,0x0d8733f,0x09179d6,0x1ad139b,0x372e962,0x0bad933, + 0x1961102,0x223cdff,0x37e9eb2,0x0218fae } }, + /* 3 */ + { { 0x0db6485,0x1ad88d7,0x2f97785,0x288bc28,0x3808f0e,0x3df8c02, + 0x28d9544,0x20280f9,0x055b5ff,0x00001d8 }, + { 0x38d2010,0x13ae6e0,0x308a763,0x2ecc90d,0x254014f,0x10a9981, + 0x247d398,0x0fb8383,0x3613437,0x020c21d } }, + /* 4 */ + { { 0x2a0d2bb,0x08bf145,0x34994f9,0x1b06988,0x30d5cc1,0x1f18b22, + 0x01cf3a5,0x199fe49,0x161fd1b,0x00bd79a }, + { 0x1a01797,0x171c2fd,0x21925c1,0x1358255,0x23d20b4,0x1c7f6d4, + 0x111b370,0x03dec12,0x1168d6f,0x03d923e } }, + /* 5 */ + { { 0x137bbbc,0x19a11f8,0x0bec9e5,0x27a29a8,0x3e43446,0x275cd18, + 0x0427617,0x00056c7,0x285133d,0x016af80 }, + { 0x04c7dab,0x2a0df30,0x0c0792a,0x1310c98,0x3573d9f,0x239b30d, + 0x1315627,0x1ce0c32,0x25b6b6f,0x0252edc } }, + /* 6 */ + { { 0x20f141c,0x26d23dc,0x3c74bbf,0x334b7d6,0x06199b3,0x0441171, + 0x3f61294,0x313bf70,0x3cb2f7d,0x03375ae }, + { 0x2f436fd,0x19c02fa,0x26becca,0x1b6e64c,0x26f647f,0x053c948, + 0x0fa7920,0x397d830,0x2bd4bda,0x028d86f } }, + /* 7 */ + { { 0x17c13c7,0x2895616,0x03e128a,0x17d42df,0x1c38d63,0x0f02747, + 0x039aecf,0x0a4b01c,0x209c4b5,0x02e84b2 }, + { 0x1f91dfd,0x023e916,0x07fb9e4,0x19b3ba8,0x13af43b,0x35e02ca, + 0x0eb0899,0x3bd2c7b,0x19d701f,0x014faee } }, + /* 8 */ + { { 0x0e63d34,0x1fb8c6c,0x0fab4fe,0x1caa795,0x0f46005,0x179ed69, + 0x093334d,0x120c701,0x39206d5,0x021627e }, + { 0x183553a,0x03d7319,0x09e5aa7,0x12b8959,0x2087909,0x0011194, + 
0x1045071,0x0713f32,0x16d0254,0x03aec1a } }, + /* 9 */ + { { 0x01647c5,0x1b2856b,0x1799461,0x11f133d,0x0b8127d,0x1937eeb, + 0x266aa37,0x1f68f71,0x0cbd1b2,0x03aca08 }, + { 0x287e008,0x1be361a,0x38f3940,0x276488d,0x2d87dfa,0x0333b2c, + 0x2d2e428,0x368755b,0x09b55a7,0x007ca0a } }, + /* 10 */ + { { 0x389da99,0x2a8300e,0x0022abb,0x27ae0a1,0x0a6f2d7,0x207017a, + 0x047862b,0x1358c9e,0x35905e5,0x00cde92 }, + { 0x1f7794a,0x1d40348,0x3f613c6,0x2ddf5b5,0x0207005,0x133f5ba, + 0x1a37810,0x3ef5829,0x0d5f4c2,0x0035978 } }, + /* 11 */ + { { 0x1275d38,0x026efad,0x2358d9d,0x1142f82,0x14268a7,0x1cfac99, + 0x362ff49,0x288cbc1,0x24252f4,0x0308f68 }, + { 0x394520c,0x06e13c2,0x178e5da,0x18ec16f,0x1096667,0x134a7a8, + 0x0dcb869,0x33fc4e9,0x38cc790,0x006778e } }, + /* 12 */ + { { 0x2c5fe04,0x29c5b09,0x1bdb183,0x02ceee8,0x03b28de,0x132dc4b, + 0x32c586a,0x32ff5d0,0x3d491fc,0x038d372 }, + { 0x2a58403,0x2351aea,0x3a53b40,0x21a0ba5,0x39a6974,0x1aaaa2b, + 0x3901273,0x03dfe78,0x3447b4e,0x039d907 } }, + /* 13 */ + { { 0x364ba59,0x14e5077,0x02fc7d7,0x3b02c09,0x1d33f10,0x0560616, + 0x06dfc6a,0x15efd3c,0x357052a,0x01284b7 }, + { 0x039dbd0,0x18ce3e5,0x3e1fbfa,0x352f794,0x0d3c24b,0x07c6cc5, + 0x1e4ffa2,0x3a91bf5,0x293bb5b,0x01abd6a } }, + /* 14 */ + { { 0x0c91999,0x02da644,0x0491da1,0x100a960,0x00a24b4,0x2330824, + 0x0094b4b,0x1004cf8,0x35a66a4,0x017f8d1 }, + { 0x13e7b4b,0x232af7e,0x391ab0f,0x069f08f,0x3292b50,0x3479898, + 0x2889aec,0x2a4590b,0x308ecfe,0x02d5138 } }, + /* 15 */ + { { 0x2ddfdce,0x231ba45,0x39e6647,0x19be245,0x12c3291,0x35399f8, + 0x0d6e764,0x3082d3a,0x2bda6b0,0x0382dac }, + { 0x37efb57,0x04b7cae,0x00070d3,0x379e431,0x01aac0d,0x1e6f251, + 0x0336ad6,0x0ddd3e4,0x3de25a6,0x01c7008 } }, + /* 16 */ + { { 0x3e20925,0x230912f,0x286762a,0x30e3f73,0x391c19a,0x34e1c18, + 0x16a5d5d,0x093d96a,0x3d421d3,0x0187561 }, + { 0x37173ea,0x19ce8a8,0x0b65e87,0x0214dde,0x2238480,0x16ead0f, + 0x38441e0,0x3bef843,0x2124621,0x03e847f } }, + /* 17 */ + { { 0x0b19ffd,0x247cacb,0x3c231c8,0x16ec648,0x201ba8d,0x2b172a3, + 0x103d678,0x2fb72db,0x04c1f13,0x0161bac }, + { 0x3e8ed09,0x171b949,0x2de20c3,0x0f06067,0x21e81a3,0x1b194be, + 0x0fd6c05,0x13c449e,0x0087086,0x006756b } }, + /* 18 */ + { { 0x09a4e1f,0x27d604c,0x00741e9,0x06fa49c,0x0ab7de7,0x3f4a348, + 0x25ef0be,0x158fc9a,0x33f7f9c,0x039f001 }, + { 0x2f59f76,0x3598e83,0x30501f6,0x15083f2,0x0669b3b,0x29980b5, + 0x0c1f7a7,0x0f02b02,0x0fec65b,0x0382141 } }, + /* 19 */ + { { 0x031b3ca,0x23da368,0x2d66f09,0x27b9b69,0x06d1cab,0x13c91ba, + 0x3d81fa9,0x25ad16f,0x0825b09,0x01e3c06 }, + { 0x225787f,0x3bf790e,0x2c9bb7e,0x0347732,0x28016f8,0x0d6ff0d, + 0x2a4877b,0x1d1e833,0x3b87e94,0x010e9dc } }, + /* 20 */ + { { 0x2b533d5,0x1ddcd34,0x1dc0625,0x3da86f7,0x3673b8a,0x1e7b0a4, + 0x3e7c9aa,0x19ac55d,0x251c3b2,0x02edb79 }, + { 0x25259b3,0x24c0ead,0x3480e7e,0x34f40e9,0x3d6a0af,0x2cf3f09, + 0x2c83d19,0x2e66f16,0x19a5d18,0x0182d18 } }, + /* 21 */ + { { 0x2e5aa1c,0x28e3846,0x3658bd6,0x0ad279c,0x1b8b765,0x397e1fb, + 0x130014e,0x3ff342c,0x3b2aeeb,0x02743c9 }, + { 0x2730a55,0x0918c5e,0x083aca9,0x0bf76ef,0x19c955b,0x300669c, + 0x01dfe0a,0x312341f,0x26d356e,0x0091295 } }, + /* 22 */ + { { 0x2cf1f96,0x00e52ba,0x271c6db,0x2a40930,0x19f2122,0x0b2f4ee, + 0x26ac1b8,0x3bda498,0x0873581,0x0117963 }, + { 0x38f9dbc,0x3d1e768,0x2040d3f,0x11ba222,0x3a8aaf1,0x1b82fb5, + 0x1adfb24,0x2de9251,0x21cc1e4,0x0301038 } }, + /* 23 */ + { { 0x38117b6,0x2bc001b,0x1433847,0x3fdce8d,0x3651969,0x3651d7a, + 0x2b35761,0x1bb1d20,0x097682c,0x00737d7 }, + { 0x1f04839,0x1dd6d04,0x16987db,0x3d12378,0x17dbeac,0x1c2cc86, + 
0x121dd1b,0x3fcf6ca,0x1f8a92d,0x00119d5 } }, + /* 24 */ + { { 0x0e8ffcd,0x2b174af,0x1a82cc8,0x22cbf98,0x30d53c4,0x080b5b1, + 0x3161727,0x297cfdb,0x2113b83,0x0011b97 }, + { 0x0007f01,0x23fd936,0x3183e7b,0x0496bd0,0x07fb1ef,0x178680f, + 0x1c5ea63,0x0016c11,0x2c3303d,0x01b8041 } }, + /* 25 */ + { { 0x0dd73b1,0x1cd6122,0x10d948c,0x23e657b,0x3767070,0x15a8aad, + 0x385ea8c,0x33c7ce0,0x0ede901,0x0110965 }, + { 0x2d4b65b,0x2a8b244,0x0c37f8f,0x0ee5b24,0x394c234,0x3a5e347, + 0x26e4a15,0x39a3b4c,0x2514c2e,0x029e5be } }, + /* 26 */ + { { 0x23addd7,0x3ed8120,0x13b3359,0x20f959a,0x09e2a61,0x32fcf20, + 0x05b78e3,0x19ba7e2,0x1a9c697,0x0392b4b }, + { 0x2048a61,0x3dfd0a3,0x19a0357,0x233024b,0x3082d19,0x00fb63b, + 0x3a1af4c,0x1450ff0,0x046c37b,0x0317a50 } }, + /* 27 */ + { { 0x3e75f9e,0x294e30a,0x3a78476,0x3a32c48,0x36fd1a9,0x0427012, + 0x1e4df0b,0x11d1f61,0x1afdb46,0x018ca0f }, + { 0x2f2df15,0x0a33dee,0x27f4ce7,0x1542b66,0x3e592c4,0x20d2f30, + 0x3226ade,0x2a4e3ea,0x1ab1981,0x01a2f46 } }, + /* 28 */ + { { 0x087d659,0x3ab5446,0x305ac08,0x3d2cd64,0x33374d5,0x3f9d3f8, + 0x186981c,0x37f5a5a,0x2f53c6f,0x01254a4 }, + { 0x2cec896,0x1e32786,0x04844a8,0x043b16d,0x3d964b2,0x1935829, + 0x16f7e26,0x1a0dd9a,0x30d2603,0x003b1d4 } }, + /* 29 */ + { { 0x12687bb,0x04e816b,0x21fa2da,0x1abccb8,0x3a1f83b,0x375181e, + 0x0f5ef51,0x0fc2ce4,0x3a66486,0x003d881 }, + { 0x3138233,0x1f8eec3,0x2718bd6,0x1b09caa,0x2dd66b9,0x1bb222b, + 0x1004072,0x1b73e3b,0x07208ed,0x03fc36c } }, + /* 30 */ + { { 0x095d553,0x3e84053,0x0a8a749,0x3f575a0,0x3a44052,0x3ced59b, + 0x3b4317f,0x03a8c60,0x13c8874,0x00c4ed4 }, + { 0x0d11549,0x0b8ab02,0x221cb40,0x02ed37b,0x2071ee1,0x1fc8c83, + 0x3987dd4,0x27e049a,0x0f986f1,0x00b4eaf } }, + /* 31 */ + { { 0x15581a2,0x2214060,0x11af4c2,0x1598c88,0x19a0a6d,0x32acba6, + 0x3a7a0f0,0x2337c66,0x210ded9,0x0300dbe }, + { 0x1fbd009,0x3822eb0,0x181629a,0x2401b45,0x30b68b1,0x2e78363, + 0x2b32779,0x006530b,0x2c4b6d4,0x029aca8 } }, + /* 32 */ + { { 0x13549cf,0x0f943db,0x265ed43,0x1bfeb35,0x06f3369,0x3847f2d, + 0x1bfdacc,0x26181a5,0x252af7c,0x02043b8 }, + { 0x159bb2c,0x143f85c,0x357b654,0x2f9d62c,0x2f7dfbe,0x1a7fa9c, + 0x057e74d,0x05d14ac,0x17a9273,0x035215c } }, + /* 33 */ + { { 0x0cb5a98,0x106a2bc,0x10bf117,0x24c7cc4,0x3d3da8f,0x2ce0ab7, + 0x14e2cba,0x1813866,0x1a72f9a,0x01a9811 }, + { 0x2b2411d,0x3034fe8,0x16e0170,0x0f9443a,0x0be0eb8,0x2196cf3, + 0x0c9f738,0x15e40ef,0x0faf9e1,0x034f917 } }, + /* 34 */ + { { 0x03f7669,0x3da6efa,0x3d6bce1,0x209ca1d,0x109f8ae,0x09109e3, + 0x08ae543,0x3067255,0x1dee3c2,0x0081dd5 }, + { 0x3ef1945,0x358765b,0x28c387b,0x3bec4b4,0x218813c,0x0b7d92a, + 0x3cd1d67,0x2c0367e,0x2e57154,0x0123717 } }, + /* 35 */ + { { 0x3e5a199,0x1e42ffd,0x0bb7123,0x33e6273,0x1e0efb8,0x294671e, + 0x3a2bfe0,0x3d11709,0x2eddff6,0x03cbec2 }, + { 0x0b5025f,0x0255d7c,0x1f2241c,0x35d03ea,0x0550543,0x202fef4, + 0x23c8ad3,0x354963e,0x015db28,0x0284fa4 } }, + /* 36 */ + { { 0x2b65cbc,0x1e8d428,0x0226f9f,0x1c8a919,0x10b04b9,0x08fc1e8, + 0x1ce241e,0x149bc99,0x2b01497,0x00afc35 }, + { 0x3216fb7,0x1374fd2,0x226ad3d,0x19fef76,0x0f7d7b8,0x1c21417, + 0x37b83f6,0x3a27eba,0x25a162f,0x010aa52 } }, + /* 37 */ + { { 0x2adf191,0x1ab42fa,0x28d7584,0x2409689,0x20f8a48,0x253707d, + 0x2030504,0x378f7a1,0x169c65e,0x00b0b76 }, + { 0x3849c17,0x085c764,0x10dd6d0,0x2e87689,0x1460488,0x30e9521, + 0x10c7063,0x1b6f120,0x21f42c5,0x03d0dfe } }, + /* 38 */ + { { 0x20f7dab,0x035c512,0x29ac6aa,0x24c5ddb,0x20f0497,0x17ce5e1, + 0x00a050f,0x1eaa14b,0x3335470,0x02abd16 }, + { 0x18d364a,0x0df0cf0,0x316585e,0x018f925,0x0d40b9b,0x17b1511, + 
0x1716811,0x1caf3d0,0x10df4f2,0x0337d8c } }, + /* 39 */ + { { 0x2a8b7ef,0x0f188e3,0x2287747,0x06216f0,0x008e935,0x2f6a38d, + 0x1567722,0x0bfc906,0x0bada9e,0x03c3402 }, + { 0x014d3b1,0x099c749,0x2a76291,0x216c067,0x3b37549,0x14ef2f6, + 0x21b96d4,0x1ee2d71,0x2f5ca88,0x016f570 } }, + /* 40 */ + { { 0x09a3154,0x3d1a7bd,0x2e9aef0,0x255b8ac,0x03e85a5,0x2a492a7, + 0x2aec1ea,0x11c6516,0x3c8a09e,0x02a84b7 }, + { 0x1f69f1d,0x09c89d3,0x1e7326f,0x0b28bfd,0x0e0e4c8,0x1ea7751, + 0x18ce73b,0x2a406e7,0x273e48c,0x01b00db } }, + /* 41 */ + { { 0x36e3138,0x2b84a83,0x345a5cf,0x00096b4,0x16966ef,0x159caf1, + 0x13c64b4,0x2f89226,0x25896af,0x00a4bfd }, + { 0x2213402,0x1435117,0x09fed52,0x09d0e4b,0x0f6580e,0x2871cba, + 0x3b397fd,0x1c9d825,0x090311b,0x0191383 } }, + /* 42 */ + { { 0x07153f0,0x1087869,0x18c9e1e,0x1e64810,0x2b86c3b,0x0175d9c, + 0x3dce877,0x269de4e,0x393cab7,0x03c96b9 }, + { 0x1869d0c,0x06528db,0x02641f3,0x209261b,0x29d55c8,0x25ba517, + 0x3b5ea30,0x028f927,0x25313db,0x00e6e39 } }, + /* 43 */ + { { 0x2fd2e59,0x150802d,0x098f377,0x19a4957,0x135e2c0,0x38a95ce, + 0x1ab21a0,0x36c1b67,0x32f0f19,0x00e448b }, + { 0x3cad53c,0x3387800,0x17e3cfb,0x03f9970,0x3225b2c,0x2a84e1d, + 0x3af1d29,0x3fe35ca,0x2f8ce80,0x0237a02 } }, + /* 44 */ + { { 0x07bbb76,0x3aa3648,0x2758afb,0x1f085e0,0x1921c7e,0x3010dac, + 0x22b74b1,0x230137e,0x1062e36,0x021c652 }, + { 0x3993df5,0x24a2ee8,0x126ab5f,0x2d7cecf,0x0639d75,0x16d5414, + 0x1aa78a8,0x3f78404,0x26a5b74,0x03f0c57 } }, + /* 45 */ + { { 0x0d6ecfa,0x3f506ba,0x3f86561,0x3d86bb1,0x15f8c44,0x2491d07, + 0x052a7b4,0x2422261,0x3adee38,0x039b529 }, + { 0x193c75d,0x14bb451,0x1162605,0x293749c,0x370a70d,0x2e8b1f6, + 0x2ede937,0x2b95f4a,0x39a9be2,0x00d77eb } }, + /* 46 */ + { { 0x2736636,0x15bf36a,0x2b7e6b9,0x25eb8b2,0x209f51d,0x3cd2659, + 0x10bf410,0x034afec,0x3d71c83,0x0076971 }, + { 0x0ce6825,0x07920cf,0x3c3b5c4,0x23fe55c,0x015ad11,0x08c0dae, + 0x0552c7f,0x2e75a8a,0x0fddbf4,0x01c1df0 } }, + /* 47 */ + { { 0x2b9661c,0x0ffe351,0x3d71bf6,0x1ac34b3,0x3a1dfd3,0x211fe3d, + 0x33e140a,0x3f9100d,0x32ee50e,0x014ea18 }, + { 0x16d8051,0x1bfda1a,0x068a097,0x2571d3d,0x1daec0c,0x39389af, + 0x194dc35,0x3f3058a,0x36d34e1,0x000a329 } }, + /* 48 */ + { { 0x09877ee,0x351f73f,0x0002d11,0x0420074,0x2c8b362,0x130982d, + 0x02c1175,0x3c11b40,0x0d86962,0x001305f }, + { 0x0daddf5,0x2f4252c,0x15c06d9,0x1d49339,0x1bea235,0x0b680ed, + 0x3356e67,0x1d1d198,0x1e9fed9,0x03dee93 } }, + /* 49 */ + { { 0x3e1263f,0x2fe8d3a,0x3ce6d0d,0x0d5c6b9,0x3557637,0x0a9bd48, + 0x0405538,0x0710749,0x2005213,0x038c7e5 }, + { 0x26b6ec6,0x2e485ba,0x3c44d1b,0x0b9cf0b,0x037a1d1,0x27428a5, + 0x0e7eac8,0x351ef04,0x259ce34,0x02a8e98 } }, + /* 50 */ + { { 0x2f3dcd3,0x3e77d4d,0x3360fbc,0x1434afd,0x36ceded,0x3d413d6, + 0x1710fad,0x36bb924,0x1627e79,0x008e637 }, + { 0x109569e,0x1c168db,0x3769cf4,0x2ed4527,0x0ea0619,0x17d80d3, + 0x1c03773,0x18843fe,0x1b21c04,0x015c5fd } }, + /* 51 */ + { { 0x1dd895e,0x08a7248,0x04519fe,0x001030a,0x18e5185,0x358dfb3, + 0x13d2391,0x0a37be8,0x0560e3c,0x019828b }, + { 0x27fcbd0,0x2a22bb5,0x30969cc,0x1e03aa7,0x1c84724,0x0ba4ad3, + 0x32f4817,0x0914cca,0x14c4f52,0x01893b9 } }, + /* 52 */ + { { 0x097eccc,0x1273936,0x00aa095,0x364fe62,0x04d49d1,0x10e9f08, + 0x3c24230,0x3ef01c8,0x2fb92bd,0x013ce4a }, + { 0x1e44fd9,0x27e3e9f,0x2156696,0x3915ecc,0x0b66cfb,0x1a3af0f, + 0x2fa8033,0x0e6736c,0x177ccdb,0x0228f9e } }, + /* 53 */ + { { 0x2c4b125,0x06207c1,0x0a8cdde,0x003db8f,0x1ae34e3,0x31e84fa, + 0x2999de5,0x11013bd,0x02370c2,0x00e2234 }, + { 0x0f91081,0x200d591,0x1504762,0x1857c05,0x23d9fcf,0x0cb34db, + 
0x27edc86,0x08cd860,0x2471810,0x029798b } }, + /* 54 */ + { { 0x3acd6c8,0x097b8cb,0x3c661a8,0x15152f2,0x1699c63,0x237e64c, + 0x23edf79,0x16b7033,0x0e6466a,0x00b11da }, + { 0x0a64bc9,0x1bfe324,0x1f5cb34,0x08391de,0x0630a60,0x3017a21, + 0x09d064b,0x14a8365,0x041f9e6,0x01ed799 } }, + /* 55 */ + { { 0x128444a,0x2508b07,0x2a39216,0x362f84d,0x2e996c5,0x2c31ff3, + 0x07afe5f,0x1d1288e,0x3cb0c8d,0x02e2bdc }, + { 0x38b86fd,0x3a0ea8c,0x1cff5fd,0x1629629,0x3fee3f1,0x02b250c, + 0x2e8f6f2,0x0225727,0x15f7f3f,0x0280d8e } }, + /* 56 */ + { { 0x10f7770,0x0f1aee8,0x0e248c7,0x20684a8,0x3a6f16d,0x06f0ae7, + 0x0df6825,0x2d4cc40,0x301875f,0x012f8da }, + { 0x3b56dbb,0x1821ba7,0x24f8922,0x22c1f9e,0x0306fef,0x1b54bc8, + 0x2ccc056,0x00303ba,0x2871bdc,0x0232f26 } }, + /* 57 */ + { { 0x0dac4ab,0x0625730,0x3112e13,0x101c4bf,0x3a874a4,0x2873b95, + 0x32ae7c6,0x0d7e18c,0x13e0c08,0x01139d5 }, + { 0x334002d,0x00fffdd,0x025c6d5,0x22c2cd1,0x19d35cb,0x3a1ce2d, + 0x3702760,0x3f06257,0x03a5eb8,0x011c29a } }, + /* 58 */ + { { 0x0513482,0x1d87724,0x276a81b,0x0a807a4,0x3028720,0x339cc20, + 0x2441ee0,0x31bbf36,0x290c63d,0x0059041 }, + { 0x106a2ed,0x0d2819b,0x100bf50,0x114626c,0x1dd4d77,0x2e08632, + 0x14ae72a,0x2ed3f64,0x1fd7abc,0x035cd1e } }, + /* 59 */ + { { 0x2d4c6e5,0x3bec596,0x104d7ed,0x23d6c1b,0x0262cf0,0x15d72c5, + 0x2d5bb18,0x199ac4b,0x1e30771,0x020591a }, + { 0x21e291e,0x2e75e55,0x1661d7a,0x08b0778,0x3eb9daf,0x0d78144, + 0x1827eb1,0x0fe73d2,0x123f0dd,0x0028db7 } }, + /* 60 */ + { { 0x1d5533c,0x34cb1d0,0x228f098,0x27a1a11,0x17c5f5a,0x0d26f44, + 0x2228ade,0x2c460e6,0x3d6fdba,0x038cc77 }, + { 0x3cc6ed8,0x02ada1a,0x260e510,0x2f7bde8,0x37160c3,0x33a1435, + 0x23d9a7b,0x0ce2641,0x02a492e,0x034ed1e } }, + /* 61 */ + { { 0x3821f90,0x26dba3c,0x3aada14,0x3b59bad,0x292edd9,0x2804c45, + 0x3669531,0x296f42e,0x35a4c86,0x01ca049 }, + { 0x3ff47e5,0x2163df4,0x2441503,0x2f18405,0x15e1616,0x37f66ec, + 0x30f11a7,0x141658a,0x27ece14,0x00b018b } }, + /* 62 */ + { { 0x159ac2e,0x3e65bc0,0x2713a76,0x0db2f6c,0x3281e77,0x2391811, + 0x16d2880,0x1fbc4ab,0x1f92c4e,0x00a0a8d }, + { 0x0ce5cd2,0x152c7b0,0x02299c3,0x3244de7,0x2cf99ef,0x3a0b047, + 0x2caf383,0x0aaf664,0x113554d,0x031c735 } }, + /* 63 */ + { { 0x1b578f4,0x177a702,0x3a7a488,0x1638ebf,0x31884e2,0x2460bc7, + 0x36b1b75,0x3ce8e3d,0x340cf47,0x03143d9 }, + { 0x34b68ea,0x12b7ccd,0x1fe2a9c,0x08da659,0x0a406f3,0x1694c14, + 0x06a2228,0x16370be,0x3a72129,0x02e7b2c } }, + /* 64 */ + { { 0x0f8b16a,0x21043bd,0x266a56f,0x3fb11ec,0x197241a,0x36721f0, + 0x006b8e6,0x2ac6c29,0x202cd42,0x0200fcf }, + { 0x0dbec69,0x0c26a01,0x105f7f0,0x3dceeeb,0x3a83b85,0x363865f, + 0x097273a,0x2b70718,0x00e5067,0x03025d1 } }, + /* 65 */ + { { 0x379ab34,0x295bcb0,0x38d1846,0x22e1077,0x3a8ee06,0x1db1a3b, + 0x3144591,0x07cc080,0x2d5915f,0x03c6bcc }, + { 0x175bd50,0x0dd4c57,0x27bc99c,0x2ebdcbd,0x3837cff,0x235dc8f, + 0x13a4184,0x0722c18,0x130e2d4,0x008f43c } }, + /* 66 */ + { { 0x01500d9,0x2adbb7d,0x2da8857,0x397f2fa,0x10d890a,0x25c9654, + 0x3e86488,0x3eb754b,0x1d6c0a3,0x02c0a23 }, + { 0x10bcb08,0x083cc19,0x2e16853,0x04da575,0x271af63,0x2626a9d, + 0x3520a7b,0x32348c7,0x24ff408,0x03ff4dc } }, + /* 67 */ + { { 0x058e6cb,0x1a3992d,0x1d28539,0x080c5e9,0x2992dad,0x2a9d7d5, + 0x14ae0b7,0x09b7ce0,0x34ad78c,0x03d5643 }, + { 0x30ba55a,0x092f4f3,0x0bae0fc,0x12831de,0x20fc472,0x20ed9d2, + 0x29864f6,0x1288073,0x254f6f7,0x00635b6 } }, + /* 68 */ + { { 0x1be5a2b,0x0f88975,0x33c6ed9,0x20d64d3,0x06fe799,0x0989bff, + 0x1409262,0x085a90c,0x0d97990,0x0142eed }, + { 0x17ec63e,0x06471b9,0x0db2378,0x1006077,0x265422c,0x08db83d, + 
0x28099b0,0x1270d06,0x11801fe,0x00ac400 } }, + /* 69 */ + { { 0x3391593,0x22d7166,0x30fcfc6,0x2896609,0x3c385f5,0x066b72e, + 0x04f3aad,0x2b831c5,0x19983fb,0x0375562 }, + { 0x0b82ff4,0x222e39d,0x34c993b,0x101c79c,0x2d2e03c,0x0f00c8a, + 0x3a9eaf4,0x1810669,0x151149d,0x039b931 } }, + /* 70 */ + { { 0x29af288,0x1956ec7,0x293155f,0x193deb6,0x1647e1a,0x2ca0839, + 0x297e4bc,0x15bfd0d,0x1b107ed,0x0147803 }, + { 0x31c327e,0x05a6e1d,0x02ad43d,0x02d2a5b,0x129cdb2,0x37ad1de, + 0x3d51f53,0x245df01,0x2414982,0x0388bd0 } }, + /* 71 */ + { { 0x35f1abb,0x17a3d18,0x0874cd4,0x2d5a14e,0x17edc0c,0x16a00d3, + 0x072c1fb,0x1232725,0x33d52dc,0x03dc24d }, + { 0x0af30d6,0x259aeea,0x369c401,0x12bc4de,0x295bf5f,0x0d8711f, + 0x26162a9,0x16c44e5,0x288e727,0x02f54b4 } }, + /* 72 */ + { { 0x05fa877,0x1571ea7,0x3d48ab1,0x1c9f4e8,0x017dad6,0x0f46276, + 0x343f9e7,0x1de990f,0x0e4c8aa,0x028343e }, + { 0x094f92d,0x3abf633,0x1b3a0bb,0x2f83137,0x0d818c8,0x20bae85, + 0x0c65f8b,0x1a8008b,0x0c7946d,0x0295b1e } }, + /* 73 */ + { { 0x1d09529,0x08e46c3,0x1fcf296,0x298f6b7,0x1803e0e,0x2d6fd20, + 0x37351f5,0x0d9e8b1,0x1f8731a,0x0362fbf }, + { 0x00157f4,0x06750bf,0x2650ab9,0x35ffb23,0x2f51cae,0x0b522c2, + 0x39cb400,0x191e337,0x0a5ce9f,0x021529a } }, + /* 74 */ + { { 0x3506ea5,0x17d9ed8,0x0d66dc3,0x22693f8,0x19286c4,0x3a57353, + 0x101d3bf,0x1aa54fc,0x20b9884,0x0172b3a }, + { 0x0eac44d,0x37d8327,0x1c3aa90,0x3d0d534,0x23db29a,0x3576eaf, + 0x1d3de8a,0x3bea423,0x11235e4,0x039260b } }, + /* 75 */ + { { 0x34cd55e,0x01288b0,0x1132231,0x2cc9a03,0x358695b,0x3e87650, + 0x345afa1,0x01267ec,0x3f616b2,0x02011ad }, + { 0x0e7d098,0x0d6078e,0x0b70b53,0x237d1bc,0x0d7f61e,0x132de31, + 0x1ea9ea4,0x2bd54c3,0x27b9082,0x03ac5f2 } }, + /* 76 */ + { { 0x2a145b9,0x06d661d,0x31ec175,0x03f06f1,0x3a5cf6b,0x249c56e, + 0x2035653,0x384c74f,0x0bafab5,0x0025ec0 }, + { 0x25f69e1,0x1b23a55,0x1199aa6,0x16ad6f9,0x077e8f7,0x293f661, + 0x33ba11d,0x3327980,0x07bafdb,0x03e571d } }, + /* 77 */ + { { 0x2bae45e,0x3c074ef,0x2955558,0x3c312f1,0x2a8ebe9,0x2f193f1, + 0x3705b1d,0x360deba,0x01e566e,0x00d4498 }, + { 0x21161cd,0x1bc787e,0x2f87933,0x3553197,0x1328ab8,0x093c879, + 0x17eee27,0x2adad1d,0x1236068,0x003be5c } }, + /* 78 */ + { { 0x0ca4226,0x2633dd5,0x2c8e025,0x0e3e190,0x05eede1,0x1a385e4, + 0x163f744,0x2f25522,0x1333b4f,0x03f05b6 }, + { 0x3c800ca,0x1becc79,0x2daabe9,0x0c499e2,0x1138063,0x3fcfa2d, + 0x2244976,0x1e85cf5,0x2f1b95d,0x0053292 } }, + /* 79 */ + { { 0x12f81d5,0x1dc6eaf,0x11967a4,0x1a407df,0x31a5f9d,0x2b67241, + 0x18bef7c,0x08c7762,0x063f59c,0x01015ec }, + { 0x1c05c0a,0x360bfa2,0x1f85bff,0x1bc7703,0x3e4911c,0x0d685b6, + 0x2fccaea,0x02c4cef,0x164f133,0x0070ed7 } }, + /* 80 */ + { { 0x0ec21fe,0x052ffa0,0x3e825fe,0x1ab0956,0x3f6ce11,0x3d29759, + 0x3c5a072,0x18ebe62,0x148db7e,0x03eb49c }, + { 0x1ab05b3,0x02dab0a,0x1ae690c,0x0f13894,0x137a9a8,0x0aab79f, + 0x3dc875c,0x06a1029,0x1e39f0e,0x01dce1f } }, + /* 81 */ + { { 0x16c0dd7,0x3b31269,0x2c741e9,0x3611821,0x2a5cffc,0x1416bb3, + 0x3a1408f,0x311fa3d,0x1c0bef0,0x02cdee1 }, + { 0x00e6a8f,0x1adb933,0x0f23359,0x2fdace2,0x2fd6d4b,0x0e73bd3, + 0x2453fac,0x0a356ae,0x2c8f9f6,0x02704d6 } }, + /* 82 */ + { { 0x0e35743,0x28c80a1,0x0def32a,0x2c6168f,0x1320d6a,0x37c6606, + 0x21b1761,0x2147ee0,0x21fc433,0x015c84d }, + { 0x1fc9168,0x36cda9c,0x003c1f0,0x1cd7971,0x15f98ba,0x1ef363d, + 0x0ca87e3,0x046f7d9,0x3c9e6bb,0x0372eb0 } }, + /* 83 */ + { { 0x118cbe2,0x3665a11,0x304ef01,0x062727a,0x3d242fc,0x11ffbaf, + 0x3663c7e,0x1a189c9,0x09e2d62,0x02e3072 }, + { 0x0e1d569,0x162f772,0x0cd051a,0x322df62,0x3563809,0x047cc7a, + 
0x027fd9f,0x08b509b,0x3da2f94,0x01748ee } }, + /* 84 */ + { { 0x1c8f8be,0x31ca525,0x22bf0a1,0x200efcd,0x02961c4,0x3d8f52b, + 0x018403d,0x3a40279,0x1cb91ec,0x030427e }, + { 0x0945705,0x0257416,0x05c0c2d,0x25b77ae,0x3b9083d,0x2901126, + 0x292b8d7,0x07b8611,0x04f2eee,0x026f0cd } }, + /* 85 */ + { { 0x2913074,0x2b8d590,0x02b10d5,0x09d2295,0x255491b,0x0c41cca, + 0x1ca665b,0x133051a,0x1525f1a,0x00a5647 }, + { 0x04f983f,0x3d6daee,0x04e1e76,0x1067d7e,0x1be7eef,0x02ea862, + 0x00d4968,0x0ccb048,0x11f18ef,0x018dd95 } }, + /* 86 */ + { { 0x22976cc,0x17c5395,0x2c38bda,0x3983bc4,0x222bca3,0x332a614, + 0x3a30646,0x261eaef,0x1c808e2,0x02f6de7 }, + { 0x306a772,0x32d7272,0x2dcefd2,0x2abf94d,0x038f475,0x30ad76e, + 0x23e0227,0x3052b0a,0x001add3,0x023ba18 } }, + /* 87 */ + { { 0x0ade873,0x25a6069,0x248ccbe,0x13713ee,0x17ee9aa,0x28152e9, + 0x2e28995,0x2a92cb3,0x17a6f77,0x024b947 }, + { 0x190a34d,0x2ebea1c,0x1ed1948,0x16fdaf4,0x0d698f7,0x32bc451, + 0x0ee6e30,0x2aaab40,0x06f0a56,0x01460be } }, + /* 88 */ + { { 0x24cc99c,0x1884b1e,0x1ca1fba,0x1a0f9b6,0x2ff609b,0x2b26316, + 0x3b27cb5,0x29bc976,0x35d4073,0x024772a }, + { 0x3575a70,0x1b30f57,0x07fa01b,0x0e5be36,0x20cb361,0x26605cd, + 0x1d4e8c8,0x13cac59,0x2db9797,0x005e833 } }, + /* 89 */ + { { 0x36c8d3a,0x1878a81,0x124b388,0x0e4843e,0x1701aad,0x0ea0d76, + 0x10eae41,0x37d0653,0x36c7f4c,0x00ba338 }, + { 0x37a862b,0x1cf6ac0,0x08fa912,0x2dd8393,0x101ba9b,0x0eebcb7, + 0x2453883,0x1a3cfe5,0x2cb34f6,0x03d3331 } }, + /* 90 */ + { { 0x1f79687,0x3d4973c,0x281544e,0x2564bbe,0x17c5954,0x171e34a, + 0x231741a,0x3cf2784,0x0889a0d,0x02b036d }, + { 0x301747f,0x3f1c477,0x1f1386b,0x163bc5f,0x1592b93,0x332daed, + 0x080e4f5,0x1d28b96,0x26194c9,0x0256992 } }, + /* 91 */ + { { 0x15a4c93,0x07bf6b0,0x114172c,0x1ce0961,0x140269b,0x1b2c2eb, + 0x0dfb1c1,0x019ddaa,0x0ba2921,0x008c795 }, + { 0x2e6d2dc,0x37e45e2,0x2918a70,0x0fce444,0x34d6aa6,0x396dc88, + 0x27726b5,0x0c787d8,0x032d8a7,0x02ac2f8 } }, + /* 92 */ + { { 0x1131f2d,0x2b43a63,0x3101097,0x38cec13,0x0637f09,0x17a69d2, + 0x086196d,0x299e46b,0x0802cf6,0x03c6f32 }, + { 0x0daacb4,0x1a4503a,0x100925c,0x15583d9,0x23c4e40,0x1de4de9, + 0x1cc8fc4,0x2c9c564,0x0695aeb,0x02145a5 } }, + /* 93 */ + { { 0x1dcf593,0x17050fc,0x3e3bde3,0x0a6c062,0x178202b,0x2f7674f, + 0x0dadc29,0x15763a7,0x1d2daad,0x023d9f6 }, + { 0x081ea5f,0x045959d,0x190c841,0x3a78d31,0x0e7d2dd,0x1414fea, + 0x1d43f40,0x22d77ff,0x2b9c072,0x03e115c } }, + /* 94 */ + { { 0x3af71c9,0x29e9c65,0x25655e1,0x111e9cd,0x3a14494,0x3875418, + 0x34ae070,0x0b06686,0x310616b,0x03b7b89 }, + { 0x1734121,0x00d3d44,0x29f0b2f,0x1552897,0x31cac6e,0x1030bb3, + 0x0148f3a,0x35fd237,0x29b44eb,0x027f49f } }, + /* 95 */ + { { 0x2e2cb16,0x1d962bd,0x19b63cc,0x0b3f964,0x3e3eb7d,0x1a35560, + 0x0c58161,0x3ce1d6a,0x3b6958f,0x029030b }, + { 0x2dcc158,0x3b1583f,0x30568c9,0x31957c8,0x27ad804,0x28c1f84, + 0x3967049,0x37b3f64,0x3b87dc6,0x0266f26 } }, + /* 96 */ + { { 0x27dafc6,0x2548764,0x0d1984a,0x1a57027,0x252c1fb,0x24d9b77, + 0x1581a0f,0x1f99276,0x10ba16d,0x026af88 }, + { 0x0915220,0x2be1292,0x16c6480,0x1a93760,0x2fa7317,0x1a07296, + 0x1539871,0x112c31f,0x25787f3,0x01e2070 } }, + /* 97 */ + { { 0x0bcf3ff,0x266d478,0x34f6933,0x31449fd,0x00d02cb,0x340765a, + 0x3465a2d,0x225023e,0x319a30e,0x00579b8 }, + { 0x20e05f4,0x35b834f,0x0404646,0x3710d62,0x3fad7bd,0x13e1434, + 0x21c7d1c,0x1cb3af9,0x2cf1911,0x003957e } }, + /* 98 */ + { { 0x0787564,0x36601be,0x1ce67e9,0x084c7a1,0x21a3317,0x2067a35, + 0x0158cab,0x195ddac,0x1766fe9,0x035cf42 }, + { 0x2b7206e,0x20d0947,0x3b42424,0x03f1862,0x0a51929,0x38c2948, + 
0x0bb8595,0x2942d77,0x3748f15,0x0249428 } }, + /* 99 */ + { { 0x2577410,0x3c23e2f,0x28c6caf,0x00d41de,0x0fd408a,0x30298e9, + 0x363289e,0x2302fc7,0x082c1cc,0x01dd050 }, + { 0x30991cd,0x103e9ba,0x029605a,0x19927f7,0x0c1ca08,0x0c93f50, + 0x28a3c7b,0x082e4e9,0x34d12eb,0x0232c13 } }, + /* 100 */ + { { 0x106171c,0x0b4155a,0x0c3fb1c,0x336c090,0x19073e9,0x2241a10, + 0x0e6b4fd,0x0ed476e,0x1ef4712,0x039390a }, + { 0x0ec36f4,0x3754f0e,0x2a270b8,0x007fd2d,0x0f9d2dc,0x1e6a692, + 0x066e078,0x1954974,0x2ff3c6e,0x00def28 } }, + /* 101 */ + { { 0x3562470,0x0b8f1f7,0x0ac94cd,0x28b0259,0x244f272,0x031e4ef, + 0x2d5df98,0x2c8a9f1,0x2dc3002,0x016644f }, + { 0x350592a,0x0e6a0d5,0x1e027a1,0x2039e0f,0x399e01d,0x2817593, + 0x0c0375e,0x3889b3e,0x24ab013,0x010de1b } }, + /* 102 */ + { { 0x256b5a6,0x0ac3b67,0x28f9ff3,0x29b67f1,0x30750d9,0x25e11a9, + 0x15e8455,0x279ebb0,0x298b7e7,0x0218e32 }, + { 0x2fc24b2,0x2b82582,0x28f22f5,0x2bd36b3,0x305398e,0x3b2e9e3, + 0x365dd0a,0x29bc0ed,0x36a7b3a,0x007b374 } }, + /* 103 */ + { { 0x05ff2f3,0x2b3589b,0x29785d3,0x300a1ce,0x0a2d516,0x0844355, + 0x14c9fad,0x3ccb6b6,0x385d459,0x0361743 }, + { 0x0b11da3,0x002e344,0x18c49f7,0x0c29e0c,0x1d2c22c,0x08237b3, + 0x2988f49,0x0f18955,0x1c3b4ed,0x02813c6 } }, + /* 104 */ + { { 0x17f93bd,0x249323b,0x11f6087,0x174e4bd,0x3cb64ac,0x086dc6b, + 0x2e330a8,0x142c1f2,0x2ea5c09,0x024acbb }, + { 0x1b6e235,0x3132521,0x00f085a,0x2a4a4db,0x1ab2ca4,0x0142224, + 0x3aa6b3e,0x09db203,0x2215834,0x007b9e0 } }, + /* 105 */ + { { 0x23e79f7,0x28b8039,0x1906a60,0x2cbce67,0x1f590e7,0x181f027, + 0x21054a6,0x3854240,0x2d857a6,0x03cfcb3 }, + { 0x10d9b55,0x1443cfc,0x2648200,0x2b36190,0x09d2fcf,0x22f439f, + 0x231aa7e,0x3884395,0x0543da3,0x003d5a9 } }, + /* 106 */ + { { 0x043e0df,0x06ffe84,0x3e6d5b2,0x3327001,0x26c74b6,0x12a145e, + 0x256ec0d,0x3898c69,0x3411969,0x02f63c5 }, + { 0x2b7494a,0x2eee1af,0x38388a9,0x1bd17ce,0x21567d4,0x13969e6, + 0x3a12a7a,0x3e8277d,0x03530cc,0x00b4687 } }, + /* 107 */ + { { 0x06508da,0x38e04d4,0x15a7192,0x312875e,0x3336180,0x2a6512c, + 0x1b59497,0x2e91b37,0x25eb91f,0x02841e9 }, + { 0x394d639,0x0747143,0x37d7e6d,0x1d62962,0x08b4af3,0x34df287, + 0x3c5584b,0x26bc869,0x20af87a,0x0060f5d } }, + /* 108 */ + { { 0x1de59a4,0x1a5c443,0x2f8729d,0x01c3a2f,0x0f1ad8d,0x3cbaf9e, + 0x1b49634,0x35d508a,0x39dc269,0x0075105 }, + { 0x390d30e,0x37033e0,0x110cb32,0x14c37a0,0x20a3b27,0x2f00ce6, + 0x2f1dc52,0x34988c6,0x0c29606,0x01dc7e7 } }, + /* 109 */ + { { 0x1040739,0x24f9de1,0x2939999,0x2e6009a,0x244539d,0x17e3f09, + 0x00f6f2f,0x1c63b3d,0x2310362,0x019109e }, + { 0x1428aa8,0x3cb61e1,0x09a84f4,0x0ffafed,0x07b7adc,0x08f406b, + 0x1b2c6df,0x035b480,0x3496ae9,0x012766d } }, + /* 110 */ + { { 0x35d1099,0x2362f10,0x1a08cc7,0x13a3a34,0x12adbcd,0x32da290, + 0x02e2a02,0x151140b,0x01b3f60,0x0240df6 }, + { 0x34c7b61,0x2eb09c1,0x172e7cd,0x2ad5eff,0x2fe2031,0x25b54d4, + 0x0cec965,0x18e7187,0x26a7cc0,0x00230f7 } }, + /* 111 */ + { { 0x2d552ab,0x374083d,0x01f120f,0x2601736,0x156baff,0x04d44a4, + 0x3b7c3e9,0x1acbc1b,0x0424579,0x031a425 }, + { 0x1231bd1,0x0eba710,0x020517b,0x21d7316,0x21eac6e,0x275a848, + 0x0837abf,0x0eb0082,0x302cafe,0x00fe8f6 } }, + /* 112 */ + { { 0x1058880,0x28f9941,0x03f2d75,0x3bd90e5,0x17da365,0x2ac9249, + 0x07861cf,0x023fd05,0x1b0fdb8,0x031712f }, + { 0x272b56b,0x04f8d2c,0x043a735,0x25446e4,0x1c8327e,0x221125a, + 0x0ce37df,0x2dad7f6,0x39446c2,0x00b55b6 } }, + /* 113 */ + { { 0x346ac6b,0x05e0bff,0x2425246,0x0981e8b,0x1d19f79,0x2692378, + 0x3ea3c40,0x2e90beb,0x19de503,0x003d5af }, + { 0x05cda49,0x353b44d,0x299d137,0x3f205bc,0x2821158,0x3ad0d00, + 
0x06a54aa,0x2d7c79f,0x39d1173,0x01000ee } }, + /* 114 */ + { { 0x0803387,0x3a06268,0x14043b8,0x3d4e72f,0x1ece115,0x0a1dfc8, + 0x17208dd,0x0be790a,0x122a07f,0x014dd95 }, + { 0x0a4182d,0x202886a,0x1f79a49,0x1e8c867,0x0a2bbd0,0x28668b5, + 0x0d0a2e1,0x115259d,0x3586c5d,0x01e815b } }, + /* 115 */ + { { 0x18a2a47,0x2c95627,0x2773646,0x1230f7c,0x15b5829,0x2fc354e, + 0x2c000ea,0x099d547,0x2f17a1a,0x01df520 }, + { 0x3853948,0x06f6561,0x3feeb8a,0x2f5b3ef,0x3a6f817,0x01a0791, + 0x2ec0578,0x2c392ad,0x12b2b38,0x0104540 } }, + /* 116 */ + { { 0x1e28ced,0x0fc3d1b,0x2c473c7,0x1826c4f,0x21d5da7,0x39718e4, + 0x38ce9e6,0x0251986,0x172fbea,0x0337c11 }, + { 0x053c3b0,0x0f162db,0x043c1cb,0x04111ee,0x297fe3c,0x32e5e03, + 0x2b8ae12,0x0c427ec,0x1da9738,0x03b9c0f } }, + /* 117 */ + { { 0x357e43a,0x054503f,0x11b8345,0x34ec6e0,0x2d44660,0x3d0ae61, + 0x3b5dff8,0x33884ac,0x09da162,0x00a82b6 }, + { 0x3c277ba,0x129a51a,0x027664e,0x1530507,0x0c788c9,0x2afd89d, + 0x1aa64cc,0x1196450,0x367ac2b,0x0358b42 } }, + /* 118 */ + { { 0x0054ac4,0x1761ecb,0x378839c,0x167c9f7,0x2570058,0x0604a35, + 0x37cbf3b,0x0909bb7,0x3f2991c,0x02ce688 }, + { 0x0b16ae5,0x212857c,0x351b952,0x2c684db,0x30c6a05,0x09c01e0, + 0x23c137f,0x1331475,0x092c067,0x0013b40 } }, + /* 119 */ + { { 0x2e90393,0x0617466,0x24e61f4,0x0a528f5,0x03047b4,0x2153f05, + 0x0001a69,0x30e1eb8,0x3c10177,0x0282a47 }, + { 0x22c831e,0x28fc06b,0x3e16ff0,0x208adc9,0x0bb76ae,0x28c1d6d, + 0x12c8a15,0x031063c,0x1889ed2,0x002133e } }, + /* 120 */ + { { 0x0a6becf,0x14277bf,0x3328d98,0x201f7fe,0x12fceae,0x1de3a2e, + 0x0a15c44,0x3ddf976,0x1b273ab,0x0355e55 }, + { 0x1b5d4f1,0x369e78c,0x3a1c210,0x12cf3e9,0x3aa52f0,0x309f082, + 0x112089d,0x107c753,0x24202d1,0x023853a } }, + /* 121 */ + { { 0x2897042,0x140d17c,0x2c4aeed,0x07d0d00,0x18d0533,0x22f7ec8, + 0x19c194c,0x3456323,0x2372aa4,0x0165f86 }, + { 0x30bd68c,0x1fb06b3,0x0945032,0x372ac09,0x06d4be0,0x27f8fa1, + 0x1c8d7ac,0x137a96e,0x236199b,0x0328fc0 } }, + /* 122 */ + { { 0x170bd20,0x2842d58,0x1de7592,0x3c5b4fd,0x20ea897,0x12cab78, + 0x363ff14,0x01f928c,0x17e309c,0x02f79ff }, + { 0x0f5432c,0x2edb4ae,0x044b516,0x32f810d,0x2210dc1,0x23e56d6, + 0x301e6ff,0x34660f6,0x10e0a7d,0x02d88eb } }, + /* 123 */ + { { 0x0c7b65b,0x2f59d58,0x2289a75,0x2408e92,0x1ab8c55,0x1ec99e5, + 0x220fd0d,0x04defe0,0x24658ec,0x035aa8b }, + { 0x138bb85,0x2f002d4,0x295c10a,0x08760ce,0x28c31d1,0x1c0a8cb, + 0x0ff00b1,0x144eac9,0x2e02dcc,0x0044598 } }, + /* 124 */ + { { 0x3b42b87,0x050057b,0x0dff781,0x1c06db1,0x1bd9f5d,0x1f5f04a, + 0x2cccd7a,0x143e19b,0x1cb94b7,0x036cfb8 }, + { 0x34837cf,0x3cf6c3c,0x0d4fb26,0x22ee55e,0x1e7eed1,0x315995f, + 0x2cdf937,0x1a96574,0x0425220,0x0221a99 } }, + /* 125 */ + { { 0x1b569ea,0x0d33ed9,0x19c13c2,0x107dc84,0x2200111,0x0569867, + 0x2dc85da,0x05ef22e,0x0eb018a,0x029c33d }, + { 0x04a6a65,0x3e5eba3,0x378f224,0x09c04d0,0x036e5cf,0x3df8258, + 0x3a609e4,0x1eddef8,0x2abd174,0x02a91dc } }, + /* 126 */ + { { 0x2a60cc0,0x1d84c5e,0x115f676,0x1840da0,0x2c79163,0x2f06ed6, + 0x198bb4b,0x3e5d37b,0x1dc30fa,0x018469b }, + { 0x15ee47a,0x1e32f30,0x16a530e,0x2093836,0x02e8962,0x3767b62, + 0x335adf3,0x27220db,0x2f81642,0x0173ffe } }, + /* 127 */ + { { 0x37a99cd,0x1533fe6,0x05a1c0d,0x27610f1,0x17bf3b9,0x0b1ce78, + 0x0a908f6,0x265300e,0x3237dc1,0x01b969a }, + { 0x3a5db77,0x2d15382,0x0d63ef8,0x1feb3d8,0x0b7b880,0x19820de, + 0x11c0c67,0x2af3396,0x38d242d,0x0120688 } }, + /* 128 */ + { { 0x1d0b34a,0x05ef00d,0x00a7e34,0x1ae0c9f,0x1440b38,0x300d8b4, + 0x37262da,0x3e50e3e,0x14ce0cd,0x00b1044 }, + { 0x195a0b1,0x173bc6b,0x03622ba,0x2a19f55,0x1c09b37,0x07921b2, + 
0x16cdd20,0x24a5c9b,0x2bf42ff,0x00811de } }, + /* 129 */ + { { 0x0d65dbf,0x145cf06,0x1ad82f7,0x038ce7b,0x077bf94,0x33c4007, + 0x22d26bd,0x25ad9c0,0x09ac773,0x02b1990 }, + { 0x2261cc3,0x2ecdbf1,0x3e908b0,0x3246439,0x0213f7b,0x1179b04, + 0x01cebaa,0x0be1595,0x175cc12,0x033a39a } }, + /* 130 */ + { { 0x00a67d2,0x086d06f,0x248a0f1,0x0291134,0x362d476,0x166d1cd, + 0x044f1d6,0x2d2a038,0x365250b,0x0023f78 }, + { 0x08bf287,0x3b0f6a1,0x1d6eace,0x20b4cda,0x2c2a621,0x0912520, + 0x02dfdc9,0x1b35cd6,0x3d2565d,0x00bdf8b } }, + /* 131 */ + { { 0x3770fa7,0x2e4b6f0,0x03f9ae4,0x170de41,0x1095e8d,0x1dd845c, + 0x334e9d1,0x00ab953,0x12e9077,0x03196fa }, + { 0x2fd0a40,0x228c0fd,0x384b275,0x38ef339,0x3e7d822,0x3e5d9ef, + 0x24f5854,0x0ece9eb,0x247d119,0x012ffe3 } }, + /* 132 */ + { { 0x0ff1480,0x07487c0,0x1b16cd4,0x1f41d53,0x22ab8fb,0x2f83cfa, + 0x01d2efb,0x259f6b2,0x2e65772,0x00f9392 }, + { 0x05303e6,0x23cdb4f,0x23977e1,0x12e4898,0x03bd999,0x0c930f0, + 0x170e261,0x180a27b,0x2fd58ec,0x014e22b } }, + /* 133 */ + { { 0x25d7713,0x0c5fad7,0x09daad1,0x3b9d779,0x109b985,0x1d3ec98, + 0x35bc4fc,0x2f838cb,0x0d14f75,0x0173e42 }, + { 0x2657b12,0x10d4423,0x19e6760,0x296e5bb,0x2bfd421,0x25c3330, + 0x29f51f8,0x0338838,0x24060f0,0x029a62e } }, + /* 134 */ + { { 0x3748fec,0x2c5a1bb,0x2cf973d,0x289fa74,0x3e6e755,0x38997bf, + 0x0b6544c,0x2b6358c,0x38a7aeb,0x02c50bb }, + { 0x3d5770a,0x06be7c5,0x012fad3,0x19cb2cd,0x266af3b,0x3ccd677, + 0x160d1bd,0x141d5af,0x2965851,0x034625a } }, + /* 135 */ + { { 0x3c41c08,0x255eacc,0x22e1ec5,0x2b151a3,0x087de94,0x311cbdb, + 0x016b73a,0x368e462,0x20b7981,0x0099ec3 }, + { 0x262b988,0x1539763,0x21e76e5,0x15445b4,0x1d8ddc7,0x34a9be6, + 0x10faf03,0x24e4d18,0x07aa111,0x02d538a } }, + /* 136 */ + { { 0x38a876b,0x048ad45,0x04b40a0,0x3fc2144,0x251ff96,0x13ca7dd, + 0x0b31ab1,0x3539814,0x28b5f87,0x0212aec }, + { 0x270790a,0x350e7e0,0x346bd5e,0x276178f,0x22d6cb5,0x3078884, + 0x355c1b6,0x15901d7,0x3671765,0x03950db } }, + /* 137 */ + { { 0x286e8d5,0x2409788,0x13be53f,0x2d21911,0x0353c95,0x10238e8, + 0x32f5bde,0x3a67b60,0x28b5b9c,0x001013d }, + { 0x381e8e5,0x0cef7a9,0x2f5bcad,0x06058f0,0x33cdf50,0x04672a8, + 0x1769600,0x31c055d,0x3df0ac1,0x00e9098 } }, + /* 138 */ + { { 0x2eb596d,0x197b326,0x12b4c29,0x39c08f2,0x101ea03,0x3804e58, + 0x04b4b62,0x28d9d1c,0x13f905e,0x0032a3f }, + { 0x11b2b61,0x08e9095,0x0d06925,0x270e43f,0x21eb7a8,0x0e4a98f, + 0x31d2be0,0x030cf9f,0x2644ddb,0x025b728 } }, + /* 139 */ + { { 0x07510af,0x2ed0e8e,0x2a01203,0x2a2a68d,0x0846fea,0x3e540de, + 0x3a57702,0x1677348,0x2123aad,0x010d8f8 }, + { 0x0246a47,0x0e871d0,0x124dca4,0x34b9577,0x2b362b8,0x363ebe5, + 0x3086045,0x26313e6,0x15cd8bb,0x0210384 } }, + /* 140 */ + { { 0x023e8a7,0x0817884,0x3a0bf12,0x3376371,0x3c808a8,0x18e9777, + 0x12a2721,0x35b538a,0x2bd30de,0x017835a }, + { 0x0fc0f64,0x1c8709f,0x2d8807a,0x0743957,0x242eec0,0x347e76c, + 0x27bef91,0x289689a,0x0f42945,0x01f7a92 } }, + /* 141 */ + { { 0x1060a81,0x3dbc739,0x1615abd,0x1cbe3e5,0x3e79f9c,0x1ab09a2, + 0x136c540,0x05b473f,0x2beebfd,0x02af0a8 }, + { 0x3e2eac7,0x19be474,0x04668ac,0x18f4b74,0x36f10ba,0x0a0b4c6, + 0x10e3770,0x3bf059e,0x3946c7e,0x013a8d4 } }, + /* 142 */ + { { 0x266309d,0x28be354,0x1a3eed8,0x3020651,0x10a51c6,0x1e31770, + 0x0af45a5,0x3ff0f3b,0x2891c94,0x00e9db9 }, + { 0x17b0d0f,0x33a291f,0x0a5f9aa,0x25a3d61,0x2963ace,0x39a5fef, + 0x230c724,0x1919146,0x10a465e,0x02084a8 } }, + /* 143 */ + { { 0x3ab8caa,0x31870f3,0x2390ef7,0x2103850,0x218eb8e,0x3a5ccf2, + 0x1dff677,0x2c59334,0x371599c,0x02a9f2a }, + { 0x0837bd1,0x3249cef,0x35d702f,0x3430dab,0x1c06407,0x108f692, + 
0x221292f,0x05f0c5d,0x073fe06,0x01038e0 } }, + /* 144 */ + { { 0x3bf9b7c,0x2020929,0x30d0f4f,0x080fef8,0x3365d23,0x1f3e738, + 0x3e53209,0x1549afe,0x300b305,0x038d811 }, + { 0x0c6c2c7,0x2e6445b,0x3ee64dc,0x022e932,0x0726837,0x0deb67b, + 0x1ed4346,0x3857f73,0x277a3de,0x01950b5 } }, + /* 145 */ + { { 0x36c377a,0x0adb41e,0x08be3f3,0x11e40d1,0x36cb038,0x036a2bd, + 0x3dd3a82,0x1bc875b,0x2ee09bb,0x02994d2 }, + { 0x035facf,0x05e0344,0x07e630a,0x0ce772d,0x335e55a,0x111fce4, + 0x250fe1c,0x3bc89ba,0x32fdc9a,0x03cf2d9 } }, + /* 146 */ + { { 0x355fd83,0x1c67f8e,0x1d10eb3,0x1b21d77,0x0e0d7a4,0x173a9e1, + 0x2c9fa90,0x1c39cce,0x22eaae8,0x01f2bea }, + { 0x153b338,0x0534107,0x26c69b8,0x283be1f,0x3e0acc0,0x059cac3, + 0x13d1081,0x148bbee,0x3c1b9bd,0x002aac4 } }, + /* 147 */ + { { 0x2681297,0x3389e34,0x146addc,0x2c6d425,0x2cb350e,0x1986abc, + 0x0431737,0x04ba4b7,0x2028470,0x012e469 }, + { 0x2f8ddcf,0x3c4255c,0x1af4dcf,0x07a6a44,0x208ebf6,0x0dc90c3, + 0x34360ac,0x072ad23,0x0537232,0x01254d3 } }, + /* 148 */ + { { 0x07b7e9d,0x3df5c7c,0x116f83d,0x28c4f35,0x3a478ef,0x3011fb8, + 0x2f264b6,0x317b9e3,0x04fd65a,0x032bd1b }, + { 0x2aa8266,0x3431de4,0x04bba04,0x19a44da,0x0edf454,0x392c5ac, + 0x265168a,0x1dc3d5b,0x25704c6,0x00533a7 } }, + /* 149 */ + { { 0x25e8f91,0x1178fa5,0x2492994,0x2eb2c3c,0x0d3aca1,0x0322828, + 0x1cc70f9,0x269c74c,0x0a53e4c,0x006edc2 }, + { 0x18bdd7a,0x2a79a55,0x26b1d5c,0x0200628,0x0734a05,0x3273c7b, + 0x13aa714,0x0040ac2,0x2f2da30,0x03e7449 } }, + /* 150 */ + { { 0x3f9563e,0x2f29eab,0x14a0749,0x3fad264,0x1dd077a,0x3d7c59c, + 0x3a0311b,0x331a789,0x0b9729e,0x0201ebf }, + { 0x1b08b77,0x2a4cdf2,0x3e387f8,0x21510f1,0x286c3a7,0x1dbf62e, + 0x3afa594,0x3363217,0x0d16568,0x01d46b7 } }, + /* 151 */ + { { 0x0715c0d,0x28e2d04,0x17f78ae,0x1c63dda,0x1d113ea,0x0fefc1b, + 0x1eab149,0x1d0fd99,0x0682537,0x00a7b11 }, + { 0x10bebbc,0x11c672d,0x14223d9,0x2ff9141,0x1399ee5,0x34b7b6c, + 0x0d5b3a8,0x01df643,0x0e392a4,0x03fe4dc } }, + /* 152 */ + { { 0x2b75b65,0x0b5a6f1,0x11c559a,0x3549999,0x24188f8,0x37a75f4, + 0x29f33e3,0x34068a2,0x38ba2a9,0x025dd91 }, + { 0x29af2c7,0x0988b64,0x0923885,0x1b539a4,0x1334f5d,0x226947a, + 0x2cc7e5a,0x20beb39,0x13fac2f,0x01d298c } }, + /* 153 */ + { { 0x35f079c,0x137f76d,0x2fbbb2f,0x254638d,0x185b07c,0x1f34db7, + 0x2cfcf0e,0x218f46d,0x2150ff4,0x02add6f }, + { 0x33fc9b7,0x0d9f005,0x0fd081b,0x0834965,0x2b90a74,0x102448d, + 0x3dbf03c,0x167d857,0x02e0b44,0x013afab } }, + /* 154 */ + { { 0x09f2c53,0x317f9d7,0x1411eb6,0x0463aba,0x0d25220,0x256b176, + 0x087633f,0x2bff322,0x07b2c1b,0x037e662 }, + { 0x10aaecb,0x23bb4a1,0x2272bb7,0x06c075a,0x09d4918,0x0736f2b, + 0x0dd511b,0x101625e,0x0a7779f,0x009ec10 } }, + /* 155 */ + { { 0x33b2eb2,0x0176dfd,0x2118904,0x022386c,0x2e0df85,0x2588c9f, + 0x1b71525,0x28fd540,0x137e4cf,0x02ce4f7 }, + { 0x3d75165,0x0c39ecf,0x3554a12,0x30af34c,0x2d66344,0x3ded408, + 0x36f1be0,0x0d065b0,0x012d046,0x0025623 } }, + /* 156 */ + { { 0x2601c3b,0x1824fc0,0x335fe08,0x3e33d70,0x0fb0252,0x252bfca, + 0x1cf2808,0x1922e55,0x1a9db9f,0x020721e }, + { 0x2f56c51,0x39a1f31,0x218c040,0x1a4fc5d,0x3fed471,0x0164d4e, + 0x388a419,0x06f1113,0x0f55fc1,0x03e8352 } }, + /* 157 */ + { { 0x1608e4d,0x3872778,0x022cbc6,0x044d60a,0x3010dda,0x15fb0b5, + 0x37ddc11,0x19f5bda,0x156b6a3,0x023a838 }, + { 0x383b3b4,0x1380bc8,0x353ca35,0x250fc07,0x169966b,0x3780f29, + 0x36632b2,0x2d6b13f,0x124fa00,0x00fd6ae } }, + /* 158 */ + { { 0x1739efb,0x2ec3656,0x2c0d337,0x3d39faf,0x1c751b0,0x04699f4, + 0x252dd64,0x095b8b6,0x0872b74,0x022f1da }, + { 0x2d3d253,0x38edca0,0x379fa5b,0x287d635,0x3a9f679,0x059d9ee, + 
0x0ac168e,0x3cd3e87,0x19060fc,0x02ce1bc } }, + /* 159 */ + { { 0x3edcfc2,0x0f04d4b,0x2f0d31f,0x1898be2,0x25396bf,0x15ca230, + 0x02b4eae,0x2713668,0x0f71b06,0x0132d18 }, + { 0x38095ea,0x1ed34d6,0x3603ae6,0x165bf01,0x192bbf8,0x1852859, + 0x075f66b,0x1488f85,0x10895ef,0x014b035 } }, + /* 160 */ + { { 0x1339848,0x3084385,0x0c8d231,0x3a1c1de,0x0e87a28,0x255b85c, + 0x1de6616,0x2702e74,0x1382bb0,0x012b0f2 }, + { 0x198987d,0x381545a,0x34d619b,0x312b827,0x18b2376,0x28fe4cf, + 0x20b7651,0x017d077,0x0c7e397,0x00e0365 } }, + /* 161 */ + { { 0x1542e75,0x0d56aa0,0x39b701a,0x287b806,0x396c724,0x0935c21, + 0x3a29776,0x0debdac,0x171de26,0x00b38f8 }, + { 0x1d5bc1a,0x3fad27d,0x22b5cfe,0x1f89ddf,0x0a65560,0x144dd5b, + 0x2aac2f9,0x139353f,0x0520b62,0x00b9b36 } }, + /* 162 */ + { { 0x031c31d,0x16552e3,0x1a0c368,0x0016fc8,0x168533d,0x171e7b2, + 0x17626e7,0x275502f,0x14742c6,0x03285dd }, + { 0x2d2dbb2,0x3b6bffd,0x1d18cc6,0x2f45d2a,0x0fd0d8c,0x2915e3a, + 0x1e8793a,0x0b39a1d,0x3139cab,0x02a5da9 } }, + /* 163 */ + { { 0x3fb353d,0x147c6e4,0x3a720a6,0x22d5ff3,0x1d75cab,0x06c54a0, + 0x08cfa73,0x12666aa,0x3170a1f,0x021c829 }, + { 0x13e1b90,0x3a34dda,0x1fc38c3,0x02c5bdb,0x2d345dc,0x14aa1d0, + 0x28d00ab,0x224f23a,0x329c769,0x025c67b } }, + /* 164 */ + { { 0x0e35909,0x3bb6356,0x0116820,0x370cf77,0x29366d8,0x3881409, + 0x3999d06,0x013075f,0x176e157,0x02941ca }, + { 0x0e70b2e,0x28dfab1,0x2a8a002,0x15da242,0x084dcf6,0x116ca97, + 0x31bf186,0x1dc9735,0x09df7b7,0x0264e27 } }, + /* 165 */ + { { 0x2da7a4b,0x3023c9e,0x1366238,0x00ff4e2,0x03abe9d,0x19bd44b, + 0x272e897,0x20b91ad,0x2aa202c,0x02a2201 }, + { 0x380184e,0x08112b4,0x0b85660,0x31049aa,0x3a8cb78,0x36113c5, + 0x1670c0a,0x373f9e7,0x3fb4738,0x00010ef } }, + /* 166 */ + { { 0x2d5192e,0x26d770d,0x32af8d5,0x34d1642,0x1acf885,0x05805e0, + 0x166d0a1,0x1219a0d,0x301ba6c,0x014bcfb }, + { 0x2dcb64d,0x19cca83,0x379f398,0x08e01a0,0x10a482c,0x0103cc2, + 0x0be5fa7,0x1f9d45b,0x1899ef2,0x00ca5af } }, + /* 167 */ + { { 0x14d81d7,0x2aea251,0x1b3c476,0x3bd47ae,0x29eade7,0x0715e61, + 0x1a21cd8,0x1c7a586,0x2bfaee5,0x00ee43f }, + { 0x096f7cb,0x0c08f95,0x1bc4939,0x361fed4,0x255be41,0x26fad73, + 0x31dd489,0x02c600f,0x29d9f81,0x01ba201 } }, + /* 168 */ + { { 0x03ea1db,0x1eac46d,0x1292ce3,0x2a54967,0x20a7ff1,0x3e13c61, + 0x1b02218,0x2b44e14,0x3eadefa,0x029c88a }, + { 0x30a9144,0x31e3b0a,0x19c5a2a,0x147cbe9,0x05a0240,0x051f38e, + 0x11eca56,0x31a4247,0x123bc2a,0x02fa535 } }, + /* 169 */ + { { 0x3226ce7,0x1251782,0x0b7072f,0x11e59fa,0x2b8afd7,0x169b18f, + 0x2a46f18,0x31d9bb7,0x2fe9be8,0x01de0b7 }, + { 0x1b38626,0x34aa90f,0x3ad1760,0x21ddbd9,0x3460ae7,0x1126736, + 0x1b86fc5,0x0b92cd0,0x167a289,0x000e0e1 } }, + /* 170 */ + { { 0x1ec1a0f,0x36bbf5e,0x1c972d8,0x3f73ace,0x13bbcd6,0x23d86a5, + 0x175ffc5,0x2d083d5,0x2c4adf7,0x036f661 }, + { 0x1f39eb7,0x2a20505,0x176c81a,0x3d6e636,0x16ee2fc,0x3cbdc5f, + 0x25475dc,0x2ef4151,0x3c46860,0x0238934 } }, + /* 171 */ + { { 0x2587390,0x3639526,0x0588749,0x13c32fb,0x212bb19,0x09660f1, + 0x207da4b,0x2bf211b,0x1c4407b,0x01506a6 }, + { 0x24c8842,0x105a498,0x05ffdb2,0x0ab61b0,0x26044c1,0x3dff3d8, + 0x1d14b44,0x0d74716,0x049f57d,0x030024b } }, + /* 172 */ + { { 0x32e61ef,0x31d70f7,0x35cad3c,0x320b86c,0x07e8841,0x027ca7d, + 0x2d30d19,0x2513718,0x2347286,0x01d7901 }, + { 0x3c237d0,0x107f16e,0x01c9e7d,0x3c3b13c,0x0c9537b,0x20af54d, + 0x051a162,0x2161a47,0x258c784,0x016df2d } }, + /* 173 */ + { { 0x228ead1,0x29c2122,0x07f6964,0x023f4ed,0x1802dc5,0x19f96ce, + 0x24bfd17,0x25e866b,0x2ba8df0,0x01eb84f }, + { 0x2dd384e,0x05bbe3a,0x3f06fd2,0x366dacb,0x30361a2,0x2f36d7c, + 
0x0b98784,0x38ff481,0x074e2a8,0x01e1f60 } }, + /* 174 */ + { { 0x17fbb1c,0x0975add,0x1debc5e,0x2cb2880,0x3e47bdd,0x3488cff, + 0x15e9a36,0x2121129,0x0199ef2,0x017088a }, + { 0x0315250,0x352a162,0x17c1773,0x0ae09c2,0x321b21a,0x3bd74cf, + 0x3c4ea1d,0x3cac2ad,0x3abbaf0,0x039174d } }, + /* 175 */ + { { 0x0511c8a,0x3c78d0a,0x2cd3d2d,0x322f729,0x3ebb229,0x09f0e69, + 0x0a71a76,0x2e74d5e,0x12284df,0x03b5ef0 }, + { 0x3dea561,0x0a9b7e4,0x0ed1cf2,0x237523c,0x05443f1,0x2eb48fa, + 0x3861405,0x1b49f62,0x0c945ca,0x02ab25f } }, + /* 176 */ + { { 0x16bd00a,0x13a9d28,0x3cc1eb5,0x2b7d702,0x2d839e9,0x3e6ff01, + 0x2bb7f11,0x3713824,0x3b31163,0x00c63e5 }, + { 0x30d7138,0x0316fb0,0x0220ecc,0x08eaf0c,0x244e8df,0x0088d81, + 0x37972fb,0x3fd34ae,0x2a19a84,0x03e907e } }, + /* 177 */ + { { 0x2642269,0x0b65d29,0x03bd440,0x33a6ede,0x3c81814,0x2507982, + 0x0d38e47,0x3a788e6,0x32c1d26,0x00e2eda }, + { 0x2577f87,0x392895a,0x3e1cc64,0x14f7047,0x08b52d2,0x08a01ca, + 0x336abf6,0x00697fc,0x105ce76,0x0253742 } }, + /* 178 */ + { { 0x293f92a,0x33df737,0x3315156,0x32e26d7,0x0a01333,0x26579d4, + 0x004df9c,0x0aba409,0x067d25c,0x02481de }, + { 0x3f39d44,0x1c78042,0x13d7e24,0x0825aed,0x35f2c90,0x3270f63, + 0x04b7b35,0x3ad4531,0x28bd29b,0x0207a10 } }, + /* 179 */ + { { 0x077199f,0x270aeb1,0x0dd96dd,0x3b9ad7b,0x28cb8ee,0x3903f43, + 0x37db3fe,0x292c62b,0x362dbbf,0x006e52a }, + { 0x247f143,0x0362cf3,0x216344f,0x3f18fd1,0x351e623,0x31664e0, + 0x0f270fc,0x243bbc6,0x2280555,0x001a8e3 } }, + /* 180 */ + { { 0x3355b49,0x2c04e6c,0x399b2e5,0x182d3af,0x020e265,0x09a7cf7, + 0x0ffa6bd,0x353e302,0x02083d9,0x029ecdb }, + { 0x33e8830,0x0570e86,0x1c0b64d,0x386a27e,0x0d5fcea,0x0b45a4c, + 0x2ee4a2e,0x0a8833f,0x2b4a282,0x02f9531 } }, + /* 181 */ + { { 0x191167c,0x36cf7e3,0x225ed6c,0x1e79e99,0x0517c3f,0x11ab1fd, + 0x05648f3,0x08aedc4,0x1abeae0,0x02fcc29 }, + { 0x3828a68,0x1e16fa4,0x30368e7,0x0c9fcfb,0x25161c3,0x24851ac, + 0x1b5feb5,0x344eb84,0x0de2732,0x0347208 } }, + /* 182 */ + { { 0x038b363,0x384d1e4,0x2519043,0x151ac17,0x158c11f,0x009b2b4, + 0x257abe6,0x2368d3f,0x3ed68a1,0x02df45e }, + { 0x29c2559,0x2962478,0x3d8444c,0x1d96fff,0x04f7a03,0x1391a52, + 0x0de4af7,0x3319126,0x15e6412,0x00e65ff } }, + /* 183 */ + { { 0x3d61507,0x1d1a0a2,0x0d2af20,0x354d299,0x329e132,0x2a28578, + 0x2ddfb08,0x04fa3ff,0x1293c6c,0x003bae2 }, + { 0x3e259f8,0x1a68fa9,0x3e67e9b,0x39b44f9,0x1ce1db7,0x347e9a1, + 0x3318f6a,0x2dbbc9d,0x2f8c922,0x008a245 } }, + /* 184 */ + { { 0x212ab5b,0x2b896c2,0x0136959,0x07e55ef,0x0cc1117,0x05b8ac3, + 0x18429ed,0x025fa01,0x11d6e93,0x03b016b }, + { 0x03f3708,0x2e96fab,0x1d77157,0x0d4c2d6,0x131baf9,0x0608d39, + 0x3552371,0x06cdd1e,0x1567ff1,0x01f4c50 } }, + /* 185 */ + { { 0x2dfefab,0x270173d,0x37077bd,0x1a372cd,0x1be2f22,0x28e2ee5, + 0x3ead973,0x35e8f94,0x2fc9bc1,0x03a7399 }, + { 0x36a02a1,0x2855d9b,0x00ed75a,0x37d8398,0x138c087,0x233706e, + 0x147f346,0x01947e2,0x3017228,0x0365942 } }, + /* 186 */ + { { 0x2057e60,0x2d31296,0x25e4504,0x2fa37bc,0x1cbccc3,0x1f0732f, + 0x3532081,0x2de8a98,0x19a804e,0x005359a }, + { 0x31f411a,0x2a10576,0x369c2c8,0x02fe035,0x109fbaf,0x30bddeb, + 0x1eef901,0x1662ad3,0x0410d43,0x01bd31a } }, + /* 187 */ + { { 0x2c24a96,0x1b7d3a5,0x19a3872,0x217f2f6,0x2534dbc,0x2cab8c2, + 0x066ef28,0x26aecf1,0x0fd6118,0x01310d4 }, + { 0x055b8da,0x1fdc5be,0x38a1296,0x25118f0,0x341a423,0x2ba4cd0, + 0x3e1413e,0x062d70d,0x2425a31,0x029c9b4 } }, + /* 188 */ + { { 0x08c1086,0x1acfba5,0x22e1dae,0x0f72f4e,0x3f1de50,0x0f408bc, + 0x35ed3f0,0x3ce48fc,0x282cc6c,0x004d8e7 }, + { 0x1afaa86,0x24e3ef3,0x22589ac,0x3ec9952,0x1f45bc5,0x14144ca, + 
0x23b26e4,0x0d68c65,0x1e1c1a3,0x032a4d9 } }, + /* 189 */ + { { 0x03b2d20,0x16b1d53,0x241b361,0x05e4138,0x1742a54,0x32741c7, + 0x0521c4c,0x1ca96c2,0x034970b,0x02738a7 }, + { 0x13e0ad6,0x207dcdb,0x034c8cc,0x27bcbe1,0x18060da,0x33a18b6, + 0x2d1d1a6,0x2be60d7,0x3d7ab42,0x012312a } }, + /* 190 */ + { { 0x0c7485a,0x06c3310,0x0dbfd22,0x2ef949d,0x0ead455,0x098f4ba, + 0x3c76989,0x0cf2d24,0x032f67b,0x01e005f }, + { 0x30cb5ee,0x0d5da64,0x0ed2b9d,0x2503102,0x1c0d14e,0x1cbc693, + 0x37bf552,0x07013e2,0x054de5c,0x014f341 } }, + /* 191 */ + { { 0x128ccac,0x1617e97,0x346ebcd,0x158016d,0x25f823e,0x34048ea, + 0x39f0a1c,0x3ea3df1,0x1c1d3d7,0x03ba919 }, + { 0x151803b,0x01967c1,0x2f70781,0x27df39a,0x06c0b59,0x24a239c, + 0x15a7702,0x2464d06,0x2a47ae6,0x006db90 } }, + /* 192 */ + { { 0x27d04c3,0x024df3d,0x38112e8,0x38a27ba,0x01e312b,0x0965358, + 0x35d8879,0x2f4f55a,0x214187f,0x0008936 }, + { 0x05fe36f,0x2ee18c3,0x1f5f87a,0x1813bd4,0x0580f3c,0x0ed0a7b, + 0x0fb1bfb,0x3fcce59,0x2f042bf,0x01820e3 } }, + /* 193 */ + { { 0x20bbe99,0x32cbc9f,0x39ee432,0x3cc12a8,0x37bda44,0x3ea4e40, + 0x097c7a9,0x0590d7d,0x2022d33,0x018dbac }, + { 0x3ae00aa,0x3439864,0x2d2ffcf,0x3f8c6b9,0x0875a00,0x3e4e407, + 0x3658a29,0x22eb3d0,0x2b63921,0x022113b } }, + /* 194 */ + { { 0x33bae58,0x05c749a,0x1f3e114,0x1c45f8e,0x27db3df,0x06a3ab6, + 0x37bc7f8,0x1e27b34,0x3dc51fb,0x009eea0 }, + { 0x3f54de5,0x3d0e7fe,0x1a71a7d,0x02ed7f8,0x0727703,0x2ca5e92, + 0x2e8e35d,0x292ad0b,0x13487f3,0x02b6d8b } }, + /* 195 */ + { { 0x175df2a,0x05a28a8,0x32e99b1,0x13d8630,0x2082aa0,0x11ac245, + 0x24f2e71,0x322cb27,0x17675e7,0x02e643f }, + { 0x1f37313,0x2765ad3,0x0789082,0x1e742d0,0x11c2055,0x2021dc4, + 0x09ae4a7,0x346359b,0x2f94d10,0x0205c1f } }, + /* 196 */ + { { 0x3d6ff96,0x1f2ac80,0x336097d,0x3f03610,0x35b851b,0x010b6d2, + 0x0823c4d,0x2a9709a,0x2ead5a8,0x00de4b6 }, + { 0x01afa0b,0x0621965,0x3671528,0x1050b60,0x3f3e9e7,0x2f93829, + 0x0825275,0x006e85f,0x35e94b0,0x016af58 } }, + /* 197 */ + { { 0x2c4927c,0x3ea1382,0x0f23727,0x0d69f23,0x3e38860,0x2b72837, + 0x3cd5ea4,0x2d84292,0x321846a,0x016656f }, + { 0x29dfa33,0x3e182e0,0x018be90,0x2ba563f,0x2caafe2,0x218c0d9, + 0x3baf447,0x1047a6c,0x0a2d483,0x01130cb } }, + /* 198 */ + { { 0x00ed80c,0x2a5fc79,0x0a82a74,0x2c4c74b,0x15f938c,0x30b5ab6, + 0x32124b7,0x295314f,0x2fb8082,0x007c858 }, + { 0x20b173e,0x19f315c,0x12f97e4,0x198217c,0x040e8a6,0x3275977, + 0x2bc20e4,0x01f2633,0x02bc3e9,0x023c750 } }, + /* 199 */ + { { 0x3c4058a,0x24be73e,0x16704f5,0x2d8a4bd,0x3b15e14,0x3076315, + 0x1cfe37b,0x36fe715,0x343926e,0x02c6603 }, + { 0x2c76b09,0x0cf824c,0x3f7898c,0x274cec1,0x11df527,0x18eed18, + 0x08ead48,0x23915bc,0x19b3744,0x00a0a2b } }, + /* 200 */ + { { 0x0cf4ac5,0x1c8b131,0x0afb696,0x0ff7799,0x2f5ac1a,0x022420c, + 0x11baa2e,0x2ce4015,0x1275a14,0x0125cfc }, + { 0x22eac5d,0x360cd4c,0x3568e59,0x3d42f66,0x35e07ee,0x09620e4, + 0x36720fa,0x22b1eac,0x2d0db16,0x01b6b23 } }, + /* 201 */ + { { 0x1a835ef,0x1516bbb,0x2d51f7b,0x3487443,0x14aa113,0x0dd06c2, + 0x1a65e01,0x379300d,0x35920b9,0x012c8fb }, + { 0x04c7341,0x2eda00f,0x3c37e82,0x1b4fd62,0x0d45770,0x1478fba, + 0x127863a,0x26939cd,0x134ddf4,0x01375c5 } }, + /* 202 */ + { { 0x1476cd9,0x1119ca5,0x325bbf9,0x0bf8c69,0x0648d07,0x312d9f8, + 0x01c8b8f,0x136ec51,0x0002f4a,0x03f4c5c }, + { 0x195d0e1,0x10ffd22,0x29aa1cb,0x3443bdc,0x276e695,0x05e6260, + 0x15f9764,0x3cd9783,0x18c9569,0x0053eb1 } }, + /* 203 */ + { { 0x312ae18,0x280197c,0x3fc9ad9,0x303f324,0x251958d,0x29f4a11, + 0x2142408,0x3694366,0x25136ab,0x03b5f1d }, + { 0x1d4abbc,0x1c3c689,0x13ea462,0x3cfc684,0x39b5dd8,0x2d4654b, + 
0x09b0755,0x27d4f18,0x3f74d2e,0x03fbf2d } }, + /* 204 */ + { { 0x2119185,0x2525eae,0x1ba4bd0,0x0c2ab11,0x1d54e8c,0x294845e, + 0x2479dea,0x3602d24,0x17e87e0,0x0060069 }, + { 0x0afffb0,0x34fe37f,0x1240073,0x02eb895,0x06cf33c,0x2d7f7ef, + 0x1d763b5,0x04191e0,0x11e1ead,0x027e3f0 } }, + /* 205 */ + { { 0x269544c,0x0e85c57,0x3813158,0x19fc12d,0x20eaf85,0x1e2930c, + 0x22a8fd2,0x1a6a478,0x09d3d3a,0x02a74e0 }, + { 0x1a2da3b,0x30b0b16,0x0847936,0x3d86257,0x138ccbc,0x0f5421a, + 0x25244e6,0x23bdd79,0x1aee117,0x00c01ae } }, + /* 206 */ + { { 0x1eead28,0x07cac32,0x1fbc0bb,0x17627d3,0x17eef63,0x0b3a24e, + 0x0757fdb,0x3dd841d,0x3d745f8,0x002ae17 }, + { 0x25b4549,0x29f24cf,0x2f21ecd,0x1725e48,0x04be2bb,0x10ee010, + 0x1a1274b,0x10b0898,0x27511e9,0x02c48b5 } }, + /* 207 */ + { { 0x2a5ae7a,0x181ef99,0x0be33be,0x3e9dab7,0x101e703,0x3adb971, + 0x1043014,0x2ebb2be,0x1c1097d,0x027d667 }, + { 0x3f250ed,0x16dc603,0x20dc6d7,0x1d0d268,0x38eb915,0x02c89e8, + 0x1605a41,0x12de109,0x0e08a29,0x01f554a } }, + /* 208 */ + { { 0x0c26def,0x163d988,0x2d1ef0f,0x3a960ac,0x1025585,0x0738e20, + 0x27d79b0,0x05cc3ef,0x201303f,0x00a333a }, + { 0x1644ba5,0x2af345e,0x30b8d1d,0x3a01bff,0x31fc643,0x1acf85e, + 0x0a76fc6,0x04efe98,0x348a1d0,0x03062eb } }, + /* 209 */ + { { 0x1c4216d,0x18e3217,0x02ac34e,0x19c8185,0x200c010,0x17d4192, + 0x13a1719,0x165af51,0x09db7a9,0x0277be0 }, + { 0x3ab8d2c,0x2190b99,0x22b641e,0x0cd88de,0x3b42404,0x1310862, + 0x106a6d6,0x23395f5,0x0b06880,0x000d5fe } }, + /* 210 */ + { { 0x0d2cc88,0x36f9913,0x339d8e9,0x237c2e3,0x0cc61c2,0x34c2832, + 0x309874c,0x2621d28,0x2dd1b48,0x0392806 }, + { 0x17cd8f9,0x07bab3d,0x0c482ed,0x0faf565,0x31b767d,0x2f4bde1, + 0x295c717,0x330c29c,0x179ce10,0x0119b5f } }, + /* 211 */ + { { 0x1ada2c7,0x0c624a7,0x227d47d,0x30e3e6a,0x14fa0a6,0x0829678, + 0x24fd288,0x2b46a43,0x122451e,0x0319ca9 }, + { 0x186b655,0x01f3217,0x0af1306,0x0efe6b5,0x2f0235d,0x1c45ca9, + 0x2086805,0x1d44e66,0x0faf2a6,0x0178f59 } }, + /* 212 */ + { { 0x33b4416,0x10431e6,0x2d99aa6,0x217aac9,0x0cd8fcf,0x2d95a9d, + 0x3ff74ad,0x10bf17a,0x295eb8e,0x01b229e }, + { 0x02a63bd,0x182e9ec,0x004710c,0x00e2e3c,0x06b2f23,0x04b642c, + 0x2c37383,0x32a4631,0x022ad82,0x00d22b9 } }, + /* 213 */ + { { 0x0cda2fb,0x1d198d7,0x26d27f4,0x286381c,0x022acca,0x24ac7c8, + 0x2df7824,0x0b4ba16,0x1e0d9ef,0x03041d3 }, + { 0x29a65b3,0x0f3912b,0x151bfcf,0x2b0175c,0x0fd71e4,0x39aa5e2, + 0x311f50c,0x13ff351,0x3dbc9e5,0x03eeb7e } }, + /* 214 */ + { { 0x0a99363,0x0fc7348,0x2775171,0x23db3c8,0x2b91565,0x134d66c, + 0x0175cd2,0x1bf365a,0x2b48371,0x02dfe5d }, + { 0x16dbf74,0x2389357,0x2f36575,0x3f5c70e,0x38d23ba,0x090f7f8, + 0x3477600,0x3201523,0x32ecafc,0x03d3506 } }, + /* 215 */ + { { 0x1abd48d,0x073ca3f,0x38a451f,0x0d8cb01,0x1ce81be,0x05c51ba, + 0x0e29741,0x03c41ab,0x0eae016,0x0060209 }, + { 0x2e58358,0x1da62d9,0x2358038,0x14b39b2,0x1635687,0x39079b1, + 0x380e345,0x1b49608,0x23983cf,0x019f97d } }, + /* 216 */ + { { 0x34899ef,0x332e373,0x04c0f89,0x3c27aed,0x1949015,0x09663b2, + 0x2f9276b,0x07f1951,0x09a04c1,0x027fbde }, + { 0x3d2a071,0x19fb3d4,0x1b096d3,0x1fe9146,0x3b10e1a,0x0478bbb, + 0x2b3fb06,0x1388329,0x181a99c,0x02f2030 } }, + /* 217 */ + { { 0x1eb82e6,0x14dbe39,0x3920972,0x31fd5b2,0x21a484f,0x02d7697, + 0x0e21715,0x37c431e,0x2629f8c,0x01249c3 }, + { 0x26b50ad,0x26deefa,0x0ffc1a3,0x30688e2,0x39a0284,0x041c65e, + 0x03eb178,0x0bdfd50,0x2f96137,0x034bb94 } }, + /* 218 */ + { { 0x0e0362a,0x334a162,0x194dd37,0x29e3e97,0x2442fa8,0x10d2949, + 0x3836e5a,0x2dccebf,0x0bee5ab,0x037ed1e }, + { 0x33eede6,0x3c739d9,0x2f04a91,0x350ad6c,0x3a5390a,0x14c368b, + 
0x26f7bf5,0x11ce979,0x0b408df,0x0366850 } }, + /* 219 */ + { { 0x28ea498,0x0886d5b,0x2e090e0,0x0a4d58f,0x2623478,0x0d74ab7, + 0x2b83913,0x12c6b81,0x18d623f,0x01d8301 }, + { 0x198aa79,0x26d6330,0x3a7f0b8,0x34bc1ea,0x2f74890,0x378955a, + 0x204110f,0x0102538,0x02d8f19,0x01c5066 } }, + /* 220 */ + { { 0x14b0f45,0x2838cd3,0x14e16f0,0x0e0e4aa,0x2d9280b,0x0f18757, + 0x3324c6b,0x1391ceb,0x1ce89d5,0x00ebe74 }, + { 0x0930371,0x3de6048,0x3097fd8,0x1308705,0x3eda266,0x3108c26, + 0x1545dcd,0x1f7583a,0x1c37395,0x02c7e05 } }, + /* 221 */ + { { 0x1fec44a,0x2a9e3a2,0x0caf84f,0x11cf2a9,0x0c8c2ae,0x06da989, + 0x1c807dc,0x3c149a4,0x1141543,0x02906bb }, + { 0x15ffe04,0x0d4e65f,0x2e20424,0x37d896d,0x18bacb2,0x1e05ddd, + 0x1660be8,0x183be17,0x1dd86fb,0x035ba70 } }, + /* 222 */ + { { 0x2853264,0x0ba5fb1,0x0a0b3aa,0x2df88c1,0x2771533,0x23aba6f, + 0x112bb7b,0x3e3086e,0x210ae9b,0x027271b }, + { 0x030b74c,0x0269678,0x1e90a23,0x135a98c,0x24ed749,0x126de7c, + 0x344b23a,0x186da27,0x19640fa,0x0159af5 } }, + /* 223 */ + { { 0x18061f3,0x3004630,0x3c70066,0x34df20f,0x1190b25,0x1c9cc91, + 0x1fc8e02,0x0d17bc1,0x390f525,0x033cb1c }, + { 0x0eb30cf,0x2f3ad04,0x303aa09,0x2e835dd,0x1cfd2eb,0x143fc95, + 0x02c43a1,0x025e7a1,0x3558aa2,0x000bd45 } }, + /* 224 */ + { { 0x1db7d07,0x3bde52b,0x1500396,0x1089115,0x20b4fc7,0x1e2a8f3, + 0x3f8eacc,0x365f7eb,0x1a5e8d4,0x0053a6b }, + { 0x37079e2,0x120284b,0x000edaa,0x33792c2,0x145baa3,0x20e055f, + 0x365e2d7,0x26ba005,0x3ab8e9d,0x0282b53 } }, + /* 225 */ + { { 0x2653618,0x2dd8852,0x2a5f0bf,0x0f0c7aa,0x2187281,0x1252757, + 0x13e7374,0x3b47855,0x0b86e56,0x02f354c }, + { 0x2e9c47b,0x2fa14cc,0x19ab169,0x3fad401,0x0dc2776,0x24afeed, + 0x3a97611,0x0d07736,0x3cf6979,0x02424a0 } }, + /* 226 */ + { { 0x2e81a13,0x000c91d,0x123967b,0x265885c,0x29bee1a,0x0cb8675, + 0x2d361bd,0x1526823,0x3c9ace1,0x00d7bad }, + { 0x24e5bdc,0x02b969f,0x2c6e128,0x34edb3b,0x12dcd2c,0x3899af0, + 0x24224c6,0x3a1914b,0x0f4448a,0x026a2cb } }, + /* 227 */ + { { 0x1d03b59,0x1c6fc82,0x32abf64,0x28ed96b,0x1c90e62,0x2f57bb2, + 0x3ff168e,0x04de7fd,0x0f4d449,0x01af6d8 }, + { 0x255bc30,0x2bfaf22,0x3fe0dad,0x0584025,0x1c79ead,0x3078ef7, + 0x2197414,0x022a50b,0x0fd94ba,0x0007b0f } }, + /* 228 */ + { { 0x09485c2,0x09dfaf7,0x10c7ba6,0x1e48bec,0x248cc9a,0x028a362, + 0x21d60f7,0x193d93d,0x1c04754,0x0346b2c }, + { 0x2f36612,0x240ac49,0x0d8bd26,0x13b8186,0x259c3a4,0x020d5fb, + 0x38a8133,0x09b0937,0x39d4056,0x01f7341 } }, + /* 229 */ + { { 0x05a4b48,0x1f534fc,0x07725ce,0x148dc8c,0x2adcd29,0x04aa456, + 0x0f79718,0x066e346,0x189377d,0x002fd4d }, + { 0x068ea73,0x336569b,0x184d35e,0x32a08e9,0x3c7f3bb,0x11ce9c8, + 0x3674c6f,0x21bf27e,0x0d9e166,0x034a2f9 } }, + /* 230 */ + { { 0x0fa8e4b,0x2e6418e,0x18fc5d2,0x1ba24ff,0x0559f18,0x0dbedbf, + 0x2de2aa4,0x22338e9,0x3aa510f,0x035d801 }, + { 0x23a4988,0x02aad94,0x02732d1,0x111d374,0x0b455cf,0x0d01c9e, + 0x067082a,0x2ec05fd,0x368b303,0x03cad4b } }, + /* 231 */ + { { 0x035b4ca,0x1fabea6,0x1cbc0d5,0x3f2ed9a,0x02d2232,0x1990c66, + 0x2eb680c,0x3b4ea3b,0x18ecc5a,0x03636fa }, + { 0x1a02709,0x26f8ff1,0x1fa8cba,0x397d6e8,0x230be68,0x043aa14, + 0x3d43cdf,0x25c17fa,0x3a3ee55,0x0380564 } }, + /* 232 */ + { { 0x275a0a6,0x16bd43a,0x0033d3e,0x2b15e16,0x2512226,0x005d901, + 0x26d50fd,0x3bc19bf,0x3b1aeb8,0x02bfb01 }, + { 0x0bb0a31,0x26559e0,0x1aae7fb,0x330dcc2,0x16f1af3,0x06afce2, + 0x13a15a0,0x2ff7645,0x3546e2d,0x029c6e4 } }, + /* 233 */ + { { 0x0f593d2,0x384b806,0x122bbf8,0x0a281e0,0x1d1a904,0x2e93cab, + 0x0505db0,0x08f6454,0x05c6285,0x014e880 }, + { 0x3f2b935,0x22d8e79,0x161a07c,0x16b060a,0x02bff97,0x146328b, + 
0x3ceea77,0x238f61a,0x19b3d58,0x02fd1f4 } }, + /* 234 */ + { { 0x17665d5,0x259e9f7,0x0de5672,0x15cbcbd,0x34e3030,0x035240f, + 0x0005ae8,0x286d851,0x07f39c9,0x000070b }, + { 0x1efc6d6,0x2a0051a,0x2724143,0x2a9ef1e,0x0c810bd,0x1e05429, + 0x25670ba,0x2e66d7d,0x0e786ff,0x03f6b7e } }, + /* 235 */ + { { 0x3c00785,0x232e23f,0x2b67fd3,0x244ed23,0x077fa75,0x3cda3ef, + 0x14d055b,0x0f25011,0x24d5aa4,0x00ea0e3 }, + { 0x297bb9a,0x198ca4f,0x14d9561,0x18d1076,0x39eb933,0x2b6caa0, + 0x1591a60,0x0768d45,0x257873e,0x00f36e0 } }, + /* 236 */ + { { 0x1e77eab,0x0502a5f,0x0109137,0x0350592,0x3f7e1c5,0x3ac7437, + 0x2dcad2c,0x1fee9d8,0x089f1f5,0x0169833 }, + { 0x0d45673,0x0d8e090,0x065580b,0x065644f,0x11b82be,0x3592dd0, + 0x3284b8d,0x23f0015,0x16fdbfd,0x0248bfd } }, + /* 237 */ + { { 0x1a129a1,0x1977bb2,0x0e041b2,0x15f30a1,0x0a5b1ce,0x3afef8f, + 0x380c46c,0x3358810,0x27df6c5,0x01ca466 }, + { 0x3b90f9a,0x3d14ea3,0x031b298,0x02e2390,0x2d719c0,0x25bc615, + 0x2c0e777,0x0226b8c,0x3803624,0x0179e45 } }, + /* 238 */ + { { 0x363cdfb,0x1bb155f,0x24fd5c1,0x1c7c72b,0x28e6a35,0x18165f2, + 0x226bea5,0x0beaff3,0x371e24c,0x0138294 }, + { 0x1765357,0x29034e9,0x22b4276,0x11035ce,0x23c89af,0x074468c, + 0x3370ae4,0x013bae3,0x018d566,0x03d7fde } }, + /* 239 */ + { { 0x209df21,0x0f8ff86,0x0e47fbf,0x23b99ba,0x126d5d2,0x2722405, + 0x16bd0a2,0x1799082,0x0e9533f,0x039077c }, + { 0x3ba9e3f,0x3f6902c,0x1895305,0x3ac9813,0x3f2340c,0x3c0d9f1, + 0x26e1927,0x0557c21,0x16eac4f,0x023b75f } }, + /* 240 */ + { { 0x3fc8ff3,0x0770382,0x342fc9a,0x0afa4db,0x314efd8,0x328e07b, + 0x016f7cc,0x3ba599c,0x1caed8a,0x0050cb0 }, + { 0x0b23c26,0x2120a5c,0x3273ec6,0x1cc1cd6,0x2a64fe8,0x2bbc3d6, + 0x09f6e5e,0x34b1b8e,0x00b5ac8,0x032bbd2 } }, + /* 241 */ + { { 0x1315922,0x1725e1d,0x0ca5524,0x1c4c18f,0x3d82951,0x193bcb2, + 0x0e60d0b,0x388dbcf,0x37e8efa,0x0342e85 }, + { 0x1b3af60,0x26ba3ec,0x220e53a,0x394f4b6,0x01a796a,0x3e7bbca, + 0x163605d,0x2b85807,0x17c1c54,0x03cc725 } }, + /* 242 */ + { { 0x1cc4597,0x1635492,0x2028c0f,0x2c2eb82,0x2dc5015,0x0d2a052, + 0x05fc557,0x1f0ebbf,0x0cb96e1,0x0004d01 }, + { 0x1a824bf,0x3896172,0x2ed7b29,0x178007a,0x0d59318,0x07bda2b, + 0x2ee6826,0x0f9b235,0x04b9193,0x01bcddf } }, + /* 243 */ + { { 0x0333fd2,0x0eeb46a,0x15b89f9,0x00968aa,0x2a89302,0x2bdd6b3, + 0x1e5037e,0x2541884,0x24ed2d0,0x01b6e8f }, + { 0x04399cd,0x3be6334,0x3adea48,0x1bb9adc,0x31811c6,0x05fb2bc, + 0x360752c,0x3d29dcb,0x3423bec,0x03c4f3c } }, + /* 244 */ + { { 0x119e2eb,0x2e7b02a,0x0f68cee,0x257d8b0,0x183a9a1,0x2ae88a6, + 0x3a3bb67,0x2eb4f3e,0x1a9274b,0x0320fea }, + { 0x2fa1ce0,0x346c2d8,0x2fbf0d7,0x3d4d063,0x0e58b60,0x09c1bc1, + 0x28ef9e5,0x09a0efe,0x0f45d70,0x02d275c } }, + /* 245 */ + { { 0x2d5513b,0x31d443e,0x1e2d914,0x3b2c5d4,0x105f32e,0x27ee756, + 0x050418d,0x3c73db6,0x1bb0c30,0x01673eb }, + { 0x1cb7fd6,0x1eb08d5,0x26a3e16,0x2e20810,0x0249367,0x029e219, + 0x2ec58c9,0x12d9fab,0x362354a,0x016eafc } }, + /* 246 */ + { { 0x2424865,0x260747b,0x177f37c,0x1e3cb95,0x08b0028,0x2783016, + 0x2970f1b,0x323c1c0,0x2a79026,0x0186231 }, + { 0x0f244da,0x26866f4,0x087306f,0x173ec20,0x31ecced,0x3c84d8d, + 0x070f9b9,0x2e764d5,0x075df50,0x0264ff9 } }, + /* 247 */ + { { 0x32c3609,0x0c737e6,0x14ea68e,0x300b11b,0x184eb19,0x29dd440, + 0x09ec1a9,0x185adeb,0x0664c80,0x0207dd9 }, + { 0x1fbe978,0x30a969d,0x33561d7,0x34fc60e,0x36743fe,0x00774af, + 0x0d1f045,0x018360e,0x12a5fe9,0x01592a0 } }, + /* 248 */ + { { 0x2817d1d,0x2993d3e,0x2e0f7a5,0x112faa0,0x255f968,0x355fe6a, + 0x3f5a0fc,0x075b2d7,0x3cf00e5,0x0089afc }, + { 0x32833cf,0x06a7e4b,0x09a8d6d,0x1693d3e,0x320a0a3,0x3cfdfdd, + 
0x136c498,0x1e0d845,0x347ff25,0x01a1de7 } }, + /* 249 */ + { { 0x3043d08,0x030705c,0x20fa79b,0x1d07f00,0x0a54467,0x29b49b4, + 0x367e289,0x0b82f4d,0x0d1eb09,0x025ef2c }, + { 0x32ed3c3,0x1baaa3c,0x3c482ab,0x146ca06,0x3c8a4f1,0x3e85e3c, + 0x1bf4f3b,0x1195534,0x3e80a78,0x02a1cbf } }, + /* 250 */ + { { 0x32b2086,0x2de4d68,0x3486b1a,0x03a0583,0x2e1eb71,0x2dab9af, + 0x10cd913,0x28daa6f,0x3fcb732,0x000a04a }, + { 0x3605318,0x3f5f2b3,0x2d1da63,0x143f7f5,0x1646e5d,0x040b586, + 0x1683982,0x25abe87,0x0c9fe53,0x001ce47 } }, + /* 251 */ + { { 0x380d02b,0x055fc22,0x3f7fc50,0x3458a1d,0x26b8333,0x23550ab, + 0x0a1af87,0x0a821eb,0x2dc7e6d,0x00d574a }, + { 0x07386e1,0x3ccd68a,0x3275b41,0x253e390,0x2fd272a,0x1e6627a, + 0x2ca2cde,0x0e9e4a1,0x1e37c2a,0x00f70ac } }, + /* 252 */ + { { 0x0581352,0x2748701,0x02bed68,0x094dd9e,0x30a00c8,0x3fb5c07, + 0x3bd5909,0x211ac80,0x1103ccd,0x0311e1a }, + { 0x0c768ed,0x29dc209,0x36575db,0x009a107,0x272feea,0x2b33383, + 0x313ed56,0x134c9cc,0x168d5bb,0x033310a } }, + /* 253 */ + { { 0x17620b9,0x143784f,0x256a94e,0x229664a,0x1d89a5c,0x1d521f2, + 0x0076406,0x1c73f70,0x342aa48,0x03851fa }, + { 0x0f3ae46,0x2ad3bab,0x0fbe274,0x3ed40d4,0x2fd4936,0x232103a, + 0x2afe474,0x25b8f7c,0x047080e,0x008e6b0 } }, + /* 254 */ + { { 0x3fee8d4,0x347cd4a,0x0fec481,0x33fe9ec,0x0ce80b5,0x33a6bcf, + 0x1c4c9e2,0x3967441,0x1a3f5f7,0x03157e8 }, + { 0x257c227,0x1bc53a0,0x200b318,0x0fcd0af,0x2c5b165,0x2a413ec, + 0x2fc998a,0x2da6426,0x19cd4f4,0x0025336 } }, + /* 255 */ + { { 0x303beba,0x2072135,0x32918a9,0x140cb3a,0x08631d1,0x0ef527b, + 0x05f2c9e,0x2b4ce91,0x0b642ab,0x02e428c }, + { 0x0a5abf9,0x15013ed,0x3603b46,0x30dd76d,0x3004750,0x28d7627, + 0x1a42ccc,0x093ddbe,0x39a1b79,0x00067e2 } }, +}; + +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine co-ordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_256_ecc_mulmod_base_10(sp_point* r, const sp_digit* k, + int map, void* heap) +{ + return sp_256_ecc_mulmod_stripe_10(r, &p256_base, p256_table, + k, map, heap); +} + +#endif + + +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
+ */
+SP_NOINLINE static void sp_256_mul_d_10(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x3ffffff;
+        t >>= 26;
+    }
+    r[10] = (sp_digit)t;
+#else
+    int64_t tb = b;
+    int64_t t[10];
+
+    t[ 0] = Q6_P_mpy_RR(tb, a[0]);
+    t[ 1] = Q6_P_mpy_RR(tb, a[1]);
+    t[ 2] = Q6_P_mpy_RR(tb, a[2]);
+    t[ 3] = Q6_P_mpy_RR(tb, a[3]);
+    t[ 4] = Q6_P_mpy_RR(tb, a[4]);
+    t[ 5] = Q6_P_mpy_RR(tb, a[5]);
+    t[ 6] = Q6_P_mpy_RR(tb, a[6]);
+    t[ 7] = Q6_P_mpy_RR(tb, a[7]);
+    t[ 8] = Q6_P_mpy_RR(tb, a[8]);
+    t[ 9] = Q6_P_mpy_RR(tb, a[9]);
+    r[ 0] = Q6_R_and_RR(t[ 0], 0x3ffffff);
+    r[ 1] = (sp_digit)(t[ 0] >> 26) + Q6_R_and_RR(t[ 1], 0x3ffffff);
+    r[ 2] = (sp_digit)(t[ 1] >> 26) + Q6_R_and_RR(t[ 2], 0x3ffffff);
+    r[ 3] = (sp_digit)(t[ 2] >> 26) + Q6_R_and_RR(t[ 3], 0x3ffffff);
+    r[ 4] = (sp_digit)(t[ 3] >> 26) + Q6_R_and_RR(t[ 4], 0x3ffffff);
+    r[ 5] = (sp_digit)(t[ 4] >> 26) + Q6_R_and_RR(t[ 5], 0x3ffffff);
+    r[ 6] = (sp_digit)(t[ 5] >> 26) + Q6_R_and_RR(t[ 6], 0x3ffffff);
+    r[ 7] = (sp_digit)(t[ 6] >> 26) + Q6_R_and_RR(t[ 7], 0x3ffffff);
+    r[ 8] = (sp_digit)(t[ 7] >> 26) + Q6_R_and_RR(t[ 8], 0x3ffffff);
+    r[ 9] = (sp_digit)(t[ 8] >> 26) + Q6_R_and_RR(t[ 9], 0x3ffffff);
+    r[10] = (sp_digit)(t[ 9] >> 26);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_256_div_word_10(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+    int64_t t0, t1;
+
+    /* dv has 14 bits. */
+    dv = (dv >> 12) + 1;
+    /* All 26 bits from d1 and top 5 bits from d0. */
+    d = (d1 << 5) | (d0 >> 21);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 17 bits in r */
+    /* Next 9 bits from d0. */
+    d <<= 9;
+    r <<= 9;
+    d |= (d0 >> 12) & ((1 << 9) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 26 bits in r */
+
+    /* Handle rounding error with dv - top part */
+    t0 = ((int64_t)d1 << 26) + d0;
+    t1 = (int64_t)r * dv;
+    t1 = t0 - t1;
+    t = (sp_digit)(t1 >> 12) / dv;
+    r += t;
+
+    /* Handle rounding error with dv - bottom 32 bits */
+    t1 = (sp_digit)t0 - (r * dv);
+    t = (sp_digit)t1 / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
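+
+/* The Q6_P_mpy_RR (32x32 -> 64-bit multiply) and Q6_R_and_RR (32-bit AND)
+ * Hexagon intrinsics above unroll the same portable 26-bit limb carry
+ * chain that the WOLFSSL_SP_SMALL branch uses. Equivalent sketch for
+ * reference (illustrative only, not compiled):
+ *
+ *     int64_t t = 0;
+ *     for (i = 0; i < 10; i++) {
+ *         t += (int64_t)b * a[i];           // partial product plus carry in
+ *         r[i] = (sp_digit)(t & 0x3ffffff); // keep the low 26 bits
+ *         t >>= 26;                         // carry out to the next limb
+ *     }
+ *     r[10] = (sp_digit)t;                  // top carry becomes limb 10
+ */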
+ */ +static int sp_256_div_10(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_32 + int64_t d1; +#endif + sp_digit dv, r1; +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + sp_digit* td; +#else + sp_digit t1d[20], t2d[10 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + int err = MP_OKAY; + + (void)m; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 10 + 1), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + t1 = td; + t2 = td + 2 * 10; +#else + t1 = t1d; + t2 = t2d; +#endif + + dv = d[9]; + XMEMCPY(t1, a, sizeof(*t1) * 2U * 10U); + for (i=9; i>=0; i--) { + t1[10 + i] += t1[10 + i - 1] >> 26; + t1[10 + i - 1] = Q6_R_and_RR(t1[10 + i - 1], 0x3ffffff); +#ifndef WOLFSSL_SP_DIV_32 + d1 = t1[10 + i]; + d1 <<= 26; + d1 += t1[10 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_256_div_word_10(t1[10 + i], t1[10 + i - 1], dv); +#endif + + sp_256_mul_d_10(t2, d, r1); + (void)sp_256_sub_10(&t1[i], &t1[i], t2); + t1[10 + i] -= t2[10]; + t1[10 + i] += t1[10 + i - 1] >> 26; + t1[10 + i - 1] = Q6_R_and_RR(t1[10 + i - 1], 0x3ffffff); + r1 = (((-t1[10 + i]) << 26) - t1[10 + i - 1]) / dv; + r1++; + sp_256_mul_d_10(t2, d, r1); + (void)sp_256_add_10(&t1[i], &t1[i], t2); + t1[10 + i] += t1[10 + i - 1] >> 26; + t1[10 + i - 1] = Q6_R_and_RR(t1[10 + i - 1], 0x3ffffff); + } + t1[10 - 1] += t1[10 - 2] >> 26; + t1[10 - 2] &= 0x3ffffff; + d1 = t1[10 - 1]; + r1 = (sp_digit)(d1 / dv); + + sp_256_mul_d_10(t2, d, r1); + (void)sp_256_sub_10(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 10U); + for (i=0; i<8; i++) { + r[i+1] += r[i] >> 26; + r[i] &= 0x3ffffff; + } + sp_256_cond_add_10(r, r, d, 0 - ((r[9] < 0) ? + (sp_digit)1 : (sp_digit)0)); + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_256_mod_10(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_256_div_10(a, m, NULL, r); +} + +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#ifdef WOLFSSL_SP_SMALL +/* Order-2 for the P256 curve. */ +static const uint32_t p256_order_2[8] = { + 0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU, + 0x00000000U,0xffffffffU +}; +#else +/* The low half of the order-2 of the P256 curve. */ +static const uint32_t p256_order_low[4] = { + 0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU +}; +#endif /* WOLFSSL_SP_SMALL */ + +/* Multiply two number mod the order of P256 curve. (r = a * b mod order) + * + * r Result of the multiplication. + * a First operand of the multiplication. + * b Second operand of the multiplication. + */ +static void sp_256_mont_mul_order_10(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_256_mul_10(r, a, b); + sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order); +} + +/* Square number mod the order of P256 curve. (r = a * a mod order) + * + * r Result of the squaring. + * a Number to square. 
+ */ +static void sp_256_mont_sqr_order_10(sp_digit* r, const sp_digit* a) +{ + sp_256_sqr_10(r, a); + sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order); +} + +#ifndef WOLFSSL_SP_SMALL +/* Square number mod the order of P256 curve a number of times. + * (r = a ^ n mod order) + * + * r Result of the squaring. + * a Number to square. + */ +static void sp_256_mont_sqr_n_order_10(sp_digit* r, const sp_digit* a, int n) +{ + int i; + + sp_256_mont_sqr_order_10(r, a); + for (i=1; i=0; i--) { + sp_256_mont_sqr_order_10(t, t); + if ((p256_order_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) { + sp_256_mont_mul_order_10(t, t, a); + } + } + XMEMCPY(r, t, sizeof(sp_digit) * 10U); +#else + sp_digit* t = td; + sp_digit* t2 = td + 2 * 10; + sp_digit* t3 = td + 4 * 10; + int i; + + + /* t = a^2 */ + sp_256_mont_sqr_order_10(t, a); + /* t = a^3 = t * a */ + sp_256_mont_mul_order_10(t, t, a); + /* t2= a^c = t ^ 2 ^ 2 */ + sp_256_mont_sqr_n_order_10(t2, t, 2); + /* t3= a^f = t2 * t */ + sp_256_mont_mul_order_10(t3, t2, t); + /* t2= a^f0 = t3 ^ 2 ^ 4 */ + sp_256_mont_sqr_n_order_10(t2, t3, 4); + /* t = a^ff = t2 * t3 */ + sp_256_mont_mul_order_10(t, t2, t3); + /* t3= a^ff00 = t ^ 2 ^ 8 */ + sp_256_mont_sqr_n_order_10(t2, t, 8); + /* t = a^ffff = t2 * t */ + sp_256_mont_mul_order_10(t, t2, t); + /* t2= a^ffff0000 = t ^ 2 ^ 16 */ + sp_256_mont_sqr_n_order_10(t2, t, 16); + /* t = a^ffffffff = t2 * t */ + sp_256_mont_mul_order_10(t, t2, t); + /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */ + sp_256_mont_sqr_n_order_10(t2, t, 64); + /* t2= a^ffffffff00000000ffffffff = t2 * t */ + sp_256_mont_mul_order_10(t2, t2, t); + /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */ + sp_256_mont_sqr_n_order_10(t2, t2, 32); + /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */ + sp_256_mont_mul_order_10(t2, t2, t); + /* t2= a^ffffffff00000000ffffffffffffffffbce6 */ + + for (i=127; i>=112; i--) { + sp_256_mont_sqr_order_10(t2, t2); + if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) { + sp_256_mont_mul_order_10(t2, t2, a); + } + } + /* t2= a^ffffffff00000000ffffffffffffffffbce6f */ + sp_256_mont_sqr_n_order_10(t2, t2, 4); + sp_256_mont_mul_order_10(t2, t2, t3); + /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */ + for (i=107; i>=64; i--) { + sp_256_mont_sqr_order_10(t2, t2); + if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) { + sp_256_mont_mul_order_10(t2, t2, a); + } + } + /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */ + sp_256_mont_sqr_n_order_10(t2, t2, 4); + sp_256_mont_mul_order_10(t2, t2, t3); + /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */ + for (i=59; i>=32; i--) { + sp_256_mont_sqr_order_10(t2, t2); + if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) { + sp_256_mont_mul_order_10(t2, t2, a); + } + } + /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */ + sp_256_mont_sqr_n_order_10(t2, t2, 4); + sp_256_mont_mul_order_10(t2, t2, t3); + /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */ + for (i=27; i>=0; i--) { + sp_256_mont_sqr_order_10(t2, t2); + if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) { + sp_256_mont_mul_order_10(t2, t2, a); + } + } + /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */ + sp_256_mont_sqr_n_order_10(t2, t2, 4); + /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */ + sp_256_mont_mul_order_10(r, t2, t3); +#endif /* 
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_VERIFY
+
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 256)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * h       Remote handle (unused).
+ * u1      Truncated hash as a scalar; used as e and overwritten.
+ * hashLen Length of the hash data.
+ * r, rSz  First part of the signature and its size.
+ * s, sSz  Second part of the signature and its size.
+ * x, xSz  X ordinate of the public key point Q and its size.
+ * y, ySz  Y ordinate of the public key point Q and its size.
+ * z, zSz  Z ordinate of the public key point Q and its size.
+ * res     Set to 1 when the signature verifies and 0 otherwise.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int wolfSSL_DSP_ECC_Verify_256(remote_handle64 h, int32 *u1, int hashLen, int32* r, int rSz, int32* s, int sSz,
+    int32* x, int xSz, int32* y, int ySz, int32* z, int zSz, int* res)
+{
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    sp_digit* d = NULL;
+#else
+    sp_digit u2d[2*10] __attribute__((aligned(128)));
+    sp_digit tmpd[2*10 * 5] __attribute__((aligned(128)));
+    sp_point p1d;
+    sp_point p2d;
+#endif
+    sp_digit* u2 = NULL;
+    sp_digit* tmp = NULL;
+    sp_point* p1;
+    sp_point* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+    void* heap = NULL;
+
+    (void)h;
+    (void)hashLen;
+
+    err = sp_ecc_point_new(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_ecc_point_new(heap, p2d, p2);
+    }
+
+    if (err == MP_OKAY) {
+        u2 = u2d;
+        tmp = tmpd;
+
+        XMEMCPY(u2, r, 40);
+        XMEMCPY(p2->x, x, 40);
+        XMEMCPY(p2->y, y, 40);
+        XMEMCPY(p2->z, z, 40);
+
+        sp_256_mul_10(s, s, p256_norm_order);
+        err = sp_256_mod_10(s, s, p256_order);
+    }
+    if (err == MP_OKAY) {
+        sp_256_norm_10(s);
+        {
+            sp_256_mont_inv_order_10(s, s, tmp);
+            sp_256_mont_mul_order_10(u1, u1, s);
+            sp_256_mont_mul_order_10(u2, u2, s);
+        }
+
+        err = sp_256_ecc_mulmod_base_10(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_10(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        sp_256_proj_point_add_10(p1, p1, p2, tmp);
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        XMEMCPY(u2, r, 40);
+        err = sp_256_mod_mul_norm_10(u2, u2, p256_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_256_mont_sqr_10(p1->z, p1->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(u1, u2, p1->z, p256_mod, p256_mp_mod);
+        *res = (int)(sp_256_cmp_10(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            XMEMCPY(u2, r, 40);
+            carry = sp_256_add_10(u2, u2, p256_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_256_norm_10(u2);
+
+                 */
+                c = sp_256_cmp_10(u2, p256_mod);
+                if (c < 0) {
+                    /* Convert to Montgomery form */
+                    err = sp_256_mod_mul_norm_10(u2, u2, p256_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_256_mont_mul_10(u1, u2, p1->z, p256_mod,
+                            p256_mp_mod);
+                        *res = (int)(sp_256_cmp_10(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_ecc_point_free(p1, 0, heap);
+    sp_ecc_point_free(p2, 0, heap);
+
+    return err;
+}
+
+/** Free the Fixed Point cache */
+void wc_ecc_fp_free(void)
+{
+}
+
+
+AEEResult wolfSSL_open(const char *uri, remote_handle64 *handle)
+{
+    void *tptr;
+
+    (void)uri;
+
+    /* The handle value is opaque to the RPC layer; it can be any value,
+     * e.g. *handle = 0; or *handle = 0xdeadc0de; would also work. */
+    tptr = (void *)malloc(1);
+    *handle = (remote_handle64)tptr;
+    return 0;
+}
+
+AEEResult wolfSSL_close(remote_handle64 handle)
+{
+    if (handle)
+        free((void*)handle);
+    return 0;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX  First EC point's X ordinate.
+ * pY  First EC point's Y ordinate.
+ * pZ  First EC point's Z ordinate.
+ * qX  Second EC point's X ordinate.
+ * qY  Second EC point's Y ordinate.
+ * qZ  Second EC point's Z ordinate.
+ * rX  Resultant EC point's X ordinate.
+ * rY  Resultant EC point's Y ordinate.
+ * rZ  Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_digit tmpd[2 * 10 * 5];
+    sp_point pd;
+    sp_point qd;
+#endif
+    sp_digit* tmp;
+    sp_point* p;
+    sp_point* q = NULL;
+    int err;
+
+    err = sp_ecc_point_new(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_ecc_point_new(NULL, qd, q);
+    }
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, NULL,
+            DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(p->x, 10, pX);
+        sp_256_from_mp(p->y, 10, pY);
+        sp_256_from_mp(p->z, 10, pZ);
+        sp_256_from_mp(q->x, 10, qX);
+        sp_256_from_mp(q->y, 10, qY);
+        sp_256_from_mp(q->z, 10, qZ);
+
+        sp_256_proj_point_add_10(p, p, q, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, rZ);
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_ecc_point_free(q, 0, NULL);
+    sp_ecc_point_free(p, 0, NULL);
+
+    return err;
+}
+
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX  EC point's X ordinate.
+ * pY  EC point's Y ordinate.
+ * pZ  EC point's Z ordinate.
+ * rX  Resultant EC point's X ordinate.
+ * rY  Resultant EC point's Y ordinate.
+ * rZ  Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
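+ *
+ * Illustrative usage sketch (an assumption for illustration; relies on the
+ * standard wolfSSL mp_int API and on the mp_ints already holding a valid
+ * projective point, z == 1 for an affine point):
+ *
+ *   mp_int rX, rY, rZ;
+ *   if (sp_ecc_proj_dbl_point_256(&pX, &pY, &pZ, &rX, &rY, &rZ) == MP_OKAY) {
+ *       // (rX, rY, rZ) now holds 2.P in projective form; use
+ *       // sp_ecc_map_256(&rX, &rY, &rZ) to map back to affine.
+ *   }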
+ */ +int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_digit tmpd[2 * 10 * 2]; + sp_point pd; +#endif + sp_digit* tmp; + sp_point* p; + int err; + + err = sp_ecc_point_new(NULL, pd, p); +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 10, pX); + sp_256_from_mp(p->y, 10, pY); + sp_256_from_mp(p->z, 10, pZ); + + sp_256_proj_point_dbl_10(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, rZ); + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_ecc_point_free(p, 0, NULL); + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_digit tmpd[2 * 10 * 4]; + sp_point pd; +#endif + sp_digit* tmp; + sp_point* p; + int err; + + err = sp_ecc_point_new(NULL, pd, p); +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 4, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 10, pX); + sp_256_from_mp(p->y, 10, pY); + sp_256_from_mp(p->z, 10, pZ); + + sp_256_map_10(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, pZ); + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_ecc_point_free(p, 0, NULL); + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
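+ *
+ * Why a single exponentiation works: the P256 prime p is congruent to
+ * 3 mod 4, so sqrt(y) = y^((p+1)/4) mod p whenever y is a square.
+ * Writing p + 1 = 0xffffffff00000001 * 2^192 + 2^96 gives
+ *   (p+1)/4 = (0xffffffff00000001 * 2^96 + 1) * 2^94,
+ * which is exactly the addition chain below: build y^0xffffffff00000001,
+ * square 96 times, multiply by y once more, then square a final 94 times.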
+ */ +static int sp_256_mont_sqrt_10(sp_digit* y) +{ +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + sp_digit* d; +#else + sp_digit t1d[2 * 10]; + sp_digit t2d[2 * 10]; +#endif + sp_digit* t1; + sp_digit* t2; + int err = MP_OKAY; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + t1 = d + 0 * 10; + t2 = d + 2 * 10; +#else + t1 = t1d; + t2 = t2d; +#endif + + { + /* t2 = y ^ 0x2 */ + sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0x3 */ + sp_256_mont_mul_10(t1, t2, y, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xc */ + sp_256_mont_sqr_n_10(t2, t1, 2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xf */ + sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xf0 */ + sp_256_mont_sqr_n_10(t2, t1, 4, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xff */ + sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xff00 */ + sp_256_mont_sqr_n_10(t2, t1, 8, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffff */ + sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xffff0000 */ + sp_256_mont_sqr_n_10(t2, t1, 16, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff */ + sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000000 */ + sp_256_mont_sqr_n_10(t1, t1, 32, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001 */ + sp_256_mont_mul_10(t1, t1, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */ + sp_256_mont_sqr_n_10(t1, t1, 96, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */ + sp_256_mont_mul_10(t1, t1, y, p256_mod, p256_mp_mod); + sp_256_mont_sqr_n_10(y, t1, 94, p256_mod, p256_mp_mod); + } + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
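+ *
+ * The curve equation is y^2 = x^3 - 3x + b, so the code evaluates the
+ * right hand side at x, takes a modular square root with
+ * sp_256_mont_sqrt_10() and then chooses between the two roots y and
+ * p - y: exactly one of them has the parity requested by 'odd', e.g.
+ * when y comes out even but odd == 1 the result is flipped to p - y.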
+ */ +int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) +{ +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + sp_digit* d; +#else + sp_digit xd[2 * 10]; + sp_digit yd[2 * 10]; +#endif + sp_digit* x = NULL; + sp_digit* y = NULL; + int err = MP_OKAY; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + x = d + 0 * 10; + y = d + 2 * 10; +#else + x = xd; + y = yd; +#endif + + sp_256_from_mp(x, 10, xm); + err = sp_256_mod_mul_norm_10(x, x, p256_mod); + } + if (err == MP_OKAY) { + /* y = x^3 */ + { + sp_256_mont_sqr_10(y, x, p256_mod, p256_mp_mod); + sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod); + } + /* y = x^3 - 3x */ + sp_256_mont_sub_10(y, y, x, p256_mod); + sp_256_mont_sub_10(y, y, x, p256_mod); + sp_256_mont_sub_10(y, y, x, p256_mod); + /* y = x^3 - 3x + b */ + err = sp_256_mod_mul_norm_10(x, p256_b, p256_mod); + } + if (err == MP_OKAY) { + sp_256_mont_add_10(y, y, x, p256_mod); + /* y = sqrt(x^3 - 3x + b) */ + err = sp_256_mont_sqrt_10(y); + } + if (err == MP_OKAY) { + XMEMSET(y + 10, 0, 10U * sizeof(sp_digit)); + sp_256_mont_reduce_10(y, p256_mod, p256_mp_mod); + if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) { + sp_256_mont_sub_10(y, p256_mod, y, p256_mod); + } + + err = sp_256_to_mp(y, ym); + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} +#endif +#endif /* !WOLFSSL_SP_NO_256 */ +#endif /* WOLFSSL_HAVE_SP_ECC */ +#endif /* WOLFSSL_DSP */ +#endif /* WOLFSSL_HAVE_SP_ECC */ + diff --git a/client/wolfssl/wolfcrypt/src/sp_int.c b/client/wolfssl/wolfcrypt/src/sp_int.c new file mode 100644 index 0000000..0db891b --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/sp_int.c @@ -0,0 +1,2203 @@ +/* sp_int.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Implementation by Sean Parkinson. 
 */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+/* SP Build Options:
+ * WOLFSSL_HAVE_SP_RSA:         Enable SP RSA support
+ * WOLFSSL_HAVE_SP_DH:          Enable SP DH support
+ * WOLFSSL_HAVE_SP_ECC:         Enable SP ECC support
+ * WOLFSSL_SP_MATH:             Use only single precision math and algorithms it supports (no fastmath tfm.c or normal integer.c)
+ * WOLFSSL_SP_SMALL:            Use smaller version of code and avoid large stack variables
+ * WOLFSSL_SP_NO_MALLOC:        Always use stack, no heap XMALLOC/XFREE allowed
+ * WOLFSSL_SP_NO_3072:          Disable RSA/DH 3072-bit support
+ * WOLFSSL_SP_NO_2048:          Disable RSA/DH 2048-bit support
+ * WOLFSSL_SP_4096:             Enable RSA/DH 4096-bit support
+ * WOLFSSL_SP_384               Enable ECC 384-bit SECP384R1 support
+ * WOLFSSL_SP_NO_256            Disable ECC 256-bit SECP256R1 support
+ * WOLFSSL_SP_CACHE_RESISTANT   Enable cache resistant code
+ * WOLFSSL_SP_ASM               Enable assembly speedups (detect platform)
+ * WOLFSSL_SP_X86_64_ASM        Enable Intel x86 assembly speedups like AVX/AVX2
+ * WOLFSSL_SP_ARM32_ASM         Enable Aarch32 assembly speedups
+ * WOLFSSL_SP_ARM64_ASM         Enable Aarch64 assembly speedups
+ * WOLFSSL_SP_ARM_CORTEX_M_ASM  Enable Cortex-M assembly speedups
+ * WOLFSSL_SP_ARM_THUMB_ASM     Enable ARM Thumb assembly speedups (used with -mthumb)
+ */
+
+#ifdef WOLFSSL_SP_MATH
+
+#include <wolfssl/wolfcrypt/sp_int.h>
+
+#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
+
+WOLFSSL_LOCAL int sp_ModExp_1024(sp_int* base, sp_int* exp, sp_int* mod,
+    sp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_1536(sp_int* base, sp_int* exp, sp_int* mod,
+    sp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_2048(sp_int* base, sp_int* exp, sp_int* mod,
+    sp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_3072(sp_int* base, sp_int* exp, sp_int* mod,
+    sp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_4096(sp_int* base, sp_int* exp, sp_int* mod,
+    sp_int* res);
+
+#endif
+
+/* Return the number of digits used in the big number.
+ *
+ * a  SP integer.
+ * returns the count, or 0 when a is NULL.
+ */
+int sp_get_digit_count(sp_int *a)
+{
+    int ret;
+    if (!a)
+        ret = 0;
+    else
+        ret = a->used;
+    return ret;
+}
+
+/* Initialize the big number to be zero.
+ *
+ * a  SP integer.
+ * returns MP_OKAY always.
+ */
+int sp_init(sp_int* a)
+{
+    a->used = 0;
+    a->size = SP_INT_DIGITS;
+
+    return MP_OKAY;
+}
+
+#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || (!defined(NO_DH) || defined(HAVE_ECC))
+/* Initialize up to six big numbers to be zero.
+ *
+ * a  SP integer.
+ * b  SP integer.
+ * c  SP integer.
+ * d  SP integer.
+ * e  SP integer.
+ * f  SP integer.
+ * returns MP_OKAY always.
+ */
+int sp_init_multi(sp_int* a, sp_int* b, sp_int* c, sp_int* d, sp_int* e,
+    sp_int* f)
+{
+    if (a != NULL) {
+        a->used = 0;
+        a->size = SP_INT_DIGITS;
+    }
+    if (b != NULL) {
+        b->used = 0;
+        b->size = SP_INT_DIGITS;
+    }
+    if (c != NULL) {
+        c->used = 0;
+        c->size = SP_INT_DIGITS;
+    }
+    if (d != NULL) {
+        d->used = 0;
+        d->size = SP_INT_DIGITS;
+    }
+    if (e != NULL) {
+        e->used = 0;
+        e->size = SP_INT_DIGITS;
+    }
+    if (f != NULL) {
+        f->used = 0;
+        f->size = SP_INT_DIGITS;
+    }
+
+    return MP_OKAY;
+}
+#endif
+
+/* Clear the data from the big number and set to zero.
+ *
+ * a  SP integer.
+ */
+void sp_clear(sp_int* a)
+{
+    if (a != NULL) {
+        int i;
+
+        for (i = 0; i < a->used; i++)
+            a->dp[i] = 0;
+        a->used = 0;
+    }
+}
+
+/* Calculate the number of 8-bit values required to represent the big number.
+ *
+ * a  SP integer.
+ * returns the count.
+ */
+int sp_unsigned_bin_size(sp_int* a)
+{
+    int size = sp_count_bits(a);
+    return (size + 7) / 8;
+}
+
+/* Convert a number as an array of bytes in big-endian format to a big number.
+ *
+ * a     SP integer.
+ * in    Array of bytes.
+ * inSz  Number of data bytes in array.
+ * returns MP_VAL when the number is too big to fit in an SP and MP_OKAY
+ * otherwise.
+ */
+int sp_read_unsigned_bin(sp_int* a, const byte* in, int inSz)
+{
+    int err = MP_OKAY;
+    int i, j = 0, k;
+
+    if (inSz > SP_INT_DIGITS * (int)sizeof(a->dp[0])) {
+        err = MP_VAL;
+    }
+
+    if (err == MP_OKAY) {
+        for (i = inSz-1; i >= (SP_WORD_SIZE/8); i -= (SP_WORD_SIZE/8), j++) {
+            a->dp[j]  = (((sp_int_digit)in[i-0]) << (0*8))
+                      | (((sp_int_digit)in[i-1]) << (1*8))
+                      | (((sp_int_digit)in[i-2]) << (2*8))
+                      | (((sp_int_digit)in[i-3]) << (3*8));
+    #if SP_WORD_SIZE == 64
+            a->dp[j] |= (((sp_int_digit)in[i-4]) << (4*8))
+                      | (((sp_int_digit)in[i-5]) << (5*8))
+                      | (((sp_int_digit)in[i-6]) << (6*8))
+                      | (((sp_int_digit)in[i-7]) << (7*8));
+    #endif
+        }
+        if (i >= 0) {
+            a->dp[j] = 0;
+            for (k = 0; k <= i; k++) {
+                a->dp[j] <<= 8;
+                a->dp[j] |= in[k];
+            }
+        }
+        a->used = j + 1;
+    }
+
+    sp_clamp(a);
+
+    return err;
+}
+
+#ifdef HAVE_ECC
+/* Convert a number as string in big-endian format to a big number.
+ * Only supports base-16 (hexadecimal).
+ * Negative values not supported.
+ *
+ * a      SP integer.
+ * in     NUL terminated string.
+ * radix  Number of values in a digit.
+ * returns BAD_FUNC_ARG when radix not supported or value is negative, MP_VAL
+ * when a character is not valid and MP_OKAY otherwise.
+ */
+int sp_read_radix(sp_int* a, const char* in, int radix)
+{
+    int err = MP_OKAY;
+    int i, j = 0, k = 0;
+    char ch;
+
+    if ((radix != 16) || (*in == '-')) {
+        err = BAD_FUNC_ARG;
+    }
+
+    while (*in == '0') {
+        in++;
+    }
+
+    if (err == MP_OKAY) {
+        a->dp[0] = 0;
+        for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) {
+            ch = in[i];
+            if (ch >= '0' && ch <= '9')
+                ch -= '0';
+            else if (ch >= 'A' && ch <= 'F')
+                ch -= 'A' - 10;
+            else if (ch >= 'a' && ch <= 'f')
+                ch -= 'a' - 10;
+            else {
+                err = MP_VAL;
+                break;
+            }
+
+            a->dp[k] |= ((sp_int_digit)ch) << j;
+            j += 4;
+            if (k >= SP_INT_DIGITS - 1) {
+                err = MP_VAL;
+                break;
+            }
+            if (j == DIGIT_BIT)
+                a->dp[++k] = 0;
+            j &= SP_WORD_SIZE - 1;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        a->used = k + 1;
+        if (a->dp[k] == 0)
+            a->used--;
+
+        for (k++; k < a->size; k++)
+            a->dp[k] = 0;
+
+        sp_clamp(a);
+    }
+
+    return err;
+}
+#endif
+
+/* Compare two big numbers.
+ *
+ * a  SP integer.
+ * b  SP integer.
+ * returns MP_GT if a is greater than b, MP_LT if a is less than b and MP_EQ
+ * when a equals b.
+ */
+int sp_cmp(sp_int* a, sp_int* b)
+{
+    int ret = MP_EQ;
+    int i;
+
+    if (a->used > b->used)
+        ret = MP_GT;
+    else if (a->used < b->used)
+        ret = MP_LT;
+    else {
+        for (i = a->used - 1; i >= 0; i--) {
+            if (a->dp[i] > b->dp[i]) {
+                ret = MP_GT;
+                break;
+            }
+            else if (a->dp[i] < b->dp[i]) {
+                ret = MP_LT;
+                break;
+            }
+        }
+    }
+    return ret;
+}
+
+/* Count the number of bits in the big number.
+ *
+ * a  SP integer.
+ * returns the number of bits.
+ */
+int sp_count_bits(sp_int* a)
+{
+    int r = 0;
+    sp_int_digit d;
+
+    r = a->used - 1;
+    while (r >= 0 && a->dp[r] == 0)
+        r--;
+    if (r < 0)
+        r = 0;
+    else {
+        d = a->dp[r];
+        r *= SP_WORD_SIZE;
+        if (d >= (1L << (SP_WORD_SIZE / 2))) {
+            r += SP_WORD_SIZE;
+            while ((d & (1UL << (SP_WORD_SIZE - 1))) == 0) {
+                r--;
+                d <<= 1;
+            }
+        }
+        else {
+            while (d != 0) {
+                r++;
+                d >>= 1;
+            }
+        }
+    }
+
+    return r;
+}
+
+/* Determine if the most significant byte of the encoded big number has the
+ * top bit set.
+ *
+ * a  SP integer.
+ * returns 1 when the top bit is set and 0 otherwise.
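+ *
+ * Example: for a == 0x8000 the most significant encoded byte is 0x80 and
+ * the function returns 1; for a == 0x7fff it returns 0.  Callers such as
+ * the DER integer encoder use this to decide whether a leading zero byte
+ * must be prepended to keep the encoding positive.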
+ */ +int sp_leading_bit(sp_int* a) +{ + int bit = 0; + sp_int_digit d; + + if (a->used > 0) { + d = a->dp[a->used - 1]; + while (d > (sp_int_digit)0xff) + d >>= 8; + bit = (int)(d >> 7); + } + + return bit; +} + +#if !defined(NO_DH) || defined(HAVE_ECC) || defined(WC_RSA_BLINDING) || \ + !defined(WOLFSSL_RSA_VERIFY_ONLY) +/* Convert the big number to an array of bytes in big-endian format. + * The array must be large enough for encoded number - use mp_unsigned_bin_size + * to calculate the number of bytes required. + * + * a SP integer. + * out Array to put encoding into. + * returns MP_OKAY always. + */ +int sp_to_unsigned_bin(sp_int* a, byte* out) +{ + int i, j, b; + sp_int_digit d; + + j = sp_unsigned_bin_size(a) - 1; + for (i=0; j>=0; i++) { + d = a->dp[i]; + for (b = 0; b < SP_WORD_SIZE / 8; b++) { + out[j] = d; + if (--j < 0) { + break; + } + d >>= 8; + } + } + + return MP_OKAY; +} +#endif + +/* Convert the big number to an array of bytes in big-endian format. + * The array must be large enough for encoded number - use mp_unsigned_bin_size + * to calculate the number of bytes required. + * Front-pads the output array with zeros make number the size of the array. + * + * a SP integer. + * out Array to put encoding into. + * outSz Size of the array. + * returns MP_OKAY always. + */ +int sp_to_unsigned_bin_len(sp_int* a, byte* out, int outSz) +{ + int i, j, b; + + j = outSz - 1; + for (i=0; j>=0; i++) { + for (b = 0; b < SP_WORD_SIZE; b += 8) { + out[j--] = a->dp[i] >> b; + if (j < 0) + break; + } + } + + return MP_OKAY; +} + +#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || (!defined(NO_DH) || defined(HAVE_ECC)) +/* Ensure the data in the big number is zeroed. + * + * a SP integer. + */ +void sp_forcezero(sp_int* a) +{ + ForceZero(a->dp, a->used * sizeof(sp_int_digit)); + a->used = 0; +} +#endif + +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || (!defined(NO_DH) || defined(HAVE_ECC)) +/* Copy value of big number a into r. + * + * a SP integer. + * r SP integer. + * returns MP_OKAY always. + */ +int sp_copy(sp_int* a, sp_int* r) +{ + if (a != r) { + XMEMCPY(r->dp, a->dp, a->used * sizeof(sp_int_digit)); + r->used = a->used; + } + return MP_OKAY; +} + +/* creates "a" then copies b into it */ +int sp_init_copy (sp_int * a, sp_int * b) +{ + int err; + if ((err = sp_init(a)) == MP_OKAY) { + if((err = sp_copy (b, a)) != MP_OKAY) { + sp_clear(a); + } + } + return err; +} +#endif + +/* Set the big number to be the value of the digit. + * + * a SP integer. + * d Digit to be set. + * returns MP_OKAY always. + */ +int sp_set(sp_int* a, sp_int_digit d) +{ + if (d == 0) { + a->dp[0] = d; + a->used = 0; + } + else { + a->dp[0] = d; + a->used = 1; + } + return MP_OKAY; +} + +/* Recalculate the number of digits used. + * + * a SP integer. + */ +void sp_clamp(sp_int* a) +{ + int i; + + for (i = a->used - 1; i >= 0 && a->dp[i] == 0; i--) { + } + a->used = i + 1; +} + +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || (!defined(NO_DH) || defined(HAVE_ECC)) +/* Grow big number to be able to hold l digits. + * This function does nothing as the number of digits is fixed. + * + * a SP integer. + * l Number of digits. + * returns MP_MEM if the number of digits requested is more than available and + * MP_OKAY otherwise. + */ +int sp_grow(sp_int* a, int l) +{ + int err = MP_OKAY; + + if (l > a->size) + err = MP_MEM; + + return err; +} + +/* Sub a one digit number from the big number. + * + * a SP integer. + * d Digit to subtract. + * r SP integer - result. + * returns MP_OKAY always. 
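+ *
+ * Worked example of the borrow propagation (32-bit digits assumed for
+ * illustration): subtracting d = 1 from a = { 0x00000000, 0x00000001 },
+ * i.e. the value 2^32, wraps the low digit to 0xffffffff; the loop then
+ * decrements higher digits until one does not underflow, leaving
+ * r = { 0xffffffff, 0x00000000 } and, after sp_clamp(), used == 1.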
+ */
+int sp_sub_d(sp_int* a, sp_int_digit d, sp_int* r)
+{
+    int i = 0;
+    sp_int_digit t;
+
+    r->used = a->used;
+    t = a->dp[0] - d;
+    if (t > a->dp[0]) {
+        for (++i; i < a->used; i++) {
+            r->dp[i] = a->dp[i] - 1;
+            if (r->dp[i] != (sp_int_digit)-1)
+                break;
+        }
+    }
+    r->dp[0] = t;
+    if (r != a) {
+        for (++i; i < a->used; i++)
+            r->dp[i] = a->dp[i];
+    }
+    sp_clamp(r);
+
+    return MP_OKAY;
+}
+#endif
+
+/* Compare a one digit number with a big number.
+ *
+ * a  SP integer.
+ * d  Digit to compare with.
+ * returns MP_GT if a is greater than d, MP_LT if a is less than d and MP_EQ
+ * when a equals d.
+ */
+int sp_cmp_d(sp_int *a, sp_int_digit d)
+{
+    int ret = MP_EQ;
+
+    /* special case for zero */
+    if (a->used == 0) {
+        if (d == 0)
+            ret = MP_EQ;
+        else
+            ret = MP_LT;
+    }
+    else if (a->used > 1)
+        ret = MP_GT;
+    else {
+        /* compare the only digit of a to d */
+        if (a->dp[0] > d)
+            ret = MP_GT;
+        else if (a->dp[0] < d)
+            ret = MP_LT;
+    }
+
+    return ret;
+}
+
+#if !defined(NO_DH) || defined(HAVE_ECC) || !defined(WOLFSSL_RSA_VERIFY_ONLY)
+/* Left shift the number by number of bits.
+ * Bits may be larger than the word size.
+ *
+ * a  SP integer.
+ * n  Number of bits to shift.
+ * returns MP_OKAY always.
+ */
+static int sp_lshb(sp_int* a, int n)
+{
+    int i;
+
+    if (n >= SP_WORD_SIZE) {
+        sp_lshd(a, n / SP_WORD_SIZE);
+        n %= SP_WORD_SIZE;
+    }
+
+    if (n != 0) {
+        a->dp[a->used] = 0;
+        for (i = a->used - 1; i >= 0; i--) {
+            a->dp[i+1] |= a->dp[i] >> (SP_WORD_SIZE - n);
+            a->dp[i] = a->dp[i] << n;
+        }
+        if (a->dp[a->used] != 0)
+            a->used++;
+    }
+
+    return MP_OKAY;
+}
+
+/* Subtract two large numbers into result: r = a - b
+ * a must be greater than b.
+ *
+ * a  SP integer.
+ * b  SP integer.
+ * r  SP integer.
+ * returns MP_OKAY always.
+ */
+int sp_sub(sp_int* a, sp_int* b, sp_int* r)
+{
+    int i;
+    sp_int_digit c = 0;
+    sp_int_digit t;
+
+    for (i = 0; i < a->used && i < b->used; i++) {
+        t = a->dp[i] - b->dp[i] - c;
+        if (c == 0)
+            c = t > a->dp[i];
+        else
+            c = t >= a->dp[i];
+        r->dp[i] = t;
+    }
+    for (; i < a->used; i++) {
+        r->dp[i] = a->dp[i] - c;
+        c &= (r->dp[i] == (sp_int_digit)-1);
+    }
+    r->used = i;
+    sp_clamp(r);
+
+    return MP_OKAY;
+}
+
+/* Shift a right by n bits into r: r = a >> n
+ *
+ * a  SP integer operand.
+ * n  Number of bits to shift.
+ * r  SP integer result.
+ */
+void sp_rshb(sp_int* a, int n, sp_int* r)
+{
+    int i;
+    int j;
+
+    /* Read from digit i = j + n / SP_WORD_SIZE, write to digit j. */
+    for (i = n / SP_WORD_SIZE, j = 0; i < a->used-1; i++, j++)
+        r->dp[j] = (a->dp[i] >> n) | (a->dp[i+1] << (SP_WORD_SIZE - n));
+    r->dp[j] = a->dp[i] >> n;
+    r->used = j + 1;
+    sp_clamp(r);
+}
+
+/* Multiply a by digit n and put result into r shifting up o digits.
+ * r = (a * n) << (o * SP_WORD_SIZE)
+ *
+ * a  SP integer to be multiplied.
+ * n  Number to multiply by.
+ * r  SP integer result.
+ * o  Number of digits to move result up by.
+ */
+static void _sp_mul_d(sp_int* a, sp_int_digit n, sp_int* r, int o)
+{
+    int i;
+    sp_int_word t = 0;
+
+    for (i = 0; i < o; i++)
+        r->dp[i] = 0;
+
+    for (i = 0; i < a->used; i++) {
+        t += (sp_int_word)n * a->dp[i];
+        r->dp[i + o] = (sp_int_digit)t;
+        t >>= SP_WORD_SIZE;
+    }
+
+    r->dp[i+o] = (sp_int_digit)t;
+    r->used = i+o+1;
+    sp_clamp(r);
+}
+
+/* Divide a by d and return the quotient in r and the remainder in rem.
+ * r = a / d; rem = a % d
+ *
+ * a    SP integer to be divided.
+ * d    SP integer to divide by.
+ * r    SP integer of quotient.
+ * rem  SP integer of remainder.
+ * returns MP_VAL when d is 0, MP_MEM when dynamic memory allocation fails and
+ * MP_OKAY otherwise.
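+ *
+ * This is schoolbook long division on sp_int_digit digits: both operands
+ * are shifted left until the divisor's top bit is set, each quotient
+ * digit is estimated by dividing the top two remainder words by the top
+ * divisor word, and the estimate is walked back while estimate * divisor
+ * still exceeds the remainder.  Base-10 analogy for the correction step:
+ * for 200 / 29 the leading-digit estimate 20 / 2 gives 10, clipped to the
+ * digit maximum 9; since 9*29, 8*29 and 7*29 all exceed 200, the loop
+ * steps down to 6, leaving remainder 26.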
+ */ +static int sp_div(sp_int* a, sp_int* d, sp_int* r, sp_int* rem) +{ + int err = MP_OKAY; + int ret; + int done = 0; + int i; + int s; +#ifndef WOLFSSL_SP_DIV_32 + sp_int_word w = 0; +#endif + sp_int_digit dt; + sp_int_digit t; +#ifdef WOLFSSL_SMALL_STACK + sp_int* sa = NULL; + sp_int* sd; + sp_int* tr; + sp_int* trial; +#else + sp_int sa[1]; + sp_int sd[1]; + sp_int tr[1]; + sp_int trial[1]; +#endif + + if (sp_iszero(d)) + err = MP_VAL; + + ret = sp_cmp(a, d); + if (ret == MP_LT) { + if (rem != NULL) { + sp_copy(a, rem); + } + if (r != NULL) { + sp_set(r, 0); + } + done = 1; + } + else if (ret == MP_EQ) { + if (rem != NULL) { + sp_set(rem, 0); + } + if (r != NULL) { + sp_set(r, 1); + } + done = 1; + } + else if (sp_count_bits(a) == sp_count_bits(d)) { + /* a is greater than d but same bit length */ + if (rem != NULL) { + sp_sub(a, d, rem); + } + if (r != NULL) { + sp_set(r, 1); + } + done = 1; + } + +#ifdef WOLFSSL_SMALL_STACK + if (!done && err == MP_OKAY) { + sa = (sp_int*)XMALLOC(sizeof(sp_int) * 4, NULL, DYNAMIC_TYPE_BIGINT); + if (sa == NULL) { + err = MP_MEM; + } + } +#endif + + if (!done && err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + sd = &sa[1]; + tr = &sa[2]; + trial = &sa[3]; +#endif + + sp_init(sa); + sp_init(sd); + sp_init(tr); + sp_init(trial); + + s = sp_count_bits(d); + s = SP_WORD_SIZE - (s % SP_WORD_SIZE); + sp_copy(a, sa); + if (s != SP_WORD_SIZE) { + sp_lshb(sa, s); + sp_copy(d, sd); + sp_lshb(sd, s); + d = sd; + } + + tr->used = sa->used - d->used + 1; + sp_clear(tr); + tr->used = sa->used - d->used + 1; + dt = d->dp[d->used-1]; +#ifndef WOLFSSL_SP_DIV_32 + for (i = sa->used - 1; i >= d->used; ) { + if (sa->dp[i] > dt) { + t = (sp_int_digit)-1; + } + else { + w = ((sp_int_word)sa->dp[i] << SP_WORD_SIZE) | sa->dp[i-1]; + w /= dt; + if (w > (sp_int_digit)-1) { + t = (sp_int_digit)-1; + } + else { + t = (sp_int_digit)w; + } + } + + if (t > 0) { + _sp_mul_d(d, t, trial, i - d->used); + while (sp_cmp(trial, sa) == MP_GT) { + t--; + _sp_mul_d(d, t, trial, i - d->used); + } + sp_sub(sa, trial, sa); + tr->dp[i - d->used] += t; + if (tr->dp[i - d->used] < t) + tr->dp[i + 1 - d->used]++; + } + i = sa->used - 1; + } +#else + { + sp_int_digit div = (dt >> (SP_WORD_SIZE / 2)) + 1; + for (i = sa->used - 1; i >= d->used; ) { + t = sa->dp[i] / div; + if ((t > 0) && (t << (SP_WORD_SIZE / 2) == 0)) + t = (sp_int_digit)-1; + t <<= SP_WORD_SIZE / 2; + if (t == 0) { + t = sa->dp[i] << (SP_WORD_SIZE / 2); + t += sa->dp[i-1] >> (SP_WORD_SIZE / 2); + t /= div; + } + + if (t > 0) { + _sp_mul_d(d, t, trial, i - d->used); + while (sp_cmp(trial, sa) == MP_GT) { + t--; + _sp_mul_d(d, t, trial, i - d->used); + } + sp_sub(sa, trial, sa); + tr->dp[i - d->used] += t; + if (tr->dp[i - d->used] < t) + tr->dp[i + 1 - d->used]++; + } + i = sa->used - 1; + } + + while (sp_cmp(sa, d) != MP_LT) { + sp_sub(sa, d, sa); + sp_add_d(tr, 1, tr); + } + } +#endif + + sp_clamp(tr); + + if (rem != NULL) { + if (s != SP_WORD_SIZE) + sp_rshb(sa, s, sa); + sp_copy(sa, rem); + } + if (r != NULL) + sp_copy(tr, r); + } + +#ifdef WOLFSSL_SMALL_STACK + if (sa != NULL) + XFREE(sa, NULL, DYNAMIC_TYPE_BIGINT); +#endif + + return err; +} + + +#ifndef FREESCALE_LTC_TFM +/* Calculate the remainder of dividing a by m: r = a mod m. + * + * a SP integer. + * m SP integer. + * r SP integer. + * returns MP_VAL when m is 0 and MP_OKAY otherwise. + */ +int sp_mod(sp_int* a, sp_int* m, sp_int* r) +{ + return sp_div(a, m, NULL, r); +} +#endif +#endif + +/* Clear all data in the big number and sets value to zero. 
+ *
+ * a  SP integer.
+ */
+void sp_zero(sp_int* a)
+{
+    XMEMSET(a->dp, 0, a->size * sizeof(*a->dp));
+    a->used = 0;
+}
+
+/* Add a one digit number to the big number.
+ *
+ * a  SP integer.
+ * d  Digit to add.
+ * r  SP integer - result.
+ * returns MP_OKAY always.
+ */
+int sp_add_d(sp_int* a, sp_int_digit d, sp_int* r)
+{
+    int i = 0;
+
+    r->used = a->used;
+    if (a->used == 0) {
+        r->used = 1;
+    }
+    r->dp[0] = a->dp[0] + d;
+    if (r->dp[0] < a->dp[0]) {
+        for (++i; i < a->used; i++) {
+            r->dp[i] = a->dp[i] + 1;
+            if (r->dp[i] != 0)
+                break;
+        }
+
+        if (i == a->used) {
+            r->used++;
+            r->dp[i] = 1;
+        }
+    }
+    if (r != a) {
+        for (++i; i < a->used; i++)
+            r->dp[i] = a->dp[i];
+    }
+
+    return MP_OKAY;
+}
+
+#if !defined(NO_DH) || defined(HAVE_ECC) || defined(WC_RSA_BLINDING) || \
+    !defined(WOLFSSL_RSA_VERIFY_ONLY)
+/* Left shift the big number by a number of digits.
+ * Will chop off digits overflowing maximum size.
+ *
+ * a  SP integer.
+ * s  Number of digits to shift.
+ * returns MP_OKAY always.
+ */
+int sp_lshd(sp_int* a, int s)
+{
+    if (a->used + s > a->size)
+        a->used = a->size - s;
+
+    XMEMMOVE(a->dp + s, a->dp, a->used * sizeof(sp_int_digit));
+    a->used += s;
+    XMEMSET(a->dp, 0, s * sizeof(sp_int_digit));
+    sp_clamp(a);
+
+    return MP_OKAY;
+}
+#endif
+
+#if !defined(NO_PWDBASED) || defined(WOLFSSL_KEY_GEN) || !defined(NO_DH)
+/* Add two large numbers into result: r = a + b
+ *
+ * a  SP integer.
+ * b  SP integer.
+ * r  SP integer.
+ * returns MP_OKAY always.
+ */
+int sp_add(sp_int* a, sp_int* b, sp_int* r)
+{
+    int i;
+    sp_int_digit c = 0;
+    sp_int_digit t;
+
+    for (i = 0; i < a->used && i < b->used; i++) {
+        t = a->dp[i] + b->dp[i] + c;
+        if (c == 0)
+            c = t < a->dp[i];
+        else
+            c = t <= a->dp[i];
+        r->dp[i] = t;
+    }
+    for (; i < a->used; i++) {
+        r->dp[i] = a->dp[i] + c;
+        c = (a->dp[i] != 0) && (r->dp[i] == 0);
+    }
+    for (; i < b->used; i++) {
+        r->dp[i] = b->dp[i] + c;
+        c = (b->dp[i] != 0) && (r->dp[i] == 0);
+    }
+    r->dp[i] = c;
+    r->used = (int)(i + c);
+
+    return MP_OKAY;
+}
+#endif /* !NO_PWDBASED || WOLFSSL_KEY_GEN || !NO_DH */
+
+#ifndef NO_RSA
+/* Set a number into the big number.
+ *
+ * a  SP integer.
+ * b  Value to set.
+ * returns MP_OKAY always.
+ */
+int sp_set_int(sp_int* a, unsigned long b)
+{
+    if (b == 0) {
+        a->used = 0;
+        a->dp[0] = 0;
+    }
+    else {
+        a->used = 1;
+        a->dp[0] = (sp_int_digit)b;
+    }
+
+    return MP_OKAY;
+}
+#endif /* !NO_RSA */
+
+#ifdef WC_MP_TO_RADIX
+/* Hex string characters. */
+static const char sp_hex_char[16] = {
+    '0', '1', '2', '3', '4', '5', '6', '7',
+    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
+};
+
+/* Put the hex string version, big-endian, of a in str.
+ *
+ * a    SP integer.
+ * str  Hex string is stored here.
+ * returns MP_OKAY always.
+ */
+int sp_tohex(sp_int* a, char* str)
+{
+    int i, j;
+
+    /* quick out if it's zero */
+    if (sp_iszero(a) == MP_YES) {
+        *str++ = '0';
+        *str = '\0';
+    }
+    else {
+        i = a->used - 1;
+        for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
+            if (((a->dp[i] >> j) & 0xf) != 0)
+                break;
+        }
+        for (; j >= 0; j -= 4)
+            *(str++) = sp_hex_char[(a->dp[i] >> j) & 0xf];
+        for (--i; i >= 0; i--) {
+            for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4)
+                *(str++) = sp_hex_char[(a->dp[i] >> j) & 0xf];
+        }
+        *str = '\0';
+    }
+
+    return MP_OKAY;
+}
+#endif /* WC_MP_TO_RADIX */
+
+#if defined(WOLFSSL_KEY_GEN) || !defined(NO_DH) && !defined(WC_NO_RNG)
+/* Set a bit of a: a |= 1 << i
+ * The field 'used' is updated in a.
+ *
+ * a  SP integer to modify.
+ * i  Index of bit to set.
+ * returns BAD_FUNC_ARG when a is NULL or the bit index is out of range and
+ * MP_OKAY otherwise.
+ */
+int sp_set_bit(sp_int* a, int i)
+{
+    int ret = MP_OKAY;
+
+    if ((a == NULL) || (i / SP_WORD_SIZE >= SP_INT_DIGITS)) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        a->dp[i/SP_WORD_SIZE] |= (sp_int_digit)1 << (i % SP_WORD_SIZE);
+        if (a->used <= i / SP_WORD_SIZE)
+            a->used = (i / SP_WORD_SIZE) + 1;
+    }
+    return ret;
+}
+
+/* Exponentiate 2 to the power of e: a = 2^e
+ * This is done by setting the 'e'th bit.
+ *
+ * a  SP integer.
+ * e  Exponent.
+ * returns BAD_FUNC_ARG when e is out of range and MP_OKAY otherwise.
+ */
+int sp_2expt(sp_int* a, int e)
+{
+    sp_zero(a);
+    return sp_set_bit(a, e);
+}
+
+/* Generate a random prime for RSA only.
+ *
+ * r     SP integer.
+ * len   Number of bytes in the prime.
+ * rng   Random number generator.
+ * heap  Unused.
+ * returns MP_OKAY on success and MP_VAL when length is not supported or random
+ * number generator fails.
+ */
+int sp_rand_prime(sp_int* r, int len, WC_RNG* rng, void* heap)
+{
+    static const int USE_BBS = 1;
+    int err = 0, type;
+    int isPrime = MP_NO;
+
+    (void)heap;
+
+    /* get type */
+    if (len < 0) {
+        type = USE_BBS;
+        len = -len;
+    }
+    else {
+        type = 0;
+    }
+
+#if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
+    if (len == 32) {
+    }
+    else
+#endif
+    /* Generate RSA primes that are half the modulus length. */
+#ifndef WOLFSSL_SP_NO_3072
+    if (len != 128 && len != 192)
+#else
+    if (len != 128)
+#endif
+    {
+        err = MP_VAL;
+    }
+
+    r->used = len / (SP_WORD_SIZE / 8);
+
+    /* Assume the candidate is probably prime and then test until
+     * it is proven composite. */
+    while (err == 0 && isPrime == MP_NO) {
+#ifdef SHOW_GEN
+        printf(".");
+        fflush(stdout);
+#endif
+        /* generate value */
+        err = wc_RNG_GenerateBlock(rng, (byte*)r->dp, len);
+        if (err != 0) {
+            err = MP_VAL;
+            break;
+        }
+
+        /* munge bits */
+        ((byte*)r->dp)[len-1] |= 0x80 | 0x40;
+        r->dp[0] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
+
+        /* test */
+        /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
+         * of a 1024-bit candidate being a false positive, when it is our
+         * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
+         * Using 8 because we've always used 8 */
+        sp_prime_is_prime_ex(r, 8, &isPrime, rng);
+    }
+
+    return err;
+}
+
+/* Multiply a by b and store in r: r = a * b
+ *
+ * a  SP integer to multiply.
+ * b  SP integer to multiply.
+ * r  SP integer result.
+ * returns MP_VAL when the result is too big, MP_MEM when dynamic memory
+ * allocation fails and MP_OKAY otherwise.
+ */
+int sp_mul(sp_int* a, sp_int* b, sp_int* r)
+{
+    int err = MP_OKAY;
+    int i;
+#ifdef WOLFSSL_SMALL_STACK
+    sp_int* t = NULL;
+    sp_int* tr;
+#else
+    sp_int t[1];
+    sp_int tr[1];
+#endif
+
+    if (a->used + b->used > SP_INT_DIGITS)
+        err = MP_VAL;
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
+        if (t == NULL)
+            err = MP_MEM;
+        else
+            tr = &t[1];
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        sp_init(t);
+        sp_init(tr);
+
+        for (i = 0; i < b->used; i++) {
+            _sp_mul_d(a, b->dp[i], t, i);
+            sp_add(tr, t, tr);
+        }
+        sp_copy(tr, r);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (t != NULL)
+        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+    return err;
+}
+
+/* Square a mod m and store in r: r = (a * a) mod m
+ *
+ * a  SP integer to square.
+ * m  SP integer modulus.
+ * r  SP integer result.
+ * returns MP_VAL when m is 0 or a is too big, MP_MEM when dynamic memory
+ * allocation fails and MP_OKAY otherwise.
+ */ +static int sp_sqrmod(sp_int* a, sp_int* m, sp_int* r) +{ + int err = MP_OKAY; + + if (a->used * 2 > SP_INT_DIGITS) + err = MP_VAL; + + if (err == MP_OKAY) + err = sp_mul(a, a, r); + if (err == MP_OKAY) + err = sp_mod(r, m, r); + + return err; +} + +#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_KEY_GEN) +/* Multiply a by b mod m and store in r: r = (a * b) mod m + * + * a SP integer to multiply. + * b SP integer to multiply. + * m SP integer modulus. + * r SP integer result. + * returns MP_VAL when m is 0, MP_MEM when dynamic memory allocation fails and + * MP_OKAY otherwise. + */ +int sp_mulmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r) +{ + int err = MP_OKAY; +#ifdef WOLFSSL_SMALL_STACK + sp_int* t = NULL; +#else + sp_int t[1]; +#endif + + if (a->used + b->used > SP_INT_DIGITS) + err = MP_VAL; + +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); + if (t == NULL) { + err = MP_MEM; + } + } +#endif + if (err == MP_OKAY) { + err = sp_mul(a, b, t); + } + if (err == MP_OKAY) { + err = sp_mod(t, m, r); + } + +#ifdef WOLFSSL_SMALL_STACK + if (t != NULL) + XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return err; +} +#endif + +/* Calculate a modulo the digit d into r: r = a mod d + * + * a SP integer to square. + * d SP integer digit, modulus. + * r SP integer digit, result. + * returns MP_VAL when d is 0 and MP_OKAY otherwise. + */ +static int sp_mod_d(sp_int* a, const sp_int_digit d, sp_int_digit* r) +{ + int err = MP_OKAY; + int i; + sp_int_word w = 0; + sp_int_digit t; + + if (d == 0) + err = MP_VAL; + + if (err == MP_OKAY) { + for (i = a->used - 1; i >= 0; i--) { + w = (w << SP_WORD_SIZE) | a->dp[i]; + t = (sp_int_digit)(w / d); + w -= (sp_int_word)t * d; + } + + *r = (sp_int_digit)w; + } + + return err; +} + +/* Calculates the Greatest Common Denominator (GCD) of a and b into r. + * + * a SP integer operand. + * b SP integer operand. + * r SP integer result. + * returns MP_MEM when dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_gcd(sp_int* a, sp_int* b, sp_int* r) +{ + int err = MP_OKAY; +#ifdef WOLFSSL_SMALL_STACK + sp_int* u = NULL; + sp_int* v; + sp_int* t; +#else + sp_int u[1], v[1], t[1]; +#endif + + if (sp_iszero(a)) + sp_copy(b, r); + else if (sp_iszero(b)) + sp_copy(a, r); + else { +#ifdef WOLFSSL_SMALL_STACK + u = (sp_int*)XMALLOC(sizeof(sp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT); + if (u == NULL) + err = MP_MEM; + else { + v = &u[1]; + t = &u[2]; + } +#endif + + if (err == MP_OKAY) { + sp_init(u); + sp_init(v); + sp_init(t); + + if (sp_cmp(a, b) != MP_LT) { + sp_copy(b, u); + /* First iteration - u = a, v = b */ + if (b->used == 1) { + err = sp_mod_d(a, b->dp[0], &v->dp[0]); + if (err == MP_OKAY) + v->used = (v->dp[0] != 0); + } + else + err = sp_mod(a, b, v); + } + else { + sp_copy(a, u); + /* First iteration - u = b, v = a */ + if (a->used == 1) { + err = sp_mod_d(b, a->dp[0], &v->dp[0]); + if (err == MP_OKAY) + v->used = (v->dp[0] != 0); + } + else + err = sp_mod(b, a, v); + } + } + + if (err == MP_OKAY) { + while (!sp_iszero(v)) { + if (v->used == 1) { + sp_mod_d(u, v->dp[0], &t->dp[0]); + t->used = (t->dp[0] != 0); + } + else + sp_mod(u, v, t); + sp_copy(v, u); + sp_copy(t, v); + } + sp_copy(u, r); + } + } + +#ifdef WOLFSSL_SMALL_STACK + if (u != NULL) + XFREE(u, NULL, DYNAMIC_TYPE_BIGINT); +#endif + + return err; +} + +/* Divides a by 2 and stores in r: r = a >> 1 + * + * a SP integer to divide. + * r SP integer result. + * returns MP_OKAY always. 
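+ *
+ * sp_div_2 supports the binary extended Euclidean loop in sp_invmod()
+ * below, which keeps the invariants b*a == u (mod m) and c*a == v (mod m)
+ * while halving and subtracting; when v reaches 1, c holds the inverse.
+ * Small example: for a = 3, m = 7 the loop finishes with v == 1 and
+ * c == 5, and indeed 3 * 5 = 15 == 1 (mod 7).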
+ */ +static int sp_div_2(sp_int* a, sp_int* r) +{ + int i; + + for (i = 0; i < a->used-1; i++) + r->dp[i] = (a->dp[i] >> 1) | (a->dp[i+1] << (SP_WORD_SIZE - 1)); + r->dp[i] = a->dp[i] >> 1; + r->used = i + 1; + sp_clamp(r); + + return MP_OKAY; +} + + +/* Calculates the multiplicative inverse in the field. + * + * a SP integer to invert. + * m SP integer that is the modulus of the field. + * r SP integer result. + * returns MP_VAL when a or m is 0, MP_MEM when dynamic memory allocation fails + * and MP_OKAY otherwise. + */ +int sp_invmod(sp_int* a, sp_int* m, sp_int* r) +{ + int err = MP_OKAY; +#ifdef WOLFSSL_SMALL_STACK + sp_int* u = NULL; + sp_int* v; + sp_int* b; + sp_int* c; +#else + sp_int u[1], v[1], b[1], c[1]; +#endif + +#ifdef WOLFSSL_SMALL_STACK + u = (sp_int*)XMALLOC(sizeof(sp_int) * 4, NULL, DYNAMIC_TYPE_BIGINT); + if (u == NULL) { + err = MP_MEM; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + v = &u[1]; + b = &u[2]; + c = &u[3]; +#endif + sp_init(v); + + if (sp_cmp(a, m) != MP_LT) { + err = sp_mod(a, m, v); + a = v; + } + } + + /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */ + if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m))) { + err = MP_VAL; + } + /* r*2*x != n*2*y + 1 */ + if ((err == MP_OKAY) && sp_iseven(a) && sp_iseven(m)) { + err = MP_VAL; + } + + /* 1*1 = 0*m + 1 */ + if ((err == MP_OKAY) && sp_isone(a)) { + sp_set(r, 1); + } + else if (err != MP_OKAY) { + } + else if (sp_iseven(m)) { + /* a^-1 mod m = m + (1 - m*(m^-1 % a)) / a + * = m - (m*(m^-1 % a) - 1) / a + */ + err = sp_invmod(m, a, r); + if (err == MP_OKAY) { + err = sp_mul(r, m, r); + } + if (err == MP_OKAY) { + sp_sub_d(r, 1, r); + sp_div(r, a, r, NULL); + sp_sub(m, r, r); + } + } + else { + if (err == MP_OKAY) { + sp_init(u); + sp_init(b); + sp_init(c); + + sp_copy(m, u); + sp_copy(a, v); + sp_zero(b); + sp_set(c, 1); + + while (!sp_isone(v) && !sp_iszero(u)) { + if (sp_iseven(u)) { + sp_div_2(u, u); + if (sp_isodd(b)) { + sp_add(b, m, b); + } + sp_div_2(b, b); + } + else if (sp_iseven(v)) { + sp_div_2(v, v); + if (sp_isodd(c)) { + sp_add(c, m, c); + } + sp_div_2(c, c); + } + else if (sp_cmp(u, v) != MP_LT) { + sp_sub(u, v, u); + if (sp_cmp(b, c) == MP_LT) { + sp_add(b, m, b); + } + sp_sub(b, c, b); + } + else { + sp_sub(v, u, v); + if (sp_cmp(c, b) == MP_LT) { + sp_add(c, m, c); + } + sp_sub(c, b, c); + } + } + if (sp_iszero(u)) { + err = MP_VAL; + } + else { + sp_copy(c, r); + } + } + } + +#ifdef WOLFSSL_SMALL_STACK + if (u != NULL) { + XFREE(u, NULL, DYNAMIC_TYPE_BIGINT); + } +#endif + + return err; +} + +/* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r. + * + * a SP integer operand. + * b SP integer operand. + * r SP integer result. + * returns MP_MEM when dynamic memory allocation fails and MP_OKAY otherwise. 
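+ *
+ * Computed as lcm(a, b) = (a / gcd(a, b)) * b, dividing the larger
+ * operand by the gcd first to keep the intermediate value small.
+ * E.g. lcm(12, 18): gcd = 6, 18 / 6 = 3 and 3 * 12 = 36.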
+ */
+int sp_lcm(sp_int* a, sp_int* b, sp_int* r)
+{
+    int err = MP_OKAY;
+#ifndef WOLFSSL_SMALL_STACK
+    sp_int t[2];
+#else
+    sp_int *t = NULL;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+    t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
+    if (t == NULL) {
+        err = MP_MEM;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        sp_init(&t[0]);
+        sp_init(&t[1]);
+        err = sp_gcd(a, b, &t[0]);
+        if (err == MP_OKAY) {
+            if (sp_cmp(a, b) == MP_GT) {
+                err = sp_div(a, &t[0], &t[1], NULL);
+                if (err == MP_OKAY)
+                    err = sp_mul(b, &t[1], r);
+            }
+            else {
+                err = sp_div(b, &t[0], &t[1], NULL);
+                if (err == MP_OKAY)
+                    err = sp_mul(a, &t[1], r);
+            }
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (t != NULL)
+        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+    return err;
+}
+
+/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
+ *
+ * b  SP integer base.
+ * e  SP integer exponent.
+ * m  SP integer modulus.
+ * r  SP integer result.
+ * returns MP_VAL when m is not 1024, 1536, 2048, 3072 or 4096 bits and
+ * otherwise MP_OKAY.
+ */
+int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
+{
+    int err = MP_OKAY;
+    int done = 0;
+    int mBits = sp_count_bits(m);
+    int bBits = sp_count_bits(b);
+    int eBits = sp_count_bits(e);
+
+    if (sp_iszero(m)) {
+        err = MP_VAL;
+    }
+    else if (sp_isone(m)) {
+        sp_set(r, 0);
+        done = 1;
+    }
+    else if (sp_iszero(e)) {
+        sp_set(r, 1);
+        done = 1;
+    }
+    else if (sp_iszero(b)) {
+        sp_set(r, 0);
+        done = 1;
+    }
+    else if (m->used * 2 > SP_INT_DIGITS) {
+        err = BAD_FUNC_ARG;
+    }
+
+    if (!done && (err == MP_OKAY)) {
+#ifndef WOLFSSL_SP_NO_2048
+        if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) &&
+                (eBits <= 1024)) {
+            err = sp_ModExp_1024(b, e, m, r);
+            done = 1;
+        }
+        else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) &&
+                 (eBits <= 2048)) {
+            err = sp_ModExp_2048(b, e, m, r);
+            done = 1;
+        }
+        else
+#endif
+#ifndef WOLFSSL_SP_NO_3072
+        if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) &&
+                (eBits <= 1536)) {
+            err = sp_ModExp_1536(b, e, m, r);
+            done = 1;
+        }
+        else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) &&
+                 (eBits <= 3072)) {
+            err = sp_ModExp_3072(b, e, m, r);
+            done = 1;
+        }
+        else
+#endif
+#ifdef WOLFSSL_SP_4096
+        if ((mBits == 4096) && sp_isodd(m) && (bBits <= 4096) &&
+                (eBits <= 4096)) {
+            err = sp_ModExp_4096(b, e, m, r);
+            done = 1;
+        }
+        else
+#endif
+        {
+        }
+    }
+#if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
+    if (!done && (err == MP_OKAY)) {
+        int i;
+
+    #ifdef WOLFSSL_SMALL_STACK
+        sp_int* t = NULL;
+    #else
+        sp_int t[1];
+    #endif
+
+    #ifdef WOLFSSL_SMALL_STACK
+        if (!done && (err == MP_OKAY)) {
+            t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT);
+            if (t == NULL) {
+                err = MP_MEM;
+            }
+        }
+    #endif
+        if (!done && (err == MP_OKAY)) {
+            sp_init(t);
+
+            if (sp_cmp(b, m) != MP_LT) {
+                err = sp_mod(b, m, t);
+                if (err == MP_OKAY && sp_iszero(t)) {
+                    sp_set(r, 0);
+                    done = 1;
+                }
+            }
+            else {
+                sp_copy(b, t);
+            }
+
+            if (!done && (err == MP_OKAY)) {
+                for (i = eBits-2; err == MP_OKAY && i >= 0; i--) {
+                    err = sp_sqrmod(t, m, t);
+                    if (err == MP_OKAY && (e->dp[i / SP_WORD_SIZE] >>
+                            (i % SP_WORD_SIZE)) & 1) {
+                        err = sp_mulmod(t, b, m, t);
+                    }
+                }
+            }
+        }
+        if (!done && (err == MP_OKAY)) {
+            sp_copy(t, r);
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        if (t != NULL) {
+            XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+        }
+    #endif
+    }
+#else
+    if (!done && (err == MP_OKAY)) {
+        err = MP_VAL;
+    }
+#endif
+
+    (void)mBits;
+    (void)bBits;
+    (void)eBits;
+
+    return err;
+}
+
+
+/* Number of entries in array of number of least 
significant zero bits. */ +#define SP_LNZ_CNT 16 +/* Number of bits the array checks. */ +#define SP_LNZ_BITS 4 +/* Mask to apply to check with array. */ +#define SP_LNZ_MASK 0xf +/* Number of least significant zero bits in first SP_LNZ_CNT numbers. */ +static const int lnz[SP_LNZ_CNT] = { + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 +}; + +/* Count the number of least significant zero bits. + * + * a Number to check + * returns the count of least significant zero bits. + */ +static int sp_cnt_lsb(sp_int* a) +{ + int i, j; + int cnt = 0; + int bc = 0; + + if (!sp_iszero(a)) { + for (i = 0; i < a->used && a->dp[i] == 0; i++, cnt += SP_WORD_SIZE) { + } + + for (j = 0; j < SP_WORD_SIZE; j += SP_LNZ_BITS) { + bc = lnz[(a->dp[i] >> j) & SP_LNZ_MASK]; + if (bc != 4) { + bc += cnt + j; + break; + } + } + } + + return bc; +} + +/* Miller-Rabin test of "a" to the base of "b" as described in + * HAC pp. 139 Algorithm 4.24 + * + * Sets result to 0 if definitely composite or 1 if probably prime. + * Randomly the chance of error is no more than 1/4 and often + * very much lower. + * + * a SP integer to check. + * b SP integer small prime. + * result Whether a is likely prime: MP_YES or MP_NO. + * n1 SP integer operand. + * y SP integer operand. + * r SP integer operand. + * returns MP_VAL when a is not 1024, 2048, 1536 or 3072 and MP_OKAY otherwise. + */ +static int sp_prime_miller_rabin_ex(sp_int * a, sp_int * b, int *result, + sp_int *n1, sp_int *y, sp_int *r) +{ + int s, j; + int err = MP_OKAY; + + /* default */ + *result = MP_NO; + + /* ensure b > 1 */ + if (sp_cmp_d(b, 1) == MP_GT) { + /* get n1 = a - 1 */ + sp_copy(a, n1); + sp_sub_d(n1, 1, n1); + /* set 2**s * r = n1 */ + sp_copy(n1, r); + + /* count the number of least significant bits + * which are zero + */ + s = sp_cnt_lsb(r); + + /* now divide n - 1 by 2**s */ + sp_rshb(r, s, r); + + /* compute y = b**r mod a */ + sp_zero(y); + + err = sp_exptmod(b, r, a, y); + + if (err == MP_OKAY) { + /* probably prime until shown otherwise */ + *result = MP_YES; + + /* if y != 1 and y != n1 do */ + if (sp_cmp_d(y, 1) != MP_EQ && sp_cmp(y, n1) != MP_EQ) { + j = 1; + /* while j <= s-1 and y != n1 */ + while ((j <= (s - 1)) && sp_cmp(y, n1) != MP_EQ) { + sp_sqrmod(y, a, y); + + /* if y == 1 then composite */ + if (sp_cmp_d(y, 1) == MP_EQ) { + *result = MP_NO; + break; + } + ++j; + } + + /* if y != n1 then composite */ + if (*result == MP_YES && sp_cmp(y, n1) != MP_EQ) + *result = MP_NO; + } + } + } + + return err; +} + +/* Miller-Rabin test of "a" to the base of "b" as described in + * HAC pp. 139 Algorithm 4.24 + * + * Sets result to 0 if definitely composite or 1 if probably prime. + * Randomly the chance of error is no more than 1/4 and often + * very much lower. + * + * a SP integer to check. + * b SP integer small prime. + * result Whether a is likely prime: MP_YES or MP_NO. + * returns MP_MEM when dynamic memory allocation fails, MP_VAL when a is not + * 1024, 2048, 1536 or 3072 and MP_OKAY otherwise. 
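+ *
+ * Tiny worked example: a = 13, b = 2.  n1 = 12 = 2^2 * 3, so s = 2 and
+ * r = 3.  y = 2^3 mod 13 = 8, which is neither 1 nor n1, so the loop
+ * squares once: 8^2 mod 13 = 12 == n1, and 13 is reported as probably
+ * prime.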
+ */ +static int sp_prime_miller_rabin(sp_int * a, sp_int * b, int *result) +{ + int err = MP_OKAY; +#ifndef WOLFSSL_SMALL_STACK + sp_int n1[1], y[1], r[1]; +#else + sp_int *n1 = NULL, *y, *r; +#endif + +#ifdef WOLFSSL_SMALL_STACK + n1 = (sp_int*)XMALLOC(sizeof(sp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT); + if (n1 == NULL) + err = MP_MEM; + else { + y = &n1[1]; + r = &n1[2]; + } +#endif + + if (err == MP_OKAY) { + sp_init(n1); + sp_init(y); + sp_init(r); + + err = sp_prime_miller_rabin_ex(a, b, result, n1, y, r); + + sp_clear(n1); + sp_clear(y); + sp_clear(r); + } + +#ifdef WOLFSSL_SMALL_STACK + if (n1 != NULL) + XFREE(n1, NULL, DYNAMIC_TYPE_BIGINT); +#endif + + return err; +} + +/* Number of pre-computed primes. First n primes. */ +#define SP_PRIME_SIZE 256 + +/* a few primes */ +static const sp_int_digit primes[SP_PRIME_SIZE] = { + 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, + 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, + 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, + 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083, + 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, + 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, + 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, + 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, + + 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, + 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, + 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, + 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, + 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, + 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, + 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, + 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, + + 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, + 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, + 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, + 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, + 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, + 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, + 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, + 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, + + 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, + 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, + 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, + 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, + 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, + 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, + 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, + 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653 +}; + + +/* Check whether a is prime. + * Checks against a number of small primes and does t iterations of + * Miller-Rabin. + * + * a SP integer to check. + * t Number of iterations of Muller-Rabin to perform. + * result MP_YES when prime. + * MP_NO when not prime. + * returns MP_VAL when t is out of range, MP_MEM when dynamic memory allocation + * fails and otherwise MP_OKAY. 
+ */ +int sp_prime_is_prime(sp_int *a, int t, int* result) +{ + int err = MP_OKAY; + int i; + int haveRes = 0; +#ifndef WOLFSSL_SMALL_STACK + sp_int b[1]; +#else + sp_int *b = NULL; +#endif + sp_int_digit d; + + if (t <= 0 || t > SP_PRIME_SIZE) { + *result = MP_NO; + err = MP_VAL; + } + + if (sp_isone(a)) { + *result = MP_NO; + return MP_OKAY; + } + + if (err == MP_OKAY && a->used == 1) { + /* check against primes table */ + for (i = 0; i < SP_PRIME_SIZE; i++) { + if (sp_cmp_d(a, primes[i]) == MP_EQ) { + *result = MP_YES; + haveRes = 1; + break; + } + } + } + + if (err == MP_OKAY && !haveRes) { + /* do trial division */ + for (i = 0; i < SP_PRIME_SIZE; i++) { + err = sp_mod_d(a, primes[i], &d); + if (err != MP_OKAY || d == 0) { + *result = MP_NO; + haveRes = 1; + break; + } + } + } + +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY && !haveRes) { + b = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); + if (b == NULL) + err = MP_MEM; + } +#endif + + if (err == MP_OKAY && !haveRes) { + /* now do 't' miller rabins */ + sp_init(b); + for (i = 0; i < t; i++) { + sp_set(b, primes[i]); + err = sp_prime_miller_rabin(a, b, result); + if (err != MP_OKAY || *result == MP_NO) + break; + } + } + +#ifdef WOLFSSL_SMALL_STACK + if (b != NULL) + XFREE(b, NULL, DYNAMIC_TYPE_BIGINT); +#endif + + return err; +} + +/* Check whether a is prime. + * Checks against a number of small primes and does t iterations of + * Miller-Rabin. + * + * a SP integer to check. + * t Number of iterations of Muller-Rabin to perform. + * result MP_YES when prime. + * MP_NO when not prime. + * rng Random number generator. + * returns MP_VAL when t is out of range, MP_MEM when dynamic memory allocation + * fails and otherwise MP_OKAY. + */ +int sp_prime_is_prime_ex(sp_int* a, int t, int* result, WC_RNG* rng) +{ + int err = MP_OKAY; + int ret = MP_YES; + int haveRes = 0; + int i; +#ifndef WC_NO_RNG + #ifndef WOLFSSL_SMALL_STACK + sp_int b[1], c[1], n1[1], y[1], r[1]; + #else + sp_int *b = NULL, *c = NULL, *n1 = NULL, *y = NULL, *r = NULL; + #endif + word32 baseSz; +#endif + + if (a == NULL || result == NULL || rng == NULL) + err = MP_VAL; + + if (sp_isone(a)) { + *result = MP_NO; + return MP_OKAY; + } + + if (err == MP_OKAY && a->used == 1) { + /* check against primes table */ + for (i = 0; i < SP_PRIME_SIZE; i++) { + if (sp_cmp_d(a, primes[i]) == MP_EQ) { + ret = MP_YES; + haveRes = 1; + break; + } + } + } + + if (err == MP_OKAY && !haveRes) { + sp_int_digit d; + + /* do trial division */ + for (i = 0; i < SP_PRIME_SIZE; i++) { + err = sp_mod_d(a, primes[i], &d); + if (err != MP_OKAY || d == 0) { + ret = MP_NO; + haveRes = 1; + break; + } + } + } + +#ifndef WC_NO_RNG + /* now do a miller rabin with up to t random numbers, this should + * give a (1/4)^t chance of a false prime. 
+     */
+    #ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY && !haveRes) {
+        b = (sp_int*)XMALLOC(sizeof(sp_int) * 5, NULL, DYNAMIC_TYPE_BIGINT);
+        if (b == NULL) {
+            err = MP_MEM;
+        }
+        else {
+            c = &b[1]; n1 = &b[2]; y = &b[3]; r = &b[4];
+        }
+    }
+    #endif
+
+    if (err == MP_OKAY && !haveRes) {
+        sp_init(b);
+        sp_init(c);
+        sp_init(n1);
+        sp_init(y);
+        sp_init(r);
+
+        err = sp_sub_d(a, 2, c);
+    }
+
+    if (err == MP_OKAY && !haveRes) {
+        baseSz = (sp_count_bits(a) + 7) / 8;
+
+        while (t > 0) {
+            err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz);
+            if (err != MP_OKAY)
+                break;
+            b->used = a->used;
+
+            if (sp_cmp_d(b, 2) != MP_GT || sp_cmp(b, c) != MP_LT)
+                continue;
+
+            err = sp_prime_miller_rabin_ex(a, b, &ret, n1, y, r);
+            if (err != MP_OKAY || ret == MP_NO)
+                break;
+
+            t--;
+        }
+
+        sp_clear(n1);
+        sp_clear(y);
+        sp_clear(r);
+        sp_clear(b);
+        sp_clear(c);
+    }
+
+    #ifdef WOLFSSL_SMALL_STACK
+    if (b != NULL)
+        XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
+    #endif
+#else
+    (void)t;
+#endif /* !WC_NO_RNG */
+
+    *result = ret;
+    return err;
+}
+
+#ifndef NO_DH
+int sp_exch(sp_int* a, sp_int* b)
+{
+    int err = MP_OKAY;
+#ifndef WOLFSSL_SMALL_STACK
+    sp_int t[1];
+#else
+    sp_int *t = NULL;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+    t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT);
+    if (t == NULL)
+        err = MP_MEM;
+#endif
+
+    if (err == MP_OKAY) {
+        *t = *a;
+        *a = *b;
+        *b = *t;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (t != NULL)
+        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+    return err;
+}
+#endif
+#endif
+
+#if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA)
+/* Multiply a by digit n and put result into r. r = a * n
+ *
+ * a  SP integer to be multiplied.
+ * n  Number to multiply by.
+ * r  SP integer result.
+ * returns MP_OKAY always.
+ */
+int sp_mul_d(sp_int* a, sp_int_digit n, sp_int* r)
+{
+    _sp_mul_d(a, n, r, 0);
+    return MP_OKAY;
+}
+#endif
+
+/* Returns the run time settings.
+ *
+ * returns the settings value.
+ */
+word32 CheckRunTimeSettings(void)
+{
+    return CTC_SETTINGS;
+}
+
+#endif /* WOLFSSL_SP_MATH */
diff --git a/client/wolfssl/wolfcrypt/src/sp_x86_64.c b/client/wolfssl/wolfcrypt/src/sp_x86_64.c
new file mode 100644
index 0000000..3e49d20
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/sp_x86_64.c
@@ -0,0 +1,29555 @@
+/* sp.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implementation by Sean Parkinson. */
*/ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include +#include +#include +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \ + defined(WOLFSSL_HAVE_SP_ECC) + +#ifdef RSA_LOW_MEM +#ifndef WOLFSSL_SP_SMALL +#define WOLFSSL_SP_SMALL +#endif +#endif + +#include + +#ifdef WOLFSSL_SP_X86_64_ASM +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +#ifndef WOLFSSL_SP_NO_2048 +extern void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n); +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 64 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 64 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffffffffffffl; + s = 64U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 64U) <= (word32)DIGIT_BIT) { + s += 64U; + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 64) { + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + s = 64 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +extern void sp_2048_to_bin(sp_digit* r, byte* a); +extern void sp_2048_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b); +extern void sp_2048_sqr_16(sp_digit* r, const sp_digit* a); +extern void sp_2048_mul_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* b); +extern void sp_2048_sqr_avx2_16(sp_digit* r, const sp_digit* a); +extern sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b); +extern sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b); +extern sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b); +extern void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b); + +extern sp_digit sp_2048_dbl_16(sp_digit* r, const sp_digit* a); +extern void sp_2048_sqr_32(sp_digit* r, const sp_digit* a); + +#ifdef HAVE_INTEL_AVX2 +extern void sp_2048_mul_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit* b); +#endif /* HAVE_INTEL_AVX2 */ + +#ifdef HAVE_INTEL_AVX2 +extern void sp_2048_sqr_avx2_32(sp_digit* r, const sp_digit* a); +#endif /* HAVE_INTEL_AVX2 */ + +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */ + +/* Caclulate the bottom digit of -1/a mod 2^n. + * + * a A single precision number. + * rho Bottom word of inverse. 
+ */ +static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho) +{ + sp_digit x, b; + + b = a[0]; + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ + x *= 2 - b * x; /* here x*a==1 mod 2**64 */ + + /* rho = -1/m mod b */ + *rho = -x; +} + +extern void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b); +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +extern sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b); +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 2048 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_2048_mont_norm_16(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 16); + + /* r = 2^n mod m */ + sp_2048_sub_in_place_16(r, m); +} + +extern sp_digit sp_2048_cond_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m); +extern void sp_2048_mont_reduce_16(sp_digit* a, const sp_digit* m, sp_digit mp); +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_2048_mul_16(r, a, b); + sp_2048_mont_reduce_16(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_sqr_16(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_2048_sqr_16(r, a); + sp_2048_mont_reduce_16(r, m, mp); +} + +extern sp_digit sp_2048_cond_sub_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m); +extern void sp_2048_mul_d_16(sp_digit* r, const sp_digit* a, sp_digit b); +extern void sp_2048_mul_d_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit b); +/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The dividend. + * returns the result of the division. + */ +static WC_INLINE sp_digit div_2048_word_16(sp_digit d1, sp_digit d0, + sp_digit div) +{ + register sp_digit r asm("rax"); + __asm__ __volatile__ ( + "divq %3" + : "=a" (r) + : "d" (d1), "a" (d0), "r" (div) + : + ); + return r; +} +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<16; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 16; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +extern int64_t sp_2048_cmp_16(const sp_digit* a, const sp_digit* b); +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. 
+ * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[32], t2[17]; + sp_digit div, r1; + int i; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + (void)m; + + div = d[15]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 16); + r1 = sp_2048_cmp_16(&t1[16], d) >= 0; +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_2048_cond_sub_avx2_16(&t1[16], &t1[16], d, (sp_digit)0 - r1); + else +#endif + sp_2048_cond_sub_16(&t1[16], &t1[16], d, (sp_digit)0 - r1); + for (i=15; i>=0; i--) { + r1 = div_2048_word_16(t1[16 + i], t1[16 + i - 1], div); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_2048_mul_d_avx2_16(t2, d, r1); + else +#endif + sp_2048_mul_d_16(t2, d, r1); + t1[16 + i] += sp_2048_sub_in_place_16(&t1[i], t2); + t1[16 + i] -= t2[16]; + sp_2048_mask_16(t2, d, t1[16 + i]); + t1[16 + i] += sp_2048_add_16(&t1[i], &t1[i], t2); + sp_2048_mask_16(t2, d, t1[16 + i]); + t1[16 + i] += sp_2048_add_16(&t1[i], &t1[i], t2); + } + + r1 = sp_2048_cmp_16(t1, d) >= 0; +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_2048_cond_sub_avx2_16(r, t1, d, (sp_digit)0 - r1); + else +#endif + sp_2048_cond_sub_16(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_mod_16(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_16(a, m, NULL, r); +} + +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
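+ *
+ * In outline, this is fixed 5-bit window exponentiation in Montgomery
+ * form (a sketch; the code below unrolls the squarings):
+ *
+ *   precompute t[j] = a^j * R mod m for j = 0..31   (R = 2^1024)
+ *   for each 5-bit window y of e, high to low:
+ *       r = MontSqr(r) five times;  r = MontMul(r, t[y])
+ *
+ * so a 1024-bit exponent costs about 1024 squarings and 205 multiplies.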
+ */ +static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][32]; + sp_digit rt[32]; +#else + sp_digit* t[32]; + sp_digit* rt; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 32, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 32; + rt = td + 1024; +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_16(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 16); + if (reduceA) { + err = sp_2048_mod_16(t[1] + 16, a, m); + if (err == MP_OKAY) + err = sp_2048_mod_16(t[1], t[1], m); + } + else { + XMEMCPY(t[1] + 16, a, sizeof(sp_digit) * 16); + err = sp_2048_mod_16(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_16(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_16(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_16(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_16(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_16(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_16(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_16(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_16(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_16(t[10], t[ 5], m, mp); + sp_2048_mont_mul_16(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_16(t[12], t[ 6], m, mp); + sp_2048_mont_mul_16(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_16(t[14], t[ 7], m, mp); + sp_2048_mont_mul_16(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_16(t[16], t[ 8], m, mp); + sp_2048_mont_mul_16(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_16(t[18], t[ 9], m, mp); + sp_2048_mont_mul_16(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_16(t[20], t[10], m, mp); + sp_2048_mont_mul_16(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_16(t[22], t[11], m, mp); + sp_2048_mont_mul_16(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_16(t[24], t[12], m, mp); + sp_2048_mont_mul_16(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_16(t[26], t[13], m, mp); + sp_2048_mont_mul_16(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_16(t[28], t[14], m, mp); + sp_2048_mont_mul_16(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_16(t[30], t[15], m, mp); + sp_2048_mont_mul_16(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + if ((bits % 5) == 0) { + c -= 5; + } + else { + c -= bits % 5; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 16); + for (; i>=0 || c>=5; ) { + if (c >= 5) { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + else if (c == 0) { + n = e[i--]; + y = (int)(n >> 59); + n <<= 5; + c = 59; + } + else { + y = (int)(n >> 59); + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + + sp_2048_sqr_16(rt, r); + sp_2048_mont_reduce_16(rt, m, mp); + sp_2048_sqr_16(r, rt); + sp_2048_mont_reduce_16(r, m, mp); + sp_2048_sqr_16(rt, r); + sp_2048_mont_reduce_16(rt, m, mp); + sp_2048_sqr_16(r, rt); + sp_2048_mont_reduce_16(r, m, mp); + sp_2048_sqr_16(rt, r); + sp_2048_mont_reduce_16(rt, m, mp); + + sp_2048_mul_16(r, rt, t[y]); + sp_2048_mont_reduce_16(r, m, mp); + } + + XMEMSET(&r[16], 0, sizeof(sp_digit) * 16); + sp_2048_mont_reduce_16(r, m, mp); + + mask = 0 - (sp_2048_cmp_16(r, m) >= 0); + sp_2048_cond_sub_16(r, r, m, mask); + } + 
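+    /* The final reduction above is branch-free: (sp_2048_cmp_16(r, m) >= 0)
+     * is 0 or 1, so mask = 0 - cond is either 0 or all ones, and
+     * sp_2048_cond_sub_16 subtracts (m & mask), i.e. subtracts m only when
+     * r >= m.  This keeps the compare-and-subtract constant-time. */
+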
+#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} + +extern void sp_2048_mont_reduce_avx2_16(sp_digit* a, const sp_digit* m, sp_digit mp); +#ifdef HAVE_INTEL_AVX2 +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_mul_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_2048_mul_avx2_16(r, a, b); + sp_2048_mont_reduce_avx2_16(r, m, mp); +} + +#endif /* HAVE_INTEL_AVX2 */ +#ifdef HAVE_INTEL_AVX2 +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_sqr_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_2048_sqr_avx2_16(r, a); + sp_2048_mont_reduce_avx2_16(r, m, mp); +} + +#endif /* HAVE_INTEL_AVX2 */ +#ifdef HAVE_INTEL_AVX2 +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_2048_mod_exp_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][32]; + sp_digit rt[32]; +#else + sp_digit* t[32]; + sp_digit* rt; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 32, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 32; + rt = td + 1024; +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_16(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 16); + if (reduceA) { + err = sp_2048_mod_16(t[1] + 16, a, m); + if (err == MP_OKAY) + err = sp_2048_mod_16(t[1], t[1], m); + } + else { + XMEMCPY(t[1] + 16, a, sizeof(sp_digit) * 16); + err = sp_2048_mod_16(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_avx2_16(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_avx2_16(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_avx2_16(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_avx2_16(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_avx2_16(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_avx2_16(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_avx2_16(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_avx2_16(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_avx2_16(t[10], t[ 5], m, mp); + sp_2048_mont_mul_avx2_16(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_avx2_16(t[12], t[ 6], m, mp); + sp_2048_mont_mul_avx2_16(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_avx2_16(t[14], t[ 7], m, mp); + sp_2048_mont_mul_avx2_16(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_avx2_16(t[16], t[ 8], m, mp); + sp_2048_mont_mul_avx2_16(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_avx2_16(t[18], t[ 9], m, mp); 
+ sp_2048_mont_mul_avx2_16(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_avx2_16(t[20], t[10], m, mp); + sp_2048_mont_mul_avx2_16(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_avx2_16(t[22], t[11], m, mp); + sp_2048_mont_mul_avx2_16(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_avx2_16(t[24], t[12], m, mp); + sp_2048_mont_mul_avx2_16(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_avx2_16(t[26], t[13], m, mp); + sp_2048_mont_mul_avx2_16(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_avx2_16(t[28], t[14], m, mp); + sp_2048_mont_mul_avx2_16(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_avx2_16(t[30], t[15], m, mp); + sp_2048_mont_mul_avx2_16(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + if ((bits % 5) == 0) { + c -= 5; + } + else { + c -= bits % 5; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 16); + for (; i>=0 || c>=5; ) { + if (c >= 5) { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + else if (c == 0) { + n = e[i--]; + y = (int)(n >> 59); + n <<= 5; + c = 59; + } + else { + y = (int)(n >> 59); + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + + sp_2048_sqr_avx2_16(rt, r); + sp_2048_mont_reduce_avx2_16(rt, m, mp); + sp_2048_sqr_avx2_16(r, rt); + sp_2048_mont_reduce_avx2_16(r, m, mp); + sp_2048_sqr_avx2_16(rt, r); + sp_2048_mont_reduce_avx2_16(rt, m, mp); + sp_2048_sqr_avx2_16(r, rt); + sp_2048_mont_reduce_avx2_16(r, m, mp); + sp_2048_sqr_avx2_16(rt, r); + sp_2048_mont_reduce_avx2_16(rt, m, mp); + + sp_2048_mul_avx2_16(r, rt, t[y]); + sp_2048_mont_reduce_avx2_16(r, m, mp); + } + + XMEMSET(&r[16], 0, sizeof(sp_digit) * 16); + sp_2048_mont_reduce_avx2_16(r, m, mp); + + mask = 0 - (sp_2048_cmp_16(r, m) >= 0); + sp_2048_cond_sub_avx2_16(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} +#endif /* HAVE_INTEL_AVX2 */ + +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */ + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 2048 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 32); + + /* r = 2^n mod m */ + sp_2048_sub_in_place_32(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +extern sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m); +extern void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp); +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_2048_mul_32(r, a, b); + sp_2048_mont_reduce_32(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. 
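+ *
+ * A dedicated squaring routine is used because, with equal operands,
+ * roughly half the partial products of a general multiply are duplicates
+ * that can be computed once and doubled.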
+ */ +static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_2048_sqr_32(r, a); + sp_2048_mont_reduce_32(r, m, mp); +} + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +extern sp_digit sp_2048_cond_sub_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m); +extern void sp_2048_mul_d_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit b); +/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The dividend. + * returns the result of the division. + */ +static WC_INLINE sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, + sp_digit div) +{ + register sp_digit r asm("rax"); + __asm__ __volatile__ ( + "divq %3" + : "=a" (r) + : "d" (d1), "a" (d0), "r" (div) + : + ); + return r; +} +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<32; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 32; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +extern int64_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b); +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[64], t2[33]; + sp_digit div, r1; + int i; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + (void)m; + + div = d[31]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 32); + r1 = sp_2048_cmp_32(&t1[32], d) >= 0; +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_2048_cond_sub_avx2_32(&t1[32], &t1[32], d, (sp_digit)0 - r1); + else +#endif + sp_2048_cond_sub_32(&t1[32], &t1[32], d, (sp_digit)0 - r1); + for (i=31; i>=0; i--) { + r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_2048_mul_d_avx2_32(t2, d, r1); + else +#endif + sp_2048_mul_d_32(t2, d, r1); + t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2); + t1[32 + i] -= t2[32]; + sp_2048_mask_32(t2, d, t1[32 + i]); + t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2); + sp_2048_mask_32(t2, d, t1[32 + i]); + t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2); + } + + r1 = sp_2048_cmp_32(t1, d) >= 0; +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_2048_cond_sub_avx2_32(r, t1, d, (sp_digit)0 - r1); + else +#endif + sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. 
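+ *
+ * Implemented as sp_2048_div_32 with the quotient discarded: one quotient
+ * word per iteration is estimated with the 128-by-64 bit divq instruction
+ * (see div_2048_word_32) and corrected with masked add-backs.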
+ */ +static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_32(a, m, NULL, r); +} + +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +extern sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b); +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[64], t2[33]; + sp_digit div, r1; + int i; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + (void)m; + + div = d[31]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 32); + for (i = 31; i > 0; i--) { + if (t1[i + 32] != d[i]) + break; + } + if (t1[i + 32] >= d[i]) { + sp_2048_sub_in_place_32(&t1[32], d); + } + for (i=31; i>=0; i--) { + r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_2048_mul_d_avx2_32(t2, d, r1); + else +#endif + sp_2048_mul_d_32(t2, d, r1); + t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2); + t1[32 + i] -= t2[32]; + if (t1[32 + i] != 0) { + t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], d); + if (t1[32 + i] != 0) + t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], d); + } + } + + for (i = 31; i > 0; i--) { + if (t1[i] != d[i]) + break; + } + if (t1[i] >= d[i]) { + sp_2048_sub_32(r, t1, d); + } + else { + XMEMCPY(r, t1, sizeof(*t1) * 32); + } + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_2048_mod_32_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_2048_div_32_cond(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH) +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
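+ *
+ * Same fixed 5-bit window algorithm as sp_2048_mod_exp_16 above, over the
+ * full 32-word (2048-bit) modulus.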
+ */ +static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][64]; + sp_digit rt[64]; +#else + sp_digit* t[32]; + sp_digit* rt; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 64, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 64; + rt = td + 2048; +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_32(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 32); + if (reduceA) { + err = sp_2048_mod_32(t[1] + 32, a, m); + if (err == MP_OKAY) + err = sp_2048_mod_32(t[1], t[1], m); + } + else { + XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32); + err = sp_2048_mod_32(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_32(t[10], t[ 5], m, mp); + sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_32(t[12], t[ 6], m, mp); + sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_32(t[14], t[ 7], m, mp); + sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_32(t[16], t[ 8], m, mp); + sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_32(t[18], t[ 9], m, mp); + sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_32(t[20], t[10], m, mp); + sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_32(t[22], t[11], m, mp); + sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_32(t[24], t[12], m, mp); + sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_32(t[26], t[13], m, mp); + sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_32(t[28], t[14], m, mp); + sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_32(t[30], t[15], m, mp); + sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + if ((bits % 5) == 0) { + c -= 5; + } + else { + c -= bits % 5; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 32); + for (; i>=0 || c>=5; ) { + if (c >= 5) { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + else if (c == 0) { + n = e[i--]; + y = (int)(n >> 59); + n <<= 5; + c = 59; + } + else { + y = (int)(n >> 59); + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + + sp_2048_sqr_32(rt, r); + sp_2048_mont_reduce_32(rt, m, mp); + sp_2048_sqr_32(r, rt); + sp_2048_mont_reduce_32(r, m, mp); + sp_2048_sqr_32(rt, r); + sp_2048_mont_reduce_32(rt, m, mp); + sp_2048_sqr_32(r, rt); + sp_2048_mont_reduce_32(r, m, mp); + sp_2048_sqr_32(rt, r); + sp_2048_mont_reduce_32(rt, m, mp); + + sp_2048_mul_32(r, rt, t[y]); + sp_2048_mont_reduce_32(r, m, mp); + } + + XMEMSET(&r[32], 0, sizeof(sp_digit) * 32); + sp_2048_mont_reduce_32(r, m, mp); + + mask = 0 - (sp_2048_cmp_32(r, m) >= 0); + sp_2048_cond_sub_32(r, r, m, mask); + } + 
+#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +extern void sp_2048_mont_reduce_avx2_32(sp_digit* a, const sp_digit* m, sp_digit mp); +#ifdef HAVE_INTEL_AVX2 +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_mul_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_2048_mul_avx2_32(r, a, b); + sp_2048_mont_reduce_avx2_32(r, m, mp); +} + +#endif /* HAVE_INTEL_AVX2 */ +#ifdef HAVE_INTEL_AVX2 +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_2048_mont_sqr_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_2048_sqr_avx2_32(r, a); + sp_2048_mont_reduce_avx2_32(r, m, mp); +} + +#endif /* HAVE_INTEL_AVX2 */ +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH) +#ifdef HAVE_INTEL_AVX2 +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
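+ *
+ * AVX2 (BMI2/ADX) variant of sp_2048_mod_exp_32; callers select it at run
+ * time from the CPUID flags.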
+ */ +static int sp_2048_mod_exp_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][64]; + sp_digit rt[64]; +#else + sp_digit* t[32]; + sp_digit* rt; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 64, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 64; + rt = td + 2048; +#endif + norm = t[0]; + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_32(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 32); + if (reduceA) { + err = sp_2048_mod_32(t[1] + 32, a, m); + if (err == MP_OKAY) + err = sp_2048_mod_32(t[1], t[1], m); + } + else { + XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32); + err = sp_2048_mod_32(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_avx2_32(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_avx2_32(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_avx2_32(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_avx2_32(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_avx2_32(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_avx2_32(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_avx2_32(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_avx2_32(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_avx2_32(t[10], t[ 5], m, mp); + sp_2048_mont_mul_avx2_32(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_avx2_32(t[12], t[ 6], m, mp); + sp_2048_mont_mul_avx2_32(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_avx2_32(t[14], t[ 7], m, mp); + sp_2048_mont_mul_avx2_32(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_avx2_32(t[16], t[ 8], m, mp); + sp_2048_mont_mul_avx2_32(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_avx2_32(t[18], t[ 9], m, mp); + sp_2048_mont_mul_avx2_32(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_avx2_32(t[20], t[10], m, mp); + sp_2048_mont_mul_avx2_32(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_avx2_32(t[22], t[11], m, mp); + sp_2048_mont_mul_avx2_32(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_avx2_32(t[24], t[12], m, mp); + sp_2048_mont_mul_avx2_32(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_avx2_32(t[26], t[13], m, mp); + sp_2048_mont_mul_avx2_32(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_avx2_32(t[28], t[14], m, mp); + sp_2048_mont_mul_avx2_32(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_avx2_32(t[30], t[15], m, mp); + sp_2048_mont_mul_avx2_32(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + if ((bits % 5) == 0) { + c -= 5; + } + else { + c -= bits % 5; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 32); + for (; i>=0 || c>=5; ) { + if (c >= 5) { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + else if (c == 0) { + n = e[i--]; + y = (int)(n >> 59); + n <<= 5; + c = 59; + } + else { + y = (int)(n >> 59); + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + + sp_2048_sqr_avx2_32(rt, r); + sp_2048_mont_reduce_avx2_32(rt, m, mp); + sp_2048_sqr_avx2_32(r, rt); + sp_2048_mont_reduce_avx2_32(r, m, mp); + sp_2048_sqr_avx2_32(rt, r); + sp_2048_mont_reduce_avx2_32(rt, m, mp); + sp_2048_sqr_avx2_32(r, rt); + sp_2048_mont_reduce_avx2_32(r, m, mp); + sp_2048_sqr_avx2_32(rt, r); + sp_2048_mont_reduce_avx2_32(rt, m, mp); + + sp_2048_mul_avx2_32(r, rt, t[y]); + 
sp_2048_mont_reduce_avx2_32(r, m, mp); + } + + XMEMSET(&r[32], 0, sizeof(sp_digit) * 32); + sp_2048_mont_reduce_avx2_32(r, m, mp); + + mask = 0 - (sp_2048_cmp_32(r, m) >= 0); + sp_2048_cond_sub_avx2_32(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} +#endif /* HAVE_INTEL_AVX2 */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit ad[64], md[32], rd[64]; +#else + sp_digit* d = NULL; +#endif + sp_digit* a; + sp_digit *ah; + sp_digit* m; + sp_digit* r; + sp_digit e = 0; + int err = MP_OKAY; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + if (*outLen < 256) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 256 || + mp_count_bits(mm) != 2048)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 32 * 2; + m = r + 32 * 2; + ah = a + 32; + } +#else + a = ad; + m = md; + r = rd; + ah = a + 32; +#endif + + if (err == MP_OKAY) { + sp_2048_from_bin(ah, 32, in, inLen); +#if DIGIT_BIT >= 64 + e = em->dp[0]; +#else + e = em->dp[0]; + if (em->used > 1) + e |= ((sp_digit)em->dp[1]) << DIGIT_BIT; +#endif + if (e == 0) + err = MP_EXPTMOD_E; + } + if (err == MP_OKAY) { + sp_2048_from_mp(m, 32, mm); + + if (e == 0x3) { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + if (err == MP_OKAY) { + sp_2048_sqr_avx2_32(r, ah); + err = sp_2048_mod_32_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_2048_mul_avx2_32(r, ah, r); + err = sp_2048_mod_32_cond(r, r, m); + } + } + else +#endif + { + if (err == MP_OKAY) { + sp_2048_sqr_32(r, ah); + err = sp_2048_mod_32_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_2048_mul_32(r, ah, r); + err = sp_2048_mod_32_cond(r, r, m); + } + } + } + else { + int i; + sp_digit mp; + + sp_2048_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
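+            * The base is already in ah = a + 32; the code below zeroes the
+            * bottom 32 words so the 64-word value is base * 2^2048, and the
+            * conditional reduction leaves a = base * R mod m, the Montgomery
+            * representation with R = 2^2048.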
*/ + XMEMSET(a, 0, sizeof(sp_digit) * 32); + err = sp_2048_mod_32_cond(a, a, m); + + if (err == MP_OKAY) { + for (i=63; i>=0; i--) { + if (e >> i) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 32); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + for (i--; i>=0; i--) { + sp_2048_mont_sqr_avx2_32(r, r, m, mp); + if (((e >> i) & 1) == 1) { + sp_2048_mont_mul_avx2_32(r, r, a, m, mp); + } + } + XMEMSET(&r[32], 0, sizeof(sp_digit) * 32); + sp_2048_mont_reduce_avx2_32(r, m, mp); + } + else +#endif + { + for (i--; i>=0; i--) { + sp_2048_mont_sqr_32(r, r, m, mp); + if (((e >> i) & 1) == 1) { + sp_2048_mont_mul_32(r, r, a, m, mp); + } + } + XMEMSET(&r[32], 0, sizeof(sp_digit) * 32); + sp_2048_mont_reduce_32(r, m, mp); + } + + for (i = 31; i > 0; i--) { + if (r[i] != m[i]) + break; + } + if (r[i] >= m[i]) + sp_2048_sub_in_place_32(r, m); + } + } + } + + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) + XFREE(d, NULL, DYNAMIC_TYPE_RSA); +#endif + + return err; +} + +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
+ */ +int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_digit a[64], d[32], m[32]; +#else + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; +#endif + sp_digit* r; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 256U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 2048) { + err = MP_READ_E; + } + if (inLen > 256U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 2048) { + err = MP_READ_E; + } + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 4, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } +#endif + + if (err == MP_OKAY) { +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + a = d + 32; + m = a + 64; +#endif + r = a; + + sp_2048_from_bin(a, 32, in, inLen); + sp_2048_from_mp(d, 32, dm); + sp_2048_from_mp(m, 32, mm); + err = sp_2048_mod_exp_32(r, a, d, 2048, m, 0); + } + + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (d != NULL) { + XMEMSET(d, 0, sizeof(sp_digit) * 32); + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(d, 0, sizeof(sp_digit) * 32); +#endif + + return err; +} + +#else +extern sp_digit sp_2048_cond_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m); +extern sp_digit sp_2048_cond_add_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m); +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
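+ *
+ * Uses Garner's CRT recombination:
+ *   tmpa = in^dpm mod pm,  tmpb = in^dqm mod qm,
+ *   h    = qim * (tmpa - tmpb) mod pm,
+ *   out  = tmpb + h * qm,
+ * so only two half-size (1024-bit) exponentiations are needed.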
+ */ +int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit ad[32 * 2]; + sp_digit pd[16], qd[16], dpd[16]; + sp_digit tmpad[32], tmpbd[32]; +#else + sp_digit* t = NULL; +#endif + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* dq; + sp_digit* qi; + sp_digit* tmpa; + sp_digit* tmpb; + sp_digit* r; + sp_digit c; + int err = MP_OKAY; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + (void)dm; + (void)mm; + + if (*outLen < 256) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 32 * 2; + q = p + 16; + qi = dq = dp = q + 16; + tmpa = qi + 16; + tmpb = tmpa + 32; + + r = t + 32; + } +#else + r = a = ad; + p = pd; + q = qd; + qi = dq = dp = dpd; + tmpa = tmpad; + tmpb = tmpbd; +#endif + + if (err == MP_OKAY) { + sp_2048_from_bin(a, 32, in, inLen); + sp_2048_from_mp(p, 16, pm); + sp_2048_from_mp(q, 16, qm); + sp_2048_from_mp(dp, 16, dpm); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_2048_mod_exp_avx2_16(tmpa, a, dp, 1024, p, 1); + else +#endif + err = sp_2048_mod_exp_16(tmpa, a, dp, 1024, p, 1); + } + if (err == MP_OKAY) { + sp_2048_from_mp(dq, 16, dqm); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_2048_mod_exp_avx2_16(tmpb, a, dq, 1024, q, 1); + else +#endif + err = sp_2048_mod_exp_16(tmpb, a, dq, 1024, q, 1); + } + + if (err == MP_OKAY) { + c = sp_2048_sub_in_place_16(tmpa, tmpb); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + c += sp_2048_cond_add_avx2_16(tmpa, tmpa, p, c); + sp_2048_cond_add_avx2_16(tmpa, tmpa, p, c); + } + else +#endif + { + c += sp_2048_cond_add_16(tmpa, tmpa, p, c); + sp_2048_cond_add_16(tmpa, tmpa, p, c); + } + + sp_2048_from_mp(qi, 16, qim); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + sp_2048_mul_avx2_16(tmpa, tmpa, qi); + } + else +#endif + { + sp_2048_mul_16(tmpa, tmpa, qi); + } + err = sp_2048_mod_16(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + sp_2048_mul_avx2_16(tmpa, q, tmpa); + } + else +#endif + { + sp_2048_mul_16(tmpa, q, tmpa); + } + XMEMSET(&tmpb[16], 0, sizeof(sp_digit) * 16); + sp_2048_add_32(r, tmpb, tmpa); + + sp_2048_to_bin(r, out); + *outLen = 256; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 16 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpad, 0, sizeof(tmpad)); + XMEMSET(tmpbd, 0, sizeof(tmpbd)); + XMEMSET(pd, 0, sizeof(pd)); + XMEMSET(qd, 0, sizeof(qd)); + XMEMSET(dpd, 0, sizeof(dpd)); +#endif + + return err; +} +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of 
sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_2048_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 64 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 32); + r->used = 32; + mp_clamp(r); +#elif DIGIT_BIT < 64 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 32; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 64) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 64 - s; + } + r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 32; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 64 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 64 - s; + } + else { + s += 64; + } + } + r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[64], e[32], m[32]; + sp_digit* r = b; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 2048 || expBits > 2048 || + mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 32, base); + sp_2048_from_mp(e, 32, exp); + sp_2048_from_mp(m, 32, mod); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_2048_mod_exp_avx2_32(r, b, e, expBits, m, 0); + else +#endif + err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_2048_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH +#ifdef HAVE_FFDHE_2048 +extern void sp_2048_lshift_32(sp_digit* r, const sp_digit* a, int n); +#ifdef HAVE_INTEL_AVX2 +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
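+ *
+ * With a base of 2 the window multiply of the generic code becomes a left
+ * shift: after six Montgomery squarings the 6-bit window value y is applied
+ * with sp_2048_lshift_32, and the overflow word r[32] is folded back in as
+ * r[32] * norm (norm = 2^2048 mod m).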
+ */ +static int sp_2048_mod_exp_2_avx2_32(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[64]; + sp_digit td[33]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 97, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 64; +#else + norm = nd; + tmp = td; +#endif + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_32(norm, m); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + if ((bits % 6) == 0) { + c -= 6; + } + else { + c -= bits % 6; + } + y = (int)(n >> c); + n <<= 64 - c; + sp_2048_lshift_32(r, norm, y); + for (; i>=0 || c>=6; ) { + if (c == 0) { + n = e[i--]; + y = (int)(n >> 58); + n <<= 6; + c = 58; + } + else if (c < 6) { + y = (int)(n >> 58); + n = e[i--]; + c = 6 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 58) & 0x3f; + n <<= 6; + c -= 6; + } + + sp_2048_mont_sqr_avx2_32(r, r, m, mp); + sp_2048_mont_sqr_avx2_32(r, r, m, mp); + sp_2048_mont_sqr_avx2_32(r, r, m, mp); + sp_2048_mont_sqr_avx2_32(r, r, m, mp); + sp_2048_mont_sqr_avx2_32(r, r, m, mp); + sp_2048_mont_sqr_avx2_32(r, r, m, mp); + + sp_2048_lshift_32(r, r, y); + sp_2048_mul_d_avx2_32(tmp, norm, r[32]); + r[32] = 0; + o = sp_2048_add_32(r, r, tmp); + sp_2048_cond_sub_avx2_32(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[32], 0, sizeof(sp_digit) * 32); + sp_2048_mont_reduce_avx2_32(r, m, mp); + + mask = 0 - (sp_2048_cmp_32(r, m) >= 0); + sp_2048_cond_sub_avx2_32(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} +#endif /* HAVE_INTEL_AVX2 */ + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
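+ *
+ * Portable version of sp_2048_mod_exp_2_avx2_32 above: the same algorithm
+ * built on the non-AVX2 Montgomery primitives.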
+ */ +static int sp_2048_mod_exp_2_32(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[64]; + sp_digit td[33]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 97, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 64; +#else + norm = nd; + tmp = td; +#endif + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_32(norm, m); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + if ((bits % 6) == 0) { + c -= 6; + } + else { + c -= bits % 6; + } + y = (int)(n >> c); + n <<= 64 - c; + sp_2048_lshift_32(r, norm, y); + for (; i>=0 || c>=6; ) { + if (c == 0) { + n = e[i--]; + y = (int)(n >> 58); + n <<= 6; + c = 58; + } + else if (c < 6) { + y = (int)(n >> 58); + n = e[i--]; + c = 6 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 58) & 0x3f; + n <<= 6; + c -= 6; + } + + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + + sp_2048_lshift_32(r, r, y); + sp_2048_mul_d_32(tmp, norm, r[32]); + r[32] = 0; + o = sp_2048_add_32(r, r, tmp); + sp_2048_cond_sub_32(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[32], 0, sizeof(sp_digit) * 32); + sp_2048_mont_reduce_32(r, m, mp); + + mask = 0 - (sp_2048_cmp_32(r, m) >= 0); + sp_2048_cond_sub_32(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} + +#endif /* HAVE_FFDHE_2048 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 256 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
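+ *
+ * When the base is 2 and the top word of the modulus is all ones (as in the
+ * 2048-bit FFDHE group), the dedicated 2^e path above is used.  Leading
+ * zero bytes are stripped from the big-endian result, so *outLen may be
+ * less than 256.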
+ */ +int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ + int err = MP_OKAY; + sp_digit b[64], e[32], m[32]; + sp_digit* r = b; + word32 i; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + if (mp_count_bits(base) > 2048 || expLen > 256 || + mp_count_bits(mod) != 2048) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 32, base); + sp_2048_from_bin(e, 32, exp, expLen); + sp_2048_from_mp(m, 32, mod); + + #ifdef HAVE_FFDHE_2048 + if (base->used == 1 && base->dp[0] == 2 && m[31] == (sp_digit)-1) { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_2048_mod_exp_2_avx2_32(r, e, expLen * 8, m); + else +#endif + err = sp_2048_mod_exp_2_32(r, e, expLen * 8, m); + } + else + #endif + { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_2048_mod_exp_avx2_32(r, b, e, expLen * 8, m, 0); + else +#endif + err = sp_2048_mod_exp_32(r, b, e, expLen * 8, m, 0); + } + } + + if (err == MP_OKAY) { + sp_2048_to_bin(r, out); + *outLen = 256; + for (i=0; i<256 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} +#endif +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[32], e[16], m[16]; + sp_digit* r = b; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1024 || expBits > 1024 || + mp_count_bits(mod) != 1024) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + sp_2048_from_mp(b, 16, base); + sp_2048_from_mp(e, 16, exp); + sp_2048_from_mp(m, 16, mod); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_2048_mod_exp_avx2_16(r, b, e, expBits, m, 0); + else +#endif + err = sp_2048_mod_exp_16(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 16, 0, sizeof(*r) * 16); + err = sp_2048_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* !WOLFSSL_SP_NO_2048 */ + +#ifndef WOLFSSL_SP_NO_3072 +extern void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n); +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. 
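+ *
+ * Three cases below: DIGIT_BIT == 64 is a straight copy; DIGIT_BIT > 64
+ * splits each mp_digit across several sp_digits; DIGIT_BIT < 64 packs
+ * several mp_digits into each sp_digit.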
+ */ +static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 64 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 64 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffffffffffffl; + s = 64U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 64U) <= (word32)DIGIT_BIT) { + s += 64U; + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 64) { + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + s = 64 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +extern void sp_3072_to_bin(sp_digit* r, byte* a); +extern void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b); +extern void sp_3072_sqr_12(sp_digit* r, const sp_digit* a); +extern void sp_3072_mul_avx2_12(sp_digit* r, const sp_digit* a, const sp_digit* b); +extern void sp_3072_sqr_avx2_12(sp_digit* r, const sp_digit* a); +extern sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b); +extern sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b); +extern sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b); +extern void sp_3072_mul_24(sp_digit* r, const sp_digit* a, const sp_digit* b); + +extern sp_digit sp_3072_dbl_12(sp_digit* r, const sp_digit* a); +extern void sp_3072_sqr_24(sp_digit* r, const sp_digit* a); + +#ifdef HAVE_INTEL_AVX2 +extern void sp_3072_mul_avx2_24(sp_digit* r, const sp_digit* a, const sp_digit* b); +#endif /* HAVE_INTEL_AVX2 */ + +#ifdef HAVE_INTEL_AVX2 +extern void sp_3072_sqr_avx2_24(sp_digit* r, const sp_digit* a); +#endif /* HAVE_INTEL_AVX2 */ + +extern sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b); +extern sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b); +extern void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b); + +extern sp_digit sp_3072_dbl_24(sp_digit* r, const sp_digit* a); +extern void sp_3072_sqr_48(sp_digit* r, const sp_digit* a); + +#ifdef HAVE_INTEL_AVX2 +extern void sp_3072_mul_avx2_48(sp_digit* r, const sp_digit* a, const sp_digit* b); +#endif /* HAVE_INTEL_AVX2 */ + +#ifdef HAVE_INTEL_AVX2 +extern void sp_3072_sqr_avx2_48(sp_digit* r, const sp_digit* a); +#endif /* HAVE_INTEL_AVX2 */ + +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */ + +/* Caclulate the bottom digit of -1/a mod 2^n. + * + * a A single precision number. + * rho Bottom word of inverse. 
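+ *
+ * Same Newton-Hensel lifting as sp_2048_mont_setup above.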
+ */
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
+
+    /* rho = -1/a[0] mod 2^64 */
+    *rho = -x;
+}
+
+extern void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b);
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 3072 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_3072_mont_norm_24(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 24);
+
+    /* r = 2^n mod m */
+    sp_3072_sub_in_place_24(r, m);
+}
+
+extern sp_digit sp_3072_cond_sub_24(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_3072_mont_reduce_24(sp_digit* a, const sp_digit* m, sp_digit mp);
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_mul_24(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_24(r, a, b);
+    sp_3072_mont_reduce_24(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_sqr_24(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_24(r, a);
+    sp_3072_mont_reduce_24(r, m, mp);
+}
+
+extern sp_digit sp_3072_cond_sub_avx2_24(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_3072_mul_d_24(sp_digit* r, const sp_digit* a, sp_digit b);
+extern void sp_3072_mul_d_avx2_24(sp_digit* r, const sp_digit* a, const sp_digit b);
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ */
+static WC_INLINE sp_digit div_3072_word_24(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    register sp_digit r asm("rax");
+    __asm__ __volatile__ (
+        "divq %3"
+        : "=a" (r)
+        : "d" (d1), "a" (d0), "r" (div)
+        :
+    );
+    return r;
+}
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<24; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 24; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+extern int64_t sp_3072_cmp_24(const sp_digit* a, const sp_digit* b);
+/* Divide a by d and put the remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Quotient result.
+ * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[48], t2[25]; + sp_digit div, r1; + int i; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + (void)m; + + div = d[23]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 24); + r1 = sp_3072_cmp_24(&t1[24], d) >= 0; +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_3072_cond_sub_avx2_24(&t1[24], &t1[24], d, (sp_digit)0 - r1); + else +#endif + sp_3072_cond_sub_24(&t1[24], &t1[24], d, (sp_digit)0 - r1); + for (i=23; i>=0; i--) { + r1 = div_3072_word_24(t1[24 + i], t1[24 + i - 1], div); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_3072_mul_d_avx2_24(t2, d, r1); + else +#endif + sp_3072_mul_d_24(t2, d, r1); + t1[24 + i] += sp_3072_sub_in_place_24(&t1[i], t2); + t1[24 + i] -= t2[24]; + sp_3072_mask_24(t2, d, t1[24 + i]); + t1[24 + i] += sp_3072_add_24(&t1[i], &t1[i], t2); + sp_3072_mask_24(t2, d, t1[24 + i]); + t1[24 + i] += sp_3072_add_24(&t1[i], &t1[i], t2); + } + + r1 = sp_3072_cmp_24(t1, d) >= 0; +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_3072_cond_sub_avx2_24(r, t1, d, (sp_digit)0 - r1); + else +#endif + sp_3072_cond_sub_24(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_3072_mod_24(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_3072_div_24(a, m, NULL, r); +} + +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
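+ *
+ * reduceA, when non-zero, indicates a is double width and must first be
+ * reduced mod m.  A fixed 5-bit window is used: t[i] caches a^i in
+ * Montgomery form (t[0] = 2^1536 mod m, the Montgomery one), and each
+ * iteration performs five Montgomery squarings followed by one cached
+ * multiplication, computing r = r^32 * t[y] mod m for the next five
+ * exponent bits y.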
+ */ +static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][48]; + sp_digit rt[48]; +#else + sp_digit* t[32]; + sp_digit* rt; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 48, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 48; + rt = td + 1536; +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_24(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 24); + if (reduceA) { + err = sp_3072_mod_24(t[1] + 24, a, m); + if (err == MP_OKAY) + err = sp_3072_mod_24(t[1], t[1], m); + } + else { + XMEMCPY(t[1] + 24, a, sizeof(sp_digit) * 24); + err = sp_3072_mod_24(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_24(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_24(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_24(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_24(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_24(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_24(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_24(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_24(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_24(t[10], t[ 5], m, mp); + sp_3072_mont_mul_24(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_24(t[12], t[ 6], m, mp); + sp_3072_mont_mul_24(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_24(t[14], t[ 7], m, mp); + sp_3072_mont_mul_24(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_24(t[16], t[ 8], m, mp); + sp_3072_mont_mul_24(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_24(t[18], t[ 9], m, mp); + sp_3072_mont_mul_24(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_24(t[20], t[10], m, mp); + sp_3072_mont_mul_24(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_24(t[22], t[11], m, mp); + sp_3072_mont_mul_24(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_24(t[24], t[12], m, mp); + sp_3072_mont_mul_24(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_24(t[26], t[13], m, mp); + sp_3072_mont_mul_24(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_24(t[28], t[14], m, mp); + sp_3072_mont_mul_24(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_24(t[30], t[15], m, mp); + sp_3072_mont_mul_24(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + if ((bits % 5) == 0) { + c -= 5; + } + else { + c -= bits % 5; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 24); + for (; i>=0 || c>=5; ) { + if (c >= 5) { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + else if (c == 0) { + n = e[i--]; + y = (int)(n >> 59); + n <<= 5; + c = 59; + } + else { + y = (int)(n >> 59); + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + + sp_3072_sqr_24(rt, r); + sp_3072_mont_reduce_24(rt, m, mp); + sp_3072_sqr_24(r, rt); + sp_3072_mont_reduce_24(r, m, mp); + sp_3072_sqr_24(rt, r); + sp_3072_mont_reduce_24(rt, m, mp); + sp_3072_sqr_24(r, rt); + sp_3072_mont_reduce_24(r, m, mp); + sp_3072_sqr_24(rt, r); + sp_3072_mont_reduce_24(rt, m, mp); + + sp_3072_mul_24(r, rt, t[y]); + sp_3072_mont_reduce_24(r, m, mp); + } + + XMEMSET(&r[24], 0, sizeof(sp_digit) * 24); + sp_3072_mont_reduce_24(r, m, mp); + + mask = 0 - (sp_3072_cmp_24(r, m) >= 0); + sp_3072_cond_sub_24(r, r, m, mask); + } + 
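+    /* At this point r is out of Montgomery form: reducing (r | 0) above
+     * computed r * R^-1 mod m, and the masked cond_sub performed the
+     * final subtraction without branching on secret data. */
+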
+#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} + +extern void sp_3072_mont_reduce_avx2_24(sp_digit* a, const sp_digit* m, sp_digit mp); +#ifdef HAVE_INTEL_AVX2 +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_3072_mont_mul_avx2_24(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_3072_mul_avx2_24(r, a, b); + sp_3072_mont_reduce_avx2_24(r, m, mp); +} + +#endif /* HAVE_INTEL_AVX2 */ +#ifdef HAVE_INTEL_AVX2 +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_3072_mont_sqr_avx2_24(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_3072_sqr_avx2_24(r, a); + sp_3072_mont_reduce_avx2_24(r, m, mp); +} + +#endif /* HAVE_INTEL_AVX2 */ +#ifdef HAVE_INTEL_AVX2 +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_3072_mod_exp_avx2_24(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][48]; + sp_digit rt[48]; +#else + sp_digit* t[32]; + sp_digit* rt; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 48, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 48; + rt = td + 1536; +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_24(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 24); + if (reduceA) { + err = sp_3072_mod_24(t[1] + 24, a, m); + if (err == MP_OKAY) + err = sp_3072_mod_24(t[1], t[1], m); + } + else { + XMEMCPY(t[1] + 24, a, sizeof(sp_digit) * 24); + err = sp_3072_mod_24(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_avx2_24(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_avx2_24(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_avx2_24(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_avx2_24(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_avx2_24(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_avx2_24(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_avx2_24(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_avx2_24(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_avx2_24(t[10], t[ 5], m, mp); + sp_3072_mont_mul_avx2_24(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_avx2_24(t[12], t[ 6], m, mp); + sp_3072_mont_mul_avx2_24(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_avx2_24(t[14], t[ 7], m, mp); + sp_3072_mont_mul_avx2_24(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_avx2_24(t[16], t[ 8], m, mp); + sp_3072_mont_mul_avx2_24(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_avx2_24(t[18], t[ 9], m, mp); 
+ sp_3072_mont_mul_avx2_24(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_avx2_24(t[20], t[10], m, mp); + sp_3072_mont_mul_avx2_24(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_avx2_24(t[22], t[11], m, mp); + sp_3072_mont_mul_avx2_24(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_avx2_24(t[24], t[12], m, mp); + sp_3072_mont_mul_avx2_24(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_avx2_24(t[26], t[13], m, mp); + sp_3072_mont_mul_avx2_24(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_avx2_24(t[28], t[14], m, mp); + sp_3072_mont_mul_avx2_24(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_avx2_24(t[30], t[15], m, mp); + sp_3072_mont_mul_avx2_24(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + if ((bits % 5) == 0) { + c -= 5; + } + else { + c -= bits % 5; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 24); + for (; i>=0 || c>=5; ) { + if (c >= 5) { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + else if (c == 0) { + n = e[i--]; + y = (int)(n >> 59); + n <<= 5; + c = 59; + } + else { + y = (int)(n >> 59); + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + + sp_3072_sqr_avx2_24(rt, r); + sp_3072_mont_reduce_avx2_24(rt, m, mp); + sp_3072_sqr_avx2_24(r, rt); + sp_3072_mont_reduce_avx2_24(r, m, mp); + sp_3072_sqr_avx2_24(rt, r); + sp_3072_mont_reduce_avx2_24(rt, m, mp); + sp_3072_sqr_avx2_24(r, rt); + sp_3072_mont_reduce_avx2_24(r, m, mp); + sp_3072_sqr_avx2_24(rt, r); + sp_3072_mont_reduce_avx2_24(rt, m, mp); + + sp_3072_mul_avx2_24(r, rt, t[y]); + sp_3072_mont_reduce_avx2_24(r, m, mp); + } + + XMEMSET(&r[24], 0, sizeof(sp_digit) * 24); + sp_3072_mont_reduce_avx2_24(r, m, mp); + + mask = 0 - (sp_3072_cmp_24(r, m) >= 0); + sp_3072_cond_sub_avx2_24(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} +#endif /* HAVE_INTEL_AVX2 */ + +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */ + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 3072 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 48); + + /* r = 2^n mod m */ + sp_3072_sub_in_place_48(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +extern sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m); +extern void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp); +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_3072_mul_48(r, a, b); + sp_3072_mont_reduce_48(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. 
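+ *
+ * With the input in Montgomery form (a*R mod m), squaring then reducing
+ * yields (a*R)*(a*R)*R^-1 = (a*a)*R mod m, so the result stays in
+ * Montgomery form.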
+ */ +static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_3072_sqr_48(r, a); + sp_3072_mont_reduce_48(r, m, mp); +} + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +extern sp_digit sp_3072_cond_sub_avx2_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m); +extern void sp_3072_mul_d_avx2_48(sp_digit* r, const sp_digit* a, const sp_digit b); +/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The dividend. + * returns the result of the division. + */ +static WC_INLINE sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, + sp_digit div) +{ + register sp_digit r asm("rax"); + __asm__ __volatile__ ( + "divq %3" + : "=a" (r) + : "d" (d1), "a" (d0), "r" (div) + : + ); + return r; +} +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<48; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 48; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +extern int64_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b); +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[96], t2[49]; + sp_digit div, r1; + int i; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + (void)m; + + div = d[47]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 48); + r1 = sp_3072_cmp_48(&t1[48], d) >= 0; +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_3072_cond_sub_avx2_48(&t1[48], &t1[48], d, (sp_digit)0 - r1); + else +#endif + sp_3072_cond_sub_48(&t1[48], &t1[48], d, (sp_digit)0 - r1); + for (i=47; i>=0; i--) { + r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_3072_mul_d_avx2_48(t2, d, r1); + else +#endif + sp_3072_mul_d_48(t2, d, r1); + t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2); + t1[48 + i] -= t2[48]; + sp_3072_mask_48(t2, d, t1[48 + i]); + t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2); + sp_3072_mask_48(t2, d, t1[48 + i]); + t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2); + } + + r1 = sp_3072_cmp_48(t1, d) >= 0; +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_3072_cond_sub_avx2_48(r, t1, d, (sp_digit)0 - r1); + else +#endif + sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. 
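+ *
+ * A thin wrapper over sp_3072_div_48 with the quotient discarded.  In
+ * the division loop above, the quotient digit estimated from the top
+ * words can overshoot slightly; the masked add-backs repair the
+ * remainder whenever the subtraction underflows.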
+ */
+static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_48(a, m, NULL, r);
+}
+
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+extern sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b);
+/* Divide a by d and put the remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Quotient result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[96], t2[49];
+    sp_digit div, r1;
+    int i;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    (void)m;
+
+    div = d[47];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
+    for (i = 47; i > 0; i--) {
+        if (t1[i + 48] != d[i])
+            break;
+    }
+    if (t1[i + 48] >= d[i]) {
+        sp_3072_sub_in_place_48(&t1[48], d);
+    }
+    for (i=47; i>=0; i--) {
+        r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div);
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            sp_3072_mul_d_avx2_48(t2, d, r1);
+        else
+#endif
+            sp_3072_mul_d_48(t2, d, r1);
+        t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
+        t1[48 + i] -= t2[48];
+        if (t1[48 + i] != 0) {
+            t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], d);
+            if (t1[48 + i] != 0)
+                t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], d);
+        }
+    }
+
+    for (i = 47; i > 0; i--) {
+        if (t1[i] != d[i])
+            break;
+    }
+    if (t1[i] >= d[i]) {
+        sp_3072_sub_48(r, t1, d);
+    }
+    else {
+        XMEMCPY(r, t1, sizeof(*t1) * 48);
+    }
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_48_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_48_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
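+ *
+ * reduceA, when non-zero, indicates a is double width and must first be
+ * reduced mod m.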
+ */ +static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][96]; + sp_digit rt[96]; +#else + sp_digit* t[32]; + sp_digit* rt; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 96, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 96; + rt = td + 3072; +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_48(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 48); + if (reduceA) { + err = sp_3072_mod_48(t[1] + 48, a, m); + if (err == MP_OKAY) + err = sp_3072_mod_48(t[1], t[1], m); + } + else { + XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48); + err = sp_3072_mod_48(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_48(t[10], t[ 5], m, mp); + sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_48(t[12], t[ 6], m, mp); + sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_48(t[14], t[ 7], m, mp); + sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_48(t[16], t[ 8], m, mp); + sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_48(t[18], t[ 9], m, mp); + sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_48(t[20], t[10], m, mp); + sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_48(t[22], t[11], m, mp); + sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_48(t[24], t[12], m, mp); + sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_48(t[26], t[13], m, mp); + sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_48(t[28], t[14], m, mp); + sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_48(t[30], t[15], m, mp); + sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + if ((bits % 5) == 0) { + c -= 5; + } + else { + c -= bits % 5; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 48); + for (; i>=0 || c>=5; ) { + if (c >= 5) { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + else if (c == 0) { + n = e[i--]; + y = (int)(n >> 59); + n <<= 5; + c = 59; + } + else { + y = (int)(n >> 59); + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + + sp_3072_sqr_48(rt, r); + sp_3072_mont_reduce_48(rt, m, mp); + sp_3072_sqr_48(r, rt); + sp_3072_mont_reduce_48(r, m, mp); + sp_3072_sqr_48(rt, r); + sp_3072_mont_reduce_48(rt, m, mp); + sp_3072_sqr_48(r, rt); + sp_3072_mont_reduce_48(r, m, mp); + sp_3072_sqr_48(rt, r); + sp_3072_mont_reduce_48(rt, m, mp); + + sp_3072_mul_48(r, rt, t[y]); + sp_3072_mont_reduce_48(r, m, mp); + } + + XMEMSET(&r[48], 0, sizeof(sp_digit) * 48); + sp_3072_mont_reduce_48(r, m, mp); + + mask = 0 - (sp_3072_cmp_48(r, m) >= 0); + sp_3072_cond_sub_48(r, r, m, mask); + } + 
+#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +extern void sp_3072_mont_reduce_avx2_48(sp_digit* a, const sp_digit* m, sp_digit mp); +#ifdef HAVE_INTEL_AVX2 +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_3072_mont_mul_avx2_48(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_3072_mul_avx2_48(r, a, b); + sp_3072_mont_reduce_avx2_48(r, m, mp); +} + +#endif /* HAVE_INTEL_AVX2 */ +#ifdef HAVE_INTEL_AVX2 +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_3072_mont_sqr_avx2_48(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_3072_sqr_avx2_48(r, a); + sp_3072_mont_reduce_avx2_48(r, m, mp); +} + +#endif /* HAVE_INTEL_AVX2 */ +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH) +#ifdef HAVE_INTEL_AVX2 +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
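+ *
+ * AVX2 (BMI2/ADX) variant of sp_3072_mod_exp_48, selected by callers at
+ * run time, e.g.
+ *   if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ *       err = sp_3072_mod_exp_avx2_48(r, b, e, expBits, m, 0);
+ *   else
+ *       err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0);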
+ */ +static int sp_3072_mod_exp_avx2_48(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][96]; + sp_digit rt[96]; +#else + sp_digit* t[32]; + sp_digit* rt; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 96, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 96; + rt = td + 3072; +#endif + norm = t[0]; + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_48(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 48); + if (reduceA) { + err = sp_3072_mod_48(t[1] + 48, a, m); + if (err == MP_OKAY) + err = sp_3072_mod_48(t[1], t[1], m); + } + else { + XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48); + err = sp_3072_mod_48(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_3072_mont_sqr_avx2_48(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_avx2_48(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_avx2_48(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_avx2_48(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_avx2_48(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_avx2_48(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_avx2_48(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_avx2_48(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_avx2_48(t[10], t[ 5], m, mp); + sp_3072_mont_mul_avx2_48(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_avx2_48(t[12], t[ 6], m, mp); + sp_3072_mont_mul_avx2_48(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_avx2_48(t[14], t[ 7], m, mp); + sp_3072_mont_mul_avx2_48(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_avx2_48(t[16], t[ 8], m, mp); + sp_3072_mont_mul_avx2_48(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_avx2_48(t[18], t[ 9], m, mp); + sp_3072_mont_mul_avx2_48(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_avx2_48(t[20], t[10], m, mp); + sp_3072_mont_mul_avx2_48(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_avx2_48(t[22], t[11], m, mp); + sp_3072_mont_mul_avx2_48(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_avx2_48(t[24], t[12], m, mp); + sp_3072_mont_mul_avx2_48(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_avx2_48(t[26], t[13], m, mp); + sp_3072_mont_mul_avx2_48(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_avx2_48(t[28], t[14], m, mp); + sp_3072_mont_mul_avx2_48(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_avx2_48(t[30], t[15], m, mp); + sp_3072_mont_mul_avx2_48(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + if ((bits % 5) == 0) { + c -= 5; + } + else { + c -= bits % 5; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 48); + for (; i>=0 || c>=5; ) { + if (c >= 5) { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + else if (c == 0) { + n = e[i--]; + y = (int)(n >> 59); + n <<= 5; + c = 59; + } + else { + y = (int)(n >> 59); + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + + sp_3072_sqr_avx2_48(rt, r); + sp_3072_mont_reduce_avx2_48(rt, m, mp); + sp_3072_sqr_avx2_48(r, rt); + sp_3072_mont_reduce_avx2_48(r, m, mp); + sp_3072_sqr_avx2_48(rt, r); + sp_3072_mont_reduce_avx2_48(rt, m, mp); + sp_3072_sqr_avx2_48(r, rt); + sp_3072_mont_reduce_avx2_48(r, m, mp); + sp_3072_sqr_avx2_48(rt, r); + sp_3072_mont_reduce_avx2_48(rt, m, mp); + + sp_3072_mul_avx2_48(r, rt, t[y]); + 
sp_3072_mont_reduce_avx2_48(r, m, mp); + } + + XMEMSET(&r[48], 0, sizeof(sp_digit) * 48); + sp_3072_mont_reduce_avx2_48(r, m, mp); + + mask = 0 - (sp_3072_cmp_48(r, m) >= 0); + sp_3072_cond_sub_avx2_48(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} +#endif /* HAVE_INTEL_AVX2 */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit ad[96], md[48], rd[96]; +#else + sp_digit* d = NULL; +#endif + sp_digit* a; + sp_digit *ah; + sp_digit* m; + sp_digit* r; + sp_digit e = 0; + int err = MP_OKAY; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + if (*outLen < 384) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 384 || + mp_count_bits(mm) != 3072)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 48 * 2; + m = r + 48 * 2; + ah = a + 48; + } +#else + a = ad; + m = md; + r = rd; + ah = a + 48; +#endif + + if (err == MP_OKAY) { + sp_3072_from_bin(ah, 48, in, inLen); +#if DIGIT_BIT >= 64 + e = em->dp[0]; +#else + e = em->dp[0]; + if (em->used > 1) + e |= ((sp_digit)em->dp[1]) << DIGIT_BIT; +#endif + if (e == 0) + err = MP_EXPTMOD_E; + } + if (err == MP_OKAY) { + sp_3072_from_mp(m, 48, mm); + + if (e == 0x3) { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + if (err == MP_OKAY) { + sp_3072_sqr_avx2_48(r, ah); + err = sp_3072_mod_48_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_3072_mul_avx2_48(r, ah, r); + err = sp_3072_mod_48_cond(r, r, m); + } + } + else +#endif + { + if (err == MP_OKAY) { + sp_3072_sqr_48(r, ah); + err = sp_3072_mod_48_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_3072_mul_48(r, ah, r); + err = sp_3072_mod_48_cond(r, r, m); + } + } + } + else { + int i; + sp_digit mp; + + sp_3072_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
*/ + XMEMSET(a, 0, sizeof(sp_digit) * 48); + err = sp_3072_mod_48_cond(a, a, m); + + if (err == MP_OKAY) { + for (i=63; i>=0; i--) { + if (e >> i) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 48); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + for (i--; i>=0; i--) { + sp_3072_mont_sqr_avx2_48(r, r, m, mp); + if (((e >> i) & 1) == 1) { + sp_3072_mont_mul_avx2_48(r, r, a, m, mp); + } + } + XMEMSET(&r[48], 0, sizeof(sp_digit) * 48); + sp_3072_mont_reduce_avx2_48(r, m, mp); + } + else +#endif + { + for (i--; i>=0; i--) { + sp_3072_mont_sqr_48(r, r, m, mp); + if (((e >> i) & 1) == 1) { + sp_3072_mont_mul_48(r, r, a, m, mp); + } + } + XMEMSET(&r[48], 0, sizeof(sp_digit) * 48); + sp_3072_mont_reduce_48(r, m, mp); + } + + for (i = 47; i > 0; i--) { + if (r[i] != m[i]) + break; + } + if (r[i] >= m[i]) + sp_3072_sub_in_place_48(r, m); + } + } + } + + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) + XFREE(d, NULL, DYNAMIC_TYPE_RSA); +#endif + + return err; +} + +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
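+ *
+ * In this build (SP_RSA_PRIVATE_EXP_D or RSA_LOW_MEM) the CRT
+ * parameters are ignored and in^dm mod mm is computed directly with a
+ * single 3072-bit exponentiation.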
+ */ +int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_digit a[96], d[48], m[48]; +#else + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; +#endif + sp_digit* r; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 384U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 3072) { + err = MP_READ_E; + } + if (inLen > 384U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 3072) { + err = MP_READ_E; + } + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 4, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } +#endif + + if (err == MP_OKAY) { +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + a = d + 48; + m = a + 96; +#endif + r = a; + + sp_3072_from_bin(a, 48, in, inLen); + sp_3072_from_mp(d, 48, dm); + sp_3072_from_mp(m, 48, mm); + err = sp_3072_mod_exp_48(r, a, d, 3072, m, 0); + } + + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (d != NULL) { + XMEMSET(d, 0, sizeof(sp_digit) * 48); + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(d, 0, sizeof(sp_digit) * 48); +#endif + + return err; +} + +#else +extern sp_digit sp_3072_cond_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m); +extern sp_digit sp_3072_cond_add_avx2_24(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m); +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
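+ *
+ * Uses CRT (Garner) recombination with two 1536-bit exponentiations:
+ *   tmpa = in^dp mod p
+ *   tmpb = in^dq mod q
+ *   h    = qi * (tmpa - tmpb) mod p  (p is conditionally added back
+ *                                     when the subtraction borrows)
+ *   out  = tmpb + h * q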
+ */ +int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit ad[48 * 2]; + sp_digit pd[24], qd[24], dpd[24]; + sp_digit tmpad[48], tmpbd[48]; +#else + sp_digit* t = NULL; +#endif + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* dq; + sp_digit* qi; + sp_digit* tmpa; + sp_digit* tmpb; + sp_digit* r; + sp_digit c; + int err = MP_OKAY; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + (void)dm; + (void)mm; + + if (*outLen < 384) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 24 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 48 * 2; + q = p + 24; + qi = dq = dp = q + 24; + tmpa = qi + 24; + tmpb = tmpa + 48; + + r = t + 48; + } +#else + r = a = ad; + p = pd; + q = qd; + qi = dq = dp = dpd; + tmpa = tmpad; + tmpb = tmpbd; +#endif + + if (err == MP_OKAY) { + sp_3072_from_bin(a, 48, in, inLen); + sp_3072_from_mp(p, 24, pm); + sp_3072_from_mp(q, 24, qm); + sp_3072_from_mp(dp, 24, dpm); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_3072_mod_exp_avx2_24(tmpa, a, dp, 1536, p, 1); + else +#endif + err = sp_3072_mod_exp_24(tmpa, a, dp, 1536, p, 1); + } + if (err == MP_OKAY) { + sp_3072_from_mp(dq, 24, dqm); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_3072_mod_exp_avx2_24(tmpb, a, dq, 1536, q, 1); + else +#endif + err = sp_3072_mod_exp_24(tmpb, a, dq, 1536, q, 1); + } + + if (err == MP_OKAY) { + c = sp_3072_sub_in_place_24(tmpa, tmpb); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + c += sp_3072_cond_add_avx2_24(tmpa, tmpa, p, c); + sp_3072_cond_add_avx2_24(tmpa, tmpa, p, c); + } + else +#endif + { + c += sp_3072_cond_add_24(tmpa, tmpa, p, c); + sp_3072_cond_add_24(tmpa, tmpa, p, c); + } + + sp_3072_from_mp(qi, 24, qim); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + sp_3072_mul_avx2_24(tmpa, tmpa, qi); + } + else +#endif + { + sp_3072_mul_24(tmpa, tmpa, qi); + } + err = sp_3072_mod_24(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + sp_3072_mul_avx2_24(tmpa, q, tmpa); + } + else +#endif + { + sp_3072_mul_24(tmpa, q, tmpa); + } + XMEMSET(&tmpb[24], 0, sizeof(sp_digit) * 24); + sp_3072_add_48(r, tmpb, tmpa); + + sp_3072_to_bin(r, out); + *outLen = 384; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 24 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpad, 0, sizeof(tmpad)); + XMEMSET(tmpbd, 0, sizeof(tmpbd)); + XMEMSET(pd, 0, sizeof(pd)); + XMEMSET(qd, 0, sizeof(qd)); + XMEMSET(dpd, 0, sizeof(dpd)); +#endif + + return err; +} +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of 
sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_3072_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 64 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 48); + r->used = 48; + mp_clamp(r); +#elif DIGIT_BIT < 64 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 48; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 64) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 64 - s; + } + r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 48; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 64 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 64 - s; + } + else { + s += 64; + } + } + r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[96], e[48], m[48]; + sp_digit* r = b; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 3072 || expBits > 3072 || + mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 48, base); + sp_3072_from_mp(e, 48, exp); + sp_3072_from_mp(m, 48, mod); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_3072_mod_exp_avx2_48(r, b, e, expBits, m, 0); + else +#endif + err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_3072_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH +#ifdef HAVE_FFDHE_3072 +extern void sp_3072_lshift_48(sp_digit* r, const sp_digit* a, int n); +#ifdef HAVE_INTEL_AVX2 +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
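+ *
+ * Since the base is 2, each 6-bit window multiplies by 2^y, which is a
+ * left shift rather than a Montgomery multiplication: six Montgomery
+ * squarings, then sp_3072_lshift_48, after which the word shifted out
+ * (r[48]) is folded back in as r[48] * norm, norm being 2^3072 mod m.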
+ */ +static int sp_3072_mod_exp_2_avx2_48(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[96]; + sp_digit td[49]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 145, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 96; +#else + norm = nd; + tmp = td; +#endif + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_48(norm, m); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + if ((bits % 6) == 0) { + c -= 6; + } + else { + c -= bits % 6; + } + y = (int)(n >> c); + n <<= 64 - c; + sp_3072_lshift_48(r, norm, y); + for (; i>=0 || c>=6; ) { + if (c == 0) { + n = e[i--]; + y = (int)(n >> 58); + n <<= 6; + c = 58; + } + else if (c < 6) { + y = (int)(n >> 58); + n = e[i--]; + c = 6 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 58) & 0x3f; + n <<= 6; + c -= 6; + } + + sp_3072_mont_sqr_avx2_48(r, r, m, mp); + sp_3072_mont_sqr_avx2_48(r, r, m, mp); + sp_3072_mont_sqr_avx2_48(r, r, m, mp); + sp_3072_mont_sqr_avx2_48(r, r, m, mp); + sp_3072_mont_sqr_avx2_48(r, r, m, mp); + sp_3072_mont_sqr_avx2_48(r, r, m, mp); + + sp_3072_lshift_48(r, r, y); + sp_3072_mul_d_avx2_48(tmp, norm, r[48]); + r[48] = 0; + o = sp_3072_add_48(r, r, tmp); + sp_3072_cond_sub_avx2_48(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[48], 0, sizeof(sp_digit) * 48); + sp_3072_mont_reduce_avx2_48(r, m, mp); + + mask = 0 - (sp_3072_cmp_48(r, m) >= 0); + sp_3072_cond_sub_avx2_48(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} +#endif /* HAVE_INTEL_AVX2 */ + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_3072_mod_exp_2_48(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[96]; + sp_digit td[49]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 145, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 96; +#else + norm = nd; + tmp = td; +#endif + + sp_3072_mont_setup(m, &mp); + sp_3072_mont_norm_48(norm, m); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + if ((bits % 6) == 0) { + c -= 6; + } + else { + c -= bits % 6; + } + y = (int)(n >> c); + n <<= 64 - c; + sp_3072_lshift_48(r, norm, y); + for (; i>=0 || c>=6; ) { + if (c == 0) { + n = e[i--]; + y = (int)(n >> 58); + n <<= 6; + c = 58; + } + else if (c < 6) { + y = (int)(n >> 58); + n = e[i--]; + c = 6 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 58) & 0x3f; + n <<= 6; + c -= 6; + } + + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + sp_3072_mont_sqr_48(r, r, m, mp); + + sp_3072_lshift_48(r, r, y); + sp_3072_mul_d_48(tmp, norm, r[48]); + r[48] = 0; + o = sp_3072_add_48(r, r, tmp); + sp_3072_cond_sub_48(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[48], 0, sizeof(sp_digit) * 48); + sp_3072_mont_reduce_48(r, m, mp); + + mask = 0 - (sp_3072_cmp_48(r, m) >= 0); + sp_3072_cond_sub_48(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} + +#endif /* HAVE_FFDHE_3072 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 384 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
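+ *
+ * When base is 2 and the top word of mod is all ones (the FFDHE-3072
+ * prime), the dedicated base-2 path above is taken.  Leading zero bytes
+ * are stripped from the output, so *outLen may come back less than 384.
+ * Illustrative call (variable names are examples only):
+ *   byte pub[384]; word32 pubSz = (word32)sizeof(pub);
+ *   ret = sp_DhExp_3072(&g, priv, privSz, &prime, pub, &pubSz);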
+ */ +int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ + int err = MP_OKAY; + sp_digit b[96], e[48], m[48]; + sp_digit* r = b; + word32 i; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + if (mp_count_bits(base) > 3072 || expLen > 384 || + mp_count_bits(mod) != 3072) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 48, base); + sp_3072_from_bin(e, 48, exp, expLen); + sp_3072_from_mp(m, 48, mod); + + #ifdef HAVE_FFDHE_3072 + if (base->used == 1 && base->dp[0] == 2 && m[47] == (sp_digit)-1) { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_3072_mod_exp_2_avx2_48(r, e, expLen * 8, m); + else +#endif + err = sp_3072_mod_exp_2_48(r, e, expLen * 8, m); + } + else + #endif + { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_3072_mod_exp_avx2_48(r, b, e, expLen * 8, m, 0); + else +#endif + err = sp_3072_mod_exp_48(r, b, e, expLen * 8, m, 0); + } + } + + if (err == MP_OKAY) { + sp_3072_to_bin(r, out); + *outLen = 384; + for (i=0; i<384 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} +#endif +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[48], e[24], m[24]; + sp_digit* r = b; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 1536 || expBits > 1536 || + mp_count_bits(mod) != 1536) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + sp_3072_from_mp(b, 24, base); + sp_3072_from_mp(e, 24, exp); + sp_3072_from_mp(m, 24, mod); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_3072_mod_exp_avx2_24(r, b, e, expBits, m, 0); + else +#endif + err = sp_3072_mod_exp_24(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + XMEMSET(r + 24, 0, sizeof(*r) * 24); + err = sp_3072_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* !WOLFSSL_SP_NO_3072 */ + +#ifdef WOLFSSL_SP_4096 +extern void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n); +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. 
+ */ +static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 64 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 64 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffffffffffffl; + s = 64U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 64U) <= (word32)DIGIT_BIT) { + s += 64U; + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 64) { + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + s = 64 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +extern void sp_4096_to_bin(sp_digit* r, byte* a); +extern sp_digit sp_4096_sub_in_place_64(sp_digit* a, const sp_digit* b); +extern sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b); +extern void sp_4096_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b); + +extern sp_digit sp_2048_dbl_32(sp_digit* r, const sp_digit* a); +extern void sp_4096_sqr_64(sp_digit* r, const sp_digit* a); + +#ifdef HAVE_INTEL_AVX2 +extern void sp_4096_mul_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit* b); +#endif /* HAVE_INTEL_AVX2 */ + +#ifdef HAVE_INTEL_AVX2 +extern void sp_4096_sqr_avx2_64(sp_digit* r, const sp_digit* a); +#endif /* HAVE_INTEL_AVX2 */ + +/* Caclulate the bottom digit of -1/a mod 2^n. + * + * a A single precision number. + * rho Bottom word of inverse. + */ +static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho) +{ + sp_digit x, b; + + b = a[0]; + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ + x *= 2 - b * x; /* here x*a==1 mod 2**64 */ + + /* rho = -1/m mod b */ + *rho = -x; +} + +extern void sp_4096_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b); +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 4096 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_4096_mont_norm_64(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 64); + + /* r = 2^n mod m */ + sp_4096_sub_in_place_64(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +extern sp_digit sp_4096_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m); +extern void sp_4096_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp); +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. 
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_4096_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_64(r, a, b);
+    sp_4096_mont_reduce_64(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_4096_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_4096_sqr_64(r, a);
+    sp_4096_mont_reduce_64(r, m, mp);
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+extern sp_digit sp_4096_cond_sub_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_4096_mul_d_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit b);
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ */
+static WC_INLINE sp_digit div_4096_word_64(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    register sp_digit r asm("rax");
+    __asm__ __volatile__ (
+        "divq %3"
+        : "=a" (r)
+        : "d" (d1), "a" (d0), "r" (div)
+        :
+    );
+    return r;
+}
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<64; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 64; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+extern int64_t sp_4096_cmp_64(const sp_digit* a, const sp_digit* b);
+/* Divide a by d and put the remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Quotient result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
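+ *
+ * div_4096_word_64 above wraps the x86-64 DIVQ instruction, dividing the
+ * 128-bit value d1:d0 (held in rdx:rax) by div = d[63].  The quotient
+ * digit it estimates may overshoot slightly; the masked add-backs in the
+ * loop below correct the remainder when that happens.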
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[128], t2[65];
+    sp_digit div, r1;
+    int i;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    (void)m;
+
+    div = d[63];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
+    r1 = sp_4096_cmp_64(&t1[64], d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+        sp_4096_cond_sub_avx2_64(&t1[64], &t1[64], d, (sp_digit)0 - r1);
+    else
+#endif
+        sp_4096_cond_sub_64(&t1[64], &t1[64], d, (sp_digit)0 - r1);
+    for (i=63; i>=0; i--) {
+        r1 = div_4096_word_64(t1[64 + i], t1[64 + i - 1], div);
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            sp_4096_mul_d_avx2_64(t2, d, r1);
+        else
+#endif
+            sp_4096_mul_d_64(t2, d, r1);
+        t1[64 + i] += sp_4096_sub_in_place_64(&t1[i], t2);
+        t1[64 + i] -= t2[64];
+        sp_4096_mask_64(t2, d, t1[64 + i]);
+        t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], t2);
+        sp_4096_mask_64(t2, d, t1[64 + i]);
+        t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_4096_cmp_64(t1, d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+        sp_4096_cond_sub_avx2_64(r, t1, d, (sp_digit)0 - r1);
+    else
+#endif
+        sp_4096_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_64(a, m, NULL, r);
+}
+
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+extern sp_digit sp_4096_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b);
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ * This version branches on intermediate values, so it is only used on
+ * public values where constant time execution is not required.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[128], t2[65];
+    sp_digit div, r1;
+    int i;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    (void)m;
+
+    div = d[63];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
+    for (i = 63; i > 0; i--) {
+        if (t1[i + 64] != d[i])
+            break;
+    }
+    if (t1[i + 64] >= d[i]) {
+        sp_4096_sub_in_place_64(&t1[64], d);
+    }
+    for (i=63; i>=0; i--) {
+        r1 = div_4096_word_64(t1[64 + i], t1[64 + i - 1], div);
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            sp_4096_mul_d_avx2_64(t2, d, r1);
+        else
+#endif
+            sp_4096_mul_d_64(t2, d, r1);
+        t1[64 + i] += sp_4096_sub_in_place_64(&t1[i], t2);
+        t1[64 + i] -= t2[64];
+        if (t1[64 + i] != 0) {
+            t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], d);
+            if (t1[64 + i] != 0)
+                t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], d);
+        }
+    }
+
+    for (i = 63; i > 0; i--) {
+        if (t1[i] != d[i])
+            break;
+    }
+    if (t1[i] >= d[i]) {
+        sp_4096_sub_64(r, t1, d);
+    }
+    else {
+        XMEMCPY(r, t1, sizeof(*t1) * 64);
+    }
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_64_cond(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH) +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][128]; + sp_digit rt[128]; +#else + sp_digit* t[32]; + sp_digit* rt; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 128, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 128; + rt = td + 4096; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_64(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 64); + if (reduceA) { + err = sp_4096_mod_64(t[1] + 64, a, m); + if (err == MP_OKAY) + err = sp_4096_mod_64(t[1], t[1], m); + } + else { + XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64); + err = sp_4096_mod_64(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_64(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_64(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_64(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_64(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_64(t[10], t[ 5], m, mp); + sp_4096_mont_mul_64(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_64(t[12], t[ 6], m, mp); + sp_4096_mont_mul_64(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_64(t[14], t[ 7], m, mp); + sp_4096_mont_mul_64(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_64(t[16], t[ 8], m, mp); + sp_4096_mont_mul_64(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_64(t[18], t[ 9], m, mp); + sp_4096_mont_mul_64(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_64(t[20], t[10], m, mp); + sp_4096_mont_mul_64(t[21], t[11], t[10], m, mp); + sp_4096_mont_sqr_64(t[22], t[11], m, mp); + sp_4096_mont_mul_64(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_64(t[24], t[12], m, mp); + sp_4096_mont_mul_64(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_64(t[26], t[13], m, mp); + sp_4096_mont_mul_64(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_64(t[28], t[14], m, mp); + sp_4096_mont_mul_64(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_64(t[30], t[15], m, mp); + sp_4096_mont_mul_64(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + if ((bits % 5) == 0) { + c -= 5; + } + else { + c -= bits % 5; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 64); + for (; i>=0 || c>=5; ) { + if (c >= 5) { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + else if (c == 0) { + n = e[i--]; + y 
= (int)(n >> 59);
+                n <<= 5;
+                c = 59;
+            }
+            else {
+                y = (int)(n >> 59);
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+
+            sp_4096_sqr_64(rt, r);
+            sp_4096_mont_reduce_64(rt, m, mp);
+            sp_4096_sqr_64(r, rt);
+            sp_4096_mont_reduce_64(r, m, mp);
+            sp_4096_sqr_64(rt, r);
+            sp_4096_mont_reduce_64(rt, m, mp);
+            sp_4096_sqr_64(r, rt);
+            sp_4096_mont_reduce_64(r, m, mp);
+            sp_4096_sqr_64(rt, r);
+            sp_4096_mont_reduce_64(rt, m, mp);
+
+            sp_4096_mul_64(r, rt, t[y]);
+            sp_4096_mont_reduce_64(r, m, mp);
+        }
+
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
+        sp_4096_mont_reduce_64(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_64(r, m) >= 0);
+        sp_4096_cond_sub_64(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL)
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return err;
+}
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+extern void sp_4096_mont_reduce_avx2_64(sp_digit* a, const sp_digit* m, sp_digit mp);
+#ifdef HAVE_INTEL_AVX2
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_4096_mont_mul_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_avx2_64(r, a, b);
+    sp_4096_mont_reduce_avx2_64(r, m, mp);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#ifdef HAVE_INTEL_AVX2
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_4096_mont_sqr_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_4096_sqr_avx2_64(r, a);
+    sp_4096_mont_reduce_avx2_64(r, m, mp);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
+#ifdef HAVE_INTEL_AVX2
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r     A single precision number that is the result of the operation.
+ * a     A single precision number being exponentiated.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
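+ *
+ * As in sp_4096_mod_exp_64(), the exponent is consumed in 5-bit windows
+ * from the most significant end: for a window value y the running
+ * result is squared five times and then multiplied by the precomputed
+ * t[y] = a^y (in Montgomery form), i.e. r = r^32 * t[y] mod m per
+ * window.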
+ */ +static int sp_4096_mod_exp_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][128]; + sp_digit rt[128]; +#else + sp_digit* t[32]; + sp_digit* rt; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 128, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 128; + rt = td + 4096; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_64(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 64); + if (reduceA) { + err = sp_4096_mod_64(t[1] + 64, a, m); + if (err == MP_OKAY) + err = sp_4096_mod_64(t[1], t[1], m); + } + else { + XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64); + err = sp_4096_mod_64(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_avx2_64(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_avx2_64(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_avx2_64(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_avx2_64(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_avx2_64(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_avx2_64(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_avx2_64(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_avx2_64(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_avx2_64(t[10], t[ 5], m, mp); + sp_4096_mont_mul_avx2_64(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_avx2_64(t[12], t[ 6], m, mp); + sp_4096_mont_mul_avx2_64(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_avx2_64(t[14], t[ 7], m, mp); + sp_4096_mont_mul_avx2_64(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_avx2_64(t[16], t[ 8], m, mp); + sp_4096_mont_mul_avx2_64(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_avx2_64(t[18], t[ 9], m, mp); + sp_4096_mont_mul_avx2_64(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_avx2_64(t[20], t[10], m, mp); + sp_4096_mont_mul_avx2_64(t[21], t[11], t[10], m, mp); + sp_4096_mont_sqr_avx2_64(t[22], t[11], m, mp); + sp_4096_mont_mul_avx2_64(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_avx2_64(t[24], t[12], m, mp); + sp_4096_mont_mul_avx2_64(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_avx2_64(t[26], t[13], m, mp); + sp_4096_mont_mul_avx2_64(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_avx2_64(t[28], t[14], m, mp); + sp_4096_mont_mul_avx2_64(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_avx2_64(t[30], t[15], m, mp); + sp_4096_mont_mul_avx2_64(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + if ((bits % 5) == 0) { + c -= 5; + } + else { + c -= bits % 5; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 64); + for (; i>=0 || c>=5; ) { + if (c >= 5) { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + else if (c == 0) { + n = e[i--]; + y = (int)(n >> 59); + n <<= 5; + c = 59; + } + else { + y = (int)(n >> 59); + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + + sp_4096_sqr_avx2_64(rt, r); + sp_4096_mont_reduce_avx2_64(rt, m, mp); + sp_4096_sqr_avx2_64(r, rt); + sp_4096_mont_reduce_avx2_64(r, m, mp); + sp_4096_sqr_avx2_64(rt, r); + sp_4096_mont_reduce_avx2_64(rt, m, mp); + sp_4096_sqr_avx2_64(r, rt); + sp_4096_mont_reduce_avx2_64(r, m, mp); + sp_4096_sqr_avx2_64(rt, r); + sp_4096_mont_reduce_avx2_64(rt, m, mp); + + sp_4096_mul_avx2_64(r, rt, t[y]); + 
sp_4096_mont_reduce_avx2_64(r, m, mp); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64); + sp_4096_mont_reduce_avx2_64(r, m, mp); + + mask = 0 - (sp_4096_cmp_64(r, m) >= 0); + sp_4096_cond_sub_avx2_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} +#endif /* HAVE_INTEL_AVX2 */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit ad[128], md[64], rd[128]; +#else + sp_digit* d = NULL; +#endif + sp_digit* a; + sp_digit *ah; + sp_digit* m; + sp_digit* r; + sp_digit e = 0; + int err = MP_OKAY; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 512 || + mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 64 * 2; + m = r + 64 * 2; + ah = a + 64; + } +#else + a = ad; + m = md; + r = rd; + ah = a + 64; +#endif + + if (err == MP_OKAY) { + sp_4096_from_bin(ah, 64, in, inLen); +#if DIGIT_BIT >= 64 + e = em->dp[0]; +#else + e = em->dp[0]; + if (em->used > 1) + e |= ((sp_digit)em->dp[1]) << DIGIT_BIT; +#endif + if (e == 0) + err = MP_EXPTMOD_E; + } + if (err == MP_OKAY) { + sp_4096_from_mp(m, 64, mm); + + if (e == 0x3) { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + if (err == MP_OKAY) { + sp_4096_sqr_avx2_64(r, ah); + err = sp_4096_mod_64_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_4096_mul_avx2_64(r, ah, r); + err = sp_4096_mod_64_cond(r, r, m); + } + } + else +#endif + { + if (err == MP_OKAY) { + sp_4096_sqr_64(r, ah); + err = sp_4096_mod_64_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_4096_mul_64(r, ah, r); + err = sp_4096_mod_64_cond(r, r, m); + } + } + } + else { + int i; + sp_digit mp; + + sp_4096_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
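+               a = in * 2^4096 mod m. The input was placed in the top
+               half (ah = a + 64) and the bottom half is zeroed below,
+               so the conditional reduction amounts to multiplying by
+               R = 2^4096 mod m.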
*/ + XMEMSET(a, 0, sizeof(sp_digit) * 64); + err = sp_4096_mod_64_cond(a, a, m); + + if (err == MP_OKAY) { + for (i=63; i>=0; i--) { + if (e >> i) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 64); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + for (i--; i>=0; i--) { + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + if (((e >> i) & 1) == 1) { + sp_4096_mont_mul_avx2_64(r, r, a, m, mp); + } + } + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64); + sp_4096_mont_reduce_avx2_64(r, m, mp); + } + else +#endif + { + for (i--; i>=0; i--) { + sp_4096_mont_sqr_64(r, r, m, mp); + if (((e >> i) & 1) == 1) { + sp_4096_mont_mul_64(r, r, a, m, mp); + } + } + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64); + sp_4096_mont_reduce_64(r, m, mp); + } + + for (i = 63; i > 0; i--) { + if (r[i] != m[i]) + break; + } + if (r[i] >= m[i]) + sp_4096_sub_in_place_64(r, m); + } + } + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) + XFREE(d, NULL, DYNAMIC_TYPE_RSA); +#endif + + return err; +} + +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
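+ *
+ * In this build (SP_RSA_PRIVATE_EXP_D or RSA_LOW_MEM) the CRT values
+ * pm, qm, dpm, dqm and qim are ignored and the result is computed
+ * directly as out = in^dm mod mm.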
+ */ +int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_digit a[128], d[64], m[64]; +#else + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; +#endif + sp_digit* r; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 4096) { + err = MP_READ_E; + } + if (inLen > 512U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } +#endif + + if (err == MP_OKAY) { +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + a = d + 64; + m = a + 128; +#endif + r = a; + + sp_4096_from_bin(a, 64, in, inLen); + sp_4096_from_mp(d, 64, dm); + sp_4096_from_mp(m, 64, mm); + err = sp_4096_mod_exp_64(r, a, d, 4096, m, 0); + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (d != NULL) { + XMEMSET(d, 0, sizeof(sp_digit) * 64); + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(d, 0, sizeof(sp_digit) * 64); +#endif + + return err; +} + +#else +extern sp_digit sp_4096_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m); +extern sp_digit sp_4096_cond_add_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m); +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
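+ *
+ * Sketch of the CRT (Garner) recombination performed below:
+ *     tmpa = in^dpm mod pm
+ *     tmpb = in^dqm mod qm
+ *     tmpa = qim * (tmpa - tmpb) mod pm
+ *     out  = tmpb + tmpa * qm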
+ */ +int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit ad[64 * 2]; + sp_digit pd[32], qd[32], dpd[32]; + sp_digit tmpad[64], tmpbd[64]; +#else + sp_digit* t = NULL; +#endif + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* dq; + sp_digit* qi; + sp_digit* tmpa; + sp_digit* tmpb; + sp_digit* r; + sp_digit c; + int err = MP_OKAY; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + (void)dm; + (void)mm; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 64 * 2; + q = p + 32; + qi = dq = dp = q + 32; + tmpa = qi + 32; + tmpb = tmpa + 64; + + r = t + 64; + } +#else + r = a = ad; + p = pd; + q = qd; + qi = dq = dp = dpd; + tmpa = tmpad; + tmpb = tmpbd; +#endif + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 64, in, inLen); + sp_4096_from_mp(p, 32, pm); + sp_4096_from_mp(q, 32, qm); + sp_4096_from_mp(dp, 32, dpm); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_2048_mod_exp_avx2_32(tmpa, a, dp, 2048, p, 1); + else +#endif + err = sp_2048_mod_exp_32(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + sp_4096_from_mp(dq, 32, dqm); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_2048_mod_exp_avx2_32(tmpb, a, dq, 2048, q, 1); + else +#endif + err = sp_2048_mod_exp_32(tmpb, a, dq, 2048, q, 1); + } + + if (err == MP_OKAY) { + c = sp_2048_sub_in_place_32(tmpa, tmpb); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + c += sp_4096_cond_add_avx2_32(tmpa, tmpa, p, c); + sp_4096_cond_add_avx2_32(tmpa, tmpa, p, c); + } + else +#endif + { + c += sp_4096_cond_add_32(tmpa, tmpa, p, c); + sp_4096_cond_add_32(tmpa, tmpa, p, c); + } + + sp_2048_from_mp(qi, 32, qim); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + sp_2048_mul_avx2_32(tmpa, tmpa, qi); + } + else +#endif + { + sp_2048_mul_32(tmpa, tmpa, qi); + } + err = sp_2048_mod_32(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + sp_2048_mul_avx2_32(tmpa, q, tmpa); + } + else +#endif + { + sp_2048_mul_32(tmpa, q, tmpa); + } + XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32); + sp_4096_add_64(r, tmpb, tmpa); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpad, 0, sizeof(tmpad)); + XMEMSET(tmpbd, 0, sizeof(tmpbd)); + XMEMSET(pd, 0, sizeof(pd)); + XMEMSET(qd, 0, sizeof(qd)); + XMEMSET(dpd, 0, sizeof(dpd)); +#endif + + return err; +} +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of 
sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_4096_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 64 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 64); + r->used = 64; + mp_clamp(r); +#elif DIGIT_BIT < 64 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 64; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 64) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 64 - s; + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 64; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 64 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 64 - s; + } + else { + s += 64; + } + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[128], e[64], m[64]; + sp_digit* r = b; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096 || expBits > 4096 || + mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 64, base); + sp_4096_from_mp(e, 64, exp); + sp_4096_from_mp(m, 64, mod); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_4096_mod_exp_avx2_64(r, b, e, expBits, m, 0); + else +#endif + err = sp_4096_mod_exp_64(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_4096_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH +#ifdef HAVE_FFDHE_4096 +extern void sp_4096_lshift_64(sp_digit* r, const sp_digit* a, int n); +#ifdef HAVE_INTEL_AVX2 +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
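+ *
+ * With a base of 2, six exponent bits at a time are handled by six
+ * Montgomery squarings followed by a left shift of y bits; the word
+ * shifted out at the top is folded back in with one multiply by the
+ * normalizer and a conditional subtract, so no full multiplications
+ * are needed.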
+ */ +static int sp_4096_mod_exp_2_avx2_64(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[128]; + sp_digit td[65]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 128; +#else + norm = nd; + tmp = td; +#endif + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_64(norm, m); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + if ((bits % 6) == 0) { + c -= 6; + } + else { + c -= bits % 6; + } + y = (int)(n >> c); + n <<= 64 - c; + sp_4096_lshift_64(r, norm, y); + for (; i>=0 || c>=6; ) { + if (c == 0) { + n = e[i--]; + y = (int)(n >> 58); + n <<= 6; + c = 58; + } + else if (c < 6) { + y = (int)(n >> 58); + n = e[i--]; + c = 6 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 58) & 0x3f; + n <<= 6; + c -= 6; + } + + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + + sp_4096_lshift_64(r, r, y); + sp_4096_mul_d_avx2_64(tmp, norm, r[64]); + r[64] = 0; + o = sp_4096_add_64(r, r, tmp); + sp_4096_cond_sub_avx2_64(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64); + sp_4096_mont_reduce_avx2_64(r, m, mp); + + mask = 0 - (sp_4096_cmp_64(r, m) >= 0); + sp_4096_cond_sub_avx2_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} +#endif /* HAVE_INTEL_AVX2 */ + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_4096_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[128]; + sp_digit td[65]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 128; +#else + norm = nd; + tmp = td; +#endif + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_64(norm, m); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + if ((bits % 6) == 0) { + c -= 6; + } + else { + c -= bits % 6; + } + y = (int)(n >> c); + n <<= 64 - c; + sp_4096_lshift_64(r, norm, y); + for (; i>=0 || c>=6; ) { + if (c == 0) { + n = e[i--]; + y = (int)(n >> 58); + n <<= 6; + c = 58; + } + else if (c < 6) { + y = (int)(n >> 58); + n = e[i--]; + c = 6 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 58) & 0x3f; + n <<= 6; + c -= 6; + } + + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + + sp_4096_lshift_64(r, r, y); + sp_4096_mul_d_64(tmp, norm, r[64]); + r[64] = 0; + o = sp_4096_add_64(r, r, tmp); + sp_4096_cond_sub_64(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64); + sp_4096_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_4096_cmp_64(r, m) >= 0); + sp_4096_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} + +#endif /* HAVE_FFDHE_4096 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
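+ *
+ * Leading zero bytes of the big-endian result are stripped, so on
+ * return outLen may be less than 512. A rough caller-side sketch
+ * (names are illustrative only):
+ *
+ *     byte out[512]; word32 outLen;
+ *     int ret = sp_DhExp_4096(&base, priv, privSz, &mod, out, &outLen);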
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[128], e[64], m[64];
+    sp_digit* r = b;
+    word32 i;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    if (mp_count_bits(base) > 4096 || expLen > 512 ||
+            mp_count_bits(mod) != 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 64, base);
+        sp_4096_from_bin(e, 64, exp, expLen);
+        sp_4096_from_mp(m, 64, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1) {
+#ifdef HAVE_INTEL_AVX2
+            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+                err = sp_4096_mod_exp_2_avx2_64(r, e, expLen * 8, m);
+            else
+#endif
+                err = sp_4096_mod_exp_2_64(r, e, expLen * 8, m);
+        }
+        else
+    #endif
+        {
+#ifdef HAVE_INTEL_AVX2
+            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+                err = sp_4096_mod_exp_avx2_64(r, b, e, expLen * 8, m, 0);
+            else
+#endif
+                err = sp_4096_mod_exp_64(r, b, e, expLen * 8, m, 0);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        for (i=0; i<512 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+    }
+
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+#endif
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
+/* Point structure to use. */
+typedef struct sp_point_256 {
+    sp_digit x[2 * 4];
+    sp_digit y[2 * 4];
+    sp_digit z[2 * 4];
+    int infinity;
+} sp_point_256;
+
+/* The modulus (prime) of the curve P256. */
+static const sp_digit p256_mod[4] = {
+    0xffffffffffffffffL,0x00000000ffffffffL,0x0000000000000000L,
+    0xffffffff00000001L
+};
+/* The Montgomery normalizer for modulus of the curve P256. */
+static const sp_digit p256_norm_mod[4] = {
+    0x0000000000000001L,0xffffffff00000000L,0xffffffffffffffffL,
+    0x00000000fffffffeL
+};
+/* The Montgomery multiplier for modulus of the curve P256. */
+static const sp_digit p256_mp_mod = 0x0000000000000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                            defined(HAVE_ECC_VERIFY)
+/* The order of the curve P256. */
+static const sp_digit p256_order[4] = {
+    0xf3b9cac2fc632551L,0xbce6faada7179e84L,0xffffffffffffffffL,
+    0xffffffff00000000L
+};
+#endif
+/* The order of the curve P256 minus 2. */
+static const sp_digit p256_order2[4] = {
+    0xf3b9cac2fc63254fL,0xbce6faada7179e84L,0xffffffffffffffffL,
+    0xffffffff00000000L
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for order of the curve P256. */
+static const sp_digit p256_norm_order[4] = {
+    0x0c46353d039cdaafL,0x4319055258e8617bL,0x0000000000000000L,
+    0x00000000ffffffffL
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for order of the curve P256. */
+static const sp_digit p256_mp_order = 0xccd1c8aaee00bc4fL;
+#endif
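+/* Note: p256_norm_mod is 2^256 - p256_mod (the value of the Montgomery
+ * normalizer R mod p) and p256_mp_mod is -1/p mod 2^64, which works out
+ * to 1 because the least significant word of the modulus is 2^64 - 1.
+ */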
+#ifdef WOLFSSL_SP_SMALL
+/* The base point of curve P256. */
+static const sp_point_256 p256_base = {
+    /* X ordinate */
+    {
+        0xf4a13945d898c296L,0x77037d812deb33a0L,0xf8bce6e563a440f2L,
+        0x6b17d1f2e12c4247L,
+        0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0xcbb6406837bf51f5L,0x2bce33576b315eceL,0x8ee7eb4a7c0f9e16L,
+        0x4fe342e2fe1a7f9bL,
+        0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x0000000000000001L,0x0000000000000000L,0x0000000000000000L,
+        0x0000000000000000L,
+        0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#endif /* WOLFSSL_SP_SMALL */
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p256_b[4] = {
+    0x3bce3c3e27d2604bL,0x651d06b0cc53b0f6L,0xb3ebbd55769886bcL,
+    0x5ac635d8aa3a93e7L
+};
+#endif
+
+static int sp_256_point_new_ex_4(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_256_point_new_4(heap, sp, p) sp_256_point_new_ex_4((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_256_point_new_4(heap, sp, p) sp_256_point_new_ex_4((heap), &(sp), &(p))
+#endif
+
+
+static void sp_256_point_free_4(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by Montgomery normalizer mod modulus (prime).
+ *
+ * r  The resulting Montgomery form number.
+ * a  The number to convert.
+ * m  The modulus (prime).
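+ *
+ * The result is a * 2^256 mod m. The signed coefficient rows in the
+ * body are the NIST P-256 (Solinas) reduction of a * 2^256 taken over
+ * eight 32-bit half words; e.g. the first row computes
+ *     t[0] = a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6].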
+ */ +static int sp_256_mod_mul_norm_4(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + int64_t t[8]; + int64_t a32[8]; + int64_t o; + + (void)m; + + a32[0] = a[0] & 0xffffffff; + a32[1] = a[0] >> 32; + a32[2] = a[1] & 0xffffffff; + a32[3] = a[1] >> 32; + a32[4] = a[2] & 0xffffffff; + a32[5] = a[2] >> 32; + a32[6] = a[3] & 0xffffffff; + a32[7] = a[3] >> 32; + + /* 1 1 0 -1 -1 -1 -1 0 */ + t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6]; + /* 0 1 1 0 -1 -1 -1 -1 */ + t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7]; + /* 0 0 1 1 0 -1 -1 -1 */ + t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7]; + /* -1 -1 0 2 2 1 0 -1 */ + t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7]; + /* 0 -1 -1 0 2 2 1 0 */ + t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6]; + /* 0 0 -1 -1 0 2 2 1 */ + t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7]; + /* -1 -1 0 0 0 1 3 2 */ + t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7]; + /* 1 0 -1 -1 -1 -1 0 3 */ + t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7]; + + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + o = t[7] >> 32; t[7] &= 0xffffffff; + t[0] += o; + t[3] -= o; + t[6] -= o; + t[7] += o; + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + r[0] = (t[1] << 32) | t[0]; + r[1] = (t[3] << 32) | t[2]; + r[2] = (t[5] << 32) | t[4]; + r[3] = (t[7] << 32) | t[6]; + + return MP_OKAY; +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 64 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 64 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffffffffffffl; + s = 64U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 64U) <= (word32)DIGIT_BIT) { + s += 64U; + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 64) { + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + s = 64 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Convert a point of type ecc_point to type sp_point_256. 
+ *
+ * p   Point of type sp_point_256 (result).
+ * pm  Point of type ecc_point.
+ */
+static void sp_256_point_from_ecc_point_4(sp_point_256* p, const ecc_point* pm)
+{
+    XMEMSET(p->x, 0, sizeof(p->x));
+    XMEMSET(p->y, 0, sizeof(p->y));
+    XMEMSET(p->z, 0, sizeof(p->z));
+    sp_256_from_mp(p->x, 4, pm->x);
+    sp_256_from_mp(p->y, 4, pm->y);
+    sp_256_from_mp(p->z, 4, pm->z);
+    p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_256_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 64
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 4);
+        r->used = 4;
+        mp_clamp(r);
+#elif DIGIT_BIT < 64
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 4; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 64) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 64 - s;
+        }
+        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 4; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 64 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 64 - s;
+            }
+            else {
+                s += 64;
+            }
+        }
+        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Convert a point of type sp_point_256 to type ecc_point.
+ *
+ * p   Point of type sp_point_256.
+ * pm  Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_256_point_to_ecc_point_4(const sp_point_256* p, ecc_point* pm)
+{
+    int err;
+
+    err = sp_256_to_mp(p->x, pm->x);
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, pm->y);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, pm->z);
+    }
+
+    return err;
+}
+
+extern void sp_256_cond_copy_4(sp_digit* r, const sp_digit* a, sp_digit m);
+extern void sp_256_mont_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp);
+extern void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp);
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_n_4(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mont_sqr_4(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_256_mont_sqr_4(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P256 curve. */
+static const uint64_t p256_mod_minus_2[4] = {
+    0xfffffffffffffffdU,0x00000000ffffffffU,0x0000000000000000U,
+    0xffffffff00000001U
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
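+ *
+ * The inverse is computed as a^(p-2) mod p (Fermat's little theorem),
+ * which keeps the operation constant time: the small build scans the
+ * bits of p256_mod_minus_2, the other runs the fixed addition chain
+ * below.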
+ */ +static void sp_256_mont_inv_4(sp_digit* r, const sp_digit* a, sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 4); + for (i=254; i>=0; i--) { + sp_256_mont_sqr_4(t, t, p256_mod, p256_mp_mod); + if (p256_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64))) + sp_256_mont_mul_4(t, t, a, p256_mod, p256_mp_mod); + } + XMEMCPY(r, t, sizeof(sp_digit) * 4); +#else + sp_digit* t1 = td; + sp_digit* t2 = td + 2 * 4; + sp_digit* t3 = td + 4 * 4; + /* 0x2 */ + sp_256_mont_sqr_4(t1, a, p256_mod, p256_mp_mod); + /* 0x3 */ + sp_256_mont_mul_4(t2, t1, a, p256_mod, p256_mp_mod); + /* 0xc */ + sp_256_mont_sqr_n_4(t1, t2, 2, p256_mod, p256_mp_mod); + /* 0xd */ + sp_256_mont_mul_4(t3, t1, a, p256_mod, p256_mp_mod); + /* 0xf */ + sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xf0 */ + sp_256_mont_sqr_n_4(t1, t2, 4, p256_mod, p256_mp_mod); + /* 0xfd */ + sp_256_mont_mul_4(t3, t3, t1, p256_mod, p256_mp_mod); + /* 0xff */ + sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xff00 */ + sp_256_mont_sqr_n_4(t1, t2, 8, p256_mod, p256_mp_mod); + /* 0xfffd */ + sp_256_mont_mul_4(t3, t3, t1, p256_mod, p256_mp_mod); + /* 0xffff */ + sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xffff0000 */ + sp_256_mont_sqr_n_4(t1, t2, 16, p256_mod, p256_mp_mod); + /* 0xfffffffd */ + sp_256_mont_mul_4(t3, t3, t1, p256_mod, p256_mp_mod); + /* 0xffffffff */ + sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xffffffff00000000 */ + sp_256_mont_sqr_n_4(t1, t2, 32, p256_mod, p256_mp_mod); + /* 0xffffffffffffffff */ + sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xffffffff00000001 */ + sp_256_mont_mul_4(r, t1, a, p256_mod, p256_mp_mod); + /* 0xffffffff000000010000000000000000000000000000000000000000 */ + sp_256_mont_sqr_n_4(r, r, 160, p256_mod, p256_mp_mod); + /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */ + sp_256_mont_mul_4(r, r, t2, p256_mod, p256_mp_mod); + /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */ + sp_256_mont_sqr_n_4(r, r, 32, p256_mod, p256_mp_mod); + /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */ + sp_256_mont_mul_4(r, r, t3, p256_mod, p256_mp_mod); +#endif /* WOLFSSL_SP_SMALL */ +} + +extern int64_t sp_256_cmp_4(const sp_digit* a, const sp_digit* b); +/* Normalize the values in each word to 64. + * + * a Array of sp_digit to normalize. + */ +#define sp_256_norm_4(a) + +extern sp_digit sp_256_cond_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m); +extern sp_digit sp_256_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b); +#define sp_256_mont_reduce_order_4 sp_256_mont_reduce_4 + +extern void sp_256_mont_reduce_4(sp_digit* a, const sp_digit* m, sp_digit mp); +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. + * p Montgomery form projective coordinate point. + * t Temporary ordinate data. 
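+ *
+ * For (X, Y, Z) in Jacobian projective coordinates the affine result is
+ *     x = X / Z^2 mod p,  y = Y / Z^3 mod p,  z = 1
+ * computed with a single Montgomery inversion of Z, after which each
+ * ordinate is reduced to less than the modulus.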
+ */ +static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + int64_t n; + + sp_256_mont_inv_4(t1, p->z, t + 2*4); + + sp_256_mont_sqr_4(t2, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t1, t2, t1, p256_mod, p256_mp_mod); + + /* x /= z^2 */ + sp_256_mont_mul_4(r->x, p->x, t2, p256_mod, p256_mp_mod); + XMEMSET(r->x + 4, 0, sizeof(r->x) / 2U); + sp_256_mont_reduce_4(r->x, p256_mod, p256_mp_mod); + /* Reduce x to less than modulus */ + n = sp_256_cmp_4(r->x, p256_mod); + sp_256_cond_sub_4(r->x, r->x, p256_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_4(r->x); + + /* y /= z^3 */ + sp_256_mont_mul_4(r->y, p->y, t1, p256_mod, p256_mp_mod); + XMEMSET(r->y + 4, 0, sizeof(r->y) / 2U); + sp_256_mont_reduce_4(r->y, p256_mod, p256_mp_mod); + /* Reduce y to less than modulus */ + n = sp_256_cmp_4(r->y, p256_mod); + sp_256_cond_sub_4(r->y, r->y, p256_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_4(r->y); + + XMEMSET(r->z, 0, sizeof(r->z)); + r->z[0] = 1; + +} + +extern void sp_256_mont_add_4(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m); +extern void sp_256_mont_dbl_4(const sp_digit* r, const sp_digit* a, const sp_digit* m); +extern void sp_256_mont_tpl_4(sp_digit* r, const sp_digit* a, const sp_digit* m); +extern void sp_256_mont_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m); +extern void sp_256_div2_4(sp_digit* r, const sp_digit* a, const sp_digit* m); +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_dbl_4(sp_point_256* r, const sp_point_256* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_4(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_4(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_4(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_4(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_4(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_4(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_4(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_4(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_4(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_4(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_4(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_4(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_4(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_4(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_4(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_4(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_4(y, y, t2, p256_mod); +} + +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. 
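+ *
+ * Cheaper than calling sp_256_proj_point_dbl_4() n times: W = Z^4 is
+ * carried across iterations and updated with a single multiplication
+ * by Y^4 each time round the loop.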
+ */ +static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int n, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*4; + sp_digit* b = t + 4*4; + sp_digit* t1 = t + 6*4; + sp_digit* t2 = t + 8*4; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_256_mont_dbl_4(y, y, p256_mod); + /* W = Z^4 */ + sp_256_mont_sqr_4(w, z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_4(w, w, p256_mod, p256_mp_mod); + +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_256_mont_sqr_4(t1, x, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(t1, t1, w, p256_mod); + sp_256_mont_tpl_4(a, t1, p256_mod); + /* B = X*Y^2 */ + sp_256_mont_sqr_4(t1, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(b, t1, x, p256_mod, p256_mp_mod); + /* X = A^2 - 2B */ + sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_4(t2, b, p256_mod); + sp_256_mont_sub_4(x, x, t2, p256_mod); + /* Z = Z*Y */ + sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod); + /* t2 = Y^4 */ + sp_256_mont_sqr_4(t1, t1, p256_mod, p256_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_256_mont_mul_4(w, w, t1, p256_mod, p256_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_256_mont_sub_4(y, b, x, p256_mod); + sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_4(y, y, p256_mod); + sp_256_mont_sub_4(y, y, t1, p256_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_256_mont_sqr_4(t1, x, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(t1, t1, w, p256_mod); + sp_256_mont_tpl_4(a, t1, p256_mod); + /* B = X*Y^2 */ + sp_256_mont_sqr_4(t1, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(b, t1, x, p256_mod, p256_mp_mod); + /* X = A^2 - 2B */ + sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_4(t2, b, p256_mod); + sp_256_mont_sub_4(x, x, t2, p256_mod); + /* Z = Z*Y */ + sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod); + /* t2 = Y^4 */ + sp_256_mont_sqr_4(t1, t1, p256_mod, p256_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_256_mont_sub_4(y, b, x, p256_mod); + sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_4(y, y, p256_mod); + sp_256_mont_sub_4(y, y, t1, p256_mod); +#endif + /* Y = Y/2 */ + sp_256_div2_4(y, y, p256_mod); +} + +/* Compare two numbers to determine if they are equal. + * Constant time implementation. + * + * a First number to compare. + * b Second number to compare. + * returns 1 when equal and 0 otherwise. + */ +static int sp_256_cmp_equal_4(const sp_digit* a, const sp_digit* b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3])) == 0; +} + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_add_4(sp_point_256* r, const sp_point_256* p, const sp_point_256* q, + sp_digit* t) +{ + const sp_point_256* ap[2]; + sp_point_256* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + sp_digit* t3 = t + 4*4; + sp_digit* t4 = t + 6*4; + sp_digit* t5 = t + 8*4; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Ensure only the first point is the same as the result. 
*/ + if (q == r) { + const sp_point_256* a = p; + p = q; + q = a; + } + + /* Check double */ + (void)sp_256_sub_4(t1, p256_mod, q->y); + sp_256_norm_4(t1); + if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & + (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { + sp_256_proj_point_dbl_4(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_256)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<4; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<4; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<4; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t1, t1, x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_4(t3, t3, y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_4(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_4(t4, t4, t3, p256_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(x, x, t5, p256_mod); + sp_256_mont_dbl_4(t1, y, p256_mod); + sp_256_mont_sub_4(x, x, t1, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_4(y, y, x, p256_mod); + sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(y, y, t5, p256_mod); + } +} + +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. 
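+ * m Multiplier on the table index; the result of the i'th doubling is
+ *   stored at r[(1<<i)*m].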
+ */
+static void sp_256_proj_point_dbl_n_store_4(sp_point_256* r, const sp_point_256* p,
+        int n, int m, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*4;
+    sp_digit* b = t + 4*4;
+    sp_digit* t1 = t + 6*4;
+    sp_digit* t2 = t + 8*4;
+    sp_digit* x = r[2*m].x;
+    sp_digit* y = r[(1<<n)*m].y;
+    sp_digit* z = r[2*m].z;
+    int i;
+
+    for (i=0; i<4; i++) {
+        x[i] = p->x[i];
+    }
+    for (i=0; i<4; i++) {
+        y[i] = p->y[i];
+    }
+    for (i=0; i<4; i++) {
+        z[i] = p->z[i];
+    }
+
+    /* Y = 2*Y */
+    sp_256_mont_dbl_4(y, y, p256_mod);
+    /* W = Z^4 */
+    sp_256_mont_sqr_4(w, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_4(w, w, p256_mod, p256_mp_mod);
+    for (i=1; i<=n; i++) {
+        /* A = 3*(X^2 - W) */
+        sp_256_mont_sqr_4(t1, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(t1, t1, w, p256_mod);
+        sp_256_mont_tpl_4(a, t1, p256_mod);
+        /* B = X*Y^2 */
+        sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(b, t2, x, p256_mod, p256_mp_mod);
+        x = r[(1<<i)*m].x;
+        /* X = A^2 - 2B */
+        sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_4(t2, b, p256_mod);
+        sp_256_mont_sub_4(x, x, t2, p256_mod);
+        /* Z = Z*Y */
+        z = r[(1<<i)*m].z;
+        sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod);
+        /* t2 = Y^4 */
+        sp_256_mont_sqr_4(t1, t1, p256_mod, p256_mp_mod);
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_256_mont_sub_4(y, b, x, p256_mod);
+        sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_4(y, y, p256_mod);
+        sp_256_mont_sub_4(y, y, t1, p256_mod);
+
+        /* Y = Y/2 */
+        sp_256_div2_4(r[(1<<i)*m].y, y, p256_mod);
+        r[(1<<i)*m].infinity = 0;
+    }
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * ra  Result of addition.
+ * rs  Result of subtraction.
+ * p   First point to add.
+ * q   Second point to add.
+ * t   Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_sub_4(sp_point_256* ra, sp_point_256* rs,
+        const sp_point_256* p, const sp_point_256* q, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*4;
+    sp_digit* t3 = t + 4*4;
+    sp_digit* t4 = t + 6*4;
+    sp_digit* t5 = t + 8*4;
+    sp_digit* t6 = t + 10*4;
+    sp_digit* x = ra->x;
+    sp_digit* y = ra->y;
+    sp_digit* z = ra->z;
+    sp_digit* xs = rs->x;
+    sp_digit* ys = rs->y;
+    sp_digit* zs = rs->z;
+
+
+    XMEMCPY(x, p->x, sizeof(p->x) / 2);
+    XMEMCPY(y, p->y, sizeof(p->y) / 2);
+    XMEMCPY(z, p->z, sizeof(p->z) / 2);
+    ra->infinity = 0;
+    rs->infinity = 0;
+
+    /* U1 = X1*Z2^2 */
+    sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(t1, t1, x, p256_mod, p256_mp_mod);
+    /* U2 = X2*Z1^2 */
+    sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod);
+    /* S1 = Y1*Z2^3 */
+    sp_256_mont_mul_4(t3, t3, y, p256_mod, p256_mp_mod);
+    /* S2 = Y2*Z1^3 */
+    sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod);
+    /* H = U2 - U1 */
+    sp_256_mont_sub_4(t2, t2, t1, p256_mod);
+    /* RS = S2 + S1 */
+    sp_256_mont_add_4(t6, t4, t3, p256_mod);
+    /* R = S2 - S1 */
+    sp_256_mont_sub_4(t4, t4, t3, p256_mod);
+    /* Z3 = H*Z1*Z2 */
+    /* ZS = H*Z1*Z2 */
+    sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod);
+    XMEMCPY(zs, z, sizeof(p->z)/2);
+    /* X3 = R^2 - H^3 - 2*U1*H^2 */
+    /* XS = RS^2 - H^3 - 2*U1*H^2 */
+    sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_4(xs, t6, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_4(x, x, t5, p256_mod);
+    sp_256_mont_sub_4(xs, xs, t5, p256_mod);
+    sp_256_mont_dbl_4(t1, y, p256_mod);
+    sp_256_mont_sub_4(x, x, t1, p256_mod);
+    sp_256_mont_sub_4(xs, xs, t1, p256_mod);
+    /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+    /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */
+    sp_256_mont_sub_4(ys, y, xs, p256_mod);
+    sp_256_mont_sub_4(y, y, x, p256_mod);
+    sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod);
+    sp_256_sub_4(t6, p256_mod, t6);
+    sp_256_mont_mul_4(ys, ys, t6, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_4(y, y, t5, p256_mod);
+    sp_256_mont_sub_4(ys, ys, t5, p256_mod);
+}
+
+/* Structure used to describe recoding of scalar multiplication. */
+typedef struct ecc_recode_256 {
+    /* Index into pre-computation table. */
+    uint8_t i;
+    /* Use the negative of the point. */
+    uint8_t neg;
+} ecc_recode_256;
+
+/* The index into pre-computation table to use.
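+ * The scalar is recoded below into 6-bit signed digits in [-32, 32]:
+ * a raw window value y > 32 becomes -(64 - y) with a carry into the
+ * next digit, so only the multiples 0..32 of the point need to be
+ * precomputed.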
*/ +static const uint8_t recode_index_4_6[66] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, + 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, + 0, 1, +}; + +/* Whether to negate y-ordinate. */ +static const uint8_t recode_neg_4_6[66] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, +}; + +/* Recode the scalar for multiplication using pre-computed values and + * subtraction. + * + * k Scalar to multiply by. + * v Vector of operations to perform. + */ +static void sp_256_ecc_recode_6_4(const sp_digit* k, ecc_recode_256* v) +{ + int i, j; + uint8_t y; + int carry = 0; + int o; + sp_digit n; + + j = 0; + n = k[j]; + o = 0; + for (i=0; i<43; i++) { + y = n; + if (o + 6 < 64) { + y &= 0x3f; + n >>= 6; + o += 6; + } + else if (o + 6 == 64) { + n >>= 6; + if (++j < 4) + n = k[j]; + o = 0; + } + else if (++j < 4) { + n = k[j]; + y |= (n << (64 - o)) & 0x3f; + o -= 58; + n >>= o; + } + + y += carry; + v[i].i = recode_index_4_6[y]; + v[i].neg = recode_neg_4_6[y]; + carry = (y >> 6) + v[i].neg; + } +} + +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_256_ecc_mulmod_win_add_sub_4(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 td[33]; + sp_point_256 rtd, pd; + sp_digit tmpd[2 * 4 * 6]; +#endif + sp_point_256* t; + sp_point_256* rt; + sp_point_256* p = NULL; + sp_digit* tmp; + sp_digit* negy; + int i; + ecc_recode_256 v[43]; + int err; + + (void)heap; + + err = sp_256_point_new_4(heap, rtd, rt); + if (err == MP_OKAY) + err = sp_256_point_new_4(heap, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 33, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#else + t = td; + tmp = tmpd; +#endif + + + if (err == MP_OKAY) { + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_256_mod_mul_norm_4(t[1].x, g->x, p256_mod); + } + if (err == MP_OKAY) { + err = sp_256_mod_mul_norm_4(t[1].y, g->y, p256_mod); + } + if (err == MP_OKAY) { + err = sp_256_mod_mul_norm_4(t[1].z, g->z, p256_mod); + } + + if (err == MP_OKAY) { + t[1].infinity = 0; + /* t[2] ... 
t[32] */
+        sp_256_proj_point_dbl_n_store_4(t, &t[ 1], 5, 1, tmp);
+        sp_256_proj_point_add_4(&t[ 3], &t[ 2], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[ 6], &t[ 3], tmp);
+        sp_256_proj_point_add_sub_4(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[10], &t[ 5], tmp);
+        sp_256_proj_point_add_sub_4(&t[11], &t[ 9], &t[10], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[12], &t[ 6], tmp);
+        sp_256_proj_point_dbl_4(&t[14], &t[ 7], tmp);
+        sp_256_proj_point_add_sub_4(&t[15], &t[13], &t[14], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[18], &t[ 9], tmp);
+        sp_256_proj_point_add_sub_4(&t[19], &t[17], &t[18], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[20], &t[10], tmp);
+        sp_256_proj_point_dbl_4(&t[22], &t[11], tmp);
+        sp_256_proj_point_add_sub_4(&t[23], &t[21], &t[22], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[24], &t[12], tmp);
+        sp_256_proj_point_dbl_4(&t[26], &t[13], tmp);
+        sp_256_proj_point_add_sub_4(&t[27], &t[25], &t[26], &t[ 1], tmp);
+        sp_256_proj_point_dbl_4(&t[28], &t[14], tmp);
+        sp_256_proj_point_dbl_4(&t[30], &t[15], tmp);
+        sp_256_proj_point_add_sub_4(&t[31], &t[29], &t[30], &t[ 1], tmp);
+
+        negy = t[0].y;
+
+        sp_256_ecc_recode_6_4(k, v);
+
+        i = 42;
+        XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_256));
+        for (--i; i>=0; i--) {
+            sp_256_proj_point_dbl_n_4(rt, 6, tmp);
+
+            XMEMCPY(p, &t[v[i].i], sizeof(sp_point_256));
+            sp_256_sub_4(negy, p256_mod, p->y);
+            sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
+            sp_256_proj_point_add_4(rt, rt, p, tmp);
+        }
+
+        if (map != 0) {
+            sp_256_map_4(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL)
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    if (tmp != NULL)
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_256_point_free_4(p, 0, heap);
+    sp_256_point_free_4(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef HAVE_INTEL_AVX2
+extern void sp_256_mont_mul_avx2_4(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp);
+extern void sp_256_mont_sqr_avx2_4(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp);
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_n_avx2_4(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mont_sqr_avx2_4(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_256_mont_sqr_avx2_4(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
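+ *
+ * The inverse is computed by Fermat's little theorem as a^(p-2) mod p:
+ * the small build walks the bits of p256_mod_minus_2, while the default
+ * build evaluates the same exponent,
+ * 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd,
+ * with the fixed addition chain spelled out below.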
+ */ +static void sp_256_mont_inv_avx2_4(sp_digit* r, const sp_digit* a, sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 4); + for (i=254; i>=0; i--) { + sp_256_mont_sqr_avx2_4(t, t, p256_mod, p256_mp_mod); + if (p256_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64))) + sp_256_mont_mul_avx2_4(t, t, a, p256_mod, p256_mp_mod); + } + XMEMCPY(r, t, sizeof(sp_digit) * 4); +#else + sp_digit* t1 = td; + sp_digit* t2 = td + 2 * 4; + sp_digit* t3 = td + 4 * 4; + /* 0x2 */ + sp_256_mont_sqr_avx2_4(t1, a, p256_mod, p256_mp_mod); + /* 0x3 */ + sp_256_mont_mul_avx2_4(t2, t1, a, p256_mod, p256_mp_mod); + /* 0xc */ + sp_256_mont_sqr_n_avx2_4(t1, t2, 2, p256_mod, p256_mp_mod); + /* 0xd */ + sp_256_mont_mul_avx2_4(t3, t1, a, p256_mod, p256_mp_mod); + /* 0xf */ + sp_256_mont_mul_avx2_4(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xf0 */ + sp_256_mont_sqr_n_avx2_4(t1, t2, 4, p256_mod, p256_mp_mod); + /* 0xfd */ + sp_256_mont_mul_avx2_4(t3, t3, t1, p256_mod, p256_mp_mod); + /* 0xff */ + sp_256_mont_mul_avx2_4(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xff00 */ + sp_256_mont_sqr_n_avx2_4(t1, t2, 8, p256_mod, p256_mp_mod); + /* 0xfffd */ + sp_256_mont_mul_avx2_4(t3, t3, t1, p256_mod, p256_mp_mod); + /* 0xffff */ + sp_256_mont_mul_avx2_4(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xffff0000 */ + sp_256_mont_sqr_n_avx2_4(t1, t2, 16, p256_mod, p256_mp_mod); + /* 0xfffffffd */ + sp_256_mont_mul_avx2_4(t3, t3, t1, p256_mod, p256_mp_mod); + /* 0xffffffff */ + sp_256_mont_mul_avx2_4(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xffffffff00000000 */ + sp_256_mont_sqr_n_avx2_4(t1, t2, 32, p256_mod, p256_mp_mod); + /* 0xffffffffffffffff */ + sp_256_mont_mul_avx2_4(t2, t2, t1, p256_mod, p256_mp_mod); + /* 0xffffffff00000001 */ + sp_256_mont_mul_avx2_4(r, t1, a, p256_mod, p256_mp_mod); + /* 0xffffffff000000010000000000000000000000000000000000000000 */ + sp_256_mont_sqr_n_avx2_4(r, r, 160, p256_mod, p256_mp_mod); + /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */ + sp_256_mont_mul_avx2_4(r, r, t2, p256_mod, p256_mp_mod); + /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */ + sp_256_mont_sqr_n_avx2_4(r, r, 32, p256_mod, p256_mp_mod); + /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */ + sp_256_mont_mul_avx2_4(r, r, t3, p256_mod, p256_mp_mod); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. + * p Montgomery form projective coordinate point. + * t Temporary ordinate data. + */ +static void sp_256_map_avx2_4(sp_point_256* r, const sp_point_256* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + int64_t n; + + sp_256_mont_inv_avx2_4(t1, p->z, t + 2*4); + + sp_256_mont_sqr_avx2_4(t2, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t1, t2, t1, p256_mod, p256_mp_mod); + + /* x /= z^2 */ + sp_256_mont_mul_avx2_4(r->x, p->x, t2, p256_mod, p256_mp_mod); + XMEMSET(r->x + 4, 0, sizeof(r->x) / 2U); + sp_256_mont_reduce_4(r->x, p256_mod, p256_mp_mod); + /* Reduce x to less than modulus */ + n = sp_256_cmp_4(r->x, p256_mod); + sp_256_cond_sub_4(r->x, r->x, p256_mod, 0 - ((n >= 0) ? 
+ (sp_digit)1 : (sp_digit)0)); + sp_256_norm_4(r->x); + + /* y /= z^3 */ + sp_256_mont_mul_avx2_4(r->y, p->y, t1, p256_mod, p256_mp_mod); + XMEMSET(r->y + 4, 0, sizeof(r->y) / 2U); + sp_256_mont_reduce_4(r->y, p256_mod, p256_mp_mod); + /* Reduce y to less than modulus */ + n = sp_256_cmp_4(r->y, p256_mod); + sp_256_cond_sub_4(r->y, r->y, p256_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_256_norm_4(r->y); + + XMEMSET(r->z, 0, sizeof(r->z)); + r->z[0] = 1; + +} + +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_dbl_avx2_4(sp_point_256* r, const sp_point_256* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_avx2_4(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_avx2_4(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_4(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_4(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_4(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_avx2_4(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_4(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_4(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_avx2_4(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_avx2_4(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_4(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_avx2_4(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_avx2_4(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_4(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_4(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_4(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_avx2_4(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_4(y, y, t2, p256_mod); +} + +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. 
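+ *
+ * Cheaper than n separate doublings: W = Z^4 is computed once up front
+ * and then maintained across iterations as W = W*Y^4 (see the loop below)
+ * rather than being recomputed from Z each time, and Y is only halved
+ * once after the final iteration.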
+ */ +static void sp_256_proj_point_dbl_n_avx2_4(sp_point_256* p, int n, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*4; + sp_digit* b = t + 4*4; + sp_digit* t1 = t + 6*4; + sp_digit* t2 = t + 8*4; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_256_mont_dbl_4(y, y, p256_mod); + /* W = Z^4 */ + sp_256_mont_sqr_avx2_4(w, z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_avx2_4(w, w, p256_mod, p256_mp_mod); + +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_256_mont_sqr_avx2_4(t1, x, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(t1, t1, w, p256_mod); + sp_256_mont_tpl_4(a, t1, p256_mod); + /* B = X*Y^2 */ + sp_256_mont_sqr_avx2_4(t1, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(b, t1, x, p256_mod, p256_mp_mod); + /* X = A^2 - 2B */ + sp_256_mont_sqr_avx2_4(x, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_4(t2, b, p256_mod); + sp_256_mont_sub_4(x, x, t2, p256_mod); + /* Z = Z*Y */ + sp_256_mont_mul_avx2_4(z, z, y, p256_mod, p256_mp_mod); + /* t2 = Y^4 */ + sp_256_mont_sqr_avx2_4(t1, t1, p256_mod, p256_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_256_mont_mul_avx2_4(w, w, t1, p256_mod, p256_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_256_mont_sub_4(y, b, x, p256_mod); + sp_256_mont_mul_avx2_4(y, y, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_4(y, y, p256_mod); + sp_256_mont_sub_4(y, y, t1, p256_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_256_mont_sqr_avx2_4(t1, x, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(t1, t1, w, p256_mod); + sp_256_mont_tpl_4(a, t1, p256_mod); + /* B = X*Y^2 */ + sp_256_mont_sqr_avx2_4(t1, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(b, t1, x, p256_mod, p256_mp_mod); + /* X = A^2 - 2B */ + sp_256_mont_sqr_avx2_4(x, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_4(t2, b, p256_mod); + sp_256_mont_sub_4(x, x, t2, p256_mod); + /* Z = Z*Y */ + sp_256_mont_mul_avx2_4(z, z, y, p256_mod, p256_mp_mod); + /* t2 = Y^4 */ + sp_256_mont_sqr_avx2_4(t1, t1, p256_mod, p256_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_256_mont_sub_4(y, b, x, p256_mod); + sp_256_mont_mul_avx2_4(y, y, a, p256_mod, p256_mp_mod); + sp_256_mont_dbl_4(y, y, p256_mod); + sp_256_mont_sub_4(y, y, t1, p256_mod); +#endif + /* Y = Y/2 */ + sp_256_div2_4(y, y, p256_mod); +} + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_add_avx2_4(sp_point_256* r, const sp_point_256* p, const sp_point_256* q, + sp_digit* t) +{ + const sp_point_256* ap[2]; + sp_point_256* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + sp_digit* t3 = t + 4*4; + sp_digit* t4 = t + 6*4; + sp_digit* t5 = t + 8*4; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Ensure only the first point is the same as the result. 
*/ + if (q == r) { + const sp_point_256* a = p; + p = q; + q = a; + } + + /* Check double */ + (void)sp_256_sub_4(t1, p256_mod, q->y); + sp_256_norm_4(t1); + if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & + (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { + sp_256_proj_point_dbl_4(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_256)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<4; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<4; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<4; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_avx2_4(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t1, t1, x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_avx2_4(t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_avx2_4(t3, t3, y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_avx2_4(t4, t4, q->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_4(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_4(t4, t4, t3, p256_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_avx2_4(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(z, z, t2, p256_mod, p256_mp_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_avx2_4(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_avx2_4(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t5, t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(x, x, t5, p256_mod); + sp_256_mont_dbl_4(t1, y, p256_mod); + sp_256_mont_sub_4(x, x, t1, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_4(y, y, x, p256_mod); + sp_256_mont_mul_avx2_4(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(y, y, t5, p256_mod); + } +} + +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. 
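+ * m  Stride between results: r[(1<<i)*m] receives (2^i)*p for i = 1..n,
+ *    which fills table entries t[2], t[4], t[8], t[16] and t[32] in one
+ *    pass when called with n = 5 and m = 1.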
+ */
+static void sp_256_proj_point_dbl_n_store_avx2_4(sp_point_256* r, const sp_point_256* p,
+        int n, int m, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*4;
+    sp_digit* b = t + 4*4;
+    sp_digit* t1 = t + 6*4;
+    sp_digit* t2 = t + 8*4;
+    sp_digit* x = r[2*m].x;
+    sp_digit* y = r[(1<<n)*m].y;
+    sp_digit* z = r[2*m].z;
+    int i;
+
+    for (i=0; i<4; i++) {
+        x[i] = p->x[i];
+    }
+    for (i=0; i<4; i++) {
+        y[i] = p->y[i];
+    }
+    for (i=0; i<4; i++) {
+        z[i] = p->z[i];
+    }
+
+    /* Y = 2*Y */
+    sp_256_mont_dbl_4(y, y, p256_mod);
+    /* W = Z^4 */
+    sp_256_mont_sqr_avx2_4(w, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_avx2_4(w, w, p256_mod, p256_mp_mod);
+    for (i=1; i<=n; i++) {
+        /* A = 3*(X^2 - W) */
+        sp_256_mont_sqr_avx2_4(t1, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(t1, t1, w, p256_mod);
+        sp_256_mont_tpl_4(a, t1, p256_mod);
+        /* B = X*Y^2 */
+        sp_256_mont_sqr_avx2_4(t2, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(b, t2, x, p256_mod, p256_mp_mod);
+        x = r[(1<<i)*m].x;
+        /* X = A^2 - 2B */
+        sp_256_mont_sqr_avx2_4(x, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_4(t1, b, p256_mod);
+        sp_256_mont_sub_4(x, x, t1, p256_mod);
+        /* Z = Z*Y */
+        z = r[(1<<i)*m].z;
+        sp_256_mont_mul_avx2_4(z, z, y, p256_mod, p256_mp_mod);
+        /* t2 = Y^4 */
+        sp_256_mont_sqr_avx2_4(t2, t2, p256_mod, p256_mp_mod);
+        if (i != n) {
+            /* W = W*Y^4 */
+            sp_256_mont_mul_avx2_4(w, w, t2, p256_mod, p256_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        y = r[(1<<i)*m].y;
+        sp_256_mont_sub_4(y, b, x, p256_mod);
+        sp_256_mont_mul_avx2_4(y, y, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_4(y, y, p256_mod);
+        sp_256_mont_sub_4(y, y, t2, p256_mod);
+
+        /* Y = Y/2 */
+        sp_256_div2_4(y, y, p256_mod);
+        r[(1<<i)*m].infinity = 0;
+    }
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * ra  Result of addition.
+ * rs  Result of subtraction.
+ * p   First point to add.
+ * q   Second point to add.
+ * t   Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_sub_avx2_4(sp_point_256* ra, sp_point_256* rs,
+        const sp_point_256* p, const sp_point_256* q, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*4;
+    sp_digit* t3 = t + 4*4;
+    sp_digit* t4 = t + 6*4;
+    sp_digit* t5 = t + 8*4;
+    sp_digit* t6 = t + 10*4;
+    sp_digit* x = ra->x;
+    sp_digit* y = ra->y;
+    sp_digit* z = ra->z;
+    sp_digit* xs = rs->x;
+    sp_digit* ys = rs->y;
+    sp_digit* zs = rs->z;
+
+
+    XMEMCPY(x, p->x, sizeof(p->x) / 2);
+    XMEMCPY(y, p->y, sizeof(p->y) / 2);
+    XMEMCPY(z, p->z, sizeof(p->z) / 2);
+    ra->infinity = 0;
+    rs->infinity = 0;
+
+    /* U1 = X1*Z2^2 */
+    sp_256_mont_sqr_avx2_4(t1, q->z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(t3, t1, q->z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(t1, t1, x, p256_mod, p256_mp_mod);
+    /* U2 = X2*Z1^2 */
+    sp_256_mont_sqr_avx2_4(t2, z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(t4, t2, z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(t2, t2, q->x, p256_mod, p256_mp_mod);
+    /* S1 = Y1*Z2^3 */
+    sp_256_mont_mul_avx2_4(t3, t3, y, p256_mod, p256_mp_mod);
+    /* S2 = Y2*Z1^3 */
+    sp_256_mont_mul_avx2_4(t4, t4, q->y, p256_mod, p256_mp_mod);
+    /* H = U2 - U1 */
+    sp_256_mont_sub_4(t2, t2, t1, p256_mod);
+    /* RS = S2 + S1 */
+    sp_256_mont_add_4(t6, t4, t3, p256_mod);
+    /* R = S2 - S1 */
+    sp_256_mont_sub_4(t4, t4, t3, p256_mod);
+    /* Z3 = H*Z1*Z2 */
+    /* ZS = H*Z1*Z2 */
+    sp_256_mont_mul_avx2_4(z, z, q->z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(z, z, t2, p256_mod, p256_mp_mod);
+    XMEMCPY(zs, z, sizeof(p->z)/2);
+    /* X3 = R^2 - H^3 - 2*U1*H^2 */
+    /* XS = RS^2 - H^3 - 2*U1*H^2 */
+    sp_256_mont_sqr_avx2_4(x, t4, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_avx2_4(xs, t6, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_avx2_4(t5, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(y, t1, t5, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(t5, t5, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_4(x, x, t5, p256_mod);
+    sp_256_mont_sub_4(xs, xs, t5, p256_mod);
+    sp_256_mont_dbl_4(t1, y, p256_mod);
+    sp_256_mont_sub_4(x, x, t1, p256_mod);
+    sp_256_mont_sub_4(xs, xs, t1, p256_mod);
+    /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+    /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */
+    sp_256_mont_sub_4(ys, y, xs, p256_mod);
+    sp_256_mont_sub_4(y, y, x, p256_mod);
+    sp_256_mont_mul_avx2_4(y, y, t4, p256_mod, p256_mp_mod);
+    sp_256_sub_4(t6, p256_mod, t6);
+    sp_256_mont_mul_avx2_4(ys, ys, t6, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(t5, t5, t3, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_4(y, y, t5, p256_mod);
+    sp_256_mont_sub_4(ys, ys, t5, p256_mod);
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
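+ *
+ * The 256-bit scalar is recoded into 43 signed 6-bit windows by
+ * sp_256_ecc_recode_6_4; t[0..32] hold 0*g..32*g, larger window values
+ * use the negated table point, and after the first window the main loop
+ * performs 6 point doublings plus one addition per window.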
+ */ +static int sp_256_ecc_mulmod_win_add_sub_avx2_4(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 td[33]; + sp_point_256 rtd, pd; + sp_digit tmpd[2 * 4 * 6]; +#endif + sp_point_256* t; + sp_point_256* rt; + sp_point_256* p = NULL; + sp_digit* tmp; + sp_digit* negy; + int i; + ecc_recode_256 v[43]; + int err; + + (void)heap; + + err = sp_256_point_new_4(heap, rtd, rt); + if (err == MP_OKAY) + err = sp_256_point_new_4(heap, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 33, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#else + t = td; + tmp = tmpd; +#endif + + + if (err == MP_OKAY) { + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_256_mod_mul_norm_4(t[1].x, g->x, p256_mod); + } + if (err == MP_OKAY) { + err = sp_256_mod_mul_norm_4(t[1].y, g->y, p256_mod); + } + if (err == MP_OKAY) { + err = sp_256_mod_mul_norm_4(t[1].z, g->z, p256_mod); + } + + if (err == MP_OKAY) { + t[1].infinity = 0; + /* t[2] ... t[32] */ + sp_256_proj_point_dbl_n_store_avx2_4(t, &t[ 1], 5, 1, tmp); + sp_256_proj_point_add_avx2_4(&t[ 3], &t[ 2], &t[ 1], tmp); + sp_256_proj_point_dbl_avx2_4(&t[ 6], &t[ 3], tmp); + sp_256_proj_point_add_sub_avx2_4(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp); + sp_256_proj_point_dbl_avx2_4(&t[10], &t[ 5], tmp); + sp_256_proj_point_add_sub_avx2_4(&t[11], &t[ 9], &t[10], &t[ 1], tmp); + sp_256_proj_point_dbl_avx2_4(&t[12], &t[ 6], tmp); + sp_256_proj_point_dbl_avx2_4(&t[14], &t[ 7], tmp); + sp_256_proj_point_add_sub_avx2_4(&t[15], &t[13], &t[14], &t[ 1], tmp); + sp_256_proj_point_dbl_avx2_4(&t[18], &t[ 9], tmp); + sp_256_proj_point_add_sub_avx2_4(&t[19], &t[17], &t[18], &t[ 1], tmp); + sp_256_proj_point_dbl_avx2_4(&t[20], &t[10], tmp); + sp_256_proj_point_dbl_avx2_4(&t[22], &t[11], tmp); + sp_256_proj_point_add_sub_avx2_4(&t[23], &t[21], &t[22], &t[ 1], tmp); + sp_256_proj_point_dbl_avx2_4(&t[24], &t[12], tmp); + sp_256_proj_point_dbl_avx2_4(&t[26], &t[13], tmp); + sp_256_proj_point_add_sub_avx2_4(&t[27], &t[25], &t[26], &t[ 1], tmp); + sp_256_proj_point_dbl_avx2_4(&t[28], &t[14], tmp); + sp_256_proj_point_dbl_avx2_4(&t[30], &t[15], tmp); + sp_256_proj_point_add_sub_avx2_4(&t[31], &t[29], &t[30], &t[ 1], tmp); + + negy = t[0].y; + + sp_256_ecc_recode_6_4(k, v); + + i = 42; + XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_256)); + for (--i; i>=0; i--) { + sp_256_proj_point_dbl_n_avx2_4(rt, 6, tmp); + + XMEMCPY(p, &t[v[i].i], sizeof(sp_point_256)); + sp_256_sub_4(negy, p256_mod, p->y); + sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg); + sp_256_proj_point_add_avx2_4(rt, rt, p, tmp); + } + + if (map != 0) { + sp_256_map_avx2_4(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_256)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); + if (tmp != NULL) + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif + sp_256_point_free_4(p, 0, heap); + sp_256_point_free_4(rt, 0, heap); + + return err; +} + +#endif /* HAVE_INTEL_AVX2 */ +/* A table entry for pre-computed points. 
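+ * Only the affine x and y ordinates are stored, in Montgomery form; the
+ * z ordinate is implicitly one (callers load p256_norm_mod, one in
+ * Montgomery form, before use), which is what allows the cheaper _qz1
+ * point addition below to be used with these entries.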
*/ +typedef struct sp_table_entry_256 { + sp_digit x[4]; + sp_digit y[4]; +} sp_table_entry_256; + +#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL) +#endif /* FP_ECC || WOLFSSL_SP_SMALL */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_add_qz1_4(sp_point_256* r, const sp_point_256* p, + const sp_point_256* q, sp_digit* t) +{ + const sp_point_256* ap[2]; + sp_point_256* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + sp_digit* t3 = t + 4*4; + sp_digit* t4 = t + 6*4; + sp_digit* t5 = t + 8*4; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Check double */ + (void)sp_256_sub_4(t1, p256_mod, q->y); + sp_256_norm_4(t1); + if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & + (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { + sp_256_proj_point_dbl_4(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_256)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<4; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<4; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<4; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod); + /* H = U2 - X1 */ + sp_256_mont_sub_4(t2, t2, x, p256_mod); + /* R = S2 - Y1 */ + sp_256_mont_sub_4(t4, t4, y, p256_mod); + /* Z3 = H*Z1 */ + sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_256_mont_sqr_4(t1, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t3, x, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(x, t1, t5, p256_mod); + sp_256_mont_dbl_4(t1, t3, p256_mod); + sp_256_mont_sub_4(x, x, t1, p256_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_256_mont_sub_4(t3, t3, x, p256_mod); + sp_256_mont_mul_4(t3, t3, t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t5, t5, y, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(y, t3, t5, p256_mod); + } +} + +#ifdef FP_ECC +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_256_proj_to_affine_4(sp_point_256* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 4; + sp_digit* tmp = t + 4 * 4; + + sp_256_mont_inv_4(t1, a->z, tmp); + + sp_256_mont_sqr_4(t2, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t1, t2, t1, p256_mod, p256_mp_mod); + + sp_256_mont_mul_4(a->x, a->x, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(a->y, a->y, t1, p256_mod, p256_mp_mod); + XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod)); +} + +/* Generate the pre-computed table of points for the base point. + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. 
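+ *
+ * The table holds 256 entries: for an 8-bit index j, table[j] is the
+ * affine form of the sum of (2^(32*i))*a over the set bits i of j.  The
+ * power-of-two entries come from runs of 32 doublings and the remaining
+ * entries from single additions of previously generated entries.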
+ */
+static int sp_256_gen_stripe_table_4(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_4(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_4(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_4(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_4(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_256_proj_point_dbl_n_4(t, 32, tmp);
+            sp_256_proj_to_affine_4(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_4(t, s1, s2, tmp);
+                sp_256_proj_to_affine_4(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_4(s2, 0, heap);
+    sp_256_point_free_4(s1, 0, heap);
+    sp_256_point_free_4( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL)
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
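+ *
+ * The scalar is treated as eight 32-bit stripes: on iteration i the table
+ * index is built from bits i, i+32, ..., i+224 of k, so each bit position
+ * costs one doubling and one table-point addition regardless of the bit
+ * values.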
+ */
+static int sp_256_ecc_mulmod_stripe_4(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit td[2 * 4 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_256_point_new_4(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        y = 0;
+        for (j=0,x=31; j<8; j++,x+=32) {
+            y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=30; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=32) {
+                y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_4(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_4(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_4(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_4(p, 0, heap);
+    sp_256_point_free_4(rt, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC || WOLFSSL_SP_SMALL */
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_256_t {
+    sp_digit x[4];
+    sp_digit y[4];
+    sp_table_entry_256 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_256_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_256[i].set)
+            continue;
+
+        if (sp_256_cmp_equal_4(g->x, sp_cache_256[i].x) &
+                sp_256_cmp_equal_4(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_256[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_256[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
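+ *
+ * With FP_ECC enabled the point g is looked up in a small cache: the
+ * first multiplication by a point uses the window method, and on the
+ * second use a 256-entry stripe table is generated and cached so that
+ * this and all later multiplications can use the stripe method.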
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, const sp_digit* k, + int map, void* heap) +{ +#ifndef FP_ECC + return sp_256_ecc_mulmod_win_add_sub_4(r, g, k, map, heap); +#else + sp_digit tmp[2 * 4 * 5]; + sp_cache_256_t* cache; + int err = MP_OKAY; + +#ifndef HAVE_THREAD_LS + if (initCacheMutex_256 == 0) { + wc_InitMutex(&sp_cache_256_lock); + initCacheMutex_256 = 1; + } + if (wc_LockMutex(&sp_cache_256_lock) != 0) + err = BAD_MUTEX_E; +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_256(g, &cache); + if (cache->cnt == 2) + sp_256_gen_stripe_table_4(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_256_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_256_ecc_mulmod_win_add_sub_4(r, g, k, map, heap); + } + else { + err = sp_256_ecc_mulmod_stripe_4(r, g, cache->table, k, + map, heap); + } + } + + return err; +#endif +} + +#ifdef HAVE_INTEL_AVX2 +#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL) +#endif /* FP_ECC || WOLFSSL_SP_SMALL */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_256_proj_point_add_qz1_avx2_4(sp_point_256* r, const sp_point_256* p, + const sp_point_256* q, sp_digit* t) +{ + const sp_point_256* ap[2]; + sp_point_256* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + sp_digit* t3 = t + 4*4; + sp_digit* t4 = t + 6*4; + sp_digit* t5 = t + 8*4; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Check double */ + (void)sp_256_sub_4(t1, p256_mod, q->y); + sp_256_norm_4(t1); + if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & + (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { + sp_256_proj_point_dbl_4(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_256)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<4; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<4; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<4; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_avx2_4(t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_avx2_4(t4, t4, q->y, p256_mod, p256_mp_mod); + /* H = U2 - X1 */ + sp_256_mont_sub_4(t2, t2, x, p256_mod); + /* R = S2 - Y1 */ + sp_256_mont_sub_4(t4, t4, y, p256_mod); + /* Z3 = H*Z1 */ + sp_256_mont_mul_avx2_4(z, z, t2, p256_mod, p256_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_256_mont_sqr_avx2_4(t1, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_avx2_4(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t3, x, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t5, t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(x, t1, t5, p256_mod); + sp_256_mont_dbl_4(t1, t3, p256_mod); + sp_256_mont_sub_4(x, x, t1, p256_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_256_mont_sub_4(t3, t3, x, p256_mod); + 
sp_256_mont_mul_avx2_4(t3, t3, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(t5, t5, y, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(y, t3, t5, p256_mod);
+    }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a  Point to convert.
+ * t  Temporary data.
+ */
+static void sp_256_proj_to_affine_avx2_4(sp_point_256* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 4;
+    sp_digit* tmp = t + 4 * 4;
+
+    sp_256_mont_inv_avx2_4(t1, a->z, tmp);
+
+    sp_256_mont_sqr_avx2_4(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    sp_256_mont_mul_avx2_4(a->x, a->x, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(a->y, a->y, t1, p256_mod, p256_mp_mod);
+    XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a      The base point.
+ * table  Place to store generated point data.
+ * tmp    Temporary data.
+ * heap   Heap to use for allocation.
+ */
+static int sp_256_gen_stripe_table_avx2_4(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_4(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_4(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_4(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_avx2_4(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_256_proj_point_dbl_n_avx2_4(t, 32, tmp);
+            sp_256_proj_to_affine_avx2_4(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_avx2_4(t, s1, s2, tmp);
+                sp_256_proj_to_affine_avx2_4(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_4(s2, 0, heap);
+    sp_256_point_free_4(s1, 0, heap);
+    sp_256_point_free_4( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL)
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */ +static int sp_256_ecc_mulmod_stripe_avx2_4(sp_point_256* r, const sp_point_256* g, + const sp_table_entry_256* table, const sp_digit* k, int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 rtd; + sp_point_256 pd; + sp_digit td[2 * 4 * 5]; +#endif + sp_point_256* rt; + sp_point_256* p = NULL; + sp_digit* t; + int i, j; + int y, x; + int err; + + (void)g; + (void)heap; + + + err = sp_256_point_new_4(heap, rtd, rt); + if (err == MP_OKAY) { + err = sp_256_point_new_4(heap, pd, p); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) { + err = MEMORY_E; + } +#else + t = td; +#endif + + if (err == MP_OKAY) { + XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod)); + XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod)); + + y = 0; + for (j=0,x=31; j<8; j++,x+=32) { + y |= ((k[x / 64] >> (x % 64)) & 1) << j; + } + XMEMCPY(rt->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(rt->y, table[y].y, sizeof(table[y].y)); + rt->infinity = !y; + for (i=30; i>=0; i--) { + y = 0; + for (j=0,x=i; j<8; j++,x+=32) { + y |= ((k[x / 64] >> (x % 64)) & 1) << j; + } + + sp_256_proj_point_dbl_avx2_4(rt, rt, t); + XMEMCPY(p->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(p->y, table[y].y, sizeof(table[y].y)); + p->infinity = !y; + sp_256_proj_point_add_qz1_avx2_4(rt, rt, p, t); + } + + if (map != 0) { + sp_256_map_avx2_4(r, rt, t); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_256)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XFREE(t, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_4(p, 0, heap); + sp_256_point_free_4(rt, 0, heap); + + return err; +} + +#endif /* FP_ECC || WOLFSSL_SP_SMALL */ +/* Multiply the base point of P256 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_256_ecc_mulmod_avx2_4(sp_point_256* r, const sp_point_256* g, const sp_digit* k, + int map, void* heap) +{ +#ifndef FP_ECC + return sp_256_ecc_mulmod_win_add_sub_avx2_4(r, g, k, map, heap); +#else + sp_digit tmp[2 * 4 * 5]; + sp_cache_256_t* cache; + int err = MP_OKAY; + +#ifndef HAVE_THREAD_LS + if (initCacheMutex_256 == 0) { + wc_InitMutex(&sp_cache_256_lock); + initCacheMutex_256 = 1; + } + if (wc_LockMutex(&sp_cache_256_lock) != 0) + err = BAD_MUTEX_E; +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_256(g, &cache); + if (cache->cnt == 2) + sp_256_gen_stripe_table_avx2_4(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_256_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_256_ecc_mulmod_win_add_sub_avx2_4(r, g, k, map, heap); + } + else { + err = sp_256_ecc_mulmod_stripe_avx2_4(r, g, cache->table, k, + map, heap); + } + } + + return err; +#endif +} + +#endif /* HAVE_INTEL_AVX2 */ +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * r Resulting point. 
+ * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map, + void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_256 p; + sp_digit kd[4]; +#endif + sp_point_256* point; + sp_digit* k = NULL; + int err = MP_OKAY; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + err = sp_256_point_new_4(heap, p, point); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + if (err == MP_OKAY) { + sp_256_from_mp(k, 4, km); + sp_256_point_from_ecc_point_4(point, gm); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_256_ecc_mulmod_avx2_4(point, point, k, map, heap); + else +#endif + err = sp_256_ecc_mulmod_4(point, point, k, map, heap); + } + if (err == MP_OKAY) { + err = sp_256_point_to_ecc_point_4(point, r); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_4(point, 0, heap); + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +static const sp_table_entry_256 p256_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x79e730d418a9143cL,0x75ba95fc5fedb601L,0x79fb732b77622510L, + 0x18905f76a53755c6L }, + { 0xddf25357ce95560aL,0x8b4ab8e4ba19e45cL,0xd2e88688dd21f325L, + 0x8571ff1825885d85L } }, + /* 2 */ + { { 0x202886024147519aL,0xd0981eac26b372f0L,0xa9d4a7caa785ebc8L, + 0xd953c50ddbdf58e9L }, + { 0x9d6361ccfd590f8fL,0x72e9626b44e6c917L,0x7fd9611022eb64cfL, + 0x863ebb7e9eb288f3L } }, + /* 3 */ + { { 0x7856b6235cdb6485L,0x808f0ea22f0a2f97L,0x3e68d9544f7e300bL, + 0x00076055b5ff80a0L }, + { 0x7634eb9b838d2010L,0x54014fbb3243708aL,0xe0e47d39842a6606L, + 0x8308776134373ee0L } }, + /* 4 */ + { { 0x4f922fc516a0d2bbL,0x0d5cc16c1a623499L,0x9241cf3a57c62c8bL, + 0x2f5e6961fd1b667fL }, + { 0x5c15c70bf5a01797L,0x3d20b44d60956192L,0x04911b37071fdb52L, + 0xf648f9168d6f0f7bL } }, + /* 5 */ + { { 0x9e566847e137bbbcL,0xe434469e8a6a0becL,0xb1c4276179d73463L, + 0x5abe0285133d0015L }, + { 0x92aa837cc04c7dabL,0x573d9f4c43260c07L,0x0c93156278e6cc37L, + 0x94bb725b6b6f7383L } }, + /* 6 */ + { { 0xbbf9b48f720f141cL,0x6199b3cd2df5bc74L,0xdc3f6129411045c4L, + 0xcdd6bbcb2f7dc4efL }, + { 0xcca6700beaf436fdL,0x6f647f6db99326beL,0x0c0fa792014f2522L, + 0xa361bebd4bdae5f6L } }, + /* 7 */ + { { 0x28aa2558597c13c7L,0xc38d635f50b7c3e1L,0x07039aecf3c09d1dL, + 0xba12ca09c4b5292cL }, + { 0x9e408fa459f91dfdL,0x3af43b66ceea07fbL,0x1eceb0899d780b29L, + 0x53ebb99d701fef4bL } }, + /* 8 */ + { { 0x4fe7ee31b0e63d34L,0xf4600572a9e54fabL,0xc0493334d5e7b5a4L, + 0x8589fb9206d54831L }, + { 0xaa70f5cc6583553aL,0x0879094ae25649e5L,0xcc90450710044652L, + 0xebb0696d02541c4fL } }, + /* 9 */ + { { 0x4616ca15ac1647c5L,0xb8127d47c4cf5799L,0xdc666aa3764dfbacL, + 0xeb2820cbd1b27da3L }, + { 0x9406f8d86a87e008L,0xd87dfa9d922378f3L,0x56ed2e4280ccecb2L, + 0x1f28289b55a7da1dL } }, + /* 10 */ + { { 0xabbaa0c03b89da99L,0xa6f2d79eb8284022L,0x27847862b81c05e8L, + 0x337a4b5905e54d63L }, + { 0x3c67500d21f7794aL,0x207005b77d6d7f61L,0x0a5a378104cfd6e8L, + 
0x0d65e0d5f4c2fbd6L } }, + /* 11 */ + { { 0xd9d09bbeb5275d38L,0x4268a7450be0a358L,0xf0762ff4973eb265L, + 0xc23da24252f4a232L }, + { 0x5da1b84f0b94520cL,0x09666763b05bd78eL,0x3a4dcb8694d29ea1L, + 0x19de3b8cc790cff1L } }, + /* 12 */ + { { 0x183a716c26c5fe04L,0x3b28de0b3bba1bdbL,0x7432c586a4cb712cL, + 0xe34dcbd491fccbfdL }, + { 0xb408d46baaa58403L,0x9a69748682e97a53L,0x9e39012736aaa8afL, + 0xe7641f447b4e0f7fL } }, + /* 13 */ + { { 0x7d753941df64ba59L,0xd33f10ec0b0242fcL,0x4f06dfc6a1581859L, + 0x4a12df57052a57bfL }, + { 0xbfa6338f9439dbd0L,0xd3c24bd4bde53e1fL,0xfd5e4ffa21f1b314L, + 0x6af5aa93bb5bea46L } }, + /* 14 */ + { { 0xda10b69910c91999L,0x0a24b4402a580491L,0x3e0094b4b8cc2090L, + 0x5fe3475a66a44013L }, + { 0xb0f8cabdf93e7b4bL,0x292b501a7c23f91aL,0x42e889aecd1e6263L, + 0xb544e308ecfea916L } }, + /* 15 */ + { { 0x6478c6e916ddfdceL,0x2c329166f89179e6L,0x4e8d6e764d4e67e1L, + 0xe0b6b2bda6b0c20bL }, + { 0x0d312df2bb7efb57L,0x1aac0dde790c4007L,0xf90336ad679bc944L, + 0x71c023de25a63774L } }, + /* 16 */ + { { 0x62a8c244bfe20925L,0x91c19ac38fdce867L,0x5a96a5d5dd387063L, + 0x61d587d421d324f6L }, + { 0xe87673a2a37173eaL,0x2384800853778b65L,0x10f8441e05bab43eL, + 0xfa11fe124621efbeL } }, + /* 17 */ + { { 0x1c891f2b2cb19ffdL,0x01ba8d5bb1923c23L,0xb6d03d678ac5ca8eL, + 0x586eb04c1f13bedcL }, + { 0x0c35c6e527e8ed09L,0x1e81a33c1819ede2L,0x278fd6c056c652faL, + 0x19d5ac0870864f11L } }, + /* 18 */ + { { 0x1e99f581309a4e1fL,0xab7de71be9270074L,0x26a5ef0befd28d20L, + 0xe7c0073f7f9c563fL }, + { 0x1f6d663a0ef59f76L,0x669b3b5420fcb050L,0xc08c1f7a7a6602d4L, + 0xe08504fec65b3c0aL } }, + /* 19 */ + { { 0xf098f68da031b3caL,0x6d1cab9ee6da6d66L,0x5bfd81fa94f246e8L, + 0x78f018825b0996b4L }, + { 0xb7eefde43a25787fL,0x8016f80d1dccac9bL,0x0cea4877b35bfc36L, + 0x43a773b87e94747aL } }, + /* 20 */ + { { 0x62577734d2b533d5L,0x673b8af6a1bdddc0L,0x577e7c9aa79ec293L, + 0xbb6de651c3b266b1L }, + { 0xe7e9303ab65259b3L,0xd6a0afd3d03a7480L,0xc5ac83d19b3cfc27L, + 0x60b4619a5d18b99bL } }, + /* 21 */ + { { 0xbd6a38e11ae5aa1cL,0xb8b7652b49e73658L,0x0b130014ee5f87edL, + 0x9d0f27b2aeebffcdL }, + { 0xca9246317a730a55L,0x9c955b2fddbbc83aL,0x07c1dfe0ac019a71L, + 0x244a566d356ec48dL } }, + /* 22 */ + { { 0x6db0394aeacf1f96L,0x9f2122a9024c271cL,0x2626ac1b82cbd3b9L, + 0x45e58c873581ef69L }, + { 0xd3ff479da38f9dbcL,0xa8aaf146e888a040L,0x945adfb246e0bed7L, + 0xc040e21cc1e4b7a4L } }, + /* 23 */ + { { 0x847af0006f8117b6L,0x651969ff73a35433L,0x482b35761d9475ebL, + 0x1cdf5c97682c6ec7L }, + { 0x7db775b411f04839L,0x7dbeacf448de1698L,0xb2921dd1b70b3219L, + 0x046755f8a92dff3dL } }, + /* 24 */ + { { 0xcc8ac5d2bce8ffcdL,0x0d53c48b2fe61a82L,0xf6f161727202d6c7L, + 0x046e5e113b83a5f3L }, + { 0xe7b8ff64d8007f01L,0x7fb1ef125af43183L,0x045c5ea635e1a03cL, + 0x6e0106c3303d005bL } }, + /* 25 */ + { { 0x48c7358488dd73b1L,0x7670708f995ed0d9L,0x38385ea8c56a2ab7L, + 0x442594ede901cf1fL }, + { 0xf8faa2c912d4b65bL,0x94c2343b96c90c37L,0xd326e4a15e978d1fL, + 0xa796fa514c2ee68eL } }, + /* 26 */ + { { 0x359fb604823addd7L,0x9e2a6183e56693b3L,0xf885b78e3cbf3c80L, + 0xe4ad2da9c69766e9L }, + { 0x357f7f428e048a61L,0x082d198cc092d9a0L,0xfc3a1af4c03ed8efL, + 0xc5e94046c37b5143L } }, + /* 27 */ + { { 0x476a538c2be75f9eL,0x6fd1a9e8cb123a78L,0xd85e4df0b109c04bL, + 0x63283dafdb464747L }, + { 0xce728cf7baf2df15L,0xe592c4550ad9a7f4L,0xfab226ade834bcc3L, + 0x68bd19ab1981a938L } }, + /* 28 */ + { { 0xc08ead511887d659L,0x3374d5f4b359305aL,0x96986981cfe74fe3L, + 0x495292f53c6fdfd6L }, + { 0x4a878c9e1acec896L,0xd964b210ec5b4484L,0x6696f7e2664d60a7L, + 0x0ec7530d26036837L } }, + /* 29 */ + { { 
0x2da13a05ad2687bbL,0xa1f83b6af32e21faL,0x390f5ef51dd4607bL, + 0x0f6207a664863f0bL }, + { 0xbd67e3bb0f138233L,0xdd66b96c272aa718L,0x8ed0040726ec88aeL, + 0xff0db07208ed6dcfL } }, + /* 30 */ + { { 0x749fa1014c95d553L,0xa44052fd5d680a8aL,0x183b4317ff3b566fL, + 0x313b513c88740ea3L }, + { 0xb402e2ac08d11549L,0x071ee10bb4dee21cL,0x26b987dd47f2320eL, + 0x2d3abcf986f19f81L } }, + /* 31 */ + { { 0x4c288501815581a2L,0x9a0a6d56632211afL,0x19ba7a0f0cab2e99L, + 0xc036fa10ded98cdfL }, + { 0x29ae08bac1fbd009L,0x0b68b19006d15816L,0xc2eb32779b9e0d8fL, + 0xa6b2a2c4b6d40194L } }, + /* 32 */ + { { 0xd433e50f6d3549cfL,0x6f33696ffacd665eL,0x695bfdacce11fcb4L, + 0x810ee252af7c9860L }, + { 0x65450fe17159bb2cL,0xf7dfbebe758b357bL,0x2b057e74d69fea72L, + 0xd485717a92731745L } }, + /* 33 */ + { { 0x11741a8af0cb5a98L,0xd3da8f931f3110bfL,0x1994e2cbab382adfL, + 0x6a6045a72f9a604eL }, + { 0x170c0d3fa2b2411dL,0xbe0eb83e510e96e0L,0x3bcc9f738865b3ccL, + 0xd3e45cfaf9e15790L } }, + /* 34 */ + { { 0xce1f69bbe83f7669L,0x09f8ae8272877d6bL,0x9548ae543244278dL, + 0x207755dee3c2c19cL }, + { 0x87bd61d96fef1945L,0x18813cefb12d28c3L,0x9fbcd1d672df64aaL, + 0x48dc5ee57154b00dL } }, + /* 35 */ + { { 0x123790bff7e5a199L,0xe0efb8cf989ccbb7L,0xc27a2bfe0a519c79L, + 0xf2fb0aeddff6f445L }, + { 0x41c09575f0b5025fL,0x550543d740fa9f22L,0x8fa3c8ad380bfbd0L, + 0xa13e9015db28d525L } }, + /* 36 */ + { { 0xf9f7a350a2b65cbcL,0x0b04b9722a464226L,0x265ce241e23f07a1L, + 0x2bf0d6b01497526fL }, + { 0xd3d4dd3f4b216fb7L,0xf7d7b867fbdda26aL,0xaeb7b83f6708505cL, + 0x42a94a5a162fe89fL } }, + /* 37 */ + { { 0x5846ad0beaadf191L,0x0f8a489025a268d7L,0xe8603050494dc1f6L, + 0x2c2dd969c65ede3dL }, + { 0x6d02171d93849c17L,0x460488ba1da250ddL,0x4810c7063c3a5485L, + 0xf437fa1f42c56dbcL } }, + /* 38 */ + { { 0x6aa0d7144a0f7dabL,0x0f0497931776e9acL,0x52c0a050f5f39786L, + 0xaaf45b3354707aa8L }, + { 0x85e37c33c18d364aL,0xd40b9b063e497165L,0xf417168115ec5444L, + 0xcdf6310df4f272bcL } }, + /* 39 */ + { { 0x7473c6238ea8b7efL,0x08e9351885bc2287L,0x419567722bda8e34L, + 0xf0d008bada9e2ff2L }, + { 0x2912671d2414d3b1L,0xb3754985b019ea76L,0x5c61b96d453bcbdbL, + 0x5bd5c2f5ca887b8bL } }, + /* 40 */ + { { 0xef0f469ef49a3154L,0x3e85a5956e2b2e9aL,0x45aaec1eaa924a9cL, + 0xaa12dfc8a09e4719L }, + { 0x26f272274df69f1dL,0xe0e4c82ca2ff5e73L,0xb9d8ce73b7a9dd44L, + 0x6c036e73e48ca901L } }, + /* 41 */ + { { 0x5cfae12a0f6e3138L,0x6966ef0025ad345aL,0x8993c64b45672bc5L, + 0x292ff65896afbe24L }, + { 0xd5250d445e213402L,0xf6580e274392c9feL,0x097b397fda1c72e8L, + 0x644e0c90311b7276L } }, + /* 42 */ + { { 0xe1e421e1a47153f0L,0xb86c3b79920418c9L,0x93bdce87705d7672L, + 0xf25ae793cab79a77L }, + { 0x1f3194a36d869d0cL,0x9d55c8824986c264L,0x49fb5ea3096e945eL, + 0x39b8e65313db0a3eL } }, + /* 43 */ + { { 0x37754200b6fd2e59L,0x35e2c0669255c98fL,0xd9dab21a0e2a5739L, + 0x39122f2f0f19db06L }, + { 0xcfbce1e003cad53cL,0x225b2c0fe65c17e3L,0x72baf1d29aa13877L, + 0x8de80af8ce80ff8dL } }, + /* 44 */ + { { 0xafbea8d9207bbb76L,0x921c7e7c21782758L,0xdfa2b74b1c0436b1L, + 0x871949062e368c04L }, + { 0xb5f928bba3993df5L,0x639d75b5f3b3d26aL,0x011aa78a85b55050L, + 0xfc315e6a5b74fde1L } }, + /* 45 */ + { { 0x561fd41ae8d6ecfaL,0x5f8c44f61aec7f86L,0x98452a7b4924741dL, + 0xe6d4a7adee389088L }, + { 0x60552ed14593c75dL,0x70a70da4dd271162L,0xd2aede937ba2c7dbL, + 0x35dfaf9a9be2ae57L } }, + /* 46 */ + { { 0x6b956fcdaa736636L,0x09f51d97ae2cab7eL,0xfb10bf410f349966L, + 0x1da5c7d71c830d2bL }, + { 0x5c41e4833cce6825L,0x15ad118ff9573c3bL,0xa28552c7f23036b8L, + 0x7077c0fddbf4b9d6L } }, + /* 47 */ + { { 
0xbf63ff8d46b9661cL,0xa1dfd36b0d2cfd71L,0x0373e140a847f8f7L, + 0x53a8632ee50efe44L }, + { 0x0976ff68696d8051L,0xdaec0c95c74f468aL,0x62994dc35e4e26bdL, + 0x028ca76d34e1fcc1L } }, + /* 48 */ + { { 0xd11d47dcfc9877eeL,0xc8b36210801d0002L,0xd002c11754c260b6L, + 0x04c17cd86962f046L }, + { 0x6d9bd094b0daddf5L,0xbea2357524ce55c0L,0x663356e672da03b5L, + 0xf7ba4de9fed97474L } }, + /* 49 */ + { { 0xd0dbfa34ebe1263fL,0x5576373571ae7ce6L,0xd244055382a6f523L, + 0xe31f960052131c41L }, + { 0xd1bb9216ea6b6ec6L,0x37a1d12e73c2fc44L,0xc10e7eac89d0a294L, + 0xaa3a6259ce34d47bL } }, + /* 50 */ + { { 0xfbcf9df536f3dcd3L,0x6ceded50d2bf7360L,0x491710fadf504f5bL, + 0x2398dd627e79daeeL }, + { 0xcf4705a36d09569eL,0xea0619bb5149f769L,0xff9c037735f6034cL, + 0x5717f5b21c046210L } }, + /* 51 */ + { { 0x9fe229c921dd895eL,0x8e51850040c28451L,0xfa13d2391d637ecdL, + 0x660a2c560e3c28deL }, + { 0x9cca88aed67fcbd0L,0xc84724780ea9f096L,0x32b2f48172e92b4dL, + 0x624ee54c4f522453L } }, + /* 52 */ + { { 0x09549ce4d897ecccL,0x4d49d1d93f9880aaL,0x723c2423043a7c20L, + 0x4f392afb92bdfbc0L }, + { 0x6969f8fa7de44fd9L,0xb66cfbe457b32156L,0xdb2fa803368ebc3cL, + 0x8a3e7977ccdb399cL } }, + /* 53 */ + { { 0xdde1881f06c4b125L,0xae34e300f6e3ca8cL,0xef6999de5c7a13e9L, + 0x3888d02370c24404L }, + { 0x7628035644f91081L,0x3d9fcf615f015504L,0x1827edc8632cd36eL, + 0xa5e62e4718102336L } }, + /* 54 */ + { { 0x1a825ee32facd6c8L,0x699c635454bcbc66L,0x0ce3edf798df9931L, + 0x2c4768e6466a5adcL }, + { 0xb346ff8c90a64bc9L,0x630a6020e4779f5cL,0xd949d064bc05e884L, + 0x7b5e6441f9e652a0L } }, + /* 55 */ + { { 0x2169422c1d28444aL,0xe996c5d8be136a39L,0x2387afe5fb0c7fceL, + 0xb8af73cb0c8d744aL }, + { 0x5fde83aa338b86fdL,0xfee3f158a58a5cffL,0xc9ee8f6f20ac9433L, + 0xa036395f7f3f0895L } }, + /* 56 */ + { { 0x8c73c6bba10f7770L,0xa6f16d81a12a0e24L,0x100df68251bc2b9fL, + 0x4be36b01875fb533L }, + { 0x9226086e9fb56dbbL,0x306fef8b07e7a4f8L,0xeeaccc0566d52f20L, + 0x8cbc9a871bdc00c0L } }, + /* 57 */ + { { 0xe131895cc0dac4abL,0xa874a440712ff112L,0x6332ae7c6a1cee57L, + 0x44e7553e0c0835f8L }, + { 0x6d503fff7734002dL,0x9d35cb8b0b34425cL,0x95f702760e8738b5L, + 0x470a683a5eb8fc18L } }, + /* 58 */ + { { 0x81b761dc90513482L,0x0287202a01e9276aL,0xcda441ee0ce73083L, + 0x16410690c63dc6efL }, + { 0xf5034a066d06a2edL,0xdd4d7745189b100bL,0xd914ae72ab8218c9L, + 0xd73479fd7abcbb4fL } }, + /* 59 */ + { { 0x7edefb165ad4c6e5L,0x262cf08f5b06d04dL,0x12ed5bb18575cb14L, + 0x816469e30771666bL }, + { 0xd7ab9d79561e291eL,0xeb9daf22c1de1661L,0xf49827eb135e0513L, + 0x0a36dd23f0dd3f9cL } }, + /* 60 */ + { { 0x098d32c741d5533cL,0x7c5f5a9e8684628fL,0x39a228ade349bd11L, + 0xe331dfd6fdbab118L }, + { 0x5100ab686bcc6ed8L,0x7160c3bdef7a260eL,0x9063d9a7bce850d7L, + 0xd3b4782a492e3389L } }, + /* 61 */ + { { 0xa149b6e8f3821f90L,0x92edd9ed66eb7aadL,0x0bb669531a013116L, + 0x7281275a4c86a5bdL }, + { 0x503858f7d3ff47e5L,0x5e1616bc61016441L,0x62b0f11a7dfd9bb1L, + 0x2c062e7ece145059L } }, + /* 62 */ + { { 0xa76f996f0159ac2eL,0x281e7736cbdb2713L,0x2ad6d28808e46047L, + 0x282a35f92c4e7ef1L }, + { 0x9c354b1ec0ce5cd2L,0xcf99efc91379c229L,0x992caf383e82c11eL, + 0xc71cd513554d2abdL } }, + /* 63 */ + { { 0x4885de9c09b578f4L,0x1884e258e3affa7aL,0x8f76b1b759182f1fL, + 0xc50f6740cf47f3a3L }, + { 0xa9c4adf3374b68eaL,0xa406f32369965fe2L,0x2f86a22285a53050L, + 0xb9ecb3a7212958dcL } }, + /* 64 */ + { { 0x56f8410ef4f8b16aL,0x97241afec47b266aL,0x0a406b8e6d9c87c1L, + 0x803f3e02cd42ab1bL }, + { 0x7f0309a804dbec69L,0xa83b85f73bbad05fL,0xc6097273ad8e197fL, + 0xc097440e5067adc1L } }, + /* 65 */ + { { 
0x846a56f2c379ab34L,0xa8ee068b841df8d1L,0x20314459176c68efL, + 0xf1af32d5915f1f30L }, + { 0x99c375315d75bd50L,0x837cffbaf72f67bcL,0x0613a41848d7723fL, + 0x23d0f130e2d41c8bL } }, + /* 66 */ + { { 0x857ab6edf41500d9L,0x0d890ae5fcbeada8L,0x52fe864889725951L, + 0xb0288dd6c0a3faddL }, + { 0x85320f30650bcb08L,0x71af6313695d6e16L,0x31f520a7b989aa76L, + 0xffd3724ff408c8d2L } }, + /* 67 */ + { { 0x53968e64b458e6cbL,0x992dad20317a5d28L,0x3814ae0b7aa75f56L, + 0xf5590f4ad78c26dfL }, + { 0x0fc24bd3cf0ba55aL,0x0fc4724a0c778baeL,0x1ce9864f683b674aL, + 0x18d6da54f6f74a20L } }, + /* 68 */ + { { 0xed93e225d5be5a2bL,0x6fe799835934f3c6L,0x4314092622626ffcL, + 0x50bbb4d97990216aL }, + { 0x378191c6e57ec63eL,0x65422c40181dcdb2L,0x41a8099b0236e0f6L, + 0x2b10011801fe49c3L } }, + /* 69 */ + { { 0xfc68b5c59b391593L,0xc385f5a2598270fcL,0x7144f3aad19adcbbL, + 0xdd55899983fbae0cL }, + { 0x93b88b8e74b82ff4L,0xd2e03c4071e734c9L,0x9a7a9eaf43c0322aL, + 0xe6e4c551149d6041L } }, + /* 70 */ + { { 0x55f655bb1e9af288L,0x647e1a64f7ada931L,0x43697e4bcb2820e5L, + 0x51e00db107ed56ffL }, + { 0x43d169b8771c327eL,0x29cdb20b4a96c2adL,0xc07d51f53deb4779L, + 0xe22f424149829177L } }, + /* 71 */ + { { 0xcd45e8f4635f1abbL,0x7edc0cb568538874L,0xc9472c1fb5a8034dL, + 0xf709373d52dc48c9L }, + { 0x401966bba8af30d6L,0x95bf5f4af137b69cL,0x3966162a9361c47eL, + 0xbd52d288e7275b11L } }, + /* 72 */ + { { 0xab155c7a9c5fa877L,0x17dad6727d3a3d48L,0x43f43f9e73d189d8L, + 0xa0d0f8e4c8aa77a6L }, + { 0x0bbeafd8cc94f92dL,0xd818c8be0c4ddb3aL,0x22cc65f8b82eba14L, + 0xa56c78c7946d6a00L } }, + /* 73 */ + { { 0x2962391b0dd09529L,0x803e0ea63daddfcfL,0x2c77351f5b5bf481L, + 0xd8befdf8731a367aL }, + { 0xab919d42fc0157f4L,0xf51caed7fec8e650L,0xcdf9cb4002d48b0aL, + 0x854a68a5ce9f6478L } }, + /* 74 */ + { { 0xdc35f67b63506ea5L,0x9286c489a4fe0d66L,0x3f101d3bfe95cd4dL, + 0x5cacea0b98846a95L }, + { 0xa90df60c9ceac44dL,0x3db29af4354d1c3aL,0x08dd3de8ad5dbabeL, + 0xe4982d1235e4efa9L } }, + /* 75 */ + { { 0x23104a22c34cd55eL,0x58695bb32680d132L,0xfb345afa1fa1d943L, + 0x8046b7f616b20499L }, + { 0xb533581e38e7d098L,0xd7f61e8df46f0b70L,0x30dea9ea44cb78c4L, + 0xeb17ca7b9082af55L } }, + /* 76 */ + { { 0x1751b59876a145b9L,0xa5cf6b0fc1bc71ecL,0xd3e03565392715bbL, + 0x097b00bafab5e131L }, + { 0xaa66c8e9565f69e1L,0x77e8f75ab5be5199L,0x6033ba11da4fd984L, + 0xf95c747bafdbcc9eL } }, + /* 77 */ + { { 0x558f01d3bebae45eL,0xa8ebe9f0c4bc6955L,0xaeb705b1dbc64fc6L, + 0x3512601e566ed837L }, + { 0x9336f1e1fa1161cdL,0x328ab8d54c65ef87L,0x4757eee2724f21e5L, + 0x0ef971236068ab6bL } }, + /* 78 */ + { { 0x02598cf754ca4226L,0x5eede138f8642c8eL,0x48963f74468e1790L, + 0xfc16d9333b4fbc95L }, + { 0xbe96fb31e7c800caL,0x138063312678adaaL,0x3d6244976ff3e8b5L, + 0x14ca4af1b95d7a17L } }, + /* 79 */ + { { 0x7a4771babd2f81d5L,0x1a5f9d6901f7d196L,0xd898bef7cad9c907L, + 0x4057b063f59c231dL }, + { 0xbffd82fe89c05c0aL,0xe4911c6f1dc0df85L,0x3befccaea35a16dbL, + 0x1c3b5d64f1330b13L } }, + /* 80 */ + { { 0x5fe14bfe80ec21feL,0xf6ce116ac255be82L,0x98bc5a072f4a5d67L, + 0xfad27148db7e63afL }, + { 0x90c0b6ac29ab05b3L,0x37a9a83c4e251ae6L,0x0a7dc875c2aade7dL, + 0x77387de39f0e1a84L } }, + /* 81 */ + { { 0x1e9ecc49a56c0dd7L,0xa5cffcd846086c74L,0x8f7a1408f505aeceL, + 0xb37b85c0bef0c47eL }, + { 0x3596b6e4cc0e6a8fL,0xfd6d4bbf6b388f23L,0xaba453fac39cef4eL, + 0x9c135ac8f9f628d5L } }, + /* 82 */ + { { 0x32aa320284e35743L,0x320d6ab185a3cdefL,0xb821b1761df19819L, + 0x5721361fc433851fL }, + { 0x1f0db36a71fc9168L,0x5f98ba735e5c403cL,0xf64ca87e37bcd8f5L, + 0xdcbac3c9e6bb11bdL } }, + /* 83 */ + { { 
0xf01d99684518cbe2L,0xd242fc189c9eb04eL,0x727663c7e47feebfL, + 0xb8c1c89e2d626862L }, + { 0x51a58bddc8e1d569L,0x563809c8b7d88cd0L,0x26c27fd9f11f31ebL, + 0x5d23bbda2f9422d4L } }, + /* 84 */ + { { 0x0a1c729495c8f8beL,0x2961c4803bf362bfL,0x9e418403df63d4acL, + 0xc109f9cb91ece900L }, + { 0xc2d095d058945705L,0xb9083d96ddeb85c0L,0x84692b8d7a40449bL, + 0x9bc3344f2eee1ee1L } }, + /* 85 */ + { { 0x0d5ae35642913074L,0x55491b2748a542b1L,0x469ca665b310732aL, + 0x29591d525f1a4cc1L }, + { 0xe76f5b6bb84f983fL,0xbe7eef419f5f84e1L,0x1200d49680baa189L, + 0x6376551f18ef332cL } }, + /* 86 */ + { { 0xbda5f14e562976ccL,0x22bca3e60ef12c38L,0xbbfa30646cca9852L, + 0xbdb79dc808e2987aL }, + { 0xfd2cb5c9cb06a772L,0x38f475aafe536dceL,0xc2a3e0227c2b5db8L, + 0x8ee86001add3c14aL } }, + /* 87 */ + { { 0xcbe96981a4ade873L,0x7ee9aa4dc4fba48cL,0x2cee28995a054ba5L, + 0x92e51d7a6f77aa4bL }, + { 0x948bafa87190a34dL,0xd698f75bf6bd1ed1L,0xd00ee6e30caf1144L, + 0x5182f86f0a56aaaaL } }, + /* 88 */ + { { 0xfba6212c7a4cc99cL,0xff609b683e6d9ca1L,0x5dbb27cb5ac98c5aL, + 0x91dcab5d4073a6f2L }, + { 0x01b6cc3d5f575a70L,0x0cb361396f8d87faL,0x165d4e8c89981736L, + 0x17a0cedb97974f2bL } }, + /* 89 */ + { { 0x38861e2a076c8d3aL,0x701aad39210f924bL,0x94d0eae413a835d9L, + 0x2e8ce36c7f4cdf41L }, + { 0x91273dab037a862bL,0x01ba9bb760e4c8faL,0xf964538833baf2ddL, + 0xf4ccc6cb34f668f3L } }, + /* 90 */ + { { 0x44ef525cf1f79687L,0x7c59549592efa815L,0xe1231741a5c78d29L, + 0xac0db4889a0df3c9L }, + { 0x86bfc711df01747fL,0x592b9358ef17df13L,0xe5880e4f5ccb6bb5L, + 0x95a64a6194c974a2L } }, + /* 91 */ + { { 0x72c1efdac15a4c93L,0x40269b7382585141L,0x6a8dfb1c16cb0badL, + 0x231e54ba29210677L }, + { 0xa70df9178ae6d2dcL,0x4d6aa63f39112918L,0xf627726b5e5b7223L, + 0xab0be032d8a731e1L } }, + /* 92 */ + { { 0x097ad0e98d131f2dL,0x637f09e33b04f101L,0x1ac86196d5e9a748L, + 0xf1bcc8802cf6a679L }, + { 0x25c69140e8daacb4L,0x3c4e405560f65009L,0x591cc8fc477937a6L, + 0x851694695aebb271L } }, + /* 93 */ + { { 0xde35c143f1dcf593L,0x78202b29b018be3bL,0xe9cdadc29bdd9d3dL, + 0x8f67d9d2daad55d8L }, + { 0x841116567481ea5fL,0xe7d2dde9e34c590cL,0xffdd43f405053fa8L, + 0xf84572b9c0728b5dL } }, + /* 94 */ + { { 0x5e1a7a7197af71c9L,0xa14494447a736565L,0xa1b4ae070e1d5063L, + 0xedee2710616b2c19L }, + { 0xb2f034f511734121L,0x1cac6e554a25e9f0L,0x8dc148f3a40c2ecfL, + 0x9fd27e9b44ebd7f4L } }, + /* 95 */ + { { 0x3cc7658af6e2cb16L,0xe3eb7d2cfe5919b6L,0x5a8c5816168d5583L, + 0xa40c2fb6958ff387L }, + { 0x8c9ec560fedcc158L,0x7ad804c655f23056L,0xd93967049a307e12L, + 0x99bc9bb87dc6decfL } }, + /* 96 */ + { { 0x84a9521d927dafc6L,0x52c1fb695c09cd19L,0x9d9581a0f9366ddeL, + 0x9abe210ba16d7e64L }, + { 0x480af84a48915220L,0xfa73176a4dd816c6L,0xc7d539871681ca5aL, + 0x7881c25787f344b0L } }, + /* 97 */ + { { 0x93399b51e0bcf3ffL,0x0d02cbc5127f74f6L,0x8fb465a2dd01d968L, + 0x15e6e319a30e8940L }, + { 0x646d6e0d3e0e05f4L,0xfad7bddc43588404L,0xbe61c7d1c4f850d3L, + 0x0e55facf191172ceL } }, + /* 98 */ + { { 0x7e9d9806f8787564L,0x1a33172131e85ce6L,0x6b0158cab819e8d6L, + 0xd73d09766fe96577L }, + { 0x424834251eb7206eL,0xa519290fc618bb42L,0x5dcbb8595e30a520L, + 0x9250a3748f15a50bL } }, + /* 99 */ + { { 0xcaff08f8be577410L,0xfd408a035077a8c6L,0xf1f63289ec0a63a4L, + 0x77414082c1cc8c0bL }, + { 0x05a40fa6eb0991cdL,0xc1ca086649fdc296L,0x3a68a3c7b324fd40L, + 0x8cb04f4d12eb20b9L } }, + /* 100 */ + { { 0xb1c2d0556906171cL,0x9073e9cdb0240c3fL,0xdb8e6b4fd8906841L, + 0xe4e429ef47123b51L }, + { 0x0b8dd53c38ec36f4L,0xf9d2dc01ff4b6a27L,0x5d066e07879a9a48L, + 0x37bca2ff3c6e6552L } }, + /* 101 */ + { { 
0x4cd2e3c7df562470L,0x44f272a2c0964ac9L,0x7c6d5df980c793beL, + 0x59913edc3002b22aL }, + { 0x7a139a835750592aL,0x99e01d80e783de02L,0xcf8c0375ea05d64fL, + 0x43786e4ab013e226L } }, + /* 102 */ + { { 0xff32b0ed9e56b5a6L,0x0750d9a6d9fc68f9L,0xec15e845597846a7L, + 0x8638ca98b7e79e7aL }, + { 0x2f5ae0960afc24b2L,0x05398eaf4dace8f2L,0x3b765dd0aecba78fL, + 0x1ecdd36a7b3aa6f0L } }, + /* 103 */ + { { 0x5d3acd626c5ff2f3L,0xa2d516c02873a978L,0xad94c9fad2110d54L, + 0xd85d0f85d459f32dL }, + { 0x9f700b8d10b11da3L,0xd2c22c30a78318c4L,0x556988f49208decdL, + 0xa04f19c3b4ed3c62L } }, + /* 104 */ + { { 0x087924c8ed7f93bdL,0xcb64ac5d392f51f6L,0x7cae330a821b71afL, + 0x92b2eeea5c0950b0L }, + { 0x85ac4c9485b6e235L,0xab2ca4a92936c0f0L,0x80faa6b3e0508891L, + 0x1ee782215834276cL } }, + /* 105 */ + { { 0xa60a2e00e63e79f7L,0xf590e7b2f399d906L,0x9021054a6607c09dL, + 0xf3f2ced857a6e150L }, + { 0x200510f3f10d9b55L,0x9d2fcfacd8642648L,0xe5631aa7e8bd0e7cL, + 0x0f56a4543da3e210L } }, + /* 106 */ + { { 0x5b21bffa1043e0dfL,0x6c74b6cc9c007e6dL,0x1a656ec0d4a8517aL, + 0xbd8f17411969e263L }, + { 0x8a9bbb86beb7494aL,0x1567d46f45f3b838L,0xdf7a12a7a4e5a79aL, + 0x2d1a1c3530ccfa09L } }, + /* 107 */ + { { 0x192e3813506508daL,0x336180c4a1d795a7L,0xcddb59497a9944b3L, + 0xa107a65eb91fba46L }, + { 0xe6d1d1c50f94d639L,0x8b4af3758a58b7d7L,0x1a7c5584bd37ca1cL, + 0x183d760af87a9af2L } }, + /* 108 */ + { { 0x29d697110dde59a4L,0xf1ad8d070e8bef87L,0x229b49634f2ebe78L, + 0x1d44179dc269d754L }, + { 0xb32dc0cf8390d30eL,0x0a3b27530de8110cL,0x31af1dc52bc0339aL, + 0x771f9cc29606d262L } }, + /* 109 */ + { { 0x99993e7785040739L,0x44539db98026a939L,0xcf40f6f2f5f8fc26L, + 0x64427a310362718eL }, + { 0x4f4f2d8785428aa8L,0x7b7adc3febfb49a8L,0x201b2c6df23d01acL, + 0x49d9b7496ae90d6dL } }, + /* 110 */ + { { 0xcc78d8bc435d1099L,0x2adbcd4e8e8d1a08L,0x02c2e2a02cb68a41L, + 0x9037d81b3f605445L }, + { 0x7cdbac27074c7b61L,0xfe2031ab57bfd72eL,0x61ccec96596d5352L, + 0x08c3de6a7cc0639cL } }, + /* 111 */ + { { 0x20fdd020f6d552abL,0x56baff9805cd81f1L,0x06fb7c3e91351291L, + 0xc690944245796b2fL }, + { 0x17b3ae9c41231bd1L,0x1eac6e875cc58205L,0x208837abf9d6a122L, + 0x3fa3db02cafe3ac0L } }, + /* 112 */ + { { 0xd75a3e6505058880L,0x7da365ef643943f2L,0x4147861cfab24925L, + 0xc5c4bdb0fdb808ffL }, + { 0x73513e34b272b56bL,0xc8327e9511b9043aL,0xfd8ce37df8844969L, + 0x2d56db9446c2b6b5L } }, + /* 113 */ + { { 0x2461782fff46ac6bL,0xd19f792607a2e425L,0xfafea3c409a48de1L, + 0x0f56bd9de503ba42L }, + { 0x137d4ed1345cda49L,0x821158fc816f299dL,0xe7c6a54aaeb43402L, + 0x4003bb9d1173b5f1L } }, + /* 114 */ + { { 0x3b8e8189a0803387L,0xece115f539cbd404L,0x4297208dd2877f21L, + 0x53765522a07f2f9eL }, + { 0xa4980a21a8a4182dL,0xa2bbd07a3219df79L,0x674d0a2e1a19a2d4L, + 0x7a056f586c5d4549L } }, + /* 115 */ + { { 0x646b25589d8a2a47L,0x5b582948c3df2773L,0x51ec000eabf0d539L, + 0x77d482f17a1a2675L }, + { 0xb8a1bd9587853948L,0xa6f817bd6cfbffeeL,0xab6ec05780681e47L, + 0x4115012b2b38b0e4L } }, + /* 116 */ + { { 0x3c73f0f46de28cedL,0x1d5da7609b13ec47L,0x61b8ce9e6e5c6392L, + 0xcdf04572fbea0946L }, + { 0x1cb3c58b6c53c3b0L,0x97fe3c10447b843cL,0xfb2b8ae12cb9780eL, + 0xee703dda97383109L } }, + /* 117 */ + { { 0x34515140ff57e43aL,0xd44660d3b1b811b8L,0x2b3b5dff8f42b986L, + 0x2a0ad89da162ce21L }, + { 0x64e4a6946bc277baL,0xc788c954c141c276L,0x141aa64ccabf6274L, + 0xd62d0b67ac2b4659L } }, + /* 118 */ + { { 0x39c5d87b2c054ac4L,0x57005859f27df788L,0xedf7cbf3b18128d6L, + 0xb39a23f2991c2426L }, + { 0x95284a15f0b16ae5L,0x0c6a05b1a136f51bL,0x1d63c137f2700783L, + 0x04ed0092c0674cc5L } }, + /* 119 */ + { { 
0x1f4185d19ae90393L,0x3047b4294a3d64e6L,0xae0001a69854fc14L, + 0xa0a91fc10177c387L }, + { 0xff0a3f01ae2c831eL,0xbb76ae822b727e16L,0x8f12c8a15a3075b4L, + 0x084cf9889ed20c41L } }, + /* 120 */ + { { 0xd98509defca6becfL,0x2fceae807dffb328L,0x5d8a15c44778e8b9L, + 0xd57955b273abf77eL }, + { 0x210da79e31b5d4f1L,0xaa52f04b3cfa7a1cL,0xd4d12089dc27c20bL, + 0x8e14ea4202d141f1L } }, + /* 121 */ + { { 0xeed50345f2897042L,0x8d05331f43402c4aL,0xc8d9c194c8bdfb21L, + 0x597e1a372aa4d158L }, + { 0x0327ec1acf0bd68cL,0x6d4be0dcab024945L,0x5b9c8d7ac9fe3e84L, + 0xca3f0236199b4deaL } }, + /* 122 */ + { { 0x592a10b56170bd20L,0x0ea897f16d3f5de7L,0xa3363ff144b2ade2L, + 0xbde7fd7e309c07e4L }, + { 0x516bb6d2b8f5432cL,0x210dc1cbe043444bL,0x3db01e6ff8f95b5aL, + 0xb623ad0e0a7dd198L } }, + /* 123 */ + { { 0xa75bd67560c7b65bL,0xab8c559023a4a289L,0xf8220fd0d7b26795L, + 0xd6aa2e4658ec137bL }, + { 0x10abc00b5138bb85L,0x8c31d121d833a95cL,0xb24ff00b1702a32eL, + 0x111662e02dcc513aL } }, + /* 124 */ + { { 0x78114015efb42b87L,0xbd9f5d701b6c4dffL,0x66ecccd7a7d7c129L, + 0xdb3ee1cb94b750f8L }, + { 0xb26f3db0f34837cfL,0xe7eed18bb9578d4fL,0x5d2cdf937c56657dL, + 0x886a644252206a59L } }, + /* 125 */ + { { 0x3c234cfb65b569eaL,0x20011141f72119c1L,0x8badc85da15a619eL, + 0xa70cf4eb018a17bcL }, + { 0x224f97ae8c4a6a65L,0x36e5cf270134378fL,0xbe3a609e4f7e0960L, + 0xaa4772abd1747b77L } }, + /* 126 */ + { { 0x676761317aa60cc0L,0xc79163610368115fL,0xded98bb4bbc1bb5aL, + 0x611a6ddc30faf974L }, + { 0x30e78cbcc15ee47aL,0x2e8962824e0d96a5L,0x36f35adf3dd9ed88L, + 0x5cfffaf816429c88L } }, + /* 127 */ + { { 0xc0d54cff9b7a99cdL,0x7bf3b99d843c45a1L,0x038a908f62c739e1L, + 0x6e5a6b237dc1994cL }, + { 0xef8b454e0ba5db77L,0xb7b8807facf60d63L,0xe591c0c676608378L, + 0x481a238d242dabccL } }, + /* 128 */ + { { 0xe3417bc035d0b34aL,0x440b386b8327c0a7L,0x8fb7262dac0362d1L, + 0x2c41114ce0cdf943L }, + { 0x2ba5cef1ad95a0b1L,0xc09b37a867d54362L,0x26d6cdd201e486c9L, + 0x20477abf42ff9297L } }, + /* 129 */ + { { 0x2f75173c18d65dbfL,0x77bf940e339edad8L,0x7022d26bdcf1001cL, + 0xac66409ac77396b6L }, + { 0x8b0bb36fc6261cc3L,0x213f7bc9190e7e90L,0x6541cebaa45e6c10L, + 0xce8e6975cc122f85L } }, + /* 130 */ + { { 0x0f121b41bc0a67d2L,0x62d4760a444d248aL,0x0e044f1d659b4737L, + 0x08fde365250bb4a8L }, + { 0xaceec3da848bf287L,0xc2a62182d3369d6eL,0x3582dfdc92449482L, + 0x2f7e2fd2565d6cd7L } }, + /* 131 */ + { { 0xae4b92dbc3770fa7L,0x095e8d5c379043f9L,0x54f34e9d17761171L, + 0xc65be92e907702aeL }, + { 0x2758a303f6fd0a40L,0xe7d822e3bcce784bL,0x7ae4f5854f9767bfL, + 0x4bff8e47d1193b3aL } }, + /* 132 */ + { { 0xcd41d21f00ff1480L,0x2ab8fb7d0754db16L,0xac81d2efbbe0f3eaL, + 0x3e4e4ae65772967dL }, + { 0x7e18f36d3c5303e6L,0x3bd9994b92262397L,0x9ed70e261324c3c0L, + 0x5388aefd58ec6028L } }, + /* 133 */ + { { 0xad1317eb5e5d7713L,0x09b985ee75de49daL,0x32f5bc4fc74fb261L, + 0x5cf908d14f75be0eL }, + { 0x760435108e657b12L,0xbfd421a5b96ed9e6L,0x0e29f51f8970ccc2L, + 0xa698ba4060f00ce2L } }, + /* 134 */ + { { 0x73db1686ef748fecL,0xe6e755a27e9d2cf9L,0x630b6544ce265effL, + 0xb142ef8a7aebad8dL }, + { 0xad31af9f17d5770aL,0x66af3b672cb3412fL,0x6bd60d1bdf3359deL, + 0xd1896a9658515075L } }, + /* 135 */ + { { 0xec5957ab33c41c08L,0x87de94ac5468e2e1L,0x18816b73ac472f6cL, + 0x267b0e0b7981da39L }, + { 0x6e554e5d8e62b988L,0xd8ddc755116d21e7L,0x4610faf03d2a6f99L, + 0xb54e287aa1119393L } }, + /* 136 */ + { { 0x0a0122b5178a876bL,0x51ff96ff085104b4L,0x050b31ab14f29f76L, + 0x84abb28b5f87d4e6L }, + { 0xd5ed439f8270790aL,0x2d6cb59d85e3f46bL,0x75f55c1b6c1e2212L, + 0xe5436f6717655640L } }, + /* 137 */ + { { 
0x53f9025e2286e8d5L,0x353c95b4864453beL,0xd832f5bde408e3a0L, + 0x0404f68b5b9ce99eL }, + { 0xcad33bdea781e8e5L,0x3cdf5018163c2f5bL,0x575769600119caa3L, + 0x3a4263df0ac1c701L } }, + /* 138 */ + { { 0xc2965ecc9aeb596dL,0x01ea03e7023c92b4L,0x4704b4b62e013961L, + 0x0ca8fd3f905ea367L }, + { 0x92523a42551b2b61L,0x1eb7a89c390fcd06L,0xe7f1d2be0392a63eL, + 0x96dca2644ddb0c33L } }, + /* 139 */ + { { 0x203bb43a387510afL,0x846feaa8a9a36a01L,0xd23a57702f950378L, + 0x4363e2123aad59dcL }, + { 0xca43a1c740246a47L,0xb362b8d2e55dd24dL,0xf9b086045d8faf96L, + 0x840e115cd8bb98c4L } }, + /* 140 */ + { { 0xf12205e21023e8a7L,0xc808a8cdd8dc7a0bL,0xe292a272163a5ddfL, + 0x5e0d6abd30ded6d4L }, + { 0x07a721c27cfc0f64L,0x42eec01d0e55ed88L,0x26a7bef91d1f9db2L, + 0x7dea48f42945a25aL } }, + /* 141 */ + { { 0xabdf6f1ce5060a81L,0xe79f9c72f8f95615L,0xcfd36c5406ac268bL, + 0xabc2a2beebfd16d1L }, + { 0x8ac66f91d3e2eac7L,0x6f10ba63d2dd0466L,0x6790e3770282d31bL, + 0x4ea353946c7eefc1L } }, + /* 142 */ + { { 0xed8a2f8d5266309dL,0x0a51c6c081945a3eL,0xcecaf45a578c5dc1L, + 0x3a76e6891c94ffc3L }, + { 0x9aace8a47d7b0d0fL,0x963ace968f584a5fL,0x51a30c724e697fbeL, + 0x8212a10a465e6464L } }, + /* 143 */ + { { 0xef7c61c3cfab8caaL,0x18eb8e840e142390L,0xcd1dff677e9733caL, + 0xaa7cab71599cb164L }, + { 0x02fc9273bc837bd1L,0xc06407d0c36af5d7L,0x17621292f423da49L, + 0x40e38073fe0617c3L } }, + /* 144 */ + { { 0xf4f80824a7bf9b7cL,0x365d23203fbe30d0L,0xbfbe532097cf9ce3L, + 0xe3604700b3055526L }, + { 0x4dcb99116cc6c2c7L,0x72683708ba4cbee6L,0xdcded434637ad9ecL, + 0x6542d677a3dee15fL } }, + /* 145 */ + { { 0x3f32b6d07b6c377aL,0x6cb03847903448beL,0xd6fdd3a820da8af7L, + 0xa6534aee09bb6f21L }, + { 0x30a1780d1035facfL,0x35e55a339dcb47e6L,0x6ea50fe1c447f393L, + 0xf3cb672fdc9aef22L } }, + /* 146 */ + { { 0xeb3719fe3b55fd83L,0xe0d7a46c875ddd10L,0x33ac9fa905cea784L, + 0x7cafaa2eaae870e7L }, + { 0x9b814d041d53b338L,0xe0acc0a0ef87e6c6L,0xfb93d10811672b0fL, + 0x0aab13c1b9bd522eL } }, + /* 147 */ + { { 0xddcce278d2681297L,0xcb350eb1b509546aL,0x2dc431737661aaf2L, + 0x4b91a602847012e9L }, + { 0xdcff109572f8ddcfL,0x08ebf61e9a911af4L,0x48f4360ac372430eL, + 0x49534c5372321cabL } }, + /* 148 */ + { { 0x83df7d71f07b7e9dL,0xa478efa313cd516fL,0x78ef264b6c047ee3L, + 0xcaf46c4fd65ac5eeL }, + { 0xa04d0c7792aa8266L,0xedf45466913684bbL,0x56e65168ae4b16b0L, + 0x14ce9e5704c6770fL } }, + /* 149 */ + { { 0x99445e3e965e8f91L,0xd3aca1bacb0f2492L,0xd31cc70f90c8a0a0L, + 0x1bb708a53e4c9a71L }, + { 0xd5ca9e69558bdd7aL,0x734a0508018a26b1L,0xb093aa714c9cf1ecL, + 0xf9d126f2da300102L } }, + /* 150 */ + { { 0x749bca7aaff9563eL,0xdd077afeb49914a0L,0xe27a0311bf5f1671L, + 0x807afcb9729ecc69L }, + { 0x7f8a9337c9b08b77L,0x86c3a785443c7e38L,0x85fafa59476fd8baL, + 0x751adcd16568cd8cL } }, + /* 151 */ + { { 0x8aea38b410715c0dL,0xd113ea718f7697f7L,0x665eab1493fbf06dL, + 0x29ec44682537743fL }, + { 0x3d94719cb50bebbcL,0x399ee5bfe4505422L,0x90cd5b3a8d2dedb1L, + 0xff9370e392a4077dL } }, + /* 152 */ + { { 0x59a2d69bc6b75b65L,0x4188f8d5266651c5L,0x28a9f33e3de9d7d2L, + 0x9776478ba2a9d01aL }, + { 0x8852622d929af2c7L,0x334f5d6d4e690923L,0xce6cc7e5a89a51e9L, + 0x74a6313fac2f82faL } }, + /* 153 */ + { { 0xb2f4dfddb75f079cL,0x85b07c9518e36fbbL,0x1b6cfcf0e7cd36ddL, + 0xab75be150ff4863dL }, + { 0x81b367c0173fc9b7L,0xb90a7420d2594fd0L,0x15fdbf03c4091236L, + 0x4ebeac2e0b4459f6L } }, + /* 154 */ + { { 0xeb6c5fe75c9f2c53L,0xd25220118eae9411L,0xc8887633f95ac5d8L, + 0xdf99887b2c1baffcL }, + { 0xbb78eed2850aaecbL,0x9d49181b01d6a272L,0x978dd511b1cdbcacL, + 0x27b040a7779f4058L } }, + /* 155 */ + { { 
0x90405db7f73b2eb2L,0xe0df85088e1b2118L,0x501b71525962327eL, + 0xb393dd37e4cfa3f5L }, + { 0xa1230e7b3fd75165L,0xd66344c2bcd33554L,0x6c36f1be0f7b5022L, + 0x09588c12d0463419L } }, + /* 156 */ + { { 0xe086093f02601c3bL,0xfb0252f8cf5c335fL,0x955cf280894aff28L, + 0x81c879a9db9f648bL }, + { 0x040e687cc6f56c51L,0xfed471693f17618cL,0x44f88a419059353bL, + 0xfa0d48f55fc11bc4L } }, + /* 157 */ + { { 0xbc6e1c9de1608e4dL,0x010dda113582822cL,0xf6b7ddc1157ec2d7L, + 0x8ea0e156b6a367d6L }, + { 0xa354e02f2383b3b4L,0x69966b943f01f53cL,0x4ff6632b2de03ca5L, + 0x3f5ab924fa00b5acL } }, + /* 158 */ + { { 0x337bb0d959739efbL,0xc751b0f4e7ebec0dL,0x2da52dd6411a67d1L, + 0x8bc768872b74256eL }, + { 0xa5be3b7282d3d253L,0xa9f679a1f58d779fL,0xa1cac168e16767bbL, + 0xb386f19060fcf34fL } }, + /* 159 */ + { { 0x31f3c1352fedcfc2L,0x5396bf6262f8af0dL,0x9a02b4eae57288c2L, + 0x4cb460f71b069c4dL }, + { 0xae67b4d35b8095eaL,0x92bbf8596fc07603L,0xe1475f66b614a165L, + 0x52c0d50895ef5223L } }, + /* 160 */ + { { 0x231c210e15339848L,0xe87a28e870778c8dL,0x9d1de6616956e170L, + 0x4ac3c9382bb09c0bL }, + { 0x19be05516998987dL,0x8b2376c4ae09f4d6L,0x1de0b7651a3f933dL, + 0x380d94c7e39705f4L } }, + /* 161 */ + { { 0x01a355aa81542e75L,0x96c724a1ee01b9b7L,0x6b3a2977624d7087L, + 0x2ce3e171de2637afL }, + { 0xcfefeb49f5d5bc1aL,0xa655607e2777e2b5L,0x4feaac2f9513756cL, + 0x2e6cd8520b624e4dL } }, + /* 162 */ + { { 0x3685954b8c31c31dL,0x68533d005bf21a0cL,0x0bd7626e75c79ec9L, + 0xca17754742c69d54L }, + { 0xcc6edafff6d2dbb2L,0xfd0d8cbd174a9d18L,0x875e8793aa4578e8L, + 0xa976a7139cab2ce6L } }, + /* 163 */ + { { 0x0a651f1b93fb353dL,0xd75cab8b57fcfa72L,0xaa88cfa731b15281L, + 0x8720a7170a1f4999L }, + { 0x8c3e8d37693e1b90L,0xd345dc0b16f6dfc3L,0x8ea8d00ab52a8742L, + 0x9719ef29c769893cL } }, + /* 164 */ + { { 0x820eed8d58e35909L,0x9366d8dc33ddc116L,0xd7f999d06e205026L, + 0xa5072976e15704c1L }, + { 0x002a37eac4e70b2eL,0x84dcf6576890aa8aL,0xcd71bf18645b2a5cL, + 0x99389c9df7b77725L } }, + /* 165 */ + { { 0x238c08f27ada7a4bL,0x3abe9d03fd389366L,0x6b672e89766f512cL, + 0xa88806aa202c82e4L }, + { 0x6602044ad380184eL,0xa8cb78c4126a8b85L,0x79d670c0ad844f17L, + 0x0043bffb4738dcfeL } }, + /* 166 */ + { { 0x8d59b5dc36d5192eL,0xacf885d34590b2afL,0x83566d0a11601781L, + 0x52f3ef01ba6c4866L }, + { 0x3986732a0edcb64dL,0x0a482c238068379fL,0x16cbe5fa7040f309L, + 0x3296bd899ef27e75L } }, + /* 167 */ + { { 0x476aba89454d81d7L,0x9eade7ef51eb9b3cL,0x619a21cd81c57986L, + 0x3b90febfaee571e9L }, + { 0x9393023e5496f7cbL,0x55be41d87fb51bc4L,0x03f1dd4899beb5ceL, + 0x6e88069d9f810b18L } }, + /* 168 */ + { { 0xce37ab11b43ea1dbL,0x0a7ff1a95259d292L,0x851b02218f84f186L, + 0xa7222beadefaad13L }, + { 0xa2ac78ec2b0a9144L,0x5a024051f2fa59c5L,0x91d1eca56147ce38L, + 0xbe94d523bc2ac690L } }, + /* 169 */ + { { 0x72f4945e0b226ce7L,0xb8afd747967e8b70L,0xedea46f185a6c63eL, + 0x7782defe9be8c766L }, + { 0x760d2aa43db38626L,0x460ae78776f67ad1L,0x341b86fc54499cdbL, + 0x03838567a2892e4bL } }, + /* 170 */ + { { 0x2d8daefd79ec1a0fL,0x3bbcd6fdceb39c97L,0xf5575ffc58f61a95L, + 0xdbd986c4adf7b420L }, + { 0x81aa881415f39eb7L,0x6ee2fcf5b98d976cL,0x5465475dcf2f717dL, + 0x8e24d3c46860bbd0L } }, + /* 171 */ + { { 0x749d8e549a587390L,0x12bb194f0cbec588L,0x46e07da4b25983c6L, + 0x541a99c4407bafc8L }, + { 0xdb241692624c8842L,0x6044c12ad86c05ffL,0xc59d14b44f7fcf62L, + 0xc0092c49f57d35d1L } }, + /* 172 */ + { { 0xd3cc75c3df2e61efL,0x7e8841c82e1b35caL,0xc62d30d1909f29f4L, + 0x75e406347286944dL }, + { 0xe7d41fc5bbc237d0L,0xc9537bf0ec4f01c9L,0x91c51a16282bd534L, + 0x5b7cb658c7848586L } }, + /* 173 */ + { { 
0x964a70848a28ead1L,0x802dc508fd3b47f6L,0x9ae4bfd1767e5b39L, + 0x7ae13eba8df097a1L }, + { 0xfd216ef8eadd384eL,0x0361a2d9b6b2ff06L,0x204b98784bcdb5f3L, + 0x787d8074e2a8e3fdL } }, + /* 174 */ + { { 0xc5e25d6b757fbb1cL,0xe47bddb2ca201debL,0x4a55e9a36d2233ffL, + 0x5c2228199ef28484L }, + { 0x773d4a8588315250L,0x21b21a2b827097c1L,0xab7c4ea1def5d33fL, + 0xe45d37abbaf0f2b0L } }, + /* 175 */ + { { 0xd2df1e3428511c8aL,0xebb229c8bdca6cd3L,0x578a71a7627c39a7L, + 0xed7bc12284dfb9d3L }, + { 0xcf22a6df93dea561L,0x5443f18dd48f0ed1L,0xd8b861405bad23e8L, + 0xaac97cc945ca6d27L } }, + /* 176 */ + { { 0xeb54ea74a16bd00aL,0xd839e9adf5c0bcc1L,0x092bb7f11f9bfc06L, + 0x318f97b31163dc4eL }, + { 0xecc0c5bec30d7138L,0x44e8df23abc30220L,0x2bb7972fb0223606L, + 0xfa41faa19a84ff4dL } }, + /* 177 */ + { { 0x4402d974a6642269L,0xc81814ce9bb783bdL,0x398d38e47941e60bL, + 0x38bb6b2c1d26e9e2L }, + { 0xc64e4a256a577f87L,0x8b52d253dc11fe1cL,0xff336abf62280728L, + 0x94dd0905ce7601a5L } }, + /* 178 */ + { { 0x156cf7dcde93f92aL,0xa01333cb89b5f315L,0x02404df9c995e750L, + 0x92077867d25c2ae9L }, + { 0xe2471e010bf39d44L,0x5f2c902096bb53d7L,0x4c44b7b35c9c3d8fL, + 0x81e8428bd29beb51L } }, + /* 179 */ + { { 0x6dd9c2bac477199fL,0x8cb8eeee6b5ecdd9L,0x8af7db3fee40fd0eL, + 0x1b94ab62dbbfa4b1L }, + { 0x44f0d8b3ce47f143L,0x51e623fc63f46163L,0xf18f270fcc599383L, + 0x06a38e28055590eeL } }, + /* 180 */ + { { 0x2e5b0139b3355b49L,0x20e26560b4ebf99bL,0xc08ffa6bd269f3dcL, + 0xa7b36c2083d9d4f8L }, + { 0x64d15c3a1b3e8830L,0xd5fceae1a89f9c0bL,0xcfeee4a2e2d16930L, + 0xbe54c6b4a2822a20L } }, + /* 181 */ + { { 0xd6cdb3df8d91167cL,0x517c3f79e7a6625eL,0x7105648f346ac7f4L, + 0xbf30a5abeae022bbL }, + { 0x8e7785be93828a68L,0x5161c3327f3ef036L,0xe11b5feb592146b2L, + 0xd1c820de2732d13aL } }, + /* 182 */ + { { 0x043e13479038b363L,0x58c11f546b05e519L,0x4fe57abe6026cad1L, + 0xb7d17bed68a18da3L }, + { 0x44ca5891e29c2559L,0x4f7a03765bfffd84L,0x498de4af74e46948L, + 0x3997fd5e6412cc64L } }, + /* 183 */ + { { 0xf20746828bd61507L,0x29e132d534a64d2aL,0xffeddfb08a8a15e3L, + 0x0eeb89293c6c13e8L }, + { 0xe9b69a3ea7e259f8L,0xce1db7e6d13e7e67L,0x277318f6ad1fa685L, + 0x228916f8c922b6efL } }, + /* 184 */ + { { 0x959ae25b0a12ab5bL,0xcc11171f957bc136L,0x8058429ed16e2b0cL, + 0xec05ad1d6e93097eL }, + { 0x157ba5beac3f3708L,0x31baf93530b59d77L,0x47b55237118234e5L, + 0x7d3141567ff11b37L } }, + /* 185 */ + { { 0x7bd9c05cf6dfefabL,0xbe2f2268dcb37707L,0xe53ead973a38bb95L, + 0xe9ce66fc9bc1d7a3L }, + { 0x75aa15766f6a02a1L,0x38c087df60e600edL,0xf8947f3468cdc1b9L, + 0xd9650b0172280651L } }, + /* 186 */ + { { 0x504b4c4a5a057e60L,0xcbccc3be8def25e4L,0xa635320817c1ccbdL, + 0x14d6699a804eb7a2L }, + { 0x2c8a8415db1f411aL,0x09fbaf0bf80d769cL,0xb4deef901c2f77adL, + 0x6f4c68410d43598aL } }, + /* 187 */ + { { 0x8726df4e96c24a96L,0x534dbc85fcbd99a3L,0x3c466ef28b2ae30aL, + 0x4c4350fd61189abbL }, + { 0x2967f716f855b8daL,0x41a42394463c38a1L,0xc37e1413eae93343L, + 0xa726d2425a3118b5L } }, + /* 188 */ + { { 0xdae6b3ee948c1086L,0xf1de503dcbd3a2e1L,0x3f35ed3f03d022f3L, + 0x13639e82cc6cf392L }, + { 0x9ac938fbcdafaa86L,0xf45bc5fb2654a258L,0x1963b26e45051329L, + 0xca9365e1c1a335a3L } }, + /* 189 */ + { { 0x3615ac754c3b2d20L,0x742a5417904e241bL,0xb08521c4cc9d071dL, + 0x9ce29c34970b72a5L }, + { 0x8cc81f736d3e0ad6L,0x8060da9ef2f8434cL,0x35ed1d1a6ce862d9L, + 0x48c4abd7ab42af98L } }, + /* 190 */ + { { 0xd221b0cc40c7485aL,0xead455bbe5274dbfL,0x493c76989263d2e8L, + 0x78017c32f67b33cbL }, + { 0xb9d35769930cb5eeL,0xc0d14e940c408ed2L,0xf8b7bf55272f1a4dL, + 0x53cd0454de5c1c04L } }, + /* 191 */ + { { 
0xbcd585fa5d28ccacL,0x5f823e56005b746eL,0x7c79f0a1cd0123aaL, + 0xeea465c1d3d7fa8fL }, + { 0x7810659f0551803bL,0x6c0b599f7ce6af70L,0x4195a77029288e70L, + 0x1b6e42a47ae69193L } }, + /* 192 */ + { { 0x2e80937cf67d04c3L,0x1e312be289eeb811L,0x56b5d88792594d60L, + 0x0224da14187fbd3dL }, + { 0x87abb8630c5fe36fL,0x580f3c604ef51f5fL,0x964fb1bfb3b429ecL, + 0x60838ef042bfff33L } }, + /* 193 */ + { { 0x432cb2f27e0bbe99L,0x7bda44f304aa39eeL,0x5f497c7a9fa93903L, + 0x636eb2022d331643L }, + { 0xfcfd0e6193ae00aaL,0x875a00fe31ae6d2fL,0xf43658a29f93901cL, + 0x8844eeb639218bacL } }, + /* 194 */ + { { 0x114171d26b3bae58L,0x7db3df7117e39f3eL,0xcd37bc7f81a8eadaL, + 0x27ba83dc51fb789eL }, + { 0xa7df439ffbf54de5L,0x7277030bb5fe1a71L,0x42ee8e35db297a48L, + 0xadb62d3487f3a4abL } }, + /* 195 */ + { { 0x9b1168a2a175df2aL,0x082aa04f618c32e9L,0xc9e4f2e7146b0916L, + 0xb990fd7675e7c8b2L }, + { 0x0829d96b4df37313L,0x1c205579d0b40789L,0x66c9ae4a78087711L, + 0x81707ef94d10d18dL } }, + /* 196 */ + { { 0x97d7cab203d6ff96L,0x5b851bfc0d843360L,0x268823c4d042db4bL, + 0x3792daead5a8aa5cL }, + { 0x52818865941afa0bL,0xf3e9e74142d83671L,0x17c825275be4e0a7L, + 0x5abd635e94b001baL } }, + /* 197 */ + { { 0x727fa84e0ac4927cL,0xe3886035a7c8cf23L,0xa4bcd5ea4adca0dfL, + 0x5995bf21846ab610L }, + { 0xe90f860b829dfa33L,0xcaafe2ae958fc18bL,0x9b3baf4478630366L, + 0x44c32ca2d483411eL } }, + /* 198 */ + { { 0xa74a97f1e40ed80cL,0x5f938cb131d2ca82L,0x53f2124b7c2d6ad9L, + 0x1f2162fb8082a54cL }, + { 0x7e467cc5720b173eL,0x40e8a666085f12f9L,0x8cebc20e4c9d65dcL, + 0x8f1d402bc3e907c9L } }, + /* 199 */ + { { 0x4f592f9cfbc4058aL,0xb15e14b6292f5670L,0xc55cfe37bc1d8c57L, + 0xb1980f43926edbf9L }, + { 0x98c33e0932c76b09L,0x1df5279d33b07f78L,0x6f08ead4863bb461L, + 0x2828ad9b37448e45L } }, + /* 200 */ + { { 0x696722c4c4cf4ac5L,0xf5ac1a3fdde64afbL,0x0551baa2e0890832L, + 0x4973f1275a14b390L }, + { 0xe59d8335322eac5dL,0x5e07eef50bd9b568L,0xab36720fa2588393L, + 0x6dac8ed0db168ac7L } }, + /* 201 */ + { { 0xf7b545aeeda835efL,0x4aa113d21d10ed51L,0x035a65e013741b09L, + 0x4b23ef5920b9de4cL }, + { 0xe82bb6803c4c7341L,0xd457706d3f58bc37L,0x73527863a51e3ee8L, + 0x4dd71534ddf49a4eL } }, + /* 202 */ + { { 0xbf94467295476cd9L,0x648d072fe31a725bL,0x1441c8b8fc4b67e0L, + 0xfd3170002f4a4dbbL }, + { 0x1cb43ff48995d0e1L,0x76e695d10ef729aaL,0xe0d5f97641798982L, + 0x14fac58c9569f365L } }, + /* 203 */ + { { 0xad9a0065f312ae18L,0x51958dc0fcc93fc9L,0xd9a142408a7d2846L, + 0xed7c765136abda50L }, + { 0x46270f1a25d4abbcL,0x9b5dd8f3f1a113eaL,0xc609b0755b51952fL, + 0xfefcb7f74d2e9f53L } }, + /* 204 */ + { { 0xbd09497aba119185L,0xd54e8c30aac45ba4L,0x492479deaa521179L, + 0x1801a57e87e0d80bL }, + { 0x073d3f8dfcafffb0L,0x6cf33c0bae255240L,0x781d763b5b5fdfbcL, + 0x9f8fc11e1ead1064L } }, + /* 205 */ + { { 0x1583a1715e69544cL,0x0eaf8567f04b7813L,0x1e22a8fd278a4c32L, + 0xa9d3809d3d3a69a9L }, + { 0x936c2c2c59a2da3bL,0x38ccbcf61895c847L,0x5e65244e63d50869L, + 0x3006b9aee1178ef7L } }, + /* 206 */ + { { 0x0bb1f2b0c9eead28L,0x7eef635d89f4dfbcL,0x074757fdb2ce8939L, + 0x0ab85fd745f8f761L }, + { 0xecda7c933e5b4549L,0x4be2bb5c97922f21L,0x261a1274b43b8040L, + 0xb122d67511e942c2L } }, + /* 207 */ + { { 0x3be607be66a5ae7aL,0x01e703fa76adcbe3L,0xaf9043014eb6e5c5L, + 0x9f599dc1097dbaecL }, + { 0x6d75b7180ff250edL,0x8eb91574349a20dcL,0x425605a410b227a3L, + 0x7d5528e08a294b78L } }, + /* 208 */ + { { 0xf0f58f6620c26defL,0x025585ea582b2d1eL,0xfbe7d79b01ce3881L, + 0x28ccea01303f1730L }, + { 0xd1dabcd179644ba5L,0x1fc643e806fff0b8L,0xa60a76fc66b3e17bL, + 0xc18baf48a1d013bfL } }, + /* 209 */ + { { 
0x34e638c85dc4216dL,0x00c01067206142acL,0xd453a17195f5064aL, + 0x9def809db7a9596bL }, + { 0x41e8642e67ab8d2cL,0xb42404336237a2b6L,0x7d506a6d64c4218bL, + 0x0357f8b068808ce5L } }, + /* 210 */ + { { 0x8e9dbe644cd2cc88L,0xcc61c28df0b8f39dL,0x4a309874cd30a0c8L, + 0xe4a01add1b489887L }, + { 0x2ed1eeacf57cd8f9L,0x1b767d3ebd594c48L,0xa7295c717bd2f787L, + 0x466d7d79ce10cc30L } }, + /* 211 */ + { { 0x47d318929dada2c7L,0x4fa0a6c38f9aa27dL,0x90e4fd28820a59e1L, + 0xc672a522451ead1aL }, + { 0x30607cc85d86b655L,0xf0235d3bf9ad4af1L,0x99a08680571172a6L, + 0x5e3d64faf2a67513L } }, + /* 212 */ + { { 0xaa6410c79b3b4416L,0xcd8fcf85eab26d99L,0x5ebff74adb656a74L, + 0x6c8a7a95eb8e42fcL }, + { 0x10c60ba7b02a63bdL,0x6b2f23038b8f0047L,0x8c6c3738312d90b0L, + 0x348ae422ad82ca91L } }, + /* 213 */ + { { 0x7f4746635ccda2fbL,0x22accaa18e0726d2L,0x85adf782492b1f20L, + 0xc1074de0d9ef2d2eL }, + { 0xfcf3ce44ae9a65b3L,0xfd71e4ac05d7151bL,0xd4711f50ce6a9788L, + 0xfbadfbdbc9e54ffcL } }, + /* 214 */ + { { 0x1713f1cd20a99363L,0xb915658f6cf22775L,0x968175cd24d359b2L, + 0xb7f976b483716fcdL }, + { 0x5758e24d5d6dbf74L,0x8d23bafd71c3af36L,0x48f477600243dfe3L, + 0xf4d41b2ecafcc805L } }, + /* 215 */ + { { 0x51f1cf28fdabd48dL,0xce81be3632c078a4L,0x6ace2974117146e9L, + 0x180824eae0160f10L }, + { 0x0387698b66e58358L,0x63568752ce6ca358L,0x82380e345e41e6c5L, + 0x67e5f63983cf6d25L } }, + /* 216 */ + { { 0xf89ccb8dcf4899efL,0x949015f09ebb44c0L,0x546f9276b2598ec9L, + 0x9fef789a04c11fc6L }, + { 0x6d367ecf53d2a071L,0xb10e1a7fa4519b09L,0xca6b3fb0611e2eefL, + 0xbc80c181a99c4e20L } }, + /* 217 */ + { { 0x972536f8e5eb82e6L,0x1a484fc7f56cb920L,0xc78e217150b5da5eL, + 0x49270e629f8cdf10L }, + { 0x1a39b7bbea6b50adL,0x9a0284c1a2388ffcL,0x5403eb178107197bL, + 0xd2ee52f961372f7fL } }, + /* 218 */ + { { 0xd37cd28588e0362aL,0x442fa8a78fa5d94dL,0xaff836e5a434a526L, + 0xdfb478bee5abb733L }, + { 0xa91f1ce7673eede6L,0xa5390ad42b5b2f04L,0x5e66f7bf5530da2fL, + 0xd9a140b408df473aL } }, + /* 219 */ + { { 0x0e0221b56e8ea498L,0x623478293563ee09L,0xe06b8391335d2adeL, + 0x760c058d623f4b1aL }, + { 0x0b89b58cc198aa79L,0xf74890d2f07aba7fL,0x4e204110fde2556aL, + 0x7141982d8f190409L } }, + /* 220 */ + { { 0x6f0a0e334d4b0f45L,0xd9280b38392a94e1L,0x3af324c6b3c61d5eL, + 0x3af9d1ce89d54e47L }, + { 0xfd8f798120930371L,0xeda2664c21c17097L,0x0e9545dcdc42309bL, + 0xb1f815c373957dd6L } }, + /* 221 */ + { { 0x84faa78e89fec44aL,0xc8c2ae473caa4cafL,0x691c807dc1b6a624L, + 0xa41aed141543f052L }, + { 0x424353997d5ffe04L,0x8bacb2df625b6e20L,0x85d660be87817775L, + 0xd6e9c1dd86fb60efL } }, + /* 222 */ + { { 0x3aa2e97ec6853264L,0x771533b7e2304a0bL,0x1b912bb7b8eae9beL, + 0x9c9c6e10ae9bf8c2L }, + { 0xa2309a59e030b74cL,0x4ed7494d6a631e90L,0x89f44b23a49b79f2L, + 0x566bd59640fa61b6L } }, + /* 223 */ + { { 0x066c0118c18061f3L,0x190b25d37c83fc70L,0xf05fc8e027273245L, + 0xcf2c7390f525345eL }, + { 0xa09bceb410eb30cfL,0xcfd2ebba0d77703aL,0xe842c43a150ff255L, + 0x02f517558aa20979L } }, + /* 224 */ + { { 0x396ef794addb7d07L,0x0b4fc74224455500L,0xfaff8eacc78aa3ceL, + 0x14e9ada5e8d4d97dL }, + { 0xdaa480a12f7079e2L,0x45baa3cde4b0800eL,0x01765e2d7838157dL, + 0xa0ad4fab8e9d9ae8L } }, + /* 225 */ + { { 0x0bfb76214a653618L,0x1872813c31eaaa5fL,0x1553e73744949d5eL, + 0xbcd530b86e56ed1eL }, + { 0x169be85332e9c47bL,0xdc2776feb50059abL,0xcdba9761192bfbb4L, + 0x909283cf6979341dL } }, + /* 226 */ + { { 0x67b0032476e81a13L,0x9bee1a9962171239L,0x08ed361bd32e19d6L, + 0x35eeb7c9ace1549aL }, + { 0x1280ae5a7e4e5bdcL,0x2dcd2cd3b6ceec6eL,0x52e4224c6e266bc1L, + 0x9a8b2cf4448ae864L } }, + /* 227 */ + { { 
0xf6471bf209d03b59L,0xc90e62a3b65af2abL,0xff7ff168ebd5eec9L, + 0x6bdb60f4d4491379L }, + { 0xdadafebc8a55bc30L,0xc79ead1610097fe0L,0x42e197414c1e3bddL, + 0x01ec3cfd94ba08a9L } }, + /* 228 */ + { { 0xba6277ebdc9485c2L,0x48cc9a7922fb10c7L,0x4f61d60f70a28d8aL, + 0xd1acb1c0475464f6L }, + { 0xd26902b126f36612L,0x59c3a44ee0618d8bL,0x4df8a813308357eeL, + 0x7dcd079d405626c2L } }, + /* 229 */ + { { 0x5ce7d4d3f05a4b48L,0xadcd295237230772L,0xd18f7971812a915aL, + 0x0bf53589377d19b8L }, + { 0x35ecd95a6c68ea73L,0xc7f3bbca823a584dL,0x9fb674c6f473a723L, + 0xd28be4d9e16686fcL } }, + /* 230 */ + { { 0x5d2b990638fa8e4bL,0x559f186e893fd8fcL,0x3a6de2aa436fb6fcL, + 0xd76007aa510f88ceL }, + { 0x2d10aab6523a4988L,0xb455cf4474dd0273L,0x7f467082a3407278L, + 0xf2b52f68b303bb01L } }, + /* 231 */ + { { 0x0d57eafa9835b4caL,0x2d2232fcbb669cbcL,0x8eeeb680c6643198L, + 0xd8dbe98ecc5aed3aL }, + { 0xcba9be3fc5a02709L,0x30be68e5f5ba1fa8L,0xfebd43cdf10ea852L, + 0xe01593a3ee559705L } }, + /* 232 */ + { { 0xd3e5af50ea75a0a6L,0x512226ac57858033L,0x6fe6d50fd0176406L, + 0xafec07b1aeb8ef06L }, + { 0x7fb9956780bb0a31L,0x6f1af3cc37309aaeL,0x9153a15a01abf389L, + 0xa71b93546e2dbfddL } }, + /* 233 */ + { { 0xbf8e12e018f593d2L,0xd1a90428a078122bL,0x150505db0ba4f2adL, + 0x53a2005c628523d9L }, + { 0x07c8b639e7f2b935L,0x2bff975ac182961aL,0x86bceea77518ca2cL, + 0xbf47d19b3d588e3dL } }, + /* 234 */ + { { 0x672967a7dd7665d5L,0x4e3030572f2f4de5L,0x144005ae80d4903fL, + 0x001c2c7f39c9a1b6L }, + { 0x143a801469efc6d6L,0xc810bdaa7bc7a724L,0x5f65670ba78150a4L, + 0xfdadf8e786ffb99bL } }, + /* 235 */ + { { 0xfd38cb88ffc00785L,0x77fa75913b48eb67L,0x0454d055bf368fbcL, + 0x3a838e4d5aa43c94L }, + { 0x561663293e97bb9aL,0x9eb93363441d94d9L,0x515591a60adb2a83L, + 0x3cdb8257873e1da3L } }, + /* 236 */ + { { 0x137140a97de77eabL,0xf7e1c50d41648109L,0x762dcad2ceb1d0dfL, + 0x5a60cc89f1f57fbaL }, + { 0x80b3638240d45673L,0x1b82be195913c655L,0x057284b8dd64b741L, + 0x922ff56fdbfd8fc0L } }, + /* 237 */ + { { 0x1b265deec9a129a1L,0xa5b1ce57cc284e04L,0x04380c46cebfbe3cL, + 0x72919a7df6c5cd62L }, + { 0x298f453a8fb90f9aL,0xd719c00b88e4031bL,0xe32c0e77796f1856L, + 0x5e7917803624089aL } }, + /* 238 */ + { { 0x5c16ec557f63cdfbL,0x8e6a3571f1cae4fdL,0xfce26bea560597caL, + 0x4e0a5371e24c2fabL }, + { 0x276a40d3a5765357L,0x3c89af440d73a2b4L,0xb8f370ae41d11a32L, + 0xf5ff7818d56604eeL } }, + /* 239 */ + { { 0xfbf3e3fe1a09df21L,0x26d5d28ee66e8e47L,0x2096bd0a29c89015L, + 0xe41df0e9533f5e64L }, + { 0x305fda40b3ba9e3fL,0xf2340ceb2604d895L,0x0866e1927f0367c7L, + 0x8edd7d6eac4f155fL } }, + /* 240 */ + { { 0xc9a1dc0e0bfc8ff3L,0x14efd82be936f42fL,0x67016f7ccca381efL, + 0x1432c1caed8aee96L }, + { 0xec68482970b23c26L,0xa64fe8730735b273L,0xe389f6e5eaef0f5aL, + 0xcaef480b5ac8d2c6L } }, + /* 241 */ + { { 0x5245c97875315922L,0xd82951713063cca5L,0xf3ce60d0b64ef2cbL, + 0xd0ba177e8efae236L }, + { 0x53a9ae8fb1b3af60L,0x1a796ae53d2da20eL,0x01d63605df9eef28L, + 0xf31c957c1c54ae16L } }, + /* 242 */ + { { 0xc0f58d5249cc4597L,0xdc5015b0bae0a028L,0xefc5fc55734a814aL, + 0x013404cb96e17c3aL }, + { 0xb29e2585c9a824bfL,0xd593185e001eaed7L,0x8d6ee68261ef68acL, + 0x6f377c4b91933e6cL } }, + /* 243 */ + { { 0x9f93bad1a8333fd2L,0xa89302025a2a95b8L,0x211e5037eaf75aceL, + 0x6dba3e4ed2d09506L }, + { 0xa48ef98cd04399cdL,0x1811c66ee6b73adeL,0x72f60752c17ecaf3L, + 0xf13cf3423becf4a7L } }, + /* 244 */ + { { 0xceeb9ec0a919e2ebL,0x83a9a195f62c0f68L,0xcfba3bb67aba2299L, + 0xc83fa9a9274bbad3L }, + { 0x0d7d1b0b62fa1ce0L,0xe58b60f53418efbfL,0xbfa8ef9e52706f04L, + 0xb49d70f45d702683L } }, + /* 245 */ + { { 
0x914c7510fad5513bL,0x05f32eecb1751e2dL,0x6d850418d9fb9d59L,
+        0x59cfadbb0c30f1cfL },
+      { 0xe167ac2355cb7fd6L,0x249367b8820426a3L,0xeaeec58c90a78864L,
+        0x5babf362354a4b67L } },
+    /* 246 */
+    { { 0x37c981d1ee424865L,0x8b002878f2e5577fL,0x702970f1b9e0c058L,
+        0x6188c6a79026c8f0L },
+      { 0x06f9a19bd0f244daL,0x1ecced5cfb080873L,0x35470f9b9f213637L,
+        0x993fe475df50b9d9L } },
+    /* 247 */
+    { { 0x68e31cdf9b2c3609L,0x84eb19c02c46d4eaL,0x7ac9ec1a9a775101L,
+        0x81f764664c80616bL },
+      { 0x1d7c2a5a75fbe978L,0x6743fed3f183b356L,0x838d1f04501dd2bfL,
+        0x564a812a5fe9060dL } },
+    /* 248 */
+    { { 0x7a5a64f4fa817d1dL,0x55f96844bea82e0fL,0xb5ff5a0fcd57f9aaL,
+        0x226bf3cf00e51d6cL },
+      { 0xd6d1a9f92f2833cfL,0x20a0a35a4f4f89a8L,0x11536c498f3f7f77L,
+        0x68779f47ff257836L } },
+    /* 249 */
+    { { 0x79b0c1c173043d08L,0xa54467741fc020faL,0xd3767e289a6d26d0L,
+        0x97bcb0d1eb092e0bL },
+      { 0x2ab6eaa8f32ed3c3L,0xc8a4f151b281bc48L,0x4d1bf4f3bfa178f3L,
+        0xa872ffe80a784655L } },
+    /* 250 */
+    { { 0xb1ab7935a32b2086L,0xe1eb710e8160f486L,0x9bd0cd913b6ae6beL,
+        0x02812bfcb732a36aL },
+      { 0xa63fd7cacf605318L,0x646e5d50fdfd6d1dL,0xa1d683982102d619L,
+        0x07391cc9fe5396afL } },
+    /* 251 */
+    { { 0xc50157f08b80d02bL,0x6b8333d162877f7fL,0x7aca1af878d542aeL,
+        0x355d2adc7e6d2a08L },
+      { 0xb41f335a287386e1L,0xfd272a94f8e43275L,0x286ca2cde79989eaL,
+        0x3dc2b1e37c2a3a79L } },
+    /* 252 */
+    { { 0xd689d21c04581352L,0x0a00c825376782beL,0x203bd5909fed701fL,
+        0xc47869103ccd846bL },
+      { 0x5dba770824c768edL,0x72feea026841f657L,0x73313ed56accce0eL,
+        0xccc42968d5bb4d32L } },
+    /* 253 */
+    { { 0x94e50de13d7620b9L,0xd89a5c8a5992a56aL,0xdc007640675487c9L,
+        0xe147eb42aa4871cfL },
+      { 0x274ab4eeacf3ae46L,0xfd4936fb50350fbeL,0xdf2afe4748c840eaL,
+        0x239ac047080e96e3L } },
+    /* 254 */
+    { { 0x481d1f352bfee8d4L,0xce80b5cffa7b0fecL,0x105c4c9e2ce9af3cL,
+        0xc55fa1a3f5f7e59dL },
+      { 0x3186f14e8257c227L,0xc5b1653f342be00bL,0x09afc998aa904fb2L,
+        0x094cd99cd4f4b699L } },
+    /* 255 */
+    { { 0x8a981c84d703bebaL,0x8631d15032ceb291L,0xa445f2c9e3bd49ecL,
+        0xb90a30b642abad33L },
+      { 0xb465404fb4a5abf9L,0x004750c375db7603L,0x6f9a42ccca35d89fL,
+        0x019f8b9a1b7924f7L } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert the result to affine coordinates.
+ *
+ * r    Resulting point.
+ * k    Scalar to multiply by.
+ * map  Indicates whether to convert the result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_4(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_256_ecc_mulmod_stripe_4(r, &p256_base, p256_table,
+        k, map, heap);
+}
+
+#ifdef HAVE_INTEL_AVX2
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert the result to affine coordinates.
+ *
+ * r    Resulting point.
+ * k    Scalar to multiply by.
+ * map  Indicates whether to convert the result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_avx2_4(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_256_ecc_mulmod_stripe_avx2_4(r, &p256_base, p256_table,
+        k, map, heap);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#else /* WOLFSSL_SP_SMALL */
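+
+/* Sketch of the signed-window recoding used below (the numbers here are
+ * illustrative; they follow directly from the two tables and the
+ * function that implement it).  The scalar is split into 37 windows of
+ * 7 bits each.  A window value y in [65,127] is rewritten as y - 128:
+ * the pre-computed point for 128 - y is used with its y-ordinate
+ * negated and a carry of one is added to the next window, so only the
+ * 65 multiples 0..64 of each stripe need to be stored.  For example,
+ * y = 112 (0x70):
+ *
+ *     recode_index_4_7[112]   ->  16   use pre-computed entry 16
+ *     recode_neg_4_7[112]     ->   1   negate the y-ordinate
+ *     carry = (112 >> 7) + 1  ->   1   added to the next window
+ *
+ * since 112 * P == 128 * P - 16 * P, with the 128 * P folded into the
+ * next, more significant window.
+ */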
+/* The index into the pre-computation table to use. */
+static const uint8_t recode_index_4_7[130] = {
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+    64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
+    48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
+    32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+    16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,
+     0,  1,
+};
+
+/* Whether to negate the y-ordinate. */
+static const uint8_t recode_neg_4_7[130] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 0,
+};
+
+/* Recode the scalar for multiplication using pre-computed values and
+ * subtraction.
+ *
+ * k  Scalar to multiply by.
+ * v  Vector of operations to perform.
+ */
+static void sp_256_ecc_recode_7_4(const sp_digit* k, ecc_recode_256* v)
+{
+    int i, j;
+    uint8_t y;
+    int carry = 0;
+    int o;
+    sp_digit n;
+
+    j = 0;
+    n = k[j];
+    o = 0;
+    for (i=0; i<37; i++) {
+        y = n;
+        if (o + 7 < 64) {
+            /* The 7-bit window lies within the current 64-bit word. */
+            y &= 0x7f;
+            n >>= 7;
+            o += 7;
+        }
+        else if (o + 7 == 64) {
+            /* The window ends exactly on the word boundary. */
+            n >>= 7;
+            if (++j < 4)
+                n = k[j];
+            o = 0;
+        }
+        else if (++j < 4) {
+            /* The window straddles two words: its top bits come from
+             * the next word. */
+            n = k[j];
+            y |= (n << (64 - o)) & 0x7f;
+            o -= 57;    /* o - 57 bits were consumed from the new word. */
+            n >>= o;
+        }
+
+        y += carry;
+        v[i].i = recode_index_4_7[y];
+        v[i].neg = recode_neg_4_7[y];
+        /* A negated entry adds one to the next window. */
+        carry = (y >> 7) + v[i].neg;
+    }
+}
+
+/* Pre-computed table for the P256 base point: 37 stripes of 65 points. */
+static const sp_table_entry_256 p256_table[2405] = {
+    /* 0 << 0 */
+    { { 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 << 0 */
+    { { 0x79e730d418a9143cL,0x75ba95fc5fedb601L,0x79fb732b77622510L,
+        0x18905f76a53755c6L },
+      { 0xddf25357ce95560aL,0x8b4ab8e4ba19e45cL,0xd2e88688dd21f325L,
+        0x8571ff1825885d85L } },
+    /* 2 << 0 */
+    { { 0x850046d410ddd64dL,0xaa6ae3c1a433827dL,0x732205038d1490d9L,
+        0xf6bb32e43dcf3a3bL },
+      { 0x2f3648d361bee1a5L,0x152cd7cbeb236ff8L,0x19a8fb0e92042dbeL,
+        0x78c577510a5b8a3bL } },
+    /* 3 << 0 */
+    { { 0xffac3f904eebc127L,0xb027f84a087d81fbL,0x66ad77dd87cbbc98L,
+        0x26936a3fb6ff747eL },
+      { 0xb04c5c1fc983a7ebL,0x583e47ad0861fe1aL,0x788208311a2ee98eL,
+        0xd5f06a29e587cc07L } },
+    /* 4 << 0 */
+    { { 0x74b0b50d46918dccL,0x4650a6edc623c173L,0x0cdaacace8100af2L,
+        0x577362f541b0176bL },
+      { 0x2d96f24ce4cbaba6L,0x17628471fad6f447L,0x6b6c36dee5ddd22eL,
+        0x84b14c394c5ab863L } },
+    /* 5 << 0 */
+    { { 0xbe1b8aaec45c61f5L,0x90ec649a94b9537dL,0x941cb5aad076c20cL,
+        0xc9079605890523c8L },
+      { 0xeb309b4ae7ba4f10L,0x73c568efe5eb882bL,0x3540a9877e7a1f68L,
+        0x73a076bb2dd1e916L } },
+    /* 6 << 0 */
+    { { 0x403947373e77664aL,0x55ae744f346cee3eL,0xd50a961a5b17a3adL,
+        0x13074b5954213673L },
+      { 0x93d36220d377e44bL,0x299c2b53adff14b5L,0xf424d44cef639f11L,
+        0xa4c9916d4a07f75fL } },
+    /* 7 << 0 */
+    { { 0x0746354ea0173b4fL,0x2bd20213d23c00f7L,0xf43eaab50c23bb08L,
+        0x13ba5119c3123e03L },
+      { 0x2847d0303f5b9d4dL,0x6742f2f25da67bddL,0xef933bdc77c94195L,
+        0xeaedd9156e240867L } },
+    /* 8 << 0 */
+    { { 0x27f14cd19499a78fL,0x462ab5c56f9b3455L,0x8f90f02af02cfc6bL,
+        0xb763891eb265230dL },
+      { 0xf59da3a9532d4977L,0x21e3327dcf9eba15L,0x123c7b84be60bbf0L,
+        0x56ec12f27706df76L } },
+    /* 9 << 0 */
+    { {
0x75c96e8f264e20e8L,0xabe6bfed59a7a841L,0x2cc09c0444c8eb00L, + 0xe05b3080f0c4e16bL }, + { 0x1eb7777aa45f3314L,0x56af7bedce5d45e3L,0x2b6e019a88b12f1aL, + 0x086659cdfd835f9bL } }, + /* 10 << 0 */ + { { 0x2c18dbd19dc21ec8L,0x98f9868a0fcf8139L,0x737d2cd648250b49L, + 0xcc61c94724b3428fL }, + { 0x0c2b407880dd9e76L,0xc43a8991383fbe08L,0x5f7d2d65779be5d2L, + 0x78719a54eb3b4ab5L } }, + /* 11 << 0 */ + { { 0xea7d260a6245e404L,0x9de407956e7fdfe0L,0x1ff3a4158dac1ab5L, + 0x3e7090f1649c9073L }, + { 0x1a7685612b944e88L,0x250f939ee57f61c8L,0x0c0daa891ead643dL, + 0x68930023e125b88eL } }, + /* 12 << 0 */ + { { 0x04b71aa7d2697768L,0xabdedef5ca345a33L,0x2409d29dee37385eL, + 0x4ee1df77cb83e156L }, + { 0x0cac12d91cbb5b43L,0x170ed2f6ca895637L,0x28228cfa8ade6d66L, + 0x7ff57c9553238acaL } }, + /* 13 << 0 */ + { { 0xccc425634b2ed709L,0x0e356769856fd30dL,0xbcbcd43f559e9811L, + 0x738477ac5395b759L }, + { 0x35752b90c00ee17fL,0x68748390742ed2e3L,0x7cd06422bd1f5bc1L, + 0xfbc08769c9e7b797L } }, + /* 14 << 0 */ + { { 0xa242a35bb0cf664aL,0x126e48f77f9707e3L,0x1717bf54c6832660L, + 0xfaae7332fd12c72eL }, + { 0x27b52db7995d586bL,0xbe29569e832237c2L,0xe8e4193e2a65e7dbL, + 0x152706dc2eaa1bbbL } }, + /* 15 << 0 */ + { { 0x72bcd8b7bc60055bL,0x03cc23ee56e27e4bL,0xee337424e4819370L, + 0xe2aa0e430ad3da09L }, + { 0x40b8524f6383c45dL,0xd766355442a41b25L,0x64efa6de778a4797L, + 0x2042170a7079adf4L } }, + /* 16 << 0 */ + { { 0x808b0b650bc6fb80L,0x5882e0753ffe2e6bL,0xd5ef2f7c2c83f549L, + 0x54d63c809103b723L }, + { 0xf2f11bd652a23f9bL,0x3670c3194b0b6587L,0x55c4623bb1580e9eL, + 0x64edf7b201efe220L } }, + /* 17 << 0 */ + { { 0x97091dcbd53c5c9dL,0xf17624b6ac0a177bL,0xb0f139752cfe2dffL, + 0xc1a35c0a6c7a574eL }, + { 0x227d314693e79987L,0x0575bf30e89cb80eL,0x2f4e247f0d1883bbL, + 0xebd512263274c3d0L } }, + /* 18 << 0 */ + { { 0x5f3e51c856ada97aL,0x4afc964d8f8b403eL,0xa6f247ab412e2979L, + 0x675abd1b6f80ebdaL }, + { 0x66a2bd725e485a1dL,0x4b2a5caf8f4f0b3cL,0x2626927f1b847bbaL, + 0x6c6fc7d90502394dL } }, + /* 19 << 0 */ + { { 0xfea912baa5659ae8L,0x68363aba25e1a16eL,0xb8842277752c41acL, + 0xfe545c282897c3fcL }, + { 0x2d36e9e7dc4c696bL,0x5806244afba977c5L,0x85665e9be39508c1L, + 0xf720ee256d12597bL } }, + /* 20 << 0 */ + { { 0x8a979129d2337a31L,0x5916868f0f862bdcL,0x048099d95dd283baL, + 0xe2d1eeb6fe5bfb4eL }, + { 0x82ef1c417884005dL,0xa2d4ec17ffffcbaeL,0x9161c53f8aa95e66L, + 0x5ee104e1c5fee0d0L } }, + /* 21 << 0 */ + { { 0x562e4cecc135b208L,0x74e1b2654783f47dL,0x6d2a506c5a3f3b30L, + 0xecead9f4c16762fcL }, + { 0xf29dd4b2e286e5b9L,0x1b0fadc083bb3c61L,0x7a75023e7fac29a4L, + 0xc086d5f1c9477fa3L } }, + /* 22 << 0 */ + { { 0x0fc611352f6f3076L,0xc99ffa23e3912a9aL,0x6a0b0685d2f8ba3dL, + 0xfdc777e8e93358a4L }, + { 0x94a787bb35415f04L,0x640c2d6a4d23fea4L,0x9de917da153a35b5L, + 0x793e8d075d5cd074L } }, + /* 23 << 0 */ + { { 0xf4f876532de45068L,0x37c7a7e89e2e1f6eL,0xd0825fa2a3584069L, + 0xaf2cea7c1727bf42L }, + { 0x0360a4fb9e4785a9L,0xe5fda49c27299f4aL,0x48068e1371ac2f71L, + 0x83d0687b9077666fL } }, + /* 24 << 0 */ + { { 0x6d3883b215d02819L,0x6d0d755040dd9a35L,0x61d7cbf91d2b469fL, + 0xf97b232f2efc3115L }, + { 0xa551d750b24bcbc7L,0x11ea494988a1e356L,0x7669f03193cb7501L, + 0x595dc55eca737b8aL } }, + /* 25 << 0 */ + { { 0xa4a319acd837879fL,0x6fc1b49eed6b67b0L,0xe395993332f1f3afL, + 0x966742eb65432a2eL }, + { 0x4b8dc9feb4966228L,0x96cc631243f43950L,0x12068859c9b731eeL, + 0x7b948dc356f79968L } }, + /* 26 << 0 */ + { { 0x61e4ad32ed1f8008L,0xe6c9267ad8b17538L,0x1ac7c5eb857ff6fbL, + 0x994baaa855f2fb10L }, + { 0x84cf14e11d248018L,0x5a39898b628ac508L,0x14fde97b5fa944f5L, + 
0xed178030d12e5ac7L } }, + /* 27 << 0 */ + { { 0x042c2af497e2feb4L,0xd36a42d7aebf7313L,0x49d2c9eb084ffdd7L, + 0x9f8aa54b2ef7c76aL }, + { 0x9200b7ba09895e70L,0x3bd0c66fddb7fb58L,0x2d97d10878eb4cbbL, + 0x2d431068d84bde31L } }, + /* 28 << 0 */ + { { 0x4b523eb7172ccd1fL,0x7323cb2830a6a892L,0x97082ec0cfe153ebL, + 0xe97f6b6af2aadb97L }, + { 0x1d3d393ed1a83da1L,0xa6a7f9c7804b2a68L,0x4a688b482d0cb71eL, + 0xa9b4cc5f40585278L } }, + /* 29 << 0 */ + { { 0x5e5db46acb66e132L,0xf1be963a0d925880L,0x944a70270317b9e2L, + 0xe266f95948603d48L }, + { 0x98db66735c208899L,0x90472447a2fb18a3L,0x8a966939777c619fL, + 0x3798142a2a3be21bL } }, + /* 30 << 0 */ + { { 0xb4241cb13298b343L,0xa3a14e49b44f65a1L,0xc5f4d6cd3ac77acdL, + 0xd0288cb552b6fc3cL }, + { 0xd5cc8c2f1c040abcL,0xb675511e06bf9b4aL,0xd667da379b3aa441L, + 0x460d45ce51601f72L } }, + /* 31 << 0 */ + { { 0xe2f73c696755ff89L,0xdd3cf7e7473017e6L,0x8ef5689d3cf7600dL, + 0x948dc4f8b1fc87b4L }, + { 0xd9e9fe814ea53299L,0x2d921ca298eb6028L,0xfaecedfd0c9803fcL, + 0xf38ae8914d7b4745L } }, + /* 32 << 0 */ + { { 0xd8c5fccfc5e3a3d8L,0xbefd904c4079dfbfL,0xbc6d6a58fead0197L, + 0x39227077695532a4L }, + { 0x09e23e6ddbef42f5L,0x7e449b64480a9908L,0x7b969c1aad9a2e40L, + 0x6231d7929591c2a4L } }, + /* 33 << 0 */ + { { 0x871514560f664534L,0x85ceae7c4b68f103L,0xac09c4ae65578ab9L, + 0x33ec6868f044b10cL }, + { 0x6ac4832b3a8ec1f1L,0x5509d1285847d5efL,0xf909604f763f1574L, + 0xb16c4303c32f63c4L } }, + /* 34 << 0 */ + { { 0xb6ab20147ca23cd3L,0xcaa7a5c6a391849dL,0x5b0673a375678d94L, + 0xc982ddd4dd303e64L }, + { 0xfd7b000b5db6f971L,0xbba2cb1f6f876f92L,0xc77332a33c569426L, + 0xa159100c570d74f8L } }, + /* 35 << 0 */ + { { 0xfd16847fdec67ef5L,0x742ee464233e76b7L,0x0b8e4134efc2b4c8L, + 0xca640b8642a3e521L }, + { 0x653a01908ceb6aa9L,0x313c300c547852d5L,0x24e4ab126b237af7L, + 0x2ba901628bb47af8L } }, + /* 36 << 0 */ + { { 0x3d5e58d6a8219bb7L,0xc691d0bd1b06c57fL,0x0ae4cb10d257576eL, + 0x3569656cd54a3dc3L }, + { 0xe5ebaebd94cda03aL,0x934e82d3162bfe13L,0x450ac0bae251a0c6L, + 0x480b9e11dd6da526L } }, + /* 37 << 0 */ + { { 0x00467bc58cce08b5L,0xb636458c7f178d55L,0xc5748baea677d806L, + 0x2763a387dfa394ebL }, + { 0xa12b448a7d3cebb6L,0xe7adda3e6f20d850L,0xf63ebce51558462cL, + 0x58b36143620088a8L } }, + /* 38 << 0 */ + { { 0x8a2cc3ca4d63c0eeL,0x512331170fe948ceL,0x7463fd85222ef33bL, + 0xadf0c7dc7c603d6cL }, + { 0x0ec32d3bfe7765e5L,0xccaab359bf380409L,0xbdaa84d68e59319cL, + 0xd9a4c2809c80c34dL } }, + /* 39 << 0 */ + { { 0xa9d89488a059c142L,0x6f5ae714ff0b9346L,0x068f237d16fb3664L, + 0x5853e4c4363186acL }, + { 0xe2d87d2363c52f98L,0x2ec4a76681828876L,0x47b864fae14e7b1cL, + 0x0c0bc0e569192408L } }, + /* 40 << 0 */ + { { 0xe4d7681db82e9f3eL,0x83200f0bdf25e13cL,0x8909984c66f27280L, + 0x462d7b0075f73227L }, + { 0xd90ba188f2651798L,0x74c6e18c36ab1c34L,0xab256ea35ef54359L, + 0x03466612d1aa702fL } }, + /* 41 << 0 */ + { { 0x624d60492ed22e91L,0x6fdfe0b56f072822L,0xeeca111539ce2271L, + 0x98100a4fdb01614fL }, + { 0xb6b0daa2a35c628fL,0xb6f94d2ec87e9a47L,0xc67732591d57d9ceL, + 0xf70bfeec03884a7bL } }, + /* 42 << 0 */ + { { 0x5fb35ccfed2bad01L,0xa155cbe31da6a5c7L,0xc2e2594c30a92f8fL, + 0x649c89ce5bfafe43L }, + { 0xd158667de9ff257aL,0x9b359611f32c50aeL,0x4b00b20b906014cfL, + 0xf3a8cfe389bc7d3dL } }, + /* 43 << 0 */ + { { 0x4ff23ffd248a7d06L,0x80c5bfb4878873faL,0xb7d9ad9005745981L, + 0x179c85db3db01994L }, + { 0xba41b06261a6966cL,0x4d82d052eadce5a8L,0x9e91cd3ba5e6a318L, + 0x47795f4f95b2dda0L } }, + /* 44 << 0 */ + { { 0xecfd7c1fd55a897cL,0x009194abb29110fbL,0x5f0e2046e381d3b0L, + 0x5f3425f6a98dd291L }, + { 
0xbfa06687730d50daL,0x0423446c4b083b7fL,0x397a247dd69d3417L, + 0xeb629f90387ba42aL } }, + /* 45 << 0 */ + { { 0x1ee426ccd5cd79bfL,0x0032940b946c6e18L,0x1b1e8ae057477f58L, + 0xe94f7d346d823278L }, + { 0xc747cb96782ba21aL,0xc5254469f72b33a5L,0x772ef6dec7f80c81L, + 0xd73acbfe2cd9e6b5L } }, + /* 46 << 0 */ + { { 0x4075b5b149ee90d9L,0x785c339aa06e9ebaL,0xa1030d5babf825e0L, + 0xcec684c3a42931dcL }, + { 0x42ab62c9c1586e63L,0x45431d665ab43f2bL,0x57c8b2c055f7835dL, + 0x033da338c1b7f865L } }, + /* 47 << 0 */ + { { 0x283c7513caa76097L,0x0a624fa936c83906L,0x6b20afec715af2c7L, + 0x4b969974eba78bfdL }, + { 0x220755ccd921d60eL,0x9b944e107baeca13L,0x04819d515ded93d4L, + 0x9bbff86e6dddfd27L } }, + /* 48 << 0 */ + { { 0x6b34413077adc612L,0xa7496529bbd803a0L,0x1a1baaa76d8805bdL, + 0xc8403902470343adL }, + { 0x39f59f66175adff1L,0x0b26d7fbb7d8c5b7L,0xa875f5ce529d75e3L, + 0x85efc7e941325cc2L } }, + /* 49 << 0 */ + { { 0x21950b421ff6acd3L,0xffe7048453dc6909L,0xff4cd0b228766127L, + 0xabdbe6084fb7db2bL }, + { 0x837c92285e1109e8L,0x26147d27f4645b5aL,0x4d78f592f7818ed8L, + 0xd394077ef247fa36L } }, + /* 50 << 0 */ + { { 0x0fb9c2d0488c171aL,0xa78bfbaa13685278L,0xedfbe268d5b1fa6aL, + 0x0dceb8db2b7eaba7L }, + { 0xbf9e80899ae2b710L,0xefde7ae6a4449c96L,0x43b7716bcc143a46L, + 0xd7d34194c3628c13L } }, + /* 51 << 0 */ + { { 0x508cec1c3b3f64c9L,0xe20bc0ba1e5edf3fL,0xda1deb852f4318d4L, + 0xd20ebe0d5c3fa443L }, + { 0x370b4ea773241ea3L,0x61f1511c5e1a5f65L,0x99a5e23d82681c62L, + 0xd731e383a2f54c2dL } }, + /* 52 << 0 */ + { { 0x2692f36e83445904L,0x2e0ec469af45f9c0L,0x905a3201c67528b7L, + 0x88f77f34d0e5e542L }, + { 0xf67a8d295864687cL,0x23b92eae22df3562L,0x5c27014b9bbec39eL, + 0x7ef2f2269c0f0f8dL } }, + /* 53 << 0 */ + { { 0x97359638546c4d8dL,0x5f9c3fc492f24679L,0x912e8beda8c8acd9L, + 0xec3a318d306634b0L }, + { 0x80167f41c31cb264L,0x3db82f6f522113f2L,0xb155bcd2dcafe197L, + 0xfba1da5943465283L } }, + /* 54 << 0 */ + { { 0xa0425b8eb212cf53L,0x4f2e512ef8557c5fL,0xc1286ff925c4d56cL, + 0xbb8a0feaee26c851L }, + { 0xc28f70d2e7d6107eL,0x7ee0c444e76265aaL,0x3df277a41d1936b1L, + 0x1a556e3fea9595ebL } }, + /* 55 << 0 */ + { { 0x258bbbf9e7305683L,0x31eea5bf07ef5be6L,0x0deb0e4a46c814c1L, + 0x5cee8449a7b730ddL }, + { 0xeab495c5a0182bdeL,0xee759f879e27a6b4L,0xc2cf6a6880e518caL, + 0x25e8013ff14cf3f4L } }, + /* 56 << 0 */ + { { 0x8fc441407e8d7a14L,0xbb1ff3ca9556f36aL,0x6a84438514600044L, + 0xba3f0c4a7451ae63L }, + { 0xdfcac25b1f9af32aL,0x01e0db86b1f2214bL,0x4e9a5bc2a4b596acL, + 0x83927681026c2c08L } }, + /* 57 << 0 */ + { { 0x3ec832e77acaca28L,0x1bfeea57c7385b29L,0x068212e3fd1eaf38L, + 0xc13298306acf8cccL }, + { 0xb909f2db2aac9e59L,0x5748060db661782aL,0xc5ab2632c79b7a01L, + 0xda44c6c600017626L } }, + /* 58 << 0 */ + { { 0xf26c00e8a7ea82f0L,0x99cac80de4299aafL,0xd66fe3b67ed78be1L, + 0x305f725f648d02cdL }, + { 0x33ed1bc4623fb21bL,0xfa70533e7a6319adL,0x17ab562dbe5ffb3eL, + 0x0637499456674741L } }, + /* 59 << 0 */ + { { 0x69d44ed65c46aa8eL,0x2100d5d3a8d063d1L,0xcb9727eaa2d17c36L, + 0x4c2bab1b8add53b7L }, + { 0xa084e90c15426704L,0x778afcd3a837ebeaL,0x6651f7017ce477f8L, + 0xa062499846fb7a8bL } }, + /* 60 << 0 */ + { { 0xdc1e6828ed8a6e19L,0x33fc23364189d9c7L,0x026f8fe2671c39bcL, + 0xd40c4ccdbc6f9915L }, + { 0xafa135bbf80e75caL,0x12c651a022adff2cL,0xc40a04bd4f51ad96L, + 0x04820109bbe4e832L } }, + /* 61 << 0 */ + { { 0x3667eb1a7f4c04ccL,0x59556621a9404f84L,0x71cdf6537eceb50aL, + 0x994a44a69b8335faL }, + { 0xd7faf819dbeb9b69L,0x473c5680eed4350dL,0xb6658466da44bba2L, + 0x0d1bc780872bdbf3L } }, + /* 62 << 0 */ + { { 
0xe535f175a1962f91L,0x6ed7e061ed58f5a7L,0x177aa4c02089a233L, + 0x0dbcb03ae539b413L }, + { 0xe3dc424ebb32e38eL,0x6472e5ef6806701eL,0xdd47ff98814be9eeL, + 0x6b60cfff35ace009L } }, + /* 63 << 0 */ + { { 0xb8d3d9319ff91fe5L,0x039c4800f0518eedL,0x95c376329182cb26L, + 0x0763a43482fc568dL }, + { 0x707c04d5383e76baL,0xac98b930824e8197L,0x92bf7c8f91230de0L, + 0x90876a0140959b70L } }, + /* 64 << 0 */ + { { 0xdb6d96f305968b80L,0x380a0913089f73b9L,0x7da70b83c2c61e01L, + 0x95fb8394569b38c7L }, + { 0x9a3c651280edfe2fL,0x8f726bb98faeaf82L,0x8010a4a078424bf8L, + 0x296720440e844970L } }, + /* 0 << 7 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 7 */ + { { 0x63c5cb817a2ad62aL,0x7ef2b6b9ac62ff54L,0x3749bba4b3ad9db5L, + 0xad311f2c46d5a617L }, + { 0xb77a8087c2ff3b6dL,0xb46feaf3367834ffL,0xf8aa266d75d6b138L, + 0xfa38d320ec008188L } }, + /* 2 << 7 */ + { { 0x486d8ffa696946fcL,0x50fbc6d8b9cba56dL,0x7e3d423e90f35a15L, + 0x7c3da195c0dd962cL }, + { 0xe673fdb03cfd5d8bL,0x0704b7c2889dfca5L,0xf6ce581ff52305aaL, + 0x399d49eb914d5e53L } }, + /* 3 << 7 */ + { { 0x380a496d6ec293cdL,0x733dbda78e7051f5L,0x037e388db849140aL, + 0xee4b32b05946dbf6L }, + { 0xb1c4fda9cae368d1L,0x5001a7b0fdb0b2f3L,0x6df593742e3ac46eL, + 0x4af675f239b3e656L } }, + /* 4 << 7 */ + { { 0x44e3811039949296L,0x5b63827b361db1b5L,0x3e5323ed206eaff5L, + 0x942370d2c21f4290L }, + { 0xf2caaf2ee0d985a1L,0x192cc64b7239846dL,0x7c0b8f47ae6312f8L, + 0x7dc61f9196620108L } }, + /* 5 << 7 */ + { { 0xb830fb5bc2da7de9L,0xd0e643df0ff8d3beL,0x31ee77ba188a9641L, + 0x4e8aa3aabcf6d502L }, + { 0xf9fb65329a49110fL,0xd18317f62dd6b220L,0x7e3ced4152c3ea5aL, + 0x0d296a147d579c4aL } }, + /* 6 << 7 */ + { { 0x35d6a53eed4c3717L,0x9f8240cf3d0ed2a3L,0x8c0d4d05e5543aa5L, + 0x45d5bbfbdd33b4b4L }, + { 0xfa04cc73137fd28eL,0x862ac6efc73b3ffdL,0x403ff9f531f51ef2L, + 0x34d5e0fcbc73f5a2L } }, + /* 7 << 7 */ + { { 0xf252682008913f4fL,0xea20ed61eac93d95L,0x51ed38b46ca6b26cL, + 0x8662dcbcea4327b0L }, + { 0x6daf295c725d2aaaL,0xbad2752f8e52dcdaL,0x2210e7210b17daccL, + 0xa37f7912d51e8232L } }, + /* 8 << 7 */ + { { 0x4f7081e144cc3addL,0xd5ffa1d687be82cfL,0x89890b6c0edd6472L, + 0xada26e1a3ed17863L }, + { 0x276f271563483caaL,0xe6924cd92f6077fdL,0x05a7fe980a466e3cL, + 0xf1c794b0b1902d1fL } }, + /* 9 << 7 */ + { { 0xe521368882a8042cL,0xd931cfafcd278298L,0x069a0ae0f597a740L, + 0x0adbb3f3eb59107cL }, + { 0x983e951e5eaa8eb8L,0xe663a8b511b48e78L,0x1631cc0d8a03f2c5L, + 0x7577c11e11e271e2L } }, + /* 10 << 7 */ + { { 0x33b2385c08369a90L,0x2990c59b190eb4f8L,0x819a6145c68eac80L, + 0x7a786d622ec4a014L }, + { 0x33faadbe20ac3a8dL,0x31a217815aba2d30L,0x209d2742dba4f565L, + 0xdb2ce9e355aa0fbbL } }, + /* 11 << 7 */ + { { 0x8cef334b168984dfL,0xe81dce1733879638L,0xf6e6949c263720f0L, + 0x5c56feaff593cbecL }, + { 0x8bff5601fde58c84L,0x74e241172eccb314L,0xbcf01b614c9a8a78L, + 0xa233e35e544c9868L } }, + /* 12 << 7 */ + { { 0xb3156bf38bd7aff1L,0x1b5ee4cb1d81b146L,0x7ba1ac41d628a915L, + 0x8f3a8f9cfd89699eL }, + { 0x7329b9c9a0748be7L,0x1d391c95a92e621fL,0xe51e6b214d10a837L, + 0xd255f53a4947b435L } }, + /* 13 << 7 */ + { { 0x07669e04f1788ee3L,0xc14f27afa86938a2L,0x8b47a334e93a01c0L, + 0xff627438d9366808L }, + { 0x7a0985d8ca2a5965L,0x3d9a5542d6e9b9b3L,0xc23eb80b4cf972e8L, + 0x5c1c33bb4fdf72fdL } }, + /* 14 << 7 */ + { { 0x0c4a58d474a86108L,0xf8048a8fee4c5d90L,0xe3c7c924e86d4c80L, + 0x28c889de056a1e60L }, + { 0x57e2662eb214a040L,0xe8c48e9837e10347L,0x8774286280ac748aL, + 0xf1c24022186b06f2L } }, + /* 15 << 7 */ + { { 0xac2dd4c35f74040aL,0x409aeb71fceac957L,0x4fbad78255c4ec23L, + 0xb359ed618a7b76ecL 
}, + { 0x12744926ed6f4a60L,0xe21e8d7f4b912de3L,0xe2575a59fc705a59L, + 0x72f1d4deed2dbc0eL } }, + /* 16 << 7 */ + { { 0x3d2b24b9eb7926b8L,0xbff88cb3cdbe5509L,0xd0f399afe4dd640bL, + 0x3c5fe1302f76ed45L }, + { 0x6f3562f43764fb3dL,0x7b5af3183151b62dL,0xd5bd0bc7d79ce5f3L, + 0xfdaf6b20ec66890fL } }, + /* 17 << 7 */ + { { 0x735c67ec6063540cL,0x50b259c2e5f9cb8fL,0xb8734f9a3f99c6abL, + 0xf8cc13d5a3a7bc85L }, + { 0x80c1b305c5217659L,0xfe5364d44ec12a54L,0xbd87045e681345feL, + 0x7f8efeb1582f897fL } }, + /* 18 << 7 */ + { { 0xe8cbf1e5d5923359L,0xdb0cea9d539b9fb0L,0x0c5b34cf49859b98L, + 0x5e583c56a4403cc6L }, + { 0x11fc1a2dd48185b7L,0xc93fbc7e6e521787L,0x47e7a05805105b8bL, + 0x7b4d4d58db8260c8L } }, + /* 19 << 7 */ + { { 0xe33930b046eb842aL,0x8e844a9a7bdae56dL,0x34ef3a9e13f7fdfcL, + 0xb3768f82636ca176L }, + { 0x2821f4e04e09e61cL,0x414dc3a1a0c7cddcL,0xd537943754945fcdL, + 0x151b6eefb3555ff1L } }, + /* 20 << 7 */ + { { 0xb31bd6136339c083L,0x39ff8155dfb64701L,0x7c3388d2e29604abL, + 0x1e19084ba6b10442L }, + { 0x17cf54c0eccd47efL,0x896933854a5dfb30L,0x69d023fb47daf9f6L, + 0x9222840b7d91d959L } }, + /* 21 << 7 */ + { { 0x439108f5803bac62L,0x0b7dd91d379bd45fL,0xd651e827ca63c581L, + 0x5c5d75f6509c104fL }, + { 0x7d5fc7381f2dc308L,0x20faa7bfd98454beL,0x95374beea517b031L, + 0xf036b9b1642692acL } }, + /* 22 << 7 */ + { { 0xc510610939842194L,0xb7e2353e49d05295L,0xfc8c1d5cefb42ee0L, + 0xe04884eb08ce811cL }, + { 0xf1f75d817419f40eL,0x5b0ac162a995c241L,0x120921bbc4c55646L, + 0x713520c28d33cf97L } }, + /* 23 << 7 */ + { { 0xb4a65a5ce98c5100L,0x6cec871d2ddd0f5aL,0x251f0b7f9ba2e78bL, + 0x224a8434ce3a2a5fL }, + { 0x26827f6125f5c46fL,0x6a22bedc48545ec0L,0x25ae5fa0b1bb5cdcL, + 0xd693682ffcb9b98fL } }, + /* 24 << 7 */ + { { 0x32027fe891e5d7d3L,0xf14b7d1773a07678L,0xf88497b3c0dfdd61L, + 0xf7c2eec02a8c4f48L }, + { 0xaa5573f43756e621L,0xc013a2401825b948L,0x1c03b34563878572L, + 0xa0472bea653a4184L } }, + /* 25 << 7 */ + { { 0xf4222e270ac69a80L,0x34096d25f51e54f6L,0x00a648cb8fffa591L, + 0x4e87acdc69b6527fL }, + { 0x0575e037e285ccb4L,0x188089e450ddcf52L,0xaa96c9a8870ff719L, + 0x74a56cd81fc7e369L } }, + /* 26 << 7 */ + { { 0x41d04ee21726931aL,0x0bbbb2c83660ecfdL,0xa6ef6de524818e18L, + 0xe421cc51e7d57887L }, + { 0xf127d208bea87be6L,0x16a475d3b1cdd682L,0x9db1b684439b63f7L, + 0x5359b3dbf0f113b6L } }, + /* 27 << 7 */ + { { 0xdfccf1de8bf06e31L,0x1fdf8f44dd383901L,0x10775cad5017e7d2L, + 0xdfc3a59758d11eefL }, + { 0x6ec9c8a0b1ecff10L,0xee6ed6cc28400549L,0xb5ad7bae1b4f8d73L, + 0x61b4f11de00aaab9L } }, + /* 28 << 7 */ + { { 0x7b32d69bd4eff2d7L,0x88ae67714288b60fL,0x159461b437a1e723L, + 0x1f3d4789570aae8cL }, + { 0x869118c07f9871daL,0x35fbda78f635e278L,0x738f3641e1541dacL, + 0x6794b13ac0dae45fL } }, + /* 29 << 7 */ + { { 0x065064ac09cc0917L,0x27c53729c68540fdL,0x0d2d4c8eef227671L, + 0xd23a9f80a1785a04L }, + { 0x98c5952852650359L,0xfa09ad0174a1acadL,0x082d5a290b55bf5cL, + 0xa40f1c67419b8084L } }, + /* 30 << 7 */ + { { 0x3a5c752edcc18770L,0x4baf1f2f8825c3a5L,0xebd63f7421b153edL, + 0xa2383e47b2f64723L }, + { 0xe7bf620a2646d19aL,0x56cb44ec03c83ffdL,0xaf7267c94f6be9f1L, + 0x8b2dfd7bc06bb5e9L } }, + /* 31 << 7 */ + { { 0xb87072f2a672c5c7L,0xeacb11c80d53c5e2L,0x22dac29dff435932L, + 0x37bdb99d4408693cL }, + { 0xf6e62fb62899c20fL,0x3535d512447ece24L,0xfbdc6b88ff577ce3L, + 0x726693bd190575f2L } }, + /* 32 << 7 */ + { { 0x6772b0e5ab4b35a2L,0x1d8b6001f5eeaacfL,0x728f7ce4795b9580L, + 0x4a20ed2a41fb81daL }, + { 0x9f685cd44fec01e6L,0x3ed7ddcca7ff50adL,0x460fd2640c2d97fdL, + 0x3a241426eb82f4f9L } }, + /* 33 << 7 */ + { { 
0x17d1df2c6a8ea820L,0xb2b50d3bf22cc254L,0x03856cbab7291426L, + 0x87fd26ae04f5ee39L }, + { 0x9cb696cc02bee4baL,0x5312180406820fd6L,0xa5dfc2690212e985L, + 0x666f7ffa160f9a09L } }, + /* 34 << 7 */ + { { 0xc503cd33bccd9617L,0x365dede4ba7730a3L,0x798c63555ddb0786L, + 0xa6c3200efc9cd3bcL }, + { 0x060ffb2ce5e35efdL,0x99a4e25b5555a1c1L,0x11d95375f70b3751L, + 0x0a57354a160e1bf6L } }, + /* 35 << 7 */ + { { 0xecb3ae4bf8e4b065L,0x07a834c42e53022bL,0x1cd300b38692ed96L, + 0x16a6f79261ee14ecL }, + { 0x8f1063c66a8649edL,0xfbcdfcfe869f3e14L,0x2cfb97c100a7b3ecL, + 0xcea49b3c7130c2f1L } }, + /* 36 << 7 */ + { { 0x462d044fe9d96488L,0x4b53d52e8182a0c1L,0x84b6ddd30391e9e9L, + 0x80ab7b48b1741a09L }, + { 0xec0e15d427d3317fL,0x8dfc1ddb1a64671eL,0x93cc5d5fd49c5b92L, + 0xc995d53d3674a331L } }, + /* 37 << 7 */ + { { 0x302e41ec090090aeL,0x2278a0ccedb06830L,0x1d025932fbc99690L, + 0x0c32fbd2b80d68daL }, + { 0xd79146daf341a6c1L,0xae0ba1391bef68a0L,0xc6b8a5638d774b3aL, + 0x1cf307bd880ba4d7L } }, + /* 38 << 7 */ + { { 0xc033bdc719803511L,0xa9f97b3b8888c3beL,0x3d68aebc85c6d05eL, + 0xc3b88a9d193919ebL }, + { 0x2d300748c48b0ee3L,0x7506bc7c07a746c1L,0xfc48437c6e6d57f3L, + 0x5bd71587cfeaa91aL } }, + /* 39 << 7 */ + { { 0xa4ed0408c1bc5225L,0xd0b946db2719226dL,0x109ecd62758d2d43L, + 0x75c8485a2751759bL }, + { 0xb0b75f499ce4177aL,0x4fa61a1e79c10c3dL,0xc062d300a167fcd7L, + 0x4df3874c750f0fa8L } }, + /* 40 << 7 */ + { { 0x29ae2cf983dfedc9L,0xf84371348d87631aL,0xaf5717117429c8d2L, + 0x18d15867146d9272L }, + { 0x83053ecf69769bb7L,0xc55eb856c479ab82L,0x5ef7791c21b0f4b2L, + 0xaa5956ba3d491525L } }, + /* 41 << 7 */ + { { 0x407a96c29fe20ebaL,0xf27168bbe52a5ad3L,0x43b60ab3bf1d9d89L, + 0xe45c51ef710e727aL }, + { 0xdfca5276099b4221L,0x8dc6407c2557a159L,0x0ead833591035895L, + 0x0a9db9579c55dc32L } }, + /* 42 << 7 */ + { { 0xe40736d3df61bc76L,0x13a619c03f778cdbL,0x6dd921a4c56ea28fL, + 0x76a524332fa647b4L }, + { 0x23591891ac5bdc5dL,0xff4a1a72bac7dc01L,0x9905e26162df8453L, + 0x3ac045dfe63b265fL } }, + /* 43 << 7 */ + { { 0x8a3f341bad53dba7L,0x8ec269cc837b625aL,0xd71a27823ae31189L, + 0x8fb4f9a355e96120L }, + { 0x804af823ff9875cfL,0x23224f575d442a9bL,0x1c4d3b9eecc62679L, + 0x91da22fba0e7ddb1L } }, + /* 44 << 7 */ + { { 0xa370324d6c04a661L,0x9710d3b65e376d17L,0xed8c98f03044e357L, + 0xc364ebbe6422701cL }, + { 0x347f5d517733d61cL,0xd55644b9cea826c3L,0x80c6e0ad55a25548L, + 0x0aa7641d844220a7L } }, + /* 45 << 7 */ + { { 0x1438ec8131810660L,0x9dfa6507de4b4043L,0x10b515d8cc3e0273L, + 0x1b6066dd28d8cfb2L }, + { 0xd3b045919c9efebdL,0x425d4bdfa21c1ff4L,0x5fe5af19d57607d3L, + 0xbbf773f754481084L } }, + /* 46 << 7 */ + { { 0x8435bd6994b03ed1L,0xd9ad1de3634cc546L,0x2cf423fc00e420caL, + 0xeed26d80a03096ddL }, + { 0xd7f60be7a4db09d2L,0xf47f569d960622f7L,0xe5925fd77296c729L, + 0xeff2db2626ca2715L } }, + /* 47 << 7 */ + { { 0xa6fcd014b913e759L,0x53da47868ff4de93L,0x14616d79c32068e1L, + 0xb187d664ccdf352eL }, + { 0xf7afb6501dc90b59L,0x8170e9437daa1b26L,0xc8e3bdd8700c0a84L, + 0x6e8d345f6482bdfaL } }, + /* 48 << 7 */ + { { 0x84cfbfa1c5c5ea50L,0xd3baf14c67960681L,0x263984030dd50942L, + 0xe4b7839c4716a663L }, + { 0xd5f1f794e7de6dc0L,0x5cd0f4d4622aa7ceL,0x5295f3f159acfeecL, + 0x8d933552953e0607L } }, + /* 49 << 7 */ + { { 0xc7db8ec5776c5722L,0xdc467e622b5f290cL,0xd4297e704ff425a9L, + 0x4be924c10cf7bb72L }, + { 0x0d5dc5aea1892131L,0x8bf8a8e3a705c992L,0x73a0b0647a305ac5L, + 0x00c9ca4e9a8c77a8L } }, + /* 50 << 7 */ + { { 0x5dfee80f83774bddL,0x6313160285734485L,0xa1b524ae914a69a9L, + 0xebc2ffafd4e300d7L }, + { 0x52c93db77cfa46a5L,0x71e6161f21653b50L,0x3574fc57a4bc580aL, + 
0xc09015dde1bc1253L } }, + /* 51 << 7 */ + { { 0x4b7b47b2d174d7aaL,0x4072d8e8f3a15d04L,0xeeb7d47fd6fa07edL, + 0x6f2b9ff9edbdafb1L }, + { 0x18c516153760fe8aL,0x7a96e6bff06c6c13L,0x4d7a04100ea2d071L, + 0xa1914e9b0be2a5ceL } }, + /* 52 << 7 */ + { { 0x5726e357d8a3c5cfL,0x1197ecc32abb2b13L,0x6c0d7f7f31ae88ddL, + 0x15b20d1afdbb3efeL }, + { 0xcd06aa2670584039L,0x2277c969a7dc9747L,0xbca695877855d815L, + 0x899ea2385188b32aL } }, + /* 53 << 7 */ + { { 0x37d9228b760c1c9dL,0xc7efbb119b5c18daL,0x7f0d1bc819f6dbc5L, + 0x4875384b07e6905bL }, + { 0xc7c50baa3ba8cd86L,0xb0ce40fbc2905de0L,0x708406737a231952L, + 0xa912a262cf43de26L } }, + /* 54 << 7 */ + { { 0x9c38ddcceb5b76c1L,0x746f528526fc0ab4L,0x52a63a50d62c269fL, + 0x60049c5599458621L }, + { 0xe7f48f823c2f7c9eL,0x6bd99043917d5cf3L,0xeb1317a88701f469L, + 0xbd3fe2ed9a449fe0L } }, + /* 55 << 7 */ + { { 0x421e79ca12ef3d36L,0x9ee3c36c3e7ea5deL,0xe48198b5cdff36f7L, + 0xaff4f967c6b82228L }, + { 0x15e19dd0c47adb7eL,0x45699b23032e7dfaL,0x40680c8b1fae026aL, + 0x5a347a48550dbf4dL } }, + /* 56 << 7 */ + { { 0xe652533b3cef0d7dL,0xd94f7b182bbb4381L,0x838752be0e80f500L, + 0x8e6e24889e9c9bfbL }, + { 0xc975169716caca6aL,0x866c49d838531ad9L,0xc917e2397151ade1L, + 0x2d016ec16037c407L } }, + /* 57 << 7 */ + { { 0xa407ccc900eac3f9L,0x835f6280e2ed4748L,0xcc54c3471cc98e0dL, + 0x0e969937dcb572ebL }, + { 0x1b16c8e88f30c9cbL,0xa606ae75373c4661L,0x47aa689b35502cabL, + 0xf89014ae4d9bb64fL } }, + /* 58 << 7 */ + { { 0x202f6a9c31c71f7bL,0x01f95aa3296ffe5cL,0x5fc0601453cec3a3L, + 0xeb9912375f498a45L }, + { 0xae9a935e5d91ba87L,0xc6ac62810b564a19L,0x8a8fe81c3bd44e69L, + 0x7c8b467f9dd11d45L } }, + /* 59 << 7 */ + { { 0xf772251fea5b8e69L,0xaeecb3bdc5b75fbcL,0x1aca3331887ff0e5L, + 0xbe5d49ff19f0a131L }, + { 0x582c13aae5c8646fL,0xdbaa12e820e19980L,0x8f40f31af7abbd94L, + 0x1f13f5a81dfc7663L } }, + /* 60 << 7 */ + { { 0x5d81f1eeaceb4fc0L,0x362560025e6f0f42L,0x4b67d6d7751370c8L, + 0x2608b69803e80589L }, + { 0xcfc0d2fc05268301L,0xa6943d3940309212L,0x192a90c21fd0e1c2L, + 0xb209f11337f1dc76L } }, + /* 61 << 7 */ + { { 0xefcc5e0697bf1298L,0xcbdb6730219d639eL,0xd009c116b81e8c6fL, + 0xa3ffdde31a7ce2e5L }, + { 0xc53fbaaaa914d3baL,0x836d500f88df85eeL,0xd98dc71b66ee0751L, + 0x5a3d7005714516fdL } }, + /* 62 << 7 */ + { { 0x21d3634d39eedbbaL,0x35cd2e680455a46dL,0xc8cafe65f9d7eb0cL, + 0xbda3ce9e00cefb3eL }, + { 0xddc17a602c9cf7a4L,0x01572ee47bcb8773L,0xa92b2b018c7548dfL, + 0x732fd309a84600e3L } }, + /* 63 << 7 */ + { { 0xe22109c716543a40L,0x9acafd36fede3c6cL,0xfb2068526824e614L, + 0x2a4544a9da25dca0L }, + { 0x2598526291d60b06L,0x281b7be928753545L,0xec667b1a90f13b27L, + 0x33a83aff940e2eb4L } }, + /* 64 << 7 */ + { { 0x80009862d5d721d5L,0x0c3357a35bd3a182L,0x27f3a83b7aa2cda4L, + 0xb58ae74ef6f83085L }, + { 0x2a911a812e6dad6bL,0xde286051f43d6c5bL,0x4bdccc41f996c4d8L, + 0xe7312ec00ae1e24eL } }, + /* 0 << 14 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 14 */ + { { 0xf8d112e76e6485b3L,0x4d3e24db771c52f8L,0x48e3ee41684a2f6dL, + 0x7161957d21d95551L }, + { 0x19631283cdb12a6cL,0xbf3fa8822e50e164L,0xf6254b633166cc73L, + 0x3aefa7aeaee8cc38L } }, + /* 2 << 14 */ + { { 0x79b0fe623b36f9fdL,0x26543b23fde19fc0L,0x136e64a0958482efL, + 0x23f637719b095825L }, + { 0x14cfd596b6a1142eL,0x5ea6aac6335aac0bL,0x86a0e8bdf3081dd5L, + 0x5fb89d79003dc12aL } }, + /* 3 << 14 */ + { { 0xf615c33af72e34d4L,0x0bd9ea40110eec35L,0x1c12bc5bc1dea34eL, + 0x686584c949ae4699L }, + { 0x13ad95d38c97b942L,0x4609561a4e5c7562L,0x9e94a4aef2737f89L, + 0xf57594c6371c78b6L } }, + /* 4 << 14 */ + { { 
0x0f0165fce3779ee3L,0xe00e7f9dbd495d9eL,0x1fa4efa220284e7aL, + 0x4564bade47ac6219L }, + { 0x90e6312ac4708e8eL,0x4f5725fba71e9adfL,0xe95f55ae3d684b9fL, + 0x47f7ccb11e94b415L } }, + /* 5 << 14 */ + { { 0x7322851b8d946581L,0xf0d13133bdf4a012L,0xa3510f696584dae0L, + 0x03a7c1713c9f6c6dL }, + { 0x5be97f38e475381aL,0xca1ba42285823334L,0xf83cc5c70be17ddaL, + 0x158b14940b918c0fL } }, + /* 6 << 14 */ + { { 0xda3a77e5522e6b69L,0x69c908c3bbcd6c18L,0x1f1b9e48d924fd56L, + 0x37c64e36aa4bb3f7L }, + { 0x5a4fdbdfee478d7dL,0xba75c8bc0193f7a0L,0x84bc1e8456cd16dfL, + 0x1fb08f0846fad151L } }, + /* 7 << 14 */ + { { 0x8a7cabf9842e9f30L,0xa331d4bf5eab83afL,0xd272cfba017f2a6aL, + 0x27560abc83aba0e3L }, + { 0x94b833870e3a6b75L,0x25c6aea26b9f50f5L,0x803d691db5fdf6d0L, + 0x03b77509e6333514L } }, + /* 8 << 14 */ + { { 0x3617890361a341c1L,0x3604dc600cfd6142L,0x022295eb8533316cL, + 0x3dbde4ac44af2922L }, + { 0x898afc5d1c7eef69L,0x58896805d14f4fa1L,0x05002160203c21caL, + 0x6f0d1f3040ef730bL } }, + /* 9 << 14 */ + { { 0x8e8c44d4196224f8L,0x75a4ab95374d079dL,0x79085ecc7d48f123L, + 0x56f04d311bf65ad8L }, + { 0xe220bf1cbda602b2L,0x73ee1742f9612c69L,0x76008fc8084fd06bL, + 0x4000ef9ff11380d1L } }, + /* 10 << 14 */ + { { 0x48201b4b12cfe297L,0x3eee129c292f74e5L,0xe1fe114ec9e874e8L, + 0x899b055c92c5fc41L }, + { 0x4e477a643a39c8cfL,0x82f09efe78963cc9L,0x6fd3fd8fd333f863L, + 0x85132b2adc949c63L } }, + /* 11 << 14 */ + { { 0x7e06a3ab516eb17bL,0x73bec06fd2c7372bL,0xe4f74f55ba896da6L, + 0xbb4afef88e9eb40fL }, + { 0x2d75bec8e61d66b0L,0x02bda4b4ef29300bL,0x8bbaa8de026baa5aL, + 0xff54befda07f4440L } }, + /* 12 << 14 */ + { { 0xbd9b8b1dbe7a2af3L,0xec51caa94fb74a72L,0xb9937a4b63879697L, + 0x7c9a9d20ec2687d5L }, + { 0x1773e44f6ef5f014L,0x8abcf412e90c6900L,0x387bd0228142161eL, + 0x50393755fcb6ff2aL } }, + /* 13 << 14 */ + { { 0x9813fd56ed6def63L,0x53cf64827d53106cL,0x991a35bd431f7ac1L, + 0xf1e274dd63e65fafL }, + { 0xf63ffa3c44cc7880L,0x411a426b7c256981L,0xb698b9fd93a420e0L, + 0x89fdddc0ae53f8feL } }, + /* 14 << 14 */ + { { 0x766e072232398baaL,0x205fee425cfca031L,0xa49f53417a029cf2L, + 0xa88c68b84023890dL }, + { 0xbc2750417337aaa8L,0x9ed364ad0eb384f4L,0xe0816f8529aba92fL, + 0x2e9e194104e38a88L } }, + /* 15 << 14 */ + { { 0x57eef44a3dafd2d5L,0x35d1fae597ed98d8L,0x50628c092307f9b1L, + 0x09d84aaed6cba5c6L }, + { 0x67071bc788aaa691L,0x2dea57a9afe6cb03L,0xdfe11bb43d78ac01L, + 0x7286418c7fd7aa51L } }, + /* 16 << 14 */ + { { 0xfabf770977f7195aL,0x8ec86167adeb838fL,0xea1285a8bb4f012dL, + 0xd68835039a3eab3fL }, + { 0xee5d24f8309004c2L,0xa96e4b7613ffe95eL,0x0cdffe12bd223ea4L, + 0x8f5c2ee5b6739a53L } }, + /* 17 << 14 */ + { { 0x5cb4aaa5dd968198L,0xfa131c5272413a6cL,0x53d46a909536d903L, + 0xb270f0d348606d8eL }, + { 0x518c7564a053a3bcL,0x088254b71a86caefL,0xb3ba8cb40ab5efd0L, + 0x5c59900e4605945dL } }, + /* 18 << 14 */ + { { 0xecace1dda1887395L,0x40960f36932a65deL,0x9611ff5c3aa95529L, + 0xc58215b07c1e5a36L }, + { 0xd48c9b58f0e1a524L,0xb406856bf590dfb8L,0xc7605e049cd95662L, + 0x0dd036eea33ecf82L } }, + /* 19 << 14 */ + { { 0xa50171acc33156b3L,0xf09d24ea4a80172eL,0x4e1f72c676dc8eefL, + 0xe60caadc5e3d44eeL }, + { 0x006ef8a6979b1d8fL,0x60908a1c97788d26L,0x6e08f95b266feec0L, + 0x618427c222e8c94eL } }, + /* 20 << 14 */ + { { 0x3d61333959145a65L,0xcd9bc368fa406337L,0x82d11be32d8a52a0L, + 0xf6877b2797a1c590L }, + { 0x837a819bf5cbdb25L,0x2a4fd1d8de090249L,0x622a7de774990e5fL, + 0x840fa5a07945511bL } }, + /* 21 << 14 */ + { { 0x30b974be6558842dL,0x70df8c6417f3d0a6L,0x7c8035207542e46dL, + 0x7251fe7fe4ecc823L }, + { 
0xe59134cb5e9aac9aL,0x11bb0934f0045d71L,0x53e5d9b5dbcb1d4eL, + 0x8d97a90592defc91L } }, + /* 22 << 14 */ + { { 0xfe2893277946d3f9L,0xe132bd2407472273L,0xeeeb510c1eb6ae86L, + 0x777708c5f0595067L }, + { 0x18e2c8cd1297029eL,0x2c61095cbbf9305eL,0xe466c2586b85d6d9L, + 0x8ac06c36da1ea530L } }, + /* 23 << 14 */ + { { 0xa365dc39a1304668L,0xe4a9c88507f89606L,0x65a4898facc7228dL, + 0x3e2347ff84ca8303L }, + { 0xa5f6fb77ea7d23a3L,0x2fac257d672a71cdL,0x6908bef87e6a44d3L, + 0x8ff87566891d3d7aL } }, + /* 24 << 14 */ + { { 0xe58e90b36b0cf82eL,0x6438d2462615b5e7L,0x07b1f8fc669c145aL, + 0xb0d8b2da36f1e1cbL }, + { 0x54d5dadbd9184c4dL,0x3dbb18d5f93d9976L,0x0a3e0f56d1147d47L, + 0x2afa8c8da0a48609L } }, + /* 25 << 14 */ + { { 0x275353e8bc36742cL,0x898f427eeea0ed90L,0x26f4947e3e477b00L, + 0x8ad8848a308741e3L }, + { 0x6c703c38d74a2a46L,0x5e3e05a99ba17ba2L,0xc1fa6f664ab9a9e4L, + 0x474a2d9a3841d6ecL } }, + /* 26 << 14 */ + { { 0x871239ad653ae326L,0x14bcf72aa74cbb43L,0x8737650e20d4c083L, + 0x3df86536110ed4afL }, + { 0xd2d86fe7b53ca555L,0x688cb00dabd5d538L,0xcf81bda31ad38468L, + 0x7ccfe3ccf01167b6L } }, + /* 27 << 14 */ + { { 0xcf4f47e06c4c1fe6L,0x557e1f1a298bbb79L,0xf93b974f30d45a14L, + 0x174a1d2d0baf97c4L }, + { 0x7a003b30c51fbf53L,0xd8940991ee68b225L,0x5b0aa7b71c0f4173L, + 0x975797c9a20a7153L } }, + /* 28 << 14 */ + { { 0x26e08c07e3533d77L,0xd7222e6a2e341c99L,0x9d60ec3d8d2dc4edL, + 0xbdfe0d8f7c476cf8L }, + { 0x1fe59ab61d056605L,0xa9ea9df686a8551fL,0x8489941e47fb8d8cL, + 0xfeb874eb4a7f1b10L } }, + /* 29 << 14 */ + { { 0xfe5fea867ee0d98fL,0x201ad34bdbf61864L,0x45d8fe4737c031d4L, + 0xd5f49fae795f0822L }, + { 0xdb0fb291c7f4a40cL,0x2e69d9c1730ddd92L,0x754e105449d76987L, + 0x8a24911d7662db87L } }, + /* 30 << 14 */ + { { 0x61fc181060a71676L,0xe852d1a8f66a8ad1L,0x172bbd656417231eL, + 0x0d6de7bd3babb11fL }, + { 0x6fde6f88c8e347f8L,0x1c5875479bd99cc3L,0x78e54ed034076950L, + 0x97f0f334796e83baL } }, + /* 31 << 14 */ + { { 0xe4dbe1ce4924867aL,0xbd5f51b060b84917L,0x375300403cb09a79L, + 0xdb3fe0f8ff1743d8L }, + { 0xed7894d8556fa9dbL,0xfa26216923412fbfL,0x563be0dbba7b9291L, + 0x6ca8b8c00c9fb234L } }, + /* 32 << 14 */ + { { 0xed406aa9bd763802L,0xc21486a065303da1L,0x61ae291ec7e62ec4L, + 0x622a0492df99333eL }, + { 0x7fd80c9dbb7a8ee0L,0xdc2ed3bc6c01aedbL,0x35c35a1208be74ecL, + 0xd540cb1a469f671fL } }, + /* 33 << 14 */ + { { 0xd16ced4ecf84f6c7L,0x8561fb9c2d090f43L,0x7e693d796f239db4L, + 0xa736f92877bd0d94L }, + { 0x07b4d9292c1950eeL,0xda17754356dc11b3L,0xa5dfbbaa7a6a878eL, + 0x1c70cb294decb08aL } }, + /* 34 << 14 */ + { { 0xfba28c8b6f0f7c50L,0xa8eba2b8854dcc6dL,0x5ff8e89a36b78642L, + 0x070c1c8ef6873adfL }, + { 0xbbd3c3716484d2e4L,0xfb78318f0d414129L,0x2621a39c6ad93b0bL, + 0x979d74c2a9e917f7L } }, + /* 35 << 14 */ + { { 0xfc19564761fb0428L,0x4d78954abee624d4L,0xb94896e0b8ae86fdL, + 0x6667ac0cc91c8b13L }, + { 0x9f18051243bcf832L,0xfbadf8b7a0010137L,0xc69b4089b3ba8aa7L, + 0xfac4bacde687ce85L } }, + /* 36 << 14 */ + { { 0x9164088d977eab40L,0x51f4c5b62760b390L,0xd238238f340dd553L, + 0x358566c3db1d31c9L }, + { 0x3a5ad69e5068f5ffL,0xf31435fcdaff6b06L,0xae549a5bd6debff0L, + 0x59e5f0b775e01331L } }, + /* 37 << 14 */ + { { 0x5d492fb898559acfL,0x96018c2e4db79b50L,0x55f4a48f609f66aaL, + 0x1943b3af4900a14fL }, + { 0xc22496df15a40d39L,0xb2a446844c20f7c5L,0x76a35afa3b98404cL, + 0xbec75725ff5d1b77L } }, + /* 38 << 14 */ + { { 0xb67aa163bea06444L,0x27e95bb2f724b6f2L,0x3c20e3e9d238c8abL, + 0x1213754eddd6ae17L }, + { 0x8c431020716e0f74L,0x6679c82effc095c2L,0x2eb3adf4d0ac2932L, + 0x2cc970d301bb7a76L } }, + /* 39 << 14 */ + { { 
0x70c71f2f740f0e66L,0x545c616b2b6b23ccL,0x4528cfcbb40a8bd7L, + 0xff8396332ab27722L }, + { 0x049127d9025ac99aL,0xd314d4a02b63e33bL,0xc8c310e728d84519L, + 0x0fcb8983b3bc84baL } }, + /* 40 << 14 */ + { { 0x2cc5226138634818L,0x501814f4b44c2e0bL,0xf7e181aa54dfdba3L, + 0xcfd58ff0e759718cL }, + { 0xf90cdb14d3b507a8L,0x57bd478ec50bdad8L,0x29c197e250e5f9aaL, + 0x4db6eef8e40bc855L } }, + /* 41 << 14 */ + { { 0x2cc8f21ad1fc0654L,0xc71cc96381269d73L,0xecfbb204077f49f9L, + 0xdde92571ca56b793L }, + { 0x9abed6a3f97ad8f7L,0xe6c19d3f924de3bdL,0x8dce92f4a140a800L, + 0x85f44d1e1337af07L } }, + /* 42 << 14 */ + { { 0x5953c08b09d64c52L,0xa1b5e49ff5df9749L,0x336a8fb852735f7dL, + 0xb332b6db9add676bL }, + { 0x558b88a0b4511aa4L,0x09788752dbd5cc55L,0x16b43b9cd8cd52bdL, + 0x7f0bc5a0c2a2696bL } }, + /* 43 << 14 */ + { { 0x146e12d4c11f61efL,0x9ce107543a83e79eL,0x08ec73d96cbfca15L, + 0x09ff29ad5b49653fL }, + { 0xe31b72bde7da946eL,0xebf9eb3bee80a4f2L,0xd1aabd0817598ce4L, + 0x18b5fef453f37e80L } }, + /* 44 << 14 */ + { { 0xd5d5cdd35958cd79L,0x3580a1b51d373114L,0xa36e4c91fa935726L, + 0xa38c534def20d760L }, + { 0x7088e40a2ff5845bL,0xe5bb40bdbd78177fL,0x4f06a7a8857f9920L, + 0xe3cc3e50e968f05dL } }, + /* 45 << 14 */ + { { 0x1d68b7fee5682d26L,0x5206f76faec7f87cL,0x41110530041951abL, + 0x58ec52c1d4b5a71aL }, + { 0xf3488f990f75cf9aL,0xf411951fba82d0d5L,0x27ee75be618895abL, + 0xeae060d46d8aab14L } }, + /* 46 << 14 */ + { { 0x9ae1df737fb54dc2L,0x1f3e391b25963649L,0x242ec32afe055081L, + 0x5bd450ef8491c9bdL }, + { 0x367efc67981eb389L,0xed7e19283a0550d5L,0x362e776bab3ce75cL, + 0xe890e3081f24c523L } }, + /* 47 << 14 */ + { { 0xb961b682feccef76L,0x8b8e11f58bba6d92L,0x8f2ccc4c2b2375c4L, + 0x0d7f7a52e2f86cfaL }, + { 0xfd94d30a9efe5633L,0x2d8d246b5451f934L,0x2234c6e3244e6a00L, + 0xde2b5b0dddec8c50L } }, + /* 48 << 14 */ + { { 0x2ce53c5abf776f5bL,0x6f72407160357b05L,0xb259371771bf3f7aL, + 0x87d2501c440c4a9fL }, + { 0x440552e187b05340L,0xb7bf7cc821624c32L,0x4155a6ce22facddbL, + 0x5a4228cb889837efL } }, + /* 49 << 14 */ + { { 0xef87d6d6fd4fd671L,0xa233687ec2daa10eL,0x7562224403c0eb96L, + 0x7632d1848bf19be6L }, + { 0x05d0f8e940735ff4L,0x3a3e6e13c00931f1L,0x31ccde6adafe3f18L, + 0xf381366acfe51207L } }, + /* 50 << 14 */ + { { 0x24c222a960167d92L,0x62f9d6f87529f18cL,0x412397c00353b114L, + 0x334d89dcef808043L }, + { 0xd9ec63ba2a4383ceL,0xcec8e9375cf92ba0L,0xfb8b4288c8be74c0L, + 0x67d6912f105d4391L } }, + /* 51 << 14 */ + { { 0x7b996c461b913149L,0x36aae2ef3a4e02daL,0xb68aa003972de594L, + 0x284ec70d4ec6d545L }, + { 0xf3d2b2d061391d54L,0x69c5d5d6fe114e92L,0xbe0f00b5b4482dffL, + 0xe1596fa5f5bf33c5L } }, + /* 52 << 14 */ + { { 0x10595b5696a71cbaL,0x944938b2fdcadeb7L,0xa282da4cfccd8471L, + 0x98ec05f30d37bfe1L }, + { 0xe171ce1b0698304aL,0x2d69144421bdf79bL,0xd0cd3b741b21dec1L, + 0x712ecd8b16a15f71L } }, + /* 53 << 14 */ + { { 0x8d4c00a700fd56e1L,0x02ec9692f9527c18L,0x21c449374a3e42e1L, + 0x9176fbab1392ae0aL }, + { 0x8726f1ba44b7b618L,0xb4d7aae9f1de491cL,0xf91df7b907b582c0L, + 0x7e116c30ef60aa3aL } }, + /* 54 << 14 */ + { { 0x99270f81466265d7L,0xb15b6fe24df7adf0L,0xfe33b2d3f9738f7fL, + 0x48553ab9d6d70f95L }, + { 0x2cc72ac8c21e94dbL,0x795ac38dbdc0bbeeL,0x0a1be4492e40478fL, + 0x81bd3394052bde55L } }, + /* 55 << 14 */ + { { 0x63c8dbe956b3c4f2L,0x017a99cf904177ccL,0x947bbddb4d010fc1L, + 0xacf9b00bbb2c9b21L }, + { 0x2970bc8d47173611L,0x1a4cbe08ac7d756fL,0x06d9f4aa67d541a2L, + 0xa3e8b68959c2cf44L } }, + /* 56 << 14 */ + { { 0xaad066da4d88f1ddL,0xc604f1657ad35deaL,0x7edc07204478ca67L, + 0xa10dfae0ba02ce06L }, + { 
0xeceb1c76af36f4e4L,0x994b2292af3f8f48L,0xbf9ed77b77c8a68cL, + 0x74f544ea51744c9dL } }, + /* 57 << 14 */ + { { 0x82d05bb98113a757L,0x4ef2d2b48a9885e4L,0x1e332be51aa7865fL, + 0x22b76b18290d1a52L }, + { 0x308a231044351683L,0x9d861896a3f22840L,0x5959ddcd841ed947L, + 0x0def0c94154b73bfL } }, + /* 58 << 14 */ + { { 0xf01054174c7c15e0L,0x539bfb023a277c32L,0xe699268ef9dccf5fL, + 0x9f5796a50247a3bdL }, + { 0x8b839de84f157269L,0xc825c1e57a30196bL,0x6ef0aabcdc8a5a91L, + 0xf4a8ce6c498b7fe6L } }, + /* 59 << 14 */ + { { 0x1cce35a770cbac78L,0x83488e9bf6b23958L,0x0341a070d76cb011L, + 0xda6c9d06ae1b2658L }, + { 0xb701fb30dd648c52L,0x994ca02c52fb9fd1L,0x069331176f563086L, + 0x3d2b810017856babL } }, + /* 60 << 14 */ + { { 0xe89f48c85963a46eL,0x658ab875a99e61c7L,0x6e296f874b8517b4L, + 0x36c4fcdcfc1bc656L }, + { 0xde5227a1a3906defL,0x9fe95f5762418945L,0x20c91e81fdd96cdeL, + 0x5adbe47eda4480deL } }, + /* 61 << 14 */ + { { 0xa009370f396de2b6L,0x98583d4bf0ecc7bdL,0xf44f6b57e51d0672L, + 0x03d6b078556b1984L }, + { 0x27dbdd93b0b64912L,0x9b3a343415687b09L,0x0dba646151ec20a9L, + 0xec93db7fff28187cL } }, + /* 62 << 14 */ + { { 0x00ff8c2466e48bddL,0x2514f2f911ccd78eL,0xeba11f4fe1250603L, + 0x8a22cd41243fa156L }, + { 0xa4e58df4b283e4c6L,0x78c298598b39783fL,0x5235aee2a5259809L, + 0xc16284b50e0227ddL } }, + /* 63 << 14 */ + { { 0xa5f579161338830dL,0x6d4b8a6bd2123fcaL,0x236ea68af9c546f8L, + 0xc1d36873fa608d36L }, + { 0xcd76e4958d436d13L,0xd4d9c2218fb080afL,0x665c1728e8ad3fb5L, + 0xcf1ebe4db3d572e0L } }, + /* 64 << 14 */ + { { 0xa7a8746a584c5e20L,0x267e4ea1b9dc7035L,0x593a15cfb9548c9bL, + 0x5e6e21354bd012f3L }, + { 0xdf31cc6a8c8f936eL,0x8af84d04b5c241dcL,0x63990a6f345efb86L, + 0x6fef4e61b9b962cbL } }, + /* 0 << 21 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 21 */ + { { 0xf6368f0925722608L,0x131260db131cf5c6L,0x40eb353bfab4f7acL, + 0x85c7888037eee829L }, + { 0x4c1581ffc3bdf24eL,0x5bff75cbf5c3c5a8L,0x35e8c83fa14e6f40L, + 0xb81d1c0f0295e0caL } }, + /* 2 << 21 */ + { { 0xfcde7cc8f43a730fL,0xe89b6f3c33ab590eL,0xc823f529ad03240bL, + 0x82b79afe98bea5dbL }, + { 0x568f2856962fe5deL,0x0c590adb60c591f3L,0x1fc74a144a28a858L, + 0x3b662498b3203f4cL } }, + /* 3 << 21 */ + { { 0x91e3cf0d6c39765aL,0xa2db3acdac3cca0bL,0x288f2f08cb953b50L, + 0x2414582ccf43cf1aL }, + { 0x8dec8bbc60eee9a8L,0x54c79f02729aa042L,0xd81cd5ec6532f5d5L, + 0xa672303acf82e15fL } }, + /* 4 << 21 */ + { { 0x376aafa8719c0563L,0xcd8ad2dcbc5fc79fL,0x303fdb9fcb750cd3L, + 0x14ff052f4418b08eL }, + { 0xf75084cf3e2d6520L,0x7ebdf0f8144ed509L,0xf43bf0f2d3f25b98L, + 0x86ad71cfa354d837L } }, + /* 5 << 21 */ + { { 0xb827fe9226f43572L,0xdfd3ab5b5d824758L,0x315dd23a539094c1L, + 0x85c0e37a66623d68L }, + { 0x575c79727be19ae0L,0x616a3396df0d36b5L,0xa1ebb3c826b1ff7eL, + 0x635b9485140ad453L } }, + /* 6 << 21 */ + { { 0x92bf3cdada430c0bL,0x4702850e3a96dac6L,0xc91cf0a515ac326aL, + 0x95de4f49ab8c25e4L }, + { 0xb01bad09e265c17cL,0x24e45464087b3881L,0xd43e583ce1fac5caL, + 0xe17cb3186ead97a6L } }, + /* 7 << 21 */ + { { 0x6cc3924374dcec46L,0x33cfc02d54c2b73fL,0x82917844f26cd99cL, + 0x8819dd95d1773f89L }, + { 0x09572aa60871f427L,0x8e0cf365f6f01c34L,0x7fa52988bff1f5afL, + 0x4eb357eae75e8e50L } }, + /* 8 << 21 */ + { { 0xd9d0c8c4868af75dL,0xd7325cff45c8c7eaL,0xab471996cc81ecb0L, + 0xff5d55f3611824edL }, + { 0xbe3145411977a0eeL,0x5085c4c5722038c6L,0x2d5335bff94bb495L, + 0x894ad8a6c8e2a082L } }, + /* 9 << 21 */ + { { 0x5c3e2341ada35438L,0xf4a9fc89049b8c4eL,0xbeeb355a9f17cf34L, + 0x3f311e0e6c91fe10L }, + { 0xc2d2003892ab9891L,0x257bdcc13e8ce9a9L,0x1b2d978988c53beeL, + 
0x927ce89acdba143aL } }, + /* 10 << 21 */ + { { 0xb0a32cca523db280L,0x5c889f8a50d43783L,0x503e04b34897d16fL, + 0x8cdb6e7808f5f2e8L }, + { 0x6ab91cf0179c8e74L,0xd8874e5248211d60L,0xf948d4d5ea851200L, + 0x4076d41ee6f9840aL } }, + /* 11 << 21 */ + { { 0xc20e263c47b517eaL,0x79a448fd30685e5eL,0xe55f6f78f90631a0L, + 0x88a790b1a79e6346L }, + { 0x62160c7d80969fe8L,0x54f92fd441491bb9L,0xa6645c235c957526L, + 0xf44cc5aebea3ce7bL } }, + /* 12 << 21 */ + { { 0xf76283278b1e68b7L,0xc731ad7a303f29d3L,0xfe5a9ca957d03ecbL, + 0x96c0d50c41bc97a7L }, + { 0xc4669fe79b4f7f24L,0xfdd781d83d9967efL,0x7892c7c35d2c208dL, + 0x8bf64f7cae545cb3L } }, + /* 13 << 21 */ + { { 0xc01f862c467be912L,0xf4c85ee9c73d30ccL,0x1fa6f4be6ab83ec7L, + 0xa07a3c1c4e3e3cf9L }, + { 0x87f8ef450c00beb3L,0x30e2c2b3000d4c3eL,0x1aa00b94fe08bf5bL, + 0x32c133aa9224ef52L } }, + /* 14 << 21 */ + { { 0x38df16bb32e5685dL,0x68a9e06958e6f544L,0x495aaff7cdc5ebc6L, + 0xf894a645378b135fL }, + { 0xf316350a09e27ecfL,0xeced201e58f7179dL,0x2eec273ce97861baL, + 0x47ec2caed693be2eL } }, + /* 15 << 21 */ + { { 0xfa4c97c4f68367ceL,0xe4f47d0bbe5a5755L,0x17de815db298a979L, + 0xd7eca659c177dc7dL }, + { 0x20fdbb7149ded0a3L,0x4cb2aad4fb34d3c5L,0x2cf31d2860858a33L, + 0x3b6873efa24aa40fL } }, + /* 16 << 21 */ + { { 0x540234b22c11bb37L,0x2d0366dded4c74a3L,0xf9a968daeec5f25dL, + 0x3660106867b63142L }, + { 0x07cd6d2c68d7b6d4L,0xa8f74f090c842942L,0xe27514047768b1eeL, + 0x4b5f7e89fe62aee4L } }, + /* 17 << 21 */ + { { 0xc6a7717789070d26L,0xa1f28e4edd1c8bc7L,0xea5f4f06469e1f17L, + 0x78fc242afbdb78e0L }, + { 0xc9c7c5928b0588f1L,0xb6b7a0fd1535921eL,0xcc5bdb91bde5ae35L, + 0xb42c485e12ff1864L } }, + /* 18 << 21 */ + { { 0xa1113e13dbab98aaL,0xde9d469ba17b1024L,0x23f48b37c0462d3aL, + 0x3752e5377c5c078dL }, + { 0xe3a86add15544eb9L,0xf013aea780fba279L,0x8b5bb76cf22001b5L, + 0xe617ba14f02891abL } }, + /* 19 << 21 */ + { { 0xd39182a6936219d3L,0x5ce1f194ae51cb19L,0xc78f8598bf07a74cL, + 0x6d7158f222cbf1bcL }, + { 0x3b846b21e300ce18L,0x35fba6302d11275dL,0x5fe25c36a0239b9bL, + 0xd8beb35ddf05d940L } }, + /* 20 << 21 */ + { { 0x4db02bb01f7e320dL,0x0641c3646da320eaL,0x6d95fa5d821389a3L, + 0x926997488fcd8e3dL }, + { 0x316fef17ceb6c143L,0x67fcb841d933762bL,0xbb837e35118b17f8L, + 0x4b92552f9fd24821L } }, + /* 21 << 21 */ + { { 0xae6bc70e46aca793L,0x1cf0b0e4e579311bL,0x8dc631be5802f716L, + 0x099bdc6fbddbee4dL }, + { 0xcc352bb20caf8b05L,0xf74d505a72d63df2L,0xb9876d4b91c4f408L, + 0x1ce184739e229b2dL } }, + /* 22 << 21 */ + { { 0x4950759783abdb4aL,0x850fbcb6dee84b18L,0x6325236e609e67dcL, + 0x04d831d99336c6d8L }, + { 0x8deaae3bfa12d45dL,0xe425f8ce4746e246L,0x8004c17524f5f31eL, + 0xaca16d8fad62c3b7L } }, + /* 23 << 21 */ + { { 0x0dc15a6a9152f934L,0xf1235e5ded0e12c1L,0xc33c06ecda477dacL, + 0x76be8732b2ea0006L }, + { 0xcf3f78310c0cd313L,0x3c524553a614260dL,0x31a756f8cab22d15L, + 0x03ee10d177827a20L } }, + /* 24 << 21 */ + { { 0xd1e059b21994ef20L,0x2a653b69638ae318L,0x70d5eb582f699010L, + 0x279739f709f5f84aL }, + { 0x5da4663c8b799336L,0xfdfdf14d203c37ebL,0x32d8a9dca1dbfb2dL, + 0xab40cff077d48f9bL } }, + /* 25 << 21 */ + { { 0xc018b383d20b42d5L,0xf9a810ef9f78845fL,0x40af3753bdba9df0L, + 0xb90bdcfc131dfdf9L }, + { 0x18720591f01ab782L,0xc823f2116af12a88L,0xa51b80f30dc14401L, + 0xde248f77fb2dfbe3L } }, + /* 26 << 21 */ + { { 0xef5a44e50cafe751L,0x73997c9cd4dcd221L,0x32fd86d1de854024L, + 0xd5b53adca09b84bbL }, + { 0x008d7a11dcedd8d1L,0x406bd1c874b32c84L,0x5d4472ff05dde8b1L, + 0x2e25f2cdfce2b32fL } }, + /* 27 << 21 */ + { { 0xbec0dd5e29dfc254L,0x4455fcf62b98b267L,0x0b4d43a5c72df2adL, + 0xea70e6be48a75397L }, + { 
0x2aad61695820f3bfL,0xf410d2dd9e37f68fL,0x70fb7dba7be5ac83L, + 0x636bb64536ec3eecL } }, + /* 28 << 21 */ + { { 0x27104ea39754e21cL,0xbc87a3e68d63c373L,0x483351d74109db9aL, + 0x0fa724e360134da7L }, + { 0x9ff44c29b0720b16L,0x2dd0cf1306aceeadL,0x5942758ce26929a6L, + 0x96c5db92b766a92bL } }, + /* 29 << 21 */ + { { 0xcec7d4c05f18395eL,0xd3f227441f80d032L,0x7a68b37acb86075bL, + 0x074764ddafef92dbL }, + { 0xded1e9507bc7f389L,0xc580c850b9756460L,0xaeeec2a47da48157L, + 0x3f0b4e7f82c587b3L } }, + /* 30 << 21 */ + { { 0x231c6de8a9f19c53L,0x5717bd736974e34eL,0xd9e1d216f1508fa9L, + 0x9f112361dadaa124L }, + { 0x80145e31823b7348L,0x4dd8f0d5ac634069L,0xe3d82fc72297c258L, + 0x276fcfee9cee7431L } }, + /* 31 << 21 */ + { { 0x8eb61b5e2bc0aea9L,0x4f668fd5de329431L,0x03a32ab138e4b87eL, + 0xe137451773d0ef0bL }, + { 0x1a46f7e6853ac983L,0xc3bdf42e68e78a57L,0xacf207852ea96dd1L, + 0xa10649b9f1638460L } }, + /* 32 << 21 */ + { { 0xf2369f0b879fbbedL,0x0ff0ae86da9d1869L,0x5251d75956766f45L, + 0x4984d8c02be8d0fcL }, + { 0x7ecc95a6d21008f0L,0x29bd54a03a1a1c49L,0xab9828c5d26c50f3L, + 0x32c0087c51d0d251L } }, + /* 33 << 21 */ + { { 0x9bac3ce60c1cdb26L,0xcd94d947557ca205L,0x1b1bd5989db1fdcdL, + 0x0eda0108a3d8b149L }, + { 0x9506661056152fccL,0xc2f037e6e7192b33L,0xdeffb41ac92e05a4L, + 0x1105f6c2c2f6c62eL } }, + /* 34 << 21 */ + { { 0x68e735008733913cL,0xcce861633f3adc40L,0xf407a94238a278e9L, + 0xd13c1b9d2ab21292L }, + { 0x93ed7ec71c74cf5cL,0x8887dc48f1a4c1b4L,0x3830ff304b3a11f1L, + 0x358c5a3c58937cb6L } }, + /* 35 << 21 */ + { { 0x027dc40489022829L,0x40e939773b798f79L,0x90ad333738be6eadL, + 0x9c23f6bcf34c0a5dL }, + { 0xd1711a35fbffd8bbL,0x60fcfb491949d3ddL,0x09c8ef4b7825d93aL, + 0x24233cffa0a8c968L } }, + /* 36 << 21 */ + { { 0x67ade46ce6d982afL,0xebb6bf3ee7544d7cL,0xd6b9ba763d8bd087L, + 0x46fe382d4dc61280L }, + { 0xbd39a7e8b5bdbd75L,0xab381331b8f228feL,0x0709a77cce1c4300L, + 0x6a247e56f337ceacL } }, + /* 37 << 21 */ + { { 0x8f34f21b636288beL,0x9dfdca74c8a7c305L,0x6decfd1bea919e04L, + 0xcdf2688d8e1991f8L }, + { 0xe607df44d0f8a67eL,0xd985df4b0b58d010L,0x57f834c50c24f8f4L, + 0xe976ef56a0bf01aeL } }, + /* 38 << 21 */ + { { 0x536395aca1c32373L,0x351027aa734c0a13L,0xd2f1b5d65e6bd5bcL, + 0x2b539e24223debedL }, + { 0xd4994cec0eaa1d71L,0x2a83381d661dcf65L,0x5f1aed2f7b54c740L, + 0x0bea3fa5d6dda5eeL } }, + /* 39 << 21 */ + { { 0x9d4fb68436cc6134L,0x8eb9bbf3c0a443ddL,0xfc500e2e383b7d2aL, + 0x7aad621c5b775257L }, + { 0x69284d740a8f7cc0L,0xe820c2ce07562d65L,0xbf9531b9499758eeL, + 0x73e95ca56ee0cc2dL } }, + /* 40 << 21 */ + { { 0xf61790abfbaf50a5L,0xdf55e76b684e0750L,0xec516da7f176b005L, + 0x575553bb7a2dddc7L }, + { 0x37c87ca3553afa73L,0x315f3ffc4d55c251L,0xe846442aaf3e5d35L, + 0x61b911496495ff28L } }, + /* 41 << 21 */ + { { 0x23cc95d3fa326dc3L,0x1df4da1f18fc2ceaL,0x24bf9adcd0a37d59L, + 0xb6710053320d6e1eL }, + { 0x96f9667e618344d1L,0xcc7ce042a06445afL,0xa02d8514d68dbc3aL, + 0x4ea109e4280b5a5bL } }, + /* 42 << 21 */ + { { 0x5741a7acb40961bfL,0x4ada59376aa56bfaL,0x7feb914502b765d1L, + 0x561e97bee6ad1582L }, + { 0xbbc4a5b6da3982f5L,0x0c2659edb546f468L,0xb8e7e6aa59612d20L, + 0xd83dfe20ac19e8e0L } }, + /* 43 << 21 */ + { { 0x8530c45fb835398cL,0x6106a8bfb38a41c2L,0x21e8f9a635f5dcdbL, + 0x39707137cae498edL }, + { 0x70c23834d8249f00L,0x9f14b58fab2537a0L,0xd043c3655f61c0c2L, + 0xdc5926d609a194a7L } }, + /* 44 << 21 */ + { { 0xddec03398e77738aL,0xd07a63effba46426L,0x2e58e79cee7f6e86L, + 0xe59b0459ff32d241L }, + { 0xc5ec84e520fa0338L,0x97939ac8eaff5aceL,0x0310a4e3b4a38313L, + 0x9115fba28f9d9885L } }, + /* 45 << 21 */ + { { 
0x8dd710c25fadf8c3L,0x66be38a2ce19c0e2L,0xd42a279c4cfe5022L, + 0x597bb5300e24e1b8L }, + { 0x3cde86b7c153ca7fL,0xa8d30fb3707d63bdL,0xac905f92bd60d21eL, + 0x98e7ffb67b9a54abL } }, + /* 46 << 21 */ + { { 0xd7147df8e9726a30L,0xb5e216ffafce3533L,0xb550b7992ff1ec40L, + 0x6b613b87a1e953fdL }, + { 0x87b88dba792d5610L,0x2ee1270aa190fbe1L,0x02f4e2dc2ef581daL, + 0x016530e4eff82a95L } }, + /* 47 << 21 */ + { { 0xcbb93dfd8fd6ee89L,0x16d3d98646848fffL,0x600eff241da47adfL, + 0x1b9754a00ad47a71L }, + { 0x8f9266df70c33b98L,0xaadc87aedf34186eL,0x0d2ce8e14ad24132L, + 0x8a47cbfc19946ebaL } }, + /* 48 << 21 */ + { { 0x47feeb6662b5f3afL,0xcefab5610abb3734L,0x449de60e19f35cb1L, + 0x39f8db14157f0eb9L }, + { 0xffaecc5b3c61bfd6L,0xa5a4d41d41216703L,0x7f8fabed224e1cc2L, + 0x0d5a8186871ad953L } }, + /* 49 << 21 */ + { { 0xf10774f7d22da9a9L,0x45b8a678cc8a9b0dL,0xd9c2e722bdc32cffL, + 0xbf71b5f5337202a5L }, + { 0x95c57f2f69fc4db9L,0xb6dad34c765d01e1L,0x7e0bd13fcb904635L, + 0x61751253763a588cL } }, + /* 50 << 21 */ + { { 0xd85c299781af2c2dL,0xc0f7d9c481b9d7daL,0x838a34ae08533e8dL, + 0x15c4cb08311d8311L }, + { 0x97f832858e121e14L,0xeea7dc1e85000a5fL,0x0c6059b65d256274L, + 0xec9beaceb95075c0L } }, + /* 51 << 21 */ + { { 0x173daad71df97828L,0xbf851cb5a8937877L,0xb083c59401646f3cL, + 0x3bad30cf50c6d352L }, + { 0xfeb2b202496bbceaL,0x3cf9fd4f18a1e8baL,0xd26de7ff1c066029L, + 0x39c81e9e4e9ed4f8L } }, + /* 52 << 21 */ + { { 0xd8be0cb97b390d35L,0x01df2bbd964aab27L,0x3e8c1a65c3ef64f8L, + 0x567291d1716ed1ddL }, + { 0x95499c6c5f5406d3L,0x71fdda395ba8e23fL,0xcfeb320ed5096eceL, + 0xbe7ba92bca66dd16L } }, + /* 53 << 21 */ + { { 0x4608d36bc6fb5a7dL,0xe3eea15a6d2dd0e0L,0x75b0a3eb8f97a36aL, + 0xf59814cc1c83de1eL }, + { 0x56c9c5b01c33c23fL,0xa96c1da46faa4136L,0x46bf2074de316551L, + 0x3b866e7b1f756c8fL } }, + /* 54 << 21 */ + { { 0x727727d81495ed6bL,0xb2394243b682dce7L,0x8ab8454e758610f3L, + 0xc243ce84857d72a4L }, + { 0x7b320d71dbbf370fL,0xff9afa3778e0f7caL,0x0119d1e0ea7b523fL, + 0xb997f8cb058c7d42L } }, + /* 55 << 21 */ + { { 0x285bcd2a37bbb184L,0x51dcec49a45d1fa6L,0x6ade3b64e29634cbL, + 0x080c94a726b86ef1L }, + { 0xba583db12283fbe3L,0x902bddc85a9315edL,0x07c1ccb386964becL, + 0x78f4eacfb6258301L } }, + /* 56 << 21 */ + { { 0x4bdf3a4956f90823L,0xba0f5080741d777bL,0x091d71c3f38bf760L, + 0x9633d50f9b625b02L }, + { 0x03ecb743b8c9de61L,0xb47512545de74720L,0x9f9defc974ce1cb2L, + 0x774a4f6a00bd32efL } }, + /* 57 << 21 */ + { { 0xaca385f773848f22L,0x53dad716f3f8558eL,0xab7b34b093c471f9L, + 0xf530e06919644bc7L }, + { 0x3d9fb1ffdd59d31aL,0x4382e0df08daa795L,0x165c6f4bd5cc88d7L, + 0xeaa392d54a18c900L } }, + /* 58 << 21 */ + { { 0x94203c67648024eeL,0x188763f28c2fabcdL,0xa80f87acbbaec835L, + 0x632c96e0f29d8d54L }, + { 0x29b0a60e4c00a95eL,0x2ef17f40e011e9faL,0xf6c0e1d115b77223L, + 0xaaec2c6214b04e32L } }, + /* 59 << 21 */ + { { 0xd35688d83d84e58cL,0x2af5094c958571dbL,0x4fff7e19760682a6L, + 0x4cb27077e39a407cL }, + { 0x0f59c5474ff0e321L,0x169f34a61b34c8ffL,0x2bff109652bc1ba7L, + 0xa25423b783583544L } }, + /* 60 << 21 */ + { { 0x5d55d5d50ac8b782L,0xff6622ec2db3c892L,0x48fce7416b8bb642L, + 0x31d6998c69d7e3dcL }, + { 0xdbaf8004cadcaed0L,0x801b0142d81d053cL,0x94b189fc59630ec6L, + 0x120e9934af762c8eL } }, + /* 61 << 21 */ + { { 0x53a29aa4fdc6a404L,0x19d8e01ea1909948L,0x3cfcabf1d7e89681L, + 0x3321a50d4e132d37L }, + { 0xd0496863e9a86111L,0x8c0cde6106a3bc65L,0xaf866c49fc9f8eefL, + 0x2066350eff7f5141L } }, + /* 62 << 21 */ + { { 0x4f8a4689e56ddfbdL,0xea1b0c07fe32983aL,0x2b317462873cb8cbL, + 0x658deddc2d93229fL }, + { 
0x65efaf4d0f64ef58L,0xfe43287d730cc7a8L,0xaebc0c723d047d70L, + 0x92efa539d92d26c9L } }, + /* 63 << 21 */ + { { 0x06e7845794b56526L,0x415cb80f0961002dL,0x89e5c56576dcb10fL, + 0x8bbb6982ff9259feL }, + { 0x4fe8795b9abc2668L,0xb5d4f5341e678fb1L,0x6601f3be7b7da2b9L, + 0x98da59e2a13d6805L } }, + /* 64 << 21 */ + { { 0x190d8ea601799a52L,0xa20cec41b86d2952L,0x3062ffb27fff2a7cL, + 0x741b32e579f19d37L }, + { 0xf80d81814eb57d47L,0x7a2d0ed416aef06bL,0x09735fb01cecb588L, + 0x1641caaac6061f5bL } }, + /* 0 << 28 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 28 */ + { { 0x7f99824f20151427L,0x206828b692430206L,0xaa9097d7e1112357L, + 0xacf9a2f209e414ecL }, + { 0xdbdac9da27915356L,0x7e0734b7001efee3L,0x54fab5bbd2b288e2L, + 0x4c630fc4f62dd09cL } }, + /* 2 << 28 */ + { { 0x8537107a1ac2703bL,0xb49258d86bc857b5L,0x57df14debcdaccd1L, + 0x24ab68d7c4ae8529L }, + { 0x7ed8b5d4734e59d0L,0x5f8740c8c495cc80L,0x84aedd5a291db9b3L, + 0x80b360f84fb995beL } }, + /* 3 << 28 */ + { { 0xae915f5d5fa067d1L,0x4134b57f9668960cL,0xbd3656d6a48edaacL, + 0xdac1e3e4fc1d7436L }, + { 0x674ff869d81fbb26L,0x449ed3ecb26c33d4L,0x85138705d94203e8L, + 0xccde538bbeeb6f4aL } }, + /* 4 << 28 */ + { { 0x55d5c68da61a76faL,0x598b441dca1554dcL,0xd39923b9773b279cL, + 0x33331d3c36bf9efcL }, + { 0x2d4c848e298de399L,0xcfdb8e77a1a27f56L,0x94c855ea57b8ab70L, + 0xdcdb9dae6f7879baL } }, + /* 5 << 28 */ + { { 0x7bdff8c2019f2a59L,0xb3ce5bb3cb4fbc74L,0xea907f688a9173ddL, + 0x6cd3d0d395a75439L }, + { 0x92ecc4d6efed021cL,0x09a9f9b06a77339aL,0x87ca6b157188c64aL, + 0x10c2996844899158L } }, + /* 6 << 28 */ + { { 0x5859a229ed6e82efL,0x16f338e365ebaf4eL,0x0cd313875ead67aeL, + 0x1c73d22854ef0bb4L }, + { 0x4cb5513174a5c8c7L,0x01cd29707f69ad6aL,0xa04d00dde966f87eL, + 0xd96fe4470b7b0321L } }, + /* 7 << 28 */ + { { 0x342ac06e88fbd381L,0x02cd4a845c35a493L,0xe8fa89de54f1bbcdL, + 0x341d63672575ed4cL }, + { 0xebe357fbd238202bL,0x600b4d1aa984ead9L,0xc35c9f4452436ea0L, + 0x96fe0a39a370751bL } }, + /* 8 << 28 */ + { { 0x4c4f07367f636a38L,0x9f943fb70e76d5cbL,0xb03510baa8b68b8bL, + 0xc246780a9ed07a1fL }, + { 0x3c0514156d549fc2L,0xc2953f31607781caL,0x955e2c69d8d95413L, + 0xb300fadc7bd282e3L } }, + /* 9 << 28 */ + { { 0x81fe7b5087e9189fL,0xdb17375cf42dda27L,0x22f7d896cf0a5904L, + 0xa0e57c5aebe348e6L }, + { 0xa61011d3f40e3c80L,0xb11893218db705c5L,0x4ed9309e50fedec3L, + 0xdcf14a104d6d5c1dL } }, + /* 10 << 28 */ + { { 0x056c265b55691342L,0xe8e0850491049dc7L,0x131329f5c9bae20aL, + 0x96c8b3e8d9dccdb4L }, + { 0x8c5ff838fb4ee6b4L,0xfc5a9aeb41e8ccf0L,0x7417b764fae050c6L, + 0x0953c3d700452080L } }, + /* 11 << 28 */ + { { 0x2137268238dfe7e8L,0xea417e152bb79d4bL,0x59641f1c76e7cf2dL, + 0x271e3059ea0bcfccL }, + { 0x624c7dfd7253ecbdL,0x2f552e254fca6186L,0xcbf84ecd4d866e9cL, + 0x73967709f68d4610L } }, + /* 12 << 28 */ + { { 0xa14b1163c27901b4L,0xfd9236e0899b8bf3L,0x42b091eccbc6da0aL, + 0xbb1dac6f5ad1d297L }, + { 0x80e61d53a91cf76eL,0x4110a412d31f1ee7L,0x2d87c3ba13efcf77L, + 0x1f374bb4df450d76L } }, + /* 13 << 28 */ + { { 0x5e78e2f20d188dabL,0xe3968ed0f4b885efL,0x46c0568e7314570fL, + 0x3161633801170521L }, + { 0x18e1e7e24f0c8afeL,0x4caa75ffdeea78daL,0x82db67f27c5d8a51L, + 0x36a44d866f505370L } }, + /* 14 << 28 */ + { { 0xd72c5bda0333974fL,0x5db516ae27a70146L,0x34705281210ef921L, + 0xbff17a8f0c9c38e5L }, + { 0x78f4814e12476da1L,0xc1e1661333c16980L,0x9e5b386f424d4bcaL, + 0x4c274e87c85740deL } }, + /* 15 << 28 */ + { { 0xb6a9b88d6c2f5226L,0x14d1b944550d7ca8L,0x580c85fc1fc41709L, + 0xc1da368b54c6d519L }, + { 0x2b0785ced5113cf7L,0x0670f6335a34708fL,0x46e2376715cc3f88L, + 
0x1b480cfa50c72c8fL } }, + /* 16 << 28 */ + { { 0x202886024147519aL,0xd0981eac26b372f0L,0xa9d4a7caa785ebc8L, + 0xd953c50ddbdf58e9L }, + { 0x9d6361ccfd590f8fL,0x72e9626b44e6c917L,0x7fd9611022eb64cfL, + 0x863ebb7e9eb288f3L } }, + /* 17 << 28 */ + { { 0x6e6ab7616aca8ee7L,0x97d10b39d7b40358L,0x1687d3771e5feb0dL, + 0xc83e50e48265a27aL }, + { 0x8f75a9fec954b313L,0xcc2e8f47310d1f61L,0xf5ba81c56557d0e0L, + 0x25f9680c3eaf6207L } }, + /* 18 << 28 */ + { { 0xf95c66094354080bL,0x5225bfa57bf2fe1cL,0xc5c004e25c7d98faL, + 0x3561bf1c019aaf60L }, + { 0x5e6f9f17ba151474L,0xdec2f934b04f6ecaL,0x64e368a1269acb1eL, + 0x1332d9e40cdda493L } }, + /* 19 << 28 */ + { { 0x60d6cf69df23de05L,0x66d17da2009339a0L,0x9fcac9850a693923L, + 0xbcf057fced7c6a6dL }, + { 0xc3c5c8c5f0b5662cL,0x25318dd8dcba4f24L,0x60e8cb75082b69ffL, + 0x7c23b3ee1e728c01L } }, + /* 20 << 28 */ + { { 0x15e10a0a097e4403L,0xcb3d0a8619854665L,0x88d8e211d67d4826L, + 0xb39af66e0b9d2839L }, + { 0xa5f94588bd475ca8L,0xe06b7966c077b80bL,0xfedb1485da27c26cL, + 0xd290d33afe0fd5e0L } }, + /* 21 << 28 */ + { { 0xa40bcc47f34fb0faL,0xb4760cc81fb1ab09L,0x8fca0993a273bfe3L, + 0x13e4fe07f70b213cL }, + { 0x3bcdb992fdb05163L,0x8c484b110c2b19b6L,0x1acb815faaf2e3e2L, + 0xc6905935b89ff1b4L } }, + /* 22 << 28 */ + { { 0xb2ad6f9d586e74e1L,0x488883ad67b80484L,0x758aa2c7369c3ddbL, + 0x8ab74e699f9afd31L }, + { 0x10fc2d285e21beb1L,0x3484518a318c42f9L,0x377427dc53cf40c3L, + 0x9de0781a391bc1d9L } }, + /* 23 << 28 */ + { { 0x8faee858693807e1L,0xa38653274e81ccc7L,0x02c30ff26f835b84L, + 0xb604437b0d3d38d4L }, + { 0xb3fc8a985ca1823dL,0xb82f7ec903be0324L,0xee36d761cf684a33L, + 0x5a01df0e9f29bf7dL } }, + /* 24 << 28 */ + { { 0x686202f31306583dL,0x05b10da0437c622eL,0xbf9aaa0f076a7bc8L, + 0x25e94efb8f8f4e43L }, + { 0x8a35c9b7fa3dc26dL,0xe0e5fb9396ff03c5L,0xa77e3843ebc394ceL, + 0xcede65958361de60L } }, + /* 25 << 28 */ + { { 0xd27c22f6a1993545L,0xab01cc3624d671baL,0x63fa2877a169c28eL, + 0x925ef9042eb08376L }, + { 0x3b2fa3cf53aa0b32L,0xb27beb5b71c49d7aL,0xb60e1834d105e27fL, + 0xd60897884f68570dL } }, + /* 26 << 28 */ + { { 0x23094ce0d6fbc2acL,0x738037a1815ff551L,0xda73b1bb6bef119cL, + 0xdcf6c430eef506baL }, + { 0x00e4fe7be3ef104aL,0xebdd9a2c0a065628L,0x853a81c38792043eL, + 0x22ad6eceb3b59108L } }, + /* 27 << 28 */ + { { 0x9fb813c039cd297dL,0x8ec7e16e05bda5d9L,0x2834797c0d104b96L, + 0xcc11a2e77c511510L }, + { 0x96ca5a5396ee6380L,0x054c8655cea38742L,0xb5946852d54dfa7dL, + 0x97c422e71f4ab207L } }, + /* 28 << 28 */ + { { 0xbf9075090c22b540L,0x2cde42aab7c267d4L,0xba18f9ed5ab0d693L, + 0x3ba62aa66e4660d9L }, + { 0xb24bf97bab9ea96aL,0x5d039642e3b60e32L,0x4e6a45067c4d9bd5L, + 0x666c5b9e7ed4a6a4L } }, + /* 29 << 28 */ + { { 0xfa3fdcd98edbd7ccL,0x4660bb87c6ccd753L,0x9ae9082021e6b64fL, + 0x8a56a713b36bfb3fL }, + { 0xabfce0965726d47fL,0x9eed01b20b1a9a7fL,0x30e9cad44eb74a37L, + 0x7b2524cc53e9666dL } }, + /* 30 << 28 */ + { { 0x6a29683b8f4b002fL,0xc2200d7a41f4fc20L,0xcf3af47a3a338accL, + 0x6539a4fbe7128975L }, + { 0xcec31c14c33c7fcfL,0x7eb6799bc7be322bL,0x119ef4e96646f623L, + 0x7b7a26a554d7299bL } }, + /* 31 << 28 */ + { { 0xcb37f08d403f46f2L,0x94b8fc431a0ec0c7L,0xbb8514e3c332142fL, + 0xf3ed2c33e80d2a7aL }, + { 0x8d2080afb639126cL,0xf7b6be60e3553adeL,0x3950aa9f1c7e2b09L, + 0x847ff9586410f02bL } }, + /* 32 << 28 */ + { { 0x877b7cf5678a31b0L,0xd50301ae3998b620L,0x734257c5c00fb396L, + 0xf9fb18a004e672a6L }, + { 0xff8bd8ebe8758851L,0x1e64e4c65d99ba44L,0x4b8eaedf7dfd93b7L, + 0xba2f2a9804e76b8cL } }, + /* 33 << 28 */ + { { 0x7d790cbae8053433L,0xc8e725a03d2c9585L,0x58c5c476cdd8f5edL, + 0xd106b952efa9fe1dL }, + { 
0x3c5c775b0eff13a9L,0x242442bae057b930L,0xe9f458d4c9b70cbdL, + 0x69b71448a3cdb89aL } }, + /* 34 << 28 */ + { { 0x41ee46f60e2ed742L,0x573f104540067493L,0xb1e154ff9d54c304L, + 0x2ad0436a8d3a7502L }, + { 0xee4aaa2d431a8121L,0xcd38b3ab886f11edL,0x57d49ea6034a0eb7L, + 0xd2b773bdf7e85e58L } }, + /* 35 << 28 */ + { { 0x4a559ac49b5c1f14L,0xc444be1a3e54df2bL,0x13aad704eda41891L, + 0xcd927bec5eb5c788L }, + { 0xeb3c8516e48c8a34L,0x1b7ac8124b546669L,0x1815f896594df8ecL, + 0x87c6a79c79227865L } }, + /* 36 << 28 */ + { { 0xae02a2f09b56ddbdL,0x1339b5ac8a2f1cf3L,0xf2b569c7839dff0dL, + 0xb0b9e864fee9a43dL }, + { 0x4ff8ca4177bb064eL,0x145a2812fd249f63L,0x3ab7beacf86f689aL, + 0x9bafec2701d35f5eL } }, + /* 37 << 28 */ + { { 0x28054c654265aa91L,0xa4b18304035efe42L,0x6887b0e69639dec7L, + 0xf4b8f6ad3d52aea5L }, + { 0xfb9293cc971a8a13L,0x3f159e5d4c934d07L,0x2c50e9b109acbc29L, + 0x08eb65e67154d129L } }, + /* 38 << 28 */ + { { 0x4feff58930b75c3eL,0x0bb82fe294491c93L,0xd8ac377a89af62bbL, + 0xd7b514909685e49fL }, + { 0xabca9a7b04497f19L,0x1b35ed0a1a7ad13fL,0x6b601e213ec86ed6L, + 0xda91fcb9ce0c76f1L } }, + /* 39 << 28 */ + { { 0x9e28507bd7ab27e1L,0x7c19a55563945b7bL,0x6b43f0a1aafc9827L, + 0x443b4fbd3aa55b91L }, + { 0x962b2e656962c88fL,0x139da8d4ce0db0caL,0xb93f05dd1b8d6c4fL, + 0x779cdff7180b9824L } }, + /* 40 << 28 */ + { { 0xbba23fddae57c7b7L,0x345342f21b932522L,0xfd9c80fe556d4aa3L, + 0xa03907ba6525bb61L }, + { 0x38b010e1ff218933L,0xc066b654aa52117bL,0x8e14192094f2e6eaL, + 0x66a27dca0d32f2b2L } }, + /* 41 << 28 */ + { { 0x69c7f993048b3717L,0xbf5a989ab178ae1cL,0x49fa9058564f1d6bL, + 0x27ec6e15d31fde4eL }, + { 0x4cce03737276e7fcL,0x64086d7989d6bf02L,0x5a72f0464ccdd979L, + 0x909c356647775631L } }, + /* 42 << 28 */ + { { 0x1c07bc6b75dd7125L,0xb4c6bc9787a0428dL,0x507ece52fdeb6b9dL, + 0xfca56512b2c95432L }, + { 0x15d97181d0e8bd06L,0x384dd317c6bb46eaL,0x5441ea203952b624L, + 0xbcf70dee4e7dc2fbL } }, + /* 43 << 28 */ + { { 0x372b016e6628e8c3L,0x07a0d667b60a7522L,0xcf05751b0a344ee2L, + 0x0ec09a48118bdeecL }, + { 0x6e4b3d4ed83dce46L,0x43a6316d99d2fc6eL,0xa99d898956cf044cL, + 0x7c7f4454ae3e5fb7L } }, + /* 44 << 28 */ + { { 0xb2e6b121fbabbe92L,0x281850fbe1330076L,0x093581ec97890015L, + 0x69b1dded75ff77f5L }, + { 0x7cf0b18fab105105L,0x953ced31a89ccfefL,0x3151f85feb914009L, + 0x3c9f1b8788ed48adL } }, + /* 45 << 28 */ + { { 0xc9aba1a14a7eadcbL,0x928e7501522e71cfL,0xeaede7273a2e4f83L, + 0x467e10d11ce3bbd3L }, + { 0xf3442ac3b955dcf0L,0xba96307dd3d5e527L,0xf763a10efd77f474L, + 0x5d744bd06a6e1ff0L } }, + /* 46 << 28 */ + { { 0xd287282aa777899eL,0xe20eda8fd03f3cdeL,0x6a7e75bb50b07d31L, + 0x0b7e2a946f379de4L }, + { 0x31cb64ad19f593cfL,0x7b1a9e4f1e76ef1dL,0xe18c9c9db62d609cL, + 0x439bad6de779a650L } }, + /* 47 << 28 */ + { { 0x219d9066e032f144L,0x1db632b8e8b2ec6aL,0xff0d0fd4fda12f78L, + 0x56fb4c2d2a25d265L }, + { 0x5f4e2ee1255a03f1L,0x61cd6af2e96af176L,0xe0317ba8d068bc97L, + 0x927d6bab264b988eL } }, + /* 48 << 28 */ + { { 0xa18f07e0e90fb21eL,0x00fd2b80bba7fca1L,0x20387f2795cd67b5L, + 0x5b89a4e7d39707f7L }, + { 0x8f83ad3f894407ceL,0xa0025b946c226132L,0xc79563c7f906c13bL, + 0x5f548f314e7bb025L } }, + /* 49 << 28 */ + { { 0x2b4c6b8feac6d113L,0xa67e3f9c0e813c76L,0x3982717c3fe1f4b9L, + 0x5886581926d8050eL }, + { 0x99f3640cf7f06f20L,0xdc6102162a66ebc2L,0x52f2c175767a1e08L, + 0x05660e1a5999871bL } }, + /* 50 << 28 */ + { { 0x6b0f17626d3c4693L,0xf0e7d62737ed7beaL,0xc51758c7b75b226dL, + 0x40a886281f91613bL }, + { 0x889dbaa7bbb38ce0L,0xe0404b65bddcad81L,0xfebccd3a8bc9671fL, + 0xfbf9a357ee1f5375L } }, + /* 51 << 28 */ + { { 
0x5dc169b028f33398L,0xb07ec11d72e90f65L,0xae7f3b4afaab1eb1L, + 0xd970195e5f17538aL }, + { 0x52b05cbe0181e640L,0xf5debd622643313dL,0x761481545df31f82L, + 0x23e03b333a9e13c5L } }, + /* 52 << 28 */ + { { 0xff7589494fde0c1fL,0xbf8a1abee5b6ec20L,0x702278fb87e1db6cL, + 0xc447ad7a35ed658fL }, + { 0x48d4aa3803d0ccf2L,0x80acb338819a7c03L,0x9bc7c89e6e17ceccL, + 0x46736b8b03be1d82L } }, + /* 53 << 28 */ + { { 0xd65d7b60c0432f96L,0xddebe7a3deb5442fL,0x79a253077dff69a2L, + 0x37a56d9402cf3122L }, + { 0x8bab8aedf2350d0aL,0x13c3f276037b0d9aL,0xc664957c44c65caeL, + 0x88b44089c2e71a88L } }, + /* 54 << 28 */ + { { 0xdb88e5a35cb02664L,0x5d4c0bf18686c72eL,0xea3d9b62a682d53eL, + 0x9b605ef40b2ad431L }, + { 0x71bac202c69645d0L,0xa115f03a6a1b66e7L,0xfe2c563a158f4dc4L, + 0xf715b3a04d12a78cL } }, + /* 55 << 28 */ + { { 0x8f7f0a48d413213aL,0x2035806dc04becdbL,0xecd34a995d8587f5L, + 0x4d8c30799f6d3a71L }, + { 0x1b2a2a678d95a8f6L,0xc58c9d7df2110d0dL,0xdeee81d5cf8fba3fL, + 0xa42be3c00c7cdf68L } }, + /* 56 << 28 */ + { { 0x2126f742d43b5eaaL,0x054a0766dfa59b85L,0x9d0d5e36126bfd45L, + 0xa1f8fbd7384f8a8fL }, + { 0x317680f5d563fcccL,0x48ca5055f280a928L,0xe00b81b227b578cfL, + 0x10aad9182994a514L } }, + /* 57 << 28 */ + { { 0xd9e07b62b7bdc953L,0x9f0f6ff25bc086ddL,0x09d1ccff655eee77L, + 0x45475f795bef7df1L }, + { 0x3faa28fa86f702ccL,0x92e609050f021f07L,0xe9e629687f8fa8c6L, + 0xbd71419af036ea2cL } }, + /* 58 << 28 */ + { { 0x171ee1cc6028da9aL,0x5352fe1ac251f573L,0xf8ff236e3fa997f4L, + 0xd831b6c9a5749d5fL }, + { 0x7c872e1de350e2c2L,0xc56240d91e0ce403L,0xf9deb0776974f5cbL, + 0x7d50ba87961c3728L } }, + /* 59 << 28 */ + { { 0xd6f894265a3a2518L,0xcf817799c6303d43L,0x510a0471619e5696L, + 0xab049ff63a5e307bL }, + { 0xe4cdf9b0feb13ec7L,0xd5e971179d8ff90cL,0xf6f64d069afa96afL, + 0x00d0bf5e9d2012a2L } }, + /* 60 << 28 */ + { { 0xe63f301f358bcdc0L,0x07689e990a9d47f8L,0x1f689e2f4f43d43aL, + 0x4d542a1690920904L }, + { 0xaea293d59ca0a707L,0xd061fe458ac68065L,0x1033bf1b0090008cL, + 0x29749558c08a6db6L } }, + /* 61 << 28 */ + { { 0x74b5fc59c1d5d034L,0xf712e9f667e215e0L,0xfd520cbd860200e6L, + 0x0229acb43ea22588L }, + { 0x9cd1e14cfff0c82eL,0x87684b6259c69e73L,0xda85e61c96ccb989L, + 0x2d5dbb02a3d06493L } }, + /* 62 << 28 */ + { { 0xf22ad33ae86b173cL,0xe8e41ea5a79ff0e3L,0x01d2d725dd0d0c10L, + 0x31f39088032d28f9L }, + { 0x7b3f71e17829839eL,0x0cf691b44502ae58L,0xef658dbdbefc6115L, + 0xa5cd6ee5b3ab5314L } }, + /* 63 << 28 */ + { { 0x206c8d7b5f1d2347L,0x794645ba4cc2253aL,0xd517d8ff58389e08L, + 0x4fa20dee9f847288L }, + { 0xeba072d8d797770aL,0x7360c91dbf429e26L,0x7200a3b380af8279L, + 0x6a1c915082dadce3L } }, + /* 64 << 28 */ + { { 0x0ee6d3a7c35d8794L,0x042e65580356bae5L,0x9f59698d643322fdL, + 0x9379ae1550a61967L }, + { 0x64b9ae62fcc9981eL,0xaed3d6316d2934c6L,0x2454b3025e4e65ebL, + 0xab09f647f9950428L } }, + /* 0 << 35 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 35 */ + { { 0xb2083a1222248accL,0x1f6ec0ef3264e366L,0x5659b7045afdee28L, + 0x7a823a40e6430bb5L }, + { 0x24592a04e1900a79L,0xcde09d4ac9ee6576L,0x52b6463f4b5ea54aL, + 0x1efe9ed3d3ca65a7L } }, + /* 2 << 35 */ + { { 0xe27a6dbe305406ddL,0x8eb7dc7fdd5d1957L,0xf54a6876387d4d8fL, + 0x9c479409c7762de4L }, + { 0xbe4d5b5d99b30778L,0x25380c566e793682L,0x602d37f3dac740e3L, + 0x140deabe1566e4aeL } }, + /* 3 << 35 */ + { { 0x4481d067afd32acfL,0xd8f0fccae1f71ccfL,0xd208dd0cb596f2daL, + 0xd049d7309aad93f9L }, + { 0xc79f263d42ab580eL,0x09411bb123f707b4L,0x8cfde1ff835e0edaL, + 0x7270749090f03402L } }, + /* 4 << 35 */ + { { 0xeaee6126c49a861eL,0x024f3b65e14f0d06L,0x51a3f1e8c69bfc17L, 
+ 0xc3c3a8e9a7686381L }, + { 0x3400752cb103d4c8L,0x02bc46139218b36bL,0xc67f75eb7651504aL, + 0xd6848b56d02aebfaL } }, + /* 5 << 35 */ + { { 0xbd9802e6c30fa92bL,0x5a70d96d9a552784L,0x9085c4ea3f83169bL, + 0xfa9423bb06908228L }, + { 0x2ffebe12fe97a5b9L,0x85da604971b99118L,0x9cbc2f7f63178846L, + 0xfd96bc709153218eL } }, + /* 6 << 35 */ + { { 0x958381db1782269bL,0xae34bf792597e550L,0xbb5c60645f385153L, + 0x6f0e96afe3088048L }, + { 0xbf6a021577884456L,0xb3b5688c69310ea7L,0x17c9429504fad2deL, + 0xe020f0e517896d4dL } }, + /* 7 << 35 */ + { { 0x730ba0ab0976505fL,0x567f6813095e2ec5L,0x470620106331ab71L, + 0x72cfa97741d22b9fL }, + { 0x33e55ead8a2373daL,0xa8d0d5f47ba45a68L,0xba1d8f9c03029d15L, + 0x8f34f1ccfc55b9f3L } }, + /* 8 << 35 */ + { { 0xcca4428dbbe5a1a9L,0x8187fd5f3126bd67L,0x0036973a48105826L, + 0xa39b6663b8bd61a0L }, + { 0x6d42deef2d65a808L,0x4969044f94636b19L,0xf611ee47dd5d564cL, + 0x7b2f3a49d2873077L } }, + /* 9 << 35 */ + { { 0x94157d45300eb294L,0x2b2a656e169c1494L,0xc000dd76d3a47aa9L, + 0xa2864e4fa6243ea4L }, + { 0x82716c47db89842eL,0x12dfd7d761479fb7L,0x3b9a2c56e0b2f6dcL, + 0x46be862ad7f85d67L } }, + /* 10 << 35 */ + { { 0x03b0d8dd0f82b214L,0x460c34f9f103cbc6L,0xf32e5c0318d79e19L, + 0x8b8888baa84117f8L }, + { 0x8f3c37dcc0722677L,0x10d21be91c1c0f27L,0xd47c8468e0f7a0c6L, + 0x9bf02213adecc0e0L } }, + /* 11 << 35 */ + { { 0x0baa7d1242b48b99L,0x1bcb665d48424096L,0x8b847cd6ebfb5cfbL, + 0x87c2ae569ad4d10dL }, + { 0xf1cbb1220de36726L,0xe7043c683fdfbd21L,0x4bd0826a4e79d460L, + 0x11f5e5984bd1a2cbL } }, + /* 12 << 35 */ + { { 0x97554160b7fe7b6eL,0x7d16189a400a3fb2L,0xd73e9beae328ca1eL, + 0x0dd04b97e793d8ccL }, + { 0xa9c83c9b506db8ccL,0x5cd47aaecf38814cL,0x26fc430db64b45e6L, + 0x079b5499d818ea84L } }, + /* 13 << 35 */ + { { 0xebb01102c1c24a3bL,0xca24e5681c161c1aL,0x103eea6936f00a4aL, + 0x9ad76ee876176c7bL }, + { 0x97451fc2538e0ff7L,0x94f898096604b3b0L,0x6311436e3249cfd7L, + 0x27b4a7bd41224f69L } }, + /* 14 << 35 */ + { { 0x03b5d21ae0ac2941L,0x279b0254c2d31937L,0x3307c052cac992d0L, + 0x6aa7cb92efa8b1f3L }, + { 0x5a1825800d37c7a5L,0x13380c37342d5422L,0x92ac2d66d5d2ef92L, + 0x035a70c9030c63c6L } }, + /* 15 << 35 */ + { { 0xc16025dd4ce4f152L,0x1f419a71f9df7c06L,0x6d5b221491e4bb14L, + 0xfc43c6cc839fb4ceL }, + { 0x49f06591925d6b2dL,0x4b37d9d362186598L,0x8c54a971d01b1629L, + 0xe1a9c29f51d50e05L } }, + /* 16 << 35 */ + { { 0x5109b78571ba1861L,0x48b22d5cd0c8f93dL,0xe8fa84a78633bb93L, + 0x53fba6ba5aebbd08L }, + { 0x7ff27df3e5eea7d8L,0x521c879668ca7158L,0xb9d5133bce6f1a05L, + 0x2d50cd53fd0ebee4L } }, + /* 17 << 35 */ + { { 0xc82115d6c5a3ef16L,0x993eff9dba079221L,0xe4da2c5e4b5da81cL, + 0x9a89dbdb8033fd85L }, + { 0x60819ebf2b892891L,0x53902b215d14a4d5L,0x6ac35051d7fda421L, + 0xcc6ab88561c83284L } }, + /* 18 << 35 */ + { { 0x14eba133f74cff17L,0x240aaa03ecb813f2L,0xcfbb65406f665beeL, + 0x084b1fe4a425ad73L }, + { 0x009d5d16d081f6a6L,0x35304fe8eef82c90L,0xf20346d5aa9eaa22L, + 0x0ada9f07ac1c91e3L } }, + /* 19 << 35 */ + { { 0xa6e21678968a6144L,0x54c1f77c07b31a1eL,0xd6bb787e5781fbe1L, + 0x61bd2ee0e31f1c4aL }, + { 0xf25aa1e9781105fcL,0x9cf2971f7b2f8e80L,0x26d15412cdff919bL, + 0x01db4ebe34bc896eL } }, + /* 20 << 35 */ + { { 0x7d9b3e23b40df1cfL,0x5933737394e971b4L,0xbf57bd14669cf921L, + 0x865daedf0c1a1064L }, + { 0x3eb70bd383279125L,0xbc3d5b9f34ecdaabL,0x91e3ed7e5f755cafL, + 0x49699f54d41e6f02L } }, + /* 21 << 35 */ + { { 0x185770e1d4a7a15bL,0x08f3587aeaac87e7L,0x352018db473133eaL, + 0x674ce71904fd30fcL }, + { 0x7b8d9835088b3e0eL,0x7a0356a95d0d47a1L,0x9d9e76596474a3c4L, + 0x61ea48a7ff66966cL } }, + /* 22 << 35 */ + { { 
0x304177580f3e4834L,0xfdbb21c217a9afcbL,0x756fa17f2f9a67b3L, + 0x2a6b2421a245c1a8L }, + { 0x64be27944af02291L,0xade465c62a5804feL,0x8dffbd39a6f08fd7L, + 0xc4efa84caa14403bL } }, + /* 23 << 35 */ + { { 0xa1b91b2a442b0f5cL,0xb748e317cf997736L,0x8d1b62bfcee90e16L, + 0x907ae2710b2078c0L }, + { 0xdf31534b0c9bcdddL,0x043fb05439adce83L,0x99031043d826846aL, + 0x61a9c0d6b144f393L } }, + /* 24 << 35 */ + { { 0xdab4804647718427L,0xdf17ff9b6e830f8bL,0x408d7ee8e49a1347L, + 0x6ac71e2391c1d4aeL }, + { 0xc8cbb9fd1defd73cL,0x19840657bbbbfec5L,0x39db1cb59e7ef8eaL, + 0x78aa829664105f30L } }, + /* 25 << 35 */ + { { 0xa3d9b7f0a3738c29L,0x0a2f235abc3250a3L,0x55e506f6445e4cafL, + 0x0974f73d33475f7aL }, + { 0xd37dbba35ba2f5a8L,0x542c6e636af40066L,0x26d99b53c5d73e2cL, + 0x06060d7d6c3ca33eL } }, + /* 26 << 35 */ + { { 0xcdbef1c2065fef4aL,0x77e60f7dfd5b92e3L,0xd7c549f026708350L, + 0x201b3ad034f121bfL }, + { 0x5fcac2a10334fc14L,0x8a9a9e09344552f6L,0x7dd8a1d397653082L, + 0x5fc0738f79d4f289L } }, + /* 27 << 35 */ + { { 0x787d244d17d2d8c3L,0xeffc634570830684L,0x5ddb96dde4f73ae5L, + 0x8efb14b1172549a5L }, + { 0x6eb73eee2245ae7aL,0xbca4061eea11f13eL,0xb577421d30b01f5dL, + 0xaa688b24782e152cL } }, + /* 28 << 35 */ + { { 0x67608e71bd3502baL,0x4ef41f24b4de75a0L,0xb08dde5efd6125e5L, + 0xde484825a409543fL }, + { 0x1f198d9865cc2295L,0x428a37716e0edfa2L,0x4f9697a2adf35fc7L, + 0x01a43c79f7cac3c7L } }, + /* 29 << 35 */ + { { 0xb05d70590fd3659aL,0x8927f30cbb7f2d9aL,0x4023d1ac8cf984d3L, + 0x32125ed302897a45L }, + { 0xfb572dad3d414205L,0x73000ef2e3fa82a9L,0x4c0868e9f10a5581L, + 0x5b61fc676b0b3ca5L } }, + /* 30 << 35 */ + { { 0xc1258d5b7cae440cL,0x21c08b41402b7531L,0xf61a8955de932321L, + 0x3568faf82d1408afL }, + { 0x71b15e999ecf965bL,0xf14ed248e917276fL,0xc6f4caa1820cf9e2L, + 0x681b20b218d83c7eL } }, + /* 31 << 35 */ + { { 0x6cde738dc6c01120L,0x71db0813ae70e0dbL,0x95fc064474afe18cL, + 0x34619053129e2be7L }, + { 0x80615ceadb2a3b15L,0x0a49a19edb4c7073L,0x0e1b84c88fd2d367L, + 0xd74bf462033fb8aaL } }, + /* 32 << 35 */ + { { 0x889f6d65533ef217L,0x7158c7e4c3ca2e87L,0xfb670dfbdc2b4167L, + 0x75910a01844c257fL }, + { 0xf336bf07cf88577dL,0x22245250e45e2aceL,0x2ed92e8d7ca23d85L, + 0x29f8be4c2b812f58L } }, + /* 33 << 35 */ + { { 0xdd9ebaa7076fe12bL,0x3f2400cbae1537f9L,0x1aa9352817bdfb46L, + 0xc0f9843067883b41L }, + { 0x5590ede10170911dL,0x7562f5bb34d4b17fL,0xe1fa1df21826b8d2L, + 0xb40b796a6bd80d59L } }, + /* 34 << 35 */ + { { 0xd65bf1973467ba92L,0x8c9b46dbf70954b0L,0x97c8a0f30e78f15dL, + 0xa8f3a69a85a4c961L }, + { 0x4242660f61e4ce9bL,0xbf06aab36ea6790cL,0xc6706f8eec986416L, + 0x9e56dec19a9fc225L } }, + /* 35 << 35 */ + { { 0x527c46f49a9898d9L,0xd799e77b5633cdefL,0x24eacc167d9e4297L, + 0xabb61cea6b1cb734L }, + { 0xbee2e8a7f778443cL,0x3bb42bf129de2fe6L,0xcbed86a13003bb6fL, + 0xd3918e6cd781cdf6L } }, + /* 36 << 35 */ + { { 0x4bee32719a5103f1L,0x5243efc6f50eac06L,0xb8e122cb6adcc119L, + 0x1b7faa84c0b80a08L }, + { 0x32c3d1bd6dfcd08cL,0x129dec4e0be427deL,0x98ab679c1d263c83L, + 0xafc83cb7cef64effL } }, + /* 37 << 35 */ + { { 0x85eb60882fa6be76L,0x892585fb1328cbfeL,0xc154d3edcf618ddaL, + 0xc44f601b3abaf26eL }, + { 0x7bf57d0b2be1fdfdL,0xa833bd2d21137feeL,0x9353af362db591a8L, + 0xc76f26dc5562a056L } }, + /* 38 << 35 */ + { { 0x1d87e47d3fdf5a51L,0x7afb5f9355c9cab0L,0x91bbf58f89e0586eL, + 0x7c72c0180d843709L }, + { 0xa9a5aafb99b5c3dcL,0xa48a0f1d3844aeb0L,0x7178b7ddb667e482L, + 0x453985e96e23a59aL } }, + /* 39 << 35 */ + { { 0x4a54c86001b25dd8L,0x0dd37f48fb897c8aL,0x5f8aa6100ea90cd9L, + 0xc8892c6816d5830dL }, + { 
0xeb4befc0ef514ca5L,0x478eb679e72c9ee6L,0x9bca20dadbc40d5fL, + 0xf015de21dde4f64aL } }, + /* 40 << 35 */ + { { 0xaa6a4de0eaf4b8a5L,0x68cfd9ca4bc60e32L,0x668a4b017fd15e70L, + 0xd9f0694af27dc09dL }, + { 0xf6c3cad5ba708bcdL,0x5cd2ba695bb95c2aL,0xaa28c1d333c0a58fL, + 0x23e274e3abc77870L } }, + /* 41 << 35 */ + { { 0x44c3692ddfd20a4aL,0x091c5fd381a66653L,0x6c0bb69109a0757dL, + 0x9072e8b9667343eaL }, + { 0x31d40eb080848becL,0x95bd480a79fd36ccL,0x01a77c6165ed43f5L, + 0xafccd1272e0d40bfL } }, + /* 42 << 35 */ + { { 0xeccfc82d1cc1884bL,0xc85ac2015d4753b4L,0xc7a6caac658e099fL, + 0xcf46369e04b27390L }, + { 0xe2e7d049506467eaL,0x481b63a237cdecccL,0x4029abd8ed80143aL, + 0x28bfe3c7bcb00b88L } }, + /* 43 << 35 */ + { { 0x3bec10090643d84aL,0x885f3668abd11041L,0xdb02432cf83a34d6L, + 0x32f7b360719ceebeL }, + { 0xf06c7837dad1fe7aL,0x60a157a95441a0b0L,0x704970e9e2d47550L, + 0xcd2bd553271b9020L } }, + /* 44 << 35 */ + { { 0xff57f82f33e24a0bL,0x9cbee23ff2565079L,0x16353427eb5f5825L, + 0x276feec4e948d662L }, + { 0xd1b62bc6da10032bL,0x718351ddf0e72a53L,0x934520762420e7baL, + 0x96368fff3a00118dL } }, + /* 45 << 35 */ + { { 0x00ce2d26150a49e4L,0x0c28b6363f04706bL,0xbad65a4658b196d0L, + 0x6c8455fcec9f8b7cL }, + { 0xe90c895f2d71867eL,0x5c0be31bedf9f38cL,0x2a37a15ed8f6ec04L, + 0x239639e78cd85251L } }, + /* 46 << 35 */ + { { 0xd89753159c7c4c6bL,0x603aa3c0d7409af7L,0xb8d53d0c007132fbL, + 0x68d12af7a6849238L }, + { 0xbe0607e7bf5d9279L,0x9aa50055aada74ceL,0xe81079cbba7e8ccbL, + 0x610c71d1a5f4ff5eL } }, + /* 47 << 35 */ + { { 0x9e2ee1a75aa07093L,0xca84004ba75da47cL,0x074d39513de75401L, + 0xf938f756bb311592L }, + { 0x9619761800a43421L,0x39a2536207bc78c8L,0x278f710a0a171276L, + 0xb28446ea8d1a8f08L } }, + /* 48 << 35 */ + { { 0x184781bfe3b6a661L,0x7751cb1de6d279f7L,0xf8ff95d6c59eb662L, + 0x186d90b758d3dea7L }, + { 0x0e4bb6c1dfb4f754L,0x5c5cf56b2b2801dcL,0xc561e4521f54564dL, + 0xb4fb8c60f0dd7f13L } }, + /* 49 << 35 */ + { { 0xf884963033ff98c7L,0x9619fffacf17769cL,0xf8090bf61bfdd80aL, + 0x14d9a149422cfe63L }, + { 0xb354c3606f6df9eaL,0xdbcf770d218f17eaL,0x207db7c879eb3480L, + 0x213dbda8559b6a26L } }, + /* 50 << 35 */ + { { 0xac4c200b29fc81b3L,0xebc3e09f171d87c1L,0x917995301481aa9eL, + 0x051b92e192e114faL }, + { 0xdf8f92e9ecb5537fL,0x44b1b2cc290c7483L,0xa711455a2adeb016L, + 0x964b685681a10c2cL } }, + /* 51 << 35 */ + { { 0x4f159d99cec03623L,0x05532225ef3271eaL,0xb231bea3c5ee4849L, + 0x57a54f507094f103L }, + { 0x3e2d421d9598b352L,0xe865a49c67412ab4L,0xd2998a251cc3a912L, + 0x5d0928080c74d65dL } }, + /* 52 << 35 */ + { { 0x73f459084088567aL,0xeb6b280e1f214a61L,0x8c9adc34caf0c13dL, + 0x39d12938f561fb80L }, + { 0xb2dc3a5ebc6edfb4L,0x7485b1b1fe4d210eL,0x062e0400e186ae72L, + 0x91e32d5c6eeb3b88L } }, + /* 53 << 35 */ + { { 0x6df574d74be59224L,0xebc88ccc716d55f3L,0x26c2e6d0cad6ed33L, + 0xc6e21e7d0d3e8b10L }, + { 0x2cc5840e5bcc36bbL,0x9292445e7da74f69L,0x8be8d3214e5193a8L, + 0x3ec236298df06413L } }, + /* 54 << 35 */ + { { 0xc7e9ae85b134defaL,0x6073b1d01bb2d475L,0xb9ad615e2863c00dL, + 0x9e29493d525f4ac4L }, + { 0xc32b1dea4e9acf4fL,0x3e1f01c8a50db88dL,0xb05d70ea04da916cL, + 0x714b0d0ad865803eL } }, + /* 55 << 35 */ + { { 0x4bd493fc9920cb5eL,0x5b44b1f792c7a3acL,0xa2a77293bcec9235L, + 0x5ee06e87cd378553L }, + { 0xceff8173da621607L,0x2bb03e4c99f5d290L,0x2945106aa6f734acL, + 0xb5056604d25c4732L } }, + /* 56 << 35 */ + { { 0x5945920ce079afeeL,0x686e17a06789831fL,0x5966bee8b74a5ae5L, + 0x38a673a21e258d46L }, + { 0xbd1cc1f283141c95L,0x3b2ecf4f0e96e486L,0xcd3aa89674e5fc78L, + 0x415ec10c2482fa7aL } }, + /* 57 << 35 */ + { { 
0x1523441980503380L,0x513d917ad314b392L,0xb0b52f4e63caecaeL, + 0x07bf22ad2dc7780bL }, + { 0xe761e8a1e4306839L,0x1b3be9625dd7feaaL,0x4fe728de74c778f1L, + 0xf1fa0bda5e0070f6L } }, + /* 58 << 35 */ + { { 0x85205a316ec3f510L,0x2c7e4a14d2980475L,0xde3c19c06f30ebfdL, + 0xdb1c1f38d4b7e644L }, + { 0xfe291a755dce364aL,0xb7b22a3c058f5be3L,0x2cd2c30237fea38cL, + 0x2930967a2e17be17L } }, + /* 59 << 35 */ + { { 0x87f009de0c061c65L,0xcb014aacedc6ed44L,0x49bd1cb43bafb1ebL, + 0x81bd8b5c282d3688L }, + { 0x1cdab87ef01a17afL,0x21f37ac4e710063bL,0x5a6c567642fc8193L, + 0xf4753e7056a6015cL } }, + /* 60 << 35 */ + { { 0x020f795ea15b0a44L,0x8f37c8d78958a958L,0x63b7e89ba4b675b5L, + 0xb4fb0c0c0fc31aeaL }, + { 0xed95e639a7ff1f2eL,0x9880f5a3619614fbL,0xdeb6ff02947151abL, + 0x5bc5118ca868dcdbL } }, + /* 61 << 35 */ + { { 0xd8da20554c20cea5L,0xcac2776e14c4d69aL,0xcccb22c1622d599bL, + 0xa4ddb65368a9bb50L }, + { 0x2c4ff1511b4941b4L,0xe1ff19b46efba588L,0x35034363c48345e0L, + 0x45542e3d1e29dfc4L } }, + /* 62 << 35 */ + { { 0xf197cb91349f7aedL,0x3b2b5a008fca8420L,0x7c175ee823aaf6d8L, + 0x54dcf42135af32b6L }, + { 0x0ba1430727d6561eL,0x879d5ee4d175b1e2L,0xc7c4367399807db5L, + 0x77a544559cd55bcdL } }, + /* 63 << 35 */ + { { 0xe6c2ff130105c072L,0x18f7a99f8dda7da4L,0x4c3018200e2d35c1L, + 0x06a53ca0d9cc6c82L }, + { 0xaa21cc1ef1aa1d9eL,0x324143344a75b1e8L,0x2a6d13280ebe9fdcL, + 0x16bd173f98a4755aL } }, + /* 64 << 35 */ + { { 0xfbb9b2452133ffd9L,0x39a8b2f1830f1a20L,0x484bc97dd5a1f52aL, + 0xd6aebf56a40eddf8L }, + { 0x32257acb76ccdac6L,0xaf4d36ec1586ff27L,0x8eaa8863f8de7dd1L, + 0x0045d5cf88647c16L } }, + /* 0 << 42 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 42 */ + { { 0xa6f3d574c005979dL,0xc2072b426a40e350L,0xfca5c1568de2ecf9L, + 0xa8c8bf5ba515344eL }, + { 0x97aee555114df14aL,0xd4374a4dfdc5ec6bL,0x754cc28f2ca85418L, + 0x71cb9e27d3c41f78L } }, + /* 2 << 42 */ + { { 0x8910507903605c39L,0xf0843d9ea142c96cL,0xf374493416923684L, + 0x732caa2ffa0a2893L }, + { 0xb2e8c27061160170L,0xc32788cc437fbaa3L,0x39cd818ea6eda3acL, + 0xe2e942399e2b2e07L } }, + /* 3 << 42 */ + { { 0x6967d39b0260e52aL,0xd42585cc90653325L,0x0d9bd60521ca7954L, + 0x4fa2087781ed57b3L }, + { 0x60c1eff8e34a0bbeL,0x56b0040c84f6ef64L,0x28be2b24b1af8483L, + 0xb2278163f5531614L } }, + /* 4 << 42 */ + { { 0x8df275455922ac1cL,0xa7b3ef5ca52b3f63L,0x8e77b21471de57c4L, + 0x31682c10834c008bL }, + { 0xc76824f04bd55d31L,0xb6d1c08617b61c71L,0x31db0903c2a5089dL, + 0x9c092172184e5d3fL } }, + /* 5 << 42 */ + { { 0xdd7ced5bc00cc638L,0x1a2015eb61278fc2L,0x2e8e52886a37f8d6L, + 0xc457786fe79933adL }, + { 0xb3fe4cce2c51211aL,0xad9b10b224c20498L,0x90d87a4fd28db5e5L, + 0x698cd1053aca2fc3L } }, + /* 6 << 42 */ + { { 0x4f112d07e91b536dL,0xceb982f29eba09d6L,0x3c157b2c197c396fL, + 0xe23c2d417b66eb24L }, + { 0x480c57d93f330d37L,0xb3a4c8a179108debL,0x702388decb199ce5L, + 0x0b019211b944a8d4L } }, + /* 7 << 42 */ + { { 0x24f2a692840bb336L,0x7c353bdca669fa7bL,0xda20d6fcdec9c300L, + 0x625fbe2fa13a4f17L }, + { 0xa2b1b61adbc17328L,0x008965bfa9515621L,0x49690939c620ff46L, + 0x182dd27d8717e91cL } }, + /* 8 << 42 */ + { { 0x5ace5035ea6c3997L,0x54259aaac2610befL,0xef18bb3f3c80dd39L, + 0x6910b95b5fc3fa39L }, + { 0xfce2f51043e09aeeL,0xced56c9fa7675665L,0x10e265acd872db61L, + 0x6982812eae9fce69L } }, + /* 9 << 42 */ + { { 0x29be11c6ce800998L,0x72bb1752b90360d9L,0x2c1931975a4ad590L, + 0x2ba2f5489fc1dbc0L }, + { 0x7fe4eebbe490ebe0L,0x12a0a4cd7fae11c0L,0x7197cf81e903ba37L, + 0xcf7d4aa8de1c6dd8L } }, + /* 10 << 42 */ + { { 0x92af6bf43fd5684cL,0x2b26eecf80360aa1L,0xbd960f3000546a82L, + 
0x407b3c43f59ad8feL }, + { 0x86cae5fe249c82baL,0x9e0faec72463744cL,0x87f551e894916272L, + 0x033f93446ceb0615L } }, + /* 11 << 42 */ + { { 0x1e5eb0d18be82e84L,0x89967f0e7a582fefL,0xbcf687d5a6e921faL, + 0xdfee4cf3d37a09baL }, + { 0x94f06965b493c465L,0x638b9a1c7635c030L,0x7666786466f05e9fL, + 0xccaf6808c04da725L } }, + /* 12 << 42 */ + { { 0xca2eb690768fccfcL,0xf402d37db835b362L,0x0efac0d0e2fdfcceL, + 0xefc9cdefb638d990L }, + { 0x2af12b72d1669a8bL,0x33c536bc5774ccbdL,0x30b21909fb34870eL, + 0xc38fa2f77df25acaL } }, + /* 13 << 42 */ + { { 0x74c5f02bbf81f3f5L,0x0525a5aeaf7e4581L,0x88d2aaba433c54aeL, + 0xed9775db806a56c5L }, + { 0xd320738ac0edb37dL,0x25fdb6ee66cc1f51L,0xac661d1710600d76L, + 0x931ec1f3bdd1ed76L } }, + /* 14 << 42 */ + { { 0x65c11d6219ee43f1L,0x5cd57c3e60829d97L,0xd26c91a3984be6e8L, + 0xf08d93098b0c53bdL }, + { 0x94bc9e5bc016e4eaL,0xd391683911d43d2bL,0x886c5ad773701155L, + 0xe037762620b00715L } }, + /* 15 << 42 */ + { { 0x7f01c9ecaa80ba59L,0x3083411a68538e51L,0x970370f1e88128afL, + 0x625cc3db91dec14bL }, + { 0xfef9666c01ac3107L,0xb2a8d577d5057ac3L,0xb0f2629992be5df7L, + 0xf579c8e500353924L } }, + /* 16 << 42 */ + { { 0xb8fa3d931341ed7aL,0x4223272ca7b59d49L,0x3dcb194783b8c4a4L, + 0x4e413c01ed1302e4L }, + { 0x6d999127e17e44ceL,0xee86bf7533b3adfbL,0xf6902fe625aa96caL, + 0xb73540e4e5aae47dL } }, + /* 17 << 42 */ + { { 0x32801d7b1b4a158cL,0xe571c99e27e2a369L,0x40cb76c010d9f197L, + 0xc308c2893167c0aeL }, + { 0xa6ef9dd3eb7958f2L,0xa7226dfc300879b1L,0x6cd0b3627edf0636L, + 0x4efbce6c7bc37eedL } }, + /* 18 << 42 */ + { { 0x75f92a058d699021L,0x586d4c79772566e3L,0x378ca5f1761ad23aL, + 0x650d86fc1465a8acL }, + { 0x7a4ed457842ba251L,0x6b65e3e642234933L,0xaf1543b731aad657L, + 0xa4cefe98cbfec369L } }, + /* 19 << 42 */ + { { 0xb587da909f47befbL,0x6562e9fb41312d13L,0xa691ea59eff1cefeL, + 0xcc30477a05fc4cf6L }, + { 0xa16324610b0ffd3dL,0xa1f16f3b5b355956L,0x5b148d534224ec24L, + 0xdc834e7bf977012aL } }, + /* 20 << 42 */ + { { 0x7bfc5e75b2c69dbcL,0x3aa77a2903c3da6cL,0xde0df03cca910271L, + 0xcbd5ca4a7806dc55L }, + { 0xe1ca58076db476cbL,0xfde15d625f37a31eL,0xf49af520f41af416L, + 0x96c5c5b17d342db5L } }, + /* 21 << 42 */ + { { 0x155c43b7eb4ceb9bL,0x2e9930104e77371aL,0x1d2987da675d43afL, + 0xef2bc1c08599fd72L }, + { 0x96894b7b9342f6b2L,0x201eadf27c8e71f0L,0xf3479d9f4a1f3efcL, + 0xe0f8a742702a9704L } }, + /* 22 << 42 */ + { { 0xeafd44b6b3eba40cL,0xf9739f29c1c1e0d0L,0x0091471a619d505eL, + 0xc15f9c969d7c263eL }, + { 0x5be4728583afbe33L,0xa3b6d6af04f1e092L,0xe76526b9751a9d11L, + 0x2ec5b26d9a4ae4d2L } }, + /* 23 << 42 */ + { { 0xeb66f4d902f6fb8dL,0x4063c56196912164L,0xeb7050c180ef3000L, + 0x288d1c33eaa5b3f0L }, + { 0xe87c68d607806fd8L,0xb2f7f9d54bbbf50fL,0x25972f3aac8d6627L, + 0xf854777410e8c13bL } }, + /* 24 << 42 */ + { { 0xcc50ef6c872b4a60L,0xab2a34a44613521bL,0x39c5c190983e15d1L, + 0x61dde5df59905512L }, + { 0xe417f6219f2275f3L,0x0750c8b6451d894bL,0x75b04ab978b0bdaaL, + 0x3bfd9fd4458589bdL } }, + /* 25 << 42 */ + { { 0xf1013e30ee9120b6L,0x2b51af9323a4743eL,0xea96ffae48d14d9eL, + 0x71dc0dbe698a1d32L }, + { 0x914962d20180cca4L,0x1ae60677c3568963L,0x8cf227b1437bc444L, + 0xc650c83bc9962c7aL } }, + /* 26 << 42 */ + { { 0x23c2c7ddfe7ccfc4L,0xf925c89d1b929d48L,0x4460f74b06783c33L, + 0xac2c8d49a590475aL }, + { 0xfb40b407b807bba0L,0x9d1e362d69ff8f3aL,0xa33e9681cbef64a4L, + 0x67ece5fa332fb4b2L } }, + /* 27 << 42 */ + { { 0x6900a99b739f10e3L,0xc3341ca9ff525925L,0xee18a626a9e2d041L, + 0xa5a8368529580dddL }, + { 0xf3470c819d7de3cdL,0xedf025862062cf9cL,0xf43522fac010edb0L, + 0x3031413513a4b1aeL } }, + /* 28 << 42 */ + { { 
0xc792e02adb22b94bL,0x993d8ae9a1eaa45bL,0x8aad6cd3cd1e1c63L, + 0x89529ca7c5ce688aL }, + { 0x2ccee3aae572a253L,0xe02b643802a21efbL,0xa7091b6ec9430358L, + 0x06d1b1fa9d7db504L } }, + /* 29 << 42 */ + { { 0x58846d32c4744733L,0x40517c71379f9e34L,0x2f65655f130ef6caL, + 0x526e4488f1f3503fL }, + { 0x8467bd177ee4a976L,0x1d9dc913921363d1L,0xd8d24c33b069e041L, + 0x5eb5da0a2cdf7f51L } }, + /* 30 << 42 */ + { { 0x1c0f3cb1197b994fL,0x3c95a6c52843eae9L,0x7766ffc9a6097ea5L, + 0x7bea4093d723b867L }, + { 0xb48e1f734db378f9L,0x70025b00e37b77acL,0x943dc8e7af24ad46L, + 0xb98a15ac16d00a85L } }, + /* 31 << 42 */ + { { 0x3adc38ba2743b004L,0xb1c7f4f7334415eeL,0xea43df8f1e62d05aL, + 0x326189059d76a3b6L }, + { 0x2fbd0bb5a23a0f46L,0x5bc971db6a01918cL,0x7801d94ab4743f94L, + 0xb94df65e676ae22bL } }, + /* 32 << 42 */ + { { 0xaafcbfabaf95894cL,0x7b9bdc07276b2241L,0xeaf983625bdda48bL, + 0x5977faf2a3fcb4dfL }, + { 0xbed042ef052c4b5bL,0x9fe87f71067591f0L,0xc89c73ca22f24ec7L, + 0x7d37fa9ee64a9f1bL } }, + /* 33 << 42 */ + { { 0x2710841a15562627L,0x2c01a613c243b034L,0x1d135c562bc68609L, + 0xc2ca17158b03f1f6L }, + { 0xc9966c2d3eb81d82L,0xc02abf4a8f6df13eL,0x77b34bd78f72b43bL, + 0xaff6218f360c82b0L } }, + /* 34 << 42 */ + { { 0x0aa5726c8d55b9d2L,0xdc0adbe999e9bffbL,0x9097549cefb9e72aL, + 0x167557129dfb3111L }, + { 0xdd8bf984f26847f9L,0xbcb8e387dfb30cb7L,0xc1fd32a75171ef9cL, + 0x977f3fc7389b363fL } }, + /* 35 << 42 */ + { { 0x116eaf2bf4babda0L,0xfeab68bdf7113c8eL,0xd1e3f064b7def526L, + 0x1ac30885e0b3fa02L }, + { 0x1c5a6e7b40142d9dL,0x839b560330921c0bL,0x48f301fa36a116a3L, + 0x380e1107cfd9ee6dL } }, + /* 36 << 42 */ + { { 0x7945ead858854be1L,0x4111c12ecbd4d49dL,0xece3b1ec3a29c2efL, + 0x6356d4048d3616f5L }, + { 0x9f0d6a8f594d320eL,0x0989316df651ccd2L,0x6c32117a0f8fdde4L, + 0x9abe5cc5a26a9bbcL } }, + /* 37 << 42 */ + { { 0xcff560fb9723f671L,0x21b2a12d7f3d593cL,0xe4cb18da24ba0696L, + 0x186e2220c3543384L }, + { 0x722f64e088312c29L,0x94282a9917dc7752L,0x62467bbf5a85ee89L, + 0xf435c650f10076a0L } }, + /* 38 << 42 */ + { { 0xc9ff153943b3a50bL,0x7132130c1a53efbcL,0x31bfe063f7b0c5b7L, + 0xb0179a7d4ea994ccL }, + { 0x12d064b3c85f455bL,0x472593288f6e0062L,0xf64e590bb875d6d9L, + 0x22dd6225ad92bcc7L } }, + /* 39 << 42 */ + { { 0xb658038eb9c3bd6dL,0x00cdb0d6fbba27c8L,0x0c6813371062c45dL, + 0xd8515b8c2d33407dL }, + { 0xcb8f699e8cbb5ecfL,0x8c4347f8c608d7d8L,0x2c11850abb3e00dbL, + 0x20a8dafdecb49d19L } }, + /* 40 << 42 */ + { { 0xbd78148045ee2f40L,0x75e354af416b60cfL,0xde0b58a18d49a8c4L, + 0xe40e94e2fa359536L }, + { 0xbd4fa59f62accd76L,0x05cf466a8c762837L,0xb5abda99448c277bL, + 0x5a9e01bf48b13740L } }, + /* 41 << 42 */ + { { 0x9d457798326aad8dL,0xbdef4954c396f7e7L,0x6fb274a2c253e292L, + 0x2800bf0a1cfe53e7L }, + { 0x22426d3144438fd4L,0xef2339235e259f9aL,0x4188503c03f66264L, + 0x9e5e7f137f9fdfabL } }, + /* 42 << 42 */ + { { 0x565eb76c5fcc1abaL,0xea63254859b5bff8L,0x5587c087aab6d3faL, + 0x92b639ea6ce39c1bL }, + { 0x0706e782953b135cL,0x7308912e425268efL,0x599e92c7090e7469L, + 0x83b90f529bc35e75L } }, + /* 43 << 42 */ + { { 0x4750b3d0244975b3L,0xf3a4435811965d72L,0x179c67749c8dc751L, + 0xff18cdfed23d9ff0L }, + { 0xc40138332028e247L,0x96e280e2f3bfbc79L,0xf60417bdd0880a84L, + 0x263c9f3d2a568151L } }, + /* 44 << 42 */ + { { 0x36be15b32d2ce811L,0x846dc0c2f8291d21L,0x5cfa0ecb789fcfdbL, + 0x45a0beedd7535b9aL }, + { 0xec8e9f0796d69af1L,0x31a7c5b8599ab6dcL,0xd36d45eff9e2e09fL, + 0x3cf49ef1dcee954bL } }, + /* 45 << 42 */ + { { 0x6be34cf3086cff9bL,0x88dbd49139a3360fL,0x1e96b8cc0dbfbd1dL, + 0xc1e5f7bfcb7e2552L }, + { 
0x0547b21428819d98L,0xc770dd9c7aea9dcbL,0xaef0d4c7041d68c8L, + 0xcc2b981813cb9ba8L } }, + /* 46 << 42 */ + { { 0x7fc7bc76fe86c607L,0x6b7b9337502a9a95L,0x1948dc27d14dab63L, + 0x249dd198dae047beL }, + { 0xe8356584a981a202L,0x3531dd183a893387L,0x1be11f90c85c7209L, + 0x93d2fe1ee2a52b5aL } }, + /* 47 << 42 */ + { { 0x8225bfe2ec6d6b97L,0x9cf6d6f4bd0aa5deL,0x911459cb54779f5fL, + 0x5649cddb86aeb1f3L }, + { 0x321335793f26ce5aL,0xc289a102550f431eL,0x559dcfda73b84c6fL, + 0x84973819ee3ac4d7L } }, + /* 48 << 42 */ + { { 0xb51e55e6f2606a82L,0xe25f706190f2fb57L,0xacef6c2ab1a4e37cL, + 0x864e359d5dcf2706L }, + { 0x479e6b187ce57316L,0x2cab25003a96b23dL,0xed4898628ef16df7L, + 0x2056538cef3758b5L } }, + /* 49 << 42 */ + { { 0xa7df865ef15d3101L,0x80c5533a61b553d7L,0x366e19974ed14294L, + 0x6620741fb3c0bcd6L }, + { 0x21d1d9c4edc45418L,0x005b859ec1cc4a9dL,0xdf01f630a1c462f0L, + 0x15d06cf3f26820c7L } }, + /* 50 << 42 */ + { { 0x9f7f24ee3484be47L,0x2ff33e964a0c902fL,0x00bdf4575a0bc453L, + 0x2378dfaf1aa238dbL }, + { 0x272420ec856720f2L,0x2ad9d95b96797291L,0xd1242cc6768a1558L, + 0x2e287f8b5cc86aa8L } }, + /* 51 << 42 */ + { { 0x796873d0990cecaaL,0xade55f81675d4080L,0x2645eea321f0cd84L, + 0x7a1efa0fb4e17d02L }, + { 0xf6858420037cc061L,0x682e05f0d5d43e12L,0x59c3699427218710L, + 0x85cbba4d3f7cd2fcL } }, + /* 52 << 42 */ + { { 0x726f97297a3cd22aL,0x9f8cd5dc4a628397L,0x17b93ab9c23165edL, + 0xff5f5dbf122823d4L }, + { 0xc1e4e4b5654a446dL,0xd1a9496f677257baL,0x6387ba94de766a56L, + 0x23608bc8521ec74aL } }, + /* 53 << 42 */ + { { 0x16a522d76688c4d4L,0x9d6b428207373abdL,0xa62f07acb42efaa3L, + 0xf73e00f7e3b90180L }, + { 0x36175fec49421c3eL,0xc4e44f9b3dcf2678L,0x76df436b7220f09fL, + 0x172755fb3aa8b6cfL } }, + /* 54 << 42 */ + { { 0xbab89d57446139ccL,0x0a0a6e025fe0208fL,0xcdbb63e211e5d399L, + 0x33ecaa12a8977f0bL }, + { 0x59598b21f7c42664L,0xb3e91b32ab65d08aL,0x035822eef4502526L, + 0x1dcf0176720a82a9L } }, + /* 55 << 42 */ + { { 0x50f8598f3d589e02L,0xdf0478ffb1d63d2cL,0x8b8068bd1571cd07L, + 0x30c3aa4fd79670cdL }, + { 0x25e8fd4b941ade7fL,0x3d1debdc32790011L,0x65b6dcbd3a3f9ff0L, + 0x282736a4793de69cL } }, + /* 56 << 42 */ + { { 0xef69a0c3d41d3bd3L,0xb533b8c907a26bdeL,0xe2801d97db2edf9fL, + 0xdc4a8269e1877af0L }, + { 0x6c1c58513d590dbeL,0x84632f6bee4e9357L,0xd36d36b779b33374L, + 0xb46833e39bbca2e6L } }, + /* 57 << 42 */ + { { 0x37893913f7fc0586L,0x385315f766bf4719L,0x72c56293b31855dcL, + 0xd1416d4e849061feL }, + { 0xbeb3ab7851047213L,0x447f6e61f040c996L,0xd06d310d638b1d0cL, + 0xe28a413fbad1522eL } }, + /* 58 << 42 */ + { { 0x685a76cb82003f86L,0x610d07f70bcdbca3L,0x6ff660219ca4c455L, + 0x7df39b87cea10eecL }, + { 0xb9255f96e22db218L,0x8cc6d9eb08a34c44L,0xcd4ffb86859f9276L, + 0x8fa15eb250d07335L } }, + /* 59 << 42 */ + { { 0xdf553845cf2c24b5L,0x89f66a9f52f9c3baL,0x8f22b5b9e4a7ceb3L, + 0xaffef8090e134686L }, + { 0x3e53e1c68eb8fac2L,0x93c1e4eb28aec98eL,0xb6b91ec532a43bcbL, + 0x2dbfa947b2d74a51L } }, + /* 60 << 42 */ + { { 0xe065d190ca84bad7L,0xfb13919fad58e65cL,0x3c41718bf1cb6e31L, + 0x688969f006d05c3fL }, + { 0xd4f94ce721264d45L,0xfdfb65e97367532bL,0x5b1be8b10945a39dL, + 0x229f789c2b8baf3bL } }, + /* 61 << 42 */ + { { 0xd8f41f3e6f49f15dL,0x678ce828907f0792L,0xc69ace82fca6e867L, + 0x106451aed01dcc89L }, + { 0x1bb4f7f019fc32d2L,0x64633dfcb00c52d2L,0x8f13549aad9ea445L, + 0x99a3bf50fb323705L } }, + /* 62 << 42 */ + { { 0x0c9625a2534d4dbcL,0x45b8f1d1c2a2fea3L,0x76ec21a1a530fc1aL, + 0x4bac9c2a9e5bd734L }, + { 0x5996d76a7b4e3587L,0x0045cdee1182d9e3L,0x1aee24b91207f13dL, + 0x66452e9797345a41L } }, + /* 63 << 42 */ + { { 
0x16e5b0549f950cd0L,0x9cc72fb1d7fdd075L,0x6edd61e766249663L, + 0xde4caa4df043cccbL }, + { 0x11b1f57a55c7ac17L,0x779cbd441a85e24dL,0x78030f86e46081e7L, + 0xfd4a60328e20f643L } }, + /* 64 << 42 */ + { { 0xcc7a64880a750c0fL,0x39bacfe34e548e83L,0x3d418c760c110f05L, + 0x3e4daa4cb1f11588L }, + { 0x2733e7b55ffc69ffL,0x46f147bc92053127L,0x885b2434d722df94L, + 0x6a444f65e6fc6b7cL } }, + /* 0 << 49 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 49 */ + { { 0x7a1a465ac3f16ea8L,0x115a461db2f1d11cL,0x4767dd956c68a172L, + 0x3392f2ebd13a4698L }, + { 0xc7a99ccde526cdc7L,0x8e537fdc22292b81L,0x76d8cf69a6d39198L, + 0xffc5ff432446852dL } }, + /* 2 << 49 */ + { { 0x97b14f7ea90567e6L,0x513257b7b6ae5cb7L,0x85454a3c9f10903dL, + 0xd8d2c9ad69bc3724L }, + { 0x38da93246b29cb44L,0xb540a21d77c8cbacL,0x9bbfe43501918e42L, + 0xfffa707a56c3614eL } }, + /* 3 << 49 */ + { { 0x0ce4e3f1d4e353b7L,0x062d8a14ef46b0a0L,0x6408d5ab574b73fdL, + 0xbc41d1c9d3273ffdL }, + { 0x3538e1e76be77800L,0x71fe8b37c5655031L,0x1cd916216b9b331aL, + 0xad825d0bbb388f73L } }, + /* 4 << 49 */ + { { 0x56c2e05b1cb76219L,0x0ec0bf9171567e7eL,0xe7076f8661c4c910L, + 0xd67b085bbabc04d9L }, + { 0x9fb904595e93a96aL,0x7526c1eafbdc249aL,0x0d44d367ecdd0bb7L, + 0x953999179dc0d695L } }, + /* 5 << 49 */ + { { 0x61360ee99e240d18L,0x057cdcacb4b94466L,0xe7667cd12fe5325cL, + 0x1fa297b521974e3bL }, + { 0xfa4081e7db083d76L,0x31993be6f206bd15L,0x8949269b14c19f8cL, + 0x21468d72a9d92357L } }, + /* 6 << 49 */ + { { 0x2ccbc583a4c506ecL,0x957ed188d1acfe97L,0x8baed83312f1aea2L, + 0xef2a6cb48325362dL }, + { 0x130dde428e195c43L,0xc842025a0e6050c6L,0x2da972a708686a5dL, + 0xb52999a1e508b4a8L } }, + /* 7 << 49 */ + { { 0xd9f090b910a5a8bdL,0xca91d249096864daL,0x8e6a93be3f67dbc1L, + 0xacae6fbaf5f4764cL }, + { 0x1563c6e0d21411a0L,0x28fa787fda0a4ad8L,0xd524491c908c8030L, + 0x1257ba0e4c795f07L } }, + /* 8 << 49 */ + { { 0x83f49167ceca9754L,0x426d2cf64b7939a0L,0x2555e355723fd0bfL, + 0xa96e6d06c4f144e2L }, + { 0x4768a8dd87880e61L,0x15543815e508e4d5L,0x09d7e772b1b65e15L, + 0x63439dd6ac302fa0L } }, + /* 9 << 49 */ + { { 0xb93f802fc14e35c2L,0x71735b7c4341333cL,0x03a2510416d4f362L, + 0x3f4d069bbf433c8eL }, + { 0x0d83ae01f78f5a7cL,0x50a8ffbe7c4eed07L,0xc74f890676e10f83L, + 0x7d0809669ddaf8e1L } }, + /* 10 << 49 */ + { { 0xb11df8e1698e04ccL,0x877be203169005c8L,0x32749e8c4f3c6179L, + 0x2dbc9d0a7853fc05L }, + { 0x187d4f939454d937L,0xe682ce9db4800e1bL,0xa9129ad8165e68e8L, + 0x0fe29735be7f785bL } }, + /* 11 << 49 */ + { { 0x5303f40c5b9e02b7L,0xa37c969235ee04e8L,0x5f46cc2034d6632bL, + 0x55ef72b296ac545bL }, + { 0xabec5c1f7b91b062L,0x0a79e1c7bb33e821L,0xbb04b4283a9f4117L, + 0x0de1f28ffd2a475aL } }, + /* 12 << 49 */ + { { 0x31019ccf3a4434b4L,0xa34581111a7954dcL,0xa9dac80de34972a7L, + 0xb043d05474f6b8ddL }, + { 0x021c319e11137b1aL,0x00a754ceed5cc03fL,0x0aa2c794cbea5ad4L, + 0x093e67f470c015b6L } }, + /* 13 << 49 */ + { { 0x72cdfee9c97e3f6bL,0xc10bcab4b6da7461L,0x3b02d2fcb59806b9L, + 0x85185e89a1de6f47L }, + { 0x39e6931f0eb6c4d4L,0x4d4440bdd4fa5b04L,0x5418786e34be7eb8L, + 0x6380e5219d7259bcL } }, + /* 14 << 49 */ + { { 0x20ac0351d598d710L,0x272c4166cb3a4da4L,0xdb82fe1aca71de1fL, + 0x746e79f2d8f54b0fL }, + { 0x6e7fc7364b573e9bL,0x75d03f46fd4b5040L,0x5c1cc36d0b98d87bL, + 0x513ba3f11f472da1L } }, + /* 15 << 49 */ + { { 0x79d0af26abb177ddL,0xf82ab5687891d564L,0x2b6768a972232173L, + 0xefbb3bb08c1f6619L }, + { 0xb29c11dba6d18358L,0x519e2797b0916d3aL,0xd4dc18f09188e290L, + 0x648e86e398b0ca7fL } }, + /* 16 << 49 */ + { { 0x859d3145983c38b5L,0xb14f176c637abc8bL,0x2793fb9dcaff7be6L, + 
0xebe5a55f35a66a5aL }, + { 0x7cec1dcd9f87dc59L,0x7c595cd3fbdbf560L,0x5b543b2226eb3257L, + 0x69080646c4c935fdL } }, + /* 17 << 49 */ + { { 0x7f2e440381e9ede3L,0x243c3894caf6df0aL,0x7c605bb11c073b11L, + 0xcd06a541ba6a4a62L }, + { 0x2916894949d4e2e5L,0x33649d074af66880L,0xbfc0c885e9a85035L, + 0xb4e52113fc410f4bL } }, + /* 18 << 49 */ + { { 0xdca3b70678a6513bL,0x92ea4a2a9edb1943L,0x02642216db6e2dd8L, + 0x9b45d0b49fd57894L }, + { 0x114e70dbc69d11aeL,0x1477dd194c57595fL,0xbc2208b4ec77c272L, + 0x95c5b4d7db68f59cL } }, + /* 19 << 49 */ + { { 0xb8c4fc6342e532b7L,0x386ba4229ae35290L,0xfb5dda42d201ecbcL, + 0x2353dc8ba0e38fd6L }, + { 0x9a0b85ea68f7e978L,0x96ec56822ad6d11fL,0x5e279d6ce5f6886dL, + 0xd3fe03cd3cb1914dL } }, + /* 20 << 49 */ + { { 0xfe541fa47ea67c77L,0x952bd2afe3ea810cL,0x791fef568d01d374L, + 0xa3a1c6210f11336eL }, + { 0x5ad0d5a9c7ec6d79L,0xff7038af3225c342L,0x003c6689bc69601bL, + 0x25059bc745e8747dL } }, + /* 21 << 49 */ + { { 0xfa4965b2f2086fbfL,0xf6840ea686916078L,0xd7ac762070081d6cL, + 0xe600da31b5328645L }, + { 0x01916f63529b8a80L,0xe80e48582d7d6f3eL,0x29eb0fe8d664ca7cL, + 0xf017637be7b43b0cL } }, + /* 22 << 49 */ + { { 0x9a75c80676cb2566L,0x8f76acb1b24892d9L,0x7ae7b9cc1f08fe45L, + 0x19ef73296a4907d8L }, + { 0x2db4ab715f228bf0L,0xf3cdea39817032d7L,0x0b1f482edcabe3c0L, + 0x3baf76b4bb86325cL } }, + /* 23 << 49 */ + { { 0xd49065e010089465L,0x3bab5d298e77c596L,0x7636c3a6193dbd95L, + 0xdef5d294b246e499L }, + { 0xb22c58b9286b2475L,0xa0b93939cd80862bL,0x3002c83af0992388L, + 0x6de01f9beacbe14cL } }, + /* 24 << 49 */ + { { 0x6aac688eadd70482L,0x708de92a7b4a4e8aL,0x75b6dd73758a6eefL, + 0xea4bf352725b3c43L }, + { 0x10041f2c87912868L,0xb1b1be95ef09297aL,0x19ae23c5a9f3860aL, + 0xc4f0f839515dcf4bL } }, + /* 25 << 49 */ + { { 0x3c7ecca397f6306aL,0x744c44ae68a3a4b0L,0x69cd13a0b3a1d8a2L, + 0x7cad0a1e5256b578L }, + { 0xea653fcd33791d9eL,0x9cc2a05d74b2e05fL,0x73b391dcfd7affa2L, + 0xddb7091eb6b05442L } }, + /* 26 << 49 */ + { { 0xc71e27bf8538a5c6L,0x195c63dd89abff17L,0xfd3152851b71e3daL, + 0x9cbdfda7fa680fa0L }, + { 0x9db876ca849d7eabL,0xebe2764b3c273271L,0x663357e3f208dceaL, + 0x8c5bd833565b1b70L } }, + /* 27 << 49 */ + { { 0xccc3b4f59837fc0dL,0x9b641ba8a79cf00fL,0x7428243ddfdf3990L, + 0x83a594c4020786b1L }, + { 0xb712451a526c4502L,0x9d39438e6adb3f93L,0xfdb261e3e9ff0ccdL, + 0x80344e3ce07af4c3L } }, + /* 28 << 49 */ + { { 0x75900d7c2fa4f126L,0x08a3b8655c99a232L,0x2478b6bfdb25e0c3L, + 0x482cc2c271db2edfL }, + { 0x37df7e645f321bb8L,0x8a93821b9a8005b4L,0x3fa2f10ccc8c1958L, + 0x0d3322182c269d0aL } }, + /* 29 << 49 */ + { { 0x20ab8119e246b0e6L,0xb39781e4d349fd17L,0xd293231eb31aa100L, + 0x4b779c97bb032168L }, + { 0x4b3f19e1c8470500L,0x45b7efe90c4c869dL,0xdb84f38aa1a6bbccL, + 0x3b59cb15b2fddbc1L } }, + /* 30 << 49 */ + { { 0xba5514df3fd165e8L,0x499fd6a9061f8811L,0x72cd1fe0bfef9f00L, + 0x120a4bb979ad7e8aL }, + { 0xf2ffd0955f4a5ac5L,0xcfd174f195a7a2f0L,0xd42301ba9d17baf1L, + 0xd2fa487a77f22089L } }, + /* 31 << 49 */ + { { 0x9cb09efeb1dc77e1L,0xe956693921c99682L,0x8c5469016c6067bbL, + 0xfd37857461c24456L }, + { 0x2b6a6cbe81796b33L,0x62d550f658e87f8bL,0x1b763e1c7f1b01b4L, + 0x4b93cfea1b1b5e12L } }, + /* 32 << 49 */ + { { 0xb93452381d531696L,0x57201c0088cdde69L,0xdde922519a86afc7L, + 0xe3043895bd35cea8L }, + { 0x7608c1e18555970dL,0x8267dfa92535935eL,0xd4c60a57322ea38bL, + 0xe0bf7977804ef8b5L } }, + /* 33 << 49 */ + { { 0x1a0dab28c06fece4L,0xd405991e94e7b49dL,0xc542b6d2706dab28L, + 0xcb228da3a91618fbL }, + { 0x224e4164107d1ceaL,0xeb9fdab3d0f5d8f1L,0xc02ba3860d6e41cdL, + 0x676a72c59b1f7146L } }, + /* 34 << 49 */ + { { 
0xffd6dd984d6cb00bL,0xcef9c5cade2e8d7cL,0xa1bbf5d7641c7936L, + 0x1b95b230ee8f772eL }, + { 0xf765a92ee8ac25b1L,0xceb04cfc3a18b7c6L,0x27944cef0acc8966L, + 0xcbb3c957434c1004L } }, + /* 35 << 49 */ + { { 0x9c9971a1a43ff93cL,0x5bc2db17a1e358a9L,0x45b4862ea8d9bc82L, + 0x70ebfbfb2201e052L }, + { 0xafdf64c792871591L,0xea5bcae6b42d0219L,0xde536c552ad8f03cL, + 0xcd6c3f4da76aa33cL } }, + /* 36 << 49 */ + { { 0xbeb5f6230bca6de3L,0xdd20dd99b1e706fdL,0x90b3ff9dac9059d4L, + 0x2d7b29027ccccc4eL }, + { 0x8a090a59ce98840fL,0xa5d947e08410680aL,0x49ae346a923379a5L, + 0x7dbc84f9b28a3156L } }, + /* 37 << 49 */ + { { 0xfd40d91654a1aff2L,0xabf318ba3a78fb9bL,0x50152ed83029f95eL, + 0x9fc1dd77c58ad7faL }, + { 0x5fa5791513595c17L,0xb95046688f62b3a9L,0x907b5b24ff3055b0L, + 0x2e995e359a84f125L } }, + /* 38 << 49 */ + { { 0x87dacf697e9bbcfbL,0x95d0c1d6e86d96e3L,0x65726e3c2d95a75cL, + 0x2c3c9001acd27f21L }, + { 0x1deab5616c973f57L,0x108b7e2ca5221643L,0x5fee9859c4ef79d4L, + 0xbd62b88a40d4b8c6L } }, + /* 39 << 49 */ + { { 0xb4dd29c4197c75d6L,0x266a6df2b7076febL,0x9512d0ea4bf2df11L, + 0x1320c24f6b0cc9ecL }, + { 0x6bb1e0e101a59596L,0x8317c5bbeff9aaacL,0x65bb405e385aa6c9L, + 0x613439c18f07988fL } }, + /* 40 << 49 */ + { { 0xd730049f16a66e91L,0xe97f2820fa1b0e0dL,0x4131e003304c28eaL, + 0x820ab732526bac62L }, + { 0xb2ac9ef928714423L,0x54ecfffaadb10cb2L,0x8781476ef886a4ccL, + 0x4b2c87b5db2f8d49L } }, + /* 41 << 49 */ + { { 0xe857cd200a44295dL,0x707d7d2158c6b044L,0xae8521f9f596757cL, + 0x87448f0367b2b714L }, + { 0x13a9bc455ebcd58dL,0x79bcced99122d3c1L,0x3c6442479e076642L, + 0x0cf227782df4767dL } }, + /* 42 << 49 */ + { { 0x5e61aee471d444b6L,0x211236bfc5084a1dL,0x7e15bc9a4fd3eaf6L, + 0x68df2c34ab622bf5L }, + { 0x9e674f0f59bf4f36L,0xf883669bd7f34d73L,0xc48ac1b831497b1dL, + 0x323b925d5106703bL } }, + /* 43 << 49 */ + { { 0x22156f4274082008L,0xeffc521ac8482bcbL,0x5c6831bf12173479L, + 0xcaa2528fc4739490L }, + { 0x84d2102a8f1b3c4dL,0xcf64dfc12d9bec0dL,0x433febad78a546efL, + 0x1f621ec37b73cef1L } }, + /* 44 << 49 */ + { { 0x6aecd62737338615L,0x162082ab01d8edf6L,0x833a811919e86b66L, + 0x6023a251d299b5dbL }, + { 0xf5bb0c3abbf04b89L,0x6735eb69ae749a44L,0xd0e058c54713de3bL, + 0xfdf2593e2c3d4ccdL } }, + /* 45 << 49 */ + { { 0x1b8f414efdd23667L,0xdd52aacafa2015eeL,0x3e31b517bd9625ffL, + 0x5ec9322d8db5918cL }, + { 0xbc73ac85a96f5294L,0x82aa5bf361a0666aL,0x49755810bf08ac42L, + 0xd21cdfd5891cedfcL } }, + /* 46 << 49 */ + { { 0x918cb57b67f8be10L,0x365d1a7c56ffa726L,0x2435c5046532de93L, + 0xc0fc5e102674cd02L }, + { 0x6e51fcf89cbbb142L,0x1d436e5aafc50692L,0x766bffff3fbcae22L, + 0x3148c2fdfd55d3b8L } }, + /* 47 << 49 */ + { { 0x52c7fdc9233222faL,0x89ff1092e419fb6bL,0x3cd6db9925254977L, + 0x2e85a1611cf12ca7L }, + { 0xadd2547cdc810bc9L,0xea3f458f9d257c22L,0x642c1fbe27d6b19bL, + 0xed07e6b5140481a6L } }, + /* 48 << 49 */ + { { 0x6ada1d4286d2e0f8L,0xe59201220e8a9fd5L,0x02c936af708c1b49L, + 0x60f30fee2b4bfaffL }, + { 0x6637ad06858e6a61L,0xce4c77673fd374d0L,0x39d54b2d7188defbL, + 0xa8c9d250f56a6b66L } }, + /* 49 << 49 */ + { { 0x58fc0f5eb24fe1dcL,0x9eaf9dee6b73f24cL,0xa90d588b33650705L, + 0xde5b62c5af2ec729L }, + { 0x5c72cfaed3c2b36eL,0x868c19d5034435daL,0x88605f93e17ee145L, + 0xaa60c4ee77a5d5b1L } }, + /* 50 << 49 */ + { { 0xbcf5bfd23b60c472L,0xaf4ef13ceb1d3049L,0x373f44fce13895c9L, + 0xf29b382f0cbc9822L }, + { 0x1bfcb85373efaef6L,0xcf56ac9ca8c96f40L,0xd7adf1097a191e24L, + 0x98035f44bf8a8dc2L } }, + /* 51 << 49 */ + { { 0xf40a71b91e750c84L,0xc57f7b0c5dc6c469L,0x49a0e79c6fbc19c1L, + 0x6b0f5889a48ebdb8L }, + { 
0x5d3fd084a07c4e9fL,0xc3830111ab27de14L,0x0e4929fe33e08dccL, + 0xf4a5ad2440bb73a3L } }, + /* 52 << 49 */ + { { 0xde86c2bf490f97caL,0x288f09c667a1ce18L,0x364bb8861844478dL, + 0x7840fa42ceedb040L }, + { 0x1269fdd25a631b37L,0x94761f1ea47c8b7dL,0xfc0c2e17481c6266L, + 0x85e16ea23daa5fa7L } }, + /* 53 << 49 */ + { { 0xccd8603392491048L,0x0c2f6963f4d402d7L,0x6336f7dfdf6a865cL, + 0x0a2a463cb5c02a87L }, + { 0xb0e29be7bf2f12eeL,0xf0a2200266bad988L,0x27f87e039123c1d7L, + 0x21669c55328a8c98L } }, + /* 54 << 49 */ + { { 0x186b980392f14529L,0xd3d056cc63954df3L,0x2f03fd58175a46f6L, + 0x63e34ebe11558558L }, + { 0xe13fedee5b80cfa5L,0xe872a120d401dbd1L,0x52657616e8a9d667L, + 0xbc8da4b6e08d6693L } }, + /* 55 << 49 */ + { { 0x370fb9bb1b703e75L,0x6773b186d4338363L,0x18dad378ecef7bffL, + 0xaac787ed995677daL }, + { 0x4801ea8b0437164bL,0xf430ad2073fe795eL,0xb164154d8ee5eb73L, + 0x0884ecd8108f7c0eL } }, + /* 56 << 49 */ + { { 0x0e6ec0965f520698L,0x640631fe44f7b8d9L,0x92fd34fca35a68b9L, + 0x9c5a4b664d40cf4eL }, + { 0x949454bf80b6783dL,0x80e701fe3a320a10L,0x8d1a564a1a0a39b2L, + 0x1436d53d320587dbL } }, + /* 57 << 49 */ + { { 0xf5096e6d6556c362L,0xbc23a3c0e2455d7eL,0x3a7aee54807230f9L, + 0x9ba1cfa622ae82fdL }, + { 0x833a057a99c5d706L,0x8be85f4b842315c9L,0xd083179a66a72f12L, + 0x2fc77d5dcdcc73cdL } }, + /* 58 << 49 */ + { { 0x22b88a805616ee30L,0xfb09548fe7ab1083L,0x8ad6ab0d511270cdL, + 0x61f6c57a6924d9abL }, + { 0xa0f7bf7290aecb08L,0x849f87c90df784a4L,0x27c79c15cfaf1d03L, + 0xbbf9f675c463faceL } }, + /* 59 << 49 */ + { { 0x91502c65765ba543L,0x18ce3cac42ea60ddL,0xe5cee6ac6e43ecb3L, + 0x63e4e91068f2aeebL }, + { 0x26234fa3c85932eeL,0x96883e8b4c90c44dL,0x29b9e738a18a50f6L, + 0xbfc62b2a3f0420dfL } }, + /* 60 << 49 */ + { { 0xd22a7d906d3e1fa9L,0x17115618fe05b8a3L,0x2a0c9926bb2b9c01L, + 0xc739fcc6e07e76a2L }, + { 0x540e9157165e439aL,0x06353a626a9063d8L,0x84d9559461e927a3L, + 0x013b9b26e2e0be7fL } }, + /* 61 << 49 */ + { { 0x4feaec3b973497f1L,0x15c0f94e093ebc2dL,0x6af5f22733af0583L, + 0x0c2af206c61f3340L }, + { 0xd25dbdf14457397cL,0x2e8ed017cabcbae0L,0xe3010938c2815306L, + 0xbaa99337e8c6cd68L } }, + /* 62 << 49 */ + { { 0x085131823b0ec7deL,0x1e1b822b58df05dfL,0x5c14842fa5c3b683L, + 0x98fe977e3eba34ceL }, + { 0xfd2316c20d5e8873L,0xe48d839abd0d427dL,0x495b2218623fc961L, + 0x24ee56e7b46fba5eL } }, + /* 63 << 49 */ + { { 0x9184a55b91e4de58L,0xa7488ca5dfdea288L,0xa723862ea8dcc943L, + 0x92d762b2849dc0fcL }, + { 0x3c444a12091ff4a9L,0x581113fa0cada274L,0xb9de0a4530d8eae2L, + 0x5e0fcd85df6b41eaL } }, + /* 64 << 49 */ + { { 0x6233ea68c094dbb5L,0xb77d062ed968d410L,0x3e719bbc58b3002dL, + 0x68e7dd3d3dc49d58L }, + { 0x8d825740013a5e58L,0x213117473c9e3c1bL,0x0cb0a2a77c99b6abL, + 0x5c48a3b3c2f888f2L } }, + /* 0 << 56 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 56 */ + { { 0xc7913e91991724f3L,0x5eda799c39cbd686L,0xddb595c763d4fc1eL, + 0x6b63b80bac4fed54L }, + { 0x6ea0fc697e5fb516L,0x737708bad0f1c964L,0x9628745f11a92ca5L, + 0x61f379589a86967aL } }, + /* 2 << 56 */ + { { 0x9af39b2caa665072L,0x78322fa4efd324efL,0x3d153394c327bd31L, + 0x81d5f2713129dab0L }, + { 0xc72e0c42f48027f5L,0xaa40cdbc8536e717L,0xf45a657a2d369d0fL, + 0xb03bbfc4ea7f74e6L } }, + /* 3 << 56 */ + { { 0x46a8c4180d738dedL,0x6f1a5bb0e0de5729L,0xf10230b98ba81675L, + 0x32c6f30c112b33d4L }, + { 0x7559129dd8fffb62L,0x6a281b47b459bf05L,0x77c1bd3afa3b6776L, + 0x0709b3807829973aL } }, + /* 4 << 56 */ + { { 0x8c26b232a3326505L,0x38d69272ee1d41bfL,0x0459453effe32afaL, + 0xce8143ad7cb3ea87L }, + { 0x932ec1fa7e6ab666L,0x6cd2d23022286264L,0x459a46fe6736f8edL, 
+ 0x50bf0d009eca85bbL } }, + /* 5 << 56 */ + { { 0x0b825852877a21ecL,0x300414a70f537a94L,0x3f1cba4021a9a6a2L, + 0x50824eee76943c00L }, + { 0xa0dbfcecf83cba5dL,0xf953814893b4f3c0L,0x6174416248f24dd7L, + 0x5322d64de4fb09ddL } }, + /* 6 << 56 */ + { { 0x574473843d9325f3L,0xa9bef2d0f371cb84L,0x77d2188ba61e36c5L, + 0xbbd6a7d7c602df72L }, + { 0xba3aa9028f61bc0bL,0xf49085ed6ed0b6a1L,0x8bc625d6ae6e8298L, + 0x832b0b1da2e9c01dL } }, + /* 7 << 56 */ + { { 0xa337c447f1f0ced1L,0x800cc7939492dd2bL,0x4b93151dbea08efaL, + 0x820cf3f8de0a741eL }, + { 0xff1982dc1c0f7d13L,0xef92196084dde6caL,0x1ad7d97245f96ee3L, + 0x319c8dbe29dea0c7L } }, + /* 8 << 56 */ + { { 0xd3ea38717b82b99bL,0x75922d4d470eb624L,0x8f66ec543b95d466L, + 0x66e673ccbee1e346L }, + { 0x6afe67c4b5f2b89aL,0x3de9c1e6290e5cd3L,0x8c278bb6310a2adaL, + 0x420fa3840bdb323bL } }, + /* 9 << 56 */ + { { 0x0ae1d63b0eb919b0L,0xd74ee51da74b9620L,0x395458d0a674290cL, + 0x324c930f4620a510L }, + { 0x2d1f4d19fbac27d4L,0x4086e8ca9bedeeacL,0x0cdd211b9b679ab8L, + 0x5970167d7090fec4L } }, + /* 10 << 56 */ + { { 0x3420f2c9faf1fc63L,0x616d333a328c8bb4L,0x7d65364c57f1fe4aL, + 0x9343e87755e5c73aL }, + { 0x5795176be970e78cL,0xa36ccebf60533627L,0xfc7c738009cdfc1bL, + 0xb39a2afeb3fec326L } }, + /* 11 << 56 */ + { { 0xb7ff1ba16224408aL,0xcc856e92247cfc5eL,0x01f102e7c18bc493L, + 0x4613ab742091c727L }, + { 0xaa25e89cc420bf2bL,0x00a5317690337ec2L,0xd2be9f437d025fc7L, + 0x3316fb856e6fe3dcL } }, + /* 12 << 56 */ + { { 0x27520af59ac50814L,0xfdf95e789a8e4223L,0xb7e7df2a56bec5a0L, + 0xf7022f7ddf159e5dL }, + { 0x93eeeab1cac1fe8fL,0x8040188c37451168L,0x7ee8aa8ad967dce6L, + 0xfa0e79e73abc9299L } }, + /* 13 << 56 */ + { { 0x67332cfc2064cfd1L,0x339c31deb0651934L,0x719b28d52a3bcbeaL, + 0xee74c82b9d6ae5c6L }, + { 0x0927d05ebaf28ee6L,0x82cecf2c9d719028L,0x0b0d353eddb30289L, + 0xfe4bb977fddb2e29L } }, + /* 14 << 56 */ + { { 0xbb5bb990640bfd9eL,0xd226e27782f62108L,0x4bf0098502ffdd56L, + 0x7756758a2ca1b1b5L }, + { 0xc32b62a35285fe91L,0xedbc546a8c9cd140L,0x1e47a013af5cb008L, + 0xbca7e720073ce8f2L } }, + /* 15 << 56 */ + { { 0xe10b2ab817a91caeL,0xb89aab6508e27f63L,0x7b3074a7dba3ddf9L, + 0x1c20ce09330c2972L }, + { 0x6b9917b45fcf7e33L,0xe6793743945ceb42L,0x18fc22155c633d19L, + 0xad1adb3cc7485474L } }, + /* 16 << 56 */ + { { 0x646f96796424c49bL,0xf888dfe867c241c9L,0xe12d4b9324f68b49L, + 0x9a6b62d8a571df20L }, + { 0x81b4b26d179483cbL,0x666f96329511fae2L,0xd281b3e4d53aa51fL, + 0x7f96a7657f3dbd16L } }, + /* 17 << 56 */ + { { 0xa7f8b5bf074a30ceL,0xd7f52107005a32e6L,0x6f9e090750237ed4L, + 0x2f21da478096fa2bL }, + { 0xf3e19cb4eec863a0L,0xd18f77fd9527620aL,0x9505c81c407c1cf8L, + 0x9998db4e1b6ec284L } }, + /* 18 << 56 */ + { { 0x7e3389e5c247d44dL,0x125071413f4f3d80L,0xd4ba01104a78a6c7L, + 0x312874a0767720beL }, + { 0xded059a675944370L,0xd6123d903b2c0bddL,0xa56b717b51c108e3L, + 0x9bb7940e070623e9L } }, + /* 19 << 56 */ + { { 0x794e2d5984ac066cL,0xf5954a92e68c69a0L,0x28c524584fd99dccL, + 0x60e639fcb1012517L }, + { 0xc2e601257de79248L,0xe9ef6404f12fc6d7L,0x4c4f28082a3b5d32L, + 0x865ad32ec768eb8aL } }, + /* 20 << 56 */ + { { 0xac02331b13fb70b6L,0x037b44c195599b27L,0x1a860fc460bd082cL, + 0xa2e25745c980cd01L }, + { 0xee3387a81da0263eL,0x931bfb952d10f3d6L,0x5b687270a1f24a32L, + 0xf140e65dca494b86L } }, + /* 21 << 56 */ + { { 0x4f4ddf91b2f1ac7aL,0xf99eaabb760fee27L,0x57f4008a49c228e5L, + 0x090be4401cf713bbL }, + { 0xac91fbe45004f022L,0xd838c2c2569e1af6L,0xd6c7d20b0f1daaa5L, + 0xaa063ac11bbb02c0L } }, + /* 22 << 56 */ + { { 0x0938a42259558a78L,0x5343c6698435da2fL,0x96f67b18034410dcL, + 0x7cc1e42484510804L }, + { 
0x86a1543f16dfbb7dL,0x921fa9425b5bd592L,0x9dcccb6eb33dd03cL, + 0x8581ddd9b843f51eL } }, + /* 23 << 56 */ + { { 0x54935fcb81d73c9eL,0x6d07e9790a5e97abL,0x4dc7b30acf3a6babL, + 0x147ab1f3170bee11L }, + { 0x0aaf8e3d9fafdee4L,0xfab3dbcb538a8b95L,0x405df4b36ef13871L, + 0xf1f4e9cb088d5a49L } }, + /* 24 << 56 */ + { { 0x9bcd24d366b33f1dL,0x3b97b8205ce445c0L,0xe2926549ba93ff61L, + 0xd9c341ce4dafe616L }, + { 0xfb30a76e16efb6f3L,0xdf24b8ca605b953cL,0x8bd52afec2fffb9fL, + 0xbbac5ff7e19d0b96L } }, + /* 25 << 56 */ + { { 0x43c01b87459afccdL,0x6bd45143b7432652L,0x8473453055b5d78eL, + 0x81088fdb1554ba7dL }, + { 0xada0a52c1e269375L,0xf9f037c42dc5ec10L,0xc066060794bfbc11L, + 0xc0a630bbc9c40d2fL } }, + /* 26 << 56 */ + { { 0x5efc797eab64c31eL,0xffdb1dab74507144L,0xf61242871ca6790cL, + 0xe9609d81e69bf1bfL }, + { 0xdb89859500d24fc9L,0x9c750333e51fb417L,0x51830a91fef7bbdeL, + 0x0ce67dc8945f585cL } }, + /* 27 << 56 */ + { { 0x9a730ed44763eb50L,0x24a0e221c1ab0d66L,0x643b6393648748f3L, + 0x1982daa16d3c6291L }, + { 0x6f00a9f78bbc5549L,0x7a1783e17f36384eL,0xe8346323de977f50L, + 0x91ab688db245502aL } }, + /* 28 << 56 */ + { { 0x331ab6b56d0bdd66L,0x0a6ef32e64b71229L,0x1028150efe7c352fL, + 0x27e04350ce7b39d3L }, + { 0x2a3c8acdc1070c82L,0xfb2034d380c9feefL,0x2d729621709f3729L, + 0x8df290bf62cb4549L } }, + /* 29 << 56 */ + { { 0x02f99f33fc2e4326L,0x3b30076d5eddf032L,0xbb21f8cf0c652fb5L, + 0x314fb49eed91cf7bL }, + { 0xa013eca52f700750L,0x2b9e3c23712a4575L,0xe5355557af30fbb0L, + 0x1ada35167c77e771L } }, + /* 30 << 56 */ + { { 0x45f6ecb27b135670L,0xe85d19df7cfc202eL,0x0f1b50c758d1be9fL, + 0x5ebf2c0aead2e344L }, + { 0x1531fe4eabc199c9L,0xc703259256bab0aeL,0x16ab2e486c1fec54L, + 0x0f87fda804280188L } }, + /* 31 << 56 */ + { { 0xdc9f46fc609e4a74L,0x2a44a143ba667f91L,0xbc3d8b95b4d83436L, + 0xa01e4bd0c7bd2958L }, + { 0x7b18293273483c90L,0xa79c6aa1a7c7b598L,0xbf3983c6eaaac07eL, + 0x8f18181e96e0d4e6L } }, + /* 32 << 56 */ + { { 0x8553d37c051af62bL,0xe9a998eb0bf94496L,0xe0844f9fb0d59aa1L, + 0x983fd558e6afb813L }, + { 0x9670c0ca65d69804L,0x732b22de6ea5ff2dL,0xd7640ba95fd8623bL, + 0x9f619163a6351782L } }, + /* 33 << 56 */ + { { 0x0bfc27eeacee5043L,0xae419e732eb10f02L,0x19c028d18943fb05L, + 0x71f01cf7ff13aa2aL }, + { 0x7790737e8887a132L,0x6751330966318410L,0x9819e8a37ddb795eL, + 0xfecb8ef5dad100b2L } }, + /* 34 << 56 */ + { { 0x59f74a223021926aL,0xb7c28a496f9b4c1cL,0xed1a733f912ad0abL, + 0x42a910af01a5659cL }, + { 0x3842c6e07bd68cabL,0x2b57fa3876d70ac8L,0x8a6707a83c53aaebL, + 0x62c1c51065b4db18L } }, + /* 35 << 56 */ + { { 0x8de2c1fbb2d09dc7L,0xc3dfed12266bd23bL,0x927d039bd5b27db6L, + 0x2fb2f0f1103243daL }, + { 0xf855a07b80be7399L,0xed9327ce1f9f27a8L,0xa0bd99c7729bdef7L, + 0x2b67125e28250d88L } }, + /* 36 << 56 */ + { { 0x784b26e88670ced7L,0xe3dfe41fc31bd3b4L,0x9e353a06bcc85cbcL, + 0x302e290960178a9dL }, + { 0x860abf11a6eac16eL,0x76447000aa2b3aacL,0x46ff9d19850afdabL, + 0x35bdd6a5fdb2d4c1L } }, + /* 37 << 56 */ + { { 0xe82594b07e5c9ce9L,0x0f379e5320af346eL,0x608b31e3bc65ad4aL, + 0x710c6b12267c4826L }, + { 0x51c966f971954cf1L,0xb1cec7930d0aa215L,0x1f15598986bd23a8L, + 0xae2ff99cf9452e86L } }, + /* 38 << 56 */ + { { 0xd8dd953c340ceaa2L,0x263552752e2e9333L,0x15d4e5f98586f06dL, + 0xd6bf94a8f7cab546L }, + { 0x33c59a0ab76a9af0L,0x52740ab3ba095af7L,0xc444de8a24389ca0L, + 0xcc6f9863706da0cbL } }, + /* 39 << 56 */ + { { 0xb5a741a76b2515cfL,0x71c416019585c749L,0x78350d4fe683de97L, + 0x31d6152463d0b5f5L }, + { 0x7a0cc5e1fbce090bL,0xaac927edfbcb2a5bL,0xe920de4920d84c35L, + 0x8c06a0b622b4de26L } }, + /* 40 << 56 */ + { { 
0xd34dd58bafe7ddf3L,0x55851fedc1e6e55bL,0xd1395616960696e7L, + 0x940304b25f22705fL }, + { 0x6f43f861b0a2a860L,0xcf1212820e7cc981L,0x121862120ab64a96L, + 0x09215b9ab789383cL } }, + /* 41 << 56 */ + { { 0x311eb30537387c09L,0xc5832fcef03ee760L,0x30358f5832f7ea19L, + 0xe01d3c3491d53551L }, + { 0x1ca5ee41da48ea80L,0x34e71e8ecf4fa4c1L,0x312abd257af1e1c7L, + 0xe3afcdeb2153f4a5L } }, + /* 42 << 56 */ + { { 0x9d5c84d700235e9aL,0x0308d3f48c4c836fL,0xc0a66b0489332de5L, + 0x610dd39989e566efL }, + { 0xf8eea460d1ac1635L,0x84cbb3fb20a2c0dfL,0x40afb488e74a48c5L, + 0x29738198d326b150L } }, + /* 43 << 56 */ + { { 0x2a17747fa6d74081L,0x60ea4c0555a26214L,0x53514bb41f88c5feL, + 0xedd645677e83426cL }, + { 0xd5d6cbec96460b25L,0xa12fd0ce68dc115eL,0xc5bc3ed2697840eaL, + 0x969876a8a6331e31L } }, + /* 44 << 56 */ + { { 0x60c36217472ff580L,0xf42297054ad41393L,0x4bd99ef0a03b8b92L, + 0x501c7317c144f4f6L }, + { 0x159009b318464945L,0x6d5e594c74c5c6beL,0x2d587011321a3660L, + 0xd1e184b13898d022L } }, + /* 45 << 56 */ + { { 0x5ba047524c6a7e04L,0x47fa1e2b45550b65L,0x9419daf048c0a9a5L, + 0x663629537c243236L }, + { 0xcd0744b15cb12a88L,0x561b6f9a2b646188L,0x599415a566c2c0c0L, + 0xbe3f08590f83f09aL } }, + /* 46 << 56 */ + { { 0x9141c5beb92041b8L,0x01ae38c726477d0dL,0xca8b71f3d12c7a94L, + 0xfab5b31f765c70dbL }, + { 0x76ae7492487443e9L,0x8595a310990d1349L,0xf8dbeda87d460a37L, + 0x7f7ad0821e45a38fL } }, + /* 47 << 56 */ + { { 0xed1d4db61059705aL,0xa3dd492ae6b9c697L,0x4b92ee3a6eb38bd5L, + 0xbab2609d67cc0bb7L }, + { 0x7fc4fe896e70ee82L,0xeff2c56e13e6b7e3L,0x9b18959e34d26fcaL, + 0x2517ab66889d6b45L } }, + /* 48 << 56 */ + { { 0xf167b4e0bdefdd4fL,0x69958465f366e401L,0x5aa368aba73bbec0L, + 0x121487097b240c21L }, + { 0x378c323318969006L,0xcb4d73cee1fe53d1L,0x5f50a80e130c4361L, + 0xd67f59517ef5212bL } }, + /* 49 << 56 */ + { { 0xf145e21e9e70c72eL,0xb2e52e295566d2fbL,0x44eaba4a032397f5L, + 0x5e56937b7e31a7deL }, + { 0x68dcf517456c61e1L,0xbc2e954aa8b0a388L,0xe3552fa760a8b755L, + 0x03442dae73ad0cdeL } }, + /* 50 << 56 */ + { { 0x37ffe747ceb26210L,0x983545e8787baef9L,0x8b8c853586a3de31L, + 0xc621dbcbfacd46dbL }, + { 0x82e442e959266fbbL,0xa3514c37339d471cL,0x3a11b77162cdad96L, + 0xf0cb3b3cecf9bdf0L } }, + /* 51 << 56 */ + { { 0x3fcbdbce478e2135L,0x7547b5cfbda35342L,0xa97e81f18a677af6L, + 0xc8c2bf8328817987L }, + { 0xdf07eaaf45580985L,0xc68d1f05c93b45cbL,0x106aa2fec77b4cacL, + 0x4c1d8afc04a7ae86L } }, + /* 52 << 56 */ + { { 0xdb41c3fd9eb45ab2L,0x5b234b5bd4b22e74L,0xda253decf215958aL, + 0x67e0606ea04edfa0L }, + { 0xabbbf070ef751b11L,0xf352f175f6f06dceL,0xdfc4b6af6839f6b4L, + 0x53ddf9a89959848eL } }, + /* 53 << 56 */ + { { 0xda49c379c21520b0L,0x90864ff0dbd5d1b6L,0x2f055d235f49c7f7L, + 0xe51e4e6aa796b2d8L }, + { 0xc361a67f5c9dc340L,0x5ad53c37bca7c620L,0xda1d658832c756d0L, + 0xad60d9118bb67e13L } }, + /* 54 << 56 */ + { { 0xd6c47bdf0eeec8c6L,0x4a27fec1078a1821L,0x081f7415c3099524L, + 0x8effdf0b82cd8060L }, + { 0xdb70ec1c65842df8L,0x8821b358d319a901L,0x72ee56eede42b529L, + 0x5bb39592236e4286L } }, + /* 55 << 56 */ + { { 0xd1183316fd6f7140L,0xf9fadb5bbd8e81f7L,0x701d5e0c5a02d962L, + 0xfdee4dbf1b601324L }, + { 0xbed1740735d7620eL,0x04e3c2c3f48c0012L,0x9ee29da73455449aL, + 0x562cdef491a836c4L } }, + /* 56 << 56 */ + { { 0x8f682a5f47701097L,0x617125d8ff88d0c2L,0x948fda2457bb86ddL, + 0x348abb8f289f7286L }, + { 0xeb10eab599d94bbdL,0xd51ba28e4684d160L,0xabe0e51c30c8f41aL, + 0x66588b4513254f4aL } }, + /* 57 << 56 */ + { { 0x147ebf01fad097a5L,0x49883ea8610e815dL,0xe44d60ba8a11de56L, + 0xa970de6e827a7a6dL }, + { 
0x2be414245e17fc19L,0xd833c65701214057L,0x1375813b363e723fL, + 0x6820bb88e6a52e9bL } }, + /* 58 << 56 */ + { { 0x7e7f6970d875d56aL,0xd6a0a9ac51fbf6bfL,0x54ba8790a3083c12L, + 0xebaeb23d6ae7eb64L }, + { 0xa8685c3ab99a907aL,0xf1e74550026bf40bL,0x7b73a027c802cd9eL, + 0x9a8a927c4fef4635L } }, + /* 59 << 56 */ + { { 0xe1b6f60c08191224L,0xc4126ebbde4ec091L,0xe1dff4dc4ae38d84L, + 0xde3f57db4f2ef985L }, + { 0x34964337d446a1ddL,0x7bf217a0859e77f6L,0x8ff105278e1d13f5L, + 0xa304ef0374eeae27L } }, + /* 60 << 56 */ + { { 0xfc6f5e47d19dfa5aL,0xdb007de37fad982bL,0x28205ad1613715f5L, + 0x251e67297889529eL }, + { 0x727051841ae98e78L,0xf818537d271cac32L,0xc8a15b7eb7f410f5L, + 0xc474356f81f62393L } }, + /* 61 << 56 */ + { { 0x92dbdc5ac242316bL,0xabe060acdbf4aff5L,0x6e8c38fe909a8ec6L, + 0x43e514e56116cb94L }, + { 0x2078fa3807d784f9L,0x1161a880f4b5b357L,0x5283ce7913adea3dL, + 0x0756c3e6cc6a910bL } }, + /* 62 << 56 */ + { { 0x60bcfe01aaa79697L,0x04a73b2956391db1L,0xdd8dad47189b45a0L, + 0xbfac0dd048d5b8d9L }, + { 0x34ab3af57d3d2ec2L,0x6fa2fc2d207bd3afL,0x9ff4009266550dedL, + 0x719b3e871fd5b913L } }, + /* 63 << 56 */ + { { 0xa573a4966d17fbc7L,0x0cd1a70a73d2b24eL,0x34e2c5cab2676937L, + 0xe7050b06bf669f21L }, + { 0xfbe948b61ede9046L,0xa053005197662659L,0x58cbd4edf10124c5L, + 0xde2646e4dd6c06c8L } }, + /* 64 << 56 */ + { { 0x332f81088cad38c0L,0x471b7e906bd68ae2L,0x56ac3fb20d8e27a3L, + 0xb54660db136b4b0dL }, + { 0x123a1e11a6fd8de4L,0x44dbffeaa37799efL,0x4540b977ce6ac17cL, + 0x495173a8af60acefL } }, + /* 0 << 63 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 63 */ + { { 0x9ebb284d391c2a82L,0xbcdd4863158308e8L,0x006f16ec83f1edcaL, + 0xa13e2c37695dc6c8L }, + { 0x2ab756f04a057a87L,0xa8765500a6b48f98L,0x4252face68651c44L, + 0xa52b540be1765e02L } }, + /* 2 << 63 */ + { { 0x4f922fc516a0d2bbL,0x0d5cc16c1a623499L,0x9241cf3a57c62c8bL, + 0x2f5e6961fd1b667fL }, + { 0x5c15c70bf5a01797L,0x3d20b44d60956192L,0x04911b37071fdb52L, + 0xf648f9168d6f0f7bL } }, + /* 3 << 63 */ + { { 0x6dc1acafe60b7cf7L,0x25860a5084a9d869L,0x56fc6f09e7ba8ac4L, + 0x828c5bd06148d29eL }, + { 0xac6b435edc55ae5fL,0xa527f56cc0117411L,0x94d5045efd24342cL, + 0x2c4c0a3570b67c0dL } }, + /* 4 << 63 */ + { { 0x027cc8b8fac61d9aL,0x7d25e062e3c6fe8aL,0xe08805bfe5bff503L, + 0x13271e6c6ff632f7L }, + { 0x55dca6c0232f76a5L,0x8957c32d701ef426L,0xee728bcba10a5178L, + 0x5ea60411b62c5173L } }, + /* 5 << 63 */ + { { 0xfc4e964ed0b8892bL,0x9ea176839301bb74L,0x6265c5aefcc48626L, + 0xe60cf82ebb3e9102L }, + { 0x57adf797d4df5531L,0x235b59a18deeefe2L,0x60adcf583f306eb1L, + 0x105c27533d09492dL } }, + /* 6 << 63 */ + { { 0x4090914bb5def996L,0x1cb69c83233dd1e7L,0xc1e9c1d39b3d5e76L, + 0x1f3338edfccf6012L }, + { 0xb1e95d0d2f5378a8L,0xacf4c2c72f00cd21L,0x6e984240eb5fe290L, + 0xd66c038d248088aeL } }, + /* 7 << 63 */ + { { 0x804d264af94d70cfL,0xbdb802ef7314bf7eL,0x8fb54de24333ed02L, + 0x740461e0285635d9L }, + { 0x4113b2c8365e9383L,0xea762c833fdef652L,0x4eec6e2e47b956c1L, + 0xa3d814be65620fa4L } }, + /* 8 << 63 */ + { { 0x9ad5462bb4d8bc50L,0x181c0b16a9195770L,0xebd4fe1c78412a68L, + 0xae0341bcc0dff48cL }, + { 0xb6bc45cf7003e866L,0xf11a6dea8a24a41bL,0x5407151ad04c24c2L, + 0x62c9d27dda5b7b68L } }, + /* 9 << 63 */ + { { 0x2e96423588cceff6L,0x8594c54f8b07ed69L,0x1578e73cc84d0d0dL, + 0x7b4e1055ff532868L }, + { 0xa348c0d5b5ec995aL,0xbf4b9d5514289a54L,0x9ba155a658fbd777L, + 0x186ed7a81a84491dL } }, + /* 10 << 63 */ + { { 0xd4992b30614c0900L,0xda98d121bd00c24bL,0x7f534dc87ec4bfa1L, + 0x4a5ff67437dc34bcL }, + { 0x68c196b81d7ea1d7L,0x38cf289380a6d208L,0xfd56cd09e3cbbd6eL, + 
0xec72e27e4205a5b6L } }, + /* 11 << 63 */ + { { 0x15ea68f5a44f77f7L,0x7aa5f9fdb43c52bcL,0x86ff676f94f0e609L, + 0xa4cde9632e2d432bL }, + { 0x8cafa0c0eee470afL,0x84137d0e8a3f5ec8L,0xebb40411faa31231L, + 0xa239c13f6f7f7ccfL } }, + /* 12 << 63 */ + { { 0x32865719a8afd30bL,0x867983288a826dceL,0xdf04e891c4a8fbe0L, + 0xbb6b6e1bebf56ad3L }, + { 0x0a695b11471f1ff0L,0xd76c3389be15baf0L,0x018edb95be96c43eL, + 0xf2beaaf490794158L } }, + /* 13 << 63 */ + { { 0x152db09ec3076a27L,0x5e82908ee416545dL,0xa2c41272356d6f2eL, + 0xdc9c964231fd74e1L }, + { 0x66ceb88d519bf615L,0xe29ecd7605a2274eL,0x3a0473c4bf5e2fa0L, + 0x6b6eb67164284e67L } }, + /* 14 << 63 */ + { { 0xe8b97932b88756ddL,0xed4e8652f17e3e61L,0xc2dd14993ee1c4a4L, + 0xc0aaee17597f8c0eL }, + { 0x15c4edb96c168af3L,0x6563c7bfb39ae875L,0xadfadb6f20adb436L, + 0xad55e8c99a042ac0L } }, + /* 15 << 63 */ + { { 0x975a1ed8b76da1f5L,0x10dfa466a58acb94L,0x8dd7f7e3ac060282L, + 0x6813e66a572a051eL }, + { 0xb4ccae1e350cb901L,0xb653d65650cb7822L,0x42484710dfab3b87L, + 0xcd7ee5379b670fd0L } }, + /* 16 << 63 */ + { { 0x0a50b12e523b8bf6L,0x8009eb5b8f910c1bL,0xf535af824a167588L, + 0x0f835f9cfb2a2abdL }, + { 0xf59b29312afceb62L,0xc797df2a169d383fL,0xeb3f5fb066ac02b0L, + 0x029d4c6fdaa2d0caL } }, + /* 17 << 63 */ + { { 0xd4059bc1afab4bc5L,0x833f5c6f56783247L,0xb53466308d2d3605L, + 0x83387891d34d8433L }, + { 0xd973b30fadd9419aL,0xbcca1099afe3fce8L,0x081783150809aac6L, + 0x01b7f21a540f0f11L } }, + /* 18 << 63 */ + { { 0x65c29219909523c8L,0xa62f648fa3a1c741L,0x88598d4f60c9e55aL, + 0xbce9141b0e4f347aL }, + { 0x9af97d8435f9b988L,0x0210da62320475b6L,0x3c076e229191476cL, + 0x7520dbd944fc7834L } }, + /* 19 << 63 */ + { { 0x6a6b2cfec1ab1bbdL,0xef8a65bedc650938L,0x72855540805d7bc4L, + 0xda389396ed11fdfdL }, + { 0xa9d5bd3674660876L,0x11d67c54b45dff35L,0x6af7d148a4f5da94L, + 0xbb8d4c3fc0bbeb31L } }, + /* 20 << 63 */ + { { 0x87a7ebd1e0a1b12aL,0x1e4ef88d770ba95fL,0x8c33345cdc2ae9cbL, + 0xcecf127601cc8403L }, + { 0x687c012e1b39b80fL,0xfd90d0ad35c33ba4L,0xa3ef5a675c9661c2L, + 0x368fc88ee017429eL } }, + /* 21 << 63 */ + { { 0xd30c6761196a2fa2L,0x931b9817bd5b312eL,0xba01000c72f54a31L, + 0xa203d2c866eaa541L }, + { 0xf2abdee098939db3L,0xe37d6c2c3e606c02L,0xf2921574521ff643L, + 0x2781b3c4d7e2fca3L } }, + /* 22 << 63 */ + { { 0x664300b07850ec06L,0xac5a38b97d3a10cfL,0x9233188de34ab39dL, + 0xe77057e45072cbb9L }, + { 0xbcf0c042b59e78dfL,0x4cfc91e81d97de52L,0x4661a26c3ee0ca4aL, + 0x5620a4c1fb8507bcL } }, + /* 23 << 63 */ + { { 0x4b44d4aa049f842cL,0xceabc5d51540e82bL,0x306710fd15c6f156L, + 0xbe5ae52b63db1d72L }, + { 0x06f1e7e6334957f1L,0x57e388f031144a70L,0xfb69bb2fdf96447bL, + 0x0f78ebd373e38a12L } }, + /* 24 << 63 */ + { { 0xb82226052b7ce542L,0xe6d4ce997472bde1L,0x53e16ebe09d2f4daL, + 0x180ff42e53b92b2eL }, + { 0xc59bcc022c34a1c6L,0x3803d6f9422c46c2L,0x18aff74f5c14a8a2L, + 0x55aebf8010a08b28L } }, + /* 25 << 63 */ + { { 0x66097d587135593fL,0x32e6eff72be570cdL,0x584e6a102a8c860dL, + 0xcd185890a2eb4163L }, + { 0x7ceae99d6d97e134L,0xd42c6b70dd8447ceL,0x59ddbb4ab8c50273L, + 0x03c612df3cf34e1eL } }, + /* 26 << 63 */ + { { 0x84b9ca1504b6c5a0L,0x35216f3918f0e3a3L,0x3ec2d2bcbd986c00L, + 0x8bf546d9d19228feL }, + { 0xd1c655a44cd623c3L,0x366ce718502b8e5aL,0x2cfc84b4eea0bfe7L, + 0xe01d5ceecf443e8eL } }, + /* 27 << 63 */ + { { 0x8ec045d9036520f8L,0xdfb3c3d192d40e98L,0x0bac4ccecc559a04L, + 0x35eccae5240ea6b1L }, + { 0x180b32dbf8a5a0acL,0x547972a5eb699700L,0xa3765801ca26bca0L, + 0x57e09d0ea647f25aL } }, + /* 28 << 63 */ + { { 0xb956970e2fdd23ccL,0xb80288bc5682e971L,0xe6e6d91e9ae86ebcL, + 0x0564c83f8c9f1939L }, + { 
0x551932a239560368L,0xe893752b049c28e2L,0x0b03cee5a6a158c3L, + 0xe12d656b04964263L } }, + /* 29 << 63 */ + { { 0x4b47554e63e3bc1dL,0xc719b6a245044ff7L,0x4f24d30ae48daa07L, + 0xa3f37556c8c1edc3L }, + { 0x9a47bf760700d360L,0xbb1a1824822ae4e2L,0x22e275a389f1fb4cL, + 0x72b1aa239968c5f5L } }, + /* 30 << 63 */ + { { 0xa75feacabe063f64L,0x9b392f43bce47a09L,0xd42415091ad07acaL, + 0x4b0c591b8d26cd0fL }, + { 0x2d42ddfd92f1169aL,0x63aeb1ac4cbf2392L,0x1de9e8770691a2afL, + 0xebe79af7d98021daL } }, + /* 31 << 63 */ + { { 0xcfdf2a4e40e50acfL,0xf0a98ad7af01d665L,0xefb640bf1831be1fL, + 0x6fe8bd2f80e9ada0L }, + { 0x94c103a16cafbc91L,0x170f87598308e08cL,0x5de2d2ab9780ff4fL, + 0x666466bc45b201f2L } }, + /* 32 << 63 */ + { { 0x58af2010f5b343bcL,0x0f2e400af2f142feL,0x3483bfdea85f4bdfL, + 0xf0b1d09303bfeaa9L }, + { 0x2ea01b95c7081603L,0xe943e4c93dba1097L,0x47be92adb438f3a6L, + 0x00bb7742e5bf6636L } }, + /* 33 << 63 */ + { { 0x136b7083824297b4L,0x9d0e55805584455fL,0xab48cedcf1c7d69eL, + 0x53a9e4812a256e76L }, + { 0x0402b0e065eb2413L,0xdadbbb848fc407a7L,0xa65cd5a48d7f5492L, + 0x21d4429374bae294L } }, + /* 34 << 63 */ + { { 0x66917ce63b5f1cc4L,0x37ae52eace872e62L,0xbb087b722905f244L, + 0x120770861e6af74fL }, + { 0x4b644e491058edeaL,0x827510e3b638ca1dL,0x8cf2b7046038591cL, + 0xffc8b47afe635063L } }, + /* 35 << 63 */ + { { 0x3ae220e61b4d5e63L,0xbd8647429d961b4bL,0x610c107e9bd16bedL, + 0x4270352a1127147bL }, + { 0x7d17ffe664cfc50eL,0x50dee01a1e36cb42L,0x068a762235dc5f9aL, + 0x9a08d536df53f62cL } }, + /* 36 << 63 */ + { { 0x4ed714576be5f7deL,0xd93006f8c2263c9eL,0xe073694ccacacb36L, + 0x2ff7a5b43ae118abL }, + { 0x3cce53f1cd871236L,0xf156a39dc2aa6d52L,0x9cc5f271b198d76dL, + 0xbc615b6f81383d39L } }, + /* 37 << 63 */ + { { 0xa54538e8de3eee6bL,0x58c77538ab910d91L,0x31e5bdbc58d278bdL, + 0x3cde4adfb963acaeL }, + { 0xb1881fd25302169cL,0x8ca60fa0a989ed8bL,0xa1999458ff96a0eeL, + 0xc1141f03ac6c283dL } }, + /* 38 << 63 */ + { { 0x7677408d6dfafed3L,0x33a0165339661588L,0x3c9c15ec0b726fa0L, + 0x090cfd936c9b56daL }, + { 0xe34f4baea3c40af5L,0x3469eadbd21129f1L,0xcc51674a1e207ce8L, + 0x1e293b24c83b1ef9L } }, + /* 39 << 63 */ + { { 0x17173d131e6c0bb4L,0x1900469590776d35L,0xe7980e346de6f922L, + 0x873554cbf4dd9a22L }, + { 0x0316c627cbf18a51L,0x4d93651b3032c081L,0x207f27713946834dL, + 0x2c08d7b430cdbf80L } }, + /* 40 << 63 */ + { { 0x137a4fb486df2a61L,0xa1ed9c07ecf7b4a2L,0xb2e460e27bd042ffL, + 0xb7f5e2fa5f62f5ecL }, + { 0x7aa6ec6bcc2423b7L,0x75ce0a7fba63eea7L,0x67a45fb1f250a6e1L, + 0x93bc919ce53cdc9fL } }, + /* 41 << 63 */ + { { 0x9271f56f871942dfL,0x2372ff6f7859ad66L,0x5f4c2b9633cb1a78L, + 0xe3e291015838aa83L }, + { 0xa7ed1611e4e8110cL,0x2a2d70d5330198ceL,0xbdf132e86720efe0L, + 0xe61a896266a471bfL } }, + /* 42 << 63 */ + { { 0x796d3a85825808bdL,0x51dc3cb73fd6e902L,0x643c768a916219d1L, + 0x36cd7685a2ad7d32L }, + { 0xe3db9d05b22922a4L,0x6494c87edba29660L,0xf0ac91dfbcd2ebc7L, + 0x4deb57a045107f8dL } }, + /* 43 << 63 */ + { { 0x42271f59c3d12a73L,0x5f71687ca5c2c51dL,0xcb1f50c605797bcbL, + 0x29ed0ed9d6d34eb0L }, + { 0xe5fe5b474683c2ebL,0x4956eeb597447c46L,0x5b163a4371207167L, + 0x93fa2fed0248c5efL } }, + /* 44 << 63 */ + { { 0x67930af231f63950L,0xa77797c114caa2c9L,0x526e80ee27ac7e62L, + 0xe1e6e62658b28aecL }, + { 0x636178b0b3c9fef0L,0xaf7752e06d5f90beL,0x94ecaf18eece51cfL, + 0x2864d0edca806e1fL } }, + /* 45 << 63 */ + { { 0x6de2e38397c69134L,0x5a42c316eb291293L,0xc77792196a60bae0L, + 0xa24de3466b7599d1L }, + { 0x49d374aab75d4941L,0x989005862d501ff0L,0x9f16d40eeb7974cfL, + 0x1033860bcdd8c115L } }, + /* 46 << 63 */ + { { 
0xb6c69ac82094cec3L,0x9976fb88403b770cL,0x1dea026c4859590dL, + 0xb6acbb468562d1fdL }, + { 0x7cd6c46144569d85L,0xc3190a3697f0891dL,0xc6f5319548d5a17dL, + 0x7d919966d749abc8L } }, + /* 47 << 63 */ + { { 0x65104837dd1c8a20L,0x7e5410c82f683419L,0x958c3ca8be94022eL, + 0x605c31976145dac2L }, + { 0x3fc0750101683d54L,0x1d7127c5595b1234L,0x10b8f87c9481277fL, + 0x677db2a8e65a1adbL } }, + /* 48 << 63 */ + { { 0xec2fccaaddce3345L,0x2a6811b7012a4350L,0x96760ff1ac598bdcL, + 0x054d652ad1bf4128L }, + { 0x0a1151d492a21005L,0xad7f397133110fdfL,0x8c95928c1960100fL, + 0x6c91c8257bf03362L } }, + /* 49 << 63 */ + { { 0xc8c8b2a2ce309f06L,0xfdb27b59ca27204bL,0xd223eaa50848e32eL, + 0xb93e4b2ee7bfaf1eL }, + { 0xc5308ae644aa3dedL,0x317a666ac015d573L,0xc888ce231a979707L, + 0xf141c1e60d5c4958L } }, + /* 50 << 63 */ + { { 0xb53b7de561906373L,0x858dbadeeb999595L,0x8cbb47b2a59e5c36L, + 0x660318b3dcf4e842L }, + { 0xbd161ccd12ba4b7aL,0xf399daabf8c8282aL,0x1587633aeeb2130dL, + 0xa465311ada38dd7dL } }, + /* 51 << 63 */ + { { 0x5f75eec864d3779bL,0x3c5d0476ad64c171L,0x874103712a914428L, + 0x8096a89190e2fc29L }, + { 0xd3d2ae9d23b3ebc2L,0x90bdd6dba580cfd6L,0x52dbb7f3c5b01f6cL, + 0xe68eded4e102a2dcL } }, + /* 52 << 63 */ + { { 0x17785b7799eb6df0L,0x26c3cc517386b779L,0x345ed9886417a48eL, + 0xe990b4e407d6ef31L }, + { 0x0f456b7e2586abbaL,0x239ca6a559c96e9aL,0xe327459ce2eb4206L, + 0x3a4c3313a002b90aL } }, + /* 53 << 63 */ + { { 0x2a114806f6a3f6fbL,0xad5cad2f85c251ddL,0x92c1f613f5a784d3L, + 0xec7bfacf349766d5L }, + { 0x04b3cd333e23cb3bL,0x3979fe84c5a64b2dL,0x192e27207e589106L, + 0xa60c43d1a15b527fL } }, + /* 54 << 63 */ + { { 0x2dae9082be7cf3a6L,0xcc86ba92bc967274L,0xf28a2ce8aea0a8a9L, + 0x404ca6d96ee988b3L }, + { 0xfd7e9c5d005921b8L,0xf56297f144e79bf9L,0xa163b4600d75ddc2L, + 0x30b23616a1f2be87L } }, + /* 55 << 63 */ + { { 0x4b070d21bfe50e2bL,0x7ef8cfd0e1bfede1L,0xadba00112aac4ae0L, + 0x2a3e7d01b9ebd033L }, + { 0x995277ece38d9d1cL,0xb500249e9c5d2de3L,0x8912b820f13ca8c9L, + 0xc8798114877793afL } }, + /* 56 << 63 */ + { { 0x19e6125dec3f1decL,0x07b1f040911178daL,0xd93ededa904a6738L, + 0x55187a5a0bebedcdL }, + { 0xf7d04722eb329d41L,0xf449099ef170b391L,0xfd317a69ca99f828L, + 0x50c3db2b34a4976dL } }, + /* 57 << 63 */ + { { 0xe9ba77843757b392L,0x326caefdaa3ca05aL,0x78e5293bf1e593d4L, + 0x7842a9370d98fd13L }, + { 0xe694bf965f96b10dL,0x373a9df606a8cd05L,0x997d1e51e8f0c7fcL, + 0x1d01979063fd972eL } }, + /* 58 << 63 */ + { { 0x0064d8585499fb32L,0x7b67bad977a8aeb7L,0x1d3eb9772d08eec5L, + 0x5fc047a6cbabae1dL }, + { 0x0577d159e54a64bbL,0x8862201bc43497e4L,0xad6b4e282ce0608dL, + 0x8b687b7d0b167aacL } }, + /* 59 << 63 */ + { { 0x6ed4d3678b2ecfa9L,0x24dfe62da90c3c38L,0xa1862e103fe5c42bL, + 0x1ca73dcad5732a9fL }, + { 0x35f038b776bb87adL,0x674976abf242b81fL,0x4f2bde7eb0fd90cdL, + 0x6efc172ea7fdf092L } }, + /* 60 << 63 */ + { { 0x3806b69b92222f1fL,0x5a2459ca6cf7ae70L,0x6789f69ca85217eeL, + 0x5f232b5ee3dc85acL }, + { 0x660e3ec548e9e516L,0x124b4e473197eb31L,0x10a0cb13aafcca23L, + 0x7bd63ba48213224fL } }, + /* 61 << 63 */ + { { 0xaffad7cc290a7f4fL,0x6b409c9e0286b461L,0x58ab809fffa407afL, + 0xc3122eedc68ac073L }, + { 0x17bf9e504ef24d7eL,0x5d9297943e2a5811L,0x519bc86702902e01L, + 0x76bba5da39c8a851L } }, + /* 62 << 63 */ + { { 0xe9f9669cda94951eL,0x4b6af58d66b8d418L,0xfa32107417d426a4L, + 0xc78e66a99dde6027L }, + { 0x0516c0834a53b964L,0xfc659d38ff602330L,0x0ab55e5c58c5c897L, + 0x985099b2838bc5dfL } }, + /* 63 << 63 */ + { { 0x061d9efcc52fc238L,0x712b27286ac1da3fL,0xfb6581499283fe08L, + 0x4954ac94b8aaa2f7L }, + { 
0x85c0ada47fb2e74fL,0xee8ba98eb89926b0L,0xe4f9d37d23d1af5bL, + 0x14ccdbf9ba9b015eL } }, + /* 64 << 63 */ + { { 0xb674481b7bfe7178L,0x4e1debae65405868L,0x061b2821c48c867dL, + 0x69c15b35513b30eaL }, + { 0x3b4a166636871088L,0xe5e29f5d1220b1ffL,0x4b82bb35233d9f4dL, + 0x4e07633318cdc675L } }, + /* 0 << 70 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 70 */ + { { 0x0d53f5c7a3e6fcedL,0xe8cbbdd5f45fbdebL,0xf85c01df13339a70L, + 0x0ff71880142ceb81L }, + { 0x4c4e8774bd70437aL,0x5fb32891ba0bda6aL,0x1cdbebd2f18bd26eL, + 0x2f9526f103a9d522L } }, + /* 2 << 70 */ + { { 0x40ce305192c4d684L,0x8b04d7257612efcdL,0xb9dcda366f9cae20L, + 0x0edc4d24f058856cL }, + { 0x64f2e6bf85427900L,0x3de81295dc09dfeaL,0xd41b4487379bf26cL, + 0x50b62c6d6df135a9L } }, + /* 3 << 70 */ + { { 0xd4f8e3b4c72dfe67L,0xc416b0f690e19fdfL,0x18b9098d4c13bd35L, + 0xac11118a15b8cb9eL }, + { 0xf598a318f0062841L,0xbfe0602f89f356f4L,0x7ae3637e30177a0cL, + 0x3409774761136537L } }, + /* 4 << 70 */ + { { 0x0db2fb5ed005832aL,0x5f5efd3b91042e4fL,0x8c4ffdc6ed70f8caL, + 0xe4645d0bb52da9ccL }, + { 0x9596f58bc9001d1fL,0x52c8f0bc4e117205L,0xfd4aa0d2e398a084L, + 0x815bfe3a104f49deL } }, + /* 5 << 70 */ + { { 0x97e5443f23885e5fL,0xf72f8f99e8433aabL,0xbd00b154e4d4e604L, + 0xd0b35e6ae5e173ffL }, + { 0x57b2a0489164722dL,0x3e3c665b88761ec8L,0x6bdd13973da83832L, + 0x3c8b1a1e73dafe3bL } }, + /* 6 << 70 */ + { { 0x4497ace654317cacL,0xbe600ab9521771b3L,0xb42e409eb0dfe8b8L, + 0x386a67d73942310fL }, + { 0x25548d8d4431cc28L,0xa7cff142985dc524L,0x4d60f5a193c4be32L, + 0x83ebd5c8d071c6e1L } }, + /* 7 << 70 */ + { { 0xba3a80a7b1fd2b0bL,0x9b3ad3965bec33e8L,0xb3868d6179743fb3L, + 0xcfd169fcfdb462faL }, + { 0xd3b499d79ce0a6afL,0x55dc1cf1e42d3ff8L,0x04fb9e6cc6c3e1b2L, + 0x47e6961d6f69a474L } }, + /* 8 << 70 */ + { { 0x54eb3acce548b37bL,0xb38e754284d40549L,0x8c3daa517b341b4fL, + 0x2f6928ec690bf7faL }, + { 0x0496b32386ce6c41L,0x01be1c5510adadcdL,0xc04e67e74bb5faf9L, + 0x3cbaf678e15c9985L } }, + /* 9 << 70 */ + { { 0x8cd1214550ca4247L,0xba1aa47ae7dd30aaL,0x2f81ddf1e58fee24L, + 0x03452936eec9b0e8L }, + { 0x8bdc3b81243aea96L,0x9a2919af15c3d0e5L,0x9ea640ec10948361L, + 0x5ac86d5b6e0bcccfL } }, + /* 10 << 70 */ + { { 0xf892d918c36cf440L,0xaed3e837c939719cL,0xb07b08d2c0218b64L, + 0x6f1bcbbace9790ddL }, + { 0x4a84d6ed60919b8eL,0xd89007918ac1f9ebL,0xf84941aa0dd5daefL, + 0xb22fe40a67fd62c5L } }, + /* 11 << 70 */ + { { 0x97e15ba2157f2db3L,0xbda2fc8f8e28ca9cL,0x5d050da437b9f454L, + 0x3d57eb572379d72eL }, + { 0xe9b5eba2fb5ee997L,0x01648ca2e11538caL,0x32bb76f6f6327974L, + 0x338f14b8ff3f4bb7L } }, + /* 12 << 70 */ + { { 0x524d226ad7ab9a2dL,0x9c00090d7dfae958L,0x0ba5f5398751d8c2L, + 0x8afcbcdd3ab8262dL }, + { 0x57392729e99d043bL,0xef51263baebc943aL,0x9feace9320862935L, + 0x639efc03b06c817bL } }, + /* 13 << 70 */ + { { 0x1fe054b366b4be7aL,0x3f25a9de84a37a1eL,0xf39ef1ad78d75cd9L, + 0xd7b58f495062c1b5L }, + { 0x6f74f9a9ff563436L,0xf718ff29e8af51e7L,0x5234d31315e97fecL, + 0xb6a8e2b1292f1c0aL } }, + /* 14 << 70 */ + { { 0xa7f53aa8327720c1L,0x956ca322ba092cc8L,0x8f03d64a28746c4dL, + 0x51fe178266d0d392L }, + { 0xd19b34db3c832c80L,0x60dccc5c6da2e3b4L,0x245dd62e0a104cccL, + 0xa7ab1de1620b21fdL } }, + /* 15 << 70 */ + { { 0xb293ae0b3893d123L,0xf7b75783b15ee71cL,0x5aa3c61442a9468bL, + 0xd686123cdb15d744L }, + { 0x8c616891a7ab4116L,0x6fcd72c8a4e6a459L,0xac21911077e5fad7L, + 0xfb6a20e7704fa46bL } }, + /* 16 << 70 */ + { { 0xe839be7d341d81dcL,0xcddb688932148379L,0xda6211a1f7026eadL, + 0xf3b2575ff4d1cc5eL }, + { 0x40cfc8f6a7a73ae6L,0x83879a5e61d5b483L,0xc5acb1ed41a50ebcL, + 
0x59a60cc83c07d8faL } }, + /* 17 << 70 */ + { { 0x1b73bdceb1876262L,0x2b0d79f012af4ee9L,0x8bcf3b0bd46e1d07L, + 0x17d6af9de45d152fL }, + { 0x735204616d736451L,0x43cbbd9756b0bf5aL,0xb0833a5bd5999b9dL, + 0x702614f0eb72e398L } }, + /* 18 << 70 */ + { { 0x0aadf01a59c3e9f8L,0x40200e77ce6b3d16L,0xda22bdd3deddafadL, + 0x76dedaf4310d72e1L }, + { 0x49ef807c4bc2e88fL,0x6ba81291146dd5a5L,0xa1a4077a7d8d59e9L, + 0x87b6a2e7802db349L } }, + /* 19 << 70 */ + { { 0xd56799971b4e598eL,0xf499ef1f06fe4b1dL,0x3978d3aefcb267c5L, + 0xb582b557235786d0L }, + { 0x32b3b2ca1715cb07L,0x4c3de6a28480241dL,0x63b5ffedcb571ecdL, + 0xeaf53900ed2fe9a9L } }, + /* 20 << 70 */ + { { 0xdec98d4ac3b81990L,0x1cb837229e0cc8feL,0xfe0b0491d2b427b9L, + 0x0f2386ace983a66cL }, + { 0x930c4d1eb3291213L,0xa2f82b2e59a62ae4L,0x77233853f93e89e3L, + 0x7f8063ac11777c7fL } }, + /* 21 << 70 */ + { { 0xff0eb56759ad2877L,0x6f4546429865c754L,0xe6fe701a236e9a84L, + 0xc586ef1606e40fc3L }, + { 0x3f62b6e024bafad9L,0xc8b42bd264da906aL,0xc98e1eb4da3276a0L, + 0x30d0e5fc06cbf852L } }, + /* 22 << 70 */ + { { 0x1b6b2ae1e8b4dfd4L,0xd754d5c78301cbacL,0x66097629112a39acL, + 0xf86b599993ba4ab9L }, + { 0x26c9dea799f9d581L,0x0473b1a8c2fafeaaL,0x1469af553b2505a5L, + 0x227d16d7d6a43323L } }, + /* 23 << 70 */ + { { 0x3316f73cad3d97f9L,0x52bf3bb51f137455L,0x953eafeb09954e7cL, + 0xa721dfeddd732411L }, + { 0xb4929821141d4579L,0x3411321caa3bd435L,0xafb355aa17fa6015L, + 0xb4e7ef4a18e42f0eL } }, + /* 24 << 70 */ + { { 0x604ac97c59371000L,0xe1c48c707f759c18L,0x3f62ecc5a5db6b65L, + 0x0a78b17338a21495L }, + { 0x6be1819dbcc8ad94L,0x70dc04f6d89c3400L,0x462557b4a6b4840aL, + 0x544c6ade60bd21c0L } }, + /* 25 << 70 */ + { { 0x6a00f24e907a544bL,0xa7520dcb313da210L,0xfe939b7511e4994bL, + 0x918b6ba6bc275d70L }, + { 0xd3e5e0fc644be892L,0x707a9816fdaf6c42L,0x60145567f15c13feL, + 0x4818ebaae130a54aL } }, + /* 26 << 70 */ + { { 0x28aad3ad58d2f767L,0xdc5267fdd7e7c773L,0x4919cc88c3afcc98L, + 0xaa2e6ab02db8cd4bL }, + { 0xd46fec04d0c63eaaL,0xa1cb92c519ffa832L,0x678dd178e43a631fL, + 0xfb5ae1cd3dc788b3L } }, + /* 27 << 70 */ + { { 0x68b4fb906e77de04L,0x7992bcf0f06dbb97L,0x896e6a13c417c01dL, + 0x8d96332cb956be01L }, + { 0x902fc93a413aa2b9L,0x99a4d915fc98c8a5L,0x52c29407565f1137L, + 0x4072690f21e4f281L } }, + /* 28 << 70 */ + { { 0x36e607cf02ff6072L,0xa47d2ca98ad98cdcL,0xbf471d1ef5f56609L, + 0xbcf86623f264ada0L }, + { 0xb70c0687aa9e5cb6L,0xc98124f217401c6cL,0x8189635fd4a61435L, + 0xd28fb8afa9d98ea6L } }, + /* 29 << 70 */ + { { 0xb9a67c2a40c251f8L,0x88cd5d87a2da44beL,0x437deb96e09b5423L, + 0x150467db64287dc1L }, + { 0xe161debbcdabb839L,0xa79e9742f1839a3eL,0xbb8dd3c2652d202bL, + 0x7b3e67f7e9f97d96L } }, + /* 30 << 70 */ + { { 0x5aa5d78fb1cb6ac9L,0xffa13e8eca1d0d45L,0x369295dd2ba5bf95L, + 0xd68bd1f839aff05eL }, + { 0xaf0d86f926d783f2L,0x543a59b3fc3aafc1L,0x3fcf81d27b7da97cL, + 0xc990a056d25dee46L } }, + /* 31 << 70 */ + { { 0x3e6775b8519cce2cL,0xfc9af71fae13d863L,0x774a4a6f47c1605cL, + 0x46ba42452fd205e8L }, + { 0xa06feea4d3fd524dL,0x1e7246416de1acc2L,0xf53816f1334e2b42L, + 0x49e5918e922f0024L } }, + /* 32 << 70 */ + { { 0x439530b665c7322dL,0xcf12cc01b3c1b3fbL,0xc70b01860172f685L, + 0xb915ee221b58391dL }, + { 0x9afdf03ba317db24L,0x87dec65917b8ffc4L,0x7f46597be4d3d050L, + 0x80a1c1ed006500e7L } }, + /* 33 << 70 */ + { { 0x84902a9678bf030eL,0xfb5e9c9a50560148L,0x6dae0a9263362426L, + 0xdcaeecf4a9e30c40L }, + { 0xc0d887bb518d0c6bL,0x99181152cb985b9dL,0xad186898ef7bc381L, + 0x18168ffb9ee46201L } }, + /* 34 << 70 */ + { { 0x9a04cdaa2502753cL,0xbb279e2651407c41L,0xeacb03aaf23564e5L, + 0x1833658271e61016L }, + { 
0x8684b8c4eb809877L,0xb336e18dea0e672eL,0xefb601f034ee5867L, + 0x2733edbe1341cfd1L } }, + /* 35 << 70 */ + { { 0xb15e809a26025c3cL,0xe6e981a69350df88L,0x923762378502fd8eL, + 0x4791f2160c12be9bL }, + { 0xb725678925f02425L,0xec8631947a974443L,0x7c0ce882fb41cc52L, + 0xc266ff7ef25c07f2L } }, + /* 36 << 70 */ + { { 0x3d4da8c3017025f3L,0xefcf628cfb9579b4L,0x5c4d00161f3716ecL, + 0x9c27ebc46801116eL }, + { 0x5eba0ea11da1767eL,0xfe15145247004c57L,0x3ace6df68c2373b7L, + 0x75c3dffe5dbc37acL } }, + /* 37 << 70 */ + { { 0x3dc32a73ddc925fcL,0xb679c8412f65ee0bL,0x715a3295451cbfebL, + 0xd9889768f76e9a29L }, + { 0xec20ce7fb28ad247L,0xe99146c400894d79L,0x71457d7c9f5e3ea7L, + 0x097b266238030031L } }, + /* 38 << 70 */ + { { 0xdb7f6ae6cf9f82a8L,0x319decb9438f473aL,0xa63ab386283856c3L, + 0x13e3172fb06a361bL }, + { 0x2959f8dc7d5a006cL,0x2dbc27c675fba752L,0xc1227ab287c22c9eL, + 0x06f61f7571a268b2L } }, + /* 39 << 70 */ + { { 0x1b6bb97104779ce2L,0xaca838120aadcb1dL,0x297ae0bcaeaab2d5L, + 0xa5c14ee75bfb9f13L }, + { 0xaa00c583f17a62c7L,0x39eb962c173759f6L,0x1eeba1d486c9a88fL, + 0x0ab6c37adf016c5eL } }, + /* 40 << 70 */ + { { 0xa2a147dba28a0749L,0x246c20d6ee519165L,0x5068d1b1d3810715L, + 0xb1e7018c748160b9L }, + { 0x03f5b1faf380ff62L,0xef7fb1ddf3cb2c1eL,0xeab539a8fc91a7daL, + 0x83ddb707f3f9b561L } }, + /* 41 << 70 */ + { { 0xc550e211fe7df7a4L,0xa7cd07f2063f6f40L,0xb0de36352976879cL, + 0xb5f83f85e55741daL }, + { 0x4ea9d25ef3d8ac3dL,0x6fe2066f62819f02L,0x4ab2b9c2cef4a564L, + 0x1e155d965ffa2de3L } }, + /* 42 << 70 */ + { { 0x0eb0a19bc3a72d00L,0x4037665b8513c31bL,0x2fb2b6bf04c64637L, + 0x45c34d6e08cdc639L }, + { 0x56f1e10ff01fd796L,0x4dfb8101fe3667b8L,0xe0eda2539021d0c0L, + 0x7a94e9ff8a06c6abL } }, + /* 43 << 70 */ + { { 0x2d3bb0d9bb9aa882L,0xea20e4e5ec05fd10L,0xed7eeb5f1a1ca64eL, + 0x2fa6b43cc6327cbdL }, + { 0xb577e3cf3aa91121L,0x8c6bd5ea3a34079bL,0xd7e5ba3960e02fc0L, + 0xf16dd2c390141bf8L } }, + /* 44 << 70 */ + { { 0xb57276d980101b98L,0x760883fdb82f0f66L,0x89d7de754bc3eff3L, + 0x03b606435dc2ab40L }, + { 0xcd6e53dfe05beeacL,0xf2f1e862bc3325cdL,0xdd0f7921774f03c3L, + 0x97ca72214552cc1bL } }, + /* 45 << 70 */ + { { 0x5a0d6afe1cd19f72L,0xa20915dcf183fbebL,0x9fda4b40832c403cL, + 0x32738eddbe425442L }, + { 0x469a1df6b5eccf1aL,0x4b5aff4228bbe1f0L,0x31359d7f570dfc93L, + 0xa18be235f0088628L } }, + /* 46 << 70 */ + { { 0xa5b30fbab00ed3a9L,0x34c6137473cdf8beL,0x2c5c5f46abc56797L, + 0x5cecf93db82a8ae2L }, + { 0x7d3dbe41a968fbf0L,0xd23d45831a5c7f3dL,0xf28f69a0c087a9c7L, + 0xc2d75471474471caL } }, + /* 47 << 70 */ + { { 0x36ec9f4a4eb732ecL,0x6c943bbdb1ca6bedL,0xd64535e1f2457892L, + 0x8b84a8eaf7e2ac06L }, + { 0xe0936cd32499dd5fL,0x12053d7e0ed04e57L,0x4bdd0076e4305d9dL, + 0x34a527b91f67f0a2L } }, + /* 48 << 70 */ + { { 0xe79a4af09cec46eaL,0xb15347a1658b9bc7L,0x6bd2796f35af2f75L, + 0xac9579904051c435L }, + { 0x2669dda3c33a655dL,0x5d503c2e88514aa3L,0xdfa113373753dd41L, + 0x3f0546730b754f78L } }, + /* 49 << 70 */ + { { 0xbf185677496125bdL,0xfb0023c83775006cL,0xfa0f072f3a037899L, + 0x4222b6eb0e4aea57L }, + { 0x3dde5e767866d25aL,0xb6eb04f84837aa6fL,0x5315591a2cf1cdb8L, + 0x6dfb4f412d4e683cL } }, + /* 50 << 70 */ + { { 0x7e923ea448ee1f3aL,0x9604d9f705a2afd5L,0xbe1d4a3340ea4948L, + 0x5b45f1f4b44cbd2fL }, + { 0x5faf83764acc757eL,0xa7cf9ab863d68ff7L,0x8ad62f69df0e404bL, + 0xd65f33c212bdafdfL } }, + /* 51 << 70 */ + { { 0xc365de15a377b14eL,0x6bf5463b8e39f60cL,0x62030d2d2ce68148L, + 0xd95867efe6f843a8L }, + { 0xd39a0244ef5ab017L,0x0bd2d8c14ab55d12L,0xc9503db341639169L, + 0x2d4e25b0f7660c8aL } }, + /* 52 << 70 */ + { { 
0x760cb3b5e224c5d7L,0xfa3baf8c68616919L,0x9fbca1138d142552L, + 0x1ab18bf17669ebf5L }, + { 0x55e6f53e9bdf25ddL,0x04cc0bf3cb6cd154L,0x595bef4995e89080L, + 0xfe9459a8104a9ac1L } }, + /* 53 << 70 */ + { { 0xad2d89cacce9bb32L,0xddea65e1f7de8285L,0x62ed8c35b351bd4bL, + 0x4150ff360c0e19a7L }, + { 0x86e3c801345f4e47L,0x3bf21f71203a266cL,0x7ae110d4855b1f13L, + 0x5d6aaf6a07262517L } }, + /* 54 << 70 */ + { { 0x1e0f12e1813d28f1L,0x6000e11d7ad7a523L,0xc7d8deefc744a17bL, + 0x1e990b4814c05a00L }, + { 0x68fddaee93e976d5L,0x696241d146610d63L,0xb204e7c3893dda88L, + 0x8bccfa656a3a6946L } }, + /* 55 << 70 */ + { { 0xb59425b4c5cd1411L,0x701b4042ff3658b1L,0xe3e56bca4784cf93L, + 0x27de5f158fe68d60L }, + { 0x4ab9cfcef8d53f19L,0xddb10311a40a730dL,0x6fa73cd14eee0a8aL, + 0xfd5487485249719dL } }, + /* 56 << 70 */ + { { 0x49d66316a8123ef0L,0x73c32db4e7f95438L,0x2e2ed2090d9e7854L, + 0xf98a93299d9f0507L }, + { 0xc5d33cf60c6aa20aL,0x9a32ba1475279bb2L,0x7e3202cb774a7307L, + 0x64ed4bc4e8c42dbdL } }, + /* 57 << 70 */ + { { 0xc20f1a06d4caed0dL,0xb8021407171d22b3L,0xd426ca04d13268d7L, + 0x9237700725f4d126L }, + { 0x4204cbc371f21a85L,0x18461b7af82369baL,0xc0c07d313fc858f9L, + 0x5deb5a50e2bab569L } }, + /* 58 << 70 */ + { { 0xd5959d46d5eea89eL,0xfdff842408437f4bL,0xf21071e43cfe254fL, + 0x7241769695468321L }, + { 0x5d8288b9102cae3eL,0x2d143e3df1965dffL,0x00c9a376a078d847L, + 0x6fc0da3126028731L } }, + /* 59 << 70 */ + { { 0xa2baeadfe45083a2L,0x66bc72185e5b4bcdL,0x2c826442d04b8e7fL, + 0xc19f54516c4b586bL }, + { 0x60182c495b7eeed5L,0xd9954ecd7aa9dfa1L,0xa403a8ecc73884adL, + 0x7fb17de29bb39041L } }, + /* 60 << 70 */ + { { 0x694b64c5abb020e8L,0x3d18c18419c4eec7L,0x9c4673ef1c4793e5L, + 0xc7b8aeb5056092e6L }, + { 0x3aa1ca43f0f8c16bL,0x224ed5ecd679b2f6L,0x0d56eeaf55a205c9L, + 0xbfe115ba4b8e028bL } }, + /* 61 << 70 */ + { { 0x97e608493927f4feL,0xf91fbf94759aa7c5L,0x985af7696be90a51L, + 0xc1277b7878ccb823L }, + { 0x395b656ee7a75952L,0x00df7de0928da5f5L,0x09c231754ca4454fL, + 0x4ec971f47aa2d3c1L } }, + /* 62 << 70 */ + { { 0x45c3c507e75d9cccL,0x63b7be8a3dc90306L,0x37e09c665db44bdcL, + 0x50d60da16841c6a2L }, + { 0x6f9b65ee08df1b12L,0x387348797ff089dfL,0x9c331a663fe8013dL, + 0x017f5de95f42fcc8L } }, + /* 63 << 70 */ + { { 0x43077866e8e57567L,0xc9f781cef9fcdb18L,0x38131dda9b12e174L, + 0x25d84aa38a03752aL }, + { 0x45e09e094d0c0ce2L,0x1564008b92bebba5L,0xf7e8ad31a87284c7L, + 0xb7c4b46c97e7bbaaL } }, + /* 64 << 70 */ + { { 0x3e22a7b397acf4ecL,0x0426c4005ea8b640L,0x5e3295a64e969285L, + 0x22aabc59a6a45670L }, + { 0xb929714c5f5942bcL,0x9a6168bdfa3182edL,0x2216a665104152baL, + 0x46908d03b6926368L } }, + /* 0 << 77 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 77 */ + { { 0xa9f5d8745a1251fbL,0x967747a8c72725c7L,0x195c33e531ffe89eL, + 0x609d210fe964935eL }, + { 0xcafd6ca82fe12227L,0xaf9b5b960426469dL,0x2e9ee04c5693183cL, + 0x1084a333c8146fefL } }, + /* 2 << 77 */ + { { 0x96649933aed1d1f7L,0x566eaff350563090L,0x345057f0ad2e39cfL, + 0x148ff65b1f832124L }, + { 0x042e89d4cf94cf0dL,0x319bec84520c58b3L,0x2a2676265361aa0dL, + 0xc86fa3028fbc87adL } }, + /* 3 << 77 */ + { { 0xfc83d2ab5c8b06d5L,0xb1a785a2fe4eac46L,0xb99315bc846f7779L, + 0xcf31d816ef9ea505L }, + { 0x2391fe6a15d7dc85L,0x2f132b04b4016b33L,0x29547fe3181cb4c7L, + 0xdb66d8a6650155a1L } }, + /* 4 << 77 */ + { { 0x6b66d7e1adc1696fL,0x98ebe5930acd72d0L,0x65f24550cc1b7435L, + 0xce231393b4b9a5ecL }, + { 0x234a22d4db067df9L,0x98dda095caff9b00L,0x1bbc75a06100c9c1L, + 0x1560a9c8939cf695L } }, + /* 5 << 77 */ + { { 0xcf006d3e99e0925fL,0x2dd74a966322375aL,0xc58b446ab56af5baL, + 
0x50292683e0b9b4f1L }, + { 0xe2c34cb41aeaffa3L,0x8b17203f9b9587c1L,0x6d559207ead1350cL, + 0x2b66a215fb7f9604L } }, + /* 6 << 77 */ + { { 0x0850325efe51bf74L,0x9c4f579e5e460094L,0x5c87b92a76da2f25L, + 0x889de4e06febef33L }, + { 0x6900ec06646083ceL,0xbe2a0335bfe12773L,0xadd1da35c5344110L, + 0x757568b7b802cd20L } }, + /* 7 << 77 */ + { { 0x7555977900f7e6c8L,0x38e8b94f0facd2f0L,0xfea1f3af03fde375L, + 0x5e11a1d875881dfcL }, + { 0xb3a6b02ec1e2f2efL,0x193d2bbbc605a6c5L,0x325ffeee339a0b2dL, + 0x27b6a7249e0c8846L } }, + /* 8 << 77 */ + { { 0xe4050f1cf1c367caL,0x9bc85a9bc90fbc7dL,0xa373c4a2e1a11032L, + 0xb64232b7ad0393a9L }, + { 0xf5577eb0167dad29L,0x1604f30194b78ab2L,0x0baa94afe829348bL, + 0x77fbd8dd41654342L } }, + /* 9 << 77 */ + { { 0xdab50ea5b964e39aL,0xd4c29e3cd0d3c76eL,0x80dae67c56d11964L, + 0x7307a8bfe5ffcc2fL }, + { 0x65bbc1aa91708c3bL,0xa151e62c28bf0eebL,0x6cb533816fa34db7L, + 0x5139e05ca29403a8L } }, + /* 10 << 77 */ + { { 0x6ff651b494a7cd2eL,0x5671ffd10699336cL,0x6f5fd2cc979a896aL, + 0x11e893a8d8148cefL }, + { 0x988906a165cf7b10L,0x81b67178c50d8485L,0x7c0deb358a35b3deL, + 0x423ac855c1d29799L } }, + /* 11 << 77 */ + { { 0xaf580d87dac50b74L,0x28b2b89f5869734cL,0x99a3b936874e28fbL, + 0xbb2c919025f3f73aL }, + { 0x199f691884a9d5b7L,0x7ebe23257e770374L,0xf442e1070738efe2L, + 0xcf9f3f56cf9082d2L } }, + /* 12 << 77 */ + { { 0x719f69e109618708L,0xcc9e8364c183f9b1L,0xec203a95366a21afL, + 0x6aec5d6d068b141fL }, + { 0xee2df78a994f04e9L,0xb39ccae8271245b0L,0xb875a4a997e43f4fL, + 0x507dfe11db2cea98L } }, + /* 13 << 77 */ + { { 0x4fbf81cb489b03e9L,0xdb86ec5b6ec414faL,0xfad444f9f51b3ae5L, + 0xca7d33d61914e3feL }, + { 0xa9c32f5c0ae6c4d0L,0xa9ca1d1e73969568L,0x98043c311aa7467eL, + 0xe832e75ce21b5ac6L } }, + /* 14 << 77 */ + { { 0x314b7aea5232123dL,0x08307c8c65ae86dbL,0x06e7165caa4668edL, + 0xb170458bb4d3ec39L }, + { 0x4d2e3ec6c19bb986L,0xc5f34846ae0304edL,0x917695a06c9f9722L, + 0x6c7f73174cab1c0aL } }, + /* 15 << 77 */ + { { 0x6295940e9d6d2e8bL,0xd318b8c1549f7c97L,0x2245320497713885L, + 0x468d834ba8a440feL }, + { 0xd81fe5b2bfba796eL,0x152364db6d71f116L,0xbb8c7c59b5b66e53L, + 0x0b12c61b2641a192L } }, + /* 16 << 77 */ + { { 0x31f14802fcf0a7fdL,0x42fd07895488b01eL,0x71d78d6d9952b498L, + 0x8eb572d907ac5201L }, + { 0xe0a2a44c4d194a88L,0xd2b63fd9ba017e66L,0x78efc6c8f888aefcL, + 0xb76f6bda4a881a11L } }, + /* 17 << 77 */ + { { 0x187f314bb46c2397L,0x004cf5665ded2819L,0xa9ea570438764d34L, + 0xbba4521778084709L }, + { 0x064745711171121eL,0xad7b7eb1e7c9b671L,0xdacfbc40730f7507L, + 0x178cd8c6c7ad7bd1L } }, + /* 18 << 77 */ + { { 0xbf0be101b2a67238L,0x3556d367af9c14f2L,0x104b7831a5662075L, + 0x58ca59bb79d9e60aL }, + { 0x4bc45392a569a73bL,0x517a52e85698f6c9L,0x85643da5aeadd755L, + 0x1aed0cd52a581b84L } }, + /* 19 << 77 */ + { { 0xb9b4ff8480af1372L,0x244c3113f1ba5d1fL,0x2a5dacbef5f98d31L, + 0x2c3323e84375bc2aL }, + { 0x17a3ab4a5594b1ddL,0xa1928bfbceb4797eL,0xe83af245e4886a19L, + 0x8979d54672b5a74aL } }, + /* 20 << 77 */ + { { 0xa0f726bc19f9e967L,0xd9d03152e8fbbf4eL,0xcfd6f51db7707d40L, + 0x633084d963f6e6e0L }, + { 0xedcd9cdc55667eafL,0x73b7f92b2e44d56fL,0xfb2e39b64e962b14L, + 0x7d408f6ef671fcbfL } }, + /* 21 << 77 */ + { { 0xcc634ddc164a89bbL,0x74a42bb23ef3bd05L,0x1280dbb2428decbbL, + 0x6103f6bb402c8596L }, + { 0xfa2bf581355a5752L,0x562f96a800946674L,0x4e4ca16d6da0223bL, + 0xfe47819f28d3aa25L } }, + /* 22 << 77 */ + { { 0x9eea3075f8dfcf8aL,0xa284f0aa95669825L,0xb3fca250867d3fd8L, + 0x20757b5f269d691eL }, + { 0xf2c2402093b8a5deL,0xd3f93359ebc06da6L,0x1178293eb2739c33L, + 0xd2a3e770bcd686e5L } }, + /* 23 << 77 */ + { { 
0xa76f49f4cd941534L,0x0d37406be3c71c0eL,0x172d93973b97f7e3L, + 0xec17e239bd7fd0deL }, + { 0xe32905516f496ba2L,0x6a69317236ad50e7L,0xc4e539a283e7eff5L, + 0x752737e718e1b4cfL } }, + /* 24 << 77 */ + { { 0xa2f7932c68af43eeL,0x5502468e703d00bdL,0xe5dc978f2fb061f5L, + 0xc9a1904a28c815adL }, + { 0xd3af538d470c56a4L,0x159abc5f193d8cedL,0x2a37245f20108ef3L, + 0xfa17081e223f7178L } }, + /* 25 << 77 */ + { { 0x27b0fb2b10c8c0f5L,0x2102c3ea40650547L,0x594564df8ac3bfa7L, + 0x98102033509dad96L }, + { 0x6989643ff1d18a13L,0x35eebd91d7fc5af0L,0x078d096afaeaafd8L, + 0xb7a89341def3de98L } }, + /* 26 << 77 */ + { { 0x2a206e8decf2a73aL,0x066a63978e551994L,0x3a6a088ab98d53a2L, + 0x0ce7c67c2d1124aaL }, + { 0x48cec671759a113cL,0xe3b373d34f6f67faL,0x5455d479fd36727bL, + 0xe5a428eea13c0d81L } }, + /* 27 << 77 */ + { { 0xb853dbc81c86682bL,0xb78d2727b8d02b2aL,0xaaf69bed8ebc329aL, + 0xdb6b40b3293b2148L }, + { 0xe42ea77db8c4961fL,0xb1a12f7c20e5e0abL,0xa0ec527479e8b05eL, + 0x68027391fab60a80L } }, + /* 28 << 77 */ + { { 0x6bfeea5f16b1bd5eL,0xf957e4204de30ad3L,0xcbaf664e6a353b9eL, + 0x5c87331226d14febL }, + { 0x4e87f98cb65f57cbL,0xdb60a6215e0cdd41L,0x67c16865a6881440L, + 0x1093ef1a46ab52aaL } }, + /* 29 << 77 */ + { { 0xc095afb53f4ece64L,0x6a6bb02e7604551aL,0x55d44b4e0b26b8cdL, + 0xe5f9a999f971268aL }, + { 0xc08ec42511a7de84L,0x83568095fda469ddL,0x737bfba16c6c90a2L, + 0x1cb9c4a0be229831L } }, + /* 30 << 77 */ + { { 0x93bccbbabb2eec64L,0xa0c23b64da03adbeL,0x5f7aa00ae0e86ac4L, + 0x470b941efc1401e6L }, + { 0x5ad8d6799df43574L,0x4ccfb8a90f65d810L,0x1bce80e3aa7fbd81L, + 0x273291ad9508d20aL } }, + /* 31 << 77 */ + { { 0xf5c4b46b42a92806L,0x810684eca86ab44aL,0x4591640bca0bc9f8L, + 0xb5efcdfc5c4b6054L }, + { 0x16fc89076e9edd12L,0xe29d0b50d4d792f9L,0xa45fd01c9b03116dL, + 0x85035235c81765a4L } }, + /* 32 << 77 */ + { { 0x1fe2a9b2b4b4b67cL,0xc1d10df0e8020604L,0x9d64abfcbc8058d8L, + 0x8943b9b2712a0fbbL }, + { 0x90eed9143b3def04L,0x85ab3aa24ce775ffL,0x605fd4ca7bbc9040L, + 0x8b34a564e2c75dfbL } }, + /* 33 << 77 */ + { { 0x41ffc94a10358560L,0x2d8a50729e5c28aaL,0xe915a0fc4cc7eb15L, + 0xe9efab058f6d0f5dL }, + { 0xdbab47a9d19e9b91L,0x8cfed7450276154cL,0x154357ae2cfede0dL, + 0x520630df19f5a4efL } }, + /* 34 << 77 */ + { { 0x25759f7ce382360fL,0xb6db05c988bf5857L,0x2917d61d6c58d46cL, + 0x14f8e491fd20cb7aL }, + { 0xb68a727a11c20340L,0x0386f86faf7ccbb6L,0x5c8bc6ccfee09a20L, + 0x7d76ff4abb7eea35L } }, + /* 35 << 77 */ + { { 0xa7bdebe7db15be7aL,0x67a08054d89f0302L,0x56bf0ea9c1193364L, + 0xc824446762837ebeL }, + { 0x32bd8e8b20d841b8L,0x127a0548dbb8a54fL,0x83dd4ca663b20236L, + 0x87714718203491faL } }, + /* 36 << 77 */ + { { 0x4dabcaaaaa8a5288L,0x91cc0c8aaf23a1c9L,0x34c72c6a3f220e0cL, + 0xbcc20bdf1232144aL }, + { 0x6e2f42daa20ede1bL,0xc441f00c74a00515L,0xbf46a5b6734b8c4bL, + 0x574095037b56c9a4L } }, + /* 37 << 77 */ + { { 0x9f735261e4585d45L,0x9231faed6734e642L,0x1158a176be70ee6cL, + 0x35f1068d7c3501bfL }, + { 0x6beef900a2d26115L,0x649406f2ef0afee3L,0x3f43a60abc2420a1L, + 0x509002a7d5aee4acL } }, + /* 38 << 77 */ + { { 0xb46836a53ff3571bL,0x24f98b78837927c1L,0x6254256a4533c716L, + 0xf27abb0bd07ee196L }, + { 0xd7cf64fc5c6d5bfdL,0x6915c751f0cd7a77L,0xd9f590128798f534L, + 0x772b0da8f81d8b5fL } }, + /* 39 << 77 */ + { { 0x1244260c2e03fa69L,0x36cf0e3a3be1a374L,0x6e7c1633ef06b960L, + 0xa71a4c55671f90f6L }, + { 0x7a94125133c673dbL,0xc0bea51073e8c131L,0x61a8a699d4f6c734L, + 0x25e78c88341ed001L } }, + /* 40 << 77 */ + { { 0x5c18acf88e2f7d90L,0xfdbf33d777be32cdL,0x0a085cd7d2eb5ee9L, + 0x2d702cfbb3201115L }, + { 
0xb6e0ebdb85c88ce8L,0x23a3ce3c1e01d617L,0x3041618e567333acL, + 0x9dd0fd8f157edb6bL } }, + /* 41 << 77 */ + { { 0x27f74702b57872b8L,0x2ef26b4f657d5fe1L,0x95426f0a57cf3d40L, + 0x847e2ad165a6067aL }, + { 0xd474d9a009996a74L,0x16a56acd2a26115cL,0x02a615c3d16f4d43L, + 0xcc3fc965aadb85b7L } }, + /* 42 << 77 */ + { { 0x386bda73ce07d1b0L,0xd82910c258ad4178L,0x124f82cfcd2617f4L, + 0xcc2f5e8def691770L }, + { 0x82702550b8c30cccL,0x7b856aea1a8e575aL,0xbb822fefb1ab9459L, + 0x085928bcec24e38eL } }, + /* 43 << 77 */ + { { 0x5d0402ecba8f4b4dL,0xc07cd4ba00b4d58bL,0x5d8dffd529227e7aL, + 0x61d44d0c31bf386fL }, + { 0xe486dc2b135e6f4dL,0x680962ebe79410efL,0xa61bd343f10088b5L, + 0x6aa76076e2e28686L } }, + /* 44 << 77 */ + { { 0x80463d118fb98871L,0xcb26f5c3bbc76affL,0xd4ab8eddfbe03614L, + 0xc8eb579bc0cf2deeL }, + { 0xcc004c15c93bae41L,0x46fbae5d3aeca3b2L,0x671235cf0f1e9ab1L, + 0xadfba9349ec285c1L } }, + /* 45 << 77 */ + { { 0x88ded013f216c980L,0xc8ac4fb8f79e0bc1L,0xa29b89c6fb97a237L, + 0xb697b7809922d8e7L }, + { 0x3142c639ddb945b5L,0x447b06c7e094c3a9L,0xcdcb364272266c90L, + 0x633aad08a9385046L } }, + /* 46 << 77 */ + { { 0xa36c936bb57c6477L,0x871f8b64e94dbcc6L,0x28d0fb62a591a67bL, + 0x9d40e081c1d926f5L }, + { 0x3111eaf6f2d84b5aL,0x228993f9a565b644L,0x0ccbf5922c83188bL, + 0xf87b30ab3df3e197L } }, + /* 47 << 77 */ + { { 0xb8658b317642bca8L,0x1a032d7f52800f17L,0x051dcae579bf9445L, + 0xeba6b8ee54a2e253L }, + { 0x5c8b9cadd4485692L,0x84bda40e8986e9beL,0xd16d16a42f0db448L, + 0x8ec80050a14d4188L } }, + /* 48 << 77 */ + { { 0xb2b2610798fa7aaaL,0x41209ee4f073aa4eL,0xf1570359f2d6b19bL, + 0xcbe6868cfc577cafL }, + { 0x186c4bdc32c04dd3L,0xa6c35faecfeee397L,0xb4a1b312f086c0cfL, + 0xe0a5ccc6d9461fe2L } }, + /* 49 << 77 */ + { { 0xc32278aa1536189fL,0x1126c55fba6df571L,0x0f71a602b194560eL, + 0x8b2d7405324bd6e1L }, + { 0x8481939e3738be71L,0xb5090b1a1a4d97a9L,0x116c65a3f05ba915L, + 0x21863ad3aae448aaL } }, + /* 50 << 77 */ + { { 0xd24e2679a7aae5d3L,0x7076013d0de5c1c4L,0x2d50f8babb05b629L, + 0x73c1abe26e66efbbL }, + { 0xefd4b422f2488af7L,0xe4105d02663ba575L,0x7eb60a8b53a69457L, + 0x62210008c945973bL } }, + /* 51 << 77 */ + { { 0xfb25547877a50ec6L,0xbf0392f70a37a72cL,0xa0a7a19c4be18e7aL, + 0x90d8ea1625b1e0afL }, + { 0x7582a293ef953f57L,0x90a64d05bdc5465aL,0xca79c497e2510717L, + 0x560dbb7c18cb641fL } }, + /* 52 << 77 */ + { { 0x1d8e32864b66abfbL,0xd26f52e559030900L,0x1ee3f6435584941aL, + 0x6d3b3730569f5958L }, + { 0x9ff2a62f4789dba5L,0x91fcb81572b5c9b7L,0xf446cb7d6c8f9a0eL, + 0x48f625c139b7ecb5L } }, + /* 53 << 77 */ + { { 0xbabae8011c6219b8L,0xe7a562d928ac2f23L,0xe1b4873226e20588L, + 0x06ee1cad775af051L }, + { 0xda29ae43faff79f7L,0xc141a412652ee9e0L,0x1e127f6f195f4bd0L, + 0x29c6ab4f072f34f8L } }, + /* 54 << 77 */ + { { 0x7b7c147730448112L,0x82b51af1e4a38656L,0x2bf2028a2f315010L, + 0xc9a4a01f6ea88cd4L }, + { 0xf63e95d8257e5818L,0xdd8efa10b4519b16L,0xed8973e00da910bfL, + 0xed49d0775c0fe4a9L } }, + /* 55 << 77 */ + { { 0xac3aac5eb7caee1eL,0x1033898da7f4da57L,0x42145c0e5c6669b9L, + 0x42daa688c1aa2aa0L }, + { 0x629cc15c1a1d885aL,0x25572ec0f4b76817L,0x8312e4359c8f8f28L, + 0x8107f8cd81965490L } }, + /* 56 << 77 */ + { { 0x516ff3a36fa6110cL,0x74fb1eb1fb93561fL,0x6c0c90478457522bL, + 0xcfd321046bb8bdc6L }, + { 0x2d6884a2cc80ad57L,0x7c27fc3586a9b637L,0x3461baedadf4e8cdL, + 0x1d56251a617242f0L } }, + /* 57 << 77 */ + { { 0x0b80d209c955bef4L,0xdf02cad206adb047L,0xf0d7cb915ec74feeL, + 0xd25033751111ba44L }, + { 0x9671755edf53cb36L,0x54dcb6123368551bL,0x66d69aacc8a025a4L, + 0x6be946c6e77ef445L } }, + /* 58 << 77 */ + { { 
0x719946d1a995e094L,0x65e848f6e51e04d8L,0xe62f33006a1e3113L, + 0x1541c7c1501de503L }, + { 0x4daac9faf4acfadeL,0x0e58589744cd0b71L,0x544fd8690a51cd77L, + 0x60fc20ed0031016dL } }, + /* 59 << 77 */ + { { 0x58b404eca4276867L,0x46f6c3cc34f34993L,0x477ca007c636e5bdL, + 0x8018f5e57c458b47L }, + { 0xa1202270e47b668fL,0xcef48ccdee14f203L,0x23f98bae62ff9b4dL, + 0x55acc035c589edddL } }, + /* 60 << 77 */ + { { 0x3fe712af64db4444L,0x19e9d634becdd480L,0xe08bc047a930978aL, + 0x2dbf24eca1280733L }, + { 0x3c0ae38c2cd706b2L,0x5b012a5b359017b9L,0x3943c38c72e0f5aeL, + 0x786167ea57176fa3L } }, + /* 61 << 77 */ + { { 0xe5f9897d594881dcL,0x6b5efad8cfb820c1L,0xb2179093d55018deL, + 0x39ad7d320bac56ceL }, + { 0xb55122e02cfc0e81L,0x117c4661f6d89daaL,0x362d01e1cb64fa09L, + 0x6a309b4e3e9c4dddL } }, + /* 62 << 77 */ + { { 0xfa979fb7abea49b1L,0xb4b1d27d10e2c6c5L,0xbd61c2c423afde7aL, + 0xeb6614f89786d358L }, + { 0x4a5d816b7f6f7459L,0xe431a44f09360e7bL,0x8c27a032c309914cL, + 0xcea5d68acaede3d8L } }, + /* 63 << 77 */ + { { 0x3668f6653a0a3f95L,0x893694167ceba27bL,0x89981fade4728fe9L, + 0x7102c8a08a093562L }, + { 0xbb80310e235d21c8L,0x505e55d1befb7f7bL,0xa0a9081112958a67L, + 0xd67e106a4d851fefL } }, + /* 64 << 77 */ + { { 0xb84011a9431dd80eL,0xeb7c7cca73306cd9L,0x20fadd29d1b3b730L, + 0x83858b5bfe37b3d3L }, + { 0xbf4cd193b6251d5cL,0x1cca1fd31352d952L,0xc66157a490fbc051L, + 0x7990a63889b98636L } }, + /* 0 << 84 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 84 */ + { { 0xe5aa692a87dec0e1L,0x010ded8df7b39d00L,0x7b1b80c854cfa0b5L, + 0x66beb876a0f8ea28L }, + { 0x50d7f5313476cd0eL,0xa63d0e65b08d3949L,0x1a09eea953479fc6L, + 0x82ae9891f499e742L } }, + /* 2 << 84 */ + { { 0xab58b9105ca7d866L,0x582967e23adb3b34L,0x89ae4447cceac0bcL, + 0x919c667c7bf56af5L }, + { 0x9aec17b160f5dcd7L,0xec697b9fddcaadbcL,0x0b98f341463467f5L, + 0xb187f1f7a967132fL } }, + /* 3 << 84 */ + { { 0x90fe7a1d214aeb18L,0x1506af3c741432f7L,0xbb5565f9e591a0c4L, + 0x10d41a77b44f1bc3L }, + { 0xa09d65e4a84bde96L,0x42f060d8f20a6a1cL,0x652a3bfdf27f9ce7L, + 0xb6bdb65c3b3d739fL } }, + /* 4 << 84 */ + { { 0xeb5ddcb6ec7fae9fL,0x995f2714efb66e5aL,0xdee95d8e69445d52L, + 0x1b6c2d4609e27620L }, + { 0x32621c318129d716L,0xb03909f10958c1aaL,0x8c468ef91af4af63L, + 0x162c429ffba5cdf6L } }, + /* 5 << 84 */ + { { 0x2f682343753b9371L,0x29cab45a5f1f9cd7L,0x571623abb245db96L, + 0xc507db093fd79999L }, + { 0x4e2ef652af036c32L,0x86f0cc7805018e5cL,0xc10a73d4ab8be350L, + 0x6519b3977e826327L } }, + /* 6 << 84 */ + { { 0xe8cb5eef9c053df7L,0x8de25b37b300ea6fL,0xdb03fa92c849cffbL, + 0x242e43a7e84169bbL }, + { 0xe4fa51f4dd6f958eL,0x6925a77ff4445a8dL,0xe6e72a50e90d8949L, + 0xc66648e32b1f6390L } }, + /* 7 << 84 */ + { { 0xb2ab1957173e460cL,0x1bbbce7530704590L,0xc0a90dbddb1c7162L, + 0x505e399e15cdd65dL }, + { 0x68434dcb57797ab7L,0x60ad35ba6a2ca8e8L,0x4bfdb1e0de3336c1L, + 0xbbef99ebd8b39015L } }, + /* 8 << 84 */ + { { 0x6c3b96f31711ebecL,0x2da40f1fce98fdc4L,0xb99774d357b4411fL, + 0x87c8bdf415b65bb6L }, + { 0xda3a89e3c2eef12dL,0xde95bb9b3c7471f3L,0x600f225bd812c594L, + 0x54907c5d2b75a56bL } }, + /* 9 << 84 */ + { { 0xa93cc5f08db60e35L,0x743e3cd6fa833319L,0x7dad5c41f81683c9L, + 0x70c1e7d99c34107eL }, + { 0x0edc4a39a6be0907L,0x36d4703586d0b7d3L,0x8c76da03272bfa60L, + 0x0b4a07ea0f08a414L } }, + /* 10 << 84 */ + { { 0x699e4d2945c1dd53L,0xcadc5898231debb5L,0xdf49fcc7a77f00e0L, + 0x93057bbfa73e5a0eL }, + { 0x2f8b7ecd027a4cd1L,0x114734b3c614011aL,0xe7a01db767677c68L, + 0x89d9be5e7e273f4fL } }, + /* 11 << 84 */ + { { 0xd225cb2e089808efL,0xf1f7a27dd59e4107L,0x53afc7618211b9c9L, + 
0x0361bc67e6819159L }, + { 0x2a865d0b7f071426L,0x6a3c1810e7072567L,0x3e3bca1e0d6bcabdL, + 0xa1b02bc1408591bcL } }, + /* 12 << 84 */ + { { 0xe0deee5931fba239L,0xf47424d398bd91d1L,0x0f8886f4071a3c1dL, + 0x3f7d41e8a819233bL }, + { 0x708623c2cf6eb998L,0x86bb49af609a287fL,0x942bb24963c90762L, + 0x0ef6eea555a9654bL } }, + /* 13 << 84 */ + { { 0x5f6d2d7236f5defeL,0xfa9922dc56f99176L,0x6c8c5ecef78ce0c7L, + 0x7b44589dbe09b55eL }, + { 0xe11b3bca9ea83770L,0xd7fa2c7f2ab71547L,0x2a3dd6fa2a1ddcc0L, + 0x09acb4305a7b7707L } }, + /* 14 << 84 */ + { { 0x4add4a2e649d4e57L,0xcd53a2b01917526eL,0xc526233020b44ac4L, + 0x4028746abaa2c31dL }, + { 0x5131839064291d4cL,0xbf48f151ee5ad909L,0xcce57f597b185681L, + 0x7c3ac1b04854d442L } }, + /* 15 << 84 */ + { { 0x65587dc3c093c171L,0xae7acb2424f42b65L,0x5a338adb955996cbL, + 0xc8e656756051f91bL }, + { 0x66711fba28b8d0b1L,0x15d74137b6c10a90L,0x70cdd7eb3a232a80L, + 0xc9e2f07f6191ed24L } }, + /* 16 << 84 */ + { { 0xa80d1db6f79588c0L,0xfa52fc69b55768ccL,0x0b4df1ae7f54438aL, + 0x0cadd1a7f9b46a4fL }, + { 0xb40ea6b31803dd6fL,0x488e4fa555eaae35L,0x9f047d55382e4e16L, + 0xc9b5b7e02f6e0c98L } }, + /* 17 << 84 */ + { { 0x6b1bd2d395762649L,0xa9604ee7c7aea3f6L,0x3646ff276dc6f896L, + 0x9bf0e7f52860bad1L }, + { 0x2d92c8217cb44b92L,0xa2f5ce63aea9c182L,0xd0a2afb19154a5fdL, + 0x482e474c95801da6L } }, + /* 18 << 84 */ + { { 0xc19972d0b611c24bL,0x1d468e6560a8f351L,0xeb7580697bcf6421L, + 0xec9dd0ee88fbc491L }, + { 0x5b59d2bf956c2e32L,0x73dc6864dcddf94eL,0xfd5e2321bcee7665L, + 0xa7b4f8ef5e9a06c4L } }, + /* 19 << 84 */ + { { 0xfba918dd7280f855L,0xbbaac2608baec688L,0xa3b3f00f33400f42L, + 0x3d2dba2966f2e6e4L }, + { 0xb6f71a9498509375L,0x8f33031fcea423ccL,0x009b8dd04807e6fbL, + 0x5163cfe55cdb954cL } }, + /* 20 << 84 */ + { { 0x03cc8f17cf41c6e8L,0xf1f03c2a037b925cL,0xc39c19cc66d2427cL, + 0x823d24ba7b6c18e4L }, + { 0x32ef9013901f0b4fL,0x684360f1f8941c2eL,0x0ebaff522c28092eL, + 0x7891e4e3256c932fL } }, + /* 21 << 84 */ + { { 0x51264319ac445e3dL,0x553432e78ea74381L,0xe6eeaa6967e9c50aL, + 0x27ced28462e628c7L }, + { 0x3f96d3757a4afa57L,0xde0a14c3e484c150L,0x364a24eb38bd9923L, + 0x1df18da0e5177422L } }, + /* 22 << 84 */ + { { 0x174e8f82d8d38a9bL,0x2e97c600e7de1391L,0xc5709850a1c175ddL, + 0x969041a032ae5035L }, + { 0xcbfd533b76a2086bL,0xd6bba71bd7c2e8feL,0xb2d58ee6099dfb67L, + 0x3a8b342d064a85d9L } }, + /* 23 << 84 */ + { { 0x3bc07649522f9be3L,0x690c075bdf1f49a8L,0x80e1aee83854ec42L, + 0x2a7dbf4417689dc7L }, + { 0xc004fc0e3faf4078L,0xb2f02e9edf11862cL,0xf10a5e0fa0a1b7b3L, + 0x30aca6238936ec80L } }, + /* 24 << 84 */ + { { 0xf83cbf0502f40d9aL,0x4681c4682c318a4dL,0x985756180e9c2674L, + 0xbe79d0461847092eL }, + { 0xaf1e480a78bd01e0L,0x6dd359e472a51db9L,0x62ce3821e3afbab6L, + 0xc5cee5b617733199L } }, + /* 25 << 84 */ + { { 0xe08b30d46ffd9fbbL,0x6e5bc69936c610b7L,0xf343cff29ce262cfL, + 0xca2e4e3568b914c1L }, + { 0x011d64c016de36c5L,0xe0b10fdd42e2b829L,0x789429816685aaf8L, + 0xe7511708230ede97L } }, + /* 26 << 84 */ + { { 0x671ed8fc3b922bf8L,0xe4d8c0a04c29b133L,0x87eb12393b6e99c4L, + 0xaff3974c8793bebaL }, + { 0x037494052c18df9bL,0xc5c3a29391007139L,0x6a77234fe37a0b95L, + 0x02c29a21b661c96bL } }, + /* 27 << 84 */ + { { 0xc3aaf1d6141ecf61L,0x9195509e3bb22f53L,0x2959740422d51357L, + 0x1b083822537bed60L }, + { 0xcd7d6e35e07289f0L,0x1f94c48c6dd86effL,0xc8bb1f82eb0f9cfaL, + 0x9ee0b7e61b2eb97dL } }, + /* 28 << 84 */ + { { 0x5a52fe2e34d74e31L,0xa352c3103bf79ab6L,0x97ff6c5aabfeeb8fL, + 0xbfbe8feff5c97305L }, + { 0xd6081ce6a7904608L,0x1f812f3ac4fca249L,0x9b24bc9ab9e5e200L, + 0x91022c6738012ee8L } }, + /* 29 << 84 */ + { { 
0xe83d9c5d30a713a1L,0x4876e3f084ef0f93L,0xc9777029c1fbf928L, + 0xef7a6bb3bce7d2a4L }, + { 0xb8067228dfa2a659L,0xd5cd3398d877a48fL,0xbea4fd8f025d0f3fL, + 0xd67d2e352eae7c2bL } }, + /* 30 << 84 */ + { { 0x184de7d7cc5f4394L,0xb5551b5c4536e142L,0x2e89b212d34aa60aL, + 0x14a96feaf50051d5L }, + { 0x4e21ef740d12bb0bL,0xc522f02060b9677eL,0x8b12e4672df7731dL, + 0x39f803827b326d31L } }, + /* 31 << 84 */ + { { 0xdfb8630c39024a94L,0xaacb96a897319452L,0xd68a3961eda3867cL, + 0x0c58e2b077c4ffcaL }, + { 0x3d545d634da919faL,0xef79b69af15e2289L,0x54bc3d3d808bab10L, + 0xc8ab300745f82c37L } }, + /* 32 << 84 */ + { { 0xc12738b67c4a658aL,0xb3c4763940e72182L,0x3b77be468798e44fL, + 0xdc047df217a7f85fL }, + { 0x2439d4c55e59d92dL,0xcedca475e8e64d8dL,0xa724cd0d87ca9b16L, + 0x35e4fd59a5540dfeL } }, + /* 33 << 84 */ + { { 0xf8c1ff18e4bcf6b1L,0x856d6285295018faL,0x433f665c3263c949L, + 0xa6a76dd6a1f21409L }, + { 0x17d32334cc7b4f79L,0xa1d0312206720e4aL,0xadb6661d81d9bed5L, + 0xf0d6fb0211db15d1L } }, + /* 34 << 84 */ + { { 0x7fd11ad51fb747d2L,0xab50f9593033762bL,0x2a7e711bfbefaf5aL, + 0xc73932783fef2bbfL }, + { 0xe29fa2440df6f9beL,0x9092757b71efd215L,0xee60e3114f3d6fd9L, + 0x338542d40acfb78bL } }, + /* 35 << 84 */ + { { 0x44a23f0838961a0fL,0x1426eade986987caL,0x36e6ee2e4a863cc6L, + 0x48059420628b8b79L }, + { 0x30303ad87396e1deL,0x5c8bdc4838c5aad1L,0x3e40e11f5c8f5066L, + 0xabd6e7688d246bbdL } }, + /* 36 << 84 */ + { { 0x68aa40bb23330a01L,0xd23f5ee4c34eafa0L,0x3bbee3155de02c21L, + 0x18dd4397d1d8dd06L }, + { 0x3ba1939a122d7b44L,0xe6d3b40aa33870d6L,0x8e620f701c4fe3f8L, + 0xf6bba1a5d3a50cbfL } }, + /* 37 << 84 */ + { { 0x4a78bde5cfc0aee0L,0x847edc46c08c50bdL,0xbaa2439cad63c9b2L, + 0xceb4a72810fc2acbL }, + { 0xa419e40e26da033dL,0x6cc3889d03e02683L,0x1cd28559fdccf725L, + 0x0fd7e0f18d13d208L } }, + /* 38 << 84 */ + { { 0x01b9733b1f0df9d4L,0x8cc2c5f3a2b5e4f3L,0x43053bfa3a304fd4L, + 0x8e87665c0a9f1aa7L }, + { 0x087f29ecd73dc965L,0x15ace4553e9023dbL,0x2370e3092bce28b4L, + 0xf9723442b6b1e84aL } }, + /* 39 << 84 */ + { { 0xbeee662eb72d9f26L,0xb19396def0e47109L,0x85b1fa73e13289d0L, + 0x436cf77e54e58e32L }, + { 0x0ec833b3e990ef77L,0x7373e3ed1b11fc25L,0xbe0eda870fc332ceL, + 0xced049708d7ea856L } }, + /* 40 << 84 */ + { { 0xf85ff7857e977ca0L,0xb66ee8dadfdd5d2bL,0xf5e37950905af461L, + 0x587b9090966d487cL }, + { 0x6a198a1b32ba0127L,0xa7720e07141615acL,0xa23f3499996ef2f2L, + 0xef5f64b4470bcb3dL } }, + /* 41 << 84 */ + { { 0xa526a96292b8c559L,0x0c14aac069740a0fL,0x0d41a9e3a6bdc0a5L, + 0x97d521069c48aef4L }, + { 0xcf16bd303e7c253bL,0xcc834b1a47fdedc1L,0x7362c6e5373aab2eL, + 0x264ed85ec5f590ffL } }, + /* 42 << 84 */ + { { 0x7a46d9c066d41870L,0xa50c20b14787ba09L,0x185e7e51e3d44635L, + 0xb3b3e08031e2d8dcL }, + { 0xbed1e558a179e9d9L,0x2daa3f7974a76781L,0x4372baf23a40864fL, + 0x46900c544fe75cb5L } }, + /* 43 << 84 */ + { { 0xb95f171ef76765d0L,0x4ad726d295c87502L,0x2ec769da4d7c99bdL, + 0x5e2ddd19c36cdfa8L }, + { 0xc22117fca93e6deaL,0xe8a2583b93771123L,0xbe2f6089fa08a3a2L, + 0x4809d5ed8f0e1112L } }, + /* 44 << 84 */ + { { 0x3b414aa3da7a095eL,0x9049acf126f5aaddL,0x78d46a4d6be8b84aL, + 0xd66b1963b732b9b3L }, + { 0x5c2ac2a0de6e9555L,0xcf52d098b5bd8770L,0x15a15fa60fd28921L, + 0x56ccb81e8b27536dL } }, + /* 45 << 84 */ + { { 0x0f0d8ab89f4ccbb8L,0xed5f44d2db221729L,0x4314198800bed10cL, + 0xc94348a41d735b8bL }, + { 0x79f3e9c429ef8479L,0x4c13a4e3614c693fL,0x32c9af568e143a14L, + 0xbc517799e29ac5c4L } }, + /* 46 << 84 */ + { { 0x05e179922774856fL,0x6e52fb056c1bf55fL,0xaeda4225e4f19e16L, + 0x70f4728aaf5ccb26L }, + { 
0x5d2118d1b2947f22L,0xc827ea16281d6fb9L,0x8412328d8cf0eabdL, + 0x45ee9fb203ef9dcfL } }, + /* 47 << 84 */ + { { 0x8e700421bb937d63L,0xdf8ff2d5cc4b37a6L,0xa4c0d5b25ced7b68L, + 0x6537c1efc7308f59L }, + { 0x25ce6a263b37f8e8L,0x170e9a9bdeebc6ceL,0xdd0379528728d72cL, + 0x445b0e55850154bcL } }, + /* 48 << 84 */ + { { 0x4b7d0e0683a7337bL,0x1e3416d4ffecf249L,0x24840eff66a2b71fL, + 0xd0d9a50ab37cc26dL }, + { 0xe21981506fe28ef7L,0x3cc5ef1623324c7fL,0x220f3455769b5263L, + 0xe2ade2f1a10bf475L } }, + /* 49 << 84 */ + { { 0x28cd20fa458d3671L,0x1549722c2dc4847bL,0x6dd01e55591941e3L, + 0x0e6fbcea27128ccbL }, + { 0xae1a1e6b3bef0262L,0xfa8c472c8f54e103L,0x7539c0a872c052ecL, + 0xd7b273695a3490e9L } }, + /* 50 << 84 */ + { { 0x143fe1f171684349L,0x36b4722e32e19b97L,0xdc05922790980affL, + 0x175c9c889e13d674L }, + { 0xa7de5b226e6bfdb1L,0x5ea5b7b2bedb4b46L,0xd5570191d34a6e44L, + 0xfcf60d2ea24ff7e6L } }, + /* 51 << 84 */ + { { 0x614a392d677819e1L,0x7be74c7eaa5a29e8L,0xab50fece63c85f3fL, + 0xaca2e2a946cab337L }, + { 0x7f700388122a6fe3L,0xdb69f703882a04a8L,0x9a77935dcf7aed57L, + 0xdf16207c8d91c86fL } }, + /* 52 << 84 */ + { { 0x2fca49ab63ed9998L,0xa3125c44a77ddf96L,0x05dd8a8624344072L, + 0xa023dda2fec3fb56L }, + { 0x421b41fc0c743032L,0x4f2120c15e438639L,0xfb7cae51c83c1b07L, + 0xb2370caacac2171aL } }, + /* 53 << 84 */ + { { 0x2eb2d9626cc820fbL,0x59feee5cb85a44bfL,0x94620fca5b6598f0L, + 0x6b922cae7e314051L }, + { 0xff8745ad106bed4eL,0x546e71f5dfa1e9abL,0x935c1e481ec29487L, + 0x9509216c4d936530L } }, + /* 54 << 84 */ + { { 0xc7ca306785c9a2dbL,0xd6ae51526be8606fL,0x09dbcae6e14c651dL, + 0xc9536e239bc32f96L }, + { 0xa90535a934521b03L,0xf39c526c878756ffL,0x383172ec8aedf03cL, + 0x20a8075eefe0c034L } }, + /* 55 << 84 */ + { { 0xf22f9c6264026422L,0x8dd1078024b9d076L,0x944c742a3bef2950L, + 0x55b9502e88a2b00bL }, + { 0xa59e14b486a09817L,0xa39dd3ac47bb4071L,0x55137f663be0592fL, + 0x07fcafd4c9e63f5bL } }, + /* 56 << 84 */ + { { 0x963652ee346eb226L,0x7dfab085ec2facb7L,0x273bf2b8691add26L, + 0x30d74540f2b46c44L }, + { 0x05e8e73ef2c2d065L,0xff9b8a00d42eeac9L,0x2fcbd20597209d22L, + 0xeb740ffade14ea2cL } }, + /* 57 << 84 */ + { { 0xc71ff913a8aef518L,0x7bfc74bbfff4cfa2L,0x1716680cb6b36048L, + 0x121b2cce9ef79af1L }, + { 0xbff3c836a01eb3d3L,0x50eb1c6a5f79077bL,0xa48c32d6a004bbcfL, + 0x47a593167d64f61dL } }, + /* 58 << 84 */ + { { 0x6068147f93102016L,0x12c5f65494d12576L,0xefb071a7c9bc6b91L, + 0x7c2da0c56e23ea95L }, + { 0xf4fd45b6d4a1dd5dL,0x3e7ad9b69122b13cL,0x342ca118e6f57a48L, + 0x1c2e94a706f8288fL } }, + /* 59 << 84 */ + { { 0x99e68f075a97d231L,0x7c80de974d838758L,0xbce0f5d005872727L, + 0xbe5d95c219c4d016L }, + { 0x921d5cb19c2492eeL,0x42192dc1404d6fb3L,0x4c84dcd132f988d3L, + 0xde26d61fa17b8e85L } }, + /* 60 << 84 */ + { { 0xc466dcb6137c7408L,0x9a38d7b636a266daL,0x7ef5cb0683bebf1bL, + 0xe5cdcbbf0fd014e3L }, + { 0x30aa376df65965a0L,0x60fe88c2ebb3e95eL,0x33fd0b6166ee6f20L, + 0x8827dcdb3f41f0a0L } }, + /* 61 << 84 */ + { { 0xbf8a9d240c56c690L,0x40265dadddb7641dL,0x522b05bf3a6b662bL, + 0x466d1dfeb1478c9bL }, + { 0xaa6169621484469bL,0x0db6054902df8f9fL,0xc37bca023cb8bf51L, + 0x5effe34621371ce8L } }, + /* 62 << 84 */ + { { 0xe8f65264ff112c32L,0x8a9c736d7b971fb2L,0xa4f194707b75080dL, + 0xfc3f2c5a8839c59bL }, + { 0x1d6c777e5aeb49c2L,0xf3db034dda1addfeL,0xd76fee5a5535affcL, + 0x0853ac70b92251fdL } }, + /* 63 << 84 */ + { { 0x37e3d5948b2a29d5L,0x28f1f4574de00ddbL,0x8083c1b5f42c328bL, + 0xd8ef1d8fe493c73bL }, + { 0x96fb626041dc61bdL,0xf74e8a9d27ee2f8aL,0x7c605a802c946a5dL, + 0xeed48d653839ccfdL } }, + /* 64 << 84 */ + { { 
0x9894344f3a29467aL,0xde81e949c51eba6dL,0xdaea066ba5e5c2f2L, + 0x3fc8a61408c8c7b3L }, + { 0x7adff88f06d0de9fL,0xbbc11cf53b75ce0aL,0x9fbb7accfbbc87d5L, + 0xa1458e267badfde2L } }, + /* 0 << 91 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 91 */ + { { 0x1cb43668e039c256L,0x5f26fb8b7c17fd5dL,0xeee426af79aa062bL, + 0x072002d0d78fbf04L }, + { 0x4c9ca237e84fb7e3L,0xb401d8a10c82133dL,0xaaa525926d7e4181L, + 0xe943083373dbb152L } }, + /* 2 << 91 */ + { { 0xf92dda31be24319aL,0x03f7d28be095a8e7L,0xa52fe84098782185L, + 0x276ddafe29c24dbcL }, + { 0x80cd54961d7a64ebL,0xe43608897f1dbe42L,0x2f81a8778438d2d5L, + 0x7e4d52a885169036L } }, + /* 3 << 91 */ + { { 0x19e3d5b11d59715dL,0xc7eaa762d788983eL,0xe5a730b0abf1f248L, + 0xfbab8084fae3fd83L }, + { 0x65e50d2153765b2fL,0xbdd4e083fa127f3dL,0x9cf3c074397b1b10L, + 0x59f8090cb1b59fd3L } }, + /* 4 << 91 */ + { { 0x7b15fd9d615faa8fL,0x8fa1eb40968554edL,0x7bb4447e7aa44882L, + 0x2bb2d0d1029fff32L }, + { 0x075e2a646caa6d2fL,0x8eb879de22e7351bL,0xbcd5624e9a506c62L, + 0x218eaef0a87e24dcL } }, + /* 5 << 91 */ + { { 0x37e5684744ddfa35L,0x9ccfc5c5dab3f747L,0x9ac1df3f1ee96cf4L, + 0x0c0571a13b480b8fL }, + { 0x2fbeb3d54b3a7b3cL,0x35c036695dcdbb99L,0x52a0f5dcb2415b3aL, + 0xd57759b44413ed9aL } }, + /* 6 << 91 */ + { { 0x1fe647d83d30a2c5L,0x0857f77ef78a81dcL,0x11d5a334131a4a9bL, + 0xc0a94af929d393f5L }, + { 0xbc3a5c0bdaa6ec1aL,0xba9fe49388d2d7edL,0xbb4335b4bb614797L, + 0x991c4d6872f83533L } }, + /* 7 << 91 */ + { { 0x53258c28d2f01cb3L,0x93d6eaa3d75db0b1L,0x419a2b0de87d0db4L, + 0xa1e48f03d8fe8493L }, + { 0xf747faf6c508b23aL,0xf137571a35d53549L,0x9f5e58e2fcf9b838L, + 0xc7186ceea7fd3cf5L } }, + /* 8 << 91 */ + { { 0x77b868cee978a1d3L,0xe3a68b337ab92d04L,0x5102979487a5b862L, + 0x5f0606c33a61d41dL }, + { 0x2814be276f9326f1L,0x2f521c14c6fe3c2eL,0x17464d7dacdf7351L, + 0x10f5f9d3777f7e44L } }, + /* 9 << 91 */ + { { 0xce8e616b269fb37dL,0xaaf738047de62de5L,0xaba111754fdd4153L, + 0x515759ba3770b49bL }, + { 0x8b09ebf8aa423a61L,0x592245a1cd41fb92L,0x1cba8ec19b4c8936L, + 0xa87e91e3af36710eL } }, + /* 10 << 91 */ + { { 0x1fd84ce43d34a2e3L,0xee3759ceb43b5d61L,0x895bc78c619186c7L, + 0xf19c3809cbb9725aL }, + { 0xc0be21aade744b1fL,0xa7d222b060f8056bL,0x74be6157b23efe11L, + 0x6fab2b4f0cd68253L } }, + /* 11 << 91 */ + { { 0xad33ea5f4bf1d725L,0x9c1d8ee24f6c950fL,0x544ee78aa377af06L, + 0x54f489bb94a113e1L }, + { 0x8f11d634992fb7e8L,0x0169a7aaa2a44347L,0x1d49d4af95020e00L, + 0x95945722e08e120bL } }, + /* 12 << 91 */ + { { 0xb6e33878a4d32282L,0xe36e029d48020ae7L,0xe05847fb37a9b750L, + 0xf876812cb29e3819L }, + { 0x84ad138ed23a17f0L,0x6d7b4480f0b3950eL,0xdfa8aef42fd67ae0L, + 0x8d3eea2452333af6L } }, + /* 13 << 91 */ + { { 0x0d052075b15d5accL,0xc6d9c79fbd815bc4L,0x8dcafd88dfa36cf2L, + 0x908ccbe238aa9070L }, + { 0x638722c4ba35afceL,0x5a3da8b0fd6abf0bL,0x2dce252cc9c335c1L, + 0x84e7f0de65aa799bL } }, + /* 14 << 91 */ + { { 0x2101a522b99a72cbL,0x06de6e6787618016L,0x5ff8c7cde6f3653eL, + 0x0a821ab5c7a6754aL }, + { 0x7e3fa52b7cb0b5a2L,0xa7fb121cc9048790L,0x1a72502006ce053aL, + 0xb490a31f04e929b0L } }, + /* 15 << 91 */ + { { 0xe17be47d62dd61adL,0x781a961c6be01371L,0x1063bfd3dae3cbbaL, + 0x356474067f73c9baL }, + { 0xf50e957b2736a129L,0xa6313702ed13f256L,0x9436ee653a19fcc5L, + 0xcf2bdb29e7a4c8b6L } }, + /* 16 << 91 */ + { { 0xb06b1244c5f95cd8L,0xda8c8af0f4ab95f4L,0x1bae59c2b9e5836dL, + 0x07d51e7e3acffffcL }, + { 0x01e15e6ac2ccbcdaL,0x3bc1923f8528c3e0L,0x43324577a49fead4L, + 0x61a1b8842aa7a711L } }, + /* 17 << 91 */ + { { 0xf9a86e08700230efL,0x0af585a1bd19adf8L,0x7645f361f55ad8f2L, + 
0x6e67622346c3614cL }, + { 0x23cb257c4e774d3fL,0x82a38513ac102d1bL,0x9bcddd887b126aa5L, + 0xe716998beefd3ee4L } }, + /* 18 << 91 */ + { { 0x4239d571fb167583L,0xdd011c78d16c8f8aL,0x271c289569a27519L, + 0x9ce0a3b7d2d64b6aL }, + { 0x8c977289d5ec6738L,0xa3b49f9a8840ef6bL,0x808c14c99a453419L, + 0x5c00295b0cf0a2d5L } }, + /* 19 << 91 */ + { { 0x524414fb1d4bcc76L,0xb07691d2459a88f1L,0x77f43263f70d110fL, + 0x64ada5e0b7abf9f3L }, + { 0xafd0f94e5b544cf5L,0xb4a13a15fd2713feL,0xb99b7d6e250c74f4L, + 0x097f2f7320324e45L } }, + /* 20 << 91 */ + { { 0x994b37d8affa8208L,0xc3c31b0bdc29aafcL,0x3da746517a3a607fL, + 0xd8e1b8c1fe6955d6L }, + { 0x716e1815c8418682L,0x541d487f7dc91d97L,0x48a04669c6996982L, + 0xf39cab1583a6502eL } }, + /* 21 << 91 */ + { { 0x025801a0e68db055L,0xf3569758ba3338d5L,0xb0c8c0aaee2afa84L, + 0x4f6985d3fb6562d1L }, + { 0x351f1f15132ed17aL,0x510ed0b4c04365feL,0xa3f98138e5b1f066L, + 0xbc9d95d632df03dcL } }, + /* 22 << 91 */ + { { 0xa83ccf6e19abd09eL,0x0b4097c14ff17edbL,0x58a5c478d64a06ceL, + 0x2ddcc3fd544a58fdL }, + { 0xd449503d9e8153b8L,0x3324fd027774179bL,0xaf5d47c8dbd9120cL, + 0xeb86016234fa94dbL } }, + /* 23 << 91 */ + { { 0x5817bdd1972f07f4L,0xe5579e2ed27bbcebL,0x86847a1f5f11e5a6L, + 0xb39ed2557c3cf048L }, + { 0xe1076417a2f62e55L,0x6b9ab38f1bcf82a2L,0x4bb7c3197aeb29f9L, + 0xf6d17da317227a46L } }, + /* 24 << 91 */ + { { 0xab53ddbd0f968c00L,0xa03da7ec000c880bL,0x7b2396246a9ad24dL, + 0x612c040101ec60d0L }, + { 0x70d10493109f5df1L,0xfbda403080af7550L,0x30b93f95c6b9a9b3L, + 0x0c74ec71007d9418L } }, + /* 25 << 91 */ + { { 0x941755646edb951fL,0x5f4a9d787f22c282L,0xb7870895b38d1196L, + 0xbc593df3a228ce7cL }, + { 0xc78c5bd46af3641aL,0x7802200b3d9b3dccL,0x0dc73f328be33304L, + 0x847ed87d61ffb79aL } }, + /* 26 << 91 */ + { { 0xf85c974e6d671192L,0x1e14100ade16f60fL,0x45cb0d5a95c38797L, + 0x18923bba9b022da4L }, + { 0xef2be899bbe7e86eL,0x4a1510ee216067bfL,0xd98c815484d5ce3eL, + 0x1af777f0f92a2b90L } }, + /* 27 << 91 */ + { { 0x9fbcb4004ef65724L,0x3e04a4c93c0ca6feL,0xfb3e2cb555002994L, + 0x1f3a93c55363ecabL }, + { 0x1fe00efe3923555bL,0x744bedd91e1751eaL,0x3fb2db596ab69357L, + 0x8dbd7365f5e6618bL } }, + /* 28 << 91 */ + { { 0x99d53099df1ea40eL,0xb3f24a0b57d61e64L,0xd088a198596eb812L, + 0x22c8361b5762940bL }, + { 0x66f01f97f9c0d95cL,0x884611728e43cdaeL,0x11599a7fb72b15c3L, + 0x135a7536420d95ccL } }, + /* 29 << 91 */ + { { 0x2dcdf0f75f7ae2f6L,0x15fc6e1dd7fa6da2L,0x81ca829ad1d441b6L, + 0x84c10cf804a106b6L }, + { 0xa9b26c95a73fbbd0L,0x7f24e0cb4d8f6ee8L,0x48b459371e25a043L, + 0xf8a74fca036f3dfeL } }, + /* 30 << 91 */ + { { 0x1ed46585c9f84296L,0x7fbaa8fb3bc278b0L,0xa8e96cd46c4fcbd0L, + 0x940a120273b60a5fL }, + { 0x34aae12055a4aec8L,0x550e9a74dbd742f0L,0x794456d7228c68abL, + 0x492f8868a4e25ec6L } }, + /* 31 << 91 */ + { { 0x682915adb2d8f398L,0xf13b51cc5b84c953L,0xcda90ab85bb917d6L, + 0x4b6155604ea3dee1L }, + { 0x578b4e850a52c1c8L,0xeab1a69520b75fc4L,0x60c14f3caa0bb3c6L, + 0x220f448ab8216094L } }, + /* 32 << 91 */ + { { 0x4fe7ee31b0e63d34L,0xf4600572a9e54fabL,0xc0493334d5e7b5a4L, + 0x8589fb9206d54831L }, + { 0xaa70f5cc6583553aL,0x0879094ae25649e5L,0xcc90450710044652L, + 0xebb0696d02541c4fL } }, + /* 33 << 91 */ + { { 0x5a171fdeb9718710L,0x38f1bed8f374a9f5L,0xc8c582e1ba39bdc1L, + 0xfc457b0a908cc0ceL }, + { 0x9a187fd4883841e2L,0x8ec25b3938725381L,0x2553ed0596f84395L, + 0x095c76616f6c6897L } }, + /* 34 << 91 */ + { { 0x917ac85c4bdc5610L,0xb2885fe4179eb301L,0x5fc655478b78bdccL, + 0x4a9fc893e59e4699L }, + { 0xbb7ff0cd3ce299afL,0x195be9b3adf38b20L,0x6a929c87d38ddb8fL, + 0x55fcc99cb21a51b9L } }, + /* 35 << 91 */ + { { 
0x2b695b4c721a4593L,0xed1e9a15768eaac2L,0xfb63d71c7489f914L, + 0xf98ba31c78118910L }, + { 0x802913739b128eb4L,0x7801214ed448af4aL,0xdbd2e22b55418dd3L, + 0xeffb3c0dd3998242L } }, + /* 36 << 91 */ + { { 0xdfa6077cc7bf3827L,0xf2165bcb47f8238fL,0xfe37cf688564d554L, + 0xe5f825c40a81fb98L }, + { 0x43cc4f67ffed4d6fL,0xbc609578b50a34b0L,0x8aa8fcf95041faf1L, + 0x5659f053651773b6L } }, + /* 37 << 91 */ + { { 0xe87582c36044d63bL,0xa60894090cdb0ca0L,0x8c993e0fbfb2bcf6L, + 0xfc64a71945985cfcL }, + { 0x15c4da8083dbedbaL,0x804ae1122be67df7L,0xda4c9658a23defdeL, + 0x12002ddd5156e0d3L } }, + /* 38 << 91 */ + { { 0xe68eae895dd21b96L,0x8b99f28bcf44624dL,0x0ae008081ec8897aL, + 0xdd0a93036712f76eL }, + { 0x962375224e233de4L,0x192445b12b36a8a5L,0xabf9ff74023993d9L, + 0x21f37bf42aad4a8fL } }, + /* 39 << 91 */ + { { 0x340a4349f8bd2bbdL,0x1d902cd94868195dL,0x3d27bbf1e5fdb6f1L, + 0x7a5ab088124f9f1cL }, + { 0xc466ab06f7a09e03L,0x2f8a197731f2c123L,0xda355dc7041b6657L, + 0xcb840d128ece2a7cL } }, + /* 40 << 91 */ + { { 0xb600ad9f7db32675L,0x78fea13307a06f1bL,0x5d032269b31f6094L, + 0x07753ef583ec37aaL }, + { 0x03485aed9c0bea78L,0x41bb3989bc3f4524L,0x09403761697f726dL, + 0x6109beb3df394820L } }, + /* 41 << 91 */ + { { 0x804111ea3b6d1145L,0xb6271ea9a8582654L,0x619615e624e66562L, + 0xa2554945d7b6ad9cL }, + { 0xd9c4985e99bfe35fL,0x9770ccc07b51cdf6L,0x7c32701392881832L, + 0x8777d45f286b26d1L } }, + /* 42 << 91 */ + { { 0x9bbeda22d847999dL,0x03aa33b6c3525d32L,0x4b7b96d428a959a1L, + 0xbb3786e531e5d234L }, + { 0xaeb5d3ce6961f247L,0x20aa85af02f93d3fL,0x9cd1ad3dd7a7ae4fL, + 0xbf6688f0781adaa8L } }, + /* 43 << 91 */ + { { 0xb1b40e867469ceadL,0x1904c524309fca48L,0x9b7312af4b54bbc7L, + 0xbe24bf8f593affa2L }, + { 0xbe5e0790bd98764bL,0xa0f45f17a26e299eL,0x4af0d2c26b8fe4c7L, + 0xef170db18ae8a3e6L } }, + /* 44 << 91 */ + { { 0x0e8d61a029e0ccc1L,0xcd53e87e60ad36caL,0x328c6623c8173822L, + 0x7ee1767da496be55L }, + { 0x89f13259648945afL,0x9e45a5fd25c8009cL,0xaf2febd91f61ab8cL, + 0x43f6bc868a275385L } }, + /* 45 << 91 */ + { { 0x87792348f2142e79L,0x17d89259c6e6238aL,0x7536d2f64a839d9bL, + 0x1f428fce76a1fbdcL }, + { 0x1c1096010db06dfeL,0xbfc16bc150a3a3ccL,0xf9cbd9ec9b30f41bL, + 0x5b5da0d600138cceL } }, + /* 46 << 91 */ + { { 0xec1d0a4856ef96a7L,0xb47eb848982bf842L,0x66deae32ec3f700dL, + 0x4e43c42caa1181e0L }, + { 0xa1d72a31d1a4aa2aL,0x440d4668c004f3ceL,0x0d6a2d3b45fe8a7aL, + 0x820e52e2fb128365L } }, + /* 47 << 91 */ + { { 0x29ac5fcf25e51b09L,0x180cd2bf2023d159L,0xa9892171a1ebf90eL, + 0xf97c4c877c132181L }, + { 0x9f1dc724c03dbb7eL,0xae043765018cbbe4L,0xfb0b2a360767d153L, + 0xa8e2f4d6249cbaebL } }, + /* 48 << 91 */ + { { 0x172a5247d95ea168L,0x1758fada2970764aL,0xac803a511d978169L, + 0x299cfe2ede77e01bL }, + { 0x652a1e17b0a98927L,0x2e26e1d120014495L,0x7ae0af9f7175b56aL, + 0xc2e22a80d64b9f95L } }, + /* 49 << 91 */ + { { 0x4d0ff9fbd90a060aL,0x496a27dbbaf38085L,0x32305401da776bcfL, + 0xb8cdcef6725f209eL }, + { 0x61ba0f37436a0bbaL,0x263fa10876860049L,0x92beb98eda3542cfL, + 0xa2d4d14ad5849538L } }, + /* 50 << 91 */ + { { 0x989b9d6812e9a1bcL,0x61d9075c5f6e3268L,0x352c6aa999ace638L, + 0xde4e4a55920f43ffL }, + { 0xe5e4144ad673c017L,0x667417ae6f6e05eaL,0x613416aedcd1bd56L, + 0x5eb3620186693711L } }, + /* 51 << 91 */ + { { 0x2d7bc5043a1aa914L,0x175a129976dc5975L,0xe900e0f23fc8125cL, + 0x569ef68c11198875L }, + { 0x9012db6363a113b4L,0xe3bd3f5698835766L,0xa5c94a5276412deaL, + 0xad9e2a09aa735e5cL } }, + /* 52 << 91 */ + { { 0x405a984c508b65e9L,0xbde4a1d16df1a0d1L,0x1a9433a1dfba80daL, + 0xe9192ff99440ad2eL }, + { 
0x9f6496965099fe92L,0x25ddb65c0b27a54aL,0x178279ddc590da61L, + 0x5479a999fbde681aL } }, + /* 53 << 91 */ + { { 0xd0e84e05013fe162L,0xbe11dc92632d471bL,0xdf0b0c45fc0e089fL, + 0x04fb15b04c144025L }, + { 0xa61d5fc213c99927L,0xa033e9e03de2eb35L,0xf8185d5cb8dacbb4L, + 0x9a88e2658644549dL } }, + /* 54 << 91 */ + { { 0xf717af6254671ff6L,0x4bd4241b5fa58603L,0x06fba40be67773c0L, + 0xc1d933d26a2847e9L }, + { 0xf4f5acf3689e2c70L,0x92aab0e746bafd31L,0x798d76aa3473f6e5L, + 0xcc6641db93141934L } }, + /* 55 << 91 */ + { { 0xcae27757d31e535eL,0x04cc43b687c2ee11L,0x8d1f96752e029ffaL, + 0xc2150672e4cc7a2cL }, + { 0x3b03c1e08d68b013L,0xa9d6816fedf298f3L,0x1bfbb529a2804464L, + 0x95a52fae5db22125L } }, + /* 56 << 91 */ + { { 0x55b321600e1cb64eL,0x004828f67e7fc9feL,0x13394b821bb0fb93L, + 0xb6293a2d35f1a920L }, + { 0xde35ef21d145d2d9L,0xbe6225b3bb8fa603L,0x00fc8f6b32cf252dL, + 0xa28e52e6117cf8c2L } }, + /* 57 << 91 */ + { { 0x9d1dc89b4c371e6dL,0xcebe067536ef0f28L,0x5de05d09a4292f81L, + 0xa8303593353e3083L }, + { 0xa1715b0a7e37a9bbL,0x8c56f61e2b8faec3L,0x5250743133c9b102L, + 0x0130cefca44431f0L } }, + /* 58 << 91 */ + { { 0x56039fa0bd865cfbL,0x4b03e578bc5f1dd7L,0x40edf2e4babe7224L, + 0xc752496d3a1988f6L }, + { 0xd1572d3b564beb6bL,0x0db1d11039a1c608L,0x568d193416f60126L, + 0x05ae9668f354af33L } }, + /* 59 << 91 */ + { { 0x19de6d37c92544f2L,0xcc084353a35837d5L,0xcbb6869c1a514eceL, + 0xb633e7282e1d1066L }, + { 0xf15dd69f936c581cL,0x96e7b8ce7439c4f9L,0x5e676f482e448a5bL, + 0xb2ca7d5bfd916bbbL } }, + /* 60 << 91 */ + { { 0xd55a2541f5024025L,0x47bc5769e4c2d937L,0x7d31b92a0362189fL, + 0x83f3086eef7816f9L }, + { 0xf9f46d94b587579aL,0xec2d22d830e76c5fL,0x27d57461b000ffcfL, + 0xbb7e65f9364ffc2cL } }, + /* 61 << 91 */ + { { 0x7c7c94776652a220L,0x61618f89d696c981L,0x5021701d89effff3L, + 0xf2c8ff8e7c314163L }, + { 0x2da413ad8efb4d3eL,0x937b5adfce176d95L,0x22867d342a67d51cL, + 0x262b9b1018eb3ac9L } }, + /* 62 << 91 */ + { { 0x4e314fe4c43ff28bL,0x764766276a664e7aL,0x3e90e40bb7a565c2L, + 0x8588993ac1acf831L }, + { 0xd7b501d68f938829L,0x996627ee3edd7d4cL,0x37d44a6290cd34c7L, + 0xa8327499f3833e8dL } }, + /* 63 << 91 */ + { { 0x2e18917d4bf50353L,0x85dd726b556765fbL,0x54fe65d693d5ab66L, + 0x3ddbaced915c25feL }, + { 0xa799d9a412f22e85L,0xe2a248676d06f6bcL,0xf4f1ee5643ca1637L, + 0xfda2828b61ece30aL } }, + /* 64 << 91 */ + { { 0x758c1a3ea2dee7a6L,0xdcde2f3c734b2284L,0xaba445d24eaba6adL, + 0x35aaf66876cee0a7L }, + { 0x7e0b04a9e5aa049aL,0xe74083ad91103e84L,0xbeb183ce40afecc3L, + 0x6b89de9fea043f7aL } }, + /* 0 << 98 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 98 */ + { { 0x0e299d23fe67ba66L,0x9145076093cf2f34L,0xf45b5ea997fcf913L, + 0x5be008438bd7dddaL }, + { 0x358c3e05d53ff04dL,0xbf7ccdc35de91ef7L,0xad684dbfb69ec1a0L, + 0x367e7cf2801fd997L } }, + /* 2 << 98 */ + { { 0x0ca1f3b7b0dc8595L,0x27de46089f1d9f2eL,0x1af3bf39badd82a7L, + 0x79356a7965862448L }, + { 0xc0602345f5f9a052L,0x1a8b0f89139a42f9L,0xb53eee42844d40fcL, + 0x93b0bfe54e5b6368L } }, + /* 3 << 98 */ + { { 0x5434dd02c024789cL,0x90dca9ea41b57bfcL,0x8aa898e2243398dfL, + 0xf607c834894a94bbL }, + { 0xbb07be97c2c99b76L,0x6576ba6718c29302L,0x3d79efcce703a88cL, + 0xf259ced7b6a0d106L } }, + /* 4 << 98 */ + { { 0x0f893a5dc8de610bL,0xe8c515fb67e223ceL,0x7774bfa64ead6dc5L, + 0x89d20f95925c728fL }, + { 0x7a1e0966098583ceL,0xa2eedb9493f2a7d7L,0x1b2820974c304d4aL, + 0x0842e3dac077282dL } }, + /* 5 << 98 */ + { { 0xe4d972a33b9e2d7bL,0x7cc60b27c48218ffL,0x8fc7083884149d91L, + 0x5c04346f2f461eccL }, + { 0xebe9fdf2614650a9L,0x5e35b537c1f666acL,0x645613d188babc83L, + 
0x88cace3ac5e1c93eL } }, + /* 6 << 98 */ + { { 0x209ca3753de92e23L,0xccb03cc85fbbb6e3L,0xccb90f03d7b1487eL, + 0xfa9c2a38c710941fL }, + { 0x756c38236724ceedL,0x3a902258192d0323L,0xb150e519ea5e038eL, + 0xdcba2865c7427591L } }, + /* 7 << 98 */ + { { 0xe549237f78890732L,0xc443bef953fcb4d9L,0x9884d8a6eb3480d6L, + 0x8a35b6a13048b186L }, + { 0xb4e4471665e9a90aL,0x45bf380d653006c0L,0x8f3f820d4fe9ae3bL, + 0x244a35a0979a3b71L } }, + /* 8 << 98 */ + { { 0xa1010e9d74cd06ffL,0x9c17c7dfaca3eeacL,0x74c86cd38063aa2bL, + 0x8595c4b3734614ffL }, + { 0xa3de00ca990f62ccL,0xd9bed213ca0c3be5L,0x7886078adf8ce9f5L, + 0xddb27ce35cd44444L } }, + /* 9 << 98 */ + { { 0xed374a6658926dddL,0x138b2d49908015b8L,0x886c6579de1f7ab8L, + 0x888b9aa0c3020b7aL }, + { 0xd3ec034e3a96e355L,0xba65b0b8f30fbe9aL,0x064c8e50ff21367aL, + 0x1f508ea40b04b46eL } }, + /* 10 << 98 */ + { { 0x98561a49747c866cL,0xbbb1e5fe0518a062L,0x20ff4e8becdc3608L, + 0x7f55cded20184027L }, + { 0x8d73ec95f38c85f0L,0x5b589fdf8bc3b8c3L,0xbe95dd980f12b66fL, + 0xf5bd1a090e338e01L } }, + /* 11 << 98 */ + { { 0x65163ae55e915918L,0x6158d6d986f8a46bL,0x8466b538eeebf99cL, + 0xca8761f6bca477efL }, + { 0xaf3449c29ebbc601L,0xef3b0f41e0c3ae2fL,0xaa6c577d5de63752L, + 0xe916660164682a51L } }, + /* 12 << 98 */ + { { 0x5a3097befc15aa1eL,0x40d12548b54b0745L,0x5bad4706519a5f12L, + 0xed03f717a439dee6L }, + { 0x0794bb6c4a02c499L,0xf725083dcffe71d2L,0x2cad75190f3adcafL, + 0x7f68ea1c43729310L } }, + /* 13 << 98 */ + { { 0xe747c8c7b7ffd977L,0xec104c3580761a22L,0x8395ebaf5a3ffb83L, + 0xfb3261f4e4b63db7L }, + { 0x53544960d883e544L,0x13520d708cc2eeb8L,0x08f6337bd3d65f99L, + 0x83997db2781cf95bL } }, + /* 14 << 98 */ + { { 0xce6ff1060dbd2c01L,0x4f8eea6b1f9ce934L,0x546f7c4b0e993921L, + 0x6236a3245e753fc7L }, + { 0x65a41f84a16022e9L,0x0c18d87843d1dbb2L,0x73c556402d4cef9cL, + 0xa042810870444c74L } }, + /* 15 << 98 */ + { { 0x68e4f15e9afdfb3cL,0x49a561435bdfb6dfL,0xa9bc1bd45f823d97L, + 0xbceb5970ea111c2aL }, + { 0x366b455fb269bbc4L,0x7cd85e1ee9bc5d62L,0xc743c41c4f18b086L, + 0xa4b4099095294fb9L } }, + /* 16 << 98 */ + { { 0x9c7c581d26ee8382L,0xcf17dcc5359d638eL,0xee8273abb728ae3dL, + 0x1d112926f821f047L }, + { 0x1149847750491a74L,0x687fa761fde0dfb9L,0x2c2580227ea435abL, + 0x6b8bdb9491ce7e3fL } }, + /* 17 << 98 */ + { { 0x4c5b5dc93bf834aaL,0x043718194f6c7e4bL,0xc284e00a3736bcadL, + 0x0d88111821ae8f8dL }, + { 0xf9cf0f82f48c8e33L,0xa11fd075a1bf40dbL,0xdceab0dedc2733e5L, + 0xc560a8b58e986bd7L } }, + /* 18 << 98 */ + { { 0x48dd1fe23929d097L,0x3885b29092f188f1L,0x0f2ae613da6fcdacL, + 0x9054303eb662a46cL }, + { 0xb6871e440738042aL,0x98e6a977bdaf6449L,0xd8bc0650d1c9df1bL, + 0xef3d645136e098f9L } }, + /* 19 << 98 */ + { { 0x03fbae82b6d72d28L,0x77ca9db1f5d84080L,0x8a112cffa58efc1cL, + 0x518d761cc564cb4aL }, + { 0x69b5740ef0d1b5ceL,0x717039cce9eb1785L,0x3fe29f9022f53382L, + 0x8e54ba566bc7c95cL } }, + /* 20 << 98 */ + { { 0x9c806d8af7f91d0fL,0x3b61b0f1a82a5728L,0x4640032d94d76754L, + 0x273eb5de47d834c6L }, + { 0x2988abf77b4e4d53L,0xb7ce66bfde401777L,0x9fba6b32715071b3L, + 0x82413c24ad3a1a98L } }, + /* 21 << 98 */ + { { 0x5b7fc8c4e0e8ad93L,0xb5679aee5fab868dL,0xb1f9d2fa2b3946f3L, + 0x458897dc5685b50aL }, + { 0x1e98c93089d0caf3L,0x39564c5f78642e92L,0x1b77729a0dbdaf18L, + 0xf9170722579e82e6L } }, + /* 22 << 98 */ + { { 0x680c0317e4515fa5L,0xf85cff84fb0c790fL,0xc7a82aab6d2e0765L, + 0x7446bca935c82b32L }, + { 0x5de607aa6d63184fL,0x7c1a46a8262803a6L,0xd218313daebe8035L, + 0x92113ffdc73c51f8L } }, + /* 23 << 98 */ + { { 0x4b38e08312e7e46cL,0x69d0a37a56126bd5L,0xfb3f324b73c07e04L, + 0xa0c22f678fda7267L }, + { 
0x8f2c00514d2c7d8fL,0xbc45ced3cbe2cae5L,0xe1c6cf07a8f0f277L, + 0xbc3923121eb99a98L } }, + /* 24 << 98 */ + { { 0x75537b7e3cc8ac85L,0x8d725f57dd02753bL,0xfd05ff64b737df2fL, + 0x55fe8712f6d2531dL }, + { 0x57ce04a96ab6b01cL,0x69a02a897cd93724L,0x4f82ac35cf86699bL, + 0x8242d3ad9cb4b232L } }, + /* 25 << 98 */ + { { 0x713d0f65d62105e5L,0xbb222bfa2d29be61L,0xf2f9a79e6cfbef09L, + 0xfc24d8d3d5d6782fL }, + { 0x5db77085d4129967L,0xdb81c3ccdc3c2a43L,0x9d655fc005d8d9a3L, + 0x3f5d057a54298026L } }, + /* 26 << 98 */ + { { 0x1157f56d88c54694L,0xb26baba59b09573eL,0x2cab03b022adffd1L, + 0x60a412c8dd69f383L }, + { 0xed76e98b54b25039L,0xd4ee67d3687e714dL,0x877396487b00b594L, + 0xce419775c9ef709bL } }, + /* 27 << 98 */ + { { 0x40f76f851c203a40L,0x30d352d6eafd8f91L,0xaf196d3d95578dd2L, + 0xea4bb3d777cc3f3dL }, + { 0x42a5bd03b98e782bL,0xac958c400624920dL,0xb838134cfc56fcc8L, + 0x86ec4ccf89572e5eL } }, + /* 28 << 98 */ + { { 0x69c435269be47be0L,0x323b7dd8cb28fea1L,0xfa5538ba3a6c67e5L, + 0xef921d701d378e46L }, + { 0xf92961fc3c4b880eL,0x3f6f914e98940a67L,0xa990eb0afef0ff39L, + 0xa6c2920ff0eeff9cL } }, + /* 29 << 98 */ + { { 0xca80416651b8d9a3L,0x42531bc90ffb0db1L,0x72ce4718aa82e7ceL, + 0x6e199913df574741L }, + { 0xd5f1b13dd5d36946L,0x8255dc65f68f0194L,0xdc9df4cd8710d230L, + 0x3453c20f138c1988L } }, + /* 30 << 98 */ + { { 0x9af98dc089a6ef01L,0x4dbcc3f09857df85L,0x348056015c1ad924L, + 0x40448da5d0493046L }, + { 0xf629926d4ee343e2L,0x6343f1bd90e8a301L,0xefc9349140815b3fL, + 0xf882a423de8f66fbL } }, + /* 31 << 98 */ + { { 0x3a12d5f4e7db9f57L,0x7dfba38a3c384c27L,0x7a904bfd6fc660b1L, + 0xeb6c5db32773b21cL }, + { 0xc350ee661cdfe049L,0x9baac0ce44540f29L,0xbc57b6aba5ec6aadL, + 0x167ce8c30a7c1baaL } }, + /* 32 << 98 */ + { { 0xb23a03a553fb2b56L,0x6ce141e74e057f78L,0x796525c389e490d9L, + 0x0bc95725a31a7e75L }, + { 0x1ec567911220fd06L,0x716e3a3c408b0bd6L,0x31cd6bf7e8ebeba9L, + 0xa7326ca6bee6b670L } }, + /* 33 << 98 */ + { { 0x3d9f851ccd090c43L,0x561e8f13f12c3988L,0x50490b6a904b7be4L, + 0x61690ce10410737bL }, + { 0x299e9a370f009052L,0x258758f0f026092eL,0x9fa255f3fdfcdc0fL, + 0xdbc9fb1fc0e1bcd2L } }, + /* 34 << 98 */ + { { 0x35f9dd6e24651840L,0xdca45a84a5c59abcL,0x103d396fecca4938L, + 0x4532da0ab97b3f29L }, + { 0xc4135ea51999a6bfL,0x3aa9505a5e6bf2eeL,0xf77cef063f5be093L, + 0x97d1a0f8a943152eL } }, + /* 35 << 98 */ + { { 0x2cb0ebba2e1c21ddL,0xf41b29fc2c6797c4L,0xc6e17321b300101fL, + 0x4422b0e9d0d79a89L }, + { 0x49e4901c92f1bfc4L,0x06ab1f8fe1e10ed9L,0x84d35577db2926b8L, + 0xca349d39356e8ec2L } }, + /* 36 << 98 */ + { { 0x70b63d32343bf1a9L,0x8fd3bd2837d1a6b1L,0x0454879c316865b4L, + 0xee959ff6c458efa2L }, + { 0x0461dcf89706dc3fL,0x737db0e2164e4b2eL,0x092626802f8843c8L, + 0x54498bbc7745e6f6L } }, + /* 37 << 98 */ + { { 0x359473faa29e24afL,0xfcc3c45470aa87a1L,0xfd2c4bf500573aceL, + 0xb65b514e28dd1965L }, + { 0xe46ae7cf2193e393L,0x60e9a4e1f5444d97L,0xe7594e9600ff38edL, + 0x43d84d2f0a0e0f02L } }, + /* 38 << 98 */ + { { 0x8b6db141ee398a21L,0xb88a56aee3bcc5beL,0x0a1aa52f373460eaL, + 0x20da1a56160bb19bL }, + { 0xfb54999d65bf0384L,0x71a14d245d5a180eL,0xbc44db7b21737b04L, + 0xd84fcb1801dd8e92L } }, + /* 39 << 98 */ + { { 0x80de937bfa44b479L,0x535054995c98fd4fL,0x1edb12ab28f08727L, + 0x4c58b582a5f3ef53L }, + { 0xbfb236d88327f246L,0xc3a3bfaa4d7df320L,0xecd96c59b96024f2L, + 0xfc293a537f4e0433L } }, + /* 40 << 98 */ + { { 0x5341352b5acf6e10L,0xc50343fdafe652c3L,0x4af3792d18577a7fL, + 0xe1a4c617af16823dL }, + { 0x9b26d0cd33425d0aL,0x306399ed9b7bc47fL,0x2a792f33706bb20bL, + 0x3121961498111055L } }, + /* 41 << 98 */ + { { 
0x864ec06487f5d28bL,0x11392d91962277fdL,0xb5aa7942bb6aed5fL, + 0x080094dc47e799d9L }, + { 0x4afa588c208ba19bL,0xd3e7570f8512f284L,0xcbae64e602f5799aL, + 0xdeebe7ef514b9492L } }, + /* 42 << 98 */ + { { 0x30300f98e5c298ffL,0x17f561be3678361fL,0xf52ff31298cb9a16L, + 0x6233c3bc5562d490L }, + { 0x7bfa15a192e3a2cbL,0x961bcfd1e6365119L,0x3bdd29bf2c8c53b1L, + 0x739704df822844baL } }, + /* 43 << 98 */ + { { 0x7dacfb587e7b754bL,0x23360791a806c9b9L,0xe7eb88c923504452L, + 0x2983e996852c1783L }, + { 0xdd4ae529958d881dL,0x026bae03262c7b3cL,0x3a6f9193960b52d1L, + 0xd0980f9092696cfbL } }, + /* 44 << 98 */ + { { 0x4c1f428cd5f30851L,0x94dfed272a4f6630L,0x4df53772fc5d48a4L, + 0xdd2d5a2f933260ceL }, + { 0x574115bdd44cc7a5L,0x4ba6b20dbd12533aL,0x30e93cb8243057c9L, + 0x794c486a14de320eL } }, + /* 45 << 98 */ + { { 0xe925d4cef21496e4L,0xf951d198ec696331L,0x9810e2de3e8d812fL, + 0xd0a47259389294abL }, + { 0x513ba2b50e3bab66L,0x462caff5abad306fL,0xe2dc6d59af04c49eL, + 0x1aeb8750e0b84b0bL } }, + /* 46 << 98 */ + { { 0xc034f12f2f7d0ca2L,0x6d2e8128e06acf2fL,0x801f4f8321facc2fL, + 0xa1170c03f40ef607L }, + { 0xfe0a1d4f7805a99cL,0xbde56a36cc26aba5L,0x5b1629d035531f40L, + 0xac212c2b9afa6108L } }, + /* 47 << 98 */ + { { 0x30a06bf315697be5L,0x6f0545dc2c63c7c1L,0x5d8cb8427ccdadafL, + 0xd52e379bac7015bbL }, + { 0xc4f56147f462c23eL,0xd44a429846bc24b0L,0xbc73d23ae2856d4fL, + 0x61cedd8c0832bcdfL } }, + /* 48 << 98 */ + { { 0x6095355699f241d7L,0xee4adbd7001a349dL,0x0b35bf6aaa89e491L, + 0x7f0076f4136f7546L }, + { 0xd19a18ba9264da3dL,0x6eb2d2cd62a7a28bL,0xcdba941f8761c971L, + 0x1550518ba3be4a5dL } }, + /* 49 << 98 */ + { { 0xd0e8e2f057d0b70cL,0xeea8612ecd133ba3L,0x814670f044416aecL, + 0x424db6c330775061L }, + { 0xd96039d116213fd1L,0xc61e7fa518a3478fL,0xa805bdcccb0c5021L, + 0xbdd6f3a80cc616ddL } }, + /* 50 << 98 */ + { { 0x060096675d97f7e2L,0x31db0fc1af0bf4b6L,0x23680ed45491627aL, + 0xb99a3c667d741fb1L }, + { 0xe9bb5f5536b1ff92L,0x29738577512b388dL,0xdb8a2ce750fcf263L, + 0x385346d46c4f7b47L } }, + /* 51 << 98 */ + { { 0xbe86c5ef31631f9eL,0xbf91da2103a57a29L,0xc3b1f7967b23f821L, + 0x0f7d00d2770db354L }, + { 0x8ffc6c3bd8fe79daL,0xcc5e8c40d525c996L,0x4640991dcfff632aL, + 0x64d97e8c67112528L } }, + /* 52 << 98 */ + { { 0xc232d97302f1cd1eL,0xce87eacb1dd212a4L,0x6e4c8c73e69802f7L, + 0x12ef02901fffddbdL }, + { 0x941ec74e1bcea6e2L,0xd0b540243cb92cbbL,0x809fb9d47e8f9d05L, + 0x3bf16159f2992aaeL } }, + /* 53 << 98 */ + { { 0xad40f279f8a7a838L,0x11aea63105615660L,0xbf52e6f1a01f6fa1L, + 0xef0469953dc2aec9L }, + { 0x785dbec9d8080711L,0xe1aec60a9fdedf76L,0xece797b5fa21c126L, + 0xc66e898f05e52732L } }, + /* 54 << 98 */ + { { 0x39bb69c408811fdbL,0x8bfe1ef82fc7f082L,0xc8e7a393174f4138L, + 0xfba8ad1dd58d1f98L }, + { 0xbc21d0cebfd2fd5bL,0x0b839a826ee60d61L,0xaacf7658afd22253L, + 0xb526bed8aae396b3L } }, + /* 55 << 98 */ + { { 0xccc1bbc238564464L,0x9e3ff9478c45bc73L,0xcde9bca358188a78L, + 0x138b8ee0d73bf8f7L }, + { 0x5c7e234c4123c489L,0x66e69368fa643297L,0x0629eeee39a15fa3L, + 0x95fab881a9e2a927L } }, + /* 56 << 98 */ + { { 0xb2497007eafbb1e1L,0xd75c9ce6e75b7a93L,0x3558352defb68d78L, + 0xa2f26699223f6396L }, + { 0xeb911ecfe469b17aL,0x62545779e72d3ec2L,0x8ea47de782cb113fL, + 0xebe4b0864e1fa98dL } }, + /* 57 << 98 */ + { { 0xec2d5ed78cdfedb1L,0xa535c077fe211a74L,0x9678109b11d244c5L, + 0xf17c8bfbbe299a76L }, + { 0xb651412efb11fbc4L,0xea0b548294ab3f65L,0xd8dffd950cf78243L, + 0x2e719e57ce0361d4L } }, + /* 58 << 98 */ + { { 0x9007f085304ddc5bL,0x095e8c6d4daba2eaL,0x5a33cdb43f9d28a9L, + 0x85b95cd8e2283003L }, + { 
0xbcd6c819b9744733L,0x29c5f538fc7f5783L,0x6c49b2fad59038e4L, + 0x68349cc13bbe1018L } }, + /* 59 << 98 */ + { { 0xcc490c1d21830ee5L,0x36f9c4eee9bfa297L,0x58fd729448de1a94L, + 0xaadb13a84e8f2cdcL }, + { 0x515eaaa081313dbaL,0xc76bb468c2152dd8L,0x357f8d75a653dbf8L, + 0xe4d8c4d1b14ac143L } }, + /* 60 << 98 */ + { { 0xbdb8e675b055cb40L,0x898f8e7b977b5167L,0xecc65651b82fb863L, + 0x565448146d88f01fL }, + { 0xb0928e95263a75a9L,0xcfb6836f1a22fcdaL,0x651d14db3f3bd37cL, + 0x1d3837fbb6ad4664L } }, + /* 61 << 98 */ + { { 0x7c5fb538ff4f94abL,0x7243c7126d7fb8f2L,0xef13d60ca85c5287L, + 0x18cfb7c74bb8dd1bL }, + { 0x82f9bfe672908219L,0x35c4592b9d5144abL,0x52734f379cf4b42fL, + 0x6bac55e78c60ddc4L } }, + /* 62 << 98 */ + { { 0xb5cd811e94dea0f6L,0x259ecae4e18cc1a3L,0x6a0e836e15e660f8L, + 0x6c639ea60e02bff2L }, + { 0x8721b8cb7e1026fdL,0x9e73b50b63261942L,0xb8c7097477f01da3L, + 0x1839e6a68268f57fL } }, + /* 63 << 98 */ + { { 0x571b94155150b805L,0x1892389ef92c7097L,0x8d69c18e4a084b95L, + 0x7014c512be5b495cL }, + { 0x4780db361b07523cL,0x2f6219ce2c1c64faL,0xc38b81b0602c105aL, + 0xab4f4f205dc8e360L } }, + /* 64 << 98 */ + { { 0x20d3c982cf7d62d2L,0x1f36e29d23ba8150L,0x48ae0bf092763f9eL, + 0x7a527e6b1d3a7007L }, + { 0xb4a89097581a85e3L,0x1f1a520fdc158be5L,0xf98db37d167d726eL, + 0x8802786e1113e862L } }, + /* 0 << 105 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 105 */ + { { 0xefb2149e36f09ab0L,0x03f163ca4a10bb5bL,0xd029704506e20998L, + 0x56f0af001b5a3babL }, + { 0x7af4cfec70880e0dL,0x7332a66fbe3d913fL,0x32e6c84a7eceb4bdL, + 0xedc4a79a9c228f55L } }, + /* 2 << 105 */ + { { 0xc37c7dd0c55c4496L,0xa6a9635725bbabd2L,0x5b7e63f2add7f363L, + 0x9dce37822e73f1dfL }, + { 0xe1e5a16ab2b91f71L,0xe44898235ba0163cL,0xf2759c32f6e515adL, + 0xa5e2f1f88615eecfL } }, + /* 3 << 105 */ + { { 0x74519be7abded551L,0x03d358b8c8b74410L,0x4d00b10b0e10d9a9L, + 0x6392b0b128da52b7L }, + { 0x6744a2980b75c904L,0xc305b0aea8f7f96cL,0x042e421d182cf932L, + 0xf6fc5d509e4636caL } }, + /* 4 << 105 */ + { { 0x795847c9d64cc78cL,0x6c50621b9b6cb27bL,0x07099bf8df8022abL, + 0x48f862ebc04eda1dL }, + { 0xd12732ede1603c16L,0x19a80e0f5c9a9450L,0xe2257f54b429b4fcL, + 0x66d3b2c645460515L } }, + /* 5 << 105 */ + { { 0x6ca4f87e822e37beL,0x73f237b4253bda4eL,0xf747f3a241190aebL, + 0xf06fa36f804cf284L }, + { 0x0a6bbb6efc621c12L,0x5d624b6440b80ec6L,0x4b0724257ba556f3L, + 0x7fa0c3543e2d20a8L } }, + /* 6 << 105 */ + { { 0xe921fa31e3229d41L,0xa929c65294531bd4L,0x84156027a6d38209L, + 0xf3d69f736bdb97bdL }, + { 0x8906d19a16833631L,0x68a34c2e03d51be3L,0xcb59583b0e511cd8L, + 0x99ce6bfdfdc132a8L } }, + /* 7 << 105 */ + { { 0x3facdaaaffcdb463L,0x658bbc1a34a38b08L,0x12a801f8f1a9078dL, + 0x1567bcf96ab855deL }, + { 0xe08498e03572359bL,0xcf0353e58659e68bL,0xbb86e9c87d23807cL, + 0xbc08728d2198e8a2L } }, + /* 8 << 105 */ + { { 0x8de2b7bc453cadd6L,0x203900a7bc0bc1f8L,0xbcd86e47a6abd3afL, + 0x911cac128502effbL }, + { 0x2d550242ec965469L,0x0e9f769229e0017eL,0x633f078f65979885L, + 0xfb87d4494cf751efL } }, + /* 9 << 105 */ + { { 0xe1790e4bfc25419aL,0x364672034bff3cfdL,0xc8db638625b6e83fL, + 0x6cc69f236cad6fd2L }, + { 0x0219e45a6bc68bb9L,0xe43d79b6297f7334L,0x7d445368465dc97cL, + 0x4b9eea322a0b949aL } }, + /* 10 << 105 */ + { { 0x1b96c6ba6102d021L,0xeaafac782f4461eaL,0xd4b85c41c49f19a8L, + 0x275c28e4cf538875L }, + { 0x35451a9ddd2e54e0L,0x6991adb50605618bL,0x5b8b4bcd7b36cd24L, + 0x372a4f8c56f37216L } }, + /* 11 << 105 */ + { { 0xc890bd73a6a5da60L,0x6f083da0dc4c9ff0L,0xf4e14d94f0536e57L, + 0xf9ee1edaaaec8243L }, + { 
0x571241ec8bdcf8e7L,0xa5db82710b041e26L,0x9a0b9a99e3fff040L, + 0xcaaf21dd7c271202L } }, + /* 12 << 105 */ + { { 0xb4e2b2e14f0dd2e8L,0xe77e7c4f0a377ac7L,0x69202c3f0d7a2198L, + 0xf759b7ff28200eb8L }, + { 0xc87526eddcfe314eL,0xeb84c52453d5cf99L,0xb1b52ace515138b6L, + 0x5aa7ff8c23fca3f4L } }, + /* 13 << 105 */ + { { 0xff0b13c3b9791a26L,0x960022dacdd58b16L,0xdbd55c9257aad2deL, + 0x3baaaaa3f30fe619L }, + { 0x9a4b23460d881efdL,0x506416c046325e2aL,0x91381e76035c18d4L, + 0xb3bb68bef27817b0L } }, + /* 14 << 105 */ + { { 0x15bfb8bf5116f937L,0x7c64a586c1268943L,0x71e25cc38419a2c8L, + 0x9fd6b0c48335f463L }, + { 0x4bf0ba3ce8ee0e0eL,0x6f6fba60298c21faL,0x57d57b39ae66bee0L, + 0x292d513022672544L } }, + /* 15 << 105 */ + { { 0xf451105dbab093b3L,0x012f59b902839986L,0x8a9158023474a89cL, + 0x048c919c2de03e97L }, + { 0xc476a2b591071cd5L,0x791ed89a034970a5L,0x89bd9042e1b7994bL, + 0x8eaf5179a1057ffdL } }, + /* 16 << 105 */ + { { 0x6066e2a2d551ee10L,0x87a8f1d8727e09a6L,0x00d08bab2c01148dL, + 0x6da8e4f1424f33feL }, + { 0x466d17f0cf9a4e71L,0xff5020103bf5cb19L,0xdccf97d8d062ecc0L, + 0x80c0d9af81d80ac4L } }, + /* 17 << 105 */ + { { 0xe87771d8033f2876L,0xb0186ec67d5cc3dbL,0x58e8bb803bc9bc1dL, + 0x4d1395cc6f6ef60eL }, + { 0xa73c62d6186244a0L,0x918e5f23110a5b53L,0xed4878ca741b7eabL, + 0x3038d71adbe03e51L } }, + /* 18 << 105 */ + { { 0x840204b7a93c3246L,0x21ab6069a0b9b4cdL,0xf5fa6e2bb1d64218L, + 0x1de6ad0ef3d56191L }, + { 0x570aaa88ff1929c7L,0xc6df4c6b640e87b5L,0xde8a74f2c65f0cccL, + 0x8b972fd5e6f6cc01L } }, + /* 19 << 105 */ + { { 0x3fff36b60b846531L,0xba7e45e610a5e475L,0x84a1d10e4145b6c5L, + 0xf1f7f91a5e046d9dL }, + { 0x0317a69244de90d7L,0x951a1d4af199c15eL,0x91f78046c9d73debL, + 0x74c82828fab8224fL } }, + /* 20 << 105 */ + { { 0xaa6778fce7560b90L,0xb4073e61a7e824ceL,0xff0d693cd642eba8L, + 0x7ce2e57a5dccef38L }, + { 0x89c2c7891df1ad46L,0x83a06922098346fdL,0x2d715d72da2fc177L, + 0x7b6dd71d85b6cf1dL } }, + /* 21 << 105 */ + { { 0xc60a6d0a73fa9cb0L,0xedd3992e328bf5a9L,0xc380ddd0832c8c82L, + 0xd182d410a2a0bf50L }, + { 0x7d9d7438d9a528dbL,0xe8b1a0e9caf53994L,0xddd6e5fe0e19987cL, + 0xacb8df03190b059dL } }, + /* 22 << 105 */ + { { 0x53703a328300129fL,0x1f63766268c43bfdL,0xbcbd191300e54051L, + 0x812fcc627bf5a8c5L }, + { 0x3f969d5f29fb85daL,0x72f4e00a694759e8L,0x426b6e52790726b7L, + 0x617bbc873bdbb209L } }, + /* 23 << 105 */ + { { 0x511f8bb997aee317L,0x812a4096e81536a8L,0x137dfe593ac09b9bL, + 0x0682238fba8c9a7aL }, + { 0x7072ead6aeccb4bdL,0x6a34e9aa692ba633L,0xc82eaec26fff9d33L, + 0xfb7535121d4d2b62L } }, + /* 24 << 105 */ + { { 0x1a0445ff1d7aadabL,0x65d38260d5f6a67cL,0x6e62fb0891cfb26fL, + 0xef1e0fa55c7d91d6L }, + { 0x47e7c7ba33db72cdL,0x017cbc09fa7c74b2L,0x3c931590f50a503cL, + 0xcac54f60616baa42L } }, + /* 25 << 105 */ + { { 0x9b6cd380b2369f0fL,0x97d3a70d23c76151L,0x5f9dd6fc9862a9c6L, + 0x044c4ab212312f51L }, + { 0x035ea0fd834a2ddcL,0x49e6b862cc7b826dL,0xb03d688362fce490L, + 0x62f2497ab37e36e9L } }, + /* 26 << 105 */ + { { 0x04b005b6c6458293L,0x36bb5276e8d10af7L,0xacf2dc138ee617b8L, + 0x470d2d35b004b3d4L }, + { 0x06790832feeb1b77L,0x2bb75c3985657f9cL,0xd70bd4edc0f60004L, + 0xfe797ecc219b018bL } }, + /* 27 << 105 */ + { { 0x9b5bec2a753aebccL,0xdaf9f3dcc939eca5L,0xd6bc6833d095ad09L, + 0x98abdd51daa4d2fcL }, + { 0xd9840a318d168be5L,0xcf7c10e02325a23cL,0xa5c02aa07e6ecfafL, + 0x2462e7e6b5bfdf18L } }, + /* 28 << 105 */ + { { 0xab2d8a8ba0cc3f12L,0x68dd485dbc672a29L,0x72039752596f2cd3L, + 0x5d3eea67a0cf3d8dL }, + { 0x810a1a81e6602671L,0x8f144a4014026c0cL,0xbc753a6d76b50f85L, + 0xc4dc21e8645cd4a4L } }, + /* 29 << 105 */ + { { 
0xc5262dea521d0378L,0x802b8e0e05011c6fL,0x1ba19cbb0b4c19eaL, + 0x21db64b5ebf0aaecL }, + { 0x1f394ee970342f9dL,0x93a10aee1bc44a14L,0xa7eed31b3efd0baaL, + 0x6e7c824e1d154e65L } }, + /* 30 << 105 */ + { { 0xee23fa819966e7eeL,0x64ec4aa805b7920dL,0x2d44462d2d90aad4L, + 0xf44dd195df277ad5L }, + { 0x8d6471f1bb46b6a1L,0x1e65d313fd885090L,0x33a800f513a977b4L, + 0xaca9d7210797e1efL } }, + /* 31 << 105 */ + { { 0x9a5a85a0fcff6a17L,0x9970a3f31eca7ceeL,0xbb9f0d6bc9504be3L, + 0xe0c504beadd24ee2L }, + { 0x7e09d95677fcc2f4L,0xef1a522765bb5fc4L,0x145d4fb18b9286aaL, + 0x66fd0c5d6649028bL } }, + /* 32 << 105 */ + { { 0x98857ceb1bf4581cL,0xe635e186aca7b166L,0x278ddd22659722acL, + 0xa0903c4c1db68007L }, + { 0x366e458948f21402L,0x31b49c14b96abda2L,0x329c4b09e0403190L, + 0x97197ca3d29f43feL } }, + /* 33 << 105 */ + { { 0x8073dd1e274983d8L,0xda1a3bde55717c8fL,0xfd3d4da20361f9d1L, + 0x1332d0814c7de1ceL }, + { 0x9b7ef7a3aa6d0e10L,0x17db2e73f54f1c4aL,0xaf3dffae4cd35567L, + 0xaaa2f406e56f4e71L } }, + /* 34 << 105 */ + { { 0x8966759e7ace3fc7L,0x9594eacf45a8d8c6L,0x8de3bd8b91834e0eL, + 0xafe4ca53548c0421L }, + { 0xfdd7e856e6ee81c6L,0x8f671beb6b891a3aL,0xf7a58f2bfae63829L, + 0x9ab186fb9c11ac9fL } }, + /* 35 << 105 */ + { { 0x8d6eb36910b5be76L,0x046b7739fb040bcdL,0xccb4529fcb73de88L, + 0x1df0fefccf26be03L }, + { 0xad7757a6bcfcd027L,0xa8786c75bb3165caL,0xe9db1e347e99a4d9L, + 0x99ee86dfb06c504bL } }, + /* 36 << 105 */ + { { 0x5b7c2dddc15c9f0aL,0xdf87a7344295989eL,0x59ece47c03d08fdaL, + 0xb074d3ddad5fc702L }, + { 0x2040790351a03776L,0x2bb1f77b2a608007L,0x25c58f4fe1153185L, + 0xe6df62f6766e6447L } }, + /* 37 << 105 */ + { { 0xefb3d1beed51275aL,0x5de47dc72f0f483fL,0x7932d98e97c2bedfL, + 0xd5c119270219f8a1L }, + { 0x9d751200a73a294eL,0x5f88434a9dc20172L,0xd28d9fd3a26f506aL, + 0xa890cd319d1dcd48L } }, + /* 38 << 105 */ + { { 0x0aebaec170f4d3b4L,0xfd1a13690ffc8d00L,0xb9d9c24057d57838L, + 0x45929d2668bac361L }, + { 0x5a2cd06025b15ca6L,0x4b3c83e16e474446L,0x1aac7578ee1e5134L, + 0xa418f5d6c91e2f41L } }, + /* 39 << 105 */ + { { 0x6936fc8a213ed68bL,0x860ae7ed510a5224L,0x63660335def09b53L, + 0x641b2897cd79c98dL }, + { 0x29bd38e101110f35L,0x79c26f42648b1937L,0x64dae5199d9164f4L, + 0xd85a23100265c273L } }, + /* 40 << 105 */ + { { 0x7173dd5d4b07e2b1L,0xd144c4cb8d9ea221L,0xe8b04ea41105ab14L, + 0x92dda542fe80d8f1L }, + { 0xe9982fa8cf03dce6L,0x8b5ea9651a22cffcL,0xf7f4ea7f3fad88c4L, + 0x62db773e6a5ba95cL } }, + /* 41 << 105 */ + { { 0xd20f02fb93f24567L,0xfd46c69a315257caL,0x0ac74cc78bcab987L, + 0x46f31c015ceca2f5L }, + { 0x40aedb59888b219eL,0xe50ecc37e1fccd02L,0x1bcd9dad911f816cL, + 0x583cc1ec8db9b00cL } }, + /* 42 << 105 */ + { { 0xf3cd2e66a483bf11L,0xfa08a6f5b1b2c169L,0xf375e2454be9fa28L, + 0x99a7ffec5b6d011fL }, + { 0x6a3ebddbc4ae62daL,0x6cea00ae374aef5dL,0xab5fb98d9d4d05bcL, + 0x7cba1423d560f252L } }, + /* 43 << 105 */ + { { 0x49b2cc21208490deL,0x1ca66ec3bcfb2879L,0x7f1166b71b6fb16fL, + 0xfff63e0865fe5db3L }, + { 0xb8345abe8b2610beL,0xb732ed8039de3df4L,0x0e24ed50211c32b4L, + 0xd10d8a69848ff27dL } }, + /* 44 << 105 */ + { { 0xc1074398ed4de248L,0xd7cedace10488927L,0xa4aa6bf885673e13L, + 0xb46bae916daf30afL }, + { 0x07088472fcef7ad8L,0x61151608d4b35e97L,0xbcfe8f26dde29986L, + 0xeb84c4c7d5a34c79L } }, + /* 45 << 105 */ + { { 0xc1eec55c164e1214L,0x891be86da147bb03L,0x9fab4d100ba96835L, + 0xbf01e9b8a5c1ae9fL }, + { 0x6b4de139b186ebc0L,0xd5c74c2685b91bcaL,0x5086a99cc2d93854L, + 0xeed62a7ba7a9dfbcL } }, + /* 46 << 105 */ + { { 0x8778ed6f76b7618aL,0xbff750a503b66062L,0x4cb7be22b65186dbL, + 0x369dfbf0cc3a6d13L }, + { 
0xc7dab26c7191a321L,0x9edac3f940ed718eL,0xbc142b36d0cfd183L, + 0xc8af82f67c991693L } }, + /* 47 << 105 */ + { { 0xb3d1e4d897ce0b2aL,0xe6d7c87fc3a55cdfL,0x35846b9568b81afeL, + 0x018d12afd3c239d8L }, + { 0x2b2c620801206e15L,0xe0e42453a3b882c6L,0x854470a3a50162d5L, + 0x081574787017a62aL } }, + /* 48 << 105 */ + { { 0x18bd3fb4820357c7L,0x992039ae6f1458adL,0x9a1df3c525b44aa1L, + 0x2d780357ed3d5281L }, + { 0x58cf7e4dc77ad4d4L,0xd49a7998f9df4fc4L,0x4465a8b51d71205eL, + 0xa0ee0ea6649254aaL } }, + /* 49 << 105 */ + { { 0x4b5eeecfab7bd771L,0x6c87307335c262b9L,0xdc5bd6483c9d61e7L, + 0x233d6d54321460d2L }, + { 0xd20c5626fc195bccL,0x2544595804d78b63L,0xe03fcb3d17ec8ef3L, + 0x54b690d146b8f781L } }, + /* 50 << 105 */ + { { 0x82fa2c8a21230646L,0xf51aabb9084f418cL,0xff4fbec11a30ba43L, + 0x6a5acf73743c9df7L }, + { 0x1da2b357d635b4d5L,0xc3de68ddecd5c1daL,0xa689080bd61af0ddL, + 0xdea5938ad665bf99L } }, + /* 51 << 105 */ + { { 0x0231d71afe637294L,0x01968aa6a5a81cd8L,0x11252d50048e63b5L, + 0xc446bc526ca007e9L }, + { 0xef8c50a696d6134bL,0x9361fbf59e09a05cL,0xf17f85a6dca3291aL, + 0xb178d548ff251a21L } }, + /* 52 << 105 */ + { { 0x87f6374ba4df3915L,0x566ce1bf2fd5d608L,0x425cba4d7de35102L, + 0x6b745f8f58c5d5e2L }, + { 0x88402af663122edfL,0x3190f9ed3b989a89L,0x4ad3d387ebba3156L, + 0xef385ad9c7c469a5L } }, + /* 53 << 105 */ + { { 0xb08281de3f642c29L,0x20be0888910ffb88L,0xf353dd4ad5292546L, + 0x3f1627de8377a262L }, + { 0xa5faa013eefcd638L,0x8f3bf62674cc77c3L,0x32618f65a348f55eL, + 0x5787c0dc9fefeb9eL } }, + /* 54 << 105 */ + { { 0xf1673aa2d9a23e44L,0x88dfa9934e10690dL,0x1ced1b362bf91108L, + 0x9193ceca3af48649L }, + { 0xfb34327d2d738fc5L,0x6697b037975fee6cL,0x2f485da0c04079a5L, + 0x2cdf57352feaa1acL } }, + /* 55 << 105 */ + { { 0x76944420bd55659eL,0x7973e32b4376090cL,0x86bb4fe1163b591aL, + 0x10441aedc196f0caL }, + { 0x3b431f4a045ad915L,0x6c11b437a4afacb1L,0x30b0c7db71fdbbd8L, + 0xb642931feda65acdL } }, + /* 56 << 105 */ + { { 0x4baae6e89c92b235L,0xa73bbd0e6b3993a1L,0xd06d60ec693dd031L, + 0x03cab91b7156881cL }, + { 0xd615862f1db3574bL,0x485b018564bb061aL,0x27434988a0181e06L, + 0x2cd61ad4c1c0c757L } }, + /* 57 << 105 */ + { { 0x3effed5a2ff9f403L,0x8dc98d8b62239029L,0x2206021e1f17b70dL, + 0xafbec0cabf510015L }, + { 0x9fed716480130dfaL,0x306dc2b58a02dcf5L,0x48f06620feb10fc0L, + 0x78d1e1d55a57cf51L } }, + /* 58 << 105 */ + { { 0xadef8c5a192ef710L,0x88afbd4b3b7431f9L,0x7e1f740764250c9eL, + 0x6e31318db58bec07L }, + { 0xfd4fc4b824f89b4eL,0x65a5dd8848c36a2aL,0x4f1eccfff024baa7L, + 0x22a21cf2cba94650L } }, + /* 59 << 105 */ + { { 0x95d29dee42a554f7L,0x828983a5002ec4baL,0x8112a1f78badb73dL, + 0x79ea8897a27c1839L }, + { 0x8969a5a7d065fd83L,0xf49af791b262a0bcL,0xfcdea8b6af2b5127L, + 0x10e913e1564c2dbcL } }, + /* 60 << 105 */ + { { 0x51239d14bc21ef51L,0xe51c3ceb4ce57292L,0x795ff06847bbcc3bL, + 0x86b46e1ebd7e11e6L }, + { 0x0ea6ba2380041ef4L,0xd72fe5056262342eL,0x8abc6dfd31d294d4L, + 0xbbe017a21278c2c9L } }, + /* 61 << 105 */ + { { 0xb1fcfa09b389328aL,0x322fbc62d01771b5L,0x04c0d06360b045bfL, + 0xdb652edc10e52d01L }, + { 0x50ef932c03ec6627L,0xde1b3b2dc1ee50e3L,0x5ab7bdc5dc37a90dL, + 0xfea6721331e33a96L } }, + /* 62 << 105 */ + { { 0x6482b5cb4f2999aaL,0x38476cc6b8cbf0ddL,0x93ebfacb173405bbL, + 0x15cdafe7e52369ecL }, + { 0xd42d5ba4d935b7dbL,0x648b60041c99a4cdL,0x785101bda3b5545bL, + 0x4bf2c38a9dd67fafL } }, + /* 63 << 105 */ + { { 0xb1aadc634442449cL,0xe0e9921a33ad4fb8L,0x5c552313aa686d82L, + 0xdee635fa465d866cL }, + { 0xbc3c224a18ee6e8aL,0xeed748a6ed42e02fL,0xe70f930ad474cd08L, + 0x774ea6ecfff24adfL } }, + /* 64 << 105 */ + { { 
0x03e2de1cf3480d4aL,0xf0d8edc7bc8acf1aL,0xf23e330368295a9cL, + 0xfadd5f68c546a97dL }, + { 0x895597ad96f8acb1L,0xbddd49d5671bdae2L,0x16fcd52821dd43f4L, + 0xa5a454126619141aL } }, + /* 0 << 112 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 112 */ + { { 0x8ce9b6bfc360e25aL,0xe6425195075a1a78L,0x9dc756a8481732f4L, + 0x83c0440f5432b57aL }, + { 0xc670b3f1d720281fL,0x2205910ed135e051L,0xded14b0edb052be7L, + 0x697b3d27c568ea39L } }, + /* 2 << 112 */ + { { 0x2e599b9afb3ff9edL,0x28c2e0ab17f6515cL,0x1cbee4fd474da449L, + 0x071279a44f364452L }, + { 0x97abff6601fbe855L,0x3ee394e85fda51c4L,0x190385f667597c0bL, + 0x6e9fccc6a27ee34bL } }, + /* 3 << 112 */ + { { 0x0b89de9314092ebbL,0xf17256bd428e240cL,0xcf89a7f393d2f064L, + 0x4f57841ee1ed3b14L }, + { 0x4ee14405e708d855L,0x856aae7203f1c3d0L,0xc8e5424fbdd7eed5L, + 0x3333e4ef73ab4270L } }, + /* 4 << 112 */ + { { 0x3bc77adedda492f8L,0xc11a3aea78297205L,0x5e89a3e734931b4cL, + 0x17512e2e9f5694bbL }, + { 0x5dc349f3177bf8b6L,0x232ea4ba08c7ff3eL,0x9c4f9d16f511145dL, + 0xccf109a333b379c3L } }, + /* 5 << 112 */ + { { 0xe75e7a88a1f25897L,0x7ac6961fa1b5d4d8L,0xe3e1077308f3ed5cL, + 0x208a54ec0a892dfbL }, + { 0xbe826e1978660710L,0x0cf70a97237df2c8L,0x418a7340ed704da5L, + 0xa3eeb9a908ca33fdL } }, + /* 6 << 112 */ + { { 0x49d96233169bca96L,0x04d286d42da6aafbL,0xc09606eca0c2fa94L, + 0x8869d0d523ff0fb3L }, + { 0xa99937e5d0150d65L,0xa92e2503240c14c9L,0x656bf945108e2d49L, + 0x152a733aa2f59e2bL } }, + /* 7 << 112 */ + { { 0xb4323d588434a920L,0xc0af8e93622103c5L,0x667518ef938dbf9aL, + 0xa184307383a9cdf2L }, + { 0x350a94aa5447ab80L,0xe5e5a325c75a3d61L,0x74ba507f68411a9eL, + 0x10581fc1594f70c5L } }, + /* 8 << 112 */ + { { 0x60e2857080eb24a9L,0x7bedfb4d488e0cfdL,0x721ebbd7c259cdb8L, + 0x0b0da855bc6390a9L }, + { 0x2b4d04dbde314c70L,0xcdbf1fbc6c32e846L,0x33833eabb162fc9eL, + 0x9939b48bb0dd3ab7L } }, + /* 9 << 112 */ + { { 0x5aaa98a7cb0c9c8cL,0x75105f3081c4375cL,0xceee50575ef1c90fL, + 0xb31e065fc23a17bfL }, + { 0x5364d275d4b6d45aL,0xd363f3ad62ec8996L,0xb5d212394391c65bL, + 0x84564765ebb41b47L } }, + /* 10 << 112 */ + { { 0x20d18ecc37107c78L,0xacff3b6b570c2a66L,0x22f975d99bd0d845L, + 0xef0a0c46ba178fa0L }, + { 0x1a41965176b6028eL,0xc49ec674248612d4L,0x5b6ac4f27338af55L, + 0x06145e627bee5a36L } }, + /* 11 << 112 */ + { { 0x33e95d07e75746b5L,0x1c1e1f6dc40c78beL,0x967833ef222ff8e2L, + 0x4bedcf6ab49180adL }, + { 0x6b37e9c13d7a4c8aL,0x2748887c6ddfe760L,0xf7055123aa3a5bbcL, + 0x954ff2257bbb8e74L } }, + /* 12 << 112 */ + { { 0xc42b8ab197c3dfb9L,0x55a549b0cf168154L,0xad6748e7c1b50692L, + 0x2775780f6fc5cbcbL }, + { 0x4eab80b8e1c9d7c8L,0x8c69dae13fdbcd56L,0x47e6b4fb9969eaceL, + 0x002f1085a705cb5aL } }, + /* 13 << 112 */ + { { 0x4e23ca446d3fea55L,0xb4ae9c86f4810568L,0x47bfb91b2a62f27dL, + 0x60deb4c9d9bac28cL }, + { 0xa892d8947de6c34cL,0x4ee682594494587dL,0x914ee14e1a3f8a5bL, + 0xbb113eaa28700385L } }, + /* 14 << 112 */ + { { 0x81ca03b92115b4c9L,0x7c163d388908cad1L,0xc912a118aa18179aL, + 0xe09ed750886e3081L }, + { 0xa676e3fa26f516caL,0x753cacf78e732f91L,0x51592aea833da8b4L, + 0xc626f42f4cbea8aaL } }, + /* 15 << 112 */ + { { 0xef9dc899a7b56eafL,0x00c0e52c34ef7316L,0x5b1e4e24fe818a86L, + 0x9d31e20dc538be47L }, + { 0x22eb932d3ed68974L,0xe44bbc087c4e87c4L,0x4121086e0dde9aefL, + 0x8e6b9cff134f4345L } }, + /* 16 << 112 */ + { { 0x96892c1f711b0eb9L,0xb905f2c8780ab954L,0xace26309a20792dbL, + 0xec8ac9b30684e126L }, + { 0x486ad8b6b40a2447L,0x60121fc19fe3fb24L,0x5626fccf1a8e3b3fL, + 0x4e5686226ad1f394L } }, + /* 17 << 112 */ + { { 
0xda7aae0d196aa5a1L,0xe0df8c771041b5fbL,0x451465d926b318b7L, + 0xc29b6e557ab136e9L }, + { 0x2c2ab48b71148463L,0xb5738de364454a76L,0x54ccf9a05a03abe4L, + 0x377c02960427d58eL } }, + /* 18 << 112 */ + { { 0x73f5f0b92bb39c1fL,0x14373f2ce608d8c5L,0xdcbfd31400fbb805L, + 0xdf18fb2083afdcfbL }, + { 0x81a57f4242b3523fL,0xe958532d87f650fbL,0xaa8dc8b68b0a7d7cL, + 0x1b75dfb7150166beL } }, + /* 19 << 112 */ + { { 0x90e4f7c92d7d1413L,0x67e2d6b59834f597L,0x4fd4f4f9a808c3e8L, + 0xaf8237e0d5281ec1L }, + { 0x25ab5fdc84687ceeL,0xc5ded6b1a5b26c09L,0x8e4a5aecc8ea7650L, + 0x23b73e5c14cc417fL } }, + /* 20 << 112 */ + { { 0x2bfb43183037bf52L,0xb61e6db578c725d7L,0x8efd4060bbb3e5d7L, + 0x2e014701dbac488eL }, + { 0xac75cf9a360aa449L,0xb70cfd0579634d08L,0xa591536dfffb15efL, + 0xb2c37582d07c106cL } }, + /* 21 << 112 */ + { { 0xb4293fdcf50225f9L,0xc52e175cb0e12b03L,0xf649c3bad0a8bf64L, + 0x745a8fefeb8ae3c6L }, + { 0x30d7e5a358321bc3L,0xb1732be70bc4df48L,0x1f217993e9ea5058L, + 0xf7a71cde3e4fd745L } }, + /* 22 << 112 */ + { { 0x86cc533e894c5bbbL,0x6915c7d969d83082L,0xa6aa2d055815c244L, + 0xaeeee59249b22ce5L }, + { 0x89e39d1378135486L,0x3a275c1f16b76f2fL,0xdb6bcc1be036e8f5L, + 0x4df69b215e4709f5L } }, + /* 23 << 112 */ + { { 0xa188b2502d0f39aaL,0x622118bb15a85947L,0x2ebf520ffde0f4faL, + 0xa40e9f294860e539L }, + { 0x7b6a51eb22b57f0fL,0x849a33b97e80644aL,0x50e5d16f1cf095feL, + 0xd754b54eec55f002L } }, + /* 24 << 112 */ + { { 0x5cfbbb22236f4a98L,0x0b0c59e9066800bbL,0x4ac69a8f5a9a7774L, + 0x2b33f804d6bec948L }, + { 0xb372929532e6c466L,0x68956d0f4e599c73L,0xa47a249f155c31ccL, + 0x24d80f0de1ce284eL } }, + /* 25 << 112 */ + { { 0xcd821dfb988baf01L,0xe6331a7ddbb16647L,0x1eb8ad33094cb960L, + 0x593cca38c91bbca5L }, + { 0x384aac8d26567456L,0x40fa0309c04b6490L,0x97834cd6dab6c8f6L, + 0x68a7318d3f91e55fL } }, + /* 26 << 112 */ + { { 0xa00fd04efc4d3157L,0xb56f8ab22bf3bdeaL,0x014f56484fa57172L, + 0x948c5860450abdb3L }, + { 0x342b5df00ebd4f08L,0x3e5168cd0e82938eL,0x7aedc1ceb0df5dd0L, + 0x6bbbc6d9e5732516L } }, + /* 27 << 112 */ + { { 0xc7bfd486605daaa6L,0x46fd72b7bb9a6c9eL,0xe4847fb1a124fb89L, + 0x75959cbda2d8ffbcL }, + { 0x42579f65c8a588eeL,0x368c92e6b80b499dL,0xea4ef6cd999a5df1L, + 0xaa73bb7f936fe604L } }, + /* 28 << 112 */ + { { 0xf347a70d6457d188L,0x86eda86b8b7a388bL,0xb7cdff060ccd6013L, + 0xbeb1b6c7d0053fb2L }, + { 0x0b02238799240a9fL,0x1bbb384f776189b2L,0x8695e71e9066193aL, + 0x2eb5009706ffac7eL } }, + /* 29 << 112 */ + { { 0x0654a9c04a7d2caaL,0x6f3fb3d1a5aaa290L,0x835db041ff476e8fL, + 0x540b8b0bc42295e4L }, + { 0xa5c73ac905e214f5L,0x9a74075a56a0b638L,0x2e4b1090ce9e680bL, + 0x57a5b4796b8d9afaL } }, + /* 30 << 112 */ + { { 0x0dca48e726bfe65cL,0x097e391c7290c307L,0x683c462e6669e72eL, + 0xf505be1e062559acL }, + { 0x5fbe3ea1e3a3035aL,0x6431ebf69cd50da8L,0xfd169d5c1f6407f2L, + 0x8d838a9560fce6b8L } }, + /* 31 << 112 */ + { { 0x2a2bfa7f650006f0L,0xdfd7dad350c0fbb2L,0x92452495ccf9ad96L, + 0x183bf494d95635f9L }, + { 0x02d5df434a7bd989L,0x505385cca5431095L,0xdd98e67dfd43f53eL, + 0xd61e1a6c500c34a9L } }, + /* 32 << 112 */ + { { 0x5a4b46c64a8a3d62L,0x8469c4d0247743d2L,0x2bb3a13d88f7e433L, + 0x62b23a1001be5849L }, + { 0xe83596b4a63d1a4cL,0x454e7fea7d183f3eL,0x643fce6117afb01cL, + 0x4e65e5e61c4c3638L } }, + /* 33 << 112 */ + { { 0x41d85ea1ef74c45bL,0x2cfbfa66ae328506L,0x98b078f53ada7da9L, + 0xd985fe37ec752fbbL }, + { 0xeece68fe5a0148b4L,0x6f9a55c72d78136dL,0x232dccc4d2b729ceL, + 0xa27e0dfd90aafbc4L } }, + /* 34 << 112 */ + { { 0x9647445212b4603eL,0xa876c5516b706d14L,0xdf145fcf69a9d412L, + 0xe2ab75b72d479c34L }, + { 
0x12df9a761a23ff97L,0xc61389925d359d10L,0x6e51c7aefa835f22L, + 0x69a79cb1c0fcc4d9L } }, + /* 35 << 112 */ + { { 0xf57f350d594cc7e1L,0x3079ca633350ab79L,0x226fb6149aff594aL, + 0x35afec026d59a62bL }, + { 0x9bee46f406ed2c6eL,0x58da17357d939a57L,0x44c504028fd1797eL, + 0xd8853e7c5ccea6caL } }, + /* 36 << 112 */ + { { 0x4065508da35fcd5fL,0x8965df8c495ccaebL,0x0f2da85012e1a962L, + 0xee471b94c1cf1cc4L }, + { 0xcef19bc80a08fb75L,0x704958f581de3591L,0x2867f8b23aef4f88L, + 0x8d749384ea9f9a5fL } }, + /* 37 << 112 */ + { { 0x1b3855378c9049f4L,0x5be948f37b92d8b6L,0xd96f725db6e2bd6bL, + 0x37a222bc958c454dL }, + { 0xe7c61abb8809bf61L,0x46f07fbc1346f18dL,0xfb567a7ae87c0d1cL, + 0x84a461c87ef3d07aL } }, + /* 38 << 112 */ + { { 0x0a5adce6d9278d98L,0x24d948139dfc73e1L,0x4f3528b6054321c3L, + 0x2e03fdde692ea706L }, + { 0x10e6061947b533c0L,0x1a8bc73f2ca3c055L,0xae58d4b21bb62b8fL, + 0xb2045a73584a24e3L } }, + /* 39 << 112 */ + { { 0x3ab3d5afbd76e195L,0x478dd1ad6938a810L,0x6ffab3936ee3d5cbL, + 0xdfb693db22b361e4L }, + { 0xf969449651dbf1a7L,0xcab4b4ef08a2e762L,0xe8c92f25d39bba9aL, + 0x850e61bcf1464d96L } }, + /* 40 << 112 */ + { { 0xb7e830e3dc09508bL,0xfaf6d2cf74317655L,0x72606cebdf690355L, + 0x48bb92b3d0c3ded6L }, + { 0x65b754845c7cf892L,0xf6cd7ac9d5d5f01fL,0xc2c30a5996401d69L, + 0x91268650ed921878L } }, + /* 41 << 112 */ + { { 0x380bf913b78c558fL,0x43c0baebc8afdaa9L,0x377f61d554f169d3L, + 0xf8da07e3ae5ff20bL }, + { 0xb676c49da8a90ea8L,0x81c1ff2b83a29b21L,0x383297ac2ad8d276L, + 0x3001122fba89f982L } }, + /* 42 << 112 */ + { { 0xe1d794be6718e448L,0x246c14827c3e6e13L,0x56646ef85d26b5efL, + 0x80f5091e88069cddL }, + { 0xc5992e2f724bdd38L,0x02e915b48471e8c7L,0x96ff320a0d0ff2a9L, + 0xbf8864874384d1a0L } }, + /* 43 << 112 */ + { { 0xbbe1e6a6c93f72d6L,0xd5f75d12cad800eaL,0xfa40a09fe7acf117L, + 0x32c8cdd57581a355L }, + { 0x742219927023c499L,0xa8afe5d738ec3901L,0x5691afcba90e83f0L, + 0x41bcaa030b8f8eacL } }, + /* 44 << 112 */ + { { 0xe38b5ff98d2668d5L,0x0715281a7ad81965L,0x1bc8fc7c03c6ce11L, + 0xcbbee6e28b650436L }, + { 0x06b00fe80cdb9808L,0x17d6e066fe3ed315L,0x2e9d38c64d0b5018L, + 0xab8bfd56844dcaefL } }, + /* 45 << 112 */ + { { 0x42894a59513aed8bL,0xf77f3b6d314bd07aL,0xbbdecb8f8e42b582L, + 0xf10e2fa8d2390fe6L }, + { 0xefb9502262a2f201L,0x4d59ea5050ee32b0L,0xd87f77286da789a8L, + 0xcf98a2cff79492c4L } }, + /* 46 << 112 */ + { { 0xf9577239720943c2L,0xba044cf53990b9d0L,0x5aa8e82395f2884aL, + 0x834de6ed0278a0afL }, + { 0xc8e1ee9a5f25bd12L,0x9259ceaa6f7ab271L,0x7e6d97a277d00b76L, + 0x5c0c6eeaa437832aL } }, + /* 47 << 112 */ + { { 0x5232c20f5606b81dL,0xabd7b3750d991ee5L,0x4d2bfe358632d951L, + 0x78f8514698ed9364L }, + { 0x951873f0f30c3282L,0x0da8ac80a789230bL,0x3ac7789c5398967fL, + 0xa69b8f7fbdda0fb5L } }, + /* 48 << 112 */ + { { 0xe5db77176add8545L,0x1b71cb6672c49b66L,0xd856073968421d77L, + 0x03840fe883e3afeaL }, + { 0xb391dad51ec69977L,0xae243fb9307f6726L,0xc88ac87be8ca160cL, + 0x5174cced4ce355f4L } }, + /* 49 << 112 */ + { { 0x98a35966e58ba37dL,0xfdcc8da27817335dL,0x5b75283083fbc7bfL, + 0x68e419d4d9c96984L }, + { 0x409a39f402a40380L,0x88940faf1fe977bcL,0xc640a94b8f8edea6L, + 0x1e22cd17ed11547dL } }, + /* 50 << 112 */ + { { 0xe28568ce59ffc3e2L,0x60aa1b55c1dee4e7L,0xc67497c8837cb363L, + 0x06fb438a105a2bf2L }, + { 0x30357ec4500d8e20L,0x1ad9095d0670db10L,0x7f589a05c73b7cfdL, + 0xf544607d880d6d28L } }, + /* 51 << 112 */ + { { 0x17ba93b1a20ef103L,0xad8591306ba6577bL,0x65c91cf66fa214a0L, + 0xd7d49c6c27990da5L }, + { 0xecd9ec8d20bb569dL,0xbd4b2502eeffbc33L,0x2056ca5a6bed0467L, + 0x7916a1f75b63728cL } }, + /* 52 << 112 */ + { { 
0xd4f9497d53a4f566L,0x8973466497b56810L,0xf8e1da740494a621L, + 0x82546a938d011c68L }, + { 0x1f3acb19c61ac162L,0x52f8fa9cabad0d3eL,0x15356523b4b7ea43L, + 0x5a16ad61ae608125L } }, + /* 53 << 112 */ + { { 0xb0bcb87f4faed184L,0x5f236b1d5029f45fL,0xd42c76070bc6b1fcL, + 0xc644324e68aefce3L }, + { 0x8e191d595c5d8446L,0xc020807713ae1979L,0xadcaee553ba59cc7L, + 0x20ed6d6ba2cb81baL } }, + /* 54 << 112 */ + { { 0x0952ba19b6efcffcL,0x60f12d6897c0b87cL,0x4ee2c7c49caa30bcL, + 0x767238b797fbff4eL }, + { 0xebc73921501b5d92L,0x3279e3dfc2a37737L,0x9fc12bc86d197543L, + 0xfa94dc6f0a40db4eL } }, + /* 55 << 112 */ + { { 0x7392b41a530ccbbdL,0x87c82146ea823525L,0xa52f984c05d98d0cL, + 0x2ae57d735ef6974cL }, + { 0x9377f7bf3042a6ddL,0xb1a007c019647a64L,0xfaa9079a0cca9767L, + 0x3d81a25bf68f72d5L } }, + /* 56 << 112 */ + { { 0x752067f8ff81578eL,0x786221509045447dL,0xc0c22fcf0505aa6fL, + 0x1030f0a66bed1c77L }, + { 0x31f29f151f0bd739L,0x2d7989c7e6debe85L,0x5c070e728e677e98L, + 0x0a817bd306e81fd5L } }, + /* 57 << 112 */ + { { 0xc110d830b0f2ac95L,0x48d0995aab20e64eL,0x0f3e00e17729cd9aL, + 0x2a570c20dd556946L }, + { 0x912dbcfd4e86214dL,0x2d014ee2cf615498L,0x55e2b1e63530d76eL, + 0xc5135ae4fd0fd6d1L } }, + /* 58 << 112 */ + { { 0x0066273ad4f3049fL,0xbb8e9893e7087477L,0x2dba1ddb14c6e5fdL, + 0xdba3788651f57e6cL }, + { 0x5aaee0a65a72f2cfL,0x1208bfbf7bea5642L,0xf5c6aa3b67872c37L, + 0xd726e08343f93224L } }, + /* 59 << 112 */ + { { 0x1854daa5061f1658L,0xc0016df1df0cd2b3L,0xc2a3f23e833d50deL, + 0x73b681d2bbbd3017L }, + { 0x2f046dc43ac343c0L,0x9c847e7d85716421L,0xe1e13c910917eed4L, + 0x3fc9eebd63a1b9c6L } }, + /* 60 << 112 */ + { { 0x0f816a727fe02299L,0x6335ccc2294f3319L,0x3820179f4745c5beL, + 0xe647b782922f066eL }, + { 0xc22e49de02cafb8aL,0x299bc2fffcc2ecccL,0x9a8feea26e0e8282L, + 0xa627278bfe893205L } }, + /* 61 << 112 */ + { { 0xa7e197337933e47bL,0xf4ff6b132e766402L,0xa4d8be0a98440d9fL, + 0x658f5c2f38938808L }, + { 0x90b75677c95b3b3eL,0xfa0442693137b6ffL,0x077b039b43c47c29L, + 0xcca95dd38a6445b2L } }, + /* 62 << 112 */ + { { 0x0b498ba42333fc4cL,0x274f8e68f736a1b1L,0x6ca348fd5f1d4b2eL, + 0x24d3be78a8f10199L }, + { 0x8535f858ca14f530L,0xa6e7f1635b982e51L,0x847c851236e1bf62L, + 0xf6a7c58e03448418L } }, + /* 63 << 112 */ + { { 0x583f3703f9374ab6L,0x864f91956e564145L,0x33bc3f4822526d50L, + 0x9f323c801262a496L }, + { 0xaa97a7ae3f046a9aL,0x70da183edf8a039aL,0x5b68f71c52aa0ba6L, + 0x9be0fe5121459c2dL } }, + /* 64 << 112 */ + { { 0xc1e17eb6cbc613e5L,0x33131d55497ea61cL,0x2f69d39eaf7eded5L, + 0x73c2f434de6af11bL }, + { 0x4ca52493a4a375faL,0x5f06787cb833c5c2L,0x814e091f3e6e71cfL, + 0x76451f578b746666L } }, + /* 0 << 119 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 119 */ + { { 0x80f9bdef694db7e0L,0xedca8787b9fcddc6L,0x51981c3403b8dce1L, + 0x4274dcf170e10ba1L }, + { 0xf72743b86def6d1aL,0xd25b1670ebdb1866L,0xc4491e8c050c6f58L, + 0x2be2b2ab87fbd7f5L } }, + /* 2 << 119 */ + { { 0x3e0e5c9dd111f8ecL,0xbcc33f8db7c4e760L,0x702f9a91bd392a51L, + 0x7da4a795c132e92dL }, + { 0x1a0b0ae30bb1151bL,0x54febac802e32251L,0xea3a5082694e9e78L, + 0xe58ffec1e4fe40b8L } }, + /* 3 << 119 */ + { { 0xf85592fcd1e0cf9eL,0xdea75f0dc0e7b2e8L,0xc04215cfc135584eL, + 0x174fc7272f57092aL }, + { 0xe7277877eb930beaL,0x504caccb5eb02a5aL,0xf9fe08f7f5241b9bL, + 0xe7fb62f48d5ca954L } }, + /* 4 << 119 */ + { { 0xfbb8349d29c4120bL,0x9f94391fc0d0d915L,0xc4074fa75410ba51L, + 0xa66adbf6150a5911L }, + { 0xc164543c34bfca38L,0xe0f27560b9e1ccfcL,0x99da0f53e820219cL, + 0xe8234498c6b4997aL } }, + /* 5 << 119 */ + { { 
0xcfb88b769d4c5423L,0x9e56eb10b0521c49L,0x418e0b5ebe8700a1L, + 0x00cbaad6f93cb58aL }, + { 0xe923fbded92a5e67L,0xca4979ac1f347f11L,0x89162d856bc0585bL, + 0xdd6254afac3c70e3L } }, + /* 6 << 119 */ + { { 0x7b23c513516e19e4L,0x56e2e847c5c4d593L,0x9f727d735ce71ef6L, + 0x5b6304a6f79a44c5L }, + { 0x6638a7363ab7e433L,0x1adea470fe742f83L,0xe054b8545b7fc19fL, + 0xf935381aba1d0698L } }, + /* 7 << 119 */ + { { 0x546eab2d799e9a74L,0x96239e0ea949f729L,0xca274c6b7090055aL, + 0x835142c39020c9b0L }, + { 0xa405667aa2e8807fL,0x29f2c0851aa3d39eL,0xcc555d6442fc72f5L, + 0xe856e0e7fbeacb3cL } }, + /* 8 << 119 */ + { { 0xb5504f9d918e4936L,0x65035ef6b2513982L,0x0553a0c26f4d9cb9L, + 0x6cb10d56bea85509L }, + { 0x48d957b7a242da11L,0x16a4d3dd672b7268L,0x3d7e637c8502a96bL, + 0x27c7032b730d463bL } }, + /* 9 << 119 */ + { { 0xbdc02b18e4136a14L,0xbacf969d678e32bfL,0xc98d89a3dd9c3c03L, + 0x7b92420a23becc4fL }, + { 0xd4b41f78c64d565cL,0x9f969d0010f28295L,0xec7f7f76b13d051aL, + 0x08945e1ea92da585L } }, + /* 10 << 119 */ + { { 0x55366b7d5846426fL,0xe7d09e89247d441dL,0x510b404d736fbf48L, + 0x7fa003d0e784bd7dL }, + { 0x25f7614f17fd9596L,0x49e0e0a135cb98dbL,0x2c65957b2e83a76aL, + 0x5d40da8dcddbe0f8L } }, + /* 11 << 119 */ + { { 0xf2b8c405050bad24L,0x8918426dc2aa4823L,0x2aeab3dda38365a7L, + 0x720317177c91b690L }, + { 0x8b00d69960a94120L,0x478a255de99eaeecL,0xbf656a5f6f60aafdL, + 0xdfd7cb755dee77b3L } }, + /* 12 << 119 */ + { { 0x37f68bb4a595939dL,0x0355647928740217L,0x8e740e7c84ad7612L, + 0xd89bc8439044695fL }, + { 0xf7f3da5d85a9184dL,0x562563bb9fc0b074L,0x06d2e6aaf88a888eL, + 0x612d8643161fbe7cL } }, + /* 13 << 119 */ + { { 0x465edba7f64085e7L,0xb230f30429aa8511L,0x53388426cda2d188L, + 0x908857354b666649L }, + { 0x6f02ff9a652f54f6L,0x65c822945fae2bf0L,0x7816ade062f5eee3L, + 0xdcdbdf43fcc56d70L } }, + /* 14 << 119 */ + { { 0x9fb3bba354530bb2L,0xbde3ef77cb0869eaL,0x89bc90460b431163L, + 0x4d03d7d2e4819a35L }, + { 0x33ae4f9e43b6a782L,0x216db3079c88a686L,0x91dd88e000ffedd9L, + 0xb280da9f12bd4840L } }, + /* 15 << 119 */ + { { 0x32a7cb8a1635e741L,0xfe14008a78be02a7L,0x3fafb3341b7ae030L, + 0x7fd508e75add0ce9L }, + { 0x72c83219d607ad51L,0x0f229c0a8d40964aL,0x1be2c3361c878da2L, + 0xe0c96742eab2ab86L } }, + /* 16 << 119 */ + { { 0x458f86913e538cd7L,0xa7001f6c8e08ad53L,0x52b8c6e6bf5d15ffL, + 0x548234a4011215ddL }, + { 0xff5a9d2d3d5b4045L,0xb0ffeeb64a904190L,0x55a3aca448607f8bL, + 0x8cbd665c30a0672aL } }, + /* 17 << 119 */ + { { 0x87f834e042583068L,0x02da2aebf3f6e683L,0x6b763e5d05c12248L, + 0x7230378f65a8aefcL }, + { 0x93bd80b571e8e5caL,0x53ab041cb3b62524L,0x1b8605136c9c552eL, + 0xe84d402cd5524e66L } }, + /* 18 << 119 */ + { { 0xa37f3573f37f5937L,0xeb0f6c7dd1e4fca5L,0x2965a554ac8ab0fcL, + 0x17fbf56c274676acL }, + { 0x2e2f6bd9acf7d720L,0x41fc8f8810224766L,0x517a14b385d53befL, + 0xdae327a57d76a7d1L } }, + /* 19 << 119 */ + { { 0x6ad0a065c4818267L,0x33aa189b37c1bbc1L,0x64970b5227392a92L, + 0x21699a1c2d1535eaL }, + { 0xcd20779cc2d7a7fdL,0xe318605999c83cf2L,0x9b69440b72c0b8c7L, + 0xa81497d77b9e0e4dL } }, + /* 20 << 119 */ + { { 0x515d5c891f5f82dcL,0x9a7f67d76361079eL,0xa8da81e311a35330L, + 0xe44990c44b18be1bL }, + { 0xc7d5ed95af103e59L,0xece8aba78dac9261L,0xbe82b0999394b8d3L, + 0x6830f09a16adfe83L } }, + /* 21 << 119 */ + { { 0x250a29b488172d01L,0x8b20bd65caff9e02L,0xb8a7661ee8a6329aL, + 0x4520304dd3fce920L }, + { 0xae45da1f2b47f7efL,0xe07f52885bffc540L,0xf79970093464f874L, + 0x2244c2cda6fa1f38L } }, + /* 22 << 119 */ + { { 0x43c41ac194d7d9b1L,0x5bafdd82c82e7f17L,0xdf0614c15fda0fcaL, + 0x74b043a7a8ae37adL }, + { 
0x3ba6afa19e71734cL,0x15d5437e9c450f2eL,0x4a5883fe67e242b1L, + 0x5143bdc22c1953c2L } }, + /* 23 << 119 */ + { { 0x542b8b53fc5e8920L,0x363bf9a89a9cee08L,0x02375f10c3486e08L, + 0x2037543b8c5e70d2L }, + { 0x7109bccc625640b4L,0xcbc1051e8bc62c3bL,0xf8455fed803f26eaL, + 0x6badceabeb372424L } }, + /* 24 << 119 */ + { { 0xa2a9ce7c6b53f5f9L,0x642465951b176d99L,0xb1298d36b95c081bL, + 0x53505bb81d9a9ee6L }, + { 0x3f6f9e61f2ba70b0L,0xd07e16c98afad453L,0x9f1694bbe7eb4a6aL, + 0xdfebced93cb0bc8eL } }, + /* 25 << 119 */ + { { 0x92d3dcdc53868c8bL,0x174311a2386107a6L,0x4109e07c689b4e64L, + 0x30e4587f2df3dcb6L }, + { 0x841aea310811b3b2L,0x6144d41d0cce43eaL,0x464c45812a9a7803L, + 0xd03d371f3e158930L } }, + /* 26 << 119 */ + { { 0xc676d7f2b1f3390bL,0x9f7a1b8ca5b61272L,0x4ebebfc9c2e127a9L, + 0x4602500c5dd997bfL }, + { 0x7f09771c4711230fL,0x058eb37c020f09c1L,0xab693d4bfee5e38bL, + 0x9289eb1f4653cbc0L } }, + /* 27 << 119 */ + { { 0xbecf46abd51b9cf5L,0xd2aa9c029f0121afL,0x36aaf7d2e90dc274L, + 0x909e4ea048b95a3cL }, + { 0xe6b704966f32dbdbL,0x672188a08b030b3eL,0xeeffe5b3cfb617e2L, + 0x87e947de7c82709eL } }, + /* 28 << 119 */ + { { 0xa44d2b391770f5a7L,0xe4d4d7910e44eb82L,0x42e69d1e3f69712aL, + 0xbf11c4d6ac6a820eL }, + { 0xb5e7f3e542c4224cL,0xd6b4e81c449d941cL,0x5d72bd165450e878L, + 0x6a61e28aee25ac54L } }, + /* 29 << 119 */ + { { 0x33272094e6f1cd95L,0x7512f30d0d18673fL,0x32f7a4ca5afc1464L, + 0x2f0956566bbb977bL }, + { 0x586f47caa8226200L,0x02c868ad1ac07369L,0x4ef2b845c613acbeL, + 0x43d7563e0386054cL } }, + /* 30 << 119 */ + { { 0x54da9dc7ab952578L,0xb5423df226e84d0bL,0xa8b64eeb9b872042L, + 0xac2057825990f6dfL }, + { 0x4ff696eb21f4c77aL,0x1a79c3e4aab273afL,0x29bc922e9436b3f1L, + 0xff807ef8d6d9a27aL } }, + /* 31 << 119 */ + { { 0x82acea3d778f22a0L,0xfb10b2e85b5e7469L,0xc0b169802818ee7dL, + 0x011afff4c91c1a2fL }, + { 0x95a6d126ad124418L,0x31c081a5e72e295fL,0x36bb283af2f4db75L, + 0xd115540f7acef462L } }, + /* 32 << 119 */ + { { 0xc7f3a8f833f6746cL,0x21e46f65fea990caL,0x915fd5c5caddb0a9L, + 0xbd41f01678614555L }, + { 0x346f4434426ffb58L,0x8055943614dbc204L,0xf3dd20fe5a969b7fL, + 0x9d59e956e899a39aL } }, + /* 33 << 119 */ + { { 0xf1b0971c8ad4cf4bL,0x034488602ffb8fb8L,0xf071ac3c65340ba4L, + 0x408d0596b27fd758L }, + { 0xe7c78ea498c364b0L,0xa4aac4a5051e8ab5L,0xb9e1d560485d9002L, + 0x9acd518a88844455L } }, + /* 34 << 119 */ + { { 0xe4ca688fd06f56c0L,0xa48af70ddf027972L,0x691f0f045e9a609dL, + 0xa9dd82cdee61270eL }, + { 0x8903ca63a0ef18d3L,0x9fb7ee353d6ca3bdL,0xa7b4a09cabf47d03L, + 0x4cdada011c67de8eL } }, + /* 35 << 119 */ + { { 0x520037499355a244L,0xe77fd2b64f2151a9L,0x695d6cf666b4efcbL, + 0xc5a0cacfda2cfe25L }, + { 0x104efe5cef811865L,0xf52813e89ea5cc3dL,0x855683dc40b58dbcL, + 0x0338ecde175fcb11L } }, + /* 36 << 119 */ + { { 0xf9a0563774921592L,0xb4f1261db9bb9d31L,0x551429b74e9c5459L, + 0xbe182e6f6ea71f53L }, + { 0xd3a3b07cdfc50573L,0x9ba1afda62be8d44L,0x9bcfd2cb52ab65d3L, + 0xdf11d547a9571802L } }, + /* 37 << 119 */ + { { 0x099403ee02a2404aL,0x497406f421088a71L,0x994794095004ae71L, + 0xbdb42078a812c362L }, + { 0x2b72a30fd8828442L,0x283add27fcb5ed1cL,0xf7c0e20066a40015L, + 0x3e3be64108b295efL } }, + /* 38 << 119 */ + { { 0xac127dc1e038a675L,0x729deff38c5c6320L,0xb7df8fd4a90d2c53L, + 0x9b74b0ec681e7cd3L }, + { 0x5cb5a623dab407e5L,0xcdbd361576b340c6L,0xa184415a7d28392cL, + 0xc184c1d8e96f7830L } }, + /* 39 << 119 */ + { { 0xc3204f1981d3a80fL,0xfde0c841c8e02432L,0x78203b3e8149e0c1L, + 0x5904bdbb08053a73L }, + { 0x30fc1dd1101b6805L,0x43c223bc49aa6d49L,0x9ed671417a174087L, + 0x311469a0d5997008L } }, + /* 40 << 119 */ + { { 
0xb189b6845e43fc61L,0xf3282375e0d3ab57L,0x4fa34b67b1181da8L, + 0x621ed0b299ee52b8L }, + { 0x9b178de1ad990676L,0xd51de67b56d54065L,0x2a2c27c47538c201L, + 0x33856ec838a40f5cL } }, + /* 41 << 119 */ + { { 0x2522fc15be6cdcdeL,0x1e603f339f0c6f89L,0x7994edc3103e30a6L, + 0x033a00db220c853eL }, + { 0xd3cfa409f7bb7fd7L,0x70f8781e462d18f6L,0xbbd82980687fe295L, + 0x6eef4c32595669f3L } }, + /* 42 << 119 */ + { { 0x86a9303b2f7e85c3L,0x5fce462171988f9bL,0x5b935bf6c138acb5L, + 0x30ea7d6725661212L }, + { 0xef1eb5f4e51ab9a2L,0x0587c98aae067c78L,0xb3ce1b3c77ca9ca6L, + 0x2a553d4d54b5f057L } }, + /* 43 << 119 */ + { { 0xc78982364da29ec2L,0xdbdd5d13b9c57316L,0xc57d6e6b2cd80d47L, + 0x80b460cffe9e7391L }, + { 0x98648cabf963c31eL,0x67f9f633cc4d32fdL,0x0af42a9dfdf7c687L, + 0x55f292a30b015ea7L } }, + /* 44 << 119 */ + { { 0x89e468b2cd21ab3dL,0xe504f022c393d392L,0xab21e1d4a5013af9L, + 0xe3283f78c2c28acbL }, + { 0xf38b35f6226bf99fL,0xe83542740e291e69L,0x61673a15b20c162dL, + 0xc101dc75b04fbdbeL } }, + /* 45 << 119 */ + { { 0x8323b4c2255bd617L,0x6c9696936c2a9154L,0xc6e6586062679387L, + 0x8e01db0cb8c88e23L }, + { 0x33c42873893a5559L,0x7630f04b47a3e149L,0xb5d80805ddcf35f8L, + 0x582ca08077dfe732L } }, + /* 46 << 119 */ + { { 0x2c7156e10b1894a0L,0x92034001d81c68c0L,0xed225d00c8b115b5L, + 0x237f9c2283b907f2L }, + { 0x0ea2f32f4470e2c0L,0xb725f7c158be4e95L,0x0f1dcafab1ae5463L, + 0x59ed51871ba2fc04L } }, + /* 47 << 119 */ + { { 0xf6e0f316d0115d4dL,0x5180b12fd3691599L,0x157e32c9527f0a41L, + 0x7b0b081da8e0ecc0L }, + { 0x6dbaaa8abf4f0dd0L,0x99b289c74d252696L,0x79b7755edbf864feL, + 0x6974e2b176cad3abL } }, + /* 48 << 119 */ + { { 0x35dbbee206ddd657L,0xe7cbdd112ff3a96dL,0x88381968076be758L, + 0x2d737e7208c91f5dL }, + { 0x5f83ab6286ec3776L,0x98aa649d945fa7a1L,0xf477ec3772ef0933L, + 0x66f52b1e098c17b1L } }, + /* 49 << 119 */ + { { 0x9eec58fbd803738bL,0x91aaade7e4e86aa4L,0x6b1ae617a5b51492L, + 0x63272121bbc45974L }, + { 0x7e0e28f0862c5129L,0x0a8f79a93321a4a0L,0xe26d16645041c88fL, + 0x0571b80553233e3aL } }, + /* 50 << 119 */ + { { 0xd1b0ccdec9520711L,0x55a9e4ed3c8b84bfL,0x9426bd39a1fef314L, + 0x4f5f638e6eb93f2bL }, + { 0xba2a1ed32bf9341bL,0xd63c13214d42d5a9L,0xd2964a89316dc7c5L, + 0xd1759606ca511851L } }, + /* 51 << 119 */ + { { 0xd8a9201ff9e6ed35L,0xb7b5ee456736925aL,0x0a83fbbc99581af7L, + 0x3076bc4064eeb051L }, + { 0x5511c98c02dec312L,0x270de898238dcb78L,0x2cf4cf9c539c08c9L, + 0xa70cb65e38d3b06eL } }, + /* 52 << 119 */ + { { 0xb12ec10ecfe57bbdL,0x82c7b65635a0c2b5L,0xddc7d5cd161c67bdL, + 0xe32e8985ae3a32ccL }, + { 0x7aba9444d11a5529L,0xe964ed022427fa1aL,0x1528392d24a1770aL, + 0xa152ce2c12c72fcdL } }, + /* 53 << 119 */ + { { 0x714553a48ec07649L,0x18b4c290459dd453L,0xea32b7147b64b110L, + 0xb871bfa52e6f07a2L }, + { 0xb67112e59e2e3c9bL,0xfbf250e544aa90f6L,0xf77aedb8bd539006L, + 0x3b0cdf9ad172a66fL } }, + /* 54 << 119 */ + { { 0xedf69feaf8c51187L,0x05bb67ec741e4da7L,0x47df0f3208114345L, + 0x56facb07bb9792b1L }, + { 0xf3e007e98f6229e4L,0x62d103f4526fba0fL,0x4f33bef7b0339d79L, + 0x9841357bb59bfec1L } }, + /* 55 << 119 */ + { { 0xfa8dbb59c34e6705L,0xc3c7180b7fdaa84cL,0xf95872fca4108537L, + 0x8750cc3b932a3e5aL }, + { 0xb61cc69db7275d7dL,0xffa0168b2e59b2e9L,0xca032abc6ecbb493L, + 0x1d86dbd32c9082d8L } }, + /* 56 << 119 */ + { { 0xae1e0b67e28ef5baL,0x2c9a4699cb18e169L,0x0ecd0e331e6bbd20L, + 0x571b360eaf5e81d2L }, + { 0xcd9fea58101c1d45L,0x6651788e18880452L,0xa99726351f8dd446L, + 0x44bed022e37281d0L } }, + /* 57 << 119 */ + { { 0x094b2b2d33da525dL,0xf193678e13144fd8L,0xb8ab5ba4f4c1061dL, + 0x4343b5fadccbe0f4L }, + { 
0xa870237163812713L,0x47bf6d2df7611d93L,0x46729b8cbd21e1d7L, + 0x7484d4e0d629e77dL } }, + /* 58 << 119 */ + { { 0x830e6eea60dbac1fL,0x23d8c484da06a2f7L,0x896714b050ca535bL, + 0xdc8d3644ebd97a9bL }, + { 0x106ef9fab12177b4L,0xf79bf464534d5d9cL,0x2537a349a6ab360bL, + 0xc7c54253a00c744fL } }, + /* 59 << 119 */ + { { 0xb3c7a047e5911a76L,0x61ffa5c8647f1ee7L,0x15aed36f8f56ab42L, + 0x6a0d41b0a3ff9ac9L }, + { 0x68f469f5cc30d357L,0xbe9adf816b72be96L,0x1cd926fe903ad461L, + 0x7e89e38fcaca441bL } }, + /* 60 << 119 */ + { { 0xf0f82de5facf69d4L,0x363b7e764775344cL,0x6894f312b2e36d04L, + 0x3c6cb4fe11d1c9a5L }, + { 0x85d9c3394008e1f2L,0x5e9a85ea249f326cL,0xdc35c60a678c5e06L, + 0xc08b944f9f86fba9L } }, + /* 61 << 119 */ + { { 0xde40c02c89f71f0fL,0xad8f3e31ff3da3c0L,0x3ea5096b42125dedL, + 0x13879cbfa7379183L }, + { 0x6f4714a56b306a0bL,0x359c2ea667646c5eL,0xfacf894307726368L, + 0x07a5893565ff431eL } }, + /* 62 << 119 */ + { { 0x24d661d168754ab0L,0x801fce1d6f429a76L,0xc068a85fa58ce769L, + 0xedc35c545d5eca2bL }, + { 0xea31276fa3f660d1L,0xa0184ebeb8fc7167L,0x0f20f21a1d8db0aeL, + 0xd96d095f56c35e12L } }, + /* 63 << 119 */ + { { 0xedf402b5f8c2a25bL,0x1bb772b9059204b6L,0x50cbeae219b4e34cL, + 0x93109d803fa0845aL }, + { 0x54f7ccf78ef59fb5L,0x3b438fe288070963L,0x9e28c65931f3ba9bL, + 0x9cc31b46ead9da92L } }, + /* 64 << 119 */ + { { 0x3c2f0ba9b733aa5fL,0xdece47cbf05af235L,0xf8e3f715a2ac82a5L, + 0xc97ba6412203f18aL }, + { 0xc3af550409c11060L,0x56ea2c0546af512dL,0xfac28daff3f28146L, + 0x87fab43a959ef494L } }, + /* 0 << 126 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 126 */ + { { 0x09891641d4c5105fL,0x1ae80f8e6d7fbd65L,0x9d67225fbee6bdb0L, + 0x3b433b597fc4d860L }, + { 0x44e66db693e85638L,0xf7b59252e3e9862fL,0xdb785157665c32ecL, + 0x702fefd7ae362f50L } }, + /* 2 << 126 */ + { { 0x3754475d0fefb0c3L,0xd48fb56b46d7c35dL,0xa070b633363798a4L, + 0xae89f3d28fdb98e6L }, + { 0x970b89c86363d14cL,0x8981752167abd27dL,0x9bf7d47444d5a021L, + 0xb3083bafcac72aeeL } }, + /* 3 << 126 */ + { { 0x389741debe949a44L,0x638e9388546a4fa5L,0x3fe6419ca0047bdcL, + 0x7047f648aaea57caL }, + { 0x54e48a9041fbab17L,0xda8e0b28576bdba2L,0xe807eebcc72afddcL, + 0x07d3336df42577bfL } }, + /* 4 << 126 */ + { { 0x62a8c244bfe20925L,0x91c19ac38fdce867L,0x5a96a5d5dd387063L, + 0x61d587d421d324f6L }, + { 0xe87673a2a37173eaL,0x2384800853778b65L,0x10f8441e05bab43eL, + 0xfa11fe124621efbeL } }, + /* 5 << 126 */ + { { 0x047b772e81685d7bL,0x23f27d81bf34a976L,0xc27608e2915f48efL, + 0x3b0b43faa521d5c3L }, + { 0x7613fb2663ca7284L,0x7f5729b41d4db837L,0x87b14898583b526bL, + 0x00b732a6bbadd3d1L } }, + /* 6 << 126 */ + { { 0x8e02f4262048e396L,0x436b50b6383d9de4L,0xf78d3481471e85adL, + 0x8b01ea6ad005c8d6L }, + { 0xd3c7afee97015c07L,0x46cdf1a94e3ba2aeL,0x7a42e50183d3a1d2L, + 0xd54b5268b541dff4L } }, + /* 7 << 126 */ + { { 0x3f24cf304e23e9bcL,0x4387f816126e3624L,0x26a46a033b0b6d61L, + 0xaf1bc8458b2d777cL }, + { 0x25c401ba527de79cL,0x0e1346d44261bbb6L,0x4b96c44b287b4bc7L, + 0x658493c75254562fL } }, + /* 8 << 126 */ + { { 0x23f949feb8a24a20L,0x17ebfed1f52ca53fL,0x9b691bbebcfb4853L, + 0x5617ff6b6278a05dL }, + { 0x241b34c5e3c99ebdL,0xfc64242e1784156aL,0x4206482f695d67dfL, + 0xb967ce0eee27c011L } }, + /* 9 << 126 */ + { { 0x65db375121c80b5dL,0x2e7a563ca31ecca0L,0xe56ffc4e5238a07eL, + 0x3d6c296632ced854L }, + { 0xe99d7d1aaf70b885L,0xafc3bad92d686459L,0x9c78bf460cc8ba5bL, + 0x5a43951918955aa3L } }, + /* 10 << 126 */ + { { 0xf8b517a85fe4e314L,0xe60234d0fcb8906fL,0xffe542acf2061b23L, + 0x287e191f6b4cb59cL }, + { 
0x21857ddc09d877d8L,0x1c23478c14678941L,0xbbf0c056b6e05ea4L, + 0x82da4b53b01594feL } }, + /* 11 << 126 */ + { { 0xf7526791fadb8608L,0x049e832d7b74cdf6L,0xa43581ccc2b90a34L, + 0x73639eb89360b10cL }, + { 0x4fba331fe1e4a71bL,0x6ffd6b938072f919L,0x6e53271c65679032L, + 0x67206444f14272ceL } }, + /* 12 << 126 */ + { { 0xc0f734a3b2335834L,0x9526205a90ef6860L,0xcb8be71704e2bb0dL, + 0x2418871e02f383faL }, + { 0xd71776814082c157L,0xcc914ad029c20073L,0xf186c1ebe587e728L, + 0x6fdb3c2261bcd5fdL } }, + /* 13 << 126 */ + { { 0x30d014a6f2f9f8e9L,0x963ece234fec49d2L,0x862025c59605a8d9L, + 0x3987444519f8929aL }, + { 0x01b6ff6512bf476aL,0x598a64d809cf7d91L,0xd7ec774993be56caL, + 0x10899785cbb33615L } }, + /* 14 << 126 */ + { { 0xb8a092fd02eee3adL,0xa86b3d3530145270L,0x323d98c68512b675L, + 0x4b8bc78562ebb40fL }, + { 0x7d301f54413f9cdeL,0xa5e4fb4f2bab5664L,0x1d2b252d1cbfec23L, + 0xfcd576bbe177120dL } }, + /* 15 << 126 */ + { { 0x04427d3e83731a34L,0x2bb9028eed836e8eL,0xb36acff8b612ca7cL, + 0xb88fe5efd3d9c73aL }, + { 0xbe2a6bc6edea4eb3L,0x43b93133488eec77L,0xf41ff566b17106e1L, + 0x469e9172654efa32L } }, + /* 16 << 126 */ + { { 0xb4480f0441c23fa3L,0xb4712eb0c1989a2eL,0x3ccbba0f93a29ca7L, + 0x6e205c14d619428cL }, + { 0x90db7957b3641686L,0x0432691d45ac8b4eL,0x07a759acf64e0350L, + 0x0514d89c9c972517L } }, + /* 17 << 126 */ + { { 0x1701147fa8e67fc3L,0x9e2e0b8bab2085beL,0xd5651824ac284e57L, + 0x890d432574893664L }, + { 0x8a7c5e6ec55e68a3L,0xbf12e90b4339c85aL,0x31846b85f922b655L, + 0x9a54ce4d0bf4d700L } }, + /* 18 << 126 */ + { { 0xd7f4e83af1a14295L,0x916f955cb285d4f9L,0xe57bb0e099ffdabaL, + 0x28a43034eab0d152L }, + { 0x0a36ffa2b8a9cef8L,0x5517407eb9ec051aL,0x9c796096ea68e672L, + 0x853db5fbfb3c77fbL } }, + /* 19 << 126 */ + { { 0x21474ba9e864a51aL,0x6c2676996e8a1b8bL,0x7c82362694120a28L, + 0xe61e9a488383a5dbL }, + { 0x7dd750039f84216dL,0xab020d07ad43cd85L,0x9437ae48da12c659L, + 0x6449c2ebe65452adL } }, + /* 20 << 126 */ + { { 0xcc7c4c1c2cf9d7c1L,0x1320886aee95e5abL,0xbb7b9056beae170cL, + 0xc8a5b250dbc0d662L }, + { 0x4ed81432c11d2303L,0x7da669121f03769fL,0x3ac7a5fd84539828L, + 0x14dada943bccdd02L } }, + /* 21 << 126 */ + { { 0x8b84c3217ef6b0d1L,0x52a9477a7c933f22L,0x5ef6728afd440b82L, + 0x5c3bd8596ce4bd5eL }, + { 0x918b80f5f22c2d3eL,0x368d5040b7bb6cc5L,0xb66142a12695a11cL, + 0x60ac583aeb19ea70L } }, + /* 22 << 126 */ + { { 0x317cbb980eab2437L,0x8cc08c555e2654c8L,0xfe2d6520e6d8307fL, + 0xe9f147f357428993L }, + { 0x5f9c7d14d2fd6cf1L,0xa3ecd0642d4fcbb0L,0xad83fef08e7341f7L, + 0x643f23a03a63115cL } }, + /* 23 << 126 */ + { { 0xd38a78abe65ab743L,0xbf7c75b135edc89cL,0x3dd8752e530df568L, + 0xf85c4a76e308c682L }, + { 0x4c9955b2e68acf37L,0xa544df3dab32af85L,0x4b8ec3f5a25cf493L, + 0x4d8f27641a622febL } }, + /* 24 << 126 */ + { { 0x7bb4f7aaf0dcbc49L,0x7de551f970bbb45bL,0xcfd0f3e49f2ca2e5L, + 0xece587091f5c76efL }, + { 0x32920edd167d79aeL,0x039df8a2fa7d7ec1L,0xf46206c0bb30af91L, + 0x1ff5e2f522676b59L } }, + /* 25 << 126 */ + { { 0x11f4a0396ea51d66L,0x506c1445807d7a26L,0x60da5705755a9b24L, + 0x8fc8cc321f1a319eL }, + { 0x83642d4d9433d67dL,0x7fa5cb8f6a7dd296L,0x576591db9b7bde07L, + 0x13173d25419716fbL } }, + /* 26 << 126 */ + { { 0xea30599dd5b340ffL,0xfc6b5297b0fe76c5L,0x1c6968c8ab8f5adcL, + 0xf723c7f5901c928dL }, + { 0x4203c3219773d402L,0xdf7c6aa31b51dd47L,0x3d49e37a552be23cL, + 0x57febee80b5a6e87L } }, + /* 27 << 126 */ + { { 0xc5ecbee47bd8e739L,0x79d44994ae63bf75L,0x168bd00f38fb8923L, + 0x75d48ee4d0533130L }, + { 0x554f77aadb5cdf33L,0x3396e8963c696769L,0x2fdddbf2d3fd674eL, + 0xbbb8f6ee99d0e3e5L } }, + /* 28 << 126 */ + { { 
0x51b90651cbae2f70L,0xefc4bc0593aaa8ebL,0x8ecd8689dd1df499L, + 0x1aee99a822f367a5L }, + { 0x95d485b9ae8274c5L,0x6c14d4457d30b39cL,0xbafea90bbcc1ef81L, + 0x7c5f317aa459a2edL } }, + /* 29 << 126 */ + { { 0x012110754ef44227L,0xa17bed6edc20f496L,0x0cdfe424819853cdL, + 0x13793298f71e2ce7L }, + { 0x3c1f3078dbbe307bL,0x6dd1c20e76ee9936L,0x23ee4b57423caa20L, + 0x4ac3793b8efb840eL } }, + /* 30 << 126 */ + { { 0x934438ebed1f8ca0L,0x3e5466584ebb25a2L,0xc415af0ec069896fL, + 0xc13eddb09a5aa43dL }, + { 0x7a04204fd49eb8f6L,0xd0d5bdfcd74f1670L,0x3697e28656fc0558L, + 0x1020737101cebadeL } }, + /* 31 << 126 */ + { { 0x5f87e6900647a82bL,0x908e0ed48f40054fL,0xa9f633d479853803L, + 0x8ed13c9a4a28b252L }, + { 0x3e2ef6761f460f64L,0x53930b9b36d06336L,0x347073ac8fc4979bL, + 0x84380e0e5ecd5597L } }, + /* 32 << 126 */ + { { 0xe3b22c6bc4fe3c39L,0xba4a81536c7bebdfL,0xf23ab6b725693459L, + 0x53bc377014922b11L }, + { 0x4645c8ab5afc60dbL,0xaa02235520b9f2a3L,0x52a2954cce0fc507L, + 0x8c2731bb7ce1c2e7L } }, + /* 33 << 126 */ + { { 0xf39608ab18a0339dL,0xac7a658d3735436cL,0xb22c2b07cd992b4fL, + 0x4e83daecf40dcfd4L }, + { 0x8a34c7be2f39ea3eL,0xef0c005fb0a56d2eL,0x62731f6a6edd8038L, + 0x5721d7404e3cb075L } }, + /* 34 << 126 */ + { { 0x1ea41511fbeeee1bL,0xd1ef5e73ef1d0c05L,0x42feefd173c07d35L, + 0xe530a00a8a329493L }, + { 0x5d55b7fef15ebfb0L,0x549de03cd322491aL,0xf7b5f602745b3237L, + 0x3632a3a21ab6e2b6L } }, + /* 35 << 126 */ + { { 0x0d3bba890ef59f78L,0x0dfc6443c9e52b9aL,0x1dc7969972631447L, + 0xef033917b3be20b1L }, + { 0x0c92735db1383948L,0xc1fc29a2c0dd7d7dL,0x6485b697403ed068L, + 0x13bfaab3aac93bdcL } }, + /* 36 << 126 */ + { { 0x410dc6a90deeaf52L,0xb003fb024c641c15L,0x1384978c5bc504c4L, + 0x37640487864a6a77L }, + { 0x05991bc6222a77daL,0x62260a575e47eb11L,0xc7af6613f21b432cL, + 0x22f3acc9ab4953e9L } }, + /* 37 << 126 */ + { { 0x529349228e41d155L,0x4d0245683ac059efL,0xb02017554d884411L, + 0xce8055cfa59a178fL }, + { 0xcd77d1aff6204549L,0xa0a00a3ec7066759L,0x471071ef0272c229L, + 0x009bcf6bd3c4b6b0L } }, + /* 38 << 126 */ + { { 0x2a2638a822305177L,0xd51d59df41645bbfL,0xa81142fdc0a7a3c0L, + 0xa17eca6d4c7063eeL }, + { 0x0bb887ed60d9dcecL,0xd6d28e5120ad2455L,0xebed6308a67102baL, + 0x042c31148bffa408L } }, + /* 39 << 126 */ + { { 0xfd099ac58aa68e30L,0x7a6a3d7c1483513eL,0xffcc6b75ba2d8f0cL, + 0x54dacf961e78b954L }, + { 0xf645696fa4a9af89L,0x3a41194006ac98ecL,0x41b8b3f622a67a20L, + 0x2d0b1e0f99dec626L } }, + /* 40 << 126 */ + { { 0x27c8919240be34e8L,0xc7162b3791907f35L,0x90188ec1a956702bL, + 0xca132f7ddf93769cL }, + { 0x3ece44f90e2025b4L,0x67aaec690c62f14cL,0xad74141822e3cc11L, + 0xcf9b75c37ff9a50eL } }, + /* 41 << 126 */ + { { 0x02fa2b164d348272L,0xbd99d61a9959d56dL,0xbc4f19db18762916L, + 0xcc7cce5049c1ac80L }, + { 0x4d59ebaad846bd83L,0x8775a9dca9202849L,0x07ec4ae16e1f4ca9L, + 0x27eb5875ba893f11L } }, + /* 42 << 126 */ + { { 0x00284d51662cc565L,0x82353a6b0db4138dL,0xd9c7aaaaaa32a594L, + 0xf5528b5ea5669c47L }, + { 0xf32202312f23c5ffL,0xe3e8147a6affa3a1L,0xfb423d5c202ddda0L, + 0x3d6414ac6b871bd4L } }, + /* 43 << 126 */ + { { 0x586f82e1a51a168aL,0xb712c67148ae5448L,0x9a2e4bd176233eb8L, + 0x0188223a78811ca9L }, + { 0x553c5e21f7c18de1L,0x7682e451b27bb286L,0x3ed036b30e51e929L, + 0xf487211bec9cb34fL } }, + /* 44 << 126 */ + { { 0x0d0942770c24efc8L,0x0349fd04bef737a4L,0x6d1c9dd2514cdd28L, + 0x29c135ff30da9521L }, + { 0xea6e4508f78b0b6fL,0x176f5dd2678c143cL,0x081484184be21e65L, + 0x27f7525ce7df38c4L } }, + /* 45 << 126 */ + { { 0x1fb70e09748ab1a4L,0x9cba50a05efe4433L,0x7846c7a615f75af2L, + 0x2a7c2c575ee73ea8L }, + { 
0x42e566a43f0a449aL,0x45474c3bad90fc3dL,0x7447be3d8b61d057L, + 0x3e9d1cf13a4ec092L } }, + /* 46 << 126 */ + { { 0x1603e453f380a6e6L,0x0b86e4319b1437c2L,0x7a4173f2ef29610aL, + 0x8fa729a7f03d57f7L }, + { 0x3e186f6e6c9c217eL,0xbe1d307991919524L,0x92a62a70153d4fb1L, + 0x32ed3e34d68c2f71L } }, + /* 47 << 126 */ + { { 0xd785027f9eb1a8b7L,0xbc37eb77c5b22fe8L,0x466b34f0b9d6a191L, + 0x008a89af9a05f816L }, + { 0x19b028fb7d42c10aL,0x7fe8c92f49b3f6b8L,0x58907cc0a5a0ade3L, + 0xb3154f51559d1a7cL } }, + /* 48 << 126 */ + { { 0x5066efb6d9790ed6L,0xa77a0cbca6aa793bL,0x1a915f3c223e042eL, + 0x1c5def0469c5874bL }, + { 0x0e83007873b6c1daL,0x55cf85d2fcd8557aL,0x0f7c7c760460f3b1L, + 0x87052acb46e58063L } }, + /* 49 << 126 */ + { { 0x09212b80907eae66L,0x3cb068e04d721c89L,0xa87941aedd45ac1cL, + 0xde8d5c0d0daa0dbbL }, + { 0xda421fdce3502e6eL,0xc89442014d89a084L,0x7307ba5ef0c24bfbL, + 0xda212beb20bde0efL } }, + /* 50 << 126 */ + { { 0xea2da24bf82ce682L,0x058d381607f71fe4L,0x35a024625ffad8deL, + 0xcd7b05dcaadcefabL }, + { 0xd442f8ed1d9f54ecL,0x8be3d618b2d3b5caL,0xe2220ed0e06b2ce2L, + 0x82699a5f1b0da4c0L } }, + /* 51 << 126 */ + { { 0x3ff106f571c0c3a7L,0x8f580f5a0d34180cL,0x4ebb120e22d7d375L, + 0x5e5782cce9513675L }, + { 0x2275580c99c82a70L,0xe8359fbf15ea8c4cL,0x53b48db87b415e70L, + 0xaacf2240100c6014L } }, + /* 52 << 126 */ + { { 0x9faaccf5e4652f1dL,0xbd6fdd2ad56157b2L,0xa4f4fb1f6261ec50L, + 0x244e55ad476bcd52L }, + { 0x881c9305047d320bL,0x1ca983d56181263fL,0x354e9a44278fb8eeL, + 0xad2dbc0f396e4964L } }, + /* 53 << 126 */ + { { 0x723f3aa29268b3deL,0x0d1ca29ae6e0609aL,0x794866aa6cf44252L, + 0x0b59f3e301af87edL }, + { 0xe234e5ff7f4a6c51L,0xa8768fd261dc2f7eL,0xdafc73320a94d81fL, + 0xd7f8428206938ce1L } }, + /* 54 << 126 */ + { { 0xae0b3c0e0546063eL,0x7fbadcb25d61abc6L,0xd5d7a2c9369ac400L, + 0xa5978d09ae67d10cL }, + { 0x290f211e4f85eaacL,0xe61e2ad1facac681L,0xae125225388384cdL, + 0xa7fb68e9ccfde30fL } }, + /* 55 << 126 */ + { { 0x7a59b9363daed4c2L,0x80a9aa402606f789L,0xb40c1ea5f6a6d90aL, + 0x948364d3514d5885L }, + { 0x062ebc6070985182L,0xa6db5b0e33310895L,0x64a12175e329c2f5L, + 0xc5f25bd290ea237eL } }, + /* 56 << 126 */ + { { 0x7915c5242d0a4c23L,0xeb5d26e46bb3cc52L,0x369a9116c09e2c92L, + 0x0c527f92cf182cf8L }, + { 0x9e5919382aede0acL,0xb29222086cc34939L,0x3c9d896299a34361L, + 0x3c81836dc1905fe6L } }, + /* 57 << 126 */ + { { 0x4bfeb57fa001ec5aL,0xe993f5bba0dc5dbaL,0x47884109724a1380L, + 0x8a0369ab32fe9a04L }, + { 0xea068d608c927db8L,0xbf5f37cf94655741L,0x47d402a204b6c7eaL, + 0x4551c2956af259cbL } }, + /* 58 << 126 */ + { { 0x698b71e7ed77ee8bL,0xbddf7bd0f309d5c7L,0x6201c22c34e780caL, + 0xab04f7d84c295ef4L }, + { 0x1c9472944313a8ceL,0xe532e4ac92ca4cfeL,0x89738f80d0a7a97aL, + 0xec088c88a580fd5bL } }, + /* 59 << 126 */ + { { 0x612b1ecc42ce9e51L,0x8f9840fdb25fdd2aL,0x3cda78c001e7f839L, + 0x546b3d3aece05480L }, + { 0x271719a980d30916L,0x45497107584c20c4L,0xaf8f94785bc78608L, + 0x28c7d484277e2a4cL } }, + /* 60 << 126 */ + { { 0xfce0176788a2ffe4L,0xdc506a3528e169a5L,0x0ea108617af9c93aL, + 0x1ed2436103fa0e08L }, + { 0x96eaaa92a3d694e7L,0xc0f43b4def50bc74L,0xce6aa58c64114db4L, + 0x8218e8ea7c000fd4L } }, + /* 61 << 126 */ + { { 0xac815dfb185f8844L,0xcd7e90cb1557abfbL,0x23d16655afbfecdfL, + 0x80f3271f085cac4aL }, + { 0x7fc39aa7d0e62f47L,0x88d519d1460a48e5L,0x59559ac4d28f101eL, + 0x7981d9e9ca9ae816L } }, + /* 62 << 126 */ + { { 0x5c38652c9ac38203L,0x86eaf87f57657fe5L,0x568fc472e21f5416L, + 0x2afff39ce7e597b5L }, + { 0x3adbbb07256d4eabL,0x225986928285ab89L,0x35f8112a041caefeL, + 0x95df02e3a5064c8bL } }, + /* 63 << 126 */ + { { 
0x4d63356ec7004bf3L,0x230a08f4db83c7deL,0xca27b2708709a7b7L, + 0x0d1c4cc4cb9abd2dL }, + { 0x8a0bc66e7550fee8L,0x369cd4c79cf7247eL,0x75562e8492b5b7e7L, + 0x8fed0da05802af7bL } }, + /* 64 << 126 */ + { { 0x6a7091c2e48fb889L,0x26882c137b8a9d06L,0xa24986631b82a0e2L, + 0x844ed7363518152dL }, + { 0x282f476fd86e27c7L,0xa04edaca04afefdcL,0x8b256ebc6119e34dL, + 0x56a413e90787d78bL } }, + /* 0 << 133 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 133 */ + { { 0x82ee061d5a74be50L,0xe41781c4dea16ff5L,0xe0b0c81e99bfc8a2L, + 0x624f4d690b547e2dL }, + { 0x3a83545dbdcc9ae4L,0x2573dbb6409b1e8eL,0x482960c4a6c93539L, + 0xf01059ad5ae18798L } }, + /* 2 << 133 */ + { { 0x715c9f973112795fL,0xe8244437984e6ee1L,0x55cb4858ecb66bcdL, + 0x7c136735abaffbeeL }, + { 0x546615955dbec38eL,0x51c0782c388ad153L,0x9ba4c53ac6e0952fL, + 0x27e6782a1b21dfa8L } }, + /* 3 << 133 */ + { { 0x682f903d4ed2dbc2L,0x0eba59c87c3b2d83L,0x8e9dc84d9c7e9335L, + 0x5f9b21b00eb226d7L }, + { 0xe33bd394af267baeL,0xaa86cc25be2e15aeL,0x4f0bf67d6a8ec500L, + 0x5846aa44f9630658L } }, + /* 4 << 133 */ + { { 0xfeb09740e2c2bf15L,0x627a2205a9e99704L,0xec8d73d0c2fbc565L, + 0x223eed8fc20c8de8L }, + { 0x1ee32583a8363b49L,0x1a0b6cb9c9c2b0a6L,0x49f7c3d290dbc85cL, + 0xa8dfbb971ef4c1acL } }, + /* 5 << 133 */ + { { 0xafb34d4c65c7c2abL,0x1d4610e7e2c5ea84L,0x893f6d1b973c4ab5L, + 0xa3cdd7e9945ba5c4L }, + { 0x60514983064417eeL,0x1459b23cad6bdf2bL,0x23b2c3415cf726c3L, + 0x3a82963532d6354aL } }, + /* 6 << 133 */ + { { 0x294f901fab192c18L,0xec5fcbfe7030164fL,0xe2e2fcb7e2246ba6L, + 0x1e7c88b3221a1a0cL }, + { 0x72c7dd93c92d88c5L,0x41c2148e1106fb59L,0x547dd4f5a0f60f14L, + 0xed9b52b263960f31L } }, + /* 7 << 133 */ + { { 0x6c8349ebb0a5b358L,0xb154c5c29e7e2ed6L,0xcad5eccfeda462dbL, + 0xf2d6dbe42de66b69L }, + { 0x426aedf38665e5b2L,0x488a85137b7f5723L,0x15cc43b38bcbb386L, + 0x27ad0af3d791d879L } }, + /* 8 << 133 */ + { { 0xc16c236e846e364fL,0x7f33527cdea50ca0L,0xc48107750926b86dL, + 0x6c2a36090598e70cL }, + { 0xa6755e52f024e924L,0xe0fa07a49db4afcaL,0x15c3ce7d66831790L, + 0x5b4ef350a6cbb0d6L } }, + /* 9 << 133 */ + { { 0x2c4aafc4b6205969L,0x42563f02f6c7854fL,0x016aced51d983b48L, + 0xfeb356d899949755L }, + { 0x8c2a2c81d1a39bd7L,0x8f44340fe6934ae9L,0x148cf91c447904daL, + 0x7340185f0f51a926L } }, + /* 10 << 133 */ + { { 0x2f8f00fb7409ab46L,0x057e78e680e289b2L,0x03e5022ca888e5d1L, + 0x3c87111a9dede4e2L }, + { 0x5b9b0e1c7809460bL,0xe751c85271c9abc7L,0x8b944e28c7cc1dc9L, + 0x4f201ffa1d3cfa08L } }, + /* 11 << 133 */ + { { 0x02fc905c3e6721ceL,0xd52d70dad0b3674cL,0x5dc2e5ca18810da4L, + 0xa984b2735c69dd99L }, + { 0x63b9252784de5ca4L,0x2f1c9872c852dec4L,0x18b03593c2e3de09L, + 0x19d70b019813dc2fL } }, + /* 12 << 133 */ + { { 0x42806b2da6dc1d29L,0xd3030009f871e144L,0xa1feb333aaf49276L, + 0xb5583b9ec70bc04bL }, + { 0x1db0be7895695f20L,0xfc84181189d012b5L,0x6409f27205f61643L, + 0x40d34174d5883128L } }, + /* 13 << 133 */ + { { 0xd79196f567419833L,0x6059e252863b7b08L,0x84da18171c56700cL, + 0x5758ee56b28d3ec4L }, + { 0x7da2771d013b0ea6L,0xfddf524b54c5e9b9L,0x7df4faf824305d80L, + 0x58f5c1bf3a97763fL } }, + /* 14 << 133 */ + { { 0xa5af37f17c696042L,0xd4cba22c4a2538deL,0x211cb9959ea42600L, + 0xcd105f417b069889L }, + { 0xb1e1cf19ddb81e74L,0x472f2d895157b8caL,0x086fb008ee9db885L, + 0x365cd5700f26d131L } }, + /* 15 << 133 */ + { { 0x284b02bba2be7053L,0xdcbbf7c67ab9a6d6L,0x4425559c20f7a530L, + 0x961f2dfa188767c8L }, + { 0xe2fd943570dc80c4L,0x104d6b63f0784120L,0x7f592bc153567122L, + 0xf6bc1246f688ad77L } }, + /* 16 << 133 */ + { { 
0x05214c050f15dde9L,0xa47a76a80d5f2b82L,0xbb254d3062e82b62L, + 0x11a05fe03ec955eeL }, + { 0x7eaff46e9d529b36L,0x55ab13018f9e3df6L,0xc463e37199317698L, + 0xfd251438ccda47adL } }, + /* 17 << 133 */ + { { 0xca9c354723d695eaL,0x48ce626e16e589b5L,0x6b5b64c7b187d086L, + 0xd02e1794b2207948L }, + { 0x8b58e98f7198111dL,0x90ca6305dcf9c3ccL,0x5691fe72f34089b0L, + 0x60941af1fc7c80ffL } }, + /* 18 << 133 */ + { { 0xa09bc0a222eb51e5L,0xc0bb7244aa9cf09aL,0x36a8077f80159f06L, + 0x8b5c989edddc560eL }, + { 0x19d2f316512e1f43L,0x02eac554ad08ff62L,0x012ab84c07d20b4eL, + 0x37d1e115d6d4e4e1L } }, + /* 19 << 133 */ + { { 0xb6443e1aab7b19a8L,0xf08d067edef8cd45L,0x63adf3e9685e03daL, + 0xcf15a10e4792b916L }, + { 0xf44bcce5b738a425L,0xebe131d59636b2fdL,0x940688417850d605L, + 0x09684eaab40d749dL } }, + /* 20 << 133 */ + { { 0x8c3c669c72ba075bL,0x89f78b55ba469015L,0x5706aade3e9f8ba8L, + 0x6d8bd565b32d7ed7L }, + { 0x25f4e63b805f08d6L,0x7f48200dc3bcc1b5L,0x4e801968b025d847L, + 0x74afac0487cbe0a8L } }, + /* 21 << 133 */ + { { 0x43ed2c2b7e63d690L,0xefb6bbf00223cdb8L,0x4fec3cae2884d3feL, + 0x065ecce6d75e25a4L }, + { 0x6c2294ce69f79071L,0x0d9a8e5f044b8666L,0x5009f23817b69d8fL, + 0x3c29f8fec5dfdaf7L } }, + /* 22 << 133 */ + { { 0x9067528febae68c4L,0x5b38563230c5ba21L,0x540df1191fdd1aecL, + 0xcf37825bcfba4c78L }, + { 0x77eff980beb11454L,0x40a1a99160c1b066L,0xe8018980f889a1c7L, + 0xb9c52ae976c24be0L } }, + /* 23 << 133 */ + { { 0x05fbbcce45650ef4L,0xae000f108aa29ac7L,0x884b71724f04c470L, + 0x7cd4fde219bb5c25L }, + { 0x6477b22ae8840869L,0xa88688595fbd0686L,0xf23cc02e1116dfbaL, + 0x76cd563fd87d7776L } }, + /* 24 << 133 */ + { { 0xe2a37598a9d82abfL,0x5f188ccbe6c170f5L,0x816822005066b087L, + 0xda22c212c7155adaL }, + { 0x151e5d3afbddb479L,0x4b606b846d715b99L,0x4a73b54bf997cb2eL, + 0x9a1bfe433ecd8b66L } }, + /* 25 << 133 */ + { { 0x1c3128092a67d48aL,0xcd6a671e031fa9e2L,0xbec3312a0e43a34aL, + 0x1d93563955ef47d3L }, + { 0x5ea024898fea73eaL,0x8247b364a035afb2L,0xb58300a65265b54cL, + 0x3286662f722c7148L } }, + /* 26 << 133 */ + { { 0xb77fd76bb4ec4c20L,0xf0a12fa70f3fe3fdL,0xf845bbf541d8c7e8L, + 0xe4d969ca5ec10aa8L }, + { 0x4c0053b743e232a3L,0xdc7a3fac37f8a45aL,0x3c4261c520d81c8fL, + 0xfd4b3453b00eab00L } }, + /* 27 << 133 */ + { { 0x76d48f86d36e3062L,0x626c5277a143ff02L,0x538174deaf76f42eL, + 0x2267aa866407ceacL }, + { 0xfad7635172e572d5L,0xab861af7ba7330ebL,0xa0a1c8c7418d8657L, + 0x988821cb20289a52L } }, + /* 28 << 133 */ + { { 0x79732522cccc18adL,0xaadf3f8df1a6e027L,0xf7382c9317c2354dL, + 0x5ce1680cd818b689L }, + { 0x359ebbfcd9ecbee9L,0x4330689c1cae62acL,0xb55ce5b4c51ac38aL, + 0x7921dfeafe238ee8L } }, + /* 29 << 133 */ + { { 0x3972bef8271d1ca5L,0x3e423bc7e8aabd18L,0x57b09f3f44a3e5e3L, + 0x5da886ae7b444d66L }, + { 0x68206634a9964375L,0x356a2fa3699cd0ffL,0xaf0faa24dba515e9L, + 0x536e1f5cb321d79aL } }, + /* 30 << 133 */ + { { 0xd3b9913a5c04e4eaL,0xd549dcfed6f11513L,0xee227bf579fd1d94L, + 0x9f35afeeb43f2c67L }, + { 0xd2638d24f1314f53L,0x62baf948cabcd822L,0x5542de294ef48db0L, + 0xb3eb6a04fc5f6bb2L } }, + /* 31 << 133 */ + { { 0x23c110ae1208e16aL,0x1a4d15b5f8363e24L,0x30716844164be00bL, + 0xa8e24824f6f4690dL }, + { 0x548773a290b170cfL,0xa1bef33142f191f4L,0x70f418d09247aa97L, + 0xea06028e48be9147L } }, + /* 32 << 133 */ + { { 0xe13122f3dbfb894eL,0xbe9b79f6ce274b18L,0x85a49de5ca58aadfL, + 0x2495775811487351L }, + { 0x111def61bb939099L,0x1d6a974a26d13694L,0x4474b4ced3fc253bL, + 0x3a1485e64c5db15eL } }, + /* 33 << 133 */ + { { 0xe79667b4147c15b4L,0xe34f553b7bc61301L,0x032b80f817094381L, + 0x55d8bafd723eaa21L }, + { 
0x5a987995f1c0e74eL,0x5a9b292eebba289cL,0x413cd4b2eb4c8251L, + 0x98b5d243d162db0aL } }, + /* 34 << 133 */ + { { 0xbb47bf6668342520L,0x08d68949baa862d1L,0x11f349c7e906abcdL, + 0x454ce985ed7bf00eL }, + { 0xacab5c9eb55b803bL,0xb03468ea31e3c16dL,0x5c24213dd273bf12L, + 0x211538eb71587887L } }, + /* 35 << 133 */ + { { 0x198e4a2f731dea2dL,0xd5856cf274ed7b2aL,0x86a632eb13a664feL, + 0x932cd909bda41291L }, + { 0x850e95d4c0c4ddc0L,0xc0f422f8347fc2c9L,0xe68cbec486076bcbL, + 0xf9e7c0c0cd6cd286L } }, + /* 36 << 133 */ + { { 0x65994ddb0f5f27caL,0xe85461fba80d59ffL,0xff05481a66601023L, + 0xc665427afc9ebbfbL }, + { 0xb0571a697587fd52L,0x935289f88d49efceL,0x61becc60ea420688L, + 0xb22639d913a786afL } }, + /* 37 << 133 */ + { { 0x1a8e6220361ecf90L,0x001f23e025506463L,0xe4ae9b5d0a5c2b79L, + 0xebc9cdadd8149db5L }, + { 0xb33164a1934aa728L,0x750eb00eae9b60f3L,0x5a91615b9b9cfbfdL, + 0x97015cbfef45f7f6L } }, + /* 38 << 133 */ + { { 0xb462c4a5bf5151dfL,0x21adcc41b07118f2L,0xd60c545b043fa42cL, + 0xfc21aa54e96be1abL }, + { 0xe84bc32f4e51ea80L,0x3dae45f0259b5d8dL,0xbb73c7ebc38f1b5eL, + 0xe405a74ae8ae617dL } }, + /* 39 << 133 */ + { { 0xbb1ae9c69f1c56bdL,0x8c176b9849f196a4L,0xc448f3116875092bL, + 0xb5afe3de9f976033L }, + { 0xa8dafd49145813e5L,0x687fc4d9e2b34226L,0xf2dfc92d4c7ff57fL, + 0x004e3fc1401f1b46L } }, + /* 40 << 133 */ + { { 0x5afddab61430c9abL,0x0bdd41d32238e997L,0xf0947430418042aeL, + 0x71f9addacdddc4cbL }, + { 0x7090c016c52dd907L,0xd9bdf44d29e2047fL,0xe6f1fe801b1011a6L, + 0xb63accbcd9acdc78L } }, + /* 41 << 133 */ + { { 0xcfc7e2351272a95bL,0x0c667717a6276ac8L,0x3c0d3709e2d7eef7L, + 0x5add2b069a685b3eL }, + { 0x363ad32d14ea5d65L,0xf8e01f068d7dd506L,0xc9ea221375b4aac6L, + 0xed2a2bf90d353466L } }, + /* 42 << 133 */ + { { 0x439d79b5e9d3a7c3L,0x8e0ee5a681b7f34bL,0xcf3dacf51dc4ba75L, + 0x1d3d1773eb3310c7L }, + { 0xa8e671127747ae83L,0x31f43160197d6b40L,0x0521cceecd961400L, + 0x67246f11f6535768L } }, + /* 43 << 133 */ + { { 0x702fcc5aef0c3133L,0x247cc45d7e16693bL,0xfd484e49c729b749L, + 0x522cef7db218320fL }, + { 0xe56ef40559ab93b3L,0x225fba119f181071L,0x33bd659515330ed0L, + 0xc4be69d51ddb32f7L } }, + /* 44 << 133 */ + { { 0x264c76680448087cL,0xac30903f71432daeL,0x3851b26600f9bf47L, + 0x400ed3116cdd6d03L }, + { 0x045e79fef8fd2424L,0xfdfd974afa6da98bL,0x45c9f6410c1e673aL, + 0x76f2e7335b2c5168L } }, + /* 45 << 133 */ + { { 0x1adaebb52a601753L,0xb286514cc57c2d49L,0xd87696701e0bfd24L, + 0x950c547e04478922L }, + { 0xd1d41969e5d32bfeL,0x30bc1472750d6c3eL,0x8f3679fee0e27f3aL, + 0x8f64a7dca4a6ee0cL } }, + /* 46 << 133 */ + { { 0x2fe59937633dfb1fL,0xea82c395977f2547L,0xcbdfdf1a661ea646L, + 0xc7ccc591b9085451L }, + { 0x8217796281761e13L,0xda57596f9196885cL,0xbc17e84928ffbd70L, + 0x1e6e0a412671d36fL } }, + /* 47 << 133 */ + { { 0x61ae872c4152fcf5L,0x441c87b09e77e754L,0xd0799dd5a34dff09L, + 0x766b4e4488a6b171L }, + { 0xdc06a51211f1c792L,0xea02ae934be35c3eL,0xe5ca4d6de90c469eL, + 0x4df4368e56e4ff5cL } }, + /* 48 << 133 */ + { { 0x7817acab4baef62eL,0x9f5a2202a85b91e8L,0x9666ebe66ce57610L, + 0x32ad31f3f73bfe03L }, + { 0x628330a425bcf4d6L,0xea950593515056e6L,0x59811c89e1332156L, + 0xc89cf1fe8c11b2d7L } }, + /* 49 << 133 */ + { { 0x75b6391304e60cc0L,0xce811e8d4625d375L,0x030e43fc2d26e562L, + 0xfbb30b4b608d36a0L }, + { 0x634ff82c48528118L,0x7c6fe085cd285911L,0x7f2830c099358f28L, + 0x2e60a95e665e6c09L } }, + /* 50 << 133 */ + { { 0x08407d3d9b785dbfL,0x530889aba759bce7L,0xf228e0e652f61239L, + 0x2b6d14616879be3cL }, + { 0xe6902c0451a7bbf7L,0x30ad99f076f24a64L,0x66d9317a98bc6da0L, + 0xf4f877f3cb596ac0L } }, + /* 51 << 133 */ + { { 
0xb05ff62d4c44f119L,0x4555f536e9b77416L,0xc7c0d0598caed63bL, + 0x0cd2b7cec358b2a9L }, + { 0x3f33287b46945fa3L,0xf8785b20d67c8791L,0xc54a7a619637bd08L, + 0x54d4598c18be79d7L } }, + /* 52 << 133 */ + { { 0x889e5acbc46d7ce1L,0x9a515bb78b085877L,0xfac1a03d0b7a5050L, + 0x7d3e738af2926035L }, + { 0x861cc2ce2a6cb0ebL,0x6f2e29558f7adc79L,0x61c4d45133016376L, + 0xd9fd2c805ad59090L } }, + /* 53 << 133 */ + { { 0xe5a83738b2b836a1L,0x855b41a07c0d6622L,0x186fe3177cc19af1L, + 0x6465c1fffdd99acbL }, + { 0x46e5c23f6974b99eL,0x75a7cf8ba2717cbeL,0x4d2ebc3f062be658L, + 0x094b44475f209c98L } }, + /* 54 << 133 */ + { { 0x4af285edb940cb5aL,0x6706d7927cc82f10L,0xc8c8776c030526faL, + 0xfa8e6f76a0da9140L }, + { 0x77ea9d34591ee4f0L,0x5f46e33740274166L,0x1bdf98bbea671457L, + 0xd7c08b46862a1fe2L } }, + /* 55 << 133 */ + { { 0x46cc303c1c08ad63L,0x995434404c845e7bL,0x1b8fbdb548f36bf7L, + 0x5b82c3928c8273a7L }, + { 0x08f712c4928435d5L,0x071cf0f179330380L,0xc74c2d24a8da054aL, + 0xcb0e720143c46b5cL } }, + /* 56 << 133 */ + { { 0x0ad7337ac0b7eff3L,0x8552225ec5e48b3cL,0xe6f78b0c73f13a5fL, + 0x5e70062e82349cbeL }, + { 0x6b8d5048e7073969L,0x392d2a29c33cb3d2L,0xee4f727c4ecaa20fL, + 0xa068c99e2ccde707L } }, + /* 57 << 133 */ + { { 0xfcd5651fb87a2913L,0xea3e3c153cc252f0L,0x777d92df3b6cd3e4L, + 0x7a414143c5a732e7L }, + { 0xa895951aa71ff493L,0xfe980c92bbd37cf6L,0x45bd5e64decfeeffL, + 0x910dc2a9a44c43e9L } }, + /* 58 << 133 */ + { { 0xcb403f26cca9f54dL,0x928bbdfb9303f6dbL,0x3c37951ea9eee67cL, + 0x3bd61a52f79961c3L }, + { 0x09a238e6395c9a79L,0x6940ca2d61eb352dL,0x7d1e5c5ec1875631L, + 0x1e19742c1e1b20d1L } }, + /* 59 << 133 */ + { { 0x4633d90823fc2e6eL,0xa76e29a908959149L,0x61069d9c84ed7da5L, + 0x0baa11cf5dbcad51L }, + { 0xd01eec64961849daL,0x93b75f1faf3d8c28L,0x57bc4f9f1ca2ee44L, + 0x5a26322d00e00558L } }, + /* 60 << 133 */ + { { 0x1888d65861a023efL,0x1d72aab4b9e5246eL,0xa9a26348e5563ec0L, + 0xa0971963c3439a43L }, + { 0x567dd54badb9b5b7L,0x73fac1a1c45a524bL,0x8fe97ef7fe38e608L, + 0x608748d23f384f48L } }, + /* 61 << 133 */ + { { 0xb0571794c486094fL,0x869254a38bf3a8d6L,0x148a8dd1310b0e25L, + 0x99ab9f3f9aa3f7d8L }, + { 0x0927c68a6706c02eL,0x22b5e76c69790e6cL,0x6c3252606c71376cL, + 0x53a5769009ef6657L } }, + /* 62 << 133 */ + { { 0x8d63f852edffcf3aL,0xb4d2ed043c0a6f55L,0xdb3aa8de12519b9eL, + 0x5d38e9c41e0a569aL }, + { 0x871528bf303747e2L,0xa208e77cf5b5c18dL,0x9d129c88ca6bf923L, + 0xbcbf197fbf02839fL } }, + /* 63 << 133 */ + { { 0x9b9bf03027323194L,0x3b055a8b339ca59dL,0xb46b23120f669520L, + 0x19789f1f497e5f24L }, + { 0x9c499468aaf01801L,0x72ee11908b69d59cL,0x8bd39595acf4c079L, + 0x3ee11ece8e0cd048L } }, + /* 64 << 133 */ + { { 0xebde86ec1ed66f18L,0x225d906bd61fce43L,0x5cab07d6e8bed74dL, + 0x16e4617f27855ab7L }, + { 0x6568aaddb2fbc3ddL,0xedb5484f8aeddf5bL,0x878f20e86dcf2fadL, + 0x3516497c615f5699L } }, + /* 0 << 140 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 140 */ + { { 0xef0a3fecfa181e69L,0x9ea02f8130d69a98L,0xb2e9cf8e66eab95dL, + 0x520f2beb24720021L }, + { 0x621c540a1df84361L,0x1203772171fa6d5dL,0x6e3c7b510ff5f6ffL, + 0x817a069babb2bef3L } }, + /* 2 << 140 */ + { { 0x83572fb6b294cda6L,0x6ce9bf75b9039f34L,0x20e012f0095cbb21L, + 0xa0aecc1bd063f0daL }, + { 0x57c21c3af02909e5L,0xc7d59ecf48ce9cdcL,0x2732b8448ae336f8L, + 0x056e37233f4f85f4L } }, + /* 3 << 140 */ + { { 0x8a10b53189e800caL,0x50fe0c17145208fdL,0x9e43c0d3b714ba37L, + 0x427d200e34189accL }, + { 0x05dee24fe616e2c0L,0x9c25f4c8ee1854c1L,0x4d3222a58f342a73L, + 0x0807804fa027c952L } }, + /* 4 << 140 */ + { { 
0xc222653a4f0d56f3L,0x961e4047ca28b805L,0x2c03f8b04a73434bL, + 0x4c966787ab712a19L }, + { 0xcc196c42864fee42L,0xc1be93da5b0ece5cL,0xa87d9f22c131c159L, + 0x2bb6d593dce45655L } }, + /* 5 << 140 */ + { { 0x22c49ec9b809b7ceL,0x8a41486be2c72c2cL,0x813b9420fea0bf36L, + 0xb3d36ee9a66dac69L }, + { 0x6fddc08a328cc987L,0x0a3bcd2c3a326461L,0x7103c49dd810dbbaL, + 0xf9d81a284b78a4c4L } }, + /* 6 << 140 */ + { { 0x3de865ade4d55941L,0xdedafa5e30384087L,0x6f414abb4ef18b9bL, + 0x9ee9ea42faee5268L }, + { 0x260faa1637a55a4aL,0xeb19a514015f93b9L,0x51d7ebd29e9c3598L, + 0x523fc56d1932178eL } }, + /* 7 << 140 */ + { { 0x501d070cb98fe684L,0xd60fbe9a124a1458L,0xa45761c892bc6b3fL, + 0xf5384858fe6f27cbL }, + { 0x4b0271f7b59e763bL,0x3d4606a95b5a8e5eL,0x1eda5d9b05a48292L, + 0xda7731d0e6fec446L } }, + /* 8 << 140 */ + { { 0xa3e3369390d45871L,0xe976404006166d8dL,0xb5c3368289a90403L, + 0x4bd1798372f1d637L }, + { 0xa616679ed5d2c53aL,0x5ec4bcd8fdcf3b87L,0xae6d7613b66a694eL, + 0x7460fc76e3fc27e5L } }, + /* 9 << 140 */ + { { 0x70469b8295caabeeL,0xde024ca5889501e3L,0x6bdadc06076ed265L, + 0x0cb1236b5a0ef8b2L }, + { 0x4065ddbf0972ebf9L,0xf1dd387522aca432L,0xa88b97cf744aff76L, + 0xd1359afdfe8e3d24L } }, + /* 10 << 140 */ + { { 0x52a3ba2b91502cf3L,0x2c3832a8084db75dL,0x04a12dddde30b1c9L, + 0x7802eabce31fd60cL }, + { 0x33707327a37fddabL,0x65d6f2abfaafa973L,0x3525c5b811e6f91aL, + 0x76aeb0c95f46530bL } }, + /* 11 << 140 */ + { { 0xe8815ff62f93a675L,0xa6ec968405f48679L,0x6dcbb556358ae884L, + 0x0af61472e19e3873L }, + { 0x72334372a5f696beL,0xc65e57ea6f22fb70L,0x268da30c946cea90L, + 0x136a8a8765681b2aL } }, + /* 12 << 140 */ + { { 0xad5e81dc0f9f44d4L,0xf09a69602c46585aL,0xd1649164c447d1b1L, + 0x3b4b36c8879dc8b1L }, + { 0x20d4177b3b6b234cL,0x096a25051730d9d0L,0x0611b9b8ef80531dL, + 0xba904b3b64bb495dL } }, + /* 13 << 140 */ + { { 0x1192d9d493a3147aL,0x9f30a5dc9a565545L,0x90b1f9cb6ef07212L, + 0x299585460d87fc13L }, + { 0xd3323effc17db9baL,0xcb18548ccb1644a8L,0x18a306d44f49ffbcL, + 0x28d658f14c2e8684L } }, + /* 14 << 140 */ + { { 0x44ba60cda99f8c71L,0x67b7abdb4bf742ffL,0x66310f9c914b3f99L, + 0xae430a32f412c161L }, + { 0x1e6776d388ace52fL,0x4bc0fa2452d7067dL,0x03c286aa8f07cd1bL, + 0x4cb8f38ca985b2c1L } }, + /* 15 << 140 */ + { { 0x83ccbe808c3bff36L,0x005a0bd25263e575L,0x460d7dda259bdcd1L, + 0x4a1c5642fa5cab6bL }, + { 0x2b7bdbb99fe4fc88L,0x09418e28cc97bbb5L,0xd8274fb4a12321aeL, + 0xb137007d5c87b64eL } }, + /* 16 << 140 */ + { { 0x80531fe1c63c4962L,0x50541e89981fdb25L,0xdc1291a1fd4c2b6bL, + 0xc0693a17a6df4fcaL }, + { 0xb2c4604e0117f203L,0x245f19630a99b8d0L,0xaedc20aac6212c44L, + 0xb1ed4e56520f52a8L } }, + /* 17 << 140 */ + { { 0xfe48f575f8547be3L,0x0a7033cda9e45f98L,0x4b45d3a918c50100L, + 0xb2a6cd6aa61d41daL }, + { 0x60bbb4f557933c6bL,0xa7538ebd2b0d7ffcL,0x9ea3ab8d8cd626b6L, + 0x8273a4843601625aL } }, + /* 18 << 140 */ + { { 0x888598450168e508L,0x8cbc9bb299a94abdL,0x713ac792fab0a671L, + 0xa3995b196c9ebffcL }, + { 0xe711668e1239e152L,0x56892558bbb8dff4L,0x8bfc7dabdbf17963L, + 0x5b59fe5ab3de1253L } }, + /* 19 << 140 */ + { { 0x7e3320eb34a9f7aeL,0xe5e8cf72d751efe4L,0x7ea003bcd9be2f37L, + 0xc0f551a0b6c08ef7L }, + { 0x56606268038f6725L,0x1dd38e356d92d3b6L,0x07dfce7cc3cbd686L, + 0x4e549e04651c5da8L } }, + /* 20 << 140 */ + { { 0x4058f93b08b19340L,0xc2fae6f4cac6d89dL,0x4bad8a8c8f159cc7L, + 0x0ddba4b3cb0b601cL }, + { 0xda4fc7b51dd95f8cL,0x1d163cd7cea5c255L,0x30707d06274a8c4cL, + 0x79d9e0082802e9ceL } }, + /* 21 << 140 */ + { { 0x02a29ebfe6ddd505L,0x37064e74b50bed1aL,0x3f6bae65a7327d57L, + 0x3846f5f1f83920bcL }, + { 
0x87c3749160df1b9bL,0x4cfb28952d1da29fL,0x10a478ca4ed1743cL, + 0x390c60303edd47c6L } }, + /* 22 << 140 */ + { { 0x8f3e53128c0a78deL,0xccd02bda1e85df70L,0xd6c75c03a61b6582L, + 0x0762921cfc0eebd1L }, + { 0xd34d0823d85010c0L,0xd73aaacb0044cf1fL,0xfb4159bba3b5e78aL, + 0x2287c7f7e5826f3fL } }, + /* 23 << 140 */ + { { 0x4aeaf742580b1a01L,0xf080415d60423b79L,0xe12622cda7dea144L, + 0x49ea499659d62472L }, + { 0xb42991ef571f3913L,0x0610f214f5b25a8aL,0x47adc58530b79e8fL, + 0xf90e3df607a065a2L } }, + /* 24 << 140 */ + { { 0x5d0a5deb43e2e034L,0x53fb5a34444024aaL,0xa8628c686b0c9f7fL, + 0x9c69c29cac563656L }, + { 0x5a231febbace47b6L,0xbdce02899ea5a2ecL,0x05da1fac9463853eL, + 0x96812c52509e78aaL } }, + /* 25 << 140 */ + { { 0xd3fb577157151692L,0xeb2721f8d98e1c44L,0xc050608732399be1L, + 0xda5a5511d979d8b8L }, + { 0x737ed55dc6f56780L,0xe20d30040dc7a7f4L,0x02ce7301f5941a03L, + 0x91ef5215ed30f83aL } }, + /* 26 << 140 */ + { { 0x28727fc14092d85fL,0x72d223c65c49e41aL,0xa7cf30a2ba6a4d81L, + 0x7c086209b030d87dL }, + { 0x04844c7dfc588b09L,0x728cd4995874bbb0L,0xcc1281eee84c0495L, + 0x0769b5baec31958fL } }, + /* 27 << 140 */ + { { 0x665c228bf99c2471L,0xf2d8a11b191eb110L,0x4594f494d36d7024L, + 0x482ded8bcdcb25a1L }, + { 0xc958a9d8dadd4885L,0x7004477ef1d2b547L,0x0a45f6ef2a0af550L, + 0x4fc739d62f8d6351L } }, + /* 28 << 140 */ + { { 0x75cdaf27786f08a9L,0x8700bb2642c2737fL,0x855a71411c4e2670L, + 0x810188c115076fefL }, + { 0xc251d0c9abcd3297L,0xae4c8967f48108ebL,0xbd146de718ceed30L, + 0xf9d4f07ac986bcedL } }, + /* 29 << 140 */ + { { 0x5ad98ed583fa1e08L,0x7780d33ebeabd1fbL,0xe330513c903b1196L, + 0xba11de9ea47bc8c4L }, + { 0x684334da02c2d064L,0x7ecf360da48de23bL,0x57a1b4740a9089d8L, + 0xf28fa439ff36734cL } }, + /* 30 << 140 */ + { { 0xf2a482cbea4570b3L,0xee65d68ba5ebcee9L,0x988d0036b9694cd5L, + 0x53edd0e937885d32L }, + { 0xe37e3307beb9bc6dL,0xe9abb9079f5c6768L,0x4396ccd551f2160fL, + 0x2500888c47336da6L } }, + /* 31 << 140 */ + { { 0x383f9ed9926fce43L,0x809dd1c704da2930L,0x30f6f5968a4cb227L, + 0x0d700c7f73a56b38L }, + { 0x1825ea33ab64a065L,0xaab9b7351338df80L,0x1516100d9b63f57fL, + 0x2574395a27a6a634L } }, + /* 32 << 140 */ + { { 0xb5560fb6700a1acdL,0xe823fd73fd999681L,0xda915d1f6cb4e1baL, + 0x0d0301186ebe00a3L }, + { 0x744fb0c989fca8cdL,0x970d01dbf9da0e0bL,0x0ad8c5647931d76fL, + 0xb15737bff659b96aL } }, + /* 33 << 140 */ + { { 0xdc9933e8a8b484e7L,0xb2fdbdf97a26dec7L,0x2349e9a49f1f0136L, + 0x7860368e70fddddbL }, + { 0xd93d2c1cf9ad3e18L,0x6d6c5f17689f4e79L,0x7a544d91b24ff1b6L, + 0x3e12a5ebfe16cd8cL } }, + /* 34 << 140 */ + { { 0x543574e9a56b872fL,0xa1ad550cfcf68ea2L,0x689e37d23f560ef7L, + 0x8c54b9cac9d47a8bL }, + { 0x46d40a4a088ac342L,0xec450c7c1576c6d0L,0xb589e31c1f9689e9L, + 0xdacf2602b8781718L } }, + /* 35 << 140 */ + { { 0xa89237c6c8cb6b42L,0x1326fc93b96ef381L,0x55d56c6db5f07825L, + 0xacba2eea7449e22dL }, + { 0x74e0887a633c3000L,0xcb6cd172d7cbcf71L,0x309e81dec36cf1beL, + 0x07a18a6d60ae399bL } }, + /* 36 << 140 */ + { { 0xb36c26799edce57eL,0x52b892f4df001d41L,0xd884ae5d16a1f2c6L, + 0x9b329424efcc370aL }, + { 0x3120daf2bd2e21dfL,0x55298d2d02470a99L,0x0b78af6ca05db32eL, + 0x5c76a331601f5636L } }, + /* 37 << 140 */ + { { 0xaae861fff8a4f29cL,0x70dc9240d68f8d49L,0x960e649f81b1321cL, + 0x3d2c801b8792e4ceL }, + { 0xf479f77242521876L,0x0bed93bc416c79b1L,0xa67fbc05263e5bc9L, + 0x01e8e630521db049L } }, + /* 38 << 140 */ + { { 0x76f26738c6f3431eL,0xe609cb02e3267541L,0xb10cff2d818c877cL, + 0x1f0e75ce786a13cbL }, + { 0xf4fdca641158544dL,0x5d777e896cb71ed0L,0x3c233737a9aa4755L, + 0x7b453192e527ab40L } }, + /* 39 << 140 */ + { { 
0xdb59f68839f05ffeL,0x8f4f4be06d82574eL,0xcce3450cee292d1bL, + 0xaa448a1261ccd086L }, + { 0xabce91b3f7914967L,0x4537f09b1908a5edL,0xa812421ef51042e7L, + 0xfaf5cebcec0b3a34L } }, + /* 40 << 140 */ + { { 0x730ffd874ca6b39aL,0x70fb72ed02efd342L,0xeb4735f9d75c8edbL, + 0xc11f2157c278aa51L }, + { 0xc459f635bf3bfebfL,0x3a1ff0b46bd9601fL,0xc9d12823c420cb73L, + 0x3e9af3e23c2915a3L } }, + /* 41 << 140 */ + { { 0xe0c82c72b41c3440L,0x175239e5e3039a5fL,0xe1084b8a558795a3L, + 0x328d0a1dd01e5c60L }, + { 0x0a495f2ed3788a04L,0x25d8ff1666c11a9fL,0xf5155f059ed692d6L, + 0x954fa1074f425fe4L } }, + /* 42 << 140 */ + { { 0xd16aabf2e98aaa99L,0x90cd8ba096b0f88aL,0x957f4782c154026aL, + 0x54ee073452af56d2L }, + { 0xbcf89e5445b4147aL,0x3d102f219a52816cL,0x6808517e39b62e77L, + 0x92e2542169169ad8L } }, + /* 43 << 140 */ + { { 0xd721d871bb608558L,0x60e4ebaef6d4ff9bL,0x0ba1081941f2763eL, + 0xca2e45be51ee3247L }, + { 0x66d172ec2bfd7a5fL,0x528a8f2f74d0b12dL,0xe17f1e38dabe70dcL, + 0x1d5d73169f93983cL } }, + /* 44 << 140 */ + { { 0x51b2184adf423e31L,0xcb417291aedb1a10L,0x2054ca93625bcab9L, + 0x54396860a98998f0L }, + { 0x4e53f6c4a54ae57eL,0x0ffeb590ee648e9dL,0xfbbdaadc6afaf6bcL, + 0xf88ae796aa3bfb8aL } }, + /* 45 << 140 */ + { { 0x209f1d44d2359ed9L,0xac68dd03f3544ce2L,0xf378da47fd51e569L, + 0xe1abd8602cc80097L }, + { 0x23ca18d9343b6e3aL,0x480797e8b40a1baeL,0xd1f0c717533f3e67L, + 0x4489697006e6cdfcL } }, + /* 46 << 140 */ + { { 0x8ca2105552a82e8dL,0xb2caf78578460cdcL,0x4c1b7b62e9037178L, + 0xefc09d2cdb514b58L }, + { 0x5f2df9ee9113be5cL,0x2fbda78fb3f9271cL,0xe09a81af8f83fc54L, + 0x06b138668afb5141L } }, + /* 47 << 140 */ + { { 0x38f6480f43e3865dL,0x72dd77a81ddf47d9L,0xf2a8e9714c205ff7L, + 0x46d449d89d088ad8L }, + { 0x926619ea185d706fL,0xe47e02ebc7dd7f62L,0xe7f120a78cbc2031L, + 0xc18bef00998d4ac9L } }, + /* 48 << 140 */ + { { 0x18f37a9c6bdf22daL,0xefbc432f90dc82dfL,0xc52cef8e5d703651L, + 0x82887ba0d99881a5L }, + { 0x7cec9ddab920ec1dL,0xd0d7e8c3ec3e8d3bL,0x445bc3954ca88747L, + 0xedeaa2e09fd53535L } }, + /* 49 << 140 */ + { { 0x461b1d936cc87475L,0xd92a52e26d2383bdL,0xfabccb59d7903546L, + 0x6111a7613d14b112L }, + { 0x0ae584feb3d5f612L,0x5ea69b8d60e828ecL,0x6c07898554087030L, + 0x649cab04ac4821feL } }, + /* 50 << 140 */ + { { 0x25ecedcf8bdce214L,0xb5622f7286af7361L,0x0e1227aa7038b9e2L, + 0xd0efb273ac20fa77L }, + { 0x817ff88b79df975bL,0x856bf2861999503eL,0xb4d5351f5038ec46L, + 0x740a52c5fc42af6eL } }, + /* 51 << 140 */ + { { 0x2e38bb152cbb1a3fL,0xc3eb99fe17a83429L,0xca4fcbf1dd66bb74L, + 0x880784d6cde5e8fcL }, + { 0xddc84c1cb4e7a0beL,0x8780510dbd15a72fL,0x44bcf1af81ec30e1L, + 0x141e50a80a61073eL } }, + /* 52 << 140 */ + { { 0x0d95571847be87aeL,0x68a61417f76a4372L,0xf57e7e87c607c3d3L, + 0x043afaf85252f332L }, + { 0xcc14e1211552a4d2L,0xb6dee692bb4d4ab4L,0xb6ab74c8a03816a4L, + 0x84001ae46f394a29L } }, + /* 53 << 140 */ + { { 0x5bed8344d795fb45L,0x57326e7db79f55a5L,0xc9533ce04accdffcL, + 0x53473caf3993fa04L }, + { 0x7906eb93a13df4c8L,0xa73e51f697cbe46fL,0xd1ab3ae10ae4ccf8L, + 0x256145088a5b3dbcL } }, + /* 54 << 140 */ + { { 0x61eff96211a71b27L,0xdf71412b6bb7fa39L,0xb31ba6b82bd7f3efL, + 0xb0b9c41569180d29L }, + { 0xeec14552014cdde5L,0x702c624b227b4bbbL,0x2b15e8c2d3e988f3L, + 0xee3bcc6da4f7fd04L } }, + /* 55 << 140 */ + { { 0x9d00822a42ac6c85L,0x2db0cea61df9f2b7L,0xd7cad2ab42de1e58L, + 0x346ed5262d6fbb61L }, + { 0xb39629951a2faf09L,0x2fa8a5807c25612eL,0x30ae04da7cf56490L, + 0x756629080eea3961L } }, + /* 56 << 140 */ + { { 0x3609f5c53d080847L,0xcb081d395241d4f6L,0xb4fb381077961a63L, + 0xc20c59842abb66fcL }, + { 
0x3d40aa7cf902f245L,0x9cb127364e536b1eL,0x5eda24da99b3134fL, + 0xafbd9c695cd011afL } }, + /* 57 << 140 */ + { { 0x9a16e30ac7088c7dL,0x5ab657103207389fL,0x1b09547fe7407a53L, + 0x2322f9d74fdc6eabL }, + { 0xc0f2f22d7430de4dL,0x19382696e68ca9a9L,0x17f1eff1918e5868L, + 0xe3b5b635586f4204L } }, + /* 58 << 140 */ + { { 0x146ef9803fbc4341L,0x359f2c805b5eed4eL,0x9f35744e7482e41dL, + 0x9a9ac3ecf3b224c2L }, + { 0x9161a6fe91fc50aeL,0x89ccc66bc613fa7cL,0x89268b14c732f15aL, + 0x7cd6f4e2b467ed03L } }, + /* 59 << 140 */ + { { 0xfbf79869ce56b40eL,0xf93e094cc02dde98L,0xefe0c3a8edee2cd7L, + 0x90f3ffc0b268fd42L }, + { 0x81a7fd5608241aedL,0x95ab7ad800b1afe8L,0x401270563e310d52L, + 0xd3ffdeb109d9fc43L } }, + /* 60 << 140 */ + { { 0xc8f85c91d11a8594L,0x2e74d25831cf6db8L,0x829c7ca302b5dfd0L, + 0xe389cfbe69143c86L }, + { 0xd01b6405941768d8L,0x4510399503bf825dL,0xcc4ee16656cd17e2L, + 0xbea3c283ba037e79L } }, + /* 61 << 140 */ + { { 0x4e1ac06ed9a47520L,0xfbfe18aaaf852404L,0x5615f8e28087648aL, + 0x7301e47eb9d150d9L }, + { 0x79f9f9ddb299b977L,0x76697a7ba5b78314L,0x10d674687d7c90e7L, + 0x7afffe03937210b5L } }, + /* 62 << 140 */ + { { 0x5aef3e4b28c22ceeL,0xefb0ecd809fd55aeL,0x4cea71320d2a5d6aL, + 0x9cfb5fa101db6357L }, + { 0x395e0b57f36e1ac5L,0x008fa9ad36cafb7dL,0x8f6cdf705308c4dbL, + 0x51527a3795ed2477L } }, + /* 63 << 140 */ + { { 0xba0dee305bd21311L,0x6ed41b22909c90d7L,0xc5f6b7587c8696d3L, + 0x0db8eaa83ce83a80L }, + { 0xd297fe37b24b4b6fL,0xfe58afe8522d1f0dL,0x973587368c98dbd9L, + 0x6bc226ca9454a527L } }, + /* 64 << 140 */ + { { 0xa12b384ece53c2d0L,0x779d897d5e4606daL,0xa53e47b073ec12b0L, + 0x462dbbba5756f1adL }, + { 0x69fe09f2cafe37b6L,0x273d1ebfecce2e17L,0x8ac1d5383cf607fdL, + 0x8035f7ff12e10c25L } }, + /* 0 << 147 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 147 */ + { { 0x854d34c77e6c5520L,0xc27df9efdcb9ea58L,0x405f2369d686666dL, + 0x29d1febf0417aa85L }, + { 0x9846819e93470afeL,0x3e6a9669e2a27f9eL,0x24d008a2e31e6504L, + 0xdba7cecf9cb7680aL } }, + /* 2 << 147 */ + { { 0xecaff541338d6e43L,0x56f7dd734541d5ccL,0xb5d426de96bc88caL, + 0x48d94f6b9ed3a2c3L }, + { 0x6354a3bb2ef8279cL,0xd575465b0b1867f2L,0xef99b0ff95225151L, + 0xf3e19d88f94500d8L } }, + /* 3 << 147 */ + { { 0x92a83268e32dd620L,0x913ec99f627849a2L,0xedd8fdfa2c378882L, + 0xaf96f33eee6f8cfeL }, + { 0xc06737e5dc3fa8a5L,0x236bb531b0b03a1dL,0x33e59f2989f037b0L, + 0x13f9b5a7d9a12a53L } }, + /* 4 << 147 */ + { { 0x0d0df6ce51efb310L,0xcb5b2eb4958df5beL,0xd6459e2936158e59L, + 0x82aae2b91466e336L }, + { 0xfb658a39411aa636L,0x7152ecc5d4c0a933L,0xf10c758a49f026b7L, + 0xf4837f97cb09311fL } }, + /* 5 << 147 */ + { { 0xddfb02c4c753c45fL,0x18ca81b6f9c840feL,0x846fd09ab0f8a3e6L, + 0xb1162adde7733dbcL }, + { 0x7070ad20236e3ab6L,0xf88cdaf5b2a56326L,0x05fc8719997cbc7aL, + 0x442cd4524b665272L } }, + /* 6 << 147 */ + { { 0x7807f364b71698f5L,0x6ba418d29f7b605eL,0xfd20b00fa03b2cbbL, + 0x883eca37da54386fL }, + { 0xff0be43ff3437f24L,0xe910b432a48bb33cL,0x4963a128329df765L, + 0xac1dd556be2fe6f7L } }, + /* 7 << 147 */ + { { 0x557610f924a0a3fcL,0x38e17bf4e881c3f9L,0x6ba84fafed0dac99L, + 0xd4a222c359eeb918L }, + { 0xc79c1dbe13f542b6L,0x1fc65e0de425d457L,0xeffb754f1debb779L, + 0x638d8fd09e08af60L } }, + /* 8 << 147 */ + { { 0x994f523a626332d5L,0x7bc388335561bb44L,0x005ed4b03d845ea2L, + 0xd39d3ee1c2a1f08aL }, + { 0x6561fdd3e7676b0dL,0x620e35fffb706017L,0x36ce424ff264f9a8L, + 0xc4c3419fda2681f7L } }, + /* 9 << 147 */ + { { 0xfb6afd2f69beb6e8L,0x3a50b9936d700d03L,0xc840b2ad0c83a14fL, + 0x573207be54085befL }, + { 
0x5af882e309fe7e5bL,0x957678a43b40a7e1L,0x172d4bdd543056e2L, + 0x9c1b26b40df13c0aL } }, + /* 10 << 147 */ + { { 0x1c30861cf405ff06L,0xebac86bd486e828bL,0xe791a971636933fcL, + 0x50e7c2be7aeee947L }, + { 0xc3d4a095fa90d767L,0xae60eb7be670ab7bL,0x17633a64397b056dL, + 0x93a21f33105012aaL } }, + /* 11 << 147 */ + { { 0x663c370babb88643L,0x91df36d722e21599L,0x183ba8358b761671L, + 0x381eea1d728f3bf1L }, + { 0xb9b2f1ba39966e6cL,0x7c464a28e7295492L,0x0fd5f70a09b26b7fL, + 0xa9aba1f9fbe009dfL } }, + /* 12 << 147 */ + { { 0x857c1f22369b87adL,0x3c00e5d932fca556L,0x1ad74cab90b06466L, + 0xa7112386550faaf2L }, + { 0x7435e1986d9bd5f5L,0x2dcc7e3859c3463fL,0xdc7df748ca7bd4b2L, + 0x13cd4c089dec2f31L } }, + /* 13 << 147 */ + { { 0x0d3b5df8e3237710L,0x0dadb26ecbd2f7b0L,0x9f5966abe4aa082bL, + 0x666ec8de350e966eL }, + { 0x1bfd1ed5ee524216L,0xcd93c59b41dab0b6L,0x658a8435d186d6baL, + 0x1b7d34d2159d1195L } }, + /* 14 << 147 */ + { { 0x5936e46022caf46bL,0x6a45dd8f9a96fe4fL,0xf7925434b98f474eL, + 0x414104120053ef15L }, + { 0x71cf8d1241de97bfL,0xb8547b61bd80bef4L,0xb47d3970c4db0037L, + 0xf1bcd328fef20dffL } }, + /* 15 << 147 */ + { { 0x31a92e0910caad67L,0x1f5919605531a1e1L,0x3bb852e05f4fc840L, + 0x63e297ca93a72c6cL }, + { 0x3c2b0b2e49abad67L,0x6ec405fced3db0d9L,0xdc14a5307fef1d40L, + 0xccd19846280896fcL } }, + /* 16 << 147 */ + { { 0x00f831769bb81648L,0xd69eb485653120d0L,0xd17d75f44ccabc62L, + 0x34a07f82b749fcb1L }, + { 0x2c3af787bbfb5554L,0xb06ed4d062e283f8L,0x5722889fa19213a0L, + 0x162b085edcf3c7b4L } }, + /* 17 << 147 */ + { { 0xbcaecb31e0dd3ecaL,0xc6237fbce52f13a5L,0xcc2b6b0327bac297L, + 0x2ae1cac5b917f54aL }, + { 0x474807d47845ae4fL,0xfec7dd92ce5972e0L,0xc3bd25411d7915bbL, + 0x66f85dc4d94907caL } }, + /* 18 << 147 */ + { { 0xd981b888bdbcf0caL,0xd75f5da6df279e9fL,0x128bbf247054e934L, + 0x3c6ff6e581db134bL }, + { 0x795b7cf4047d26e4L,0xf370f7b85049ec37L,0xc6712d4dced945afL, + 0xdf30b5ec095642bcL } }, + /* 19 << 147 */ + { { 0x9b034c624896246eL,0x5652c016ee90bbd1L,0xeb38636f87fedb73L, + 0x5e32f8470135a613L }, + { 0x0703b312cf933c83L,0xd05bb76e1a7f47e6L,0x825e4f0c949c2415L, + 0x569e56227250d6f8L } }, + /* 20 << 147 */ + { { 0xbbe9eb3a6568013eL,0x8dbd203f22f243fcL,0x9dbd7694b342734aL, + 0x8f6d12f846afa984L }, + { 0xb98610a2c9eade29L,0xbab4f32347dd0f18L,0x5779737b671c0d46L, + 0x10b6a7c6d3e0a42aL } }, + /* 21 << 147 */ + { { 0xfb19ddf33035b41cL,0xd336343f99c45895L,0x61fe493854c857e5L, + 0xc4d506beae4e57d5L }, + { 0x3cd8c8cbbbc33f75L,0x7281f08a9262c77dL,0x083f4ea6f11a2823L, + 0x8895041e9fba2e33L } }, + /* 22 << 147 */ + { { 0xfcdfea499c438edfL,0x7678dcc391edba44L,0xf07b3b87e2ba50f0L, + 0xc13888ef43948c1bL }, + { 0xc2135ad41140af42L,0x8e5104f3926ed1a7L,0xf24430cb88f6695fL, + 0x0ce0637b6d73c120L } }, + /* 23 << 147 */ + { { 0xb2db01e6fe631e8fL,0x1c5563d7d7bdd24bL,0x8daea3ba369ad44fL, + 0x000c81b68187a9f9L }, + { 0x5f48a951aae1fd9aL,0xe35626c78d5aed8aL,0x209527630498c622L, + 0x76d17634773aa504L } }, + /* 24 << 147 */ + { { 0x36d90ddaeb300f7aL,0x9dcf7dfcedb5e801L,0x645cb26874d5244cL, + 0xa127ee79348e3aa2L }, + { 0x488acc53575f1dbbL,0x95037e8580e6161eL,0x57e59283292650d0L, + 0xabe67d9914938216L } }, + /* 25 << 147 */ + { { 0x3c7f944b3f8e1065L,0xed908cb6330e8924L,0x08ee8fd56f530136L, + 0x2227b7d5d7ffc169L }, + { 0x4f55c893b5cd6dd5L,0x82225e11a62796e8L,0x5c6cead1cb18e12cL, + 0x4381ae0c84f5a51aL } }, + /* 26 << 147 */ + { { 0x345913d37fafa4c8L,0x3d9180820491aac0L,0x9347871f3e69264cL, + 0xbea9dd3cb4f4f0cdL }, + { 0xbda5d0673eadd3e7L,0x0033c1b80573bcd8L,0x255893795da2486cL, + 0xcb89ee5b86abbee7L } }, + /* 27 << 147 */ + { { 
0x8fe0a8f322532e5dL,0xb6410ff0727dfc4cL,0x619b9d58226726dbL, + 0x5ec256697a2b2dc7L }, + { 0xaf4d2e064c3beb01L,0x852123d07acea556L,0x0e9470faf783487aL, + 0x75a7ea045664b3ebL } }, + /* 28 << 147 */ + { { 0x4ad78f356798e4baL,0x9214e6e5c7d0e091L,0xc420b488b1290403L, + 0x64049e0afc295749L }, + { 0x03ef5af13ae9841fL,0xdbe4ca19b0b662a6L,0x46845c5ffa453458L, + 0xf8dabf1910b66722L } }, + /* 29 << 147 */ + { { 0xb650f0aacce2793bL,0x71db851ec5ec47c1L,0x3eb78f3e3b234fa9L, + 0xb0c60f35fc0106ceL }, + { 0x05427121774eadbdL,0x25367fafce323863L,0x7541b5c9cd086976L, + 0x4ff069e2dc507ad1L } }, + /* 30 << 147 */ + { { 0x741452568776e667L,0x6e76142cb23c6bb5L,0xdbf307121b3a8a87L, + 0x60e7363e98450836L }, + { 0x5741450eb7366d80L,0xe4ee14ca4837dbdfL,0xa765eb9b69d4316fL, + 0x04548dca8ef43825L } }, + /* 31 << 147 */ + { { 0x9c9f4e4c5ae888ebL,0x733abb5156e9ac99L,0xdaad3c20ba6ac029L, + 0x9b8dd3d32ba3e38eL }, + { 0xa9bb4c920bc5d11aL,0xf20127a79c5f88a3L,0x4f52b06e161d3cb8L, + 0x26c1ff096afaf0a6L } }, + /* 32 << 147 */ + { { 0x32670d2f7189e71fL,0xc64387485ecf91e7L,0x15758e57db757a21L, + 0x427d09f8290a9ce5L }, + { 0x846a308f38384a7aL,0xaac3acb4b0732b99L,0x9e94100917845819L, + 0x95cba111a7ce5e03L } }, + /* 33 << 147 */ + { { 0x6f3d4f7fb00009c4L,0xb8396c278ff28b5fL,0xb1a9ae431c97975dL, + 0x9d7ba8afe5d9fed5L }, + { 0x338cf09f34f485b6L,0xbc0ddacc64122516L,0xa450da1205d471feL, + 0x4c3a6250628dd8c9L } }, + /* 34 << 147 */ + { { 0x69c7d103d1295837L,0xa2893e503807eb2fL,0xd6e1e1debdb41491L, + 0xc630745b5e138235L }, + { 0xc892109e48661ae1L,0x8d17e7ebea2b2674L,0x00ec0f87c328d6b5L, + 0x6d858645f079ff9eL } }, + /* 35 << 147 */ + { { 0x6cdf243e19115eadL,0x1ce1393e4bac4fcfL,0x2c960ed09c29f25bL, + 0x59be4d8e9d388a05L }, + { 0x0d46e06cd0def72bL,0xb923db5de0342748L,0xf7d3aacd936d4a3dL, + 0x558519cc0b0b099eL } }, + /* 36 << 147 */ + { { 0x3ea8ebf8827097efL,0x259353dbd054f55dL,0x84c89abc6d2ed089L, + 0x5c548b698e096a7cL }, + { 0xd587f616994b995dL,0x4d1531f6a5845601L,0x792ab31e451fd9f0L, + 0xc8b57bb265adf6caL } }, + /* 37 << 147 */ + { { 0x68440fcb1cd5ad73L,0xb9c860e66144da4fL,0x2ab286aa8462beb8L, + 0xcc6b8fffef46797fL }, + { 0xac820da420c8a471L,0x69ae05a177ff7fafL,0xb9163f39bfb5da77L, + 0xbd03e5902c73ab7aL } }, + /* 38 << 147 */ + { { 0x7e862b5eb2940d9eL,0x3c663d864b9af564L,0xd8309031bde3033dL, + 0x298231b2d42c5bc6L }, + { 0x42090d2c552ad093L,0xa4799d1cff854695L,0x0a88b5d6d31f0d00L, + 0xf8b40825a2f26b46L } }, + /* 39 << 147 */ + { { 0xec29b1edf1bd7218L,0xd491c53b4b24c86eL,0xd2fe588f3395ea65L, + 0x6f3764f74456ef15L }, + { 0xdb43116dcdc34800L,0xcdbcd456c1e33955L,0xefdb554074ab286bL, + 0x948c7a51d18c5d7cL } }, + /* 40 << 147 */ + { { 0xeb81aa377378058eL,0x41c746a104411154L,0xa10c73bcfb828ac7L, + 0x6439be919d972b29L }, + { 0x4bf3b4b043a2fbadL,0x39e6dadf82b5e840L,0x4f7164086397bd4cL, + 0x0f7de5687f1eeccbL } }, + /* 41 << 147 */ + { { 0x5865c5a1d2ffbfc1L,0xf74211fa4ccb6451L,0x66368a88c0b32558L, + 0x5b539dc29ad7812eL }, + { 0x579483d02f3af6f6L,0x5213207899934eceL,0x50b9650fdcc9e983L, + 0xca989ec9aee42b8aL } }, + /* 42 << 147 */ + { { 0x6a44c829d6f62f99L,0x8f06a3094c2a7c0cL,0x4ea2b3a098a0cb0aL, + 0x5c547b70beee8364L }, + { 0x461d40e1682afe11L,0x9e0fc77a7b41c0a8L,0x79e4aefde20d5d36L, + 0x2916e52032dd9f63L } }, + /* 43 << 147 */ + { { 0xf59e52e83f883fafL,0x396f96392b868d35L,0xc902a9df4ca19881L, + 0x0fc96822db2401a6L }, + { 0x4123758766f1c68dL,0x10fc6de3fb476c0dL,0xf8b6b579841f5d90L, + 0x2ba8446cfa24f44aL } }, + /* 44 << 147 */ + { { 0xa237b920ef4a9975L,0x60bb60042330435fL,0xd6f4ab5acfb7e7b5L, + 0xb2ac509783435391L }, + { 
0xf036ee2fb0d1ea67L,0xae779a6a74c56230L,0x59bff8c8ab838ae6L, + 0xcd83ca999b38e6f0L } }, + /* 45 << 147 */ + { { 0xbb27bef5e33deed3L,0xe6356f6f001892a8L,0xbf3be6cc7adfbd3eL, + 0xaecbc81c33d1ac9dL }, + { 0xe4feb909e6e861dcL,0x90a247a453f5f801L,0x01c50acb27346e57L, + 0xce29242e461acc1bL } }, + /* 46 << 147 */ + { { 0x04dd214a2f998a91L,0x271ee9b1d4baf27bL,0x7e3027d1e8c26722L, + 0x21d1645c1820dce5L }, + { 0x086f242c7501779cL,0xf0061407fa0e8009L,0xf23ce47760187129L, + 0x05bbdedb0fde9bd0L } }, + /* 47 << 147 */ + { { 0x682f483225d98473L,0xf207fe855c658427L,0xb6fdd7ba4166ffa1L, + 0x0c3140569eed799dL }, + { 0x0db8048f4107e28fL,0x74ed387141216840L,0x74489f8f56a3c06eL, + 0x1e1c005b12777134L } }, + /* 48 << 147 */ + { { 0xdb332a73f37ec3c3L,0xc65259bddd59eba0L,0x2291709cdb4d3257L, + 0x9a793b25bd389390L }, + { 0xf39fe34be43756f0L,0x2f76bdce9afb56c9L,0x9f37867a61208b27L, + 0xea1d4307089972c3L } }, + /* 49 << 147 */ + { { 0x8c5953308bdf623aL,0x5f5accda8441fb7dL,0xfafa941832ddfd95L, + 0x6ad40c5a0fde9be7L }, + { 0x43faba89aeca8709L,0xc64a7cf12c248a9dL,0x1662025272637a76L, + 0xaee1c79122b8d1bbL } }, + /* 50 << 147 */ + { { 0xf0f798fd21a843b2L,0x56e4ed4d8d005cb1L,0x355f77801f0d8abeL, + 0x197b04cf34522326L }, + { 0x41f9b31ffd42c13fL,0x5ef7feb2b40f933dL,0x27326f425d60bad4L, + 0x027ecdb28c92cf89L } }, + /* 51 << 147 */ + { { 0x04aae4d14e3352feL,0x08414d2f73591b90L,0x5ed6124eb7da7d60L, + 0xb985b9314d13d4ecL }, + { 0xa592d3ab96bf36f9L,0x012dbed5bbdf51dfL,0xa57963c0df6c177dL, + 0x010ec86987ca29cfL } }, + /* 52 << 147 */ + { { 0xba1700f6bf926dffL,0x7c9fdbd1f4bf6bc2L,0xdc18dc8f64da11f5L, + 0xa6074b7ad938ae75L }, + { 0x14270066e84f44a4L,0x99998d38d27b954eL,0xc1be8ab2b4f38e9aL, + 0x8bb55bbf15c01016L } }, + /* 53 << 147 */ + { { 0xf73472b40ea2ab30L,0xd365a340f73d68ddL,0xc01a716819c2e1ebL, + 0x32f49e3734061719L }, + { 0xb73c57f101d8b4d6L,0x03c8423c26b47700L,0x321d0bc8a4d8826aL, + 0x6004213c4bc0e638L } }, + /* 54 << 147 */ + { { 0xf78c64a1c1c06681L,0x16e0a16fef018e50L,0x31cbdf91db42b2b3L, + 0xf8f4ffcee0d36f58L }, + { 0xcdcc71cd4cc5e3e0L,0xd55c7cfaa129e3e0L,0xccdb6ba00fb2cbf1L, + 0x6aba0005c4bce3cbL } }, + /* 55 << 147 */ + { { 0x501cdb30d232cfc4L,0x9ddcf12ed58a3cefL,0x02d2cf9c87e09149L, + 0xdc5d7ec72c976257L }, + { 0x6447986e0b50d7ddL,0x88fdbaf7807f112aL,0x58c9822ab00ae9f6L, + 0x6abfb9506d3d27e0L } }, + /* 56 << 147 */ + { { 0xd0a744878a429f4fL,0x0649712bdb516609L,0xb826ba57e769b5dfL, + 0x82335df21fc7aaf2L }, + { 0x2389f0675c93d995L,0x59ac367a68677be6L,0xa77985ff21d9951bL, + 0x038956fb85011cceL } }, + /* 57 << 147 */ + { { 0x608e48cbbb734e37L,0xc08c0bf22be5b26fL,0x17bbdd3bf9b1a0d9L, + 0xeac7d89810483319L }, + { 0xc95c4bafbc1a6deaL,0xfdd0e2bf172aafdbL,0x40373cbc8235c41aL, + 0x14303f21fb6f41d5L } }, + /* 58 << 147 */ + { { 0xba0636210408f237L,0xcad3b09aecd2d1edL,0x4667855a52abb6a2L, + 0xba9157dcaa8b417bL }, + { 0xfe7f35074f013efbL,0x1b112c4baa38c4a2L,0xa1406a609ba64345L, + 0xe53cba336993c80bL } }, + /* 59 << 147 */ + { { 0x45466063ded40d23L,0x3d5f1f4d54908e25L,0x9ebefe62403c3c31L, + 0x274ea0b50672a624L }, + { 0xff818d99451d1b71L,0x80e826438f79cf79L,0xa165df1373ce37f5L, + 0xa744ef4ffe3a21fdL } }, + /* 60 << 147 */ + { { 0x73f1e7f5cf551396L,0xc616898e868c676bL,0x671c28c78c442c36L, + 0xcfe5e5585e0a317dL }, + { 0x1242d8187051f476L,0x56fad2a614f03442L,0x262068bc0a44d0f6L, + 0xdfa2cd6ece6edf4eL } }, + /* 61 << 147 */ + { { 0x0f43813ad15d1517L,0x61214cb2377d44f5L,0xd399aa29c639b35fL, + 0x42136d7154c51c19L }, + { 0x9774711b08417221L,0x0a5546b352545a57L,0x80624c411150582dL, + 0x9ec5c418fbc555bcL } }, + /* 62 << 147 */ + { { 
0x2c87dcad771849f1L,0xb0c932c501d7bf6fL,0x6aa5cd3e89116eb2L, + 0xd378c25a51ca7bd3L }, + { 0xc612a0da9e6e3e31L,0x0417a54db68ad5d0L,0x00451e4a22c6edb8L, + 0x9fbfe019b42827ceL } }, + /* 63 << 147 */ + { { 0x2fa92505ba9384a2L,0x21b8596e64ad69c1L,0x8f4fcc49983b35a6L, + 0xde09376072754672L }, + { 0x2f14ccc8f7bffe6dL,0x27566bff5d94263dL,0xb5b4e9c62df3ec30L, + 0x94f1d7d53e6ea6baL } }, + /* 64 << 147 */ + { { 0x97b7851aaaca5e9bL,0x518aa52156713b97L,0x3357e8c7150a61f6L, + 0x7842e7e2ec2c2b69L }, + { 0x8dffaf656868a548L,0xd963bd82e068fc81L,0x64da5c8b65917733L, + 0x927090ff7b247328L } }, + /* 0 << 154 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 154 */ + { { 0x214bc9a7d298c241L,0xe3b697ba56807cfdL,0xef1c78024564eadbL, + 0xdde8cdcfb48149c5L }, + { 0x946bf0a75a4d2604L,0x27154d7f6c1538afL,0x95cc9230de5b1fccL, + 0xd88519e966864f82L } }, + /* 2 << 154 */ + { { 0xb828dd1a7cb1282cL,0xa08d7626be46973aL,0x6baf8d40e708d6b2L, + 0x72571fa14daeb3f3L }, + { 0x85b1732ff22dfd98L,0x87ab01a70087108dL,0xaaaafea85988207aL, + 0xccc832f869f00755L } }, + /* 3 << 154 */ + { { 0x964d950e36ff3bf0L,0x8ad20f6ff0b34638L,0x4d9177b3b5d7585fL, + 0xcf839760ef3f019fL }, + { 0x582fc5b38288c545L,0x2f8e4e9b13116bd1L,0xf91e1b2f332120efL, + 0xcf5687242a17dd23L } }, + /* 4 << 154 */ + { { 0x488f1185ca8d9d1aL,0xadf2c77dd987ded2L,0x5f3039f060c46124L, + 0xe5d70b7571e095f4L }, + { 0x82d586506260e70fL,0x39d75ea7f750d105L,0x8cf3d0b175bac364L, + 0xf3a7564d21d01329L } }, + /* 5 << 154 */ + { { 0x182f04cd2f52d2a7L,0x4fde149ae2df565aL,0xb80c5eeca79fb2f7L, + 0xab491d7b22ddc897L }, + { 0x99d76c18c6312c7fL,0xca0d5f3d6aa41a57L,0x71207325d15363a0L, + 0xe82aa265beb252c2L } }, + /* 6 << 154 */ + { { 0x94ab4700ec3128c2L,0x6c76d8628e383f49L,0xdc36b150c03024ebL, + 0xfb43947753daac69L }, + { 0xfc68764a8dc79623L,0x5b86995db440fbb2L,0xd66879bfccc5ee0dL, + 0x0522894295aa8bd3L } }, + /* 7 << 154 */ + { { 0xb51a40a51e6a75c1L,0x24327c760ea7d817L,0x0663018207774597L, + 0xd6fdbec397fa7164L }, + { 0x20c99dfb13c90f48L,0xd6ac5273686ef263L,0xc6a50bdcfef64eebL, + 0xcd87b28186fdfc32L } }, + /* 8 << 154 */ + { { 0xb24aa43e3fcd3efcL,0xdd26c034b8088e9aL,0xa5ef4dc9bd3d46eaL, + 0xa2f99d588a4c6a6fL }, + { 0xddabd3552f1da46cL,0x72c3f8ce1afacdd1L,0xd90c4eee92d40578L, + 0xd28bb41fca623b94L } }, + /* 9 << 154 */ + { { 0x50fc0711745edc11L,0x9dd9ad7d3dc87558L,0xce6931fbb49d1e64L, + 0x6c77a0a2c98bd0f9L }, + { 0x62b9a6296baf7cb1L,0xcf065f91ccf72d22L,0x7203cce979639071L, + 0x09ae4885f9cb732fL } }, + /* 10 << 154 */ + { { 0x5e7c3becee8314f3L,0x1c068aeddbea298fL,0x08d381f17c80acecL, + 0x03b56be8e330495bL }, + { 0xaeffb8f29222882dL,0x95ff38f6c4af8bf7L,0x50e32d351fc57d8cL, + 0x6635be5217b444f0L } }, + /* 11 << 154 */ + { { 0x04d15276a5177900L,0x4e1dbb47f6858752L,0x5b475622c615796cL, + 0xa6fa0387691867bfL }, + { 0xed7f5d562844c6d0L,0xc633cf9b03a2477dL,0xf6be5c402d3721d6L, + 0xaf312eb7e9fd68e6L } }, + /* 12 << 154 */ + { { 0x242792d2e7417ce1L,0xff42bc71970ee7f5L,0x1ff4dc6d5c67a41eL, + 0x77709b7b20882a58L }, + { 0x3554731dbe217f2cL,0x2af2a8cd5bb72177L,0x58eee769591dd059L, + 0xbb2930c94bba6477L } }, + /* 13 << 154 */ + { { 0x863ee0477d930cfcL,0x4c262ad1396fd1f4L,0xf4765bc8039af7e1L, + 0x2519834b5ba104f6L }, + { 0x7cd61b4cd105f961L,0xa5415da5d63bca54L,0x778280a088a1f17cL, + 0xc49689492329512cL } }, + /* 14 << 154 */ + { { 0x174a9126cecdaa7aL,0xfc8c7e0e0b13247bL,0x29c110d23484c1c4L, + 0xf8eb8757831dfc3bL }, + { 0x022f0212c0067452L,0x3f6f69ee7b9b926cL,0x09032da0ef42daf4L, + 0x79f00ade83f80de4L } }, + /* 15 << 154 */ + { { 
0x6210db7181236c97L,0x74f7685b3ee0781fL,0x4df7da7ba3e41372L, + 0x2aae38b1b1a1553eL }, + { 0x1688e222f6dd9d1bL,0x576954485b8b6487L,0x478d21274b2edeaaL, + 0xb2818fa51e85956aL } }, + /* 16 << 154 */ + { { 0x1e6adddaf176f2c0L,0x01ca4604e2572658L,0x0a404ded85342ffbL, + 0x8cf60f96441838d6L }, + { 0x9bbc691cc9071c4aL,0xfd58874434442803L,0x97101c85809c0d81L, + 0xa7fb754c8c456f7fL } }, + /* 17 << 154 */ + { { 0xc95f3c5cd51805e1L,0xab4ccd39b299dca8L,0x3e03d20b47eaf500L, + 0xfa3165c1d7b80893L }, + { 0x005e8b54e160e552L,0xdc4972ba9019d11fL,0x21a6972e0c9a4a7aL, + 0xa52c258f37840fd7L } }, + /* 18 << 154 */ + { { 0xf8559ff4c1e99d81L,0x08e1a7d6a3c617c0L,0xb398fd43248c6ba7L, + 0x6ffedd91d1283794L }, + { 0x8a6a59d2d629d208L,0xa9d141d53490530eL,0x42f6fc1838505989L, + 0x09bf250d479d94eeL } }, + /* 19 << 154 */ + { { 0x223ad3b1b3822790L,0x6c5926c093b8971cL,0x609efc7e75f7fa62L, + 0x45d66a6d1ec2d989L }, + { 0x4422d663987d2792L,0x4a73caad3eb31d2bL,0xf06c2ac1a32cb9e6L, + 0xd9445c5f91aeba84L } }, + /* 20 << 154 */ + { { 0x6af7a1d5af71013fL,0xe68216e50bedc946L,0xf4cba30bd27370a0L, + 0x7981afbf870421ccL }, + { 0x02496a679449f0e1L,0x86cfc4be0a47edaeL,0x3073c936b1feca22L, + 0xf569461203f8f8fbL } }, + /* 21 << 154 */ + { { 0xd063b723901515eaL,0x4c6c77a5749cf038L,0x6361e360ab9e5059L, + 0x596cf171a76a37c0L }, + { 0x800f53fa6530ae7aL,0x0f5e631e0792a7a6L,0x5cc29c24efdb81c9L, + 0xa269e8683f9c40baL } }, + /* 22 << 154 */ + { { 0xec14f9e12cb7191eL,0x78ea1bd8e5b08ea6L,0x3c65aa9b46332bb9L, + 0x84cc22b3bf80ce25L }, + { 0x0098e9e9d49d5bf1L,0xcd4ec1c619087da4L,0x3c9d07c5aef6e357L, + 0x839a02689f8f64b8L } }, + /* 23 << 154 */ + { { 0xc5e9eb62c6d8607fL,0x759689f56aa995e4L,0x70464669bbb48317L, + 0x921474bfe402417dL }, + { 0xcabe135b2a354c8cL,0xd51e52d2812fa4b5L,0xec74109653311fe8L, + 0x4f774535b864514bL } }, + /* 24 << 154 */ + { { 0xbcadd6715bde48f8L,0xc97038732189bc7dL,0x5d45299ec709ee8aL, + 0xd1287ee2845aaff8L }, + { 0x7d1f8874db1dbf1fL,0xea46588b990c88d6L,0x60ba649a84368313L, + 0xd5fdcbce60d543aeL } }, + /* 25 << 154 */ + { { 0x90b46d43810d5ab0L,0x6739d8f904d7e5ccL,0x021c1a580d337c33L, + 0x00a6116268e67c40L }, + { 0x95ef413b379f0a1fL,0xfe126605e9e2ab95L,0x67578b852f5f199cL, + 0xf5c003292cb84913L } }, + /* 26 << 154 */ + { { 0xf795643037577dd8L,0x83b82af429c5fe88L,0x9c1bea26cdbdc132L, + 0x589fa0869c04339eL }, + { 0x033e9538b13799dfL,0x85fa8b21d295d034L,0xdf17f73fbd9ddccaL, + 0xf32bd122ddb66334L } }, + /* 27 << 154 */ + { { 0x55ef88a7858b044cL,0x1f0d69c25aa9e397L,0x55fd9cc340d85559L, + 0xc774df727785ddb2L }, + { 0x5dcce9f6d3bd2e1cL,0xeb30da20a85dfed0L,0x5ed7f5bbd3ed09c4L, + 0x7d42a35c82a9c1bdL } }, + /* 28 << 154 */ + { { 0xcf3de9959890272dL,0x75f3432a3e713a10L,0x5e13479fe28227b8L, + 0xb8561ea9fefacdc8L }, + { 0xa6a297a08332aafdL,0x9b0d8bb573809b62L,0xd2fa1cfd0c63036fL, + 0x7a16eb55bd64bda8L } }, + /* 29 << 154 */ + { { 0x3f5cf5f678e62ddcL,0x2267c45407fd752bL,0x5e361b6b5e437bbeL, + 0x95c595018354e075L }, + { 0xec725f85f2b254d9L,0x844b617d2cb52b4eL,0xed8554f5cf425fb5L, + 0xab67703e2af9f312L } }, + /* 30 << 154 */ + { { 0x4cc34ec13cf48283L,0xb09daa259c8a705eL,0xd1e9d0d05b7d4f84L, + 0x4df6ef64db38929dL }, + { 0xe16b0763aa21ba46L,0xc6b1d178a293f8fbL,0x0ff5b602d520aabfL, + 0x94d671bdc339397aL } }, + /* 31 << 154 */ + { { 0x7c7d98cf4f5792faL,0x7c5e0d6711215261L,0x9b19a631a7c5a6d4L, + 0xc8511a627a45274dL }, + { 0x0c16621ca5a60d99L,0xf7fbab88cf5e48cbL,0xab1e6ca2f7ddee08L, + 0x83bd08cee7867f3cL } }, + /* 32 << 154 */ + { { 0xf7e48e8a2ac13e27L,0x4494f6df4eb1a9f5L,0xedbf84eb981f0a62L, + 0x49badc32536438f0L }, + { 
0x50bea541004f7571L,0xbac67d10df1c94eeL,0x253d73a1b727bc31L, + 0xb3d01cf230686e28L } }, + /* 33 << 154 */ + { { 0x51b77b1b55fd0b8bL,0xa099d183feec3173L,0x202b1fb7670e72b7L, + 0xadc88b33a8e1635fL }, + { 0x34e8216af989d905L,0xc2e68d2029b58d01L,0x11f81c926fe55a93L, + 0x15f1462a8f296f40L } }, + /* 34 << 154 */ + { { 0x1915d375ea3d62f2L,0xa17765a301c8977dL,0x7559710ae47b26f6L, + 0xe0bd29c8535077a5L }, + { 0x615f976d08d84858L,0x370dfe8569ced5c1L,0xbbc7503ca734fa56L, + 0xfbb9f1ec91ac4574L } }, + /* 35 << 154 */ + { { 0x95d7ec53060dd7efL,0xeef2dacd6e657979L,0x54511af3e2a08235L, + 0x1e324aa41f4aea3dL }, + { 0x550e7e71e6e67671L,0xbccd5190bf52faf7L,0xf880d316223cc62aL, + 0x0d402c7e2b32eb5dL } }, + /* 36 << 154 */ + { { 0xa40bc039306a5a3bL,0x4e0a41fd96783a1bL,0xa1e8d39a0253cdd4L, + 0x6480be26c7388638L }, + { 0xee365e1d2285f382L,0x188d8d8fec0b5c36L,0x34ef1a481f0f4d82L, + 0x1a8f43e1a487d29aL } }, + /* 37 << 154 */ + { { 0x8168226d77aefb3aL,0xf69a751e1e72c253L,0x8e04359ae9594df1L, + 0x475ffd7dd14c0467L }, + { 0xb5a2c2b13844e95cL,0x85caf647dd12ef94L,0x1ecd2a9ff1063d00L, + 0x1dd2e22923843311L } }, + /* 38 << 154 */ + { { 0x38f0e09d73d17244L,0x3ede77468fc653f1L,0xae4459f5dc20e21cL, + 0x00db2ffa6a8599eaL }, + { 0x11682c3930cfd905L,0x4934d074a5c112a6L,0xbdf063c5568bfe95L, + 0x779a440a016c441aL } }, + /* 39 << 154 */ + { { 0x0c23f21897d6fbdcL,0xd3a5cd87e0776aacL,0xcee37f72d712e8dbL, + 0xfb28c70d26f74e8dL }, + { 0xffe0c728b61301a0L,0xa6282168d3724354L,0x7ff4cb00768ffedcL, + 0xc51b308803b02de9L } }, + /* 40 << 154 */ + { { 0xa5a8147c3902dda5L,0x35d2f706fe6973b4L,0x5ac2efcfc257457eL, + 0x933f48d48700611bL }, + { 0xc365af884912beb2L,0x7f5a4de6162edf94L,0xc646ba7c0c32f34bL, + 0x632c6af3b2091074L } }, + /* 41 << 154 */ + { { 0x58d4f2e3753e43a9L,0x70e1d21724d4e23fL,0xb24bf729afede6a6L, + 0x7f4a94d8710c8b60L }, + { 0xaad90a968d4faa6aL,0xd9ed0b32b066b690L,0x52fcd37b78b6dbfdL, + 0x0b64615e8bd2b431L } }, + /* 42 << 154 */ + { { 0x228e2048cfb9fad5L,0xbeaa386d240b76bdL,0x2d6681c890dad7bcL, + 0x3e553fc306d38f5eL }, + { 0xf27cdb9b9d5f9750L,0x3e85c52ad28c5b0eL,0x190795af5247c39bL, + 0x547831ebbddd6828L } }, + /* 43 << 154 */ + { { 0xf327a2274a82f424L,0x36919c787e47f89dL,0xe478391943c7392cL, + 0xf101b9aa2316fefeL }, + { 0xbcdc9e9c1c5009d2L,0xfb55ea139cd18345L,0xf5b5e231a3ce77c7L, + 0xde6b4527d2f2cb3dL } }, + /* 44 << 154 */ + { { 0x10f6a3339bb26f5fL,0x1e85db8e044d85b6L,0xc3697a0894197e54L, + 0x65e18cc0a7cb4ea8L }, + { 0xa38c4f50a471fe6eL,0xf031747a2f13439cL,0x53c4a6bac007318bL, + 0xa8da3ee51deccb3dL } }, + /* 45 << 154 */ + { { 0x0555b31c558216b1L,0x90c7810c2f79e6c2L,0x9b669f4dfe8eed3cL, + 0x70398ec8e0fac126L }, + { 0xa96a449ef701b235L,0x0ceecdb3eb94f395L,0x285fc368d0cb7431L, + 0x0d37bb5216a18c64L } }, + /* 46 << 154 */ + { { 0x05110d38b880d2ddL,0xa60f177b65930d57L,0x7da34a67f36235f5L, + 0x47f5e17c183816b9L }, + { 0xc7664b57db394af4L,0x39ba215d7036f789L,0x46d2ca0e2f27b472L, + 0xc42647eef73a84b7L } }, + /* 47 << 154 */ + { { 0x44bc754564488f1dL,0xaa922708f4cf85d5L,0x721a01d553e4df63L, + 0x649c0c515db46cedL }, + { 0x6bf0d64e3cffcb6cL,0xe3bf93fe50f71d96L,0x75044558bcc194a0L, + 0x16ae33726afdc554L } }, + /* 48 << 154 */ + { { 0xbfc01adf5ca48f3fL,0x64352f06e22a9b84L,0xcee54da1c1099e4aL, + 0xbbda54e8fa1b89c0L }, + { 0x166a3df56f6e55fbL,0x1ca44a2420176f88L,0x936afd88dfb7b5ffL, + 0xe34c24378611d4a0L } }, + /* 49 << 154 */ + { { 0x7effbb7586142103L,0x6704ba1b1f34fc4dL,0x7c2a468f10c1b122L, + 0x36b3a6108c6aace9L }, + { 0xabfcc0a775a0d050L,0x066f91973ce33e32L,0xce905ef429fe09beL, + 0x89ee25baa8376351L } }, + /* 50 << 154 */ + { { 
0x2a3ede22fd29dc76L,0x7fd32ed936f17260L,0x0cadcf68284b4126L, + 0x63422f08a7951fc8L }, + { 0x562b24f40807e199L,0xfe9ce5d122ad4490L,0xc2f51b100db2b1b4L, + 0xeb3613ffe4541d0dL } }, + /* 51 << 154 */ + { { 0xbd2c4a052680813bL,0x527aa55d561b08d6L,0xa9f8a40ea7205558L, + 0xe3eea56f243d0becL }, + { 0x7b853817a0ff58b3L,0xb67d3f651a69e627L,0x0b76bbb9a869b5d6L, + 0xa3afeb82546723edL } }, + /* 52 << 154 */ + { { 0x5f24416d3e554892L,0x8413b53d430e2a45L,0x99c56aee9032a2a0L, + 0x09432bf6eec367b1L }, + { 0x552850c6daf0ecc1L,0x49ebce555bc92048L,0xdfb66ba654811307L, + 0x1b84f7976f298597L } }, + /* 53 << 154 */ + { { 0x795904818d1d7a0dL,0xd9fabe033a6fa556L,0xa40f9c59ba9e5d35L, + 0xcb1771c1f6247577L }, + { 0x542a47cae9a6312bL,0xa34b3560552dd8c5L,0xfdf94de00d794716L, + 0xd46124a99c623094L } }, + /* 54 << 154 */ + { { 0x56b7435d68afe8b4L,0x27f205406c0d8ea1L,0x12b77e1473186898L, + 0xdbc3dd467479490fL }, + { 0x951a9842c03b0c05L,0x8b1b3bb37921bc96L,0xa573b3462b202e0aL, + 0x77e4665d47254d56L } }, + /* 55 << 154 */ + { { 0x08b70dfcd23e3984L,0xab86e8bcebd14236L,0xaa3e07f857114ba7L, + 0x5ac71689ab0ef4f2L }, + { 0x88fca3840139d9afL,0x72733f8876644af0L,0xf122f72a65d74f4aL, + 0x13931577a5626c7aL } }, + /* 56 << 154 */ + { { 0xd5b5d9eb70f8d5a4L,0x375adde7d7bbb228L,0x31e88b860c1c0b32L, + 0xd1f568c4173edbaaL }, + { 0x1592fc835459df02L,0x2beac0fb0fcd9a7eL,0xb0a6fdb81b473b0aL, + 0xe3224c6f0fe8fc48L } }, + /* 57 << 154 */ + { { 0x680bd00ee87edf5bL,0x30385f0220e77cf5L,0xe9ab98c04d42d1b2L, + 0x72d191d2d3816d77L }, + { 0x1564daca0917d9e5L,0x394eab591f8fed7fL,0xa209aa8d7fbb3896L, + 0x5564f3b9be6ac98eL } }, + /* 58 << 154 */ + { { 0xead21d05d73654efL,0x68d1a9c413d78d74L,0x61e017086d4973a0L, + 0x83da350046e6d32aL }, + { 0x6a3dfca468ae0118L,0xa1b9a4c9d02da069L,0x0b2ff9c7ebab8302L, + 0x98af07c3944ba436L } }, + /* 59 << 154 */ + { { 0x85997326995f0f9fL,0x467fade071b58bc6L,0x47e4495abd625a2bL, + 0xfdd2d01d33c3b8cdL }, + { 0x2c38ae28c693f9faL,0x48622329348f7999L,0x97bf738e2161f583L, + 0x15ee2fa7565e8cc9L } }, + /* 60 << 154 */ + { { 0xa1a5c8455777e189L,0xcc10bee0456f2829L,0x8ad95c56da762bd5L, + 0x152e2214e9d91da8L }, + { 0x975b0e727cb23c74L,0xfd5d7670a90c66dfL,0xb5b5b8ad225ffc53L, + 0xab6dff73faded2aeL } }, + /* 61 << 154 */ + { { 0xebd567816f4cbe9dL,0x0ed8b2496a574bd7L,0x41c246fe81a881faL, + 0x91564805c3db9c70L }, + { 0xd7c12b085b862809L,0x1facd1f155858d7bL,0x7693747caf09e92aL, + 0x3b69dcba189a425fL } }, + /* 62 << 154 */ + { { 0x0be28e9f967365efL,0x57300eb2e801f5c9L,0x93b8ac6ad583352fL, + 0xa2cf1f89cd05b2b7L }, + { 0x7c0c9b744dcc40ccL,0xfee38c45ada523fbL,0xb49a4dec1099cc4dL, + 0x325c377f69f069c6L } }, + /* 63 << 154 */ + { { 0xe12458ce476cc9ffL,0x580e0b6cc6d4cb63L,0xd561c8b79072289bL, + 0x0377f264a619e6daL }, + { 0x2668536288e591a5L,0xa453a7bd7523ca2bL,0x8a9536d2c1df4533L, + 0xc8e50f2fbe972f79L } }, + /* 64 << 154 */ + { { 0xd433e50f6d3549cfL,0x6f33696ffacd665eL,0x695bfdacce11fcb4L, + 0x810ee252af7c9860L }, + { 0x65450fe17159bb2cL,0xf7dfbebe758b357bL,0x2b057e74d69fea72L, + 0xd485717a92731745L } }, + /* 0 << 161 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 161 */ + { { 0x896c42e8ee36860cL,0xdaf04dfd4113c22dL,0x1adbb7b744104213L, + 0xe5fd5fa11fd394eaL }, + { 0x68235d941a4e0551L,0x6772cfbe18d10151L,0x276071e309984523L, + 0xe4e879de5a56ba98L } }, + /* 2 << 161 */ + { { 0xaaafafb0285b9491L,0x01a0be881e4c705eL,0xff1d4f5d2ad9caabL, + 0x6e349a4ac37a233fL }, + { 0xcf1c12464a1c6a16L,0xd99e6b6629383260L,0xea3d43665f6d5471L, + 0x36974d04ff8cc89bL } }, + /* 3 << 161 */ + { { 
0xc26c49a1cfe89d80L,0xb42c026dda9c8371L,0xca6c013adad066d2L, + 0xfb8f722856a4f3eeL }, + { 0x08b579ecd850935bL,0x34c1a74cd631e1b3L,0xcb5fe596ac198534L, + 0x39ff21f6e1f24f25L } }, + /* 4 << 161 */ + { { 0x27f29e148f929057L,0x7a64ae06c0c853dfL,0x256cd18358e9c5ceL, + 0x9d9cce82ded092a5L }, + { 0xcc6e59796e93b7c7L,0xe1e4709231bb9e27L,0xb70b3083aa9e29a0L, + 0xbf181a753785e644L } }, + /* 5 << 161 */ + { { 0xf53f2c658ead09f7L,0x1335e1d59780d14dL,0x69cc20e0cd1b66bcL, + 0x9b670a37bbe0bfc8L }, + { 0xce53dc8128efbeedL,0x0c74e77c8326a6e5L,0x3604e0d2b88e9a63L, + 0xbab38fca13dc2248L } }, + /* 6 << 161 */ + { { 0x8ed6e8c85c0a3f1eL,0xbcad24927c87c37fL,0xfdfb62bb9ee3b78dL, + 0xeba8e477cbceba46L }, + { 0x37d38cb0eeaede4bL,0x0bc498e87976deb6L,0xb2944c046b6147fbL, + 0x8b123f35f71f9609L } }, + /* 7 << 161 */ + { { 0xa155dcc7de79dc24L,0xf1168a32558f69cdL,0xbac215950d1850dfL, + 0x15c8295bb204c848L }, + { 0xf661aa367d8184ffL,0xc396228e30447bdbL,0x11cd5143bde4a59eL, + 0xe3a26e3b6beab5e6L } }, + /* 8 << 161 */ + { { 0xd3b3a13f1402b9d0L,0x573441c32c7bc863L,0x4b301ec4578c3e6eL, + 0xc26fc9c40adaf57eL }, + { 0x96e71bfd7493cea3L,0xd05d4b3f1af81456L,0xdaca2a8a6a8c608fL, + 0x53ef07f60725b276L } }, + /* 9 << 161 */ + { { 0x07a5fbd27824fc56L,0x3467521813289077L,0x5bf69fd5e0c48349L, + 0xa613ddd3b6aa7875L }, + { 0x7f78c19c5450d866L,0x46f4409c8f84a481L,0x9f1d192890fce239L, + 0x016c4168b2ce44b9L } }, + /* 10 << 161 */ + { { 0xbae023f0c7435978L,0xb152c88820e30e19L,0x9c241645e3fa6fafL, + 0x735d95c184823e60L }, + { 0x0319757303955317L,0x0b4b02a9f03b4995L,0x076bf55970274600L, + 0x32c5cc53aaf57508L } }, + /* 11 << 161 */ + { { 0xe8af6d1f60624129L,0xb7bc5d649a5e2b5eL,0x3814b0485f082d72L, + 0x76f267f2ce19677aL }, + { 0x626c630fb36eed93L,0x55230cd73bf56803L,0x78837949ce2736a0L, + 0x0d792d60aa6c55f1L } }, + /* 12 << 161 */ + { { 0x0318dbfdd5c7c5d2L,0xb38f8da7072b342dL,0x3569bddc7b8de38aL, + 0xf25b5887a1c94842L }, + { 0xb2d5b2842946ad60L,0x854f29ade9d1707eL,0xaa5159dc2c6a4509L, + 0x899f94c057189837L } }, + /* 13 << 161 */ + { { 0xcf6adc51f4a55b03L,0x261762de35e3b2d5L,0x4cc4301204827b51L, + 0xcd22a113c6021442L }, + { 0xce2fd61a247c9569L,0x59a50973d152becaL,0x6c835a1163a716d4L, + 0xc26455ed187dedcfL } }, + /* 14 << 161 */ + { { 0x27f536e049ce89e7L,0x18908539cc890cb5L,0x308909abd83c2aa1L, + 0xecd3142b1ab73bd3L }, + { 0x6a85bf59b3f5ab84L,0x3c320a68f2bea4c6L,0xad8dc5386da4541fL, + 0xeaf34eb0b7c41186L } }, + /* 15 << 161 */ + { { 0x1c780129977c97c4L,0x5ff9beebc57eb9faL,0xa24d0524c822c478L, + 0xfd8eec2a461cd415L }, + { 0xfbde194ef027458cL,0xb4ff53191d1be115L,0x63f874d94866d6f4L, + 0x35c75015b21ad0c9L } }, + /* 16 << 161 */ + { { 0xa6b5c9d646ac49d2L,0x42c77c0b83137aa9L,0x24d000fc68225a38L, + 0x0f63cfc82fe1e907L }, + { 0x22d1b01bc6441f95L,0x7d38f719ec8e448fL,0x9b33fa5f787fb1baL, + 0x94dcfda1190158dfL } }, + /* 17 << 161 */ + { { 0xc47cb3395f6d4a09L,0x6b4f355cee52b826L,0x3d100f5df51b930aL, + 0xf4512fac9f668f69L }, + { 0x546781d5206c4c74L,0xd021d4d4cb4d2e48L,0x494a54c2ca085c2dL, + 0xf1dbaca4520850a8L } }, + /* 18 << 161 */ + { { 0x63c79326490a1acaL,0xcb64dd9c41526b02L,0xbb772591a2979258L, + 0x3f58297048d97846L }, + { 0xd66b70d17c213ba7L,0xc28febb5e8a0ced4L,0x6b911831c10338c1L, + 0x0d54e389bf0126f3L } }, + /* 19 << 161 */ + { { 0x7048d4604af206eeL,0x786c88f677e97cb9L,0xd4375ae1ac64802eL, + 0x469bcfe1d53ec11cL }, + { 0xfc9b340d47062230L,0xe743bb57c5b4a3acL,0xfe00b4aa59ef45acL, + 0x29a4ef2359edf188L } }, + /* 20 << 161 */ + { { 0x40242efeb483689bL,0x2575d3f6513ac262L,0xf30037c80ca6db72L, + 0xc9fcce8298864be2L }, + { 
0x84a112ff0149362dL,0x95e575821c4ae971L,0x1fa4b1a8945cf86cL, + 0x4525a7340b024a2fL } }, + /* 21 << 161 */ + { { 0xe76c8b628f338360L,0x483ff59328edf32bL,0x67e8e90a298b1aecL, + 0x9caab338736d9a21L }, + { 0x5c09d2fd66892709L,0x2496b4dcb55a1d41L,0x93f5fb1ae24a4394L, + 0x08c750496fa8f6c1L } }, + /* 22 << 161 */ + { { 0xcaead1c2c905d85fL,0xe9d7f7900733ae57L,0x24c9a65cf07cdd94L, + 0x7389359ca4b55931L }, + { 0xf58709b7367e45f7L,0x1f203067cb7e7adcL,0x82444bffc7b72818L, + 0x07303b35baac8033L } }, + /* 23 << 161 */ + { { 0x1e1ee4e4d13b7ea1L,0xe6489b24e0e74180L,0xa5f2c6107e70ef70L, + 0xa1655412bdd10894L }, + { 0x555ebefb7af4194eL,0x533c1c3c8e89bd9cL,0x735b9b5789895856L, + 0x15fb3cd2567f5c15L } }, + /* 24 << 161 */ + { { 0x057fed45526f09fdL,0xe8a4f10c8128240aL,0x9332efc4ff2bfd8dL, + 0x214e77a0bd35aa31L }, + { 0x32896d7314faa40eL,0x767867ec01e5f186L,0xc9adf8f117a1813eL, + 0xcb6cda7854741795L } }, + /* 25 << 161 */ + { { 0xb7521b6d349d51aaL,0xf56b5a9ee3c7b8e9L,0xc6f1e5c932a096dfL, + 0x083667c4a3635024L }, + { 0x365ea13518087f2fL,0xf1b8eaacd136e45dL,0xc8a0e48473aec989L, + 0xd75a324b142c9259L } }, + /* 26 << 161 */ + { { 0xb7b4d00101dae185L,0x45434e0b9b7a94bcL,0xf54339affbd8cb0bL, + 0xdcc4569ee98ef49eL }, + { 0x7789318a09a51299L,0x81b4d206b2b025d8L,0xf64aa418fae85792L, + 0x3e50258facd7baf7L } }, + /* 27 << 161 */ + { { 0xdce84cdb2996864bL,0xa2e670891f485fa4L,0xb28b2bb6534c6a5aL, + 0x31a7ec6bc94b9d39L }, + { 0x1d217766d6bc20daL,0x4acdb5ec86761190L,0x6872632873701063L, + 0x4d24ee7c2128c29bL } }, + /* 28 << 161 */ + { { 0xc072ebd3a19fd868L,0x612e481cdb8ddd3bL,0xb4e1d7541a64d852L, + 0x00ef95acc4c6c4abL }, + { 0x1536d2edaa0a6c46L,0x6129408643774790L,0x54af25e8343fda10L, + 0x9ff9d98dfd25d6f2L } }, + /* 29 << 161 */ + { { 0x0746af7c468b8835L,0x977a31cb730ecea7L,0xa5096b80c2cf4a81L, + 0xaa9868336458c37aL }, + { 0x6af29bf3a6bd9d34L,0x6a62fe9b33c5d854L,0x50e6c304b7133b5eL, + 0x04b601597d6e6848L } }, + /* 30 << 161 */ + { { 0x4cd296df5579bea4L,0x10e35ac85ceedaf1L,0x04c4c5fde3bcc5b1L, + 0x95f9ee8a89412cf9L }, + { 0x2c9459ee82b6eb0fL,0x2e84576595c2aaddL,0x774a84aed327fcfeL, + 0xd8c937220368d476L } }, + /* 31 << 161 */ + { { 0x0dbd5748f83e8a3bL,0xa579aa968d2495f3L,0x535996a0ae496e9bL, + 0x07afbfe9b7f9bcc2L }, + { 0x3ac1dc6d5b7bd293L,0x3b592cff7022323dL,0xba0deb989c0a3e76L, + 0x18e78e9f4b197acbL } }, + /* 32 << 161 */ + { { 0x211cde10296c36efL,0x7ee8967282c4da77L,0xb617d270a57836daL, + 0xf0cd9c319cb7560bL }, + { 0x01fdcbf7e455fe90L,0x3fb53cbb7e7334f3L,0x781e2ea44e7de4ecL, + 0x8adab3ad0b384fd0L } }, + /* 33 << 161 */ + { { 0x129eee2f53d64829L,0x7a471e17a261492bL,0xe4f9adb9e4cb4a2cL, + 0x3d359f6f97ba2c2dL }, + { 0x346c67860aacd697L,0x92b444c375c2f8a8L,0xc79fa117d85df44eL, + 0x56782372398ddf31L } }, + /* 34 << 161 */ + { { 0x60e690f2bbbab3b8L,0x4851f8ae8b04816bL,0xc72046ab9c92e4d2L, + 0x518c74a17cf3136bL }, + { 0xff4eb50af9877d4cL,0x14578d90a919cabbL,0x8218f8c4ac5eb2b6L, + 0xa3ccc547542016e4L } }, + /* 35 << 161 */ + { { 0x025bf48e327f8349L,0xf3e97346f43cb641L,0xdc2bafdf500f1085L, + 0x571678762f063055L }, + { 0x5bd914b9411925a6L,0x7c078d48a1123de5L,0xee6bf835182b165dL, + 0xb11b5e5bba519727L } }, + /* 36 << 161 */ + { { 0xe33ea76c1eea7b85L,0x2352b46192d4f85eL,0xf101d334afe115bbL, + 0xfabc1294889175a3L }, + { 0x7f6bcdc05233f925L,0xe0a802dbe77fec55L,0xbdb47b758069b659L, + 0x1c5e12def98fbd74L } }, + /* 37 << 161 */ + { { 0x869c58c64b8457eeL,0xa5360f694f7ea9f7L,0xe576c09ff460b38fL, + 0x6b70d54822b7fb36L }, + { 0x3fd237f13bfae315L,0x33797852cbdff369L,0x97df25f525b516f9L, + 0x46f388f2ba38ad2dL } }, + /* 38 << 161 */ + { { 
0x656c465889d8ddbbL,0x8830b26e70f38ee8L,0x4320fd5cde1212b0L, + 0xc34f30cfe4a2edb2L }, + { 0xabb131a356ab64b8L,0x7f77f0ccd99c5d26L,0x66856a37bf981d94L, + 0x19e76d09738bd76eL } }, + /* 39 << 161 */ + { { 0xe76c8ac396238f39L,0xc0a482bea830b366L,0xb7b8eaff0b4eb499L, + 0x8ecd83bc4bfb4865L }, + { 0x971b2cb7a2f3776fL,0xb42176a4f4b88adfL,0xb9617df5be1fa446L, + 0x8b32d508cd031bd2L } }, + /* 40 << 161 */ + { { 0x1c6bd47d53b618c0L,0xc424f46c6a227923L,0x7303ffdedd92d964L, + 0xe971287871b5abf2L }, + { 0x8f48a632f815561dL,0x85f48ff5d3c055d1L,0x222a14277525684fL, + 0xd0d841a067360cc3L } }, + /* 41 << 161 */ + { { 0x4245a9260b9267c6L,0xc78913f1cf07f863L,0xaa844c8e4d0d9e24L, + 0xa42ad5223d5f9017L }, + { 0xbd371749a2c989d5L,0x928292dfe1f5e78eL,0x493b383e0a1ea6daL, + 0x5136fd8d13aee529L } }, + /* 42 << 161 */ + { { 0x860c44b1f2c34a99L,0x3b00aca4bf5855acL,0xabf6aaa0faaf37beL, + 0x65f436822a53ec08L }, + { 0x1d9a5801a11b12e1L,0x78a7ab2ce20ed475L,0x0de1067e9a41e0d5L, + 0x30473f5f305023eaL } }, + /* 43 << 161 */ + { { 0xdd3ae09d169c7d97L,0x5cd5baa4cfaef9cdL,0x5cd7440b65a44803L, + 0xdc13966a47f364deL }, + { 0x077b2be82b8357c1L,0x0cb1b4c5e9d57c2aL,0x7a4ceb3205ff363eL, + 0xf310fa4dca35a9efL } }, + /* 44 << 161 */ + { { 0xdbb7b352f97f68c6L,0x0c773b500b02cf58L,0xea2e48213c1f96d9L, + 0xffb357b0eee01815L }, + { 0xb9c924cde0f28039L,0x0b36c95a46a3fbe4L,0x1faaaea45e46db6cL, + 0xcae575c31928aaffL } }, + /* 45 << 161 */ + { { 0x7f671302a70dab86L,0xfcbd12a971c58cfcL,0xcbef9acfbee0cb92L, + 0x573da0b9f8c1b583L }, + { 0x4752fcfe0d41d550L,0xe7eec0e32155cffeL,0x0fc39fcb545ae248L, + 0x522cb8d18065f44eL } }, + /* 46 << 161 */ + { { 0x263c962a70cbb96cL,0xe034362abcd124a9L,0xf120db283c2ae58dL, + 0xb9a38d49fef6d507L }, + { 0xb1fd2a821ff140fdL,0xbd162f3020aee7e0L,0x4e17a5d4cb251949L, + 0x2aebcb834f7e1c3dL } }, + /* 47 << 161 */ + { { 0x608eb25f937b0527L,0xf42e1e47eb7d9997L,0xeba699c4b8a53a29L, + 0x1f921c71e091b536L }, + { 0xcce29e7b5b26bbd5L,0x7a8ef5ed3b61a680L,0xe5ef8043ba1f1c7eL, + 0x16ea821718158ddaL } }, + /* 48 << 161 */ + { { 0x01778a2b599ff0f9L,0x68a923d78104fc6bL,0x5bfa44dfda694ff3L, + 0x4f7199dbf7667f12L }, + { 0xc06d8ff6e46f2a79L,0x08b5deade9f8131dL,0x02519a59abb4ce7cL, + 0xc4f710bcb42aec3eL } }, + /* 49 << 161 */ + { { 0x3d77b05778bde41aL,0x6474bf80b4186b5aL,0x048b3f6788c65741L, + 0xc64519de03c7c154L }, + { 0xdf0738460edfcc4fL,0x319aa73748f1aa6bL,0x8b9f8a02ca909f77L, + 0x902581397580bfefL } }, + /* 50 << 161 */ + { { 0xd8bfd3cac0c22719L,0xc60209e4c9ca151eL,0x7a744ab5d9a1a69cL, + 0x6de5048b14937f8fL }, + { 0x171938d8e115ac04L,0x7df709401c6b16d2L,0xa6aeb6637f8e94e7L, + 0xc130388e2a2cf094L } }, + /* 51 << 161 */ + { { 0x1850be8477f54e6eL,0x9f258a7265d60fe5L,0xff7ff0c06c9146d6L, + 0x039aaf90e63a830bL }, + { 0x38f27a739460342fL,0x4703148c3f795f8aL,0x1bb5467b9681a97eL, + 0x00931ba5ecaeb594L } }, + /* 52 << 161 */ + { { 0xcdb6719d786f337cL,0xd9c01cd2e704397dL,0x0f4a3f20555c2fefL, + 0x004525097c0af223L }, + { 0x54a5804784db8e76L,0x3bacf1aa93c8aa06L,0x11ca957cf7919422L, + 0x5064105378cdaa40L } }, + /* 53 << 161 */ + { { 0x7a3038749f7144aeL,0x170c963f43d4acfdL,0x5e14814958ddd3efL, + 0xa7bde5829e72dba8L }, + { 0x0769da8b6fa68750L,0xfa64e532572e0249L,0xfcaadf9d2619ad31L, + 0x87882daaa7b349cdL } }, + /* 54 << 161 */ + { { 0x9f6eb7316c67a775L,0xcb10471aefc5d0b1L,0xb433750ce1b806b2L, + 0x19c5714d57b1ae7eL }, + { 0xc0dc8b7bed03fd3fL,0xdd03344f31bc194eL,0xa66c52a78c6320b5L, + 0x8bc82ce3d0b6fd93L } }, + /* 55 << 161 */ + { { 0xf8e13501b35f1341L,0xe53156dd25a43e42L,0xd3adf27e4daeb85cL, + 0xb81d8379bbeddeb5L }, + { 
0x1b0b546e2e435867L,0x9020eb94eba5dd60L,0x37d911618210cb9dL, + 0x4c596b315c91f1cfL } }, + /* 56 << 161 */ + { { 0xb228a90f0e0b040dL,0xbaf02d8245ff897fL,0x2aac79e600fa6122L, + 0x248288178e36f557L }, + { 0xb9521d31113ec356L,0x9e48861e15eff1f8L,0x2aa1d412e0d41715L, + 0x71f8620353f131b8L } }, + /* 57 << 161 */ + { { 0xf60da8da3fd19408L,0x4aa716dc278d9d99L,0x394531f7a8c51c90L, + 0xb560b0e8f59db51cL }, + { 0xa28fc992fa34bdadL,0xf024fa149cd4f8bdL,0x5cf530f723a9d0d3L, + 0x615ca193e28c9b56L } }, + /* 58 << 161 */ + { { 0x6d2a483d6f73c51eL,0xa4cb2412ea0dc2ddL,0x50663c411eb917ffL, + 0x3d3a74cfeade299eL }, + { 0x29b3990f4a7a9202L,0xa9bccf59a7b15c3dL,0x66a3ccdca5df9208L, + 0x48027c1443f2f929L } }, + /* 59 << 161 */ + { { 0xd385377c40b557f0L,0xe001c366cd684660L,0x1b18ed6be2183a27L, + 0x879738d863210329L }, + { 0xa687c74bbda94882L,0xd1bbcc48a684b299L,0xaf6f1112863b3724L, + 0x6943d1b42c8ce9f8L } }, + /* 60 << 161 */ + { { 0xe044a3bb098cafb4L,0x27ed231060d48cafL,0x542b56753a31b84dL, + 0xcbf3dd50fcddbed7L }, + { 0x25031f1641b1d830L,0xa7ec851dcb0c1e27L,0xac1c8fe0b5ae75dbL, + 0xb24c755708c52120L } }, + /* 61 << 161 */ + { { 0x57f811dc1d4636c3L,0xf8436526681a9939L,0x1f6bc6d99c81adb3L, + 0x840f8ac35b7d80d4L }, + { 0x731a9811f4387f1aL,0x7c501cd3b5156880L,0xa5ca4a07dfe68867L, + 0xf123d8f05fcea120L } }, + /* 62 << 161 */ + { { 0x1fbb0e71d607039eL,0x2b70e215cd3a4546L,0x32d2f01d53324091L, + 0xb796ff08180ab19bL }, + { 0x32d87a863c57c4aaL,0x2aed9cafb7c49a27L,0x9fb35eac31630d98L, + 0x338e8cdf5c3e20a3L } }, + /* 63 << 161 */ + { { 0x80f1618266cde8dbL,0x4e1599802d72fd36L,0xd7b8f13b9b6e5072L, + 0xf52139073b7b5dc1L }, + { 0x4d431f1d8ce4396eL,0x37a1a680a7ed2142L,0xbf375696d01aaf6bL, + 0xaa1c0c54e63aab66L } }, + /* 64 << 161 */ + { { 0x3014368b4ed80940L,0x67e6d0567a6fceddL,0x7c208c49ca97579fL, + 0xfe3d7a81a23597f6L }, + { 0x5e2032027e096ae2L,0xb1f3e1e724b39366L,0x26da26f32fdcdffcL, + 0x79422f1d6097be83L } }, + /* 0 << 168 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 168 */ + { { 0x263a2cfb9db3b381L,0x9c3a2deed4df0a4bL,0x728d06e97d04e61fL, + 0x8b1adfbc42449325L }, + { 0x6ec1d9397e053a1bL,0xee2be5c766daf707L,0x80ba1e14810ac7abL, + 0xdd2ae778f530f174L } }, + /* 2 << 168 */ + { { 0x0435d97a205b9d8bL,0x6eb8f064056756d4L,0xd5e88a8bb6f8210eL, + 0x070ef12dec9fd9eaL }, + { 0x4d8495053bcc876aL,0x12a75338a7404ce3L,0xd22b49e1b8a1db5eL, + 0xec1f205114bfa5adL } }, + /* 3 << 168 */ + { { 0xadbaeb79b6828f36L,0x9d7a025801bd5b9eL,0xeda01e0d1e844b0cL, + 0x4b625175887edfc9L }, + { 0x14109fdd9669b621L,0x88a2ca56f6f87b98L,0xfe2eb788170df6bcL, + 0x0cea06f4ffa473f9L } }, + /* 4 << 168 */ + { { 0x43ed81b5c4e83d33L,0xd9f358795efd488bL,0x164a620f9deb4d0fL, + 0xc6927bdbac6a7394L }, + { 0x45c28df79f9e0f03L,0x2868661efcd7e1a9L,0x7cf4e8d0ffa348f1L, + 0x6bd4c284398538e0L } }, + /* 5 << 168 */ + { { 0x2618a091289a8619L,0xef796e606671b173L,0x664e46e59090c632L, + 0xa38062d41e66f8fbL }, + { 0x6c744a200573274eL,0xd07b67e4a9271394L,0x391223b26bdc0e20L, + 0xbe2d93f1eb0a05a7L } }, + /* 6 << 168 */ + { { 0xf23e2e533f36d141L,0xe84bb3d44dfca442L,0xb804a48d6b7c023aL, + 0x1e16a8fa76431c3bL }, + { 0x1b5452adddd472e0L,0x7d405ee70d1ee127L,0x50fc6f1dffa27599L, + 0x351ac53cbf391b35L } }, + /* 7 << 168 */ + { { 0x7efa14b84444896bL,0x64974d2ff94027fbL,0xefdcd0e8de84487dL, + 0x8c45b2602b48989bL }, + { 0xa8fcbbc2d8463487L,0xd1b2b3f73fbc476cL,0x21d005b7c8f443c0L, + 0x518f2e6740c0139cL } }, + /* 8 << 168 */ + { { 0x56036e8c06d75fc1L,0x2dcf7bb73249a89fL,0x81dd1d3de245e7ddL, + 0xf578dc4bebd6e2a7L }, + { 
0x4c028903df2ce7a0L,0xaee362889c39afacL,0xdc847c31146404abL, + 0x6304c0d8a4e97818L } }, + /* 9 << 168 */ + { { 0xae51dca2a91f6791L,0x2abe41909baa9efcL,0xd9d2e2f4559c7ac1L, + 0xe82f4b51fc9f773aL }, + { 0xa77130274073e81cL,0xc0276facfbb596fcL,0x1d819fc9a684f70cL, + 0x29b47fddc9f7b1e0L } }, + /* 10 << 168 */ + { { 0x358de103459b1940L,0xec881c595b013e93L,0x51574c9349532ad3L, + 0x2db1d445b37b46deL }, + { 0xc6445b87df239fd8L,0xc718af75151d24eeL,0xaea1c4a4f43c6259L, + 0x40c0e5d770be02f7L } }, + /* 11 << 168 */ + { { 0x6a4590f4721b33f2L,0x2124f1fbfedf04eaL,0xf8e53cde9745efe7L, + 0xe7e1043265f046d9L }, + { 0xc3fca28ee4d0c7e6L,0x847e339a87253b1bL,0x9b5953483743e643L, + 0xcb6a0a0b4fd12fc5L } }, + /* 12 << 168 */ + { { 0xfb6836c327d02dccL,0x5ad009827a68bcc2L,0x1b24b44c005e912dL, + 0xcc83d20f811fdcfeL }, + { 0x36527ec1666fba0cL,0x6994819714754635L,0xfcdcb1a8556da9c2L, + 0xa593426781a732b2L } }, + /* 13 << 168 */ + { { 0xec1214eda714181dL,0x609ac13b6067b341L,0xff4b4c97a545df1fL, + 0xa124050134d2076bL }, + { 0x6efa0c231409ca97L,0x254cc1a820638c43L,0xd4e363afdcfb46cdL, + 0x62c2adc303942a27L } }, + /* 14 << 168 */ + { { 0xc67b9df056e46483L,0xa55abb2063736356L,0xab93c098c551bc52L, + 0x382b49f9b15fe64bL }, + { 0x9ec221ad4dff8d47L,0x79caf615437df4d6L,0x5f13dc64bb456509L, + 0xe4c589d9191f0714L } }, + /* 15 << 168 */ + { { 0x27b6a8ab3fd40e09L,0xe455842e77313ea9L,0x8b51d1e21f55988bL, + 0x5716dd73062bbbfcL }, + { 0x633c11e54e8bf3deL,0x9a0e77b61b85be3bL,0x565107290911cca6L, + 0x27e76495efa6590fL } }, + /* 16 << 168 */ + { { 0xe4ac8b33070d3aabL,0x2643672b9a2cd5e5L,0x52eff79b1cfc9173L, + 0x665ca49b90a7c13fL }, + { 0x5a8dda59b3efb998L,0x8a5b922d052f1341L,0xae9ebbab3cf9a530L, + 0x35986e7bf56da4d7L } }, + /* 17 << 168 */ + { { 0x3a636b5cff3513ccL,0xbb0cf8ba3198f7ddL,0xb8d4052241f16f86L, + 0x760575d8de13a7bfL }, + { 0x36f74e169f7aa181L,0x163a3ecff509ed1cL,0x6aead61f3c40a491L, + 0x158c95fcdfe8fcaaL } }, + /* 18 << 168 */ + { { 0xa3991b6e13cda46fL,0x79482415342faed0L,0xf3ba5bde666b5970L, + 0x1d52e6bcb26ab6ddL }, + { 0x768ba1e78608dd3dL,0x4930db2aea076586L,0xd9575714e7dc1afaL, + 0x1fc7bf7df7c58817L } }, + /* 19 << 168 */ + { { 0x6b47accdd9eee96cL,0x0ca277fbe58cec37L,0x113fe413e702c42aL, + 0xdd1764eec47cbe51L }, + { 0x041e7cde7b3ed739L,0x50cb74595ce9e1c0L,0x355685132925b212L, + 0x7cff95c4001b081cL } }, + /* 20 << 168 */ + { { 0x63ee4cbd8088b454L,0xdb7f32f79a9e0c8aL,0xb377d4186b2447cbL, + 0xe3e982aad370219bL }, + { 0x06ccc1e4c2a2a593L,0x72c368650773f24fL,0xa13b4da795859423L, + 0x8bbf1d3375040c8fL } }, + /* 21 << 168 */ + { { 0x726f0973da50c991L,0x48afcd5b822d6ee2L,0xe5fc718b20fd7771L, + 0xb9e8e77dfd0807a1L }, + { 0x7f5e0f4499a7703dL,0x6972930e618e36f3L,0x2b7c77b823807bbeL, + 0xe5b82405cb27ff50L } }, + /* 22 << 168 */ + { { 0xba8b8be3bd379062L,0xd64b7a1d2dce4a92L,0x040a73c5b2952e37L, + 0x0a9e252ed438aecaL }, + { 0xdd43956bc39d3bcbL,0x1a31ca00b32b2d63L,0xd67133b85c417a18L, + 0xd08e47902ef442c8L } }, + /* 23 << 168 */ + { { 0x98cb1ae9255c0980L,0x4bd863812b4a739fL,0x5a5c31e11e4a45a1L, + 0x1e5d55fe9cb0db2fL }, + { 0x74661b068ff5cc29L,0x026b389f0eb8a4f4L,0x536b21a458848c24L, + 0x2e5bf8ec81dc72b0L } }, + /* 24 << 168 */ + { { 0x03c187d0ad886aacL,0x5c16878ab771b645L,0xb07dfc6fc74045abL, + 0x2c6360bf7800caedL }, + { 0x24295bb5b9c972a3L,0xc9e6f88e7c9a6dbaL,0x90ffbf2492a79aa6L, + 0xde29d50a41c26ac2L } }, + /* 25 << 168 */ + { { 0x9f0af483d309cbe6L,0x5b020d8ae0bced4fL,0x606e986db38023e3L, + 0xad8f2c9d1abc6933L }, + { 0x19292e1de7400e93L,0xfe3e18a952be5e4dL,0xe8e9771d2e0680bfL, + 0x8c5bec98c54db063L } }, + /* 26 << 168 */ + { { 
0x2af9662a74a55d1fL,0xe3fbf28f046f66d8L,0xa3a72ab4d4dc4794L, + 0x09779f455c7c2dd8L }, + { 0xd893bdafc3d19d8dL,0xd5a7509457d6a6dfL,0x8cf8fef9952e6255L, + 0x3da67cfbda9a8affL } }, + /* 27 << 168 */ + { { 0x4c23f62a2c160dcdL,0x34e6c5e38f90eaefL,0x35865519a9a65d5aL, + 0x07c48aae8fd38a3dL }, + { 0xb7e7aeda50068527L,0x2c09ef231c90936aL,0x31ecfeb6e879324cL, + 0xa0871f6bfb0ec938L } }, + /* 28 << 168 */ + { { 0xb1f0fb68d84d835dL,0xc90caf39861dc1e6L,0x12e5b0467594f8d7L, + 0x26897ae265012b92L }, + { 0xbcf68a08a4d6755dL,0x403ee41c0991fbdaL,0x733e343e3bbf17e8L, + 0xd2c7980d679b3d65L } }, + /* 29 << 168 */ + { { 0x33056232d2e11305L,0x966be492f3c07a6fL,0x6a8878ffbb15509dL, + 0xff2211010a9b59a4L }, + { 0x6c9f564aabe30129L,0xc6f2c940336e64cfL,0x0fe752628b0c8022L, + 0xbe0267e96ae8db87L } }, + /* 30 << 168 */ + { { 0x22e192f193bc042bL,0xf085b534b237c458L,0xa0d192bd832c4168L, + 0x7a76e9e3bdf6271dL }, + { 0x52a882fab88911b5L,0xc85345e4b4db0eb5L,0xa3be02a681a7c3ffL, + 0x51889c8cf0ec0469L } }, + /* 31 << 168 */ + { { 0x9d031369a5e829e5L,0xcbb4c6fc1607aa41L,0x75ac59a6241d84c1L, + 0xc043f2bf8829e0eeL }, + { 0x82a38f758ea5e185L,0x8bda40b9d87cbd9fL,0x9e65e75e2d8fc601L, + 0x3d515f74a35690b3L } }, + /* 32 << 168 */ + { { 0x534acf4fda79e5acL,0x68b83b3a8630215fL,0x5c748b2ed085756eL, + 0xb0317258e5d37cb2L }, + { 0x6735841ac5ccc2c4L,0x7d7dc96b3d9d5069L,0xa147e410fd1754bdL, + 0x65296e94d399ddd5L } }, + /* 33 << 168 */ + { { 0xf6b5b2d0bc8fa5bcL,0x8a5ead67500c277bL,0x214625e6dfa08a5dL, + 0x51fdfedc959cf047L }, + { 0x6bc9430b289fca32L,0xe36ff0cf9d9bdc3fL,0x2fe187cb58ea0edeL, + 0xed66af205a900b3fL } }, + /* 34 << 168 */ + { { 0x00e0968b5fa9f4d6L,0x2d4066ce37a362e7L,0xa99a9748bd07e772L, + 0x710989c006a4f1d0L }, + { 0xd5dedf35ce40cbd8L,0xab55c5f01743293dL,0x766f11448aa24e2cL, + 0x94d874f8605fbcb4L } }, + /* 35 << 168 */ + { { 0xa365f0e8a518001bL,0xee605eb69d04ef0fL,0x5a3915cdba8d4d25L, + 0x44c0e1b8b5113472L }, + { 0xcbb024e88b6740dcL,0x89087a53ee1d4f0cL,0xa88fa05c1fc4e372L, + 0x8bf395cbaf8b3af2L } }, + /* 36 << 168 */ + { { 0x1e71c9a1deb8568bL,0xa35daea080fb3d32L,0xe8b6f2662cf8fb81L, + 0x6d51afe89490696aL }, + { 0x81beac6e51803a19L,0xe3d24b7f86219080L,0x727cfd9ddf6f463cL, + 0x8c6865ca72284ee8L } }, + /* 37 << 168 */ + { { 0x32c88b7db743f4efL,0x3793909be7d11dceL,0xd398f9222ff2ebe8L, + 0x2c70ca44e5e49796L }, + { 0xdf4d9929cb1131b1L,0x7826f29825888e79L,0x4d3a112cf1d8740aL, + 0x00384cb6270afa8bL } }, + /* 38 << 168 */ + { { 0xcb64125b3ab48095L,0x3451c25662d05106L,0xd73d577da4955845L, + 0x39570c16bf9f4433L }, + { 0xd7dfaad3adecf263L,0xf1c3d8d1dc76e102L,0x5e774a5854c6a836L, + 0xdad4b6723e92d47bL } }, + /* 39 << 168 */ + { { 0xbe7e990ff0d796a0L,0x5fc62478df0e8b02L,0x8aae8bf4030c00adL, + 0x3d2db93b9004ba0fL }, + { 0xe48c8a79d85d5ddcL,0xe907caa76bb07f34L,0x58db343aa39eaed5L, + 0x0ea6e007adaf5724L } }, + /* 40 << 168 */ + { { 0xe00df169d23233f3L,0x3e32279677cb637fL,0x1f897c0e1da0cf6cL, + 0xa651f5d831d6bbddL }, + { 0xdd61af191a230c76L,0xbd527272cdaa5e4aL,0xca753636d0abcd7eL, + 0x78bdd37c370bd8dcL } }, + /* 41 << 168 */ + { { 0xc23916c217cd93feL,0x65b97a4ddadce6e2L,0xe04ed4eb174e42f8L, + 0x1491ccaabb21480aL }, + { 0x145a828023196332L,0x3c3862d7587b479aL,0x9f4a88a301dcd0edL, + 0x4da2b7ef3ea12f1fL } }, + /* 42 << 168 */ + { { 0xf8e7ae33b126e48eL,0x404a0b32f494e237L,0x9beac474c55acadbL, + 0x4ee5cf3bcbec9fd9L }, + { 0x336b33b97df3c8c3L,0xbd905fe3b76808fdL,0x8f436981aa45c16aL, + 0x255c5bfa3dd27b62L } }, + /* 43 << 168 */ + { { 0x71965cbfc3dd9b4dL,0xce23edbffc068a87L,0xb78d4725745b029bL, + 0x74610713cefdd9bdL }, + { 
0x7116f75f1266bf52L,0x0204672218e49bb6L,0xdf43df9f3d6f19e3L, + 0xef1bc7d0e685cb2fL } }, + /* 44 << 168 */ + { { 0xcddb27c17078c432L,0xe1961b9cb77fedb7L,0x1edc2f5cc2290570L, + 0x2c3fefca19cbd886L }, + { 0xcf880a36c2af389aL,0x96c610fdbda71ceaL,0xf03977a932aa8463L, + 0x8eb7763f8586d90aL } }, + /* 45 << 168 */ + { { 0x3f3424542a296e77L,0xc871868342837a35L,0x7dc710906a09c731L, + 0x54778ffb51b816dbL }, + { 0x6b33bfecaf06defdL,0xfe3c105f8592b70bL,0xf937fda461da6114L, + 0x3c13e6514c266ad7L } }, + /* 46 << 168 */ + { { 0xe363a829855938e8L,0x2eeb5d9e9de54b72L,0xbeb93b0e20ccfab9L, + 0x3dffbb5f25e61a25L }, + { 0x7f655e431acc093dL,0x0cb6cc3d3964ce61L,0x6ab283a1e5e9b460L, + 0x55d787c5a1c7e72dL } }, + /* 47 << 168 */ + { { 0x4d2efd47deadbf02L,0x11e80219ac459068L,0x810c762671f311f0L, + 0xfa17ef8d4ab6ef53L }, + { 0xaf47fd2593e43bffL,0x5cb5ff3f0be40632L,0x546871068ee61da3L, + 0x7764196eb08afd0fL } }, + /* 48 << 168 */ + { { 0x831ab3edf0290a8fL,0xcae81966cb47c387L,0xaad7dece184efb4fL, + 0xdcfc53b34749110eL }, + { 0x6698f23c4cb632f9L,0xc42a1ad6b91f8067L,0xb116a81d6284180aL, + 0xebedf5f8e901326fL } }, + /* 49 << 168 */ + { { 0xf2274c9f97e3e044L,0x4201852011d09fc9L,0x56a65f17d18e6e23L, + 0x2ea61e2a352b683cL }, + { 0x27d291bc575eaa94L,0x9e7bc721b8ff522dL,0x5f7268bfa7f04d6fL, + 0x5868c73faba41748L } }, + /* 50 << 168 */ + { { 0x9f85c2db7be0eeadL,0x511e7842ff719135L,0x5a06b1e9c5ea90d7L, + 0x0c19e28326fab631L }, + { 0x8af8f0cfe9206c55L,0x89389cb43553c06aL,0x39dbed97f65f8004L, + 0x0621b037c508991dL } }, + /* 51 << 168 */ + { { 0x1c52e63596e78cc4L,0x5385c8b20c06b4a8L,0xd84ddfdbb0e87d03L, + 0xc49dfb66934bafadL }, + { 0x7071e17059f70772L,0x3a073a843a1db56bL,0x034949033b8af190L, + 0x7d882de3d32920f0L } }, + /* 52 << 168 */ + { { 0x91633f0ab2cf8940L,0x72b0b1786f948f51L,0x2d28dc30782653c8L, + 0x88829849db903a05L }, + { 0xb8095d0c6a19d2bbL,0x4b9e7f0c86f782cbL,0x7af739882d907064L, + 0xd12be0fe8b32643cL } }, + /* 53 << 168 */ + { { 0x358ed23d0e165dc3L,0x3d47ce624e2378ceL,0x7e2bb0b9feb8a087L, + 0x3246e8aee29e10b9L }, + { 0x459f4ec703ce2b4dL,0xe9b4ca1bbbc077cfL,0x2613b4f20e9940c1L, + 0xfc598bb9047d1eb1L } }, + /* 54 << 168 */ + { { 0x9744c62b45036099L,0xa9dee742167c65d8L,0x0c511525dabe1943L, + 0xda11055493c6c624L }, + { 0xae00a52c651a3be2L,0xcda5111d884449a6L,0x063c06f4ff33bed1L, + 0x73baaf9a0d3d76b4L } }, + /* 55 << 168 */ + { { 0x52fb0c9d7fc63668L,0x6886c9dd0c039cdeL,0x602bd59955b22351L, + 0xb00cab02360c7c13L }, + { 0x8cb616bc81b69442L,0x41486700b55c3ceeL,0x71093281f49ba278L, + 0xad956d9c64a50710L } }, + /* 56 << 168 */ + { { 0x9561f28b638a7e81L,0x54155cdf5980ddc3L,0xb2db4a96d26f247aL, + 0x9d774e4e4787d100L }, + { 0x1a9e6e2e078637d2L,0x1c363e2d5e0ae06aL,0x7493483ee9cfa354L, + 0x76843cb37f74b98dL } }, + /* 57 << 168 */ + { { 0xbaca6591d4b66947L,0xb452ce9804460a8cL,0x6830d24643768f55L, + 0xf4197ed87dff12dfL }, + { 0x6521b472400dd0f7L,0x59f5ca8f4b1e7093L,0x6feff11b080338aeL, + 0x0ada31f6a29ca3c6L } }, + /* 58 << 168 */ + { { 0x24794eb694a2c215L,0xd83a43ab05a57ab4L,0x264a543a2a6f89feL, + 0x2c2a3868dd5ec7c2L }, + { 0xd33739408439d9b2L,0x715ea6720acd1f11L,0x42c1d235e7e6cc19L, + 0x81ce6e96b990585cL } }, + /* 59 << 168 */ + { { 0x04e5dfe0d809c7bdL,0xd7b2580c8f1050abL,0x6d91ad78d8a4176fL, + 0x0af556ee4e2e897cL }, + { 0x162a8b73921de0acL,0x52ac9c227ea78400L,0xee2a4eeaefce2174L, + 0xbe61844e6d637f79L } }, + /* 60 << 168 */ + { { 0x0491f1bc789a283bL,0x72d3ac3d880836f4L,0xaa1c5ea388e5402dL, + 0x1b192421d5cc473dL }, + { 0x5c0b99989dc84cacL,0xb0a8482d9c6e75b8L,0x639961d03a191ce2L, + 0xda3bc8656d837930L } }, + /* 61 << 168 */ + { { 
0xca990653056e6f8fL,0x84861c4164d133a7L,0x8b403276746abe40L, + 0xb7b4d51aebf8e303L }, + { 0x05b43211220a255dL,0xc997152c02419e6eL,0x76ff47b6630c2feaL, + 0x50518677281fdadeL } }, + /* 62 << 168 */ + { { 0x3283b8bacf902b0bL,0x8d4b4eb537db303bL,0xcc89f42d755011bcL, + 0xb43d74bbdd09d19bL }, + { 0x65746bc98adba350L,0x364eaf8cb51c1927L,0x13c7659610ad72ecL, + 0x30045121f8d40c20L } }, + /* 63 << 168 */ + { { 0x6d2d99b7ea7b979bL,0xcd78cd74e6fb3bcdL,0x11e45a9e86cffbfeL, + 0x78a61cf4637024f6L }, + { 0xd06bc8723d502295L,0xf1376854458cb288L,0xb9db26a1342f8586L, + 0xf33effcf4beee09eL } }, + /* 64 << 168 */ + { { 0xd7e0c4cdb30cfb3aL,0x6d09b8c16c9db4c8L,0x40ba1a4207c8d9dfL, + 0x6fd495f71c52c66dL }, + { 0xfb0e169f275264daL,0x80c2b746e57d8362L,0xedd987f749ad7222L, + 0xfdc229af4398ec7bL } }, + /* 0 << 175 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 175 */ + { { 0xb0d1ed8452666a58L,0x4bcb6e00e6a9c3c2L,0x3c57411c26906408L, + 0xcfc2075513556400L }, + { 0xa08b1c505294dba3L,0xa30ba2868b7dd31eL,0xd70ba90e991eca74L, + 0x094e142ce762c2b9L } }, + /* 2 << 175 */ + { { 0xb81d783e979f3925L,0x1efd130aaf4c89a7L,0x525c2144fd1bf7faL, + 0x4b2969041b265a9eL }, + { 0xed8e9634b9db65b6L,0x35c82e3203599d8aL,0xdaa7a54f403563f3L, + 0x9df088ad022c38abL } }, + /* 3 << 175 */ + { { 0xe5cfb066bb3fd30aL,0x429169daeff0354eL,0x809cf8523524e36cL, + 0x136f4fb30155be1dL }, + { 0x4826af011fbba712L,0x6ef0f0b4506ba1a1L,0xd9928b3177aea73eL, + 0xe2bf6af25eaa244eL } }, + /* 4 << 175 */ + { { 0x8d084f124237b64bL,0x688ebe99e3ecfd07L,0x57b8a70cf6845dd8L, + 0x808fc59c5da4a325L }, + { 0xa9032b2ba3585862L,0xb66825d5edf29386L,0xb5a5a8db431ec29bL, + 0xbb143a983a1e8dc8L } }, + /* 5 << 175 */ + { { 0x35ee94ce12ae381bL,0x3a7f176c86ccda90L,0xc63a657e4606eacaL, + 0x9ae5a38043cd04dfL }, + { 0x9bec8d15ed251b46L,0x1f5d6d30caca5e64L,0x347b3b359ff20f07L, + 0x4d65f034f7e4b286L } }, + /* 6 << 175 */ + { { 0x9e93ba24f111661eL,0xedced484b105eb04L,0x96dc9ba1f424b578L, + 0xbf8f66b7e83e9069L }, + { 0x872d4df4d7ed8216L,0xbf07f3778e2cbecfL,0x4281d89998e73754L, + 0xfec85fbb8aab8708L } }, + /* 7 << 175 */ + { { 0x9a3c0deea5ba5b0bL,0xe6a116ce42d05299L,0xae9775fee9b02d42L, + 0x72b05200a1545cb6L }, + { 0xbc506f7d31a3b4eaL,0xe58930788bbd9b32L,0xc8bc5f37e4b12a97L, + 0x6b000c064a73b671L } }, + /* 8 << 175 */ + { { 0x13b5bf22765fa7d0L,0x59805bf01d6a5370L,0x67a5e29d4280db98L, + 0x4f53916f776b1ce3L }, + { 0x714ff61f33ddf626L,0x4206238ea085d103L,0x1c50d4b7e5809ee3L, + 0x999f450d85f8eb1dL } }, + /* 9 << 175 */ + { { 0x658a6051e4c79e9bL,0x1394cb73c66a9feaL,0x27f31ed5c6be7b23L, + 0xf4c88f365aa6f8feL }, + { 0x0fb0721f4aaa499eL,0x68b3a7d5e3fb2a6bL,0xa788097d3a92851dL, + 0x060e7f8ae96f4913L } }, + /* 10 << 175 */ + { { 0x82eebe731a3a93bcL,0x42bbf465a21adc1aL,0xc10b6fa4ef030efdL, + 0x247aa4c787b097bbL }, + { 0x8b8dc632f60c77daL,0x6ffbc26ac223523eL,0xa4f6ff11344579cfL, + 0x5825653c980250f6L } }, + /* 11 << 175 */ + { { 0xb2dd097ebc1aa2b9L,0x0788939337a0333aL,0x1cf55e7137a0db38L, + 0x2648487f792c1613L }, + { 0xdad013363fcef261L,0x6239c81d0eabf129L,0x8ee761de9d276be2L, + 0x406a7a341eda6ad3L } }, + /* 12 << 175 */ + { { 0x4bf367ba4a493b31L,0x54f20a529bf7f026L,0xb696e0629795914bL, + 0xcddab96d8bf236acL }, + { 0x4ff2c70aed25ea13L,0xfa1d09eb81cbbbe7L,0x88fc8c87468544c5L, + 0x847a670d696b3317L } }, + /* 13 << 175 */ + { { 0xf133421e64bcb626L,0xaea638c826dee0b5L,0xd6e7680bb310346cL, + 0xe06f4097d5d4ced3L }, + { 0x099614527512a30bL,0xf3d867fde589a59aL,0x2e73254f52d0c180L, + 0x9063d8a3333c74acL } }, + /* 14 << 175 */ + { { 
0xeda6c595d314e7bcL,0x2ee7464b467899edL,0x1cef423c0a1ed5d3L, + 0x217e76ea69cc7613L }, + { 0x27ccce1fe7cda917L,0x12d8016b8a893f16L,0xbcd6de849fc74f6bL, + 0xfa5817e2f3144e61L } }, + /* 15 << 175 */ + { { 0x1f3541640821ee4cL,0x1583eab40bc61992L,0x7490caf61d72879fL, + 0x998ad9f3f76ae7b2L }, + { 0x1e181950a41157f7L,0xa9d7e1e6e8da3a7eL,0x963784eb8426b95fL, + 0x0ee4ed6e542e2a10L } }, + /* 16 << 175 */ + { { 0xb79d4cc5ac751e7bL,0x93f96472fd4211bdL,0x8c72d3d2c8de4fc6L, + 0x7b69cbf5df44f064L }, + { 0x3da90ca2f4bf94e1L,0x1a5325f8f12894e2L,0x0a437f6c7917d60bL, + 0x9be7048696c9cb5dL } }, + /* 17 << 175 */ + { { 0xb4d880bfe1dc5c05L,0xd738addaeebeeb57L,0x6f0119d3df0fe6a3L, + 0x5c686e5566eaaf5aL }, + { 0x9cb10b50dfd0b7ecL,0xbdd0264b6a497c21L,0xfc0935148c546c96L, + 0x58a947fa79dbf42aL } }, + /* 18 << 175 */ + { { 0xc0b48d4e49ccd6d7L,0xff8fb02c88bd5580L,0xc75235e907d473b2L, + 0x4fab1ac5a2188af3L }, + { 0x030fa3bc97576ec0L,0xe8c946e80b7e7d2fL,0x40a5c9cc70305600L, + 0x6d8260a9c8b013b4L } }, + /* 19 << 175 */ + { { 0x0368304f70bba85cL,0xad090da1a4a0d311L,0x7170e8702415eec1L, + 0xbfba35fe8461ea47L }, + { 0x6279019ac1e91938L,0xa47638f31afc415fL,0x36c65cbbbcba0e0fL, + 0x02160efb034e2c48L } }, + /* 20 << 175 */ + { { 0xe6c51073615cd9e4L,0x498ec047f1243c06L,0x3e5a8809b17b3d8cL, + 0x5cd99e610cc565f1L }, + { 0x81e312df7851dafeL,0xf156f5baa79061e2L,0x80d62b71880c590eL, + 0xbec9746f0a39faa1L } }, + /* 21 << 175 */ + { { 0x1d98a9c1c8ed1f7aL,0x09e43bb5a81d5ff2L,0xd5f00f680da0794aL, + 0x412050d9661aa836L }, + { 0xa89f7c4e90747e40L,0x6dc05ebbb62a3686L,0xdf4de847308e3353L, + 0x53868fbb9fb53bb9L } }, + /* 22 << 175 */ + { { 0x2b09d2c3cfdcf7ddL,0x41a9fce3723fcab4L,0x73d905f707f57ca3L, + 0x080f9fb1ac8e1555L }, + { 0x7c088e849ba7a531L,0x07d35586ed9a147fL,0x602846abaf48c336L, + 0x7320fd320ccf0e79L } }, + /* 23 << 175 */ + { { 0xaa780798b18bd1ffL,0x52c2e300afdd2905L,0xf27ea3d6434267cdL, + 0x8b96d16d15605b5fL }, + { 0x7bb310494b45706bL,0xe7f58b8e743d25f8L,0xe9b5e45b87f30076L, + 0xd19448d65d053d5aL } }, + /* 24 << 175 */ + { { 0x1ecc8cb9d3210a04L,0x6bc7d463dafb5269L,0x3e59b10a67c3489fL, + 0x1769788c65641e1bL }, + { 0x8a53b82dbd6cb838L,0x7066d6e6236d5f22L,0x03aa1c616908536eL, + 0xc971da0d66ae9809L } }, + /* 25 << 175 */ + { { 0x01b3a86bc49a2facL,0x3b8420c03092e77aL,0x020573007d6fb556L, + 0x6941b2a1bff40a87L }, + { 0x140b63080658ff2aL,0x878043633424ab36L,0x0253bd515751e299L, + 0xc75bcd76449c3e3aL } }, + /* 26 << 175 */ + { { 0x92eb40907f8f875dL,0x9c9d754e56c26bbfL,0x158cea618110bbe7L, + 0x62a6b802745f91eaL }, + { 0xa79c41aac6e7394bL,0x445b6a83ad57ef10L,0x0c5277eb6ea6f40cL, + 0x319fe96b88633365L } }, + /* 27 << 175 */ + { { 0x0b0fc61f385f63cbL,0x41250c8422bdd127L,0x67d153f109e942c2L, + 0x60920d08c021ad5dL }, + { 0x229f5746724d81a5L,0xb7ffb8925bba3299L,0x518c51a1de413032L, + 0x2a9bfe773c2fd94cL } }, + /* 28 << 175 */ + { { 0xcbcde2393191f4fdL,0x43093e16d3d6ada1L,0x184579f358769606L, + 0x2c94a8b3d236625cL }, + { 0x6922b9c05c437d8eL,0x3d4ae423d8d9f3c8L,0xf72c31c12e7090a2L, + 0x4ac3f5f3d76a55bdL } }, + /* 29 << 175 */ + { { 0x342508fc6b6af991L,0x0d5271001b5cebbdL,0xb84740d0dd440dd7L, + 0x748ef841780162fdL }, + { 0xa8dbfe0edfc6fafbL,0xeadfdf05f7300f27L,0x7d06555ffeba4ec9L, + 0x12c56f839e25fa97L } }, + /* 30 << 175 */ + { { 0x77f84203d39b8c34L,0xed8b1be63125eddbL,0x5bbf2441f6e39dc5L, + 0xb00f6ee66a5d678aL }, + { 0xba456ecf57d0ea99L,0xdcae0f5817e06c43L,0x01643de40f5b4baaL, + 0x2c324341d161b9beL } }, + /* 31 << 175 */ + { { 0x80177f55e126d468L,0xed325f1f76748e09L,0x6116004acfa9bdc2L, + 0x2d8607e63a9fb468L }, + { 
0x0e573e276009d660L,0x3a525d2e8d10c5a1L,0xd26cb45c3b9009a0L, + 0xb6b0cdc0de9d7448L } }, + /* 32 << 175 */ + { { 0x949c9976e1337c26L,0x6faadebdd73d68e5L,0x9e158614f1b768d9L, + 0x22dfa5579cc4f069L }, + { 0xccd6da17be93c6d6L,0x24866c61a504f5b9L,0x2121353c8d694da1L, + 0x1c6ca5800140b8c6L } }, + /* 33 << 175 */ + { { 0xc245ad8ce964021eL,0xb83bffba032b82b3L,0xfaa220c647ef9898L, + 0x7e8d3ac6982c948aL }, + { 0x1faa2091bc2d124aL,0xbd54c3dd05b15ff4L,0x386bf3abc87c6fb7L, + 0xfb2b0563fdeb6f66L } }, + /* 34 << 175 */ + { { 0x4e77c5575b45afb4L,0xe9ded649efb8912dL,0x7ec9bbf542f6e557L, + 0x2570dfff62671f00L }, + { 0x2b3bfb7888e084bdL,0xa024b238f37fe5b4L,0x44e7dc0495649aeeL, + 0x498ca2555e7ec1d8L } }, + /* 35 << 175 */ + { { 0x3bc766eaaaa07e86L,0x0db6facbf3608586L,0xbadd2549bdc259c8L, + 0x95af3c6e041c649fL }, + { 0xb36a928c02e30afbL,0x9b5356ad008a88b8L,0x4b67a5f1cf1d9e9dL, + 0xc6542e47a5d8d8ceL } }, + /* 36 << 175 */ + { { 0x73061fe87adfb6ccL,0xcc826fd398678141L,0x00e758b13c80515aL, + 0x6afe324741485083L }, + { 0x0fcb08b9b6ae8a75L,0xb8cf388d4acf51e1L,0x344a55606961b9d6L, + 0x1a6778b86a97fd0cL } }, + /* 37 << 175 */ + { { 0xd840fdc1ecc4c7e3L,0xde9fe47d16db68ccL,0xe95f89dea3e216aaL, + 0x84f1a6a49594a8beL }, + { 0x7ddc7d725a7b162bL,0xc5cfda19adc817a3L,0x80a5d35078b58d46L, + 0x93365b1382978f19L } }, + /* 38 << 175 */ + { { 0x2e44d22526a1fc90L,0x0d6d10d24d70705dL,0xd94b6b10d70c45f4L, + 0x0f201022b216c079L }, + { 0xcec966c5658fde41L,0xa8d2bc7d7e27601dL,0xbfcce3e1ff230be7L, + 0x3394ff6b0033ffb5L } }, + /* 39 << 175 */ + { { 0xd890c5098132c9afL,0xaac4b0eb361e7868L,0x5194ded3e82d15aaL, + 0x4550bd2e23ae6b7dL }, + { 0x3fda318eea5399d4L,0xd989bffa91638b80L,0x5ea124d0a14aa12dL, + 0x1fb1b8993667b944L } }, + /* 40 << 175 */ + { { 0x95ec796944c44d6aL,0x91df144a57e86137L,0x915fd62073adac44L, + 0x8f01732d59a83801L }, + { 0xec579d253aa0a633L,0x06de5e7cc9d6d59cL,0xc132f958b1ef8010L, + 0x29476f96e65c1a02L } }, + /* 41 << 175 */ + { { 0x336a77c0d34c3565L,0xef1105b21b9f1e9eL,0x63e6d08bf9e08002L, + 0x9aff2f21c613809eL }, + { 0xb5754f853a80e75dL,0xde71853e6bbda681L,0x86f041df8197fd7aL, + 0x8b332e08127817faL } }, + /* 42 << 175 */ + { { 0x05d99be8b9c20cdaL,0x89f7aad5d5cd0c98L,0x7ef936fe5bb94183L, + 0x92ca0753b05cd7f2L }, + { 0x9d65db1174a1e035L,0x02628cc813eaea92L,0xf2d9e24249e4fbf2L, + 0x94fdfd9be384f8b7L } }, + /* 43 << 175 */ + { { 0x65f5605463428c6bL,0x2f7205b290b409a5L,0xf778bb78ff45ae11L, + 0xa13045bec5ee53b2L }, + { 0xe00a14ff03ef77feL,0x689cd59fffef8befL,0x3578f0ed1e9ade22L, + 0xe99f3ec06268b6a8L } }, + /* 44 << 175 */ + { { 0xa2057d91ea1b3c3eL,0x2d1a7053b8823a4aL,0xabbb336a2cca451eL, + 0xcd2466e32218bb5dL }, + { 0x3ac1f42fc8cb762dL,0x7e312aae7690211fL,0xebb9bd7345d07450L, + 0x207c4b8246c2213fL } }, + /* 45 << 175 */ + { { 0x99d425c1375913ecL,0x94e45e9667908220L,0xc08f3087cd67dbf6L, + 0xa5670fbec0887056L }, + { 0x6717b64a66f5b8fcL,0xd5a56aea786fec28L,0xa8c3f55fc0ff4952L, + 0xa77fefae457ac49bL } }, + /* 46 << 175 */ + { { 0x29882d7c98379d44L,0xd000bdfb509edc8aL,0xc6f95979e66fe464L, + 0x504a6115fa61bde0L }, + { 0x56b3b871effea31aL,0x2d3de26df0c21a54L,0x21dbff31834753bfL, + 0xe67ecf4969269d86L } }, + /* 47 << 175 */ + { { 0x7a176952151fe690L,0x035158047f2adb5fL,0xee794b15d1b62a8dL, + 0xf004ceecaae454e6L }, + { 0x0897ea7cf0386facL,0x3b62ff12d1fca751L,0x154181df1b7a04ecL, + 0x2008e04afb5847ecL } }, + /* 48 << 175 */ + { { 0xd147148e41dbd772L,0x2b419f7322942654L,0x669f30d3e9c544f7L, + 0x52a2c223c8540149L }, + { 0x5da9ee14634dfb02L,0x5f074ff0f47869f3L,0x74ee878da3933accL, + 0xe65106514fe35ed1L } }, + /* 49 << 175 */ + { { 
0xb3eb9482f1012e7aL,0x51013cc0a8a566aeL,0xdd5e924347c00d3bL, + 0x7fde089d946bb0e5L }, + { 0x030754fec731b4b3L,0x12a136a499fda062L,0x7c1064b85a1a35bcL, + 0xbf1f5763446c84efL } }, + /* 50 << 175 */ + { { 0xed29a56da16d4b34L,0x7fba9d09dca21c4fL,0x66d7ac006d8de486L, + 0x6006198773a2a5e1L }, + { 0x8b400f869da28ff0L,0x3133f70843c4599cL,0x9911c9b8ee28cb0dL, + 0xcd7e28748e0af61dL } }, + /* 51 << 175 */ + { { 0x5a85f0f272ed91fcL,0x85214f319cd4a373L,0x881fe5be1925253cL, + 0xd8dc98e091e8bc76L }, + { 0x7120affe585cc3a2L,0x724952ed735bf97aL,0x5581e7dc3eb34581L, + 0x5cbff4f2e52ee57dL } }, + /* 52 << 175 */ + { { 0x8d320a0e87d8cc7bL,0x9beaa7f3f1d280d0L,0x7a0b95719beec704L, + 0x9126332e5b7f0057L }, + { 0x01fbc1b48ed3bd6dL,0x35bb2c12d945eb24L,0x6404694e9a8ae255L, + 0xb6092eec8d6abfb3L } }, + /* 53 << 175 */ + { { 0x4d76143fcc058865L,0x7b0a5af26e249922L,0x8aef94406a50d353L, + 0xe11e4bcc64f0e07aL }, + { 0x4472993aa14a90faL,0x7706e20cba0c51d4L,0xf403292f1532672dL, + 0x52573bfa21829382L } }, + /* 54 << 175 */ + { { 0x6a7bb6a93b5bdb83L,0x08da65c0a4a72318L,0xc58d22aa63eb065fL, + 0x1717596c1b15d685L }, + { 0x112df0d0b266d88bL,0xf688ae975941945aL,0x487386e37c292cacL, + 0x42f3b50d57d6985cL } }, + /* 55 << 175 */ + { { 0x6da4f9986a90fc34L,0xc8f257d365ca8a8dL,0xc2feabca6951f762L, + 0xe1bc81d074c323acL }, + { 0x1bc68f67251a2a12L,0x10d86587be8a70dcL,0xd648af7ff0f84d2eL, + 0xf0aa9ebc6a43ac92L } }, + /* 56 << 175 */ + { { 0x69e3be0427596893L,0xb6bb02a645bf452bL,0x0875c11af4c698c8L, + 0x6652b5c7bece3794L }, + { 0x7b3755fd4f5c0499L,0x6ea16558b5532b38L,0xd1c69889a2e96ef7L, + 0x9c773c3a61ed8f48L } }, + /* 57 << 175 */ + { { 0x2b653a409b323abcL,0xe26605e1f0e1d791L,0x45d410644a87157aL, + 0x8f9a78b7cbbce616L }, + { 0xcf1e44aac407edddL,0x81ddd1d8a35b964fL,0x473e339efd083999L, + 0x6c94bdde8e796802L } }, + /* 58 << 175 */ + { { 0x5a304ada8545d185L,0x82ae44ea738bb8cbL,0x628a35e3df87e10eL, + 0xd3624f3da15b9fe3L }, + { 0xcc44209b14be4254L,0x7d0efcbcbdbc2ea5L,0x1f60336204c37bbeL, + 0x21f363f556a5852cL } }, + /* 59 << 175 */ + { { 0xa1503d1ca8501550L,0x2251e0e1d8ab10bbL,0xde129c966961c51cL, + 0x1f7246a481910f68L }, + { 0x2eb744ee5f2591f2L,0x3c47d33f5e627157L,0x4d6d62c922f3bd68L, + 0x6120a64bcb8df856L } }, + /* 60 << 175 */ + { { 0x3a9ac6c07b5d07dfL,0xa92b95587ef39783L,0xe128a134ab3a9b4fL, + 0x41c18807b1252f05L }, + { 0xfc7ed08980ba9b1cL,0xac8dc6dec532a9ddL,0xbf829cef55246809L, + 0x101b784f5b4ee80fL } }, + /* 61 << 175 */ + { { 0xc09945bbb6f11603L,0x57b09dbe41d2801eL,0xfba5202fa97534a8L, + 0x7fd8ae5fc17b9614L }, + { 0xa50ba66678308435L,0x9572f77cd3868c4dL,0x0cef7bfd2dd7aab0L, + 0xe7958e082c7c79ffL } }, + /* 62 << 175 */ + { { 0x81262e4225346689L,0x716da290b07c7004L,0x35f911eab7950ee3L, + 0x6fd72969261d21b5L }, + { 0x5238980308b640d3L,0x5b0026ee887f12a1L,0x20e21660742e9311L, + 0x0ef6d5415ff77ff7L } }, + /* 63 << 175 */ + { { 0x969127f0f9c41135L,0xf21d60c968a64993L,0x656e5d0ce541875cL, + 0xf1e0f84ea1d3c233L }, + { 0x9bcca35906002d60L,0xbe2da60c06191552L,0x5da8bbae61181ec3L, + 0x9f04b82365806f19L } }, + /* 64 << 175 */ + { { 0xf1604a7dd4b79bb8L,0xaee806fb52c878c8L,0x34144f118d47b8e8L, + 0x72edf52b949f9054L }, + { 0xebfca84e2127015aL,0x9051d0c09cb7cef3L,0x86e8fe58296deec8L, + 0x33b2818841010d74L } }, + /* 0 << 182 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 182 */ + { { 0x01079383171b445fL,0x9bcf21e38131ad4cL,0x8cdfe205c93987e8L, + 0xe63f4152c92e8c8fL }, + { 0x729462a930add43dL,0x62ebb143c980f05aL,0x4f3954e53b06e968L, + 0xfe1d75ad242cf6b1L } }, + /* 2 << 182 */ + { { 
0x5f95c6c7af8685c8L,0xd4c1c8ce2f8f01aaL,0xc44bbe322574692aL, + 0xb8003478d4a4a068L }, + { 0x7c8fc6e52eca3cdbL,0xea1db16bec04d399L,0xb05bc82e8f2bc5cfL, + 0x763d517ff44793d2L } }, + /* 3 << 182 */ + { { 0x4451c1b808bd98d0L,0x644b1cd46575f240L,0x6907eb337375d270L, + 0x56c8bebdfa2286bdL }, + { 0xc713d2acc4632b46L,0x17da427aafd60242L,0x313065b7c95c7546L, + 0xf8239898bf17a3deL } }, + /* 4 << 182 */ + { { 0xf3b7963f4c830320L,0x842c7aa0903203e3L,0xaf22ca0ae7327afbL, + 0x38e13092967609b6L }, + { 0x73b8fb62757558f1L,0x3cc3e831f7eca8c1L,0xe4174474f6331627L, + 0xa77989cac3c40234L } }, + /* 5 << 182 */ + { { 0xe5fd17a144a081e0L,0xd797fb7db70e296aL,0x2b472b30481f719cL, + 0x0e632a98fe6f8c52L }, + { 0x89ccd116c5f0c284L,0xf51088af2d987c62L,0x2a2bccda4c2de6cfL, + 0x810f9efef679f0f9L } }, + /* 6 << 182 */ + { { 0xb0f394b97ffe4b3eL,0x0b691d21e5fa5d21L,0xb0bd77479dfbbc75L, + 0xd2830fdafaf78b00L }, + { 0xf78c249c52434f57L,0x4b1f754598096dabL,0x73bf6f948ff8c0b3L, + 0x34aef03d454e134cL } }, + /* 7 << 182 */ + { { 0xf8d151f4b7ac7ec5L,0xd6ceb95ae50da7d5L,0xa1b492b0dc3a0eb8L, + 0x75157b69b3dd2863L }, + { 0xe2c4c74ec5413d62L,0xbe329ff7bc5fc4c7L,0x835a2aea60fa9ddaL, + 0xf117f5ad7445cb87L } }, + /* 8 << 182 */ + { { 0xae8317f4b0166f7aL,0xfbd3e3f7ceec74e6L,0xfdb516ace0874bfdL, + 0x3d846019c681f3a3L }, + { 0x0b12ee5c7c1620b0L,0xba68b4dd2b63c501L,0xac03cd326668c51eL, + 0x2a6279f74e0bcb5bL } }, + /* 9 << 182 */ + { { 0x17bd69b06ae85c10L,0x729469791dfdd3a6L,0xd9a032682c078becL, + 0x41c6a658bfd68a52L }, + { 0xcdea10240e023900L,0xbaeec121b10d144dL,0x5a600e74058ab8dcL, + 0x1333af21bb89ccddL } }, + /* 10 << 182 */ + { { 0xdf25eae03aaba1f1L,0x2cada16e3b7144cfL,0x657ee27d71ab98bcL, + 0x99088b4c7a6fc96eL }, + { 0x05d5c0a03549dbd4L,0x42cbdf8ff158c3acL,0x3fb6b3b087edd685L, + 0x22071cf686f064d0L } }, + /* 11 << 182 */ + { { 0xd2d6721fff2811e5L,0xdb81b703fe7fae8cL,0x3cfb74efd3f1f7bbL, + 0x0cdbcd7616cdeb5dL }, + { 0x4f39642a566a808cL,0x02b74454340064d6L,0xfabbadca0528fa6fL, + 0xe4c3074cd3fc0bb6L } }, + /* 12 << 182 */ + { { 0xb32cb8b0b796d219L,0xc3e95f4f34741dd9L,0x8721212568edf6f5L, + 0x7a03aee4a2b9cb8eL }, + { 0x0cd3c376f53a89aaL,0x0d8af9b1948a28dcL,0xcf86a3f4902ab04fL, + 0x8aacb62a7f42002dL } }, + /* 13 << 182 */ + { { 0x106985ebf62ffd52L,0xe670b54e5797bf10L,0x4b405209c5e30aefL, + 0x12c97a204365b5e9L }, + { 0x104646ce1fe32093L,0x13cb4ff63907a8c9L,0x8b9f30d1d46e726bL, + 0xe1985e21aba0f499L } }, + /* 14 << 182 */ + { { 0xc573dea910a230cdL,0x24f46a93cd30f947L,0xf2623fcfabe2010aL, + 0x3f278cb273f00e4fL }, + { 0xed55c67d50b920ebL,0xf1cb9a2d8e760571L,0x7c50d1090895b709L, + 0x4207cf07190d4369L } }, + /* 15 << 182 */ + { { 0x3b027e81c4127fe1L,0xa9f8b9ad3ae9c566L,0x5ab10851acbfbba5L, + 0xa747d648569556f5L }, + { 0xcc172b5c2ba97bf7L,0x15e0f77dbcfa3324L,0xa345b7977686279dL, + 0x5a723480e38003d3L } }, + /* 16 << 182 */ + { { 0xfd8e139f8f5fcda8L,0xf3e558c4bdee5bfdL,0xd76cbaf4e33f9f77L, + 0x3a4c97a471771969L }, + { 0xda27e84bf6dce6a7L,0xff373d9613e6c2d1L,0xf115193cd759a6e9L, + 0x3f9b702563d2262cL } }, + /* 17 << 182 */ + { { 0xd9764a31317cd062L,0x30779d8e199f8332L,0xd807410616b11b0bL, + 0x7917ab9f78aeaed8L }, + { 0xb67a9cbe28fb1d8eL,0x2e313563136eda33L,0x010b7069a371a86cL, + 0x44d90fa26744e6b7L } }, + /* 18 << 182 */ + { { 0x68190867d6b3e243L,0x9fe6cd9d59048c48L,0xb900b02895731538L, + 0xa012062f32cae04fL }, + { 0x8107c8bc9399d082L,0x47e8c54a41df12e2L,0x14ba5117b6ef3f73L, + 0x22260bea81362f0bL } }, + /* 19 << 182 */ + { { 0x90ea261e1a18cc20L,0x2192999f2321d636L,0xef64d314e311b6a0L, + 0xd7401e4c3b54a1f5L }, + { 
0x190199836fbca2baL,0x46ad32938fbffc4bL,0xa142d3f63786bf40L, + 0xeb5cbc26b67039fcL } }, + /* 20 << 182 */ + { { 0x9cb0ae6c252bd479L,0x05e0f88a12b5848fL,0x78f6d2b2a5c97663L, + 0x6f6e149bc162225cL }, + { 0xe602235cde601a89L,0xd17bbe98f373be1fL,0xcaf49a5ba8471827L, + 0x7e1a0a8518aaa116L } }, + /* 21 << 182 */ + { { 0x6c833196270580c3L,0x1e233839f1c98a14L,0x67b2f7b4ae34e0a5L, + 0x47ac8745d8ce7289L }, + { 0x2b74779a100dd467L,0x274a43374ee50d09L,0x603dcf1383608bc9L, + 0xcd9da6c3c89e8388L } }, + /* 22 << 182 */ + { { 0x2660199f355116acL,0xcc38bb59b6d18eedL,0x3075f31f2f4bc071L, + 0x9774457f265dc57eL }, + { 0x06a6a9c8c6db88bbL,0x6429d07f4ec98e04L,0x8d05e57b05ecaa8bL, + 0x20f140b17872ea7bL } }, + /* 23 << 182 */ + { { 0xdf8c0f09ca494693L,0x48d3a020f252e909L,0x4c5c29af57b14b12L, + 0x7e6fa37dbf47ad1cL }, + { 0x66e7b50649a0c938L,0xb72c0d486be5f41fL,0x6a6242b8b2359412L, + 0xcd35c7748e859480L } }, + /* 24 << 182 */ + { { 0x12536fea87baa627L,0x58c1fec1f72aa680L,0x6c29b637601e5dc9L, + 0x9e3c3c1cde9e01b9L }, + { 0xefc8127b2bcfe0b0L,0x351071022a12f50dL,0x6ccd6cb14879b397L, + 0xf792f804f8a82f21L } }, + /* 25 << 182 */ + { { 0x509d4804a9b46402L,0xedddf85dc10f0850L,0x928410dc4b6208aaL, + 0xf6229c46391012dcL }, + { 0xc5a7c41e7727b9b6L,0x289e4e4baa444842L,0x049ba1d9e9a947eaL, + 0x44f9e47f83c8debcL } }, + /* 26 << 182 */ + { { 0xfa77a1fe611f8b8eL,0xfd2e416af518f427L,0xc5fffa70114ebac3L, + 0xfe57c4e95d89697bL }, + { 0xfdd053acb1aaf613L,0x31df210fea585a45L,0x318cc10e24985034L, + 0x1a38efd15f1d6130L } }, + /* 27 << 182 */ + { { 0xbf86f2370b1e9e21L,0xb258514d1dbe88aaL,0x1e38a58890c1baf9L, + 0x2936a01ebdb9b692L }, + { 0xd576de986dd5b20cL,0xb586bf7170f98ecfL,0xcccf0f12c42d2fd7L, + 0x8717e61cfb35bd7bL } }, + /* 28 << 182 */ + { { 0x8b1e572235e6fc06L,0x3477728f0b3e13d5L,0x150c294daa8a7372L, + 0xc0291d433bfa528aL }, + { 0xc6c8bc67cec5a196L,0xdeeb31e45c2e8a7cL,0xba93e244fb6e1c51L, + 0xb9f8b71b2e28e156L } }, + /* 29 << 182 */ + { { 0xce65a287968a2ab9L,0xe3c5ce6946bbcb1fL,0xf8c835b9e7ae3f30L, + 0x16bbee26ff72b82bL }, + { 0x665e2017fd42cd22L,0x1e139970f8b1d2a0L,0x125cda2979204932L, + 0x7aee94a549c3bee5L } }, + /* 30 << 182 */ + { { 0x68c7016089821a66L,0xf7c376788f981669L,0xd90829fc48cc3645L, + 0x346af049d70addfcL }, + { 0x2057b232370bf29cL,0xf90c73ce42e650eeL,0xe03386eaa126ab90L, + 0x0e266e7e975a087bL } }, + /* 31 << 182 */ + { { 0x80578eb90fca65d9L,0x7e2989ea16af45b8L,0x7438212dcac75a4eL, + 0x38c7ca394fef36b8L }, + { 0x8650c494d402676aL,0x26ab5a66f72c7c48L,0x4e6cb426ce3a464eL, + 0xf8f998962b72f841L } }, + /* 32 << 182 */ + { { 0x8c3184911a335cc8L,0x563459ba6a5913e4L,0x1b920d61c7b32919L, + 0x805ab8b6a02425adL }, + { 0x2ac512da8d006086L,0x6ca4846abcf5c0fdL,0xafea51d8ac2138d7L, + 0xcb647545344cd443L } }, + /* 33 << 182 */ + { { 0x0429ee8fbd7d9040L,0xee66a2de819b9c96L,0x54f9ec25dea7d744L, + 0x2ffea642671721bbL }, + { 0x4f19dbd1114344eaL,0x04304536fd0dbc8bL,0x014b50aa29ec7f91L, + 0xb5fc22febb06014dL } }, + /* 34 << 182 */ + { { 0x60d963a91ee682e0L,0xdf48abc0fe85c727L,0x0cadba132e707c2dL, + 0xde608d3aa645aeffL }, + { 0x05f1c28bedafd883L,0x3c362edebd94de1fL,0x8dd0629d13593e41L, + 0x0a5e736f766d6eafL } }, + /* 35 << 182 */ + { { 0xbfa92311f68cf9d1L,0xa4f9ef87c1797556L,0x10d75a1f5601c209L, + 0x651c374c09b07361L }, + { 0x49950b5888b5ceadL,0x0ef000586fa9dbaaL,0xf51ddc264e15f33aL, + 0x1f8b5ca62ef46140L } }, + /* 36 << 182 */ + { { 0x343ac0a3ee9523f0L,0xbb75eab2975ea978L,0x1bccf332107387f4L, + 0x790f92599ab0062eL }, + { 0xf1a363ad1e4f6a5fL,0x06e08b8462519a50L,0x609151877265f1eeL, + 0x6a80ca3493ae985eL } }, + /* 37 << 182 */ + { { 
0x81b29768aaba4864L,0xb13cabf28d52a7d6L,0xb5c363488ead03f1L, + 0xc932ad9581c7c1c0L }, + { 0x5452708ecae1e27bL,0x9dac42691b0df648L,0x233e3f0cdfcdb8bcL, + 0xe6ceccdfec540174L } }, + /* 38 << 182 */ + { { 0xbd0d845e95081181L,0xcc8a7920699355d5L,0x111c0f6dc3b375a8L, + 0xfd95bc6bfd51e0dcL }, + { 0x4a106a266888523aL,0x4d142bd6cb01a06dL,0x79bfd289adb9b397L, + 0x0bdbfb94e9863914L } }, + /* 39 << 182 */ + { { 0x29d8a2291660f6a6L,0x7f6abcd6551c042dL,0x13039deb0ac3ffe8L, + 0xa01be628ec8523fbL }, + { 0x6ea341030ca1c328L,0xc74114bdb903928eL,0x8aa4ff4e9e9144b0L, + 0x7064091f7f9a4b17L } }, + /* 40 << 182 */ + { { 0xa3f4f521e447f2c4L,0x81b8da7a604291f0L,0xd680bc467d5926deL, + 0x84f21fd534a1202fL }, + { 0x1d1e31814e9df3d8L,0x1ca4861a39ab8d34L,0x809ddeec5b19aa4aL, + 0x59f72f7e4d329366L } }, + /* 41 << 182 */ + { { 0xa2f93f41386d5087L,0x40bf739cdd67d64fL,0xb449420566702158L, + 0xc33c65be73b1e178L }, + { 0xcdcd657c38ca6153L,0x97f4519adc791976L,0xcc7c7f29cd6e1f39L, + 0x38de9cfb7e3c3932L } }, + /* 42 << 182 */ + { { 0xe448eba37b793f85L,0xe9f8dbf9f067e914L,0xc0390266f114ae87L, + 0x39ed75a7cd6a8e2aL }, + { 0xadb148487ffba390L,0x67f8cb8b6af9bc09L,0x322c38489c7476dbL, + 0xa320fecf52a538d6L } }, + /* 43 << 182 */ + { { 0xe0493002b2aced2bL,0xdfba1809616bd430L,0x531c4644c331be70L, + 0xbc04d32e90d2e450L }, + { 0x1805a0d10f9f142dL,0x2c44a0c547ee5a23L,0x31875a433989b4e3L, + 0x6b1949fd0c063481L } }, + /* 44 << 182 */ + { { 0x2dfb9e08be0f4492L,0x3ff0da03e9d5e517L,0x03dbe9a1f79466a8L, + 0x0b87bcd015ea9932L }, + { 0xeb64fc83ab1f58abL,0x6d9598da817edc8aL,0x699cff661d3b67e5L, + 0x645c0f2992635853L } }, + /* 45 << 182 */ + { { 0x253cdd82eabaf21cL,0x82b9602a2241659eL,0x2cae07ec2d9f7091L, + 0xbe4c720c8b48cd9bL }, + { 0x6ce5bc036f08d6c9L,0x36e8a997af10bf40L,0x83422d213e10ff12L, + 0x7b26d3ebbcc12494L } }, + /* 46 << 182 */ + { { 0xb240d2d0c9469ad6L,0xc4a11b4d30afa05bL,0x4b604acedd6ba286L, + 0x184866003ee2864cL }, + { 0x5869d6ba8d9ce5beL,0x0d8f68c5ff4bfb0dL,0xb69f210b5700cf73L, + 0x61f6653a6d37c135L } }, + /* 47 << 182 */ + { { 0xff3d432b5aff5a48L,0x0d81c4b972ba3a69L,0xee879ae9fa1899efL, + 0xbac7e2a02d6acafdL }, + { 0xd6d93f6c1c664399L,0x4c288de15bcb135dL,0x83031dab9dab7cbfL, + 0xfe23feb03abbf5f0L } }, + /* 48 << 182 */ + { { 0x9f1b2466cdedca85L,0x140bb7101a09538cL,0xac8ae8515e11115dL, + 0x0d63ff676f03f59eL }, + { 0x755e55517d234afbL,0x61c2db4e7e208fc1L,0xaa9859cef28a4b5dL, + 0xbdd6d4fc34af030fL } }, + /* 49 << 182 */ + { { 0xd1c4a26d3be01cb1L,0x9ba14ffc243aa07cL,0xf95cd3a9b2503502L, + 0xe379bc067d2a93abL }, + { 0x3efc18e9d4ca8d68L,0x083558ec80bb412aL,0xd903b9409645a968L, + 0xa499f0b69ba6054fL } }, + /* 50 << 182 */ + { { 0x208b573cb8349abeL,0x3baab3e530b4fc1cL,0x87e978bacb524990L, + 0x3524194eccdf0e80L }, + { 0x627117257d4bcc42L,0xe90a3d9bb90109baL,0x3b1bdd571323e1e0L, + 0xb78e9bd55eae1599L } }, + /* 51 << 182 */ + { { 0x0794b7469e03d278L,0x80178605d70e6297L,0x171792f899c97855L, + 0x11b393eef5a86b5cL }, + { 0x48ef6582d8884f27L,0xbd44737abf19ba5fL,0x8698de4ca42062c6L, + 0x8975eb8061ce9c54L } }, + /* 52 << 182 */ + { { 0xd50e57c7d7fe71f3L,0x15342190bc97ce38L,0x51bda2de4df07b63L, + 0xba12aeae200eb87dL }, + { 0xabe135d2a9b4f8f6L,0x04619d65fad6d99cL,0x4a6683a77994937cL, + 0x7a778c8b6f94f09aL } }, + /* 53 << 182 */ + { { 0x8c50862320a71b89L,0x241a2aed1c229165L,0x352be595aaf83a99L, + 0x9fbfee7f1562bac8L }, + { 0xeaf658b95c4017e3L,0x1dc7f9e015120b86L,0xd84f13dd4c034d6fL, + 0x283dd737eaea3038L } }, + /* 54 << 182 */ + { { 0x197f2609cd85d6a2L,0x6ebbc345fae60177L,0xb80f031b4e12fedeL, + 0xde55d0c207a2186bL }, + { 
0x1fb3e37f24dcdd5aL,0x8d602da57ed191fbL,0x108fb05676023e0dL, + 0x70178c71459c20c0L } }, + /* 55 << 182 */ + { { 0xfad5a3863fe54cf0L,0xa4a3ec4f02bbb475L,0x1aa5ec20919d94d7L, + 0x5d3b63b5a81e4ab3L }, + { 0x7fa733d85ad3d2afL,0xfbc586ddd1ac7a37L,0x282925de40779614L, + 0xfe0ffffbe74a242aL } }, + /* 56 << 182 */ + { { 0x3f39e67f906151e5L,0xcea27f5f55e10649L,0xdca1d4e1c17cf7b7L, + 0x0c326d122fe2362dL }, + { 0x05f7ac337dd35df3L,0x0c3b7639c396dbdfL,0x0912f5ac03b7db1cL, + 0x9dea4b705c9ed4a9L } }, + /* 57 << 182 */ + { { 0x475e6e53aae3f639L,0xfaba0e7cfc278bacL,0x16f9e2219490375fL, + 0xaebf9746a5a7ed0aL }, + { 0x45f9af3ff41ad5d6L,0x03c4623cb2e99224L,0x82c5bb5cb3cf56aaL, + 0x6431181934567ed3L } }, + /* 58 << 182 */ + { { 0xec57f2118be489acL,0x2821895db9a1104bL,0x610dc8756064e007L, + 0x8e526f3f5b20d0feL }, + { 0x6e71ca775b645aeeL,0x3d1dcb9f800e10ffL,0x36b51162189cf6deL, + 0x2c5a3e306bb17353L } }, + /* 59 << 182 */ + { { 0xc186cd3e2a6c6fbfL,0xa74516fa4bf97906L,0x5b4b8f4b279d6901L, + 0x0c4e57b42b573743L }, + { 0x75fdb229b6e386b6L,0xb46793fd99deac27L,0xeeec47eacf712629L, + 0xe965f3c4cbc3b2ddL } }, + /* 60 << 182 */ + { { 0x8dd1fb83425c6559L,0x7fc00ee60af06fdaL,0xe98c922533d956dfL, + 0x0f1ef3354fbdc8a2L }, + { 0x2abb5145b79b8ea2L,0x40fd2945bdbff288L,0x6a814ac4d7185db7L, + 0xc4329d6fc084609aL } }, + /* 61 << 182 */ + { { 0xc9ba7b52ed1be45dL,0x891dd20de4cd2c74L,0x5a4d4a7f824139b1L, + 0x66c17716b873c710L }, + { 0x5e5bc1412843c4e0L,0xd5ac4817b97eb5bfL,0xc0f8af54450c95c7L, + 0xc91b3fa0318406c5L } }, + /* 62 << 182 */ + { { 0x360c340aab9d97f8L,0xfb57bd0790a2d611L,0x4339ae3ca6a6f7e5L, + 0x9c1fcd2a2feb8a10L }, + { 0x972bcca9c7ea7432L,0x1b0b924c308076f6L,0x80b2814a2a5b4ca5L, + 0x2f78f55b61ef3b29L } }, + /* 63 << 182 */ + { { 0xf838744ac18a414fL,0xc611eaae903d0a86L,0x94dabc162a453f55L, + 0xe6f2e3da14efb279L }, + { 0x5b7a60179320dc3cL,0x692e382f8df6b5a4L,0x3f5e15e02d40fa90L, + 0xc87883ae643dd318L } }, + /* 64 << 182 */ + { { 0x511053e453544774L,0x834d0ecc3adba2bcL,0x4215d7f7bae371f5L, + 0xfcfd57bf6c8663bcL }, + { 0xded2383dd6901b1dL,0x3b49fbb4b5587dc3L,0xfd44a08d07625f62L, + 0x3ee4d65b9de9b762L } }, + /* 0 << 189 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 189 */ + { { 0x64e5137d0d63d1faL,0x658fc05202a9d89fL,0x4889487450436309L, + 0xe9ae30f8d598da61L }, + { 0x2ed710d1818baf91L,0xe27e9e068b6a0c20L,0x1e28dcfb1c1a6b44L, + 0x883acb64d6ac57dcL } }, + /* 2 << 189 */ + { { 0x8735728dc2c6ff70L,0x79d6122fc5dc2235L,0x23f5d00319e277f9L, + 0x7ee84e25dded8cc7L }, + { 0x91a8afb063cd880aL,0x3f3ea7c63574af60L,0x0cfcdc8402de7f42L, + 0x62d0792fb31aa152L } }, + /* 3 << 189 */ + { { 0x8e1b4e438a5807ceL,0xad283893e4109a7eL,0xc30cc9cbafd59ddaL, + 0xf65f36c63d8d8093L }, + { 0xdf31469ea60d32b2L,0xee93df4b3e8191c8L,0x9c1017c5355bdeb5L, + 0xd26231858616aa28L } }, + /* 4 << 189 */ + { { 0xb02c83f9dec31a21L,0x988c8b236ad9d573L,0x53e983aea57be365L, + 0xe968734d646f834eL }, + { 0x9137ea8f5da6309bL,0x10f3a624c1f1ce16L,0x782a9ea2ca440921L, + 0xdf94739e5b46f1b5L } }, + /* 5 << 189 */ + { { 0x9f9be006cce85c9bL,0x360e70d6a4c7c2d3L,0x2cd5beeaaefa1e60L, + 0x64cf63c08c3d2b6dL }, + { 0xfb107fa3e1cf6f90L,0xb7e937c6d5e044e6L,0x74e8ca78ce34db9fL, + 0x4f8b36c13e210bd0L } }, + /* 6 << 189 */ + { { 0x1df165a434a35ea8L,0x3418e0f74d4412f6L,0x5af1f8af518836c3L, + 0x42ceef4d130e1965L }, + { 0x5560ca0b543a1957L,0xc33761e5886cb123L,0x66624b1ffe98ed30L, + 0xf772f4bf1090997dL } }, + /* 7 << 189 */ + { { 0xf4e540bb4885d410L,0x7287f8109ba5f8d7L,0x22d0d865de98dfb1L, + 0x49ff51a1bcfbb8a3L }, + { 
0xb6b6fa536bc3012eL,0x3d31fd72170d541dL,0x8018724f4b0f4966L, + 0x79e7399f87dbde07L } }, + /* 8 << 189 */ + { { 0x56f8410ef4f8b16aL,0x97241afec47b266aL,0x0a406b8e6d9c87c1L, + 0x803f3e02cd42ab1bL }, + { 0x7f0309a804dbec69L,0xa83b85f73bbad05fL,0xc6097273ad8e197fL, + 0xc097440e5067adc1L } }, + /* 9 << 189 */ + { { 0x730eafb63524ff16L,0xd7f9b51e823fc6ceL,0x27bd0d32443e4ac0L, + 0x40c59ad94d66f217L }, + { 0x6c33136f17c387a4L,0x5043b8d5eb86804dL,0x74970312675a73c9L, + 0x838fdb31f16669b6L } }, + /* 10 << 189 */ + { { 0xc507b6dd418e7dddL,0x39888d93472f19d6L,0x7eae26be0c27eb4dL, + 0x17b53ed3fbabb884L }, + { 0xfc27021b2b01ae4fL,0x88462e87cf488682L,0xbee096ec215e2d87L, + 0xeb2fea9ad242e29bL } }, + /* 11 << 189 */ + { { 0x5d985b5fb821fc28L,0x89d2e197dc1e2ad2L,0x55b566b89030ba62L, + 0xe3fd41b54f41b1c6L }, + { 0xb738ac2eb9a96d61L,0x7f8567ca369443f4L,0x8698622df803a440L, + 0x2b5862368fe2f4dcL } }, + /* 12 << 189 */ + { { 0xbbcc00c756b95bceL,0x5ec03906616da680L,0x79162ee672214252L, + 0x43132b6386a892d2L }, + { 0x4bdd3ff22f3263bfL,0xd5b3733c9cd0a142L,0x592eaa8244415ccbL, + 0x663e89248d5474eaL } }, + /* 13 << 189 */ + { { 0x8058a25e5236344eL,0x82e8df9dbda76ee6L,0xdcf6efd811cc3d22L, + 0x00089cda3b4ab529L }, + { 0x91d3a071bd38a3dbL,0x4ea97fc0ef72b925L,0x0c9fc15bea3edf75L, + 0x5a6297cda4348ed3L } }, + /* 14 << 189 */ + { { 0x0d38ab35ce7c42d4L,0x9fd493ef82feab10L,0x46056b6d82111b45L, + 0xda11dae173efc5c3L }, + { 0xdc7402785545a7fbL,0xbdb2601c40d507e6L,0x121dfeeb7066fa58L, + 0x214369a839ae8c2aL } }, + /* 15 << 189 */ + { { 0x195709cb06e0956cL,0x4c9d254f010cd34bL,0xf51e13f70471a532L, + 0xe19d67911e73054dL }, + { 0xf702a628db5c7be3L,0xc7141218b24dde05L,0xdc18233cf29b2e2eL, + 0x3a6bd1e885342dbaL } }, + /* 16 << 189 */ + { { 0x3f747fa0b311898cL,0xe2a272e4cd0eac65L,0x4bba5851f914d0bcL, + 0x7a1a9660c4a43ee3L }, + { 0xe5a367cea1c8cde9L,0x9d958ba97271abe3L,0xf3ff7eb63d1615cdL, + 0xa2280dcef5ae20b0L } }, + /* 17 << 189 */ + { { 0x56dba5c1cf640147L,0xea5a2e3d5e83d118L,0x04cd6b6dda24c511L, + 0x1c0f4671e854d214L }, + { 0x91a6b7a969565381L,0xdc966240decf1f5bL,0x1b22d21cfcf5d009L, + 0x2a05f6419021dbd5L } }, + /* 18 << 189 */ + { { 0x8c0ed566d4312483L,0x5179a95d643e216fL,0xcc185fec17044493L, + 0xb306333954991a21L }, + { 0xd801ecdb0081a726L,0x0149b0c64fa89bbbL,0xafe9065a4391b6b9L, + 0xedc92786d633f3a3L } }, + /* 19 << 189 */ + { { 0xe408c24aae6a8e13L,0x85833fde9f3897abL,0x43800e7ed81a0715L, + 0xde08e346b44ffc5fL }, + { 0x7094184ccdeff2e0L,0x49f9387b165eaed1L,0x635d6129777c468aL, + 0x8c0dcfd1538c2dd8L } }, + /* 20 << 189 */ + { { 0xd6d9d9e37a6a308bL,0x623758304c2767d3L,0x874a8bc6f38cbeb6L, + 0xd94d3f1accb6fd9eL }, + { 0x92a9735bba21f248L,0x272ad0e56cd1efb0L,0x7437b69c05b03284L, + 0xe7f047026948c225L } }, + /* 21 << 189 */ + { { 0x8a56c04acba2ececL,0x0c181270e3a73e41L,0x6cb34e9d03e93725L, + 0xf77c8713496521a9L }, + { 0x94569183fa7f9f90L,0xf2e7aa4c8c9707adL,0xced2c9ba26c1c9a3L, + 0x9109fe9640197507L } }, + /* 22 << 189 */ + { { 0x9ae868a9e9adfe1cL,0x3984403d314e39bbL,0xb5875720f2fe378fL, + 0x33f901e0ba44a628L }, + { 0xea1125fe3652438cL,0xae9ec4e69dd1f20bL,0x1e740d9ebebf7fbdL, + 0x6dbd3ddc42dbe79cL } }, + /* 23 << 189 */ + { { 0x62082aecedd36776L,0xf612c478e9859039L,0xa493b201032f7065L, + 0xebd4d8f24ff9b211L }, + { 0x3f23a0aaaac4cb32L,0xea3aadb715ed4005L,0xacf17ea4afa27e63L, + 0x56125c1ac11fd66cL } }, + /* 24 << 189 */ + { { 0x266344a43794f8dcL,0xdcca923a483c5c36L,0x2d6b6bbf3f9d10a0L, + 0xb320c5ca81d9bdf3L }, + { 0x620e28ff47b50a95L,0x933e3b01cef03371L,0xf081bf8599100153L, + 0x183be9a0c3a8c8d6L } }, + /* 25 << 189 */ + { { 
0x4e3ddc5ad6bbe24dL,0xc6c7463053843795L,0x78193dd765ec2d4cL, + 0xb8df26cccd3c89b2L }, + { 0x98dbe3995a483f8dL,0x72d8a9577dd3313aL,0x65087294ab0bd375L, + 0xfcd892487c259d16L } }, + /* 26 << 189 */ + { { 0x8a9443d77613aa81L,0x8010080085fe6584L,0x70fc4dbc7fb10288L, + 0xf58280d3e86beee8L }, + { 0x14fdd82f7c978c38L,0xdf1204c10de44d7bL,0xa08a1c844160252fL, + 0x591554cac17646a5L } }, + /* 27 << 189 */ + { { 0x214a37d6a05bd525L,0x48d5f09b07957b3cL,0x0247cdcbd7109bc9L, + 0x40f9e4bb30599ce7L }, + { 0xc325fa03f46ad2ecL,0x00f766cfc3e3f9eeL,0xab556668d43a4577L, + 0x68d30a613ee03b93L } }, + /* 28 << 189 */ + { { 0x7ddc81ea77b46a08L,0xcf5a6477c7480699L,0x43a8cb346633f683L, + 0x1b867e6b92363c60L }, + { 0x439211141f60558eL,0xcdbcdd632f41450eL,0x7fc04601cc630e8bL, + 0xea7c66d597038b43L } }, + /* 29 << 189 */ + { { 0x7259b8a504e99fd8L,0x98a8dd124785549aL,0x0e459a7c840552e1L, + 0xcdfcf4d04bb0909eL }, + { 0x34a86db253758da7L,0xe643bb83eac997e1L,0x96400bd7530c5b7eL, + 0x9f97af87b41c8b52L } }, + /* 30 << 189 */ + { { 0x34fc8820fbeee3f9L,0x93e5349049091afdL,0x764b9be59a31f35cL, + 0x71f3786457e3d924L }, + { 0x02fb34e0943aa75eL,0xa18c9c58ab8ff6e4L,0x080f31b133cf0d19L, + 0x5c9682db083518a7L } }, + /* 31 << 189 */ + { { 0x873d4ca6b709c3deL,0x64a842623575b8f0L,0x6275da1f020154bbL, + 0x97678caad17cf1abL }, + { 0x8779795f951a95c3L,0xdd35b16350fccc08L,0x3270962733d8f031L, + 0x3c5ab10a498dd85cL } }, + /* 32 << 189 */ + { { 0xb6c185c341dca566L,0x7de7fedad8622aa3L,0x99e84d92901b6dfbL, + 0x30a02b0e7c4ad288L }, + { 0xc7c81daa2fd3cf36L,0xd1319547df89e59fL,0xb2be8184cd496733L, + 0xd5f449eb93d3412bL } }, + /* 33 << 189 */ + { { 0x7ea41b1b25fe531dL,0xf97974326a1d5646L,0x86067f722bde501aL, + 0xf91481c00c85e89cL }, + { 0xca8ee465f8b05bc6L,0x1844e1cf02e83cdaL,0xca82114ab4dbe33bL, + 0x0f9f87694eabfde2L } }, + /* 34 << 189 */ + { { 0x4936b1c038b27fe2L,0x63b6359baba402dfL,0x40c0ea2f656bdbabL, + 0x9c992a896580c39cL }, + { 0x600e8f152a60aed1L,0xeb089ca4e0bf49dfL,0x9c233d7d2d42d99aL, + 0x648d3f954c6bc2faL } }, + /* 35 << 189 */ + { { 0xdcc383a8e1add3f3L,0xf42c0c6a4f64a348L,0x2abd176f0030dbdbL, + 0x4de501a37d6c215eL }, + { 0x4a107c1f4b9a64bcL,0xa77f0ad32496cd59L,0xfb78ac627688dffbL, + 0x7025a2ca67937d8eL } }, + /* 36 << 189 */ + { { 0xfde8b2d1d1a8f4e7L,0xf5b3da477354927cL,0xe48606a3d9205735L, + 0xac477cc6e177b917L }, + { 0xfb1f73d2a883239aL,0xe12572f6cc8b8357L,0x9d355e9cfb1f4f86L, + 0x89b795f8d9f3ec6eL } }, + /* 37 << 189 */ + { { 0x27be56f1b54398dcL,0x1890efd73fedeed5L,0x62f77f1f9c6d0140L, + 0x7ef0e314596f0ee4L }, + { 0x50ca6631cc61dab3L,0x4a39801df4866e4fL,0x66c8d032ae363b39L, + 0x22c591e52ead66aaL } }, + /* 38 << 189 */ + { { 0x954ba308de02a53eL,0x2a6c060fd389f357L,0xe6cfcde8fbf40b66L, + 0x8e02fc56c6340ce1L }, + { 0xe495779573adb4baL,0x7b86122ca7b03805L,0x63f835120c8e6fa6L, + 0x83660ea0057d7804L } }, + /* 39 << 189 */ + { { 0xbad7910521ba473cL,0xb6c50beeded5389dL,0xee2caf4daa7c9bc0L, + 0xd97b8de48c4e98a7L }, + { 0xa9f63e70ab3bbddbL,0x3898aabf2597815aL,0x7659af89ac15b3d9L, + 0xedf7725b703ce784L } }, + /* 40 << 189 */ + { { 0x25470fabe085116bL,0x04a4337587285310L,0x4e39187ee2bfd52fL, + 0x36166b447d9ebc74L }, + { 0x92ad433cfd4b322cL,0x726aa817ba79ab51L,0xf96eacd8c1db15ebL, + 0xfaf71e910476be63L } }, + /* 41 << 189 */ + { { 0xdd69a640641fad98L,0xb799591829622559L,0x03c6daa5de4199dcL, + 0x92cadc97ad545eb4L }, + { 0x1028238b256534e4L,0x73e80ce68595409aL,0x690d4c66d05dc59bL, + 0xc95f7b8f981dee80L } }, + /* 42 << 189 */ + { { 0xf4337014d856ac25L,0x441bd9ddac524dcaL,0x640b3d855f0499f5L, + 0x39cf84a9d5fda182L }, + { 
0x04e7b055b2aa95a0L,0x29e33f0a0ddf1860L,0x082e74b5423f6b43L, + 0x217edeb90aaa2b0fL } }, + /* 43 << 189 */ + { { 0x58b83f3583cbea55L,0xc485ee4dbc185d70L,0x833ff03b1e5f6992L, + 0xb5b9b9cccf0c0dd5L }, + { 0x7caaee8e4e9e8a50L,0x462e907b6269dafdL,0x6ed5cee9fbe791c6L, + 0x68ca3259ed430790L } }, + /* 44 << 189 */ + { { 0x2b72bdf213b5ba88L,0x60294c8a35ef0ac4L,0x9c3230ed19b99b08L, + 0x560fff176c2589aaL }, + { 0x552b8487d6770374L,0xa373202d9a56f685L,0xd3e7f90745f175d9L, + 0x3c2f315fd080d810L } }, + /* 45 << 189 */ + { { 0x1130e9dd7b9520e8L,0xc078f9e20af037b5L,0x38cd2ec71e9c104cL, + 0x0f684368c472fe92L }, + { 0xd3f1b5ed6247e7efL,0xb32d33a9396dfe21L,0x46f59cf44a9aa2c2L, + 0x69cd5168ff0f7e41L } }, + /* 46 << 189 */ + { { 0x3f59da0f4b3234daL,0xcf0b0235b4579ebeL,0x6d1cbb256d2476c7L, + 0x4f0837e69dc30f08L }, + { 0x9a4075bb906f6e98L,0x253bb434c761e7d1L,0xde2e645f6e73af10L, + 0xb89a40600c5f131cL } }, + /* 47 << 189 */ + { { 0xd12840c5b8cc037fL,0x3d093a5b7405bb47L,0x6202c253206348b8L, + 0xbf5d57fcc55a3ca7L }, + { 0x89f6c90c8c3bef48L,0x23ac76235a0a960aL,0xdfbd3d6b552b42abL, + 0x3ef22458132061f6L } }, + /* 48 << 189 */ + { { 0xd74e9bdac97e6516L,0x88779360c230f49eL,0xa6ec1de31e74ea49L, + 0x581dcee53fb645a2L }, + { 0xbaef23918f483f14L,0x6d2dddfcd137d13bL,0x54cde50ed2743a42L, + 0x89a34fc5e4d97e67L } }, + /* 49 << 189 */ + { { 0x13f1f5b312e08ce5L,0xa80540b8a7f0b2caL,0x854bcf7701982805L, + 0xb8653ffd233bea04L }, + { 0x8e7b878702b0b4c9L,0x2675261f9acb170aL,0x061a9d90930c14e5L, + 0xb59b30e0def0abeaL } }, + /* 50 << 189 */ + { { 0x1dc19ea60200ec7dL,0xb6f4a3f90bce132bL,0xb8d5de90f13e27e0L, + 0xbaee5ef01fade16fL }, + { 0x6f406aaae4c6cf38L,0xab4cfe06d1369815L,0x0dcffe87efd550c6L, + 0x9d4f59c775ff7d39L } }, + /* 51 << 189 */ + { { 0xb02553b151deb6adL,0x812399a4b1877749L,0xce90f71fca6006e1L, + 0xc32363a6b02b6e77L }, + { 0x02284fbedc36c64dL,0x86c81e31a7e1ae61L,0x2576c7e5b909d94aL, + 0x8b6f7d02818b2bb0L } }, + /* 52 << 189 */ + { { 0xeca3ed0756faa38aL,0xa3790e6c9305bb54L,0xd784eeda7bc73061L, + 0xbd56d3696dd50614L }, + { 0xd6575949229a8aa9L,0xdcca8f474595ec28L,0x814305c106ab4fe6L, + 0xc8c3976824f43f16L } }, + /* 53 << 189 */ + { { 0xe2a45f36523f2b36L,0x995c6493920d93bbL,0xf8afdab790f1632bL, + 0x79ebbecd1c295954L }, + { 0xc7bb3ddb79592f48L,0x67216a7b5f88e998L,0xd91f098bbc01193eL, + 0xf7d928a5b1db83fcL } }, + /* 54 << 189 */ + { { 0x55e38417e991f600L,0x2a91113e2981a934L,0xcbc9d64806b13bdeL, + 0xb011b6ac0755ff44L }, + { 0x6f4cb518045ec613L,0x522d2d31c2f5930aL,0x5acae1af382e65deL, + 0x5764306727bc966fL } }, + /* 55 << 189 */ + { { 0x5e12705d1c7193f0L,0xf0f32f473be8858eL,0x785c3d7d96c6dfc7L, + 0xd75b4a20bf31795dL }, + { 0x91acf17b342659d4L,0xe596ea3444f0378fL,0x4515708fce52129dL, + 0x17387e1e79f2f585L } }, + /* 56 << 189 */ + { { 0x72cfd2e949dee168L,0x1ae052233e2af239L,0x009e75be1d94066aL, + 0x6cca31c738abf413L }, + { 0xb50bd61d9bc49908L,0x4a9b4a8cf5e2bc1eL,0xeb6cc5f7946f83acL, + 0x27da93fcebffab28L } }, + /* 57 << 189 */ + { { 0xea314c964821c8c5L,0x8de49deda83c15f4L,0x7a64cf207af33004L, + 0x45f1bfebc9627e10L }, + { 0x878b062654b9df60L,0x5e4fdc3ca95c0b33L,0xe54a37cac2035d8eL, + 0x9087cda980f20b8cL } }, + /* 58 << 189 */ + { { 0x36f61c238319ade4L,0x766f287ade8cfdf8L,0x48821948346f3705L, + 0x49a7b85316e4f4a2L }, + { 0xb9b3f8a75cedadfdL,0x8f5628158db2a815L,0xc0b7d55401f68f95L, + 0x12971e27688a208eL } }, + /* 59 << 189 */ + { { 0xc9f8b696d0ff34fcL,0x20824de21222718cL,0x7213cf9f0c95284dL, + 0xe2ad741bdc158240L }, + { 0x0ee3a6df54043ccfL,0x16ff479bd84412b3L,0xf6c74ee0dfc98af0L, + 0xa78a169f52fcd2fbL } }, + /* 60 << 189 */ + { { 
0xd8ae874699c930e9L,0x1d33e85849e117a5L,0x7581fcb46624759fL, + 0xde50644f5bedc01dL }, + { 0xbeec5d00caf3155eL,0x672d66acbc73e75fL,0x86b9d8c6270b01dbL, + 0xd249ef8350f55b79L } }, + /* 61 << 189 */ + { { 0x6131d6d473978fe3L,0xcc4e4542754b00a1L,0x4e05df0557dfcfe9L, + 0x94b29cdd51ef6bf0L }, + { 0xe4530cff9bc7edf2L,0x8ac236fdd3da65f3L,0x0faf7d5fc8eb0b48L, + 0x4d2de14c660eb039L } }, + /* 62 << 189 */ + { { 0xc006bba760430e54L,0x10a2d0d6da3289abL,0x9c037a5dd7979c59L, + 0x04d1f3d3a116d944L }, + { 0x9ff224738a0983cdL,0x28e25b38c883cabbL,0xe968dba547a58995L, + 0x2c80b505774eebdfL } }, + /* 63 << 189 */ + { { 0xee763b714a953bebL,0x502e223f1642e7f6L,0x6fe4b64161d5e722L, + 0x9d37c5b0dbef5316L }, + { 0x0115ed70f8330bc7L,0x139850e675a72789L,0x27d7faecffceccc2L, + 0x3016a8604fd9f7f6L } }, + /* 64 << 189 */ + { { 0xc492ec644cd8f64cL,0x58a2d790279d7b51L,0x0ced1fc51fc75256L, + 0x3e658aed8f433017L }, + { 0x0b61942e05da59ebL,0xba3d60a30ddc3722L,0x7c311cd1742e7f87L, + 0x6473ffeef6b01b6eL } }, + /* 0 << 196 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 196 */ + { { 0x8303604f692ac542L,0xf079ffe1227b91d3L,0x19f63e6315aaf9bdL, + 0xf99ee565f1f344fbL }, + { 0x8a1d661fd6219199L,0x8c883bc6d48ce41cL,0x1065118f3c74d904L, + 0x713889ee0faf8b1bL } }, + /* 2 << 196 */ + { { 0x972b3f8f81a1b3beL,0x4f3ce145ce2764a0L,0xe2d0f1cc28c4f5f7L, + 0xdeee0c0dc7f3985bL }, + { 0x7df4adc0d39e25c3L,0x40619820c467a080L,0x440ebc9361cf5a58L, + 0x527729a6422ad600L } }, + /* 3 << 196 */ + { { 0xca6c0937b1b76ba6L,0x1a2eab854d2026dcL,0xb1715e1519d9ae0aL, + 0xf1ad9199bac4a026L }, + { 0x35b3dfb807ea7b0eL,0xedf5496f3ed9eb89L,0x8932e5ff2d6d08abL, + 0xf314874e25bd2731L } }, + /* 4 << 196 */ + { { 0xefb26a753f73f449L,0x1d1c94f88d44fc79L,0x49f0fbc53bc0dc4dL, + 0xb747ea0b3698a0d0L }, + { 0x5218c3fe228d291eL,0x35b804b543c129d6L,0xfac859b8d1acc516L, + 0x6c10697d95d6e668L } }, + /* 5 << 196 */ + { { 0xc38e438f0876fd4eL,0x45f0c30783d2f383L,0x203cc2ecb10934cbL, + 0x6a8f24392c9d46eeL }, + { 0xf16b431b65ccde7bL,0x41e2cd1827e76a6fL,0xb9c8cf8f4e3484d7L, + 0x64426efd8315244aL } }, + /* 6 << 196 */ + { { 0x1c0a8e44fc94dea3L,0x34c8cdbfdad6a0b0L,0x919c384004113cefL, + 0xfd32fba415490ffaL }, + { 0x58d190f6795dcfb7L,0xfef01b0383588bafL,0x9e6d1d63ca1fc1c0L, + 0x53173f96f0a41ac9L } }, + /* 7 << 196 */ + { { 0x2b1d402aba16f73bL,0x2fb310148cf9b9fcL,0x2d51e60e446ef7bfL, + 0xc731021bb91e1745L }, + { 0x9d3b47244fee99d4L,0x4bca48b6fac5c1eaL,0x70f5f514bbea9af7L, + 0x751f55a5974c283aL } }, + /* 8 << 196 */ + { { 0x6e30251acb452fdbL,0x31ee696550f30650L,0xb0b3e508933548d9L, + 0xb8949a4ff4b0ef5bL }, + { 0x208b83263c88f3bdL,0xab147c30db1d9989L,0xed6515fd44d4df03L, + 0x17a12f75e72eb0c5L } }, + /* 9 << 196 */ + { { 0x3b59796d36cf69dbL,0x1219eee956670c18L,0xfe3341f77a070d8eL, + 0x9b70130ba327f90cL }, + { 0x36a324620ae18e0eL,0x2021a62346c0a638L,0x251b5817c62eb0d4L, + 0x87bfbcdf4c762293L } }, + /* 10 << 196 */ + { { 0xf78ab505cdd61d64L,0x8c7a53fcc8c18857L,0xa653ce6f16147515L, + 0x9c923aa5ea7d52d5L }, + { 0xc24709cb5c18871fL,0x7d53bec873b3cc74L,0x59264afffdd1d4c4L, + 0x5555917e240da582L } }, + /* 11 << 196 */ + { { 0xcae8bbda548f5a0eL,0x1910eaba3bbfbbe1L,0xae5796857677afc3L, + 0x49ea61f173ff0b5cL }, + { 0x786554784f7c3922L,0x95d337cd20c68eefL,0x68f1e1e5df779ab9L, + 0x14b491b0b5cf69a8L } }, + /* 12 << 196 */ + { { 0x7a6cbbe028e3fe89L,0xe7e1fee4c5aac0ebL,0x7f47eda5697e5140L, + 0x4f450137b454921fL }, + { 0xdb625f8495cd8185L,0x74be0ba1cdb2e583L,0xaee4fd7cdd5e6de4L, + 0x4251437de8101739L } }, + /* 13 << 196 */ + { { 
0x686d72a0ac620366L,0x4be3fb9cb6d59344L,0x6e8b44e7a1eb75b9L, + 0x84e39da391a5c10cL }, + { 0x37cc1490b38f0409L,0x029519432c2ade82L,0x9b6887831190a2d8L, + 0x25627d14231182baL } }, + /* 14 << 196 */ + { { 0x6eb550aa658a6d87L,0x1405aaa7cf9c7325L,0xd147142e5c8748c9L, + 0x7f637e4f53ede0e0L }, + { 0xf8ca277614ffad2cL,0xe58fb1bdbafb6791L,0x17158c23bf8f93fcL, + 0x7f15b3730a4a4655L } }, + /* 15 << 196 */ + { { 0x39d4add2d842ca72L,0xa71e43913ed96305L,0x5bb09cbe6700be14L, + 0x68d69d54d8befcf6L }, + { 0xa45f536737183bcfL,0x7152b7bb3370dff7L,0xcf887baabf12525bL, + 0xe7ac7bddd6d1e3cdL } }, + /* 16 << 196 */ + { { 0x25914f7881fdad90L,0xcf638f560d2cf6abL,0xb90bc03fcc054de5L, + 0x932811a718b06350L }, + { 0x2f00b3309bbd11ffL,0x76108a6fb4044974L,0x801bb9e0a851d266L, + 0x0dd099bebf8990c1L } }, + /* 17 << 196 */ + { { 0x58c5aaaaabe32986L,0x0fe9dd2a50d59c27L,0x84951ff48d307305L, + 0x6c23f82986529b78L }, + { 0x50bb22180b136a79L,0x7e2174de77a20996L,0x6f00a4b9c0bb4da6L, + 0x89a25a17efdde8daL } }, + /* 18 << 196 */ + { { 0xf728a27ec11ee01dL,0xf900553ae5f10dfbL,0x189a83c802ec893cL, + 0x3ca5bdc123f66d77L }, + { 0x9878153797eada9fL,0x59c50ab310256230L,0x346042d9323c69b3L, + 0x1b715a6d2c460449L } }, + /* 19 << 196 */ + { { 0xa41dd4766ae06e0bL,0xcdd7888e9d42e25fL,0x0f395f7456b25a20L, + 0xeadfe0ae8700e27eL }, + { 0xb09d52a969950093L,0x3525d9cb327f8d40L,0xb8235a9467df886aL, + 0x77e4b0dd035faec2L } }, + /* 20 << 196 */ + { { 0x115eb20a517d7061L,0x77fe34336c2df683L,0x6870ddc7cdc6fc67L, + 0xb16105880b87de83L }, + { 0x343584cad9c4ddbeL,0xb3164f1c3d754be2L,0x0731ed3ac1e6c894L, + 0x26327dec4f6b904cL } }, + /* 21 << 196 */ + { { 0x9d49c6de97b5cd32L,0x40835daeb5eceecdL,0xc66350edd9ded7feL, + 0x8aeebb5c7a678804L }, + { 0x51d42fb75b8ee9ecL,0xd7a17bdd8e3ca118L,0x40d7511a2ef4400eL, + 0xc48990ac875a66f4L } }, + /* 22 << 196 */ + { { 0x8de07d2a2199e347L,0xbee755562a39e051L,0x56918786916e51dcL, + 0xeb1913134a2d89ecL }, + { 0x6679610d37d341edL,0x434fbb4156d51c2bL,0xe54b7ee7d7492dbaL, + 0xaa33a79a59021493L } }, + /* 23 << 196 */ + { { 0x49fc5054e4bd6d3dL,0x09540f045ab551d0L,0x8acc90854942d3a6L, + 0x231af02f2d28323bL }, + { 0x93458cac0992c163L,0x1fef8e71888e3bb4L,0x27578da5be8c268cL, + 0xcc8be792e805ec00L } }, + /* 24 << 196 */ + { { 0x29267baec61c3855L,0xebff429d58c1fd3bL,0x22d886c08c0b93b8L, + 0xca5e00b22ddb8953L }, + { 0xcf330117c3fed8b7L,0xd49ac6fa819c01f6L,0x6ddaa6bd3c0fbd54L, + 0x917430688049a2cfL } }, + /* 25 << 196 */ + { { 0xd67f981eaff2ef81L,0xc3654d352818ae80L,0x81d050441b2aa892L, + 0x2db067bf3d099328L }, + { 0xe7c79e86703dcc97L,0xe66f9b37e133e215L,0xcdf119a6e39a7a5cL, + 0x47c60de3876f1b61L } }, + /* 26 << 196 */ + { { 0x6e405939d860f1b2L,0x3e9a1dbcf5ed4d4aL,0x3f23619ec9b6bcbdL, + 0x5ee790cf734e4497L }, + { 0xf0a834b15bdaf9bbL,0x02cedda74ca295f0L,0x4619aa2bcb8e378cL, + 0xe5613244cc987ea4L } }, + /* 27 << 196 */ + { { 0x0bc022cc76b23a50L,0x4a2793ad0a6c21ceL,0x3832878089cac3f5L, + 0x29176f1bcba26d56L }, + { 0x062961874f6f59ebL,0x86e9bca98bdc658eL,0x2ca9c4d357e30402L, + 0x5438b216516a09bbL } }, + /* 28 << 196 */ + { { 0x0a6a063c7672765aL,0x37a3ce640547b9bfL,0x42c099c898b1a633L, + 0xb5ab800d05ee6961L }, + { 0xf1963f5911a5acd6L,0xbaee615746201063L,0x36d9a649a596210aL, + 0xaed043631ba7138cL } }, + /* 29 << 196 */ + { { 0xcf817d1ca4a82b76L,0x5586960ef3806be9L,0x7ab67c8909dc6bb5L, + 0x52ace7a0114fe7ebL }, + { 0xcd987618cbbc9b70L,0x4f06fd5a604ca5e1L,0x90af14ca6dbde133L, + 0x1afe4322948a3264L } }, + /* 30 << 196 */ + { { 0xa70d2ca6c44b2c6cL,0xab7267990ef87dfeL,0x310f64dc2e696377L, + 0x49b42e684c8126a0L }, + { 
0x0ea444c3cea0b176L,0x53a8ddf7cb269182L,0xf3e674ebbbba9dcbL, + 0x0d2878a8d8669d33L } }, + /* 31 << 196 */ + { { 0x04b935d5d019b6a3L,0xbb5cf88e406f1e46L,0xa1912d165b57c111L, + 0x9803fc2119ebfd78L }, + { 0x4f231c9ec07764a9L,0xd93286eeb75bd055L,0x83a9457d8ee6c9deL, + 0x046959156087ec90L } }, + /* 32 << 196 */ + { { 0x14c6dd8a58d6cd46L,0x9cb633b58e6634d2L,0xc1305047f81bc328L, + 0x12ede0e226a177e5L }, + { 0x332cca62065a6f4fL,0xc3a47ecd67be487bL,0x741eb1870f47ed1cL, + 0x99e66e58e7598b14L } }, + /* 33 << 196 */ + { { 0x6f0544ca63d0ff12L,0xe5efc784b610a05fL,0xf72917b17cad7b47L, + 0x3ff6ea20f2cac0c0L }, + { 0xcc23791bf21db8b7L,0x7dac70b1d7d93565L,0x682cda1d694bdaadL, + 0xeb88bb8c1023516dL } }, + /* 34 << 196 */ + { { 0xc4c634b4dfdbeb1bL,0x22f5ca72b4ee4deaL,0x1045a368e6524821L, + 0xed9e8a3f052b18b2L }, + { 0x9b7f2cb1b961f49aL,0x7fee2ec17b009670L,0x350d875422507a6dL, + 0x561bd7114db55f1dL } }, + /* 35 << 196 */ + { { 0x4c189ccc320bbcafL,0x568434cfdf1de48cL,0x6af1b00e0fa8f128L, + 0xf0ba9d028907583cL }, + { 0x735a400432ff9f60L,0x3dd8e4b6c25dcf33L,0xf2230f1642c74cefL, + 0xd8117623013fa8adL } }, + /* 36 << 196 */ + { { 0x36822876f51fe76eL,0x8a6811cc11d62589L,0xc3fc7e6546225718L, + 0xb7df2c9fc82fdbcdL }, + { 0x3b1d4e52dd7b205bL,0xb695947847a2e414L,0x05e4d793efa91148L, + 0xb47ed446fd2e9675L } }, + /* 37 << 196 */ + { { 0x1a7098b904c9d9bfL,0x661e28811b793048L,0xb1a16966b01ee461L, + 0xbc5213082954746fL }, + { 0xc909a0fc2477de50L,0xd80bb41c7dbd51efL,0xa85be7ec53294905L, + 0x6d465b1883958f97L } }, + /* 38 << 196 */ + { { 0x16f6f330fb6840fdL,0xfaaeb2143401e6c8L,0xaf83d30fccb5b4f8L, + 0x22885739266dec4bL }, + { 0x51b4367c7bc467dfL,0x926562e3d842d27aL,0xdfcb66140fea14a6L, + 0xeb394daef2734cd9L } }, + /* 39 << 196 */ + { { 0x3eeae5d211c0be98L,0xb1e6ed11814e8165L,0x191086bce52bce1cL, + 0x14b74cc6a75a04daL }, + { 0x63cf11868c060985L,0x071047de2dbd7f7cL,0x4e433b8bce0942caL, + 0xecbac447d8fec61dL } }, + /* 40 << 196 */ + { { 0x8f0ed0e2ebf3232fL,0xfff80f9ec52a2eddL,0xad9ab43375b55fdbL, + 0x73ca7820e42e0c11L }, + { 0x6dace0a0e6251b46L,0x89bc6b5c4c0d932dL,0x3438cd77095da19aL, + 0x2f24a9398d48bdfbL } }, + /* 41 << 196 */ + { { 0x99b47e46766561b7L,0x736600e60ed0322aL,0x06a47cb1638e1865L, + 0x927c1c2dcb136000L }, + { 0x295423370cc5df69L,0x99b37c0209d649a9L,0xc5f0043c6aefdb27L, + 0x6cdd99871be95c27L } }, + /* 42 << 196 */ + { { 0x69850931390420d2L,0x299c40ac0983efa4L,0x3a05e778af39aeadL, + 0x8427440843a45193L }, + { 0x6bcd0fb991a711a0L,0x461592c89f52ab17L,0xb49302b4da3c6ed6L, + 0xc51fddc7330d7067L } }, + /* 43 << 196 */ + { { 0x94babeb6da50d531L,0x521b840da6a7b9daL,0x5305151e404bdc89L, + 0x1bcde201d0d07449L }, + { 0xf427a78b3b76a59aL,0xf84841ce07791a1bL,0xebd314bebf91ed1cL, + 0x8e61d34cbf172943L } }, + /* 44 << 196 */ + { { 0x1d5dc4515541b892L,0xb186ee41fc9d9e54L,0x9d9f345ed5bf610dL, + 0x3e7ba65df6acca9fL }, + { 0x9dda787aa8369486L,0x09f9dab78eb5ba53L,0x5afb2033d6481bc3L, + 0x76f4ce30afa62104L } }, + /* 45 << 196 */ + { { 0xa8fa00cff4f066b5L,0x89ab5143461dafc2L,0x44339ed7a3389998L, + 0x2ff862f1bc214903L }, + { 0x2c88f985b05556e3L,0xcd96058e3467081eL,0x7d6a4176edc637eaL, + 0xe1743d0936a5acdcL } }, + /* 46 << 196 */ + { { 0x66fd72e27eb37726L,0xf7fa264e1481a037L,0x9fbd3bde45f4aa79L, + 0xed1e0147767c3e22L }, + { 0x7621f97982e7abe2L,0x19eedc7245f633f8L,0xe69b155e6137bf3aL, + 0xa0ad13ce414ee94eL } }, + /* 47 << 196 */ + { { 0x93e3d5241c0e651aL,0xab1a6e2a02ce227eL,0xe7af17974ab27ecaL, + 0x245446debd444f39L }, + { 0x59e22a2156c07613L,0x43deafcef4275498L,0x10834ccb67fd0946L, + 0xa75841e547406edfL } }, + /* 48 << 196 */ + { { 
0xebd6a6777b0ac93dL,0xa6e37b0d78f5e0d7L,0x2516c09676f5492bL, + 0x1e4bf8889ac05f3aL }, + { 0xcdb42ce04df0ba2bL,0x935d5cfd5062341bL,0x8a30333382acac20L, + 0x429438c45198b00eL } }, + /* 49 << 196 */ + { { 0x1d083bc9049d33faL,0x58b82dda946f67ffL,0xac3e2db867a1d6a3L, + 0x62e6bead1798aac8L }, + { 0xfc85980fde46c58cL,0xa7f6937969c8d7beL,0x23557927837b35ecL, + 0x06a933d8e0790c0cL } }, + /* 50 << 196 */ + { { 0x827c0e9b077ff55dL,0x53977798bb26e680L,0x595308741d9cb54fL, + 0xcca3f4494aac53efL }, + { 0x11dc5c87a07eda0fL,0xc138bccffd6400c8L,0x549680d313e5da72L, + 0xc93eed824540617eL } }, + /* 51 << 196 */ + { { 0xfd3db1574d0b75c0L,0x9716eb426386075bL,0x0639605c817b2c16L, + 0x09915109f1e4f201L }, + { 0x35c9a9285cca6c3bL,0xb25f7d1a3505c900L,0xeb9f7d20630480c4L, + 0xc3c7b8c62a1a501cL } }, + /* 52 << 196 */ + { { 0x3f99183c5a1f8e24L,0xfdb118fa9dd255f0L,0xb9b18b90c27f62a6L, + 0xe8f732f7396ec191L }, + { 0x524a2d910be786abL,0x5d32adef0ac5a0f5L,0x9b53d4d69725f694L, + 0x032a76c60510ba89L } }, + /* 53 << 196 */ + { { 0x840391a3ebeb1544L,0x44b7b88c3ed73ac3L,0xd24bae7a256cb8b3L, + 0x7ceb151ae394cb12L }, + { 0xbd6b66d05bc1e6a8L,0xec70cecb090f07bfL,0x270644ed7d937589L, + 0xee9e1a3d5f1dccfeL } }, + /* 54 << 196 */ + { { 0xb0d40a84745b98d2L,0xda429a212556ed40L,0xf676eced85148cb9L, + 0x5a22d40cded18936L }, + { 0x3bc4b9e570e8a4ceL,0xbfd1445b9eae0379L,0xf23f2c0c1a0bd47eL, + 0xa9c0bb31e1845531L } }, + /* 55 << 196 */ + { { 0x9ddc4d600a4c3f6bL,0xbdfaad792c15ef44L,0xce55a2367f484accL, + 0x08653ca7055b1f15L }, + { 0x2efa8724538873a3L,0x09299e5dace1c7e7L,0x07afab66ade332baL, + 0x9be1fdf692dd71b7L } }, + /* 56 << 196 */ + { { 0xa49b5d595758b11cL,0x0b852893c8654f40L,0xb63ef6f452379447L, + 0xd4957d29105e690cL }, + { 0x7d484363646559b0L,0xf4a8273c49788a8eL,0xee406cb834ce54a9L, + 0x1e1c260ff86fda9bL } }, + /* 57 << 196 */ + { { 0xe150e228cf6a4a81L,0x1fa3b6a31b488772L,0x1e6ff110c5a9c15bL, + 0xc6133b918ad6aa47L }, + { 0x8ac5d55c9dffa978L,0xba1d1c1d5f3965f2L,0xf969f4e07732b52fL, + 0xfceecdb5a5172a07L } }, + /* 58 << 196 */ + { { 0xb0120a5f10f2b8f5L,0xc83a6cdf5c4c2f63L,0x4d47a491f8f9c213L, + 0xd9e1cce5d3f1bbd5L }, + { 0x0d91bc7caba7e372L,0xfcdc74c8dfd1a2dbL,0x05efa800374618e5L, + 0x1121696915a7925eL } }, + /* 59 << 196 */ + { { 0xd4c89823f6021c5dL,0x880d5e84eff14423L,0x6523bc5a6dcd1396L, + 0xd1acfdfc113c978bL }, + { 0xb0c164e8bbb66840L,0xf7f4301e72b58459L,0xc29ad4a6a638e8ecL, + 0xf5ab896146b78699L } }, + /* 60 << 196 */ + { { 0x9dbd79740e954750L,0x0121de8864f9d2c6L,0x2e597b42d985232eL, + 0x55b6c3c553451777L }, + { 0xbb53e547519cb9fbL,0xf134019f8428600dL,0x5a473176e081791aL, + 0x2f3e226335fb0c08L } }, + /* 61 << 196 */ + { { 0xb28c301773d273b0L,0xccd210767721ef9aL,0x054cc292b650dc39L, + 0x662246de6188045eL }, + { 0x904b52fa6b83c0d1L,0xa72df26797e9cd46L,0x886b43cd899725e4L, + 0x2b651688d849ff22L } }, + /* 62 << 196 */ + { { 0x60479b7902f34533L,0x5e354c140c77c148L,0xb4bb7581a8537c78L, + 0x188043d7efe1495fL }, + { 0x9ba12f428c1d5026L,0x2e0c8a2693d4aaabL,0xbdba7b8baa57c450L, + 0x140c9ad69bbdafefL } }, + /* 63 << 196 */ + { { 0x2067aa4225ac0f18L,0xf7b1295b04d1fbf3L,0x14829111a4b04824L, + 0x2ce3f19233bd5e91L }, + { 0x9c7a1d558f2e1b72L,0xfe932286302aa243L,0x497ca7b4d4be9554L, + 0xb8e821b8e0547a6eL } }, + /* 64 << 196 */ + { { 0xfb2838be67e573e0L,0x05891db94084c44bL,0x9131137396c1c2c5L, + 0x6aebfa3fd958444bL }, + { 0xac9cdce9e56e55c1L,0x7148ced32caa46d0L,0x2e10c7efb61fe8ebL, + 0x9fd835daff97cf4dL } }, + /* 0 << 203 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 203 */ + { { 
0xa36da109081e9387L,0xfb9780d78c935828L,0xd5940332e540b015L, + 0xc9d7b51be0f466faL }, + { 0xfaadcd41d6d9f671L,0xba6c1e28b1a2ac17L,0x066a7833ed201e5fL, + 0x19d99719f90f462bL } }, + /* 2 << 203 */ + { { 0xf431f462060b5f61L,0xa56f46b47bd057c2L,0x348dca6c47e1bf65L, + 0x9a38783e41bcf1ffL }, + { 0x7a5d33a9da710718L,0x5a7799872e0aeaf6L,0xca87314d2d29d187L, + 0xfa0edc3ec687d733L } }, + /* 3 << 203 */ + { { 0x9df336216a31e09bL,0xde89e44dc1350e35L,0x292148714ca0cf52L, + 0xdf3796720b88a538L }, + { 0xc92a510a2591d61bL,0x79aa87d7585b447bL,0xf67db604e5287f77L, + 0x1697c8bf5efe7a80L } }, + /* 4 << 203 */ + { { 0x1c894849cb198ac7L,0xa884a93d0f264665L,0x2da964ef9b200678L, + 0x3c351b87009834e6L }, + { 0xafb2ef9fe2c4b44bL,0x580f6c473326790cL,0xb84805210b02264aL, + 0x8ba6f9e242a194e2L } }, + /* 5 << 203 */ + { { 0xfc87975f8fb54738L,0x3516078827c3ead3L,0x834116d2b74a085aL, + 0x53c99a73a62fe996L }, + { 0x87585be05b81c51bL,0x925bafa8be0852b7L,0x76a4fafda84d19a7L, + 0x39a45982585206d4L } }, + /* 6 << 203 */ + { { 0x499b6ab65eb03c0eL,0xf19b795472bc3fdeL,0xa86b5b9c6e3a80d2L, + 0xe43775086d42819fL }, + { 0xc1663650bb3ee8a3L,0x75eb14fcb132075fL,0xa8ccc9067ad834f6L, + 0xea6a2474e6e92ffdL } }, + /* 7 << 203 */ + { { 0x9d72fd950f8d6758L,0xcb84e101408c07ddL,0xb9114bfda5e23221L, + 0x358b5fe2e94e742cL }, + { 0x1c0577ec95f40e75L,0xf01554513d73f3d6L,0x9d55cd67bd1b9b66L, + 0x63e86e78af8d63c7L } }, + /* 8 << 203 */ + { { 0x39d934abd3c095f1L,0x04b261bee4b76d71L,0x1d2e6970e73e6984L, + 0x879fb23b5e5fcb11L }, + { 0x11506c72dfd75490L,0x3a97d08561bcf1c1L,0x43201d82bf5e7007L, + 0x7f0ac52f798232a7L } }, + /* 9 << 203 */ + { { 0x2715cbc46eb564d4L,0x8d6c752c9e570e29L,0xf80247c89ef5fd5dL, + 0xc3c66b46d53eb514L }, + { 0x9666b4010f87de56L,0xce62c06fc6c603b5L,0xae7b4c607e4fc942L, + 0x38ac0b77663a9c19L } }, + /* 10 << 203 */ + { { 0xcb4d20ee4b049136L,0x8b63bf12356a4613L,0x1221aef670e08128L, + 0xe62d8c514acb6b16L }, + { 0x71f64a67379e7896L,0xb25237a2cafd7fa5L,0xf077bd983841ba6aL, + 0xc4ac02443cd16e7eL } }, + /* 11 << 203 */ + { { 0x548ba86921fea4caL,0xd36d0817f3dfdac1L,0x09d8d71ff4685fafL, + 0x8eff66bec52c459aL }, + { 0x182faee70b57235eL,0xee3c39b10106712bL,0x5107331fc0fcdcb0L, + 0x669fb9dca51054baL } }, + /* 12 << 203 */ + { { 0xb25101fb319d7682L,0xb02931290a982feeL,0x51c1c9b90261b344L, + 0x0e008c5bbfd371faL }, + { 0xd866dd1c0278ca33L,0x666f76a6e5aa53b1L,0xe5cfb7796013a2cfL, + 0x1d3a1aada3521836L } }, + /* 13 << 203 */ + { { 0xcedd253173faa485L,0xc8ee6c4fc0a76878L,0xddbccfc92a11667dL, + 0x1a418ea91c2f695aL }, + { 0xdb11bd9251f73971L,0x3e4b3c82da2ed89fL,0x9a44f3f4e73e0319L, + 0xd1e3de0f303431afL } }, + /* 14 << 203 */ + { { 0x3c5604ff50f75f9cL,0x1d8eddf37e752b22L,0x0ef074dd3c9a1118L, + 0xd0ffc172ccb86d7bL }, + { 0xabd1ece3037d90f2L,0xe3f307d66055856cL,0x422f93287e4c6dafL, + 0x902aac66334879a0L } }, + /* 15 << 203 */ + { { 0xb6a1e7bf94cdfadeL,0x6c97e1ed7fc6d634L,0x662ad24da2fb63f8L, + 0xf81be1b9a5928405L }, + { 0x86d765e4d14b4206L,0xbecc2e0e8fa0db65L,0xa28838e0b17fc76cL, + 0xe49a602ae37cf24eL } }, + /* 16 << 203 */ + { { 0x76b4131a567193ecL,0xaf3c305ae5f6e70bL,0x9587bd39031eebddL, + 0x5709def871bbe831L }, + { 0x570599830eb2b669L,0x4d80ce1b875b7029L,0x838a7da80364ac16L, + 0x2f431d23be1c83abL } }, + /* 17 << 203 */ + { { 0xe56812a6f9294dd3L,0xb448d01f9b4b0d77L,0xf3ae606104e8305cL, + 0x2bead64594d8c63eL }, + { 0x0a85434d84fd8b07L,0x537b983ff7a9dee5L,0xedcc5f18ef55bd85L, + 0x2041af6221c6cf8bL } }, + /* 18 << 203 */ + { { 0x8e52874cb940c71eL,0x211935a9db5f4b3aL,0x94350492301b1dc3L, + 0x33d2646d29958620L }, + { 
0x16b0d64bef911404L,0x9d1f25ea9a3c5ef4L,0x20f200eb4a352c78L, + 0x43929f2c4bd0b428L } }, + /* 19 << 203 */ + { { 0xa5656667c7196e29L,0x7992c2f09391be48L,0xaaa97cbd9ee0cd6eL, + 0x51b0310c3dc8c9bfL }, + { 0x237f8acfdd9f22cbL,0xbb1d81a1b585d584L,0x8d5d85f58c416388L, + 0x0d6e5a5a42fe474fL } }, + /* 20 << 203 */ + { { 0xe781276638235d4eL,0x1c62bd67496e3298L,0x8378660c3f175bc8L, + 0x4d04e18917afdd4dL }, + { 0x32a8160185a8068cL,0xdb58e4e192b29a85L,0xe8a65b86c70d8a3bL, + 0x5f0e6f4e98a0403bL } }, + /* 21 << 203 */ + { { 0x0812968469ed2370L,0x34dc30bd0871ee26L,0x3a5ce9487c9c5b05L, + 0x7d487b8043a90c87L }, + { 0x4089ba37dd0e7179L,0x45f80191b4041811L,0x1c3e105898747ba5L, + 0x98c4e13a6e1ae592L } }, + /* 22 << 203 */ + { { 0xd44636e6e82c9f9eL,0x711db87cc33a1043L,0x6f431263aa8aec05L, + 0x43ff120d2744a4aaL }, + { 0xd3bd892fae77779bL,0xf0fe0cc98cdc9f82L,0xca5f7fe6f1c5b1bcL, + 0xcc63a68244929a72L } }, + /* 23 << 203 */ + { { 0xc7eaba0c09dbe19aL,0x2f3585ad6b5c73c2L,0x8ab8924b0ae50c30L, + 0x17fcd27a638b30baL }, + { 0xaf414d3410b3d5a5L,0x09c107d22a9accf1L,0x15dac49f946a6242L, + 0xaec3df2ad707d642L } }, + /* 24 << 203 */ + { { 0x2c2492b73f894ae0L,0xf59df3e5b75f18ceL,0x7cb740d28f53cad0L, + 0x3eb585fbc4f01294L }, + { 0x17da0c8632c7f717L,0xeb8c795baf943f4cL,0x4ee23fb5f67c51d2L, + 0xef18757568889949L } }, + /* 25 << 203 */ + { { 0xa6b4bdb20389168bL,0xc4ecd258ea577d03L,0x3a63782b55743082L, + 0x6f678f4cc72f08cdL }, + { 0x553511cf65e58dd8L,0xd53b4e3ed402c0cdL,0x37de3e29a037c14cL, + 0x86b6c516c05712aaL } }, + /* 26 << 203 */ + { { 0x2834da3eb38dff6fL,0xbe012c52ea636be8L,0x292d238c61dd37f8L, + 0x0e54523f8f8142dbL }, + { 0xe31eb436036a05d8L,0x83e3cdff1e93c0ffL,0x3fd2fe0f50821ddfL, + 0xc8e19b0dff9eb33bL } }, + /* 27 << 203 */ + { { 0xc8cc943fb569a5feL,0xad0090d4d4342d75L,0x82090b4bcaeca000L, + 0xca39687f1bd410ebL }, + { 0xe7bb0df765959d77L,0x39d782189c964999L,0xd87f62e8b2415451L, + 0xe5efb774bed76108L } }, + /* 28 << 203 */ + { { 0x3ea011a4e822f0d0L,0xbc647ad15a8704f8L,0xbb315b3550c6820fL, + 0x863dec3db7e76becL }, + { 0x01ff5d3af017bfc7L,0x20054439976b8229L,0x067fca370bbd0d3bL, + 0xf63dde647f5e3d0fL } }, + /* 29 << 203 */ + { { 0x22dbefb32a4c94e9L,0xafbff0fe96f8278aL,0x80aea0b13503793dL, + 0xb22380295f06cd29L }, + { 0x65703e578ec3fecaL,0x06c38314393e7053L,0xa0b751eb7c6734c4L, + 0xd2e8a435c59f0f1eL } }, + /* 30 << 203 */ + { { 0x147d90525e9ca895L,0x2f4dd31e972072dfL,0xa16fda8ee6c6755cL, + 0xc66826ffcf196558L }, + { 0x1f1a76a30cf43895L,0xa9d604e083c3097bL,0xe190830966390e0eL, + 0xa50bf753b3c85effL } }, + /* 31 << 203 */ + { { 0x0696bddef6a70251L,0x548b801b3c6ab16aL,0x37fcf704a4d08762L, + 0x090b3defdff76c4eL }, + { 0x87e8cb8969cb9158L,0x44a90744995ece43L,0xf85395f40ad9fbf5L, + 0x49b0f6c54fb0c82dL } }, + /* 32 << 203 */ + { { 0x75d9bc15adf7cccfL,0x81a3e5d6dfa1e1b0L,0x8c39e444249bc17eL, + 0xf37dccb28ea7fd43L }, + { 0xda654873907fba12L,0x35daa6da4a372904L,0x0564cfc66283a6c5L, + 0xd09fa4f64a9395bfL } }, + /* 33 << 203 */ + { { 0x688e9ec9aeb19a36L,0xd913f1cec7bfbfb4L,0x797b9a3c61c2faa6L, + 0x2f979bec6a0a9c12L }, + { 0xb5969d0f359679ecL,0xebcf523d079b0460L,0xfd6b000810fab870L, + 0x3f2edcda9373a39cL } }, + /* 34 << 203 */ + { { 0x0d64f9a76f568431L,0xf848c27c02f8898cL,0xf418ade1260b5bd5L, + 0xc1f3e3236973dee8L }, + { 0x46e9319c26c185ddL,0x6d85b7d8546f0ac4L,0x427965f2247f9d57L, + 0xb519b636b0035f48L } }, + /* 35 << 203 */ + { { 0x6b6163a9ab87d59cL,0xff9f58c339caaa11L,0x4ac39cde3177387bL, + 0x5f6557c2873e77f9L }, + { 0x6750400636a83041L,0x9b1c96ca75ef196cL,0xf34283deb08c7940L, + 0x7ea096441128c316L } }, + /* 36 << 203 */ + { { 
0xb510b3b56aa39dffL,0x59b43da29f8e4d8cL,0xa8ce31fd9e4c4b9fL, + 0x0e20be26c1303c01L }, + { 0x18187182e8ee47c9L,0xd9687cdb7db98101L,0x7a520e4da1e14ff6L, + 0x429808ba8836d572L } }, + /* 37 << 203 */ + { { 0xa37ca60d4944b663L,0xf901f7a9a3f91ae5L,0xe4e3e76e9e36e3b1L, + 0x9aa219cf29d93250L }, + { 0x347fe275056a2512L,0xa4d643d9de65d95cL,0x9669d396699fc3edL, + 0xb598dee2cf8c6bbeL } }, + /* 38 << 203 */ + { { 0x682ac1e5dda9e5c6L,0x4e0d3c72caa9fc95L,0x17faaade772bea44L, + 0x5ef8428cab0009c8L }, + { 0xcc4ce47a460ff016L,0xda6d12bf725281cbL,0x44c678480223aad2L, + 0x6e342afa36256e28L } }, + /* 39 << 203 */ + { { 0x1400bb0b93a37c04L,0x62b1bc9bdd10bd96L,0x7251adeb0dac46b7L, + 0x7d33b92e7be4ef51L }, + { 0x28b2a94be61fa29aL,0x4b2be13f06422233L,0x36d6d062330d8d37L, + 0x5ef80e1eb28ca005L } }, + /* 40 << 203 */ + { { 0x174d46996d16768eL,0x9fc4ff6a628bf217L,0x77705a94154e490dL, + 0x9d96dd288d2d997aL }, + { 0x77e2d9d8ce5d72c4L,0x9d06c5a4c11c714fL,0x02aa513679e4a03eL, + 0x1386b3c2030ff28bL } }, + /* 41 << 203 */ + { { 0xfe82e8a6fb283f61L,0x7df203e5f3abc3fbL,0xeec7c3513a4d3622L, + 0xf7d17dbfdf762761L }, + { 0xc3956e44522055f0L,0xde3012db8fa748dbL,0xca9fcb63bf1dcc14L, + 0xa56d9dcfbe4e2f3aL } }, + /* 42 << 203 */ + { { 0xb86186b68bcec9c2L,0x7cf24df9680b9f06L,0xc46b45eac0d29281L, + 0xfff42bc507b10e12L }, + { 0x12263c404d289427L,0x3d5f1899b4848ec4L,0x11f97010d040800cL, + 0xb4c5f529300feb20L } }, + /* 43 << 203 */ + { { 0xcc543f8fde94fdcbL,0xe96af739c7c2f05eL,0xaa5e0036882692e1L, + 0x09c75b68950d4ae9L }, + { 0x62f63df2b5932a7aL,0x2658252ede0979adL,0x2a19343fb5e69631L, + 0x718c7501525b666bL } }, + /* 44 << 203 */ + { { 0x26a42d69ea40dc3aL,0xdc84ad22aecc018fL,0x25c36c7b3270f04aL, + 0x46ba6d4750fa72edL }, + { 0x6c37d1c593e58a8eL,0xa2394731120c088cL,0xc3be4263cb6e86daL, + 0x2c417d367126d038L } }, + /* 45 << 203 */ + { { 0x5b70f9c58b6f8efaL,0x671a2faa37718536L,0xd3ced3c6b539c92bL, + 0xe56f1bd9a31203c2L }, + { 0x8b096ec49ff3c8ebL,0x2deae43243491ceaL,0x2465c6eb17943794L, + 0x5d267e6620586843L } }, + /* 46 << 203 */ + { { 0x9d3d116db07159d0L,0xae07a67fc1896210L,0x8fc84d87bb961579L, + 0x30009e491c1f8dd6L }, + { 0x8a8caf22e3132819L,0xcffa197cf23ab4ffL,0x58103a44205dd687L, + 0x57b796c30ded67a2L } }, + /* 47 << 203 */ + { { 0x0b9c3a6ca1779ad7L,0xa33cfe2e357c09c5L,0x2ea293153db4a57eL, + 0x919596958ebeb52eL }, + { 0x118db9a6e546c879L,0x8e996df46295c8d6L,0xdd99048455ec806bL, + 0x24f291ca165c1035L } }, + /* 48 << 203 */ + { { 0xcca523bb440e2229L,0x324673a273ef4d04L,0xaf3adf343e11ec39L, + 0x6136d7f1dc5968d3L }, + { 0x7a7b2899b053a927L,0x3eaa2661ae067ecdL,0x8549b9c802779cd9L, + 0x061d7940c53385eaL } }, + /* 49 << 203 */ + { { 0x3e0ba883f06d18bdL,0x4ba6de53b2700843L,0xb966b668591a9e4dL, + 0x93f675677f4fa0edL }, + { 0x5a02711b4347237bL,0xbc041e2fe794608eL,0x55af10f570f73d8cL, + 0xd2d4d4f7bb7564f7L } }, + /* 50 << 203 */ + { { 0xd7d27a89b3e93ce7L,0xf7b5a8755d3a2c1bL,0xb29e68a0255b218aL, + 0xb533837e8af76754L }, + { 0xd1b05a73579fab2eL,0xb41055a1ecd74385L,0xb2369274445e9115L, + 0x2972a7c4f520274eL } }, + /* 51 << 203 */ + { { 0x6c08334ef678e68aL,0x4e4160f099b057edL,0x3cfe11b852ccb69aL, + 0x2fd1823a21c8f772L }, + { 0xdf7f072f3298f055L,0x8c0566f9fec74a6eL,0xe549e0195bb4d041L, + 0x7c3930ba9208d850L } }, + /* 52 << 203 */ + { { 0xe07141fcaaa2902bL,0x539ad799e4f69ad3L,0xa6453f94813f9ffdL, + 0xc58d3c48375bc2f7L }, + { 0xb3326fad5dc64e96L,0x3aafcaa9b240e354L,0x1d1b0903aca1e7a9L, + 0x4ceb97671211b8a0L } }, + /* 53 << 203 */ + { { 0xeca83e49e32a858eL,0x4c32892eae907badL,0xd5b42ab62eb9b494L, + 0x7fde3ee21eabae1bL }, + { 
0x13b5ab09caf54957L,0xbfb028bee5f5d5d5L,0x928a06502003e2c0L, + 0x90793aac67476843L } }, + /* 54 << 203 */ + { { 0x5e942e79c81710a0L,0x557e4a3627ccadd4L,0x72a2bc564bcf6d0cL, + 0x09ee5f4326d7b80cL }, + { 0x6b70dbe9d4292f19L,0x56f74c2663f16b18L,0xc23db0f735fbb42aL, + 0xb606bdf66ae10040L } }, + /* 55 << 203 */ + { { 0x1eb15d4d044573acL,0x7dc3cf86556b0ba4L,0x97af9a33c60df6f7L, + 0x0b1ef85ca716ce8cL }, + { 0x2922f884c96958beL,0x7c32fa9435690963L,0x2d7f667ceaa00061L, + 0xeaaf7c173547365cL } }, + /* 56 << 203 */ + { { 0x1eb4de4687032d58L,0xc54f3d835e2c79e0L,0x07818df45d04ef23L, + 0x55faa9c8673d41b4L }, + { 0xced64f6f89b95355L,0x4860d2eab7415c84L,0x5fdb9bd2050ebad3L, + 0xdb53e0cc6685a5bfL } }, + /* 57 << 203 */ + { { 0xb830c0319feb6593L,0xdd87f3106accff17L,0x2303ebab9f555c10L, + 0x94603695287e7065L }, + { 0xf88311c32e83358cL,0x508dd9b4eefb0178L,0x7ca237062dba8652L, + 0x62aac5a30047abe5L } }, + /* 58 << 203 */ + { { 0x9a61d2a08b1ea7b3L,0xd495ab63ae8b1485L,0x38740f8487052f99L, + 0x178ebe5bb2974eeaL }, + { 0x030bbcca5b36d17fL,0xb5e4cce3aaf86eeaL,0xb51a022068f8e9e0L, + 0xa434879609eb3e75L } }, + /* 59 << 203 */ + { { 0xbe592309eef1a752L,0x5d7162d76f2aa1edL,0xaebfb5ed0f007dd2L, + 0x255e14b2c89edd22L }, + { 0xba85e0720303b697L,0xc5d17e25f05720ffL,0x02b58d6e5128ebb6L, + 0x2c80242dd754e113L } }, + /* 60 << 203 */ + { { 0x919fca5fabfae1caL,0x937afaac1a21459bL,0x9e0ca91c1f66a4d2L, + 0x194cc7f323ec1331L }, + { 0xad25143a8aa11690L,0xbe40ad8d09b59e08L,0x37d60d9be750860aL, + 0x6c53b008c6bf434cL } }, + /* 61 << 203 */ + { { 0xb572415d1356eb80L,0xb8bf9da39578ded8L,0x22658e365e8fb38bL, + 0x9b70ce225af8cb22L }, + { 0x7c00018a829a8180L,0x84329f93b81ed295L,0x7c343ea25f3cea83L, + 0x38f8655f67586536L } }, + /* 62 << 203 */ + { { 0xa661a0d01d3ec517L,0x98744652512321aeL,0x084ca591eca92598L, + 0xa9bb9dc91dcb3febL }, + { 0x14c5435578b4c240L,0x5ed62a3b610cafdcL,0x07512f371b38846bL, + 0x571bb70ab0e38161L } }, + /* 63 << 203 */ + { { 0xb556b95b2da705d2L,0x3ef8ada6b1a08f98L,0x85302ca7ddecfbe5L, + 0x0e530573943105cdL }, + { 0x60554d5521a9255dL,0x63a32fa1f2f3802aL,0x35c8c5b0cd477875L, + 0x97f458ea6ad42da1L } }, + /* 64 << 203 */ + { { 0x832d7080eb6b242dL,0xd30bd0233b71e246L,0x7027991bbe31139dL, + 0x68797e91462e4e53L }, + { 0x423fe20a6b4e185aL,0x82f2c67e42d9b707L,0x25c817684cf7811bL, + 0xbd53005e045bb95dL } }, + /* 0 << 210 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 210 */ + { { 0xe5f649be9d8e68fdL,0xdb0f05331b044320L,0xf6fde9b3e0c33398L, + 0x92f4209b66c8cfaeL }, + { 0xe9d1afcc1a739d4bL,0x09aea75fa28ab8deL,0x14375fb5eac6f1d0L, + 0x6420b560708f7aa5L } }, + /* 2 << 210 */ + { { 0x9eae499c6254dc41L,0x7e2939247a837e7eL,0x74aec08c090524a7L, + 0xf82b92198d6f55f2L }, + { 0x493c962e1402cec5L,0x9f17ca17fa2f30e7L,0xbcd783e8e9b879cbL, + 0xea3d8c145a6f145fL } }, + /* 3 << 210 */ + { { 0xdede15e75e0dee6eL,0x74f24872dc628aa2L,0xd3e9c4fe7861bb93L, + 0x56d4822a6187b2e0L }, + { 0xb66417cfc59826f9L,0xca2609692408169eL,0xedf69d06c79ef885L, + 0x00031f8adc7d138fL } }, + /* 4 << 210 */ + { { 0x103c46e60ebcf726L,0x4482b8316231470eL,0x6f6dfaca487c2109L, + 0x2e0ace9762e666efL }, + { 0x3246a9d31f8d1f42L,0x1b1e83f1574944d2L,0x13dfa63aa57f334bL, + 0x0cf8daed9f025d81L } }, + /* 5 << 210 */ + { { 0x30d78ea800ee11c1L,0xeb053cd4b5e3dd75L,0x9b65b13ed58c43c5L, + 0xc3ad49bdbd151663L }, + { 0x99fd8e41b6427990L,0x12cf15bd707eae1eL,0x29ad4f1b1aabb71eL, + 0x5143e74d07545d0eL } }, + /* 6 << 210 */ + { { 0x30266336c88bdee1L,0x25f293065876767cL,0x9c078571c6731996L, + 0xc88690b2ed552951L }, + { 
0x274f2c2d852705b4L,0xb0bf8d444e09552dL,0x7628beeb986575d1L, + 0x407be2387f864651L } }, + /* 7 << 210 */ + { { 0x0e5e3049a639fc6bL,0xe75c35d986003625L,0x0cf35bd85dcc1646L, + 0x8bcaced26c26273aL }, + { 0xe22ecf1db5536742L,0x013dd8971a9e068bL,0x17f411cb8a7909c5L, + 0x5757ac98861dd506L } }, + /* 8 << 210 */ + { { 0x85de1f0d1e935abbL,0xdefd10b4154de37aL,0xb8d9e392369cebb5L, + 0x54d5ef9b761324beL }, + { 0x4d6341ba74f17e26L,0xc0a0e3c878c1dde4L,0xa6d7758187d918fdL, + 0x6687601502ca3a13L } }, + /* 9 << 210 */ + { { 0xc7313e9cf36658f0L,0xc433ef1c71f8057eL,0x853262461b6a835aL, + 0xc8f053987c86394cL }, + { 0xff398cdfe983c4a1L,0xbf5e816203b7b931L,0x93193c46b7b9045bL, + 0x1e4ebf5da4a6e46bL } }, + /* 10 << 210 */ + { { 0xf9942a6043a24fe7L,0x29c1191effb3492bL,0x9f662449902fde05L, + 0xc792a7ac6713c32dL }, + { 0x2fd88ad8b737982cL,0x7e3a0319a21e60e3L,0x09b0de447383591aL, + 0x6df141ee8310a456L } }, + /* 11 << 210 */ + { { 0xaec1a039e6d6f471L,0x14b2ba0f1198d12eL,0xebc1a1603aeee5acL, + 0x401f4836e0b964ceL }, + { 0x2ee437964fd03f66L,0x3fdb4e49dd8f3f12L,0x6ef267f629380f18L, + 0x3e8e96708da64d16L } }, + /* 12 << 210 */ + { { 0xbc19180c207674f1L,0x112e09a733ae8fdbL,0x996675546aaeb71eL, + 0x79432af1e101b1c7L }, + { 0xd5eb558fde2ddec6L,0x81392d1f5357753fL,0xa7a76b973ae1158aL, + 0x416fbbff4a899991L } }, + /* 13 << 210 */ + { { 0x9e65fdfd0d4a9dcfL,0x7bc29e48944ddf12L,0xbc1a92d93c856866L, + 0x273c69056e98dfe2L }, + { 0x69fce418cdfaa6b8L,0x606bd8235061c69fL,0x42d495a06af75e27L, + 0x8ed3d5056d873a1fL } }, + /* 14 << 210 */ + { { 0xaf5528416ab25b6aL,0xc6c0ffc72b1a4523L,0xab18827b21c99e03L, + 0x060e86489034691bL }, + { 0x5207f90f93c7f398L,0x9f4a96cb82f8d10bL,0xdd71cd793ad0f9e3L, + 0x84f435d2fc3a54f5L } }, + /* 15 << 210 */ + { { 0x4b03c55b8e33787fL,0xef42f975a6384673L,0xff7304f75051b9f0L, + 0x18aca1dc741c87c2L }, + { 0x56f120a72d4bfe80L,0xfd823b3d053e732cL,0x11bccfe47537ca16L, + 0xdf6c9c741b5a996bL } }, + /* 16 << 210 */ + { { 0xee7332c7904fc3faL,0x14a23f45c7e3636aL,0xc38659c3f091d9aaL, + 0x4a995e5db12d8540L }, + { 0x20a53becf3a5598aL,0x56534b17b1eaa995L,0x9ed3dca4bf04e03cL, + 0x716c563ad8d56268L } }, + /* 17 << 210 */ + { { 0x27ba77a41d6178e7L,0xe4c80c4068a1ff8eL,0x750110990a13f63dL, + 0x7bf33521a61d46f3L }, + { 0x0aff218e10b365bbL,0x810218040fd7ea75L,0x05a3fd8aa4b3a925L, + 0xb829e75f9b3db4e6L } }, + /* 18 << 210 */ + { { 0x6bdc75a54d53e5fbL,0x04a5dc02d52717e3L,0x86af502fe9a42ec2L, + 0x8867e8fb2630e382L }, + { 0xbf845c6ebec9889bL,0x54f491f2cb47c98dL,0xa3091fba790c2a12L, + 0xd7f6fd78c20f708bL } }, + /* 19 << 210 */ + { { 0xa569ac30acde5e17L,0xd0f996d06852b4d7L,0xe51d4bb54609ae54L, + 0x3fa37d170daed061L }, + { 0x62a8868434b8fb41L,0x99a2acbd9efb64f1L,0xb75c1a5e6448e1f2L, + 0xfa99951a42b5a069L } }, + /* 20 << 210 */ + { { 0x6d956e892f3b26e7L,0xf4709860da875247L,0x3ad151792482dda3L, + 0xd64110e3017d82f0L }, + { 0x14928d2cfad414e4L,0x2b155f582ed02b24L,0x481a141bcb821bf1L, + 0x12e3c7704f81f5daL } }, + /* 21 << 210 */ + { { 0xe49c5de59fff8381L,0x110532325bbec894L,0xa0d051cc454d88c4L, + 0x4f6db89c1f8e531bL }, + { 0x34fe3fd6ca563a44L,0x7f5c221558da8ab9L,0x8445016d9474f0a1L, + 0x17d34d61cb7d8a0aL } }, + /* 22 << 210 */ + { { 0x8e9d39101c474019L,0xcaff2629d52ceefbL,0xf9cf3e32c1622c2bL, + 0xd4b95e3ce9071a05L }, + { 0xfbbca61f1594438cL,0x1eb6e6a604aadedfL,0x853027f468e14940L, + 0x221d322adfabda9cL } }, + /* 23 << 210 */ + { { 0xed8ea9f6b7cb179aL,0xdc7b764db7934dccL,0xfcb139405e09180dL, + 0x6629a6bfb47dc2ddL }, + { 0xbfc55e4e9f5a915eL,0xb1db9d376204441eL,0xf82d68cf930c5f53L, + 0x17d3a142cbb605b1L } }, + /* 24 << 210 */ + { { 
0xdd5944ea308780f2L,0xdc8de7613845f5e4L,0x6beaba7d7624d7a3L, + 0x1e709afd304df11eL }, + { 0x9536437602170456L,0xbf204b3ac8f94b64L,0x4e53af7c5680ca68L, + 0x0526074ae0c67574L } }, + /* 25 << 210 */ + { { 0x95d8cef8ecd92af6L,0xe6b9fa7a6cd1745aL,0x3d546d3da325c3e4L, + 0x1f57691d9ae93aaeL }, + { 0xe891f3fe9d2e1a33L,0xd430093fac063d35L,0xeda59b125513a327L, + 0xdc2134f35536f18fL } }, + /* 26 << 210 */ + { { 0xaa51fe2c5c210286L,0x3f68aaee1cab658cL,0x5a23a00bf9357292L, + 0x9a626f397efdabedL }, + { 0xfe2b3bf3199d78e3L,0xb7a2af7771bbc345L,0x3d19827a1e59802cL, + 0x823bbc15b487a51cL } }, + /* 27 << 210 */ + { { 0x856139f299d0a422L,0x9ac3df65f456c6fbL,0xaddf65c6701f8bd6L, + 0x149f321e3758df87L }, + { 0xb1ecf714721b7ebaL,0xe17df09831a3312aL,0xdb2fd6ecd5c4d581L, + 0xfd02996f8fcea1b3L } }, + /* 28 << 210 */ + { { 0xe29fa63e7882f14fL,0xc9f6dc3507c6cadcL,0x46f22d6fb882bed0L, + 0x1a45755bd118e52cL }, + { 0x9f2c7c277c4608cfL,0x7ccbdf32568012c2L,0xfcb0aedd61729b0eL, + 0x7ca2ca9ef7d75dbfL } }, + /* 29 << 210 */ + { { 0xf58fecb16f640f62L,0xe274b92b39f51946L,0x7f4dfc046288af44L, + 0x0a91f32aeac329e5L }, + { 0x43ad274bd6aaba31L,0x719a16400f6884f9L,0x685d29f6daf91e20L, + 0x5ec1cc3327e49d52L } }, + /* 30 << 210 */ + { { 0x38f4de963b54a059L,0x0e0015e5efbcfdb3L,0x177d23d94dbb8da6L, + 0x98724aa297a617adL }, + { 0x30f0885bfdb6558eL,0xf9f7a28ac7899a96L,0xd2ae8ac8872dc112L, + 0xfa0642ca73c3c459L } }, + /* 31 << 210 */ + { { 0x15296981e7dfc8d6L,0x67cd44501fb5b94aL,0x0ec71cf10eddfd37L, + 0xc7e5eeb39a8eddc7L }, + { 0x02ac8e3d81d95028L,0x0088f17270b0e35dL,0xec041fabe1881fe3L, + 0x62cf71b8d99e7faaL } }, + /* 32 << 210 */ + { { 0x5043dea7e0f222c2L,0x309d42ac72e65142L,0x94fe9ddd9216cd30L, + 0xd6539c7d0f87feecL }, + { 0x03c5a57c432ac7d7L,0x72692cf0327fda10L,0xec28c85f280698deL, + 0x2331fb467ec283b1L } }, + /* 33 << 210 */ + { { 0xd34bfa322867e633L,0x78709a820a9cc815L,0xb7fe6964875e2fa5L, + 0x25cc064f9e98bfb5L }, + { 0x9eb0151c493a65c5L,0x5fb5d94153182464L,0x69e6f130f04618e2L, + 0xa8ecec22f89c8ab6L } }, + /* 34 << 210 */ + { { 0xcd6ac88bb96209bdL,0x65fa8cdbb3e1c9e0L,0xa47d22f54a8d8eacL, + 0x83895cdf8d33f963L }, + { 0xa8adca59b56cd3d1L,0x10c8350bdaf38232L,0x2b161fb3a5080a9fL, + 0xbe7f5c643af65b3aL } }, + /* 35 << 210 */ + { { 0x2c75403997403a11L,0x94626cf7121b96afL,0x431de7c46a983ec2L, + 0x3780dd3a52cc3df7L }, + { 0xe28a0e462baf8e3bL,0xabe68aad51d299aeL,0x603eb8f9647a2408L, + 0x14c61ed65c750981L } }, + /* 36 << 210 */ + { { 0x88b34414c53352e7L,0x5a34889c1337d46eL,0x612c1560f95f2bc8L, + 0x8a3f8441d4807a3aL }, + { 0x680d9e975224da68L,0x60cd6e88c3eb00e9L,0x3875a98e9a6bc375L, + 0xdc80f9244fd554c2L } }, + /* 37 << 210 */ + { { 0x6c4b34156ac77407L,0xa1e5ea8f25420681L,0x541bfa144607a458L, + 0x5dbc7e7a96d7fbf9L }, + { 0x646a851b31590a47L,0x039e85ba15ee6df8L,0xd19fa231d7b43fc0L, + 0x84bc8be8299a0e04L } }, + /* 38 << 210 */ + { { 0x2b9d2936f20df03aL,0x240543828608d472L,0x76b6ba049149202aL, + 0xb21c38313670e7b7L }, + { 0xddd93059d6fdee10L,0x9da47ad378488e71L,0x99cc1dfda0fcfb25L, + 0x42abde1064696954L } }, + /* 39 << 210 */ + { { 0x14cc15fc17eab9feL,0xd6e863e4d3e70972L,0x29a7765c6432112cL, + 0x886600015b0774d8L }, + { 0x3729175a2c088eaeL,0x13afbcae8230b8d4L,0x44768151915f4379L, + 0xf086431ad8d22812L } }, + /* 40 << 210 */ + { { 0x37461955c298b974L,0x905fb5f0f8711e04L,0x787abf3afe969d18L, + 0x392167c26f6a494eL }, + { 0xfc7a0d2d28c511daL,0xf127c7dcb66a262dL,0xf9c4bb95fd63fdf0L, + 0x900165893913ef46L } }, + /* 41 << 210 */ + { { 0x74d2a73c11aa600dL,0x2f5379bd9fb5ab52L,0xe49e53a47fb70068L, + 0x68dd39e5404aa9a7L }, + { 
0xb9b0cf572ecaa9c3L,0xba0e103be824826bL,0x60c2198b4631a3c4L, + 0xc5ff84abfa8966a2L } }, + /* 42 << 210 */ + { { 0x2d6ebe22ac95aff8L,0x1c9bb6dbb5a46d09L,0x419062da53ee4f8dL, + 0x7b9042d0bb97efefL }, + { 0x0f87f080830cf6bdL,0x4861d19a6ec8a6c6L,0xd3a0daa1202f01aaL, + 0xb0111674f25afbd5L } }, + /* 43 << 210 */ + { { 0x6d00d6cf1afb20d9L,0x1369500040671bc5L,0x913ab0dc2485ea9bL, + 0x1f2bed069eef61acL }, + { 0x850c82176d799e20L,0x93415f373271c2deL,0x5afb06e96c4f5910L, + 0x688a52dfc4e9e421L } }, + /* 44 << 210 */ + { { 0x30495ba3e2a9a6dbL,0x4601303d58f9268bL,0xbe3b0dad7eb0f04fL, + 0x4ea472504456936dL }, + { 0x8caf8798d33fd3e7L,0x1ccd8a89eb433708L,0x9effe3e887fd50adL, + 0xbe240a566b29c4dfL } }, + /* 45 << 210 */ + { { 0xec4ffd98ca0e7ebdL,0xf586783ae748616eL,0xa5b00d8fc77baa99L, + 0x0acada29b4f34c9cL }, + { 0x36dad67d0fe723acL,0x1d8e53a539c36c1eL,0xe4dd342d1f4bea41L, + 0x64fd5e35ebc9e4e0L } }, + /* 46 << 210 */ + { { 0x96f01f9057908805L,0xb5b9ea3d5ed480ddL,0x366c5dc23efd2dd0L, + 0xed2fe3056e9dfa27L }, + { 0x4575e8926e9197e2L,0x11719c09ab502a5dL,0x264c7bece81f213fL, + 0x741b924155f5c457L } }, + /* 47 << 210 */ + { { 0x78ac7b6849a5f4f4L,0xf91d70a29fc45b7dL,0x39b05544b0f5f355L, + 0x11f06bceeef930d9L }, + { 0xdb84d25d038d05e1L,0x04838ee5bacc1d51L,0x9da3ce869e8ee00bL, + 0xc3412057c36eda1fL } }, + /* 48 << 210 */ + { { 0xae80b91364d9c2f4L,0x7468bac3a010a8ffL,0xdfd2003737359d41L, + 0x1a0f5ab815efeaccL }, + { 0x7c25ad2f659d0ce0L,0x4011bcbb6785cff1L,0x128b99127e2192c7L, + 0xa549d8e113ccb0e8L } }, + /* 49 << 210 */ + { { 0x805588d8c85438b1L,0x5680332dbc25cb27L,0xdcd1bc961a4bfdf4L, + 0x779ff428706f6566L }, + { 0x8bbee998f059987aL,0xf6ce8cf2cc686de7L,0xf8ad3c4a953cfdb2L, + 0xd1d426d92205da36L } }, + /* 50 << 210 */ + { { 0xb3c0f13fc781a241L,0x3e89360ed75362a8L,0xccd05863c8a91184L, + 0x9bd0c9b7efa8a7f4L }, + { 0x97ee4d538a912a4bL,0xde5e15f8bcf518fdL,0x6a055bf8c467e1e0L, + 0x10be4b4b1587e256L } }, + /* 51 << 210 */ + { { 0xd90c14f2668621c9L,0xd5518f51ab9c92c1L,0x8e6a0100d6d47b3cL, + 0xcbe980dd66716175L }, + { 0x500d3f10ddd83683L,0x3b6cb35d99cac73cL,0x53730c8b6083d550L, + 0xcf159767df0a1987L } }, + /* 52 << 210 */ + { { 0x84bfcf5343ad73b3L,0x1b528c204f035a94L,0x4294edf733eeac69L, + 0xb6283e83817f3240L }, + { 0xc3fdc9590a5f25b1L,0xefaf8aa55844ee22L,0xde269ba5dbdde4deL, + 0xe3347160c56133bfL } }, + /* 53 << 210 */ + { { 0xc11842198d9ea9f8L,0x090de5dbf3fc1ab5L,0x404c37b10bf22cdaL, + 0x7de20ec8f5618894L }, + { 0x754c588eecdaecabL,0x6ca4b0ed88342743L,0x76f08bddf4a938ecL, + 0xd182de8991493ccbL } }, + /* 54 << 210 */ + { { 0xd652c53ec8a4186aL,0xb3e878db946d8e33L,0x088453c05f37663cL, + 0x5cd9daaab407748bL }, + { 0xa1f5197f586d5e72L,0x47500be8c443ca59L,0x78ef35b2e2652424L, + 0x09c5d26f6dd7767dL } }, + /* 55 << 210 */ + { { 0x7175a79aa74d3f7bL,0x0428fd8dcf5ea459L,0x511cb97ca5d1746dL, + 0x36363939e71d1278L }, + { 0xcf2df95510350bf4L,0xb381743960aae782L,0xa748c0e43e688809L, + 0x98021fbfd7a5a006L } }, + /* 56 << 210 */ + { { 0x9076a70c0e367a98L,0xbea1bc150f62b7c2L,0x2645a68c30fe0343L, + 0xacaffa78699dc14fL }, + { 0xf4469964457bf9c4L,0x0db6407b0d2ead83L,0x68d56cadb2c6f3ebL, + 0x3b512e73f376356cL } }, + /* 57 << 210 */ + { { 0xe43b0e1ffce10408L,0x89ddc0035a5e257dL,0xb0ae0d120362e5b3L, + 0x07f983c7b0519161L }, + { 0xc2e94d155d5231e7L,0xcff22aed0b4f9513L,0xb02588dd6ad0b0b5L, + 0xb967d1ac11d0dcd5L } }, + /* 58 << 210 */ + { { 0x8dac6bc6cf777b6cL,0x0062bdbd4c6d1959L,0x53da71b50ef5cc85L, + 0x07012c7d4006f14fL }, + { 0x4617f962ac47800dL,0x53365f2bc102ed75L,0xb422efcb4ab8c9d3L, + 0x195cb26b34af31c9L } }, + /* 59 << 210 */ + { { 
0x3a926e2905f2c4ceL,0xbd2bdecb9856966cL,0x5d16ab3a85527015L, + 0x9f81609e4486c231L }, + { 0xd8b96b2cda350002L,0xbd054690fa1b7d36L,0xdc90ebf5e71d79bcL, + 0xf241b6f908964e4eL } }, + /* 60 << 210 */ + { { 0x7c8386432fe3cd4cL,0xe0f33acbb4bc633cL,0xb4a9ecec3d139f1fL, + 0x05ce69cddc4a1f49L }, + { 0xa19d1b16f5f98aafL,0x45bb71d66f23e0efL,0x33789fcd46cdfdd3L, + 0x9b8e2978cee040caL } }, + /* 61 << 210 */ + { { 0x9c69b246ae0a6828L,0xba533d247078d5aaL,0x7a2e42c07bb4fbdbL, + 0xcfb4879a7035385cL }, + { 0x8c3dd30b3281705bL,0x7e361c6c404fe081L,0x7b21649c3f604edfL, + 0x5dbf6a3fe52ffe47L } }, + /* 62 << 210 */ + { { 0xc41b7c234b54d9bfL,0x1374e6813511c3d9L,0x1863bf16c1b2b758L, + 0x90e785071e9e6a96L }, + { 0xab4bf98d5d86f174L,0xd74e0bd385e96fe4L,0x8afde39fcac5d344L, + 0x90946dbcbd91b847L } }, + /* 63 << 210 */ + { { 0xf5b42358fe1a838cL,0x05aae6c5620ac9d8L,0x8e193bd8a1ce5a0bL, + 0x8f7105714dabfd72L }, + { 0x8d8fdd48182caaacL,0x8c4aeefa040745cfL,0x73c6c30af3b93e6dL, + 0x991241f316f42011L } }, + /* 64 << 210 */ + { { 0xa0158eeae457a477L,0xd19857dbee6ddc05L,0xb326522418c41671L, + 0x3ffdfc7e3c2c0d58L }, + { 0x3a3a525426ee7cdaL,0x341b0869df02c3a8L,0xa023bf42723bbfc8L, + 0x3d15002a14452691L } }, + /* 0 << 217 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 217 */ + { { 0x5ef7324c85edfa30L,0x2597655487d4f3daL,0x352f5bc0dcb50c86L, + 0x8f6927b04832a96cL }, + { 0xd08ee1ba55f2f94cL,0x6a996f99344b45faL,0xe133cb8da8aa455dL, + 0x5d0721ec758dc1f7L } }, + /* 2 << 217 */ + { { 0x6ba7a92079e5fb67L,0xe1331feb70aa725eL,0x5080ccf57df5d837L, + 0xe4cae01d7ff72e21L }, + { 0xd9243ee60412a77dL,0x06ff7cacdf449025L,0xbe75f7cd23ef5a31L, + 0xbc9578220ddef7a8L } }, + /* 3 << 217 */ + { { 0x8cf7230cb0ce1c55L,0x5b534d050bbfb607L,0xee1ef1130e16363bL, + 0x27e0aa7ab4999e82L }, + { 0xce1dac2d79362c41L,0x67920c9091bb6cb0L,0x1e648d632223df24L, + 0x0f7d9eefe32e8f28L } }, + /* 4 << 217 */ + { { 0x6943f39afa833834L,0x22951722a6328562L,0x81d63dd54170fc10L, + 0x9f5fa58faecc2e6dL }, + { 0xb66c8725e77d9a3bL,0x11235cea6384ebe0L,0x06a8c1185845e24aL, + 0x0137b286ebd093b1L } }, + /* 5 << 217 */ + { { 0xc589e1ce44ace150L,0xe0f8d3d94381e97cL,0x59e99b1162c5a4b8L, + 0x90d262f7fd0ec9f9L }, + { 0xfbc854c9283e13c9L,0x2d04fde7aedc7085L,0x057d776547dcbecbL, + 0x8dbdf5919a76fa5fL } }, + /* 6 << 217 */ + { { 0xd01506950de1e578L,0x2e1463e7e9f72bc6L,0xffa684411b39eca5L, + 0x673c85307c037f2fL }, + { 0xd0d6a600747f91daL,0xb08d43e1c9cb78e9L,0x0fc0c64427b5cef5L, + 0x5c1d160aa60a2fd6L } }, + /* 7 << 217 */ + { { 0xf98cae5328c8e13bL,0x375f10c4b2eddcd1L,0xd4eb8b7f5cce06adL, + 0xb4669f4580a2e1efL }, + { 0xd593f9d05bbd8699L,0x5528a4c9e7976d13L,0x3923e0951c7e28d3L, + 0xb92937903f6bb577L } }, + /* 8 << 217 */ + { { 0xdb567d6ac42bd6d2L,0x6df86468bb1f96aeL,0x0efe5b1a4843b28eL, + 0x961bbb056379b240L }, + { 0xb6caf5f070a6a26bL,0x70686c0d328e6e39L,0x80da06cf895fc8d3L, + 0x804d8810b363fdc9L } }, + /* 9 << 217 */ + { { 0xbe22877b207f1670L,0x9b0dd1884e615291L,0x625ae8dc97a3c2bfL, + 0x08584ef7439b86e8L }, + { 0xde7190a5dcd898ffL,0x26286c402058ee3dL,0x3db0b2175f87b1c1L, + 0xcc334771102a6db5L } }, + /* 10 << 217 */ + { { 0xd99de9542f770fb1L,0x97c1c6204cd7535eL,0xd3b6c4483f09cefcL, + 0xd725af155a63b4f8L }, + { 0x0c95d24fc01e20ecL,0xdfd374949ae7121fL,0x7d6ddb72ec77b7ecL, + 0xfe079d3b0353a4aeL } }, + /* 11 << 217 */ + { { 0x3066e70a2e6ac8d2L,0x9c6b5a43106e5c05L,0x52d3c6f5ede59b8cL, + 0x30d6a5c3fccec9aeL }, + { 0xedec7c224fc0a9efL,0x190ff08395c16cedL,0xbe12ec8f94de0fdeL, + 0x0d131ab8852d3433L } }, + /* 12 << 217 */ + { { 
0x42ace07e85701291L,0x94793ed9194061a8L,0x30e83ed6d7f4a485L, + 0x9eec7269f9eeff4dL }, + { 0x90acba590c9d8005L,0x5feca4581e79b9d1L,0x8fbe54271d506a1eL, + 0xa32b2c8e2439cfa7L } }, + /* 13 << 217 */ + { { 0x1671c17373dd0b4eL,0x37a2821444a054c6L,0x81760a1b4e8b53f1L, + 0xa6c04224f9f93b9eL }, + { 0x18784b34cf671e3cL,0x81bbecd2cda9b994L,0x38831979b2ab3848L, + 0xef54feb7f2e03c2dL } }, + /* 14 << 217 */ + { { 0xcf197ca7fb8088faL,0x014272474ddc96c5L,0xa2d2550a30777176L, + 0x534698984d0cf71dL }, + { 0x6ce937b83a2aaac6L,0xe9f91dc35af38d9bL,0x2598ad83c8bf2899L, + 0x8e706ac9b5536c16L } }, + /* 15 << 217 */ + { { 0x40dc7495f688dc98L,0x26490cd7124c4afcL,0xe651ec841f18775cL, + 0x393ea6c3b4fdaf4aL }, + { 0x1e1f33437f338e0dL,0x39fb832b6053e7b5L,0x46e702da619e14d5L, + 0x859cacd1cdeef6e0L } }, + /* 16 << 217 */ + { { 0x63b99ce74462007dL,0xb8ab48a54cb5f5b7L,0x9ec673d2f55edde7L, + 0xd1567f748cfaefdaL }, + { 0x46381b6b0887bcecL,0x694497cee178f3c2L,0x5e6525e31e6266cbL, + 0x5931de26697d6413L } }, + /* 17 << 217 */ + { { 0x87f8df7c0e58d493L,0xb1ae5ed058b73f12L,0xc368f784dea0c34dL, + 0x9bd0a120859a91a0L }, + { 0xb00d88b7cc863c68L,0x3a1cc11e3d1f4d65L,0xea38e0e70aa85593L, + 0x37f13e987dc4aee8L } }, + /* 18 << 217 */ + { { 0x10d38667bc947badL,0x738e07ce2a36ee2eL,0xc93470cdc577fcacL, + 0xdee1b6162782470dL }, + { 0x36a25e672e793d12L,0xd6aa6caee0f186daL,0x474d0fd980e07af7L, + 0xf7cdc47dba8a5cd4L } }, + /* 19 << 217 */ + { { 0x28af6d9dab15247fL,0x7c789c10493a537fL,0x7ac9b11023a334e7L, + 0x0236ac0912c9c277L }, + { 0xa7e5bd251d7a5144L,0x098b9c2af13ec4ecL,0x3639dacad3f0abcaL, + 0x642da81aa23960f9L } }, + /* 20 << 217 */ + { { 0x7d2e5c054f7269b1L,0xfcf30777e287c385L,0x10edc84ff2a46f21L, + 0x354417574f43fa36L }, + { 0xf1327899fd703431L,0xa438d7a616dd587aL,0x65c34c57e9c8352dL, + 0xa728edab5cc5a24eL } }, + /* 21 << 217 */ + { { 0xaed78abc42531689L,0x0a51a0e8010963efL,0x5776fa0ad717d9b3L, + 0xf356c2397dd3428bL }, + { 0x29903fff8d3a3dacL,0x409597fa3d94491fL,0x4cd7a5ffbf4a56a4L, + 0xe50964748adab462L } }, + /* 22 << 217 */ + { { 0xa97b51265c3427b0L,0x6401405cd282c9bdL,0x3629f8d7222c5c45L, + 0xb1c02c16e8d50aedL }, + { 0xbea2ed75d9635bc9L,0x226790c76e24552fL,0x3c33f2a365f1d066L, + 0x2a43463e6dfccc2eL } }, + /* 23 << 217 */ + { { 0x8cc3453adb483761L,0xe7cc608565d5672bL,0x277ed6cbde3efc87L, + 0x19f2f36869234eafL }, + { 0x9aaf43175c0b800bL,0x1f1e7c898b6da6e2L,0x6cfb4715b94ec75eL, + 0xd590dd5f453118c2L } }, + /* 24 << 217 */ + { { 0x14e49da11f17a34cL,0x5420ab39235a1456L,0xb76372412f50363bL, + 0x7b15d623c3fabb6eL }, + { 0xa0ef40b1e274e49cL,0x5cf5074496b1860aL,0xd6583fbf66afe5a4L, + 0x44240510f47e3e9aL } }, + /* 25 << 217 */ + { { 0x9925434311b2d595L,0xf1367499eec8df57L,0x3cb12c613e73dd05L, + 0xd248c0337dac102aL }, + { 0xcf154f13a77739f5L,0xbf4288cb23d2af42L,0xaa64c9b632e4a1cfL, + 0xee8c07a8c8a208f3L } }, + /* 26 << 217 */ + { { 0xe10d49996fe8393fL,0x0f809a3fe91f3a32L,0x61096d1c802f63c8L, + 0x289e146257750d3dL }, + { 0xed06167e9889feeaL,0xd5c9c0e2e0993909L,0x46fca0d856508ac6L, + 0x918260474f1b8e83L } }, + /* 27 << 217 */ + { { 0x4f2c877a9a4a2751L,0x71bd0072cae6feadL,0x38df8dcc06aa1941L, + 0x5a074b4c63beeaa8L }, + { 0xd6d65934c1cec8edL,0xa6ecb49eaabc03bdL,0xaade91c2de8a8415L, + 0xcfb0efdf691136e0L } }, + /* 28 << 217 */ + { { 0x11af45ee23ab3495L,0xa132df880b77463dL,0x8923c15c815d06f4L, + 0xc3ceb3f50d61a436L }, + { 0xaf52291de88fb1daL,0xea0579741da12179L,0xb0d7218cd2fef720L, + 0x6c0899c98e1d8845L } }, + /* 29 << 217 */ + { { 0x98157504752ddad7L,0xd60bd74fa1a68a97L,0x7047a3a9f658fb99L, + 0x1f5d86d65f8511e4L }, + { 
0xb8a4bc424b5a6d88L,0x69eb2c331abefa7dL,0x95bf39e813c9c510L, + 0xf571960ad48aab43L } }, + /* 30 << 217 */ + { { 0x7e8cfbcf704e23c6L,0xc71b7d2228aaa65bL,0xa041b2bd245e3c83L, + 0x69b98834d21854ffL }, + { 0x89d227a3963bfeecL,0x99947aaade7da7cbL,0x1d9ee9dbee68a9b1L, + 0x0a08f003698ec368L } }, + /* 31 << 217 */ + { { 0xe9ea409478ef2487L,0xc8d2d41502cfec26L,0xc52f9a6eb7dcf328L, + 0x0ed489e385b6a937L }, + { 0x9b94986bbef3366eL,0x0de59c70edddddb8L,0xffdb748ceadddbe2L, + 0x9b9784bb8266ea40L } }, + /* 32 << 217 */ + { { 0x142b55021a93507aL,0xb4cd11878d3c06cfL,0xdf70e76a91ec3f40L, + 0x484e81ad4e7553c2L }, + { 0x830f87b5272e9d6eL,0xea1c93e5c6ff514aL,0x67cc2adcc4192a8eL, + 0xc77e27e242f4535aL } }, + /* 33 << 217 */ + { { 0x9cdbab36d2b713c5L,0x86274ea0cf7b0cd3L,0x784680f309af826bL, + 0xbfcc837a0c72dea3L }, + { 0xa8bdfe9dd6529b73L,0x708aa22863a88002L,0x6c7a9a54c91d45b9L, + 0xdf1a38bbfd004f56L } }, + /* 34 << 217 */ + { { 0x2e8c9a26b8bad853L,0x2d52cea33723eae7L,0x054d6d8156ca2830L, + 0xa3317d149a8dc411L }, + { 0xa08662fefd4ddedaL,0xed2a153ab55d792bL,0x7035c16abfc6e944L, + 0xb6bc583400171cf3L } }, + /* 35 << 217 */ + { { 0xe27152b383d102b6L,0xfe695a470646b848L,0xa5bb09d8916e6d37L, + 0xb4269d640d17015eL }, + { 0x8d8156a10a1d2285L,0xfeef6c5146d26d72L,0x9dac57c84c5434a7L, + 0x0282e5be59d39e31L } }, + /* 36 << 217 */ + { { 0xedfff181721c486dL,0x301baf10bc58824eL,0x8136a6aa00570031L, + 0x55aaf78c1cddde68L }, + { 0x2682937159c63952L,0x3a3bd2748bc25bafL,0xecdf8657b7e52dc3L, + 0x2dd8c087fd78e6c8L } }, + /* 37 << 217 */ + { { 0x20553274f5531461L,0x8b4a12815d95499bL,0xe2c8763a1a80f9d2L, + 0xd1dbe32b4ddec758L }, + { 0xaf12210d30c34169L,0xba74a95378baa533L,0x3d133c6ea438f254L, + 0xa431531a201bef5bL } }, + /* 38 << 217 */ + { { 0x15295e22f669d7ecL,0xca374f64357fb515L,0x8a8406ffeaa3fdb3L, + 0x106ae448df3f2da8L }, + { 0x8f9b0a9033c8e9a1L,0x234645e271ad5885L,0x3d0832241c0aed14L, + 0xf10a7d3e7a942d46L } }, + /* 39 << 217 */ + { { 0x7c11deee40d5c9beL,0xb2bae7ffba84ed98L,0x93e97139aad58dddL, + 0x3d8727963f6d1fa3L }, + { 0x483aca818569ff13L,0x8b89a5fb9a600f72L,0x4cbc27c3c06f2b86L, + 0x2213071363ad9c0bL } }, + /* 40 << 217 */ + { { 0xb5358b1e48ac2840L,0x18311294ecba9477L,0xda58f990a6946b43L, + 0x3098baf99ab41819L }, + { 0x66c4c1584198da52L,0xab4fc17c146bfd1bL,0x2f0a4c3cbf36a908L, + 0x2ae9e34b58cf7838L } }, + /* 41 << 217 */ + { { 0xf411529e3fa11b1fL,0x21e43677974af2b4L,0x7c20958ec230793bL, + 0x710ea88516e840f3L }, + { 0xfc0b21fcc5dc67cfL,0x08d5164788405718L,0xd955c21fcfe49eb7L, + 0x9722a5d556dd4a1fL } }, + /* 42 << 217 */ + { { 0xc9ef50e2c861baa5L,0xc0c21a5d9505ac3eL,0xaf6b9a338b7c063fL, + 0xc63703392f4779c1L }, + { 0x22df99c7638167c3L,0xfe6ffe76795db30cL,0x2b822d33a4854989L, + 0xfef031dd30563aa5L } }, + /* 43 << 217 */ + { { 0x16b09f82d57c667fL,0xc70312cecc0b76f1L,0xbf04a9e6c9118aecL, + 0x82fcb4193409d133L }, + { 0x1a8ab385ab45d44dL,0xfba07222617b83a3L,0xb05f50dd58e81b52L, + 0x1d8db55321ce5affL } }, + /* 44 << 217 */ + { { 0x3097b8d4e344a873L,0x7d8d116dfe36d53eL,0x6db22f587875e750L, + 0x2dc5e37343e144eaL }, + { 0xc05f32e6e799eb95L,0xe9e5f4df6899e6ecL,0xbdc3bd681fab23d5L, + 0xb72b8ab773af60e6L } }, + /* 45 << 217 */ + { { 0x8db27ae02cecc84aL,0x600016d87bdb871cL,0x42a44b13d7c46f58L, + 0xb8919727c3a77d39L }, + { 0xcfc6bbbddafd6088L,0x1a7401466bd20d39L,0x8c747abd98c41072L, + 0x4c91e765bdf68ea1L } }, + /* 46 << 217 */ + { { 0x7c95e5ca08819a78L,0xcf48b729c9587921L,0x091c7c5fdebbcc7dL, + 0x6f287404f0e05149L }, + { 0xf83b5ac226cd44ecL,0x88ae32a6cfea250eL,0x6ac5047a1d06ebc5L, + 0xc7e550b4d434f781L } }, + /* 47 << 217 */ + { { 
0x61ab1cf25c727bd2L,0x2e4badb11cf915b0L,0x1b4dadecf69d3920L, + 0xe61b1ca6f14c1dfeL }, + { 0x90b479ccbd6bd51fL,0x8024e4018045ec30L,0xcab29ca325ef0e62L, + 0x4f2e941649e4ebc0L } }, + /* 48 << 217 */ + { { 0x45eb40ec0ccced58L,0x25cd4b9c0da44f98L,0x43e06458871812c6L, + 0x99f80d5516cef651L }, + { 0x571340c9ce6dc153L,0x138d5117d8665521L,0xacdb45bc4e07014dL, + 0x2f34bb3884b60b91L } }, + /* 49 << 217 */ + { { 0xf44a4fd22ae8921eL,0xb039288e892ba1e2L,0x9da50174b1c180b2L, + 0x6b70ab661693dc87L }, + { 0x7e9babc9e7057481L,0x4581ddef9c80dc41L,0x0c890da951294682L, + 0x0b5629d33f4736e5L } }, + /* 50 << 217 */ + { { 0x2340c79eb06f5b41L,0xa42e84ce4e243469L,0xf9a20135045a71a9L, + 0xefbfb415d27b6fb6L }, + { 0x25ebea239d33cd6fL,0x9caedb88aa6c0af8L,0x53dc7e9ad9ce6f96L, + 0x3897f9fd51e0b15aL } }, + /* 51 << 217 */ + { { 0xf51cb1f88e5d788eL,0x1aec7ba8e1d490eeL,0x265991e0cc58cb3cL, + 0x9f306e8c9fc3ad31L }, + { 0x5fed006e5040a0acL,0xca9d5043fb476f2eL,0xa19c06e8beea7a23L, + 0xd28658010edabb63L } }, + /* 52 << 217 */ + { { 0xdb92293f6967469aL,0x2894d8398d8a8ed8L,0x87c9e406bbc77122L, + 0x8671c6f12ea3a26aL }, + { 0xe42df8d6d7de9853L,0x2e3ce346b1f2bcc7L,0xda601dfc899d50cfL, + 0xbfc913defb1b598fL } }, + /* 53 << 217 */ + { { 0x81c4909fe61f7908L,0x192e304f9bbc7b29L,0xc3ed8738c104b338L, + 0xedbe9e47783f5d61L }, + { 0x0c06e9be2db30660L,0xda3e613fc0eb7d8eL,0xd8fa3e97322e096eL, + 0xfebd91e8d336e247L } }, + /* 54 << 217 */ + { { 0x8f13ccc4df655a49L,0xa9e00dfc5eb20210L,0x84631d0fc656b6eaL, + 0x93a058cdd8c0d947L }, + { 0x6846904a67bd3448L,0x4a3d4e1af394fd5cL,0xc102c1a5db225f52L, + 0xe3455bbafc4f5e9aL } }, + /* 55 << 217 */ + { { 0x6b36985b4b9ad1ceL,0xa98185365bb7f793L,0x6c25e1d048b1a416L, + 0x1381dd533c81bee7L }, + { 0xd2a30d617a4a7620L,0xc841292639b8944cL,0x3c1c6fbe7a97c33aL, + 0x941e541d938664e7L } }, + /* 56 << 217 */ + { { 0x417499e84a34f239L,0x15fdb83cb90402d5L,0xb75f46bf433aa832L, + 0xb61e15af63215db1L }, + { 0xaabe59d4a127f89aL,0x5d541e0c07e816daL,0xaaba0659a618b692L, + 0x5532773317266026L } }, + /* 57 << 217 */ + { { 0xaf53a0fc95f57552L,0x329476506cacb0c9L,0x253ff58dc821be01L, + 0xb0309531a06f1146L }, + { 0x59bbbdf505c2e54dL,0x158f27ad26e8dd22L,0xcc5b7ffb397e1e53L, + 0xae03f65b7fc1e50dL } }, + /* 58 << 217 */ + { { 0xa9784ebd9c95f0f9L,0x5ed9deb224640771L,0x31244af7035561c4L, + 0x87332f3a7ee857deL }, + { 0x09e16e9e2b9e0d88L,0x52d910f456a06049L,0x507ed477a9592f48L, + 0x85cb917b2365d678L } }, + /* 59 << 217 */ + { { 0xf8511c934c8998d1L,0x2186a3f1730ea58fL,0x50189626b2029db0L, + 0x9137a6d902ceb75aL }, + { 0x2fe17f37748bc82cL,0x87c2e93180469f8cL,0x850f71cdbf891aa2L, + 0x0ca1b89b75ec3d8dL } }, + /* 60 << 217 */ + { { 0x516c43aa5e1cd3cdL,0x893978089a887c28L,0x0059c699ddea1f9fL, + 0x7737d6fa8e6868f7L }, + { 0x6d93746a60f1524bL,0x36985e55ba052aa7L,0x41b1d322ed923ea5L, + 0x3429759f25852a11L } }, + /* 61 << 217 */ + { { 0xbeca6ec3092e9f41L,0x3a238c6662256bbdL,0xd82958ea70ad487dL, + 0x4ac8aaf965610d93L }, + { 0x3fa101b15e4ccab0L,0x9bf430f29de14bfbL,0xa10f5cc66531899dL, + 0x590005fbea8ce17dL } }, + /* 62 << 217 */ + { { 0xc437912f24544cb6L,0x9987b71ad79ac2e3L,0x13e3d9ddc058a212L, + 0x00075aacd2de9606L }, + { 0x80ab508b6cac8369L,0x87842be7f54f6c89L,0xa7ad663d6bc532a4L, + 0x67813de778a91bc8L } }, + /* 63 << 217 */ + { { 0x5dcb61cec3427239L,0x5f3c7cf0c56934d9L,0xc079e0fbe3191591L, + 0xe40896bdb01aada7L }, + { 0x8d4667910492d25fL,0x8aeb30c9e7408276L,0xe94374959287aaccL, + 0x23d4708d79fe03d4L } }, + /* 64 << 217 */ + { { 0x8cda9cf2d0c05199L,0x502fbc22fae78454L,0xc0bda9dff572a182L, + 0x5f9b71b86158b372L }, + { 
0xe0f33a592b82dd07L,0x763027359523032eL,0x7fe1a721c4505a32L, + 0x7b6e3e82f796409fL } }, + /* 0 << 224 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 224 */ + { { 0xe3417bc035d0b34aL,0x440b386b8327c0a7L,0x8fb7262dac0362d1L, + 0x2c41114ce0cdf943L }, + { 0x2ba5cef1ad95a0b1L,0xc09b37a867d54362L,0x26d6cdd201e486c9L, + 0x20477abf42ff9297L } }, + /* 2 << 224 */ + { { 0xa004dcb3292a9287L,0xddc15cf677b092c7L,0x083a8464806c0605L, + 0x4a68df703db997b0L }, + { 0x9c134e4505bf7dd0L,0xa4e63d398ccf7f8cL,0xa6e6517f41b5f8afL, + 0xaa8b9342ad7bc1ccL } }, + /* 3 << 224 */ + { { 0x126f35b51e706ad9L,0xb99cebb4c3a9ebdfL,0xa75389afbf608d90L, + 0x76113c4fc6c89858L }, + { 0x80de8eb097e2b5aaL,0x7e1022cc63b91304L,0x3bdab6056ccc066cL, + 0x33cbb144b2edf900L } }, + /* 4 << 224 */ + { { 0xc41764717af715d2L,0xe2f7f594d0134a96L,0x2c1873efa41ec956L, + 0xe4e7b4f677821304L }, + { 0xe5c8ff9788d5374aL,0x2b915e6380823d5bL,0xea6bc755b2ee8fe2L, + 0x6657624ce7112651L } }, + /* 5 << 224 */ + { { 0x157af101dace5acaL,0xc4fdbcf211a6a267L,0xdaddf340c49c8609L, + 0x97e49f52e9604a65L }, + { 0x9be8e790937e2ad5L,0x846e2508326e17f1L,0x3f38007a0bbbc0dcL, + 0xcf03603fb11e16d6L } }, + /* 6 << 224 */ + { { 0xd6f800e07442f1d5L,0x475607d166e0e3abL,0x82807f16b7c64047L, + 0x8858e1e3a749883dL }, + { 0x5859120b8231ee10L,0x1b80e7eb638a1eceL,0xcb72525ac6aa73a4L, + 0xa7cdea3d844423acL } }, + /* 7 << 224 */ + { { 0x5ed0c007f8ae7c38L,0x6db07a5c3d740192L,0xbe5e9c2a5fe36db3L, + 0xd5b9d57a76e95046L }, + { 0x54ac32e78eba20f2L,0xef11ca8f71b9a352L,0x305e373eff98a658L, + 0xffe5a100823eb667L } }, + /* 8 << 224 */ + { { 0x57477b11e51732d2L,0xdfd6eb282538fc0eL,0x5c43b0cc3b39eec5L, + 0x6af12778cb36cc57L }, + { 0x70b0852d06c425aeL,0x6df92f8c5c221b9bL,0x6c8d4f9ece826d9cL, + 0xf59aba7bb49359c3L } }, + /* 9 << 224 */ + { { 0x5c8ed8d5da64309dL,0x61a6de5691b30704L,0xd6b52f6a2f9b5808L, + 0x0eee419498c958a7L }, + { 0xcddd9aab771e4caaL,0x83965dfd78bc21beL,0x02affce3b3b504f5L, + 0x30847a21561c8291L } }, + /* 10 << 224 */ + { { 0xd2eb2cf152bfda05L,0xe0e4c4e96197b98cL,0x1d35076cf8a1726fL, + 0x6c06085b2db11e3dL }, + { 0x15c0c4d74463ba14L,0x9d292f830030238cL,0x1311ee8b3727536dL, + 0xfeea86efbeaedc1eL } }, + /* 11 << 224 */ + { { 0xb9d18cd366131e2eL,0xf31d974f80fe2682L,0xb6e49e0fe4160289L, + 0x7c48ec0b08e92799L }, + { 0x818111d8d1989aa7L,0xb34fa0aaebf926f9L,0xdb5fe2f5a245474aL, + 0xf80a6ebb3c7ca756L } }, + /* 12 << 224 */ + { { 0xa7f96054afa05dd8L,0x26dfcf21fcaf119eL,0xe20ef2e30564bb59L, + 0xef4dca5061cb02b8L }, + { 0xcda7838a65d30672L,0x8b08d534fd657e86L,0x4c5b439546d595c8L, + 0x39b58725425cb836L } }, + /* 13 << 224 */ + { { 0x8ea610593de9abe3L,0x404348819cdc03beL,0x9b261245cfedce8cL, + 0x78c318b4cf5234a1L }, + { 0x510bcf16fde24c99L,0x2a77cb75a2c2ff5dL,0x9c895c2b27960fb4L, + 0xd30ce975b0eda42bL } }, + /* 14 << 224 */ + { { 0xfda853931a62cc26L,0x23c69b9650c0e052L,0xa227df15bfc633f3L, + 0x2ac788481bae7d48L }, + { 0x487878f9187d073dL,0x6c2be919967f807dL,0x765861d8336e6d8fL, + 0x88b8974cce528a43L } }, + /* 15 << 224 */ + { { 0x09521177ff57d051L,0x2ff38037fb6a1961L,0xfc0aba74a3d76ad4L, + 0x7c76480325a7ec17L }, + { 0x7532d75f48879bc8L,0xea7eacc058ce6bc1L,0xc82176b48e896c16L, + 0x9a30e0b22c750fedL } }, + /* 16 << 224 */ + { { 0xc37e2c2e421d3aa4L,0xf926407ce84fa840L,0x18abc03d1454e41cL, + 0x26605ecd3f7af644L }, + { 0x242341a6d6a5eabfL,0x1edb84f4216b668eL,0xd836edb804010102L, + 0x5b337ce7945e1d8cL } }, + /* 17 << 224 */ + { { 0xd2075c77c055dc14L,0x2a0ffa2581d89cdfL,0x8ce815ea6ffdcbafL, + 0xa3428878fb648867L }, + { 
0x277699cf884655fbL,0xfa5b5bd6364d3e41L,0x01f680c6441e1cb7L, + 0x3fd61e66b70a7d67L } }, + /* 18 << 224 */ + { { 0x666ba2dccc78cf66L,0xb30181746fdbff77L,0x8d4dd0db168d4668L, + 0x259455d01dab3a2aL }, + { 0xf58564c5cde3acecL,0x7714192513adb276L,0x527d725d8a303f65L, + 0x55deb6c9e6f38f7bL } }, + /* 19 << 224 */ + { { 0xfd5bb657b1fa70fbL,0xfa07f50fd8073a00L,0xf72e3aa7bca02500L, + 0xf68f895d9975740dL }, + { 0x301120605cae2a6aL,0x01bd721802874842L,0x3d4238917ce47bd3L, + 0xa66663c1789544f6L } }, + /* 20 << 224 */ + { { 0x864d05d73272d838L,0xe22924f9fa6295c5L,0x8189593f6c2fda32L, + 0x330d7189b184b544L }, + { 0x79efa62cbde1f714L,0x35771c94e5cb1a63L,0x2f4826b8641c8332L, + 0x00a894fbc8cee854L } }, + /* 21 << 224 */ + { { 0xb4b9a39b36194d40L,0xe857a7c577612601L,0xf4209dd24ecf2f58L, + 0x82b9e66d5a033487L }, + { 0xc1e36934e4e8b9ddL,0xd2372c9da42377d7L,0x51dc94c70e3ae43bL, + 0x4c57761e04474f6fL } }, + /* 22 << 224 */ + { { 0xdcdacd0a1058a318L,0x369cf3f578053a9aL,0xc6c3de5031c68de2L, + 0x4653a5763c4b6d9fL }, + { 0x1688dd5aaa4e5c97L,0x5be80aa1b7ab3c74L,0x70cefe7cbc65c283L, + 0x57f95f1306867091L } }, + /* 23 << 224 */ + { { 0xa39114e24415503bL,0xc08ff7c64cbb17e9L,0x1eff674dd7dec966L, + 0x6d4690af53376f63L }, + { 0xff6fe32eea74237bL,0xc436d17ecd57508eL,0x15aa28e1edcc40feL, + 0x0d769c04581bbb44L } }, + /* 24 << 224 */ + { { 0xc240b6de34eaacdaL,0xd9e116e82ba0f1deL,0xcbe45ec779438e55L, + 0x91787c9d96f752d7L }, + { 0x897f532bf129ac2fL,0xd307b7c85a36e22cL,0x91940675749fb8f3L, + 0xd14f95d0157fdb28L } }, + /* 25 << 224 */ + { { 0xfe51d0296ae55043L,0x8931e98f44a87de1L,0xe57f1cc609e4fee2L, + 0x0d063b674e072d92L }, + { 0x70a998b9ed0e4316L,0xe74a736b306aca46L,0xecf0fbf24fda97c7L, + 0xa40f65cb3e178d93L } }, + /* 26 << 224 */ + { { 0x1625360416df4285L,0xb0c9babbd0c56ae2L,0x73032b19cfc5cfc3L, + 0xe497e5c309752056L }, + { 0x12096bb4164bda96L,0x1ee42419a0b74da1L,0x8fc36243403826baL, + 0x0c8f0069dc09e660L } }, + /* 27 << 224 */ + { { 0x8667e981c27253c9L,0x05a6aefb92b36a45L,0xa62c4b369cb7bb46L, + 0x8394f37511f7027bL }, + { 0x747bc79c5f109d0fL,0xcad88a765b8cc60aL,0x80c5a66b58f09e68L, + 0xe753d451f6127eacL } }, + /* 28 << 224 */ + { { 0xc44b74a15b0ec6f5L,0x47989fe45289b2b8L,0x745f848458d6fc73L, + 0xec362a6ff61c70abL }, + { 0x070c98a7b3a8ad41L,0x73a20fc07b63db51L,0xed2c2173f44c35f4L, + 0x8a56149d9acc9dcaL } }, + /* 29 << 224 */ + { { 0x98f178819ac6e0f4L,0x360fdeafa413b5edL,0x0625b8f4a300b0fdL, + 0xf1f4d76a5b3222d3L }, + { 0x9d6f5109587f76b8L,0x8b4ee08d2317fdb5L,0x88089bb78c68b095L, + 0x95570e9a5808d9b9L } }, + /* 30 << 224 */ + { { 0xa395c36f35d33ae7L,0x200ea12350bb5a94L,0x20c789bd0bafe84bL, + 0x243ef52d0919276aL }, + { 0x3934c577e23ae233L,0xb93807afa460d1ecL,0xb72a53b1f8fa76a4L, + 0xd8914cb0c3ca4491L } }, + /* 31 << 224 */ + { { 0x2e1284943fb42622L,0x3b2700ac500907d5L,0xf370fb091a95ec63L, + 0xf8f30be231b6dfbdL }, + { 0xf2b2f8d269e55f15L,0x1fead851cc1323e9L,0xfa366010d9e5eef6L, + 0x64d487b0e316107eL } }, + /* 32 << 224 */ + { { 0x4c076b86d23ddc82L,0x03fd344c7e0143f0L,0xa95362ff317af2c5L, + 0x0add3db7e18b7a4fL }, + { 0x9c673e3f8260e01bL,0xfbeb49e554a1cc91L,0x91351bf292f2e433L, + 0xc755e7ec851141ebL } }, + /* 33 << 224 */ + { { 0xc9a9513929607745L,0x0ca07420a26f2b28L,0xcb2790e74bc6f9ddL, + 0x345bbb58adcaffc0L }, + { 0xc65ea38cbe0f27a2L,0x67c24d7c641fcb56L,0x2c25f0a7a9e2c757L, + 0x93f5cdb016f16c49L } }, + /* 34 << 224 */ + { { 0x2ca5a9d7c5ee30a1L,0xd1593635b909b729L,0x804ce9f3dadeff48L, + 0xec464751b07c30c3L }, + { 0x89d65ff39e49af6aL,0xf2d6238a6f3d01bcL,0x1095561e0bced843L, + 0x51789e12c8a13fd8L } }, + /* 35 << 224 */ + { { 
0xd633f929763231dfL,0x46df9f7de7cbddefL,0x01c889c0cb265da8L, + 0xfce1ad10af4336d2L }, + { 0x8d110df6fc6a0a7eL,0xdd431b986da425dcL,0xcdc4aeab1834aabeL, + 0x84deb1248439b7fcL } }, + /* 36 << 224 */ + { { 0x8796f1693c2a5998L,0x9b9247b47947190dL,0x55b9d9a511597014L, + 0x7e9dd70d7b1566eeL }, + { 0x94ad78f7cbcd5e64L,0x0359ac179bd4c032L,0x3b11baaf7cc222aeL, + 0xa6a6e284ba78e812L } }, + /* 37 << 224 */ + { { 0x8392053f24cea1a0L,0xc97bce4a33621491L,0x7eb1db3435399ee9L, + 0x473f78efece81ad1L }, + { 0x41d72fe0f63d3d0dL,0xe620b880afab62fcL,0x92096bc993158383L, + 0x41a213578f896f6cL } }, + /* 38 << 224 */ + { { 0x1b5ee2fac7dcfcabL,0x650acfde9546e007L,0xc081b749b1b02e07L, + 0xda9e41a0f9eca03dL }, + { 0x013ba727175a54abL,0xca0cd190ea5d8d10L,0x85ea52c095fd96a9L, + 0x2c591b9fbc5c3940L } }, + /* 39 << 224 */ + { { 0x6fb4d4e42bad4d5fL,0xfa4c3590fef0059bL,0x6a10218af5122294L, + 0x9a78a81aa85751d1L }, + { 0x04f20579a98e84e7L,0xfe1242c04997e5b5L,0xe77a273bca21e1e4L, + 0xfcc8b1ef9411939dL } }, + /* 40 << 224 */ + { { 0xe20ea30292d0487aL,0x1442dbec294b91feL,0x1f7a4afebb6b0e8fL, + 0x1700ef746889c318L }, + { 0xf5bbffc370f1fc62L,0x3b31d4b669c79ccaL,0xe8bc2aaba7f6340dL, + 0xb0b08ab4a725e10aL } }, + /* 41 << 224 */ + { { 0x44f05701ae340050L,0xba4b30161cf0c569L,0x5aa29f83fbe19a51L, + 0x1b9ed428b71d752eL }, + { 0x1666e54eeb4819f5L,0x616cdfed9e18b75bL,0x112ed5be3ee27b0bL, + 0xfbf2831944c7de4dL } }, + /* 42 << 224 */ + { { 0xd685ec85e0e60d84L,0x68037e301db7ee78L,0x5b65bdcd003c4d6eL, + 0x33e7363a93e29a6aL }, + { 0x995b3a6108d0756cL,0xd727f85c2faf134bL,0xfac6edf71d337823L, + 0x99b9aa500439b8b4L } }, + /* 43 << 224 */ + { { 0x722eb104e2b4e075L,0x49987295437c4926L,0xb1e4c0e446a9b82dL, + 0xd0cb319757a006f5L }, + { 0xf3de0f7dd7808c56L,0xb5c54d8f51f89772L,0x500a114aadbd31aaL, + 0x9afaaaa6295f6cabL } }, + /* 44 << 224 */ + { { 0x94705e2104cf667aL,0xfc2a811b9d3935d7L,0x560b02806d09267cL, + 0xf19ed119f780e53bL }, + { 0xf0227c09067b6269L,0x967b85335caef599L,0x155b924368efeebcL, + 0xcd6d34f5c497bae6L } }, + /* 45 << 224 */ + { { 0x1dd8d5d36cceb370L,0x2aeac579a78d7bf9L,0x5d65017d70b67a62L, + 0x70c8e44f17c53f67L }, + { 0xd1fc095086a34d09L,0xe0fca256e7134907L,0xe24fa29c80fdd315L, + 0x2c4acd03d87499adL } }, + /* 46 << 224 */ + { { 0xbaaf75173b5a9ba6L,0xb9cbe1f612e51a51L,0xd88edae35e154897L, + 0xe4309c3c77b66ca0L }, + { 0xf5555805f67f3746L,0x85fc37baa36401ffL,0xdf86e2cad9499a53L, + 0x6270b2a3ecbc955bL } }, + /* 47 << 224 */ + { { 0xafae64f5974ad33bL,0x04d85977fe7b2df1L,0x2a3db3ff4ab03f73L, + 0x0b87878a8702740aL }, + { 0x6d263f015a061732L,0xc25430cea32a1901L,0xf7ebab3ddb155018L, + 0x3a86f69363a9b78eL } }, + /* 48 << 224 */ + { { 0x349ae368da9f3804L,0x470f07fea164349cL,0xd52f4cc98562baa5L, + 0xc74a9e862b290df3L }, + { 0xd3a1aa3543471a24L,0x239446beb8194511L,0xbec2dd0081dcd44dL, + 0xca3d7f0fc42ac82dL } }, + /* 49 << 224 */ + { { 0x1f3db085fdaf4520L,0xbb6d3e804549daf2L,0xf5969d8a19ad5c42L, + 0x7052b13ddbfd1511L }, + { 0x11890d1b682b9060L,0xa71d3883ac34452cL,0xa438055b783805b4L, + 0x432412774725b23eL } }, + /* 50 << 224 */ + { { 0xf20cf96e4901bbedL,0x6419c710f432a2bbL,0x57a0fbb9dfa9cd7dL, + 0x589111e400daa249L }, + { 0x19809a337b60554eL,0xea5f8887ede283a4L,0x2d713802503bfd35L, + 0x151bb0af585d2a53L } }, + /* 51 << 224 */ + { { 0x40b08f7443b30ca8L,0xe10b5bbad9934583L,0xe8a546d6b51110adL, + 0x1dd50e6628e0b6c5L }, + { 0x292e9d54cff2b821L,0x3882555d47281760L,0x134838f83724d6e3L, + 0xf2c679e022ddcda1L } }, + /* 52 << 224 */ + { { 0x40ee88156d2a5768L,0x7f227bd21c1e7e2dL,0x487ba134d04ff443L, + 0x76e2ff3dc614e54bL }, + { 
0x36b88d6fa3177ec7L,0xbf731d512328fff5L,0x758caea249ba158eL, + 0x5ab8ff4c02938188L } }, + /* 53 << 224 */ + { { 0x33e1605635edc56dL,0x5a69d3497e940d79L,0x6c4fd00103866dcbL, + 0x20a38f574893cdefL }, + { 0xfbf3e790fac3a15bL,0x6ed7ea2e7a4f8e6bL,0xa663eb4fbc3aca86L, + 0x22061ea5080d53f7L } }, + /* 54 << 224 */ + { { 0x2480dfe6f546783fL,0xd38bc6da5a0a641eL,0xfb093cd12ede8965L, + 0x89654db4acb455cfL }, + { 0x413cbf9a26e1adeeL,0x291f3764373294d4L,0x00797257648083feL, + 0x25f504d3208cc341L } }, + /* 55 << 224 */ + { { 0x635a8e5ec3a0ee43L,0x70aaebca679898ffL,0x9ee9f5475dc63d56L, + 0xce987966ffb34d00L }, + { 0xf9f86b195e26310aL,0x9e435484382a8ca8L,0x253bcb81c2352fe4L, + 0xa4eac8b04474b571L } }, + /* 56 << 224 */ + { { 0xc1b97512c1ad8cf8L,0x193b4e9e99e0b697L,0x939d271601e85df0L, + 0x4fb265b3cd44eafdL }, + { 0x321e7dcde51e1ae2L,0x8e3a8ca6e3d8b096L,0x8de46cb052604998L, + 0x91099ad839072aa7L } }, + /* 57 << 224 */ + { { 0x2617f91c93aa96b8L,0x0fc8716b7fca2e13L,0xa7106f5e95328723L, + 0xd1c9c40b262e6522L }, + { 0xb9bafe8642b7c094L,0x1873439d1543c021L,0xe1baa5de5cbefd5dL, + 0xa363fc5e521e8affL } }, + /* 58 << 224 */ + { { 0xefe6320df862eaacL,0x14419c6322c647dcL,0x0e06707c4e46d428L, + 0xcb6c834f4a178f8fL }, + { 0x0f993a45d30f917cL,0xd4c4b0499879afeeL,0xb6142a1e70500063L, + 0x7c9b41c3a5d9d605L } }, + /* 59 << 224 */ + { { 0xbc00fc2f2f8ba2c7L,0x0966eb2f7c67aa28L,0x13f7b5165a786972L, + 0x3bfb75578a2fbba0L }, + { 0x131c4f235a2b9620L,0xbff3ed276faf46beL,0x9b4473d17e172323L, + 0x421e8878339f6246L } }, + /* 60 << 224 */ + { { 0x0fa8587a25a41632L,0xc0814124a35b6c93L,0x2b18a9f559ebb8dbL, + 0x264e335776edb29cL }, + { 0xaf245ccdc87c51e2L,0x16b3015b501e6214L,0xbb31c5600a3882ceL, + 0x6961bb94fec11e04L } }, + /* 61 << 224 */ + { { 0x3b825b8deff7a3a0L,0xbec33738b1df7326L,0x68ad747c99604a1fL, + 0xd154c9349a3bd499L }, + { 0xac33506f1cc7a906L,0x73bb53926c560e8fL,0x6428fcbe263e3944L, + 0xc11828d51c387434L } }, + /* 62 << 224 */ + { { 0x3cd04be13e4b12ffL,0xc3aad9f92d88667cL,0xc52ddcf8248120cfL, + 0x985a892e2a389532L }, + { 0xfbb4b21b3bb85fa0L,0xf95375e08dfc6269L,0xfb4fb06c7ee2aceaL, + 0x6785426e309c4d1fL } }, + /* 63 << 224 */ + { { 0x659b17c8d8ceb147L,0x9b649eeeb70a5554L,0x6b7fa0b5ac6bc634L, + 0xd99fe2c71d6e732fL }, + { 0x30e6e7628d3abba2L,0x18fee6e7a797b799L,0x5c9d360dc696464dL, + 0xe3baeb4827bfde12L } }, + /* 64 << 224 */ + { { 0x2bf5db47f23206d5L,0x2f6d34201d260152L,0x17b876533f8ff89aL, + 0x5157c30c378fa458L }, + { 0x7517c5c52d4fb936L,0xef22f7ace6518cdcL,0xdeb483e6bf847a64L, + 0xf508455892e0fa89L } }, + /* 0 << 231 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 231 */ + { { 0xab9659d8df7304d4L,0xb71bcf1bff210e8eL,0xa9a2438bd73fbd60L, + 0x4595cd1f5d11b4deL }, + { 0x9c0d329a4835859dL,0x4a0f0d2d7dbb6e56L,0xc6038e5edf928a4eL, + 0xc94296218f5ad154L } }, + /* 2 << 231 */ + { { 0x91213462f23f2d92L,0x6cab71bd60b94078L,0x6bdd0a63176cde20L, + 0x54c9b20cee4d54bcL }, + { 0x3cd2d8aa9f2ac02fL,0x03f8e617206eedb0L,0xc7f68e1693086434L, + 0x831469c592dd3db9L } }, + /* 3 << 231 */ + { { 0x8521df248f981354L,0x587e23ec3588a259L,0xcbedf281d7a0992cL, + 0x06930a5538961407L }, + { 0x09320debbe5bbe21L,0xa7ffa5b52491817fL,0xe6c8b4d909065160L, + 0xac4f3992fff6d2a9L } }, + /* 4 << 231 */ + { { 0x7aa7a1583ae9c1bdL,0xe0af6d98e37ce240L,0xe54342d928ab38b4L, + 0xe8b750070a1c98caL }, + { 0xefce86afe02358f2L,0x31b8b856ea921228L,0x052a19120a1c67fcL, + 0xb4069ea4e3aead59L } }, + /* 5 << 231 */ + { { 0x3232d6e27fa03cb3L,0xdb938e5b0fdd7d88L,0x04c1d2cd2ccbfc5dL, + 0xd2f45c12af3a580fL }, + { 
0x592620b57883e614L,0x5fd27e68be7c5f26L,0x139e45a91567e1e3L, + 0x2cc71d2d44d8aaafL } }, + /* 6 << 231 */ + { { 0x4a9090cde36d0757L,0xf722d7b1d9a29382L,0xfb7fb04c04b48ddfL, + 0x628ad2a7ebe16f43L }, + { 0xcd3fbfb520226040L,0x6c34ecb15104b6c4L,0x30c0754ec903c188L, + 0xec336b082d23cab0L } }, + /* 7 << 231 */ + { { 0x473d62a21e206ee5L,0xf1e274808c49a633L,0x87ab956ce9f6b2c3L, + 0x61830b4862b606eaL }, + { 0x67cd6846e78e815fL,0xfe40139f4c02082aL,0x52bbbfcb952ec365L, + 0x74c116426b9836abL } }, + /* 8 << 231 */ + { { 0x9f51439e558df019L,0x230da4baac712b27L,0x518919e355185a24L, + 0x4dcefcdd84b78f50L }, + { 0xa7d90fb2a47d4c5aL,0x55ac9abfb30e009eL,0xfd2fc35974eed273L, + 0xb72d824cdbea8fafL } }, + /* 9 << 231 */ + { { 0xce721a744513e2caL,0x0b41861238240b2cL,0x05199968d5baa450L, + 0xeb1757ed2b0e8c25L }, + { 0x6ebc3e283dfac6d5L,0xb2431e2e48a237f5L,0x2acb5e2352f61499L, + 0x5558a2a7e06c936bL } }, + /* 10 << 231 */ + { { 0xd213f923cbb13d1bL,0x98799f425bfb9bfeL,0x1ae8ddc9701144a9L, + 0x0b8b3bb64c5595eeL }, + { 0x0ea9ef2e3ecebb21L,0x17cb6c4b3671f9a7L,0x47ef464f726f1d1fL, + 0x171b94846943a276L } }, + /* 11 << 231 */ + { { 0x51a4ae2d7ef0329cL,0x0850922291c4402aL,0x64a61d35afd45bbcL, + 0x38f096fe3035a851L }, + { 0xc7468b74a1dec027L,0xe8cf10e74fc7dcbaL,0xea35ff40f4a06353L, + 0x0b4c0dfa8b77dd66L } }, + /* 12 << 231 */ + { { 0x779b8552de7e5c19L,0xfab28609c1c0256cL,0x64f58eeeabd4743dL, + 0x4e8ef8387b6cc93bL }, + { 0xee650d264cb1bf3dL,0x4c1f9d0973dedf61L,0xaef7c9d7bfb70cedL, + 0x1ec0507e1641de1eL } }, + /* 13 << 231 */ + { { 0xcd7e5cc7cde45079L,0xde173c9a516ac9e4L,0x517a8494c170315cL, + 0x438fd90591d8e8fbL }, + { 0x5145c506c7d9630bL,0x6457a87bf47d4d75L,0xd31646bf0d9a80e8L, + 0x453add2bcef3aabeL } }, + /* 14 << 231 */ + { { 0xc9941109a607419dL,0xfaa71e62bb6bca80L,0x34158c1307c431f3L, + 0x594abebc992bc47aL }, + { 0x6dfea691eb78399fL,0x48aafb353f42cba4L,0xedcd65af077c04f0L, + 0x1a29a366e884491aL } }, + /* 15 << 231 */ + { { 0x023a40e51c21f2bfL,0xf99a513ca5057aeeL,0xa3fe7e25bcab072eL, + 0x8568d2e140e32bcfL }, + { 0x904594ebd3f69d9fL,0x181a973307affab1L,0xe4d68d76b6e330f4L, + 0x87a6dafbc75a7fc1L } }, + /* 16 << 231 */ + { { 0x549db2b5ef7d9289L,0x2480d4a8197f015aL,0x61d5590bc40493b6L, + 0x3a55b52e6f780331L }, + { 0x40eb8115309eadb0L,0xdea7de5a92e5c625L,0x64d631f0cc6a3d5aL, + 0x9d5e9d7c93e8dd61L } }, + /* 17 << 231 */ + { { 0xf297bef5206d3ffcL,0x23d5e0337d808bd4L,0x4a4f6912d24cf5baL, + 0xe4d8163b09cdaa8aL }, + { 0x0e0de9efd3082e8eL,0x4fe1246c0192f360L,0x1f9001504b8eee0aL, + 0x5219da81f1da391bL } }, + /* 18 << 231 */ + { { 0x7bf6a5c1f7ea25aaL,0xd165e6bffbb07d5fL,0xe353936189e78671L, + 0xa3fcac892bac4219L }, + { 0xdfab6fd4f0baa8abL,0x5a4adac1e2c1c2e5L,0x6cd75e3140d85849L, + 0xce263fea19b39181L } }, + /* 19 << 231 */ + { { 0xcb6803d307032c72L,0x7f40d5ce790968c8L,0xa6de86bddce978f0L, + 0x25547c4f368f751cL }, + { 0xb1e685fd65fb2a9eL,0xce69336f1eb9179cL,0xb15d1c2712504442L, + 0xb7df465cb911a06bL } }, + /* 20 << 231 */ + { { 0xb8d804a3315980cdL,0x693bc492fa3bebf7L,0x3578aeee2253c504L, + 0x158de498cd2474a2L }, + { 0x1331f5c7cfda8368L,0xd2d7bbb378d7177eL,0xdf61133af3c1e46eL, + 0x5836ce7dd30e7be8L } }, + /* 21 << 231 */ + { { 0x83084f1994f834cbL,0xd35653d4429ed782L,0xa542f16f59e58243L, + 0xc2b52f650470a22dL }, + { 0xe3b6221b18f23d96L,0xcb05abac3f5252b4L,0xca00938b87d61402L, + 0x2f186cdd411933e4L } }, + /* 22 << 231 */ + { { 0xe042ece59a29a5c5L,0xb19b3c073b6c8402L,0xc97667c719d92684L, + 0xb5624622ebc66372L }, + { 0x0cb96e653c04fa02L,0x83a7176c8eaa39aaL,0x2033561deaa1633fL, + 0x45a9d0864533df73L } }, + /* 23 << 231 */ + { { 
0xe0542c1d3dc090bcL,0x82c996efaa59c167L,0xe3f735e80ee7fc4dL, + 0x7b1793937c35db79L }, + { 0xb6419e25f8c5dbfdL,0x4d9d7a1e1f327b04L,0x979f6f9b298dfca8L, + 0xc7c5dff18de9366aL } }, + /* 24 << 231 */ + { { 0x1b7a588d04c82bddL,0x68005534f8319dfdL,0xde8a55b5d8eb9580L, + 0x5ea886da8d5bca81L }, + { 0xe8530a01252a0b4dL,0x1bffb4fe35eaa0a1L,0x2ad828b1d8e99563L, + 0x7de96ef595f9cd87L } }, + /* 25 << 231 */ + { { 0x4abb2d0cd77d970cL,0x03cfb933d33ef9cbL,0xb0547c018b211fe9L, + 0x2fe64809a56ed1c6L }, + { 0xcb7d5624c2ac98ccL,0x2a1372c01a393e33L,0xc8d1ec1c29660521L, + 0xf3d31b04b37ac3e9L } }, + /* 26 << 231 */ + { { 0xa29ae9df5ece6e7cL,0x0603ac8f0facfb55L,0xcfe85b7adda233a5L, + 0xe618919fbd75f0b8L }, + { 0xf555a3d299bf1603L,0x1f43afc9f184255aL,0xdcdaf341319a3e02L, + 0xd3b117ef03903a39L } }, + /* 27 << 231 */ + { { 0xe095da1365d1d131L,0x86f16367c37ad03eL,0x5f37389e462cd8ddL, + 0xc103fa04d67a60e6L }, + { 0x57c34344f4b478f0L,0xce91edd8e117c98dL,0x001777b0231fc12eL, + 0x11ae47f2b207bccbL } }, + /* 28 << 231 */ + { { 0xd983cf8d20f8a242L,0x7aff5b1df22e1ad8L,0x68fd11d07fc4feb3L, + 0x5d53ae90b0f1c3e1L }, + { 0x50fb7905ec041803L,0x85e3c97714404888L,0x0e67faedac628d8fL, + 0x2e8651506668532cL } }, + /* 29 << 231 */ + { { 0x15acaaa46a67a6b0L,0xf4cdee25b25cec41L,0x49ee565ae4c6701eL, + 0x2a04ca66fc7d63d8L }, + { 0xeb105018ef0543fbL,0xf709a4f5d1b0d81dL,0x5b906ee62915d333L, + 0xf4a8741296f1f0abL } }, + /* 30 << 231 */ + { { 0xb6b82fa74d82f4c2L,0x90725a606804efb3L,0xbc82ec46adc3425eL, + 0xb7b805812787843eL }, + { 0xdf46d91cdd1fc74cL,0xdc1c62cbe783a6c4L,0x59d1b9f31a04cbbaL, + 0xd87f6f7295e40764L } }, + /* 31 << 231 */ + { { 0x02b4cfc1317f4a76L,0x8d2703eb91036bceL,0x98206cc6a5e72a56L, + 0x57be9ed1cf53fb0fL }, + { 0x09374571ef0b17acL,0x74b2655ed9181b38L,0xc8f80ea889935d0eL, + 0xc0d9e94291529936L } }, + /* 32 << 231 */ + { { 0x196860411e84e0e5L,0xa5db84d3aea34c93L,0xf9d5bb197073a732L, + 0xb8d2fe566bcfd7c0L }, + { 0x45775f36f3eb82faL,0x8cb20cccfdff8b58L,0x1659b65f8374c110L, + 0xb8b4a422330c789aL } }, + /* 33 << 231 */ + { { 0x75e3c3ea6fe8208bL,0xbd74b9e4286e78feL,0x0be2e81bd7d93a1aL, + 0x7ed06e27dd0a5aaeL }, + { 0x721f5a586be8b800L,0x428299d1d846db28L,0x95cb8e6b5be88ed3L, + 0xc3186b231c034e11L } }, + /* 34 << 231 */ + { { 0xa6312c9e8977d99bL,0xbe94433183f531e7L,0x8232c0c218d3b1d4L, + 0x617aae8be1247b73L }, + { 0x40153fc4282aec3bL,0xc6063d2ff7b8f823L,0x68f10e583304f94cL, + 0x31efae74ee676346L } }, + /* 35 << 231 */ + { { 0xbadb6c6d40a9b97cL,0x14702c634f666256L,0xdeb954f15184b2e3L, + 0x5184a52694b6ca40L }, + { 0xfff05337003c32eaL,0x5aa374dd205974c7L,0x9a7638544b0dd71aL, + 0x459cd27fdeb947ecL } }, + /* 36 << 231 */ + { { 0xa6e28161459c2b92L,0x2f020fa875ee8ef5L,0xb132ec2d30b06310L, + 0xc3e15899bc6a4530L }, + { 0xdc5f53feaa3f451aL,0x3a3c7f23c2d9acacL,0x2ec2f8926b27e58bL, + 0x68466ee7d742799fL } }, + /* 37 << 231 */ + { { 0x98324dd41fa26613L,0xa2dc6dabbdc29d63L,0xf9675faad712d657L, + 0x813994be21fd8d15L }, + { 0x5ccbb722fd4f7553L,0x5135ff8bf3a36b20L,0x44be28af69559df5L, + 0x40b65bed9d41bf30L } }, + /* 38 << 231 */ + { { 0xd98bf2a43734e520L,0x5e3abbe3209bdcbaL,0x77c76553bc945b35L, + 0x5331c093c6ef14aaL }, + { 0x518ffe2976b60c80L,0x2285593b7ace16f8L,0xab1f64ccbe2b9784L, + 0xe8f2c0d9ab2421b6L } }, + /* 39 << 231 */ + { { 0x617d7174c1df065cL,0xafeeb5ab5f6578faL,0x16ff1329263b54a8L, + 0x45c55808c990dce3L }, + { 0x42eab6c0ecc8c177L,0x799ea9b55982ecaaL,0xf65da244b607ef8eL, + 0x8ab226ce32a3fc2cL } }, + /* 40 << 231 */ + { { 0x745741e57ea973dcL,0x5c00ca7020888f2eL,0x7cdce3cf45fd9cf1L, + 0x8a741ef15507f872L }, + { 
0x47c51c2f196b4cecL,0x70d08e43c97ea618L,0x930da15c15b18a2bL, + 0x33b6c6782f610514L } }, + /* 41 << 231 */ + { { 0xc662e4f807ac9794L,0x1eccf050ba06cb79L,0x1ff08623e7d954e5L, + 0x6ef2c5fb24cf71c3L }, + { 0xb2c063d267978453L,0xa0cf37961d654af8L,0x7cb242ea7ebdaa37L, + 0x206e0b10b86747e0L } }, + /* 42 << 231 */ + { { 0x481dae5fd5ecfefcL,0x07084fd8c2bff8fcL,0x8040a01aea324596L, + 0x4c646980d4de4036L }, + { 0x9eb8ab4ed65abfc3L,0xe01cb91f13541ec7L,0x8f029adbfd695012L, + 0x9ae284833c7569ecL } }, + /* 43 << 231 */ + { { 0xa5614c9ea66d80a1L,0x680a3e4475f5f911L,0x0c07b14dceba4fc1L, + 0x891c285ba13071c1L }, + { 0xcac67ceb799ece3cL,0x29b910a941e07e27L,0x66bdb409f2e43123L, + 0x06f8b1377ac9ecbeL } }, + /* 44 << 231 */ + { { 0x5981fafd38547090L,0x19ab8b9f85e3415dL,0xfc28c194c7e31b27L, + 0x843be0aa6fbcbb42L }, + { 0xf3b1ed43a6db836cL,0x2a1330e401a45c05L,0x4f19f3c595c1a377L, + 0xa85f39d044b5ee33L } }, + /* 45 << 231 */ + { { 0x3da18e6d4ae52834L,0x5a403b397423dcb0L,0xbb555e0af2374aefL, + 0x2ad599c41e8ca111L }, + { 0x1b3a2fb9014b3bf8L,0x73092684f66d5007L,0x079f1426c4340102L, + 0x1827cf818fddf4deL } }, + /* 46 << 231 */ + { { 0xc83605f6f10ff927L,0xd387145123739fc6L,0x6d163450cac1c2ccL, + 0x6b521296a2ec1ac5L }, + { 0x0606c4f96e3cb4a5L,0xe47d3f41778abff7L,0x425a8d5ebe8e3a45L, + 0x53ea9e97a6102160L } }, + /* 47 << 231 */ + { { 0x477a106e39cbb688L,0x532401d2f3386d32L,0x8e564f64b1b9b421L, + 0xca9b838881dad33fL }, + { 0xb1422b4e2093913eL,0x533d2f9269bc8112L,0x3fa017beebe7b2c7L, + 0xb2767c4acaf197c6L } }, + /* 48 << 231 */ + { { 0xc925ff87aedbae9fL,0x7daf0eb936880a54L,0x9284ddf59c4d0e71L, + 0x1581cf93316f8cf5L }, + { 0x3eeca8873ac1f452L,0xb417fce9fb6aeffeL,0xa5918046eefb8dc3L, + 0x73d318ac02209400L } }, + /* 49 << 231 */ + { { 0xe800400f728693e5L,0xe87d814b339927edL,0x93e94d3b57ea9910L, + 0xff8a35b62245fb69L }, + { 0x043853d77f200d34L,0x470f1e680f653ce1L,0x81ac05bd59a06379L, + 0xa14052c203930c29L } }, + /* 50 << 231 */ + { { 0x6b72fab526bc2797L,0x13670d1699f16771L,0x001700521e3e48d1L, + 0x978fe401b7adf678L }, + { 0x55ecfb92d41c5dd4L,0x5ff8e247c7b27da5L,0xe7518272013fb606L, + 0x5768d7e52f547a3cL } }, + /* 51 << 231 */ + { { 0xbb24eaa360017a5fL,0x6b18e6e49c64ce9bL,0xc225c655103dde07L, + 0xfc3672ae7592f7eaL }, + { 0x9606ad77d06283a1L,0x542fc650e4d59d99L,0xabb57c492a40e7c2L, + 0xac948f13a8db9f55L } }, + /* 52 << 231 */ + { { 0x6d4c9682b04465c3L,0xe3d062fa6468bd15L,0xa51729ac5f318d7eL, + 0x1fc87df69eb6fc95L }, + { 0x63d146a80591f652L,0xa861b8f7589621aaL,0x59f5f15ace31348cL, + 0x8f663391440da6daL } }, + /* 53 << 231 */ + { { 0xcfa778acb591ffa3L,0x027ca9c54cdfebceL,0xbe8e05a5444ea6b3L, + 0x8aab4e69a78d8254L }, + { 0x2437f04fb474d6b8L,0x6597ffd4045b3855L,0xbb0aea4eca47ecaaL, + 0x568aae8385c7ebfcL } }, + /* 54 << 231 */ + { { 0x0e966e64c73b2383L,0x49eb3447d17d8762L,0xde1078218da05dabL, + 0x443d8baa016b7236L }, + { 0x163b63a5ea7610d6L,0xe47e4185ce1ca979L,0xae648b6580baa132L, + 0xebf53de20e0d5b64L } }, + /* 55 << 231 */ + { { 0x8d3bfcb4d3c8c1caL,0x0d914ef35d04b309L,0x55ef64153de7d395L, + 0xbde1666f26b850e8L }, + { 0xdbe1ca6ed449ab19L,0x8902b322e89a2672L,0xb1674b7edacb7a53L, + 0x8e9faf6ef52523ffL } }, + /* 56 << 231 */ + { { 0x6ba535da9a85788bL,0xd21f03aebd0626d4L,0x099f8c47e873dc64L, + 0xcda8564d018ec97eL }, + { 0x3e8d7a5cde92c68cL,0x78e035a173323cc4L,0x3ef26275f880ff7cL, + 0xa4ee3dff273eedaaL } }, + /* 57 << 231 */ + { { 0x58823507af4e18f8L,0x967ec9b50672f328L,0x9ded19d9559d3186L, + 0x5e2ab3de6cdce39cL }, + { 0xabad6e4d11c226dfL,0xf9783f4387723014L,0x9a49a0cf1a885719L, + 0xfc0c1a5a90da9dbfL } }, + /* 58 << 231 */ + { { 
0x8bbaec49571d92acL,0x569e85fe4692517fL,0x8333b014a14ea4afL, + 0x32f2a62f12e5c5adL }, + { 0x98c2ce3a06d89b85L,0xb90741aa2ff77a08L,0x2530defc01f795a2L, + 0xd6e5ba0b84b3c199L } }, + /* 59 << 231 */ + { { 0x7d8e845112e4c936L,0xae419f7dbd0be17bL,0xa583fc8c22262bc9L, + 0x6b842ac791bfe2bdL }, + { 0x33cef4e9440d6827L,0x5f69f4deef81fb14L,0xf16cf6f6234fbb92L, + 0x76ae3fc3d9e7e158L } }, + /* 60 << 231 */ + { { 0x4e89f6c2e9740b33L,0x677bc85d4962d6a1L,0x6c6d8a7f68d10d15L, + 0x5f9a72240257b1cdL }, + { 0x7096b9164ad85961L,0x5f8c47f7e657ab4aL,0xde57d7d0f7461d7eL, + 0x7eb6094d80ce5ee2L } }, + /* 61 << 231 */ + { { 0x0b1e1dfd34190547L,0x8a394f43f05dd150L,0x0a9eb24d97df44e6L, + 0x78ca06bf87675719L }, + { 0x6f0b34626ffeec22L,0x9d91bcea36cdd8fbL,0xac83363ca105be47L, + 0x81ba76c1069710e3L } }, + /* 62 << 231 */ + { { 0x3d1b24cb28c682c6L,0x27f252288612575bL,0xb587c779e8e66e98L, + 0x7b0c03e9405eb1feL }, + { 0xfdf0d03015b548e7L,0xa8be76e038b36af7L,0x4cdab04a4f310c40L, + 0x6287223ef47ecaecL } }, + /* 63 << 231 */ + { { 0x678e60558b399320L,0x61fe3fa6c01e4646L,0xc482866b03261a5eL, + 0xdfcf45b85c2f244aL }, + { 0x8fab9a512f684b43L,0xf796c654c7220a66L,0x1d90707ef5afa58fL, + 0x2c421d974fdbe0deL } }, + /* 64 << 231 */ + { { 0xc4f4cda3af2ebc2fL,0xa0af843dcb4efe24L,0x53b857c19ccd10b1L, + 0xddc9d1eb914d3e04L }, + { 0x7bdec8bb62771debL,0x829277aa91c5aa81L,0x7af18dd6832391aeL, + 0x1740f316c71a84caL } }, + /* 0 << 238 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 238 */ + { { 0x8928e99aeeaf8c49L,0xee7aa73d6e24d728L,0x4c5007c2e72b156cL, + 0x5fcf57c5ed408a1dL }, + { 0x9f719e39b6057604L,0x7d343c01c2868bbfL,0x2cca254b7e103e2dL, + 0xe6eb38a9f131bea2L } }, + /* 2 << 238 */ + { { 0xb33e624f8be762b4L,0x2a9ee4d1058e3413L,0x968e636967d805faL, + 0x9848949b7db8bfd7L }, + { 0x5308d7e5d23a8417L,0x892f3b1df3e29da5L,0xc95c139e3dee471fL, + 0x8631594dd757e089L } }, + /* 3 << 238 */ + { { 0xe0c82a3cde918dccL,0x2e7b599426fdcf4bL,0x82c5024932cb1b2dL, + 0xea613a9d7657ae07L }, + { 0xc2eb5f6cf1fdc9f7L,0xb6eae8b8879fe682L,0x253dfee0591cbc7fL, + 0x000da7133e1290e6L } }, + /* 4 << 238 */ + { { 0x1083e2ea1f095615L,0x0a28ad7714e68c33L,0x6bfc02523d8818beL, + 0xb585113af35850cdL }, + { 0x7d935f0b30df8aa1L,0xaddda07c4ab7e3acL,0x92c34299552f00cbL, + 0xc33ed1de2909df6cL } }, + /* 5 << 238 */ + { { 0x22c2195d80e87766L,0x9e99e6d89ddf4ac0L,0x09642e4e65e74934L, + 0x2610ffa2ff1ff241L }, + { 0x4d1d47d4751c8159L,0x697b4985af3a9363L,0x0318ca4687477c33L, + 0xa90cb5659441eff3L } }, + /* 6 << 238 */ + { { 0x58bb384836f024cbL,0x85be1f7736016168L,0x6c59587cdc7e07f1L, + 0x191be071af1d8f02L }, + { 0xbf169fa5cca5e55cL,0x3864ba3cf7d04eacL,0x915e367f8d7d05dbL, + 0xb48a876da6549e5dL } }, + /* 7 << 238 */ + { { 0xef89c656580e40a2L,0xf194ed8c728068bcL,0x74528045a47990c9L, + 0xf53fc7d75e1a4649L }, + { 0xbec5ae9b78593e7dL,0x2cac4ee341db65d7L,0xa8c1eb2404a3d39bL, + 0x53b7d63403f8f3efL } }, + /* 8 << 238 */ + { { 0x2dc40d483e07113cL,0x6e4a5d397d8b63aeL,0x5582a94b79684c2bL, + 0x932b33d4622da26cL }, + { 0xf534f6510dbbf08dL,0x211d07c964c23a52L,0x0eeece0fee5bdc9bL, + 0xdf178168f7015558L } }, + /* 9 << 238 */ + { { 0xd42946350a712229L,0x93cbe44809273f8cL,0x00b095ef8f13bc83L, + 0xbb7419728798978cL }, + { 0x9d7309a256dbe6e7L,0xe578ec565a5d39ecL,0x3961151b851f9a31L, + 0x2da7715de5709eb4L } }, + /* 10 << 238 */ + { { 0x867f301753dfabf0L,0x728d2078b8e39259L,0x5c75a0cd815d9958L, + 0xf84867a616603be1L }, + { 0xc865b13d70e35b1cL,0x0241446819b03e2cL,0xe46041daac1f3121L, + 0x7c9017ad6f028a7cL } }, + /* 11 << 238 */ + { { 
0xabc96de90a482873L,0x4265d6b1b77e54d4L,0x68c38e79a57d88e7L, + 0xd461d7669ce82de3L }, + { 0x817a9ec564a7e489L,0xcc5675cda0def5f2L,0x9a00e785985d494eL, + 0xc626833f1b03514aL } }, + /* 12 << 238 */ + { { 0xabe7905a83cdd60eL,0x50602fb5a1170184L,0x689886cdb023642aL, + 0xd568d090a6e1fb00L }, + { 0x5b1922c70259217fL,0x93831cd9c43141e4L,0xdfca35870c95f86eL, + 0xdec2057a568ae828L } }, + /* 13 << 238 */ + { { 0xc44ea599f98a759aL,0x55a0a7a2f7c23c1dL,0xd5ffb6e694c4f687L, + 0x3563cce212848478L }, + { 0x812b3517e7b1fbe1L,0x8a7dc9794f7338e0L,0x211ecee952d048dbL, + 0x2eea4056c86ea3b8L } }, + /* 14 << 238 */ + { { 0xd8cb68a7ba772b34L,0xe16ed3415f4e2541L,0x9b32f6a60fec14dbL, + 0xeee376f7391698beL }, + { 0xe9a7aa1783674c02L,0x65832f975843022aL,0x29f3a8da5ba4990fL, + 0x79a59c3afb8e3216L } }, + /* 15 << 238 */ + { { 0x9cdc4d2ebd19bb16L,0xc6c7cfd0b3262d86L,0xd4ce14d0969c0b47L, + 0x1fa352b713e56128L }, + { 0x383d55b8973db6d3L,0x71836850e8e5b7bfL,0xc7714596e6bb571fL, + 0x259df31f2d5b2dd2L } }, + /* 16 << 238 */ + { { 0x568f8925913cc16dL,0x18bc5b6de1a26f5aL,0xdfa413bef5f499aeL, + 0xf8835decc3f0ae84L }, + { 0xb6e60bd865a40ab0L,0x65596439194b377eL,0xbcd8562592084a69L, + 0x5ce433b94f23ede0L } }, + /* 17 << 238 */ + { { 0xe8e8f04f6ad65143L,0x11511827d6e14af6L,0x3d390a108295c0c7L, + 0x71e29ee4621eba16L }, + { 0xa588fc0963717b46L,0x02be02fee06ad4a2L,0x931558c604c22b22L, + 0xbb4d4bd612f3c849L } }, + /* 18 << 238 */ + { { 0x54a4f49620efd662L,0x92ba6d20c5952d14L,0x2db8ea1ecc9784c2L, + 0x81cc10ca4b353644L }, + { 0x40b570ad4b4d7f6cL,0x5c9f1d9684a1dcd2L,0x01379f813147e797L, + 0xe5c6097b2bd499f5L } }, + /* 19 << 238 */ + { { 0x40dcafa6328e5e20L,0xf7b5244a54815550L,0xb9a4f11847bfc978L, + 0x0ea0e79fd25825b1L }, + { 0xa50f96eb646c7ecfL,0xeb811493446dea9dL,0x2af04677dfabcf69L, + 0xbe3a068fc713f6e8L } }, + /* 20 << 238 */ + { { 0x860d523d42e06189L,0xbf0779414e3aff13L,0x0b616dcac1b20650L, + 0xe66dd6d12131300dL }, + { 0xd4a0fd67ff99abdeL,0xc9903550c7aac50dL,0x022ecf8b7c46b2d7L, + 0x3333b1e83abf92afL } }, + /* 21 << 238 */ + { { 0x11cc113c6c491c14L,0x0597668880dd3f88L,0xf5b4d9e729d932edL, + 0xe982aad8a2c38b6dL }, + { 0x6f9253478be0dcf0L,0x700080ae65ca53f2L,0xd8131156443ca77fL, + 0xe92d6942ec51f984L } }, + /* 22 << 238 */ + { { 0xd2a08af885dfe9aeL,0xd825d9a54d2a86caL,0x2c53988d39dff020L, + 0xf38b135a430cdc40L }, + { 0x0c918ae062a7150bL,0xf31fd8de0c340e9bL,0xafa0e7ae4dbbf02eL, + 0x5847fb2a5eba6239L } }, + /* 23 << 238 */ + { { 0x6b1647dcdccbac8bL,0xb642aa7806f485c8L,0x873f37657038ecdfL, + 0x2ce5e865fa49d3feL }, + { 0xea223788c98c4400L,0x8104a8cdf1fa5279L,0xbcf7cc7a06becfd7L, + 0x49424316c8f974aeL } }, + /* 24 << 238 */ + { { 0xc0da65e784d6365dL,0xbcb7443f8f759fb8L,0x35c712b17ae81930L, + 0x80428dff4c6e08abL }, + { 0xf19dafefa4faf843L,0xced8538dffa9855fL,0x20ac409cbe3ac7ceL, + 0x358c1fb6882da71eL } }, + /* 25 << 238 */ + { { 0xafa9c0e5fd349961L,0x2b2cfa518421c2fcL,0x2a80db17f3a28d38L, + 0xa8aba5395d138e7eL }, + { 0x52012d1d6e96eb8dL,0x65d8dea0cbaf9622L,0x57735447b264f56cL, + 0xbeebef3f1b6c8da2L } }, + /* 26 << 238 */ + { { 0xfc346d98ce785254L,0xd50e8d72bb64a161L,0xc03567c749794addL, + 0x15a76065752c7ef6L }, + { 0x59f3a222961f23d6L,0x378e443873ecc0b0L,0xc74be4345a82fde4L, + 0xae509af2d8b9cf34L } }, + /* 27 << 238 */ + { { 0x4a61ee46577f44a1L,0xe09b748cb611deebL,0xc0481b2cf5f7b884L, + 0x3562667861acfa6bL }, + { 0x37f4c518bf8d21e6L,0x22d96531b205a76dL,0x37fb85e1954073c0L, + 0xbceafe4f65b3a567L } }, + /* 28 << 238 */ + { { 0xefecdef7be42a582L,0xd3fc608065046be6L,0xc9af13c809e8dba9L, + 0x1e6c9847641491ffL }, + { 
0x3b574925d30c31f7L,0xb7eb72baac2a2122L,0x776a0dacef0859e7L, + 0x06fec31421900942L } }, + /* 29 << 238 */ + { { 0x2464bc10f8c22049L,0x9bfbcce7875ebf69L,0xd7a88e2a4336326bL, + 0xda05261c5bc2acfaL }, + { 0xc29f5bdceba7efc8L,0x471237ca25dbbf2eL,0xa72773f22975f127L, + 0xdc744e8e04d0b326L } }, + /* 30 << 238 */ + { { 0x38a7ed16a56edb73L,0x64357e372c007e70L,0xa167d15b5080b400L, + 0x07b4116423de4be1L }, + { 0xb2d91e3274c89883L,0x3c1628212882e7edL,0xad6b36ba7503e482L, + 0x48434e8e0ea34331L } }, + /* 31 << 238 */ + { { 0x79f4f24f2c7ae0b9L,0xc46fbf811939b44aL,0x76fefae856595eb1L, + 0x417b66abcd5f29c7L }, + { 0x5f2332b2c5ceec20L,0xd69661ffe1a1cae2L,0x5ede7e529b0286e6L, + 0x9d062529e276b993L } }, + /* 32 << 238 */ + { { 0x324794b07e50122bL,0xdd744f8b4af07ca5L,0x30a12f08d63fc97bL, + 0x39650f1a76626d9dL }, + { 0x101b47f71fa38477L,0x3d815f19d4dc124fL,0x1569ae95b26eb58aL, + 0xc3cde18895fb1887L } }, + /* 33 << 238 */ + { { 0x54e9f37bf9539a48L,0xb0100e067408c1a5L,0x821d9811ea580cbbL, + 0x8af52d3586e50c56L }, + { 0xdfbd9d47dbbf698bL,0x2961a1ea03dc1c73L,0x203d38f8e76a5df8L, + 0x08a53a686def707aL } }, + /* 34 << 238 */ + { { 0x26eefb481bee45d4L,0xb3cee3463c688036L,0x463c5315c42f2469L, + 0x19d84d2e81378162L }, + { 0x22d7c3c51c4d349fL,0x65965844163d59c5L,0xcf198c56b8abceaeL, + 0x6fb1fb1b628559d5L } }, + /* 35 << 238 */ + { { 0x8bbffd0607bf8fe3L,0x46259c583467734bL,0xd8953cea35f7f0d3L, + 0x1f0bece2d65b0ff1L }, + { 0xf7d5b4b3f3c72914L,0x29e8ea953cb53389L,0x4a365626836b6d46L, + 0xe849f910ea174fdeL } }, + /* 36 << 238 */ + { { 0x7ec62fbbf4737f21L,0xd8dba5ab6209f5acL,0x24b5d7a9a5f9adbeL, + 0x707d28f7a61dc768L }, + { 0x7711460bcaa999eaL,0xba7b174d1c92e4ccL,0x3c4bab6618d4bf2dL, + 0xb8f0c980eb8bd279L } }, + /* 37 << 238 */ + { { 0x024bea9a324b4737L,0xfba9e42332a83bcaL,0x6e635643a232dcedL, + 0x996193672571c8baL }, + { 0xe8c9f35754b7032bL,0xf936b3ba2442d54aL,0x2263f0f08290c65aL, + 0x48989780ee2c7fdbL } }, + /* 38 << 238 */ + { { 0xadc5d55a13d4f95eL,0x737cff85ad9b8500L,0x271c557b8a73f43dL, + 0xbed617a4e18bc476L }, + { 0x662454017dfd8ab2L,0xae7b89ae3a2870aaL,0x1b555f5323a7e545L, + 0x6791e247be057e4cL } }, + /* 39 << 238 */ + { { 0x860136ad324fa34dL,0xea1114474cbeae28L,0x023a4270bedd3299L, + 0x3d5c3a7fc1c35c34L }, + { 0xb0f6db678d0412d2L,0xd92625e2fcdc6b9aL,0x92ae5ccc4e28a982L, + 0xea251c3647a3ce7eL } }, + /* 40 << 238 */ + { { 0x9d658932790691bfL,0xed61058906b736aeL,0x712c2f04c0d63b6eL, + 0x5cf06fd5c63d488fL }, + { 0x97363facd9588e41L,0x1f9bf7622b93257eL,0xa9d1ffc4667acaceL, + 0x1cf4a1aa0a061ecfL } }, + /* 41 << 238 */ + { { 0x40e48a49dc1818d0L,0x0643ff39a3621ab0L,0x5768640ce39ef639L, + 0x1fc099ea04d86854L }, + { 0x9130b9c3eccd28fdL,0xd743cbd27eec54abL,0x052b146fe5b475b6L, + 0x058d9a82900a7d1fL } }, + /* 42 << 238 */ + { { 0x65e0229291262b72L,0x96f924f9bb0edf03L,0x5cfa59c8fe206842L, + 0xf60370045eafa720L }, + { 0x5f30699e18d7dd96L,0x381e8782cbab2495L,0x91669b46dd8be949L, + 0xb40606f526aae8efL } }, + /* 43 << 238 */ + { { 0x2812b839fc6751a4L,0x16196214fba800efL,0x4398d5ca4c1a2875L, + 0x720c00ee653d8349L }, + { 0xc2699eb0d820007cL,0x880ee660a39b5825L,0x70694694471f6984L, + 0xf7d16ea8e3dda99aL } }, + /* 44 << 238 */ + { { 0x28d675b2c0519a23L,0x9ebf94fe4f6952e3L,0xf28bb767a2294a8aL, + 0x85512b4dfe0af3f5L }, + { 0x18958ba899b16a0dL,0x95c2430cba7548a7L,0xb30d1b10a16be615L, + 0xe3ebbb9785bfb74cL } }, + /* 45 << 238 */ + { { 0xa3273cfe18549fdbL,0xf6e200bf4fcdb792L,0x54a76e1883aba56cL, + 0x73ec66f689ef6aa2L }, + { 0x8d17add7d1b9a305L,0xa959c5b9b7ae1b9dL,0x886435226bcc094aL, + 0xcc5616c4d7d429b9L } }, + /* 46 << 238 */ + { { 
0xa6dada01e6a33f7cL,0xc6217a079d4e70adL,0xd619a81809c15b7cL, + 0xea06b3290e80c854L }, + { 0x174811cea5f5e7b9L,0x66dfc310787c65f4L,0x4ea7bd693316ab54L, + 0xc12c4acb1dcc0f70L } }, + /* 47 << 238 */ + { { 0xe4308d1a1e407dd9L,0xe8a3587c91afa997L,0xea296c12ab77b7a5L, + 0xb5ad49e4673c0d52L }, + { 0x40f9b2b27006085aL,0xa88ff34087bf6ec2L,0x978603b14e3066a6L, + 0xb3f99fc2b5e486e2L } }, + /* 48 << 238 */ + { { 0x07b53f5eb2e63645L,0xbe57e54784c84232L,0xd779c2167214d5cfL, + 0x617969cd029a3acaL }, + { 0xd17668cd8a7017a0L,0x77b4d19abe9b7ee8L,0x58fd0e939c161776L, + 0xa8c4f4efd5968a72L } }, + /* 49 << 238 */ + { { 0x296071cc67b3de77L,0xae3c0b8e634f7905L,0x67e440c28a7100c9L, + 0xbb8c3c1beb4b9b42L }, + { 0x6d71e8eac51b3583L,0x7591f5af9525e642L,0xf73a2f7b13f509f3L, + 0x618487aa5619ac9bL } }, + /* 50 << 238 */ + { { 0x3a72e5f79d61718aL,0x00413bcc7592d28cL,0x7d9b11d3963c35cfL, + 0x77623bcfb90a46edL }, + { 0xdeef273bdcdd2a50L,0x4a741f9b0601846eL,0x33b89e510ec6e929L, + 0xcb02319f8b7f22cdL } }, + /* 51 << 238 */ + { { 0xbbe1500d084bae24L,0x2f0ae8d7343d2693L,0xacffb5f27cdef811L, + 0xaa0c030a263fb94fL }, + { 0x6eef0d61a0f442deL,0xf92e181727b139d3L,0x1ae6deb70ad8bc28L, + 0xa89e38dcc0514130L } }, + /* 52 << 238 */ + { { 0x81eeb865d2fdca23L,0x5a15ee08cc8ef895L,0x768fa10a01905614L, + 0xeff5b8ef880ee19bL }, + { 0xf0c0cabbcb1c8a0eL,0x2e1ee9cdb8c838f9L,0x0587d8b88a4a14c0L, + 0xf6f278962ff698e5L } }, + /* 53 << 238 */ + { { 0xed38ef1c89ee6256L,0xf44ee1fe6b353b45L,0x9115c0c770e903b3L, + 0xc78ec0a1818f31dfL }, + { 0x6c003324b7dccbc6L,0xd96dd1f3163bbc25L,0x33aa82dd5cedd805L, + 0x123aae4f7f7eb2f1L } }, + /* 54 << 238 */ + { { 0x1723fcf5a26262cdL,0x1f7f4d5d0060ebd5L,0xf19c5c01b2eaa3afL, + 0x2ccb9b149790accfL }, + { 0x1f9c1cad52324aa6L,0x632005267247df54L,0x5732fe42bac96f82L, + 0x52fe771f01a1c384L } }, + /* 55 << 238 */ + { { 0x546ca13db1001684L,0xb56b4eeea1709f75L,0x266545a9d5db8672L, + 0xed971c901e8f3cfbL }, + { 0x4e7d8691e3a07b29L,0x7570d9ece4b696b9L,0xdc5fa0677bc7e9aeL, + 0x68b44cafc82c4844L } }, + /* 56 << 238 */ + { { 0x519d34b3bf44da80L,0x283834f95ab32e66L,0x6e6087976278a000L, + 0x1e62960e627312f6L }, + { 0x9b87b27be6901c55L,0x80e7853824fdbc1fL,0xbbbc09512facc27dL, + 0x06394239ac143b5aL } }, + /* 57 << 238 */ + { { 0x35bb4a40376c1944L,0x7cb6269463da1511L,0xafd29161b7148a3bL, + 0xa6f9d9ed4e2ea2eeL }, + { 0x15dc2ca2880dd212L,0x903c3813a61139a9L,0x2aa7b46d6c0f8785L, + 0x36ce2871901c60ffL } }, + /* 58 << 238 */ + { { 0xc683b028e10d9c12L,0x7573baa2032f33d3L,0x87a9b1f667a31b58L, + 0xfd3ed11af4ffae12L }, + { 0x83dcaa9a0cb2748eL,0x8239f0185d6fdf16L,0xba67b49c72753941L, + 0x2beec455c321cb36L } }, + /* 59 << 238 */ + { { 0x880156063f8b84ceL,0x764170838d38c86fL,0x054f1ca7598953ddL, + 0xc939e1104e8e7429L }, + { 0x9b1ac2b35a914f2fL,0x39e35ed3e74b8f9cL,0xd0debdb2781b2fb0L, + 0x1585638f2d997ba2L } }, + /* 60 << 238 */ + { { 0x9c4b646e9e2fce99L,0x68a210811e80857fL,0x06d54e443643b52aL, + 0xde8d6d630d8eb843L }, + { 0x7032156342146a0aL,0x8ba826f25eaa3622L,0x227a58bd86138787L, + 0x43b6c03c10281d37L } }, + /* 61 << 238 */ + { { 0x6326afbbb54dde39L,0x744e5e8adb6f2d5fL,0x48b2a99acff158e1L, + 0xa93c8fa0ef87918fL }, + { 0x2182f956de058c5cL,0x216235d2936f9e7aL,0xace0c0dbd2e31e67L, + 0xc96449bff23ac3e7L } }, + /* 62 << 238 */ + { { 0x7e9a2874170693bdL,0xa28e14fda45e6335L,0x5757f6b356427344L, + 0x822e4556acf8edf9L }, + { 0x2b7a6ee2e6a285cdL,0x5866f211a9df3af0L,0x40dde2ddf845b844L, + 0x986c3726110e5e49L } }, + /* 63 << 238 */ + { { 0x73680c2af7172277L,0x57b94f0f0cccb244L,0xbdff72672d438ca7L, + 0xbad1ce11cf4663fdL }, + { 
0x9813ed9dd8f71caeL,0xf43272a6961fdaa6L,0xbeff0119bd6d1637L, + 0xfebc4f9130361978L } }, + /* 64 << 238 */ + { { 0x02b37a952f41deffL,0x0e44a59ae63b89b7L,0x673257dc143ff951L, + 0x19c02205d752baf4L }, + { 0x46c23069c4b7d692L,0x2e6392c3fd1502acL,0x6057b1a21b220846L, + 0xe51ff9460c1b5b63L } }, + /* 0 << 245 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 245 */ + { { 0x6e85cb51566c5c43L,0xcff9c9193597f046L,0x9354e90c4994d94aL, + 0xe0a393322147927dL }, + { 0x8427fac10dc1eb2bL,0x88cfd8c22ff319faL,0xe2d4e68401965274L, + 0xfa2e067d67aaa746L } }, + /* 2 << 245 */ + { { 0xb6d92a7f3e5f9f11L,0x9afe153ad6cb3b8eL,0x4d1a6dd7ddf800bdL, + 0xf6c13cc0caf17e19L }, + { 0x15f6c58e325fc3eeL,0x71095400a31dc3b2L,0x168e7c07afa3d3e7L, + 0x3f8417a194c7ae2dL } }, + /* 3 << 245 */ + { { 0xec234772813b230dL,0x634d0f5f17344427L,0x11548ab1d77fc56aL, + 0x7fab1750ce06af77L }, + { 0xb62c10a74f7c4f83L,0xa7d2edc4220a67d9L,0x1c404170921209a0L, + 0x0b9815a0face59f0L } }, + /* 4 << 245 */ + { { 0x2842589b319540c3L,0x18490f59a283d6f8L,0xa2731f84daae9fcbL, + 0x3db6d960c3683ba0L }, + { 0xc85c63bb14611069L,0xb19436af0788bf05L,0x905459df347460d2L, + 0x73f6e094e11a7db1L } }, + /* 5 << 245 */ + { { 0xdc7f938eb6357f37L,0xc5d00f792bd8aa62L,0xc878dcb92ca979fcL, + 0x37e83ed9eb023a99L }, + { 0x6b23e2731560bf3dL,0x1086e4591d0fae61L,0x782483169a9414bdL, + 0x1b956bc0f0ea9ea1L } }, + /* 6 << 245 */ + { { 0x7b85bb91c31b9c38L,0x0c5aa90b48ef57b5L,0xdedeb169af3bab6fL, + 0xe610ad732d373685L }, + { 0xf13870df02ba8e15L,0x0337edb68ca7f771L,0xe4acf747b62c036cL, + 0xd921d576b6b94e81L } }, + /* 7 << 245 */ + { { 0xdbc864392c422f7aL,0xfb635362ed348898L,0x83084668c45bfcd1L, + 0xc357c9e32b315e11L }, + { 0xb173b5405b2e5b8cL,0x7e946931e102b9a4L,0x17c890eb7b0fb199L, + 0xec225a83d61b662bL } }, + /* 8 << 245 */ + { { 0xf306a3c8ee3c76cbL,0x3cf11623d32a1f6eL,0xe6d5ab646863e956L, + 0x3b8a4cbe5c005c26L }, + { 0xdcd529a59ce6bb27L,0xc4afaa5204d4b16fL,0xb0624a267923798dL, + 0x85e56df66b307fabL } }, + /* 9 << 245 */ + { { 0x0281893c2bf29698L,0x91fc19a4d7ce7603L,0x75a5dca3ad9a558fL, + 0x40ceb3fa4d50bf77L }, + { 0x1baf6060bc9ba369L,0x927e1037597888c2L,0xd936bf1986a34c07L, + 0xd4cf10c1c34ae980L } }, + /* 10 << 245 */ + { { 0x3a3e5334859dd614L,0x9c475b5b18d0c8eeL,0x63080d1f07cd51d5L, + 0xc9c0d0a6b88b4326L }, + { 0x1ac98691c234296fL,0x2a0a83a494887fb6L,0x565114270cea9cf2L, + 0x5230a6e8a24802f5L } }, + /* 11 << 245 */ + { { 0xf7a2bf0f72e3d5c1L,0x377174464f21439eL,0xfedcbf259ce30334L, + 0xe0030a787ce202f9L }, + { 0x6f2d9ebf1202e9caL,0xe79dde6c75e6e591L,0xf52072aff1dac4f8L, + 0x6c8d087ebb9b404dL } }, + /* 12 << 245 */ + { { 0xad0fc73dbce913afL,0x909e587b458a07cbL,0x1300da84d4f00c8aL, + 0x425cd048b54466acL }, + { 0xb59cb9be90e9d8bfL,0x991616db3e431b0eL,0xd3aa117a531aecffL, + 0x91af92d359f4dc3bL } }, + /* 13 << 245 */ + { { 0x9b1ec292e93fda29L,0x76bb6c17e97d91bcL,0x7509d95faface1e6L, + 0x3653fe47be855ae3L }, + { 0x73180b280f680e75L,0x75eefd1beeb6c26cL,0xa4cdf29fb66d4236L, + 0x2d70a9976b5821d8L } }, + /* 14 << 245 */ + { { 0x7a3ee20720445c36L,0x71d1ac8259877174L,0x0fc539f7949f73e9L, + 0xd05cf3d7982e3081L }, + { 0x8758e20b7b1c7129L,0xffadcc20569e61f2L,0xb05d3a2f59544c2dL, + 0xbe16f5c19fff5e53L } }, + /* 15 << 245 */ + { { 0x73cf65b8aad58135L,0x622c2119037aa5beL,0x79373b3f646fd6a0L, + 0x0e029db50d3978cfL }, + { 0x8bdfc43794fba037L,0xaefbd687620797a6L,0x3fa5382bbd30d38eL, + 0x7627cfbf585d7464L } }, + /* 16 << 245 */ + { { 0xb2330fef4e4ca463L,0xbcef72873566cc63L,0xd161d2cacf780900L, + 0x135dc5395b54827dL }, + { 
0x638f052e27bf1bc6L,0x10a224f007dfa06cL,0xe973586d6d3321daL, + 0x8b0c573826152c8fL } }, + /* 17 << 245 */ + { { 0x07ef4f2a34606074L,0x80fe7fe8a0f7047aL,0x3d1a8152e1a0e306L, + 0x32cf43d888da5222L }, + { 0xbf89a95f5f02ffe6L,0x3d9eb9a4806ad3eaL,0x012c17bb79c8e55eL, + 0xfdcd1a7499c81dacL } }, + /* 18 << 245 */ + { { 0x7043178bb9556098L,0x4090a1df801c3886L,0x759800ff9b67b912L, + 0x3e5c0304232620c8L }, + { 0x4b9d3c4b70dceecaL,0xbb2d3c15181f648eL,0xf981d8376e33345cL, + 0xb626289b0cf2297aL } }, + /* 19 << 245 */ + { { 0x766ac6598baebdcfL,0x1a28ae0975df01e5L,0xb71283da375876d8L, + 0x4865a96d607b9800L }, + { 0x25dd1bcd237936b2L,0x332f4f4b60417494L,0xd0923d68370a2147L, + 0x497f5dfbdc842203L } }, + /* 20 << 245 */ + { { 0x9dc74cbd32be5e0fL,0x7475bcb717a01375L,0x438477c950d872b1L, + 0xcec67879ffe1d63dL }, + { 0x9b006014d8578c70L,0xc9ad99a878bb6b8bL,0x6799008e11fb3806L, + 0xcfe81435cd44cab3L } }, + /* 21 << 245 */ + { { 0xa2ee15822f4fb344L,0xb8823450483fa6ebL,0x622d323d652c7749L, + 0xd8474a98beb0a15bL }, + { 0xe43c154d5d1c00d0L,0x7fd581d90e3e7aacL,0x2b44c6192525ddf8L, + 0x67a033ebb8ae9739L } }, + /* 22 << 245 */ + { { 0x113ffec19ef2d2e4L,0x1bf6767ed5a0ea7fL,0x57fff75e03714c0aL, + 0xa23c422e0a23e9eeL }, + { 0xdd5f6b2d540f83afL,0xc2c2c27e55ea46a7L,0xeb6b4246672a1208L, + 0xd13599f7ae634f7aL } }, + /* 23 << 245 */ + { { 0xcf914b5cd7b32c6eL,0x61a5a640eaf61814L,0x8dc3df8b208a1bbbL, + 0xef627fd6b6d79aa5L }, + { 0x44232ffcc4c86bc8L,0xe6f9231b061539feL,0x1d04f25a958b9533L, + 0x180cf93449e8c885L } }, + /* 24 << 245 */ + { { 0x896895959884aaf7L,0xb1959be307b348a6L,0x96250e573c147c87L, + 0xae0efb3add0c61f8L }, + { 0xed00745eca8c325eL,0x3c911696ecff3f70L,0x73acbc65319ad41dL, + 0x7b01a020f0b1c7efL } }, + /* 25 << 245 */ + { { 0xea32b29363a1483fL,0x89eabe717a248f96L,0x9c6231d3343157e5L, + 0x93a375e5df3c546dL }, + { 0xe76e93436a2afe69L,0xc4f89100e166c88eL,0x248efd0d4f872093L, + 0xae0eb3ea8fe0ea61L } }, + /* 26 << 245 */ + { { 0xaf89790d9d79046eL,0x4d650f2d6cee0976L,0xa3935d9a43071ecaL, + 0x66fcd2c9283b0bfeL }, + { 0x0e665eb5696605f1L,0xe77e5d07a54cd38dL,0x90ee050a43d950cfL, + 0x86ddebdad32e69b5L } }, + /* 27 << 245 */ + { { 0x6ad94a3dfddf7415L,0xf7fa13093f6e8d5aL,0xc4831d1de9957f75L, + 0x7de28501d5817447L }, + { 0x6f1d70789e2aeb6bL,0xba2b9ff4f67a53c2L,0x36963767df9defc3L, + 0x479deed30d38022cL } }, + /* 28 << 245 */ + { { 0xd2edb89b3a8631e8L,0x8de855de7a213746L,0xb2056cb7b00c5f11L, + 0xdeaefbd02c9b85e4L }, + { 0x03f39a8dd150892dL,0x37b84686218b7985L,0x36296dd8b7375f1aL, + 0x472cd4b1b78e898eL } }, + /* 29 << 245 */ + { { 0x15dff651e9f05de9L,0xd40450692ce98ba9L,0x8466a7ae9b38024cL, + 0xb910e700e5a6b5efL }, + { 0xae1c56eab3aa8f0dL,0xbab2a5077eee74a6L,0x0dca11e24b4c4620L, + 0xfd896e2e4c47d1f4L } }, + /* 30 << 245 */ + { { 0xeb45ae53308fbd93L,0x46cd5a2e02c36fdaL,0x6a3d4e90baa48385L, + 0xdd55e62e9dbe9960L }, + { 0xa1406aa02a81ede7L,0x6860dd14f9274ea7L,0xcfdcb0c280414f86L, + 0xff410b1022f94327L } }, + /* 31 << 245 */ + { { 0x5a33cc3849ad467bL,0xefb48b6c0a7335f1L,0x14fb54a4b153a360L, + 0x604aa9d2b52469ccL }, + { 0x5e9dc486754e48e9L,0x693cb45537471e8eL,0xfb2fd7cd8d3b37b6L, + 0x63345e16cf09ff07L } }, + /* 32 << 245 */ + { { 0x9910ba6b23a5d896L,0x1fe19e357fe4364eL,0x6e1da8c39a33c677L, + 0x15b4488b29fd9fd0L }, + { 0x1f4392541a1f22bfL,0x920a8a70ab8163e8L,0x3fd1b24907e5658eL, + 0xf2c4f79cb6ec839bL } }, + /* 33 << 245 */ + { { 0x1abbc3d04aa38d1bL,0x3b0db35cb5d9510eL,0x1754ac783e60dec0L, + 0x53272fd7ea099b33L }, + { 0x5fb0494f07a8e107L,0x4a89e1376a8191faL,0xa113b7f63c4ad544L, + 0x88a2e9096cb9897bL } }, + /* 34 << 245 */ + { { 
0x17d55de3b44a3f84L,0xacb2f34417c6c690L,0x3208816810232390L, + 0xf2e8a61f6c733bf7L }, + { 0xa774aab69c2d7652L,0xfb5307e3ed95c5bcL,0xa05c73c24981f110L, + 0x1baae31ca39458c9L } }, + /* 35 << 245 */ + { { 0x1def185bcbea62e7L,0xe8ac9eaeeaf63059L,0x098a8cfd9921851cL, + 0xd959c3f13abe2f5bL }, + { 0xa4f1952520e40ae5L,0x320789e307a24aa1L,0x259e69277392b2bcL, + 0x58f6c6671918668bL } }, + /* 36 << 245 */ + { { 0xce1db2bbc55d2d8bL,0x41d58bb7f4f6ca56L,0x7650b6808f877614L, + 0x905e16baf4c349edL }, + { 0xed415140f661acacL,0x3b8784f0cb2270afL,0x3bc280ac8a402cbaL, + 0xd53f71460937921aL } }, + /* 37 << 245 */ + { { 0xc03c8ee5e5681e83L,0x62126105f6ac9e4aL,0x9503a53f936b1a38L, + 0x3d45e2d4782fecbdL }, + { 0x69a5c43976e8ae98L,0xb53b2eebbfb4b00eL,0xf167471272386c89L, + 0x30ca34a24268bce4L } }, + /* 38 << 245 */ + { { 0x7f1ed86c78341730L,0x8ef5beb8b525e248L,0xbbc489fdb74fbf38L, + 0x38a92a0e91a0b382L }, + { 0x7a77ba3f22433ccfL,0xde8362d6a29f05a9L,0x7f6a30ea61189afcL, + 0x693b550559ef114fL } }, + /* 39 << 245 */ + { { 0x50266bc0cd1797a1L,0xea17b47ef4b7af2dL,0xd6c4025c3df9483eL, + 0x8cbb9d9fa37b18c9L }, + { 0x91cbfd9c4d8424cfL,0xdb7048f1ab1c3506L,0x9eaf641f028206a3L, + 0xf986f3f925bdf6ceL } }, + /* 40 << 245 */ + { { 0x262143b5224c08dcL,0x2bbb09b481b50c91L,0xc16ed709aca8c84fL, + 0xa6210d9db2850ca8L }, + { 0x6d8df67a09cb54d6L,0x91eef6e0500919a4L,0x90f613810f132857L, + 0x9acede47f8d5028bL } }, + /* 41 << 245 */ + { { 0x844d1b7190b771c3L,0x563b71e4ba6426beL,0x2efa2e83bdb802ffL, + 0x3410cbabab5b4a41L }, + { 0x555b2d2630da84ddL,0xd0711ae9ee1cc29aL,0xcf3e8c602f547792L, + 0x03d7d5dedc678b35L } }, + /* 42 << 245 */ + { { 0x071a2fa8ced806b8L,0x222e6134697f1478L,0xdc16fd5dabfcdbbfL, + 0x44912ebf121b53b8L }, + { 0xac9436742496c27cL,0x8ea3176c1ffc26b0L,0xb6e224ac13debf2cL, + 0x524cc235f372a832L } }, + /* 43 << 245 */ + { { 0xd706e1d89f6f1b18L,0x2552f00544cce35bL,0x8c8326c2a88e31fcL, + 0xb5468b2cf9552047L }, + { 0xce683e883ff90f2bL,0x77947bdf2f0a5423L,0xd0a1b28bed56e328L, + 0xaee35253c20134acL } }, + /* 44 << 245 */ + { { 0x7e98367d3567962fL,0x379ed61f8188bffbL,0x73bba348faf130a1L, + 0x6c1f75e1904ed734L }, + { 0x189566423b4a79fcL,0xf20bc83d54ef4493L,0x836d425d9111eca1L, + 0xe5b5c318009a8dcfL } }, + /* 45 << 245 */ + { { 0x3360b25d13221bc5L,0x707baad26b3eeaf7L,0xd7279ed8743a95a1L, + 0x7450a875969e809fL }, + { 0x32b6bd53e5d0338fL,0x1e77f7af2b883bbcL,0x90da12cc1063ecd0L, + 0xe2697b58c315be47L } }, + /* 46 << 245 */ + { { 0x2771a5bdda85d534L,0x53e78c1fff980eeaL,0xadf1cf84900385e7L, + 0x7d3b14f6c9387b62L }, + { 0x170e74b0cb8f2bd2L,0x2d50b486827fa993L,0xcdbe8c9af6f32babL, + 0x55e906b0c3b93ab8L } }, + /* 47 << 245 */ + { { 0x747f22fc8fe280d1L,0xcd8e0de5b2e114abL,0x5ab7dbebe10b68b0L, + 0x9dc63a9ca480d4b2L }, + { 0x78d4bc3b4be1495fL,0x25eb3db89359122dL,0x3f8ac05b0809cbdcL, + 0xbf4187bbd37c702fL } }, + /* 48 << 245 */ + { { 0x84cea0691416a6a5L,0x8f860c7943ef881cL,0x41311f8a38038a5dL, + 0xe78c2ec0fc612067L }, + { 0x494d2e815ad73581L,0xb4cc9e0059604097L,0xff558aecf3612cbaL, + 0x35beef7a9e36c39eL } }, + /* 49 << 245 */ + { { 0x1845c7cfdbcf41b9L,0x5703662aaea997c0L,0x8b925afee402f6d8L, + 0xd0a1b1ae4dd72162L }, + { 0x9f47b37503c41c4bL,0xa023829b0391d042L,0x5f5045c3503b8b0aL, + 0x123c268898c010e5L } }, + /* 50 << 245 */ + { { 0x324ec0cc36ba06eeL,0xface31153dd2cc0cL,0xb364f3bef333e91fL, + 0xef8aff7328e832b0L }, + { 0x1e9bad042d05841bL,0x42f0e3df356a21e2L,0xa3270bcb4add627eL, + 0xb09a8158d322e711L } }, + /* 51 << 245 */ + { { 0x86e326a10fee104aL,0xad7788f83703f65dL,0x7e76543047bc4833L, + 0x6cee582b2b9b893aL }, + { 
0x9cd2a167e8f55a7bL,0xefbee3c6d9e4190dL,0x33ee7185d40c2e9dL, + 0x844cc9c5a380b548L } }, + /* 52 << 245 */ + { { 0x323f8ecd66926e04L,0x0001e38f8110c1baL,0x8dbcac12fc6a7f07L, + 0xd65e1d580cec0827L }, + { 0xd2cd4141be76ca2dL,0x7895cf5ce892f33aL,0x956d230d367139d2L, + 0xa91abd3ed012c4c1L } }, + /* 53 << 245 */ + { { 0x34fa488387eb36bfL,0xc5f07102914b8fb4L,0x90f0e579adb9c95fL, + 0xfe6ea8cb28888195L }, + { 0x7b9b5065edfa9284L,0x6c510bd22b8c8d65L,0xd7b8ebefcbe8aafdL, + 0xedb3af9896b1da07L } }, + /* 54 << 245 */ + { { 0x28ff779d6295d426L,0x0c4f6ac73fa3ad7bL,0xec44d0548b8e2604L, + 0x9b32a66d8b0050e1L }, + { 0x1f943366f0476ce2L,0x7554d953a602c7b4L,0xbe35aca6524f2809L, + 0xb6881229fd4edbeaL } }, + /* 55 << 245 */ + { { 0xe8cd0c8f508efb63L,0x9eb5b5c86abcefc7L,0xf5621f5fb441ab4fL, + 0x79e6c046b76a2b22L }, + { 0x74a4792ce37a1f69L,0xcbd252cb03542b60L,0x785f65d5b3c20bd3L, + 0x8dea61434fabc60cL } }, + /* 56 << 245 */ + { { 0x45e21446de673629L,0x57f7aa1e703c2d21L,0xa0e99b7f98c868c7L, + 0x4e42f66d8b641676L }, + { 0x602884dc91077896L,0xa0d690cfc2c9885bL,0xfeb4da333b9a5187L, + 0x5f789598153c87eeL } }, + /* 57 << 245 */ + { { 0x2192dd4752b16dbaL,0xdeefc0e63524c1b1L,0x465ea76ee4383693L, + 0x79401711361b8d98L }, + { 0xa5f9ace9f21a15cbL,0x73d26163efee9aebL,0xcca844b3e677016cL, + 0x6c122b0757eaee06L } }, + /* 58 << 245 */ + { { 0xb782dce715f09690L,0x508b9b122dfc0fc9L,0x9015ab4b65d89fc6L, + 0x5e79dab7d6d5bb0fL }, + { 0x64f021f06c775aa2L,0xdf09d8cc37c7eca1L,0x9a761367ef2fa506L, + 0xed4ca4765b81eec6L } }, + /* 59 << 245 */ + { { 0x262ede3610bbb8b5L,0x0737ce830641ada3L,0x4c94288ae9831cccL, + 0x487fc1ce8065e635L }, + { 0xb13d7ab3b8bb3659L,0xdea5df3e855e4120L,0xb9a1857385eb0244L, + 0x1a1b8ea3a7cfe0a3L } }, + /* 60 << 245 */ + { { 0x3b83711967b0867cL,0x8d5e0d089d364520L,0x52dccc1ed930f0e3L, + 0xefbbcec7bf20bbafL }, + { 0x99cffcab0263ad10L,0xd8199e6dfcd18f8aL,0x64e2773fe9f10617L, + 0x0079e8e108704848L } }, + /* 61 << 245 */ + { { 0x1169989f8a342283L,0x8097799ca83012e6L,0xece966cb8a6a9001L, + 0x93b3afef072ac7fcL }, + { 0xe6893a2a2db3d5baL,0x263dc46289bf4fdcL,0x8852dfc9e0396673L, + 0x7ac708953af362b6L } }, + /* 62 << 245 */ + { { 0xbb9cce4d5c2f342bL,0xbf80907ab52d7aaeL,0x97f3d3cd2161bcd0L, + 0xb25b08340962744dL }, + { 0xc5b18ea56c3a1ddaL,0xfe4ec7eb06c92317L,0xb787b890ad1c4afeL, + 0xdccd9a920ede801aL } }, + /* 63 << 245 */ + { { 0x9ac6dddadb58da1fL,0x22bbc12fb8cae6eeL,0xc6f8bced815c4a43L, + 0x8105a92cf96480c7L }, + { 0x0dc3dbf37a859d51L,0xe3ec7ce63041196bL,0xd9f64b250d1067c9L, + 0xf23213213d1f8dd8L } }, + /* 64 << 245 */ + { { 0x8b5c619c76497ee8L,0x5d2b0ac6c717370eL,0x98204cb64fcf68e1L, + 0x0bdec21162bc6792L }, + { 0x6973ccefa63b1011L,0xf9e3fa97e0de1ac5L,0x5efb693e3d0e0c8bL, + 0x037248e9d2d4fcb4L } }, + /* 0 << 252 */ + { { 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 252 */ + { { 0x80802dc91ec34f9eL,0xd8772d3533810603L,0x3f06d66c530cb4f3L, + 0x7be5ed0dc475c129L }, + { 0xcb9e3c1931e82b10L,0xc63d2857c9ff6b4cL,0xb92118c692a1b45eL, + 0x0aec44147285bbcaL } }, + /* 2 << 252 */ + { { 0xfc189ae71e29a3efL,0xcbe906f04c93302eL,0xd0107914ceaae10eL, + 0xb7a23f34b68e19f8L }, + { 0xe9d875c2efd2119dL,0x03198c6efcadc9c8L,0x65591bf64da17113L, + 0x3cf0bbf83d443038L } }, + /* 3 << 252 */ + { { 0xae485bb72b724759L,0x945353e1b2d4c63aL,0x82159d07de7d6f2cL, + 0x389caef34ec5b109L }, + { 0x4a8ebb53db65ef14L,0x2dc2cb7edd99de43L,0x816fa3ed83f2405fL, + 0x73429bb9c14208a3L } }, + /* 4 << 252 */ + { { 0xb618d590b01e6e27L,0x047e2ccde180b2dcL,0xd1b299b504aea4a9L, + 0x412c9e1e9fa403a4L }, + { 
0x88d28a3679407552L,0x49c50136f332b8e3L,0x3a1b6fcce668de19L, + 0x178851bc75122b97L } }, + /* 5 << 252 */ + { { 0xb1e13752fb85fa4cL,0xd61257ce383c8ce9L,0xd43da670d2f74daeL, + 0xa35aa23fbf846bbbL }, + { 0x5e74235d4421fc83L,0xf6df8ee0c363473bL,0x34d7f52a3c4aa158L, + 0x50d05aab9bc6d22eL } }, + /* 6 << 252 */ + { { 0x8c56e735a64785f4L,0xbc56637b5f29cd07L,0x53b2bb803ee35067L, + 0x50235a0fdc919270L }, + { 0x191ab6d8f2c4aa65L,0xc34758318396023bL,0x80400ba5f0f805baL, + 0x8881065b5ec0f80fL } }, + /* 7 << 252 */ + { { 0xc370e522cc1b5e83L,0xde2d4ad1860b8bfbL,0xad364df067b256dfL, + 0x8f12502ee0138997L }, + { 0x503fa0dc7783920aL,0xe80014adc0bc866aL,0x3f89b744d3064ba6L, + 0x03511dcdcba5dba5L } }, + /* 8 << 252 */ + { { 0x197dd46d95a7b1a2L,0x9c4e7ad63c6341fbL,0x426eca29484c2eceL, + 0x9211e489de7f4f8aL }, + { 0x14997f6ec78ef1f4L,0x2b2c091006574586L,0x17286a6e1c3eede8L, + 0x25f92e470f60e018L } }, + /* 9 << 252 */ + { { 0x805c564631890a36L,0x703ef60057feea5bL,0x389f747caf3c3030L, + 0xe0e5daeb54dd3739L }, + { 0xfe24a4c3c9c9f155L,0x7e4bf176b5393962L,0x37183de2af20bf29L, + 0x4a1bd7b5f95a8c3bL } }, + /* 10 << 252 */ + { { 0xa83b969946191d3dL,0x281fc8dd7b87f257L,0xb18e2c1354107588L, + 0x6372def79b2bafe8L }, + { 0xdaf4bb480d8972caL,0x3f2dd4b756167a3fL,0x1eace32d84310cf4L, + 0xe3bcefafe42700aaL } }, + /* 11 << 252 */ + { { 0x5fe5691ed785e73dL,0xa5db5ab62ea60467L,0x02e23d41dfc6514aL, + 0x35e8048ee03c3665L }, + { 0x3f8b118f1adaa0f8L,0x28ec3b4584ce1a5aL,0xe8cacc6e2c6646b8L, + 0x1343d185dbd0e40fL } }, + /* 12 << 252 */ + { { 0xe5d7f844caaa358cL,0x1a1db7e49924182aL,0xd64cd42d9c875d9aL, + 0xb37b515f042eeec8L }, + { 0x4d4dd4097b165fbeL,0xfc322ed9e206eff3L,0x7dee410259b7e17eL, + 0x55a481c08236ca00L } }, + /* 13 << 252 */ + { { 0x8c885312c23fc975L,0x1571580605d6297bL,0xa078868ef78edd39L, + 0x956b31e003c45e52L }, + { 0x470275d5ff7b33a6L,0xc8d5dc3a0c7e673fL,0x419227b47e2f2598L, + 0x8b37b6344c14a975L } }, + /* 14 << 252 */ + { { 0xd0667ed68b11888cL,0x5e0e8c3e803e25dcL,0x34e5d0dcb987a24aL, + 0x9f40ac3bae920323L }, + { 0x5463de9534e0f63aL,0xa128bf926b6328f9L,0x491ccd7cda64f1b7L, + 0x7ef1ec27c47bde35L } }, + /* 15 << 252 */ + { { 0xa857240fa36a2737L,0x35dc136663621bc1L,0x7a3a6453d4fb6897L, + 0x80f1a439c929319dL }, + { 0xfc18274bf8cb0ba0L,0xb0b537668078c5ebL,0xfb0d49241e01d0efL, + 0x50d7c67d372ab09cL } }, + /* 16 << 252 */ + { { 0xb4e370af3aeac968L,0xe4f7fee9c4b63266L,0xb4acd4c2e3ac5664L, + 0xf8910bd2ceb38cbfL }, + { 0x1c3ae50cc9c0726eL,0x15309569d97b40bfL,0x70884b7ffd5a5a1bL, + 0x3890896aef8314cdL } }, + /* 17 << 252 */ + { { 0x58e1515ca5618c93L,0xe665432b77d942d1L,0xb32181bfb6f767a8L, + 0x753794e83a604110L }, + { 0x09afeb7ce8c0dbccL,0x31e02613598673a3L,0x5d98e5577d46db00L, + 0xfc21fb8c9d985b28L } }, + /* 18 << 252 */ + { { 0xc9040116b0843e0bL,0x53b1b3a869b04531L,0xdd1649f085d7d830L, + 0xbb3bcc87cb7427e8L }, + { 0x77261100c93dce83L,0x7e79da61a1922a2aL,0x587a2b02f3149ce8L, + 0x147e1384de92ec83L } }, + /* 19 << 252 */ + { { 0x484c83d3af077f30L,0xea78f8440658b53aL,0x912076c2027aec53L, + 0xf34714e393c8177dL }, + { 0x37ef5d15c2376c84L,0x8315b6593d1aa783L,0x3a75c484ef852a90L, + 0x0ba0c58a16086bd4L } }, + /* 20 << 252 */ + { { 0x29688d7a529a6d48L,0x9c7f250dc2f19203L,0x123042fb682e2df9L, + 0x2b7587e7ad8121bcL }, + { 0x30fc0233e0182a65L,0xb82ecf87e3e1128aL,0x7168286193fb098fL, + 0x043e21ae85e9e6a7L } }, + /* 21 << 252 */ + { { 0xab5b49d666c834eaL,0x3be43e1847414287L,0xf40fb859219a2a47L, + 0x0e6559e9cc58df3cL }, + { 0xfe1dfe8e0c6615b4L,0x14abc8fd56459d70L,0x7be0fa8e05de0386L, + 0x8e63ef68e9035c7cL } }, + /* 22 << 252 */ + { { 
0x116401b453b31e91L,0x0cba7ad44436b4d8L,0x9151f9a0107afd66L, + 0xafaca8d01f0ee4c4L }, + { 0x75fe5c1d9ee9761cL,0x3497a16bf0c0588fL,0x3ee2bebd0304804cL, + 0xa8fb9a60c2c990b9L } }, + /* 23 << 252 */ + { { 0xd14d32fe39251114L,0x36bf25bccac73366L,0xc9562c66dba7495cL, + 0x324d301b46ad348bL }, + { 0x9f46620cd670407eL,0x0ea8d4f1e3733a01L,0xd396d532b0c324e0L, + 0x5b211a0e03c317cdL } }, + /* 24 << 252 */ + { { 0x090d7d205ffe7b37L,0x3b7f3efb1747d2daL,0xa2cb525fb54fc519L, + 0x6e220932f66a971eL }, + { 0xddc160dfb486d440L,0x7fcfec463fe13465L,0x83da7e4e76e4c151L, + 0xd6fa48a1d8d302b5L } }, + /* 25 << 252 */ + { { 0xc6304f265872cd88L,0x806c1d3c278b90a1L,0x3553e725caf0bc1cL, + 0xff59e603bb9d8d5cL }, + { 0xa4550f327a0b85ddL,0xdec5720a93ecc217L,0x0b88b74169d62213L, + 0x7212f2455b365955L } }, + /* 26 << 252 */ + { { 0x20764111b5cae787L,0x13cb7f581dfd3124L,0x2dca77da1175aefbL, + 0xeb75466bffaae775L }, + { 0x74d76f3bdb6cff32L,0x7440f37a61fcda9aL,0x1bb3ac92b525028bL, + 0x20fbf8f7a1975f29L } }, + /* 27 << 252 */ + { { 0x982692e1df83097fL,0x28738f6c554b0800L,0xdc703717a2ce2f2fL, + 0x7913b93c40814194L }, + { 0x049245931fe89636L,0x7b98443ff78834a6L,0x11c6ab015114a5a1L, + 0x60deb383ffba5f4cL } }, + /* 28 << 252 */ + { { 0x4caa54c601a982e6L,0x1dd35e113491cd26L,0x973c315f7cbd6b05L, + 0xcab0077552494724L }, + { 0x04659b1f6565e15aL,0xbf30f5298c8fb026L,0xfc21641ba8a0de37L, + 0xe9c7a366fa5e5114L } }, + /* 29 << 252 */ + { { 0xdb849ca552f03ad8L,0xc7e8dbe9024e35c0L,0xa1a2bbaccfc3c789L, + 0xbf733e7d9c26f262L }, + { 0x882ffbf5b8444823L,0xb7224e886bf8483bL,0x53023b8b65bef640L, + 0xaabfec91d4d5f8cdL } }, + /* 30 << 252 */ + { { 0xa40e1510079ea1bdL,0x1ad9addcd05d5d26L,0xdb3f2eab13e68d4fL, + 0x1cff1ae2640f803fL }, + { 0xe0e7b749d4cee117L,0x8e9f275b4036d909L,0xce34e31d8f4d4c38L, + 0x22b37f69d75130fcL } }, + /* 31 << 252 */ + { { 0x83e0f1fdb4014604L,0xa8ce991989415078L,0x82375b7541792efeL, + 0x4f59bf5c97d4515bL }, + { 0xac4f324f923a277dL,0xd9bc9b7d650f3406L,0xc6fa87d18a39bc51L, + 0x825885305ccc108fL } }, + /* 32 << 252 */ + { { 0x5ced3c9f82e4c634L,0x8efb83143a4464f8L,0xe706381b7a1dca25L, + 0x6cd15a3c5a2a412bL }, + { 0x9347a8fdbfcd8fb5L,0x31db2eef6e54cd22L,0xc4aeb11ef8d8932fL, + 0x11e7c1ed344411afL } }, + /* 33 << 252 */ + { { 0x2653050cdc9a151eL,0x9edbfc083bb0a859L,0x926c81c7fd5691e7L, + 0x9c1b23426f39019aL }, + { 0x64a81c8b7f8474b9L,0x90657c0701761819L,0x390b333155e0375aL, + 0xc676c626b6ebc47dL } }, + /* 34 << 252 */ + { { 0x51623247b7d6dee8L,0x0948d92779659313L,0x99700161e9ab35edL, + 0x06cc32b48ddde408L }, + { 0x6f2fd664061ef338L,0x1606fa02c202e9edL,0x55388bc1929ba99bL, + 0xc4428c5e1e81df69L } }, + /* 35 << 252 */ + { { 0xce2028aef91b0b2aL,0xce870a23f03dfd3fL,0x66ec2c870affe8edL, + 0xb205fb46284d0c00L }, + { 0xbf5dffe744cefa48L,0xb6fc37a8a19876d7L,0xbecfa84c08b72863L, + 0xd7205ff52576374fL } }, + /* 36 << 252 */ + { { 0x80330d328887de41L,0x5de0df0c869ea534L,0x13f427533c56ea17L, + 0xeb1f6069452b1a78L }, + { 0x50474396e30ea15cL,0x575816a1c1494125L,0xbe1ce55bfe6bb38fL, + 0xb901a94896ae30f7L } }, + /* 37 << 252 */ + { { 0xe5af0f08d8fc3548L,0x5010b5d0d73bfd08L,0x993d288053fe655aL, + 0x99f2630b1c1309fdL }, + { 0xd8677bafb4e3b76fL,0x14e51ddcb840784bL,0x326c750cbf0092ceL, + 0xc83d306bf528320fL } }, + /* 38 << 252 */ + { { 0xc445671577d4715cL,0xd30019f96b703235L,0x207ccb2ed669e986L, + 0x57c824aff6dbfc28L }, + { 0xf0eb532fd8f92a23L,0x4a557fd49bb98fd2L,0xa57acea7c1e6199aL, + 0x0c6638208b94b1edL } }, + /* 39 << 252 */ + { { 0x9b42be8ff83a9266L,0xc7741c970101bd45L,0x95770c1107bd9cebL, + 0x1f50250a8b2e0744L }, + { 
0xf762eec81477b654L,0xc65b900e15efe59aL,0x88c961489546a897L, + 0x7e8025b3c30b4d7cL } }, + /* 40 << 252 */ + { { 0xae4065ef12045cf9L,0x6fcb2caf9ccce8bdL,0x1fa0ba4ef2cf6525L, + 0xf683125dcb72c312L }, + { 0xa01da4eae312410eL,0x67e286776cd8e830L,0xabd9575298fb3f07L, + 0x05f11e11eef649a5L } }, + /* 41 << 252 */ + { { 0xba47faef9d3472c2L,0x3adff697c77d1345L,0x4761fa04dd15afeeL, + 0x64f1f61ab9e69462L }, + { 0xfa691fab9bfb9093L,0x3df8ae8fa1133dfeL,0xcd5f896758cc710dL, + 0xfbb88d5016c7fe79L } }, + /* 42 << 252 */ + { { 0x8e011b4ce88c50d1L,0x7532e807a8771c4fL,0x64c78a48e2278ee4L, + 0x0b283e833845072aL }, + { 0x98a6f29149e69274L,0xb96e96681868b21cL,0x38f0adc2b1a8908eL, + 0x90afcff71feb829dL } }, + /* 43 << 252 */ + { { 0x9915a383210b0856L,0xa5a80602def04889L,0x800e9af97c64d509L, + 0x81382d0bb8996f6fL }, + { 0x490eba5381927e27L,0x46c63b324af50182L,0x784c5fd9d3ad62ceL, + 0xe4fa1870f8ae8736L } }, + /* 44 << 252 */ + { { 0x4ec9d0bcd7466b25L,0x84ddbe1adb235c65L,0x5e2645ee163c1688L, + 0x570bd00e00eba747L }, + { 0xfa51b629128bfa0fL,0x92fce1bd6c1d3b68L,0x3e7361dcb66778b1L, + 0x9c7d249d5561d2bbL } }, + /* 45 << 252 */ + { { 0xa40b28bf0bbc6229L,0x1c83c05edfd91497L,0x5f9f5154f083df05L, + 0xbac38b3ceee66c9dL }, + { 0xf71db7e3ec0dfcfdL,0xf2ecda8e8b0a8416L,0x52fddd867812aa66L, + 0x2896ef104e6f4272L } }, + /* 46 << 252 */ + { { 0xff27186a0fe9a745L,0x08249fcd49ca70dbL,0x7425a2e6441cac49L, + 0xf4a0885aece5ff57L }, + { 0x6e2cb7317d7ead58L,0xf96cf7d61898d104L,0xafe67c9d4f2c9a89L, + 0x89895a501c7bf5bcL } }, + /* 47 << 252 */ + { { 0xdc7cb8e5573cecfaL,0x66497eaed15f03e6L,0x6bc0de693f084420L, + 0x323b9b36acd532b0L }, + { 0xcfed390a0115a3c1L,0x9414c40b2d65ca0eL,0x641406bd2f530c78L, + 0x29369a44833438f2L } }, + /* 48 << 252 */ + { { 0x996884f5903fa271L,0xe6da0fd2b9da921eL,0xa6f2f2695db01e54L, + 0x1ee3e9bd6876214eL }, + { 0xa26e181ce27a9497L,0x36d254e48e215e04L,0x42f32a6c252cabcaL, + 0x9948148780b57614L } }, + /* 49 << 252 */ + { { 0x4c4dfe6940d9cae1L,0x0586958011a10f09L,0xca287b573491b64bL, + 0x77862d5d3fd4a53bL }, + { 0xbf94856e50349126L,0x2be30bd171c5268fL,0x10393f19cbb650a6L, + 0x639531fe778cf9fdL } }, + /* 50 << 252 */ + { { 0x02556a11b2935359L,0xda38aa96af8c126eL,0x47dbe6c20960167fL, + 0x37bbabb6501901cdL }, + { 0xb6e979e02c947778L,0xd69a51757a1a1dc6L,0xc3ed50959d9faf0cL, + 0x4dd9c0961d5fa5f0L } }, + /* 51 << 252 */ + { { 0xa0c4304d64f16ea8L,0x8b1cac167e718623L,0x0b5765467c67f03eL, + 0x559cf5adcbd88c01L }, + { 0x074877bb0e2af19aL,0x1f717ec1a1228c92L,0x70bcb800326e8920L, + 0xec6e2c5c4f312804L } }, + /* 52 << 252 */ + { { 0x426aea7d3fca4752L,0xf12c09492211f62aL,0x24beecd87be7b6b5L, + 0xb77eaf4c36d7a27dL }, + { 0x154c2781fda78fd3L,0x848a83b0264eeabeL,0x81287ef04ffe2bc4L, + 0x7b6d88c6b6b6fc2aL } }, + /* 53 << 252 */ + { { 0x805fb947ce417d99L,0x4b93dcc38b916cc4L,0x72e65bb321273323L, + 0xbcc1badd6ea9886eL }, + { 0x0e2230114bc5ee85L,0xa561be74c18ee1e4L,0x762fd2d4a6bcf1f1L, + 0x50e6a5a495231489L } }, + /* 54 << 252 */ + { { 0xca96001fa00b500bL,0x5c098cfc5d7dcdf5L,0xa64e2d2e8c446a85L, + 0xbae9bcf1971f3c62L }, + { 0x4ec226838435a2c5L,0x8ceaed6c4bad4643L,0xe9f8fb47ccccf4e3L, + 0xbd4f3fa41ce3b21eL } }, + /* 55 << 252 */ + { { 0xd79fb110a3db3292L,0xe28a37dab536c66aL,0x279ce87b8e49e6a9L, + 0x70ccfe8dfdcec8e3L }, + { 0x2193e4e03ba464b2L,0x0f39d60eaca9a398L,0x7d7932aff82c12abL, + 0xd8ff50ed91e7e0f7L } }, + /* 56 << 252 */ + { { 0xea961058fa28a7e0L,0xc726cf250bf5ec74L,0xe74d55c8db229666L, + 0x0bd9abbfa57f5799L }, + { 0x7479ef074dfc47b3L,0xd9c65fc30c52f91dL,0x8e0283fe36a8bde2L, + 0xa32a8b5e7d4b7280L } }, + /* 57 << 252 */ + { { 
0x6a677c6112e83233L,0x0fbb3512dcc9bf28L,0x562e8ea50d780f61L, + 0x0db8b22b1dc4e89cL }, + { 0x0a6fd1fb89be0144L,0x8c77d246ca57113bL,0x4639075dff09c91cL, + 0x5b47b17f5060824cL } }, + /* 58 << 252 */ + { { 0x58aea2b016287b52L,0xa1343520d0cd8eb0L,0x6148b4d0c5d58573L, + 0xdd2b6170291c68aeL }, + { 0xa61b39291da3b3b7L,0x5f946d7908c4ac10L,0x4105d4a57217d583L, + 0x5061da3d25e6de5eL } }, + /* 59 << 252 */ + { { 0x3113940dec1b4991L,0xf12195e136f485aeL,0xa7507fb2731a2ee0L, + 0x95057a8e6e9e196eL }, + { 0xa3c2c9112e130136L,0x97dfbb3633c60d15L,0xcaf3c581b300ee2bL, + 0x77f25d90f4bac8b8L } }, + /* 60 << 252 */ + { { 0xdb1c4f986d840cd6L,0x471d62c0e634288cL,0x8ec2f85ecec8a161L, + 0x41f37cbcfa6f4ae2L }, + { 0x6793a20f4b709985L,0x7a7bd33befa8985bL,0x2c6a3fbd938e6446L, + 0x190426192a8d47c1L } }, + /* 61 << 252 */ + { { 0x16848667cc36975fL,0x02acf1689d5f1dfbL,0x62d41ad4613baa94L, + 0xb56fbb929f684670L }, + { 0xce610d0de9e40569L,0x7b99c65f35489fefL,0x0c88ad1b3df18b97L, + 0x81b7d9be5d0e9edbL } }, + /* 62 << 252 */ + { { 0xd85218c0c716cc0aL,0xf4b5ff9085691c49L,0xa4fd666bce356ac6L, + 0x17c728954b327a7aL }, + { 0xf93d5085da6be7deL,0xff71530e3301d34eL,0x4cd96442d8f448e8L, + 0x9283d3312ed18ffaL } }, + /* 63 << 252 */ + { { 0x4d33dd992a849870L,0xa716964b41576335L,0xff5e3a9b179be0e5L, + 0x5b9d6b1b83b13632L }, + { 0x3b8bd7d4a52f313bL,0xc9dd95a0637a4660L,0x300359620b3e218fL, + 0xce1481a3c7b28a3cL } }, + /* 64 << 252 */ + { { 0xab41b43a43228d83L,0x24ae1c304ad63f99L,0x8e525f1a46a51229L, + 0x14af860fcd26d2b4L }, + { 0xd6baef613f714aa1L,0xf51865adeb78795eL,0xd3e21fcee6a9d694L, + 0x82ceb1dd8a37b527L } }, +}; + +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */
+static int sp_256_ecc_mulmod_add_only_4(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit tmpd[2 * 4 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* tmp;
+    sp_digit* negy;
+    int i;
+    ecc_recode_256 v[37];
+    int err;
+
+    (void)g;
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, rtd, rt);
+    if (err == MP_OKAY)
+        err = sp_256_point_new_4(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    tmp = tmpd;
+#endif
+    negy = tmp;
+
+    if (err == MP_OKAY) {
+        sp_256_ecc_recode_7_4(k, v);
+
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        i = 36;
+        XMEMCPY(rt->x, table[i * 65 + v[i].i].x, sizeof(table->x));
+        XMEMCPY(rt->y, table[i * 65 + v[i].i].y, sizeof(table->y));
+        rt->infinity = !v[i].i;
+        for (--i; i>=0; i--) {
+            XMEMCPY(p->x, table[i * 65 + v[i].i].x, sizeof(table->x));
+            XMEMCPY(p->y, table[i * 65 + v[i].i].y, sizeof(table->y));
+            p->infinity = !v[i].i;
+            sp_256_sub_4(negy, p256_mod, p->y);
+            sp_256_cond_copy_4(p->y, negy, 0 - v[i].neg);
+            sp_256_proj_point_add_qz1_4(rt, rt, p, tmp);
+        }
+        if (map != 0) {
+            sp_256_map_4(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 4 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(sp_digit) * 2 * 4 * 5);
+#endif
+    sp_256_point_free_4(p, 0, heap);
+    sp_256_point_free_4(rt, 0, heap);
+
+    return err;
+}
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_4(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_256_ecc_mulmod_add_only_4(r, NULL, p256_table,
+                                        k, map, heap);
+}
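The two routines above implement a fixed-base scalar multiply with no doublings: the scalar is recoded into 37 signed 7-bit windows (ecc_recode_256 v[37]), each window selects one of 65 precomputed multiples from p256_table (index i * 65 + v[i].i), and v[i].neg selects the point's negation through a constant-time conditional copy, so the walk costs one table lookup plus one mixed addition per window. The sketch below shows the recoding idea in isolation; recode_digit and recode_7 are illustrative names, and the exact digit encoding of wolfSSL's table-driven sp_256_ecc_recode_7_4 (defined elsewhere in this file) may differ in detail.

    #include <stdint.h>

    /* Illustrative signed-window recoding: split a 256-bit scalar into
     * 37 windows of 7 bits, then fold each window into the range 0..64
     * with a sign flag (a digit above 64 becomes 128 - digit, negated,
     * with a carry into the next window).  The real wolfSSL routine is
     * table-driven and branch-free; this version favours clarity. */
    typedef struct { uint8_t i; uint8_t neg; } recode_digit;

    static void recode_7(const uint64_t k[4], recode_digit v[37])
    {
        unsigned carry = 0;
        for (int w = 0; w < 37; w++) {
            unsigned bit = w * 7;
            /* Extract 7 bits starting at 'bit' (may straddle two words). */
            unsigned d = (unsigned)(k[bit / 64] >> (bit % 64));
            if ((bit % 64) > 57 && bit / 64 < 3)
                d |= (unsigned)(k[bit / 64 + 1] << (64 - bit % 64));
            d = (d & 0x7f) + carry;
            if (d > 64) {              /* fold into the signed range */
                v[w].i   = (uint8_t)(128 - d);
                v[w].neg = 1;
                carry = 1;
            }
            else {
                v[w].i   = (uint8_t)d;
                v[w].neg = 0;
                carry = 0;
            }
        }
    }

Folding digits above 64 into negated ones halves the table: -d*P is derived from d*P by negating y, which is exactly what the sp_256_sub_4/sp_256_cond_copy_4 pair in the loop above does.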
+
+#ifdef HAVE_INTEL_AVX2
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_add_only_avx2_4(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit tmpd[2 * 4 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* tmp;
+    sp_digit* negy;
+    int i;
+    ecc_recode_256 v[37];
+    int err;
+
+    (void)g;
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, rtd, rt);
+    if (err == MP_OKAY)
+        err = sp_256_point_new_4(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    tmp = tmpd;
+#endif
+    negy = tmp;
+
+    if (err == MP_OKAY) {
+        sp_256_ecc_recode_7_4(k, v);
+
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        i = 36;
+        XMEMCPY(rt->x, table[i * 65 + v[i].i].x, sizeof(table->x));
+        XMEMCPY(rt->y, table[i * 65 + v[i].i].y, sizeof(table->y));
+        rt->infinity = !v[i].i;
+        for (--i; i>=0; i--) {
+            XMEMCPY(p->x, table[i * 65 + v[i].i].x, sizeof(table->x));
+            XMEMCPY(p->y, table[i * 65 + v[i].i].y, sizeof(table->y));
+            p->infinity = !v[i].i;
+            sp_256_sub_4(negy, p256_mod, p->y);
+            sp_256_cond_copy_4(p->y, negy, 0 - v[i].neg);
+            sp_256_proj_point_add_qz1_avx2_4(rt, rt, p, tmp);
+        }
+        if (map != 0) {
+            sp_256_map_avx2_4(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 4 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(sp_digit) * 2 * 4 * 5);
+#endif
+    sp_256_point_free_4(p, 0, heap);
+    sp_256_point_free_4(rt, 0, heap);
+
+    return err;
+}
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_avx2_4(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_256_ecc_mulmod_add_only_avx2_4(r, NULL, p256_table,
+                                             k, map, heap);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#endif /* WOLFSSL_SP_SMALL */
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[4];
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    err = sp_256_point_new_4(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 4, km);
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_256_ecc_mulmod_base_avx2_4(point, k, map, heap);
+        else
+#endif
+            err = sp_256_ecc_mulmod_base_4(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_4(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_4(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_256_iszero_4(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+extern void sp_256_add_one_4(sp_digit* a);
+extern void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n);
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_256_ecc_gen_k_4(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[32];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_256_from_bin(k, 4, buf, (int)sizeof(buf));
+            if (sp_256_cmp_4(k, p256_order2) < 0) {
+                sp_256_add_one_4(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
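sp_256_ecc_gen_k_4 above is plain rejection sampling: draw 32 random bytes, accept only when the value is below order-2, then add one, giving a uniformly distributed scalar that can never be zero. Below is a toy model of the same control flow over a machine-word modulus; gen_k_toy and order are illustrative stand-ins, and rand() merely takes the place of the DRBG that wc_RNG_GenerateBlock provides (it must never be used for real keys).

    #include <stdint.h>
    #include <stdlib.h>

    /* Toy model of the rejection loop: draw a candidate, accept only
     * when it is below order-2 (rejecting avoids modulo bias), then
     * add one so the result lands in [1, order-2]. */
    static uint32_t gen_k_toy(uint32_t order)
    {
        uint32_t k;
        do {
            k = (uint32_t)rand();       /* candidate scalar */
        } while (!(k < order - 2));     /* reject out-of-range draws */
        return k + 1;                   /* never zero, always < order */
    }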
+
+/* Makes a random EC key pair.
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[4];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256 inf;
+#endif
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256* infinity;
+#endif
+    int err;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_gen_k_4(rng, k);
+    }
+    if (err == MP_OKAY) {
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_256_ecc_mulmod_base_avx2_4(point, k, 1, NULL);
+        else
+#endif
+            err = sp_256_ecc_mulmod_base_4(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+            err = sp_256_ecc_mulmod_avx2_4(infinity, point, p256_order, 1,
+                                           NULL);
+        }
+        else
+#endif
+            err = sp_256_ecc_mulmod_4(infinity, point, p256_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_256_iszero_4(point->x) == 0) || (sp_256_iszero_4(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_4(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_256_point_free_4(infinity, 1, heap);
+#endif
+    sp_256_point_free_4(point, 1, heap);
+
+    return err;
+}
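sp_ecc_make_key_256 is the SP-math backend for P-256 key generation; applications normally reach it through wolfCrypt's public ECC API rather than calling it directly. A minimal sketch of that path, assuming a build where SP math is enabled so wc_ecc_make_key dispatches into the routine above (error handling trimmed):

    #include <wolfssl/wolfcrypt/ecc.h>
    #include <wolfssl/wolfcrypt/random.h>

    /* Generate a P-256 key pair through the public wolfCrypt API. */
    int make_p256_key(ecc_key* key)
    {
        WC_RNG rng;
        int ret = wc_InitRng(&rng);
        if (ret == 0) {
            ret = wc_ecc_init(key);
            if (ret == 0)
                ret = wc_ecc_make_key(&rng, 32, key); /* 32-byte key => P-256 */
            wc_FreeRng(&rng);
        }
        return ret;
    }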
+
+#ifdef HAVE_ECC_DHE
+extern void sp_256_to_bin(sp_digit* r, byte* a);
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for the output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[4];
+#endif
+    sp_point_256* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    if (*outLen < 32U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 4, priv);
+        sp_256_point_from_ecc_point_4(point, pub);
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_256_ecc_mulmod_avx2_4(point, point, k, 1, heap);
+        else
+#endif
+            err = sp_256_ecc_mulmod_4(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_256_to_bin(point->x, out);
+        *outLen = 32;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_4(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+extern sp_digit sp_256_add_4(sp_digit* r, const sp_digit* a, const sp_digit* b);
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+extern void sp_256_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b);
+#ifdef HAVE_INTEL_AVX2
+extern void sp_256_mul_avx2_4(sp_digit* r, const sp_digit* a, const sp_digit* b);
+#endif /* HAVE_INTEL_AVX2 */
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+extern sp_digit sp_256_sub_in_place_4(sp_digit* a, const sp_digit* b);
+extern sp_digit sp_256_cond_sub_avx2_4(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_256_mul_d_4(sp_digit* r, const sp_digit* a, sp_digit b);
+extern void sp_256_mul_d_avx2_4(sp_digit* r, const sp_digit* a, const sp_digit b);
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The divisor.
+ * returns the result of the division.
+ */
+static WC_INLINE sp_digit div_256_word_4(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    register sp_digit r asm("rax");
+    __asm__ __volatile__ (
+        "divq %3"
+        : "=a" (r)
+        : "d" (d1), "a" (d0), "r" (div)
+        :
+    );
+    return r;
+}
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_256_mask_4(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<4; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+#endif
+}
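sp_256_mask_4 and the cond_sub/cond_copy helpers all build on one idiom: a 0-or-1 flag is stretched into an all-zeros or all-ones word by computing 0 - flag, and the resulting word is used as an AND mask so that both outcomes perform identical memory and ALU work. A standalone demonstration of the trick (plain C, not wolfSSL code; names are illustrative):

    #include <stdint.h>

    /* Constant-time select: returns a when bit==1, b when bit==0,
     * without branching on 'bit'.  0 - bit is all ones when bit is 1
     * and all zeros when bit is 0. */
    static uint64_t ct_select(uint64_t bit, uint64_t a, uint64_t b)
    {
        uint64_t mask = 0 - bit;        /* stretch the bit to a full word */
        return (a & mask) | (b & ~mask);
    }

    /* Constant-time conditional subtract, the one-word shape of
     * sp_256_cond_sub_4: r = a - (m ? b : 0). */
    static uint64_t ct_cond_sub(uint64_t a, uint64_t b, uint64_t m)
    {
        return a - (b & (0 - m));
    }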
+
+/* Divide a by d and put the remainder into r (m*d + r = a).
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result.
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[8], t2[5];
+    sp_digit div, r1;
+    int i;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    (void)m;
+
+    div = d[3];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 4);
+    r1 = sp_256_cmp_4(&t1[4], d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+        sp_256_cond_sub_avx2_4(&t1[4], &t1[4], d, (sp_digit)0 - r1);
+    else
+#endif
+        sp_256_cond_sub_4(&t1[4], &t1[4], d, (sp_digit)0 - r1);
+    for (i=3; i>=0; i--) {
+        r1 = div_256_word_4(t1[4 + i], t1[4 + i - 1], div);
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            sp_256_mul_d_avx2_4(t2, d, r1);
+        else
+#endif
+            sp_256_mul_d_4(t2, d, r1);
+        t1[4 + i] += sp_256_sub_in_place_4(&t1[i], t2);
+        t1[4 + i] -= t2[4];
+        sp_256_mask_4(t2, d, t1[4 + i]);
+        t1[4 + i] += sp_256_add_4(&t1[i], &t1[i], t2);
+        sp_256_mask_4(t2, d, t1[4 + i]);
+        t1[4 + i] += sp_256_add_4(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_256_cmp_4(t1, d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+        sp_256_cond_sub_avx2_4(r, t1, d, (sp_digit)0 - r1);
+    else
+#endif
+        sp_256_cond_sub_4(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_mod_4(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_256_div_4(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+extern void sp_256_sqr_4(sp_digit* r, const sp_digit* a);
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P256 curve. */
+static const uint64_t p256_order_minus_2[4] = {
+    0xf3b9cac2fc63254fU,0xbce6faada7179e84U,0xffffffffffffffffU,
+    0xffffffff00000000U
+};
+#else
+/* The low half of the order-2 of the P256 curve. */
+static const uint64_t p256_order_low[2] = {
+    0xf3b9cac2fc63254fU,0xbce6faada7179e84U
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of the P256 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_256_mont_mul_order_4(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_256_mul_4(r, a, b);
+    sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
+}
+
+/* Square a number mod the order of the P256 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_256_mont_sqr_order_4(sp_digit* r, const sp_digit* a)
+{
+    sp_256_sqr_4(r, a);
+    sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
+}
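Both helpers above are an ordinary full product followed by Montgomery reduction, with the curve order as modulus and p256_mp_order as the precomputed value -order^-1 mod 2^64. The one-limb toy below shows the identity being exploited; redc64 and mont_mul64 are illustrative names, unrelated to the 4-limb assembly reduction the real code calls, and unsigned __int128 assumes a GCC/Clang 64-bit target (as the inline asm above already does):

    #include <stdint.h>

    /* One-limb Montgomery reduction: given t < n * 2^64, compute
     * t * 2^-64 mod n.  'ninv' is -n^-1 mod 2^64 (cf. p256_mp_order). */
    static uint64_t redc64(unsigned __int128 t, uint64_t n, uint64_t ninv)
    {
        uint64_t m = (uint64_t)t * ninv;            /* m = t * -n^-1 mod 2^64 */
        unsigned __int128 u = t + (unsigned __int128)m * n; /* low word -> 0 */
        uint64_t r = (uint64_t)(u >> 64);           /* divide by 2^64 exactly */
        return (r >= n) ? r - n : r;                /* single final correction */
    }

    /* Montgomery multiply: inputs and output in Montgomery form a*2^64 mod n. */
    static uint64_t mont_mul64(uint64_t a, uint64_t b, uint64_t n, uint64_t ninv)
    {
        return redc64((unsigned __int128)a * b, n, ninv);
    }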
+#ifndef WOLFSSL_SP_SMALL
+/* Square a number mod the order of the P256 curve a number of times.
+ * (r = a ^ (2 ^ n) mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_256_mont_sqr_n_order_4(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_256_mont_sqr_order_4(r, a);
+    for (i=1; i<n; i++) {
+        sp_256_mont_sqr_order_4(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_order_4(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 4);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_order_4(t, t);
+        if ((p256_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_4(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 4U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 4;
+    sp_digit* t3 = td + 4 * 4;
+    int i;
+
+    /* t = a^2 */
+    sp_256_mont_sqr_order_4(t, a);
+    /* t = a^3 = t * a */
+    sp_256_mont_mul_order_4(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_256_mont_sqr_n_order_4(t2, t, 2);
+    /* t3= a^f = t2 * t */
+    sp_256_mont_mul_order_4(t3, t2, t);
+    /* t2= a^f0 = t3 ^ 2 ^ 4 */
+    sp_256_mont_sqr_n_order_4(t2, t3, 4);
+    /* t = a^ff = t2 * t3 */
+    sp_256_mont_mul_order_4(t, t2, t3);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_256_mont_sqr_n_order_4(t2, t, 8);
+    /* t = a^ffff = t2 * t */
+    sp_256_mont_mul_order_4(t, t2, t);
+    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+    sp_256_mont_sqr_n_order_4(t2, t, 16);
+    /* t = a^ffffffff = t2 * t */
+    sp_256_mont_mul_order_4(t, t2, t);
+    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
+    sp_256_mont_sqr_n_order_4(t2, t, 64);
+    /* t2= a^ffffffff00000000ffffffff = t2 * t */
+    sp_256_mont_mul_order_4(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
+    sp_256_mont_sqr_n_order_4(t2, t2, 32);
+    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+    sp_256_mont_mul_order_4(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
+    for (i=127; i>=112; i--) {
+        sp_256_mont_sqr_order_4(t2, t2);
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_4(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+    sp_256_mont_sqr_n_order_4(t2, t2, 4);
+    sp_256_mont_mul_order_4(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+    for (i=107; i>=64; i--) {
+        sp_256_mont_sqr_order_4(t2, t2);
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_4(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+    sp_256_mont_sqr_n_order_4(t2, t2, 4);
+    sp_256_mont_mul_order_4(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+    for (i=59; i>=32; i--) {
+        sp_256_mont_sqr_order_4(t2, t2);
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_4(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
+    sp_256_mont_sqr_n_order_4(t2, t2, 4);
+    sp_256_mont_mul_order_4(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+    for (i=27; i>=0; i--) {
+        sp_256_mont_sqr_order_4(t2, t2);
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_4(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+    sp_256_mont_sqr_n_order_4(t2, t2, 4);
+    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+    sp_256_mont_mul_order_4(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
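+/* The inversion above uses Fermat's little theorem: the group order n of
+ * P256 is prime, so a^(n-1) = 1 mod n and hence a^(n-2) = 1/a mod n. The
+ * WOLFSSL_SP_SMALL branch walks exponent bits 254..0 of n-2 directly (bit
+ * 255 is covered by starting from t = a); the other branch reaches the same
+ * exponent with a shorter fixed addition chain. In both, the multiply is
+ * conditioned only on public, compile-time exponent bits. */
+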
+#ifdef HAVE_INTEL_AVX2
+extern void sp_256_sqr_avx2_4(sp_digit* r, const sp_digit* a);
+#define sp_256_mont_reduce_order_avx2_4    sp_256_mont_reduce_avx2_4
+
+extern void sp_256_mont_reduce_avx2_4(sp_digit* a, const sp_digit* m, sp_digit mp);
+/* Multiply two numbers mod the order of the P256 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_256_mont_mul_order_avx2_4(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_256_mul_avx2_4(r, a, b);
+    sp_256_mont_reduce_order_avx2_4(r, p256_order, p256_mp_order);
+}
+
+/* Square a number mod the order of the P256 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_256_mont_sqr_order_avx2_4(sp_digit* r, const sp_digit* a)
+{
+    sp_256_sqr_avx2_4(r, a);
+    sp_256_mont_reduce_order_avx2_4(r, p256_order, p256_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square a number mod the order of the P256 curve a number of times.
+ * (r = a ^ (2 ^ n) mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_256_mont_sqr_n_order_avx2_4(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_256_mont_sqr_order_avx2_4(r, a);
+    for (i=1; i<n; i++) {
+        sp_256_mont_sqr_order_avx2_4(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_256_mont_inv_order_avx2_4(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 4);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_order_avx2_4(t, t);
+        if ((p256_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_avx2_4(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 4U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 4;
+    sp_digit* t3 = td + 4 * 4;
+    int i;
+
+    /* t = a^2 */
+    sp_256_mont_sqr_order_avx2_4(t, a);
+    /* t = a^3 = t * a */
+    sp_256_mont_mul_order_avx2_4(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_256_mont_sqr_n_order_avx2_4(t2, t, 2);
+    /* t3= a^f = t2 * t */
+    sp_256_mont_mul_order_avx2_4(t3, t2, t);
+    /* t2= a^f0 = t3 ^ 2 ^ 4 */
+    sp_256_mont_sqr_n_order_avx2_4(t2, t3, 4);
+    /* t = a^ff = t2 * t3 */
+    sp_256_mont_mul_order_avx2_4(t, t2, t3);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_256_mont_sqr_n_order_avx2_4(t2, t, 8);
+    /* t = a^ffff = t2 * t */
+    sp_256_mont_mul_order_avx2_4(t, t2, t);
+    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+    sp_256_mont_sqr_n_order_avx2_4(t2, t, 16);
+    /* t = a^ffffffff = t2 * t */
+    sp_256_mont_mul_order_avx2_4(t, t2, t);
+    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
+    sp_256_mont_sqr_n_order_avx2_4(t2, t, 64);
+    /* t2= a^ffffffff00000000ffffffff = t2 * t */
+    sp_256_mont_mul_order_avx2_4(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
+    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 32);
+    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+    sp_256_mont_mul_order_avx2_4(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
+    for (i=127; i>=112; i--) {
+        sp_256_mont_sqr_order_avx2_4(t2, t2);
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_avx2_4(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
+    sp_256_mont_mul_order_avx2_4(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+    for (i=107; i>=64; i--) {
+        sp_256_mont_sqr_order_avx2_4(t2, t2);
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_avx2_4(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
+    sp_256_mont_mul_order_avx2_4(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+    for (i=59; i>=32; i--) {
+        sp_256_mont_sqr_order_avx2_4(t2, t2);
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_avx2_4(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
+    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
+    sp_256_mont_mul_order_avx2_4(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+    for (i=27; i>=0; i--) {
+        sp_256_mont_sqr_order_avx2_4(t2, t2);
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_avx2_4(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
+    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+    sp_256_mont_mul_order_avx2_4(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN  64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 256 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Second part of result as an mp_int.
+ * km       Ephemeral scalar k to use; when NULL or zero a random k is
+ *          generated.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*4];
+    sp_digit xd[2*4];
+    sp_digit kd[2*4];
+    sp_digit rd[2*4];
+    sp_digit td[3 * 2*4];
+    sp_point_256 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int64_t c;
+    int i;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 4, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 4;
+        x = d + 2 * 4;
+        k = d + 4 * 4;
+        r = d + 6 * 4;
+        tmp = d + 8 * 4;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        s = e;
+        kInv = k;
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(e, 4, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_256_from_mp(x, 4, priv);
+
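+        /* A fresh, secret k is required for every signature: reusing k, or
+         * using a predictable one, lets an attacker recover the private key
+         * from as few as two signatures (the classic ECDSA nonce attack).
+         * km allows a caller-supplied k (e.g. for known-answer tests) and is
+         * zeroised once read; normal callers pass NULL and take the RNG
+         * path. */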
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_256_ecc_gen_k_4(rng, k);
+        }
+        else {
+            sp_256_from_mp(k, 4, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+#ifdef HAVE_INTEL_AVX2
+            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+                err = sp_256_ecc_mulmod_base_avx2_4(point, k, 1, heap);
+            else
+#endif
+                err = sp_256_ecc_mulmod_base_4(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 4U);
+            sp_256_norm_4(r);
+            c = sp_256_cmp_4(r, p256_order);
+            sp_256_cond_sub_4(r, r, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_4(r);
+
+            /* Conv k to Montgomery form (mod order) */
+#ifdef HAVE_INTEL_AVX2
+            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+                sp_256_mul_avx2_4(k, k, p256_norm_order);
+            else
+#endif
+                sp_256_mul_4(k, k, p256_norm_order);
+            err = sp_256_mod_4(k, k, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_4(k);
+            /* kInv = 1/k mod order */
+#ifdef HAVE_INTEL_AVX2
+            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+                sp_256_mont_inv_order_avx2_4(kInv, k, tmp);
+            else
+#endif
+                sp_256_mont_inv_order_4(kInv, k, tmp);
+            sp_256_norm_4(kInv);
+
+            /* s = r * x + e */
+#ifdef HAVE_INTEL_AVX2
+            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+                sp_256_mul_avx2_4(x, x, r);
+            else
+#endif
+                sp_256_mul_4(x, x, r);
+            err = sp_256_mod_4(x, x, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_4(x);
+            carry = sp_256_add_4(s, e, x);
+            sp_256_cond_sub_4(s, s, p256_order, 0 - carry);
+            sp_256_norm_4(s);
+            c = sp_256_cmp_4(s, p256_order);
+            sp_256_cond_sub_4(s, s, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_4(s);
+
+            /* s = s * k^-1 mod order */
+#ifdef HAVE_INTEL_AVX2
+            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+                sp_256_mont_mul_order_avx2_4(s, s, kInv);
+            else
+#endif
+                sp_256_mont_mul_order_4(s, s, kInv);
+            sp_256_norm_4(s);
+
+            /* Check that signature is usable. */
+            if (sp_256_iszero_4(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 4);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 4U);
+#endif
+    sp_256_point_free_4(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 256)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash     Hash to verify.
+ * hashLen  Length of the hash data.
+ * pX       X ordinate of the public point.
+ * pY       Y ordinate of the public point.
+ * pZ       Z ordinate of the public point.
+ * r        First part of the signature as an mp_int.
+ * sm       Second part of the signature as an mp_int.
+ * res      Result of verification: 1 when the signature is valid and 0
+ *          otherwise.
+ * heap     Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */ +int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX, + mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d = NULL; +#else + sp_digit u1d[2*4]; + sp_digit u2d[2*4]; + sp_digit sd[2*4]; + sp_digit tmpd[2*4 * 5]; + sp_point_256 p1d; + sp_point_256 p2d; +#endif + sp_digit* u1 = NULL; + sp_digit* u2 = NULL; + sp_digit* s = NULL; + sp_digit* tmp = NULL; + sp_point_256* p1; + sp_point_256* p2 = NULL; + sp_digit carry; + int64_t c; + int err; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + err = sp_256_point_new_4(heap, p1d, p1); + if (err == MP_OKAY) { + err = sp_256_point_new_4(heap, p2d, p2); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 4, heap, + DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + u1 = d + 0 * 4; + u2 = d + 2 * 4; + s = d + 4 * 4; + tmp = d + 6 * 4; +#else + u1 = u1d; + u2 = u2d; + s = sd; + tmp = tmpd; +#endif + + if (hashLen > 32U) { + hashLen = 32U; + } + + sp_256_from_bin(u1, 4, hash, (int)hashLen); + sp_256_from_mp(u2, 4, r); + sp_256_from_mp(s, 4, sm); + sp_256_from_mp(p2->x, 4, pX); + sp_256_from_mp(p2->y, 4, pY); + sp_256_from_mp(p2->z, 4, pZ); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + sp_256_mul_avx2_4(s, s, p256_norm_order); + } + else +#endif + { + sp_256_mul_4(s, s, p256_norm_order); + } + err = sp_256_mod_4(s, s, p256_order); + } + if (err == MP_OKAY) { + sp_256_norm_4(s); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + sp_256_mont_inv_order_avx2_4(s, s, tmp); + sp_256_mont_mul_order_avx2_4(u1, u1, s); + sp_256_mont_mul_order_avx2_4(u2, u2, s); + } + else +#endif + { + sp_256_mont_inv_order_4(s, s, tmp); + sp_256_mont_mul_order_4(u1, u1, s); + sp_256_mont_mul_order_4(u2, u2, s); + } + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_256_ecc_mulmod_base_avx2_4(p1, u1, 0, heap); + else +#endif + err = sp_256_ecc_mulmod_base_4(p1, u1, 0, heap); + } + if (err == MP_OKAY) { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_256_ecc_mulmod_avx2_4(p2, p2, u2, 0, heap); + else +#endif + err = sp_256_ecc_mulmod_4(p2, p2, u2, 0, heap); + } + + if (err == MP_OKAY) { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + sp_256_proj_point_add_avx2_4(p1, p1, p2, tmp); + if (sp_256_iszero_4(p1->z)) { + if (sp_256_iszero_4(p1->x) && sp_256_iszero_4(p1->y)) { + sp_256_proj_point_dbl_avx2_4(p1, p2, tmp); + } + else { + /* Y ordinate is not used from here - don't set. */ + p1->x[0] = 0; + p1->x[1] = 0; + p1->x[2] = 0; + p1->x[3] = 0; + XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod)); + } + } + } + else +#endif + { + sp_256_proj_point_add_4(p1, p1, p2, tmp); + if (sp_256_iszero_4(p1->z)) { + if (sp_256_iszero_4(p1->x) && sp_256_iszero_4(p1->y)) { + sp_256_proj_point_dbl_4(p1, p2, tmp); + } + else { + /* Y ordinate is not used from here - don't set. 
 */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_256_from_mp(u2, 4, r);
+        err = sp_256_mod_mul_norm_4(u2, u2, p256_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_256_mont_sqr_4(p1->z, p1->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(u1, u2, p1->z, p256_mod, p256_mp_mod);
+        *res = (int)(sp_256_cmp_4(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_256_from_mp(u2, 4, r);
+            carry = sp_256_add_4(u2, u2, p256_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_256_norm_4(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_256_cmp_4(u2, p256_mod);
+                if (c < 0) {
+                    /* Convert to Montgomery form */
+                    err = sp_256_mod_mul_norm_4(u2, u2, p256_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_256_mont_mul_4(u1, u2, p1->z, p256_mod,
+                                                                  p256_mp_mod);
+                        *res = (int)(sp_256_cmp_4(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_256_point_free_4(p1, 0, heap);
+    sp_256_point_free_4(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_256_ecc_is_point_4(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*4];
+    sp_digit t2d[2*4];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 4;
+        t2 = d + 2 * 4;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_256_sqr_4(t1, point->y);
+        (void)sp_256_mod_4(t1, t1, p256_mod);
+        sp_256_sqr_4(t2, point->x);
+        (void)sp_256_mod_4(t2, t2, p256_mod);
+        sp_256_mul_4(t2, t2, point->x);
+        (void)sp_256_mod_4(t2, t2, p256_mod);
+        (void)sp_256_sub_4(t2, p256_mod, t2);
+        sp_256_mont_add_4(t1, t1, t2, p256_mod);
+
+        sp_256_mont_add_4(t1, t1, point->x, p256_mod);
+        sp_256_mont_add_4(t1, t1, point->x, p256_mod);
+        sp_256_mont_add_4(t1, t1, point->x, p256_mod);
+
+        if (sp_256_cmp_4(t1, p256_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
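+/* Written out, sp_256_ecc_is_point_4() above accumulates
+ *
+ *     t1 = y^2 - x^3 + 3*x  (mod p)
+ *
+ * (the subtraction of x^3 is done by adding p - x^3, and the curve
+ * parameter a = -3 appears as the three additions of x) and then compares
+ * t1 with the constant b, i.e. it tests the P256 short Weierstrass equation
+ * y^2 = x^3 - 3*x + b (mod p). */
+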
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 pubd;
+#endif
+    sp_point_256* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_256_point_new_4(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_256_from_mp(pub->x, 4, pX);
+        sp_256_from_mp(pub->y, 4, pY);
+        sp_256_from_bin(pub->z, 4, one, (int)sizeof(one));
+
+        err = sp_256_ecc_is_point_4(pub, NULL);
+    }
+
+    sp_256_point_free_4(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[4];
+    sp_point_256 pubd;
+    sp_point_256 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_256* pub;
+    sp_point_256* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    err = sp_256_point_new_4(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_256_from_mp(pub->x, 4, pX);
+        sp_256_from_mp(pub->y, 4, pY);
+        sp_256_from_bin(pub->z, 4, one, (int)sizeof(one));
+        sp_256_from_mp(priv, 4, privm);
+
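+        /* Validating the point and its order guards against invalid-curve
+         * and small-subgroup attacks: a peer-supplied point failing these
+         * checks could otherwise leak information about the private scalar
+         * when used for key agreement. */
+
+        /* Check point at infinity.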
*/ + if ((sp_256_iszero_4(pub->x) != 0) && + (sp_256_iszero_4(pub->y) != 0)) { + err = ECC_INF_E; + } + } + + if (err == MP_OKAY) { + /* Check range of X and Y */ + if (sp_256_cmp_4(pub->x, p256_mod) >= 0 || + sp_256_cmp_4(pub->y, p256_mod) >= 0) { + err = ECC_OUT_OF_RANGE_E; + } + } + + if (err == MP_OKAY) { + /* Check point is on curve */ + err = sp_256_ecc_is_point_4(pub, heap); + } + + if (err == MP_OKAY) { + /* Point * order = infinity */ +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_256_ecc_mulmod_avx2_4(p, pub, p256_order, 1, heap); + else +#endif + err = sp_256_ecc_mulmod_4(p, pub, p256_order, 1, heap); + } + if (err == MP_OKAY) { + /* Check result is infinity */ + if ((sp_256_iszero_4(p->x) == 0) || + (sp_256_iszero_4(p->y) == 0)) { + err = ECC_INF_E; + } + } + + if (err == MP_OKAY) { + /* Base * private = point */ +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_256_ecc_mulmod_base_avx2_4(p, priv, 1, heap); + else +#endif + err = sp_256_ecc_mulmod_base_4(p, priv, 1, heap); + } + if (err == MP_OKAY) { + /* Check result is public key */ + if (sp_256_cmp_4(p->x, pub->x) != 0 || + sp_256_cmp_4(p->y, pub->y) != 0) { + err = ECC_PRIV_KEY_E; + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (priv != NULL) { + XFREE(priv, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_4(p, 0, heap); + sp_256_point_free_4(pub, 0, heap); + + return err; +} +#endif +#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL +/* Add two projective EC points together. + * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ) + * + * pX First EC point's X ordinate. + * pY First EC point's Y ordinate. + * pZ First EC point's Z ordinate. + * qX Second EC point's X ordinate. + * qY Second EC point's Y ordinate. + * qZ Second EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
+ */ +int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 4 * 5]; + sp_point_256 pd; + sp_point_256 qd; +#endif + sp_digit* tmp; + sp_point_256* p; + sp_point_256* q = NULL; + int err; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + err = sp_256_point_new_4(NULL, pd, p); + if (err == MP_OKAY) { + err = sp_256_point_new_4(NULL, qd, q); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 4, pX); + sp_256_from_mp(p->y, 4, pY); + sp_256_from_mp(p->z, 4, pZ); + sp_256_from_mp(q->x, 4, qX); + sp_256_from_mp(q->y, 4, qY); + sp_256_from_mp(q->z, 4, qZ); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_256_proj_point_add_avx2_4(p, p, q, tmp); + else +#endif + sp_256_proj_point_add_4(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_4(q, 0, NULL); + sp_256_point_free_4(p, 0, NULL); + + return err; +} + +/* Double a projective EC point. + * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
+ */ +int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 4 * 2]; + sp_point_256 pd; +#endif + sp_digit* tmp; + sp_point_256* p; + int err; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + err = sp_256_point_new_4(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 4, pX); + sp_256_from_mp(p->y, 4, pY); + sp_256_from_mp(p->z, 4, pZ); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_256_proj_point_dbl_avx2_4(p, p, tmp); + else +#endif + sp_256_proj_point_dbl_4(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_4(p, 0, NULL); + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 4 * 4]; + sp_point_256 pd; +#endif + sp_digit* tmp; + sp_point_256* p; + int err; + + err = sp_256_point_new_4(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 4, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + if (err == MP_OKAY) { + sp_256_from_mp(p->x, 4, pX); + sp_256_from_mp(p->y, 4, pY); + sp_256_from_mp(p->z, 4, pZ); + + sp_256_map_4(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(p->z, pZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_256_point_free_4(p, 0, NULL); + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
+ */ +static int sp_256_mont_sqrt_4(sp_digit* y) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit t1d[2 * 4]; + sp_digit t2d[2 * 4]; +#endif + sp_digit* t1; + sp_digit* t2; + int err = MP_OKAY; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = d + 0 * 4; + t2 = d + 2 * 4; +#else + t1 = t1d; + t2 = t2d; +#endif + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + /* t2 = y ^ 0x2 */ + sp_256_mont_sqr_avx2_4(t2, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0x3 */ + sp_256_mont_mul_avx2_4(t1, t2, y, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xc */ + sp_256_mont_sqr_n_avx2_4(t2, t1, 2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xf */ + sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xf0 */ + sp_256_mont_sqr_n_avx2_4(t2, t1, 4, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xff */ + sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xff00 */ + sp_256_mont_sqr_n_avx2_4(t2, t1, 8, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffff */ + sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xffff0000 */ + sp_256_mont_sqr_n_avx2_4(t2, t1, 16, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff */ + sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000000 */ + sp_256_mont_sqr_n_avx2_4(t1, t1, 32, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001 */ + sp_256_mont_mul_avx2_4(t1, t1, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */ + sp_256_mont_sqr_n_avx2_4(t1, t1, 96, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */ + sp_256_mont_mul_avx2_4(t1, t1, y, p256_mod, p256_mp_mod); + sp_256_mont_sqr_n_avx2_4(y, t1, 94, p256_mod, p256_mp_mod); + } + else +#endif + { + /* t2 = y ^ 0x2 */ + sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0x3 */ + sp_256_mont_mul_4(t1, t2, y, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xc */ + sp_256_mont_sqr_n_4(t2, t1, 2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xf */ + sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xf0 */ + sp_256_mont_sqr_n_4(t2, t1, 4, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xff */ + sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xff00 */ + sp_256_mont_sqr_n_4(t2, t1, 8, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffff */ + sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod); + /* t2 = y ^ 0xffff0000 */ + sp_256_mont_sqr_n_4(t2, t1, 16, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff */ + sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000000 */ + sp_256_mont_sqr_n_4(t1, t1, 32, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001 */ + sp_256_mont_mul_4(t1, t1, y, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */ + sp_256_mont_sqr_n_4(t1, t1, 96, p256_mod, p256_mp_mod); + /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */ + sp_256_mont_mul_4(t1, t1, y, p256_mod, p256_mp_mod); + sp_256_mont_sqr_n_4(y, t1, 94, p256_mod, p256_mp_mod); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || 
defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit xd[2 * 4]; + sp_digit yd[2 * 4]; +#endif + sp_digit* x = NULL; + sp_digit* y = NULL; + int err = MP_OKAY; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + x = d + 0 * 4; + y = d + 2 * 4; +#else + x = xd; + y = yd; +#endif + + sp_256_from_mp(x, 4, xm); + err = sp_256_mod_mul_norm_4(x, x, p256_mod); + } + if (err == MP_OKAY) { + /* y = x^3 */ +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + sp_256_mont_sqr_avx2_4(y, x, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(y, y, x, p256_mod, p256_mp_mod); + } + else +#endif + { + sp_256_mont_sqr_4(y, x, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(y, y, x, p256_mod, p256_mp_mod); + } + /* y = x^3 - 3x */ + sp_256_mont_sub_4(y, y, x, p256_mod); + sp_256_mont_sub_4(y, y, x, p256_mod); + sp_256_mont_sub_4(y, y, x, p256_mod); + /* y = x^3 - 3x + b */ + err = sp_256_mod_mul_norm_4(x, p256_b, p256_mod); + } + if (err == MP_OKAY) { + sp_256_mont_add_4(y, y, x, p256_mod); + /* y = sqrt(x^3 - 3x + b) */ + err = sp_256_mont_sqrt_4(y); + } + if (err == MP_OKAY) { + XMEMSET(y + 4, 0, 4U * sizeof(sp_digit)); + sp_256_mont_reduce_4(y, p256_mod, p256_mp_mod); + if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) { + sp_256_mont_sub_4(y, p256_mod, y, p256_mod); + } + + err = sp_256_to_mp(y, ym); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} +#endif +#endif /* !WOLFSSL_SP_NO_256 */ +#ifdef WOLFSSL_SP_384 + +/* Point structure to use. */ +typedef struct sp_point_384 { + sp_digit x[2 * 6]; + sp_digit y[2 * 6]; + sp_digit z[2 * 6]; + int infinity; +} sp_point_384; + +/* The modulus (prime) of the curve P384. */ +static const sp_digit p384_mod[6] = { + 0x00000000ffffffffL,0xffffffff00000000L,0xfffffffffffffffeL, + 0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL +}; +/* The Montogmery normalizer for modulus of the curve P384. */ +static const sp_digit p384_norm_mod[6] = { + 0xffffffff00000001L,0x00000000ffffffffL,0x0000000000000001L, + 0x0000000000000000L,0x0000000000000000L,0x0000000000000000L +}; +/* The Montogmery multiplier for modulus of the curve P384. */ +static sp_digit p384_mp_mod = 0x0000000100000001; +#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ + defined(HAVE_ECC_VERIFY) +/* The order of the curve P384. 
*/ +static const sp_digit p384_order[6] = { + 0xecec196accc52973L,0x581a0db248b0a77aL,0xc7634d81f4372ddfL, + 0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL +}; +#endif +/* The order of the curve P384 minus 2. */ +static const sp_digit p384_order2[6] = { + 0xecec196accc52971L,0x581a0db248b0a77aL,0xc7634d81f4372ddfL, + 0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL +}; +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montogmery normalizer for order of the curve P384. */ +static const sp_digit p384_norm_order[6] = { + 0x1313e695333ad68dL,0xa7e5f24db74f5885L,0x389cb27e0bc8d220L, + 0x0000000000000000L,0x0000000000000000L,0x0000000000000000L +}; +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montogmery multiplier for order of the curve P384. */ +static sp_digit p384_mp_order = 0x6ed46089e88fdc45l; +#endif +/* The base point of curve P384. */ +static const sp_point_384 p384_base = { + /* X ordinate */ + { + 0x3a545e3872760ab7L,0x5502f25dbf55296cL,0x59f741e082542a38L, + 0x6e1d3b628ba79b98L,0x8eb1c71ef320ad74L,0xaa87ca22be8b0537L, + 0L, 0L, 0L, 0L, 0L, 0L + }, + /* Y ordinate */ + { + 0x7a431d7c90ea0e5fL,0x0a60b1ce1d7e819dL,0xe9da3113b5f0b8c0L, + 0xf8f41dbd289a147cL,0x5d9e98bf9292dc29L,0x3617de4a96262c6fL, + 0L, 0L, 0L, 0L, 0L, 0L + }, + /* Z ordinate */ + { + 0x0000000000000001L,0x0000000000000000L,0x0000000000000000L, + 0x0000000000000000L,0x0000000000000000L,0x0000000000000000L, + 0L, 0L, 0L, 0L, 0L, 0L + }, + /* infinity */ + 0 +}; +#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY) +static const sp_digit p384_b[6] = { + 0x2a85c8edd3ec2aefL,0xc656398d8a2ed19dL,0x0314088f5013875aL, + 0x181d9c6efe814112L,0x988e056be3f82d19L,0xb3312fa7e23ee7e4L +}; +#endif + +static int sp_384_point_new_ex_6(void* heap, sp_point_384* sp, sp_point_384** p) +{ + int ret = MP_OKAY; + (void)heap; +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + (void)sp; + *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); +#else + *p = sp; +#endif + if (*p == NULL) { + ret = MEMORY_E; + } + return ret; +} + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +/* Allocate memory for point and return error. */ +#define sp_384_point_new_6(heap, sp, p) sp_384_point_new_ex_6((heap), NULL, &(p)) +#else +/* Set pointer to data and return no error. */ +#define sp_384_point_new_6(heap, sp, p) sp_384_point_new_ex_6((heap), &(sp), &(p)) +#endif + + +static void sp_384_point_free_6(sp_point_384* p, int clear, void* heap) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +/* If valid pointer then clear point data if requested and free data. */ + if (p != NULL) { + if (clear != 0) { + XMEMSET(p, 0, sizeof(*p)); + } + XFREE(p, heap, DYNAMIC_TYPE_ECC); + } +#else +/* Clear point data if requested. */ + if (clear != 0) { + XMEMSET(p, 0, sizeof(*p)); + } +#endif + (void)heap; +} + +/* Multiply a number by Montogmery normalizer mod modulus (prime). + * + * r The resulting Montgomery form number. + * a The number to convert. + * m The modulus (prime). + * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise. 
+ */ +static int sp_384_mod_mul_norm_6(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + int64_t* td; +#else + int64_t td[12]; + int64_t a32d[12]; +#endif + int64_t* t; + int64_t* a32; + int64_t o; + int err = MP_OKAY; + + (void)m; + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 12, NULL, DYNAMIC_TYPE_ECC); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = td; + a32 = td + 12; +#else + t = td; + a32 = a32d; +#endif + + a32[0] = a[0] & 0xffffffff; + a32[1] = a[0] >> 32; + a32[2] = a[1] & 0xffffffff; + a32[3] = a[1] >> 32; + a32[4] = a[2] & 0xffffffff; + a32[5] = a[2] >> 32; + a32[6] = a[3] & 0xffffffff; + a32[7] = a[3] >> 32; + a32[8] = a[4] & 0xffffffff; + a32[9] = a[4] >> 32; + a32[10] = a[5] & 0xffffffff; + a32[11] = a[5] >> 32; + + /* 1 0 0 0 0 0 0 0 1 1 0 -1 */ + t[0] = 0 + a32[0] + a32[8] + a32[9] - a32[11]; + /* -1 1 0 0 0 0 0 0 -1 0 1 1 */ + t[1] = 0 - a32[0] + a32[1] - a32[8] + a32[10] + a32[11]; + /* 0 -1 1 0 0 0 0 0 0 -1 0 1 */ + t[2] = 0 - a32[1] + a32[2] - a32[9] + a32[11]; + /* 1 0 -1 1 0 0 0 0 1 1 -1 -1 */ + t[3] = 0 + a32[0] - a32[2] + a32[3] + a32[8] + a32[9] - a32[10] - a32[11]; + /* 1 1 0 -1 1 0 0 0 1 2 1 -2 */ + t[4] = 0 + a32[0] + a32[1] - a32[3] + a32[4] + a32[8] + 2 * a32[9] + a32[10] - 2 * a32[11]; + /* 0 1 1 0 -1 1 0 0 0 1 2 1 */ + t[5] = 0 + a32[1] + a32[2] - a32[4] + a32[5] + a32[9] + 2 * a32[10] + a32[11]; + /* 0 0 1 1 0 -1 1 0 0 0 1 2 */ + t[6] = 0 + a32[2] + a32[3] - a32[5] + a32[6] + a32[10] + 2 * a32[11]; + /* 0 0 0 1 1 0 -1 1 0 0 0 1 */ + t[7] = 0 + a32[3] + a32[4] - a32[6] + a32[7] + a32[11]; + /* 0 0 0 0 1 1 0 -1 1 0 0 0 */ + t[8] = 0 + a32[4] + a32[5] - a32[7] + a32[8]; + /* 0 0 0 0 0 1 1 0 -1 1 0 0 */ + t[9] = 0 + a32[5] + a32[6] - a32[8] + a32[9]; + /* 0 0 0 0 0 0 1 1 0 -1 1 0 */ + t[10] = 0 + a32[6] + a32[7] - a32[9] + a32[10]; + /* 0 0 0 0 0 0 0 1 1 0 -1 1 */ + t[11] = 0 + a32[7] + a32[8] - a32[10] + a32[11]; + + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + t[8] += t[7] >> 32; t[7] &= 0xffffffff; + t[9] += t[8] >> 32; t[8] &= 0xffffffff; + t[10] += t[9] >> 32; t[9] &= 0xffffffff; + t[11] += t[10] >> 32; t[10] &= 0xffffffff; + o = t[11] >> 32; t[11] &= 0xffffffff; + t[0] += o; + t[1] -= o; + t[3] += o; + t[4] += o; + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; + t[8] += t[7] >> 32; t[7] &= 0xffffffff; + t[9] += t[8] >> 32; t[8] &= 0xffffffff; + t[10] += t[9] >> 32; t[9] &= 0xffffffff; + t[11] += t[10] >> 32; t[10] &= 0xffffffff; + + r[0] = (t[1] << 32) | t[0]; + r[1] = (t[3] << 32) | t[2]; + r[2] = (t[5] << 32) | t[4]; + r[3] = (t[7] << 32) | t[6]; + r[4] = (t[9] << 32) | t[8]; + r[5] = (t[11] << 32) | t[10]; + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (td != NULL) + 
XFREE(td, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 64 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 64 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffffffffffffl; + s = 64U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 64U) <= (word32)DIGIT_BIT) { + s += 64U; + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 64) { + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + s = 64 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Convert a point of type ecc_point to type sp_point_384. + * + * p Point of type sp_point_384 (result). + * pm Point of type ecc_point. + */ +static void sp_384_point_from_ecc_point_6(sp_point_384* p, const ecc_point* pm) +{ + XMEMSET(p->x, 0, sizeof(p->x)); + XMEMSET(p->y, 0, sizeof(p->y)); + XMEMSET(p->z, 0, sizeof(p->z)); + sp_384_from_mp(p->x, 6, pm->x); + sp_384_from_mp(p->y, 6, pm->y); + sp_384_from_mp(p->z, 6, pm->z); + p->infinity = 0; +} + +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_384_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 64 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 6); + r->used = 6; + mp_clamp(r); +#elif DIGIT_BIT < 64 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 6; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 64) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 64 - s; + } + r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 6; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 64 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 64 - s; + } + else { + s += 64; + } + } + r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Convert a point of type sp_point_384 to type ecc_point. + * + * p Point of type sp_point_384. + * pm Point of type ecc_point (result). 
+ * returns MEMORY_E when allocation of memory in ecc_point fails, otherwise
+ * MP_OKAY.
+ */
+static int sp_384_point_to_ecc_point_6(const sp_point_384* p, ecc_point* pm)
+{
+    int err;
+
+    err = sp_384_to_mp(p->x, pm->x);
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pm->y);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pm->z);
+    }
+
+    return err;
+}
+
+extern void sp_384_cond_copy_6(sp_digit* r, const sp_digit* a, sp_digit m);
+extern void sp_384_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b);
+extern sp_digit sp_384_cond_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_384_mont_reduce_6(sp_digit* a, const sp_digit* m, sp_digit mp);
+extern void sp_384_mont_reduce_order_6(sp_digit* a, const sp_digit* m, sp_digit mp);
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_384_mont_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mul_6(r, a, b);
+    sp_384_mont_reduce_6(r, m, mp);
+}
+
+extern void sp_384_sqr_6(sp_digit* r, const sp_digit* a);
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_6(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_384_sqr_6(r, a);
+    sp_384_mont_reduce_6(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times.
+ * (r = a ^ (2 ^ n) mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_n_6(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mont_sqr_6(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_384_mont_sqr_6(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* The modulus minus 2 for the P384 curve. */
+static const uint64_t p384_mod_minus_2[6] = {
+    0x00000000fffffffdU,0xffffffff00000000U,0xfffffffffffffffeU,
+    0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P384 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */ +static void sp_384_mont_inv_6(sp_digit* r, const sp_digit* a, sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 6); + for (i=382; i>=0; i--) { + sp_384_mont_sqr_6(t, t, p384_mod, p384_mp_mod); + if (p384_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64))) + sp_384_mont_mul_6(t, t, a, p384_mod, p384_mp_mod); + } + XMEMCPY(r, t, sizeof(sp_digit) * 6); +#else + sp_digit* t1 = td; + sp_digit* t2 = td + 2 * 6; + sp_digit* t3 = td + 4 * 6; + sp_digit* t4 = td + 6 * 6; + sp_digit* t5 = td + 8 * 6; + + /* 0x2 */ + sp_384_mont_sqr_6(t1, a, p384_mod, p384_mp_mod); + /* 0x3 */ + sp_384_mont_mul_6(t5, t1, a, p384_mod, p384_mp_mod); + /* 0xc */ + sp_384_mont_sqr_n_6(t1, t5, 2, p384_mod, p384_mp_mod); + /* 0xf */ + sp_384_mont_mul_6(t2, t5, t1, p384_mod, p384_mp_mod); + /* 0x1e */ + sp_384_mont_sqr_6(t1, t2, p384_mod, p384_mp_mod); + /* 0x1f */ + sp_384_mont_mul_6(t4, t1, a, p384_mod, p384_mp_mod); + /* 0x3e0 */ + sp_384_mont_sqr_n_6(t1, t4, 5, p384_mod, p384_mp_mod); + /* 0x3ff */ + sp_384_mont_mul_6(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0x7fe0 */ + sp_384_mont_sqr_n_6(t1, t2, 5, p384_mod, p384_mp_mod); + /* 0x7fff */ + sp_384_mont_mul_6(t4, t4, t1, p384_mod, p384_mp_mod); + /* 0x3fff8000 */ + sp_384_mont_sqr_n_6(t1, t4, 15, p384_mod, p384_mp_mod); + /* 0x3fffffff */ + sp_384_mont_mul_6(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0xfffffffc */ + sp_384_mont_sqr_n_6(t3, t2, 2, p384_mod, p384_mp_mod); + /* 0xfffffffd */ + sp_384_mont_mul_6(r, t3, a, p384_mod, p384_mp_mod); + /* 0xffffffff */ + sp_384_mont_mul_6(t3, t5, t3, p384_mod, p384_mp_mod); + /* 0xfffffffc0000000 */ + sp_384_mont_sqr_n_6(t1, t2, 30, p384_mod, p384_mp_mod); + /* 0xfffffffffffffff */ + sp_384_mont_mul_6(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffff000000000000000 */ + sp_384_mont_sqr_n_6(t1, t2, 60, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffff */ + sp_384_mont_mul_6(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */ + sp_384_mont_sqr_n_6(t1, t2, 120, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_6(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */ + sp_384_mont_sqr_n_6(t1, t2, 15, p384_mod, p384_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_6(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */ + sp_384_mont_sqr_n_6(t1, t2, 33, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */ + sp_384_mont_mul_6(t2, t3, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */ + sp_384_mont_sqr_n_6(t1, t2, 96, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */ + sp_384_mont_mul_6(r, r, t1, p384_mod, p384_mp_mod); + +#endif /* WOLFSSL_SP_SMALL */ +} + +extern int64_t sp_384_cmp_6(const sp_digit* a, const sp_digit* b); +/* Normalize the values in each word to 64. + * + * a Array of sp_digit to normalize. + */ +#define sp_384_norm_6(a) + +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. + * p Montgomery form projective coordinate point. + * t Temporary ordinate data. 
+ */
+static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*6;
+    int64_t n;
+
+    sp_384_mont_inv_6(t1, p->z, t + 2*6);
+
+    sp_384_mont_sqr_6(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    /* x /= z^2 */
+    sp_384_mont_mul_6(r->x, p->x, t2, p384_mod, p384_mp_mod);
+    XMEMSET(r->x + 6, 0, sizeof(r->x) / 2U);
+    sp_384_mont_reduce_6(r->x, p384_mod, p384_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_384_cmp_6(r->x, p384_mod);
+    sp_384_cond_sub_6(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_6(r->x);
+
+    /* y /= z^3 */
+    sp_384_mont_mul_6(r->y, p->y, t1, p384_mod, p384_mp_mod);
+    XMEMSET(r->y + 6, 0, sizeof(r->y) / 2U);
+    sp_384_mont_reduce_6(r->y, p384_mod, p384_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_384_cmp_6(r->y, p384_mod);
+    sp_384_cond_sub_6(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_6(r->y);
+
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+extern sp_digit sp_384_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b);
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r  Result of addition.
+ * a  First number to add in Montgomery form.
+ * b  Second number to add in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_384_mont_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_6(r, a, b);
+    sp_384_cond_sub_6(r, r, m, 0 - o);
+}
+
+extern sp_digit sp_384_dbl_6(sp_digit* r, const sp_digit* a);
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r  Result of doubling.
+ * a  Number to double in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_384_mont_dbl_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_dbl_6(r, a);
+    sp_384_cond_sub_6(r, r, m, 0 - o);
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r  Result of tripling.
+ * a  Number to triple in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_384_mont_tpl_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_dbl_6(r, a);
+    sp_384_cond_sub_6(r, r, m, 0 - o);
+    o = sp_384_add_6(r, r, a);
+    sp_384_cond_sub_6(r, r, m, 0 - o);
+}
+
+extern sp_digit sp_384_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b);
+extern sp_digit sp_384_cond_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r  Result of subtraction.
+ * a  Number to subtract from in Montgomery form.
+ * b  Number to subtract with in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_384_mont_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_sub_6(r, a, b);
+    sp_384_cond_add_6(r, r, m, o);
+}
+
+extern void sp_384_div2_6(sp_digit* r, const sp_digit* a, const sp_digit* m);
+/* Double the Montgomery form projective point p.
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_6(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*6;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result.
*/ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_6(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_6(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_6(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_6(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_6(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_6(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_6(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_6(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_6(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_div2_6(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_6(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_6(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_6(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_6(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_6(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_6(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_6(y, y, t2, p384_mod); +} + +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. + */ +static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int n, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*6; + sp_digit* b = t + 4*6; + sp_digit* t1 = t + 6*6; + sp_digit* t2 = t + 8*6; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_384_mont_dbl_6(y, y, p384_mod); + /* W = Z^4 */ + sp_384_mont_sqr_6(w, z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(w, w, p384_mod, p384_mp_mod); + +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_384_mont_sqr_6(t1, x, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(t1, t1, w, p384_mod); + sp_384_mont_tpl_6(a, t1, p384_mod); + /* B = X*Y^2 */ + sp_384_mont_sqr_6(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(b, t1, x, p384_mod, p384_mp_mod); + /* X = A^2 - 2B */ + sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(t2, b, p384_mod); + sp_384_mont_sub_6(x, x, t2, p384_mod); + /* Z = Z*Y */ + sp_384_mont_mul_6(z, z, y, p384_mod, p384_mp_mod); + /* t2 = Y^4 */ + sp_384_mont_sqr_6(t1, t1, p384_mod, p384_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_384_mont_mul_6(w, w, t1, p384_mod, p384_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_384_mont_sub_6(y, b, x, p384_mod); + sp_384_mont_mul_6(y, y, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(y, y, p384_mod); + sp_384_mont_sub_6(y, y, t1, p384_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_384_mont_sqr_6(t1, x, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(t1, t1, w, p384_mod); + sp_384_mont_tpl_6(a, t1, p384_mod); + /* B = X*Y^2 */ + sp_384_mont_sqr_6(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(b, t1, x, p384_mod, p384_mp_mod); + /* X = A^2 - 2B */ + sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(t2, b, p384_mod); + sp_384_mont_sub_6(x, x, t2, p384_mod); + /* Z = Z*Y */ + sp_384_mont_mul_6(z, z, y, p384_mod, p384_mp_mod); + /* t2 = Y^4 */ + sp_384_mont_sqr_6(t1, t1, p384_mod, p384_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_384_mont_sub_6(y, b, x, p384_mod); + sp_384_mont_mul_6(y, y, 
a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(y, y, p384_mod); + sp_384_mont_sub_6(y, y, t1, p384_mod); +#endif + /* Y = Y/2 */ + sp_384_div2_6(y, y, p384_mod); +} + +/* Compare two numbers to determine if they are equal. + * Constant time implementation. + * + * a First number to compare. + * b Second number to compare. + * returns 1 when equal and 0 otherwise. + */ +static int sp_384_cmp_equal_6(const sp_digit* a, const sp_digit* b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) | + (a[4] ^ b[4]) | (a[5] ^ b[5])) == 0; +} + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_384_proj_point_add_6(sp_point_384* r, const sp_point_384* p, const sp_point_384* q, + sp_digit* t) +{ + const sp_point_384* ap[2]; + sp_point_384* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + sp_digit* t3 = t + 4*6; + sp_digit* t4 = t + 6*6; + sp_digit* t5 = t + 8*6; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Ensure only the first point is the same as the result. */ + if (q == r) { + const sp_point_384* a = p; + p = q; + q = a; + } + + /* Check double */ + (void)sp_384_sub_6(t1, p384_mod, q->y); + sp_384_norm_6(t1); + if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & + (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { + sp_384_proj_point_dbl_6(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_384)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<6; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<6; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<6; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t1, t1, x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_6(t3, t3, y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_6(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_6(t4, t4, t3, p384_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(x, x, t5, p384_mod); + sp_384_mont_dbl_6(t1, y, p384_mod); + sp_384_mont_sub_6(x, x, t1, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_6(y, y, x, p384_mod); + sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(y, y, t5, p384_mod); + } +} + +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. 
+ * p Point to double.
+ * n Number of times to double
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_n_store_6(sp_point_384* r, const sp_point_384* p,
+        int n, int m, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*6;
+    sp_digit* b = t + 4*6;
+    sp_digit* t1 = t + 6*6;
+    sp_digit* t2 = t + 8*6;
+    sp_digit* x = r[2*m].x;
+    sp_digit* y = r[(1<<n)*m].y;
+    sp_digit* z = r[2*m].z;
+    int i;
+
+    for (i=0; i<6; i++) {
+        x[i] = p->x[i];
+    }
+    for (i=0; i<6; i++) {
+        y[i] = p->y[i];
+    }
+    for (i=0; i<6; i++) {
+        z[i] = p->z[i];
+    }
+
+    /* Y = 2*Y */
+    sp_384_mont_dbl_6(y, y, p384_mod);
+    /* W = Z^4 */
+    sp_384_mont_sqr_6(w, z, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_6(w, w, p384_mod, p384_mp_mod);
+    for (i=1; i<=n; i++) {
+        /* A = 3*(X^2 - W) */
+        sp_384_mont_sqr_6(t1, x, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_6(t1, t1, w, p384_mod);
+        sp_384_mont_tpl_6(a, t1, p384_mod);
+        /* B = X*Y^2 */
+        sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(b, t2, x, p384_mod, p384_mp_mod);
+        x = r[(1<<i)*m].x;
+        /* X = A^2 - 2B */
+        sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_6(t1, b, p384_mod);
+        sp_384_mont_sub_6(x, x, t1, p384_mod);
+        /* Z = Z*Y */
+        z = r[(1<<i)*m].z;
+        sp_384_mont_mul_6(z, z, y, p384_mod, p384_mp_mod);
+        /* t2 = Y^4 */
+        sp_384_mont_sqr_6(t2, t2, p384_mod, p384_mp_mod);
+        if (i != n) {
+            /* W = W*Y^4 */
+            sp_384_mont_mul_6(w, w, t2, p384_mod, p384_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_384_mont_sub_6(y, b, x, p384_mod);
+        sp_384_mont_mul_6(y, y, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_6(y, y, p384_mod);
+        sp_384_mont_sub_6(y, y, t2, p384_mod);
+
+        /* Y = Y/2 */
+        sp_384_div2_6(r[(1<<i)*m].y, y, p384_mod);
+        r[(1<<i)*m].infinity = 0;
+    }
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * ra Result of addition.
+ * rs Result of subtraction.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_sub_6(sp_point_384* ra, sp_point_384* rs,
+        const sp_point_384* p, const sp_point_384* q, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*6;
+    sp_digit* t3 = t + 4*6;
+    sp_digit* t4 = t + 6*6;
+    sp_digit* t5 = t + 8*6;
+    sp_digit* t6 = t + 10*6;
+    sp_digit* x = ra->x;
+    sp_digit* y = ra->y;
+    sp_digit* z = ra->z;
+    sp_digit* xs = rs->x;
+    sp_digit* ys = rs->y;
+    sp_digit* zs = rs->z;
+
+    XMEMCPY(x, p->x, sizeof(p->x) / 2);
+    XMEMCPY(y, p->y, sizeof(p->y) / 2);
+    XMEMCPY(z, p->z, sizeof(p->z) / 2);
+    ra->infinity = 0;
+    rs->infinity = 0;
+
+    /* U1 = X1*Z2^2 */
+    sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t1, t1, x, p384_mod, p384_mp_mod);
+    /* U2 = X2*Z1^2 */
+    sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod);
+    /* S1 = Y1*Z2^3 */
+    sp_384_mont_mul_6(t3, t3, y, p384_mod, p384_mp_mod);
+    /* S2 = Y2*Z1^3 */
+    sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod);
+    /* H = U2 - U1 */
+    sp_384_mont_sub_6(t2, t2, t1, p384_mod);
+    /* RS = S2 + S1 */
+    sp_384_mont_add_6(t6, t4, t3, p384_mod);
+    /* R = S2 - S1 */
+    sp_384_mont_sub_6(t4, t4, t3, p384_mod);
+    /* Z3 = H*Z1*Z2 */
+    /* ZS = H*Z1*Z2 */
+    sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod);
+    XMEMCPY(zs, z, sizeof(p->z)/2);
+    /* X3 = R^2 - H^3 - 2*U1*H^2 */
+    /* XS = RS^2 - H^3 - 2*U1*H^2 */
+    sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_6(xs, t6, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_6(x, x, t5, p384_mod);
+    sp_384_mont_sub_6(xs, xs, t5, p384_mod);
+    sp_384_mont_dbl_6(t1, y, p384_mod);
+    sp_384_mont_sub_6(x, x, t1, p384_mod);
+    sp_384_mont_sub_6(xs, xs, t1, p384_mod);
+    /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+    /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */
+    sp_384_mont_sub_6(ys, y, xs, p384_mod);
+    sp_384_mont_sub_6(y, y, x, p384_mod);
+    sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod);
+    sp_384_sub_6(t6, p384_mod, t6);
+    sp_384_mont_mul_6(ys, ys, t6, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_6(y, y, t5, p384_mod);
+    sp_384_mont_sub_6(ys, ys, t5, p384_mod);
+}
+
+/* Structure used to describe recoding of scalar multiplication. */
+typedef struct ecc_recode_384 {
+    /* Index into pre-computation table. */
+    uint8_t i;
+    /* Use the negative of the point. */
+    uint8_t neg;
+} ecc_recode_384;
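+/* Illustrative sketch, not from the original patch: sp_384_ecc_recode_6_6()
+ * below rewrites the scalar in signed 6-bit windows so that only the 32
+ * pre-computed points 1*G..32*G are needed. A window value above the table
+ * range is folded back by negating the point and carrying 1 into the next
+ * window, e.g. 61 recodes to -3 with a carry of 1 (61 = 64 - 3). One
+ * simplified standalone variant of the idea, with hypothetical names and
+ * ignoring windows that straddle a 64-bit word boundary (which the real
+ * function handles explicitly), is:
+ *
+ *     static void recode_signed_6(const uint64_t* k, int windows, int8_t* out)
+ *     {
+ *         int carry = 0;
+ *         int i;
+ *         for (i = 0; i < windows; i++) {
+ *             int d = (int)((k[(i * 6) / 64] >> ((i * 6) % 64)) & 0x3f);
+ *             d += carry;
+ *             carry = (d > 32);          // fold large digits to negatives
+ *             out[i] = (int8_t)(carry ? d - 64 : d);
+ *         }
+ *     }
+ */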
+/* The index into pre-computation table to use.
+ */
+static const uint8_t recode_index_6_6[66] = {
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+    16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,
+     0,  1,
+};
+
+/* Whether to negate y-ordinate. */
+static const uint8_t recode_neg_6_6[66] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 0,
+};
+
+/* Recode the scalar for multiplication using pre-computed values and
+ * subtraction.
+ *
+ * k Scalar to multiply by.
+ * v Vector of operations to perform.
+ */
+static void sp_384_ecc_recode_6_6(const sp_digit* k, ecc_recode_384* v)
+{
+    int i, j;
+    uint8_t y;
+    int carry = 0;
+    int o;
+    sp_digit n;
+
+    j = 0;
+    n = k[j];
+    o = 0;
+    for (i=0; i<65; i++) {
+        y = n;
+        if (o + 6 < 64) {
+            y &= 0x3f;
+            n >>= 6;
+            o += 6;
+        }
+        else if (o + 6 == 64) {
+            n >>= 6;
+            if (++j < 6)
+                n = k[j];
+            o = 0;
+        }
+        else if (++j < 6) {
+            n = k[j];
+            y |= (n << (64 - o)) & 0x3f;
+            o -= 58;
+            n >>= o;
+        }
+
+        y += carry;
+        v[i].i = recode_index_6_6[y];
+        v[i].neg = recode_neg_6_6[y];
+        carry = (y >> 6) + v[i].neg;
+    }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_win_add_sub_6(sp_point_384* r, const sp_point_384* g,
+        const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td[33];
+    sp_point_384 rtd, pd;
+    sp_digit tmpd[2 * 6 * 6];
+#endif
+    sp_point_384* t;
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* tmp;
+    sp_digit* negy;
+    int i;
+    ecc_recode_384 v[65];
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_6(heap, rtd, rt);
+    if (err == MP_OKAY)
+        err = sp_384_point_new_6(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 33, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        err = sp_384_mod_mul_norm_6(t[1].x, g->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t[1].y, g->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t[1].z, g->z, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        t[1].infinity = 0;
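+        /* The table t[] holds i*g for i = 0..32. The dbl_n_store call below
+         * fills the powers of two t[2], t[4], t[8], t[16] and t[32]; each
+         * dbl call derives an even multiple from its known half; and each
+         * add_sub call yields an odd pair in one shared computation, e.g.
+         * t[7] = t[6] + t[1] and t[5] = t[6] - t[1]. */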
+        /* t[2] ... t[32] */
+        sp_384_proj_point_dbl_n_store_6(t, &t[ 1], 5, 1, tmp);
+        sp_384_proj_point_add_6(&t[ 3], &t[ 2], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[ 6], &t[ 3], tmp);
+        sp_384_proj_point_add_sub_6(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[10], &t[ 5], tmp);
+        sp_384_proj_point_add_sub_6(&t[11], &t[ 9], &t[10], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[12], &t[ 6], tmp);
+        sp_384_proj_point_dbl_6(&t[14], &t[ 7], tmp);
+        sp_384_proj_point_add_sub_6(&t[15], &t[13], &t[14], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[18], &t[ 9], tmp);
+        sp_384_proj_point_add_sub_6(&t[19], &t[17], &t[18], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[20], &t[10], tmp);
+        sp_384_proj_point_dbl_6(&t[22], &t[11], tmp);
+        sp_384_proj_point_add_sub_6(&t[23], &t[21], &t[22], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[24], &t[12], tmp);
+        sp_384_proj_point_dbl_6(&t[26], &t[13], tmp);
+        sp_384_proj_point_add_sub_6(&t[27], &t[25], &t[26], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[28], &t[14], tmp);
+        sp_384_proj_point_dbl_6(&t[30], &t[15], tmp);
+        sp_384_proj_point_add_sub_6(&t[31], &t[29], &t[30], &t[ 1], tmp);
+
+        negy = t[0].y;
+
+        sp_384_ecc_recode_6_6(k, v);
+
+        i = 64;
+        XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_384));
+        for (--i; i>=0; i--) {
+            sp_384_proj_point_dbl_n_6(rt, 6, tmp);
+
+            XMEMCPY(p, &t[v[i].i], sizeof(sp_point_384));
+            sp_384_sub_6(negy, p384_mod, p->y);
+            sp_384_cond_copy_6(p->y, negy, (sp_digit)0 - v[i].neg);
+            sp_384_proj_point_add_6(rt, rt, p, tmp);
+        }
+
+        if (map != 0) {
+            sp_384_map_6(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL)
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    if (tmp != NULL)
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_6(p, 0, heap);
+    sp_384_point_free_6(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef HAVE_INTEL_AVX2
+#ifdef HAVE_INTEL_AVX2
+extern void sp_384_mul_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit* b);
+#define sp_384_mont_reduce_avx2_6 sp_384_mont_reduce_6
+extern void sp_384_mont_reduce_order_avx2_6(sp_digit* a, const sp_digit* m, sp_digit mp);
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_384_mont_mul_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mul_avx2_6(r, a, b);
+    sp_384_mont_reduce_avx2_6(r, m, mp);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#ifdef HAVE_INTEL_AVX2
+extern void sp_384_sqr_avx2_6(sp_digit* r, const sp_digit* a);
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_384_sqr_avx2_6(r, a);
+    sp_384_mont_reduce_avx2_6(r, m, mp);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * n Number of times to square.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */ +static void sp_384_mont_sqr_n_avx2_6(sp_digit* r, const sp_digit* a, int n, + const sp_digit* m, sp_digit mp) +{ + sp_384_mont_sqr_avx2_6(r, a, m, mp); + for (; n > 1; n--) { + sp_384_mont_sqr_avx2_6(r, r, m, mp); + } +} + +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ + +/* Invert the number, in Montgomery form, modulo the modulus (prime) of the + * P384 curve. (r = 1 / a mod m) + * + * r Inverse result. + * a Number to invert. + * td Temporary data. + */ +static void sp_384_mont_inv_avx2_6(sp_digit* r, const sp_digit* a, sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 6); + for (i=382; i>=0; i--) { + sp_384_mont_sqr_avx2_6(t, t, p384_mod, p384_mp_mod); + if (p384_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64))) + sp_384_mont_mul_avx2_6(t, t, a, p384_mod, p384_mp_mod); + } + XMEMCPY(r, t, sizeof(sp_digit) * 6); +#else + sp_digit* t1 = td; + sp_digit* t2 = td + 2 * 6; + sp_digit* t3 = td + 4 * 6; + sp_digit* t4 = td + 6 * 6; + sp_digit* t5 = td + 8 * 6; + + /* 0x2 */ + sp_384_mont_sqr_avx2_6(t1, a, p384_mod, p384_mp_mod); + /* 0x3 */ + sp_384_mont_mul_avx2_6(t5, t1, a, p384_mod, p384_mp_mod); + /* 0xc */ + sp_384_mont_sqr_n_avx2_6(t1, t5, 2, p384_mod, p384_mp_mod); + /* 0xf */ + sp_384_mont_mul_avx2_6(t2, t5, t1, p384_mod, p384_mp_mod); + /* 0x1e */ + sp_384_mont_sqr_avx2_6(t1, t2, p384_mod, p384_mp_mod); + /* 0x1f */ + sp_384_mont_mul_avx2_6(t4, t1, a, p384_mod, p384_mp_mod); + /* 0x3e0 */ + sp_384_mont_sqr_n_avx2_6(t1, t4, 5, p384_mod, p384_mp_mod); + /* 0x3ff */ + sp_384_mont_mul_avx2_6(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0x7fe0 */ + sp_384_mont_sqr_n_avx2_6(t1, t2, 5, p384_mod, p384_mp_mod); + /* 0x7fff */ + sp_384_mont_mul_avx2_6(t4, t4, t1, p384_mod, p384_mp_mod); + /* 0x3fff8000 */ + sp_384_mont_sqr_n_avx2_6(t1, t4, 15, p384_mod, p384_mp_mod); + /* 0x3fffffff */ + sp_384_mont_mul_avx2_6(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0xfffffffc */ + sp_384_mont_sqr_n_avx2_6(t3, t2, 2, p384_mod, p384_mp_mod); + /* 0xfffffffd */ + sp_384_mont_mul_avx2_6(r, t3, a, p384_mod, p384_mp_mod); + /* 0xffffffff */ + sp_384_mont_mul_avx2_6(t3, t5, t3, p384_mod, p384_mp_mod); + /* 0xfffffffc0000000 */ + sp_384_mont_sqr_n_avx2_6(t1, t2, 30, p384_mod, p384_mp_mod); + /* 0xfffffffffffffff */ + sp_384_mont_mul_avx2_6(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffff000000000000000 */ + sp_384_mont_sqr_n_avx2_6(t1, t2, 60, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffff */ + sp_384_mont_mul_avx2_6(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */ + sp_384_mont_sqr_n_avx2_6(t1, t2, 120, p384_mod, p384_mp_mod); + /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_avx2_6(t2, t2, t1, p384_mod, p384_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */ + sp_384_mont_sqr_n_avx2_6(t1, t2, 15, p384_mod, p384_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_avx2_6(t2, t4, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */ + sp_384_mont_sqr_n_avx2_6(t1, t2, 33, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */ + sp_384_mont_mul_avx2_6(t2, t3, t1, p384_mod, p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */ + sp_384_mont_sqr_n_avx2_6(t1, t2, 96, p384_mod, 
p384_mp_mod); + /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */ + sp_384_mont_mul_avx2_6(r, r, t1, p384_mod, p384_mp_mod); + +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. + * p Montgomery form projective coordinate point. + * t Temporary ordinate data. + */ +static void sp_384_map_avx2_6(sp_point_384* r, const sp_point_384* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + int64_t n; + + sp_384_mont_inv_avx2_6(t1, p->z, t + 2*6); + + sp_384_mont_sqr_avx2_6(t2, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t1, t2, t1, p384_mod, p384_mp_mod); + + /* x /= z^2 */ + sp_384_mont_mul_avx2_6(r->x, p->x, t2, p384_mod, p384_mp_mod); + XMEMSET(r->x + 6, 0, sizeof(r->x) / 2U); + sp_384_mont_reduce_6(r->x, p384_mod, p384_mp_mod); + /* Reduce x to less than modulus */ + n = sp_384_cmp_6(r->x, p384_mod); + sp_384_cond_sub_6(r->x, r->x, p384_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_6(r->x); + + /* y /= z^3 */ + sp_384_mont_mul_avx2_6(r->y, p->y, t1, p384_mod, p384_mp_mod); + XMEMSET(r->y + 6, 0, sizeof(r->y) / 2U); + sp_384_mont_reduce_6(r->y, p384_mod, p384_mp_mod); + /* Reduce y to less than modulus */ + n = sp_384_cmp_6(r->y, p384_mod); + sp_384_cond_sub_6(r->y, r->y, p384_mod, 0 - ((n >= 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_384_norm_6(r->y); + + XMEMSET(r->z, 0, sizeof(r->z)); + r->z[0] = 1; + +} + +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static void sp_384_proj_point_dbl_avx2_6(sp_point_384* r, const sp_point_384* p, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_avx2_6(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_avx2_6(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_6(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_6(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_6(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_avx2_6(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_6(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_6(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_avx2_6(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_avx2_6(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_div2_6(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_avx2_6(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_avx2_6(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_6(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_6(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_6(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_avx2_6(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_6(y, y, t2, p384_mod); +} + +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. 
+ */ +static void sp_384_proj_point_dbl_n_avx2_6(sp_point_384* p, int n, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*6; + sp_digit* b = t + 4*6; + sp_digit* t1 = t + 6*6; + sp_digit* t2 = t + 8*6; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_384_mont_dbl_6(y, y, p384_mod); + /* W = Z^4 */ + sp_384_mont_sqr_avx2_6(w, z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_avx2_6(w, w, p384_mod, p384_mp_mod); + +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_384_mont_sqr_avx2_6(t1, x, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(t1, t1, w, p384_mod); + sp_384_mont_tpl_6(a, t1, p384_mod); + /* B = X*Y^2 */ + sp_384_mont_sqr_avx2_6(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(b, t1, x, p384_mod, p384_mp_mod); + /* X = A^2 - 2B */ + sp_384_mont_sqr_avx2_6(x, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(t2, b, p384_mod); + sp_384_mont_sub_6(x, x, t2, p384_mod); + /* Z = Z*Y */ + sp_384_mont_mul_avx2_6(z, z, y, p384_mod, p384_mp_mod); + /* t2 = Y^4 */ + sp_384_mont_sqr_avx2_6(t1, t1, p384_mod, p384_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_384_mont_mul_avx2_6(w, w, t1, p384_mod, p384_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_384_mont_sub_6(y, b, x, p384_mod); + sp_384_mont_mul_avx2_6(y, y, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(y, y, p384_mod); + sp_384_mont_sub_6(y, y, t1, p384_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_384_mont_sqr_avx2_6(t1, x, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(t1, t1, w, p384_mod); + sp_384_mont_tpl_6(a, t1, p384_mod); + /* B = X*Y^2 */ + sp_384_mont_sqr_avx2_6(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(b, t1, x, p384_mod, p384_mp_mod); + /* X = A^2 - 2B */ + sp_384_mont_sqr_avx2_6(x, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(t2, b, p384_mod); + sp_384_mont_sub_6(x, x, t2, p384_mod); + /* Z = Z*Y */ + sp_384_mont_mul_avx2_6(z, z, y, p384_mod, p384_mp_mod); + /* t2 = Y^4 */ + sp_384_mont_sqr_avx2_6(t1, t1, p384_mod, p384_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_384_mont_sub_6(y, b, x, p384_mod); + sp_384_mont_mul_avx2_6(y, y, a, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(y, y, p384_mod); + sp_384_mont_sub_6(y, y, t1, p384_mod); +#endif + /* Y = Y/2 */ + sp_384_div2_6(y, y, p384_mod); +} + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_384_proj_point_add_avx2_6(sp_point_384* r, const sp_point_384* p, const sp_point_384* q, + sp_digit* t) +{ + const sp_point_384* ap[2]; + sp_point_384* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + sp_digit* t3 = t + 4*6; + sp_digit* t4 = t + 6*6; + sp_digit* t5 = t + 8*6; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Ensure only the first point is the same as the result. 
*/ + if (q == r) { + const sp_point_384* a = p; + p = q; + q = a; + } + + /* Check double */ + (void)sp_384_sub_6(t1, p384_mod, q->y); + sp_384_norm_6(t1); + if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & + (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { + sp_384_proj_point_dbl_6(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_384)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<6; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<6; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<6; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_avx2_6(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t1, t1, x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_avx2_6(t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_avx2_6(t3, t3, y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_avx2_6(t4, t4, q->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_6(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_6(t4, t4, t3, p384_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_avx2_6(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(z, z, t2, p384_mod, p384_mp_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_avx2_6(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_avx2_6(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t5, t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(x, x, t5, p384_mod); + sp_384_mont_dbl_6(t1, y, p384_mod); + sp_384_mont_sub_6(x, x, t1, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_6(y, y, x, p384_mod); + sp_384_mont_mul_avx2_6(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(y, y, t5, p384_mod); + } +} + +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. 
+ */
+static void sp_384_proj_point_dbl_n_store_avx2_6(sp_point_384* r, const sp_point_384* p,
+        int n, int m, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*6;
+    sp_digit* b = t + 4*6;
+    sp_digit* t1 = t + 6*6;
+    sp_digit* t2 = t + 8*6;
+    sp_digit* x = r[2*m].x;
+    sp_digit* y = r[(1<<n)*m].y;
+    sp_digit* z = r[2*m].z;
+    int i;
+
+    for (i=0; i<6; i++) {
+        x[i] = p->x[i];
+    }
+    for (i=0; i<6; i++) {
+        y[i] = p->y[i];
+    }
+    for (i=0; i<6; i++) {
+        z[i] = p->z[i];
+    }
+
+    /* Y = 2*Y */
+    sp_384_mont_dbl_6(y, y, p384_mod);
+    /* W = Z^4 */
+    sp_384_mont_sqr_avx2_6(w, z, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_avx2_6(w, w, p384_mod, p384_mp_mod);
+    for (i=1; i<=n; i++) {
+        /* A = 3*(X^2 - W) */
+        sp_384_mont_sqr_avx2_6(t1, x, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_6(t1, t1, w, p384_mod);
+        sp_384_mont_tpl_6(a, t1, p384_mod);
+        /* B = X*Y^2 */
+        sp_384_mont_sqr_avx2_6(t2, y, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_avx2_6(b, t2, x, p384_mod, p384_mp_mod);
+        x = r[(1<<i)*m].x;
+        /* X = A^2 - 2B */
+        sp_384_mont_sqr_avx2_6(x, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_6(t1, b, p384_mod);
+        sp_384_mont_sub_6(x, x, t1, p384_mod);
+        /* Z = Z*Y */
+        z = r[(1<<i)*m].z;
+        sp_384_mont_mul_avx2_6(z, z, y, p384_mod, p384_mp_mod);
+        /* t2 = Y^4 */
+        sp_384_mont_sqr_avx2_6(t2, t2, p384_mod, p384_mp_mod);
+        if (i != n) {
+            /* W = W*Y^4 */
+            sp_384_mont_mul_avx2_6(w, w, t2, p384_mod, p384_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_384_mont_sub_6(y, b, x, p384_mod);
+        sp_384_mont_mul_avx2_6(y, y, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_6(y, y, p384_mod);
+        sp_384_mont_sub_6(y, y, t2, p384_mod);
+
+        /* Y = Y/2 */
+        sp_384_div2_6(r[(1<<i)*m].y, y, p384_mod);
+        r[(1<<i)*m].infinity = 0;
+    }
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * ra Result of addition.
+ * rs Result of subtraction.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_sub_avx2_6(sp_point_384* ra, sp_point_384* rs,
+        const sp_point_384* p, const sp_point_384* q, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*6;
+    sp_digit* t3 = t + 4*6;
+    sp_digit* t4 = t + 6*6;
+    sp_digit* t5 = t + 8*6;
+    sp_digit* t6 = t + 10*6;
+    sp_digit* x = ra->x;
+    sp_digit* y = ra->y;
+    sp_digit* z = ra->z;
+    sp_digit* xs = rs->x;
+    sp_digit* ys = rs->y;
+    sp_digit* zs = rs->z;
+
+    XMEMCPY(x, p->x, sizeof(p->x) / 2);
+    XMEMCPY(y, p->y, sizeof(p->y) / 2);
+    XMEMCPY(z, p->z, sizeof(p->z) / 2);
+    ra->infinity = 0;
+    rs->infinity = 0;
+
+    /* U1 = X1*Z2^2 */
+    sp_384_mont_sqr_avx2_6(t1, q->z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_avx2_6(t3, t1, q->z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_avx2_6(t1, t1, x, p384_mod, p384_mp_mod);
+    /* U2 = X2*Z1^2 */
+    sp_384_mont_sqr_avx2_6(t2, z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_avx2_6(t4, t2, z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_avx2_6(t2, t2, q->x, p384_mod, p384_mp_mod);
+    /* S1 = Y1*Z2^3 */
+    sp_384_mont_mul_avx2_6(t3, t3, y, p384_mod, p384_mp_mod);
+    /* S2 = Y2*Z1^3 */
+    sp_384_mont_mul_avx2_6(t4, t4, q->y, p384_mod, p384_mp_mod);
+    /* H = U2 - U1 */
+    sp_384_mont_sub_6(t2, t2, t1, p384_mod);
+    /* RS = S2 + S1 */
+    sp_384_mont_add_6(t6, t4, t3, p384_mod);
+    /* R = S2 - S1 */
+    sp_384_mont_sub_6(t4, t4, t3, p384_mod);
+    /* Z3 = H*Z1*Z2 */
+    /* ZS = H*Z1*Z2 */
+    sp_384_mont_mul_avx2_6(z, z, q->z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_avx2_6(z, z, t2, p384_mod, p384_mp_mod);
+    XMEMCPY(zs, z, sizeof(p->z)/2);
+    /* X3 = R^2 - H^3 - 2*U1*H^2 */
+    /* XS = RS^2 - H^3 - 2*U1*H^2 */
+    sp_384_mont_sqr_avx2_6(x, t4, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_avx2_6(xs, t6, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_avx2_6(t5, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_avx2_6(y, t1, t5, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_avx2_6(t5, t5, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_6(x, x, t5, p384_mod);
+    sp_384_mont_sub_6(xs, xs, t5, p384_mod);
+    sp_384_mont_dbl_6(t1, y, p384_mod);
+    sp_384_mont_sub_6(x, x, t1, p384_mod);
+    sp_384_mont_sub_6(xs, xs, t1, p384_mod);
+    /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+    /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */
+    sp_384_mont_sub_6(ys, y, xs, p384_mod);
+    sp_384_mont_sub_6(y, y, x, p384_mod);
+    sp_384_mont_mul_avx2_6(y, y, t4, p384_mod, p384_mp_mod);
+    sp_384_sub_6(t6, p384_mod, t6);
+    sp_384_mont_mul_avx2_6(ys, ys, t6, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_avx2_6(t5, t5, t3, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_6(y, y, t5, p384_mod);
+    sp_384_mont_sub_6(ys, ys, t5, p384_mod);
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */ +static int sp_384_ecc_mulmod_win_add_sub_avx2_6(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 td[33]; + sp_point_384 rtd, pd; + sp_digit tmpd[2 * 6 * 6]; +#endif + sp_point_384* t; + sp_point_384* rt; + sp_point_384* p = NULL; + sp_digit* tmp; + sp_digit* negy; + int i; + ecc_recode_384 v[65]; + int err; + + (void)heap; + + err = sp_384_point_new_6(heap, rtd, rt); + if (err == MP_OKAY) + err = sp_384_point_new_6(heap, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 33, heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; +#else + t = td; + tmp = tmpd; +#endif + + + if (err == MP_OKAY) { + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_384_mod_mul_norm_6(t[1].x, g->x, p384_mod); + } + if (err == MP_OKAY) { + err = sp_384_mod_mul_norm_6(t[1].y, g->y, p384_mod); + } + if (err == MP_OKAY) { + err = sp_384_mod_mul_norm_6(t[1].z, g->z, p384_mod); + } + + if (err == MP_OKAY) { + t[1].infinity = 0; + /* t[2] ... t[32] */ + sp_384_proj_point_dbl_n_store_avx2_6(t, &t[ 1], 5, 1, tmp); + sp_384_proj_point_add_avx2_6(&t[ 3], &t[ 2], &t[ 1], tmp); + sp_384_proj_point_dbl_avx2_6(&t[ 6], &t[ 3], tmp); + sp_384_proj_point_add_sub_avx2_6(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp); + sp_384_proj_point_dbl_avx2_6(&t[10], &t[ 5], tmp); + sp_384_proj_point_add_sub_avx2_6(&t[11], &t[ 9], &t[10], &t[ 1], tmp); + sp_384_proj_point_dbl_avx2_6(&t[12], &t[ 6], tmp); + sp_384_proj_point_dbl_avx2_6(&t[14], &t[ 7], tmp); + sp_384_proj_point_add_sub_avx2_6(&t[15], &t[13], &t[14], &t[ 1], tmp); + sp_384_proj_point_dbl_avx2_6(&t[18], &t[ 9], tmp); + sp_384_proj_point_add_sub_avx2_6(&t[19], &t[17], &t[18], &t[ 1], tmp); + sp_384_proj_point_dbl_avx2_6(&t[20], &t[10], tmp); + sp_384_proj_point_dbl_avx2_6(&t[22], &t[11], tmp); + sp_384_proj_point_add_sub_avx2_6(&t[23], &t[21], &t[22], &t[ 1], tmp); + sp_384_proj_point_dbl_avx2_6(&t[24], &t[12], tmp); + sp_384_proj_point_dbl_avx2_6(&t[26], &t[13], tmp); + sp_384_proj_point_add_sub_avx2_6(&t[27], &t[25], &t[26], &t[ 1], tmp); + sp_384_proj_point_dbl_avx2_6(&t[28], &t[14], tmp); + sp_384_proj_point_dbl_avx2_6(&t[30], &t[15], tmp); + sp_384_proj_point_add_sub_avx2_6(&t[31], &t[29], &t[30], &t[ 1], tmp); + + negy = t[0].y; + + sp_384_ecc_recode_6_6(k, v); + + i = 64; + XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_384)); + for (--i; i>=0; i--) { + sp_384_proj_point_dbl_n_avx2_6(rt, 6, tmp); + + XMEMCPY(p, &t[v[i].i], sizeof(sp_point_384)); + sp_384_sub_6(negy, p384_mod, p->y); + sp_384_cond_copy_6(p->y, negy, (sp_digit)0 - v[i].neg); + sp_384_proj_point_add_avx2_6(rt, rt, p, tmp); + } + + if (map != 0) { + sp_384_map_avx2_6(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_384)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); + if (tmp != NULL) + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif + sp_384_point_free_6(p, 0, heap); + sp_384_point_free_6(rt, 0, heap); + + return err; +} + +#endif /* HAVE_INTEL_AVX2 */ +/* A table entry for pre-computed points. 
*/ +typedef struct sp_table_entry_384 { + sp_digit x[6]; + sp_digit y[6]; +} sp_table_entry_384; + +#ifdef FP_ECC +#endif /* FP_ECC */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_384_proj_point_add_qz1_6(sp_point_384* r, const sp_point_384* p, + const sp_point_384* q, sp_digit* t) +{ + const sp_point_384* ap[2]; + sp_point_384* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + sp_digit* t3 = t + 4*6; + sp_digit* t4 = t + 6*6; + sp_digit* t5 = t + 8*6; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Check double */ + (void)sp_384_sub_6(t1, p384_mod, q->y); + sp_384_norm_6(t1); + if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & + (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { + sp_384_proj_point_dbl_6(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_384)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<6; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<6; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<6; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); + /* H = U2 - X1 */ + sp_384_mont_sub_6(t2, t2, x, p384_mod); + /* R = S2 - Y1 */ + sp_384_mont_sub_6(t4, t4, y, p384_mod); + /* Z3 = H*Z1 */ + sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_384_mont_sqr_6(t1, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t3, x, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(x, t1, t5, p384_mod); + sp_384_mont_dbl_6(t1, t3, p384_mod); + sp_384_mont_sub_6(x, x, t1, p384_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_384_mont_sub_6(t3, t3, x, p384_mod); + sp_384_mont_mul_6(t3, t3, t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t5, t5, y, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(y, t3, t5, p384_mod); + } +} + +#ifdef FP_ECC +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_384_proj_to_affine_6(sp_point_384* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 6; + sp_digit* tmp = t + 4 * 6; + + sp_384_mont_inv_6(t1, a->z, tmp); + + sp_384_mont_sqr_6(t2, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t1, t2, t1, p384_mod, p384_mp_mod); + + sp_384_mont_mul_6(a->x, a->x, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(a->y, a->y, t1, p384_mod, p384_mp_mod); + XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod)); +} + +/* Generate the pre-computed table of points for the base point. + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. 
+ */
+static int sp_384_gen_stripe_table_6(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_6(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_6(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_384_proj_point_dbl_n_6(t, 48, tmp);
+            sp_384_proj_to_affine_6(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_6(t, s1, s2, tmp);
+                sp_384_proj_to_affine_6(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_6(s2, 0, heap);
+    sp_384_point_free_6(s1, 0, heap);
+    sp_384_point_free_6( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
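+/* Illustrative sketch, not from the original patch: the striped table built
+ * above lets sp_384_ecc_mulmod_stripe_6() below walk the 384-bit scalar as
+ * 48 columns of 8 bits, one bit taken from each 48-bit stripe. The table
+ * index for column i is assembled exactly as in the loop below; a
+ * hypothetical standalone form is:
+ *
+ *     static int stripe_index(const uint64_t k[6], int i)
+ *     {
+ *         int y = 0;
+ *         int j, x;
+ *         for (j = 0, x = i; j < 8; j++, x += 48) {
+ *             y |= (int)((k[x / 64] >> (x % 64)) & 1) << j;
+ *         }
+ *         return y;   // table[y] = sum over set bits j of 2^(48*j) * G
+ *     }
+ *
+ * so one table lookup and one point add per column stand in for eight
+ * scalar bits, at the cost of the 256-entry pre-computed table. */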
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_6(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 6 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+    err = sp_384_point_new_6(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        y = 0;
+        for (j=0,x=47; j<8; j++,x+=48) {
+            y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=46; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=48) {
+                y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_6(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_6(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_6(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(p, 0, heap);
+    sp_384_point_free_6(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    sp_digit x[6];
+    sp_digit y[6];
+    sp_table_entry_384 table[256];
+    uint32_t cnt;
+    int set;
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_6(g->x, sp_cache_384[i].x) &
+                sp_384_cmp_equal_6(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, const sp_digit* k, + int map, void* heap) +{ +#ifndef FP_ECC + return sp_384_ecc_mulmod_win_add_sub_6(r, g, k, map, heap); +#else + sp_digit tmp[2 * 6 * 7]; + sp_cache_384_t* cache; + int err = MP_OKAY; + +#ifndef HAVE_THREAD_LS + if (initCacheMutex_384 == 0) { + wc_InitMutex(&sp_cache_384_lock); + initCacheMutex_384 = 1; + } + if (wc_LockMutex(&sp_cache_384_lock) != 0) + err = BAD_MUTEX_E; +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_384(g, &cache); + if (cache->cnt == 2) + sp_384_gen_stripe_table_6(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_384_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_384_ecc_mulmod_win_add_sub_6(r, g, k, map, heap); + } + else { + err = sp_384_ecc_mulmod_stripe_6(r, g, cache->table, k, + map, heap); + } + } + + return err; +#endif +} + +#ifdef HAVE_INTEL_AVX2 +#ifdef FP_ECC +#endif /* FP_ECC */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_384_proj_point_add_qz1_avx2_6(sp_point_384* r, const sp_point_384* p, + const sp_point_384* q, sp_digit* t) +{ + const sp_point_384* ap[2]; + sp_point_384* rp[2]; + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + sp_digit* t3 = t + 4*6; + sp_digit* t4 = t + 6*6; + sp_digit* t5 = t + 8*6; + sp_digit* x; + sp_digit* y; + sp_digit* z; + int i; + + /* Check double */ + (void)sp_384_sub_6(t1, p384_mod, q->y); + sp_384_norm_6(t1); + if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & + (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { + sp_384_proj_point_dbl_6(r, p, t); + } + else { + rp[0] = r; + + /*lint allow cast to different type of pointer*/ + rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ + XMEMSET(rp[1], 0, sizeof(sp_point_384)); + x = rp[p->infinity | q->infinity]->x; + y = rp[p->infinity | q->infinity]->y; + z = rp[p->infinity | q->infinity]->z; + + ap[0] = p; + ap[1] = q; + for (i=0; i<6; i++) { + r->x[i] = ap[p->infinity]->x[i]; + } + for (i=0; i<6; i++) { + r->y[i] = ap[p->infinity]->y[i]; + } + for (i=0; i<6; i++) { + r->z[i] = ap[p->infinity]->z[i]; + } + r->infinity = ap[p->infinity]->infinity; + + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_avx2_6(t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_avx2_6(t4, t4, q->y, p384_mod, p384_mp_mod); + /* H = U2 - X1 */ + sp_384_mont_sub_6(t2, t2, x, p384_mod); + /* R = S2 - Y1 */ + sp_384_mont_sub_6(t4, t4, y, p384_mod); + /* Z3 = H*Z1 */ + sp_384_mont_mul_avx2_6(z, z, t2, p384_mod, p384_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_384_mont_sqr_avx2_6(t1, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_avx2_6(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t3, x, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t5, t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(x, t1, t5, p384_mod); + sp_384_mont_dbl_6(t1, t3, p384_mod); + sp_384_mont_sub_6(x, x, t1, p384_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_384_mont_sub_6(t3, t3, x, p384_mod); + sp_384_mont_mul_avx2_6(t3, t3, t4, p384_mod, p384_mp_mod); 
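+        /* At this point t5 = H^3; the next two lines compute Y1*H^3 and
+         * subtract it to finish Y3. */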
+        sp_384_mont_mul_avx2_6(t5, t5, y, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_6(y, t3, t5, p384_mod);
+    }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a Point to convert.
+ * t Temporary data.
+ */
+static void sp_384_proj_to_affine_avx2_6(sp_point_384* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 6;
+    sp_digit* tmp = t + 4 * 6;
+
+    sp_384_mont_inv_avx2_6(t1, a->z, tmp);
+
+    sp_384_mont_sqr_avx2_6(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_avx2_6(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    sp_384_mont_mul_avx2_6(a->x, a->x, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_avx2_6(a->y, a->y, t1, p384_mod, p384_mp_mod);
+    XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a The base point.
+ * table Place to store generated point data.
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_avx2_6(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_6(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_avx2_6(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        for (i=1; i<8; i++) {
+            sp_384_proj_point_dbl_n_avx2_6(t, 48, tmp);
+            sp_384_proj_to_affine_avx2_6(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_avx2_6(t, s1, s2, tmp);
+                sp_384_proj_to_affine_avx2_6(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_6(s2, 0, heap);
+    sp_384_point_free_6(s1, 0, heap);
+    sp_384_point_free_6( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */ +static int sp_384_ecc_mulmod_stripe_avx2_6(sp_point_384* r, const sp_point_384* g, + const sp_table_entry_384* table, const sp_digit* k, int map, void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 rtd; + sp_point_384 pd; + sp_digit td[2 * 6 * 6]; +#endif + sp_point_384* rt; + sp_point_384* p = NULL; + sp_digit* t; + int i, j; + int y, x; + int err; + + (void)g; + (void)heap; + + + err = sp_384_point_new_6(heap, rtd, rt); + if (err == MP_OKAY) { + err = sp_384_point_new_6(heap, pd, p); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) { + err = MEMORY_E; + } +#else + t = td; +#endif + + if (err == MP_OKAY) { + XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod)); + XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod)); + + y = 0; + for (j=0,x=47; j<8; j++,x+=48) { + y |= ((k[x / 64] >> (x % 64)) & 1) << j; + } + XMEMCPY(rt->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(rt->y, table[y].y, sizeof(table[y].y)); + rt->infinity = !y; + for (i=46; i>=0; i--) { + y = 0; + for (j=0,x=i; j<8; j++,x+=48) { + y |= ((k[x / 64] >> (x % 64)) & 1) << j; + } + + sp_384_proj_point_dbl_avx2_6(rt, rt, t); + XMEMCPY(p->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(p->y, table[y].y, sizeof(table[y].y)); + p->infinity = !y; + sp_384_proj_point_add_qz1_avx2_6(rt, rt, p, t); + } + + if (map != 0) { + sp_384_map_avx2_6(r, rt, t); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_384)); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { + XFREE(t, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_6(p, 0, heap); + sp_384_point_free_6(rt, 0, heap); + + return err; +} + +/* Multiply the base point of P384 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_avx2_6(sp_point_384* r, const sp_point_384* g, const sp_digit* k, + int map, void* heap) +{ +#ifndef FP_ECC + return sp_384_ecc_mulmod_win_add_sub_avx2_6(r, g, k, map, heap); +#else + sp_digit tmp[2 * 6 * 7]; + sp_cache_384_t* cache; + int err = MP_OKAY; + +#ifndef HAVE_THREAD_LS + if (initCacheMutex_384 == 0) { + wc_InitMutex(&sp_cache_384_lock); + initCacheMutex_384 = 1; + } + if (wc_LockMutex(&sp_cache_384_lock) != 0) + err = BAD_MUTEX_E; +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_384(g, &cache); + if (cache->cnt == 2) + sp_384_gen_stripe_table_avx2_6(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_384_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_384_ecc_mulmod_win_add_sub_avx2_6(r, g, k, map, heap); + } + else { + err = sp_384_ecc_mulmod_stripe_avx2_6(r, g, cache->table, k, + map, heap); + } + } + + return err; +#endif +} + +#endif /* HAVE_INTEL_AVX2 */ +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * r Resulting point. + * map Indicates whether to convert result to affine. 
+ * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map, + void* heap) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_point_384 p; + sp_digit kd[6]; +#endif + sp_point_384* point; + sp_digit* k = NULL; + int err = MP_OKAY; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + err = sp_384_point_new_6(heap, p, point); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#else + k = kd; +#endif + if (err == MP_OKAY) { + sp_384_from_mp(k, 6, km); + sp_384_point_from_ecc_point_6(point, gm); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_384_ecc_mulmod_avx2_6(point, point, k, map, heap); + else +#endif + err = sp_384_ecc_mulmod_6(point, point, k, map, heap); + } + if (err == MP_OKAY) { + err = sp_384_point_to_ecc_point_6(point, r); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (k != NULL) { + XFREE(k, heap, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_6(point, 0, heap); + + return err; +} + +static const sp_table_entry_384 p384_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x3dd0756649c0b528L,0x20e378e2a0d6ce38L,0x879c3afc541b4d6eL, + 0x6454868459a30effL,0x812ff723614ede2bL,0x4d3aadc2299e1513L }, + { 0x23043dad4b03a4feL,0xa1bfa8bf7bb4a9acL,0x8bade7562e83b050L, + 0xc6c3521968f4ffd9L,0xdd8002263969a840L,0x2b78abc25a15c5e9L } }, + /* 2 */ + { { 0x298647532b0c535bL,0x90dd695370506296L,0x038cd6b4216ab9acL, + 0x3df9b7b7be12d76aL,0x13f4d9785f347bdbL,0x222c5c9c13e94489L }, + { 0x5f8e796f2680dc64L,0x120e7cb758352417L,0x254b5d8ad10740b8L, + 0xc38b8efb5337dee6L,0xf688c2e194f02247L,0x7b5c75f36c25bc4cL } }, + /* 3 */ + { { 0xe26a3cc39edffea5L,0x35bbfd1c37d7e9fcL,0xf0e7700d9bde3ef6L, + 0x0380eb471a538f5aL,0x2e9da8bb05bf9eb3L,0xdbb93c731a460c3eL }, + { 0x37dba260f526b605L,0x95d4978efd785537L,0x24ed793aed72a04aL, + 0x2694837776005b1aL,0x99f557b99e681f82L,0xae5f9557d64954efL } }, + /* 4 */ + { { 0x24480c57f26feef9L,0xc31a26943a0e1240L,0x735002c3273e2bc7L, + 0x8c42e9c53ef1ed4cL,0x028babf67f4948e8L,0x6a502f438a978632L }, + { 0xf5f13a46b74536feL,0x1d218babd8a9f0ebL,0x30f36bcc37232768L, + 0xc5317b31576e8c18L,0xef1d57a69bbcb766L,0x917c4930b3e3d4dcL } }, + /* 5 */ + { { 0x11426e2ee349ddd0L,0x9f117ef99b2fc250L,0xff36b480ec0174a6L, + 0x4f4bde7618458466L,0x2f2edb6d05806049L,0x8adc75d119dfca92L }, + { 0xa619d097b7d5a7ceL,0x874275e5a34411e9L,0x5403e0470da4b4efL, + 0x2ebaafd977901d8fL,0x5e63ebcea747170fL,0x12a369447f9d8036L } }, + /* 6 */ + { { 0x28f9c07a4fc52870L,0xce0b37481a53a961L,0xd550fa180e1828d9L, + 0xa24abaf76adb225aL,0xd11ed0a56e58a348L,0xf3d811e6948acb62L }, + { 0x8618dd774c61ed22L,0x0bb747f980b47c9dL,0x22bf796fde6b8559L, + 0xfdfd1c6d680a21e9L,0xc0db15772af2c9ddL,0xa09379e6c1e90f3dL } }, + /* 7 */ + { { 0x386c66efe085c629L,0x5fc2a461095bc89aL,0x1353d631203f4b41L, + 0x7ca1972b7e4bd8f5L,0xb077380aa7df8ce9L,0xd8a90389ee7e4ea3L }, + { 0x1bc74dc7e7b14461L,0xdc2cb0140c9c4f78L,0x52b4b3a684ef0a10L, + 0xbde6ea5d20327fe2L,0xb71ec435660f9615L,0xeede5a04b8ad8173L } }, + /* 8 */ + { { 
0x5584cbb3893b9a2dL,0x820c660b00850c5dL,0x4126d8267df2d43dL, + 0xdd5bbbf00109e801L,0x85b92ee338172f1cL,0x609d4f93f31430d9L }, + { 0x1e059a07eadaf9d6L,0x70e6536c0f125fb0L,0xd6220751560f20e7L, + 0xa59489ae7aaf3a9aL,0x7b70e2f664bae14eL,0x0dd0370176d08249L } }, + /* 9 */ + { { 0x4cc13be88510521fL,0x87315ba9f724cc17L,0xb49d83bb353dc263L, + 0x8b677efe0c279257L,0x510a1c1cc93c9537L,0x33e30cd8a4702c99L }, + { 0xf0ffc89d2208353fL,0x0170fa8dced42b2bL,0x090851ed26e2a5f5L, + 0x81276455ecb52c96L,0x0646c4e17fe1adf4L,0x513f047eb0868eabL } }, + /* 10 */ + { { 0xc07611f4df5bdf53L,0x45d331a758b11a6dL,0x58965daf1c4ee394L, + 0xba8bebe75a5878d1L,0xaecc0a1882dd3025L,0xcf2a3899a923eb8bL }, + { 0xf98c9281d24fd048L,0x841bfb598bbb025dL,0xb8ddf8cec9ab9d53L, + 0x538a4cb67fef044eL,0x092ac21f23236662L,0xa919d3850b66f065L } }, + /* 11 */ + { { 0x3db03b4085d480d8L,0x8cd9f4791b287a7dL,0x8f24dc754a8f3baeL, + 0x482eb8003db41892L,0x38bf9eb39c56e0f5L,0x8b9773209a91dc6fL }, + { 0xa31b05b27209cfc2L,0x4c49bf8505b2db70L,0x56462498d619527bL, + 0x3fe510391fac51baL,0xfb04f55eab4b8342L,0xc07c10dc04c6eabfL } }, + /* 12 */ + { { 0xad22fe4cdb32f048L,0x5f23bf91475ed6dfL,0xa50ce0c0aa66b6cbL, + 0xdf627a89f03405c0L,0x3674837df95e2d6aL,0x081c95b6ba42e64eL }, + { 0xeba3e036e71d6cebL,0xb45bcccf6c6b0271L,0x67b47e630684701dL, + 0x60f8f942e712523fL,0x824234725cd47adcL,0x83027d7987649cbbL } }, + /* 13 */ + { { 0xb3929ea63615b0b8L,0xb41441fda54dac41L,0x8995d556b5b6a368L, + 0xa80d4529167ef05eL,0xf6bcb4a16d25a27fL,0x210d6a4c7bd55b68L }, + { 0xf3804abb25351130L,0x1d2df699903e37ebL,0x5f201efc084c25c8L, + 0x31a28c87a1c68e91L,0x81dad253563f62a5L,0x5dd6de70d6c415d4L } }, + /* 14 */ + { { 0x29f470fd846612ceL,0x986f3eecda18d997L,0x6b84c1612f34af86L, + 0x5ef0a40846ddaf8bL,0x14405a00e49e795fL,0x5f491b16aa2f7a37L }, + { 0xc7f07ae4db41b38dL,0xef7d119e18fbfcaaL,0x3a18e07614443b19L, + 0x4356841a79a19926L,0x91f4a91ce2226fbeL,0xdc77248c3cc88721L } }, + /* 15 */ + { { 0xd570ff1ae4b1ec9dL,0x21d23e0ee7eef706L,0x3cde40f4ca19e086L, + 0x7d6523c4cd4bb270L,0x16c1f06cbf13aa6cL,0x5aa7245ad14c4b60L }, + { 0x37f8146744b74de8L,0x839e7a17620a934eL,0xf74d14e8de8b1aa1L, + 0x8789fa51f30d75e2L,0x09b24052c81c261eL,0x654e267833c565eeL } }, + /* 16 */ + { { 0x378205de2f9fbe67L,0xc4afcb837f728e44L,0xdbcec06c682e00f1L, + 0xf2a145c3114d5423L,0xa01d98747a52463eL,0xfc0935b17d717b0aL }, + { 0x9653bc4fd4d01f95L,0x9aa83ea89560ad34L,0xf77943dcaf8e3f3fL, + 0x70774a10e86fe16eL,0x6b62e6f1bf9ffdcfL,0x8a72f39e588745c9L } }, + /* 17 */ + { { 0x73ade4da2341c342L,0xdd326e54ea704422L,0x336c7d983741cef3L, + 0x1eafa00d59e61549L,0xcd3ed892bd9a3efdL,0x03faf26cc5c6c7e4L }, + { 0x087e2fcf3045f8acL,0x14a65532174f1e73L,0x2cf84f28fe0af9a7L, + 0xddfd7a842cdc935bL,0x4c0f117b6929c895L,0x356572d64c8bcfccL } }, + /* 18 */ + { { 0x7ecbac017d8c1bbaL,0x6058f9c390b0f3d5L,0xaee116e3f6197d0fL, + 0xc4dd70684033b128L,0xf084dba6c209b983L,0x97c7c2cf831dbc4aL }, + { 0x2f4e61ddf96010e8L,0xd97e4e20529faa17L,0x4ee6666069d37f20L, + 0xccc139ed3d366d72L,0x690b6ee213488e0fL,0x7cad1dc5f3a6d533L } }, + /* 19 */ + { { 0x660a9a81da57a41fL,0xe74a0412ec0039b6L,0x42343c6b5e1dad15L, + 0x284f3ff546681d4cL,0xb51087f163749e89L,0x070f23cc6f9f2f13L }, + { 0x542211da5d186e14L,0x84748f37fddb0dffL,0x41a3aab4db1f4180L, + 0x25ed667ba6402d0eL,0x2f2924a902f58355L,0x5844ee7cfa44a689L } }, + /* 20 */ + { { 0xfab086073f3b236fL,0x19e9d41d81e221daL,0xf3f6571e3927b428L, + 0x4348a9337550f1f6L,0x7167b996a85e62f0L,0x62d437597f5452bfL }, + { 0xd85feb9ef2955926L,0x440a561f6df78353L,0x389668ec9ca36b59L, + 0x052bf1a1a22da016L,0xbdfbff72f6093254L,0x94e50f28e22209f3L } 
}, + /* 21 */ + { { 0x90b2e5b33062e8afL,0xa8572375e8a3d369L,0x3fe1b00b201db7b1L, + 0xe926def0ee651aa2L,0x6542c9beb9b10ad7L,0x098e309ba2fcbe74L }, + { 0x779deeb3fff1d63fL,0x23d0e80a20bfd374L,0x8452bb3b8768f797L, + 0xcf75bb4d1f952856L,0x8fe6b40029ea3faaL,0x12bd3e4081373a53L } }, + /* 22 */ + { { 0xc023780d104cbba5L,0x6207e747fa35dd4cL,0x35c239281ca9b6a3L, + 0x4ff19be897987b10L,0xb8476bbf8022eee8L,0xaa0a4a14d3bbe74dL }, + { 0x20f94331187d4543L,0x3215387079f6e066L,0x83b0f74eac7e82e1L, + 0xa7748ba2828f06abL,0xc5f0298ac26ef35fL,0x0f0c50708e9a7dbdL } }, + /* 23 */ + { { 0x0c5c244cdef029ddL,0x3dabc687850661b8L,0x9992b865fe11d981L, + 0xe9801b8f6274dbadL,0xe54e6319098da242L,0x9929a91a91a53d08L }, + { 0x37bffd7235285887L,0xbc759425f1418102L,0x9280cc35fd2e6e20L, + 0x735c600cfbc42ee5L,0xb7ad28648837619aL,0xa3627231a778c57bL } }, + /* 24 */ + { { 0xae799b5c91361ed8L,0x47d71b756c63366cL,0x54cdd5211b265a6aL, + 0xe0215a5998d77b74L,0x4424d9b7bab29db0L,0x8b0ffacc7fd9e536L }, + { 0x46d85d1237b5d9efL,0x5b106d62bfa91747L,0xed0479f85f99ba2dL, + 0x0e6f39231d104de4L,0x83a84c8425e8983fL,0xa9507e0af8105a70L } }, + /* 25 */ + { { 0xf6c68a6e14cf381cL,0xaf9d27bdc22e31ccL,0x23568d4daa8a5ccbL, + 0xe431eec0e338e4d2L,0xf1a828fe8f52ad1fL,0xdb6a0579e86acd80L }, + { 0x2885672e4507832aL,0x73fc275f887e5289L,0x65f8027805610d08L, + 0x8d9b4554075ff5b0L,0x3a8e8fb109f712b5L,0x39f0ac862ebe9cf2L } }, + /* 26 */ + { { 0xd8fabf784c52edf5L,0xdcd737e5a589ae53L,0x94918bf0d791ab17L, + 0xb5fbd956bcff06c9L,0xf6d3032edca46d45L,0x2cdff7e141a3e486L }, + { 0x6674b3ba61f47ec8L,0x8a882163eef84608L,0xa257c7054c687f90L, + 0xe30cb2edf6cdf227L,0x2c4c64ca7f6ea846L,0x186fa17ccc6bcd3cL } }, + /* 27 */ + { { 0x48a3f5361dfcb91eL,0x83595e13646d358aL,0xbd15827b91128798L, + 0x3ce612b82187757aL,0x873150a161bd7372L,0xf4684530b662f568L }, + { 0x8833950b401896f6L,0xe11cb89a77f3e090L,0xb2f12cac48e7f4a5L, + 0x313dd769f606677eL,0xfdcf08b316579f93L,0x6429cec946b8f22bL } }, + /* 28 */ + { { 0x4984dd54bb75f9a4L,0x4aef06b929d3b570L,0xb5f84ca23d6e4c1eL, + 0x24c61c11b083ef35L,0xce4a7392392ca9ffL,0x865d65176730a800L }, + { 0xca3dfe76722b4a2bL,0x12c04bf97b083e0eL,0x803ce5b51b86b8a5L, + 0x3fc7632d6a7e3e0cL,0xc89970c2c81adbe4L,0x3cbcd3ad120e16b1L } }, + /* 29 */ + { { 0xfbfb4cc7ec30ce93L,0x10ed6c7db72720a2L,0xec675bf747b55500L, + 0x90725903333ff7c3L,0xc7c3973e5075bfc0L,0xb049ecb007acf31bL }, + { 0xb4076eaf4f58839cL,0x101896daa2b05e4fL,0x3f6033b0ab40c66eL, + 0x19ee9eebc8d864baL,0xeb6cf15547bf6d2aL,0x8e5a9663f826477dL } }, + /* 30 */ + { { 0x69e62fddf7fbd5e1L,0x38ecfe5476912b1dL,0x845a3d56d1da3bfbL, + 0x0494950e1c86f0d4L,0x83cadbf93bc36ce8L,0x41fce5724fccc8d1L }, + { 0x05f939c28332c144L,0xb17f248b0871e46eL,0x3d8534e266e8aff6L, + 0x1d06f1dc3b85c629L,0xdb06a32ea3131b73L,0xf295184d8b3f64e5L } }, + /* 31 */ + { { 0xd9653ff736ddc103L,0x25f43e3795ef606fL,0x09e301fcfe06dce8L, + 0x85af234130b6eebfL,0x79b12b530ff56b20L,0x9b4fb499fe9a3c6bL }, + { 0x0154f89251d27ac2L,0xd33167e356ca5389L,0x7828ec1fafc065a6L, + 0x0959a2587f746c9bL,0xb18f1be30c44f837L,0xa7946117c4132fdbL } }, + /* 32 */ + { { 0xc0426b775e3c647bL,0xbfcbd9398cf05348L,0x31d312e3172c0d3dL, + 0x5f49fde6ee754737L,0x895530f06da7ee61L,0xcf281b0ae8b3a5fbL }, + { 0xfd14973541b8a543L,0x41a625a73080dd30L,0xe2baae07653908cfL, + 0xc3d01436ba02a278L,0xa0d0222e7b21b8f8L,0xfdc270e9d7ec1297L } }, + /* 33 */ + { { 0x00873c0cbc7f41d6L,0xd976113e1b7ad641L,0x2a536ff4238443fbL, + 0x030d00e241e62e45L,0x532e98675f545fc6L,0xcd0331088e91208cL }, + { 0xd1a04c999797612cL,0xd4393e02eea674e2L,0xd56fa69ee19742a1L, + 
0xdd2ab48085f0590eL,0xa5cefc5248a2243dL,0x48cc67b654383f41L } }, + /* 34 */ + { { 0x4e50430efc14ab48L,0x195b7f4f26706a74L,0x2fe8a228cc881ff6L, + 0xb1b968e2d945013dL,0x936aa5794b92162bL,0x4fb766b7364e754aL }, + { 0x13f93bca31e1ff7fL,0x696eb5cace4f2691L,0xff754bf8a2b09e02L, + 0x58f13c9ce58e3ff8L,0xb757346f1678c0b0L,0xd54200dba86692b3L } }, + /* 35 */ + { { 0x9a030bbd6dda1265L,0xf7b4f3fce89718ddL,0xa6a4931f936065b8L, + 0xbce72d875f72241cL,0x6cbb51cb65775857L,0xc71618154e993675L }, + { 0xe81a0f792ee32189L,0xef2fab26277dc0b2L,0x9e64f6feb71f469fL, + 0xb448ce33dfdaf859L,0x3f5c1c4cbe6b5df1L,0xfb8dfb001de45f7bL } }, + /* 36 */ + { { 0xc7345fa74d5bb921L,0x5c7e04be4d2b667eL,0x47ed3a80282d7a3eL, + 0x5c2777f87e47b2a4L,0x89b3b10008488e2eL,0x9aad77c2b2eb5b45L }, + { 0xd681bca7daac34aeL,0x2452e4e526afb326L,0x0c88792441a1ee14L, + 0x743b04d4c2407adeL,0xcb5e999bfc17a2acL,0x4dca2f824a701a06L } }, + /* 37 */ + { { 0x68e31ca61127bc1aL,0xa3edd59b17ead3beL,0x67b6b645e25f5a15L, + 0x76221794a420e15eL,0x794fd83b4b1e872eL,0x7cab3f03b2dece1bL }, + { 0x7119bf15ca9b3586L,0xa55459244d250bd7L,0x173633eacc6bcf24L, + 0x9bd308c2b1b6f884L,0x3bae06f5447d38c3L,0x54dcc135f341fe1cL } }, + /* 38 */ + { { 0x56d3598d943caf0dL,0xce044ea9225ff133L,0x9edf6a7c563fadeaL, + 0x632eb94473e8dc27L,0x814b467e3190dcabL,0x2d4f4f316dbb1e31L }, + { 0x8d69811ca143b7caL,0x4ec1ac32de7cf950L,0x223ab5fd37b5fe82L, + 0xe82616e49390f1d9L,0xabff4b2075804610L,0x11b9be15875b08f0L } }, + /* 39 */ + { { 0x4ae31a3d3bbe682cL,0xbc7c5d2674eef2ddL,0x92afd10a3c47dd40L, + 0xec7e0a3bc14ab9e1L,0x6a6c3dd1b2e495e4L,0x085ee5e9309bcd85L }, + { 0xf381a9088c2e67fdL,0x32083a80e261eaf2L,0x0fcd6a4996deee15L, + 0xe3b8fb035e524c79L,0x8dc360d91d5b08b9L,0x3a06e2c87f26719fL } }, + /* 40 */ + { { 0x5cd9f5a87237cac0L,0x93f0b59d43586794L,0x4384a764e94f6c4eL, + 0x8304ed2bb62782d3L,0x0b8db8b3cde06015L,0x4336dd535dbe190fL }, + { 0x5744355392ab473aL,0x031c7275be5ed046L,0x3e78678c21909aa4L, + 0x4ab7e04f99202ddbL,0x2648d2066977e635L,0xd427d184093198beL } }, + /* 41 */ + { { 0x822848f50f9b5a31L,0xbb003468baadb62aL,0x233a04723357559cL, + 0x49ef688079aee843L,0xa89867a0aeb9e1e3L,0xc151931b1f6f9a55L }, + { 0xd264eb0bad74251eL,0x37b9b2634abf295eL,0xb600921b04960d10L, + 0x0de53dbc4da77dc0L,0x01d9bab3d2b18697L,0xad54ec7af7156ddfL } }, + /* 42 */ + { { 0x8e74dc3579efdc58L,0x456bd3694ff68ddbL,0x724e74ccd32096a5L, + 0xe41cff42386783d0L,0xa04c7f217c70d8a4L,0x41199d2fe61a19a2L }, + { 0xd389a3e029c05dd2L,0x535f2a6be7e3fda9L,0x26ecf72d7c2b4df8L, + 0x678275f4fe745294L,0x6319c9cc9d23f519L,0x1e05a02d88048fc4L } }, + /* 43 */ + { { 0x75cc8e2ed4d5ffe8L,0xf8bb4896dbea17f2L,0x35059790cee3cb4aL, + 0x4c06ee85a47c6165L,0xf98fff2592935d2fL,0x34c4a57232ffd7c7L }, + { 0xc4b14806ea0376a2L,0x2ea5e7504f115e02L,0x532d76e21e55d7c0L, + 0x68dc9411f31044daL,0x9272e46571b77993L,0xadaa38bb93a8cfd5L } }, + /* 44 */ + { { 0x4bf0c7127d4ed72aL,0xda0e9264ba1f79a3L,0x48c0258bf4c39ea4L, + 0xa5394ed82a715138L,0x4af511cebf06c660L,0xfcebceefec5c37cdL }, + { 0xf23b75aa779ae8c1L,0xdeff59ccad1e606eL,0xf3f526fd22755c82L, + 0x64c5ab44bb32cefdL,0xa96e11a2915bdefdL,0xab19746a1143813eL } }, + /* 45 */ + { { 0x43c78585ec837d7dL,0xca5b6fbcb8ee0ba4L,0x34e924d9d5dbb5eeL, + 0x3f4fa104bb4f1ca5L,0x15458b72398640f7L,0x4231faa9d7f407eaL }, + { 0x53e0661ef96e6896L,0x554e4c69d03b0f9dL,0xd4fcb07b9c7858d1L, + 0x7e95279352cb04faL,0x5f5f15748974e7f7L,0x2e3fa5586b6d57c8L } }, + /* 46 */ + { { 0x42cd48036a9951a8L,0xa8b15b8842792ad0L,0x18e8bcf9abb29a73L, + 0xbfd9a092409933e8L,0x760a3594efb88dc4L,0x1441886340724458L }, + { 
0x162a56ee99caedc7L,0x8fb12ecd91d101c9L,0xea671967393202daL, + 0x1aac8c4aa4ccd796L,0x7db050361cf185a8L,0x0c9f86cd8cfd095aL } }, + /* 47 */ + { { 0x9a72814710b2a556L,0x767ca964327b70b2L,0x04ed9e125e3799b7L, + 0x6781d2dc22a3eb2aL,0x5bd116eb0d9450acL,0xeccac1fca7ebe08aL }, + { 0xde68444fdc2d6e94L,0x3621f42935ecf21bL,0x14e2d54329e03a2cL, + 0x53e42cd57d3e7f0aL,0xbba26c0973ed00b9L,0x00297c39c57d2272L } }, + /* 48 */ + { { 0x3aaaab10b8243a7dL,0x6eeef93e8fa58c5bL,0xf866fca39ae7f764L, + 0x64105a2661ab04d3L,0xa3578d8a03945d66L,0xb08cd3e4791b848cL }, + { 0x45edc5f8756d2411L,0xd4a790d9a755128cL,0xc2cf096349e5f6a0L, + 0xc66d267df649beaaL,0x3ce6d9688467039eL,0x50046c6b42f7816fL } }, + /* 49 */ + { { 0x92ae160266425043L,0x1ff66afdf08db890L,0x386f5a7f8f162ce5L, + 0x18d2dea0fcf5598fL,0x78372b3a1a8ca18eL,0xdf0d20eb8cd0e6f7L }, + { 0x7edd5e1d75bb4045L,0x252a47ceb96d94b7L,0xbdb293582c626776L, + 0x853c394340dd1031L,0x9dc9becf7d5f47fdL,0x27c2302fbae4044aL } }, + /* 50 */ + { { 0x2d1d208a8f2d49ceL,0x0d91aa02162df0a2L,0x9c5cce8709a07f65L, + 0xdf07238b84339012L,0x5028e2c8419442cdL,0x2dcbd35872062abaL }, + { 0xb5fbc3cbe4680967L,0x2a7bc6459f92d72cL,0x806c76e1116c369dL, + 0x5c50677a3177e8d8L,0x753739eb4569df57L,0x2d481ef636c3f40bL } }, + /* 51 */ + { { 0x1a2d39fdfea1103eL,0xeaae559295f81b17L,0xdbd0aa18f59b264aL, + 0x90c39c1acb592ee0L,0xdf62f80d9750cca3L,0xda4d8283df97cc6cL }, + { 0x0a6dd3461e201067L,0x1531f85969fb1f6bL,0x4895e5521d60121fL, + 0x0b21aab04c041c91L,0x9d896c46bcc1ccf8L,0xd24da3b33141bde7L } }, + /* 52 */ + { { 0x575a053753b0a354L,0x392ff2f40c6ddcd8L,0x0b8e8cff56157b94L, + 0x073e57bd3b1b80d1L,0x2a75e0f03fedee15L,0x752380e4aa8e6f19L }, + { 0x1f4e227c6558ffe9L,0x3a34861819ec5415L,0xab382d5ef7997085L, + 0x5e6deaffddc46ac2L,0xe5144078fc8d094cL,0xf674fe51f60e37c6L } }, + /* 53 */ + { { 0x6fb87ae5af63408fL,0xa39c36a9cd75a737L,0x7833313fcf4c618dL, + 0xfbcd4482f034c88dL,0x4469a76139b35288L,0x77a711c566b5d9c9L }, + { 0x4a695dc7944f8d65L,0xe6da5f65161aaba8L,0x8654e9c324601669L, + 0xbc8b93f528ae7491L,0x5f1d1e838f5580d8L,0x8ccf9a1acea32cc8L } }, + /* 54 */ + { { 0x28ab110c7196fee2L,0x75799d63874c8945L,0xa262934829aedaddL, + 0x9714cc7b2be88ff4L,0xf71293cfd58d60d6L,0xda6b6cb332a564e9L }, + { 0xf43fddb13dd821c2L,0xf2f2785f90dd323dL,0x91246419048489f8L, + 0x61660f26d24c6749L,0x961d9e8cc803c15cL,0x631c6158faadc4c9L } }, + /* 55 */ + { { 0xacf2ebe0fd752366L,0xb93c340e139be88bL,0x98f664850f20179eL, + 0x14820254ff1da785L,0x5278e2764f85c16eL,0xa246ee457aab1913L }, + { 0x43861eb453763b33L,0xc49f03fc45c0bc0dL,0xafff16bcad6b1ea1L, + 0xce33908b6fd49c99L,0x5c51e9bff7fde8c3L,0x076a7a39ff142c5eL } }, + /* 56 */ + { { 0x04639dfe9e338d10L,0x8ee6996ff42b411bL,0x960461d1a875cef2L, + 0x1057b6d695b4d0baL,0x27639252a906e0bcL,0x2c19f09ae1c20f8aL }, + { 0x5b8fc3f0eef4c43dL,0xe2e1b1a807a84aa9L,0x5f455528835d2bdbL, + 0x0f4aee4d207132ddL,0xe9f8338c3907f675L,0x7a874dc90e0531f0L } }, + /* 57 */ + { { 0x84b22d4597c27050L,0xbd0b8df759e70bf8L,0xb4d6740579738b9bL, + 0x47f4d5f5cd917c4fL,0x9099c4ce13ce6e33L,0x942bfd39521d0f8bL }, + { 0x5028f0f6a43b566dL,0xaf6e866921bff7deL,0x83f6f856c44232cdL, + 0x65680579f915069aL,0xd12095a2ecfecb85L,0xcf7f06aedb01ba16L } }, + /* 58 */ + { { 0x0f56e3c48ef96c80L,0xd521f2b33ddb609cL,0x2be941027dc1450dL, + 0x2d21a07102a91fe2L,0x2e6f74fa1efa37deL,0x9a9a90b8156c28a1L }, + { 0xc54ea9ea9dc7dfcbL,0xc74e66fc2c2c1d62L,0x9f23f96749d3e067L, + 0x1c7c3a4654dd38adL,0xc70058845946cee3L,0x8985636845cc045dL } }, + /* 59 */ + { { 0x29da7cd4fce73946L,0x8f697db523168563L,0x8e235e9ccba92ec6L, + 
0x55d4655f9f91d3eaL,0xf3689f23aa50a6cdL,0xdcf21c2621e6a1a0L }, + { 0xcffbc82e61b818bfL,0xc74a2f96da47a243L,0x234e980a8bc1a0cfL, + 0xf35fd6b57929cb6dL,0x81468e12efe17d6cL,0xddea6ae558b2dafbL } }, + /* 60 */ + { { 0x294de8877e787b2eL,0x258acc1f39a9310dL,0x92d9714aac14265dL, + 0x18b5591c708b48a0L,0x27cc6bb0e1abbf71L,0xc0581fa3568307b9L }, + { 0x9e0f58a3f24d4d58L,0xfebe9bb8e0ce2327L,0x91fd6a419d1be702L, + 0x9a7d8a45facac993L,0xabc0a08c9e50d66dL,0x02c342f706498201L } }, + /* 61 */ + { { 0xccd71407157bdbc2L,0x72fa89c6ad0e1605L,0xb1d3da2bb92a015fL, + 0x8ad9e7cda0a3fe56L,0x160edcbd24f06737L,0x79d4db3361275be6L }, + { 0xd3d31fd95f3497c4L,0x8cafeaee04192fb0L,0xe13ca74513a50af3L, + 0x188261678c85aae5L,0xce06cea89eb556ffL,0x2eef1995bdb549f3L } }, + /* 62 */ + { { 0x8ed7d3eb50596edcL,0xaa359362905243a2L,0xa212c2c2a4b6d02bL, + 0x611fd727c4fbec68L,0x8a0b8ff7b84f733dL,0xd85a6b905f0daf0eL }, + { 0x60e899f5d4091cf7L,0x4fef2b672eff2768L,0xc1f195cb10c33964L, + 0x8275d36993626a8fL,0xc77904f40d6c840aL,0x88d8b7fd7a868acdL } }, + /* 63 */ + { { 0x85f237237bd98425L,0xd4463992c70b154eL,0xcbb00ee296687a2eL, + 0x905fdbf7c83214fdL,0x2019d29313593684L,0x0428c393ef51218eL }, + { 0x40c7623f981e909aL,0x925133857be192daL,0x48fe480f4010907eL, + 0xdd7a187c3120b459L,0xc9d7702da1fd8f3cL,0x66e4753be358efc5L } }, + /* 64 */ + { { 0x070d34e116973cf4L,0x20aee08b7e4f34f7L,0x269af9b95eb8ad29L, + 0xdde0a036a6a45ddaL,0xa18b528e63df41e0L,0x03cc71b2a260df2aL }, + { 0x24a6770aa06b1dd7L,0x5bfa9c119d2675d3L,0x73c1e2a196844432L, + 0x3660558d131a6cf0L,0xb0289c832ee79454L,0xa6aefb01c6d8ddcdL } }, + /* 65 */ + { { 0xba1464b401ab5245L,0x9b8d0b6dc48d93ffL,0x939867dc93ad272cL, + 0xbebe085eae9fdc77L,0x73ae5103894ea8bdL,0x740fc89a39ac22e1L }, + { 0x5e28b0a328e23b23L,0x2352722ee13104d0L,0xf4667a18b0a2640dL, + 0xac74a72e49bb37c3L,0x79f734f0e81e183aL,0xbffe5b6c3fd9c0ebL } }, + /* 66 */ + { { 0xb1a358f5c6a2123fL,0x927b2d95fe28df6dL,0x89702753f199d2f9L, + 0x0a73754c1a3f82dcL,0x063d029d777affe1L,0x5439817edae6d34dL }, + { 0xf7979eef6b8b83c4L,0x615cb2149d945682L,0x8f0e4facc5e57eaeL, + 0x042b89b8113047ddL,0x888356dc93f36508L,0xbf008d185fd1f32fL } }, + /* 67 */ + { { 0x8012aa244e8068dbL,0xc72cc641a5729a47L,0x3c33df2c43f0691dL, + 0xfa0573471d92145fL,0xaefc0f2fb97f7946L,0x813d75cb2f8121bfL }, + { 0x05613c724383bba6L,0xa924ce70a4224b3fL,0xe59cecbe5f2179a6L, + 0x78e2e8aa79f62b61L,0x3ac2cc3b53ad8079L,0x55518d71d8f4fa96L } }, + /* 68 */ + { { 0x03cf292200623f3bL,0x095c71115f29ebffL,0x42d7224780aa6823L, + 0x044c7ba17458c0b0L,0xca62f7ef0959ec20L,0x40ae2ab7f8ca929fL }, + { 0xb8c5377aa927b102L,0x398a86a0dc031771L,0x04908f9dc216a406L, + 0xb423a73a918d3300L,0x634b0ff1e0b94739L,0xe29de7252d69f697L } }, + /* 69 */ + { { 0x744d14008435af04L,0x5f255b1dfec192daL,0x1f17dc12336dc542L, + 0x5c90c2a7636a68a8L,0x960c9eb77704ca1eL,0x9de8cf1e6fb3d65aL }, + { 0xc60fee0d511d3d06L,0x466e2313f9eb52c7L,0x743c0f5f206b0914L, + 0x42f55bac2191aa4dL,0xcefc7c8fffebdbc2L,0xd4fa6081e6e8ed1cL } }, + /* 70 */ + { { 0xb5e405d3b0ab9645L,0xaeec7f98d5f1f711L,0x8ad42311585c2a6eL, + 0x045acb9e512c6944L,0xae106c4ea90db1c6L,0xb89f33d5898e6563L }, + { 0x43b07cd97fed2ce4L,0xf9934e17dd815b20L,0x6778d4d50a81a349L, + 0x9e616ade52918061L,0xfa06db06d7e67112L,0x1da23cf188488091L } }, + /* 71 */ + { { 0x821c46b342f2c4b5L,0x931513ef66059e47L,0x7030ae4366f50cd1L, + 0x43b536c943e7b127L,0x006258cf5fca5360L,0xe4e3ee796b557abfL }, + { 0xbb6b390024c8b22fL,0x2eb5e2c1fcbf1054L,0x937b18c9567492afL, + 0xf09432e4acf53957L,0x585f5a9d1dbf3a56L,0xf86751fdbe0887cfL } }, + /* 72 */ + { { 
0x157399cb9d10e0b2L,0x1c0d595660dc51b7L,0x1d496b8a1f583090L, + 0x6658bc2688590484L,0x88c08ab703213f28L,0x8d2e0f737ae58de4L }, + { 0x9b79bc95486cfee6L,0x036a26c7e9e5bc57L,0x1ad03601cd8ae97aL, + 0x06907f87ff3a0494L,0x078f4bbf2c7eb584L,0xe3731bf57e8d0a5aL } }, + /* 73 */ + { { 0x72f2282be1cd0abeL,0xd4f9015e87efefa2L,0x9d1898066c3834bdL, + 0x9c8cdcc1b8a29cedL,0x0601b9f4fee82ebcL,0x371052bc7206a756L }, + { 0x76fa109246f32562L,0xdaad534c17351bb4L,0xc3d64c37b3636bb5L, + 0x038a8c5145d54e00L,0x301e618032c09e7cL,0x9764eae795735151L } }, + /* 74 */ + { { 0x8791b19fcbd5256aL,0x4007e0f26ca13a3bL,0x03b794604cf06904L, + 0xb18a9c22b6c17589L,0xa1cb7d7d81d45908L,0x6e13fa9d21bb68f1L }, + { 0x47183c62a71e6e16L,0x5cf0ef8ee18749edL,0x2c9c7f9b2e5ed409L, + 0x042eeacce6e117e1L,0xb86d481613fb5a7fL,0xea1cf0edc9e5feb1L } }, + /* 75 */ + { { 0x6e6573c9cea4cc9bL,0x5417961dafcec8f3L,0x804bf02aa438b6f6L, + 0xb894b03cdcd4ea88L,0xd0f807e93799571fL,0x3466a7f5862156e8L }, + { 0x51e59acd56515664L,0x55b0f93ca3c5eb0bL,0x84a06b026a4279dbL, + 0x5c850579c5fae08eL,0xcf07b8dba663a1a2L,0x49a36bbcf46ffc8dL } }, + /* 76 */ + { { 0xe47f5acc46d93106L,0x65b7ade0aa897c9cL,0x37cf4c9412d7e4beL, + 0xa2ae9b80d4b2caa9L,0x5e7ce09ce60357a3L,0x29f77667c8ecd5f9L }, + { 0xdf6868f5a8a0b1c5L,0x240858cf62978ad8L,0x0f7ac101dc0002a1L, + 0x1d28a9d7ffe9aa05L,0x744984d65b962c97L,0xa8a7c00b3d28c8b2L } }, + /* 77 */ + { { 0x7c58a852ae11a338L,0xa78613f1d1af96e7L,0x7e9767d25355cc73L, + 0x6ba37009792a2de6L,0x7d60f618124386b2L,0xab09b53111157674L }, + { 0x95a0484198eb9dd0L,0xe6c17acc15070328L,0xafc6da45489c6e49L, + 0xab45a60abb211530L,0xc58d65927d7ea933L,0xa3ef3c65095642c6L } }, + /* 78 */ + { { 0x89d420e9df010879L,0x9d25255d39576179L,0x9cdefd50e39513b6L, + 0xe4efe45bd5d1c313L,0xc0149de73f7af771L,0x55a6b4f4340ab06bL }, + { 0xf1325251ebeaf771L,0x2ab44128878d4288L,0xfcd5832e18e05afeL, + 0xef52a348cc1fb62bL,0x2bd08274c1c4792aL,0x345c5846877c6dc7L } }, + /* 79 */ + { { 0xde15ceb0bea65e90L,0x0987f72b2416d99cL,0x44db578dfd863decL, + 0xf617b74bac6a3578L,0x9e62bd7adb48e999L,0x877cae61eab1a1beL }, + { 0x23adddaa3a358610L,0x2fc4d6d1325e2b07L,0x897198f51585754eL, + 0xf741852cb392b584L,0x9927804cb55f7de1L,0xe9e6c4ed1aa8efaeL } }, + /* 80 */ + { { 0x867db63998683186L,0xfb5cf424ddcc4ea9L,0xcc9a7ffed4f0e7bdL, + 0x7c57f71c7a779f7eL,0x90774079d6b25ef2L,0x90eae903b4081680L }, + { 0xdf2aae5e0ee1fcebL,0x3ff1da24e86c1a1fL,0x80f587d6ca193edfL, + 0xa5695523dc9b9d6aL,0x7b84090085920303L,0x1efa4dfcba6dbdefL } }, + /* 81 */ + { { 0xfbd838f9e0540015L,0x2c323946c39077dcL,0x8b1fb9e6ad619124L, + 0x9612440c0ca62ea8L,0x9ad9b52c2dbe00ffL,0xf52abaa1ae197643L }, + { 0xd0e898942cac32adL,0xdfb79e4262a98f91L,0x65452ecf276f55cbL, + 0xdb1ac0d27ad23e12L,0xf68c5f6ade4986f0L,0x389ac37b82ce327dL } }, + /* 82 */ + { { 0x511188b4f8e60f5bL,0x7fe6701548aa2adaL,0xdb333cb8381abca2L, + 0xb15e6d9ddaf3fc97L,0x4b24f6eb36aabc03L,0xc59789df72a748b4L }, + { 0x26fcb8a529cf5279L,0x7a3c6bfc01ad9a6cL,0x866cf88d4b8bac9bL, + 0xf4c899899c80d041L,0xf0a0424170add148L,0x5a02f47945d81a41L } }, + /* 83 */ + { { 0xfa5c877cc1c90202L,0xd099d440f8ac7570L,0x428a5b1bd17881f7L, + 0x61e267db5b2501d7L,0xf889bf04f2e4465bL,0x4da3ae0876aa4cb8L }, + { 0x3ef0fe26e3e66861L,0x5e7729533318b86dL,0xc3c35fbc747396dfL, + 0x5115a29c439ffd37L,0xbfc4bd97b2d70374L,0x088630ea56246b9dL } }, + /* 84 */ + { { 0xcd96866db8a9e8c9L,0xa11963b85bb8091eL,0xc7f90d53045b3cd2L, + 0x755a72b580f36504L,0x46f8b39921d3751cL,0x4bffdc9153c193deL }, + { 0xcd15c049b89554e7L,0x353c6754f7a26be6L,0x79602370bd41d970L, + 0xde16470b12b176c0L,0x56ba117540c8809dL,0xe2db35c3e435fb1eL } 
}, + /* 85 */ + { { 0xd71e4aab6328e33fL,0x5486782baf8136d1L,0x07a4995f86d57231L, + 0xf1f0a5bd1651a968L,0xa5dc5b2476803b6dL,0x5c587cbc42dda935L }, + { 0x2b6cdb32bae8b4c0L,0x66d1598bb1331138L,0x4a23b2d25d7e9614L, + 0x93e402a674a8c05dL,0x45ac94e6da7ce82eL,0xeb9f8281e463d465L } }, + /* 86 */ + { { 0x34e0f9d1fecf5b9bL,0xa115b12bf206966aL,0x5591cf3b1eaa0534L, + 0x5f0293cbfb1558f9L,0x1c8507a41bc703a5L,0x92e6b81c862c1f81L }, + { 0xcc9ebc66cdaf24e3L,0x68917ecd72fcfc70L,0x6dc9a9308157ba48L, + 0x5d425c08b06ab2b2L,0x362f8ce736e929c4L,0x09f6f57c62e89324L } }, + /* 87 */ + { { 0x1c7d6b78d29375fbL,0xfabd851ee35d1157L,0xf6f62dcd4243ea47L, + 0x1dd924608fe30b0fL,0x08166dfaffc6e709L,0xc6c4c6930881e6a7L }, + { 0x20368f87d6a53fb0L,0x38718e9f9eb4d1f9L,0x03f08acdafd7e790L, + 0x0835eb4472fe2a1cL,0x7e05090388076e5dL,0x538f765ea638e731L } }, + /* 88 */ + { { 0x0e0249d9c2663b4bL,0xe700ab5b47cd38ddL,0xb192559d2c46559fL, + 0x8f9f74a84bcde66dL,0xad1615233e2aced5L,0xc155c0473dd03a5bL }, + { 0x346a87993be454ebL,0x66ee94db83b7dccdL,0x1f6d8378ab9d2abeL, + 0x4a396dd27733f355L,0x419bd40af53553c2L,0xd0ead98d731dd943L } }, + /* 89 */ + { { 0x908e0b0eec142408L,0x98943cb94114b310L,0x03dbf7d81742b1d7L, + 0xd270df6b693412f4L,0xc50654948f69e20cL,0xa76a90c3697e43a1L }, + { 0xe0fa33844624825aL,0x82e48c0b8acc34c2L,0x7b24bd14e9a14f2bL, + 0x4f5dd5e24db30803L,0x0c77a9e7932da0a3L,0x20db90f274c653dcL } }, + /* 90 */ + { { 0x261179b70e6c5fd9L,0xf8bec1236c982eeaL,0x47683338d4957b7eL, + 0xcc47e6640a72f66aL,0xbd54bf6a1bad9350L,0xdfbf4c6af454e95aL }, + { 0x3f7a7afa6907f4faL,0x7311fae0865ca735L,0x24737ab82a496adaL, + 0x13e425f115feb79bL,0xe9e97c50a1b93c21L,0xb26b6eac4ddd3eb5L } }, + /* 91 */ + { { 0x81cab9f52a2e5f2bL,0xf93caf29bf385ac4L,0xf4bf35c3c909963aL, + 0x081e730074c9143cL,0x3ea57fa8c281b4c5L,0xe497905c9b340741L }, + { 0xf556dd8a55ab3cfbL,0xd444b96b518db6adL,0x34f5425a5ef4b955L, + 0xdda7a3acecd26aa3L,0xb57da11bda655e97L,0x02da3effc2024c70L } }, + /* 92 */ + { { 0xe24b00366481d0d9L,0x3740dbe5818fdfe2L,0xc1fc1f45190fda00L, + 0x329c92803cf27fdeL,0x7435cb536934f43eL,0x2b505a5d7884e8feL }, + { 0x6cfcc6a6711adcc9L,0xf034325c531e21e1L,0xa2f4a9679b2a8a99L, + 0x9d5f38423c21bdffL,0xb25c781131b57d66L,0xdb5344d80b8093b9L } }, + /* 93 */ + { { 0x0d72e667ae50a2f5L,0x9b7f8d8ae4a861d1L,0xa129f70f330df1cbL, + 0xe90aa5d7e04fefc3L,0xff561ecbe72c3ae1L,0x0d8fb428cdb955faL }, + { 0xd2235f73d7663784L,0xc05baec67e2c456aL,0xe5c292e42adbfcccL, + 0x4fd17988efb110d5L,0x27e57734d19d49f3L,0x188ac4ce84f679feL } }, + /* 94 */ + { { 0x7ee344cfa796c53eL,0xbbf6074d0868009bL,0x1f1594f7474a1295L, + 0x66776edcac11632dL,0x1862278b04e2fa5aL,0x52665cf2c854a89aL }, + { 0x7e3764648104ab58L,0x167759137204fd6dL,0x86ca06a544ea1199L, + 0xaa3f765b1c9240ddL,0x5f8501a924746149L,0x7b982e30dcd251d7L } }, + /* 95 */ + { { 0xe44e9efcc15f3060L,0x5ad62f2ea87ebbe6L,0x36499d41c79500d4L, + 0xa66d6dc0336fa9d1L,0xf8afc4955afd3b1fL,0x1d8ccb24e5c9822bL }, + { 0x4031422b79d7584bL,0xc54a0580ea3f20ddL,0x3f837c8f958468c5L, + 0x3d82f110fbea7735L,0x679a87787dffe2fcL,0x48eba63b20704803L } }, + /* 96 */ + { { 0x89b10d41df46e2f6L,0x13ab57f819514367L,0x067372b91d469c87L, + 0x0c195afa4f6c5798L,0xea43a12a272c9acfL,0x9dadd8cb678abdacL }, + { 0xcce56c6be182579aL,0x86febadb2d26c2d8L,0x1c668ee12a44745cL, + 0x580acd8698dc047aL,0x5a2b79cc51b9ec2dL,0x007da6084054f6a0L } }, + /* 97 */ + { { 0x9e3ca35217b00dd0L,0x046779cb0e81a7a6L,0xb999fef3d482d871L, + 0xe6f38134d9233fbcL,0x112c3001f48cd0e0L,0x934e75763c6c66aeL }, + { 0xb44d4fc3d73234dcL,0xfcae2062864eafc1L,0x843afe2526bef21aL, + 
0x61355107f3b75fdfL,0x8367a5aa794c2e6bL,0x3d2629b18548a372L } }, + /* 98 */ + { { 0x6230618f437cfaf8L,0x5b8742cb2032c299L,0x949f72472293643aL, + 0xb8040f1a09464f79L,0x049462d24f254143L,0xabd6b522366c7e76L }, + { 0x119b392bd5338f55L,0x1a80a9ce01495a0cL,0xf3118ca7f8d7537eL, + 0xb715adc26bf4b762L,0x24506165a8482b6cL,0xd958d7c696a7c84dL } }, + /* 99 */ + { { 0x9ad8aa87bdc21f31L,0xadb3cab48063e58cL,0xefd86283b07dd7b8L, + 0xc7b9b7621be7c6b4L,0x2ef58741015582deL,0xc970c52e299addf3L }, + { 0x78f02e2a22f24d66L,0xefec1d1074cc100aL,0xaf2a6a3909316e1aL, + 0xce7c22055849dd49L,0x9c1fe75c96bffc4cL,0xcad98fd27ba06ec0L } }, + /* 100 */ + { { 0xed76e2d0b648b73eL,0xa9f92ce51cfd285eL,0xa8c86c062ed13de1L, + 0x1d3a574ea5191a93L,0x385cdf8b1ad1b8bfL,0xbbecc28a47d2cfe3L }, + { 0x98d326c069cec548L,0x4f5bc1ddf240a0b2L,0x241a706229057236L, + 0x0fc6e9c5c68294a4L,0x4d04838ba319f17aL,0x8b612cf19ffc1c6fL } }, + /* 101 */ + { { 0x9bb0b5014c3830ebL,0x3d08f83c8ee0d0c5L,0xa4a6264279ba9389L, + 0x5d5d40449cbc2914L,0xae9eb83e074c46f0L,0x63bb758f74ead7d6L }, + { 0x1c40d2eac6bb29e0L,0x95aa2d874b02f41eL,0x9298917553cb199aL, + 0xdd91bafe51584f6dL,0x3715efb931a1aaecL,0xc1b6ae5b46780f9eL } }, + /* 102 */ + { { 0xcded3e4b42772f41L,0x3a700d5d3bcb79d1L,0x4430d50e80feee60L, + 0x444ef1fcf5e5d4bbL,0xc660194fe6e358ffL,0xe68a2f326a91b43cL }, + { 0x5842775c977fe4d2L,0x78fdef5c7e2a41ebL,0x5f3bec02ff8df00eL, + 0xf4b840cd5852525dL,0x0870483a4e6988bdL,0x39499e39cc64b837L } }, + /* 103 */ + { { 0xfc05de80b08df5feL,0x0c12957c63ba0362L,0xea379414d5cf1428L, + 0xc559132a54ef6216L,0x33d5f12fb9e65cf8L,0x09c602781695d663L }, + { 0x3ac1ced461f7a2fbL,0xdd838444d4f5eeb8L,0x82a38c6c8318fcadL, + 0x315be2e5e9f1a864L,0x317b5771442daf47L,0x81b5904a95aa5f9eL } }, + /* 104 */ + { { 0x6b6b1c508b21d232L,0x87f3dbc08c2cba75L,0xa7e74b46ae9f0fafL, + 0x036a0985bb7b8079L,0x4f185b908d974a25L,0x5aa7cef0d9af5ec9L }, + { 0xe0566a7057dcfffcL,0x6ea311dab8453225L,0x72ea1a8d23368aa9L, + 0xed9b208348cd552dL,0xb987967cc80ea435L,0xad735c756c104173L } }, + /* 105 */ + { { 0xaea85ab3cee76ef4L,0x44997444af1d2b93L,0x0851929beacb923fL, + 0xb080b59051e3bc0cL,0xc4ee1d8659be68a2L,0xf00de21964b26cdaL }, + { 0x8d7fb5c0f2e90d4dL,0x00e219a777d9ec64L,0xc4e6febd5d1c491cL, + 0x080e37541a8f4585L,0x4a9b86c848d2af9cL,0x2ed70db6b6679851L } }, + /* 106 */ + { { 0xaee44116586f25cbL,0xf7b6861fa0fcf70fL,0x55d2cd2018a350e8L, + 0x861bf3e592dc286fL,0x9ab18ffa6226aba7L,0xd15827bea9857b03L }, + { 0x26c1f54792e6acefL,0x422c63c8ac1fbac3L,0xa2d8760dfcbfd71dL, + 0x35f6a539b2511224L,0xbaa88fa1048d1a21L,0x49f1abe9ebf999dbL } }, + /* 107 */ + { { 0x16f9f4f4f7492b73L,0xcf28ec1ecb392b1aL,0x45b130d469ca6ffcL, + 0x28ba8d40b72efa58L,0xace987c75ca066f5L,0x3e3992464ad022ebL }, + { 0x63a2d84e752555bbL,0xaaa93b4a9c2ae394L,0xcd80424ec89539caL, + 0x6d6b5a6daa119a99L,0xbd50334c379f2629L,0x899e925eef3cc7d3L } }, + /* 108 */ + { { 0xb7ff3651bf825dc4L,0x0f741cc440b9c462L,0x771ff5a95cc4fb5bL, + 0xcb9e9c9b47fd56feL,0xbdf053db5626c0d3L,0xa97ce675f7e14098L }, + { 0x68afe5a36c934f5eL,0x6cd5e148ccefc46fL,0xc7758570d7a88586L, + 0x49978f5edd558d40L,0xa1d5088a64ae00c1L,0x58f2a720f1d65bb2L } }, + /* 109 */ + { { 0x66fdda4a3e4daedbL,0x38318c1265d1b052L,0x28d910a24c4bbf5cL, + 0x762fe5c478a9cd14L,0x08e5ebaad2cc0aeeL,0xd2cdf257ca0c654cL }, + { 0x48f7c58b08b717d2L,0x3807184a386cd07aL,0x3240f626ae7d0112L, + 0x03e9361bc43917b0L,0xf261a87620aea018L,0x53f556a47e1e6372L } }, + /* 110 */ + { { 0xc84cee562f512a90L,0x24b3c0041b0ea9f1L,0x0ee15d2de26cc1eaL, + 0xd848762cf0c9ef7dL,0x1026e9c5d5341435L,0x8f5b73dcfdb16b31L }, + { 
0x1f69bef2d2c75d95L,0x8d33d581be064ddaL,0x8c024c1257ed35e6L, + 0xf8d435f9c309c281L,0xfd295061d6960193L,0x66618d78e9e49541L } }, + /* 111 */ + { { 0x571cfd458ce382deL,0x175806eede900ddeL,0x6184996534aba3b5L, + 0xe899778ade7aec95L,0xe8f00f6eff4aa97fL,0xae971cb5010b0c6dL }, + { 0x1827eebc3af788f1L,0xd46229ffe413fe2dL,0x8a15455b4741c9b4L, + 0x5f02e690f8e424ebL,0x40a1202edae87712L,0x49b3bda264944f6dL } }, + /* 112 */ + { { 0xd63c6067035b2d69L,0xb507150d6bed91b0L,0x1f35f82f7afb39b2L, + 0xb9bd9c0116012b66L,0x00d97960ed0a5f50L,0xed7054512716f7c9L }, + { 0x1576eff4127abdb4L,0x6850d698f01e701cL,0x9fa7d7493fc87e2fL, + 0x0b6bcc6fb0ce3e48L,0xf4fbe1f5f7d8c1c0L,0xcf75230e02719cc6L } }, + /* 113 */ + { { 0x6761d6c2722d94edL,0xd1ec3f213718820eL,0x65a40b7025d0e7c6L, + 0xd67f830ebaf3cf31L,0x633b3807b93ea430L,0x17faa0ea0bc96c69L }, + { 0xe6bf3482df866b98L,0x205c1ee9a9db52d4L,0x51ef9bbdff9ab869L, + 0x3863dad175eeb985L,0xef216c3bd3cf442aL,0x3fb228e3f9c8e321L } }, + /* 114 */ + { { 0x94f9b70c0760ac07L,0xf3c9ccae9d79bf4dL,0x73cea084c5ffc83dL, + 0xef50f943dc49c38eL,0xf467a2aebc9e7330L,0x5ee534b644ea7fbaL }, + { 0x20cb627203609e7fL,0x0984435562fdc9f0L,0xaf5c8e580f1457f7L, + 0xd1f50a6cb4b25941L,0x77cb247c2ec82395L,0xa5f3e1e5da3dca33L } }, + /* 115 */ + { { 0x023489d67d85fa94L,0x0ba405372db9ce47L,0x0fdf7a1faed7aad1L, + 0xa57b0d739a4ccb40L,0x48fcec995b18967cL,0xf30b5b6eb7274d24L }, + { 0x7ccb4773c81c5338L,0xb85639e6a3ed6bd0L,0x7d9df95f1d56eadaL, + 0xe256d57f0a1607adL,0x6da7ffdc957574d6L,0x65f8404601c7a8c4L } }, + /* 116 */ + { { 0x8d45d0cbcba1e7f1L,0xef0a08c002b55f64L,0x771ca31b17e19892L, + 0xe1843ecb4885907eL,0x67797ebc364ce16aL,0x816d2b2d8df4b338L }, + { 0xe870b0e539aa8671L,0x9f0db3e4c102b5f5L,0x342966591720c697L, + 0x0ad4c89e613c0d2aL,0x1af900b2418ddd61L,0xe087ca72d336e20eL } }, + /* 117 */ + { { 0x222831ffaba10079L,0x0dc5f87b6d64fff2L,0x445479073e8cb330L, + 0xe815aaa2702a33fbL,0x338d6b2e5fba3215L,0x0f7535cb79f549c8L }, + { 0x471ecd972ee95923L,0x1e868b37c6d1c09fL,0x2bc7b8ecc666ef4eL, + 0xf5416589808a4bfcL,0xf23e9ee23fbc4d2eL,0x4357236c2d75125bL } }, + /* 118 */ + { { 0xfe176d95ba9cdb1bL,0x45a1ca012f82791eL,0x97654af24de4cca2L, + 0xbdbf9d0e5cc4bcb9L,0xf6a7df50ad97ac0aL,0xc52112b061359fd6L }, + { 0x696d9ce34f05eae3L,0x903adc02e943ac2bL,0xa90753470848be17L, + 0x1e20f1702a3973e5L,0xe1aacc1c6feb67e9L,0x2ca0ac32e16bc6b9L } }, + /* 119 */ + { { 0xffea12e4ef871eb5L,0x94c2f25da8bf0a7aL,0x4d1e4c2a78134eaaL, + 0x11ed16fb0360fb10L,0x4029b6db85fc11beL,0x5e9f7ab7f4d390faL }, + { 0x5076d72f30646612L,0xa0afed1ddda1d0d8L,0x2902225785a1d103L, + 0xcb499e174e276bcdL,0x16d1da7151246c3dL,0xc72d56d3589a0443L } }, + /* 120 */ + { { 0xdf5ffc74dae5bb45L,0x99068c4a261bd6dcL,0xdc0afa7aaa98ec7bL, + 0xedd2ee00f121e96dL,0x163cc7be1414045cL,0xb0b1bbce335af50eL }, + { 0xd440d78501a06293L,0xcdebab7c6552e644L,0x48cb8dbc8c757e46L, + 0x81f9cf783cabe3cbL,0xddd02611b123f59aL,0x3dc7b88eeeb3784dL } }, + /* 121 */ + { { 0xe1b8d398c4741456L,0xa9dfa9026032a121L,0x1cbfc86d1263245bL, + 0xf411c7625244718cL,0x96521d5405b0fc54L,0x1afab46edbaa4985L }, + { 0xa75902ba8674b4adL,0x486b43ad5ad87d12L,0x72b1c73636e0d099L, + 0x39890e07bb6cd6d6L,0x8128999c59bace4eL,0xd8da430b7b535e33L } }, + /* 122 */ + { { 0x39f65642c6b75791L,0x050947a621806bfbL,0x0ca3e3701362ef84L, + 0x9bc60aed8c3d2391L,0x9b488671732e1ddcL,0x12d10d9ea98ee077L }, + { 0xb6f2822d3651b7dcL,0x6345a5ba80abd138L,0x62033262472d3c84L, + 0xd54a1d40acc57527L,0x6ea46b3a424447cbL,0x5bc410572fb1a496L } }, + /* 123 */ + { { 0xe70c57a3a751cd0eL,0x190d8419eba3c7d6L,0xb1c3bee79d47d55aL, + 
0xda941266f912c6d8L,0x12e9aacc407a6ad6L,0xd6ce5f116e838911L }, + { 0x063ca97b70e1f2ceL,0xa3e47c728213d434L,0xa016e24184df810aL, + 0x688ad7b0dfd881a4L,0xa37d99fca89bf0adL,0xd8e3f339a23c2d23L } }, + /* 124 */ + { { 0xbdf53163750bed6fL,0x808abc3283e68b0aL,0x85a366275bb08a33L, + 0xf72a3a0f6b0e4abeL,0xf7716d19faf0c6adL,0x22dcc0205379b25fL }, + { 0x7400bf8df9a56e11L,0x6cb8bad756a47f21L,0x7c97176f7a6eb644L, + 0xe8fd84f7d1f5b646L,0x98320a9444ddb054L,0x07071ba31dde86f5L } }, + /* 125 */ + { { 0x6fdfa0e598f8fcb9L,0x89cec8e094d0d70cL,0xa0899397106d20a8L, + 0x915bfb9aba8acc9cL,0x1370c94b5507e01cL,0x83246a608a821ffbL }, + { 0xa8273a9fbe3c378fL,0x7e54478935a25be9L,0x6cfa49724dd929d7L, + 0x987fed9d365bd878L,0x4982ac945c29a7aeL,0x4589a5d75ddd7ec5L } }, + /* 126 */ + { { 0x9fabb174a95540a9L,0x7cfb886f0162c5b0L,0x17be766bea3dee18L, + 0xff7da41fe88e624cL,0xad0b71eb8b919c38L,0x86a522e0f31ff9a9L }, + { 0xbc8e6f72868bc259L,0x6130c6383ccef9e4L,0x09f1f4549a466555L, + 0x8e6c0f0919b2bfb4L,0x945c46c90ca7bb22L,0xacd871684dafb67bL } }, + /* 127 */ + { { 0x090c72ca10c53841L,0xc20ae01b55a4fcedL,0x03f7ebd5e10234adL, + 0xb3f42a6a85892064L,0xbdbc30c0b4a14722L,0x971bc4378ca124ccL }, + { 0x6f79f46d517ff2ffL,0x6a9c96e2ecba947bL,0x5e79f2f462925122L, + 0x30a96bb16a4e91f1L,0x1147c9232d4c72daL,0x65bc311f5811e4dfL } }, + /* 128 */ + { { 0x87c7dd7d139b3239L,0x8b57824e4d833baeL,0xbcbc48789fff0015L, + 0x8ffcef8b909eaf1aL,0x9905f4eef1443a78L,0x020dd4a2e15cbfedL }, + { 0xca2969eca306d695L,0xdf940cadb93caf60L,0x67f7fab787ea6e39L, + 0x0d0ee10ff98c4fe5L,0xc646879ac19cb91eL,0x4b4ea50c7d1d7ab4L } }, + /* 129 */ + { { 0x19e409457a0db57eL,0xe6017cad9a8c9702L,0xdbf739e51be5cff9L, + 0x3646b3cda7a938a2L,0x0451108568350dfcL,0xad3bd6f356e098b5L }, + { 0x935ebabfee2e3e3eL,0xfbd01702473926cbL,0x7c735b029e9fb5aaL, + 0xc52a1b852e3feff0L,0x9199abd3046b405aL,0xe306fcec39039971L } }, + /* 130 */ + { { 0xd6d9aec823e4712cL,0x7ca8376cc3c198eeL,0xe6d8318731bebd8aL, + 0xed57aff3d88bfef3L,0x72a645eecf44edc7L,0xd4e63d0b5cbb1517L }, + { 0x98ce7a1cceee0ecfL,0x8f0126335383ee8eL,0x3b879078a6b455e8L, + 0xcbcd3d96c7658c06L,0x721d6fe70783336aL,0xf21a72635a677136L } }, + /* 131 */ + { { 0x19d8b3cd9586ba11L,0xd9e0aeb28a5c0480L,0xe4261dbf2230ef5cL, + 0x095a9dee02e6bf09L,0x8963723c80dc7784L,0x5c97dbaf145157b1L }, + { 0x97e744344bc4503eL,0x0fb1cb3185a6b370L,0x3e8df2becd205d4bL, + 0x497dd1bcf8f765daL,0x92ef95c76c988a1aL,0x3f924baa64dc4cfaL } }, + /* 132 */ + { { 0x6bf1b8dd7268b448L,0xd4c28ba1efd79b94L,0x2fa1f8c8e4e3551fL, + 0x769e3ad45c9187a9L,0x28843b4d40326c0dL,0xfefc809450d5d669L }, + { 0x30c85bfd90339366L,0x4eeb56f15ccf6c3aL,0x0e72b14928ccd1dcL, + 0x73ee85b5f2ce978eL,0xcdeb2bf33165bb23L,0x8106c9234e410abfL } }, + /* 133 */ + { { 0xc8df01617d02f4eeL,0x8a78154718e21225L,0x4ea895eb6acf9e40L, + 0x8b000cb56e5a633dL,0xf31d86d57e981ffbL,0xf5c8029c4475bc32L }, + { 0x764561ce1b568973L,0x2f809b81a62996ecL,0x9e513d64da085408L, + 0xc27d815de61ce309L,0x0da6ff99272999e0L,0xbd284779fead73f7L } }, + /* 134 */ + { { 0x6033c2f99b1cdf2bL,0x2a99cf06bc5fa151L,0x7d27d25912177b3bL, + 0xb1f15273c4485483L,0x5fd57d81102e2297L,0x3d43e017c7f6acb7L }, + { 0x41a8bb0b3a70eb28L,0x67de2d8e3e80b06bL,0x09245a4170c28de5L, + 0xad7dbcb1a7b26023L,0x70b08a352cbc6c1eL,0xb504fb669b33041fL } }, + /* 135 */ + { { 0xa8e85ab5f97a27c2L,0x6ac5ec8bc10a011bL,0x55745533ffbcf161L, + 0x01780e8565790a60L,0xe451bf8599ee75b0L,0x8907a63b39c29881L }, + { 0x76d46738260189edL,0x284a443647bd35cbL,0xd74e8c4020cab61eL, + 0x6264bf8c416cf20aL,0xfa5a6c955fd820ceL,0xfa7154d0f24bb5fcL } }, + /* 136 */ + { { 
0x18482cec9b3f5034L,0x962d445acd9e68fdL,0x266fb1d695746f23L, + 0xc66ade5a58c94a4bL,0xdbbda826ed68a5b6L,0x05664a4d7ab0d6aeL }, + { 0xbcd4fe51025e32fcL,0x61a5aebfa96df252L,0xd88a07e231592a31L, + 0x5d9d94de98905517L,0x96bb40105fd440e7L,0x1b0c47a2e807db4cL } }, + /* 137 */ + { { 0x5c2a6ac808223878L,0xba08c269e65a5558L,0xd22b1b9b9bbc27fdL, + 0x919171bf72b9607dL,0x9ab455f9e588dc58L,0x6d54916e23662d93L }, + { 0x8da8e9383b1de0c1L,0xa84d186a804f278fL,0xbf4988ccd3461695L, + 0xf5eae3bee10eb0cbL,0x1ff8b68fbf2a66edL,0xa68daf67c305b570L } }, + /* 138 */ + { { 0xc1004cff44b2e045L,0x91b5e1364b1c05d4L,0x53ae409088a48a07L, + 0x73fb2995ea11bb1aL,0x320485703d93a4eaL,0xcce45de83bfc8a5fL }, + { 0xaff4a97ec2b3106eL,0x9069c630b6848b4fL,0xeda837a6ed76241cL, + 0x8a0daf136cc3f6cfL,0x199d049d3da018a8L,0xf867c6b1d9093ba3L } }, + /* 139 */ + { { 0xe4d42a5656527296L,0xae26c73dce71178dL,0x70a0adac6c251664L, + 0x813483ae5dc0ae1dL,0x7574eacddaab2dafL,0xc56b52dcc2d55f4fL }, + { 0x872bc16795f32923L,0x4be175815bdd2a89L,0x9b57f1e7a7699f00L, + 0x5fcd9c723ac2de02L,0x83af3ba192377739L,0xa64d4e2bfc50b97fL } }, + /* 140 */ + { { 0x2172dae20e552b40L,0x62f49725d34d52e8L,0x7930ee4007958f98L, + 0x56da2a90751fdd74L,0xf1192834f53e48c3L,0x34d2ac268e53c343L }, + { 0x1073c21813111286L,0x201dac14da9d9827L,0xec2c29dbee95d378L, + 0x9316f1191f3ee0b1L,0x7890c9f0544ce71cL,0xd77138af27612127L } }, + /* 141 */ + { { 0x78045e6d3b4ad1cdL,0xcd86b94e4aa49bc1L,0x57e51f1dfd677a16L, + 0xd9290935fa613697L,0x7a3f959334f4d893L,0x8c9c248b5d5fcf9bL }, + { 0x9f23a4826f70d4e9L,0x1727345463190ae9L,0x4bdd7c135b081a48L, + 0x1e2de38928d65271L,0x0bbaaa25e5841d1fL,0xc4c18a79746772e5L } }, + /* 142 */ + { { 0x10ee2681593375acL,0x4f3288be7dd5e113L,0x9a97b2fb240f3538L, + 0xfa11089f1de6b1e2L,0x516da5621351bc58L,0x573b61192dfa85b5L }, + { 0x89e966836cba7df5L,0xf299be158c28ab40L,0xe91c9348ad43fcbfL, + 0xe9bbc7cc9a1cefb3L,0xc8add876738b2775L,0x6e3b1f2e775eaa01L } }, + /* 143 */ + { { 0x0365a888b677788bL,0x634ae8c43fd6173cL,0x304987619e498dbeL, + 0x08c43e6dc8f779abL,0x068ae3844c09aca9L,0x2380c70b2018d170L }, + { 0xcf77fbc3a297c5ecL,0xdacbc853ca457948L,0x3690de04336bec7eL, + 0x26bbac6414eec461L,0xd1c23c7e1f713abfL,0xf08bbfcde6fd569eL } }, + /* 144 */ + { { 0x5f8163f484770ee3L,0x0e0c7f94744a1706L,0x9c8f05f7e1b2d46dL, + 0x417eafe7d01fd99aL,0x2ba15df511440e5bL,0xdc5c552a91a6fbcfL }, + { 0x86271d74a270f721L,0x32c0a075a004485bL,0x9d1a87e38defa075L, + 0xb590a7acbf0d20feL,0x430c41c28feda1f5L,0x454d287958f6ec24L } }, + /* 145 */ + { { 0x52b7a6357c525435L,0x3d9ef57f37c4bdbcL,0x2bb93e9edffcc475L, + 0xf7b8ba987710f3beL,0x42ee86da21b727deL,0x55ac3f192e490d01L }, + { 0x487e3a6ec0c1c390L,0x036fb345446cde7bL,0x089eb276496ae951L, + 0xedfed4d971ed1234L,0x661b0dd5900f0b46L,0x11bd6f1b8582f0d3L } }, + /* 146 */ + { { 0x5cf9350f076bc9d1L,0x15d903becf3cd2c3L,0x21cfc8c225af031cL, + 0xe0ad32488b1cc657L,0xdd9fb96370014e87L,0xf0f3a5a1297f1658L }, + { 0xbb908fbaf1f703aaL,0x2f9cc4202f6760baL,0x00ceec6666a38b51L, + 0x4deda33005d645daL,0xb9cf5c72f7de3394L,0xaeef65021ad4c906L } }, + /* 147 */ + { { 0x0583c8b17a19045dL,0xae7c3102d052824cL,0x2a234979ff6cfa58L, + 0xfe9dffc962c733c0L,0x3a7fa2509c0c4b09L,0x516437bb4fe21805L }, + { 0x9454e3d5c2a23ddbL,0x0726d887289c104eL,0x8977d9184fd15243L, + 0xc559e73f6d7790baL,0x8fd3e87d465af85fL,0xa2615c745feee46bL } }, + /* 148 */ + { { 0xc8d607a84335167dL,0x8b42d804e0f5c887L,0x5f9f13df398d11f9L, + 0x5aaa508720740c67L,0x83da9a6aa3d9234bL,0xbd3a5c4e2a54bad1L }, + { 0xdd13914c2db0f658L,0x29dcb66e5a3f373aL,0xbfd62df55245a72bL, + 
0x19d1802391e40847L,0xd9df74dbb136b1aeL,0x72a06b6b3f93bc5bL } }, + /* 149 */ + { { 0x6da19ec3ad19d96fL,0xb342daa4fb2a4099L,0x0e61633a662271eaL, + 0x3bcece81ce8c054bL,0x7cc8e0618bd62dc6L,0xae189e19ee578d8bL }, + { 0x73e7a25ddced1eedL,0xc1257f0a7875d3abL,0x2cb2d5a21cfef026L, + 0xd98ef39bb1fdf61cL,0xcd8e6f6924e83e6cL,0xd71e7076c7b7088bL } }, + /* 150 */ + { { 0x339368309d4245bfL,0x22d962172ac2953bL,0xb3bf5a8256c3c3cdL, + 0x50c9be910d0699e8L,0xec0944638f366459L,0x6c056dba513b7c35L }, + { 0x687a6a83045ab0e3L,0x8d40b57f445c9295L,0x0f345048a16f5954L, + 0x64b5c6393d8f0a87L,0x106353a29f71c5e2L,0xdd58b475874f0dd4L } }, + /* 151 */ + { { 0x67ec084f62230c72L,0xf14f6cca481385e3L,0xf58bb4074cda7774L, + 0xe15011b1aa2dbb6bL,0xd488369d0c035ab1L,0xef83c24a8245f2fdL }, + { 0xfb57328f9fdc2538L,0x79808293191fe46aL,0xe28f5c4432ede548L, + 0x1b3cda99ea1a022cL,0x39e639b73df2ec7fL,0x77b6272b760e9a18L } }, + /* 152 */ + { { 0x2b1d51bda65d56d5L,0x3a9b71f97ea696e0L,0x95250ecc9904f4c4L, + 0x8bc4d6ebe75774b7L,0x0e343f8aeaeeb9aaL,0xc473c1d1930e04cbL }, + { 0x282321b1064cd8aeL,0xf4b4371e5562221cL,0xc1cc81ecd1bf1221L, + 0xa52a07a9e2c8082fL,0x350d8e59ba64a958L,0x29e4f3de6fb32c9aL } }, + /* 153 */ + { { 0x0aa9d56cba89aaa5L,0xf0208ac0c4c6059eL,0x7400d9c6bd6ddca4L, + 0xb384e475f2c2f74aL,0x4c1061fcb1562dd3L,0x3924e2482e153b8dL }, + { 0xf38b8d98849808abL,0x29bf3260a491aa36L,0x85159ada88220edeL, + 0x8b47915bbe5bc422L,0xa934d72ed7300967L,0xc4f303982e515d0dL } }, + /* 154 */ + { { 0xe3e9ee421b1de38bL,0xa124e25a42636760L,0x90bf73c090165b1aL, + 0x21802a34146434c5L,0x54aa83f22e1fa109L,0x1d4bd03ced9c51e9L }, + { 0xc2d96a38798751e6L,0xed27235f8c3507f5L,0xb5fb80e2c8c24f88L, + 0xf873eefad37f4f78L,0x7229fd74f224ba96L,0x9dcd91999edd7149L } }, + /* 155 */ + { { 0xee9f81a64e94f22aL,0xe5609892f71ec341L,0x6c818ddda998284eL, + 0x9fd472953b54b098L,0x47a6ac030e8a7cc9L,0xde684e5eb207a382L }, + { 0x4bdd1ecd2b6b956bL,0x09084414f01b3583L,0xe2f80b3255233b14L, + 0x5a0fec54ef5ebc5eL,0x74cf25e6bf8b29a2L,0x1c757fa07f29e014L } }, + /* 156 */ + { { 0x1bcb5c4aeb0fdfe4L,0xd7c649b3f0899367L,0xaef68e3f05bc083bL, + 0x57a06e46a78aa607L,0xa2136ecc21223a44L,0x89bd648452f5a50bL }, + { 0x724411b94455f15aL,0x23dfa97008a9c0fdL,0x7b0da4d16db63befL, + 0x6f8a7ec1fb162443L,0xc1ac9ceee98284fbL,0x085a582b33566022L } }, + /* 157 */ + { { 0x15cb61f9ec1f138aL,0x11c9a230668f0c28L,0xac829729df93f38fL, + 0xcef256984048848dL,0x3f686da02bba8fbfL,0xed5fea78111c619aL }, + { 0x9b4f73bcd6d1c833L,0x5095160686e7bf80L,0xa2a73508042b1d51L, + 0x9ef6ea495fb89ec2L,0xf1008ce95ef8b892L,0x78a7e6849ae8568bL } }, + /* 158 */ + { { 0x3fe83a7c10470cd8L,0x92734682f86df000L,0xb5dac06bda9409b5L, + 0x1e7a966094939c5fL,0xdec6c1505cc116dcL,0x1a52b40866bac8ccL }, + { 0x5303a3656e864045L,0x45eae72a9139efc1L,0x83bec6466f31d54fL, + 0x2fb4a86f6e958a6dL,0x6760718e4ff44030L,0x008117e3e91ae0dfL } }, + /* 159 */ + { { 0x5d5833ba384310a2L,0xbdfb4edc1fd6c9fcL,0xb9a4f102849c4fb8L, + 0xe5fb239a581c1e1fL,0xba44b2e7d0a9746dL,0x78f7b7683bd942b9L }, + { 0x076c8ca1c87607aeL,0x82b23c2ed5caaa7eL,0x6a581f392763e461L, + 0xca8a5e4a3886df11L,0xc87e90cf264e7f22L,0x04f74870215cfcfcL } }, + /* 160 */ + { { 0x5285d116141d161cL,0x67cd2e0e93c4ed17L,0x12c62a647c36187eL, + 0xf5329539ed2584caL,0xc4c777c442fbbd69L,0x107de7761bdfc50aL }, + { 0x9976dcc5e96beebdL,0xbe2aff95a865a151L,0x0e0a9da19d8872afL, + 0x5e357a3da63c17ccL,0xd31fdfd8e15cc67cL,0xc44bbefd7970c6d8L } }, + /* 161 */ + { { 0x703f83e24c0c62f1L,0x9b1e28ee4e195572L,0x6a82858bfe26ccedL, + 0xd381c84bc43638faL,0x94f72867a5ba43d8L,0x3b4a783d10b82743L }, + { 
0xee1ad7b57576451eL,0xc3d0b59714b6b5c8L,0x3dc30954fcacc1b8L, + 0x55df110e472c9d7bL,0x97c86ed702f8a328L,0xd043341388dc098fL } }, + /* 162 */ + { { 0x1a60d1522ca8f2feL,0x61640948491bd41fL,0x6dae29a558dfe035L, + 0x9a615bea278e4863L,0xbbdb44779ad7c8e5L,0x1c7066302ceac2fcL }, + { 0x5e2b54c699699b4bL,0xb509ca6d239e17e8L,0x728165feea063a82L, + 0x6b5e609db6a22e02L,0x12813905b26ee1dfL,0x07b9f722439491faL } }, + /* 163 */ + { { 0x1592ec1448ff4e49L,0x3e4e9f176d644129L,0x7acf82881156acc0L, + 0x5aa34ba8bb092b0bL,0xcd0f90227d38393dL,0x416724ddea4f8187L }, + { 0x3c4e641cc0139e73L,0xe0fe46cf91e4d87dL,0xedb3c792cab61f8aL, + 0x4cb46de4d3868753L,0xe449c21d20f1098aL,0x5e5fd059f5b8ea6eL } }, + /* 164 */ + { { 0x7fcadd4675856031L,0x89c7a4cdeaf2fbd0L,0x1af523ce7a87c480L, + 0xe5fc109561d9ae90L,0x3fb5864fbcdb95f5L,0xbeb5188ebb5b2c7dL }, + { 0x3d1563c33ae65825L,0x116854c40e57d641L,0x11f73d341942ebd3L, + 0x24dc5904c06955b3L,0x8a0d4c83995a0a62L,0xfb26b86d5d577b7dL } }, + /* 165 */ + { { 0xc53108e7c686ae17L,0x9090d739d1c1da56L,0x4583b0139aec50aeL, + 0xdd9a088ba49a6ab2L,0x28192eeaf382f850L,0xcc8df756f5fe910eL }, + { 0x877823a39cab7630L,0x64984a9afb8e7fc1L,0x5448ef9c364bfc16L, + 0xbbb4f871c44e2a9aL,0x901a41ab435c95e9L,0xc6c23e5faaa50a06L } }, + /* 166 */ + { { 0xb78016c19034d8ddL,0x856bb44b0b13e79bL,0x85c6409ab3241a05L, + 0x8d2fe19a2d78ed21L,0xdcc7c26d726eddf2L,0x3ccaff5f25104f04L }, + { 0x397d7edc6b21f843L,0xda88e4dde975de4cL,0x5273d3964f5ab69eL, + 0x537680e39aae6cc0L,0xf749cce53e6f9461L,0x021ddbd9957bffd3L } }, + /* 167 */ + { { 0x7b64585f777233cfL,0xfe6771f60942a6f0L,0x636aba7adfe6eef0L, + 0x63bbeb5686038029L,0xacee5842de8fcf36L,0x48d9aa99d4a20524L }, + { 0xcff7a74c0da5e57aL,0xc232593ce549d6c9L,0x68504bccf0f2287bL, + 0x6d7d098dbc8360b5L,0xeac5f1495b402f41L,0x61936f11b87d1bf1L } }, + /* 168 */ + { { 0xaa9da167b8153a9dL,0xa49fe3ac9e83ecf0L,0x14c18f8e1b661384L, + 0x61c24dab38434de1L,0x3d973c3a283dae96L,0xc99baa0182754fc9L }, + { 0x477d198f4c26b1e3L,0x12e8e186a7516202L,0x386e52f6362addfaL, + 0x31e8f695c3962853L,0xdec2af136aaedb60L,0xfcfdb4c629cf74acL } }, + /* 169 */ + { { 0x6b3ee958cca40298L,0xc3878153f2f5d195L,0x0c565630ed2eae5bL, + 0xd089b37e3a697cf2L,0xc2ed2ac7ad5029eaL,0x7e5cdfad0f0dda6aL }, + { 0xf98426dfd9b86202L,0xed1960b14335e054L,0x1fdb02463f14639eL, + 0x17f709c30db6c670L,0xbfc687ae773421e1L,0x13fefc4a26c1a8acL } }, + /* 170 */ + { { 0xe361a1987ffa0a5fL,0xf4b26102c63fe109L,0x264acbc56c74e111L, + 0x4af445fa77abebafL,0x448c4fdd24cddb75L,0x0b13157d44506eeaL }, + { 0x22a6b15972e9993dL,0x2c3c57e485e5ecbeL,0xa673560bfd83e1a1L, + 0x6be23f82c3b8c83bL,0x40b13a9640bbe38eL,0x66eea033ad17399bL } }, + /* 171 */ + { { 0x49fc6e95b4c6c693L,0xefc735de36af7d38L,0xe053343d35fe42fcL, + 0xf0aa427c6a9ab7c3L,0xc79f04364a0fcb24L,0x1628724393ebbc50L }, + { 0x5c3d6bd016927e1eL,0x40158ed2673b984cL,0xa7f86fc84cd48b9aL, + 0x1643eda660ea282dL,0x45b393eae2a1beedL,0x664c839e19571a94L } }, + /* 172 */ + { { 0x5774575027eeaf94L,0x2875c925ea99e1e7L,0xc127e7ba5086adeaL, + 0x765252a086fe424fL,0x1143cc6c2b6c0281L,0xc9bb2989d671312dL }, + { 0x880c337c51acb0a5L,0xa3710915d3c60f78L,0x496113c09262b6edL, + 0x5d25d9f89ce48182L,0x53b6ad72b3813586L,0x0ea3bebc4c0e159cL } }, + /* 173 */ + { { 0xcaba450ac5e49beaL,0x684e54157c05da59L,0xa2e9cab9de7ac36cL, + 0x4ca79b5f2e6f957bL,0xef7b024709b817b1L,0xeb3049907d89df0fL }, + { 0x508f730746fe5096L,0x695810e82e04eaafL,0x88ef1bd93512f76cL, + 0x776613513ebca06bL,0xf7d4863accf158b7L,0xb2a81e4494ee57daL } }, + /* 174 */ + { { 0xff288e5b6d53e6baL,0xa90de1a914484ea2L,0x2fadb60ced33c8ecL, + 
0x579d6ef328b66a40L,0x4f2dd6ddec24372dL,0xe9e33fc91d66ec7dL }, + { 0x110899d2039eab6eL,0xa31a667a3e97bb5eL,0x6200166dcfdce68eL, + 0xbe83ebae5137d54bL,0x085f7d874800acdfL,0xcf4ab1330c6f8c86L } }, + /* 175 */ + { { 0x03f65845931e08fbL,0x6438551e1506e2c0L,0x5791f0dc9c36961fL, + 0x68107b29e3dcc916L,0x83242374f495d2caL,0xd8cfb6636ee5895bL }, + { 0x525e0f16a0349b1bL,0x33cd2c6c4a0fab86L,0x46c12ee82af8dda9L, + 0x7cc424ba71e97ad3L,0x69766ddf37621eb0L,0x95565f56a5f0d390L } }, + /* 176 */ + { { 0xe0e7bbf21a0f5e94L,0xf771e1151d82d327L,0x10033e3dceb111faL, + 0xd269744dd3426638L,0xbdf2d9da00d01ef6L,0x1cb80c71a049ceafL }, + { 0x17f183289e21c677L,0x6452af0519c8f98bL,0x35b9c5f780b67997L, + 0x5c2e1cbe40f8f3d4L,0x43f9165666d667caL,0x9faaa059cf9d6e79L } }, + /* 177 */ + { { 0x8ad246180a078fe6L,0xf6cc73e6464fd1ddL,0x4d2ce34dc3e37448L, + 0x624950c5e3271b5fL,0x62910f5eefc5af72L,0x8b585bf8aa132bc6L }, + { 0x11723985a839327fL,0x34e2d27d4aac252fL,0x402f59ef6296cc4eL, + 0x00ae055c47053de9L,0xfc22a97228b4f09bL,0xa9e86264fa0c180eL } }, + /* 178 */ + { { 0x0b7b6224bc310eccL,0x8a1a74f167fa14edL,0x87dd09607214395cL, + 0xdf1b3d09f5c91128L,0x39ff23c686b264a8L,0xdc2d49d03e58d4c5L }, + { 0x2152b7d3a9d6f501L,0xf4c32e24c04094f7L,0xc6366596d938990fL, + 0x084d078f94fb207fL,0xfd99f1d7328594cbL,0x36defa64cb2d96b3L } }, + /* 179 */ + { { 0x4619b78113ed7cbeL,0x95e500159784bd0eL,0x2a32251c2c7705feL, + 0xa376af995f0dd083L,0x55425c6c0361a45bL,0x812d2cef1f291e7bL }, + { 0xccf581a05fd94972L,0x26e20e39e56dc383L,0x0093685d63dbfbf0L, + 0x1fc164cc36b8c575L,0xb9c5ab81390ef5e7L,0x40086beb26908c66L } }, + /* 180 */ + { { 0xe5e54f7937e3c115L,0x69b8ee8cc1445a8aL,0x79aedff2b7659709L, + 0xe288e1631b46fbe6L,0xdb4844f0d18d7bb7L,0xe0ea23d048aa6424L }, + { 0x714c0e4ef3d80a73L,0x87a0aa9e3bd64f98L,0x8844b8a82ec63080L, + 0xe0ac9c30255d81a3L,0x86151237455397fcL,0x0b9794642f820155L } }, + /* 181 */ + { { 0x127a255a4ae03080L,0x232306b4580a89fbL,0x04e8cd6a6416f539L, + 0xaeb70dee13b02a0eL,0xa3038cf84c09684aL,0xa710ec3c28e433eeL }, + { 0x77a72567681b1f7dL,0x86fbce952fc28170L,0xd3408683f5735ac8L, + 0x3a324e2a6bd68e93L,0x7ec74353c027d155L,0xab60354cd4427177L } }, + /* 182 */ + { { 0x32a5342aef4c209dL,0x2ba7527408d62704L,0x4bb4af6fc825d5feL, + 0x1c3919ced28e7ff1L,0x1dfc2fdcde0340f6L,0xc6580baf29f33ba9L }, + { 0xae121e7541d442cbL,0x4c7727fd3a4724e4L,0xe556d6a4524f3474L, + 0x87e13cc7785642a2L,0x182efbb1a17845fdL,0xdcec0cf14e144857L } }, + /* 183 */ + { { 0x1cb89541e9539819L,0xc8cb3b4f9d94dbf1L,0x1d353f63417da578L, + 0xb7a697fb8053a09eL,0x8d841731c35d8b78L,0x85748d6fb656a7a9L }, + { 0x1fd03947c1859c5dL,0x6ce965c1535d22a2L,0x1966a13e0ca3aadcL, + 0x9802e41d4fb14effL,0xa9048cbb76dd3fcdL,0x89b182b5e9455bbaL } }, + /* 184 */ + { { 0xd777ad6a43360710L,0x841287ef55e9936bL,0xbaf5c67004a21b24L, + 0xf2c0725f35ad86f1L,0x338fa650c707e72eL,0x2bf8ed2ed8883e52L }, + { 0xb0212cf4b56e0d6aL,0x50537e126843290cL,0xd8b184a198b3dc6fL, + 0xd2be9a350210b722L,0x407406db559781eeL,0x5a78d5910bc18534L } }, + /* 185 */ + { { 0x4d57aa2ad748b02cL,0xbe5b3451a12b3b95L,0xadca7a4564711258L, + 0x597e091a322153dbL,0xf327100632eb1eabL,0xbd9adcba2873f301L }, + { 0xd1dc79d138543f7fL,0x00022092921b1fefL,0x86db3ef51e5df8edL, + 0x888cae049e6b944aL,0x71bd29ec791a32b4L,0xd3516206a6d1c13eL } }, + /* 186 */ + { { 0x2ef6b95255924f43L,0xd2f401ae4f9de8d5L,0xfc73e8d7adc68042L, + 0x627ea70c0d9d1bb4L,0xc3bb3e3ebbf35679L,0x7e8a254ad882dee4L }, + { 0x08906f50b5924407L,0xf14a0e61a1ad444aL,0xaa0efa2165f3738eL, + 0xd60c7dd6ae71f161L,0x9e8390faf175894dL,0xd115cd20149f4c00L } }, + /* 187 */ + { { 
0x2f2e2c1da52abf77L,0xc2a0dca554232568L,0xed423ea254966dccL, + 0xe48c93c7cd0dd039L,0x1e54a225176405c7L,0x1efb5b1670d58f2eL }, + { 0xa751f9d994fb1471L,0xfdb31e1f67d2941dL,0xa6c74eb253733698L, + 0xd3155d1189a0f64aL,0x4414cfe4a4b8d2b6L,0x8d5a4be8f7a8e9e3L } }, + /* 188 */ + { { 0x5c96b4d452669e98L,0x4547f9228fd42a03L,0xcf5c1319d285174eL, + 0x805cd1ae064bffa0L,0x50e8bc4f246d27e7L,0xf89ef98fd5781e11L }, + { 0xb4ff95f6dee0b63fL,0xad850047222663a4L,0x026918604d23ce9cL, + 0x3e5309ce50019f59L,0x27e6f72269a508aeL,0xe9376652267ba52cL } }, + /* 189 */ + { { 0xa04d289cc0368708L,0xc458872f5e306e1dL,0x76fa23de33112feaL, + 0x718e39746efde42eL,0xf0c98cdc1d206091L,0x5fa3ca6214a71987L }, + { 0xeee8188bdcaa9f2aL,0x312cc732589a860dL,0xf9808dd6c63aeb1fL, + 0x70fd43db4ea62b53L,0x2c2bfe34890b6e97L,0x105f863cfa426aa6L } }, + /* 190 */ + { { 0x0b29795db38059adL,0x5686b77e90647ea0L,0xeff0470edb473a3eL, + 0x278d2340f9b6d1e2L,0xebbff95bbd594ec7L,0xf4b72334d3a7f23dL }, + { 0x2a285980a5a83f0bL,0x0786c41a9716a8b3L,0x138901bd22511812L, + 0xd1b55221e2fede6eL,0x0806e264df4eb590L,0x6c4c897e762e462eL } }, + /* 191 */ + { { 0xd10b905fb4b41d9dL,0x826ca4664523a65bL,0x535bbd13b699fa37L, + 0x5b9933d773bc8f90L,0x9332d61fcd2118adL,0x158c693ed4a65fd0L }, + { 0x4ddfb2a8e6806e63L,0xe31ed3ecb5de651bL,0xf9460e51819bc69aL, + 0x6229c0d62c76b1f8L,0xbb78f231901970a3L,0x31f3820f9cee72b8L } }, + /* 192 */ + { { 0xe931caf2c09e1c72L,0x0715f29812990cf4L,0x33aad81d943262d8L, + 0x5d292b7a73048d3fL,0xb152aaa4dc7415f6L,0xc3d10fd90fd19587L }, + { 0xf76b35c575ddadd0L,0x9f5f4a511e7b694cL,0x2f1ab7ebc0663025L, + 0x01c9cc87920260b0L,0xc4b1f61a05d39da6L,0x6dcd76c4eb4a9c4eL } }, + /* 193 */ + { { 0x0ba0916ffdc83f01L,0x354c8b449553e4f9L,0xa6cc511affc5e622L, + 0xb954726ae95be787L,0xcb04811575b41a62L,0xfa2ae6cdebfde989L }, + { 0x6376bbc70f24659aL,0x13a999fd4c289c43L,0xc7134184ec9abd8bL, + 0x28c02bf6a789ab04L,0xff841ebcd3e526ecL,0x442b191e640893a8L } }, + /* 194 */ + { { 0x4cac6c62fa2b6e20L,0x97f29e9bf6d69861L,0x228ab1dbbc96d12dL, + 0x6eb913275e8e108dL,0xd4b3d4d140771245L,0x61b20623ca8a803aL }, + { 0x2c2f3b41a6a560b1L,0x879e1d403859fcf4L,0x7cdb5145024dbfc3L, + 0x55d08f153bfa5315L,0x2f57d773aa93823aL,0xa97f259cc6a2c9a2L } }, + /* 195 */ + { { 0xc306317be58edbbbL,0x25ade51c79dfdf13L,0x6b5beaf116d83dd6L, + 0xe8038a441dd8f925L,0x7f00143cb2a87b6bL,0xa885d00df5b438deL }, + { 0xe9f76790cf9e48bdL,0xf0bdf9f0a5162768L,0x0436709fad7b57cbL, + 0x7e151c12f7c15db7L,0x3514f0225d90ee3bL,0x2e84e8032c361a8dL } }, + /* 196 */ + { { 0x2277607d563ec8d8L,0xa661811fe3934cb7L,0x3ca72e7af58fd5deL, + 0x7989da0462294c6aL,0x88b3708bf6bbefe9L,0x0d524cf753ed7c82L }, + { 0x69f699ca2f30c073L,0xf0fa264b9dc1dcf3L,0x44ca456805f0aaf6L, + 0x0f5b23c7d19b9bafL,0x39193f41eabd1107L,0x9e3e10ad2a7c9b83L } }, + /* 197 */ + { { 0xa90824f0d4ae972fL,0x43eef02bc6e846e7L,0x7e46061229d2160aL, + 0x29a178acfe604e91L,0x23056f044eb184b2L,0x4fcad55feb54cdf4L }, + { 0xa0ff96f3ae728d15L,0x8a2680c6c6a00331L,0x5f84cae07ee52556L, + 0x5e462c3ac5a65dadL,0x5d2b81dfe2d23f4fL,0x6e47301bc5b1eb07L } }, + /* 198 */ + { { 0x77411d68af8219b9L,0xcb883ce651b1907aL,0x25c87e57101383b5L, + 0x9c7d9859982f970dL,0xaa6abca5118305d2L,0x725fed2f9013a5dbL }, + { 0x487cdbafababd109L,0xc0f8cf5687586528L,0xa02591e68ad58254L, + 0xc071b1d1debbd526L,0x927dfe8b961e7e31L,0x55f895f99263dfe1L } }, + /* 199 */ + { { 0xf899b00db175645bL,0x51f3a627b65b4b92L,0xa2f3ac8db67399efL, + 0xe717867fe400bc20L,0x42cc90201967b952L,0x3d5967513ecd1de1L }, + { 0xd41ebcdedb979775L,0x99ba61bc6a2e7e88L,0x039149a5321504f2L, + 
0xe7dc231427ba2fadL,0x9f556308b57d8368L,0x2b6d16c957da80a7L } }, + /* 200 */ + { { 0x84af5e76279ad982L,0x9bb4c92d9c8b81a6L,0xd79ad44e0e698e67L, + 0xe8be9048265fc167L,0xf135f7e60c3a4cccL,0xa0a10d38b8863a33L }, + { 0xe197247cd386efd9L,0x0eefd3f9b52346c2L,0xc22415f978607bc8L, + 0xa2a8f862508674ceL,0xa72ad09ec8c9d607L,0xcd9f0ede50fa764fL } }, + /* 201 */ + { { 0x063391c7d1a46d4dL,0x2df51c119eb01693L,0xc5849800849e83deL, + 0x48fd09aa8ad08382L,0xa405d873aa742736L,0xee49e61ee1f9600cL }, + { 0xd76676be48c76f73L,0xd9c100f601274b2aL,0x110bb67c83f8718dL, + 0xec85a42002fc0d73L,0xc0449e1e744656adL,0x28ce737637d9939bL } }, + /* 202 */ + { { 0x97e9af7244544ac7L,0xf2c658d5ba010426L,0x732dec39fb3adfbdL, + 0xd12faf91a2df0b07L,0x8ac267252171e208L,0xf820cdc85b24fa54L }, + { 0x307a6eea94f4cf77L,0x18c783d2944a33c6L,0x4b939d4c0b741ac5L, + 0x1d7acd153ffbb6e4L,0x06a248587a255e44L,0x14fbc494ce336d50L } }, + /* 203 */ + { { 0x9b920c0c51584e3cL,0xc7733c59f7e54027L,0xe24ce13988422bbeL, + 0x11ada812523bd6abL,0xde068800b88e6defL,0x7b872671fe8c582dL }, + { 0x4e746f287de53510L,0x492f8b99f7971968L,0x1ec80bc77d928ac2L, + 0xb3913e48432eb1b5L,0xad08486632028f6eL,0x122bb8358fc2f38bL } }, + /* 204 */ + { { 0x0a9f3b1e3b0b29c3L,0x837b64324fa44151L,0xb9905c9217b28ea7L, + 0xf39bc93798451750L,0xcd383c24ce8b6da1L,0x299f57db010620b2L }, + { 0x7b6ac39658afdce3L,0xa15206b33d05ef47L,0xa0ae37e2b9bb02ffL, + 0x107760ab9db3964cL,0xe29de9a067954beaL,0x446a1ad8431c3f82L } }, + /* 205 */ + { { 0xc6fecea05c6b8195L,0xd744a7c5f49e71b9L,0xa8e96acc177a7ae7L, + 0x1a05746c358773a7L,0xa416214637567369L,0xaa0217f787d1c971L }, + { 0x61e9d15877fd3226L,0x0f6f2304e4f600beL,0xa9c4cebc7a6dff07L, + 0xd15afa0109f12a24L,0x2bbadb228c863ee9L,0xa28290e4e5eb8c78L } }, + /* 206 */ + { { 0x55b87fa03e9de330L,0x12b26066195c145bL,0xe08536e0a920bef0L, + 0x7bff6f2c4d195adcL,0x7f319e9d945f4187L,0xf9848863f892ce47L }, + { 0xd0efc1d34fe37657L,0x3c58de825cf0e45aL,0x626ad21a8b0ccbbeL, + 0xd2a31208af952fc5L,0x81791995eb437357L,0x5f19d30f98e95d4fL } }, + /* 207 */ + { { 0x72e83d9a0e6865bbL,0x22f5af3bf63456a6L,0x409e9c73463c8d9eL, + 0x40e9e578dfe6970eL,0x876b6efa711b91caL,0x895512cf942625a3L }, + { 0x84c8eda8cb4e462bL,0x84c0154a4412e7c8L,0x04325db1ceb7b71fL, + 0x1537dde366f70877L,0xf3a093991992b9acL,0xa7316606d498ae77L } }, + /* 208 */ + { { 0x13990d2fcad260f5L,0x76c3be29eec0e8c0L,0x7dc5bee00f7bd7d5L, + 0x9be167d2efebda4bL,0xcce3dde69122b87eL,0x75a28b0982b5415cL }, + { 0xf6810bcde84607a6L,0xc6d581286f4dbf0dL,0xfead577d1b4dafebL, + 0x9bc440b2066b28ebL,0x53f1da978b17e84bL,0x0459504bcda9a575L } }, + /* 209 */ + { { 0x13e39a02329e5836L,0x2c9e7d51f717269dL,0xc5ac58d6f26c963bL, + 0x3b0c6c4379967bf5L,0x60bbea3f55908d9dL,0xd84811e7f07c9ad1L }, + { 0xfe7609a75bd20e4aL,0xe4325dd20a70baa8L,0x3711f370b3600386L, + 0x97f9562fd0924302L,0x040dc0c34acc4436L,0xfd6d725cde79cdd4L } }, + /* 210 */ + { { 0xb3efd0e3cf13eafbL,0x21009cbb5aa0ae5fL,0xe480c55379022279L, + 0x755cf334b2fc9a6dL,0x8564a5bf07096ae7L,0xddd649d0bd238139L }, + { 0xd0de10b18a045041L,0x6e05b413c957d572L,0x5c5ff8064e0fb25cL, + 0xd933179b641162fbL,0x42d48485e57439f9L,0x70c5bd0a8a8d72aaL } }, + /* 211 */ + { { 0xa767173897bdf646L,0xaa1485b4ab329f7cL,0xce3e11d6f8f25fdfL, + 0x76a3fc7ec6221824L,0x045f281ff3924740L,0x24557d4e96d13a9aL }, + { 0x875c804bdd4c27cdL,0x11c5f0f40f5c7feaL,0xac8c880bdc55ff7eL, + 0x2acddec51103f101L,0x38341a21f99faa89L,0xc7b67a2cce9d6b57L } }, + /* 212 */ + { { 0x9a0d724f8e357586L,0x1d7f4ff5df648da0L,0x9c3e6c9bfdee62a5L, + 0x0499cef00389b372L,0xe904050d98eab879L,0xe8eef1b66c051617L }, + { 
0xebf5bfebc37e3ca9L,0x7c5e946da4e0b91dL,0x790973142c4bea28L, + 0x81f6c109ee67b2b7L,0xaf237d9bdafc5edeL,0xd2e602012abb04c7L } }, + /* 213 */ + { { 0x6156060c8a4f57bfL,0xf9758696ff11182aL,0x8336773c6296ef00L, + 0x9c054bceff666899L,0xd6a11611719cd11cL,0x9824a641dbe1acfaL }, + { 0x0b7b7a5fba89fd01L,0xf8d3b809889f79d8L,0xc5e1ea08f578285cL, + 0x7ac74536ae6d8288L,0x5d37a2007521ef5fL,0x5ecc4184b260a25dL } }, + /* 214 */ + { { 0xddcebb19a708c8d3L,0xe63ed04fc63f81ecL,0xd045f5a011873f95L, + 0x3b5ad54479f276d5L,0x81272a3d425ae5b3L,0x8bfeb50110ce1605L }, + { 0x4233809c888228bfL,0x4bd82acfb2aff7dfL,0x9c68f1800cbd4a7fL, + 0xfcd771246b44323dL,0x60c0fcf6891db957L,0xcfbb4d8904da8f7fL } }, + /* 215 */ + { { 0x9a6a5df93b26139aL,0x3e076a83b2cc7eb8L,0x47a8e82d5a964bcdL, + 0x8a4e2a39b9278d6bL,0x93506c98e4443549L,0x06497a8ff1e0d566L }, + { 0x3dee8d992b1efa05L,0x2da63ca845393e33L,0xa4af7277cf0579adL, + 0xaf4b46393236d8eaL,0x6ccad95b32b617f5L,0xce76d8b8b88bb124L } }, + /* 216 */ + { { 0x63d2537a083843dcL,0x89eb35141e4153b4L,0x5175ebc4ea9afc94L, + 0x7a6525808ed1aed7L,0x67295611d85e8297L,0x8dd2d68bb584b73dL }, + { 0x237139e60133c3a4L,0x9de838ab4bd278eaL,0xe829b072c062fcd9L, + 0x70730d4f63ba8706L,0x6080483fd3cd05ecL,0x872ab5b80c85f84dL } }, + /* 217 */ + { { 0xfc0776d3999d4d49L,0xa3eb59deec3f45e7L,0xbc990e440dae1fc1L, + 0x33596b1ea15371ffL,0xd447dcb29bc7ab25L,0xcd5b63e935979582L }, + { 0xae3366fa77d1ff11L,0x59f28f05edee6903L,0x6f43fed1a4433bf2L, + 0x15409c9bdf9ce00eL,0x21b5cdedaca9c5dcL,0xf9f3359582d7bdb4L } }, + /* 218 */ + { { 0x959443789422c792L,0x239ea923c958b8bfL,0x4b61a247df076541L, + 0x4d29ce85bb9fc544L,0x9a692a670b424559L,0x6e0ca5a00e486900L }, + { 0x6b79a78285b3beceL,0x41f35e39c61f9892L,0xff82099aae747f82L, + 0x58c8ae3fd0ca59d6L,0x4ac930e299406b5fL,0x2ce04eb99df24243L } }, + /* 219 */ + { { 0x4366b9941ac37b82L,0xff0c728d25b04d83L,0x1f55136119c47b7cL, + 0xdbf2d5edbeff13e7L,0xf78efd51e12a683dL,0x82cd85b9989cf9c4L }, + { 0xe23c6db6e0cb5d37L,0x818aeebd72ee1a15L,0x8212aafd28771b14L, + 0x7bc221d91def817dL,0xdac403a29445c51fL,0x711b051712c3746bL } }, + /* 220 */ + { { 0x0ed9ed485ea99eccL,0xf799500db8cab5e1L,0xa8ec87dcb570cbdcL, + 0x52cfb2c2d35dfaecL,0x8d31fae26e4d80a4L,0xe6a37dc9dcdeabe5L }, + { 0x5d365a341deca452L,0x09a5f8a50d68b44eL,0x59238ea5a60744b1L, + 0xf2fedc0dbb4249e9L,0xe395c74ea909b2e3L,0xe156d1a539388250L } }, + /* 221 */ + { { 0xd796b3d047181ae9L,0xbaf44ba844197808L,0xe693309434cf3facL, + 0x41aa6adec3bd5c46L,0x4fda75d8eed947c6L,0xacd9d4129ea5a525L }, + { 0x65cc55a3d430301bL,0x3c9a5bcf7b52ea49L,0x22d319cf159507f0L, + 0x2ee0b9b5de74a8ddL,0x20c26a1e877ac2b6L,0x387d73da92e7c314L } }, + /* 222 */ + { { 0x13c4833e8cd3fdacL,0x76fcd473332e5b8eL,0xff671b4be2fe1fd3L, + 0x4d734e8b5d98d8ecL,0xb1ead3c6514bbc11L,0xd14ca8587b390494L }, + { 0x95a443af5d2d37e9L,0x73c6ea7300464622L,0xa44aeb4b15755044L, + 0xba3f8575fab58feeL,0x9779dbc9dc680a6fL,0xe1ee5f5a7b37ddfcL } }, + /* 223 */ + { { 0xcd0b464812d29f46L,0x93295b0b0ed53137L,0xbfe2609480bef6c9L, + 0xa656578854248b00L,0x69c43fca80e7f9c4L,0x2190837bbe141ea1L }, + { 0x875e159aa1b26cfbL,0x90ca9f877affe852L,0x15e6550d92ca598eL, + 0xe3e0945d1938ad11L,0xef7636bb366ef937L,0xb6034d0bb39869e5L } }, + /* 224 */ + { { 0x4d255e3026d8356eL,0xf83666edd314626fL,0x421ddf61d0c8ed64L, + 0x96e473c526677b61L,0xdad4af7e9e9b18b3L,0xfceffd4aa9393f75L }, + { 0x843138a111c731d5L,0x05bcb3a1b2f141d9L,0x20e1fa95617b7671L, + 0xbefce81288ccec7bL,0x582073dc90f1b568L,0xf572261a1f055cb7L } }, + /* 225 */ + { { 0xf314827736973088L,0xc008e70886a9f980L,0x1b795947e046c261L, + 
0xdf1e6a7dca76bca0L,0xabafd88671acddf0L,0xff7054d91364d8f4L }, + { 0x2cf63547e2260594L,0x468a5372d73b277eL,0xc7419e24ef9bd35eL, + 0x2b4a1c2024043cc3L,0xa28f047a890b39cdL,0xdca2cea146f9a2e3L } }, + /* 226 */ + { { 0xab78873653277538L,0xa734e225cf697738L,0x66ee1d1e6b22e2c1L, + 0x2c615389ebe1d212L,0xf36cad4002bb0766L,0x120885c33e64f207L }, + { 0x59e77d5690fbfec2L,0xf9e781aad7a574aeL,0x801410b05d045e53L, + 0xd3b5f0aaa91b5f0eL,0xb3d1df007fbb3521L,0x11c4b33ec72bee9aL } }, + /* 227 */ + { { 0xd32b983283c3a7f3L,0x8083abcf88d8a354L,0xdeb1640450f4ec5aL, + 0x18d747f0641e2907L,0x4e8978aef1bbf03eL,0x932447dc88a0cd89L }, + { 0x561e0febcf3d5897L,0xfc3a682f13600e6dL,0xc78b9d73d16a6b73L, + 0xe713feded29bf580L,0x0a22522308d69e5cL,0x3a924a571ff7fda4L } }, + /* 228 */ + { { 0xfb64554cb4093beeL,0xa6d65a25a58c6ec0L,0x4126994d43d0ed37L, + 0xa5689a5155152d44L,0xb8e5ea8c284caa8dL,0x33f05d4fd1f25538L }, + { 0xe0fdfe091b615d6eL,0x2ded7e8f705507daL,0xdd5631e517bbcc80L, + 0x4f87453e267fd11fL,0xc6da723fff89d62dL,0x55cbcae2e3cda21dL } }, + /* 229 */ + { { 0x336bc94e6b4e84f3L,0x728630314ef72c35L,0x6d85fdeeeeb57f99L, + 0x7f4e3272a42ece1bL,0x7f86cbb536f0320aL,0xf09b6a2b923331e6L }, + { 0x21d3ecf156778435L,0x2977ba998323b2d2L,0x6a1b57fb1704bc0fL, + 0xd777cf8b389f048aL,0x9ce2174fac6b42cdL,0x404e2bff09e6c55aL } }, + /* 230 */ + { { 0x9b9b135e204c5ddbL,0x9dbfe0443eff550eL,0x35eab4bfec3be0f6L, + 0x8b4c3f0d0a43e56fL,0x4c1c66730e73f9b3L,0x92ed38bd2c78c905L }, + { 0xc7003f6aa386e27cL,0xb9c4f46faced8507L,0xea024ec859df5464L, + 0x4af96152429572eaL,0x279cd5e2e1fc1194L,0xaa376a03281e358cL } }, + /* 231 */ + { { 0x078592233cdbc95cL,0xaae1aa6aef2e337aL,0xc040108d472a8544L, + 0x80c853e68d037b7dL,0xd221315c8c7eee24L,0x195d38568ee47752L }, + { 0xd4b1ba03dacd7fbeL,0x4b5ac61ed3e0c52bL,0x68d3c0526aab7b52L, + 0xf0d7248c660e3feaL,0xafdb3f893145efb4L,0xa73fd9a38f40936dL } }, + /* 232 */ + { { 0x891b9ef3bb1b17ceL,0x14023667c6127f31L,0x12b2e58d305521fdL, + 0x3a47e449e3508088L,0xe49fc84bff751507L,0x4023f7225310d16eL }, + { 0xa608e5edb73399faL,0xf12632d8d532aa3eL,0x13a2758e845e8415L, + 0xae4b6f851fc2d861L,0x3879f5b1339d02f2L,0x446d22a680d99ebdL } }, + /* 233 */ + { { 0x0f5023024be164f1L,0x8d09d2d688b81920L,0x514056f1984aceffL, + 0xa5c4ddf075e9e80dL,0x38cb47e6df496a93L,0x899e1d6b38df6bf7L }, + { 0x69e87e88b59eb2a6L,0x280d9d639b47f38bL,0x599411ea3654e955L, + 0xcf8dd4fd969aa581L,0xff5c2baf530742a7L,0xa43915361a373085L } }, + /* 234 */ + { { 0x6ace72a3a8a4bdd2L,0xc656cdd1b68ef702L,0xd4a33e7e90c4dad8L, + 0x4aece08a9d951c50L,0xea8005ae085d68e6L,0xfdd7a7d76f7502b8L }, + { 0xce6fb0a698d6fa45L,0x228f86721104eb8cL,0xd23d8787da09d7dcL, + 0x5521428b2ae93065L,0x95faba3dea56c366L,0xedbe50390a88aca5L } }, + /* 235 */ + { { 0xd64da0adbfb26c82L,0xe5d70b3c952c2f9cL,0xf5e8f365f7e77f68L, + 0x7234e00208f2d695L,0xfaf900eed12e7be6L,0x27dc69344acf734eL }, + { 0x80e4ff5ec260a46aL,0x7da5ebce2dc31c28L,0x485c5d73ca69f552L, + 0xcdfb6b2969cc84c2L,0x031c5afeed6d4ecaL,0xc7bbf4c822247637L } }, + /* 236 */ + { { 0x9d5b72c749fe01b2L,0x34785186793a91b8L,0xa3ba3c54cf460438L, + 0x73e8e43d3ab21b6fL,0x50cde8e0be57b8abL,0x6488b3a7dd204264L }, + { 0xa9e398b3dddc4582L,0x1698c1a95bec46feL,0x7f1446ef156d3843L, + 0x3fd25dd8770329a2L,0x05b1221a2c710668L,0x65b2dc2aa72ee6cfL } }, + /* 237 */ + { { 0x21a885f7cd021d63L,0x3f344b15fea61f08L,0xad5ba6ddc5cf73e6L, + 0x154d0d8f227a8b23L,0x9b74373cdc559311L,0x4feab71598620fa1L }, + { 0x5098938e7d9ec924L,0x84d54a5e6d47e550L,0x1a2d1bdc1b617506L, + 0x99fe1782615868a4L,0x171da7803005a924L,0xa70bf5ed7d8f79b6L } }, + /* 238 */ + { { 
0x0bc1250dfe2216c5L,0x2c37e2507601b351L,0xb6300175d6f06b7eL, + 0x4dde8ca18bfeb9b7L,0x4f210432b82f843dL,0x8d70e2f9b1ac0afdL }, + { 0x25c73b78aae91abbL,0x0230dca3863028f2L,0x8b923ecfe5cf30b7L, + 0xed754ec25506f265L,0x8e41b88c729a5e39L,0xee67cec2babf889bL } }, + /* 239 */ + { { 0xe183acf51be46c65L,0x9789538fe7565d7aL,0x87873391d9627b4eL, + 0xbf4ac4c19f1d9187L,0x5db99f634691f5c8L,0xa68df80374a1fb98L }, + { 0x3c448ed1bf92b5faL,0xa098c8413e0bdc32L,0x8e74cd5579bf016cL, + 0x5df0d09c115e244dL,0x9418ad013410b66eL,0x8b6124cb17a02130L } }, + /* 240 */ + { { 0x425ec3afc26e3392L,0xc07f8470a1722e00L,0xdcc28190e2356b43L, + 0x4ed97dffb1ef59a6L,0xc22b3ad1c63028c1L,0x070723c268c18988L }, + { 0x70da302f4cf49e7dL,0xc5e87c933f12a522L,0x74acdd1d18594148L, + 0xad5f73abca74124cL,0xe72e4a3ed69fd478L,0x615938687b117cc3L } }, + /* 241 */ + { { 0x7b7b9577a9aa0486L,0x6e41fb35a063d557L,0xb017d5c7da9047d7L, + 0x8c74828068a87ba9L,0xab45fa5cdf08ad93L,0xcd9fb2174c288a28L }, + { 0x595446425747843dL,0x34d64c6ca56111e3L,0x12e47ea14bfce8d5L, + 0x17740e056169267fL,0x5c49438eeed03fb5L,0x9da30add4fc3f513L } }, + /* 242 */ + { { 0xc4e85282ccfa5200L,0x2707608f6a19b13dL,0xdcb9a53df5726e2fL, + 0x612407c9e9427de5L,0x3e5a17e1d54d582aL,0xb99877de655ae118L }, + { 0x6f0e972b015254deL,0x92a56db1f0a6f7c5L,0xd297e4e1a656f8b2L, + 0x99fe0052ad981983L,0xd3652d2f07cfed84L,0xc784352e843c1738L } }, + /* 243 */ + { { 0x6ee90af07e9b2d8aL,0xac8d701857cf1964L,0xf6ed903171f28efcL, + 0x7f70d5a96812b20eL,0x27b557f4f1c61eeeL,0xf1c9bd57c6263758L }, + { 0x5cf7d0142a1a6194L,0xdd614e0b1890ab84L,0x3ef9de100e93c2a6L, + 0xf98cf575e0cd91c5L,0x504ec0c614befc32L,0xd0513a666279d68cL } }, + /* 244 */ + { { 0xa8eadbada859fb6aL,0xcf8346e7db283666L,0x7b35e61a3e22e355L, + 0x293ece2c99639c6bL,0xfa0162e256f241c8L,0xd2e6c7b9bf7a1ddaL }, + { 0xd0de625340075e63L,0x2405aa61f9ec8286L,0x2237830a8fe45494L, + 0x4fd01ac7364e9c8cL,0x4d9c3d21904ba750L,0xd589be14af1b520bL } }, + /* 245 */ + { { 0x13576a4f4662e53bL,0x35ec2f51f9077676L,0x66297d1397c0af97L, + 0xed3201fe9e598b58L,0x49bc752a5e70f604L,0xb54af535bb12d951L }, + { 0x36ea4c2b212c1c76L,0x18f5bbc7eb250dfdL,0xa0d466cc9a0a1a46L, + 0x52564da4dac2d917L,0x206559f48e95fab5L,0x7487c1909ca67a33L } }, + /* 246 */ + { { 0x75abfe37dde98e9cL,0x99b90b262a411199L,0x1b410996dcdb1f7cL, + 0xab346f118b3b5675L,0x04852193f1f8ae1eL,0x1ec4d2276b8b98c1L }, + { 0xba3bc92645452baaL,0x387d1858acc4a572L,0x9478eff6e51f171eL, + 0xf357077d931e1c00L,0xffee77cde54c8ca8L,0xfb4892ff551dc9a4L } }, + /* 247 */ + { { 0x5b1bdad02db8dff8L,0xd462f4fd5a2285a2L,0x1d6aad8eda00b461L, + 0x43fbefcf41306d1bL,0x428e86f36a13fe19L,0xc8b2f11817f89404L }, + { 0x762528aaf0d51afbL,0xa3e2fea4549b1d06L,0x86fad8f2ea3ddf66L, + 0x0d9ccc4b4fbdd206L,0xcde97d4cc189ff5aL,0xc36793d6199f19a6L } }, + /* 248 */ + { { 0xea38909b51b85197L,0xffb17dd0b4c92895L,0x0eb0878b1ddb3f3fL, + 0xb05d28ffc57cf0f2L,0xd8bde2e71abd57e2L,0x7f2be28dc40c1b20L }, + { 0x6554dca2299a2d48L,0x5130ba2e8377982dL,0x8863205f1071971aL, + 0x15ee62827cf2825dL,0xd4b6c57f03748f2bL,0xa9e3f4da430385a0L } }, + /* 249 */ + { { 0x33eb7cec83fbc9c6L,0x24a311c74541777eL,0xc81377f74f0767fcL, + 0x12adae364ab702daL,0xb7fcb6db2a779696L,0x4a6fb28401cea6adL }, + { 0x5e8b1d2acdfc73deL,0xd0efae8d1b02fd32L,0x3f99c190d81d8519L, + 0x3c18f7fafc808971L,0x41f713e751b7ae7bL,0x0a4b3435f07fc3f8L } }, + /* 250 */ + { { 0x7dda3c4c019b7d2eL,0x631c8d1ad4dc4b89L,0x5489cd6e1cdb313cL, + 0xd44aed104c07bb06L,0x8f97e13a75f000d1L,0x0e9ee64fdda5df4dL }, + { 0xeaa99f3b3e346910L,0x622f6921fa294ad7L,0x22aaa20d0d0b2fe9L, + 
0x4fed2f991e5881baL,0x9af3b2d6c1571802L,0x919e67a8dc7ee17cL } }, + /* 251 */ + { { 0xc724fe4c76250533L,0x8a2080e57d817ef8L,0xa2afb0f4172c9751L, + 0x9b10cdeb17c0702eL,0xbf3975e3c9b7e3e9L,0x206117df1cd0cdc5L }, + { 0xfb049e61be05ebd5L,0xeb0bb55c16c782c0L,0x13a331b8ab7fed09L, + 0xf6c58b1d632863f0L,0x6264ef6e4d3b6195L,0x92c51b639a53f116L } }, + /* 252 */ + { { 0xa57c7bc8288b364dL,0x4a562e087b41e5c4L,0x699d21c6698a9a11L, + 0xa4ed9581f3f849b9L,0xa223eef39eb726baL,0x13159c23cc2884f9L }, + { 0x73931e583a3f4963L,0x965003890ada6a81L,0x3ee8a1c65ab2950bL, + 0xeedf4949775fab52L,0x63d652e14f2671b6L,0xfed4491c3c4e2f55L } }, + /* 253 */ + { { 0x335eadc3f4eb453eL,0x5ff74b63cadd1a5bL,0x6933d0d75d84a91aL, + 0x9ca3eeb9b49ba337L,0x1f6faccec04c15b8L,0x4ef19326dc09a7e4L }, + { 0x53d2d3243dca3233L,0x0ee40590a2259d4bL,0x18c22edb5546f002L, + 0x9242980109ea6b71L,0xaada0addb0e91e61L,0x5fe53ef499963c50L } }, + /* 254 */ + { { 0x372dd06b90c28c65L,0x1765242c119ce47dL,0xc041fb806b22fc82L, + 0x667edf07b0a7ccc1L,0xc79599e71261beceL,0xbc69d9ba19cff22aL }, + { 0x009d77cd13c06819L,0x635a66aee282b79dL,0x4edac4a6225b1be8L, + 0x57d4f4e4524008f9L,0xee299ac5b056af84L,0xcc38444c3a0bc386L } }, + /* 255 */ + { { 0x490643b1cd4c2356L,0x740a4851750547beL,0x643eaf29d4944c04L, + 0xba572479299a98a0L,0x48b29f16ee05fdf9L,0x33fb4f61089b2d7bL }, + { 0x86704902a950f955L,0x97e1034dfedc3ddfL,0x211320b605fbb6a2L, + 0x23d7b93f432299bbL,0x1fe1a0578590e4a3L,0x8e1d0586f58c0ce6L } }, +}; + +/* Multiply the base point of P384 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_base_6(sp_point_384* r, const sp_digit* k, + int map, void* heap) +{ + return sp_384_ecc_mulmod_stripe_6(r, &p384_base, p384_table, + k, map, heap); +} + +#ifdef HAVE_INTEL_AVX2 +/* Multiply the base point of P384 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_384_ecc_mulmod_base_avx2_6(sp_point_384* r, const sp_digit* k, + int map, void* heap) +{ + return sp_384_ecc_mulmod_stripe_avx2_6(r, &p384_base, p384_table, + k, map, heap); +} + +#endif /* HAVE_INTEL_AVX2 */ +/* Multiply the base point of P384 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
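+ *
+ * A minimal usage sketch, not from the original source (pt and ret are
+ * illustrative names; error handling elided):
+ *   ecc_point* pt = wc_ecc_new_point();
+ *   ret = sp_ecc_mulmod_base_384(km, pt, 1, NULL);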
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[6];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    err = sp_384_point_new_6(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 6, km);
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_384_ecc_mulmod_base_avx2_6(point, k, map, heap);
+        else
+#endif
+            err = sp_384_ecc_mulmod_base_6(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_6(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_6(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+extern void sp_384_add_one_6(sp_digit* a);
+extern void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n);
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_6(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[48];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_384_from_bin(k, 6, buf, (int)sizeof(buf));
+            if (sp_384_cmp_6(k, p384_order2) < 0) {
+                sp_384_add_one_6(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
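+ *
+ * Illustrative call sequence (hypothetical locals; assumes wc_InitRng and
+ * mp_init have already succeeded, error handling elided):
+ *   ecc_point* pub = wc_ecc_new_point();
+ *   err = sp_ecc_make_key_384(&rng, &priv, pub, NULL);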
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[6];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    (void)heap;
+
+    err = sp_384_point_new_6(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_6(rng, k);
+    }
+    if (err == MP_OKAY) {
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_384_ecc_mulmod_base_avx2_6(point, k, 1, NULL);
+        else
+#endif
+            err = sp_384_ecc_mulmod_base_6(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+            err = sp_384_ecc_mulmod_avx2_6(infinity, point, p384_order, 1,
+                NULL);
+        }
+        else
+#endif
+            err = sp_384_ecc_mulmod_6(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_384_iszero_6(point->x) == 0) || (sp_384_iszero_6(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_6(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_6(infinity, 1, heap);
+#endif
+    sp_384_point_free_6(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+extern void sp_384_to_bin(sp_digit* r, byte* a);
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for the output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
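+ *
+ * ECDH sketch (illustrative): each side multiplies the peer's public point
+ * by its own private scalar, so both derive the same X ordinate:
+ *   byte secret[48];
+ *   word32 secretLen = (word32)sizeof(secret);
+ *   err = sp_ecc_secret_gen_384(priv, peerPub, secret, &secretLen, NULL);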
+ */
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+    word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[6];
+#endif
+    sp_point_384* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    if (*outLen < 48U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 6, priv);
+        sp_384_point_from_ecc_point_6(point, pub);
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_384_ecc_mulmod_avx2_6(point, point, k, 1, heap);
+        else
+#endif
+            err = sp_384_ecc_mulmod_6(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_384_to_bin(point->x, out);
+        *outLen = 48;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef HAVE_INTEL_AVX2
+#endif /* HAVE_INTEL_AVX2 */
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+extern sp_digit sp_384_sub_in_place_6(sp_digit* a, const sp_digit* b);
+extern sp_digit sp_384_cond_sub_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_384_mul_d_6(sp_digit* r, const sp_digit* a, sp_digit b);
+extern void sp_384_mul_d_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit b);
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The divisor.
+ * returns the result of the division.
+ */
+static WC_INLINE sp_digit div_384_word_6(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    register sp_digit r asm("rax");
+    __asm__ __volatile__ (
+        "divq %3"
+        : "=a" (r)
+        : "d" (d1), "a" (d0), "r" (div)
+        :
+    );
+    return r;
+}
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * m  Mask to AND against each digit.
+ */
+static void sp_384_mask_6(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<6; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+#endif
+}
+
+/* Divide a by d and put the remainder into r. (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with.
+ * m  Multiplier result (not calculated).
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
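+ *
+ * Each iteration below estimates one 64-bit quotient word with
+ * div_384_word_6; the estimate may be slightly high, so up to two masked
+ * add-backs of the divisor correct it without data-dependent branches.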
+ */
+static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[12], t2[7];
+    sp_digit div, r1;
+    int i;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    (void)m;
+
+    div = d[5];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 6);
+    r1 = sp_384_cmp_6(&t1[6], d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+        sp_384_cond_sub_avx2_6(&t1[6], &t1[6], d, (sp_digit)0 - r1);
+    else
+#endif
+        sp_384_cond_sub_6(&t1[6], &t1[6], d, (sp_digit)0 - r1);
+    for (i=5; i>=0; i--) {
+        r1 = div_384_word_6(t1[6 + i], t1[6 + i - 1], div);
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            sp_384_mul_d_avx2_6(t2, d, r1);
+        else
+#endif
+            sp_384_mul_d_6(t2, d, r1);
+        t1[6 + i] += sp_384_sub_in_place_6(&t1[i], t2);
+        t1[6 + i] -= t2[6];
+        sp_384_mask_6(t2, d, t1[6 + i]);
+        t1[6 + i] += sp_384_add_6(&t1[i], &t1[i], t2);
+        sp_384_mask_6(t2, d, t1[6 + i]);
+        t1[6 + i] += sp_384_add_6(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_384_cmp_6(t1, d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+        sp_384_cond_sub_avx2_6(r, t1, d, (sp_digit)0 - r1);
+    else
+#endif
+        sp_384_cond_sub_6(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_mod_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_6(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve. */
+static const uint64_t p384_order_minus_2[6] = {
+    0xecec196accc52971U,0x581a0db248b0a77aU,0xc7634d81f4372ddfU,
+    0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve. */
+static const uint64_t p384_order_low[3] = {
+    0xecec196accc52971U,0x581a0db248b0a77aU,0xc7634d81f4372ddfU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of the P384 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_6(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_6(r, a, b);
+    sp_384_mont_reduce_order_6(r, p384_order, p384_mp_order);
+}
+
+/* Square a number mod the order of the P384 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_order_6(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_6(r, a);
+    sp_384_mont_reduce_order_6(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square a number mod the order of the P384 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ * n  Number of times to square.
+ */
+static void sp_384_mont_sqr_n_order_6(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_6(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_6(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_order_6(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 6);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_6(t, t);
+        if ((p384_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_384_mont_mul_order_6(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 6U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 6;
+    sp_digit* t3 = td + 4 * 6;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_6(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_6(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_6(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_6(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_6(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_6(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_6(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_6(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_6(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_6(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_6(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_6(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
+    sp_384_mont_sqr_n_order_6(t2, t, 48);
+    /* t= a^ffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_6(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_6(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_6(t2, t2, t);
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_6(t2, t2);
+        if (((sp_digit)p384_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_384_mont_mul_order_6(t2, t2, a);
+        }
+    }
+    sp_384_mont_sqr_order_6(t2, t2);
+    sp_384_mont_mul_order_6(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef HAVE_INTEL_AVX2
+/* Multiply two numbers mod the order of the P384 curve. (r = a * b mod order)
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_avx2_6(r, a, b);
+    sp_384_mont_reduce_order_avx2_6(r, p384_order, p384_mp_order);
+}
+
+/* Square a number mod the order of the P384 curve. (r = a * a mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_384_mont_sqr_order_avx2_6(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_avx2_6(r, a);
+    sp_384_mont_reduce_order_avx2_6(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square a number mod the order of the P384 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ * n  Number of times to square.
+ */
+static void sp_384_mont_sqr_n_order_avx2_6(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_avx2_6(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_avx2_6(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_order_avx2_6(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 6);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_avx2_6(t, t);
+        if ((p384_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_384_mont_mul_order_avx2_6(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 6U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 6;
+    sp_digit* t3 = td + 4 * 6;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_avx2_6(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_avx2_6(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_avx2_6(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_avx2_6(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_avx2_6(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_avx2_6(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_avx2_6(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_avx2_6(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_avx2_6(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_avx2_6(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_avx2_6(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_avx2_6(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
+    sp_384_mont_sqr_n_order_avx2_6(t2, t, 48);
+    /* t= a^ffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_avx2_6(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_avx2_6(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_avx2_6(t2, t2, t);
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_avx2_6(t2, t2);
+        if (((sp_digit)p384_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_384_mont_mul_order_avx2_6(t2, t2, a);
+        }
+    }
+    sp_384_mont_sqr_order_avx2_6(t2, t2);
+    sp_384_mont_mul_order_avx2_6(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ *   e = [hash, 384 bits] from binary
+ *   r = (k.G)->x mod order
+ *   s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Second part of result as an mp_int.
+ * km       Ephemeral scalar to use; when NULL or zero a random one is
+ *          generated.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
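+ *
+ * These are the standard ECDSA signing equations with nonce k:
+ *   r = (k.G)->x mod order,  s = k^-1.(e + r.priv) mod order
+ * so the loop retries with a fresh k whenever s turns out to be zero.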
+ */ +int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv, + mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d = NULL; +#else + sp_digit ed[2*6]; + sp_digit xd[2*6]; + sp_digit kd[2*6]; + sp_digit rd[2*6]; + sp_digit td[3 * 2*6]; + sp_point_384 p; +#endif + sp_digit* e = NULL; + sp_digit* x = NULL; + sp_digit* k = NULL; + sp_digit* r = NULL; + sp_digit* tmp = NULL; + sp_point_384* point = NULL; + sp_digit carry; + sp_digit* s = NULL; + sp_digit* kInv = NULL; + int err = MP_OKAY; + int64_t c; + int i; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + (void)heap; + + err = sp_384_point_new_6(heap, p, point); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 6, heap, + DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + e = d + 0 * 6; + x = d + 2 * 6; + k = d + 4 * 6; + r = d + 6 * 6; + tmp = d + 8 * 6; +#else + e = ed; + x = xd; + k = kd; + r = rd; + tmp = td; +#endif + s = e; + kInv = k; + + if (hashLen > 48U) { + hashLen = 48U; + } + + sp_384_from_bin(e, 6, hash, (int)hashLen); + } + + for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { + sp_384_from_mp(x, 6, priv); + + /* New random point. */ + if (km == NULL || mp_iszero(km)) { + err = sp_384_ecc_gen_k_6(rng, k); + } + else { + sp_384_from_mp(k, 6, km); + mp_zero(km); + } + if (err == MP_OKAY) { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_384_ecc_mulmod_base_avx2_6(point, k, 1, heap); + else +#endif + err = sp_384_ecc_mulmod_base_6(point, k, 1, NULL); + } + + if (err == MP_OKAY) { + /* r = point->x mod order */ + XMEMCPY(r, point->x, sizeof(sp_digit) * 6U); + sp_384_norm_6(r); + c = sp_384_cmp_6(r, p384_order); + sp_384_cond_sub_6(r, r, p384_order, 0L - (sp_digit)(c >= 0)); + sp_384_norm_6(r); + + /* Conv k to Montgomery form (mod order) */ +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_384_mul_avx2_6(k, k, p384_norm_order); + else +#endif + sp_384_mul_6(k, k, p384_norm_order); + err = sp_384_mod_6(k, k, p384_order); + } + if (err == MP_OKAY) { + sp_384_norm_6(k); + /* kInv = 1/k mod order */ +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_384_mont_inv_order_avx2_6(kInv, k, tmp); + else +#endif + sp_384_mont_inv_order_6(kInv, k, tmp); + sp_384_norm_6(kInv); + + /* s = r * x + e */ +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_384_mul_avx2_6(x, x, r); + else +#endif + sp_384_mul_6(x, x, r); + err = sp_384_mod_6(x, x, p384_order); + } + if (err == MP_OKAY) { + sp_384_norm_6(x); + carry = sp_384_add_6(s, e, x); + sp_384_cond_sub_6(s, s, p384_order, 0 - carry); + sp_384_norm_6(s); + c = sp_384_cmp_6(s, p384_order); + sp_384_cond_sub_6(s, s, p384_order, 0L - (sp_digit)(c >= 0)); + sp_384_norm_6(s); + + /* s = s * k^-1 mod order */ +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_384_mont_mul_order_avx2_6(s, s, kInv); + else +#endif + sp_384_mont_mul_order_6(s, s, kInv); + sp_384_norm_6(s); + + /* Check that signature is usable. 
*/
+            if (sp_384_iszero_6(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 8 * 6);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 6U);
+#endif
+    sp_384_point_free_6(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ *   e = Truncate(hash, 384)
+ *   u1 = e/s mod order
+ *   u2 = r/s mod order
+ *   r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash of the data to verify.
+ * hashLen  Length of the hash data.
+ * pX       X ordinate of the public point.
+ * pY       Y ordinate of the public point.
+ * pZ       Z ordinate of the public point.
+ * r        First part of the signature as an mp_int.
+ * sm       Second part of the signature as an mp_int.
+ * res      Set to 1 when the signature verifies and to 0 otherwise.
+ * heap     Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*6];
+    sp_digit u2d[2*6];
+    sp_digit sd[2*6];
+    sp_digit tmpd[2*6 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int64_t c;
+    int err;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    err = sp_384_point_new_6(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 6, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1 = d + 0 * 6;
+        u2 = d + 2 * 6;
+        s = d + 4 * 6;
+        tmp = d + 6 * 6;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(u1, 6, hash, (int)hashLen);
+        sp_384_from_mp(u2, 6, r);
+        sp_384_from_mp(s, 6, sm);
+        sp_384_from_mp(p2->x, 6, pX);
+        sp_384_from_mp(p2->y, 6, pY);
+        sp_384_from_mp(p2->z, 6, pZ);
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+            sp_384_mul_avx2_6(s, s, p384_norm_order);
+        }
+        else
+#endif
+        {
+            sp_384_mul_6(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_6(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_6(s);
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+            sp_384_mont_inv_order_avx2_6(s, s, tmp);
+            sp_384_mont_mul_order_avx2_6(u1, u1, s);
+            sp_384_mont_mul_order_avx2_6(u2, u2, s);
+        }
+        else
+#endif
+        {
+            sp_384_mont_inv_order_6(s, s, tmp);
+            sp_384_mont_mul_order_6(u1, u1, s);
+            sp_384_mont_mul_order_6(u2, u2, s);
+        }
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_384_ecc_mulmod_base_avx2_6(p1, u1, 0, heap);
+        else
+#endif
+            err = sp_384_ecc_mulmod_base_6(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_384_ecc_mulmod_avx2_6(p2, p2, u2, 0, heap);
+        else
+#endif
+            err = sp_384_ecc_mulmod_6(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+            sp_384_proj_point_add_avx2_6(p1, p1, p2, tmp);
+            if (sp_384_iszero_6(p1->z)) {
+                if (sp_384_iszero_6(p1->x) && sp_384_iszero_6(p1->y)) {
+                    sp_384_proj_point_dbl_avx2_6(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+        else
+#endif
+        {
+            sp_384_proj_point_add_6(p1, p1, p2, tmp);
+            if (sp_384_iszero_6(p1->z)) {
+                if (sp_384_iszero_6(p1->x) && sp_384_iszero_6(p1->y)) {
+                    sp_384_proj_point_dbl_6(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 6, r);
+        err = sp_384_mod_mul_norm_6(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_6(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_6(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 6, r);
+            carry = sp_384_add_6(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_6(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_6(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montgomery form */
+                    err = sp_384_mod_mul_norm_6(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_6(u1, u2, p1->z, p384_mod,
+                            p384_mp_mod);
+                        *res = (int)(sp_384_cmp_6(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_6(p1, 0, heap);
+    sp_384_point_free_6(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point  EC point.
+ * heap   Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
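+ *
+ * The check below evaluates the short Weierstrass equation for P384,
+ *   y^2 == x^3 - 3.x + b (mod p),
+ * rearranged as y^2 - x^3 + 3.x == b so only a compare with b is needed.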
+ */
+static int sp_384_ecc_is_point_6(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*6];
+    sp_digit t2d[2*6];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 6;
+        t2 = d + 2 * 6;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        sp_384_sqr_6(t1, point->y);
+        (void)sp_384_mod_6(t1, t1, p384_mod);
+        sp_384_sqr_6(t2, point->x);
+        (void)sp_384_mod_6(t2, t2, p384_mod);
+        sp_384_mul_6(t2, t2, point->x);
+        (void)sp_384_mod_6(t2, t2, p384_mod);
+        (void)sp_384_sub_6(t2, p384_mod, t2);
+        sp_384_mont_add_6(t1, t1, t2, p384_mod);
+
+        sp_384_mont_add_6(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_6(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_6(t1, t1, point->x, p384_mod);
+
+        if (sp_384_cmp_6(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX  X ordinate of EC point.
+ * pY  Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 pubd;
+#endif
+    sp_point_384* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_6(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pub->x, 6, pX);
+        sp_384_from_mp(pub->y, 6, pY);
+        sp_384_from_bin(pub->z, 6, one, (int)sizeof(one));
+
+        err = sp_384_ecc_is_point_6(pub, NULL);
+    }
+
+    sp_384_point_free_6(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX     X ordinate of EC point.
+ * pY     Y ordinate of EC point.
+ * privm  Private scalar that generates EC point.
+ * heap   Heap to use for allocation.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_OUT_OF_RANGE_E if pX or pY is not less than the
+ * prime, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
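+ *
+ * The checks run in this order: reject the point at infinity, range check
+ * X and Y against the prime, verify the curve equation, verify
+ * order.point == infinity and, finally, verify priv.G == point.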
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[6];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    err = sp_384_point_new_6(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+                                  DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_384_from_mp(pub->x, 6, pX);
+        sp_384_from_mp(pub->y, 6, pY);
+        sp_384_from_bin(pub->z, 6, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 6, privm);
+
+        /* Check point at infinity. */
+        if ((sp_384_iszero_6(pub->x) != 0) &&
+            (sp_384_iszero_6(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_384_cmp_6(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_6(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_6(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_384_ecc_mulmod_avx2_6(p, pub, p384_order, 1, heap);
+        else
+#endif
+            err = sp_384_ecc_mulmod_6(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_6(p->x) == 0) ||
+            (sp_384_iszero_6(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_384_ecc_mulmod_base_avx2_6(p, priv, 1, heap);
+        else
+#endif
+            err = sp_384_ecc_mulmod_base_6(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_6(p->x, pub->x) != 0 ||
+            sp_384_cmp_6(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(p, 0, heap);
+    sp_384_point_free_6(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX  First EC point's X ordinate.
+ * pY  First EC point's Y ordinate.
+ * pZ  First EC point's Z ordinate.
+ * qX  Second EC point's X ordinate.
+ * qY  Second EC point's Y ordinate.
+ * qZ  Second EC point's Z ordinate.
+ * rX  Resultant EC point's X ordinate.
+ * rY  Resultant EC point's Y ordinate.
+ * rZ  Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
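+ *
+ * Ordinates are Jacobian projective: (X, Y, Z) represents the affine
+ * point (X / Z^2, Y / Z^3), the same form noted in the verify comment
+ * above.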
+ */ +int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 6 * 5]; + sp_point_384 pd; + sp_point_384 qd; +#endif + sp_digit* tmp; + sp_point_384* p; + sp_point_384* q = NULL; + int err; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + err = sp_384_point_new_6(NULL, pd, p); + if (err == MP_OKAY) { + err = sp_384_point_new_6(NULL, qd, q); + } +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 6, pX); + sp_384_from_mp(p->y, 6, pY); + sp_384_from_mp(p->z, 6, pZ); + sp_384_from_mp(q->x, 6, qX); + sp_384_from_mp(q->y, 6, qY); + sp_384_from_mp(q->z, 6, qZ); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_384_proj_point_add_avx2_6(p, p, q, tmp); + else +#endif + sp_384_proj_point_add_6(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_6(q, 0, NULL); + sp_384_point_free_6(p, 0, NULL); + + return err; +} + +/* Double a projective EC point. + * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
+ */ +int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 6 * 2]; + sp_point_384 pd; +#endif + sp_digit* tmp; + sp_point_384* p; + int err; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + err = sp_384_point_new_6(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 6, pX); + sp_384_from_mp(p->y, 6, pY); + sp_384_from_mp(p->z, 6, pZ); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_384_proj_point_dbl_avx2_6(p, p, tmp); + else +#endif + sp_384_proj_point_dbl_6(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, rZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_6(p, 0, NULL); + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC) + sp_digit tmpd[2 * 6 * 6]; + sp_point_384 pd; +#endif + sp_digit* tmp; + sp_point_384* p; + int err; + + err = sp_384_point_new_6(NULL, pd, p); +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#else + tmp = tmpd; +#endif + if (err == MP_OKAY) { + sp_384_from_mp(p->x, 6, pX); + sp_384_from_mp(p->y, 6, pY); + sp_384_from_mp(p->z, 6, pZ); + + sp_384_map_6(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(p->z, pZ); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (tmp != NULL) { + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + } +#endif + sp_384_point_free_6(p, 0, NULL); + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
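+ *
+ * The P384 prime p is congruent to 3 mod 4, so when y is a quadratic
+ * residue a square root is y^((p+1)/4) mod p; the fixed addition chain
+ * below computes exactly that exponentiation.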
+ */ +static int sp_384_mont_sqrt_6(sp_digit* y) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit t1d[2 * 6]; + sp_digit t2d[2 * 6]; + sp_digit t3d[2 * 6]; + sp_digit t4d[2 * 6]; + sp_digit t5d[2 * 6]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* t3; + sp_digit* t4; + sp_digit* t5; + int err = MP_OKAY; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 6, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + t1 = d + 0 * 6; + t2 = d + 2 * 6; + t3 = d + 4 * 6; + t4 = d + 6 * 6; + t5 = d + 8 * 6; +#else + t1 = t1d; + t2 = t2d; + t3 = t3d; + t4 = t4d; + t5 = t5d; +#endif + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + /* t2 = y ^ 0x2 */ + sp_384_mont_sqr_avx2_6(t2, y, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3 */ + sp_384_mont_mul_avx2_6(t1, t2, y, p384_mod, p384_mp_mod); + /* t5 = y ^ 0xc */ + sp_384_mont_sqr_n_avx2_6(t5, t1, 2, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xf */ + sp_384_mont_mul_avx2_6(t1, t1, t5, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x1e */ + sp_384_mont_sqr_avx2_6(t2, t1, p384_mod, p384_mp_mod); + /* t3 = y ^ 0x1f */ + sp_384_mont_mul_avx2_6(t3, t2, y, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3e0 */ + sp_384_mont_sqr_n_avx2_6(t2, t3, 5, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3ff */ + sp_384_mont_mul_avx2_6(t1, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x7fe0 */ + sp_384_mont_sqr_n_avx2_6(t2, t1, 5, p384_mod, p384_mp_mod); + /* t3 = y ^ 0x7fff */ + sp_384_mont_mul_avx2_6(t3, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fff800 */ + sp_384_mont_sqr_n_avx2_6(t2, t3, 15, p384_mod, p384_mp_mod); + /* t4 = y ^ 0x3ffffff */ + sp_384_mont_mul_avx2_6(t4, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xffffffc000000 */ + sp_384_mont_sqr_n_avx2_6(t2, t4, 30, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xfffffffffffff */ + sp_384_mont_mul_avx2_6(t1, t4, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xfffffffffffffff000000000000000 */ + sp_384_mont_sqr_n_avx2_6(t2, t1, 60, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xffffffffffffffffffffffffffffff */ + sp_384_mont_mul_avx2_6(t1, t1, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */ + sp_384_mont_sqr_n_avx2_6(t2, t1, 120, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_avx2_6(t1, t1, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */ + sp_384_mont_sqr_n_avx2_6(t2, t1, 15, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_avx2_6(t1, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */ + sp_384_mont_sqr_n_avx2_6(t2, t1, 31, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */ + sp_384_mont_mul_avx2_6(t1, t4, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */ + sp_384_mont_sqr_n_avx2_6(t2, t1, 4, p384_mod, p384_mp_mod); + /* t1 = y ^ 
0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */ + sp_384_mont_mul_avx2_6(t1, t5, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */ + sp_384_mont_sqr_n_avx2_6(t2, t1, 62, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */ + sp_384_mont_mul_avx2_6(t1, y, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */ + sp_384_mont_sqr_n_avx2_6(y, t1, 30, p384_mod, p384_mp_mod); + } + else +#endif + { + /* t2 = y ^ 0x2 */ + sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3 */ + sp_384_mont_mul_6(t1, t2, y, p384_mod, p384_mp_mod); + /* t5 = y ^ 0xc */ + sp_384_mont_sqr_n_6(t5, t1, 2, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xf */ + sp_384_mont_mul_6(t1, t1, t5, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x1e */ + sp_384_mont_sqr_6(t2, t1, p384_mod, p384_mp_mod); + /* t3 = y ^ 0x1f */ + sp_384_mont_mul_6(t3, t2, y, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3e0 */ + sp_384_mont_sqr_n_6(t2, t3, 5, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3ff */ + sp_384_mont_mul_6(t1, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x7fe0 */ + sp_384_mont_sqr_n_6(t2, t1, 5, p384_mod, p384_mp_mod); + /* t3 = y ^ 0x7fff */ + sp_384_mont_mul_6(t3, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fff800 */ + sp_384_mont_sqr_n_6(t2, t3, 15, p384_mod, p384_mp_mod); + /* t4 = y ^ 0x3ffffff */ + sp_384_mont_mul_6(t4, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xffffffc000000 */ + sp_384_mont_sqr_n_6(t2, t4, 30, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xfffffffffffff */ + sp_384_mont_mul_6(t1, t4, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xfffffffffffffff000000000000000 */ + sp_384_mont_sqr_n_6(t2, t1, 60, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xffffffffffffffffffffffffffffff */ + sp_384_mont_mul_6(t1, t1, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */ + sp_384_mont_sqr_n_6(t2, t1, 120, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_6(t1, t1, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */ + sp_384_mont_sqr_n_6(t2, t1, 15, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_384_mont_mul_6(t1, t3, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */ + sp_384_mont_sqr_n_6(t2, t1, 31, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */ + sp_384_mont_mul_6(t1, t4, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */ + sp_384_mont_sqr_n_6(t2, t1, 4, p384_mod, p384_mp_mod); + /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */ + sp_384_mont_mul_6(t1, t5, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */ + sp_384_mont_sqr_n_6(t2, t1, 62, p384_mod, p384_mp_mod); + /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */ + sp_384_mont_mul_6(t1, y, t2, p384_mod, p384_mp_mod); + /* t2 = y ^ 
0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */ + sp_384_mont_sqr_n_6(y, t1, 30, p384_mod, p384_mp_mod); + } + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) +{ +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit* d; +#else + sp_digit xd[2 * 6]; + sp_digit yd[2 * 6]; +#endif + sp_digit* x = NULL; + sp_digit* y = NULL; + int err = MP_OKAY; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 6, NULL, DYNAMIC_TYPE_ECC); + if (d == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + x = d + 0 * 6; + y = d + 2 * 6; +#else + x = xd; + y = yd; +#endif + + sp_384_from_mp(x, 6, xm); + err = sp_384_mod_mul_norm_6(x, x, p384_mod); + } + if (err == MP_OKAY) { + /* y = x^3 */ +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + sp_384_mont_sqr_avx2_6(y, x, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(y, y, x, p384_mod, p384_mp_mod); + } + else +#endif + { + sp_384_mont_sqr_6(y, x, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(y, y, x, p384_mod, p384_mp_mod); + } + /* y = x^3 - 3x */ + sp_384_mont_sub_6(y, y, x, p384_mod); + sp_384_mont_sub_6(y, y, x, p384_mod); + sp_384_mont_sub_6(y, y, x, p384_mod); + /* y = x^3 - 3x + b */ + err = sp_384_mod_mul_norm_6(x, p384_b, p384_mod); + } + if (err == MP_OKAY) { + sp_384_mont_add_6(y, y, x, p384_mod); + /* y = sqrt(x^3 - 3x + b) */ + err = sp_384_mont_sqrt_6(y); + } + if (err == MP_OKAY) { + XMEMSET(y + 6, 0, 6U * sizeof(sp_digit)); + sp_384_mont_reduce_6(y, p384_mod, p384_mp_mod); + if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) { + sp_384_mont_sub_6(y, p384_mod, y, p384_mod); + } + + err = sp_384_to_mp(y, ym); + } + +#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} +#endif +#endif /* WOLFSSL_SP_384 */ +#endif /* WOLFSSL_HAVE_SP_ECC */ +#endif /* WOLFSSL_SP_X86_64_ASM */ +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */ diff --git a/client/wolfssl/wolfcrypt/src/sp_x86_64_asm.S b/client/wolfssl/wolfcrypt/src/sp_x86_64_asm.S new file mode 100644 index 0000000..c6941f1 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/sp_x86_64_asm.S @@ -0,0 +1,41830 @@ +/* sp_x86_64_asm + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#define HAVE_INTEL_AVX2 +#ifndef WOLFSSL_SP_NO_2048 +#ifndef WOLFSSL_SP_NO_2048 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +#ifndef __APPLE__ +.globl sp_2048_from_bin +.type sp_2048_from_bin,@function +.align 16 +sp_2048_from_bin: +#else +.globl _sp_2048_from_bin +.p2align 4 +_sp_2048_from_bin: +#endif /* __APPLE__ */ + movq %rdx, %r9 + movq %rdi, %r10 + addq %rcx, %r9 + addq $256, %r10 + xorq %r11, %r11 + jmp L_2048_from_bin_64_end +L_2048_from_bin_64_start: + subq $64, %r9 + movbeq 56(%r9), %rax + movbeq 48(%r9), %r8 + movq %rax, (%rdi) + movq %r8, 8(%rdi) + movbeq 40(%r9), %rax + movbeq 32(%r9), %r8 + movq %rax, 16(%rdi) + movq %r8, 24(%rdi) + movbeq 24(%r9), %rax + movbeq 16(%r9), %r8 + movq %rax, 32(%rdi) + movq %r8, 40(%rdi) + movbeq 8(%r9), %rax + movbeq (%r9), %r8 + movq %rax, 48(%rdi) + movq %r8, 56(%rdi) + addq $64, %rdi + subq $64, %rcx +L_2048_from_bin_64_end: + cmpq $63, %rcx + jg L_2048_from_bin_64_start + jmp L_2048_from_bin_8_end +L_2048_from_bin_8_start: + subq $8, %r9 + movbeq (%r9), %rax + movq %rax, (%rdi) + addq $8, %rdi + subq $8, %rcx +L_2048_from_bin_8_end: + cmpq $7, %rcx + jg L_2048_from_bin_8_start + cmpq %r11, %rcx + je L_2048_from_bin_hi_end + movq %r11, %r8 + movq %r11, %rax +L_2048_from_bin_hi_start: + movb (%rdx), %al + shlq $8, %r8 + incq %rdx + addq %rax, %r8 + decq %rcx + jg L_2048_from_bin_hi_start + movq %r8, (%rdi) + addq $8, %rdi +L_2048_from_bin_hi_end: + cmpq %r10, %rdi + je L_2048_from_bin_zero_end +L_2048_from_bin_zero_start: + movq %r11, (%rdi) + addq $8, %rdi + cmpq %r10, %rdi + jl L_2048_from_bin_zero_start +L_2048_from_bin_zero_end: + repz retq +#ifndef __APPLE__ +.size sp_2048_from_bin,.-sp_2048_from_bin +#endif /* __APPLE__ */ +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 256 + * + * r A single precision integer. + * a Byte array. 
+ */ +#ifndef __APPLE__ +.globl sp_2048_to_bin +.type sp_2048_to_bin,@function +.align 16 +sp_2048_to_bin: +#else +.globl _sp_2048_to_bin +.p2align 4 +_sp_2048_to_bin: +#endif /* __APPLE__ */ + movbeq 248(%rdi), %rdx + movbeq 240(%rdi), %rax + movq %rdx, (%rsi) + movq %rax, 8(%rsi) + movbeq 232(%rdi), %rdx + movbeq 224(%rdi), %rax + movq %rdx, 16(%rsi) + movq %rax, 24(%rsi) + movbeq 216(%rdi), %rdx + movbeq 208(%rdi), %rax + movq %rdx, 32(%rsi) + movq %rax, 40(%rsi) + movbeq 200(%rdi), %rdx + movbeq 192(%rdi), %rax + movq %rdx, 48(%rsi) + movq %rax, 56(%rsi) + movbeq 184(%rdi), %rdx + movbeq 176(%rdi), %rax + movq %rdx, 64(%rsi) + movq %rax, 72(%rsi) + movbeq 168(%rdi), %rdx + movbeq 160(%rdi), %rax + movq %rdx, 80(%rsi) + movq %rax, 88(%rsi) + movbeq 152(%rdi), %rdx + movbeq 144(%rdi), %rax + movq %rdx, 96(%rsi) + movq %rax, 104(%rsi) + movbeq 136(%rdi), %rdx + movbeq 128(%rdi), %rax + movq %rdx, 112(%rsi) + movq %rax, 120(%rsi) + movbeq 120(%rdi), %rdx + movbeq 112(%rdi), %rax + movq %rdx, 128(%rsi) + movq %rax, 136(%rsi) + movbeq 104(%rdi), %rdx + movbeq 96(%rdi), %rax + movq %rdx, 144(%rsi) + movq %rax, 152(%rsi) + movbeq 88(%rdi), %rdx + movbeq 80(%rdi), %rax + movq %rdx, 160(%rsi) + movq %rax, 168(%rsi) + movbeq 72(%rdi), %rdx + movbeq 64(%rdi), %rax + movq %rdx, 176(%rsi) + movq %rax, 184(%rsi) + movbeq 56(%rdi), %rdx + movbeq 48(%rdi), %rax + movq %rdx, 192(%rsi) + movq %rax, 200(%rsi) + movbeq 40(%rdi), %rdx + movbeq 32(%rdi), %rax + movq %rdx, 208(%rsi) + movq %rax, 216(%rsi) + movbeq 24(%rdi), %rdx + movbeq 16(%rdi), %rax + movq %rdx, 224(%rsi) + movq %rax, 232(%rsi) + movbeq 8(%rdi), %rdx + movbeq (%rdi), %rax + movq %rdx, 240(%rsi) + movq %rax, 248(%rsi) + repz retq +#ifndef __APPLE__ +.size sp_2048_to_bin,.-sp_2048_to_bin +#endif /* __APPLE__ */ +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +#ifndef __APPLE__ +.globl sp_2048_mul_16 +.type sp_2048_mul_16,@function +.align 16 +sp_2048_mul_16: +#else +.globl _sp_2048_mul_16 +.p2align 4 +_sp_2048_mul_16: +#endif /* __APPLE__ */ + movq %rdx, %rcx + subq $128, %rsp + # A[0] * B[0] + movq (%rcx), %rax + mulq (%rsi) + xorq %r10, %r10 + movq %rax, (%rsp) + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rcx), %rax + mulq (%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[1] * B[0] + movq (%rcx), %rax + mulq 8(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 8(%rsp) + # A[0] * B[2] + movq 16(%rcx), %rax + mulq (%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[1] * B[1] + movq 8(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[2] * B[0] + movq (%rcx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 16(%rsp) + # A[0] * B[3] + movq 24(%rcx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[1] * B[2] + movq 16(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[2] * B[1] + movq 8(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[3] * B[0] + movq (%rcx), %rax + mulq 24(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 24(%rsp) + # A[0] * B[4] + movq 32(%rcx), %rax + mulq (%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[1] * B[3] + movq 24(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[2] * B[2] + movq 16(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[3] * B[1] + movq 8(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[4] * B[0] + movq (%rcx), %rax + mulq 32(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 32(%rsp) + # A[0] * B[5] + movq 40(%rcx), %rax + mulq (%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[1] * B[4] + movq 32(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[2] * B[3] + movq 24(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B[2] + movq 16(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[4] * B[1] + movq 8(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[5] * B[0] + movq (%rcx), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 40(%rsp) + # A[0] * B[6] + movq 48(%rcx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[1] * B[5] + movq 40(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[2] * B[4] + movq 32(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[3] * B[3] + movq 24(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[4] * B[2] + movq 16(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[5] * B[1] + movq 8(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[6] * B[0] + movq (%rcx), %rax + mulq 48(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 48(%rsp) + # A[0] * B[7] + movq 56(%rcx), %rax + mulq (%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[1] * B[6] + movq 48(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + 
# A[2] * B[5] + movq 40(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[3] * B[4] + movq 32(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[4] * B[3] + movq 24(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[5] * B[2] + movq 16(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[6] * B[1] + movq 8(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[7] * B[0] + movq (%rcx), %rax + mulq 56(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 56(%rsp) + # A[0] * B[8] + movq 64(%rcx), %rax + mulq (%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[1] * B[7] + movq 56(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[2] * B[6] + movq 48(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B[5] + movq 40(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[4] * B[4] + movq 32(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[5] * B[3] + movq 24(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[6] * B[2] + movq 16(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[7] * B[1] + movq 8(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[8] * B[0] + movq (%rcx), %rax + mulq 64(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 64(%rsp) + # A[0] * B[9] + movq 72(%rcx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[1] * B[8] + movq 64(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[2] * B[7] + movq 56(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[3] * B[6] + movq 48(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[4] * B[5] + movq 40(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[5] * B[4] + movq 32(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[6] * B[3] + movq 24(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[7] * B[2] + movq 16(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[8] * B[1] + movq 8(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[9] * B[0] + movq (%rcx), %rax + mulq 72(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 72(%rsp) + # A[0] * B[10] + movq 80(%rcx), %rax + mulq (%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[1] * B[9] + movq 72(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[2] * B[8] + movq 64(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[3] * B[7] + movq 56(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[4] * B[6] + movq 48(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[5] * B[5] + movq 40(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[6] * B[4] + movq 32(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[7] * B[3] + movq 24(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[8] * B[2] + movq 
16(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[9] * B[1] + movq 8(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[10] * B[0] + movq (%rcx), %rax + mulq 80(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 80(%rsp) + # A[0] * B[11] + movq 88(%rcx), %rax + mulq (%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[1] * B[10] + movq 80(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[2] * B[9] + movq 72(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B[8] + movq 64(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[4] * B[7] + movq 56(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[5] * B[6] + movq 48(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[6] * B[5] + movq 40(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[7] * B[4] + movq 32(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[8] * B[3] + movq 24(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[9] * B[2] + movq 16(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[10] * B[1] + movq 8(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[11] * B[0] + movq (%rcx), %rax + mulq 88(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 88(%rsp) + # A[0] * B[12] + movq 96(%rcx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[1] * B[11] + movq 88(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[2] * B[10] + movq 80(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[3] * B[9] + movq 72(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[4] * B[8] + movq 64(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[5] * B[7] + movq 56(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[6] * B[6] + movq 48(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[7] * B[5] + movq 40(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[8] * B[4] + movq 32(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[9] * B[3] + movq 24(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[10] * B[2] + movq 16(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[11] * B[1] + movq 8(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[12] * B[0] + movq (%rcx), %rax + mulq 96(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 96(%rsp) + # A[0] * B[13] + movq 104(%rcx), %rax + mulq (%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[1] * B[12] + movq 96(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[2] * B[11] + movq 88(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[3] * B[10] + movq 80(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[4] * B[9] + movq 72(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[5] * B[8] + movq 64(%rcx), 
%rax + mulq 40(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[6] * B[7] + movq 56(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[7] * B[6] + movq 48(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[8] * B[5] + movq 40(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[9] * B[4] + movq 32(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[10] * B[3] + movq 24(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[11] * B[2] + movq 16(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[12] * B[1] + movq 8(%rcx), %rax + mulq 96(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[13] * B[0] + movq (%rcx), %rax + mulq 104(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 104(%rsp) + # A[0] * B[14] + movq 112(%rcx), %rax + mulq (%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[1] * B[13] + movq 104(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[2] * B[12] + movq 96(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B[11] + movq 88(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[4] * B[10] + movq 80(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[5] * B[9] + movq 72(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[6] * B[8] + movq 64(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[7] * B[7] + movq 56(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[8] * B[6] + movq 48(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[9] * B[5] + movq 40(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[10] * B[4] + movq 32(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[11] * B[3] + movq 24(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[12] * B[2] + movq 16(%rcx), %rax + mulq 96(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[13] * B[1] + movq 8(%rcx), %rax + mulq 104(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[14] * B[0] + movq (%rcx), %rax + mulq 112(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 112(%rsp) + # A[0] * B[15] + movq 120(%rcx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[1] * B[14] + movq 112(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[2] * B[13] + movq 104(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[3] * B[12] + movq 96(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[4] * B[11] + movq 88(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[5] * B[10] + movq 80(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[6] * B[9] + movq 72(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[7] * B[8] + movq 64(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[8] * B[7] + movq 56(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[9] * B[6] + movq 48(%rcx), %rax + mulq 72(%rsi) + addq 
%rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[10] * B[5] + movq 40(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[11] * B[4] + movq 32(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[12] * B[3] + movq 24(%rcx), %rax + mulq 96(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[13] * B[2] + movq 16(%rcx), %rax + mulq 104(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[14] * B[1] + movq 8(%rcx), %rax + mulq 112(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[15] * B[0] + movq (%rcx), %rax + mulq 120(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 120(%rsp) + # A[1] * B[15] + movq 120(%rcx), %rax + mulq 8(%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[2] * B[14] + movq 112(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[3] * B[13] + movq 104(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[4] * B[12] + movq 96(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[5] * B[11] + movq 88(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[6] * B[10] + movq 80(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[7] * B[9] + movq 72(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[8] * B[8] + movq 64(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[9] * B[7] + movq 56(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[10] * B[6] + movq 48(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[11] * B[5] + movq 40(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[12] * B[4] + movq 32(%rcx), %rax + mulq 96(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[13] * B[3] + movq 24(%rcx), %rax + mulq 104(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[14] * B[2] + movq 16(%rcx), %rax + mulq 112(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[15] * B[1] + movq 8(%rcx), %rax + mulq 120(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 128(%rdi) + # A[2] * B[15] + movq 120(%rcx), %rax + mulq 16(%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B[14] + movq 112(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[4] * B[13] + movq 104(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[5] * B[12] + movq 96(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[6] * B[11] + movq 88(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[7] * B[10] + movq 80(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[8] * B[9] + movq 72(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[9] * B[8] + movq 64(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[10] * B[7] + movq 56(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[11] * B[6] + movq 48(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[12] * B[5] + movq 40(%rcx), %rax + mulq 96(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[13] * B[4] + movq 32(%rcx), %rax + mulq 104(%rsi) + addq %rax, %r10 
+ adcq %rdx, %r8 + adcq $0, %r9 + # A[14] * B[3] + movq 24(%rcx), %rax + mulq 112(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[15] * B[2] + movq 16(%rcx), %rax + mulq 120(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 136(%rdi) + # A[3] * B[15] + movq 120(%rcx), %rax + mulq 24(%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[4] * B[14] + movq 112(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[5] * B[13] + movq 104(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[6] * B[12] + movq 96(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[7] * B[11] + movq 88(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[8] * B[10] + movq 80(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[9] * B[9] + movq 72(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[10] * B[8] + movq 64(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[11] * B[7] + movq 56(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[12] * B[6] + movq 48(%rcx), %rax + mulq 96(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[13] * B[5] + movq 40(%rcx), %rax + mulq 104(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[14] * B[4] + movq 32(%rcx), %rax + mulq 112(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[15] * B[3] + movq 24(%rcx), %rax + mulq 120(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 144(%rdi) + # A[4] * B[15] + movq 120(%rcx), %rax + mulq 32(%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[5] * B[14] + movq 112(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[6] * B[13] + movq 104(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[7] * B[12] + movq 96(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[8] * B[11] + movq 88(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[9] * B[10] + movq 80(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[10] * B[9] + movq 72(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[11] * B[8] + movq 64(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[12] * B[7] + movq 56(%rcx), %rax + mulq 96(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[13] * B[6] + movq 48(%rcx), %rax + mulq 104(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[14] * B[5] + movq 40(%rcx), %rax + mulq 112(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[15] * B[4] + movq 32(%rcx), %rax + mulq 120(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 152(%rdi) + # A[5] * B[15] + movq 120(%rcx), %rax + mulq 40(%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[6] * B[14] + movq 112(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[7] * B[13] + movq 104(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[8] * B[12] + movq 96(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[9] * B[11] + movq 88(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[10] * B[10] + movq 
80(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[11] * B[9] + movq 72(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[12] * B[8] + movq 64(%rcx), %rax + mulq 96(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[13] * B[7] + movq 56(%rcx), %rax + mulq 104(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[14] * B[6] + movq 48(%rcx), %rax + mulq 112(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[15] * B[5] + movq 40(%rcx), %rax + mulq 120(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 160(%rdi) + # A[6] * B[15] + movq 120(%rcx), %rax + mulq 48(%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[7] * B[14] + movq 112(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[8] * B[13] + movq 104(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[9] * B[12] + movq 96(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[10] * B[11] + movq 88(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[11] * B[10] + movq 80(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[12] * B[9] + movq 72(%rcx), %rax + mulq 96(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[13] * B[8] + movq 64(%rcx), %rax + mulq 104(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[14] * B[7] + movq 56(%rcx), %rax + mulq 112(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[15] * B[6] + movq 48(%rcx), %rax + mulq 120(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 168(%rdi) + # A[7] * B[15] + movq 120(%rcx), %rax + mulq 56(%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[8] * B[14] + movq 112(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[9] * B[13] + movq 104(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[10] * B[12] + movq 96(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[11] * B[11] + movq 88(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[12] * B[10] + movq 80(%rcx), %rax + mulq 96(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[13] * B[9] + movq 72(%rcx), %rax + mulq 104(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[14] * B[8] + movq 64(%rcx), %rax + mulq 112(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[15] * B[7] + movq 56(%rcx), %rax + mulq 120(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 176(%rdi) + # A[8] * B[15] + movq 120(%rcx), %rax + mulq 64(%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[9] * B[14] + movq 112(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[10] * B[13] + movq 104(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[11] * B[12] + movq 96(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[12] * B[11] + movq 88(%rcx), %rax + mulq 96(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[13] * B[10] + movq 80(%rcx), %rax + mulq 104(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[14] * B[9] + movq 72(%rcx), %rax + mulq 112(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[15] * B[8] + movq 64(%rcx), %rax + mulq 120(%rsi) + addq %rax, 
%r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 184(%rdi) + # A[9] * B[15] + movq 120(%rcx), %rax + mulq 72(%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[10] * B[14] + movq 112(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[11] * B[13] + movq 104(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[12] * B[12] + movq 96(%rcx), %rax + mulq 96(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[13] * B[11] + movq 88(%rcx), %rax + mulq 104(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[14] * B[10] + movq 80(%rcx), %rax + mulq 112(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[15] * B[9] + movq 72(%rcx), %rax + mulq 120(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 192(%rdi) + # A[10] * B[15] + movq 120(%rcx), %rax + mulq 80(%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[11] * B[14] + movq 112(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[12] * B[13] + movq 104(%rcx), %rax + mulq 96(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[13] * B[12] + movq 96(%rcx), %rax + mulq 104(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[14] * B[11] + movq 88(%rcx), %rax + mulq 112(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[15] * B[10] + movq 80(%rcx), %rax + mulq 120(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 200(%rdi) + # A[11] * B[15] + movq 120(%rcx), %rax + mulq 88(%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[12] * B[14] + movq 112(%rcx), %rax + mulq 96(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[13] * B[13] + movq 104(%rcx), %rax + mulq 104(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[14] * B[12] + movq 96(%rcx), %rax + mulq 112(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[15] * B[11] + movq 88(%rcx), %rax + mulq 120(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 208(%rdi) + # A[12] * B[15] + movq 120(%rcx), %rax + mulq 96(%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[13] * B[14] + movq 112(%rcx), %rax + mulq 104(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[14] * B[13] + movq 104(%rcx), %rax + mulq 112(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[15] * B[12] + movq 96(%rcx), %rax + mulq 120(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 216(%rdi) + # A[13] * B[15] + movq 120(%rcx), %rax + mulq 104(%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[14] * B[14] + movq 112(%rcx), %rax + mulq 112(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[15] * B[13] + movq 104(%rcx), %rax + mulq 120(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 224(%rdi) + # A[14] * B[15] + movq 120(%rcx), %rax + mulq 112(%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[15] * B[14] + movq 112(%rcx), %rax + mulq 120(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 232(%rdi) + # A[15] * B[15] + movq 120(%rcx), %rax + mulq 120(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + movq %r8, 240(%rdi) + movq %r9, 248(%rdi) + movq (%rsp), %rax + movq 8(%rsp), %rdx + movq 16(%rsp), %r8 + movq 24(%rsp), %r9 + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %r8, 16(%rdi) + movq %r9, 24(%rdi) + movq 32(%rsp), %rax + movq 40(%rsp), %rdx + movq 48(%rsp), %r8 
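+        # The low 16 result words were accumulated in stack scratch space so
+        # that r may alias a or b; they are copied out to r[0..15] here.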
+ movq 56(%rsp), %r9 + movq %rax, 32(%rdi) + movq %rdx, 40(%rdi) + movq %r8, 48(%rdi) + movq %r9, 56(%rdi) + movq 64(%rsp), %rax + movq 72(%rsp), %rdx + movq 80(%rsp), %r8 + movq 88(%rsp), %r9 + movq %rax, 64(%rdi) + movq %rdx, 72(%rdi) + movq %r8, 80(%rdi) + movq %r9, 88(%rdi) + movq 96(%rsp), %rax + movq 104(%rsp), %rdx + movq 112(%rsp), %r8 + movq 120(%rsp), %r9 + movq %rax, 96(%rdi) + movq %rdx, 104(%rdi) + movq %r8, 112(%rdi) + movq %r9, 120(%rdi) + addq $128, %rsp + repz retq +#ifndef __APPLE__ +.size sp_2048_mul_16,.-sp_2048_mul_16 +#endif /* __APPLE__ */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_2048_sqr_16 +.type sp_2048_sqr_16,@function +.align 16 +sp_2048_sqr_16: +#else +.globl _sp_2048_sqr_16 +.p2align 4 +_sp_2048_sqr_16: +#endif /* __APPLE__ */ + push %r12 + subq $128, %rsp + # A[0] * A[0] + movq (%rsi), %rax + mulq %rax + xorq %r9, %r9 + movq %rax, (%rsp) + movq %rdx, %r8 + # A[0] * A[1] + movq 8(%rsi), %rax + mulq (%rsi) + xorq %rcx, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + movq %r8, 8(%rsp) + # A[0] * A[2] + movq 16(%rsi), %rax + mulq (%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + # A[1] * A[1] + movq 8(%rsi), %rax + mulq %rax + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + movq %r9, 16(%rsp) + # A[0] * A[3] + movq 24(%rsi), %rax + mulq (%rsi) + xorq %r9, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + # A[1] * A[2] + movq 16(%rsi), %rax + mulq 8(%rsi) + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + movq %rcx, 24(%rsp) + # A[0] * A[4] + movq 32(%rsi), %rax + mulq (%rsi) + xorq %rcx, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + # A[1] * A[3] + movq 24(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + # A[2] * A[2] + movq 16(%rsi), %rax + mulq %rax + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + movq %r8, 32(%rsp) + # A[0] * A[5] + movq 40(%rsi), %rax + mulq (%rsi) + xorq %r8, %r8 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[4] + movq 32(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[3] + movq 24(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %r9 + adcq %r11, %rcx + adcq %r12, %r8 + movq %r9, 40(%rsp) + # A[0] * A[6] + movq 48(%rsi), %rax + mulq (%rsi) + xorq %r9, %r9 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[5] + movq 40(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[4] + movq 32(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[3] + movq 24(%rsi), %rax + mulq %rax + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %rcx + adcq %r11, %r8 + adcq %r12, %r9 + movq %rcx, 48(%rsp) + # A[0] * A[7] + movq 56(%rsi), %rax + mulq (%rsi) + xorq %rcx, %rcx + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[6] + movq 48(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[5] + movq 
40(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[4] + movq 32(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %r8 + adcq %r11, %r9 + adcq %r12, %rcx + movq %r8, 56(%rsp) + # A[0] * A[8] + movq 64(%rsi), %rax + mulq (%rsi) + xorq %r8, %r8 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[7] + movq 56(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[6] + movq 48(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[5] + movq 40(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[4] * A[4] + movq 32(%rsi), %rax + mulq %rax + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r9 + adcq %r11, %rcx + adcq %r12, %r8 + movq %r9, 64(%rsp) + # A[0] * A[9] + movq 72(%rsi), %rax + mulq (%rsi) + xorq %r9, %r9 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[8] + movq 64(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[7] + movq 56(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[6] + movq 48(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[4] * A[5] + movq 40(%rsi), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %rcx + adcq %r11, %r8 + adcq %r12, %r9 + movq %rcx, 72(%rsp) + # A[0] * A[10] + movq 80(%rsi), %rax + mulq (%rsi) + xorq %rcx, %rcx + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[9] + movq 72(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[8] + movq 64(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[7] + movq 56(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[4] * A[6] + movq 48(%rsi), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[5] * A[5] + movq 40(%rsi), %rax + mulq %rax + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r8 + adcq %r11, %r9 + adcq %r12, %rcx + movq %r8, 80(%rsp) + # A[0] * A[11] + movq 88(%rsi), %rax + mulq (%rsi) + xorq %r8, %r8 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[10] + movq 80(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[9] + movq 72(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[8] + movq 64(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[4] * A[7] + movq 56(%rsi), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[5] * A[6] + movq 48(%rsi), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %r9 + adcq %r11, %rcx + adcq %r12, %r8 + movq %r9, 88(%rsp) + # A[0] * A[12] + movq 96(%rsi), %rax + mulq (%rsi) + xorq %r9, %r9 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[11] + movq 88(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[10] + movq 80(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # 
A[3] * A[9] + movq 72(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[4] * A[8] + movq 64(%rsi), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[5] * A[7] + movq 56(%rsi), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[6] * A[6] + movq 48(%rsi), %rax + mulq %rax + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %rcx + adcq %r11, %r8 + adcq %r12, %r9 + movq %rcx, 96(%rsp) + # A[0] * A[13] + movq 104(%rsi), %rax + mulq (%rsi) + xorq %rcx, %rcx + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[12] + movq 96(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[11] + movq 88(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[10] + movq 80(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[4] * A[9] + movq 72(%rsi), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[5] * A[8] + movq 64(%rsi), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[6] * A[7] + movq 56(%rsi), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %r8 + adcq %r11, %r9 + adcq %r12, %rcx + movq %r8, 104(%rsp) + # A[0] * A[14] + movq 112(%rsi), %rax + mulq (%rsi) + xorq %r8, %r8 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[13] + movq 104(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[12] + movq 96(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[11] + movq 88(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[4] * A[10] + movq 80(%rsi), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[5] * A[9] + movq 72(%rsi), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[6] * A[8] + movq 64(%rsi), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[7] * A[7] + movq 56(%rsi), %rax + mulq %rax + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r9 + adcq %r11, %rcx + adcq %r12, %r8 + movq %r9, 112(%rsp) + # A[0] * A[15] + movq 120(%rsi), %rax + mulq (%rsi) + xorq %r9, %r9 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[14] + movq 112(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[13] + movq 104(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[12] + movq 96(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[4] * A[11] + movq 88(%rsi), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[5] * A[10] + movq 80(%rsi), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[6] * A[9] + movq 72(%rsi), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[7] * A[8] + movq 64(%rsi), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %rcx + adcq %r11, %r8 + adcq %r12, %r9 + movq %rcx, 120(%rsp) + # A[1] * A[15] + movq 120(%rsi), %rax + mulq 8(%rsi) + xorq %rcx, %rcx + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, 
%r11 + # A[2] * A[14] + movq 112(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[13] + movq 104(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[4] * A[12] + movq 96(%rsi), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[5] * A[11] + movq 88(%rsi), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[6] * A[10] + movq 80(%rsi), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[7] * A[9] + movq 72(%rsi), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[8] * A[8] + movq 64(%rsi), %rax + mulq %rax + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r8 + adcq %r11, %r9 + adcq %r12, %rcx + movq %r8, 128(%rdi) + # A[2] * A[15] + movq 120(%rsi), %rax + mulq 16(%rsi) + xorq %r8, %r8 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[3] * A[14] + movq 112(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[4] * A[13] + movq 104(%rsi), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[5] * A[12] + movq 96(%rsi), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[6] * A[11] + movq 88(%rsi), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[7] * A[10] + movq 80(%rsi), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[8] * A[9] + movq 72(%rsi), %rax + mulq 64(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %r9 + adcq %r11, %rcx + adcq %r12, %r8 + movq %r9, 136(%rdi) + # A[3] * A[15] + movq 120(%rsi), %rax + mulq 24(%rsi) + xorq %r9, %r9 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[4] * A[14] + movq 112(%rsi), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[5] * A[13] + movq 104(%rsi), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[6] * A[12] + movq 96(%rsi), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[7] * A[11] + movq 88(%rsi), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[8] * A[10] + movq 80(%rsi), %rax + mulq 64(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[9] * A[9] + movq 72(%rsi), %rax + mulq %rax + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %rcx + adcq %r11, %r8 + adcq %r12, %r9 + movq %rcx, 144(%rdi) + # A[4] * A[15] + movq 120(%rsi), %rax + mulq 32(%rsi) + xorq %rcx, %rcx + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[5] * A[14] + movq 112(%rsi), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[6] * A[13] + movq 104(%rsi), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[7] * A[12] + movq 96(%rsi), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[8] * A[11] + movq 88(%rsi), %rax + mulq 64(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[9] * A[10] + movq 80(%rsi), %rax + mulq 72(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %r8 + adcq %r11, %r9 + adcq %r12, %rcx + movq %r8, 152(%rdi) + # A[5] * A[15] + movq 120(%rsi), %rax + mulq 40(%rsi) + xorq %r8, %r8 + xorq %r12, %r12 + 
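+        # Squaring halves the multiply count: each off-diagonal product
+        # A[i] * A[j] (i < j) is summed once into %r10:%r11:%r12, the triple
+        # is doubled with one add/adc chain, and any diagonal A[j] * A[j]
+        # term is added in afterwards.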
movq %rax, %r10 + movq %rdx, %r11 + # A[6] * A[14] + movq 112(%rsi), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[7] * A[13] + movq 104(%rsi), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[8] * A[12] + movq 96(%rsi), %rax + mulq 64(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[9] * A[11] + movq 88(%rsi), %rax + mulq 72(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[10] * A[10] + movq 80(%rsi), %rax + mulq %rax + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r9 + adcq %r11, %rcx + adcq %r12, %r8 + movq %r9, 160(%rdi) + # A[6] * A[15] + movq 120(%rsi), %rax + mulq 48(%rsi) + xorq %r9, %r9 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[7] * A[14] + movq 112(%rsi), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[8] * A[13] + movq 104(%rsi), %rax + mulq 64(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[9] * A[12] + movq 96(%rsi), %rax + mulq 72(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[10] * A[11] + movq 88(%rsi), %rax + mulq 80(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %rcx + adcq %r11, %r8 + adcq %r12, %r9 + movq %rcx, 168(%rdi) + # A[7] * A[15] + movq 120(%rsi), %rax + mulq 56(%rsi) + xorq %rcx, %rcx + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[8] * A[14] + movq 112(%rsi), %rax + mulq 64(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[9] * A[13] + movq 104(%rsi), %rax + mulq 72(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[10] * A[12] + movq 96(%rsi), %rax + mulq 80(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[11] * A[11] + movq 88(%rsi), %rax + mulq %rax + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r8 + adcq %r11, %r9 + adcq %r12, %rcx + movq %r8, 176(%rdi) + # A[8] * A[15] + movq 120(%rsi), %rax + mulq 64(%rsi) + xorq %r8, %r8 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[9] * A[14] + movq 112(%rsi), %rax + mulq 72(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[10] * A[13] + movq 104(%rsi), %rax + mulq 80(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[11] * A[12] + movq 96(%rsi), %rax + mulq 88(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %r9 + adcq %r11, %rcx + adcq %r12, %r8 + movq %r9, 184(%rdi) + # A[9] * A[15] + movq 120(%rsi), %rax + mulq 72(%rsi) + xorq %r9, %r9 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[10] * A[14] + movq 112(%rsi), %rax + mulq 80(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[11] * A[13] + movq 104(%rsi), %rax + mulq 88(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[12] * A[12] + movq 96(%rsi), %rax + mulq %rax + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %rcx + adcq %r11, %r8 + adcq %r12, %r9 + movq %rcx, 192(%rdi) + # A[10] * A[15] + movq 120(%rsi), %rax + mulq 80(%rsi) + xorq %rcx, %rcx + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[11] * A[14] + movq 112(%rsi), %rax + mulq 88(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[12] * A[13] + movq 104(%rsi), %rax + mulq 96(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + 
adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %r8 + adcq %r11, %r9 + adcq %r12, %rcx + movq %r8, 200(%rdi) + # A[11] * A[15] + movq 120(%rsi), %rax + mulq 88(%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + # A[12] * A[14] + movq 112(%rsi), %rax + mulq 96(%rsi) + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + # A[13] * A[13] + movq 104(%rsi), %rax + mulq %rax + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + movq %r9, 208(%rdi) + # A[12] * A[15] + movq 120(%rsi), %rax + mulq 96(%rsi) + xorq %r9, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + # A[13] * A[14] + movq 112(%rsi), %rax + mulq 104(%rsi) + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + movq %rcx, 216(%rdi) + # A[13] * A[15] + movq 120(%rsi), %rax + mulq 104(%rsi) + xorq %rcx, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + # A[14] * A[14] + movq 112(%rsi), %rax + mulq %rax + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + movq %r8, 224(%rdi) + # A[14] * A[15] + movq 120(%rsi), %rax + mulq 112(%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + movq %r9, 232(%rdi) + # A[15] * A[15] + movq 120(%rsi), %rax + mulq %rax + addq %rax, %rcx + adcq %rdx, %r8 + movq %rcx, 240(%rdi) + movq %r8, 248(%rdi) + movq (%rsp), %rax + movq 8(%rsp), %rdx + movq 16(%rsp), %r10 + movq 24(%rsp), %r11 + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 32(%rsp), %rax + movq 40(%rsp), %rdx + movq 48(%rsp), %r10 + movq 56(%rsp), %r11 + movq %rax, 32(%rdi) + movq %rdx, 40(%rdi) + movq %r10, 48(%rdi) + movq %r11, 56(%rdi) + movq 64(%rsp), %rax + movq 72(%rsp), %rdx + movq 80(%rsp), %r10 + movq 88(%rsp), %r11 + movq %rax, 64(%rdi) + movq %rdx, 72(%rdi) + movq %r10, 80(%rdi) + movq %r11, 88(%rdi) + movq 96(%rsp), %rax + movq 104(%rsp), %rdx + movq 112(%rsp), %r10 + movq 120(%rsp), %r11 + movq %rax, 96(%rdi) + movq %rdx, 104(%rdi) + movq %r10, 112(%rdi) + movq %r11, 120(%rdi) + addq $128, %rsp + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_2048_sqr_16,.-sp_2048_sqr_16 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Multiply a and b into r. (r = a * b) + * + * r Result of multiplication. + * a First number to multiply. + * b Second number to multiply. 
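+ *
+ * Requires BMI2/ADX: mulx forms the full 128-bit product without touching
+ * flags, while adcx and adox propagate carries through CF and OF
+ * independently, allowing two carry chains per row of partial products.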
+ */ +#ifndef __APPLE__ +.globl sp_2048_mul_avx2_16 +.type sp_2048_mul_avx2_16,@function +.align 16 +sp_2048_mul_avx2_16: +#else +.globl _sp_2048_mul_avx2_16 +.p2align 4 +_sp_2048_mul_avx2_16: +#endif /* __APPLE__ */ + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + movq %rdx, %rbp + subq $128, %rsp + cmpq %rdi, %rsi + movq %rsp, %rbx + cmovne %rdi, %rbx + cmpq %rdi, %rbp + cmove %rsp, %rbx + xorq %r14, %r14 + movq (%rsi), %rdx + # A[0] * B[0] + mulx (%rbp), %r8, %r9 + # A[0] * B[1] + mulx 8(%rbp), %rax, %r10 + movq %r8, (%rbx) + adcxq %rax, %r9 + # A[0] * B[2] + mulx 16(%rbp), %rax, %r11 + movq %r9, 8(%rbx) + adcxq %rax, %r10 + # A[0] * B[3] + mulx 24(%rbp), %rax, %r12 + movq %r10, 16(%rbx) + adcxq %rax, %r11 + movq %r11, 24(%rbx) + # A[0] * B[4] + mulx 32(%rbp), %rax, %r8 + adcxq %rax, %r12 + # A[0] * B[5] + mulx 40(%rbp), %rax, %r9 + movq %r12, 32(%rbx) + adcxq %rax, %r8 + # A[0] * B[6] + mulx 48(%rbp), %rax, %r10 + movq %r8, 40(%rbx) + adcxq %rax, %r9 + # A[0] * B[7] + mulx 56(%rbp), %rax, %r11 + movq %r9, 48(%rbx) + adcxq %rax, %r10 + movq %r10, 56(%rbx) + # A[0] * B[8] + mulx 64(%rbp), %rax, %r12 + adcxq %rax, %r11 + # A[0] * B[9] + mulx 72(%rbp), %rax, %r8 + movq %r11, 64(%rbx) + adcxq %rax, %r12 + # A[0] * B[10] + mulx 80(%rbp), %rax, %r9 + movq %r12, 72(%rbx) + adcxq %rax, %r8 + # A[0] * B[11] + mulx 88(%rbp), %rax, %r10 + movq %r8, 80(%rbx) + adcxq %rax, %r9 + movq %r9, 88(%rbx) + # A[0] * B[12] + mulx 96(%rbp), %rax, %r11 + adcxq %rax, %r10 + # A[0] * B[13] + mulx 104(%rbp), %rax, %r12 + movq %r10, 96(%rbx) + adcxq %rax, %r11 + # A[0] * B[14] + mulx 112(%rbp), %rax, %r8 + movq %r11, 104(%rbx) + adcxq %rax, %r12 + # A[0] * B[15] + mulx 120(%rbp), %rax, %r9 + movq %r12, 112(%rbx) + adcxq %rax, %r8 + adcxq %r14, %r9 + movq %r14, %r13 + adcxq %r14, %r13 + movq %r8, 120(%rbx) + movq %r9, 128(%rdi) + movq 8(%rsi), %rdx + movq 8(%rbx), %r9 + movq 16(%rbx), %r10 + movq 24(%rbx), %r11 + movq 32(%rbx), %r12 + movq 40(%rbx), %r8 + # A[1] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[1] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r9, 8(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[1] * B[2] + mulx 16(%rbp), %rax, %rcx + movq %r10, 16(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[1] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r11, 24(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + movq %r12, 32(%rbx) + movq 48(%rbx), %r9 + movq 56(%rbx), %r10 + movq 64(%rbx), %r11 + movq 72(%rbx), %r12 + # A[1] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[1] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r8, 40(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[1] * B[6] + mulx 48(%rbp), %rax, %rcx + movq %r9, 48(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[1] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r10, 56(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + movq %r11, 64(%rbx) + movq 80(%rbx), %r8 + movq 88(%rbx), %r9 + movq 96(%rbx), %r10 + movq 104(%rbx), %r11 + # A[1] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[1] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r12, 72(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[1] * B[10] + mulx 80(%rbp), %rax, %rcx + movq %r8, 80(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[1] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r9, 88(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 96(%rbx) + movq 112(%rbx), %r12 + movq 120(%rbx), %r8 + movq 128(%rdi), %r9 + # A[1] * B[12] + mulx 96(%rbp), %rax, %rcx + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[1] * B[13] + mulx 
104(%rbp), %rax, %rcx + movq %r11, 104(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[1] * B[14] + mulx 112(%rbp), %rax, %rcx + movq %r12, 112(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[1] * B[15] + mulx 120(%rbp), %rax, %rcx + movq %r8, 120(%rbx) + movq %r14, %r10 + adcxq %rax, %r9 + adoxq %rcx, %r10 + adcxq %r13, %r10 + movq %r14, %r13 + adoxq %r14, %r13 + adcxq %r14, %r13 + movq %r9, 128(%rdi) + movq %r10, 136(%rdi) + movq 16(%rsi), %rdx + movq 16(%rbx), %r10 + movq 24(%rbx), %r11 + movq 32(%rbx), %r12 + movq 40(%rbx), %r8 + movq 48(%rbx), %r9 + # A[2] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[2] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r10, 16(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[2] * B[2] + mulx 16(%rbp), %rax, %rcx + movq %r11, 24(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[2] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r12, 32(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 40(%rbx) + movq 56(%rbx), %r10 + movq 64(%rbx), %r11 + movq 72(%rbx), %r12 + movq 80(%rbx), %r8 + # A[2] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[2] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r9, 48(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[2] * B[6] + mulx 48(%rbp), %rax, %rcx + movq %r10, 56(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[2] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r11, 64(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + movq %r12, 72(%rbx) + movq 88(%rbx), %r9 + movq 96(%rbx), %r10 + movq 104(%rbx), %r11 + movq 112(%rbx), %r12 + # A[2] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[2] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r8, 80(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[2] * B[10] + mulx 80(%rbp), %rax, %rcx + movq %r9, 88(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[2] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r10, 96(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + movq %r11, 104(%rbx) + movq 120(%rbx), %r8 + movq 128(%rdi), %r9 + movq 136(%rdi), %r10 + # A[2] * B[12] + mulx 96(%rbp), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[2] * B[13] + mulx 104(%rbp), %rax, %rcx + movq %r12, 112(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[2] * B[14] + mulx 112(%rbp), %rax, %rcx + movq %r8, 120(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[2] * B[15] + mulx 120(%rbp), %rax, %rcx + movq %r9, 128(%rdi) + movq %r14, %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + adcxq %r13, %r11 + movq %r14, %r13 + adoxq %r14, %r13 + adcxq %r14, %r13 + movq %r10, 136(%rdi) + movq %r11, 144(%rdi) + movq 24(%rsi), %rdx + movq 24(%rbx), %r11 + movq 32(%rbx), %r12 + movq 40(%rbx), %r8 + movq 48(%rbx), %r9 + movq 56(%rbx), %r10 + # A[3] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[3] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r11, 24(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[3] * B[2] + mulx 16(%rbp), %rax, %rcx + movq %r12, 32(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[3] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r8, 40(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 48(%rbx) + movq 64(%rbx), %r11 + movq 72(%rbx), %r12 + movq 80(%rbx), %r8 + movq 88(%rbx), %r9 + # A[3] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[3] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r10, 56(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[3] * B[6] + mulx 48(%rbp), %rax, %rcx + movq %r11, 64(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[3] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r12, 72(%rbx) 
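+        # adcxq folds each low product word into the current limb over the CF
+        # chain while adoxq folds the high word into the next limb over the
+        # OF chain; stores of finished limbs are interleaved between them.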
+ adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 80(%rbx) + movq 96(%rbx), %r10 + movq 104(%rbx), %r11 + movq 112(%rbx), %r12 + movq 120(%rbx), %r8 + # A[3] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[3] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r9, 88(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[3] * B[10] + mulx 80(%rbp), %rax, %rcx + movq %r10, 96(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[3] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r11, 104(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + movq %r12, 112(%rbx) + movq 128(%rdi), %r9 + movq 136(%rdi), %r10 + movq 144(%rdi), %r11 + # A[3] * B[12] + mulx 96(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[3] * B[13] + mulx 104(%rbp), %rax, %rcx + movq %r8, 120(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[3] * B[14] + mulx 112(%rbp), %rax, %rcx + movq %r9, 128(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[3] * B[15] + mulx 120(%rbp), %rax, %rcx + movq %r10, 136(%rdi) + movq %r14, %r12 + adcxq %rax, %r11 + adoxq %rcx, %r12 + adcxq %r13, %r12 + movq %r14, %r13 + adoxq %r14, %r13 + adcxq %r14, %r13 + movq %r11, 144(%rdi) + movq %r12, 152(%rdi) + movq 32(%rsi), %rdx + movq 32(%rbx), %r12 + movq 40(%rbx), %r8 + movq 48(%rbx), %r9 + movq 56(%rbx), %r10 + movq 64(%rbx), %r11 + # A[4] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[4] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r12, 32(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[4] * B[2] + mulx 16(%rbp), %rax, %rcx + movq %r8, 40(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[4] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r9, 48(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 56(%rbx) + movq 72(%rbx), %r12 + movq 80(%rbx), %r8 + movq 88(%rbx), %r9 + movq 96(%rbx), %r10 + # A[4] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[4] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r11, 64(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[4] * B[6] + mulx 48(%rbp), %rax, %rcx + movq %r12, 72(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[4] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r8, 80(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 88(%rbx) + movq 104(%rbx), %r11 + movq 112(%rbx), %r12 + movq 120(%rbx), %r8 + movq 128(%rdi), %r9 + # A[4] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[4] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r10, 96(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[4] * B[10] + mulx 80(%rbp), %rax, %rcx + movq %r11, 104(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[4] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r12, 112(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 120(%rbx) + movq 136(%rdi), %r10 + movq 144(%rdi), %r11 + movq 152(%rdi), %r12 + # A[4] * B[12] + mulx 96(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[4] * B[13] + mulx 104(%rbp), %rax, %rcx + movq %r9, 128(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[4] * B[14] + mulx 112(%rbp), %rax, %rcx + movq %r10, 136(%rdi) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[4] * B[15] + mulx 120(%rbp), %rax, %rcx + movq %r11, 144(%rdi) + movq %r14, %r8 + adcxq %rax, %r12 + adoxq %rcx, %r8 + adcxq %r13, %r8 + movq %r14, %r13 + adoxq %r14, %r13 + adcxq %r14, %r13 + movq %r12, 152(%rdi) + movq %r8, 160(%rdi) + movq 40(%rsi), %rdx + movq 40(%rbx), %r8 + movq 48(%rbx), %r9 + movq 56(%rbx), %r10 + movq 64(%rbx), %r11 + movq 72(%rbx), %r12 + # A[5] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[5] * B[1] + mulx 
8(%rbp), %rax, %rcx + movq %r8, 40(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[5] * B[2] + mulx 16(%rbp), %rax, %rcx + movq %r9, 48(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[5] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r10, 56(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + movq %r11, 64(%rbx) + movq 80(%rbx), %r8 + movq 88(%rbx), %r9 + movq 96(%rbx), %r10 + movq 104(%rbx), %r11 + # A[5] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[5] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r12, 72(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[5] * B[6] + mulx 48(%rbp), %rax, %rcx + movq %r8, 80(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[5] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r9, 88(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 96(%rbx) + movq 112(%rbx), %r12 + movq 120(%rbx), %r8 + movq 128(%rdi), %r9 + movq 136(%rdi), %r10 + # A[5] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[5] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r11, 104(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[5] * B[10] + mulx 80(%rbp), %rax, %rcx + movq %r12, 112(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[5] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r8, 120(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 128(%rdi) + movq 144(%rdi), %r11 + movq 152(%rdi), %r12 + movq 160(%rdi), %r8 + # A[5] * B[12] + mulx 96(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[5] * B[13] + mulx 104(%rbp), %rax, %rcx + movq %r10, 136(%rdi) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[5] * B[14] + mulx 112(%rbp), %rax, %rcx + movq %r11, 144(%rdi) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[5] * B[15] + mulx 120(%rbp), %rax, %rcx + movq %r12, 152(%rdi) + movq %r14, %r9 + adcxq %rax, %r8 + adoxq %rcx, %r9 + adcxq %r13, %r9 + movq %r14, %r13 + adoxq %r14, %r13 + adcxq %r14, %r13 + movq %r8, 160(%rdi) + movq %r9, 168(%rdi) + movq 48(%rsi), %rdx + movq 48(%rbx), %r9 + movq 56(%rbx), %r10 + movq 64(%rbx), %r11 + movq 72(%rbx), %r12 + movq 80(%rbx), %r8 + # A[6] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[6] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r9, 48(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[6] * B[2] + mulx 16(%rbp), %rax, %rcx + movq %r10, 56(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[6] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r11, 64(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + movq %r12, 72(%rbx) + movq 88(%rbx), %r9 + movq 96(%rbx), %r10 + movq 104(%rbx), %r11 + movq 112(%rbx), %r12 + # A[6] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[6] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r8, 80(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[6] * B[6] + mulx 48(%rbp), %rax, %rcx + movq %r9, 88(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[6] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r10, 96(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + movq %r11, 104(%rbx) + movq 120(%rbx), %r8 + movq 128(%rdi), %r9 + movq 136(%rdi), %r10 + movq 144(%rdi), %r11 + # A[6] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[6] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r12, 112(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[6] * B[10] + mulx 80(%rbp), %rax, %rcx + movq %r8, 120(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[6] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r9, 128(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 136(%rdi) + movq 152(%rdi), %r12 + movq 160(%rdi), %r8 + movq 168(%rdi), %r9 + # A[6] * B[12] + mulx 
96(%rbp), %rax, %rcx + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[6] * B[13] + mulx 104(%rbp), %rax, %rcx + movq %r11, 144(%rdi) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[6] * B[14] + mulx 112(%rbp), %rax, %rcx + movq %r12, 152(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[6] * B[15] + mulx 120(%rbp), %rax, %rcx + movq %r8, 160(%rdi) + movq %r14, %r10 + adcxq %rax, %r9 + adoxq %rcx, %r10 + adcxq %r13, %r10 + movq %r14, %r13 + adoxq %r14, %r13 + adcxq %r14, %r13 + movq %r9, 168(%rdi) + movq %r10, 176(%rdi) + movq 56(%rsi), %rdx + movq 56(%rbx), %r10 + movq 64(%rbx), %r11 + movq 72(%rbx), %r12 + movq 80(%rbx), %r8 + movq 88(%rbx), %r9 + # A[7] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[7] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r10, 56(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[7] * B[2] + mulx 16(%rbp), %rax, %rcx + movq %r11, 64(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[7] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r12, 72(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 80(%rbx) + movq 96(%rbx), %r10 + movq 104(%rbx), %r11 + movq 112(%rbx), %r12 + movq 120(%rbx), %r8 + # A[7] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[7] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r9, 88(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[7] * B[6] + mulx 48(%rbp), %rax, %rcx + movq %r10, 96(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[7] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r11, 104(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + movq %r12, 112(%rbx) + movq 128(%rdi), %r9 + movq 136(%rdi), %r10 + movq 144(%rdi), %r11 + movq 152(%rdi), %r12 + # A[7] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[7] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r8, 120(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[7] * B[10] + mulx 80(%rbp), %rax, %rcx + movq %r9, 128(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[7] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r10, 136(%rdi) + adcxq %rax, %r11 + adoxq %rcx, %r12 + movq %r11, 144(%rdi) + movq 160(%rdi), %r8 + movq 168(%rdi), %r9 + movq 176(%rdi), %r10 + # A[7] * B[12] + mulx 96(%rbp), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[7] * B[13] + mulx 104(%rbp), %rax, %rcx + movq %r12, 152(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[7] * B[14] + mulx 112(%rbp), %rax, %rcx + movq %r8, 160(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[7] * B[15] + mulx 120(%rbp), %rax, %rcx + movq %r9, 168(%rdi) + movq %r14, %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + adcxq %r13, %r11 + movq %r14, %r13 + adoxq %r14, %r13 + adcxq %r14, %r13 + movq %r10, 176(%rdi) + movq %r11, 184(%rdi) + movq 64(%rsi), %rdx + movq 64(%rbx), %r11 + movq 72(%rbx), %r12 + movq 80(%rbx), %r8 + movq 88(%rbx), %r9 + movq 96(%rbx), %r10 + # A[8] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[8] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r11, 64(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[8] * B[2] + mulx 16(%rbp), %rax, %rcx + movq %r12, 72(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[8] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r8, 80(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 88(%rbx) + movq 104(%rbx), %r11 + movq 112(%rbx), %r12 + movq 120(%rbx), %r8 + movq 128(%rdi), %r9 + # A[8] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[8] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r10, 96(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[8] * B[6] + mulx 48(%rbp), %rax, %rcx + movq %r11, 104(%rbx) + 
adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[8] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r12, 112(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 120(%rbx) + movq 136(%rdi), %r10 + movq 144(%rdi), %r11 + movq 152(%rdi), %r12 + movq 160(%rdi), %r8 + # A[8] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[8] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r9, 128(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[8] * B[10] + mulx 80(%rbp), %rax, %rcx + movq %r10, 136(%rdi) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[8] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r11, 144(%rdi) + adcxq %rax, %r12 + adoxq %rcx, %r8 + movq %r12, 152(%rdi) + movq 168(%rdi), %r9 + movq 176(%rdi), %r10 + movq 184(%rdi), %r11 + # A[8] * B[12] + mulx 96(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[8] * B[13] + mulx 104(%rbp), %rax, %rcx + movq %r8, 160(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[8] * B[14] + mulx 112(%rbp), %rax, %rcx + movq %r9, 168(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[8] * B[15] + mulx 120(%rbp), %rax, %rcx + movq %r10, 176(%rdi) + movq %r14, %r12 + adcxq %rax, %r11 + adoxq %rcx, %r12 + adcxq %r13, %r12 + movq %r14, %r13 + adoxq %r14, %r13 + adcxq %r14, %r13 + movq %r11, 184(%rdi) + movq %r12, 192(%rdi) + movq 72(%rsi), %rdx + movq 72(%rbx), %r12 + movq 80(%rbx), %r8 + movq 88(%rbx), %r9 + movq 96(%rbx), %r10 + movq 104(%rbx), %r11 + # A[9] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[9] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r12, 72(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[9] * B[2] + mulx 16(%rbp), %rax, %rcx + movq %r8, 80(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[9] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r9, 88(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 96(%rbx) + movq 112(%rbx), %r12 + movq 120(%rbx), %r8 + movq 128(%rdi), %r9 + movq 136(%rdi), %r10 + # A[9] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[9] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r11, 104(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[9] * B[6] + mulx 48(%rbp), %rax, %rcx + movq %r12, 112(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[9] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r8, 120(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 128(%rdi) + movq 144(%rdi), %r11 + movq 152(%rdi), %r12 + movq 160(%rdi), %r8 + movq 168(%rdi), %r9 + # A[9] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[9] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r10, 136(%rdi) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[9] * B[10] + mulx 80(%rbp), %rax, %rcx + movq %r11, 144(%rdi) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[9] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r12, 152(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 160(%rdi) + movq 176(%rdi), %r10 + movq 184(%rdi), %r11 + movq 192(%rdi), %r12 + # A[9] * B[12] + mulx 96(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[9] * B[13] + mulx 104(%rbp), %rax, %rcx + movq %r9, 168(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[9] * B[14] + mulx 112(%rbp), %rax, %rcx + movq %r10, 176(%rdi) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[9] * B[15] + mulx 120(%rbp), %rax, %rcx + movq %r11, 184(%rdi) + movq %r14, %r8 + adcxq %rax, %r12 + adoxq %rcx, %r8 + adcxq %r13, %r8 + movq %r14, %r13 + adoxq %r14, %r13 + adcxq %r14, %r13 + movq %r12, 192(%rdi) + movq %r8, 200(%rdi) + movq 80(%rsi), %rdx + movq 80(%rbx), %r8 + movq 88(%rbx), %r9 + movq 96(%rbx), %r10 + movq 104(%rbx), %r11 + movq 
112(%rbx), %r12 + # A[10] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[10] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r8, 80(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[10] * B[2] + mulx 16(%rbp), %rax, %rcx + movq %r9, 88(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[10] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r10, 96(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + movq %r11, 104(%rbx) + movq 120(%rbx), %r8 + movq 128(%rdi), %r9 + movq 136(%rdi), %r10 + movq 144(%rdi), %r11 + # A[10] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[10] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r12, 112(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[10] * B[6] + mulx 48(%rbp), %rax, %rcx + movq %r8, 120(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[10] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r9, 128(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 136(%rdi) + movq 152(%rdi), %r12 + movq 160(%rdi), %r8 + movq 168(%rdi), %r9 + movq 176(%rdi), %r10 + # A[10] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[10] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r11, 144(%rdi) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[10] * B[10] + mulx 80(%rbp), %rax, %rcx + movq %r12, 152(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[10] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r8, 160(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 168(%rdi) + movq 184(%rdi), %r11 + movq 192(%rdi), %r12 + movq 200(%rdi), %r8 + # A[10] * B[12] + mulx 96(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[10] * B[13] + mulx 104(%rbp), %rax, %rcx + movq %r10, 176(%rdi) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[10] * B[14] + mulx 112(%rbp), %rax, %rcx + movq %r11, 184(%rdi) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[10] * B[15] + mulx 120(%rbp), %rax, %rcx + movq %r12, 192(%rdi) + movq %r14, %r9 + adcxq %rax, %r8 + adoxq %rcx, %r9 + adcxq %r13, %r9 + movq %r14, %r13 + adoxq %r14, %r13 + adcxq %r14, %r13 + movq %r8, 200(%rdi) + movq %r9, 208(%rdi) + movq 88(%rsi), %rdx + movq 88(%rbx), %r9 + movq 96(%rbx), %r10 + movq 104(%rbx), %r11 + movq 112(%rbx), %r12 + movq 120(%rbx), %r8 + # A[11] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[11] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r9, 88(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[11] * B[2] + mulx 16(%rbp), %rax, %rcx + movq %r10, 96(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[11] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r11, 104(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + movq %r12, 112(%rbx) + movq 128(%rdi), %r9 + movq 136(%rdi), %r10 + movq 144(%rdi), %r11 + movq 152(%rdi), %r12 + # A[11] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[11] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r8, 120(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[11] * B[6] + mulx 48(%rbp), %rax, %rcx + movq %r9, 128(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[11] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r10, 136(%rdi) + adcxq %rax, %r11 + adoxq %rcx, %r12 + movq %r11, 144(%rdi) + movq 160(%rdi), %r8 + movq 168(%rdi), %r9 + movq 176(%rdi), %r10 + movq 184(%rdi), %r11 + # A[11] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[11] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r12, 152(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[11] * B[10] + mulx 80(%rbp), %rax, %rcx + movq %r8, 160(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[11] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r9, 
168(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 176(%rdi) + movq 192(%rdi), %r12 + movq 200(%rdi), %r8 + movq 208(%rdi), %r9 + # A[11] * B[12] + mulx 96(%rbp), %rax, %rcx + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[11] * B[13] + mulx 104(%rbp), %rax, %rcx + movq %r11, 184(%rdi) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[11] * B[14] + mulx 112(%rbp), %rax, %rcx + movq %r12, 192(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[11] * B[15] + mulx 120(%rbp), %rax, %rcx + movq %r8, 200(%rdi) + movq %r14, %r10 + adcxq %rax, %r9 + adoxq %rcx, %r10 + adcxq %r13, %r10 + movq %r14, %r13 + adoxq %r14, %r13 + adcxq %r14, %r13 + movq %r9, 208(%rdi) + movq %r10, 216(%rdi) + movq 96(%rsi), %rdx + movq 96(%rbx), %r10 + movq 104(%rbx), %r11 + movq 112(%rbx), %r12 + movq 120(%rbx), %r8 + movq 128(%rdi), %r9 + # A[12] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[12] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r10, 96(%rbx) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[12] * B[2] + mulx 16(%rbp), %rax, %rcx + movq %r11, 104(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[12] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r12, 112(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 120(%rbx) + movq 136(%rdi), %r10 + movq 144(%rdi), %r11 + movq 152(%rdi), %r12 + movq 160(%rdi), %r8 + # A[12] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[12] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r9, 128(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[12] * B[6] + mulx 48(%rbp), %rax, %rcx + movq %r10, 136(%rdi) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[12] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r11, 144(%rdi) + adcxq %rax, %r12 + adoxq %rcx, %r8 + movq %r12, 152(%rdi) + movq 168(%rdi), %r9 + movq 176(%rdi), %r10 + movq 184(%rdi), %r11 + movq 192(%rdi), %r12 + # A[12] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[12] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r8, 160(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[12] * B[10] + mulx 80(%rbp), %rax, %rcx + movq %r9, 168(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[12] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r10, 176(%rdi) + adcxq %rax, %r11 + adoxq %rcx, %r12 + movq %r11, 184(%rdi) + movq 200(%rdi), %r8 + movq 208(%rdi), %r9 + movq 216(%rdi), %r10 + # A[12] * B[12] + mulx 96(%rbp), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[12] * B[13] + mulx 104(%rbp), %rax, %rcx + movq %r12, 192(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[12] * B[14] + mulx 112(%rbp), %rax, %rcx + movq %r8, 200(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[12] * B[15] + mulx 120(%rbp), %rax, %rcx + movq %r9, 208(%rdi) + movq %r14, %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + adcxq %r13, %r11 + movq %r14, %r13 + adoxq %r14, %r13 + adcxq %r14, %r13 + movq %r10, 216(%rdi) + movq %r11, 224(%rdi) + movq 104(%rsi), %rdx + movq 104(%rbx), %r11 + movq 112(%rbx), %r12 + movq 120(%rbx), %r8 + movq 128(%rdi), %r9 + movq 136(%rdi), %r10 + # A[13] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[13] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r11, 104(%rbx) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[13] * B[2] + mulx 16(%rbp), %rax, %rcx + movq %r12, 112(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[13] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r8, 120(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 128(%rdi) + movq 144(%rdi), %r11 + movq 152(%rdi), %r12 + movq 160(%rdi), %r8 + movq 168(%rdi), %r9 + # A[13] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq 
%rax, %r10 + adoxq %rcx, %r11 + # A[13] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r10, 136(%rdi) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[13] * B[6] + mulx 48(%rbp), %rax, %rcx + movq %r11, 144(%rdi) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[13] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r12, 152(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 160(%rdi) + movq 176(%rdi), %r10 + movq 184(%rdi), %r11 + movq 192(%rdi), %r12 + movq 200(%rdi), %r8 + # A[13] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[13] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r9, 168(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[13] * B[10] + mulx 80(%rbp), %rax, %rcx + movq %r10, 176(%rdi) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[13] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r11, 184(%rdi) + adcxq %rax, %r12 + adoxq %rcx, %r8 + movq %r12, 192(%rdi) + movq 208(%rdi), %r9 + movq 216(%rdi), %r10 + movq 224(%rdi), %r11 + # A[13] * B[12] + mulx 96(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[13] * B[13] + mulx 104(%rbp), %rax, %rcx + movq %r8, 200(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[13] * B[14] + mulx 112(%rbp), %rax, %rcx + movq %r9, 208(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[13] * B[15] + mulx 120(%rbp), %rax, %rcx + movq %r10, 216(%rdi) + movq %r14, %r12 + adcxq %rax, %r11 + adoxq %rcx, %r12 + adcxq %r13, %r12 + movq %r14, %r13 + adoxq %r14, %r13 + adcxq %r14, %r13 + movq %r11, 224(%rdi) + movq %r12, 232(%rdi) + movq 112(%rsi), %rdx + movq 112(%rbx), %r12 + movq 120(%rbx), %r8 + movq 128(%rdi), %r9 + movq 136(%rdi), %r10 + movq 144(%rdi), %r11 + # A[14] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[14] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r12, 112(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[14] * B[2] + mulx 16(%rbp), %rax, %rcx + movq %r8, 120(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[14] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r9, 128(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 136(%rdi) + movq 152(%rdi), %r12 + movq 160(%rdi), %r8 + movq 168(%rdi), %r9 + movq 176(%rdi), %r10 + # A[14] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[14] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r11, 144(%rdi) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[14] * B[6] + mulx 48(%rbp), %rax, %rcx + movq %r12, 152(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[14] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r8, 160(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 168(%rdi) + movq 184(%rdi), %r11 + movq 192(%rdi), %r12 + movq 200(%rdi), %r8 + movq 208(%rdi), %r9 + # A[14] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[14] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r10, 176(%rdi) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[14] * B[10] + mulx 80(%rbp), %rax, %rcx + movq %r11, 184(%rdi) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[14] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r12, 192(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 200(%rdi) + movq 216(%rdi), %r10 + movq 224(%rdi), %r11 + movq 232(%rdi), %r12 + # A[14] * B[12] + mulx 96(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[14] * B[13] + mulx 104(%rbp), %rax, %rcx + movq %r9, 208(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[14] * B[14] + mulx 112(%rbp), %rax, %rcx + movq %r10, 216(%rdi) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[14] * B[15] + mulx 120(%rbp), %rax, %rcx + movq %r11, 224(%rdi) + movq %r14, %r8 + adcxq %rax, %r12 + adoxq %rcx, %r8 + 
adcxq %r13, %r8 + movq %r14, %r13 + adoxq %r14, %r13 + adcxq %r14, %r13 + movq %r12, 232(%rdi) + movq %r8, 240(%rdi) + movq 120(%rsi), %rdx + movq 120(%rbx), %r8 + movq 128(%rdi), %r9 + movq 136(%rdi), %r10 + movq 144(%rdi), %r11 + movq 152(%rdi), %r12 + # A[15] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[15] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r8, 120(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[15] * B[2] + mulx 16(%rbp), %rax, %rcx + movq %r9, 128(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[15] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r10, 136(%rdi) + adcxq %rax, %r11 + adoxq %rcx, %r12 + movq %r11, 144(%rdi) + movq 160(%rdi), %r8 + movq 168(%rdi), %r9 + movq 176(%rdi), %r10 + movq 184(%rdi), %r11 + # A[15] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[15] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r12, 152(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[15] * B[6] + mulx 48(%rbp), %rax, %rcx + movq %r8, 160(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[15] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r9, 168(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 176(%rdi) + movq 192(%rdi), %r12 + movq 200(%rdi), %r8 + movq 208(%rdi), %r9 + movq 216(%rdi), %r10 + # A[15] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[15] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r11, 184(%rdi) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[15] * B[10] + mulx 80(%rbp), %rax, %rcx + movq %r12, 192(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[15] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r8, 200(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 208(%rdi) + movq 224(%rdi), %r11 + movq 232(%rdi), %r12 + movq 240(%rdi), %r8 + # A[15] * B[12] + mulx 96(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[15] * B[13] + mulx 104(%rbp), %rax, %rcx + movq %r10, 216(%rdi) + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[15] * B[14] + mulx 112(%rbp), %rax, %rcx + movq %r11, 224(%rdi) + adcxq %rax, %r12 + adoxq %rcx, %r8 + # A[15] * B[15] + mulx 120(%rbp), %rax, %rcx + movq %r12, 232(%rdi) + movq %r14, %r9 + adcxq %rax, %r8 + adoxq %rcx, %r9 + adcxq %r13, %r9 + movq %r8, 240(%rdi) + movq %r9, 248(%rdi) + cmpq %rdi, %rsi + je L_start_2048_mul_avx2_16 + cmpq %rdi, %rbp + jne L_end_2048_mul_avx2_16 +L_start_2048_mul_avx2_16: + vmovdqu (%rbx), %xmm0 + vmovups %xmm0, (%rdi) + vmovdqu 16(%rbx), %xmm0 + vmovups %xmm0, 16(%rdi) + vmovdqu 32(%rbx), %xmm0 + vmovups %xmm0, 32(%rdi) + vmovdqu 48(%rbx), %xmm0 + vmovups %xmm0, 48(%rdi) + vmovdqu 64(%rbx), %xmm0 + vmovups %xmm0, 64(%rdi) + vmovdqu 80(%rbx), %xmm0 + vmovups %xmm0, 80(%rdi) + vmovdqu 96(%rbx), %xmm0 + vmovups %xmm0, 96(%rdi) + vmovdqu 112(%rbx), %xmm0 + vmovups %xmm0, 112(%rdi) +L_end_2048_mul_avx2_16: + addq $128, %rsp + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + repz retq +#ifndef __APPLE__ +.size sp_2048_mul_avx2_16,.-sp_2048_mul_avx2_16 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +#ifdef HAVE_INTEL_AVX2 +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
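For reference, the multiply routine that ends above leans on BMI2/ADX: mulx produces a full 128-bit partial product without touching the flags, while adcx and adox maintain two independent carry chains (CF and OF) so the low and high halves of each product can be accumulated in parallel. A minimal C sketch of that row pattern using the matching compiler intrinsics follows; the limb type, helper name, and the 2n+1-limb zeroed scratch are illustrative assumptions, not wolfSSL API (build with e.g. gcc -O2 -mbmi2 -madx):

    #include <immintrin.h>

    typedef unsigned long long limb;

    /* One schoolbook row: r[0..n+1] += a_i * b[0..n-1].  Low product
     * halves are folded in on carry chain c1 (adcx/CF) and high halves
     * on chain c2 (adox/OF), mirroring the interleaving above.
     * Assumes r was zero-initialized so the tail adds cannot overflow. */
    static void mul_add_row(limb *r, const limb *b, limb a_i, int n)
    {
        unsigned char c1 = 0, c2 = 0;
        for (int j = 0; j < n; j++) {
            limb hi;
            limb lo = _mulx_u64(a_i, b[j], &hi);
            c1 = _addcarryx_u64(c1, r[j],     lo, &r[j]);      /* low  */
            c2 = _addcarryx_u64(c2, r[j + 1], hi, &r[j + 1]);  /* high */
        }
        /* fold the leftover carries into the (still zero) top limbs */
        c1 = _addcarryx_u64(c1, r[n], 0, &r[n]);
        (void)_addcarryx_u64(c2, r[n + 1], c1, &r[n + 1]);
    }

    /* Full product sketch: zero 2n+1 limbs of r, then
     * for (int i = 0; i < 16; i++) mul_add_row(r + i, b, a[i], 16); */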
+ */ +#ifndef __APPLE__ +.globl sp_2048_sqr_avx2_16 +.type sp_2048_sqr_avx2_16,@function +.align 16 +sp_2048_sqr_avx2_16: +#else +.globl _sp_2048_sqr_avx2_16 +.p2align 4 +_sp_2048_sqr_avx2_16: +#endif /* __APPLE__ */ + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + push %rbx + subq $128, %rsp + cmpq %rdi, %rsi + movq %rsp, %rbp + cmovne %rdi, %rbp + xorq %r11, %r11 + # Diagonal 1 + xorq %r10, %r10 + # A[1] x A[0] + movq (%rsi), %rdx + mulxq 8(%rsi), %r8, %r9 + # A[2] x A[0] + mulxq 16(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, 8(%rbp) + movq %r9, 16(%rbp) + movq %r11, %r8 + movq %r11, %r9 + # A[3] x A[0] + mulxq 24(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[4] x A[0] + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r10, 24(%rbp) + movq %r8, 32(%rbp) + movq %r11, %r10 + movq %r11, %r8 + # A[5] x A[0] + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[6] x A[0] + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r9, 40(%rbp) + movq %r10, 48(%rbp) + movq %r11, %r9 + movq %r11, %r10 + # A[7] x A[0] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[8] x A[0] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, 56(%rbp) + movq %r9, 64(%rbp) + movq %r11, %r8 + movq %r11, %r9 + # A[9] x A[0] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[10] x A[0] + mulxq 80(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r10, 72(%rbp) + movq %r8, 80(%rbp) + movq %r11, %r10 + movq %r11, %r8 + # A[11] x A[0] + mulxq 88(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[12] x A[0] + mulxq 96(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r9, 88(%rbp) + movq %r10, %r13 + movq %r11, %r9 + movq %r11, %r10 + # A[13] x A[0] + mulxq 104(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[14] x A[0] + mulxq 112(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, %r14 + movq %r9, %r15 + movq %r11, %r8 + # A[15] x A[0] + mulxq 120(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, %rbx + # Carry + adcxq %r11, %r8 + movq %r11, %r12 + adcxq %r11, %r12 + adoxq %r11, %r12 + movq %r8, 128(%rdi) + # Diagonal 2 + movq 24(%rbp), %r8 + movq 32(%rbp), %r9 + movq 40(%rbp), %r10 + # A[2] x A[1] + movq 8(%rsi), %rdx + mulxq 16(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[3] x A[1] + mulxq 24(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, 24(%rbp) + movq %r9, 32(%rbp) + movq 48(%rbp), %r8 + movq 56(%rbp), %r9 + # A[4] x A[1] + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[5] x A[1] + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r10, 40(%rbp) + movq %r8, 48(%rbp) + movq 64(%rbp), %r10 + movq 72(%rbp), %r8 + # A[6] x A[1] + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[7] x A[1] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r9, 56(%rbp) + movq %r10, 64(%rbp) + movq 80(%rbp), %r9 + movq 88(%rbp), %r10 + # A[8] x A[1] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[9] x A[1] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, 72(%rbp) + movq %r9, 80(%rbp) + # No load %r13 - %r8 + # No load %r14 - %r9 + # A[10] x A[1] + mulxq 80(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r13 + # A[11] x A[1] + mulxq 88(%rsi), %rax, %rcx + adcxq %rax, %r13 + adoxq %rcx, %r14 + movq %r10, 88(%rbp) + # 
No store %r13 + # No load %r15 - %r10 + # No load %rbx - %r8 + # A[12] x A[1] + mulxq 96(%rsi), %rax, %rcx + adcxq %rax, %r14 + adoxq %rcx, %r15 + # A[13] x A[1] + mulxq 104(%rsi), %rax, %rcx + adcxq %rax, %r15 + adoxq %rcx, %rbx + # No store %r14 + # No store %r15 + movq 128(%rdi), %r9 + movq %r11, %r10 + # A[14] x A[1] + mulxq 112(%rsi), %rax, %rcx + adcxq %rax, %rbx + adoxq %rcx, %r9 + # A[15] x A[1] + mulxq 120(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # No store %rbx + movq %r9, 128(%rdi) + movq %r11, %r8 + # A[15] x A[2] + movq 16(%rsi), %rdx + mulxq 120(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 136(%rdi) + # Carry + adcxq %r12, %r8 + movq %r11, %r12 + adcxq %r11, %r12 + adoxq %r11, %r12 + movq %r8, 144(%rdi) + # Diagonal 3 + movq 40(%rbp), %r8 + movq 48(%rbp), %r9 + movq 56(%rbp), %r10 + # A[3] x A[2] + mulxq 24(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[4] x A[2] + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, 40(%rbp) + movq %r9, 48(%rbp) + movq 64(%rbp), %r8 + movq 72(%rbp), %r9 + # A[5] x A[2] + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[6] x A[2] + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r10, 56(%rbp) + movq %r8, 64(%rbp) + movq 80(%rbp), %r10 + movq 88(%rbp), %r8 + # A[7] x A[2] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[8] x A[2] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r9, 72(%rbp) + movq %r10, 80(%rbp) + # No load %r13 - %r9 + # No load %r14 - %r10 + # A[9] x A[2] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r13 + # A[10] x A[2] + mulxq 80(%rsi), %rax, %rcx + adcxq %rax, %r13 + adoxq %rcx, %r14 + movq %r8, 88(%rbp) + # No store %r13 + # No load %r15 - %r8 + # No load %rbx - %r9 + # A[11] x A[2] + mulxq 88(%rsi), %rax, %rcx + adcxq %rax, %r14 + adoxq %rcx, %r15 + # A[12] x A[2] + mulxq 96(%rsi), %rax, %rcx + adcxq %rax, %r15 + adoxq %rcx, %rbx + # No store %r14 + # No store %r15 + movq 128(%rdi), %r10 + movq 136(%rdi), %r8 + # A[13] x A[2] + mulxq 104(%rsi), %rax, %rcx + adcxq %rax, %rbx + adoxq %rcx, %r10 + # A[14] x A[2] + mulxq 112(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # No store %rbx + movq %r10, 128(%rdi) + movq 144(%rdi), %r9 + movq %r11, %r10 + # A[14] x A[3] + movq 112(%rsi), %rdx + mulxq 24(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[14] x A[4] + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, 136(%rdi) + movq %r9, 144(%rdi) + movq %r11, %r8 + # A[14] x A[5] + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 152(%rdi) + # Carry + adcxq %r12, %r8 + movq %r11, %r12 + adcxq %r11, %r12 + adoxq %r11, %r12 + movq %r8, 160(%rdi) + # Diagonal 4 + movq 56(%rbp), %r8 + movq 64(%rbp), %r9 + movq 72(%rbp), %r10 + # A[4] x A[3] + movq 24(%rsi), %rdx + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[5] x A[3] + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, 56(%rbp) + movq %r9, 64(%rbp) + movq 80(%rbp), %r8 + movq 88(%rbp), %r9 + # A[6] x A[3] + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[7] x A[3] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r10, 72(%rbp) + movq %r8, 80(%rbp) + # No load %r13 - %r10 + # No load %r14 - %r8 + # A[8] x A[3] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r13 + # A[9] x A[3] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r13 + 
adoxq %rcx, %r14 + movq %r9, 88(%rbp) + # No store %r13 + # No load %r15 - %r9 + # No load %rbx - %r10 + # A[10] x A[3] + mulxq 80(%rsi), %rax, %rcx + adcxq %rax, %r14 + adoxq %rcx, %r15 + # A[11] x A[3] + mulxq 88(%rsi), %rax, %rcx + adcxq %rax, %r15 + adoxq %rcx, %rbx + # No store %r14 + # No store %r15 + movq 128(%rdi), %r8 + movq 136(%rdi), %r9 + # A[12] x A[3] + mulxq 96(%rsi), %rax, %rcx + adcxq %rax, %rbx + adoxq %rcx, %r8 + # A[13] x A[3] + mulxq 104(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # No store %rbx + movq %r8, 128(%rdi) + movq 144(%rdi), %r10 + movq 152(%rdi), %r8 + # A[13] x A[4] + movq 104(%rsi), %rdx + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[13] x A[5] + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r9, 136(%rdi) + movq %r10, 144(%rdi) + movq 160(%rdi), %r9 + movq %r11, %r10 + # A[13] x A[6] + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[13] x A[7] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, 152(%rdi) + movq %r9, 160(%rdi) + movq %r11, %r8 + # A[13] x A[8] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 168(%rdi) + # Carry + adcxq %r12, %r8 + movq %r11, %r12 + adcxq %r11, %r12 + adoxq %r11, %r12 + movq %r8, 176(%rdi) + # Diagonal 5 + movq 72(%rbp), %r8 + movq 80(%rbp), %r9 + movq 88(%rbp), %r10 + # A[5] x A[4] + movq 32(%rsi), %rdx + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[6] x A[4] + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, 72(%rbp) + movq %r9, 80(%rbp) + # No load %r13 - %r8 + # No load %r14 - %r9 + # A[7] x A[4] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r13 + # A[8] x A[4] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r13 + adoxq %rcx, %r14 + movq %r10, 88(%rbp) + # No store %r13 + # No load %r15 - %r10 + # No load %rbx - %r8 + # A[9] x A[4] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r14 + adoxq %rcx, %r15 + # A[10] x A[4] + mulxq 80(%rsi), %rax, %rcx + adcxq %rax, %r15 + adoxq %rcx, %rbx + # No store %r14 + # No store %r15 + movq 128(%rdi), %r9 + movq 136(%rdi), %r10 + # A[11] x A[4] + mulxq 88(%rsi), %rax, %rcx + adcxq %rax, %rbx + adoxq %rcx, %r9 + # A[12] x A[4] + mulxq 96(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # No store %rbx + movq %r9, 128(%rdi) + movq 144(%rdi), %r8 + movq 152(%rdi), %r9 + # A[12] x A[5] + movq 96(%rsi), %rdx + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[12] x A[6] + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r10, 136(%rdi) + movq %r8, 144(%rdi) + movq 160(%rdi), %r10 + movq 168(%rdi), %r8 + # A[12] x A[7] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[12] x A[8] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r9, 152(%rdi) + movq %r10, 160(%rdi) + movq 176(%rdi), %r9 + movq %r11, %r10 + # A[12] x A[9] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[12] x A[10] + mulxq 80(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, 168(%rdi) + movq %r9, 176(%rdi) + movq %r11, %r8 + # A[12] x A[11] + mulxq 88(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 184(%rdi) + # Carry + adcxq %r12, %r8 + movq %r11, %r12 + adcxq %r11, %r12 + adoxq %r11, %r12 + movq %r8, 192(%rdi) + # Diagonal 6 + movq 88(%rbp), %r8 + # No load %r13 - %r9 + # No load %r14 - %r10 + # A[6] x A[5] + movq 40(%rsi), %rdx + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r8 + 
adoxq %rcx, %r13 + # A[7] x A[5] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r13 + adoxq %rcx, %r14 + movq %r8, 88(%rbp) + # No store %r13 + # No load %r15 - %r8 + # No load %rbx - %r9 + # A[8] x A[5] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r14 + adoxq %rcx, %r15 + # A[9] x A[5] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r15 + adoxq %rcx, %rbx + # No store %r14 + # No store %r15 + movq 128(%rdi), %r10 + movq 136(%rdi), %r8 + # A[10] x A[5] + mulxq 80(%rsi), %rax, %rcx + adcxq %rax, %rbx + adoxq %rcx, %r10 + # A[11] x A[5] + mulxq 88(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # No store %rbx + movq %r10, 128(%rdi) + movq 144(%rdi), %r9 + movq 152(%rdi), %r10 + # A[11] x A[6] + movq 88(%rsi), %rdx + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[11] x A[7] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, 136(%rdi) + movq %r9, 144(%rdi) + movq 160(%rdi), %r8 + movq 168(%rdi), %r9 + # A[11] x A[8] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[11] x A[9] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r10, 152(%rdi) + movq %r8, 160(%rdi) + movq 176(%rdi), %r10 + movq 184(%rdi), %r8 + # A[11] x A[10] + mulxq 80(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[13] x A[9] + movq 104(%rsi), %rdx + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r9, 168(%rdi) + movq %r10, 176(%rdi) + movq 192(%rdi), %r9 + movq %r11, %r10 + # A[13] x A[10] + mulxq 80(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[13] x A[11] + mulxq 88(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, 184(%rdi) + movq %r9, 192(%rdi) + movq %r11, %r8 + # A[13] x A[12] + mulxq 96(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 200(%rdi) + # Carry + adcxq %r12, %r8 + movq %r11, %r12 + adcxq %r11, %r12 + adoxq %r11, %r12 + movq %r8, 208(%rdi) + # Diagonal 7 + # No load %r14 - %r8 + # No load %r15 - %r9 + # No load %rbx - %r10 + # A[7] x A[6] + movq 48(%rsi), %rdx + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r14 + adoxq %rcx, %r15 + # A[8] x A[6] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r15 + adoxq %rcx, %rbx + # No store %r14 + # No store %r15 + movq 128(%rdi), %r8 + movq 136(%rdi), %r9 + # A[9] x A[6] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %rbx + adoxq %rcx, %r8 + # A[10] x A[6] + mulxq 80(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # No store %rbx + movq %r8, 128(%rdi) + movq 144(%rdi), %r10 + movq 152(%rdi), %r8 + # A[10] x A[7] + movq 80(%rsi), %rdx + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[10] x A[8] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r9, 136(%rdi) + movq %r10, 144(%rdi) + movq 160(%rdi), %r9 + movq 168(%rdi), %r10 + # A[10] x A[9] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[14] x A[6] + movq 112(%rsi), %rdx + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, 152(%rdi) + movq %r9, 160(%rdi) + movq 176(%rdi), %r8 + movq 184(%rdi), %r9 + # A[14] x A[7] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[14] x A[8] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r10, 168(%rdi) + movq %r8, 176(%rdi) + movq 192(%rdi), %r10 + movq 200(%rdi), %r8 + # A[14] x A[9] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[14] x A[10] + mulxq 80(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r9, 184(%rdi) + movq %r10, 192(%rdi) 
+ movq 208(%rdi), %r9 + movq %r11, %r10 + # A[14] x A[11] + mulxq 88(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[14] x A[12] + mulxq 96(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, 200(%rdi) + movq %r9, 208(%rdi) + movq %r11, %r8 + # A[14] x A[13] + mulxq 104(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 216(%rdi) + # Carry + adcxq %r12, %r8 + movq %r11, %r12 + adcxq %r11, %r12 + adoxq %r11, %r12 + movq %r8, 224(%rdi) + # Diagonal 8 + # No load %rbx - %r8 + movq 128(%rdi), %r9 + movq 136(%rdi), %r10 + # A[8] x A[7] + movq 56(%rsi), %rdx + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %rbx + adoxq %rcx, %r9 + # A[9] x A[7] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # No store %rbx + movq %r9, 128(%rdi) + movq 144(%rdi), %r8 + movq 152(%rdi), %r9 + # A[9] x A[8] + movq 64(%rsi), %rdx + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[15] x A[3] + movq 120(%rsi), %rdx + mulxq 24(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r10, 136(%rdi) + movq %r8, 144(%rdi) + movq 160(%rdi), %r10 + movq 168(%rdi), %r8 + # A[15] x A[4] + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[15] x A[5] + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r9, 152(%rdi) + movq %r10, 160(%rdi) + movq 176(%rdi), %r9 + movq 184(%rdi), %r10 + # A[15] x A[6] + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[15] x A[7] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, 168(%rdi) + movq %r9, 176(%rdi) + movq 192(%rdi), %r8 + movq 200(%rdi), %r9 + # A[15] x A[8] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[15] x A[9] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r10, 184(%rdi) + movq %r8, 192(%rdi) + movq 208(%rdi), %r10 + movq 216(%rdi), %r8 + # A[15] x A[10] + mulxq 80(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[15] x A[11] + mulxq 88(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r9, 200(%rdi) + movq %r10, 208(%rdi) + movq 224(%rdi), %r9 + movq %r11, %r10 + # A[15] x A[12] + mulxq 96(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[15] x A[13] + mulxq 104(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r8, 216(%rdi) + movq %r9, 224(%rdi) + movq %r11, %r8 + # A[15] x A[14] + mulxq 112(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 232(%rdi) + # Carry + adcxq %r12, %r8 + movq %r11, %r12 + adcxq %r11, %r12 + adoxq %r11, %r12 + movq %r8, 240(%rdi) + movq %r12, 248(%rdi) + # Double and Add in A[i] x A[i] + movq 8(%rbp), %r9 + # A[0] x A[0] + movq (%rsi), %rdx + mulxq %rdx, %rax, %rcx + movq %rax, (%rbp) + adoxq %r9, %r9 + adcxq %rcx, %r9 + movq %r9, 8(%rbp) + movq 16(%rbp), %r8 + movq 24(%rbp), %r9 + # A[1] x A[1] + movq 8(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 16(%rbp) + movq %r9, 24(%rbp) + movq 32(%rbp), %r8 + movq 40(%rbp), %r9 + # A[2] x A[2] + movq 16(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 32(%rbp) + movq %r9, 40(%rbp) + movq 48(%rbp), %r8 + movq 56(%rbp), %r9 + # A[3] x A[3] + movq 24(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 48(%rbp) + movq %r9, 56(%rbp) + movq 64(%rbp), %r8 + movq 72(%rbp), %r9 + # A[4] x A[4] + movq 32(%rsi), %rdx + mulxq %rdx, 
%rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 64(%rbp) + movq %r9, 72(%rbp) + movq 80(%rbp), %r8 + movq 88(%rbp), %r9 + # A[5] x A[5] + movq 40(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 80(%rbp) + movq %r9, 88(%rbp) + # A[6] x A[6] + movq 48(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r13, %r13 + adoxq %r14, %r14 + adcxq %rax, %r13 + adcxq %rcx, %r14 + # A[7] x A[7] + movq 56(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r15, %r15 + adoxq %rbx, %rbx + adcxq %rax, %r15 + adcxq %rcx, %rbx + movq 128(%rdi), %r8 + movq 136(%rdi), %r9 + # A[8] x A[8] + movq 64(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 128(%rdi) + movq %r9, 136(%rdi) + movq 144(%rdi), %r8 + movq 152(%rdi), %r9 + # A[9] x A[9] + movq 72(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 144(%rdi) + movq %r9, 152(%rdi) + movq 160(%rdi), %r8 + movq 168(%rdi), %r9 + # A[10] x A[10] + movq 80(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 160(%rdi) + movq %r9, 168(%rdi) + movq 176(%rdi), %r8 + movq 184(%rdi), %r9 + # A[11] x A[11] + movq 88(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 176(%rdi) + movq %r9, 184(%rdi) + movq 192(%rdi), %r8 + movq 200(%rdi), %r9 + # A[12] x A[12] + movq 96(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 192(%rdi) + movq %r9, 200(%rdi) + movq 208(%rdi), %r8 + movq 216(%rdi), %r9 + # A[13] x A[13] + movq 104(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 208(%rdi) + movq %r9, 216(%rdi) + movq 224(%rdi), %r8 + movq 232(%rdi), %r9 + # A[14] x A[14] + movq 112(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 224(%rdi) + movq %r9, 232(%rdi) + movq 240(%rdi), %r8 + movq 248(%rdi), %r9 + # A[15] x A[15] + movq 120(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 240(%rdi) + movq %r9, 248(%rdi) + movq %r13, 96(%rdi) + movq %r14, 104(%rdi) + movq %r15, 112(%rdi) + movq %rbx, 120(%rdi) + cmpq %rdi, %rsi + jne L_end_2048_sqr_avx2_16 + vmovdqu (%rbp), %xmm0 + vmovups %xmm0, (%rdi) + vmovdqu 16(%rbp), %xmm0 + vmovups %xmm0, 16(%rdi) + vmovdqu 32(%rbp), %xmm0 + vmovups %xmm0, 32(%rdi) + vmovdqu 48(%rbp), %xmm0 + vmovups %xmm0, 48(%rdi) + vmovdqu 64(%rbp), %xmm0 + vmovups %xmm0, 64(%rdi) + vmovdqu 80(%rbp), %xmm0 + vmovups %xmm0, 80(%rdi) +L_end_2048_sqr_avx2_16: + addq $128, %rsp + pop %rbx + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + repz retq +#ifndef __APPLE__ +.size sp_2048_sqr_avx2_16,.-sp_2048_sqr_avx2_16 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
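The squaring routine that ends above halves the multiply work by walking each off-diagonal product A[i] x A[j] (i < j) exactly once per diagonal, then doubling the partial result and adding the diagonal terms A[i] x A[i] at the end (the "Double and Add in A[i] x A[i]" pass). A plain-C sketch of that identity, illustrative only and using the GCC/Clang __int128 extension in place of the register scheduling above:

    #include <stdint.h>
    #include <string.h>

    /* r[0..2n-1] = a[0..n-1]^2: cross products taken once each,
     * doubled, plus the squares a[i]^2 at even limb positions. */
    static void sqr_sketch(uint64_t *r, const uint64_t *a, int n)
    {
        memset(r, 0, sizeof(uint64_t) * 2 * n);
        for (int i = 0; i < n; i++) {          /* cross products, once each */
            uint64_t carry = 0;
            for (int j = i + 1; j < n; j++) {
                unsigned __int128 t = (unsigned __int128)a[i] * a[j]
                                    + r[i + j] + carry;
                r[i + j] = (uint64_t)t;
                carry    = (uint64_t)(t >> 64);
            }
            r[i + n] = carry;                  /* limb untouched so far */
        }
        uint64_t c = 0;
        for (int k = 0; k < 2 * n; k++) {      /* double */
            uint64_t top = r[k] >> 63;
            r[k] = (r[k] << 1) | c;
            c = top;
        }
        c = 0;
        for (int i = 0; i < n; i++) {          /* add the diagonal a[i]^2 */
            unsigned __int128 t = (unsigned __int128)a[i] * a[i]
                                + r[2 * i] + c;
            r[2 * i] = (uint64_t)t;
            t = (t >> 64) + r[2 * i + 1];
            r[2 * i + 1] = (uint64_t)t;
            c = (uint64_t)(t >> 64);
        }
    }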
+ */ +#ifndef __APPLE__ +.globl sp_2048_add_16 +.type sp_2048_add_16,@function +.align 16 +sp_2048_add_16: +#else +.globl _sp_2048_add_16 +.p2align 4 +_sp_2048_add_16: +#endif /* __APPLE__ */ + # Add + movq (%rsi), %rcx + xorq %rax, %rax + addq (%rdx), %rcx + movq 8(%rsi), %r8 + movq %rcx, (%rdi) + adcq 8(%rdx), %r8 + movq 16(%rsi), %rcx + movq %r8, 8(%rdi) + adcq 16(%rdx), %rcx + movq 24(%rsi), %r8 + movq %rcx, 16(%rdi) + adcq 24(%rdx), %r8 + movq 32(%rsi), %rcx + movq %r8, 24(%rdi) + adcq 32(%rdx), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%rdi) + adcq 40(%rdx), %r8 + movq 48(%rsi), %rcx + movq %r8, 40(%rdi) + adcq 48(%rdx), %rcx + movq 56(%rsi), %r8 + movq %rcx, 48(%rdi) + adcq 56(%rdx), %r8 + movq 64(%rsi), %rcx + movq %r8, 56(%rdi) + adcq 64(%rdx), %rcx + movq 72(%rsi), %r8 + movq %rcx, 64(%rdi) + adcq 72(%rdx), %r8 + movq 80(%rsi), %rcx + movq %r8, 72(%rdi) + adcq 80(%rdx), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%rdi) + adcq 88(%rdx), %r8 + movq 96(%rsi), %rcx + movq %r8, 88(%rdi) + adcq 96(%rdx), %rcx + movq 104(%rsi), %r8 + movq %rcx, 96(%rdi) + adcq 104(%rdx), %r8 + movq 112(%rsi), %rcx + movq %r8, 104(%rdi) + adcq 112(%rdx), %rcx + movq 120(%rsi), %r8 + movq %rcx, 112(%rdi) + adcq 120(%rdx), %r8 + movq %r8, 120(%rdi) + adcq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_2048_add_16,.-sp_2048_add_16 +#endif /* __APPLE__ */ +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_2048_sub_in_place_32 +.type sp_2048_sub_in_place_32,@function +.align 16 +sp_2048_sub_in_place_32: +#else +.globl _sp_2048_sub_in_place_32 +.p2align 4 +_sp_2048_sub_in_place_32: +#endif /* __APPLE__ */ + movq (%rdi), %rdx + xorq %rax, %rax + subq (%rsi), %rdx + movq 8(%rdi), %rcx + movq %rdx, (%rdi) + sbbq 8(%rsi), %rcx + movq 16(%rdi), %rdx + movq %rcx, 8(%rdi) + sbbq 16(%rsi), %rdx + movq 24(%rdi), %rcx + movq %rdx, 16(%rdi) + sbbq 24(%rsi), %rcx + movq 32(%rdi), %rdx + movq %rcx, 24(%rdi) + sbbq 32(%rsi), %rdx + movq 40(%rdi), %rcx + movq %rdx, 32(%rdi) + sbbq 40(%rsi), %rcx + movq 48(%rdi), %rdx + movq %rcx, 40(%rdi) + sbbq 48(%rsi), %rdx + movq 56(%rdi), %rcx + movq %rdx, 48(%rdi) + sbbq 56(%rsi), %rcx + movq 64(%rdi), %rdx + movq %rcx, 56(%rdi) + sbbq 64(%rsi), %rdx + movq 72(%rdi), %rcx + movq %rdx, 64(%rdi) + sbbq 72(%rsi), %rcx + movq 80(%rdi), %rdx + movq %rcx, 72(%rdi) + sbbq 80(%rsi), %rdx + movq 88(%rdi), %rcx + movq %rdx, 80(%rdi) + sbbq 88(%rsi), %rcx + movq 96(%rdi), %rdx + movq %rcx, 88(%rdi) + sbbq 96(%rsi), %rdx + movq 104(%rdi), %rcx + movq %rdx, 96(%rdi) + sbbq 104(%rsi), %rcx + movq 112(%rdi), %rdx + movq %rcx, 104(%rdi) + sbbq 112(%rsi), %rdx + movq 120(%rdi), %rcx + movq %rdx, 112(%rdi) + sbbq 120(%rsi), %rcx + movq 128(%rdi), %rdx + movq %rcx, 120(%rdi) + sbbq 128(%rsi), %rdx + movq 136(%rdi), %rcx + movq %rdx, 128(%rdi) + sbbq 136(%rsi), %rcx + movq 144(%rdi), %rdx + movq %rcx, 136(%rdi) + sbbq 144(%rsi), %rdx + movq 152(%rdi), %rcx + movq %rdx, 144(%rdi) + sbbq 152(%rsi), %rcx + movq 160(%rdi), %rdx + movq %rcx, 152(%rdi) + sbbq 160(%rsi), %rdx + movq 168(%rdi), %rcx + movq %rdx, 160(%rdi) + sbbq 168(%rsi), %rcx + movq 176(%rdi), %rdx + movq %rcx, 168(%rdi) + sbbq 176(%rsi), %rdx + movq 184(%rdi), %rcx + movq %rdx, 176(%rdi) + sbbq 184(%rsi), %rcx + movq 192(%rdi), %rdx + movq %rcx, 184(%rdi) + sbbq 192(%rsi), %rdx + movq 200(%rdi), %rcx + movq %rdx, 192(%rdi) + sbbq 200(%rsi), %rcx + movq 208(%rdi), %rdx + movq %rcx, 200(%rdi) + sbbq 208(%rsi), %rdx + movq 216(%rdi), %rcx + movq 
%rdx, 208(%rdi) + sbbq 216(%rsi), %rcx + movq 224(%rdi), %rdx + movq %rcx, 216(%rdi) + sbbq 224(%rsi), %rdx + movq 232(%rdi), %rcx + movq %rdx, 224(%rdi) + sbbq 232(%rsi), %rcx + movq 240(%rdi), %rdx + movq %rcx, 232(%rdi) + sbbq 240(%rsi), %rdx + movq 248(%rdi), %rcx + movq %rdx, 240(%rdi) + sbbq 248(%rsi), %rcx + movq %rcx, 248(%rdi) + sbbq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_2048_sub_in_place_32,.-sp_2048_sub_in_place_32 +#endif /* __APPLE__ */ +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_2048_add_32 +.type sp_2048_add_32,@function +.align 16 +sp_2048_add_32: +#else +.globl _sp_2048_add_32 +.p2align 4 +_sp_2048_add_32: +#endif /* __APPLE__ */ + # Add + movq (%rsi), %rcx + xorq %rax, %rax + addq (%rdx), %rcx + movq 8(%rsi), %r8 + movq %rcx, (%rdi) + adcq 8(%rdx), %r8 + movq 16(%rsi), %rcx + movq %r8, 8(%rdi) + adcq 16(%rdx), %rcx + movq 24(%rsi), %r8 + movq %rcx, 16(%rdi) + adcq 24(%rdx), %r8 + movq 32(%rsi), %rcx + movq %r8, 24(%rdi) + adcq 32(%rdx), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%rdi) + adcq 40(%rdx), %r8 + movq 48(%rsi), %rcx + movq %r8, 40(%rdi) + adcq 48(%rdx), %rcx + movq 56(%rsi), %r8 + movq %rcx, 48(%rdi) + adcq 56(%rdx), %r8 + movq 64(%rsi), %rcx + movq %r8, 56(%rdi) + adcq 64(%rdx), %rcx + movq 72(%rsi), %r8 + movq %rcx, 64(%rdi) + adcq 72(%rdx), %r8 + movq 80(%rsi), %rcx + movq %r8, 72(%rdi) + adcq 80(%rdx), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%rdi) + adcq 88(%rdx), %r8 + movq 96(%rsi), %rcx + movq %r8, 88(%rdi) + adcq 96(%rdx), %rcx + movq 104(%rsi), %r8 + movq %rcx, 96(%rdi) + adcq 104(%rdx), %r8 + movq 112(%rsi), %rcx + movq %r8, 104(%rdi) + adcq 112(%rdx), %rcx + movq 120(%rsi), %r8 + movq %rcx, 112(%rdi) + adcq 120(%rdx), %r8 + movq 128(%rsi), %rcx + movq %r8, 120(%rdi) + adcq 128(%rdx), %rcx + movq 136(%rsi), %r8 + movq %rcx, 128(%rdi) + adcq 136(%rdx), %r8 + movq 144(%rsi), %rcx + movq %r8, 136(%rdi) + adcq 144(%rdx), %rcx + movq 152(%rsi), %r8 + movq %rcx, 144(%rdi) + adcq 152(%rdx), %r8 + movq 160(%rsi), %rcx + movq %r8, 152(%rdi) + adcq 160(%rdx), %rcx + movq 168(%rsi), %r8 + movq %rcx, 160(%rdi) + adcq 168(%rdx), %r8 + movq 176(%rsi), %rcx + movq %r8, 168(%rdi) + adcq 176(%rdx), %rcx + movq 184(%rsi), %r8 + movq %rcx, 176(%rdi) + adcq 184(%rdx), %r8 + movq 192(%rsi), %rcx + movq %r8, 184(%rdi) + adcq 192(%rdx), %rcx + movq 200(%rsi), %r8 + movq %rcx, 192(%rdi) + adcq 200(%rdx), %r8 + movq 208(%rsi), %rcx + movq %r8, 200(%rdi) + adcq 208(%rdx), %rcx + movq 216(%rsi), %r8 + movq %rcx, 208(%rdi) + adcq 216(%rdx), %r8 + movq 224(%rsi), %rcx + movq %r8, 216(%rdi) + adcq 224(%rdx), %rcx + movq 232(%rsi), %r8 + movq %rcx, 224(%rdi) + adcq 232(%rdx), %r8 + movq 240(%rsi), %rcx + movq %r8, 232(%rdi) + adcq 240(%rdx), %rcx + movq 248(%rsi), %r8 + movq %rcx, 240(%rdi) + adcq 248(%rdx), %r8 + movq %r8, 248(%rdi) + adcq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_2048_add_32,.-sp_2048_add_32 +#endif /* __APPLE__ */ +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
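The word-array add and subtract helpers above return the final carry (or borrow) in %rax so callers can propagate it. The 32-limb multiply that follows then avoids a fourth 16-limb product by a one-level Karatsuba split: it forms the half-sums aL+aH and bL+bH (the saved carries later become the negq/andq masks that correct for half-sum overflow), calls sp_2048_mul_16 three times — on the half-sums, the high halves, and the low halves — and recombines them via a*b = z2*W^2 + (z1 - z2 - z0)*W + z0, where W is the half width (2^1024 here) and the sbbq runs perform the z1 - z2 - z0 step. A self-contained C illustration of the same identity on 32-bit halves, illustrative only and not wolfSSL API:

    #include <stdint.h>

    /* karatsuba64(a, b) == (unsigned __int128)a * b for all inputs:
     * three 32x32 multiplies instead of four, with the same
     * recombination the assembly performs on 1024-bit halves. */
    static unsigned __int128 karatsuba64(uint64_t a, uint64_t b)
    {
        uint64_t aL = (uint32_t)a, aH = a >> 32;
        uint64_t bL = (uint32_t)b, bH = b >> 32;
        unsigned __int128 z0 = (unsigned __int128)aL * bL;
        unsigned __int128 z2 = (unsigned __int128)aH * bH;
        /* aL+aH and bL+bH fit in 33 bits, so widen before multiplying */
        unsigned __int128 z1 = (unsigned __int128)(aL + aH) * (bL + bH);
        return (z2 << 64) + ((z1 - z0 - z2) << 32) + z0;
    }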
+ */ +#ifndef __APPLE__ +.globl sp_2048_mul_32 +.type sp_2048_mul_32,@function +.align 16 +sp_2048_mul_32: +#else +.globl _sp_2048_mul_32 +.p2align 4 +_sp_2048_mul_32: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + subq $808, %rsp + movq %rdi, 768(%rsp) + movq %rsi, 776(%rsp) + movq %rdx, 784(%rsp) + leaq 512(%rsp), %r10 + leaq 128(%rsi), %r12 + # Add + movq (%rsi), %rax + xorq %r13, %r13 + addq (%r12), %rax + movq 8(%rsi), %rcx + movq %rax, (%r10) + adcq 8(%r12), %rcx + movq 16(%rsi), %r8 + movq %rcx, 8(%r10) + adcq 16(%r12), %r8 + movq 24(%rsi), %rax + movq %r8, 16(%r10) + adcq 24(%r12), %rax + movq 32(%rsi), %rcx + movq %rax, 24(%r10) + adcq 32(%r12), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%r10) + adcq 40(%r12), %r8 + movq 48(%rsi), %rax + movq %r8, 40(%r10) + adcq 48(%r12), %rax + movq 56(%rsi), %rcx + movq %rax, 48(%r10) + adcq 56(%r12), %rcx + movq 64(%rsi), %r8 + movq %rcx, 56(%r10) + adcq 64(%r12), %r8 + movq 72(%rsi), %rax + movq %r8, 64(%r10) + adcq 72(%r12), %rax + movq 80(%rsi), %rcx + movq %rax, 72(%r10) + adcq 80(%r12), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%r10) + adcq 88(%r12), %r8 + movq 96(%rsi), %rax + movq %r8, 88(%r10) + adcq 96(%r12), %rax + movq 104(%rsi), %rcx + movq %rax, 96(%r10) + adcq 104(%r12), %rcx + movq 112(%rsi), %r8 + movq %rcx, 104(%r10) + adcq 112(%r12), %r8 + movq 120(%rsi), %rax + movq %r8, 112(%r10) + adcq 120(%r12), %rax + movq %rax, 120(%r10) + adcq $0, %r13 + movq %r13, 792(%rsp) + leaq 640(%rsp), %r11 + leaq 128(%rdx), %r12 + # Add + movq (%rdx), %rax + xorq %r14, %r14 + addq (%r12), %rax + movq 8(%rdx), %rcx + movq %rax, (%r11) + adcq 8(%r12), %rcx + movq 16(%rdx), %r8 + movq %rcx, 8(%r11) + adcq 16(%r12), %r8 + movq 24(%rdx), %rax + movq %r8, 16(%r11) + adcq 24(%r12), %rax + movq 32(%rdx), %rcx + movq %rax, 24(%r11) + adcq 32(%r12), %rcx + movq 40(%rdx), %r8 + movq %rcx, 32(%r11) + adcq 40(%r12), %r8 + movq 48(%rdx), %rax + movq %r8, 40(%r11) + adcq 48(%r12), %rax + movq 56(%rdx), %rcx + movq %rax, 48(%r11) + adcq 56(%r12), %rcx + movq 64(%rdx), %r8 + movq %rcx, 56(%r11) + adcq 64(%r12), %r8 + movq 72(%rdx), %rax + movq %r8, 64(%r11) + adcq 72(%r12), %rax + movq 80(%rdx), %rcx + movq %rax, 72(%r11) + adcq 80(%r12), %rcx + movq 88(%rdx), %r8 + movq %rcx, 80(%r11) + adcq 88(%r12), %r8 + movq 96(%rdx), %rax + movq %r8, 88(%r11) + adcq 96(%r12), %rax + movq 104(%rdx), %rcx + movq %rax, 96(%r11) + adcq 104(%r12), %rcx + movq 112(%rdx), %r8 + movq %rcx, 104(%r11) + adcq 112(%r12), %r8 + movq 120(%rdx), %rax + movq %r8, 112(%r11) + adcq 120(%r12), %rax + movq %rax, 120(%r11) + adcq $0, %r14 + movq %r14, 800(%rsp) + movq %r11, %rdx + movq %r10, %rsi + movq %rsp, %rdi +#ifndef __APPLE__ + callq sp_2048_mul_16@plt +#else + callq _sp_2048_mul_16 +#endif /* __APPLE__ */ + movq 784(%rsp), %rdx + movq 776(%rsp), %rsi + leaq 256(%rsp), %rdi + addq $128, %rdx + addq $128, %rsi +#ifndef __APPLE__ + callq sp_2048_mul_16@plt +#else + callq _sp_2048_mul_16 +#endif /* __APPLE__ */ + movq 784(%rsp), %rdx + movq 776(%rsp), %rsi + movq 768(%rsp), %rdi +#ifndef __APPLE__ + callq sp_2048_mul_16@plt +#else + callq _sp_2048_mul_16 +#endif /* __APPLE__ */ + movq 792(%rsp), %r13 + movq 800(%rsp), %r14 + movq 768(%rsp), %r15 + movq %r13, %r9 + leaq 512(%rsp), %r10 + leaq 640(%rsp), %r11 + andq %r14, %r9 + negq %r13 + negq %r14 + addq $256, %r15 + movq (%r10), %rax + movq (%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, (%r10) + movq %rcx, (%r11) + movq 8(%r10), %rax + movq 8(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq 
%rax, 8(%r10) + movq %rcx, 8(%r11) + movq 16(%r10), %rax + movq 16(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 16(%r10) + movq %rcx, 16(%r11) + movq 24(%r10), %rax + movq 24(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 24(%r10) + movq %rcx, 24(%r11) + movq 32(%r10), %rax + movq 32(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 32(%r10) + movq %rcx, 32(%r11) + movq 40(%r10), %rax + movq 40(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 40(%r10) + movq %rcx, 40(%r11) + movq 48(%r10), %rax + movq 48(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 48(%r10) + movq %rcx, 48(%r11) + movq 56(%r10), %rax + movq 56(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 56(%r10) + movq %rcx, 56(%r11) + movq 64(%r10), %rax + movq 64(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 64(%r10) + movq %rcx, 64(%r11) + movq 72(%r10), %rax + movq 72(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 72(%r10) + movq %rcx, 72(%r11) + movq 80(%r10), %rax + movq 80(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 80(%r10) + movq %rcx, 80(%r11) + movq 88(%r10), %rax + movq 88(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 88(%r10) + movq %rcx, 88(%r11) + movq 96(%r10), %rax + movq 96(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 96(%r10) + movq %rcx, 96(%r11) + movq 104(%r10), %rax + movq 104(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 104(%r10) + movq %rcx, 104(%r11) + movq 112(%r10), %rax + movq 112(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 112(%r10) + movq %rcx, 112(%r11) + movq 120(%r10), %rax + movq 120(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 120(%r10) + movq %rcx, 120(%r11) + movq (%r10), %rax + addq (%r11), %rax + movq 8(%r10), %rcx + movq %rax, (%r15) + adcq 8(%r11), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r15) + adcq 16(%r11), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r15) + adcq 24(%r11), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r15) + adcq 32(%r11), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r15) + adcq 40(%r11), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r15) + adcq 48(%r11), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r15) + adcq 56(%r11), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r15) + adcq 64(%r11), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r15) + adcq 72(%r11), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r15) + adcq 80(%r11), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r15) + adcq 88(%r11), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r15) + adcq 96(%r11), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r15) + adcq 104(%r11), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r15) + adcq 112(%r11), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r15) + adcq 120(%r11), %rax + movq %rax, 120(%r15) + adcq $0, %r9 + leaq 256(%rsp), %r11 + movq %rsp, %r10 + movq (%r10), %rax + subq (%r11), %rax + movq 8(%r10), %rcx + movq %rax, (%r10) + sbbq 8(%r11), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r10) + sbbq 16(%r11), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r10) + sbbq 24(%r11), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r10) + sbbq 32(%r11), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r10) + sbbq 40(%r11), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r10) + sbbq 48(%r11), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r10) + sbbq 56(%r11), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r10) + sbbq 64(%r11), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r10) + sbbq 72(%r11), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r10) + sbbq 
80(%r11), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r10) + sbbq 88(%r11), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r10) + sbbq 96(%r11), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r10) + sbbq 104(%r11), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r10) + sbbq 112(%r11), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r10) + sbbq 120(%r11), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r10) + sbbq 128(%r11), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r10) + sbbq 136(%r11), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r10) + sbbq 144(%r11), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r10) + sbbq 152(%r11), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r10) + sbbq 160(%r11), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r10) + sbbq 168(%r11), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r10) + sbbq 176(%r11), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r10) + sbbq 184(%r11), %r8 + movq 192(%r10), %rax + movq %r8, 184(%r10) + sbbq 192(%r11), %rax + movq 200(%r10), %rcx + movq %rax, 192(%r10) + sbbq 200(%r11), %rcx + movq 208(%r10), %r8 + movq %rcx, 200(%r10) + sbbq 208(%r11), %r8 + movq 216(%r10), %rax + movq %r8, 208(%r10) + sbbq 216(%r11), %rax + movq 224(%r10), %rcx + movq %rax, 216(%r10) + sbbq 224(%r11), %rcx + movq 232(%r10), %r8 + movq %rcx, 224(%r10) + sbbq 232(%r11), %r8 + movq 240(%r10), %rax + movq %r8, 232(%r10) + sbbq 240(%r11), %rax + movq 248(%r10), %rcx + movq %rax, 240(%r10) + sbbq 248(%r11), %rcx + movq %rcx, 248(%r10) + sbbq $0, %r9 + movq (%r10), %rax + subq (%rdi), %rax + movq 8(%r10), %rcx + movq %rax, (%r10) + sbbq 8(%rdi), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r10) + sbbq 16(%rdi), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r10) + sbbq 24(%rdi), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r10) + sbbq 32(%rdi), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r10) + sbbq 40(%rdi), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r10) + sbbq 48(%rdi), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r10) + sbbq 56(%rdi), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r10) + sbbq 64(%rdi), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r10) + sbbq 72(%rdi), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r10) + sbbq 80(%rdi), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r10) + sbbq 88(%rdi), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r10) + sbbq 96(%rdi), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r10) + sbbq 104(%rdi), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r10) + sbbq 112(%rdi), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r10) + sbbq 120(%rdi), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r10) + sbbq 128(%rdi), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r10) + sbbq 136(%rdi), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r10) + sbbq 144(%rdi), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r10) + sbbq 152(%rdi), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r10) + sbbq 160(%rdi), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r10) + sbbq 168(%rdi), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r10) + sbbq 176(%rdi), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r10) + sbbq 184(%rdi), %r8 + movq 192(%r10), %rax + movq %r8, 184(%r10) + sbbq 192(%rdi), %rax + movq 200(%r10), %rcx + movq %rax, 192(%r10) + sbbq 200(%rdi), %rcx + movq 208(%r10), %r8 + movq %rcx, 200(%r10) + sbbq 208(%rdi), %r8 + movq 216(%r10), %rax + movq %r8, 208(%r10) + sbbq 216(%rdi), %rax + movq 224(%r10), %rcx + movq %rax, 216(%r10) + sbbq 224(%rdi), %rcx + movq 232(%r10), %r8 + movq %rcx, 224(%r10) + sbbq 232(%rdi), %r8 + movq 240(%r10), %rax + movq %r8, 232(%r10) + sbbq 240(%rdi), %rax + movq 248(%r10), %rcx + movq %rax, 
240(%r10) + sbbq 248(%rdi), %rcx + movq %rcx, 248(%r10) + sbbq $0, %r9 + subq $128, %r15 + # Add + movq (%r15), %rax + addq (%r10), %rax + movq 8(%r15), %rcx + movq %rax, (%r15) + adcq 8(%r10), %rcx + movq 16(%r15), %r8 + movq %rcx, 8(%r15) + adcq 16(%r10), %r8 + movq 24(%r15), %rax + movq %r8, 16(%r15) + adcq 24(%r10), %rax + movq 32(%r15), %rcx + movq %rax, 24(%r15) + adcq 32(%r10), %rcx + movq 40(%r15), %r8 + movq %rcx, 32(%r15) + adcq 40(%r10), %r8 + movq 48(%r15), %rax + movq %r8, 40(%r15) + adcq 48(%r10), %rax + movq 56(%r15), %rcx + movq %rax, 48(%r15) + adcq 56(%r10), %rcx + movq 64(%r15), %r8 + movq %rcx, 56(%r15) + adcq 64(%r10), %r8 + movq 72(%r15), %rax + movq %r8, 64(%r15) + adcq 72(%r10), %rax + movq 80(%r15), %rcx + movq %rax, 72(%r15) + adcq 80(%r10), %rcx + movq 88(%r15), %r8 + movq %rcx, 80(%r15) + adcq 88(%r10), %r8 + movq 96(%r15), %rax + movq %r8, 88(%r15) + adcq 96(%r10), %rax + movq 104(%r15), %rcx + movq %rax, 96(%r15) + adcq 104(%r10), %rcx + movq 112(%r15), %r8 + movq %rcx, 104(%r15) + adcq 112(%r10), %r8 + movq 120(%r15), %rax + movq %r8, 112(%r15) + adcq 120(%r10), %rax + movq 128(%r15), %rcx + movq %rax, 120(%r15) + adcq 128(%r10), %rcx + movq 136(%r15), %r8 + movq %rcx, 128(%r15) + adcq 136(%r10), %r8 + movq 144(%r15), %rax + movq %r8, 136(%r15) + adcq 144(%r10), %rax + movq 152(%r15), %rcx + movq %rax, 144(%r15) + adcq 152(%r10), %rcx + movq 160(%r15), %r8 + movq %rcx, 152(%r15) + adcq 160(%r10), %r8 + movq 168(%r15), %rax + movq %r8, 160(%r15) + adcq 168(%r10), %rax + movq 176(%r15), %rcx + movq %rax, 168(%r15) + adcq 176(%r10), %rcx + movq 184(%r15), %r8 + movq %rcx, 176(%r15) + adcq 184(%r10), %r8 + movq 192(%r15), %rax + movq %r8, 184(%r15) + adcq 192(%r10), %rax + movq 200(%r15), %rcx + movq %rax, 192(%r15) + adcq 200(%r10), %rcx + movq 208(%r15), %r8 + movq %rcx, 200(%r15) + adcq 208(%r10), %r8 + movq 216(%r15), %rax + movq %r8, 208(%r15) + adcq 216(%r10), %rax + movq 224(%r15), %rcx + movq %rax, 216(%r15) + adcq 224(%r10), %rcx + movq 232(%r15), %r8 + movq %rcx, 224(%r15) + adcq 232(%r10), %r8 + movq 240(%r15), %rax + movq %r8, 232(%r15) + adcq 240(%r10), %rax + movq 248(%r15), %rcx + movq %rax, 240(%r15) + adcq 248(%r10), %rcx + movq %rcx, 248(%r15) + adcq $0, %r9 + movq %r9, 384(%rdi) + addq $128, %r15 + # Add + movq (%r15), %rax + xorq %r9, %r9 + addq (%r11), %rax + movq 8(%r15), %rcx + movq %rax, (%r15) + adcq 8(%r11), %rcx + movq 16(%r15), %r8 + movq %rcx, 8(%r15) + adcq 16(%r11), %r8 + movq 24(%r15), %rax + movq %r8, 16(%r15) + adcq 24(%r11), %rax + movq 32(%r15), %rcx + movq %rax, 24(%r15) + adcq 32(%r11), %rcx + movq 40(%r15), %r8 + movq %rcx, 32(%r15) + adcq 40(%r11), %r8 + movq 48(%r15), %rax + movq %r8, 40(%r15) + adcq 48(%r11), %rax + movq 56(%r15), %rcx + movq %rax, 48(%r15) + adcq 56(%r11), %rcx + movq 64(%r15), %r8 + movq %rcx, 56(%r15) + adcq 64(%r11), %r8 + movq 72(%r15), %rax + movq %r8, 64(%r15) + adcq 72(%r11), %rax + movq 80(%r15), %rcx + movq %rax, 72(%r15) + adcq 80(%r11), %rcx + movq 88(%r15), %r8 + movq %rcx, 80(%r15) + adcq 88(%r11), %r8 + movq 96(%r15), %rax + movq %r8, 88(%r15) + adcq 96(%r11), %rax + movq 104(%r15), %rcx + movq %rax, 96(%r15) + adcq 104(%r11), %rcx + movq 112(%r15), %r8 + movq %rcx, 104(%r15) + adcq 112(%r11), %r8 + movq 120(%r15), %rax + movq %r8, 112(%r15) + adcq 120(%r11), %rax + movq 128(%r15), %rcx + movq %rax, 120(%r15) + adcq 128(%r11), %rcx + movq %rcx, 128(%r15) + adcq $0, %r9 + # Add to zero + movq 136(%r11), %rax + adcq $0, %rax + movq 144(%r11), %rcx + movq %rax, 136(%r15) + adcq $0, %rcx + movq 
152(%r11), %r8 + movq %rcx, 144(%r15) + adcq $0, %r8 + movq 160(%r11), %rax + movq %r8, 152(%r15) + adcq $0, %rax + movq 168(%r11), %rcx + movq %rax, 160(%r15) + adcq $0, %rcx + movq 176(%r11), %r8 + movq %rcx, 168(%r15) + adcq $0, %r8 + movq 184(%r11), %rax + movq %r8, 176(%r15) + adcq $0, %rax + movq 192(%r11), %rcx + movq %rax, 184(%r15) + adcq $0, %rcx + movq 200(%r11), %r8 + movq %rcx, 192(%r15) + adcq $0, %r8 + movq 208(%r11), %rax + movq %r8, 200(%r15) + adcq $0, %rax + movq 216(%r11), %rcx + movq %rax, 208(%r15) + adcq $0, %rcx + movq 224(%r11), %r8 + movq %rcx, 216(%r15) + adcq $0, %r8 + movq 232(%r11), %rax + movq %r8, 224(%r15) + adcq $0, %rax + movq 240(%r11), %rcx + movq %rax, 232(%r15) + adcq $0, %rcx + movq 248(%r11), %r8 + movq %rcx, 240(%r15) + adcq $0, %r8 + movq %r8, 248(%r15) + addq $808, %rsp + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_2048_mul_32,.-sp_2048_mul_32 +#endif /* __APPLE__ */ +/* Add a to a into r. (r = a + a) + * + * r A single precision integer. + * a A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_2048_dbl_16 +.type sp_2048_dbl_16,@function +.align 16 +sp_2048_dbl_16: +#else +.globl _sp_2048_dbl_16 +.p2align 4 +_sp_2048_dbl_16: +#endif /* __APPLE__ */ + movq (%rsi), %rdx + xorq %rax, %rax + addq %rdx, %rdx + movq 8(%rsi), %rcx + movq %rdx, (%rdi) + adcq %rcx, %rcx + movq 16(%rsi), %rdx + movq %rcx, 8(%rdi) + adcq %rdx, %rdx + movq 24(%rsi), %rcx + movq %rdx, 16(%rdi) + adcq %rcx, %rcx + movq 32(%rsi), %rdx + movq %rcx, 24(%rdi) + adcq %rdx, %rdx + movq 40(%rsi), %rcx + movq %rdx, 32(%rdi) + adcq %rcx, %rcx + movq 48(%rsi), %rdx + movq %rcx, 40(%rdi) + adcq %rdx, %rdx + movq 56(%rsi), %rcx + movq %rdx, 48(%rdi) + adcq %rcx, %rcx + movq 64(%rsi), %rdx + movq %rcx, 56(%rdi) + adcq %rdx, %rdx + movq 72(%rsi), %rcx + movq %rdx, 64(%rdi) + adcq %rcx, %rcx + movq 80(%rsi), %rdx + movq %rcx, 72(%rdi) + adcq %rdx, %rdx + movq 88(%rsi), %rcx + movq %rdx, 80(%rdi) + adcq %rcx, %rcx + movq 96(%rsi), %rdx + movq %rcx, 88(%rdi) + adcq %rdx, %rdx + movq 104(%rsi), %rcx + movq %rdx, 96(%rdi) + adcq %rcx, %rcx + movq 112(%rsi), %rdx + movq %rcx, 104(%rdi) + adcq %rdx, %rdx + movq 120(%rsi), %rcx + movq %rdx, 112(%rdi) + adcq %rcx, %rcx + movq %rcx, 120(%rdi) + adcq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_2048_dbl_16,.-sp_2048_dbl_16 +#endif /* __APPLE__ */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
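+ *
+ * Sketch of the approach (one level of Karatsuba, assuming a 64-bit
+ * limb type with a split into 16-limb halves al and ah):
+ *
+ *   a^2 = ah^2 * 2^2048 + ((al + ah)^2 - al^2 - ah^2) * 2^1024 + al^2
+ *
+ * which is why the body below makes exactly three sp_2048_sqr_16
+ * calls; the masked stores and the doubling pass fold in the carry
+ * out of al + ah in constant time.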
+ */ +#ifndef __APPLE__ +.globl sp_2048_sqr_32 +.type sp_2048_sqr_32,@function +.align 16 +sp_2048_sqr_32: +#else +.globl _sp_2048_sqr_32 +.p2align 4 +_sp_2048_sqr_32: +#endif /* __APPLE__ */ + subq $664, %rsp + movq %rdi, 640(%rsp) + movq %rsi, 648(%rsp) + leaq 512(%rsp), %r8 + leaq 128(%rsi), %r9 + # Add + movq (%rsi), %rdx + xorq %rcx, %rcx + addq (%r9), %rdx + movq 8(%rsi), %rax + movq %rdx, (%r8) + adcq 8(%r9), %rax + movq 16(%rsi), %rdx + movq %rax, 8(%r8) + adcq 16(%r9), %rdx + movq 24(%rsi), %rax + movq %rdx, 16(%r8) + adcq 24(%r9), %rax + movq 32(%rsi), %rdx + movq %rax, 24(%r8) + adcq 32(%r9), %rdx + movq 40(%rsi), %rax + movq %rdx, 32(%r8) + adcq 40(%r9), %rax + movq 48(%rsi), %rdx + movq %rax, 40(%r8) + adcq 48(%r9), %rdx + movq 56(%rsi), %rax + movq %rdx, 48(%r8) + adcq 56(%r9), %rax + movq 64(%rsi), %rdx + movq %rax, 56(%r8) + adcq 64(%r9), %rdx + movq 72(%rsi), %rax + movq %rdx, 64(%r8) + adcq 72(%r9), %rax + movq 80(%rsi), %rdx + movq %rax, 72(%r8) + adcq 80(%r9), %rdx + movq 88(%rsi), %rax + movq %rdx, 80(%r8) + adcq 88(%r9), %rax + movq 96(%rsi), %rdx + movq %rax, 88(%r8) + adcq 96(%r9), %rdx + movq 104(%rsi), %rax + movq %rdx, 96(%r8) + adcq 104(%r9), %rax + movq 112(%rsi), %rdx + movq %rax, 104(%r8) + adcq 112(%r9), %rdx + movq 120(%rsi), %rax + movq %rdx, 112(%r8) + adcq 120(%r9), %rax + movq %rax, 120(%r8) + adcq $0, %rcx + movq %rcx, 656(%rsp) + movq %r8, %rsi + movq %rsp, %rdi +#ifndef __APPLE__ + callq sp_2048_sqr_16@plt +#else + callq _sp_2048_sqr_16 +#endif /* __APPLE__ */ + movq 648(%rsp), %rsi + leaq 256(%rsp), %rdi + addq $128, %rsi +#ifndef __APPLE__ + callq sp_2048_sqr_16@plt +#else + callq _sp_2048_sqr_16 +#endif /* __APPLE__ */ + movq 648(%rsp), %rsi + movq 640(%rsp), %rdi +#ifndef __APPLE__ + callq sp_2048_sqr_16@plt +#else + callq _sp_2048_sqr_16 +#endif /* __APPLE__ */ + movq 656(%rsp), %r10 + leaq 512(%rsp), %r8 + movq %r10, %rcx + negq %r10 + movq (%r8), %rdx + movq 8(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 256(%rdi) + movq %rax, 264(%rdi) + movq 16(%r8), %rdx + movq 24(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 272(%rdi) + movq %rax, 280(%rdi) + movq 32(%r8), %rdx + movq 40(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 288(%rdi) + movq %rax, 296(%rdi) + movq 48(%r8), %rdx + movq 56(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 304(%rdi) + movq %rax, 312(%rdi) + movq 64(%r8), %rdx + movq 72(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 320(%rdi) + movq %rax, 328(%rdi) + movq 80(%r8), %rdx + movq 88(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 336(%rdi) + movq %rax, 344(%rdi) + movq 96(%r8), %rdx + movq 104(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 352(%rdi) + movq %rax, 360(%rdi) + movq 112(%r8), %rdx + movq 120(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 368(%rdi) + movq %rax, 376(%rdi) + movq 256(%rdi), %rdx + addq %rdx, %rdx + movq 264(%rdi), %rax + movq %rdx, 256(%rdi) + adcq %rax, %rax + movq 272(%rdi), %rdx + movq %rax, 264(%rdi) + adcq %rdx, %rdx + movq 280(%rdi), %rax + movq %rdx, 272(%rdi) + adcq %rax, %rax + movq 288(%rdi), %rdx + movq %rax, 280(%rdi) + adcq %rdx, %rdx + movq 296(%rdi), %rax + movq %rdx, 288(%rdi) + adcq %rax, %rax + movq 304(%rdi), %rdx + movq %rax, 296(%rdi) + adcq %rdx, %rdx + movq 312(%rdi), %rax + movq %rdx, 304(%rdi) + adcq %rax, %rax + movq 320(%rdi), %rdx + movq %rax, 312(%rdi) + adcq %rdx, %rdx + movq 328(%rdi), %rax + movq %rdx, 320(%rdi) + adcq %rax, %rax + movq 336(%rdi), %rdx + movq 
%rax, 328(%rdi) + adcq %rdx, %rdx + movq 344(%rdi), %rax + movq %rdx, 336(%rdi) + adcq %rax, %rax + movq 352(%rdi), %rdx + movq %rax, 344(%rdi) + adcq %rdx, %rdx + movq 360(%rdi), %rax + movq %rdx, 352(%rdi) + adcq %rax, %rax + movq 368(%rdi), %rdx + movq %rax, 360(%rdi) + adcq %rdx, %rdx + movq 376(%rdi), %rax + movq %rdx, 368(%rdi) + adcq %rax, %rax + movq %rax, 376(%rdi) + adcq $0, %rcx + leaq 256(%rsp), %rsi + movq %rsp, %r8 + movq (%r8), %rdx + subq (%rsi), %rdx + movq 8(%r8), %rax + movq %rdx, (%r8) + sbbq 8(%rsi), %rax + movq 16(%r8), %rdx + movq %rax, 8(%r8) + sbbq 16(%rsi), %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r8) + sbbq 24(%rsi), %rax + movq 32(%r8), %rdx + movq %rax, 24(%r8) + sbbq 32(%rsi), %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r8) + sbbq 40(%rsi), %rax + movq 48(%r8), %rdx + movq %rax, 40(%r8) + sbbq 48(%rsi), %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r8) + sbbq 56(%rsi), %rax + movq 64(%r8), %rdx + movq %rax, 56(%r8) + sbbq 64(%rsi), %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r8) + sbbq 72(%rsi), %rax + movq 80(%r8), %rdx + movq %rax, 72(%r8) + sbbq 80(%rsi), %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r8) + sbbq 88(%rsi), %rax + movq 96(%r8), %rdx + movq %rax, 88(%r8) + sbbq 96(%rsi), %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r8) + sbbq 104(%rsi), %rax + movq 112(%r8), %rdx + movq %rax, 104(%r8) + sbbq 112(%rsi), %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r8) + sbbq 120(%rsi), %rax + movq 128(%r8), %rdx + movq %rax, 120(%r8) + sbbq 128(%rsi), %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r8) + sbbq 136(%rsi), %rax + movq 144(%r8), %rdx + movq %rax, 136(%r8) + sbbq 144(%rsi), %rdx + movq 152(%r8), %rax + movq %rdx, 144(%r8) + sbbq 152(%rsi), %rax + movq 160(%r8), %rdx + movq %rax, 152(%r8) + sbbq 160(%rsi), %rdx + movq 168(%r8), %rax + movq %rdx, 160(%r8) + sbbq 168(%rsi), %rax + movq 176(%r8), %rdx + movq %rax, 168(%r8) + sbbq 176(%rsi), %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r8) + sbbq 184(%rsi), %rax + movq 192(%r8), %rdx + movq %rax, 184(%r8) + sbbq 192(%rsi), %rdx + movq 200(%r8), %rax + movq %rdx, 192(%r8) + sbbq 200(%rsi), %rax + movq 208(%r8), %rdx + movq %rax, 200(%r8) + sbbq 208(%rsi), %rdx + movq 216(%r8), %rax + movq %rdx, 208(%r8) + sbbq 216(%rsi), %rax + movq 224(%r8), %rdx + movq %rax, 216(%r8) + sbbq 224(%rsi), %rdx + movq 232(%r8), %rax + movq %rdx, 224(%r8) + sbbq 232(%rsi), %rax + movq 240(%r8), %rdx + movq %rax, 232(%r8) + sbbq 240(%rsi), %rdx + movq 248(%r8), %rax + movq %rdx, 240(%r8) + sbbq 248(%rsi), %rax + movq %rax, 248(%r8) + sbbq $0, %rcx + movq (%r8), %rdx + subq (%rdi), %rdx + movq 8(%r8), %rax + movq %rdx, (%r8) + sbbq 8(%rdi), %rax + movq 16(%r8), %rdx + movq %rax, 8(%r8) + sbbq 16(%rdi), %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r8) + sbbq 24(%rdi), %rax + movq 32(%r8), %rdx + movq %rax, 24(%r8) + sbbq 32(%rdi), %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r8) + sbbq 40(%rdi), %rax + movq 48(%r8), %rdx + movq %rax, 40(%r8) + sbbq 48(%rdi), %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r8) + sbbq 56(%rdi), %rax + movq 64(%r8), %rdx + movq %rax, 56(%r8) + sbbq 64(%rdi), %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r8) + sbbq 72(%rdi), %rax + movq 80(%r8), %rdx + movq %rax, 72(%r8) + sbbq 80(%rdi), %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r8) + sbbq 88(%rdi), %rax + movq 96(%r8), %rdx + movq %rax, 88(%r8) + sbbq 96(%rdi), %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r8) + sbbq 104(%rdi), %rax + movq 112(%r8), %rdx + movq %rax, 104(%r8) + sbbq 112(%rdi), %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r8) + sbbq 120(%rdi), %rax + movq 
128(%r8), %rdx + movq %rax, 120(%r8) + sbbq 128(%rdi), %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r8) + sbbq 136(%rdi), %rax + movq 144(%r8), %rdx + movq %rax, 136(%r8) + sbbq 144(%rdi), %rdx + movq 152(%r8), %rax + movq %rdx, 144(%r8) + sbbq 152(%rdi), %rax + movq 160(%r8), %rdx + movq %rax, 152(%r8) + sbbq 160(%rdi), %rdx + movq 168(%r8), %rax + movq %rdx, 160(%r8) + sbbq 168(%rdi), %rax + movq 176(%r8), %rdx + movq %rax, 168(%r8) + sbbq 176(%rdi), %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r8) + sbbq 184(%rdi), %rax + movq 192(%r8), %rdx + movq %rax, 184(%r8) + sbbq 192(%rdi), %rdx + movq 200(%r8), %rax + movq %rdx, 192(%r8) + sbbq 200(%rdi), %rax + movq 208(%r8), %rdx + movq %rax, 200(%r8) + sbbq 208(%rdi), %rdx + movq 216(%r8), %rax + movq %rdx, 208(%r8) + sbbq 216(%rdi), %rax + movq 224(%r8), %rdx + movq %rax, 216(%r8) + sbbq 224(%rdi), %rdx + movq 232(%r8), %rax + movq %rdx, 224(%r8) + sbbq 232(%rdi), %rax + movq 240(%r8), %rdx + movq %rax, 232(%r8) + sbbq 240(%rdi), %rdx + movq 248(%r8), %rax + movq %rdx, 240(%r8) + sbbq 248(%rdi), %rax + movq %rax, 248(%r8) + sbbq $0, %rcx + # Add in place + movq 128(%rdi), %rdx + addq (%r8), %rdx + movq 136(%rdi), %rax + movq %rdx, 128(%rdi) + adcq 8(%r8), %rax + movq 144(%rdi), %rdx + movq %rax, 136(%rdi) + adcq 16(%r8), %rdx + movq 152(%rdi), %rax + movq %rdx, 144(%rdi) + adcq 24(%r8), %rax + movq 160(%rdi), %rdx + movq %rax, 152(%rdi) + adcq 32(%r8), %rdx + movq 168(%rdi), %rax + movq %rdx, 160(%rdi) + adcq 40(%r8), %rax + movq 176(%rdi), %rdx + movq %rax, 168(%rdi) + adcq 48(%r8), %rdx + movq 184(%rdi), %rax + movq %rdx, 176(%rdi) + adcq 56(%r8), %rax + movq 192(%rdi), %rdx + movq %rax, 184(%rdi) + adcq 64(%r8), %rdx + movq 200(%rdi), %rax + movq %rdx, 192(%rdi) + adcq 72(%r8), %rax + movq 208(%rdi), %rdx + movq %rax, 200(%rdi) + adcq 80(%r8), %rdx + movq 216(%rdi), %rax + movq %rdx, 208(%rdi) + adcq 88(%r8), %rax + movq 224(%rdi), %rdx + movq %rax, 216(%rdi) + adcq 96(%r8), %rdx + movq 232(%rdi), %rax + movq %rdx, 224(%rdi) + adcq 104(%r8), %rax + movq 240(%rdi), %rdx + movq %rax, 232(%rdi) + adcq 112(%r8), %rdx + movq 248(%rdi), %rax + movq %rdx, 240(%rdi) + adcq 120(%r8), %rax + movq 256(%rdi), %rdx + movq %rax, 248(%rdi) + adcq 128(%r8), %rdx + movq 264(%rdi), %rax + movq %rdx, 256(%rdi) + adcq 136(%r8), %rax + movq 272(%rdi), %rdx + movq %rax, 264(%rdi) + adcq 144(%r8), %rdx + movq 280(%rdi), %rax + movq %rdx, 272(%rdi) + adcq 152(%r8), %rax + movq 288(%rdi), %rdx + movq %rax, 280(%rdi) + adcq 160(%r8), %rdx + movq 296(%rdi), %rax + movq %rdx, 288(%rdi) + adcq 168(%r8), %rax + movq 304(%rdi), %rdx + movq %rax, 296(%rdi) + adcq 176(%r8), %rdx + movq 312(%rdi), %rax + movq %rdx, 304(%rdi) + adcq 184(%r8), %rax + movq 320(%rdi), %rdx + movq %rax, 312(%rdi) + adcq 192(%r8), %rdx + movq 328(%rdi), %rax + movq %rdx, 320(%rdi) + adcq 200(%r8), %rax + movq 336(%rdi), %rdx + movq %rax, 328(%rdi) + adcq 208(%r8), %rdx + movq 344(%rdi), %rax + movq %rdx, 336(%rdi) + adcq 216(%r8), %rax + movq 352(%rdi), %rdx + movq %rax, 344(%rdi) + adcq 224(%r8), %rdx + movq 360(%rdi), %rax + movq %rdx, 352(%rdi) + adcq 232(%r8), %rax + movq 368(%rdi), %rdx + movq %rax, 360(%rdi) + adcq 240(%r8), %rdx + movq 376(%rdi), %rax + movq %rdx, 368(%rdi) + adcq 248(%r8), %rax + movq %rax, 376(%rdi) + adcq $0, %rcx + movq %rcx, 384(%rdi) + # Add in place + movq 256(%rdi), %rdx + xorq %rcx, %rcx + addq (%rsi), %rdx + movq 264(%rdi), %rax + movq %rdx, 256(%rdi) + adcq 8(%rsi), %rax + movq 272(%rdi), %rdx + movq %rax, 264(%rdi) + adcq 16(%rsi), %rdx + movq 280(%rdi), %rax 
+ movq %rdx, 272(%rdi) + adcq 24(%rsi), %rax + movq 288(%rdi), %rdx + movq %rax, 280(%rdi) + adcq 32(%rsi), %rdx + movq 296(%rdi), %rax + movq %rdx, 288(%rdi) + adcq 40(%rsi), %rax + movq 304(%rdi), %rdx + movq %rax, 296(%rdi) + adcq 48(%rsi), %rdx + movq 312(%rdi), %rax + movq %rdx, 304(%rdi) + adcq 56(%rsi), %rax + movq 320(%rdi), %rdx + movq %rax, 312(%rdi) + adcq 64(%rsi), %rdx + movq 328(%rdi), %rax + movq %rdx, 320(%rdi) + adcq 72(%rsi), %rax + movq 336(%rdi), %rdx + movq %rax, 328(%rdi) + adcq 80(%rsi), %rdx + movq 344(%rdi), %rax + movq %rdx, 336(%rdi) + adcq 88(%rsi), %rax + movq 352(%rdi), %rdx + movq %rax, 344(%rdi) + adcq 96(%rsi), %rdx + movq 360(%rdi), %rax + movq %rdx, 352(%rdi) + adcq 104(%rsi), %rax + movq 368(%rdi), %rdx + movq %rax, 360(%rdi) + adcq 112(%rsi), %rdx + movq 376(%rdi), %rax + movq %rdx, 368(%rdi) + adcq 120(%rsi), %rax + movq 384(%rdi), %rdx + movq %rax, 376(%rdi) + adcq 128(%rsi), %rdx + movq %rdx, 384(%rdi) + adcq $0, %rcx + # Add to zero + movq 136(%rsi), %rdx + adcq $0, %rdx + movq 144(%rsi), %rax + movq %rdx, 392(%rdi) + adcq $0, %rax + movq 152(%rsi), %rdx + movq %rax, 400(%rdi) + adcq $0, %rdx + movq 160(%rsi), %rax + movq %rdx, 408(%rdi) + adcq $0, %rax + movq 168(%rsi), %rdx + movq %rax, 416(%rdi) + adcq $0, %rdx + movq 176(%rsi), %rax + movq %rdx, 424(%rdi) + adcq $0, %rax + movq 184(%rsi), %rdx + movq %rax, 432(%rdi) + adcq $0, %rdx + movq 192(%rsi), %rax + movq %rdx, 440(%rdi) + adcq $0, %rax + movq 200(%rsi), %rdx + movq %rax, 448(%rdi) + adcq $0, %rdx + movq 208(%rsi), %rax + movq %rdx, 456(%rdi) + adcq $0, %rax + movq 216(%rsi), %rdx + movq %rax, 464(%rdi) + adcq $0, %rdx + movq 224(%rsi), %rax + movq %rdx, 472(%rdi) + adcq $0, %rax + movq 232(%rsi), %rdx + movq %rax, 480(%rdi) + adcq $0, %rdx + movq 240(%rsi), %rax + movq %rdx, 488(%rdi) + adcq $0, %rax + movq 248(%rsi), %rdx + movq %rax, 496(%rdi) + adcq $0, %rdx + movq %rdx, 504(%rdi) + addq $664, %rsp + repz retq +#ifndef __APPLE__ +.size sp_2048_sqr_32,.-sp_2048_sqr_32 +#endif /* __APPLE__ */ +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
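+ *
+ * Sketch of the approach (same one-level Karatsuba shape as
+ * sp_2048_mul_32 above, assuming 64-bit limbs and 16-limb halves):
+ *
+ *   a*b = zh * 2^2048 + (z1 - zh - zl) * 2^1024 + zl
+ *   with zl = al*bl, zh = ah*bh, z1 = (al + ah)*(bl + bh)
+ *
+ * but the three half-size products use the BMI2/AVX2 helper
+ * sp_2048_mul_avx2_16 (mulx-based), and the carry-dependent
+ * selection is done with pextq against an all-zeros/all-ones mask
+ * instead of andq.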
+ */ +#ifndef __APPLE__ +.globl sp_2048_mul_avx2_32 +.type sp_2048_mul_avx2_32,@function +.align 16 +sp_2048_mul_avx2_32: +#else +.globl _sp_2048_mul_avx2_32 +.p2align 4 +_sp_2048_mul_avx2_32: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + subq $808, %rsp + movq %rdi, 768(%rsp) + movq %rsi, 776(%rsp) + movq %rdx, 784(%rsp) + leaq 512(%rsp), %r10 + leaq 128(%rsi), %r12 + # Add + movq (%rsi), %rax + xorq %r13, %r13 + addq (%r12), %rax + movq 8(%rsi), %rcx + movq %rax, (%r10) + adcq 8(%r12), %rcx + movq 16(%rsi), %r8 + movq %rcx, 8(%r10) + adcq 16(%r12), %r8 + movq 24(%rsi), %rax + movq %r8, 16(%r10) + adcq 24(%r12), %rax + movq 32(%rsi), %rcx + movq %rax, 24(%r10) + adcq 32(%r12), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%r10) + adcq 40(%r12), %r8 + movq 48(%rsi), %rax + movq %r8, 40(%r10) + adcq 48(%r12), %rax + movq 56(%rsi), %rcx + movq %rax, 48(%r10) + adcq 56(%r12), %rcx + movq 64(%rsi), %r8 + movq %rcx, 56(%r10) + adcq 64(%r12), %r8 + movq 72(%rsi), %rax + movq %r8, 64(%r10) + adcq 72(%r12), %rax + movq 80(%rsi), %rcx + movq %rax, 72(%r10) + adcq 80(%r12), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%r10) + adcq 88(%r12), %r8 + movq 96(%rsi), %rax + movq %r8, 88(%r10) + adcq 96(%r12), %rax + movq 104(%rsi), %rcx + movq %rax, 96(%r10) + adcq 104(%r12), %rcx + movq 112(%rsi), %r8 + movq %rcx, 104(%r10) + adcq 112(%r12), %r8 + movq 120(%rsi), %rax + movq %r8, 112(%r10) + adcq 120(%r12), %rax + movq %rax, 120(%r10) + adcq $0, %r13 + movq %r13, 792(%rsp) + leaq 640(%rsp), %r11 + leaq 128(%rdx), %r12 + # Add + movq (%rdx), %rax + xorq %r14, %r14 + addq (%r12), %rax + movq 8(%rdx), %rcx + movq %rax, (%r11) + adcq 8(%r12), %rcx + movq 16(%rdx), %r8 + movq %rcx, 8(%r11) + adcq 16(%r12), %r8 + movq 24(%rdx), %rax + movq %r8, 16(%r11) + adcq 24(%r12), %rax + movq 32(%rdx), %rcx + movq %rax, 24(%r11) + adcq 32(%r12), %rcx + movq 40(%rdx), %r8 + movq %rcx, 32(%r11) + adcq 40(%r12), %r8 + movq 48(%rdx), %rax + movq %r8, 40(%r11) + adcq 48(%r12), %rax + movq 56(%rdx), %rcx + movq %rax, 48(%r11) + adcq 56(%r12), %rcx + movq 64(%rdx), %r8 + movq %rcx, 56(%r11) + adcq 64(%r12), %r8 + movq 72(%rdx), %rax + movq %r8, 64(%r11) + adcq 72(%r12), %rax + movq 80(%rdx), %rcx + movq %rax, 72(%r11) + adcq 80(%r12), %rcx + movq 88(%rdx), %r8 + movq %rcx, 80(%r11) + adcq 88(%r12), %r8 + movq 96(%rdx), %rax + movq %r8, 88(%r11) + adcq 96(%r12), %rax + movq 104(%rdx), %rcx + movq %rax, 96(%r11) + adcq 104(%r12), %rcx + movq 112(%rdx), %r8 + movq %rcx, 104(%r11) + adcq 112(%r12), %r8 + movq 120(%rdx), %rax + movq %r8, 112(%r11) + adcq 120(%r12), %rax + movq %rax, 120(%r11) + adcq $0, %r14 + movq %r14, 800(%rsp) + movq %r11, %rdx + movq %r10, %rsi + movq %rsp, %rdi +#ifndef __APPLE__ + callq sp_2048_mul_avx2_16@plt +#else + callq _sp_2048_mul_avx2_16 +#endif /* __APPLE__ */ + movq 784(%rsp), %rdx + movq 776(%rsp), %rsi + leaq 256(%rsp), %rdi + addq $128, %rdx + addq $128, %rsi +#ifndef __APPLE__ + callq sp_2048_mul_avx2_16@plt +#else + callq _sp_2048_mul_avx2_16 +#endif /* __APPLE__ */ + movq 784(%rsp), %rdx + movq 776(%rsp), %rsi + movq 768(%rsp), %rdi +#ifndef __APPLE__ + callq sp_2048_mul_avx2_16@plt +#else + callq _sp_2048_mul_avx2_16 +#endif /* __APPLE__ */ + movq 792(%rsp), %r13 + movq 800(%rsp), %r14 + movq 768(%rsp), %r15 + movq %r13, %r9 + leaq 512(%rsp), %r10 + leaq 640(%rsp), %r11 + andq %r14, %r9 + negq %r13 + negq %r14 + addq $256, %r15 + movq (%r10), %rax + movq (%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + addq %rcx, %rax + movq 8(%r10), %rcx + movq 8(%r11), 
%r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, (%r15) + adcq %r8, %rcx + movq 16(%r10), %r8 + movq 16(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 8(%r15) + adcq %rax, %r8 + movq 24(%r10), %rax + movq 24(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 16(%r15) + adcq %rcx, %rax + movq 32(%r10), %rcx + movq 32(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 24(%r15) + adcq %r8, %rcx + movq 40(%r10), %r8 + movq 40(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 32(%r15) + adcq %rax, %r8 + movq 48(%r10), %rax + movq 48(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 40(%r15) + adcq %rcx, %rax + movq 56(%r10), %rcx + movq 56(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 48(%r15) + adcq %r8, %rcx + movq 64(%r10), %r8 + movq 64(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 56(%r15) + adcq %rax, %r8 + movq 72(%r10), %rax + movq 72(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 64(%r15) + adcq %rcx, %rax + movq 80(%r10), %rcx + movq 80(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 72(%r15) + adcq %r8, %rcx + movq 88(%r10), %r8 + movq 88(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 80(%r15) + adcq %rax, %r8 + movq 96(%r10), %rax + movq 96(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 88(%r15) + adcq %rcx, %rax + movq 104(%r10), %rcx + movq 104(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 96(%r15) + adcq %r8, %rcx + movq 112(%r10), %r8 + movq 112(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 104(%r15) + adcq %rax, %r8 + movq 120(%r10), %rax + movq 120(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 112(%r15) + adcq %rcx, %rax + movq %rax, 120(%r15) + adcq $0, %r9 + leaq 256(%rsp), %r11 + movq %rsp, %r10 + movq (%r10), %rax + subq (%r11), %rax + movq 8(%r10), %rcx + movq %rax, (%r10) + sbbq 8(%r11), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r10) + sbbq 16(%r11), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r10) + sbbq 24(%r11), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r10) + sbbq 32(%r11), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r10) + sbbq 40(%r11), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r10) + sbbq 48(%r11), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r10) + sbbq 56(%r11), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r10) + sbbq 64(%r11), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r10) + sbbq 72(%r11), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r10) + sbbq 80(%r11), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r10) + sbbq 88(%r11), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r10) + sbbq 96(%r11), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r10) + sbbq 104(%r11), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r10) + sbbq 112(%r11), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r10) + sbbq 120(%r11), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r10) + sbbq 128(%r11), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r10) + sbbq 136(%r11), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r10) + sbbq 144(%r11), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r10) + sbbq 152(%r11), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r10) + sbbq 160(%r11), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r10) + sbbq 168(%r11), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r10) + sbbq 176(%r11), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r10) + 
sbbq 184(%r11), %r8 + movq 192(%r10), %rax + movq %r8, 184(%r10) + sbbq 192(%r11), %rax + movq 200(%r10), %rcx + movq %rax, 192(%r10) + sbbq 200(%r11), %rcx + movq 208(%r10), %r8 + movq %rcx, 200(%r10) + sbbq 208(%r11), %r8 + movq 216(%r10), %rax + movq %r8, 208(%r10) + sbbq 216(%r11), %rax + movq 224(%r10), %rcx + movq %rax, 216(%r10) + sbbq 224(%r11), %rcx + movq 232(%r10), %r8 + movq %rcx, 224(%r10) + sbbq 232(%r11), %r8 + movq 240(%r10), %rax + movq %r8, 232(%r10) + sbbq 240(%r11), %rax + movq 248(%r10), %rcx + movq %rax, 240(%r10) + sbbq 248(%r11), %rcx + movq %rcx, 248(%r10) + sbbq $0, %r9 + movq (%r10), %rax + subq (%rdi), %rax + movq 8(%r10), %rcx + movq %rax, (%r10) + sbbq 8(%rdi), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r10) + sbbq 16(%rdi), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r10) + sbbq 24(%rdi), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r10) + sbbq 32(%rdi), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r10) + sbbq 40(%rdi), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r10) + sbbq 48(%rdi), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r10) + sbbq 56(%rdi), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r10) + sbbq 64(%rdi), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r10) + sbbq 72(%rdi), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r10) + sbbq 80(%rdi), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r10) + sbbq 88(%rdi), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r10) + sbbq 96(%rdi), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r10) + sbbq 104(%rdi), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r10) + sbbq 112(%rdi), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r10) + sbbq 120(%rdi), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r10) + sbbq 128(%rdi), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r10) + sbbq 136(%rdi), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r10) + sbbq 144(%rdi), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r10) + sbbq 152(%rdi), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r10) + sbbq 160(%rdi), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r10) + sbbq 168(%rdi), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r10) + sbbq 176(%rdi), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r10) + sbbq 184(%rdi), %r8 + movq 192(%r10), %rax + movq %r8, 184(%r10) + sbbq 192(%rdi), %rax + movq 200(%r10), %rcx + movq %rax, 192(%r10) + sbbq 200(%rdi), %rcx + movq 208(%r10), %r8 + movq %rcx, 200(%r10) + sbbq 208(%rdi), %r8 + movq 216(%r10), %rax + movq %r8, 208(%r10) + sbbq 216(%rdi), %rax + movq 224(%r10), %rcx + movq %rax, 216(%r10) + sbbq 224(%rdi), %rcx + movq 232(%r10), %r8 + movq %rcx, 224(%r10) + sbbq 232(%rdi), %r8 + movq 240(%r10), %rax + movq %r8, 232(%r10) + sbbq 240(%rdi), %rax + movq 248(%r10), %rcx + movq %rax, 240(%r10) + sbbq 248(%rdi), %rcx + movq %rcx, 248(%r10) + sbbq $0, %r9 + subq $128, %r15 + # Add + movq (%r15), %rax + addq (%r10), %rax + movq 8(%r15), %rcx + movq %rax, (%r15) + adcq 8(%r10), %rcx + movq 16(%r15), %r8 + movq %rcx, 8(%r15) + adcq 16(%r10), %r8 + movq 24(%r15), %rax + movq %r8, 16(%r15) + adcq 24(%r10), %rax + movq 32(%r15), %rcx + movq %rax, 24(%r15) + adcq 32(%r10), %rcx + movq 40(%r15), %r8 + movq %rcx, 32(%r15) + adcq 40(%r10), %r8 + movq 48(%r15), %rax + movq %r8, 40(%r15) + adcq 48(%r10), %rax + movq 56(%r15), %rcx + movq %rax, 48(%r15) + adcq 56(%r10), %rcx + movq 64(%r15), %r8 + movq %rcx, 56(%r15) + adcq 64(%r10), %r8 + movq 72(%r15), %rax + movq %r8, 64(%r15) + adcq 72(%r10), %rax + movq 80(%r15), %rcx + movq %rax, 72(%r15) + adcq 80(%r10), %rcx + movq 88(%r15), %r8 + movq %rcx, 80(%r15) + adcq 88(%r10), %r8 + movq 96(%r15), %rax + movq 
%r8, 88(%r15) + adcq 96(%r10), %rax + movq 104(%r15), %rcx + movq %rax, 96(%r15) + adcq 104(%r10), %rcx + movq 112(%r15), %r8 + movq %rcx, 104(%r15) + adcq 112(%r10), %r8 + movq 120(%r15), %rax + movq %r8, 112(%r15) + adcq 120(%r10), %rax + movq 128(%r15), %rcx + movq %rax, 120(%r15) + adcq 128(%r10), %rcx + movq 136(%r15), %r8 + movq %rcx, 128(%r15) + adcq 136(%r10), %r8 + movq 144(%r15), %rax + movq %r8, 136(%r15) + adcq 144(%r10), %rax + movq 152(%r15), %rcx + movq %rax, 144(%r15) + adcq 152(%r10), %rcx + movq 160(%r15), %r8 + movq %rcx, 152(%r15) + adcq 160(%r10), %r8 + movq 168(%r15), %rax + movq %r8, 160(%r15) + adcq 168(%r10), %rax + movq 176(%r15), %rcx + movq %rax, 168(%r15) + adcq 176(%r10), %rcx + movq 184(%r15), %r8 + movq %rcx, 176(%r15) + adcq 184(%r10), %r8 + movq 192(%r15), %rax + movq %r8, 184(%r15) + adcq 192(%r10), %rax + movq 200(%r15), %rcx + movq %rax, 192(%r15) + adcq 200(%r10), %rcx + movq 208(%r15), %r8 + movq %rcx, 200(%r15) + adcq 208(%r10), %r8 + movq 216(%r15), %rax + movq %r8, 208(%r15) + adcq 216(%r10), %rax + movq 224(%r15), %rcx + movq %rax, 216(%r15) + adcq 224(%r10), %rcx + movq 232(%r15), %r8 + movq %rcx, 224(%r15) + adcq 232(%r10), %r8 + movq 240(%r15), %rax + movq %r8, 232(%r15) + adcq 240(%r10), %rax + movq 248(%r15), %rcx + movq %rax, 240(%r15) + adcq 248(%r10), %rcx + movq %rcx, 248(%r15) + adcq $0, %r9 + movq %r9, 384(%rdi) + addq $128, %r15 + # Add + movq (%r15), %rax + xorq %r9, %r9 + addq (%r11), %rax + movq 8(%r15), %rcx + movq %rax, (%r15) + adcq 8(%r11), %rcx + movq 16(%r15), %r8 + movq %rcx, 8(%r15) + adcq 16(%r11), %r8 + movq 24(%r15), %rax + movq %r8, 16(%r15) + adcq 24(%r11), %rax + movq 32(%r15), %rcx + movq %rax, 24(%r15) + adcq 32(%r11), %rcx + movq 40(%r15), %r8 + movq %rcx, 32(%r15) + adcq 40(%r11), %r8 + movq 48(%r15), %rax + movq %r8, 40(%r15) + adcq 48(%r11), %rax + movq 56(%r15), %rcx + movq %rax, 48(%r15) + adcq 56(%r11), %rcx + movq 64(%r15), %r8 + movq %rcx, 56(%r15) + adcq 64(%r11), %r8 + movq 72(%r15), %rax + movq %r8, 64(%r15) + adcq 72(%r11), %rax + movq 80(%r15), %rcx + movq %rax, 72(%r15) + adcq 80(%r11), %rcx + movq 88(%r15), %r8 + movq %rcx, 80(%r15) + adcq 88(%r11), %r8 + movq 96(%r15), %rax + movq %r8, 88(%r15) + adcq 96(%r11), %rax + movq 104(%r15), %rcx + movq %rax, 96(%r15) + adcq 104(%r11), %rcx + movq 112(%r15), %r8 + movq %rcx, 104(%r15) + adcq 112(%r11), %r8 + movq 120(%r15), %rax + movq %r8, 112(%r15) + adcq 120(%r11), %rax + movq 128(%r15), %rcx + movq %rax, 120(%r15) + adcq 128(%r11), %rcx + movq %rcx, 128(%r15) + adcq $0, %r9 + # Add to zero + movq 136(%r11), %rax + adcq $0, %rax + movq 144(%r11), %rcx + movq %rax, 136(%r15) + adcq $0, %rcx + movq 152(%r11), %r8 + movq %rcx, 144(%r15) + adcq $0, %r8 + movq 160(%r11), %rax + movq %r8, 152(%r15) + adcq $0, %rax + movq 168(%r11), %rcx + movq %rax, 160(%r15) + adcq $0, %rcx + movq 176(%r11), %r8 + movq %rcx, 168(%r15) + adcq $0, %r8 + movq 184(%r11), %rax + movq %r8, 176(%r15) + adcq $0, %rax + movq 192(%r11), %rcx + movq %rax, 184(%r15) + adcq $0, %rcx + movq 200(%r11), %r8 + movq %rcx, 192(%r15) + adcq $0, %r8 + movq 208(%r11), %rax + movq %r8, 200(%r15) + adcq $0, %rax + movq 216(%r11), %rcx + movq %rax, 208(%r15) + adcq $0, %rcx + movq 224(%r11), %r8 + movq %rcx, 216(%r15) + adcq $0, %r8 + movq 232(%r11), %rax + movq %r8, 224(%r15) + adcq $0, %rax + movq 240(%r11), %rcx + movq %rax, 232(%r15) + adcq $0, %rcx + movq 248(%r11), %r8 + movq %rcx, 240(%r15) + adcq $0, %r8 + movq %r8, 248(%r15) + addq $808, %rsp + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz 
retq +#ifndef __APPLE__ +.size sp_2048_mul_avx2_32,.-sp_2048_mul_avx2_32 +#endif /* __APPLE__ */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_2048_sqr_avx2_32 +.type sp_2048_sqr_avx2_32,@function +.align 16 +sp_2048_sqr_avx2_32: +#else +.globl _sp_2048_sqr_avx2_32 +.p2align 4 +_sp_2048_sqr_avx2_32: +#endif /* __APPLE__ */ + subq $664, %rsp + movq %rdi, 640(%rsp) + movq %rsi, 648(%rsp) + leaq 512(%rsp), %r8 + leaq 128(%rsi), %r9 + # Add + movq (%rsi), %rdx + xorq %rcx, %rcx + addq (%r9), %rdx + movq 8(%rsi), %rax + movq %rdx, (%r8) + adcq 8(%r9), %rax + movq 16(%rsi), %rdx + movq %rax, 8(%r8) + adcq 16(%r9), %rdx + movq 24(%rsi), %rax + movq %rdx, 16(%r8) + adcq 24(%r9), %rax + movq 32(%rsi), %rdx + movq %rax, 24(%r8) + adcq 32(%r9), %rdx + movq 40(%rsi), %rax + movq %rdx, 32(%r8) + adcq 40(%r9), %rax + movq 48(%rsi), %rdx + movq %rax, 40(%r8) + adcq 48(%r9), %rdx + movq 56(%rsi), %rax + movq %rdx, 48(%r8) + adcq 56(%r9), %rax + movq 64(%rsi), %rdx + movq %rax, 56(%r8) + adcq 64(%r9), %rdx + movq 72(%rsi), %rax + movq %rdx, 64(%r8) + adcq 72(%r9), %rax + movq 80(%rsi), %rdx + movq %rax, 72(%r8) + adcq 80(%r9), %rdx + movq 88(%rsi), %rax + movq %rdx, 80(%r8) + adcq 88(%r9), %rax + movq 96(%rsi), %rdx + movq %rax, 88(%r8) + adcq 96(%r9), %rdx + movq 104(%rsi), %rax + movq %rdx, 96(%r8) + adcq 104(%r9), %rax + movq 112(%rsi), %rdx + movq %rax, 104(%r8) + adcq 112(%r9), %rdx + movq 120(%rsi), %rax + movq %rdx, 112(%r8) + adcq 120(%r9), %rax + movq %rax, 120(%r8) + adcq $0, %rcx + movq %rcx, 656(%rsp) + movq %r8, %rsi + movq %rsp, %rdi +#ifndef __APPLE__ + callq sp_2048_sqr_avx2_16@plt +#else + callq _sp_2048_sqr_avx2_16 +#endif /* __APPLE__ */ + movq 648(%rsp), %rsi + leaq 256(%rsp), %rdi + addq $128, %rsi +#ifndef __APPLE__ + callq sp_2048_sqr_avx2_16@plt +#else + callq _sp_2048_sqr_avx2_16 +#endif /* __APPLE__ */ + movq 648(%rsp), %rsi + movq 640(%rsp), %rdi +#ifndef __APPLE__ + callq sp_2048_sqr_avx2_16@plt +#else + callq _sp_2048_sqr_avx2_16 +#endif /* __APPLE__ */ + movq 656(%rsp), %r10 + leaq 512(%rsp), %r8 + movq %r10, %rcx + negq %r10 + movq (%r8), %rdx + pextq %r10, %rdx, %rdx + addq %rdx, %rdx + movq 8(%r8), %rax + movq %rdx, 256(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 16(%r8), %rdx + movq %rax, 264(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 24(%r8), %rax + movq %rdx, 272(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 32(%r8), %rdx + movq %rax, 280(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 40(%r8), %rax + movq %rdx, 288(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 48(%r8), %rdx + movq %rax, 296(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 56(%r8), %rax + movq %rdx, 304(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 64(%r8), %rdx + movq %rax, 312(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 72(%r8), %rax + movq %rdx, 320(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 80(%r8), %rdx + movq %rax, 328(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 88(%r8), %rax + movq %rdx, 336(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 96(%r8), %rdx + movq %rax, 344(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 104(%r8), %rax + movq %rdx, 352(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 112(%r8), %rdx + movq %rax, 360(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 120(%r8), %rax + movq %rdx, 368(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + 
movq %rax, 376(%rdi) + adcq $0, %rcx + leaq 256(%rsp), %rsi + movq %rsp, %r8 + movq (%r8), %rdx + subq (%rsi), %rdx + movq 8(%r8), %rax + movq %rdx, (%r8) + sbbq 8(%rsi), %rax + movq 16(%r8), %rdx + movq %rax, 8(%r8) + sbbq 16(%rsi), %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r8) + sbbq 24(%rsi), %rax + movq 32(%r8), %rdx + movq %rax, 24(%r8) + sbbq 32(%rsi), %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r8) + sbbq 40(%rsi), %rax + movq 48(%r8), %rdx + movq %rax, 40(%r8) + sbbq 48(%rsi), %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r8) + sbbq 56(%rsi), %rax + movq 64(%r8), %rdx + movq %rax, 56(%r8) + sbbq 64(%rsi), %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r8) + sbbq 72(%rsi), %rax + movq 80(%r8), %rdx + movq %rax, 72(%r8) + sbbq 80(%rsi), %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r8) + sbbq 88(%rsi), %rax + movq 96(%r8), %rdx + movq %rax, 88(%r8) + sbbq 96(%rsi), %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r8) + sbbq 104(%rsi), %rax + movq 112(%r8), %rdx + movq %rax, 104(%r8) + sbbq 112(%rsi), %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r8) + sbbq 120(%rsi), %rax + movq 128(%r8), %rdx + movq %rax, 120(%r8) + sbbq 128(%rsi), %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r8) + sbbq 136(%rsi), %rax + movq 144(%r8), %rdx + movq %rax, 136(%r8) + sbbq 144(%rsi), %rdx + movq 152(%r8), %rax + movq %rdx, 144(%r8) + sbbq 152(%rsi), %rax + movq 160(%r8), %rdx + movq %rax, 152(%r8) + sbbq 160(%rsi), %rdx + movq 168(%r8), %rax + movq %rdx, 160(%r8) + sbbq 168(%rsi), %rax + movq 176(%r8), %rdx + movq %rax, 168(%r8) + sbbq 176(%rsi), %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r8) + sbbq 184(%rsi), %rax + movq 192(%r8), %rdx + movq %rax, 184(%r8) + sbbq 192(%rsi), %rdx + movq 200(%r8), %rax + movq %rdx, 192(%r8) + sbbq 200(%rsi), %rax + movq 208(%r8), %rdx + movq %rax, 200(%r8) + sbbq 208(%rsi), %rdx + movq 216(%r8), %rax + movq %rdx, 208(%r8) + sbbq 216(%rsi), %rax + movq 224(%r8), %rdx + movq %rax, 216(%r8) + sbbq 224(%rsi), %rdx + movq 232(%r8), %rax + movq %rdx, 224(%r8) + sbbq 232(%rsi), %rax + movq 240(%r8), %rdx + movq %rax, 232(%r8) + sbbq 240(%rsi), %rdx + movq 248(%r8), %rax + movq %rdx, 240(%r8) + sbbq 248(%rsi), %rax + movq %rax, 248(%r8) + sbbq $0, %rcx + movq (%r8), %rdx + subq (%rdi), %rdx + movq 8(%r8), %rax + movq %rdx, (%r8) + sbbq 8(%rdi), %rax + movq 16(%r8), %rdx + movq %rax, 8(%r8) + sbbq 16(%rdi), %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r8) + sbbq 24(%rdi), %rax + movq 32(%r8), %rdx + movq %rax, 24(%r8) + sbbq 32(%rdi), %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r8) + sbbq 40(%rdi), %rax + movq 48(%r8), %rdx + movq %rax, 40(%r8) + sbbq 48(%rdi), %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r8) + sbbq 56(%rdi), %rax + movq 64(%r8), %rdx + movq %rax, 56(%r8) + sbbq 64(%rdi), %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r8) + sbbq 72(%rdi), %rax + movq 80(%r8), %rdx + movq %rax, 72(%r8) + sbbq 80(%rdi), %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r8) + sbbq 88(%rdi), %rax + movq 96(%r8), %rdx + movq %rax, 88(%r8) + sbbq 96(%rdi), %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r8) + sbbq 104(%rdi), %rax + movq 112(%r8), %rdx + movq %rax, 104(%r8) + sbbq 112(%rdi), %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r8) + sbbq 120(%rdi), %rax + movq 128(%r8), %rdx + movq %rax, 120(%r8) + sbbq 128(%rdi), %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r8) + sbbq 136(%rdi), %rax + movq 144(%r8), %rdx + movq %rax, 136(%r8) + sbbq 144(%rdi), %rdx + movq 152(%r8), %rax + movq %rdx, 144(%r8) + sbbq 152(%rdi), %rax + movq 160(%r8), %rdx + movq %rax, 152(%r8) + sbbq 160(%rdi), %rdx + movq 168(%r8), %rax + movq 
%rdx, 160(%r8) + sbbq 168(%rdi), %rax + movq 176(%r8), %rdx + movq %rax, 168(%r8) + sbbq 176(%rdi), %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r8) + sbbq 184(%rdi), %rax + movq 192(%r8), %rdx + movq %rax, 184(%r8) + sbbq 192(%rdi), %rdx + movq 200(%r8), %rax + movq %rdx, 192(%r8) + sbbq 200(%rdi), %rax + movq 208(%r8), %rdx + movq %rax, 200(%r8) + sbbq 208(%rdi), %rdx + movq 216(%r8), %rax + movq %rdx, 208(%r8) + sbbq 216(%rdi), %rax + movq 224(%r8), %rdx + movq %rax, 216(%r8) + sbbq 224(%rdi), %rdx + movq 232(%r8), %rax + movq %rdx, 224(%r8) + sbbq 232(%rdi), %rax + movq 240(%r8), %rdx + movq %rax, 232(%r8) + sbbq 240(%rdi), %rdx + movq 248(%r8), %rax + movq %rdx, 240(%r8) + sbbq 248(%rdi), %rax + movq %rax, 248(%r8) + sbbq $0, %rcx + # Add in place + movq 128(%rdi), %rdx + addq (%r8), %rdx + movq 136(%rdi), %rax + movq %rdx, 128(%rdi) + adcq 8(%r8), %rax + movq 144(%rdi), %rdx + movq %rax, 136(%rdi) + adcq 16(%r8), %rdx + movq 152(%rdi), %rax + movq %rdx, 144(%rdi) + adcq 24(%r8), %rax + movq 160(%rdi), %rdx + movq %rax, 152(%rdi) + adcq 32(%r8), %rdx + movq 168(%rdi), %rax + movq %rdx, 160(%rdi) + adcq 40(%r8), %rax + movq 176(%rdi), %rdx + movq %rax, 168(%rdi) + adcq 48(%r8), %rdx + movq 184(%rdi), %rax + movq %rdx, 176(%rdi) + adcq 56(%r8), %rax + movq 192(%rdi), %rdx + movq %rax, 184(%rdi) + adcq 64(%r8), %rdx + movq 200(%rdi), %rax + movq %rdx, 192(%rdi) + adcq 72(%r8), %rax + movq 208(%rdi), %rdx + movq %rax, 200(%rdi) + adcq 80(%r8), %rdx + movq 216(%rdi), %rax + movq %rdx, 208(%rdi) + adcq 88(%r8), %rax + movq 224(%rdi), %rdx + movq %rax, 216(%rdi) + adcq 96(%r8), %rdx + movq 232(%rdi), %rax + movq %rdx, 224(%rdi) + adcq 104(%r8), %rax + movq 240(%rdi), %rdx + movq %rax, 232(%rdi) + adcq 112(%r8), %rdx + movq 248(%rdi), %rax + movq %rdx, 240(%rdi) + adcq 120(%r8), %rax + movq 256(%rdi), %rdx + movq %rax, 248(%rdi) + adcq 128(%r8), %rdx + movq 264(%rdi), %rax + movq %rdx, 256(%rdi) + adcq 136(%r8), %rax + movq 272(%rdi), %rdx + movq %rax, 264(%rdi) + adcq 144(%r8), %rdx + movq 280(%rdi), %rax + movq %rdx, 272(%rdi) + adcq 152(%r8), %rax + movq 288(%rdi), %rdx + movq %rax, 280(%rdi) + adcq 160(%r8), %rdx + movq 296(%rdi), %rax + movq %rdx, 288(%rdi) + adcq 168(%r8), %rax + movq 304(%rdi), %rdx + movq %rax, 296(%rdi) + adcq 176(%r8), %rdx + movq 312(%rdi), %rax + movq %rdx, 304(%rdi) + adcq 184(%r8), %rax + movq 320(%rdi), %rdx + movq %rax, 312(%rdi) + adcq 192(%r8), %rdx + movq 328(%rdi), %rax + movq %rdx, 320(%rdi) + adcq 200(%r8), %rax + movq 336(%rdi), %rdx + movq %rax, 328(%rdi) + adcq 208(%r8), %rdx + movq 344(%rdi), %rax + movq %rdx, 336(%rdi) + adcq 216(%r8), %rax + movq 352(%rdi), %rdx + movq %rax, 344(%rdi) + adcq 224(%r8), %rdx + movq 360(%rdi), %rax + movq %rdx, 352(%rdi) + adcq 232(%r8), %rax + movq 368(%rdi), %rdx + movq %rax, 360(%rdi) + adcq 240(%r8), %rdx + movq 376(%rdi), %rax + movq %rdx, 368(%rdi) + adcq 248(%r8), %rax + movq %rax, 376(%rdi) + adcq $0, %rcx + movq %rcx, 384(%rdi) + # Add in place + movq 256(%rdi), %rdx + xorq %rcx, %rcx + addq (%rsi), %rdx + movq 264(%rdi), %rax + movq %rdx, 256(%rdi) + adcq 8(%rsi), %rax + movq 272(%rdi), %rdx + movq %rax, 264(%rdi) + adcq 16(%rsi), %rdx + movq 280(%rdi), %rax + movq %rdx, 272(%rdi) + adcq 24(%rsi), %rax + movq 288(%rdi), %rdx + movq %rax, 280(%rdi) + adcq 32(%rsi), %rdx + movq 296(%rdi), %rax + movq %rdx, 288(%rdi) + adcq 40(%rsi), %rax + movq 304(%rdi), %rdx + movq %rax, 296(%rdi) + adcq 48(%rsi), %rdx + movq 312(%rdi), %rax + movq %rdx, 304(%rdi) + adcq 56(%rsi), %rax + movq 320(%rdi), %rdx + movq %rax, 
312(%rdi) + adcq 64(%rsi), %rdx + movq 328(%rdi), %rax + movq %rdx, 320(%rdi) + adcq 72(%rsi), %rax + movq 336(%rdi), %rdx + movq %rax, 328(%rdi) + adcq 80(%rsi), %rdx + movq 344(%rdi), %rax + movq %rdx, 336(%rdi) + adcq 88(%rsi), %rax + movq 352(%rdi), %rdx + movq %rax, 344(%rdi) + adcq 96(%rsi), %rdx + movq 360(%rdi), %rax + movq %rdx, 352(%rdi) + adcq 104(%rsi), %rax + movq 368(%rdi), %rdx + movq %rax, 360(%rdi) + adcq 112(%rsi), %rdx + movq 376(%rdi), %rax + movq %rdx, 368(%rdi) + adcq 120(%rsi), %rax + movq 384(%rdi), %rdx + movq %rax, 376(%rdi) + adcq 128(%rsi), %rdx + movq %rdx, 384(%rdi) + adcq $0, %rcx + # Add to zero + movq 136(%rsi), %rdx + adcq $0, %rdx + movq 144(%rsi), %rax + movq %rdx, 392(%rdi) + adcq $0, %rax + movq 152(%rsi), %rdx + movq %rax, 400(%rdi) + adcq $0, %rdx + movq 160(%rsi), %rax + movq %rdx, 408(%rdi) + adcq $0, %rax + movq 168(%rsi), %rdx + movq %rax, 416(%rdi) + adcq $0, %rdx + movq 176(%rsi), %rax + movq %rdx, 424(%rdi) + adcq $0, %rax + movq 184(%rsi), %rdx + movq %rax, 432(%rdi) + adcq $0, %rdx + movq 192(%rsi), %rax + movq %rdx, 440(%rdi) + adcq $0, %rax + movq 200(%rsi), %rdx + movq %rax, 448(%rdi) + adcq $0, %rdx + movq 208(%rsi), %rax + movq %rdx, 456(%rdi) + adcq $0, %rax + movq 216(%rsi), %rdx + movq %rax, 464(%rdi) + adcq $0, %rdx + movq 224(%rsi), %rax + movq %rdx, 472(%rdi) + adcq $0, %rax + movq 232(%rsi), %rdx + movq %rax, 480(%rdi) + adcq $0, %rdx + movq 240(%rsi), %rax + movq %rdx, 488(%rdi) + adcq $0, %rax + movq 248(%rsi), %rdx + movq %rax, 496(%rdi) + adcq $0, %rdx + movq %rdx, 504(%rdi) + addq $664, %rsp + repz retq +#ifndef __APPLE__ +.size sp_2048_sqr_avx2_32,.-sp_2048_sqr_avx2_32 +#endif /* __APPLE__ */ +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
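+ *
+ * Illustrative C sketch of the same operation (assumes 64-bit limbs
+ * and a compiler with unsigned __int128; the function name is for
+ * exposition only):
+ *
+ *   #include <stdint.h>
+ *   void mul_d_32_sketch(uint64_t r[33], const uint64_t a[32],
+ *                        uint64_t b)
+ *   {
+ *       unsigned __int128 t = 0;
+ *       for (int i = 0; i < 32; i++) {
+ *           t += (unsigned __int128)a[i] * b;  // 64x64 -> 128 product
+ *           r[i] = (uint64_t)t;                // emit low limb
+ *           t >>= 64;                          // keep carry limb
+ *       }
+ *       r[32] = (uint64_t)t;                   // final carry digit
+ *   }
+ *
+ * The generated code unrolls this fully with mulq, so no wide
+ * accumulator type is needed.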
+ */ +#ifndef __APPLE__ +.globl sp_2048_mul_d_32 +.type sp_2048_mul_d_32,@function +.align 16 +sp_2048_mul_d_32: +#else +.globl _sp_2048_mul_d_32 +.p2align 4 +_sp_2048_mul_d_32: +#endif /* __APPLE__ */ + movq %rdx, %rcx + # A[0] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + movq %r8, (%rdi) + # A[1] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 8(%rsi) + addq %rax, %r9 + movq %r9, 8(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[2] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 16(%rsi) + addq %rax, %r10 + movq %r10, 16(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 24(%rsi) + addq %rax, %r8 + movq %r8, 24(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[4] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 32(%rsi) + addq %rax, %r9 + movq %r9, 32(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[5] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 40(%rsi) + addq %rax, %r10 + movq %r10, 40(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[6] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 48(%rsi) + addq %rax, %r8 + movq %r8, 48(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[7] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 56(%rsi) + addq %rax, %r9 + movq %r9, 56(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[8] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 64(%rsi) + addq %rax, %r10 + movq %r10, 64(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[9] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 72(%rsi) + addq %rax, %r8 + movq %r8, 72(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[10] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 80(%rsi) + addq %rax, %r9 + movq %r9, 80(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[11] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 88(%rsi) + addq %rax, %r10 + movq %r10, 88(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[12] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 96(%rsi) + addq %rax, %r8 + movq %r8, 96(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[13] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 104(%rsi) + addq %rax, %r9 + movq %r9, 104(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[14] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 112(%rsi) + addq %rax, %r10 + movq %r10, 112(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[15] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 120(%rsi) + addq %rax, %r8 + movq %r8, 120(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[16] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 128(%rsi) + addq %rax, %r9 + movq %r9, 128(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[17] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 136(%rsi) + addq %rax, %r10 + movq %r10, 136(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[18] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 144(%rsi) + addq %rax, %r8 + movq %r8, 144(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[19] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 152(%rsi) + addq %rax, %r9 + movq %r9, 152(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[20] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 160(%rsi) + addq %rax, %r10 + movq %r10, 160(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[21] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 168(%rsi) + addq %rax, %r8 + movq %r8, 168(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[22] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 176(%rsi) + addq %rax, %r9 + movq %r9, 176(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[23] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 184(%rsi) + addq %rax, %r10 + movq %r10, 184(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[24] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 192(%rsi) + addq %rax, 
%r8 + movq %r8, 192(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[25] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 200(%rsi) + addq %rax, %r9 + movq %r9, 200(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[26] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 208(%rsi) + addq %rax, %r10 + movq %r10, 208(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[27] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 216(%rsi) + addq %rax, %r8 + movq %r8, 216(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[28] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 224(%rsi) + addq %rax, %r9 + movq %r9, 224(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[29] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 232(%rsi) + addq %rax, %r10 + movq %r10, 232(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[30] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 240(%rsi) + addq %rax, %r8 + movq %r8, 240(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[31] * B + movq %rcx, %rax + mulq 248(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + movq %r9, 248(%rdi) + movq %r10, 256(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_2048_mul_d_32,.-sp_2048_mul_d_32 +#endif /* __APPLE__ */ +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_2048_sub_in_place_16 +.type sp_2048_sub_in_place_16,@function +.align 16 +sp_2048_sub_in_place_16: +#else +.globl _sp_2048_sub_in_place_16 +.p2align 4 +_sp_2048_sub_in_place_16: +#endif /* __APPLE__ */ + movq (%rdi), %rdx + xorq %rax, %rax + subq (%rsi), %rdx + movq 8(%rdi), %rcx + movq %rdx, (%rdi) + sbbq 8(%rsi), %rcx + movq 16(%rdi), %rdx + movq %rcx, 8(%rdi) + sbbq 16(%rsi), %rdx + movq 24(%rdi), %rcx + movq %rdx, 16(%rdi) + sbbq 24(%rsi), %rcx + movq 32(%rdi), %rdx + movq %rcx, 24(%rdi) + sbbq 32(%rsi), %rdx + movq 40(%rdi), %rcx + movq %rdx, 32(%rdi) + sbbq 40(%rsi), %rcx + movq 48(%rdi), %rdx + movq %rcx, 40(%rdi) + sbbq 48(%rsi), %rdx + movq 56(%rdi), %rcx + movq %rdx, 48(%rdi) + sbbq 56(%rsi), %rcx + movq 64(%rdi), %rdx + movq %rcx, 56(%rdi) + sbbq 64(%rsi), %rdx + movq 72(%rdi), %rcx + movq %rdx, 64(%rdi) + sbbq 72(%rsi), %rcx + movq 80(%rdi), %rdx + movq %rcx, 72(%rdi) + sbbq 80(%rsi), %rdx + movq 88(%rdi), %rcx + movq %rdx, 80(%rdi) + sbbq 88(%rsi), %rcx + movq 96(%rdi), %rdx + movq %rcx, 88(%rdi) + sbbq 96(%rsi), %rdx + movq 104(%rdi), %rcx + movq %rdx, 96(%rdi) + sbbq 104(%rsi), %rcx + movq 112(%rdi), %rdx + movq %rcx, 104(%rdi) + sbbq 112(%rsi), %rdx + movq 120(%rdi), %rcx + movq %rdx, 112(%rdi) + sbbq 120(%rsi), %rcx + movq %rcx, 120(%rdi) + sbbq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_2048_sub_in_place_16,.-sp_2048_sub_in_place_16 +#endif /* __APPLE__ */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
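+ *
+ * Illustrative C sketch of the constant-time pattern (assumes 64-bit
+ * limbs; names are for exposition only). The mask is applied to b
+ * first, so the subtraction itself always runs:
+ *
+ *   #include <stdint.h>
+ *   uint64_t cond_sub_16_sketch(uint64_t r[16], const uint64_t a[16],
+ *                               const uint64_t b[16], uint64_t m)
+ *   {
+ *       uint64_t borrow = 0;
+ *       for (int i = 0; i < 16; i++) {
+ *           uint64_t bi = b[i] & m;        // 0, or b[i] when m == -1
+ *           uint64_t d  = a[i] - bi;
+ *           uint64_t b1 = (uint64_t)(a[i] < bi);
+ *           r[i]   = d - borrow;
+ *           borrow = b1 | (uint64_t)(d < borrow);
+ *       }
+ *       return (uint64_t)0 - borrow;       // 0 or -1, as in %rax here
+ *   }
+ *
+ * The assembly instead stages the masked b on the stack and lets the
+ * sbbq carry chain track the borrow.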
+ */ +#ifndef __APPLE__ +.globl sp_2048_cond_sub_16 +.type sp_2048_cond_sub_16,@function +.align 16 +sp_2048_cond_sub_16: +#else +.globl _sp_2048_cond_sub_16 +.p2align 4 +_sp_2048_cond_sub_16: +#endif /* __APPLE__ */ + subq $128, %rsp + movq $0, %rax + movq (%rdx), %r8 + movq 8(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, (%rsp) + movq %r9, 8(%rsp) + movq 16(%rdx), %r8 + movq 24(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 16(%rsp) + movq %r9, 24(%rsp) + movq 32(%rdx), %r8 + movq 40(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq 48(%rdx), %r8 + movq 56(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 48(%rsp) + movq %r9, 56(%rsp) + movq 64(%rdx), %r8 + movq 72(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 64(%rsp) + movq %r9, 72(%rsp) + movq 80(%rdx), %r8 + movq 88(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 80(%rsp) + movq %r9, 88(%rsp) + movq 96(%rdx), %r8 + movq 104(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 96(%rsp) + movq %r9, 104(%rsp) + movq 112(%rdx), %r8 + movq 120(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 112(%rsp) + movq %r9, 120(%rsp) + movq (%rsi), %r8 + movq (%rsp), %rdx + subq %rdx, %r8 + movq 8(%rsi), %r9 + movq 8(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, (%rdi) + movq 16(%rsi), %r8 + movq 16(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 8(%rdi) + movq 24(%rsi), %r9 + movq 24(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 16(%rdi) + movq 32(%rsi), %r8 + movq 32(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 24(%rdi) + movq 40(%rsi), %r9 + movq 40(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 32(%rdi) + movq 48(%rsi), %r8 + movq 48(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 40(%rdi) + movq 56(%rsi), %r9 + movq 56(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 48(%rdi) + movq 64(%rsi), %r8 + movq 64(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 56(%rdi) + movq 72(%rsi), %r9 + movq 72(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 64(%rdi) + movq 80(%rsi), %r8 + movq 80(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 72(%rdi) + movq 88(%rsi), %r9 + movq 88(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 80(%rdi) + movq 96(%rsi), %r8 + movq 96(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 88(%rdi) + movq 104(%rsi), %r9 + movq 104(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 96(%rdi) + movq 112(%rsi), %r8 + movq 112(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 104(%rdi) + movq 120(%rsi), %r9 + movq 120(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 112(%rdi) + movq %r9, 120(%rdi) + sbbq $0, %rax + addq $128, %rsp + repz retq +#ifndef __APPLE__ +.size sp_2048_cond_sub_16,.-sp_2048_cond_sub_16 +#endif /* __APPLE__ */ +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
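The reduction documented above is the standard word-at-a-time Montgomery REDC. As a reference for what sp_2048_mont_reduce_16 below computes, here is a minimal C sketch (illustrative only; assumes the GCC/Clang unsigned __int128 extension):

    #include <stdint.h>
    typedef unsigned __int128 uint128_t;

    /* a: 2*n words, reduced in place; m: n-word modulus;
     * mp: -(m^-1) mod 2^64. Each pass chooses mu so that adding
     * mu * m clears a[i]; after n passes the low n words are zero
     * and a[n..2n-1] holds the reduced value, up to at most one
     * extra subtraction of m. Returns the final carry; the assembly
     * negates it into a mask and calls sp_2048_cond_sub_16 to do
     * that last subtraction. */
    uint64_t mont_reduce(uint64_t *a, const uint64_t *m, uint64_t mp, int n)
    {
        uint64_t over = 0;
        for (int i = 0; i < n; i++) {
            uint64_t mu = a[i] * mp;          /* mod 2^64 */
            uint64_t carry = 0;
            for (int j = 0; j < n; j++) {     /* a[i..i+n-1] += mu * m */
                uint128_t t = (uint128_t)mu * m[j] + a[i + j] + carry;
                a[i + j] = (uint64_t)t;
                carry    = (uint64_t)(t >> 64);
            }
            uint128_t t = (uint128_t)a[i + n] + carry + over;
            a[i + n] = (uint64_t)t;
            over     = (uint64_t)(t >> 64);
        }
        return over;
    }

The assembly additionally keeps a[i] and a[i+1] cached in registers (%r13/%r14) across loop iterations, which is why its first two columns look different from the rest of the unrolled body.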
+ */ +#ifndef __APPLE__ +.globl sp_2048_mont_reduce_16 +.type sp_2048_mont_reduce_16,@function +.align 16 +sp_2048_mont_reduce_16: +#else +.globl _sp_2048_mont_reduce_16 +.p2align 4 +_sp_2048_mont_reduce_16: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + movq %rdx, %rcx + xorq %r15, %r15 + # i = 16 + movq $16, %r8 + movq (%rdi), %r13 + movq 8(%rdi), %r14 +L_mont_loop_16: + # mu = a[i] * mp + movq %r13, %r11 + imulq %rcx, %r11 + # a[i+0] += m[0] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq (%rsi) + addq %rax, %r13 + adcq %rdx, %r10 + # a[i+1] += m[1] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 8(%rsi) + movq %r14, %r13 + addq %rax, %r13 + adcq %rdx, %r9 + addq %r10, %r13 + adcq $0, %r9 + # a[i+2] += m[2] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 16(%rsi) + movq 16(%rdi), %r14 + addq %rax, %r14 + adcq %rdx, %r10 + addq %r9, %r14 + adcq $0, %r10 + # a[i+3] += m[3] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 24(%rsi) + movq 24(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 24(%rdi) + adcq $0, %r9 + # a[i+4] += m[4] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 32(%rsi) + movq 32(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 32(%rdi) + adcq $0, %r10 + # a[i+5] += m[5] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 40(%rsi) + movq 40(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 40(%rdi) + adcq $0, %r9 + # a[i+6] += m[6] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 48(%rsi) + movq 48(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 48(%rdi) + adcq $0, %r10 + # a[i+7] += m[7] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 56(%rsi) + movq 56(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 56(%rdi) + adcq $0, %r9 + # a[i+8] += m[8] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 64(%rsi) + movq 64(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 64(%rdi) + adcq $0, %r10 + # a[i+9] += m[9] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 72(%rsi) + movq 72(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 72(%rdi) + adcq $0, %r9 + # a[i+10] += m[10] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 80(%rsi) + movq 80(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 80(%rdi) + adcq $0, %r10 + # a[i+11] += m[11] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 88(%rsi) + movq 88(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 88(%rdi) + adcq $0, %r9 + # a[i+12] += m[12] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 96(%rsi) + movq 96(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 96(%rdi) + adcq $0, %r10 + # a[i+13] += m[13] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 104(%rsi) + movq 104(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 104(%rdi) + adcq $0, %r9 + # a[i+14] += m[14] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 112(%rsi) + movq 112(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 112(%rdi) + adcq $0, %r10 + # a[i+15] += m[15] * mu + movq %r11, %rax + mulq 120(%rsi) + movq 120(%rdi), %r12 + addq %rax, %r10 + adcq %r15, %rdx + movq $0, %r15 + adcq $0, %r15 + addq %r10, %r12 + movq %r12, 120(%rdi) + adcq %rdx, 128(%rdi) + adcq $0, %r15 + # i -= 1 + addq $8, %rdi + decq %r8 + jnz L_mont_loop_16 + movq %r13, (%rdi) + movq %r14, 8(%rdi) + negq %r15 + movq %r15, %rcx + movq %rsi, %rdx + movq %rdi, 
%rsi + subq $128, %rdi +#ifndef __APPLE__ + callq sp_2048_cond_sub_16@plt +#else + callq _sp_2048_cond_sub_16 +#endif /* __APPLE__ */ + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_2048_mont_reduce_16,.-sp_2048_mont_reduce_16 +#endif /* __APPLE__ */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +#ifndef __APPLE__ +.globl sp_2048_cond_sub_avx2_16 +.type sp_2048_cond_sub_avx2_16,@function +.align 16 +sp_2048_cond_sub_avx2_16: +#else +.globl _sp_2048_cond_sub_avx2_16 +.p2align 4 +_sp_2048_cond_sub_avx2_16: +#endif /* __APPLE__ */ + movq $0, %rax + movq (%rdx), %r10 + movq (%rsi), %r8 + pextq %rcx, %r10, %r10 + subq %r10, %r8 + movq 8(%rdx), %r10 + movq 8(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, (%rdi) + sbbq %r10, %r9 + movq 16(%rdx), %r8 + movq 16(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 8(%rdi) + sbbq %r8, %r10 + movq 24(%rdx), %r9 + movq 24(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 16(%rdi) + sbbq %r9, %r8 + movq 32(%rdx), %r10 + movq 32(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 24(%rdi) + sbbq %r10, %r9 + movq 40(%rdx), %r8 + movq 40(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 32(%rdi) + sbbq %r8, %r10 + movq 48(%rdx), %r9 + movq 48(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 40(%rdi) + sbbq %r9, %r8 + movq 56(%rdx), %r10 + movq 56(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 48(%rdi) + sbbq %r10, %r9 + movq 64(%rdx), %r8 + movq 64(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 56(%rdi) + sbbq %r8, %r10 + movq 72(%rdx), %r9 + movq 72(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 64(%rdi) + sbbq %r9, %r8 + movq 80(%rdx), %r10 + movq 80(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 72(%rdi) + sbbq %r10, %r9 + movq 88(%rdx), %r8 + movq 88(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 80(%rdi) + sbbq %r8, %r10 + movq 96(%rdx), %r9 + movq 96(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 88(%rdi) + sbbq %r9, %r8 + movq 104(%rdx), %r10 + movq 104(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 96(%rdi) + sbbq %r10, %r9 + movq 112(%rdx), %r8 + movq 112(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 104(%rdi) + sbbq %r8, %r10 + movq 120(%rdx), %r9 + movq 120(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 112(%rdi) + sbbq %r9, %r8 + movq %r8, 120(%rdi) + sbbq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_2048_cond_sub_avx2_16,.-sp_2048_cond_sub_avx2_16 +#endif /* __APPLE__ */ +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
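sp_2048_mul_d_16 below (like sp_2048_mul_d_32 earlier) is a schoolbook multiply of an n-word integer by a single 64-bit digit. The same computation in portable C (illustrative sketch, again assuming unsigned __int128):

    #include <stdint.h>
    typedef unsigned __int128 uint128_t;

    /* r (n+1 words) = a (n words) * b: the high half of each partial
     * product is the carry into the next column, and the final carry
     * becomes the extra top word (the 128(%rdi) store below). */
    void mul_d(uint64_t *r, const uint64_t *a, uint64_t b, int n)
    {
        uint64_t carry = 0;
        for (int i = 0; i < n; i++) {
            uint128_t t = (uint128_t)a[i] * b + carry;
            r[i]  = (uint64_t)t;
            carry = (uint64_t)(t >> 64);
        }
        r[n] = carry;
    }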
+ */ +#ifndef __APPLE__ +.globl sp_2048_mul_d_16 +.type sp_2048_mul_d_16,@function +.align 16 +sp_2048_mul_d_16: +#else +.globl _sp_2048_mul_d_16 +.p2align 4 +_sp_2048_mul_d_16: +#endif /* __APPLE__ */ + movq %rdx, %rcx + # A[0] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + movq %r8, (%rdi) + # A[1] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 8(%rsi) + addq %rax, %r9 + movq %r9, 8(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[2] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 16(%rsi) + addq %rax, %r10 + movq %r10, 16(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 24(%rsi) + addq %rax, %r8 + movq %r8, 24(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[4] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 32(%rsi) + addq %rax, %r9 + movq %r9, 32(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[5] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 40(%rsi) + addq %rax, %r10 + movq %r10, 40(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[6] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 48(%rsi) + addq %rax, %r8 + movq %r8, 48(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[7] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 56(%rsi) + addq %rax, %r9 + movq %r9, 56(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[8] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 64(%rsi) + addq %rax, %r10 + movq %r10, 64(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[9] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 72(%rsi) + addq %rax, %r8 + movq %r8, 72(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[10] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 80(%rsi) + addq %rax, %r9 + movq %r9, 80(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[11] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 88(%rsi) + addq %rax, %r10 + movq %r10, 88(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[12] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 96(%rsi) + addq %rax, %r8 + movq %r8, 96(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[13] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 104(%rsi) + addq %rax, %r9 + movq %r9, 104(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[14] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 112(%rsi) + addq %rax, %r10 + movq %r10, 112(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[15] * B + movq %rcx, %rax + mulq 120(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + movq %r8, 120(%rdi) + movq %r9, 128(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_2048_mul_d_16,.-sp_2048_mul_d_16 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
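The AVX2 variant below swaps mulq for mulx, which produces both product halves without touching the flags, so carry chains built with adcx/adox can run the length of the row uninterrupted. Roughly the same digit multiply with compiler intrinsics (assumes BMI2/ADX hardware and <immintrin.h>; the names and shape are illustrative, not part of the patch):

    #include <immintrin.h>

    void mul_d_mulx(unsigned long long *r, const unsigned long long *a,
                    unsigned long long b, int n)
    {
        unsigned long long hi, lo, prev_hi = 0;
        unsigned char c = 0;
        for (int i = 0; i < n; i++) {
            lo = _mulx_u64(a[i], b, &hi);  /* flags left untouched */
            c  = _addcarry_u64(c, lo, prev_hi, &r[i]);
            prev_hi = hi;
        }
        r[n] = prev_hi + c;                /* top word; cannot overflow */
    }

A digit multiply needs only one carry chain; the second (OF-based adox) chain earns its keep in the full n-by-n multiplies elsewhere in this file, where two rows of additions can then retire in parallel.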
+ */ +#ifndef __APPLE__ +.globl sp_2048_mul_d_avx2_16 +.type sp_2048_mul_d_avx2_16,@function +.align 16 +sp_2048_mul_d_avx2_16: +#else +.globl _sp_2048_mul_d_avx2_16 +.p2align 4 +_sp_2048_mul_d_avx2_16: +#endif /* __APPLE__ */ + movq %rdx, %rax + # A[0] * B + movq %rax, %rdx + xorq %r11, %r11 + mulxq (%rsi), %r9, %r10 + movq %r9, (%rdi) + # A[1] * B + mulxq 8(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 8(%rdi) + adoxq %r8, %r9 + # A[2] * B + mulxq 16(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 16(%rdi) + adoxq %r8, %r10 + # A[3] * B + mulxq 24(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 24(%rdi) + adoxq %r8, %r9 + # A[4] * B + mulxq 32(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 32(%rdi) + adoxq %r8, %r10 + # A[5] * B + mulxq 40(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 40(%rdi) + adoxq %r8, %r9 + # A[6] * B + mulxq 48(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 48(%rdi) + adoxq %r8, %r10 + # A[7] * B + mulxq 56(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 56(%rdi) + adoxq %r8, %r9 + # A[8] * B + mulxq 64(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 64(%rdi) + adoxq %r8, %r10 + # A[9] * B + mulxq 72(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 72(%rdi) + adoxq %r8, %r9 + # A[10] * B + mulxq 80(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 80(%rdi) + adoxq %r8, %r10 + # A[11] * B + mulxq 88(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 88(%rdi) + adoxq %r8, %r9 + # A[12] * B + mulxq 96(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 96(%rdi) + adoxq %r8, %r10 + # A[13] * B + mulxq 104(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 104(%rdi) + adoxq %r8, %r9 + # A[14] * B + mulxq 112(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 112(%rdi) + adoxq %r8, %r10 + # A[15] * B + mulxq 120(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + adcxq %r11, %r9 + movq %r10, 120(%rdi) + movq %r9, 128(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_2048_mul_d_avx2_16,.-sp_2048_mul_d_avx2_16 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. 
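sp_2048_cmp_16 below compares from the most significant word down but always touches every word: a running all-ones mask zeroes both inputs once a difference has been seen, freezing the result, and cmov keeps the scan free of data-dependent branches. Roughly equivalent C (illustrative; a compiler is not obliged to keep the ternaries branchless the way the hand-written cmov sequence is):

    #include <stdint.h>

    int64_t cmp_ct(const uint64_t *a, const uint64_t *b, int n)
    {
        int64_t  r    = -1;           /* the assembly's initial %rax   */
        uint64_t mask = UINT64_MAX;   /* -1 while all words were equal */
        for (int i = n - 1; i >= 0; i--) {
            uint64_t ai = a[i] & mask;
            uint64_t bi = b[i] & mask;
            r = (ai > bi) ?  1 : r;
            r = (ai < bi) ? -1 : r;
            mask = (ai == bi) ? mask : 0;  /* freeze after first diff */
        }
        return r ^ (int64_t)mask;     /* all equal: -1 ^ -1 == 0 */
    }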
+ */ +#ifndef __APPLE__ +.globl sp_2048_cmp_16 +.type sp_2048_cmp_16,@function +.align 16 +sp_2048_cmp_16: +#else +.globl _sp_2048_cmp_16 +.p2align 4 +_sp_2048_cmp_16: +#endif /* __APPLE__ */ + xorq %rcx, %rcx + movq $-1, %rdx + movq $-1, %rax + movq $1, %r8 + movq 120(%rdi), %r9 + movq 120(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 112(%rdi), %r9 + movq 112(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 104(%rdi), %r9 + movq 104(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 96(%rdi), %r9 + movq 96(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 88(%rdi), %r9 + movq 88(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 80(%rdi), %r9 + movq 80(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 72(%rdi), %r9 + movq 72(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 64(%rdi), %r9 + movq 64(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 56(%rdi), %r9 + movq 56(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 48(%rdi), %r9 + movq 48(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 40(%rdi), %r9 + movq 40(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 32(%rdi), %r9 + movq 32(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 24(%rdi), %r9 + movq 24(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 16(%rdi), %r9 + movq 16(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 8(%rdi), %r9 + movq 8(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq (%rdi), %r9 + movq (%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + xorq %rdx, %rax + repz retq +#ifndef __APPLE__ +.size sp_2048_cmp_16,.-sp_2048_cmp_16 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +#ifndef __APPLE__ +.globl sp_2048_mont_reduce_avx2_16 +.type sp_2048_mont_reduce_avx2_16,@function +.align 16 +sp_2048_mont_reduce_avx2_16: +#else +.globl _sp_2048_mont_reduce_avx2_16 +.p2align 4 +_sp_2048_mont_reduce_avx2_16: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + movq %rdx, %r8 + xorq %r14, %r14 + # i = 16 + movq $16, %r9 + movq (%rdi), %r13 + addq $64, %rdi + xorq %r12, %r12 +L_mont_loop_avx2_16: + # mu = a[i] * mp + movq %r13, %rdx + movq %r13, %r10 + imulq %r8, %rdx + xorq %r12, %r12 + # a[i+0] += m[0] * mu + mulxq (%rsi), %rax, %rcx + movq -56(%rdi), %r13 + adcxq %rax, %r10 + adoxq %rcx, %r13 + # a[i+1] += m[1] * mu + mulxq 8(%rsi), %rax, %rcx + movq -48(%rdi), %r10 + adcxq %rax, %r13 + adoxq %rcx, %r10 + # a[i+2] += m[2] * mu + mulxq 16(%rsi), %rax, %rcx + movq -40(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -48(%rdi) + # a[i+3] += m[3] * mu + mulxq 24(%rsi), %rax, %rcx + movq -32(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -40(%rdi) + # a[i+4] += m[4] * mu + mulxq 32(%rsi), %rax, %rcx + movq -24(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -32(%rdi) + # a[i+5] += m[5] * mu + mulxq 40(%rsi), %rax, %rcx + movq -16(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -24(%rdi) + # a[i+6] += m[6] * mu + mulxq 48(%rsi), %rax, %rcx + movq -8(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -16(%rdi) + # a[i+7] += m[7] * mu + mulxq 56(%rsi), %rax, %rcx + movq (%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -8(%rdi) + # a[i+8] += m[8] * mu + mulxq 64(%rsi), %rax, %rcx + movq 8(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, (%rdi) + # a[i+9] += m[9] * mu + mulxq 72(%rsi), %rax, %rcx + movq 16(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 8(%rdi) + # a[i+10] += m[10] * mu + mulxq 80(%rsi), %rax, %rcx + movq 24(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 16(%rdi) + # a[i+11] += m[11] * mu + mulxq 88(%rsi), %rax, %rcx + movq 32(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 24(%rdi) + # a[i+12] += m[12] * mu + mulxq 96(%rsi), %rax, %rcx + movq 40(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 32(%rdi) + # a[i+13] += m[13] * mu + mulxq 104(%rsi), %rax, %rcx + movq 48(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 40(%rdi) + # a[i+14] += m[14] * mu + mulxq 112(%rsi), %rax, %rcx + movq 56(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 48(%rdi) + # a[i+15] += m[15] * mu + mulxq 120(%rsi), %rax, %rcx + movq 64(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 56(%rdi) + adcxq %r14, %r10 + movq %r10, 64(%rdi) + movq %r12, %r14 + adoxq %r12, %r14 + adcxq %r12, %r14 + # mu = a[i] * mp + movq %r13, %rdx + movq %r13, %r10 + imulq %r8, %rdx + xorq %r12, %r12 + # a[i+0] += m[0] * mu + mulxq (%rsi), %rax, %rcx + movq -48(%rdi), %r13 + adcxq %rax, %r10 + adoxq %rcx, %r13 + # a[i+1] += m[1] * mu + mulxq 8(%rsi), %rax, %rcx + movq -40(%rdi), %r10 + adcxq %rax, %r13 + adoxq %rcx, %r10 + # a[i+2] += m[2] * mu + mulxq 16(%rsi), %rax, %rcx + movq -32(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -40(%rdi) + # a[i+3] += m[3] * mu + mulxq 24(%rsi), %rax, %rcx + movq -24(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -32(%rdi) + # a[i+4] += m[4] * mu + mulxq 32(%rsi), %rax, %rcx + movq -16(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -24(%rdi) + # a[i+5] += m[5] * mu + mulxq 40(%rsi), %rax, %rcx + movq -8(%rdi), %r10 
+ adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -16(%rdi) + # a[i+6] += m[6] * mu + mulxq 48(%rsi), %rax, %rcx + movq (%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -8(%rdi) + # a[i+7] += m[7] * mu + mulxq 56(%rsi), %rax, %rcx + movq 8(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, (%rdi) + # a[i+8] += m[8] * mu + mulxq 64(%rsi), %rax, %rcx + movq 16(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 8(%rdi) + # a[i+9] += m[9] * mu + mulxq 72(%rsi), %rax, %rcx + movq 24(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 16(%rdi) + # a[i+10] += m[10] * mu + mulxq 80(%rsi), %rax, %rcx + movq 32(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 24(%rdi) + # a[i+11] += m[11] * mu + mulxq 88(%rsi), %rax, %rcx + movq 40(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 32(%rdi) + # a[i+12] += m[12] * mu + mulxq 96(%rsi), %rax, %rcx + movq 48(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 40(%rdi) + # a[i+13] += m[13] * mu + mulxq 104(%rsi), %rax, %rcx + movq 56(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 48(%rdi) + # a[i+14] += m[14] * mu + mulxq 112(%rsi), %rax, %rcx + movq 64(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 56(%rdi) + # a[i+15] += m[15] * mu + mulxq 120(%rsi), %rax, %rcx + movq 72(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 64(%rdi) + adcxq %r14, %r10 + movq %r10, 72(%rdi) + movq %r12, %r14 + adoxq %r12, %r14 + adcxq %r12, %r14 + # a += 2 + addq $16, %rdi + # i -= 2 + subq $2, %r9 + jnz L_mont_loop_avx2_16 + subq $64, %rdi + negq %r14 + movq %rdi, %r8 + subq $128, %rdi + movq (%rsi), %rcx + movq %r13, %rdx + pextq %r14, %rcx, %rcx + subq %rcx, %rdx + movq 8(%rsi), %rcx + movq 8(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, (%rdi) + sbbq %rcx, %rax + movq 16(%rsi), %rdx + movq 16(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 8(%rdi) + sbbq %rdx, %rcx + movq 24(%rsi), %rax + movq 24(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 16(%rdi) + sbbq %rax, %rdx + movq 32(%rsi), %rcx + movq 32(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 24(%rdi) + sbbq %rcx, %rax + movq 40(%rsi), %rdx + movq 40(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 32(%rdi) + sbbq %rdx, %rcx + movq 48(%rsi), %rax + movq 48(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 40(%rdi) + sbbq %rax, %rdx + movq 56(%rsi), %rcx + movq 56(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 48(%rdi) + sbbq %rcx, %rax + movq 64(%rsi), %rdx + movq 64(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 56(%rdi) + sbbq %rdx, %rcx + movq 72(%rsi), %rax + movq 72(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 64(%rdi) + sbbq %rax, %rdx + movq 80(%rsi), %rcx + movq 80(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 72(%rdi) + sbbq %rcx, %rax + movq 88(%rsi), %rdx + movq 88(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 80(%rdi) + sbbq %rdx, %rcx + movq 96(%rsi), %rax + movq 96(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 88(%rdi) + sbbq %rax, %rdx + movq 104(%rsi), %rcx + movq 104(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 96(%rdi) + sbbq %rcx, %rax + movq 112(%rsi), %rdx + movq 112(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 104(%rdi) + sbbq %rdx, %rcx + movq 120(%rsi), %rax + movq 120(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 112(%rdi) + sbbq %rax, %rdx + movq %rdx, 120(%rdi) + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_2048_mont_reduce_avx2_16,.-sp_2048_mont_reduce_avx2_16 +#endif /* __APPLE__ */ +#endif /* 
HAVE_INTEL_AVX2 */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +#ifndef __APPLE__ +.globl sp_2048_cond_sub_32 +.type sp_2048_cond_sub_32,@function +.align 16 +sp_2048_cond_sub_32: +#else +.globl _sp_2048_cond_sub_32 +.p2align 4 +_sp_2048_cond_sub_32: +#endif /* __APPLE__ */ + subq $256, %rsp + movq $0, %rax + movq (%rdx), %r8 + movq 8(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, (%rsp) + movq %r9, 8(%rsp) + movq 16(%rdx), %r8 + movq 24(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 16(%rsp) + movq %r9, 24(%rsp) + movq 32(%rdx), %r8 + movq 40(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq 48(%rdx), %r8 + movq 56(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 48(%rsp) + movq %r9, 56(%rsp) + movq 64(%rdx), %r8 + movq 72(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 64(%rsp) + movq %r9, 72(%rsp) + movq 80(%rdx), %r8 + movq 88(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 80(%rsp) + movq %r9, 88(%rsp) + movq 96(%rdx), %r8 + movq 104(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 96(%rsp) + movq %r9, 104(%rsp) + movq 112(%rdx), %r8 + movq 120(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 112(%rsp) + movq %r9, 120(%rsp) + movq 128(%rdx), %r8 + movq 136(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 128(%rsp) + movq %r9, 136(%rsp) + movq 144(%rdx), %r8 + movq 152(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 144(%rsp) + movq %r9, 152(%rsp) + movq 160(%rdx), %r8 + movq 168(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 160(%rsp) + movq %r9, 168(%rsp) + movq 176(%rdx), %r8 + movq 184(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 176(%rsp) + movq %r9, 184(%rsp) + movq 192(%rdx), %r8 + movq 200(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 192(%rsp) + movq %r9, 200(%rsp) + movq 208(%rdx), %r8 + movq 216(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 208(%rsp) + movq %r9, 216(%rsp) + movq 224(%rdx), %r8 + movq 232(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 224(%rsp) + movq %r9, 232(%rsp) + movq 240(%rdx), %r8 + movq 248(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 240(%rsp) + movq %r9, 248(%rsp) + movq (%rsi), %r8 + movq (%rsp), %rdx + subq %rdx, %r8 + movq 8(%rsi), %r9 + movq 8(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, (%rdi) + movq 16(%rsi), %r8 + movq 16(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 8(%rdi) + movq 24(%rsi), %r9 + movq 24(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 16(%rdi) + movq 32(%rsi), %r8 + movq 32(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 24(%rdi) + movq 40(%rsi), %r9 + movq 40(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 32(%rdi) + movq 48(%rsi), %r8 + movq 48(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 40(%rdi) + movq 56(%rsi), %r9 + movq 56(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 48(%rdi) + movq 64(%rsi), %r8 + movq 64(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 56(%rdi) + movq 72(%rsi), %r9 + movq 72(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 64(%rdi) + movq 80(%rsi), %r8 + movq 80(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 72(%rdi) + movq 88(%rsi), %r9 + movq 88(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 80(%rdi) + movq 96(%rsi), %r8 + movq 96(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 88(%rdi) + movq 104(%rsi), %r9 + movq 104(%rsp), 
%rdx + sbbq %rdx, %r9 + movq %r8, 96(%rdi) + movq 112(%rsi), %r8 + movq 112(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 104(%rdi) + movq 120(%rsi), %r9 + movq 120(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 112(%rdi) + movq 128(%rsi), %r8 + movq 128(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 120(%rdi) + movq 136(%rsi), %r9 + movq 136(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 128(%rdi) + movq 144(%rsi), %r8 + movq 144(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 136(%rdi) + movq 152(%rsi), %r9 + movq 152(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 144(%rdi) + movq 160(%rsi), %r8 + movq 160(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 152(%rdi) + movq 168(%rsi), %r9 + movq 168(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 160(%rdi) + movq 176(%rsi), %r8 + movq 176(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 168(%rdi) + movq 184(%rsi), %r9 + movq 184(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 176(%rdi) + movq 192(%rsi), %r8 + movq 192(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 184(%rdi) + movq 200(%rsi), %r9 + movq 200(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 192(%rdi) + movq 208(%rsi), %r8 + movq 208(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 200(%rdi) + movq 216(%rsi), %r9 + movq 216(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 208(%rdi) + movq 224(%rsi), %r8 + movq 224(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 216(%rdi) + movq 232(%rsi), %r9 + movq 232(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 224(%rdi) + movq 240(%rsi), %r8 + movq 240(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 232(%rdi) + movq 248(%rsi), %r9 + movq 248(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 240(%rdi) + movq %r9, 248(%rdi) + sbbq $0, %rax + addq $256, %rsp + repz retq +#ifndef __APPLE__ +.size sp_2048_cond_sub_32,.-sp_2048_cond_sub_32 +#endif /* __APPLE__ */ +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +#ifndef __APPLE__ +.globl sp_2048_mont_reduce_32 +.type sp_2048_mont_reduce_32,@function +.align 16 +sp_2048_mont_reduce_32: +#else +.globl _sp_2048_mont_reduce_32 +.p2align 4 +_sp_2048_mont_reduce_32: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + movq %rdx, %rcx + xorq %r15, %r15 + # i = 32 + movq $32, %r8 + movq (%rdi), %r13 + movq 8(%rdi), %r14 +L_mont_loop_32: + # mu = a[i] * mp + movq %r13, %r11 + imulq %rcx, %r11 + # a[i+0] += m[0] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq (%rsi) + addq %rax, %r13 + adcq %rdx, %r10 + # a[i+1] += m[1] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 8(%rsi) + movq %r14, %r13 + addq %rax, %r13 + adcq %rdx, %r9 + addq %r10, %r13 + adcq $0, %r9 + # a[i+2] += m[2] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 16(%rsi) + movq 16(%rdi), %r14 + addq %rax, %r14 + adcq %rdx, %r10 + addq %r9, %r14 + adcq $0, %r10 + # a[i+3] += m[3] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 24(%rsi) + movq 24(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 24(%rdi) + adcq $0, %r9 + # a[i+4] += m[4] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 32(%rsi) + movq 32(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 32(%rdi) + adcq $0, %r10 + # a[i+5] += m[5] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 40(%rsi) + movq 40(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 40(%rdi) + adcq $0, %r9 + # a[i+6] += m[6] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 48(%rsi) + movq 48(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 48(%rdi) + adcq $0, %r10 + # a[i+7] += m[7] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 56(%rsi) + movq 56(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 56(%rdi) + adcq $0, %r9 + # a[i+8] += m[8] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 64(%rsi) + movq 64(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 64(%rdi) + adcq $0, %r10 + # a[i+9] += m[9] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 72(%rsi) + movq 72(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 72(%rdi) + adcq $0, %r9 + # a[i+10] += m[10] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 80(%rsi) + movq 80(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 80(%rdi) + adcq $0, %r10 + # a[i+11] += m[11] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 88(%rsi) + movq 88(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 88(%rdi) + adcq $0, %r9 + # a[i+12] += m[12] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 96(%rsi) + movq 96(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 96(%rdi) + adcq $0, %r10 + # a[i+13] += m[13] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 104(%rsi) + movq 104(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 104(%rdi) + adcq $0, %r9 + # a[i+14] += m[14] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 112(%rsi) + movq 112(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 112(%rdi) + adcq $0, %r10 + # a[i+15] += m[15] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 120(%rsi) + movq 120(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 120(%rdi) + adcq $0, %r9 + # a[i+16] += m[16] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 128(%rsi) + movq 128(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 128(%rdi) + adcq $0, %r10 + # 
a[i+17] += m[17] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 136(%rsi) + movq 136(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 136(%rdi) + adcq $0, %r9 + # a[i+18] += m[18] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 144(%rsi) + movq 144(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 144(%rdi) + adcq $0, %r10 + # a[i+19] += m[19] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 152(%rsi) + movq 152(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 152(%rdi) + adcq $0, %r9 + # a[i+20] += m[20] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 160(%rsi) + movq 160(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 160(%rdi) + adcq $0, %r10 + # a[i+21] += m[21] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 168(%rsi) + movq 168(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 168(%rdi) + adcq $0, %r9 + # a[i+22] += m[22] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 176(%rsi) + movq 176(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 176(%rdi) + adcq $0, %r10 + # a[i+23] += m[23] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 184(%rsi) + movq 184(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 184(%rdi) + adcq $0, %r9 + # a[i+24] += m[24] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 192(%rsi) + movq 192(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 192(%rdi) + adcq $0, %r10 + # a[i+25] += m[25] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 200(%rsi) + movq 200(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 200(%rdi) + adcq $0, %r9 + # a[i+26] += m[26] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 208(%rsi) + movq 208(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 208(%rdi) + adcq $0, %r10 + # a[i+27] += m[27] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 216(%rsi) + movq 216(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 216(%rdi) + adcq $0, %r9 + # a[i+28] += m[28] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 224(%rsi) + movq 224(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 224(%rdi) + adcq $0, %r10 + # a[i+29] += m[29] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 232(%rsi) + movq 232(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 232(%rdi) + adcq $0, %r9 + # a[i+30] += m[30] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 240(%rsi) + movq 240(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 240(%rdi) + adcq $0, %r10 + # a[i+31] += m[31] * mu + movq %r11, %rax + mulq 248(%rsi) + movq 248(%rdi), %r12 + addq %rax, %r10 + adcq %r15, %rdx + movq $0, %r15 + adcq $0, %r15 + addq %r10, %r12 + movq %r12, 248(%rdi) + adcq %rdx, 256(%rdi) + adcq $0, %r15 + # i -= 1 + addq $8, %rdi + decq %r8 + jnz L_mont_loop_32 + movq %r13, (%rdi) + movq %r14, 8(%rdi) + negq %r15 + movq %r15, %rcx + movq %rsi, %rdx + movq %rdi, %rsi + subq $256, %rdi +#ifndef __APPLE__ + callq sp_2048_cond_sub_32@plt +#else + callq _sp_2048_cond_sub_32 +#endif /* __APPLE__ */ + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_2048_mont_reduce_32,.-sp_2048_mont_reduce_32 +#endif /* __APPLE__ */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. 
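The AVX2 conditional subtracts (sp_2048_cond_sub_avx2_16 above, and the 32-word variant documented here) replace the and-with-mask staging by pextq. Bit extraction with an all-ones selector is the identity and with a zero selector yields zero, and unlike andq it leaves the flags alone, so the masking can be interleaved directly with the sbbq borrow chain and no stack buffer is needed. In intrinsic form (assumes BMI2 and <immintrin.h>; illustrative only):

    #include <stdint.h>
    #include <immintrin.h>

    /* _pext_u64(x, m) gathers the bits of x selected by m:
     * m == ~0ULL -> x itself, m == 0 -> 0, i.e. exactly x & m for
     * the only two mask values this code ever passes. */
    static inline uint64_t masked(uint64_t x, uint64_t m)
    {
        return _pext_u64(x, m);
    }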
+ * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +#ifndef __APPLE__ +.globl sp_2048_cond_sub_avx2_32 +.type sp_2048_cond_sub_avx2_32,@function +.align 16 +sp_2048_cond_sub_avx2_32: +#else +.globl _sp_2048_cond_sub_avx2_32 +.p2align 4 +_sp_2048_cond_sub_avx2_32: +#endif /* __APPLE__ */ + movq $0, %rax + movq (%rdx), %r10 + movq (%rsi), %r8 + pextq %rcx, %r10, %r10 + subq %r10, %r8 + movq 8(%rdx), %r10 + movq 8(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, (%rdi) + sbbq %r10, %r9 + movq 16(%rdx), %r8 + movq 16(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 8(%rdi) + sbbq %r8, %r10 + movq 24(%rdx), %r9 + movq 24(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 16(%rdi) + sbbq %r9, %r8 + movq 32(%rdx), %r10 + movq 32(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 24(%rdi) + sbbq %r10, %r9 + movq 40(%rdx), %r8 + movq 40(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 32(%rdi) + sbbq %r8, %r10 + movq 48(%rdx), %r9 + movq 48(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 40(%rdi) + sbbq %r9, %r8 + movq 56(%rdx), %r10 + movq 56(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 48(%rdi) + sbbq %r10, %r9 + movq 64(%rdx), %r8 + movq 64(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 56(%rdi) + sbbq %r8, %r10 + movq 72(%rdx), %r9 + movq 72(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 64(%rdi) + sbbq %r9, %r8 + movq 80(%rdx), %r10 + movq 80(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 72(%rdi) + sbbq %r10, %r9 + movq 88(%rdx), %r8 + movq 88(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 80(%rdi) + sbbq %r8, %r10 + movq 96(%rdx), %r9 + movq 96(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 88(%rdi) + sbbq %r9, %r8 + movq 104(%rdx), %r10 + movq 104(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 96(%rdi) + sbbq %r10, %r9 + movq 112(%rdx), %r8 + movq 112(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 104(%rdi) + sbbq %r8, %r10 + movq 120(%rdx), %r9 + movq 120(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 112(%rdi) + sbbq %r9, %r8 + movq 128(%rdx), %r10 + movq 128(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 120(%rdi) + sbbq %r10, %r9 + movq 136(%rdx), %r8 + movq 136(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 128(%rdi) + sbbq %r8, %r10 + movq 144(%rdx), %r9 + movq 144(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 136(%rdi) + sbbq %r9, %r8 + movq 152(%rdx), %r10 + movq 152(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 144(%rdi) + sbbq %r10, %r9 + movq 160(%rdx), %r8 + movq 160(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 152(%rdi) + sbbq %r8, %r10 + movq 168(%rdx), %r9 + movq 168(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 160(%rdi) + sbbq %r9, %r8 + movq 176(%rdx), %r10 + movq 176(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 168(%rdi) + sbbq %r10, %r9 + movq 184(%rdx), %r8 + movq 184(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 176(%rdi) + sbbq %r8, %r10 + movq 192(%rdx), %r9 + movq 192(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 184(%rdi) + sbbq %r9, %r8 + movq 200(%rdx), %r10 + movq 200(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 192(%rdi) + sbbq %r10, %r9 + movq 208(%rdx), %r8 + movq 208(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 200(%rdi) + sbbq %r8, %r10 + movq 216(%rdx), %r9 + movq 216(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 208(%rdi) + sbbq %r9, %r8 + movq 224(%rdx), %r10 + movq 224(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 216(%rdi) + sbbq %r10, %r9 + movq 232(%rdx), %r8 + movq 232(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 224(%rdi) + sbbq %r8, %r10 + movq 240(%rdx), %r9 
+ movq 240(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 232(%rdi) + sbbq %r9, %r8 + movq 248(%rdx), %r10 + movq 248(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 240(%rdi) + sbbq %r10, %r9 + movq %r9, 248(%rdi) + sbbq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_2048_cond_sub_avx2_32,.-sp_2048_cond_sub_avx2_32 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +#ifndef __APPLE__ +.globl sp_2048_mul_d_avx2_32 +.type sp_2048_mul_d_avx2_32,@function +.align 16 +sp_2048_mul_d_avx2_32: +#else +.globl _sp_2048_mul_d_avx2_32 +.p2align 4 +_sp_2048_mul_d_avx2_32: +#endif /* __APPLE__ */ + movq %rdx, %rax + # A[0] * B + movq %rax, %rdx + xorq %r11, %r11 + mulxq (%rsi), %r9, %r10 + movq %r9, (%rdi) + # A[1] * B + mulxq 8(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 8(%rdi) + adoxq %r8, %r9 + # A[2] * B + mulxq 16(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 16(%rdi) + adoxq %r8, %r10 + # A[3] * B + mulxq 24(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 24(%rdi) + adoxq %r8, %r9 + # A[4] * B + mulxq 32(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 32(%rdi) + adoxq %r8, %r10 + # A[5] * B + mulxq 40(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 40(%rdi) + adoxq %r8, %r9 + # A[6] * B + mulxq 48(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 48(%rdi) + adoxq %r8, %r10 + # A[7] * B + mulxq 56(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 56(%rdi) + adoxq %r8, %r9 + # A[8] * B + mulxq 64(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 64(%rdi) + adoxq %r8, %r10 + # A[9] * B + mulxq 72(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 72(%rdi) + adoxq %r8, %r9 + # A[10] * B + mulxq 80(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 80(%rdi) + adoxq %r8, %r10 + # A[11] * B + mulxq 88(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 88(%rdi) + adoxq %r8, %r9 + # A[12] * B + mulxq 96(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 96(%rdi) + adoxq %r8, %r10 + # A[13] * B + mulxq 104(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 104(%rdi) + adoxq %r8, %r9 + # A[14] * B + mulxq 112(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 112(%rdi) + adoxq %r8, %r10 + # A[15] * B + mulxq 120(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 120(%rdi) + adoxq %r8, %r9 + # A[16] * B + mulxq 128(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 128(%rdi) + adoxq %r8, %r10 + # A[17] * B + mulxq 136(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 136(%rdi) + adoxq %r8, %r9 + # A[18] * B + mulxq 144(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 144(%rdi) + adoxq %r8, %r10 + # A[19] * B + mulxq 152(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 152(%rdi) + adoxq %r8, %r9 + # A[20] * B + mulxq 160(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 160(%rdi) + adoxq %r8, %r10 + # A[21] * B + mulxq 168(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 168(%rdi) + adoxq %r8, %r9 + # A[22] * B + mulxq 176(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 176(%rdi) + adoxq %r8, %r10 + # A[23] * B + mulxq 184(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 184(%rdi) + adoxq %r8, %r9 
+ # A[24] * B + mulxq 192(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 192(%rdi) + adoxq %r8, %r10 + # A[25] * B + mulxq 200(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 200(%rdi) + adoxq %r8, %r9 + # A[26] * B + mulxq 208(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 208(%rdi) + adoxq %r8, %r10 + # A[27] * B + mulxq 216(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 216(%rdi) + adoxq %r8, %r9 + # A[28] * B + mulxq 224(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 224(%rdi) + adoxq %r8, %r10 + # A[29] * B + mulxq 232(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 232(%rdi) + adoxq %r8, %r9 + # A[30] * B + mulxq 240(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 240(%rdi) + adoxq %r8, %r10 + # A[31] * B + mulxq 248(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + adcxq %r11, %r9 + movq %r10, 248(%rdi) + movq %r9, 256(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_2048_mul_d_avx2_32,.-sp_2048_mul_d_avx2_32 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +#ifndef __APPLE__ +.globl sp_2048_cmp_32 +.type sp_2048_cmp_32,@function +.align 16 +sp_2048_cmp_32: +#else +.globl _sp_2048_cmp_32 +.p2align 4 +_sp_2048_cmp_32: +#endif /* __APPLE__ */ + xorq %rcx, %rcx + movq $-1, %rdx + movq $-1, %rax + movq $1, %r8 + movq 248(%rdi), %r9 + movq 248(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 240(%rdi), %r9 + movq 240(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 232(%rdi), %r9 + movq 232(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 224(%rdi), %r9 + movq 224(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 216(%rdi), %r9 + movq 216(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 208(%rdi), %r9 + movq 208(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 200(%rdi), %r9 + movq 200(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 192(%rdi), %r9 + movq 192(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 184(%rdi), %r9 + movq 184(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 176(%rdi), %r9 + movq 176(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 168(%rdi), %r9 + movq 168(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 160(%rdi), %r9 + movq 160(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 152(%rdi), %r9 + movq 152(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + 
cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 144(%rdi), %r9 + movq 144(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 136(%rdi), %r9 + movq 136(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 128(%rdi), %r9 + movq 128(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 120(%rdi), %r9 + movq 120(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 112(%rdi), %r9 + movq 112(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 104(%rdi), %r9 + movq 104(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 96(%rdi), %r9 + movq 96(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 88(%rdi), %r9 + movq 88(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 80(%rdi), %r9 + movq 80(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 72(%rdi), %r9 + movq 72(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 64(%rdi), %r9 + movq 64(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 56(%rdi), %r9 + movq 56(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 48(%rdi), %r9 + movq 48(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 40(%rdi), %r9 + movq 40(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 32(%rdi), %r9 + movq 32(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 24(%rdi), %r9 + movq 24(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 16(%rdi), %r9 + movq 16(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 8(%rdi), %r9 + movq 8(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq (%rdi), %r9 + movq (%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + xorq %rdx, %rax + repz retq +#ifndef __APPLE__ +.size sp_2048_cmp_32,.-sp_2048_cmp_32 +#endif /* __APPLE__ */ +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
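sp_2048_sub_32 below is the unmasked special case of the conditional subtract sketched earlier (m fixed at all ones), so only the borrow ripple remains. In portable C (illustrative, not part of the patch):

    #include <stdint.h>

    /* r = a - b over n words; returns 0, or -1 (all ones) if the top
     * word borrowed, matching %rax. */
    uint64_t sub_n(uint64_t *r, const uint64_t *a, const uint64_t *b, int n)
    {
        uint64_t borrow = 0;
        for (int i = 0; i < n; i++) {
            uint64_t t = a[i] - b[i] - borrow;
            /* borrow out iff b[i] + borrow exceeds a[i] */
            borrow = (a[i] < b[i]) | ((a[i] == b[i]) & borrow);
            r[i] = t;
        }
        return 0 - borrow;
    }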
+ */ +#ifndef __APPLE__ +.globl sp_2048_sub_32 +.type sp_2048_sub_32,@function +.align 16 +sp_2048_sub_32: +#else +.globl _sp_2048_sub_32 +.p2align 4 +_sp_2048_sub_32: +#endif /* __APPLE__ */ + movq (%rsi), %rcx + xorq %rax, %rax + subq (%rdx), %rcx + movq 8(%rsi), %r8 + movq %rcx, (%rdi) + sbbq 8(%rdx), %r8 + movq 16(%rsi), %rcx + movq %r8, 8(%rdi) + sbbq 16(%rdx), %rcx + movq 24(%rsi), %r8 + movq %rcx, 16(%rdi) + sbbq 24(%rdx), %r8 + movq 32(%rsi), %rcx + movq %r8, 24(%rdi) + sbbq 32(%rdx), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%rdi) + sbbq 40(%rdx), %r8 + movq 48(%rsi), %rcx + movq %r8, 40(%rdi) + sbbq 48(%rdx), %rcx + movq 56(%rsi), %r8 + movq %rcx, 48(%rdi) + sbbq 56(%rdx), %r8 + movq 64(%rsi), %rcx + movq %r8, 56(%rdi) + sbbq 64(%rdx), %rcx + movq 72(%rsi), %r8 + movq %rcx, 64(%rdi) + sbbq 72(%rdx), %r8 + movq 80(%rsi), %rcx + movq %r8, 72(%rdi) + sbbq 80(%rdx), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%rdi) + sbbq 88(%rdx), %r8 + movq 96(%rsi), %rcx + movq %r8, 88(%rdi) + sbbq 96(%rdx), %rcx + movq 104(%rsi), %r8 + movq %rcx, 96(%rdi) + sbbq 104(%rdx), %r8 + movq 112(%rsi), %rcx + movq %r8, 104(%rdi) + sbbq 112(%rdx), %rcx + movq 120(%rsi), %r8 + movq %rcx, 112(%rdi) + sbbq 120(%rdx), %r8 + movq 128(%rsi), %rcx + movq %r8, 120(%rdi) + sbbq 128(%rdx), %rcx + movq 136(%rsi), %r8 + movq %rcx, 128(%rdi) + sbbq 136(%rdx), %r8 + movq 144(%rsi), %rcx + movq %r8, 136(%rdi) + sbbq 144(%rdx), %rcx + movq 152(%rsi), %r8 + movq %rcx, 144(%rdi) + sbbq 152(%rdx), %r8 + movq 160(%rsi), %rcx + movq %r8, 152(%rdi) + sbbq 160(%rdx), %rcx + movq 168(%rsi), %r8 + movq %rcx, 160(%rdi) + sbbq 168(%rdx), %r8 + movq 176(%rsi), %rcx + movq %r8, 168(%rdi) + sbbq 176(%rdx), %rcx + movq 184(%rsi), %r8 + movq %rcx, 176(%rdi) + sbbq 184(%rdx), %r8 + movq 192(%rsi), %rcx + movq %r8, 184(%rdi) + sbbq 192(%rdx), %rcx + movq 200(%rsi), %r8 + movq %rcx, 192(%rdi) + sbbq 200(%rdx), %r8 + movq 208(%rsi), %rcx + movq %r8, 200(%rdi) + sbbq 208(%rdx), %rcx + movq 216(%rsi), %r8 + movq %rcx, 208(%rdi) + sbbq 216(%rdx), %r8 + movq 224(%rsi), %rcx + movq %r8, 216(%rdi) + sbbq 224(%rdx), %rcx + movq 232(%rsi), %r8 + movq %rcx, 224(%rdi) + sbbq 232(%rdx), %r8 + movq 240(%rsi), %rcx + movq %r8, 232(%rdi) + sbbq 240(%rdx), %rcx + movq 248(%rsi), %r8 + movq %rcx, 240(%rdi) + sbbq 248(%rdx), %r8 + movq %r8, 248(%rdi) + sbbq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_2048_sub_32,.-sp_2048_sub_32 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +#ifndef __APPLE__ +.globl sp_2048_mont_reduce_avx2_32 +.type sp_2048_mont_reduce_avx2_32,@function +.align 16 +sp_2048_mont_reduce_avx2_32: +#else +.globl _sp_2048_mont_reduce_avx2_32 +.p2align 4 +_sp_2048_mont_reduce_avx2_32: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + movq %rdx, %r8 + xorq %r14, %r14 + # i = 32 + movq $32, %r9 + movq (%rdi), %r13 + addq $128, %rdi + xorq %r12, %r12 +L_mont_loop_avx2_32: + # mu = a[i] * mp + movq %r13, %rdx + movq %r13, %r10 + imulq %r8, %rdx + xorq %r12, %r12 + # a[i+0] += m[0] * mu + mulxq (%rsi), %rax, %rcx + movq -120(%rdi), %r13 + adcxq %rax, %r10 + adoxq %rcx, %r13 + # a[i+1] += m[1] * mu + mulxq 8(%rsi), %rax, %rcx + movq -112(%rdi), %r10 + adcxq %rax, %r13 + adoxq %rcx, %r10 + # a[i+2] += m[2] * mu + mulxq 16(%rsi), %rax, %rcx + movq -104(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -112(%rdi) + # a[i+3] += m[3] * mu + mulxq 24(%rsi), %rax, %rcx + movq -96(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -104(%rdi) + # a[i+4] += m[4] * mu + mulxq 32(%rsi), %rax, %rcx + movq -88(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -96(%rdi) + # a[i+5] += m[5] * mu + mulxq 40(%rsi), %rax, %rcx + movq -80(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -88(%rdi) + # a[i+6] += m[6] * mu + mulxq 48(%rsi), %rax, %rcx + movq -72(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -80(%rdi) + # a[i+7] += m[7] * mu + mulxq 56(%rsi), %rax, %rcx + movq -64(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -72(%rdi) + # a[i+8] += m[8] * mu + mulxq 64(%rsi), %rax, %rcx + movq -56(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -64(%rdi) + # a[i+9] += m[9] * mu + mulxq 72(%rsi), %rax, %rcx + movq -48(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -56(%rdi) + # a[i+10] += m[10] * mu + mulxq 80(%rsi), %rax, %rcx + movq -40(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -48(%rdi) + # a[i+11] += m[11] * mu + mulxq 88(%rsi), %rax, %rcx + movq -32(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -40(%rdi) + # a[i+12] += m[12] * mu + mulxq 96(%rsi), %rax, %rcx + movq -24(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -32(%rdi) + # a[i+13] += m[13] * mu + mulxq 104(%rsi), %rax, %rcx + movq -16(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -24(%rdi) + # a[i+14] += m[14] * mu + mulxq 112(%rsi), %rax, %rcx + movq -8(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -16(%rdi) + # a[i+15] += m[15] * mu + mulxq 120(%rsi), %rax, %rcx + movq (%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -8(%rdi) + # a[i+16] += m[16] * mu + mulxq 128(%rsi), %rax, %rcx + movq 8(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, (%rdi) + # a[i+17] += m[17] * mu + mulxq 136(%rsi), %rax, %rcx + movq 16(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 8(%rdi) + # a[i+18] += m[18] * mu + mulxq 144(%rsi), %rax, %rcx + movq 24(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 16(%rdi) + # a[i+19] += m[19] * mu + mulxq 152(%rsi), %rax, %rcx + movq 32(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 24(%rdi) + # a[i+20] += m[20] * mu + mulxq 160(%rsi), %rax, %rcx + movq 40(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 32(%rdi) + # a[i+21] += m[21] * mu + mulxq 168(%rsi), %rax, %rcx + movq 48(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 40(%rdi) + # a[i+22] += m[22] * mu + mulxq 176(%rsi), 
%rax, %rcx + movq 56(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 48(%rdi) + # a[i+23] += m[23] * mu + mulxq 184(%rsi), %rax, %rcx + movq 64(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 56(%rdi) + # a[i+24] += m[24] * mu + mulxq 192(%rsi), %rax, %rcx + movq 72(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 64(%rdi) + # a[i+25] += m[25] * mu + mulxq 200(%rsi), %rax, %rcx + movq 80(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 72(%rdi) + # a[i+26] += m[26] * mu + mulxq 208(%rsi), %rax, %rcx + movq 88(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 80(%rdi) + # a[i+27] += m[27] * mu + mulxq 216(%rsi), %rax, %rcx + movq 96(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 88(%rdi) + # a[i+28] += m[28] * mu + mulxq 224(%rsi), %rax, %rcx + movq 104(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 96(%rdi) + # a[i+29] += m[29] * mu + mulxq 232(%rsi), %rax, %rcx + movq 112(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 104(%rdi) + # a[i+30] += m[30] * mu + mulxq 240(%rsi), %rax, %rcx + movq 120(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 112(%rdi) + # a[i+31] += m[31] * mu + mulxq 248(%rsi), %rax, %rcx + movq 128(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 120(%rdi) + adcxq %r14, %r10 + movq %r10, 128(%rdi) + movq %r12, %r14 + adoxq %r12, %r14 + adcxq %r12, %r14 + # a += 1 + addq $8, %rdi + # i -= 1 + subq $1, %r9 + jnz L_mont_loop_avx2_32 + subq $128, %rdi + negq %r14 + movq %rdi, %r8 + subq $256, %rdi + movq (%rsi), %rcx + movq %r13, %rdx + pextq %r14, %rcx, %rcx + subq %rcx, %rdx + movq 8(%rsi), %rcx + movq 8(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, (%rdi) + sbbq %rcx, %rax + movq 16(%rsi), %rdx + movq 16(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 8(%rdi) + sbbq %rdx, %rcx + movq 24(%rsi), %rax + movq 24(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 16(%rdi) + sbbq %rax, %rdx + movq 32(%rsi), %rcx + movq 32(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 24(%rdi) + sbbq %rcx, %rax + movq 40(%rsi), %rdx + movq 40(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 32(%rdi) + sbbq %rdx, %rcx + movq 48(%rsi), %rax + movq 48(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 40(%rdi) + sbbq %rax, %rdx + movq 56(%rsi), %rcx + movq 56(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 48(%rdi) + sbbq %rcx, %rax + movq 64(%rsi), %rdx + movq 64(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 56(%rdi) + sbbq %rdx, %rcx + movq 72(%rsi), %rax + movq 72(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 64(%rdi) + sbbq %rax, %rdx + movq 80(%rsi), %rcx + movq 80(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 72(%rdi) + sbbq %rcx, %rax + movq 88(%rsi), %rdx + movq 88(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 80(%rdi) + sbbq %rdx, %rcx + movq 96(%rsi), %rax + movq 96(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 88(%rdi) + sbbq %rax, %rdx + movq 104(%rsi), %rcx + movq 104(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 96(%rdi) + sbbq %rcx, %rax + movq 112(%rsi), %rdx + movq 112(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 104(%rdi) + sbbq %rdx, %rcx + movq 120(%rsi), %rax + movq 120(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 112(%rdi) + sbbq %rax, %rdx + movq 128(%rsi), %rcx + movq 128(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 120(%rdi) + sbbq %rcx, %rax + movq 136(%rsi), %rdx + movq 136(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 128(%rdi) + sbbq %rdx, %rcx + movq 144(%rsi), %rax + movq 144(%r8), %rdx 
+ pextq %r14, %rax, %rax + movq %rcx, 136(%rdi) + sbbq %rax, %rdx + movq 152(%rsi), %rcx + movq 152(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 144(%rdi) + sbbq %rcx, %rax + movq 160(%rsi), %rdx + movq 160(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 152(%rdi) + sbbq %rdx, %rcx + movq 168(%rsi), %rax + movq 168(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 160(%rdi) + sbbq %rax, %rdx + movq 176(%rsi), %rcx + movq 176(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 168(%rdi) + sbbq %rcx, %rax + movq 184(%rsi), %rdx + movq 184(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 176(%rdi) + sbbq %rdx, %rcx + movq 192(%rsi), %rax + movq 192(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 184(%rdi) + sbbq %rax, %rdx + movq 200(%rsi), %rcx + movq 200(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 192(%rdi) + sbbq %rcx, %rax + movq 208(%rsi), %rdx + movq 208(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 200(%rdi) + sbbq %rdx, %rcx + movq 216(%rsi), %rax + movq 216(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 208(%rdi) + sbbq %rax, %rdx + movq 224(%rsi), %rcx + movq 224(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 216(%rdi) + sbbq %rcx, %rax + movq 232(%rsi), %rdx + movq 232(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 224(%rdi) + sbbq %rdx, %rcx + movq 240(%rsi), %rax + movq 240(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 232(%rdi) + sbbq %rax, %rdx + movq 248(%rsi), %rcx + movq 248(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 240(%rdi) + sbbq %rcx, %rax + movq %rax, 248(%rdi) + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_2048_mont_reduce_avx2_32,.-sp_2048_mont_reduce_avx2_32 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +#ifndef __APPLE__ +.globl sp_2048_cond_add_16 +.type sp_2048_cond_add_16,@function +.align 16 +sp_2048_cond_add_16: +#else +.globl _sp_2048_cond_add_16 +.p2align 4 +_sp_2048_cond_add_16: +#endif /* __APPLE__ */ + subq $128, %rsp + movq $0, %rax + movq (%rdx), %r8 + movq 8(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, (%rsp) + movq %r9, 8(%rsp) + movq 16(%rdx), %r8 + movq 24(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 16(%rsp) + movq %r9, 24(%rsp) + movq 32(%rdx), %r8 + movq 40(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq 48(%rdx), %r8 + movq 56(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 48(%rsp) + movq %r9, 56(%rsp) + movq 64(%rdx), %r8 + movq 72(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 64(%rsp) + movq %r9, 72(%rsp) + movq 80(%rdx), %r8 + movq 88(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 80(%rsp) + movq %r9, 88(%rsp) + movq 96(%rdx), %r8 + movq 104(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 96(%rsp) + movq %r9, 104(%rsp) + movq 112(%rdx), %r8 + movq 120(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 112(%rsp) + movq %r9, 120(%rsp) + movq (%rsi), %r8 + movq (%rsp), %rdx + addq %rdx, %r8 + movq 8(%rsi), %r9 + movq 8(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, (%rdi) + movq 16(%rsi), %r8 + movq 16(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 8(%rdi) + movq 24(%rsi), %r9 + movq 24(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 16(%rdi) + movq 32(%rsi), %r8 + movq 32(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 24(%rdi) + movq 40(%rsi), %r9 + movq 40(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 32(%rdi) + movq 48(%rsi), %r8 + movq 48(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 40(%rdi) + movq 56(%rsi), %r9 + movq 56(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 48(%rdi) + movq 64(%rsi), %r8 + movq 64(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 56(%rdi) + movq 72(%rsi), %r9 + movq 72(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 64(%rdi) + movq 80(%rsi), %r8 + movq 80(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 72(%rdi) + movq 88(%rsi), %r9 + movq 88(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 80(%rdi) + movq 96(%rsi), %r8 + movq 96(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 88(%rdi) + movq 104(%rsi), %r9 + movq 104(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 96(%rdi) + movq 112(%rsi), %r8 + movq 112(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 104(%rdi) + movq 120(%rsi), %r9 + movq 120(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 112(%rdi) + movq %r9, 120(%rdi) + adcq $0, %rax + addq $128, %rsp + repz retq +#ifndef __APPLE__ +.size sp_2048_cond_add_16,.-sp_2048_cond_add_16 +#endif /* __APPLE__ */ +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */
+#ifndef __APPLE__
+.globl sp_2048_cond_add_avx2_16
+.type sp_2048_cond_add_avx2_16,@function
+.align 16
+sp_2048_cond_add_avx2_16:
+#else
+.globl _sp_2048_cond_add_avx2_16
+.p2align 4
+_sp_2048_cond_add_avx2_16:
+#endif /* __APPLE__ */
+ movq $0, %rax
+ movq (%rdx), %r10
+ movq (%rsi), %r8
+ pextq %rcx, %r10, %r10
+ addq %r10, %r8
+ movq 8(%rdx), %r10
+ movq 8(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, (%rdi)
+ adcq %r10, %r9
+ movq 16(%rdx), %r8
+ movq 16(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 8(%rdi)
+ adcq %r8, %r10
+ movq 24(%rdx), %r9
+ movq 24(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 16(%rdi)
+ adcq %r9, %r8
+ movq 32(%rdx), %r10
+ movq 32(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 24(%rdi)
+ adcq %r10, %r9
+ movq 40(%rdx), %r8
+ movq 40(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 32(%rdi)
+ adcq %r8, %r10
+ movq 48(%rdx), %r9
+ movq 48(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 40(%rdi)
+ adcq %r9, %r8
+ movq 56(%rdx), %r10
+ movq 56(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 48(%rdi)
+ adcq %r10, %r9
+ movq 64(%rdx), %r8
+ movq 64(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 56(%rdi)
+ adcq %r8, %r10
+ movq 72(%rdx), %r9
+ movq 72(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 64(%rdi)
+ adcq %r9, %r8
+ movq 80(%rdx), %r10
+ movq 80(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 72(%rdi)
+ adcq %r10, %r9
+ movq 88(%rdx), %r8
+ movq 88(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 80(%rdi)
+ adcq %r8, %r10
+ movq 96(%rdx), %r9
+ movq 96(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 88(%rdi)
+ adcq %r9, %r8
+ movq 104(%rdx), %r10
+ movq 104(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 96(%rdi)
+ adcq %r10, %r9
+ movq 112(%rdx), %r8
+ movq 112(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 104(%rdi)
+ adcq %r8, %r10
+ movq 120(%rdx), %r9
+ movq 120(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 112(%rdi)
+ adcq %r9, %r8
+ movq %r8, 120(%rdi)
+ adcq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_cond_add_avx2_16,.-sp_2048_cond_add_avx2_16
+#endif /* __APPLE__ */
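The two conditional adds above implement the same contract: with m = -1 every limb of b is added to a, with m = 0 nothing is. The generic version masks a stack copy of b with andq; the BMI2 version uses pextq, which with an all-ones mask copies its source and with a zero mask yields zero. Either way the instruction and memory-access sequence is identical whether or not the add takes effect, which is what makes the routine constant time. A minimal word-level C model (a sketch, not wolfSSL's C API; the name cond_add_16 is illustrative and unsigned __int128 assumes GCC/Clang on x86_64):

#include <stdint.h>

/* r = a + (b & m) over 16 64-bit limbs; returns the final carry.
 * Masking b limb by limb, rather than branching on m, keeps the
 * work done independent of the secret condition. */
static uint64_t cond_add_16(uint64_t r[16], const uint64_t a[16],
                            const uint64_t b[16], uint64_t m)
{
    unsigned __int128 t = 0;
    int i;
    for (i = 0; i < 16; i++) {
        t += (unsigned __int128)a[i] + (b[i] & m);
        r[i] = (uint64_t)t;   /* low 64 bits of the running sum */
        t >>= 64;             /* carry into the next limb */
    }
    return (uint64_t)t;       /* final carry, %rax in the assembly */
}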
+/* Shift number left by n bits. (r = a << n)
+ *
+ * r Result of left shift by n.
+ * a Number to shift.
+ * n Amount to shift.
+ */
+#ifndef __APPLE__
+.globl sp_2048_lshift_32
+.type sp_2048_lshift_32,@function
+.align 16
+sp_2048_lshift_32:
+#else
+.globl _sp_2048_lshift_32
+.p2align 4
+_sp_2048_lshift_32:
+#endif /* __APPLE__ */
+ movq %rdx, %rcx
+ movq $0, %r10
+ movq 216(%rsi), %r11
+ movq 224(%rsi), %rdx
+ movq 232(%rsi), %rax
+ movq 240(%rsi), %r8
+ movq 248(%rsi), %r9
+ shldq %cl, %r9, %r10
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 224(%rdi)
+ movq %rax, 232(%rdi)
+ movq %r8, 240(%rdi)
+ movq %r9, 248(%rdi)
+ movq %r10, 256(%rdi)
+ movq 184(%rsi), %r9
+ movq 192(%rsi), %rdx
+ movq 200(%rsi), %rax
+ movq 208(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 192(%rdi)
+ movq %rax, 200(%rdi)
+ movq %r8, 208(%rdi)
+ movq %r11, 216(%rdi)
+ movq 152(%rsi), %r11
+ movq 160(%rsi), %rdx
+ movq 168(%rsi), %rax
+ movq 176(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 160(%rdi)
+ movq %rax, 168(%rdi)
+ movq %r8, 176(%rdi)
+ movq %r9, 184(%rdi)
+ movq 120(%rsi), %r9
+ movq 128(%rsi), %rdx
+ movq 136(%rsi), %rax
+ movq 144(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 128(%rdi)
+ movq %rax, 136(%rdi)
+ movq %r8, 144(%rdi)
+ movq %r11, 152(%rdi)
+ movq 88(%rsi), %r11
+ movq 96(%rsi), %rdx
+ movq 104(%rsi), %rax
+ movq 112(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 96(%rdi)
+ movq %rax, 104(%rdi)
+ movq %r8, 112(%rdi)
+ movq %r9, 120(%rdi)
+ movq 56(%rsi), %r9
+ movq 64(%rsi), %rdx
+ movq 72(%rsi), %rax
+ movq 80(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 64(%rdi)
+ movq %rax, 72(%rdi)
+ movq %r8, 80(%rdi)
+ movq %r11, 88(%rdi)
+ movq 24(%rsi), %r11
+ movq 32(%rsi), %rdx
+ movq 40(%rsi), %rax
+ movq 48(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 32(%rdi)
+ movq %rax, 40(%rdi)
+ movq %r8, 48(%rdi)
+ movq %r9, 56(%rdi)
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rax
+ movq 16(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shlq %cl, %rdx
+ movq %rdx, (%rdi)
+ movq %rax, 8(%rdi)
+ movq %r8, 16(%rdi)
+ movq %r11, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_lshift_32,.-sp_2048_lshift_32
+#endif /* __APPLE__ */
+#endif /* !WOLFSSL_SP_NO_2048 */
+#ifndef WOLFSSL_SP_NO_3072
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert.
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */ +#ifndef __APPLE__ +.globl sp_3072_from_bin +.type sp_3072_from_bin,@function +.align 16 +sp_3072_from_bin: +#else +.globl _sp_3072_from_bin +.p2align 4 +_sp_3072_from_bin: +#endif /* __APPLE__ */ + movq %rdx, %r9 + movq %rdi, %r10 + addq %rcx, %r9 + addq $384, %r10 + xorq %r11, %r11 + jmp L_3072_from_bin_64_end +L_3072_from_bin_64_start: + subq $64, %r9 + movbeq 56(%r9), %rax + movbeq 48(%r9), %r8 + movq %rax, (%rdi) + movq %r8, 8(%rdi) + movbeq 40(%r9), %rax + movbeq 32(%r9), %r8 + movq %rax, 16(%rdi) + movq %r8, 24(%rdi) + movbeq 24(%r9), %rax + movbeq 16(%r9), %r8 + movq %rax, 32(%rdi) + movq %r8, 40(%rdi) + movbeq 8(%r9), %rax + movbeq (%r9), %r8 + movq %rax, 48(%rdi) + movq %r8, 56(%rdi) + addq $64, %rdi + subq $64, %rcx +L_3072_from_bin_64_end: + cmpq $63, %rcx + jg L_3072_from_bin_64_start + jmp L_3072_from_bin_8_end +L_3072_from_bin_8_start: + subq $8, %r9 + movbeq (%r9), %rax + movq %rax, (%rdi) + addq $8, %rdi + subq $8, %rcx +L_3072_from_bin_8_end: + cmpq $7, %rcx + jg L_3072_from_bin_8_start + cmpq %r11, %rcx + je L_3072_from_bin_hi_end + movq %r11, %r8 + movq %r11, %rax +L_3072_from_bin_hi_start: + movb (%rdx), %al + shlq $8, %r8 + incq %rdx + addq %rax, %r8 + decq %rcx + jg L_3072_from_bin_hi_start + movq %r8, (%rdi) + addq $8, %rdi +L_3072_from_bin_hi_end: + cmpq %r10, %rdi + je L_3072_from_bin_zero_end +L_3072_from_bin_zero_start: + movq %r11, (%rdi) + addq $8, %rdi + cmpq %r10, %rdi + jl L_3072_from_bin_zero_start +L_3072_from_bin_zero_end: + repz retq +#ifndef __APPLE__ +.size sp_3072_from_bin,.-sp_3072_from_bin +#endif /* __APPLE__ */ +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 384 + * + * r A single precision integer. + * a Byte array. + */ +#ifndef __APPLE__ +.globl sp_3072_to_bin +.type sp_3072_to_bin,@function +.align 16 +sp_3072_to_bin: +#else +.globl _sp_3072_to_bin +.p2align 4 +_sp_3072_to_bin: +#endif /* __APPLE__ */ + movbeq 376(%rdi), %rdx + movbeq 368(%rdi), %rax + movq %rdx, (%rsi) + movq %rax, 8(%rsi) + movbeq 360(%rdi), %rdx + movbeq 352(%rdi), %rax + movq %rdx, 16(%rsi) + movq %rax, 24(%rsi) + movbeq 344(%rdi), %rdx + movbeq 336(%rdi), %rax + movq %rdx, 32(%rsi) + movq %rax, 40(%rsi) + movbeq 328(%rdi), %rdx + movbeq 320(%rdi), %rax + movq %rdx, 48(%rsi) + movq %rax, 56(%rsi) + movbeq 312(%rdi), %rdx + movbeq 304(%rdi), %rax + movq %rdx, 64(%rsi) + movq %rax, 72(%rsi) + movbeq 296(%rdi), %rdx + movbeq 288(%rdi), %rax + movq %rdx, 80(%rsi) + movq %rax, 88(%rsi) + movbeq 280(%rdi), %rdx + movbeq 272(%rdi), %rax + movq %rdx, 96(%rsi) + movq %rax, 104(%rsi) + movbeq 264(%rdi), %rdx + movbeq 256(%rdi), %rax + movq %rdx, 112(%rsi) + movq %rax, 120(%rsi) + movbeq 248(%rdi), %rdx + movbeq 240(%rdi), %rax + movq %rdx, 128(%rsi) + movq %rax, 136(%rsi) + movbeq 232(%rdi), %rdx + movbeq 224(%rdi), %rax + movq %rdx, 144(%rsi) + movq %rax, 152(%rsi) + movbeq 216(%rdi), %rdx + movbeq 208(%rdi), %rax + movq %rdx, 160(%rsi) + movq %rax, 168(%rsi) + movbeq 200(%rdi), %rdx + movbeq 192(%rdi), %rax + movq %rdx, 176(%rsi) + movq %rax, 184(%rsi) + movbeq 184(%rdi), %rdx + movbeq 176(%rdi), %rax + movq %rdx, 192(%rsi) + movq %rax, 200(%rsi) + movbeq 168(%rdi), %rdx + movbeq 160(%rdi), %rax + movq %rdx, 208(%rsi) + movq %rax, 216(%rsi) + movbeq 152(%rdi), %rdx + movbeq 144(%rdi), %rax + movq %rdx, 224(%rsi) + movq %rax, 232(%rsi) + movbeq 136(%rdi), %rdx + movbeq 128(%rdi), %rax + movq %rdx, 240(%rsi) + movq %rax, 248(%rsi) + movbeq 120(%rdi), %rdx + movbeq 112(%rdi), %rax + movq %rdx, 256(%rsi) + movq %rax, 264(%rsi) + movbeq 
104(%rdi), %rdx + movbeq 96(%rdi), %rax + movq %rdx, 272(%rsi) + movq %rax, 280(%rsi) + movbeq 88(%rdi), %rdx + movbeq 80(%rdi), %rax + movq %rdx, 288(%rsi) + movq %rax, 296(%rsi) + movbeq 72(%rdi), %rdx + movbeq 64(%rdi), %rax + movq %rdx, 304(%rsi) + movq %rax, 312(%rsi) + movbeq 56(%rdi), %rdx + movbeq 48(%rdi), %rax + movq %rdx, 320(%rsi) + movq %rax, 328(%rsi) + movbeq 40(%rdi), %rdx + movbeq 32(%rdi), %rax + movq %rdx, 336(%rsi) + movq %rax, 344(%rsi) + movbeq 24(%rdi), %rdx + movbeq 16(%rdi), %rax + movq %rdx, 352(%rsi) + movq %rax, 360(%rsi) + movbeq 8(%rdi), %rdx + movbeq (%rdi), %rax + movq %rdx, 368(%rsi) + movq %rax, 376(%rsi) + repz retq +#ifndef __APPLE__ +.size sp_3072_to_bin,.-sp_3072_to_bin +#endif /* __APPLE__ */ +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_3072_mul_12 +.type sp_3072_mul_12,@function +.align 16 +sp_3072_mul_12: +#else +.globl _sp_3072_mul_12 +.p2align 4 +_sp_3072_mul_12: +#endif /* __APPLE__ */ + movq %rdx, %rcx + subq $96, %rsp + # A[0] * B[0] + movq (%rcx), %rax + mulq (%rsi) + xorq %r10, %r10 + movq %rax, (%rsp) + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rcx), %rax + mulq (%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[1] * B[0] + movq (%rcx), %rax + mulq 8(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 8(%rsp) + # A[0] * B[2] + movq 16(%rcx), %rax + mulq (%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[1] * B[1] + movq 8(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[2] * B[0] + movq (%rcx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 16(%rsp) + # A[0] * B[3] + movq 24(%rcx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[1] * B[2] + movq 16(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[2] * B[1] + movq 8(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[3] * B[0] + movq (%rcx), %rax + mulq 24(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 24(%rsp) + # A[0] * B[4] + movq 32(%rcx), %rax + mulq (%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[1] * B[3] + movq 24(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[2] * B[2] + movq 16(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[3] * B[1] + movq 8(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[4] * B[0] + movq (%rcx), %rax + mulq 32(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 32(%rsp) + # A[0] * B[5] + movq 40(%rcx), %rax + mulq (%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[1] * B[4] + movq 32(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[2] * B[3] + movq 24(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B[2] + movq 16(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[4] * B[1] + movq 8(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[5] * B[0] + movq (%rcx), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 40(%rsp) + # A[0] * B[6] + movq 48(%rcx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq 
%rdx, %r9 + adcq $0, %r10 + # A[1] * B[5] + movq 40(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[2] * B[4] + movq 32(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[3] * B[3] + movq 24(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[4] * B[2] + movq 16(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[5] * B[1] + movq 8(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[6] * B[0] + movq (%rcx), %rax + mulq 48(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 48(%rsp) + # A[0] * B[7] + movq 56(%rcx), %rax + mulq (%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[1] * B[6] + movq 48(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[2] * B[5] + movq 40(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[3] * B[4] + movq 32(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[4] * B[3] + movq 24(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[5] * B[2] + movq 16(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[6] * B[1] + movq 8(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[7] * B[0] + movq (%rcx), %rax + mulq 56(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 56(%rsp) + # A[0] * B[8] + movq 64(%rcx), %rax + mulq (%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[1] * B[7] + movq 56(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[2] * B[6] + movq 48(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B[5] + movq 40(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[4] * B[4] + movq 32(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[5] * B[3] + movq 24(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[6] * B[2] + movq 16(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[7] * B[1] + movq 8(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[8] * B[0] + movq (%rcx), %rax + mulq 64(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 64(%rsp) + # A[0] * B[9] + movq 72(%rcx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[1] * B[8] + movq 64(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[2] * B[7] + movq 56(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[3] * B[6] + movq 48(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[4] * B[5] + movq 40(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[5] * B[4] + movq 32(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[6] * B[3] + movq 24(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[7] * B[2] + movq 16(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[8] * B[1] + movq 8(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[9] * B[0] + movq (%rcx), %rax + mulq 72(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + 
movq %r8, 72(%rsp) + # A[0] * B[10] + movq 80(%rcx), %rax + mulq (%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[1] * B[9] + movq 72(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[2] * B[8] + movq 64(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[3] * B[7] + movq 56(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[4] * B[6] + movq 48(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[5] * B[5] + movq 40(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[6] * B[4] + movq 32(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[7] * B[3] + movq 24(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[8] * B[2] + movq 16(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[9] * B[1] + movq 8(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[10] * B[0] + movq (%rcx), %rax + mulq 80(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 80(%rsp) + # A[0] * B[11] + movq 88(%rcx), %rax + mulq (%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[1] * B[10] + movq 80(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[2] * B[9] + movq 72(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B[8] + movq 64(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[4] * B[7] + movq 56(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[5] * B[6] + movq 48(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[6] * B[5] + movq 40(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[7] * B[4] + movq 32(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[8] * B[3] + movq 24(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[9] * B[2] + movq 16(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[10] * B[1] + movq 8(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[11] * B[0] + movq (%rcx), %rax + mulq 88(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 88(%rsp) + # A[1] * B[11] + movq 88(%rcx), %rax + mulq 8(%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[2] * B[10] + movq 80(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[3] * B[9] + movq 72(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[4] * B[8] + movq 64(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[5] * B[7] + movq 56(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[6] * B[6] + movq 48(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[7] * B[5] + movq 40(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[8] * B[4] + movq 32(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[9] * B[3] + movq 24(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[10] * B[2] + movq 16(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[11] * 
B[1] + movq 8(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 96(%rdi) + # A[2] * B[11] + movq 88(%rcx), %rax + mulq 16(%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[3] * B[10] + movq 80(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[4] * B[9] + movq 72(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[5] * B[8] + movq 64(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[6] * B[7] + movq 56(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[7] * B[6] + movq 48(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[8] * B[5] + movq 40(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[9] * B[4] + movq 32(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[10] * B[3] + movq 24(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[11] * B[2] + movq 16(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 104(%rdi) + # A[3] * B[11] + movq 88(%rcx), %rax + mulq 24(%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[4] * B[10] + movq 80(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[5] * B[9] + movq 72(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[6] * B[8] + movq 64(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[7] * B[7] + movq 56(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[8] * B[6] + movq 48(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[9] * B[5] + movq 40(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[10] * B[4] + movq 32(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[11] * B[3] + movq 24(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 112(%rdi) + # A[4] * B[11] + movq 88(%rcx), %rax + mulq 32(%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[5] * B[10] + movq 80(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[6] * B[9] + movq 72(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[7] * B[8] + movq 64(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[8] * B[7] + movq 56(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[9] * B[6] + movq 48(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[10] * B[5] + movq 40(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[11] * B[4] + movq 32(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 120(%rdi) + # A[5] * B[11] + movq 88(%rcx), %rax + mulq 40(%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[6] * B[10] + movq 80(%rcx), %rax + mulq 48(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[7] * B[9] + movq 72(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[8] * B[8] + movq 64(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[9] * B[7] + movq 56(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r9 
+ adcq %rdx, %r10 + adcq $0, %r8 + # A[10] * B[6] + movq 48(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[11] * B[5] + movq 40(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 128(%rdi) + # A[6] * B[11] + movq 88(%rcx), %rax + mulq 48(%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[7] * B[10] + movq 80(%rcx), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[8] * B[9] + movq 72(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[9] * B[8] + movq 64(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[10] * B[7] + movq 56(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[11] * B[6] + movq 48(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 136(%rdi) + # A[7] * B[11] + movq 88(%rcx), %rax + mulq 56(%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[8] * B[10] + movq 80(%rcx), %rax + mulq 64(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[9] * B[9] + movq 72(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[10] * B[8] + movq 64(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[11] * B[7] + movq 56(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 144(%rdi) + # A[8] * B[11] + movq 88(%rcx), %rax + mulq 64(%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[9] * B[10] + movq 80(%rcx), %rax + mulq 72(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[10] * B[9] + movq 72(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[11] * B[8] + movq 64(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 152(%rdi) + # A[9] * B[11] + movq 88(%rcx), %rax + mulq 72(%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[10] * B[10] + movq 80(%rcx), %rax + mulq 80(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[11] * B[9] + movq 72(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 160(%rdi) + # A[10] * B[11] + movq 88(%rcx), %rax + mulq 80(%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[11] * B[10] + movq 80(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 168(%rdi) + # A[11] * B[11] + movq 88(%rcx), %rax + mulq 88(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + movq %r9, 176(%rdi) + movq %r10, 184(%rdi) + movq (%rsp), %rax + movq 8(%rsp), %rdx + movq 16(%rsp), %r8 + movq 24(%rsp), %r9 + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %r8, 16(%rdi) + movq %r9, 24(%rdi) + movq 32(%rsp), %rax + movq 40(%rsp), %rdx + movq 48(%rsp), %r8 + movq 56(%rsp), %r9 + movq %rax, 32(%rdi) + movq %rdx, 40(%rdi) + movq %r8, 48(%rdi) + movq %r9, 56(%rdi) + movq 64(%rsp), %rax + movq 72(%rsp), %rdx + movq 80(%rsp), %r8 + movq 88(%rsp), %r9 + movq %rax, 64(%rdi) + movq %rdx, 72(%rdi) + movq %r8, 80(%rdi) + movq %r9, 88(%rdi) + addq $96, %rsp + repz retq +#ifndef __APPLE__ +.size sp_3072_mul_12,.-sp_3072_mul_12 +#endif /* __APPLE__ */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +#ifndef __APPLE__ +.globl sp_3072_sqr_12 +.type sp_3072_sqr_12,@function +.align 16 +sp_3072_sqr_12: +#else +.globl _sp_3072_sqr_12 +.p2align 4 +_sp_3072_sqr_12: +#endif /* __APPLE__ */ + push %r12 + subq $96, %rsp + # A[0] * A[0] + movq (%rsi), %rax + mulq %rax + xorq %r9, %r9 + movq %rax, (%rsp) + movq %rdx, %r8 + # A[0] * A[1] + movq 8(%rsi), %rax + mulq (%rsi) + xorq %rcx, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + movq %r8, 8(%rsp) + # A[0] * A[2] + movq 16(%rsi), %rax + mulq (%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + # A[1] * A[1] + movq 8(%rsi), %rax + mulq %rax + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + movq %r9, 16(%rsp) + # A[0] * A[3] + movq 24(%rsi), %rax + mulq (%rsi) + xorq %r9, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + # A[1] * A[2] + movq 16(%rsi), %rax + mulq 8(%rsi) + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + movq %rcx, 24(%rsp) + # A[0] * A[4] + movq 32(%rsi), %rax + mulq (%rsi) + xorq %rcx, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + # A[1] * A[3] + movq 24(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + # A[2] * A[2] + movq 16(%rsi), %rax + mulq %rax + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + movq %r8, 32(%rsp) + # A[0] * A[5] + movq 40(%rsi), %rax + mulq (%rsi) + xorq %r8, %r8 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[4] + movq 32(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[3] + movq 24(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %r9 + adcq %r11, %rcx + adcq %r12, %r8 + movq %r9, 40(%rsp) + # A[0] * A[6] + movq 48(%rsi), %rax + mulq (%rsi) + xorq %r9, %r9 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[5] + movq 40(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[4] + movq 32(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[3] + movq 24(%rsi), %rax + mulq %rax + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %rcx + adcq %r11, %r8 + adcq %r12, %r9 + movq %rcx, 48(%rsp) + # A[0] * A[7] + movq 56(%rsi), %rax + mulq (%rsi) + xorq %rcx, %rcx + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[6] + movq 48(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[5] + movq 40(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[4] + movq 32(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %r8 + adcq %r11, %r9 + adcq %r12, %rcx + movq %r8, 56(%rsp) + # A[0] * A[8] + movq 64(%rsi), %rax + mulq (%rsi) + xorq %r8, %r8 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[7] + movq 56(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[6] + movq 48(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[5] + movq 40(%rsi), %rax + mulq 
24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[4] * A[4] + movq 32(%rsi), %rax + mulq %rax + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r9 + adcq %r11, %rcx + adcq %r12, %r8 + movq %r9, 64(%rsp) + # A[0] * A[9] + movq 72(%rsi), %rax + mulq (%rsi) + xorq %r9, %r9 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[8] + movq 64(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[7] + movq 56(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[6] + movq 48(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[4] * A[5] + movq 40(%rsi), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %rcx + adcq %r11, %r8 + adcq %r12, %r9 + movq %rcx, 72(%rsp) + # A[0] * A[10] + movq 80(%rsi), %rax + mulq (%rsi) + xorq %rcx, %rcx + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[9] + movq 72(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[8] + movq 64(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[7] + movq 56(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[4] * A[6] + movq 48(%rsi), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[5] * A[5] + movq 40(%rsi), %rax + mulq %rax + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r8 + adcq %r11, %r9 + adcq %r12, %rcx + movq %r8, 80(%rsp) + # A[0] * A[11] + movq 88(%rsi), %rax + mulq (%rsi) + xorq %r8, %r8 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[10] + movq 80(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[9] + movq 72(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[8] + movq 64(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[4] * A[7] + movq 56(%rsi), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[5] * A[6] + movq 48(%rsi), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %r9 + adcq %r11, %rcx + adcq %r12, %r8 + movq %r9, 88(%rsp) + # A[1] * A[11] + movq 88(%rsi), %rax + mulq 8(%rsi) + xorq %r9, %r9 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[2] * A[10] + movq 80(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[3] * A[9] + movq 72(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[4] * A[8] + movq 64(%rsi), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[5] * A[7] + movq 56(%rsi), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[6] * A[6] + movq 48(%rsi), %rax + mulq %rax + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %rcx + adcq %r11, %r8 + adcq %r12, %r9 + movq %rcx, 96(%rdi) + # A[2] * A[11] + movq 88(%rsi), %rax + mulq 16(%rsi) + xorq %rcx, %rcx + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[3] * A[10] + movq 80(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[4] * A[9] + 
movq 72(%rsi), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[5] * A[8] + movq 64(%rsi), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[6] * A[7] + movq 56(%rsi), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %r8 + adcq %r11, %r9 + adcq %r12, %rcx + movq %r8, 104(%rdi) + # A[3] * A[11] + movq 88(%rsi), %rax + mulq 24(%rsi) + xorq %r8, %r8 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[4] * A[10] + movq 80(%rsi), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[5] * A[9] + movq 72(%rsi), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[6] * A[8] + movq 64(%rsi), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[7] * A[7] + movq 56(%rsi), %rax + mulq %rax + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r9 + adcq %r11, %rcx + adcq %r12, %r8 + movq %r9, 112(%rdi) + # A[4] * A[11] + movq 88(%rsi), %rax + mulq 32(%rsi) + xorq %r9, %r9 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[5] * A[10] + movq 80(%rsi), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[6] * A[9] + movq 72(%rsi), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[7] * A[8] + movq 64(%rsi), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %rcx + adcq %r11, %r8 + adcq %r12, %r9 + movq %rcx, 120(%rdi) + # A[5] * A[11] + movq 88(%rsi), %rax + mulq 40(%rsi) + xorq %rcx, %rcx + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[6] * A[10] + movq 80(%rsi), %rax + mulq 48(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[7] * A[9] + movq 72(%rsi), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[8] * A[8] + movq 64(%rsi), %rax + mulq %rax + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r8 + adcq %r11, %r9 + adcq %r12, %rcx + movq %r8, 128(%rdi) + # A[6] * A[11] + movq 88(%rsi), %rax + mulq 48(%rsi) + xorq %r8, %r8 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[7] * A[10] + movq 80(%rsi), %rax + mulq 56(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[8] * A[9] + movq 72(%rsi), %rax + mulq 64(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %r9 + adcq %r11, %rcx + adcq %r12, %r8 + movq %r9, 136(%rdi) + # A[7] * A[11] + movq 88(%rsi), %rax + mulq 56(%rsi) + xorq %r9, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + # A[8] * A[10] + movq 80(%rsi), %rax + mulq 64(%rsi) + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + # A[9] * A[9] + movq 72(%rsi), %rax + mulq %rax + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + movq %rcx, 144(%rdi) + # A[8] * A[11] + movq 88(%rsi), %rax + mulq 64(%rsi) + xorq %rcx, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + # A[9] * A[10] + movq 80(%rsi), %rax + mulq 72(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + movq %r8, 152(%rdi) + # A[9] * A[11] + movq 88(%rsi), %rax + 
mulq 72(%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + # A[10] * A[10] + movq 80(%rsi), %rax + mulq %rax + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + movq %r9, 160(%rdi) + # A[10] * A[11] + movq 88(%rsi), %rax + mulq 80(%rsi) + xorq %r9, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + movq %rcx, 168(%rdi) + # A[11] * A[11] + movq 88(%rsi), %rax + mulq %rax + addq %rax, %r8 + adcq %rdx, %r9 + movq %r8, 176(%rdi) + movq %r9, 184(%rdi) + movq (%rsp), %rax + movq 8(%rsp), %rdx + movq 16(%rsp), %r10 + movq 24(%rsp), %r11 + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 32(%rsp), %rax + movq 40(%rsp), %rdx + movq 48(%rsp), %r10 + movq 56(%rsp), %r11 + movq %rax, 32(%rdi) + movq %rdx, 40(%rdi) + movq %r10, 48(%rdi) + movq %r11, 56(%rdi) + movq 64(%rsp), %rax + movq 72(%rsp), %rdx + movq 80(%rsp), %r10 + movq 88(%rsp), %r11 + movq %rax, 64(%rdi) + movq %rdx, 72(%rdi) + movq %r10, 80(%rdi) + movq %r11, 88(%rdi) + addq $96, %rsp + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_3072_sqr_12,.-sp_3072_sqr_12 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Multiply a and b into r. (r = a * b) + * + * r Result of multiplication. + * a First number to multiply. + * b Second number to multiply. + */ +#ifndef __APPLE__ +.globl sp_3072_mul_avx2_12 +.type sp_3072_mul_avx2_12,@function +.align 16 +sp_3072_mul_avx2_12: +#else +.globl _sp_3072_mul_avx2_12 +.p2align 4 +_sp_3072_mul_avx2_12: +#endif /* __APPLE__ */ + push %rbx + push %rbp + push %r12 + movq %rdx, %rbp + subq $96, %rsp + cmpq %rdi, %rsi + movq %rsp, %rbx + cmovne %rdi, %rbx + cmpq %rdi, %rbp + cmove %rsp, %rbx + xorq %r12, %r12 + movq (%rsi), %rdx + # A[0] * B[0] + mulx (%rbp), %r8, %r9 + # A[0] * B[1] + mulx 8(%rbp), %rax, %r10 + movq %r8, (%rbx) + adcxq %rax, %r9 + movq %r9, 8(%rbx) + # A[0] * B[2] + mulx 16(%rbp), %rax, %r8 + adcxq %rax, %r10 + # A[0] * B[3] + mulx 24(%rbp), %rax, %r9 + movq %r10, 16(%rbx) + adcxq %rax, %r8 + movq %r8, 24(%rbx) + # A[0] * B[4] + mulx 32(%rbp), %rax, %r10 + adcxq %rax, %r9 + # A[0] * B[5] + mulx 40(%rbp), %rax, %r8 + movq %r9, 32(%rbx) + adcxq %rax, %r10 + movq %r10, 40(%rbx) + # A[0] * B[6] + mulx 48(%rbp), %rax, %r9 + adcxq %rax, %r8 + # A[0] * B[7] + mulx 56(%rbp), %rax, %r10 + movq %r8, 48(%rbx) + adcxq %rax, %r9 + movq %r9, 56(%rbx) + # A[0] * B[8] + mulx 64(%rbp), %rax, %r8 + adcxq %rax, %r10 + # A[0] * B[9] + mulx 72(%rbp), %rax, %r9 + movq %r10, 64(%rbx) + adcxq %rax, %r8 + movq %r8, 72(%rbx) + # A[0] * B[10] + mulx 80(%rbp), %rax, %r10 + adcxq %rax, %r9 + # A[0] * B[11] + mulx 88(%rbp), %rax, %r8 + movq %r9, 80(%rbx) + adcxq %rax, %r10 + adcxq %r12, %r8 + movq %r12, %r11 + adcxq %r12, %r11 + movq %r10, 88(%rbx) + movq %r8, 96(%rdi) + movq 8(%rsi), %rdx + movq 8(%rbx), %r9 + movq 16(%rbx), %r10 + movq 24(%rbx), %r8 + # A[1] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[1] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r9, 8(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 16(%rbx) + movq 32(%rbx), %r9 + movq 40(%rbx), %r10 + # A[1] * B[2] + mulx 16(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[1] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r8, 24(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 32(%rbx) + movq 48(%rbx), %r8 + movq 56(%rbx), %r9 + # A[1] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[1] * B[5] + mulx 40(%rbp), %rax, %rcx 
+ movq %r10, 40(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 48(%rbx) + movq 64(%rbx), %r10 + movq 72(%rbx), %r8 + # A[1] * B[6] + mulx 48(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[1] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r9, 56(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 64(%rbx) + movq 80(%rbx), %r9 + movq 88(%rbx), %r10 + # A[1] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[1] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r8, 72(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 80(%rbx) + movq 96(%rdi), %r8 + # A[1] * B[10] + mulx 80(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[1] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r10, 88(%rbx) + movq %r12, %r9 + adcxq %rax, %r8 + adoxq %rcx, %r9 + adcxq %r11, %r9 + movq %r12, %r11 + adoxq %r12, %r11 + adcxq %r12, %r11 + movq %r8, 96(%rdi) + movq %r9, 104(%rdi) + movq 16(%rsi), %rdx + movq 16(%rbx), %r10 + movq 24(%rbx), %r8 + movq 32(%rbx), %r9 + # A[2] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[2] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r10, 16(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 24(%rbx) + movq 40(%rbx), %r10 + movq 48(%rbx), %r8 + # A[2] * B[2] + mulx 16(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[2] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r9, 32(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 40(%rbx) + movq 56(%rbx), %r9 + movq 64(%rbx), %r10 + # A[2] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[2] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r8, 48(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 56(%rbx) + movq 72(%rbx), %r8 + movq 80(%rbx), %r9 + # A[2] * B[6] + mulx 48(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[2] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r10, 64(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 72(%rbx) + movq 88(%rbx), %r10 + movq 96(%rdi), %r8 + # A[2] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[2] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r9, 80(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 88(%rbx) + movq 104(%rdi), %r9 + # A[2] * B[10] + mulx 80(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[2] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r8, 96(%rdi) + movq %r12, %r10 + adcxq %rax, %r9 + adoxq %rcx, %r10 + adcxq %r11, %r10 + movq %r12, %r11 + adoxq %r12, %r11 + adcxq %r12, %r11 + movq %r9, 104(%rdi) + movq %r10, 112(%rdi) + movq 24(%rsi), %rdx + movq 24(%rbx), %r8 + movq 32(%rbx), %r9 + movq 40(%rbx), %r10 + # A[3] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[3] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r8, 24(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 32(%rbx) + movq 48(%rbx), %r8 + movq 56(%rbx), %r9 + # A[3] * B[2] + mulx 16(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[3] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r10, 40(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 48(%rbx) + movq 64(%rbx), %r10 + movq 72(%rbx), %r8 + # A[3] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[3] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r9, 56(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 64(%rbx) + movq 80(%rbx), %r9 + movq 88(%rbx), %r10 + # A[3] * B[6] + mulx 48(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[3] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r8, 72(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 
80(%rbx) + movq 96(%rdi), %r8 + movq 104(%rdi), %r9 + # A[3] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[3] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r10, 88(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 96(%rdi) + movq 112(%rdi), %r10 + # A[3] * B[10] + mulx 80(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[3] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r9, 104(%rdi) + movq %r12, %r8 + adcxq %rax, %r10 + adoxq %rcx, %r8 + adcxq %r11, %r8 + movq %r12, %r11 + adoxq %r12, %r11 + adcxq %r12, %r11 + movq %r10, 112(%rdi) + movq %r8, 120(%rdi) + movq 32(%rsi), %rdx + movq 32(%rbx), %r9 + movq 40(%rbx), %r10 + movq 48(%rbx), %r8 + # A[4] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[4] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r9, 32(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 40(%rbx) + movq 56(%rbx), %r9 + movq 64(%rbx), %r10 + # A[4] * B[2] + mulx 16(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[4] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r8, 48(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 56(%rbx) + movq 72(%rbx), %r8 + movq 80(%rbx), %r9 + # A[4] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[4] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r10, 64(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 72(%rbx) + movq 88(%rbx), %r10 + movq 96(%rdi), %r8 + # A[4] * B[6] + mulx 48(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[4] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r9, 80(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 88(%rbx) + movq 104(%rdi), %r9 + movq 112(%rdi), %r10 + # A[4] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[4] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r8, 96(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 104(%rdi) + movq 120(%rdi), %r8 + # A[4] * B[10] + mulx 80(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[4] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r10, 112(%rdi) + movq %r12, %r9 + adcxq %rax, %r8 + adoxq %rcx, %r9 + adcxq %r11, %r9 + movq %r12, %r11 + adoxq %r12, %r11 + adcxq %r12, %r11 + movq %r8, 120(%rdi) + movq %r9, 128(%rdi) + movq 40(%rsi), %rdx + movq 40(%rbx), %r10 + movq 48(%rbx), %r8 + movq 56(%rbx), %r9 + # A[5] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[5] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r10, 40(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 48(%rbx) + movq 64(%rbx), %r10 + movq 72(%rbx), %r8 + # A[5] * B[2] + mulx 16(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[5] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r9, 56(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 64(%rbx) + movq 80(%rbx), %r9 + movq 88(%rbx), %r10 + # A[5] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[5] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r8, 72(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 80(%rbx) + movq 96(%rdi), %r8 + movq 104(%rdi), %r9 + # A[5] * B[6] + mulx 48(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[5] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r10, 88(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 96(%rdi) + movq 112(%rdi), %r10 + movq 120(%rdi), %r8 + # A[5] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[5] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r9, 104(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 112(%rdi) + movq 128(%rdi), %r9 + # A[5] * B[10] + mulx 
80(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[5] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r8, 120(%rdi) + movq %r12, %r10 + adcxq %rax, %r9 + adoxq %rcx, %r10 + adcxq %r11, %r10 + movq %r12, %r11 + adoxq %r12, %r11 + adcxq %r12, %r11 + movq %r9, 128(%rdi) + movq %r10, 136(%rdi) + movq 48(%rsi), %rdx + movq 48(%rbx), %r8 + movq 56(%rbx), %r9 + movq 64(%rbx), %r10 + # A[6] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[6] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r8, 48(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 56(%rbx) + movq 72(%rbx), %r8 + movq 80(%rbx), %r9 + # A[6] * B[2] + mulx 16(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[6] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r10, 64(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 72(%rbx) + movq 88(%rbx), %r10 + movq 96(%rdi), %r8 + # A[6] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[6] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r9, 80(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 88(%rbx) + movq 104(%rdi), %r9 + movq 112(%rdi), %r10 + # A[6] * B[6] + mulx 48(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[6] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r8, 96(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 104(%rdi) + movq 120(%rdi), %r8 + movq 128(%rdi), %r9 + # A[6] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[6] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r10, 112(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 120(%rdi) + movq 136(%rdi), %r10 + # A[6] * B[10] + mulx 80(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[6] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r9, 128(%rdi) + movq %r12, %r8 + adcxq %rax, %r10 + adoxq %rcx, %r8 + adcxq %r11, %r8 + movq %r12, %r11 + adoxq %r12, %r11 + adcxq %r12, %r11 + movq %r10, 136(%rdi) + movq %r8, 144(%rdi) + movq 56(%rsi), %rdx + movq 56(%rbx), %r9 + movq 64(%rbx), %r10 + movq 72(%rbx), %r8 + # A[7] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[7] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r9, 56(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 64(%rbx) + movq 80(%rbx), %r9 + movq 88(%rbx), %r10 + # A[7] * B[2] + mulx 16(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[7] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r8, 72(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 80(%rbx) + movq 96(%rdi), %r8 + movq 104(%rdi), %r9 + # A[7] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[7] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r10, 88(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 96(%rdi) + movq 112(%rdi), %r10 + movq 120(%rdi), %r8 + # A[7] * B[6] + mulx 48(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[7] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r9, 104(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 112(%rdi) + movq 128(%rdi), %r9 + movq 136(%rdi), %r10 + # A[7] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[7] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r8, 120(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 128(%rdi) + movq 144(%rdi), %r8 + # A[7] * B[10] + mulx 80(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[7] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r10, 136(%rdi) + movq %r12, %r9 + adcxq %rax, %r8 + adoxq %rcx, %r9 + adcxq %r11, %r9 + movq %r12, %r11 + adoxq %r12, %r11 + adcxq %r12, %r11 + movq %r8, 144(%rdi) + movq %r9, 152(%rdi) + 
movq 64(%rsi), %rdx + movq 64(%rbx), %r10 + movq 72(%rbx), %r8 + movq 80(%rbx), %r9 + # A[8] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[8] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r10, 64(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 72(%rbx) + movq 88(%rbx), %r10 + movq 96(%rdi), %r8 + # A[8] * B[2] + mulx 16(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[8] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r9, 80(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 88(%rbx) + movq 104(%rdi), %r9 + movq 112(%rdi), %r10 + # A[8] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[8] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r8, 96(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 104(%rdi) + movq 120(%rdi), %r8 + movq 128(%rdi), %r9 + # A[8] * B[6] + mulx 48(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[8] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r10, 112(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 120(%rdi) + movq 136(%rdi), %r10 + movq 144(%rdi), %r8 + # A[8] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[8] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r9, 128(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 136(%rdi) + movq 152(%rdi), %r9 + # A[8] * B[10] + mulx 80(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[8] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r8, 144(%rdi) + movq %r12, %r10 + adcxq %rax, %r9 + adoxq %rcx, %r10 + adcxq %r11, %r10 + movq %r12, %r11 + adoxq %r12, %r11 + adcxq %r12, %r11 + movq %r9, 152(%rdi) + movq %r10, 160(%rdi) + movq 72(%rsi), %rdx + movq 72(%rbx), %r8 + movq 80(%rbx), %r9 + movq 88(%rbx), %r10 + # A[9] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[9] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r8, 72(%rbx) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 80(%rbx) + movq 96(%rdi), %r8 + movq 104(%rdi), %r9 + # A[9] * B[2] + mulx 16(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[9] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r10, 88(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 96(%rdi) + movq 112(%rdi), %r10 + movq 120(%rdi), %r8 + # A[9] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[9] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r9, 104(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 112(%rdi) + movq 128(%rdi), %r9 + movq 136(%rdi), %r10 + # A[9] * B[6] + mulx 48(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[9] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r8, 120(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 128(%rdi) + movq 144(%rdi), %r8 + movq 152(%rdi), %r9 + # A[9] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[9] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r10, 136(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 144(%rdi) + movq 160(%rdi), %r10 + # A[9] * B[10] + mulx 80(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[9] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r9, 152(%rdi) + movq %r12, %r8 + adcxq %rax, %r10 + adoxq %rcx, %r8 + adcxq %r11, %r8 + movq %r12, %r11 + adoxq %r12, %r11 + adcxq %r12, %r11 + movq %r10, 160(%rdi) + movq %r8, 168(%rdi) + movq 80(%rsi), %rdx + movq 80(%rbx), %r9 + movq 88(%rbx), %r10 + movq 96(%rdi), %r8 + # A[10] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[10] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r9, 80(%rbx) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 88(%rbx) + 
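+ # %r8-%r10 rotate as a three-word accumulator window: each word is + # stored back to the result and its register reloaded with a higher + # column before the next pair of partial products lands.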
movq 104(%rdi), %r9 + movq 112(%rdi), %r10 + # A[10] * B[2] + mulx 16(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[10] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r8, 96(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 104(%rdi) + movq 120(%rdi), %r8 + movq 128(%rdi), %r9 + # A[10] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[10] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r10, 112(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 120(%rdi) + movq 136(%rdi), %r10 + movq 144(%rdi), %r8 + # A[10] * B[6] + mulx 48(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[10] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r9, 128(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 136(%rdi) + movq 152(%rdi), %r9 + movq 160(%rdi), %r10 + # A[10] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[10] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r8, 144(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 152(%rdi) + movq 168(%rdi), %r8 + # A[10] * B[10] + mulx 80(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[10] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r10, 160(%rdi) + movq %r12, %r9 + adcxq %rax, %r8 + adoxq %rcx, %r9 + adcxq %r11, %r9 + movq %r12, %r11 + adoxq %r12, %r11 + adcxq %r12, %r11 + movq %r8, 168(%rdi) + movq %r9, 176(%rdi) + movq 88(%rsi), %rdx + movq 88(%rbx), %r10 + movq 96(%rdi), %r8 + movq 104(%rdi), %r9 + # A[11] * B[0] + mulx (%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[11] * B[1] + mulx 8(%rbp), %rax, %rcx + movq %r10, 88(%rbx) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 96(%rdi) + movq 112(%rdi), %r10 + movq 120(%rdi), %r8 + # A[11] * B[2] + mulx 16(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[11] * B[3] + mulx 24(%rbp), %rax, %rcx + movq %r9, 104(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 112(%rdi) + movq 128(%rdi), %r9 + movq 136(%rdi), %r10 + # A[11] * B[4] + mulx 32(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[11] * B[5] + mulx 40(%rbp), %rax, %rcx + movq %r8, 120(%rdi) + adcxq %rax, %r9 + adoxq %rcx, %r10 + movq %r9, 128(%rdi) + movq 144(%rdi), %r8 + movq 152(%rdi), %r9 + # A[11] * B[6] + mulx 48(%rbp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r8 + # A[11] * B[7] + mulx 56(%rbp), %rax, %rcx + movq %r10, 136(%rdi) + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 144(%rdi) + movq 160(%rdi), %r10 + movq 168(%rdi), %r8 + # A[11] * B[8] + mulx 64(%rbp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + # A[11] * B[9] + mulx 72(%rbp), %rax, %rcx + movq %r9, 152(%rdi) + adcxq %rax, %r10 + adoxq %rcx, %r8 + movq %r10, 160(%rdi) + movq 176(%rdi), %r9 + # A[11] * B[10] + mulx 80(%rbp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + # A[11] * B[11] + mulx 88(%rbp), %rax, %rcx + movq %r8, 168(%rdi) + movq %r12, %r10 + adcxq %rax, %r9 + adoxq %rcx, %r10 + adcxq %r11, %r10 + movq %r9, 176(%rdi) + movq %r10, 184(%rdi) + cmpq %rdi, %rsi + je L_start_3072_mul_avx2_12 + cmpq %rdi, %rbp + jne L_end_3072_mul_avx2_12 +L_start_3072_mul_avx2_12: + vmovdqu (%rbx), %xmm0 + vmovups %xmm0, (%rdi) + vmovdqu 16(%rbx), %xmm0 + vmovups %xmm0, 16(%rdi) + vmovdqu 32(%rbx), %xmm0 + vmovups %xmm0, 32(%rdi) + vmovdqu 48(%rbx), %xmm0 + vmovups %xmm0, 48(%rdi) + vmovdqu 64(%rbx), %xmm0 + vmovups %xmm0, 64(%rdi) + vmovdqu 80(%rbx), %xmm0 + vmovups %xmm0, 80(%rdi) +L_end_3072_mul_avx2_12: + addq $96, %rsp + pop %r12 + pop %rbp + pop %rbx + repz retq +#ifndef __APPLE__ +.size sp_3072_mul_avx2_12,.-sp_3072_mul_avx2_12 +#endif /* __APPLE__ */ 
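+ # Tail note: the low half of the product above is accumulated through + # %rbx (stack scratch when the destination aliases an input); the xmm + # copy loop moves those 96 bytes into r only in that aliased case + # (%rdi == %rsi or %rdi == %rbp).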
+#endif /* HAVE_INTEL_AVX2 */ +#ifdef HAVE_INTEL_AVX2 +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_3072_sqr_avx2_12 +.type sp_3072_sqr_avx2_12,@function +.align 16 +sp_3072_sqr_avx2_12: +#else +.globl _sp_3072_sqr_avx2_12 +.p2align 4 +_sp_3072_sqr_avx2_12: +#endif /* __APPLE__ */ + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + push %rbx + subq $96, %rsp + cmpq %rdi, %rsi + movq %rsp, %rbp + cmovne %rdi, %rbp + xorq %r10, %r10 + # Diagonal 1 + # A[1] x A[0] + movq (%rsi), %rdx + mulxq 8(%rsi), %r8, %r9 + movq %r8, 8(%rbp) + movq %r10, %r8 + # A[2] x A[0] + mulxq 16(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 16(%rbp) + movq %r10, %r9 + # A[3] x A[0] + mulxq 24(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 24(%rbp) + movq %r10, %r8 + # A[4] x A[0] + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 32(%rbp) + movq %r10, %r9 + # A[5] x A[0] + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 40(%rbp) + movq %r10, %r8 + # A[6] x A[0] + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 48(%rbp) + movq %r10, %r9 + # A[7] x A[0] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, %r12 + movq %r10, %r8 + # A[8] x A[0] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, %r13 + movq %r10, %r9 + # A[9] x A[0] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, %r14 + movq %r10, %r8 + # A[10] x A[0] + mulxq 80(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, %r15 + movq %r10, %r9 + # A[11] x A[0] + mulxq 88(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, %rbx + # Carry + adcxq %r10, %r9 + movq %r10, %r11 + adcxq %r10, %r11 + adoxq %r10, %r11 + movq %r9, 96(%rdi) + # Diagonal 2 + movq 24(%rbp), %r9 + movq 32(%rbp), %r8 + # A[2] x A[1] + movq 8(%rsi), %rdx + mulxq 16(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 24(%rbp) + movq 40(%rbp), %r9 + # A[3] x A[1] + mulxq 24(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 32(%rbp) + movq 48(%rbp), %r8 + # A[4] x A[1] + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 40(%rbp) + # No load %r12 - %r9 + # A[5] x A[1] + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r12 + movq %r8, 48(%rbp) + # No load %r13 - %r8 + # A[6] x A[1] + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r13 + # No store %r12 + # No load %r14 - %r9 + # A[7] x A[1] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r13 + adoxq %rcx, %r14 + # No store %r13 + # No load %r15 - %r8 + # A[8] x A[1] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r14 + adoxq %rcx, %r15 + # No store %r14 + # No load %rbx - %r9 + # A[9] x A[1] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r15 + adoxq %rcx, %rbx + # No store %r15 + movq 96(%rdi), %r8 + # A[10] x A[1] + mulxq 80(%rsi), %rax, %rcx + adcxq %rax, %rbx + adoxq %rcx, %r8 + # No store %rbx + movq %r10, %r9 + # A[11] x A[1] + mulxq 88(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 96(%rdi) + movq %r10, %r8 + # A[11] x A[2] + movq 16(%rsi), %rdx + mulxq 88(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 104(%rdi) + # Carry + adcxq %r11, %r8 + movq %r10, %r11 + adcxq %r10, %r11 + adoxq %r10, %r11 + movq %r8, 112(%rdi) + # Diagonal 3 + movq 40(%rbp), %r8 + movq 48(%rbp), %r9 + # A[3] x A[2] + mulxq 
24(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 40(%rbp) + # No load %r12 - %r8 + # A[4] x A[2] + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r12 + movq %r9, 48(%rbp) + # No load %r13 - %r9 + # A[5] x A[2] + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r13 + # No store %r12 + # No load %r14 - %r8 + # A[6] x A[2] + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r13 + adoxq %rcx, %r14 + # No store %r13 + # No load %r15 - %r9 + # A[7] x A[2] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r14 + adoxq %rcx, %r15 + # No store %r14 + # No load %rbx - %r8 + # A[8] x A[2] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r15 + adoxq %rcx, %rbx + # No store %r15 + movq 96(%rdi), %r9 + # A[9] x A[2] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %rbx + adoxq %rcx, %r9 + # No store %rbx + movq 104(%rdi), %r8 + # A[10] x A[2] + mulxq 80(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 96(%rdi) + movq 112(%rdi), %r9 + # A[10] x A[3] + movq 80(%rsi), %rdx + mulxq 24(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 104(%rdi) + movq %r10, %r8 + # A[10] x A[4] + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 112(%rdi) + movq %r10, %r9 + # A[10] x A[5] + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 120(%rdi) + # Carry + adcxq %r11, %r9 + movq %r10, %r11 + adcxq %r10, %r11 + adoxq %r10, %r11 + movq %r9, 128(%rdi) + # Diagonal 4 + # No load %r12 - %r9 + # No load %r13 - %r8 + # A[4] x A[3] + movq 24(%rsi), %rdx + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r13 + # No store %r12 + # No load %r14 - %r9 + # A[5] x A[3] + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r13 + adoxq %rcx, %r14 + # No store %r13 + # No load %r15 - %r8 + # A[6] x A[3] + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r14 + adoxq %rcx, %r15 + # No store %r14 + # No load %rbx - %r9 + # A[7] x A[3] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r15 + adoxq %rcx, %rbx + # No store %r15 + movq 96(%rdi), %r8 + # A[8] x A[3] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %rbx + adoxq %rcx, %r8 + # No store %rbx + movq 104(%rdi), %r9 + # A[9] x A[3] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 96(%rdi) + movq 112(%rdi), %r8 + # A[9] x A[4] + movq 72(%rsi), %rdx + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 104(%rdi) + movq 120(%rdi), %r9 + # A[9] x A[5] + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 112(%rdi) + movq 128(%rdi), %r8 + # A[9] x A[6] + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 120(%rdi) + movq %r10, %r9 + # A[9] x A[7] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 128(%rdi) + movq %r10, %r8 + # A[9] x A[8] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 136(%rdi) + # Carry + adcxq %r11, %r8 + movq %r10, %r11 + adcxq %r10, %r11 + adoxq %r10, %r11 + movq %r8, 144(%rdi) + # Diagonal 5 + # No load %r14 - %r8 + # No load %r15 - %r9 + # A[5] x A[4] + movq 32(%rsi), %rdx + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r14 + adoxq %rcx, %r15 + # No store %r14 + # No load %rbx - %r8 + # A[6] x A[4] + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r15 + adoxq %rcx, %rbx + # No store %r15 + movq 96(%rdi), %r9 + # A[7] x A[4] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %rbx + adoxq %rcx, %r9 + # No store %rbx + movq 104(%rdi), %r8 + # A[8] x A[4] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 96(%rdi) + movq 
112(%rdi), %r9 + # A[8] x A[5] + movq 64(%rsi), %rdx + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 104(%rdi) + movq 120(%rdi), %r8 + # A[8] x A[6] + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 112(%rdi) + movq 128(%rdi), %r9 + # A[8] x A[7] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 120(%rdi) + movq 136(%rdi), %r8 + # A[10] x A[6] + movq 80(%rsi), %rdx + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 128(%rdi) + movq 144(%rdi), %r9 + # A[10] x A[7] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 136(%rdi) + movq %r10, %r8 + # A[10] x A[8] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 144(%rdi) + movq %r10, %r9 + # A[10] x A[9] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 152(%rdi) + # Carry + adcxq %r11, %r9 + movq %r10, %r11 + adcxq %r10, %r11 + adoxq %r10, %r11 + movq %r9, 160(%rdi) + # Diagonal 6 + # No load %rbx - %r9 + movq 96(%rdi), %r8 + # A[6] x A[5] + movq 40(%rsi), %rdx + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %rbx + adoxq %rcx, %r8 + # No store %rbx + movq 104(%rdi), %r9 + # A[7] x A[5] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 96(%rdi) + movq 112(%rdi), %r8 + # A[7] x A[6] + movq 48(%rsi), %rdx + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 104(%rdi) + movq 120(%rdi), %r9 + # A[11] x A[3] + movq 88(%rsi), %rdx + mulxq 24(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 112(%rdi) + movq 128(%rdi), %r8 + # A[11] x A[4] + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 120(%rdi) + movq 136(%rdi), %r9 + # A[11] x A[5] + mulxq 40(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 128(%rdi) + movq 144(%rdi), %r8 + # A[11] x A[6] + mulxq 48(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 136(%rdi) + movq 152(%rdi), %r9 + # A[11] x A[7] + mulxq 56(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 144(%rdi) + movq 160(%rdi), %r8 + # A[11] x A[8] + mulxq 64(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 152(%rdi) + movq %r10, %r9 + # A[11] x A[9] + mulxq 72(%rsi), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movq %r8, 160(%rdi) + movq %r10, %r8 + # A[11] x A[10] + mulxq 80(%rsi), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r8 + movq %r9, 168(%rdi) + # Carry + adcxq %r11, %r8 + movq %r10, %r11 + adcxq %r10, %r11 + adoxq %r10, %r11 + movq %r8, 176(%rdi) + movq %r11, 184(%rdi) + # Double and Add in A[i] x A[i] + movq 8(%rbp), %r9 + # A[0] x A[0] + movq (%rsi), %rdx + mulxq %rdx, %rax, %rcx + movq %rax, (%rbp) + adoxq %r9, %r9 + adcxq %rcx, %r9 + movq %r9, 8(%rbp) + movq 16(%rbp), %r8 + movq 24(%rbp), %r9 + # A[1] x A[1] + movq 8(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 16(%rbp) + movq %r9, 24(%rbp) + movq 32(%rbp), %r8 + movq 40(%rbp), %r9 + # A[2] x A[2] + movq 16(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 32(%rbp) + movq %r9, 40(%rbp) + movq 48(%rbp), %r8 + # A[3] x A[3] + movq 24(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r12, %r12 + adcxq %rax, %r8 + adcxq %rcx, %r12 + movq %r8, 48(%rbp) + # A[4] x A[4] + movq 32(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r13, %r13 + adoxq %r14, %r14 + adcxq %rax, %r13 + adcxq %rcx, %r14 
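+ # Double-and-add continues: adoxq reg,reg doubles the summed cross + # products A[i] x A[j] (i != j) through OF, while adcxq folds in the + # low/high words of each diagonal square A[i] x A[i] through CF.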
+ # A[5] x A[5] + movq 40(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r15, %r15 + adoxq %rbx, %rbx + adcxq %rax, %r15 + adcxq %rcx, %rbx + movq 96(%rdi), %r8 + movq 104(%rdi), %r9 + # A[6] x A[6] + movq 48(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 96(%rdi) + movq %r9, 104(%rdi) + movq 112(%rdi), %r8 + movq 120(%rdi), %r9 + # A[7] x A[7] + movq 56(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 112(%rdi) + movq %r9, 120(%rdi) + movq 128(%rdi), %r8 + movq 136(%rdi), %r9 + # A[8] x A[8] + movq 64(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 128(%rdi) + movq %r9, 136(%rdi) + movq 144(%rdi), %r8 + movq 152(%rdi), %r9 + # A[9] x A[9] + movq 72(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 144(%rdi) + movq %r9, 152(%rdi) + movq 160(%rdi), %r8 + movq 168(%rdi), %r9 + # A[10] x A[10] + movq 80(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 160(%rdi) + movq %r9, 168(%rdi) + movq 176(%rdi), %r8 + movq 184(%rdi), %r9 + # A[11] x A[11] + movq 88(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r8, %r8 + adoxq %r9, %r9 + adcxq %rax, %r8 + adcxq %rcx, %r9 + movq %r8, 176(%rdi) + movq %r9, 184(%rdi) + movq %r12, 56(%rdi) + movq %r13, 64(%rdi) + movq %r14, 72(%rdi) + movq %r15, 80(%rdi) + movq %rbx, 88(%rdi) + cmpq %rdi, %rsi + jne L_end_3072_sqr_avx2_12 + vmovdqu (%rbp), %xmm0 + vmovups %xmm0, (%rdi) + vmovdqu 16(%rbp), %xmm0 + vmovups %xmm0, 16(%rdi) + vmovdqu 32(%rbp), %xmm0 + vmovups %xmm0, 32(%rdi) + movq 48(%rbp), %rax + movq %rax, 48(%rdi) +L_end_3072_sqr_avx2_12: + addq $96, %rsp + pop %rbx + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + repz retq +#ifndef __APPLE__ +.size sp_3072_sqr_avx2_12,.-sp_3072_sqr_avx2_12 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_3072_add_12 +.type sp_3072_add_12,@function +.align 16 +sp_3072_add_12: +#else +.globl _sp_3072_add_12 +.p2align 4 +_sp_3072_add_12: +#endif /* __APPLE__ */ + # Add + movq (%rsi), %rcx + xorq %rax, %rax + addq (%rdx), %rcx + movq 8(%rsi), %r8 + movq %rcx, (%rdi) + adcq 8(%rdx), %r8 + movq 16(%rsi), %rcx + movq %r8, 8(%rdi) + adcq 16(%rdx), %rcx + movq 24(%rsi), %r8 + movq %rcx, 16(%rdi) + adcq 24(%rdx), %r8 + movq 32(%rsi), %rcx + movq %r8, 24(%rdi) + adcq 32(%rdx), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%rdi) + adcq 40(%rdx), %r8 + movq 48(%rsi), %rcx + movq %r8, 40(%rdi) + adcq 48(%rdx), %rcx + movq 56(%rsi), %r8 + movq %rcx, 48(%rdi) + adcq 56(%rdx), %r8 + movq 64(%rsi), %rcx + movq %r8, 56(%rdi) + adcq 64(%rdx), %rcx + movq 72(%rsi), %r8 + movq %rcx, 64(%rdi) + adcq 72(%rdx), %r8 + movq 80(%rsi), %rcx + movq %r8, 72(%rdi) + adcq 80(%rdx), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%rdi) + adcq 88(%rdx), %r8 + movq %r8, 88(%rdi) + adcq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_3072_add_12,.-sp_3072_add_12 +#endif /* __APPLE__ */ +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. 
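+ * Implemented as one straight-line subq/sbbq borrow chain over all + * 24 words; %rax returns 0 when no borrow remains and -1 (all ones) + * otherwise.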
+ */ +#ifndef __APPLE__ +.globl sp_3072_sub_in_place_24 +.type sp_3072_sub_in_place_24,@function +.align 16 +sp_3072_sub_in_place_24: +#else +.globl _sp_3072_sub_in_place_24 +.p2align 4 +_sp_3072_sub_in_place_24: +#endif /* __APPLE__ */ + movq (%rdi), %rdx + xorq %rax, %rax + subq (%rsi), %rdx + movq 8(%rdi), %rcx + movq %rdx, (%rdi) + sbbq 8(%rsi), %rcx + movq 16(%rdi), %rdx + movq %rcx, 8(%rdi) + sbbq 16(%rsi), %rdx + movq 24(%rdi), %rcx + movq %rdx, 16(%rdi) + sbbq 24(%rsi), %rcx + movq 32(%rdi), %rdx + movq %rcx, 24(%rdi) + sbbq 32(%rsi), %rdx + movq 40(%rdi), %rcx + movq %rdx, 32(%rdi) + sbbq 40(%rsi), %rcx + movq 48(%rdi), %rdx + movq %rcx, 40(%rdi) + sbbq 48(%rsi), %rdx + movq 56(%rdi), %rcx + movq %rdx, 48(%rdi) + sbbq 56(%rsi), %rcx + movq 64(%rdi), %rdx + movq %rcx, 56(%rdi) + sbbq 64(%rsi), %rdx + movq 72(%rdi), %rcx + movq %rdx, 64(%rdi) + sbbq 72(%rsi), %rcx + movq 80(%rdi), %rdx + movq %rcx, 72(%rdi) + sbbq 80(%rsi), %rdx + movq 88(%rdi), %rcx + movq %rdx, 80(%rdi) + sbbq 88(%rsi), %rcx + movq 96(%rdi), %rdx + movq %rcx, 88(%rdi) + sbbq 96(%rsi), %rdx + movq 104(%rdi), %rcx + movq %rdx, 96(%rdi) + sbbq 104(%rsi), %rcx + movq 112(%rdi), %rdx + movq %rcx, 104(%rdi) + sbbq 112(%rsi), %rdx + movq 120(%rdi), %rcx + movq %rdx, 112(%rdi) + sbbq 120(%rsi), %rcx + movq 128(%rdi), %rdx + movq %rcx, 120(%rdi) + sbbq 128(%rsi), %rdx + movq 136(%rdi), %rcx + movq %rdx, 128(%rdi) + sbbq 136(%rsi), %rcx + movq 144(%rdi), %rdx + movq %rcx, 136(%rdi) + sbbq 144(%rsi), %rdx + movq 152(%rdi), %rcx + movq %rdx, 144(%rdi) + sbbq 152(%rsi), %rcx + movq 160(%rdi), %rdx + movq %rcx, 152(%rdi) + sbbq 160(%rsi), %rdx + movq 168(%rdi), %rcx + movq %rdx, 160(%rdi) + sbbq 168(%rsi), %rcx + movq 176(%rdi), %rdx + movq %rcx, 168(%rdi) + sbbq 176(%rsi), %rdx + movq 184(%rdi), %rcx + movq %rdx, 176(%rdi) + sbbq 184(%rsi), %rcx + movq %rcx, 184(%rdi) + sbbq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_3072_sub_in_place_24,.-sp_3072_sub_in_place_24 +#endif /* __APPLE__ */ +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
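+ * Implemented as one straight-line addq/adcq carry chain over all + * 24 words; the final carry (0 or 1) is returned in %rax.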
+ */ +#ifndef __APPLE__ +.globl sp_3072_add_24 +.type sp_3072_add_24,@function +.align 16 +sp_3072_add_24: +#else +.globl _sp_3072_add_24 +.p2align 4 +_sp_3072_add_24: +#endif /* __APPLE__ */ + # Add + movq (%rsi), %rcx + xorq %rax, %rax + addq (%rdx), %rcx + movq 8(%rsi), %r8 + movq %rcx, (%rdi) + adcq 8(%rdx), %r8 + movq 16(%rsi), %rcx + movq %r8, 8(%rdi) + adcq 16(%rdx), %rcx + movq 24(%rsi), %r8 + movq %rcx, 16(%rdi) + adcq 24(%rdx), %r8 + movq 32(%rsi), %rcx + movq %r8, 24(%rdi) + adcq 32(%rdx), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%rdi) + adcq 40(%rdx), %r8 + movq 48(%rsi), %rcx + movq %r8, 40(%rdi) + adcq 48(%rdx), %rcx + movq 56(%rsi), %r8 + movq %rcx, 48(%rdi) + adcq 56(%rdx), %r8 + movq 64(%rsi), %rcx + movq %r8, 56(%rdi) + adcq 64(%rdx), %rcx + movq 72(%rsi), %r8 + movq %rcx, 64(%rdi) + adcq 72(%rdx), %r8 + movq 80(%rsi), %rcx + movq %r8, 72(%rdi) + adcq 80(%rdx), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%rdi) + adcq 88(%rdx), %r8 + movq 96(%rsi), %rcx + movq %r8, 88(%rdi) + adcq 96(%rdx), %rcx + movq 104(%rsi), %r8 + movq %rcx, 96(%rdi) + adcq 104(%rdx), %r8 + movq 112(%rsi), %rcx + movq %r8, 104(%rdi) + adcq 112(%rdx), %rcx + movq 120(%rsi), %r8 + movq %rcx, 112(%rdi) + adcq 120(%rdx), %r8 + movq 128(%rsi), %rcx + movq %r8, 120(%rdi) + adcq 128(%rdx), %rcx + movq 136(%rsi), %r8 + movq %rcx, 128(%rdi) + adcq 136(%rdx), %r8 + movq 144(%rsi), %rcx + movq %r8, 136(%rdi) + adcq 144(%rdx), %rcx + movq 152(%rsi), %r8 + movq %rcx, 144(%rdi) + adcq 152(%rdx), %r8 + movq 160(%rsi), %rcx + movq %r8, 152(%rdi) + adcq 160(%rdx), %rcx + movq 168(%rsi), %r8 + movq %rcx, 160(%rdi) + adcq 168(%rdx), %r8 + movq 176(%rsi), %rcx + movq %r8, 168(%rdi) + adcq 176(%rdx), %rcx + movq 184(%rsi), %r8 + movq %rcx, 176(%rdi) + adcq 184(%rdx), %r8 + movq %r8, 184(%rdi) + adcq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_3072_add_24,.-sp_3072_add_24 +#endif /* __APPLE__ */ +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
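+ * Karatsuba: a and b are split into 12-word halves and three 12x12 + * multiplies are performed -- (a0+a1)*(b0+b1), a1*b1 and a0*b0 -- with + * the half-sum carries masked back in before the three products are + * combined.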
+ */ +#ifndef __APPLE__ +.globl sp_3072_mul_24 +.type sp_3072_mul_24,@function +.align 16 +sp_3072_mul_24: +#else +.globl _sp_3072_mul_24 +.p2align 4 +_sp_3072_mul_24: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + subq $616, %rsp + movq %rdi, 576(%rsp) + movq %rsi, 584(%rsp) + movq %rdx, 592(%rsp) + leaq 384(%rsp), %r10 + leaq 96(%rsi), %r12 + # Add + movq (%rsi), %rax + xorq %r13, %r13 + addq (%r12), %rax + movq 8(%rsi), %rcx + movq %rax, (%r10) + adcq 8(%r12), %rcx + movq 16(%rsi), %r8 + movq %rcx, 8(%r10) + adcq 16(%r12), %r8 + movq 24(%rsi), %rax + movq %r8, 16(%r10) + adcq 24(%r12), %rax + movq 32(%rsi), %rcx + movq %rax, 24(%r10) + adcq 32(%r12), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%r10) + adcq 40(%r12), %r8 + movq 48(%rsi), %rax + movq %r8, 40(%r10) + adcq 48(%r12), %rax + movq 56(%rsi), %rcx + movq %rax, 48(%r10) + adcq 56(%r12), %rcx + movq 64(%rsi), %r8 + movq %rcx, 56(%r10) + adcq 64(%r12), %r8 + movq 72(%rsi), %rax + movq %r8, 64(%r10) + adcq 72(%r12), %rax + movq 80(%rsi), %rcx + movq %rax, 72(%r10) + adcq 80(%r12), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%r10) + adcq 88(%r12), %r8 + movq %r8, 88(%r10) + adcq $0, %r13 + movq %r13, 600(%rsp) + leaq 480(%rsp), %r11 + leaq 96(%rdx), %r12 + # Add + movq (%rdx), %rax + xorq %r14, %r14 + addq (%r12), %rax + movq 8(%rdx), %rcx + movq %rax, (%r11) + adcq 8(%r12), %rcx + movq 16(%rdx), %r8 + movq %rcx, 8(%r11) + adcq 16(%r12), %r8 + movq 24(%rdx), %rax + movq %r8, 16(%r11) + adcq 24(%r12), %rax + movq 32(%rdx), %rcx + movq %rax, 24(%r11) + adcq 32(%r12), %rcx + movq 40(%rdx), %r8 + movq %rcx, 32(%r11) + adcq 40(%r12), %r8 + movq 48(%rdx), %rax + movq %r8, 40(%r11) + adcq 48(%r12), %rax + movq 56(%rdx), %rcx + movq %rax, 48(%r11) + adcq 56(%r12), %rcx + movq 64(%rdx), %r8 + movq %rcx, 56(%r11) + adcq 64(%r12), %r8 + movq 72(%rdx), %rax + movq %r8, 64(%r11) + adcq 72(%r12), %rax + movq 80(%rdx), %rcx + movq %rax, 72(%r11) + adcq 80(%r12), %rcx + movq 88(%rdx), %r8 + movq %rcx, 80(%r11) + adcq 88(%r12), %r8 + movq %r8, 88(%r11) + adcq $0, %r14 + movq %r14, 608(%rsp) + movq %r11, %rdx + movq %r10, %rsi + movq %rsp, %rdi +#ifndef __APPLE__ + callq sp_3072_mul_12@plt +#else + callq _sp_3072_mul_12 +#endif /* __APPLE__ */ + movq 592(%rsp), %rdx + movq 584(%rsp), %rsi + leaq 192(%rsp), %rdi + addq $96, %rdx + addq $96, %rsi +#ifndef __APPLE__ + callq sp_3072_mul_12@plt +#else + callq _sp_3072_mul_12 +#endif /* __APPLE__ */ + movq 592(%rsp), %rdx + movq 584(%rsp), %rsi + movq 576(%rsp), %rdi +#ifndef __APPLE__ + callq sp_3072_mul_12@plt +#else + callq _sp_3072_mul_12 +#endif /* __APPLE__ */ + movq 600(%rsp), %r13 + movq 608(%rsp), %r14 + movq 576(%rsp), %r15 + movq %r13, %r9 + leaq 384(%rsp), %r10 + leaq 480(%rsp), %r11 + andq %r14, %r9 + negq %r13 + negq %r14 + addq $192, %r15 + movq (%r10), %rax + movq (%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, (%r10) + movq %rcx, (%r11) + movq 8(%r10), %rax + movq 8(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 8(%r10) + movq %rcx, 8(%r11) + movq 16(%r10), %rax + movq 16(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 16(%r10) + movq %rcx, 16(%r11) + movq 24(%r10), %rax + movq 24(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 24(%r10) + movq %rcx, 24(%r11) + movq 32(%r10), %rax + movq 32(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 32(%r10) + movq %rcx, 32(%r11) + movq 40(%r10), %rax + movq 40(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 40(%r10) + movq %rcx, 40(%r11) + movq 
48(%r10), %rax + movq 48(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 48(%r10) + movq %rcx, 48(%r11) + movq 56(%r10), %rax + movq 56(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 56(%r10) + movq %rcx, 56(%r11) + movq 64(%r10), %rax + movq 64(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 64(%r10) + movq %rcx, 64(%r11) + movq 72(%r10), %rax + movq 72(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 72(%r10) + movq %rcx, 72(%r11) + movq 80(%r10), %rax + movq 80(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 80(%r10) + movq %rcx, 80(%r11) + movq 88(%r10), %rax + movq 88(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 88(%r10) + movq %rcx, 88(%r11) + movq (%r10), %rax + addq (%r11), %rax + movq 8(%r10), %rcx + movq %rax, (%r15) + adcq 8(%r11), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r15) + adcq 16(%r11), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r15) + adcq 24(%r11), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r15) + adcq 32(%r11), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r15) + adcq 40(%r11), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r15) + adcq 48(%r11), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r15) + adcq 56(%r11), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r15) + adcq 64(%r11), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r15) + adcq 72(%r11), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r15) + adcq 80(%r11), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r15) + adcq 88(%r11), %r8 + movq %r8, 88(%r15) + adcq $0, %r9 + leaq 192(%rsp), %r11 + movq %rsp, %r10 + movq (%r10), %rax + subq (%r11), %rax + movq 8(%r10), %rcx + movq %rax, (%r10) + sbbq 8(%r11), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r10) + sbbq 16(%r11), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r10) + sbbq 24(%r11), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r10) + sbbq 32(%r11), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r10) + sbbq 40(%r11), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r10) + sbbq 48(%r11), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r10) + sbbq 56(%r11), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r10) + sbbq 64(%r11), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r10) + sbbq 72(%r11), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r10) + sbbq 80(%r11), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r10) + sbbq 88(%r11), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r10) + sbbq 96(%r11), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r10) + sbbq 104(%r11), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r10) + sbbq 112(%r11), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r10) + sbbq 120(%r11), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r10) + sbbq 128(%r11), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r10) + sbbq 136(%r11), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r10) + sbbq 144(%r11), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r10) + sbbq 152(%r11), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r10) + sbbq 160(%r11), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r10) + sbbq 168(%r11), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r10) + sbbq 176(%r11), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r10) + sbbq 184(%r11), %r8 + movq %r8, 184(%r10) + sbbq $0, %r9 + movq (%r10), %rax + subq (%rdi), %rax + movq 8(%r10), %rcx + movq %rax, (%r10) + sbbq 8(%rdi), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r10) + sbbq 16(%rdi), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r10) + sbbq 24(%rdi), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r10) + sbbq 32(%rdi), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r10) + sbbq 40(%rdi), %r8 + movq 48(%r10), %rax + movq 
%r8, 40(%r10) + sbbq 48(%rdi), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r10) + sbbq 56(%rdi), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r10) + sbbq 64(%rdi), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r10) + sbbq 72(%rdi), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r10) + sbbq 80(%rdi), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r10) + sbbq 88(%rdi), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r10) + sbbq 96(%rdi), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r10) + sbbq 104(%rdi), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r10) + sbbq 112(%rdi), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r10) + sbbq 120(%rdi), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r10) + sbbq 128(%rdi), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r10) + sbbq 136(%rdi), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r10) + sbbq 144(%rdi), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r10) + sbbq 152(%rdi), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r10) + sbbq 160(%rdi), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r10) + sbbq 168(%rdi), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r10) + sbbq 176(%rdi), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r10) + sbbq 184(%rdi), %r8 + movq %r8, 184(%r10) + sbbq $0, %r9 + subq $96, %r15 + # Add + movq (%r15), %rax + addq (%r10), %rax + movq 8(%r15), %rcx + movq %rax, (%r15) + adcq 8(%r10), %rcx + movq 16(%r15), %r8 + movq %rcx, 8(%r15) + adcq 16(%r10), %r8 + movq 24(%r15), %rax + movq %r8, 16(%r15) + adcq 24(%r10), %rax + movq 32(%r15), %rcx + movq %rax, 24(%r15) + adcq 32(%r10), %rcx + movq 40(%r15), %r8 + movq %rcx, 32(%r15) + adcq 40(%r10), %r8 + movq 48(%r15), %rax + movq %r8, 40(%r15) + adcq 48(%r10), %rax + movq 56(%r15), %rcx + movq %rax, 48(%r15) + adcq 56(%r10), %rcx + movq 64(%r15), %r8 + movq %rcx, 56(%r15) + adcq 64(%r10), %r8 + movq 72(%r15), %rax + movq %r8, 64(%r15) + adcq 72(%r10), %rax + movq 80(%r15), %rcx + movq %rax, 72(%r15) + adcq 80(%r10), %rcx + movq 88(%r15), %r8 + movq %rcx, 80(%r15) + adcq 88(%r10), %r8 + movq 96(%r15), %rax + movq %r8, 88(%r15) + adcq 96(%r10), %rax + movq 104(%r15), %rcx + movq %rax, 96(%r15) + adcq 104(%r10), %rcx + movq 112(%r15), %r8 + movq %rcx, 104(%r15) + adcq 112(%r10), %r8 + movq 120(%r15), %rax + movq %r8, 112(%r15) + adcq 120(%r10), %rax + movq 128(%r15), %rcx + movq %rax, 120(%r15) + adcq 128(%r10), %rcx + movq 136(%r15), %r8 + movq %rcx, 128(%r15) + adcq 136(%r10), %r8 + movq 144(%r15), %rax + movq %r8, 136(%r15) + adcq 144(%r10), %rax + movq 152(%r15), %rcx + movq %rax, 144(%r15) + adcq 152(%r10), %rcx + movq 160(%r15), %r8 + movq %rcx, 152(%r15) + adcq 160(%r10), %r8 + movq 168(%r15), %rax + movq %r8, 160(%r15) + adcq 168(%r10), %rax + movq 176(%r15), %rcx + movq %rax, 168(%r15) + adcq 176(%r10), %rcx + movq 184(%r15), %r8 + movq %rcx, 176(%r15) + adcq 184(%r10), %r8 + movq %r8, 184(%r15) + adcq $0, %r9 + movq %r9, 288(%rdi) + addq $96, %r15 + # Add + movq (%r15), %rax + addq (%r11), %rax + movq 8(%r15), %rcx + movq %rax, (%r15) + adcq 8(%r11), %rcx + movq 16(%r15), %r8 + movq %rcx, 8(%r15) + adcq 16(%r11), %r8 + movq 24(%r15), %rax + movq %r8, 16(%r15) + adcq 24(%r11), %rax + movq 32(%r15), %rcx + movq %rax, 24(%r15) + adcq 32(%r11), %rcx + movq 40(%r15), %r8 + movq %rcx, 32(%r15) + adcq 40(%r11), %r8 + movq 48(%r15), %rax + movq %r8, 40(%r15) + adcq 48(%r11), %rax + movq 56(%r15), %rcx + movq %rax, 48(%r15) + adcq 56(%r11), %rcx + movq 64(%r15), %r8 + movq %rcx, 56(%r15) + adcq 64(%r11), %r8 + movq 72(%r15), %rax + movq %r8, 64(%r15) + adcq 72(%r11), %rax + movq 80(%r15), %rcx + movq %rax, 72(%r15) + adcq 
80(%r11), %rcx + movq 88(%r15), %r8 + movq %rcx, 80(%r15) + adcq 88(%r11), %r8 + movq 96(%r15), %rax + movq %r8, 88(%r15) + adcq 96(%r11), %rax + movq %rax, 96(%r15) + # Add to zero + movq 104(%r11), %rax + adcq $0, %rax + movq 112(%r11), %rcx + movq %rax, 104(%r15) + adcq $0, %rcx + movq 120(%r11), %r8 + movq %rcx, 112(%r15) + adcq $0, %r8 + movq 128(%r11), %rax + movq %r8, 120(%r15) + adcq $0, %rax + movq 136(%r11), %rcx + movq %rax, 128(%r15) + adcq $0, %rcx + movq 144(%r11), %r8 + movq %rcx, 136(%r15) + adcq $0, %r8 + movq 152(%r11), %rax + movq %r8, 144(%r15) + adcq $0, %rax + movq 160(%r11), %rcx + movq %rax, 152(%r15) + adcq $0, %rcx + movq 168(%r11), %r8 + movq %rcx, 160(%r15) + adcq $0, %r8 + movq 176(%r11), %rax + movq %r8, 168(%r15) + adcq $0, %rax + movq 184(%r11), %rcx + movq %rax, 176(%r15) + adcq $0, %rcx + movq %rcx, 184(%r15) + addq $616, %rsp + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_3072_mul_24,.-sp_3072_mul_24 +#endif /* __APPLE__ */ +/* Add a to a into r. (r = a + a) + * + * r A single precision integer. + * a A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_3072_dbl_12 +.type sp_3072_dbl_12,@function +.align 16 +sp_3072_dbl_12: +#else +.globl _sp_3072_dbl_12 +.p2align 4 +_sp_3072_dbl_12: +#endif /* __APPLE__ */ + movq (%rsi), %rdx + xorq %rax, %rax + addq %rdx, %rdx + movq 8(%rsi), %rcx + movq %rdx, (%rdi) + adcq %rcx, %rcx + movq 16(%rsi), %rdx + movq %rcx, 8(%rdi) + adcq %rdx, %rdx + movq 24(%rsi), %rcx + movq %rdx, 16(%rdi) + adcq %rcx, %rcx + movq 32(%rsi), %rdx + movq %rcx, 24(%rdi) + adcq %rdx, %rdx + movq 40(%rsi), %rcx + movq %rdx, 32(%rdi) + adcq %rcx, %rcx + movq 48(%rsi), %rdx + movq %rcx, 40(%rdi) + adcq %rdx, %rdx + movq 56(%rsi), %rcx + movq %rdx, 48(%rdi) + adcq %rcx, %rcx + movq 64(%rsi), %rdx + movq %rcx, 56(%rdi) + adcq %rdx, %rdx + movq 72(%rsi), %rcx + movq %rdx, 64(%rdi) + adcq %rcx, %rcx + movq 80(%rsi), %rdx + movq %rcx, 72(%rdi) + adcq %rdx, %rdx + movq 88(%rsi), %rcx + movq %rdx, 80(%rdi) + adcq %rcx, %rcx + movq %rcx, 88(%rdi) + adcq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_3072_dbl_12,.-sp_3072_dbl_12 +#endif /* __APPLE__ */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
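+ * Karatsuba squaring: three 12-word squares -- (a0+a1)^2, a1^2 and + * a0^2 -- are combined; the (a0+a1) overflow is folded back in by + * masking the half-sum, doubling it and adding it into the middle + * words.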
+ */ +#ifndef __APPLE__ +.globl sp_3072_sqr_24 +.type sp_3072_sqr_24,@function +.align 16 +sp_3072_sqr_24: +#else +.globl _sp_3072_sqr_24 +.p2align 4 +_sp_3072_sqr_24: +#endif /* __APPLE__ */ + subq $504, %rsp + movq %rdi, 480(%rsp) + movq %rsi, 488(%rsp) + leaq 384(%rsp), %r8 + leaq 96(%rsi), %r9 + # Add + movq (%rsi), %rdx + xorq %rcx, %rcx + addq (%r9), %rdx + movq 8(%rsi), %rax + movq %rdx, (%r8) + adcq 8(%r9), %rax + movq 16(%rsi), %rdx + movq %rax, 8(%r8) + adcq 16(%r9), %rdx + movq 24(%rsi), %rax + movq %rdx, 16(%r8) + adcq 24(%r9), %rax + movq 32(%rsi), %rdx + movq %rax, 24(%r8) + adcq 32(%r9), %rdx + movq 40(%rsi), %rax + movq %rdx, 32(%r8) + adcq 40(%r9), %rax + movq 48(%rsi), %rdx + movq %rax, 40(%r8) + adcq 48(%r9), %rdx + movq 56(%rsi), %rax + movq %rdx, 48(%r8) + adcq 56(%r9), %rax + movq 64(%rsi), %rdx + movq %rax, 56(%r8) + adcq 64(%r9), %rdx + movq 72(%rsi), %rax + movq %rdx, 64(%r8) + adcq 72(%r9), %rax + movq 80(%rsi), %rdx + movq %rax, 72(%r8) + adcq 80(%r9), %rdx + movq 88(%rsi), %rax + movq %rdx, 80(%r8) + adcq 88(%r9), %rax + movq %rax, 88(%r8) + adcq $0, %rcx + movq %rcx, 496(%rsp) + movq %r8, %rsi + movq %rsp, %rdi +#ifndef __APPLE__ + callq sp_3072_sqr_12@plt +#else + callq _sp_3072_sqr_12 +#endif /* __APPLE__ */ + movq 488(%rsp), %rsi + leaq 192(%rsp), %rdi + addq $96, %rsi +#ifndef __APPLE__ + callq sp_3072_sqr_12@plt +#else + callq _sp_3072_sqr_12 +#endif /* __APPLE__ */ + movq 488(%rsp), %rsi + movq 480(%rsp), %rdi +#ifndef __APPLE__ + callq sp_3072_sqr_12@plt +#else + callq _sp_3072_sqr_12 +#endif /* __APPLE__ */ + movq 496(%rsp), %r10 + movq %rdi, %r9 + leaq 384(%rsp), %r8 + movq %r10, %rcx + negq %r10 + addq $192, %r9 + movq (%r8), %rdx + movq 8(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, (%r9) + movq %rax, 8(%r9) + movq 16(%r8), %rdx + movq 24(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 16(%r9) + movq %rax, 24(%r9) + movq 32(%r8), %rdx + movq 40(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 32(%r9) + movq %rax, 40(%r9) + movq 48(%r8), %rdx + movq 56(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 48(%r9) + movq %rax, 56(%r9) + movq 64(%r8), %rdx + movq 72(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 64(%r9) + movq %rax, 72(%r9) + movq 80(%r8), %rdx + movq 88(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 80(%r9) + movq %rax, 88(%r9) + movq (%r9), %rdx + addq %rdx, %rdx + movq 8(%r9), %rax + movq %rdx, (%r9) + adcq %rax, %rax + movq 16(%r9), %rdx + movq %rax, 8(%r9) + adcq %rdx, %rdx + movq 24(%r9), %rax + movq %rdx, 16(%r9) + adcq %rax, %rax + movq 32(%r9), %rdx + movq %rax, 24(%r9) + adcq %rdx, %rdx + movq 40(%r9), %rax + movq %rdx, 32(%r9) + adcq %rax, %rax + movq 48(%r9), %rdx + movq %rax, 40(%r9) + adcq %rdx, %rdx + movq 56(%r9), %rax + movq %rdx, 48(%r9) + adcq %rax, %rax + movq 64(%r9), %rdx + movq %rax, 56(%r9) + adcq %rdx, %rdx + movq 72(%r9), %rax + movq %rdx, 64(%r9) + adcq %rax, %rax + movq 80(%r9), %rdx + movq %rax, 72(%r9) + adcq %rdx, %rdx + movq 88(%r9), %rax + movq %rdx, 80(%r9) + adcq %rax, %rax + movq %rax, 88(%r9) + adcq $0, %rcx + leaq 192(%rsp), %rsi + movq %rsp, %r8 + movq (%r8), %rdx + subq (%rsi), %rdx + movq 8(%r8), %rax + movq %rdx, (%r8) + sbbq 8(%rsi), %rax + movq 16(%r8), %rdx + movq %rax, 8(%r8) + sbbq 16(%rsi), %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r8) + sbbq 24(%rsi), %rax + movq 32(%r8), %rdx + movq %rax, 24(%r8) + sbbq 32(%rsi), %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r8) + sbbq 40(%rsi), %rax + movq 48(%r8), %rdx + movq %rax, 
40(%r8) + sbbq 48(%rsi), %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r8) + sbbq 56(%rsi), %rax + movq 64(%r8), %rdx + movq %rax, 56(%r8) + sbbq 64(%rsi), %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r8) + sbbq 72(%rsi), %rax + movq 80(%r8), %rdx + movq %rax, 72(%r8) + sbbq 80(%rsi), %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r8) + sbbq 88(%rsi), %rax + movq 96(%r8), %rdx + movq %rax, 88(%r8) + sbbq 96(%rsi), %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r8) + sbbq 104(%rsi), %rax + movq 112(%r8), %rdx + movq %rax, 104(%r8) + sbbq 112(%rsi), %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r8) + sbbq 120(%rsi), %rax + movq 128(%r8), %rdx + movq %rax, 120(%r8) + sbbq 128(%rsi), %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r8) + sbbq 136(%rsi), %rax + movq 144(%r8), %rdx + movq %rax, 136(%r8) + sbbq 144(%rsi), %rdx + movq 152(%r8), %rax + movq %rdx, 144(%r8) + sbbq 152(%rsi), %rax + movq 160(%r8), %rdx + movq %rax, 152(%r8) + sbbq 160(%rsi), %rdx + movq 168(%r8), %rax + movq %rdx, 160(%r8) + sbbq 168(%rsi), %rax + movq 176(%r8), %rdx + movq %rax, 168(%r8) + sbbq 176(%rsi), %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r8) + sbbq 184(%rsi), %rax + movq %rax, 184(%r8) + sbbq $0, %rcx + movq (%r8), %rdx + subq (%rdi), %rdx + movq 8(%r8), %rax + movq %rdx, (%r8) + sbbq 8(%rdi), %rax + movq 16(%r8), %rdx + movq %rax, 8(%r8) + sbbq 16(%rdi), %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r8) + sbbq 24(%rdi), %rax + movq 32(%r8), %rdx + movq %rax, 24(%r8) + sbbq 32(%rdi), %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r8) + sbbq 40(%rdi), %rax + movq 48(%r8), %rdx + movq %rax, 40(%r8) + sbbq 48(%rdi), %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r8) + sbbq 56(%rdi), %rax + movq 64(%r8), %rdx + movq %rax, 56(%r8) + sbbq 64(%rdi), %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r8) + sbbq 72(%rdi), %rax + movq 80(%r8), %rdx + movq %rax, 72(%r8) + sbbq 80(%rdi), %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r8) + sbbq 88(%rdi), %rax + movq 96(%r8), %rdx + movq %rax, 88(%r8) + sbbq 96(%rdi), %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r8) + sbbq 104(%rdi), %rax + movq 112(%r8), %rdx + movq %rax, 104(%r8) + sbbq 112(%rdi), %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r8) + sbbq 120(%rdi), %rax + movq 128(%r8), %rdx + movq %rax, 120(%r8) + sbbq 128(%rdi), %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r8) + sbbq 136(%rdi), %rax + movq 144(%r8), %rdx + movq %rax, 136(%r8) + sbbq 144(%rdi), %rdx + movq 152(%r8), %rax + movq %rdx, 144(%r8) + sbbq 152(%rdi), %rax + movq 160(%r8), %rdx + movq %rax, 152(%r8) + sbbq 160(%rdi), %rdx + movq 168(%r8), %rax + movq %rdx, 160(%r8) + sbbq 168(%rdi), %rax + movq 176(%r8), %rdx + movq %rax, 168(%r8) + sbbq 176(%rdi), %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r8) + sbbq 184(%rdi), %rax + movq %rax, 184(%r8) + sbbq $0, %rcx + subq $96, %r9 + # Add in place + movq (%r9), %rdx + addq (%r8), %rdx + movq 8(%r9), %rax + movq %rdx, (%r9) + adcq 8(%r8), %rax + movq 16(%r9), %rdx + movq %rax, 8(%r9) + adcq 16(%r8), %rdx + movq 24(%r9), %rax + movq %rdx, 16(%r9) + adcq 24(%r8), %rax + movq 32(%r9), %rdx + movq %rax, 24(%r9) + adcq 32(%r8), %rdx + movq 40(%r9), %rax + movq %rdx, 32(%r9) + adcq 40(%r8), %rax + movq 48(%r9), %rdx + movq %rax, 40(%r9) + adcq 48(%r8), %rdx + movq 56(%r9), %rax + movq %rdx, 48(%r9) + adcq 56(%r8), %rax + movq 64(%r9), %rdx + movq %rax, 56(%r9) + adcq 64(%r8), %rdx + movq 72(%r9), %rax + movq %rdx, 64(%r9) + adcq 72(%r8), %rax + movq 80(%r9), %rdx + movq %rax, 72(%r9) + adcq 80(%r8), %rdx + movq 88(%r9), %rax + movq %rdx, 80(%r9) + adcq 88(%r8), %rax + movq 96(%r9), %rdx + movq 
%rax, 88(%r9) + adcq 96(%r8), %rdx + movq 104(%r9), %rax + movq %rdx, 96(%r9) + adcq 104(%r8), %rax + movq 112(%r9), %rdx + movq %rax, 104(%r9) + adcq 112(%r8), %rdx + movq 120(%r9), %rax + movq %rdx, 112(%r9) + adcq 120(%r8), %rax + movq 128(%r9), %rdx + movq %rax, 120(%r9) + adcq 128(%r8), %rdx + movq 136(%r9), %rax + movq %rdx, 128(%r9) + adcq 136(%r8), %rax + movq 144(%r9), %rdx + movq %rax, 136(%r9) + adcq 144(%r8), %rdx + movq 152(%r9), %rax + movq %rdx, 144(%r9) + adcq 152(%r8), %rax + movq 160(%r9), %rdx + movq %rax, 152(%r9) + adcq 160(%r8), %rdx + movq 168(%r9), %rax + movq %rdx, 160(%r9) + adcq 168(%r8), %rax + movq 176(%r9), %rdx + movq %rax, 168(%r9) + adcq 176(%r8), %rdx + movq 184(%r9), %rax + movq %rdx, 176(%r9) + adcq 184(%r8), %rax + movq %rax, 184(%r9) + adcq $0, %rcx + movq %rcx, 288(%rdi) + # Add in place + movq 96(%r9), %rdx + addq (%rsi), %rdx + movq 104(%r9), %rax + movq %rdx, 96(%r9) + adcq 8(%rsi), %rax + movq 112(%r9), %rdx + movq %rax, 104(%r9) + adcq 16(%rsi), %rdx + movq 120(%r9), %rax + movq %rdx, 112(%r9) + adcq 24(%rsi), %rax + movq 128(%r9), %rdx + movq %rax, 120(%r9) + adcq 32(%rsi), %rdx + movq 136(%r9), %rax + movq %rdx, 128(%r9) + adcq 40(%rsi), %rax + movq 144(%r9), %rdx + movq %rax, 136(%r9) + adcq 48(%rsi), %rdx + movq 152(%r9), %rax + movq %rdx, 144(%r9) + adcq 56(%rsi), %rax + movq 160(%r9), %rdx + movq %rax, 152(%r9) + adcq 64(%rsi), %rdx + movq 168(%r9), %rax + movq %rdx, 160(%r9) + adcq 72(%rsi), %rax + movq 176(%r9), %rdx + movq %rax, 168(%r9) + adcq 80(%rsi), %rdx + movq 184(%r9), %rax + movq %rdx, 176(%r9) + adcq 88(%rsi), %rax + movq 192(%r9), %rdx + movq %rax, 184(%r9) + adcq 96(%rsi), %rdx + movq %rdx, 192(%r9) + # Add to zero + movq 104(%rsi), %rdx + adcq $0, %rdx + movq 112(%rsi), %rax + movq %rdx, 200(%r9) + adcq $0, %rax + movq 120(%rsi), %rdx + movq %rax, 208(%r9) + adcq $0, %rdx + movq 128(%rsi), %rax + movq %rdx, 216(%r9) + adcq $0, %rax + movq 136(%rsi), %rdx + movq %rax, 224(%r9) + adcq $0, %rdx + movq 144(%rsi), %rax + movq %rdx, 232(%r9) + adcq $0, %rax + movq 152(%rsi), %rdx + movq %rax, 240(%r9) + adcq $0, %rdx + movq 160(%rsi), %rax + movq %rdx, 248(%r9) + adcq $0, %rax + movq 168(%rsi), %rdx + movq %rax, 256(%r9) + adcq $0, %rdx + movq 176(%rsi), %rax + movq %rdx, 264(%r9) + adcq $0, %rax + movq 184(%rsi), %rdx + movq %rax, 272(%r9) + adcq $0, %rdx + movq %rdx, 280(%r9) + addq $504, %rsp + repz retq +#ifndef __APPLE__ +.size sp_3072_sqr_24,.-sp_3072_sqr_24 +#endif /* __APPLE__ */ +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
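+ * Same Karatsuba structure as sp_3072_mul_24, but the 12-word products + * use the AVX2/BMI2 path and the half-sum carry masking is branchless: + * pextq with an all-ones mask passes a word through, with a zero mask + * it clears it.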
+ */ +#ifndef __APPLE__ +.globl sp_3072_mul_avx2_24 +.type sp_3072_mul_avx2_24,@function +.align 16 +sp_3072_mul_avx2_24: +#else +.globl _sp_3072_mul_avx2_24 +.p2align 4 +_sp_3072_mul_avx2_24: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + subq $616, %rsp + movq %rdi, 576(%rsp) + movq %rsi, 584(%rsp) + movq %rdx, 592(%rsp) + leaq 384(%rsp), %r10 + leaq 96(%rsi), %r12 + # Add + movq (%rsi), %rax + xorq %r13, %r13 + addq (%r12), %rax + movq 8(%rsi), %rcx + movq %rax, (%r10) + adcq 8(%r12), %rcx + movq 16(%rsi), %r8 + movq %rcx, 8(%r10) + adcq 16(%r12), %r8 + movq 24(%rsi), %rax + movq %r8, 16(%r10) + adcq 24(%r12), %rax + movq 32(%rsi), %rcx + movq %rax, 24(%r10) + adcq 32(%r12), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%r10) + adcq 40(%r12), %r8 + movq 48(%rsi), %rax + movq %r8, 40(%r10) + adcq 48(%r12), %rax + movq 56(%rsi), %rcx + movq %rax, 48(%r10) + adcq 56(%r12), %rcx + movq 64(%rsi), %r8 + movq %rcx, 56(%r10) + adcq 64(%r12), %r8 + movq 72(%rsi), %rax + movq %r8, 64(%r10) + adcq 72(%r12), %rax + movq 80(%rsi), %rcx + movq %rax, 72(%r10) + adcq 80(%r12), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%r10) + adcq 88(%r12), %r8 + movq %r8, 88(%r10) + adcq $0, %r13 + movq %r13, 600(%rsp) + leaq 480(%rsp), %r11 + leaq 96(%rdx), %r12 + # Add + movq (%rdx), %rax + xorq %r14, %r14 + addq (%r12), %rax + movq 8(%rdx), %rcx + movq %rax, (%r11) + adcq 8(%r12), %rcx + movq 16(%rdx), %r8 + movq %rcx, 8(%r11) + adcq 16(%r12), %r8 + movq 24(%rdx), %rax + movq %r8, 16(%r11) + adcq 24(%r12), %rax + movq 32(%rdx), %rcx + movq %rax, 24(%r11) + adcq 32(%r12), %rcx + movq 40(%rdx), %r8 + movq %rcx, 32(%r11) + adcq 40(%r12), %r8 + movq 48(%rdx), %rax + movq %r8, 40(%r11) + adcq 48(%r12), %rax + movq 56(%rdx), %rcx + movq %rax, 48(%r11) + adcq 56(%r12), %rcx + movq 64(%rdx), %r8 + movq %rcx, 56(%r11) + adcq 64(%r12), %r8 + movq 72(%rdx), %rax + movq %r8, 64(%r11) + adcq 72(%r12), %rax + movq 80(%rdx), %rcx + movq %rax, 72(%r11) + adcq 80(%r12), %rcx + movq 88(%rdx), %r8 + movq %rcx, 80(%r11) + adcq 88(%r12), %r8 + movq %r8, 88(%r11) + adcq $0, %r14 + movq %r14, 608(%rsp) + movq %r11, %rdx + movq %r10, %rsi + movq %rsp, %rdi +#ifndef __APPLE__ + callq sp_3072_mul_avx2_12@plt +#else + callq _sp_3072_mul_avx2_12 +#endif /* __APPLE__ */ + movq 592(%rsp), %rdx + movq 584(%rsp), %rsi + leaq 192(%rsp), %rdi + addq $96, %rdx + addq $96, %rsi +#ifndef __APPLE__ + callq sp_3072_mul_avx2_12@plt +#else + callq _sp_3072_mul_avx2_12 +#endif /* __APPLE__ */ + movq 592(%rsp), %rdx + movq 584(%rsp), %rsi + movq 576(%rsp), %rdi +#ifndef __APPLE__ + callq sp_3072_mul_avx2_12@plt +#else + callq _sp_3072_mul_avx2_12 +#endif /* __APPLE__ */ + movq 600(%rsp), %r13 + movq 608(%rsp), %r14 + movq 576(%rsp), %r15 + movq %r13, %r9 + leaq 384(%rsp), %r10 + leaq 480(%rsp), %r11 + andq %r14, %r9 + negq %r13 + negq %r14 + addq $192, %r15 + movq (%r10), %rax + movq (%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + addq %rcx, %rax + movq 8(%r10), %rcx + movq 8(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, (%r15) + adcq %r8, %rcx + movq 16(%r10), %r8 + movq 16(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 8(%r15) + adcq %rax, %r8 + movq 24(%r10), %rax + movq 24(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 16(%r15) + adcq %rcx, %rax + movq 32(%r10), %rcx + movq 32(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 24(%r15) + adcq %r8, %rcx + movq 40(%r10), %r8 + movq 40(%r11), %rax + pextq %r14, %r8, 
%r8 + pextq %r13, %rax, %rax + movq %rcx, 32(%r15) + adcq %rax, %r8 + movq 48(%r10), %rax + movq 48(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 40(%r15) + adcq %rcx, %rax + movq 56(%r10), %rcx + movq 56(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 48(%r15) + adcq %r8, %rcx + movq 64(%r10), %r8 + movq 64(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 56(%r15) + adcq %rax, %r8 + movq 72(%r10), %rax + movq 72(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 64(%r15) + adcq %rcx, %rax + movq 80(%r10), %rcx + movq 80(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 72(%r15) + adcq %r8, %rcx + movq 88(%r10), %r8 + movq 88(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 80(%r15) + adcq %rax, %r8 + movq %r8, 88(%r15) + adcq $0, %r9 + leaq 192(%rsp), %r11 + movq %rsp, %r10 + movq (%r10), %rax + subq (%r11), %rax + movq 8(%r10), %rcx + movq %rax, (%r10) + sbbq 8(%r11), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r10) + sbbq 16(%r11), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r10) + sbbq 24(%r11), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r10) + sbbq 32(%r11), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r10) + sbbq 40(%r11), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r10) + sbbq 48(%r11), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r10) + sbbq 56(%r11), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r10) + sbbq 64(%r11), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r10) + sbbq 72(%r11), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r10) + sbbq 80(%r11), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r10) + sbbq 88(%r11), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r10) + sbbq 96(%r11), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r10) + sbbq 104(%r11), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r10) + sbbq 112(%r11), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r10) + sbbq 120(%r11), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r10) + sbbq 128(%r11), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r10) + sbbq 136(%r11), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r10) + sbbq 144(%r11), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r10) + sbbq 152(%r11), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r10) + sbbq 160(%r11), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r10) + sbbq 168(%r11), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r10) + sbbq 176(%r11), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r10) + sbbq 184(%r11), %r8 + movq %r8, 184(%r10) + sbbq $0, %r9 + movq (%r10), %rax + subq (%rdi), %rax + movq 8(%r10), %rcx + movq %rax, (%r10) + sbbq 8(%rdi), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r10) + sbbq 16(%rdi), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r10) + sbbq 24(%rdi), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r10) + sbbq 32(%rdi), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r10) + sbbq 40(%rdi), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r10) + sbbq 48(%rdi), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r10) + sbbq 56(%rdi), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r10) + sbbq 64(%rdi), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r10) + sbbq 72(%rdi), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r10) + sbbq 80(%rdi), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r10) + sbbq 88(%rdi), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r10) + sbbq 96(%rdi), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r10) + sbbq 104(%rdi), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r10) + sbbq 112(%rdi), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r10) + sbbq 120(%rdi), %rax + movq 
128(%r10), %rcx + movq %rax, 120(%r10) + sbbq 128(%rdi), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r10) + sbbq 136(%rdi), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r10) + sbbq 144(%rdi), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r10) + sbbq 152(%rdi), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r10) + sbbq 160(%rdi), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r10) + sbbq 168(%rdi), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r10) + sbbq 176(%rdi), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r10) + sbbq 184(%rdi), %r8 + movq %r8, 184(%r10) + sbbq $0, %r9 + subq $96, %r15 + # Add + movq (%r15), %rax + addq (%r10), %rax + movq 8(%r15), %rcx + movq %rax, (%r15) + adcq 8(%r10), %rcx + movq 16(%r15), %r8 + movq %rcx, 8(%r15) + adcq 16(%r10), %r8 + movq 24(%r15), %rax + movq %r8, 16(%r15) + adcq 24(%r10), %rax + movq 32(%r15), %rcx + movq %rax, 24(%r15) + adcq 32(%r10), %rcx + movq 40(%r15), %r8 + movq %rcx, 32(%r15) + adcq 40(%r10), %r8 + movq 48(%r15), %rax + movq %r8, 40(%r15) + adcq 48(%r10), %rax + movq 56(%r15), %rcx + movq %rax, 48(%r15) + adcq 56(%r10), %rcx + movq 64(%r15), %r8 + movq %rcx, 56(%r15) + adcq 64(%r10), %r8 + movq 72(%r15), %rax + movq %r8, 64(%r15) + adcq 72(%r10), %rax + movq 80(%r15), %rcx + movq %rax, 72(%r15) + adcq 80(%r10), %rcx + movq 88(%r15), %r8 + movq %rcx, 80(%r15) + adcq 88(%r10), %r8 + movq 96(%r15), %rax + movq %r8, 88(%r15) + adcq 96(%r10), %rax + movq 104(%r15), %rcx + movq %rax, 96(%r15) + adcq 104(%r10), %rcx + movq 112(%r15), %r8 + movq %rcx, 104(%r15) + adcq 112(%r10), %r8 + movq 120(%r15), %rax + movq %r8, 112(%r15) + adcq 120(%r10), %rax + movq 128(%r15), %rcx + movq %rax, 120(%r15) + adcq 128(%r10), %rcx + movq 136(%r15), %r8 + movq %rcx, 128(%r15) + adcq 136(%r10), %r8 + movq 144(%r15), %rax + movq %r8, 136(%r15) + adcq 144(%r10), %rax + movq 152(%r15), %rcx + movq %rax, 144(%r15) + adcq 152(%r10), %rcx + movq 160(%r15), %r8 + movq %rcx, 152(%r15) + adcq 160(%r10), %r8 + movq 168(%r15), %rax + movq %r8, 160(%r15) + adcq 168(%r10), %rax + movq 176(%r15), %rcx + movq %rax, 168(%r15) + adcq 176(%r10), %rcx + movq 184(%r15), %r8 + movq %rcx, 176(%r15) + adcq 184(%r10), %r8 + movq %r8, 184(%r15) + adcq $0, %r9 + movq %r9, 288(%rdi) + addq $96, %r15 + # Add + movq (%r15), %rax + addq (%r11), %rax + movq 8(%r15), %rcx + movq %rax, (%r15) + adcq 8(%r11), %rcx + movq 16(%r15), %r8 + movq %rcx, 8(%r15) + adcq 16(%r11), %r8 + movq 24(%r15), %rax + movq %r8, 16(%r15) + adcq 24(%r11), %rax + movq 32(%r15), %rcx + movq %rax, 24(%r15) + adcq 32(%r11), %rcx + movq 40(%r15), %r8 + movq %rcx, 32(%r15) + adcq 40(%r11), %r8 + movq 48(%r15), %rax + movq %r8, 40(%r15) + adcq 48(%r11), %rax + movq 56(%r15), %rcx + movq %rax, 48(%r15) + adcq 56(%r11), %rcx + movq 64(%r15), %r8 + movq %rcx, 56(%r15) + adcq 64(%r11), %r8 + movq 72(%r15), %rax + movq %r8, 64(%r15) + adcq 72(%r11), %rax + movq 80(%r15), %rcx + movq %rax, 72(%r15) + adcq 80(%r11), %rcx + movq 88(%r15), %r8 + movq %rcx, 80(%r15) + adcq 88(%r11), %r8 + movq 96(%r15), %rax + movq %r8, 88(%r15) + adcq 96(%r11), %rax + movq %rax, 96(%r15) + # Add to zero + movq 104(%r11), %rax + adcq $0, %rax + movq 112(%r11), %rcx + movq %rax, 104(%r15) + adcq $0, %rcx + movq 120(%r11), %r8 + movq %rcx, 112(%r15) + adcq $0, %r8 + movq 128(%r11), %rax + movq %r8, 120(%r15) + adcq $0, %rax + movq 136(%r11), %rcx + movq %rax, 128(%r15) + adcq $0, %rcx + movq 144(%r11), %r8 + movq %rcx, 136(%r15) + adcq $0, %r8 + movq 152(%r11), %rax + movq %r8, 144(%r15) + adcq $0, %rax + movq 160(%r11), %rcx + movq %rax, 152(%r15) + 
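+ # (the carry keeps rippling through the copied top half of the a1*b1 + # product via adcq $0)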
adcq $0, %rcx + movq 168(%r11), %r8 + movq %rcx, 160(%r15) + adcq $0, %r8 + movq 176(%r11), %rax + movq %r8, 168(%r15) + adcq $0, %rax + movq 184(%r11), %rcx + movq %rax, 176(%r15) + adcq $0, %rcx + movq %rcx, 184(%r15) + addq $616, %rsp + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_3072_mul_avx2_24,.-sp_3072_mul_avx2_24 +#endif /* __APPLE__ */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_3072_sqr_avx2_24 +.type sp_3072_sqr_avx2_24,@function +.align 16 +sp_3072_sqr_avx2_24: +#else +.globl _sp_3072_sqr_avx2_24 +.p2align 4 +_sp_3072_sqr_avx2_24: +#endif /* __APPLE__ */ + subq $504, %rsp + movq %rdi, 480(%rsp) + movq %rsi, 488(%rsp) + leaq 384(%rsp), %r8 + leaq 96(%rsi), %r9 + # Add + movq (%rsi), %rdx + xorq %rcx, %rcx + addq (%r9), %rdx + movq 8(%rsi), %rax + movq %rdx, (%r8) + adcq 8(%r9), %rax + movq 16(%rsi), %rdx + movq %rax, 8(%r8) + adcq 16(%r9), %rdx + movq 24(%rsi), %rax + movq %rdx, 16(%r8) + adcq 24(%r9), %rax + movq 32(%rsi), %rdx + movq %rax, 24(%r8) + adcq 32(%r9), %rdx + movq 40(%rsi), %rax + movq %rdx, 32(%r8) + adcq 40(%r9), %rax + movq 48(%rsi), %rdx + movq %rax, 40(%r8) + adcq 48(%r9), %rdx + movq 56(%rsi), %rax + movq %rdx, 48(%r8) + adcq 56(%r9), %rax + movq 64(%rsi), %rdx + movq %rax, 56(%r8) + adcq 64(%r9), %rdx + movq 72(%rsi), %rax + movq %rdx, 64(%r8) + adcq 72(%r9), %rax + movq 80(%rsi), %rdx + movq %rax, 72(%r8) + adcq 80(%r9), %rdx + movq 88(%rsi), %rax + movq %rdx, 80(%r8) + adcq 88(%r9), %rax + movq %rax, 88(%r8) + adcq $0, %rcx + movq %rcx, 496(%rsp) + movq %r8, %rsi + movq %rsp, %rdi +#ifndef __APPLE__ + callq sp_3072_sqr_avx2_12@plt +#else + callq _sp_3072_sqr_avx2_12 +#endif /* __APPLE__ */ + movq 488(%rsp), %rsi + leaq 192(%rsp), %rdi + addq $96, %rsi +#ifndef __APPLE__ + callq sp_3072_sqr_avx2_12@plt +#else + callq _sp_3072_sqr_avx2_12 +#endif /* __APPLE__ */ + movq 488(%rsp), %rsi + movq 480(%rsp), %rdi +#ifndef __APPLE__ + callq sp_3072_sqr_avx2_12@plt +#else + callq _sp_3072_sqr_avx2_12 +#endif /* __APPLE__ */ + movq 496(%rsp), %r10 + movq %rdi, %r9 + leaq 384(%rsp), %r8 + movq %r10, %rcx + negq %r10 + addq $192, %r9 + movq (%r8), %rdx + pextq %r10, %rdx, %rdx + addq %rdx, %rdx + movq 8(%r8), %rax + movq %rdx, (%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 16(%r8), %rdx + movq %rax, 8(%r9) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 32(%r8), %rdx + movq %rax, 24(%r9) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 48(%r8), %rdx + movq %rax, 40(%r9) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 64(%r8), %rdx + movq %rax, 56(%r9) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 80(%r8), %rdx + movq %rax, 72(%r9) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq %rax, 88(%r9) + adcq $0, %rcx + leaq 192(%rsp), %rsi + movq %rsp, %r8 + movq (%r8), %rdx + subq (%rsi), %rdx + movq 8(%r8), %rax + movq %rdx, (%r8) + sbbq 8(%rsi), %rax + movq 16(%r8), %rdx + movq %rax, 8(%r8) + sbbq 16(%rsi), %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r8) + sbbq 24(%rsi), %rax + movq 32(%r8), %rdx 
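Throughout these AVX2 recombinations, pextq with a mask of all zeros or all ones acts as a branch-free, flag-preserving select: with an all-ones mask PEXT passes its source through unchanged, with a zero mask it yields zero, and unlike andq it does not clobber EFLAGS, so it can sit inside a live adcq carry chain. The negq just before each run stretches a saved carry into the 0/all-ones mask. In plain C, outside a carry chain, the same constant-time select is just an AND; a minimal sketch under an illustrative name:

    #include <stdint.h>

    /* Constant-time "x or 0" select; flag is 0 or 1 (e.g. a saved carry). */
    static inline uint64_t select_or_zero(uint64_t x, uint64_t flag)
    {
        uint64_t mask = (uint64_t)0 - flag;   /* 0 -> 0, 1 -> all ones */
        return x & mask;
    }

The plain (non-AVX2) sp_3072_mul_48 further below has no BMI2 to lean on, so it instead masks both operand blocks with andq up front, before any carry chain starts.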
+ movq %rax, 24(%r8) + sbbq 32(%rsi), %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r8) + sbbq 40(%rsi), %rax + movq 48(%r8), %rdx + movq %rax, 40(%r8) + sbbq 48(%rsi), %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r8) + sbbq 56(%rsi), %rax + movq 64(%r8), %rdx + movq %rax, 56(%r8) + sbbq 64(%rsi), %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r8) + sbbq 72(%rsi), %rax + movq 80(%r8), %rdx + movq %rax, 72(%r8) + sbbq 80(%rsi), %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r8) + sbbq 88(%rsi), %rax + movq 96(%r8), %rdx + movq %rax, 88(%r8) + sbbq 96(%rsi), %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r8) + sbbq 104(%rsi), %rax + movq 112(%r8), %rdx + movq %rax, 104(%r8) + sbbq 112(%rsi), %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r8) + sbbq 120(%rsi), %rax + movq 128(%r8), %rdx + movq %rax, 120(%r8) + sbbq 128(%rsi), %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r8) + sbbq 136(%rsi), %rax + movq 144(%r8), %rdx + movq %rax, 136(%r8) + sbbq 144(%rsi), %rdx + movq 152(%r8), %rax + movq %rdx, 144(%r8) + sbbq 152(%rsi), %rax + movq 160(%r8), %rdx + movq %rax, 152(%r8) + sbbq 160(%rsi), %rdx + movq 168(%r8), %rax + movq %rdx, 160(%r8) + sbbq 168(%rsi), %rax + movq 176(%r8), %rdx + movq %rax, 168(%r8) + sbbq 176(%rsi), %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r8) + sbbq 184(%rsi), %rax + movq %rax, 184(%r8) + sbbq $0, %rcx + movq (%r8), %rdx + subq (%rdi), %rdx + movq 8(%r8), %rax + movq %rdx, (%r8) + sbbq 8(%rdi), %rax + movq 16(%r8), %rdx + movq %rax, 8(%r8) + sbbq 16(%rdi), %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r8) + sbbq 24(%rdi), %rax + movq 32(%r8), %rdx + movq %rax, 24(%r8) + sbbq 32(%rdi), %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r8) + sbbq 40(%rdi), %rax + movq 48(%r8), %rdx + movq %rax, 40(%r8) + sbbq 48(%rdi), %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r8) + sbbq 56(%rdi), %rax + movq 64(%r8), %rdx + movq %rax, 56(%r8) + sbbq 64(%rdi), %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r8) + sbbq 72(%rdi), %rax + movq 80(%r8), %rdx + movq %rax, 72(%r8) + sbbq 80(%rdi), %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r8) + sbbq 88(%rdi), %rax + movq 96(%r8), %rdx + movq %rax, 88(%r8) + sbbq 96(%rdi), %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r8) + sbbq 104(%rdi), %rax + movq 112(%r8), %rdx + movq %rax, 104(%r8) + sbbq 112(%rdi), %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r8) + sbbq 120(%rdi), %rax + movq 128(%r8), %rdx + movq %rax, 120(%r8) + sbbq 128(%rdi), %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r8) + sbbq 136(%rdi), %rax + movq 144(%r8), %rdx + movq %rax, 136(%r8) + sbbq 144(%rdi), %rdx + movq 152(%r8), %rax + movq %rdx, 144(%r8) + sbbq 152(%rdi), %rax + movq 160(%r8), %rdx + movq %rax, 152(%r8) + sbbq 160(%rdi), %rdx + movq 168(%r8), %rax + movq %rdx, 160(%r8) + sbbq 168(%rdi), %rax + movq 176(%r8), %rdx + movq %rax, 168(%r8) + sbbq 176(%rdi), %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r8) + sbbq 184(%rdi), %rax + movq %rax, 184(%r8) + sbbq $0, %rcx + subq $96, %r9 + # Add in place + movq (%r9), %rdx + addq (%r8), %rdx + movq 8(%r9), %rax + movq %rdx, (%r9) + adcq 8(%r8), %rax + movq 16(%r9), %rdx + movq %rax, 8(%r9) + adcq 16(%r8), %rdx + movq 24(%r9), %rax + movq %rdx, 16(%r9) + adcq 24(%r8), %rax + movq 32(%r9), %rdx + movq %rax, 24(%r9) + adcq 32(%r8), %rdx + movq 40(%r9), %rax + movq %rdx, 32(%r9) + adcq 40(%r8), %rax + movq 48(%r9), %rdx + movq %rax, 40(%r9) + adcq 48(%r8), %rdx + movq 56(%r9), %rax + movq %rdx, 48(%r9) + adcq 56(%r8), %rax + movq 64(%r9), %rdx + movq %rax, 56(%r9) + adcq 64(%r8), %rdx + movq 72(%r9), %rax + movq %rdx, 64(%r9) + adcq 72(%r8), %rax + movq 80(%r9), 
%rdx + movq %rax, 72(%r9) + adcq 80(%r8), %rdx + movq 88(%r9), %rax + movq %rdx, 80(%r9) + adcq 88(%r8), %rax + movq 96(%r9), %rdx + movq %rax, 88(%r9) + adcq 96(%r8), %rdx + movq 104(%r9), %rax + movq %rdx, 96(%r9) + adcq 104(%r8), %rax + movq 112(%r9), %rdx + movq %rax, 104(%r9) + adcq 112(%r8), %rdx + movq 120(%r9), %rax + movq %rdx, 112(%r9) + adcq 120(%r8), %rax + movq 128(%r9), %rdx + movq %rax, 120(%r9) + adcq 128(%r8), %rdx + movq 136(%r9), %rax + movq %rdx, 128(%r9) + adcq 136(%r8), %rax + movq 144(%r9), %rdx + movq %rax, 136(%r9) + adcq 144(%r8), %rdx + movq 152(%r9), %rax + movq %rdx, 144(%r9) + adcq 152(%r8), %rax + movq 160(%r9), %rdx + movq %rax, 152(%r9) + adcq 160(%r8), %rdx + movq 168(%r9), %rax + movq %rdx, 160(%r9) + adcq 168(%r8), %rax + movq 176(%r9), %rdx + movq %rax, 168(%r9) + adcq 176(%r8), %rdx + movq 184(%r9), %rax + movq %rdx, 176(%r9) + adcq 184(%r8), %rax + movq %rax, 184(%r9) + adcq $0, %rcx + movq %rcx, 288(%rdi) + # Add in place + movq 96(%r9), %rdx + addq (%rsi), %rdx + movq 104(%r9), %rax + movq %rdx, 96(%r9) + adcq 8(%rsi), %rax + movq 112(%r9), %rdx + movq %rax, 104(%r9) + adcq 16(%rsi), %rdx + movq 120(%r9), %rax + movq %rdx, 112(%r9) + adcq 24(%rsi), %rax + movq 128(%r9), %rdx + movq %rax, 120(%r9) + adcq 32(%rsi), %rdx + movq 136(%r9), %rax + movq %rdx, 128(%r9) + adcq 40(%rsi), %rax + movq 144(%r9), %rdx + movq %rax, 136(%r9) + adcq 48(%rsi), %rdx + movq 152(%r9), %rax + movq %rdx, 144(%r9) + adcq 56(%rsi), %rax + movq 160(%r9), %rdx + movq %rax, 152(%r9) + adcq 64(%rsi), %rdx + movq 168(%r9), %rax + movq %rdx, 160(%r9) + adcq 72(%rsi), %rax + movq 176(%r9), %rdx + movq %rax, 168(%r9) + adcq 80(%rsi), %rdx + movq 184(%r9), %rax + movq %rdx, 176(%r9) + adcq 88(%rsi), %rax + movq 192(%r9), %rdx + movq %rax, 184(%r9) + adcq 96(%rsi), %rdx + movq %rdx, 192(%r9) + # Add to zero + movq 104(%rsi), %rdx + adcq $0, %rdx + movq 112(%rsi), %rax + movq %rdx, 200(%r9) + adcq $0, %rax + movq 120(%rsi), %rdx + movq %rax, 208(%r9) + adcq $0, %rdx + movq 128(%rsi), %rax + movq %rdx, 216(%r9) + adcq $0, %rax + movq 136(%rsi), %rdx + movq %rax, 224(%r9) + adcq $0, %rdx + movq 144(%rsi), %rax + movq %rdx, 232(%r9) + adcq $0, %rax + movq 152(%rsi), %rdx + movq %rax, 240(%r9) + adcq $0, %rdx + movq 160(%rsi), %rax + movq %rdx, 248(%r9) + adcq $0, %rax + movq 168(%rsi), %rdx + movq %rax, 256(%r9) + adcq $0, %rdx + movq 176(%rsi), %rax + movq %rdx, 264(%r9) + adcq $0, %rax + movq 184(%rsi), %rdx + movq %rax, 272(%r9) + adcq $0, %rdx + movq %rdx, 280(%r9) + addq $504, %rsp + repz retq +#ifndef __APPLE__ +.size sp_3072_sqr_avx2_24,.-sp_3072_sqr_avx2_24 +#endif /* __APPLE__ */ +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. 
+ */ +#ifndef __APPLE__ +.globl sp_3072_sub_in_place_48 +.type sp_3072_sub_in_place_48,@function +.align 16 +sp_3072_sub_in_place_48: +#else +.globl _sp_3072_sub_in_place_48 +.p2align 4 +_sp_3072_sub_in_place_48: +#endif /* __APPLE__ */ + movq (%rdi), %rdx + xorq %rax, %rax + subq (%rsi), %rdx + movq 8(%rdi), %rcx + movq %rdx, (%rdi) + sbbq 8(%rsi), %rcx + movq 16(%rdi), %rdx + movq %rcx, 8(%rdi) + sbbq 16(%rsi), %rdx + movq 24(%rdi), %rcx + movq %rdx, 16(%rdi) + sbbq 24(%rsi), %rcx + movq 32(%rdi), %rdx + movq %rcx, 24(%rdi) + sbbq 32(%rsi), %rdx + movq 40(%rdi), %rcx + movq %rdx, 32(%rdi) + sbbq 40(%rsi), %rcx + movq 48(%rdi), %rdx + movq %rcx, 40(%rdi) + sbbq 48(%rsi), %rdx + movq 56(%rdi), %rcx + movq %rdx, 48(%rdi) + sbbq 56(%rsi), %rcx + movq 64(%rdi), %rdx + movq %rcx, 56(%rdi) + sbbq 64(%rsi), %rdx + movq 72(%rdi), %rcx + movq %rdx, 64(%rdi) + sbbq 72(%rsi), %rcx + movq 80(%rdi), %rdx + movq %rcx, 72(%rdi) + sbbq 80(%rsi), %rdx + movq 88(%rdi), %rcx + movq %rdx, 80(%rdi) + sbbq 88(%rsi), %rcx + movq 96(%rdi), %rdx + movq %rcx, 88(%rdi) + sbbq 96(%rsi), %rdx + movq 104(%rdi), %rcx + movq %rdx, 96(%rdi) + sbbq 104(%rsi), %rcx + movq 112(%rdi), %rdx + movq %rcx, 104(%rdi) + sbbq 112(%rsi), %rdx + movq 120(%rdi), %rcx + movq %rdx, 112(%rdi) + sbbq 120(%rsi), %rcx + movq 128(%rdi), %rdx + movq %rcx, 120(%rdi) + sbbq 128(%rsi), %rdx + movq 136(%rdi), %rcx + movq %rdx, 128(%rdi) + sbbq 136(%rsi), %rcx + movq 144(%rdi), %rdx + movq %rcx, 136(%rdi) + sbbq 144(%rsi), %rdx + movq 152(%rdi), %rcx + movq %rdx, 144(%rdi) + sbbq 152(%rsi), %rcx + movq 160(%rdi), %rdx + movq %rcx, 152(%rdi) + sbbq 160(%rsi), %rdx + movq 168(%rdi), %rcx + movq %rdx, 160(%rdi) + sbbq 168(%rsi), %rcx + movq 176(%rdi), %rdx + movq %rcx, 168(%rdi) + sbbq 176(%rsi), %rdx + movq 184(%rdi), %rcx + movq %rdx, 176(%rdi) + sbbq 184(%rsi), %rcx + movq 192(%rdi), %rdx + movq %rcx, 184(%rdi) + sbbq 192(%rsi), %rdx + movq 200(%rdi), %rcx + movq %rdx, 192(%rdi) + sbbq 200(%rsi), %rcx + movq 208(%rdi), %rdx + movq %rcx, 200(%rdi) + sbbq 208(%rsi), %rdx + movq 216(%rdi), %rcx + movq %rdx, 208(%rdi) + sbbq 216(%rsi), %rcx + movq 224(%rdi), %rdx + movq %rcx, 216(%rdi) + sbbq 224(%rsi), %rdx + movq 232(%rdi), %rcx + movq %rdx, 224(%rdi) + sbbq 232(%rsi), %rcx + movq 240(%rdi), %rdx + movq %rcx, 232(%rdi) + sbbq 240(%rsi), %rdx + movq 248(%rdi), %rcx + movq %rdx, 240(%rdi) + sbbq 248(%rsi), %rcx + movq 256(%rdi), %rdx + movq %rcx, 248(%rdi) + sbbq 256(%rsi), %rdx + movq 264(%rdi), %rcx + movq %rdx, 256(%rdi) + sbbq 264(%rsi), %rcx + movq 272(%rdi), %rdx + movq %rcx, 264(%rdi) + sbbq 272(%rsi), %rdx + movq 280(%rdi), %rcx + movq %rdx, 272(%rdi) + sbbq 280(%rsi), %rcx + movq 288(%rdi), %rdx + movq %rcx, 280(%rdi) + sbbq 288(%rsi), %rdx + movq 296(%rdi), %rcx + movq %rdx, 288(%rdi) + sbbq 296(%rsi), %rcx + movq 304(%rdi), %rdx + movq %rcx, 296(%rdi) + sbbq 304(%rsi), %rdx + movq 312(%rdi), %rcx + movq %rdx, 304(%rdi) + sbbq 312(%rsi), %rcx + movq 320(%rdi), %rdx + movq %rcx, 312(%rdi) + sbbq 320(%rsi), %rdx + movq 328(%rdi), %rcx + movq %rdx, 320(%rdi) + sbbq 328(%rsi), %rcx + movq 336(%rdi), %rdx + movq %rcx, 328(%rdi) + sbbq 336(%rsi), %rdx + movq 344(%rdi), %rcx + movq %rdx, 336(%rdi) + sbbq 344(%rsi), %rcx + movq 352(%rdi), %rdx + movq %rcx, 344(%rdi) + sbbq 352(%rsi), %rdx + movq 360(%rdi), %rcx + movq %rdx, 352(%rdi) + sbbq 360(%rsi), %rcx + movq 368(%rdi), %rdx + movq %rcx, 360(%rdi) + sbbq 368(%rsi), %rdx + movq 376(%rdi), %rcx + movq %rdx, 368(%rdi) + sbbq 376(%rsi), %rcx + movq %rcx, 376(%rdi) + sbbq $0, %rax + repz retq 
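The sub-in-place routine above is a single unrolled borrow chain over 48 limbs; functionally it matches this C sketch (the signature is illustrative, not wolfSSL's C API):

    #include <stdint.h>

    /* a -= b over 48 little-endian 64-bit limbs; returns 0 on no borrow,
     * all ones on borrow, mirroring the final sbbq $0, %rax. */
    uint64_t sub_in_place_48(uint64_t a[48], const uint64_t b[48])
    {
        uint64_t borrow = 0;
        for (int i = 0; i < 48; i++) {
            unsigned __int128 t = (unsigned __int128)a[i] - b[i] - borrow;
            a[i] = (uint64_t)t;
            borrow = (uint64_t)(t >> 64) & 1;   /* 1 iff this limb wrapped */
        }
        return (uint64_t)0 - borrow;
    }

sp_3072_add_48, which follows, is the exact dual: the same interleaved load/store pattern with addq/adcq in place of subq/sbbq, returning the final carry instead of a borrow mask.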
+#ifndef __APPLE__ +.size sp_3072_sub_in_place_48,.-sp_3072_sub_in_place_48 +#endif /* __APPLE__ */ +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_3072_add_48 +.type sp_3072_add_48,@function +.align 16 +sp_3072_add_48: +#else +.globl _sp_3072_add_48 +.p2align 4 +_sp_3072_add_48: +#endif /* __APPLE__ */ + # Add + movq (%rsi), %rcx + xorq %rax, %rax + addq (%rdx), %rcx + movq 8(%rsi), %r8 + movq %rcx, (%rdi) + adcq 8(%rdx), %r8 + movq 16(%rsi), %rcx + movq %r8, 8(%rdi) + adcq 16(%rdx), %rcx + movq 24(%rsi), %r8 + movq %rcx, 16(%rdi) + adcq 24(%rdx), %r8 + movq 32(%rsi), %rcx + movq %r8, 24(%rdi) + adcq 32(%rdx), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%rdi) + adcq 40(%rdx), %r8 + movq 48(%rsi), %rcx + movq %r8, 40(%rdi) + adcq 48(%rdx), %rcx + movq 56(%rsi), %r8 + movq %rcx, 48(%rdi) + adcq 56(%rdx), %r8 + movq 64(%rsi), %rcx + movq %r8, 56(%rdi) + adcq 64(%rdx), %rcx + movq 72(%rsi), %r8 + movq %rcx, 64(%rdi) + adcq 72(%rdx), %r8 + movq 80(%rsi), %rcx + movq %r8, 72(%rdi) + adcq 80(%rdx), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%rdi) + adcq 88(%rdx), %r8 + movq 96(%rsi), %rcx + movq %r8, 88(%rdi) + adcq 96(%rdx), %rcx + movq 104(%rsi), %r8 + movq %rcx, 96(%rdi) + adcq 104(%rdx), %r8 + movq 112(%rsi), %rcx + movq %r8, 104(%rdi) + adcq 112(%rdx), %rcx + movq 120(%rsi), %r8 + movq %rcx, 112(%rdi) + adcq 120(%rdx), %r8 + movq 128(%rsi), %rcx + movq %r8, 120(%rdi) + adcq 128(%rdx), %rcx + movq 136(%rsi), %r8 + movq %rcx, 128(%rdi) + adcq 136(%rdx), %r8 + movq 144(%rsi), %rcx + movq %r8, 136(%rdi) + adcq 144(%rdx), %rcx + movq 152(%rsi), %r8 + movq %rcx, 144(%rdi) + adcq 152(%rdx), %r8 + movq 160(%rsi), %rcx + movq %r8, 152(%rdi) + adcq 160(%rdx), %rcx + movq 168(%rsi), %r8 + movq %rcx, 160(%rdi) + adcq 168(%rdx), %r8 + movq 176(%rsi), %rcx + movq %r8, 168(%rdi) + adcq 176(%rdx), %rcx + movq 184(%rsi), %r8 + movq %rcx, 176(%rdi) + adcq 184(%rdx), %r8 + movq 192(%rsi), %rcx + movq %r8, 184(%rdi) + adcq 192(%rdx), %rcx + movq 200(%rsi), %r8 + movq %rcx, 192(%rdi) + adcq 200(%rdx), %r8 + movq 208(%rsi), %rcx + movq %r8, 200(%rdi) + adcq 208(%rdx), %rcx + movq 216(%rsi), %r8 + movq %rcx, 208(%rdi) + adcq 216(%rdx), %r8 + movq 224(%rsi), %rcx + movq %r8, 216(%rdi) + adcq 224(%rdx), %rcx + movq 232(%rsi), %r8 + movq %rcx, 224(%rdi) + adcq 232(%rdx), %r8 + movq 240(%rsi), %rcx + movq %r8, 232(%rdi) + adcq 240(%rdx), %rcx + movq 248(%rsi), %r8 + movq %rcx, 240(%rdi) + adcq 248(%rdx), %r8 + movq 256(%rsi), %rcx + movq %r8, 248(%rdi) + adcq 256(%rdx), %rcx + movq 264(%rsi), %r8 + movq %rcx, 256(%rdi) + adcq 264(%rdx), %r8 + movq 272(%rsi), %rcx + movq %r8, 264(%rdi) + adcq 272(%rdx), %rcx + movq 280(%rsi), %r8 + movq %rcx, 272(%rdi) + adcq 280(%rdx), %r8 + movq 288(%rsi), %rcx + movq %r8, 280(%rdi) + adcq 288(%rdx), %rcx + movq 296(%rsi), %r8 + movq %rcx, 288(%rdi) + adcq 296(%rdx), %r8 + movq 304(%rsi), %rcx + movq %r8, 296(%rdi) + adcq 304(%rdx), %rcx + movq 312(%rsi), %r8 + movq %rcx, 304(%rdi) + adcq 312(%rdx), %r8 + movq 320(%rsi), %rcx + movq %r8, 312(%rdi) + adcq 320(%rdx), %rcx + movq 328(%rsi), %r8 + movq %rcx, 320(%rdi) + adcq 328(%rdx), %r8 + movq 336(%rsi), %rcx + movq %r8, 328(%rdi) + adcq 336(%rdx), %rcx + movq 344(%rsi), %r8 + movq %rcx, 336(%rdi) + adcq 344(%rdx), %r8 + movq 352(%rsi), %rcx + movq %r8, 344(%rdi) + adcq 352(%rdx), %rcx + movq 360(%rsi), %r8 + movq %rcx, 352(%rdi) + adcq 360(%rdx), %r8 + movq 368(%rsi), %rcx + movq %r8, 360(%rdi) + adcq 
368(%rdx), %rcx + movq 376(%rsi), %r8 + movq %rcx, 368(%rdi) + adcq 376(%rdx), %r8 + movq %r8, 376(%rdi) + adcq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_3072_add_48,.-sp_3072_add_48 +#endif /* __APPLE__ */ +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_3072_mul_48 +.type sp_3072_mul_48,@function +.align 16 +sp_3072_mul_48: +#else +.globl _sp_3072_mul_48 +.p2align 4 +_sp_3072_mul_48: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + subq $1192, %rsp + movq %rdi, 1152(%rsp) + movq %rsi, 1160(%rsp) + movq %rdx, 1168(%rsp) + leaq 768(%rsp), %r10 + leaq 192(%rsi), %r12 + # Add + movq (%rsi), %rax + xorq %r13, %r13 + addq (%r12), %rax + movq 8(%rsi), %rcx + movq %rax, (%r10) + adcq 8(%r12), %rcx + movq 16(%rsi), %r8 + movq %rcx, 8(%r10) + adcq 16(%r12), %r8 + movq 24(%rsi), %rax + movq %r8, 16(%r10) + adcq 24(%r12), %rax + movq 32(%rsi), %rcx + movq %rax, 24(%r10) + adcq 32(%r12), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%r10) + adcq 40(%r12), %r8 + movq 48(%rsi), %rax + movq %r8, 40(%r10) + adcq 48(%r12), %rax + movq 56(%rsi), %rcx + movq %rax, 48(%r10) + adcq 56(%r12), %rcx + movq 64(%rsi), %r8 + movq %rcx, 56(%r10) + adcq 64(%r12), %r8 + movq 72(%rsi), %rax + movq %r8, 64(%r10) + adcq 72(%r12), %rax + movq 80(%rsi), %rcx + movq %rax, 72(%r10) + adcq 80(%r12), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%r10) + adcq 88(%r12), %r8 + movq 96(%rsi), %rax + movq %r8, 88(%r10) + adcq 96(%r12), %rax + movq 104(%rsi), %rcx + movq %rax, 96(%r10) + adcq 104(%r12), %rcx + movq 112(%rsi), %r8 + movq %rcx, 104(%r10) + adcq 112(%r12), %r8 + movq 120(%rsi), %rax + movq %r8, 112(%r10) + adcq 120(%r12), %rax + movq 128(%rsi), %rcx + movq %rax, 120(%r10) + adcq 128(%r12), %rcx + movq 136(%rsi), %r8 + movq %rcx, 128(%r10) + adcq 136(%r12), %r8 + movq 144(%rsi), %rax + movq %r8, 136(%r10) + adcq 144(%r12), %rax + movq 152(%rsi), %rcx + movq %rax, 144(%r10) + adcq 152(%r12), %rcx + movq 160(%rsi), %r8 + movq %rcx, 152(%r10) + adcq 160(%r12), %r8 + movq 168(%rsi), %rax + movq %r8, 160(%r10) + adcq 168(%r12), %rax + movq 176(%rsi), %rcx + movq %rax, 168(%r10) + adcq 176(%r12), %rcx + movq 184(%rsi), %r8 + movq %rcx, 176(%r10) + adcq 184(%r12), %r8 + movq %r8, 184(%r10) + adcq $0, %r13 + movq %r13, 1176(%rsp) + leaq 960(%rsp), %r11 + leaq 192(%rdx), %r12 + # Add + movq (%rdx), %rax + xorq %r14, %r14 + addq (%r12), %rax + movq 8(%rdx), %rcx + movq %rax, (%r11) + adcq 8(%r12), %rcx + movq 16(%rdx), %r8 + movq %rcx, 8(%r11) + adcq 16(%r12), %r8 + movq 24(%rdx), %rax + movq %r8, 16(%r11) + adcq 24(%r12), %rax + movq 32(%rdx), %rcx + movq %rax, 24(%r11) + adcq 32(%r12), %rcx + movq 40(%rdx), %r8 + movq %rcx, 32(%r11) + adcq 40(%r12), %r8 + movq 48(%rdx), %rax + movq %r8, 40(%r11) + adcq 48(%r12), %rax + movq 56(%rdx), %rcx + movq %rax, 48(%r11) + adcq 56(%r12), %rcx + movq 64(%rdx), %r8 + movq %rcx, 56(%r11) + adcq 64(%r12), %r8 + movq 72(%rdx), %rax + movq %r8, 64(%r11) + adcq 72(%r12), %rax + movq 80(%rdx), %rcx + movq %rax, 72(%r11) + adcq 80(%r12), %rcx + movq 88(%rdx), %r8 + movq %rcx, 80(%r11) + adcq 88(%r12), %r8 + movq 96(%rdx), %rax + movq %r8, 88(%r11) + adcq 96(%r12), %rax + movq 104(%rdx), %rcx + movq %rax, 96(%r11) + adcq 104(%r12), %rcx + movq 112(%rdx), %r8 + movq %rcx, 104(%r11) + adcq 112(%r12), %r8 + movq 120(%rdx), %rax + movq %r8, 112(%r11) + adcq 120(%r12), %rax + movq 128(%rdx), %rcx + movq %rax, 120(%r11) + adcq 128(%r12), %rcx + movq 
136(%rdx), %r8 + movq %rcx, 128(%r11) + adcq 136(%r12), %r8 + movq 144(%rdx), %rax + movq %r8, 136(%r11) + adcq 144(%r12), %rax + movq 152(%rdx), %rcx + movq %rax, 144(%r11) + adcq 152(%r12), %rcx + movq 160(%rdx), %r8 + movq %rcx, 152(%r11) + adcq 160(%r12), %r8 + movq 168(%rdx), %rax + movq %r8, 160(%r11) + adcq 168(%r12), %rax + movq 176(%rdx), %rcx + movq %rax, 168(%r11) + adcq 176(%r12), %rcx + movq 184(%rdx), %r8 + movq %rcx, 176(%r11) + adcq 184(%r12), %r8 + movq %r8, 184(%r11) + adcq $0, %r14 + movq %r14, 1184(%rsp) + movq %r11, %rdx + movq %r10, %rsi + movq %rsp, %rdi +#ifndef __APPLE__ + callq sp_3072_mul_24@plt +#else + callq _sp_3072_mul_24 +#endif /* __APPLE__ */ + movq 1168(%rsp), %rdx + movq 1160(%rsp), %rsi + leaq 384(%rsp), %rdi + addq $192, %rdx + addq $192, %rsi +#ifndef __APPLE__ + callq sp_3072_mul_24@plt +#else + callq _sp_3072_mul_24 +#endif /* __APPLE__ */ + movq 1168(%rsp), %rdx + movq 1160(%rsp), %rsi + movq 1152(%rsp), %rdi +#ifndef __APPLE__ + callq sp_3072_mul_24@plt +#else + callq _sp_3072_mul_24 +#endif /* __APPLE__ */ + movq 1176(%rsp), %r13 + movq 1184(%rsp), %r14 + movq 1152(%rsp), %r15 + movq %r13, %r9 + leaq 768(%rsp), %r10 + leaq 960(%rsp), %r11 + andq %r14, %r9 + negq %r13 + negq %r14 + addq $384, %r15 + movq (%r10), %rax + movq (%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, (%r10) + movq %rcx, (%r11) + movq 8(%r10), %rax + movq 8(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 8(%r10) + movq %rcx, 8(%r11) + movq 16(%r10), %rax + movq 16(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 16(%r10) + movq %rcx, 16(%r11) + movq 24(%r10), %rax + movq 24(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 24(%r10) + movq %rcx, 24(%r11) + movq 32(%r10), %rax + movq 32(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 32(%r10) + movq %rcx, 32(%r11) + movq 40(%r10), %rax + movq 40(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 40(%r10) + movq %rcx, 40(%r11) + movq 48(%r10), %rax + movq 48(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 48(%r10) + movq %rcx, 48(%r11) + movq 56(%r10), %rax + movq 56(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 56(%r10) + movq %rcx, 56(%r11) + movq 64(%r10), %rax + movq 64(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 64(%r10) + movq %rcx, 64(%r11) + movq 72(%r10), %rax + movq 72(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 72(%r10) + movq %rcx, 72(%r11) + movq 80(%r10), %rax + movq 80(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 80(%r10) + movq %rcx, 80(%r11) + movq 88(%r10), %rax + movq 88(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 88(%r10) + movq %rcx, 88(%r11) + movq 96(%r10), %rax + movq 96(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 96(%r10) + movq %rcx, 96(%r11) + movq 104(%r10), %rax + movq 104(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 104(%r10) + movq %rcx, 104(%r11) + movq 112(%r10), %rax + movq 112(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 112(%r10) + movq %rcx, 112(%r11) + movq 120(%r10), %rax + movq 120(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 120(%r10) + movq %rcx, 120(%r11) + movq 128(%r10), %rax + movq 128(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 128(%r10) + movq %rcx, 128(%r11) + movq 136(%r10), %rax + movq 136(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 136(%r10) + movq %rcx, 136(%r11) + movq 144(%r10), %rax + movq 144(%r11), %rcx + andq 
%r14, %rax + andq %r13, %rcx + movq %rax, 144(%r10) + movq %rcx, 144(%r11) + movq 152(%r10), %rax + movq 152(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 152(%r10) + movq %rcx, 152(%r11) + movq 160(%r10), %rax + movq 160(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 160(%r10) + movq %rcx, 160(%r11) + movq 168(%r10), %rax + movq 168(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 168(%r10) + movq %rcx, 168(%r11) + movq 176(%r10), %rax + movq 176(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 176(%r10) + movq %rcx, 176(%r11) + movq 184(%r10), %rax + movq 184(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 184(%r10) + movq %rcx, 184(%r11) + movq (%r10), %rax + addq (%r11), %rax + movq 8(%r10), %rcx + movq %rax, (%r15) + adcq 8(%r11), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r15) + adcq 16(%r11), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r15) + adcq 24(%r11), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r15) + adcq 32(%r11), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r15) + adcq 40(%r11), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r15) + adcq 48(%r11), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r15) + adcq 56(%r11), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r15) + adcq 64(%r11), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r15) + adcq 72(%r11), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r15) + adcq 80(%r11), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r15) + adcq 88(%r11), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r15) + adcq 96(%r11), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r15) + adcq 104(%r11), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r15) + adcq 112(%r11), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r15) + adcq 120(%r11), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r15) + adcq 128(%r11), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r15) + adcq 136(%r11), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r15) + adcq 144(%r11), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r15) + adcq 152(%r11), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r15) + adcq 160(%r11), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r15) + adcq 168(%r11), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r15) + adcq 176(%r11), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r15) + adcq 184(%r11), %r8 + movq %r8, 184(%r15) + adcq $0, %r9 + leaq 384(%rsp), %r11 + movq %rsp, %r10 + movq (%r10), %rax + subq (%r11), %rax + movq 8(%r10), %rcx + movq %rax, (%r10) + sbbq 8(%r11), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r10) + sbbq 16(%r11), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r10) + sbbq 24(%r11), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r10) + sbbq 32(%r11), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r10) + sbbq 40(%r11), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r10) + sbbq 48(%r11), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r10) + sbbq 56(%r11), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r10) + sbbq 64(%r11), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r10) + sbbq 72(%r11), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r10) + sbbq 80(%r11), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r10) + sbbq 88(%r11), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r10) + sbbq 96(%r11), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r10) + sbbq 104(%r11), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r10) + sbbq 112(%r11), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r10) + sbbq 120(%r11), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r10) + sbbq 128(%r11), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r10) + sbbq 136(%r11), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r10) 
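The mask-and-add passage just above handles the one-bit overflows of the two half-operand sums: with ca and cb the saved carries, the true middle product is (sa + ca*2^1536)(sb + cb*2^1536), so the code adds the cb-masked sa and the ca-masked sb at the right offset and keeps the AND of the two carries (built into %r9 earlier) as the topmost correction, all with 0/all-ones masks instead of branches. A branch-free conditional add in C that captures the same idea, under illustrative names:

    #include <stdint.h>

    /* r += (flag ? b : 0) over n limbs, branch-free; flag is 0 or 1. */
    static uint64_t cond_add_n(uint64_t *r, const uint64_t *b, int n, uint64_t flag)
    {
        uint64_t mask = (uint64_t)0 - flag;   /* stretch the carry to 0 or all ones */
        uint64_t c = 0;
        for (int i = 0; i < n; i++) {
            unsigned __int128 t = (unsigned __int128)r[i] + (b[i] & mask) + c;
            r[i] = (uint64_t)t;
            c = (uint64_t)(t >> 64);
        }
        return c;
    }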
+ sbbq 144(%r11), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r10) + sbbq 152(%r11), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r10) + sbbq 160(%r11), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r10) + sbbq 168(%r11), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r10) + sbbq 176(%r11), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r10) + sbbq 184(%r11), %r8 + movq 192(%r10), %rax + movq %r8, 184(%r10) + sbbq 192(%r11), %rax + movq 200(%r10), %rcx + movq %rax, 192(%r10) + sbbq 200(%r11), %rcx + movq 208(%r10), %r8 + movq %rcx, 200(%r10) + sbbq 208(%r11), %r8 + movq 216(%r10), %rax + movq %r8, 208(%r10) + sbbq 216(%r11), %rax + movq 224(%r10), %rcx + movq %rax, 216(%r10) + sbbq 224(%r11), %rcx + movq 232(%r10), %r8 + movq %rcx, 224(%r10) + sbbq 232(%r11), %r8 + movq 240(%r10), %rax + movq %r8, 232(%r10) + sbbq 240(%r11), %rax + movq 248(%r10), %rcx + movq %rax, 240(%r10) + sbbq 248(%r11), %rcx + movq 256(%r10), %r8 + movq %rcx, 248(%r10) + sbbq 256(%r11), %r8 + movq 264(%r10), %rax + movq %r8, 256(%r10) + sbbq 264(%r11), %rax + movq 272(%r10), %rcx + movq %rax, 264(%r10) + sbbq 272(%r11), %rcx + movq 280(%r10), %r8 + movq %rcx, 272(%r10) + sbbq 280(%r11), %r8 + movq 288(%r10), %rax + movq %r8, 280(%r10) + sbbq 288(%r11), %rax + movq 296(%r10), %rcx + movq %rax, 288(%r10) + sbbq 296(%r11), %rcx + movq 304(%r10), %r8 + movq %rcx, 296(%r10) + sbbq 304(%r11), %r8 + movq 312(%r10), %rax + movq %r8, 304(%r10) + sbbq 312(%r11), %rax + movq 320(%r10), %rcx + movq %rax, 312(%r10) + sbbq 320(%r11), %rcx + movq 328(%r10), %r8 + movq %rcx, 320(%r10) + sbbq 328(%r11), %r8 + movq 336(%r10), %rax + movq %r8, 328(%r10) + sbbq 336(%r11), %rax + movq 344(%r10), %rcx + movq %rax, 336(%r10) + sbbq 344(%r11), %rcx + movq 352(%r10), %r8 + movq %rcx, 344(%r10) + sbbq 352(%r11), %r8 + movq 360(%r10), %rax + movq %r8, 352(%r10) + sbbq 360(%r11), %rax + movq 368(%r10), %rcx + movq %rax, 360(%r10) + sbbq 368(%r11), %rcx + movq 376(%r10), %r8 + movq %rcx, 368(%r10) + sbbq 376(%r11), %r8 + movq %r8, 376(%r10) + sbbq $0, %r9 + movq (%r10), %rax + subq (%rdi), %rax + movq 8(%r10), %rcx + movq %rax, (%r10) + sbbq 8(%rdi), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r10) + sbbq 16(%rdi), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r10) + sbbq 24(%rdi), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r10) + sbbq 32(%rdi), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r10) + sbbq 40(%rdi), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r10) + sbbq 48(%rdi), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r10) + sbbq 56(%rdi), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r10) + sbbq 64(%rdi), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r10) + sbbq 72(%rdi), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r10) + sbbq 80(%rdi), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r10) + sbbq 88(%rdi), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r10) + sbbq 96(%rdi), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r10) + sbbq 104(%rdi), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r10) + sbbq 112(%rdi), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r10) + sbbq 120(%rdi), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r10) + sbbq 128(%rdi), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r10) + sbbq 136(%rdi), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r10) + sbbq 144(%rdi), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r10) + sbbq 152(%rdi), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r10) + sbbq 160(%rdi), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r10) + sbbq 168(%rdi), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r10) + sbbq 176(%rdi), %rcx + movq 184(%r10), %r8 + 
movq %rcx, 176(%r10) + sbbq 184(%rdi), %r8 + movq 192(%r10), %rax + movq %r8, 184(%r10) + sbbq 192(%rdi), %rax + movq 200(%r10), %rcx + movq %rax, 192(%r10) + sbbq 200(%rdi), %rcx + movq 208(%r10), %r8 + movq %rcx, 200(%r10) + sbbq 208(%rdi), %r8 + movq 216(%r10), %rax + movq %r8, 208(%r10) + sbbq 216(%rdi), %rax + movq 224(%r10), %rcx + movq %rax, 216(%r10) + sbbq 224(%rdi), %rcx + movq 232(%r10), %r8 + movq %rcx, 224(%r10) + sbbq 232(%rdi), %r8 + movq 240(%r10), %rax + movq %r8, 232(%r10) + sbbq 240(%rdi), %rax + movq 248(%r10), %rcx + movq %rax, 240(%r10) + sbbq 248(%rdi), %rcx + movq 256(%r10), %r8 + movq %rcx, 248(%r10) + sbbq 256(%rdi), %r8 + movq 264(%r10), %rax + movq %r8, 256(%r10) + sbbq 264(%rdi), %rax + movq 272(%r10), %rcx + movq %rax, 264(%r10) + sbbq 272(%rdi), %rcx + movq 280(%r10), %r8 + movq %rcx, 272(%r10) + sbbq 280(%rdi), %r8 + movq 288(%r10), %rax + movq %r8, 280(%r10) + sbbq 288(%rdi), %rax + movq 296(%r10), %rcx + movq %rax, 288(%r10) + sbbq 296(%rdi), %rcx + movq 304(%r10), %r8 + movq %rcx, 296(%r10) + sbbq 304(%rdi), %r8 + movq 312(%r10), %rax + movq %r8, 304(%r10) + sbbq 312(%rdi), %rax + movq 320(%r10), %rcx + movq %rax, 312(%r10) + sbbq 320(%rdi), %rcx + movq 328(%r10), %r8 + movq %rcx, 320(%r10) + sbbq 328(%rdi), %r8 + movq 336(%r10), %rax + movq %r8, 328(%r10) + sbbq 336(%rdi), %rax + movq 344(%r10), %rcx + movq %rax, 336(%r10) + sbbq 344(%rdi), %rcx + movq 352(%r10), %r8 + movq %rcx, 344(%r10) + sbbq 352(%rdi), %r8 + movq 360(%r10), %rax + movq %r8, 352(%r10) + sbbq 360(%rdi), %rax + movq 368(%r10), %rcx + movq %rax, 360(%r10) + sbbq 368(%rdi), %rcx + movq 376(%r10), %r8 + movq %rcx, 368(%r10) + sbbq 376(%rdi), %r8 + movq %r8, 376(%r10) + sbbq $0, %r9 + subq $192, %r15 + # Add + movq (%r15), %rax + addq (%r10), %rax + movq 8(%r15), %rcx + movq %rax, (%r15) + adcq 8(%r10), %rcx + movq 16(%r15), %r8 + movq %rcx, 8(%r15) + adcq 16(%r10), %r8 + movq 24(%r15), %rax + movq %r8, 16(%r15) + adcq 24(%r10), %rax + movq 32(%r15), %rcx + movq %rax, 24(%r15) + adcq 32(%r10), %rcx + movq 40(%r15), %r8 + movq %rcx, 32(%r15) + adcq 40(%r10), %r8 + movq 48(%r15), %rax + movq %r8, 40(%r15) + adcq 48(%r10), %rax + movq 56(%r15), %rcx + movq %rax, 48(%r15) + adcq 56(%r10), %rcx + movq 64(%r15), %r8 + movq %rcx, 56(%r15) + adcq 64(%r10), %r8 + movq 72(%r15), %rax + movq %r8, 64(%r15) + adcq 72(%r10), %rax + movq 80(%r15), %rcx + movq %rax, 72(%r15) + adcq 80(%r10), %rcx + movq 88(%r15), %r8 + movq %rcx, 80(%r15) + adcq 88(%r10), %r8 + movq 96(%r15), %rax + movq %r8, 88(%r15) + adcq 96(%r10), %rax + movq 104(%r15), %rcx + movq %rax, 96(%r15) + adcq 104(%r10), %rcx + movq 112(%r15), %r8 + movq %rcx, 104(%r15) + adcq 112(%r10), %r8 + movq 120(%r15), %rax + movq %r8, 112(%r15) + adcq 120(%r10), %rax + movq 128(%r15), %rcx + movq %rax, 120(%r15) + adcq 128(%r10), %rcx + movq 136(%r15), %r8 + movq %rcx, 128(%r15) + adcq 136(%r10), %r8 + movq 144(%r15), %rax + movq %r8, 136(%r15) + adcq 144(%r10), %rax + movq 152(%r15), %rcx + movq %rax, 144(%r15) + adcq 152(%r10), %rcx + movq 160(%r15), %r8 + movq %rcx, 152(%r15) + adcq 160(%r10), %r8 + movq 168(%r15), %rax + movq %r8, 160(%r15) + adcq 168(%r10), %rax + movq 176(%r15), %rcx + movq %rax, 168(%r15) + adcq 176(%r10), %rcx + movq 184(%r15), %r8 + movq %rcx, 176(%r15) + adcq 184(%r10), %r8 + movq 192(%r15), %rax + movq %r8, 184(%r15) + adcq 192(%r10), %rax + movq 200(%r15), %rcx + movq %rax, 192(%r15) + adcq 200(%r10), %rcx + movq 208(%r15), %r8 + movq %rcx, 200(%r15) + adcq 208(%r10), %r8 + movq 216(%r15), %rax + movq %r8, 208(%r15) + 
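Taken as a whole, sp_3072_mul_48 is one Karatsuba level: split each operand into 24-limb halves, call sp_3072_mul_24 three times (on the half-sums, the high halves, and the low halves), then recombine as lo + cross*2^1536 + hi*2^3072 with cross = (aL+aH)(bL+bH) - lo - hi; the subtract and add-in-place passages around this point are that recombination. A compact C model of the same split; the helper names are illustrative, and the half-sum carries are kept in an extra limb rather than fixed up with masks as the assembly does:

    #include <stdint.h>
    #include <string.h>

    typedef unsigned __int128 u128;

    static uint64_t add_n(uint64_t *r, const uint64_t *a, const uint64_t *b, int n)
    {
        uint64_t c = 0;
        for (int i = 0; i < n; i++) {
            u128 t = (u128)a[i] + b[i] + c;
            r[i] = (uint64_t)t;
            c = (uint64_t)(t >> 64);
        }
        return c;
    }

    /* m (total limbs) -= x (n limbs, zero-extended); m stays non-negative here. */
    static void sub_ext(uint64_t *m, const uint64_t *x, int n, int total)
    {
        uint64_t br = 0;
        for (int i = 0; i < total; i++) {
            u128 t = (u128)m[i] - (i < n ? x[i] : 0) - br;
            m[i] = (uint64_t)t;
            br = (uint64_t)(t >> 64) & 1;
        }
    }

    /* Schoolbook stand-in for the sp_3072_mul_24 building block. */
    static void mul_n(uint64_t *r, const uint64_t *a, const uint64_t *b, int n)
    {
        memset(r, 0, 2 * (size_t)n * sizeof r[0]);
        for (int i = 0; i < n; i++) {
            uint64_t c = 0;
            for (int j = 0; j < n; j++) {
                u128 t = (u128)a[i] * b[j] + r[i + j] + c;
                r[i + j] = (uint64_t)t;
                c = (uint64_t)(t >> 64);
            }
            r[i + n] = c;
        }
    }

    void mul_48_karatsuba(uint64_t r[96], const uint64_t a[48], const uint64_t b[48])
    {
        enum { H = 24 };
        uint64_t as[H + 1], bs[H + 1], m[2 * H + 2];

        as[H] = add_n(as, a, a + H, H);        /* aL + aH, carry in top limb */
        bs[H] = add_n(bs, b, b + H, H);        /* bL + bH                    */

        mul_n(r, a, b, H);                     /* lo  = aL*bL -> r[0..48)    */
        mul_n(r + 2 * H, a + H, b + H, H);     /* hi  = aH*bH -> r[48..96)   */
        mul_n(m, as, bs, H + 1);               /* mid = (aL+aH)*(bL+bH)      */

        sub_ext(m, r, 2 * H, 2 * H + 2);           /* mid -= lo              */
        sub_ext(m, r + 2 * H, 2 * H, 2 * H + 2);   /* mid -= hi: now cross   */

        uint64_t c = add_n(r + H, r + H, m, 2 * H + 2);  /* r += cross<<1536 */
        for (int i = 3 * H + 2; c && i < 4 * H; i++) {   /* ripple last carry */
            u128 t = (u128)r[i] + c;
            r[i] = (uint64_t)t;
            c = (uint64_t)(t >> 64);
        }
    }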
adcq 216(%r10), %rax + movq 224(%r15), %rcx + movq %rax, 216(%r15) + adcq 224(%r10), %rcx + movq 232(%r15), %r8 + movq %rcx, 224(%r15) + adcq 232(%r10), %r8 + movq 240(%r15), %rax + movq %r8, 232(%r15) + adcq 240(%r10), %rax + movq 248(%r15), %rcx + movq %rax, 240(%r15) + adcq 248(%r10), %rcx + movq 256(%r15), %r8 + movq %rcx, 248(%r15) + adcq 256(%r10), %r8 + movq 264(%r15), %rax + movq %r8, 256(%r15) + adcq 264(%r10), %rax + movq 272(%r15), %rcx + movq %rax, 264(%r15) + adcq 272(%r10), %rcx + movq 280(%r15), %r8 + movq %rcx, 272(%r15) + adcq 280(%r10), %r8 + movq 288(%r15), %rax + movq %r8, 280(%r15) + adcq 288(%r10), %rax + movq 296(%r15), %rcx + movq %rax, 288(%r15) + adcq 296(%r10), %rcx + movq 304(%r15), %r8 + movq %rcx, 296(%r15) + adcq 304(%r10), %r8 + movq 312(%r15), %rax + movq %r8, 304(%r15) + adcq 312(%r10), %rax + movq 320(%r15), %rcx + movq %rax, 312(%r15) + adcq 320(%r10), %rcx + movq 328(%r15), %r8 + movq %rcx, 320(%r15) + adcq 328(%r10), %r8 + movq 336(%r15), %rax + movq %r8, 328(%r15) + adcq 336(%r10), %rax + movq 344(%r15), %rcx + movq %rax, 336(%r15) + adcq 344(%r10), %rcx + movq 352(%r15), %r8 + movq %rcx, 344(%r15) + adcq 352(%r10), %r8 + movq 360(%r15), %rax + movq %r8, 352(%r15) + adcq 360(%r10), %rax + movq 368(%r15), %rcx + movq %rax, 360(%r15) + adcq 368(%r10), %rcx + movq 376(%r15), %r8 + movq %rcx, 368(%r15) + adcq 376(%r10), %r8 + movq %r8, 376(%r15) + adcq $0, %r9 + movq %r9, 576(%rdi) + addq $192, %r15 + # Add + movq (%r15), %rax + addq (%r11), %rax + movq 8(%r15), %rcx + movq %rax, (%r15) + adcq 8(%r11), %rcx + movq 16(%r15), %r8 + movq %rcx, 8(%r15) + adcq 16(%r11), %r8 + movq 24(%r15), %rax + movq %r8, 16(%r15) + adcq 24(%r11), %rax + movq 32(%r15), %rcx + movq %rax, 24(%r15) + adcq 32(%r11), %rcx + movq 40(%r15), %r8 + movq %rcx, 32(%r15) + adcq 40(%r11), %r8 + movq 48(%r15), %rax + movq %r8, 40(%r15) + adcq 48(%r11), %rax + movq 56(%r15), %rcx + movq %rax, 48(%r15) + adcq 56(%r11), %rcx + movq 64(%r15), %r8 + movq %rcx, 56(%r15) + adcq 64(%r11), %r8 + movq 72(%r15), %rax + movq %r8, 64(%r15) + adcq 72(%r11), %rax + movq 80(%r15), %rcx + movq %rax, 72(%r15) + adcq 80(%r11), %rcx + movq 88(%r15), %r8 + movq %rcx, 80(%r15) + adcq 88(%r11), %r8 + movq 96(%r15), %rax + movq %r8, 88(%r15) + adcq 96(%r11), %rax + movq 104(%r15), %rcx + movq %rax, 96(%r15) + adcq 104(%r11), %rcx + movq 112(%r15), %r8 + movq %rcx, 104(%r15) + adcq 112(%r11), %r8 + movq 120(%r15), %rax + movq %r8, 112(%r15) + adcq 120(%r11), %rax + movq 128(%r15), %rcx + movq %rax, 120(%r15) + adcq 128(%r11), %rcx + movq 136(%r15), %r8 + movq %rcx, 128(%r15) + adcq 136(%r11), %r8 + movq 144(%r15), %rax + movq %r8, 136(%r15) + adcq 144(%r11), %rax + movq 152(%r15), %rcx + movq %rax, 144(%r15) + adcq 152(%r11), %rcx + movq 160(%r15), %r8 + movq %rcx, 152(%r15) + adcq 160(%r11), %r8 + movq 168(%r15), %rax + movq %r8, 160(%r15) + adcq 168(%r11), %rax + movq 176(%r15), %rcx + movq %rax, 168(%r15) + adcq 176(%r11), %rcx + movq 184(%r15), %r8 + movq %rcx, 176(%r15) + adcq 184(%r11), %r8 + movq 192(%r15), %rax + movq %r8, 184(%r15) + adcq 192(%r11), %rax + movq %rax, 192(%r15) + # Add to zero + movq 200(%r11), %rax + adcq $0, %rax + movq 208(%r11), %rcx + movq %rax, 200(%r15) + adcq $0, %rcx + movq 216(%r11), %r8 + movq %rcx, 208(%r15) + adcq $0, %r8 + movq 224(%r11), %rax + movq %r8, 216(%r15) + adcq $0, %rax + movq 232(%r11), %rcx + movq %rax, 224(%r15) + adcq $0, %rcx + movq 240(%r11), %r8 + movq %rcx, 232(%r15) + adcq $0, %r8 + movq 248(%r11), %rax + movq %r8, 240(%r15) + adcq $0, %rax + movq 
256(%r11), %rcx + movq %rax, 248(%r15) + adcq $0, %rcx + movq 264(%r11), %r8 + movq %rcx, 256(%r15) + adcq $0, %r8 + movq 272(%r11), %rax + movq %r8, 264(%r15) + adcq $0, %rax + movq 280(%r11), %rcx + movq %rax, 272(%r15) + adcq $0, %rcx + movq 288(%r11), %r8 + movq %rcx, 280(%r15) + adcq $0, %r8 + movq 296(%r11), %rax + movq %r8, 288(%r15) + adcq $0, %rax + movq 304(%r11), %rcx + movq %rax, 296(%r15) + adcq $0, %rcx + movq 312(%r11), %r8 + movq %rcx, 304(%r15) + adcq $0, %r8 + movq 320(%r11), %rax + movq %r8, 312(%r15) + adcq $0, %rax + movq 328(%r11), %rcx + movq %rax, 320(%r15) + adcq $0, %rcx + movq 336(%r11), %r8 + movq %rcx, 328(%r15) + adcq $0, %r8 + movq 344(%r11), %rax + movq %r8, 336(%r15) + adcq $0, %rax + movq 352(%r11), %rcx + movq %rax, 344(%r15) + adcq $0, %rcx + movq 360(%r11), %r8 + movq %rcx, 352(%r15) + adcq $0, %r8 + movq 368(%r11), %rax + movq %r8, 360(%r15) + adcq $0, %rax + movq 376(%r11), %rcx + movq %rax, 368(%r15) + adcq $0, %rcx + movq %rcx, 376(%r15) + addq $1192, %rsp + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_3072_mul_48,.-sp_3072_mul_48 +#endif /* __APPLE__ */ +/* Add a to a into r. (r = a + a) + * + * r A single precision integer. + * a A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_3072_dbl_24 +.type sp_3072_dbl_24,@function +.align 16 +sp_3072_dbl_24: +#else +.globl _sp_3072_dbl_24 +.p2align 4 +_sp_3072_dbl_24: +#endif /* __APPLE__ */ + movq (%rsi), %rdx + xorq %rax, %rax + addq %rdx, %rdx + movq 8(%rsi), %rcx + movq %rdx, (%rdi) + adcq %rcx, %rcx + movq 16(%rsi), %rdx + movq %rcx, 8(%rdi) + adcq %rdx, %rdx + movq 24(%rsi), %rcx + movq %rdx, 16(%rdi) + adcq %rcx, %rcx + movq 32(%rsi), %rdx + movq %rcx, 24(%rdi) + adcq %rdx, %rdx + movq 40(%rsi), %rcx + movq %rdx, 32(%rdi) + adcq %rcx, %rcx + movq 48(%rsi), %rdx + movq %rcx, 40(%rdi) + adcq %rdx, %rdx + movq 56(%rsi), %rcx + movq %rdx, 48(%rdi) + adcq %rcx, %rcx + movq 64(%rsi), %rdx + movq %rcx, 56(%rdi) + adcq %rdx, %rdx + movq 72(%rsi), %rcx + movq %rdx, 64(%rdi) + adcq %rcx, %rcx + movq 80(%rsi), %rdx + movq %rcx, 72(%rdi) + adcq %rdx, %rdx + movq 88(%rsi), %rcx + movq %rdx, 80(%rdi) + adcq %rcx, %rcx + movq 96(%rsi), %rdx + movq %rcx, 88(%rdi) + adcq %rdx, %rdx + movq 104(%rsi), %rcx + movq %rdx, 96(%rdi) + adcq %rcx, %rcx + movq 112(%rsi), %rdx + movq %rcx, 104(%rdi) + adcq %rdx, %rdx + movq 120(%rsi), %rcx + movq %rdx, 112(%rdi) + adcq %rcx, %rcx + movq 128(%rsi), %rdx + movq %rcx, 120(%rdi) + adcq %rdx, %rdx + movq 136(%rsi), %rcx + movq %rdx, 128(%rdi) + adcq %rcx, %rcx + movq 144(%rsi), %rdx + movq %rcx, 136(%rdi) + adcq %rdx, %rdx + movq 152(%rsi), %rcx + movq %rdx, 144(%rdi) + adcq %rcx, %rcx + movq 160(%rsi), %rdx + movq %rcx, 152(%rdi) + adcq %rdx, %rdx + movq 168(%rsi), %rcx + movq %rdx, 160(%rdi) + adcq %rcx, %rcx + movq 176(%rsi), %rdx + movq %rcx, 168(%rdi) + adcq %rdx, %rdx + movq 184(%rsi), %rcx + movq %rdx, 176(%rdi) + adcq %rcx, %rcx + movq %rcx, 184(%rdi) + adcq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_3072_dbl_24,.-sp_3072_dbl_24 +#endif /* __APPLE__ */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +#ifndef __APPLE__ +.globl sp_3072_sqr_48 +.type sp_3072_sqr_48,@function +.align 16 +sp_3072_sqr_48: +#else +.globl _sp_3072_sqr_48 +.p2align 4 +_sp_3072_sqr_48: +#endif /* __APPLE__ */ + subq $984, %rsp + movq %rdi, 960(%rsp) + movq %rsi, 968(%rsp) + leaq 768(%rsp), %r8 + leaq 192(%rsi), %r9 + # Add + movq (%rsi), %rdx + xorq %rcx, %rcx + addq (%r9), %rdx + movq 8(%rsi), %rax + movq %rdx, (%r8) + adcq 8(%r9), %rax + movq 16(%rsi), %rdx + movq %rax, 8(%r8) + adcq 16(%r9), %rdx + movq 24(%rsi), %rax + movq %rdx, 16(%r8) + adcq 24(%r9), %rax + movq 32(%rsi), %rdx + movq %rax, 24(%r8) + adcq 32(%r9), %rdx + movq 40(%rsi), %rax + movq %rdx, 32(%r8) + adcq 40(%r9), %rax + movq 48(%rsi), %rdx + movq %rax, 40(%r8) + adcq 48(%r9), %rdx + movq 56(%rsi), %rax + movq %rdx, 48(%r8) + adcq 56(%r9), %rax + movq 64(%rsi), %rdx + movq %rax, 56(%r8) + adcq 64(%r9), %rdx + movq 72(%rsi), %rax + movq %rdx, 64(%r8) + adcq 72(%r9), %rax + movq 80(%rsi), %rdx + movq %rax, 72(%r8) + adcq 80(%r9), %rdx + movq 88(%rsi), %rax + movq %rdx, 80(%r8) + adcq 88(%r9), %rax + movq 96(%rsi), %rdx + movq %rax, 88(%r8) + adcq 96(%r9), %rdx + movq 104(%rsi), %rax + movq %rdx, 96(%r8) + adcq 104(%r9), %rax + movq 112(%rsi), %rdx + movq %rax, 104(%r8) + adcq 112(%r9), %rdx + movq 120(%rsi), %rax + movq %rdx, 112(%r8) + adcq 120(%r9), %rax + movq 128(%rsi), %rdx + movq %rax, 120(%r8) + adcq 128(%r9), %rdx + movq 136(%rsi), %rax + movq %rdx, 128(%r8) + adcq 136(%r9), %rax + movq 144(%rsi), %rdx + movq %rax, 136(%r8) + adcq 144(%r9), %rdx + movq 152(%rsi), %rax + movq %rdx, 144(%r8) + adcq 152(%r9), %rax + movq 160(%rsi), %rdx + movq %rax, 152(%r8) + adcq 160(%r9), %rdx + movq 168(%rsi), %rax + movq %rdx, 160(%r8) + adcq 168(%r9), %rax + movq 176(%rsi), %rdx + movq %rax, 168(%r8) + adcq 176(%r9), %rdx + movq 184(%rsi), %rax + movq %rdx, 176(%r8) + adcq 184(%r9), %rax + movq %rax, 184(%r8) + adcq $0, %rcx + movq %rcx, 976(%rsp) + movq %r8, %rsi + movq %rsp, %rdi +#ifndef __APPLE__ + callq sp_3072_sqr_24@plt +#else + callq _sp_3072_sqr_24 +#endif /* __APPLE__ */ + movq 968(%rsp), %rsi + leaq 384(%rsp), %rdi + addq $192, %rsi +#ifndef __APPLE__ + callq sp_3072_sqr_24@plt +#else + callq _sp_3072_sqr_24 +#endif /* __APPLE__ */ + movq 968(%rsp), %rsi + movq 960(%rsp), %rdi +#ifndef __APPLE__ + callq sp_3072_sqr_24@plt +#else + callq _sp_3072_sqr_24 +#endif /* __APPLE__ */ + movq 976(%rsp), %r10 + movq %rdi, %r9 + leaq 768(%rsp), %r8 + movq %r10, %rcx + negq %r10 + addq $384, %r9 + movq (%r8), %rdx + movq 8(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, (%r9) + movq %rax, 8(%r9) + movq 16(%r8), %rdx + movq 24(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 16(%r9) + movq %rax, 24(%r9) + movq 32(%r8), %rdx + movq 40(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 32(%r9) + movq %rax, 40(%r9) + movq 48(%r8), %rdx + movq 56(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 48(%r9) + movq %rax, 56(%r9) + movq 64(%r8), %rdx + movq 72(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 64(%r9) + movq %rax, 72(%r9) + movq 80(%r8), %rdx + movq 88(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 80(%r9) + movq %rax, 88(%r9) + movq 96(%r8), %rdx + movq 104(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 96(%r9) + movq %rax, 104(%r9) + movq 112(%r8), %rdx + movq 120(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 112(%r9) + movq %rax, 120(%r9) + movq 128(%r8), %rdx + movq 136(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 128(%r9) + movq 
%rax, 136(%r9) + movq 144(%r8), %rdx + movq 152(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 144(%r9) + movq %rax, 152(%r9) + movq 160(%r8), %rdx + movq 168(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 160(%r9) + movq %rax, 168(%r9) + movq 176(%r8), %rdx + movq 184(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 176(%r9) + movq %rax, 184(%r9) + movq (%r9), %rdx + addq %rdx, %rdx + movq 8(%r9), %rax + movq %rdx, (%r9) + adcq %rax, %rax + movq 16(%r9), %rdx + movq %rax, 8(%r9) + adcq %rdx, %rdx + movq 24(%r9), %rax + movq %rdx, 16(%r9) + adcq %rax, %rax + movq 32(%r9), %rdx + movq %rax, 24(%r9) + adcq %rdx, %rdx + movq 40(%r9), %rax + movq %rdx, 32(%r9) + adcq %rax, %rax + movq 48(%r9), %rdx + movq %rax, 40(%r9) + adcq %rdx, %rdx + movq 56(%r9), %rax + movq %rdx, 48(%r9) + adcq %rax, %rax + movq 64(%r9), %rdx + movq %rax, 56(%r9) + adcq %rdx, %rdx + movq 72(%r9), %rax + movq %rdx, 64(%r9) + adcq %rax, %rax + movq 80(%r9), %rdx + movq %rax, 72(%r9) + adcq %rdx, %rdx + movq 88(%r9), %rax + movq %rdx, 80(%r9) + adcq %rax, %rax + movq 96(%r9), %rdx + movq %rax, 88(%r9) + adcq %rdx, %rdx + movq 104(%r9), %rax + movq %rdx, 96(%r9) + adcq %rax, %rax + movq 112(%r9), %rdx + movq %rax, 104(%r9) + adcq %rdx, %rdx + movq 120(%r9), %rax + movq %rdx, 112(%r9) + adcq %rax, %rax + movq 128(%r9), %rdx + movq %rax, 120(%r9) + adcq %rdx, %rdx + movq 136(%r9), %rax + movq %rdx, 128(%r9) + adcq %rax, %rax + movq 144(%r9), %rdx + movq %rax, 136(%r9) + adcq %rdx, %rdx + movq 152(%r9), %rax + movq %rdx, 144(%r9) + adcq %rax, %rax + movq 160(%r9), %rdx + movq %rax, 152(%r9) + adcq %rdx, %rdx + movq 168(%r9), %rax + movq %rdx, 160(%r9) + adcq %rax, %rax + movq 176(%r9), %rdx + movq %rax, 168(%r9) + adcq %rdx, %rdx + movq 184(%r9), %rax + movq %rdx, 176(%r9) + adcq %rax, %rax + movq %rax, 184(%r9) + adcq $0, %rcx + leaq 384(%rsp), %rsi + movq %rsp, %r8 + movq (%r8), %rdx + subq (%rsi), %rdx + movq 8(%r8), %rax + movq %rdx, (%r8) + sbbq 8(%rsi), %rax + movq 16(%r8), %rdx + movq %rax, 8(%r8) + sbbq 16(%rsi), %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r8) + sbbq 24(%rsi), %rax + movq 32(%r8), %rdx + movq %rax, 24(%r8) + sbbq 32(%rsi), %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r8) + sbbq 40(%rsi), %rax + movq 48(%r8), %rdx + movq %rax, 40(%r8) + sbbq 48(%rsi), %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r8) + sbbq 56(%rsi), %rax + movq 64(%r8), %rdx + movq %rax, 56(%r8) + sbbq 64(%rsi), %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r8) + sbbq 72(%rsi), %rax + movq 80(%r8), %rdx + movq %rax, 72(%r8) + sbbq 80(%rsi), %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r8) + sbbq 88(%rsi), %rax + movq 96(%r8), %rdx + movq %rax, 88(%r8) + sbbq 96(%rsi), %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r8) + sbbq 104(%rsi), %rax + movq 112(%r8), %rdx + movq %rax, 104(%r8) + sbbq 112(%rsi), %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r8) + sbbq 120(%rsi), %rax + movq 128(%r8), %rdx + movq %rax, 120(%r8) + sbbq 128(%rsi), %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r8) + sbbq 136(%rsi), %rax + movq 144(%r8), %rdx + movq %rax, 136(%r8) + sbbq 144(%rsi), %rdx + movq 152(%r8), %rax + movq %rdx, 144(%r8) + sbbq 152(%rsi), %rax + movq 160(%r8), %rdx + movq %rax, 152(%r8) + sbbq 160(%rsi), %rdx + movq 168(%r8), %rax + movq %rdx, 160(%r8) + sbbq 168(%rsi), %rax + movq 176(%r8), %rdx + movq %rax, 168(%r8) + sbbq 176(%rsi), %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r8) + sbbq 184(%rsi), %rax + movq 192(%r8), %rdx + movq %rax, 184(%r8) + sbbq 192(%rsi), %rdx + movq 200(%r8), %rax + movq %rdx, 192(%r8) + sbbq 
200(%rsi), %rax + movq 208(%r8), %rdx + movq %rax, 200(%r8) + sbbq 208(%rsi), %rdx + movq 216(%r8), %rax + movq %rdx, 208(%r8) + sbbq 216(%rsi), %rax + movq 224(%r8), %rdx + movq %rax, 216(%r8) + sbbq 224(%rsi), %rdx + movq 232(%r8), %rax + movq %rdx, 224(%r8) + sbbq 232(%rsi), %rax + movq 240(%r8), %rdx + movq %rax, 232(%r8) + sbbq 240(%rsi), %rdx + movq 248(%r8), %rax + movq %rdx, 240(%r8) + sbbq 248(%rsi), %rax + movq 256(%r8), %rdx + movq %rax, 248(%r8) + sbbq 256(%rsi), %rdx + movq 264(%r8), %rax + movq %rdx, 256(%r8) + sbbq 264(%rsi), %rax + movq 272(%r8), %rdx + movq %rax, 264(%r8) + sbbq 272(%rsi), %rdx + movq 280(%r8), %rax + movq %rdx, 272(%r8) + sbbq 280(%rsi), %rax + movq 288(%r8), %rdx + movq %rax, 280(%r8) + sbbq 288(%rsi), %rdx + movq 296(%r8), %rax + movq %rdx, 288(%r8) + sbbq 296(%rsi), %rax + movq 304(%r8), %rdx + movq %rax, 296(%r8) + sbbq 304(%rsi), %rdx + movq 312(%r8), %rax + movq %rdx, 304(%r8) + sbbq 312(%rsi), %rax + movq 320(%r8), %rdx + movq %rax, 312(%r8) + sbbq 320(%rsi), %rdx + movq 328(%r8), %rax + movq %rdx, 320(%r8) + sbbq 328(%rsi), %rax + movq 336(%r8), %rdx + movq %rax, 328(%r8) + sbbq 336(%rsi), %rdx + movq 344(%r8), %rax + movq %rdx, 336(%r8) + sbbq 344(%rsi), %rax + movq 352(%r8), %rdx + movq %rax, 344(%r8) + sbbq 352(%rsi), %rdx + movq 360(%r8), %rax + movq %rdx, 352(%r8) + sbbq 360(%rsi), %rax + movq 368(%r8), %rdx + movq %rax, 360(%r8) + sbbq 368(%rsi), %rdx + movq 376(%r8), %rax + movq %rdx, 368(%r8) + sbbq 376(%rsi), %rax + movq %rax, 376(%r8) + sbbq $0, %rcx + movq (%r8), %rdx + subq (%rdi), %rdx + movq 8(%r8), %rax + movq %rdx, (%r8) + sbbq 8(%rdi), %rax + movq 16(%r8), %rdx + movq %rax, 8(%r8) + sbbq 16(%rdi), %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r8) + sbbq 24(%rdi), %rax + movq 32(%r8), %rdx + movq %rax, 24(%r8) + sbbq 32(%rdi), %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r8) + sbbq 40(%rdi), %rax + movq 48(%r8), %rdx + movq %rax, 40(%r8) + sbbq 48(%rdi), %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r8) + sbbq 56(%rdi), %rax + movq 64(%r8), %rdx + movq %rax, 56(%r8) + sbbq 64(%rdi), %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r8) + sbbq 72(%rdi), %rax + movq 80(%r8), %rdx + movq %rax, 72(%r8) + sbbq 80(%rdi), %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r8) + sbbq 88(%rdi), %rax + movq 96(%r8), %rdx + movq %rax, 88(%r8) + sbbq 96(%rdi), %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r8) + sbbq 104(%rdi), %rax + movq 112(%r8), %rdx + movq %rax, 104(%r8) + sbbq 112(%rdi), %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r8) + sbbq 120(%rdi), %rax + movq 128(%r8), %rdx + movq %rax, 120(%r8) + sbbq 128(%rdi), %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r8) + sbbq 136(%rdi), %rax + movq 144(%r8), %rdx + movq %rax, 136(%r8) + sbbq 144(%rdi), %rdx + movq 152(%r8), %rax + movq %rdx, 144(%r8) + sbbq 152(%rdi), %rax + movq 160(%r8), %rdx + movq %rax, 152(%r8) + sbbq 160(%rdi), %rdx + movq 168(%r8), %rax + movq %rdx, 160(%r8) + sbbq 168(%rdi), %rax + movq 176(%r8), %rdx + movq %rax, 168(%r8) + sbbq 176(%rdi), %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r8) + sbbq 184(%rdi), %rax + movq 192(%r8), %rdx + movq %rax, 184(%r8) + sbbq 192(%rdi), %rdx + movq 200(%r8), %rax + movq %rdx, 192(%r8) + sbbq 200(%rdi), %rax + movq 208(%r8), %rdx + movq %rax, 200(%r8) + sbbq 208(%rdi), %rdx + movq 216(%r8), %rax + movq %rdx, 208(%r8) + sbbq 216(%rdi), %rax + movq 224(%r8), %rdx + movq %rax, 216(%r8) + sbbq 224(%rdi), %rdx + movq 232(%r8), %rax + movq %rdx, 224(%r8) + sbbq 232(%rdi), %rax + movq 240(%r8), %rdx + movq %rax, 232(%r8) + sbbq 240(%rdi), %rdx + movq 248(%r8), 
%rax + movq %rdx, 240(%r8) + sbbq 248(%rdi), %rax + movq 256(%r8), %rdx + movq %rax, 248(%r8) + sbbq 256(%rdi), %rdx + movq 264(%r8), %rax + movq %rdx, 256(%r8) + sbbq 264(%rdi), %rax + movq 272(%r8), %rdx + movq %rax, 264(%r8) + sbbq 272(%rdi), %rdx + movq 280(%r8), %rax + movq %rdx, 272(%r8) + sbbq 280(%rdi), %rax + movq 288(%r8), %rdx + movq %rax, 280(%r8) + sbbq 288(%rdi), %rdx + movq 296(%r8), %rax + movq %rdx, 288(%r8) + sbbq 296(%rdi), %rax + movq 304(%r8), %rdx + movq %rax, 296(%r8) + sbbq 304(%rdi), %rdx + movq 312(%r8), %rax + movq %rdx, 304(%r8) + sbbq 312(%rdi), %rax + movq 320(%r8), %rdx + movq %rax, 312(%r8) + sbbq 320(%rdi), %rdx + movq 328(%r8), %rax + movq %rdx, 320(%r8) + sbbq 328(%rdi), %rax + movq 336(%r8), %rdx + movq %rax, 328(%r8) + sbbq 336(%rdi), %rdx + movq 344(%r8), %rax + movq %rdx, 336(%r8) + sbbq 344(%rdi), %rax + movq 352(%r8), %rdx + movq %rax, 344(%r8) + sbbq 352(%rdi), %rdx + movq 360(%r8), %rax + movq %rdx, 352(%r8) + sbbq 360(%rdi), %rax + movq 368(%r8), %rdx + movq %rax, 360(%r8) + sbbq 368(%rdi), %rdx + movq 376(%r8), %rax + movq %rdx, 368(%r8) + sbbq 376(%rdi), %rax + movq %rax, 376(%r8) + sbbq $0, %rcx + subq $192, %r9 + # Add in place + movq (%r9), %rdx + addq (%r8), %rdx + movq 8(%r9), %rax + movq %rdx, (%r9) + adcq 8(%r8), %rax + movq 16(%r9), %rdx + movq %rax, 8(%r9) + adcq 16(%r8), %rdx + movq 24(%r9), %rax + movq %rdx, 16(%r9) + adcq 24(%r8), %rax + movq 32(%r9), %rdx + movq %rax, 24(%r9) + adcq 32(%r8), %rdx + movq 40(%r9), %rax + movq %rdx, 32(%r9) + adcq 40(%r8), %rax + movq 48(%r9), %rdx + movq %rax, 40(%r9) + adcq 48(%r8), %rdx + movq 56(%r9), %rax + movq %rdx, 48(%r9) + adcq 56(%r8), %rax + movq 64(%r9), %rdx + movq %rax, 56(%r9) + adcq 64(%r8), %rdx + movq 72(%r9), %rax + movq %rdx, 64(%r9) + adcq 72(%r8), %rax + movq 80(%r9), %rdx + movq %rax, 72(%r9) + adcq 80(%r8), %rdx + movq 88(%r9), %rax + movq %rdx, 80(%r9) + adcq 88(%r8), %rax + movq 96(%r9), %rdx + movq %rax, 88(%r9) + adcq 96(%r8), %rdx + movq 104(%r9), %rax + movq %rdx, 96(%r9) + adcq 104(%r8), %rax + movq 112(%r9), %rdx + movq %rax, 104(%r9) + adcq 112(%r8), %rdx + movq 120(%r9), %rax + movq %rdx, 112(%r9) + adcq 120(%r8), %rax + movq 128(%r9), %rdx + movq %rax, 120(%r9) + adcq 128(%r8), %rdx + movq 136(%r9), %rax + movq %rdx, 128(%r9) + adcq 136(%r8), %rax + movq 144(%r9), %rdx + movq %rax, 136(%r9) + adcq 144(%r8), %rdx + movq 152(%r9), %rax + movq %rdx, 144(%r9) + adcq 152(%r8), %rax + movq 160(%r9), %rdx + movq %rax, 152(%r9) + adcq 160(%r8), %rdx + movq 168(%r9), %rax + movq %rdx, 160(%r9) + adcq 168(%r8), %rax + movq 176(%r9), %rdx + movq %rax, 168(%r9) + adcq 176(%r8), %rdx + movq 184(%r9), %rax + movq %rdx, 176(%r9) + adcq 184(%r8), %rax + movq 192(%r9), %rdx + movq %rax, 184(%r9) + adcq 192(%r8), %rdx + movq 200(%r9), %rax + movq %rdx, 192(%r9) + adcq 200(%r8), %rax + movq 208(%r9), %rdx + movq %rax, 200(%r9) + adcq 208(%r8), %rdx + movq 216(%r9), %rax + movq %rdx, 208(%r9) + adcq 216(%r8), %rax + movq 224(%r9), %rdx + movq %rax, 216(%r9) + adcq 224(%r8), %rdx + movq 232(%r9), %rax + movq %rdx, 224(%r9) + adcq 232(%r8), %rax + movq 240(%r9), %rdx + movq %rax, 232(%r9) + adcq 240(%r8), %rdx + movq 248(%r9), %rax + movq %rdx, 240(%r9) + adcq 248(%r8), %rax + movq 256(%r9), %rdx + movq %rax, 248(%r9) + adcq 256(%r8), %rdx + movq 264(%r9), %rax + movq %rdx, 256(%r9) + adcq 264(%r8), %rax + movq 272(%r9), %rdx + movq %rax, 264(%r9) + adcq 272(%r8), %rdx + movq 280(%r9), %rax + movq %rdx, 272(%r9) + adcq 280(%r8), %rax + movq 288(%r9), %rdx + movq %rax, 280(%r9) + adcq 
288(%r8), %rdx + movq 296(%r9), %rax + movq %rdx, 288(%r9) + adcq 296(%r8), %rax + movq 304(%r9), %rdx + movq %rax, 296(%r9) + adcq 304(%r8), %rdx + movq 312(%r9), %rax + movq %rdx, 304(%r9) + adcq 312(%r8), %rax + movq 320(%r9), %rdx + movq %rax, 312(%r9) + adcq 320(%r8), %rdx + movq 328(%r9), %rax + movq %rdx, 320(%r9) + adcq 328(%r8), %rax + movq 336(%r9), %rdx + movq %rax, 328(%r9) + adcq 336(%r8), %rdx + movq 344(%r9), %rax + movq %rdx, 336(%r9) + adcq 344(%r8), %rax + movq 352(%r9), %rdx + movq %rax, 344(%r9) + adcq 352(%r8), %rdx + movq 360(%r9), %rax + movq %rdx, 352(%r9) + adcq 360(%r8), %rax + movq 368(%r9), %rdx + movq %rax, 360(%r9) + adcq 368(%r8), %rdx + movq 376(%r9), %rax + movq %rdx, 368(%r9) + adcq 376(%r8), %rax + movq %rax, 376(%r9) + adcq $0, %rcx + movq %rcx, 576(%rdi) + # Add in place + movq 192(%r9), %rdx + addq (%rsi), %rdx + movq 200(%r9), %rax + movq %rdx, 192(%r9) + adcq 8(%rsi), %rax + movq 208(%r9), %rdx + movq %rax, 200(%r9) + adcq 16(%rsi), %rdx + movq 216(%r9), %rax + movq %rdx, 208(%r9) + adcq 24(%rsi), %rax + movq 224(%r9), %rdx + movq %rax, 216(%r9) + adcq 32(%rsi), %rdx + movq 232(%r9), %rax + movq %rdx, 224(%r9) + adcq 40(%rsi), %rax + movq 240(%r9), %rdx + movq %rax, 232(%r9) + adcq 48(%rsi), %rdx + movq 248(%r9), %rax + movq %rdx, 240(%r9) + adcq 56(%rsi), %rax + movq 256(%r9), %rdx + movq %rax, 248(%r9) + adcq 64(%rsi), %rdx + movq 264(%r9), %rax + movq %rdx, 256(%r9) + adcq 72(%rsi), %rax + movq 272(%r9), %rdx + movq %rax, 264(%r9) + adcq 80(%rsi), %rdx + movq 280(%r9), %rax + movq %rdx, 272(%r9) + adcq 88(%rsi), %rax + movq 288(%r9), %rdx + movq %rax, 280(%r9) + adcq 96(%rsi), %rdx + movq 296(%r9), %rax + movq %rdx, 288(%r9) + adcq 104(%rsi), %rax + movq 304(%r9), %rdx + movq %rax, 296(%r9) + adcq 112(%rsi), %rdx + movq 312(%r9), %rax + movq %rdx, 304(%r9) + adcq 120(%rsi), %rax + movq 320(%r9), %rdx + movq %rax, 312(%r9) + adcq 128(%rsi), %rdx + movq 328(%r9), %rax + movq %rdx, 320(%r9) + adcq 136(%rsi), %rax + movq 336(%r9), %rdx + movq %rax, 328(%r9) + adcq 144(%rsi), %rdx + movq 344(%r9), %rax + movq %rdx, 336(%r9) + adcq 152(%rsi), %rax + movq 352(%r9), %rdx + movq %rax, 344(%r9) + adcq 160(%rsi), %rdx + movq 360(%r9), %rax + movq %rdx, 352(%r9) + adcq 168(%rsi), %rax + movq 368(%r9), %rdx + movq %rax, 360(%r9) + adcq 176(%rsi), %rdx + movq 376(%r9), %rax + movq %rdx, 368(%r9) + adcq 184(%rsi), %rax + movq 384(%r9), %rdx + movq %rax, 376(%r9) + adcq 192(%rsi), %rdx + movq %rdx, 384(%r9) + # Add to zero + movq 200(%rsi), %rdx + adcq $0, %rdx + movq 208(%rsi), %rax + movq %rdx, 392(%r9) + adcq $0, %rax + movq 216(%rsi), %rdx + movq %rax, 400(%r9) + adcq $0, %rdx + movq 224(%rsi), %rax + movq %rdx, 408(%r9) + adcq $0, %rax + movq 232(%rsi), %rdx + movq %rax, 416(%r9) + adcq $0, %rdx + movq 240(%rsi), %rax + movq %rdx, 424(%r9) + adcq $0, %rax + movq 248(%rsi), %rdx + movq %rax, 432(%r9) + adcq $0, %rdx + movq 256(%rsi), %rax + movq %rdx, 440(%r9) + adcq $0, %rax + movq 264(%rsi), %rdx + movq %rax, 448(%r9) + adcq $0, %rdx + movq 272(%rsi), %rax + movq %rdx, 456(%r9) + adcq $0, %rax + movq 280(%rsi), %rdx + movq %rax, 464(%r9) + adcq $0, %rdx + movq 288(%rsi), %rax + movq %rdx, 472(%r9) + adcq $0, %rax + movq 296(%rsi), %rdx + movq %rax, 480(%r9) + adcq $0, %rdx + movq 304(%rsi), %rax + movq %rdx, 488(%r9) + adcq $0, %rax + movq 312(%rsi), %rdx + movq %rax, 496(%r9) + adcq $0, %rdx + movq 320(%rsi), %rax + movq %rdx, 504(%r9) + adcq $0, %rax + movq 328(%rsi), %rdx + movq %rax, 512(%r9) + adcq $0, %rdx + movq 336(%rsi), %rax + movq %rdx, 520(%r9) 
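sp_3072_sqr_48 applies the same split to squaring, where symmetry saves the cross multiply: 2*aL*aH equals (aL+aH)^2 minus the two half squares, so three calls to sp_3072_sqr_24 suffice, and the doubling pass over the masked sum seen above supplies the factor of two in the carry correction. A tiny value-level check of the identity, with stand-in numbers in base B = 16 for readability:

    #include <stdint.h>
    #include <assert.h>

    int main(void)
    {
        /* two "limbs" in base B = 16: a = aH*B + aL */
        uint64_t aL = 9, aH = 6, B = 16;
        uint64_t a = aH * B + aL;
        uint64_t s = (aL + aH) * (aL + aH);        /* (aL+aH)^2       */
        uint64_t cross = s - aL * aL - aH * aH;    /* = 2*aL*aH = 108 */
        assert(a * a == aL * aL + cross * B + aH * aH * B * B);
        return 0;
    }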
+ adcq $0, %rax + movq 344(%rsi), %rdx + movq %rax, 528(%r9) + adcq $0, %rdx + movq 352(%rsi), %rax + movq %rdx, 536(%r9) + adcq $0, %rax + movq 360(%rsi), %rdx + movq %rax, 544(%r9) + adcq $0, %rdx + movq 368(%rsi), %rax + movq %rdx, 552(%r9) + adcq $0, %rax + movq 376(%rsi), %rdx + movq %rax, 560(%r9) + adcq $0, %rdx + movq %rdx, 568(%r9) + addq $984, %rsp + repz retq +#ifndef __APPLE__ +.size sp_3072_sqr_48,.-sp_3072_sqr_48 +#endif /* __APPLE__ */ +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_3072_mul_avx2_48 +.type sp_3072_mul_avx2_48,@function +.align 16 +sp_3072_mul_avx2_48: +#else +.globl _sp_3072_mul_avx2_48 +.p2align 4 +_sp_3072_mul_avx2_48: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + subq $1192, %rsp + movq %rdi, 1152(%rsp) + movq %rsi, 1160(%rsp) + movq %rdx, 1168(%rsp) + leaq 768(%rsp), %r10 + leaq 192(%rsi), %r12 + # Add + movq (%rsi), %rax + xorq %r13, %r13 + addq (%r12), %rax + movq 8(%rsi), %rcx + movq %rax, (%r10) + adcq 8(%r12), %rcx + movq 16(%rsi), %r8 + movq %rcx, 8(%r10) + adcq 16(%r12), %r8 + movq 24(%rsi), %rax + movq %r8, 16(%r10) + adcq 24(%r12), %rax + movq 32(%rsi), %rcx + movq %rax, 24(%r10) + adcq 32(%r12), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%r10) + adcq 40(%r12), %r8 + movq 48(%rsi), %rax + movq %r8, 40(%r10) + adcq 48(%r12), %rax + movq 56(%rsi), %rcx + movq %rax, 48(%r10) + adcq 56(%r12), %rcx + movq 64(%rsi), %r8 + movq %rcx, 56(%r10) + adcq 64(%r12), %r8 + movq 72(%rsi), %rax + movq %r8, 64(%r10) + adcq 72(%r12), %rax + movq 80(%rsi), %rcx + movq %rax, 72(%r10) + adcq 80(%r12), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%r10) + adcq 88(%r12), %r8 + movq 96(%rsi), %rax + movq %r8, 88(%r10) + adcq 96(%r12), %rax + movq 104(%rsi), %rcx + movq %rax, 96(%r10) + adcq 104(%r12), %rcx + movq 112(%rsi), %r8 + movq %rcx, 104(%r10) + adcq 112(%r12), %r8 + movq 120(%rsi), %rax + movq %r8, 112(%r10) + adcq 120(%r12), %rax + movq 128(%rsi), %rcx + movq %rax, 120(%r10) + adcq 128(%r12), %rcx + movq 136(%rsi), %r8 + movq %rcx, 128(%r10) + adcq 136(%r12), %r8 + movq 144(%rsi), %rax + movq %r8, 136(%r10) + adcq 144(%r12), %rax + movq 152(%rsi), %rcx + movq %rax, 144(%r10) + adcq 152(%r12), %rcx + movq 160(%rsi), %r8 + movq %rcx, 152(%r10) + adcq 160(%r12), %r8 + movq 168(%rsi), %rax + movq %r8, 160(%r10) + adcq 168(%r12), %rax + movq 176(%rsi), %rcx + movq %rax, 168(%r10) + adcq 176(%r12), %rcx + movq 184(%rsi), %r8 + movq %rcx, 176(%r10) + adcq 184(%r12), %r8 + movq %r8, 184(%r10) + adcq $0, %r13 + movq %r13, 1176(%rsp) + leaq 960(%rsp), %r11 + leaq 192(%rdx), %r12 + # Add + movq (%rdx), %rax + xorq %r14, %r14 + addq (%r12), %rax + movq 8(%rdx), %rcx + movq %rax, (%r11) + adcq 8(%r12), %rcx + movq 16(%rdx), %r8 + movq %rcx, 8(%r11) + adcq 16(%r12), %r8 + movq 24(%rdx), %rax + movq %r8, 16(%r11) + adcq 24(%r12), %rax + movq 32(%rdx), %rcx + movq %rax, 24(%r11) + adcq 32(%r12), %rcx + movq 40(%rdx), %r8 + movq %rcx, 32(%r11) + adcq 40(%r12), %r8 + movq 48(%rdx), %rax + movq %r8, 40(%r11) + adcq 48(%r12), %rax + movq 56(%rdx), %rcx + movq %rax, 48(%r11) + adcq 56(%r12), %rcx + movq 64(%rdx), %r8 + movq %rcx, 56(%r11) + adcq 64(%r12), %r8 + movq 72(%rdx), %rax + movq %r8, 64(%r11) + adcq 72(%r12), %rax + movq 80(%rdx), %rcx + movq %rax, 72(%r11) + adcq 80(%r12), %rcx + movq 88(%rdx), %r8 + movq %rcx, 80(%r11) + adcq 88(%r12), %r8 + movq 96(%rdx), %rax + movq %r8, 88(%r11) + adcq 96(%r12), %rax + movq 
104(%rdx), %rcx + movq %rax, 96(%r11) + adcq 104(%r12), %rcx + movq 112(%rdx), %r8 + movq %rcx, 104(%r11) + adcq 112(%r12), %r8 + movq 120(%rdx), %rax + movq %r8, 112(%r11) + adcq 120(%r12), %rax + movq 128(%rdx), %rcx + movq %rax, 120(%r11) + adcq 128(%r12), %rcx + movq 136(%rdx), %r8 + movq %rcx, 128(%r11) + adcq 136(%r12), %r8 + movq 144(%rdx), %rax + movq %r8, 136(%r11) + adcq 144(%r12), %rax + movq 152(%rdx), %rcx + movq %rax, 144(%r11) + adcq 152(%r12), %rcx + movq 160(%rdx), %r8 + movq %rcx, 152(%r11) + adcq 160(%r12), %r8 + movq 168(%rdx), %rax + movq %r8, 160(%r11) + adcq 168(%r12), %rax + movq 176(%rdx), %rcx + movq %rax, 168(%r11) + adcq 176(%r12), %rcx + movq 184(%rdx), %r8 + movq %rcx, 176(%r11) + adcq 184(%r12), %r8 + movq %r8, 184(%r11) + adcq $0, %r14 + movq %r14, 1184(%rsp) + movq %r11, %rdx + movq %r10, %rsi + movq %rsp, %rdi +#ifndef __APPLE__ + callq sp_3072_mul_avx2_24@plt +#else + callq _sp_3072_mul_avx2_24 +#endif /* __APPLE__ */ + movq 1168(%rsp), %rdx + movq 1160(%rsp), %rsi + leaq 384(%rsp), %rdi + addq $192, %rdx + addq $192, %rsi +#ifndef __APPLE__ + callq sp_3072_mul_avx2_24@plt +#else + callq _sp_3072_mul_avx2_24 +#endif /* __APPLE__ */ + movq 1168(%rsp), %rdx + movq 1160(%rsp), %rsi + movq 1152(%rsp), %rdi +#ifndef __APPLE__ + callq sp_3072_mul_avx2_24@plt +#else + callq _sp_3072_mul_avx2_24 +#endif /* __APPLE__ */ + movq 1176(%rsp), %r13 + movq 1184(%rsp), %r14 + movq 1152(%rsp), %r15 + movq %r13, %r9 + leaq 768(%rsp), %r10 + leaq 960(%rsp), %r11 + andq %r14, %r9 + negq %r13 + negq %r14 + addq $384, %r15 + movq (%r10), %rax + movq (%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + addq %rcx, %rax + movq 8(%r10), %rcx + movq 8(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, (%r15) + adcq %r8, %rcx + movq 16(%r10), %r8 + movq 16(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 8(%r15) + adcq %rax, %r8 + movq 24(%r10), %rax + movq 24(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 16(%r15) + adcq %rcx, %rax + movq 32(%r10), %rcx + movq 32(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 24(%r15) + adcq %r8, %rcx + movq 40(%r10), %r8 + movq 40(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 32(%r15) + adcq %rax, %r8 + movq 48(%r10), %rax + movq 48(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 40(%r15) + adcq %rcx, %rax + movq 56(%r10), %rcx + movq 56(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 48(%r15) + adcq %r8, %rcx + movq 64(%r10), %r8 + movq 64(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 56(%r15) + adcq %rax, %r8 + movq 72(%r10), %rax + movq 72(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 64(%r15) + adcq %rcx, %rax + movq 80(%r10), %rcx + movq 80(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 72(%r15) + adcq %r8, %rcx + movq 88(%r10), %r8 + movq 88(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 80(%r15) + adcq %rax, %r8 + movq 96(%r10), %rax + movq 96(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 88(%r15) + adcq %rcx, %rax + movq 104(%r10), %rcx + movq 104(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 96(%r15) + adcq %r8, %rcx + movq 112(%r10), %r8 + movq 112(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 104(%r15) + adcq %rax, %r8 + movq 120(%r10), %rax + movq 120(%r11), %rcx + 
pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 112(%r15) + adcq %rcx, %rax + movq 128(%r10), %rcx + movq 128(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 120(%r15) + adcq %r8, %rcx + movq 136(%r10), %r8 + movq 136(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 128(%r15) + adcq %rax, %r8 + movq 144(%r10), %rax + movq 144(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 136(%r15) + adcq %rcx, %rax + movq 152(%r10), %rcx + movq 152(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 144(%r15) + adcq %r8, %rcx + movq 160(%r10), %r8 + movq 160(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 152(%r15) + adcq %rax, %r8 + movq 168(%r10), %rax + movq 168(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 160(%r15) + adcq %rcx, %rax + movq 176(%r10), %rcx + movq 176(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 168(%r15) + adcq %r8, %rcx + movq 184(%r10), %r8 + movq 184(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 176(%r15) + adcq %rax, %r8 + movq %r8, 184(%r15) + adcq $0, %r9 + leaq 384(%rsp), %r11 + movq %rsp, %r10 + movq (%r10), %rax + subq (%r11), %rax + movq 8(%r10), %rcx + movq %rax, (%r10) + sbbq 8(%r11), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r10) + sbbq 16(%r11), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r10) + sbbq 24(%r11), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r10) + sbbq 32(%r11), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r10) + sbbq 40(%r11), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r10) + sbbq 48(%r11), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r10) + sbbq 56(%r11), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r10) + sbbq 64(%r11), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r10) + sbbq 72(%r11), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r10) + sbbq 80(%r11), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r10) + sbbq 88(%r11), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r10) + sbbq 96(%r11), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r10) + sbbq 104(%r11), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r10) + sbbq 112(%r11), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r10) + sbbq 120(%r11), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r10) + sbbq 128(%r11), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r10) + sbbq 136(%r11), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r10) + sbbq 144(%r11), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r10) + sbbq 152(%r11), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r10) + sbbq 160(%r11), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r10) + sbbq 168(%r11), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r10) + sbbq 176(%r11), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r10) + sbbq 184(%r11), %r8 + movq 192(%r10), %rax + movq %r8, 184(%r10) + sbbq 192(%r11), %rax + movq 200(%r10), %rcx + movq %rax, 192(%r10) + sbbq 200(%r11), %rcx + movq 208(%r10), %r8 + movq %rcx, 200(%r10) + sbbq 208(%r11), %r8 + movq 216(%r10), %rax + movq %r8, 208(%r10) + sbbq 216(%r11), %rax + movq 224(%r10), %rcx + movq %rax, 216(%r10) + sbbq 224(%r11), %rcx + movq 232(%r10), %r8 + movq %rcx, 224(%r10) + sbbq 232(%r11), %r8 + movq 240(%r10), %rax + movq %r8, 232(%r10) + sbbq 240(%r11), %rax + movq 248(%r10), %rcx + movq %rax, 240(%r10) + sbbq 248(%r11), %rcx + movq 256(%r10), %r8 + movq %rcx, 248(%r10) + sbbq 256(%r11), %r8 + movq 264(%r10), %rax + movq %r8, 256(%r10) + sbbq 264(%r11), %rax + movq 272(%r10), %rcx + movq %rax, 264(%r10) + sbbq 272(%r11), %rcx + movq 
280(%r10), %r8 + movq %rcx, 272(%r10) + sbbq 280(%r11), %r8 + movq 288(%r10), %rax + movq %r8, 280(%r10) + sbbq 288(%r11), %rax + movq 296(%r10), %rcx + movq %rax, 288(%r10) + sbbq 296(%r11), %rcx + movq 304(%r10), %r8 + movq %rcx, 296(%r10) + sbbq 304(%r11), %r8 + movq 312(%r10), %rax + movq %r8, 304(%r10) + sbbq 312(%r11), %rax + movq 320(%r10), %rcx + movq %rax, 312(%r10) + sbbq 320(%r11), %rcx + movq 328(%r10), %r8 + movq %rcx, 320(%r10) + sbbq 328(%r11), %r8 + movq 336(%r10), %rax + movq %r8, 328(%r10) + sbbq 336(%r11), %rax + movq 344(%r10), %rcx + movq %rax, 336(%r10) + sbbq 344(%r11), %rcx + movq 352(%r10), %r8 + movq %rcx, 344(%r10) + sbbq 352(%r11), %r8 + movq 360(%r10), %rax + movq %r8, 352(%r10) + sbbq 360(%r11), %rax + movq 368(%r10), %rcx + movq %rax, 360(%r10) + sbbq 368(%r11), %rcx + movq 376(%r10), %r8 + movq %rcx, 368(%r10) + sbbq 376(%r11), %r8 + movq %r8, 376(%r10) + sbbq $0, %r9 + movq (%r10), %rax + subq (%rdi), %rax + movq 8(%r10), %rcx + movq %rax, (%r10) + sbbq 8(%rdi), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r10) + sbbq 16(%rdi), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r10) + sbbq 24(%rdi), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r10) + sbbq 32(%rdi), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r10) + sbbq 40(%rdi), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r10) + sbbq 48(%rdi), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r10) + sbbq 56(%rdi), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r10) + sbbq 64(%rdi), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r10) + sbbq 72(%rdi), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r10) + sbbq 80(%rdi), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r10) + sbbq 88(%rdi), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r10) + sbbq 96(%rdi), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r10) + sbbq 104(%rdi), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r10) + sbbq 112(%rdi), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r10) + sbbq 120(%rdi), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r10) + sbbq 128(%rdi), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r10) + sbbq 136(%rdi), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r10) + sbbq 144(%rdi), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r10) + sbbq 152(%rdi), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r10) + sbbq 160(%rdi), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r10) + sbbq 168(%rdi), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r10) + sbbq 176(%rdi), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r10) + sbbq 184(%rdi), %r8 + movq 192(%r10), %rax + movq %r8, 184(%r10) + sbbq 192(%rdi), %rax + movq 200(%r10), %rcx + movq %rax, 192(%r10) + sbbq 200(%rdi), %rcx + movq 208(%r10), %r8 + movq %rcx, 200(%r10) + sbbq 208(%rdi), %r8 + movq 216(%r10), %rax + movq %r8, 208(%r10) + sbbq 216(%rdi), %rax + movq 224(%r10), %rcx + movq %rax, 216(%r10) + sbbq 224(%rdi), %rcx + movq 232(%r10), %r8 + movq %rcx, 224(%r10) + sbbq 232(%rdi), %r8 + movq 240(%r10), %rax + movq %r8, 232(%r10) + sbbq 240(%rdi), %rax + movq 248(%r10), %rcx + movq %rax, 240(%r10) + sbbq 248(%rdi), %rcx + movq 256(%r10), %r8 + movq %rcx, 248(%r10) + sbbq 256(%rdi), %r8 + movq 264(%r10), %rax + movq %r8, 256(%r10) + sbbq 264(%rdi), %rax + movq 272(%r10), %rcx + movq %rax, 264(%r10) + sbbq 272(%rdi), %rcx + movq 280(%r10), %r8 + movq %rcx, 272(%r10) + sbbq 280(%rdi), %r8 + movq 288(%r10), %rax + movq %r8, 280(%r10) + sbbq 288(%rdi), %rax + movq 296(%r10), %rcx + movq %rax, 288(%r10) + sbbq 296(%rdi), %rcx + movq 304(%r10), %r8 + movq %rcx, 296(%r10) + sbbq 304(%rdi), %r8 + movq 312(%r10), %rax + movq %r8, 304(%r10) + sbbq 
312(%rdi), %rax + movq 320(%r10), %rcx + movq %rax, 312(%r10) + sbbq 320(%rdi), %rcx + movq 328(%r10), %r8 + movq %rcx, 320(%r10) + sbbq 328(%rdi), %r8 + movq 336(%r10), %rax + movq %r8, 328(%r10) + sbbq 336(%rdi), %rax + movq 344(%r10), %rcx + movq %rax, 336(%r10) + sbbq 344(%rdi), %rcx + movq 352(%r10), %r8 + movq %rcx, 344(%r10) + sbbq 352(%rdi), %r8 + movq 360(%r10), %rax + movq %r8, 352(%r10) + sbbq 360(%rdi), %rax + movq 368(%r10), %rcx + movq %rax, 360(%r10) + sbbq 368(%rdi), %rcx + movq 376(%r10), %r8 + movq %rcx, 368(%r10) + sbbq 376(%rdi), %r8 + movq %r8, 376(%r10) + sbbq $0, %r9 + subq $192, %r15 + # Add + movq (%r15), %rax + addq (%r10), %rax + movq 8(%r15), %rcx + movq %rax, (%r15) + adcq 8(%r10), %rcx + movq 16(%r15), %r8 + movq %rcx, 8(%r15) + adcq 16(%r10), %r8 + movq 24(%r15), %rax + movq %r8, 16(%r15) + adcq 24(%r10), %rax + movq 32(%r15), %rcx + movq %rax, 24(%r15) + adcq 32(%r10), %rcx + movq 40(%r15), %r8 + movq %rcx, 32(%r15) + adcq 40(%r10), %r8 + movq 48(%r15), %rax + movq %r8, 40(%r15) + adcq 48(%r10), %rax + movq 56(%r15), %rcx + movq %rax, 48(%r15) + adcq 56(%r10), %rcx + movq 64(%r15), %r8 + movq %rcx, 56(%r15) + adcq 64(%r10), %r8 + movq 72(%r15), %rax + movq %r8, 64(%r15) + adcq 72(%r10), %rax + movq 80(%r15), %rcx + movq %rax, 72(%r15) + adcq 80(%r10), %rcx + movq 88(%r15), %r8 + movq %rcx, 80(%r15) + adcq 88(%r10), %r8 + movq 96(%r15), %rax + movq %r8, 88(%r15) + adcq 96(%r10), %rax + movq 104(%r15), %rcx + movq %rax, 96(%r15) + adcq 104(%r10), %rcx + movq 112(%r15), %r8 + movq %rcx, 104(%r15) + adcq 112(%r10), %r8 + movq 120(%r15), %rax + movq %r8, 112(%r15) + adcq 120(%r10), %rax + movq 128(%r15), %rcx + movq %rax, 120(%r15) + adcq 128(%r10), %rcx + movq 136(%r15), %r8 + movq %rcx, 128(%r15) + adcq 136(%r10), %r8 + movq 144(%r15), %rax + movq %r8, 136(%r15) + adcq 144(%r10), %rax + movq 152(%r15), %rcx + movq %rax, 144(%r15) + adcq 152(%r10), %rcx + movq 160(%r15), %r8 + movq %rcx, 152(%r15) + adcq 160(%r10), %r8 + movq 168(%r15), %rax + movq %r8, 160(%r15) + adcq 168(%r10), %rax + movq 176(%r15), %rcx + movq %rax, 168(%r15) + adcq 176(%r10), %rcx + movq 184(%r15), %r8 + movq %rcx, 176(%r15) + adcq 184(%r10), %r8 + movq 192(%r15), %rax + movq %r8, 184(%r15) + adcq 192(%r10), %rax + movq 200(%r15), %rcx + movq %rax, 192(%r15) + adcq 200(%r10), %rcx + movq 208(%r15), %r8 + movq %rcx, 200(%r15) + adcq 208(%r10), %r8 + movq 216(%r15), %rax + movq %r8, 208(%r15) + adcq 216(%r10), %rax + movq 224(%r15), %rcx + movq %rax, 216(%r15) + adcq 224(%r10), %rcx + movq 232(%r15), %r8 + movq %rcx, 224(%r15) + adcq 232(%r10), %r8 + movq 240(%r15), %rax + movq %r8, 232(%r15) + adcq 240(%r10), %rax + movq 248(%r15), %rcx + movq %rax, 240(%r15) + adcq 248(%r10), %rcx + movq 256(%r15), %r8 + movq %rcx, 248(%r15) + adcq 256(%r10), %r8 + movq 264(%r15), %rax + movq %r8, 256(%r15) + adcq 264(%r10), %rax + movq 272(%r15), %rcx + movq %rax, 264(%r15) + adcq 272(%r10), %rcx + movq 280(%r15), %r8 + movq %rcx, 272(%r15) + adcq 280(%r10), %r8 + movq 288(%r15), %rax + movq %r8, 280(%r15) + adcq 288(%r10), %rax + movq 296(%r15), %rcx + movq %rax, 288(%r15) + adcq 296(%r10), %rcx + movq 304(%r15), %r8 + movq %rcx, 296(%r15) + adcq 304(%r10), %r8 + movq 312(%r15), %rax + movq %r8, 304(%r15) + adcq 312(%r10), %rax + movq 320(%r15), %rcx + movq %rax, 312(%r15) + adcq 320(%r10), %rcx + movq 328(%r15), %r8 + movq %rcx, 320(%r15) + adcq 328(%r10), %r8 + movq 336(%r15), %rax + movq %r8, 328(%r15) + adcq 336(%r10), %rax + movq 344(%r15), %rcx + movq %rax, 336(%r15) + adcq 344(%r10), %rcx + movq 
352(%r15), %r8 + movq %rcx, 344(%r15) + adcq 352(%r10), %r8 + movq 360(%r15), %rax + movq %r8, 352(%r15) + adcq 360(%r10), %rax + movq 368(%r15), %rcx + movq %rax, 360(%r15) + adcq 368(%r10), %rcx + movq 376(%r15), %r8 + movq %rcx, 368(%r15) + adcq 376(%r10), %r8 + movq %r8, 376(%r15) + adcq $0, %r9 + movq %r9, 576(%rdi) + addq $192, %r15 + # Add + movq (%r15), %rax + addq (%r11), %rax + movq 8(%r15), %rcx + movq %rax, (%r15) + adcq 8(%r11), %rcx + movq 16(%r15), %r8 + movq %rcx, 8(%r15) + adcq 16(%r11), %r8 + movq 24(%r15), %rax + movq %r8, 16(%r15) + adcq 24(%r11), %rax + movq 32(%r15), %rcx + movq %rax, 24(%r15) + adcq 32(%r11), %rcx + movq 40(%r15), %r8 + movq %rcx, 32(%r15) + adcq 40(%r11), %r8 + movq 48(%r15), %rax + movq %r8, 40(%r15) + adcq 48(%r11), %rax + movq 56(%r15), %rcx + movq %rax, 48(%r15) + adcq 56(%r11), %rcx + movq 64(%r15), %r8 + movq %rcx, 56(%r15) + adcq 64(%r11), %r8 + movq 72(%r15), %rax + movq %r8, 64(%r15) + adcq 72(%r11), %rax + movq 80(%r15), %rcx + movq %rax, 72(%r15) + adcq 80(%r11), %rcx + movq 88(%r15), %r8 + movq %rcx, 80(%r15) + adcq 88(%r11), %r8 + movq 96(%r15), %rax + movq %r8, 88(%r15) + adcq 96(%r11), %rax + movq 104(%r15), %rcx + movq %rax, 96(%r15) + adcq 104(%r11), %rcx + movq 112(%r15), %r8 + movq %rcx, 104(%r15) + adcq 112(%r11), %r8 + movq 120(%r15), %rax + movq %r8, 112(%r15) + adcq 120(%r11), %rax + movq 128(%r15), %rcx + movq %rax, 120(%r15) + adcq 128(%r11), %rcx + movq 136(%r15), %r8 + movq %rcx, 128(%r15) + adcq 136(%r11), %r8 + movq 144(%r15), %rax + movq %r8, 136(%r15) + adcq 144(%r11), %rax + movq 152(%r15), %rcx + movq %rax, 144(%r15) + adcq 152(%r11), %rcx + movq 160(%r15), %r8 + movq %rcx, 152(%r15) + adcq 160(%r11), %r8 + movq 168(%r15), %rax + movq %r8, 160(%r15) + adcq 168(%r11), %rax + movq 176(%r15), %rcx + movq %rax, 168(%r15) + adcq 176(%r11), %rcx + movq 184(%r15), %r8 + movq %rcx, 176(%r15) + adcq 184(%r11), %r8 + movq 192(%r15), %rax + movq %r8, 184(%r15) + adcq 192(%r11), %rax + movq %rax, 192(%r15) + # Add to zero + movq 200(%r11), %rax + adcq $0, %rax + movq 208(%r11), %rcx + movq %rax, 200(%r15) + adcq $0, %rcx + movq 216(%r11), %r8 + movq %rcx, 208(%r15) + adcq $0, %r8 + movq 224(%r11), %rax + movq %r8, 216(%r15) + adcq $0, %rax + movq 232(%r11), %rcx + movq %rax, 224(%r15) + adcq $0, %rcx + movq 240(%r11), %r8 + movq %rcx, 232(%r15) + adcq $0, %r8 + movq 248(%r11), %rax + movq %r8, 240(%r15) + adcq $0, %rax + movq 256(%r11), %rcx + movq %rax, 248(%r15) + adcq $0, %rcx + movq 264(%r11), %r8 + movq %rcx, 256(%r15) + adcq $0, %r8 + movq 272(%r11), %rax + movq %r8, 264(%r15) + adcq $0, %rax + movq 280(%r11), %rcx + movq %rax, 272(%r15) + adcq $0, %rcx + movq 288(%r11), %r8 + movq %rcx, 280(%r15) + adcq $0, %r8 + movq 296(%r11), %rax + movq %r8, 288(%r15) + adcq $0, %rax + movq 304(%r11), %rcx + movq %rax, 296(%r15) + adcq $0, %rcx + movq 312(%r11), %r8 + movq %rcx, 304(%r15) + adcq $0, %r8 + movq 320(%r11), %rax + movq %r8, 312(%r15) + adcq $0, %rax + movq 328(%r11), %rcx + movq %rax, 320(%r15) + adcq $0, %rcx + movq 336(%r11), %r8 + movq %rcx, 328(%r15) + adcq $0, %r8 + movq 344(%r11), %rax + movq %r8, 336(%r15) + adcq $0, %rax + movq 352(%r11), %rcx + movq %rax, 344(%r15) + adcq $0, %rcx + movq 360(%r11), %r8 + movq %rcx, 352(%r15) + adcq $0, %r8 + movq 368(%r11), %rax + movq %r8, 360(%r15) + adcq $0, %rax + movq 376(%r11), %rcx + movq %rax, 368(%r15) + adcq $0, %rcx + movq %rcx, 376(%r15) + addq $1192, %rsp + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size 
sp_3072_mul_avx2_48,.-sp_3072_mul_avx2_48 +#endif /* __APPLE__ */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_3072_sqr_avx2_48 +.type sp_3072_sqr_avx2_48,@function +.align 16 +sp_3072_sqr_avx2_48: +#else +.globl _sp_3072_sqr_avx2_48 +.p2align 4 +_sp_3072_sqr_avx2_48: +#endif /* __APPLE__ */ + subq $984, %rsp + movq %rdi, 960(%rsp) + movq %rsi, 968(%rsp) + leaq 768(%rsp), %r8 + leaq 192(%rsi), %r9 + # Add + movq (%rsi), %rdx + xorq %rcx, %rcx + addq (%r9), %rdx + movq 8(%rsi), %rax + movq %rdx, (%r8) + adcq 8(%r9), %rax + movq 16(%rsi), %rdx + movq %rax, 8(%r8) + adcq 16(%r9), %rdx + movq 24(%rsi), %rax + movq %rdx, 16(%r8) + adcq 24(%r9), %rax + movq 32(%rsi), %rdx + movq %rax, 24(%r8) + adcq 32(%r9), %rdx + movq 40(%rsi), %rax + movq %rdx, 32(%r8) + adcq 40(%r9), %rax + movq 48(%rsi), %rdx + movq %rax, 40(%r8) + adcq 48(%r9), %rdx + movq 56(%rsi), %rax + movq %rdx, 48(%r8) + adcq 56(%r9), %rax + movq 64(%rsi), %rdx + movq %rax, 56(%r8) + adcq 64(%r9), %rdx + movq 72(%rsi), %rax + movq %rdx, 64(%r8) + adcq 72(%r9), %rax + movq 80(%rsi), %rdx + movq %rax, 72(%r8) + adcq 80(%r9), %rdx + movq 88(%rsi), %rax + movq %rdx, 80(%r8) + adcq 88(%r9), %rax + movq 96(%rsi), %rdx + movq %rax, 88(%r8) + adcq 96(%r9), %rdx + movq 104(%rsi), %rax + movq %rdx, 96(%r8) + adcq 104(%r9), %rax + movq 112(%rsi), %rdx + movq %rax, 104(%r8) + adcq 112(%r9), %rdx + movq 120(%rsi), %rax + movq %rdx, 112(%r8) + adcq 120(%r9), %rax + movq 128(%rsi), %rdx + movq %rax, 120(%r8) + adcq 128(%r9), %rdx + movq 136(%rsi), %rax + movq %rdx, 128(%r8) + adcq 136(%r9), %rax + movq 144(%rsi), %rdx + movq %rax, 136(%r8) + adcq 144(%r9), %rdx + movq 152(%rsi), %rax + movq %rdx, 144(%r8) + adcq 152(%r9), %rax + movq 160(%rsi), %rdx + movq %rax, 152(%r8) + adcq 160(%r9), %rdx + movq 168(%rsi), %rax + movq %rdx, 160(%r8) + adcq 168(%r9), %rax + movq 176(%rsi), %rdx + movq %rax, 168(%r8) + adcq 176(%r9), %rdx + movq 184(%rsi), %rax + movq %rdx, 176(%r8) + adcq 184(%r9), %rax + movq %rax, 184(%r8) + adcq $0, %rcx + movq %rcx, 976(%rsp) + movq %r8, %rsi + movq %rsp, %rdi +#ifndef __APPLE__ + callq sp_3072_sqr_avx2_24@plt +#else + callq _sp_3072_sqr_avx2_24 +#endif /* __APPLE__ */ + movq 968(%rsp), %rsi + leaq 384(%rsp), %rdi + addq $192, %rsi +#ifndef __APPLE__ + callq sp_3072_sqr_avx2_24@plt +#else + callq _sp_3072_sqr_avx2_24 +#endif /* __APPLE__ */ + movq 968(%rsp), %rsi + movq 960(%rsp), %rdi +#ifndef __APPLE__ + callq sp_3072_sqr_avx2_24@plt +#else + callq _sp_3072_sqr_avx2_24 +#endif /* __APPLE__ */ + movq 976(%rsp), %r10 + movq %rdi, %r9 + leaq 768(%rsp), %r8 + movq %r10, %rcx + negq %r10 + addq $384, %r9 + movq (%r8), %rdx + pextq %r10, %rdx, %rdx + addq %rdx, %rdx + movq 8(%r8), %rax + movq %rdx, (%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 16(%r8), %rdx + movq %rax, 8(%r9) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 32(%r8), %rdx + movq %rax, 24(%r9) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 48(%r8), %rdx + movq %rax, 40(%r9) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 64(%r8), %rdx + movq %rax, 56(%r9) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 
80(%r8), %rdx + movq %rax, 72(%r9) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 96(%r8), %rdx + movq %rax, 88(%r9) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 112(%r8), %rdx + movq %rax, 104(%r9) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 128(%r8), %rdx + movq %rax, 120(%r9) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 144(%r8), %rdx + movq %rax, 136(%r9) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 152(%r8), %rax + movq %rdx, 144(%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 160(%r8), %rdx + movq %rax, 152(%r9) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 168(%r8), %rax + movq %rdx, 160(%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 176(%r8), %rdx + movq %rax, 168(%r9) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r9) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq %rax, 184(%r9) + adcq $0, %rcx + leaq 384(%rsp), %rsi + movq %rsp, %r8 + movq (%r8), %rdx + subq (%rsi), %rdx + movq 8(%r8), %rax + movq %rdx, (%r8) + sbbq 8(%rsi), %rax + movq 16(%r8), %rdx + movq %rax, 8(%r8) + sbbq 16(%rsi), %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r8) + sbbq 24(%rsi), %rax + movq 32(%r8), %rdx + movq %rax, 24(%r8) + sbbq 32(%rsi), %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r8) + sbbq 40(%rsi), %rax + movq 48(%r8), %rdx + movq %rax, 40(%r8) + sbbq 48(%rsi), %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r8) + sbbq 56(%rsi), %rax + movq 64(%r8), %rdx + movq %rax, 56(%r8) + sbbq 64(%rsi), %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r8) + sbbq 72(%rsi), %rax + movq 80(%r8), %rdx + movq %rax, 72(%r8) + sbbq 80(%rsi), %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r8) + sbbq 88(%rsi), %rax + movq 96(%r8), %rdx + movq %rax, 88(%r8) + sbbq 96(%rsi), %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r8) + sbbq 104(%rsi), %rax + movq 112(%r8), %rdx + movq %rax, 104(%r8) + sbbq 112(%rsi), %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r8) + sbbq 120(%rsi), %rax + movq 128(%r8), %rdx + movq %rax, 120(%r8) + sbbq 128(%rsi), %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r8) + sbbq 136(%rsi), %rax + movq 144(%r8), %rdx + movq %rax, 136(%r8) + sbbq 144(%rsi), %rdx + movq 152(%r8), %rax + movq %rdx, 144(%r8) + sbbq 152(%rsi), %rax + movq 160(%r8), %rdx + movq %rax, 152(%r8) + sbbq 160(%rsi), %rdx + movq 168(%r8), %rax + movq %rdx, 160(%r8) + sbbq 168(%rsi), %rax + movq 176(%r8), %rdx + movq %rax, 168(%r8) + sbbq 176(%rsi), %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r8) + sbbq 184(%rsi), %rax + movq 192(%r8), %rdx + movq %rax, 184(%r8) + sbbq 192(%rsi), %rdx + movq 200(%r8), %rax + movq %rdx, 192(%r8) + sbbq 200(%rsi), %rax + movq 208(%r8), %rdx + movq %rax, 200(%r8) + sbbq 208(%rsi), %rdx + movq 216(%r8), %rax + movq %rdx, 208(%r8) + sbbq 216(%rsi), %rax + movq 224(%r8), %rdx + movq %rax, 216(%r8) + sbbq 224(%rsi), %rdx + movq 232(%r8), %rax + movq %rdx, 224(%r8) + sbbq 232(%rsi), %rax + movq 240(%r8), %rdx + movq %rax, 232(%r8) + sbbq 240(%rsi), %rdx + movq 248(%r8), %rax + movq %rdx, 240(%r8) + sbbq 248(%rsi), %rax + movq 256(%r8), %rdx + movq %rax, 248(%r8) + sbbq 256(%rsi), %rdx + movq 264(%r8), %rax + movq %rdx, 256(%r8) + sbbq 264(%rsi), %rax + movq 272(%r8), %rdx + movq %rax, 264(%r8) + sbbq 272(%rsi), 
%rdx + movq 280(%r8), %rax + movq %rdx, 272(%r8) + sbbq 280(%rsi), %rax + movq 288(%r8), %rdx + movq %rax, 280(%r8) + sbbq 288(%rsi), %rdx + movq 296(%r8), %rax + movq %rdx, 288(%r8) + sbbq 296(%rsi), %rax + movq 304(%r8), %rdx + movq %rax, 296(%r8) + sbbq 304(%rsi), %rdx + movq 312(%r8), %rax + movq %rdx, 304(%r8) + sbbq 312(%rsi), %rax + movq 320(%r8), %rdx + movq %rax, 312(%r8) + sbbq 320(%rsi), %rdx + movq 328(%r8), %rax + movq %rdx, 320(%r8) + sbbq 328(%rsi), %rax + movq 336(%r8), %rdx + movq %rax, 328(%r8) + sbbq 336(%rsi), %rdx + movq 344(%r8), %rax + movq %rdx, 336(%r8) + sbbq 344(%rsi), %rax + movq 352(%r8), %rdx + movq %rax, 344(%r8) + sbbq 352(%rsi), %rdx + movq 360(%r8), %rax + movq %rdx, 352(%r8) + sbbq 360(%rsi), %rax + movq 368(%r8), %rdx + movq %rax, 360(%r8) + sbbq 368(%rsi), %rdx + movq 376(%r8), %rax + movq %rdx, 368(%r8) + sbbq 376(%rsi), %rax + movq %rax, 376(%r8) + sbbq $0, %rcx + movq (%r8), %rdx + subq (%rdi), %rdx + movq 8(%r8), %rax + movq %rdx, (%r8) + sbbq 8(%rdi), %rax + movq 16(%r8), %rdx + movq %rax, 8(%r8) + sbbq 16(%rdi), %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r8) + sbbq 24(%rdi), %rax + movq 32(%r8), %rdx + movq %rax, 24(%r8) + sbbq 32(%rdi), %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r8) + sbbq 40(%rdi), %rax + movq 48(%r8), %rdx + movq %rax, 40(%r8) + sbbq 48(%rdi), %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r8) + sbbq 56(%rdi), %rax + movq 64(%r8), %rdx + movq %rax, 56(%r8) + sbbq 64(%rdi), %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r8) + sbbq 72(%rdi), %rax + movq 80(%r8), %rdx + movq %rax, 72(%r8) + sbbq 80(%rdi), %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r8) + sbbq 88(%rdi), %rax + movq 96(%r8), %rdx + movq %rax, 88(%r8) + sbbq 96(%rdi), %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r8) + sbbq 104(%rdi), %rax + movq 112(%r8), %rdx + movq %rax, 104(%r8) + sbbq 112(%rdi), %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r8) + sbbq 120(%rdi), %rax + movq 128(%r8), %rdx + movq %rax, 120(%r8) + sbbq 128(%rdi), %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r8) + sbbq 136(%rdi), %rax + movq 144(%r8), %rdx + movq %rax, 136(%r8) + sbbq 144(%rdi), %rdx + movq 152(%r8), %rax + movq %rdx, 144(%r8) + sbbq 152(%rdi), %rax + movq 160(%r8), %rdx + movq %rax, 152(%r8) + sbbq 160(%rdi), %rdx + movq 168(%r8), %rax + movq %rdx, 160(%r8) + sbbq 168(%rdi), %rax + movq 176(%r8), %rdx + movq %rax, 168(%r8) + sbbq 176(%rdi), %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r8) + sbbq 184(%rdi), %rax + movq 192(%r8), %rdx + movq %rax, 184(%r8) + sbbq 192(%rdi), %rdx + movq 200(%r8), %rax + movq %rdx, 192(%r8) + sbbq 200(%rdi), %rax + movq 208(%r8), %rdx + movq %rax, 200(%r8) + sbbq 208(%rdi), %rdx + movq 216(%r8), %rax + movq %rdx, 208(%r8) + sbbq 216(%rdi), %rax + movq 224(%r8), %rdx + movq %rax, 216(%r8) + sbbq 224(%rdi), %rdx + movq 232(%r8), %rax + movq %rdx, 224(%r8) + sbbq 232(%rdi), %rax + movq 240(%r8), %rdx + movq %rax, 232(%r8) + sbbq 240(%rdi), %rdx + movq 248(%r8), %rax + movq %rdx, 240(%r8) + sbbq 248(%rdi), %rax + movq 256(%r8), %rdx + movq %rax, 248(%r8) + sbbq 256(%rdi), %rdx + movq 264(%r8), %rax + movq %rdx, 256(%r8) + sbbq 264(%rdi), %rax + movq 272(%r8), %rdx + movq %rax, 264(%r8) + sbbq 272(%rdi), %rdx + movq 280(%r8), %rax + movq %rdx, 272(%r8) + sbbq 280(%rdi), %rax + movq 288(%r8), %rdx + movq %rax, 280(%r8) + sbbq 288(%rdi), %rdx + movq 296(%r8), %rax + movq %rdx, 288(%r8) + sbbq 296(%rdi), %rax + movq 304(%r8), %rdx + movq %rax, 296(%r8) + sbbq 304(%rdi), %rdx + movq 312(%r8), %rax + movq %rdx, 304(%r8) + sbbq 312(%rdi), %rax + movq 320(%r8), %rdx + movq 
%rax, 312(%r8) + sbbq 320(%rdi), %rdx + movq 328(%r8), %rax + movq %rdx, 320(%r8) + sbbq 328(%rdi), %rax + movq 336(%r8), %rdx + movq %rax, 328(%r8) + sbbq 336(%rdi), %rdx + movq 344(%r8), %rax + movq %rdx, 336(%r8) + sbbq 344(%rdi), %rax + movq 352(%r8), %rdx + movq %rax, 344(%r8) + sbbq 352(%rdi), %rdx + movq 360(%r8), %rax + movq %rdx, 352(%r8) + sbbq 360(%rdi), %rax + movq 368(%r8), %rdx + movq %rax, 360(%r8) + sbbq 368(%rdi), %rdx + movq 376(%r8), %rax + movq %rdx, 368(%r8) + sbbq 376(%rdi), %rax + movq %rax, 376(%r8) + sbbq $0, %rcx + subq $192, %r9 + # Add in place + movq (%r9), %rdx + addq (%r8), %rdx + movq 8(%r9), %rax + movq %rdx, (%r9) + adcq 8(%r8), %rax + movq 16(%r9), %rdx + movq %rax, 8(%r9) + adcq 16(%r8), %rdx + movq 24(%r9), %rax + movq %rdx, 16(%r9) + adcq 24(%r8), %rax + movq 32(%r9), %rdx + movq %rax, 24(%r9) + adcq 32(%r8), %rdx + movq 40(%r9), %rax + movq %rdx, 32(%r9) + adcq 40(%r8), %rax + movq 48(%r9), %rdx + movq %rax, 40(%r9) + adcq 48(%r8), %rdx + movq 56(%r9), %rax + movq %rdx, 48(%r9) + adcq 56(%r8), %rax + movq 64(%r9), %rdx + movq %rax, 56(%r9) + adcq 64(%r8), %rdx + movq 72(%r9), %rax + movq %rdx, 64(%r9) + adcq 72(%r8), %rax + movq 80(%r9), %rdx + movq %rax, 72(%r9) + adcq 80(%r8), %rdx + movq 88(%r9), %rax + movq %rdx, 80(%r9) + adcq 88(%r8), %rax + movq 96(%r9), %rdx + movq %rax, 88(%r9) + adcq 96(%r8), %rdx + movq 104(%r9), %rax + movq %rdx, 96(%r9) + adcq 104(%r8), %rax + movq 112(%r9), %rdx + movq %rax, 104(%r9) + adcq 112(%r8), %rdx + movq 120(%r9), %rax + movq %rdx, 112(%r9) + adcq 120(%r8), %rax + movq 128(%r9), %rdx + movq %rax, 120(%r9) + adcq 128(%r8), %rdx + movq 136(%r9), %rax + movq %rdx, 128(%r9) + adcq 136(%r8), %rax + movq 144(%r9), %rdx + movq %rax, 136(%r9) + adcq 144(%r8), %rdx + movq 152(%r9), %rax + movq %rdx, 144(%r9) + adcq 152(%r8), %rax + movq 160(%r9), %rdx + movq %rax, 152(%r9) + adcq 160(%r8), %rdx + movq 168(%r9), %rax + movq %rdx, 160(%r9) + adcq 168(%r8), %rax + movq 176(%r9), %rdx + movq %rax, 168(%r9) + adcq 176(%r8), %rdx + movq 184(%r9), %rax + movq %rdx, 176(%r9) + adcq 184(%r8), %rax + movq 192(%r9), %rdx + movq %rax, 184(%r9) + adcq 192(%r8), %rdx + movq 200(%r9), %rax + movq %rdx, 192(%r9) + adcq 200(%r8), %rax + movq 208(%r9), %rdx + movq %rax, 200(%r9) + adcq 208(%r8), %rdx + movq 216(%r9), %rax + movq %rdx, 208(%r9) + adcq 216(%r8), %rax + movq 224(%r9), %rdx + movq %rax, 216(%r9) + adcq 224(%r8), %rdx + movq 232(%r9), %rax + movq %rdx, 224(%r9) + adcq 232(%r8), %rax + movq 240(%r9), %rdx + movq %rax, 232(%r9) + adcq 240(%r8), %rdx + movq 248(%r9), %rax + movq %rdx, 240(%r9) + adcq 248(%r8), %rax + movq 256(%r9), %rdx + movq %rax, 248(%r9) + adcq 256(%r8), %rdx + movq 264(%r9), %rax + movq %rdx, 256(%r9) + adcq 264(%r8), %rax + movq 272(%r9), %rdx + movq %rax, 264(%r9) + adcq 272(%r8), %rdx + movq 280(%r9), %rax + movq %rdx, 272(%r9) + adcq 280(%r8), %rax + movq 288(%r9), %rdx + movq %rax, 280(%r9) + adcq 288(%r8), %rdx + movq 296(%r9), %rax + movq %rdx, 288(%r9) + adcq 296(%r8), %rax + movq 304(%r9), %rdx + movq %rax, 296(%r9) + adcq 304(%r8), %rdx + movq 312(%r9), %rax + movq %rdx, 304(%r9) + adcq 312(%r8), %rax + movq 320(%r9), %rdx + movq %rax, 312(%r9) + adcq 320(%r8), %rdx + movq 328(%r9), %rax + movq %rdx, 320(%r9) + adcq 328(%r8), %rax + movq 336(%r9), %rdx + movq %rax, 328(%r9) + adcq 336(%r8), %rdx + movq 344(%r9), %rax + movq %rdx, 336(%r9) + adcq 344(%r8), %rax + movq 352(%r9), %rdx + movq %rax, 344(%r9) + adcq 352(%r8), %rdx + movq 360(%r9), %rax + movq %rdx, 352(%r9) + adcq 360(%r8), %rax + movq 
368(%r9), %rdx + movq %rax, 360(%r9) + adcq 368(%r8), %rdx + movq 376(%r9), %rax + movq %rdx, 368(%r9) + adcq 376(%r8), %rax + movq %rax, 376(%r9) + adcq $0, %rcx + movq %rcx, 576(%rdi) + # Add in place + movq 192(%r9), %rdx + addq (%rsi), %rdx + movq 200(%r9), %rax + movq %rdx, 192(%r9) + adcq 8(%rsi), %rax + movq 208(%r9), %rdx + movq %rax, 200(%r9) + adcq 16(%rsi), %rdx + movq 216(%r9), %rax + movq %rdx, 208(%r9) + adcq 24(%rsi), %rax + movq 224(%r9), %rdx + movq %rax, 216(%r9) + adcq 32(%rsi), %rdx + movq 232(%r9), %rax + movq %rdx, 224(%r9) + adcq 40(%rsi), %rax + movq 240(%r9), %rdx + movq %rax, 232(%r9) + adcq 48(%rsi), %rdx + movq 248(%r9), %rax + movq %rdx, 240(%r9) + adcq 56(%rsi), %rax + movq 256(%r9), %rdx + movq %rax, 248(%r9) + adcq 64(%rsi), %rdx + movq 264(%r9), %rax + movq %rdx, 256(%r9) + adcq 72(%rsi), %rax + movq 272(%r9), %rdx + movq %rax, 264(%r9) + adcq 80(%rsi), %rdx + movq 280(%r9), %rax + movq %rdx, 272(%r9) + adcq 88(%rsi), %rax + movq 288(%r9), %rdx + movq %rax, 280(%r9) + adcq 96(%rsi), %rdx + movq 296(%r9), %rax + movq %rdx, 288(%r9) + adcq 104(%rsi), %rax + movq 304(%r9), %rdx + movq %rax, 296(%r9) + adcq 112(%rsi), %rdx + movq 312(%r9), %rax + movq %rdx, 304(%r9) + adcq 120(%rsi), %rax + movq 320(%r9), %rdx + movq %rax, 312(%r9) + adcq 128(%rsi), %rdx + movq 328(%r9), %rax + movq %rdx, 320(%r9) + adcq 136(%rsi), %rax + movq 336(%r9), %rdx + movq %rax, 328(%r9) + adcq 144(%rsi), %rdx + movq 344(%r9), %rax + movq %rdx, 336(%r9) + adcq 152(%rsi), %rax + movq 352(%r9), %rdx + movq %rax, 344(%r9) + adcq 160(%rsi), %rdx + movq 360(%r9), %rax + movq %rdx, 352(%r9) + adcq 168(%rsi), %rax + movq 368(%r9), %rdx + movq %rax, 360(%r9) + adcq 176(%rsi), %rdx + movq 376(%r9), %rax + movq %rdx, 368(%r9) + adcq 184(%rsi), %rax + movq 384(%r9), %rdx + movq %rax, 376(%r9) + adcq 192(%rsi), %rdx + movq %rdx, 384(%r9) + # Add to zero + movq 200(%rsi), %rdx + adcq $0, %rdx + movq 208(%rsi), %rax + movq %rdx, 392(%r9) + adcq $0, %rax + movq 216(%rsi), %rdx + movq %rax, 400(%r9) + adcq $0, %rdx + movq 224(%rsi), %rax + movq %rdx, 408(%r9) + adcq $0, %rax + movq 232(%rsi), %rdx + movq %rax, 416(%r9) + adcq $0, %rdx + movq 240(%rsi), %rax + movq %rdx, 424(%r9) + adcq $0, %rax + movq 248(%rsi), %rdx + movq %rax, 432(%r9) + adcq $0, %rdx + movq 256(%rsi), %rax + movq %rdx, 440(%r9) + adcq $0, %rax + movq 264(%rsi), %rdx + movq %rax, 448(%r9) + adcq $0, %rdx + movq 272(%rsi), %rax + movq %rdx, 456(%r9) + adcq $0, %rax + movq 280(%rsi), %rdx + movq %rax, 464(%r9) + adcq $0, %rdx + movq 288(%rsi), %rax + movq %rdx, 472(%r9) + adcq $0, %rax + movq 296(%rsi), %rdx + movq %rax, 480(%r9) + adcq $0, %rdx + movq 304(%rsi), %rax + movq %rdx, 488(%r9) + adcq $0, %rax + movq 312(%rsi), %rdx + movq %rax, 496(%r9) + adcq $0, %rdx + movq 320(%rsi), %rax + movq %rdx, 504(%r9) + adcq $0, %rax + movq 328(%rsi), %rdx + movq %rax, 512(%r9) + adcq $0, %rdx + movq 336(%rsi), %rax + movq %rdx, 520(%r9) + adcq $0, %rax + movq 344(%rsi), %rdx + movq %rax, 528(%r9) + adcq $0, %rdx + movq 352(%rsi), %rax + movq %rdx, 536(%r9) + adcq $0, %rax + movq 360(%rsi), %rdx + movq %rax, 544(%r9) + adcq $0, %rdx + movq 368(%rsi), %rax + movq %rdx, 552(%r9) + adcq $0, %rax + movq 376(%rsi), %rdx + movq %rax, 560(%r9) + adcq $0, %rdx + movq %rdx, 568(%r9) + addq $984, %rsp + repz retq +#ifndef __APPLE__ +.size sp_3072_sqr_avx2_48,.-sp_3072_sqr_avx2_48 +#endif /* __APPLE__ */ +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
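+ *
+ * The fully unrolled code below is the schoolbook word-by-digit multiply:
+ * each 64-bit word of a is multiplied by b and the 128-bit product is
+ * folded into a rotating carry chain. As a rough C sketch of the same
+ * operation (the typedefs and helper name are illustrative assumptions,
+ * not part of this patch):
+ *
+ *     typedef unsigned long long sp_digit;    // assumed 64-bit digit
+ *     typedef unsigned __int128 sp_udigit2;   // assumed GCC/Clang extension
+ *
+ *     static void mul_d_48_sketch(sp_digit* r, const sp_digit* a, sp_digit b)
+ *     {
+ *         sp_udigit2 t = 0;
+ *         for (int i = 0; i < 48; i++) {
+ *             t += (sp_udigit2)a[i] * b;   // product plus carry in
+ *             r[i] = (sp_digit)t;          // low 64 bits out
+ *             t >>= 64;                    // high 64 bits become the carry
+ *         }
+ *         r[48] = (sp_digit)t;             // 49th digit holds the last carry
+ *     }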
+ */ +#ifndef __APPLE__ +.globl sp_3072_mul_d_48 +.type sp_3072_mul_d_48,@function +.align 16 +sp_3072_mul_d_48: +#else +.globl _sp_3072_mul_d_48 +.p2align 4 +_sp_3072_mul_d_48: +#endif /* __APPLE__ */ + movq %rdx, %rcx + # A[0] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + movq %r8, (%rdi) + # A[1] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 8(%rsi) + addq %rax, %r9 + movq %r9, 8(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[2] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 16(%rsi) + addq %rax, %r10 + movq %r10, 16(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 24(%rsi) + addq %rax, %r8 + movq %r8, 24(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[4] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 32(%rsi) + addq %rax, %r9 + movq %r9, 32(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[5] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 40(%rsi) + addq %rax, %r10 + movq %r10, 40(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[6] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 48(%rsi) + addq %rax, %r8 + movq %r8, 48(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[7] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 56(%rsi) + addq %rax, %r9 + movq %r9, 56(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[8] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 64(%rsi) + addq %rax, %r10 + movq %r10, 64(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[9] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 72(%rsi) + addq %rax, %r8 + movq %r8, 72(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[10] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 80(%rsi) + addq %rax, %r9 + movq %r9, 80(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[11] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 88(%rsi) + addq %rax, %r10 + movq %r10, 88(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[12] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 96(%rsi) + addq %rax, %r8 + movq %r8, 96(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[13] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 104(%rsi) + addq %rax, %r9 + movq %r9, 104(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[14] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 112(%rsi) + addq %rax, %r10 + movq %r10, 112(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[15] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 120(%rsi) + addq %rax, %r8 + movq %r8, 120(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[16] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 128(%rsi) + addq %rax, %r9 + movq %r9, 128(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[17] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 136(%rsi) + addq %rax, %r10 + movq %r10, 136(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[18] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 144(%rsi) + addq %rax, %r8 + movq %r8, 144(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[19] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 152(%rsi) + addq %rax, %r9 + movq %r9, 152(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[20] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 160(%rsi) + addq %rax, %r10 + movq %r10, 160(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[21] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 168(%rsi) + addq %rax, %r8 + movq %r8, 168(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[22] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 176(%rsi) + addq %rax, %r9 + movq %r9, 176(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[23] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 184(%rsi) + addq %rax, %r10 + movq %r10, 184(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[24] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 192(%rsi) + addq %rax, 
%r8 + movq %r8, 192(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[25] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 200(%rsi) + addq %rax, %r9 + movq %r9, 200(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[26] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 208(%rsi) + addq %rax, %r10 + movq %r10, 208(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[27] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 216(%rsi) + addq %rax, %r8 + movq %r8, 216(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[28] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 224(%rsi) + addq %rax, %r9 + movq %r9, 224(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[29] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 232(%rsi) + addq %rax, %r10 + movq %r10, 232(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[30] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 240(%rsi) + addq %rax, %r8 + movq %r8, 240(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[31] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 248(%rsi) + addq %rax, %r9 + movq %r9, 248(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[32] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 256(%rsi) + addq %rax, %r10 + movq %r10, 256(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[33] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 264(%rsi) + addq %rax, %r8 + movq %r8, 264(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[34] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 272(%rsi) + addq %rax, %r9 + movq %r9, 272(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[35] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 280(%rsi) + addq %rax, %r10 + movq %r10, 280(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[36] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 288(%rsi) + addq %rax, %r8 + movq %r8, 288(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[37] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 296(%rsi) + addq %rax, %r9 + movq %r9, 296(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[38] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 304(%rsi) + addq %rax, %r10 + movq %r10, 304(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[39] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 312(%rsi) + addq %rax, %r8 + movq %r8, 312(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[40] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 320(%rsi) + addq %rax, %r9 + movq %r9, 320(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[41] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 328(%rsi) + addq %rax, %r10 + movq %r10, 328(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[42] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 336(%rsi) + addq %rax, %r8 + movq %r8, 336(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[43] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 344(%rsi) + addq %rax, %r9 + movq %r9, 344(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[44] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 352(%rsi) + addq %rax, %r10 + movq %r10, 352(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[45] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 360(%rsi) + addq %rax, %r8 + movq %r8, 360(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[46] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 368(%rsi) + addq %rax, %r9 + movq %r9, 368(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[47] * B + movq %rcx, %rax + mulq 376(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + movq %r10, 376(%rdi) + movq %r8, 384(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_3072_mul_d_48,.-sp_3072_mul_d_48 +#endif /* __APPLE__ */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not subtracting. + * + * r A single precision number representing the conditional subtract result. + * a A single precision number to subtract from.
+ * b A single precision number to subtract. + * m Mask value to apply. + */ +#ifndef __APPLE__ +.globl sp_3072_cond_sub_24 +.type sp_3072_cond_sub_24,@function +.align 16 +sp_3072_cond_sub_24: +#else +.globl _sp_3072_cond_sub_24 +.p2align 4 +_sp_3072_cond_sub_24: +#endif /* __APPLE__ */ + subq $192, %rsp + movq $0, %rax + movq (%rdx), %r8 + movq 8(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, (%rsp) + movq %r9, 8(%rsp) + movq 16(%rdx), %r8 + movq 24(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 16(%rsp) + movq %r9, 24(%rsp) + movq 32(%rdx), %r8 + movq 40(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq 48(%rdx), %r8 + movq 56(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 48(%rsp) + movq %r9, 56(%rsp) + movq 64(%rdx), %r8 + movq 72(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 64(%rsp) + movq %r9, 72(%rsp) + movq 80(%rdx), %r8 + movq 88(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 80(%rsp) + movq %r9, 88(%rsp) + movq 96(%rdx), %r8 + movq 104(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 96(%rsp) + movq %r9, 104(%rsp) + movq 112(%rdx), %r8 + movq 120(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 112(%rsp) + movq %r9, 120(%rsp) + movq 128(%rdx), %r8 + movq 136(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 128(%rsp) + movq %r9, 136(%rsp) + movq 144(%rdx), %r8 + movq 152(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 144(%rsp) + movq %r9, 152(%rsp) + movq 160(%rdx), %r8 + movq 168(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 160(%rsp) + movq %r9, 168(%rsp) + movq 176(%rdx), %r8 + movq 184(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 176(%rsp) + movq %r9, 184(%rsp) + movq (%rsi), %r8 + movq (%rsp), %rdx + subq %rdx, %r8 + movq 8(%rsi), %r9 + movq 8(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, (%rdi) + movq 16(%rsi), %r8 + movq 16(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 8(%rdi) + movq 24(%rsi), %r9 + movq 24(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 16(%rdi) + movq 32(%rsi), %r8 + movq 32(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 24(%rdi) + movq 40(%rsi), %r9 + movq 40(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 32(%rdi) + movq 48(%rsi), %r8 + movq 48(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 40(%rdi) + movq 56(%rsi), %r9 + movq 56(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 48(%rdi) + movq 64(%rsi), %r8 + movq 64(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 56(%rdi) + movq 72(%rsi), %r9 + movq 72(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 64(%rdi) + movq 80(%rsi), %r8 + movq 80(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 72(%rdi) + movq 88(%rsi), %r9 + movq 88(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 80(%rdi) + movq 96(%rsi), %r8 + movq 96(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 88(%rdi) + movq 104(%rsi), %r9 + movq 104(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 96(%rdi) + movq 112(%rsi), %r8 + movq 112(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 104(%rdi) + movq 120(%rsi), %r9 + movq 120(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 112(%rdi) + movq 128(%rsi), %r8 + movq 128(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 120(%rdi) + movq 136(%rsi), %r9 + movq 136(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 128(%rdi) + movq 144(%rsi), %r8 + movq 144(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 136(%rdi) + movq 152(%rsi), %r9 + movq 152(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 144(%rdi) + movq 160(%rsi), %r8 + movq 160(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 152(%rdi) + movq 168(%rsi), %r9 + movq 168(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 160(%rdi) + movq 176(%rsi), %r8 
+ movq 176(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 168(%rdi) + movq 184(%rsi), %r9 + movq 184(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 176(%rdi) + movq %r9, 184(%rdi) + sbbq $0, %rax + addq $192, %rsp + repz retq +#ifndef __APPLE__ +.size sp_3072_cond_sub_24,.-sp_3072_cond_sub_24 +#endif /* __APPLE__ */ +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef __APPLE__ +.globl sp_3072_mont_reduce_24 +.type sp_3072_mont_reduce_24,@function +.align 16 +sp_3072_mont_reduce_24: +#else +.globl _sp_3072_mont_reduce_24 +.p2align 4 +_sp_3072_mont_reduce_24: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + movq %rdx, %rcx + xorq %r15, %r15 + # i = 24 + movq $24, %r8 + movq (%rdi), %r13 + movq 8(%rdi), %r14 +L_mont_loop_24: + # mu = a[i] * mp + movq %r13, %r11 + imulq %rcx, %r11 + # a[i+0] += m[0] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq (%rsi) + addq %rax, %r13 + adcq %rdx, %r10 + # a[i+1] += m[1] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 8(%rsi) + movq %r14, %r13 + addq %rax, %r13 + adcq %rdx, %r9 + addq %r10, %r13 + adcq $0, %r9 + # a[i+2] += m[2] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 16(%rsi) + movq 16(%rdi), %r14 + addq %rax, %r14 + adcq %rdx, %r10 + addq %r9, %r14 + adcq $0, %r10 + # a[i+3] += m[3] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 24(%rsi) + movq 24(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 24(%rdi) + adcq $0, %r9 + # a[i+4] += m[4] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 32(%rsi) + movq 32(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 32(%rdi) + adcq $0, %r10 + # a[i+5] += m[5] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 40(%rsi) + movq 40(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 40(%rdi) + adcq $0, %r9 + # a[i+6] += m[6] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 48(%rsi) + movq 48(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 48(%rdi) + adcq $0, %r10 + # a[i+7] += m[7] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 56(%rsi) + movq 56(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 56(%rdi) + adcq $0, %r9 + # a[i+8] += m[8] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 64(%rsi) + movq 64(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 64(%rdi) + adcq $0, %r10 + # a[i+9] += m[9] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 72(%rsi) + movq 72(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 72(%rdi) + adcq $0, %r9 + # a[i+10] += m[10] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 80(%rsi) + movq 80(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 80(%rdi) + adcq $0, %r10 + # a[i+11] += m[11] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 88(%rsi) + movq 88(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 88(%rdi) + adcq $0, %r9 + # a[i+12] += m[12] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 96(%rsi) + movq 96(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 96(%rdi) + adcq $0, %r10 + # a[i+13] += m[13] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 104(%rsi) + movq 104(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 104(%rdi) + adcq $0, %r9 + # a[i+14] += m[14] * mu + movq 
%r11, %rax + xorq %r10, %r10 + mulq 112(%rsi) + movq 112(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 112(%rdi) + adcq $0, %r10 + # a[i+15] += m[15] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 120(%rsi) + movq 120(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 120(%rdi) + adcq $0, %r9 + # a[i+16] += m[16] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 128(%rsi) + movq 128(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 128(%rdi) + adcq $0, %r10 + # a[i+17] += m[17] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 136(%rsi) + movq 136(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 136(%rdi) + adcq $0, %r9 + # a[i+18] += m[18] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 144(%rsi) + movq 144(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 144(%rdi) + adcq $0, %r10 + # a[i+19] += m[19] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 152(%rsi) + movq 152(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 152(%rdi) + adcq $0, %r9 + # a[i+20] += m[20] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 160(%rsi) + movq 160(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 160(%rdi) + adcq $0, %r10 + # a[i+21] += m[21] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 168(%rsi) + movq 168(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 168(%rdi) + adcq $0, %r9 + # a[i+22] += m[22] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 176(%rsi) + movq 176(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 176(%rdi) + adcq $0, %r10 + # a[i+23] += m[23] * mu + movq %r11, %rax + mulq 184(%rsi) + movq 184(%rdi), %r12 + addq %rax, %r10 + adcq %r15, %rdx + movq $0, %r15 + adcq $0, %r15 + addq %r10, %r12 + movq %r12, 184(%rdi) + adcq %rdx, 192(%rdi) + adcq $0, %r15 + # i -= 1 + addq $8, %rdi + decq %r8 + jnz L_mont_loop_24 + movq %r13, (%rdi) + movq %r14, 8(%rdi) + negq %r15 + movq %r15, %rcx + movq %rsi, %rdx + movq %rdi, %rsi + subq $192, %rdi +#ifndef __APPLE__ + callq sp_3072_cond_sub_24@plt +#else + callq _sp_3072_cond_sub_24 +#endif /* __APPLE__ */ + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_3072_mont_reduce_24,.-sp_3072_mont_reduce_24 +#endif /* __APPLE__ */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not subtracting. + * + * r A single precision number representing the conditional subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply.
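+ *
+ * As in the non-AVX2 variant above, the subtraction is branch-free: every
+ * word of b is masked with m (via pextq here, via andq above) so the
+ * borrow chain runs whether or not the subtraction is wanted. A rough C
+ * sketch of the idea (typedef and helper name are illustrative
+ * assumptions, not part of this patch; the borrow propagation relies on
+ * GCC/Clang arithmetic right-shift of signed values):
+ *
+ *     typedef unsigned long long sp_digit;   // assumed 64-bit digit
+ *
+ *     static sp_digit cond_sub_24_sketch(sp_digit* r, const sp_digit* a,
+ *                                        const sp_digit* b, sp_digit m)
+ *     {
+ *         __int128 t = 0;                        // signed, carries the borrow
+ *         for (int i = 0; i < 24; i++) {
+ *             t += (__int128)a[i] - (b[i] & m);  // m selects b[i] or 0
+ *             r[i] = (sp_digit)t;                // low 64 bits out
+ *             t >>= 64;                          // 0 or -1 borrow
+ *         }
+ *         return (sp_digit)t;                // 0, or all-ones if borrow out
+ *     }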
+ */ +#ifndef __APPLE__ +.globl sp_3072_cond_sub_avx2_24 +.type sp_3072_cond_sub_avx2_24,@function +.align 16 +sp_3072_cond_sub_avx2_24: +#else +.globl _sp_3072_cond_sub_avx2_24 +.p2align 4 +_sp_3072_cond_sub_avx2_24: +#endif /* __APPLE__ */ + movq $0, %rax + movq (%rdx), %r10 + movq (%rsi), %r8 + pextq %rcx, %r10, %r10 + subq %r10, %r8 + movq 8(%rdx), %r10 + movq 8(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, (%rdi) + sbbq %r10, %r9 + movq 16(%rdx), %r8 + movq 16(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 8(%rdi) + sbbq %r8, %r10 + movq 24(%rdx), %r9 + movq 24(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 16(%rdi) + sbbq %r9, %r8 + movq 32(%rdx), %r10 + movq 32(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 24(%rdi) + sbbq %r10, %r9 + movq 40(%rdx), %r8 + movq 40(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 32(%rdi) + sbbq %r8, %r10 + movq 48(%rdx), %r9 + movq 48(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 40(%rdi) + sbbq %r9, %r8 + movq 56(%rdx), %r10 + movq 56(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 48(%rdi) + sbbq %r10, %r9 + movq 64(%rdx), %r8 + movq 64(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 56(%rdi) + sbbq %r8, %r10 + movq 72(%rdx), %r9 + movq 72(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 64(%rdi) + sbbq %r9, %r8 + movq 80(%rdx), %r10 + movq 80(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 72(%rdi) + sbbq %r10, %r9 + movq 88(%rdx), %r8 + movq 88(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 80(%rdi) + sbbq %r8, %r10 + movq 96(%rdx), %r9 + movq 96(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 88(%rdi) + sbbq %r9, %r8 + movq 104(%rdx), %r10 + movq 104(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 96(%rdi) + sbbq %r10, %r9 + movq 112(%rdx), %r8 + movq 112(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 104(%rdi) + sbbq %r8, %r10 + movq 120(%rdx), %r9 + movq 120(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 112(%rdi) + sbbq %r9, %r8 + movq 128(%rdx), %r10 + movq 128(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 120(%rdi) + sbbq %r10, %r9 + movq 136(%rdx), %r8 + movq 136(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 128(%rdi) + sbbq %r8, %r10 + movq 144(%rdx), %r9 + movq 144(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 136(%rdi) + sbbq %r9, %r8 + movq 152(%rdx), %r10 + movq 152(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 144(%rdi) + sbbq %r10, %r9 + movq 160(%rdx), %r8 + movq 160(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 152(%rdi) + sbbq %r8, %r10 + movq 168(%rdx), %r9 + movq 168(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 160(%rdi) + sbbq %r9, %r8 + movq 176(%rdx), %r10 + movq 176(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 168(%rdi) + sbbq %r10, %r9 + movq 184(%rdx), %r8 + movq 184(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 176(%rdi) + sbbq %r8, %r10 + movq %r10, 184(%rdi) + sbbq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_3072_cond_sub_avx2_24,.-sp_3072_cond_sub_avx2_24 +#endif /* __APPLE__ */ +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
+ */ +#ifndef __APPLE__ +.globl sp_3072_mul_d_24 +.type sp_3072_mul_d_24,@function +.align 16 +sp_3072_mul_d_24: +#else +.globl _sp_3072_mul_d_24 +.p2align 4 +_sp_3072_mul_d_24: +#endif /* __APPLE__ */ + movq %rdx, %rcx + # A[0] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + movq %r8, (%rdi) + # A[1] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 8(%rsi) + addq %rax, %r9 + movq %r9, 8(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[2] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 16(%rsi) + addq %rax, %r10 + movq %r10, 16(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 24(%rsi) + addq %rax, %r8 + movq %r8, 24(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[4] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 32(%rsi) + addq %rax, %r9 + movq %r9, 32(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[5] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 40(%rsi) + addq %rax, %r10 + movq %r10, 40(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[6] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 48(%rsi) + addq %rax, %r8 + movq %r8, 48(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[7] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 56(%rsi) + addq %rax, %r9 + movq %r9, 56(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[8] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 64(%rsi) + addq %rax, %r10 + movq %r10, 64(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[9] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 72(%rsi) + addq %rax, %r8 + movq %r8, 72(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[10] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 80(%rsi) + addq %rax, %r9 + movq %r9, 80(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[11] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 88(%rsi) + addq %rax, %r10 + movq %r10, 88(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[12] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 96(%rsi) + addq %rax, %r8 + movq %r8, 96(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[13] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 104(%rsi) + addq %rax, %r9 + movq %r9, 104(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[14] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 112(%rsi) + addq %rax, %r10 + movq %r10, 112(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[15] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 120(%rsi) + addq %rax, %r8 + movq %r8, 120(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[16] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 128(%rsi) + addq %rax, %r9 + movq %r9, 128(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[17] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 136(%rsi) + addq %rax, %r10 + movq %r10, 136(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[18] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 144(%rsi) + addq %rax, %r8 + movq %r8, 144(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[19] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 152(%rsi) + addq %rax, %r9 + movq %r9, 152(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[20] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 160(%rsi) + addq %rax, %r10 + movq %r10, 160(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[21] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 168(%rsi) + addq %rax, %r8 + movq %r8, 168(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[22] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 176(%rsi) + addq %rax, %r9 + movq %r9, 176(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[23] * B + movq %rcx, %rax + mulq 184(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + movq %r10, 184(%rdi) + movq %r8, 192(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_3072_mul_d_24,.-sp_3072_mul_d_24 +#endif /* 
__APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +#ifndef __APPLE__ +.globl sp_3072_mul_d_avx2_24 +.type sp_3072_mul_d_avx2_24,@function +.align 16 +sp_3072_mul_d_avx2_24: +#else +.globl _sp_3072_mul_d_avx2_24 +.p2align 4 +_sp_3072_mul_d_avx2_24: +#endif /* __APPLE__ */ + movq %rdx, %rax + # A[0] * B + movq %rax, %rdx + xorq %r11, %r11 + mulxq (%rsi), %r9, %r10 + movq %r9, (%rdi) + # A[1] * B + mulxq 8(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 8(%rdi) + adoxq %r8, %r9 + # A[2] * B + mulxq 16(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 16(%rdi) + adoxq %r8, %r10 + # A[3] * B + mulxq 24(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 24(%rdi) + adoxq %r8, %r9 + # A[4] * B + mulxq 32(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 32(%rdi) + adoxq %r8, %r10 + # A[5] * B + mulxq 40(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 40(%rdi) + adoxq %r8, %r9 + # A[6] * B + mulxq 48(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 48(%rdi) + adoxq %r8, %r10 + # A[7] * B + mulxq 56(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 56(%rdi) + adoxq %r8, %r9 + # A[8] * B + mulxq 64(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 64(%rdi) + adoxq %r8, %r10 + # A[9] * B + mulxq 72(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 72(%rdi) + adoxq %r8, %r9 + # A[10] * B + mulxq 80(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 80(%rdi) + adoxq %r8, %r10 + # A[11] * B + mulxq 88(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 88(%rdi) + adoxq %r8, %r9 + # A[12] * B + mulxq 96(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 96(%rdi) + adoxq %r8, %r10 + # A[13] * B + mulxq 104(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 104(%rdi) + adoxq %r8, %r9 + # A[14] * B + mulxq 112(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 112(%rdi) + adoxq %r8, %r10 + # A[15] * B + mulxq 120(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 120(%rdi) + adoxq %r8, %r9 + # A[16] * B + mulxq 128(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 128(%rdi) + adoxq %r8, %r10 + # A[17] * B + mulxq 136(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 136(%rdi) + adoxq %r8, %r9 + # A[18] * B + mulxq 144(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 144(%rdi) + adoxq %r8, %r10 + # A[19] * B + mulxq 152(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 152(%rdi) + adoxq %r8, %r9 + # A[20] * B + mulxq 160(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 160(%rdi) + adoxq %r8, %r10 + # A[21] * B + mulxq 168(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 168(%rdi) + adoxq %r8, %r9 + # A[22] * B + mulxq 176(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 176(%rdi) + adoxq %r8, %r10 + # A[23] * B + mulxq 184(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + adcxq %r11, %r9 + movq %r10, 184(%rdi) + movq %r9, 192(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_3072_mul_d_avx2_24,.-sp_3072_mul_d_avx2_24 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. 
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +#ifndef __APPLE__ +.globl sp_3072_cmp_24 +.type sp_3072_cmp_24,@function +.align 16 +sp_3072_cmp_24: +#else +.globl _sp_3072_cmp_24 +.p2align 4 +_sp_3072_cmp_24: +#endif /* __APPLE__ */ + xorq %rcx, %rcx + movq $-1, %rdx + movq $-1, %rax + movq $1, %r8 + movq 184(%rdi), %r9 + movq 184(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 176(%rdi), %r9 + movq 176(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 168(%rdi), %r9 + movq 168(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 160(%rdi), %r9 + movq 160(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 152(%rdi), %r9 + movq 152(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 144(%rdi), %r9 + movq 144(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 136(%rdi), %r9 + movq 136(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 128(%rdi), %r9 + movq 128(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 120(%rdi), %r9 + movq 120(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 112(%rdi), %r9 + movq 112(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 104(%rdi), %r9 + movq 104(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 96(%rdi), %r9 + movq 96(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 88(%rdi), %r9 + movq 88(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 80(%rdi), %r9 + movq 80(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 72(%rdi), %r9 + movq 72(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 64(%rdi), %r9 + movq 64(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 56(%rdi), %r9 + movq 56(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 48(%rdi), %r9 + movq 48(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 40(%rdi), %r9 + movq 40(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 32(%rdi), %r9 + movq 32(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 24(%rdi), %r9 + movq 24(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz 
%rcx, %rdx + movq 16(%rdi), %r9 + movq 16(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 8(%rdi), %r9 + movq 8(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq (%rdi), %r9 + movq (%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + xorq %rdx, %rax + repz retq +#ifndef __APPLE__ +.size sp_3072_cmp_24,.-sp_3072_cmp_24 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef __APPLE__ +.globl sp_3072_mont_reduce_avx2_24 +.type sp_3072_mont_reduce_avx2_24,@function +.align 16 +sp_3072_mont_reduce_avx2_24: +#else +.globl _sp_3072_mont_reduce_avx2_24 +.p2align 4 +_sp_3072_mont_reduce_avx2_24: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + movq %rdx, %r8 + xorq %r14, %r14 + # i = 24 + movq $24, %r9 + movq (%rdi), %r13 + addq $96, %rdi + xorq %r12, %r12 +L_mont_loop_avx2_24: + # mu = a[i] * mp + movq %r13, %rdx + movq %r13, %r10 + imulq %r8, %rdx + xorq %r12, %r12 + # a[i+0] += m[0] * mu + mulxq (%rsi), %rax, %rcx + movq -88(%rdi), %r13 + adcxq %rax, %r10 + adoxq %rcx, %r13 + # a[i+1] += m[1] * mu + mulxq 8(%rsi), %rax, %rcx + movq -80(%rdi), %r10 + adcxq %rax, %r13 + adoxq %rcx, %r10 + # a[i+2] += m[2] * mu + mulxq 16(%rsi), %rax, %rcx + movq -72(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -80(%rdi) + # a[i+3] += m[3] * mu + mulxq 24(%rsi), %rax, %rcx + movq -64(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -72(%rdi) + # a[i+4] += m[4] * mu + mulxq 32(%rsi), %rax, %rcx + movq -56(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -64(%rdi) + # a[i+5] += m[5] * mu + mulxq 40(%rsi), %rax, %rcx + movq -48(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -56(%rdi) + # a[i+6] += m[6] * mu + mulxq 48(%rsi), %rax, %rcx + movq -40(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -48(%rdi) + # a[i+7] += m[7] * mu + mulxq 56(%rsi), %rax, %rcx + movq -32(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -40(%rdi) + # a[i+8] += m[8] * mu + mulxq 64(%rsi), %rax, %rcx + movq -24(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -32(%rdi) + # a[i+9] += m[9] * mu + mulxq 72(%rsi), %rax, %rcx + movq -16(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -24(%rdi) + # a[i+10] += m[10] * mu + mulxq 80(%rsi), %rax, %rcx + movq -8(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -16(%rdi) + # a[i+11] += m[11] * mu + mulxq 88(%rsi), %rax, %rcx + movq (%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -8(%rdi) + # a[i+12] += m[12] * mu + mulxq 96(%rsi), %rax, %rcx + movq 8(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, (%rdi) + # a[i+13] += m[13] * mu + mulxq 104(%rsi), %rax, %rcx + movq 16(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 8(%rdi) + # a[i+14] += m[14] * mu + mulxq 112(%rsi), %rax, %rcx + movq 24(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 16(%rdi) + # a[i+15] += m[15] * mu + mulxq 120(%rsi), %rax, %rcx + movq 32(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 24(%rdi) + # a[i+16] += m[16] * mu + mulxq 128(%rsi), 
%rax, %rcx + movq 40(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 32(%rdi) + # a[i+17] += m[17] * mu + mulxq 136(%rsi), %rax, %rcx + movq 48(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 40(%rdi) + # a[i+18] += m[18] * mu + mulxq 144(%rsi), %rax, %rcx + movq 56(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 48(%rdi) + # a[i+19] += m[19] * mu + mulxq 152(%rsi), %rax, %rcx + movq 64(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 56(%rdi) + # a[i+20] += m[20] * mu + mulxq 160(%rsi), %rax, %rcx + movq 72(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 64(%rdi) + # a[i+21] += m[21] * mu + mulxq 168(%rsi), %rax, %rcx + movq 80(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 72(%rdi) + # a[i+22] += m[22] * mu + mulxq 176(%rsi), %rax, %rcx + movq 88(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 80(%rdi) + # a[i+23] += m[23] * mu + mulxq 184(%rsi), %rax, %rcx + movq 96(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 88(%rdi) + adcxq %r14, %r10 + movq %r10, 96(%rdi) + movq %r12, %r14 + adoxq %r12, %r14 + adcxq %r12, %r14 + # a += 1 + addq $8, %rdi + # i -= 1 + subq $1, %r9 + jnz L_mont_loop_avx2_24 + subq $96, %rdi + negq %r14 + movq %rdi, %r8 + subq $192, %rdi + movq (%rsi), %rcx + movq %r13, %rdx + pextq %r14, %rcx, %rcx + subq %rcx, %rdx + movq 8(%rsi), %rcx + movq 8(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, (%rdi) + sbbq %rcx, %rax + movq 16(%rsi), %rdx + movq 16(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 8(%rdi) + sbbq %rdx, %rcx + movq 24(%rsi), %rax + movq 24(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 16(%rdi) + sbbq %rax, %rdx + movq 32(%rsi), %rcx + movq 32(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 24(%rdi) + sbbq %rcx, %rax + movq 40(%rsi), %rdx + movq 40(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 32(%rdi) + sbbq %rdx, %rcx + movq 48(%rsi), %rax + movq 48(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 40(%rdi) + sbbq %rax, %rdx + movq 56(%rsi), %rcx + movq 56(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 48(%rdi) + sbbq %rcx, %rax + movq 64(%rsi), %rdx + movq 64(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 56(%rdi) + sbbq %rdx, %rcx + movq 72(%rsi), %rax + movq 72(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 64(%rdi) + sbbq %rax, %rdx + movq 80(%rsi), %rcx + movq 80(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 72(%rdi) + sbbq %rcx, %rax + movq 88(%rsi), %rdx + movq 88(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 80(%rdi) + sbbq %rdx, %rcx + movq 96(%rsi), %rax + movq 96(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 88(%rdi) + sbbq %rax, %rdx + movq 104(%rsi), %rcx + movq 104(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 96(%rdi) + sbbq %rcx, %rax + movq 112(%rsi), %rdx + movq 112(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 104(%rdi) + sbbq %rdx, %rcx + movq 120(%rsi), %rax + movq 120(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 112(%rdi) + sbbq %rax, %rdx + movq 128(%rsi), %rcx + movq 128(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 120(%rdi) + sbbq %rcx, %rax + movq 136(%rsi), %rdx + movq 136(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 128(%rdi) + sbbq %rdx, %rcx + movq 144(%rsi), %rax + movq 144(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 136(%rdi) + sbbq %rax, %rdx + movq 152(%rsi), %rcx + movq 152(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 144(%rdi) + sbbq %rcx, %rax + movq 160(%rsi), %rdx + movq 160(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 152(%rdi) + sbbq %rdx, 
%rcx + movq 168(%rsi), %rax + movq 168(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 160(%rdi) + sbbq %rax, %rdx + movq 176(%rsi), %rcx + movq 176(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 168(%rdi) + sbbq %rcx, %rax + movq 184(%rsi), %rdx + movq 184(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 176(%rdi) + sbbq %rdx, %rcx + movq %rcx, 184(%rdi) + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_3072_mont_reduce_avx2_24,.-sp_3072_mont_reduce_avx2_24 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +#ifndef __APPLE__ +.globl sp_3072_cond_sub_48 +.type sp_3072_cond_sub_48,@function +.align 16 +sp_3072_cond_sub_48: +#else +.globl _sp_3072_cond_sub_48 +.p2align 4 +_sp_3072_cond_sub_48: +#endif /* __APPLE__ */ + subq $384, %rsp + movq $0, %rax + movq (%rdx), %r8 + movq 8(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, (%rsp) + movq %r9, 8(%rsp) + movq 16(%rdx), %r8 + movq 24(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 16(%rsp) + movq %r9, 24(%rsp) + movq 32(%rdx), %r8 + movq 40(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq 48(%rdx), %r8 + movq 56(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 48(%rsp) + movq %r9, 56(%rsp) + movq 64(%rdx), %r8 + movq 72(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 64(%rsp) + movq %r9, 72(%rsp) + movq 80(%rdx), %r8 + movq 88(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 80(%rsp) + movq %r9, 88(%rsp) + movq 96(%rdx), %r8 + movq 104(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 96(%rsp) + movq %r9, 104(%rsp) + movq 112(%rdx), %r8 + movq 120(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 112(%rsp) + movq %r9, 120(%rsp) + movq 128(%rdx), %r8 + movq 136(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 128(%rsp) + movq %r9, 136(%rsp) + movq 144(%rdx), %r8 + movq 152(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 144(%rsp) + movq %r9, 152(%rsp) + movq 160(%rdx), %r8 + movq 168(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 160(%rsp) + movq %r9, 168(%rsp) + movq 176(%rdx), %r8 + movq 184(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 176(%rsp) + movq %r9, 184(%rsp) + movq 192(%rdx), %r8 + movq 200(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 192(%rsp) + movq %r9, 200(%rsp) + movq 208(%rdx), %r8 + movq 216(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 208(%rsp) + movq %r9, 216(%rsp) + movq 224(%rdx), %r8 + movq 232(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 224(%rsp) + movq %r9, 232(%rsp) + movq 240(%rdx), %r8 + movq 248(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 240(%rsp) + movq %r9, 248(%rsp) + movq 256(%rdx), %r8 + movq 264(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 256(%rsp) + movq %r9, 264(%rsp) + movq 272(%rdx), %r8 + movq 280(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 272(%rsp) + movq %r9, 280(%rsp) + movq 288(%rdx), %r8 + movq 296(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 288(%rsp) + movq %r9, 296(%rsp) + movq 304(%rdx), %r8 + movq 312(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 304(%rsp) + movq %r9, 312(%rsp) + movq 320(%rdx), %r8 + movq 328(%rdx), %r9 + andq 
%rcx, %r8 + andq %rcx, %r9 + movq %r8, 320(%rsp) + movq %r9, 328(%rsp) + movq 336(%rdx), %r8 + movq 344(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 336(%rsp) + movq %r9, 344(%rsp) + movq 352(%rdx), %r8 + movq 360(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 352(%rsp) + movq %r9, 360(%rsp) + movq 368(%rdx), %r8 + movq 376(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 368(%rsp) + movq %r9, 376(%rsp) + movq (%rsi), %r8 + movq (%rsp), %rdx + subq %rdx, %r8 + movq 8(%rsi), %r9 + movq 8(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, (%rdi) + movq 16(%rsi), %r8 + movq 16(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 8(%rdi) + movq 24(%rsi), %r9 + movq 24(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 16(%rdi) + movq 32(%rsi), %r8 + movq 32(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 24(%rdi) + movq 40(%rsi), %r9 + movq 40(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 32(%rdi) + movq 48(%rsi), %r8 + movq 48(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 40(%rdi) + movq 56(%rsi), %r9 + movq 56(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 48(%rdi) + movq 64(%rsi), %r8 + movq 64(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 56(%rdi) + movq 72(%rsi), %r9 + movq 72(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 64(%rdi) + movq 80(%rsi), %r8 + movq 80(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 72(%rdi) + movq 88(%rsi), %r9 + movq 88(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 80(%rdi) + movq 96(%rsi), %r8 + movq 96(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 88(%rdi) + movq 104(%rsi), %r9 + movq 104(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 96(%rdi) + movq 112(%rsi), %r8 + movq 112(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 104(%rdi) + movq 120(%rsi), %r9 + movq 120(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 112(%rdi) + movq 128(%rsi), %r8 + movq 128(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 120(%rdi) + movq 136(%rsi), %r9 + movq 136(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 128(%rdi) + movq 144(%rsi), %r8 + movq 144(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 136(%rdi) + movq 152(%rsi), %r9 + movq 152(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 144(%rdi) + movq 160(%rsi), %r8 + movq 160(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 152(%rdi) + movq 168(%rsi), %r9 + movq 168(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 160(%rdi) + movq 176(%rsi), %r8 + movq 176(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 168(%rdi) + movq 184(%rsi), %r9 + movq 184(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 176(%rdi) + movq 192(%rsi), %r8 + movq 192(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 184(%rdi) + movq 200(%rsi), %r9 + movq 200(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 192(%rdi) + movq 208(%rsi), %r8 + movq 208(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 200(%rdi) + movq 216(%rsi), %r9 + movq 216(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 208(%rdi) + movq 224(%rsi), %r8 + movq 224(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 216(%rdi) + movq 232(%rsi), %r9 + movq 232(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 224(%rdi) + movq 240(%rsi), %r8 + movq 240(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 232(%rdi) + movq 248(%rsi), %r9 + movq 248(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 240(%rdi) + movq 256(%rsi), %r8 + movq 256(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 248(%rdi) + movq 264(%rsi), %r9 + movq 264(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 256(%rdi) + movq 272(%rsi), %r8 + movq 272(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 264(%rdi) + movq 280(%rsi), %r9 + movq 280(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 272(%rdi) + movq 288(%rsi), %r8 + movq 288(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 280(%rdi) + movq 296(%rsi), %r9 + movq 296(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 
288(%rdi) + movq 304(%rsi), %r8 + movq 304(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 296(%rdi) + movq 312(%rsi), %r9 + movq 312(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 304(%rdi) + movq 320(%rsi), %r8 + movq 320(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 312(%rdi) + movq 328(%rsi), %r9 + movq 328(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 320(%rdi) + movq 336(%rsi), %r8 + movq 336(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 328(%rdi) + movq 344(%rsi), %r9 + movq 344(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 336(%rdi) + movq 352(%rsi), %r8 + movq 352(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 344(%rdi) + movq 360(%rsi), %r9 + movq 360(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 352(%rdi) + movq 368(%rsi), %r8 + movq 368(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 360(%rdi) + movq 376(%rsi), %r9 + movq 376(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 368(%rdi) + movq %r9, 376(%rdi) + sbbq $0, %rax + addq $384, %rsp + repz retq +#ifndef __APPLE__ +.size sp_3072_cond_sub_48,.-sp_3072_cond_sub_48 +#endif /* __APPLE__ */ +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef __APPLE__ +.globl sp_3072_mont_reduce_48 +.type sp_3072_mont_reduce_48,@function +.align 16 +sp_3072_mont_reduce_48: +#else +.globl _sp_3072_mont_reduce_48 +.p2align 4 +_sp_3072_mont_reduce_48: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + movq %rdx, %rcx + xorq %r15, %r15 + # i = 48 + movq $48, %r8 + movq (%rdi), %r13 + movq 8(%rdi), %r14 +L_mont_loop_48: + # mu = a[i] * mp + movq %r13, %r11 + imulq %rcx, %r11 + # a[i+0] += m[0] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq (%rsi) + addq %rax, %r13 + adcq %rdx, %r10 + # a[i+1] += m[1] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 8(%rsi) + movq %r14, %r13 + addq %rax, %r13 + adcq %rdx, %r9 + addq %r10, %r13 + adcq $0, %r9 + # a[i+2] += m[2] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 16(%rsi) + movq 16(%rdi), %r14 + addq %rax, %r14 + adcq %rdx, %r10 + addq %r9, %r14 + adcq $0, %r10 + # a[i+3] += m[3] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 24(%rsi) + movq 24(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 24(%rdi) + adcq $0, %r9 + # a[i+4] += m[4] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 32(%rsi) + movq 32(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 32(%rdi) + adcq $0, %r10 + # a[i+5] += m[5] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 40(%rsi) + movq 40(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 40(%rdi) + adcq $0, %r9 + # a[i+6] += m[6] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 48(%rsi) + movq 48(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 48(%rdi) + adcq $0, %r10 + # a[i+7] += m[7] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 56(%rsi) + movq 56(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 56(%rdi) + adcq $0, %r9 + # a[i+8] += m[8] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 64(%rsi) + movq 64(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 64(%rdi) + adcq $0, %r10 + # a[i+9] += m[9] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 72(%rsi) + movq 72(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 72(%rdi) + adcq $0, %r9 + # a[i+10] += m[10] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 80(%rsi) + movq 
80(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 80(%rdi) + adcq $0, %r10 + # a[i+11] += m[11] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 88(%rsi) + movq 88(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 88(%rdi) + adcq $0, %r9 + # a[i+12] += m[12] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 96(%rsi) + movq 96(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 96(%rdi) + adcq $0, %r10 + # a[i+13] += m[13] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 104(%rsi) + movq 104(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 104(%rdi) + adcq $0, %r9 + # a[i+14] += m[14] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 112(%rsi) + movq 112(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 112(%rdi) + adcq $0, %r10 + # a[i+15] += m[15] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 120(%rsi) + movq 120(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 120(%rdi) + adcq $0, %r9 + # a[i+16] += m[16] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 128(%rsi) + movq 128(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 128(%rdi) + adcq $0, %r10 + # a[i+17] += m[17] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 136(%rsi) + movq 136(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 136(%rdi) + adcq $0, %r9 + # a[i+18] += m[18] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 144(%rsi) + movq 144(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 144(%rdi) + adcq $0, %r10 + # a[i+19] += m[19] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 152(%rsi) + movq 152(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 152(%rdi) + adcq $0, %r9 + # a[i+20] += m[20] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 160(%rsi) + movq 160(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 160(%rdi) + adcq $0, %r10 + # a[i+21] += m[21] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 168(%rsi) + movq 168(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 168(%rdi) + adcq $0, %r9 + # a[i+22] += m[22] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 176(%rsi) + movq 176(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 176(%rdi) + adcq $0, %r10 + # a[i+23] += m[23] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 184(%rsi) + movq 184(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 184(%rdi) + adcq $0, %r9 + # a[i+24] += m[24] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 192(%rsi) + movq 192(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 192(%rdi) + adcq $0, %r10 + # a[i+25] += m[25] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 200(%rsi) + movq 200(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 200(%rdi) + adcq $0, %r9 + # a[i+26] += m[26] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 208(%rsi) + movq 208(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 208(%rdi) + adcq $0, %r10 + # a[i+27] += m[27] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 216(%rsi) + movq 216(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 216(%rdi) + adcq $0, %r9 + # a[i+28] += m[28] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 224(%rsi) + movq 224(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 224(%rdi) + 
adcq $0, %r10 + # a[i+29] += m[29] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 232(%rsi) + movq 232(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 232(%rdi) + adcq $0, %r9 + # a[i+30] += m[30] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 240(%rsi) + movq 240(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 240(%rdi) + adcq $0, %r10 + # a[i+31] += m[31] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 248(%rsi) + movq 248(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 248(%rdi) + adcq $0, %r9 + # a[i+32] += m[32] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 256(%rsi) + movq 256(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 256(%rdi) + adcq $0, %r10 + # a[i+33] += m[33] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 264(%rsi) + movq 264(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 264(%rdi) + adcq $0, %r9 + # a[i+34] += m[34] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 272(%rsi) + movq 272(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 272(%rdi) + adcq $0, %r10 + # a[i+35] += m[35] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 280(%rsi) + movq 280(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 280(%rdi) + adcq $0, %r9 + # a[i+36] += m[36] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 288(%rsi) + movq 288(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 288(%rdi) + adcq $0, %r10 + # a[i+37] += m[37] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 296(%rsi) + movq 296(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 296(%rdi) + adcq $0, %r9 + # a[i+38] += m[38] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 304(%rsi) + movq 304(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 304(%rdi) + adcq $0, %r10 + # a[i+39] += m[39] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 312(%rsi) + movq 312(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 312(%rdi) + adcq $0, %r9 + # a[i+40] += m[40] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 320(%rsi) + movq 320(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 320(%rdi) + adcq $0, %r10 + # a[i+41] += m[41] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 328(%rsi) + movq 328(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 328(%rdi) + adcq $0, %r9 + # a[i+42] += m[42] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 336(%rsi) + movq 336(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 336(%rdi) + adcq $0, %r10 + # a[i+43] += m[43] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 344(%rsi) + movq 344(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 344(%rdi) + adcq $0, %r9 + # a[i+44] += m[44] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 352(%rsi) + movq 352(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 352(%rdi) + adcq $0, %r10 + # a[i+45] += m[45] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 360(%rsi) + movq 360(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 360(%rdi) + adcq $0, %r9 + # a[i+46] += m[46] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 368(%rsi) + movq 368(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 368(%rdi) + adcq $0, %r10 + # a[i+47] += m[47] * mu + movq %r11, %rax + mulq 376(%rsi) + movq 
376(%rdi), %r12 + addq %rax, %r10 + adcq %r15, %rdx + movq $0, %r15 + adcq $0, %r15 + addq %r10, %r12 + movq %r12, 376(%rdi) + adcq %rdx, 384(%rdi) + adcq $0, %r15 + # i -= 1 + addq $8, %rdi + decq %r8 + jnz L_mont_loop_48 + movq %r13, (%rdi) + movq %r14, 8(%rdi) + negq %r15 + movq %r15, %rcx + movq %rsi, %rdx + movq %rdi, %rsi + subq $384, %rdi +#ifndef __APPLE__ + callq sp_3072_cond_sub_48@plt +#else + callq _sp_3072_cond_sub_48 +#endif /* __APPLE__ */ + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_3072_mont_reduce_48,.-sp_3072_mont_reduce_48 +#endif /* __APPLE__ */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +#ifndef __APPLE__ +.globl sp_3072_cond_sub_avx2_48 +.type sp_3072_cond_sub_avx2_48,@function +.align 16 +sp_3072_cond_sub_avx2_48: +#else +.globl _sp_3072_cond_sub_avx2_48 +.p2align 4 +_sp_3072_cond_sub_avx2_48: +#endif /* __APPLE__ */ + movq $0, %rax + movq (%rdx), %r10 + movq (%rsi), %r8 + pextq %rcx, %r10, %r10 + subq %r10, %r8 + movq 8(%rdx), %r10 + movq 8(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, (%rdi) + sbbq %r10, %r9 + movq 16(%rdx), %r8 + movq 16(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 8(%rdi) + sbbq %r8, %r10 + movq 24(%rdx), %r9 + movq 24(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 16(%rdi) + sbbq %r9, %r8 + movq 32(%rdx), %r10 + movq 32(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 24(%rdi) + sbbq %r10, %r9 + movq 40(%rdx), %r8 + movq 40(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 32(%rdi) + sbbq %r8, %r10 + movq 48(%rdx), %r9 + movq 48(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 40(%rdi) + sbbq %r9, %r8 + movq 56(%rdx), %r10 + movq 56(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 48(%rdi) + sbbq %r10, %r9 + movq 64(%rdx), %r8 + movq 64(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 56(%rdi) + sbbq %r8, %r10 + movq 72(%rdx), %r9 + movq 72(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 64(%rdi) + sbbq %r9, %r8 + movq 80(%rdx), %r10 + movq 80(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 72(%rdi) + sbbq %r10, %r9 + movq 88(%rdx), %r8 + movq 88(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 80(%rdi) + sbbq %r8, %r10 + movq 96(%rdx), %r9 + movq 96(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 88(%rdi) + sbbq %r9, %r8 + movq 104(%rdx), %r10 + movq 104(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 96(%rdi) + sbbq %r10, %r9 + movq 112(%rdx), %r8 + movq 112(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 104(%rdi) + sbbq %r8, %r10 + movq 120(%rdx), %r9 + movq 120(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 112(%rdi) + sbbq %r9, %r8 + movq 128(%rdx), %r10 + movq 128(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 120(%rdi) + sbbq %r10, %r9 + movq 136(%rdx), %r8 + movq 136(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 128(%rdi) + sbbq %r8, %r10 + movq 144(%rdx), %r9 + movq 144(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 136(%rdi) + sbbq %r9, %r8 + movq 152(%rdx), %r10 + movq 152(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 144(%rdi) + sbbq %r10, %r9 + movq 160(%rdx), %r8 + movq 160(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 152(%rdi) + sbbq %r8, %r10 + movq 168(%rdx), %r9 + movq 168(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 160(%rdi) + sbbq %r9, %r8 + movq 176(%rdx), %r10 + movq 176(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 
168(%rdi) + sbbq %r10, %r9 + movq 184(%rdx), %r8 + movq 184(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 176(%rdi) + sbbq %r8, %r10 + movq 192(%rdx), %r9 + movq 192(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 184(%rdi) + sbbq %r9, %r8 + movq 200(%rdx), %r10 + movq 200(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 192(%rdi) + sbbq %r10, %r9 + movq 208(%rdx), %r8 + movq 208(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 200(%rdi) + sbbq %r8, %r10 + movq 216(%rdx), %r9 + movq 216(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 208(%rdi) + sbbq %r9, %r8 + movq 224(%rdx), %r10 + movq 224(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 216(%rdi) + sbbq %r10, %r9 + movq 232(%rdx), %r8 + movq 232(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 224(%rdi) + sbbq %r8, %r10 + movq 240(%rdx), %r9 + movq 240(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 232(%rdi) + sbbq %r9, %r8 + movq 248(%rdx), %r10 + movq 248(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 240(%rdi) + sbbq %r10, %r9 + movq 256(%rdx), %r8 + movq 256(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 248(%rdi) + sbbq %r8, %r10 + movq 264(%rdx), %r9 + movq 264(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 256(%rdi) + sbbq %r9, %r8 + movq 272(%rdx), %r10 + movq 272(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 264(%rdi) + sbbq %r10, %r9 + movq 280(%rdx), %r8 + movq 280(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 272(%rdi) + sbbq %r8, %r10 + movq 288(%rdx), %r9 + movq 288(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 280(%rdi) + sbbq %r9, %r8 + movq 296(%rdx), %r10 + movq 296(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 288(%rdi) + sbbq %r10, %r9 + movq 304(%rdx), %r8 + movq 304(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 296(%rdi) + sbbq %r8, %r10 + movq 312(%rdx), %r9 + movq 312(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 304(%rdi) + sbbq %r9, %r8 + movq 320(%rdx), %r10 + movq 320(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 312(%rdi) + sbbq %r10, %r9 + movq 328(%rdx), %r8 + movq 328(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 320(%rdi) + sbbq %r8, %r10 + movq 336(%rdx), %r9 + movq 336(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 328(%rdi) + sbbq %r9, %r8 + movq 344(%rdx), %r10 + movq 344(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 336(%rdi) + sbbq %r10, %r9 + movq 352(%rdx), %r8 + movq 352(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 344(%rdi) + sbbq %r8, %r10 + movq 360(%rdx), %r9 + movq 360(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 352(%rdi) + sbbq %r9, %r8 + movq 368(%rdx), %r10 + movq 368(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 360(%rdi) + sbbq %r10, %r9 + movq 376(%rdx), %r8 + movq 376(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 368(%rdi) + sbbq %r8, %r10 + movq %r10, 376(%rdi) + sbbq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_3072_cond_sub_avx2_48,.-sp_3072_cond_sub_avx2_48 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
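+ *
+ * Same schoolbook multiply as the plain version, but built on mulx with
+ * the adcx/adox dual carry chains so consecutive products can overlap
+ * without flag stalls. Shape of one step, annotation only (the <acc>
+ * names stand for whichever registers hold the running words):
+ *
+ *     mulxq k(%rsi), %rcx, %r8   # %rcx = low, %r8 = high of a[k] * b (b in %rdx)
+ *     adcxq %rcx, <acc_lo>       # low half added on the CF carry chain
+ *     adoxq %r8, <acc_hi>        # high half added on the OF carry chain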
+ */ +#ifndef __APPLE__ +.globl sp_3072_mul_d_avx2_48 +.type sp_3072_mul_d_avx2_48,@function +.align 16 +sp_3072_mul_d_avx2_48: +#else +.globl _sp_3072_mul_d_avx2_48 +.p2align 4 +_sp_3072_mul_d_avx2_48: +#endif /* __APPLE__ */ + movq %rdx, %rax + # A[0] * B + movq %rax, %rdx + xorq %r11, %r11 + mulxq (%rsi), %r9, %r10 + movq %r9, (%rdi) + # A[1] * B + mulxq 8(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 8(%rdi) + adoxq %r8, %r9 + # A[2] * B + mulxq 16(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 16(%rdi) + adoxq %r8, %r10 + # A[3] * B + mulxq 24(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 24(%rdi) + adoxq %r8, %r9 + # A[4] * B + mulxq 32(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 32(%rdi) + adoxq %r8, %r10 + # A[5] * B + mulxq 40(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 40(%rdi) + adoxq %r8, %r9 + # A[6] * B + mulxq 48(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 48(%rdi) + adoxq %r8, %r10 + # A[7] * B + mulxq 56(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 56(%rdi) + adoxq %r8, %r9 + # A[8] * B + mulxq 64(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 64(%rdi) + adoxq %r8, %r10 + # A[9] * B + mulxq 72(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 72(%rdi) + adoxq %r8, %r9 + # A[10] * B + mulxq 80(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 80(%rdi) + adoxq %r8, %r10 + # A[11] * B + mulxq 88(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 88(%rdi) + adoxq %r8, %r9 + # A[12] * B + mulxq 96(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 96(%rdi) + adoxq %r8, %r10 + # A[13] * B + mulxq 104(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 104(%rdi) + adoxq %r8, %r9 + # A[14] * B + mulxq 112(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 112(%rdi) + adoxq %r8, %r10 + # A[15] * B + mulxq 120(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 120(%rdi) + adoxq %r8, %r9 + # A[16] * B + mulxq 128(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 128(%rdi) + adoxq %r8, %r10 + # A[17] * B + mulxq 136(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 136(%rdi) + adoxq %r8, %r9 + # A[18] * B + mulxq 144(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 144(%rdi) + adoxq %r8, %r10 + # A[19] * B + mulxq 152(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 152(%rdi) + adoxq %r8, %r9 + # A[20] * B + mulxq 160(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 160(%rdi) + adoxq %r8, %r10 + # A[21] * B + mulxq 168(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 168(%rdi) + adoxq %r8, %r9 + # A[22] * B + mulxq 176(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 176(%rdi) + adoxq %r8, %r10 + # A[23] * B + mulxq 184(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 184(%rdi) + adoxq %r8, %r9 + # A[24] * B + mulxq 192(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 192(%rdi) + adoxq %r8, %r10 + # A[25] * B + mulxq 200(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 200(%rdi) + adoxq %r8, %r9 + # A[26] * B + mulxq 208(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 208(%rdi) + adoxq %r8, %r10 + # A[27] * B + mulxq 216(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 216(%rdi) + adoxq %r8, %r9 + # A[28] * B + mulxq 224(%rsi), 
%rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 224(%rdi) + adoxq %r8, %r10 + # A[29] * B + mulxq 232(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 232(%rdi) + adoxq %r8, %r9 + # A[30] * B + mulxq 240(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 240(%rdi) + adoxq %r8, %r10 + # A[31] * B + mulxq 248(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 248(%rdi) + adoxq %r8, %r9 + # A[32] * B + mulxq 256(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 256(%rdi) + adoxq %r8, %r10 + # A[33] * B + mulxq 264(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 264(%rdi) + adoxq %r8, %r9 + # A[34] * B + mulxq 272(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 272(%rdi) + adoxq %r8, %r10 + # A[35] * B + mulxq 280(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 280(%rdi) + adoxq %r8, %r9 + # A[36] * B + mulxq 288(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 288(%rdi) + adoxq %r8, %r10 + # A[37] * B + mulxq 296(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 296(%rdi) + adoxq %r8, %r9 + # A[38] * B + mulxq 304(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 304(%rdi) + adoxq %r8, %r10 + # A[39] * B + mulxq 312(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 312(%rdi) + adoxq %r8, %r9 + # A[40] * B + mulxq 320(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 320(%rdi) + adoxq %r8, %r10 + # A[41] * B + mulxq 328(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 328(%rdi) + adoxq %r8, %r9 + # A[42] * B + mulxq 336(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 336(%rdi) + adoxq %r8, %r10 + # A[43] * B + mulxq 344(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 344(%rdi) + adoxq %r8, %r9 + # A[44] * B + mulxq 352(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 352(%rdi) + adoxq %r8, %r10 + # A[45] * B + mulxq 360(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 360(%rdi) + adoxq %r8, %r9 + # A[46] * B + mulxq 368(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 368(%rdi) + adoxq %r8, %r10 + # A[47] * B + mulxq 376(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + adcxq %r11, %r9 + movq %r10, 376(%rdi) + movq %r9, 384(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_3072_mul_d_avx2_48,.-sp_3072_mul_d_avx2_48 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. 
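+ *
+ * Sketch of the branch-free scheme (illustrative C; the ternaries stand
+ * in for the cmova/cmovc/cmovnz instructions below): scan from the most
+ * significant word down, with an "all equal so far" mask that zeroes
+ * both inputs, and so freezes the result, once a difference is seen.
+ *
+ *     sp_digit eq = (sp_digit)-1, res = (sp_digit)-1;
+ *     for (int i = 47; i >= 0; i--) {
+ *         sp_digit x = a[i] & eq, y = b[i] & eq;
+ *         res = (x > y) ?  1 : res;            // cmova
+ *         res = (x < y) ? eq : res;            // cmovc (eq still -1 here)
+ *         eq  = (x != y) ? 0 : eq;             // cmovnz
+ *     }
+ *     return res ^ eq;                         // 0 iff a == b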
+ */ +#ifndef __APPLE__ +.globl sp_3072_cmp_48 +.type sp_3072_cmp_48,@function +.align 16 +sp_3072_cmp_48: +#else +.globl _sp_3072_cmp_48 +.p2align 4 +_sp_3072_cmp_48: +#endif /* __APPLE__ */ + xorq %rcx, %rcx + movq $-1, %rdx + movq $-1, %rax + movq $1, %r8 + movq 376(%rdi), %r9 + movq 376(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 368(%rdi), %r9 + movq 368(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 360(%rdi), %r9 + movq 360(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 352(%rdi), %r9 + movq 352(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 344(%rdi), %r9 + movq 344(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 336(%rdi), %r9 + movq 336(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 328(%rdi), %r9 + movq 328(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 320(%rdi), %r9 + movq 320(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 312(%rdi), %r9 + movq 312(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 304(%rdi), %r9 + movq 304(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 296(%rdi), %r9 + movq 296(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 288(%rdi), %r9 + movq 288(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 280(%rdi), %r9 + movq 280(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 272(%rdi), %r9 + movq 272(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 264(%rdi), %r9 + movq 264(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 256(%rdi), %r9 + movq 256(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 248(%rdi), %r9 + movq 248(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 240(%rdi), %r9 + movq 240(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 232(%rdi), %r9 + movq 232(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 224(%rdi), %r9 + movq 224(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 216(%rdi), %r9 + movq 216(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 208(%rdi), %r9 + movq 208(%rsi), %r10 + andq %rdx, %r9 
+ andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 200(%rdi), %r9 + movq 200(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 192(%rdi), %r9 + movq 192(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 184(%rdi), %r9 + movq 184(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 176(%rdi), %r9 + movq 176(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 168(%rdi), %r9 + movq 168(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 160(%rdi), %r9 + movq 160(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 152(%rdi), %r9 + movq 152(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 144(%rdi), %r9 + movq 144(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 136(%rdi), %r9 + movq 136(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 128(%rdi), %r9 + movq 128(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 120(%rdi), %r9 + movq 120(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 112(%rdi), %r9 + movq 112(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 104(%rdi), %r9 + movq 104(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 96(%rdi), %r9 + movq 96(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 88(%rdi), %r9 + movq 88(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 80(%rdi), %r9 + movq 80(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 72(%rdi), %r9 + movq 72(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 64(%rdi), %r9 + movq 64(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 56(%rdi), %r9 + movq 56(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 48(%rdi), %r9 + movq 48(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 40(%rdi), %r9 + movq 40(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 32(%rdi), %r9 + movq 32(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 24(%rdi), %r9 + movq 24(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, 
%r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 16(%rdi), %r9 + movq 16(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 8(%rdi), %r9 + movq 8(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq (%rdi), %r9 + movq (%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + xorq %rdx, %rax + repz retq +#ifndef __APPLE__ +.size sp_3072_cmp_48,.-sp_3072_cmp_48 +#endif /* __APPLE__ */ +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_3072_sub_48 +.type sp_3072_sub_48,@function +.align 16 +sp_3072_sub_48: +#else +.globl _sp_3072_sub_48 +.p2align 4 +_sp_3072_sub_48: +#endif /* __APPLE__ */ + movq (%rsi), %rcx + xorq %rax, %rax + subq (%rdx), %rcx + movq 8(%rsi), %r8 + movq %rcx, (%rdi) + sbbq 8(%rdx), %r8 + movq 16(%rsi), %rcx + movq %r8, 8(%rdi) + sbbq 16(%rdx), %rcx + movq 24(%rsi), %r8 + movq %rcx, 16(%rdi) + sbbq 24(%rdx), %r8 + movq 32(%rsi), %rcx + movq %r8, 24(%rdi) + sbbq 32(%rdx), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%rdi) + sbbq 40(%rdx), %r8 + movq 48(%rsi), %rcx + movq %r8, 40(%rdi) + sbbq 48(%rdx), %rcx + movq 56(%rsi), %r8 + movq %rcx, 48(%rdi) + sbbq 56(%rdx), %r8 + movq 64(%rsi), %rcx + movq %r8, 56(%rdi) + sbbq 64(%rdx), %rcx + movq 72(%rsi), %r8 + movq %rcx, 64(%rdi) + sbbq 72(%rdx), %r8 + movq 80(%rsi), %rcx + movq %r8, 72(%rdi) + sbbq 80(%rdx), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%rdi) + sbbq 88(%rdx), %r8 + movq 96(%rsi), %rcx + movq %r8, 88(%rdi) + sbbq 96(%rdx), %rcx + movq 104(%rsi), %r8 + movq %rcx, 96(%rdi) + sbbq 104(%rdx), %r8 + movq 112(%rsi), %rcx + movq %r8, 104(%rdi) + sbbq 112(%rdx), %rcx + movq 120(%rsi), %r8 + movq %rcx, 112(%rdi) + sbbq 120(%rdx), %r8 + movq 128(%rsi), %rcx + movq %r8, 120(%rdi) + sbbq 128(%rdx), %rcx + movq 136(%rsi), %r8 + movq %rcx, 128(%rdi) + sbbq 136(%rdx), %r8 + movq 144(%rsi), %rcx + movq %r8, 136(%rdi) + sbbq 144(%rdx), %rcx + movq 152(%rsi), %r8 + movq %rcx, 144(%rdi) + sbbq 152(%rdx), %r8 + movq 160(%rsi), %rcx + movq %r8, 152(%rdi) + sbbq 160(%rdx), %rcx + movq 168(%rsi), %r8 + movq %rcx, 160(%rdi) + sbbq 168(%rdx), %r8 + movq 176(%rsi), %rcx + movq %r8, 168(%rdi) + sbbq 176(%rdx), %rcx + movq 184(%rsi), %r8 + movq %rcx, 176(%rdi) + sbbq 184(%rdx), %r8 + movq 192(%rsi), %rcx + movq %r8, 184(%rdi) + sbbq 192(%rdx), %rcx + movq 200(%rsi), %r8 + movq %rcx, 192(%rdi) + sbbq 200(%rdx), %r8 + movq 208(%rsi), %rcx + movq %r8, 200(%rdi) + sbbq 208(%rdx), %rcx + movq 216(%rsi), %r8 + movq %rcx, 208(%rdi) + sbbq 216(%rdx), %r8 + movq 224(%rsi), %rcx + movq %r8, 216(%rdi) + sbbq 224(%rdx), %rcx + movq 232(%rsi), %r8 + movq %rcx, 224(%rdi) + sbbq 232(%rdx), %r8 + movq 240(%rsi), %rcx + movq %r8, 232(%rdi) + sbbq 240(%rdx), %rcx + movq 248(%rsi), %r8 + movq %rcx, 240(%rdi) + sbbq 248(%rdx), %r8 + movq 256(%rsi), %rcx + movq %r8, 248(%rdi) + sbbq 256(%rdx), %rcx + movq 264(%rsi), %r8 + movq %rcx, 256(%rdi) + sbbq 264(%rdx), %r8 + movq 272(%rsi), %rcx + movq %r8, 264(%rdi) + sbbq 272(%rdx), %rcx + movq 280(%rsi), %r8 + movq %rcx, 272(%rdi) + sbbq 280(%rdx), %r8 + movq 288(%rsi), %rcx + movq %r8, 280(%rdi) + sbbq 288(%rdx), %rcx + movq 296(%rsi), %r8 + movq %rcx, 288(%rdi) + sbbq 296(%rdx), %r8 + movq 304(%rsi), %rcx + movq %r8, 296(%rdi) + sbbq 304(%rdx), %rcx + movq 
312(%rsi), %r8 + movq %rcx, 304(%rdi) + sbbq 312(%rdx), %r8 + movq 320(%rsi), %rcx + movq %r8, 312(%rdi) + sbbq 320(%rdx), %rcx + movq 328(%rsi), %r8 + movq %rcx, 320(%rdi) + sbbq 328(%rdx), %r8 + movq 336(%rsi), %rcx + movq %r8, 328(%rdi) + sbbq 336(%rdx), %rcx + movq 344(%rsi), %r8 + movq %rcx, 336(%rdi) + sbbq 344(%rdx), %r8 + movq 352(%rsi), %rcx + movq %r8, 344(%rdi) + sbbq 352(%rdx), %rcx + movq 360(%rsi), %r8 + movq %rcx, 352(%rdi) + sbbq 360(%rdx), %r8 + movq 368(%rsi), %rcx + movq %r8, 360(%rdi) + sbbq 368(%rdx), %rcx + movq 376(%rsi), %r8 + movq %rcx, 368(%rdi) + sbbq 376(%rdx), %r8 + movq %r8, 376(%rdi) + sbbq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_3072_sub_48,.-sp_3072_sub_48 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef __APPLE__ +.globl sp_3072_mont_reduce_avx2_48 +.type sp_3072_mont_reduce_avx2_48,@function +.align 16 +sp_3072_mont_reduce_avx2_48: +#else +.globl _sp_3072_mont_reduce_avx2_48 +.p2align 4 +_sp_3072_mont_reduce_avx2_48: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + movq %rdx, %r8 + xorq %r14, %r14 + # i = 48 + movq $48, %r9 + movq (%rdi), %r13 + addq $192, %rdi + xorq %r12, %r12 +L_mont_loop_avx2_48: + # mu = a[i] * mp + movq %r13, %rdx + movq %r13, %r10 + imulq %r8, %rdx + xorq %r12, %r12 + # a[i+0] += m[0] * mu + mulxq (%rsi), %rax, %rcx + movq -184(%rdi), %r13 + adcxq %rax, %r10 + adoxq %rcx, %r13 + # a[i+1] += m[1] * mu + mulxq 8(%rsi), %rax, %rcx + movq -176(%rdi), %r10 + adcxq %rax, %r13 + adoxq %rcx, %r10 + # a[i+2] += m[2] * mu + mulxq 16(%rsi), %rax, %rcx + movq -168(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -176(%rdi) + # a[i+3] += m[3] * mu + mulxq 24(%rsi), %rax, %rcx + movq -160(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -168(%rdi) + # a[i+4] += m[4] * mu + mulxq 32(%rsi), %rax, %rcx + movq -152(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -160(%rdi) + # a[i+5] += m[5] * mu + mulxq 40(%rsi), %rax, %rcx + movq -144(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -152(%rdi) + # a[i+6] += m[6] * mu + mulxq 48(%rsi), %rax, %rcx + movq -136(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -144(%rdi) + # a[i+7] += m[7] * mu + mulxq 56(%rsi), %rax, %rcx + movq -128(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -136(%rdi) + # a[i+8] += m[8] * mu + mulxq 64(%rsi), %rax, %rcx + movq -120(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -128(%rdi) + # a[i+9] += m[9] * mu + mulxq 72(%rsi), %rax, %rcx + movq -112(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -120(%rdi) + # a[i+10] += m[10] * mu + mulxq 80(%rsi), %rax, %rcx + movq -104(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -112(%rdi) + # a[i+11] += m[11] * mu + mulxq 88(%rsi), %rax, %rcx + movq -96(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -104(%rdi) + # a[i+12] += m[12] * mu + mulxq 96(%rsi), %rax, %rcx + movq -88(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -96(%rdi) + # a[i+13] += m[13] * mu + mulxq 104(%rsi), %rax, %rcx + movq -80(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -88(%rdi) + # a[i+14] += m[14] * mu + mulxq 112(%rsi), %rax, %rcx + movq -72(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq 
%r10, -80(%rdi) + # a[i+15] += m[15] * mu + mulxq 120(%rsi), %rax, %rcx + movq -64(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -72(%rdi) + # a[i+16] += m[16] * mu + mulxq 128(%rsi), %rax, %rcx + movq -56(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -64(%rdi) + # a[i+17] += m[17] * mu + mulxq 136(%rsi), %rax, %rcx + movq -48(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -56(%rdi) + # a[i+18] += m[18] * mu + mulxq 144(%rsi), %rax, %rcx + movq -40(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -48(%rdi) + # a[i+19] += m[19] * mu + mulxq 152(%rsi), %rax, %rcx + movq -32(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -40(%rdi) + # a[i+20] += m[20] * mu + mulxq 160(%rsi), %rax, %rcx + movq -24(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -32(%rdi) + # a[i+21] += m[21] * mu + mulxq 168(%rsi), %rax, %rcx + movq -16(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -24(%rdi) + # a[i+22] += m[22] * mu + mulxq 176(%rsi), %rax, %rcx + movq -8(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -16(%rdi) + # a[i+23] += m[23] * mu + mulxq 184(%rsi), %rax, %rcx + movq (%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -8(%rdi) + # a[i+24] += m[24] * mu + mulxq 192(%rsi), %rax, %rcx + movq 8(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, (%rdi) + # a[i+25] += m[25] * mu + mulxq 200(%rsi), %rax, %rcx + movq 16(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 8(%rdi) + # a[i+26] += m[26] * mu + mulxq 208(%rsi), %rax, %rcx + movq 24(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 16(%rdi) + # a[i+27] += m[27] * mu + mulxq 216(%rsi), %rax, %rcx + movq 32(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 24(%rdi) + # a[i+28] += m[28] * mu + mulxq 224(%rsi), %rax, %rcx + movq 40(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 32(%rdi) + # a[i+29] += m[29] * mu + mulxq 232(%rsi), %rax, %rcx + movq 48(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 40(%rdi) + # a[i+30] += m[30] * mu + mulxq 240(%rsi), %rax, %rcx + movq 56(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 48(%rdi) + # a[i+31] += m[31] * mu + mulxq 248(%rsi), %rax, %rcx + movq 64(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 56(%rdi) + # a[i+32] += m[32] * mu + mulxq 256(%rsi), %rax, %rcx + movq 72(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 64(%rdi) + # a[i+33] += m[33] * mu + mulxq 264(%rsi), %rax, %rcx + movq 80(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 72(%rdi) + # a[i+34] += m[34] * mu + mulxq 272(%rsi), %rax, %rcx + movq 88(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 80(%rdi) + # a[i+35] += m[35] * mu + mulxq 280(%rsi), %rax, %rcx + movq 96(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 88(%rdi) + # a[i+36] += m[36] * mu + mulxq 288(%rsi), %rax, %rcx + movq 104(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 96(%rdi) + # a[i+37] += m[37] * mu + mulxq 296(%rsi), %rax, %rcx + movq 112(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 104(%rdi) + # a[i+38] += m[38] * mu + mulxq 304(%rsi), %rax, %rcx + movq 120(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 112(%rdi) + # a[i+39] += m[39] * mu + mulxq 312(%rsi), %rax, %rcx + movq 128(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 120(%rdi) + # a[i+40] += m[40] * mu + mulxq 320(%rsi), %rax, %rcx + movq 
136(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 128(%rdi) + # a[i+41] += m[41] * mu + mulxq 328(%rsi), %rax, %rcx + movq 144(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 136(%rdi) + # a[i+42] += m[42] * mu + mulxq 336(%rsi), %rax, %rcx + movq 152(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 144(%rdi) + # a[i+43] += m[43] * mu + mulxq 344(%rsi), %rax, %rcx + movq 160(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 152(%rdi) + # a[i+44] += m[44] * mu + mulxq 352(%rsi), %rax, %rcx + movq 168(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 160(%rdi) + # a[i+45] += m[45] * mu + mulxq 360(%rsi), %rax, %rcx + movq 176(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 168(%rdi) + # a[i+46] += m[46] * mu + mulxq 368(%rsi), %rax, %rcx + movq 184(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 176(%rdi) + # a[i+47] += m[47] * mu + mulxq 376(%rsi), %rax, %rcx + movq 192(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 184(%rdi) + adcxq %r14, %r10 + movq %r10, 192(%rdi) + movq %r12, %r14 + adoxq %r12, %r14 + adcxq %r12, %r14 + # a += 1 + addq $8, %rdi + # i -= 1 + subq $1, %r9 + jnz L_mont_loop_avx2_48 + subq $192, %rdi + negq %r14 + movq %rdi, %r8 + subq $384, %rdi + movq (%rsi), %rcx + movq %r13, %rdx + pextq %r14, %rcx, %rcx + subq %rcx, %rdx + movq 8(%rsi), %rcx + movq 8(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, (%rdi) + sbbq %rcx, %rax + movq 16(%rsi), %rdx + movq 16(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 8(%rdi) + sbbq %rdx, %rcx + movq 24(%rsi), %rax + movq 24(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 16(%rdi) + sbbq %rax, %rdx + movq 32(%rsi), %rcx + movq 32(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 24(%rdi) + sbbq %rcx, %rax + movq 40(%rsi), %rdx + movq 40(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 32(%rdi) + sbbq %rdx, %rcx + movq 48(%rsi), %rax + movq 48(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 40(%rdi) + sbbq %rax, %rdx + movq 56(%rsi), %rcx + movq 56(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 48(%rdi) + sbbq %rcx, %rax + movq 64(%rsi), %rdx + movq 64(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 56(%rdi) + sbbq %rdx, %rcx + movq 72(%rsi), %rax + movq 72(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 64(%rdi) + sbbq %rax, %rdx + movq 80(%rsi), %rcx + movq 80(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 72(%rdi) + sbbq %rcx, %rax + movq 88(%rsi), %rdx + movq 88(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 80(%rdi) + sbbq %rdx, %rcx + movq 96(%rsi), %rax + movq 96(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 88(%rdi) + sbbq %rax, %rdx + movq 104(%rsi), %rcx + movq 104(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 96(%rdi) + sbbq %rcx, %rax + movq 112(%rsi), %rdx + movq 112(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 104(%rdi) + sbbq %rdx, %rcx + movq 120(%rsi), %rax + movq 120(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 112(%rdi) + sbbq %rax, %rdx + movq 128(%rsi), %rcx + movq 128(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 120(%rdi) + sbbq %rcx, %rax + movq 136(%rsi), %rdx + movq 136(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 128(%rdi) + sbbq %rdx, %rcx + movq 144(%rsi), %rax + movq 144(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 136(%rdi) + sbbq %rax, %rdx + movq 152(%rsi), %rcx + movq 152(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 144(%rdi) + sbbq %rcx, %rax + movq 160(%rsi), %rdx + movq 160(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 152(%rdi) + sbbq %rdx, 
%rcx + movq 168(%rsi), %rax + movq 168(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 160(%rdi) + sbbq %rax, %rdx + movq 176(%rsi), %rcx + movq 176(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 168(%rdi) + sbbq %rcx, %rax + movq 184(%rsi), %rdx + movq 184(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 176(%rdi) + sbbq %rdx, %rcx + movq 192(%rsi), %rax + movq 192(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 184(%rdi) + sbbq %rax, %rdx + movq 200(%rsi), %rcx + movq 200(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 192(%rdi) + sbbq %rcx, %rax + movq 208(%rsi), %rdx + movq 208(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 200(%rdi) + sbbq %rdx, %rcx + movq 216(%rsi), %rax + movq 216(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 208(%rdi) + sbbq %rax, %rdx + movq 224(%rsi), %rcx + movq 224(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 216(%rdi) + sbbq %rcx, %rax + movq 232(%rsi), %rdx + movq 232(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 224(%rdi) + sbbq %rdx, %rcx + movq 240(%rsi), %rax + movq 240(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 232(%rdi) + sbbq %rax, %rdx + movq 248(%rsi), %rcx + movq 248(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 240(%rdi) + sbbq %rcx, %rax + movq 256(%rsi), %rdx + movq 256(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 248(%rdi) + sbbq %rdx, %rcx + movq 264(%rsi), %rax + movq 264(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 256(%rdi) + sbbq %rax, %rdx + movq 272(%rsi), %rcx + movq 272(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 264(%rdi) + sbbq %rcx, %rax + movq 280(%rsi), %rdx + movq 280(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 272(%rdi) + sbbq %rdx, %rcx + movq 288(%rsi), %rax + movq 288(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 280(%rdi) + sbbq %rax, %rdx + movq 296(%rsi), %rcx + movq 296(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 288(%rdi) + sbbq %rcx, %rax + movq 304(%rsi), %rdx + movq 304(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 296(%rdi) + sbbq %rdx, %rcx + movq 312(%rsi), %rax + movq 312(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 304(%rdi) + sbbq %rax, %rdx + movq 320(%rsi), %rcx + movq 320(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 312(%rdi) + sbbq %rcx, %rax + movq 328(%rsi), %rdx + movq 328(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 320(%rdi) + sbbq %rdx, %rcx + movq 336(%rsi), %rax + movq 336(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 328(%rdi) + sbbq %rax, %rdx + movq 344(%rsi), %rcx + movq 344(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 336(%rdi) + sbbq %rcx, %rax + movq 352(%rsi), %rdx + movq 352(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 344(%rdi) + sbbq %rdx, %rcx + movq 360(%rsi), %rax + movq 360(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 352(%rdi) + sbbq %rax, %rdx + movq 368(%rsi), %rcx + movq 368(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 360(%rdi) + sbbq %rcx, %rax + movq 376(%rsi), %rdx + movq 376(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 368(%rdi) + sbbq %rdx, %rcx + movq %rcx, 376(%rdi) + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_3072_mont_reduce_avx2_48,.-sp_3072_mont_reduce_avx2_48 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +#ifndef __APPLE__ +.globl sp_3072_cond_add_24 +.type sp_3072_cond_add_24,@function +.align 16 +sp_3072_cond_add_24: +#else +.globl _sp_3072_cond_add_24 +.p2align 4 +_sp_3072_cond_add_24: +#endif /* __APPLE__ */ + subq $192, %rsp + movq $0, %rax + movq (%rdx), %r8 + movq 8(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, (%rsp) + movq %r9, 8(%rsp) + movq 16(%rdx), %r8 + movq 24(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 16(%rsp) + movq %r9, 24(%rsp) + movq 32(%rdx), %r8 + movq 40(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq 48(%rdx), %r8 + movq 56(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 48(%rsp) + movq %r9, 56(%rsp) + movq 64(%rdx), %r8 + movq 72(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 64(%rsp) + movq %r9, 72(%rsp) + movq 80(%rdx), %r8 + movq 88(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 80(%rsp) + movq %r9, 88(%rsp) + movq 96(%rdx), %r8 + movq 104(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 96(%rsp) + movq %r9, 104(%rsp) + movq 112(%rdx), %r8 + movq 120(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 112(%rsp) + movq %r9, 120(%rsp) + movq 128(%rdx), %r8 + movq 136(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 128(%rsp) + movq %r9, 136(%rsp) + movq 144(%rdx), %r8 + movq 152(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 144(%rsp) + movq %r9, 152(%rsp) + movq 160(%rdx), %r8 + movq 168(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 160(%rsp) + movq %r9, 168(%rsp) + movq 176(%rdx), %r8 + movq 184(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 176(%rsp) + movq %r9, 184(%rsp) + movq (%rsi), %r8 + movq (%rsp), %rdx + addq %rdx, %r8 + movq 8(%rsi), %r9 + movq 8(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, (%rdi) + movq 16(%rsi), %r8 + movq 16(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 8(%rdi) + movq 24(%rsi), %r9 + movq 24(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 16(%rdi) + movq 32(%rsi), %r8 + movq 32(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 24(%rdi) + movq 40(%rsi), %r9 + movq 40(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 32(%rdi) + movq 48(%rsi), %r8 + movq 48(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 40(%rdi) + movq 56(%rsi), %r9 + movq 56(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 48(%rdi) + movq 64(%rsi), %r8 + movq 64(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 56(%rdi) + movq 72(%rsi), %r9 + movq 72(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 64(%rdi) + movq 80(%rsi), %r8 + movq 80(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 72(%rdi) + movq 88(%rsi), %r9 + movq 88(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 80(%rdi) + movq 96(%rsi), %r8 + movq 96(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 88(%rdi) + movq 104(%rsi), %r9 + movq 104(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 96(%rdi) + movq 112(%rsi), %r8 + movq 112(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 104(%rdi) + movq 120(%rsi), %r9 + movq 120(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 112(%rdi) + movq 128(%rsi), %r8 + movq 128(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 120(%rdi) + movq 136(%rsi), %r9 + movq 136(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 128(%rdi) + movq 144(%rsi), %r8 + movq 144(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 136(%rdi) + movq 152(%rsi), %r9 + movq 152(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 144(%rdi) + movq 160(%rsi), %r8 + movq 160(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 152(%rdi) + movq 168(%rsi), %r9 + movq 168(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 160(%rdi) + movq 176(%rsi), %r8 + movq 176(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 168(%rdi) + movq 
184(%rsi), %r9 + movq 184(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 176(%rdi) + movq %r9, 184(%rdi) + adcq $0, %rax + addq $192, %rsp + repz retq +#ifndef __APPLE__ +.size sp_3072_cond_add_24,.-sp_3072_cond_add_24 +#endif /* __APPLE__ */ +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +#ifndef __APPLE__ +.globl sp_3072_cond_add_avx2_24 +.type sp_3072_cond_add_avx2_24,@function +.align 16 +sp_3072_cond_add_avx2_24: +#else +.globl _sp_3072_cond_add_avx2_24 +.p2align 4 +_sp_3072_cond_add_avx2_24: +#endif /* __APPLE__ */ + movq $0, %rax + movq (%rdx), %r10 + movq (%rsi), %r8 + pextq %rcx, %r10, %r10 + addq %r10, %r8 + movq 8(%rdx), %r10 + movq 8(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, (%rdi) + adcq %r10, %r9 + movq 16(%rdx), %r8 + movq 16(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 8(%rdi) + adcq %r8, %r10 + movq 24(%rdx), %r9 + movq 24(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 16(%rdi) + adcq %r9, %r8 + movq 32(%rdx), %r10 + movq 32(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 24(%rdi) + adcq %r10, %r9 + movq 40(%rdx), %r8 + movq 40(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 32(%rdi) + adcq %r8, %r10 + movq 48(%rdx), %r9 + movq 48(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 40(%rdi) + adcq %r9, %r8 + movq 56(%rdx), %r10 + movq 56(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 48(%rdi) + adcq %r10, %r9 + movq 64(%rdx), %r8 + movq 64(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 56(%rdi) + adcq %r8, %r10 + movq 72(%rdx), %r9 + movq 72(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 64(%rdi) + adcq %r9, %r8 + movq 80(%rdx), %r10 + movq 80(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 72(%rdi) + adcq %r10, %r9 + movq 88(%rdx), %r8 + movq 88(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 80(%rdi) + adcq %r8, %r10 + movq 96(%rdx), %r9 + movq 96(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 88(%rdi) + adcq %r9, %r8 + movq 104(%rdx), %r10 + movq 104(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 96(%rdi) + adcq %r10, %r9 + movq 112(%rdx), %r8 + movq 112(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 104(%rdi) + adcq %r8, %r10 + movq 120(%rdx), %r9 + movq 120(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 112(%rdi) + adcq %r9, %r8 + movq 128(%rdx), %r10 + movq 128(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 120(%rdi) + adcq %r10, %r9 + movq 136(%rdx), %r8 + movq 136(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 128(%rdi) + adcq %r8, %r10 + movq 144(%rdx), %r9 + movq 144(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 136(%rdi) + adcq %r9, %r8 + movq 152(%rdx), %r10 + movq 152(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 144(%rdi) + adcq %r10, %r9 + movq 160(%rdx), %r8 + movq 160(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 152(%rdi) + adcq %r8, %r10 + movq 168(%rdx), %r9 + movq 168(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 160(%rdi) + adcq %r9, %r8 + movq 176(%rdx), %r10 + movq 176(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 168(%rdi) + adcq %r10, %r9 + movq 184(%rdx), %r8 + movq 184(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 176(%rdi) + adcq %r8, %r10 + movq %r10, 184(%rdi) + adcq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_3072_cond_add_avx2_24,.-sp_3072_cond_add_avx2_24 +#endif /* __APPLE__ */ +/* Shift number left by n bits. (r = a << n) + * + * r Result of left shift by n. + * a Number to shift. + * n Amount to shift.
+ */ +#ifndef __APPLE__ +.globl sp_3072_lshift_48 +.type sp_3072_lshift_48,@function +.align 16 +sp_3072_lshift_48: +#else +.globl _sp_3072_lshift_48 +.p2align 4 +_sp_3072_lshift_48: +#endif /* __APPLE__ */ + movq %rdx, %rcx + movq $0, %r10 + movq 344(%rsi), %r11 + movq 352(%rsi), %rdx + movq 360(%rsi), %rax + movq 368(%rsi), %r8 + movq 376(%rsi), %r9 + shldq %cl, %r9, %r10 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 352(%rdi) + movq %rax, 360(%rdi) + movq %r8, 368(%rdi) + movq %r9, 376(%rdi) + movq %r10, 384(%rdi) + movq 312(%rsi), %r9 + movq 320(%rsi), %rdx + movq 328(%rsi), %rax + movq 336(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 320(%rdi) + movq %rax, 328(%rdi) + movq %r8, 336(%rdi) + movq %r11, 344(%rdi) + movq 280(%rsi), %r11 + movq 288(%rsi), %rdx + movq 296(%rsi), %rax + movq 304(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 288(%rdi) + movq %rax, 296(%rdi) + movq %r8, 304(%rdi) + movq %r9, 312(%rdi) + movq 248(%rsi), %r9 + movq 256(%rsi), %rdx + movq 264(%rsi), %rax + movq 272(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 256(%rdi) + movq %rax, 264(%rdi) + movq %r8, 272(%rdi) + movq %r11, 280(%rdi) + movq 216(%rsi), %r11 + movq 224(%rsi), %rdx + movq 232(%rsi), %rax + movq 240(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 224(%rdi) + movq %rax, 232(%rdi) + movq %r8, 240(%rdi) + movq %r9, 248(%rdi) + movq 184(%rsi), %r9 + movq 192(%rsi), %rdx + movq 200(%rsi), %rax + movq 208(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 192(%rdi) + movq %rax, 200(%rdi) + movq %r8, 208(%rdi) + movq %r11, 216(%rdi) + movq 152(%rsi), %r11 + movq 160(%rsi), %rdx + movq 168(%rsi), %rax + movq 176(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 160(%rdi) + movq %rax, 168(%rdi) + movq %r8, 176(%rdi) + movq %r9, 184(%rdi) + movq 120(%rsi), %r9 + movq 128(%rsi), %rdx + movq 136(%rsi), %rax + movq 144(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 128(%rdi) + movq %rax, 136(%rdi) + movq %r8, 144(%rdi) + movq %r11, 152(%rdi) + movq 88(%rsi), %r11 + movq 96(%rsi), %rdx + movq 104(%rsi), %rax + movq 112(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 96(%rdi) + movq %rax, 104(%rdi) + movq %r8, 112(%rdi) + movq %r9, 120(%rdi) + movq 56(%rsi), %r9 + movq 64(%rsi), %rdx + movq 72(%rsi), %rax + movq 80(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 64(%rdi) + movq %rax, 72(%rdi) + movq %r8, 80(%rdi) + movq %r11, 88(%rdi) + movq 24(%rsi), %r11 + movq 32(%rsi), %rdx + movq 40(%rsi), %rax + movq 48(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 32(%rdi) + movq %rax, 40(%rdi) + movq %r8, 48(%rdi) + movq %r9, 56(%rdi) + movq (%rsi), %rdx + movq 8(%rsi), %rax + movq 16(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shlq %cl, %rdx + movq %rdx, (%rdi) + movq %rax, 8(%rdi) + movq %r8, 16(%rdi) + movq %r11, 24(%rdi) + repz retq 
+#ifndef __APPLE__ +.size sp_3072_lshift_48,.-sp_3072_lshift_48 +#endif /* __APPLE__ */
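The sp_3072_cmp_48 routine earlier in this section is a constant-time compare: it walks all 48 words from the most significant down, masks each pair of words with an "undecided" flag, and folds the outcome of a subtraction into the result with cmova/cmovc/cmovnz so no data-dependent branch is ever taken; the closing xorq turns the never-decided (all-equal) case into 0. A minimal C model of that logic, with hypothetical names and ternaries standing in for the branch-free cmov instructions, might look like this:

#include <stdint.h>

/* Functional model of the constant-time compare: every word is visited
 * regardless of where the first difference occurs, so the running time
 * leaks nothing about the operands.  Returns 1, 0 or -1. */
static int64_t sp_cmp_words(const uint64_t* a, const uint64_t* b, int n)
{
    int64_t r = -1;     /* result candidate (%rax in the asm) */
    int64_t mask = -1;  /* all-ones while no word has differed (%rdx) */
    for (int i = n - 1; i >= 0; i--) {
        uint64_t ai = a[i] & (uint64_t)mask;   /* andq %rdx, %r9  */
        uint64_t bi = b[i] & (uint64_t)mask;   /* andq %rdx, %r10 */
        r    = (ai > bi)  ? 1    : r;          /* cmova %r8, %rax  */
        r    = (ai < bi)  ? mask : r;          /* cmovc %rdx, %rax */
        mask = (ai != bi) ? 0    : mask;       /* cmovnz %rcx, %rdx */
    }
    return r ^ mask;    /* xorq %rdx, %rax: equal inputs yield 0 */
}

Once a difference fixes the result, the mask drops to zero and every later iteration compares only zeros, so the loop body does identical work on every path.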
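sp_3072_cond_add_24 above and its _avx2 twin add b into a only when the mask m is all-ones, performing the same word-by-word work either way so the decision never shows in the timing: the plain version ands each word of b with the mask, while the _avx2 variant gets the identical effect from pextq (a BMI2 instruction), which extracts either all 64 bits of a word or none of them. A hedged C sketch of the semantics, using a hypothetical name and the GCC/Clang unsigned __int128 extension for the carry chain:

#include <stdint.h>

/* Functional model of the conditional add: m must be 0 or ~0.
 * Returns the carry out of the top word, as the assembly does in %rax. */
static uint64_t sp_cond_add(uint64_t* r, const uint64_t* a,
                            const uint64_t* b, uint64_t m, int n)
{
    unsigned __int128 t = 0;
    for (int i = 0; i < n; i++) {
        t += (unsigned __int128)a[i] + (b[i] & m);  /* andq/pextq masking */
        r[i] = (uint64_t)t;                         /* store the word     */
        t >>= 64;                                   /* adcq carry chain   */
    }
    return (uint64_t)t;
}

Primitives like this let callers act on a secret condition without ever feeding it to the branch predictor.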
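The comment on sp_3072_mont_reduce_avx2_48 above compresses the whole idea of word-level Montgomery reduction: for each of the 48 low words, a multiplier mu = a[i] * mp is chosen so that adding mu * m makes that word zero, and after 48 rounds the low half is all zero while the high half holds the reduced value, up to one masked subtraction of m decided by the loop's carry. A compact C model of that structure (hypothetical names, unsigned __int128 for the products; the assembly additionally runs two independent carry chains with mulx/adcx/adox to keep the multiplier busy):

#include <stdint.h>

/* Functional model of word-by-word Montgomery reduction.
 * a has 2*n words; m is the modulus; mp = -m^-1 mod 2^64. */
static void mont_reduce(uint64_t* a, const uint64_t* m, uint64_t mp, int n)
{
    uint64_t carry = 0;
    for (int i = 0; i < n; i++) {
        uint64_t mu = a[i] * mp;        /* imulq %r8, %rdx: zeroes a[i] */
        unsigned __int128 t = 0;
        for (int j = 0; j < n; j++) {   /* a[i..i+n-1] += mu * m */
            t += (unsigned __int128)mu * m[j] + a[i + j];
            a[i + j] = (uint64_t)t;
            t >>= 64;
        }
        t += (unsigned __int128)a[i + n] + carry;  /* fold into word above */
        a[i + n] = (uint64_t)t;
        carry = (uint64_t)(t >> 64);
    }
    /* Masked final subtraction, as after the asm loop (negq %r14 + pextq):
     * m is subtracted from the high half only when the loop carried out,
     * and the result lands in the low n words. */
    uint64_t mask = (uint64_t)0 - carry;
    uint64_t borrow = 0;
    for (int j = 0; j < n; j++) {
        unsigned __int128 d = (unsigned __int128)a[n + j] - (m[j] & mask) - borrow;
        a[j] = (uint64_t)d;
        borrow = (uint64_t)(d >> 64) & 1;  /* 1 if the subtract wrapped */
    }
}

Like the assembly, the model leaves the value in the same not-necessarily-fully-reduced convention the surrounding Montgomery arithmetic expects; only the mask, never a branch, depends on the data.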
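sp_4096_mul_64, which begins below, is one level of Karatsuba multiplication: a 64-word product is assembled from three 32-word products (aL*bL, aH*bH and (aL+aH)*(bL+bH), each computed by sp_2048_mul_32), with the carry bits of the two half-sums folded back in through masks rather than branches. A hedged C sketch of that shape, with hypothetical helper names, fixed 32-word scratch arrays, and two clarity branches where the assembly masks instead:

#include <stdint.h>
#include <string.h>

static uint64_t add_n(uint64_t* r, const uint64_t* a, const uint64_t* b, int n)
{
    unsigned __int128 t = 0;
    for (int i = 0; i < n; i++) {
        t += (unsigned __int128)a[i] + b[i];
        r[i] = (uint64_t)t;
        t >>= 64;
    }
    return (uint64_t)t;                    /* carry out */
}

static uint64_t sub_n(uint64_t* r, const uint64_t* a, const uint64_t* b, int n)
{
    uint64_t borrow = 0;
    for (int i = 0; i < n; i++) {
        unsigned __int128 d = (unsigned __int128)a[i] - b[i] - borrow;
        r[i] = (uint64_t)d;
        borrow = (uint64_t)(d >> 64) & 1;
    }
    return borrow;                         /* borrow out */
}

static void mul_n(uint64_t* r, const uint64_t* a, const uint64_t* b, int n)
{
    memset(r, 0, 2 * (size_t)n * sizeof(uint64_t));  /* r has 2n words */
    for (int i = 0; i < n; i++) {          /* school-book half-size multiply */
        uint64_t c = 0;
        for (int j = 0; j < n; j++) {
            unsigned __int128 t = (unsigned __int128)a[i] * b[j] + r[i + j] + c;
            r[i + j] = (uint64_t)t;
            c = (uint64_t)(t >> 64);
        }
        r[i + n] = c;
    }
}

/* r (2n words) = a * b (n words each); n even, n <= 64 for this sketch. */
static void karatsuba_mul(uint64_t* r, const uint64_t* a, const uint64_t* b, int n)
{
    int h = n / 2;
    uint64_t u[32], v[32], z1[64];
    uint64_t ca = add_n(u, a, a + h, h);   /* u = aL + aH, carry ca */
    uint64_t cb = add_n(v, b, b + h, h);   /* v = bL + bH, carry cb */
    mul_n(z1, u, v, h);                    /* z1 = u * v            */
    mul_n(r + n, a + h, b + h, h);         /* high half: aH * bH    */
    mul_n(r, a, b, h);                     /* low half:  aL * bL    */
    /* restore the dropped sum carries: (ca*v + cb*u) << h, ca*cb << n */
    uint64_t top = ca & cb;
    if (ca) top += add_n(z1 + h, z1 + h, v, h);  /* asm does this branch-free */
    if (cb) top += add_n(z1 + h, z1 + h, u, h);
    /* middle term = (aL+aH)(bL+bH) - aL*bL - aH*bH (top word wraps mod 2^64) */
    top -= sub_n(z1, z1, r, n);            /* - low half  */
    top -= sub_n(z1, z1, r + n, n);        /* - high half */
    /* add the middle term in at word offset h, then ripple the top out */
    uint64_t c = add_n(r + h, r + h, z1, n);
    for (int i = h + n; i < 2 * n && (c | top); i++) {
        unsigned __int128 t = (unsigned __int128)r[i] + c + top;
        r[i] = (uint64_t)t;
        c = (uint64_t)(t >> 64);
        top = 0;
    }
}

The saving is the classic one: three half-size multiplies instead of four, at the cost of the extra additions and carry bookkeeping visible in the assembly that follows.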
+#endif /* !WOLFSSL_SP_NO_3072 */ +#endif /* !WOLFSSL_SP_NO_3072 */ +#ifdef WOLFSSL_SP_4096 +#ifdef WOLFSSL_SP_4096 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +#ifndef __APPLE__ +.globl sp_4096_from_bin +.type sp_4096_from_bin,@function +.align 16 +sp_4096_from_bin: +#else +.globl _sp_4096_from_bin +.p2align 4 +_sp_4096_from_bin: +#endif /* __APPLE__ */ + movq %rdx, %r9 + movq %rdi, %r10 + addq %rcx, %r9 + addq $512, %r10 + xorq %r11, %r11 + jmp L_4096_from_bin_64_end +L_4096_from_bin_64_start: + subq $64, %r9 + movbeq 56(%r9), %rax + movbeq 48(%r9), %r8 + movq %rax, (%rdi) + movq %r8, 8(%rdi) + movbeq 40(%r9), %rax + movbeq 32(%r9), %r8 + movq %rax, 16(%rdi) + movq %r8, 24(%rdi) + movbeq 24(%r9), %rax + movbeq 16(%r9), %r8 + movq %rax, 32(%rdi) + movq %r8, 40(%rdi) + movbeq 8(%r9), %rax + movbeq (%r9), %r8 + movq %rax, 48(%rdi) + movq %r8, 56(%rdi) + addq $64, %rdi + subq $64, %rcx +L_4096_from_bin_64_end: + cmpq $63, %rcx + jg L_4096_from_bin_64_start + jmp L_4096_from_bin_8_end +L_4096_from_bin_8_start: + subq $8, %r9 + movbeq (%r9), %rax + movq %rax, (%rdi) + addq $8, %rdi + subq $8, %rcx +L_4096_from_bin_8_end: + cmpq $7, %rcx + jg L_4096_from_bin_8_start + cmpq %r11, %rcx + je L_4096_from_bin_hi_end + movq %r11, %r8 + movq %r11, %rax +L_4096_from_bin_hi_start: + movb (%rdx), %al + shlq $8, %r8 + incq %rdx + addq %rax, %r8 + decq %rcx + jg L_4096_from_bin_hi_start + movq %r8, (%rdi) + addq $8, %rdi +L_4096_from_bin_hi_end: + cmpq %r10, %rdi + je L_4096_from_bin_zero_end +L_4096_from_bin_zero_start: + movq %r11, (%rdi) + addq $8, %rdi + cmpq %r10, %rdi + jl L_4096_from_bin_zero_start +L_4096_from_bin_zero_end: + repz retq +#ifndef __APPLE__ +.size sp_4096_from_bin,.-sp_4096_from_bin +#endif /* __APPLE__ */ +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 512 + * + * r A single precision integer. + * a Byte array. 
+ */ +#ifndef __APPLE__ +.globl sp_4096_to_bin +.type sp_4096_to_bin,@function +.align 16 +sp_4096_to_bin: +#else +.globl _sp_4096_to_bin +.p2align 4 +_sp_4096_to_bin: +#endif /* __APPLE__ */ + movbeq 504(%rdi), %rdx + movbeq 496(%rdi), %rax + movq %rdx, (%rsi) + movq %rax, 8(%rsi) + movbeq 488(%rdi), %rdx + movbeq 480(%rdi), %rax + movq %rdx, 16(%rsi) + movq %rax, 24(%rsi) + movbeq 472(%rdi), %rdx + movbeq 464(%rdi), %rax + movq %rdx, 32(%rsi) + movq %rax, 40(%rsi) + movbeq 456(%rdi), %rdx + movbeq 448(%rdi), %rax + movq %rdx, 48(%rsi) + movq %rax, 56(%rsi) + movbeq 440(%rdi), %rdx + movbeq 432(%rdi), %rax + movq %rdx, 64(%rsi) + movq %rax, 72(%rsi) + movbeq 424(%rdi), %rdx + movbeq 416(%rdi), %rax + movq %rdx, 80(%rsi) + movq %rax, 88(%rsi) + movbeq 408(%rdi), %rdx + movbeq 400(%rdi), %rax + movq %rdx, 96(%rsi) + movq %rax, 104(%rsi) + movbeq 392(%rdi), %rdx + movbeq 384(%rdi), %rax + movq %rdx, 112(%rsi) + movq %rax, 120(%rsi) + movbeq 376(%rdi), %rdx + movbeq 368(%rdi), %rax + movq %rdx, 128(%rsi) + movq %rax, 136(%rsi) + movbeq 360(%rdi), %rdx + movbeq 352(%rdi), %rax + movq %rdx, 144(%rsi) + movq %rax, 152(%rsi) + movbeq 344(%rdi), %rdx + movbeq 336(%rdi), %rax + movq %rdx, 160(%rsi) + movq %rax, 168(%rsi) + movbeq 328(%rdi), %rdx + movbeq 320(%rdi), %rax + movq %rdx, 176(%rsi) + movq %rax, 184(%rsi) + movbeq 312(%rdi), %rdx + movbeq 304(%rdi), %rax + movq %rdx, 192(%rsi) + movq %rax, 200(%rsi) + movbeq 296(%rdi), %rdx + movbeq 288(%rdi), %rax + movq %rdx, 208(%rsi) + movq %rax, 216(%rsi) + movbeq 280(%rdi), %rdx + movbeq 272(%rdi), %rax + movq %rdx, 224(%rsi) + movq %rax, 232(%rsi) + movbeq 264(%rdi), %rdx + movbeq 256(%rdi), %rax + movq %rdx, 240(%rsi) + movq %rax, 248(%rsi) + movbeq 248(%rdi), %rdx + movbeq 240(%rdi), %rax + movq %rdx, 256(%rsi) + movq %rax, 264(%rsi) + movbeq 232(%rdi), %rdx + movbeq 224(%rdi), %rax + movq %rdx, 272(%rsi) + movq %rax, 280(%rsi) + movbeq 216(%rdi), %rdx + movbeq 208(%rdi), %rax + movq %rdx, 288(%rsi) + movq %rax, 296(%rsi) + movbeq 200(%rdi), %rdx + movbeq 192(%rdi), %rax + movq %rdx, 304(%rsi) + movq %rax, 312(%rsi) + movbeq 184(%rdi), %rdx + movbeq 176(%rdi), %rax + movq %rdx, 320(%rsi) + movq %rax, 328(%rsi) + movbeq 168(%rdi), %rdx + movbeq 160(%rdi), %rax + movq %rdx, 336(%rsi) + movq %rax, 344(%rsi) + movbeq 152(%rdi), %rdx + movbeq 144(%rdi), %rax + movq %rdx, 352(%rsi) + movq %rax, 360(%rsi) + movbeq 136(%rdi), %rdx + movbeq 128(%rdi), %rax + movq %rdx, 368(%rsi) + movq %rax, 376(%rsi) + movbeq 120(%rdi), %rdx + movbeq 112(%rdi), %rax + movq %rdx, 384(%rsi) + movq %rax, 392(%rsi) + movbeq 104(%rdi), %rdx + movbeq 96(%rdi), %rax + movq %rdx, 400(%rsi) + movq %rax, 408(%rsi) + movbeq 88(%rdi), %rdx + movbeq 80(%rdi), %rax + movq %rdx, 416(%rsi) + movq %rax, 424(%rsi) + movbeq 72(%rdi), %rdx + movbeq 64(%rdi), %rax + movq %rdx, 432(%rsi) + movq %rax, 440(%rsi) + movbeq 56(%rdi), %rdx + movbeq 48(%rdi), %rax + movq %rdx, 448(%rsi) + movq %rax, 456(%rsi) + movbeq 40(%rdi), %rdx + movbeq 32(%rdi), %rax + movq %rdx, 464(%rsi) + movq %rax, 472(%rsi) + movbeq 24(%rdi), %rdx + movbeq 16(%rdi), %rax + movq %rdx, 480(%rsi) + movq %rax, 488(%rsi) + movbeq 8(%rdi), %rdx + movbeq (%rdi), %rax + movq %rdx, 496(%rsi) + movq %rax, 504(%rsi) + repz retq +#ifndef __APPLE__ +.size sp_4096_to_bin,.-sp_4096_to_bin +#endif /* __APPLE__ */ +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. 
+ */ +#ifndef __APPLE__ +.globl sp_4096_sub_in_place_64 +.type sp_4096_sub_in_place_64,@function +.align 16 +sp_4096_sub_in_place_64: +#else +.globl _sp_4096_sub_in_place_64 +.p2align 4 +_sp_4096_sub_in_place_64: +#endif /* __APPLE__ */ + movq (%rdi), %rdx + xorq %rax, %rax + subq (%rsi), %rdx + movq 8(%rdi), %rcx + movq %rdx, (%rdi) + sbbq 8(%rsi), %rcx + movq 16(%rdi), %rdx + movq %rcx, 8(%rdi) + sbbq 16(%rsi), %rdx + movq 24(%rdi), %rcx + movq %rdx, 16(%rdi) + sbbq 24(%rsi), %rcx + movq 32(%rdi), %rdx + movq %rcx, 24(%rdi) + sbbq 32(%rsi), %rdx + movq 40(%rdi), %rcx + movq %rdx, 32(%rdi) + sbbq 40(%rsi), %rcx + movq 48(%rdi), %rdx + movq %rcx, 40(%rdi) + sbbq 48(%rsi), %rdx + movq 56(%rdi), %rcx + movq %rdx, 48(%rdi) + sbbq 56(%rsi), %rcx + movq 64(%rdi), %rdx + movq %rcx, 56(%rdi) + sbbq 64(%rsi), %rdx + movq 72(%rdi), %rcx + movq %rdx, 64(%rdi) + sbbq 72(%rsi), %rcx + movq 80(%rdi), %rdx + movq %rcx, 72(%rdi) + sbbq 80(%rsi), %rdx + movq 88(%rdi), %rcx + movq %rdx, 80(%rdi) + sbbq 88(%rsi), %rcx + movq 96(%rdi), %rdx + movq %rcx, 88(%rdi) + sbbq 96(%rsi), %rdx + movq 104(%rdi), %rcx + movq %rdx, 96(%rdi) + sbbq 104(%rsi), %rcx + movq 112(%rdi), %rdx + movq %rcx, 104(%rdi) + sbbq 112(%rsi), %rdx + movq 120(%rdi), %rcx + movq %rdx, 112(%rdi) + sbbq 120(%rsi), %rcx + movq 128(%rdi), %rdx + movq %rcx, 120(%rdi) + sbbq 128(%rsi), %rdx + movq 136(%rdi), %rcx + movq %rdx, 128(%rdi) + sbbq 136(%rsi), %rcx + movq 144(%rdi), %rdx + movq %rcx, 136(%rdi) + sbbq 144(%rsi), %rdx + movq 152(%rdi), %rcx + movq %rdx, 144(%rdi) + sbbq 152(%rsi), %rcx + movq 160(%rdi), %rdx + movq %rcx, 152(%rdi) + sbbq 160(%rsi), %rdx + movq 168(%rdi), %rcx + movq %rdx, 160(%rdi) + sbbq 168(%rsi), %rcx + movq 176(%rdi), %rdx + movq %rcx, 168(%rdi) + sbbq 176(%rsi), %rdx + movq 184(%rdi), %rcx + movq %rdx, 176(%rdi) + sbbq 184(%rsi), %rcx + movq 192(%rdi), %rdx + movq %rcx, 184(%rdi) + sbbq 192(%rsi), %rdx + movq 200(%rdi), %rcx + movq %rdx, 192(%rdi) + sbbq 200(%rsi), %rcx + movq 208(%rdi), %rdx + movq %rcx, 200(%rdi) + sbbq 208(%rsi), %rdx + movq 216(%rdi), %rcx + movq %rdx, 208(%rdi) + sbbq 216(%rsi), %rcx + movq 224(%rdi), %rdx + movq %rcx, 216(%rdi) + sbbq 224(%rsi), %rdx + movq 232(%rdi), %rcx + movq %rdx, 224(%rdi) + sbbq 232(%rsi), %rcx + movq 240(%rdi), %rdx + movq %rcx, 232(%rdi) + sbbq 240(%rsi), %rdx + movq 248(%rdi), %rcx + movq %rdx, 240(%rdi) + sbbq 248(%rsi), %rcx + movq 256(%rdi), %rdx + movq %rcx, 248(%rdi) + sbbq 256(%rsi), %rdx + movq 264(%rdi), %rcx + movq %rdx, 256(%rdi) + sbbq 264(%rsi), %rcx + movq 272(%rdi), %rdx + movq %rcx, 264(%rdi) + sbbq 272(%rsi), %rdx + movq 280(%rdi), %rcx + movq %rdx, 272(%rdi) + sbbq 280(%rsi), %rcx + movq 288(%rdi), %rdx + movq %rcx, 280(%rdi) + sbbq 288(%rsi), %rdx + movq 296(%rdi), %rcx + movq %rdx, 288(%rdi) + sbbq 296(%rsi), %rcx + movq 304(%rdi), %rdx + movq %rcx, 296(%rdi) + sbbq 304(%rsi), %rdx + movq 312(%rdi), %rcx + movq %rdx, 304(%rdi) + sbbq 312(%rsi), %rcx + movq 320(%rdi), %rdx + movq %rcx, 312(%rdi) + sbbq 320(%rsi), %rdx + movq 328(%rdi), %rcx + movq %rdx, 320(%rdi) + sbbq 328(%rsi), %rcx + movq 336(%rdi), %rdx + movq %rcx, 328(%rdi) + sbbq 336(%rsi), %rdx + movq 344(%rdi), %rcx + movq %rdx, 336(%rdi) + sbbq 344(%rsi), %rcx + movq 352(%rdi), %rdx + movq %rcx, 344(%rdi) + sbbq 352(%rsi), %rdx + movq 360(%rdi), %rcx + movq %rdx, 352(%rdi) + sbbq 360(%rsi), %rcx + movq 368(%rdi), %rdx + movq %rcx, 360(%rdi) + sbbq 368(%rsi), %rdx + movq 376(%rdi), %rcx + movq %rdx, 368(%rdi) + sbbq 376(%rsi), %rcx + movq 384(%rdi), %rdx + movq %rcx, 376(%rdi) + sbbq 
384(%rsi), %rdx + movq 392(%rdi), %rcx + movq %rdx, 384(%rdi) + sbbq 392(%rsi), %rcx + movq 400(%rdi), %rdx + movq %rcx, 392(%rdi) + sbbq 400(%rsi), %rdx + movq 408(%rdi), %rcx + movq %rdx, 400(%rdi) + sbbq 408(%rsi), %rcx + movq 416(%rdi), %rdx + movq %rcx, 408(%rdi) + sbbq 416(%rsi), %rdx + movq 424(%rdi), %rcx + movq %rdx, 416(%rdi) + sbbq 424(%rsi), %rcx + movq 432(%rdi), %rdx + movq %rcx, 424(%rdi) + sbbq 432(%rsi), %rdx + movq 440(%rdi), %rcx + movq %rdx, 432(%rdi) + sbbq 440(%rsi), %rcx + movq 448(%rdi), %rdx + movq %rcx, 440(%rdi) + sbbq 448(%rsi), %rdx + movq 456(%rdi), %rcx + movq %rdx, 448(%rdi) + sbbq 456(%rsi), %rcx + movq 464(%rdi), %rdx + movq %rcx, 456(%rdi) + sbbq 464(%rsi), %rdx + movq 472(%rdi), %rcx + movq %rdx, 464(%rdi) + sbbq 472(%rsi), %rcx + movq 480(%rdi), %rdx + movq %rcx, 472(%rdi) + sbbq 480(%rsi), %rdx + movq 488(%rdi), %rcx + movq %rdx, 480(%rdi) + sbbq 488(%rsi), %rcx + movq 496(%rdi), %rdx + movq %rcx, 488(%rdi) + sbbq 496(%rsi), %rdx + movq 504(%rdi), %rcx + movq %rdx, 496(%rdi) + sbbq 504(%rsi), %rcx + movq %rcx, 504(%rdi) + sbbq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_4096_sub_in_place_64,.-sp_4096_sub_in_place_64 +#endif /* __APPLE__ */ +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_4096_add_64 +.type sp_4096_add_64,@function +.align 16 +sp_4096_add_64: +#else +.globl _sp_4096_add_64 +.p2align 4 +_sp_4096_add_64: +#endif /* __APPLE__ */ + # Add + movq (%rsi), %rcx + xorq %rax, %rax + addq (%rdx), %rcx + movq 8(%rsi), %r8 + movq %rcx, (%rdi) + adcq 8(%rdx), %r8 + movq 16(%rsi), %rcx + movq %r8, 8(%rdi) + adcq 16(%rdx), %rcx + movq 24(%rsi), %r8 + movq %rcx, 16(%rdi) + adcq 24(%rdx), %r8 + movq 32(%rsi), %rcx + movq %r8, 24(%rdi) + adcq 32(%rdx), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%rdi) + adcq 40(%rdx), %r8 + movq 48(%rsi), %rcx + movq %r8, 40(%rdi) + adcq 48(%rdx), %rcx + movq 56(%rsi), %r8 + movq %rcx, 48(%rdi) + adcq 56(%rdx), %r8 + movq 64(%rsi), %rcx + movq %r8, 56(%rdi) + adcq 64(%rdx), %rcx + movq 72(%rsi), %r8 + movq %rcx, 64(%rdi) + adcq 72(%rdx), %r8 + movq 80(%rsi), %rcx + movq %r8, 72(%rdi) + adcq 80(%rdx), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%rdi) + adcq 88(%rdx), %r8 + movq 96(%rsi), %rcx + movq %r8, 88(%rdi) + adcq 96(%rdx), %rcx + movq 104(%rsi), %r8 + movq %rcx, 96(%rdi) + adcq 104(%rdx), %r8 + movq 112(%rsi), %rcx + movq %r8, 104(%rdi) + adcq 112(%rdx), %rcx + movq 120(%rsi), %r8 + movq %rcx, 112(%rdi) + adcq 120(%rdx), %r8 + movq 128(%rsi), %rcx + movq %r8, 120(%rdi) + adcq 128(%rdx), %rcx + movq 136(%rsi), %r8 + movq %rcx, 128(%rdi) + adcq 136(%rdx), %r8 + movq 144(%rsi), %rcx + movq %r8, 136(%rdi) + adcq 144(%rdx), %rcx + movq 152(%rsi), %r8 + movq %rcx, 144(%rdi) + adcq 152(%rdx), %r8 + movq 160(%rsi), %rcx + movq %r8, 152(%rdi) + adcq 160(%rdx), %rcx + movq 168(%rsi), %r8 + movq %rcx, 160(%rdi) + adcq 168(%rdx), %r8 + movq 176(%rsi), %rcx + movq %r8, 168(%rdi) + adcq 176(%rdx), %rcx + movq 184(%rsi), %r8 + movq %rcx, 176(%rdi) + adcq 184(%rdx), %r8 + movq 192(%rsi), %rcx + movq %r8, 184(%rdi) + adcq 192(%rdx), %rcx + movq 200(%rsi), %r8 + movq %rcx, 192(%rdi) + adcq 200(%rdx), %r8 + movq 208(%rsi), %rcx + movq %r8, 200(%rdi) + adcq 208(%rdx), %rcx + movq 216(%rsi), %r8 + movq %rcx, 208(%rdi) + adcq 216(%rdx), %r8 + movq 224(%rsi), %rcx + movq %r8, 216(%rdi) + adcq 224(%rdx), %rcx + movq 232(%rsi), %r8 + movq %rcx, 224(%rdi) + adcq 232(%rdx), %r8 + movq 240(%rsi), %rcx + movq %r8, 
232(%rdi) + adcq 240(%rdx), %rcx + movq 248(%rsi), %r8 + movq %rcx, 240(%rdi) + adcq 248(%rdx), %r8 + movq 256(%rsi), %rcx + movq %r8, 248(%rdi) + adcq 256(%rdx), %rcx + movq 264(%rsi), %r8 + movq %rcx, 256(%rdi) + adcq 264(%rdx), %r8 + movq 272(%rsi), %rcx + movq %r8, 264(%rdi) + adcq 272(%rdx), %rcx + movq 280(%rsi), %r8 + movq %rcx, 272(%rdi) + adcq 280(%rdx), %r8 + movq 288(%rsi), %rcx + movq %r8, 280(%rdi) + adcq 288(%rdx), %rcx + movq 296(%rsi), %r8 + movq %rcx, 288(%rdi) + adcq 296(%rdx), %r8 + movq 304(%rsi), %rcx + movq %r8, 296(%rdi) + adcq 304(%rdx), %rcx + movq 312(%rsi), %r8 + movq %rcx, 304(%rdi) + adcq 312(%rdx), %r8 + movq 320(%rsi), %rcx + movq %r8, 312(%rdi) + adcq 320(%rdx), %rcx + movq 328(%rsi), %r8 + movq %rcx, 320(%rdi) + adcq 328(%rdx), %r8 + movq 336(%rsi), %rcx + movq %r8, 328(%rdi) + adcq 336(%rdx), %rcx + movq 344(%rsi), %r8 + movq %rcx, 336(%rdi) + adcq 344(%rdx), %r8 + movq 352(%rsi), %rcx + movq %r8, 344(%rdi) + adcq 352(%rdx), %rcx + movq 360(%rsi), %r8 + movq %rcx, 352(%rdi) + adcq 360(%rdx), %r8 + movq 368(%rsi), %rcx + movq %r8, 360(%rdi) + adcq 368(%rdx), %rcx + movq 376(%rsi), %r8 + movq %rcx, 368(%rdi) + adcq 376(%rdx), %r8 + movq 384(%rsi), %rcx + movq %r8, 376(%rdi) + adcq 384(%rdx), %rcx + movq 392(%rsi), %r8 + movq %rcx, 384(%rdi) + adcq 392(%rdx), %r8 + movq 400(%rsi), %rcx + movq %r8, 392(%rdi) + adcq 400(%rdx), %rcx + movq 408(%rsi), %r8 + movq %rcx, 400(%rdi) + adcq 408(%rdx), %r8 + movq 416(%rsi), %rcx + movq %r8, 408(%rdi) + adcq 416(%rdx), %rcx + movq 424(%rsi), %r8 + movq %rcx, 416(%rdi) + adcq 424(%rdx), %r8 + movq 432(%rsi), %rcx + movq %r8, 424(%rdi) + adcq 432(%rdx), %rcx + movq 440(%rsi), %r8 + movq %rcx, 432(%rdi) + adcq 440(%rdx), %r8 + movq 448(%rsi), %rcx + movq %r8, 440(%rdi) + adcq 448(%rdx), %rcx + movq 456(%rsi), %r8 + movq %rcx, 448(%rdi) + adcq 456(%rdx), %r8 + movq 464(%rsi), %rcx + movq %r8, 456(%rdi) + adcq 464(%rdx), %rcx + movq 472(%rsi), %r8 + movq %rcx, 464(%rdi) + adcq 472(%rdx), %r8 + movq 480(%rsi), %rcx + movq %r8, 472(%rdi) + adcq 480(%rdx), %rcx + movq 488(%rsi), %r8 + movq %rcx, 480(%rdi) + adcq 488(%rdx), %r8 + movq 496(%rsi), %rcx + movq %r8, 488(%rdi) + adcq 496(%rdx), %rcx + movq 504(%rsi), %r8 + movq %rcx, 496(%rdi) + adcq 504(%rdx), %r8 + movq %r8, 504(%rdi) + adcq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_4096_add_64,.-sp_4096_add_64 +#endif /* __APPLE__ */ +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +#ifndef __APPLE__ +.globl sp_4096_mul_64 +.type sp_4096_mul_64,@function +.align 16 +sp_4096_mul_64: +#else +.globl _sp_4096_mul_64 +.p2align 4 +_sp_4096_mul_64: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + subq $1576, %rsp + movq %rdi, 1536(%rsp) + movq %rsi, 1544(%rsp) + movq %rdx, 1552(%rsp) + leaq 1024(%rsp), %r10 + leaq 256(%rsi), %r12 + # Add + movq (%rsi), %rax + xorq %r13, %r13 + addq (%r12), %rax + movq 8(%rsi), %rcx + movq %rax, (%r10) + adcq 8(%r12), %rcx + movq 16(%rsi), %r8 + movq %rcx, 8(%r10) + adcq 16(%r12), %r8 + movq 24(%rsi), %rax + movq %r8, 16(%r10) + adcq 24(%r12), %rax + movq 32(%rsi), %rcx + movq %rax, 24(%r10) + adcq 32(%r12), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%r10) + adcq 40(%r12), %r8 + movq 48(%rsi), %rax + movq %r8, 40(%r10) + adcq 48(%r12), %rax + movq 56(%rsi), %rcx + movq %rax, 48(%r10) + adcq 56(%r12), %rcx + movq 64(%rsi), %r8 + movq %rcx, 56(%r10) + adcq 64(%r12), %r8 + movq 72(%rsi), %rax + movq %r8, 64(%r10) + adcq 72(%r12), %rax + movq 80(%rsi), %rcx + movq %rax, 72(%r10) + adcq 80(%r12), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%r10) + adcq 88(%r12), %r8 + movq 96(%rsi), %rax + movq %r8, 88(%r10) + adcq 96(%r12), %rax + movq 104(%rsi), %rcx + movq %rax, 96(%r10) + adcq 104(%r12), %rcx + movq 112(%rsi), %r8 + movq %rcx, 104(%r10) + adcq 112(%r12), %r8 + movq 120(%rsi), %rax + movq %r8, 112(%r10) + adcq 120(%r12), %rax + movq 128(%rsi), %rcx + movq %rax, 120(%r10) + adcq 128(%r12), %rcx + movq 136(%rsi), %r8 + movq %rcx, 128(%r10) + adcq 136(%r12), %r8 + movq 144(%rsi), %rax + movq %r8, 136(%r10) + adcq 144(%r12), %rax + movq 152(%rsi), %rcx + movq %rax, 144(%r10) + adcq 152(%r12), %rcx + movq 160(%rsi), %r8 + movq %rcx, 152(%r10) + adcq 160(%r12), %r8 + movq 168(%rsi), %rax + movq %r8, 160(%r10) + adcq 168(%r12), %rax + movq 176(%rsi), %rcx + movq %rax, 168(%r10) + adcq 176(%r12), %rcx + movq 184(%rsi), %r8 + movq %rcx, 176(%r10) + adcq 184(%r12), %r8 + movq 192(%rsi), %rax + movq %r8, 184(%r10) + adcq 192(%r12), %rax + movq 200(%rsi), %rcx + movq %rax, 192(%r10) + adcq 200(%r12), %rcx + movq 208(%rsi), %r8 + movq %rcx, 200(%r10) + adcq 208(%r12), %r8 + movq 216(%rsi), %rax + movq %r8, 208(%r10) + adcq 216(%r12), %rax + movq 224(%rsi), %rcx + movq %rax, 216(%r10) + adcq 224(%r12), %rcx + movq 232(%rsi), %r8 + movq %rcx, 224(%r10) + adcq 232(%r12), %r8 + movq 240(%rsi), %rax + movq %r8, 232(%r10) + adcq 240(%r12), %rax + movq 248(%rsi), %rcx + movq %rax, 240(%r10) + adcq 248(%r12), %rcx + movq %rcx, 248(%r10) + adcq $0, %r13 + movq %r13, 1560(%rsp) + leaq 1280(%rsp), %r11 + leaq 256(%rdx), %r12 + # Add + movq (%rdx), %rax + xorq %r14, %r14 + addq (%r12), %rax + movq 8(%rdx), %rcx + movq %rax, (%r11) + adcq 8(%r12), %rcx + movq 16(%rdx), %r8 + movq %rcx, 8(%r11) + adcq 16(%r12), %r8 + movq 24(%rdx), %rax + movq %r8, 16(%r11) + adcq 24(%r12), %rax + movq 32(%rdx), %rcx + movq %rax, 24(%r11) + adcq 32(%r12), %rcx + movq 40(%rdx), %r8 + movq %rcx, 32(%r11) + adcq 40(%r12), %r8 + movq 48(%rdx), %rax + movq %r8, 40(%r11) + adcq 48(%r12), %rax + movq 56(%rdx), %rcx + movq %rax, 48(%r11) + adcq 56(%r12), %rcx + movq 64(%rdx), %r8 + movq %rcx, 56(%r11) + adcq 64(%r12), %r8 + movq 72(%rdx), %rax + movq %r8, 64(%r11) + adcq 72(%r12), %rax + movq 80(%rdx), %rcx + movq %rax, 72(%r11) + adcq 80(%r12), %rcx + movq 88(%rdx), %r8 + movq %rcx, 80(%r11) + adcq 88(%r12), %r8 + movq 96(%rdx), %rax + movq %r8, 88(%r11) + adcq 96(%r12), %rax + movq 104(%rdx), %rcx + movq %rax, 96(%r11) + adcq 104(%r12), %rcx + movq 112(%rdx), %r8 + 
movq %rcx, 104(%r11) + adcq 112(%r12), %r8 + movq 120(%rdx), %rax + movq %r8, 112(%r11) + adcq 120(%r12), %rax + movq 128(%rdx), %rcx + movq %rax, 120(%r11) + adcq 128(%r12), %rcx + movq 136(%rdx), %r8 + movq %rcx, 128(%r11) + adcq 136(%r12), %r8 + movq 144(%rdx), %rax + movq %r8, 136(%r11) + adcq 144(%r12), %rax + movq 152(%rdx), %rcx + movq %rax, 144(%r11) + adcq 152(%r12), %rcx + movq 160(%rdx), %r8 + movq %rcx, 152(%r11) + adcq 160(%r12), %r8 + movq 168(%rdx), %rax + movq %r8, 160(%r11) + adcq 168(%r12), %rax + movq 176(%rdx), %rcx + movq %rax, 168(%r11) + adcq 176(%r12), %rcx + movq 184(%rdx), %r8 + movq %rcx, 176(%r11) + adcq 184(%r12), %r8 + movq 192(%rdx), %rax + movq %r8, 184(%r11) + adcq 192(%r12), %rax + movq 200(%rdx), %rcx + movq %rax, 192(%r11) + adcq 200(%r12), %rcx + movq 208(%rdx), %r8 + movq %rcx, 200(%r11) + adcq 208(%r12), %r8 + movq 216(%rdx), %rax + movq %r8, 208(%r11) + adcq 216(%r12), %rax + movq 224(%rdx), %rcx + movq %rax, 216(%r11) + adcq 224(%r12), %rcx + movq 232(%rdx), %r8 + movq %rcx, 224(%r11) + adcq 232(%r12), %r8 + movq 240(%rdx), %rax + movq %r8, 232(%r11) + adcq 240(%r12), %rax + movq 248(%rdx), %rcx + movq %rax, 240(%r11) + adcq 248(%r12), %rcx + movq %rcx, 248(%r11) + adcq $0, %r14 + movq %r14, 1568(%rsp) + movq %r11, %rdx + movq %r10, %rsi + movq %rsp, %rdi +#ifndef __APPLE__ + callq sp_2048_mul_32@plt +#else + callq _sp_2048_mul_32 +#endif /* __APPLE__ */ + movq 1552(%rsp), %rdx + movq 1544(%rsp), %rsi + leaq 512(%rsp), %rdi + addq $256, %rdx + addq $256, %rsi +#ifndef __APPLE__ + callq sp_2048_mul_32@plt +#else + callq _sp_2048_mul_32 +#endif /* __APPLE__ */ + movq 1552(%rsp), %rdx + movq 1544(%rsp), %rsi + movq 1536(%rsp), %rdi +#ifndef __APPLE__ + callq sp_2048_mul_32@plt +#else + callq _sp_2048_mul_32 +#endif /* __APPLE__ */ + movq 1560(%rsp), %r13 + movq 1568(%rsp), %r14 + movq 1536(%rsp), %r15 + movq %r13, %r9 + leaq 1024(%rsp), %r10 + leaq 1280(%rsp), %r11 + andq %r14, %r9 + negq %r13 + negq %r14 + addq $512, %r15 + movq (%r10), %rax + movq (%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, (%r10) + movq %rcx, (%r11) + movq 8(%r10), %rax + movq 8(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 8(%r10) + movq %rcx, 8(%r11) + movq 16(%r10), %rax + movq 16(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 16(%r10) + movq %rcx, 16(%r11) + movq 24(%r10), %rax + movq 24(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 24(%r10) + movq %rcx, 24(%r11) + movq 32(%r10), %rax + movq 32(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 32(%r10) + movq %rcx, 32(%r11) + movq 40(%r10), %rax + movq 40(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 40(%r10) + movq %rcx, 40(%r11) + movq 48(%r10), %rax + movq 48(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 48(%r10) + movq %rcx, 48(%r11) + movq 56(%r10), %rax + movq 56(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 56(%r10) + movq %rcx, 56(%r11) + movq 64(%r10), %rax + movq 64(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 64(%r10) + movq %rcx, 64(%r11) + movq 72(%r10), %rax + movq 72(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 72(%r10) + movq %rcx, 72(%r11) + movq 80(%r10), %rax + movq 80(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 80(%r10) + movq %rcx, 80(%r11) + movq 88(%r10), %rax + movq 88(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 88(%r10) + movq %rcx, 88(%r11) + movq 96(%r10), %rax + movq 96(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + 
movq %rax, 96(%r10) + movq %rcx, 96(%r11) + movq 104(%r10), %rax + movq 104(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 104(%r10) + movq %rcx, 104(%r11) + movq 112(%r10), %rax + movq 112(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 112(%r10) + movq %rcx, 112(%r11) + movq 120(%r10), %rax + movq 120(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 120(%r10) + movq %rcx, 120(%r11) + movq 128(%r10), %rax + movq 128(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 128(%r10) + movq %rcx, 128(%r11) + movq 136(%r10), %rax + movq 136(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 136(%r10) + movq %rcx, 136(%r11) + movq 144(%r10), %rax + movq 144(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 144(%r10) + movq %rcx, 144(%r11) + movq 152(%r10), %rax + movq 152(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 152(%r10) + movq %rcx, 152(%r11) + movq 160(%r10), %rax + movq 160(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 160(%r10) + movq %rcx, 160(%r11) + movq 168(%r10), %rax + movq 168(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 168(%r10) + movq %rcx, 168(%r11) + movq 176(%r10), %rax + movq 176(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 176(%r10) + movq %rcx, 176(%r11) + movq 184(%r10), %rax + movq 184(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 184(%r10) + movq %rcx, 184(%r11) + movq 192(%r10), %rax + movq 192(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 192(%r10) + movq %rcx, 192(%r11) + movq 200(%r10), %rax + movq 200(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 200(%r10) + movq %rcx, 200(%r11) + movq 208(%r10), %rax + movq 208(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 208(%r10) + movq %rcx, 208(%r11) + movq 216(%r10), %rax + movq 216(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 216(%r10) + movq %rcx, 216(%r11) + movq 224(%r10), %rax + movq 224(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 224(%r10) + movq %rcx, 224(%r11) + movq 232(%r10), %rax + movq 232(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 232(%r10) + movq %rcx, 232(%r11) + movq 240(%r10), %rax + movq 240(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 240(%r10) + movq %rcx, 240(%r11) + movq 248(%r10), %rax + movq 248(%r11), %rcx + andq %r14, %rax + andq %r13, %rcx + movq %rax, 248(%r10) + movq %rcx, 248(%r11) + movq (%r10), %rax + addq (%r11), %rax + movq 8(%r10), %rcx + movq %rax, (%r15) + adcq 8(%r11), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r15) + adcq 16(%r11), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r15) + adcq 24(%r11), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r15) + adcq 32(%r11), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r15) + adcq 40(%r11), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r15) + adcq 48(%r11), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r15) + adcq 56(%r11), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r15) + adcq 64(%r11), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r15) + adcq 72(%r11), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r15) + adcq 80(%r11), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r15) + adcq 88(%r11), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r15) + adcq 96(%r11), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r15) + adcq 104(%r11), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r15) + adcq 112(%r11), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r15) + adcq 120(%r11), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r15) + adcq 
128(%r11), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r15) + adcq 136(%r11), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r15) + adcq 144(%r11), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r15) + adcq 152(%r11), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r15) + adcq 160(%r11), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r15) + adcq 168(%r11), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r15) + adcq 176(%r11), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r15) + adcq 184(%r11), %r8 + movq 192(%r10), %rax + movq %r8, 184(%r15) + adcq 192(%r11), %rax + movq 200(%r10), %rcx + movq %rax, 192(%r15) + adcq 200(%r11), %rcx + movq 208(%r10), %r8 + movq %rcx, 200(%r15) + adcq 208(%r11), %r8 + movq 216(%r10), %rax + movq %r8, 208(%r15) + adcq 216(%r11), %rax + movq 224(%r10), %rcx + movq %rax, 216(%r15) + adcq 224(%r11), %rcx + movq 232(%r10), %r8 + movq %rcx, 224(%r15) + adcq 232(%r11), %r8 + movq 240(%r10), %rax + movq %r8, 232(%r15) + adcq 240(%r11), %rax + movq 248(%r10), %rcx + movq %rax, 240(%r15) + adcq 248(%r11), %rcx + movq %rcx, 248(%r15) + adcq $0, %r9 + leaq 512(%rsp), %r11 + movq %rsp, %r10 + movq (%r10), %rax + subq (%r11), %rax + movq 8(%r10), %rcx + movq %rax, (%r10) + sbbq 8(%r11), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r10) + sbbq 16(%r11), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r10) + sbbq 24(%r11), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r10) + sbbq 32(%r11), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r10) + sbbq 40(%r11), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r10) + sbbq 48(%r11), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r10) + sbbq 56(%r11), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r10) + sbbq 64(%r11), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r10) + sbbq 72(%r11), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r10) + sbbq 80(%r11), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r10) + sbbq 88(%r11), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r10) + sbbq 96(%r11), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r10) + sbbq 104(%r11), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r10) + sbbq 112(%r11), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r10) + sbbq 120(%r11), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r10) + sbbq 128(%r11), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r10) + sbbq 136(%r11), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r10) + sbbq 144(%r11), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r10) + sbbq 152(%r11), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r10) + sbbq 160(%r11), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r10) + sbbq 168(%r11), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r10) + sbbq 176(%r11), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r10) + sbbq 184(%r11), %r8 + movq 192(%r10), %rax + movq %r8, 184(%r10) + sbbq 192(%r11), %rax + movq 200(%r10), %rcx + movq %rax, 192(%r10) + sbbq 200(%r11), %rcx + movq 208(%r10), %r8 + movq %rcx, 200(%r10) + sbbq 208(%r11), %r8 + movq 216(%r10), %rax + movq %r8, 208(%r10) + sbbq 216(%r11), %rax + movq 224(%r10), %rcx + movq %rax, 216(%r10) + sbbq 224(%r11), %rcx + movq 232(%r10), %r8 + movq %rcx, 224(%r10) + sbbq 232(%r11), %r8 + movq 240(%r10), %rax + movq %r8, 232(%r10) + sbbq 240(%r11), %rax + movq 248(%r10), %rcx + movq %rax, 240(%r10) + sbbq 248(%r11), %rcx + movq 256(%r10), %r8 + movq %rcx, 248(%r10) + sbbq 256(%r11), %r8 + movq 264(%r10), %rax + movq %r8, 256(%r10) + sbbq 264(%r11), %rax + movq 272(%r10), %rcx + movq %rax, 264(%r10) + sbbq 272(%r11), %rcx + movq 280(%r10), %r8 + movq %rcx, 272(%r10) + sbbq 280(%r11), %r8 + movq 288(%r10), %rax + movq %r8, 280(%r10) + sbbq 
288(%r11), %rax + movq 296(%r10), %rcx + movq %rax, 288(%r10) + sbbq 296(%r11), %rcx + movq 304(%r10), %r8 + movq %rcx, 296(%r10) + sbbq 304(%r11), %r8 + movq 312(%r10), %rax + movq %r8, 304(%r10) + sbbq 312(%r11), %rax + movq 320(%r10), %rcx + movq %rax, 312(%r10) + sbbq 320(%r11), %rcx + movq 328(%r10), %r8 + movq %rcx, 320(%r10) + sbbq 328(%r11), %r8 + movq 336(%r10), %rax + movq %r8, 328(%r10) + sbbq 336(%r11), %rax + movq 344(%r10), %rcx + movq %rax, 336(%r10) + sbbq 344(%r11), %rcx + movq 352(%r10), %r8 + movq %rcx, 344(%r10) + sbbq 352(%r11), %r8 + movq 360(%r10), %rax + movq %r8, 352(%r10) + sbbq 360(%r11), %rax + movq 368(%r10), %rcx + movq %rax, 360(%r10) + sbbq 368(%r11), %rcx + movq 376(%r10), %r8 + movq %rcx, 368(%r10) + sbbq 376(%r11), %r8 + movq 384(%r10), %rax + movq %r8, 376(%r10) + sbbq 384(%r11), %rax + movq 392(%r10), %rcx + movq %rax, 384(%r10) + sbbq 392(%r11), %rcx + movq 400(%r10), %r8 + movq %rcx, 392(%r10) + sbbq 400(%r11), %r8 + movq 408(%r10), %rax + movq %r8, 400(%r10) + sbbq 408(%r11), %rax + movq 416(%r10), %rcx + movq %rax, 408(%r10) + sbbq 416(%r11), %rcx + movq 424(%r10), %r8 + movq %rcx, 416(%r10) + sbbq 424(%r11), %r8 + movq 432(%r10), %rax + movq %r8, 424(%r10) + sbbq 432(%r11), %rax + movq 440(%r10), %rcx + movq %rax, 432(%r10) + sbbq 440(%r11), %rcx + movq 448(%r10), %r8 + movq %rcx, 440(%r10) + sbbq 448(%r11), %r8 + movq 456(%r10), %rax + movq %r8, 448(%r10) + sbbq 456(%r11), %rax + movq 464(%r10), %rcx + movq %rax, 456(%r10) + sbbq 464(%r11), %rcx + movq 472(%r10), %r8 + movq %rcx, 464(%r10) + sbbq 472(%r11), %r8 + movq 480(%r10), %rax + movq %r8, 472(%r10) + sbbq 480(%r11), %rax + movq 488(%r10), %rcx + movq %rax, 480(%r10) + sbbq 488(%r11), %rcx + movq 496(%r10), %r8 + movq %rcx, 488(%r10) + sbbq 496(%r11), %r8 + movq 504(%r10), %rax + movq %r8, 496(%r10) + sbbq 504(%r11), %rax + movq %rax, 504(%r10) + sbbq $0, %r9 + movq (%r10), %rax + subq (%rdi), %rax + movq 8(%r10), %rcx + movq %rax, (%r10) + sbbq 8(%rdi), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r10) + sbbq 16(%rdi), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r10) + sbbq 24(%rdi), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r10) + sbbq 32(%rdi), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r10) + sbbq 40(%rdi), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r10) + sbbq 48(%rdi), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r10) + sbbq 56(%rdi), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r10) + sbbq 64(%rdi), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r10) + sbbq 72(%rdi), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r10) + sbbq 80(%rdi), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r10) + sbbq 88(%rdi), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r10) + sbbq 96(%rdi), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r10) + sbbq 104(%rdi), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r10) + sbbq 112(%rdi), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r10) + sbbq 120(%rdi), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r10) + sbbq 128(%rdi), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r10) + sbbq 136(%rdi), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r10) + sbbq 144(%rdi), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r10) + sbbq 152(%rdi), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r10) + sbbq 160(%rdi), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r10) + sbbq 168(%rdi), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r10) + sbbq 176(%rdi), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r10) + sbbq 184(%rdi), %r8 + movq 192(%r10), %rax + movq %r8, 184(%r10) + sbbq 192(%rdi), %rax + movq 200(%r10), %rcx + movq 
%rax, 192(%r10) + sbbq 200(%rdi), %rcx + movq 208(%r10), %r8 + movq %rcx, 200(%r10) + sbbq 208(%rdi), %r8 + movq 216(%r10), %rax + movq %r8, 208(%r10) + sbbq 216(%rdi), %rax + movq 224(%r10), %rcx + movq %rax, 216(%r10) + sbbq 224(%rdi), %rcx + movq 232(%r10), %r8 + movq %rcx, 224(%r10) + sbbq 232(%rdi), %r8 + movq 240(%r10), %rax + movq %r8, 232(%r10) + sbbq 240(%rdi), %rax + movq 248(%r10), %rcx + movq %rax, 240(%r10) + sbbq 248(%rdi), %rcx + movq 256(%r10), %r8 + movq %rcx, 248(%r10) + sbbq 256(%rdi), %r8 + movq 264(%r10), %rax + movq %r8, 256(%r10) + sbbq 264(%rdi), %rax + movq 272(%r10), %rcx + movq %rax, 264(%r10) + sbbq 272(%rdi), %rcx + movq 280(%r10), %r8 + movq %rcx, 272(%r10) + sbbq 280(%rdi), %r8 + movq 288(%r10), %rax + movq %r8, 280(%r10) + sbbq 288(%rdi), %rax + movq 296(%r10), %rcx + movq %rax, 288(%r10) + sbbq 296(%rdi), %rcx + movq 304(%r10), %r8 + movq %rcx, 296(%r10) + sbbq 304(%rdi), %r8 + movq 312(%r10), %rax + movq %r8, 304(%r10) + sbbq 312(%rdi), %rax + movq 320(%r10), %rcx + movq %rax, 312(%r10) + sbbq 320(%rdi), %rcx + movq 328(%r10), %r8 + movq %rcx, 320(%r10) + sbbq 328(%rdi), %r8 + movq 336(%r10), %rax + movq %r8, 328(%r10) + sbbq 336(%rdi), %rax + movq 344(%r10), %rcx + movq %rax, 336(%r10) + sbbq 344(%rdi), %rcx + movq 352(%r10), %r8 + movq %rcx, 344(%r10) + sbbq 352(%rdi), %r8 + movq 360(%r10), %rax + movq %r8, 352(%r10) + sbbq 360(%rdi), %rax + movq 368(%r10), %rcx + movq %rax, 360(%r10) + sbbq 368(%rdi), %rcx + movq 376(%r10), %r8 + movq %rcx, 368(%r10) + sbbq 376(%rdi), %r8 + movq 384(%r10), %rax + movq %r8, 376(%r10) + sbbq 384(%rdi), %rax + movq 392(%r10), %rcx + movq %rax, 384(%r10) + sbbq 392(%rdi), %rcx + movq 400(%r10), %r8 + movq %rcx, 392(%r10) + sbbq 400(%rdi), %r8 + movq 408(%r10), %rax + movq %r8, 400(%r10) + sbbq 408(%rdi), %rax + movq 416(%r10), %rcx + movq %rax, 408(%r10) + sbbq 416(%rdi), %rcx + movq 424(%r10), %r8 + movq %rcx, 416(%r10) + sbbq 424(%rdi), %r8 + movq 432(%r10), %rax + movq %r8, 424(%r10) + sbbq 432(%rdi), %rax + movq 440(%r10), %rcx + movq %rax, 432(%r10) + sbbq 440(%rdi), %rcx + movq 448(%r10), %r8 + movq %rcx, 440(%r10) + sbbq 448(%rdi), %r8 + movq 456(%r10), %rax + movq %r8, 448(%r10) + sbbq 456(%rdi), %rax + movq 464(%r10), %rcx + movq %rax, 456(%r10) + sbbq 464(%rdi), %rcx + movq 472(%r10), %r8 + movq %rcx, 464(%r10) + sbbq 472(%rdi), %r8 + movq 480(%r10), %rax + movq %r8, 472(%r10) + sbbq 480(%rdi), %rax + movq 488(%r10), %rcx + movq %rax, 480(%r10) + sbbq 488(%rdi), %rcx + movq 496(%r10), %r8 + movq %rcx, 488(%r10) + sbbq 496(%rdi), %r8 + movq 504(%r10), %rax + movq %r8, 496(%r10) + sbbq 504(%rdi), %rax + movq %rax, 504(%r10) + sbbq $0, %r9 + subq $256, %r15 + # Add + movq (%r15), %rax + addq (%r10), %rax + movq 8(%r15), %rcx + movq %rax, (%r15) + adcq 8(%r10), %rcx + movq 16(%r15), %r8 + movq %rcx, 8(%r15) + adcq 16(%r10), %r8 + movq 24(%r15), %rax + movq %r8, 16(%r15) + adcq 24(%r10), %rax + movq 32(%r15), %rcx + movq %rax, 24(%r15) + adcq 32(%r10), %rcx + movq 40(%r15), %r8 + movq %rcx, 32(%r15) + adcq 40(%r10), %r8 + movq 48(%r15), %rax + movq %r8, 40(%r15) + adcq 48(%r10), %rax + movq 56(%r15), %rcx + movq %rax, 48(%r15) + adcq 56(%r10), %rcx + movq 64(%r15), %r8 + movq %rcx, 56(%r15) + adcq 64(%r10), %r8 + movq 72(%r15), %rax + movq %r8, 64(%r15) + adcq 72(%r10), %rax + movq 80(%r15), %rcx + movq %rax, 72(%r15) + adcq 80(%r10), %rcx + movq 88(%r15), %r8 + movq %rcx, 80(%r15) + adcq 88(%r10), %r8 + movq 96(%r15), %rax + movq %r8, 88(%r15) + adcq 96(%r10), %rax + movq 104(%r15), %rcx + movq %rax, 96(%r15) + adcq 
104(%r10), %rcx + movq 112(%r15), %r8 + movq %rcx, 104(%r15) + adcq 112(%r10), %r8 + movq 120(%r15), %rax + movq %r8, 112(%r15) + adcq 120(%r10), %rax + movq 128(%r15), %rcx + movq %rax, 120(%r15) + adcq 128(%r10), %rcx + movq 136(%r15), %r8 + movq %rcx, 128(%r15) + adcq 136(%r10), %r8 + movq 144(%r15), %rax + movq %r8, 136(%r15) + adcq 144(%r10), %rax + movq 152(%r15), %rcx + movq %rax, 144(%r15) + adcq 152(%r10), %rcx + movq 160(%r15), %r8 + movq %rcx, 152(%r15) + adcq 160(%r10), %r8 + movq 168(%r15), %rax + movq %r8, 160(%r15) + adcq 168(%r10), %rax + movq 176(%r15), %rcx + movq %rax, 168(%r15) + adcq 176(%r10), %rcx + movq 184(%r15), %r8 + movq %rcx, 176(%r15) + adcq 184(%r10), %r8 + movq 192(%r15), %rax + movq %r8, 184(%r15) + adcq 192(%r10), %rax + movq 200(%r15), %rcx + movq %rax, 192(%r15) + adcq 200(%r10), %rcx + movq 208(%r15), %r8 + movq %rcx, 200(%r15) + adcq 208(%r10), %r8 + movq 216(%r15), %rax + movq %r8, 208(%r15) + adcq 216(%r10), %rax + movq 224(%r15), %rcx + movq %rax, 216(%r15) + adcq 224(%r10), %rcx + movq 232(%r15), %r8 + movq %rcx, 224(%r15) + adcq 232(%r10), %r8 + movq 240(%r15), %rax + movq %r8, 232(%r15) + adcq 240(%r10), %rax + movq 248(%r15), %rcx + movq %rax, 240(%r15) + adcq 248(%r10), %rcx + movq 256(%r15), %r8 + movq %rcx, 248(%r15) + adcq 256(%r10), %r8 + movq 264(%r15), %rax + movq %r8, 256(%r15) + adcq 264(%r10), %rax + movq 272(%r15), %rcx + movq %rax, 264(%r15) + adcq 272(%r10), %rcx + movq 280(%r15), %r8 + movq %rcx, 272(%r15) + adcq 280(%r10), %r8 + movq 288(%r15), %rax + movq %r8, 280(%r15) + adcq 288(%r10), %rax + movq 296(%r15), %rcx + movq %rax, 288(%r15) + adcq 296(%r10), %rcx + movq 304(%r15), %r8 + movq %rcx, 296(%r15) + adcq 304(%r10), %r8 + movq 312(%r15), %rax + movq %r8, 304(%r15) + adcq 312(%r10), %rax + movq 320(%r15), %rcx + movq %rax, 312(%r15) + adcq 320(%r10), %rcx + movq 328(%r15), %r8 + movq %rcx, 320(%r15) + adcq 328(%r10), %r8 + movq 336(%r15), %rax + movq %r8, 328(%r15) + adcq 336(%r10), %rax + movq 344(%r15), %rcx + movq %rax, 336(%r15) + adcq 344(%r10), %rcx + movq 352(%r15), %r8 + movq %rcx, 344(%r15) + adcq 352(%r10), %r8 + movq 360(%r15), %rax + movq %r8, 352(%r15) + adcq 360(%r10), %rax + movq 368(%r15), %rcx + movq %rax, 360(%r15) + adcq 368(%r10), %rcx + movq 376(%r15), %r8 + movq %rcx, 368(%r15) + adcq 376(%r10), %r8 + movq 384(%r15), %rax + movq %r8, 376(%r15) + adcq 384(%r10), %rax + movq 392(%r15), %rcx + movq %rax, 384(%r15) + adcq 392(%r10), %rcx + movq 400(%r15), %r8 + movq %rcx, 392(%r15) + adcq 400(%r10), %r8 + movq 408(%r15), %rax + movq %r8, 400(%r15) + adcq 408(%r10), %rax + movq 416(%r15), %rcx + movq %rax, 408(%r15) + adcq 416(%r10), %rcx + movq 424(%r15), %r8 + movq %rcx, 416(%r15) + adcq 424(%r10), %r8 + movq 432(%r15), %rax + movq %r8, 424(%r15) + adcq 432(%r10), %rax + movq 440(%r15), %rcx + movq %rax, 432(%r15) + adcq 440(%r10), %rcx + movq 448(%r15), %r8 + movq %rcx, 440(%r15) + adcq 448(%r10), %r8 + movq 456(%r15), %rax + movq %r8, 448(%r15) + adcq 456(%r10), %rax + movq 464(%r15), %rcx + movq %rax, 456(%r15) + adcq 464(%r10), %rcx + movq 472(%r15), %r8 + movq %rcx, 464(%r15) + adcq 472(%r10), %r8 + movq 480(%r15), %rax + movq %r8, 472(%r15) + adcq 480(%r10), %rax + movq 488(%r15), %rcx + movq %rax, 480(%r15) + adcq 488(%r10), %rcx + movq 496(%r15), %r8 + movq %rcx, 488(%r15) + adcq 496(%r10), %r8 + movq 504(%r15), %rax + movq %r8, 496(%r15) + adcq 504(%r10), %rax + movq %rax, 504(%r15) + adcq $0, %r9 + movq %r9, 768(%rdi) + addq $256, %r15 + # Add + movq (%r15), %rax + xorq %r9, %r9 + addq (%r11), 
%rax + movq 8(%r15), %rcx + movq %rax, (%r15) + adcq 8(%r11), %rcx + movq 16(%r15), %r8 + movq %rcx, 8(%r15) + adcq 16(%r11), %r8 + movq 24(%r15), %rax + movq %r8, 16(%r15) + adcq 24(%r11), %rax + movq 32(%r15), %rcx + movq %rax, 24(%r15) + adcq 32(%r11), %rcx + movq 40(%r15), %r8 + movq %rcx, 32(%r15) + adcq 40(%r11), %r8 + movq 48(%r15), %rax + movq %r8, 40(%r15) + adcq 48(%r11), %rax + movq 56(%r15), %rcx + movq %rax, 48(%r15) + adcq 56(%r11), %rcx + movq 64(%r15), %r8 + movq %rcx, 56(%r15) + adcq 64(%r11), %r8 + movq 72(%r15), %rax + movq %r8, 64(%r15) + adcq 72(%r11), %rax + movq 80(%r15), %rcx + movq %rax, 72(%r15) + adcq 80(%r11), %rcx + movq 88(%r15), %r8 + movq %rcx, 80(%r15) + adcq 88(%r11), %r8 + movq 96(%r15), %rax + movq %r8, 88(%r15) + adcq 96(%r11), %rax + movq 104(%r15), %rcx + movq %rax, 96(%r15) + adcq 104(%r11), %rcx + movq 112(%r15), %r8 + movq %rcx, 104(%r15) + adcq 112(%r11), %r8 + movq 120(%r15), %rax + movq %r8, 112(%r15) + adcq 120(%r11), %rax + movq 128(%r15), %rcx + movq %rax, 120(%r15) + adcq 128(%r11), %rcx + movq 136(%r15), %r8 + movq %rcx, 128(%r15) + adcq 136(%r11), %r8 + movq 144(%r15), %rax + movq %r8, 136(%r15) + adcq 144(%r11), %rax + movq 152(%r15), %rcx + movq %rax, 144(%r15) + adcq 152(%r11), %rcx + movq 160(%r15), %r8 + movq %rcx, 152(%r15) + adcq 160(%r11), %r8 + movq 168(%r15), %rax + movq %r8, 160(%r15) + adcq 168(%r11), %rax + movq 176(%r15), %rcx + movq %rax, 168(%r15) + adcq 176(%r11), %rcx + movq 184(%r15), %r8 + movq %rcx, 176(%r15) + adcq 184(%r11), %r8 + movq 192(%r15), %rax + movq %r8, 184(%r15) + adcq 192(%r11), %rax + movq 200(%r15), %rcx + movq %rax, 192(%r15) + adcq 200(%r11), %rcx + movq 208(%r15), %r8 + movq %rcx, 200(%r15) + adcq 208(%r11), %r8 + movq 216(%r15), %rax + movq %r8, 208(%r15) + adcq 216(%r11), %rax + movq 224(%r15), %rcx + movq %rax, 216(%r15) + adcq 224(%r11), %rcx + movq 232(%r15), %r8 + movq %rcx, 224(%r15) + adcq 232(%r11), %r8 + movq 240(%r15), %rax + movq %r8, 232(%r15) + adcq 240(%r11), %rax + movq 248(%r15), %rcx + movq %rax, 240(%r15) + adcq 248(%r11), %rcx + movq 256(%r15), %r8 + movq %rcx, 248(%r15) + adcq 256(%r11), %r8 + movq %r8, 256(%r15) + adcq $0, %r9 + # Add to zero + movq 264(%r11), %rax + adcq $0, %rax + movq 272(%r11), %rcx + movq %rax, 264(%r15) + adcq $0, %rcx + movq 280(%r11), %r8 + movq %rcx, 272(%r15) + adcq $0, %r8 + movq 288(%r11), %rax + movq %r8, 280(%r15) + adcq $0, %rax + movq 296(%r11), %rcx + movq %rax, 288(%r15) + adcq $0, %rcx + movq 304(%r11), %r8 + movq %rcx, 296(%r15) + adcq $0, %r8 + movq 312(%r11), %rax + movq %r8, 304(%r15) + adcq $0, %rax + movq 320(%r11), %rcx + movq %rax, 312(%r15) + adcq $0, %rcx + movq 328(%r11), %r8 + movq %rcx, 320(%r15) + adcq $0, %r8 + movq 336(%r11), %rax + movq %r8, 328(%r15) + adcq $0, %rax + movq 344(%r11), %rcx + movq %rax, 336(%r15) + adcq $0, %rcx + movq 352(%r11), %r8 + movq %rcx, 344(%r15) + adcq $0, %r8 + movq 360(%r11), %rax + movq %r8, 352(%r15) + adcq $0, %rax + movq 368(%r11), %rcx + movq %rax, 360(%r15) + adcq $0, %rcx + movq 376(%r11), %r8 + movq %rcx, 368(%r15) + adcq $0, %r8 + movq 384(%r11), %rax + movq %r8, 376(%r15) + adcq $0, %rax + movq 392(%r11), %rcx + movq %rax, 384(%r15) + adcq $0, %rcx + movq 400(%r11), %r8 + movq %rcx, 392(%r15) + adcq $0, %r8 + movq 408(%r11), %rax + movq %r8, 400(%r15) + adcq $0, %rax + movq 416(%r11), %rcx + movq %rax, 408(%r15) + adcq $0, %rcx + movq 424(%r11), %r8 + movq %rcx, 416(%r15) + adcq $0, %r8 + movq 432(%r11), %rax + movq %r8, 424(%r15) + adcq $0, %rax + movq 440(%r11), %rcx + movq %rax, 
432(%r15) + adcq $0, %rcx + movq 448(%r11), %r8 + movq %rcx, 440(%r15) + adcq $0, %r8 + movq 456(%r11), %rax + movq %r8, 448(%r15) + adcq $0, %rax + movq 464(%r11), %rcx + movq %rax, 456(%r15) + adcq $0, %rcx + movq 472(%r11), %r8 + movq %rcx, 464(%r15) + adcq $0, %r8 + movq 480(%r11), %rax + movq %r8, 472(%r15) + adcq $0, %rax + movq 488(%r11), %rcx + movq %rax, 480(%r15) + adcq $0, %rcx + movq 496(%r11), %r8 + movq %rcx, 488(%r15) + adcq $0, %r8 + movq 504(%r11), %rax + movq %r8, 496(%r15) + adcq $0, %rax + movq %rax, 504(%r15) + addq $1576, %rsp + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_4096_mul_64,.-sp_4096_mul_64 +#endif /* __APPLE__ */ +/* Add a to a into r. (r = a + a) + * + * r A single precision integer. + * a A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_2048_dbl_32 +.type sp_2048_dbl_32,@function +.align 16 +sp_2048_dbl_32: +#else +.globl _sp_2048_dbl_32 +.p2align 4 +_sp_2048_dbl_32: +#endif /* __APPLE__ */ + movq (%rsi), %rdx + xorq %rax, %rax + addq %rdx, %rdx + movq 8(%rsi), %rcx + movq %rdx, (%rdi) + adcq %rcx, %rcx + movq 16(%rsi), %rdx + movq %rcx, 8(%rdi) + adcq %rdx, %rdx + movq 24(%rsi), %rcx + movq %rdx, 16(%rdi) + adcq %rcx, %rcx + movq 32(%rsi), %rdx + movq %rcx, 24(%rdi) + adcq %rdx, %rdx + movq 40(%rsi), %rcx + movq %rdx, 32(%rdi) + adcq %rcx, %rcx + movq 48(%rsi), %rdx + movq %rcx, 40(%rdi) + adcq %rdx, %rdx + movq 56(%rsi), %rcx + movq %rdx, 48(%rdi) + adcq %rcx, %rcx + movq 64(%rsi), %rdx + movq %rcx, 56(%rdi) + adcq %rdx, %rdx + movq 72(%rsi), %rcx + movq %rdx, 64(%rdi) + adcq %rcx, %rcx + movq 80(%rsi), %rdx + movq %rcx, 72(%rdi) + adcq %rdx, %rdx + movq 88(%rsi), %rcx + movq %rdx, 80(%rdi) + adcq %rcx, %rcx + movq 96(%rsi), %rdx + movq %rcx, 88(%rdi) + adcq %rdx, %rdx + movq 104(%rsi), %rcx + movq %rdx, 96(%rdi) + adcq %rcx, %rcx + movq 112(%rsi), %rdx + movq %rcx, 104(%rdi) + adcq %rdx, %rdx + movq 120(%rsi), %rcx + movq %rdx, 112(%rdi) + adcq %rcx, %rcx + movq 128(%rsi), %rdx + movq %rcx, 120(%rdi) + adcq %rdx, %rdx + movq 136(%rsi), %rcx + movq %rdx, 128(%rdi) + adcq %rcx, %rcx + movq 144(%rsi), %rdx + movq %rcx, 136(%rdi) + adcq %rdx, %rdx + movq 152(%rsi), %rcx + movq %rdx, 144(%rdi) + adcq %rcx, %rcx + movq 160(%rsi), %rdx + movq %rcx, 152(%rdi) + adcq %rdx, %rdx + movq 168(%rsi), %rcx + movq %rdx, 160(%rdi) + adcq %rcx, %rcx + movq 176(%rsi), %rdx + movq %rcx, 168(%rdi) + adcq %rdx, %rdx + movq 184(%rsi), %rcx + movq %rdx, 176(%rdi) + adcq %rcx, %rcx + movq 192(%rsi), %rdx + movq %rcx, 184(%rdi) + adcq %rdx, %rdx + movq 200(%rsi), %rcx + movq %rdx, 192(%rdi) + adcq %rcx, %rcx + movq 208(%rsi), %rdx + movq %rcx, 200(%rdi) + adcq %rdx, %rdx + movq 216(%rsi), %rcx + movq %rdx, 208(%rdi) + adcq %rcx, %rcx + movq 224(%rsi), %rdx + movq %rcx, 216(%rdi) + adcq %rdx, %rdx + movq 232(%rsi), %rcx + movq %rdx, 224(%rdi) + adcq %rcx, %rcx + movq 240(%rsi), %rdx + movq %rcx, 232(%rdi) + adcq %rdx, %rdx + movq 248(%rsi), %rcx + movq %rdx, 240(%rdi) + adcq %rcx, %rcx + movq %rcx, 248(%rdi) + adcq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_2048_dbl_32,.-sp_2048_dbl_32 +#endif /* __APPLE__ */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
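+ *
+ * Editorial note: the body below is one level of Karatsuba squaring.
+ * Splitting a = a0 + a1*2^2048 (32 words per half), it computes a0^2,
+ * a1^2 and (a0+a1)^2 with three sp_2048_sqr_32 calls, then recovers
+ * the cross term from (a0+a1)^2 - a0^2 - a1^2 = 2*a0*a1 before the
+ * final recombination. A minimal C sketch of that structure, assuming
+ * hypothetical helpers (add_n, sqr_n, sub_2n, copy_n, add_at are
+ * illustrative names only, not functions in this file):
+ *
+ *   typedef unsigned long long word;
+ *   void sqr_karatsuba(word *r, const word *a, int n) {
+ *       int h = n / 2;                   /* here n = 64, h = 32  */
+ *       word s[h], z0[n], z2[n], m[n];
+ *       word c = add_n(s, a, a + h, h);  /* s = a0 + a1, carry c */
+ *       sqr_n(z0, a, h);                 /* z0 = a0^2            */
+ *       sqr_n(z2, a + h, h);             /* z2 = a1^2            */
+ *       sqr_n(m, s, h);                  /* m = (a0+a1)^2; the   */
+ *                                        /* carry c is fixed up  */
+ *                                        /* by masking, as in    */
+ *                                        /* the asm below        */
+ *       sub_2n(m, z0, n);                /* m -= a0^2            */
+ *       sub_2n(m, z2, n);                /* m -= a1^2 => 2*a0*a1 */
+ *       copy_n(r, z0, n);                /* low result words     */
+ *       copy_n(r + n, z2, n);            /* high result words    */
+ *       add_at(r + h, m, n);             /* r += m << 2048       */
+ *   }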
+ */ +#ifndef __APPLE__ +.globl sp_4096_sqr_64 +.type sp_4096_sqr_64,@function +.align 16 +sp_4096_sqr_64: +#else +.globl _sp_4096_sqr_64 +.p2align 4 +_sp_4096_sqr_64: +#endif /* __APPLE__ */ + subq $1304, %rsp + movq %rdi, 1280(%rsp) + movq %rsi, 1288(%rsp) + leaq 1024(%rsp), %r8 + leaq 256(%rsi), %r9 + # Add + movq (%rsi), %rdx + xorq %rcx, %rcx + addq (%r9), %rdx + movq 8(%rsi), %rax + movq %rdx, (%r8) + adcq 8(%r9), %rax + movq 16(%rsi), %rdx + movq %rax, 8(%r8) + adcq 16(%r9), %rdx + movq 24(%rsi), %rax + movq %rdx, 16(%r8) + adcq 24(%r9), %rax + movq 32(%rsi), %rdx + movq %rax, 24(%r8) + adcq 32(%r9), %rdx + movq 40(%rsi), %rax + movq %rdx, 32(%r8) + adcq 40(%r9), %rax + movq 48(%rsi), %rdx + movq %rax, 40(%r8) + adcq 48(%r9), %rdx + movq 56(%rsi), %rax + movq %rdx, 48(%r8) + adcq 56(%r9), %rax + movq 64(%rsi), %rdx + movq %rax, 56(%r8) + adcq 64(%r9), %rdx + movq 72(%rsi), %rax + movq %rdx, 64(%r8) + adcq 72(%r9), %rax + movq 80(%rsi), %rdx + movq %rax, 72(%r8) + adcq 80(%r9), %rdx + movq 88(%rsi), %rax + movq %rdx, 80(%r8) + adcq 88(%r9), %rax + movq 96(%rsi), %rdx + movq %rax, 88(%r8) + adcq 96(%r9), %rdx + movq 104(%rsi), %rax + movq %rdx, 96(%r8) + adcq 104(%r9), %rax + movq 112(%rsi), %rdx + movq %rax, 104(%r8) + adcq 112(%r9), %rdx + movq 120(%rsi), %rax + movq %rdx, 112(%r8) + adcq 120(%r9), %rax + movq 128(%rsi), %rdx + movq %rax, 120(%r8) + adcq 128(%r9), %rdx + movq 136(%rsi), %rax + movq %rdx, 128(%r8) + adcq 136(%r9), %rax + movq 144(%rsi), %rdx + movq %rax, 136(%r8) + adcq 144(%r9), %rdx + movq 152(%rsi), %rax + movq %rdx, 144(%r8) + adcq 152(%r9), %rax + movq 160(%rsi), %rdx + movq %rax, 152(%r8) + adcq 160(%r9), %rdx + movq 168(%rsi), %rax + movq %rdx, 160(%r8) + adcq 168(%r9), %rax + movq 176(%rsi), %rdx + movq %rax, 168(%r8) + adcq 176(%r9), %rdx + movq 184(%rsi), %rax + movq %rdx, 176(%r8) + adcq 184(%r9), %rax + movq 192(%rsi), %rdx + movq %rax, 184(%r8) + adcq 192(%r9), %rdx + movq 200(%rsi), %rax + movq %rdx, 192(%r8) + adcq 200(%r9), %rax + movq 208(%rsi), %rdx + movq %rax, 200(%r8) + adcq 208(%r9), %rdx + movq 216(%rsi), %rax + movq %rdx, 208(%r8) + adcq 216(%r9), %rax + movq 224(%rsi), %rdx + movq %rax, 216(%r8) + adcq 224(%r9), %rdx + movq 232(%rsi), %rax + movq %rdx, 224(%r8) + adcq 232(%r9), %rax + movq 240(%rsi), %rdx + movq %rax, 232(%r8) + adcq 240(%r9), %rdx + movq 248(%rsi), %rax + movq %rdx, 240(%r8) + adcq 248(%r9), %rax + movq %rax, 248(%r8) + adcq $0, %rcx + movq %rcx, 1296(%rsp) + movq %r8, %rsi + movq %rsp, %rdi +#ifndef __APPLE__ + callq sp_2048_sqr_32@plt +#else + callq _sp_2048_sqr_32 +#endif /* __APPLE__ */ + movq 1288(%rsp), %rsi + leaq 512(%rsp), %rdi + addq $256, %rsi +#ifndef __APPLE__ + callq sp_2048_sqr_32@plt +#else + callq _sp_2048_sqr_32 +#endif /* __APPLE__ */ + movq 1288(%rsp), %rsi + movq 1280(%rsp), %rdi +#ifndef __APPLE__ + callq sp_2048_sqr_32@plt +#else + callq _sp_2048_sqr_32 +#endif /* __APPLE__ */ + movq 1296(%rsp), %r10 + leaq 1024(%rsp), %r8 + movq %r10, %rcx + negq %r10 + movq (%r8), %rdx + movq 8(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 512(%rdi) + movq %rax, 520(%rdi) + movq 16(%r8), %rdx + movq 24(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 528(%rdi) + movq %rax, 536(%rdi) + movq 32(%r8), %rdx + movq 40(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 544(%rdi) + movq %rax, 552(%rdi) + movq 48(%r8), %rdx + movq 56(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 560(%rdi) + movq %rax, 568(%rdi) + movq 64(%r8), %rdx + movq 72(%r8), %rax + andq %r10, %rdx + andq 
%r10, %rax + movq %rdx, 576(%rdi) + movq %rax, 584(%rdi) + movq 80(%r8), %rdx + movq 88(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 592(%rdi) + movq %rax, 600(%rdi) + movq 96(%r8), %rdx + movq 104(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 608(%rdi) + movq %rax, 616(%rdi) + movq 112(%r8), %rdx + movq 120(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 624(%rdi) + movq %rax, 632(%rdi) + movq 128(%r8), %rdx + movq 136(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 640(%rdi) + movq %rax, 648(%rdi) + movq 144(%r8), %rdx + movq 152(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 656(%rdi) + movq %rax, 664(%rdi) + movq 160(%r8), %rdx + movq 168(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 672(%rdi) + movq %rax, 680(%rdi) + movq 176(%r8), %rdx + movq 184(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 688(%rdi) + movq %rax, 696(%rdi) + movq 192(%r8), %rdx + movq 200(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 704(%rdi) + movq %rax, 712(%rdi) + movq 208(%r8), %rdx + movq 216(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 720(%rdi) + movq %rax, 728(%rdi) + movq 224(%r8), %rdx + movq 232(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 736(%rdi) + movq %rax, 744(%rdi) + movq 240(%r8), %rdx + movq 248(%r8), %rax + andq %r10, %rdx + andq %r10, %rax + movq %rdx, 752(%rdi) + movq %rax, 760(%rdi) + movq 512(%rdi), %rdx + addq %rdx, %rdx + movq 520(%rdi), %rax + movq %rdx, 512(%rdi) + adcq %rax, %rax + movq 528(%rdi), %rdx + movq %rax, 520(%rdi) + adcq %rdx, %rdx + movq 536(%rdi), %rax + movq %rdx, 528(%rdi) + adcq %rax, %rax + movq 544(%rdi), %rdx + movq %rax, 536(%rdi) + adcq %rdx, %rdx + movq 552(%rdi), %rax + movq %rdx, 544(%rdi) + adcq %rax, %rax + movq 560(%rdi), %rdx + movq %rax, 552(%rdi) + adcq %rdx, %rdx + movq 568(%rdi), %rax + movq %rdx, 560(%rdi) + adcq %rax, %rax + movq 576(%rdi), %rdx + movq %rax, 568(%rdi) + adcq %rdx, %rdx + movq 584(%rdi), %rax + movq %rdx, 576(%rdi) + adcq %rax, %rax + movq 592(%rdi), %rdx + movq %rax, 584(%rdi) + adcq %rdx, %rdx + movq 600(%rdi), %rax + movq %rdx, 592(%rdi) + adcq %rax, %rax + movq 608(%rdi), %rdx + movq %rax, 600(%rdi) + adcq %rdx, %rdx + movq 616(%rdi), %rax + movq %rdx, 608(%rdi) + adcq %rax, %rax + movq 624(%rdi), %rdx + movq %rax, 616(%rdi) + adcq %rdx, %rdx + movq 632(%rdi), %rax + movq %rdx, 624(%rdi) + adcq %rax, %rax + movq 640(%rdi), %rdx + movq %rax, 632(%rdi) + adcq %rdx, %rdx + movq 648(%rdi), %rax + movq %rdx, 640(%rdi) + adcq %rax, %rax + movq 656(%rdi), %rdx + movq %rax, 648(%rdi) + adcq %rdx, %rdx + movq 664(%rdi), %rax + movq %rdx, 656(%rdi) + adcq %rax, %rax + movq 672(%rdi), %rdx + movq %rax, 664(%rdi) + adcq %rdx, %rdx + movq 680(%rdi), %rax + movq %rdx, 672(%rdi) + adcq %rax, %rax + movq 688(%rdi), %rdx + movq %rax, 680(%rdi) + adcq %rdx, %rdx + movq 696(%rdi), %rax + movq %rdx, 688(%rdi) + adcq %rax, %rax + movq 704(%rdi), %rdx + movq %rax, 696(%rdi) + adcq %rdx, %rdx + movq 712(%rdi), %rax + movq %rdx, 704(%rdi) + adcq %rax, %rax + movq 720(%rdi), %rdx + movq %rax, 712(%rdi) + adcq %rdx, %rdx + movq 728(%rdi), %rax + movq %rdx, 720(%rdi) + adcq %rax, %rax + movq 736(%rdi), %rdx + movq %rax, 728(%rdi) + adcq %rdx, %rdx + movq 744(%rdi), %rax + movq %rdx, 736(%rdi) + adcq %rax, %rax + movq 752(%rdi), %rdx + movq %rax, 744(%rdi) + adcq %rdx, %rdx + movq 760(%rdi), %rax + movq %rdx, 752(%rdi) + adcq %rax, %rax + movq %rax, 760(%rdi) + adcq $0, %rcx + leaq 512(%rsp), %rsi + movq %rsp, %r8 + movq 
(%r8), %rdx + subq (%rsi), %rdx + movq 8(%r8), %rax + movq %rdx, (%r8) + sbbq 8(%rsi), %rax + movq 16(%r8), %rdx + movq %rax, 8(%r8) + sbbq 16(%rsi), %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r8) + sbbq 24(%rsi), %rax + movq 32(%r8), %rdx + movq %rax, 24(%r8) + sbbq 32(%rsi), %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r8) + sbbq 40(%rsi), %rax + movq 48(%r8), %rdx + movq %rax, 40(%r8) + sbbq 48(%rsi), %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r8) + sbbq 56(%rsi), %rax + movq 64(%r8), %rdx + movq %rax, 56(%r8) + sbbq 64(%rsi), %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r8) + sbbq 72(%rsi), %rax + movq 80(%r8), %rdx + movq %rax, 72(%r8) + sbbq 80(%rsi), %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r8) + sbbq 88(%rsi), %rax + movq 96(%r8), %rdx + movq %rax, 88(%r8) + sbbq 96(%rsi), %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r8) + sbbq 104(%rsi), %rax + movq 112(%r8), %rdx + movq %rax, 104(%r8) + sbbq 112(%rsi), %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r8) + sbbq 120(%rsi), %rax + movq 128(%r8), %rdx + movq %rax, 120(%r8) + sbbq 128(%rsi), %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r8) + sbbq 136(%rsi), %rax + movq 144(%r8), %rdx + movq %rax, 136(%r8) + sbbq 144(%rsi), %rdx + movq 152(%r8), %rax + movq %rdx, 144(%r8) + sbbq 152(%rsi), %rax + movq 160(%r8), %rdx + movq %rax, 152(%r8) + sbbq 160(%rsi), %rdx + movq 168(%r8), %rax + movq %rdx, 160(%r8) + sbbq 168(%rsi), %rax + movq 176(%r8), %rdx + movq %rax, 168(%r8) + sbbq 176(%rsi), %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r8) + sbbq 184(%rsi), %rax + movq 192(%r8), %rdx + movq %rax, 184(%r8) + sbbq 192(%rsi), %rdx + movq 200(%r8), %rax + movq %rdx, 192(%r8) + sbbq 200(%rsi), %rax + movq 208(%r8), %rdx + movq %rax, 200(%r8) + sbbq 208(%rsi), %rdx + movq 216(%r8), %rax + movq %rdx, 208(%r8) + sbbq 216(%rsi), %rax + movq 224(%r8), %rdx + movq %rax, 216(%r8) + sbbq 224(%rsi), %rdx + movq 232(%r8), %rax + movq %rdx, 224(%r8) + sbbq 232(%rsi), %rax + movq 240(%r8), %rdx + movq %rax, 232(%r8) + sbbq 240(%rsi), %rdx + movq 248(%r8), %rax + movq %rdx, 240(%r8) + sbbq 248(%rsi), %rax + movq 256(%r8), %rdx + movq %rax, 248(%r8) + sbbq 256(%rsi), %rdx + movq 264(%r8), %rax + movq %rdx, 256(%r8) + sbbq 264(%rsi), %rax + movq 272(%r8), %rdx + movq %rax, 264(%r8) + sbbq 272(%rsi), %rdx + movq 280(%r8), %rax + movq %rdx, 272(%r8) + sbbq 280(%rsi), %rax + movq 288(%r8), %rdx + movq %rax, 280(%r8) + sbbq 288(%rsi), %rdx + movq 296(%r8), %rax + movq %rdx, 288(%r8) + sbbq 296(%rsi), %rax + movq 304(%r8), %rdx + movq %rax, 296(%r8) + sbbq 304(%rsi), %rdx + movq 312(%r8), %rax + movq %rdx, 304(%r8) + sbbq 312(%rsi), %rax + movq 320(%r8), %rdx + movq %rax, 312(%r8) + sbbq 320(%rsi), %rdx + movq 328(%r8), %rax + movq %rdx, 320(%r8) + sbbq 328(%rsi), %rax + movq 336(%r8), %rdx + movq %rax, 328(%r8) + sbbq 336(%rsi), %rdx + movq 344(%r8), %rax + movq %rdx, 336(%r8) + sbbq 344(%rsi), %rax + movq 352(%r8), %rdx + movq %rax, 344(%r8) + sbbq 352(%rsi), %rdx + movq 360(%r8), %rax + movq %rdx, 352(%r8) + sbbq 360(%rsi), %rax + movq 368(%r8), %rdx + movq %rax, 360(%r8) + sbbq 368(%rsi), %rdx + movq 376(%r8), %rax + movq %rdx, 368(%r8) + sbbq 376(%rsi), %rax + movq 384(%r8), %rdx + movq %rax, 376(%r8) + sbbq 384(%rsi), %rdx + movq 392(%r8), %rax + movq %rdx, 384(%r8) + sbbq 392(%rsi), %rax + movq 400(%r8), %rdx + movq %rax, 392(%r8) + sbbq 400(%rsi), %rdx + movq 408(%r8), %rax + movq %rdx, 400(%r8) + sbbq 408(%rsi), %rax + movq 416(%r8), %rdx + movq %rax, 408(%r8) + sbbq 416(%rsi), %rdx + movq 424(%r8), %rax + movq %rdx, 416(%r8) + sbbq 424(%rsi), %rax + movq 
432(%r8), %rdx + movq %rax, 424(%r8) + sbbq 432(%rsi), %rdx + movq 440(%r8), %rax + movq %rdx, 432(%r8) + sbbq 440(%rsi), %rax + movq 448(%r8), %rdx + movq %rax, 440(%r8) + sbbq 448(%rsi), %rdx + movq 456(%r8), %rax + movq %rdx, 448(%r8) + sbbq 456(%rsi), %rax + movq 464(%r8), %rdx + movq %rax, 456(%r8) + sbbq 464(%rsi), %rdx + movq 472(%r8), %rax + movq %rdx, 464(%r8) + sbbq 472(%rsi), %rax + movq 480(%r8), %rdx + movq %rax, 472(%r8) + sbbq 480(%rsi), %rdx + movq 488(%r8), %rax + movq %rdx, 480(%r8) + sbbq 488(%rsi), %rax + movq 496(%r8), %rdx + movq %rax, 488(%r8) + sbbq 496(%rsi), %rdx + movq 504(%r8), %rax + movq %rdx, 496(%r8) + sbbq 504(%rsi), %rax + movq %rax, 504(%r8) + sbbq $0, %rcx + movq (%r8), %rdx + subq (%rdi), %rdx + movq 8(%r8), %rax + movq %rdx, (%r8) + sbbq 8(%rdi), %rax + movq 16(%r8), %rdx + movq %rax, 8(%r8) + sbbq 16(%rdi), %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r8) + sbbq 24(%rdi), %rax + movq 32(%r8), %rdx + movq %rax, 24(%r8) + sbbq 32(%rdi), %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r8) + sbbq 40(%rdi), %rax + movq 48(%r8), %rdx + movq %rax, 40(%r8) + sbbq 48(%rdi), %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r8) + sbbq 56(%rdi), %rax + movq 64(%r8), %rdx + movq %rax, 56(%r8) + sbbq 64(%rdi), %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r8) + sbbq 72(%rdi), %rax + movq 80(%r8), %rdx + movq %rax, 72(%r8) + sbbq 80(%rdi), %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r8) + sbbq 88(%rdi), %rax + movq 96(%r8), %rdx + movq %rax, 88(%r8) + sbbq 96(%rdi), %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r8) + sbbq 104(%rdi), %rax + movq 112(%r8), %rdx + movq %rax, 104(%r8) + sbbq 112(%rdi), %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r8) + sbbq 120(%rdi), %rax + movq 128(%r8), %rdx + movq %rax, 120(%r8) + sbbq 128(%rdi), %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r8) + sbbq 136(%rdi), %rax + movq 144(%r8), %rdx + movq %rax, 136(%r8) + sbbq 144(%rdi), %rdx + movq 152(%r8), %rax + movq %rdx, 144(%r8) + sbbq 152(%rdi), %rax + movq 160(%r8), %rdx + movq %rax, 152(%r8) + sbbq 160(%rdi), %rdx + movq 168(%r8), %rax + movq %rdx, 160(%r8) + sbbq 168(%rdi), %rax + movq 176(%r8), %rdx + movq %rax, 168(%r8) + sbbq 176(%rdi), %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r8) + sbbq 184(%rdi), %rax + movq 192(%r8), %rdx + movq %rax, 184(%r8) + sbbq 192(%rdi), %rdx + movq 200(%r8), %rax + movq %rdx, 192(%r8) + sbbq 200(%rdi), %rax + movq 208(%r8), %rdx + movq %rax, 200(%r8) + sbbq 208(%rdi), %rdx + movq 216(%r8), %rax + movq %rdx, 208(%r8) + sbbq 216(%rdi), %rax + movq 224(%r8), %rdx + movq %rax, 216(%r8) + sbbq 224(%rdi), %rdx + movq 232(%r8), %rax + movq %rdx, 224(%r8) + sbbq 232(%rdi), %rax + movq 240(%r8), %rdx + movq %rax, 232(%r8) + sbbq 240(%rdi), %rdx + movq 248(%r8), %rax + movq %rdx, 240(%r8) + sbbq 248(%rdi), %rax + movq 256(%r8), %rdx + movq %rax, 248(%r8) + sbbq 256(%rdi), %rdx + movq 264(%r8), %rax + movq %rdx, 256(%r8) + sbbq 264(%rdi), %rax + movq 272(%r8), %rdx + movq %rax, 264(%r8) + sbbq 272(%rdi), %rdx + movq 280(%r8), %rax + movq %rdx, 272(%r8) + sbbq 280(%rdi), %rax + movq 288(%r8), %rdx + movq %rax, 280(%r8) + sbbq 288(%rdi), %rdx + movq 296(%r8), %rax + movq %rdx, 288(%r8) + sbbq 296(%rdi), %rax + movq 304(%r8), %rdx + movq %rax, 296(%r8) + sbbq 304(%rdi), %rdx + movq 312(%r8), %rax + movq %rdx, 304(%r8) + sbbq 312(%rdi), %rax + movq 320(%r8), %rdx + movq %rax, 312(%r8) + sbbq 320(%rdi), %rdx + movq 328(%r8), %rax + movq %rdx, 320(%r8) + sbbq 328(%rdi), %rax + movq 336(%r8), %rdx + movq %rax, 328(%r8) + sbbq 336(%rdi), %rdx + movq 344(%r8), %rax + movq %rdx, 
336(%r8) + sbbq 344(%rdi), %rax + movq 352(%r8), %rdx + movq %rax, 344(%r8) + sbbq 352(%rdi), %rdx + movq 360(%r8), %rax + movq %rdx, 352(%r8) + sbbq 360(%rdi), %rax + movq 368(%r8), %rdx + movq %rax, 360(%r8) + sbbq 368(%rdi), %rdx + movq 376(%r8), %rax + movq %rdx, 368(%r8) + sbbq 376(%rdi), %rax + movq 384(%r8), %rdx + movq %rax, 376(%r8) + sbbq 384(%rdi), %rdx + movq 392(%r8), %rax + movq %rdx, 384(%r8) + sbbq 392(%rdi), %rax + movq 400(%r8), %rdx + movq %rax, 392(%r8) + sbbq 400(%rdi), %rdx + movq 408(%r8), %rax + movq %rdx, 400(%r8) + sbbq 408(%rdi), %rax + movq 416(%r8), %rdx + movq %rax, 408(%r8) + sbbq 416(%rdi), %rdx + movq 424(%r8), %rax + movq %rdx, 416(%r8) + sbbq 424(%rdi), %rax + movq 432(%r8), %rdx + movq %rax, 424(%r8) + sbbq 432(%rdi), %rdx + movq 440(%r8), %rax + movq %rdx, 432(%r8) + sbbq 440(%rdi), %rax + movq 448(%r8), %rdx + movq %rax, 440(%r8) + sbbq 448(%rdi), %rdx + movq 456(%r8), %rax + movq %rdx, 448(%r8) + sbbq 456(%rdi), %rax + movq 464(%r8), %rdx + movq %rax, 456(%r8) + sbbq 464(%rdi), %rdx + movq 472(%r8), %rax + movq %rdx, 464(%r8) + sbbq 472(%rdi), %rax + movq 480(%r8), %rdx + movq %rax, 472(%r8) + sbbq 480(%rdi), %rdx + movq 488(%r8), %rax + movq %rdx, 480(%r8) + sbbq 488(%rdi), %rax + movq 496(%r8), %rdx + movq %rax, 488(%r8) + sbbq 496(%rdi), %rdx + movq 504(%r8), %rax + movq %rdx, 496(%r8) + sbbq 504(%rdi), %rax + movq %rax, 504(%r8) + sbbq $0, %rcx + # Add in place + movq 256(%rdi), %rdx + addq (%r8), %rdx + movq 264(%rdi), %rax + movq %rdx, 256(%rdi) + adcq 8(%r8), %rax + movq 272(%rdi), %rdx + movq %rax, 264(%rdi) + adcq 16(%r8), %rdx + movq 280(%rdi), %rax + movq %rdx, 272(%rdi) + adcq 24(%r8), %rax + movq 288(%rdi), %rdx + movq %rax, 280(%rdi) + adcq 32(%r8), %rdx + movq 296(%rdi), %rax + movq %rdx, 288(%rdi) + adcq 40(%r8), %rax + movq 304(%rdi), %rdx + movq %rax, 296(%rdi) + adcq 48(%r8), %rdx + movq 312(%rdi), %rax + movq %rdx, 304(%rdi) + adcq 56(%r8), %rax + movq 320(%rdi), %rdx + movq %rax, 312(%rdi) + adcq 64(%r8), %rdx + movq 328(%rdi), %rax + movq %rdx, 320(%rdi) + adcq 72(%r8), %rax + movq 336(%rdi), %rdx + movq %rax, 328(%rdi) + adcq 80(%r8), %rdx + movq 344(%rdi), %rax + movq %rdx, 336(%rdi) + adcq 88(%r8), %rax + movq 352(%rdi), %rdx + movq %rax, 344(%rdi) + adcq 96(%r8), %rdx + movq 360(%rdi), %rax + movq %rdx, 352(%rdi) + adcq 104(%r8), %rax + movq 368(%rdi), %rdx + movq %rax, 360(%rdi) + adcq 112(%r8), %rdx + movq 376(%rdi), %rax + movq %rdx, 368(%rdi) + adcq 120(%r8), %rax + movq 384(%rdi), %rdx + movq %rax, 376(%rdi) + adcq 128(%r8), %rdx + movq 392(%rdi), %rax + movq %rdx, 384(%rdi) + adcq 136(%r8), %rax + movq 400(%rdi), %rdx + movq %rax, 392(%rdi) + adcq 144(%r8), %rdx + movq 408(%rdi), %rax + movq %rdx, 400(%rdi) + adcq 152(%r8), %rax + movq 416(%rdi), %rdx + movq %rax, 408(%rdi) + adcq 160(%r8), %rdx + movq 424(%rdi), %rax + movq %rdx, 416(%rdi) + adcq 168(%r8), %rax + movq 432(%rdi), %rdx + movq %rax, 424(%rdi) + adcq 176(%r8), %rdx + movq 440(%rdi), %rax + movq %rdx, 432(%rdi) + adcq 184(%r8), %rax + movq 448(%rdi), %rdx + movq %rax, 440(%rdi) + adcq 192(%r8), %rdx + movq 456(%rdi), %rax + movq %rdx, 448(%rdi) + adcq 200(%r8), %rax + movq 464(%rdi), %rdx + movq %rax, 456(%rdi) + adcq 208(%r8), %rdx + movq 472(%rdi), %rax + movq %rdx, 464(%rdi) + adcq 216(%r8), %rax + movq 480(%rdi), %rdx + movq %rax, 472(%rdi) + adcq 224(%r8), %rdx + movq 488(%rdi), %rax + movq %rdx, 480(%rdi) + adcq 232(%r8), %rax + movq 496(%rdi), %rdx + movq %rax, 488(%rdi) + adcq 240(%r8), %rdx + movq 504(%rdi), %rax + movq %rdx, 496(%rdi) + adcq 
248(%r8), %rax + movq 512(%rdi), %rdx + movq %rax, 504(%rdi) + adcq 256(%r8), %rdx + movq 520(%rdi), %rax + movq %rdx, 512(%rdi) + adcq 264(%r8), %rax + movq 528(%rdi), %rdx + movq %rax, 520(%rdi) + adcq 272(%r8), %rdx + movq 536(%rdi), %rax + movq %rdx, 528(%rdi) + adcq 280(%r8), %rax + movq 544(%rdi), %rdx + movq %rax, 536(%rdi) + adcq 288(%r8), %rdx + movq 552(%rdi), %rax + movq %rdx, 544(%rdi) + adcq 296(%r8), %rax + movq 560(%rdi), %rdx + movq %rax, 552(%rdi) + adcq 304(%r8), %rdx + movq 568(%rdi), %rax + movq %rdx, 560(%rdi) + adcq 312(%r8), %rax + movq 576(%rdi), %rdx + movq %rax, 568(%rdi) + adcq 320(%r8), %rdx + movq 584(%rdi), %rax + movq %rdx, 576(%rdi) + adcq 328(%r8), %rax + movq 592(%rdi), %rdx + movq %rax, 584(%rdi) + adcq 336(%r8), %rdx + movq 600(%rdi), %rax + movq %rdx, 592(%rdi) + adcq 344(%r8), %rax + movq 608(%rdi), %rdx + movq %rax, 600(%rdi) + adcq 352(%r8), %rdx + movq 616(%rdi), %rax + movq %rdx, 608(%rdi) + adcq 360(%r8), %rax + movq 624(%rdi), %rdx + movq %rax, 616(%rdi) + adcq 368(%r8), %rdx + movq 632(%rdi), %rax + movq %rdx, 624(%rdi) + adcq 376(%r8), %rax + movq 640(%rdi), %rdx + movq %rax, 632(%rdi) + adcq 384(%r8), %rdx + movq 648(%rdi), %rax + movq %rdx, 640(%rdi) + adcq 392(%r8), %rax + movq 656(%rdi), %rdx + movq %rax, 648(%rdi) + adcq 400(%r8), %rdx + movq 664(%rdi), %rax + movq %rdx, 656(%rdi) + adcq 408(%r8), %rax + movq 672(%rdi), %rdx + movq %rax, 664(%rdi) + adcq 416(%r8), %rdx + movq 680(%rdi), %rax + movq %rdx, 672(%rdi) + adcq 424(%r8), %rax + movq 688(%rdi), %rdx + movq %rax, 680(%rdi) + adcq 432(%r8), %rdx + movq 696(%rdi), %rax + movq %rdx, 688(%rdi) + adcq 440(%r8), %rax + movq 704(%rdi), %rdx + movq %rax, 696(%rdi) + adcq 448(%r8), %rdx + movq 712(%rdi), %rax + movq %rdx, 704(%rdi) + adcq 456(%r8), %rax + movq 720(%rdi), %rdx + movq %rax, 712(%rdi) + adcq 464(%r8), %rdx + movq 728(%rdi), %rax + movq %rdx, 720(%rdi) + adcq 472(%r8), %rax + movq 736(%rdi), %rdx + movq %rax, 728(%rdi) + adcq 480(%r8), %rdx + movq 744(%rdi), %rax + movq %rdx, 736(%rdi) + adcq 488(%r8), %rax + movq 752(%rdi), %rdx + movq %rax, 744(%rdi) + adcq 496(%r8), %rdx + movq 760(%rdi), %rax + movq %rdx, 752(%rdi) + adcq 504(%r8), %rax + movq %rax, 760(%rdi) + adcq $0, %rcx + movq %rcx, 768(%rdi) + # Add in place + movq 512(%rdi), %rdx + xorq %rcx, %rcx + addq (%rsi), %rdx + movq 520(%rdi), %rax + movq %rdx, 512(%rdi) + adcq 8(%rsi), %rax + movq 528(%rdi), %rdx + movq %rax, 520(%rdi) + adcq 16(%rsi), %rdx + movq 536(%rdi), %rax + movq %rdx, 528(%rdi) + adcq 24(%rsi), %rax + movq 544(%rdi), %rdx + movq %rax, 536(%rdi) + adcq 32(%rsi), %rdx + movq 552(%rdi), %rax + movq %rdx, 544(%rdi) + adcq 40(%rsi), %rax + movq 560(%rdi), %rdx + movq %rax, 552(%rdi) + adcq 48(%rsi), %rdx + movq 568(%rdi), %rax + movq %rdx, 560(%rdi) + adcq 56(%rsi), %rax + movq 576(%rdi), %rdx + movq %rax, 568(%rdi) + adcq 64(%rsi), %rdx + movq 584(%rdi), %rax + movq %rdx, 576(%rdi) + adcq 72(%rsi), %rax + movq 592(%rdi), %rdx + movq %rax, 584(%rdi) + adcq 80(%rsi), %rdx + movq 600(%rdi), %rax + movq %rdx, 592(%rdi) + adcq 88(%rsi), %rax + movq 608(%rdi), %rdx + movq %rax, 600(%rdi) + adcq 96(%rsi), %rdx + movq 616(%rdi), %rax + movq %rdx, 608(%rdi) + adcq 104(%rsi), %rax + movq 624(%rdi), %rdx + movq %rax, 616(%rdi) + adcq 112(%rsi), %rdx + movq 632(%rdi), %rax + movq %rdx, 624(%rdi) + adcq 120(%rsi), %rax + movq 640(%rdi), %rdx + movq %rax, 632(%rdi) + adcq 128(%rsi), %rdx + movq 648(%rdi), %rax + movq %rdx, 640(%rdi) + adcq 136(%rsi), %rax + movq 656(%rdi), %rdx + movq %rax, 648(%rdi) + adcq 144(%rsi), 
%rdx + movq 664(%rdi), %rax + movq %rdx, 656(%rdi) + adcq 152(%rsi), %rax + movq 672(%rdi), %rdx + movq %rax, 664(%rdi) + adcq 160(%rsi), %rdx + movq 680(%rdi), %rax + movq %rdx, 672(%rdi) + adcq 168(%rsi), %rax + movq 688(%rdi), %rdx + movq %rax, 680(%rdi) + adcq 176(%rsi), %rdx + movq 696(%rdi), %rax + movq %rdx, 688(%rdi) + adcq 184(%rsi), %rax + movq 704(%rdi), %rdx + movq %rax, 696(%rdi) + adcq 192(%rsi), %rdx + movq 712(%rdi), %rax + movq %rdx, 704(%rdi) + adcq 200(%rsi), %rax + movq 720(%rdi), %rdx + movq %rax, 712(%rdi) + adcq 208(%rsi), %rdx + movq 728(%rdi), %rax + movq %rdx, 720(%rdi) + adcq 216(%rsi), %rax + movq 736(%rdi), %rdx + movq %rax, 728(%rdi) + adcq 224(%rsi), %rdx + movq 744(%rdi), %rax + movq %rdx, 736(%rdi) + adcq 232(%rsi), %rax + movq 752(%rdi), %rdx + movq %rax, 744(%rdi) + adcq 240(%rsi), %rdx + movq 760(%rdi), %rax + movq %rdx, 752(%rdi) + adcq 248(%rsi), %rax + movq 768(%rdi), %rdx + movq %rax, 760(%rdi) + adcq 256(%rsi), %rdx + movq %rdx, 768(%rdi) + adcq $0, %rcx + # Add to zero + movq 264(%rsi), %rdx + adcq $0, %rdx + movq 272(%rsi), %rax + movq %rdx, 776(%rdi) + adcq $0, %rax + movq 280(%rsi), %rdx + movq %rax, 784(%rdi) + adcq $0, %rdx + movq 288(%rsi), %rax + movq %rdx, 792(%rdi) + adcq $0, %rax + movq 296(%rsi), %rdx + movq %rax, 800(%rdi) + adcq $0, %rdx + movq 304(%rsi), %rax + movq %rdx, 808(%rdi) + adcq $0, %rax + movq 312(%rsi), %rdx + movq %rax, 816(%rdi) + adcq $0, %rdx + movq 320(%rsi), %rax + movq %rdx, 824(%rdi) + adcq $0, %rax + movq 328(%rsi), %rdx + movq %rax, 832(%rdi) + adcq $0, %rdx + movq 336(%rsi), %rax + movq %rdx, 840(%rdi) + adcq $0, %rax + movq 344(%rsi), %rdx + movq %rax, 848(%rdi) + adcq $0, %rdx + movq 352(%rsi), %rax + movq %rdx, 856(%rdi) + adcq $0, %rax + movq 360(%rsi), %rdx + movq %rax, 864(%rdi) + adcq $0, %rdx + movq 368(%rsi), %rax + movq %rdx, 872(%rdi) + adcq $0, %rax + movq 376(%rsi), %rdx + movq %rax, 880(%rdi) + adcq $0, %rdx + movq 384(%rsi), %rax + movq %rdx, 888(%rdi) + adcq $0, %rax + movq 392(%rsi), %rdx + movq %rax, 896(%rdi) + adcq $0, %rdx + movq 400(%rsi), %rax + movq %rdx, 904(%rdi) + adcq $0, %rax + movq 408(%rsi), %rdx + movq %rax, 912(%rdi) + adcq $0, %rdx + movq 416(%rsi), %rax + movq %rdx, 920(%rdi) + adcq $0, %rax + movq 424(%rsi), %rdx + movq %rax, 928(%rdi) + adcq $0, %rdx + movq 432(%rsi), %rax + movq %rdx, 936(%rdi) + adcq $0, %rax + movq 440(%rsi), %rdx + movq %rax, 944(%rdi) + adcq $0, %rdx + movq 448(%rsi), %rax + movq %rdx, 952(%rdi) + adcq $0, %rax + movq 456(%rsi), %rdx + movq %rax, 960(%rdi) + adcq $0, %rdx + movq 464(%rsi), %rax + movq %rdx, 968(%rdi) + adcq $0, %rax + movq 472(%rsi), %rdx + movq %rax, 976(%rdi) + adcq $0, %rdx + movq 480(%rsi), %rax + movq %rdx, 984(%rdi) + adcq $0, %rax + movq 488(%rsi), %rdx + movq %rax, 992(%rdi) + adcq $0, %rdx + movq 496(%rsi), %rax + movq %rdx, 1000(%rdi) + adcq $0, %rax + movq 504(%rsi), %rdx + movq %rax, 1008(%rdi) + adcq $0, %rdx + movq %rdx, 1016(%rdi) + addq $1304, %rsp + repz retq +#ifndef __APPLE__ +.size sp_4096_sqr_64,.-sp_4096_sqr_64 +#endif /* __APPLE__ */ +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
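+ *
+ * Editorial note: one level of Karatsuba multiplication. With
+ * a = a0 + a1*2^2048 and b = b0 + b1*2^2048 the routine computes
+ * z0 = a0*b0, z2 = a1*b1 and m = (a0+a1)*(b0+b1) via three
+ * sp_2048_mul_avx2_32 calls, then uses m - z0 - z2 = a0*b1 + a1*b0
+ * for the middle 2048-bit words. The negq/pextq pairs below turn the
+ * two saved half-addition carries into branch-free masks: negq maps
+ * the 0/1 carry to 0/all-ones, and pextq with an all-ones (or zero)
+ * mask passes its source through unchanged (or yields zero). A C
+ * equivalent of that masking trick, with illustrative names only:
+ *
+ *   word mask = (word)0 - carry;  /* negq: 0 -> 0, 1 -> ~0         */
+ *   word term = value & mask;     /* pextq mask,src,dst: src or 0  */
+ *   acc += term;                  /* carry fixup without a branch  */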
+ */ +#ifndef __APPLE__ +.globl sp_4096_mul_avx2_64 +.type sp_4096_mul_avx2_64,@function +.align 16 +sp_4096_mul_avx2_64: +#else +.globl _sp_4096_mul_avx2_64 +.p2align 4 +_sp_4096_mul_avx2_64: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + subq $1576, %rsp + movq %rdi, 1536(%rsp) + movq %rsi, 1544(%rsp) + movq %rdx, 1552(%rsp) + leaq 1024(%rsp), %r10 + leaq 256(%rsi), %r12 + # Add + movq (%rsi), %rax + xorq %r13, %r13 + addq (%r12), %rax + movq 8(%rsi), %rcx + movq %rax, (%r10) + adcq 8(%r12), %rcx + movq 16(%rsi), %r8 + movq %rcx, 8(%r10) + adcq 16(%r12), %r8 + movq 24(%rsi), %rax + movq %r8, 16(%r10) + adcq 24(%r12), %rax + movq 32(%rsi), %rcx + movq %rax, 24(%r10) + adcq 32(%r12), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%r10) + adcq 40(%r12), %r8 + movq 48(%rsi), %rax + movq %r8, 40(%r10) + adcq 48(%r12), %rax + movq 56(%rsi), %rcx + movq %rax, 48(%r10) + adcq 56(%r12), %rcx + movq 64(%rsi), %r8 + movq %rcx, 56(%r10) + adcq 64(%r12), %r8 + movq 72(%rsi), %rax + movq %r8, 64(%r10) + adcq 72(%r12), %rax + movq 80(%rsi), %rcx + movq %rax, 72(%r10) + adcq 80(%r12), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%r10) + adcq 88(%r12), %r8 + movq 96(%rsi), %rax + movq %r8, 88(%r10) + adcq 96(%r12), %rax + movq 104(%rsi), %rcx + movq %rax, 96(%r10) + adcq 104(%r12), %rcx + movq 112(%rsi), %r8 + movq %rcx, 104(%r10) + adcq 112(%r12), %r8 + movq 120(%rsi), %rax + movq %r8, 112(%r10) + adcq 120(%r12), %rax + movq 128(%rsi), %rcx + movq %rax, 120(%r10) + adcq 128(%r12), %rcx + movq 136(%rsi), %r8 + movq %rcx, 128(%r10) + adcq 136(%r12), %r8 + movq 144(%rsi), %rax + movq %r8, 136(%r10) + adcq 144(%r12), %rax + movq 152(%rsi), %rcx + movq %rax, 144(%r10) + adcq 152(%r12), %rcx + movq 160(%rsi), %r8 + movq %rcx, 152(%r10) + adcq 160(%r12), %r8 + movq 168(%rsi), %rax + movq %r8, 160(%r10) + adcq 168(%r12), %rax + movq 176(%rsi), %rcx + movq %rax, 168(%r10) + adcq 176(%r12), %rcx + movq 184(%rsi), %r8 + movq %rcx, 176(%r10) + adcq 184(%r12), %r8 + movq 192(%rsi), %rax + movq %r8, 184(%r10) + adcq 192(%r12), %rax + movq 200(%rsi), %rcx + movq %rax, 192(%r10) + adcq 200(%r12), %rcx + movq 208(%rsi), %r8 + movq %rcx, 200(%r10) + adcq 208(%r12), %r8 + movq 216(%rsi), %rax + movq %r8, 208(%r10) + adcq 216(%r12), %rax + movq 224(%rsi), %rcx + movq %rax, 216(%r10) + adcq 224(%r12), %rcx + movq 232(%rsi), %r8 + movq %rcx, 224(%r10) + adcq 232(%r12), %r8 + movq 240(%rsi), %rax + movq %r8, 232(%r10) + adcq 240(%r12), %rax + movq 248(%rsi), %rcx + movq %rax, 240(%r10) + adcq 248(%r12), %rcx + movq %rcx, 248(%r10) + adcq $0, %r13 + movq %r13, 1560(%rsp) + leaq 1280(%rsp), %r11 + leaq 256(%rdx), %r12 + # Add + movq (%rdx), %rax + xorq %r14, %r14 + addq (%r12), %rax + movq 8(%rdx), %rcx + movq %rax, (%r11) + adcq 8(%r12), %rcx + movq 16(%rdx), %r8 + movq %rcx, 8(%r11) + adcq 16(%r12), %r8 + movq 24(%rdx), %rax + movq %r8, 16(%r11) + adcq 24(%r12), %rax + movq 32(%rdx), %rcx + movq %rax, 24(%r11) + adcq 32(%r12), %rcx + movq 40(%rdx), %r8 + movq %rcx, 32(%r11) + adcq 40(%r12), %r8 + movq 48(%rdx), %rax + movq %r8, 40(%r11) + adcq 48(%r12), %rax + movq 56(%rdx), %rcx + movq %rax, 48(%r11) + adcq 56(%r12), %rcx + movq 64(%rdx), %r8 + movq %rcx, 56(%r11) + adcq 64(%r12), %r8 + movq 72(%rdx), %rax + movq %r8, 64(%r11) + adcq 72(%r12), %rax + movq 80(%rdx), %rcx + movq %rax, 72(%r11) + adcq 80(%r12), %rcx + movq 88(%rdx), %r8 + movq %rcx, 80(%r11) + adcq 88(%r12), %r8 + movq 96(%rdx), %rax + movq %r8, 88(%r11) + adcq 96(%r12), %rax + movq 104(%rdx), %rcx + movq %rax, 96(%r11) + adcq 104(%r12), %rcx 
+ movq 112(%rdx), %r8 + movq %rcx, 104(%r11) + adcq 112(%r12), %r8 + movq 120(%rdx), %rax + movq %r8, 112(%r11) + adcq 120(%r12), %rax + movq 128(%rdx), %rcx + movq %rax, 120(%r11) + adcq 128(%r12), %rcx + movq 136(%rdx), %r8 + movq %rcx, 128(%r11) + adcq 136(%r12), %r8 + movq 144(%rdx), %rax + movq %r8, 136(%r11) + adcq 144(%r12), %rax + movq 152(%rdx), %rcx + movq %rax, 144(%r11) + adcq 152(%r12), %rcx + movq 160(%rdx), %r8 + movq %rcx, 152(%r11) + adcq 160(%r12), %r8 + movq 168(%rdx), %rax + movq %r8, 160(%r11) + adcq 168(%r12), %rax + movq 176(%rdx), %rcx + movq %rax, 168(%r11) + adcq 176(%r12), %rcx + movq 184(%rdx), %r8 + movq %rcx, 176(%r11) + adcq 184(%r12), %r8 + movq 192(%rdx), %rax + movq %r8, 184(%r11) + adcq 192(%r12), %rax + movq 200(%rdx), %rcx + movq %rax, 192(%r11) + adcq 200(%r12), %rcx + movq 208(%rdx), %r8 + movq %rcx, 200(%r11) + adcq 208(%r12), %r8 + movq 216(%rdx), %rax + movq %r8, 208(%r11) + adcq 216(%r12), %rax + movq 224(%rdx), %rcx + movq %rax, 216(%r11) + adcq 224(%r12), %rcx + movq 232(%rdx), %r8 + movq %rcx, 224(%r11) + adcq 232(%r12), %r8 + movq 240(%rdx), %rax + movq %r8, 232(%r11) + adcq 240(%r12), %rax + movq 248(%rdx), %rcx + movq %rax, 240(%r11) + adcq 248(%r12), %rcx + movq %rcx, 248(%r11) + adcq $0, %r14 + movq %r14, 1568(%rsp) + movq %r11, %rdx + movq %r10, %rsi + movq %rsp, %rdi +#ifndef __APPLE__ + callq sp_2048_mul_avx2_32@plt +#else + callq _sp_2048_mul_avx2_32 +#endif /* __APPLE__ */ + movq 1552(%rsp), %rdx + movq 1544(%rsp), %rsi + leaq 512(%rsp), %rdi + addq $256, %rdx + addq $256, %rsi +#ifndef __APPLE__ + callq sp_2048_mul_avx2_32@plt +#else + callq _sp_2048_mul_avx2_32 +#endif /* __APPLE__ */ + movq 1552(%rsp), %rdx + movq 1544(%rsp), %rsi + movq 1536(%rsp), %rdi +#ifndef __APPLE__ + callq sp_2048_mul_avx2_32@plt +#else + callq _sp_2048_mul_avx2_32 +#endif /* __APPLE__ */ + movq 1560(%rsp), %r13 + movq 1568(%rsp), %r14 + movq 1536(%rsp), %r15 + movq %r13, %r9 + leaq 1024(%rsp), %r10 + leaq 1280(%rsp), %r11 + andq %r14, %r9 + negq %r13 + negq %r14 + addq $512, %r15 + movq (%r10), %rax + movq (%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + addq %rcx, %rax + movq 8(%r10), %rcx + movq 8(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, (%r15) + adcq %r8, %rcx + movq 16(%r10), %r8 + movq 16(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 8(%r15) + adcq %rax, %r8 + movq 24(%r10), %rax + movq 24(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 16(%r15) + adcq %rcx, %rax + movq 32(%r10), %rcx + movq 32(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 24(%r15) + adcq %r8, %rcx + movq 40(%r10), %r8 + movq 40(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 32(%r15) + adcq %rax, %r8 + movq 48(%r10), %rax + movq 48(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 40(%r15) + adcq %rcx, %rax + movq 56(%r10), %rcx + movq 56(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 48(%r15) + adcq %r8, %rcx + movq 64(%r10), %r8 + movq 64(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 56(%r15) + adcq %rax, %r8 + movq 72(%r10), %rax + movq 72(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 64(%r15) + adcq %rcx, %rax + movq 80(%r10), %rcx + movq 80(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 72(%r15) + adcq %r8, %rcx + movq 88(%r10), %r8 + movq 88(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + 
movq %rcx, 80(%r15) + adcq %rax, %r8 + movq 96(%r10), %rax + movq 96(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 88(%r15) + adcq %rcx, %rax + movq 104(%r10), %rcx + movq 104(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 96(%r15) + adcq %r8, %rcx + movq 112(%r10), %r8 + movq 112(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 104(%r15) + adcq %rax, %r8 + movq 120(%r10), %rax + movq 120(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 112(%r15) + adcq %rcx, %rax + movq 128(%r10), %rcx + movq 128(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 120(%r15) + adcq %r8, %rcx + movq 136(%r10), %r8 + movq 136(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 128(%r15) + adcq %rax, %r8 + movq 144(%r10), %rax + movq 144(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 136(%r15) + adcq %rcx, %rax + movq 152(%r10), %rcx + movq 152(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 144(%r15) + adcq %r8, %rcx + movq 160(%r10), %r8 + movq 160(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 152(%r15) + adcq %rax, %r8 + movq 168(%r10), %rax + movq 168(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 160(%r15) + adcq %rcx, %rax + movq 176(%r10), %rcx + movq 176(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 168(%r15) + adcq %r8, %rcx + movq 184(%r10), %r8 + movq 184(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 176(%r15) + adcq %rax, %r8 + movq 192(%r10), %rax + movq 192(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 184(%r15) + adcq %rcx, %rax + movq 200(%r10), %rcx + movq 200(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 192(%r15) + adcq %r8, %rcx + movq 208(%r10), %r8 + movq 208(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 200(%r15) + adcq %rax, %r8 + movq 216(%r10), %rax + movq 216(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 208(%r15) + adcq %rcx, %rax + movq 224(%r10), %rcx + movq 224(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 216(%r15) + adcq %r8, %rcx + movq 232(%r10), %r8 + movq 232(%r11), %rax + pextq %r14, %r8, %r8 + pextq %r13, %rax, %rax + movq %rcx, 224(%r15) + adcq %rax, %r8 + movq 240(%r10), %rax + movq 240(%r11), %rcx + pextq %r14, %rax, %rax + pextq %r13, %rcx, %rcx + movq %r8, 232(%r15) + adcq %rcx, %rax + movq 248(%r10), %rcx + movq 248(%r11), %r8 + pextq %r14, %rcx, %rcx + pextq %r13, %r8, %r8 + movq %rax, 240(%r15) + adcq %r8, %rcx + movq %rcx, 248(%r15) + adcq $0, %r9 + leaq 512(%rsp), %r11 + movq %rsp, %r10 + movq (%r10), %rax + subq (%r11), %rax + movq 8(%r10), %rcx + movq %rax, (%r10) + sbbq 8(%r11), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r10) + sbbq 16(%r11), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r10) + sbbq 24(%r11), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r10) + sbbq 32(%r11), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r10) + sbbq 40(%r11), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r10) + sbbq 48(%r11), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r10) + sbbq 56(%r11), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r10) + sbbq 64(%r11), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r10) + sbbq 72(%r11), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r10) + sbbq 80(%r11), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r10) + sbbq 88(%r11), %r8 + movq 
96(%r10), %rax + movq %r8, 88(%r10) + sbbq 96(%r11), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r10) + sbbq 104(%r11), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r10) + sbbq 112(%r11), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r10) + sbbq 120(%r11), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r10) + sbbq 128(%r11), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r10) + sbbq 136(%r11), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r10) + sbbq 144(%r11), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r10) + sbbq 152(%r11), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r10) + sbbq 160(%r11), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r10) + sbbq 168(%r11), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r10) + sbbq 176(%r11), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r10) + sbbq 184(%r11), %r8 + movq 192(%r10), %rax + movq %r8, 184(%r10) + sbbq 192(%r11), %rax + movq 200(%r10), %rcx + movq %rax, 192(%r10) + sbbq 200(%r11), %rcx + movq 208(%r10), %r8 + movq %rcx, 200(%r10) + sbbq 208(%r11), %r8 + movq 216(%r10), %rax + movq %r8, 208(%r10) + sbbq 216(%r11), %rax + movq 224(%r10), %rcx + movq %rax, 216(%r10) + sbbq 224(%r11), %rcx + movq 232(%r10), %r8 + movq %rcx, 224(%r10) + sbbq 232(%r11), %r8 + movq 240(%r10), %rax + movq %r8, 232(%r10) + sbbq 240(%r11), %rax + movq 248(%r10), %rcx + movq %rax, 240(%r10) + sbbq 248(%r11), %rcx + movq 256(%r10), %r8 + movq %rcx, 248(%r10) + sbbq 256(%r11), %r8 + movq 264(%r10), %rax + movq %r8, 256(%r10) + sbbq 264(%r11), %rax + movq 272(%r10), %rcx + movq %rax, 264(%r10) + sbbq 272(%r11), %rcx + movq 280(%r10), %r8 + movq %rcx, 272(%r10) + sbbq 280(%r11), %r8 + movq 288(%r10), %rax + movq %r8, 280(%r10) + sbbq 288(%r11), %rax + movq 296(%r10), %rcx + movq %rax, 288(%r10) + sbbq 296(%r11), %rcx + movq 304(%r10), %r8 + movq %rcx, 296(%r10) + sbbq 304(%r11), %r8 + movq 312(%r10), %rax + movq %r8, 304(%r10) + sbbq 312(%r11), %rax + movq 320(%r10), %rcx + movq %rax, 312(%r10) + sbbq 320(%r11), %rcx + movq 328(%r10), %r8 + movq %rcx, 320(%r10) + sbbq 328(%r11), %r8 + movq 336(%r10), %rax + movq %r8, 328(%r10) + sbbq 336(%r11), %rax + movq 344(%r10), %rcx + movq %rax, 336(%r10) + sbbq 344(%r11), %rcx + movq 352(%r10), %r8 + movq %rcx, 344(%r10) + sbbq 352(%r11), %r8 + movq 360(%r10), %rax + movq %r8, 352(%r10) + sbbq 360(%r11), %rax + movq 368(%r10), %rcx + movq %rax, 360(%r10) + sbbq 368(%r11), %rcx + movq 376(%r10), %r8 + movq %rcx, 368(%r10) + sbbq 376(%r11), %r8 + movq 384(%r10), %rax + movq %r8, 376(%r10) + sbbq 384(%r11), %rax + movq 392(%r10), %rcx + movq %rax, 384(%r10) + sbbq 392(%r11), %rcx + movq 400(%r10), %r8 + movq %rcx, 392(%r10) + sbbq 400(%r11), %r8 + movq 408(%r10), %rax + movq %r8, 400(%r10) + sbbq 408(%r11), %rax + movq 416(%r10), %rcx + movq %rax, 408(%r10) + sbbq 416(%r11), %rcx + movq 424(%r10), %r8 + movq %rcx, 416(%r10) + sbbq 424(%r11), %r8 + movq 432(%r10), %rax + movq %r8, 424(%r10) + sbbq 432(%r11), %rax + movq 440(%r10), %rcx + movq %rax, 432(%r10) + sbbq 440(%r11), %rcx + movq 448(%r10), %r8 + movq %rcx, 440(%r10) + sbbq 448(%r11), %r8 + movq 456(%r10), %rax + movq %r8, 448(%r10) + sbbq 456(%r11), %rax + movq 464(%r10), %rcx + movq %rax, 456(%r10) + sbbq 464(%r11), %rcx + movq 472(%r10), %r8 + movq %rcx, 464(%r10) + sbbq 472(%r11), %r8 + movq 480(%r10), %rax + movq %r8, 472(%r10) + sbbq 480(%r11), %rax + movq 488(%r10), %rcx + movq %rax, 480(%r10) + sbbq 488(%r11), %rcx + movq 496(%r10), %r8 + movq %rcx, 488(%r10) + sbbq 496(%r11), %r8 + movq 504(%r10), %rax + movq %r8, 496(%r10) + sbbq 504(%r11), %rax + movq %rax, 504(%r10) + sbbq 
$0, %r9 + movq (%r10), %rax + subq (%rdi), %rax + movq 8(%r10), %rcx + movq %rax, (%r10) + sbbq 8(%rdi), %rcx + movq 16(%r10), %r8 + movq %rcx, 8(%r10) + sbbq 16(%rdi), %r8 + movq 24(%r10), %rax + movq %r8, 16(%r10) + sbbq 24(%rdi), %rax + movq 32(%r10), %rcx + movq %rax, 24(%r10) + sbbq 32(%rdi), %rcx + movq 40(%r10), %r8 + movq %rcx, 32(%r10) + sbbq 40(%rdi), %r8 + movq 48(%r10), %rax + movq %r8, 40(%r10) + sbbq 48(%rdi), %rax + movq 56(%r10), %rcx + movq %rax, 48(%r10) + sbbq 56(%rdi), %rcx + movq 64(%r10), %r8 + movq %rcx, 56(%r10) + sbbq 64(%rdi), %r8 + movq 72(%r10), %rax + movq %r8, 64(%r10) + sbbq 72(%rdi), %rax + movq 80(%r10), %rcx + movq %rax, 72(%r10) + sbbq 80(%rdi), %rcx + movq 88(%r10), %r8 + movq %rcx, 80(%r10) + sbbq 88(%rdi), %r8 + movq 96(%r10), %rax + movq %r8, 88(%r10) + sbbq 96(%rdi), %rax + movq 104(%r10), %rcx + movq %rax, 96(%r10) + sbbq 104(%rdi), %rcx + movq 112(%r10), %r8 + movq %rcx, 104(%r10) + sbbq 112(%rdi), %r8 + movq 120(%r10), %rax + movq %r8, 112(%r10) + sbbq 120(%rdi), %rax + movq 128(%r10), %rcx + movq %rax, 120(%r10) + sbbq 128(%rdi), %rcx + movq 136(%r10), %r8 + movq %rcx, 128(%r10) + sbbq 136(%rdi), %r8 + movq 144(%r10), %rax + movq %r8, 136(%r10) + sbbq 144(%rdi), %rax + movq 152(%r10), %rcx + movq %rax, 144(%r10) + sbbq 152(%rdi), %rcx + movq 160(%r10), %r8 + movq %rcx, 152(%r10) + sbbq 160(%rdi), %r8 + movq 168(%r10), %rax + movq %r8, 160(%r10) + sbbq 168(%rdi), %rax + movq 176(%r10), %rcx + movq %rax, 168(%r10) + sbbq 176(%rdi), %rcx + movq 184(%r10), %r8 + movq %rcx, 176(%r10) + sbbq 184(%rdi), %r8 + movq 192(%r10), %rax + movq %r8, 184(%r10) + sbbq 192(%rdi), %rax + movq 200(%r10), %rcx + movq %rax, 192(%r10) + sbbq 200(%rdi), %rcx + movq 208(%r10), %r8 + movq %rcx, 200(%r10) + sbbq 208(%rdi), %r8 + movq 216(%r10), %rax + movq %r8, 208(%r10) + sbbq 216(%rdi), %rax + movq 224(%r10), %rcx + movq %rax, 216(%r10) + sbbq 224(%rdi), %rcx + movq 232(%r10), %r8 + movq %rcx, 224(%r10) + sbbq 232(%rdi), %r8 + movq 240(%r10), %rax + movq %r8, 232(%r10) + sbbq 240(%rdi), %rax + movq 248(%r10), %rcx + movq %rax, 240(%r10) + sbbq 248(%rdi), %rcx + movq 256(%r10), %r8 + movq %rcx, 248(%r10) + sbbq 256(%rdi), %r8 + movq 264(%r10), %rax + movq %r8, 256(%r10) + sbbq 264(%rdi), %rax + movq 272(%r10), %rcx + movq %rax, 264(%r10) + sbbq 272(%rdi), %rcx + movq 280(%r10), %r8 + movq %rcx, 272(%r10) + sbbq 280(%rdi), %r8 + movq 288(%r10), %rax + movq %r8, 280(%r10) + sbbq 288(%rdi), %rax + movq 296(%r10), %rcx + movq %rax, 288(%r10) + sbbq 296(%rdi), %rcx + movq 304(%r10), %r8 + movq %rcx, 296(%r10) + sbbq 304(%rdi), %r8 + movq 312(%r10), %rax + movq %r8, 304(%r10) + sbbq 312(%rdi), %rax + movq 320(%r10), %rcx + movq %rax, 312(%r10) + sbbq 320(%rdi), %rcx + movq 328(%r10), %r8 + movq %rcx, 320(%r10) + sbbq 328(%rdi), %r8 + movq 336(%r10), %rax + movq %r8, 328(%r10) + sbbq 336(%rdi), %rax + movq 344(%r10), %rcx + movq %rax, 336(%r10) + sbbq 344(%rdi), %rcx + movq 352(%r10), %r8 + movq %rcx, 344(%r10) + sbbq 352(%rdi), %r8 + movq 360(%r10), %rax + movq %r8, 352(%r10) + sbbq 360(%rdi), %rax + movq 368(%r10), %rcx + movq %rax, 360(%r10) + sbbq 368(%rdi), %rcx + movq 376(%r10), %r8 + movq %rcx, 368(%r10) + sbbq 376(%rdi), %r8 + movq 384(%r10), %rax + movq %r8, 376(%r10) + sbbq 384(%rdi), %rax + movq 392(%r10), %rcx + movq %rax, 384(%r10) + sbbq 392(%rdi), %rcx + movq 400(%r10), %r8 + movq %rcx, 392(%r10) + sbbq 400(%rdi), %r8 + movq 408(%r10), %rax + movq %r8, 400(%r10) + sbbq 408(%rdi), %rax + movq 416(%r10), %rcx + movq %rax, 408(%r10) + sbbq 416(%rdi), %rcx + movq 
424(%r10), %r8 + movq %rcx, 416(%r10) + sbbq 424(%rdi), %r8 + movq 432(%r10), %rax + movq %r8, 424(%r10) + sbbq 432(%rdi), %rax + movq 440(%r10), %rcx + movq %rax, 432(%r10) + sbbq 440(%rdi), %rcx + movq 448(%r10), %r8 + movq %rcx, 440(%r10) + sbbq 448(%rdi), %r8 + movq 456(%r10), %rax + movq %r8, 448(%r10) + sbbq 456(%rdi), %rax + movq 464(%r10), %rcx + movq %rax, 456(%r10) + sbbq 464(%rdi), %rcx + movq 472(%r10), %r8 + movq %rcx, 464(%r10) + sbbq 472(%rdi), %r8 + movq 480(%r10), %rax + movq %r8, 472(%r10) + sbbq 480(%rdi), %rax + movq 488(%r10), %rcx + movq %rax, 480(%r10) + sbbq 488(%rdi), %rcx + movq 496(%r10), %r8 + movq %rcx, 488(%r10) + sbbq 496(%rdi), %r8 + movq 504(%r10), %rax + movq %r8, 496(%r10) + sbbq 504(%rdi), %rax + movq %rax, 504(%r10) + sbbq $0, %r9 + subq $256, %r15 + # Add + movq (%r15), %rax + addq (%r10), %rax + movq 8(%r15), %rcx + movq %rax, (%r15) + adcq 8(%r10), %rcx + movq 16(%r15), %r8 + movq %rcx, 8(%r15) + adcq 16(%r10), %r8 + movq 24(%r15), %rax + movq %r8, 16(%r15) + adcq 24(%r10), %rax + movq 32(%r15), %rcx + movq %rax, 24(%r15) + adcq 32(%r10), %rcx + movq 40(%r15), %r8 + movq %rcx, 32(%r15) + adcq 40(%r10), %r8 + movq 48(%r15), %rax + movq %r8, 40(%r15) + adcq 48(%r10), %rax + movq 56(%r15), %rcx + movq %rax, 48(%r15) + adcq 56(%r10), %rcx + movq 64(%r15), %r8 + movq %rcx, 56(%r15) + adcq 64(%r10), %r8 + movq 72(%r15), %rax + movq %r8, 64(%r15) + adcq 72(%r10), %rax + movq 80(%r15), %rcx + movq %rax, 72(%r15) + adcq 80(%r10), %rcx + movq 88(%r15), %r8 + movq %rcx, 80(%r15) + adcq 88(%r10), %r8 + movq 96(%r15), %rax + movq %r8, 88(%r15) + adcq 96(%r10), %rax + movq 104(%r15), %rcx + movq %rax, 96(%r15) + adcq 104(%r10), %rcx + movq 112(%r15), %r8 + movq %rcx, 104(%r15) + adcq 112(%r10), %r8 + movq 120(%r15), %rax + movq %r8, 112(%r15) + adcq 120(%r10), %rax + movq 128(%r15), %rcx + movq %rax, 120(%r15) + adcq 128(%r10), %rcx + movq 136(%r15), %r8 + movq %rcx, 128(%r15) + adcq 136(%r10), %r8 + movq 144(%r15), %rax + movq %r8, 136(%r15) + adcq 144(%r10), %rax + movq 152(%r15), %rcx + movq %rax, 144(%r15) + adcq 152(%r10), %rcx + movq 160(%r15), %r8 + movq %rcx, 152(%r15) + adcq 160(%r10), %r8 + movq 168(%r15), %rax + movq %r8, 160(%r15) + adcq 168(%r10), %rax + movq 176(%r15), %rcx + movq %rax, 168(%r15) + adcq 176(%r10), %rcx + movq 184(%r15), %r8 + movq %rcx, 176(%r15) + adcq 184(%r10), %r8 + movq 192(%r15), %rax + movq %r8, 184(%r15) + adcq 192(%r10), %rax + movq 200(%r15), %rcx + movq %rax, 192(%r15) + adcq 200(%r10), %rcx + movq 208(%r15), %r8 + movq %rcx, 200(%r15) + adcq 208(%r10), %r8 + movq 216(%r15), %rax + movq %r8, 208(%r15) + adcq 216(%r10), %rax + movq 224(%r15), %rcx + movq %rax, 216(%r15) + adcq 224(%r10), %rcx + movq 232(%r15), %r8 + movq %rcx, 224(%r15) + adcq 232(%r10), %r8 + movq 240(%r15), %rax + movq %r8, 232(%r15) + adcq 240(%r10), %rax + movq 248(%r15), %rcx + movq %rax, 240(%r15) + adcq 248(%r10), %rcx + movq 256(%r15), %r8 + movq %rcx, 248(%r15) + adcq 256(%r10), %r8 + movq 264(%r15), %rax + movq %r8, 256(%r15) + adcq 264(%r10), %rax + movq 272(%r15), %rcx + movq %rax, 264(%r15) + adcq 272(%r10), %rcx + movq 280(%r15), %r8 + movq %rcx, 272(%r15) + adcq 280(%r10), %r8 + movq 288(%r15), %rax + movq %r8, 280(%r15) + adcq 288(%r10), %rax + movq 296(%r15), %rcx + movq %rax, 288(%r15) + adcq 296(%r10), %rcx + movq 304(%r15), %r8 + movq %rcx, 296(%r15) + adcq 304(%r10), %r8 + movq 312(%r15), %rax + movq %r8, 304(%r15) + adcq 312(%r10), %rax + movq 320(%r15), %rcx + movq %rax, 312(%r15) + adcq 320(%r10), %rcx + movq 328(%r15), %r8 + movq 
%rcx, 320(%r15) + adcq 328(%r10), %r8 + movq 336(%r15), %rax + movq %r8, 328(%r15) + adcq 336(%r10), %rax + movq 344(%r15), %rcx + movq %rax, 336(%r15) + adcq 344(%r10), %rcx + movq 352(%r15), %r8 + movq %rcx, 344(%r15) + adcq 352(%r10), %r8 + movq 360(%r15), %rax + movq %r8, 352(%r15) + adcq 360(%r10), %rax + movq 368(%r15), %rcx + movq %rax, 360(%r15) + adcq 368(%r10), %rcx + movq 376(%r15), %r8 + movq %rcx, 368(%r15) + adcq 376(%r10), %r8 + movq 384(%r15), %rax + movq %r8, 376(%r15) + adcq 384(%r10), %rax + movq 392(%r15), %rcx + movq %rax, 384(%r15) + adcq 392(%r10), %rcx + movq 400(%r15), %r8 + movq %rcx, 392(%r15) + adcq 400(%r10), %r8 + movq 408(%r15), %rax + movq %r8, 400(%r15) + adcq 408(%r10), %rax + movq 416(%r15), %rcx + movq %rax, 408(%r15) + adcq 416(%r10), %rcx + movq 424(%r15), %r8 + movq %rcx, 416(%r15) + adcq 424(%r10), %r8 + movq 432(%r15), %rax + movq %r8, 424(%r15) + adcq 432(%r10), %rax + movq 440(%r15), %rcx + movq %rax, 432(%r15) + adcq 440(%r10), %rcx + movq 448(%r15), %r8 + movq %rcx, 440(%r15) + adcq 448(%r10), %r8 + movq 456(%r15), %rax + movq %r8, 448(%r15) + adcq 456(%r10), %rax + movq 464(%r15), %rcx + movq %rax, 456(%r15) + adcq 464(%r10), %rcx + movq 472(%r15), %r8 + movq %rcx, 464(%r15) + adcq 472(%r10), %r8 + movq 480(%r15), %rax + movq %r8, 472(%r15) + adcq 480(%r10), %rax + movq 488(%r15), %rcx + movq %rax, 480(%r15) + adcq 488(%r10), %rcx + movq 496(%r15), %r8 + movq %rcx, 488(%r15) + adcq 496(%r10), %r8 + movq 504(%r15), %rax + movq %r8, 496(%r15) + adcq 504(%r10), %rax + movq %rax, 504(%r15) + adcq $0, %r9 + movq %r9, 768(%rdi) + addq $256, %r15 + # Add + movq (%r15), %rax + xorq %r9, %r9 + addq (%r11), %rax + movq 8(%r15), %rcx + movq %rax, (%r15) + adcq 8(%r11), %rcx + movq 16(%r15), %r8 + movq %rcx, 8(%r15) + adcq 16(%r11), %r8 + movq 24(%r15), %rax + movq %r8, 16(%r15) + adcq 24(%r11), %rax + movq 32(%r15), %rcx + movq %rax, 24(%r15) + adcq 32(%r11), %rcx + movq 40(%r15), %r8 + movq %rcx, 32(%r15) + adcq 40(%r11), %r8 + movq 48(%r15), %rax + movq %r8, 40(%r15) + adcq 48(%r11), %rax + movq 56(%r15), %rcx + movq %rax, 48(%r15) + adcq 56(%r11), %rcx + movq 64(%r15), %r8 + movq %rcx, 56(%r15) + adcq 64(%r11), %r8 + movq 72(%r15), %rax + movq %r8, 64(%r15) + adcq 72(%r11), %rax + movq 80(%r15), %rcx + movq %rax, 72(%r15) + adcq 80(%r11), %rcx + movq 88(%r15), %r8 + movq %rcx, 80(%r15) + adcq 88(%r11), %r8 + movq 96(%r15), %rax + movq %r8, 88(%r15) + adcq 96(%r11), %rax + movq 104(%r15), %rcx + movq %rax, 96(%r15) + adcq 104(%r11), %rcx + movq 112(%r15), %r8 + movq %rcx, 104(%r15) + adcq 112(%r11), %r8 + movq 120(%r15), %rax + movq %r8, 112(%r15) + adcq 120(%r11), %rax + movq 128(%r15), %rcx + movq %rax, 120(%r15) + adcq 128(%r11), %rcx + movq 136(%r15), %r8 + movq %rcx, 128(%r15) + adcq 136(%r11), %r8 + movq 144(%r15), %rax + movq %r8, 136(%r15) + adcq 144(%r11), %rax + movq 152(%r15), %rcx + movq %rax, 144(%r15) + adcq 152(%r11), %rcx + movq 160(%r15), %r8 + movq %rcx, 152(%r15) + adcq 160(%r11), %r8 + movq 168(%r15), %rax + movq %r8, 160(%r15) + adcq 168(%r11), %rax + movq 176(%r15), %rcx + movq %rax, 168(%r15) + adcq 176(%r11), %rcx + movq 184(%r15), %r8 + movq %rcx, 176(%r15) + adcq 184(%r11), %r8 + movq 192(%r15), %rax + movq %r8, 184(%r15) + adcq 192(%r11), %rax + movq 200(%r15), %rcx + movq %rax, 192(%r15) + adcq 200(%r11), %rcx + movq 208(%r15), %r8 + movq %rcx, 200(%r15) + adcq 208(%r11), %r8 + movq 216(%r15), %rax + movq %r8, 208(%r15) + adcq 216(%r11), %rax + movq 224(%r15), %rcx + movq %rax, 216(%r15) + adcq 224(%r11), %rcx + movq 
232(%r15), %r8 + movq %rcx, 224(%r15) + adcq 232(%r11), %r8 + movq 240(%r15), %rax + movq %r8, 232(%r15) + adcq 240(%r11), %rax + movq 248(%r15), %rcx + movq %rax, 240(%r15) + adcq 248(%r11), %rcx + movq 256(%r15), %r8 + movq %rcx, 248(%r15) + adcq 256(%r11), %r8 + movq %r8, 256(%r15) + adcq $0, %r9 + # Add to zero + movq 264(%r11), %rax + adcq $0, %rax + movq 272(%r11), %rcx + movq %rax, 264(%r15) + adcq $0, %rcx + movq 280(%r11), %r8 + movq %rcx, 272(%r15) + adcq $0, %r8 + movq 288(%r11), %rax + movq %r8, 280(%r15) + adcq $0, %rax + movq 296(%r11), %rcx + movq %rax, 288(%r15) + adcq $0, %rcx + movq 304(%r11), %r8 + movq %rcx, 296(%r15) + adcq $0, %r8 + movq 312(%r11), %rax + movq %r8, 304(%r15) + adcq $0, %rax + movq 320(%r11), %rcx + movq %rax, 312(%r15) + adcq $0, %rcx + movq 328(%r11), %r8 + movq %rcx, 320(%r15) + adcq $0, %r8 + movq 336(%r11), %rax + movq %r8, 328(%r15) + adcq $0, %rax + movq 344(%r11), %rcx + movq %rax, 336(%r15) + adcq $0, %rcx + movq 352(%r11), %r8 + movq %rcx, 344(%r15) + adcq $0, %r8 + movq 360(%r11), %rax + movq %r8, 352(%r15) + adcq $0, %rax + movq 368(%r11), %rcx + movq %rax, 360(%r15) + adcq $0, %rcx + movq 376(%r11), %r8 + movq %rcx, 368(%r15) + adcq $0, %r8 + movq 384(%r11), %rax + movq %r8, 376(%r15) + adcq $0, %rax + movq 392(%r11), %rcx + movq %rax, 384(%r15) + adcq $0, %rcx + movq 400(%r11), %r8 + movq %rcx, 392(%r15) + adcq $0, %r8 + movq 408(%r11), %rax + movq %r8, 400(%r15) + adcq $0, %rax + movq 416(%r11), %rcx + movq %rax, 408(%r15) + adcq $0, %rcx + movq 424(%r11), %r8 + movq %rcx, 416(%r15) + adcq $0, %r8 + movq 432(%r11), %rax + movq %r8, 424(%r15) + adcq $0, %rax + movq 440(%r11), %rcx + movq %rax, 432(%r15) + adcq $0, %rcx + movq 448(%r11), %r8 + movq %rcx, 440(%r15) + adcq $0, %r8 + movq 456(%r11), %rax + movq %r8, 448(%r15) + adcq $0, %rax + movq 464(%r11), %rcx + movq %rax, 456(%r15) + adcq $0, %rcx + movq 472(%r11), %r8 + movq %rcx, 464(%r15) + adcq $0, %r8 + movq 480(%r11), %rax + movq %r8, 472(%r15) + adcq $0, %rax + movq 488(%r11), %rcx + movq %rax, 480(%r15) + adcq $0, %rcx + movq 496(%r11), %r8 + movq %rcx, 488(%r15) + adcq $0, %r8 + movq 504(%r11), %rax + movq %r8, 496(%r15) + adcq $0, %rax + movq %rax, 504(%r15) + addq $1576, %rsp + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_4096_mul_avx2_64,.-sp_4096_mul_avx2_64 +#endif /* __APPLE__ */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
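+ *
+ * One level of Karatsuba is used below: with a = ah*2^2048 + al
+ * (32 limbs each),
+ *
+ *   a^2 = ah^2*2^4096 + ((ah + al)^2 - ah^2 - al^2)*2^2048 + al^2
+ *
+ * so three 32-limb squarings (sp_2048_sqr_avx2_32) replace one
+ * 64-limb squaring; the pextq/adcq run afterwards folds the carry
+ * dropped from (ah + al) back into the result. A toy word-size
+ * demonstration of the identity, assuming <stdint.h> and unsigned
+ * __int128 (illustrative sketch only, not the generated code):
+ *
+ *   uint64_t al = x & 0xffffffffU, ah = x >> 32;  // split 64-bit x
+ *   unsigned __int128 z0 = (unsigned __int128)al * al;
+ *   unsigned __int128 z2 = (unsigned __int128)ah * ah;
+ *   unsigned __int128 z1 = (unsigned __int128)(al + ah) * (al + ah);
+ *   // x*x == (z2 << 64) + ((z1 - z0 - z2) << 32) + z0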
+ */ +#ifndef __APPLE__ +.globl sp_4096_sqr_avx2_64 +.type sp_4096_sqr_avx2_64,@function +.align 16 +sp_4096_sqr_avx2_64: +#else +.globl _sp_4096_sqr_avx2_64 +.p2align 4 +_sp_4096_sqr_avx2_64: +#endif /* __APPLE__ */ + subq $1304, %rsp + movq %rdi, 1280(%rsp) + movq %rsi, 1288(%rsp) + leaq 1024(%rsp), %r8 + leaq 256(%rsi), %r9 + # Add + movq (%rsi), %rdx + xorq %rcx, %rcx + addq (%r9), %rdx + movq 8(%rsi), %rax + movq %rdx, (%r8) + adcq 8(%r9), %rax + movq 16(%rsi), %rdx + movq %rax, 8(%r8) + adcq 16(%r9), %rdx + movq 24(%rsi), %rax + movq %rdx, 16(%r8) + adcq 24(%r9), %rax + movq 32(%rsi), %rdx + movq %rax, 24(%r8) + adcq 32(%r9), %rdx + movq 40(%rsi), %rax + movq %rdx, 32(%r8) + adcq 40(%r9), %rax + movq 48(%rsi), %rdx + movq %rax, 40(%r8) + adcq 48(%r9), %rdx + movq 56(%rsi), %rax + movq %rdx, 48(%r8) + adcq 56(%r9), %rax + movq 64(%rsi), %rdx + movq %rax, 56(%r8) + adcq 64(%r9), %rdx + movq 72(%rsi), %rax + movq %rdx, 64(%r8) + adcq 72(%r9), %rax + movq 80(%rsi), %rdx + movq %rax, 72(%r8) + adcq 80(%r9), %rdx + movq 88(%rsi), %rax + movq %rdx, 80(%r8) + adcq 88(%r9), %rax + movq 96(%rsi), %rdx + movq %rax, 88(%r8) + adcq 96(%r9), %rdx + movq 104(%rsi), %rax + movq %rdx, 96(%r8) + adcq 104(%r9), %rax + movq 112(%rsi), %rdx + movq %rax, 104(%r8) + adcq 112(%r9), %rdx + movq 120(%rsi), %rax + movq %rdx, 112(%r8) + adcq 120(%r9), %rax + movq 128(%rsi), %rdx + movq %rax, 120(%r8) + adcq 128(%r9), %rdx + movq 136(%rsi), %rax + movq %rdx, 128(%r8) + adcq 136(%r9), %rax + movq 144(%rsi), %rdx + movq %rax, 136(%r8) + adcq 144(%r9), %rdx + movq 152(%rsi), %rax + movq %rdx, 144(%r8) + adcq 152(%r9), %rax + movq 160(%rsi), %rdx + movq %rax, 152(%r8) + adcq 160(%r9), %rdx + movq 168(%rsi), %rax + movq %rdx, 160(%r8) + adcq 168(%r9), %rax + movq 176(%rsi), %rdx + movq %rax, 168(%r8) + adcq 176(%r9), %rdx + movq 184(%rsi), %rax + movq %rdx, 176(%r8) + adcq 184(%r9), %rax + movq 192(%rsi), %rdx + movq %rax, 184(%r8) + adcq 192(%r9), %rdx + movq 200(%rsi), %rax + movq %rdx, 192(%r8) + adcq 200(%r9), %rax + movq 208(%rsi), %rdx + movq %rax, 200(%r8) + adcq 208(%r9), %rdx + movq 216(%rsi), %rax + movq %rdx, 208(%r8) + adcq 216(%r9), %rax + movq 224(%rsi), %rdx + movq %rax, 216(%r8) + adcq 224(%r9), %rdx + movq 232(%rsi), %rax + movq %rdx, 224(%r8) + adcq 232(%r9), %rax + movq 240(%rsi), %rdx + movq %rax, 232(%r8) + adcq 240(%r9), %rdx + movq 248(%rsi), %rax + movq %rdx, 240(%r8) + adcq 248(%r9), %rax + movq %rax, 248(%r8) + adcq $0, %rcx + movq %rcx, 1296(%rsp) + movq %r8, %rsi + movq %rsp, %rdi +#ifndef __APPLE__ + callq sp_2048_sqr_avx2_32@plt +#else + callq _sp_2048_sqr_avx2_32 +#endif /* __APPLE__ */ + movq 1288(%rsp), %rsi + leaq 512(%rsp), %rdi + addq $256, %rsi +#ifndef __APPLE__ + callq sp_2048_sqr_avx2_32@plt +#else + callq _sp_2048_sqr_avx2_32 +#endif /* __APPLE__ */ + movq 1288(%rsp), %rsi + movq 1280(%rsp), %rdi +#ifndef __APPLE__ + callq sp_2048_sqr_avx2_32@plt +#else + callq _sp_2048_sqr_avx2_32 +#endif /* __APPLE__ */ + movq 1296(%rsp), %r10 + leaq 1024(%rsp), %r8 + movq %r10, %rcx + negq %r10 + movq (%r8), %rdx + pextq %r10, %rdx, %rdx + addq %rdx, %rdx + movq 8(%r8), %rax + movq %rdx, 512(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 16(%r8), %rdx + movq %rax, 520(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 24(%r8), %rax + movq %rdx, 528(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 32(%r8), %rdx + movq %rax, 536(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 40(%r8), %rax + movq %rdx, 544(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 
48(%r8), %rdx + movq %rax, 552(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 56(%r8), %rax + movq %rdx, 560(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 64(%r8), %rdx + movq %rax, 568(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 72(%r8), %rax + movq %rdx, 576(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 80(%r8), %rdx + movq %rax, 584(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 88(%r8), %rax + movq %rdx, 592(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 96(%r8), %rdx + movq %rax, 600(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 104(%r8), %rax + movq %rdx, 608(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 112(%r8), %rdx + movq %rax, 616(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 120(%r8), %rax + movq %rdx, 624(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 128(%r8), %rdx + movq %rax, 632(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 136(%r8), %rax + movq %rdx, 640(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 144(%r8), %rdx + movq %rax, 648(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 152(%r8), %rax + movq %rdx, 656(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 160(%r8), %rdx + movq %rax, 664(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 168(%r8), %rax + movq %rdx, 672(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 176(%r8), %rdx + movq %rax, 680(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 184(%r8), %rax + movq %rdx, 688(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 192(%r8), %rdx + movq %rax, 696(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 200(%r8), %rax + movq %rdx, 704(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 208(%r8), %rdx + movq %rax, 712(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 216(%r8), %rax + movq %rdx, 720(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 224(%r8), %rdx + movq %rax, 728(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 232(%r8), %rax + movq %rdx, 736(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq 240(%r8), %rdx + movq %rax, 744(%rdi) + pextq %r10, %rdx, %rdx + adcq %rdx, %rdx + movq 248(%r8), %rax + movq %rdx, 752(%rdi) + pextq %r10, %rax, %rax + adcq %rax, %rax + movq %rax, 760(%rdi) + adcq $0, %rcx + leaq 512(%rsp), %rsi + movq %rsp, %r8 + movq (%r8), %rdx + subq (%rsi), %rdx + movq 8(%r8), %rax + movq %rdx, (%r8) + sbbq 8(%rsi), %rax + movq 16(%r8), %rdx + movq %rax, 8(%r8) + sbbq 16(%rsi), %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r8) + sbbq 24(%rsi), %rax + movq 32(%r8), %rdx + movq %rax, 24(%r8) + sbbq 32(%rsi), %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r8) + sbbq 40(%rsi), %rax + movq 48(%r8), %rdx + movq %rax, 40(%r8) + sbbq 48(%rsi), %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r8) + sbbq 56(%rsi), %rax + movq 64(%r8), %rdx + movq %rax, 56(%r8) + sbbq 64(%rsi), %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r8) + sbbq 72(%rsi), %rax + movq 80(%r8), %rdx + movq %rax, 72(%r8) + sbbq 80(%rsi), %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r8) + sbbq 88(%rsi), %rax + movq 96(%r8), %rdx + movq %rax, 88(%r8) + sbbq 96(%rsi), %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r8) + sbbq 104(%rsi), %rax + movq 112(%r8), %rdx + movq %rax, 104(%r8) + sbbq 112(%rsi), %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r8) + sbbq 120(%rsi), %rax + movq 128(%r8), %rdx + movq %rax, 120(%r8) + sbbq 128(%rsi), %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r8) + sbbq 136(%rsi), %rax + movq 144(%r8), %rdx + movq %rax, 136(%r8) + sbbq 144(%rsi), 
%rdx + movq 152(%r8), %rax + movq %rdx, 144(%r8) + sbbq 152(%rsi), %rax + movq 160(%r8), %rdx + movq %rax, 152(%r8) + sbbq 160(%rsi), %rdx + movq 168(%r8), %rax + movq %rdx, 160(%r8) + sbbq 168(%rsi), %rax + movq 176(%r8), %rdx + movq %rax, 168(%r8) + sbbq 176(%rsi), %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r8) + sbbq 184(%rsi), %rax + movq 192(%r8), %rdx + movq %rax, 184(%r8) + sbbq 192(%rsi), %rdx + movq 200(%r8), %rax + movq %rdx, 192(%r8) + sbbq 200(%rsi), %rax + movq 208(%r8), %rdx + movq %rax, 200(%r8) + sbbq 208(%rsi), %rdx + movq 216(%r8), %rax + movq %rdx, 208(%r8) + sbbq 216(%rsi), %rax + movq 224(%r8), %rdx + movq %rax, 216(%r8) + sbbq 224(%rsi), %rdx + movq 232(%r8), %rax + movq %rdx, 224(%r8) + sbbq 232(%rsi), %rax + movq 240(%r8), %rdx + movq %rax, 232(%r8) + sbbq 240(%rsi), %rdx + movq 248(%r8), %rax + movq %rdx, 240(%r8) + sbbq 248(%rsi), %rax + movq 256(%r8), %rdx + movq %rax, 248(%r8) + sbbq 256(%rsi), %rdx + movq 264(%r8), %rax + movq %rdx, 256(%r8) + sbbq 264(%rsi), %rax + movq 272(%r8), %rdx + movq %rax, 264(%r8) + sbbq 272(%rsi), %rdx + movq 280(%r8), %rax + movq %rdx, 272(%r8) + sbbq 280(%rsi), %rax + movq 288(%r8), %rdx + movq %rax, 280(%r8) + sbbq 288(%rsi), %rdx + movq 296(%r8), %rax + movq %rdx, 288(%r8) + sbbq 296(%rsi), %rax + movq 304(%r8), %rdx + movq %rax, 296(%r8) + sbbq 304(%rsi), %rdx + movq 312(%r8), %rax + movq %rdx, 304(%r8) + sbbq 312(%rsi), %rax + movq 320(%r8), %rdx + movq %rax, 312(%r8) + sbbq 320(%rsi), %rdx + movq 328(%r8), %rax + movq %rdx, 320(%r8) + sbbq 328(%rsi), %rax + movq 336(%r8), %rdx + movq %rax, 328(%r8) + sbbq 336(%rsi), %rdx + movq 344(%r8), %rax + movq %rdx, 336(%r8) + sbbq 344(%rsi), %rax + movq 352(%r8), %rdx + movq %rax, 344(%r8) + sbbq 352(%rsi), %rdx + movq 360(%r8), %rax + movq %rdx, 352(%r8) + sbbq 360(%rsi), %rax + movq 368(%r8), %rdx + movq %rax, 360(%r8) + sbbq 368(%rsi), %rdx + movq 376(%r8), %rax + movq %rdx, 368(%r8) + sbbq 376(%rsi), %rax + movq 384(%r8), %rdx + movq %rax, 376(%r8) + sbbq 384(%rsi), %rdx + movq 392(%r8), %rax + movq %rdx, 384(%r8) + sbbq 392(%rsi), %rax + movq 400(%r8), %rdx + movq %rax, 392(%r8) + sbbq 400(%rsi), %rdx + movq 408(%r8), %rax + movq %rdx, 400(%r8) + sbbq 408(%rsi), %rax + movq 416(%r8), %rdx + movq %rax, 408(%r8) + sbbq 416(%rsi), %rdx + movq 424(%r8), %rax + movq %rdx, 416(%r8) + sbbq 424(%rsi), %rax + movq 432(%r8), %rdx + movq %rax, 424(%r8) + sbbq 432(%rsi), %rdx + movq 440(%r8), %rax + movq %rdx, 432(%r8) + sbbq 440(%rsi), %rax + movq 448(%r8), %rdx + movq %rax, 440(%r8) + sbbq 448(%rsi), %rdx + movq 456(%r8), %rax + movq %rdx, 448(%r8) + sbbq 456(%rsi), %rax + movq 464(%r8), %rdx + movq %rax, 456(%r8) + sbbq 464(%rsi), %rdx + movq 472(%r8), %rax + movq %rdx, 464(%r8) + sbbq 472(%rsi), %rax + movq 480(%r8), %rdx + movq %rax, 472(%r8) + sbbq 480(%rsi), %rdx + movq 488(%r8), %rax + movq %rdx, 480(%r8) + sbbq 488(%rsi), %rax + movq 496(%r8), %rdx + movq %rax, 488(%r8) + sbbq 496(%rsi), %rdx + movq 504(%r8), %rax + movq %rdx, 496(%r8) + sbbq 504(%rsi), %rax + movq %rax, 504(%r8) + sbbq $0, %rcx + movq (%r8), %rdx + subq (%rdi), %rdx + movq 8(%r8), %rax + movq %rdx, (%r8) + sbbq 8(%rdi), %rax + movq 16(%r8), %rdx + movq %rax, 8(%r8) + sbbq 16(%rdi), %rdx + movq 24(%r8), %rax + movq %rdx, 16(%r8) + sbbq 24(%rdi), %rax + movq 32(%r8), %rdx + movq %rax, 24(%r8) + sbbq 32(%rdi), %rdx + movq 40(%r8), %rax + movq %rdx, 32(%r8) + sbbq 40(%rdi), %rax + movq 48(%r8), %rdx + movq %rax, 40(%r8) + sbbq 48(%rdi), %rdx + movq 56(%r8), %rax + movq %rdx, 48(%r8) + sbbq 56(%rdi), %rax + movq 
64(%r8), %rdx + movq %rax, 56(%r8) + sbbq 64(%rdi), %rdx + movq 72(%r8), %rax + movq %rdx, 64(%r8) + sbbq 72(%rdi), %rax + movq 80(%r8), %rdx + movq %rax, 72(%r8) + sbbq 80(%rdi), %rdx + movq 88(%r8), %rax + movq %rdx, 80(%r8) + sbbq 88(%rdi), %rax + movq 96(%r8), %rdx + movq %rax, 88(%r8) + sbbq 96(%rdi), %rdx + movq 104(%r8), %rax + movq %rdx, 96(%r8) + sbbq 104(%rdi), %rax + movq 112(%r8), %rdx + movq %rax, 104(%r8) + sbbq 112(%rdi), %rdx + movq 120(%r8), %rax + movq %rdx, 112(%r8) + sbbq 120(%rdi), %rax + movq 128(%r8), %rdx + movq %rax, 120(%r8) + sbbq 128(%rdi), %rdx + movq 136(%r8), %rax + movq %rdx, 128(%r8) + sbbq 136(%rdi), %rax + movq 144(%r8), %rdx + movq %rax, 136(%r8) + sbbq 144(%rdi), %rdx + movq 152(%r8), %rax + movq %rdx, 144(%r8) + sbbq 152(%rdi), %rax + movq 160(%r8), %rdx + movq %rax, 152(%r8) + sbbq 160(%rdi), %rdx + movq 168(%r8), %rax + movq %rdx, 160(%r8) + sbbq 168(%rdi), %rax + movq 176(%r8), %rdx + movq %rax, 168(%r8) + sbbq 176(%rdi), %rdx + movq 184(%r8), %rax + movq %rdx, 176(%r8) + sbbq 184(%rdi), %rax + movq 192(%r8), %rdx + movq %rax, 184(%r8) + sbbq 192(%rdi), %rdx + movq 200(%r8), %rax + movq %rdx, 192(%r8) + sbbq 200(%rdi), %rax + movq 208(%r8), %rdx + movq %rax, 200(%r8) + sbbq 208(%rdi), %rdx + movq 216(%r8), %rax + movq %rdx, 208(%r8) + sbbq 216(%rdi), %rax + movq 224(%r8), %rdx + movq %rax, 216(%r8) + sbbq 224(%rdi), %rdx + movq 232(%r8), %rax + movq %rdx, 224(%r8) + sbbq 232(%rdi), %rax + movq 240(%r8), %rdx + movq %rax, 232(%r8) + sbbq 240(%rdi), %rdx + movq 248(%r8), %rax + movq %rdx, 240(%r8) + sbbq 248(%rdi), %rax + movq 256(%r8), %rdx + movq %rax, 248(%r8) + sbbq 256(%rdi), %rdx + movq 264(%r8), %rax + movq %rdx, 256(%r8) + sbbq 264(%rdi), %rax + movq 272(%r8), %rdx + movq %rax, 264(%r8) + sbbq 272(%rdi), %rdx + movq 280(%r8), %rax + movq %rdx, 272(%r8) + sbbq 280(%rdi), %rax + movq 288(%r8), %rdx + movq %rax, 280(%r8) + sbbq 288(%rdi), %rdx + movq 296(%r8), %rax + movq %rdx, 288(%r8) + sbbq 296(%rdi), %rax + movq 304(%r8), %rdx + movq %rax, 296(%r8) + sbbq 304(%rdi), %rdx + movq 312(%r8), %rax + movq %rdx, 304(%r8) + sbbq 312(%rdi), %rax + movq 320(%r8), %rdx + movq %rax, 312(%r8) + sbbq 320(%rdi), %rdx + movq 328(%r8), %rax + movq %rdx, 320(%r8) + sbbq 328(%rdi), %rax + movq 336(%r8), %rdx + movq %rax, 328(%r8) + sbbq 336(%rdi), %rdx + movq 344(%r8), %rax + movq %rdx, 336(%r8) + sbbq 344(%rdi), %rax + movq 352(%r8), %rdx + movq %rax, 344(%r8) + sbbq 352(%rdi), %rdx + movq 360(%r8), %rax + movq %rdx, 352(%r8) + sbbq 360(%rdi), %rax + movq 368(%r8), %rdx + movq %rax, 360(%r8) + sbbq 368(%rdi), %rdx + movq 376(%r8), %rax + movq %rdx, 368(%r8) + sbbq 376(%rdi), %rax + movq 384(%r8), %rdx + movq %rax, 376(%r8) + sbbq 384(%rdi), %rdx + movq 392(%r8), %rax + movq %rdx, 384(%r8) + sbbq 392(%rdi), %rax + movq 400(%r8), %rdx + movq %rax, 392(%r8) + sbbq 400(%rdi), %rdx + movq 408(%r8), %rax + movq %rdx, 400(%r8) + sbbq 408(%rdi), %rax + movq 416(%r8), %rdx + movq %rax, 408(%r8) + sbbq 416(%rdi), %rdx + movq 424(%r8), %rax + movq %rdx, 416(%r8) + sbbq 424(%rdi), %rax + movq 432(%r8), %rdx + movq %rax, 424(%r8) + sbbq 432(%rdi), %rdx + movq 440(%r8), %rax + movq %rdx, 432(%r8) + sbbq 440(%rdi), %rax + movq 448(%r8), %rdx + movq %rax, 440(%r8) + sbbq 448(%rdi), %rdx + movq 456(%r8), %rax + movq %rdx, 448(%r8) + sbbq 456(%rdi), %rax + movq 464(%r8), %rdx + movq %rax, 456(%r8) + sbbq 464(%rdi), %rdx + movq 472(%r8), %rax + movq %rdx, 464(%r8) + sbbq 472(%rdi), %rax + movq 480(%r8), %rdx + movq %rax, 472(%r8) + sbbq 480(%rdi), %rdx + movq 488(%r8), %rax + 
movq %rdx, 480(%r8) + sbbq 488(%rdi), %rax + movq 496(%r8), %rdx + movq %rax, 488(%r8) + sbbq 496(%rdi), %rdx + movq 504(%r8), %rax + movq %rdx, 496(%r8) + sbbq 504(%rdi), %rax + movq %rax, 504(%r8) + sbbq $0, %rcx + # Add in place + movq 256(%rdi), %rdx + addq (%r8), %rdx + movq 264(%rdi), %rax + movq %rdx, 256(%rdi) + adcq 8(%r8), %rax + movq 272(%rdi), %rdx + movq %rax, 264(%rdi) + adcq 16(%r8), %rdx + movq 280(%rdi), %rax + movq %rdx, 272(%rdi) + adcq 24(%r8), %rax + movq 288(%rdi), %rdx + movq %rax, 280(%rdi) + adcq 32(%r8), %rdx + movq 296(%rdi), %rax + movq %rdx, 288(%rdi) + adcq 40(%r8), %rax + movq 304(%rdi), %rdx + movq %rax, 296(%rdi) + adcq 48(%r8), %rdx + movq 312(%rdi), %rax + movq %rdx, 304(%rdi) + adcq 56(%r8), %rax + movq 320(%rdi), %rdx + movq %rax, 312(%rdi) + adcq 64(%r8), %rdx + movq 328(%rdi), %rax + movq %rdx, 320(%rdi) + adcq 72(%r8), %rax + movq 336(%rdi), %rdx + movq %rax, 328(%rdi) + adcq 80(%r8), %rdx + movq 344(%rdi), %rax + movq %rdx, 336(%rdi) + adcq 88(%r8), %rax + movq 352(%rdi), %rdx + movq %rax, 344(%rdi) + adcq 96(%r8), %rdx + movq 360(%rdi), %rax + movq %rdx, 352(%rdi) + adcq 104(%r8), %rax + movq 368(%rdi), %rdx + movq %rax, 360(%rdi) + adcq 112(%r8), %rdx + movq 376(%rdi), %rax + movq %rdx, 368(%rdi) + adcq 120(%r8), %rax + movq 384(%rdi), %rdx + movq %rax, 376(%rdi) + adcq 128(%r8), %rdx + movq 392(%rdi), %rax + movq %rdx, 384(%rdi) + adcq 136(%r8), %rax + movq 400(%rdi), %rdx + movq %rax, 392(%rdi) + adcq 144(%r8), %rdx + movq 408(%rdi), %rax + movq %rdx, 400(%rdi) + adcq 152(%r8), %rax + movq 416(%rdi), %rdx + movq %rax, 408(%rdi) + adcq 160(%r8), %rdx + movq 424(%rdi), %rax + movq %rdx, 416(%rdi) + adcq 168(%r8), %rax + movq 432(%rdi), %rdx + movq %rax, 424(%rdi) + adcq 176(%r8), %rdx + movq 440(%rdi), %rax + movq %rdx, 432(%rdi) + adcq 184(%r8), %rax + movq 448(%rdi), %rdx + movq %rax, 440(%rdi) + adcq 192(%r8), %rdx + movq 456(%rdi), %rax + movq %rdx, 448(%rdi) + adcq 200(%r8), %rax + movq 464(%rdi), %rdx + movq %rax, 456(%rdi) + adcq 208(%r8), %rdx + movq 472(%rdi), %rax + movq %rdx, 464(%rdi) + adcq 216(%r8), %rax + movq 480(%rdi), %rdx + movq %rax, 472(%rdi) + adcq 224(%r8), %rdx + movq 488(%rdi), %rax + movq %rdx, 480(%rdi) + adcq 232(%r8), %rax + movq 496(%rdi), %rdx + movq %rax, 488(%rdi) + adcq 240(%r8), %rdx + movq 504(%rdi), %rax + movq %rdx, 496(%rdi) + adcq 248(%r8), %rax + movq 512(%rdi), %rdx + movq %rax, 504(%rdi) + adcq 256(%r8), %rdx + movq 520(%rdi), %rax + movq %rdx, 512(%rdi) + adcq 264(%r8), %rax + movq 528(%rdi), %rdx + movq %rax, 520(%rdi) + adcq 272(%r8), %rdx + movq 536(%rdi), %rax + movq %rdx, 528(%rdi) + adcq 280(%r8), %rax + movq 544(%rdi), %rdx + movq %rax, 536(%rdi) + adcq 288(%r8), %rdx + movq 552(%rdi), %rax + movq %rdx, 544(%rdi) + adcq 296(%r8), %rax + movq 560(%rdi), %rdx + movq %rax, 552(%rdi) + adcq 304(%r8), %rdx + movq 568(%rdi), %rax + movq %rdx, 560(%rdi) + adcq 312(%r8), %rax + movq 576(%rdi), %rdx + movq %rax, 568(%rdi) + adcq 320(%r8), %rdx + movq 584(%rdi), %rax + movq %rdx, 576(%rdi) + adcq 328(%r8), %rax + movq 592(%rdi), %rdx + movq %rax, 584(%rdi) + adcq 336(%r8), %rdx + movq 600(%rdi), %rax + movq %rdx, 592(%rdi) + adcq 344(%r8), %rax + movq 608(%rdi), %rdx + movq %rax, 600(%rdi) + adcq 352(%r8), %rdx + movq 616(%rdi), %rax + movq %rdx, 608(%rdi) + adcq 360(%r8), %rax + movq 624(%rdi), %rdx + movq %rax, 616(%rdi) + adcq 368(%r8), %rdx + movq 632(%rdi), %rax + movq %rdx, 624(%rdi) + adcq 376(%r8), %rax + movq 640(%rdi), %rdx + movq %rax, 632(%rdi) + adcq 384(%r8), %rdx + movq 648(%rdi), %rax + movq 
%rdx, 640(%rdi) + adcq 392(%r8), %rax + movq 656(%rdi), %rdx + movq %rax, 648(%rdi) + adcq 400(%r8), %rdx + movq 664(%rdi), %rax + movq %rdx, 656(%rdi) + adcq 408(%r8), %rax + movq 672(%rdi), %rdx + movq %rax, 664(%rdi) + adcq 416(%r8), %rdx + movq 680(%rdi), %rax + movq %rdx, 672(%rdi) + adcq 424(%r8), %rax + movq 688(%rdi), %rdx + movq %rax, 680(%rdi) + adcq 432(%r8), %rdx + movq 696(%rdi), %rax + movq %rdx, 688(%rdi) + adcq 440(%r8), %rax + movq 704(%rdi), %rdx + movq %rax, 696(%rdi) + adcq 448(%r8), %rdx + movq 712(%rdi), %rax + movq %rdx, 704(%rdi) + adcq 456(%r8), %rax + movq 720(%rdi), %rdx + movq %rax, 712(%rdi) + adcq 464(%r8), %rdx + movq 728(%rdi), %rax + movq %rdx, 720(%rdi) + adcq 472(%r8), %rax + movq 736(%rdi), %rdx + movq %rax, 728(%rdi) + adcq 480(%r8), %rdx + movq 744(%rdi), %rax + movq %rdx, 736(%rdi) + adcq 488(%r8), %rax + movq 752(%rdi), %rdx + movq %rax, 744(%rdi) + adcq 496(%r8), %rdx + movq 760(%rdi), %rax + movq %rdx, 752(%rdi) + adcq 504(%r8), %rax + movq %rax, 760(%rdi) + adcq $0, %rcx + movq %rcx, 768(%rdi) + # Add in place + movq 512(%rdi), %rdx + xorq %rcx, %rcx + addq (%rsi), %rdx + movq 520(%rdi), %rax + movq %rdx, 512(%rdi) + adcq 8(%rsi), %rax + movq 528(%rdi), %rdx + movq %rax, 520(%rdi) + adcq 16(%rsi), %rdx + movq 536(%rdi), %rax + movq %rdx, 528(%rdi) + adcq 24(%rsi), %rax + movq 544(%rdi), %rdx + movq %rax, 536(%rdi) + adcq 32(%rsi), %rdx + movq 552(%rdi), %rax + movq %rdx, 544(%rdi) + adcq 40(%rsi), %rax + movq 560(%rdi), %rdx + movq %rax, 552(%rdi) + adcq 48(%rsi), %rdx + movq 568(%rdi), %rax + movq %rdx, 560(%rdi) + adcq 56(%rsi), %rax + movq 576(%rdi), %rdx + movq %rax, 568(%rdi) + adcq 64(%rsi), %rdx + movq 584(%rdi), %rax + movq %rdx, 576(%rdi) + adcq 72(%rsi), %rax + movq 592(%rdi), %rdx + movq %rax, 584(%rdi) + adcq 80(%rsi), %rdx + movq 600(%rdi), %rax + movq %rdx, 592(%rdi) + adcq 88(%rsi), %rax + movq 608(%rdi), %rdx + movq %rax, 600(%rdi) + adcq 96(%rsi), %rdx + movq 616(%rdi), %rax + movq %rdx, 608(%rdi) + adcq 104(%rsi), %rax + movq 624(%rdi), %rdx + movq %rax, 616(%rdi) + adcq 112(%rsi), %rdx + movq 632(%rdi), %rax + movq %rdx, 624(%rdi) + adcq 120(%rsi), %rax + movq 640(%rdi), %rdx + movq %rax, 632(%rdi) + adcq 128(%rsi), %rdx + movq 648(%rdi), %rax + movq %rdx, 640(%rdi) + adcq 136(%rsi), %rax + movq 656(%rdi), %rdx + movq %rax, 648(%rdi) + adcq 144(%rsi), %rdx + movq 664(%rdi), %rax + movq %rdx, 656(%rdi) + adcq 152(%rsi), %rax + movq 672(%rdi), %rdx + movq %rax, 664(%rdi) + adcq 160(%rsi), %rdx + movq 680(%rdi), %rax + movq %rdx, 672(%rdi) + adcq 168(%rsi), %rax + movq 688(%rdi), %rdx + movq %rax, 680(%rdi) + adcq 176(%rsi), %rdx + movq 696(%rdi), %rax + movq %rdx, 688(%rdi) + adcq 184(%rsi), %rax + movq 704(%rdi), %rdx + movq %rax, 696(%rdi) + adcq 192(%rsi), %rdx + movq 712(%rdi), %rax + movq %rdx, 704(%rdi) + adcq 200(%rsi), %rax + movq 720(%rdi), %rdx + movq %rax, 712(%rdi) + adcq 208(%rsi), %rdx + movq 728(%rdi), %rax + movq %rdx, 720(%rdi) + adcq 216(%rsi), %rax + movq 736(%rdi), %rdx + movq %rax, 728(%rdi) + adcq 224(%rsi), %rdx + movq 744(%rdi), %rax + movq %rdx, 736(%rdi) + adcq 232(%rsi), %rax + movq 752(%rdi), %rdx + movq %rax, 744(%rdi) + adcq 240(%rsi), %rdx + movq 760(%rdi), %rax + movq %rdx, 752(%rdi) + adcq 248(%rsi), %rax + movq 768(%rdi), %rdx + movq %rax, 760(%rdi) + adcq 256(%rsi), %rdx + movq %rdx, 768(%rdi) + adcq $0, %rcx + # Add to zero + movq 264(%rsi), %rdx + adcq $0, %rdx + movq 272(%rsi), %rax + movq %rdx, 776(%rdi) + adcq $0, %rax + movq 280(%rsi), %rdx + movq %rax, 784(%rdi) + adcq $0, %rdx + movq 
288(%rsi), %rax + movq %rdx, 792(%rdi) + adcq $0, %rax + movq 296(%rsi), %rdx + movq %rax, 800(%rdi) + adcq $0, %rdx + movq 304(%rsi), %rax + movq %rdx, 808(%rdi) + adcq $0, %rax + movq 312(%rsi), %rdx + movq %rax, 816(%rdi) + adcq $0, %rdx + movq 320(%rsi), %rax + movq %rdx, 824(%rdi) + adcq $0, %rax + movq 328(%rsi), %rdx + movq %rax, 832(%rdi) + adcq $0, %rdx + movq 336(%rsi), %rax + movq %rdx, 840(%rdi) + adcq $0, %rax + movq 344(%rsi), %rdx + movq %rax, 848(%rdi) + adcq $0, %rdx + movq 352(%rsi), %rax + movq %rdx, 856(%rdi) + adcq $0, %rax + movq 360(%rsi), %rdx + movq %rax, 864(%rdi) + adcq $0, %rdx + movq 368(%rsi), %rax + movq %rdx, 872(%rdi) + adcq $0, %rax + movq 376(%rsi), %rdx + movq %rax, 880(%rdi) + adcq $0, %rdx + movq 384(%rsi), %rax + movq %rdx, 888(%rdi) + adcq $0, %rax + movq 392(%rsi), %rdx + movq %rax, 896(%rdi) + adcq $0, %rdx + movq 400(%rsi), %rax + movq %rdx, 904(%rdi) + adcq $0, %rax + movq 408(%rsi), %rdx + movq %rax, 912(%rdi) + adcq $0, %rdx + movq 416(%rsi), %rax + movq %rdx, 920(%rdi) + adcq $0, %rax + movq 424(%rsi), %rdx + movq %rax, 928(%rdi) + adcq $0, %rdx + movq 432(%rsi), %rax + movq %rdx, 936(%rdi) + adcq $0, %rax + movq 440(%rsi), %rdx + movq %rax, 944(%rdi) + adcq $0, %rdx + movq 448(%rsi), %rax + movq %rdx, 952(%rdi) + adcq $0, %rax + movq 456(%rsi), %rdx + movq %rax, 960(%rdi) + adcq $0, %rdx + movq 464(%rsi), %rax + movq %rdx, 968(%rdi) + adcq $0, %rax + movq 472(%rsi), %rdx + movq %rax, 976(%rdi) + adcq $0, %rdx + movq 480(%rsi), %rax + movq %rdx, 984(%rdi) + adcq $0, %rax + movq 488(%rsi), %rdx + movq %rax, 992(%rdi) + adcq $0, %rdx + movq 496(%rsi), %rax + movq %rdx, 1000(%rdi) + adcq $0, %rax + movq 504(%rsi), %rdx + movq %rax, 1008(%rdi) + adcq $0, %rdx + movq %rdx, 1016(%rdi) + addq $1304, %rsp + repz retq +#ifndef __APPLE__ +.size sp_4096_sqr_avx2_64,.-sp_4096_sqr_avx2_64 +#endif /* __APPLE__ */ +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
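+ *
+ * The unrolled body below is one long carry chain: each 64x64->128
+ * bit mulq adds its low half into the current output word and its
+ * high half into the next carry. A rough C equivalent, assuming
+ * <stdint.h>, 64-bit limbs and unsigned __int128 (illustrative
+ * sketch, not the generated code):
+ *
+ *   void mul_d_sketch(uint64_t* r, const uint64_t* a, uint64_t b)
+ *   {
+ *       unsigned __int128 t = 0;
+ *       for (int i = 0; i < 64; i++) {
+ *           t += (unsigned __int128)a[i] * b; // product plus carry
+ *           r[i] = (uint64_t)t;               // emit low 64 bits
+ *           t >>= 64;                         // keep the carry
+ *       }
+ *       r[64] = (uint64_t)t;                  // 65th result digit
+ *   }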
+ */ +#ifndef __APPLE__ +.globl sp_4096_mul_d_64 +.type sp_4096_mul_d_64,@function +.align 16 +sp_4096_mul_d_64: +#else +.globl _sp_4096_mul_d_64 +.p2align 4 +_sp_4096_mul_d_64: +#endif /* __APPLE__ */ + movq %rdx, %rcx + # A[0] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + movq %r8, (%rdi) + # A[1] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 8(%rsi) + addq %rax, %r9 + movq %r9, 8(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[2] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 16(%rsi) + addq %rax, %r10 + movq %r10, 16(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 24(%rsi) + addq %rax, %r8 + movq %r8, 24(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[4] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 32(%rsi) + addq %rax, %r9 + movq %r9, 32(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[5] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 40(%rsi) + addq %rax, %r10 + movq %r10, 40(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[6] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 48(%rsi) + addq %rax, %r8 + movq %r8, 48(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[7] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 56(%rsi) + addq %rax, %r9 + movq %r9, 56(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[8] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 64(%rsi) + addq %rax, %r10 + movq %r10, 64(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[9] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 72(%rsi) + addq %rax, %r8 + movq %r8, 72(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[10] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 80(%rsi) + addq %rax, %r9 + movq %r9, 80(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[11] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 88(%rsi) + addq %rax, %r10 + movq %r10, 88(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[12] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 96(%rsi) + addq %rax, %r8 + movq %r8, 96(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[13] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 104(%rsi) + addq %rax, %r9 + movq %r9, 104(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[14] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 112(%rsi) + addq %rax, %r10 + movq %r10, 112(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[15] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 120(%rsi) + addq %rax, %r8 + movq %r8, 120(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[16] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 128(%rsi) + addq %rax, %r9 + movq %r9, 128(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[17] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 136(%rsi) + addq %rax, %r10 + movq %r10, 136(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[18] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 144(%rsi) + addq %rax, %r8 + movq %r8, 144(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[19] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 152(%rsi) + addq %rax, %r9 + movq %r9, 152(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[20] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 160(%rsi) + addq %rax, %r10 + movq %r10, 160(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[21] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 168(%rsi) + addq %rax, %r8 + movq %r8, 168(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[22] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 176(%rsi) + addq %rax, %r9 + movq %r9, 176(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[23] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 184(%rsi) + addq %rax, %r10 + movq %r10, 184(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[24] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 192(%rsi) + addq %rax, 
%r8 + movq %r8, 192(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[25] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 200(%rsi) + addq %rax, %r9 + movq %r9, 200(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[26] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 208(%rsi) + addq %rax, %r10 + movq %r10, 208(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[27] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 216(%rsi) + addq %rax, %r8 + movq %r8, 216(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[28] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 224(%rsi) + addq %rax, %r9 + movq %r9, 224(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[29] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 232(%rsi) + addq %rax, %r10 + movq %r10, 232(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[30] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 240(%rsi) + addq %rax, %r8 + movq %r8, 240(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[31] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 248(%rsi) + addq %rax, %r9 + movq %r9, 248(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[32] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 256(%rsi) + addq %rax, %r10 + movq %r10, 256(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[33] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 264(%rsi) + addq %rax, %r8 + movq %r8, 264(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[34] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 272(%rsi) + addq %rax, %r9 + movq %r9, 272(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[35] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 280(%rsi) + addq %rax, %r10 + movq %r10, 280(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[36] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 288(%rsi) + addq %rax, %r8 + movq %r8, 288(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[37] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 296(%rsi) + addq %rax, %r9 + movq %r9, 296(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[38] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 304(%rsi) + addq %rax, %r10 + movq %r10, 304(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[39] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 312(%rsi) + addq %rax, %r8 + movq %r8, 312(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[40] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 320(%rsi) + addq %rax, %r9 + movq %r9, 320(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[41] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 328(%rsi) + addq %rax, %r10 + movq %r10, 328(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[42] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 336(%rsi) + addq %rax, %r8 + movq %r8, 336(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[43] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 344(%rsi) + addq %rax, %r9 + movq %r9, 344(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[44] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 352(%rsi) + addq %rax, %r10 + movq %r10, 352(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[45] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 360(%rsi) + addq %rax, %r8 + movq %r8, 360(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[46] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 368(%rsi) + addq %rax, %r9 + movq %r9, 368(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[47] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 376(%rsi) + addq %rax, %r10 + movq %r10, 376(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[48] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 384(%rsi) + addq %rax, %r8 + movq %r8, 384(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[49] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 392(%rsi) + addq %rax, %r9 + movq %r9, 392(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[50] * B + movq %rcx, %rax + xorq %r9, 
%r9 + mulq 400(%rsi) + addq %rax, %r10 + movq %r10, 400(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[51] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 408(%rsi) + addq %rax, %r8 + movq %r8, 408(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[52] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 416(%rsi) + addq %rax, %r9 + movq %r9, 416(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[53] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 424(%rsi) + addq %rax, %r10 + movq %r10, 424(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[54] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 432(%rsi) + addq %rax, %r8 + movq %r8, 432(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[55] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 440(%rsi) + addq %rax, %r9 + movq %r9, 440(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[56] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 448(%rsi) + addq %rax, %r10 + movq %r10, 448(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[57] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 456(%rsi) + addq %rax, %r8 + movq %r8, 456(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[58] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 464(%rsi) + addq %rax, %r9 + movq %r9, 464(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[59] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 472(%rsi) + addq %rax, %r10 + movq %r10, 472(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[60] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 480(%rsi) + addq %rax, %r8 + movq %r8, 480(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[61] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 488(%rsi) + addq %rax, %r9 + movq %r9, 488(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[62] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 496(%rsi) + addq %rax, %r10 + movq %r10, 496(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[63] * B + movq %rcx, %rax + mulq 504(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + movq %r8, 504(%rdi) + movq %r9, 512(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_4096_mul_d_64,.-sp_4096_mul_d_64 +#endif /* __APPLE__ */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
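+ *
+ * b is ANDed with the mask into a stack copy and the full-width
+ * subtract always runs, so the same instructions execute whether or
+ * not the subtraction takes effect (m == 0 leaves a unchanged),
+ * keeping the operation constant time; the return value is 0, or -1
+ * on borrow. A rough C equivalent, assuming <stdint.h> (illustrative
+ * sketch, not the generated code):
+ *
+ *   uint64_t cond_sub_sketch(uint64_t* r, const uint64_t* a,
+ *                            const uint64_t* b, uint64_t m)
+ *   {
+ *       uint64_t borrow = 0;
+ *       for (int i = 0; i < 64; i++) {
+ *           uint64_t bi = b[i] & m;       // masked subtrahend
+ *           r[i] = a[i] - bi - borrow;
+ *           // borrow out iff a[i] < bi + borrow (65-bit compare)
+ *           borrow = (a[i] < bi) | ((a[i] == bi) & borrow);
+ *       }
+ *       return (uint64_t)0 - borrow;      // 0, or -1 on borrow
+ *   }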
+ */ +#ifndef __APPLE__ +.globl sp_4096_cond_sub_64 +.type sp_4096_cond_sub_64,@function +.align 16 +sp_4096_cond_sub_64: +#else +.globl _sp_4096_cond_sub_64 +.p2align 4 +_sp_4096_cond_sub_64: +#endif /* __APPLE__ */ + subq $512, %rsp + movq $0, %rax + movq (%rdx), %r8 + movq 8(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, (%rsp) + movq %r9, 8(%rsp) + movq 16(%rdx), %r8 + movq 24(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 16(%rsp) + movq %r9, 24(%rsp) + movq 32(%rdx), %r8 + movq 40(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq 48(%rdx), %r8 + movq 56(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 48(%rsp) + movq %r9, 56(%rsp) + movq 64(%rdx), %r8 + movq 72(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 64(%rsp) + movq %r9, 72(%rsp) + movq 80(%rdx), %r8 + movq 88(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 80(%rsp) + movq %r9, 88(%rsp) + movq 96(%rdx), %r8 + movq 104(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 96(%rsp) + movq %r9, 104(%rsp) + movq 112(%rdx), %r8 + movq 120(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 112(%rsp) + movq %r9, 120(%rsp) + movq 128(%rdx), %r8 + movq 136(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 128(%rsp) + movq %r9, 136(%rsp) + movq 144(%rdx), %r8 + movq 152(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 144(%rsp) + movq %r9, 152(%rsp) + movq 160(%rdx), %r8 + movq 168(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 160(%rsp) + movq %r9, 168(%rsp) + movq 176(%rdx), %r8 + movq 184(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 176(%rsp) + movq %r9, 184(%rsp) + movq 192(%rdx), %r8 + movq 200(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 192(%rsp) + movq %r9, 200(%rsp) + movq 208(%rdx), %r8 + movq 216(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 208(%rsp) + movq %r9, 216(%rsp) + movq 224(%rdx), %r8 + movq 232(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 224(%rsp) + movq %r9, 232(%rsp) + movq 240(%rdx), %r8 + movq 248(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 240(%rsp) + movq %r9, 248(%rsp) + movq 256(%rdx), %r8 + movq 264(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 256(%rsp) + movq %r9, 264(%rsp) + movq 272(%rdx), %r8 + movq 280(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 272(%rsp) + movq %r9, 280(%rsp) + movq 288(%rdx), %r8 + movq 296(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 288(%rsp) + movq %r9, 296(%rsp) + movq 304(%rdx), %r8 + movq 312(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 304(%rsp) + movq %r9, 312(%rsp) + movq 320(%rdx), %r8 + movq 328(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 320(%rsp) + movq %r9, 328(%rsp) + movq 336(%rdx), %r8 + movq 344(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 336(%rsp) + movq %r9, 344(%rsp) + movq 352(%rdx), %r8 + movq 360(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 352(%rsp) + movq %r9, 360(%rsp) + movq 368(%rdx), %r8 + movq 376(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 368(%rsp) + movq %r9, 376(%rsp) + movq 384(%rdx), %r8 + movq 392(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 384(%rsp) + movq %r9, 392(%rsp) + movq 400(%rdx), %r8 + movq 408(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 400(%rsp) + movq %r9, 408(%rsp) + movq 416(%rdx), %r8 + movq 424(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 416(%rsp) + movq %r9, 424(%rsp) + movq 432(%rdx), %r8 + movq 440(%rdx), 
%r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 432(%rsp) + movq %r9, 440(%rsp) + movq 448(%rdx), %r8 + movq 456(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 448(%rsp) + movq %r9, 456(%rsp) + movq 464(%rdx), %r8 + movq 472(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 464(%rsp) + movq %r9, 472(%rsp) + movq 480(%rdx), %r8 + movq 488(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 480(%rsp) + movq %r9, 488(%rsp) + movq 496(%rdx), %r8 + movq 504(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 496(%rsp) + movq %r9, 504(%rsp) + movq (%rsi), %r8 + movq (%rsp), %rdx + subq %rdx, %r8 + movq 8(%rsi), %r9 + movq 8(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, (%rdi) + movq 16(%rsi), %r8 + movq 16(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 8(%rdi) + movq 24(%rsi), %r9 + movq 24(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 16(%rdi) + movq 32(%rsi), %r8 + movq 32(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 24(%rdi) + movq 40(%rsi), %r9 + movq 40(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 32(%rdi) + movq 48(%rsi), %r8 + movq 48(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 40(%rdi) + movq 56(%rsi), %r9 + movq 56(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 48(%rdi) + movq 64(%rsi), %r8 + movq 64(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 56(%rdi) + movq 72(%rsi), %r9 + movq 72(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 64(%rdi) + movq 80(%rsi), %r8 + movq 80(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 72(%rdi) + movq 88(%rsi), %r9 + movq 88(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 80(%rdi) + movq 96(%rsi), %r8 + movq 96(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 88(%rdi) + movq 104(%rsi), %r9 + movq 104(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 96(%rdi) + movq 112(%rsi), %r8 + movq 112(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 104(%rdi) + movq 120(%rsi), %r9 + movq 120(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 112(%rdi) + movq 128(%rsi), %r8 + movq 128(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 120(%rdi) + movq 136(%rsi), %r9 + movq 136(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 128(%rdi) + movq 144(%rsi), %r8 + movq 144(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 136(%rdi) + movq 152(%rsi), %r9 + movq 152(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 144(%rdi) + movq 160(%rsi), %r8 + movq 160(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 152(%rdi) + movq 168(%rsi), %r9 + movq 168(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 160(%rdi) + movq 176(%rsi), %r8 + movq 176(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 168(%rdi) + movq 184(%rsi), %r9 + movq 184(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 176(%rdi) + movq 192(%rsi), %r8 + movq 192(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 184(%rdi) + movq 200(%rsi), %r9 + movq 200(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 192(%rdi) + movq 208(%rsi), %r8 + movq 208(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 200(%rdi) + movq 216(%rsi), %r9 + movq 216(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 208(%rdi) + movq 224(%rsi), %r8 + movq 224(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 216(%rdi) + movq 232(%rsi), %r9 + movq 232(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 224(%rdi) + movq 240(%rsi), %r8 + movq 240(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 232(%rdi) + movq 248(%rsi), %r9 + movq 248(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 240(%rdi) + movq 256(%rsi), %r8 + movq 256(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 248(%rdi) + movq 264(%rsi), %r9 + movq 264(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 256(%rdi) + movq 272(%rsi), %r8 + movq 272(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 264(%rdi) + movq 280(%rsi), %r9 + movq 280(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 272(%rdi) + movq 288(%rsi), %r8 + movq 
288(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 280(%rdi) + movq 296(%rsi), %r9 + movq 296(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 288(%rdi) + movq 304(%rsi), %r8 + movq 304(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 296(%rdi) + movq 312(%rsi), %r9 + movq 312(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 304(%rdi) + movq 320(%rsi), %r8 + movq 320(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 312(%rdi) + movq 328(%rsi), %r9 + movq 328(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 320(%rdi) + movq 336(%rsi), %r8 + movq 336(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 328(%rdi) + movq 344(%rsi), %r9 + movq 344(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 336(%rdi) + movq 352(%rsi), %r8 + movq 352(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 344(%rdi) + movq 360(%rsi), %r9 + movq 360(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 352(%rdi) + movq 368(%rsi), %r8 + movq 368(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 360(%rdi) + movq 376(%rsi), %r9 + movq 376(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 368(%rdi) + movq 384(%rsi), %r8 + movq 384(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 376(%rdi) + movq 392(%rsi), %r9 + movq 392(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 384(%rdi) + movq 400(%rsi), %r8 + movq 400(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 392(%rdi) + movq 408(%rsi), %r9 + movq 408(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 400(%rdi) + movq 416(%rsi), %r8 + movq 416(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 408(%rdi) + movq 424(%rsi), %r9 + movq 424(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 416(%rdi) + movq 432(%rsi), %r8 + movq 432(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 424(%rdi) + movq 440(%rsi), %r9 + movq 440(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 432(%rdi) + movq 448(%rsi), %r8 + movq 448(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 440(%rdi) + movq 456(%rsi), %r9 + movq 456(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 448(%rdi) + movq 464(%rsi), %r8 + movq 464(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 456(%rdi) + movq 472(%rsi), %r9 + movq 472(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 464(%rdi) + movq 480(%rsi), %r8 + movq 480(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 472(%rdi) + movq 488(%rsi), %r9 + movq 488(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 480(%rdi) + movq 496(%rsi), %r8 + movq 496(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 488(%rdi) + movq 504(%rsi), %r9 + movq 504(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 496(%rdi) + movq %r9, 504(%rdi) + sbbq $0, %rax + addq $512, %rsp + repz retq +#ifndef __APPLE__ +.size sp_4096_cond_sub_64,.-sp_4096_cond_sub_64 +#endif /* __APPLE__ */ +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
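+ *
+ * Each pass of L_mont_loop_64 picks mu = a[i] * mp (mod 2^64) so
+ * that adding mu * m zeroes limb a[i]; after 64 passes the low half
+ * is zero, the result is the upper half, and a final conditional
+ * subtract of m (mask = -overflow) is done via sp_4096_cond_sub_64
+ * (cond_sub_sketch above). A rough C equivalent, assuming <stdint.h>
+ * and unsigned __int128 (illustrative sketch, not the generated
+ * code):
+ *
+ *   void mont_reduce_sketch(uint64_t* a, const uint64_t* m,
+ *                           uint64_t mp)
+ *   {
+ *       uint64_t over = 0;                  // carry beyond a[i + 64]
+ *       for (int i = 0; i < 64; i++) {
+ *           uint64_t mu = a[i] * mp;        // truncating multiply
+ *           unsigned __int128 t = 0;
+ *           for (int j = 0; j < 64; j++) {  // a += mu * m << (64*i)
+ *               t += (unsigned __int128)mu * m[j] + a[i + j];
+ *               a[i + j] = (uint64_t)t;     // a[i + 0] becomes zero
+ *               t >>= 64;
+ *           }
+ *           t += (unsigned __int128)a[i + 64] + over;
+ *           a[i + 64] = (uint64_t)t;
+ *           over = (uint64_t)(t >> 64);
+ *       }
+ *       // reduced result lands in a[0..63], as in the asm above
+ *       cond_sub_sketch(a, a + 64, m, (uint64_t)0 - over);
+ *   }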
+ */ +#ifndef __APPLE__ +.globl sp_4096_mont_reduce_64 +.type sp_4096_mont_reduce_64,@function +.align 16 +sp_4096_mont_reduce_64: +#else +.globl _sp_4096_mont_reduce_64 +.p2align 4 +_sp_4096_mont_reduce_64: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + movq %rdx, %rcx + xorq %r15, %r15 + # i = 64 + movq $64, %r8 + movq (%rdi), %r13 + movq 8(%rdi), %r14 +L_mont_loop_64: + # mu = a[i] * mp + movq %r13, %r11 + imulq %rcx, %r11 + # a[i+0] += m[0] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq (%rsi) + addq %rax, %r13 + adcq %rdx, %r10 + # a[i+1] += m[1] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 8(%rsi) + movq %r14, %r13 + addq %rax, %r13 + adcq %rdx, %r9 + addq %r10, %r13 + adcq $0, %r9 + # a[i+2] += m[2] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 16(%rsi) + movq 16(%rdi), %r14 + addq %rax, %r14 + adcq %rdx, %r10 + addq %r9, %r14 + adcq $0, %r10 + # a[i+3] += m[3] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 24(%rsi) + movq 24(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 24(%rdi) + adcq $0, %r9 + # a[i+4] += m[4] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 32(%rsi) + movq 32(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 32(%rdi) + adcq $0, %r10 + # a[i+5] += m[5] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 40(%rsi) + movq 40(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 40(%rdi) + adcq $0, %r9 + # a[i+6] += m[6] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 48(%rsi) + movq 48(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 48(%rdi) + adcq $0, %r10 + # a[i+7] += m[7] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 56(%rsi) + movq 56(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 56(%rdi) + adcq $0, %r9 + # a[i+8] += m[8] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 64(%rsi) + movq 64(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 64(%rdi) + adcq $0, %r10 + # a[i+9] += m[9] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 72(%rsi) + movq 72(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 72(%rdi) + adcq $0, %r9 + # a[i+10] += m[10] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 80(%rsi) + movq 80(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 80(%rdi) + adcq $0, %r10 + # a[i+11] += m[11] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 88(%rsi) + movq 88(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 88(%rdi) + adcq $0, %r9 + # a[i+12] += m[12] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 96(%rsi) + movq 96(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 96(%rdi) + adcq $0, %r10 + # a[i+13] += m[13] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 104(%rsi) + movq 104(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 104(%rdi) + adcq $0, %r9 + # a[i+14] += m[14] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 112(%rsi) + movq 112(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 112(%rdi) + adcq $0, %r10 + # a[i+15] += m[15] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 120(%rsi) + movq 120(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 120(%rdi) + adcq $0, %r9 + # a[i+16] += m[16] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 128(%rsi) + movq 128(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 128(%rdi) + adcq $0, %r10 + # 
a[i+17] += m[17] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 136(%rsi) + movq 136(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 136(%rdi) + adcq $0, %r9 + # a[i+18] += m[18] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 144(%rsi) + movq 144(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 144(%rdi) + adcq $0, %r10 + # a[i+19] += m[19] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 152(%rsi) + movq 152(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 152(%rdi) + adcq $0, %r9 + # a[i+20] += m[20] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 160(%rsi) + movq 160(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 160(%rdi) + adcq $0, %r10 + # a[i+21] += m[21] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 168(%rsi) + movq 168(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 168(%rdi) + adcq $0, %r9 + # a[i+22] += m[22] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 176(%rsi) + movq 176(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 176(%rdi) + adcq $0, %r10 + # a[i+23] += m[23] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 184(%rsi) + movq 184(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 184(%rdi) + adcq $0, %r9 + # a[i+24] += m[24] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 192(%rsi) + movq 192(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 192(%rdi) + adcq $0, %r10 + # a[i+25] += m[25] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 200(%rsi) + movq 200(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 200(%rdi) + adcq $0, %r9 + # a[i+26] += m[26] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 208(%rsi) + movq 208(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 208(%rdi) + adcq $0, %r10 + # a[i+27] += m[27] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 216(%rsi) + movq 216(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 216(%rdi) + adcq $0, %r9 + # a[i+28] += m[28] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 224(%rsi) + movq 224(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 224(%rdi) + adcq $0, %r10 + # a[i+29] += m[29] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 232(%rsi) + movq 232(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 232(%rdi) + adcq $0, %r9 + # a[i+30] += m[30] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 240(%rsi) + movq 240(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 240(%rdi) + adcq $0, %r10 + # a[i+31] += m[31] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 248(%rsi) + movq 248(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 248(%rdi) + adcq $0, %r9 + # a[i+32] += m[32] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 256(%rsi) + movq 256(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 256(%rdi) + adcq $0, %r10 + # a[i+33] += m[33] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 264(%rsi) + movq 264(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 264(%rdi) + adcq $0, %r9 + # a[i+34] += m[34] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 272(%rsi) + movq 272(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 272(%rdi) + adcq $0, %r10 + # a[i+35] += m[35] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 280(%rsi) + movq 
280(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 280(%rdi) + adcq $0, %r9 + # a[i+36] += m[36] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 288(%rsi) + movq 288(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 288(%rdi) + adcq $0, %r10 + # a[i+37] += m[37] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 296(%rsi) + movq 296(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 296(%rdi) + adcq $0, %r9 + # a[i+38] += m[38] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 304(%rsi) + movq 304(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 304(%rdi) + adcq $0, %r10 + # a[i+39] += m[39] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 312(%rsi) + movq 312(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 312(%rdi) + adcq $0, %r9 + # a[i+40] += m[40] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 320(%rsi) + movq 320(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 320(%rdi) + adcq $0, %r10 + # a[i+41] += m[41] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 328(%rsi) + movq 328(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 328(%rdi) + adcq $0, %r9 + # a[i+42] += m[42] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 336(%rsi) + movq 336(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 336(%rdi) + adcq $0, %r10 + # a[i+43] += m[43] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 344(%rsi) + movq 344(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 344(%rdi) + adcq $0, %r9 + # a[i+44] += m[44] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 352(%rsi) + movq 352(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 352(%rdi) + adcq $0, %r10 + # a[i+45] += m[45] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 360(%rsi) + movq 360(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 360(%rdi) + adcq $0, %r9 + # a[i+46] += m[46] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 368(%rsi) + movq 368(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 368(%rdi) + adcq $0, %r10 + # a[i+47] += m[47] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 376(%rsi) + movq 376(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 376(%rdi) + adcq $0, %r9 + # a[i+48] += m[48] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 384(%rsi) + movq 384(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 384(%rdi) + adcq $0, %r10 + # a[i+49] += m[49] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 392(%rsi) + movq 392(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 392(%rdi) + adcq $0, %r9 + # a[i+50] += m[50] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 400(%rsi) + movq 400(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 400(%rdi) + adcq $0, %r10 + # a[i+51] += m[51] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 408(%rsi) + movq 408(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 408(%rdi) + adcq $0, %r9 + # a[i+52] += m[52] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 416(%rsi) + movq 416(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 416(%rdi) + adcq $0, %r10 + # a[i+53] += m[53] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 424(%rsi) + movq 424(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 
424(%rdi) + adcq $0, %r9 + # a[i+54] += m[54] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 432(%rsi) + movq 432(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 432(%rdi) + adcq $0, %r10 + # a[i+55] += m[55] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 440(%rsi) + movq 440(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 440(%rdi) + adcq $0, %r9 + # a[i+56] += m[56] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 448(%rsi) + movq 448(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 448(%rdi) + adcq $0, %r10 + # a[i+57] += m[57] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 456(%rsi) + movq 456(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 456(%rdi) + adcq $0, %r9 + # a[i+58] += m[58] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 464(%rsi) + movq 464(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 464(%rdi) + adcq $0, %r10 + # a[i+59] += m[59] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 472(%rsi) + movq 472(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 472(%rdi) + adcq $0, %r9 + # a[i+60] += m[60] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 480(%rsi) + movq 480(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 480(%rdi) + adcq $0, %r10 + # a[i+61] += m[61] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 488(%rsi) + movq 488(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 488(%rdi) + adcq $0, %r9 + # a[i+62] += m[62] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 496(%rsi) + movq 496(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 496(%rdi) + adcq $0, %r10 + # a[i+63] += m[63] * mu + movq %r11, %rax + mulq 504(%rsi) + movq 504(%rdi), %r12 + addq %rax, %r10 + adcq %r15, %rdx + movq $0, %r15 + adcq $0, %r15 + addq %r10, %r12 + movq %r12, 504(%rdi) + adcq %rdx, 512(%rdi) + adcq $0, %r15 + # i -= 1 + addq $8, %rdi + decq %r8 + jnz L_mont_loop_64 + movq %r13, (%rdi) + movq %r14, 8(%rdi) + negq %r15 + movq %r15, %rcx + movq %rsi, %rdx + movq %rdi, %rsi + subq $512, %rdi +#ifndef __APPLE__ + callq sp_4096_cond_sub_64@plt +#else + callq _sp_4096_cond_sub_64 +#endif /* __APPLE__ */ + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_4096_mont_reduce_64,.-sp_4096_mont_reduce_64 +#endif /* __APPLE__ */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing conditional subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
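 + *
 + * Illustrative C sketch of this primitive (not part of the generated
 + * code; assumes sp_digit is a 64-bit unsigned limb and 64 limbs per
 + * number). The assembly below applies the mask branch-free with
 + * pextq (BMI2): extracting through an all-ones mask returns b[i],
 + * extracting through a zero mask returns 0.
 + *
 + *   sp_digit sp_4096_cond_sub_64_sketch(sp_digit* r, const sp_digit* a,
 + *                                       const sp_digit* b, sp_digit m)
 + *   {
 + *       sp_digit c = 0;                          // running borrow
 + *       for (int i = 0; i < 64; i++) {
 + *           sp_digit t = a[i] - (b[i] & m) - c;  // masked subtract
 + *           c = (t > a[i]) || (t == a[i] && c);  // borrow out of this limb
 + *           r[i] = t;
 + *       }
 + *       return (sp_digit)0 - c;                  // -1 on borrow out, else 0
 + *   }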
+ */ +#ifndef __APPLE__ +.globl sp_4096_cond_sub_avx2_64 +.type sp_4096_cond_sub_avx2_64,@function +.align 16 +sp_4096_cond_sub_avx2_64: +#else +.globl _sp_4096_cond_sub_avx2_64 +.p2align 4 +_sp_4096_cond_sub_avx2_64: +#endif /* __APPLE__ */ + movq $0, %rax + movq (%rdx), %r10 + movq (%rsi), %r8 + pextq %rcx, %r10, %r10 + subq %r10, %r8 + movq 8(%rdx), %r10 + movq 8(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, (%rdi) + sbbq %r10, %r9 + movq 16(%rdx), %r8 + movq 16(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 8(%rdi) + sbbq %r8, %r10 + movq 24(%rdx), %r9 + movq 24(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 16(%rdi) + sbbq %r9, %r8 + movq 32(%rdx), %r10 + movq 32(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 24(%rdi) + sbbq %r10, %r9 + movq 40(%rdx), %r8 + movq 40(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 32(%rdi) + sbbq %r8, %r10 + movq 48(%rdx), %r9 + movq 48(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 40(%rdi) + sbbq %r9, %r8 + movq 56(%rdx), %r10 + movq 56(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 48(%rdi) + sbbq %r10, %r9 + movq 64(%rdx), %r8 + movq 64(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 56(%rdi) + sbbq %r8, %r10 + movq 72(%rdx), %r9 + movq 72(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 64(%rdi) + sbbq %r9, %r8 + movq 80(%rdx), %r10 + movq 80(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 72(%rdi) + sbbq %r10, %r9 + movq 88(%rdx), %r8 + movq 88(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 80(%rdi) + sbbq %r8, %r10 + movq 96(%rdx), %r9 + movq 96(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 88(%rdi) + sbbq %r9, %r8 + movq 104(%rdx), %r10 + movq 104(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 96(%rdi) + sbbq %r10, %r9 + movq 112(%rdx), %r8 + movq 112(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 104(%rdi) + sbbq %r8, %r10 + movq 120(%rdx), %r9 + movq 120(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 112(%rdi) + sbbq %r9, %r8 + movq 128(%rdx), %r10 + movq 128(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 120(%rdi) + sbbq %r10, %r9 + movq 136(%rdx), %r8 + movq 136(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 128(%rdi) + sbbq %r8, %r10 + movq 144(%rdx), %r9 + movq 144(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 136(%rdi) + sbbq %r9, %r8 + movq 152(%rdx), %r10 + movq 152(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 144(%rdi) + sbbq %r10, %r9 + movq 160(%rdx), %r8 + movq 160(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 152(%rdi) + sbbq %r8, %r10 + movq 168(%rdx), %r9 + movq 168(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 160(%rdi) + sbbq %r9, %r8 + movq 176(%rdx), %r10 + movq 176(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 168(%rdi) + sbbq %r10, %r9 + movq 184(%rdx), %r8 + movq 184(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 176(%rdi) + sbbq %r8, %r10 + movq 192(%rdx), %r9 + movq 192(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 184(%rdi) + sbbq %r9, %r8 + movq 200(%rdx), %r10 + movq 200(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 192(%rdi) + sbbq %r10, %r9 + movq 208(%rdx), %r8 + movq 208(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 200(%rdi) + sbbq %r8, %r10 + movq 216(%rdx), %r9 + movq 216(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 208(%rdi) + sbbq %r9, %r8 + movq 224(%rdx), %r10 + movq 224(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 216(%rdi) + sbbq %r10, %r9 + movq 232(%rdx), %r8 + movq 232(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 224(%rdi) + sbbq %r8, %r10 + movq 240(%rdx), %r9 + movq 240(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 232(%rdi) + sbbq %r9, %r8 + movq 248(%rdx), %r10 + movq 
248(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 240(%rdi) + sbbq %r10, %r9 + movq 256(%rdx), %r8 + movq 256(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 248(%rdi) + sbbq %r8, %r10 + movq 264(%rdx), %r9 + movq 264(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 256(%rdi) + sbbq %r9, %r8 + movq 272(%rdx), %r10 + movq 272(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 264(%rdi) + sbbq %r10, %r9 + movq 280(%rdx), %r8 + movq 280(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 272(%rdi) + sbbq %r8, %r10 + movq 288(%rdx), %r9 + movq 288(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 280(%rdi) + sbbq %r9, %r8 + movq 296(%rdx), %r10 + movq 296(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 288(%rdi) + sbbq %r10, %r9 + movq 304(%rdx), %r8 + movq 304(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 296(%rdi) + sbbq %r8, %r10 + movq 312(%rdx), %r9 + movq 312(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 304(%rdi) + sbbq %r9, %r8 + movq 320(%rdx), %r10 + movq 320(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 312(%rdi) + sbbq %r10, %r9 + movq 328(%rdx), %r8 + movq 328(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 320(%rdi) + sbbq %r8, %r10 + movq 336(%rdx), %r9 + movq 336(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 328(%rdi) + sbbq %r9, %r8 + movq 344(%rdx), %r10 + movq 344(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 336(%rdi) + sbbq %r10, %r9 + movq 352(%rdx), %r8 + movq 352(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 344(%rdi) + sbbq %r8, %r10 + movq 360(%rdx), %r9 + movq 360(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 352(%rdi) + sbbq %r9, %r8 + movq 368(%rdx), %r10 + movq 368(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 360(%rdi) + sbbq %r10, %r9 + movq 376(%rdx), %r8 + movq 376(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 368(%rdi) + sbbq %r8, %r10 + movq 384(%rdx), %r9 + movq 384(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 376(%rdi) + sbbq %r9, %r8 + movq 392(%rdx), %r10 + movq 392(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 384(%rdi) + sbbq %r10, %r9 + movq 400(%rdx), %r8 + movq 400(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 392(%rdi) + sbbq %r8, %r10 + movq 408(%rdx), %r9 + movq 408(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 400(%rdi) + sbbq %r9, %r8 + movq 416(%rdx), %r10 + movq 416(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 408(%rdi) + sbbq %r10, %r9 + movq 424(%rdx), %r8 + movq 424(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 416(%rdi) + sbbq %r8, %r10 + movq 432(%rdx), %r9 + movq 432(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 424(%rdi) + sbbq %r9, %r8 + movq 440(%rdx), %r10 + movq 440(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 432(%rdi) + sbbq %r10, %r9 + movq 448(%rdx), %r8 + movq 448(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 440(%rdi) + sbbq %r8, %r10 + movq 456(%rdx), %r9 + movq 456(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 448(%rdi) + sbbq %r9, %r8 + movq 464(%rdx), %r10 + movq 464(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 456(%rdi) + sbbq %r10, %r9 + movq 472(%rdx), %r8 + movq 472(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 464(%rdi) + sbbq %r8, %r10 + movq 480(%rdx), %r9 + movq 480(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 472(%rdi) + sbbq %r9, %r8 + movq 488(%rdx), %r10 + movq 488(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 480(%rdi) + sbbq %r10, %r9 + movq 496(%rdx), %r8 + movq 496(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 488(%rdi) + sbbq %r8, %r10 + movq 504(%rdx), %r9 + movq 504(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 496(%rdi) + sbbq %r9, %r8 + movq %r8, 504(%rdi) + sbbq $0, %rax + 
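 + # %rax was zeroed on entry; the final sbbq leaves it holding
 + # 0 - borrow: all ones if the masked subtract borrowed, else 0.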
repz retq +#ifndef __APPLE__ +.size sp_4096_cond_sub_avx2_64,.-sp_4096_cond_sub_avx2_64 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +#ifndef __APPLE__ +.globl sp_4096_mul_d_avx2_64 +.type sp_4096_mul_d_avx2_64,@function +.align 16 +sp_4096_mul_d_avx2_64: +#else +.globl _sp_4096_mul_d_avx2_64 +.p2align 4 +_sp_4096_mul_d_avx2_64: +#endif /* __APPLE__ */ + movq %rdx, %rax + # A[0] * B + movq %rax, %rdx + xorq %r11, %r11 + mulxq (%rsi), %r9, %r10 + movq %r9, (%rdi) + # A[1] * B + mulxq 8(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 8(%rdi) + adoxq %r8, %r9 + # A[2] * B + mulxq 16(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 16(%rdi) + adoxq %r8, %r10 + # A[3] * B + mulxq 24(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 24(%rdi) + adoxq %r8, %r9 + # A[4] * B + mulxq 32(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 32(%rdi) + adoxq %r8, %r10 + # A[5] * B + mulxq 40(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 40(%rdi) + adoxq %r8, %r9 + # A[6] * B + mulxq 48(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 48(%rdi) + adoxq %r8, %r10 + # A[7] * B + mulxq 56(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 56(%rdi) + adoxq %r8, %r9 + # A[8] * B + mulxq 64(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 64(%rdi) + adoxq %r8, %r10 + # A[9] * B + mulxq 72(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 72(%rdi) + adoxq %r8, %r9 + # A[10] * B + mulxq 80(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 80(%rdi) + adoxq %r8, %r10 + # A[11] * B + mulxq 88(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 88(%rdi) + adoxq %r8, %r9 + # A[12] * B + mulxq 96(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 96(%rdi) + adoxq %r8, %r10 + # A[13] * B + mulxq 104(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 104(%rdi) + adoxq %r8, %r9 + # A[14] * B + mulxq 112(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 112(%rdi) + adoxq %r8, %r10 + # A[15] * B + mulxq 120(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 120(%rdi) + adoxq %r8, %r9 + # A[16] * B + mulxq 128(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 128(%rdi) + adoxq %r8, %r10 + # A[17] * B + mulxq 136(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 136(%rdi) + adoxq %r8, %r9 + # A[18] * B + mulxq 144(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 144(%rdi) + adoxq %r8, %r10 + # A[19] * B + mulxq 152(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 152(%rdi) + adoxq %r8, %r9 + # A[20] * B + mulxq 160(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 160(%rdi) + adoxq %r8, %r10 + # A[21] * B + mulxq 168(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 168(%rdi) + adoxq %r8, %r9 + # A[22] * B + mulxq 176(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 176(%rdi) + adoxq %r8, %r10 + # A[23] * B + mulxq 184(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 184(%rdi) + adoxq %r8, %r9 + # A[24] * B + mulxq 192(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 192(%rdi) + adoxq %r8, %r10 + # A[25] * B + mulxq 200(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 200(%rdi) + adoxq 
%r8, %r9 + # A[26] * B + mulxq 208(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 208(%rdi) + adoxq %r8, %r10 + # A[27] * B + mulxq 216(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 216(%rdi) + adoxq %r8, %r9 + # A[28] * B + mulxq 224(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 224(%rdi) + adoxq %r8, %r10 + # A[29] * B + mulxq 232(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 232(%rdi) + adoxq %r8, %r9 + # A[30] * B + mulxq 240(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 240(%rdi) + adoxq %r8, %r10 + # A[31] * B + mulxq 248(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 248(%rdi) + adoxq %r8, %r9 + # A[32] * B + mulxq 256(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 256(%rdi) + adoxq %r8, %r10 + # A[33] * B + mulxq 264(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 264(%rdi) + adoxq %r8, %r9 + # A[34] * B + mulxq 272(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 272(%rdi) + adoxq %r8, %r10 + # A[35] * B + mulxq 280(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 280(%rdi) + adoxq %r8, %r9 + # A[36] * B + mulxq 288(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 288(%rdi) + adoxq %r8, %r10 + # A[37] * B + mulxq 296(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 296(%rdi) + adoxq %r8, %r9 + # A[38] * B + mulxq 304(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 304(%rdi) + adoxq %r8, %r10 + # A[39] * B + mulxq 312(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 312(%rdi) + adoxq %r8, %r9 + # A[40] * B + mulxq 320(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 320(%rdi) + adoxq %r8, %r10 + # A[41] * B + mulxq 328(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 328(%rdi) + adoxq %r8, %r9 + # A[42] * B + mulxq 336(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 336(%rdi) + adoxq %r8, %r10 + # A[43] * B + mulxq 344(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 344(%rdi) + adoxq %r8, %r9 + # A[44] * B + mulxq 352(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 352(%rdi) + adoxq %r8, %r10 + # A[45] * B + mulxq 360(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 360(%rdi) + adoxq %r8, %r9 + # A[46] * B + mulxq 368(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 368(%rdi) + adoxq %r8, %r10 + # A[47] * B + mulxq 376(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 376(%rdi) + adoxq %r8, %r9 + # A[48] * B + mulxq 384(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 384(%rdi) + adoxq %r8, %r10 + # A[49] * B + mulxq 392(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 392(%rdi) + adoxq %r8, %r9 + # A[50] * B + mulxq 400(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 400(%rdi) + adoxq %r8, %r10 + # A[51] * B + mulxq 408(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 408(%rdi) + adoxq %r8, %r9 + # A[52] * B + mulxq 416(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 416(%rdi) + adoxq %r8, %r10 + # A[53] * B + mulxq 424(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 424(%rdi) + adoxq %r8, %r9 + # A[54] * B + mulxq 432(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 432(%rdi) + adoxq %r8, %r10 + # A[55] * B + mulxq 440(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 
440(%rdi) + adoxq %r8, %r9 + # A[56] * B + mulxq 448(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 448(%rdi) + adoxq %r8, %r10 + # A[57] * B + mulxq 456(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 456(%rdi) + adoxq %r8, %r9 + # A[58] * B + mulxq 464(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 464(%rdi) + adoxq %r8, %r10 + # A[59] * B + mulxq 472(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 472(%rdi) + adoxq %r8, %r9 + # A[60] * B + mulxq 480(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 480(%rdi) + adoxq %r8, %r10 + # A[61] * B + mulxq 488(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 488(%rdi) + adoxq %r8, %r9 + # A[62] * B + mulxq 496(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 496(%rdi) + adoxq %r8, %r10 + # A[63] * B + mulxq 504(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + adcxq %r11, %r9 + movq %r10, 504(%rdi) + movq %r9, 512(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_4096_mul_d_avx2_64,.-sp_4096_mul_d_avx2_64 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +#ifndef __APPLE__ +.globl sp_4096_cmp_64 +.type sp_4096_cmp_64,@function +.align 16 +sp_4096_cmp_64: +#else +.globl _sp_4096_cmp_64 +.p2align 4 +_sp_4096_cmp_64: +#endif /* __APPLE__ */ + xorq %rcx, %rcx + movq $-1, %rdx + movq $-1, %rax + movq $1, %r8 + movq 504(%rdi), %r9 + movq 504(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 496(%rdi), %r9 + movq 496(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 488(%rdi), %r9 + movq 488(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 480(%rdi), %r9 + movq 480(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 472(%rdi), %r9 + movq 472(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 464(%rdi), %r9 + movq 464(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 456(%rdi), %r9 + movq 456(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 448(%rdi), %r9 + movq 448(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 440(%rdi), %r9 + movq 440(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 432(%rdi), %r9 + movq 432(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 424(%rdi), %r9 + movq 424(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 416(%rdi), %r9 + movq 416(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 408(%rdi), %r9 + movq 408(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq 
%r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 400(%rdi), %r9 + movq 400(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 392(%rdi), %r9 + movq 392(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 384(%rdi), %r9 + movq 384(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 376(%rdi), %r9 + movq 376(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 368(%rdi), %r9 + movq 368(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 360(%rdi), %r9 + movq 360(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 352(%rdi), %r9 + movq 352(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 344(%rdi), %r9 + movq 344(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 336(%rdi), %r9 + movq 336(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 328(%rdi), %r9 + movq 328(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 320(%rdi), %r9 + movq 320(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 312(%rdi), %r9 + movq 312(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 304(%rdi), %r9 + movq 304(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 296(%rdi), %r9 + movq 296(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 288(%rdi), %r9 + movq 288(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 280(%rdi), %r9 + movq 280(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 272(%rdi), %r9 + movq 272(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 264(%rdi), %r9 + movq 264(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 256(%rdi), %r9 + movq 256(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 248(%rdi), %r9 + movq 248(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 240(%rdi), %r9 + movq 240(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 232(%rdi), %r9 + movq 232(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 224(%rdi), %r9 + movq 224(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + 
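 + # %rdx starts as an all-ones mask. At the first limb where a and b
 + # differ, cmova sets %rax to +1 (a > b) or cmovc sets it to the
 + # still-intact mask, -1 (a < b); cmovnz then clears %rdx so every
 + # lower limb is masked to zero and cannot change %rax. The closing
 + # xorq of %rax with %rdx yields 0 only when all limbs were equal.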
cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 216(%rdi), %r9 + movq 216(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 208(%rdi), %r9 + movq 208(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 200(%rdi), %r9 + movq 200(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 192(%rdi), %r9 + movq 192(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 184(%rdi), %r9 + movq 184(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 176(%rdi), %r9 + movq 176(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 168(%rdi), %r9 + movq 168(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 160(%rdi), %r9 + movq 160(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 152(%rdi), %r9 + movq 152(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 144(%rdi), %r9 + movq 144(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 136(%rdi), %r9 + movq 136(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 128(%rdi), %r9 + movq 128(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 120(%rdi), %r9 + movq 120(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 112(%rdi), %r9 + movq 112(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 104(%rdi), %r9 + movq 104(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 96(%rdi), %r9 + movq 96(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 88(%rdi), %r9 + movq 88(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 80(%rdi), %r9 + movq 80(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 72(%rdi), %r9 + movq 72(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 64(%rdi), %r9 + movq 64(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 56(%rdi), %r9 + movq 56(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 48(%rdi), %r9 + movq 48(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 40(%rdi), %r9 + movq 40(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc 
%rdx, %rax + cmovnz %rcx, %rdx + movq 32(%rdi), %r9 + movq 32(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 24(%rdi), %r9 + movq 24(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 16(%rdi), %r9 + movq 16(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 8(%rdi), %r9 + movq 8(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq (%rdi), %r9 + movq (%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + xorq %rdx, %rax + repz retq +#ifndef __APPLE__ +.size sp_4096_cmp_64,.-sp_4096_cmp_64 +#endif /* __APPLE__ */ +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_4096_sub_64 +.type sp_4096_sub_64,@function +.align 16 +sp_4096_sub_64: +#else +.globl _sp_4096_sub_64 +.p2align 4 +_sp_4096_sub_64: +#endif /* __APPLE__ */ + movq (%rsi), %rcx + xorq %rax, %rax + subq (%rdx), %rcx + movq 8(%rsi), %r8 + movq %rcx, (%rdi) + sbbq 8(%rdx), %r8 + movq 16(%rsi), %rcx + movq %r8, 8(%rdi) + sbbq 16(%rdx), %rcx + movq 24(%rsi), %r8 + movq %rcx, 16(%rdi) + sbbq 24(%rdx), %r8 + movq 32(%rsi), %rcx + movq %r8, 24(%rdi) + sbbq 32(%rdx), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%rdi) + sbbq 40(%rdx), %r8 + movq 48(%rsi), %rcx + movq %r8, 40(%rdi) + sbbq 48(%rdx), %rcx + movq 56(%rsi), %r8 + movq %rcx, 48(%rdi) + sbbq 56(%rdx), %r8 + movq 64(%rsi), %rcx + movq %r8, 56(%rdi) + sbbq 64(%rdx), %rcx + movq 72(%rsi), %r8 + movq %rcx, 64(%rdi) + sbbq 72(%rdx), %r8 + movq 80(%rsi), %rcx + movq %r8, 72(%rdi) + sbbq 80(%rdx), %rcx + movq 88(%rsi), %r8 + movq %rcx, 80(%rdi) + sbbq 88(%rdx), %r8 + movq 96(%rsi), %rcx + movq %r8, 88(%rdi) + sbbq 96(%rdx), %rcx + movq 104(%rsi), %r8 + movq %rcx, 96(%rdi) + sbbq 104(%rdx), %r8 + movq 112(%rsi), %rcx + movq %r8, 104(%rdi) + sbbq 112(%rdx), %rcx + movq 120(%rsi), %r8 + movq %rcx, 112(%rdi) + sbbq 120(%rdx), %r8 + movq 128(%rsi), %rcx + movq %r8, 120(%rdi) + sbbq 128(%rdx), %rcx + movq 136(%rsi), %r8 + movq %rcx, 128(%rdi) + sbbq 136(%rdx), %r8 + movq 144(%rsi), %rcx + movq %r8, 136(%rdi) + sbbq 144(%rdx), %rcx + movq 152(%rsi), %r8 + movq %rcx, 144(%rdi) + sbbq 152(%rdx), %r8 + movq 160(%rsi), %rcx + movq %r8, 152(%rdi) + sbbq 160(%rdx), %rcx + movq 168(%rsi), %r8 + movq %rcx, 160(%rdi) + sbbq 168(%rdx), %r8 + movq 176(%rsi), %rcx + movq %r8, 168(%rdi) + sbbq 176(%rdx), %rcx + movq 184(%rsi), %r8 + movq %rcx, 176(%rdi) + sbbq 184(%rdx), %r8 + movq 192(%rsi), %rcx + movq %r8, 184(%rdi) + sbbq 192(%rdx), %rcx + movq 200(%rsi), %r8 + movq %rcx, 192(%rdi) + sbbq 200(%rdx), %r8 + movq 208(%rsi), %rcx + movq %r8, 200(%rdi) + sbbq 208(%rdx), %rcx + movq 216(%rsi), %r8 + movq %rcx, 208(%rdi) + sbbq 216(%rdx), %r8 + movq 224(%rsi), %rcx + movq %r8, 216(%rdi) + sbbq 224(%rdx), %rcx + movq 232(%rsi), %r8 + movq %rcx, 224(%rdi) + sbbq 232(%rdx), %r8 + movq 240(%rsi), %rcx + movq %r8, 232(%rdi) + sbbq 240(%rdx), %rcx + movq 248(%rsi), %r8 + movq %rcx, 240(%rdi) + sbbq 248(%rdx), %r8 + movq 256(%rsi), %rcx + movq %r8, 248(%rdi) + sbbq 256(%rdx), %rcx + movq 264(%rsi), %r8 + movq %rcx, 256(%rdi) + sbbq 264(%rdx), %r8 + movq 272(%rsi), %rcx + movq %r8, 264(%rdi) + sbbq 272(%rdx), %rcx + movq 
280(%rsi), %r8 + movq %rcx, 272(%rdi) + sbbq 280(%rdx), %r8 + movq 288(%rsi), %rcx + movq %r8, 280(%rdi) + sbbq 288(%rdx), %rcx + movq 296(%rsi), %r8 + movq %rcx, 288(%rdi) + sbbq 296(%rdx), %r8 + movq 304(%rsi), %rcx + movq %r8, 296(%rdi) + sbbq 304(%rdx), %rcx + movq 312(%rsi), %r8 + movq %rcx, 304(%rdi) + sbbq 312(%rdx), %r8 + movq 320(%rsi), %rcx + movq %r8, 312(%rdi) + sbbq 320(%rdx), %rcx + movq 328(%rsi), %r8 + movq %rcx, 320(%rdi) + sbbq 328(%rdx), %r8 + movq 336(%rsi), %rcx + movq %r8, 328(%rdi) + sbbq 336(%rdx), %rcx + movq 344(%rsi), %r8 + movq %rcx, 336(%rdi) + sbbq 344(%rdx), %r8 + movq 352(%rsi), %rcx + movq %r8, 344(%rdi) + sbbq 352(%rdx), %rcx + movq 360(%rsi), %r8 + movq %rcx, 352(%rdi) + sbbq 360(%rdx), %r8 + movq 368(%rsi), %rcx + movq %r8, 360(%rdi) + sbbq 368(%rdx), %rcx + movq 376(%rsi), %r8 + movq %rcx, 368(%rdi) + sbbq 376(%rdx), %r8 + movq 384(%rsi), %rcx + movq %r8, 376(%rdi) + sbbq 384(%rdx), %rcx + movq 392(%rsi), %r8 + movq %rcx, 384(%rdi) + sbbq 392(%rdx), %r8 + movq 400(%rsi), %rcx + movq %r8, 392(%rdi) + sbbq 400(%rdx), %rcx + movq 408(%rsi), %r8 + movq %rcx, 400(%rdi) + sbbq 408(%rdx), %r8 + movq 416(%rsi), %rcx + movq %r8, 408(%rdi) + sbbq 416(%rdx), %rcx + movq 424(%rsi), %r8 + movq %rcx, 416(%rdi) + sbbq 424(%rdx), %r8 + movq 432(%rsi), %rcx + movq %r8, 424(%rdi) + sbbq 432(%rdx), %rcx + movq 440(%rsi), %r8 + movq %rcx, 432(%rdi) + sbbq 440(%rdx), %r8 + movq 448(%rsi), %rcx + movq %r8, 440(%rdi) + sbbq 448(%rdx), %rcx + movq 456(%rsi), %r8 + movq %rcx, 448(%rdi) + sbbq 456(%rdx), %r8 + movq 464(%rsi), %rcx + movq %r8, 456(%rdi) + sbbq 464(%rdx), %rcx + movq 472(%rsi), %r8 + movq %rcx, 464(%rdi) + sbbq 472(%rdx), %r8 + movq 480(%rsi), %rcx + movq %r8, 472(%rdi) + sbbq 480(%rdx), %rcx + movq 488(%rsi), %r8 + movq %rcx, 480(%rdi) + sbbq 488(%rdx), %r8 + movq 496(%rsi), %rcx + movq %r8, 488(%rdi) + sbbq 496(%rdx), %rcx + movq 504(%rsi), %r8 + movq %rcx, 496(%rdi) + sbbq 504(%rdx), %r8 + movq %r8, 504(%rdi) + sbbq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_4096_sub_64,.-sp_4096_sub_64 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
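 + *
 + * Word-by-word Montgomery reduction, sketched in C for reference (an
 + * illustration only; assumes 64-bit sp_digit limbs, a compiler with
 + * unsigned __int128, and a holding 128 limbs):
 + *
 + *   void sp_4096_mont_reduce_64_sketch(sp_digit* a, const sp_digit* m,
 + *                                      sp_digit mp)
 + *   {
 + *       sp_digit ca = 0;                      // carry into a[i + 64]
 + *       for (int i = 0; i < 64; i++) {
 + *           // mu makes a[i] + mu * m[0] == 0 mod 2^64
 + *           sp_digit mu = a[i] * mp;
 + *           unsigned __int128 t = 0;
 + *           for (int j = 0; j < 64; j++) {    // a += (mu * m) << (64 * i)
 + *               t += (unsigned __int128)mu * m[j] + a[i + j];
 + *               a[i + j] = (sp_digit)t;
 + *               t >>= 64;
 + *           }
 + *           t += (unsigned __int128)a[i + 64] + ca;
 + *           a[i + 64] = (sp_digit)t;
 + *           ca = (sp_digit)(t >> 64);
 + *       }
 + *       // result is in a[64..127]; one conditional subtract of m,
 + *       // selected by ca, finishes the reduction (done inline below).
 + *   }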
+ */ +#ifndef __APPLE__ +.globl sp_4096_mont_reduce_avx2_64 +.type sp_4096_mont_reduce_avx2_64,@function +.align 16 +sp_4096_mont_reduce_avx2_64: +#else +.globl _sp_4096_mont_reduce_avx2_64 +.p2align 4 +_sp_4096_mont_reduce_avx2_64: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + movq %rdx, %r8 + xorq %r14, %r14 + # i = 64 + movq $64, %r9 + movq (%rdi), %r13 + addq $256, %rdi + xorq %r12, %r12 +L_mont_loop_avx2_64: + # mu = a[i] * mp + movq %r13, %rdx + movq %r13, %r10 + imulq %r8, %rdx + xorq %r12, %r12 + # a[i+0] += m[0] * mu + mulxq (%rsi), %rax, %rcx + movq -248(%rdi), %r13 + adcxq %rax, %r10 + adoxq %rcx, %r13 + # a[i+1] += m[1] * mu + mulxq 8(%rsi), %rax, %rcx + movq -240(%rdi), %r10 + adcxq %rax, %r13 + adoxq %rcx, %r10 + # a[i+2] += m[2] * mu + mulxq 16(%rsi), %rax, %rcx + movq -232(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -240(%rdi) + # a[i+3] += m[3] * mu + mulxq 24(%rsi), %rax, %rcx + movq -224(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -232(%rdi) + # a[i+4] += m[4] * mu + mulxq 32(%rsi), %rax, %rcx + movq -216(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -224(%rdi) + # a[i+5] += m[5] * mu + mulxq 40(%rsi), %rax, %rcx + movq -208(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -216(%rdi) + # a[i+6] += m[6] * mu + mulxq 48(%rsi), %rax, %rcx + movq -200(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -208(%rdi) + # a[i+7] += m[7] * mu + mulxq 56(%rsi), %rax, %rcx + movq -192(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -200(%rdi) + # a[i+8] += m[8] * mu + mulxq 64(%rsi), %rax, %rcx + movq -184(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -192(%rdi) + # a[i+9] += m[9] * mu + mulxq 72(%rsi), %rax, %rcx + movq -176(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -184(%rdi) + # a[i+10] += m[10] * mu + mulxq 80(%rsi), %rax, %rcx + movq -168(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -176(%rdi) + # a[i+11] += m[11] * mu + mulxq 88(%rsi), %rax, %rcx + movq -160(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -168(%rdi) + # a[i+12] += m[12] * mu + mulxq 96(%rsi), %rax, %rcx + movq -152(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -160(%rdi) + # a[i+13] += m[13] * mu + mulxq 104(%rsi), %rax, %rcx + movq -144(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -152(%rdi) + # a[i+14] += m[14] * mu + mulxq 112(%rsi), %rax, %rcx + movq -136(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -144(%rdi) + # a[i+15] += m[15] * mu + mulxq 120(%rsi), %rax, %rcx + movq -128(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -136(%rdi) + # a[i+16] += m[16] * mu + mulxq 128(%rsi), %rax, %rcx + movq -120(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -128(%rdi) + # a[i+17] += m[17] * mu + mulxq 136(%rsi), %rax, %rcx + movq -112(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -120(%rdi) + # a[i+18] += m[18] * mu + mulxq 144(%rsi), %rax, %rcx + movq -104(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -112(%rdi) + # a[i+19] += m[19] * mu + mulxq 152(%rsi), %rax, %rcx + movq -96(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -104(%rdi) + # a[i+20] += m[20] * mu + mulxq 160(%rsi), %rax, %rcx + movq -88(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -96(%rdi) + # a[i+21] += m[21] * mu + mulxq 168(%rsi), %rax, %rcx + movq -80(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 
-88(%rdi) + # a[i+22] += m[22] * mu + mulxq 176(%rsi), %rax, %rcx + movq -72(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -80(%rdi) + # a[i+23] += m[23] * mu + mulxq 184(%rsi), %rax, %rcx + movq -64(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -72(%rdi) + # a[i+24] += m[24] * mu + mulxq 192(%rsi), %rax, %rcx + movq -56(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -64(%rdi) + # a[i+25] += m[25] * mu + mulxq 200(%rsi), %rax, %rcx + movq -48(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -56(%rdi) + # a[i+26] += m[26] * mu + mulxq 208(%rsi), %rax, %rcx + movq -40(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -48(%rdi) + # a[i+27] += m[27] * mu + mulxq 216(%rsi), %rax, %rcx + movq -32(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -40(%rdi) + # a[i+28] += m[28] * mu + mulxq 224(%rsi), %rax, %rcx + movq -24(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -32(%rdi) + # a[i+29] += m[29] * mu + mulxq 232(%rsi), %rax, %rcx + movq -16(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -24(%rdi) + # a[i+30] += m[30] * mu + mulxq 240(%rsi), %rax, %rcx + movq -8(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, -16(%rdi) + # a[i+31] += m[31] * mu + mulxq 248(%rsi), %rax, %rcx + movq (%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, -8(%rdi) + # a[i+32] += m[32] * mu + mulxq 256(%rsi), %rax, %rcx + movq 8(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, (%rdi) + # a[i+33] += m[33] * mu + mulxq 264(%rsi), %rax, %rcx + movq 16(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 8(%rdi) + # a[i+34] += m[34] * mu + mulxq 272(%rsi), %rax, %rcx + movq 24(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 16(%rdi) + # a[i+35] += m[35] * mu + mulxq 280(%rsi), %rax, %rcx + movq 32(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 24(%rdi) + # a[i+36] += m[36] * mu + mulxq 288(%rsi), %rax, %rcx + movq 40(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 32(%rdi) + # a[i+37] += m[37] * mu + mulxq 296(%rsi), %rax, %rcx + movq 48(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 40(%rdi) + # a[i+38] += m[38] * mu + mulxq 304(%rsi), %rax, %rcx + movq 56(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 48(%rdi) + # a[i+39] += m[39] * mu + mulxq 312(%rsi), %rax, %rcx + movq 64(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 56(%rdi) + # a[i+40] += m[40] * mu + mulxq 320(%rsi), %rax, %rcx + movq 72(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 64(%rdi) + # a[i+41] += m[41] * mu + mulxq 328(%rsi), %rax, %rcx + movq 80(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 72(%rdi) + # a[i+42] += m[42] * mu + mulxq 336(%rsi), %rax, %rcx + movq 88(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 80(%rdi) + # a[i+43] += m[43] * mu + mulxq 344(%rsi), %rax, %rcx + movq 96(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 88(%rdi) + # a[i+44] += m[44] * mu + mulxq 352(%rsi), %rax, %rcx + movq 104(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 96(%rdi) + # a[i+45] += m[45] * mu + mulxq 360(%rsi), %rax, %rcx + movq 112(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 104(%rdi) + # a[i+46] += m[46] * mu + mulxq 368(%rsi), %rax, %rcx + movq 120(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 112(%rdi) + # a[i+47] += m[47] * mu + mulxq 376(%rsi), %rax, %rcx + movq 128(%rdi), %r10 
+ adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 120(%rdi) + # a[i+48] += m[48] * mu + mulxq 384(%rsi), %rax, %rcx + movq 136(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 128(%rdi) + # a[i+49] += m[49] * mu + mulxq 392(%rsi), %rax, %rcx + movq 144(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 136(%rdi) + # a[i+50] += m[50] * mu + mulxq 400(%rsi), %rax, %rcx + movq 152(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 144(%rdi) + # a[i+51] += m[51] * mu + mulxq 408(%rsi), %rax, %rcx + movq 160(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 152(%rdi) + # a[i+52] += m[52] * mu + mulxq 416(%rsi), %rax, %rcx + movq 168(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 160(%rdi) + # a[i+53] += m[53] * mu + mulxq 424(%rsi), %rax, %rcx + movq 176(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 168(%rdi) + # a[i+54] += m[54] * mu + mulxq 432(%rsi), %rax, %rcx + movq 184(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 176(%rdi) + # a[i+55] += m[55] * mu + mulxq 440(%rsi), %rax, %rcx + movq 192(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 184(%rdi) + # a[i+56] += m[56] * mu + mulxq 448(%rsi), %rax, %rcx + movq 200(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 192(%rdi) + # a[i+57] += m[57] * mu + mulxq 456(%rsi), %rax, %rcx + movq 208(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 200(%rdi) + # a[i+58] += m[58] * mu + mulxq 464(%rsi), %rax, %rcx + movq 216(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 208(%rdi) + # a[i+59] += m[59] * mu + mulxq 472(%rsi), %rax, %rcx + movq 224(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 216(%rdi) + # a[i+60] += m[60] * mu + mulxq 480(%rsi), %rax, %rcx + movq 232(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 224(%rdi) + # a[i+61] += m[61] * mu + mulxq 488(%rsi), %rax, %rcx + movq 240(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 232(%rdi) + # a[i+62] += m[62] * mu + mulxq 496(%rsi), %rax, %rcx + movq 248(%rdi), %r11 + adcxq %rax, %r10 + adoxq %rcx, %r11 + movq %r10, 240(%rdi) + # a[i+63] += m[63] * mu + mulxq 504(%rsi), %rax, %rcx + movq 256(%rdi), %r10 + adcxq %rax, %r11 + adoxq %rcx, %r10 + movq %r11, 248(%rdi) + adcxq %r14, %r10 + movq %r10, 256(%rdi) + movq %r12, %r14 + adoxq %r12, %r14 + adcxq %r12, %r14 + # a += 1 + addq $8, %rdi + # i -= 1 + subq $1, %r9 + jnz L_mont_loop_avx2_64 + subq $256, %rdi + negq %r14 + movq %rdi, %r8 + subq $512, %rdi + movq (%rsi), %rcx + movq %r13, %rdx + pextq %r14, %rcx, %rcx + subq %rcx, %rdx + movq 8(%rsi), %rcx + movq 8(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, (%rdi) + sbbq %rcx, %rax + movq 16(%rsi), %rdx + movq 16(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 8(%rdi) + sbbq %rdx, %rcx + movq 24(%rsi), %rax + movq 24(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 16(%rdi) + sbbq %rax, %rdx + movq 32(%rsi), %rcx + movq 32(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 24(%rdi) + sbbq %rcx, %rax + movq 40(%rsi), %rdx + movq 40(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 32(%rdi) + sbbq %rdx, %rcx + movq 48(%rsi), %rax + movq 48(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 40(%rdi) + sbbq %rax, %rdx + movq 56(%rsi), %rcx + movq 56(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 48(%rdi) + sbbq %rcx, %rax + movq 64(%rsi), %rdx + movq 64(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 56(%rdi) + sbbq %rdx, %rcx + movq 72(%rsi), %rax + movq 72(%r8), %rdx + pextq %r14, %rax, %rax + 
movq %rcx, 64(%rdi) + sbbq %rax, %rdx + movq 80(%rsi), %rcx + movq 80(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 72(%rdi) + sbbq %rcx, %rax + movq 88(%rsi), %rdx + movq 88(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 80(%rdi) + sbbq %rdx, %rcx + movq 96(%rsi), %rax + movq 96(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 88(%rdi) + sbbq %rax, %rdx + movq 104(%rsi), %rcx + movq 104(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 96(%rdi) + sbbq %rcx, %rax + movq 112(%rsi), %rdx + movq 112(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 104(%rdi) + sbbq %rdx, %rcx + movq 120(%rsi), %rax + movq 120(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 112(%rdi) + sbbq %rax, %rdx + movq 128(%rsi), %rcx + movq 128(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 120(%rdi) + sbbq %rcx, %rax + movq 136(%rsi), %rdx + movq 136(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 128(%rdi) + sbbq %rdx, %rcx + movq 144(%rsi), %rax + movq 144(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 136(%rdi) + sbbq %rax, %rdx + movq 152(%rsi), %rcx + movq 152(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 144(%rdi) + sbbq %rcx, %rax + movq 160(%rsi), %rdx + movq 160(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 152(%rdi) + sbbq %rdx, %rcx + movq 168(%rsi), %rax + movq 168(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 160(%rdi) + sbbq %rax, %rdx + movq 176(%rsi), %rcx + movq 176(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 168(%rdi) + sbbq %rcx, %rax + movq 184(%rsi), %rdx + movq 184(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 176(%rdi) + sbbq %rdx, %rcx + movq 192(%rsi), %rax + movq 192(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 184(%rdi) + sbbq %rax, %rdx + movq 200(%rsi), %rcx + movq 200(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 192(%rdi) + sbbq %rcx, %rax + movq 208(%rsi), %rdx + movq 208(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 200(%rdi) + sbbq %rdx, %rcx + movq 216(%rsi), %rax + movq 216(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 208(%rdi) + sbbq %rax, %rdx + movq 224(%rsi), %rcx + movq 224(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 216(%rdi) + sbbq %rcx, %rax + movq 232(%rsi), %rdx + movq 232(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 224(%rdi) + sbbq %rdx, %rcx + movq 240(%rsi), %rax + movq 240(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 232(%rdi) + sbbq %rax, %rdx + movq 248(%rsi), %rcx + movq 248(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 240(%rdi) + sbbq %rcx, %rax + movq 256(%rsi), %rdx + movq 256(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 248(%rdi) + sbbq %rdx, %rcx + movq 264(%rsi), %rax + movq 264(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 256(%rdi) + sbbq %rax, %rdx + movq 272(%rsi), %rcx + movq 272(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 264(%rdi) + sbbq %rcx, %rax + movq 280(%rsi), %rdx + movq 280(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 272(%rdi) + sbbq %rdx, %rcx + movq 288(%rsi), %rax + movq 288(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 280(%rdi) + sbbq %rax, %rdx + movq 296(%rsi), %rcx + movq 296(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 288(%rdi) + sbbq %rcx, %rax + movq 304(%rsi), %rdx + movq 304(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 296(%rdi) + sbbq %rdx, %rcx + movq 312(%rsi), %rax + movq 312(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 304(%rdi) + sbbq %rax, %rdx + movq 320(%rsi), %rcx + movq 320(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 312(%rdi) + sbbq %rcx, %rax + movq 328(%rsi), %rdx + movq 328(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 
320(%rdi) + sbbq %rdx, %rcx + movq 336(%rsi), %rax + movq 336(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 328(%rdi) + sbbq %rax, %rdx + movq 344(%rsi), %rcx + movq 344(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 336(%rdi) + sbbq %rcx, %rax + movq 352(%rsi), %rdx + movq 352(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 344(%rdi) + sbbq %rdx, %rcx + movq 360(%rsi), %rax + movq 360(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 352(%rdi) + sbbq %rax, %rdx + movq 368(%rsi), %rcx + movq 368(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 360(%rdi) + sbbq %rcx, %rax + movq 376(%rsi), %rdx + movq 376(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 368(%rdi) + sbbq %rdx, %rcx + movq 384(%rsi), %rax + movq 384(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 376(%rdi) + sbbq %rax, %rdx + movq 392(%rsi), %rcx + movq 392(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 384(%rdi) + sbbq %rcx, %rax + movq 400(%rsi), %rdx + movq 400(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 392(%rdi) + sbbq %rdx, %rcx + movq 408(%rsi), %rax + movq 408(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 400(%rdi) + sbbq %rax, %rdx + movq 416(%rsi), %rcx + movq 416(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 408(%rdi) + sbbq %rcx, %rax + movq 424(%rsi), %rdx + movq 424(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 416(%rdi) + sbbq %rdx, %rcx + movq 432(%rsi), %rax + movq 432(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 424(%rdi) + sbbq %rax, %rdx + movq 440(%rsi), %rcx + movq 440(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 432(%rdi) + sbbq %rcx, %rax + movq 448(%rsi), %rdx + movq 448(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 440(%rdi) + sbbq %rdx, %rcx + movq 456(%rsi), %rax + movq 456(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 448(%rdi) + sbbq %rax, %rdx + movq 464(%rsi), %rcx + movq 464(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 456(%rdi) + sbbq %rcx, %rax + movq 472(%rsi), %rdx + movq 472(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 464(%rdi) + sbbq %rdx, %rcx + movq 480(%rsi), %rax + movq 480(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 472(%rdi) + sbbq %rax, %rdx + movq 488(%rsi), %rcx + movq 488(%r8), %rax + pextq %r14, %rcx, %rcx + movq %rdx, 480(%rdi) + sbbq %rcx, %rax + movq 496(%rsi), %rdx + movq 496(%r8), %rcx + pextq %r14, %rdx, %rdx + movq %rax, 488(%rdi) + sbbq %rdx, %rcx + movq 504(%rsi), %rax + movq 504(%r8), %rdx + pextq %r14, %rax, %rax + movq %rcx, 496(%rdi) + sbbq %rax, %rdx + movq %rdx, 504(%rdi) + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_4096_mont_reduce_avx2_64,.-sp_4096_mont_reduce_avx2_64 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
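 + *
 + * Illustrative C sketch (not part of the generated code; 32 limbs of
 + * 64 bits each, matching sp_4096_cond_add_32):
 + *
 + *   sp_digit sp_4096_cond_add_32_sketch(sp_digit* r, const sp_digit* a,
 + *                                       const sp_digit* b, sp_digit m)
 + *   {
 + *       unsigned __int128 t = 0;
 + *       for (int i = 0; i < 32; i++) {
 + *           t += (unsigned __int128)a[i] + (b[i] & m);  // masked addend
 + *           r[i] = (sp_digit)t;
 + *           t >>= 64;                                   // carry
 + *       }
 + *       return (sp_digit)t;                             // carry out: 0 or 1
 + *   }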
+ */ +#ifndef __APPLE__ +.globl sp_4096_cond_add_32 +.type sp_4096_cond_add_32,@function +.align 16 +sp_4096_cond_add_32: +#else +.globl _sp_4096_cond_add_32 +.p2align 4 +_sp_4096_cond_add_32: +#endif /* __APPLE__ */ + subq $256, %rsp + movq $0, %rax + movq (%rdx), %r8 + movq 8(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, (%rsp) + movq %r9, 8(%rsp) + movq 16(%rdx), %r8 + movq 24(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 16(%rsp) + movq %r9, 24(%rsp) + movq 32(%rdx), %r8 + movq 40(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq 48(%rdx), %r8 + movq 56(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 48(%rsp) + movq %r9, 56(%rsp) + movq 64(%rdx), %r8 + movq 72(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 64(%rsp) + movq %r9, 72(%rsp) + movq 80(%rdx), %r8 + movq 88(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 80(%rsp) + movq %r9, 88(%rsp) + movq 96(%rdx), %r8 + movq 104(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 96(%rsp) + movq %r9, 104(%rsp) + movq 112(%rdx), %r8 + movq 120(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 112(%rsp) + movq %r9, 120(%rsp) + movq 128(%rdx), %r8 + movq 136(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 128(%rsp) + movq %r9, 136(%rsp) + movq 144(%rdx), %r8 + movq 152(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 144(%rsp) + movq %r9, 152(%rsp) + movq 160(%rdx), %r8 + movq 168(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 160(%rsp) + movq %r9, 168(%rsp) + movq 176(%rdx), %r8 + movq 184(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 176(%rsp) + movq %r9, 184(%rsp) + movq 192(%rdx), %r8 + movq 200(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 192(%rsp) + movq %r9, 200(%rsp) + movq 208(%rdx), %r8 + movq 216(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 208(%rsp) + movq %r9, 216(%rsp) + movq 224(%rdx), %r8 + movq 232(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 224(%rsp) + movq %r9, 232(%rsp) + movq 240(%rdx), %r8 + movq 248(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 240(%rsp) + movq %r9, 248(%rsp) + movq (%rsi), %r8 + movq (%rsp), %rdx + addq %rdx, %r8 + movq 8(%rsi), %r9 + movq 8(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, (%rdi) + movq 16(%rsi), %r8 + movq 16(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 8(%rdi) + movq 24(%rsi), %r9 + movq 24(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 16(%rdi) + movq 32(%rsi), %r8 + movq 32(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 24(%rdi) + movq 40(%rsi), %r9 + movq 40(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 32(%rdi) + movq 48(%rsi), %r8 + movq 48(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 40(%rdi) + movq 56(%rsi), %r9 + movq 56(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 48(%rdi) + movq 64(%rsi), %r8 + movq 64(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 56(%rdi) + movq 72(%rsi), %r9 + movq 72(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 64(%rdi) + movq 80(%rsi), %r8 + movq 80(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 72(%rdi) + movq 88(%rsi), %r9 + movq 88(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 80(%rdi) + movq 96(%rsi), %r8 + movq 96(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 88(%rdi) + movq 104(%rsi), %r9 + movq 104(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 96(%rdi) + movq 112(%rsi), %r8 + movq 112(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 104(%rdi) + movq 120(%rsi), %r9 + movq 120(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 112(%rdi) + movq 128(%rsi), %r8 + movq 128(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 120(%rdi) + movq 136(%rsi), %r9 + 
movq 136(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 128(%rdi) + movq 144(%rsi), %r8 + movq 144(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 136(%rdi) + movq 152(%rsi), %r9 + movq 152(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 144(%rdi) + movq 160(%rsi), %r8 + movq 160(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 152(%rdi) + movq 168(%rsi), %r9 + movq 168(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 160(%rdi) + movq 176(%rsi), %r8 + movq 176(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 168(%rdi) + movq 184(%rsi), %r9 + movq 184(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 176(%rdi) + movq 192(%rsi), %r8 + movq 192(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 184(%rdi) + movq 200(%rsi), %r9 + movq 200(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 192(%rdi) + movq 208(%rsi), %r8 + movq 208(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 200(%rdi) + movq 216(%rsi), %r9 + movq 216(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 208(%rdi) + movq 224(%rsi), %r8 + movq 224(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 216(%rdi) + movq 232(%rsi), %r9 + movq 232(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 224(%rdi) + movq 240(%rsi), %r8 + movq 240(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 232(%rdi) + movq 248(%rsi), %r9 + movq 248(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 240(%rdi) + movq %r9, 248(%rdi) + adcq $0, %rax + addq $256, %rsp + repz retq +#ifndef __APPLE__ +.size sp_4096_cond_add_32,.-sp_4096_cond_add_32 +#endif /* __APPLE__ */ +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +#ifndef __APPLE__ +.globl sp_4096_cond_add_avx2_32 +.type sp_4096_cond_add_avx2_32,@function +.align 16 +sp_4096_cond_add_avx2_32: +#else +.globl _sp_4096_cond_add_avx2_32 +.p2align 4 +_sp_4096_cond_add_avx2_32: +#endif /* __APPLE__ */ + movq $0, %rax + movq (%rdx), %r10 + movq (%rsi), %r8 + pextq %rcx, %r10, %r10 + addq %r10, %r8 + movq 8(%rdx), %r10 + movq 8(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, (%rdi) + adcq %r10, %r9 + movq 16(%rdx), %r8 + movq 16(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 8(%rdi) + adcq %r8, %r10 + movq 24(%rdx), %r9 + movq 24(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 16(%rdi) + adcq %r9, %r8 + movq 32(%rdx), %r10 + movq 32(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 24(%rdi) + adcq %r10, %r9 + movq 40(%rdx), %r8 + movq 40(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 32(%rdi) + adcq %r8, %r10 + movq 48(%rdx), %r9 + movq 48(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 40(%rdi) + adcq %r9, %r8 + movq 56(%rdx), %r10 + movq 56(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 48(%rdi) + adcq %r10, %r9 + movq 64(%rdx), %r8 + movq 64(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 56(%rdi) + adcq %r8, %r10 + movq 72(%rdx), %r9 + movq 72(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 64(%rdi) + adcq %r9, %r8 + movq 80(%rdx), %r10 + movq 80(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 72(%rdi) + adcq %r10, %r9 + movq 88(%rdx), %r8 + movq 88(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 80(%rdi) + adcq %r8, %r10 + movq 96(%rdx), %r9 + movq 96(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 88(%rdi) + adcq %r9, %r8 + movq 104(%rdx), %r10 + movq 104(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 96(%rdi) + adcq %r10, %r9 + movq 112(%rdx), %r8 + movq 112(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 104(%rdi) + adcq %r8, %r10 + movq 120(%rdx), %r9 + movq 120(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 112(%rdi) 
+ adcq %r9, %r8
+ movq 128(%rdx), %r10
+ movq 128(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 120(%rdi)
+ adcq %r10, %r9
+ movq 136(%rdx), %r8
+ movq 136(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 128(%rdi)
+ adcq %r8, %r10
+ movq 144(%rdx), %r9
+ movq 144(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 136(%rdi)
+ adcq %r9, %r8
+ movq 152(%rdx), %r10
+ movq 152(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 144(%rdi)
+ adcq %r10, %r9
+ movq 160(%rdx), %r8
+ movq 160(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 152(%rdi)
+ adcq %r8, %r10
+ movq 168(%rdx), %r9
+ movq 168(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 160(%rdi)
+ adcq %r9, %r8
+ movq 176(%rdx), %r10
+ movq 176(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 168(%rdi)
+ adcq %r10, %r9
+ movq 184(%rdx), %r8
+ movq 184(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 176(%rdi)
+ adcq %r8, %r10
+ movq 192(%rdx), %r9
+ movq 192(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 184(%rdi)
+ adcq %r9, %r8
+ movq 200(%rdx), %r10
+ movq 200(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 192(%rdi)
+ adcq %r10, %r9
+ movq 208(%rdx), %r8
+ movq 208(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 200(%rdi)
+ adcq %r8, %r10
+ movq 216(%rdx), %r9
+ movq 216(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 208(%rdi)
+ adcq %r9, %r8
+ movq 224(%rdx), %r10
+ movq 224(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 216(%rdi)
+ adcq %r10, %r9
+ movq 232(%rdx), %r8
+ movq 232(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 224(%rdi)
+ adcq %r8, %r10
+ movq 240(%rdx), %r9
+ movq 240(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 232(%rdi)
+ adcq %r9, %r8
+ movq 248(%rdx), %r10
+ movq 248(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 240(%rdi)
+ adcq %r10, %r9
+ movq %r9, 248(%rdi)
+ adcq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_cond_add_avx2_32,.-sp_4096_cond_add_avx2_32
+#endif /* __APPLE__ */
+/* Shift number left by n bits. (r = a << n)
+ *
+ * r Result of left shift by n.
+ * a Number to shift.
+ * n Amount to shift.
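+ *
+ * Rough C sketch of the word-level shift below (illustrative only;
+ * assumes 0 < n < 64 and a 65-word result, matching the extra top
+ * word this routine writes):
+ *   r[64] = a[63] >> (64 - n);
+ *   for (i = 63; i > 0; i--)
+ *       r[i] = (a[i] << n) | (a[i - 1] >> (64 - n));
+ *   r[0] = a[0] << n;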
+ */ +#ifndef __APPLE__ +.globl sp_4096_lshift_64 +.type sp_4096_lshift_64,@function +.align 16 +sp_4096_lshift_64: +#else +.globl _sp_4096_lshift_64 +.p2align 4 +_sp_4096_lshift_64: +#endif /* __APPLE__ */ + movq %rdx, %rcx + movq $0, %r10 + movq 472(%rsi), %r11 + movq 480(%rsi), %rdx + movq 488(%rsi), %rax + movq 496(%rsi), %r8 + movq 504(%rsi), %r9 + shldq %cl, %r9, %r10 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 480(%rdi) + movq %rax, 488(%rdi) + movq %r8, 496(%rdi) + movq %r9, 504(%rdi) + movq %r10, 512(%rdi) + movq 440(%rsi), %r9 + movq 448(%rsi), %rdx + movq 456(%rsi), %rax + movq 464(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 448(%rdi) + movq %rax, 456(%rdi) + movq %r8, 464(%rdi) + movq %r11, 472(%rdi) + movq 408(%rsi), %r11 + movq 416(%rsi), %rdx + movq 424(%rsi), %rax + movq 432(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 416(%rdi) + movq %rax, 424(%rdi) + movq %r8, 432(%rdi) + movq %r9, 440(%rdi) + movq 376(%rsi), %r9 + movq 384(%rsi), %rdx + movq 392(%rsi), %rax + movq 400(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 384(%rdi) + movq %rax, 392(%rdi) + movq %r8, 400(%rdi) + movq %r11, 408(%rdi) + movq 344(%rsi), %r11 + movq 352(%rsi), %rdx + movq 360(%rsi), %rax + movq 368(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 352(%rdi) + movq %rax, 360(%rdi) + movq %r8, 368(%rdi) + movq %r9, 376(%rdi) + movq 312(%rsi), %r9 + movq 320(%rsi), %rdx + movq 328(%rsi), %rax + movq 336(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 320(%rdi) + movq %rax, 328(%rdi) + movq %r8, 336(%rdi) + movq %r11, 344(%rdi) + movq 280(%rsi), %r11 + movq 288(%rsi), %rdx + movq 296(%rsi), %rax + movq 304(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 288(%rdi) + movq %rax, 296(%rdi) + movq %r8, 304(%rdi) + movq %r9, 312(%rdi) + movq 248(%rsi), %r9 + movq 256(%rsi), %rdx + movq 264(%rsi), %rax + movq 272(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 256(%rdi) + movq %rax, 264(%rdi) + movq %r8, 272(%rdi) + movq %r11, 280(%rdi) + movq 216(%rsi), %r11 + movq 224(%rsi), %rdx + movq 232(%rsi), %rax + movq 240(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 224(%rdi) + movq %rax, 232(%rdi) + movq %r8, 240(%rdi) + movq %r9, 248(%rdi) + movq 184(%rsi), %r9 + movq 192(%rsi), %rdx + movq 200(%rsi), %rax + movq 208(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 192(%rdi) + movq %rax, 200(%rdi) + movq %r8, 208(%rdi) + movq %r11, 216(%rdi) + movq 152(%rsi), %r11 + movq 160(%rsi), %rdx + movq 168(%rsi), %rax + movq 176(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 160(%rdi) + movq %rax, 168(%rdi) + movq %r8, 176(%rdi) + movq %r9, 184(%rdi) + movq 120(%rsi), %r9 + movq 128(%rsi), %rdx + movq 136(%rsi), %rax + movq 144(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 128(%rdi) + movq %rax, 136(%rdi) + 
movq %r8, 144(%rdi)
+ movq %r11, 152(%rdi)
+ movq 88(%rsi), %r11
+ movq 96(%rsi), %rdx
+ movq 104(%rsi), %rax
+ movq 112(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 96(%rdi)
+ movq %rax, 104(%rdi)
+ movq %r8, 112(%rdi)
+ movq %r9, 120(%rdi)
+ movq 56(%rsi), %r9
+ movq 64(%rsi), %rdx
+ movq 72(%rsi), %rax
+ movq 80(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 64(%rdi)
+ movq %rax, 72(%rdi)
+ movq %r8, 80(%rdi)
+ movq %r11, 88(%rdi)
+ movq 24(%rsi), %r11
+ movq 32(%rsi), %rdx
+ movq 40(%rsi), %rax
+ movq 48(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 32(%rdi)
+ movq %rax, 40(%rdi)
+ movq %r8, 48(%rdi)
+ movq %r9, 56(%rdi)
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rax
+ movq 16(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shlq %cl, %rdx
+ movq %rdx, (%rdi)
+ movq %rax, 8(%rdi)
+ movq %r8, 16(%rdi)
+ movq %r11, 24(%rdi)
+ repz retq
+#endif /* WOLFSSL_SP_4096 */
+#endif /* WOLFSSL_SP_4096 */
+#ifndef WOLFSSL_SP_NO_256
+/* Conditionally copy a into r using the mask m.
+ * m is -1 to copy and 0 when not.
+ *
+ * r A single precision number to copy over.
+ * a A single precision number to copy.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_256_cond_copy_4
+.type sp_256_cond_copy_4,@function
+.align 16
+sp_256_cond_copy_4:
+#else
+.globl _sp_256_cond_copy_4
+.p2align 4
+_sp_256_cond_copy_4:
+#endif /* __APPLE__ */
+ movq (%rdi), %rax
+ movq 8(%rdi), %rcx
+ movq 16(%rdi), %r8
+ movq 24(%rdi), %r9
+ xorq (%rsi), %rax
+ xorq 8(%rsi), %rcx
+ xorq 16(%rsi), %r8
+ xorq 24(%rsi), %r9
+ andq %rdx, %rax
+ andq %rdx, %rcx
+ andq %rdx, %r8
+ andq %rdx, %r9
+ xorq %rax, (%rdi)
+ xorq %rcx, 8(%rdi)
+ xorq %r8, 16(%rdi)
+ xorq %r9, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_256_cond_copy_4,.-sp_256_cond_copy_4
+#endif /* __APPLE__ */
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
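+ *
+ * Montgomery multiplication computes r = a * b * R^-1 mod m with
+ * R = 2^256.  As a generic word-level sketch (this routine instead
+ * folds the reduction in, exploiting the special form of the P-256
+ * prime):
+ *   t = a * b;                    // 512-bit product
+ *   u = ((t mod R) * m') mod R;   // m' = -m^-1 mod R
+ *   r = (t + u * m) / R;          // exact; low 256 bits cancel
+ *   if (r >= m) r -= m;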
+ */ +#ifndef __APPLE__ +.globl sp_256_mont_mul_4 +.type sp_256_mont_mul_4,@function +.align 16 +sp_256_mont_mul_4: +#else +.globl _sp_256_mont_mul_4 +.p2align 4 +_sp_256_mont_mul_4: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + push %rbx + movq %rdx, %r8 + # A[0] * B[0] + movq (%r8), %rax + mulq (%rsi) + movq %rax, %r9 + movq %rdx, %r10 + # A[0] * B[1] + movq 8(%r8), %rax + mulq (%rsi) + xorq %r11, %r11 + addq %rax, %r10 + adcq %rdx, %r11 + # A[1] * B[0] + movq (%r8), %rax + mulq 8(%rsi) + xorq %r12, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[0] * B[2] + movq 16(%r8), %rax + mulq (%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + # A[1] * B[1] + movq 8(%r8), %rax + mulq 8(%rsi) + xorq %r13, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0, %r13 + # A[2] * B[0] + movq (%r8), %rax + mulq 16(%rsi) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0, %r13 + # A[0] * B[3] + movq 24(%r8), %rax + mulq (%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0, %r14 + # A[1] * B[2] + movq 16(%r8), %rax + mulq 8(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0, %r14 + # A[2] * B[1] + movq 8(%r8), %rax + mulq 16(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0, %r14 + # A[3] * B[0] + movq (%r8), %rax + mulq 24(%rsi) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0, %r14 + # A[1] * B[3] + movq 24(%r8), %rax + mulq 8(%rsi) + xorq %r15, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0, %r15 + # A[2] * B[2] + movq 16(%r8), %rax + mulq 16(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0, %r15 + # A[3] * B[1] + movq 8(%r8), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0, %r15 + # A[2] * B[3] + movq 24(%r8), %rax + mulq 16(%rsi) + xorq %rbx, %rbx + addq %rax, %r14 + adcq %rdx, %r15 + adcq $0, %rbx + # A[3] * B[2] + movq 16(%r8), %rax + mulq 24(%rsi) + addq %rax, %r14 + adcq %rdx, %r15 + adcq $0, %rbx + # A[3] * B[3] + movq 24(%r8), %rax + mulq 24(%rsi) + addq %rax, %r15 + adcq %rdx, %rbx + # Start Reduction + # mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192 + # - a[0] << 32 << 192 + # + (a[0] * 2) << 192 + movq %r9, %rax + movq %r12, %rdx + addq %r9, %rdx + movq %r10, %rsi + addq %r9, %rdx + movq %r11, %r8 + # a[0]-a[2] << 32 + shlq $32, %r9 + shldq $32, %rsi, %r11 + shldq $32, %rax, %r10 + # - a[0] << 32 << 192 + subq %r9, %rdx + # + a[0]-a[2] << 32 << 64 + addq %r9, %rsi + adcq %r10, %r8 + adcq %r11, %rdx + # a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu + # a += mu << 256 + xorq %r9, %r9 + addq %rax, %r13 + adcq %rsi, %r14 + adcq %r8, %r15 + adcq %rdx, %rbx + sbbq $0, %r9 + # a += mu << 192 + addq %rax, %r12 + adcq %rsi, %r13 + adcq %r8, %r14 + adcq %rdx, %r15 + adcq $0, %rbx + sbbq $0, %r9 + # mu <<= 32 + movq %rdx, %rcx + shldq $32, %r8, %rdx + shldq $32, %rsi, %r8 + shldq $32, %rax, %rsi + shrq $32, %rcx + shlq $32, %rax + # a += (mu << 32) << 64 + addq %r8, %r12 + adcq %rdx, %r13 + adcq %rcx, %r14 + adcq $0, %r15 + adcq $0, %rbx + sbbq $0, %r9 + # a -= (mu << 32) << 192 + subq %rax, %r12 + sbbq %rsi, %r13 + sbbq %r8, %r14 + sbbq %rdx, %r15 + sbbq %rcx, %rbx + adcq $0, %r9 + movq $4294967295, %rax + movq $18446744069414584321, %rsi + # mask m and sub from result if overflow + # m[0] = -1 & mask = mask + andq %r9, %rax + # m[2] = 0 & mask = 0 + andq %r9, %rsi + subq %r9, %r13 + sbbq %rax, %r14 + sbbq $0, %r15 + sbbq %rsi, %rbx + movq %r13, (%rdi) + movq %r14, 8(%rdi) + movq %r15, 16(%rdi) + movq %rbx, 24(%rdi) + pop %rbx + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ 
+.size sp_256_mont_mul_4,.-sp_256_mont_mul_4 +#endif /* __APPLE__ */ +/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +#ifndef __APPLE__ +.globl sp_256_mont_sqr_4 +.type sp_256_mont_sqr_4,@function +.align 16 +sp_256_mont_sqr_4: +#else +.globl _sp_256_mont_sqr_4 +.p2align 4 +_sp_256_mont_sqr_4: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + push %rbx + # A[0] * A[1] + movq (%rsi), %rax + mulq 8(%rsi) + movq %rax, %r10 + movq %rdx, %r11 + # A[0] * A[2] + movq (%rsi), %rax + mulq 16(%rsi) + xorq %r12, %r12 + addq %rax, %r11 + adcq %rdx, %r12 + # A[0] * A[3] + movq (%rsi), %rax + mulq 24(%rsi) + xorq %r13, %r13 + addq %rax, %r12 + adcq %rdx, %r13 + # A[1] * A[2] + movq 8(%rsi), %rax + mulq 16(%rsi) + xorq %r14, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0, %r14 + # A[1] * A[3] + movq 8(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r13 + adcq %rdx, %r14 + # A[2] * A[3] + movq 16(%rsi), %rax + mulq 24(%rsi) + xorq %r15, %r15 + addq %rax, %r14 + adcq %rdx, %r15 + # Double + xorq %rbx, %rbx + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %r15, %r15 + adcq $0, %rbx + # A[0] * A[0] + movq (%rsi), %rax + mulq %rax + movq %rax, %rax + movq %rdx, %rdx + movq %rax, %r9 + movq %rdx, %r8 + # A[1] * A[1] + movq 8(%rsi), %rax + mulq %rax + movq %rax, %rax + movq %rdx, %rdx + addq %r8, %r10 + adcq %rax, %r11 + adcq $0, %rdx + movq %rdx, %r8 + # A[2] * A[2] + movq 16(%rsi), %rax + mulq %rax + movq %rax, %rax + movq %rdx, %rdx + addq %r8, %r12 + adcq %rax, %r13 + adcq $0, %rdx + movq %rdx, %r8 + # A[3] * A[3] + movq 24(%rsi), %rax + mulq %rax + addq %rax, %r15 + adcq %rdx, %rbx + addq %r8, %r14 + adcq $0, %r15 + adcq $0, %rbx + # Start Reduction + # mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192 + # - a[0] << 32 << 192 + # + (a[0] * 2) << 192 + movq %r9, %rax + movq %r12, %rdx + addq %r9, %rdx + movq %r10, %rsi + addq %r9, %rdx + movq %r11, %r8 + # a[0]-a[2] << 32 + shlq $32, %r9 + shldq $32, %rsi, %r11 + shldq $32, %rax, %r10 + # - a[0] << 32 << 192 + subq %r9, %rdx + # + a[0]-a[2] << 32 << 64 + addq %r9, %rsi + adcq %r10, %r8 + adcq %r11, %rdx + # a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu + # a += mu << 256 + xorq %r9, %r9 + addq %rax, %r13 + adcq %rsi, %r14 + adcq %r8, %r15 + adcq %rdx, %rbx + sbbq $0, %r9 + # a += mu << 192 + addq %rax, %r12 + adcq %rsi, %r13 + adcq %r8, %r14 + adcq %rdx, %r15 + adcq $0, %rbx + sbbq $0, %r9 + # mu <<= 32 + movq %rdx, %rcx + shldq $32, %r8, %rdx + shldq $32, %rsi, %r8 + shldq $32, %rax, %rsi + shrq $32, %rcx + shlq $32, %rax + # a += (mu << 32) << 64 + addq %r8, %r12 + adcq %rdx, %r13 + adcq %rcx, %r14 + adcq $0, %r15 + adcq $0, %rbx + sbbq $0, %r9 + # a -= (mu << 32) << 192 + subq %rax, %r12 + sbbq %rsi, %r13 + sbbq %r8, %r14 + sbbq %rdx, %r15 + sbbq %rcx, %rbx + adcq $0, %r9 + movq $4294967295, %rax + movq $18446744069414584321, %rsi + # mask m and sub from result if overflow + # m[0] = -1 & mask = mask + andq %r9, %rax + # m[2] = 0 & mask = 0 + andq %r9, %rsi + subq %r9, %r13 + sbbq %rax, %r14 + sbbq $0, %r15 + sbbq %rsi, %rbx + movq %r13, (%rdi) + movq %r14, 8(%rdi) + movq %r15, 16(%rdi) + movq %rbx, 24(%rdi) + pop %rbx + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_256_mont_sqr_4,.-sp_256_mont_sqr_4 +#endif /* __APPLE__ */ +/* Compare a with b in constant time. 
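+ * All four words are always examined, most significant first; a mask
+ * register is cleared at the first differing word so that the less
+ * significant words cannot change the already-decided result.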
+ * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +#ifndef __APPLE__ +.globl sp_256_cmp_4 +.type sp_256_cmp_4,@function +.align 16 +sp_256_cmp_4: +#else +.globl _sp_256_cmp_4 +.p2align 4 +_sp_256_cmp_4: +#endif /* __APPLE__ */ + xorq %rcx, %rcx + movq $-1, %rdx + movq $-1, %rax + movq $1, %r8 + movq 24(%rdi), %r9 + movq 24(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 16(%rdi), %r9 + movq 16(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 8(%rdi), %r9 + movq 8(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq (%rdi), %r9 + movq (%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + xorq %rdx, %rax + repz retq +#ifndef __APPLE__ +.size sp_256_cmp_4,.-sp_256_cmp_4 +#endif /* __APPLE__ */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +#ifndef __APPLE__ +.globl sp_256_cond_sub_4 +.type sp_256_cond_sub_4,@function +.align 16 +sp_256_cond_sub_4: +#else +.globl _sp_256_cond_sub_4 +.p2align 4 +_sp_256_cond_sub_4: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + movq $0, %rax + movq (%rdx), %r12 + movq 8(%rdx), %r13 + movq 16(%rdx), %r14 + movq 24(%rdx), %r15 + andq %rcx, %r12 + andq %rcx, %r13 + andq %rcx, %r14 + andq %rcx, %r15 + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq %r12, %r8 + sbbq %r13, %r9 + sbbq %r14, %r10 + sbbq %r15, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + sbbq $0, %rax + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_256_cond_sub_4,.-sp_256_cond_sub_4 +#endif /* __APPLE__ */ +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_256_sub_4 +.type sp_256_sub_4,@function +.align 16 +sp_256_sub_4: +#else +.globl _sp_256_sub_4 +.p2align 4 +_sp_256_sub_4: +#endif /* __APPLE__ */ + xorq %rax, %rax + movq (%rsi), %rcx + movq 8(%rsi), %r8 + movq 16(%rsi), %r9 + movq 24(%rsi), %r10 + subq (%rdx), %rcx + sbbq 8(%rdx), %r8 + sbbq 16(%rdx), %r9 + sbbq 24(%rdx), %r10 + movq %rcx, (%rdi) + movq %r8, 8(%rdi) + movq %r9, 16(%rdi) + movq %r10, 24(%rdi) + sbbq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_256_sub_4,.-sp_256_sub_4 +#endif /* __APPLE__ */ +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
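+ *
+ * Word-level sketch of the loop below (illustrative; a holds 8 words
+ * and R = 2^256):
+ *   for (i = 0; i < 4; i++) {
+ *       mu = (a[i] * mp) mod 2^64;
+ *       a[i..i+4] += mu * m;      // multi-word add; a[i] becomes 0
+ *   }
+ *   a >>= 256;                    // result is in a[4..7]
+ *   if (carry) a -= m;            // masked final subtraction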
+ */ +#ifndef __APPLE__ +.globl sp_256_mont_reduce_4 +.type sp_256_mont_reduce_4,@function +.align 16 +sp_256_mont_reduce_4: +#else +.globl _sp_256_mont_reduce_4 +.p2align 4 +_sp_256_mont_reduce_4: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + movq %rdx, %rcx + # i = 0 + xorq %r14, %r14 + movq $4, %r8 + movq %rdi, %r13 +L_mont_loop_4: + # mu = a[i] * mp + movq (%r13), %r12 + imulq %rcx, %r12 + # a[i+0] += m[0] * mu + movq (%rsi), %rax + movq 8(%rsi), %r10 + mulq %r12 + movq (%r13), %r15 + addq %rax, %r15 + movq %rdx, %r9 + movq %r15, (%r13) + adcq $0, %r9 + # a[i+1] += m[1] * mu + movq %r10, %rax + mulq %r12 + movq 16(%rsi), %r10 + movq 8(%r13), %r15 + addq %r9, %rax + movq %rdx, %r11 + adcq $0, %r11 + addq %rax, %r15 + movq %r15, 8(%r13) + adcq $0, %r11 + # a[i+2] += m[2] * mu + movq %r10, %rax + mulq %r12 + movq 24(%rsi), %r10 + movq 16(%r13), %r15 + addq %r11, %rax + movq %rdx, %r9 + adcq $0, %r9 + addq %rax, %r15 + movq %r15, 16(%r13) + adcq $0, %r9 + # a[i+3] += m[3] * mu + movq %r10, %rax + mulq %r12 + movq 24(%r13), %r15 + addq %r9, %rax + adcq %r14, %rdx + movq $0, %r14 + adcq $0, %r14 + addq %rax, %r15 + movq %r15, 24(%r13) + adcq %rdx, 32(%r13) + adcq $0, %r14 + # i += 1 + addq $8, %r13 + decq %r8 + jnz L_mont_loop_4 + xorq %rax, %rax + movq 32(%rdi), %rdx + movq 40(%rdi), %r8 + movq 48(%rdi), %r15 + movq 56(%rdi), %r9 + subq %r14, %rax + movq (%rsi), %r10 + movq 8(%rsi), %r11 + movq 16(%rsi), %r12 + movq 24(%rsi), %r13 + andq %rax, %r10 + andq %rax, %r11 + andq %rax, %r12 + andq %rax, %r13 + subq %r10, %rdx + sbbq %r11, %r8 + sbbq %r12, %r15 + sbbq %r13, %r9 + movq %rdx, (%rdi) + movq %r8, 8(%rdi) + movq %r15, 16(%rdi) + movq %r9, 24(%rdi) + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_256_mont_reduce_4,.-sp_256_mont_reduce_4 +#endif /* __APPLE__ */ +/* Add two Montgomery form numbers (r = a + b % m). + * + * r Result of addition. + * a First number to add in Montogmery form. + * b Second number to add in Montogmery form. + * m Modulus (prime). + */ +#ifndef __APPLE__ +.globl sp_256_mont_add_4 +.type sp_256_mont_add_4,@function +.align 16 +sp_256_mont_add_4: +#else +.globl _sp_256_mont_add_4 +.p2align 4 +_sp_256_mont_add_4: +#endif /* __APPLE__ */ + movq (%rsi), %rax + movq 8(%rsi), %rcx + movq 16(%rsi), %r8 + movq 24(%rsi), %r9 + movq $4294967295, %r10 + movq $18446744069414584321, %r11 + addq (%rdx), %rax + adcq 8(%rdx), %rcx + adcq 16(%rdx), %r8 + movq $0, %rsi + adcq 24(%rdx), %r9 + sbbq $0, %rsi + andq %rsi, %r10 + andq %rsi, %r11 + subq %rsi, %rax + sbbq %r10, %rcx + movq %rax, (%rdi) + sbbq $0, %r8 + movq %rcx, 8(%rdi) + sbbq %r11, %r9 + movq %r8, 16(%rdi) + movq %r9, 24(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_256_mont_add_4,.-sp_256_mont_add_4 +#endif /* __APPLE__ */ +/* Double a Montgomery form number (r = a + a % m). + * + * r Result of doubling. + * a Number to double in Montogmery form. + * m Modulus (prime). 
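+ *
+ * In effect sp_256_mont_add_4(r, a, a, m): double a, then use the
+ * carry out of 2^256 as a mask to subtract the modulus exactly once.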
+ */
+#ifndef __APPLE__
+.globl sp_256_mont_dbl_4
+.type sp_256_mont_dbl_4,@function
+.align 16
+sp_256_mont_dbl_4:
+#else
+.globl _sp_256_mont_dbl_4
+.p2align 4
+_sp_256_mont_dbl_4:
+#endif /* __APPLE__ */
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rax
+ movq 16(%rsi), %rcx
+ movq 24(%rsi), %r8
+ movq $4294967295, %r9
+ movq $18446744069414584321, %r10
+ addq %rdx, %rdx
+ adcq %rax, %rax
+ adcq %rcx, %rcx
+ movq $0, %r11
+ adcq %r8, %r8
+ sbbq $0, %r11
+ andq %r11, %r9
+ andq %r11, %r10
+ subq %r11, %rdx
+ sbbq %r9, %rax
+ movq %rdx, (%rdi)
+ sbbq $0, %rcx
+ movq %rax, 8(%rdi)
+ sbbq %r10, %r8
+ movq %rcx, 16(%rdi)
+ movq %r8, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mont_dbl_4,.-sp_256_mont_dbl_4
+#endif /* __APPLE__ */
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r Result of tripling.
+ * a Number to triple in Montgomery form.
+ * m Modulus (prime).
+ */
+#ifndef __APPLE__
+.globl sp_256_mont_tpl_4
+.type sp_256_mont_tpl_4,@function
+.align 16
+sp_256_mont_tpl_4:
+#else
+.globl _sp_256_mont_tpl_4
+.p2align 4
+_sp_256_mont_tpl_4:
+#endif /* __APPLE__ */
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rax
+ movq 16(%rsi), %rcx
+ movq 24(%rsi), %r8
+ movq $4294967295, %r9
+ movq $18446744069414584321, %r10
+ addq %rdx, %rdx
+ adcq %rax, %rax
+ adcq %rcx, %rcx
+ movq $0, %r11
+ adcq %r8, %r8
+ sbbq $0, %r11
+ andq %r11, %r9
+ andq %r11, %r10
+ subq %r11, %rdx
+ sbbq %r9, %rax
+ sbbq $0, %rcx
+ sbbq %r10, %r8
+ movq $4294967295, %r9
+ movq $18446744069414584321, %r10
+ addq (%rsi), %rdx
+ adcq 8(%rsi), %rax
+ adcq 16(%rsi), %rcx
+ movq $0, %r11
+ adcq 24(%rsi), %r8
+ sbbq $0, %r11
+ andq %r11, %r9
+ andq %r11, %r10
+ subq %r11, %rdx
+ sbbq %r9, %rax
+ movq %rdx, (%rdi)
+ sbbq $0, %rcx
+ movq %rax, 8(%rdi)
+ sbbq %r10, %r8
+ movq %rcx, 16(%rdi)
+ movq %r8, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mont_tpl_4,.-sp_256_mont_tpl_4
+#endif /* __APPLE__ */
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r Result of subtraction.
+ * a Number to subtract from in Montgomery form.
+ * b Number to subtract with in Montgomery form.
+ * m Modulus (prime).
+ */
+#ifndef __APPLE__
+.globl sp_256_mont_sub_4
+.type sp_256_mont_sub_4,@function
+.align 16
+sp_256_mont_sub_4:
+#else
+.globl _sp_256_mont_sub_4
+.p2align 4
+_sp_256_mont_sub_4:
+#endif /* __APPLE__ */
+ movq (%rsi), %rax
+ movq 8(%rsi), %rcx
+ movq 16(%rsi), %r8
+ movq 24(%rsi), %r9
+ movq $4294967295, %r10
+ movq $18446744069414584321, %r11
+ subq (%rdx), %rax
+ sbbq 8(%rdx), %rcx
+ sbbq 16(%rdx), %r8
+ movq $0, %rsi
+ sbbq 24(%rdx), %r9
+ sbbq $0, %rsi
+ andq %rsi, %r10
+ andq %rsi, %r11
+ addq %rsi, %rax
+ adcq %r10, %rcx
+ movq %rax, (%rdi)
+ adcq $0, %r8
+ movq %rcx, 8(%rdi)
+ adcq %r11, %r9
+ movq %r8, 16(%rdi)
+ movq %r9, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mont_sub_4,.-sp_256_mont_sub_4
+#endif /* __APPLE__ */
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r Result of division by 2.
+ * a Number to divide.
+ * m Modulus (prime).
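+ *
+ * Since m is odd, exactly one of a and a + m is even, so in effect:
+ *   r = (a & 1) ? (a + m) >> 1 : a >> 1;
+ * with the carry out of a + m shifted back in as the top bit.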
+ */ +#ifndef __APPLE__ +.globl sp_256_div2_4 +.type sp_256_div2_4,@function +.align 16 +sp_256_div2_4: +#else +.globl _sp_256_div2_4 +.p2align 4 +_sp_256_div2_4: +#endif /* __APPLE__ */ + movq (%rsi), %rdx + movq 8(%rsi), %rax + movq 16(%rsi), %rcx + movq 24(%rsi), %r8 + movq $4294967295, %r9 + movq $18446744069414584321, %r10 + movq %rdx, %r11 + andq $1, %r11 + negq %r11 + andq %r11, %r9 + andq %r11, %r10 + addq %r11, %rdx + adcq %r9, %rax + adcq $0, %rcx + adcq %r10, %r8 + movq $0, %r11 + adcq $0, %r11 + shrdq $1, %rax, %rdx + shrdq $1, %rcx, %rax + shrdq $1, %r8, %rcx + shrdq $1, %r11, %r8 + movq %rdx, (%rdi) + movq %rax, 8(%rdi) + movq %rcx, 16(%rdi) + movq %r8, 24(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_256_div2_4,.-sp_256_div2_4 +#endif /* __APPLE__ */ +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +#ifndef __APPLE__ +.globl sp_256_mont_mul_avx2_4 +.type sp_256_mont_mul_avx2_4,@function +.align 16 +sp_256_mont_mul_avx2_4: +#else +.globl _sp_256_mont_mul_avx2_4 +.p2align 4 +_sp_256_mont_mul_avx2_4: +#endif /* __APPLE__ */ + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + movq %rdx, %rbp + # A[0] * B[0] + movq (%rbp), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rax, %rcx + xorq %r15, %r15 + adcxq %rax, %r9 + # A[1] * B[3] + movq 24(%rbp), %rdx + mulxq 8(%rsi), %r12, %r13 + adcxq %rcx, %r10 + # A[0] * B[1] + movq 8(%rbp), %rdx + mulxq (%rsi), %rax, %rcx + adoxq %rax, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rax, %r14 + adoxq %rcx, %r10 + adcxq %rax, %r11 + # A[1] * B[2] + movq 16(%rbp), %rdx + mulxq 8(%rsi), %rax, %rcx + adcxq %r14, %r12 + adoxq %rax, %r11 + adcxq %r15, %r13 + adoxq %rcx, %r12 + # A[0] * B[2] + mulxq (%rsi), %rax, %rcx + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rax, %r10 + # A[1] * B[1] + movq 8(%rbp), %rdx + mulxq 8(%rsi), %rdx, %rax + adcxq %rcx, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbp), %rdx + adoxq %rax, %r11 + mulxq 24(%rsi), %rax, %rcx + adcxq %rax, %r12 + # A[2] * B[2] + movq 16(%rbp), %rdx + mulxq 16(%rsi), %rdx, %rax + adcxq %rcx, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbp), %rdx + adoxq %rax, %r13 + mulxq 24(%rsi), %rax, %rcx + adoxq %r15, %r14 + adcxq %rax, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rax + adcxq %rcx, %r15 + xorq %rcx, %rcx + adcxq %rdx, %r11 + # A[3] * B[0] + movq 24(%rsi), %rdx + adcxq %rax, %r12 + mulxq (%rbp), %rbx, %rax + adoxq %rbx, %r11 + adoxq %rax, %r12 + # A[3] * B[2] + mulxq 16(%rbp), %rdx, %rax + adcxq %rdx, %r13 + # A[2] * B[3] + movq 24(%rbp), %rdx + adcxq %rax, %r14 + mulxq 16(%rsi), %rax, %rdx + adcxq %rcx, %r15 + adoxq %rax, %r13 + adoxq %rdx, %r14 + adoxq %rcx, %r15 + # Start Reduction + # mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192 + # - a[0] << 32 << 192 + # + (a[0] * 2) << 192 + movq %r8, %rax + movq %r11, %rdx + addq %r8, %rdx + movq %r9, %rsi + addq %r8, %rdx + movq %r10, %rbp + # a[0]-a[2] << 32 + shlq $32, %r8 + shldq $32, %rsi, %r10 + shldq $32, %rax, %r9 + # - a[0] << 32 << 192 + subq %r8, %rdx + # + a[0]-a[2] << 32 << 64 + addq %r8, %rsi + adcq %r9, %rbp + adcq %r10, %rdx + # a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu + # a += mu << 256 + xorq %r8, %r8 + addq %rax, %r12 + adcq %rsi, %r13 + adcq %rbp, %r14 + adcq %rdx, %r15 + 
sbbq $0, %r8 + # a += mu << 192 + addq %rax, %r11 + adcq %rsi, %r12 + adcq %rbp, %r13 + adcq %rdx, %r14 + adcq $0, %r15 + sbbq $0, %r8 + # mu <<= 32 + movq %rdx, %rcx + shldq $32, %rbp, %rdx + shldq $32, %rsi, %rbp + shldq $32, %rax, %rsi + shrq $32, %rcx + shlq $32, %rax + # a += (mu << 32) << 64 + addq %rbp, %r11 + adcq %rdx, %r12 + adcq %rcx, %r13 + adcq $0, %r14 + adcq $0, %r15 + sbbq $0, %r8 + # a -= (mu << 32) << 192 + subq %rax, %r11 + sbbq %rsi, %r12 + sbbq %rbp, %r13 + sbbq %rdx, %r14 + sbbq %rcx, %r15 + adcq $0, %r8 + movq $4294967295, %rax + movq $18446744069414584321, %rsi + # mask m and sub from result if overflow + # m[0] = -1 & mask = mask + andq %r8, %rax + # m[2] = 0 & mask = 0 + andq %r8, %rsi + subq %r8, %r12 + sbbq %rax, %r13 + sbbq $0, %r14 + sbbq %rsi, %r15 + movq %r12, (%rdi) + movq %r13, 8(%rdi) + movq %r14, 16(%rdi) + movq %r15, 24(%rdi) + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + repz retq +#ifndef __APPLE__ +.size sp_256_mont_mul_avx2_4,.-sp_256_mont_mul_avx2_4 +#endif /* __APPLE__ */ +/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +#ifndef __APPLE__ +.globl sp_256_mont_sqr_avx2_4 +.type sp_256_mont_sqr_avx2_4,@function +.align 16 +sp_256_mont_sqr_avx2_4: +#else +.globl _sp_256_mont_sqr_avx2_4 +.p2align 4 +_sp_256_mont_sqr_avx2_4: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + push %rbx + # A[0] * A[1] + movq (%rsi), %rdx + movq 16(%rsi), %r15 + mulxq 8(%rsi), %r9, %r10 + # A[0] * A[3] + mulxq 24(%rsi), %r11, %r12 + # A[2] * A[1] + movq %r15, %rdx + mulxq 8(%rsi), %rcx, %rbx + # A[2] * A[3] + mulxq 24(%rsi), %r13, %r14 + xorq %r15, %r15 + adoxq %rcx, %r11 + adoxq %rbx, %r12 + # A[2] * A[0] + mulxq (%rsi), %rcx, %rbx + # A[1] * A[3] + movq 8(%rsi), %rdx + adoxq %r15, %r13 + mulxq 24(%rsi), %rax, %r8 + adcxq %rcx, %r10 + adoxq %r15, %r14 + adcxq %rbx, %r11 + adcxq %rax, %r12 + adcxq %r8, %r13 + adcxq %r15, %r14 + # Double with Carry Flag + xorq %r15, %r15 + # A[0] * A[0] + movq (%rsi), %rdx + mulxq %rdx, %r8, %rax + adcxq %r9, %r9 + adcxq %r10, %r10 + adoxq %rax, %r9 + # A[1] * A[1] + movq 8(%rsi), %rdx + mulxq %rdx, %rcx, %rbx + adcxq %r11, %r11 + adoxq %rcx, %r10 + # A[2] * A[2] + movq 16(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adcxq %r12, %r12 + adoxq %rbx, %r11 + adcxq %r13, %r13 + adoxq %rax, %r12 + adcxq %r14, %r14 + # A[3] * A[3] + movq 24(%rsi), %rdx + mulxq %rdx, %rax, %rbx + adoxq %rcx, %r13 + adcxq %r15, %r15 + adoxq %rax, %r14 + adoxq %rbx, %r15 + # Start Reduction + # mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192 + # - a[0] << 32 << 192 + # + (a[0] * 2) << 192 + movq %r8, %rax + movq %r11, %rdx + addq %r8, %rdx + movq %r9, %rsi + addq %r8, %rdx + movq %r10, %rcx + # a[0]-a[2] << 32 + shlq $32, %r8 + shldq $32, %rsi, %r10 + shldq $32, %rax, %r9 + # - a[0] << 32 << 192 + subq %r8, %rdx + # + a[0]-a[2] << 32 << 64 + addq %r8, %rsi + adcq %r9, %rcx + adcq %r10, %rdx + # a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu + # a += mu << 256 + xorq %r8, %r8 + addq %rax, %r12 + adcq %rsi, %r13 + adcq %rcx, %r14 + adcq %rdx, %r15 + sbbq $0, %r8 + # a += mu << 192 + addq %rax, %r11 + adcq %rsi, %r12 + adcq %rcx, %r13 + adcq %rdx, %r14 + adcq $0, %r15 + sbbq $0, %r8 + # mu <<= 32 + movq %rdx, %rbx + shldq $32, %rcx, %rdx + shldq $32, %rsi, %rcx + shldq $32, %rax, %rsi + shrq $32, %rbx + shlq $32, %rax + # a += (mu << 32) << 64 + 
addq %rcx, %r11 + adcq %rdx, %r12 + adcq %rbx, %r13 + adcq $0, %r14 + adcq $0, %r15 + sbbq $0, %r8 + # a -= (mu << 32) << 192 + subq %rax, %r11 + sbbq %rsi, %r12 + sbbq %rcx, %r13 + sbbq %rdx, %r14 + sbbq %rbx, %r15 + adcq $0, %r8 + movq $4294967295, %rax + movq $18446744069414584321, %rsi + # mask m and sub from result if overflow + # m[0] = -1 & mask = mask + andq %r8, %rax + # m[2] = 0 & mask = 0 + andq %r8, %rsi + subq %r8, %r12 + sbbq %rax, %r13 + sbbq $0, %r14 + sbbq %rsi, %r15 + movq %r12, (%rdi) + movq %r13, 8(%rdi) + movq %r14, 16(%rdi) + movq %r15, 24(%rdi) + pop %rbx + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_256_mont_sqr_avx2_4,.-sp_256_mont_sqr_avx2_4 +#endif /* __APPLE__ */ +/* Add 1 to a. (a = a + 1) + * + * a A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_256_add_one_4 +.type sp_256_add_one_4,@function +.align 16 +sp_256_add_one_4: +#else +.globl _sp_256_add_one_4 +.p2align 4 +_sp_256_add_one_4: +#endif /* __APPLE__ */ + addq $1, (%rdi) + adcq $0, 8(%rdi) + adcq $0, 16(%rdi) + adcq $0, 24(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_256_add_one_4,.-sp_256_add_one_4 +#endif /* __APPLE__ */ +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +#ifndef __APPLE__ +.globl sp_256_from_bin +.type sp_256_from_bin,@function +.align 16 +sp_256_from_bin: +#else +.globl _sp_256_from_bin +.p2align 4 +_sp_256_from_bin: +#endif /* __APPLE__ */ + movq %rdx, %r9 + movq %rdi, %r10 + addq %rcx, %r9 + addq $32, %r10 + xorq %r11, %r11 + jmp L_256_from_bin_64_end +L_256_from_bin_64_start: + subq $64, %r9 + movbeq 56(%r9), %rax + movbeq 48(%r9), %r8 + movq %rax, (%rdi) + movq %r8, 8(%rdi) + movbeq 40(%r9), %rax + movbeq 32(%r9), %r8 + movq %rax, 16(%rdi) + movq %r8, 24(%rdi) + movbeq 24(%r9), %rax + movbeq 16(%r9), %r8 + movq %rax, 32(%rdi) + movq %r8, 40(%rdi) + movbeq 8(%r9), %rax + movbeq (%r9), %r8 + movq %rax, 48(%rdi) + movq %r8, 56(%rdi) + addq $64, %rdi + subq $64, %rcx +L_256_from_bin_64_end: + cmpq $63, %rcx + jg L_256_from_bin_64_start + jmp L_256_from_bin_8_end +L_256_from_bin_8_start: + subq $8, %r9 + movbeq (%r9), %rax + movq %rax, (%rdi) + addq $8, %rdi + subq $8, %rcx +L_256_from_bin_8_end: + cmpq $7, %rcx + jg L_256_from_bin_8_start + cmpq %r11, %rcx + je L_256_from_bin_hi_end + movq %r11, %r8 + movq %r11, %rax +L_256_from_bin_hi_start: + movb (%rdx), %al + shlq $8, %r8 + incq %rdx + addq %rax, %r8 + decq %rcx + jg L_256_from_bin_hi_start + movq %r8, (%rdi) + addq $8, %rdi +L_256_from_bin_hi_end: + cmpq %r10, %rdi + je L_256_from_bin_zero_end +L_256_from_bin_zero_start: + movq %r11, (%rdi) + addq $8, %rdi + cmpq %r10, %rdi + jl L_256_from_bin_zero_start +L_256_from_bin_zero_end: + repz retq +#ifndef __APPLE__ +.size sp_256_from_bin,.-sp_256_from_bin +#endif /* __APPLE__ */ +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 32 + * + * r A single precision integer. + * a Byte array. + */ +#ifndef __APPLE__ +.globl sp_256_to_bin +.type sp_256_to_bin,@function +.align 16 +sp_256_to_bin: +#else +.globl _sp_256_to_bin +.p2align 4 +_sp_256_to_bin: +#endif /* __APPLE__ */ + movbeq 24(%rdi), %rdx + movbeq 16(%rdi), %rax + movq %rdx, (%rsi) + movq %rax, 8(%rsi) + movbeq 8(%rdi), %rdx + movbeq (%rdi), %rax + movq %rdx, 16(%rsi) + movq %rax, 24(%rsi) + repz retq +#ifndef __APPLE__ +.size sp_256_to_bin,.-sp_256_to_bin +#endif /* __APPLE__ */ +/* Add b to a into r. 
(r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_256_add_4 +.type sp_256_add_4,@function +.align 16 +sp_256_add_4: +#else +.globl _sp_256_add_4 +.p2align 4 +_sp_256_add_4: +#endif /* __APPLE__ */ + # Add + movq (%rsi), %rcx + xorq %rax, %rax + addq (%rdx), %rcx + movq 8(%rsi), %r8 + movq %rcx, (%rdi) + adcq 8(%rdx), %r8 + movq 16(%rsi), %rcx + movq %r8, 8(%rdi) + adcq 16(%rdx), %rcx + movq 24(%rsi), %r8 + movq %rcx, 16(%rdi) + adcq 24(%rdx), %r8 + movq %r8, 24(%rdi) + adcq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_256_add_4,.-sp_256_add_4 +#endif /* __APPLE__ */ +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_256_mul_4 +.type sp_256_mul_4,@function +.align 16 +sp_256_mul_4: +#else +.globl _sp_256_mul_4 +.p2align 4 +_sp_256_mul_4: +#endif /* __APPLE__ */ + movq %rdx, %rcx + subq $32, %rsp + # A[0] * B[0] + movq (%rcx), %rax + mulq (%rsi) + xorq %r10, %r10 + movq %rax, (%rsp) + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rcx), %rax + mulq (%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[1] * B[0] + movq (%rcx), %rax + mulq 8(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 8(%rsp) + # A[0] * B[2] + movq 16(%rcx), %rax + mulq (%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[1] * B[1] + movq 8(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[2] * B[0] + movq (%rcx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 16(%rsp) + # A[0] * B[3] + movq 24(%rcx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[1] * B[2] + movq 16(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[2] * B[1] + movq 8(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[3] * B[0] + movq (%rcx), %rax + mulq 24(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 24(%rsp) + # A[1] * B[3] + movq 24(%rcx), %rax + mulq 8(%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[2] * B[2] + movq 16(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[3] * B[1] + movq 8(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 32(%rdi) + # A[2] * B[3] + movq 24(%rcx), %rax + mulq 16(%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B[2] + movq 16(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 40(%rdi) + # A[3] * B[3] + movq 24(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + movq %r8, 48(%rdi) + movq %r9, 56(%rdi) + movq (%rsp), %rax + movq 8(%rsp), %rdx + movq 16(%rsp), %r8 + movq 24(%rsp), %r9 + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %r8, 16(%rdi) + movq %r9, 24(%rdi) + addq $32, %rsp + repz retq +#ifndef __APPLE__ +.size sp_256_mul_4,.-sp_256_mul_4 +#endif /* __APPLE__ */ +/* Multiply a and b into r. (r = a * b) + * + * r Result of multiplication. + * a First number to multiply. + * b Second number to multiply. 
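+ *
+ * Uses BMI2/ADX: mulx leaves the flags untouched, so adcx (carry
+ * flag) and adox (overflow flag) can advance two independent carry
+ * chains through the partial products in one pass.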
+ */ +#ifndef __APPLE__ +.globl sp_256_mul_avx2_4 +.type sp_256_mul_avx2_4,@function +.align 16 +sp_256_mul_avx2_4: +#else +.globl _sp_256_mul_avx2_4 +.p2align 4 +_sp_256_mul_avx2_4: +#endif /* __APPLE__ */ + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + movq %rdx, %rbp + # A[0] * B[0] + movq (%rbp), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rax, %rcx + xorq %r15, %r15 + adcxq %rax, %r9 + # A[1] * B[3] + movq 24(%rbp), %rdx + mulxq 8(%rsi), %r12, %r13 + adcxq %rcx, %r10 + # A[0] * B[1] + movq 8(%rbp), %rdx + mulxq (%rsi), %rax, %rcx + adoxq %rax, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rax, %r14 + adoxq %rcx, %r10 + adcxq %rax, %r11 + # A[1] * B[2] + movq 16(%rbp), %rdx + mulxq 8(%rsi), %rax, %rcx + adcxq %r14, %r12 + adoxq %rax, %r11 + adcxq %r15, %r13 + adoxq %rcx, %r12 + # A[0] * B[2] + mulxq (%rsi), %rax, %rcx + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rax, %r10 + # A[1] * B[1] + movq 8(%rbp), %rdx + mulxq 8(%rsi), %rdx, %rax + adcxq %rcx, %r11 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbp), %rdx + adoxq %rax, %r11 + mulxq 24(%rsi), %rax, %rcx + adcxq %rax, %r12 + # A[2] * B[2] + movq 16(%rbp), %rdx + mulxq 16(%rsi), %rdx, %rax + adcxq %rcx, %r13 + adoxq %rdx, %r12 + # A[3] * B[3] + movq 24(%rbp), %rdx + adoxq %rax, %r13 + mulxq 24(%rsi), %rax, %rcx + adoxq %r15, %r14 + adcxq %rax, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rax + adcxq %rcx, %r15 + xorq %rcx, %rcx + adcxq %rdx, %r11 + # A[3] * B[0] + movq 24(%rsi), %rdx + adcxq %rax, %r12 + mulxq (%rbp), %rbx, %rax + adoxq %rbx, %r11 + adoxq %rax, %r12 + # A[3] * B[2] + mulxq 16(%rbp), %rdx, %rax + adcxq %rdx, %r13 + # A[2] * B[3] + movq 24(%rbp), %rdx + adcxq %rax, %r14 + mulxq 16(%rsi), %rax, %rdx + adcxq %rcx, %r15 + adoxq %rax, %r13 + adoxq %rdx, %r14 + adoxq %rcx, %r15 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, 32(%rdi) + movq %r13, 40(%rdi) + movq %r14, 48(%rdi) + movq %r15, 56(%rdi) + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + repz retq +#ifndef __APPLE__ +.size sp_256_mul_avx2_4,.-sp_256_mul_avx2_4 +#endif /* __APPLE__ */ +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_256_sub_in_place_4 +.type sp_256_sub_in_place_4,@function +.align 16 +sp_256_sub_in_place_4: +#else +.globl _sp_256_sub_in_place_4 +.p2align 4 +_sp_256_sub_in_place_4: +#endif /* __APPLE__ */ + xorq %rax, %rax + movq (%rsi), %rdx + movq 8(%rsi), %rcx + movq 16(%rsi), %r8 + movq 24(%rsi), %r9 + subq %rdx, (%rdi) + sbbq %rcx, 8(%rdi) + sbbq %r8, 16(%rdi) + sbbq %r9, 24(%rdi) + sbbq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_256_sub_in_place_4,.-sp_256_sub_in_place_4 +#endif /* __APPLE__ */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
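+ *
+ * Branch-free: in effect r = a - (b & m) word by word with borrow,
+ * returning the final borrow (0 or -1).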
+ */ +#ifndef __APPLE__ +.globl sp_256_cond_sub_avx2_4 +.type sp_256_cond_sub_avx2_4,@function +.align 16 +sp_256_cond_sub_avx2_4: +#else +.globl _sp_256_cond_sub_avx2_4 +.p2align 4 +_sp_256_cond_sub_avx2_4: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + movq $0, %rax + movq (%rdx), %r12 + movq 8(%rdx), %r13 + movq 16(%rdx), %r14 + movq 24(%rdx), %r15 + andq %rcx, %r12 + andq %rcx, %r13 + andq %rcx, %r14 + andq %rcx, %r15 + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + subq %r12, %r8 + sbbq %r13, %r9 + sbbq %r14, %r10 + sbbq %r15, %r11 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + sbbq $0, %rax + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_256_cond_sub_avx2_4,.-sp_256_cond_sub_avx2_4 +#endif /* __APPLE__ */ +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +#ifndef __APPLE__ +.globl sp_256_mul_d_4 +.type sp_256_mul_d_4,@function +.align 16 +sp_256_mul_d_4: +#else +.globl _sp_256_mul_d_4 +.p2align 4 +_sp_256_mul_d_4: +#endif /* __APPLE__ */ + movq %rdx, %rcx + # A[0] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + movq %r8, (%rdi) + # A[1] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 8(%rsi) + addq %rax, %r9 + movq %r9, 8(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[2] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 16(%rsi) + addq %rax, %r10 + movq %r10, 16(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B + movq %rcx, %rax + mulq 24(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + movq %r8, 24(%rdi) + movq %r9, 32(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_256_mul_d_4,.-sp_256_mul_d_4 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +#ifndef __APPLE__ +.globl sp_256_mul_d_avx2_4 +.type sp_256_mul_d_avx2_4,@function +.align 16 +sp_256_mul_d_avx2_4: +#else +.globl _sp_256_mul_d_avx2_4 +.p2align 4 +_sp_256_mul_d_avx2_4: +#endif /* __APPLE__ */ + movq %rdx, %rax + # A[0] * B + movq %rax, %rdx + xorq %r11, %r11 + mulxq (%rsi), %r9, %r10 + movq %r9, (%rdi) + # A[1] * B + mulxq 8(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 8(%rdi) + adoxq %r8, %r9 + # A[2] * B + mulxq 16(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 16(%rdi) + adoxq %r8, %r10 + # A[3] * B + mulxq 24(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + adcxq %r11, %r9 + movq %r10, 24(%rdi) + movq %r9, 32(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_256_mul_d_avx2_4,.-sp_256_mul_d_avx2_4 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
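+ *
+ * Squaring needs only half the partial products: each cross term
+ * a[i]*a[j] with i < j is computed once and added twice, and the
+ * diagonal squares a[i]*a[i] fill in between them.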
+ */ +#ifndef __APPLE__ +.globl sp_256_sqr_4 +.type sp_256_sqr_4,@function +.align 16 +sp_256_sqr_4: +#else +.globl _sp_256_sqr_4 +.p2align 4 +_sp_256_sqr_4: +#endif /* __APPLE__ */ + push %r12 + subq $32, %rsp + # A[0] * A[0] + movq (%rsi), %rax + mulq %rax + xorq %r9, %r9 + movq %rax, (%rsp) + movq %rdx, %r8 + # A[0] * A[1] + movq 8(%rsi), %rax + mulq (%rsi) + xorq %rcx, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + movq %r8, 8(%rsp) + # A[0] * A[2] + movq 16(%rsi), %rax + mulq (%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + # A[1] * A[1] + movq 8(%rsi), %rax + mulq %rax + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + movq %r9, 16(%rsp) + # A[0] * A[3] + movq 24(%rsi), %rax + mulq (%rsi) + xorq %r9, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + # A[1] * A[2] + movq 16(%rsi), %rax + mulq 8(%rsi) + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + movq %rcx, 24(%rsp) + # A[1] * A[3] + movq 24(%rsi), %rax + mulq 8(%rsi) + xorq %rcx, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + # A[2] * A[2] + movq 16(%rsi), %rax + mulq %rax + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + movq %r8, 32(%rdi) + # A[2] * A[3] + movq 24(%rsi), %rax + mulq 16(%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + movq %r9, 40(%rdi) + # A[3] * A[3] + movq 24(%rsi), %rax + mulq %rax + addq %rax, %rcx + adcq %rdx, %r8 + movq %rcx, 48(%rdi) + movq %r8, 56(%rdi) + movq (%rsp), %rax + movq 8(%rsp), %rdx + movq 16(%rsp), %r10 + movq 24(%rsp), %r11 + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + addq $32, %rsp + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_256_sqr_4,.-sp_256_sqr_4 +#endif /* __APPLE__ */ +/* Square a and put result in r. (r = a * a) + * + * r Result of squaring. + * a Number to square in Montogmery form. 
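+ *
+ * The cross products are doubled on the adcx (carry flag) chain by
+ * adding each register to itself, while adox (overflow flag) folds
+ * in the diagonal squares from mulx in the same pass.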
+ */ +#ifndef __APPLE__ +.globl sp_256_sqr_avx2_4 +.type sp_256_sqr_avx2_4,@function +.align 16 +sp_256_sqr_avx2_4: +#else +.globl _sp_256_sqr_avx2_4 +.p2align 4 +_sp_256_sqr_avx2_4: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + push %rbx + # A[0] * A[1] + movq (%rsi), %rdx + movq 16(%rsi), %r15 + mulxq 8(%rsi), %r9, %r10 + # A[0] * A[3] + mulxq 24(%rsi), %r11, %r12 + # A[2] * A[1] + movq %r15, %rdx + mulxq 8(%rsi), %rcx, %rbx + # A[2] * A[3] + mulxq 24(%rsi), %r13, %r14 + xorq %r15, %r15 + adoxq %rcx, %r11 + adoxq %rbx, %r12 + # A[2] * A[0] + mulxq (%rsi), %rcx, %rbx + # A[1] * A[3] + movq 8(%rsi), %rdx + adoxq %r15, %r13 + mulxq 24(%rsi), %rax, %r8 + adcxq %rcx, %r10 + adoxq %r15, %r14 + adcxq %rbx, %r11 + adcxq %rax, %r12 + adcxq %r8, %r13 + adcxq %r15, %r14 + # Double with Carry Flag + xorq %r15, %r15 + # A[0] * A[0] + movq (%rsi), %rdx + mulxq %rdx, %r8, %rax + adcxq %r9, %r9 + adcxq %r10, %r10 + adoxq %rax, %r9 + # A[1] * A[1] + movq 8(%rsi), %rdx + mulxq %rdx, %rcx, %rbx + adcxq %r11, %r11 + adoxq %rcx, %r10 + # A[2] * A[2] + movq 16(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adcxq %r12, %r12 + adoxq %rbx, %r11 + adcxq %r13, %r13 + adoxq %rax, %r12 + adcxq %r14, %r14 + # A[3] * A[3] + movq 24(%rsi), %rdx + mulxq %rdx, %rax, %rbx + adoxq %rcx, %r13 + adcxq %r15, %r15 + adoxq %rax, %r14 + adoxq %rbx, %r15 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, 32(%rdi) + movq %r13, 40(%rdi) + movq %r14, 48(%rdi) + movq %r15, 56(%rdi) + pop %rbx + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_256_sqr_avx2_4,.-sp_256_sqr_avx2_4 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
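+ *
+ * Fully unrolled variant of the loop in sp_256_mont_reduce_4: each mu
+ * is formed with mulx and the m[0-3]*mu products are accumulated on
+ * the adcx/adox carry chains; the final masked subtraction uses the
+ * modulus words as immediates.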
+ */ +#ifndef __APPLE__ +.globl sp_256_mont_reduce_avx2_4 +.type sp_256_mont_reduce_avx2_4,@function +.align 16 +sp_256_mont_reduce_avx2_4: +#else +.globl _sp_256_mont_reduce_avx2_4 +.p2align 4 +_sp_256_mont_reduce_avx2_4: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + push %rbx + movq %rdx, %rax + movq (%rdi), %r12 + movq 8(%rdi), %r13 + movq 16(%rdi), %r14 + movq 24(%rdi), %r15 + xorq %r11, %r11 + xorq %r10, %r10 + # a[0-4] += m[0-3] * mu = m[0-3] * (a[0] * mp) + movq 32(%rdi), %rbx + # mu = a[0] * mp + movq %r12, %rdx + mulxq %rax, %rdx, %rcx + # a[0] += m[0] * mu + mulx (%rsi), %r8, %r9 + adcxq %r8, %r12 + # a[1] += m[1] * mu + mulx 8(%rsi), %r8, %rcx + adoxq %r9, %r13 + adcxq %r8, %r13 + # a[2] += m[2] * mu + mulx 16(%rsi), %r8, %r9 + adoxq %rcx, %r14 + adcxq %r8, %r14 + # a[3] += m[3] * mu + mulx 24(%rsi), %r8, %rcx + adoxq %r9, %r15 + adcxq %r8, %r15 + # a[4] += carry + adoxq %rcx, %rbx + adcxq %r10, %rbx + # carry + adoxq %r10, %r11 + adcxq %r10, %r11 + # a[1-5] += m[0-3] * mu = m[0-3] * (a[1] * mp) + movq 40(%rdi), %r12 + # mu = a[1] * mp + movq %r13, %rdx + mulxq %rax, %rdx, %rcx + # a[1] += m[0] * mu + mulx (%rsi), %r8, %r9 + adcxq %r8, %r13 + # a[2] += m[1] * mu + mulx 8(%rsi), %r8, %rcx + adoxq %r9, %r14 + adcxq %r8, %r14 + # a[3] += m[2] * mu + mulx 16(%rsi), %r8, %r9 + adoxq %rcx, %r15 + adcxq %r8, %r15 + # a[4] += m[3] * mu + mulx 24(%rsi), %r8, %rcx + adoxq %r9, %rbx + adcxq %r8, %rbx + # a[5] += carry + adoxq %rcx, %r12 + adcxq %r11, %r12 + movq %r10, %r11 + # carry + adoxq %r10, %r11 + adcxq %r10, %r11 + # a[2-6] += m[0-3] * mu = m[0-3] * (a[2] * mp) + movq 48(%rdi), %r13 + # mu = a[2] * mp + movq %r14, %rdx + mulxq %rax, %rdx, %rcx + # a[2] += m[0] * mu + mulx (%rsi), %r8, %r9 + adcxq %r8, %r14 + # a[3] += m[1] * mu + mulx 8(%rsi), %r8, %rcx + adoxq %r9, %r15 + adcxq %r8, %r15 + # a[4] += m[2] * mu + mulx 16(%rsi), %r8, %r9 + adoxq %rcx, %rbx + adcxq %r8, %rbx + # a[5] += m[3] * mu + mulx 24(%rsi), %r8, %rcx + adoxq %r9, %r12 + adcxq %r8, %r12 + # a[6] += carry + adoxq %rcx, %r13 + adcxq %r11, %r13 + movq %r10, %r11 + # carry + adoxq %r10, %r11 + adcxq %r10, %r11 + # a[3-7] += m[0-3] * mu = m[0-3] * (a[3] * mp) + movq 56(%rdi), %r14 + # mu = a[3] * mp + movq %r15, %rdx + mulxq %rax, %rdx, %rcx + # a[3] += m[0] * mu + mulx (%rsi), %r8, %r9 + adcxq %r8, %r15 + # a[4] += m[1] * mu + mulx 8(%rsi), %r8, %rcx + adoxq %r9, %rbx + adcxq %r8, %rbx + # a[5] += m[2] * mu + mulx 16(%rsi), %r8, %r9 + adoxq %rcx, %r12 + adcxq %r8, %r12 + # a[6] += m[3] * mu + mulx 24(%rsi), %r8, %rcx + adoxq %r9, %r13 + adcxq %r8, %r13 + # a[7] += carry + adoxq %rcx, %r14 + adcxq %r11, %r14 + movq %r10, %r11 + # carry + adoxq %r10, %r11 + adcxq %r10, %r11 + # Subtract mod if carry + negq %r11 + movq $17562291160714782033, %r8 + movq $13611842547513532036, %r9 + movq $18446744069414584320, %rdx + andq %r11, %r8 + andq %r11, %r9 + andq %r11, %rdx + subq %r8, %rbx + sbbq %r9, %r12 + sbbq %r11, %r13 + sbbq %rdx, %r14 + movq %rbx, (%rdi) + movq %r12, 8(%rdi) + movq %r13, 16(%rdi) + movq %r14, 24(%rdi) + pop %rbx + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_256_mont_reduce_avx2_4,.-sp_256_mont_reduce_avx2_4 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +#endif /* !WOLFSSL_SP_NO_256 */ +#ifdef WOLFSSL_SP_384 +/* Conditionally copy a into r using the mask m. + * m is -1 to copy and 0 when not. + * + * r A single precision number to copy over. + * a A single precision number to copy. + * m Mask value to apply. 
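+ *
+ * Branch-free via XOR masking, per word:
+ *   t = (r[i] ^ a[i]) & m;
+ *   r[i] ^= t;
+ * so m = -1 copies a[i] into r[i] and m = 0 leaves r[i] unchanged.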
+ */ +#ifndef __APPLE__ +.globl sp_384_cond_copy_6 +.type sp_384_cond_copy_6,@function +.align 16 +sp_384_cond_copy_6: +#else +.globl _sp_384_cond_copy_6 +.p2align 4 +_sp_384_cond_copy_6: +#endif /* __APPLE__ */ + movq (%rdi), %rax + movq 8(%rdi), %rcx + movq 16(%rdi), %r8 + movq 24(%rdi), %r9 + movq 32(%rdi), %r10 + movq 40(%rdi), %r11 + xorq (%rsi), %rax + xorq 8(%rsi), %rcx + xorq 16(%rsi), %r8 + xorq 24(%rsi), %r9 + xorq 32(%rsi), %r10 + xorq 40(%rsi), %r11 + andq %rdx, %rax + andq %rdx, %rcx + andq %rdx, %r8 + andq %rdx, %r9 + andq %rdx, %r10 + andq %rdx, %r11 + xorq %rax, (%rdi) + xorq %rcx, 8(%rdi) + xorq %r8, 16(%rdi) + xorq %r9, 24(%rdi) + xorq %r10, 32(%rdi) + xorq %r11, 40(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_384_cond_copy_6,.-sp_384_cond_copy_6 +#endif /* __APPLE__ */ +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_384_mul_6 +.type sp_384_mul_6,@function +.align 16 +sp_384_mul_6: +#else +.globl _sp_384_mul_6 +.p2align 4 +_sp_384_mul_6: +#endif /* __APPLE__ */ + movq %rdx, %rcx + subq $48, %rsp + # A[0] * B[0] + movq (%rcx), %rax + mulq (%rsi) + xorq %r10, %r10 + movq %rax, (%rsp) + movq %rdx, %r9 + # A[0] * B[1] + movq 8(%rcx), %rax + mulq (%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[1] * B[0] + movq (%rcx), %rax + mulq 8(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 8(%rsp) + # A[0] * B[2] + movq 16(%rcx), %rax + mulq (%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[1] * B[1] + movq 8(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[2] * B[0] + movq (%rcx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 16(%rsp) + # A[0] * B[3] + movq 24(%rcx), %rax + mulq (%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[1] * B[2] + movq 16(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[2] * B[1] + movq 8(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[3] * B[0] + movq (%rcx), %rax + mulq 24(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 24(%rsp) + # A[0] * B[4] + movq 32(%rcx), %rax + mulq (%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[1] * B[3] + movq 24(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[2] * B[2] + movq 16(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[3] * B[1] + movq 8(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[4] * B[0] + movq (%rcx), %rax + mulq 32(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 32(%rsp) + # A[0] * B[5] + movq 40(%rcx), %rax + mulq (%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[1] * B[4] + movq 32(%rcx), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[2] * B[3] + movq 24(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B[2] + movq 16(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[4] * B[1] + movq 8(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[5] * B[0] + movq (%rcx), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 40(%rsp) + # A[1] * B[5] + movq 40(%rcx), %rax + mulq 
8(%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[2] * B[4] + movq 32(%rcx), %rax + mulq 16(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[3] * B[3] + movq 24(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[4] * B[2] + movq 16(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[5] * B[1] + movq 8(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 48(%rdi) + # A[2] * B[5] + movq 40(%rcx), %rax + mulq 16(%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[3] * B[4] + movq 32(%rcx), %rax + mulq 24(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[4] * B[3] + movq 24(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + # A[5] * B[2] + movq 16(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0, %r8 + movq %r9, 56(%rdi) + # A[3] * B[5] + movq 40(%rcx), %rax + mulq 24(%rsi) + xorq %r9, %r9 + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[4] * B[4] + movq 32(%rcx), %rax + mulq 32(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + # A[5] * B[3] + movq 24(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + adcq $0, %r9 + movq %r10, 64(%rdi) + # A[4] * B[5] + movq 40(%rcx), %rax + mulq 32(%rsi) + xorq %r10, %r10 + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + # A[5] * B[4] + movq 32(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %r10 + movq %r8, 72(%rdi) + # A[5] * B[5] + movq 40(%rcx), %rax + mulq 40(%rsi) + addq %rax, %r9 + adcq %rdx, %r10 + movq %r9, 80(%rdi) + movq %r10, 88(%rdi) + movq (%rsp), %rax + movq 8(%rsp), %rdx + movq 16(%rsp), %r8 + movq 24(%rsp), %r9 + movq %rax, (%rdi) + movq %rdx, 8(%rdi) + movq %r8, 16(%rdi) + movq %r9, 24(%rdi) + movq 32(%rsp), %rax + movq 40(%rsp), %rdx + movq %rax, 32(%rdi) + movq %rdx, 40(%rdi) + addq $48, %rsp + repz retq +#ifndef __APPLE__ +.size sp_384_mul_6,.-sp_384_mul_6 +#endif /* __APPLE__ */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
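+ *
+ * This variant first masks b into a 48-byte stack buffer, then does
+ * the multi-word subtract from it, returning the final borrow
+ * (0 or -1).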
+ */ +#ifndef __APPLE__ +.globl sp_384_cond_sub_6 +.type sp_384_cond_sub_6,@function +.align 16 +sp_384_cond_sub_6: +#else +.globl _sp_384_cond_sub_6 +.p2align 4 +_sp_384_cond_sub_6: +#endif /* __APPLE__ */ + subq $48, %rsp + movq $0, %rax + movq (%rdx), %r8 + movq 8(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, (%rsp) + movq %r9, 8(%rsp) + movq 16(%rdx), %r8 + movq 24(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 16(%rsp) + movq %r9, 24(%rsp) + movq 32(%rdx), %r8 + movq 40(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq (%rsi), %r8 + movq (%rsp), %rdx + subq %rdx, %r8 + movq 8(%rsi), %r9 + movq 8(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, (%rdi) + movq 16(%rsi), %r8 + movq 16(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 8(%rdi) + movq 24(%rsi), %r9 + movq 24(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 16(%rdi) + movq 32(%rsi), %r8 + movq 32(%rsp), %rdx + sbbq %rdx, %r8 + movq %r9, 24(%rdi) + movq 40(%rsi), %r9 + movq 40(%rsp), %rdx + sbbq %rdx, %r9 + movq %r8, 32(%rdi) + movq %r9, 40(%rdi) + sbbq $0, %rax + addq $48, %rsp + repz retq +#ifndef __APPLE__ +.size sp_384_cond_sub_6,.-sp_384_cond_sub_6 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Reduce the number back to 384 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef __APPLE__ +.globl sp_384_mont_reduce_6 +.type sp_384_mont_reduce_6,@function +.align 16 +sp_384_mont_reduce_6: +#else +.globl _sp_384_mont_reduce_6 +.p2align 4 +_sp_384_mont_reduce_6: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + push %rbx + push %rbp + movq (%rdi), %r11 + movq 8(%rdi), %r12 + movq 16(%rdi), %r13 + movq 24(%rdi), %r14 + movq 32(%rdi), %r15 + movq 40(%rdi), %rsi + xorq %r10, %r10 + # a[0-7] += m[0-5] * mu[0..1] = m[0-5] * (a[0..1] * mp) + movq 48(%rdi), %rbx + movq 56(%rdi), %rbp + movq %r11, %rdx + movq %r12, %rax + shldq $32, %rdx, %rax + shlq $32, %rdx + addq %r11, %rdx + adcq %r12, %rax + addq %r11, %rax + movq %rdx, %rcx + movq %rax, %r8 + movq %rax, %r9 + shldq $32, %rcx, %r8 + shlq $32, %rcx + shrq $32, %r9 + addq %rcx, %r11 + adcq %r8, %r12 + adcq %r9, %r13 + adcq $0, %r14 + adcq $0, %r15 + adcq $0, %rsi + adcq %rdx, %rbx + adcq %rax, %rbp + adcq $0, %r10 + addq %rax, %rcx + adcq %rdx, %r8 + adcq %rax, %r9 + movq $0, %rax + adcq $0, %rax + subq %r8, %r13 + sbbq %r9, %r14 + sbbq %rax, %r15 + sbbq $0, %rsi + sbbq $0, %rbx + sbbq $0, %rbp + sbbq $0, %r10 + # a[2-9] += m[0-5] * mu[0..1] = m[0-5] * (a[2..3] * mp) + movq 64(%rdi), %r11 + movq 72(%rdi), %r12 + movq %r13, %rdx + movq %r14, %rax + shldq $32, %rdx, %rax + shlq $32, %rdx + addq %r13, %rdx + adcq %r14, %rax + addq %r13, %rax + movq %rdx, %rcx + movq %rax, %r8 + movq %rax, %r9 + shldq $32, %rcx, %r8 + shlq $32, %rcx + shrq $32, %r9 + addq %r10, %r11 + adcq $0, %r12 + movq $0, %r10 + adcq $0, %r10 + addq %rcx, %r13 + adcq %r8, %r14 + adcq %r9, %r15 + adcq $0, %rsi + adcq $0, %rbx + adcq $0, %rbp + adcq %rdx, %r11 + adcq %rax, %r12 + adcq $0, %r10 + addq %rax, %rcx + adcq %rdx, %r8 + adcq %rax, %r9 + movq $0, %rax + adcq $0, %rax + subq %r8, %r15 + sbbq %r9, %rsi + sbbq %rax, %rbx + sbbq $0, %rbp + sbbq $0, %r11 + sbbq $0, %r12 + sbbq $0, %r10 + # a[4-11] += m[0-5] * mu[0..1] = m[0-5] * (a[4..5] * mp) + movq 80(%rdi), %r13 + movq 88(%rdi), %r14 + movq %r15, %rdx + movq %rsi, %rax + shldq $32, %rdx, %rax + shlq $32, %rdx + addq 
%r15, %rdx + adcq %rsi, %rax + addq %r15, %rax + movq %rdx, %rcx + movq %rax, %r8 + movq %rax, %r9 + shldq $32, %rcx, %r8 + shlq $32, %rcx + shrq $32, %r9 + addq %r10, %r13 + adcq $0, %r14 + movq $0, %r10 + adcq $0, %r10 + addq %rcx, %r15 + adcq %r8, %rsi + adcq %r9, %rbx + adcq $0, %rbp + adcq $0, %r11 + adcq $0, %r12 + adcq %rdx, %r13 + adcq %rax, %r14 + adcq $0, %r10 + addq %rax, %rcx + adcq %rdx, %r8 + adcq %rax, %r9 + movq $0, %rax + adcq $0, %rax + subq %r8, %rbx + sbbq %r9, %rbp + sbbq %rax, %r11 + sbbq $0, %r12 + sbbq $0, %r13 + sbbq $0, %r14 + sbbq $0, %r10 + # Subtract mod if carry + negq %r10 + movq $18446744073709551614, %r9 + movq %r10, %rcx + movq %r10, %r8 + shrq $32, %rcx + shlq $32, %r8 + andq %r10, %r9 + subq %rcx, %rbx + sbbq %r8, %rbp + sbbq %r9, %r11 + sbbq %r10, %r12 + sbbq %r10, %r13 + sbbq %r10, %r14 + movq %rbx, (%rdi) + movq %rbp, 8(%rdi) + movq %r11, 16(%rdi) + movq %r12, 24(%rdi) + movq %r13, 32(%rdi) + movq %r14, 40(%rdi) + pop %rbp + pop %rbx + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_384_mont_reduce_6,.-sp_384_mont_reduce_6 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +/* Reduce the number back to 384 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef __APPLE__ +.globl sp_384_mont_reduce_order_6 +.type sp_384_mont_reduce_order_6,@function +.align 16 +sp_384_mont_reduce_order_6: +#else +.globl _sp_384_mont_reduce_order_6 +.p2align 4 +_sp_384_mont_reduce_order_6: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + movq %rdx, %rcx + xorq %r15, %r15 + # i = 6 + movq $6, %r8 + movq (%rdi), %r13 + movq 8(%rdi), %r14 +L_mont_loop_order_6: + # mu = a[i] * mp + movq %r13, %r11 + imulq %rcx, %r11 + # a[i+0] += m[0] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq (%rsi) + addq %rax, %r13 + adcq %rdx, %r10 + # a[i+1] += m[1] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 8(%rsi) + movq %r14, %r13 + addq %rax, %r13 + adcq %rdx, %r9 + addq %r10, %r13 + adcq $0, %r9 + # a[i+2] += m[2] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 16(%rsi) + movq 16(%rdi), %r14 + addq %rax, %r14 + adcq %rdx, %r10 + addq %r9, %r14 + adcq $0, %r10 + # a[i+3] += m[3] * mu + movq %r11, %rax + xorq %r9, %r9 + mulq 24(%rsi) + movq 24(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r9 + addq %r10, %r12 + movq %r12, 24(%rdi) + adcq $0, %r9 + # a[i+4] += m[4] * mu + movq %r11, %rax + xorq %r10, %r10 + mulq 32(%rsi) + movq 32(%rdi), %r12 + addq %rax, %r12 + adcq %rdx, %r10 + addq %r9, %r12 + movq %r12, 32(%rdi) + adcq $0, %r10 + # a[i+5] += m[5] * mu + movq %r11, %rax + mulq 40(%rsi) + movq 40(%rdi), %r12 + addq %rax, %r10 + adcq %r15, %rdx + movq $0, %r15 + adcq $0, %r15 + addq %r10, %r12 + movq %r12, 40(%rdi) + adcq %rdx, 48(%rdi) + adcq $0, %r15 + # i -= 1 + addq $8, %rdi + decq %r8 + jnz L_mont_loop_order_6 + movq %r13, (%rdi) + movq %r14, 8(%rdi) + negq %r15 + movq %r15, %rcx + movq %rsi, %rdx + movq %rdi, %rsi + subq $48, %rdi +#ifndef __APPLE__ + callq sp_384_cond_sub_6@plt +#else + callq _sp_384_cond_sub_6 +#endif /* __APPLE__ */ + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_384_mont_reduce_order_6,.-sp_384_mont_reduce_order_6 +#endif /* __APPLE__ */ +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
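+ *
+ * Squaring needs only n*(n+1)/2 distinct limb products: each cross
+ * product a[i]*a[j], i < j, is added twice (hence the repeated
+ * add/adc runs below) and each diagonal a[i]*a[i] once. A rough C
+ * model (illustration only; assumes uint64_t limbs and a 12-limb
+ * result):
+ *
+ *     void sqr_6_model(uint64_t* r, const uint64_t* a)
+ *     {
+ *         uint64_t t[12] = {0};
+ *         int i, j, k, n;
+ *         for (i = 0; i < 6; i++) {
+ *             for (j = i; j < 6; j++) {
+ *                 // cross products count twice, diagonals once
+ *                 for (n = (i == j) ? 1 : 2; n > 0; n--) {
+ *                     unsigned __int128 c = (unsigned __int128)a[i] * a[j];
+ *                     for (k = i + j; c != 0; k++) {
+ *                         c += t[k];
+ *                         t[k] = (uint64_t)c;
+ *                         c >>= 64;
+ *                     }
+ *                 }
+ *             }
+ *         }
+ *         for (i = 0; i < 12; i++) r[i] = t[i];
+ *     }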
+ */ +#ifndef __APPLE__ +.globl sp_384_sqr_6 +.type sp_384_sqr_6,@function +.align 16 +sp_384_sqr_6: +#else +.globl _sp_384_sqr_6 +.p2align 4 +_sp_384_sqr_6: +#endif /* __APPLE__ */ + push %r12 + subq $48, %rsp + # A[0] * A[0] + movq (%rsi), %rax + mulq %rax + xorq %r9, %r9 + movq %rax, (%rsp) + movq %rdx, %r8 + # A[0] * A[1] + movq 8(%rsi), %rax + mulq (%rsi) + xorq %rcx, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + movq %r8, 8(%rsp) + # A[0] * A[2] + movq 16(%rsi), %rax + mulq (%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + # A[1] * A[1] + movq 8(%rsi), %rax + mulq %rax + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + movq %r9, 16(%rsp) + # A[0] * A[3] + movq 24(%rsi), %rax + mulq (%rsi) + xorq %r9, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + # A[1] * A[2] + movq 16(%rsi), %rax + mulq 8(%rsi) + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + movq %rcx, 24(%rsp) + # A[0] * A[4] + movq 32(%rsi), %rax + mulq (%rsi) + xorq %rcx, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + # A[1] * A[3] + movq 24(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + # A[2] * A[2] + movq 16(%rsi), %rax + mulq %rax + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + movq %r8, 32(%rsp) + # A[0] * A[5] + movq 40(%rsi), %rax + mulq (%rsi) + xorq %r8, %r8 + xorq %r12, %r12 + movq %rax, %r10 + movq %rdx, %r11 + # A[1] * A[4] + movq 32(%rsi), %rax + mulq 8(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + # A[2] * A[3] + movq 24(%rsi), %rax + mulq 16(%rsi) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0, %r12 + addq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + addq %r10, %r9 + adcq %r11, %rcx + adcq %r12, %r8 + movq %r9, 40(%rsp) + # A[1] * A[5] + movq 40(%rsi), %rax + mulq 8(%rsi) + xorq %r9, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + # A[2] * A[4] + movq 32(%rsi), %rax + mulq 16(%rsi) + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * A[3] + movq 24(%rsi), %rax + mulq %rax + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + movq %rcx, 48(%rdi) + # A[2] * A[5] + movq 40(%rsi), %rax + mulq 16(%rsi) + xorq %rcx, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + # A[3] * A[4] + movq 32(%rsi), %rax + mulq 24(%rsi) + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq $0, %rcx + movq %r8, 56(%rdi) + # A[3] * A[5] + movq 40(%rsi), %rax + mulq 24(%rsi) + xorq %r8, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + # A[4] * A[4] + movq 32(%rsi), %rax + mulq %rax + addq %rax, %r9 + adcq %rdx, %rcx + adcq $0, %r8 + movq %r9, 64(%rdi) + # A[4] * A[5] + movq 40(%rsi), %rax + mulq 32(%rsi) + xorq %r9, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + addq %rax, %rcx + adcq %rdx, %r8 + adcq $0, %r9 + movq %rcx, 72(%rdi) + # A[5] * A[5] + movq 40(%rsi), %rax + mulq %rax + addq %rax, %r8 + adcq %rdx, %r9 + movq %r8, 80(%rdi) + movq %r9, 88(%rdi) + movq (%rsp), %rax + movq 8(%rsp), %rdx + movq 16(%rsp), %r10 + movq 24(%rsp), %r11 + movq %rax, (%rdi) + movq %rdx, 
8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq 32(%rsp), %rax + movq 40(%rsp), %rdx + movq %rax, 32(%rdi) + movq %rdx, 40(%rdi) + addq $48, %rsp + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_384_sqr_6,.-sp_384_sqr_6 +#endif /* __APPLE__ */ +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +#ifndef __APPLE__ +.globl sp_384_cmp_6 +.type sp_384_cmp_6,@function +.align 16 +sp_384_cmp_6: +#else +.globl _sp_384_cmp_6 +.p2align 4 +_sp_384_cmp_6: +#endif /* __APPLE__ */ + xorq %rcx, %rcx + movq $-1, %rdx + movq $-1, %rax + movq $1, %r8 + movq 40(%rdi), %r9 + movq 40(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 32(%rdi), %r9 + movq 32(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 24(%rdi), %r9 + movq 24(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 16(%rdi), %r9 + movq 16(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 8(%rdi), %r9 + movq 8(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq (%rdi), %r9 + movq (%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + xorq %rdx, %rax + repz retq +#ifndef __APPLE__ +.size sp_384_cmp_6,.-sp_384_cmp_6 +#endif /* __APPLE__ */ +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_384_add_6 +.type sp_384_add_6,@function +.align 16 +sp_384_add_6: +#else +.globl _sp_384_add_6 +.p2align 4 +_sp_384_add_6: +#endif /* __APPLE__ */ + # Add + movq (%rsi), %rcx + xorq %rax, %rax + addq (%rdx), %rcx + movq 8(%rsi), %r8 + movq %rcx, (%rdi) + adcq 8(%rdx), %r8 + movq 16(%rsi), %rcx + movq %r8, 8(%rdi) + adcq 16(%rdx), %rcx + movq 24(%rsi), %r8 + movq %rcx, 16(%rdi) + adcq 24(%rdx), %r8 + movq 32(%rsi), %rcx + movq %r8, 24(%rdi) + adcq 32(%rdx), %rcx + movq 40(%rsi), %r8 + movq %rcx, 32(%rdi) + adcq 40(%rdx), %r8 + movq %r8, 40(%rdi) + adcq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_384_add_6,.-sp_384_add_6 +#endif /* __APPLE__ */ +/* Add a to a into r. (r = a + a) + * + * r A single precision integer. + * a A single precision integer. + */ +#ifndef __APPLE__ +.globl sp_384_dbl_6 +.type sp_384_dbl_6,@function +.align 16 +sp_384_dbl_6: +#else +.globl _sp_384_dbl_6 +.p2align 4 +_sp_384_dbl_6: +#endif /* __APPLE__ */ + movq (%rsi), %rdx + xorq %rax, %rax + addq %rdx, %rdx + movq 8(%rsi), %rcx + movq %rdx, (%rdi) + adcq %rcx, %rcx + movq 16(%rsi), %rdx + movq %rcx, 8(%rdi) + adcq %rdx, %rdx + movq 24(%rsi), %rcx + movq %rdx, 16(%rdi) + adcq %rcx, %rcx + movq 32(%rsi), %rdx + movq %rcx, 24(%rdi) + adcq %rdx, %rdx + movq 40(%rsi), %rcx + movq %rdx, 32(%rdi) + adcq %rcx, %rcx + movq %rcx, 40(%rdi) + adcq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_384_dbl_6,.-sp_384_dbl_6 +#endif /* __APPLE__ */ +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +#ifndef __APPLE__ +.globl sp_384_sub_6 +.type sp_384_sub_6,@function +.align 16 +sp_384_sub_6: +#else +.globl _sp_384_sub_6 +.p2align 4 +_sp_384_sub_6: +#endif /* __APPLE__ */ + push %r12 + xorq %rax, %rax + movq (%rsi), %rcx + movq 8(%rsi), %r8 + movq 16(%rsi), %r9 + movq 24(%rsi), %r10 + movq 32(%rsi), %r11 + movq 40(%rsi), %r12 + subq (%rdx), %rcx + sbbq 8(%rdx), %r8 + sbbq 16(%rdx), %r9 + sbbq 24(%rdx), %r10 + sbbq 32(%rdx), %r11 + sbbq 40(%rdx), %r12 + movq %rcx, (%rdi) + movq %r8, 8(%rdi) + movq %r9, 16(%rdi) + movq %r10, 24(%rdi) + movq %r11, 32(%rdi) + movq %r12, 40(%rdi) + sbbq $0, %rax + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_384_sub_6,.-sp_384_sub_6 +#endif /* __APPLE__ */ +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +#ifndef __APPLE__ +.globl sp_384_cond_add_6 +.type sp_384_cond_add_6,@function +.align 16 +sp_384_cond_add_6: +#else +.globl _sp_384_cond_add_6 +.p2align 4 +_sp_384_cond_add_6: +#endif /* __APPLE__ */ + subq $48, %rsp + movq $0, %rax + movq (%rdx), %r8 + movq 8(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, (%rsp) + movq %r9, 8(%rsp) + movq 16(%rdx), %r8 + movq 24(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 16(%rsp) + movq %r9, 24(%rsp) + movq 32(%rdx), %r8 + movq 40(%rdx), %r9 + andq %rcx, %r8 + andq %rcx, %r9 + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq (%rsi), %r8 + movq (%rsp), %rdx + addq %rdx, %r8 + movq 8(%rsi), %r9 + movq 8(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, (%rdi) + movq 16(%rsi), %r8 + movq 16(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 8(%rdi) + movq 24(%rsi), %r9 + movq 24(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 16(%rdi) + movq 32(%rsi), %r8 + movq 32(%rsp), %rdx + adcq %rdx, %r8 + movq %r9, 24(%rdi) + movq 40(%rsi), %r9 + movq 40(%rsp), %rdx + adcq %rdx, %r9 + movq %r8, 32(%rdi) + movq %r9, 40(%rdi) + adcq $0, %rax + addq $48, %rsp + repz retq +#ifndef __APPLE__ +.size sp_384_cond_add_6,.-sp_384_cond_add_6 +#endif /* __APPLE__ */ +/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) + * + * r Result of division by 2. + * a Number to divide. + * m Modulus (prime). 
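+ *
+ * Since m is odd, adding m to a exactly when a is odd (via the mask
+ * -(a & 1)) makes the sum even, and the 385-bit result shifted right
+ * by one is a/2 mod m; no branch depends on secret data. A rough C
+ * model (illustration only; assumes uint64_t limbs):
+ *
+ *     void div2_6_model(uint64_t* r, const uint64_t* a, const uint64_t* m)
+ *     {
+ *         uint64_t mask = (uint64_t)0 - (a[0] & 1);  // all ones if a odd
+ *         uint64_t t[7];
+ *         unsigned __int128 c = 0;
+ *         int i;
+ *         for (i = 0; i < 6; i++) {
+ *             c += (unsigned __int128)a[i] + (m[i] & mask);
+ *             t[i] = (uint64_t)c;
+ *             c >>= 64;
+ *         }
+ *         t[6] = (uint64_t)c;                        // the 385th bit
+ *         for (i = 0; i < 6; i++)                    // the shrdq pairs
+ *             r[i] = (t[i] >> 1) | (t[i + 1] << 63);
+ *     }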
+ */ +#ifndef __APPLE__ +.globl sp_384_div2_6 +.type sp_384_div2_6,@function +.align 16 +sp_384_div2_6: +#else +.globl _sp_384_div2_6 +.p2align 4 +_sp_384_div2_6: +#endif /* __APPLE__ */ + subq $48, %rsp + movq (%rsi), %rax + movq %rax, %r11 + andq $1, %r11 + negq %r11 + xorq %r10, %r10 + movq (%rdx), %r8 + andq %r11, %r8 + movq %r8, (%rsp) + movq 8(%rdx), %r8 + andq %r11, %r8 + movq %r8, 8(%rsp) + movq 16(%rdx), %r8 + andq %r11, %r8 + movq %r8, 16(%rsp) + movq 24(%rdx), %r8 + andq %r11, %r8 + movq %r8, 24(%rsp) + movq 32(%rdx), %r8 + andq %r11, %r8 + movq %r8, 32(%rsp) + movq 40(%rdx), %r8 + andq %r11, %r8 + movq %r8, 40(%rsp) + addq %rax, (%rsp) + movq 8(%rsi), %rax + adcq %rax, 8(%rsp) + movq 16(%rsi), %rax + adcq %rax, 16(%rsp) + movq 24(%rsi), %rax + adcq %rax, 24(%rsp) + movq 32(%rsi), %rax + adcq %rax, 32(%rsp) + movq 40(%rsi), %rax + adcq %rax, 40(%rsp) + adcq $0, %r10 + movq (%rsp), %rax + movq 8(%rsp), %rcx + shrdq $1, %rcx, %rax + movq %rax, (%rdi) + movq 16(%rsp), %rax + shrdq $1, %rax, %rcx + movq %rcx, 8(%rdi) + movq 24(%rsp), %rcx + shrdq $1, %rcx, %rax + movq %rax, 16(%rdi) + movq 32(%rsp), %rax + shrdq $1, %rax, %rcx + movq %rcx, 24(%rdi) + movq 40(%rsp), %rcx + shrdq $1, %rcx, %rax + movq %rax, 32(%rdi) + shrdq $1, %r10, %rcx + movq %rcx, 40(%rdi) + addq $48, %rsp + repz retq +#ifndef __APPLE__ +.size sp_384_div2_6,.-sp_384_div2_6 +#endif /* __APPLE__ */ +/* Multiply a and b into r. (r = a * b) + * + * r Result of multiplication. + * a First number to multiply. + * b Second number to multiply. + */ +#ifndef __APPLE__ +.globl sp_384_mul_avx2_6 +.type sp_384_mul_avx2_6,@function +.align 16 +sp_384_mul_avx2_6: +#else +.globl _sp_384_mul_avx2_6 +.p2align 4 +_sp_384_mul_avx2_6: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + push %rbx + movq %rdx, %rax + subq $40, %rsp + xorq %rbx, %rbx + movq (%rsi), %rdx + # A[0] * B[0] + mulxq (%rax), %r9, %r10 + # A[0] * B[1] + mulxq 8(%rax), %rcx, %r11 + adcxq %rcx, %r10 + # A[0] * B[2] + mulxq 16(%rax), %rcx, %r12 + adcxq %rcx, %r11 + # A[0] * B[3] + mulxq 24(%rax), %rcx, %r13 + adcxq %rcx, %r12 + # A[0] * B[4] + mulxq 32(%rax), %rcx, %r14 + adcxq %rcx, %r13 + # A[0] * B[5] + mulxq 40(%rax), %rcx, %r15 + adcxq %rcx, %r14 + adcxq %rbx, %r15 + movq %r9, (%rsp) + movq $0, %r9 + adcxq %rbx, %r9 + xorq %rbx, %rbx + movq 8(%rsi), %rdx + # A[1] * B[0] + mulxq (%rax), %rcx, %r8 + adcxq %rcx, %r10 + adoxq %r8, %r11 + # A[1] * B[1] + mulxq 8(%rax), %rcx, %r8 + adcxq %rcx, %r11 + adoxq %r8, %r12 + # A[1] * B[2] + mulxq 16(%rax), %rcx, %r8 + adcxq %rcx, %r12 + adoxq %r8, %r13 + # A[1] * B[3] + mulxq 24(%rax), %rcx, %r8 + adcxq %rcx, %r13 + adoxq %r8, %r14 + # A[1] * B[4] + mulxq 32(%rax), %rcx, %r8 + adcxq %rcx, %r14 + adoxq %r8, %r15 + # A[1] * B[5] + mulxq 40(%rax), %rcx, %r8 + adcxq %rcx, %r15 + adoxq %r8, %r9 + adcxq %rbx, %r9 + movq %r10, 8(%rsp) + movq $0, %r10 + adcxq %rbx, %r10 + adoxq %rbx, %r10 + xorq %rbx, %rbx + movq 16(%rsi), %rdx + # A[2] * B[0] + mulxq (%rax), %rcx, %r8 + adcxq %rcx, %r11 + adoxq %r8, %r12 + # A[2] * B[1] + mulxq 8(%rax), %rcx, %r8 + adcxq %rcx, %r12 + adoxq %r8, %r13 + # A[2] * B[2] + mulxq 16(%rax), %rcx, %r8 + adcxq %rcx, %r13 + adoxq %r8, %r14 + # A[2] * B[3] + mulxq 24(%rax), %rcx, %r8 + adcxq %rcx, %r14 + adoxq %r8, %r15 + # A[2] * B[4] + mulxq 32(%rax), %rcx, %r8 + adcxq %rcx, %r15 + adoxq %r8, %r9 + # A[2] * B[5] + mulxq 40(%rax), %rcx, %r8 + adcxq %rcx, %r9 + adoxq %r8, %r10 + adcxq %rbx, %r10 + movq %r11, 16(%rsp) + movq $0, %r11 + adcxq %rbx, %r11 + adoxq %rbx, %r11 + xorq %rbx, 
%rbx + movq 24(%rsi), %rdx + # A[3] * B[0] + mulxq (%rax), %rcx, %r8 + adcxq %rcx, %r12 + adoxq %r8, %r13 + # A[3] * B[1] + mulxq 8(%rax), %rcx, %r8 + adcxq %rcx, %r13 + adoxq %r8, %r14 + # A[3] * B[2] + mulxq 16(%rax), %rcx, %r8 + adcxq %rcx, %r14 + adoxq %r8, %r15 + # A[3] * B[3] + mulxq 24(%rax), %rcx, %r8 + adcxq %rcx, %r15 + adoxq %r8, %r9 + # A[3] * B[4] + mulxq 32(%rax), %rcx, %r8 + adcxq %rcx, %r9 + adoxq %r8, %r10 + # A[3] * B[5] + mulxq 40(%rax), %rcx, %r8 + adcxq %rcx, %r10 + adoxq %r8, %r11 + adcxq %rbx, %r11 + movq %r12, 24(%rsp) + movq $0, %r12 + adcxq %rbx, %r12 + adoxq %rbx, %r12 + xorq %rbx, %rbx + movq 32(%rsi), %rdx + # A[4] * B[0] + mulxq (%rax), %rcx, %r8 + adcxq %rcx, %r13 + adoxq %r8, %r14 + # A[4] * B[1] + mulxq 8(%rax), %rcx, %r8 + adcxq %rcx, %r14 + adoxq %r8, %r15 + # A[4] * B[2] + mulxq 16(%rax), %rcx, %r8 + adcxq %rcx, %r15 + adoxq %r8, %r9 + # A[4] * B[3] + mulxq 24(%rax), %rcx, %r8 + adcxq %rcx, %r9 + adoxq %r8, %r10 + # A[4] * B[4] + mulxq 32(%rax), %rcx, %r8 + adcxq %rcx, %r10 + adoxq %r8, %r11 + # A[4] * B[5] + mulxq 40(%rax), %rcx, %r8 + adcxq %rcx, %r11 + adoxq %r8, %r12 + adcxq %rbx, %r12 + movq %r13, 32(%rsp) + movq 40(%rsi), %rdx + # A[5] * B[0] + mulxq (%rax), %rcx, %r8 + adcxq %rcx, %r14 + adoxq %r8, %r15 + # A[5] * B[1] + mulxq 8(%rax), %rcx, %r8 + adcxq %rcx, %r15 + adoxq %r8, %r9 + # A[5] * B[2] + mulxq 16(%rax), %rcx, %r8 + adcxq %rcx, %r9 + adoxq %r8, %r10 + # A[5] * B[3] + mulxq 24(%rax), %rcx, %r8 + adcxq %rcx, %r10 + adoxq %r8, %r11 + # A[5] * B[4] + mulxq 32(%rax), %rcx, %r8 + adcxq %rcx, %r11 + adoxq %r8, %r12 + # A[5] * B[5] + mulxq 40(%rax), %rcx, %r13 + adcxq %rcx, %r12 + adoxq %rbx, %r13 + adcxq %rbx, %r13 + movq %r14, 40(%rdi) + movq %r15, 48(%rdi) + movq %r9, 56(%rdi) + movq %r10, 64(%rdi) + movq %r11, 72(%rdi) + movq %r12, 80(%rdi) + movq %r13, 88(%rdi) + movq (%rsp), %r9 + movq 8(%rsp), %r10 + movq 16(%rsp), %r11 + movq 24(%rsp), %r12 + movq 32(%rsp), %r13 + movq %r9, (%rdi) + movq %r10, 8(%rdi) + movq %r11, 16(%rdi) + movq %r12, 24(%rdi) + movq %r13, 32(%rdi) + addq $40, %rsp + pop %rbx + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_384_mul_avx2_6,.-sp_384_mul_avx2_6 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Reduce the number back to 384 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
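+ *
+ * Word-by-word Montgomery reduction: for each of the six low words,
+ * mu = a[i] * mp (mod 2^64) is chosen so that adding mu * m zeroes
+ * a[i]; after six rounds the low half is dropped and m is subtracted
+ * at most once. A rough C model (illustration only; assumes uint64_t
+ * limbs a[0..11]; cond_sub stands in for the pextq-masked subtract
+ * at the end of this routine):
+ *
+ *     void mont_reduce_6_model(uint64_t* a, const uint64_t* m, uint64_t mp)
+ *     {
+ *         uint64_t over = 0;                  // carries past a[i+6]
+ *         int i, j;
+ *         for (i = 0; i < 6; i++) {
+ *             uint64_t mu = a[i] * mp;        // truncating multiply
+ *             unsigned __int128 c = 0;
+ *             for (j = 0; j < 6; j++) {
+ *                 c += (unsigned __int128)mu * m[j] + a[i + j];
+ *                 a[i + j] = (uint64_t)c;     // a[i] becomes 0 at j == 0
+ *                 c >>= 64;
+ *             }
+ *             c += (unsigned __int128)a[i + 6] + over;
+ *             a[i + 6] = (uint64_t)c;
+ *             over = (uint64_t)(c >> 64);
+ *         }
+ *         // result = a[6..11] - (m & mask), written to a[0..5]
+ *         cond_sub(a, a + 6, m, (uint64_t)0 - over);
+ *     }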
+ */ +#ifndef __APPLE__ +.globl sp_384_mont_reduce_order_avx2_6 +.type sp_384_mont_reduce_order_avx2_6,@function +.align 16 +sp_384_mont_reduce_order_avx2_6: +#else +.globl _sp_384_mont_reduce_order_avx2_6 +.p2align 4 +_sp_384_mont_reduce_order_avx2_6: +#endif /* __APPLE__ */ + push %r12 + push %r13 + movq %rdx, %rax + xorq %r13, %r13 + movq (%rdi), %r12 + xorq %r11, %r11 +L_mont_loop_order_avx2_6: + # mu = a[i] * mp + movq %r12, %rdx + movq %r12, %r9 + imulq %rax, %rdx + xorq %r11, %r11 + # a[i+0] += m[0] * mu + mulxq (%rsi), %rcx, %r8 + movq 8(%rdi), %r12 + adcxq %rcx, %r9 + adoxq %r8, %r12 + # a[i+1] += m[1] * mu + mulxq 8(%rsi), %rcx, %r8 + movq 16(%rdi), %r9 + adcxq %rcx, %r12 + adoxq %r8, %r9 + # a[i+2] += m[2] * mu + mulxq 16(%rsi), %rcx, %r8 + movq 24(%rdi), %r10 + adcxq %rcx, %r9 + adoxq %r8, %r10 + movq %r9, 16(%rdi) + # a[i+3] += m[3] * mu + mulxq 24(%rsi), %rcx, %r8 + movq 32(%rdi), %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + movq %r10, 24(%rdi) + # a[i+4] += m[4] * mu + mulxq 32(%rsi), %rcx, %r8 + movq 40(%rdi), %r10 + adcxq %rcx, %r9 + adoxq %r8, %r10 + movq %r9, 32(%rdi) + # a[i+5] += m[5] * mu + mulxq 40(%rsi), %rcx, %r8 + movq 48(%rdi), %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + movq %r10, 40(%rdi) + adcxq %r13, %r9 + movq %r9, 48(%rdi) + movq %r11, %r13 + adoxq %r11, %r13 + adcxq %r11, %r13 + # mu = a[i] * mp + movq %r12, %rdx + movq %r12, %r9 + imulq %rax, %rdx + xorq %r11, %r11 + # a[i+0] += m[0] * mu + mulxq (%rsi), %rcx, %r8 + movq 16(%rdi), %r12 + adcxq %rcx, %r9 + adoxq %r8, %r12 + # a[i+1] += m[1] * mu + mulxq 8(%rsi), %rcx, %r8 + movq 24(%rdi), %r9 + adcxq %rcx, %r12 + adoxq %r8, %r9 + # a[i+2] += m[2] * mu + mulxq 16(%rsi), %rcx, %r8 + movq 32(%rdi), %r10 + adcxq %rcx, %r9 + adoxq %r8, %r10 + movq %r9, 24(%rdi) + # a[i+3] += m[3] * mu + mulxq 24(%rsi), %rcx, %r8 + movq 40(%rdi), %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + movq %r10, 32(%rdi) + # a[i+4] += m[4] * mu + mulxq 32(%rsi), %rcx, %r8 + movq 48(%rdi), %r10 + adcxq %rcx, %r9 + adoxq %r8, %r10 + movq %r9, 40(%rdi) + # a[i+5] += m[5] * mu + mulxq 40(%rsi), %rcx, %r8 + movq 56(%rdi), %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + movq %r10, 48(%rdi) + adcxq %r13, %r9 + movq %r9, 56(%rdi) + movq %r11, %r13 + adoxq %r11, %r13 + adcxq %r11, %r13 + # mu = a[i] * mp + movq %r12, %rdx + movq %r12, %r9 + imulq %rax, %rdx + xorq %r11, %r11 + # a[i+0] += m[0] * mu + mulxq (%rsi), %rcx, %r8 + movq 24(%rdi), %r12 + adcxq %rcx, %r9 + adoxq %r8, %r12 + # a[i+1] += m[1] * mu + mulxq 8(%rsi), %rcx, %r8 + movq 32(%rdi), %r9 + adcxq %rcx, %r12 + adoxq %r8, %r9 + # a[i+2] += m[2] * mu + mulxq 16(%rsi), %rcx, %r8 + movq 40(%rdi), %r10 + adcxq %rcx, %r9 + adoxq %r8, %r10 + movq %r9, 32(%rdi) + # a[i+3] += m[3] * mu + mulxq 24(%rsi), %rcx, %r8 + movq 48(%rdi), %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + movq %r10, 40(%rdi) + # a[i+4] += m[4] * mu + mulxq 32(%rsi), %rcx, %r8 + movq 56(%rdi), %r10 + adcxq %rcx, %r9 + adoxq %r8, %r10 + movq %r9, 48(%rdi) + # a[i+5] += m[5] * mu + mulxq 40(%rsi), %rcx, %r8 + movq 64(%rdi), %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + movq %r10, 56(%rdi) + adcxq %r13, %r9 + movq %r9, 64(%rdi) + movq %r11, %r13 + adoxq %r11, %r13 + adcxq %r11, %r13 + # mu = a[i] * mp + movq %r12, %rdx + movq %r12, %r9 + imulq %rax, %rdx + xorq %r11, %r11 + # a[i+0] += m[0] * mu + mulxq (%rsi), %rcx, %r8 + movq 32(%rdi), %r12 + adcxq %rcx, %r9 + adoxq %r8, %r12 + # a[i+1] += m[1] * mu + mulxq 8(%rsi), %rcx, %r8 + movq 40(%rdi), %r9 + adcxq %rcx, %r12 + adoxq %r8, %r9 + # a[i+2] += m[2] * mu + mulxq 16(%rsi), %rcx, %r8 + movq 48(%rdi), 
%r10 + adcxq %rcx, %r9 + adoxq %r8, %r10 + movq %r9, 40(%rdi) + # a[i+3] += m[3] * mu + mulxq 24(%rsi), %rcx, %r8 + movq 56(%rdi), %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + movq %r10, 48(%rdi) + # a[i+4] += m[4] * mu + mulxq 32(%rsi), %rcx, %r8 + movq 64(%rdi), %r10 + adcxq %rcx, %r9 + adoxq %r8, %r10 + movq %r9, 56(%rdi) + # a[i+5] += m[5] * mu + mulxq 40(%rsi), %rcx, %r8 + movq 72(%rdi), %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + movq %r10, 64(%rdi) + adcxq %r13, %r9 + movq %r9, 72(%rdi) + movq %r11, %r13 + adoxq %r11, %r13 + adcxq %r11, %r13 + # mu = a[i] * mp + movq %r12, %rdx + movq %r12, %r9 + imulq %rax, %rdx + xorq %r11, %r11 + # a[i+0] += m[0] * mu + mulxq (%rsi), %rcx, %r8 + movq 40(%rdi), %r12 + adcxq %rcx, %r9 + adoxq %r8, %r12 + # a[i+1] += m[1] * mu + mulxq 8(%rsi), %rcx, %r8 + movq 48(%rdi), %r9 + adcxq %rcx, %r12 + adoxq %r8, %r9 + # a[i+2] += m[2] * mu + mulxq 16(%rsi), %rcx, %r8 + movq 56(%rdi), %r10 + adcxq %rcx, %r9 + adoxq %r8, %r10 + movq %r9, 48(%rdi) + # a[i+3] += m[3] * mu + mulxq 24(%rsi), %rcx, %r8 + movq 64(%rdi), %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + movq %r10, 56(%rdi) + # a[i+4] += m[4] * mu + mulxq 32(%rsi), %rcx, %r8 + movq 72(%rdi), %r10 + adcxq %rcx, %r9 + adoxq %r8, %r10 + movq %r9, 64(%rdi) + # a[i+5] += m[5] * mu + mulxq 40(%rsi), %rcx, %r8 + movq 80(%rdi), %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + movq %r10, 72(%rdi) + adcxq %r13, %r9 + movq %r9, 80(%rdi) + movq %r11, %r13 + adoxq %r11, %r13 + adcxq %r11, %r13 + # mu = a[i] * mp + movq %r12, %rdx + movq %r12, %r9 + imulq %rax, %rdx + xorq %r11, %r11 + # a[i+0] += m[0] * mu + mulxq (%rsi), %rcx, %r8 + movq 48(%rdi), %r12 + adcxq %rcx, %r9 + adoxq %r8, %r12 + # a[i+1] += m[1] * mu + mulxq 8(%rsi), %rcx, %r8 + movq 56(%rdi), %r9 + adcxq %rcx, %r12 + adoxq %r8, %r9 + # a[i+2] += m[2] * mu + mulxq 16(%rsi), %rcx, %r8 + movq 64(%rdi), %r10 + adcxq %rcx, %r9 + adoxq %r8, %r10 + movq %r9, 56(%rdi) + # a[i+3] += m[3] * mu + mulxq 24(%rsi), %rcx, %r8 + movq 72(%rdi), %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + movq %r10, 64(%rdi) + # a[i+4] += m[4] * mu + mulxq 32(%rsi), %rcx, %r8 + movq 80(%rdi), %r10 + adcxq %rcx, %r9 + adoxq %r8, %r10 + movq %r9, 72(%rdi) + # a[i+5] += m[5] * mu + mulxq 40(%rsi), %rcx, %r8 + movq 88(%rdi), %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + movq %r10, 80(%rdi) + adcxq %r13, %r9 + movq %r9, 88(%rdi) + movq %r11, %r13 + adoxq %r11, %r13 + adcxq %r11, %r13 + negq %r13 + movq %rdi, %rax + addq $48, %rdi + movq (%rsi), %r8 + movq %r12, %rdx + pextq %r13, %r8, %r8 + subq %r8, %rdx + movq 8(%rsi), %r8 + movq 8(%rdi), %rcx + pextq %r13, %r8, %r8 + movq %rdx, (%rax) + sbbq %r8, %rcx + movq 16(%rsi), %rdx + movq 16(%rdi), %r8 + pextq %r13, %rdx, %rdx + movq %rcx, 8(%rax) + sbbq %rdx, %r8 + movq 24(%rsi), %rcx + movq 24(%rdi), %rdx + pextq %r13, %rcx, %rcx + movq %r8, 16(%rax) + sbbq %rcx, %rdx + movq 32(%rsi), %r8 + movq 32(%rdi), %rcx + pextq %r13, %r8, %r8 + movq %rdx, 24(%rax) + sbbq %r8, %rcx + movq 40(%rsi), %rdx + movq 40(%rdi), %r8 + pextq %r13, %rdx, %rdx + movq %rcx, 32(%rax) + sbbq %rdx, %r8 + movq %r8, 40(%rax) + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_384_mont_reduce_order_avx2_6,.-sp_384_mont_reduce_order_avx2_6 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +/* Square a and put result in r. (r = a * a) + * + * r Result of squaring. + * a Number to square in Montogmery form. 
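+ *
+ * The mulxq/adcxq/adoxq pattern below interleaves two independent
+ * carry chains: mulxq forms a 128-bit product without touching the
+ * flags, adcxq accumulates through CF only and adoxq through OF
+ * only. Roughly, in intrinsics form (illustration only; x, y, s1,
+ * s2 are stand-ins; needs BMI2/ADX):
+ *
+ *     #include <immintrin.h>
+ *     unsigned long long s1 = 0, s2 = 0, lo, hi;
+ *     unsigned char cf = 0, of = 0;
+ *     lo = _mulx_u64(x, y, &hi);               // flags untouched
+ *     cf = _addcarryx_u64(cf, s1, lo, &s1);    // CF chain (adcx)
+ *     of = _addcarryx_u64(of, s2, hi, &s2);    // OF chain (adox)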
+ */ +#ifndef __APPLE__ +.globl sp_384_sqr_avx2_6 +.type sp_384_sqr_avx2_6,@function +.align 16 +sp_384_sqr_avx2_6: +#else +.globl _sp_384_sqr_avx2_6 +.p2align 4 +_sp_384_sqr_avx2_6: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + push %r15 + push %rbx + push %rbp + push %rdi + xorq %rdi, %rdi + movq (%rsi), %rdx + movq 8(%rsi), %r15 + movq 16(%rsi), %rbx + movq 24(%rsi), %rbp + # Diagonal 0 + # A[1] * A[0] + mulxq 8(%rsi), %r8, %r9 + # A[2] * A[0] + mulxq 16(%rsi), %rax, %r10 + adcxq %rax, %r9 + # A[3] * A[0] + mulxq 24(%rsi), %rax, %r11 + adcxq %rax, %r10 + # A[4] * A[0] + mulxq 32(%rsi), %rax, %r12 + adcxq %rax, %r11 + # A[5] * A[0] + mulxq 40(%rsi), %rax, %r13 + adcxq %rax, %r12 + adcxq %rdi, %r13 + # Diagonal 1 + movq %r15, %rdx + # A[2] * A[1] + mulxq 16(%rsi), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r11 + # A[3] * A[1] + mulxq 24(%rsi), %rax, %rcx + adcxq %rax, %r11 + adoxq %rcx, %r12 + # A[4] * A[1] + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r13 + # A[5] * A[1] + mulxq 40(%rsi), %rax, %r14 + adcxq %rax, %r13 + adoxq %rdi, %r14 + movq %rbx, %rdx + # A[5] * A[2] + mulxq 40(%rsi), %rax, %r15 + adcxq %rax, %r14 + adoxq %rdi, %r15 + adcxq %rdi, %r15 + adcxq %rdi, %rbx + # Diagonal 2 + # A[3] * A[2] + mulxq 24(%rsi), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r13 + # A[4] * A[2] + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r13 + adoxq %rcx, %r14 + movq %rbp, %rdx + # A[4] * A[3] + mulxq 32(%rsi), %rax, %rcx + adcxq %rax, %r14 + adoxq %rcx, %r15 + # A[5] * A[3] + mulxq 40(%rsi), %rax, %rbx + adcxq %rax, %r15 + adoxq %rdi, %rbx + movq 32(%rsi), %rdx + # A[5] * A[4] + mulxq 40(%rsi), %rax, %rbp + adcxq %rax, %rbx + adoxq %rdi, %rbp + adcxq %rdi, %rbp + adcxq %rdi, %rdi + # Doubling previous result as we add in square words results + # A[0] * A[0] + movq (%rsi), %rdx + mulxq %rdx, %rax, %rcx + pop %rdx + movq %rax, (%rdx) + adoxq %r8, %r8 + push %rdx + adcxq %rcx, %r8 + # A[1] * A[1] + movq 8(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r9, %r9 + adcxq %rax, %r9 + adoxq %r10, %r10 + adcxq %rcx, %r10 + # A[2] * A[2] + movq 16(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r11, %r11 + adcxq %rax, %r11 + adoxq %r12, %r12 + adcxq %rcx, %r12 + # A[3] * A[3] + movq 24(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r13, %r13 + adcxq %rax, %r13 + adoxq %r14, %r14 + adcxq %rcx, %r14 + # A[4] * A[4] + movq 32(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %r15, %r15 + adcxq %rax, %r15 + adoxq %rbx, %rbx + adcxq %rcx, %rbx + # A[5] * A[5] + movq 40(%rsi), %rdx + mulxq %rdx, %rax, %rcx + adoxq %rbp, %rbp + adcxq %rax, %rbp + adcxq %rdi, %rcx + movq $0, %rax + adoxq %rax, %rcx + pop %rdi + movq %r8, 8(%rdi) + movq %r9, 16(%rdi) + movq %r10, 24(%rdi) + movq %r11, 32(%rdi) + movq %r12, 40(%rdi) + movq %r13, 48(%rdi) + movq %r14, 56(%rdi) + movq %r15, 64(%rdi) + movq %rbx, 72(%rdi) + movq %rbp, 80(%rdi) + movq %rcx, 88(%rdi) + pop %rbp + pop %rbx + pop %r15 + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_384_sqr_avx2_6,.-sp_384_sqr_avx2_6 +#endif /* __APPLE__ */ +/* Add 1 to a. (a = a + 1) + * + * a A single precision integer. 
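+ *
+ * Equivalent C (illustration only; assumes uint64_t limbs): a single
+ * increment propagated through the six limbs with add/adc:
+ *
+ *     int i;
+ *     unsigned __int128 c = (unsigned __int128)a[0] + 1;
+ *     a[0] = (uint64_t)c;
+ *     for (i = 1; i < 6; i++) {
+ *         c = (unsigned __int128)a[i] + (uint64_t)(c >> 64);
+ *         a[i] = (uint64_t)c;
+ *     }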
+ */ +#ifndef __APPLE__ +.globl sp_384_add_one_6 +.type sp_384_add_one_6,@function +.align 16 +sp_384_add_one_6: +#else +.globl _sp_384_add_one_6 +.p2align 4 +_sp_384_add_one_6: +#endif /* __APPLE__ */ + addq $1, (%rdi) + adcq $0, 8(%rdi) + adcq $0, 16(%rdi) + adcq $0, 24(%rdi) + adcq $0, 32(%rdi) + adcq $0, 40(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_384_add_one_6,.-sp_384_add_one_6 +#endif /* __APPLE__ */ +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +#ifndef __APPLE__ +.globl sp_384_from_bin +.type sp_384_from_bin,@function +.align 16 +sp_384_from_bin: +#else +.globl _sp_384_from_bin +.p2align 4 +_sp_384_from_bin: +#endif /* __APPLE__ */ + movq %rdx, %r9 + movq %rdi, %r10 + addq %rcx, %r9 + addq $48, %r10 + xorq %r11, %r11 + jmp L_384_from_bin_64_end +L_384_from_bin_64_start: + subq $64, %r9 + movbeq 56(%r9), %rax + movbeq 48(%r9), %r8 + movq %rax, (%rdi) + movq %r8, 8(%rdi) + movbeq 40(%r9), %rax + movbeq 32(%r9), %r8 + movq %rax, 16(%rdi) + movq %r8, 24(%rdi) + movbeq 24(%r9), %rax + movbeq 16(%r9), %r8 + movq %rax, 32(%rdi) + movq %r8, 40(%rdi) + movbeq 8(%r9), %rax + movbeq (%r9), %r8 + movq %rax, 48(%rdi) + movq %r8, 56(%rdi) + addq $64, %rdi + subq $64, %rcx +L_384_from_bin_64_end: + cmpq $63, %rcx + jg L_384_from_bin_64_start + jmp L_384_from_bin_8_end +L_384_from_bin_8_start: + subq $8, %r9 + movbeq (%r9), %rax + movq %rax, (%rdi) + addq $8, %rdi + subq $8, %rcx +L_384_from_bin_8_end: + cmpq $7, %rcx + jg L_384_from_bin_8_start + cmpq %r11, %rcx + je L_384_from_bin_hi_end + movq %r11, %r8 + movq %r11, %rax +L_384_from_bin_hi_start: + movb (%rdx), %al + shlq $8, %r8 + incq %rdx + addq %rax, %r8 + decq %rcx + jg L_384_from_bin_hi_start + movq %r8, (%rdi) + addq $8, %rdi +L_384_from_bin_hi_end: + cmpq %r10, %rdi + je L_384_from_bin_zero_end +L_384_from_bin_zero_start: + movq %r11, (%rdi) + addq $8, %rdi + cmpq %r10, %rdi + jl L_384_from_bin_zero_start +L_384_from_bin_zero_end: + repz retq +#ifndef __APPLE__ +.size sp_384_from_bin,.-sp_384_from_bin +#endif /* __APPLE__ */ +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 48 + * + * r A single precision integer. + * a Byte array. + */ +#ifndef __APPLE__ +.globl sp_384_to_bin +.type sp_384_to_bin,@function +.align 16 +sp_384_to_bin: +#else +.globl _sp_384_to_bin +.p2align 4 +_sp_384_to_bin: +#endif /* __APPLE__ */ + movbeq 40(%rdi), %rdx + movbeq 32(%rdi), %rax + movq %rdx, (%rsi) + movq %rax, 8(%rsi) + movbeq 24(%rdi), %rdx + movbeq 16(%rdi), %rax + movq %rdx, 16(%rsi) + movq %rax, 24(%rsi) + movbeq 8(%rdi), %rdx + movbeq (%rdi), %rax + movq %rdx, 32(%rsi) + movq %rax, 40(%rsi) + repz retq +#ifndef __APPLE__ +.size sp_384_to_bin,.-sp_384_to_bin +#endif /* __APPLE__ */ +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. 
+ */ +#ifndef __APPLE__ +.globl sp_384_sub_in_place_6 +.type sp_384_sub_in_place_6,@function +.align 16 +sp_384_sub_in_place_6: +#else +.globl _sp_384_sub_in_place_6 +.p2align 4 +_sp_384_sub_in_place_6: +#endif /* __APPLE__ */ + xorq %rax, %rax + movq (%rsi), %rdx + movq 8(%rsi), %rcx + movq 16(%rsi), %r8 + movq 24(%rsi), %r9 + movq 32(%rsi), %r10 + movq 40(%rsi), %r11 + subq %rdx, (%rdi) + sbbq %rcx, 8(%rdi) + sbbq %r8, 16(%rdi) + sbbq %r9, 24(%rdi) + sbbq %r10, 32(%rdi) + sbbq %r11, 40(%rdi) + sbbq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_384_sub_in_place_6,.-sp_384_sub_in_place_6 +#endif /* __APPLE__ */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +#ifndef __APPLE__ +.globl sp_384_cond_sub_avx2_6 +.type sp_384_cond_sub_avx2_6,@function +.align 16 +sp_384_cond_sub_avx2_6: +#else +.globl _sp_384_cond_sub_avx2_6 +.p2align 4 +_sp_384_cond_sub_avx2_6: +#endif /* __APPLE__ */ + movq $0, %rax + movq (%rdx), %r10 + movq (%rsi), %r8 + pextq %rcx, %r10, %r10 + subq %r10, %r8 + movq 8(%rdx), %r10 + movq 8(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, (%rdi) + sbbq %r10, %r9 + movq 16(%rdx), %r8 + movq 16(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 8(%rdi) + sbbq %r8, %r10 + movq 24(%rdx), %r9 + movq 24(%rsi), %r8 + pextq %rcx, %r9, %r9 + movq %r10, 16(%rdi) + sbbq %r9, %r8 + movq 32(%rdx), %r10 + movq 32(%rsi), %r9 + pextq %rcx, %r10, %r10 + movq %r8, 24(%rdi) + sbbq %r10, %r9 + movq 40(%rdx), %r8 + movq 40(%rsi), %r10 + pextq %rcx, %r8, %r8 + movq %r9, 32(%rdi) + sbbq %r8, %r10 + movq %r10, 40(%rdi) + sbbq $0, %rax + repz retq +#ifndef __APPLE__ +.size sp_384_cond_sub_avx2_6,.-sp_384_cond_sub_avx2_6 +#endif /* __APPLE__ */ +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +#ifndef __APPLE__ +.globl sp_384_mul_d_6 +.type sp_384_mul_d_6,@function +.align 16 +sp_384_mul_d_6: +#else +.globl _sp_384_mul_d_6 +.p2align 4 +_sp_384_mul_d_6: +#endif /* __APPLE__ */ + movq %rdx, %rcx + # A[0] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq (%rsi) + movq %rax, %r8 + movq %rdx, %r9 + movq %r8, (%rdi) + # A[1] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 8(%rsi) + addq %rax, %r9 + movq %r9, 8(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[2] * B + movq %rcx, %rax + xorq %r9, %r9 + mulq 16(%rsi) + addq %rax, %r10 + movq %r10, 16(%rdi) + adcq %rdx, %r8 + adcq $0, %r9 + # A[3] * B + movq %rcx, %rax + xorq %r10, %r10 + mulq 24(%rsi) + addq %rax, %r8 + movq %r8, 24(%rdi) + adcq %rdx, %r9 + adcq $0, %r10 + # A[4] * B + movq %rcx, %rax + xorq %r8, %r8 + mulq 32(%rsi) + addq %rax, %r9 + movq %r9, 32(%rdi) + adcq %rdx, %r10 + adcq $0, %r8 + # A[5] * B + movq %rcx, %rax + mulq 40(%rsi) + addq %rax, %r10 + adcq %rdx, %r8 + movq %r10, 40(%rdi) + movq %r8, 48(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_384_mul_d_6,.-sp_384_mul_d_6 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
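+ *
+ * A rough C model (illustration only; assumes uint64_t limbs): six
+ * 64x64->128 products chained through one running carry, producing
+ * a seven-word result:
+ *
+ *     void mul_d_6_model(uint64_t* r, const uint64_t* a, uint64_t b)
+ *     {
+ *         unsigned __int128 c = 0;
+ *         int i;
+ *         for (i = 0; i < 6; i++) {
+ *             c += (unsigned __int128)a[i] * b;
+ *             r[i] = (uint64_t)c;
+ *             c >>= 64;
+ *         }
+ *         r[6] = (uint64_t)c;
+ *     }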
+ */ +#ifndef __APPLE__ +.globl sp_384_mul_d_avx2_6 +.type sp_384_mul_d_avx2_6,@function +.align 16 +sp_384_mul_d_avx2_6: +#else +.globl _sp_384_mul_d_avx2_6 +.p2align 4 +_sp_384_mul_d_avx2_6: +#endif /* __APPLE__ */ + movq %rdx, %rax + # A[0] * B + movq %rax, %rdx + xorq %r11, %r11 + mulxq (%rsi), %r9, %r10 + movq %r9, (%rdi) + # A[1] * B + mulxq 8(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 8(%rdi) + adoxq %r8, %r9 + # A[2] * B + mulxq 16(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 16(%rdi) + adoxq %r8, %r10 + # A[3] * B + mulxq 24(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 24(%rdi) + adoxq %r8, %r9 + # A[4] * B + mulxq 32(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 32(%rdi) + adoxq %r8, %r10 + # A[5] * B + mulxq 40(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + adcxq %r11, %r9 + movq %r10, 40(%rdi) + movq %r9, 48(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_384_mul_d_avx2_6,.-sp_384_mul_d_avx2_6 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +#endif /* WOLFSSL_SP_384 */ diff --git a/client/wolfssl/wolfcrypt/src/srp.c b/client/wolfssl/wolfcrypt/src/srp.c new file mode 100644 index 0000000..cf5eff1 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/srp.c @@ -0,0 +1,756 @@ +/* srp.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFCRYPT_HAVE_SRP + +#include <wolfssl/wolfcrypt/srp.h> +#include <wolfssl/wolfcrypt/random.h> +#include <wolfssl/wolfcrypt/error-crypt.h> + +#ifdef NO_INLINE + #include <wolfssl/wolfcrypt/misc.h> +#else + #define WOLFSSL_MISC_INCLUDED + #include <wolfcrypt/src/misc.c> +#endif + +/** Computes the session key using the Mask Generation Function 1.
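+ *
+ * That is, K = H(secret | c0) | H(secret | c1) | ... where each ci
+ * is a 4-byte big-endian block counter, truncated to twice the
+ * digest size; see wc_SrpSetKey below. Sketch (illustration only):
+ *
+ *     for (i = 0, j = 0; j < keySz; i++) {     // keySz = 2 * digestSz
+ *         counter = i as 4 big-endian bytes;
+ *         block   = H(secret | counter);
+ *         copy MIN(digestSz, keySz - j) bytes of block to key + j;
+ *         j += digestSz;
+ *     }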
*/ +static int wc_SrpSetKey(Srp* srp, byte* secret, word32 size); + +static int SrpHashInit(SrpHash* hash, SrpType type) +{ + hash->type = type; + + switch (type) { + case SRP_TYPE_SHA: + #ifndef NO_SHA + return wc_InitSha(&hash->data.sha); + #else + return BAD_FUNC_ARG; + #endif + + case SRP_TYPE_SHA256: + #ifndef NO_SHA256 + return wc_InitSha256(&hash->data.sha256); + #else + return BAD_FUNC_ARG; + #endif + + case SRP_TYPE_SHA384: + #ifdef WOLFSSL_SHA384 + return wc_InitSha384(&hash->data.sha384); + #else + return BAD_FUNC_ARG; + #endif + + case SRP_TYPE_SHA512: + #ifdef WOLFSSL_SHA512 + return wc_InitSha512(&hash->data.sha512); + #else + return BAD_FUNC_ARG; + #endif + + default: + return BAD_FUNC_ARG; + } +} + +static int SrpHashUpdate(SrpHash* hash, const byte* data, word32 size) +{ + switch (hash->type) { + case SRP_TYPE_SHA: + #ifndef NO_SHA + return wc_ShaUpdate(&hash->data.sha, data, size); + #else + return BAD_FUNC_ARG; + #endif + + case SRP_TYPE_SHA256: + #ifndef NO_SHA256 + return wc_Sha256Update(&hash->data.sha256, data, size); + #else + return BAD_FUNC_ARG; + #endif + + case SRP_TYPE_SHA384: + #ifdef WOLFSSL_SHA384 + return wc_Sha384Update(&hash->data.sha384, data, size); + #else + return BAD_FUNC_ARG; + #endif + + case SRP_TYPE_SHA512: + #ifdef WOLFSSL_SHA512 + return wc_Sha512Update(&hash->data.sha512, data, size); + #else + return BAD_FUNC_ARG; + #endif + + default: + return BAD_FUNC_ARG; + } +} + +static int SrpHashFinal(SrpHash* hash, byte* digest) +{ + switch (hash->type) { + case SRP_TYPE_SHA: + #ifndef NO_SHA + return wc_ShaFinal(&hash->data.sha, digest); + #else + return BAD_FUNC_ARG; + #endif + + case SRP_TYPE_SHA256: + #ifndef NO_SHA256 + return wc_Sha256Final(&hash->data.sha256, digest); + #else + return BAD_FUNC_ARG; + #endif + + case SRP_TYPE_SHA384: + #ifdef WOLFSSL_SHA384 + return wc_Sha384Final(&hash->data.sha384, digest); + #else + return BAD_FUNC_ARG; + #endif + + case SRP_TYPE_SHA512: + #ifdef WOLFSSL_SHA512 + return wc_Sha512Final(&hash->data.sha512, digest); + #else + return BAD_FUNC_ARG; + #endif + + default: + return BAD_FUNC_ARG; + } +} + +static word32 SrpHashSize(SrpType type) +{ + switch (type) { + case SRP_TYPE_SHA: + #ifndef NO_SHA + return WC_SHA_DIGEST_SIZE; + #else + return 0; + #endif + + case SRP_TYPE_SHA256: + #ifndef NO_SHA256 + return WC_SHA256_DIGEST_SIZE; + #else + return 0; + #endif + + case SRP_TYPE_SHA384: + #ifdef WOLFSSL_SHA384 + return WC_SHA384_DIGEST_SIZE; + #else + return 0; + #endif + + case SRP_TYPE_SHA512: + #ifdef WOLFSSL_SHA512 + return WC_SHA512_DIGEST_SIZE; + #else + return 0; + #endif + + default: + return 0; + } +} + +int wc_SrpInit(Srp* srp, SrpType type, SrpSide side) +{ + int r; + + /* validating params */ + + if (!srp) + return BAD_FUNC_ARG; + + if (side != SRP_CLIENT_SIDE && side != SRP_SERVER_SIDE) + return BAD_FUNC_ARG; + + switch (type) { + case SRP_TYPE_SHA: + #ifdef NO_SHA + return NOT_COMPILED_IN; + #else + break; /* OK */ + #endif + + case SRP_TYPE_SHA256: + #ifdef NO_SHA256 + return NOT_COMPILED_IN; + #else + break; /* OK */ + #endif + + case SRP_TYPE_SHA384: + #ifndef WOLFSSL_SHA384 + return NOT_COMPILED_IN; + #else + break; /* OK */ + #endif + + case SRP_TYPE_SHA512: + #ifndef WOLFSSL_SHA512 + return NOT_COMPILED_IN; + #else + break; /* OK */ + #endif + + default: + return BAD_FUNC_ARG; + } + + /* initializing variables */ + + XMEMSET(srp, 0, sizeof(Srp)); + + if ((r = SrpHashInit(&srp->client_proof, type)) != 0) + return r; + + if ((r = SrpHashInit(&srp->server_proof, type)) != 0) + return r; + + if ((r 
= mp_init_multi(&srp->N, &srp->g, &srp->auth, + &srp->priv, 0, 0)) != 0) + return r; + + srp->side = side; srp->type = type; + srp->salt = NULL; srp->saltSz = 0; + srp->user = NULL; srp->userSz = 0; + srp->key = NULL; srp->keySz = 0; + + srp->keyGenFunc_cb = wc_SrpSetKey; + + /* default heap hint to NULL or test value */ +#ifdef WOLFSSL_HEAP_TEST + srp->heap = (void*)WOLFSSL_HEAP_TEST; +#else + srp->heap = NULL; +#endif + + return 0; +} + +void wc_SrpTerm(Srp* srp) +{ + if (srp) { + mp_clear(&srp->N); mp_clear(&srp->g); + mp_clear(&srp->auth); mp_clear(&srp->priv); + if (srp->salt) { + ForceZero(srp->salt, srp->saltSz); + XFREE(srp->salt, srp->heap, DYNAMIC_TYPE_SRP); + } + if (srp->user) { + ForceZero(srp->user, srp->userSz); + XFREE(srp->user, srp->heap, DYNAMIC_TYPE_SRP); + } + if (srp->key) { + ForceZero(srp->key, srp->keySz); + XFREE(srp->key, srp->heap, DYNAMIC_TYPE_SRP); + } + + ForceZero(srp, sizeof(Srp)); + } +} + +int wc_SrpSetUsername(Srp* srp, const byte* username, word32 size) +{ + if (!srp || !username) + return BAD_FUNC_ARG; + + srp->user = (byte*)XMALLOC(size, srp->heap, DYNAMIC_TYPE_SRP); + if (srp->user == NULL) + return MEMORY_E; + + srp->userSz = size; + XMEMCPY(srp->user, username, srp->userSz); + + return 0; +} + +int wc_SrpSetParams(Srp* srp, const byte* N, word32 nSz, + const byte* g, word32 gSz, + const byte* salt, word32 saltSz) +{ + SrpHash hash; + byte digest1[SRP_MAX_DIGEST_SIZE]; + byte digest2[SRP_MAX_DIGEST_SIZE]; + byte pad = 0; + int i, r; + int j = 0; + + if (!srp || !N || !g || !salt || nSz < gSz) + return BAD_FUNC_ARG; + + if (!srp->user) + return SRP_CALL_ORDER_E; + + /* Set N */ + if (mp_read_unsigned_bin(&srp->N, N, nSz) != MP_OKAY) + return MP_READ_E; + + if (mp_count_bits(&srp->N) < SRP_MODULUS_MIN_BITS) + return BAD_FUNC_ARG; + + /* Set g */ + if (mp_read_unsigned_bin(&srp->g, g, gSz) != MP_OKAY) + return MP_READ_E; + + if (mp_cmp(&srp->N, &srp->g) != MP_GT) + return BAD_FUNC_ARG; + + /* Set salt */ + if (srp->salt) { + ForceZero(srp->salt, srp->saltSz); + XFREE(srp->salt, srp->heap, DYNAMIC_TYPE_SRP); + } + + srp->salt = (byte*)XMALLOC(saltSz, srp->heap, DYNAMIC_TYPE_SRP); + if (srp->salt == NULL) + return MEMORY_E; + + XMEMCPY(srp->salt, salt, saltSz); + srp->saltSz = saltSz; + + /* Set k = H(N, g) */ + r = SrpHashInit(&hash, srp->type); + if (!r) r = SrpHashUpdate(&hash, (byte*) N, nSz); + for (i = 0; (word32)i < nSz - gSz; i++) { + if (!r) r = SrpHashUpdate(&hash, &pad, 1); + } + if (!r) r = SrpHashUpdate(&hash, (byte*) g, gSz); + if (!r) r = SrpHashFinal(&hash, srp->k); + + /* update client proof */ + + /* digest1 = H(N) */ + if (!r) r = SrpHashInit(&hash, srp->type); + if (!r) r = SrpHashUpdate(&hash, (byte*) N, nSz); + if (!r) r = SrpHashFinal(&hash, digest1); + + /* digest2 = H(g) */ + if (!r) r = SrpHashInit(&hash, srp->type); + if (!r) r = SrpHashUpdate(&hash, (byte*) g, gSz); + if (!r) r = SrpHashFinal(&hash, digest2); + + /* digest1 = H(N) ^ H(g) */ + if (r == 0) { + for (i = 0, j = SrpHashSize(srp->type); i < j; i++) + digest1[i] ^= digest2[i]; + } + + /* digest2 = H(user) */ + if (!r) r = SrpHashInit(&hash, srp->type); + if (!r) r = SrpHashUpdate(&hash, srp->user, srp->userSz); + if (!r) r = SrpHashFinal(&hash, digest2); + + /* client proof = H( H(N) ^ H(g) | H(user) | salt) */ + if (!r) r = SrpHashUpdate(&srp->client_proof, digest1, j); + if (!r) r = SrpHashUpdate(&srp->client_proof, digest2, j); + if (!r) r = SrpHashUpdate(&srp->client_proof, salt, saltSz); + + return r; +} + +int wc_SrpSetPassword(Srp* srp, const byte* password, 
word32 size) +{ + SrpHash hash; + byte digest[SRP_MAX_DIGEST_SIZE]; + word32 digestSz; + int r; + + if (!srp || !password || srp->side != SRP_CLIENT_SIDE) + return BAD_FUNC_ARG; + + if (!srp->salt) + return SRP_CALL_ORDER_E; + + digestSz = SrpHashSize(srp->type); + + /* digest = H(username | ':' | password) */ + r = SrpHashInit(&hash, srp->type); + if (!r) r = SrpHashUpdate(&hash, srp->user, srp->userSz); + if (!r) r = SrpHashUpdate(&hash, (const byte*) ":", 1); + if (!r) r = SrpHashUpdate(&hash, password, size); + if (!r) r = SrpHashFinal(&hash, digest); + + /* digest = H(salt | H(username | ':' | password)) */ + if (!r) r = SrpHashInit(&hash, srp->type); + if (!r) r = SrpHashUpdate(&hash, srp->salt, srp->saltSz); + if (!r) r = SrpHashUpdate(&hash, digest, digestSz); + if (!r) r = SrpHashFinal(&hash, digest); + + /* Set x (private key) */ + if (!r) r = mp_read_unsigned_bin(&srp->auth, digest, digestSz); + + ForceZero(digest, SRP_MAX_DIGEST_SIZE); + + return r; +} + +int wc_SrpGetVerifier(Srp* srp, byte* verifier, word32* size) +{ + mp_int v; + int r; + + if (!srp || !verifier || !size || srp->side != SRP_CLIENT_SIDE) + return BAD_FUNC_ARG; + + if (mp_iszero(&srp->auth) == MP_YES) + return SRP_CALL_ORDER_E; + + r = mp_init(&v); + if (r != MP_OKAY) + return MP_INIT_E; + + /* v = g ^ x % N */ + if (!r) r = mp_exptmod(&srp->g, &srp->auth, &srp->N, &v); + if (!r) r = *size < (word32)mp_unsigned_bin_size(&v) ? BUFFER_E : MP_OKAY; + if (!r) r = mp_to_unsigned_bin(&v, verifier); + if (!r) *size = mp_unsigned_bin_size(&v); + + mp_clear(&v); + + return r; +} + +int wc_SrpSetVerifier(Srp* srp, const byte* verifier, word32 size) +{ + if (!srp || !verifier || srp->side != SRP_SERVER_SIDE) + return BAD_FUNC_ARG; + + return mp_read_unsigned_bin(&srp->auth, verifier, size); +} + +int wc_SrpSetPrivate(Srp* srp, const byte* priv, word32 size) +{ + mp_int p; + int r; + + if (!srp || !priv || !size) + return BAD_FUNC_ARG; + + if (mp_iszero(&srp->auth) == MP_YES) + return SRP_CALL_ORDER_E; + + r = mp_init(&p); + if (r != MP_OKAY) + return MP_INIT_E; + if (!r) r = mp_read_unsigned_bin(&p, priv, size); + if (!r) r = mp_mod(&p, &srp->N, &srp->priv); + if (!r) r = mp_iszero(&srp->priv) == MP_YES ? SRP_BAD_KEY_E : 0; + + mp_clear(&p); + + return r; +} + +/** Generates random data using wolfcrypt RNG. */ +static int wc_SrpGenPrivate(Srp* srp, byte* priv, word32 size) +{ + WC_RNG rng; + int r = wc_InitRng(&rng); + + if (!r) r = wc_RNG_GenerateBlock(&rng, priv, size); + if (!r) r = wc_SrpSetPrivate(srp, priv, size); + if (!r) wc_FreeRng(&rng); + + return r; +} + +int wc_SrpGetPublic(Srp* srp, byte* pub, word32* size) +{ + mp_int pubkey; + word32 modulusSz; + int r; + + if (!srp || !pub || !size) + return BAD_FUNC_ARG; + + if (mp_iszero(&srp->auth) == MP_YES) + return SRP_CALL_ORDER_E; + + modulusSz = mp_unsigned_bin_size(&srp->N); + if (*size < modulusSz) + return BUFFER_E; + + r = mp_init(&pubkey); + if (r != MP_OKAY) + return MP_INIT_E; + + /* priv = random() */ + if (mp_iszero(&srp->priv) == MP_YES) + r = wc_SrpGenPrivate(srp, pub, SRP_PRIVATE_KEY_MIN_BITS / 8); + + /* client side: A = g ^ a % N */ + if (srp->side == SRP_CLIENT_SIDE) { + if (!r) r = mp_exptmod(&srp->g, &srp->priv, &srp->N, &pubkey); + + /* server side: B = (k * v + (g ^ b % N)) % N */ + } else { + mp_int i, j; + + if (mp_init_multi(&i, &j, 0, 0, 0, 0) == MP_OKAY) { + if (!r) r = mp_read_unsigned_bin(&i, srp->k,SrpHashSize(srp->type)); + if (!r) r = mp_iszero(&i) == MP_YES ? 
SRP_BAD_KEY_E : 0; + if (!r) r = mp_exptmod(&srp->g, &srp->priv, &srp->N, &pubkey); + if (!r) r = mp_mulmod(&i, &srp->auth, &srp->N, &j); + if (!r) r = mp_add(&j, &pubkey, &i); + if (!r) r = mp_mod(&i, &srp->N, &pubkey); + + mp_clear(&i); mp_clear(&j); + } + } + + /* extract public key to buffer */ + XMEMSET(pub, 0, modulusSz); + if (!r) r = mp_to_unsigned_bin(&pubkey, pub); + if (!r) *size = mp_unsigned_bin_size(&pubkey); + mp_clear(&pubkey); + + return r; +} + +static int wc_SrpSetKey(Srp* srp, byte* secret, word32 size) +{ + SrpHash hash; + byte digest[SRP_MAX_DIGEST_SIZE]; + word32 i, j, digestSz = SrpHashSize(srp->type); + byte counter[4]; + int r = BAD_FUNC_ARG; + + XMEMSET(digest, 0, SRP_MAX_DIGEST_SIZE); + + srp->key = (byte*)XMALLOC(2 * digestSz, srp->heap, DYNAMIC_TYPE_SRP); + if (srp->key == NULL) + return MEMORY_E; + + srp->keySz = 2 * digestSz; + + for (i = j = 0; j < srp->keySz; i++) { + counter[0] = (i >> 24) & 0xFF; + counter[1] = (i >> 16) & 0xFF; + counter[2] = (i >> 8) & 0xFF; + counter[3] = i & 0xFF; + + r = SrpHashInit(&hash, srp->type); + if (!r) r = SrpHashUpdate(&hash, secret, size); + if (!r) r = SrpHashUpdate(&hash, counter, 4); + + if (j + digestSz > srp->keySz) { + if (!r) r = SrpHashFinal(&hash, digest); + XMEMCPY(srp->key + j, digest, srp->keySz - j); + j = srp->keySz; + } + else { + if (!r) r = SrpHashFinal(&hash, srp->key + j); + j += digestSz; + } + } + + ForceZero(digest, sizeof(digest)); + ForceZero(&hash, sizeof(SrpHash)); + + return r; +} + +int wc_SrpComputeKey(Srp* srp, byte* clientPubKey, word32 clientPubKeySz, + byte* serverPubKey, word32 serverPubKeySz) +{ + SrpHash hash; + byte *secret; + byte digest[SRP_MAX_DIGEST_SIZE]; + word32 i, secretSz, digestSz; + mp_int u, s, temp1, temp2; + byte pad = 0; + int r; + + /* validating params */ + + if (!srp || !clientPubKey || clientPubKeySz == 0 + || !serverPubKey || serverPubKeySz == 0) + return BAD_FUNC_ARG; + + if (mp_iszero(&srp->priv) == MP_YES) + return SRP_CALL_ORDER_E; + + /* initializing variables */ + + if ((r = SrpHashInit(&hash, srp->type)) != 0) + return r; + + digestSz = SrpHashSize(srp->type); + secretSz = mp_unsigned_bin_size(&srp->N); + + if ((secret = (byte*)XMALLOC(secretSz, srp->heap, DYNAMIC_TYPE_SRP)) ==NULL) + return MEMORY_E; + + if ((r = mp_init_multi(&u, &s, &temp1, &temp2, 0, 0)) != MP_OKAY) { + XFREE(secret, srp->heap, DYNAMIC_TYPE_SRP); + return r; + } + + /* building u (random scrambling parameter) */ + + /* H(A) */ + for (i = 0; !r && i < secretSz - clientPubKeySz; i++) + r = SrpHashUpdate(&hash, &pad, 1); + if (!r) r = SrpHashUpdate(&hash, clientPubKey, clientPubKeySz); + + /* H(A | B) */ + for (i = 0; !r && i < secretSz - serverPubKeySz; i++) + r = SrpHashUpdate(&hash, &pad, 1); + if (!r) r = SrpHashUpdate(&hash, serverPubKey, serverPubKeySz); + + /* set u */ + if (!r) r = SrpHashFinal(&hash, digest); + if (!r) r = mp_read_unsigned_bin(&u, digest, SrpHashSize(srp->type)); + + /* building s (secret) */ + + if (!r && srp->side == SRP_CLIENT_SIDE) { + + /* temp1 = B - k * v; rejects k == 0, B == 0 and B >= N. */ + r = mp_read_unsigned_bin(&temp1, srp->k, digestSz); + if (!r) r = mp_iszero(&temp1) == MP_YES ? SRP_BAD_KEY_E : 0; + if (!r) r = mp_exptmod(&srp->g, &srp->auth, &srp->N, &temp2); + if (!r) r = mp_mulmod(&temp1, &temp2, &srp->N, &s); + if (!r) r = mp_read_unsigned_bin(&temp2, serverPubKey, serverPubKeySz); + if (!r) r = mp_iszero(&temp2) == MP_YES ? SRP_BAD_KEY_E : 0; + if (!r) r = mp_cmp(&temp2, &srp->N) != MP_LT ? 
SRP_BAD_KEY_E : 0; + if (!r) r = mp_sub(&temp2, &s, &temp1); + + /* temp2 = a + u * x */ + if (!r) r = mp_mulmod(&u, &srp->auth, &srp->N, &s); + if (!r) r = mp_add(&srp->priv, &s, &temp2); + + /* secret = temp1 ^ temp2 % N */ + if (!r) r = mp_exptmod(&temp1, &temp2, &srp->N, &s); + + } else if (!r && srp->side == SRP_SERVER_SIDE) { + /* temp1 = v ^ u % N */ + r = mp_exptmod(&srp->auth, &u, &srp->N, &temp1); + + /* temp2 = A * temp1 % N; rejects A == 0, A >= N */ + if (!r) r = mp_read_unsigned_bin(&s, clientPubKey, clientPubKeySz); + if (!r) r = mp_iszero(&s) == MP_YES ? SRP_BAD_KEY_E : 0; + if (!r) r = mp_cmp(&s, &srp->N) != MP_LT ? SRP_BAD_KEY_E : 0; + if (!r) r = mp_mulmod(&s, &temp1, &srp->N, &temp2); + + /* rejects A * v ^ u % N >= 1, A * v ^ u % N == -1 % N */ + if (!r) r = mp_read_unsigned_bin(&temp1, (const byte*)"\001", 1); + if (!r) r = mp_cmp(&temp2, &temp1) != MP_GT ? SRP_BAD_KEY_E : 0; + if (!r) r = mp_sub(&srp->N, &temp1, &s); + if (!r) r = mp_cmp(&temp2, &s) == MP_EQ ? SRP_BAD_KEY_E : 0; + + /* secret = temp2 * b % N */ + if (!r) r = mp_exptmod(&temp2, &srp->priv, &srp->N, &s); + } + + /* building session key from secret */ + + if (!r) r = mp_to_unsigned_bin(&s, secret); + if (!r) r = srp->keyGenFunc_cb(srp, secret, mp_unsigned_bin_size(&s)); + + /* updating client proof = H( H(N) ^ H(g) | H(user) | salt | A | B | K) */ + + if (!r) r = SrpHashUpdate(&srp->client_proof, clientPubKey, clientPubKeySz); + if (!r) r = SrpHashUpdate(&srp->client_proof, serverPubKey, serverPubKeySz); + if (!r) r = SrpHashUpdate(&srp->client_proof, srp->key, srp->keySz); + + /* updating server proof = H(A) */ + + if (!r) r = SrpHashUpdate(&srp->server_proof, clientPubKey, clientPubKeySz); + + XFREE(secret, srp->heap, DYNAMIC_TYPE_SRP); + mp_clear(&u); mp_clear(&s); mp_clear(&temp1); mp_clear(&temp2); + + return r; +} + +int wc_SrpGetProof(Srp* srp, byte* proof, word32* size) +{ + int r; + + if (!srp || !proof || !size) + return BAD_FUNC_ARG; + + if (*size < SrpHashSize(srp->type)) + return BUFFER_E; + + if ((r = SrpHashFinal(srp->side == SRP_CLIENT_SIDE + ? &srp->client_proof + : &srp->server_proof, proof)) != 0) + return r; + + *size = SrpHashSize(srp->type); + + if (srp->side == SRP_CLIENT_SIDE) { + /* server proof = H( A | client proof | K) */ + if (!r) r = SrpHashUpdate(&srp->server_proof, proof, *size); + if (!r) r = SrpHashUpdate(&srp->server_proof, srp->key, srp->keySz); + } + + return r; +} + +int wc_SrpVerifyPeersProof(Srp* srp, byte* proof, word32 size) +{ + byte digest[SRP_MAX_DIGEST_SIZE]; + int r; + + if (!srp || !proof) + return BAD_FUNC_ARG; + + if (size != SrpHashSize(srp->type)) + return BUFFER_E; + + r = SrpHashFinal(srp->side == SRP_CLIENT_SIDE ? &srp->server_proof + : &srp->client_proof, digest); + + if (srp->side == SRP_SERVER_SIDE) { + /* server proof = H( A | client proof | K) */ + if (!r) r = SrpHashUpdate(&srp->server_proof, proof, size); + if (!r) r = SrpHashUpdate(&srp->server_proof, srp->key, srp->keySz); + } + + if (!r && XMEMCMP(proof, digest, size) != 0) + r = SRP_VERIFY_E; + + return r; +} + +#endif /* WOLFCRYPT_HAVE_SRP */ diff --git a/client/wolfssl/wolfcrypt/src/tfm.c b/client/wolfssl/wolfcrypt/src/tfm.c new file mode 100644 index 0000000..61b31f0 --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/tfm.c @@ -0,0 +1,5068 @@ +/* tfm.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. 
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+/*
+ * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
+ * http://math.libtomcrypt.com
+ */
+
+/**
+ * Edited by Moises Guimaraes (moises@wolfssl.com)
+ * to fit wolfSSL's needs.
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+/* in case user set USE_FAST_MATH there */
+#include <wolfssl/wolfcrypt/settings.h>
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#ifdef USE_FAST_MATH
+
+#include <wolfssl/wolfcrypt/random.h>
+#include <wolfssl/wolfcrypt/tfm.h>
+#include <wolfcrypt/src/asm.c>          /* will define asm MACROS or C ones */
+#include <wolfssl/wolfcrypt/wolfmath.h> /* common functions */
+
+#if defined(FREESCALE_LTC_TFM)
+    #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
+#endif
+#ifdef WOLFSSL_DEBUG_MATH
+    #include <stdio.h>
+#endif
+
+#ifdef USE_WINDOWS_API
+    #pragma warning(disable:4127)
+    /* Disables the warning:
+     *   4127: conditional expression is constant
+     * in this file.
+     */
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifdef __cplusplus
+    extern "C" {
+#endif
+WOLFSSL_LOCAL int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+#ifdef __cplusplus
+    } /* extern "C" */
+#endif
+#endif
+
+
+#ifndef WOLFSSL_SP_MATH
+/* math settings check */
+word32 CheckRunTimeSettings(void)
+{
+    return CTC_SETTINGS;
+}
+#endif
+
+/* math settings size check */
+word32 CheckRunTimeFastMath(void)
+{
+    return FP_SIZE;
+}
+
+
+/* Functions */
+
+void fp_add(fp_int *a, fp_int *b, fp_int *c)
+{
+  int sa, sb;
+
+  /* get sign of both inputs */
+  sa = a->sign;
+  sb = b->sign;
+
+  /* handle two cases, not four */
+  if (sa == sb) {
+    /* both positive or both negative */
+    /* add their magnitudes, copy the sign */
+    c->sign = sa;
+    s_fp_add (a, b, c);
+  } else {
+    /* one positive, the other negative */
+    /* subtract the one with the greater magnitude from */
+    /* the one of the lesser magnitude. The result gets */
+    /* the sign of the one with the greater magnitude.
*/ + if (fp_cmp_mag (a, b) == FP_LT) { + c->sign = sb; + s_fp_sub (b, a, c); + } else { + c->sign = sa; + s_fp_sub (a, b, c); + } + } +} + +/* unsigned addition */ +void s_fp_add(fp_int *a, fp_int *b, fp_int *c) +{ + int x, y, oldused; + fp_word t; + + y = MAX(a->used, b->used); + oldused = MIN(c->used, FP_SIZE); /* help static analysis w/ largest size */ + c->used = y; + + t = 0; + for (x = 0; x < y; x++) { + t += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]); + c->dp[x] = (fp_digit)t; + t >>= DIGIT_BIT; + } + if (t != 0 && x < FP_SIZE) { + c->dp[c->used++] = (fp_digit)t; + ++x; + } + + c->used = x; + + /* zero any excess digits on the destination that we didn't write to */ + for (; x < oldused; x++) { + c->dp[x] = 0; + } + fp_clamp(c); +} + +/* c = a - b */ +void fp_sub(fp_int *a, fp_int *b, fp_int *c) +{ + int sa, sb; + + sa = a->sign; + sb = b->sign; + + if (sa != sb) { + /* subtract a negative from a positive, OR */ + /* subtract a positive from a negative. */ + /* In either case, ADD their magnitudes, */ + /* and use the sign of the first number. */ + c->sign = sa; + s_fp_add (a, b, c); + } else { + /* subtract a positive from a positive, OR */ + /* subtract a negative from a negative. */ + /* First, take the difference between their */ + /* magnitudes, then... */ + if (fp_cmp_mag (a, b) != FP_LT) { + /* Copy the sign from the first */ + c->sign = sa; + /* The first has a larger or equal magnitude */ + s_fp_sub (a, b, c); + } else { + /* The result has the *opposite* sign from */ + /* the first number. */ + c->sign = (sa == FP_ZPOS) ? FP_NEG : FP_ZPOS; + /* The second has a larger magnitude */ + s_fp_sub (b, a, c); + } + } +} + +/* unsigned subtraction ||a|| >= ||b|| ALWAYS! */ +void s_fp_sub(fp_int *a, fp_int *b, fp_int *c) +{ + int x, oldbused, oldused; + fp_word t; + + oldused = c->used; + oldbused = b->used; + c->used = a->used; + t = 0; + for (x = 0; x < oldbused; x++) { + t = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t); + c->dp[x] = (fp_digit)t; + t = (t >> DIGIT_BIT)&1; + } + for (; x < a->used; x++) { + t = ((fp_word)a->dp[x]) - t; + c->dp[x] = (fp_digit)t; + t = (t >> DIGIT_BIT)&1; + } + + /* zero any excess digits on the destination that we didn't write to */ + for (; x < oldused; x++) { + c->dp[x] = 0; + } + fp_clamp(c); +} + +/* c = a * b */ +int fp_mul(fp_int *A, fp_int *B, fp_int *C) +{ + int ret = 0; + int y, yy, oldused; + +#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) + ret = esp_mp_mul(A, B, C); + if(ret != -2) return ret; +#endif + + oldused = C->used; + + y = MAX(A->used, B->used); + yy = MIN(A->used, B->used); + + /* call generic if we're out of range */ + if (y + yy > FP_SIZE) { + ret = fp_mul_comba(A, B, C); + goto clean; + } + + /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size + of the largest input. We also want to avoid doing excess mults if the + inputs are not close to the next power of two. 
That is, for example, + if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications + */ + +#if defined(TFM_MUL3) && FP_SIZE >= 6 + if (y <= 3) { + ret = fp_mul_comba3(A,B,C); + goto clean; + } +#endif +#if defined(TFM_MUL4) && FP_SIZE >= 8 + if (y == 4) { + ret = fp_mul_comba4(A,B,C); + goto clean; + } +#endif +#if defined(TFM_MUL6) && FP_SIZE >= 12 + if (y <= 6) { + ret = fp_mul_comba6(A,B,C); + goto clean; + } +#endif +#if defined(TFM_MUL7) && FP_SIZE >= 14 + if (y == 7) { + ret = fp_mul_comba7(A,B,C); + goto clean; + } +#endif +#if defined(TFM_MUL8) && FP_SIZE >= 16 + if (y == 8) { + ret = fp_mul_comba8(A,B,C); + goto clean; + } +#endif +#if defined(TFM_MUL9) && FP_SIZE >= 18 + if (y == 9) { + ret = fp_mul_comba9(A,B,C); + goto clean; + } +#endif +#if defined(TFM_MUL12) && FP_SIZE >= 24 + if (y <= 12) { + ret = fp_mul_comba12(A,B,C); + goto clean; + } +#endif +#if defined(TFM_MUL17) && FP_SIZE >= 34 + if (y <= 17) { + ret = fp_mul_comba17(A,B,C); + goto clean; + } +#endif + +#if defined(TFM_SMALL_SET) && FP_SIZE >= 32 + if (y <= 16) { + ret = fp_mul_comba_small(A,B,C); + goto clean; + } +#endif +#if defined(TFM_MUL20) && FP_SIZE >= 40 + if (y <= 20) { + ret = fp_mul_comba20(A,B,C); + goto clean; + } +#endif +#if defined(TFM_MUL24) && FP_SIZE >= 48 + if (yy >= 16 && y <= 24) { + ret = fp_mul_comba24(A,B,C); + goto clean; + } +#endif +#if defined(TFM_MUL28) && FP_SIZE >= 56 + if (yy >= 20 && y <= 28) { + ret = fp_mul_comba28(A,B,C); + goto clean; + } +#endif +#if defined(TFM_MUL32) && FP_SIZE >= 64 + if (yy >= 24 && y <= 32) { + ret = fp_mul_comba32(A,B,C); + goto clean; + } +#endif +#if defined(TFM_MUL48) && FP_SIZE >= 96 + if (yy >= 40 && y <= 48) { + ret = fp_mul_comba48(A,B,C); + goto clean; + } +#endif +#if defined(TFM_MUL64) && FP_SIZE >= 128 + if (yy >= 56 && y <= 64) { + ret = fp_mul_comba64(A,B,C); + goto clean; + } +#endif + ret = fp_mul_comba(A,B,C); + +clean: + /* zero any excess digits on the destination that we didn't write to */ + for (y = C->used; y >= 0 && y < oldused; y++) { + C->dp[y] = 0; + } + + return ret; +} + +void fp_mul_2(fp_int * a, fp_int * b) +{ + int x, oldused; + + oldused = b->used; + b->used = a->used; + + { + fp_digit r, rr, *tmpa, *tmpb; + + /* alias for source */ + tmpa = a->dp; + + /* alias for dest */ + tmpb = b->dp; + + /* carry */ + r = 0; + for (x = 0; x < a->used; x++) { + + /* get what will be the *next* carry bit from the + * MSB of the current digit + */ + rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1)); + + /* now shift up this digit, add in the carry [from the previous] */ + *tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r); + + /* copy the carry that would be from the source + * digit into the next iteration + */ + r = rr; + } + + /* new leading digit? 
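The doubling loop in fp_mul_2 above is a plain left shift with an explicit carry handed from digit to digit. A stand-alone sketch of the same idea with 16-bit digits (illustrative only, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint16_t d[3]  = { 0x8001, 0xFFFF, 0x0000 };  /* 0xFFFF8001, LSD first */
        uint16_t carry = 0;
        int i;

        for (i = 0; i < 3; i++) {
            uint16_t msb = (uint16_t)(d[i] >> 15);    /* bit about to shift out */
            d[i]  = (uint16_t)((d[i] << 1) | carry);  /* shift up, add old carry */
            carry = msb;                              /* forward to next digit */
        }
        printf("%04x %04x %04x\n", d[2], d[1], d[0]); /* prints: 0001 ffff 0002 */
        return 0;
    }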
*/ + if (r != 0 && b->used != (FP_SIZE-1)) { + /* add a MSB which is always 1 at this point */ + *tmpb = 1; + ++(b->used); + } + + /* zero any excess digits on the destination that we didn't write to */ + tmpb = b->dp + b->used; + for (x = b->used; x < oldused; x++) { + *tmpb++ = 0; + } + } + b->sign = a->sign; +} + +/* c = a * b */ +void fp_mul_d(fp_int *a, fp_digit b, fp_int *c) +{ + fp_word w; + int x, oldused; + + oldused = c->used; + c->used = a->used; + c->sign = a->sign; + w = 0; + for (x = 0; x < a->used; x++) { + w = ((fp_word)a->dp[x]) * ((fp_word)b) + w; + c->dp[x] = (fp_digit)w; + w = w >> DIGIT_BIT; + } + if (w != 0 && (a->used != FP_SIZE)) { + c->dp[c->used++] = (fp_digit) w; + ++x; + } + + /* zero any excess digits on the destination that we didn't write to */ + /* also checking FP_SIZE here for static analysis */ + for (; x < oldused && x < FP_SIZE; x++) { + c->dp[x] = 0; + } + fp_clamp(c); +} + +/* c = a * 2**d */ +void fp_mul_2d(fp_int *a, int b, fp_int *c) +{ + fp_digit carry, carrytmp, shift; + int x; + + /* copy it */ + fp_copy(a, c); + + /* handle whole digits */ + if (b >= DIGIT_BIT) { + fp_lshd(c, b/DIGIT_BIT); + } + b %= DIGIT_BIT; + + /* shift the digits */ + if (b != 0) { + carry = 0; + shift = DIGIT_BIT - b; + for (x = 0; x < c->used; x++) { + carrytmp = c->dp[x] >> shift; + c->dp[x] = (c->dp[x] << b) + carry; + carry = carrytmp; + } + /* store last carry if room */ + if (carry && x < FP_SIZE) { + c->dp[c->used++] = carry; + } + } + fp_clamp(c); +} + +/* generic PxQ multiplier */ +#if defined(HAVE_INTEL_MULX) + +WC_INLINE static int fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C) + +{ + int ix, iy, iz, pa; + fp_int *dst; +#ifndef WOLFSSL_SMALL_STACK + fp_int tmp[1]; +#else + fp_int *tmp; +#endif + + /* Variables used but not seen by cppcheck. */ + (void)ix; (void)iy; (void)iz; + +#ifdef WOLFSSL_SMALL_STACK + tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT); + if (tmp == NULL) + return FP_MEM; +#endif + + /* get size of output and trim */ + pa = A->used + B->used; + if (pa >= FP_SIZE) { + pa = FP_SIZE-1; + } + + /* Always take branch to use tmp variable. This avoids a cache attack for + * determining if C equals A */ + if (1) { + fp_init(tmp); + dst = tmp; + } + + TFM_INTEL_MUL_COMBA(A, B, dst) ; + + dst->used = pa; + dst->sign = A->sign ^ B->sign; + fp_clamp(dst); + fp_copy(dst, C); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT); +#endif + + return FP_OKAY; +} +#endif + +int fp_mul_comba(fp_int *A, fp_int *B, fp_int *C) +{ + int ret = 0; + int ix, iy, iz, tx, ty, pa; + fp_digit c0, c1, c2, *tmpx, *tmpy; + fp_int *dst; +#ifndef WOLFSSL_SMALL_STACK + fp_int tmp[1]; +#else + fp_int *tmp; +#endif + + IF_HAVE_INTEL_MULX(ret = fp_mul_comba_mulx(A, B, C), return ret) ; + +#ifdef WOLFSSL_SMALL_STACK + tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT); + if (tmp == NULL) + return FP_MEM; +#endif + + COMBA_START; + COMBA_CLEAR; + + /* get size of output and trim */ + pa = A->used + B->used; + if (pa >= FP_SIZE) { + pa = FP_SIZE-1; + } + + /* Always take branch to use tmp variable. This avoids a cache attack for + * determining if C equals A */ + if (1) { + fp_init(tmp); + dst = tmp; + } + + for (ix = 0; ix < pa; ix++) { + /* get offsets into the two bignums */ + ty = MIN(ix, (B->used > 0 ? B->used - 1 : 0)); + tx = ix - ty; + + /* setup temp aliases */ + tmpx = A->dp + tx; + tmpy = B->dp + ty; + + /* this is the number of times the loop will iterate, essentially its + while (tx++ < a->used && ty-- >= 0) { ... 
} + */ + iy = MIN(A->used-tx, ty+1); + + /* execute loop */ + COMBA_FORWARD; + for (iz = 0; iz < iy; ++iz) { + fp_digit _tmpx = *tmpx++; + fp_digit _tmpy = *tmpy--; + MULADD(_tmpx, _tmpy); + } + + /* store term */ + COMBA_STORE(dst->dp[ix]); + } + COMBA_FINI; + + dst->used = pa; + dst->sign = A->sign ^ B->sign; + fp_clamp(dst); + fp_copy(dst, C); + + /* Variables used but not seen by cppcheck. */ + (void)c0; (void)c1; (void)c2; + +#ifdef WOLFSSL_SMALL_STACK + XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return ret; +} + +/* a/b => cb + d == a */ +int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d) +{ + int n, t, i, norm, neg; +#ifndef WOLFSSL_SMALL_STACK + fp_int q[1], x[1], y[1], t1[1], t2[1]; +#else + fp_int *q, *x, *y, *t1, *t2; +#endif + + /* is divisor zero ? */ + if (fp_iszero (b) == FP_YES) { + return FP_VAL; + } + + /* if a < b then q=0, r = a */ + if (fp_cmp_mag (a, b) == FP_LT) { + if (d != NULL) { + fp_copy (a, d); + } + if (c != NULL) { + fp_zero (c); + } + return FP_OKAY; + } + +#ifdef WOLFSSL_SMALL_STACK + q = (fp_int*)XMALLOC(sizeof(fp_int) * 5, NULL, DYNAMIC_TYPE_BIGINT); + if (q == NULL) { + return FP_MEM; + } + x = &q[1]; y = &q[2]; t1 = &q[3]; t2 = &q[4]; +#endif + + fp_init(q); + q->used = a->used + 2; + + fp_init(t1); + fp_init(t2); + fp_init_copy(x, a); + fp_init_copy(y, b); + + /* fix the sign */ + neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG; + x->sign = y->sign = FP_ZPOS; + + /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */ + norm = fp_count_bits(y) % DIGIT_BIT; + if (norm < (int)(DIGIT_BIT-1)) { + norm = (DIGIT_BIT-1) - norm; + fp_mul_2d (x, norm, x); + fp_mul_2d (y, norm, y); + } else { + norm = 0; + } + + /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ + n = x->used - 1; + t = y->used - 1; + + /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ + fp_lshd (y, n - t); /* y = y*b**{n-t} */ + + while (fp_cmp (x, y) != FP_LT) { + ++(q->dp[n - t]); + fp_sub (x, y, x); + } + + /* reset y by shifting it back down */ + fp_rshd (y, n - t); + + /* step 3. for i from n down to (t + 1) */ + for (i = n; i >= (t + 1); i--) { + if (i > x->used) { + continue; + } + + /* step 3.1 if xi == yt then set q{i-t-1} to b-1, + * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ + if (x->dp[i] == y->dp[t]) { + q->dp[i - t - 1] = (fp_digit) ((((fp_word)1) << DIGIT_BIT) - 1); + } else { + fp_word tmp; + tmp = ((fp_word) x->dp[i]) << ((fp_word) DIGIT_BIT); + tmp |= ((fp_word) x->dp[i - 1]); + tmp /= ((fp_word)y->dp[t]); + q->dp[i - t - 1] = (fp_digit) (tmp); + } + + /* while (q{i-t-1} * (yt * b + y{t-1})) > + xi * b**2 + xi-1 * b + xi-2 + + do q{i-t-1} -= 1; + */ + q->dp[i - t - 1] = (q->dp[i - t - 1] + 1); + do { + q->dp[i - t - 1] = (q->dp[i - t - 1] - 1); + + /* find left hand */ + fp_zero (t1); + t1->dp[0] = (t - 1 < 0) ? 0 : y->dp[t - 1]; + t1->dp[1] = y->dp[t]; + t1->used = 2; + fp_mul_d (t1, q->dp[i - t - 1], t1); + + /* find right hand */ + t2->dp[0] = (i - 2 < 0) ? 0 : x->dp[i - 2]; + t2->dp[1] = (i - 1 < 0) ? 
0 : x->dp[i - 1]; + t2->dp[2] = x->dp[i]; + t2->used = 3; + } while (fp_cmp_mag(t1, t2) == FP_GT); + + /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ + fp_mul_d (y, q->dp[i - t - 1], t1); + fp_lshd (t1, i - t - 1); + fp_sub (x, t1, x); + + /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ + if (x->sign == FP_NEG) { + fp_copy (y, t1); + fp_lshd (t1, i - t - 1); + fp_add (x, t1, x); + q->dp[i - t - 1] = q->dp[i - t - 1] - 1; + } + } + + /* now q is the quotient and x is the remainder + * [which we have to normalize] + */ + + /* get sign before writing to c */ + x->sign = x->used == 0 ? FP_ZPOS : a->sign; + + if (c != NULL) { + fp_clamp (q); + fp_copy (q, c); + c->sign = neg; + } + + if (d != NULL) { + fp_div_2d (x, norm, x, NULL); + + /* zero any excess digits on the destination that we didn't write to */ + for (i = b->used; i < x->used; i++) { + x->dp[i] = 0; + } + fp_clamp(x); + fp_copy (x, d); + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(q, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return FP_OKAY; +} + +/* b = a/2 */ +void fp_div_2(fp_int * a, fp_int * b) +{ + int x, oldused; + + oldused = b->used; + b->used = a->used; + { + fp_digit r, rr, *tmpa, *tmpb; + + /* source alias */ + tmpa = a->dp + b->used - 1; + + /* dest alias */ + tmpb = b->dp + b->used - 1; + + /* carry */ + r = 0; + for (x = b->used - 1; x >= 0; x--) { + /* get the carry for the next iteration */ + rr = *tmpa & 1; + + /* shift the current digit, add in carry and store */ + *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); + + /* forward carry to next iteration */ + r = rr; + } + + /* zero any excess digits on the destination that we didn't write to */ + tmpb = b->dp + b->used; + for (x = b->used; x < oldused; x++) { + *tmpb++ = 0; + } + } + b->sign = a->sign; + fp_clamp (b); +} + +/* c = a / 2**b */ +void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d) +{ + int D; + + /* if the shift count is <= 0 then we do no work */ + if (b <= 0) { + fp_copy (a, c); + if (d != NULL) { + fp_zero (d); + } + return; + } + + /* get the remainder before a is changed in calculating c */ + if (a == c && d != NULL) { + fp_mod_2d (a, b, d); + } + + /* copy */ + fp_copy(a, c); + + /* shift by as many digits in the bit count */ + if (b >= (int)DIGIT_BIT) { + fp_rshd (c, b / DIGIT_BIT); + } + + /* shift any bit count < DIGIT_BIT */ + D = (b % DIGIT_BIT); + if (D != 0) { + fp_rshb(c, D); + } + + /* get the remainder if a is not changed in calculating c */ + if (a != c && d != NULL) { + fp_mod_2d (a, b, d); + } + + fp_clamp (c); +} + +/* c = a mod b, 0 <= c < b */ +int fp_mod(fp_int *a, fp_int *b, fp_int *c) +{ +#ifndef WOLFSSL_SMALL_STACK + fp_int t[1]; +#else + fp_int *t; +#endif + int err; + +#ifdef WOLFSSL_SMALL_STACK + t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT); + if (t == NULL) + return FP_MEM; +#endif + + fp_init(t); + err = fp_div(a, b, NULL, t); + if (err == FP_OKAY) { + if (t->sign != b->sign) { + fp_add(t, b, c); + } else { + fp_copy(t, c); + } + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return err; +} + +/* c = a mod 2**d */ +void fp_mod_2d(fp_int *a, int b, fp_int *c) +{ + int x; + + /* zero if count less than or equal to zero */ + if (b <= 0) { + fp_zero(c); + return; + } + + /* get copy of input */ + fp_copy(a, c); + + /* if 2**d is larger than we just return */ + if (b >= (DIGIT_BIT * a->used)) { + return; + } + + /* zero digits above the last digit of the modulus */ + for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 
0 : 1); x < c->used; x++) { + c->dp[x] = 0; + } + /* clear the digit that is not completely outside/inside the modulus */ + c->dp[b / DIGIT_BIT] &= ~((fp_digit)0) >> (DIGIT_BIT - b); + fp_clamp (c); +} + +static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c) +{ +#ifndef WOLFSSL_SMALL_STACK + fp_int x[1], y[1], u[1], v[1], A[1], B[1], C[1], D[1]; +#else + fp_int *x, *y, *u, *v, *A, *B, *C, *D; +#endif + int err; + + /* b cannot be negative */ + if (b->sign == FP_NEG || fp_iszero(b) == FP_YES) { + return FP_VAL; + } + if (fp_iszero(a) == FP_YES) { + return FP_VAL; + } + +#ifdef WOLFSSL_SMALL_STACK + x = (fp_int*)XMALLOC(sizeof(fp_int) * 8, NULL, DYNAMIC_TYPE_BIGINT); + if (x == NULL) { + return FP_MEM; + } + y = &x[1]; u = &x[2]; v = &x[3]; A = &x[4]; B = &x[5]; C = &x[6]; D = &x[7]; +#endif + + /* init temps */ + fp_init(x); fp_init(y); + fp_init(u); fp_init(v); + fp_init(A); fp_init(B); + fp_init(C); fp_init(D); + + /* x = a, y = b */ + if ((err = fp_mod(a, b, x)) != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(x, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return err; + } + fp_copy(b, y); + + /* 2. [modified] if x,y are both even then return an error! */ + if (fp_iseven(x) == FP_YES && fp_iseven(y) == FP_YES) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(x, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return FP_VAL; + } + + /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ + fp_copy (x, u); + fp_copy (y, v); + fp_set (A, 1); + fp_set (D, 1); + +top: + /* 4. while u is even do */ + while (fp_iseven (u) == FP_YES) { + /* 4.1 u = u/2 */ + fp_div_2 (u, u); + + /* 4.2 if A or B is odd then */ + if (fp_isodd (A) == FP_YES || fp_isodd (B) == FP_YES) { + /* A = (A+y)/2, B = (B-x)/2 */ + fp_add (A, y, A); + fp_sub (B, x, B); + } + /* A = A/2, B = B/2 */ + fp_div_2 (A, A); + fp_div_2 (B, B); + } + + /* 5. while v is even do */ + while (fp_iseven (v) == FP_YES) { + /* 5.1 v = v/2 */ + fp_div_2 (v, v); + + /* 5.2 if C or D is odd then */ + if (fp_isodd (C) == FP_YES || fp_isodd (D) == FP_YES) { + /* C = (C+y)/2, D = (D-x)/2 */ + fp_add (C, y, C); + fp_sub (D, x, D); + } + /* C = C/2, D = D/2 */ + fp_div_2 (C, C); + fp_div_2 (D, D); + } + + /* 6. if u >= v then */ + if (fp_cmp (u, v) != FP_LT) { + /* u = u - v, A = A - C, B = B - D */ + fp_sub (u, v, u); + fp_sub (A, C, A); + fp_sub (B, D, B); + } else { + /* v - v - u, C = C - A, D = D - B */ + fp_sub (v, u, v); + fp_sub (C, A, C); + fp_sub (D, B, D); + } + + /* if not zero goto step 4 */ + if (fp_iszero (u) == FP_NO) + goto top; + + /* now a = C, b = D, gcd == g*v */ + + /* if v != 1 then there is no inverse */ + if (fp_cmp_d (v, 1) != FP_EQ) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(x, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return FP_VAL; + } + + /* if its too low */ + while (fp_cmp_d(C, 0) == FP_LT) { + fp_add(C, b, C); + } + + /* too big */ + while (fp_cmp_mag(C, b) != FP_LT) { + fp_sub(C, b, C); + } + + /* C is now the inverse */ + fp_copy(C, c); +#ifdef WOLFSSL_SMALL_STACK + XFREE(x, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return FP_OKAY; +} + +/* c = 1/a (mod b) for odd b only */ +int fp_invmod(fp_int *a, fp_int *b, fp_int *c) +{ +#ifndef WOLFSSL_SMALL_STACK + fp_int x[1], y[1], u[1], v[1], B[1], D[1]; +#else + fp_int *x, *y, *u, *v, *B, *D; +#endif + int neg; + int err; + + if (b->sign == FP_NEG || fp_iszero(b) == FP_YES) { + return FP_VAL; + } + + /* [modified] sanity check on "a" */ + if (fp_iszero(a) == FP_YES) { + return FP_VAL; /* can not divide by 0 here */ + } + + /* 2. 
[modified] b must be odd */ + if (fp_iseven(b) == FP_YES) { + return fp_invmod_slow(a,b,c); + } + +#ifdef WOLFSSL_SMALL_STACK + x = (fp_int*)XMALLOC(sizeof(fp_int) * 6, NULL, DYNAMIC_TYPE_BIGINT); + if (x == NULL) { + return FP_MEM; + } + y = &x[1]; u = &x[2]; v = &x[3]; B = &x[4]; D = &x[5]; +#endif + + /* init all our temps */ + fp_init(x); fp_init(y); + fp_init(u); fp_init(v); + fp_init(B); fp_init(D); + + if (fp_cmp(a, b) != MP_LT) { + err = mp_mod(a, b, y); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(x, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return err; + } + a = y; + } + + if (fp_iszero(a) == FP_YES) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(x, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return FP_VAL; + } + + /* x == modulus, y == value to invert */ + fp_copy(b, x); + + /* we need y = |a| */ + fp_abs(a, y); + + /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ + fp_copy(x, u); + fp_copy(y, v); + fp_set (D, 1); + +top: + /* 4. while u is even do */ + while (fp_iseven (u) == FP_YES) { + /* 4.1 u = u/2 */ + fp_div_2 (u, u); + + /* 4.2 if B is odd then */ + if (fp_isodd (B) == FP_YES) { + fp_sub (B, x, B); + } + /* B = B/2 */ + fp_div_2 (B, B); + } + + /* 5. while v is even do */ + while (fp_iseven (v) == FP_YES) { + /* 5.1 v = v/2 */ + fp_div_2 (v, v); + + /* 5.2 if D is odd then */ + if (fp_isodd (D) == FP_YES) { + /* D = (D-x)/2 */ + fp_sub (D, x, D); + } + /* D = D/2 */ + fp_div_2 (D, D); + } + + /* 6. if u >= v then */ + if (fp_cmp (u, v) != FP_LT) { + /* u = u - v, B = B - D */ + fp_sub (u, v, u); + fp_sub (B, D, B); + } else { + /* v - v - u, D = D - B */ + fp_sub (v, u, v); + fp_sub (D, B, D); + } + + /* if not zero goto step 4 */ + if (fp_iszero (u) == FP_NO) { + goto top; + } + + /* now a = C, b = D, gcd == g*v */ + + /* if v != 1 then there is no inverse */ + if (fp_cmp_d (v, 1) != FP_EQ) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(x, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return FP_VAL; + } + + /* b is now the inverse */ + neg = a->sign; + while (D->sign == FP_NEG) { + fp_add (D, b, D); + } + /* too big */ + while (fp_cmp_mag(D, b) != FP_LT) { + fp_sub(D, b, D); + } + fp_copy (D, c); + c->sign = neg; +#ifdef WOLFSSL_SMALL_STACK + XFREE(x, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return FP_OKAY; +} + +#define CT_INV_MOD_PRE_CNT 8 + +/* modulus (b) must be greater than 2 and a prime */ +int fp_invmod_mont_ct(fp_int *a, fp_int *b, fp_int *c, fp_digit mp) +{ + int i, j; +#ifndef WOLFSSL_SMALL_STACK + fp_int t[1], e[1]; + fp_int pre[CT_INV_MOD_PRE_CNT]; +#else + fp_int* t; + fp_int* e; + fp_int* pre; +#endif + +#ifdef WOLFSSL_SMALL_STACK + t = (fp_int*)XMALLOC(sizeof(fp_int) * (2 + CT_INV_MOD_PRE_CNT), NULL, + DYNAMIC_TYPE_BIGINT); + if (t == NULL) + return FP_MEM; + e = t + 1; + pre = t + 2; +#endif + + fp_init(t); + fp_init(e); + + fp_init(&pre[0]); + fp_copy(a, &pre[0]); + for (i = 1; i < CT_INV_MOD_PRE_CNT; i++) { + fp_init(&pre[i]); + fp_sqr(&pre[i-1], &pre[i]); + fp_montgomery_reduce(&pre[i], b, mp); + fp_mul(&pre[i], a, &pre[i]); + fp_montgomery_reduce(&pre[i], b, mp); + } + + fp_sub_d(b, 2, e); + /* Highest bit is always set. 
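Context for the loop below: fp_invmod_mont_ct relies on Fermat's little theorem, computing 1/a mod b as a^(b-2) mod b for prime b, with a fixed window over CT_INV_MOD_PRE_CNT precomputed powers of a so the access pattern does not depend on the secret. The underlying identity, as a tiny plain-integer sketch (illustrative only, small numbers, no fp_int):

    #include <stdint.h>
    #include <stdio.h>

    /* right-to-left square-and-multiply; fine for small moduli */
    static uint64_t modpow(uint64_t a, uint64_t e, uint64_t p)
    {
        uint64_t r = 1;
        a %= p;
        while (e != 0) {
            if (e & 1)
                r = (r * a) % p;
            a = (a * a) % p;
            e >>= 1;
        }
        return r;
    }

    int main(void)
    {
        uint64_t p = 101, a = 37;             /* p prime, gcd(a, p) == 1 */
        uint64_t inv = modpow(a, p - 2, p);   /* Fermat: a^(p-2) == a^-1 */
        printf("%llu\n", (unsigned long long)((a * inv) % p));  /* prints 1 */
        return 0;
    }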
*/ + for (i = fp_count_bits(e)-2, j = 1; i >= 0; i--, j++) { + if (!fp_is_bit_set(e, i) || j == CT_INV_MOD_PRE_CNT) + break; + } + fp_copy(&pre[j-1], t); + for (j = 0; i >= 0; i--) { + int set = fp_is_bit_set(e, i); + + if ((j == CT_INV_MOD_PRE_CNT) || (!set && j > 0)) { + fp_mul(t, &pre[j-1], t); + fp_montgomery_reduce(t, b, mp); + j = 0; + } + fp_sqr(t, t); + fp_montgomery_reduce(t, b, mp); + j += set; + } + if (j > 0) { + fp_mul(t, &pre[j-1], c); + fp_montgomery_reduce(c, b, mp); + } + else + fp_copy(t, c); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return FP_OKAY; +} + +/* d = a * b (mod c) */ +int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) +{ + int err; +#ifndef WOLFSSL_SMALL_STACK + fp_int t[1]; +#else + fp_int *t; +#endif + +#ifdef WOLFSSL_SMALL_STACK + t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT); + if (t == NULL) + return FP_MEM; +#endif + + fp_init(t); + err = fp_mul(a, b, t); + if (err == FP_OKAY) { + #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT) + if (d->size < FP_SIZE) { + err = fp_mod(t, c, t); + fp_copy(t, d); + } else + #endif + { + err = fp_mod(t, c, d); + } + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return err; +} + +/* d = a - b (mod c) */ +int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) +{ + int err; +#ifndef WOLFSSL_SMALL_STACK + fp_int t[1]; +#else + fp_int *t; +#endif + +#ifdef WOLFSSL_SMALL_STACK + t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT); + if (t == NULL) + return FP_MEM; +#endif + + fp_init(t); + fp_sub(a, b, t); +#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT) + if (d->size < FP_SIZE) { + err = fp_mod(t, c, t); + fp_copy(t, d); + } else +#endif + { + err = fp_mod(t, c, d); + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return err; +} + +/* d = a + b (mod c) */ +int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) +{ + int err; +#ifndef WOLFSSL_SMALL_STACK + fp_int t[1]; +#else + fp_int *t; +#endif + +#ifdef WOLFSSL_SMALL_STACK + t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT); + if (t == NULL) + return FP_MEM; +#endif + + fp_init(t); + fp_add(a, b, t); +#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT) + if (d->size < FP_SIZE) { + err = fp_mod(t, c, t); + fp_copy(t, d); + } else +#endif + { + err = fp_mod(t, c, d); + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return err; +} + +#ifdef TFM_TIMING_RESISTANT + +#ifdef WC_RSA_NONBLOCK + +#ifdef WC_RSA_NONBLOCK_TIME + /* User can override the check-time at build-time using the + * FP_EXPTMOD_NB_CHECKTIME macro to define your own function */ + #ifndef FP_EXPTMOD_NB_CHECKTIME + /* instruction count for each type of operation */ + /* array lookup is using TFM_EXPTMOD_NB_* states */ + static const word32 exptModNbInst[TFM_EXPTMOD_NB_COUNT] = { + #ifdef TFM_PPC32 + #ifdef _DEBUG + 11098, 8701, 3971, 178394, 858093, 1040, 822, 178056, 181574, 90883, 184339, 236813 + #else + 7050, 2554, 3187, 43178, 200422, 384, 275, 43024, 43550, 30450, 46270, 61376 + #endif + #elif defined(TFM_X86_64) + #ifdef _DEBUG + 954, 2377, 858, 19027, 90840, 287, 407, 20140, 7874, 11385, 8005, 6151 + #else + 765, 1007, 771, 5216, 34993, 248, 193, 4975, 4201, 3947, 4275, 3811 + #endif + #else /* software only fast math */ + #ifdef _DEBUG + 798, 2245, 802, 16657, 66920, 352, 186, 16997, 16145, 12789, 16742, 15006 + #else + 775, 1084, 783, 4692, 37510, 207, 183, 4374, 4392, 3097, 4442, 4079 + #endif + 
#endif + }; + + static int fp_exptmod_nb_checktime(exptModNb_t* nb) + { + word32 totalInst; + + /* if no max time has been set then stop (do not block) */ + if (nb->maxBlockInst == 0 || nb->state >= TFM_EXPTMOD_NB_COUNT) { + return TFM_EXPTMOD_NB_STOP; + } + + /* if instruction table not set then use maxBlockInst as simple counter */ + if (exptModNbInst[nb->state] == 0) { + if (++nb->totalInst < nb->maxBlockInst) + return TFM_EXPTMOD_NB_CONTINUE; + + nb->totalInst = 0; /* reset counter */ + return TFM_EXPTMOD_NB_STOP; + } + + /* get total instruction count including next operation */ + totalInst = nb->totalInst + exptModNbInst[nb->state]; + /* if the next operation can completed within the maximum then continue */ + if (totalInst <= nb->maxBlockInst) { + return TFM_EXPTMOD_NB_CONTINUE; + } + + return TFM_EXPTMOD_NB_STOP; + } + #define FP_EXPTMOD_NB_CHECKTIME(nb) fp_exptmod_nb_checktime((nb)) + #endif /* !FP_EXPTMOD_NB_CHECKTIME */ +#endif /* WC_RSA_NONBLOCK_TIME */ + +/* non-blocking version of timing resistant fp_exptmod function */ +/* supports cache resistance */ +int fp_exptmod_nb(exptModNb_t* nb, fp_int* G, fp_int* X, fp_int* P, fp_int* Y) +{ + int err, ret = FP_WOULDBLOCK; + + if (nb == NULL) + return FP_VAL; + +#ifdef WC_RSA_NONBLOCK_TIME + nb->totalInst = 0; + do { + nb->totalInst += exptModNbInst[nb->state]; +#endif + + switch (nb->state) { + case TFM_EXPTMOD_NB_INIT: + /* now setup montgomery */ + if ((err = fp_montgomery_setup(P, &nb->mp)) != FP_OKAY) { + nb->state = TFM_EXPTMOD_NB_INIT; + return err; + } + + /* init ints */ + fp_init(&nb->R[0]); + fp_init(&nb->R[1]); + #ifndef WC_NO_CACHE_RESISTANT + fp_init(&nb->R[2]); + #endif + nb->state = TFM_EXPTMOD_NB_MONT; + break; + + case TFM_EXPTMOD_NB_MONT: + /* mod m -> R[0] */ + fp_montgomery_calc_normalization(&nb->R[0], P); + + nb->state = TFM_EXPTMOD_NB_MONT_RED; + break; + + case TFM_EXPTMOD_NB_MONT_RED: + /* reduce G -> R[1] */ + if (fp_cmp_mag(P, G) != FP_GT) { + /* G > P so we reduce it first */ + fp_mod(G, P, &nb->R[1]); + } else { + fp_copy(G, &nb->R[1]); + } + + nb->state = TFM_EXPTMOD_NB_MONT_MUL; + break; + + case TFM_EXPTMOD_NB_MONT_MUL: + /* G (R[1]) * m (R[0]) */ + err = fp_mul(&nb->R[1], &nb->R[0], &nb->R[1]); + if (err != FP_OKAY) { + nb->state = TFM_EXPTMOD_NB_INIT; + return err; + } + + nb->state = TFM_EXPTMOD_NB_MONT_MOD; + break; + + case TFM_EXPTMOD_NB_MONT_MOD: + /* mod m */ + err = fp_div(&nb->R[1], P, NULL, &nb->R[1]); + if (err != FP_OKAY) { + nb->state = TFM_EXPTMOD_NB_INIT; + return err; + } + + nb->state = TFM_EXPTMOD_NB_MONT_MODCHK; + break; + + case TFM_EXPTMOD_NB_MONT_MODCHK: + /* m matches sign of (G * R mod m) */ + if (nb->R[1].sign != P->sign) { + fp_add(&nb->R[1], P, &nb->R[1]); + } + + /* set initial mode and bit cnt */ + nb->bitcnt = 1; + nb->buf = 0; + nb->digidx = X->used - 1; + + nb->state = TFM_EXPTMOD_NB_NEXT; + break; + + case TFM_EXPTMOD_NB_NEXT: + /* grab next digit as required */ + if (--nb->bitcnt == 0) { + /* if nb->digidx == -1 we are out of digits so break */ + if (nb->digidx == -1) { + nb->state = TFM_EXPTMOD_NB_RED; + break; + } + /* read next digit and reset nb->bitcnt */ + nb->buf = X->dp[nb->digidx--]; + nb->bitcnt = (int)DIGIT_BIT; + } + + /* grab the next msb from the exponent */ + nb->y = (int)(nb->buf >> (DIGIT_BIT - 1)) & 1; + nb->buf <<= (fp_digit)1; + nb->state = TFM_EXPTMOD_NB_MUL; + FALL_THROUGH; + + case TFM_EXPTMOD_NB_MUL: + fp_mul(&nb->R[0], &nb->R[1], &nb->R[nb->y^1]); + nb->state = TFM_EXPTMOD_NB_MUL_RED; + break; + + case TFM_EXPTMOD_NB_MUL_RED: + 
fp_montgomery_reduce(&nb->R[nb->y^1], P, nb->mp); + nb->state = TFM_EXPTMOD_NB_SQR; + break; + + case TFM_EXPTMOD_NB_SQR: + #ifdef WC_NO_CACHE_RESISTANT + fp_sqr(&nb->R[nb->y], &nb->R[nb->y]); + #else + fp_copy((fp_int*) ( ((wolfssl_word)&nb->R[0] & wc_off_on_addr[nb->y^1]) + + ((wolfssl_word)&nb->R[1] & wc_off_on_addr[nb->y]) ), + &nb->R[2]); + fp_sqr(&nb->R[2], &nb->R[2]); + #endif /* WC_NO_CACHE_RESISTANT */ + + nb->state = TFM_EXPTMOD_NB_SQR_RED; + break; + + case TFM_EXPTMOD_NB_SQR_RED: + #ifdef WC_NO_CACHE_RESISTANT + fp_montgomery_reduce(&nb->R[nb->y], P, nb->mp); + #else + fp_montgomery_reduce(&nb->R[2], P, nb->mp); + fp_copy(&nb->R[2], + (fp_int*) ( ((wolfssl_word)&nb->R[0] & wc_off_on_addr[nb->y^1]) + + ((wolfssl_word)&nb->R[1] & wc_off_on_addr[nb->y]) ) ); + #endif /* WC_NO_CACHE_RESISTANT */ + + nb->state = TFM_EXPTMOD_NB_NEXT; + break; + + case TFM_EXPTMOD_NB_RED: + /* final reduce */ + fp_montgomery_reduce(&nb->R[0], P, nb->mp); + fp_copy(&nb->R[0], Y); + + nb->state = TFM_EXPTMOD_NB_INIT; + ret = FP_OKAY; + break; + } /* switch */ + +#ifdef WC_RSA_NONBLOCK_TIME + /* determine if maximum blocking time has been reached */ + } while (ret == FP_WOULDBLOCK && + FP_EXPTMOD_NB_CHECKTIME(nb) == TFM_EXPTMOD_NB_CONTINUE); +#endif + + return ret; +} + +#endif /* WC_RSA_NONBLOCK */ + + +/* timing resistant montgomery ladder based exptmod + Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", + Cryptographic Hardware and Embedded Systems, CHES 2002 +*/ +static int _fp_exptmod_ct(fp_int * G, fp_int * X, int digits, fp_int * P, + fp_int * Y) +{ +#ifndef WOLFSSL_SMALL_STACK +#ifdef WC_NO_CACHE_RESISTANT + fp_int R[2]; +#else + fp_int R[3]; /* need a temp for cache resistance */ +#endif +#else + fp_int *R; +#endif + fp_digit buf, mp; + int err, bitcnt, digidx, y; + + /* now setup montgomery */ + if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) { + return err; + } + +#ifdef WOLFSSL_SMALL_STACK +#ifndef WC_NO_CACHE_RESISTANT + R = (fp_int*)XMALLOC(sizeof(fp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT); +#else + R = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); +#endif + if (R == NULL) + return FP_MEM; +#endif + fp_init(&R[0]); + fp_init(&R[1]); +#ifndef WC_NO_CACHE_RESISTANT + fp_init(&R[2]); +#endif + + /* now we need R mod m */ + fp_montgomery_calc_normalization (&R[0], P); + + /* now set R[0][1] to G * R mod m */ + if (fp_cmp_mag(P, G) != FP_GT) { + /* G > P so we reduce it first */ + fp_mod(G, P, &R[1]); + } else { + fp_copy(G, &R[1]); + } + fp_mulmod (&R[1], &R[0], P, &R[1]); + + /* for j = t-1 downto 0 do + r_!k = R0*R1; r_k = r_k^2 + */ + + /* set initial mode and bit cnt */ + bitcnt = 1; + buf = 0; + digidx = digits - 1; + + for (;;) { + /* grab next digit as required */ + if (--bitcnt == 0) { + /* if digidx == -1 we are out of digits so break */ + if (digidx == -1) { + break; + } + /* read next digit and reset bitcnt */ + buf = X->dp[digidx--]; + bitcnt = (int)DIGIT_BIT; + } + + /* grab the next msb from the exponent */ + y = (int)(buf >> (DIGIT_BIT - 1)) & 1; + buf <<= (fp_digit)1; + + /* do ops */ + err = fp_mul(&R[0], &R[1], &R[y^1]); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(R, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return err; + } + err = fp_montgomery_reduce(&R[y^1], P, mp); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(R, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return err; + } + +#ifdef WC_NO_CACHE_RESISTANT + err = fp_sqr(&R[y], &R[y]); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(R, NULL, 
DYNAMIC_TYPE_BIGINT); + #endif + return err; + } + err = fp_montgomery_reduce(&R[y], P, mp); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(R, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return err; + } +#else + /* instead of using R[y] for sqr, which leaks key bit to cache monitor, + * use R[2] as temp, make sure address calc is constant, keep + * &R[0] and &R[1] in cache */ + fp_copy((fp_int*) ( ((wolfssl_word)&R[0] & wc_off_on_addr[y^1]) + + ((wolfssl_word)&R[1] & wc_off_on_addr[y]) ), + &R[2]); + err = fp_sqr(&R[2], &R[2]); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(R, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return err; + } + err = fp_montgomery_reduce(&R[2], P, mp); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(R, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return err; + } + fp_copy(&R[2], + (fp_int*) ( ((wolfssl_word)&R[0] & wc_off_on_addr[y^1]) + + ((wolfssl_word)&R[1] & wc_off_on_addr[y]) ) ); +#endif /* WC_NO_CACHE_RESISTANT */ + } + + err = fp_montgomery_reduce(&R[0], P, mp); + fp_copy(&R[0], Y); +#ifdef WOLFSSL_SMALL_STACK + XFREE(R, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return err; +} + +#endif /* TFM_TIMING_RESISTANT */ + +/* y = g**x (mod b) + * Some restrictions... x must be positive and < b + */ +static int _fp_exptmod_nct(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) +{ + fp_int *res; + fp_int *M; + fp_digit buf, mp; + int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; + + /* find window size */ + x = fp_count_bits (X); + if (x <= 21) { + winsize = 1; + } else if (x <= 36) { + winsize = 3; + } else if (x <= 140) { + winsize = 4; + } else if (x <= 450) { + winsize = 5; + } else { + winsize = 6; + } + + /* now setup montgomery */ + if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) { + return err; + } + + /* only allocate space for what's needed for window plus res */ + M = (fp_int*)XMALLOC(sizeof(fp_int)*((1 << winsize) + 1), NULL, + DYNAMIC_TYPE_BIGINT); + if (M == NULL) { + return FP_MEM; + } + res = &M[1 << winsize]; + + /* init M array */ + for(x = 0; x < (1 << winsize); x++) + fp_init(&M[x]); + + /* setup result */ + fp_init(res); + + /* create M table + * + * The M table contains powers of the input base, e.g. 
M[x] = G^x mod P
+   *
+   * The first half of the table is not computed though except for M[0] and M[1]
+   */
+
+  /* now we need R mod m */
+  fp_montgomery_calc_normalization (res, P);
+
+  /* now set M[1] to G * R mod m */
+  if (fp_cmp_mag(P, G) != FP_GT) {
+     /* G > P so we reduce it first */
+     fp_mod(G, P, &M[1]);
+  } else {
+     fp_copy(G, &M[1]);
+  }
+  fp_mulmod (&M[1], res, P, &M[1]);
+
+  /* compute the value at M[1<<(winsize-1)] by
+   * squaring M[1] (winsize-1) times */
+  fp_copy (&M[1], &M[1 << (winsize - 1)]);
+  for (x = 0; x < (winsize - 1); x++) {
+    fp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)]);
+    err = fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp);
+    if (err != FP_OKAY) {
+      XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+      return err;
+    }
+  }
+
+  /* create upper table */
+  for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
+    err = fp_mul(&M[x - 1], &M[1], &M[x]);
+    if (err != FP_OKAY) {
+      XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+      return err;
+    }
+    err = fp_montgomery_reduce(&M[x], P, mp);
+    if (err != FP_OKAY) {
+      XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+      return err;
+    }
+  }
+
+  /* set initial mode and bit cnt */
+  mode   = 0;
+  bitcnt = (x % DIGIT_BIT) + 1;
+  buf    = 0;
+  digidx = X->used - 1;
+  bitcpy = 0;
+  bitbuf = 0;
+
+  for (;;) {
+    /* grab next digit as required */
+    if (--bitcnt == 0) {
+      /* if digidx == -1 we are out of digits so break */
+      if (digidx == -1) {
+        break;
+      }
+      /* read next digit and reset bitcnt */
+      buf    = X->dp[digidx--];
+      bitcnt = (int)DIGIT_BIT;
+    }
+
+    /* grab the next msb from the exponent */
+    y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
+    buf <<= (fp_digit)1;
+
+    /* if the bit is zero and mode == 0 then we ignore it
+     * These represent the leading zero bits before the first 1 bit
+     * in the exponent. Technically this opt is not required but it
+     * does lower the # of trivial squaring/reductions used
+     */
+    if (mode == 0 && y == 0) {
+      continue;
+    }
+
+    /* if the bit is zero and mode == 1 then we square */
+    if (mode == 1 && y == 0) {
+      err = fp_sqr(res, res);
+      if (err != FP_OKAY) {
+        XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+        return err;
+      }
+      err = fp_montgomery_reduce(res, P, mp);
+      if (err != FP_OKAY) {
+        XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+        return err;
+      }
+      continue;
+    }
+
+    /* else we add it to the window */
+    bitbuf |= (y << (winsize - ++bitcpy));
+    mode    = 2;
+
+    if (bitcpy == winsize) {
+      /* ok window is filled so square as required and multiply */
+      /* square first */
+      for (x = 0; x < winsize; x++) {
+        err = fp_sqr(res, res);
+        if (err != FP_OKAY) {
+          XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+          return err;
+        }
+        err = fp_montgomery_reduce(res, P, mp);
+        if (err != FP_OKAY) {
+          XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+          return err;
+        }
+      }
+
+      /* then multiply */
+      err = fp_mul(res, &M[bitbuf], res);
+      if (err != FP_OKAY) {
+        XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+        return err;
+      }
+      err = fp_montgomery_reduce(res, P, mp);
+      if (err != FP_OKAY) {
+        XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+        return err;
+      }
+
+      /* empty window and reset */
+      bitcpy = 0;
+      bitbuf = 0;
+      mode   = 1;
+    }
+  }
+
+  /* if bits remain then square/multiply */
+  if (mode == 2 && bitcpy > 0) {
+    /* square then multiply if the bit is set */
+    for (x = 0; x < bitcpy; x++) {
+      err = fp_sqr(res, res);
+      if (err != FP_OKAY) {
+        XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+        return err;
+      }
+      err = fp_montgomery_reduce(res, P, mp);
+      if (err != FP_OKAY) {
+        XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+        return err;
+      }
+
+      /* get next bit of the window */
+      bitbuf <<= 1;
+      if ((bitbuf & (1 << winsize)) != 0) {
+        /*
then multiply */ + err = fp_mul(res, &M[1], res); + if (err != FP_OKAY) { + XFREE(M, NULL, DYNAMIC_TYPE_BIGINT); + return err; + } + err = fp_montgomery_reduce(res, P, mp); + if (err != FP_OKAY) { + XFREE(M, NULL, DYNAMIC_TYPE_BIGINT); + return err; + } + } + } + } + + /* fixup result if Montgomery reduction is used + * recall that any value in a Montgomery system is + * actually multiplied by R mod n. So we have + * to reduce one more time to cancel out the factor + * of R. + */ + err = fp_montgomery_reduce(res, P, mp); + + /* swap res with Y */ + fp_copy (res, Y); + + XFREE(M, NULL, DYNAMIC_TYPE_BIGINT); + return err; +} + + +#ifdef TFM_TIMING_RESISTANT +#if DIGIT_BIT <= 16 + #define WINSIZE 2 +#elif DIGIT_BIT <= 32 + #define WINSIZE 3 +#elif DIGIT_BIT <= 64 + #define WINSIZE 4 +#elif DIGIT_BIT <= 128 + #define WINSIZE 5 +#endif + +/* y = 2**x (mod b) + * Some restrictions... x must be positive and < b + */ +static int _fp_exptmod_base_2(fp_int * X, int digits, fp_int * P, + fp_int * Y) +{ + fp_digit buf, mp; + int err, bitbuf, bitcpy, bitcnt, digidx, x, y; +#ifdef WOLFSSL_SMALL_STACK + fp_int *res; + fp_int *tmp; +#else + fp_int res[1]; + fp_int tmp[1]; +#endif + +#ifdef WOLFSSL_SMALL_STACK + res = (fp_int*)XMALLOC(2*sizeof(fp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (res == NULL) { + return FP_MEM; + } + tmp = &res[1]; +#endif + + /* now setup montgomery */ + if ((err = fp_montgomery_setup(P, &mp)) != FP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return err; + } + + /* setup result */ + fp_init(res); + fp_init(tmp); + + fp_mul_2d(P, 1 << WINSIZE, tmp); + + /* now we need R mod m */ + fp_montgomery_calc_normalization(res, P); + + /* Get the top bits left over after taking WINSIZE bits starting at the + * least-significant. 
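(Worked example: with DIGIT_BIT == 32 and WINSIZE == 3, a 2048-bit exponent spans 64 digits, so 64 * 32 % 3 == 2 top bits are left over; those two bits are folded in first, and the remaining 2046 bits are then consumed as 682 full 3-bit windows.)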
+ */ + digidx = digits - 1; + bitcpy = (digits * DIGIT_BIT) % WINSIZE; + if (bitcpy > 0) { + bitcnt = (int)DIGIT_BIT - bitcpy; + buf = X->dp[digidx--]; + bitbuf = (int)(buf >> bitcnt); + /* Multiply montgomery representation of 1 by 2 ^ top */ + fp_mul_2d(res, bitbuf, res); + fp_add(res, tmp, res); + err = fp_mod(res, P, res); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return err; + } + /* Move out bits used */ + buf <<= bitcpy; + bitcnt++; + } + else { + bitcnt = 1; + buf = 0; + } + + /* empty window and reset */ + bitbuf = 0; + bitcpy = 0; + + for (;;) { + /* grab next digit as required */ + if (--bitcnt == 0) { + /* if digidx == -1 we are out of digits so break */ + if (digidx == -1) { + break; + } + /* read next digit and reset bitcnt */ + buf = X->dp[digidx--]; + bitcnt = (int)DIGIT_BIT; + } + + /* grab the next msb from the exponent */ + y = (int)(buf >> (DIGIT_BIT - 1)) & 1; + buf <<= (fp_digit)1; + /* add bit to the window */ + bitbuf |= (y << (WINSIZE - ++bitcpy)); + + if (bitcpy == WINSIZE) { + /* ok window is filled so square as required and multiply */ + /* square first */ + for (x = 0; x < WINSIZE; x++) { + err = fp_sqr(res, res); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return err; + } + err = fp_montgomery_reduce(res, P, mp); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return err; + } + } + + /* then multiply by 2^bitbuf */ + fp_mul_2d(res, bitbuf, res); + /* Add in value to make mod operation take same time */ + fp_add(res, tmp, res); + err = fp_mod(res, P, res); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return err; + } + + /* empty window and reset */ + bitcpy = 0; + bitbuf = 0; + } + } + + /* fixup result if Montgomery reduction is used + * recall that any value in a Montgomery system is + * actually multiplied by R mod n. So we have + * to reduce one more time to cancel out the factor + * of R. + */ + err = fp_montgomery_reduce(res, P, mp); + + /* swap res with Y */ + fp_copy(res, Y); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return err; +} + +#undef WINSIZE +#else +#if DIGIT_BIT < 16 + #define WINSIZE 3 +#elif DIGIT_BIT < 32 + #define WINSIZE 4 +#elif DIGIT_BIT < 64 + #define WINSIZE 5 +#elif DIGIT_BIT < 128 + #define WINSIZE 6 +#elif DIGIT_BIT == 128 + #define WINSIZE 7 +#endif + +/* y = 2**x (mod b) + * Some restrictions... x must be positive and < b + */ +static int _fp_exptmod_base_2(fp_int * X, int digits, fp_int * P, + fp_int * Y) +{ + fp_digit buf, mp; + int err, bitbuf, bitcpy, bitcnt, digidx, x, y; +#ifdef WOLFSSL_SMALL_STACK + fp_int *res; +#else + fp_int res[1]; +#endif + +#ifdef WOLFSSL_SMALL_STACK + res = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (res == NULL) { + return FP_MEM; + } +#endif + + /* now setup montgomery */ + if ((err = fp_montgomery_setup(P, &mp)) != FP_OKAY) { + return err; + } + + /* setup result */ + fp_init(res); + + /* now we need R mod m */ + fp_montgomery_calc_normalization(res, P); + + /* Get the top bits left over after taking WINSIZE bits starting at the + * least-significant. 
+ */ + digidx = digits - 1; + bitcpy = (digits * DIGIT_BIT) % WINSIZE; + if (bitcpy > 0) { + bitcnt = (int)DIGIT_BIT - bitcpy; + buf = X->dp[digidx--]; + bitbuf = (int)(buf >> bitcnt); + /* Multiply montgomery representation of 1 by 2 ^ top */ + fp_mul_2d(res, bitbuf, res); + err = fp_mod(res, P, res); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return err; + } + /* Move out bits used */ + buf <<= bitcpy; + bitcnt++; + } + else { + bitcnt = 1; + buf = 0; + } + + /* empty window and reset */ + bitbuf = 0; + bitcpy = 0; + + for (;;) { + /* grab next digit as required */ + if (--bitcnt == 0) { + /* if digidx == -1 we are out of digits so break */ + if (digidx == -1) { + break; + } + /* read next digit and reset bitcnt */ + buf = X->dp[digidx--]; + bitcnt = (int)DIGIT_BIT; + } + + /* grab the next msb from the exponent */ + y = (int)(buf >> (DIGIT_BIT - 1)) & 1; + buf <<= (fp_digit)1; + /* add bit to the window */ + bitbuf |= (y << (WINSIZE - ++bitcpy)); + + if (bitcpy == WINSIZE) { + /* ok window is filled so square as required and multiply */ + /* square first */ + for (x = 0; x < WINSIZE; x++) { + err = fp_sqr(res, res); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return err; + } + err = fp_montgomery_reduce(res, P, mp); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return err; + } + } + + /* then multiply by 2^bitbuf */ + fp_mul_2d(res, bitbuf, res); + err = fp_mod(res, P, res); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return err; + } + + /* empty window and reset */ + bitcpy = 0; + bitbuf = 0; + } + } + + /* fixup result if Montgomery reduction is used + * recall that any value in a Montgomery system is + * actually multiplied by R mod n. So we have + * to reduce one more time to cancel out the factor + * of R. 
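Every quantity in this loop is carried in Montgomery form, i.e. multiplied by R mod P, and a Montgomery reduction divides by R, which is why one extra reduce restores the plain value. A toy single-digit REDC with hypothetical parameters (M = 97, R = 2^8, MP = -1/M mod R = 95; illustrative only, not the fp_int code):

    #include <stdint.h>
    #include <stdio.h>

    #define M   97u   /* toy odd modulus */
    #define R   256u  /* Montgomery radix 2^8 > M */
    #define MP  95u   /* -1/M mod R, since 97*95 == 9215 == -1 (mod 256) */

    /* REDC: returns t * R^-1 mod M, for t < M*R */
    static uint32_t redc(uint32_t t)
    {
        uint32_t u = (t * MP) & (R - 1u);  /* u = t * (-1/M) mod R */
        t = (t + u * M) / R;               /* low byte cancels; exact division */
        return (t >= M) ? (t - M) : t;
    }

    int main(void)
    {
        uint32_t x = 40, y = 7;
        uint32_t xr = (x * R) % M;    /* into Montgomery form: x*R mod M */
        uint32_t yr = (y * R) % M;
        uint32_t zr = redc(xr * yr);  /* product keeps ONE factor of R  */
        uint32_t z  = redc(zr);       /* the final fixup strips it      */
        printf("%u\n", z);            /* prints 86 == (40*7) % 97       */
        return 0;
    }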
+ */ + err = fp_montgomery_reduce(res, P, mp); + + /* swap res with Y */ + fp_copy(res, Y); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return err; +} + +#undef WINSIZE +#endif + + +int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) +{ + +#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) + int x = fp_count_bits (X); +#endif + + /* handle modulus of zero and prevent overflows */ + if (fp_iszero(P) || (P->used > (FP_SIZE/2))) { + return FP_VAL; + } + if (fp_isone(P)) { + fp_set(Y, 0); + return FP_OKAY; + } + if (fp_iszero(X)) { + fp_set(Y, 1); + return FP_OKAY; + } + if (fp_iszero(G)) { + fp_set(Y, 0); + return FP_OKAY; + } + +#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) + if(x > EPS_RSA_EXPT_XBTIS) { + return esp_mp_exptmod(G, X, x, P, Y); + } +#endif + + if (X->sign == FP_NEG) { +#ifndef POSITIVE_EXP_ONLY /* reduce stack if assume no negatives */ + int err; + #ifndef WOLFSSL_SMALL_STACK + fp_int tmp[2]; + #else + fp_int *tmp; + #endif + + #ifdef WOLFSSL_SMALL_STACK + tmp = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); + if (tmp == NULL) + return FP_MEM; + #endif + + /* yes, copy G and invmod it */ + fp_init_copy(&tmp[0], G); + fp_init_copy(&tmp[1], P); + tmp[1].sign = FP_ZPOS; + err = fp_invmod(&tmp[0], &tmp[1], &tmp[0]); + if (err == FP_OKAY) { + fp_copy(X, &tmp[1]); + tmp[1].sign = FP_ZPOS; +#ifdef TFM_TIMING_RESISTANT + err = _fp_exptmod_ct(&tmp[0], &tmp[1], tmp[1].used, P, Y); +#else + err = _fp_exptmod_nct(&tmp[0], &tmp[1], P, Y); +#endif + if (P->sign == FP_NEG) { + fp_add(Y, P, Y); + } + } + #ifdef WOLFSSL_SMALL_STACK + XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return err; +#else + return FP_VAL; +#endif + } + else if (G->used == 1 && G->dp[0] == 2) { + return _fp_exptmod_base_2(X, X->used, P, Y); + } + else { + /* Positive exponent so just exptmod */ +#ifdef TFM_TIMING_RESISTANT + return _fp_exptmod_ct(G, X, X->used, P, Y); +#else + return _fp_exptmod_nct(G, X, P, Y); +#endif + } +} + +int fp_exptmod_ex(fp_int * G, fp_int * X, int digits, fp_int * P, fp_int * Y) +{ + +#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) + int x = fp_count_bits (X); +#endif + + if (fp_iszero(G)) { + fp_set(G, 0); + return FP_OKAY; + } + + /* prevent overflows */ + if (P->used > (FP_SIZE/2)) { + return FP_VAL; + } + +#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) + if(x > EPS_RSA_EXPT_XBTIS) { + return esp_mp_exptmod(G, X, x, P, Y); + } +#endif + + if (X->sign == FP_NEG) { +#ifndef POSITIVE_EXP_ONLY /* reduce stack if assume no negatives */ + int err; + #ifndef WOLFSSL_SMALL_STACK + fp_int tmp[2]; + #else + fp_int *tmp; + #endif + + #ifdef WOLFSSL_SMALL_STACK + tmp = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (tmp == NULL) + return FP_MEM; + #endif + + /* yes, copy G and invmod it */ + fp_init_copy(&tmp[0], G); + fp_init_copy(&tmp[1], P); + tmp[1].sign = FP_ZPOS; + err = fp_invmod(&tmp[0], &tmp[1], &tmp[0]); + if (err == FP_OKAY) { + X->sign = FP_ZPOS; +#ifdef TFM_TIMING_RESISTANT + err = _fp_exptmod_ct(&tmp[0], X, digits, P, Y); +#else + err = _fp_exptmod_nct(&tmp[0], X, P, Y); + (void)digits; +#endif + if (X != Y) { + X->sign = FP_NEG; + } + if (P->sign == FP_NEG) { + fp_add(Y, P, Y); + } + } + #ifdef WOLFSSL_SMALL_STACK + XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT); + #endif + 
return err; +#else + return FP_VAL; +#endif + } + else { + /* Positive exponent so just exptmod */ +#ifdef TFM_TIMING_RESISTANT + return _fp_exptmod_ct(G, X, digits, P, Y); +#else + return _fp_exptmod_nct(G, X, P, Y); +#endif + } +} + +int fp_exptmod_nct(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) +{ +#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) + int x = fp_count_bits (X); +#endif + + if (fp_iszero(G)) { + fp_set(G, 0); + return FP_OKAY; + } + + /* prevent overflows */ + if (P->used > (FP_SIZE/2)) { + return FP_VAL; + } + +#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) + if(x > EPS_RSA_EXPT_XBTIS) { + return esp_mp_exptmod(G, X, x, P, Y); + } +#endif + + if (X->sign == FP_NEG) { +#ifndef POSITIVE_EXP_ONLY /* reduce stack if assume no negatives */ + int err; + #ifndef WOLFSSL_SMALL_STACK + fp_int tmp[2]; + #else + fp_int *tmp; + #endif + + #ifdef WOLFSSL_SMALL_STACK + tmp = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (tmp == NULL) + return FP_MEM; + #endif + + /* yes, copy G and invmod it */ + fp_init_copy(&tmp[0], G); + fp_init_copy(&tmp[1], P); + tmp[1].sign = FP_ZPOS; + err = fp_invmod(&tmp[0], &tmp[1], &tmp[0]); + if (err == FP_OKAY) { + X->sign = FP_ZPOS; + err = _fp_exptmod_nct(&tmp[0], X, P, Y); + if (X != Y) { + X->sign = FP_NEG; + } + if (P->sign == FP_NEG) { + fp_add(Y, P, Y); + } + } + #ifdef WOLFSSL_SMALL_STACK + XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return err; +#else + return FP_VAL; +#endif + } + else { + /* Positive exponent so just exptmod */ + return _fp_exptmod_nct(G, X, P, Y); + } +} + +/* computes a = 2**b */ +void fp_2expt(fp_int *a, int b) +{ + int z; + + /* zero a as per default */ + fp_zero (a); + + if (b < 0) { + return; + } + + z = b / DIGIT_BIT; + if (z >= FP_SIZE) { + return; + } + + /* set the used count of where the bit will go */ + a->used = z + 1; + + /* put the single bit in its place */ + a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT); +} + +/* b = a*a */ +int fp_sqr(fp_int *A, fp_int *B) +{ + int err; + int y, oldused; + + oldused = B->used; + y = A->used; + + /* call generic if we're out of range */ + if (y + y > FP_SIZE) { + err = fp_sqr_comba(A, B); + goto clean; + } + +#if defined(TFM_SQR3) && FP_SIZE >= 6 + if (y <= 3) { + err = fp_sqr_comba3(A,B); + goto clean; + } +#endif +#if defined(TFM_SQR4) && FP_SIZE >= 8 + if (y == 4) { + err = fp_sqr_comba4(A,B); + goto clean; + } +#endif +#if defined(TFM_SQR6) && FP_SIZE >= 12 + if (y <= 6) { + err = fp_sqr_comba6(A,B); + goto clean; + } +#endif +#if defined(TFM_SQR7) && FP_SIZE >= 14 + if (y == 7) { + err = fp_sqr_comba7(A,B); + goto clean; + } +#endif +#if defined(TFM_SQR8) && FP_SIZE >= 16 + if (y == 8) { + err = fp_sqr_comba8(A,B); + goto clean; + } +#endif +#if defined(TFM_SQR9) && FP_SIZE >= 18 + if (y == 9) { + err = fp_sqr_comba9(A,B); + goto clean; + } +#endif +#if defined(TFM_SQR12) && FP_SIZE >= 24 + if (y <= 12) { + err = fp_sqr_comba12(A,B); + goto clean; + } +#endif +#if defined(TFM_SQR17) && FP_SIZE >= 34 + if (y <= 17) { + err = fp_sqr_comba17(A,B); + goto clean; + } +#endif +#if defined(TFM_SMALL_SET) + if (y <= 16) { + err = fp_sqr_comba_small(A,B); + goto clean; + } +#endif +#if defined(TFM_SQR20) && FP_SIZE >= 40 + if (y <= 20) { + err = fp_sqr_comba20(A,B); + goto clean; + } +#endif +#if defined(TFM_SQR24) && FP_SIZE >= 48 + if (y <= 24) { + err = fp_sqr_comba24(A,B); + goto clean; + } +#endif +#if defined(TFM_SQR28) && FP_SIZE >= 
56 + if (y <= 28) { + err = fp_sqr_comba28(A,B); + goto clean; + } +#endif +#if defined(TFM_SQR32) && FP_SIZE >= 64 + if (y <= 32) { + err = fp_sqr_comba32(A,B); + goto clean; + } +#endif +#if defined(TFM_SQR48) && FP_SIZE >= 96 + if (y <= 48) { + err = fp_sqr_comba48(A,B); + goto clean; + } +#endif +#if defined(TFM_SQR64) && FP_SIZE >= 128 + if (y <= 64) { + err = fp_sqr_comba64(A,B); + goto clean; + } +#endif + err = fp_sqr_comba(A, B); + +clean: + /* zero any excess digits on the destination that we didn't write to */ + for (y = B->used; y >= 0 && y < oldused; y++) { + B->dp[y] = 0; + } + + return err; +} + +/* generic comba squarer */ +int fp_sqr_comba(fp_int *A, fp_int *B) +{ + int pa, ix, iz; + fp_digit c0, c1, c2; +#ifdef TFM_ISO + fp_word tt; +#endif + fp_int *dst; +#ifndef WOLFSSL_SMALL_STACK + fp_int tmp[1]; +#else + fp_int *tmp; +#endif + +#ifdef WOLFSSL_SMALL_STACK + tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT); + if (tmp == NULL) + return FP_MEM; +#endif + + /* get size of output and trim */ + pa = A->used + A->used; + if (pa >= FP_SIZE) { + pa = FP_SIZE-1; + } + + /* number of output digits to produce */ + COMBA_START; + COMBA_CLEAR; + + if (A == B) { + fp_init(tmp); + dst = tmp; + } else { + fp_zero(B); + dst = B; + } + + for (ix = 0; ix < pa; ix++) { + int tx, ty, iy; + fp_digit *tmpy, *tmpx; + + /* get offsets into the two bignums */ + ty = MIN(A->used-1, ix); + tx = ix - ty; + + /* setup temp aliases */ + tmpx = A->dp + tx; + tmpy = A->dp + ty; + + /* this is the number of times the loop will iterate, + while (tx++ < a->used && ty-- >= 0) { ... } + */ + iy = MIN(A->used-tx, ty+1); + + /* now for squaring tx can never equal ty + * we halve the distance since they approach + * at a rate of 2x and we have to round because + * odd cases need to be executed + */ + iy = MIN(iy, (ty-tx+1)>>1); + + /* forward carries */ + COMBA_FORWARD; + + /* execute loop */ + for (iz = 0; iz < iy; iz++) { + SQRADD2(*tmpx++, *tmpy--); + } + + /* even columns have the square term in them */ + if ((ix&1) == 0) { + /* TAO change COMBA_ADD back to SQRADD */ + SQRADD(A->dp[ix>>1], A->dp[ix>>1]); + } + + /* store it */ + COMBA_STORE(dst->dp[ix]); + } + + COMBA_FINI; + + /* setup dest */ + dst->used = pa; + fp_clamp (dst); + if (dst != B) { + fp_copy(dst, B); + } + + /* Variables used but not seen by cppcheck. 
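The halving of iy above encodes the symmetry of squaring: every cross product $a_i a_j$ with $i < j$ lands in its column twice, and only even columns pick up a lone square term. Before carry propagation, column $k$ of the square is:

$$ c_k \;=\; \sum_{\substack{i+j=k \\ i<j}} 2\,a_i a_j \;+\; \begin{cases} a_{k/2}^{2}, & k \text{ even} \\ 0, & k \text{ odd,} \end{cases} $$

which is why the inner loop runs SQRADD2 over only half the index pairs and adds SQRADD(A->dp[ix>>1], A->dp[ix>>1]) just when ix is even.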
*/ + (void)c0; (void)c1; (void)c2; +#ifdef TFM_ISO + (void)tt; +#endif + +#ifdef WOLFSSL_SMALL_STACK + XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return FP_OKAY; +} + +int fp_cmp(fp_int *a, fp_int *b) +{ + if (a->sign == FP_NEG && b->sign == FP_ZPOS) { + return FP_LT; + } else if (a->sign == FP_ZPOS && b->sign == FP_NEG) { + return FP_GT; + } else { + /* compare digits */ + if (a->sign == FP_NEG) { + /* if negative compare opposite direction */ + return fp_cmp_mag(b, a); + } else { + return fp_cmp_mag(a, b); + } + } +} + +/* compare against a single digit */ +int fp_cmp_d(fp_int *a, fp_digit b) +{ + /* special case for zero*/ + if (a->used == 0 && b == 0) + return FP_EQ; + + /* compare based on sign */ + if ((b && a->used == 0) || a->sign == FP_NEG) { + return FP_LT; + } + + /* compare based on magnitude */ + if (a->used > 1) { + return FP_GT; + } + + /* compare the only digit of a to b */ + if (a->dp[0] > b) { + return FP_GT; + } else if (a->dp[0] < b) { + return FP_LT; + } else { + return FP_EQ; + } + +} + +int fp_cmp_mag(fp_int *a, fp_int *b) +{ + int x; + + if (a->used > b->used) { + return FP_GT; + } else if (a->used < b->used) { + return FP_LT; + } else { + for (x = a->used - 1; x >= 0; x--) { + if (a->dp[x] > b->dp[x]) { + return FP_GT; + } else if (a->dp[x] < b->dp[x]) { + return FP_LT; + } + } + } + return FP_EQ; +} + +/* sets up the montgomery reduction */ +int fp_montgomery_setup(fp_int *a, fp_digit *rho) +{ + fp_digit x, b; + +/* fast inversion mod 2**k + * + * Based on the fact that + * + * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) + * => 2*X*A - X*X*A*A = 1 + * => 2*(1) - (1) = 1 + */ + b = a->dp[0]; + + if ((b & 1) == 0) { + return FP_VAL; + } + + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ +#ifdef FP_64BIT + x *= 2 - b * x; /* here x*a==1 mod 2**64 */ +#endif + + /* rho = -1/m mod b */ + *rho = (fp_digit) (((fp_word) 1 << ((fp_word) DIGIT_BIT)) - ((fp_word)x)); + + return FP_OKAY; +} + +/* computes a = B**n mod b without division or multiplication useful for + * normalizing numbers in a Montgomery system. 
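+ * Here B is the digit radix 2**DIGIT_BIT and n is b->used, so the result
+ * is R mod b, where R = 2**(b->used * DIGIT_BIT) is the Montgomery radix.
+ * A sketch of a typical (hypothetical) caller pairing it with the
+ * Montgomery helpers in this file:
+ *
+ *     fp_digit rho;
+ *     fp_int R;
+ *     fp_montgomery_setup(m, &rho);            (rho = -1/m mod 2**DIGIT_BIT)
+ *     fp_montgomery_calc_normalization(&R, m); (R mod m)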
+ */ +void fp_montgomery_calc_normalization(fp_int *a, fp_int *b) +{ + int x, bits; + + /* how many bits of last digit does b use */ + bits = fp_count_bits (b) % DIGIT_BIT; + if (!bits) bits = DIGIT_BIT; + + /* compute A = B^(n-1) * 2^(bits-1) */ + if (b->used > 1) { + fp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1); + } else { + fp_set(a, 1); + bits = 1; + } + + /* now compute C = A * B mod b */ + for (x = bits - 1; x < (int)DIGIT_BIT; x++) { + fp_mul_2 (a, a); + if (fp_cmp_mag (a, b) != FP_LT) { + s_fp_sub (a, b, a); + } + } +} + + +#ifdef TFM_SMALL_MONT_SET + #include "fp_mont_small.i" +#endif + +#ifdef HAVE_INTEL_MULX +static WC_INLINE void innermul8_mulx(fp_digit *c_mulx, fp_digit *cy_mulx, fp_digit *tmpm, fp_digit mu) +{ + fp_digit cy = *cy_mulx ; + INNERMUL8_MULX ; + *cy_mulx = cy ; +} + +/* computes x/R == x (mod N) via Montgomery Reduction */ +static int fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp) +{ +#ifndef WOLFSSL_SMALL_STACK + fp_digit c[FP_SIZE+1]; +#else + fp_digit *c; +#endif + fp_digit *_c, *tmpm, mu = 0; + int oldused, x, y, pa; + + /* bail if too large */ + if (m->used > (FP_SIZE/2)) { + (void)mu; /* shut up compiler */ + return FP_OKAY; + } + +#ifdef TFM_SMALL_MONT_SET + if (m->used <= 16) { + return fp_montgomery_reduce_small(a, m, mp); + } +#endif + +#ifdef WOLFSSL_SMALL_STACK + /* only allocate space for what's needed for window plus res */ + c = (fp_digit*)XMALLOC(sizeof(fp_digit)*(FP_SIZE + 1), NULL, DYNAMIC_TYPE_BIGINT); + if (c == NULL) { + return FP_MEM; + } +#endif + + /* now zero the buff */ + XMEMSET(c, 0, sizeof(fp_digit)*(FP_SIZE + 1)); + pa = m->used; + + /* copy the input */ + oldused = a->used; + for (x = 0; x < oldused; x++) { + c[x] = a->dp[x]; + } + MONT_START; + + for (x = 0; x < pa; x++) { + fp_digit cy = 0; + /* get Mu for this round */ + LOOP_START; + _c = c + x; + tmpm = m->dp; + y = 0; + for (; y < (pa & ~7); y += 8) { + innermul8_mulx(_c, &cy, tmpm, mu) ; + _c += 8; + tmpm += 8; + } + for (; y < pa; y++) { + INNERMUL; + ++_c; + } + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + } + + /* now copy out */ + _c = c + pa; + tmpm = a->dp; + for (x = 0; x < pa+1; x++) { + *tmpm++ = *_c++; + } + + /* zero any excess digits on the destination that we didn't write to */ + for (; x < oldused; x++) { + *tmpm++ = 0; + } + + MONT_FINI; + + a->used = pa+1; + fp_clamp(a); + + /* if A >= m then A = A - m */ + if (fp_cmp_mag (a, m) != FP_LT) { + s_fp_sub (a, m, a); + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(c, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return FP_OKAY; +} +#endif + +/* computes x/R == x (mod N) via Montgomery Reduction */ +int fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp) +{ +#ifndef WOLFSSL_SMALL_STACK + fp_digit c[FP_SIZE+1]; +#else + fp_digit *c; +#endif + fp_digit *_c, *tmpm, mu = 0; + int oldused, x, y, pa, err = 0; + + IF_HAVE_INTEL_MULX(err = fp_montgomery_reduce_mulx(a, m, mp), return err) ; + (void)err; + + /* bail if too large */ + if (m->used > (FP_SIZE/2)) { + (void)mu; /* shut up compiler */ + return FP_OKAY; + } + +#ifdef TFM_SMALL_MONT_SET + if (m->used <= 16) { + return fp_montgomery_reduce_small(a, m, mp); + } +#endif + +#ifdef WOLFSSL_SMALL_STACK + /* only allocate space for what's needed for window plus res */ + c = (fp_digit*)XMALLOC(sizeof(fp_digit)*(FP_SIZE + 1), NULL, DYNAMIC_TYPE_BIGINT); + if (c == NULL) { + return FP_MEM; + } +#endif + + /* now zero the buff */ + XMEMSET(c, 0, sizeof(fp_digit)*(FP_SIZE + 1)); + pa = m->used; + + /* copy the input */ + oldused = a->used; + for (x = 0; x < 
oldused; x++) { + c[x] = a->dp[x]; + } + MONT_START; + + for (x = 0; x < pa; x++) { + fp_digit cy = 0; + /* get Mu for this round */ + LOOP_START; + _c = c + x; + tmpm = m->dp; + y = 0; +#if defined(INNERMUL8) + for (; y < (pa & ~7); y += 8) { + INNERMUL8 ; + _c += 8; + tmpm += 8; + } +#endif + for (; y < pa; y++) { + INNERMUL; + ++_c; + } + LOOP_END; + while (cy) { + PROPCARRY; + ++_c; + } + } + + /* now copy out */ + _c = c + pa; + tmpm = a->dp; + for (x = 0; x < pa+1; x++) { + *tmpm++ = *_c++; + } + + /* zero any excess digits on the destination that we didn't write to */ + for (; x < oldused; x++) { + *tmpm++ = 0; + } + + MONT_FINI; + + a->used = pa+1; + fp_clamp(a); + + /* if A >= m then A = A - m */ + if (fp_cmp_mag (a, m) != FP_LT) { + s_fp_sub (a, m, a); + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(c, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return FP_OKAY; +} + +void fp_read_unsigned_bin(fp_int *a, const unsigned char *b, int c) +{ +#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT) + const word32 maxC = (a->size * sizeof(fp_digit)); +#else + const word32 maxC = (FP_SIZE * sizeof(fp_digit)); +#endif + + /* zero the int */ + fp_zero (a); + + /* if input b excess max, then truncate */ + if (c > 0 && (word32)c > maxC) { + int excess = (c - maxC); + c -= excess; + b += excess; + } + + /* If we know the endianness of this architecture, and we're using + 32-bit fp_digits, we can optimize this */ +#if (defined(LITTLE_ENDIAN_ORDER) || defined(BIG_ENDIAN_ORDER)) && \ + defined(FP_32BIT) + /* But not for both simultaneously */ +#if defined(LITTLE_ENDIAN_ORDER) && defined(BIG_ENDIAN_ORDER) +#error Both LITTLE_ENDIAN_ORDER and BIG_ENDIAN_ORDER defined. +#endif + { + unsigned char *pd = (unsigned char *)a->dp; + + a->used = (c + sizeof(fp_digit) - 1)/sizeof(fp_digit); + /* read the bytes in */ +#ifdef BIG_ENDIAN_ORDER + { + /* Use Duff's device to unroll the loop. 
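The switch jumps into the middle of the do/while, so the first, possibly partial, group of (c % 4) bytes is handled by the same unrolled body that afterwards copies four bytes per pass.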
*/
+ int idx = (c - 1) & ~3;
+ switch (c % 4) {
+ case 0: do { pd[idx+0] = *b++; // fallthrough
+ case 3: pd[idx+1] = *b++; // fallthrough
+ case 2: pd[idx+2] = *b++; // fallthrough
+ case 1: pd[idx+3] = *b++; // fallthrough
+ idx -= 4;
+ } while ((c -= 4) > 0);
+ }
+ }
+#else
+ for (c -= 1; c >= 0; c -= 1) {
+ pd[c] = *b++;
+ }
+#endif
+ }
+#else
+ /* read the bytes in */
+ for (; c > 0; c--) {
+ fp_mul_2d (a, 8, a);
+ a->dp[0] |= *b++;
+
+ if (a->used == 0) {
+ a->used = 1;
+ }
+ }
+#endif
+ fp_clamp (a);
+}
+
+int fp_to_unsigned_bin_at_pos(int x, fp_int *t, unsigned char *b)
+{
+#if DIGIT_BIT == 64 || DIGIT_BIT == 32
+ int i, j;
+ fp_digit n;
+
+ for (j = 0, i = 0; i < t->used-1; ) {
+ b[x++] = (unsigned char)(t->dp[i] >> j);
+ j += 8;
+ i += j == DIGIT_BIT;
+ j &= DIGIT_BIT - 1;
+ }
+ n = t->dp[i];
+ while (n != 0) {
+ b[x++] = (unsigned char)n;
+ n >>= 8;
+ }
+ return x;
+#else
+ while (fp_iszero (t) == FP_NO) {
+ b[x++] = (unsigned char) (t->dp[0] & 255);
+ fp_div_2d (t, 8, t, NULL);
+ }
+ return x;
+#endif
+}
+
+int fp_to_unsigned_bin(fp_int *a, unsigned char *b)
+{
+ int x;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int t[1];
+#else
+ fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (t == NULL)
+ return FP_MEM;
+#endif
+
+ fp_init_copy(t, a);
+
+ x = fp_to_unsigned_bin_at_pos(0, t, b);
+ fp_reverse (b, x);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
+}
+
+int fp_to_unsigned_bin_len(fp_int *a, unsigned char *b, int c)
+{
+#if DIGIT_BIT == 64 || DIGIT_BIT == 32
+ int i, j, x;
+
+ for (x=c-1,j=0,i=0; x >= 0; x--) {
+ b[x] = (unsigned char)(a->dp[i] >> j);
+ j += 8;
+ i += j == DIGIT_BIT;
+ j &= DIGIT_BIT - 1;
+ }
+
+ return FP_OKAY;
+#else
+ int x;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int t[1];
+#else
+ fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (t == NULL)
+ return FP_MEM;
+#endif
+
+ fp_init_copy(t, a);
+
+ for (x = 0; x < c; x++) {
+ b[x] = (unsigned char) (t->dp[0] & 255);
+ fp_div_2d (t, 8, t, NULL);
+ }
+ fp_reverse (b, x);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
+#endif
+}
+
+int fp_unsigned_bin_size(fp_int *a)
+{
+ int size = fp_count_bits (a);
+ return (size / 8 + ((size & 7) != 0 ? 1 : 0));
+}
+
+void fp_set(fp_int *a, fp_digit b)
+{
+ fp_zero(a);
+ a->dp[0] = b;
+ a->used = a->dp[0] ?
1 : 0; +} + + +#ifndef MP_SET_CHUNK_BITS + #define MP_SET_CHUNK_BITS 4 +#endif +void fp_set_int(fp_int *a, unsigned long b) +{ + int x; + + /* use direct fp_set if b is less than fp_digit max */ + if (b < FP_DIGIT_MAX) { + fp_set (a, (fp_digit)b); + return; + } + + fp_zero (a); + + /* set chunk bits at a time */ + for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) { + fp_mul_2d (a, MP_SET_CHUNK_BITS, a); + + /* OR in the top bits of the source */ + a->dp[0] |= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) & + ((1 << MP_SET_CHUNK_BITS) - 1); + + /* shift the source up to the next chunk bits */ + b <<= MP_SET_CHUNK_BITS; + + /* ensure that digits are not clamped off */ + a->used += 1; + } + + /* clamp digits */ + fp_clamp(a); +} + +/* check if a bit is set */ +int fp_is_bit_set (fp_int *a, fp_digit b) +{ + fp_digit i; + + if (b > FP_MAX_BITS) + return 0; + else + i = b/DIGIT_BIT; + + if ((fp_digit)a->used < i) + return 0; + + return (int)((a->dp[i] >> b%DIGIT_BIT) & (fp_digit)1); +} + +/* set the b bit of a */ +int fp_set_bit (fp_int * a, fp_digit b) +{ + fp_digit i; + + if (b > FP_MAX_BITS) + return 0; + else + i = b/DIGIT_BIT; + + /* set the used count of where the bit will go if required */ + if (a->used < (int)(i+1)) + a->used = (int)(i+1); + + /* put the single bit in its place */ + a->dp[i] |= ((fp_digit)1) << (b % DIGIT_BIT); + + return MP_OKAY; +} + +int fp_count_bits (fp_int * a) +{ + int r; + fp_digit q; + + /* shortcut */ + if (a->used == 0) { + return 0; + } + + /* get number of digits and add that */ + r = (a->used - 1) * DIGIT_BIT; + + /* take the last digit and count the bits in it */ + q = a->dp[a->used - 1]; + while (q > ((fp_digit) 0)) { + ++r; + q >>= ((fp_digit) 1); + } + + return r; +} + +int fp_leading_bit(fp_int *a) +{ + int bit = 0; + + if (a->used != 0) { + fp_digit q = a->dp[a->used - 1]; + int qSz = sizeof(fp_digit); + + while (qSz > 0) { + if ((unsigned char)q != 0) + bit = (q & 0x80) != 0; + q >>= 8; + qSz--; + } + } + + return bit; +} + +void fp_lshd(fp_int *a, int x) +{ + int y; + + /* move up and truncate as required */ + y = MIN(a->used + x - 1, (int)(FP_SIZE-1)); + + /* store new size */ + a->used = y + 1; + + /* move digits */ + for (; y >= x; y--) { + a->dp[y] = a->dp[y-x]; + } + + /* zero lower digits */ + for (; y >= 0; y--) { + a->dp[y] = 0; + } + + /* clamp digits */ + fp_clamp(a); +} + + +/* right shift by bit count */ +void fp_rshb(fp_int *c, int x) +{ + fp_digit *tmpc, mask, shift; + fp_digit r, rr; + fp_digit D = x; + + if (fp_iszero(c)) return; + + /* mask */ + mask = (((fp_digit)1) << D) - 1; + + /* shift for lsb */ + shift = DIGIT_BIT - D; + + /* alias */ + tmpc = c->dp + (c->used - 1); + + /* carry */ + r = 0; + for (x = c->used - 1; x >= 0; x--) { + /* get the lower bits of this word in a temp */ + rr = *tmpc & mask; + + /* shift the current word and mix in the carry bits from previous word */ + *tmpc = (*tmpc >> D) | (r << shift); + --tmpc; + + /* set the carry to the carry bits of the current word found above */ + r = rr; + } + + /* clamp digits */ + fp_clamp(c); +} + + +void fp_rshd(fp_int *a, int x) +{ + int y; + + /* too many digits just zero and return */ + if (x >= a->used) { + fp_zero(a); + return; + } + + /* shift */ + for (y = 0; y < a->used - x; y++) { + a->dp[y] = a->dp[y+x]; + } + + /* zero rest */ + for (; y < a->used; y++) { + a->dp[y] = 0; + } + + /* decrement count */ + a->used -= x; + fp_clamp(a); +} + +/* reverse an array, used for radix code */ +void fp_reverse (unsigned char *s, int len) +{ + int ix, iy; + 
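/* fp_to_unsigned_bin_at_pos() and the radix code emit bytes least
significant first; this helper then swaps the ends inward to flip the
buffer in place into the conventional big-endian order. The pairing in
fp_to_unsigned_bin() above is:
+
+ x = fp_to_unsigned_bin_at_pos(0, t, b);
+ fp_reverse (b, x);
+ */
+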
unsigned char t; + + ix = 0; + iy = len - 1; + while (ix < iy) { + t = s[ix]; + s[ix] = s[iy]; + s[iy] = t; + ++ix; + --iy; + } +} + + +/* c = a - b */ +int fp_sub_d(fp_int *a, fp_digit b, fp_int *c) +{ +#ifndef WOLFSSL_SMALL_STACK + fp_int tmp[1]; +#else + fp_int *tmp; +#endif + +#ifdef WOLFSSL_SMALL_STACK + tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT); + if (tmp == NULL) + return FP_MEM; +#endif + + fp_init(tmp); + fp_set(tmp, b); +#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT) + if (c->size < FP_SIZE) { + fp_sub(a, tmp, tmp); + fp_copy(tmp, c); + } else +#endif + { + fp_sub(a, tmp, c); + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return FP_OKAY; +} + + +/* wolfSSL callers from normal lib */ + +/* init a new mp_int */ +int mp_init (mp_int * a) +{ + if (a) + fp_init(a); + return MP_OKAY; +} + +void fp_init(fp_int *a) +{ +#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT) + a->size = FP_SIZE; +#endif +#ifdef HAVE_WOLF_BIGINT + wc_bigint_init(&a->raw); +#endif + fp_zero(a); +} + +void fp_zero(fp_int *a) +{ + int size; + a->used = 0; + a->sign = FP_ZPOS; +#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT) + size = a->size; +#else + size = FP_SIZE; +#endif + XMEMSET(a->dp, 0, size * sizeof(fp_digit)); +} + +void fp_clear(fp_int *a) +{ + int size; + a->used = 0; + a->sign = FP_ZPOS; +#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT) + size = a->size; +#else + size = FP_SIZE; +#endif + XMEMSET(a->dp, 0, size * sizeof(fp_digit)); + fp_free(a); +} + +void fp_forcezero (mp_int * a) +{ + int size; + a->used = 0; + a->sign = FP_ZPOS; +#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT) + size = a->size; +#else + size = FP_SIZE; +#endif + ForceZero(a->dp, size * sizeof(fp_digit)); +#ifdef HAVE_WOLF_BIGINT + wc_bigint_zero(&a->raw); +#endif + fp_free(a); +} + +void mp_forcezero (mp_int * a) +{ + fp_forcezero(a); +} + +void fp_free(fp_int* a) +{ +#ifdef HAVE_WOLF_BIGINT + wc_bigint_free(&a->raw); +#else + (void)a; +#endif +} + + +/* clear one (frees) */ +void mp_clear (mp_int * a) +{ + if (a == NULL) + return; + fp_clear(a); +} + +void mp_free(mp_int* a) +{ + fp_free(a); +} + +/* handle up to 6 inits */ +int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, + mp_int* e, mp_int* f) +{ + if (a) + fp_init(a); + if (b) + fp_init(b); + if (c) + fp_init(c); + if (d) + fp_init(d); + if (e) + fp_init(e); + if (f) + fp_init(f); + + return MP_OKAY; +} + +/* high level addition (handles signs) */ +int mp_add (mp_int * a, mp_int * b, mp_int * c) +{ + fp_add(a, b, c); + return MP_OKAY; +} + +/* high level subtraction (handles signs) */ +int mp_sub (mp_int * a, mp_int * b, mp_int * c) +{ + fp_sub(a, b, c); + return MP_OKAY; +} + +/* high level multiplication (handles sign) */ +#if defined(FREESCALE_LTC_TFM) +int wolfcrypt_mp_mul(mp_int * a, mp_int * b, mp_int * c) +#else +int mp_mul (mp_int * a, mp_int * b, mp_int * c) +#endif +{ + return fp_mul(a, b, c); +} + +int mp_mul_d (mp_int * a, mp_digit b, mp_int * c) +{ + fp_mul_d(a, b, c); + return MP_OKAY; +} + +/* d = a * b (mod c) */ +#if defined(FREESCALE_LTC_TFM) +int wolfcrypt_mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) +#else +int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) +#endif +{ + #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) + int A = fp_count_bits (a); + int B = fp_count_bits (b); + + if( A >= ESP_RSA_MULM_BITS && B >= ESP_RSA_MULM_BITS) + return esp_mp_mulmod(a, b, c, d); + else 
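+ /* operand sizes below ESP_RSA_MULM_BITS do not benefit from the
+ ESP32 accelerator: fall through to the generic fast-math path */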
+ #endif + return fp_mulmod(a, b, c, d); +} + +/* d = a - b (mod c) */ +int mp_submod(mp_int *a, mp_int *b, mp_int *c, mp_int *d) +{ + return fp_submod(a, b, c, d); +} + +/* d = a + b (mod c) */ +int mp_addmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d) +{ + return fp_addmod(a, b, c, d); +} + +/* c = a mod b, 0 <= c < b */ +#if defined(FREESCALE_LTC_TFM) +int wolfcrypt_mp_mod (mp_int * a, mp_int * b, mp_int * c) +#else +int mp_mod (mp_int * a, mp_int * b, mp_int * c) +#endif +{ + return fp_mod (a, b, c); +} + +/* hac 14.61, pp608 */ +#if defined(FREESCALE_LTC_TFM) +int wolfcrypt_mp_invmod (mp_int * a, mp_int * b, mp_int * c) +#else +int mp_invmod (mp_int * a, mp_int * b, mp_int * c) +#endif +{ + return fp_invmod(a, b, c); +} + +/* hac 14.61, pp608 */ +int mp_invmod_mont_ct (mp_int * a, mp_int * b, mp_int * c, mp_digit mp) +{ + return fp_invmod_mont_ct(a, b, c, mp); +} + +/* this is a shell function that calls either the normal or Montgomery + * exptmod functions. Originally the call to the montgomery code was + * embedded in the normal function but that wasted a lot of stack space + * for nothing (since 99% of the time the Montgomery code would be called) + */ +#if defined(FREESCALE_LTC_TFM) +int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y) +#else +int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y) +#endif +{ + return fp_exptmod(G, X, P, Y); +} + +int mp_exptmod_ex (mp_int * G, mp_int * X, int digits, mp_int * P, mp_int * Y) +{ + return fp_exptmod_ex(G, X, digits, P, Y); +} + +int mp_exptmod_nct (mp_int * G, mp_int * X, mp_int * P, mp_int * Y) +{ + return fp_exptmod_nct(G, X, P, Y); +} + + +/* compare two ints (signed)*/ +int mp_cmp (mp_int * a, mp_int * b) +{ + return fp_cmp(a, b); +} + +/* compare a digit */ +int mp_cmp_d(mp_int * a, mp_digit b) +{ + return fp_cmp_d(a, b); +} + +/* get the size for an unsigned equivalent */ +int mp_unsigned_bin_size (mp_int * a) +{ + return fp_unsigned_bin_size(a); +} + +int mp_to_unsigned_bin_at_pos(int x, fp_int *t, unsigned char *b) +{ + return fp_to_unsigned_bin_at_pos(x, t, b); +} + +/* store in unsigned [big endian] format */ +int mp_to_unsigned_bin (mp_int * a, unsigned char *b) +{ + return fp_to_unsigned_bin(a,b); +} + +int mp_to_unsigned_bin_len(mp_int * a, unsigned char *b, int c) +{ + return fp_to_unsigned_bin_len(a, b, c); +} +/* reads a unsigned char array, assumes the msb is stored first [big endian] */ +int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c) +{ + fp_read_unsigned_bin(a, b, c); + return MP_OKAY; +} + + +int mp_sub_d(fp_int *a, fp_digit b, fp_int *c) +{ + return fp_sub_d(a, b, c); +} + +int mp_mul_2d(fp_int *a, int b, fp_int *c) +{ + fp_mul_2d(a, b, c); + return MP_OKAY; +} + +int mp_2expt(fp_int* a, int b) +{ + fp_2expt(a, b); + return MP_OKAY; +} + +int mp_div(fp_int * a, fp_int * b, fp_int * c, fp_int * d) +{ + return fp_div(a, b, c, d); +} + +int mp_div_2d(fp_int* a, int b, fp_int* c, fp_int* d) +{ + fp_div_2d(a, b, c, d); + return MP_OKAY; +} + +void fp_copy(fp_int *a, fp_int *b) +{ + /* if source and destination are different */ + if (a != b) { +#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT) + /* verify a will fit in b */ + if (b->size >= a->used) { + int x, oldused; + oldused = b->used; + b->used = a->used; + b->sign = a->sign; + + XMEMCPY(b->dp, a->dp, a->used * sizeof(fp_digit)); + + /* zero any excess digits on the destination that we didn't write to */ + for (x = b->used; x >= 0 && x < oldused; x++) { + b->dp[x] = 0; + } + } + else { + /* TODO: Handle 
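the b->size < a->used (destination too small)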
error case */
+#else
+ /* all dp's are same size, so do straight copy */
+ b->used = a->used;
+ b->sign = a->sign;
+ XMEMCPY(b->dp, a->dp, FP_SIZE * sizeof(fp_digit));
+#endif
+ }
+}
+
+void fp_init_copy(fp_int *a, fp_int* b)
+{
+ if (a != b) {
+ fp_init(a);
+ fp_copy(b, a);
+ }
+}
+
+/* fast math wrappers */
+int mp_copy(fp_int* a, fp_int* b)
+{
+ fp_copy(a, b);
+ return MP_OKAY;
+}
+
+int mp_isodd(mp_int* a)
+{
+ return fp_isodd(a);
+}
+
+int mp_iszero(mp_int* a)
+{
+ return fp_iszero(a);
+}
+
+int mp_count_bits (mp_int* a)
+{
+ return fp_count_bits(a);
+}
+
+int mp_leading_bit (mp_int* a)
+{
+ return fp_leading_bit(a);
+}
+
+void mp_rshb (mp_int* a, int x)
+{
+ fp_rshb(a, x);
+}
+
+void mp_rshd (mp_int* a, int x)
+{
+ fp_rshd(a, x);
+}
+
+int mp_set_int(mp_int *a, unsigned long b)
+{
+ fp_set_int(a, b);
+ return MP_OKAY;
+}
+
+int mp_is_bit_set (mp_int *a, mp_digit b)
+{
+ return fp_is_bit_set(a, b);
+}
+
+int mp_set_bit(mp_int *a, mp_digit b)
+{
+ return fp_set_bit(a, b);
+}
+
+#if defined(WOLFSSL_KEY_GEN) || defined (HAVE_ECC) || !defined(NO_DH) || \
+ !defined(NO_DSA) || !defined(NO_RSA)
+
+/* c = a * a (mod b) */
+int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
+{
+ int err;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int t[1];
+#else
+ fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (t == NULL)
+ return FP_MEM;
+#endif
+
+ fp_init(t);
+ err = fp_sqr(a, t);
+ if (err == FP_OKAY) {
+ #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+ if (c->size < FP_SIZE) {
+ err = fp_mod(t, b, t);
+ fp_copy(t, c);
+ }
+ else
+ #endif
+ {
+ err = fp_mod(t, b, c);
+ }
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return err;
+}
+
+/* fast math conversion */
+int mp_sqrmod(mp_int *a, mp_int *b, mp_int *c)
+{
+ return fp_sqrmod(a, b, c);
+}
+
+/* fast math conversion */
+int mp_montgomery_calc_normalization(mp_int *a, mp_int *b)
+{
+ fp_montgomery_calc_normalization(a, b);
+ return MP_OKAY;
+}
+
+#endif /* WOLFSSL_KEYGEN || HAVE_ECC */
+
+
+#if defined(WC_MP_TO_RADIX) || !defined(NO_DH) || !defined(NO_DSA) || \
+ !defined(NO_RSA)
+
+#ifdef WOLFSSL_KEY_GEN
+/* swap the elements of two integers, for cases where you can't simply swap the
+ * mp_int pointers around
+ */
+static int fp_exch (fp_int * a, fp_int * b)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int t[1];
+#else
+ fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (t == NULL)
+ return FP_MEM;
+#endif
+
+ *t = *a;
+ *a = *b;
+ *b = *t;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
+}
+#endif
+
+static const int lnz[16] = {
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+
+/* Counts the number of least significant bits which are zero before the first set bit */
+int fp_cnt_lsb(fp_int *a)
+{
+ int x;
+ fp_digit q, qq;
+
+ /* easy out */
+ if (fp_iszero(a) == FP_YES) {
+ return 0;
+ }
+
+ /* scan lower digits until non-zero */
+ for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
+ q = a->dp[x];
+ x *= DIGIT_BIT;
+
+ /* now scan this digit until a 1 is found */
+ if ((q & 1) == 0) {
+ do {
+ qq = q & 15;
+ x += lnz[qq];
+ q >>= 4;
+ } while (qq == 0);
+ }
+ return x;
+}
+
+
+static int s_is_power_of_two(fp_digit b, int *p)
+{
+ int x;
+
+ /* fast return if no power of two */
+ if ((b==0) || (b & (b-1))) {
+ return FP_NO;
+ }
+
+ for (x = 0; x < DIGIT_BIT; x++) {
+ if (b == (((fp_digit)1) << x)) {
+ *p = x;
+ return FP_YES;
+ }
+ }
+ return FP_NO;
+}
+
+/* a/b => cb + d == a */
+static int fp_div_d(fp_int *a, fp_digit
b, fp_int *c, fp_digit *d)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int q[1];
+#else
+ fp_int *q;
+#endif
+ fp_word w;
+ fp_digit t;
+ int ix;
+
+ /* cannot divide by zero */
+ if (b == 0) {
+ return FP_VAL;
+ }
+
+ /* quick outs */
+ if (b == 1 || fp_iszero(a) == FP_YES) {
+ if (d != NULL) {
+ *d = 0;
+ }
+ if (c != NULL) {
+ fp_copy(a, c);
+ }
+ return FP_OKAY;
+ }
+
+ /* power of two ? */
+ if (s_is_power_of_two(b, &ix) == FP_YES) {
+ if (d != NULL) {
+ *d = a->dp[0] & ((((fp_digit)1) << ix) - 1);
+ }
+ if (c != NULL) {
+ fp_div_2d(a, ix, c, NULL);
+ }
+ return FP_OKAY;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ q = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (q == NULL)
+ return FP_MEM;
+#endif
+
+ fp_init(q);
+ if (c != NULL) {
+ q->used = a->used;
+ q->sign = a->sign;
+ }
+
+ w = 0;
+ for (ix = a->used - 1; ix >= 0; ix--) {
+ w = (w << ((fp_word)DIGIT_BIT)) | ((fp_word)a->dp[ix]);
+
+ if (w >= b) {
+ t = (fp_digit)(w / b);
+ w -= ((fp_word)t) * ((fp_word)b);
+ } else {
+ t = 0;
+ }
+ if (c != NULL)
+ q->dp[ix] = (fp_digit)t;
+ }
+
+ if (d != NULL) {
+ *d = (fp_digit)w;
+ }
+
+ if (c != NULL) {
+ fp_clamp(q);
+ fp_copy(q, c);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
+}
+
+
+/* c = a mod b, 0 <= c < b */
+static int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
+{
+ return fp_div_d(a, b, NULL, c);
+}
+
+int mp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
+{
+ return fp_mod_d(a, b, c);
+}
+
+#endif /* WC_MP_TO_RADIX || !NO_DH || !NO_DSA || !NO_RSA */
+
+
+#if !defined(NO_DH) || !defined(NO_DSA) || !defined(NO_RSA) || \
+ defined(WOLFSSL_KEY_GEN)
+
+static int fp_isprime_ex(fp_int *a, int t, int* result);
+
+
+int mp_prime_is_prime(mp_int* a, int t, int* result)
+{
+ return fp_isprime_ex(a, t, result);
+}
+
+/* Miller-Rabin test of "a" to the base of "b" as described in
+ * HAC pp. 139 Algorithm 4.24
+ *
+ * Sets result to 0 if definitely composite or 1 if probably prime.
+ * For a random base the chance of error is no more than 1/4, and often
+ * very much lower.
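+ * The test writes a - 1 = 2**s * r with r odd, computes y = b**r mod a,
+ * and then squares y up to s - 1 times; a is reported probably prime
+ * only if y is 1 or a - 1 to begin with, or hits a - 1 along the way.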
+ */ +static int fp_prime_miller_rabin_ex(fp_int * a, fp_int * b, int *result, + fp_int *n1, fp_int *y, fp_int *r) +{ + int s, j; + int err; + + /* default */ + *result = FP_NO; + + /* ensure b > 1 */ + if (fp_cmp_d(b, 1) != FP_GT) { + return FP_OKAY; + } + + /* get n1 = a - 1 */ + fp_copy(a, n1); + err = fp_sub_d(n1, 1, n1); + if (err != FP_OKAY) { + return err; + } + + /* set 2**s * r = n1 */ + fp_copy(n1, r); + + /* count the number of least significant bits + * which are zero + */ + s = fp_cnt_lsb(r); + + /* now divide n - 1 by 2**s */ + fp_div_2d (r, s, r, NULL); + + /* compute y = b**r mod a */ + fp_zero(y); +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +#ifndef WOLFSSL_SP_NO_2048 + if (fp_count_bits(a) == 1024) + sp_ModExp_1024(b, r, a, y); + else if (fp_count_bits(a) == 2048) + sp_ModExp_2048(b, r, a, y); + else +#endif +#ifndef WOLFSSL_SP_NO_3072 + if (fp_count_bits(a) == 1536) + sp_ModExp_1536(b, r, a, y); + else if (fp_count_bits(a) == 3072) + sp_ModExp_3072(b, r, a, y); + else +#endif +#ifdef WOLFSSL_SP_4096 + if (fp_count_bits(a) == 4096) + sp_ModExp_4096(b, r, a, y); + else +#endif +#endif + fp_exptmod(b, r, a, y); + + /* if y != 1 and y != n1 do */ + if (fp_cmp_d (y, 1) != FP_EQ && fp_cmp (y, n1) != FP_EQ) { + j = 1; + /* while j <= s-1 and y != n1 */ + while ((j <= (s - 1)) && fp_cmp (y, n1) != FP_EQ) { + fp_sqrmod (y, a, y); + + /* if y == 1 then composite */ + if (fp_cmp_d (y, 1) == FP_EQ) { + return FP_OKAY; + } + ++j; + } + + /* if y != n1 then composite */ + if (fp_cmp (y, n1) != FP_EQ) { + return FP_OKAY; + } + } + + /* probably prime now */ + *result = FP_YES; + + return FP_OKAY; +} + +static int fp_prime_miller_rabin(fp_int * a, fp_int * b, int *result) +{ + int err; +#ifndef WOLFSSL_SMALL_STACK + fp_int n1[1], y[1], r[1]; +#else + fp_int *n1, *y, *r; +#endif + +#ifdef WOLFSSL_SMALL_STACK + n1 = (fp_int*)XMALLOC(sizeof(fp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT); + if (n1 == NULL) { + return FP_MEM; + } + y = &n1[1]; r = &n1[2]; +#endif + + fp_init(n1); + fp_init(y); + fp_init(r); + + err = fp_prime_miller_rabin_ex(a, b, result, n1, y, r); + + fp_clear(n1); + fp_clear(y); + fp_clear(r); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(n1, NULL, DYNAMIC_TYPE_BIGINT); +#endif + + return err; +} + + +/* a few primes */ +static const fp_digit primes[FP_PRIME_SIZE] = { + 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, + 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, + 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, + 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083, + 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, + 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, + 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, + 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, + + 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, + 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, + 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, + 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, + 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, + 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, + 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, + 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, + + 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, + 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 
0x0337, 0x033B, + 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, + 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, + 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, + 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, + 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, + 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, + + 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, + 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, + 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, + 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, + 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, + 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, + 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, + 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653 +}; + +int fp_isprime_ex(fp_int *a, int t, int* result) +{ +#ifndef WOLFSSL_SMALL_STACK + fp_int b[1]; +#else + fp_int *b; +#endif + fp_digit d; + int r, res; + + if (t <= 0 || t > FP_PRIME_SIZE) { + *result = FP_NO; + return FP_VAL; + } + + if (fp_isone(a)) { + *result = FP_NO; + return FP_OKAY; + } + + /* check against primes table */ + for (r = 0; r < FP_PRIME_SIZE; r++) { + if (fp_cmp_d(a, primes[r]) == FP_EQ) { + *result = FP_YES; + return FP_OKAY; + } + } + + /* do trial division */ + for (r = 0; r < FP_PRIME_SIZE; r++) { + res = fp_mod_d(a, primes[r], &d); + if (res != MP_OKAY || d == 0) { + *result = FP_NO; + return FP_OKAY; + } + } + +#ifdef WOLFSSL_SMALL_STACK + b = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT); + if (b == NULL) + return FP_MEM; +#endif + /* now do 't' miller rabins */ + fp_init(b); + for (r = 0; r < t; r++) { + fp_set(b, primes[r]); + fp_prime_miller_rabin(a, b, &res); + if (res == FP_NO) { + *result = FP_NO; + #ifdef WOLFSSL_SMALL_STACK + XFREE(b, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return FP_OKAY; + } + } + *result = FP_YES; +#ifdef WOLFSSL_SMALL_STACK + XFREE(b, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return FP_OKAY; +} + + +int mp_prime_is_prime_ex(mp_int* a, int t, int* result, WC_RNG* rng) +{ + int ret = FP_YES; + fp_digit d; + int i; + + if (a == NULL || result == NULL || rng == NULL) + return FP_VAL; + + if (fp_isone(a)) { + *result = FP_NO; + return FP_OKAY; + } + + /* check against primes table */ + for (i = 0; i < FP_PRIME_SIZE; i++) { + if (fp_cmp_d(a, primes[i]) == FP_EQ) { + *result = FP_YES; + return FP_OKAY; + } + } + + /* do trial division */ + for (i = 0; i < FP_PRIME_SIZE; i++) { + if (fp_mod_d(a, primes[i], &d) == MP_OKAY) { + if (d == 0) { + *result = FP_NO; + return FP_OKAY; + } + } + else + return FP_VAL; + } + +#ifndef WC_NO_RNG + /* now do a miller rabin with up to t random numbers, this should + * give a (1/4)^t chance of a false prime. */ + { + #ifndef WOLFSSL_SMALL_STACK + fp_int b[1], c[1], n1[1], y[1], r[1]; + byte base[FP_MAX_PRIME_SIZE]; + #else + fp_int *b, *c, *n1, *y, *r; + byte* base; + #endif + word32 baseSz; + int err; + + baseSz = fp_count_bits(a); + /* The base size is the number of bits / 8. One is added if the number + * of bits isn't an even 8. */ + baseSz = (baseSz / 8) + ((baseSz % 8) ? 
1 : 0); + + #ifndef WOLFSSL_SMALL_STACK + if (baseSz > sizeof(base)) + return FP_MEM; + #else + base = (byte*)XMALLOC(baseSz, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (base == NULL) + return FP_MEM; + + b = (fp_int*)XMALLOC(sizeof(fp_int) * 5, NULL, DYNAMIC_TYPE_BIGINT); + if (b == NULL) { + return FP_MEM; + } + c = &b[1]; n1 = &b[2]; y= &b[3]; r = &b[4]; + #endif + + fp_init(b); + fp_init(c); + fp_init(n1); + fp_init(y); + fp_init(r); + + err = fp_sub_d(a, 2, c); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(b, NULL, DYNAMIC_TYPE_BIGINT); + XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return err; + } + while (t > 0) { + if ((err = wc_RNG_GenerateBlock(rng, base, baseSz)) != 0) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(b, NULL, DYNAMIC_TYPE_BIGINT); + XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + return err; + } + + fp_read_unsigned_bin(b, base, baseSz); + if (fp_cmp_d(b, 2) != FP_GT || fp_cmp(b, c) != FP_LT) { + continue; + } + + fp_prime_miller_rabin_ex(a, b, &ret, n1, y, r); + if (ret == FP_NO) + break; + fp_zero(b); + t--; + } + + fp_clear(n1); + fp_clear(y); + fp_clear(r); + fp_clear(b); + fp_clear(c); + #ifdef WOLFSSL_SMALL_STACK + XFREE(b, NULL, DYNAMIC_TYPE_BIGINT); + XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER); + #endif + } +#else + (void)t; +#endif /* !WC_NO_RNG */ + + *result = ret; + return FP_OKAY; +} +#endif /* !NO_RSA || !NO_DSA || !NO_DH || WOLFSSL_KEY_GEN */ + + +#ifdef WOLFSSL_KEY_GEN + +static int fp_gcd(fp_int *a, fp_int *b, fp_int *c); +static int fp_lcm(fp_int *a, fp_int *b, fp_int *c); +static int fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap); + +int mp_gcd(fp_int *a, fp_int *b, fp_int *c) +{ + return fp_gcd(a, b, c); +} + + +int mp_lcm(fp_int *a, fp_int *b, fp_int *c) +{ + return fp_lcm(a, b, c); +} + +int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap) +{ + int err; + + err = fp_randprime(N, len, rng, heap); + switch(err) { + case FP_VAL: + return MP_VAL; + case FP_MEM: + return MP_MEM; + default: + break; + } + + return MP_OKAY; +} + +int mp_exch (mp_int * a, mp_int * b) +{ + return fp_exch(a, b); +} + + + +int fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap) +{ + static const int USE_BBS = 1; + int err, type; + int isPrime = FP_YES; + /* Assume the candidate is probably prime and then test until + * it is proven composite. */ + byte* buf; + + (void)heap; + + /* get type */ + if (len < 0) { + type = USE_BBS; + len = -len; + } else { + type = 0; + } + + /* allow sizes between 2 and 512 bytes for a prime size */ + if (len < 2 || len > 512) { + return FP_VAL; + } + + /* allocate buffer to work with */ + buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_TMP_BUFFER); + if (buf == NULL) { + return FP_MEM; + } + XMEMSET(buf, 0, len); + + do { +#ifdef SHOW_GEN + printf("."); + fflush(stdout); +#endif + /* generate value */ + err = wc_RNG_GenerateBlock(rng, buf, len); + if (err != 0) { + XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER); + return FP_VAL; + } + + /* munge bits */ + buf[0] |= 0x80 | 0x40; + buf[len-1] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00); + + /* load value */ + fp_read_unsigned_bin(N, buf, len); + + /* test */ + /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance + * of a 1024-bit candidate being a false positive, when it is our + * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.) 
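+ * In general a composite candidate survives t independent rounds with
+ * probability at most 4**(-t), so the error bound shrinks geometrically
+ * as rounds are added.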
+ * Using 8 because we've always used 8 */ + mp_prime_is_prime_ex(N, 8, &isPrime, rng); + } while (isPrime == FP_NO); + + XMEMSET(buf, 0, len); + XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER); + + return FP_OKAY; +} + +/* c = [a, b] */ +int fp_lcm(fp_int *a, fp_int *b, fp_int *c) +{ + int err; +#ifndef WOLFSSL_SMALL_STACK + fp_int t[2]; +#else + fp_int *t; +#endif + +#ifdef WOLFSSL_SMALL_STACK + t = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); + if (t == NULL) { + return FP_MEM; + } +#endif + + fp_init(&t[0]); + fp_init(&t[1]); + err = fp_gcd(a, b, &t[0]); + if (err == FP_OKAY) { + if (fp_cmp_mag(a, b) == FP_GT) { + err = fp_div(a, &t[0], &t[1], NULL); + if (err == FP_OKAY) + err = fp_mul(b, &t[1], c); + } else { + err = fp_div(b, &t[0], &t[1], NULL); + if (err == FP_OKAY) + err = fp_mul(a, &t[1], c); + } + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return err; +} + + + +/* c = (a, b) */ +int fp_gcd(fp_int *a, fp_int *b, fp_int *c) +{ +#ifndef WOLFSSL_SMALL_STACK + fp_int u[1], v[1], r[1]; +#else + fp_int *u, *v, *r; +#endif + + /* either zero than gcd is the largest */ + if (fp_iszero (a) == FP_YES && fp_iszero (b) == FP_NO) { + fp_abs (b, c); + return FP_OKAY; + } + if (fp_iszero (a) == FP_NO && fp_iszero (b) == FP_YES) { + fp_abs (a, c); + return FP_OKAY; + } + + /* optimized. At this point if a == 0 then + * b must equal zero too + */ + if (fp_iszero (a) == FP_YES) { + fp_zero(c); + return FP_OKAY; + } + +#ifdef WOLFSSL_SMALL_STACK + u = (fp_int*)XMALLOC(sizeof(fp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT); + if (u == NULL) { + return FP_MEM; + } + v = &u[1]; r = &u[2]; +#endif + + /* sort inputs */ + if (fp_cmp_mag(a, b) != FP_LT) { + fp_init_copy(u, a); + fp_init_copy(v, b); + } else { + fp_init_copy(u, b); + fp_init_copy(v, a); + } + + u->sign = FP_ZPOS; + v->sign = FP_ZPOS; + + fp_init(r); + while (fp_iszero(v) == FP_NO) { + fp_mod(u, v, r); + fp_copy(v, u); + fp_copy(r, v); + } + fp_copy(u, c); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(u, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return FP_OKAY; +} + +#endif /* WOLFSSL_KEY_GEN */ + + +#if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(OPENSSL_EXTRA) || \ + defined(WC_RSA_BLINDING) || !defined(NO_DSA) || \ + (!defined(NO_RSA) && !defined(NO_RSA_BOUNDS_CHECK)) +/* c = a + b */ +void fp_add_d(fp_int *a, fp_digit b, fp_int *c) +{ +#ifndef WOLFSSL_SMALL_STACK + fp_int tmp; + fp_init(&tmp); + fp_set(&tmp, b); + fp_add(a, &tmp, c); +#else + int i; + fp_word t = b; + + fp_copy(a, c); + for (i = 0; t != 0 && i < FP_SIZE && i < c->used; i++) { + t += c->dp[i]; + c->dp[i] = (fp_digit)t; + t >>= DIGIT_BIT; + } + if (i == c->used && i < FP_SIZE && t != 0) { + c->dp[i] = t; + c->used++; + } +#endif +} + +/* external compatibility */ +int mp_add_d(fp_int *a, fp_digit b, fp_int *c) +{ + fp_add_d(a, b, c); + return MP_OKAY; +} + +#endif /* HAVE_ECC || !NO_PWDBASED || OPENSSL_EXTRA || WC_RSA_BLINDING || + !NO_DSA || (!NO_RSA && !NO_RSA_BOUNDS_CHECK) */ + + +#if !defined(NO_DSA) || defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || \ + defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) || \ + defined(DEBUG_WOLFSSL) || defined(OPENSSL_EXTRA) || defined(WC_MP_TO_RADIX) + +/* chars used in radix conversions */ +static wcchar fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz+/"; +#endif + +#if !defined(NO_DSA) || defined(HAVE_ECC) +#if DIGIT_BIT == 64 || DIGIT_BIT == 32 +static int fp_read_radix_16(fp_int *a, const char *str) +{ + int i, j, k, neg; + char ch; + + /* if the 
leading digit is a + * minus set the sign to negative. + */ + if (*str == '-') { + ++str; + neg = FP_NEG; + } else { + neg = FP_ZPOS; + } + + j = 0; + k = 0; + for (i = (int)(XSTRLEN(str) - 1); i >= 0; i--) { + ch = str[i]; + if (ch >= '0' && ch <= '9') + ch -= '0'; + else if (ch >= 'A' && ch <= 'F') + ch -= 'A' - 10; + else if (ch >= 'a' && ch <= 'f') + ch -= 'a' - 10; + else + return FP_VAL; + + a->dp[k] |= ((fp_digit)ch) << j; + j += 4; + k += j == DIGIT_BIT; + j &= DIGIT_BIT - 1; + } + + a->used = k + 1; + fp_clamp(a); + /* set the sign only if a != 0 */ + if (fp_iszero(a) != FP_YES) { + a->sign = neg; + } + return FP_OKAY; +} +#endif + +static int fp_read_radix(fp_int *a, const char *str, int radix) +{ + int y, neg; + char ch; + + /* set the integer to the default of zero */ + fp_zero (a); + +#if DIGIT_BIT == 64 || DIGIT_BIT == 32 + if (radix == 16) + return fp_read_radix_16(a, str); +#endif + + /* make sure the radix is ok */ + if (radix < 2 || radix > 64) { + return FP_VAL; + } + + /* if the leading digit is a + * minus set the sign to negative. + */ + if (*str == '-') { + ++str; + neg = FP_NEG; + } else { + neg = FP_ZPOS; + } + + /* process each digit of the string */ + while (*str) { + /* if the radix <= 36 the conversion is case insensitive + * this allows numbers like 1AB and 1ab to represent the same value + * [e.g. in hex] + */ + ch = (char)((radix <= 36) ? XTOUPPER((unsigned char)*str) : *str); + for (y = 0; y < 64; y++) { + if (ch == fp_s_rmap[y]) { + break; + } + } + + /* if the char was found in the map + * and is less than the given radix add it + * to the number, otherwise exit the loop. + */ + if (y < radix) { + fp_mul_d (a, (fp_digit) radix, a); + fp_add_d (a, (fp_digit) y, a); + } else { + break; + } + ++str; + } + + /* set the sign only if a != 0 */ + if (fp_iszero(a) != FP_YES) { + a->sign = neg; + } + return FP_OKAY; +} + +/* fast math conversion */ +int mp_read_radix(mp_int *a, const char *str, int radix) +{ + return fp_read_radix(a, str, radix); +} + +#endif /* !defined(NO_DSA) || defined(HAVE_ECC) */ + +#ifdef HAVE_ECC + +/* fast math conversion */ +int mp_sqr(fp_int *A, fp_int *B) +{ + return fp_sqr(A, B); +} + +/* fast math conversion */ +int mp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp) +{ + return fp_montgomery_reduce(a, m, mp); +} + + +/* fast math conversion */ +int mp_montgomery_setup(fp_int *a, fp_digit *rho) +{ + return fp_montgomery_setup(a, rho); +} + +int mp_div_2(fp_int * a, fp_int * b) +{ + fp_div_2(a, b); + return MP_OKAY; +} + + +int mp_init_copy(fp_int * a, fp_int * b) +{ + fp_init_copy(a, b); + return MP_OKAY; +} + +#ifdef HAVE_COMP_KEY + +int mp_cnt_lsb(fp_int* a) +{ + return fp_cnt_lsb(a); +} + +#endif /* HAVE_COMP_KEY */ + +#endif /* HAVE_ECC */ + +#if defined(HAVE_ECC) || !defined(NO_RSA) || !defined(NO_DSA) || \ + defined(WOLFSSL_KEY_GEN) +/* fast math conversion */ +int mp_set(fp_int *a, fp_digit b) +{ + fp_set(a,b); + return MP_OKAY; +} +#endif + +#ifdef WC_MP_TO_RADIX + +/* returns size of ASCII representation */ +int mp_radix_size (mp_int *a, int radix, int *size) +{ + int res, digs; + fp_digit d; +#ifndef WOLFSSL_SMALL_STACK + fp_int t[1]; +#else + fp_int *t; +#endif + + *size = 0; + + /* special case for binary */ + if (radix == 2) { + *size = fp_count_bits (a) + (a->sign == FP_NEG ? 
1 : 0) + 1; + return FP_YES; + } + + /* make sure the radix is in range */ + if (radix < 2 || radix > 64) { + return FP_VAL; + } + + if (fp_iszero(a) == MP_YES) { + *size = 2; + return FP_OKAY; + } + + /* digs is the digit count */ + digs = 0; + + /* if it's negative add one for the sign */ + if (a->sign == FP_NEG) { + ++digs; + } + +#ifdef WOLFSSL_SMALL_STACK + t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT); + if (t == NULL) + return FP_MEM; +#endif + + /* init a copy of the input */ + fp_init_copy (t, a); + + /* force temp to positive */ + t->sign = FP_ZPOS; + + /* fetch out all of the digits */ + while (fp_iszero (t) == FP_NO) { + if ((res = fp_div_d (t, (mp_digit) radix, t, &d)) != FP_OKAY) { + fp_zero (t); + #ifdef WOLFSSL_SMALL_STACK + XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return res; + } + ++digs; + } + fp_zero (t); + + /* return digs + 1, the 1 is for the NULL byte that would be required. */ + *size = digs + 1; +#ifdef WOLFSSL_SMALL_STACK + XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return FP_OKAY; +} + +/* stores a bignum as a ASCII string in a given radix (2..64) */ +int mp_toradix (mp_int *a, char *str, int radix) +{ + int res, digs; + fp_digit d; + char *_s = str; +#ifndef WOLFSSL_SMALL_STACK + fp_int t[1]; +#else + fp_int *t; +#endif + + /* check range of the radix */ + if (radix < 2 || radix > 64) { + return FP_VAL; + } + + /* quick out if its zero */ + if (fp_iszero(a) == FP_YES) { + *str++ = '0'; + *str = '\0'; + return FP_OKAY; + } + +#ifdef WOLFSSL_SMALL_STACK + t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT); + if (t == NULL) + return FP_MEM; +#endif + + /* init a copy of the input */ + fp_init_copy (t, a); + + /* if it is negative output a - */ + if (t->sign == FP_NEG) { + ++_s; + *str++ = '-'; + t->sign = FP_ZPOS; + } + + digs = 0; + while (fp_iszero (t) == FP_NO) { + if ((res = fp_div_d (t, (fp_digit) radix, t, &d)) != FP_OKAY) { + fp_zero (t); + #ifdef WOLFSSL_SMALL_STACK + XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return res; + } + *str++ = fp_s_rmap[d]; + ++digs; + } +#ifndef WC_DISABLE_RADIX_ZERO_PAD + /* For hexadecimal output, add zero padding when number of digits is odd */ + if ((digs & 1) && (radix == 16)) { + *str++ = fp_s_rmap[0]; + ++digs; + } +#endif + /* reverse the digits of the string. 
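The digits were produced least significant first by the fp_div_d loop above, so the buffer is flipped here into most-significant-first order.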
In this case _s points
+ * to the first digit [excluding the sign] of the number
+ */
+ fp_reverse ((unsigned char *)_s, digs);
+
+ /* append a NULL so the string is properly terminated */
+ *str = '\0';
+
+ fp_zero (t);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
+}
+
+#ifdef WOLFSSL_DEBUG_MATH
+void mp_dump(const char* desc, mp_int* a, byte verbose)
+{
+ char buffer[FP_SIZE * sizeof(fp_digit) * 2];
+ int size;
+
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+ size = a->size;
+#else
+ size = FP_SIZE;
+#endif
+
+ printf("%s: ptr=%p, used=%d, sign=%d, size=%d, fpd=%d\n",
+ desc, a, a->used, a->sign, size, (int)sizeof(fp_digit));
+
+ mp_tohex(a, buffer);
+ printf(" %s\n ", buffer);
+
+ if (verbose) {
+ int i;
+ for (i = 0; i < size * (int)sizeof(fp_digit); i++) {
+ printf("%x ", *(((byte*)a->dp) + i));
+ }
+ printf("\n");
+ }
+}
+#endif /* WOLFSSL_DEBUG_MATH */
+
+#endif /* WC_MP_TO_RADIX */
+
+
+int mp_abs(mp_int* a, mp_int* b)
+{
+ fp_abs(a, b);
+ return FP_OKAY;
+}
+
+
+int mp_lshd (mp_int * a, int b)
+{
+ fp_lshd(a, b);
+ return FP_OKAY;
+}
+
+#endif /* USE_FAST_MATH */
diff --git a/client/wolfssl/wolfcrypt/src/wc_dsp.c b/client/wolfssl/wolfcrypt/src/wc_dsp.c
new file mode 100644
index 0000000..594ad04
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/wc_dsp.c
@@ -0,0 +1,327 @@
+/* wc_dsp.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/wc_port.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_DSP)
+#include "remote.h"
+#include "rpcmem.h"
+static wolfSSL_DSP_Handle_cb handle_function = NULL;
+static remote_handle64 defaultHandle;
+static wolfSSL_Mutex handle_mutex; /* mutex for access to single default handle */
+
+#define WOLFSSL_HANDLE_DONE 1
+#define WOLFSSL_HANDLE_GET 0
+
+/* callback function for setting the default handle in single threaded
+ * use cases */
+static int default_handle_cb(remote_handle64 *handle, int finished, void *ctx)
+{
+ (void)ctx;
+ if (finished == WOLFSSL_HANDLE_DONE) {
+ if (wc_UnLockMutex(&handle_mutex) != 0) {
+ WOLFSSL_MSG("Unlock handle mutex failed");
+ return -1;
+ }
+ }
+ else {
+ if (wc_LockMutex(&handle_mutex) != 0) {
+ WOLFSSL_MSG("Lock handle mutex failed");
+ return -1;
+ }
+ *handle = defaultHandle;
+ }
+ return 0;
+}
+
+
+/* Set global callback for getting handle to use
+ * return 0 on success */
+int wolfSSL_SetHandleCb(wolfSSL_DSP_Handle_cb in)
+{
+ handle_function = in;
+ return 0;
+}
+
+
+/* returns 1 if global handle callback is set and 0 if not */
+int wolfSSL_GetHandleCbSet()
+{
+ return (handle_function != NULL)?
1: 0;
+}
+
+
+/* Local function for setting up default handle
+ * returns 0 on success */
+int wolfSSL_InitHandle()
+{
+ char *sp_URI_value;
+ int ret;
+
+ sp_URI_value = wolfSSL_URI "&_dom=adsp";
+ ret = wolfSSL_open(sp_URI_value, &defaultHandle);
+ if (ret != 0) {
+ WOLFSSL_MSG("Unable to open aDSP?");
+ return -1;
+ }
+ wolfSSL_SetHandleCb(default_handle_cb);
+ ret = wc_InitMutex(&handle_mutex);
+ if (ret != 0) {
+ WOLFSSL_MSG("Unable to init handle mutex");
+ return -1;
+ }
+ return 0;
+}
+
+
+/* internal function that closes default handle and frees mutex */
+void wolfSSL_CleanupHandle()
+{
+ wolfSSL_close(defaultHandle);
+ wc_FreeMutex(&handle_mutex);
+}
+#if defined(WOLFSSL_HAVE_SP_ECC)
+
+/* ecc conversion from sp_c32.c */
+#include <wolfssl/wolfcrypt/sp.h>
+
+
+#ifndef WOLFSSL_SP_NO_256
+
+#ifdef HAVE_ECC_VERIFY
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void int_256_from_bin(int32* r, int size, const byte* a, int n)
+{
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = n-1; i >= 0; i--) {
+ r[j] |= (((int32)a[i]) << s);
+ if (s >= 18U) {
+ r[j] &= 0x3ffffff;
+ s = 26U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ r[++j] = (int32)a[i] >> s;
+ s = 8U - s;
+ }
+ else {
+ s += 8U;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void int_256_from_mp(int32* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 26
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(int32) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 26
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((int32)a->dp[i] << s);
+ r[j] &= 0x3ffffff;
+ s = 26U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (int32)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 26U) <= (word32)DIGIT_BIT) {
+ s += 26U;
+ r[j] &= 0x3ffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (int32)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((int32)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 26) {
+ r[j] &= 0x3ffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 26 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 256)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash Hash of the message to verify.
+ * hashLen Length of the hash data.
+ * pX X ordinate of the public key point as an mp_int.
+ * pY Y ordinate of the public key point as an mp_int.
+ * pZ Z ordinate of the public key point as an mp_int.
+ * r First part of the signature as an mp_int.
+ * sm Second part of the signature as an mp_int.
+ * res Set to 1 when the signature verifies, 0 otherwise.
+ * heap Heap to use for allocation.
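+ * The mp_int inputs are first flattened into ten 26-bit limbs by
+ * int_256_from_bin()/int_256_from_mp() above before being handed to
+ * the DSP through the remote handle.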
+ * returns RNG failures, MEMORY_E when memory allocation fails and + * MP_OKAY on success. + */ +int sp_dsp_ecc_verify_256(remote_handle64 handleIn, const byte* hash, word32 hashLen, mp_int* pX, + mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap) +{ + int ret; + remote_handle64 handle = handleIn; + +#if 0 + /* calling to alloc memory on the ION using these settings slowed the performance down slightly */ + int32 *x = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int)); + int32 *y = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int)); + int32 *z = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int)); + int32 *s = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int)); + int32 *u1 = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int)); + int32 *u2 = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int)); +#endif + int32 x[10] __attribute__((aligned(128))); + int32 y[10] __attribute__((aligned(128))); + int32 z[10] __attribute__((aligned(128))); + int32 s[10] __attribute__((aligned(128))); + int32 u1[10] __attribute__((aligned(128))); + int32 u2[10] __attribute__((aligned(128))); + + if (hashLen > 32U) { + hashLen = 32U; + } + + int_256_from_bin(u1, 10, hash, (int)hashLen); + int_256_from_mp(u2, 10, r); + int_256_from_mp(s, 10, sm); + int_256_from_mp(x, 10, pX); + int_256_from_mp(y, 10, pY); + int_256_from_mp(z, 10, pZ); + + if (handle_function != NULL) { + handle_function(&handle, WOLFSSL_HANDLE_GET, NULL); + } + + *res = 0; + ret = wolfSSL_DSP_ECC_Verify_256(handle, u1, 10, u2, 10, s, 10, x, 10, y, 10, z, 10, res); + + if (handle_function != NULL) { + handle_function(&handle, WOLFSSL_HANDLE_DONE, NULL); + } +#if 0 + rpcmem_free(x); + rpcmem_free(y); + rpcmem_free(z); + rpcmem_free(s); + rpcmem_free(u1); + rpcmem_free(u2); +#endif + return ret; +} + + +/* Used to assign a handle to an ecc_key structure. + * returns 0 on success */ +int wc_ecc_set_handle(ecc_key* key, remote_handle64 handle) +{ + if (key == NULL) { + return BAD_FUNC_ARG; + } + key->handle = handle; + return 0; +} +#endif /* HAVE_ECC_VERIFY */ +#endif /* !WOLFSSL_SP_NO_256 */ +#endif /* WOLFSSL_HAVE_SP_ECC */ +#endif /* WOLFSSL_DSP */ diff --git a/client/wolfssl/wolfcrypt/src/wc_encrypt.c b/client/wolfssl/wolfcrypt/src/wc_encrypt.c new file mode 100644 index 0000000..39dbeec --- /dev/null +++ b/client/wolfssl/wolfcrypt/src/wc_encrypt.c @@ -0,0 +1,660 @@ +/* wc_encrypt.c + * + * Copyright (C) 2006-2020 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/aes.h>
+#include <wolfssl/wolfcrypt/des3.h>
+#include <wolfssl/wolfcrypt/hash.h>
+#include <wolfssl/wolfcrypt/arc4.h>
+#include <wolfssl/wolfcrypt/wc_encrypt.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/asn.h>
+#include <wolfssl/wolfcrypt/coding.h>
+#include <wolfssl/wolfcrypt/pwdbased.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#if !defined(NO_AES) && defined(HAVE_AES_CBC)
+#ifdef HAVE_AES_DECRYPT
+int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
+ const byte* key, word32 keySz, const byte* iv)
+{
+ int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ Aes* aes = NULL;
+#else
+ Aes aes[1];
+#endif
+
+ if (out == NULL || in == NULL || key == NULL || iv == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ aes = (Aes*)XMALLOC(sizeof(Aes), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (aes == NULL)
+ return MEMORY_E;
+#endif
+
+ ret = wc_AesInit(aes, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_AesSetKey(aes, key, keySz, iv, AES_DECRYPTION);
+ if (ret == 0)
+ ret = wc_AesCbcDecrypt(aes, out, in, inSz);
+
+ wc_AesFree(aes);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(aes, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+#endif /* HAVE_AES_DECRYPT */
+
+int wc_AesCbcEncryptWithKey(byte* out, const byte* in, word32 inSz,
+ const byte* key, word32 keySz, const byte* iv)
+{
+ int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ Aes* aes;
+#else
+ Aes aes[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ aes = (Aes*)XMALLOC(sizeof(Aes), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (aes == NULL)
+ return MEMORY_E;
+#endif
+
+ ret = wc_AesInit(aes, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_AesSetKey(aes, key, keySz, iv, AES_ENCRYPTION);
+ if (ret == 0)
+ ret = wc_AesCbcEncrypt(aes, out, in, inSz);
+
+ wc_AesFree(aes);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(aes, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+#endif /* !NO_AES && HAVE_AES_CBC */
+
+
+#if !defined(NO_DES3) && !defined(WOLFSSL_TI_CRYPT)
+int wc_Des_CbcEncryptWithKey(byte* out, const byte* in, word32 sz,
+ const byte* key, const byte* iv)
+{
+ int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ Des* des;
+#else
+ Des des[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ des = (Des*)XMALLOC(sizeof(Des), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (des == NULL)
+ return MEMORY_E;
+#endif
+
+ ret = wc_Des_SetKey(des, key, iv, DES_ENCRYPTION);
+ if (ret == 0)
+ ret = wc_Des_CbcEncrypt(des, out, in, sz);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(des, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+
+int wc_Des_CbcDecryptWithKey(byte* out, const byte* in, word32 sz,
+ const byte* key, const byte* iv)
+{
+ int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ Des* des;
+#else
+ Des des[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ des = (Des*)XMALLOC(sizeof(Des), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (des == NULL)
+ return MEMORY_E;
+#endif
+
+ ret = wc_Des_SetKey(des, key, iv, DES_DECRYPTION);
+ if (ret == 0)
+ ret = wc_Des_CbcDecrypt(des, out, in, sz);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(des, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+
+
+int wc_Des3_CbcEncryptWithKey(byte* out, const byte* in, word32 sz,
+ const byte* key, const byte* iv)
+{
+ int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ Des3* des3;
+#else
+ Des3 des3[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ des3 = (Des3*)XMALLOC(sizeof(Des3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (des3 == NULL)
+ return MEMORY_E;
+#endif
+
+ ret
+
+
+int wc_Des3_CbcEncryptWithKey(byte* out, const byte* in, word32 sz,
+                              const byte* key, const byte* iv)
+{
+    int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+    Des3* des3;
+#else
+    Des3  des3[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+    des3 = (Des3*)XMALLOC(sizeof(Des3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (des3 == NULL)
+        return MEMORY_E;
+#endif
+
+    ret = wc_Des3Init(des3, NULL, INVALID_DEVID);
+    if (ret == 0) {
+        ret = wc_Des3_SetKey(des3, key, iv, DES_ENCRYPTION);
+        if (ret == 0)
+            ret = wc_Des3_CbcEncrypt(des3, out, in, sz);
+        wc_Des3Free(des3);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(des3, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return ret;
+}
+
+
+int wc_Des3_CbcDecryptWithKey(byte* out, const byte* in, word32 sz,
+                              const byte* key, const byte* iv)
+{
+    int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+    Des3* des3;
+#else
+    Des3  des3[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+    des3 = (Des3*)XMALLOC(sizeof(Des3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (des3 == NULL)
+        return MEMORY_E;
+#endif
+
+    ret = wc_Des3Init(des3, NULL, INVALID_DEVID);
+    if (ret == 0) {
+        ret = wc_Des3_SetKey(des3, key, iv, DES_DECRYPTION);
+        if (ret == 0)
+            ret = wc_Des3_CbcDecrypt(des3, out, in, sz);
+        wc_Des3Free(des3);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(des3, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return ret;
+}
+
+#endif /* !NO_DES3 */
+
+
+#ifdef WOLFSSL_ENCRYPTED_KEYS
+
+int wc_BufferKeyDecrypt(EncryptedInfo* info, byte* der, word32 derSz,
+                        const byte* password, int passwordSz, int hashType)
+{
+    int ret = NOT_COMPILED_IN;
+#ifdef WOLFSSL_SMALL_STACK
+    byte* key = NULL;
+#else
+    byte  key[WC_MAX_SYM_KEY_SIZE];
+#endif
+
+    (void)derSz;
+    (void)passwordSz;
+    (void)hashType;
+
+    if (der == NULL || password == NULL || info == NULL || info->keySz == 0) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* use file's salt for key derivation, hex decode first */
+    if (Base16_Decode(info->iv, info->ivSz, info->iv, &info->ivSz) != 0) {
+        return BUFFER_E;
+    }
+    if (info->ivSz < PKCS5_SALT_SZ)
+        return BUFFER_E;
+
+#ifdef WOLFSSL_SMALL_STACK
+    key = (byte*)XMALLOC(WC_MAX_SYM_KEY_SIZE, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY);
+    if (key == NULL) {
+        return MEMORY_E;
+    }
+#endif
+
+    (void)XMEMSET(key, 0, WC_MAX_SYM_KEY_SIZE);
+
+#ifndef NO_PWDBASED
+    if ((ret = wc_PBKDF1(key, password, passwordSz, info->iv, PKCS5_SALT_SZ, 1,
+                         info->keySz, hashType)) != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+        XFREE(key, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY);
+#endif
+        return ret;
+    }
+#endif
+
+#ifndef NO_DES3
+    if (info->cipherType == WC_CIPHER_DES)
+        ret = wc_Des_CbcDecryptWithKey(der, der, derSz, key, info->iv);
+    if (info->cipherType == WC_CIPHER_DES3)
+        ret = wc_Des3_CbcDecryptWithKey(der, der, derSz, key, info->iv);
+#endif /* NO_DES3 */
+#if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(HAVE_AES_DECRYPT)
+    if (info->cipherType == WC_CIPHER_AES_CBC)
+        ret = wc_AesCbcDecryptWithKey(der, der, derSz, key, info->keySz,
+                                      info->iv);
+#endif /* !NO_AES && HAVE_AES_CBC && HAVE_AES_DECRYPT */
+
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(key, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY);
+#endif
+
+    return ret;
+}
+
+int wc_BufferKeyEncrypt(EncryptedInfo* info, byte* der, word32 derSz,
+                        const byte* password, int passwordSz, int hashType)
+{
+    int ret = NOT_COMPILED_IN;
+#ifdef WOLFSSL_SMALL_STACK
+    byte* key = NULL;
+#else
+    byte  key[WC_MAX_SYM_KEY_SIZE];
+#endif
+
+    (void)derSz;
+    (void)passwordSz;
+    (void)hashType;
+
+    if (der == NULL || password == NULL || info == NULL || info->keySz == 0 ||
+        info->ivSz < PKCS5_SALT_SZ) {
+        return BAD_FUNC_ARG;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    key = (byte*)XMALLOC(WC_MAX_SYM_KEY_SIZE, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY);
+    if (key == NULL) {
+        return MEMORY_E;
+    }
+#endif /* WOLFSSL_SMALL_STACK */
+
+    (void)XMEMSET(key, 0, WC_MAX_SYM_KEY_SIZE);
+
+#ifndef NO_PWDBASED
+    if ((ret = wc_PBKDF1(key, password, passwordSz, info->iv, PKCS5_SALT_SZ, 1,
+                         info->keySz, hashType)) != 0) {
+#ifdef
WOLFSSL_SMALL_STACK + XFREE(key, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY); +#endif + return ret; + } +#endif + +#ifndef NO_DES3 + if (info->cipherType == WC_CIPHER_DES) + ret = wc_Des_CbcEncryptWithKey(der, der, derSz, key, info->iv); + if (info->cipherType == WC_CIPHER_DES3) + ret = wc_Des3_CbcEncryptWithKey(der, der, derSz, key, info->iv); +#endif /* NO_DES3 */ +#if !defined(NO_AES) && defined(HAVE_AES_CBC) + if (info->cipherType == WC_CIPHER_AES_CBC) + ret = wc_AesCbcEncryptWithKey(der, der, derSz, key, info->keySz, + info->iv); +#endif /* !NO_AES && HAVE_AES_CBC */ + +#ifdef WOLFSSL_SMALL_STACK + XFREE(key, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY); +#endif + + return ret; +} + +#endif /* WOLFSSL_ENCRYPTED_KEYS */ + + +#if !defined(NO_PWDBASED) && !defined(NO_ASN) + +#if defined(HAVE_PKCS8) || defined(HAVE_PKCS12) +/* Decrypt/Encrypt input in place from parameters based on id + * + * returns a negative value on fail case + */ +int wc_CryptKey(const char* password, int passwordSz, byte* salt, + int saltSz, int iterations, int id, byte* input, + int length, int version, byte* cbcIv, int enc, int shaOid) +{ + int typeH; + int derivedLen = 0; + int ret = 0; +#ifdef WOLFSSL_SMALL_STACK + byte* key; +#else + byte key[MAX_KEY_SIZE]; +#endif + + (void)input; + (void)length; + (void)enc; + + WOLFSSL_ENTER("wc_CryptKey"); + + switch (id) { + #ifndef NO_DES3 + #ifndef NO_MD5 + case PBE_MD5_DES: + typeH = WC_MD5; + derivedLen = 16; /* may need iv for v1.5 */ + break; + #endif + #ifndef NO_SHA + case PBE_SHA1_DES: + typeH = WC_SHA; + derivedLen = 16; /* may need iv for v1.5 */ + break; + + case PBE_SHA1_DES3: + switch(shaOid) { + case HMAC_SHA256_OID: + typeH = WC_SHA256; + derivedLen = 32; + break; + default: + typeH = WC_SHA; + derivedLen = 32; /* may need iv for v1.5 */ + break; + } + break; + #endif /* !NO_SHA */ + #endif /* !NO_DES3 */ + #if !defined(NO_SHA) && !defined(NO_RC4) + case PBE_SHA1_RC4_128: + typeH = WC_SHA; + derivedLen = 16; + break; + #endif + #if defined(WOLFSSL_AES_256) + case PBE_AES256_CBC: + switch(shaOid) { + case HMAC_SHA256_OID: + typeH = WC_SHA256; + derivedLen = 32; + break; + #ifndef NO_SHA + default: + typeH = WC_SHA; + derivedLen = 32; + break; + #endif + } + break; + #endif /* WOLFSSL_AES_256 && !NO_SHA */ + #if defined(WOLFSSL_AES_128) + case PBE_AES128_CBC: + switch(shaOid) { + case HMAC_SHA256_OID: + typeH = WC_SHA256; + derivedLen = 16; + break; + #ifndef NO_SHA + default: + typeH = WC_SHA; + derivedLen = 16; + break; + #endif + } + break; + #endif /* WOLFSSL_AES_128 && !NO_SHA */ + default: + WOLFSSL_MSG("Unknown/Unsupported encrypt/decrypt id"); + (void)shaOid; + return ALGO_ID_E; + } + +#ifdef WOLFSSL_SMALL_STACK + key = (byte*)XMALLOC(MAX_KEY_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (key == NULL) + return MEMORY_E; +#endif + + if (version == PKCS5v2) + ret = wc_PBKDF2(key, (byte*)password, passwordSz, + salt, saltSz, iterations, derivedLen, typeH); +#ifndef NO_SHA + else if (version == PKCS5) + ret = wc_PBKDF1(key, (byte*)password, passwordSz, + salt, saltSz, iterations, derivedLen, typeH); +#endif +#ifdef HAVE_PKCS12 + else if (version == PKCS12v1) { + int i, idx = 0; + byte unicodePasswd[MAX_UNICODE_SZ]; + + if ( (passwordSz * 2 + 2) > (int)sizeof(unicodePasswd)) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return UNICODE_SIZE_E; + } + + for (i = 0; i < passwordSz; i++) { + unicodePasswd[idx++] = 0x00; + unicodePasswd[idx++] = (byte)password[i]; + } + /* add trailing NULL */ + unicodePasswd[idx++] = 0x00; + unicodePasswd[idx++] 
= 0x00; + + ret = wc_PKCS12_PBKDF(key, unicodePasswd, idx, salt, saltSz, + iterations, derivedLen, typeH, 1); + if (id != PBE_SHA1_RC4_128) + ret += wc_PKCS12_PBKDF(cbcIv, unicodePasswd, idx, salt, saltSz, + iterations, 8, typeH, 2); + } +#endif /* HAVE_PKCS12 */ + else { +#ifdef WOLFSSL_SMALL_STACK + XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + WOLFSSL_MSG("Unknown/Unsupported PKCS version"); + return ALGO_ID_E; + } + + if (ret != 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; + } + + switch (id) { +#ifndef NO_DES3 + #if !defined(NO_SHA) || !defined(NO_MD5) + case PBE_MD5_DES: + case PBE_SHA1_DES: + { + Des des; + byte* desIv = key + 8; + + if (version == PKCS5v2 || version == PKCS12v1) + desIv = cbcIv; + + if (enc) { + ret = wc_Des_SetKey(&des, key, desIv, DES_ENCRYPTION); + } + else { + ret = wc_Des_SetKey(&des, key, desIv, DES_DECRYPTION); + } + if (ret != 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; + } + + if (enc) { + wc_Des_CbcEncrypt(&des, input, input, length); + } + else { + wc_Des_CbcDecrypt(&des, input, input, length); + } + break; + } + #endif /* !NO_SHA || !NO_MD5 */ + + #ifndef NO_SHA + case PBE_SHA1_DES3: + { + Des3 des; + byte* desIv = key + 24; + + if (version == PKCS5v2 || version == PKCS12v1) + desIv = cbcIv; + + ret = wc_Des3Init(&des, NULL, INVALID_DEVID); + if (ret != 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; + } + if (enc) { + ret = wc_Des3_SetKey(&des, key, desIv, DES_ENCRYPTION); + } + else { + ret = wc_Des3_SetKey(&des, key, desIv, DES_DECRYPTION); + } + if (ret != 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; + } + if (enc) { + ret = wc_Des3_CbcEncrypt(&des, input, input, length); + } + else { + ret = wc_Des3_CbcDecrypt(&des, input, input, length); + } + if (ret != 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; + } + break; + } + #endif /* !NO_SHA */ +#endif +#if !defined(NO_RC4) && !defined(NO_SHA) + case PBE_SHA1_RC4_128: + { + Arc4 dec; + + wc_Arc4SetKey(&dec, key, derivedLen); + wc_Arc4Process(&dec, input, input, length); + break; + } +#endif +#if !defined(NO_AES) && defined(HAVE_AES_CBC) + #ifdef WOLFSSL_AES_256 + case PBE_AES256_CBC: + case PBE_AES128_CBC: + { + Aes aes; + ret = wc_AesInit(&aes, NULL, INVALID_DEVID); + if (ret == 0) { + if (enc) { + ret = wc_AesSetKey(&aes, key, derivedLen, cbcIv, + AES_ENCRYPTION); + } + else { + ret = wc_AesSetKey(&aes, key, derivedLen, cbcIv, + AES_DECRYPTION); + } + } + if (ret == 0) { + if (enc) + ret = wc_AesCbcEncrypt(&aes, input, input, length); + else + ret = wc_AesCbcDecrypt(&aes, input, input, length); + } + if (ret != 0) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; + } + ForceZero(&aes, sizeof(Aes)); + break; + } + #endif /* WOLFSSL_AES_256 */ +#endif /* !NO_AES && HAVE_AES_CBC */ + + default: +#ifdef WOLFSSL_SMALL_STACK + XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + WOLFSSL_MSG("Unknown/Unsupported encrypt/decryption algorithm"); + return ALGO_ID_E; + } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return ret; +} + +#endif /* HAVE_PKCS8 || HAVE_PKCS12 */ +#endif /* !NO_PWDBASED */ diff --git a/client/wolfssl/wolfcrypt/src/wc_pkcs11.c b/client/wolfssl/wolfcrypt/src/wc_pkcs11.c new file mode 100644 index 0000000..cac0a0f --- /dev/null 
+++ b/client/wolfssl/wolfcrypt/src/wc_pkcs11.c
@@ -0,0 +1,2546 @@
+/* wc_pkcs11.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_PKCS11
+
+#include <dlfcn.h>
+
+#include <wolfssl/wolfcrypt/wc_pkcs11.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/asn.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#ifndef NO_RSA
+    #include <wolfssl/wolfcrypt/rsa.h>
+#endif
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+#define MAX_EC_PARAM_LEN   16
+
+#if defined(NO_PKCS11_RSA) && !defined(NO_RSA)
+    #define NO_RSA
+#endif
+#if defined(NO_PKCS11_ECC) && defined(HAVE_ECC)
+    #undef HAVE_ECC
+#endif
+#if defined(NO_PKCS11_AES) && !defined(NO_AES)
+    #define NO_AES
+#endif
+#if defined(NO_PKCS11_AESGCM) && defined(HAVE_AESGCM)
+    #undef HAVE_AESGCM
+#endif
+#if defined(NO_PKCS11_AESCBC) && defined(HAVE_AES_CBC)
+    #undef HAVE_AES_CBC
+#endif
+#if defined(NO_PKCS11_HMAC) && !defined(NO_HMAC)
+    #define NO_HMAC
+#endif
+#if defined(NO_PKCS11_RNG) && !defined(WC_NO_RNG)
+    #define WC_NO_RNG
+#endif
+
+
+#if defined(HAVE_ECC) && !defined(NO_PKCS11_ECDH)
+static CK_BBOOL ckFalse = CK_FALSE;
+#endif
+#if !defined(NO_RSA) || defined(HAVE_ECC) || (!defined(NO_AES) && \
+    (defined(HAVE_AESGCM) || defined(HAVE_AES_CBC))) || !defined(NO_HMAC)
+static CK_BBOOL ckTrue  = CK_TRUE;
+#endif
+
+#ifndef NO_RSA
+static CK_KEY_TYPE rsaKeyType = CKK_RSA;
+#endif
+#ifdef HAVE_ECC
+static CK_KEY_TYPE ecKeyType  = CKK_EC;
+#endif
+#if !defined(NO_RSA) || defined(HAVE_ECC)
+static CK_OBJECT_CLASS pubKeyClass  = CKO_PUBLIC_KEY;
+static CK_OBJECT_CLASS privKeyClass = CKO_PRIVATE_KEY;
+#endif
+#if (!defined(NO_AES) && (defined(HAVE_AESGCM) || defined(HAVE_AES_CBC))) || \
+    !defined(NO_HMAC) || (defined(HAVE_ECC) && !defined(NO_PKCS11_ECDH))
+static CK_OBJECT_CLASS secretKeyClass = CKO_SECRET_KEY;
+#endif
+
+/**
+ * Load library, get function list and initialize PKCS#11.
+ *
+ * @param  dev      [in]  Device object.
+ * @param  library  [in]  Library name including path.
+ * @param  heap     [in]  Heap hint.
+ * @return  BAD_FUNC_ARG when dev or library are NULL pointers.
+ *          BAD_PATH_ERROR when dynamic library cannot be opened.
+ *          WC_INIT_E when PKCS#11 initialization fails.
+ *          WC_HW_E when unable to get PKCS#11 function list.
+ *          0 on success.
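The NO_PKCS11_* guards above let a build keep a particular algorithm in software while everything else is offloaded to the token. A sketch of a user_settings.h-style fragment (illustrative only, not part of this patch):

    /* user_settings.h (illustrative): offload to the PKCS#11 token, but
     * keep RSA in software by disabling only the PKCS#11 RSA path. */
    #define HAVE_PKCS11
    #define NO_PKCS11_RSA
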
+ */ +int wc_Pkcs11_Initialize(Pkcs11Dev* dev, const char* library, void* heap) +{ + int ret = 0; + void* func; + CK_C_INITIALIZE_ARGS args; + + if (dev == NULL || library == NULL) + ret = BAD_FUNC_ARG; + + if (ret == 0) { + dev->heap = heap; + dev->dlHandle = dlopen(library, RTLD_NOW | RTLD_LOCAL); + if (dev->dlHandle == NULL) { + WOLFSSL_MSG(dlerror()); + ret = BAD_PATH_ERROR; + } + } + + if (ret == 0) { + dev->func = NULL; + func = dlsym(dev->dlHandle, "C_GetFunctionList"); + if (func == NULL) + ret = WC_HW_E; + } + if (ret == 0) { + if (((CK_C_GetFunctionList)func)(&dev->func) != CKR_OK) + ret = WC_HW_E; + } + + if (ret == 0) { + XMEMSET(&args, 0x00, sizeof(args)); + args.flags = CKF_OS_LOCKING_OK; + if (dev->func->C_Initialize(&args) != CKR_OK) + ret = WC_INIT_E; + } + + if (ret != 0) + wc_Pkcs11_Finalize(dev); + + return ret; +} + +/** + * Close the Pkcs#11 library. + * + * @param dev [in] Device object. + */ +void wc_Pkcs11_Finalize(Pkcs11Dev* dev) +{ + if (dev != NULL && dev->dlHandle != NULL) { + if (dev->func != NULL) { + dev->func->C_Finalize(NULL); + dev->func = NULL; + } + dlclose(dev->dlHandle); + dev->dlHandle = NULL; + } +} + +/** + * Set up a token for use. + * + * @param token [in] Token object. + * @param dev [in] PKCS#11 device object. + * @param slotId [in] Slot number of the token.
+ * Passing -1 uses the first available slot. + * @param tokenName [in] Name of token to initialize. + * @param userPin [in] PIN to use to login as user. + * @param userPinSz [in] Number of bytes in PIN. + * @return BAD_FUNC_ARG when token, dev and/or tokenName is NULL. + * WC_INIT_E when initializing token fails. + * WC_HW_E when another PKCS#11 library call fails. + * -1 when no slot available. + * 0 on success. + */ +int wc_Pkcs11Token_Init(Pkcs11Token* token, Pkcs11Dev* dev, int slotId, + const char* tokenName, const unsigned char* userPin, int userPinSz) +{ + int ret = 0; + CK_RV rv; + CK_SLOT_ID* slot = NULL; + CK_ULONG slotCnt = 0; + + if (token == NULL || dev == NULL || tokenName == NULL) + ret = BAD_FUNC_ARG; + + if (ret == 0) { + if (slotId < 0) { + /* Use first available slot with a token. */ + rv = dev->func->C_GetSlotList(CK_TRUE, NULL, &slotCnt); + if (rv != CKR_OK) + ret = WC_HW_E; + if (ret == 0) { + slot = (CK_SLOT_ID*)XMALLOC(slotCnt * sizeof(*slot), dev->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (slot == NULL) + ret = MEMORY_E; + } + if (ret == 0) { + rv = dev->func->C_GetSlotList(CK_TRUE, slot, &slotCnt); + if (rv != CKR_OK) + ret = WC_HW_E; + } + if (ret == 0) { + if (slotCnt > 0) + slotId = (int)slot[0]; + else + ret = WC_HW_E; + } + } + } + if (ret == 0) { + token->func = dev->func; + token->slotId = (CK_SLOT_ID)slotId; + token->handle = NULL_PTR; + token->userPin = (CK_UTF8CHAR_PTR)userPin; + token->userPinSz = (CK_ULONG)userPinSz; + } + + if (slot != NULL) + XFREE(slot, dev->heap, DYNAMIC_TYPE_TMP_BUFFER); + + return ret; +} + +/** + * Finalize token. + * Closes all sessions on token. + * + * @param token [in] Token object. + */ +void wc_Pkcs11Token_Final(Pkcs11Token* token) +{ + if (token != NULL && token->func != NULL) { + token->func->C_CloseAllSessions(token->slotId); + token->handle = NULL_PTR; + ForceZero(token->userPin, (word32)token->userPinSz); + } +} + +/** + * Open a session on a token. + * + * @param token [in] Token object. + * @param session [in] Session object. + * @param readWrite [in] Boolean indicating to open session for Read/Write. + * @return BAD_FUNC_ARG when token or session is NULL. + * WC_HW_E when opening the session fails. + * 0 on success. + */ +static int Pkcs11OpenSession(Pkcs11Token* token, Pkcs11Session* session, + int readWrite) +{ + int ret = 0; + CK_RV rv; + + if (token == NULL || session == NULL) + ret = BAD_FUNC_ARG; + + if (ret == 0) { + if (token->handle != NULL_PTR) + session->handle = token->handle; + else { + /* Create a new session. */ + CK_FLAGS flags = CKF_SERIAL_SESSION; + + if (readWrite) + flags |= CKF_RW_SESSION; + + rv = token->func->C_OpenSession(token->slotId, flags, + (CK_VOID_PTR)NULL, (CK_NOTIFY)NULL, + &session->handle); + if (rv != CKR_OK) + ret = WC_HW_E; + if (ret == 0 && token->userPin != NULL) { + rv = token->func->C_Login(session->handle, CKU_USER, + token->userPin, token->userPinSz); + if (rv != CKR_OK) + ret = WC_HW_E; + } + } + } + if (ret == 0) { + session->func = token->func; + session->slotId = token->slotId; + } + + return ret; +} + +/** + * Close a session on a token. + * Won't close a session created externally. + * + * @param token [in] Token object. + * @param session [in] Session object. 
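The device/token/session API in this file forms a small lifecycle. A bring-up sketch follows; the module path, token name and PIN are illustrative assumptions only:

    Pkcs11Dev   dev;
    Pkcs11Token token;
    static const unsigned char pin[] = "1234";   /* hypothetical PIN */

    if (wc_Pkcs11_Initialize(&dev, "/usr/lib/softhsm/libsofthsm2.so",
                             NULL) == 0) {
        /* slotId -1: use the first slot that has a token */
        if (wc_Pkcs11Token_Init(&token, &dev, -1, "myToken",
                                pin, (int)sizeof(pin) - 1) == 0) {
            if (wc_Pkcs11Token_Open(&token, 1) == 0) {   /* read/write */
                /* ... perform crypto through the token here ... */
                wc_Pkcs11Token_Close(&token);
            }
            wc_Pkcs11Token_Final(&token);
        }
        wc_Pkcs11_Finalize(&dev);
    }
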
+ */
+static void Pkcs11CloseSession(Pkcs11Token* token, Pkcs11Session* session)
+{
+    if (token != NULL && session != NULL && token->handle != session->handle) {
+        if (token->userPin != NULL)
+            session->func->C_Logout(session->handle);
+        session->func->C_CloseSession(session->handle);
+    }
+}
+
+/**
+ * Open a session on the token to be used for all operations.
+ *
+ * @param  token      [in]  Token object.
+ * @param  readWrite  [in]  Boolean indicating to open session for Read/Write.
+ * @return  BAD_FUNC_ARG when token is NULL.
+ *          WC_HW_E when opening the session fails.
+ *          0 on success.
+ */
+int wc_Pkcs11Token_Open(Pkcs11Token* token, int readWrite)
+{
+    int ret = 0;
+    Pkcs11Session session;
+
+    if (token == NULL)
+        ret = BAD_FUNC_ARG;
+
+    if (ret == 0) {
+        ret = Pkcs11OpenSession(token, &session, readWrite);
+        token->handle = session.handle;
+    }
+
+    return ret;
+}
+
+/**
+ * Close the token's session.
+ * All objects, like keys, will be destroyed.
+ *
+ * @param  token  [in]  Token object.
+ */
+void wc_Pkcs11Token_Close(Pkcs11Token* token)
+{
+    Pkcs11Session session;
+
+    if (token != NULL) {
+        session.func = token->func;
+        session.handle = token->handle;
+        token->handle = NULL_PTR;
+        Pkcs11CloseSession(token, &session);
+    }
+}
+
+
+#if (!defined(NO_AES) && (defined(HAVE_AESGCM) || defined(HAVE_AES_CBC))) || \
+    !defined(NO_HMAC)
+static int Pkcs11CreateSecretKey(CK_OBJECT_HANDLE* key, Pkcs11Session* session,
+                                 CK_KEY_TYPE keyType, unsigned char* data,
+                                 int len, unsigned char* id, int idLen)
+{
+    int ret = 0;
+    CK_RV rv;
+    CK_ATTRIBUTE keyTemplate[] = {
+        { CKA_CLASS,    &secretKeyClass, sizeof(secretKeyClass) },
+        { CKA_KEY_TYPE, &keyType,        sizeof(keyType)        },
+        { CKA_ENCRYPT,  &ckTrue,         sizeof(ckTrue)         },
+        { CKA_VALUE,    NULL,            0                      },
+        { CKA_ID,       id,              (CK_ULONG)idLen        }
+    };
+    int keyTmplCnt = 4;
+
+    WOLFSSL_MSG("PKCS#11: Create Secret Key");
+
+    /* Set the secret key data. */
+    keyTemplate[3].pValue     = data;
+    keyTemplate[3].ulValueLen = (CK_ULONG)len;
+
+    if (idLen > 0)
+        keyTmplCnt++;
+
+    /* Create an object containing key data for device to use. */
+    rv = session->func->C_CreateObject(session->handle, keyTemplate, keyTmplCnt,
+                                       key);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+
+    return ret;
+}
+#endif
+
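Pkcs11CreateSecretKey above shows the template idiom used throughout this file: fixed attributes first, optional ones (here CKA_ID) appended only when present, with the count adjusted to match. A reduced sketch of the same C_CreateObject shape; 'session' and the key bytes are hypothetical:

    unsigned char keyData[16] = {0};             /* hypothetical raw key */
    CK_OBJECT_HANDLE obj;
    CK_KEY_TYPE  aesType = CKK_AES;
    CK_ATTRIBUTE tmpl[]  = {
        { CKA_CLASS,    &secretKeyClass, sizeof(secretKeyClass) },
        { CKA_KEY_TYPE, &aesType,        sizeof(aesType)        },
        { CKA_ENCRYPT,  &ckTrue,         sizeof(ckTrue)         },
        { CKA_VALUE,    keyData,         sizeof(keyData)        }
    };
    CK_RV rv = session->func->C_CreateObject(session->handle, tmpl,
                                             sizeof(tmpl) / sizeof(*tmpl),
                                             &obj);
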
+#ifndef NO_RSA
+/**
+ * Create a PKCS#11 object containing the RSA private key data.
+ *
+ * @param  privateKey  [out]  Handle to private key object.
+ * @param  session     [in]   Session object.
+ * @param  rsaKey      [in]   RSA key with private key data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11CreateRsaPrivateKey(CK_OBJECT_HANDLE* privateKey,
+                                     Pkcs11Session* session,
+                                     RsaKey* rsaKey)
+{
+    int ret = 0;
+    CK_RV rv;
+    CK_ATTRIBUTE keyTemplate[] = {
+        { CKA_CLASS,            &privKeyClass, sizeof(privKeyClass) },
+        { CKA_KEY_TYPE,         &rsaKeyType,   sizeof(rsaKeyType)   },
+        { CKA_DECRYPT,          &ckTrue,       sizeof(ckTrue)       },
+        { CKA_MODULUS,          NULL,          0                    },
+        { CKA_PRIVATE_EXPONENT, NULL,          0                    },
+        { CKA_PRIME_1,          NULL,          0                    },
+        { CKA_PRIME_2,          NULL,          0                    },
+        { CKA_EXPONENT_1,       NULL,          0                    },
+        { CKA_EXPONENT_2,       NULL,          0                    },
+        { CKA_COEFFICIENT,      NULL,          0                    },
+        { CKA_PUBLIC_EXPONENT,  NULL,          0                    }
+    };
+    CK_ULONG keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate);
+
+    /* Set the modulus and private key data. */
+    keyTemplate[ 3].pValue     = rsaKey->n.raw.buf;
+    keyTemplate[ 3].ulValueLen = rsaKey->n.raw.len;
+    keyTemplate[ 4].pValue     = rsaKey->d.raw.buf;
+    keyTemplate[ 4].ulValueLen = rsaKey->d.raw.len;
+    keyTemplate[ 5].pValue     = rsaKey->p.raw.buf;
+    keyTemplate[ 5].ulValueLen = rsaKey->p.raw.len;
+    keyTemplate[ 6].pValue     = rsaKey->q.raw.buf;
+    keyTemplate[ 6].ulValueLen = rsaKey->q.raw.len;
+    keyTemplate[ 7].pValue     = rsaKey->dP.raw.buf;
+    keyTemplate[ 7].ulValueLen = rsaKey->dP.raw.len;
+    keyTemplate[ 8].pValue     = rsaKey->dQ.raw.buf;
+    keyTemplate[ 8].ulValueLen = rsaKey->dQ.raw.len;
+    keyTemplate[ 9].pValue     = rsaKey->u.raw.buf;
+    keyTemplate[ 9].ulValueLen = rsaKey->u.raw.len;
+    keyTemplate[10].pValue     = rsaKey->e.raw.buf;
+    keyTemplate[10].ulValueLen = rsaKey->e.raw.len;
+
+    rv = session->func->C_CreateObject(session->handle, keyTemplate, keyTmplCnt,
+                                       privateKey);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+
+    return ret;
+}
+#endif
+
+#ifdef HAVE_ECC
+/**
+ * Set the ECC parameters into the template.
+ *
+ * @param  key   [in]  ECC key.
+ * @param  tmpl  [in]  PKCS#11 template.
+ * @param  idx   [in]  Index of template to put parameters into.
+ * @return  NOT_COMPILED_IN when the EC parameters are not known.
+ *          0 on success.
+ */
+static int Pkcs11EccSetParams(ecc_key* key, CK_ATTRIBUTE* tmpl, int idx)
+{
+    int ret = 0;
+
+    if (key->dp != NULL && key->dp->oid != NULL) {
+        unsigned char* derParams = tmpl[idx].pValue;
+        /* ASN.1 encoding: OBJ + ecc parameters OID */
+        tmpl[idx].ulValueLen = key->dp->oidSz + 2;
+        derParams[0] = ASN_OBJECT_ID;
+        derParams[1] = key->dp->oidSz;
+        XMEMCPY(derParams + 2, key->dp->oid, key->dp->oidSz);
+    }
+    else
+        ret = NOT_COMPILED_IN;
+
+    return ret;
+}
+
+/**
+ * Create a PKCS#11 object containing the ECC private key data.
+ *
+ * @param  privateKey   [out]  Handle to private key object.
+ * @param  session      [in]   Session object.
+ * @param  private_key  [in]   ECC private key.
+ * @param  operation    [in]   Cryptographic operation key is to be used for.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11CreateEccPrivateKey(CK_OBJECT_HANDLE* privateKey,
+                                     Pkcs11Session* session,
+                                     ecc_key* private_key,
+                                     CK_ATTRIBUTE_TYPE operation)
+{
+    int ret = 0;
+    CK_RV rv;
+    CK_UTF8CHAR params[MAX_EC_PARAM_LEN];
+    CK_ATTRIBUTE keyTemplate[] = {
+        { CKA_CLASS,     &privKeyClass, sizeof(privKeyClass) },
+        { CKA_KEY_TYPE,  &ecKeyType,    sizeof(ecKeyType)    },
+        { operation,     &ckTrue,       sizeof(ckTrue)       },
+        { CKA_EC_PARAMS, params,        0                    },
+        { CKA_VALUE,     NULL,          0                    }
+    };
+    CK_ULONG keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate);
+
+    ret = Pkcs11EccSetParams(private_key, keyTemplate, 3);
+    if (ret == 0) {
+        keyTemplate[4].pValue     = private_key->k.raw.buf;
+        keyTemplate[4].ulValueLen = private_key->k.raw.len;
+
+        rv = session->func->C_CreateObject(session->handle, keyTemplate,
+                                           keyTmplCnt, privateKey);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    return ret;
+}
+#endif
+
+#if !defined(NO_RSA) || defined(HAVE_ECC) || (!defined(NO_AES) && \
+    (defined(HAVE_AESGCM) || defined(HAVE_AES_CBC))) || !defined(NO_HMAC)
+/**
+ * Check if mechanism is available in session on token.
+ *
+ * @param  session  [in]  Session object.
+ * @param  mech     [in]  Mechanism to look for.
+ * @return  NOT_COMPILED_IN when mechanism not available.
+ *          0 when mechanism is available.
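Pkcs11MechAvail (closed out just below) is a thin probe over C_GetMechanismInfo; callers treat NOT_COMPILED_IN as the signal to fall back to software. For example:

    /* Sketch: use the token for AES-GCM only when it advertises it. */
    if (Pkcs11MechAvail(&session, CKM_AES_GCM) == 0) {
        /* hardware path */
    }
    else {
        /* NOT_COMPILED_IN: fall back to software wolfCrypt */
    }
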
+ */ +static int Pkcs11MechAvail(Pkcs11Session* session, CK_MECHANISM_TYPE mech) +{ + int ret = 0; + CK_RV rv; + CK_MECHANISM_INFO mechInfo; + + rv = session->func->C_GetMechanismInfo(session->slotId, mech, &mechInfo); + if (rv != CKR_OK) + ret = NOT_COMPILED_IN; + + return ret; +} +#endif + +#ifndef NO_HMAC +/** + * Return the mechanism type and key type for the digest type when using HMAC. + * + * @param macType [in] Digest type - e.g. WC_SHA256. + * @param mechType [in] Mechanism type - e.g. CKM_SHA256_HMAC. + * @param keyType [in] Key type - e.g. CKK_SHA256_HMAC. + * @return NOT_COMPILED_IN if the digest algorithm isn't recognised. + * 0 otherwise. + */ +static int Pkcs11HmacTypes(int macType, int* mechType, int* keyType) +{ + int ret = 0; + + switch (macType) + { + #ifndef NO_MD5 + case WC_MD5: + *mechType = CKM_MD5_HMAC; + *keyType = CKK_MD5_HMAC; + break; + #endif + #ifndef NO_SHA + case WC_SHA: + *mechType = CKM_SHA_1_HMAC; + *keyType = CKK_SHA_1_HMAC; + break; + #endif + #ifdef WOLFSSL_SHA224 + case WC_SHA224: + *mechType = CKM_SHA224_HMAC; + *keyType = CKK_SHA224_HMAC; + break; + #endif + #ifndef NO_SHA256 + case WC_SHA256: + *mechType = CKM_SHA256_HMAC; + *keyType = CKK_SHA256_HMAC; + break; + #endif + #ifdef WOLFSSL_SHA384 + case WC_SHA384: + *mechType = CKM_SHA384_HMAC; + *keyType = CKK_SHA384_HMAC; + break; + #endif + #ifdef WOLFSSL_SHA512 + case WC_SHA512: + *mechType = CKM_SHA512_HMAC; + *keyType = CKK_SHA512_HMAC; + break; + #endif + default: + ret = NOT_COMPILED_IN; + break; + } + + return ret; +} +#endif + +/** + * Store the private key on the token in the session. + * + * @param token [in] Token to store private key on. + * @param type [in] Key type. + * @param clear [in] Clear out the private data from software key. + * @param key [in] Key type specific object. + * @return NOT_COMPILED_IN when mechanism not available. + * 0 on success. 
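Pkcs11HmacTypes above is a pure table lookup from wolfCrypt digest ids to the PKCS#11 mechanism and key types, e.g.:

    int mech = 0, keyType = 0;
    if (Pkcs11HmacTypes(WC_SHA256, &mech, &keyType) == 0) {
        /* mech == CKM_SHA256_HMAC, keyType == CKK_SHA256_HMAC */
    }
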
+ */ +int wc_Pkcs11StoreKey(Pkcs11Token* token, int type, int clear, void* key) +{ + int ret = 0; + Pkcs11Session session; + CK_OBJECT_HANDLE privKey = NULL_PTR; + + ret = Pkcs11OpenSession(token, &session, 1); + if (ret == 0) { + switch (type) { + #if !defined(NO_AES) && defined(HAVE_AESGCM) + case PKCS11_KEY_TYPE_AES_GCM: { + Aes* aes = (Aes*)key; + + ret = Pkcs11MechAvail(&session, CKM_AES_GCM); + if (ret == 0) { + ret = Pkcs11CreateSecretKey(&privKey, &session, CKK_AES, + (unsigned char*)aes->devKey, + aes->keylen, + (unsigned char*)aes->id, + aes->idLen); + } + if (ret == 0 && clear) + ForceZero(aes->devKey, aes->keylen); + break; + } + #endif + #if !defined(NO_AES) && defined(HAVE_AES_CBC) + case PKCS11_KEY_TYPE_AES_CBC: { + Aes* aes = (Aes*)key; + + ret = Pkcs11MechAvail(&session, CKM_AES_CBC); + if (ret == 0) { + ret = Pkcs11CreateSecretKey(&privKey, &session, CKK_AES, + (unsigned char*)aes->devKey, + aes->keylen, + (unsigned char*)aes->id, + aes->idLen); + } + if (ret == 0 && clear) + ForceZero(aes->devKey, aes->keylen); + break; + } + #endif + #ifndef NO_HMAC + case PKCS11_KEY_TYPE_HMAC: { + Hmac* hmac = (Hmac*)key; + int mechType; + int keyType; + + ret = Pkcs11HmacTypes(hmac->macType, &mechType, &keyType); + if (ret == NOT_COMPILED_IN) + break; + + if (ret == 0) + ret = Pkcs11MechAvail(&session, mechType); + if (ret == 0) { + ret = Pkcs11CreateSecretKey(&privKey, &session, keyType, + (unsigned char*)hmac->keyRaw, + hmac->keyLen, + (unsigned char*)hmac->id, + hmac->idLen); + if (ret == WC_HW_E) { + ret = Pkcs11CreateSecretKey(&privKey, &session, + CKK_GENERIC_SECRET, + (unsigned char*)hmac->keyRaw, + hmac->keyLen, + (unsigned char*)hmac->id, + hmac->idLen); + } + } + break; + } + #endif + #ifndef NO_RSA + case PKCS11_KEY_TYPE_RSA: { + RsaKey* rsaKey = (RsaKey*)key; + + ret = Pkcs11MechAvail(&session, CKM_RSA_X_509); + if (ret == 0) + ret = Pkcs11CreateRsaPrivateKey(&privKey, &session, rsaKey); + if (ret == 0 && clear) { + mp_forcezero(&rsaKey->u); + mp_forcezero(&rsaKey->dQ); + mp_forcezero(&rsaKey->dP); + mp_forcezero(&rsaKey->q); + mp_forcezero(&rsaKey->p); + mp_forcezero(&rsaKey->d); + } + break; + } + #endif + #ifdef HAVE_ECC + case PKCS11_KEY_TYPE_EC: { + ecc_key* eccKey = (ecc_key*)key; + int ret2 = NOT_COMPILED_IN; + + #ifndef NO_PKCS11_ECDH + /* Try ECDH mechanism first. */ + ret = Pkcs11MechAvail(&session, CKM_ECDH1_DERIVE); + if (ret == 0) { + ret = Pkcs11CreateEccPrivateKey(&privKey, &session, eccKey, + CKA_DERIVE); + } + #endif + if (ret == 0 || ret == NOT_COMPILED_IN) { + /* Try ECDSA mechanism next. */ + ret2 = Pkcs11MechAvail(&session, CKM_ECDSA); + if (ret2 == 0) { + ret2 = Pkcs11CreateEccPrivateKey(&privKey, &session, + eccKey, CKA_SIGN); + } + /* OK for this to fail if set for ECDH. */ + if (ret == NOT_COMPILED_IN) + ret = ret2; + } + if (ret == 0 && clear) + mp_forcezero(&eccKey->k); + break; + } + #endif + default: + ret = NOT_COMPILED_IN; + break; + } + + Pkcs11CloseSession(token, &session); + } + + (void)privKey; + (void)clear; + (void)key; + + return ret; +} + +#if !defined(NO_RSA) || defined(HAVE_ECC) || (!defined(NO_AES) && \ + (defined(HAVE_AESGCM) || defined(HAVE_AES_CBC))) || !defined(NO_HMAC) +/** + * Find the PKCS#11 object containing the RSA public or private key data with + * the modulus specified. + * + * @param key [out] Henadle to key object. + * @param keyClass [in] Public or private key class. + * @param keyType [in] Type of key. + * @param session [in] Session object. + * @param id [in] Identifier set against a key. 
+ * @param idLen [in] Length of identifier. + * @return WC_HW_E when a PKCS#11 library call fails. + * 0 on success. + */ +static int Pkcs11FindKeyById(CK_OBJECT_HANDLE* key, CK_OBJECT_CLASS keyClass, + CK_KEY_TYPE keyType, Pkcs11Session* session, + byte* id, int idLen) +{ + int ret = 0; + CK_RV rv; + CK_ULONG count; + CK_ATTRIBUTE keyTemplate[] = { + { CKA_CLASS, &keyClass, sizeof(keyClass) }, + { CKA_KEY_TYPE, &keyType, sizeof(keyType) }, + { CKA_ID, id, (CK_ULONG)idLen } + }; + CK_ULONG keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate); + + WOLFSSL_MSG("PKCS#11: Find Key By Id"); + + rv = session->func->C_FindObjectsInit(session->handle, keyTemplate, + keyTmplCnt); + if (rv != CKR_OK) + ret = WC_HW_E; + if (ret == 0) { + rv = session->func->C_FindObjects(session->handle, key, 1, &count); + if (rv != CKR_OK) + ret = WC_HW_E; + rv = session->func->C_FindObjectsFinal(session->handle); + if (rv != CKR_OK) + ret = WC_HW_E; + } + if (ret == 0 && count == 0) + ret = WC_HW_E; + + return ret; +} +#endif + +#ifndef NO_RSA +/** + * Find the PKCS#11 object containing the RSA public or private key data with + * the modulus specified. + * + * @param key [out] Henadle to key object. + * @param keyClass [in] Public or private key class. + * @param session [in] Session object. + * @param rsaKey [in] RSA key with modulus to search on. + * @return WC_HW_E when a PKCS#11 library call fails. + * 0 on success. + */ +static int Pkcs11FindRsaKey(CK_OBJECT_HANDLE* key, CK_OBJECT_CLASS keyClass, + Pkcs11Session* session, RsaKey* rsaKey) +{ + int ret = 0; + CK_RV rv; + CK_ULONG count; + CK_ATTRIBUTE keyTemplate[] = { + { CKA_CLASS, &keyClass, sizeof(keyClass) }, + { CKA_KEY_TYPE, &rsaKeyType, sizeof(rsaKeyType) }, + { CKA_MODULUS, NULL, 0 }, + }; + CK_ULONG keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate); + + /* Set the modulus. */ + keyTemplate[2].pValue = rsaKey->n.raw.buf; + keyTemplate[2].ulValueLen = rsaKey->n.raw.len; + + rv = session->func->C_FindObjectsInit(session->handle, keyTemplate, + keyTmplCnt); + if (rv != CKR_OK) + ret = WC_HW_E; + if (ret == 0) { + rv = session->func->C_FindObjects(session->handle, key, 1, &count); + if (rv != CKR_OK) + ret = WC_HW_E; + rv = session->func->C_FindObjectsFinal(session->handle); + if (rv != CKR_OK) + ret = WC_HW_E; + } + + return ret; +} + +/** + * Exponentiate the input with the public part of the RSA key. + * Used in public encrypt and decrypt. + * + * @param session [in] Session object. + * @param info [in] Cryptographic operation data. + * @return WC_HW_E when a PKCS#11 library call fails. + * 0 on success. + */ +static int Pkcs11RsaPublic(Pkcs11Session* session, wc_CryptoInfo* info) +{ + int ret = 0; + CK_RV rv; + CK_MECHANISM mech; + CK_ULONG outLen; + CK_OBJECT_HANDLE publicKey = NULL_PTR; + int sessionKey = 0; + CK_ATTRIBUTE keyTemplate[] = { + { CKA_CLASS, &pubKeyClass, sizeof(pubKeyClass) }, + { CKA_KEY_TYPE, &rsaKeyType, sizeof(rsaKeyType) }, + { CKA_ENCRYPT, &ckTrue, sizeof(ckTrue) }, + { CKA_MODULUS, NULL, 0 }, + { CKA_PUBLIC_EXPONENT, NULL, 0 } + }; + CK_ULONG keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate); + + WOLFSSL_MSG("PKCS#11: RSA Public Key Operation"); + + if (info->pk.rsa.outLen == NULL) { + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + if ((sessionKey = !mp_iszero(&info->pk.rsa.key->e))) { + /* Set the modulus and public exponent data. 
*/ + keyTemplate[3].pValue = info->pk.rsa.key->n.raw.buf; + keyTemplate[3].ulValueLen = info->pk.rsa.key->n.raw.len; + keyTemplate[4].pValue = info->pk.rsa.key->e.raw.buf; + keyTemplate[4].ulValueLen = info->pk.rsa.key->e.raw.len; + + /* Create an object containing public key data for device to use. */ + rv = session->func->C_CreateObject(session->handle, keyTemplate, + keyTmplCnt, &publicKey); + if (rv != CKR_OK) + ret = WC_HW_E; + } + else { + ret = Pkcs11FindKeyById(&publicKey, CKO_PUBLIC_KEY, CKK_RSA, + session, info->pk.rsa.key->id, + info->pk.rsa.key->idLen); + } + } + + if (ret == 0) { + /* Raw RSA encrypt/decrypt operation. */ + mech.mechanism = CKM_RSA_X_509; + mech.ulParameterLen = 0; + mech.pParameter = NULL; + + rv = session->func->C_EncryptInit(session->handle, &mech, publicKey); + if (rv != CKR_OK) + ret = WC_HW_E; + } + if (ret == 0) { + outLen = (CK_ULONG)*info->pk.rsa.outLen; + rv = session->func->C_Encrypt(session->handle, + (CK_BYTE_PTR)info->pk.rsa.in, info->pk.rsa.inLen, + info->pk.rsa.out, &outLen); + if (rv != CKR_OK) + ret = WC_HW_E; + } + if (ret == 0) + *info->pk.rsa.outLen = (word32)outLen; + + if (sessionKey) + session->func->C_DestroyObject(session->handle, publicKey); + + return ret; +} + +/** + * Exponentiate the input with the private part of the RSA key. + * Used in private encrypt and decrypt. + * + * @param session [in] Session object. + * @param info [in] Cryptographic operation data. + * @return WC_HW_E when a PKCS#11 library call fails. + * 0 on success. + */ +static int Pkcs11RsaPrivate(Pkcs11Session* session, wc_CryptoInfo* info) +{ + int ret = 0; + CK_RV rv; + CK_MECHANISM mech; + CK_ULONG outLen; + CK_OBJECT_HANDLE privateKey = NULL_PTR; + int sessionKey = 0; + + WOLFSSL_MSG("PKCS#11: RSA Private Key Operation"); + + if (info->pk.rsa.outLen == NULL) { + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + if ((sessionKey = !mp_iszero(&info->pk.rsa.key->d))) { + ret = Pkcs11CreateRsaPrivateKey(&privateKey, session, + info->pk.rsa.key); + } + else if (info->pk.rsa.key->idLen > 0) { + ret = Pkcs11FindKeyById(&privateKey, CKO_PRIVATE_KEY, CKK_RSA, + session, info->pk.rsa.key->id, + info->pk.rsa.key->idLen); + } + else { + ret = Pkcs11FindRsaKey(&privateKey, CKO_PRIVATE_KEY, session, + info->pk.rsa.key); + } + } + + if (ret == 0) { + /* Raw RSA encrypt/decrypt operation. */ + mech.mechanism = CKM_RSA_X_509; + mech.ulParameterLen = 0; + mech.pParameter = NULL; + + rv = session->func->C_DecryptInit(session->handle, &mech, privateKey); + if (rv != CKR_OK) + ret = WC_HW_E; + } + if (ret == 0) { + outLen = (CK_ULONG)*info->pk.rsa.outLen; + rv = session->func->C_Decrypt(session->handle, + (CK_BYTE_PTR)info->pk.rsa.in, info->pk.rsa.inLen, + info->pk.rsa.out, &outLen); + if (rv != CKR_OK) + ret = WC_HW_E; + } + if (ret == 0) + *info->pk.rsa.outLen = (word32)outLen; + + if (sessionKey) + session->func->C_DestroyObject(session->handle, privateKey); + + return ret; +} + +/** + * Perform an RSA operation. + * + * @param session [in] Session object. + * @param info [in] Cryptographic operation data. + * @return WC_HW_E when a PKCS#11 library call fails. + * 0 on success. + */ +static int Pkcs11Rsa(Pkcs11Session* session, wc_CryptoInfo* info) +{ + int ret = 0; + CK_RV rv; + CK_MECHANISM_INFO mechInfo; + + /* Check operation is supported. 
*/ + rv = session->func->C_GetMechanismInfo(session->slotId, CKM_RSA_X_509, + &mechInfo); + if (rv != CKR_OK) + ret = NOT_COMPILED_IN; + + if (ret == 0) { + if (info->pk.rsa.type == RSA_PUBLIC_ENCRYPT || + info->pk.rsa.type == RSA_PUBLIC_DECRYPT) { + if ((mechInfo.flags & CKF_ENCRYPT) == 0) + ret = NOT_COMPILED_IN; + else + ret = Pkcs11RsaPublic(session, info); + } + else if (info->pk.rsa.type == RSA_PRIVATE_ENCRYPT || + info->pk.rsa.type == RSA_PRIVATE_DECRYPT) { + if ((mechInfo.flags & CKF_DECRYPT) == 0) + ret = NOT_COMPILED_IN; + else + ret = Pkcs11RsaPrivate(session, info); + } + else + ret = NOT_COMPILED_IN; + } + + return ret; +} + +#ifdef WOLFSSL_KEY_GEN +/** + * Get the RSA public key data from the PKCS#11 object. + * + * @param key [in] RSA key to put the data into. + * @param session [in] Session object. + * @param pubkey [in] Public key object. + * @return WC_HW_E when a PKCS#11 library call fails. + * MEMORY_E when a memory allocation fails. + * 0 on success. + */ +static int Pkcs11GetRsaPublicKey(RsaKey* key, Pkcs11Session* session, + CK_OBJECT_HANDLE pubKey) +{ + int ret = 0; + unsigned char* mod = NULL; + unsigned char* exp = NULL; + int modSz, expSz; + CK_ATTRIBUTE tmpl[] = { + { CKA_MODULUS, NULL_PTR, 0 }, + { CKA_PUBLIC_EXPONENT, NULL_PTR, 0 } + }; + CK_ULONG tmplCnt = sizeof(tmpl) / sizeof(*tmpl); + CK_RV rv; + + rv = session->func->C_GetAttributeValue(session->handle, pubKey, tmpl, + tmplCnt); + if (rv != CKR_OK) + ret = WC_HW_E; + + if (ret == 0) { + modSz = tmpl[0].ulValueLen; + expSz = tmpl[1].ulValueLen; + mod = (unsigned char*)XMALLOC(modSz, key->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (mod == NULL) + ret = MEMORY_E; + } + if (ret == 0) { + exp = (unsigned char*)XMALLOC(expSz, key->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (exp == NULL) + ret = MEMORY_E; + } + if (ret == 0) { + tmpl[0].pValue = mod; + tmpl[1].pValue = exp; + + rv = session->func->C_GetAttributeValue(session->handle, pubKey, + tmpl, tmplCnt); + if (rv != CKR_OK) + ret = WC_HW_E; + } + if (ret == 0) + ret = wc_RsaPublicKeyDecodeRaw(mod, modSz, exp, expSz, key); + + if (exp != NULL) + XFREE(exp, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (mod != NULL) + XFREE(mod, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + + return ret; +} + +/** + * Perform an RSA key generation operation. + * The private key data stays on the device. + * + * @param session [in] Session object. + * @param info [in] Cryptographic operation data. + * @return WC_HW_E when a PKCS#11 library call fails. + * 0 on success. + */ +static int Pkcs11RsaKeyGen(Pkcs11Session* session, wc_CryptoInfo* info) +{ + int ret = 0; + RsaKey* key = info->pk.rsakg.key; + CK_RV rv; + CK_ULONG bits = info->pk.rsakg.size; + CK_OBJECT_HANDLE pubKey = NULL_PTR, privKey = NULL_PTR; + CK_MECHANISM mech; + static CK_BYTE pub_exp[] = { 0x01, 0x00, 0x01, 0x00 }; + CK_ATTRIBUTE pubKeyTmpl[] = { + { CKA_MODULUS_BITS, &bits, sizeof(bits) }, + { CKA_ENCRYPT, &ckTrue, sizeof(ckTrue) }, + { CKA_VERIFY, &ckTrue, sizeof(ckTrue) }, + { CKA_PUBLIC_EXPONENT, &pub_exp, sizeof(pub_exp) } + }; + CK_ULONG pubTmplCnt = sizeof(pubKeyTmpl)/sizeof(*pubKeyTmpl); + CK_ATTRIBUTE privKeyTmpl[] = { + {CKA_DECRYPT, &ckTrue, sizeof(ckTrue) }, + {CKA_SIGN, &ckTrue, sizeof(ckTrue) }, + {CKA_ID, NULL, 0 } + }; + int privTmplCnt = 2; + int i; + + ret = Pkcs11MechAvail(session, CKM_RSA_PKCS_KEY_PAIR_GEN); + if (ret == 0) { + WOLFSSL_MSG("PKCS#11: RSA Key Generation Operation"); + + /* Most commonly used public exponent value (array initialized). 
*/ + if (info->pk.rsakg.e != WC_RSA_EXPONENT) { + for (i = 0; i < (int)sizeof(pub_exp); i++) + pub_exp[i] = (info->pk.rsakg.e >> (8 * i)) & 0xff; + } + for (i = (int)sizeof(pub_exp) - 1; pub_exp[i] == 0; i--) { + } + pubKeyTmpl[3].ulValueLen = i + 1; + + if (key->idLen != 0) { + privKeyTmpl[privTmplCnt].pValue = key->id; + privKeyTmpl[privTmplCnt].ulValueLen = key->idLen; + privTmplCnt++; + } + + mech.mechanism = CKM_RSA_PKCS_KEY_PAIR_GEN; + mech.ulParameterLen = 0; + mech.pParameter = NULL; + + rv = session->func->C_GenerateKeyPair(session->handle, &mech, + pubKeyTmpl, pubTmplCnt, + privKeyTmpl, privTmplCnt, + &pubKey, &privKey); + if (rv != CKR_OK) + ret = -1; + } + + if (ret == 0) + ret = Pkcs11GetRsaPublicKey(key, session, pubKey); + + if (pubKey != NULL_PTR) + ret = session->func->C_DestroyObject(session->handle, pubKey); + if (ret != 0 && privKey != NULL_PTR) + ret = session->func->C_DestroyObject(session->handle, privKey); + + return ret; +} +#endif /* WOLFSSL_KEY_GEN */ +#endif /* !NO_RSA */ + +#ifdef HAVE_ECC +/** + * Find the PKCS#11 object containing the ECC public or private key data with + * the modulus specified. + * + * @param key [out] Henadle to key object. + * @param keyClass [in] Public or private key class. + * @param session [in] Session object. + * @param eccKey [in] ECC key with parameters. + * @return WC_HW_E when a PKCS#11 library call fails. + * MEMORY_E when a memory allocation fails. + * 0 on success. + */ +static int Pkcs11FindEccKey(CK_OBJECT_HANDLE* key, CK_OBJECT_CLASS keyClass, + Pkcs11Session* session, ecc_key* eccKey) +{ + int ret = 0; + int i; + unsigned char* ecPoint = NULL; + word32 len = 0; + CK_RV rv; + CK_ULONG count; + CK_UTF8CHAR params[MAX_EC_PARAM_LEN]; + CK_ATTRIBUTE keyTemplate[] = { + { CKA_CLASS, &keyClass, sizeof(keyClass) }, + { CKA_KEY_TYPE, &ecKeyType, sizeof(ecKeyType) }, + { CKA_EC_PARAMS, params, 0 }, + { CKA_EC_POINT, NULL, 0 }, + }; + CK_ULONG attrCnt = 3; + + ret = Pkcs11EccSetParams(eccKey, keyTemplate, 2); + if (ret == 0 && keyClass == CKO_PUBLIC_KEY) { + /* ASN1 encoded: OCT + uncompressed point */ + len = 3 + 1 + 2 * eccKey->dp->size; + ecPoint = (unsigned char*)XMALLOC(len, eccKey->heap, DYNAMIC_TYPE_ECC); + if (ecPoint == NULL) + ret = MEMORY_E; + } + if (ret == 0 && keyClass == CKO_PUBLIC_KEY) { + len -= 3; + i = 0; + ecPoint[i++] = ASN_OCTET_STRING; + if (len >= ASN_LONG_LENGTH) + ecPoint[i++] = (ASN_LONG_LENGTH | 1); + ecPoint[i++] = len; + ret = wc_ecc_export_x963(eccKey, ecPoint + i, &len); + } + if (ret == 0 && keyClass == CKO_PUBLIC_KEY) { + keyTemplate[3].pValue = ecPoint; + keyTemplate[3].ulValueLen = len + i; + attrCnt++; + } + if (ret == 0) { + rv = session->func->C_FindObjectsInit(session->handle, keyTemplate, + attrCnt); + if (rv != CKR_OK) + ret = WC_HW_E; + } + if (ret == 0) { + rv = session->func->C_FindObjects(session->handle, key, 1, &count); + if (rv != CKR_OK) + ret = WC_HW_E; + rv = session->func->C_FindObjectsFinal(session->handle); + if (rv != CKR_OK) + ret = WC_HW_E; + } + + if (ecPoint != NULL) + XFREE(ecPoint, eccKey->heap, DYNAMIC_TYPE_ECC); + + return ret; +} + +/** + * Create a PKCS#11 object containing the ECC public key data. + * Encode the public key as an OCTET_STRING of the encoded point. + * + * @param publicKey [out] Henadle to public key object. + * @param session [in] Session object. + * @param public_key [in] ECC public key. + * @param operation [in] Cryptographic operation key is to be used for. + * @return WC_HW_E when a PKCS#11 library call fails. 
+ * MEMORY_E when a memory allocation fails. + * 0 on success. + */ +static int Pkcs11CreateEccPublicKey(CK_OBJECT_HANDLE* publicKey, + Pkcs11Session* session, + ecc_key* public_key, + CK_ATTRIBUTE_TYPE operation) +{ + int ret = 0; + int i; + unsigned char* ecPoint = NULL; + word32 len; + CK_RV rv; + CK_UTF8CHAR params[MAX_EC_PARAM_LEN]; + CK_ATTRIBUTE keyTemplate[] = { + { CKA_CLASS, &pubKeyClass, sizeof(pubKeyClass) }, + { CKA_KEY_TYPE, &ecKeyType, sizeof(ecKeyType) }, + { operation, &ckTrue, sizeof(ckTrue) }, + { CKA_EC_PARAMS, params, 0 }, + { CKA_EC_POINT, NULL, 0 } + }; + CK_ULONG keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate); + + ret = Pkcs11EccSetParams(public_key, keyTemplate, 3); + if (ret == 0) { + /* ASN1 encoded: OCT + uncompressed point */ + len = 3 + 1 + 2 * public_key->dp->size; + ecPoint = (unsigned char*)XMALLOC(len, public_key->heap, + DYNAMIC_TYPE_ECC); + if (ecPoint == NULL) + ret = MEMORY_E; + } + if (ret == 0) { + len -= 3; + i = 0; + ecPoint[i++] = ASN_OCTET_STRING; + if (len >= ASN_LONG_LENGTH) + ecPoint[i++] = ASN_LONG_LENGTH | 1; + ecPoint[i++] = len; + ret = wc_ecc_export_x963(public_key, ecPoint + i, &len); + } + if (ret == 0) { + keyTemplate[4].pValue = ecPoint; + keyTemplate[4].ulValueLen = len + i; + + rv = session->func->C_CreateObject(session->handle, keyTemplate, + keyTmplCnt, publicKey); + if (rv != CKR_OK) + ret = WC_HW_E; + } + + if (ecPoint != NULL) + XFREE(ecPoint, public_key->heap, DYNAMIC_TYPE_ECC); + + return ret; +} + +#ifndef NO_PKCS11_EC_KEYGEN +/** + * Gets the public key data from the PKCS#11 object and puts into the ECC key. + * + * @param key [in] ECC public key. + * @param session [in] Session object. + * @param pubKey [in] ECC public key PKCS#11 object. + * @return WC_HW_E when a PKCS#11 library call fails. + * MEMORY_E when a memory allocation fails. + * 0 on success. + */ +static int Pkcs11GetEccPublicKey(ecc_key* key, Pkcs11Session* session, + CK_OBJECT_HANDLE pubKey) +{ + int ret = 0; + word32 i = 0; + int curveIdx; + unsigned char* point = NULL; + int pointSz; + byte tag; + CK_RV rv; + CK_ATTRIBUTE tmpl[] = { + { CKA_EC_POINT, NULL_PTR, 0 }, + }; + CK_ULONG tmplCnt = sizeof(tmpl) / sizeof(*tmpl); + + rv = session->func->C_GetAttributeValue(session->handle, pubKey, tmpl, + tmplCnt); + if (rv != CKR_OK) + ret = WC_HW_E; + + if (ret == 0) { + pointSz = (int)tmpl[0].ulValueLen; + point = (unsigned char*)XMALLOC(pointSz, key->heap, DYNAMIC_TYPE_ECC); + if (point == NULL) + ret = MEMORY_E; + } + if (ret == 0) { + tmpl[0].pValue = point; + + rv = session->func->C_GetAttributeValue(session->handle, pubKey, + tmpl, tmplCnt); + if (rv != CKR_OK) + ret = WC_HW_E; + } + + /* Make sure the data is big enough for ASN.1: OCT + uncompressed point */ + if (ret == 0 && pointSz < key->dp->size * 2 + 1 + 2) + ret = ASN_PARSE_E; + /* Step over the OCTET_STRING wrapper. 
*/ + if (ret == 0 && GetASNTag(point, &i, &tag, pointSz) != 0) + ret = ASN_PARSE_E; + if (ret == 0 && tag != ASN_OCTET_STRING) + ret = ASN_PARSE_E; + if (ret == 0 && point[i] >= ASN_LONG_LENGTH) { + if (point[i++] != (ASN_LONG_LENGTH | 1)) + ret = ASN_PARSE_E; + else if (pointSz < key->dp->size * 2 + 1 + 3) + ret = ASN_PARSE_E; + } + if (ret == 0 && point[i++] != key->dp->size * 2 + 1) + ret = ASN_PARSE_E; + + if (ret == 0) { + curveIdx = wc_ecc_get_curve_idx(key->dp->id); + ret = wc_ecc_import_point_der(point + i, pointSz - i, curveIdx, + &key->pubkey); + } + + if (point != NULL) + XFREE(point, key->heap, DYNAMIC_TYPE_ECC); + + return ret; +} + +/** + * Perform an ECC key generation operation. + * The private key data stays on the device. + * + * @param session [in] Session object. + * @param info [in] Cryptographic operation data. + * @return WC_HW_E when a PKCS#11 library call fails. + * 0 on success. + */ +static int Pkcs11EcKeyGen(Pkcs11Session* session, wc_CryptoInfo* info) +{ + int ret = 0; + ecc_key* key = info->pk.eckg.key; + CK_RV rv; + CK_OBJECT_HANDLE pubKey = NULL_PTR, privKey = NULL_PTR; + CK_MECHANISM mech; + CK_UTF8CHAR params[MAX_EC_PARAM_LEN]; + CK_ATTRIBUTE pubKeyTmpl[] = { + { CKA_EC_PARAMS, params, 0 }, + { CKA_ENCRYPT, &ckTrue, sizeof(ckTrue) }, + { CKA_VERIFY, &ckTrue, sizeof(ckTrue) }, + }; + int pubTmplCnt = sizeof(pubKeyTmpl)/sizeof(*pubKeyTmpl); + CK_ATTRIBUTE privKeyTmpl[] = { + { CKA_DECRYPT, &ckTrue, sizeof(ckTrue) }, + { CKA_SIGN, &ckTrue, sizeof(ckTrue) }, + { CKA_DERIVE, &ckTrue, sizeof(ckTrue) }, + { CKA_ID, NULL, 0 }, + }; + int privTmplCnt = 3; + + ret = Pkcs11MechAvail(session, CKM_EC_KEY_PAIR_GEN); + if (ret == 0) { + WOLFSSL_MSG("PKCS#11: EC Key Generation Operation"); + + ret = Pkcs11EccSetParams(key, pubKeyTmpl, 0); + } + if (ret == 0) { + if (key->idLen != 0) { + privKeyTmpl[privTmplCnt].pValue = key->id; + privKeyTmpl[privTmplCnt].ulValueLen = key->idLen; + privTmplCnt++; + } + + mech.mechanism = CKM_EC_KEY_PAIR_GEN; + mech.ulParameterLen = 0; + mech.pParameter = NULL; + + rv = session->func->C_GenerateKeyPair(session->handle, &mech, + pubKeyTmpl, pubTmplCnt, + privKeyTmpl, privTmplCnt, + &pubKey, &privKey); + if (rv != CKR_OK) + ret = -1; + } + + if (ret == 0) + ret = Pkcs11GetEccPublicKey(key, session, pubKey); + + if (pubKey != NULL_PTR) + session->func->C_DestroyObject(session->handle, pubKey); + if (ret != 0 && privKey != NULL_PTR) + session->func->C_DestroyObject(session->handle, privKey); + + return ret; +} +#endif + +#ifndef NO_PKCS11_ECDH +/** + * Extracts the secret key data from the PKCS#11 object. + * + * @param session [in] Session object. + * @param secret [in] PKCS#11 object with the secret key data. + * @param out [in] Buffer to hold secret data. + * @param outLen [in,out] On in, length of buffer. + * On out, the length of data in buffer. + * @return WC_HW_E when a PKCS#11 library call fails. + * 0 on success. 
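Pkcs11EcKeyGen above is reached through wolfCrypt's crypto-callback dispatch, so application code simply runs a normal key generation against the token's device id (a sketch; 'devId' is assumed to be the id registered for this token's callback):

    ecc_key key;
    WC_RNG  rng;

    if (wc_ecc_init_ex(&key, NULL, devId) == 0 && wc_InitRng(&rng) == 0) {
        /* 32-byte curve (P-256); the private scalar stays on the token */
        int ret = wc_ecc_make_key(&rng, 32, &key);
    }
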
+ */
+static int Pkcs11ExtractSecret(Pkcs11Session* session, CK_OBJECT_HANDLE secret,
+                               byte* out, word32* outLen)
+{
+    int ret = 0;
+    CK_ATTRIBUTE tmpl[] = {
+        { CKA_VALUE, NULL_PTR, 0 }
+    };
+    CK_ULONG tmplCnt = sizeof(tmpl) / sizeof(*tmpl);
+    CK_RV rv;
+
+    rv = session->func->C_GetAttributeValue(session->handle, secret, tmpl,
+                                            tmplCnt);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+    if (ret == 0) {
+        if (tmpl[0].ulValueLen > *outLen)
+            ret = BUFFER_E;
+    }
+    if (ret == 0) {
+        tmpl[0].pValue = out;
+        rv = session->func->C_GetAttributeValue(session->handle, secret,
+                                                tmpl, tmplCnt);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+        *outLen = (word32)tmpl[0].ulValueLen;
+    }
+
+    return ret;
+}
+
+/**
+ * Performs the ECDH secret generation operation.
+ *
+ * @param  session  [in]  Session object.
+ * @param  info     [in]  Cryptographic operation data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11ECDH(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int ret = 0;
+    int sessionKey = 0;
+    unsigned char* point = NULL;
+    word32 pointLen;
+    CK_RV rv;
+    CK_KEY_TYPE keyType = CKK_GENERIC_SECRET;
+    CK_MECHANISM mech;
+    CK_ECDH1_DERIVE_PARAMS params;
+    CK_OBJECT_HANDLE privateKey = NULL_PTR;
+    CK_OBJECT_HANDLE secret = CK_INVALID_HANDLE;
+    CK_ULONG secSz;
+    CK_ATTRIBUTE tmpl[] = {
+        { CKA_CLASS,       &secretKeyClass, sizeof(secretKeyClass) },
+        { CKA_KEY_TYPE,    &keyType,        sizeof(keyType)        },
+        { CKA_PRIVATE,     &ckFalse,        sizeof(ckFalse)        },
+        { CKA_SENSITIVE,   &ckFalse,        sizeof(ckFalse)        },
+        { CKA_EXTRACTABLE, &ckTrue,         sizeof(ckTrue)         },
+        { CKA_VALUE_LEN,   &secSz,          sizeof(secSz)          }
+    };
+    CK_ULONG tmplCnt = sizeof(tmpl) / sizeof(*tmpl);
+
+    ret = Pkcs11MechAvail(session, CKM_ECDH1_DERIVE);
+    if (ret == 0 && info->pk.ecdh.outlen == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    if (ret == 0) {
+        WOLFSSL_MSG("PKCS#11: EC Key Derivation Operation");
+
+        if ((sessionKey = !mp_iszero(&info->pk.ecdh.private_key->k)))
+            ret = Pkcs11CreateEccPrivateKey(&privateKey, session,
+                                            info->pk.ecdh.private_key,
+                                            CKA_DERIVE);
+        else if (info->pk.ecdh.private_key->idLen > 0) {
+            ret = Pkcs11FindKeyById(&privateKey, CKO_PRIVATE_KEY, CKK_EC,
+                                    session, info->pk.ecdh.private_key->id,
+                                    info->pk.ecdh.private_key->idLen);
+        }
+        else {
+            ret = Pkcs11FindEccKey(&privateKey, CKO_PRIVATE_KEY, session,
+                                   info->pk.ecdh.public_key);
+        }
+    }
+    if (ret == 0) {
+        ret = wc_ecc_export_x963(info->pk.ecdh.public_key, NULL, &pointLen);
+        if (ret == LENGTH_ONLY_E) {
+            point = (unsigned char*)XMALLOC(pointLen,
+                                            info->pk.ecdh.public_key->heap,
+                                            DYNAMIC_TYPE_ECC_BUFFER);
+            ret = wc_ecc_export_x963(info->pk.ecdh.public_key, point,
+                                     &pointLen);
+        }
+    }
+
+    if (ret == 0) {
+        secSz = *info->pk.ecdh.outlen;
+        if (secSz > (CK_ULONG)info->pk.ecdh.private_key->dp->size)
+            secSz = info->pk.ecdh.private_key->dp->size;
+
+        params.kdf             = CKD_NULL;
+        params.pSharedData     = NULL;
+        params.ulSharedDataLen = 0;
+        params.pPublicData     = point;
+        params.ulPublicDataLen = pointLen;
+
+        mech.mechanism      = CKM_ECDH1_DERIVE;
+        mech.ulParameterLen = sizeof(params);
+        mech.pParameter     = &params;
+
+        rv = session->func->C_DeriveKey(session->handle, &mech, privateKey,
+                                        tmpl, tmplCnt, &secret);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (ret == 0) {
+        ret = Pkcs11ExtractSecret(session, secret, info->pk.ecdh.out,
+                                  info->pk.ecdh.outlen);
+    }
+
+    if (sessionKey)
+        session->func->C_DestroyObject(session->handle, privateKey);
+
+    if (point != NULL)
+        XFREE(point, info->pk.ecdh.public_key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+
+    return ret;
+}
+#endif
+
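Pkcs11ECDSASig_Encode (next) rewrites the token's raw r||s output into the DER SEQUENCE wolfSSL expects. A worked size example for P-256 (sz = 32) when both r and s have their top bit set, so each needs a 0x00 pad:

    /* in : r[32] || s[32]             - 64 bytes from C_Sign
     * out: 30 46                      - SEQUENCE, length 0x46 (70)
     *      02 21 00 r[32]             - INTEGER r, zero-padded
     *      02 21 00 s[32]             - INTEGER s, zero-padded
     *                                 => 72 bytes total */
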
+/**
+ * Encode, in place, the ECDSA signature.
+ * Two fixed-width values into ASN.1 DER encoded SEQ { INT, INT }.
+ *
+ * @param  sig  [in,out]  Signature data.
+ * @param  sz   [in]      Size of original signature data.
+ * @return  Length of the ASN.1 DER encoded signature.
+ */
+static word32 Pkcs11ECDSASig_Encode(byte* sig, word32 sz)
+{
+    word32 rHigh, sHigh, seqLen;
+    word32 rStart = 0, sStart = 0;
+    word32 sigSz, rSz, rLen, sSz, sLen;
+    word32 i;
+
+    /* Find first byte of data in r and s. */
+    while (rStart < sz - 1 && sig[rStart] == 0x00)
+        rStart++;
+    while (sStart < sz - 1 && sig[sz + sStart] == 0x00)
+        sStart++;
+    /* Check if 0 needs to be prepended to make integer a positive number. */
+    rHigh = sig[rStart] >> 7;
+    sHigh = sig[sz + sStart] >> 7;
+    /* Calculate length of integer to put into ASN.1 encoding. */
+    rLen = sz - rStart;
+    sLen = sz - sStart;
+    /* r and s: INT (2 bytes) + [ 0x00 ] + integer */
+    rSz = 2 + rHigh + rLen;
+    sSz = 2 + sHigh + sLen;
+    /* Calculate the complete ASN.1 DER encoded size. */
+    sigSz = rSz + sSz;
+    if (sigSz >= ASN_LONG_LENGTH)
+        seqLen = 3;
+    else
+        seqLen = 2;
+
+    /* Move s and then r integers into their final places. */
+    XMEMMOVE(sig + seqLen + rSz + (sSz - sLen), sig + sz + sStart, sLen);
+    XMEMMOVE(sig + seqLen + (rSz - rLen), sig + rStart, rLen);
+
+    /* Put the ASN.1 DER encoding around data. */
+    i = 0;
+    sig[i++] = ASN_CONSTRUCTED | ASN_SEQUENCE;
+    if (seqLen == 3)
+        sig[i++] = ASN_LONG_LENGTH | 0x01;
+    sig[i++] = sigSz;
+    sig[i++] = ASN_INTEGER;
+    sig[i++] = rHigh + (sz - rStart);
+    if (rHigh)
+        sig[i++] = 0x00;
+    i += sz - rStart;
+    sig[i++] = ASN_INTEGER;
+    sig[i++] = sHigh + (sz - sStart);
+    if (sHigh)
+        sig[i] = 0x00;
+
+    return seqLen + sigSz;
+}
+
+/**
+ * Decode the ECDSA signature.
+ * ASN.1 DER encoded SEQ { INT, INT } converted to two fixed-width values.
+ *
+ * @param  in    [in]  ASN.1 DER encoded signature.
+ * @param  inSz  [in]  Size of ASN.1 signature.
+ * @param  sig   [in]  Output buffer.
+ * @param  sz    [in]  Size of output buffer.
+ * @return  ASN_PARSE_E when the ASN.1 encoding is invalid.
+ *          0 on success.
+ */
+static int Pkcs11ECDSASig_Decode(const byte* in, word32 inSz, byte* sig,
+                                 word32 sz)
+{
+    int ret = 0;
+    word32 i = 0;
+    byte   tag;
+    int    len, seqLen = 2;
+
+    /* Make sure zeros in place when decoding short integers. */
+    XMEMSET(sig, 0, sz * 2);
+
+    /* Check min data for: SEQ + INT. */
+    if (inSz < 5)
+        ret = ASN_PARSE_E;
+    /* Check SEQ */
+    if (ret == 0 && in[i++] != (ASN_CONSTRUCTED | ASN_SEQUENCE))
+        ret = ASN_PARSE_E;
+    if (ret == 0 && in[i] >= ASN_LONG_LENGTH) {
+        if (in[i] != (ASN_LONG_LENGTH | 0x01))
+            ret = ASN_PARSE_E;
+        else {
+            i++;
+            seqLen++;
+        }
+    }
+    if (ret == 0 && in[i++] != inSz - seqLen)
+        ret = ASN_PARSE_E;
+
+    /* Check INT */
+    if (ret == 0 && GetASNTag(in, &i, &tag, inSz) != 0)
+        ret = ASN_PARSE_E;
+    if (ret == 0 && tag != ASN_INTEGER)
+        ret = ASN_PARSE_E;
+    if (ret == 0 && (len = in[i++]) > sz + 1)
+        ret = ASN_PARSE_E;
+    /* Check there is space for INT data */
+    if (ret == 0 && i + len > inSz)
+        ret = ASN_PARSE_E;
+    if (ret == 0) {
+        /* Skip leading zero */
+        if (in[i] == 0x00) {
+            i++;
+            len--;
+        }
+        /* Copy r into sig. */
+        XMEMCPY(sig + sz - len, in + i, len);
+        i += len;
+    }
+
+    /* Check min data for: INT.
*/ + if (ret == 0 && i + 2 > inSz) + ret = ASN_PARSE_E; + /* Check INT */ + if (ret == 0 && GetASNTag(in, &i, &tag, inSz) != 0) + ret = ASN_PARSE_E; + if (ret == 0 && tag != ASN_INTEGER) + ret = ASN_PARSE_E; + if (ret == 0 && (len = in[i++]) > sz + 1) + ret = ASN_PARSE_E; + /* Check there is space for INT data */ + if (ret == 0 && i + len > inSz) + ret = ASN_PARSE_E; + if (ret == 0) { + /* Skip leading zero */ + if (in[i] == 0x00) { + i++; + len--; + } + /* Copy s into sig. */ + XMEMCPY(sig + sz + sz - len, in + i, len); + } + + return ret; +} + +/** + * Get the parameters from the private key on the device. + * + * @param session [in] Session object. + * @param privKey [in] PKCS #11 object handle of private key.. + * @param key [in] Ecc key to set parameters against. + * @return WC_HW_E when a PKCS#11 library call fails. + * 0 on success. + */ +static int Pkcs11GetEccParams(Pkcs11Session* session, CK_OBJECT_HANDLE privKey, + ecc_key* key) +{ + int ret = 0; + int curveId; + CK_RV rv; + byte oid[16]; + CK_ATTRIBUTE template[] = { + { CKA_EC_PARAMS, (CK_VOID_PTR)oid, sizeof(oid) } + }; + + rv = session->func->C_GetAttributeValue(session->handle, privKey, template, + 1); + if (rv != CKR_OK) + ret = WC_HW_E; + if (ret == 0) { + /* PKCS #11 wraps the OID in ASN.1 */ + curveId = wc_ecc_get_curve_id_from_oid(oid + 2, + (word32)template[0].ulValueLen - 2); + if (curveId == ECC_CURVE_INVALID) + ret = WC_HW_E; + } + if (ret == 0) + ret = wc_ecc_set_curve(key, 0, curveId); + + return ret; +} + +/** + * Performs the ECDSA signing operation. + * + * @param session [in] Session object. + * @param info [in] Cryptographic operation data. + * @return WC_HW_E when a PKCS#11 library call fails. + * 0 on success. + */ +static int Pkcs11ECDSA_Sign(Pkcs11Session* session, wc_CryptoInfo* info) +{ + int ret = 0; + int sessionKey = 0; + word32 sz; + CK_RV rv; + CK_ULONG outLen; + CK_MECHANISM mech; + CK_MECHANISM_INFO mechInfo; + CK_OBJECT_HANDLE privateKey = NULL_PTR; + + /* Check operation is supported. */ + rv = session->func->C_GetMechanismInfo(session->slotId, CKM_ECDSA, + &mechInfo); + if (rv != CKR_OK || (mechInfo.flags & CKF_SIGN) == 0) + ret = NOT_COMPILED_IN; + + if (ret == 0 && info->pk.eccsign.outlen == NULL) { + ret = BAD_FUNC_ARG; + } + if (ret == 0) { + WOLFSSL_MSG("PKCS#11: EC Signing Operation"); + + if ((sessionKey = !mp_iszero(&info->pk.eccsign.key->k))) + ret = Pkcs11CreateEccPrivateKey(&privateKey, session, + info->pk.eccsign.key, CKA_SIGN); + else if (info->pk.eccsign.key->idLen > 0) { + ret = Pkcs11FindKeyById(&privateKey, CKO_PRIVATE_KEY, CKK_EC, + session, info->pk.eccsign.key->id, + info->pk.eccsign.key->idLen); + if (ret == 0 && info->pk.eccsign.key->dp == NULL) { + ret = Pkcs11GetEccParams(session, privateKey, + info->pk.eccsign.key); + } + } + else { + ret = Pkcs11FindEccKey(&privateKey, CKO_PRIVATE_KEY, session, + info->pk.eccsign.key); + } + } + + if (ret == 0) { + sz = info->pk.eccsign.key->dp->size; + /* Maximum encoded size is two ordinates + 8 bytes of ASN.1. 
*/ + if (*info->pk.eccsign.outlen < (word32)wc_ecc_sig_size_calc(sz)) + ret = BUFFER_E; + } + + if (ret == 0) { + mech.mechanism = CKM_ECDSA; + mech.ulParameterLen = 0; + mech.pParameter = NULL; + + rv = session->func->C_SignInit(session->handle, &mech, privateKey); + if (rv != CKR_OK) + ret = WC_HW_E; + } + + if (ret == 0) { + outLen = *info->pk.eccsign.outlen; + rv = session->func->C_Sign(session->handle, + (CK_BYTE_PTR)info->pk.eccsign.in, + info->pk.eccsign.inlen, info->pk.eccsign.out, + &outLen); + if (rv != CKR_OK) + ret = WC_HW_E; + } + + if (ret == 0) { + *info->pk.eccsign.outlen = Pkcs11ECDSASig_Encode(info->pk.eccsign.out, + sz); + } + + if (sessionKey) + session->func->C_DestroyObject(session->handle, privateKey); + + return ret; +} + +/** + * Performs the ECDSA verification operation. + * + * @param session [in] Session object. + * @param info [in] Cryptographic operation data. + * @return WC_HW_E when a PKCS#11 library call fails. + * MEMORY_E when a memory allocation fails. + * 0 on success. + */ +static int Pkcs11ECDSA_Verify(Pkcs11Session* session, wc_CryptoInfo* info) +{ + int ret = 0; + CK_RV rv; + CK_MECHANISM mech; + CK_MECHANISM_INFO mechInfo; + CK_OBJECT_HANDLE publicKey = NULL_PTR; + unsigned char* sig = NULL; + word32 sz = info->pk.eccverify.key->dp->size; + + /* Check operation is supported. */ + rv = session->func->C_GetMechanismInfo(session->slotId, CKM_ECDSA, + &mechInfo); + if (rv != CKR_OK || (mechInfo.flags & CKF_VERIFY) == 0) + ret = NOT_COMPILED_IN; + + if (ret == 0 && info->pk.eccverify.res == NULL) { + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + WOLFSSL_MSG("PKCS#11: EC Verification Operation"); + + ret = Pkcs11CreateEccPublicKey(&publicKey, session, + info->pk.eccverify.key, CKA_VERIFY); + } + + if (ret == 0) { + sig = XMALLOC(sz * 2, info->pk.eccverify.key->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (sig == NULL) + ret = MEMORY_E; + } + + if (ret == 0) { + ret = Pkcs11ECDSASig_Decode(info->pk.eccverify.sig, + info->pk.eccverify.siglen, sig, sz); + } + if (ret == 0) { + mech.mechanism = CKM_ECDSA; + mech.ulParameterLen = 0; + mech.pParameter = NULL; + + rv = session->func->C_VerifyInit(session->handle, &mech, publicKey); + if (rv != CKR_OK) + ret = WC_HW_E; + } + + if (ret == 0) { + *info->pk.eccverify.res = 0; + rv = session->func->C_Verify(session->handle, + (CK_BYTE_PTR)info->pk.eccverify.hash, + info->pk.eccverify.hashlen, + (CK_BYTE_PTR)sig, sz * 2); + if (rv == CKR_SIGNATURE_INVALID) { + } + else if (rv != CKR_OK) + ret = WC_HW_E; + else + *info->pk.eccverify.res = 1; + } + + if (publicKey != NULL_PTR) + session->func->C_DestroyObject(session->handle, publicKey); + + if (sig != NULL) + XFREE(sig, info->pk.eccverify.key->heap, DYNAMIC_TYPE_TMP_BUFFER); + + return ret; +} +#endif + +#if !defined(NO_AES) && defined(HAVE_AESGCM) +/** + * Performs the AES-GCM encryption operation. + * + * @param session [in] Session object. + * @param info [in] Cryptographic operation data. + * @return WC_HW_E when a PKCS#11 library call fails. + * MEMORY_E when a memory allocation fails. + * 0 on success. + */ +static int Pkcs11AesGcmEncrypt(Pkcs11Session* session, wc_CryptoInfo* info) +{ + int ret = 0; + CK_RV rv; + Aes* aes = info->cipher.aesgcm_enc.aes; + CK_GCM_PARAMS params; + CK_MECHANISM_INFO mechInfo; + CK_OBJECT_HANDLE key = NULL_PTR; + CK_MECHANISM mech; + CK_ULONG outLen; + + /* Check operation is supported. 
*/
+    rv = session->func->C_GetMechanismInfo(session->slotId, CKM_AES_GCM,
+                                           &mechInfo);
+    if (rv != CKR_OK || (mechInfo.flags & CKF_ENCRYPT) == 0)
+        ret = NOT_COMPILED_IN;
+
+    if (ret == 0) {
+        WOLFSSL_MSG("PKCS#11: AES-GCM Encryption Operation");
+    }
+
+    /* Create a secret key object or find by id. */
+    if (ret == 0 && aes->idLen == 0) {
+        ret = Pkcs11CreateSecretKey(&key, session, CKK_AES,
+                                    (unsigned char*)aes->devKey, aes->keylen,
+                                    NULL, 0);
+    }
+    else if (ret == 0) {
+        ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY, CKK_AES, session, aes->id,
+                                aes->idLen);
+    }
+
+    if (ret == 0) {
+        params.pIv = (CK_BYTE_PTR)info->cipher.aesgcm_enc.iv;
+        params.ulIvLen = info->cipher.aesgcm_enc.ivSz;
+        params.pAAD = (CK_BYTE_PTR)info->cipher.aesgcm_enc.authIn;
+        params.ulAADLen = info->cipher.aesgcm_enc.authInSz;
+        params.ulTagBits = info->cipher.aesgcm_enc.authTagSz * 8;
+
+        mech.mechanism = CKM_AES_GCM;
+        mech.ulParameterLen = sizeof(params);
+        mech.pParameter = &params;
+
+        rv = session->func->C_EncryptInit(session->handle, &mech, key);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        outLen = info->cipher.aesgcm_enc.sz;
+        rv = session->func->C_EncryptUpdate(session->handle,
+                                       (CK_BYTE_PTR)info->cipher.aesgcm_enc.in,
+                                       info->cipher.aesgcm_enc.sz,
+                                       info->cipher.aesgcm_enc.out,
+                                       &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        /* Authentication tag comes out in final block. */
+        outLen = info->cipher.aesgcm_enc.authTagSz;
+        rv = session->func->C_EncryptFinal(session->handle,
+                                           info->cipher.aesgcm_enc.authTag,
+                                           &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (aes->idLen == 0 && key != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, key);
+
+    return ret;
+}
+
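+/* Editorial note (illustrative, not from the original source): the
+ * encrypt path above maps wolfCrypt's one-shot AES-GCM call onto the
+ * PKCS#11 streaming API. Under PKCS#11 v2.40, C_EncryptUpdate() consumes
+ * the plaintext and C_EncryptFinal() emits the authentication tag, whose
+ * size was requested in bits via CK_GCM_PARAMS.ulTagBits - hence the
+ * "* 8" above: a 16 byte tag is requested as ulTagBits = 128.
+ */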
+/**
+ * Performs the AES-GCM decryption operation.
+ *
+ * @param session [in] Session object.
+ * @param info [in] Cryptographic operation data.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ *         MEMORY_E when a memory allocation fails.
+ *         0 on success.
+ */
+static int Pkcs11AesGcmDecrypt(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int ret = 0;
+    CK_RV rv;
+    Aes* aes = info->cipher.aesgcm_enc.aes;
+    CK_GCM_PARAMS params;
+    CK_MECHANISM_INFO mechInfo;
+    CK_OBJECT_HANDLE key = NULL_PTR;
+    CK_MECHANISM mech;
+    CK_ULONG outLen;
+    word32 len;
+
+    /* Check operation is supported. */
+    rv = session->func->C_GetMechanismInfo(session->slotId, CKM_AES_GCM,
+                                           &mechInfo);
+    if (rv != CKR_OK || (mechInfo.flags & CKF_DECRYPT) == 0)
+        ret = NOT_COMPILED_IN;
+
+    if (ret == 0) {
+        WOLFSSL_MSG("PKCS#11: AES-GCM Decryption Operation");
+    }
+
+    /* Create a secret key object or find by id. */
+    if (ret == 0 && aes->idLen == 0) {
+        ret = Pkcs11CreateSecretKey(&key, session, CKK_AES,
+                                    (unsigned char*)aes->devKey, aes->keylen,
+                                    NULL, 0);
+    }
+    else if (ret == 0) {
+        ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY, CKK_AES, session, aes->id,
+                                aes->idLen);
+    }
+
+    if (ret == 0) {
+        params.pIv = (CK_BYTE_PTR)info->cipher.aesgcm_dec.iv;
+        params.ulIvLen = info->cipher.aesgcm_dec.ivSz;
+        params.pAAD = (CK_BYTE_PTR)info->cipher.aesgcm_dec.authIn;
+        params.ulAADLen = info->cipher.aesgcm_dec.authInSz;
+        params.ulTagBits = info->cipher.aesgcm_dec.authTagSz * 8;
+
+        mech.mechanism = CKM_AES_GCM;
+        mech.ulParameterLen = sizeof(params);
+        mech.pParameter = &params;
+
+        rv = session->func->C_DecryptInit(session->handle, &mech, key);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        outLen = len = info->cipher.aesgcm_dec.sz;
+        rv = session->func->C_DecryptUpdate(session->handle,
+                                       (CK_BYTE_PTR)info->cipher.aesgcm_dec.in,
+                                       info->cipher.aesgcm_dec.sz,
+                                       info->cipher.aesgcm_dec.out,
+                                       &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        /* Put authentication tag in as encrypted data. */
+        outLen = len = (len + info->cipher.aesgcm_dec.authTagSz -
+                        (word32)outLen);
+        rv = session->func->C_DecryptUpdate(session->handle,
+                                  (CK_BYTE_PTR)info->cipher.aesgcm_dec.authTag,
+                                  info->cipher.aesgcm_dec.authTagSz,
+                                  info->cipher.aesgcm_dec.out,
+                                  &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        outLen = len = (len - (word32)outLen);
+        /* Decrypted data comes out now. */
+        rv = session->func->C_DecryptFinal(session->handle,
+                                           info->cipher.aesgcm_dec.out,
+                                           &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (aes->idLen == 0 && key != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, key);
+
+    return ret;
+}
+#endif
+
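+/* Illustrative usage (editorial sketch, not part of the build; names and
+ * sizes are assumptions): an application routes AES-GCM through the
+ * callbacks above by initialising the Aes object with the device id the
+ * token was registered under, e.g.:
+ *
+ *   Aes aes;
+ *   wc_AesInit(&aes, NULL, devId);
+ *   wc_AesGcmSetKey(&aes, key, 32);
+ *   wc_AesGcmEncrypt(&aes, out, in, inSz, iv, 12, tag, 16, aad, aadSz);
+ *
+ * wolfCrypt then dispatches to Pkcs11AesGcmEncrypt() instead of the
+ * software implementation.
+ */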
+#if !defined(NO_AES) && defined(HAVE_AES_CBC)
+/**
+ * Performs the AES-CBC encryption operation.
+ *
+ * @param session [in] Session object.
+ * @param info [in] Cryptographic operation data.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ *         MEMORY_E when a memory allocation fails.
+ *         0 on success.
+ */
+static int Pkcs11AesCbcEncrypt(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int ret = 0;
+    CK_RV rv;
+    Aes* aes = info->cipher.aescbc.aes;
+    CK_MECHANISM_INFO mechInfo;
+    CK_OBJECT_HANDLE key = NULL_PTR;
+    CK_MECHANISM mech;
+    CK_ULONG outLen;
+
+    /* Check operation is supported. */
+    rv = session->func->C_GetMechanismInfo(session->slotId, CKM_AES_CBC,
+                                           &mechInfo);
+    if (rv != CKR_OK || (mechInfo.flags & CKF_ENCRYPT) == 0)
+        ret = NOT_COMPILED_IN;
+
+    if (ret == 0) {
+        WOLFSSL_MSG("PKCS#11: AES-CBC Encryption Operation");
+    }
+
+    /* Create a secret key object or find by id. */
+    if (ret == 0 && aes->idLen == 0) {
+        ret = Pkcs11CreateSecretKey(&key, session, CKK_AES,
+                                    (unsigned char*)aes->devKey, aes->keylen,
+                                    NULL, 0);
+    }
+    else if (ret == 0) {
+        ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY, CKK_AES, session, aes->id,
+                                aes->idLen);
+    }
+
+    if (ret == 0) {
+        mech.mechanism = CKM_AES_CBC;
+        mech.ulParameterLen = AES_BLOCK_SIZE;
+        mech.pParameter = (CK_BYTE_PTR)info->cipher.aescbc.aes->reg;
+
+        rv = session->func->C_EncryptInit(session->handle, &mech, key);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        outLen = info->cipher.aescbc.sz;
+        rv = session->func->C_Encrypt(session->handle,
+                                      (CK_BYTE_PTR)info->cipher.aescbc.in,
+                                      info->cipher.aescbc.sz,
+                                      info->cipher.aescbc.out,
+                                      &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (aes->idLen == 0 && key != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, key);
+
+    return ret;
+}
+
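+/* Editorial note (illustrative, not from the original source): CKM_AES_CBC
+ * takes the 16 byte IV directly as the mechanism parameter, so aes->reg -
+ * the buffer wolfCrypt uses for the current CBC chaining block - is passed
+ * to C_EncryptInit(). Unlike the GCM path, a single C_Encrypt() call
+ * suffices because CBC produces no trailing authentication tag.
+ */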
+/**
+ * Performs the AES-CBC decryption operation.
+ *
+ * @param session [in] Session object.
+ * @param info [in] Cryptographic operation data.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ *         MEMORY_E when a memory allocation fails.
+ *         0 on success.
+ */
+static int Pkcs11AesCbcDecrypt(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int ret = 0;
+    CK_RV rv;
+    Aes* aes = info->cipher.aescbc.aes;
+    CK_MECHANISM_INFO mechInfo;
+    CK_OBJECT_HANDLE key = NULL_PTR;
+    CK_MECHANISM mech;
+    CK_ULONG outLen;
+
+    /* Check operation is supported. */
+    rv = session->func->C_GetMechanismInfo(session->slotId, CKM_AES_CBC,
+                                           &mechInfo);
+    if (rv != CKR_OK || (mechInfo.flags & CKF_DECRYPT) == 0)
+        ret = NOT_COMPILED_IN;
+
+    if (ret == 0) {
+        WOLFSSL_MSG("PKCS#11: AES-CBC Decryption Operation");
+    }
+
+    /* Create a secret key object or find by id. */
+    if (ret == 0 && aes->idLen == 0) {
+        ret = Pkcs11CreateSecretKey(&key, session, CKK_AES,
+                                    (unsigned char*)aes->devKey, aes->keylen,
+                                    NULL, 0);
+    }
+    else if (ret == 0) {
+        ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY, CKK_AES, session, aes->id,
+                                aes->idLen);
+    }
+
+    if (ret == 0) {
+        mech.mechanism = CKM_AES_CBC;
+        mech.ulParameterLen = AES_BLOCK_SIZE;
+        mech.pParameter = (CK_BYTE_PTR)info->cipher.aescbc.aes->reg;
+
+        rv = session->func->C_DecryptInit(session->handle, &mech, key);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        outLen = info->cipher.aescbc.sz;
+        rv = session->func->C_DecryptUpdate(session->handle,
+                                            (CK_BYTE_PTR)info->cipher.aescbc.in,
+                                            info->cipher.aescbc.sz,
+                                            info->cipher.aescbc.out,
+                                            &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (aes->idLen == 0 && key != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, key);
+
+    return ret;
+}
+#endif
+
+#ifndef NO_HMAC
+/**
+ * Updates or calculates the HMAC of the data.
+ *
+ * @param session [in] Session object.
+ * @param info [in] Cryptographic operation data.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ *         0 on success.
+ */
+static int Pkcs11Hmac(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int ret = 0;
+    CK_RV rv;
+    Hmac* hmac = info->hmac.hmac;
+    CK_MECHANISM_INFO mechInfo;
+    CK_OBJECT_HANDLE key = NULL_PTR;
+    CK_MECHANISM mech;
+    CK_ULONG outLen;
+    int mechType;
+    int keyType;
+
+    if (hmac->innerHashKeyed == WC_HMAC_INNER_HASH_KEYED_SW)
+        ret = NOT_COMPILED_IN;
+
+    if (ret == 0)
+        ret = Pkcs11HmacTypes(info->hmac.macType, &mechType, &keyType);
+    if (ret == 0) {
+        /* Check operation is supported. */
+        rv = session->func->C_GetMechanismInfo(session->slotId, mechType,
+                                               &mechInfo);
+        if (rv != CKR_OK || (mechInfo.flags & CKF_SIGN) == 0)
+            ret = NOT_COMPILED_IN;
+    }
+
+    /* Check whether the key has already been used to initialize. */
+    if (ret == 0 && !hmac->innerHashKeyed) {
+        WOLFSSL_MSG("PKCS#11: HMAC Init");
+
+        /* Check device supports key length. */
+        if (mechInfo.ulMaxKeySize > 0 &&
+                (hmac->keyLen < mechInfo.ulMinKeySize ||
+                 hmac->keyLen > mechInfo.ulMaxKeySize)) {
+            WOLFSSL_MSG("PKCS#11: Key Length not supported");
+            ret = NOT_COMPILED_IN;
+        }
+
+        /* Create a secret key object or find by id. */
+        if (ret == 0 && hmac->idLen == 0) {
+            ret = Pkcs11CreateSecretKey(&key, session, keyType,
+                                   (unsigned char*)hmac->keyRaw, hmac->keyLen,
+                                   NULL, 0);
+            if (ret == WC_HW_E) {
+                ret = Pkcs11CreateSecretKey(&key, session, CKK_GENERIC_SECRET,
+                                   (unsigned char*)hmac->keyRaw, hmac->keyLen,
+                                   NULL, 0);
+            }
+        }
+        else if (ret == 0) {
+            ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY, keyType, session,
+                                    hmac->id, hmac->idLen);
+            if (ret == WC_HW_E) {
+                ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY,
+                                        CKK_GENERIC_SECRET, session, hmac->id,
+                                        hmac->idLen);
+            }
+        }
+
+        /* Initialize HMAC operation */
+        if (ret == 0) {
+            mech.mechanism = mechType;
+            mech.ulParameterLen = 0;
+            mech.pParameter = NULL;
+
+            rv = session->func->C_SignInit(session->handle, &mech, key);
+            if (rv != CKR_OK)
+                ret = WC_HW_E;
+        }
+
+        /* Don't initialize HMAC again if this succeeded */
+        if (ret == 0)
+            hmac->innerHashKeyed = WC_HMAC_INNER_HASH_KEYED_DEV;
+    }
+    /* Update the HMAC if input data passed in. */
+    if (ret == 0 && info->hmac.inSz > 0) {
+        WOLFSSL_MSG("PKCS#11: HMAC Update");
+
+        rv = session->func->C_SignUpdate(session->handle,
+                                         (CK_BYTE_PTR)info->hmac.in,
+                                         info->hmac.inSz);
+        /* Some algorithm implementations only support C_Sign. */
+        if (rv == CKR_MECHANISM_INVALID) {
+            WOLFSSL_MSG("PKCS#11: HMAC Update/Final not supported");
+            ret = NOT_COMPILED_IN;
+            /* Allow software implementation to set key. */
+            hmac->innerHashKeyed = 0;
+        }
+        else if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    /* Calculate the HMAC result if output buffer specified. */
+    if (ret == 0 && info->hmac.digest != NULL) {
+        WOLFSSL_MSG("PKCS#11: HMAC Final");
+
+        outLen = WC_MAX_DIGEST_SIZE;
+        rv = session->func->C_SignFinal(session->handle,
+                                        (CK_BYTE_PTR)info->hmac.digest,
+                                        &outLen);
+        /* Some algorithm implementations only support C_Sign. */
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+        else
+            hmac->innerHashKeyed = 0;
+    }
+
+    if (hmac->idLen == 0 && key != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, key);
+
+    return ret;
+}
+#endif
+
+#ifndef WC_NO_RNG
+#ifndef HAVE_HASHDRBG
+/**
+ * Performs random number generation.
+ *
+ * @param session [in] Session object.
+ * @param info [in] Cryptographic operation data.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ *         0 on success.
+ */
+static int Pkcs11RandomBlock(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int ret = 0;
+    CK_RV rv;
+
+    rv = session->func->C_GenerateRandom(session->handle, info->rng.out,
+                                         info->rng.sz);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+    return ret;
+}
+#endif
+
+/**
+ * Generates entropy (seed) data.
+ *
+ * @param session [in] Session object.
+ * @param info [in] Cryptographic operation data.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ *         0 on success.
+ */
+static int Pkcs11RandomSeed(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int ret = 0;
+    CK_RV rv;
+
+    rv = session->func->C_GenerateRandom(session->handle, info->seed.seed,
+                                         info->seed.sz);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+    return ret;
+}
+#endif
+
+/**
+ * Perform a cryptographic operation using a PKCS#11 device.
+ *
+ * @param devId [in] Device identifier.
+ * @param info [in] Cryptographic operation data.
+ * @param ctx [in] Context data for device - the token object.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ *         0 on success.
+ */
+int wc_Pkcs11_CryptoDevCb(int devId, wc_CryptoInfo* info, void* ctx)
+{
+    int ret = 0;
+    Pkcs11Token* token = (Pkcs11Token*)ctx;
+    Pkcs11Session session;
+    int readWrite = 0;
+
+    if (devId <= INVALID_DEVID || info == NULL || ctx == NULL)
+        ret = BAD_FUNC_ARG;
+
+    if (ret == 0) {
+        ret = Pkcs11OpenSession(token, &session, readWrite);
+        if (ret == 0) {
+            if (info->algo_type == WC_ALGO_TYPE_PK) {
+#if !defined(NO_RSA) || defined(HAVE_ECC)
+                switch (info->pk.type) {
+    #ifndef NO_RSA
+                    case WC_PK_TYPE_RSA:
+                        ret = Pkcs11Rsa(&session, info);
+                        break;
+        #ifdef WOLFSSL_KEY_GEN
+                    case WC_PK_TYPE_RSA_KEYGEN:
+                        ret = Pkcs11RsaKeyGen(&session, info);
+                        break;
+        #endif
+    #endif
+    #ifdef HAVE_ECC
+        #ifndef NO_PKCS11_EC_KEYGEN
+                    case WC_PK_TYPE_EC_KEYGEN:
+                        ret = Pkcs11EcKeyGen(&session, info);
+                        break;
+        #endif
+        #ifndef NO_PKCS11_ECDH
+                    case WC_PK_TYPE_ECDH:
+                        ret = Pkcs11ECDH(&session, info);
+                        break;
+        #endif
+                    case WC_PK_TYPE_ECDSA_SIGN:
+                        ret = Pkcs11ECDSA_Sign(&session, info);
+                        break;
+                    case WC_PK_TYPE_ECDSA_VERIFY:
+                        ret = Pkcs11ECDSA_Verify(&session, info);
+                        break;
+    #endif
+                    default:
+                        ret = NOT_COMPILED_IN;
+                        break;
+                }
+#else
+                ret = NOT_COMPILED_IN;
+#endif /* !NO_RSA || HAVE_ECC */
+            }
+            else if (info->algo_type == WC_ALGO_TYPE_CIPHER) {
+            #ifndef NO_AES
+                switch (info->cipher.type) {
+                #ifdef HAVE_AESGCM
+                    case WC_CIPHER_AES_GCM:
+                        if (info->cipher.enc)
+                            ret = Pkcs11AesGcmEncrypt(&session, info);
+                        else
+                            ret = Pkcs11AesGcmDecrypt(&session, info);
+                        break;
+                #endif
+                #ifdef HAVE_AES_CBC
+                    case WC_CIPHER_AES_CBC:
+                        if (info->cipher.enc)
+                            ret = Pkcs11AesCbcEncrypt(&session, info);
+                        else
+                            ret = Pkcs11AesCbcDecrypt(&session, info);
+                        break;
+                #endif
+                }
+            #else
+                ret = NOT_COMPILED_IN;
+            #endif
+            }
+            else if (info->algo_type == WC_ALGO_TYPE_HMAC) {
+            #ifndef NO_HMAC
+                ret = Pkcs11Hmac(&session, info);
+            #else
+                ret = NOT_COMPILED_IN;
+            #endif
+            }
+            else if (info->algo_type == WC_ALGO_TYPE_RNG) {
+            #if !defined(WC_NO_RNG) && !defined(HAVE_HASHDRBG)
+                ret = Pkcs11RandomBlock(&session, info);
+            #else
+                ret = NOT_COMPILED_IN;
+            #endif
+            }
+            else if (info->algo_type == WC_ALGO_TYPE_SEED) {
+            #ifndef WC_NO_RNG
+                ret = Pkcs11RandomSeed(&session, info);
+            #else
+                ret = NOT_COMPILED_IN;
+            #endif
+            }
+            else
+                ret = NOT_COMPILED_IN;
+
+            Pkcs11CloseSession(token, &session);
+        }
+    }
+
+    return ret;
+}
+
+#endif /* HAVE_PKCS11 */
+
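+/* Illustrative registration (editorial sketch, not part of the original
+ * patch; library path, slot, PIN and devId are placeholder assumptions):
+ *
+ *   Pkcs11Dev   dev;
+ *   Pkcs11Token token;
+ *   int         devId = 1;
+ *
+ *   wc_Pkcs11_Initialize(&dev, "/usr/lib/softhsm/libsofthsm2.so", NULL);
+ *   wc_Pkcs11Token_Init(&token, &dev, slotId, "Token",
+ *                       (byte*)"password", 8);
+ *   wc_CryptoCb_RegisterDevice(devId, wc_Pkcs11_CryptoDevCb, &token);
+ *
+ * Any wolfCrypt operation created with this devId is then offloaded
+ * through wc_Pkcs11_CryptoDevCb().
+ */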
diff --git a/client/wolfssl/wolfcrypt/src/wc_port.c b/client/wolfssl/wolfcrypt/src/wc_port.c
new file mode 100644
index 0000000..087807b
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/wc_port.c
@@ -0,0 +1,2276 @@
+/* port.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/types.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/wc_port.h>
+#ifdef HAVE_ECC
+    #include <wolfssl/wolfcrypt/ecc.h>
+#endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include <wolfssl/wolfcrypt/async.h>
+#endif
+
+/* IPP header files for library initialization */
+#ifdef HAVE_FAST_RSA
+    #include <ipp.h>
+    #include <ippcp.h>
+#endif
+
+#ifdef FREESCALE_LTC_TFM
+    #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
+#endif
+
+#if defined(WOLFSSL_ATMEL) || defined(WOLFSSL_ATECC508A)
+    #include <wolfssl/wolfcrypt/port/atmel/atmel.h>
+#endif
+#if defined(WOLFSSL_RENESAS_TSIP)
+    #include <wolfssl/wolfcrypt/port/Renesas/renesas-tsip-crypt.h>
+#endif
+#if defined(WOLFSSL_STSAFEA100)
+    #include <wolfssl/wolfcrypt/port/st/stsafe.h>
+#endif
+
+#if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+    #include <wolfssl/openssl/evp.h>
+#endif
+
+#if defined(USE_WOLFSSL_MEMORY) && defined(WOLFSSL_TRACK_MEMORY)
+    #include <wolfssl/wolfcrypt/memory.h>
+    #include <wolfssl/wolfcrypt/mem_track.h>
+#endif
+
+#if defined(WOLFSSL_IMX6_CAAM) || defined(WOLFSSL_IMX6_CAAM_RNG) || \
+    defined(WOLFSSL_IMX6_CAAM_BLOB)
+    #include <wolfssl/wolfcrypt/port/caam/wolfcaam.h>
+#endif
+
+#ifdef WOLF_CRYPTO_CB
+    #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
+#ifdef HAVE_INTEL_QA_SYNC
+    #include <wolfssl/wolfcrypt/port/intel/quickassist_sync.h>
+#endif
+
+#ifdef HAVE_CAVIUM_OCTEON_SYNC
+    #include <wolfssl/wolfcrypt/port/cavium/cavium_octeon_sync.h>
+#endif
+
+#ifdef WOLFSSL_SCE
+    #include "hal_data.h"
+#endif
+
+#if defined(WOLFSSL_DSP) && !defined(WOLFSSL_DSP_BUILD)
+    #include "rpcmem.h"
+#endif
+
+#ifdef _MSC_VER
+    /* 4996 warning to use MS extensions e.g., strcpy_s instead of strncpy */
+    #pragma warning(disable: 4996)
+#endif
+
+/* prevent multiple mutex initializations */
+static volatile int initRefCount = 0;
+
+/* Used to initialize state for wolfcrypt
+   return 0 on success
+ */
+int wolfCrypt_Init(void)
+{
+    int ret = 0;
+
+    if (initRefCount == 0) {
+        WOLFSSL_ENTER("wolfCrypt_Init");
+
+    #ifdef WOLFSSL_FORCE_MALLOC_FAIL_TEST
+        {
+            word32 rngMallocFail;
+            time_t seed = time(NULL);
+            srand((word32)seed);
+            rngMallocFail = rand() % 2000; /* max 2000 */
+            printf("\n--- RNG MALLOC FAIL AT %d---\n", rngMallocFail);
+            wolfSSL_SetMemFailCount(rngMallocFail);
+        }
+    #endif
+
+    #ifdef WOLF_CRYPTO_CB
+        wc_CryptoCb_Init();
+    #endif
+
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ret = wolfAsync_HardwareStart();
+        if (ret != 0) {
+            WOLFSSL_MSG("Async hardware start failed");
+            /* don't return failure, allow operation to continue */
+        }
+    #endif
+
+    #if defined(WOLFSSL_RENESAS_TSIP_CRYPT)
+        ret = tsip_Open( );
+        if( ret != TSIP_SUCCESS ) {
+            WOLFSSL_MSG("RENESAS TSIP Open failed");
+            /* not return 1 since WOLFSSL_SUCCESS=1*/
+            ret = -1;/* FATAL ERROR */
+            return ret;
+        }
+    #endif
+
+    #if defined(WOLFSSL_TRACK_MEMORY) && !defined(WOLFSSL_STATIC_MEMORY)
+        ret = InitMemoryTracker();
+        if (ret != 0) {
+            WOLFSSL_MSG("InitMemoryTracker failed");
+            return ret;
+        }
+    #endif
+
+    #if WOLFSSL_CRYPT_HW_MUTEX
+        /* If crypto hardware mutex protection is enabled, then initialize it */
+        ret = wolfSSL_CryptHwMutexInit();
+        if (ret != 0) {
+            WOLFSSL_MSG("Hw crypt mutex init failed");
+            return ret;
+        }
+    #endif
+
+        /* if defined have fast RSA then initialize Intel IPP */
+    #ifdef HAVE_FAST_RSA
+        WOLFSSL_MSG("Attempting to use optimized IPP Library");
+        if ((ret = ippInit()) != ippStsNoErr) {
+            /* possible to get a CPU feature support status on optimized IPP
+               library but still use default library and see competitive speeds */
+            WOLFSSL_MSG("Warning when trying to set up
optimization"); + WOLFSSL_MSG(ippGetStatusString(ret)); + WOLFSSL_MSG("Using default fast IPP library"); + ret = 0; + (void)ret; /* suppress not read warning */ + } + #endif + + #if defined(FREESCALE_LTC_TFM) || defined(FREESCALE_LTC_ECC) + ret = ksdk_port_init(); + if (ret != 0) { + WOLFSSL_MSG("KSDK port init failed"); + return ret; + } + #endif + + #if defined(WOLFSSL_ATMEL) || defined(WOLFSSL_ATECC508A) + ret = atmel_init(); + if (ret != 0) { + WOLFSSL_MSG("CryptoAuthLib init failed"); + return ret; + } + #endif + #if defined(WOLFSSL_CRYPTOCELL) + /* enable and initialize the ARM CryptoCell 3xx runtime library */ + ret = cc310_Init(); + if (ret != 0) { + WOLFSSL_MSG("CRYPTOCELL init failed"); + return ret; + } + #endif + #if defined(WOLFSSL_STSAFEA100) + stsafe_interface_init(); + #endif + + #ifdef WOLFSSL_ARMASM + WOLFSSL_MSG("Using ARM hardware acceleration"); + #endif + + #ifdef WOLFSSL_AFALG + WOLFSSL_MSG("Using AF_ALG for crypto acceleration"); + #endif + + #if !defined(WOLFCRYPT_ONLY) && defined(OPENSSL_EXTRA) + wolfSSL_EVP_init(); + #endif + + #if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE) + if ((ret = wc_LoggingInit()) != 0) { + WOLFSSL_MSG("Error creating logging mutex"); + return ret; + } + #endif + +#ifdef HAVE_ECC + #ifdef ECC_CACHE_CURVE + if ((ret = wc_ecc_curve_cache_init()) != 0) { + WOLFSSL_MSG("Error creating curve cache"); + return ret; + } + #endif +#endif + +#ifdef WOLFSSL_SCE + ret = (int)WOLFSSL_SCE_GSCE_HANDLE.p_api->open( + WOLFSSL_SCE_GSCE_HANDLE.p_ctrl, WOLFSSL_SCE_GSCE_HANDLE.p_cfg); + if (ret == SSP_ERR_CRYPTO_SCE_ALREADY_OPEN) { + WOLFSSL_MSG("SCE already open"); + ret = 0; + } + if (ret != SSP_SUCCESS) { + WOLFSSL_MSG("Error opening SCE"); + return -1; /* FATAL_ERROR */ + } +#endif + +#if defined(WOLFSSL_IMX6_CAAM) || defined(WOLFSSL_IMX6_CAAM_RNG) || \ + defined(WOLFSSL_IMX6_CAAM_BLOB) + if ((ret = wc_caamInit()) != 0) { + return ret; + } +#endif + +#if defined(WOLFSSL_DSP) && !defined(WOLFSSL_DSP_BUILD) + if ((ret = wolfSSL_InitHandle()) != 0) { + return ret; + } + rpcmem_init(); +#endif + } + initRefCount++; + + return ret; +} + + +/* return success value is the same as wolfCrypt_Init */ +int wolfCrypt_Cleanup(void) +{ + int ret = 0; + + initRefCount--; + if (initRefCount < 0) + initRefCount = 0; + + if (initRefCount == 0) { + WOLFSSL_ENTER("wolfCrypt_Cleanup"); + +#ifdef HAVE_ECC + #ifdef FP_ECC + wc_ecc_fp_free(); + #endif + #ifdef ECC_CACHE_CURVE + wc_ecc_curve_cache_free(); + #endif +#endif /* HAVE_ECC */ + + #if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE) + ret = wc_LoggingCleanup(); + #endif + + #if defined(WOLFSSL_TRACK_MEMORY) && !defined(WOLFSSL_STATIC_MEMORY) + ShowMemoryTracker(); + #endif + + #ifdef WOLFSSL_ASYNC_CRYPT + wolfAsync_HardwareStop(); + #endif + #ifdef WOLFSSL_SCE + WOLFSSL_SCE_GSCE_HANDLE.p_api->close(WOLFSSL_SCE_GSCE_HANDLE.p_ctrl); + #endif + #if defined(WOLFSSL_IMX6_CAAM) || defined(WOLFSSL_IMX6_CAAM_RNG) || \ + defined(WOLFSSL_IMX6_CAAM_BLOB) + wc_caamFree(); + #endif + #if defined(WOLFSSL_CRYPTOCELL) + cc310_Free(); + #endif + #if defined(WOLFSSL_RENESAS_TSIP_CRYPT) + tsip_Close(); + #endif + #if defined(WOLFSSL_DSP) && !defined(WOLFSSL_DSP_BUILD) + rpcmem_deinit(); + wolfSSL_CleanupHandle(); + #endif + } + + return ret; +} + +#if !defined(NO_FILESYSTEM) && !defined(NO_WOLFSSL_DIR) && \ + !defined(WOLFSSL_NUCLEUS) && !defined(WOLFSSL_NUCLEUS_1_2) + +/* File Handling Helpers */ +/* returns 0 if file found, WC_READDIR_NOFILE if no files or negative error */ +int wc_ReadDirFirst(ReadDirCtx* ctx, 
const char* path, char** name) +{ + int ret = WC_READDIR_NOFILE; /* default to no files found */ + int pathLen = 0; + int dnameLen = 0; + + if (name) + *name = NULL; + + if (ctx == NULL || path == NULL) { + return BAD_FUNC_ARG; + } + + XMEMSET(ctx->name, 0, MAX_FILENAME_SZ); + pathLen = (int)XSTRLEN(path); + +#ifdef USE_WINDOWS_API + if (pathLen > MAX_FILENAME_SZ - 3) + return BAD_PATH_ERROR; + + XSTRNCPY(ctx->name, path, MAX_FILENAME_SZ - 3); + XSTRNCPY(ctx->name + pathLen, "\\*", MAX_FILENAME_SZ - pathLen); + + ctx->hFind = FindFirstFileA(ctx->name, &ctx->FindFileData); + if (ctx->hFind == INVALID_HANDLE_VALUE) { + WOLFSSL_MSG("FindFirstFile for path verify locations failed"); + return BAD_PATH_ERROR; + } + + do { + if (!(ctx->FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { + dnameLen = (int)XSTRLEN(ctx->FindFileData.cFileName); + + if (pathLen + dnameLen + 2 > MAX_FILENAME_SZ) { + return BAD_PATH_ERROR; + } + XSTRNCPY(ctx->name, path, pathLen + 1); + ctx->name[pathLen] = '\\'; + XSTRNCPY(ctx->name + pathLen + 1, + ctx->FindFileData.cFileName, + MAX_FILENAME_SZ - pathLen - 1); + if (name) + *name = ctx->name; + return 0; + } + } while (FindNextFileA(ctx->hFind, &ctx->FindFileData)); +#elif defined(WOLFSSL_ZEPHYR) + if (fs_opendir(&ctx->dir, path) != 0) { + WOLFSSL_MSG("opendir path verify locations failed"); + return BAD_PATH_ERROR; + } + ctx->dirp = &ctx->dir; + + while ((fs_readdir(&ctx->dir, &ctx->entry)) != 0) { + dnameLen = (int)XSTRLEN(ctx->entry.name); + + if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) { + ret = BAD_PATH_ERROR; + break; + } + XSTRNCPY(ctx->name, path, pathLen + 1); + ctx->name[pathLen] = '/'; + + /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. Because + * of earlier check it is known that dnameLen is less than + * MAX_FILENAME_SZ - (pathLen + 2) so dnameLen +1 will fit */ + XSTRNCPY(ctx->name + pathLen + 1, ctx->entry.name, dnameLen + 1); + if (fs_stat(ctx->name, &ctx->s) != 0) { + WOLFSSL_MSG("stat on name failed"); + ret = BAD_PATH_ERROR; + break; + } else if (ctx->s.type == FS_DIR_ENTRY_FILE) { + if (name) + *name = ctx->name; + return 0; + } + } +#elif defined(WOLFSSL_TELIT_M2MB) + ctx->dir = m2mb_fs_opendir((const CHAR*)path); + if (ctx->dir == NULL) { + WOLFSSL_MSG("opendir path verify locations failed"); + return BAD_PATH_ERROR; + } + + while ((ctx->entry = m2mb_fs_readdir(ctx->dir)) != NULL) { + dnameLen = (int)XSTRLEN(ctx->entry->d_name); + + if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) { + ret = BAD_PATH_ERROR; + break; + } + XSTRNCPY(ctx->name, path, pathLen + 1); + ctx->name[pathLen] = '/'; + + /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. 
Because + * of earlier check it is known that dnameLen is less than + * MAX_FILENAME_SZ - (pathLen + 2) so dnameLen +1 will fit */ + XSTRNCPY(ctx->name + pathLen + 1, ctx->entry->d_name, dnameLen + 1); + + if (m2mb_fs_stat(ctx->name, &ctx->s) != 0) { + WOLFSSL_MSG("stat on name failed"); + ret = BAD_PATH_ERROR; + break; + } + else if (ctx->s.st_mode & M2MB_S_IFREG) { + if (name) + *name = ctx->name; + return 0; + } + } +#else + ctx->dir = opendir(path); + if (ctx->dir == NULL) { + WOLFSSL_MSG("opendir path verify locations failed"); + return BAD_PATH_ERROR; + } + + while ((ctx->entry = readdir(ctx->dir)) != NULL) { + dnameLen = (int)XSTRLEN(ctx->entry->d_name); + + if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) { + ret = BAD_PATH_ERROR; + break; + } + XSTRNCPY(ctx->name, path, pathLen + 1); + ctx->name[pathLen] = '/'; + + /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. Because + * of earlier check it is known that dnameLen is less than + * MAX_FILENAME_SZ - (pathLen + 2) so dnameLen +1 will fit */ + XSTRNCPY(ctx->name + pathLen + 1, ctx->entry->d_name, dnameLen + 1); + if (stat(ctx->name, &ctx->s) != 0) { + WOLFSSL_MSG("stat on name failed"); + ret = BAD_PATH_ERROR; + break; + } else if (S_ISREG(ctx->s.st_mode)) { + if (name) + *name = ctx->name; + return 0; + } + } +#endif + wc_ReadDirClose(ctx); + + return ret; +} + +/* returns 0 if file found, WC_READDIR_NOFILE if no more files */ +int wc_ReadDirNext(ReadDirCtx* ctx, const char* path, char** name) +{ + int ret = WC_READDIR_NOFILE; /* default to no file found */ + int pathLen = 0; + int dnameLen = 0; + + if (name) + *name = NULL; + + if (ctx == NULL || path == NULL) { + return BAD_FUNC_ARG; + } + + XMEMSET(ctx->name, 0, MAX_FILENAME_SZ); + pathLen = (int)XSTRLEN(path); + +#ifdef USE_WINDOWS_API + while (FindNextFileA(ctx->hFind, &ctx->FindFileData)) { + if (!(ctx->FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { + dnameLen = (int)XSTRLEN(ctx->FindFileData.cFileName); + + if (pathLen + dnameLen + 2 > MAX_FILENAME_SZ) { + return BAD_PATH_ERROR; + } + XSTRNCPY(ctx->name, path, pathLen + 1); + ctx->name[pathLen] = '\\'; + XSTRNCPY(ctx->name + pathLen + 1, + ctx->FindFileData.cFileName, + MAX_FILENAME_SZ - pathLen - 1); + if (name) + *name = ctx->name; + return 0; + } + } +#elif defined(WOLFSSL_ZEPHYR) + while ((fs_readdir(&ctx->dir, &ctx->entry)) != 0) { + dnameLen = (int)XSTRLEN(ctx->entry.name); + + if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) { + ret = BAD_PATH_ERROR; + break; + } + XSTRNCPY(ctx->name, path, pathLen + 1); + ctx->name[pathLen] = '/'; + /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. Because + * of earlier check it is known that dnameLen is less than + * MAX_FILENAME_SZ - (pathLen + 2) so that dnameLen +1 will fit */ + XSTRNCPY(ctx->name + pathLen + 1, ctx->entry.name, dnameLen + 1); + + if (fs_stat(ctx->name, &ctx->s) != 0) { + WOLFSSL_MSG("stat on name failed"); + ret = BAD_PATH_ERROR; + break; + } else if (ctx->s.type == FS_DIR_ENTRY_FILE) { + if (name) + *name = ctx->name; + return 0; + } + } +#elif defined(WOLFSSL_TELIT_M2MB) + while ((ctx->entry = m2mb_fs_readdir(ctx->dir)) != NULL) { + dnameLen = (int)XSTRLEN(ctx->entry->d_name); + + if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) { + ret = BAD_PATH_ERROR; + break; + } + XSTRNCPY(ctx->name, path, pathLen + 1); + ctx->name[pathLen] = '/'; + + /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. 
Because + * of earlier check it is known that dnameLen is less than + * MAX_FILENAME_SZ - (pathLen + 2) so dnameLen +1 will fit */ + XSTRNCPY(ctx->name + pathLen + 1, ctx->entry->d_name, dnameLen + 1); + + if (m2mb_fs_stat(ctx->name, &ctx->s) != 0) { + WOLFSSL_MSG("stat on name failed"); + ret = BAD_PATH_ERROR; + break; + } + else if (ctx->s.st_mode & M2MB_S_IFREG) { + if (name) + *name = ctx->name; + return 0; + } + } +#else + while ((ctx->entry = readdir(ctx->dir)) != NULL) { + dnameLen = (int)XSTRLEN(ctx->entry->d_name); + + if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) { + ret = BAD_PATH_ERROR; + break; + } + XSTRNCPY(ctx->name, path, pathLen + 1); + ctx->name[pathLen] = '/'; + /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. Because + * of earlier check it is known that dnameLen is less than + * MAX_FILENAME_SZ - (pathLen + 2) so that dnameLen +1 will fit */ + XSTRNCPY(ctx->name + pathLen + 1, ctx->entry->d_name, dnameLen + 1); + + if (stat(ctx->name, &ctx->s) != 0) { + WOLFSSL_MSG("stat on name failed"); + ret = BAD_PATH_ERROR; + break; + } else if (S_ISREG(ctx->s.st_mode)) { + if (name) + *name = ctx->name; + return 0; + } + } +#endif + + wc_ReadDirClose(ctx); + + return ret; +} + +void wc_ReadDirClose(ReadDirCtx* ctx) +{ + if (ctx == NULL) { + return; + } + +#ifdef USE_WINDOWS_API + if (ctx->hFind != INVALID_HANDLE_VALUE) { + FindClose(ctx->hFind); + ctx->hFind = INVALID_HANDLE_VALUE; + } +#elif defined(WOLFSSL_ZEPHYR) + if (ctx->dirp) { + fs_closedir(ctx->dirp); + ctx->dirp = NULL; + } +#elif defined(WOLFSSL_TELIT_M2MB) + if (ctx->dir) { + m2mb_fs_closedir(ctx->dir); + ctx->dir = NULL; + } +#else + if (ctx->dir) { + closedir(ctx->dir); + ctx->dir = NULL; + } +#endif +} + +#endif /* !NO_FILESYSTEM && !NO_WOLFSSL_DIR */ + +#if !defined(NO_FILESYSTEM) && defined(WOLFSSL_ZEPHYR) +XFILE z_fs_open(const char* filename, const char* perm) +{ + XFILE file; + + file = XMALLOC(sizeof(*file), NULL, DYNAMIC_TYPE_FILE); + if (file != NULL) { + if (fs_open(file, filename) != 0) { + XFREE(file, NULL, DYNAMIC_TYPE_FILE); + file = NULL; + } + } + + return file; +} + +int z_fs_close(XFILE file) +{ + int ret; + + if (file == NULL) + return -1; + ret = (fs_close(file) == 0) ? 0 : -1; + + XFREE(file, NULL, DYNAMIC_TYPE_FILE); + + return ret; +} + +#endif /* !NO_FILESYSTEM && !WOLFSSL_ZEPHYR */ + + +wolfSSL_Mutex* wc_InitAndAllocMutex(void) +{ + wolfSSL_Mutex* m = (wolfSSL_Mutex*) XMALLOC(sizeof(wolfSSL_Mutex), NULL, + DYNAMIC_TYPE_MUTEX); + if (m != NULL) { + if (wc_InitMutex(m) != 0) { + WOLFSSL_MSG("Init Mutex failed"); + XFREE(m, NULL, DYNAMIC_TYPE_MUTEX); + m = NULL; + } + } + else { + WOLFSSL_MSG("Memory error with Mutex allocation"); + } + + return m; +} + +#ifdef USE_WOLF_STRTOK +/* String token (delim) search. If str is null use nextp. 
*/ +char* wc_strtok(char *str, const char *delim, char **nextp) +{ + char* ret; + int i, j; + + /* Use next if str is NULL */ + if (str == NULL && nextp) + str = *nextp; + + /* verify str input */ + if (str == NULL || *str == '\0') + return NULL; + + /* match on entire delim */ + for (i = 0; str[i]; i++) { + for (j = 0; delim[j]; j++) { + if (delim[j] == str[i]) + break; + } + if (!delim[j]) + break; + } + str += i; + /* if end of string, not found so return NULL */ + if (*str == '\0') + return NULL; + + ret = str; + + /* match on first delim */ + for (i = 0; str[i]; i++) { + for (j = 0; delim[j]; j++) { + if (delim[j] == str[i]) + break; + } + if (delim[j] == str[i]) + break; + } + str += i; + + /* null terminate found string */ + if (*str) + *str++ = '\0'; + + /* return pointer to next */ + if (nextp) + *nextp = str; + + return ret; +} +#endif /* USE_WOLF_STRTOK */ + +#ifdef USE_WOLF_STRSEP +char* wc_strsep(char **stringp, const char *delim) +{ + char *s, *tok; + const char *spanp; + + /* null check */ + if (stringp == NULL || *stringp == NULL) + return NULL; + + s = *stringp; + for (tok = s; *tok; ++tok) { + for (spanp = delim; *spanp; ++spanp) { + /* found delimiter */ + if (*tok == *spanp) { + *tok = '\0'; /* replace delim with null term */ + *stringp = tok + 1; /* return past delim */ + return s; + } + } + } + + *stringp = NULL; + return s; +} +#endif /* USE_WOLF_STRSEP */ + +#if WOLFSSL_CRYPT_HW_MUTEX +/* Mutex for protection of cryptography hardware */ +static wolfSSL_Mutex wcCryptHwMutex; +static int wcCryptHwMutexInit = 0; + +int wolfSSL_CryptHwMutexInit(void) { + int ret = 0; + if(wcCryptHwMutexInit == 0) { + ret = wc_InitMutex(&wcCryptHwMutex); + if(ret == 0) { + wcCryptHwMutexInit = 1; + } + } + return ret; +} + +int wolfSSL_CryptHwMutexLock(void) { + int ret = BAD_MUTEX_E; + + /* Make sure HW Mutex has been initialized */ + wolfSSL_CryptHwMutexInit(); + + if(wcCryptHwMutexInit) { + ret = wc_LockMutex(&wcCryptHwMutex); + } + return ret; +} + +int wolfSSL_CryptHwMutexUnLock(void) { + int ret = BAD_MUTEX_E; + + if(wcCryptHwMutexInit) { + ret = wc_UnLockMutex(&wcCryptHwMutex); + } + return ret; +} +#endif /* WOLFSSL_CRYPT_HW_MUTEX */ + + +/* ---------------------------------------------------------------------------*/ +/* Mutex Ports */ +/* ---------------------------------------------------------------------------*/ +#if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER) + static mutex_cb* compat_mutex_cb = NULL; + + /* Function that locks or unlocks a mutex based on the flag passed in. + * + * flag lock or unlock i.e. CRYPTO_LOCK + * type the type of lock to unlock or lock + * file name of the file calling + * line the line number from file calling + */ + int wc_LockMutex_ex(int flag, int type, const char* file, int line) + { + if (compat_mutex_cb != NULL) { + compat_mutex_cb(flag, type, file, line); + return 0; + } + else { + WOLFSSL_MSG("Mutex call back function not set. 
Call wc_SetMutexCb"); + return BAD_STATE_E; + } + } + + + /* Set the callback function to use for locking/unlocking mutex + * + * cb callback function to use + */ + int wc_SetMutexCb(mutex_cb* cb) + { + compat_mutex_cb = cb; + return 0; + } +#endif /* defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER) */ +#ifdef SINGLE_THREADED + + int wc_InitMutex(wolfSSL_Mutex* m) + { + (void)m; + return 0; + } + + int wc_FreeMutex(wolfSSL_Mutex *m) + { + (void)m; + return 0; + } + + + int wc_LockMutex(wolfSSL_Mutex *m) + { + (void)m; + return 0; + } + + + int wc_UnLockMutex(wolfSSL_Mutex *m) + { + (void)m; + return 0; + } + +#elif defined(FREERTOS) || defined(FREERTOS_TCP) || \ + defined(FREESCALE_FREE_RTOS) + + int wc_InitMutex(wolfSSL_Mutex* m) + { + int iReturn; + + *m = ( wolfSSL_Mutex ) xSemaphoreCreateMutex(); + if( *m != NULL ) + iReturn = 0; + else + iReturn = BAD_MUTEX_E; + + return iReturn; + } + + int wc_FreeMutex(wolfSSL_Mutex* m) + { + vSemaphoreDelete( *m ); + return 0; + } + + int wc_LockMutex(wolfSSL_Mutex* m) + { + /* Assume an infinite block, or should there be zero block? */ + xSemaphoreTake( *m, portMAX_DELAY ); + return 0; + } + + int wc_UnLockMutex(wolfSSL_Mutex* m) + { + xSemaphoreGive( *m ); + return 0; + } + +#elif defined(WOLFSSL_SAFERTOS) + + int wc_InitMutex(wolfSSL_Mutex* m) + { + vSemaphoreCreateBinary(m->mutexBuffer, m->mutex); + if (m->mutex == NULL) + return BAD_MUTEX_E; + + return 0; + } + + int wc_FreeMutex(wolfSSL_Mutex* m) + { + (void)m; + return 0; + } + + int wc_LockMutex(wolfSSL_Mutex* m) + { + /* Assume an infinite block */ + xSemaphoreTake(m->mutex, portMAX_DELAY); + return 0; + } + + int wc_UnLockMutex(wolfSSL_Mutex* m) + { + xSemaphoreGive(m->mutex); + return 0; + } + +#elif defined(USE_WINDOWS_API) + + int wc_InitMutex(wolfSSL_Mutex* m) + { + InitializeCriticalSection(m); + return 0; + } + + + int wc_FreeMutex(wolfSSL_Mutex* m) + { + DeleteCriticalSection(m); + return 0; + } + + + int wc_LockMutex(wolfSSL_Mutex* m) + { + EnterCriticalSection(m); + return 0; + } + + + int wc_UnLockMutex(wolfSSL_Mutex* m) + { + LeaveCriticalSection(m); + return 0; + } + +#elif defined(WOLFSSL_PTHREADS) + + int wc_InitMutex(wolfSSL_Mutex* m) + { + if (pthread_mutex_init(m, 0) == 0) + return 0; + else + return BAD_MUTEX_E; + } + + + int wc_FreeMutex(wolfSSL_Mutex* m) + { + if (pthread_mutex_destroy(m) == 0) + return 0; + else + return BAD_MUTEX_E; + } + + + int wc_LockMutex(wolfSSL_Mutex* m) + { + if (pthread_mutex_lock(m) == 0) + return 0; + else + return BAD_MUTEX_E; + } + + + int wc_UnLockMutex(wolfSSL_Mutex* m) + { + if (pthread_mutex_unlock(m) == 0) + return 0; + else + return BAD_MUTEX_E; + } + +#elif defined(WOLFSSL_VXWORKS) + + int wc_InitMutex(wolfSSL_Mutex* m) + { + if (m) { + if ((*m = semMCreate(0)) != SEM_ID_NULL) + return 0; + } + return BAD_MUTEX_E; + } + + + int wc_FreeMutex(wolfSSL_Mutex* m) + { + if (m) { + if (semDelete(*m) == OK) + return 0; + } + return BAD_MUTEX_E; + } + + + int wc_LockMutex(wolfSSL_Mutex* m) + { + if (m) { + if (semTake(*m, WAIT_FOREVER) == OK) + return 0; + } + return BAD_MUTEX_E; + } + + + int wc_UnLockMutex(wolfSSL_Mutex* m) + { + if (m) { + if (semGive(*m) == OK) + return 0; + } + return BAD_MUTEX_E; + } + +#elif defined(THREADX) + + int wc_InitMutex(wolfSSL_Mutex* m) + { + if (tx_mutex_create(m, "wolfSSL Mutex", TX_NO_INHERIT) == 0) + return 0; + else + return BAD_MUTEX_E; + } + + + int wc_FreeMutex(wolfSSL_Mutex* m) + { + if (tx_mutex_delete(m) == 0) + return 0; + else + return BAD_MUTEX_E; + } + + + int wc_LockMutex(wolfSSL_Mutex* m) 
+ { + if (tx_mutex_get(m, TX_WAIT_FOREVER) == 0) + return 0; + else + return BAD_MUTEX_E; + } + + int wc_UnLockMutex(wolfSSL_Mutex* m) + { + if (tx_mutex_put(m) == 0) + return 0; + else + return BAD_MUTEX_E; + } + +#elif defined(WOLFSSL_DEOS) + + int wc_InitMutex(wolfSSL_Mutex* m) + { + mutexStatus mutStat; + /* + The empty string "" denotes an anonymous mutex, so objects do not cause name collisions. + `protectWolfSSLTemp` in an XML configuration element template describing a mutex. + */ + if (m) { + mutStat = createMutex("", "protectWolfSSLTemp", m); + if (mutStat == mutexSuccess) + return 0; + else{ + WOLFSSL_MSG("wc_InitMutex failed"); + return mutStat; + } + } + return BAD_MUTEX_E; + } + + int wc_FreeMutex(wolfSSL_Mutex* m) + { + mutexStatus mutStat; + if (m) { + mutStat = deleteMutex(*m); + if (mutStat == mutexSuccess) + return 0; + else{ + WOLFSSL_MSG("wc_FreeMutex failed"); + return mutStat; + } + } + return BAD_MUTEX_E; + } + + int wc_LockMutex(wolfSSL_Mutex* m) + { + mutexStatus mutStat; + if (m) { + mutStat = lockMutex(*m); + if (mutStat == mutexSuccess) + return 0; + else{ + WOLFSSL_MSG("wc_LockMutex failed"); + return mutStat; + } + } + return BAD_MUTEX_E; + } + + int wc_UnLockMutex(wolfSSL_Mutex* m) + { + mutexStatus mutStat; + if (m) { + mutStat = unlockMutex(*m); + if (mutStat== mutexSuccess) + return 0; + else{ + WOLFSSL_MSG("wc_UnLockMutex failed"); + return mutStat; + } + } + return BAD_MUTEX_E; + } + +#elif defined(MICRIUM) + + int wc_InitMutex(wolfSSL_Mutex* m) + { + OS_ERR err; + + OSMutexCreate(m, "wolfSSL Mutex", &err); + + if (err == OS_ERR_NONE) + return 0; + else + return BAD_MUTEX_E; + } + + int wc_FreeMutex(wolfSSL_Mutex* m) + { + #if (OS_CFG_MUTEX_DEL_EN == DEF_ENABLED) + OS_ERR err; + + OSMutexDel(m, OS_OPT_DEL_ALWAYS, &err); + + if (err == OS_ERR_NONE) + return 0; + else + return BAD_MUTEX_E; + #else + return 0; + #endif + } + + int wc_LockMutex(wolfSSL_Mutex* m) + { + OS_ERR err; + + OSMutexPend(m, 0, OS_OPT_PEND_BLOCKING, NULL, &err); + + if (err == OS_ERR_NONE) + return 0; + else + return BAD_MUTEX_E; + } + + int wc_UnLockMutex(wolfSSL_Mutex* m) + { + OS_ERR err; + + OSMutexPost(m, OS_OPT_POST_NONE, &err); + + if (err == OS_ERR_NONE) + return 0; + else + return BAD_MUTEX_E; + } + +#elif defined(EBSNET) + + int wc_InitMutex(wolfSSL_Mutex* m) + { + if (rtp_sig_mutex_alloc(m, "wolfSSL Mutex") == -1) + return BAD_MUTEX_E; + else + return 0; + } + + int wc_FreeMutex(wolfSSL_Mutex* m) + { + rtp_sig_mutex_free(*m); + return 0; + } + + int wc_LockMutex(wolfSSL_Mutex* m) + { + if (rtp_sig_mutex_claim_timed(*m, RTIP_INF) == 0) + return 0; + else + return BAD_MUTEX_E; + } + + int wc_UnLockMutex(wolfSSL_Mutex* m) + { + rtp_sig_mutex_release(*m); + return 0; + } + + int ebsnet_fseek(int a, long b, int c) + { + int retval; + + retval = vf_lseek(a, b, c); + if (retval > 0) + retval = 0; + else + retval = -1; + + return(retval); + } + +#elif defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX) + + int wc_InitMutex(wolfSSL_Mutex* m) + { + if (_mutex_init(m, NULL) == MQX_EOK) + return 0; + else + return BAD_MUTEX_E; + } + + int wc_FreeMutex(wolfSSL_Mutex* m) + { + if (_mutex_destroy(m) == MQX_EOK) + return 0; + else + return BAD_MUTEX_E; + } + + int wc_LockMutex(wolfSSL_Mutex* m) + { + if (_mutex_lock(m) == MQX_EOK) + return 0; + else + return BAD_MUTEX_E; + } + + int wc_UnLockMutex(wolfSSL_Mutex* m) + { + if (_mutex_unlock(m) == MQX_EOK) + return 0; + else + return BAD_MUTEX_E; + } + +#elif defined(WOLFSSL_TIRTOS) + #include + + int wc_InitMutex(wolfSSL_Mutex* m) + { + 
Semaphore_Params params;
+        Error_Block eb;
+
+        Error_init(&eb);
+        Semaphore_Params_init(&params);
+        params.mode = Semaphore_Mode_BINARY;
+
+        *m = Semaphore_create(1, &params, &eb);
+        if (Error_check(&eb)) {
+            Error_raise(&eb, Error_E_generic, "Failed to Create the semaphore.",
+                        NULL);
+            return BAD_MUTEX_E;
+        }
+        else
+            return 0;
+    }
+
+    int wc_FreeMutex(wolfSSL_Mutex* m)
+    {
+        Semaphore_delete(m);
+
+        return 0;
+    }
+
+    int wc_LockMutex(wolfSSL_Mutex* m)
+    {
+        Semaphore_pend(*m, BIOS_WAIT_FOREVER);
+
+        return 0;
+    }
+
+    int wc_UnLockMutex(wolfSSL_Mutex* m)
+    {
+        Semaphore_post(*m);
+
+        return 0;
+    }
+
+#elif defined(WOLFSSL_uITRON4)
+
+    int wc_InitMutex(wolfSSL_Mutex* m)
+    {
+        int iReturn;
+        m->sem.sematr  = TA_TFIFO;
+        m->sem.isemcnt = 1;
+        m->sem.maxsem  = 1;
+        m->sem.name    = NULL;
+
+        m->id = acre_sem(&m->sem);
+        if( m->id != E_OK )
+            iReturn = 0;
+        else
+            iReturn = BAD_MUTEX_E;
+
+        return iReturn;
+    }
+
+    int wc_FreeMutex(wolfSSL_Mutex* m)
+    {
+        del_sem( m->id );
+        return 0;
+    }
+
+    int wc_LockMutex(wolfSSL_Mutex* m)
+    {
+        wai_sem(m->id);
+        return 0;
+    }
+
+    int wc_UnLockMutex(wolfSSL_Mutex* m)
+    {
+        sig_sem(m->id);
+        return 0;
+    }
+
+    /**** uITRON malloc/free ***/
+    static ID ID_wolfssl_MPOOL = 0;
+    static T_CMPL wolfssl_MPOOL = {TA_TFIFO, 0, NULL, "wolfSSL_MPOOL"};
+
+    int uITRON4_minit(size_t poolsz) {
+        ER ercd;
+        wolfssl_MPOOL.mplsz = poolsz;
+        ercd = acre_mpl(&wolfssl_MPOOL);
+        if (ercd > 0) {
+            ID_wolfssl_MPOOL = ercd;
+            return 0;
+        } else {
+            return -1;
+        }
+    }
+
+    void *uITRON4_malloc(size_t sz) {
+        ER ercd;
+        void *p = NULL;
+        ercd = get_mpl(ID_wolfssl_MPOOL, sz, (VP)&p);
+        if (ercd == E_OK) {
+            return p;
+        } else {
+            return 0;
+        }
+    }
+
+    void *uITRON4_realloc(void *p, size_t sz) {
+        ER ercd;
+        void *newp;
+        if(p) {
+            ercd = get_mpl(ID_wolfssl_MPOOL, sz, (VP)&newp);
+            if (ercd == E_OK) {
+                XMEMCPY(newp, p, sz);
+                ercd = rel_mpl(ID_wolfssl_MPOOL, (VP)p);
+                if (ercd == E_OK) {
+                    return newp;
+                }
+            }
+        }
+        return 0;
+    }
+
+    void uITRON4_free(void *p) {
+        ER ercd;
+        ercd = rel_mpl(ID_wolfssl_MPOOL, (VP)p);
+        if (ercd == E_OK) {
+            return;
+        } else {
+            return;
+        }
+    }
+
+#elif defined(WOLFSSL_uTKERNEL2)
+
+    int wc_InitMutex(wolfSSL_Mutex* m)
+    {
+        int iReturn;
+        m->sem.sematr  = TA_TFIFO;
+        m->sem.isemcnt = 1;
+        m->sem.maxsem  = 1;
+
+        m->id = tk_cre_sem(&m->sem);
+        if( m->id != NULL )
+            iReturn = 0;
+        else
+            iReturn = BAD_MUTEX_E;
+
+        return iReturn;
+    }
+
+    int wc_FreeMutex(wolfSSL_Mutex* m)
+    {
+        tk_del_sem(m->id);
+        return 0;
+    }
+
+    int wc_LockMutex(wolfSSL_Mutex* m)
+    {
+        tk_wai_sem(m->id, 1, TMO_FEVR);
+        return 0;
+    }
+
+    int wc_UnLockMutex(wolfSSL_Mutex* m)
+    {
+        tk_sig_sem(m->id, 1);
+        return 0;
+    }
+
+    /**** uT-Kernel malloc/free ***/
+    static ID ID_wolfssl_MPOOL = 0;
+    static T_CMPL wolfssl_MPOOL = {
+        NULL,       /* Extended information */
+        TA_TFIFO,   /* Memory pool attribute */
+        0,          /* Size of whole memory pool (byte) */
+        "wolfSSL"   /* Object name (max 8-char) */
+    };
+
+    int uTKernel_init_mpool(unsigned int sz) {
+        ER ercd;
+        wolfssl_MPOOL.mplsz = sz;
+        ercd = tk_cre_mpl(&wolfssl_MPOOL);
+        if (ercd > 0) {
+            ID_wolfssl_MPOOL = ercd;
+            return 0;
+        } else {
+            return (int)ercd;
+        }
+    }
+
+    void *uTKernel_malloc(unsigned int sz) {
+        ER ercd;
+        void *p = NULL;
+        ercd = tk_get_mpl(ID_wolfssl_MPOOL, sz, (VP)&p, TMO_FEVR);
+        if (ercd == E_OK) {
+            return p;
+        } else {
+            return 0;
+        }
+    }
+
+    void *uTKernel_realloc(void *p, unsigned int sz) {
+        ER ercd;
+        void *newp;
+        if (p) {
+            ercd = tk_get_mpl(ID_wolfssl_MPOOL, sz, (VP)&newp, TMO_FEVR);
+            if (ercd == E_OK) {
+                XMEMCPY(newp, p, sz);
+                ercd =
tk_rel_mpl(ID_wolfssl_MPOOL, (VP)p); + if (ercd == E_OK) { + return newp; + } + } + } + return 0; + } + + void uTKernel_free(void *p) { + ER ercd; + ercd = tk_rel_mpl(ID_wolfssl_MPOOL, (VP)p); + if (ercd == E_OK) { + return; + } else { + return; + } + } + +#elif defined (WOLFSSL_FROSTED) + + int wc_InitMutex(wolfSSL_Mutex* m) + { + *m = mutex_init(); + if (*m) + return 0; + else + return -1; + } + + int wc_FreeMutex(wolfSSL_Mutex* m) + { + mutex_destroy(*m); + return(0); + } + + int wc_LockMutex(wolfSSL_Mutex* m) + { + mutex_lock(*m); + return 0; + } + + int wc_UnLockMutex(wolfSSL_Mutex* m) + { + mutex_unlock(*m); + return 0; + } + +#elif defined(WOLFSSL_CMSIS_RTOS) + + #define CMSIS_NMUTEX 10 + osMutexDef(wolfSSL_mt0); osMutexDef(wolfSSL_mt1); osMutexDef(wolfSSL_mt2); + osMutexDef(wolfSSL_mt3); osMutexDef(wolfSSL_mt4); osMutexDef(wolfSSL_mt5); + osMutexDef(wolfSSL_mt6); osMutexDef(wolfSSL_mt7); osMutexDef(wolfSSL_mt8); + osMutexDef(wolfSSL_mt9); + + static const osMutexDef_t *CMSIS_mutex[] = { osMutex(wolfSSL_mt0), + osMutex(wolfSSL_mt1), osMutex(wolfSSL_mt2), osMutex(wolfSSL_mt3), + osMutex(wolfSSL_mt4), osMutex(wolfSSL_mt5), osMutex(wolfSSL_mt6), + osMutex(wolfSSL_mt7), osMutex(wolfSSL_mt8), osMutex(wolfSSL_mt9) }; + + static osMutexId CMSIS_mutexID[CMSIS_NMUTEX] = {0}; + + int wc_InitMutex(wolfSSL_Mutex* m) + { + int i; + for (i=0; itm_sec = (int) dayclock % 60; + ret->tm_min = (int)(dayclock % 3600) / 60; + ret->tm_hour = (int) dayclock / 3600; + ret->tm_wday = (int) (dayno + 4) % 7; /* day 0 a Thursday */ + + while(dayno >= (unsigned long)YEARSIZE(year)) { + dayno -= YEARSIZE(year); + year++; + } + + ret->tm_year = year - YEAR0; + ret->tm_yday = (int)dayno; + ret->tm_mon = 0; + + while(dayno >= (unsigned long)_ytab[LEAPYEAR(year)][ret->tm_mon]) { + dayno -= _ytab[LEAPYEAR(year)][ret->tm_mon]; + ret->tm_mon++; + } + + ret->tm_mday = (int)++dayno; + ret->tm_isdst = 0; + + return ret; +} +#endif /* WOLFSSL_GMTIME */ + + +#if defined(HAVE_RTP_SYS) +#define YEAR0 1900 + +struct tm* rtpsys_gmtime(const time_t* timer) /* has a gmtime() but hangs */ +{ + static struct tm st_time; + struct tm* ret = &st_time; + + DC_RTC_CALENDAR cal; + dc_rtc_time_get(&cal, TRUE); + + ret->tm_year = cal.year - YEAR0; /* gm starts at 1900 */ + ret->tm_mon = cal.month - 1; /* gm starts at 0 */ + ret->tm_mday = cal.day; + ret->tm_hour = cal.hour; + ret->tm_min = cal.minute; + ret->tm_sec = cal.second; + + return ret; +} + +#endif /* HAVE_RTP_SYS */ + + +#if defined(MICROCHIP_TCPIP_V5) || defined(MICROCHIP_TCPIP) + +/* + * time() is just a stub in Microchip libraries. We need our own + * implementation. Use SNTP client to get seconds since epoch. + */ +time_t pic32_time(time_t* timer) +{ +#ifdef MICROCHIP_TCPIP_V5 + DWORD sec = 0; +#else + uint32_t sec = 0; +#endif + time_t localTime; + + if (timer == NULL) + timer = &localTime; + +#ifdef MICROCHIP_MPLAB_HARMONY + sec = TCPIP_SNTP_UTCSecondsGet(); +#else + sec = SNTPGetUTCSeconds(); +#endif + *timer = (time_t) sec; + + return *timer; +} + +#endif /* MICROCHIP_TCPIP || MICROCHIP_TCPIP_V5 */ + +#if defined(WOLFSSL_DEOS) + +time_t deos_time(time_t* timer) +{ + const uint32_t systemTickTimeInHz = 1000000 / systemTickInMicroseconds(); + uint32_t *systemTickPtr = systemTickPointer(); + + if (timer != NULL) + *timer = *systemTickPtr/systemTickTimeInHz; + + #if defined(CURRENT_UNIX_TIMESTAMP) + /* CURRENT_UNIX_TIMESTAMP is seconds since Jan 01 1970. 
(UTC) */ + return (time_t) *systemTickPtr/systemTickTimeInHz + CURRENT_UNIX_TIMESTAMP; + #else + return (time_t) *systemTickPtr/systemTickTimeInHz; + #endif +} +#endif /* WOLFSSL_DEOS */ + +#if defined(MICRIUM) + +time_t micrium_time(time_t* timer) +{ + CLK_TS_SEC sec; + + Clk_GetTS_Unix(&sec); + + if (timer != NULL) + *timer = sec; + + return (time_t) sec; +} + +#endif /* MICRIUM */ + +#if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX) + +time_t mqx_time(time_t* timer) +{ + time_t localTime; + TIME_STRUCT time_s; + + if (timer == NULL) + timer = &localTime; + + _time_get(&time_s); + *timer = (time_t) time_s.SECONDS; + + return *timer; +} + +#endif /* FREESCALE_MQX || FREESCALE_KSDK_MQX */ + + +#if defined(WOLFSSL_TIRTOS) && defined(USER_TIME) + +time_t XTIME(time_t * timer) +{ + time_t sec = 0; + + sec = (time_t) Seconds_get(); + + if (timer != NULL) + *timer = sec; + + return sec; +} + +#endif /* WOLFSSL_TIRTOS */ + +#if defined(WOLFSSL_XILINX) +#include "xrtcpsu.h" + +time_t XTIME(time_t * timer) +{ + time_t sec = 0; + XRtcPsu_Config* con; + XRtcPsu rtc; + + con = XRtcPsu_LookupConfig(XPAR_XRTCPSU_0_DEVICE_ID); + if (con != NULL) { + if (XRtcPsu_CfgInitialize(&rtc, con, con->BaseAddr) == XST_SUCCESS) { + sec = (time_t)XRtcPsu_GetCurrentTime(&rtc); + } + else { + WOLFSSL_MSG("Unable to initialize RTC"); + } + } + + if (timer != NULL) + *timer = sec; + + return sec; +} + +#endif /* WOLFSSL_XILINX */ + +#if defined(WOLFSSL_ZEPHYR) + +time_t z_time(time_t * timer) +{ + struct timespec ts; + + if (clock_gettime(CLOCK_REALTIME, &ts) == 0) + if (timer != NULL) + *timer = ts.tv_sec; + + return ts.tv_sec; +} + +#endif /* WOLFSSL_ZEPHYR */ + + +#if defined(WOLFSSL_WICED) + #ifndef WOLFSSL_WICED_PSEUDO_UNIX_EPOCH_TIME + #error Please define WOLFSSL_WICED_PSEUDO_UNIX_EPOCH_TIME at build time. + #endif /* WOLFSSL_WICED_PSEUDO_UNIX_EPOCH_TIME */ + +time_t wiced_pseudo_unix_epoch_time(time_t * timer) +{ + time_t epoch_time; + /* The time() function return uptime on WICED platform. 
*/
+    epoch_time = time(NULL) + WOLFSSL_WICED_PSEUDO_UNIX_EPOCH_TIME;
+
+    if (timer != NULL) {
+        *timer = epoch_time;
+    }
+    return epoch_time;
+}
+#endif /* WOLFSSL_WICED */
+
+#ifdef WOLFSSL_TELIT_M2MB
+    time_t m2mb_xtime(time_t * timer)
+    {
+        time_t myTime = 0;
+        INT32 fd = m2mb_rtc_open("/dev/rtc0", 0);
+        if (fd != -1) {
+            M2MB_RTC_TIMEVAL_T timeval;
+
+            m2mb_rtc_ioctl(fd, M2MB_RTC_IOCTL_GET_TIMEVAL, &timeval);
+
+            myTime = timeval.sec;
+
+            m2mb_rtc_close(fd);
+        }
+        return myTime;
+    }
+    #ifdef WOLFSSL_TLS13
+    time_t m2mb_xtime_ms(time_t * timer)
+    {
+        time_t myTime = 0;
+        INT32 fd = m2mb_rtc_open("/dev/rtc0", 0);
+        if (fd != -1) {
+            M2MB_RTC_TIMEVAL_T timeval;
+
+            m2mb_rtc_ioctl(fd, M2MB_RTC_IOCTL_GET_TIMEVAL, &timeval);
+
+            myTime = timeval.sec + timeval.msec;
+
+            m2mb_rtc_close(fd);
+        }
+        return myTime;
+    }
+    #endif /* WOLFSSL_TLS13 */
+    #ifndef NO_CRYPT_BENCHMARK
+    double m2mb_xtime_bench(int reset)
+    {
+        double myTime = 0;
+        INT32 fd = m2mb_rtc_open("/dev/rtc0", 0);
+        if (fd != -1) {
+            M2MB_RTC_TIMEVAL_T timeval;
+
+            m2mb_rtc_ioctl(fd, M2MB_RTC_IOCTL_GET_TIMEVAL, &timeval);
+
+            myTime = (double)timeval.sec + ((double)timeval.msec / 1000);
+
+            m2mb_rtc_close(fd);
+        }
+        return myTime;
+    }
+    #endif /* !NO_CRYPT_BENCHMARK */
+#endif /* WOLFSSL_TELIT_M2MB */
+
+#endif /* !NO_ASN_TIME */
+
+#ifndef WOLFSSL_LEANPSK
+char* mystrnstr(const char* s1, const char* s2, unsigned int n)
+{
+    unsigned int s2_len = (unsigned int)XSTRLEN(s2);
+
+    if (s2_len == 0)
+        return (char*)s1;
+
+    while (n >= s2_len && s1[0]) {
+        if (s1[0] == s2[0])
+            if (XMEMCMP(s1, s2, s2_len) == 0)
+                return (char*)s1;
+        s1++;
+        n--;
+    }
+
+    return NULL;
+}
+#endif
+
+/* custom memory wrappers */
+#ifdef WOLFSSL_NUCLEUS_1_2
+
+    /* system memory pool */
+    extern NU_MEMORY_POOL System_Memory;
+
+    void* nucleus_malloc(unsigned long size, void* heap, int type)
+    {
+        STATUS status;
+        void* stack_ptr;
+
+        status = NU_Allocate_Memory(&System_Memory, &stack_ptr, size,
+                                    NU_NO_SUSPEND);
+        /* Return the allocated pointer on success and NULL on failure. */
+        if (status == NU_SUCCESS) {
+            return stack_ptr;
+        } else {
+            return 0;
+        }
+    }
+
+    void* nucleus_realloc(void* ptr, unsigned long size, void* heap, int type)
+    {
+        DM_HEADER* old_header;
+        word32 old_size, copy_size;
+        void* new_mem;
+
+        /* if ptr is NULL, behave like malloc */
+        new_mem = nucleus_malloc(size, NULL, 0);
+        if (new_mem == 0 || ptr == 0) {
+            return new_mem;
+        }
+
+        /* calculate old memory block size */
+        /* mem pointers stored in block headers (ref dm_defs.h) */
+        old_header = (DM_HEADER*) ((byte*)ptr - DM_OVERHEAD);
+        old_size   = (byte*)old_header->dm_next_memory - (byte*)ptr;
+
+        /* copy old to new */
+        if (old_size < size) {
+            copy_size = old_size;
+        } else {
+            copy_size = size;
+        }
+        XMEMCPY(new_mem, ptr, copy_size);
+
+        /* free old */
+        nucleus_free(ptr, NULL, 0);
+
+        return new_mem;
+    }
+
+    void nucleus_free(void* ptr, void* heap, int type)
+    {
+        if (ptr != NULL)
+            NU_Deallocate_Memory(ptr);
+    }
+
+#endif /* WOLFSSL_NUCLEUS_1_2 */
+
+#if defined(WOLFSSL_TI_CRYPT) || defined(WOLFSSL_TI_HASH)
+    #include <wolfssl/wolfcrypt/port/ti/ti-ccm.h>  /* initialize and Mutex for TI Crypt Engine */
+    #include <wolfssl/wolfcrypt/port/ti/ti-hash.h> /* md5, sha1, sha224, sha256 */
+#endif
+
+#if defined(WOLFSSL_CRYPTOCELL)
+    #define WOLFSSL_CRYPTOCELL_C
+    #include <wolfssl/wolfcrypt/port/arm/cryptoCell.h> /* CC310, RTC and RNG */
+    #if !defined(NO_SHA256)
+        #define WOLFSSL_CRYPTOCELL_HASH_C
+        #include <wolfssl/wolfcrypt/port/arm/cryptoCellHash.h> /* sha256 */
+    #endif
+#endif
diff --git a/client/wolfssl/wolfcrypt/src/wolfcrypt_first.c b/client/wolfssl/wolfcrypt/src/wolfcrypt_first.c
new file mode 100644
index 0000000..e69de29
diff --git a/client/wolfssl/wolfcrypt/src/wolfcrypt_last.c
diff --git a/client/wolfssl/wolfcrypt/src/wolfcrypt_first.c b/client/wolfssl/wolfcrypt/src/wolfcrypt_first.c
new file mode 100644
index 0000000..e69de29
diff --git a/client/wolfssl/wolfcrypt/src/wolfcrypt_last.c b/client/wolfssl/wolfcrypt/src/wolfcrypt_last.c
new file mode 100644
index 0000000..e69de29
diff --git a/client/wolfssl/wolfcrypt/src/wolfevent.c b/client/wolfssl/wolfcrypt/src/wolfevent.c
new file mode 100644
index 0000000..20848cd
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/wolfevent.c
@@ -0,0 +1,283 @@
+/* wolfevent.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+
+#ifdef HAVE_WOLF_EVENT
+
+#include <wolfssl/internal.h>
+#include <wolfssl/error-ssl.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+
+#include <wolfssl/wolfcrypt/wolfevent.h>
+
+
+int wolfEvent_Init(WOLF_EVENT* event, WOLF_EVENT_TYPE type, void* context)
+{
+    if (event == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (event->state == WOLF_EVENT_STATE_PENDING) {
+        WOLFSSL_MSG("Event already pending!");
+        return BAD_COND_E;
+    }
+
+    XMEMSET(event, 0, sizeof(WOLF_EVENT));
+    event->type = type;
+    event->context = context;
+
+    return 0;
+}
+
+int wolfEvent_Poll(WOLF_EVENT* event, WOLF_EVENT_FLAG flags)
+{
+    int ret = BAD_COND_E;
+
+    /* Check hardware */
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (event->type >= WOLF_EVENT_TYPE_ASYNC_FIRST &&
+        event->type <= WOLF_EVENT_TYPE_ASYNC_LAST)
+    {
+        ret = wolfAsync_EventPoll(event, flags);
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    return ret;
+}
+
+int wolfEventQueue_Init(WOLF_EVENT_QUEUE* queue)
+{
+    int ret = 0;
+
+    if (queue == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    XMEMSET(queue, 0, sizeof(WOLF_EVENT_QUEUE));
+#ifndef SINGLE_THREADED
+    ret = wc_InitMutex(&queue->lock);
+#endif
+    return ret;
+}
+
+
+int wolfEventQueue_Push(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event)
+{
+    int ret;
+
+    if (queue == NULL || event == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+#ifndef SINGLE_THREADED
+    if ((ret = wc_LockMutex(&queue->lock)) != 0) {
+        return ret;
+    }
+#endif
+
+    ret = wolfEventQueue_Add(queue, event);
+
+#ifndef SINGLE_THREADED
+    wc_UnLockMutex(&queue->lock);
+#endif
+
+    return ret;
+}
+
+int wolfEventQueue_Pop(WOLF_EVENT_QUEUE* queue, WOLF_EVENT** event)
+{
+    int ret = 0;
+
+    if (queue == NULL || event == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+#ifndef SINGLE_THREADED
+    /* In single threaded mode "event_queue.lock" doesn't exist */
+    if ((ret = wc_LockMutex(&queue->lock)) != 0) {
+        return ret;
+    }
+#endif
+
+    /* Pop first item off queue */
+    *event = queue->head;
+    ret = wolfEventQueue_Remove(queue, *event);
+
+#ifndef SINGLE_THREADED
+    wc_UnLockMutex(&queue->lock);
+#endif
+
+    return ret;
+}
+
+/* assumes queue is locked by caller */
+int wolfEventQueue_Add(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event)
+{
+    if (queue == NULL || event == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    event->next = NULL; /* added to end */
+    event->prev = NULL;
+    if (queue->tail == NULL) {
+        queue->head = event;
+    }
+    else {
+        queue->tail->next = event;
+        event->prev = queue->tail;
+    }
+    queue->tail = event; /* add to the end either way */
+    queue->count++;
+
+    return 0;
+}
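
The functions above form a small lifecycle: initialize the queue once, initialize an event, push it onto the tail under the queue lock, and later pop it back off the head. A rough usage sketch, assuming a build with HAVE_WOLF_EVENT; WOLF_EVENT_TYPE_NONE stands in for whatever event type the caller actually tracks:

    #include <wolfssl/wolfcrypt/settings.h>
    #include <wolfssl/wolfcrypt/wolfevent.h>

    int queue_one_event(void* myContext)
    {
        WOLF_EVENT_QUEUE queue;
        WOLF_EVENT event;
        WOLF_EVENT* popped = NULL;
        int ret;

        ret = wolfEventQueue_Init(&queue);
        if (ret == 0)
            ret = wolfEvent_Init(&event, WOLF_EVENT_TYPE_NONE, myContext);
        if (ret == 0)
            ret = wolfEventQueue_Push(&queue, &event);  /* locked add to tail */
        if (ret == 0)
            ret = wolfEventQueue_Pop(&queue, &popped);  /* locked remove from head */

        wolfEventQueue_Free(&queue);                    /* releases the mutex only */
        return ret;
    }
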
+
+/* assumes queue is locked by caller */
+int wolfEventQueue_Remove(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event)
+{
+    int ret = 0;
+
+    if (queue == NULL || event == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (event == queue->head && event == queue->tail) {
+        queue->head = NULL;
+        queue->tail = NULL;
+    }
+    else if (event == queue->head) {
+        queue->head = event->next;
+        queue->head->prev = NULL;
+    }
+    else if (event == queue->tail) {
+        queue->tail = event->prev;
+        queue->tail->next = NULL;
+    }
+    else {
+        WOLF_EVENT* next = event->next;
+        WOLF_EVENT* prev = event->prev;
+        next->prev = prev;
+        prev->next = next;
+    }
+    queue->count--;
+
+    return ret;
+}
+
+int wolfEventQueue_Poll(WOLF_EVENT_QUEUE* queue, void* context_filter,
+    WOLF_EVENT** events, int maxEvents, WOLF_EVENT_FLAG flags, int* eventCount)
+{
+    WOLF_EVENT* event;
+    int ret = 0, count = 0;
+
+    if (queue == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+#ifndef SINGLE_THREADED
+    /* In single threaded mode "event_queue.lock" doesn't exist */
+    if ((ret = wc_LockMutex(&queue->lock)) != 0) {
+        return ret;
+    }
+#endif
+
+    /* iterate event queue */
+    for (event = queue->head; event != NULL; event = event->next)
+    {
+        /* optional filter based on context */
+        if (context_filter == NULL || event->context == context_filter) {
+
+            /* poll event */
+            ret = wolfEvent_Poll(event, flags);
+            if (ret < 0) break; /* exit for */
+
+            /* If event is done then process */
+            if (event->state == WOLF_EVENT_STATE_DONE) {
+                /* remove from queue */
+                ret = wolfEventQueue_Remove(queue, event);
+                if (ret < 0) break; /* exit for */
+
+                /* return pointer in 'events' arg */
+                if (events) {
+                    events[count] = event; /* return pointer */
+                }
+                count++;
+
+                /* check to make sure our event list isn't full */
+                if (events && count >= maxEvents) {
+                    break; /* exit for */
+                }
+            }
+        }
+    }
+
+#ifndef SINGLE_THREADED
+    wc_UnLockMutex(&queue->lock);
+#endif
+
+    /* return number of properly populated events */
+    if (eventCount) {
+        *eventCount = count;
+    }
+
+    return ret;
+}
+
+int wolfEventQueue_Count(WOLF_EVENT_QUEUE* queue)
+{
+    int ret;
+
+    if (queue == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+#ifndef SINGLE_THREADED
+    /* In single threaded mode "event_queue.lock" doesn't exist */
+    if ((ret = wc_LockMutex(&queue->lock)) != 0) {
+        return ret;
+    }
+#endif
+
+    ret = queue->count;
+
+#ifndef SINGLE_THREADED
+    wc_UnLockMutex(&queue->lock);
+#endif
+
+    return ret;
+}
+
+void wolfEventQueue_Free(WOLF_EVENT_QUEUE* queue)
+{
+    if (queue) {
+    #ifndef SINGLE_THREADED
+        wc_FreeMutex(&queue->lock);
+    #endif
+    }
+}
+
+#endif /* HAVE_WOLF_EVENT */
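
wolfEventQueue_Poll() above walks the list under the lock, polls each event (optionally restricted to one context), and unlinks those that reach WOLF_EVENT_STATE_DONE, handing them back through the caller's array. A sketch of draining up to eight completed events for a single context; the WOLF_POLL_FLAG_CHECK_HW flag is assumed to come from wolfevent.h:

    #include <wolfssl/wolfcrypt/settings.h>
    #include <wolfssl/wolfcrypt/wolfevent.h>

    int drain_done_events(WOLF_EVENT_QUEUE* queue, void* ctx)
    {
        WOLF_EVENT* done[8];
        int count = 0;
        int ret = wolfEventQueue_Poll(queue, ctx, done, 8,
                                      WOLF_POLL_FLAG_CHECK_HW, &count);

        /* 'count' events completed and were removed from the queue */
        return (ret < 0) ? ret : count;
    }
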
diff --git a/client/wolfssl/wolfcrypt/src/wolfmath.c b/client/wolfssl/wolfcrypt/src/wolfmath.c
new file mode 100644
index 0000000..0c17a0a
--- /dev/null
+++ b/client/wolfssl/wolfcrypt/src/wolfmath.c
@@ -0,0 +1,381 @@
+/* wolfmath.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+/* common functions for either math library */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+/* in case user set USE_FAST_MATH there */
+#include <wolfssl/wolfcrypt/settings.h>
+
+#include <wolfssl/wolfcrypt/integer.h>
+
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+#if defined(USE_FAST_MATH) || !defined(NO_BIG_INT)
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include <wolfssl/wolfcrypt/async.h>
+#endif
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+
+#if !defined(WC_NO_CACHE_RESISTANT) && \
+    ((defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT)) || \
+     (defined(USE_FAST_MATH) && defined(TFM_TIMING_RESISTANT)))
+
+    /* all off / all on pointer addresses for constant calculations */
+    /* ecc.c uses same table */
+    const wolfssl_word wc_off_on_addr[2] =
+    {
+    #if defined(WC_64BIT_CPU)
+        W64LIT(0x0000000000000000),
+        W64LIT(0xffffffffffffffff)
+    #elif defined(WC_16BIT_CPU)
+        0x0000U,
+        0xffffU
+    #else
+        /* 32 bit */
+        0x00000000U,
+        0xffffffffU
+    #endif
+    };
+#endif
+
+
+#if !defined(WOLFSSL_SP_MATH)
+int get_digit_count(mp_int* a)
+{
+    if (a == NULL)
+        return 0;
+
+    return a->used;
+}
+#endif
+
+mp_digit get_digit(mp_int* a, int n)
+{
+    if (a == NULL)
+        return 0;
+
+    return (n >= a->used || n < 0) ? 0 : a->dp[n];
+}
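
The wc_off_on_addr table above, and the `(mp_digit)0 - copy` mask in mp_cond_copy() below, both rely on the standard branch-free select idiom: subtracting a 0/1 flag from zero yields an all-zeros or all-ones mask, and `b ^ ((a ^ b) & mask)` then evaluates to either b or a without a data-dependent branch for a cache-timing attacker to observe. A tiny standalone demonstration:

    #include <stdio.h>

    int main(void)
    {
        unsigned int a = 0xAAAAAAAAu, b = 0x55555555u;
        int copy = 1;                                 /* 0 or 1 */
        unsigned int mask = (unsigned int)0 - (unsigned int)copy;

        b ^= (a ^ b) & mask;                          /* b = copy ? a : b */
        printf("0x%08X\n", b);                        /* prints 0xAAAAAAAA */
        return 0;
    }
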
+
+/* Conditionally copy a into b. Performed in constant time.
+ *
+ * a     MP integer to copy.
+ * copy  On 1, copy a into b. On 0 leave b unchanged.
+ * b     MP integer to copy into.
+ * returns BAD_FUNC_ARG when a or b is NULL, MEMORY_E when growing b fails and
+ *         MP_OKAY otherwise.
+ */
+int mp_cond_copy(mp_int* a, int copy, mp_int* b)
+{
+    int err = MP_OKAY;
+    int i;
+    mp_digit mask = (mp_digit)0 - copy;
+
+    if (a == NULL || b == NULL)
+        err = BAD_FUNC_ARG;
+
+    /* Ensure b has enough space to copy a into */
+    if (err == MP_OKAY)
+        err = mp_grow(b, a->used + 1);
+    if (err == MP_OKAY) {
+        /* When mask is 0, b is unchanged.
+         * When mask is all set, b ^ (b ^ a) = a.
+         */
+        /* Conditionally copy all digits and then the number of used digits.
+         * get_digit() returns 0 when the index is past the available digits.
+         */
+        for (i = 0; i < a->used; i++) {
+            b->dp[i] ^= (get_digit(a, i) ^ get_digit(b, i)) & mask;
+        }
+        for (; i < b->used; i++) {
+            b->dp[i] ^= (get_digit(a, i) ^ get_digit(b, i)) & mask;
+        }
+        b->used ^= (a->used ^ b->used) & (int)mask;
+    }
+
+    return err;
+}
+
+#ifndef WC_NO_RNG
+int get_rand_digit(WC_RNG* rng, mp_digit* d)
+{
+    return wc_RNG_GenerateBlock(rng, (byte*)d, sizeof(mp_digit));
+}
+
+#ifdef WC_RSA_BLINDING
+int mp_rand(mp_int* a, int digits, WC_RNG* rng)
+{
+    int ret = 0;
+    int cnt = digits * sizeof(mp_digit);
+#if !defined(USE_FAST_MATH) && !defined(WOLFSSL_SP_MATH)
+    int i;
+#endif
+
+    if (rng == NULL) {
+        ret = MISSING_RNG_E;
+    }
+    else if (a == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+
+#if !defined(USE_FAST_MATH) && !defined(WOLFSSL_SP_MATH)
+    /* allocate space for digits */
+    if (ret == MP_OKAY) {
+        ret = mp_set_bit(a, digits * DIGIT_BIT - 1);
+    }
+#else
+#if defined(WOLFSSL_SP_MATH)
+    if ((ret == MP_OKAY) && (digits > SP_INT_DIGITS))
+#else
+    if ((ret == MP_OKAY) && (digits > FP_SIZE))
+#endif
+    {
+        ret = BAD_FUNC_ARG;
+    }
+    if (ret == MP_OKAY) {
+        a->used = digits;
+    }
+#endif
+    /* fill the data with random bytes */
+    if (ret == MP_OKAY) {
+        ret = wc_RNG_GenerateBlock(rng, (byte*)a->dp, cnt);
+    }
+    if (ret == MP_OKAY) {
+#if !defined(USE_FAST_MATH) && !defined(WOLFSSL_SP_MATH)
+        /* Mask down each digit to only bits used */
+        for (i = 0; i < a->used; i++) {
+            a->dp[i] &= MP_MASK;
+        }
+#endif
+        /* ensure top digit is not zero */
+        while ((ret == MP_OKAY) && (a->dp[a->used - 1] == 0)) {
+            ret = get_rand_digit(rng, &a->dp[a->used - 1]);
+#if !defined(USE_FAST_MATH) && !defined(WOLFSSL_SP_MATH)
+            a->dp[a->used - 1] &= MP_MASK;
+#endif
+        }
+    }
+
+    return ret;
+}
+#endif /* WC_RSA_BLINDING */
+#endif
+
+/* export an mp_int as unsigned char or hex string
+ * encType is WC_TYPE_UNSIGNED_BIN or WC_TYPE_HEX_STR
+ * return MP_OKAY on success */
+int wc_export_int(mp_int* mp, byte* buf, word32* len, word32 keySz,
+    int encType)
+{
+    int err;
+
+    if (mp == NULL || buf == NULL || len == NULL)
+        return BAD_FUNC_ARG;
+
+    /* check buffer size */
+    if (*len < keySz) {
+        *len = keySz;
+        return BUFFER_E;
+    }
+
+    *len = keySz;
+    XMEMSET(buf, 0, *len);
+
+    if (encType == WC_TYPE_HEX_STR) {
+    #ifdef WC_MP_TO_RADIX
+        err = mp_tohex(mp, (char*)buf);
+    #else
+        err = NOT_COMPILED_IN;
+    #endif
+    }
+    else {
+        err = mp_to_unsigned_bin(mp, buf + (keySz - mp_unsigned_bin_size(mp)));
+    }
+
+    return err;
+}
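
wc_export_int() serializes an mp_int into a fixed-width buffer, zero-padded on the left for WC_TYPE_UNSIGNED_BIN so that, for example, a numerically short ECC coordinate still occupies the full field size. A minimal sketch for the common 32-byte case, assuming the wolfmath/integer headers from this import:

    #include <wolfssl/wolfcrypt/settings.h>
    #include <wolfssl/wolfcrypt/integer.h>
    #include <wolfssl/wolfcrypt/wolfmath.h>

    /* export 'mp' as exactly 32 big-endian bytes (e.g. a P-256 scalar) */
    int export_fixed32(mp_int* mp, byte* out32)
    {
        word32 len = 32;
        return wc_export_int(mp, out32, &len, 32, WC_TYPE_UNSIGNED_BIN);
    }
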
WOLFSSL_MSG("wc_bigint_export: Truncating output"); + sz = *outlen; + } + + if (a->buf) { + XMEMCPY(out, a->buf, sz); + } + + *outlen = sz; + + return MP_OKAY; +} + +void wc_bigint_zero(WC_BIGINT* a) +{ + if (a && a->buf) { + ForceZero(a->buf, a->len); + } +} + +void wc_bigint_free(WC_BIGINT* a) +{ + if (a) { + if (a->buf) { + XFREE(a->buf, a->heap, DYNAMIC_TYPE_WOLF_BIGINT); + } + a->buf = NULL; + a->len = 0; + } +} + +/* sz: make sure the buffer is at least that size and zero padded. + * A `sz == 0` will use the size of `src`. + * The calulcates sz is stored into dst->len in `wc_bigint_alloc`. + */ +int wc_mp_to_bigint_sz(mp_int* src, WC_BIGINT* dst, word32 sz) +{ + int err; + word32 x, y; + + if (src == NULL || dst == NULL) + return BAD_FUNC_ARG; + + /* get size of source */ + x = mp_unsigned_bin_size(src); + if (sz < x) + sz = x; + + /* make sure destination is allocated and large enough */ + err = wc_bigint_alloc(dst, sz); + if (err == MP_OKAY) { + + /* leading zero pad */ + y = sz - x; + XMEMSET(dst->buf, 0, y); + + /* export src as unsigned bin to destination buf */ + err = mp_to_unsigned_bin(src, dst->buf + y); + } + + return err; +} + +int wc_mp_to_bigint(mp_int* src, WC_BIGINT* dst) +{ + if (src == NULL || dst == NULL) + return BAD_FUNC_ARG; + + return wc_mp_to_bigint_sz(src, dst, 0); +} + +int wc_bigint_to_mp(WC_BIGINT* src, mp_int* dst) +{ + int err; + + if (src == NULL || dst == NULL) + return BAD_FUNC_ARG; + + if (src->buf == NULL) + return BAD_FUNC_ARG; + + err = mp_read_unsigned_bin(dst, src->buf, src->len); + wc_bigint_free(src); + + return err; +} +#endif /* HAVE_WOLF_BIGINT */ + +#endif /* USE_FAST_MATH || !NO_BIG_INT */ -- cgit v1.2.3